diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cff3b7d89d71fff1553e1b2106463dad65716a9c --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +See https://github.com/k2-fsa/icefall/pull/1766 for details diff --git a/data/lang_bpe_500/bpe.model b/data/lang_bpe_500/bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..0a7fdb4e15f063e06d9936c71e13525b31c588e3 --- /dev/null +++ b/data/lang_bpe_500/bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c53433de083c4a6ad12d034550ef22de68cec62c4f58932a7b6b8b2f1e743fa5 +size 244865 diff --git a/data/lang_bpe_500/tokens.txt b/data/lang_bpe_500/tokens.txt new file mode 100644 index 0000000000000000000000000000000000000000..b4d1bf82e68de1354f2a3f5fd51094d61d65c77c --- /dev/null +++ b/data/lang_bpe_500/tokens.txt @@ -0,0 +1,502 @@ + 0 + 1 + 2 +S 3 +▁THE 4 +▁A 5 +T 6 +▁AND 7 +ED 8 +▁OF 9 +▁TO 10 +E 11 +D 12 +N 13 +ING 14 +▁IN 15 +Y 16 +M 17 +C 18 +▁I 19 +A 20 +P 21 +▁HE 22 +R 23 +O 24 +L 25 +RE 26 +I 27 +U 28 +ER 29 +▁IT 30 +LY 31 +▁THAT 32 +▁WAS 33 +▁ 34 +▁S 35 +AR 36 +▁BE 37 +F 38 +▁C 39 +IN 40 +B 41 +▁FOR 42 +OR 43 +LE 44 +' 45 +▁HIS 46 +▁YOU 47 +AL 48 +▁RE 49 +V 50 +▁B 51 +G 52 +RI 53 +▁E 54 +▁WITH 55 +▁T 56 +▁AS 57 +LL 58 +▁P 59 +▁HER 60 +ST 61 +▁HAD 62 +▁SO 63 +▁F 64 +W 65 +CE 66 +▁IS 67 +ND 68 +▁NOT 69 +TH 70 +▁BUT 71 +EN 72 +▁SHE 73 +▁ON 74 +VE 75 +ON 76 +SE 77 +▁DE 78 +UR 79 +▁G 80 +CH 81 +K 82 +TER 83 +▁AT 84 +IT 85 +▁ME 86 +RO 87 +NE 88 +RA 89 +ES 90 +IL 91 +NG 92 +IC 93 +▁NO 94 +▁HIM 95 +ENT 96 +IR 97 +▁WE 98 +H 99 +▁DO 100 +▁ALL 101 +▁HAVE 102 +LO 103 +▁BY 104 +▁MY 105 +▁MO 106 +▁THIS 107 +LA 108 +▁ST 109 +▁WHICH 110 +▁CON 111 +▁THEY 112 +CK 113 +TE 114 +▁SAID 115 +▁FROM 116 +▁GO 117 +▁WHO 118 +▁TH 119 +▁OR 120 +▁D 121 +▁W 122 +VER 123 +LI 124 +▁SE 125 +▁ONE 126 +▁CA 127 +▁AN 128 +▁LA 129 +▁WERE 130 +EL 131 +▁HA 132 +▁MAN 133 +▁FA 134 +▁EX 135 +AD 136 +▁SU 137 +RY 138 +▁MI 139 +AT 140 +▁BO 141 +▁WHEN 142 +AN 143 +THER 144 +PP 145 +ATION 146 +▁FI 147 +▁WOULD 148 +▁PRO 149 +OW 150 +ET 151 +▁O 152 +▁THERE 153 +▁HO 154 +ION 155 +▁WHAT 156 +▁FE 157 +▁PA 158 +US 159 +MENT 160 +▁MA 161 +UT 162 +▁OUT 163 +▁THEIR 164 +▁IF 165 +▁LI 166 +▁K 167 +▁WILL 168 +▁ARE 169 +ID 170 +▁RO 171 +DE 172 +TION 173 +▁WA 174 +PE 175 +▁UP 176 +▁SP 177 +▁PO 178 +IGHT 179 +▁UN 180 +RU 181 +▁LO 182 +AS 183 +OL 184 +▁LE 185 +▁BEEN 186 +▁SH 187 +▁RA 188 +▁SEE 189 +KE 190 +UL 191 +TED 192 +▁SA 193 +UN 194 +UND 195 +ANT 196 +▁NE 197 +IS 198 +▁THEM 199 +CI 200 +GE 201 +▁COULD 202 +▁DIS 203 +OM 204 +ISH 205 +HE 206 +EST 207 +▁SOME 208 +ENCE 209 +ITY 210 +IVE 211 +▁US 212 +▁MORE 213 +▁EN 214 +ARD 215 +ATE 216 +▁YOUR 217 +▁INTO 218 +▁KNOW 219 +▁CO 220 +ANCE 221 +▁TIME 222 +▁WI 223 +▁YE 224 +AGE 225 +▁NOW 226 +TI 227 +FF 228 +ABLE 229 +▁VERY 230 +▁LIKE 231 +AM 232 +HI 233 +Z 234 +▁OTHER 235 +▁THAN 236 +▁LITTLE 237 +▁DID 238 +▁LOOK 239 +TY 240 +ERS 241 +▁CAN 242 +▁CHA 243 +▁AR 244 +X 245 +FUL 246 +UGH 247 +▁BA 248 +▁DAY 249 +▁ABOUT 250 +TEN 251 +IM 252 +▁ANY 253 +▁PRE 254 +▁OVER 255 +IES 256 +NESS 257 +ME 258 +BLE 259 +▁M 260 +ROW 261 +▁HAS 262 +▁GREAT 263 +▁VI 264 +TA 265 +▁AFTER 266 +PER 267 +▁AGAIN 268 +HO 269 +SH 270 +▁UPON 271 +▁DI 272 +▁HAND 273 +▁COM 274 +IST 275 +TURE 276 +▁STA 277 +▁THEN 278 +▁SHOULD 279 +▁GA 280 +OUS 281 +OUR 282 +▁WELL 283 +▁ONLY 284 +MAN 285 +▁GOOD 286 +▁TWO 287 +▁MAR 288 +▁SAY 289 +▁HU 290 +TING 291 +▁OUR 292 +RESS 293 +▁DOWN 294 +IOUS 295 +▁BEFORE 296 +▁DA 297 +▁NA 298 +QUI 299 +▁MADE 300 +▁EVERY 301 +▁OLD 302 +▁EVEN 303 +IG 304 +▁COME 305 +▁GRA 306 
+▁RI 307 +▁LONG 308 +OT 309 +SIDE 310 +WARD 311 +▁FO 312 +▁WHERE 313 +MO 314 +LESS 315 +▁SC 316 +▁MUST 317 +▁NEVER 318 +▁HOW 319 +▁CAME 320 +▁SUCH 321 +▁RU 322 +▁TAKE 323 +▁WO 324 +▁CAR 325 +UM 326 +AK 327 +▁THINK 328 +▁MUCH 329 +▁MISTER 330 +▁MAY 331 +▁JO 332 +▁WAY 333 +▁COMP 334 +▁THOUGHT 335 +▁STO 336 +▁MEN 337 +▁BACK 338 +▁DON 339 +J 340 +▁LET 341 +▁TRA 342 +▁FIRST 343 +▁JUST 344 +▁VA 345 +▁OWN 346 +▁PLA 347 +▁MAKE 348 +ATED 349 +▁HIMSELF 350 +▁WENT 351 +▁PI 352 +GG 353 +RING 354 +▁DU 355 +▁MIGHT 356 +▁PART 357 +▁GIVE 358 +▁IMP 359 +▁BU 360 +▁PER 361 +▁PLACE 362 +▁HOUSE 363 +▁THROUGH 364 +IAN 365 +▁SW 366 +▁UNDER 367 +QUE 368 +▁AWAY 369 +▁LOVE 370 +QUA 371 +▁LIFE 372 +▁GET 373 +▁WITHOUT 374 +▁PASS 375 +▁TURN 376 +IGN 377 +▁HEAD 378 +▁MOST 379 +▁THOSE 380 +▁SHALL 381 +▁EYES 382 +▁COL 383 +▁STILL 384 +▁NIGHT 385 +▁NOTHING 386 +ITION 387 +HA 388 +▁TELL 389 +▁WORK 390 +▁LAST 391 +▁NEW 392 +▁FACE 393 +▁HI 394 +▁WORD 395 +▁FOUND 396 +▁COUNT 397 +▁OB 398 +▁WHILE 399 +▁SHA 400 +▁MEAN 401 +▁SAW 402 +▁PEOPLE 403 +▁FRIEND 404 +▁THREE 405 +▁ROOM 406 +▁SAME 407 +▁THOUGH 408 +▁RIGHT 409 +▁CHILD 410 +▁FATHER 411 +▁ANOTHER 412 +▁HEART 413 +▁WANT 414 +▁TOOK 415 +OOK 416 +▁LIGHT 417 +▁MISSUS 418 +▁OPEN 419 +▁JU 420 +▁ASKED 421 +PORT 422 +▁LEFT 423 +▁JA 424 +▁WORLD 425 +▁HOME 426 +▁WHY 427 +▁ALWAYS 428 +▁ANSWER 429 +▁SEEMED 430 +▁SOMETHING 431 +▁GIRL 432 +▁BECAUSE 433 +▁NAME 434 +▁TOLD 435 +▁NI 436 +▁HIGH 437 +IZE 438 +▁WOMAN 439 +▁FOLLOW 440 +▁RETURN 441 +▁KNEW 442 +▁EACH 443 +▁KIND 444 +▁JE 445 +▁ACT 446 +▁LU 447 +▁CERTAIN 448 +▁YEARS 449 +▁QUITE 450 +▁APPEAR 451 +▁BETTER 452 +▁HALF 453 +▁PRESENT 454 +▁PRINCE 455 +SHIP 456 +▁ALSO 457 +▁BEGAN 458 +▁HAVING 459 +▁ENOUGH 460 +▁PERSON 461 +▁LADY 462 +▁WHITE 463 +▁COURSE 464 +▁VOICE 465 +▁SPEAK 466 +▁POWER 467 +▁MORNING 468 +▁BETWEEN 469 +▁AMONG 470 +▁KEEP 471 +▁WALK 472 +▁MATTER 473 +▁TEA 474 +▁BELIEVE 475 +▁SMALL 476 +▁TALK 477 +▁FELT 478 +▁HORSE 479 +▁MYSELF 480 +▁SIX 481 +▁HOWEVER 482 +▁FULL 483 +▁HERSELF 484 +▁POINT 485 +▁STOOD 486 +▁HUNDRED 487 +▁ALMOST 488 +▁SINCE 489 +▁LARGE 490 +▁LEAVE 491 +▁PERHAPS 492 +▁DARK 493 +▁SUDDEN 494 +▁REPLIED 495 +▁ANYTHING 496 +▁WONDER 497 +▁UNTIL 498 +Q 499 +#0 500 +#1 501 diff --git a/decoding_results/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt b/decoding_results/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt new file mode 100644 index 0000000000000000000000000000000000000000..89b073dc600c0ef3e7c1aebbe4901226bdbee05a --- /dev/null +++ b/decoding_results/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt @@ -0,0 +1,11950 @@ +%WER = 1.96 +Errors: 121 insertions, 71 deletions, 839 substitutions, over 52576 reference words (51666 correct) +Search below for sections starting with PER-UTT DETAILS:, SUBSTITUTIONS:, DELETIONS:, INSERTIONS:, PER-WORD STATS: + +PER-UTT DETAILS: corr or (ref->hyp) +1320-122612-0011-286: IF A ROCK OR A RIVULET OR A BIT OF EARTH HARDER THAN COMMON SEVERED THE LINKS OF THE (CLEW->CLUE) THEY FOLLOWED THE TRUE EYE OF THE SCOUT RECOVERED THEM AT A DISTANCE AND SELDOM RENDERED THE DELAY OF A SINGLE MOMENT NECESSARY +1221-135767-0009-196: BUT PEARL WHO WAS A DAUNTLESS CHILD AFTER FROWNING STAMPING HER FOOT AND SHAKING HER LITTLE HAND WITH A VARIETY OF THREATENING GESTURES SUDDENLY MADE A RUSH AT THE KNOT OF HER ENEMIES AND PUT THEM ALL TO FLIGHT +4077-13751-0006-1080: THEIR EYES WERE FROM THE FIRST TURNED IN ANTICIPATION TOWARD THE EVENING SUN NOT MERELY THAT THE WORK OF 
PROSELYTING SHOULD BE CARRIED ON IN THE WEST BUT THAT THE HEADQUARTERS OF THE CHURCH SHOULD BE THERE ESTABLISHED +3570-5694-0010-930: THE OBJECTION OF COURSE PRESENTS ITSELF THAT EXPENDITURE ON WOMEN'S DRESS AND HOUSEHOLD PARAPHERNALIA IS AN OBVIOUS EXCEPTION TO THIS RULE BUT IT WILL APPEAR IN THE SEQUEL THAT THIS EXCEPTION IS MUCH MORE OBVIOUS THAN SUBSTANTIAL +2094-142345-0014-525: THE FACT THAT IT WAS CHURNING DAY WAS ANOTHER REASON WHY IT WAS INCONVENIENT TO HAVE THE (WHITTAWS->WIDOWS) AND WHY CONSEQUENTLY MISSUS POYSER SHOULD SCOLD MOLLY THE HOUSEMAID WITH UNUSUAL SEVERITY +1284-134647-0004-271: SOME OF THE PENAL REGULATIONS WERE COPIED FROM THE EDICTS OF DIOCLETIAN AND THIS METHOD OF CONVERSION WAS APPLAUDED BY THE SAME BISHOPS WHO HAD FELT THE HAND OF OPPRESSION AND PLEADED FOR THE RIGHTS OF HUMANITY +8224-274381-0015-2295: THOUGH THE DISCIPLINE OF THE FORMER PARLIAMENTARY ARMY WAS NOT CONTEMPTIBLE A MORE EXACT PLAN WAS INTRODUCED AND RIGOROUSLY EXECUTED BY THESE NEW COMMANDERS +1995-1837-0014-495: HE STOOD A MOMENT BEWILDERED THEN TURNED AND RUSHED UPON THE ISLAND A GREAT SHEET OF DAZZLING SUNLIGHT SWEPT THE PLACE AND BENEATH LAY A MIGHTY MASS OF OLIVE GREEN THICK TALL WET AND WILLOWY +6930-81414-0009-2010: I AWOKE TO CONSCIOUSNESS FIGHTING AT FIRST IT SEEMED AS IF I WAS FIGHTING WITH A PHANTOM BUT GRADUALLY MY OPPONENT BECAME MORE REAL TO ME IT WAS (KAFFAR->KAFFIR) +237-126133-0001-615: EVERY CHANCE SHE COULD STEAL AFTER PRACTICE HOURS WERE OVER AND AFTER THE CLAMOROUS DEMANDS OF THE BOYS UPON HER TIME WERE FULLY SATISFIED WAS SEIZED TO FLY ON THE WINGS OF THE WIND TO THE FLOWERS +260-123286-0019-721: I SUPPOSE PROFESSOR LIEDENBROCK WAS OF MY OPINION TOO AND EVEN SHARED MY FEARS FOR AFTER HAVING EXAMINED THE PICK HIS EYES TRAVERSED THE OCEAN FROM SIDE TO SIDE +6829-68769-0000-1860: KENNETH AND BETH REFRAINED FROM TELLING THE OTHER GIRLS OR UNCLE JOHN OF OLD WILL ROGERS'S VISIT BUT THEY GOT MISTER WATSON IN THE LIBRARY AND QUESTIONED HIM CLOSELY ABOUT THE PENALTY FOR FORGING A CHECK +8230-279154-0028-2340: I CAN SET TO WORK NOW TO REMEMBER THINGS I NEVER REMEMBERED BEFORE SUCH AS WHAT I HAD TO EAT FOR BREAKFAST THIS MORNING AND IT CAN HARDLY BE (WHOLLY->HOLY) HABIT THAT ENABLES ME TO DO THIS +5105-28240-0008-1425: A NARROW CHANNEL FORMED A PASSAGE THROUGH THE RIDGE OF ROCKS THAT PROTECTED IT FROM THE OPEN SEA AND WHICH EVEN IN THE ROUGHEST WEATHER WOULD (ENSURE->INSURE) THE CALMNESS OF ITS WATERS +672-122797-0002-1787: HE DID NOT THINK OF THE WARM SUN AND OF THE FRESH AIR HE DID NOT CARE FOR THE LITTLE COTTAGE CHILDREN THAT RAN ABOUT AND PRATTLED WHEN THEY WERE IN THE WOODS LOOKING FOR WILD STRAWBERRIES +8555-284447-0021-2522: THEN THEY SPED IN GREAT HASTE FOR THE DOOR AND THE GOAT GAVE A FINAL BUTT THAT SENT THE ROW OF ROYAL LADIES ALL DIVING INTO THE CORRIDOR IN ANOTHER TANGLE WHEREUPON THEY SHRIEKED IN A MANNER THAT TERRIFIED (EVERYONE->EVERY ONE) WITHIN SOUND OF THEIR VOICES +8555-284447-0006-2507: IN ONE PLACE AT THE HEAD OF THE ROOM WAS A RAISED PLATFORM FOR THE ROYAL FAMILY WITH ELEGANT THRONE CHAIRS FOR THE KING AND QUEEN AND SIX SMALLER BUT RICHLY UPHOLSTERED CHAIRS FOR THE SNUBNOSED PRINCESSES +8455-210777-0030-2386: MISTER NEVERBEND BEGAN THE CAPTAIN AND I OBSERVED THAT UP TO THAT MOMENT HE HAD GENERALLY ADDRESSED ME AS PRESIDENT IT CANNOT BE DENIED THAT WE HAVE COME HERE ON AN UNPLEASANT MISSION +237-134500-0018-677: HE REACHED UP AMONG THE BRANCHES AND BEGAN TO PICK THE SWEET INSIPID FRUIT LONG IVORY (COLORED->COLOURED) BERRIES TIPPED WITH FAINT PINK LIKE WHITE CORAL 
THAT FALL TO THE GROUND UNHEEDED ALL SUMMER THROUGH +1284-1181-0013-258: OJO BECAME A BIT UNEASY AT THIS FOR HE HAD ALREADY PUT QUITE A LOT OF THE CLEVERNESS POWDER IN THE DISH BUT HE DARED NOT INTERFERE AND SO HE COMFORTED HIMSELF WITH THE THOUGHT THAT ONE CANNOT HAVE TOO MUCH CLEVERNESS +2961-961-0019-916: FOR AT THE PERIL OF HER OWN EXISTENCE AND WHEN THE OTHER (HELLENES->HELLENS) HAD DESERTED HER SHE REPELLED THE INVADER AND OF HER OWN ACCORD GAVE LIBERTY TO ALL THE NATIONS WITHIN THE PILLARS +5639-40744-0028-1592: I HAVE GREAT THINGS TO TELL YOU SENOR SAID DONA (ESTAFANIA->STEPHANIA) TO HER HUSBAND THE CREAM AND SUBSTANCE OF WHICH IS THIS THE FAINTING GIRL BEFORE YOU IS YOUR DAUGHTER AND (THAT->THE) BOY IS YOUR GRANDSON +260-123440-0004-767: ALICE TOOK UP THE FAN AND GLOVES AND AS THE HALL WAS VERY HOT SHE KEPT FANNING HERSELF ALL THE TIME SHE WENT ON TALKING DEAR DEAR HOW QUEER EVERYTHING IS TO DAY +4077-13754-0016-1112: AND SO THE STORY OF MORMONISM RUNS ON ITS FINALE HAS NOT YET BEEN WRITTEN THE CURRENT PRESS PRESENTS CONTINUOUSLY NEW STAGES OF ITS PROGRESS NEW DEVELOPMENTS OF ITS PLAN +4992-41806-0008-1396: OLIVE HAS ANOTHER LOVELY GIFT FOR THE YELLOW HOUSE SAID MOTHER CAREY RISING AND TO CARRY OUT THE NEXT PART OF THE PROGRAMME WE SHALL HAVE TO GO IN PROCESSION UPSTAIRS TO MY BEDROOM +5105-28240-0009-1426: SLIGHTLY CHANGING HER COURSE SHE FIRST STRUCK HER MAINSAIL AND IN ORDER TO FACILITATE THE MOVEMENTS OF HER HELMSMAN SOON CARRIED NOTHING BUT HER TWO TOPSAILS BRIGANTINE AND JIB +2961-961-0004-901: LISTEN THEN SOCRATES TO A TALE OF (SOLON'S->SOLONS) WHO BEING THE FRIEND OF (DROPIDAS->DROPIDUS) MY GREAT GRANDFATHER TOLD IT TO MY GRANDFATHER (CRITIAS->CRITIUS) AND HE TOLD ME +4970-29093-0011-1292: (O->OH) VERY WELL SAID (GRINGO->GRINGE) TURNING AWAY WITH A SHADE OF CONTEMPT YOU'LL FIND IF YOU ARE GOING INTO LITERATURE AND NEWSPAPER WORK THAT YOU CAN'T AFFORD A CONSCIENCE LIKE THAT +2300-131720-0014-586: MISTER EDISON WAS A LEADER FAR AHEAD OF THE TIME +4446-2275-0010-1185: ALEXANDER LEANED FORWARD AND WARMED HIS HANDS BEFORE THE BLAZE +1284-1180-0013-225: AND YOU MUST BE OJO THE UNLUCKY SHE ADDED +1284-1180-0014-226: OJO HAD NEVER EATEN SUCH A FINE MEAL IN ALL HIS LIFE +1580-141083-0041-375: LET US HEAR THE SUSPICIONS I WILL LOOK AFTER THE PROOFS +2830-3980-0028-825: THIS SHOULD GO FAR IN SHUTTING THE MOUTHS OF THE FALSE APOSTLES +1089-134686-0016-16: THEN YOU CAN ASK HIM QUESTIONS ON THE CATECHISM DEDALUS +2094-142345-0044-555: SAID MISTER IRWINE WITH HIS STATELY CORDIALITY +61-70968-0029-1710: THE SQUIRE HELPED TO THRUST THEM ALL IN AND ENTERED SWIFTLY HIMSELF +5142-33396-0023-1485: IT WAS SO DARK THAT I COULD SEE NOTHING BUT A FEW SPARKS ON THE HEARTH +1089-134691-0007-45: SOON THE WHOLE BRIDGE WAS TREMBLING AND RESOUNDING +672-122797-0060-1845: IT IS A VERY STUPID STORY +1580-141084-0018-406: I THINK SO YOU HAVE FORMED A CONCLUSION +4446-2273-0002-1140: LAMB WOULDN'T CARE A GREAT DEAL ABOUT MANY OF THEM I FANCY +1089-134686-0001-1: STUFF IT INTO YOU HIS BELLY COUNSELLED HIM +8555-284449-0009-2535: YOU ARE MATE REPLIED THE SAILOR +121-127105-0032-166: YES BUT THAT'S JUST THE BEAUTY OF HER PASSION +237-134500-0016-675: I DON'T KNOW ALL OF THEM BUT I KNOW LINDENS ARE +1221-135767-0008-195: COME THEREFORE AND LET US FLING MUD AT THEM +8455-210777-0029-2385: THINKING OF ALL THIS I WENT TO SLEEP +260-123440-0017-780: THAT WILL BE A QUEER THING TO BE SURE +2094-142345-0045-556: OH SIR DON'T MENTION IT SAID MISSUS POYSER +4992-23283-0006-1350: PERHAPS I AM MISTAKEN ANSWERED SHE 
+8463-294825-0018-2460: (METER->METRE) ROUGHLY ONE YARD THREE INCHES +4970-29095-0000-1305: SHE WAS TIRED OF OTHER THINGS +1580-141083-0011-345: A BROKEN TIP OF LEAD WAS LYING THERE ALSO +7021-85628-0025-2085: BUT HIS MOTHER HUGGED HIM CLOSE +61-70968-0044-1725: IT WILL NOT BE SAFE FOR YOU TO STAY HERE NOW +908-157963-0002-2565: WHY FADES THE LOTUS OF THE WATER +4446-2273-0017-1155: HOW JOLLY IT WAS BEING YOUNG HILDA +1089-134686-0030-30: BEWARE OF MAKING THAT MISTAKE +2094-142345-0060-571: OH I'VE NO DOUBT IT'S IN CAPITAL ORDER +121-127105-0017-151: IT WAS ALMOST THE TONE OF HOPE EVERYBODY WILL STAY +2830-3980-0058-855: MOHAMMED ALSO SPEAKS HIGHLY OF CHRIST +5639-40744-0011-1575: SHE FOUND THE DOOR BUT IT WAS LOCKED OUTSIDE +1188-133604-0042-106: THE SCENE IS ABSOLUTELY ARCADIAN +5683-32865-0014-1620: HE'S NOT A MAN FOR COUNTRY QUARTERS +6829-68771-0021-1935: BUT IT CAN'T BE PROTESTED THE GIRL +7021-79759-0001-2055: THAT IS COMPARATIVELY NOTHING +4970-29095-0015-1320: IF I CAN GET (PATIENTS->PATIENCE) +5683-32866-0026-1650: I BOLDLY LIGHTED MY (CHEROOT->CERUT) +1320-122617-0038-330: WILL THE HURONS HEAR HIS GROANS +7176-92135-0033-2220: RELAPSES INTO SILENCE FOR THE REST OF THE EVENING +61-70968-0059-1740: (IT WILL->IT'LL) BE NO DISAPPOINTMENT TO ME +4446-2275-0025-1200: WEREN'T YOU HAPPY THEN AT ALL +6829-68769-0045-1905: I'LL PAY ALL THE (COSTS->COST) BESIDES +5142-33396-0008-1470: I PAINTED THE EYES RED FOR ANGER +121-127105-0016-150: PROBABLY NOT TILL THE SECOND POST +1580-141084-0017-405: NO GOOD MY DEAR WATSON +2830-3980-0043-840: EXPERIENCE PROVES THIS +5683-32879-0025-1680: THANK YOU DORCAS DEAR +237-134500-0001-660: MARIE SIGHED +7127-75947-0012-2130: INDEED AH +2830-3979-0011-795: THE LORD WHO HAS GIVEN US POWER TO TEACH AND TO HEAR LET HIM ALSO GIVE US THE POWER TO SERVE AND TO DO LUKE TWO +3575-170457-0035-1005: I WISH IT WOULD RECUR AGAIN BUT IT WILL TAKE TWO OR THREE INTERVIEWS BEFORE THE STIFFNESS THE ESTRANGEMENT OF THIS LONG SEPARATION WILL WEAR AWAY +1580-141084-0048-436: IT WILL BE CLEAR TO YOU FROM WHAT I HAVE SAID THAT ONLY YOU COULD HAVE LET THIS YOUNG MAN OUT SINCE YOU WERE LEFT IN THE ROOM AND MUST HAVE LOCKED THE DOOR WHEN YOU WENT OUT +4970-29093-0009-1290: PHILIP THEREFORE READ DILIGENTLY IN THE ASTOR LIBRARY PLANNED LITERARY WORKS THAT SHOULD COMPEL ATTENTION AND NURSED HIS GENIUS +2830-3980-0029-826: ALTHOUGH THE BRETHREN WITH ME ARE NOT APOSTLES LIKE MYSELF YET THEY ARE ALL OF ONE MIND WITH ME THINK WRITE AND TEACH AS I DO +1995-1836-0014-480: FORTUNATELY SAID MISTER (VANDERPOOL->VANERPOLE) NORTHERNERS AND SOUTHERNERS ARE ARRIVING AT A BETTER MUTUAL UNDERSTANDING ON MOST OF THESE MATTERS +1995-1836-0000-466: THE (HON->HONOURABLE) CHARLES SMITH MISS SARAH'S BROTHER WAS WALKING SWIFTLY (UPTOWN->UP TOWN) FROM MISTER EASTERLY'S WALL STREET OFFICE AND HIS FACE WAS PALE +1995-1826-0011-450: HERE SHE WAS TEACHING DIRTY CHILDREN AND THE SMELL OF CONFUSED (ODORS->ODOURS) AND BODILY PERSPIRATION WAS TO HER AT TIMES UNBEARABLE +7176-92135-0018-2205: IN THE MODERN WELL CONSTRUCTED PLAY HE SIMPLY RINGS UP AN IMAGINARY CONFEDERATE AND TELLS HIM WHAT HE IS GOING TO DO COULD ANYTHING BE MORE NATURAL +237-126133-0002-616: THEN DEAR SAID MISSUS WHITNEY YOU MUST BE KINDER TO HER THAN EVER THINK WHAT IT WOULD BE FOR ONE OF YOU TO BE AWAY FROM HOME EVEN AMONG FRIENDS +3729-6852-0039-1066: HE HIMSELF RECITED THE SAME PASSAGE IN FRENCH AND POLITELY POINTED OUT THE PARTS IN WHICH HE THOUGHT THAT I HAD IMPROVED ON THE ORIGINAL +4992-41797-0015-1380: CYRIL THERE MUST BE SOME BETTER 
WAY OF DOING I JUST DRAW THE OUTLINE OF AN ANIMAL AND THEN I PUT HAIRS OR FEATHERS ON IT THEY HAVE NO BODIES +3575-170457-0020-990: I KNOW THE FIRST LETTER I WROTE TO YOU WAS ALL SENSELESS TRASH FROM BEGINNING TO END BUT I AM NOT ALTOGETHER THE IDLE DREAMING BEING IT WOULD SEEM TO DENOTE +2094-142345-0029-540: DID EVER ANYBODY SEE THE LIKE SCREAMED MISSUS POYSER RUNNING TOWARDS THE TABLE WHEN HER EYE HAD FALLEN ON THE BLUE STREAM +7729-102255-0002-2235: THAT SUMMER'S EMIGRATION HOWEVER BEING MAINLY FROM THE FREE STATES GREATLY CHANGED THE RELATIVE STRENGTH OF THE TWO PARTIES +3575-170457-0006-976: HER FEEBLE HEALTH GAVE HER HER YIELDING MANNER FOR SHE COULD NEVER OPPOSE ANY ONE WITHOUT GATHERING UP ALL HER STRENGTH FOR THE STRUGGLE +3729-6852-0023-1050: MY REMARK PLEASES HIM BUT I SOON PROVE TO HIM THAT IT IS NOT THE RIGHT WAY TO SPEAK HOWEVER PERFECT MAY HAVE BEEN THE LANGUAGE OF THAT ANCIENT WRITER +7127-75947-0027-2145: TO SAY NOTHING SAID MONTALAIS SO THAT WHEN MADEMOISELLE DE (TONNAY CHARENTE->TONAICHERANT) THINKS (ATHENAIS->ETHONAY) IS THE ONLY ONE WHO KNOWS IT +7021-85628-0010-2070: ON HUGE SILVER PLATTERS WERE PYRAMIDS OF TARTS AND CAKES AND RED WINE SPARKLED IN GLITTERING DECANTERS +8463-294828-0028-2490: I OPENED A LINE OF CREDIT SUFFICIENT TO COVER THE (BABIRUSA->BARBAROOSA) AND CONSEIL AT MY HEELS I JUMPED INTO A CARRIAGE +8463-287645-0003-2430: OF THIS PARTY EDWARD A BOY OF SEVENTEEN CALLED FORTH MUCH SYMPATHY HE TOO WAS CLAIMED BY (HOLLAN->HOLLAND) +1284-134647-0003-270: CONSTANTINE EASILY BELIEVED THAT THE HERETICS WHO PRESUMED TO DISPUTE HIS OPINIONS OR TO OPPOSE HIS COMMANDS WERE GUILTY OF THE MOST ABSURD AND CRIMINAL OBSTINACY AND THAT A SEASONABLE APPLICATION OF MODERATE SEVERITIES MIGHT SAVE THOSE UNHAPPY MEN FROM THE DANGER OF AN EVERLASTING CONDEMNATION +1188-133604-0026-90: HERE IS AN EQUALLY TYPICAL GREEK SCHOOL LANDSCAPE BY WILSON LOST WHOLLY IN GOLDEN MIST THE TREES SO SLIGHTLY DRAWN THAT YOU DON'T KNOW IF THEY ARE TREES OR TOWERS AND NO CARE FOR COLOR (WHATEVER->WHATSOEVER) PERFECTLY DECEPTIVE AND (MARVELOUS->MARVELLOUS) EFFECT OF SUNSHINE THROUGH THE MIST APOLLO AND THE PYTHON +4077-13751-0021-1095: IT WILL BE OBSERVED THAT AN ORGANIZED MOB AIDED BY MANY OF THE CIVIL AND MILITARY OFFICERS OF MISSOURI WITH GOVERNOR BOGGS AT THEIR HEAD HAVE BEEN THE PROMINENT ACTORS IN THIS BUSINESS INCITED TOO IT APPEARS AGAINST THE MORMONS BY POLITICAL HATRED AND BY THE ADDITIONAL MOTIVES OF PLUNDER AND REVENGE +5105-28240-0023-1440: A SLIGHT DIMINUTION IN THE EXCESSIVELY HIGH TEMPERATURE WHICH HAD PREVAILED FOR THE LAST FEW WEEKS WAS THE ONLY APPARENT CHANGE IN THE GENERAL ORDER OF THINGS BUT WHETHER THIS WAS TO BE ATTRIBUTED TO ANY ALTERATION IN THE EARTH'S ORBIT WAS A QUESTION WHICH WOULD STILL REQUIRE SEVERAL DAYS TO DECIDE +5639-40744-0041-1605: NOR WAS (RODOLFO->RUDOLPHO) LESS SURPRISED THAN THEY AND THE BETTER TO ASSURE HIMSELF OF SO WONDERFUL A FACT HE BEGGED (LEOCADIA->LOCADIA) TO GIVE HIM SOME TOKEN WHICH SHOULD MAKE PERFECTLY CLEAR TO HIM THAT WHICH INDEED HE DID NOT DOUBT SINCE IT WAS AUTHENTICATED BY HIS PARENTS +6930-75918-0014-1965: BUT CONTINUED RAOUL NOT INTERRUPTED BY THIS MOVEMENT OF HIS FRIEND HEAVEN BE PRAISED THE FRENCH WHO ARE PRONOUNCED TO BE THOUGHTLESS AND INDISCREET RECKLESS EVEN ARE CAPABLE OF BRINGING A CALM AND SOUND JUDGMENT TO BEAR ON MATTERS OF SUCH HIGH IMPORTANCE +3570-5696-0001-960: BUT THE ACTUAL COURSE OF DEVELOPMENT HAS BEEN SOMEWHAT DIFFERENT FROM THIS IDEAL SCHEME LEISURE HELD THE FIRST PLACE AT THE START AND CAME TO HOLD A RANK VERY MUCH 
ABOVE WASTEFUL CONSUMPTION OF GOODS BOTH AS A DIRECT EXPONENT OF WEALTH AND AS AN ELEMENT IN THE STANDARD OF DECENCY DURING THE (QUASI->QUARSAI) PEACEABLE CULTURE +2830-3980-0073-870: THIS ATTITUDE SPRINGS FROM A FALSE CONCEPTION OF SIN THE CONCEPTION THAT SIN IS A SMALL MATTER EASILY TAKEN CARE OF BY GOOD WORKS THAT WE MUST PRESENT OURSELVES UNTO GOD WITH A GOOD CONSCIENCE THAT WE MUST FEEL NO SIN BEFORE WE MAY FEEL THAT CHRIST WAS GIVEN FOR OUR SINS +1221-135767-0023-210: THERE WERE A FEW ROSE BUSHES HOWEVER AND A NUMBER OF APPLE TREES PROBABLY THE DESCENDANTS OF THOSE PLANTED BY THE REVEREND MISTER BLACKSTONE THE FIRST SETTLER OF THE PENINSULA THAT HALF MYTHOLOGICAL PERSONAGE WHO RIDES THROUGH OUR EARLY ANNALS SEATED ON THE BACK OF A BULL +1995-1826-0026-465: NOW FOR ONE LITTLE HALF HOUR SHE HAD BEEN A WOMAN TALKING TO A BOY NO NOT EVEN THAT SHE HAD BEEN TALKING JUST TALKING THERE WERE NO PERSONS IN THE CONVERSATION JUST THINGS ONE THING COTTON +4507-16021-0039-1260: ONE PERCEIVES WITHOUT UNDERSTANDING IT A HIDEOUS MURMUR SOUNDING ALMOST LIKE HUMAN ACCENTS BUT MORE NEARLY RESEMBLING A HOWL THAN AN ARTICULATE WORD +1188-133604-0011-75: THEY ARE BEYOND ALL OTHER WORKS THAT I KNOW EXISTING DEPENDENT FOR THEIR EFFECT ON LOW SUBDUED TONES THEIR FAVORITE CHOICE IN TIME OF DAY BEING EITHER DAWN OR TWILIGHT AND EVEN THEIR BRIGHTEST SUNSETS PRODUCED CHIEFLY OUT OF (GRAY->GREY) PAPER +7729-102255-0017-2250: SHERIFF JONES HAD HIS POCKETS ALWAYS FULL OF WRITS ISSUED IN THE SPIRIT OF PERSECUTION BUT WAS OFTEN BAFFLED BY THE SHARP WITS AND READY RESOURCES OF THE FREE STATE PEOPLE AND SOMETIMES DEFIED OUTRIGHT +1089-134691-0008-46: THE UNCOUTH FACES PASSED HIM TWO BY TWO STAINED YELLOW OR RED OR LIVID BY THE SEA AND AS HE STROVE TO LOOK AT THEM WITH EASE AND INDIFFERENCE A FAINT STAIN OF PERSONAL SHAME AND COMMISERATION ROSE TO HIS OWN FACE +260-123440-0002-765: IT WAS THE WHITE RABBIT RETURNING SPLENDIDLY DRESSED WITH A PAIR OF WHITE KID GLOVES IN ONE HAND AND A LARGE FAN IN THE OTHER HE CAME TROTTING ALONG IN A GREAT HURRY MUTTERING TO HIMSELF AS HE CAME OH THE DUCHESS THE DUCHESS +1188-133604-0012-76: IT MAY BE THAT A GREAT COLORIST WILL USE HIS UTMOST FORCE OF COLOR AS A SINGER HIS FULL POWER OF VOICE BUT LOUD OR LOW THE VIRTUE IS IN BOTH CASES ALWAYS IN REFINEMENT NEVER IN LOUDNESS +8555-292519-0003-2550: IN A SUNSET GLOWING OF CRIMSON AND GOLD SHE LIES THE GLORY OF THE WORLD A BEACHED KING'S GALLEY WHOSE SAILS ARE FURLED WHO IS HUNG WITH TAPESTRIES RICH AND OLD +8230-279154-0013-2325: IN AN IMAGE OF A WELL KNOWN FACE FOR EXAMPLE SOME PARTS MAY FEEL MORE FAMILIAR THAN OTHERS WHEN THIS HAPPENS WE HAVE MORE BELIEF IN THE ACCURACY OF THE FAMILIAR PARTS THAN IN THAT OF THE UNFAMILIAR PARTS +5639-40744-0026-1590: (LUIS->LEWIS) WAS OUT OF DANGER IN A FORTNIGHT IN A MONTH HE ROSE FROM HIS BED AND DURING ALL THAT TIME HE WAS VISITED DAILY BY HIS MOTHER AND GRANDMOTHER AND TREATED BY THE MASTER AND MISTRESS OF THE HOUSE AS IF HE WAS THEIR OWN CHILD +672-122797-0045-1830: TIME ENOUGH HAD HE TOO FOR HIS REFLECTIONS FOR DAYS AND NIGHTS PASSED ON AND NOBODY CAME UP AND WHEN AT LAST SOMEBODY DID COME IT WAS ONLY TO PUT SOME GREAT TRUNKS IN A CORNER OUT OF THE WAY +2300-131720-0013-585: UNLESS HE COULD SECURE AN ENGINE OF SMOOTHER RUNNING AND MORE EXACTLY (GOVERNED->GOVERN) AND REGULATED THAN THOSE AVAILABLE FOR HIS DYNAMO AND LAMP EDISON REALIZED THAT HE WOULD FIND IT ALMOST IMPOSSIBLE TO GIVE A STEADY LIGHT +3575-170457-0008-978: AND SO LIFE AND DEATH HAVE DISPERSED THE CIRCLE OF VIOLENT RADICALS AND 
DISSENTERS INTO WHICH TWENTY YEARS AGO THE LITTLE QUIET RESOLUTE CLERGYMAN'S DAUGHTER WAS RECEIVED AND BY WHOM SHE WAS TRULY LOVED AND HONOURED +6829-68771-0009-1923: LOUISE HOPED FOR EXCELLENT RESULTS FROM THIS ORGANIZATION AND WISHED THE ENTERTAINMENT TO BE SO EFFECTIVE IN WINNING THEIR GOOD WILL THAT THEY WOULD WORK EARNESTLY FOR THE CAUSE IN WHICH THEY WERE ENLISTED +2300-131720-0002-574: THERE MESSRS JOHNSON AND HAMMER PUT INTO PRACTICE MANY OF THE IDEAS NOW STANDARD IN THE ART AND SECURED MUCH USEFUL DATA FOR THE WORK IN NEW YORK OF WHICH THE STORY HAS JUST BEEN TOLD +8224-274381-0003-2283: THE KING'S EARS WERE NOW OPEN TO MONTROSE'S COUNSELS WHO PROPOSED NONE BUT THE BOLDEST AND MOST DARING AGREEABLY TO THE DESPERATE STATE OF THE ROYAL CAUSE IN SCOTLAND +2094-142345-0033-544: WHEN SHE HAD THAT BAD ILLNESS AND I WAS ONLY ELEVEN YEARS OLD SHE USED TO SAY YOU'LL HAVE A FRIEND ON EARTH (IN->AND) YOUR AUNT RACHEL IF I'M TAKEN FROM YOU FOR SHE HAS A KIND HEART AND I'M SURE I'VE FOUND IT SO +1995-1837-0018-499: HERE LAY THE READING OF THE RIDDLE WITH INFINITE WORK AND (PAIN->PAINS) SOME ONE HAD DUG A CANAL FROM THE LAGOON TO THE CREEK INTO WHICH THE FORMER HAD DRAINED BY A LONG AND CROOKED WAY THUS ALLOWING IT TO EMPTY DIRECTLY +8230-279154-0031-2343: THEY GIVE LAWS ACCORDING TO WHICH IMAGES OF PAST OCCURRENCES COME INTO OUR MINDS BUT DO NOT DISCUSS OUR BELIEF THAT THESE IMAGES REFER TO PAST OCCURRENCES WHICH IS WHAT CONSTITUTES KNOWLEDGE (*->OF) MEMORY +4077-13751-0009-1083: THE FIRST WELL ESTABLISHED SEAT OF THE CHURCH WAS IN THE PRETTY LITTLE TOWN OF (KIRTLAND->PORTLAND) OHIO ALMOST WITHIN SIGHT OF LAKE ERIE AND HERE SOON ROSE THE FIRST TEMPLE OF MODERN TIMES +4970-29095-0018-1323: RUTH ASKED THE ENTHUSIASTS IF THEY WOULD LIKE TO LIVE IN SUCH A SOUNDING MAUSOLEUM WITH ITS GREAT HALLS AND ECHOING ROOMS AND NO COMFORTABLE PLACE IN IT FOR THE ACCOMMODATION OF (ANY BODY->ANYBODY) +7021-85628-0013-2073: THE PRINCESS CERTAINLY WAS BEAUTIFUL AND HE WOULD HAVE DEARLY LIKED TO BE KISSED BY HER BUT THE CAP WHICH HIS MOTHER HAD MADE HE WOULD NOT GIVE UP ON ANY CONDITION +4970-29095-0033-1338: WHAT A BOX WOMEN ARE PUT INTO MEASURED FOR IT AND PUT IN YOUNG IF WE GO ANYWHERE IT'S IN A BOX (VEILED->BALED) AND PINIONED AND SHUT IN BY DISABILITIES +3575-170457-0053-1023: BUT ANNE HAD BEGUN TO SUFFER JUST BEFORE THE HOLIDAYS AND CHARLOTTE WATCHED OVER HER YOUNGER SISTERS WITH THE JEALOUS VIGILANCE OF SOME WILD CREATURE THAT CHANGES HER VERY NATURE IF DANGER THREATENS HER YOUNG +3575-170457-0038-1008: AND MEANTIME I KNOW THE GREATNESS OF JEHOVAH I ACKNOWLEDGE THE PERFECTION OF HIS WORD I ADORE THE PURITY OF THE CHRISTIAN FAITH MY THEORY IS RIGHT MY PRACTICE HORRIBLY WRONG +5683-32879-0013-1668: CHELFORD HAD A NOTE FROM MISTER WYLDER THIS MORNING ANOTHER NOTE HIS COMING DELAYED AND SOMETHING OF HIS HAVING TO SEE SOME PERSON WHO (IS->WAS) ABROAD CONTINUED DORCAS AFTER A LITTLE PAUSE +6829-68769-0001-1861: IT WAS A SERIOUS CRIME INDEED MISTER WATSON TOLD THEM AND TOM GATES BADE FAIR TO SERVE A LENGTHY TERM IN (STATE'S->THE STATES) PRISON AS A CONSEQUENCE OF HIS RASH ACT +7021-79740-0002-2041: NOW DELIA CONTRIVED TO OBTAIN A GREAT INFLUENCE AND (ASCENDENCY->ASCENDANCY) OVER THE MINDS OF THE CHILDREN BY MEANS OF THESE DOLLS +4507-16021-0025-1246: TO MEET THE NEEDS OF THIS CONFLICT WRETCHEDNESS HAS INVENTED (A->THE) LANGUAGE OF COMBAT WHICH IS SLANG +672-122797-0016-1801: YES THEN SOMETHING BETTER SOMETHING STILL GRANDER WILL SURELY FOLLOW OR WHEREFORE SHOULD THEY THUS ORNAMENT ME +5683-32879-0011-1666: SHE 
SPOKE WITH A SUDDEN ENERGY WHICH PARTOOK OF FEAR AND PASSION AND FLUSHED HER THIN CHEEK AND MADE HER LANGUID EYES FLASH +1580-141084-0004-392: OF COURSE HE DID NOT REALIZE THAT IT WAS I WHO WAS KNOCKING BUT NONE THE LESS HIS CONDUCT WAS VERY UNCOURTEOUS AND INDEED UNDER THE CIRCUMSTANCES RATHER SUSPICIOUS +7021-85628-0011-2071: THE PRINCESS SAT DOWN UNDER A BLUE CANOPY WITH BOUQUETS OF ROSES AND SHE LET (ANDERS->ANDER) SIT IN A GOLDEN CHAIR BY HER SIDE +1089-134686-0017-17: STEPHEN LEANING BACK AND DRAWING IDLY ON HIS SCRIBBLER LISTENED TO THE TALK ABOUT HIM WHICH HERON CHECKED FROM TIME TO TIME BY SAYING +1995-1826-0013-452: SO FOR THE HUNDREDTH TIME SHE WAS THINKING (TODAY->TO DAY) AS SHE WALKED ALONE UP THE LANE BACK OF THE BARN AND THEN SLOWLY DOWN THROUGH THE BOTTOMS +8455-210777-0000-2356: I REMAINED THERE ALONE FOR MANY HOURS BUT I MUST ACKNOWLEDGE THAT BEFORE I LEFT THE CHAMBERS I HAD GRADUALLY BROUGHT MYSELF TO LOOK AT THE MATTER IN ANOTHER LIGHT +8455-210777-0015-2371: I AND MY WIFE AND SON AND THE TWO (CRASWELLERS->CRESSWELLERS) AND THREE OR FOUR OTHERS AGREED TO DINE ON BOARD THE SHIP ON THE NEXT +5639-40744-0012-1576: SHE SUCCEEDED IN OPENING THE WINDOW AND THE MOONLIGHT SHONE IN SO BRIGHTLY THAT SHE COULD DISTINGUISH THE (COLOUR->COLOR) OF SOME DAMASK (HANGINGS->HANGING) IN THE ROOM +7127-75946-0013-2101: THE KING HAD COMPLETED HIS (TOILETTE->TOILET) BY NINE O'CLOCK HE APPEARED IN AN OPEN CARRIAGE DECORATED WITH BRANCHES OF TREES AND FLOWERS +8230-279154-0029-2341: THE FACT THAT A MAN CAN RECITE A POEM DOES NOT SHOW THAT HE REMEMBERS ANY PREVIOUS OCCASION ON WHICH HE HAS RECITED OR READ IT +1284-1181-0012-257: NO ONE SAW HIM DO THIS FOR ALL WERE LOOKING AT THE POWDER OF LIFE BUT SOON THE WOMAN REMEMBERED WHAT SHE HAD BEEN DOING AND CAME BACK TO THE CUPBOARD +5683-32866-0012-1636: NOW THAT'S IMPOSSIBLE RADIE FOR I REALLY DON'T THINK I ONCE THOUGHT OF HIM ALL THIS EVENING EXCEPT JUST WHILE WE WERE TALKING +5105-28240-0024-1441: DOUBTS NOW AROSE AND SOME DISCUSSION FOLLOWED WHETHER OR NOT IT WAS DESIRABLE FOR BEN ZOOF TO ACCOMPANY HIS MASTER +1221-135767-0010-197: SHE SCREAMED AND SHOUTED TOO WITH A TERRIFIC VOLUME OF SOUND WHICH DOUBTLESS CAUSED THE HEARTS OF THE FUGITIVES TO QUAKE WITHIN THEM +1284-1180-0000-212: HE WORE BLUE SILK STOCKINGS BLUE KNEE PANTS WITH GOLD BUCKLES A BLUE RUFFLED WAIST AND A JACKET OF BRIGHT BLUE BRAIDED WITH GOLD +1320-122617-0040-332: HE HAD NO OCCASION TO DELAY FOR AT THE NEXT INSTANT A BURST OF CRIES FILLED THE OUTER AIR AND RAN ALONG THE WHOLE EXTENT OF THE VILLAGE +8230-279154-0014-2326: I COME NOW TO THE OTHER CHARACTERISTIC WHICH MEMORY IMAGES MUST HAVE IN ORDER TO ACCOUNT FOR OUR KNOWLEDGE OF THE PAST +1284-1180-0030-242: WHEN I FOUND IT I SAID TO MYSELF THAT IT WOULD DO NICELY FOR MY SERVANT GIRL FOR WHEN SHE WAS BROUGHT TO LIFE SHE WOULD NOT BE PROUD NOR HAUGHTY AS THE GLASS CAT IS FOR SUCH A DREADFUL MIXTURE OF COLORS WOULD DISCOURAGE HER FROM TRYING TO BE AS DIGNIFIED AS THE BLUE MUNCHKINS ARE +2094-142345-0030-541: TOTTY HOWEVER HAD DESCENDED FROM HER CHAIR WITH GREAT SWIFTNESS AND WAS ALREADY IN RETREAT TOWARDS THE DAIRY WITH A SORT OF WADDLING RUN AND AN AMOUNT OF FAT ON THE NAPE OF HER NECK WHICH MADE HER LOOK LIKE THE METAMORPHOSIS OF A WHITE (SUCKLING->SUCKING) PIG +7127-75946-0028-2116: BY DEGREES ALL HIS HAPPINESS ALL HIS BRILLIANCY SUBSIDED INTO REGRET AND UNEASINESS SO THAT HIS LIMBS LOST THEIR POWER HIS ARMS HUNG HEAVILY BY HIS SIDES AND HIS HEAD DROOPED AS THOUGH HE WAS STUPEFIED +4507-16021-0010-1231: WE HAVE ALWAYS THOUGHT 
THAT IT WAS SOMETIMES A COURAGEOUS ACT AND AT LEAST A SIMPLE AND USEFUL DEED WORTHY OF THE SYMPATHETIC ATTENTION WHICH DUTY ACCEPTED (AND->IN) FULFILLED MERITS +672-122797-0001-1786: THE PLACE HE HAD WAS A VERY GOOD ONE THE SUN SHONE ON HIM AS TO FRESH AIR THERE WAS ENOUGH OF THAT AND ROUND HIM GREW MANY LARGE SIZED COMRADES PINES AS WELL AS FIRS +1221-135766-0010-181: IT WAS A LOOK SO INTELLIGENT YET INEXPLICABLE PERVERSE SOMETIMES SO MALICIOUS BUT GENERALLY ACCOMPANIED BY A WILD FLOW OF SPIRITS THAT HESTER COULD NOT HELP QUESTIONING AT SUCH MOMENTS WHETHER PEARL WAS A HUMAN CHILD +4970-29095-0031-1336: HE DOESN'T SAY EXACTLY WHAT IT IS SAID RUTH A LITTLE DUBIOUSLY BUT IT'S SOMETHING ABOUT LAND AND RAILROADS AND THEE KNOWS FATHER THAT FORTUNES ARE MADE NOBODY KNOWS EXACTLY HOW IN A NEW COUNTRY +2094-142345-0015-526: TO ALL APPEARANCE MOLLY HAD GOT THROUGH HER AFTER DINNER WORK IN AN EXEMPLARY MANNER HAD CLEANED HERSELF WITH GREAT DISPATCH AND NOW CAME TO ASK SUBMISSIVELY IF SHE SHOULD SIT DOWN TO HER SPINNING TILL MILKING TIME +4970-29093-0010-1291: HE HAD NO FRIEND WISE ENOUGH TO TELL HIM TO STEP INTO THE DORKING CONVENTION THEN IN SESSION MAKE A SKETCH OF THE MEN AND WOMEN ON THE PLATFORM AND TAKE IT TO THE EDITOR OF THE DAILY (GRAPEVINE->GRAPE VINE) AND SEE WHAT HE COULD GET A LINE FOR IT +6930-75918-0001-1952: THE ENGLISH FORWARDED TO THE FRENCH BASKETS OF FLOWERS OF WHICH THEY HAD MADE A PLENTIFUL PROVISION TO GREET THE ARRIVAL OF THE YOUNG PRINCESS THE FRENCH IN RETURN INVITED THE ENGLISH TO A SUPPER WHICH WAS TO BE GIVEN THE NEXT DAY +3575-170457-0037-1007: IF CHRISTIAN PERFECTION BE NECESSARY TO SALVATION I SHALL NEVER BE SAVED MY HEART IS A VERY (HOTBED->HOT BED) FOR SINFUL THOUGHTS AND WHEN I DECIDE ON AN ACTION I SCARCELY REMEMBER TO LOOK TO MY REDEEMER FOR DIRECTION +4970-29095-0001-1306: SHE TRIED THIS MORNING AN AIR OR TWO UPON THE PIANO SANG A SIMPLE SONG IN A SWEET BUT SLIGHTLY METALLIC VOICE AND THEN SEATING HERSELF BY THE OPEN WINDOW READ PHILIP'S LETTER +4992-41806-0007-1395: OLIVE HAD PAINTED THE MOTTO ON A LONG NARROW PANEL OF CANVAS AND GIVING IT TO MISTER POPHAM STOOD BY THE FIRESIDE WHILE HE DEFTLY FITTED IT INTO THE PLACE PREPARED FOR IT +908-31957-0001-2595: I SIT BENEATH THY LOOKS AS CHILDREN DO IN THE NOON SUN WITH SOULS THAT TREMBLE THROUGH THEIR HAPPY EYELIDS FROM AN UNAVERRED YET PRODIGAL INWARD JOY +4507-16021-0009-1230: WHEN IT IS A QUESTION OF PROBING A WOUND A GULF A SOCIETY SINCE WHEN HAS IT BEEN CONSIDERED WRONG TO GO TOO FAR TO GO TO THE BOTTOM +1188-133604-0027-91: NOW HERE IS (RAPHAEL->RAFAELLE) EXACTLY BETWEEN THE TWO TREES STILL DRAWN LEAF BY LEAF WHOLLY FORMAL BUT BEAUTIFUL MIST COMING GRADUALLY INTO THE DISTANCE +4077-13754-0014-1110: THE PEOPLE CONTESTED THESE MEASURES ONE BY ONE IN THE COURTS PRESENTING IN CASE AFTER CASE THE DIFFERENT PHASES OF THE SUBJECT AND URGING THE UNCONSTITUTIONALITY OF THE MEASURE +237-134493-0005-645: HE WAS A SPLENDID FIGURE OF A BOY TALL AND STRAIGHT AS A YOUNG PINE TREE WITH A HANDSOME HEAD AND STORMY GRAY EYES DEEPLY SET UNDER A SERIOUS BROW +121-127105-0031-165: SHE WAS YOUNG UNTRIED NERVOUS IT WAS A VISION OF SERIOUS DUTIES (AND->IN) LITTLE COMPANY OF REALLY GREAT LONELINESS +237-134500-0031-690: SOMETIMES I DON'T WANT TO DO ANYTHING AT ALL AND SOMETIMES I WANT TO PULL THE FOUR CORNERS OF THE DIVIDE TOGETHER HE THREW OUT HIS ARM AND BROUGHT IT BACK WITH A JERK SO LIKE A TABLE CLOTH +1089-134686-0000-0: HE HOPED THERE WOULD BE STEW FOR DINNER TURNIPS AND CARROTS AND BRUISED POTATOES AND FAT MUTTON PIECES TO BE 
LADLED OUT IN THICK PEPPERED FLOUR FATTENED SAUCE +8555-284447-0019-2520: BEFORE ANY COULD STOP HIM HE BUTTED HIS MAJESTY SO FURIOUSLY THAT THE KING SOARED FAR INTO THE AIR AND TUMBLED IN A HEAP AMONG THE BENCHES WHERE HE LAY MOANING AND GROANING +1320-122612-0010-285: SEE SAID UNCAS POINTING NORTH AND SOUTH AT THE EVIDENT MARKS OF THE BROAD TRAIL ON EITHER SIDE OF HIM THE DARK HAIR HAS GONE TOWARD THE FOREST +1221-135766-0009-180: AS TO ANY OTHER KIND OF DISCIPLINE WHETHER ADDRESSED TO HER MIND OR HEART LITTLE PEARL MIGHT OR MIGHT NOT BE WITHIN ITS REACH IN ACCORDANCE WITH THE CAPRICE THAT RULED THE MOMENT +1188-133604-0041-105: IT IS A GLEANER BRINGING DOWN HER ONE SHEAF OF CORN TO AN OLD (WATERMILL->WATER MILL) ITSELF MOSSY AND RENT SCARCELY ABLE TO GET ITS STONES TO TURN +2094-142345-0059-570: THIS MISSUS POYSER SAID BLUSHING AND BELIEVING THAT THE CAPTAIN WAS REALLY INTERESTED IN HER MILK PANS AND WOULD ADJUST HIS OPINION OF HER TO THE APPEARANCE OF HER DAIRY +8463-294825-0003-2445: NEMO BUILDS A FABULOUS FUTURISTIC SUBMARINE THE NAUTILUS THEN CONDUCTS AN UNDERWATER CAMPAIGN OF VENGEANCE AGAINST HIS IMPERIALIST OPPRESSOR +7127-75946-0012-2100: THE NEWS CIRCULATED WITH THE RAPIDITY OF LIGHTNING DURING ITS PROGRESS IT KINDLED EVERY VARIETY OF COQUETRY DESIRE AND WILD AMBITION +7127-75946-0027-2115: DISDAINFUL OF A SUCCESS OF WHICH MADAME SHOWED NO (ACKNOWLEDGEMENT->ACKNOWLEDGMENT) HE THOUGHT OF NOTHING BUT BOLDLY REGAINING THE MARKED PREFERENCE OF THE PRINCESS +3575-170457-0036-1006: MY EYES FILL WITH TEARS WHEN I CONTRAST THE BLISS OF SUCH A STATE BRIGHTENED BY HOPES OF THE FUTURE WITH THE MELANCHOLY STATE I NOW LIVE IN UNCERTAIN THAT I EVER FELT TRUE CONTRITION WANDERING IN THOUGHT AND DEED LONGING FOR HOLINESS WHICH I SHALL NEVER NEVER OBTAIN SMITTEN AT TIMES TO THE HEART WITH THE CONVICTION THAT GHASTLY CALVINISTIC DOCTRINES ARE TRUE DARKENED IN SHORT BY THE VERY SHADOWS OF SPIRITUAL DEATH +2300-131720-0028-600: THERE WAS INFINITE (SCEPTICISM->SKEPTICISM) AROUND HIM ON THE SUBJECT AND WHILE OTHER INVENTORS WERE ALSO GIVING THE SUBJECT THEIR THOUGHT THE PUBLIC TOOK IT FOR GRANTED THAT ANYTHING SO UTTERLY INTANGIBLE AS ELECTRICITY THAT COULD NOT BE SEEN OR WEIGHED AND ONLY GAVE SECONDARY EVIDENCE OF ITSELF AT THE EXACT POINT OF USE COULD NOT BE BROUGHT TO ACCURATE REGISTRATION +5142-36377-0014-1545: A PRETTY GIRL AND SO FAR AS I COULD JUDGE BY APPEARANCES A GOOD GIRL TOO DESCRIBING HER GENERALLY I MAY SAY THAT SHE HAD A SMALL HEAD WELL CARRIED AND WELL SET ON HER SHOULDERS BRIGHT GRAY EYES THAT LOOKED AT YOU HONESTLY AND MEANT WHAT THEY LOOKED A TRIM SLIGHT LITTLE FIGURE TOO SLIGHT FOR OUR ENGLISH NOTIONS OF BEAUTY A STRONG AMERICAN ACCENT AND A RARE THING IN AMERICA A PLEASANTLY TONED VOICE WHICH MADE THE ACCENT AGREEABLE TO ENGLISH EARS +8230-279154-0043-2355: OF COURSE THE WORDS THIS HAS EXISTED BEFORE ARE A VERY INADEQUATE TRANSLATION OF WHAT ACTUALLY HAPPENS WHEN WE FORM A JUDGMENT OF RECOGNITION BUT THAT IS UNAVOIDABLE WORDS ARE FRAMED TO EXPRESS A LEVEL OF THOUGHT WHICH IS BY NO MEANS PRIMITIVE AND ARE QUITE INCAPABLE OF EXPRESSING SUCH AN ELEMENTARY OCCURRENCE AS RECOGNITION +3729-6852-0008-1035: SHE WAS (HONOURABLY->HONOROURABLY) BURIED IN THE CHURCH OF SAINT (SAUVEUR->SOUVERT) WITHOUT THE SLIGHTEST OPPOSITION FROM THE VENERABLE PRIEST WHO FAR FROM SHARING THE ANTI (CHRISTAIN->CHRISTIAN) INTOLERANCY OF THE CLERGY IN GENERAL SAID THAT HER PROFESSION AS AN ACTRESS HAD NOT HINDERED HER FROM BEING A GOOD CHRISTIAN AND THAT THE EARTH WAS (THE->A) COMMON MOTHER OF ALL HUMAN BEINGS 
AS JESUS CHRIST HAD BEEN THE SAVIOUR OF ALL MANKIND +2094-142345-0000-511: IT IS A VERY FINE OLD PLACE OF RED BRICK SOFTENED BY A PALE POWDERY LICHEN WHICH HAS DISPERSED ITSELF WITH HAPPY IRREGULARITY SO AS TO BRING THE RED BRICK INTO TERMS OF FRIENDLY COMPANIONSHIP WITH THE LIMESTONE ORNAMENTS SURROUNDING THE THREE GABLES THE WINDOWS AND THE DOOR PLACE +2961-960-0011-885: WITH (HERACLEITUS->HERACLITUS) HE ACKNOWLEDGES THE PERPETUAL FLUX LIKE (ANAXAGORAS->ANXAGARIS) HE ASSERTS THE PREDOMINANCE OF MIND ALTHOUGH ADMITTING AN ELEMENT OF NECESSITY WHICH REASON IS INCAPABLE OF SUBDUING LIKE THE PYTHAGOREANS HE SUPPOSES THE MYSTERY OF THE WORLD TO BE CONTAINED IN NUMBER +4446-2273-0032-1170: HE STOOD A LITTLE BEHIND HER AND TRIED TO STEADY HIMSELF AS HE SAID IT'S SOFT AND MISTY SEE HOW WHITE THE STARS ARE +1320-122617-0023-315: UNCAS WHO HAD ALREADY APPROACHED THE DOOR IN READINESS TO LEAD THE WAY NOW RECOILED AND PLACED HIMSELF ONCE MORE IN THE BOTTOM OF THE LODGE +3570-5695-0002-945: BUT AS WE DESCEND THE SOCIAL SCALE THE POINT IS PRESENTLY REACHED WHERE THE DUTIES OF VICARIOUS LEISURE AND CONSUMPTION DEVOLVE UPON THE WIFE ALONE +5683-32879-0010-1665: POOR RACHEL HER NATURE RECOILED FROM DECEIT AND SHE TOLD AT ALL EVENTS AS MUCH OF THE TRUTH AS SHE DARED +1284-1181-0011-256: VERY CAREFULLY THE MAGICIAN REMOVED THIS POWDER PLACING IT (ALL TOGETHER->ALTOGETHER) IN A GOLDEN DISH WHERE HE MIXED IT WITH A GOLDEN SPOON +1089-134691-0023-61: IT WAS A PAIN TO SEE THEM AND A SWORD LIKE PAIN TO SEE THE SIGNS OF ADOLESCENCE THAT MADE REPELLENT THEIR PITIABLE NAKEDNESS +1320-122617-0009-301: I GREATLY MOURN THAT ONE SO WELL DISPOSED SHOULD DIE IN HIS IGNORANCE AND I HAVE SOUGHT A GOODLY HYMN CAN YOU LEAD ME TO HIM +1320-122617-0024-316: BUT HAWKEYE WHO WAS TOO MUCH OCCUPIED WITH HIS OWN THOUGHTS TO NOTE THE MOVEMENT CONTINUED SPEAKING MORE TO HIMSELF THAN TO HIS COMPANION +3570-5696-0002-961: (OTHER->ARE THE) CIRCUMSTANCES PERMITTING THAT INSTINCT DISPOSES MEN TO LOOK WITH FAVOR UPON PRODUCTIVE EFFICIENCY AND ON WHATEVER IS OF HUMAN USE +121-127105-0002-136: CRIED ONE OF THE WOMEN HE TOOK NO NOTICE OF HER HE LOOKED AT ME BUT AS IF INSTEAD OF ME HE SAW WHAT HE SPOKE OF +61-70968-0014-1695: WHAT IS THE TUMULT AND RIOTING CRIED OUT THE SQUIRE AUTHORITATIVELY AND HE BLEW TWICE ON (A->THE) SILVER WHISTLE WHICH HUNG AT HIS BELT +260-123286-0003-705: YOU SEEM ANXIOUS MY UNCLE I SAID SEEING HIM CONTINUALLY WITH HIS GLASS TO HIS EYE ANXIOUS +5683-32866-0011-1635: THEIR WALK CONTINUED SILENT FOR THE GREATER PART NEITHER WAS QUITE SATISFIED WITH THE OTHER BUT RACHEL AT LAST SAID +3575-170457-0005-975: SHE A TORY AND CLERGYMAN'S DAUGHTER WAS ALWAYS IN A MINORITY OF ONE IN OUR HOUSE OF VIOLENT DISSENT AND RADICALISM +260-123288-0002-736: THE ATMOSPHERE IS CHARGED WITH (VAPOURS->VAPORS) PERVADED WITH THE ELECTRICITY GENERATED BY THE EVAPORATION OF SALINE WATERS +8463-294828-0013-2475: HE WAS A FANATIC ON FORMALITY AND HE ONLY ADDRESSED ME IN THE THIRD PERSON TO THE POINT WHERE IT GOT TIRESOME +8455-210777-0044-2400: I WAS TO BE TAKEN AWAY AND CARRIED TO ENGLAND OR ELSEWHERE OR DROWNED UPON THE VOYAGE IT MATTERED NOT WHICH +1580-141083-0012-346: NOT ONLY THIS BUT ON THE TABLE I FOUND A SMALL BALL OF BLACK DOUGH OR CLAY WITH SPECKS OF SOMETHING WHICH LOOKS LIKE SAWDUST IN IT +1320-122617-0039-331: THE MOHICAN STARTED ON HIS FEET AND SHOOK HIS SHAGGY COVERING AS THOUGH THE ANIMAL HE COUNTERFEITED WAS ABOUT TO MAKE SOME DESPERATE EFFORT +1580-141084-0033-421: COME COME SAID HOLMES KINDLY IT IS HUMAN TO ERR AND AT LEAST NO 
ONE CAN ACCUSE YOU OF BEING A CALLOUS CRIMINAL +4446-2275-0040-1215: THE SIGHT OF YOU BARTLEY TO SEE YOU LIVING AND HAPPY AND SUCCESSFUL CAN I NEVER MAKE YOU UNDERSTAND WHAT THAT MEANS TO ME +2961-960-0012-886: MANY IF NOT ALL THE ELEMENTS OF THE (PRE SOCRATIC->PRESOCRATIC) PHILOSOPHY ARE INCLUDED IN THE TIMAEUS +1089-134686-0031-31: STEPHEN'S HEART BEGAN SLOWLY TO FOLD AND FADE WITH FEAR LIKE A WITHERING FLOWER +6829-68769-0015-1875: SOMETIMES I'M THAT (YEARNING->YEARNIN) FOR A SMOKE I'M NEARLY CRAZY (AN->AND) I (DUNNO->DON'TO) WHICH IS (WORST->WORSE) DYIN ONE WAY OR (ANOTHER->TOTHER) +908-31957-0016-2610: DEAREST TEACH ME SO TO POUR OUT GRATITUDE AS THOU DOST GOOD +7729-102255-0032-2265: THE FREE STATE HOTEL A STONE BUILDING IN DIMENSIONS FIFTY BY SEVENTY FEET THREE STORIES HIGH AND HANDSOMELY FURNISHED PREVIOUSLY OCCUPIED ONLY FOR LODGING ROOMS ON THAT DAY FOR THE FIRST TIME OPENED ITS TABLE ACCOMMODATIONS TO THE PUBLIC AND PROVIDED A FREE DINNER IN HONOR OF THE OCCASION +1089-134691-0009-47: ANGRY WITH HIMSELF HE TRIED TO HIDE HIS FACE FROM THEIR EYES BY GAZING DOWN SIDEWAYS INTO THE SHALLOW SWIRLING WATER UNDER THE BRIDGE BUT HE STILL SAW A REFLECTION THEREIN OF THEIR TOP HEAVY SILK HATS AND HUMBLE TAPE LIKE COLLARS AND LOOSELY HANGING CLERICAL CLOTHES BROTHER (HICKEY->HICKIE) +7729-102255-0003-2236: FOR GENERAL SERVICE THEREFORE REQUIRING NO SPECIAL EFFORT THE NUMERICAL STRENGTH OF THE FACTIONS WAS ABOUT EQUAL WHILE ON EXTRAORDINARY OCCASIONS THE TWO THOUSAND BORDER RUFFIAN RESERVE LYING A LITTLE FARTHER BACK FROM THE STATE LINE COULD AT ANY TIME EASILY TURN THE SCALE +8224-274384-0012-2310: IF ANY STILL RETAINED (RANCOR->RANCOUR) AGAINST HIM IN HIS PRESENT CONDITION THEY PASSED IN SILENCE WHILE HIS WELL WISHERS MORE GENEROUS THAN PRUDENT ACCOMPANIED HIS MARCH WITH TEARS WITH ACCLAMATIONS AND WITH PRAYERS FOR HIS SAFETY +3729-6852-0009-1036: YOU WILL FORGIVE ME DEAR READER IF I HAVE MADE YOU ATTEND THE FUNERAL OF (SILVIA->SYLVIA) TEN YEARS BEFORE HER DEATH BELIEVE ME I HAVE NO INTENTION OF PERFORMING A MIRACLE YOU MAY CONSOLE YOURSELF WITH THE IDEA THAT I SHALL SPARE YOU THAT UNPLEASANT TASK WHEN POOR (SILVIA->SYLVIA) DIES +8224-274381-0000-2280: THOUGH THROWN INTO PRISON FOR THIS ENTERPRISE AND DETAINED SOME TIME HE WAS NOT DISCOURAGED BUT STILL CONTINUED BY HIS COUNTENANCE AND PROTECTION TO INFUSE SPIRIT INTO THE DISTRESSED ROYALISTS +4077-13751-0007-1081: THE BOOK (OF->A) MORMON HAD TAUGHT THE PEOPLE THE TRUE ORIGIN AND DESTINY OF THE AMERICAN INDIANS AND TOWARD THIS DARK SKINNED REMNANT OF A ONCE MIGHTY PEOPLE THE MISSIONARIES OF MORMONISM EARLY TURNED THEIR EYES AND WITH THEIR EYES WENT THEIR HEARTS AND THEIR HOPES +7176-88083-0001-2160: THE MERGANSER HAD A CRESTED HEAD OF IRIDESCENT GREEN BLACK A BROAD COLLAR OF LUSTROUS WHITE BLACK BACK BLACK AND WHITE WINGS WHITE BELLY SIDES FINELY PENCILLED IN BLACK AND WHITE AND A BREAST OF RICH CHESTNUT RED STREAKED WITH BLACK +6829-68771-0007-1921: THE DESCRIPTION SHE GAVE OF THE COMING RECEPTION TO THE (WOMAN'S->WOMEN'S) POLITICAL LEAGUE WAS SO HUMOROUS AND DIVERTING THAT THEY WERE BOTH LAUGHING HEARTILY OVER THE THING WHEN THE YOUNG PEOPLE PASSED THEM AND THUS MISTER HOPKINS FAILED TO NOTICE WHO THE OCCUPANTS OF THE OTHER VEHICLE WERE +1284-1181-0010-255: THEIR CONTENTS HAD ALL BOILED AWAY LEAVING IN THE BOTTOM OF EACH KETTLE A FEW GRAINS OF FINE WHITE POWDER +2300-131720-0029-601: HENCE THE EDISON ELECTROLYTIC (METER->METRE) IS NO LONGER USED DESPITE ITS EXCELLENT QUALITIES +3575-170457-0050-1020: TABBY HAD TENDED THEM IN THEIR 
CHILDHOOD THEY AND NONE OTHER SHOULD TEND HER IN HER INFIRMITY AND AGE +237-126133-0017-631: THERE JAP YOU'VE CAUGHT IT LAUGHED PERCY WHILE THE OTHERS SCREAMED AT THE SIGHT OF JASPER'S FACE +1995-1826-0012-451: SHE WANTED A GLANCE OF THE NEW BOOKS AND PERIODICALS AND TALK OF GREAT PHILANTHROPIES AND REFORMS +61-70970-0011-1755: AS ANY IN ENGLAND I WOULD SAY SAID GAMEWELL PROUDLY THAT IS IN HIS DAY +1284-1180-0028-240: A BED QUILT MADE OF PATCHES OF DIFFERENT KINDS AND (COLORS->COLOURS) OF CLOTH ALL NEATLY SEWED TOGETHER +7021-79740-0001-2040: DELLA HAD A YOUNG SISTER NAMED MARIA AND A COUSIN WHOSE NAME WAS JANE +6829-68771-0006-1920: AND THIS WAS WHY KENNETH AND BETH DISCOVERED HIM CONVERSING WITH THE YOUNG WOMAN IN THE BUGGY +1580-141084-0002-390: (THIS->THE) SET OF ROOMS IS QUITE THE OLDEST IN THE COLLEGE AND IT IS NOT UNUSUAL FOR VISITORS TO GO OVER THEM +1580-141083-0042-376: MY SCHOLAR HAS BEEN LEFT VERY POOR BUT HE IS HARD WORKING AND INDUSTRIOUS HE WILL DO WELL +1221-135767-0024-211: PEARL SEEING THE ROSE BUSHES BEGAN TO CRY FOR A RED ROSE AND WOULD NOT BE PACIFIED +1089-134686-0015-15: BUT THE DUSK DEEPENING IN THE SCHOOLROOM COVERED OVER HIS THOUGHTS THE BELL RANG +3729-6852-0038-1065: WHAT SIR I SAID TO HIM AM I FORTUNATE ENOUGH TO SEE YOU +2830-3980-0074-871: THIS ATTITUDE IS UNIVERSAL AND PARTICULARLY DEVELOPED IN THOSE WHO CONSIDER THEMSELVES BETTER THAN OTHERS +260-123286-0018-720: I SAW AT THE HAMBURG MUSEUM THE SKELETON OF ONE OF THESE CREATURES THIRTY FEET IN LENGTH +1089-134691-0022-60: HE STOOD STILL IN DEFERENCE TO THEIR CALLS AND PARRIED THEIR BANTER WITH EASY WORDS +1995-1837-0029-510: HE DARTED THROUGH THE TREES AND PAUSED A TALL MAN STRONGLY BUT SLIMLY MADE +8455-210777-0059-2415: BUT IT IS SURMISED THAT YOU WILL FIND DIFFICULTIES IN THE WAY OF YOUR ENTERING AT ONCE UPON YOUR (GOVERNMENT->GOVERNOR) +4992-41797-0000-1365: YES DEAD THESE FOUR YEARS (AN->AND) A GOOD JOB FOR HER TOO +8555-284447-0004-2505: AS SOON AS THEY ENTERED THE ROOM OF THE GREAT KNIFE THE BOOLOOROO GAVE A YELL OF DISAPPOINTMENT +61-70970-0026-1770: FITZOOTH'S HAND RESTED AT LAST UPON THE TOP RUNG OF (A->THE) LADDER AND SLOWLY THE TRUTH CAME TO HIM +3570-5695-0003-946: IN THE COMMUNITIES OF THE WESTERN CULTURE THIS POINT IS AT PRESENT FOUND AMONG THE LOWER MIDDLE (CLASS->CLASSES) +1284-1180-0029-241: SOMETIMES IT IS CALLED A CRAZY QUILT BECAUSE THE PATCHES AND (COLORS->COLOURS) ARE SO MIXED UP +2961-961-0018-915: THE MOST FAMOUS OF THEM ALL WAS THE OVERTHROW OF THE ISLAND OF ATLANTIS +1580-141084-0047-435: I HAVE A LETTER HERE MISTER (SOAMES->SOLMES) WHICH I WROTE TO YOU EARLY THIS MORNING IN THE MIDDLE OF A RESTLESS NIGHT +1580-141083-0027-361: HOW LONG WOULD IT TAKE HIM TO DO THAT USING EVERY POSSIBLE CONTRACTION A QUARTER OF AN HOUR NOT LESS +260-123288-0017-751: IS THE ATMOSPHERIC CONDITION HAVING ONCE REACHED (THIS->ITS) DENSITY TO BECOME FINAL +6829-68771-0036-1950: ELIZA CLOSED THE DOOR BEHIND HER WITH A DECIDED SLAM AND A KEY CLICKED IN THE LOCK +6930-76324-0008-1980: I THOUGHT WE WERE STUMPED AGAIN WHEN I FIRST SAW THAT PICTURE BUT IT'S BEEN OF SOME USE AFTER ALL +4507-16021-0024-1245: ALGEBRA MEDICINE BOTANY HAVE EACH THEIR SLANG +8455-210777-0046-2402: YOU MAY BE QUITE SURE IT'S THERE SAID CAPTAIN (BATTLEAX->BATTLE AX) AND THAT I CAN SO USE IT AS TO HALF OBLITERATE YOUR TOWN WITHIN TWO MINUTES OF MY RETURN ON BOARD +7127-75946-0029-2117: THE KING WHO HAD FROM THIS MOMENT BECOME IN REALITY THE PRINCIPAL DANCER IN THE QUADRILLE CAST A LOOK UPON HIS VANQUISHED RIVAL +6930-76324-0011-1983: 
THEY WORRY ME TERRIBLY AND BESIDES I'D LIKE TO SEE WHAT THIS LOVELY FURNITURE LOOKS LIKE WITHOUT SUCH QUANTITIES OF DUST ALL OVER IT GOOD SCHEME (CYN->SYM) +8463-294828-0001-2463: THREE SECONDS BEFORE THE ARRIVAL OF J B HOBSON'S LETTER I NO MORE DREAMED OF CHASING THE UNICORN THAN OF TRYING FOR THE (NORTHWEST->NORTH WEST) PASSAGE +4992-41797-0018-1383: THERE IN THE CEDAR HOLLOW THEN LIVED OLIVE LORD AN ANGRY RESENTFUL LITTLE CREATURE WEIGHED DOWN BY A FIERCE SENSE OF INJURY +3575-170457-0023-993: I CAREFULLY AVOID ANY APPEARANCE OF PREOCCUPATION AND ECCENTRICITY WHICH MIGHT LEAD THOSE I LIVE AMONGST TO SUSPECT THE NATURE OF MY PURSUITS +7729-102255-0019-2252: TO EMBARRASS THIS DAMAGING EXPOSURE JUDGE (LECOMPTE->LE COMTE) ISSUED A WRIT AGAINST THE EX GOVERNOR ON A FRIVOLOUS CHARGE OF CONTEMPT +260-123288-0003-737: THE ELECTRIC LIGHT CAN SCARCELY PENETRATE THROUGH THE DENSE CURTAIN WHICH (HAS->IS) DROPPED OVER THE THEATRE ON WHICH THE BATTLE OF THE ELEMENTS IS ABOUT TO BE WAGED +2300-131720-0015-587: HE OBTAINED THE DESIRED SPEED AND LOAD WITH A FRICTION BRAKE ALSO REGULATOR OF SPEED BUT WAITED FOR AN INDICATOR TO VERIFY IT +8230-279154-0000-2312: THE ANALYSIS OF KNOWLEDGE WILL OCCUPY US UNTIL THE END OF THE THIRTEENTH LECTURE AND IS THE MOST DIFFICULT PART OF OUR WHOLE ENTERPRISE +2961-960-0014-888: THE IDEAS ALSO REMAIN BUT THEY HAVE BECOME TYPES IN NATURE FORMS OF MEN ANIMALS BIRDS FISHES +1995-1837-0001-482: IT WAS THE FIRST GREAT SORROW OF HIS LIFE IT WAS NOT SO MUCH THE LOSS OF THE COTTON ITSELF BUT THE FANTASY THE HOPES THE DREAMS BUILT AROUND IT +4970-29093-0012-1293: BUT PHILIP DID AFFORD IT AND HE WROTE THANKING HIS FRIENDS AND DECLINING BECAUSE HE SAID THE POLITICAL SCHEME WOULD FAIL AND OUGHT TO FAIL +4992-41797-0017-1382: HE WOULDN'T SEARCH SO DON'T WORRY REPLIED CYRIL QUIETLY AND THE TWO LOOKED AT EACH OTHER AND KNEW THAT IT WAS SO +7021-85628-0027-2087: IF YOU DRESSED IN SILK AND GOLD FROM TOP TO TOE YOU COULD NOT LOOK ANY NICER THAN IN YOUR LITTLE RED CAP +2830-3980-0031-828: WHY DO THEY NOT INVADE THE CATHOLIC PROVINCES AND PREACH THEIR DOCTRINE TO GODLESS PRINCES BISHOPS AND DOCTORS AS WE HAVE DONE BY THE HELP OF GOD +4992-23283-0009-1353: OH MY LORD CRIED MISS WOODLEY WITH A MOST FORCIBLE ACCENT YOU ARE THE LAST PERSON ON EARTH SHE WOULD PARDON ME FOR (ENTRUSTING->INTRUSTING) +8230-279154-0015-2327: THEY MUST HAVE SOME CHARACTERISTIC WHICH MAKES US REGARD THEM AS REFERRING TO MORE OR LESS REMOTE PORTIONS OF THE PAST +2094-142345-0001-512: BUT THE WINDOWS ARE PATCHED WITH WOODEN PANES AND THE DOOR I THINK IS LIKE THE GATE IT IS NEVER OPENED +1089-134691-0025-63: A MOMENT BEFORE THE GHOST OF THE ANCIENT KINGDOM OF THE DANES HAD LOOKED FORTH THROUGH THE VESTURE OF THE (HAZEWRAPPED->HAZE WRAPT) CITY +6930-75918-0015-1966: THUS IT IS THAT THE (HONOR->HONOUR) OF THREE IS SAVED OUR (COUNTRY'S->COUNTRY) OUR MASTER'S AND OUR OWN +1320-122617-0025-317: SO UNCAS YOU HAD BETTER TAKE THE LEAD WHILE I WILL PUT ON THE SKIN AGAIN AND TRUST TO CUNNING FOR WANT OF SPEED +237-134500-0033-692: I WISH YOU WEREN'T SO RESTLESS AND DIDN'T GET SO WORKED UP OVER THINGS SHE SAID SADLY +4507-16021-0056-1277: HOWEVER HE WHO SAYS LIGHT DOES NOT NECESSARILY SAY JOY +2961-961-0020-917: THIS IS THE EXPLANATION OF THE SHALLOWS WHICH ARE FOUND IN THAT PART OF THE ATLANTIC OCEAN +8463-287645-0004-2431: JOHN WESLEY COMBASH JACOB TAYLOR AND THOMAS EDWARD SKINNER +4446-2275-0012-1187: SHE LOOKED AT HIS HEAVY SHOULDERS AND BIG DETERMINED HEAD THRUST FORWARD LIKE A CATAPULT IN LEASH +1995-1836-0001-467: AT LAST 
THE COTTON COMBINE WAS TO ALL APPEARANCES AN ASSURED FACT AND HE WAS SLATED FOR THE SENATE +1284-1180-0015-227: WE ARE TRAVELING REPLIED OJO AND WE STOPPED AT YOUR HOUSE JUST TO REST AND REFRESH OURSELVES +3575-170457-0022-992: IN THE EVENINGS I CONFESS I DO THINK BUT I NEVER TROUBLE ANY ONE ELSE WITH MY THOUGHTS +2830-3980-0075-872: BUT THE REAL SIGNIFICANCE AND COMFORT OF THE WORDS FOR OUR SINS IS LOST UPON THEM +5142-33396-0054-1516: THAT IS THE BEST WAY TO DECIDE FOR THE SPEAR WILL ALWAYS POINT SOMEWHERE AND ONE THING IS AS GOOD AS ANOTHER +8463-294828-0014-2476: THERE WAS GOOD REASON TO STOP AND THINK EVEN FOR THE WORLD'S MOST EMOTIONLESS MAN +61-70968-0030-1711: NOW BE SILENT ON YOUR LIVES HE BEGAN BUT THE CAPTURED APPRENTICE SET UP AN INSTANT SHOUT +4992-41797-0002-1367: GRANDFATHER WAS ALEXANDER CAREY L L D DOCTOR OF LAWS THAT IS +4507-16021-0011-1232: WHY SHOULD ONE NOT EXPLORE EVERYTHING AND STUDY EVERYTHING +6829-68769-0031-1891: AS REGARDS MY ROBBING THE COMPANY I'LL SAY THAT I SAVED (THEM->HIM) A HEAVY LOSS ONE DAY +3729-6852-0024-1051: I SEE A CROWD IN ONE CORNER OF THE GARDEN EVERYBODY STANDING STILL AND LOOKING UP +5105-28233-0006-1412: NO CATHEDRAL NOT EVEN BURGOS ITSELF COULD VIE WITH THE CHURCH AT (MONTMARTRE->MONT MARTRE) +4446-2275-0026-1201: SHE CLOSED HER EYES AND TOOK A DEEP BREATH AS IF TO DRAW IN AGAIN THE FRAGRANCE OF THOSE DAYS +4970-29095-0002-1307: WELL MOTHER SAID THE YOUNG STUDENT LOOKING UP WITH A SHADE OF IMPATIENCE +8224-274384-0013-2311: HIS DEATH IN THIS CONJUNCTURE WAS A PUBLIC MISFORTUNE +4446-2273-0004-1142: I SHOULD NEVER HAVE ASKED YOU IF MOLLY HAD BEEN HERE FOR I REMEMBER YOU DON'T LIKE ENGLISH COOKERY +4446-2275-0042-1217: AND THEN YOU CAME BACK NOT CARING VERY MUCH BUT IT MADE NO DIFFERENCE +5142-36377-0001-1532: IN FIVE MINUTES I WAS IN A NEW WORLD AND MY MELANCHOLY ROOM WAS FULL OF THE LIVELIEST FRENCH COMPANY +61-70968-0015-1696: NAY WE (REFUSED->REFUSE) THEIR REQUEST MOST POLITELY MOST NOBLE SAID THE LITTLE STROLLER +5142-33396-0024-1486: I STOOD WITH MY BACK TO THE WALL FOR I WANTED NO SWORD REACHING OUT OF THE DARK FOR ME +4446-2271-0014-1127: WESTMERE AND I WERE BACK AFTER THE FIRST ACT AND WE THOUGHT SHE SEEMED QUITE UNCERTAIN OF HERSELF +8463-294828-0029-2491: OUR BAGGAGE WAS IMMEDIATELY CARRIED TO THE DECK OF THE FRIGATE I RUSHED ABOARD +7021-79759-0002-2056: THEY ARE CHIEFLY FORMED FROM COMBINATIONS OF THE IMPRESSIONS MADE IN CHILDHOOD +2830-3980-0030-827: THEY DO NOT GO WHERE THE ENEMIES OF THE GOSPEL PREDOMINATE THEY GO WHERE THE CHRISTIANS ARE +260-123288-0001-735: THE WEATHER IF WE MAY USE (THAT->THE) TERM WILL CHANGE BEFORE LONG +237-134500-0002-661: A BRISK WIND HAD COME UP AND WAS DRIVING PUFFY WHITE CLOUDS ACROSS THE SKY +5142-36586-0003-1560: BUT THIS SUBJECT WILL BE MORE PROPERLY DISCUSSED WHEN WE TREAT OF THE DIFFERENT RACES OF MANKIND +6930-81414-0024-2025: MY TONGUE REFUSED TO ARTICULATE MY POWER OF SPEECH LEFT ME +121-127105-0001-135: (SOMEONE->SOME ONE) ELSE TOLD A STORY NOT PARTICULARLY EFFECTIVE WHICH I SAW HE WAS NOT FOLLOWING +237-134500-0032-691: I GET TIRED OF SEEING MEN AND HORSES GOING UP AND DOWN UP AND DOWN +1580-141084-0032-420: FOR A MOMENT (GILCHRIST->GILCRIST) WITH UPRAISED HAND TRIED TO CONTROL HIS WRITHING FEATURES +908-157963-0017-2580: THE CLOUD THEN (SHEWD->SHOWED) HIS GOLDEN HEAD AND HIS BRIGHT FORM EMERG'D +3575-170457-0051-1021: AT TEA TIME THEY WERE SAD AND SILENT AND THE MEAL WENT AWAY UNTOUCHED BY ANY OF THE THREE +6829-68769-0030-1890: I WAS BOOKKEEPER SO IT WAS EASY TO GET A BLANK 
(CHECK->CHEQUE) AND FORGE THE SIGNATURE +260-123288-0016-750: I REFER TO THE THERMOMETER IT INDICATES THE FIGURE IS OBLITERATED +6930-76324-0023-1995: AND MY POCKET MONEY IS GETTING LOW AGAIN AND YOU HAVEN'T ANY LEFT AS USUAL +5105-28241-0013-1455: WOULD NOT THE LOFTIEST EMINENCES OF THE CITY AT LEAST BE VISIBLE +4077-13754-0000-1096: THE ARMY FOUND THE PEOPLE IN POVERTY AND LEFT THEM IN COMPARATIVE WEALTH +1580-141083-0026-360: AS A MATTER OF FACT HE COULD NOT SAID (SOAMES->SOLMES) FOR I ENTERED BY THE SIDE DOOR +5105-28233-0004-1410: ONCE IN ACTION HE WAS LEADING A DETACHMENT OF INFANTRY THROUGH AN (INTRENCHMENT->ENTRENCHMENT) +2961-961-0003-900: I WILL IF (TIMAEUS->TIMY AS) APPROVES I APPROVE +4970-29095-0030-1335: FATHER THEE'S UNJUST TO PHILIP HE'S GOING INTO BUSINESS +672-122797-0030-1815: PERHAPS THE OTHER TREES FROM THE FOREST WILL COME TO LOOK AT ME +1995-1837-0015-496: THE SQUARES OF COTTON SHARP EDGED HEAVY WERE JUST ABOUT TO BURST TO (BOLLS->BOWLS) +672-122797-0015-1800: (WERE->WHERE) I IN THE WARM ROOM WITH ALL THE SPLENDOR AND MAGNIFICENCE +237-134493-0006-646: THAT'S NOT MUCH OF A JOB FOR AN ATHLETE HERE I'VE BEEN TO TOWN AND BACK +4507-16021-0054-1275: THAT IS WHY WE CRY EDUCATION SCIENCE +237-126133-0016-630: OH NO (JASPER->JAPSER) I MUST GO BY MY VERY OWN SELF +5142-33396-0068-1530: SO I LIVED AND NOW AM YOUR TOOTH THRALL WELL IT IS THE LUCK OF WAR +1320-122617-0008-300: THE YOUNG MAN IS IN BONDAGE AND MUCH I FEAR HIS DEATH IS DECREED +5142-33396-0038-1500: HE SHALL NOT LEAVE YOU DAY OR NIGHT WHETHER YOU ARE WORKING OR PLAYING OR SLEEPING +3575-170457-0021-991: I THOUGHT IT THEREFORE MY DUTY WHEN I LEFT SCHOOL TO BECOME A GOVERNESS +2830-3980-0013-810: HE MENTIONS THE APOSTLES FIRST BECAUSE THEY WERE APPOINTED DIRECTLY BY GOD +8455-210777-0014-2370: SIR KENNINGTON OVAL IS A VERY FINE PLAYER SAID MY WIFE +1580-141084-0003-391: NO NAMES PLEASE SAID HOLMES AS WE KNOCKED AT (GILCHRIST'S->GILKERTH'S) DOOR +672-122797-0000-1785: OUT IN THE (WOODS->WOOD) STOOD A NICE LITTLE FIR TREE +121-121726-0012-121: HUSSY WOMAN AND BOND TIE +121-121726-0011-120: HUSBAND THE NEXT THING TO A WIFE +7176-92135-0003-2190: YOUR PLAY MUST BE NOT MERELY A GOOD PLAY BUT A SUCCESSFUL ONE +5142-33396-0053-1515: I TOOK FIVE GREAT BRACELETS OF GOLD FROM OUR TREASURE CHEST AND GAVE THEM TO HIM +7176-88083-0016-2175: STRAIGHTWAY THE HAWK GLIDED FROM HIS PERCH AND DARTED AFTER HIM +1995-1837-0000-481: HE KNEW THE SILVER FLEECE HIS AND (ZORA'S->TSORA'S) MUST BE RUINED +4446-2271-0012-1125: I SAY SIR HARRY THE LITTLE GIRL'S GOING FAMOUSLY TO NIGHT ISN'T SHE +4992-41797-0001-1366: WELL AS I SAY IT'S AN AWFUL QUEER WORLD THEY CLAP ALL THE BURGLARS (INTO->AND) JAIL (AND->*) THE MURDERERS AND THE WIFE BEATERS (I'VE->I) ALLERS THOUGHT A (GENTLE->GENLE) REPROOF WOULD BE ENOUGH PUNISHMENT FOR A WIFE (BEATER->BEAER) CAUSE HE (PROBABLY->PROB'BLY) HAS A LOT (O PROVOCATION->OF PROVOCATIONS) THAT NOBODY KNOWS AND THE FIREBUGS CAN'T THINK (O->OF) THE RIGHT NAME SOMETHING LIKE (CENDENARIES AN->SENDIARIES AND) THE BREAKERS (O->OF) THE PEACE (AN->AND) WHAT NOT (AN->AND) YET THE LAW HAS (NOTHIN->NOTHING) TO SAY TO A MAN LIKE (HEN->HAND) LORD +8224-274381-0016-2296: VALOR INDEED WAS VERY GENERALLY DIFFUSED OVER THE ONE PARTY AS WELL AS THE OTHER DURING THIS PERIOD DISCIPLINE ALSO WAS ATTAINED BY THE FORCES OF THE PARLIAMENT BUT THE PERFECTION OF THE MILITARY ART IN CONCERTING THE GENERAL PLANS OF ACTION AND THE OPERATIONS OF THE FIELD SEEMS STILL ON BOTH SIDES TO HAVE BEEN IN A GREAT MEASURE WANTING +1284-134647-0005-272: 
THEY ASSERTED WITH CONFIDENCE AND ALMOST WITH EXULTATION THAT THE APOSTOLICAL SUCCESSION WAS INTERRUPTED THAT ALL THE BISHOPS OF EUROPE AND ASIA WERE INFECTED BY THE CONTAGION OF GUILT AND SCHISM AND THAT THE PREROGATIVES OF THE CATHOLIC CHURCH WERE CONFINED TO THE CHOSEN PORTION OF THE AFRICAN BELIEVERS WHO ALONE HAD PRESERVED INVIOLATE THE INTEGRITY OF THEIR FAITH AND DISCIPLINE +5639-40744-0027-1591: THUS SAYING AND PRESSING THE CRUCIFIX TO HER BREAST SHE FELL FAINTING INTO THE ARMS OF DONA (ESTAFANIA->ESTEPHANIA) WHO AS A (GENTLEWOMAN->GENTLE WOMAN) TO WHOSE SEX PITY IS AS NATURAL AS CRUELTY (IS->AS) TO MAN INSTANTLY PRESSED HER LIPS TO THOSE OF THE FAINTING GIRL SHEDDING OVER HER SO MANY TEARS THAT THERE NEEDED NO OTHER SPRINKLING OF WATER TO RECOVER (LEOCADIA->THE OCADIA) FROM HER SWOON +8224-274381-0001-2281: AMONG OTHER PERSONS OF DISTINCTION WHO UNITED THEMSELVES TO HIM WAS LORD NAPIER OF (MERCHISTON->MURCHESTON) SON OF THE FAMOUS INVENTOR OF THE LOGARITHMS THE PERSON TO WHOM THE TITLE OF A GREAT MAN IS MORE JUSTLY DUE THAN TO ANY OTHER WHOM HIS COUNTRY EVER PRODUCED +4077-13754-0015-1111: THEN THE CHURCH WAS DISINCORPORATED AND ITS PROPERTY BOTH REAL AND PERSONAL CONFISCATED AND (ESCHEATED->INITIATED) TO THE GOVERNMENT OF THE UNITED STATES AND ALTHOUGH THE PERSONAL PROPERTY WAS SOON RESTORED REAL ESTATE OF GREAT VALUE LONG LAY IN THE HANDS OF THE COURT'S RECEIVER AND THE MORMON CHURCH HAD TO PAY THE NATIONAL GOVERNMENT HIGH RENTAL ON ITS OWN PROPERTY +1221-135766-0011-182: BEHOLDING IT HESTER WAS CONSTRAINED TO RUSH TOWARDS THE CHILD TO PURSUE THE LITTLE ELF IN THE FLIGHT WHICH SHE INVARIABLY BEGAN TO SNATCH HER TO HER BOSOM WITH A CLOSE PRESSURE AND EARNEST KISSES NOT SO MUCH FROM OVERFLOWING LOVE AS TO ASSURE HERSELF THAT PEARL WAS FLESH AND BLOOD AND NOT UTTERLY DELUSIVE +908-31957-0019-2613: THOU CANST WAIT THROUGH SORROW AND SICKNESS TO BRING SOULS TO TOUCH AND THINK IT SOON WHEN OTHERS CRY TOO LATE +1995-1826-0000-439: IN THE DEBATE BETWEEN THE SENIOR SOCIETIES HER (DEFENCE->DEFENSE) OF THE FIFTEENTH AMENDMENT HAD BEEN NOT ONLY A NOTABLE BIT OF REASONING BUT DELIVERED WITH REAL ENTHUSIASM +8463-294828-0017-2479: PACK AS MUCH INTO MY TRUNK AS YOU CAN MY TRAVELING KIT MY SUITS SHIRTS AND SOCKS DON'T BOTHER COUNTING JUST SQUEEZE IT ALL IN AND HURRY +1580-141083-0000-334: I WILL ENDEAVOUR IN MY STATEMENT TO AVOID SUCH TERMS AS WOULD SERVE TO LIMIT THE EVENTS TO ANY PARTICULAR PLACE OR GIVE A CLUE AS TO THE PEOPLE CONCERNED +2961-961-0007-904: (THE->HIS) SUBJECT WAS A VERY NOBLE ONE HE DESCRIBED THE MOST FAMOUS ACTION IN WHICH THE ATHENIAN PEOPLE WERE EVER ENGAGED +121-121726-0000-109: ALSO A POPULAR CONTRIVANCE WHEREBY LOVE MAKING MAY BE SUSPENDED BUT NOT STOPPED DURING THE PICNIC SEASON +61-70970-0015-1759: WILL CRIED HE SOFTLY AND STUTELEY WHO HAD CHOSEN HIS COUCH ACROSS THE DOOR OF HIS YOUNG MASTER'S CHAMBER SPRANG UP AT ONCE IN ANSWER +5683-32879-0014-1669: YES SOMETHING EVERYTHING SAID RACHEL HURRIEDLY LOOKING FROWNINGLY AT A FLOWER WHICH SHE WAS TWIRLING IN HER FINGERS +8555-292519-0007-2554: IT IS MY HEART HUNG IN THE SKY AND NO CLOUDS EVER FLOAT BETWEEN THE GRAVE FLOWERS AND MY HEART ON HIGH +8555-284447-0008-2509: RICH JEWELS OF BLUE STONES GLITTERED UPON THEIR PERSONS AND THE ROYAL LADIES WERE FULLY AS GORGEOUS AS THEY WERE HAUGHTY AND OVERBEARING +6930-76324-0027-1999: FORGETTING ALL THEIR WEARINESS THEY SEIZED THEIR CANDLES AND SCURRIED THROUGH THE HOUSE FINDING (AN->ON) OCCASIONAL PAPER TUCKED AWAY IN SOME ODD CORNER +7176-92135-0022-2209: TO BE OR NOT TO BE THAT 
IS THE QUESTION WHETHER TIS NOBLER IN THE MIND TO SUFFER THE SLINGS AND ARROWS WHAT NO HAMLET SPEAKING +908-157963-0006-2569: AND GENTLE SLEEP THE SLEEP OF DEATH AND GENTLY HEAR THE VOICE OF HIM THAT WALKETH IN THE GARDEN IN THE EVENING TIME +4970-29093-0013-1294: AND HE WENT BACK TO HIS BOOKS AND TO HIS WAITING FOR AN OPENING LARGE ENOUGH FOR HIS DIGNIFIED ENTRANCE INTO THE LITERARY WORLD +3575-170457-0054-1024: STUNG BY ANXIETY FOR THIS LITTLE SISTER SHE UPBRAIDED MISS W FOR HER FANCIED INDIFFERENCE TO ANNE'S STATE OF HEALTH +1995-1836-0003-469: SHE WAS NOT HERSELF A NOTABLY INTELLIGENT WOMAN SHE GREATLY ADMIRED INTELLIGENCE OR WHATEVER LOOKED TO HER LIKE INTELLIGENCE IN OTHERS +7729-102255-0021-2254: BUT THE AFFAIR WAS MAGNIFIED AS A CROWNING PROOF THAT THE FREE STATE MEN WERE INSURRECTIONISTS AND OUTLAWS +8230-279154-0017-2329: THERE MAY BE A SPECIFIC FEELING WHICH COULD BE CALLED THE FEELING OF PASTNESS ESPECIALLY WHERE IMMEDIATE MEMORY IS CONCERNED +1284-1181-0014-259: HE SELECTED A SMALL GOLD BOTTLE WITH A PEPPER BOX TOP SO THAT THE POWDER MIGHT BE SPRINKLED ON ANY OBJECT THROUGH THE SMALL HOLES +2830-3980-0047-844: TO DO SO IS TO LOSE GOD ALTOGETHER BECAUSE GOD BECOMES INTOLERABLE WHEN WE SEEK TO MEASURE AND TO COMPREHEND HIS INFINITE MAJESTY +7729-102255-0004-2237: THE FREE STATE MEN HAD ONLY THEIR CONVICTIONS THEIR INTELLIGENCE THEIR COURAGE AND THE MORAL SUPPORT OF THE NORTH THE CONSPIRACY HAD ITS SECRET COMBINATION THE TERRITORIAL OFFICIALS THE LEGISLATURE THE BOGUS LAWS THE COURTS THE MILITIA OFFICERS THE PRESIDENT AND THE ARMY +2300-131720-0016-588: THEN AGAIN THERE WAS NO KNOWN WAY TO LUBRICATE AN ENGINE FOR CONTINUOUS RUNNING AND MISTER EDISON INFORMED ME THAT AS A MARINE ENGINE STARTED BEFORE THE SHIP LEFT NEW YORK AND CONTINUED RUNNING UNTIL IT REACHED ITS HOME PORT SO AN ENGINE FOR HIS PURPOSES MUST PRODUCE LIGHT AT ALL TIMES +1188-133604-0028-92: WELL THEN LAST HERE IS TURNER'S GREEK SCHOOL OF THE HIGHEST CLASS AND YOU DEFINE HIS ART ABSOLUTELY AS FIRST THE DISPLAYING INTENSELY AND WITH THE STERNEST INTELLECT OF NATURAL FORM AS IT IS AND THEN THE ENVELOPMENT OF IT WITH CLOUD AND FIRE +1188-133604-0044-108: IT WILL BE WELL FOR YOU IF YOU JOIN NOT WITH THOSE WHO INSTEAD OF KITES FLY FALCONS WHO INSTEAD OF OBEYING THE LAST WORDS OF THE GREAT CLOUD SHEPHERD TO FEED HIS SHEEP LIVE THE LIVES HOW MUCH LESS THAN VANITY OF THE WAR WOLF AND THE (GIER->GEAR) EAGLE +8455-210777-0001-2357: HAD EVA CRASWELLER NOT BEEN GOOD LOOKING HAD JACK BEEN STILL AT COLLEGE HAD SIR KENNINGTON OVAL REMAINED IN ENGLAND HAD MISTER (BUNNIT->BENNETT) AND THE BAR KEEPER NOT SUCCEEDED IN STOPPING MY CARRIAGE ON THE HILL SHOULD I HAVE SUCCEEDED IN ARRANGING FOR THE FINAL DEPARTURE OF MY OLD FRIEND +8224-274381-0017-2297: HISTORIANS AT LEAST PERHAPS FROM THEIR OWN IGNORANCE AND INEXPERIENCE HAVE NOT REMARKED ANY THING BUT A HEADLONG IMPETUOUS CONDUCT EACH PARTY HURRYING TO A BATTLE WHERE VALOR AND FORTUNE CHIEFLY (DETERMINED->DETERMINE) THE SUCCESS +8555-284449-0011-2537: THE GUARDS HAD A TERRIBLE STRUGGLE WITH THE GOAT WHICH WAS LOOSE IN THE ROOM AND STILL WANTED TO FIGHT BUT FINALLY THEY SUBDUED THE ANIMAL AND THEN THEY TOOK THE BOOLOOROO OUT OF THE FRAME HE WAS TIED IN AND BROUGHT BOTH HIM AND THE GOAT BEFORE QUEEN TROT WHO AWAITED THEM IN THE THRONE ROOM OF THE PALACE +1221-135767-0011-198: IT WAS FURTHER DECORATED WITH STRANGE AND SEEMINGLY CABALISTIC FIGURES AND DIAGRAMS SUITABLE TO THE QUAINT TASTE OF THE AGE WHICH HAD BEEN DRAWN IN THE STUCCO WHEN NEWLY LAID ON AND HAD NOW GROWN HARD AND DURABLE 
FOR THE ADMIRATION OF AFTER TIMES +8455-210777-0016-2372: THIS I FELT WAS PAID TO ME AS BEING PRESIDENT OF THE REPUBLIC AND I ENDEAVOURED TO BEHAVE MYSELF WITH SUCH MINGLED HUMILITY AND DIGNITY AS MIGHT BEFIT THE OCCASION BUT I COULD NOT BUT FEEL THAT SOMETHING WAS WANTING TO THE SIMPLICITY OF MY ORDINARY LIFE +4507-16021-0026-1247: TO KEEP AFLOAT AND TO RESCUE FROM OBLIVION TO HOLD ABOVE THE GULF WERE IT BUT A FRAGMENT OF SOME LANGUAGE WHICH MAN HAS SPOKEN AND WHICH WOULD OTHERWISE BE LOST THAT IS TO SAY ONE OF THE ELEMENTS GOOD OR BAD OF WHICH CIVILIZATION IS COMPOSED OR BY WHICH IT IS COMPLICATED TO EXTEND THE RECORDS OF SOCIAL OBSERVATION IS TO SERVE CIVILIZATION ITSELF +5105-28241-0015-1457: TO THE SURPRISE OF ALL AND ESPECIALLY OF LIEUTENANT PROCOPE THE LINE INDICATED A BOTTOM AT A NEARLY UNIFORM DEPTH OF FROM FOUR TO FIVE FATHOMS AND ALTHOUGH THE SOUNDING WAS PERSEVERED WITH CONTINUOUSLY FOR MORE THAN TWO HOURS OVER A CONSIDERABLE AREA THE DIFFERENCES OF LEVEL WERE INSIGNIFICANT NOT CORRESPONDING IN ANY DEGREE TO WHAT WOULD BE EXPECTED OVER THE SITE OF A CITY THAT HAD BEEN TERRACED LIKE THE SEATS OF AN (AMPHITHEATER->AMPHITHEATRE) +5105-28233-0007-1413: BEN ZOOF'S MOST AMBITIOUS DESIRE WAS TO INDUCE THE CAPTAIN TO GO WITH HIM AND END HIS DAYS IN HIS MUCH LOVED HOME AND SO INCESSANTLY WERE SERVADAC'S EARS BESIEGED WITH DESCRIPTIONS OF THE UNPARALLELED BEAUTIES AND ADVANTAGES OF THIS EIGHTEENTH (ARRONDISSEMENT->ARONNDISSIMON) OF PARIS THAT HE COULD SCARCELY HEAR THE NAME OF (MONTMARTRE->MONTMARCHRE) WITHOUT A CONSCIOUS THRILL OF AVERSION +3570-5696-0003-962: A RECONCILIATION BETWEEN THE TWO CONFLICTING REQUIREMENTS IS EFFECTED BY A RESORT TO MAKE BELIEVE MANY AND INTRICATE POLITE OBSERVANCES AND SOCIAL DUTIES OF A CEREMONIAL NATURE ARE DEVELOPED MANY ORGANIZATIONS ARE FOUNDED WITH SOME SPECIOUS OBJECT OF AMELIORATION EMBODIED IN THEIR OFFICIAL STYLE AND TITLE THERE IS MUCH COMING AND GOING AND A DEAL OF TALK TO THE END THAT THE TALKERS MAY NOT HAVE OCCASION TO REFLECT ON WHAT IS THE EFFECTUAL ECONOMIC VALUE OF THEIR TRAFFIC +8224-274381-0002-2282: WHILE THE FORMER FORETOLD THAT THE SCOTTISH COVENANTERS WERE SECRETLY FORMING A UNION WITH THE ENGLISH PARLIAMENT AND INCULCATED THE NECESSITY OF PREVENTING THEM BY SOME VIGOROUS UNDERTAKING THE LATTER STILL INSISTED THAT EVERY SUCH ATTEMPT WOULD PRECIPITATE THEM INTO MEASURES TO WHICH OTHERWISE THEY WERE NOT PERHAPS INCLINED +908-157963-0019-2582: IT IS TO TENFOLD LIFE TO LOVE TO PEACE AND RAPTURES HOLY UNSEEN DESCENDING WEIGH MY LIGHT WINGS UPON BALMY FLOWERS AND COURT THE FAIR (EYED->EY'D) DEW TO TAKE ME TO HER SHINING TENT THE WEEPING VIRGIN TREMBLING KNEELS BEFORE THE RISEN SUN +8555-284447-0007-2508: THEREFORE HER MAJESTY PAID NO ATTENTION TO ANYONE AND NO ONE PAID ANY ATTENTION TO HER +5105-28241-0016-1458: YOU MUST SEE LIEUTENANT I SHOULD THINK THAT WE ARE NOT SO NEAR THE COAST OF ALGERIA AS YOU IMAGINED +7127-75947-0015-2133: THERE CANNOT BE A DOUBT HE RECEIVED YOU KINDLY FOR IN FACT YOU RETURNED WITHOUT HIS PERMISSION +6829-68771-0024-1938: FOR THE FIRST TIME THE MAID SEEMED A LITTLE CONFUSED AND HER GAZE WANDERED FROM THE FACE OF HER VISITOR +8455-210777-0002-2358: ON ARRIVING AT HOME AT MY OWN RESIDENCE I FOUND THAT OUR SALON WAS FILLED WITH A BRILLIANT COMPANY +6930-75918-0017-1968: BUT IN THIS FRIENDLY PRESSURE (RAOUL->RALPH) COULD DETECT THE NERVOUS AGITATION OF A GREAT INTERNAL CONFLICT +4446-2273-0035-1173: BARTLEY LEANED OVER HER SHOULDER WITHOUT TOUCHING HER AND WHISPERED IN HER EAR YOU ARE GIVING ME A CHANCE YES 
+5105-28240-0011-1428: I LEFT YOU ON A CONTINENT AND HERE I HAVE THE (HONOR->HONOUR) OF FINDING YOU ON AN ISLAND +4446-2275-0043-1218: BARTLEY BENT OVER AND TOOK HER IN HIS ARMS KISSING HER MOUTH AND HER WET TIRED EYES +121-127105-0005-139: I COULD WRITE TO MY MAN AND ENCLOSE THE KEY HE COULD SEND DOWN THE PACKET AS HE FINDS IT +7176-88083-0019-2178: AS HE FLEW HIS DOWN REACHING CLUTCHING TALONS WERE NOT HALF A YARD ABOVE THE FUGITIVE'S HEAD +1089-134686-0034-34: THE RECTOR PAUSED AND THEN SHAKING HIS CLASPED HANDS BEFORE HIM WENT ON +1284-1180-0032-244: I WILL SHOW YOU WHAT A GOOD JOB I DID AND SHE WENT TO A TALL CUPBOARD AND THREW OPEN THE DOORS +6829-68769-0032-1892: I DISCOVERED AND PUT OUT A FIRE THAT WOULD HAVE DESTROYED THE WHOLE PLANT BUT (MARSHALL->MARTIAL) NEVER EVEN THANKED ME +1320-122617-0027-319: AS SOON AS THESE DISPOSITIONS WERE MADE THE SCOUT TURNED TO DAVID AND GAVE HIM HIS PARTING INSTRUCTIONS +7729-102255-0035-2268: THE MILITARY FORCE PARTLY (RABBLE->REBEL) PARTLY ORGANIZED HAD MEANWHILE MOVED INTO THE TOWN +5683-32866-0028-1652: THE SOMBRE OLD TREES LIKE GIGANTIC HEARSE PLUMES BLACK AND AWFUL +5142-36377-0002-1533: THE SOUND OF AN IMPERATIVE AND UNCOMPROMISING BELL RECALLED ME IN DUE TIME TO THE REGIONS OF REALITY +3570-5694-0013-933: THIS DIFFERENTIATION IS FURTHERED BY THE INHERITANCE OF WEALTH AND THE CONSEQUENT INHERITANCE OF GENTILITY +61-70968-0061-1742: YOU ARE A WORTHY LEECH WILL PRESENTLY WHISPERED ROBIN THE WINE HAS WORKED A MARVEL +1580-141083-0044-378: I DARE NOT GO SO FAR AS THAT BUT OF THE THREE HE IS PERHAPS THE LEAST UNLIKELY +7176-92135-0005-2192: BUT SUPPOSE YOU SAID I'M FOND OF WRITING MY PEOPLE ALWAYS SAY MY LETTERS HOME ARE GOOD ENOUGH FOR PUNCH +5683-32865-0017-1623: ALL THE TIME HE WAS TALKING TO ME HIS ANGRY LITTLE EYES WERE FOLLOWING LAKE +6829-68771-0023-1937: YOU SPEAK LIKE AN EDUCATED PERSON SAID BETH WONDERINGLY WHERE IS YOUR HOME +8455-210777-0017-2373: MY WIFE ON THE SPUR OF THE MOMENT MANAGED TO GIVE THE (GENTLEMEN->GENTLEMAN) A VERY GOOD DINNER +7021-85628-0012-2072: BUT YOU MUST NOT EAT WITH YOUR CAP ON YOUR HEAD SHE SAID AND WAS GOING TO TAKE IT OFF +7127-75947-0029-2147: THE YOUNG GIRLS HAD INDEED MADE THEMSELVES SMALL INDEED INVISIBLE +1320-122617-0026-318: WELL WHAT CAN'T BE DONE BY MAIN COURAGE IN WAR MUST BE DONE BY CIRCUMVENTION +1089-134686-0004-4: NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND +7729-102255-0005-2238: THIS WAS A FORMIDABLE ARRAY OF ADVANTAGES SLAVERY WAS PLAYING WITH LOADED DICE +61-70968-0017-1698: I COULD NOT SEE MY BOY INJURED EXCELLENCE FOR BUT DOING HIS DUTY AS ONE OF CUMBERLAND'S SONS +1320-122612-0001-276: THE DEWS WERE SUFFERED TO EXHALE AND THE SUN HAD DISPERSED THE MISTS AND WAS SHEDDING A STRONG AND CLEAR LIGHT IN THE FOREST WHEN THE (TRAVELERS->TRAVELLERS) RESUMED THEIR JOURNEY +1284-1181-0016-261: I AM NOT ALLOWED TO PERFORM MAGIC EXCEPT FOR MY OWN AMUSEMENT HE TOLD HIS VISITORS AS HE LIGHTED A PIPE WITH A CROOKED STEM AND BEGAN TO SMOKE +3570-5696-0007-966: THE USE OF THE WORD WASTE AS A TECHNICAL TERM THEREFORE IMPLIES NO DEPRECATION OF THE MOTIVES OR OF THE ENDS SOUGHT BY THE CONSUMER UNDER THIS CANON OF CONSPICUOUS WASTE +5639-40744-0016-1580: ON THE CONTRARY HE RESOLVED TO TELL THEM THAT REPENTING OF HIS VIOLENCE AND MOVED BY HER TEARS HE HAD ONLY CARRIED HER HALF WAY TOWARDS HIS HOUSE AND THEN LET HER GO +8463-294828-0003-2465: I WANTED NOTHING MORE THAN TO SEE MY COUNTRY AGAIN MY FRIENDS MY MODEST QUARTERS BY THE BOTANICAL GARDENS MY DEARLY BELOVED COLLECTIONS 
+3575-170457-0025-995: AGAIN I THANK YOU THIS INCIDENT I SUPPOSE WILL BE RENEWED NO MORE IF I LIVE TO BE AN OLD WOMAN I SHALL REMEMBER IT THIRTY YEARS HENCE AS A BRIGHT DREAM +7176-92135-0038-2225: A STAGE MEAL IS POPULAR BECAUSE IT PROVES TO THE AUDIENCE THAT THE ACTORS EVEN WHEN CALLED CHARLES (HAWTREY->HOULTREE) OR OWEN (NARES->NAYERS) ARE REAL PEOPLE JUST LIKE YOU AND ME +2094-142345-0005-516: SEVERAL CLOTHES HORSES A PILLION A SPINNING WHEEL AND AN OLD BOX WIDE OPEN AND STUFFED FULL OF COLOURED RAGS +1188-133604-0001-65: THEY UNITE EVERY QUALITY AND SOMETIMES YOU WILL FIND ME REFERRING TO THEM AS COLORISTS SOMETIMES AS CHIAROSCURISTS +5683-32879-0000-1655: IT WAS NOT VERY MUCH PAST ELEVEN THAT MORNING WHEN THE PONY CARRIAGE FROM BRANDON DREW UP BEFORE THE LITTLE GARDEN WICKET OF REDMAN'S FARM +672-122797-0020-1805: BUT THE TREE DID NOT REJOICE AT ALL HE GREW AND GREW AND WAS GREEN BOTH WINTER AND SUMMER +1580-141084-0023-411: IN A FEW HOURS THE EXAMINATION WOULD COMMENCE AND HE WAS STILL IN THE DILEMMA BETWEEN MAKING THE FACTS PUBLIC AND ALLOWING THE CULPRIT TO COMPETE FOR THE VALUABLE SCHOLARSHIP +237-126133-0007-621: BUT POLLY COULDN'T SPEAK AND IF JASPER HADN'T CAUGHT HER JUST IN TIME SHE WOULD HAVE TUMBLED OVER BACKWARD FROM THE STOOL PHRONSIE AND ALL +2300-131720-0034-606: THE OTHERS HAVING BEEN IN OPERATION TOO SHORT A TIME TO SHOW DEFINITE RESULTS ALTHOUGH THEY ALSO WENT QUICKLY TO A DIVIDEND BASIS +3570-5694-0015-935: SO MANY OF THEM HOWEVER AS MAKE UP THE RETAINER AND HANGERS ON OF THE PATRON MAY BE CLASSED AS VICARIOUS CONSUMER WITHOUT QUALIFICATION +2961-960-0001-875: THE INFLUENCE (WITH->WHICH) THE TIMAEUS HAS EXERCISED UPON POSTERITY IS DUE PARTLY TO A MISUNDERSTANDING +61-70968-0049-1730: HAVE YOUR WILL CHILD IF THE BOY ALSO WILLS IT MONTFICHET ANSWERED FEELING TOO ILL TO OPPOSE ANYTHING VERY STRONGLY JUST THEN +8455-210777-0019-2375: THEN THERE WERE THREE OR FOUR LEADING MEN OF THE COMMUNITY WITH THEIR WIVES WHO WERE FOR THE MOST PART THE FATHERS AND MOTHERS OF THE YOUNG LADIES +2094-142345-0034-545: AND THERE'S LINEN IN THE HOUSE AS I COULD WELL SPARE YOU FOR (I'VE->I) GOT LOTS (O->OF) SHEETING AND TABLE CLOTHING AND (TOWELLING->TOWELING) AS ISN'T MADE UP +1320-122617-0029-321: IF YOU ARE NOT THEN KNOCKED ON THE HEAD YOUR BEING A NON (COMPOSSER->COMPOSOR) WILL PROTECT YOU AND YOU'LL THEN HAVE (A->*) GOOD REASON TO EXPECT TO DIE IN YOUR BED +2961-960-0017-891: NOTHING CAN EXCEED THE BEAUTY OR ART OF (THE->*) INTRODUCTION IN WHICH HE IS USING WORDS AFTER HIS ACCUSTOMED MANNER +61-70970-0027-1771: ROBIN CAREFULLY DESCENDED THE LADDER AND FOUND HIMSELF SOON UPON FIRM ROCKY GROUND +2300-131720-0000-572: THE PARIS PLANT LIKE THAT AT THE CRYSTAL PALACE WAS A TEMPORARY EXHIBIT +7127-75947-0013-2131: I REMEMBER NOW AND I CONGRATULATE MYSELF DO YOU LOVE (ANY ONE->ANYONE) +7021-79740-0003-2042: TO GIVE AN IDEA OF THESE CONVERSATIONS I WILL REPORT ONE OF THEM IN FULL +4992-23283-0008-1352: HE SEEMED TO WAIT FOR HER REPLY BUT AS SHE MADE NONE HE PROCEEDED +61-70968-0000-1681: HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT +1188-133604-0043-107: SEE THAT YOUR (LIVES->LIES) BE IN NOTHING WORSE THAN A BOY'S CLIMBING FOR HIS ENTANGLED KITE +672-122797-0017-1802: SOMETHING BETTER SOMETHING STILL GRANDER MUST FOLLOW BUT WHAT +260-123286-0005-707: I AM NOT COMPLAINING THAT THE RATE IS SLOW BUT THAT THE SEA IS SO WIDE +908-31957-0002-2596: I DID NOT WRONG MYSELF SO BUT I PLACED A WRONG ON THEE +4446-2275-0041-1216: YOU SEE LOVING (SOME 
ONE->SOMEONE) AS I LOVE YOU MAKES THE WHOLE WORLD DIFFERENT +5683-32866-0027-1651: A COLD BRIGHT MOON WAS SHINING WITH CLEAR SHARP LIGHTS AND SHADOWS +672-122797-0046-1831: TIS NOW WINTER OUT OF DOORS THOUGHT THE TREE +6930-81414-0011-2012: A FEELING OF FREEDOM AND I WAS AWAKE WHERE +1580-141084-0034-422: WELL WELL DON'T TROUBLE TO ANSWER LISTEN AND SEE THAT I DO YOU NO INJUSTICE +7176-92135-0034-2221: THE DUCHESS OF SOUTHBRIDGE TO LORD REGGIE OH REGGIE WHAT DID YOU SAY +4446-2271-0013-1126: (DO->*) YOU KNOW I THOUGHT THE DANCE A BIT CONSCIOUS (TO NIGHT->TONIGHT) FOR THE FIRST TIME +5683-32879-0012-1667: THANK YOU RACHEL MY COUSIN RACHEL MY ONLY FRIEND +4992-41806-0009-1397: EXCLAIMED BILL HARMON TO HIS WIFE AS THEY WENT THROUGH THE LIGHTED HALL +61-70970-0013-1757: THERE WAS NO CHANCE TO ALTER HIS SLEEPING ROOM TO ONE NEARER TO GAMEWELL'S CHAMBER +5142-36377-0015-1546: OUR FIRST IMPRESSIONS OF PEOPLE ARE IN NINE CASES OUT OF TEN THE RIGHT IMPRESSIONS +1580-141083-0013-347: ABOVE ALL THINGS I DESIRE TO SETTLE THE MATTER QUIETLY AND DISCREETLY +908-157963-0018-2581: AND FEAREST THOU BECAUSE I VANISH AND AM SEEN NO MORE +5683-32865-0015-1621: I HAD A HORRID DREAM ABOUT HIM LAST NIGHT THAT +6829-68769-0016-1876: HE UNLOCKED THE DOOR AND CALLED HERE'S VISITORS TOM +6930-76324-0025-1997: WHY IT'S GOLIATH AS USUAL THEY BOTH CRIED PEERING IN +237-126133-0018-632: DON'T MIND IT POLLY WHISPERED JASPER TWASN'T HER FAULT +8555-284447-0020-2521: THE GOAT'S WARLIKE SPIRIT WAS ROUSED BY THIS SUCCESSFUL ATTACK +1089-134686-0032-32: HE IS CALLED AS YOU KNOW THE APOSTLE OF THE INDIES +6930-76324-0024-1996: THEY SAY ILLUMINATION BY (CANDLE LIGHT->CANDLELIGHT) IS THE PRETTIEST IN THE WORLD +4992-23283-0007-1351: TO ASK ANY MORE QUESTIONS OF YOU I BELIEVE WOULD BE UNFAIR +672-122797-0032-1817: CRIED THE YOUNG LADIES AND THEY QUICKLY PUT OUT THE FIRE +672-122797-0031-1816: IT BLAZED UP FAMOUSLY HELP HELP +4970-29095-0017-1322: THE (SIGHT SEERS->SIGHTSEERS) RETURNED IN HIGH SPIRITS FROM THE CITY +4507-16021-0040-1261: ONE THINKS ONE HEARS HYDRAS TALKING +6930-81414-0010-2011: A SOUND OF VOICES A FLASH OF LIGHT +6829-68771-0022-1936: I ATTEND TO THE HOUSEHOLD MENDING YOU KNOW AND CARE FOR THE LINEN +2961-961-0005-902: SOME POEMS OF SOLON WERE RECITED BY THE BOYS +4077-13754-0001-1097: BUT A WORD FURTHER CONCERNING THE EXPEDITION IN GENERAL +672-122797-0063-1848: THEN GOOD BYE SAID THE RATS AND THEY WENT HOME +6930-75918-0002-1953: CONGRATULATIONS WERE POURED IN UPON THE PRINCESS EVERYWHERE DURING HER JOURNEY +4992-23283-0010-1354: BUT IN SUCH A CASE MISS MILNER'S ELECTION OF A HUSBAND SHALL NOT DIRECT MINE +260-123440-0020-783: WE WON'T TALK ABOUT HER ANY MORE IF YOU'D RATHER NOT WE INDEED +1580-141083-0015-349: DID (ANYONE->ANY ONE) KNOW THAT THESE PROOFS WOULD BE THERE NO ONE SAVE THE PRINTER +2961-961-0021-918: BUT I WOULD NOT SPEAK AT THE TIME BECAUSE I WANTED TO REFRESH MY MEMORY +672-122797-0018-1803: REJOICE IN OUR PRESENCE SAID THE AIR AND THE SUNLIGHT +8463-294828-0015-2477: CONSEIL I CALLED A THIRD TIME CONSEIL APPEARED +1284-1180-0031-243: AT THE EMERALD CITY WHERE OUR PRINCESS OZMA LIVES GREEN IS THE POPULAR COLOR +2830-3980-0076-873: ON THE OTHER HAND WE ARE NOT TO REGARD THEM AS SO TERRIBLE THAT WE MUST DESPAIR +61-70968-0047-1728: MASTER MONCEUX THE SHERIFF OF NOTTINGHAM WAS MIGHTILY PUT ABOUT WHEN TOLD OF THE RIOTING +672-122797-0003-1788: BUT THIS WAS WHAT THE TREE COULD NOT BEAR TO HEAR +4992-41797-0019-1384: OLIVE'S MOURNFUL BLACK EYES MET NANCY'S SPARKLING BROWN ONES +5105-28240-0012-1429: 
NEVER MIND NOW INTERPOSED THE CAPTAIN WE WILL TALK OF THAT BY AND BY +3570-5696-0004-963: THE SALIENT FEATURES OF THIS DEVELOPMENT OF DOMESTIC SERVICE HAVE ALREADY BEEN INDICATED +3729-6852-0026-1053: YES BUT THE MERIDIAN OF THE PALAIS ROYAL IS THE MOST EXACT +5142-36377-0017-1548: THE ONLY CHEERFUL CONVERSATION WAS THE CONVERSATION ACROSS THE TABLE BETWEEN NAOMI AND ME +7021-79759-0003-2057: VAST IMPORTANCE AND INFLUENCE OF THIS MENTAL FURNISHING +4507-16021-0057-1278: PEOPLE SUFFER IN THE LIGHT EXCESS BURNS +2961-961-0006-903: AND WHAT WAS THE SUBJECT OF THE POEM SAID THE PERSON WHO MADE THE REMARK +5142-33396-0012-1474: THEN I WILL GET ME A FARM AND WILL WINTER IN THAT LAND NOW WHO WILL FOLLOW ME +260-123286-0007-709: HE CALLED THIS SEA A POND AND OUR LONG VOYAGE TAKING A LITTLE SAIL +6930-81414-0012-2013: SAID ANOTHER VOICE WHICH I RECOGNIZED AS VOLTAIRE'S (KAFFAR->CAFFER) +1188-133604-0014-78: DO NOT THEREFORE THINK THAT THE GOTHIC SCHOOL IS AN EASY ONE +1580-141083-0045-379: HE WAS STILL SUFFERING FROM THIS SUDDEN DISTURBANCE OF THE QUIET ROUTINE OF HIS LIFE +260-123288-0004-738: THE AIR IS HEAVY THE SEA IS CALM +61-70968-0032-1713: HE FELT FOR AND FOUND THE WIZARD'S BLACK CLOTH THE SQUIRE WAS QUITE OUT OF BREATH +6829-68769-0003-1863: IT WAS A DELIBERATE THEFT FROM HIS EMPLOYERS TO PROTECT A GIRL HE LOVED +1320-122617-0041-333: UNCAS CAST HIS SKIN AND STEPPED FORTH IN HIS OWN BEAUTIFUL PROPORTIONS +4446-2273-0005-1143: I HAVEN'T HAD A CHANCE YET TO TELL YOU WHAT A JOLLY LITTLE PLACE I THINK THIS IS +61-70970-0029-1773: FROM THE BLACKNESS BEHIND THE LIGHT THEY HEARD A VOICE WARRENTON'S +6829-68769-0033-1893: IT WAS BETTER FOR HIM TO THINK THE GIRL UNFEELING THAN TO KNOW THE TRUTH +1580-141084-0021-409: ON THE PALM WERE THREE LITTLE PYRAMIDS OF BLACK DOUGHY CLAY +4507-16021-0043-1264: THE EARTH IS NOT DEVOID OF RESEMBLANCE TO A JAIL +5683-32866-0014-1638: DON'T INSULT ME STANLEY BY TALKING AGAIN AS YOU DID THIS MORNING +2830-3980-0001-798: THEY SAID TO THE GALATIANS YOU HAVE NO RIGHT TO THINK HIGHLY OF PAUL +908-31957-0018-2612: BUT THOU ART NOT SUCH A LOVER MY BELOVED +5142-36377-0003-1534: AMBROSE MET ME AT THE BOTTOM OF THE STAIRS AND SHOWED ME THE WAY TO THE SUPPER ROOM +6930-81414-0027-2028: FOR SOME TIME AFTER THAT I REMEMBERED NOTHING DISTINCTLY +5105-28233-0005-1411: SOMETIMES HE WOULD WANDER ON FOOT UPON THE SANDY SHORE AND SOMETIMES HE WOULD ENJOY A RIDE ALONG THE SUMMIT OF THE CLIFF ALTOGETHER BEING IN NO HURRY AT ALL TO BRING HIS TASK TO AN END +7729-102255-0018-2251: LITTLE BY LITTLE HOWEVER THE LATTER BECAME HEMMED AND BOUND IN THE MESHES OF THE VARIOUS DEVICES AND PROCEEDINGS WHICH THE TERRITORIAL OFFICIALS EVOLVED FROM THE BOGUS LAWS +8455-210777-0045-2401: THEN THE REPUBLIC OF BRITANNULA WAS TO BE DECLARED AS NON EXISTENT AND THE BRITISH FLAG WAS TO BE EXALTED AND A BRITISH GOVERNOR INSTALLED IN THE EXECUTIVE CHAMBERS +3570-5694-0011-931: THE CUSTOM OF FESTIVE GATHERINGS PROBABLY ORIGINATED IN MOTIVES OF CONVIVIALITY AND RELIGION THESE MOTIVES ARE ALSO PRESENT IN THE LATER DEVELOPMENT BUT THEY DO NOT CONTINUE TO BE THE SOLE MOTIVES +3729-6852-0040-1067: FOR THE FIRST DAY SIR I THINK THAT WHAT YOU HAVE DONE GIVES GREAT HOPES OF YOU AND WITHOUT ANY DOUBT YOU WILL MAKE RAPID PROGRESS +3570-5695-0004-947: IF BEAUTY OR COMFORT IS ACHIEVED AND IT IS A MORE OR LESS FORTUITOUS CIRCUMSTANCE IF THEY ARE THEY MUST BE ACHIEVED BY MEANS AND METHODS THAT COMMEND THEMSELVES TO THE GREAT ECONOMIC LAW OF WASTED EFFORT +2961-960-0013-887: IT IS PROBABLE THAT THE RELATION OF THE IDEAS 
TO GOD OR OF GOD TO THE WORLD WAS DIFFERENTLY CONCEIVED BY HIM AT DIFFERENT TIMES OF HIS LIFE +4077-13751-0008-1082: IT IS NOTABLE THAT THE INDIAN TRIBES HAVE GENERALLY REGARDED THE RELIGION OF THE LATTER DAY SAINTS WITH FAVOR SEEING IN THE BOOK OF MORMON STRIKING AGREEMENT WITH THEIR OWN TRADITIONS +1320-122612-0012-287: EXTINGUISHED BRANDS WERE LYING AROUND A SPRING THE OFFALS OF A DEER WERE SCATTERED ABOUT THE PLACE AND THE TREES BORE EVIDENT MARKS OF HAVING BEEN BROWSED BY THE HORSES +7176-88083-0018-2177: BUT THIS FREQUENTER OF THE HEIGHTS OF AIR FOR ALL HIS SAVAGE VALOR WAS TROUBLED AT THE LEAPING WAVES AND THE TOSSING FOAM OF THESE MAD RAPIDS HE DID NOT UNDERSTAND THEM +7127-75946-0014-2102: THE QUEENS HAD TAKEN THEIR SEATS UPON A MAGNIFICENT (DIAS->DAIS) OR PLATFORM ERECTED UPON THE BORDERS OF THE LAKE IN A (THEATER->THEATRE) OF WONDERFUL ELEGANCE OF CONSTRUCTION +7127-75947-0014-2132: FORGIVE ME I HARDLY KNOW WHAT I AM SAYING A THOUSAND TIMES FORGIVE ME MADAME WAS RIGHT QUITE RIGHT THIS BRUTAL EXILE HAS COMPLETELY TURNED MY BRAIN +6930-75918-0016-1967: YES I NEED REPOSE MANY THINGS HAVE AGITATED ME TO DAY BOTH IN MIND AND BODY WHEN YOU RETURN TO MORROW I SHALL NO LONGER BE THE SAME MAN +1320-122617-0010-302: THE TASK WILL NOT BE DIFFICULT RETURNED DAVID HESITATING THOUGH I GREATLY FEAR YOUR PRESENCE WOULD RATHER INCREASE THAN MITIGATE HIS UNHAPPY FORTUNES +2300-131720-0030-602: THE PRINCIPLE EMPLOYED IN THE EDISON ELECTROLYTIC (METER->METRE) IS THAT WHICH EXEMPLIFIES THE POWER OF ELECTRICITY TO DECOMPOSE A CHEMICAL SUBSTANCE +5683-32866-0013-1637: THERE WAS A BRIGHT MOONLIGHT BROKEN BY THE SHADOWS OF OVERHANGING BOUGHS AND WITHERED LEAVES AND THE MOTTLED LIGHTS AND SHADOWS GLIDED ODDLY ACROSS HIS PALE FEATURES +8555-292519-0005-2552: WHILE THE OLD GOLD (AND->IN) THE MARBLE STAYS (FOREVER->FOR EVER) GLEAMING ITS SOFT STRONG BLAZE CALM IN THE EARLY EVENING GLOW +1221-135766-0012-183: BROODING OVER ALL THESE MATTERS THE MOTHER FELT LIKE ONE WHO HAS EVOKED A SPIRIT BUT BY SOME IRREGULARITY IN THE PROCESS OF CONJURATION HAS FAILED TO WIN THE MASTER WORD THAT SHOULD CONTROL THIS NEW AND INCOMPREHENSIBLE INTELLIGENCE +1188-133604-0015-79: THE LAW OF THAT SCHOOL IS THAT EVERYTHING SHALL BE SEEN CLEARLY OR AT LEAST ONLY IN SUCH MIST OR FAINTNESS AS SHALL BE DELIGHTFUL AND I HAVE NO DOUBT THAT THE BEST INTRODUCTION TO IT WOULD BE THE ELEMENTARY PRACTICE OF PAINTING EVERY STUDY ON A GOLDEN GROUND +8463-287645-0005-2432: A FEW YEARS BACK ONE OF THEIR SLAVES A COACHMAN WAS KEPT ON THE COACH BOX ONE COLD NIGHT WHEN THEY WERE OUT AT A BALL UNTIL HE BECAME ALMOST FROZEN TO DEATH IN FACT HE DID DIE IN THE INFIRMARY FROM THE EFFECTS OF THE FROST ABOUT ONE WEEK AFTERWARDS +1089-134686-0018-18: IT WAS STRANGE TOO THAT HE FOUND AN ARID PLEASURE IN FOLLOWING UP TO THE END THE RIGID LINES OF THE DOCTRINES OF THE CHURCH AND PENETRATING INTO OBSCURE SILENCES ONLY TO HEAR AND FEEL THE MORE DEEPLY HIS OWN CONDEMNATION +8455-210777-0061-2417: YOU WILL CARRY OUT WITH YOU ONE HUNDRED MEN OF THE NORTH (NORTH WEST->NORTHWEST) BIRMINGHAM REGIMENT WHICH WILL PROBABLY SUFFICE FOR YOUR OWN SECURITY AS IT IS THOUGHT THAT IF MISTER NEVERBEND BE WITHDRAWN THE PEOPLE WILL REVERT EASILY TO THEIR OLD HABITS OF OBEDIENCE +2094-142345-0002-513: FOR IT IS A SOLID HEAVY HANDSOME DOOR AND MUST ONCE HAVE BEEN IN THE HABIT OF SHUTTING WITH A SONOROUS BANG BEHIND (A->THE) LIVERIED LACKEY WHO HAD JUST SEEN HIS MASTER AND MISTRESS OFF THE GROUNDS IN A CARRIAGE AND PAIR +908-31957-0004-2598: SHALL I NEVER MISS HOME TALK AND BLESSING 
AND THE COMMON KISS THAT COMES TO EACH IN TURN NOR COUNT IT STRANGE WHEN I LOOK UP TO DROP ON A NEW RANGE OF WALLS AND FLOORS ANOTHER HOME THAN THIS +2300-131720-0001-573: THE LONDON PLANT WAS LESS TEMPORARY BUT NOT PERMANENT SUPPLYING BEFORE IT WAS TORN OUT NO FEWER THAN THREE THOUSAND LAMPS IN HOTELS CHURCHES STORES AND DWELLINGS IN THE VICINITY OF HOLBORN (VIADUCT->VIADUC) +121-127105-0020-154: WHO WAS IT SHE WAS IN LOVE WITH THE STORY WILL TELL I TOOK UPON MYSELF TO REPLY OH I CAN'T WAIT FOR THE STORY THE STORY WON'T TELL SAID DOUGLAS NOT IN ANY LITERAL VULGAR WAY MORE'S THE PITY THEN +121-127105-0035-169: SHE PROMISED TO DO THIS AND SHE MENTIONED TO ME THAT WHEN FOR A MOMENT DISBURDENED DELIGHTED HE HELD HER HAND THANKING HER FOR THE SACRIFICE SHE ALREADY FELT REWARDED +908-157963-0004-2567: (THEL->FELL) IS LIKE A (WATRY->WATERY) BOW AND LIKE A PARTING CLOUD LIKE A REFLECTION IN A GLASS LIKE SHADOWS IN THE WATER LIKE DREAMS OF INFANTS LIKE A SMILE UPON AN (INFANTS->INFANT'S) FACE +1089-134686-0019-19: THE SENTENCE OF SAINT JAMES WHICH SAYS THAT HE WHO OFFENDS AGAINST ONE COMMANDMENT BECOMES GUILTY OF ALL HAD SEEMED TO HIM FIRST A SWOLLEN PHRASE UNTIL HE HAD BEGUN TO GROPE IN THE DARKNESS OF HIS OWN STATE +5142-36600-0001-1563: IN DETERMINING WHETHER TWO OR MORE ALLIED FORMS OUGHT TO BE RANKED AS SPECIES OR VARIETIES NATURALISTS ARE PRACTICALLY GUIDED BY THE FOLLOWING CONSIDERATIONS NAMELY THE AMOUNT OF DIFFERENCE BETWEEN THEM AND WHETHER SUCH DIFFERENCES RELATE TO FEW OR MANY POINTS OF STRUCTURE AND WHETHER THEY ARE OF PHYSIOLOGICAL IMPORTANCE BUT MORE ESPECIALLY WHETHER THEY ARE CONSTANT +3570-5695-0005-948: THE MAN OF THE HOUSEHOLD ALSO CAN DO SOMETHING IN THIS DIRECTION AND INDEED HE COMMONLY DOES BUT WITH A STILL LOWER DESCENT INTO THE LEVELS OF INDIGENCE ALONG THE MARGIN OF THE SLUMS THE MAN AND PRESENTLY ALSO THE CHILDREN VIRTUALLY CEASE TO CONSUME VALUABLE GOODS FOR APPEARANCES AND THE WOMAN REMAINS VIRTUALLY THE SOLE EXPONENT OF THE HOUSEHOLD'S PECUNIARY DECENCY +1089-134691-0011-49: THEIR PIETY WOULD BE LIKE THEIR NAMES LIKE THEIR FACES LIKE THEIR CLOTHES AND IT WAS IDLE FOR HIM TO TELL HIMSELF THAT THEIR HUMBLE AND CONTRITE HEARTS IT MIGHT BE PAID A FAR RICHER TRIBUTE OF DEVOTION THAN HIS HAD EVER BEEN A GIFT TENFOLD MORE ACCEPTABLE THAN HIS ELABORATE ADORATION +7127-75946-0000-2088: AT THE CONCLUSION OF THE BANQUET WHICH WAS SERVED AT FIVE O'CLOCK THE KING ENTERED HIS CABINET WHERE HIS TAILORS WERE AWAITING HIM FOR THE PURPOSE OF TRYING ON THE CELEBRATED COSTUME REPRESENTING SPRING WHICH WAS THE RESULT OF SO MUCH IMAGINATION AND HAD (COST->CAUSED) SO MANY EFFORTS OF THOUGHT TO THE DESIGNERS AND ORNAMENT WORKERS OF THE COURT +7127-75947-0000-2118: EVERY ONE COULD OBSERVE HIS AGITATION AND PROSTRATION A PROSTRATION WHICH WAS INDEED THE MORE REMARKABLE SINCE PEOPLE WERE NOT ACCUSTOMED TO SEE HIM WITH HIS ARMS HANGING LISTLESSLY BY HIS SIDE HIS HEAD BEWILDERED AND HIS EYES WITH ALL THEIR BRIGHT INTELLIGENCE (BEDIMMED->BE DIMMED) +4077-13754-0002-1098: IT WAS THROUGH FLOYD'S ADVICE (THAT->THE) BUCHANAN ORDERED THE MILITARY EXPEDITION TO UTAH OSTENSIBLY TO INSTALL CERTAIN FEDERAL OFFICIALS AND TO REPRESS AN ALLEGED INFANTILE REBELLION WHICH IN FACT HAD NEVER COME INTO EXISTENCE BUT IN REALITY TO FURTHER THE INTERESTS OF THE SECESSIONISTS +121-123852-0000-124: THOSE PRETTY WRONGS THAT LIBERTY COMMITS WHEN I AM SOMETIME ABSENT FROM THY HEART THY BEAUTY AND THY YEARS FULL WELL BEFITS FOR STILL TEMPTATION FOLLOWS WHERE THOU ART +8230-279154-0001-2313: WHAT IS CALLED PERCEPTION 
DIFFERS FROM SENSATION BY THE FACT THAT THE SENSATIONAL INGREDIENTS BRING UP HABITUAL ASSOCIATES IMAGES AND EXPECTATIONS OF THEIR USUAL (CORRELATES->CORELETS) ALL OF WHICH ARE SUBJECTIVELY INDISTINGUISHABLE FROM THE SENSATION +2300-131720-0031-603: ASSOCIATED WITH THIS SIMPLE FORM OF APPARATUS WERE VARIOUS INGENIOUS DETAILS AND REFINEMENTS TO SECURE REGULARITY OF OPERATION FREEDOM FROM INACCURACY AND IMMUNITY FROM SUCH TAMPERING AS WOULD PERMIT THEFT OF CURRENT OR DAMAGE +4077-13754-0004-1100: ALREADY A NORTH AND A SOUTH WERE TALKED OF WHY NOT SET UP ALSO A WEST +5142-36377-0018-1549: HE LOOKED UP AT (NAOMI->NAROWMY) DOUBTINGLY FROM HIS PLATE AND LOOKED DOWN AGAIN SLOWLY WITH A FROWN +260-123288-0006-740: THE ATMOSPHERE IS EVIDENTLY CHARGED AND SURCHARGED WITH ELECTRICITY +5683-32866-0016-1640: MARK MY WORDS YOU'LL FIND HIM TOO STRONG FOR YOU (AYE->AY) AND TOO DEEP +672-122797-0050-1835: THEY SNUFFED ABOUT THE FIR TREE AND RUSTLED AMONG THE BRANCHES +5683-32866-0030-1654: A LITTLE BIT OF PLASTER TUMBLED DOWN THE CHIMNEY AND STARTLED ME CONFOUNDEDLY +1284-1180-0003-215: FOR A LONG TIME HE HAD WISHED TO EXPLORE THE BEAUTIFUL LAND OF OZ IN WHICH THEY LIVED +3575-170457-0010-980: I AM NOT DEPRECIATING IT WHEN I SAY THAT IN THESE TIMES IT IS NOT RARE +1221-135766-0014-185: PEARL SAW AND GAZED INTENTLY BUT NEVER SOUGHT TO MAKE ACQUAINTANCE +4970-29095-0035-1340: AND IF I HAD A FORTUNE WOULD THEE WANT ME TO LEAD A USELESS LIFE +7127-75947-0032-2150: YES BUT PERHAPS I FRIGHTENED HER IN WHAT WAY +121-127105-0006-140: THE OTHERS RESENTED POSTPONEMENT BUT IT WAS JUST HIS SCRUPLES THAT CHARMED ME +7176-88083-0005-2164: ONCE FAIRLY (A WING->AWING) HOWEVER HE WHEELED AND MADE BACK HURRIEDLY FOR HIS PERCH +6930-76324-0012-1984: WE'LL COME IN HERE THIS AFTERNOON WITH OLD CLOTHES ON AND HAVE A REGULAR HOUSE CLEANING +4970-29095-0005-1310: THY WAYS GREATLY TRY ME RUTH AND ALL THY RELATIONS +908-31957-0005-2599: ALAS I HAVE GRIEVED SO I AM HARD TO LOVE +8463-294828-0032-2494: IN PERSON WELCOME ABOARD PROFESSOR YOUR CABIN IS WAITING FOR YOU +61-70970-0016-1760: WE WILL GO OUT TOGETHER TO THE BOWER THERE IS A WAY DOWN TO THE COURT FROM MY WINDOW +237-126133-0021-635: SHE ASKED IMPULSIVELY I DIDN'T BELIEVE YOU COULD PERSUADE HER FATHER +61-70968-0003-1684: HE WAS LIKE UNTO MY FATHER IN A WAY AND YET WAS NOT MY FATHER +6930-76324-0013-1985: IT CAN'T HURT ANYTHING I'M SURE FOR WE WON'T DISTURB THINGS AT ALL +7176-88083-0006-2165: IT MIGHT HAVE SEEMED THAT A TROUT OF THIS SIZE WAS A FAIRLY SUBSTANTIAL MEAL +4970-29093-0014-1295: WELL I'M GOING AS AN ENGINEER YOU (CAN->COULD) GO AS ONE +1580-141083-0016-350: I WAS IN SUCH A HURRY TO COME TO YOU YOU LEFT YOUR DOOR OPEN +1188-133604-0031-95: THERE'S ONE AND THERE'S ANOTHER THE DUDLEY AND THE FLINT +61-70968-0034-1715: A MONTFICHET A MONTFICHET GAMEWELL TO THE RESCUE +237-134500-0006-665: JUST SMELL THE WILD ROSES THEY ARE ALWAYS SO SPICY AFTER A RAIN +4992-23283-0011-1355: IF SHE DOES NOT KNOW HOW TO ESTIMATE HER OWN VALUE I DO +3729-6852-0043-1070: I RESIDE IN THE MARAIS RUE (DE DOUZE PORTES->DES DUSPORT) +3570-5696-0006-965: AS USED IN THE SPEECH OF EVERYDAY LIFE THE WORD CARRIES AN UNDERTONE OF DEPRECATION +121-127105-0036-170: BUT WAS THAT ALL HER REWARD ONE OF THE LADIES ASKED +672-122797-0019-1804: REJOICE IN THY OWN FRESH YOUTH +5105-28241-0003-1445: STEAM UP AND CANVAS SPREAD THE SCHOONER STARTED EASTWARDS +1284-1181-0000-245: OJO EXAMINED THIS CURIOUS CONTRIVANCE WITH WONDER +61-70968-0004-1685: ALSO THERE WAS A STRIPLING PAGE WHO TURNED INTO A MAID 
+8230-279154-0032-2344: IT IS THIS THAT IS OF INTEREST TO THEORY OF KNOWLEDGE +4507-16021-0029-1250: TO THIS WE REPLY IN ONE WORD ONLY +4992-41797-0005-1370: DONE HE AIN'T DONE A (THING HE'D->THANK HE) OUGHTER (SENCE->SINCE) HE WAS BORN +5142-33396-0043-1505: THEIR EYES DANCED BIG (THORLEIF->TOAR LEAF) STOOD UP AND STRETCHED HIMSELF +5142-33396-0028-1490: ON A BENCH IN A FAR CORNER WERE A DOZEN PEOPLE HUDDLED TOGETHER +3570-5694-0014-934: MANY OF THESE AFFILIATED GENTLEMEN OF LEISURE ARE AT THE SAME TIME LESSER MEN OF SUBSTANCE IN THEIR OWN RIGHT SO THAT SOME OF THEM ARE SCARCELY AT ALL OTHERS ONLY PARTIALLY TO BE RATED AS VICARIOUS CONSUMERS +8463-294825-0006-2448: HIS SPECIFICATIONS FOR AN OPEN SEA SUBMARINE AND A SELF (CONTAINED->CONTAINING) DIVING SUIT WERE DECADES BEFORE THEIR TIME YET MODERN TECHNOLOGY BEARS THEM OUT TRIUMPHANTLY +7729-102255-0020-2253: THE INCIDENT WAS NOT VIOLENT NOR EVEN DRAMATIC NO POSSE WAS SUMMONED NO FURTHER EFFORT MADE AND (REEDER->READER) FEARING PERSONAL VIOLENCE SOON FLED IN DISGUISE +3729-6852-0041-1068: I BELIEVE IT SIR AND THAT IS WHAT I FEAR THEREFORE THE PRINCIPAL OBJECT OF MY VISIT HERE IS TO DEVOTE MYSELF ENTIRELY TO THE STUDY OF THE FRENCH LANGUAGE +1188-133604-0000-64: YOU WILL FIND ME CONTINUALLY SPEAKING OF FOUR MEN TITIAN HOLBEIN TURNER AND TINTORET IN ALMOST THE SAME TERMS +1284-1180-0017-229: AT ONE END STOOD A GREAT FIREPLACE IN WHICH A BLUE LOG WAS BLAZING WITH A BLUE FLAME AND OVER THE FIRE HUNG FOUR KETTLES IN A ROW ALL BUBBLING AND STEAMING AT A GREAT RATE +8230-279154-0016-2328: IN ACTUAL FACT THERE ARE DOUBTLESS VARIOUS FACTORS THAT CONCUR IN GIVING US THE FEELING OF GREATER OR LESS REMOTENESS IN SOME REMEMBERED EVENT +4507-16021-0013-1234: NOTHING IS MORE LUGUBRIOUS THAN THE CONTEMPLATION THUS IN ITS NUDITY IN THE BROAD LIGHT OF THOUGHT OF THE HORRIBLE SWARMING OF SLANG +4507-16021-0042-1263: IT IS BLACK IN MISFORTUNE IT IS BLACKER STILL IN CRIME THESE TWO BLACKNESSES AMALGAMATED COMPOSE SLANG +4446-2271-0016-1129: HE WAS BEGINNING TO FEEL A KEEN INTEREST IN THE SLENDER BAREFOOT DONKEY GIRL WHO SLIPPED IN AND OUT OF THE PLAY SINGING LIKE SOME ONE WINDING THROUGH A HILLY FIELD +8555-292519-0006-2553: THE PLEASANT GRAVEYARD OF MY SOUL WITH SENTIMENTAL CYPRESS TREES AND FLOWERS IS FILLED THAT I MAY STROLL IN MEDITATION AT MY EASE +1284-134647-0006-273: BISHOPS VIRGINS AND EVEN SPOTLESS INFANTS WERE SUBJECTED TO THE DISGRACE OF A PUBLIC PENANCE BEFORE THEY COULD BE ADMITTED TO THE COMMUNION OF THE DONATISTS +1580-141083-0029-363: HE WAS IN THE MIDST OF THAT WHEN YOUR RETURN CAUSED HIM TO MAKE A VERY HURRIED RETREAT VERY HURRIED SINCE HE HAD NOT TIME TO REPLACE THE PAPERS WHICH WOULD TELL YOU THAT HE HAD BEEN THERE +4970-29095-0003-1308: I HOPE THEE TOLD THE ELDERS THAT FATHER AND I ARE RESPONSIBLE FOR THE PIANO AND THAT MUCH AS THEE LOVES MUSIC THEE IS NEVER IN THE ROOM WHEN IT IS PLAYED +8555-284449-0012-2538: I'LL GLADLY DO THAT PROMISED THE NEW BOOLOOROO AND I'LL FEED THE HONORABLE GOAT ALL THE SHAVINGS AND LEATHER AND TIN CANS HE CAN EAT BESIDES THE GRASS +908-157963-0020-2583: TILL WE ARISE (LINK'D->LINKED) IN A GOLDEN BAND AND NEVER PART BUT WALK UNITED BEARING FOOD TO ALL OUR TENDER FLOWERS +1320-122617-0011-303: THE LODGE IN WHICH UNCAS WAS CONFINED WAS IN THE VERY CENTER OF THE VILLAGE AND IN A SITUATION PERHAPS MORE DIFFICULT THAN ANY OTHER TO APPROACH OR LEAVE WITHOUT OBSERVATION +1320-122612-0000-275: SINCE THE PERIOD OF OUR TALE THE ACTIVE SPIRIT OF THE COUNTRY HAS SURROUNDED IT WITH A BELT OF RICH AND THRIVING SETTLEMENTS 
THOUGH NONE BUT THE HUNTER OR THE SAVAGE IS EVER KNOWN EVEN NOW TO PENETRATE ITS WILD RECESSES +3570-5696-0005-964: THROUGHOUT THE ENTIRE (EVOLUTION->REVOLUTION) OF CONSPICUOUS EXPENDITURE WHETHER OF GOODS OR OF SERVICES OR HUMAN LIFE RUNS THE OBVIOUS IMPLICATION THAT IN ORDER TO EFFECTUALLY MEND THE CONSUMER'S GOOD FAME IT MUST BE AN EXPENDITURE OF SUPERFLUITIES +7021-79759-0005-2059: THE PAIN PRODUCED BY AN ACT OF HASTY AND ANGRY VIOLENCE TO WHICH A FATHER SUBJECTS HIS SON MAY SOON PASS AWAY BUT THE MEMORY OF IT DOES NOT PASS AWAY WITH THE PAIN +5105-28241-0017-1459: AFTER PONDERING (AWHILE->A WHILE) HE SAID IF WE WERE FARTHER AWAY I SHOULD EXPECT TO FIND A DEPTH OF TWO OR THREE HUNDRED FATHOMS INSTEAD OF FIVE FATHOMS FIVE FATHOMS +3575-170457-0039-1009: THE CHRISTMAS HOLIDAYS CAME AND SHE AND ANNE RETURNED TO THE PARSONAGE AND TO THAT HAPPY HOME CIRCLE IN WHICH ALONE THEIR NATURES EXPANDED AMONGST ALL OTHER PEOPLE THEY SHRIVELLED UP MORE OR LESS +260-123288-0005-739: FROM TIME TO TIME A FLEECY TUFT OF MIST WITH YET SOME GLEAMING LIGHT LEFT UPON IT DROPS DOWN UPON THE DENSE FLOOR OF GREY AND LOSES ITSELF IN THE OPAQUE AND IMPENETRABLE MASS +2300-131720-0018-590: HE SOON FORESAW THAT STILL GREATER ECONOMY WOULD BE NECESSARY FOR COMMERCIAL SUCCESS NOT ALONE FOR THE LARGER TERRITORY OPENING BUT FOR THE COMPACT (DISTRICTS->DISTRICT) OF LARGE CITIES +4446-2271-0017-1130: ONE NIGHT WHEN HE AND (WINIFRED->WINNIFRED) WERE SITTING TOGETHER ON THE BRIDGE HE TOLD HER THAT THINGS HAD HAPPENED WHILE HE WAS STUDYING ABROAD THAT HE WAS SORRY FOR ONE THING IN PARTICULAR AND HE ASKED HER WHETHER SHE THOUGHT SHE OUGHT TO KNOW ABOUT THEM +3575-170457-0024-994: I DON'T ALWAYS SUCCEED FOR SOMETIMES WHEN I'M TEACHING OR SEWING I WOULD RATHER BE READING OR WRITING BUT I (TRY->TRIED) TO DENY MYSELF AND MY FATHER'S APPROBATION AMPLY REWARDED ME FOR THE PRIVATION +3570-5694-0000-920: BUT ALREADY AT A POINT IN ECONOMIC EVOLUTION FAR ANTEDATING THE EMERGENCE OF THE LADY (SPECIALISED->SPECIALIZED) CONSUMPTION OF GOODS AS AN EVIDENCE OF PECUNIARY STRENGTH HAD BEGUN TO WORK OUT IN A MORE OR LESS ELABORATE SYSTEM +1284-1180-0018-230: IT TAKES ME SEVERAL YEARS TO MAKE THIS MAGIC POWDER BUT AT THIS MOMENT I AM PLEASED TO SAY IT IS NEARLY DONE YOU SEE I AM MAKING IT FOR MY GOOD WIFE MARGOLOTTE WHO WANTS TO USE SOME OF IT FOR A PURPOSE OF HER OWN +1221-135767-0013-200: LIFTING THE IRON HAMMER THAT HUNG AT THE PORTAL HESTER PRYNNE GAVE A SUMMONS WHICH WAS ANSWERED BY ONE OF THE GOVERNOR'S BOND (SERVANT->SERVANTS) A FREE BORN ENGLISHMAN BUT NOW A SEVEN YEARS SLAVE +3575-170457-0055-1025: STILL HER HEART HAD RECEIVED A SHOCK IN THE PERCEPTION OF ANNE'S DELICACY AND ALL THESE HOLIDAYS SHE WATCHED OVER HER WITH THE LONGING FOND ANXIETY WHICH IS SO FULL OF SUDDEN PANGS OF FEAR +2300-131720-0032-604: THE STANDARD EDISON METER PRACTICE WAS TO REMOVE THE CELLS ONCE A MONTH TO THE METER ROOM OF THE CENTRAL STATION COMPANY FOR EXAMINATION ANOTHER SET BEING SUBSTITUTED +4077-13751-0011-1085: SOON THOUSANDS OF CONVERTS HAD RENTED OR PURCHASED HOMES IN MISSOURI INDEPENDENCE JACKSON COUNTY BEING THEIR (CENTER->CENTRE) BUT FROM THE FIRST THEY WERE UNPOPULAR AMONG THE (MISSOURIANS->MISSOURIIANS) +7127-75946-0016-2104: THE SEASONS ALLIES OF SPRING FOLLOWED HIM CLOSELY TO FORM A QUADRILLE WHICH AFTER MANY WORDS OF MORE OR LESS FLATTERING IMPORT WAS THE COMMENCEMENT OF THE DANCE +5639-40744-0015-1579: THIS PERSON WAS (RODOLFO->RUDOLPHO) WHO THOUGH HE HAD GONE TO LOOK FOR HIS FRIENDS HAD CHANGED HIS MIND IN THAT RESPECT NOT THINKING IT 
ADVISABLE TO ACQUAINT THEM WITH WHAT HAD PASSED BETWEEN HIM AND THE GIRL +5105-28241-0002-1444: THE LATE ASTOUNDING EVENTS HOWEVER HAD RENDERED PROCOPE MANIFESTLY UNEASY AND NOT THE LESS SO FROM HIS CONSCIOUSNESS THAT THE COUNT SECRETLY PARTOOK OF HIS OWN ANXIETY +6930-75918-0018-1969: THE NIGHT WAS CLEAR (STARLIT->STARLET) AND SPLENDID THE TEMPEST HAD PASSED AWAY AND THE SWEET INFLUENCES OF THE EVENING HAD RESTORED LIFE PEACE AND SECURITY EVERYWHERE +1320-122617-0013-305: DELIVERED IN A STRONG TONE OF ASSENT ANNOUNCED THE GRATIFICATION THE SAVAGE WOULD RECEIVE IN WITNESSING SUCH AN EXHIBITION OF WEAKNESS IN AN ENEMY SO LONG HATED AND SO MUCH FEARED +5105-28233-0008-1414: WHEN A PRIVATE IN THE EIGHTH CAVALRY HE HAD BEEN ON THE POINT OF QUITTING THE ARMY AT TWENTY EIGHT YEARS OF AGE BUT UNEXPECTEDLY HE HAD BEEN APPOINTED ORDERLY TO CAPTAIN SERVADAC +8455-210777-0003-2359: AS I SPOKE I MADE HIM A GRACIOUS BOW AND I THINK I SHOWED HIM BY MY MODE OF ADDRESS THAT I DID NOT BEAR ANY GRUDGE AS TO MY INDIVIDUAL SELF +2830-3980-0048-845: HE CAME DOWN TO EARTH LIVED AMONG MEN SUFFERED WAS CRUCIFIED AND THEN HE DIED STANDING CLEARLY BEFORE US SO THAT OUR HEARTS AND EYES MAY FASTEN UPON HIM +1580-141083-0001-335: I HAD ALWAYS KNOWN HIM TO BE RESTLESS IN HIS MANNER BUT ON THIS PARTICULAR OCCASION HE WAS IN SUCH A STATE OF UNCONTROLLABLE AGITATION THAT IT WAS CLEAR SOMETHING VERY UNUSUAL HAD OCCURRED +672-122797-0004-1789: IN WINTER WHEN THE SNOW LAY GLITTERING ON THE GROUND A HARE WOULD OFTEN COME LEAPING ALONG AND JUMP RIGHT OVER THE LITTLE TREE +908-157963-0021-2584: LIVES NOT ALONE NOR (OR->OF) ITSELF FEAR NOT AND I WILL CALL THE WEAK WORM FROM ITS LOWLY BED AND THOU SHALT HEAR ITS VOICE +1995-1826-0001-440: THE SOUTH SHE HAD NOT THOUGHT OF SERIOUSLY AND YET KNOWING OF ITS DELIGHTFUL HOSPITALITY AND MILD CLIMATE SHE WAS NOT AVERSE TO CHARLESTON OR NEW ORLEANS +6829-68771-0010-1924: THE FAIRVIEW BAND WAS ENGAGED TO DISCOURSE AS MUCH HARMONY AS IT COULD PRODUCE AND THE RESOURCES OF THE GREAT HOUSE WERE TAXED TO ENTERTAIN THE GUESTS +3570-5695-0007-950: THERE IS NO CLASS (AND->IN) NO COUNTRY THAT HAS YIELDED SO ABJECTLY BEFORE THE PRESSURE OF PHYSICAL WANT AS TO DENY THEMSELVES ALL GRATIFICATION OF THIS HIGHER OR SPIRITUAL NEED +1089-134686-0005-5: THE MUSIC CAME NEARER AND HE RECALLED THE WORDS THE WORDS OF SHELLEY'S FRAGMENT UPON THE MOON WANDERING COMPANIONLESS PALE FOR WEARINESS +4970-29095-0004-1309: I HEARD FATHER TELL COUSIN ABNER THAT HE WAS WHIPPED SO OFTEN FOR WHISTLING WHEN HE WAS A BOY THAT HE WAS DETERMINED TO HAVE WHAT COMPENSATION HE COULD GET NOW +2961-960-0000-874: HE PASSES ABRUPTLY FROM PERSONS TO IDEAS AND NUMBERS AND FROM IDEAS AND NUMBERS TO PERSONS FROM THE HEAVENS TO MAN FROM ASTRONOMY TO PHYSIOLOGY HE CONFUSES OR RATHER DOES NOT DISTINGUISH SUBJECT AND OBJECT FIRST AND FINAL CAUSES AND IS DREAMING OF GEOMETRICAL FIGURES LOST IN A FLUX OF SENSE +2961-961-0022-919: THEN NOW LET ME EXPLAIN TO YOU THE ORDER OF OUR ENTERTAINMENT FIRST TIMAEUS WHO IS A NATURAL PHILOSOPHER WILL SPEAK OF THE ORIGIN OF THE WORLD GOING DOWN TO THE CREATION OF (MAN->MEN) AND THEN I SHALL RECEIVE THE MEN WHOM HE HAS CREATED AND SOME OF WHOM WILL HAVE BEEN EDUCATED BY YOU AND (INTRODUCE->INTRODUCED) THEM TO YOU AS THE LOST ATHENIAN CITIZENS OF WHOM THE EGYPTIAN (RECORD->RECORDS) SPOKE +7021-79759-0004-2058: WITHOUT GOING TO ANY SUCH EXTREME AS THIS WE CAN EASILY SEE ON REFLECTION HOW VAST AN INFLUENCE ON THE IDEAS AND CONCEPTIONS AS WELL AS ON THE PRINCIPLES OF ACTION IN MATURE YEARS MUST BE EXERTED BY THE 
NATURE AND CHARACTER OF THE IMAGES WHICH THE PERIOD OF INFANCY AND CHILDHOOD (IMPRESSES->IMPRESS) UPON THE MIND +6930-75918-0003-1954: FROM THE RESPECT PAID HER ON ALL SIDES SHE SEEMED LIKE A QUEEN AND FROM THE ADORATION WITH WHICH SHE WAS TREATED BY TWO OR THREE SHE APPEARED AN OBJECT OF WORSHIP THE QUEEN MOTHER GAVE THE FRENCH THE MOST AFFECTIONATE RECEPTION FRANCE WAS HER NATIVE COUNTRY AND SHE HAD SUFFERED TOO MUCH UNHAPPINESS IN ENGLAND FOR ENGLAND TO HAVE MADE HER FORGET FRANCE +5639-40744-0030-1594: JUST THEN (LEOCADIA->LEOKADIA) CAME TO HERSELF AND EMBRACING THE CROSS SEEMED CHANGED INTO A SEA OF TEARS AND THE GENTLEMAN (REMAINED->REMAINING) IN UTTER BEWILDERMENT UNTIL HIS WIFE HAD REPEATED TO HIM FROM BEGINNING TO END (LEOCADIA'S->LEIRCADIAS) WHOLE STORY AND HE BELIEVED IT THROUGH THE BLESSED DISPENSATION OF HEAVEN WHICH HAD CONFIRMED IT BY SO MANY CONVINCING TESTIMONIES +8224-274381-0004-2284: FIVE HUNDRED MEN MORE WHO HAD BEEN LEVIED BY THE COVENANTERS WERE PERSUADED TO EMBRACE THE ROYAL CAUSE AND WITH THIS COMBINED FORCE HE HASTENED TO ATTACK LORD (ELCHO->ELCO) WHO LAY AT PERTH WITH AN ARMY OF SIX THOUSAND MEN ASSEMBLED UPON THE FIRST NEWS OF THE IRISH INVASION +5105-28241-0001-1443: AFTER AN APPRENTICESHIP ON A MERCHANT SHIP HE HAD ENTERED THE IMPERIAL NAVY AND HAD ALREADY REACHED THE RANK OF LIEUTENANT WHEN THE COUNT APPOINTED HIM TO THE CHARGE OF HIS OWN PRIVATE YACHT IN WHICH HE WAS ACCUSTOMED TO SPEND BY FAR THE GREATER PART OF HIS TIME THROUGHOUT THE WINTER GENERALLY CRUISING IN THE MEDITERRANEAN WHILST IN THE SUMMER HE VISITED MORE NORTHERN WATERS +4446-2273-0003-1141: WHEN BARTLEY ARRIVED AT BEDFORD SQUARE ON SUNDAY EVENING MARIE THE PRETTY LITTLE FRENCH GIRL MET HIM AT THE DOOR AND CONDUCTED HIM UPSTAIRS +908-31957-0017-2611: MUSSULMANS AND (GIAOURS->GUYORES) THROW KERCHIEFS AT A SMILE AND HAVE NO (RUTH->RUOTH) FOR ANY WEEPING +3575-170457-0007-977: HE SPOKE FRENCH PERFECTLY I HAVE BEEN TOLD WHEN NEED WAS BUT DELIGHTED USUALLY IN TALKING THE BROADEST YORKSHIRE +121-127105-0003-137: THERE WAS A UNANIMOUS GROAN AT THIS AND MUCH REPROACH AFTER WHICH IN HIS PREOCCUPIED WAY HE EXPLAINED +7176-88083-0003-2162: BUT HERE HE WAS AT A TERRIBLE DISADVANTAGE AS COMPARED WITH THE OWLS HAWKS AND EAGLES HE HAD NO RENDING CLAWS +672-122797-0061-1846: DON'T YOU KNOW ONE ABOUT BACON AND TALLOW CANDLES CAN'T YOU TELL ANY LARDER STORIES +1580-141084-0049-437: IT WAS SIMPLE ENOUGH SIR IF YOU ONLY HAD KNOWN BUT WITH ALL YOUR CLEVERNESS IT WAS IMPOSSIBLE THAT YOU COULD KNOW +7176-88083-0002-2161: HIS FEET WERE RED HIS LONG NARROW BEAK WITH ITS SAW TOOTHED EDGES AND SHARP HOOKED TIP WAS BRIGHT RED +7127-75947-0028-2146: QUICK QUICK THEN AMONG THE HIGH REED GRASS SAID MONTALAIS STOOP (ATHENAIS->ETHINAE) YOU ARE SO TALL +8230-279154-0030-2342: (SEMON'S->SYMONDS) TWO BOOKS MENTIONED IN AN EARLIER LECTURE DO NOT TOUCH KNOWLEDGE MEMORY AT ALL CLOSELY +4507-16021-0055-1276: TO TEACH READING MEANS TO LIGHT THE FIRE EVERY SYLLABLE SPELLED OUT SPARKLES +1995-1837-0016-497: FOR ONE LONG MOMENT HE PAUSED STUPID AGAPE WITH UTTER AMAZEMENT THEN LEANED DIZZILY AGAINST A TREE +6829-68771-0008-1922: THESE WOMEN WERE FLATTERED BY THE ATTENTION OF THE YOUNG LADY AND HAD PROMISED TO ASSIST IN ELECTING MISTER FORBES +7176-92135-0020-2207: DOUBLE NINE TWO THREE (ELSINORE->ELZINOR) DOUBLE (NINE->NOT) YES (HALLO->HULLO) IS THAT YOU HORATIO HAMLET SPEAKING +8455-210777-0060-2416: THE JOHN BRIGHT IS ARMED WITH A WEAPON OF GREAT POWER AGAINST WHICH IT IS IMPOSSIBLE THAT THE PEOPLE OF (BRITANNULA->BRITANULA) SHOULD 
PREVAIL +4970-29095-0016-1321: RUTH SAT QUITE STILL FOR A TIME WITH FACE INTENT AND FLUSHED IT WAS OUT NOW +5639-40744-0013-1577: SHE SAW THAT THE BED WAS GILDED AND SO RICH THAT IT SEEMED THAT OF A PRINCE RATHER THAN OF A PRIVATE GENTLEMAN +8555-284447-0005-2506: THE ROOM OF THE GREAT KNIFE WAS HIGH AND BIG AND AROUND IT RAN ROWS OF BENCHES FOR THE SPECTATORS TO SIT UPON +7729-102255-0033-2266: AS HE HAD PROMISED TO PROTECT THE HOTEL THE REASSURED CITIZENS BEGAN TO LAUGH AT THEIR OWN FEARS +260-123440-0019-782: CRIED ALICE AGAIN FOR THIS TIME THE MOUSE WAS BRISTLING ALL OVER AND SHE FELT CERTAIN IT MUST BE REALLY OFFENDED +1089-134686-0002-2: AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS +4970-29095-0032-1337: BUT PHILIP IS HONEST AND HE HAS TALENT ENOUGH IF HE WILL STOP SCRIBBLING TO MAKE HIS WAY +237-126133-0003-617: SOMEHOW OF ALL THE DAYS WHEN THE HOME FEELING WAS THE STRONGEST THIS DAY IT SEEMED AS IF SHE COULD BEAR IT NO LONGER +61-70970-0028-1772: STUTELEY WAS BY HIS SIDE IN A FLASH AND THEN THEY BOTH BEGAN FEELING ABOUT THEM TO ASCERTAIN THE SHAPE AND CHARACTER OF THIS VAULT +5105-28241-0000-1442: HER SEA GOING QUALITIES WERE EXCELLENT AND WOULD HAVE AMPLY SUFFICED FOR A CIRCUMNAVIGATION OF THE GLOBE +8224-274384-0001-2299: THE SCOTTISH GENERALS AND COMMISSIONERS AFFECTED GREAT SURPRISE ON THE APPEARANCE OF THE KING AND THOUGH THEY PAID HIM ALL THE EXTERIOR RESPECT DUE TO HIS DIGNITY THEY INSTANTLY SET A GUARD UPON HIM UNDER COLOR OF PROTECTION AND MADE HIM IN REALITY A PRISONER +2300-131720-0017-589: EDISON HAD INSTALLED HIS HISTORIC FIRST GREAT CENTRAL STATION SYSTEM IN NEW YORK ON THE MULTIPLE ARC SYSTEM COVERED BY HIS FEEDER AND MAIN INVENTION WHICH RESULTED IN A NOTABLE SAVING IN THE COST OF CONDUCTORS AS AGAINST A STRAIGHT TWO WIRE SYSTEM THROUGHOUT OF THE TREE KIND +2300-131720-0033-605: IN DECEMBER EIGHTEEN EIGHTY EIGHT MISTER W J JENKS READ AN INTERESTING PAPER BEFORE THE AMERICAN INSTITUTE OF ELECTRICAL ENGINEERS ON THE SIX YEARS OF PRACTICAL EXPERIENCE HAD UP TO THAT TIME WITH THE METER THEN MORE GENERALLY IN USE THAN ANY OTHER +7729-102255-0006-2239: (COMING->COMMON) BY WAY OF THE MISSOURI RIVER TOWNS HE FELL FIRST AMONG BORDER RUFFIAN COMPANIONSHIP AND INFLUENCES AND PERHAPS HAVING HIS INCLINATIONS ALREADY MOLDED BY HIS WASHINGTON INSTRUCTIONS HIS EARLY IMPRESSIONS WERE DECIDEDLY ADVERSE TO THE FREE STATE CAUSE +1089-134686-0020-20: IF A MAN HAD STOLEN A POUND IN HIS YOUTH AND HAD USED THAT POUND TO AMASS A HUGE FORTUNE HOW MUCH WAS HE OBLIGED TO GIVE BACK THE POUND HE HAD STOLEN ONLY OR THE POUND TOGETHER WITH THE COMPOUND INTEREST ACCRUING UPON IT OR ALL HIS HUGE FORTUNE +2300-131720-0003-575: THE DYNAMO ELECTRIC MACHINE THOUGH SMALL WAS ROBUST FOR UNDER ALL THE VARYING SPEEDS OF WATER POWER AND THE VICISSITUDES OF THE PLANT TO WHICH IT BELONGED IT CONTINUED IN ACTIVE USE UNTIL EIGHTEEN NINETY NINE SEVENTEEN YEARS +1188-133604-0016-80: THIS AT ONCE COMPELS YOU TO UNDERSTAND THAT THE WORK IS TO BE IMAGINATIVE AND DECORATIVE THAT IT REPRESENTS BEAUTIFUL THINGS IN THE CLEAREST WAY BUT NOT UNDER EXISTING CONDITIONS AND THAT IN FACT YOU ARE PRODUCING (JEWELER'S->JEWELLERS) WORK RATHER THAN PICTURES +8230-279154-0002-2314: WHETHER OR NOT THIS PRINCIPLE IS LIABLE TO EXCEPTIONS (EVERYONE->EVERY ONE) WOULD AGREE THAT (IS->IT) HAS A BROAD MEASURE OF TRUTH THOUGH THE WORD EXACTLY MIGHT SEEM AN OVERSTATEMENT AND IT MIGHT SEEM MORE CORRECT TO SAY THAT IDEAS APPROXIMATELY REPRESENT IMPRESSIONS +1089-134691-0013-51: IDLE 
AND EMBITTERING FINALLY TO ARGUE AGAINST HIS OWN DISPASSIONATE CERTITUDE THAT THE COMMANDMENT OF LOVE BADE US NOT TO LOVE OUR NEIGHBOUR AS OURSELVES WITH THE SAME AMOUNT AND INTENSITY OF LOVE BUT TO LOVE HIM AS OURSELVES WITH THE SAME KIND OF LOVE +2830-3979-0001-785: THE CONDITION IS THAT I WILL BE PERMITTED TO MAKE LUTHER TALK AMERICAN STREAMLINE HIM SO TO SPEAK BECAUSE (YOU->HE) WILL NEVER GET PEOPLE WHETHER IN OR OUTSIDE THE LUTHERAN CHURCH ACTUALLY TO READ LUTHER UNLESS WE MAKE HIM TALK AS HE WOULD TALK (TODAY->TO DAY) TO AMERICANS +1320-122617-0028-320: MY PURSUITS ARE PEACEFUL AND MY TEMPER I HUMBLY TRUST IS GREATLY GIVEN TO MERCY AND LOVE RETURNED DAVID A LITTLE NETTLED AT SO DIRECT AN ATTACK ON HIS MANHOOD BUT THERE ARE NONE WHO CAN SAY THAT I HAVE EVER FORGOTTEN MY FAITH IN THE LORD EVEN IN THE GREATEST STRAITS +5639-40744-0000-1564: ELEVEN O'CLOCK HAD STRUCK IT WAS A FINE CLEAR NIGHT THEY WERE THE ONLY PERSONS ON THE ROAD AND THEY SAUNTERED LEISURELY ALONG TO AVOID PAYING THE PRICE OF FATIGUE FOR THE RECREATION PROVIDED FOR THE TOLEDANS IN (THEIR->THE) VALLEY OR ON THE BANKS OF (THEIR->THE) RIVER +672-122797-0064-1849: AT LAST THE LITTLE MICE STAYED AWAY ALSO AND THE TREE SIGHED AFTER ALL IT WAS VERY PLEASANT WHEN THE SLEEK LITTLE MICE SAT ROUND ME AND LISTENED TO WHAT I TOLD THEM +3729-6852-0042-1069: I AM A VERY UNPLEASANT PUPIL ALWAYS ASKING QUESTIONS CURIOUS TROUBLESOME INSATIABLE AND EVEN SUPPOSING THAT I COULD MEET WITH THE TEACHER I REQUIRE I AM AFRAID I AM NOT RICH ENOUGH TO PAY HIM +1089-134691-0012-50: IT WAS IDLE FOR HIM TO MOVE HIMSELF TO BE GENEROUS TOWARDS THEM TO TELL HIMSELF THAT IF HE EVER CAME TO THEIR GATES STRIPPED OF HIS PRIDE BEATEN AND IN (BEGGAR'S->BEGGARS) WEEDS THAT THEY WOULD BE GENEROUS TOWARDS HIM LOVING HIM AS THEMSELVES +1284-1180-0019-231: YOU MUST KNOW SAID MARGOLOTTE WHEN THEY WERE ALL SEATED TOGETHER ON THE BROAD WINDOW SEAT THAT MY HUSBAND FOOLISHLY GAVE AWAY ALL THE POWDER OF LIFE HE FIRST MADE TO OLD (MOMBI->MOMBY) THE WITCH WHO USED TO LIVE IN THE COUNTRY OF THE (GILLIKINS->GILLEKINS) TO THE NORTH OF HERE +2300-131720-0019-591: THE STRONG POSITION HELD BY THE EDISON SYSTEM UNDER THE STRENUOUS COMPETITION THAT WAS ALREADY SPRINGING UP WAS ENORMOUSLY IMPROVED BY THE INTRODUCTION OF THE THREE WIRE SYSTEM AND IT GAVE AN IMMEDIATE IMPETUS TO INCANDESCENT LIGHTING +2094-142345-0003-514: A LARGE OPEN FIREPLACE WITH RUSTY DOGS IN IT AND A BARE BOARDED FLOOR AT THE FAR END FLEECES OF WOOL STACKED UP IN THE MIDDLE OF THE FLOOR SOME EMPTY CORN BAGS +5105-28233-0009-1415: THE BOND OF UNION THUS EFFECTED COULD NEVER BE SEVERED AND ALTHOUGH BEN ZOOF'S ACHIEVEMENTS HAD FAIRLY EARNED HIM THE RIGHT OF RETIREMENT HE FIRMLY DECLINED ALL HONORS OR ANY PENSION THAT MIGHT PART HIM FROM HIS SUPERIOR OFFICER +1284-134647-0007-274: PROSCRIBED BY THE CIVIL AND ECCLESIASTICAL POWERS OF THE EMPIRE THE DONATISTS STILL MAINTAINED IN SOME PROVINCES PARTICULARLY IN NUMIDIA THEIR SUPERIOR NUMBERS AND FOUR HUNDRED BISHOPS ACKNOWLEDGED THE JURISDICTION OF THEIR PRIMATE +1221-135767-0012-199: THEY APPROACHED THE DOOR WHICH WAS OF AN ARCHED FORM AND FLANKED ON EACH SIDE BY A NARROW TOWER OR PROJECTION OF THE EDIFICE IN BOTH OF WHICH WERE LATTICE WINDOWS (THE->WITH) WOODEN SHUTTERS TO CLOSE OVER THEM AT NEED +2961-960-0002-876: IN THE SUPPOSED DEPTHS OF THIS DIALOGUE THE NEO (PLATONISTS->PLATINISTS) FOUND HIDDEN MEANINGS (AND CONNECTIONS->IN CONNECTION) WITH THE JEWISH AND CHRISTIAN SCRIPTURES AND OUT OF THEM THEY ELICITED DOCTRINES QUITE AT VARIANCE WITH THE SPIRIT OF PLATO 
+4970-29095-0007-1312: I HAVE NOT ASKED HIM RUTH REPLIED WITH A LOOK THAT MIGHT IMPLY THAT SHE WAS ONE OF THOSE DETERMINED LITTLE BODIES WHO FIRST MADE UP HER OWN MIND AND THEN COMPELLED OTHERS TO MAKE UP THEIRS IN ACCORDANCE WITH HERS +4507-16021-0030-1251: ASSUREDLY IF THE TONGUE WHICH A NATION OR A PROVINCE HAS SPOKEN IS WORTHY OF INTEREST THE LANGUAGE WHICH HAS BEEN SPOKEN BY A MISERY IS STILL MORE WORTHY OF ATTENTION AND STUDY +8230-279154-0034-2346: WHENEVER THE SENSE OF FAMILIARITY OCCURS WITHOUT A DEFINITE OBJECT IT LEADS US TO SEARCH THE ENVIRONMENT UNTIL WE ARE SATISFIED THAT WE HAVE FOUND THE APPROPRIATE OBJECT WHICH LEADS US TO THE JUDGMENT THIS IS FAMILIAR +6930-75918-0020-1971: BRAGELONNE WATCHED FOR SOME TIME THE CONDUCT OF THE TWO LOVERS LISTENED TO THE LOUD AND UNCIVIL SLUMBERS OF MANICAMP WHO SNORED AS IMPERIOUSLY AS THOUGH HE WAS WEARING HIS BLUE AND GOLD INSTEAD OF HIS VIOLET SUIT +4507-16021-0031-1252: AND THEN WE INSIST UPON IT THE STUDY OF SOCIAL DEFORMITIES AND INFIRMITIES AND THE TASK OF POINTING THEM OUT WITH A VIEW TO REMEDY IS NOT A BUSINESS IN WHICH CHOICE IS PERMITTED +7176-88083-0007-2166: BUT SUCH WAS HIS KEENNESS THAT EVEN WHILE THE WIDE FLUKES OF HIS ENGORGED VICTIM WERE STILL STICKING OUT AT THE CORNERS OF HIS BEAK HIS FIERCE RED EYES WERE ONCE MORE PEERING DOWNWARD INTO THE TORRENT IN SEARCH OF FRESH PREY +3570-5694-0016-936: MANY OF THESE AGAIN AND ALSO MANY OF THE OTHER ARISTOCRACY OF LESS DEGREE HAVE IN TURN ATTACHED TO THEIR PERSONS A MORE OR LESS COMPREHENSIVE GROUP OF VICARIOUS CONSUMER IN THE PERSONS OF THEIR WIVES AND CHILDREN THEIR SERVANTS RETAINERS ET CETERA +908-31957-0022-2616: THEN I LONG TRIED BY NATURAL ILLS RECEIVED THE COMFORT FAST WHILE BUDDING AT THY SIGHT MY PILGRIM'S STAFF GAVE OUT GREEN LEAVES WITH MORNING DEWS (IMPEARLED->EMPEARLED) +8230-279154-0004-2316: THERE IS NO LOGICAL IMPOSSIBILITY IN THE HYPOTHESIS THAT THE WORLD SPRANG INTO BEING FIVE MINUTES AGO EXACTLY AS IT THEN WAS WITH A POPULATION THAT REMEMBERED A WHOLLY UNREAL PAST +3575-170457-0041-1011: SHE WAS GONE OUT INTO THE VILLAGE ON SOME ERRAND WHEN AS SHE WAS DESCENDING THE STEEP STREET HER FOOT SLIPPED ON THE ICE AND SHE FELL IT WAS DARK AND NO ONE SAW HER MISCHANCE TILL AFTER A TIME HER GROANS ATTRACTED THE ATTENTION OF A PASSER BY +672-122797-0006-1791: TO GROW AND GROW TO GET OLDER AND BE TALL THOUGHT THE TREE THAT AFTER ALL IS THE MOST DELIGHTFUL THING IN THE WORLD +2094-142345-0006-517: AT THE EDGE OF THIS BOX THERE LIES A GREAT WOODEN DOLL WHICH SO FAR AS MUTILATION IS CONCERNED BEARS A STRONG RESEMBLANCE TO THE FINEST GREEK SCULPTURE AND ESPECIALLY IN THE TOTAL LOSS OF ITS NOSE +2300-131720-0004-576: OWING TO HIS INSISTENCE ON LOW PRESSURE DIRECT CURRENT FOR USE IN DENSELY POPULATED DISTRICTS AS THE ONLY SAFE AND TRULY UNIVERSAL PROFITABLE WAY OF DELIVERING ELECTRICAL ENERGY TO THE CONSUMERS EDISON HAS BEEN FREQUENTLY SPOKEN OF AS AN OPPONENT OF THE ALTERNATING CURRENT +4970-29095-0021-1326: BUT NEITHER SAINT (GIRARD->GERARD) NOR BROAD STREET NEITHER WONDERS OF THE MINT NOR THE GLORIES OF THE HALL WHERE THE GHOSTS OF OUR FATHERS SIT ALWAYS SIGNING THE DECLARATION IMPRESSED THE VISITORS SO MUCH AS THE (SPLENDORS->SPLENDOURS) OF THE CHESTNUT STREET WINDOWS AND THE BARGAINS ON EIGHTH STREET +7729-102255-0008-2241: ALL THE TERRITORIAL DIGNITARIES WERE PRESENT GOVERNOR SHANNON PRESIDED JOHN CALHOUN THE SURVEYOR GENERAL MADE THE PRINCIPAL SPEECH A DENUNCIATION OF THE (ABOLITIONISTS->ABOLITIONIST) SUPPORTING THE TOPEKA MOVEMENT CHIEF JUSTICE (LECOMPTE->LEC COMTE) 
DIGNIFIED THE OCCASION WITH APPROVING REMARKS +1188-133604-0002-66: BY BEING STUDIOUS OF COLOR THEY ARE STUDIOUS OF DIVISION AND WHILE THE (CHIAROSCURIST->CHIOSCURIST) DEVOTES HIMSELF TO THE REPRESENTATION OF DEGREES OF FORCE IN ONE THING UNSEPARATED LIGHT THE COLORISTS HAVE FOR THEIR FUNCTION THE ATTAINMENT OF BEAUTY BY ARRANGEMENT OF THE DIVISIONS OF LIGHT +4077-13754-0005-1101: THEY KNEW NO NORTH NO SOUTH NO EAST NO WEST THEY STOOD POSITIVELY BY THE CONSTITUTION AND WOULD HAVE NOTHING TO DO IN THE BLOODY STRIFE BETWEEN BROTHERS UNLESS INDEED THEY WERE SUMMONED BY THE AUTHORITY TO WHICH THEY HAD ALREADY ONCE LOYALLY RESPONDED TO FURNISH MEN AND ARMS FOR THEIR COUNTRY'S NEED +5639-40744-0032-1596: FOR GOD'S SAKE MY LADY MOTHER GIVE ME A WIFE WHO WOULD BE AN AGREEABLE COMPANION NOT ONE WHO WILL DISGUST ME SO THAT WE MAY BOTH BEAR EVENLY AND WITH MUTUAL GOOD WILL THE YOKE IMPOSED ON US BY HEAVEN INSTEAD OF PULLING THIS WAY AND THAT WAY AND FRETTING EACH OTHER TO DEATH +5105-28241-0004-1446: ALTHOUGH ONLY A MODERATE BREEZE WAS BLOWING THE SEA WAS ROUGH A CIRCUMSTANCE TO BE ACCOUNTED FOR ONLY BY THE DIMINUTION IN THE FORCE OF THE EARTH'S ATTRACTION RENDERING THE LIQUID PARTICLES SO BUOYANT THAT BY THE MERE EFFECT OF OSCILLATION THEY WERE CARRIED TO A HEIGHT THAT WAS QUITE UNPRECEDENTED +121-123852-0002-126: NO MATTER THEN ALTHOUGH MY FOOT DID STAND UPON THE FARTHEST EARTH (REMOV'D->REMOVED) FROM THEE FOR NIMBLE THOUGHT CAN JUMP BOTH SEA AND LAND AS SOON AS THINK THE PLACE WHERE HE WOULD BE BUT AH +8224-274381-0006-2286: THIS NOBLEMAN'S CHARACTER THOUGH CELEBRATED FOR POLITICAL COURAGE AND CONDUCT WAS VERY LOW FOR MILITARY PROWESS AND AFTER SOME SKIRMISHES IN WHICH HE WAS WORSTED HE HERE ALLOWED MONTROSE TO ESCAPE HIM +3575-170457-0026-996: P S PRAY SIR EXCUSE ME FOR WRITING TO YOU A SECOND TIME I COULD NOT HELP WRITING PARTLY TO TELL YOU HOW THANKFUL I AM FOR YOUR KINDNESS AND PARTLY TO LET YOU KNOW THAT YOUR ADVICE SHALL NOT BE WASTED HOWEVER SORROWFULLY AND RELUCTANTLY IT MAY BE AT FIRST FOLLOWED C B +7176-88083-0022-2181: THE HAWK EMBITTERED BY THE LOSS OF HIS FIRST QUARRY HAD BECOME AS DOGGED IN PURSUIT AS A WEASEL NOT TO BE SHAKEN OFF OR EVADED OR DECEIVED +260-123286-0025-727: FLIGHT WAS OUT OF THE QUESTION NOW THE REPTILES ROSE THEY WHEELED AROUND OUR LITTLE RAFT WITH A RAPIDITY GREATER THAN THAT OF EXPRESS TRAINS +1580-141084-0024-412: HE COULD HARDLY STAND STILL SO GREAT WAS HIS MENTAL AGITATION AND HE RAN TOWARDS HOLMES WITH TWO EAGER HANDS OUTSTRETCHED THANK HEAVEN THAT YOU HAVE COME +5639-40744-0033-1597: HER BEARING WAS GRACEFUL AND ANIMATED SHE LED HER SON BY THE HAND AND BEFORE HER WALKED TWO MAIDS WITH WAX LIGHTS AND SILVER CANDLESTICKS +7127-75946-0018-2106: THERE WAS SOMETHING IN HIS CARRIAGE WHICH RESEMBLED THE BUOYANT MOVEMENTS OF AN IMMORTAL AND HE DID NOT DANCE SO MUCH AS (SEEM->SEEMED) TO SOAR ALONG +5639-40744-0002-1566: (RODOLFO->RUDOLPHO) AND HIS COMPANIONS WITH THEIR FACES MUFFLED IN THEIR CLOAKS STARED RUDELY AND INSOLENTLY AT THE MOTHER THE DAUGHTER AND THE SERVANT MAID +7127-75947-0033-2151: HOW IS IT LA (VALLIERE->VALLIER) SAID MADEMOISELLE DE (TONNAY CHARENTE->TENNICHANT) THAT THE VICOMTE DE (BRAGELONNE->BRAGELONE) SPOKE OF YOU AS LOUISE +1995-1837-0007-488: THEN OF A SUDDEN AT MIDDAY THE SUN SHOT OUT HOT AND STILL NO BREATH OF AIR STIRRED THE SKY WAS LIKE BLUE STEEL THE EARTH STEAMED +8555-284449-0001-2527: THEN THEY ALL MARCHED OUT A LITTLE WAY INTO THE FIELDS AND FOUND THAT THE ARMY OF PINKIES HAD ALREADY FORMED AND WAS ADVANCING STEADILY TOWARD THEM 
+3729-6852-0029-1056: IT IS SOLD EVERYWHERE BUT FOR THE LAST THREE WEEKS NOBODY WILL USE ANY SNUFF BUT THAT SOLD AT THE (CIVET->CEVETTE) CAT +5105-28240-0015-1432: FOR SOME MOMENTS HE SEEMED PERFECTLY STUPEFIED THEN RECOVERING HIMSELF HE BEGAN TO OVERWHELM THE COUNT WITH A TORRENT OF QUESTIONS +908-31957-0023-2617: I LOVE THEE FREELY AS MEN STRIVE FOR RIGHT I LOVE THEE PURELY AS THEY TURN FROM PRAISE +5105-28241-0005-1447: FOR A FEW MILES SHE FOLLOWED THE LINE HITHERTO PRESUMABLY OCCUPIED BY THE COAST OF ALGERIA BUT NO LAND APPEARED TO THE SOUTH +2961-960-0018-892: BUT IN THE REST OF THE WORK THE POWER OF LANGUAGE SEEMS TO FAIL HIM AND THE DRAMATIC FORM IS WHOLLY GIVEN UP +3570-5694-0017-937: THE WEARING OF UNIFORMS OR LIVERIES IMPLIES A CONSIDERABLE DEGREE OF DEPENDENCE AND MAY EVEN BE SAID TO BE A MARK OF SERVITUDE REAL OR OSTENSIBLE +260-123440-0010-773: HOW CHEERFULLY HE SEEMS TO GRIN HOW NEATLY SPREAD HIS CLAWS AND WELCOME LITTLE FISHES IN WITH GENTLY SMILING JAWS +4992-41797-0021-1386: (SHE'S->SHE IS) WONDERFUL MORE WONDERFUL THAN ANYBODY WE'VE EVER SEEN ANYWHERE AND SHE (DRAWS->DRAWLS) BETTER THAN THE TEACHER IN CHARLESTOWN +2830-3979-0003-787: THE UNDERTAKING WHICH SEEMED SO ATTRACTIVE WHEN VIEWED AS A LITERARY TASK PROVED A MOST DIFFICULT ONE AND AT TIMES BECAME OPPRESSIVE +61-70968-0035-1716: TAKING ADVANTAGE OF THIS THE SQUIRE'S FEW MEN REDOUBLED THEIR EFFORTS AND ENCOURAGED BY ROBIN'S AND THE LITTLE STROLLER'S CRIES FOUGHT THEIR WAY TO HIM +7729-102255-0038-2271: ATCHISON WHO HAD BEEN HARANGUING THE MOB PLANTED HIS TWO GUNS BEFORE THE BUILDING AND TRAINED THEM UPON IT +6829-68769-0021-1881: A FRESH WHOLESOME LOOKING BOY WAS TOM GATES WITH STEADY (GRAY->GREY) EYES AN INTELLIGENT FOREHEAD BUT A SENSITIVE RATHER WEAK MOUTH +61-70968-0060-1741: NO THANKS I AM GLAD TO GIVE YOU SUCH EASY HAPPINESS +2830-3980-0000-797: IN EVERY WAY THEY SOUGHT TO UNDERMINE THE AUTHORITY OF SAINT PAUL +8463-294825-0004-2446: IN ALL THE NOVEL HAD A DIFFICULT GESTATION +7176-88083-0017-2176: BUT AT THIS POINT IN THE RAPIDS IT WAS IMPOSSIBLE FOR HIM TO STAY DOWN +260-123440-0018-781: I AM VERY TIRED OF SWIMMING ABOUT HERE O MOUSE +2830-3979-0012-796: THE WORD OF OUR GOD SHALL STAND FOREVER +260-123440-0003-766: OH WON'T SHE BE SAVAGE IF I'VE KEPT HER WAITING +5142-36586-0004-1561: EFFECTS OF THE INCREASED USE AND DISUSE OF PARTS +2830-3980-0045-842: MEN SHOULD NOT SPECULATE ABOUT THE NATURE OF GOD +6930-75918-0000-1951: CONCORD RETURNED TO ITS PLACE AMIDST THE TENTS +2830-3980-0044-841: HOWEVER THE GRACE AND PEACE OF GOD WILL +61-70968-0045-1726: PRAY FOLLOW US WITH MINE AND MY LORD SHERIFF'S MEN +260-123286-0004-706: ONE MIGHT BE WITH LESS REASON THAN NOW +6930-76324-0009-1981: DO YOU SUPPOSE THE MINIATURE WAS A COPY OF THE SAME THING +4446-2273-0018-1156: DO YOU REMEMBER THAT FIRST WALK WE TOOK TOGETHER IN PARIS +8463-294825-0019-2461: MILLIMETER ROUGHLY ONE TWENTY FIFTH OF AN INCH +5142-36377-0000-1531: IT WAS ONE OF THE MASTERLY AND CHARMING STORIES OF (DUMAS->DE MAU) THE ELDER +5142-33396-0009-1471: THERE STAND SO I SAID AND GLARE AND HISS AT MY FOES +4446-2273-0033-1171: FOR A LONG TIME NEITHER HILDA NOR BARTLEY SPOKE +4992-41797-0016-1381: THEY COULDN'T RUN (NOR->OR) MOVE THEY'RE JUST PASTEBOARD +8555-292519-0004-2551: THE PITY THAT WE MUST COME AND GO +260-123288-0018-752: THE RAFT BEARS ON STILL TO THE (SOUTH EAST->SOUTHEAST) +237-134500-0003-662: THE ORCHARD WAS SPARKLING AND RIPPLING IN THE SUN +61-70970-0012-1756: YET HE WILL TEACH YOU A FEW TRICKS WHEN MORNING IS COME +1580-141084-0019-407: 
YES MY DEAR WATSON I HAVE SOLVED THE MYSTERY +908-157963-0003-2566: WHY FADE THESE CHILDREN OF THE SPRING +260-123286-0020-722: TUESDAY AUGUST EIGHTEENTH +1188-133604-0013-77: IT MUST REMEMBER BE ONE OR THE OTHER +5105-28241-0014-1456: ANOTHER CIRCUMSTANCE WAS MOST REMARKABLE +2094-142345-0031-542: AND SHE WAS VERY FOND OF YOU TOO AUNT RACHEL +121-127105-0018-152: CRIED THE LADIES WHOSE DEPARTURE HAD BEEN FIXED +7021-85628-0026-2086: NO MY LITTLE SON SHE SAID +1580-141083-0043-377: THE TOP FLOOR BELONGS TO MILES MC LAREN +2830-3980-0015-812: FOR A PERSON TO POSSESS KNOWLEDGE IS NOT ENOUGH +2830-3980-0060-857: HE NEVER LOSES SIGHT OF THE PURPOSE OF HIS EPISTLE +1580-141083-0028-362: THEN HE TOSSED IT DOWN AND SEIZED THE NEXT +6829-68769-0046-1906: YOU'RE FOOLISH WHY SHOULD YOU DO ALL THIS +6930-81414-0025-2026: MY POSITION WAS TOO TERRIBLE +2830-3980-0014-811: THE CALL IS NOT TO BE TAKEN LIGHTLY +8555-284449-0010-2536: IT WILL BE SUCH A SATISFACTION +121-121726-0013-122: TIED TO A WOMAN +4446-2275-0011-1186: (BARTLEY->PARTLEY) BENT LOWER OVER THE FIRE +7176-92135-0004-2191: FRANKLY I CANNOT ALWAYS SAY +7176-92135-0019-2206: I WANT DOUBLE NINE (HAL LO->HALLOA) +121-127105-0033-167: IT WAS THE BEAUTY OF IT +2094-142345-0016-527: SPINNING INDEED +1089-134691-0024-62: (STEPHANOS DEDALOS->STEPHANO'S DEAD LOS) +5683-32865-0000-1606: YOU KNOW CAPTAIN LAKE +237-134500-0017-676: IF I FEEL THAT WAY I FEEL THAT WAY +237-134493-0007-647: ALEXANDRA LETS YOU SLEEP LATE +5142-33396-0039-1501: I NAMED NINE OTHERS AND SAID +2094-142345-0046-557: I DELIGHT IN YOUR KITCHEN +2830-3980-0059-856: PAUL STICKS TO HIS THEME +3729-6852-0045-1072: HE HAD A GOOD APPETITE COULD TELL A GOOD STORY WITHOUT LAUGHING WAS CELEBRATED FOR HIS WITTY REPARTEES AND HIS SOCIABLE MANNERS BUT HE SPENT HIS LIFE AT HOME SELDOM GOING OUT AND SEEING HARDLY (ANYONE->ANY ONE) BECAUSE HE ALWAYS HAD A PIPE IN HIS MOUTH AND WAS SURROUNDED BY AT LEAST TWENTY CATS WITH WHICH HE WOULD AMUSE HIMSELF ALL DAY +8463-294825-0009-2451: THIS COMPULSION LEADS NEMO INTO UGLY CONTRADICTIONS (HE'S->HE IS) A FIGHTER FOR FREEDOM YET ALL WHO BOARD HIS SHIP ARE IMPRISONED THERE FOR GOOD HE WORKS TO SAVE LIVES BOTH HUMAN AND ANIMAL YET HE HIMSELF CREATES A HOLOCAUST HE DETESTS IMPERIALISM YET HE LAYS PERSONAL CLAIM TO THE SOUTH POLE +908-157963-0008-2571: THOU GENTLE MAID OF SILENT VALLEYS AND OF MODEST BROOKS FOR THOU (SHALL->SHALT) BE CLOTHED IN LIGHT AND FED WITH MORNING MANNA TILL (SUMMERS->SUMMER'S) HEAT MELTS THEE BESIDE THE FOUNTAINS AND THE SPRINGS TO FLOURISH IN ETERNAL VALES THEY WHY (SHOULD THEL->SHOULDST THOU) COMPLAIN +5142-36377-0021-1552: ENVY HATRED MALICE AND UNCHARITABLENESS ARE NEVER SO ESSENTIALLY DETESTABLE TO MY MIND AS WHEN THEY ARE ANIMATED BY (A->THE) SENSE OF PROPRIETY AND WORK UNDER THE SURFACE BUT FOR MY INTEREST IN NAOMI AND MY OTHER INTEREST IN THE LITTLE LOVE LOOKS WHICH I NOW AND THEN SURPRISED PASSING BETWEEN HER AND AMBROSE I SHOULD NEVER HAVE SAT THROUGH THAT SUPPER +8230-279154-0019-2331: IT WOULD SEEM THAT ONLY RATHER RECENT EVENTS CAN BE PLACED AT ALL ACCURATELY BY MEANS OF FEELINGS GIVING THEIR TEMPORAL RELATION TO THE PRESENT BUT IT IS CLEAR THAT SUCH FEELINGS MUST PLAY AN ESSENTIAL PART IN THE PROCESS OF DATING REMEMBERED EVENTS +8555-292519-0009-2556: HO YE SAILS THAT SEEM TO WANDER IN DREAM FILLED MEADOWS SAY IS (THE->THIS) SHORE WHERE I STAND THE ONLY FIELD OF STRUGGLE OR ARE YE HIT AND BATTERED OUT THERE BY WAVES AND WIND GUSTS AS YE TACK OVER A CLASHING SEA OF WATERY ECHOES +2961-960-0003-877: THEY WERE ABSORBED IN 
HIS THEOLOGY AND WERE UNDER THE DOMINION OF HIS NAME WHILE THAT WHICH WAS TRULY GREAT AND TRULY CHARACTERISTIC IN HIM HIS EFFORT TO REALIZE AND CONNECT ABSTRACTIONS WAS NOT UNDERSTOOD BY THEM AT ALL +4507-16021-0016-1237: CAN ONE IMAGINE A NATURALIST REFUSING TO STUDY THE VIPER THE BAT THE SCORPION THE CENTIPEDE THE TARANTULA AND ONE WHO WOULD CAST THEM BACK INTO THEIR DARKNESS SAYING OH HOW UGLY THAT IS +1221-135766-0001-172: GOD AS A DIRECT CONSEQUENCE OF THE SIN WHICH MAN THUS PUNISHED HAD GIVEN HER A LOVELY CHILD WHOSE PLACE WAS ON THAT SAME DISHONOURED BOSOM TO CONNECT HER PARENT (FOR EVER->FOREVER) WITH THE RACE AND DESCENT OF MORTALS AND TO BE FINALLY A BLESSED SOUL IN HEAVEN +61-70968-0016-1697: AND THEN THEY BECAME VEXED AND WOULD HAVE SNATCHED YOUR PURSE FROM US +1188-133604-0029-93: ONLY THERE ARE TWO SORTS OF CLOUD AND FIRE +8463-294828-0000-2462: CHAPTER THREE AS MASTER WISHES +61-70968-0001-1682: GIVE NOT SO EARNEST A MIND TO THESE (MUMMERIES->MEMORIES) CHILD +4446-2273-0034-1172: HE FELT A TREMOR RUN THROUGH THE SLENDER YELLOW FIGURE IN FRONT OF HIM +61-70968-0046-1727: NOTTINGHAM CASTLE WAS REACHED AND ADMITTANCE WAS DEMANDED +6829-68769-0017-1877: WORSE TOM WORSE (N->THAN) EVER REPLIED THE JAILER GLOOMILY +121-127105-0019-153: MISSUS GRIFFIN HOWEVER EXPRESSED THE NEED FOR A LITTLE MORE LIGHT +5142-33396-0010-1472: IN THE STERN I (CURVED->CARVED) THE TAIL UP ALMOST AS HIGH AS THE HEAD +7176-92135-0035-2222: THEN LORD (TUPPENY->TUPPENNY) WELL WHAT ABOUT AUCTION +1089-134686-0033-33: A GREAT SAINT SAINT FRANCIS (XAVIER->ZAVIOUR) +672-122797-0047-1832: HOW KIND MAN IS AFTER ALL +5142-33396-0025-1487: COME COME I CALLED WHEN NO ONE OBEYED A FIRE +237-126133-0004-618: IF SHE COULD ONLY SEE PHRONSIE FOR JUST ONE MOMENT +2094-142345-0032-543: I OFTEN HEARD HER TALK OF YOU IN THE SAME SORT OF WAY +3570-5694-0012-932: THERE IS A MORE OR LESS ELABORATE SYSTEM OF RANK AND GRADES +1089-134691-0010-48: BROTHER (MAC ARDLE->MICAWAL) BROTHER (KEOGH->KIOV) +121-121726-0014-123: HYPOCRITE A HORSE DEALER +4446-2273-0019-1157: COME WE'LL HAVE OUR COFFEE IN THE OTHER ROOM AND YOU CAN SMOKE +1995-1837-0017-498: HE GAZED ABOUT PERPLEXED ASTONISHED +6829-68769-0002-1862: I CAN'T SEE IT IN THAT LIGHT SAID THE OLD LAWYER +6930-81414-0026-2027: MY (OVERWROUGHT->OVER WROUGHT) NERVES YIELDED AT LAST +5683-32865-0016-1622: OH I KNOW THAT'S (LORNE->LORN) BRANDON +3575-170457-0052-1022: SHE HAD ANOTHER WEIGHT ON HER MIND THIS CHRISTMAS +3729-6852-0025-1052: IS THERE NOT A MERIDIAN EVERYWHERE +4446-2275-0027-1202: HE MOVED UNEASILY AND HIS CHAIR CREAKED +4507-16021-0041-1262: IT IS UNINTELLIGIBLE IN THE DARK +5105-28240-0010-1427: CAPTAIN SERVADAC HASTENED (TOWARDS->TOWARD) HIM +61-70968-0031-1712: SILENCE YOU KNAVE CRIED MONTFICHET +1580-141084-0020-408: LOOK AT THAT HE HELD OUT HIS HAND +1580-141083-0014-348: TO THE BEST OF MY BELIEF THEY WERE ROLLED UP +2094-142345-0017-528: I NEVER KNEW YOUR EQUALS FOR GALLOWSNESS +5142-33396-0040-1502: AND THESE SHALL FOLLOW YOUR THRALLS IN THE SAME WAY +1995-1837-0002-483: AH THE SWAMP THE CRUEL SWAMP +1580-141084-0050-438: IF MISTER (SOAMES->SOLMES) SAW THEM THE GAME WAS UP +5142-33396-0055-1517: THAT TIME IT POINTED US INTO YOUR FATHER'S SHIPS +5142-36377-0016-1547: FOR ONCE IN A WAY I PROVED A TRUE PROPHET +3729-6852-0010-1037: I NEVER HAD ANY FAMILY +7729-102255-0034-2267: TO THEIR SORROW THEY WERE SOON UNDECEIVED +8463-294828-0030-2492: I ASKED FOR COMMANDER FARRAGUT +6930-76324-0010-1982: WHAT IN THE WORLD IS (THAT->IT) QUERIED JOYCE +1089-134686-0003-3: 
(HELLO->HALLO) BERTIE ANY GOOD IN YOUR MIND +672-122797-0062-1847: NO SAID THE TREE +1580-141084-0035-423: HE COULD EXAMINE THE PAPERS IN HIS OWN OFFICE +5683-32865-0001-1607: SAID LORD CHELFORD ADDRESSING ME +5142-36600-0000-1562: CHAPTER SEVEN ON THE RACES OF MAN +1580-141084-0005-393: THAT IS VERY IMPORTANT SAID HOLMES +1995-1826-0014-453: (COTTON->CARTON) SHE PAUSED +6829-68769-0047-1907: I HAVE MY OWN REASONS MISTER MARSHALL +1995-1836-0002-468: WHY SHOULD HE NOT BE AS OTHER MEN +2094-142345-0047-558: POYSER IS NOT AT HOME IS HE +1284-1180-0016-228: THE WOMAN SEEMED THOUGHTFUL +121-127105-0004-138: THE (STORY'S->STORIES) WRITTEN +121-127105-0022-156: WELL IF I DON'T KNOW WHO SHE WAS IN LOVE WITH I KNOW WHO HE WAS +61-70968-0005-1686: THIS WAS SO SWEET A LADY SIR AND IN SOME MANNER I DO THINK SHE DIED +237-126133-0022-636: I DIDN'T HAVE ANY FEARS IF I WORKED IT RIGHTLY SAID THE OLD GENTLEMAN COMPLACENTLY +237-134493-0011-651: HOW BROWN YOU'VE GOT SINCE YOU CAME HOME I WISH I HAD AN ATHLETE TO MOW MY ORCHARD +8455-210777-0035-2391: THAT IS ALL QUITE TRUE MISTER NEVERBEND SAID SIR (FERDINANDO BROWN->FERDINAND O'BROWN) +1320-122617-0014-306: THEY DREW BACK A LITTLE FROM THE ENTRANCE AND MOTIONED TO THE SUPPOSED (CONJURER->CONJUROR) TO ENTER +4507-16021-0045-1266: IT IS SO MADE THAT EVERYWHERE WE FEEL THE SENSE OF PUNISHMENT +5142-33396-0044-1506: (I AM->I'M) STIFF WITH LONG SITTING HE SAID I ITCH FOR A FIGHT I TURNED TO THE FARMER +672-122797-0066-1851: WHY ONE MORNING THERE CAME A QUANTITY OF PEOPLE AND SET TO WORK IN THE LOFT +7127-75946-0003-2091: GENTLEMEN TO YOUR POSTS WHEREUPON SAINT (AIGNAN->ANNON) AND (VILLEROY->VILLEROI) TOOK THEIR LEAVE +6930-76324-0014-1986: THIS THOUGHT HOWEVER DID NOT ENTER THE HEADS OF THE ENTHUSIASTIC PAIR +672-122797-0051-1836: I AM BY NO MEANS OLD SAID THE FIR TREE +4446-2275-0001-1176: SHE BLUSHED AND SMILED AND FUMBLED HIS CARD IN HER CONFUSION BEFORE SHE RAN UPSTAIRS +8555-284447-0024-2525: COME AND GET THE BOOLOOROO SHE SAID GOING TOWARD THE BENCHES +1188-133604-0017-81: THAT A STYLE IS RESTRAINED OR SEVERE DOES NOT MEAN THAT IT IS ALSO ERRONEOUS +908-157963-0022-2585: COME FORTH WORM (AND->IN) THE SILENT VALLEY TO THY PENSIVE QUEEN +1995-1826-0002-441: JOHN TAYLOR WHO HAD SUPPORTED HER THROUGH COLLEGE WAS INTERESTED IN COTTON +5683-32866-0017-1641: I AM VERY UNEASY ABOUT IT WHATEVER IT IS I CAN'T HELP IT +4992-41797-0006-1371: HE KEEPS THE THOU SHALT NOT (COMMANDMENTS->COMMAND AS) FIRST RATE HEN LORD DOES +5142-36377-0020-1551: A MORE DREARY AND MORE DISUNITED FAMILY PARTY I NEVER SAT AT (THE->*) TABLE WITH +4970-29095-0006-1311: IS THY FATHER WILLING THEE SHOULD GO AWAY TO A SCHOOL OF THE WORLD'S PEOPLE +7176-92135-0008-2195: LEND ME YOUR EAR FOR TEN MINUTES AND YOU SHALL LEARN JUST WHAT (STAGECRAFT->STAGE CRAFT) IS +121-121726-0002-111: (ANGOR->ANGER) PAIN PAINFUL TO HEAR +7176-92135-0009-2196: AND I SHOULD BEGIN WITH A SHORT HOMILY ON SOLILOQUY +2830-3979-0002-786: LET US BEGIN WITH THAT HIS COMMENTARY ON (GALATIANS->GALLATIONS) +1284-1180-0004-216: WHEN THEY WERE OUTSIDE UNC SIMPLY LATCHED THE DOOR AND STARTED UP THE PATH +7021-85628-0016-2076: THAT IS A VERY FINE CAP YOU HAVE HE SAID +61-70970-0002-1746: MOST OF ALL ROBIN THOUGHT OF HIS FATHER WHAT WOULD HE COUNSEL +672-122797-0021-1806: AND TOWARDS CHRISTMAS HE WAS ONE OF THE FIRST THAT WAS CUT DOWN +1580-141083-0032-366: WATSON I HAVE ALWAYS DONE YOU AN INJUSTICE THERE ARE OTHERS +8455-210777-0049-2405: LIEUTENANT CROSSTREES IS A VERY GALLANT OFFICER +7176-92135-0024-2211: TO BE OR NOT TO BE 
THAT IS THE QUESTION WHETHER TIS NOBLER +2094-142345-0050-561: BUT THERE'S FATHER (*->IN) THE BARN SIR IF HE'D BE OF ANY USE +7127-75947-0018-2136: I HAVE BEEN HERE THIS QUARTER OF AN HOUR REPLIED LA (VALLIERE->VALLIER) +8463-294825-0008-2450: BUT MUCH OF THE NOVEL'S BROODING POWER COMES FROM CAPTAIN NEMO +8224-274384-0003-2301: OR HATH HE GIVEN US ANY GIFT +4507-16021-0015-1236: SINCE WHEN HAS MALADY BANISHED MEDICINE +8455-210777-0064-2420: AND I HAVE NO ONE READY TO WHOM I CAN GIVE UP THE ARCHIVES OF THE GOVERNMENT +2830-3980-0004-801: INDEED HE PERSECUTED THE CHURCH OF CHRIST FOR A LONG TIME +7127-75946-0002-2090: LET HIM COME IN THEN SAID THE KING AND AS IF COLBERT HAD BEEN LISTENING AT THE DOOR FOR THE PURPOSE OF KEEPING HIMSELF (AU COURANT->ACCORANT) WITH THE CONVERSATION HE ENTERED AS SOON AS THE KING HAD PRONOUNCED HIS NAME TO THE TWO COURTIERS +5105-28241-0018-1460: ITS DEPTH REMAINED INVARIABLE STILL FOUR OR AT MOST FIVE FATHOMS AND ALTHOUGH ITS BOTTOM WAS ASSIDUOUSLY DREDGED IT WAS ONLY TO PROVE IT BARREN OF MARINE PRODUCTION OF ANY TYPE +7176-88083-0021-2180: BUT AS BEFORE THE LEAPING WAVES OF THE RAPIDS WERE TOO MUCH FOR HIS PURSUER AND HE WAS ABLE TO FLAP HIS WAY ONWARD IN A CLOUD OF FOAM WHILE DOOM HUNG LOW ABOVE HIS HEAD YET HESITATED TO STRIKE +5639-40744-0001-1565: SECURE AS HE THOUGHT IN THE CAREFUL ADMINISTRATION OF JUSTICE IN THAT CITY AND THE CHARACTER OF ITS WELL DISPOSED INHABITANTS THE GOOD (HIDALGO->HEDALGO) WAS FAR FROM THINKING THAT ANY DISASTER COULD (BEFAL->BEFALL) HIS FAMILY +1221-135766-0000-171: HOW STRANGE IT SEEMED TO THE SAD WOMAN AS SHE WATCHED THE GROWTH AND THE BEAUTY THAT BECAME EVERY DAY MORE BRILLIANT AND THE INTELLIGENCE THAT THREW ITS QUIVERING SUNSHINE OVER THE TINY FEATURES OF THIS CHILD +4970-29095-0020-1325: THERE CERTAINLY WAS NO END TO IT AND EVEN RUTH WAS (PHILADELPHIAN->PHILADELPHIIAN) ENOUGH TO BELIEVE THAT A STREET OUGHT NOT TO HAVE ANY END OR ARCHITECTURAL POINT UPON WHICH THE WEARY EYE COULD REST +7127-75946-0017-2105: HIS LEGS THE BEST SHAPED AT COURT WERE DISPLAYED TO GREAT ADVANTAGE IN FLESH (COLORED->COLOURED) SILKEN HOSE OF SILK SO FINE AND SO TRANSPARENT THAT IT SEEMED ALMOST LIKE FLESH ITSELF +8230-279154-0018-2330: THERE IS OF COURSE A DIFFERENCE BETWEEN KNOWING THE TEMPORAL RELATION OF A REMEMBERED EVENT TO THE PRESENT AND KNOWING THE TIME ORDER OF TWO REMEMBERED EVENTS +7729-102255-0022-2255: FROM THESE AGAIN SPRANG BARRICADED AND FORTIFIED DWELLINGS CAMPS AND (SCOUTING->SCOUT) PARTIES FINALLY CULMINATING IN ROVING GUERRILLA BANDS HALF PARTISAN HALF PREDATORY +8230-279154-0033-2345: IT IS BY NO MEANS ALWAYS RELIABLE ALMOST EVERYBODY HAS AT SOME TIME EXPERIENCED THE WELL KNOWN ILLUSION THAT ALL THAT IS HAPPENING NOW HAPPENED BEFORE AT SOME TIME +6930-75918-0019-1970: UPON THE LARGE SQUARE IN FRONT OF THE HOTEL THE SHADOWS OF THE TENTS INTERSECTED BY THE GOLDEN MOONBEAMS FORMED AS IT WERE A HUGE MOSAIC OF JET AND YELLOW FLAGSTONES +7021-85628-0015-2075: WELL BUT NOW SAID THE PRINCESS AND SHE FILLED HIS POCKETS WITH CAKES AND PUT HER OWN HEAVY GOLD CHAIN AROUND HIS NECK AND BENT DOWN AND KISSED HIM +7729-102255-0007-2240: HIS RECEPTION SPEECH AT WESTPORT IN WHICH HE MAINTAINED THE LEGALITY OF THE LEGISLATURE AND HIS DETERMINATION TO ENFORCE THEIR LAWS DELIGHTED HIS PRO SLAVERY AUDITORS +7021-79730-0001-2030: TO SUPPOSE THAT THE OBJECT OF THIS WORK IS TO AID IN EFFECTING SUCH A SUBSTITUTION AS THAT IS ENTIRELY TO MISTAKE ITS NATURE AND DESIGN +1284-1181-0001-246: MARGOLOTTE HAD FIRST MADE THE GIRL'S FORM FROM THE PATCHWORK 
QUILT AND THEN SHE HAD DRESSED IT WITH A PATCHWORK SKIRT AND AN APRON WITH POCKETS IN IT USING THE SAME GAY MATERIAL THROUGHOUT +2830-3980-0017-814: WHEN I WAS A YOUNG MAN I THOUGHT PAUL WAS MAKING TOO MUCH OF HIS CALL +4446-2275-0013-1188: I'LL DO ANYTHING YOU WISH ME TO BARTLEY SHE SAID TREMULOUSLY +1221-135766-0013-184: PEARL WAS A BORN OUTCAST OF THE INFANTILE WORLD +4446-2275-0028-1203: YES YES SHE HURRIED PULLING HER HAND GENTLY AWAY FROM HIM +8555-284447-0022-2523: I HAD A NOTION IT WAS YOU (MATE AS SAVED->MADE TO SEE) ME FROM THE KNIFE +1995-1826-0015-454: SHE HAD ALMOST FORGOTTEN THAT IT WAS HERE WITHIN TOUCH AND SIGHT +5142-33396-0011-1473: THERE SHE SAT ON THE ROLLERS AS FAIR A SHIP AS I EVER SAW +1320-122612-0014-289: THE EXAMINATION HOWEVER RESULTED IN NO DISCOVERY +4446-2271-0000-1113: (MAINHALL->MAYHALL) LIKED ALEXANDER BECAUSE HE WAS AN ENGINEER +1580-141083-0030-364: MISTER (SOAMES->SOLMES) WAS SOMEWHAT OVERWHELMED BY THIS FLOOD OF INFORMATION +61-70970-0014-1758: PRESENTLY HE CROSSED THE FLOOR OF HIS ROOM WITH DECIDED STEP +8463-294828-0016-2478: DID MASTER SUMMON ME HE SAID ENTERING +260-123286-0022-724: TWO HOURS AFTERWARDS A TERRIBLE SHOCK AWOKE ME +2094-142345-0018-529: WHO TAUGHT YOU TO SCRUB A FLOOR I SHOULD LIKE TO KNOW +260-123440-0005-768: AND YESTERDAY THINGS WENT ON JUST AS USUAL +6930-76324-0026-1998: ISN'T HE THE GREATEST FOR GETTING INTO ODD CORNERS +237-134500-0034-693: THANK YOU HE RETURNED SHORTLY +5142-33396-0026-1488: MY MEN LAUGHED YES A STINGY HOST +5142-33396-0056-1518: HERE THEY SAID IS A RASCAL WHO HAS BEEN HARRYING OUR COASTS +8455-210777-0062-2418: WHEN DO YOU INTEND THAT THE JOHN BRIGHT SHALL START +237-134493-0009-649: PLEASE WAIT FOR ME MARIE (EMIL->AMY) COAXED +61-70968-0002-1683: A GOLDEN FORTUNE AND A HAPPY LIFE +260-123288-0019-753: AT NOON THE VIOLENCE OF THE STORM REDOUBLES +260-123288-0020-754: EACH OF US IS LASHED TO SOME PART OF THE RAFT +237-134500-0019-678: HE DROPPED A HANDFUL INTO HER LAP +2830-3980-0046-843: WAS IT NOT ENOUGH TO SAY FROM GOD THE FATHER +4992-41797-0003-1368: MISTER POPHAM LAID DOWN HIS BRUSH +5142-33396-0041-1503: SO I SET GUARDS OVER EVERY ONE IN THAT HOUSE +5683-32865-0002-1608: HE HAD HIS HAND UPON LAKE'S SHOULDER +7127-75947-0030-2148: SHE WAS HERE JUST NOW SAID THE COUNT +4446-2273-0020-1158: I THINK WE DID SHE ANSWERED DEMURELY +4507-16021-0012-1233: WHY SHOULD ONE HALT ON THE WAY +260-123440-0006-769: I WONDER IF (I'VE->I HAVE) BEEN CHANGED IN THE NIGHT +4446-2271-0015-1128: A LITTLE ATTACK OF NERVES POSSIBLY +7176-92135-0021-2208: I SAY I'VE BEEN WONDERING ABOUT THIS BUSINESS +61-70968-0062-1743: (AY->AYE) AND SHOW YOU SOME PRETTY TRICKS +237-134500-0035-694: AND YOU NEVER USED TO BE CROSS TO ME +260-123286-0021-723: DURING HIS WATCH I SLEPT +8455-210777-0047-2403: YOU PROPOSE TO KIDNAP ME I SAID +1580-141084-0036-424: THE INDIAN I ALSO THOUGHT NOTHING OF +237-134500-0005-664: OH BUT I'M GLAD TO GET THIS PLACE MOWED +2830-3980-0016-813: IT SPOILS ONE'S BEST WORK +237-134493-0008-648: SHE GATHERED UP HER REINS +5683-32866-0029-1653: SOMEHOW I HAD GROWN NERVOUS +237-126133-0020-634: HOW DID HER MOTHER EVER LET HER GO +237-134500-0004-663: THAT INVITATION DECIDED HER +237-134500-0020-679: YES DON'T YOU +4992-41806-0010-1398: AIN'T THEY THE GREATEST +8455-210777-0032-2388: IT IS A DUTY SAID I +1580-141084-0006-394: YOU DON'T SEEM TO REALIZE THE POSITION +2830-3980-0002-799: HE WAS THE LAST TO TURN TO CHRIST +4507-16021-0027-1248: PHOENICIAN VERY GOOD +1188-133604-0030-94: HE KNOWS THEM BOTH 
+672-122797-0033-1818: A STORY +1995-1836-0004-470: AS SHE AWAITED HER (GUESTS->GUEST) SHE SURVEYED THE TABLE WITH BOTH SATISFACTION AND DISQUIETUDE FOR HER SOCIAL FUNCTIONS WERE FEW (TONIGHT->TO NIGHT) THERE WERE SHE CHECKED THEM OFF ON HER FINGERS SIR JAMES (CREIGHTON->CRIGHTON) THE RICH ENGLISH MANUFACTURER AND LADY (CREIGHTON->CRYIGHTON) MISTER AND MISSUS (VANDERPOOL->VANERPOLE) MISTER HARRY (CRESSWELL->CRASWELL) AND HIS SISTER JOHN TAYLOR AND HIS SISTER AND MISTER (CHARLES->CHARLESS) SMITH WHOM THE EVENING PAPERS MENTIONED AS LIKELY TO BE UNITED STATES SENATOR FROM NEW JERSEY A SELECTION OF GUESTS THAT HAD BEEN DETERMINED UNKNOWN TO THE HOSTESS BY THE MEETING OF COTTON INTERESTS EARLIER IN THE DAY +5639-40744-0031-1595: SO PERSUASIVE WERE HER ENTREATIES AND SO STRONG HER ASSURANCES THAT NO HARM WHATEVER COULD RESULT TO THEM FROM THE INFORMATION SHE SOUGHT THEY WERE INDUCED TO CONFESS THAT ONE SUMMER'S NIGHT THE SAME SHE HAD MENTIONED THEMSELVES AND ANOTHER FRIEND BEING OUT ON A STROLL WITH (RODOLFO->UDOLPHO) THEY HAD BEEN CONCERNED IN THE (ABDUCTION->ADOCTION) OF A GIRL WHOM (RODOLFO->RUDOLPHO) CARRIED OFF WHILST THE REST OF THEM DETAINED HER FAMILY WHO MADE A GREAT OUTCRY AND WOULD HAVE DEFENDED HER IF THEY COULD +8224-274381-0005-2285: DREADING THE SUPERIOR POWER OF ARGYLE WHO HAVING JOINED HIS VASSALS TO A FORCE LEVIED BY THE PUBLIC WAS APPROACHING WITH A CONSIDERABLE ARMY MONTROSE HASTENED (NORTHWARDS->NORTHWARD) IN ORDER TO ROUSE AGAIN THE MARQUIS OF HUNTLEY AND THE GORDONS WHO HAVING BEFORE HASTILY TAKEN ARMS HAD BEEN INSTANTLY SUPPRESSED BY THE COVENANTERS +8463-287645-0007-2434: ANOTHER CASE SAID JOHN WESLEY WAS A LITTLE GIRL HALF GROWN WHO WAS WASHING WINDOWS UP STAIRS ONE DAY AND UNLUCKILY FELL ASLEEP IN THE WINDOW AND IN THIS POSITION WAS FOUND BY HER MISTRESS IN A RAGE THE MISTRESS (HIT->HID) HER A HEAVY SLAP KNOCKED HER OUT OF THE WINDOW AND SHE FELL TO THE PAVEMENT AND DIED IN A FEW HOURS FROM THE EFFECTS THEREOF +8463-294825-0007-2449: EVEN THE SUPPORTING CAST IS SHREWDLY DRAWN PROFESSOR ARONNAX THE CAREER SCIENTIST CAUGHT IN AN ETHICAL CONFLICT CONSEIL THE COMPULSIVE CLASSIFIER WHO SUPPLIES HUMOROUS TAG LINES FOR (VERNE'S->VERRNE'S) FAST FACTS THE HARPOONER NED LAND A CREATURE OF CONSTANT APPETITES MAN AS HEROIC ANIMAL +7176-92135-0025-2212: IT IS TO LET HAMLET IF THAT HAPPEN TO BE THE NAME OF YOUR CHARACTER ENTER WITH A SMALL DOG PET FALCON MONGOOSE TAME BEAR OR WHATEVER ANIMAL IS MOST IN KEEPING WITH THE PART AND CONFIDE IN THIS ANIMAL SUCH SORROWS HOPES OR SECRET HISTORY AS THE AUDIENCE HAS GOT TO KNOW +5639-40744-0018-1582: THAT WOULD BE VERY WELL MY CHILD REPLIED HER FATHER IF YOUR PLAN WERE NOT LIABLE TO BE FRUSTRATED BY ORDINARY CUNNING BUT NO DOUBT THIS IMAGE (HAS->HAD) BEEN ALREADY MISSED BY ITS OWNER AND HE WILL HAVE SET IT DOWN FOR CERTAIN THAT IT WAS TAKEN OUT OF THE ROOM BY THE PERSON HE LOCKED UP THERE +1221-135767-0016-203: WITH MANY VARIATIONS SUGGESTED BY THE NATURE OF HIS BUILDING MATERIALS DIVERSITY OF CLIMATE AND A DIFFERENT MODE OF SOCIAL LIFE GOVERNOR BELLINGHAM HAD PLANNED HIS NEW HABITATION AFTER THE RESIDENCES OF GENTLEMEN OF FAIR ESTATE IN HIS NATIVE LAND +4992-41806-0014-1402: (THINKS I TO->THINKSIDE OF) MYSELF (I->I'VE) NEVER SEEN ANYTHING OSH (POPHAM->PAPA) COULDN'T MEND IF HE TOOK TIME ENOUGH AND GLUE ENOUGH SO I CARRIED THIS LITTLE FELLER HOME IN A BUSHEL BASKET ONE NIGHT LAST MONTH (AN->AND) I'VE SPENT ELEVEN (EVENIN'S PUTTIN->EVENINGS PUTTING) HIM TOGETHER +8455-210777-0036-2392: I CAN AFFORD TO SMILE BECAUSE I AM ABSOLUTELY POWERLESS 
BEFORE YOU BUT I DO NOT THE LESS FEEL THAT IN A MATTER (IN->OF) WHICH THE PROGRESS OF THE WORLD IS CONCERNED I OR RATHER WE HAVE BEEN PUT DOWN BY BRUTE FORCE +8463-294828-0035-2497: AND SO IF (I'D->I HAD) BEEN DELAYED BY A QUARTER OF AN HOUR OR EVEN LESS THE FRIGATE WOULD HAVE GONE WITHOUT ME AND I WOULD HAVE MISSED OUT ON THIS UNEARTHLY EXTRAORDINARY AND INCONCEIVABLE EXPEDITION WHOSE TRUE STORY MIGHT WELL MEET WITH SOME SKEPTICISM +260-123288-0024-758: THE (FIREBALL->FIRE BALL) HALF OF IT WHITE HALF AZURE BLUE AND THE SIZE OF A TEN INCH SHELL MOVED SLOWLY ABOUT THE RAFT BUT REVOLVING ON ITS OWN AXIS WITH ASTONISHING VELOCITY AS IF WHIPPED ROUND BY THE FORCE OF THE WHIRLWIND +5683-32879-0017-1672: OF MARK WYLDER I SAY THIS HIS NAME HAS BEEN FOR YEARS HATEFUL TO ME AND RECENTLY IT HAS BECOME FRIGHTFUL AND YOU WILL PROMISE ME SIMPLY THIS THAT YOU WILL NEVER ASK ME TO SPEAK AGAIN ABOUT HIM +121-127105-0024-158: POOR DOUGLAS BEFORE HIS DEATH WHEN IT WAS IN SIGHT COMMITTED TO ME THE MANUSCRIPT THAT REACHED HIM ON THE THIRD OF THESE DAYS AND THAT ON THE SAME SPOT WITH IMMENSE EFFECT HE BEGAN TO READ TO OUR HUSHED LITTLE CIRCLE ON THE NIGHT OF THE FOURTH +1320-122617-0001-293: IN HIS RETURN TO THE CAMP HIS ACUTE AND (PRACTISED->PRACTICED) INTELLECTS WERE INTENTLY ENGAGED IN DEVISING MEANS TO COUNTERACT A WATCHFULNESS AND SUSPICION ON THE PART OF HIS ENEMIES THAT HE KNEW WERE IN NO DEGREE INFERIOR TO HIS OWN +8455-210777-0051-2407: WHAT (WORLD WIDE->WORLDWIDE) INIQUITY SUCH A SPEECH AS THAT DISCLOSES SAID I STILL TURNING MYSELF TO THE CAPTAIN FOR THOUGH I WOULD HAVE CRUSHED THEM BOTH BY MY WORDS HAD IT BEEN POSSIBLE MY DISLIKE (CENTRED->CENTERED) ITSELF ON SIR FERDINANDO +1188-133604-0019-83: THE LARGE LETTER CONTAINS INDEED ENTIRELY FEEBLE AND ILL DRAWN FIGURES THAT IS MERELY CHILDISH AND FAILING WORK OF AN INFERIOR HAND IT IS NOT CHARACTERISTIC OF GOTHIC OR ANY OTHER SCHOOL +7021-85628-0001-2061: HE MADE A BOW SO DEEP THAT HIS BACK CAME NEAR BREAKING AND HE WAS DUMBFOUNDED I CAN TELL YOU WHEN HE SAW IT WAS NOBODY BUT ANDERS +260-123288-0007-741: THE WIND NEVER LULLS BUT TO ACQUIRE INCREASED STRENGTH THE VAST BANK OF HEAVY CLOUDS IS A HUGE RESERVOIR OF FEARFUL WINDY GUSTS AND RUSHING STORMS +1089-134686-0022-22: HOW COMES IT THAT WHILE THE FIRST BEATITUDE PROMISES THE KINGDOM OF HEAVEN TO THE POOR OF HEART THE SECOND BEATITUDE PROMISES ALSO TO THE MEEK THAT THEY SHALL POSSESS THE LAND +6930-75918-0004-1955: SHE TAUGHT HER DAUGHTER THEN BY HER OWN AFFECTION FOR IT THAT LOVE FOR A COUNTRY WHERE THEY HAD BOTH BEEN HOSPITABLY RECEIVED AND WHERE A BRILLIANT FUTURE OPENED (BEFORE->FOR) THEM +7729-102255-0037-2270: HALF AN HOUR LATER TURNING A DEAF EAR TO ALL REMONSTRANCE HE GAVE THE PROPRIETORS UNTIL FIVE O'CLOCK TO REMOVE THEIR FAMILIES AND PERSONAL PROPERTY FROM THE FREE STATE HOTEL +1188-133604-0032-96: IT IS ONLY A PENCIL OUTLINE BY EDWARD BURNE JONES IN ILLUSTRATION OF THE STORY OF PSYCHE IT IS THE INTRODUCTION OF PSYCHE AFTER ALL HER TROUBLES INTO HEAVEN +121-127105-0023-157: LET ME SAY HERE DISTINCTLY TO HAVE DONE WITH IT THAT THIS NARRATIVE FROM AN EXACT TRANSCRIPT OF MY OWN MADE MUCH LATER IS WHAT I SHALL PRESENTLY GIVE +908-31957-0021-2615: OH TO SHOOT MY SOUL'S FULL MEANING INTO FUTURE YEARS THAT THEY SHOULD LEND IT UTTERANCE AND SALUTE LOVE THAT ENDURES FROM LIFE THAT DISAPPEARS +1995-1836-0005-471: MISSUS (GREY->GRAY) HAD MET SOUTHERNERS BEFORE BUT NOT INTIMATELY AND SHE ALWAYS HAD IN MIND VIVIDLY THEIR CRUELTY TO POOR NEGROES A SUBJECT SHE MADE A POINT OF INTRODUCING FORTHWITH 
+3570-5694-0002-922: SUCH CONSUMPTION AS FALLS TO THE WOMEN IS MERELY INCIDENTAL TO THEIR WORK IT IS A MEANS TO THEIR CONTINUED (LABOUR->LABOR) AND NOT A CONSUMPTION DIRECTED TO THEIR OWN COMFORT AND (FULNESS->FULLNESS) OF LIFE +1089-134686-0006-6: THE DULL LIGHT FELL MORE FAINTLY UPON THE PAGE WHEREON ANOTHER EQUATION BEGAN TO UNFOLD ITSELF SLOWLY AND TO SPREAD ABROAD ITS WIDENING (TAIL->TALE) +2300-131720-0020-592: IT WAS SPECIALLY SUITED FOR A TRIAL (PLANT->PLAT) ALSO IN THE EARLY DAYS WHEN A YIELD OF SIX OR EIGHT LAMPS TO THE HORSE POWER WAS CONSIDERED SUBJECT FOR CONGRATULATION +5683-32879-0016-1671: AND THE WAN ORACLE HAVING SPOKEN SHE (SATE->SAT) DOWN IN THE SAME SORT OF ABSTRACTION AGAIN BESIDE DORCAS AND SHE LOOKED FULL IN HER COUSIN'S EYES +5683-32865-0005-1611: BUT HER GREETING TO CAPTAIN LAKE WAS MORE THAN USUALLY HAUGHTY AND FROZEN AND HER FEATURES I FANCIED PARTICULARLY PROUD AND PALE +6930-76324-0028-2000: WELL I'M CONVINCED THAT THE BOARDED UP HOUSE MYSTERY HAPPENED NOT EARLIER THAN APRIL SIXTEENTH EIGHTEEN SIXTY ONE AND PROBABLY NOT MUCH LATER +8224-274384-0002-2300: THEY INFORMED THE ENGLISH PARLIAMENT OF THIS UNEXPECTED INCIDENT AND ASSURED THEM THAT THEY HAD ENTERED INTO NO PRIVATE TREATY WITH THE KING +908-157963-0023-2586: THE HELPLESS WORM AROSE AND SAT UPON THE (LILLYS->LILY'S) LEAF AND THE BRIGHT CLOUD (SAILD->SAILED) ON TO FIND HIS PARTNER IN THE VALE +7176-92135-0006-2193: I'VE GOT A LITTLE IDEA FOR A PLAY ABOUT A MAN AND A WOMAN AND ANOTHER WOMAN AND BUT PERHAPS I'D BETTER KEEP THE PLOT A SECRET FOR THE MOMENT +8463-294828-0031-2493: ONE OF THE SAILORS LED ME TO THE AFTERDECK WHERE I STOOD IN THE PRESENCE OF A SMART LOOKING OFFICER WHO EXTENDED HIS HAND TO ME +1284-1180-0001-213: HIS HAT HAD A PEAKED CROWN (AND->AT) A FLAT BRIM AND AROUND THE BRIM WAS A ROW OF TINY GOLDEN BELLS THAT TINKLED WHEN HE MOVED +5639-40744-0014-1578: AMONG OTHER THINGS ON WHICH SHE CAST HER EYES WAS A SMALL CRUCIFIX OF SOLID SILVER STANDING ON A CABINET NEAR THE WINDOW +8463-287645-0006-2433: THE DOCTOR WHO ATTENDED THE INJURED CREATURE IN THIS CASE WAS SIMPLY TOLD THAT SHE SLIPPED AND FELL DOWN (*->THE) STAIRS AS SHE WAS COMING DOWN +8463-294825-0005-2447: OTHER SUBTLETIES OCCUR INSIDE EACH EPISODE THE TEXTURES SPARKLING WITH WIT INFORMATION AND INSIGHT +1284-1180-0002-214: INSTEAD OF SHOES THE OLD MAN WORE BOOTS WITH TURNOVER TOPS AND HIS BLUE COAT HAD WIDE CUFFS OF GOLD BRAID +8455-210777-0031-2387: YOU HAVE RECEIVED US WITH ALL THAT COURTESY AND HOSPITALITY FOR WHICH YOUR CHARACTER IN ENGLAND STANDS SO HIGH +8224-274384-0000-2298: HE PASSED THROUGH HENLEY SAINT ALBANS AND CAME SO NEAR TO LONDON AS HARROW ON THE HILL +7127-75946-0015-2103: SUDDENLY FOR THE PURPOSE OF RESTORING PEACE AND ORDER SPRING ACCOMPANIED BY HIS WHOLE COURT MADE HIS APPEARANCE +7176-88083-0004-2163: BUT SUDDENLY STRAIGHT AND SWIFT AS A DIVING (CORMORANT->CORMERANT) HE SHOT DOWN INTO THE TORRENT AND DISAPPEARED BENEATH THE SURFACE +121-127105-0034-168: IT SOUNDED DULL IT SOUNDED STRANGE AND ALL THE MORE SO BECAUSE OF HIS MAIN CONDITION WHICH WAS +260-123286-0006-708: WE ARE LOSING TIME AND THE FACT IS I HAVE NOT COME ALL THIS WAY TO TAKE A LITTLE SAIL UPON A POND ON A RAFT +3729-6852-0011-1038: I HAD A NAME I BELIEVE IN MY YOUNG DAYS BUT I HAVE FORGOTTEN IT SINCE I HAVE BEEN IN SERVICE +5639-40744-0029-1593: THIS TRUTH WHICH I HAVE LEARNED FROM HER LIPS IS CONFIRMED BY HIS FACE IN WHICH WE HAVE BOTH BEHELD THAT OF OUR SON +237-126133-0019-633: DEAR ME EJACULATED THE OLD GENTLEMAN IN THE UTMOST AMAZEMENT AND 
SUCH A TIME AS I'VE HAD TO GET HER HERE TOO +2830-3980-0061-858: NOT GOLD OR SILVER OR (PASCHAL->PATIAL) LAMBS OR AN ANGEL BUT HIMSELF WHAT FOR +6829-68769-0018-1878: MISS DE GRAF SAID KENNETH NOTICING THE BOY'S FACE CRITICALLY AS HE STOOD WHERE THE LIGHT FROM THE PASSAGE FELL UPON IT +908-157963-0005-2568: LIKE THE (DOVES->DOVE'S) VOICE LIKE TRANSIENT DAY LIKE MUSIC IN THE AIR AH +908-31957-0003-2597: WHEN CALLED BEFORE I TOLD HOW HASTILY I DROPPED MY FLOWERS OR (BRAKE->BREAK) OFF FROM A GAME +672-122797-0048-1833: IF IT ONLY WERE NOT SO DARK HERE AND SO TERRIBLY LONELY +1320-122612-0013-288: A CIRCLE OF A FEW HUNDRED FEET IN CIRCUMFERENCE WAS DRAWN AND EACH OF THE PARTY TOOK A SEGMENT FOR HIS PORTION +6829-68769-0048-1908: GIVE ME A CHECK FOR A HUNDRED AND FIFTY AND I'LL TURN OVER TO YOU THE FORGED CHECK AND QUASH FURTHER PROCEEDINGS +7176-92135-0036-2223: THE CROWD DRIFTS OFF LEAVING THE HERO AND HEROINE ALONE IN THE MIDDLE OF THE STAGE AND THEN YOU CAN BEGIN +7021-79740-0004-2043: YOU HAVE COME (ANDELLA ANDELLA->ANDELA AND DELA) WAS THE NAME OF JANE'S DOLL TO MAKE ROSALIE A VISIT +4507-16021-0001-1222: IT ENGENDERS A WHOLE WORLD LA PEGRE FOR (WHICH READ->WITCH RED) THEFT AND A HELL LA (PEGRENNE->PEGREN) FOR (WHICH READ->WITCH RED) HUNGER +3729-6852-0000-1027: TO CELEBRATE THE ARRIVAL OF HER SON (SILVIA->SYLVIA) GAVE A SPLENDID SUPPER TO WHICH SHE HAD INVITED ALL HER RELATIVES AND IT WAS A GOOD OPPORTUNITY FOR ME TO MAKE THEIR ACQUAINTANCE +5683-32879-0002-1657: SO THERE CAME A STEP AND A LITTLE RUSTLING OF FEMININE DRAPERIES THE SMALL DOOR OPENED AND RACHEL ENTERED WITH HER HAND EXTENDED AND A PALE SMILE OF WELCOME +2300-131720-0021-593: THE STREET CONDUCTORS WERE OF THE OVERHEAD POLE LINE CONSTRUCTION AND WERE INSTALLED BY THE CONSTRUCTION COMPANY THAT HAD BEEN ORGANIZED BY EDISON TO BUILD AND EQUIP CENTRAL STATIONS +4970-29093-0002-1283: HE HAS NO TRADITIONS TO BIND HIM OR GUIDE HIM AND HIS IMPULSE IS TO BREAK AWAY FROM THE OCCUPATION HIS FATHER HAS FOLLOWED AND MAKE A NEW WAY FOR HIMSELF +1188-133604-0004-68: SOME OF THE TOUCHES INDEED WHEN THE TINT HAS BEEN MIXED WITH MUCH WATER HAVE BEEN LAID IN LITTLE DROPS OR PONDS SO THAT THE PIGMENT MIGHT CRYSTALLIZE HARD AT THE EDGE +4507-16021-0017-1238: HE WOULD BE LIKE A PHILOLOGIST REFUSING TO EXAMINE A FACT IN LANGUAGE A PHILOSOPHER HESITATING TO SCRUTINIZE A FACT IN HUMANITY +2094-142345-0037-548: WE CAN ALL BE SERVANTS OF GOD WHEREVER OUR LOT IS CAST BUT HE GIVES US DIFFERENT SORTS OF WORK ACCORDING AS HE FITS US FOR IT AND CALLS US TO IT +7176-92135-0040-2227: HOSTESS REPLACES LUMP AND INCLINES EMPTY TEAPOT OVER TRAY FOR A MOMENT THEN HANDS HIM A CUP PAINTED BROWN INSIDE THUS DECEIVING THE GENTLEMAN WITH THE TELESCOPE IN THE UPPER CIRCLE +7729-102255-0024-2257: THE FREE STATE MEN CLUNG TO THEIR PRAIRIE TOWNS AND PRAIRIE RAVINES WITH ALL THE OBSTINACY AND COURAGE OF TRUE DEFENDERS OF THEIR HOMES AND FIRESIDES +8224-274381-0007-2287: BY QUICK MARCHES THROUGH THESE INACCESSIBLE MOUNTAINS THAT GENERAL FREED HIMSELF FROM THE SUPERIOR FORCES OF THE COVENANTERS +1320-122617-0016-308: THE CUNNING MAN IS AFRAID THAT HIS BREATH WILL BLOW UPON HIS BROTHERS AND TAKE AWAY THEIR COURAGE TOO CONTINUED DAVID IMPROVING THE HINT HE RECEIVED THEY MUST STAND FURTHER OFF +908-31957-0008-2602: NAY I RATHER THRILLED DISTRUSTING EVERY LIGHT THAT SEEMED TO GILD THE ONWARD PATH AND FEARED TO OVERLEAN A FINGER EVEN +1320-122612-0003-278: HE OFTEN STOPPED TO EXAMINE THE TREES NOR DID HE CROSS A RIVULET WITHOUT ATTENTIVELY CONSIDERING THE QUANTITY THE VELOCITY 
AND THE COLOR OF ITS WATERS +1284-1180-0021-233: I THINK THE NEXT GLASS CAT THE MAGICIAN MAKES WILL HAVE NEITHER BRAINS NOR HEART FOR THEN IT WILL NOT OBJECT TO CATCHING MICE AND MAY PROVE OF SOME USE TO US +237-126133-0024-638: AND THE OLD GENTLEMAN WAS SO DELIGHTED WITH HIS SUCCESS THAT HE HAD TO BURST OUT INTO A SERIES OF SHORT HAPPY BITS OF LAUGHTER THAT OCCUPIED QUITE A SPACE OF TIME +7176-88083-0023-2182: HE HAD A LOT OF LINE OUT AND THE PLACE WAS NONE TOO FREE FOR A LONG CAST BUT HE WAS IMPATIENT TO DROP HIS FLIES AGAIN ON THE SPOT WHERE THE BIG FISH WAS FEEDING +3570-5695-0009-952: EACH WILL THEREFORE SERVE ABOUT EQUALLY WELL DURING THE EARLIER STAGES OF SOCIAL GROWTH +1995-1826-0018-457: HER REGARD SHIFTED TO THE GREEN STALKS AND LEAVES AGAIN AND SHE STARTED TO MOVE AWAY +1580-141084-0039-427: I ENTERED AND I TOOK YOU INTO MY CONFIDENCE AS TO THE SUGGESTIONS OF THE SIDE TABLE +4446-2273-0024-1162: (BARTLEY->PARTLEY) STARTED WHEN HILDA RANG THE LITTLE BELL BESIDE HER DEAR ME WHY DID YOU DO THAT +1221-135766-0002-173: YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION +4970-29095-0022-1327: IS THEE GOING TO THE YEARLY MEETING RUTH ASKED ONE OF THE GIRLS +1089-134691-0014-52: THE PHRASE AND THE DAY AND THE SCENE HARMONIZED IN (A CHORD->ACCORD) +1580-141084-0009-397: IT IS POSSIBLE THAT I MAY BE IN A POSITION THEN TO INDICATE SOME COURSE OF ACTION +5142-36377-0006-1537: A NEW MEMBER OF THE FAMILY CIRCLE WHO INSTANTLY ATTRACTED MY ATTENTION ENTERED THE ROOM +61-70970-0018-1762: THE HOURS PASSED WEARILY BY AND MOVEMENT COULD YET BE HEARD ABOUT THE HALL +6829-68769-0052-1912: HE MIGHT HAVE HAD THAT FORGED CHECK FOR THE FACE OF IT IF HE'D BEEN SHARP +4507-16021-0046-1267: EACH DAY HAS ITS OWN GREAT GRIEF OR ITS LITTLE CARE +8463-294825-0010-2452: AND IN THIS LAST ACTION HE FALLS INTO THE CLASSIC SIN OF PRIDE +3575-170457-0027-997: I CANNOT DENY MYSELF THE GRATIFICATION OF INSERTING SOUTHEY'S REPLY +8463-294828-0019-2481: ANYHOW WE'LL LEAVE INSTRUCTIONS TO SHIP THE WHOLE MENAGERIE TO FRANCE +8455-210777-0006-2362: WE ARE QUITE SATISFIED NOW CAPTAIN (BATTLEAX->BATTLE AXE) SAID MY WIFE +1580-141083-0004-338: I HAD TO READ IT OVER CAREFULLY AS THE TEXT MUST BE ABSOLUTELY CORRECT +1284-1181-0003-248: THE HAIR WAS OF BROWN YARN AND HUNG DOWN ON HER NECK IN SEVERAL NEAT BRAIDS +7127-75946-0004-2092: CERTAINLY SIRE BUT I MUST HAVE MONEY TO DO THAT WHAT +8463-287645-0010-2437: HE WORKED ME VERY HARD HE WANTED TO BE BEATING ME ALL THE TIME +4077-13751-0013-1087: THEIR SUFFERINGS HAVE NEVER YET BEEN FITLY CHRONICLED BY HUMAN SCRIBE +5142-33396-0015-1477: AS OUR BOAT FLASHED DOWN THE ROLLERS INTO THE WATER I MADE THIS SONG AND SANG IT +672-122797-0052-1837: THERE'S MANY A ONE CONSIDERABLY OLDER THAN I AM +1089-134686-0007-7: A COLD LUCID INDIFFERENCE REIGNED IN HIS SOUL +6829-68769-0022-1882: WE HAVE HEARD SOMETHING OF YOUR STORY SAID KENNETH AND ARE INTERESTED IN IT +908-157963-0009-2572: WHY SHOULD THE MISTRESS OF THE VALES OF (HAR->HARR) UTTER A SIGH +121-121726-0004-113: HEAVEN A GOOD PLACE TO BE RAISED TO +4446-2273-0009-1147: IT'S NOT PARTICULARLY RARE SHE SAID BUT SOME OF IT WAS MY MOTHER'S +1320-122617-0030-322: SO CHOOSE FOR YOURSELF TO MAKE A RUSH OR TARRY HERE +8455-210777-0050-2406: ONE OF US ALWAYS REMAINS ON BOARD WHILE THE OTHER IS ON SHORE +8455-210777-0065-2421: I SHALL BE HAPPY TO TAKE CHARGE OF THEM SAID SIR FERDINANDO +7127-75947-0019-2137: DID NOT THE DANCING AMUSE YOU NO +237-126133-0008-622: ASKED PHRONSIE WITH HER LITTLE FACE CLOSE TO POLLY'S OWN 
+6829-68769-0007-1867: BUT UNDER THE CIRCUMSTANCES I DOUBT IF SUCH AN ARRANGEMENT COULD BE MADE +1580-141083-0049-383: (ANYONE->ANY ONE) IN THE ROOM COULD GET OUT YES SIR +1284-1181-0002-247: THE HEAD OF THE PATCHWORK GIRL WAS THE MOST CURIOUS PART OF HER +61-70970-0003-1747: IF FOR A WHIM YOU BEGGAR YOURSELF I CANNOT STAY YOU +4446-2271-0019-1132: AFTER THAT IT WAS EASY TO FORGET ACTUALLY TO FORGET +237-134500-0008-667: I SUPPOSE IT'S THE WET SEASON WILL YOU HAVE TO CUT THEM TOO +4992-41806-0011-1399: MOTHER CAREY POURED COFFEE NANCY CHOCOLATE AND THE OTHERS HELPED SERVE THE SANDWICHES AND CAKE DOUGHNUTS AND TARTS +2961-960-0015-889: THE STYLE AND PLAN OF THE TIMAEUS DIFFER GREATLY FROM THAT OF ANY OTHER OF THE PLATONIC DIALOGUES +6829-68771-0025-1939: SHE SAT DOWN IN A ROCKING CHAIR AND CLASPING HER HANDS IN HER LAP ROCKED SLOWLY BACK AND FORTH I'M SORRY SAID BETH +2961-960-0016-890: BUT PLATO HAS NOT THE SAME MASTERY OVER HIS INSTRUMENT WHICH HE EXHIBITS IN THE (PHAEDRUS->PHEDROS) OR SYMPOSIUM +7729-102255-0036-2269: HE PLANTED A COMPANY BEFORE THE HOTEL AND DEMANDED A SURRENDER OF THE ARMS BELONGING TO THE FREE STATE MILITARY COMPANIES +1320-122617-0012-304: FOUR OR FIVE OF THE LATTER ONLY LINGERED ABOUT THE DOOR OF THE PRISON OF UNCAS WARY BUT CLOSE OBSERVERS OF THE MANNER OF THEIR CAPTIVE +8455-210777-0033-2389: BUT YOUR POWER IS SO SUPERIOR TO ANY THAT I CAN ADVANCE AS TO MAKE US HERE FEEL THAT THERE IS NO DISGRACE IN YIELDING TO IT +4992-41797-0020-1385: NANCY'S CURLY CHESTNUT CROP SHONE IN THE SUN AND OLIVE'S THICK BLACK (PLAITS->PLATES) LOOKED BLACKER BY CONTRAST +7127-75947-0016-2134: OH MADEMOISELLE WHY HAVE I NOT A DEVOTED SISTER OR A TRUE FRIEND SUCH AS YOURSELF +3570-5695-0006-949: VERY MUCH OF SQUALOR AND DISCOMFORT WILL BE ENDURED BEFORE THE LAST TRINKET OR THE LAST (PRETENSE->PRETENCE) OF PECUNIARY DECENCY IS PUT AWAY +6829-68769-0049-1909: HE DETESTED THE GRASPING DISPOSITION THAT WOULD ENDEAVOR TO TAKE ADVANTAGE OF HIS EVIDENT DESIRE TO HELP YOUNG GATES +1995-1837-0003-484: THE REVELATION OF HIS LOVE LIGHTED AND BRIGHTENED SLOWLY TILL IT FLAMED LIKE A SUNRISE OVER HIM AND LEFT HIM IN BURNING WONDER +6930-81414-0013-2014: I HAD SCARCELY KNOWN WHAT I HAD BEEN SAYING OR DOING UP TO THIS TIME BUT AS HE SPOKE I LOOKED AT MY HAND +4992-41797-0004-1369: I (SWAN->SWAIN) TO MAN HE EJACULATED IF YOU DON'T WORK HARD YOU CAN'T KEEP UP WITH THE TIMES DOCTOR OF LAWS +2830-3980-0032-829: WE LOOK FOR THAT REWARD WHICH EYE HATH NOT SEEN NOR EAR HEARD NEITHER HATH ENTERED INTO THE HEART OF MAN +8555-284447-0023-2524: I COULDN'T SHIVER MUCH (BEIN->BEING) BOUND SO TIGHT BUT WHEN I'M LOOSE I MEAN TO HAVE (JUS->JUST) ONE GOOD SHIVER TO RELIEVE MY (FEELIN'S->FEELINGS) +2961-961-0008-905: BUT THE MEMORY OF THEIR EXPLOITS (HAS->HAD) PASSED AWAY OWING TO THE LAPSE OF TIME AND THE EXTINCTION OF THE ACTORS +6829-68769-0004-1864: BUT THEY COULD NOT HAVE PROVEN A CASE AGAINST LUCY IF SHE WAS INNOCENT AND ALL THEIR THREATS OF ARRESTING HER WERE PROBABLY (*->A) MERE BLUFF +3575-170457-0040-1010: INDEED THERE WERE ONLY ONE OR TWO STRANGERS WHO COULD BE ADMITTED AMONG THE SISTERS WITHOUT PRODUCING THE SAME RESULT +3575-170457-0009-979: JANUARY AND FEBRUARY OF EIGHTEEN THIRTY SEVEN HAD PASSED AWAY AND STILL THERE WAS NO REPLY FROM SOUTHEY +4992-41806-0012-1400: AT THAT MOMENT THE GENTLEMAN ENTERED BEARING A HUGE OBJECT CONCEALED BY A PIECE OF GREEN FELT +4077-13751-0010-1084: TO THE FERVENT LATTER DAY SAINT A TEMPLE IS NOT SIMPLY A CHURCH BUILDING A HOUSE FOR (*->A) RELIGIOUS ASSEMBLY 
+7127-75947-0001-2119: UPON THIS MADAME DEIGNED TO TURN HER EYES LANGUISHINGLY TOWARDS THE COMTE OBSERVING +237-134500-0036-695: I CAN'T PLAY WITH YOU LIKE A LITTLE BOY ANY MORE HE SAID SLOWLY THAT'S WHAT YOU MISS MARIE +237-126133-0005-619: OH SHE'S ALWAYS AT THE PIANO SAID VAN SHE MUST BE THERE NOW SOMEWHERE AND THEN SOMEBODY LAUGHED +908-157963-0007-2570: THE (LILLY->LILY) OF THE VALLEY BREATHING IN THE HUMBLE GRASS (ANSWERD->ANSWERED) THE LOVELY (MAID AND->MAIDEN) SAID I AM A (WATRY WEED->WATERYED) AND I AM VERY SMALL AND LOVE TO DWELL IN LOWLY VALES SO WEAK THE GILDED BUTTERFLY SCARCE PERCHES ON MY HEAD YET I AM VISITED FROM HEAVEN AND HE THAT SMILES ON ALL WALKS IN THE VALLEY AND EACH MORN OVER ME SPREADS HIS HAND SAYING REJOICE THOU HUMBLE GRASS THOU NEW BORN LILY FLOWER +5639-40744-0003-1567: IN A MOMENT HE COMMUNICATED HIS THOUGHTS TO HIS COMPANIONS AND IN THE NEXT MOMENT THEY RESOLVED TO TURN BACK AND CARRY HER OFF TO PLEASE (RODOLFO->RUDOLPHO) FOR THE RICH WHO ARE OPEN HANDED ALWAYS FIND (PARASITES->PARRICIDE) READY TO ENCOURAGE THEIR BAD PROPENSITIES AND THUS TO CONCEIVE THIS WICKED DESIGN TO COMMUNICATE IT APPROVE IT RESOLVE ON RAVISHING (LEOCADIA->THE ARCADIA) AND TO CARRY THAT DESIGN INTO EFFECT WAS THE WORK OF A MOMENT +2300-131720-0035-607: IN THIS CONNECTION IT SHOULD BE MENTIONED THAT THE ASSOCIATION OF EDISON ILLUMINATING COMPANIES IN THE SAME YEAR ADOPTED RESOLUTIONS UNANIMOUSLY TO THE EFFECT THAT THE EDISON METER WAS ACCURATE AND THAT ITS USE WAS NOT EXPENSIVE FOR STATIONS ABOVE ONE THOUSAND LIGHTS AND THAT THE BEST FINANCIAL RESULTS WERE INVARIABLY SECURED IN A STATION SELLING CURRENT BY METER +1221-135767-0000-187: HESTER PRYNNE WENT ONE DAY TO THE MANSION OF GOVERNOR BELLINGHAM WITH A PAIR OF GLOVES WHICH (SHE->HE) HAD FRINGED AND EMBROIDERED TO HIS ORDER AND WHICH WERE TO BE WORN ON SOME GREAT OCCASION OF STATE FOR THOUGH THE CHANCES OF A POPULAR ELECTION HAD CAUSED THIS FORMER RULER TO DESCEND A STEP OR TWO FROM THE HIGHEST RANK HE STILL HELD AN (HONOURABLE->HONORABLE) AND INFLUENTIAL PLACE AMONG THE COLONIAL MAGISTRACY +121-123852-0003-127: THOUGHT KILLS ME THAT I AM NOT THOUGHT TO LEAP LARGE LENGTHS OF MILES WHEN THOU ART GONE BUT THAT SO MUCH OF EARTH AND WATER WROUGHT I MUST ATTEND TIME'S LEISURE WITH MY MOAN RECEIVING NOUGHT BY ELEMENTS SO SLOW BUT HEAVY TEARS BADGES OF EITHER'S WOE +4992-23283-0012-1356: INDEPENDENT OF HER FORTUNE SHE HAS BEAUTY TO CAPTIVATE THE HEART OF ANY MAN AND WITH ALL HER FOLLIES SHE HAS A FRANKNESS IN HER MANNER AN UNAFFECTED WISDOM IN HER THOUGHTS A VIVACITY IN HER CONVERSATION AND WITHAL A SOFTNESS IN HER (DEMEANOUR->DEMEANOR) THAT MIGHT ALONE ENGAGE THE AFFECTIONS OF A MAN OF THE NICEST SENTIMENTS AND THE STRONGEST UNDERSTANDING +8455-210777-0004-2360: I HAVE COME TO YOUR SHORES MISTER PRESIDENT WITH THE PURPOSE OF SEEING HOW THINGS ARE PROGRESSING IN THIS DISTANT QUARTER OF THE WORLD +1320-122617-0000-292: NOTWITHSTANDING THE HIGH RESOLUTION OF HAWKEYE HE FULLY COMPREHENDED ALL THE DIFFICULTIES AND DANGER HE WAS ABOUT TO INCUR +4446-2273-0008-1146: I'VE MANAGED TO SAVE SOMETHING EVERY YEAR AND THAT WITH HELPING MY THREE SISTERS NOW AND THEN AND TIDING POOR COUSIN MIKE OVER BAD SEASONS +8455-210777-0034-2390: NOT A DOUBT BUT HAD YOUR FORCE BEEN ONLY DOUBLE OR (TREBLE->TROUBLE) OUR OWN I SHOULD HAVE FOUND IT MY DUTY TO STRUGGLE WITH YOU +1320-122612-0002-277: AFTER PROCEEDING A FEW MILES THE PROGRESS OF HAWKEYE WHO LED THE ADVANCE BECAME MORE DELIBERATE AND WATCHFUL +1580-141083-0033-367: I WAS HOPING THAT IF THE PAPER ON WHICH HE 
WROTE WAS THIN SOME TRACE OF IT MIGHT COME THROUGH UPON THIS POLISHED SURFACE NO I SEE NOTHING +6930-81414-0014-2015: IN THE LIGHT OF THE MOON I SAW A KNIFE RED WITH BLOOD AND MY HAND TOO WAS ALSO DISCOLOURED +5683-32865-0004-1610: WHATEVER LORD CHELFORD SAID MISS BRANDON RECEIVED IT VERY GRACIOUSLY AND EVEN WITH A MOMENTARY SMILE +4446-2275-0016-1191: HILDA WATCHED HIM FROM (HER->THE) CORNER TREMBLING AND SCARCELY BREATHING DARK SHADOWS GROWING ABOUT HER EYES IT +6829-68769-0006-1866: IF THE PROSECUTION WERE WITHDRAWN AND THE CASE SETTLED WITH THE VICTIM OF THE FORGED CHECK THEN THE YOUNG MAN WOULD BE ALLOWED HIS FREEDOM +237-134500-0037-696: BUT (EMIL->AMY) IF I UNDERSTAND THEN ALL OUR GOOD TIMES ARE OVER WE CAN NEVER DO NICE THINGS TOGETHER ANY MORE +5142-36377-0005-1536: THE DOOR OPENED AGAIN WHILE I WAS STILL STUDYING THE TWO BROTHERS WITHOUT I HONESTLY CONFESS BEING VERY (FAVORABLY->FAVOURABLY) IMPRESSED BY EITHER OF THEM +1221-135767-0014-201: YEA HIS HONOURABLE WORSHIP IS WITHIN BUT HE HATH A GODLY MINISTER OR TWO WITH HIM AND LIKEWISE A LEECH +2830-3980-0019-816: I KNEW NOTHING OF THE DOCTRINE OF FAITH BECAUSE WE WERE TAUGHT SOPHISTRY INSTEAD OF CERTAINTY AND NOBODY UNDERSTOOD SPIRITUAL BOASTING +3575-170457-0011-981: BUT IT IS NOT WITH A VIEW TO DISTINCTION THAT YOU SHOULD CULTIVATE THIS TALENT IF YOU CONSULT YOUR OWN HAPPINESS +7021-79740-0007-2046: THEN TURNING TO JANE SHE ASKED IN A SOMEWHAT ALTERED TONE HAS SHE BEEN A GOOD GIRL JANE +3729-6852-0044-1071: I WILL MAKE YOU TRANSLATE THEM INTO FRENCH AND YOU NEED NOT BE AFRAID OF MY FINDING YOU INSATIABLE +3570-5695-0008-951: THE QUESTION IS WHICH OF THE TWO METHODS WILL MOST EFFECTIVELY REACH THE PERSONS WHOSE CONVICTIONS IT IS DESIRED TO (AFFECT->EFFECT) +1580-141084-0008-396: I CANNOT ALLOW THE EXAMINATION TO BE HELD IF ONE OF THE PAPERS HAS BEEN TAMPERED WITH THE SITUATION MUST BE FACED +121-121726-0003-112: HAY FEVER A HEART TROUBLE CAUSED BY FALLING IN LOVE WITH A GRASS WIDOW +6829-68769-0005-1865: HE WAS SOFT HEARTED AND IMPETUOUS SAID BETH AND BEING IN LOVE HE DIDN'T STOP TO COUNT THE COST +1188-133604-0033-97: EVERY PLANT IN THE GRASS IS SET FORMALLY GROWS PERFECTLY AND MAY BE REALIZED COMPLETELY +1089-134686-0021-21: IF A LAYMAN IN GIVING BAPTISM POUR THE WATER BEFORE SAYING THE WORDS IS THE CHILD BAPTIZED +1284-1180-0005-217: NO ONE WOULD DISTURB THEIR LITTLE HOUSE EVEN IF (ANYONE->ANY ONE) CAME SO FAR INTO THE THICK FOREST WHILE THEY WERE GONE +1580-141083-0003-337: WITHOUT HIS (SCRAPBOOKS->SCRAP BOOKS) HIS CHEMICALS AND HIS HOMELY UNTIDINESS HE WAS AN UNCOMFORTABLE MAN +8230-279154-0020-2332: IF WE HAD RETAINED THE SUBJECT OR ACT IN KNOWLEDGE THE WHOLE PROBLEM OF MEMORY WOULD HAVE BEEN COMPARATIVELY SIMPLE +2961-961-0010-907: BUT IN EGYPT THE TRADITIONS OF OUR OWN AND OTHER LANDS ARE BY US REGISTERED FOR EVER IN OUR TEMPLES +8230-279154-0005-2317: ALL THAT I AM DOING IS TO USE ITS LOGICAL TENABILITY AS A HELP IN THE ANALYSIS OF WHAT OCCURS WHEN WE REMEMBER +1995-1836-0006-472: SHE WAS THEREFORE MOST AGREEABLY SURPRISED TO HEAR MISTER (CRESSWELL->CRASWELL) EXPRESS HIMSELF SO CORDIALLY AS APPROVING OF NEGRO EDUCATION +4446-2275-0002-1177: ALEXANDER PACED UP AND DOWN THE HALLWAY BUTTONING AND UNBUTTONING HIS OVERCOAT UNTIL SHE RETURNED AND TOOK HIM UP TO HILDA'S LIVING ROOM +8230-279154-0035-2347: THUS NO KNOWLEDGE AS TO THE PAST IS TO BE DERIVED FROM THE FEELING OF FAMILIARITY ALONE +2830-3980-0050-847: DID NOT CHRIST HIMSELF SAY I AM THE WAY AND THE TRUTH AND THE LIFE NO MAN COMETH UNTO THE FATHER BUT BY ME 
+3570-5696-0008-967: BUT IT IS ON OTHER GROUNDS WORTH NOTING THAT THE TERM WASTE IN THE LANGUAGE OF EVERYDAY LIFE IMPLIES DEPRECATION OF WHAT IS CHARACTERIZED AS WASTEFUL +6829-68771-0013-1927: THE ATTENDANCE WAS UNEXPECTEDLY LARGE AND THE GIRLS WERE DELIGHTED FORESEEING GREAT SUCCESS FOR THEIR (FETE->FIGHT) +2300-131720-0036-608: THE (METER->METRE) CONTINUED IN GENERAL SERVICE DURING EIGHTEEN NINETY NINE AND PROBABLY UP TO THE CLOSE OF THE CENTURY +1580-141083-0034-368: AS HOLMES DREW THE CURTAIN I WAS AWARE FROM SOME LITTLE RIGIDITY AND (*->AN) ALERTNESS OF HIS ATTITUDE THAT HE WAS PREPARED FOR AN EMERGENCY +2094-142345-0036-547: NAY DEAR AUNT YOU NEVER HEARD ME SAY THAT ALL PEOPLE ARE CALLED TO FORSAKE THEIR WORK AND THEIR FAMILIES +4992-41797-0007-1372: HE (GIVE->GAVE) UP HIS POSITION AND SHUT THE FAMILY UP IN THAT TOMB OF A HOUSE SO (T->*) HE COULD STUDY HIS BOOKS +2300-131720-0005-577: WHY IF WE ERECT A STATION AT THE FALLS IT IS A GREAT ECONOMY TO GET IT UP TO THE CITY +4970-29095-0037-1342: BUT THAT WISE AND PLACID WOMAN UNDERSTOOD THE SWEET (REBEL->REBBLE) A GREAT DEAL BETTER THAN RUTH UNDERSTOOD HERSELF +1284-1180-0006-218: AT THE FOOT OF THE MOUNTAIN THAT SEPARATED THE COUNTRY OF THE MUNCHKINS FROM THE COUNTRY OF THE (GILLIKINS->GILLAKANS) THE PATH DIVIDED +7729-102255-0039-2272: THE INMATES BEING REMOVED AT THE APPOINTED HOUR A FEW CANNON BALLS WERE FIRED THROUGH THE STONE WALLS +1089-134686-0008-8: THE CHAOS IN WHICH HIS (ARDOUR->ARDOR) EXTINGUISHED ITSELF WAS A COLD INDIFFERENT KNOWLEDGE OF HIMSELF +237-126133-0023-637: HE CRIED IN HIGH DUDGEON JUST AS IF HE OWNED THE WHOLE OF THE PEPPERS AND COULD DISPOSE OF THEM ALL TO SUIT HIS FANCY +4992-23283-0013-1357: MY LORD MISS MILNER'S TASTE IS NOT A DEPRAVED ONE IT IS BUT TOO REFINED +2094-142345-0052-563: I MUST COME ANOTHER DAY AND SEE YOUR HUSBAND I WANT TO HAVE A CONSULTATION WITH HIM ABOUT HORSES +2830-3980-0065-862: PAUL ANSWERS THE MAN WHO IS NAMED JESUS CHRIST AND THE SON OF GOD GAVE HIMSELF FOR OUR SINS +7021-85628-0002-2062: HE WAS SUCH A BIG BOY THAT HE WORE HIGH BOOTS AND CARRIED A (JACK KNIFE->JACKKNIFE) +2830-3980-0005-802: DO YOU SUPPOSE THAT GOD FOR THE SAKE OF A FEW LUTHERAN HERETICS WOULD DISOWN HIS ENTIRE CHURCH +4992-41797-0022-1387: SHE'S OLDER THAN I AM BUT SO TINY AND SAD AND SHY THAT SHE SEEMS LIKE A CHILD +2094-142345-0048-559: SAID CAPTAIN DONNITHORNE SEATING HIMSELF WHERE HE COULD SEE ALONG THE SHORT PASSAGE TO THE OPEN DAIRY DOOR +1320-122612-0015-290: THE WHOLE PARTY CROWDED TO THE SPOT WHERE UNCAS POINTED OUT THE IMPRESSION OF A MOCCASIN IN THE MOIST (ALLUVION->ALLUVIAN) +1995-1837-0004-485: HE PANTED TO KNOW IF SHE TOO KNEW OR KNEW AND CARED NOT OR CARED AND KNEW NOT +4446-2271-0001-1114: HE HAD PRECONCEIVED IDEAS ABOUT EVERYTHING AND HIS IDEA ABOUT AMERICANS WAS THAT THEY SHOULD BE ENGINEERS OR MECHANICS +4446-2275-0029-1204: PLEASE TELL ME ONE THING BARTLEY AT LEAST TELL ME THAT YOU BELIEVE I THOUGHT I WAS MAKING YOU HAPPY +1580-141083-0031-365: HOLMES HELD OUT A SMALL CHIP WITH THE LETTERS N N AND A SPACE OF CLEAR WOOD AFTER THEM YOU SEE +8463-294828-0002-2464: EVEN SO I HAD JUST RETURNED FROM AN ARDUOUS JOURNEY EXHAUSTED AND BADLY NEEDING A REST +237-126133-0006-620: AT THIS THE BUNDLE OPENED SUDDENLY AND OUT POPPED PHRONSIE +2094-142345-0049-560: NO SIR HE ISN'T HE'S GONE TO (ROSSETER->ROSSITER) TO SEE MISTER WEST THE FACTOR ABOUT THE WOOL +2830-3979-0000-784: WE WANT YOU TO HELP US PUBLISH SOME LEADING WORK OF (LUTHER'S->LUTHERS) FOR THE GENERAL AMERICAN MARKET WILL YOU DO IT 
+5142-33396-0042-1504: SO NO TALES GOT OUT TO THE (NEIGHBORS->NEIGHBOURS) BESIDES IT WAS A LONELY PLACE AND BY GOOD LUCK NO ONE CAME THAT WAY +61-70970-0000-1744: YOUNG FITZOOTH HAD BEEN COMMANDED TO HIS MOTHER'S CHAMBER SO SOON AS HE HAD COME OUT FROM HIS CONVERSE WITH THE SQUIRE +6829-68769-0034-1894: I'M GOING TO SEE MISTER MARSHALL SAID KENNETH AND DISCOVER WHAT I CAN DO TO ASSIST YOU THANK YOU SIR +8455-210777-0018-2374: THIS SHE SAID WAS TRUE HOSPITALITY AND I AM NOT SURE THAT I DID NOT AGREE WITH HER +121-121726-0001-110: HARANGUE THE TIRESOME PRODUCT OF A TIRELESS TONGUE +1995-1826-0016-455: THE GLIMMERING SEA OF DELICATE LEAVES WHISPERED AND MURMURED BEFORE HER STRETCHING AWAY TO THE NORTHWARD +908-31957-0020-2614: I THANK ALL WHO HAVE LOVED ME IN THEIR HEARTS WITH THANKS AND LOVE FROM MINE +260-123286-0023-725: THE RAFT WAS HEAVED UP ON A WATERY MOUNTAIN AND PITCHED DOWN AGAIN AT A DISTANCE OF TWENTY FATHOMS +4970-29095-0034-1339: WHY SHOULD I RUST AND BE STUPID AND SIT IN INACTION BECAUSE I AM A GIRL +8555-284449-0013-2539: (SCUSE->EXCUSE) ME SAID TROT I NEGLECTED TO TELL YOU THAT YOU'RE NOT THE BOOLOOROO ANY MORE +61-70968-0033-1714: THRUSTING OPEN THE PROPER ENTRANCE OF THE TENT ROBIN SUDDENLY RUSHED FORTH WITH HIS BURDEN WITH A GREAT SHOUT +4077-13754-0003-1099: MOREOVER HAD THE PEOPLE BEEN INCLINED TO REBELLION WHAT GREATER OPPORTUNITY COULD THEY HAVE WISHED +2830-3980-0062-859: NOT FOR A CROWN OR A KINGDOM OR OUR GOODNESS BUT FOR OUR SINS +7176-88083-0020-2179: WHERE THE WAVES FOR AN INSTANT SANK THEY CAME CLOSER BUT NOT QUITE WITHIN GRASPING REACH +2830-3980-0063-860: UNDERSCORE THESE WORDS FOR THEY ARE FULL OF COMFORT FOR SORE CONSCIENCES +1995-1837-0019-500: HE SAT DOWN WEAK BEWILDERED AND ONE THOUGHT WAS UPPERMOST (ZORA->SORA) +3729-6852-0028-1055: ALL THESE HONEST PERSONS ARE WAITING THEIR TURN TO GET THEIR SNUFF BOXES FILLED +4446-2273-0021-1159: WHAT SHE WANTED FROM US WAS NEITHER OUR FLOWERS NOR OUR FRANCS BUT JUST OUR YOUTH +237-134500-0021-680: OH EVER SO MUCH ONLY HE SEEMS KIND OF STAID (AND->IN) SCHOOL (TEACHERY->TEACHER) +1284-1181-0015-260: MOST PEOPLE TALK TOO MUCH SO IT IS A RELIEF TO FIND ONE WHO TALKS TOO LITTLE +672-122797-0034-1819: A STORY CRIED THE CHILDREN DRAWING A LITTLE FAT MAN TOWARDS THE TREE +3570-5694-0003-923: WITH A FURTHER ADVANCE IN CULTURE THIS (TABU->TABOO) MAY CHANGE INTO SIMPLE CUSTOM OF A MORE OR LESS RIGOROUS CHARACTER BUT WHATEVER BE THE THEORETICAL BASIS OF THE DISTINCTION WHICH IS MAINTAINED WHETHER IT BE A (TABU->TABOO) OR A LARGER CONVENTIONALITY THE FEATURES OF THE CONVENTIONAL SCHEME OF CONSUMPTION DO NOT CHANGE EASILY +672-122797-0022-1807: THE AXE STRUCK DEEP INTO THE VERY PITH THE TREE FELL TO THE EARTH WITH A SIGH HE FELT A PANG IT WAS LIKE A SWOON HE COULD NOT THINK OF HAPPINESS FOR HE WAS SORROWFUL AT BEING SEPARATED FROM HIS HOME FROM THE PLACE WHERE HE HAD SPRUNG UP +3575-170457-0043-1013: HOWEVER REMEMBERING WHAT YOU TOLD ME NAMELY THAT YOU HAD COMMENDED THE MATTER TO A HIGHER DECISION THAN OURS AND THAT YOU WERE RESOLVED TO SUBMIT WITH RESIGNATION TO THAT DECISION WHATEVER IT MIGHT BE I HOLD IT MY DUTY TO YIELD ALSO AND TO BE SILENT (IT->AND) MAY BE ALL FOR THE BEST +7729-102255-0009-2242: ALL DISSENT ALL NON COMPLIANCE ALL HESITATION ALL MERE SILENCE EVEN WERE IN THEIR STRONGHOLD TOWNS LIKE LEAVENWORTH BRANDED AS ABOLITIONISM DECLARED TO BE HOSTILITY TO THE PUBLIC WELFARE AND PUNISHED WITH PROSCRIPTION PERSONAL VIOLENCE EXPULSION AND FREQUENTLY DEATH +7021-79740-0008-2047: FOR INSTANCE ONE DAY THE CHILDREN HAD BEEN 
PLAYING UPON THE PIAZZA WITH BLOCKS AND OTHER PLAYTHINGS AND FINALLY HAD GONE INTO THE HOUSE LEAVING ALL THE THINGS ON THE FLOOR OF THE PIAZZA INSTEAD OF PUTTING THEM AWAY IN THEIR PLACES AS THEY OUGHT TO HAVE DONE +7021-79730-0004-2033: THE MOTHER AS SOON AS THE (CHAISE->CHASE) IS SO FAR TURNED THAT MARY CAN NO LONGER WATCH THE EXPRESSION OF HER COUNTENANCE GOES AWAY FROM THE DOOR WITH A SMILE OF COMPLACENCY AND SATISFACTION (UPON->ON) HER FACE AT THE INGENUITY AND SUCCESS OF HER LITTLE ARTIFICE +6930-81414-0001-2002: I HEARD A NOISE BEHIND I TURNED AND SAW (KAFFAR->KAFFIR) HIS BLACK EYES SHINING WHILE IN HIS HAND HE HELD A GLEAMING KNIFE HE LIFTED IT ABOVE HIS HEAD AS IF TO STRIKE BUT I HAD THE STRENGTH OF TEN MEN AND I HURLED HIM FROM ME +121-123859-0000-129: YOU ARE MY ALL THE WORLD AND I MUST STRIVE TO KNOW MY SHAMES AND PRAISES FROM YOUR TONGUE NONE ELSE TO ME NOR I TO NONE ALIVE THAT MY (STEEL'D->STEELED) SENSE OR CHANGES RIGHT OR WRONG +8455-210777-0022-2378: OF WHAT MISSUS NEVERBEND HAD GONE THROUGH IN PROVIDING BIRDS BEASTS AND FISHES NOT TO TALK OF TARTS AND JELLIES FOR THE DINNER OF THAT DAY NO ONE BUT MYSELF CAN HAVE ANY IDEA BUT IT MUST BE ADMITTED THAT SHE ACCOMPLISHED HER TASK WITH THOROUGH SUCCESS +121-123852-0004-128: MY HEART DOTH PLEAD THAT THOU IN HIM DOST LIE A CLOSET NEVER (PIERC'D->PIERCED) WITH CRYSTAL EYES BUT THE DEFENDANT DOTH THAT PLEA DENY AND SAYS IN HIM THY FAIR APPEARANCE LIES +1221-135767-0002-189: AT THAT EPOCH OF PRISTINE SIMPLICITY HOWEVER MATTERS OF EVEN SLIGHTER PUBLIC INTEREST AND OF FAR LESS INTRINSIC WEIGHT THAN THE WELFARE OF HESTER AND HER CHILD WERE STRANGELY MIXED UP WITH THE DELIBERATIONS OF LEGISLATORS AND ACTS OF STATE +5105-28240-0001-1418: IT WAS QUITE TRUE THAT A VESSEL WAS IN SIGHT HARDLY MORE THAN SIX MILES FROM THE SHORE BUT OWING TO THE INCREASE IN THE EARTH'S CONVEXITY AND THE CONSEQUENT LIMITATION OF THE RANGE OF VISION THE RIGGING OF THE (TOPMASTS->TOP MASTS) ALONE WAS VISIBLE ABOVE THE WATER +121-127105-0025-159: THE DEPARTING LADIES WHO HAD SAID THEY WOULD STAY DIDN'T OF COURSE THANK HEAVEN STAY THEY DEPARTED IN CONSEQUENCE OF ARRANGEMENTS MADE IN A RAGE OF CURIOSITY AS THEY PROFESSED PRODUCED BY THE TOUCHES WITH WHICH HE HAD ALREADY WORKED US UP +8230-279154-0007-2319: HABIT IS A CONCEPT INVOLVING THE OCCURRENCE OF SIMILAR EVENTS AT DIFFERENT TIMES IF THE (BEHAVIOURIST->BEHAVIORIST) FEELS CONFIDENT THAT THERE IS SUCH A PHENOMENON AS HABIT THAT CAN ONLY BE BECAUSE HE TRUSTS HIS MEMORY WHEN IT ASSURES HIM THAT THERE HAVE BEEN OTHER TIMES +4992-41806-0016-1404: THE FACE OF THE MAHOGANY SHONE WITH DELIGHT AND WHY NOT WHEN IT WAS DOING EVERYTHING ALMOST EVERYTHING WITHIN THE SCOPE OF A PIANO AND YET THE FAMILY HAD ENJOYED WEEKS OF GOOD NOURISHING MEALS ON WHAT HAD BEEN SAVED BY ITS EXERTIONS +5142-36377-0008-1539: MISTER (MEADOWCROFT->MEDICROFT) THE ELDER HAVING NOT SPOKEN ONE WORD THUS FAR HIMSELF INTRODUCED THE (NEWCOMER->NEW COMER) TO ME WITH A SIDE GLANCE AT HIS SONS WHICH HAD SOMETHING LIKE DEFIANCE IN IT A GLANCE WHICH AS I WAS SORRY TO NOTICE WAS RETURNED WITH (THE->A) DEFIANCE ON THEIR SIDE BY THE TWO YOUNG MEN +4970-29093-0003-1284: THE MODEST FELLOW WOULD HAVE LIKED FAME THRUST UPON HIM FOR SOME WORTHY ACHIEVEMENT IT MIGHT BE FOR A BOOK OR FOR THE (SKILLFUL->SKILFUL) MANAGEMENT OF SOME GREAT NEWSPAPER OR FOR SOME DARING EXPEDITION LIKE THAT OF LIEUTENANT STRAIN OR DOCTOR KANE +7021-85628-0003-2063: NOW THIS KNIFE WAS A SPLENDID ONE THOUGH HALF THE BLADE WAS GONE AND THE HANDLE WAS A LITTLE CRACKED AND ANDERS KNEW THAT ONE 
IS ALMOST A MAN AS SOON AS ONE HAS A JACK KNIFE +7729-102255-0026-2259: IN THE SHOOTING OF SHERIFF JONES IN LAWRENCE AND IN THE REFUSAL OF EX GOVERNOR (BEEDER->READER) TO ALLOW THE DEPUTY MARSHAL TO ARREST HIM THEY DISCOVERED GRAVE OFFENSES AGAINST THE TERRITORIAL AND (*->THE) UNITED STATES LAWS +4992-41806-0000-1388: NATTY HARMON TRIED THE KITCHEN PUMP SECRETLY SEVERAL TIMES DURING THE EVENING FOR THE WATER HAD TO RUN (UP HILL->UPHILL) ALL THE WAY FROM THE WELL TO THE KITCHEN SINK AND HE BELIEVED THIS TO BE A CONTINUAL MIRACLE THAT MIGHT GIVE OUT AT ANY MOMENT +4077-13754-0007-1103: WE BELIEVE IN A LITERAL RESURRECTION AND AN ACTUAL HEREAFTER IN WHICH FUTURE STATE SHALL BE RECOGNIZED EVERY SANCTIFIED AND AUTHORIZED RELATIONSHIP EXISTING HERE ON EARTH OF PARENT AND CHILD BROTHER AND SISTER HUSBAND AND WIFE +1221-135766-0003-174: THE CHILD HAD A NATIVE GRACE WHICH DOES NOT INVARIABLY (CO EXIST->COEXIST) WITH FAULTLESS BEAUTY ITS ATTIRE HOWEVER SIMPLE ALWAYS IMPRESSED THE BEHOLDER AS IF IT WERE THE VERY GARB THAT PRECISELY BECAME IT BEST +5142-33396-0001-1463: WHAT IS YOUR COUNTRY OLAF HAVE YOU ALWAYS BEEN A THRALL THE THRALL'S EYES FLASHED +1580-141084-0011-399: WHEN WE WERE OUT IN THE DARKNESS OF THE QUADRANGLE WE AGAIN LOOKED UP AT THE WINDOWS +1580-141083-0035-369: HOLMES TURNED AWAY AND STOOPED SUDDENLY TO THE FLOOR (HALLOA WHAT'S->HALLO WHAT IS) THIS +7176-92135-0041-2228: (RE ENTER->REENTRE) BUTLER AND THREE FOOTMEN WHO (REMOVE->MOVE) THE TEA THINGS HOSTESS (TO GUEST->TWO GUESTS) +8455-210777-0052-2408: YOU WILL ALLOW ME TO SUGGEST SAID HE THAT THAT IS A MATTER OF OPINION +4446-2275-0019-1194: THE WORLD IS ALL THERE JUST AS IT USED TO BE BUT I CAN'T GET AT IT ANY MORE +4507-16021-0003-1224: SHE HAS A SON THEFT AND A DAUGHTER HUNGER +6829-68769-0023-1883: I DIDN'T STOP TO THINK WHETHER IT WAS FOOLISH OR NOT I DID IT AND I'M GLAD I DID (*->IT) +2830-3980-0052-849: WE ARE TO HEAR CHRIST WHO HAS BEEN APPOINTED BY THE FATHER AS OUR DIVINE TEACHER +260-123440-0011-774: NO I'VE MADE UP MY MIND ABOUT IT IF I'M MABEL I'LL STAY DOWN HERE +4446-2273-0025-1163: IT WAS VERY JOLLY HE MURMURED LAZILY AS MARIE CAME IN TO TAKE AWAY THE COFFEE +6829-68771-0014-1928: WE OUGHT TO HAVE MORE (ATTENDANTS->ATTENDANCE) BETH SAID LOUISE APPROACHING HER COUSIN +6829-68769-0008-1868: FAIRVIEW WAS TWELVE MILES AWAY BUT BY TEN O'CLOCK THEY DREW UP AT THE COUNTY JAIL +8455-210777-0037-2393: YOU HAVE COME TO US THREATENING US WITH ABSOLUTE DESTRUCTION +61-70968-0022-1703: TIS FINE FOR YOU TO TALK OLD MAN ANSWERED THE LEAN SULLEN APPRENTICE +3570-5695-0010-953: THE MODERN ORGANIZATION OF INDUSTRY WORKS IN THE SAME DIRECTION ALSO BY ANOTHER LINE +237-134500-0039-698: THAT WON'T LAST IT WILL GO AWAY AND THINGS WILL BE JUST AS THEY USED TO +4992-23283-0014-1358: WHAT CAN YOU MEAN BY THAT MISS WOODLEY YOU TALK MYSTERIOUSLY +61-70970-0034-1778: NAY NAY LORDING ANSWERED WARRENTON WITH A HALF LAUGH +1580-141084-0010-398: I WILL TAKE THE BLACK CLAY WITH ME ALSO THE PENCIL CUTTINGS GOOD (BYE->BY) +7127-75947-0035-2153: GOOD GRACIOUS HAS THE KING ANY RIGHT TO INTERFERE IN MATTERS OF THAT KIND +237-134500-0024-683: I LIKE TO TALK TO CARL ABOUT NEW YORK AND WHAT A FELLOW CAN DO THERE +3729-6852-0031-1058: BUT HOW DID SHE MANAGE TO RENDER IT SO FASHIONABLE +61-70968-0037-1718: WHAT IS YOUR NAME LORDING ASKED THE LITTLE STROLLER PRESENTLY +260-123286-0011-713: NOTHING NEW WEATHER UNCHANGED THE WIND FRESHENS +3729-6852-0016-1043: MADAME (QUINSON->QUINCON) BESIDES CAN ANSWER YOUR (ENQUIRIES->INQUIRIES) +5105-28240-0016-1433: TO ALL 
THESE INQUIRIES THE COUNT RESPONDED IN THE AFFIRMATIVE +2300-131720-0006-578: THERE SEEMS NO GOOD REASON FOR BELIEVING THAT IT WILL CHANGE +237-134493-0013-653: INDEED HE HAD LOOKED AWAY WITH THE PURPOSE OF NOT SEEING IT +7176-88083-0009-2168: THE GREAT HAWK FOLLOWED HURRIEDLY TO RETRIEVE HIS PREY FROM THE GROUND +237-134500-0040-699: I PRAY FOR YOU BUT THAT'S NOT THE SAME AS IF YOU PRAYED YOURSELF +672-122797-0068-1853: BUT IT WAS NOT THE FIR TREE THAT THEY MEANT +237-134500-0010-669: IT'S EXCITING TO SEE EVERYTHING GROWING SO FAST AND TO GET THE GRASS CUT +237-126133-0009-623: NOW YOU'LL STAY CRIED VAN SAY POLLY WON'T YOU +4970-29095-0023-1328: I HAVE NOTHING TO WEAR REPLIED THAT DEMURE PERSON +61-70970-0019-1763: AT LAST ALL WAS QUIET AND BLACK IN THE COURTYARD OF GAMEWELL +8455-210777-0067-2423: OR OF THE HABITS OF OUR PEOPLE IT IS QUITE IMPOSSIBLE +237-126133-0025-639: AT LAST HE CAME OUT OF THEM AND WIPED HIS FACE VIGOROUSLY +3570-5694-0019-939: BUT THE GENERAL DISTINCTION IS NOT ON THAT ACCOUNT TO BE OVERLOOKED +1580-141084-0042-430: A SUDDEN IMPULSE CAME OVER HIM TO ENTER AND SEE IF THEY WERE INDEED THE PROOFS +61-70968-0023-1704: BUT I WRESTLED WITH THIS FELLOW AND DO KNOW THAT HE PLAYED UNFAIRLY IN THE SECOND BOUT +61-70970-0020-1764: WILL WHISPERED ROBIN OPENING HIS DOOR AS HE SPOKE ARE YOU READY +2830-3980-0053-850: AT THE SAME TIME PAUL CONFIRMS OUR CREED THAT CHRIST IS VERY GOD +672-122797-0069-1854: IT WAS IN A CORNER THAT HE LAY AMONG WEEDS AND NETTLES +2830-3980-0068-865: THE GREATNESS OF THE RANSOM CHRIST THE SON OF GOD INDICATES THIS +6829-68769-0024-1884: OLD WILL IS A FINE FELLOW BUT POOR AND HELPLESS SINCE MISSUS ROGERS HAD HER ACCIDENT +2830-3980-0008-805: PAUL TAKES PRIDE IN HIS MINISTRY NOT TO HIS OWN PRAISE BUT TO THE PRAISE OF GOD +6930-75918-0008-1959: CAN YOU IMAGINE WHY BUCKINGHAM HAS BEEN SO VIOLENT I SUSPECT +8455-210777-0023-2379: WE SAT WITH THE OFFICERS SOME LITTLE TIME AFTER DINNER AND THEN WENT ASHORE +260-123286-0013-715: THE SHADOW OF THE RAFT WAS CLEARLY OUTLINED UPON THE SURFACE OF THE WAVES +5683-32866-0005-1629: AND HE MADE A LITTLE DIP OF HIS CANE TOWARDS BRANDON HALL OVER HIS SHOULDER +2830-3979-0006-790: A WORD SHOULD NOW BE SAID ABOUT THE ORIGIN OF LUTHER'S COMMENTARY (ON GALATIANS->UNGULATIONS) +6829-68771-0015-1929: WON'T YOU RUN INTO THE HOUSE AND SEE IF MARTHA CAN'T SPARE ONE OR TWO MORE MAIDS +61-70968-0009-1690: LIKE AS NOT YOUNG MASTER THOUGH I AM AN OLD MAN +4992-23283-0016-1360: AGAIN HE SEARCHED HIS OWN THOUGHTS NOR INEFFECTUALLY AS BEFORE +4446-2275-0005-1180: I FELT IT IN MY BONES WHEN I WOKE THIS MORNING THAT SOMETHING SPLENDID WAS GOING TO TURN UP +1089-134686-0010-10: WELL NOW ENNIS I DECLARE YOU HAVE A HEAD AND SO HAS MY STICK +672-122797-0054-1839: I KNOW NO SUCH PLACE SAID THE TREE +61-70968-0053-1734: HE IS MY ESQUIRE EXCELLENCY RETURNED ROBIN WITH DIGNITY +6829-68769-0009-1869: THEY WERE RECEIVED IN THE LITTLE OFFICE BY A MAN NAMED MARKHAM WHO WAS THE JAILER +5683-32866-0006-1630: YES SO THEY SAID BUT THAT WOULD I THINK HAVE BEEN WORSE +672-122797-0024-1809: THE DEPARTURE WAS NOT AT ALL AGREEABLE +237-134493-0000-640: IT IS SIXTEEN YEARS SINCE JOHN (BERGSON->BERKSON) DIED +4446-2275-0035-1210: ALEXANDER ROSE AND SHOOK HIMSELF ANGRILY YES I KNOW (I'M->I AM) COWARDLY +6829-68769-0039-1899: HE LOOKED UP RATHER UNGRACIOUSLY BUT MOTIONED THEM TO BE SEATED +672-122797-0039-1824: I WON'T TREMBLE TO MORROW THOUGHT THE FIR TREE +5105-28240-0002-1419: EXCLAIMED SERVADAC KEEPING HIS EYE UNMOVED AT HIS TELESCOPE +7127-75947-0021-2139: 
LA (VALLIERE->VALLIER) IS QUITE A POETESS SAID (TONNAY CHARENTE->TONNICHERANT) +1580-141083-0036-370: HOLMES HELD IT OUT ON HIS OPEN PALM IN THE GLARE OF THE ELECTRIC LIGHT +5142-33396-0048-1510: BY THE HAMMER OF THOR SHOUTED GRIM (HERE->THERE) IS NO STINGY COWARD +5142-33396-0018-1480: (MY->A) DRAGON'S BELLY IS NEVER FULL AND ON BOARD WENT THE GOLD +5683-32866-0020-1644: THE FLOOR MORE THAN ANYTHING ELSE SHOWED THE GREAT AGE OF THE ROOM +121-121726-0006-115: HEREDITY THE CAUSE OF ALL OUR FAULTS +1284-1181-0005-250: NO I FORGOT ALL ABOUT THE BRAINS EXCLAIMED THE WOMAN +4970-29095-0025-1330: AND THEE WON'T GO WHY SHOULD I +61-70968-0039-1720: AND MINE IS WILL STUTELEY SHALL WE BE COMRADES +1995-1837-0009-490: THE LAGOON HAD BEEN LEVEL WITH THE (DYKES->DIKES) A WEEK AGO AND NOW +4446-2273-0027-1165: WHEN SHE FINISHED ALEXANDER SHOOK HIMSELF OUT OF A REVERIE +1320-122617-0032-324: KEEP SILENT AS LONG AS MAY BE AND IT WOULD BE WISE WHEN YOU DO SPEAK TO BREAK OUT SUDDENLY IN ONE OF YOUR SHOUTINGS WHICH WILL SERVE TO REMIND THE INDIANS THAT YOU ARE NOT ALTOGETHER AS RESPONSIBLE AS MEN SHOULD BE +5142-36377-0022-1553: I WISH YOU GOOD NIGHT SHE LAID HER BONY HANDS ON THE BACK OF MISTER (MEADOWCROFT'S->MEDICROFT'S) INVALID CHAIR CUT HIM SHORT IN HIS FAREWELL SALUTATION TO ME AND WHEELED HIM OUT TO HIS BED AS IF SHE WERE WHEELING HIM OUT TO HIS GRAVE +5683-32879-0004-1659: BUT POOR RACHEL LAKE HAD MORE THAN THAT STOICAL HYPOCRISY WHICH ENABLES THE TORTURED SPIRITS OF HER SEX TO LIFT A PALE FACE THROUGH THE FLAMES AND SMILE +8555-284449-0002-2528: AT THE HEAD OF THE PINKIES WERE GHIP GHISIZZLE AND BUTTON BRIGHT WHO HAD THE PARROT ON HIS SHOULDER AND THEY WERE SUPPORTED BY CAPTAIN CORALIE AND CAPTAIN (TINTINT->TINTENT) AND ROSALIE THE WITCH +1089-134686-0009-9: AT MOST BY AN ALMS GIVEN TO A BEGGAR WHOSE BLESSING HE FLED FROM HE MIGHT HOPE WEARILY TO WIN FOR HIMSELF SOME MEASURE OF ACTUAL GRACE +61-70970-0004-1748: BUT TAKE IT WHILST I LIVE AND WEAR MONTFICHET'S SHIELD IN THE DAYS WHEN MY EYES CAN BE REJOICED BY SO BRAVE A SIGHT FOR YOU WILL (NE'ER->NEVER) DISGRACE OUR (SCUTCHEON->DUCHEON) I WARRANT ME +2961-960-0005-879: IN THE PRESENT DAY WE ARE WELL AWARE THAT AN ANCIENT PHILOSOPHER IS TO BE INTERPRETED FROM HIMSELF AND BY THE CONTEMPORARY HISTORY OF THOUGHT +1188-133604-0020-84: BUT OBSERVE YOU CAN ONLY DO THIS ON ONE CONDITION THAT OF STRIVING ALSO TO CREATE IN REALITY THE BEAUTY WHICH YOU SEEK IN IMAGINATION +2830-3980-0007-804: AS THE AMBASSADOR OF A GOVERNMENT IS HONORED FOR HIS OFFICE AND NOT FOR HIS PRIVATE PERSON SO THE MINISTER OF CHRIST SHOULD EXALT HIS OFFICE IN ORDER TO GAIN AUTHORITY AMONG MEN +260-123288-0010-744: ON THE MAST ALREADY I SEE THE LIGHT PLAY OF (A LAMBENT->LAMOT) SAINT (ELMO'S->ELBEL'S) FIRE THE OUTSTRETCHED SAIL CATCHES NOT A BREATH OF WIND AND HANGS LIKE A SHEET OF LEAD +2961-960-0020-894: AND HENCE WE FIND THE SAME SORT OF CLUMSINESS IN THE TIMAEUS OF PLATO WHICH CHARACTERIZES THE PHILOSOPHICAL POEM OF LUCRETIUS +5639-40744-0020-1584: THUS DID (THIS->THE) HUMANE AND RIGHT MINDED FATHER COMFORT HIS UNHAPPY DAUGHTER AND HER MOTHER EMBRACING HER AGAIN DID ALL SHE COULD TO SOOTHE (HER->THE) FEELINGS +5142-33396-0032-1494: THE FARMER SAT GLOOMILY ON THE BENCH AND WOULD NOT EAT AND YOU CANNOT WONDER FOR HE SAW US PUTTING POTFULS OF HIS GOOD BEEF AND BASKET LOADS OF BREAD INTO OUR BIG MOUTHS +3575-170457-0044-1014: AFTER THIS DISAPPOINTMENT I NEVER DARE RECKON WITH CERTAINTY ON THE ENJOYMENT OF A PLEASURE AGAIN IT SEEMS AS IF SOME FATALITY STOOD BETWEEN YOU AND ME 
+4970-29093-0018-1299: THE TWO YOUNG MEN WHO WERE BY THIS TIME FULL OF THE ADVENTURE WENT DOWN TO THE WALL STREET OFFICE OF HENRY'S UNCLE AND HAD A TALK WITH THAT WILY OPERATOR +672-122797-0023-1808: HE WELL KNEW THAT HE SHOULD NEVER SEE HIS DEAR OLD COMRADES THE LITTLE BUSHES AND FLOWERS AROUND HIM (ANYMORE->ANY MORE) PERHAPS NOT EVEN THE BIRDS +4077-13751-0000-1074: ON THE SIXTH OF APRIL EIGHTEEN THIRTY THE CHURCH OF JESUS CHRIST OF LATTER DAY SAINTS WAS FORMALLY ORGANIZED AND THUS TOOK ON A LEGAL EXISTENCE +7729-102255-0011-2244: THE PRESENT CHAPTERS CAN ONLY TOUCH UPON THE MORE SALIENT MOVEMENTS OF THE CIVIL WAR IN KANSAS WHICH HAPPILY (WERE->ARE) NOT SANGUINARY IF HOWEVER THE INDIVIDUAL AND MORE ISOLATED CASES OF BLOODSHED COULD BE DESCRIBED THEY WOULD SHOW A STARTLING AGGREGATE OF BARBARITY AND LOSS OF LIFE FOR OPINION'S SAKE +2094-142345-0038-549: I CAN NO MORE HELP SPENDING MY LIFE IN TRYING TO DO WHAT I CAN FOR THE SOULS OF OTHERS THAN YOU COULD HELP RUNNING IF YOU HEARD LITTLE TOTTY CRYING AT THE OTHER END OF THE HOUSE THE VOICE WOULD GO TO YOUR HEART YOU WOULD THINK THE DEAR CHILD WAS IN TROUBLE OR IN DANGER AND YOU COULDN'T REST WITHOUT RUNNING TO HELP HER AND COMFORT HER +4077-13754-0008-1104: IT HAS BEEN MY PRIVILEGE TO TREAD THE SOIL OF MANY LANDS TO OBSERVE THE CUSTOMS AND STUDY THE HABITS OF MORE NATIONS THAN ONE AND I HAVE YET TO FIND THE PLACE AND MEET THE PEOPLE WHERE AND WITH WHOM THE PURITY OF MAN AND WOMAN IS HELD MORE PRECIOUS THAN AMONG THE MALIGNED MORMONS IN THE MOUNTAIN VALLEYS OF THE WEST +1221-135767-0003-190: THE PERIOD WAS HARDLY IF AT ALL EARLIER THAN THAT OF OUR STORY WHEN A DISPUTE CONCERNING THE RIGHT OF PROPERTY IN A PIG NOT ONLY CAUSED A FIERCE AND BITTER CONTEST IN THE LEGISLATIVE BODY OF THE COLONY BUT RESULTED IN AN IMPORTANT MODIFICATION OF THE FRAMEWORK ITSELF OF THE LEGISLATURE +3729-6852-0017-1044: I SEE A QUANTITY OF CHAIRS FOR HIRE AT THE RATE OF ONE (SOU->SOUS) MEN READING THE NEWSPAPER UNDER THE SHADE OF THE TREES GIRLS AND MEN BREAKFASTING EITHER ALONE OR IN COMPANY WAITERS WHO WERE RAPIDLY GOING UP AND DOWN A NARROW STAIRCASE HIDDEN UNDER THE FOLIAGE +5639-40744-0035-1599: SHE REFLECTED HOW NEAR SHE STOOD TO THE CRISIS WHICH WAS TO DETERMINE WHETHER SHE WAS TO BE BLESSED OR UNHAPPY FOR EVER AND RACKED BY THE INTENSITY OF HER EMOTIONS SHE SUDDENLY CHANGED (COLOUR->COLOR) HER HEAD DROPPED AND SHE FELL FORWARD IN A SWOON INTO THE ARMS OF THE DISMAYED (ESTAFANIA->STEPHANIA) +1995-1837-0023-504: THE NET AND WEB OF ENDLESS THINGS HAD BEEN CRAWLING AND CREEPING AROUND HER SHE HAD STRUGGLED IN DUMB SPEECHLESS TERROR AGAINST SOME MIGHTY GRASPING THAT STROVE FOR HER LIFE WITH GNARLED AND CREEPING FINGERS BUT NOW AT LAST WEAKLY SHE OPENED HER EYES AND QUESTIONED +1221-135767-0017-204: ON THE TABLE IN TOKEN THAT THE SENTIMENT OF OLD ENGLISH HOSPITALITY HAD NOT BEEN LEFT BEHIND STOOD A LARGE PEWTER TANKARD AT THE BOTTOM OF WHICH HAD HESTER OR PEARL PEEPED INTO IT THEY MIGHT HAVE SEEN THE FROTHY REMNANT OF A RECENT DRAUGHT OF ALE +7176-92135-0012-2199: INDEED IRRESOLUTION BEING THE KEYNOTE OF HAMLET'S SOLILOQUY A CLEVER PLAYER COULD TO SOME EXTENT INDICATE THE WHOLE THIRTY LINES BY A SILENT WORKING OF THE JAW BUT AT THE SAME TIME IT WOULD BE IDLE TO DENY THAT HE WOULD MISS THE FINER SHADES OF THE DRAMATIST'S MEANING +260-123288-0026-760: WE SHALL BE BLOWN UP BUT NO THE DAZZLING DISK OF MYSTERIOUS LIGHT NIMBLY LEAPS ASIDE IT APPROACHES HANS WHO FIXES HIS BLUE EYE UPON IT STEADILY IT THREATENS THE HEAD OF MY UNCLE WHO FALLS UPON HIS KNEES WITH HIS HEAD 
DOWN TO AVOID IT +2961-960-0021-895: THERE IS A WANT OF FLOW AND OFTEN A DEFECT OF RHYTHM THE MEANING IS SOMETIMES OBSCURE AND THERE IS A GREATER USE OF APPOSITION AND MORE OF REPETITION THAN OCCURS IN PLATO'S EARLIER WRITINGS +2961-960-0006-880: THE FANCIES OF THE NEO (PLATONISTS->PLATINISTS) ARE ONLY INTERESTING TO US BECAUSE THEY EXHIBIT A PHASE OF THE HUMAN MIND WHICH PREVAILED WIDELY IN THE FIRST CENTURIES OF THE CHRISTIAN ERA AND IS NOT WHOLLY EXTINCT IN OUR OWN DAY +3570-5695-0012-955: IT IS ALSO NOTICEABLE THAT THE SERVICEABILITY OF CONSUMPTION AS A MEANS OF REPUTE AS WELL AS THE INSISTENCE ON IT AS AN ELEMENT OF DECENCY IS AT ITS BEST IN THOSE PORTIONS OF THE COMMUNITY WHERE THE HUMAN CONTACT OF THE INDIVIDUAL IS WIDEST AND THE MOBILITY OF THE POPULATION IS GREATEST +4077-13751-0016-1090: BE IT SAID TO THE HONOR OF SOME OF THE OFFICERS (ENTRUSTED->INTRUSTED) WITH THE TERRIBLE COMMISSION THAT WHEN THEY LEARNED ITS TRUE SIGNIFICANCE THEY RESIGNED THEIR AUTHORITY RATHER THAN HAVE ANYTHING TO DO WITH WHAT THEY DESIGNATED A COLD BLOODED BUTCHERY +1188-133604-0037-101: EVERY LINE IN WHICH THE MASTER TRACES IT EVEN WHERE SEEMINGLY NEGLIGENT IS LOVELY AND SET DOWN WITH A MEDITATIVE CALMNESS WHICH MAKES THESE TWO ETCHINGS CAPABLE OF BEING PLACED BESIDE THE MOST TRANQUIL WORK OF HOLBEIN OR (DUERER->DIURE) +3575-170457-0015-985: BUT DO NOT SUPPOSE THAT I DISPARAGE THE GIFT WHICH YOU POSSESS NOR THAT I WOULD DISCOURAGE YOU FROM EXERCISING IT I ONLY EXHORT YOU SO TO THINK OF IT AND SO TO USE IT AS TO RENDER IT CONDUCIVE TO YOUR OWN PERMANENT GOOD +2094-142345-0009-520: FOR THE GREAT BARN DOORS ARE THROWN WIDE OPEN AND MEN ARE BUSY THERE MENDING THE HARNESS UNDER THE SUPERINTENDENCE OF MISTER GOBY THE (WHITTAW->WIDOW) OTHERWISE SADDLER WHO ENTERTAINS THEM WITH THE LATEST TREDDLESTON GOSSIP +1188-133604-0007-71: THE BROWN GROUND BENEATH IS LEFT FOR THE MOST PART ONE TOUCH OF BLACK IS PUT FOR THE HOLLOW TWO DELICATE LINES OF DARK GRAY DEFINE THE OUTER CURVE AND ONE LITTLE QUIVERING TOUCH OF WHITE DRAWS THE INNER EDGE OF THE MANDIBLE +3570-5694-0020-940: SO THOSE OFFICES WHICH ARE BY RIGHT THE PROPER EMPLOYMENT OF THE LEISURE CLASS ARE NOBLE SUCH AS GOVERNMENT FIGHTING HUNTING THE CARE OF ARMS AND ACCOUTREMENTS AND THE LIKE IN SHORT THOSE WHICH MAY BE CLASSED AS OSTENSIBLY PREDATORY EMPLOYMENTS +1188-133604-0021-85: IT WILL BE WHOLLY IMPOSSIBLE FOR YOU TO RETAIN THE TRANQUILLITY OF TEMPER AND FELICITY OF FAITH NECESSARY FOR NOBLE (PURIST->PUREST) PAINTING UNLESS YOU ARE ACTIVELY ENGAGED IN PROMOTING THE FELICITY AND PEACE OF PRACTICAL LIFE +8555-284449-0004-2530: SINCE LAST THURSDAY I GHIP GHISIZZLE HAVE BEEN THE LAWFUL BOOLOOROO OF THE BLUE COUNTRY BUT NOW THAT YOU ARE CONQUERED BY QUEEN TROT I SUPPOSE I AM CONQUERED TOO AND YOU HAVE NO BOOLOOROO AT ALL +4446-2275-0021-1196: HILDA'S FACE QUIVERED BUT SHE WHISPERED YES I THINK IT MUST HAVE BEEN +7021-85628-0005-2065: SEEING THAT I AM SO FINE I MAY AS WELL GO AND VISIT THE KING +6930-81414-0020-2021: I SAY YOU DO KNOW WHAT THIS MEANS AND YOU MUST TELL US +5142-33396-0019-1481: OH IT IS BETTER TO LIVE ON THE SEA AND LET OTHER MEN RAISE YOUR CROPS AND COOK YOUR MEALS +1320-122612-0006-281: LET US RETRACE OUR STEPS AND EXAMINE AS WE GO WITH KEENER EYES +121-127105-0012-146: IT WASN'T SIMPLY THAT SHE SAID SO BUT THAT I KNEW SHE HADN'T I WAS SURE I COULD SEE +6829-68769-0040-1900: SOME GIRL HAS BEEN (*->IN) HERE TWICE TO INTERVIEW MY MEN AND I HAVE REFUSED TO ADMIT HER +2300-131720-0024-596: BUT THE PLANT RAN AND IT WAS THE FIRST THREE WIRE STATION IN 
THIS COUNTRY +7127-75946-0007-2095: IT IS NECESSARY THEREFORE THAT HE SHOULD COMPLY THE KING FROWNED +8463-294828-0023-2485: YOU SEE MY FRIEND IT'S AN ISSUE OF THE MONSTER THE NOTORIOUS NARWHALE +1995-1826-0022-461: I SUPPOSE THOUGH IT'S TOO EARLY FOR THEM THEN CAME THE EXPLOSION +6829-68769-0011-1871: I'M RUNNING FOR REPRESENTATIVE ON THE REPUBLICAN TICKET SAID KENNETH QUIETLY +3570-5695-0013-956: CONSUMPTION BECOMES A LARGER ELEMENT IN THE STANDARD OF LIVING IN THE CITY THAN IN THE COUNTRY +6829-68769-0026-1886: HE SPOKE SIMPLY BUT PACED UP AND DOWN THE NARROW CELL IN FRONT OF THEM +8455-210777-0009-2365: NO DOUBT IN PROCESS OF TIME THE LADIES WILL FOLLOW +1580-141083-0007-341: THE MOMENT I LOOKED AT MY TABLE I WAS AWARE THAT (SOMEONE->SOME ONE) HAD RUMMAGED AMONG MY PAPERS +260-123286-0029-731: THOSE HUGE CREATURES ATTACKED EACH OTHER WITH THE GREATEST ANIMOSITY +5105-28233-0000-1406: LENGTH OF SERVICE FOURTEEN YEARS THREE MONTHS AND FIVE DAYS +61-70970-0007-1751: HE WAS IN DEEP CONVERSE WITH THE CLERK AND ENTERED THE HALL HOLDING HIM BY THE ARM +7127-75946-0008-2096: DOES YOUR MAJESTY THEN NO LONGER BELIEVE THE DISLOYAL ATTEMPT +237-126133-0012-626: THERE THERE HE SAID SOOTHINGLY PATTING HER BROWN FUZZY HEAD +61-70968-0025-1706: COME TO ME MEN HERE HERE HE RAISED HIS VOICE STILL LOUDER +5142-36377-0010-1541: HE IS NOT WELL HE HAS COME OVER THE OCEAN FOR REST AND CHANGE (OF SCENE->IS SEEN) +4507-16021-0005-1226: WE HAVE NEVER UNDERSTOOD THIS SORT OF OBJECTIONS +8555-292519-0013-2560: THAT WAS BUT RUSTLING OF DRIPPING PLANTS IN THE DARK +5683-32866-0007-1631: IF A FELLOW'S BEEN A LITTLE BIT WILD (HE'S->HE IS) BEELZEBUB AT ONCE +5683-32879-0021-1676: DORCAS IN HER STRANGE WAY WAS MOVED +7729-102255-0012-2245: SEVERAL HUNDRED FREE STATE MEN PROMPTLY RESPONDED TO THE SUMMONS +1089-134686-0026-26: THE RECTOR DID NOT ASK FOR A CATECHISM TO HEAR THE LESSON FROM +3575-170457-0031-1001: ON AUGUST TWENTY SEVENTH EIGHTEEN THIRTY SEVEN SHE WRITES +61-70970-0022-1766: ROBIN ENTERED THE HUT DRAGGING THE UNWILLING ESQUIRE AFTER HIM +61-70968-0055-1736: ROBIN WAS GLAD WHEN AT LENGTH THEY WERE LEFT TO THEIR OWN DEVICES +61-70968-0040-1721: RIGHT WILLINGLY FOR BETWEEN US WE HAVE WON THE BATTLE ANSWERED ROBIN +2830-3980-0024-821: IN THIS WHOLE EPISTLE PAUL TREATS OF THE RESURRECTION OF CHRIST +4507-16021-0050-1271: AND YOU BELONG TO THAT SMALL CLASS WHO ARE HAPPY +2830-3980-0009-806: PAUL AN APOSTLE NOT OF MEN ET CETERA +672-122797-0041-1826: IN THE MORNING THE SERVANT AND THE HOUSEMAID CAME IN +672-122797-0010-1795: REJOICE IN THY GROWTH SAID THE SUNBEAMS +1221-135767-0019-206: MOTHER CRIED SHE I SEE YOU HERE LOOK LOOK +5683-32879-0003-1658: WOMEN CAN HIDE THEIR PAIN BETTER THAN WE MEN AND BEAR IT BETTER TOO EXCEPT WHEN SHAME DROPS FIRE INTO THE DREADFUL CHALICE +908-157963-0025-2588: I SEE THEY LAY HELPLESS AND NAKED WEEPING AND NONE TO ANSWER NONE TO CHERISH THEE WITH (MOTHERS->MOTHER'S) SMILES +5683-32866-0004-1628: BY THIS TIME LORD CHELFORD AND WYLDER RETURNED AND DISGUSTED RATHER WITH MYSELF I RUMINATED ON MY WANT OF (GENERAL SHIP->GENERALSHIP) +6930-76324-0016-1988: THE LURE PROVED TOO MUCH FOR HIM AND HE CAME SPORTING AFTER IT AS (FRISKILY->FRISKLY) AS A YOUNG KITTEN MUCH TO CYNTHIA'S DELIGHT WHEN SHE CAUGHT SIGHT OF HIM +3575-170457-0013-983: THE MORE SHE IS ENGAGED IN HER PROPER DUTIES THE LESS LEISURE WILL SHE HAVE FOR IT EVEN AS AN ACCOMPLISHMENT AND A RECREATION +1089-134691-0016-54: THEY WERE VOYAGING ACROSS THE DESERTS OF THE SKY A HOST OF NOMADS ON THE MARCH VOYAGING HIGH OVER IRELAND 
WESTWARD BOUND +4992-41797-0008-1373: MISTER POPHAM EXAGGERATED NOTHING BUT ON THE CONTRARY LEFT MUCH UNSAID IN HIS NARRATIVE OF THE FAMILY AT THE HOUSE OF LORDS +6829-68771-0029-1943: BETH WAS A BEAUTIFUL GIRL THE HANDSOMEST OF THE THREE COUSINS BY FAR YET ELIZA SURPASSED HER IN NATURAL CHARM AND SEEMED WELL AWARE OF THE FACT +3570-5696-0009-968: IN STRICT ACCURACY NOTHING SHOULD BE INCLUDED UNDER THE HEAD OF CONSPICUOUS WASTE BUT SUCH EXPENDITURE AS IS INCURRED ON THE GROUND OF AN INVIDIOUS PECUNIARY COMPARISON +672-122797-0038-1823: THOUGHT THE FIR TREE AND BELIEVED IT ALL BECAUSE THE MAN WHO TOLD THE STORY WAS SO GOOD LOOKING WELL WELL +8224-274384-0005-2303: ANOTHER PREACHER AFTER REPROACHING HIM TO HIS FACE WITH HIS MISGOVERNMENT ORDERED THIS PSALM TO BE SUNG +4970-29095-0038-1343: RUTH WAS GLAD TO HEAR THAT PHILIP HAD MADE A PUSH INTO THE WORLD AND SHE WAS SURE THAT HIS TALENT AND COURAGE WOULD MAKE A WAY FOR HIM +1188-133604-0005-69: IT IS THE HEAD OF A PARROT WITH A LITTLE FLOWER IN HIS BEAK FROM A PICTURE OF (CARPACCIO'S->CARPATIOS) ONE OF HIS SERIES OF THE LIFE OF SAINT GEORGE +7729-102255-0010-2243: OF THE LYNCHINGS THE MOBS AND THE MURDERS IT WOULD BE IMPOSSIBLE EXCEPT IN A VERY EXTENDED WORK TO NOTE THE FREQUENT AND ATROCIOUS DETAILS +2961-960-0004-878: THERE IS NO DANGER OF THE MODERN COMMENTATORS ON THE TIMAEUS FALLING INTO THE ABSURDITIES OF THE (NEO PLATONISTS->NEOP PLATINISTS) +7021-85628-0018-2078: AND IT IS MADE OF MOTHER'S BEST YARN AND SHE KNITTED IT HERSELF AND EVERYBODY WANTS TO GET IT AWAY FROM ME +7176-88083-0024-2183: THE LAST DROP FLY AS LUCK WOULD HAVE IT (CAUGHT->GOT) JUST IN THE CORNER OF THE HAWK'S ANGRILY OPEN BEAK HOOKING ITSELF FIRMLY +2830-3980-0067-864: THIS SENTENCE ALSO DEFINES OUR SINS AS GREAT SO GREAT IN FACT THAT THE WHOLE WORLD COULD NOT MAKE AMENDS FOR A SINGLE SIN +1580-141084-0041-429: NO HARM WOULD HAVE BEEN DONE HAD IT NOT BEEN THAT AS HE PASSED YOUR DOOR HE PERCEIVED THE KEY WHICH HAD BEEN LEFT BY THE CARELESSNESS OF YOUR SERVANT +2300-131720-0037-609: HE WEIGHED AND (REWEIGHED->RE WEIGHED) THE METER PLATES AND PURSUED EVERY LINE OF INVESTIGATION IMAGINABLE BUT ALL IN VAIN +5142-33396-0031-1493: THEY SET UP A CRANE OVER THE FIRE AND HUNG THE POT UPON IT AND WE SAT AND WATCHED IT BOIL WHILE WE JOKED AT LAST THE SUPPER BEGAN +8555-284447-0015-2516: THEY LOOK SOMETHING ALIKE YOU KNOW SUGGESTED THE CAPTAIN OF THE GUARDS LOOKING FROM ONE TO THE OTHER DOUBTFULLY AND THEY'RE NEARLY THE SAME SIZE IF YOU STAND THE GOAT ON HIS HIND LEGS THEY'VE BOTH GOT THE SAME STYLE OF WHISKERS AND THEY'RE BOTH OF (EM->THEM) OBSTINATE AND DANGEROUS SO THEY OUGHT TO MAKE A GOOD PATCH SPLENDID +1221-135767-0004-191: WE HAVE SPOKEN OF PEARL'S RICH AND LUXURIANT BEAUTY A BEAUTY THAT SHONE WITH DEEP AND VIVID TINTS A BRIGHT COMPLEXION EYES POSSESSING INTENSITY BOTH OF DEPTH AND GLOW AND HAIR ALREADY OF A DEEP GLOSSY BROWN AND WHICH IN AFTER YEARS WOULD BE NEARLY AKIN TO BLACK +3729-6852-0034-1061: LET A MAN RUN AND EVERYBODY WILL RUN AFTER HIM THE CROWD WILL NOT STOP UNLESS THE MAN IS PROVED TO BE MAD BUT TO PROVE IT IS INDEED A DIFFICULT TASK BECAUSE WE HAVE A CROWD OF MEN WHO MAD FROM THEIR BIRTH ARE STILL CONSIDERED WISE +1320-122617-0035-327: THEN HEAVING A HEAVY SIGH PROBABLY AMONG THE LAST HE EVER DREW (IN->AND) PINING FOR A CONDITION HE HAD SO LONG ABANDONED HE ADDED IT IS WHAT I WOULD WISH TO PRACTISE MYSELF AS ONE WITHOUT A CROSS OF BLOOD THOUGH IT IS NOT ALWAYS EASY TO DEAL WITH AN INDIAN AS YOU WOULD WITH A FELLOW CHRISTIAN +4992-41806-0002-1390: THEY BEGAN 
WITH THE ONE IN THE FAMILY SITTING ROOM COLONEL WHEELER RALPH THURSTON MISTER AND MISSUS BILL HARMON WITH (NATTY->NATTIE) AND (RUFUS->RUFFUS) MISTER AND MISSUS POPHAM WITH DIGBY AND LALLIE JOY ALL STANDING IN ADMIRING GROUPS AND THRILLING WITH DELIGHT AT THE ORDER OF EVENTS +8463-294825-0013-2455: FOR MANY THEN THIS BOOK HAS BEEN A SOURCE OF FASCINATION SURELY ONE OF THE MOST INFLUENTIAL NOVELS EVER WRITTEN AN INSPIRATION FOR SUCH SCIENTISTS AND DISCOVERERS AS ENGINEER SIMON LAKE OCEANOGRAPHER WILLIAM (BEEBE->B B) POLAR (TRAVELER->TRAVELLER) SIR ERNEST SHACKLETON +8230-279154-0009-2321: OUR CONFIDENCE OR LACK OF CONFIDENCE IN THE ACCURACY OF A MEMORY IMAGE MUST IN FUNDAMENTAL CASES BE BASED UPON A CHARACTERISTIC OF THE IMAGE ITSELF SINCE WE CANNOT EVOKE THE PAST BODILY AND COMPARE IT WITH THE PRESENT IMAGE +1221-135766-0005-176: HESTER COULD ONLY ACCOUNT FOR THE CHILD'S CHARACTER AND EVEN THEN MOST VAGUELY AND IMPERFECTLY BY RECALLING WHAT SHE HERSELF HAD BEEN DURING THAT MOMENTOUS PERIOD WHILE PEARL WAS IMBIBING HER SOUL FROM THE SPIRITUAL WORLD AND HER BODILY FRAME FROM ITS MATERIAL OF EARTH +3729-6852-0004-1031: HER FACE WAS AN ENIGMA FOR IT INSPIRED (EVERYONE->EVERY ONE) WITH THE WARMEST SYMPATHY AND YET IF YOU EXAMINED IT ATTENTIVELY THERE WAS NOT ONE BEAUTIFUL FEATURE SHE COULD NOT BE CALLED HANDSOME BUT NO ONE COULD HAVE THOUGHT HER UGLY +2094-142345-0011-522: DO NOT SUPPOSE HOWEVER THAT MISSUS POYSER WAS ELDERLY OR SHREWISH IN HER APPEARANCE SHE WAS A GOOD LOOKING WOMAN NOT MORE THAN EIGHT AND THIRTY OF FAIR COMPLEXION AND SANDY HAIR WELL SHAPEN LIGHT FOOTED +2300-131720-0008-580: EVERYTHING HE HAS DONE HAS BEEN AIMED AT THE CONSERVATION OF ENERGY THE CONTRACTION OF SPACE THE INTENSIFICATION OF CULTURE +8555-284447-0013-2514: WHY YOU (SAID->SENT) TO FETCH THE FIRST LIVING CREATURE WE MET AND THAT WAS (THIS BILLYGOAT->THE SPILLY GOAT) REPLIED THE CAPTAIN PANTING HARD AS HE HELD FAST TO ONE OF THE GOAT'S HORNS +260-123288-0011-745: BUT IF WE HAVE NOW CEASED TO ADVANCE WHY DO WE YET LEAVE THAT SAIL LOOSE WHICH AT THE FIRST SHOCK OF (THE->A) TEMPEST MAY CAPSIZE US IN A MOMENT +3575-170457-0030-1000: OF THIS SECOND LETTER ALSO SHE SPOKE AND TOLD ME THAT IT CONTAINED AN INVITATION FOR HER TO GO AND SEE THE POET IF EVER SHE VISITED THE LAKES +237-134500-0041-700: I CAN'T PRAY TO HAVE THE THINGS I WANT HE SAID SLOWLY AND I WON'T PRAY NOT TO HAVE THEM NOT IF I'M DAMNED FOR IT +8555-284449-0003-2529: WHEN THE BLUESKINS SAW GHIP GHISIZZLE THEY RAISED ANOTHER GREAT SHOUT FOR HE WAS THE FAVORITE OF THE SOLDIERS AND VERY POPULAR WITH ALL THE PEOPLE +3729-6852-0002-1029: IN ORDER TO PLEASE HER I SPOKE TO HER OF THE ABBE CONTI AND I HAD OCCASION TO QUOTE TWO LINES OF THAT PROFOUND WRITER +4077-13751-0001-1075: ITS ORIGIN WAS SMALL A GERM AN INSIGNIFICANT SEED HARDLY TO BE THOUGHT OF AS LIKELY TO AROUSE OPPOSITION +5105-28241-0008-1450: (*->THAT) THE EARTH HAS UNDOUBTEDLY ENTERED UPON A NEW ORBIT BUT SHE IS NOT INCURRING ANY PROBABLE RISK OF BEING PRECIPITATED (ONTO->ON TO) THE SUN +3570-5694-0005-925: UNDER THE (TABU->TABOO) CERTAIN VICTUALS AND MORE PARTICULARLY CERTAIN BEVERAGES ARE STRICTLY RESERVED FOR THE USE OF THE SUPERIOR CLASS +4992-41806-0001-1389: TO NIGHT THERE WAS NO NEED OF EXTRA HEAT AND THERE WERE GREAT CEREMONIES TO BE OBSERVED IN LIGHTING THE FIRES ON THE HEARTHSTONES +3570-5695-0011-954: IT IS EVIDENT THEREFORE THAT THE PRESENT TREND OF THE DEVELOPMENT IS IN THE DIRECTION OF HEIGHTENING THE UTILITY OF CONSPICUOUS CONSUMPTION AS COMPARED WITH LEISURE +3575-170457-0000-970: AND 
OFTEN (HAS->AS) MY MOTHER SAID WHILE ON HER LAP I LAID MY HEAD SHE FEARED FOR TIME I WAS NOT MADE BUT FOR ETERNITY +672-122797-0055-1840: AND THEN HE TOLD ALL ABOUT HIS YOUTH AND THE LITTLE MICE HAD NEVER HEARD THE LIKE BEFORE AND THEY LISTENED AND SAID +908-157963-0026-2589: AND SAYS THOU MOTHER OF MY CHILDREN I HAVE LOVED THEE AND I HAVE GIVEN THEE A CROWN THAT NONE CAN TAKE AWAY +5639-40744-0006-1570: (RODOLFO->UDOLPHO) ARRIVED AT HIS OWN HOUSE WITHOUT ANY IMPEDIMENT (AND LEOCADIA'S->ANDUCADIA'S) PARENTS REACHED THEIRS (HEART BROKEN->HEARTBROKEN) AND DESPAIRING +7021-79730-0005-2034: SO YOU WILL BE A GOOD GIRL I KNOW AND NOT MAKE ANY TROUBLE BUT WILL STAY AT HOME CONTENTEDLY WON'T YOU +7127-75946-0006-2094: HE HAS GIVEN THEM WITH TOO MUCH GRACE NOT TO HAVE OTHERS STILL TO GIVE IF THEY ARE REQUIRED WHICH IS THE CASE AT THE PRESENT MOMENT +1188-133604-0036-100: IN BOTH THESE HIGH MYTHICAL SUBJECTS THE SURROUNDING NATURE THOUGH SUFFERING IS STILL DIGNIFIED AND BEAUTIFUL +5683-32866-0021-1645: MY BED WAS UNEXCEPTIONABLY COMFORTABLE BUT IN MY THEN MOOD I COULD HAVE WISHED IT A GREAT DEAL MORE MODERN +61-70968-0054-1735: MISTRESS FITZOOTH HAD BEEN CARRIED OFF BY THE SHERIFF'S DAUGHTER AND HER MAIDS AS SOON AS THEY (HAD->*) ENTERED THE HOUSE SO THAT ROBIN ALONE HAD THE CARE OF MONTFICHET +7127-75946-0024-2112: MONSIEUR WAS THE ONLY ONE WHO DID NOT UNDERSTAND ANYTHING ABOUT THE MATTER +7176-88083-0012-2171: THE HAWK ALIGHTED ON THE DEAD BRANCH AND SAT UPRIGHT MOTIONLESS AS IF SURPRISED +6930-76324-0005-1977: THE TWIN BROTHER DID SOMETHING SHE DIDN'T LIKE AND SHE TURNED HIS PICTURE TO THE WALL +121-121726-0008-117: HOSE MAN'S EXCUSE FOR WETTING THE WALK +908-157963-0028-2591: OR AN EYE OF GIFTS AND GRACES (SHOWRING->SHOWERING) FRUITS (AND->IN) COINED GOLD +61-70968-0026-1707: THE STROLLERS TOOK THEIR PART IN IT WITH HEARTY ZEST NOW THAT THEY HAD SOME CHANCE OF BEATING OFF THEIR FOES +8224-274384-0009-2307: THE PARLIAMENT AND THE SCOTS LAID THEIR PROPOSALS BEFORE THE KING +672-122797-0027-1812: THE SERVANTS AS WELL AS THE YOUNG LADIES DECORATED IT +237-134493-0018-658: THERE IS EVEN A WHITE ROW OF BEEHIVES IN THE ORCHARD UNDER THE WALNUT TREES +8463-287645-0000-2427: THIS WAS WHAT DID THE MISCHIEF SO FAR AS THE RUNNING AWAY WAS CONCERNED +7127-75946-0009-2097: NOT AT ALL YOU ARE ON THE CONTRARY MOST AGREEABLE TO ME +1995-1836-0011-477: POSITIVELY HEROIC ADDED (CRESSWELL->CRASWELL) AVOIDING HIS SISTER'S EYES +4970-29095-0012-1317: AND BESIDES SUPPOSE (THEE->THEY) DOES LEARN MEDICINE +2961-961-0000-897: SOCRATES BEGINS THE TIMAEUS WITH (A->THE) SUMMARY OF THE REPUBLIC +4992-23283-0003-1347: SO THERE IS TO ME ADDED SANDFORD WITH A SARCASTIC SNEER +1580-141084-0000-388: IT WAS THE INDIAN WHOSE DARK SILHOUETTE APPEARED SUDDENLY UPON HIS BLIND +8230-279154-0039-2351: THIS KNOWLEDGE IS MEMORY IN ONE SENSE THOUGH IN ANOTHER IT IS NOT +237-134500-0028-687: (I'M->I AM) SURE ALEXANDRA HOPES YOU WILL STAY ON HERE SHE MURMURED +908-157963-0014-2577: DESCEND O LITTLE CLOUD AND HOVER BEFORE THE EYES OF (THEL->THELL) +5105-28233-0001-1407: HE SEEMED BORN TO PLEASE WITHOUT BEING CONSCIOUS OF THE POWER HE POSSESSED +1580-141083-0024-358: YOU LEFT HIM IN A CHAIR YOU SAY WHICH CHAIR BY THE WINDOW THERE +1320-122617-0005-297: THE BEAR SHOOK HIS SHAGGY SIDES AND THEN A WELL KNOWN VOICE REPLIED +908-157963-0013-2576: AND WHY IT SCATTERS ITS BRIGHT BEAUTY (THRO->THROUGH) THE HUMID AIR +1580-141083-0008-342: THE PROOF WAS IN THREE LONG SLIPS I HAD LEFT THEM ALL TOGETHER +3570-5694-0022-942: THE LIVERY BECOMES OBNOXIOUS 
TO NEARLY ALL WHO ARE REQUIRED TO WEAR IT +6829-68769-0012-1872: OH SAY THAT'S DIFFERENT OBSERVED MARKHAM ALTERING HIS (DEMEANOR->DEMEANOUR) +1284-1180-0011-223: I AM MY DEAR AND ALL STRANGERS ARE WELCOME TO MY HOME +5683-32879-0022-1677: I LIKE YOU STILL RACHEL I'M SURE I'LL ALWAYS LIKE YOU +8463-294828-0009-2471: NOT ONCE DID HE COMMENT ON THE LENGTH OR THE HARDSHIPS OF (A->THE) JOURNEY +7127-75947-0008-2126: THE ARROW PIERCED HIS HEART AND WOUNDED HIM MORTALLY +1284-1181-0007-252: SHE POURED INTO THE DISH A QUANTITY FROM EACH OF THESE BOTTLES +1580-141083-0053-387: YOU HAVEN'T SEEN ANY OF THEM NO SIR +6829-68771-0003-1917: THE DEMOCRATIC COMMITTEE FIGURED OUT A WAY TO DO THIS +1089-134691-0020-58: (HELLO->HALLO) STEPHANOS HERE COMES THE (DEDALUS->DAEDALUS) +1580-141084-0014-402: WHY BANNISTER THE SERVANT WHAT'S HIS GAME IN THE MATTER +5142-33396-0035-1497: DID YOU EVER HAVE SUCH A LORDLY GUEST BEFORE I WENT ON +8555-284449-0006-2532: DON'T WORRY SIZZLE DEAR IT'LL ALL COME RIGHT PRETTY SOON +2094-142345-0056-567: I THINK I SHOULD BE DOING YOU A SERVICE TO TURN YOU OUT OF SUCH A PLACE +260-123440-0014-777: AND I DECLARE IT'S TOO BAD THAT IT IS +2961-961-0002-899: AND THEREFORE TO YOU I TURN TIMAEUS CITIZEN OF LOCRIS WHO ARE AT ONCE A PHILOSOPHER AND A STATESMAN AND TO YOU (CRITIAS->CRITIUS) WHOM ALL ATHENIANS KNOW TO BE SIMILARLY ACCOMPLISHED AND TO HERMOCRATES WHO IS ALSO FITTED BY NATURE AND EDUCATION TO SHARE IN OUR DISCOURSE +8224-274381-0012-2292: MONTROSE WEAK IN CAVALRY HERE LINED HIS TROOPS OF HORSE WITH INFANTRY AND AFTER PUTTING THE ENEMY'S HORSE TO ROUT FELL WITH UNITED FORCE UPON THEIR FOOT WHO WERE ENTIRELY CUT IN PIECES THOUGH WITH THE LOSS OF THE GALLANT LORD GORDON ON THE PART OF THE ROYALISTS +3729-6852-0005-1032: (SILVIA->SYLVIA) WAS THE ADORATION OF FRANCE AND HER TALENT WAS THE REAL SUPPORT OF ALL THE COMEDIES WHICH THE GREATEST AUTHORS WROTE FOR HER ESPECIALLY OF THE PLAYS OF (MARIVAUX->MAREVAUX) FOR WITHOUT HER HIS COMEDIES WOULD NEVER HAVE GONE TO POSTERITY +1320-122617-0007-299: COME COME RETURNED HAWKEYE UNCASING HIS HONEST COUNTENANCE THE BETTER TO ASSURE THE WAVERING CONFIDENCE OF HIS COMPANION YOU MAY SEE A SKIN WHICH IF IT BE NOT AS WHITE AS ONE OF THE GENTLE ONES HAS NO TINGE OF RED TO IT THAT THE WINDS OF THE HEAVEN AND THE SUN HAVE NOT BESTOWED NOW LET US TO BUSINESS +2300-131720-0026-598: THE ARC LAMP INSTALLED OUTSIDE A (CUSTOMER'S->CUSTOMERS) PREMISES OR IN A CIRCUIT FOR PUBLIC STREET LIGHTING BURNED SO MANY HOURS NIGHTLY SO MANY NIGHTS IN THE MONTH AND WAS PAID FOR AT THAT RATE SUBJECT TO REBATE FOR HOURS WHEN THE LAMP MIGHT BE OUT THROUGH ACCIDENT +7729-102255-0044-2277: HERE HE WAS PLACED IN THE CUSTODY OF CAPTAIN MARTIN OF THE KICKAPOO RANGERS WHO PROVED A KIND JAILER AND MATERIALLY ASSISTED IN PROTECTING HIM FROM THE DANGEROUS INTENTIONS OF THE MOB WHICH AT THAT TIME HELD LEAVENWORTH UNDER (A->THE) REIGN OF TERROR +4077-13751-0003-1077: IN PLACE OF A SINGLE HAMLET IN THE SMALLEST CORNER OF WHICH THE MEMBERS COULD HAVE CONGREGATED THERE NOW ARE ABOUT SEVENTY STAKES OF ZION AND ABOUT SEVEN HUNDRED ORGANIZED WARDS EACH WARD AND STAKE WITH ITS FULL COMPLEMENT OF OFFICERS AND PRIESTHOOD ORGANIZATIONS +8230-279154-0040-2352: THERE ARE HOWEVER SEVERAL POINTS IN WHICH SUCH AN ACCOUNT OF RECOGNITION IS INADEQUATE TO BEGIN WITH IT MIGHT SEEM AT FIRST SIGHT MORE CORRECT TO DEFINE RECOGNITION AS I HAVE SEEN THIS BEFORE THAN AS THIS HAS EXISTED BEFORE +4077-13751-0004-1078: THE (PRACTISE->PRACTICE) OF GATHERING ITS PROSELYTES INTO ONE PLACE PREVENTS THE BUILDING 
UP AND STRENGTHENING OF FOREIGN BRANCHES AND INASMUCH AS EXTENSIVE AND STRONG ORGANIZATIONS ARE SELDOM MET WITH ABROAD VERY ERRONEOUS IDEAS EXIST CONCERNING THE STRENGTH OF THE CHURCH +7021-85628-0007-2067: AND SHE TOOK (ANDERS->ANDREW'S) HAND AND WALKED WITH HIM UP THE BROAD MARBLE STAIRS WHERE SOLDIERS WERE POSTED AT EVERY THIRD STEP AND THROUGH THE MAGNIFICENT HALLS WHERE COURTIERS IN SILK AND VELVET STOOD BOWING WHEREVER HE WENT +1995-1826-0021-460: DON'T KNOW WELL OF ALL THINGS INWARDLY COMMENTED MISS TAYLOR LITERALLY BORN IN COTTON AND OH WELL AS MUCH AS TO ASK WHAT'S THE USE SHE TURNED AGAIN TO GO +7021-79730-0006-2035: THE MOTHER IN MANAGING THE CASE IN THIS WAY RELIES PARTLY ON CONVINCING THE REASON OF THE CHILD AND PARTLY ON AN APPEAL TO HER AFFECTION +4507-16021-0034-1255: THEY CONSTITUTE TWO DIFFERENT ORDERS OF FACTS WHICH CORRESPOND TO EACH OTHER WHICH ARE ALWAYS INTERLACED AND WHICH OFTEN BRING FORTH RESULTS +1221-135767-0018-205: LITTLE PEARL WHO WAS AS GREATLY PLEASED WITH THE GLEAMING (ARMOUR->ARMOR) AS SHE HAD BEEN WITH THE GLITTERING FRONTISPIECE OF THE HOUSE SPENT SOME TIME LOOKING INTO THE POLISHED MIRROR OF THE BREASTPLATE +2300-131720-0023-595: I THINK HE WAS PERHAPS MORE APPRECIATIVE (THAN->THAT) I WAS OF THE DISCIPLINE OF THE EDISON CONSTRUCTION DEPARTMENT AND THOUGHT IT WOULD BE WELL FOR US TO WAIT UNTIL THE MORNING OF THE FOURTH BEFORE WE STARTED UP +1320-122617-0033-325: IF HOWEVER THEY TAKE YOUR SCALP AS I TRUST AND BELIEVE THEY WILL NOT DEPEND (ON->UPON) IT UNCAS AND I WILL NOT FORGET THE DEED BUT REVENGE IT AS BECOMES TRUE WARRIORS AND TRUSTY FRIENDS +5639-40744-0021-1585: SHE MEANWHILE PASSED HER LIFE WITH HER PARENTS IN THE STRICTEST RETIREMENT NEVER LETTING HERSELF BE SEEN BUT SHUNNING EVERY EYE LEST IT SHOULD READ HER MISFORTUNE IN HER FACE +4992-41806-0017-1405: WE SHUT OUR EYES THE FLOWERS BLOOM ON WE MURMUR BUT THE CORN EARS FILL WE CHOOSE THE SHADOW BUT THE SUN THAT (CASTS->CAST) IT SHINES BEHIND US STILL +1580-141083-0006-340: THE ONLY DUPLICATE WHICH EXISTED SO FAR AS I KNEW WAS THAT WHICH BELONGED TO MY SERVANT BANNISTER A MAN WHO HAS LOOKED AFTER MY ROOM FOR TEN YEARS AND WHOSE HONESTY IS ABSOLUTELY ABOVE SUSPICION +672-122797-0025-1810: THE TREE ONLY CAME TO HIMSELF WHEN HE WAS UNLOADED IN A (COURT YARD->COURTYARD) WITH THE OTHER TREES AND HEARD A MAN SAY THAT ONE IS SPLENDID WE DON'T WANT THE OTHERS +1284-1180-0008-220: ALL THE MORNING THEY TRUDGED UP THE MOUNTAIN PATH AND AT NOON UNC AND OJO SAT ON A FALLEN TREE TRUNK AND ATE THE LAST OF THE BREAD WHICH THE OLD MUNCHKIN HAD PLACED IN HIS POCKET +908-157963-0011-2574: WHICH THOU DOST SCATTER ON EVERY LITTLE BLADE OF GRASS THAT SPRINGS REVIVES THE MILKED COW AND TAMES THE FIRE BREATHING STEED +6829-68771-0001-1915: ONE OF MISTER (HOPKINS'S->HOPKINS) FIRST TASKS AFTER CALLING HIS FAITHFUL HENCHMEN AROUND HIM WAS TO MAKE A CAREFUL CANVASS OF THE VOTERS OF HIS DISTRICT TO SEE WHAT WAS STILL TO BE ACCOMPLISHED +8455-210777-0038-2394: THEREFORE I FEEL MYSELF QUITE ABLE AS PRESIDENT OF THIS REPUBLIC TO RECEIVE YOU WITH A COURTESY DUE TO THE SERVANTS OF A FRIENDLY ALLY +5683-32865-0009-1615: AND HE PLACED IT IN THAT GENTLEMAN'S FINGERS WHO NOW TOOK HIS TURN AT THE LAMP AND CONTEMPLATED THE LITTLE PARALLELOGRAM WITH A GLEAM OF SLY AMUSEMENT +1320-122617-0018-310: IT WAS SILENT AND GLOOMY BEING TENANTED SOLELY BY THE CAPTIVE AND LIGHTED BY THE DYING EMBERS OF A FIRE WHICH HAD BEEN USED FOR THE (PURPOSED->PURPOSE) OF COOKERY +6930-81414-0004-2005: THE STORY OF ITS EVIL INFLUENCE CAME BACK TO ME AND IN MY 
BEWILDERED CONDITION I WONDERED WHETHER THERE WAS NOT SOME TRUTH IN WHAT HAD BEEN SAID +4507-16021-0047-1268: YESTERDAY YOU WERE TREMBLING FOR A HEALTH THAT IS DEAR TO YOU TO DAY YOU FEAR FOR YOUR OWN TO MORROW IT WILL BE ANXIETY ABOUT MONEY THE DAY AFTER TO MORROW THE DIATRIBE OF A SLANDERER THE DAY AFTER THAT THE MISFORTUNE OF SOME FRIEND THEN THE PREVAILING WEATHER THEN SOMETHING THAT HAS BEEN BROKEN OR LOST THEN A PLEASURE WITH WHICH YOUR CONSCIENCE AND YOUR VERTEBRAL COLUMN REPROACH YOU AGAIN THE COURSE OF PUBLIC AFFAIRS +7021-79730-0003-2032: AS THE (CHAISE->CHASE) DRIVES AWAY MARY STANDS BEWILDERED AND PERPLEXED ON THE (DOOR STEP->DOORSTEP) HER MIND IN A TUMULT OF EXCITEMENT IN WHICH HATRED OF THE DOCTOR DISTRUST AND SUSPICION OF HER MOTHER DISAPPOINTMENT VEXATION AND ILL HUMOR SURGE AND SWELL AMONG THOSE DELICATE ORGANIZATIONS ON WHICH THE STRUCTURE AND DEVELOPMENT OF THE SOUL SO CLOSELY DEPEND DOING PERHAPS AN IRREPARABLE INJURY +4507-16021-0032-1253: HE MUST DESCEND WITH HIS HEART FULL OF CHARITY AND SEVERITY AT THE SAME TIME AS A BROTHER AND AS A JUDGE TO THOSE IMPENETRABLE CASEMATES WHERE CRAWL PELL MELL THOSE WHO BLEED AND THOSE WHO DEAL THE BLOW THOSE WHO WEEP AND THOSE WHO CURSE THOSE WHO FAST AND THOSE WHO DEVOUR THOSE WHO ENDURE EVIL AND THOSE WHO INFLICT IT +1188-133604-0034-98: EXQUISITE ORDER AND UNIVERSAL WITH ETERNAL LIFE AND LIGHT THIS IS THE FAITH AND EFFORT OF THE SCHOOLS OF CRYSTAL AND YOU MAY DESCRIBE AND COMPLETE THEIR WORK QUITE LITERALLY BY TAKING ANY VERSES OF CHAUCER IN HIS TENDER MOOD AND OBSERVING HOW HE INSISTS ON THE CLEARNESS AND BRIGHTNESS FIRST AND THEN ON THE ORDER +8224-274384-0004-2302: AND THE MEN OF ISRAEL ANSWERED THE MEN OF JUDAH AND SAID WE HAVE TEN PARTS IN THE KING AND WE HAVE ALSO MORE RIGHT IN DAVID THAN YE WHY THEN DID YE DESPISE US THAT OUR ADVICE SHOULD NOT BE FIRST HAD IN BRINGING BACK OUR KING +2961-961-0011-908: THE GENEALOGIES WHICH YOU HAVE RECITED TO US OUT OF YOUR OWN (ANNALS SOLON->ANNAL SONG) ARE A MERE CHILDREN'S STORY +3570-5694-0018-938: THE WEARERS OF UNIFORMS AND LIVERIES MAY BE ROUGHLY DIVIDED INTO TWO CLASSES THE FREE AND THE SERVILE OR THE NOBLE AND THE IGNOBLE +908-31957-0009-2603: AND THOUGH I HAVE GROWN SERENE AND STRONG SINCE THEN I THINK THAT GOD HAS WILLED A STILL RENEWABLE FEAR +3570-5696-0010-969: AN ARTICLE MAY BE USEFUL AND WASTEFUL BOTH AND ITS UTILITY TO THE CONSUMER MAY BE MADE UP OF USE AND WASTE IN THE MOST VARYING PROPORTIONS +4446-2271-0020-1133: OF COURSE HE REFLECTED SHE ALWAYS HAD THAT COMBINATION OF SOMETHING HOMELY AND SENSIBLE AND SOMETHING UTTERLY WILD AND DAFT +5105-28241-0006-1448: THE LOG AND THE COMPASS THEREFORE WERE ABLE TO BE CALLED UPON TO DO THE WORK OF THE SEXTANT WHICH HAD BECOME UTTERLY USELESS +908-31957-0024-2618: I LOVE THEE WITH THE PASSION PUT TO USE (IN->AND) MY OLD GRIEFS AND WITH MY CHILDHOOD'S FAITH +8230-279154-0006-2318: THE (BEHAVIOURIST->BEHAVIORIST) WHO ATTEMPTS TO MAKE PSYCHOLOGY A RECORD OF (BEHAVIOUR->BEHAVIOR) HAS TO TRUST HIS MEMORY IN MAKING THE RECORD +5683-32879-0018-1673: IT IS AN ANTIPATHY AN ANTIPATHY I CANNOT GET OVER DEAR DORCAS YOU MAY THINK IT A MADNESS BUT DON'T BLAME ME +61-70970-0035-1779: WARRENTON SPOKE THUS WITH SIGNIFICANCE TO SHOW ROBIN THAT HE WAS NOT TO THINK GEOFFREY'S CLAIMS TO THE ESTATE WOULD BE PASSED BY +7176-88083-0025-2184: AT THE SUDDEN SHARP STING OF IT THE GREAT BIRD TURNED HIS HEAD AND NOTICED FOR THE FIRST TIME THE FISHERMAN STANDING ON THE BANK +8463-294828-0006-2468: FROM RUBBING SHOULDERS WITH SCIENTISTS IN OUR LITTLE UNIVERSE BY 
THE BOTANICAL GARDENS THE BOY HAD COME TO KNOW A THING OR TWO +7176-92135-0042-2229: IN NOVELS THE HERO HAS OFTEN PUSHED HIS MEALS AWAY UNTASTED BUT NO (STAGE->STEED) HERO WOULD DO ANYTHING SO UNNATURAL AS THIS +260-123286-0027-729: I (CAN->COULD) DISTINGUISH THE EYE OF THE (ICHTHYOSAURUS->ICHTHIOSAURUS) GLOWING LIKE A RED HOT COAL AND AS LARGE AS A MAN'S HEAD +1284-1181-0004-249: GOLD IS THE MOST COMMON METAL IN THE LAND OF OZ AND IS USED FOR MANY PURPOSES BECAUSE IT IS SOFT AND PLIABLE +4446-2275-0033-1208: WHAT I MEAN IS THAT I WANT YOU TO PROMISE NEVER TO SEE ME AGAIN NO MATTER HOW OFTEN I COME NO MATTER HOW HARD I BEG +1580-141084-0026-414: IF THIS MATTER IS NOT TO BECOME PUBLIC WE MUST GIVE OURSELVES CERTAIN POWERS AND RESOLVE OURSELVES INTO A SMALL PRIVATE COURT MARTIAL +8463-294828-0036-2498: THE WHARVES OF BROOKLYN AND EVERY PART OF NEW YORK BORDERING THE EAST RIVER WERE CROWDED WITH CURIOSITY SEEKERS +1995-1836-0008-474: I BELIEVE IN THE TRAINING OF PEOPLE TO (THEIR->THE) HIGHEST CAPACITY THE ENGLISHMAN HERE HEARTILY SECONDED HIM +260-123286-0026-728: TWO MONSTERS ONLY WERE CREATING ALL THIS COMMOTION AND BEFORE MY EYES ARE TWO REPTILES OF THE PRIMITIVE WORLD +7176-88083-0010-2169: THE CAT GROWLED SOFTLY PICKED UP THE PRIZE IN HER JAWS AND TROTTED INTO THE BUSHES TO DEVOUR IT +3575-170457-0014-984: TO THOSE DUTIES YOU HAVE NOT YET BEEN CALLED AND WHEN YOU ARE YOU WILL BE LESS EAGER FOR CELEBRITY +4992-23283-0000-1344: BUT THE MORE FORGETFULNESS HAD THEN PREVAILED THE MORE POWERFUL WAS THE FORCE OF REMEMBRANCE WHEN SHE AWOKE +8230-279154-0021-2333: REMEMBERING HAS TO BE A PRESENT OCCURRENCE IN SOME WAY RESEMBLING OR RELATED TO WHAT IS REMEMBERED +7127-75946-0020-2108: FAR FROM IT SIRE YOUR MAJESTY HAVING GIVEN NO DIRECTIONS ABOUT IT THE MUSICIANS HAVE RETAINED IT +2094-142345-0008-519: BUT THERE IS ALWAYS A STRONGER SENSE OF LIFE WHEN THE SUN IS BRILLIANT AFTER RAIN AND NOW HE IS POURING DOWN HIS BEAMS AND MAKING SPARKLES AMONG THE WET STRAW AND LIGHTING UP EVERY PATCH OF VIVID GREEN MOSS ON THE RED TILES OF THE COW SHED AND TURNING EVEN THE MUDDY WATER THAT IS HURRYING ALONG THE CHANNEL TO THE DRAIN INTO A MIRROR FOR THE YELLOW BILLED DUCKS WHO ARE SEIZING THE OPPORTUNITY OF GETTING A DRINK WITH AS MUCH BODY IN IT AS POSSIBLE +672-122797-0008-1793: THIS HAPPENED EVERY YEAR AND THE YOUNG FIR TREE THAT HAD NOW GROWN TO A VERY COMELY SIZE TREMBLED AT THE SIGHT FOR THE MAGNIFICENT GREAT TREES FELL TO THE EARTH WITH NOISE AND CRACKING THE BRANCHES WERE LOPPED OFF AND THE TREES LOOKED LONG AND BARE THEY WERE HARDLY TO BE (RECOGNISED->RECOGNIZED) AND THEN THEY WERE (LAID IN->LADEN) CARTS AND THE HORSES DRAGGED THEM OUT OF THE WOOD +8224-274381-0009-2289: THIS SEVERITY BY WHICH MONTROSE SULLIED HIS VICTORIES WAS THE RESULT OF PRIVATE ANIMOSITY AGAINST THE CHIEFTAIN AS MUCH AS OF ZEAL FOR THE PUBLIC CAUSE ARGYLE COLLECTING THREE THOUSAND MEN MARCHED IN QUEST OF THE ENEMY WHO HAD RETIRED WITH THEIR PLUNDER AND HE LAY AT (INNERLOCHY->INNER LOCKI) SUPPOSING HIMSELF STILL AT A CONSIDERABLE DISTANCE FROM THEM +8224-274381-0008-2288: WITH THESE AND SOME (REENFORCEMENTS->REINFORCEMENTS) OF THE (ATHOLEMEN->ATHOL MEN) AND MACDONALDS WHOM HE HAD RECALLED MONTROSE FELL SUDDENLY UPON ARGYLE'S COUNTRY AND LET LOOSE UPON IT ALL THE RAGE OF WAR CARRYING OFF THE CATTLE BURNING THE HOUSES AND PUTTING THE INHABITANTS TO THE SWORD +5683-32866-0019-1643: THE MYSTERY OF THEIR ORIGIN THEIR CAPACITY FOR EVOLVING LATENT FACULTIES OF CRIME AND THE STEADY VITALITY WITH WHICH THEY SURVIVE THE HEARSE AND SPEAK THEIR 
DEEP MOUTHED MALIGNITIES IN EVERY NEW BORN GENERATION HAVE ASSOCIATED THEM SOMEHOW IN MY MIND WITH A SPELL OF LIFE EXCEEDING AND DISTINCT FROM HUMAN AND (A SPECIAL->ESPECIAL) SATANIC ACTION +8230-279154-0037-2349: RECOGNITION IN THIS SENSE DOES NOT NECESSARILY INVOLVE MORE THAN A HABIT OF ASSOCIATION THE KIND OF OBJECT WE ARE SEEING AT THE MOMENT IS ASSOCIATED WITH THE WORD CAT OR WITH AN AUDITORY IMAGE OF PURRING OR WHATEVER OTHER CHARACTERISTIC WE MAY HAPPEN TO RECOGNIZE IN THE CAT OF THE MOMENT +8455-210777-0055-2411: SIR I HAVE IT IN COMMAND TO INFORM YOUR EXCELLENCY THAT YOU HAVE BEEN APPOINTED GOVERNOR OF THE CROWN COLONY WHICH IS CALLED BRITANNULA +1995-1837-0025-506: SHE ROSE WITH A FLEETING GLANCE GATHERED THE SHAWL (ROUND->AROUND) HER THEN GLIDING FORWARD WAVERING TREMULOUS SLIPPED ACROSS THE ROAD AND INTO THE SWAMP +1320-122617-0034-326: HOLD SAID DAVID PERCEIVING THAT WITH THIS ASSURANCE THEY WERE ABOUT TO LEAVE HIM I AM AN UNWORTHY AND HUMBLE FOLLOWER OF ONE WHO TAUGHT NOT THE DAMNABLE PRINCIPLE OF REVENGE +4992-41806-0003-1391: KATHLEEN WAVED THE TORCH TO AND FRO AS SHE RECITED SOME BEAUTIFUL LINES WRITTEN FOR SOME SUCH PURPOSE AS THAT WHICH CALLED THEM TOGETHER TO NIGHT +7729-102255-0042-2275: RELOCATED FOOTNOTE GOVERNOR ROBINSON BEING ON HIS WAY EAST THE STEAMBOAT ON WHICH HE WAS TRAVELING STOPPED AT LEXINGTON MISSOURI +237-134493-0017-657: ANY ONE THEREABOUTS WOULD HAVE TOLD YOU THAT THIS WAS ONE OF THE RICHEST FARMS ON THE DIVIDE AND THAT THE FARMER WAS A WOMAN ALEXANDRA BERGSON +6829-68771-0002-1916: THE WEAK (KNEED->NEED) CONTINGENCY MUST BE STRENGTHENED AND FORTIFIED AND A COUPLE OF HUNDRED VOTES IN ONE WAY OR (ANOTHER->THE OTHER) SECURED FROM (THE->*) OPPOSITION +8455-210777-0069-2425: IF YOU WILL GIVE US YOUR PROMISE TO MEET CAPTAIN (BATTLEAX->ATTILAX) HERE AT THIS TIME TO MORROW WE WILL STRETCH A POINT AND DELAY THE DEPARTURE OF THE JOHN BRIGHT FOR TWENTY FOUR HOURS +1320-122617-0020-312: THE SCOUT WHO HAD LEFT DAVID AT THE DOOR TO ASCERTAIN THEY WERE NOT OBSERVED THOUGHT IT PRUDENT TO PRESERVE HIS DISGUISE UNTIL ASSURED OF THEIR PRIVACY +672-122797-0071-1856: IN THE COURT YARD SOME OF THE MERRY CHILDREN WERE PLAYING WHO HAD DANCED AT CHRISTMAS ROUND THE FIR TREE AND WERE SO GLAD AT THE SIGHT OF HIM +7127-75947-0037-2155: OH I AM SPEAKING SERIOUSLY REPLIED MONTALAIS AND MY OPINION IN THIS CASE IS QUITE AS GOOD AS THE KING'S I SUPPOSE IS IT NOT LOUISE +1284-1180-0025-237: I THINK I MUST SHOW YOU MY PATCHWORK GIRL SAID MARGOLOTTE LAUGHING AT THE BOY'S ASTONISHMENT FOR SHE IS RATHER DIFFICULT TO EXPLAIN +1284-1180-0010-222: UNC KNOCKED AT THE DOOR OF THE HOUSE AND A CHUBBY PLEASANT FACED WOMAN DRESSED ALL IN BLUE OPENED IT AND GREETED THE VISITORS WITH A SMILE +1284-134647-0000-267: THE GRATEFUL APPLAUSE OF THE CLERGY HAS CONSECRATED THE MEMORY OF A PRINCE WHO INDULGED THEIR PASSIONS AND PROMOTED THEIR INTEREST +8555-284447-0014-2515: THE IDEA OF PATCHING CAP'N BILL TO A GOAT WAS VASTLY AMUSING TO HIM AND THE MORE HE THOUGHT OF IT THE MORE HE ROARED WITH LAUGHTER +61-70968-0010-1691: FORTHWITH ALL RAN TO THE OPENING OF THE TENT TO SEE WHAT MIGHT BE AMISS BUT MASTER WILL WHO PEEPED OUT FIRST NEEDED NO MORE THAN ONE GLANCE +1320-122617-0019-311: UNCAS OCCUPIED A DISTANT CORNER IN A RECLINING ATTITUDE BEING RIGIDLY BOUND BOTH HANDS AND FEET BY STRONG AND PAINFUL WITHES +1995-1826-0023-462: (GOOBERS->GOOBBLES) DON'T GROW ON (THE->DE) TOPS OF VINES BUT (UNDERGROUND->ON DE GROUN) ON (THE->DE) ROOTS LIKE YAMS IS THAT SO +1995-1837-0026-507: SHE HAD BEEN BORN WITHIN ITS 
BORDERS WITHIN ITS BORDERS SHE HAD LIVED AND GROWN AND WITHIN ITS BORDERS SHE HAD MET HER LOVE +1580-141084-0029-417: HIS TROUBLED BLUE EYES GLANCED AT EACH OF US AND FINALLY RESTED WITH AN EXPRESSION OF BLANK DISMAY UPON BANNISTER IN THE FARTHER CORNER +7176-92135-0014-2201: IF IT BE GRANTED FIRST THAT THE THOUGHTS OF A CERTAIN CHARACTER SHOULD BE KNOWN TO THE AUDIENCE AND SECONDLY THAT SOLILOQUY OR THE HABIT OF THINKING ALOUD IS IN OPPOSITION TO MODERN STAGE TECHNIQUE HOW SHALL A SOLILOQUY BE AVOIDED WITHOUT DAMAGE TO THE PLAY +2961-960-0008-882: WE DO NOT KNOW HOW PLATO WOULD HAVE ARRANGED HIS OWN DIALOGUES OR WHETHER THE THOUGHT OF ARRANGING ANY OF THEM BESIDES THE TWO TRILOGIES WHICH HE HAS EXPRESSLY CONNECTED WAS EVER PRESENT TO HIS MIND +2300-131720-0025-597: THEY WERE LATER USED AS RESERVE MACHINES AND FINALLY WITH THE ENGINE RETIRED FROM SERVICE AS PART OF THE COLLECTION OF EDISONIA BUT THEY REMAIN IN PRACTICALLY AS GOOD CONDITION AS WHEN INSTALLED IN EIGHTEEN EIGHTY THREE +2830-3979-0007-791: MUCH LATER WHEN A FRIEND OF HIS WAS PREPARING AN EDITION OF ALL HIS LATIN WORKS HE REMARKED TO HIS HOME CIRCLE IF I HAD MY WAY ABOUT IT THEY WOULD REPUBLISH ONLY THOSE OF MY BOOKS WHICH HAVE DOCTRINE (MY GALATIANS->MIGALATIONS) FOR INSTANCE +3570-5694-0006-926: DRUNKENNESS AND THE OTHER PATHOLOGICAL CONSEQUENCES OF THE FREE USE OF STIMULANTS THEREFORE TEND IN THEIR TURN TO BECOME HONORIFIC AS BEING A MARK AT THE SECOND REMOVE OF THE SUPERIOR STATUS OF THOSE WHO ARE ABLE TO AFFORD THE INDULGENCE +4507-16021-0021-1242: THE PAINTER WHO SAYS MY GRINDER THE NOTARY WHO SAYS MY SKIP THE GUTTER THE HAIRDRESSER WHO SAYS MY (MEALYBACK->MEALLY BACK) THE COBBLER WHO SAYS MY CUB TALKS SLANG +4992-23283-0017-1361: MISS WOODLEY WAS TOO LITTLE VERSED IN THE SUBJECT TO KNOW THIS WOULD HAVE BEEN NOT TO LOVE AT ALL AT LEAST NOT TO THE EXTENT OF BREAKING THROUGH ENGAGEMENTS AND ALL THE VARIOUS OBSTACLES THAT STILL (MILITATED->MITIGATED) AGAINST THEIR UNION +4077-13754-0011-1107: FEDERAL JUDGES AND UNITED STATES ATTORNEYS IN UTAH WHO WERE NOT MORMONS NOR LOVERS OF MORMONISM REFUSED TO ENTERTAIN COMPLAINTS OR PROSECUTE CASES UNDER THE LAW BECAUSE OF ITS MANIFEST INJUSTICE AND INADEQUACY +672-122797-0026-1811: THERE TOO WERE LARGE EASY CHAIRS SILKEN SOFAS LARGE TABLES FULL OF PICTURE BOOKS AND FULL OF TOYS WORTH HUNDREDS AND HUNDREDS OF CROWNS AT LEAST THE CHILDREN SAID SO +5639-40744-0023-1587: WHEN THE BOY WALKED THROUGH THE STREETS BLESSINGS WERE SHOWERED UPON HIM BY ALL WHO SAW HIM (BLESSINGS->BLESSING) UPON HIS BEAUTY UPON THE MOTHER THAT BORE HIM UPON THE FATHER THAT BEGOT HIM UPON THOSE WHO BROUGHT HIM UP SO WELL +121-127105-0027-161: HE HAD FOR HIS OWN TOWN RESIDENCE A BIG HOUSE FILLED WITH THE SPOILS OF TRAVEL AND THE TROPHIES OF THE CHASE BUT IT WAS TO HIS COUNTRY HOME AN OLD FAMILY PLACE IN ESSEX THAT HE WISHED HER IMMEDIATELY TO PROCEED +5639-40744-0038-1602: JUST AT THE MOMENT WHEN THE TEARS OF THE PITYING BEHOLDERS FLOWED FASTEST AND THEIR EJACULATIONS WERE MOST EXPRESSIVE OF DESPAIR (LEOCADIA->THE ARCADIA) GAVE SIGNS OF RECOVERY AND BROUGHT BACK GLADNESS TO THE HEARTS OF ALL +61-70968-0057-1738: THESE ESCAPADES ARE NOT FOR OLD GAMEWELL LAD HIS DAY HAS COME TO TWILIGHT +260-123286-0031-733: AS FOR THE (ICHTHYOSAURUS->ITHIOSAURUS) HAS HE RETURNED TO HIS SUBMARINE CAVERN +5683-32879-0023-1678: YOU RESEMBLE ME RACHEL YOU ARE FEARLESS AND INFLEXIBLE AND GENEROUS +5142-33396-0007-1469: AT THE PROW I CARVED THE HEAD WITH OPEN MOUTH AND FORKED TONGUE THRUST OUT +2094-142345-0058-569: BY THE (BY I'VE->BYE I 
HAVE) NEVER SEEN YOUR DAIRY I MUST SEE YOUR DAIRY MISSUS POYSER +260-123440-0016-779: I SHALL BE PUNISHED FOR IT NOW I SUPPOSE BY BEING DROWNED IN MY OWN TEARS +2830-3980-0041-838: GRACE INVOLVES THE REMISSION OF SINS PEACE AND A HAPPY CONSCIENCE +1284-1180-0012-224: WE HAVE COME FROM A FAR LONELIER PLACE THAN THIS A LONELIER PLACE +2830-3980-0072-869: THIS PASSAGE THEN BEARS OUT THE FACT THAT ALL MEN ARE SOLD UNDER SIN +3570-5695-0000-943: IN A GENERAL WAY THOUGH NOT WHOLLY NOR CONSISTENTLY THESE TWO GROUPS COINCIDE +8455-210777-0042-2398: YOU HEAR WHAT SIR (FERDINANDO->FERDINAND O') BROWN HAS SAID REPLIED CAPTAIN (BATTLEAX->BATTLE AXE) +5105-28240-0022-1439: IT WAS ON THE LAST DAY OF JANUARY THAT THE REPAIRS OF THE SCHOONER WERE COMPLETED +7176-88083-0014-2173: THE HAWK SAT UPON THE BRANCH AND WATCHED HIS QUARRY SWIMMING BENEATH THE SURFACE +1089-134686-0029-29: ON FRIDAY CONFESSION WILL BE HEARD ALL THE AFTERNOON AFTER BEADS +4446-2273-0001-1139: THEY ASKED HIM TO COME TO SEE THEM IN CHELSEA AND THEY SPOKE VERY TENDERLY OF HILDA +5105-28240-0007-1424: SERVADAC TOOK IT FOR GRANTED THAT THE DOBRYNA WAS ENDEAVORING TO PUT IN +4446-2275-0038-1213: I WILL ASK THE LEAST IMAGINABLE BUT I MUST HAVE SOMETHING +908-157963-0030-2593: WHY AN EAR A WHIRLPOOL FIERCE TO DRAW CREATIONS IN +4446-2273-0015-1153: DON'T I THOUGH I'M SO SORRY TO HEAR IT HOW DID HER SON TURN OUT +8463-294825-0016-2458: MILLIGRAM ROUGHLY ONE TWENTY EIGHT (THOUSAND->THOUSANDTH) OF AN OUNCE +672-122797-0058-1843: WHO IS HUMPY DUMPY ASKED THE MICE +6930-76324-0006-1978: HERS HAPPENED TO BE (IN->ON) THE SAME FRAME TOO BUT SHE EVIDENTLY DIDN'T CARE ABOUT (THAT->IT) +6930-81414-0007-2008: NOTHING MORE NOT EVEN THE (WRIST->RISK) TO WHICH IT MIGHT BE ATTACHED +6930-81414-0022-2023: I HAD AGAIN BEEN ACTING UNDER THE INFLUENCE OF THIS MAN'S POWER +5142-33396-0036-1498: SO I WILL GIVE OUT THIS LAW THAT MY MEN SHALL NEVER LEAVE YOU ALONE +237-134500-0014-673: AND (EMIL->AMY) MOWED HIS WAY SLOWLY DOWN TOWARD THE CHERRY TREES +4446-2275-0008-1183: WHEN DID YOU COME BARTLEY AND HOW DID IT HAPPEN YOU HAVEN'T SPOKEN A WORD +260-123286-0017-719: I SHUDDER AS I RECALL THESE MONSTERS TO MY REMEMBRANCE +2830-3980-0071-868: WE THINK THAT BY SOME LITTLE WORK OR MERIT WE CAN DISMISS SIN +7127-75946-0025-2113: THE BALLET BEGAN THE EFFECT WAS MORE THAN BEAUTIFUL +4446-2271-0011-1124: SIR HARRY (TOWNE->TOWN) MISTER (BARTLEY->BERTLEY) ALEXANDER THE AMERICAN ENGINEER +8463-294828-0011-2473: HE WENT HERE THERE AND EVERYWHERE IN PERFECT CONTENTMENT +1580-141083-0025-359: THE MAN ENTERED AND TOOK THE PAPERS SHEET BY SHEET FROM THE CENTRAL TABLE +4970-29095-0029-1334: IS HE GOING TO START A DAILY NEWSPAPER AMONG THE (KICK A POOS->KICKAPOOS) +4507-16021-0038-1259: IN THIS GUISE IT BECOMES HORRIBLE +1320-122612-0009-284: IT WOULD HAVE BEEN MORE WONDERFUL HAD HE SPOKEN WITHOUT A BIDDING +1320-122617-0022-314: THE DELAWARES ARE CHILDREN OF THE TORTOISE AND THEY OUTSTRIP THE DEER +4446-2275-0023-1198: ALEXANDER GROANED I MEANT TO BUT SOMEHOW I COULDN'T +672-122797-0043-1828: WHAT'S THE MEANING OF THIS THOUGHT THE TREE +2094-142345-0012-523: THE FAMILY LIKENESS BETWEEN HER AND HER NIECE DINAH MORRIS WITH THE CONTRAST BETWEEN HER KEENNESS AND DINAH'S SERAPHIC GENTLENESS OF EXPRESSION MIGHT HAVE SERVED A PAINTER AS AN EXCELLENT SUGGESTION FOR A MARTHA AND MARY +2300-131720-0011-583: BUT WHEN IT CAME TO BE A QUESTION OF LIGHTING A SCATTERED SUBURB A GROUP OF DWELLINGS ON THE OUTSKIRTS A REMOTE COUNTRY RESIDENCE OR A FARM HOUSE THE ALTERNATING CURRENT IN ALL ELEMENTS 
SAVE ITS DANGER WAS AND IS IDEAL +672-122797-0042-1827: BUT THEY DRAGGED HIM OUT OF THE ROOM AND UP THE STAIRS INTO THE LOFT AND HERE IN A DARK CORNER WHERE NO DAYLIGHT COULD ENTER THEY LEFT HIM +1188-133604-0024-88: NOTHING WILL BE MORE PRECIOUS TO YOU I THINK IN THE PRACTICAL STUDY OF ART THAN THE CONVICTION WHICH WILL FORCE ITSELF ON YOU MORE AND MORE EVERY HOUR OF THE WAY ALL THINGS ARE BOUND TOGETHER LITTLE AND GREAT IN SPIRIT AND IN MATTER +8230-279154-0010-2322: WE SOMETIMES HAVE IMAGES THAT ARE BY NO MEANS PECULIARLY VAGUE WHICH YET WE DO NOT TRUST FOR EXAMPLE UNDER THE INFLUENCE OF FATIGUE WE MAY SEE A FRIEND'S FACE VIVIDLY AND CLEARLY BUT HORRIBLY DISTORTED +7176-88083-0013-2172: LIKE HIS UNFORTUNATE LITTLE COUSIN THE TEAL HE TOO HAD FELT THE FEAR OF DEATH SMITTEN INTO HIS HEART AND WAS HEADING DESPERATELY FOR THE REFUGE OF SOME DARK OVERHANGING BANK DEEP FRINGED WITH WEEDS WHERE THE DREADFUL EYE OF THE HAWK SHOULD NOT DISCERN HIM +8555-292519-0000-2547: BRIGHTER THAN EARLY DAWN'S MOST BRILLIANT DYE ARE BLOWN CLEAR BANDS OF COLOR THROUGH THE SKY THAT SWIRL AND SWEEP AND MEET TO BREAK AND FOAM LIKE RAINBOW VEILS UPON A BUBBLE'S DOME +4970-29095-0028-1333: HE DOESN'T SAY BUT IT'S ON THE FRONTIER AND ON THE MAP EVERYTHING BEYOND IT IS MARKED INDIANS AND DESERT AND LOOKS AS DESOLATE AS A WEDNESDAY MEETING HUMPH IT WAS TIME FOR HIM TO DO SOMETHING +7176-92135-0000-2187: HE IS A WELCOME FIGURE AT THE GARDEN PARTIES OF THE ELECT WHO ARE ALWAYS READY TO ENCOURAGE HIM BY ACCEPTING FREE SEATS FOR HIS PLAY ACTOR MANAGERS NOD TO HIM EDITORS ALLOW HIM TO CONTRIBUTE WITHOUT CHARGE TO A SYMPOSIUM ON THE PRICE OF GOLF BALLS +5639-40744-0039-1603: WHEN SHE CAME TO HER SENSES AND BLUSHING TO FIND HERSELF IN (RODOLFO'S->RUDOLPHO'S) ARMS WOULD HAVE DISENGAGED HERSELF NO SENORA HE SAID THAT MUST NOT BE STRIVE NOT TO WITHDRAW FROM THE ARMS OF HIM WHO HOLDS YOU IN HIS SOUL +7021-79740-0014-2053: AND THIS METHOD OF TREATING THE CASE WAS MUCH MORE EFFECTUAL IN MAKING THEM DISPOSED TO AVOID COMMITTING A SIMILAR FAULT ANOTHER TIME THAN ANY DIRECT REBUKES OR EXPRESSIONS OF DISPLEASURE ADDRESSED PERSONALLY TO THEM WOULD HAVE BEEN +5105-28240-0006-1423: THE WIND BEING ADVERSE THE DOBRYNA DID NOT MAKE VERY RAPID PROGRESS BUT AS THE WEATHER IN SPITE OF A FEW CLOUDS REMAINED CALM AND THE SEA WAS QUITE SMOOTH SHE WAS ENABLED TO HOLD A STEADY COURSE +4077-13754-0009-1105: AT THE INCEPTION OF (PLURAL->BORAL) MARRIAGE AMONG THE LATTER DAY SAINTS THERE WAS NO LAW NATIONAL OR STATE AGAINST ITS (PRACTISE->PRACTICE) +8555-284449-0019-2545: THAT EVENING TROT GAVE A GRAND BALL IN THE PALACE TO WHICH THE MOST IMPORTANT OF THE PINKIES AND THE BLUESKINS WERE INVITED +8455-210777-0024-2380: HOW MUCH OF EVIL OF REAL ACCOMPLISHED EVIL HAD THERE NOT OCCURRED TO ME DURING THE LAST FEW DAYS +121-127105-0026-160: THE FIRST OF THESE TOUCHES CONVEYED THAT THE WRITTEN STATEMENT TOOK UP THE TALE AT A POINT AFTER IT HAD IN A MANNER BEGUN +4970-29093-0019-1300: THE NIGHT WAS SPENT IN PACKING UP AND WRITING LETTERS FOR PHILIP WOULD NOT TAKE SUCH AN IMPORTANT STEP WITHOUT INFORMING HIS FRIENDS +260-123286-0028-730: ITS JAW IS ENORMOUS AND ACCORDING TO NATURALISTS IT IS ARMED WITH NO LESS THAN ONE HUNDRED AND EIGHTY TWO TEETH +1221-135766-0004-175: THIS OUTWARD MUTABILITY INDICATED AND DID NOT MORE THAN FAIRLY EXPRESS THE VARIOUS PROPERTIES OF HER INNER LIFE +237-134493-0001-641: HIS WIFE NOW LIES BESIDE HIM AND THE WHITE SHAFT THAT MARKS THEIR GRAVES GLEAMS ACROSS THE WHEAT FIELDS +6930-75918-0009-1960: IT IS YOU WHO ARE MISTAKEN RAOUL I 
HAVE READ HIS DISTRESS IN HIS EYES IN HIS EVERY GESTURE AND ACTION THE WHOLE DAY +1995-1826-0007-446: FIND SOME (CRESSWELLS->CROSS WELLS) THERE BIG PLANTATIONS RATED AT TWO HUNDRED AND FIFTY THOUSAND DOLLARS +8555-284449-0018-2544: SO GHIP GHISIZZLE ORDERED THE CAPTAIN TO TAKE A FILE OF SOLDIERS AND ESCORT THE RAVING BEAUTIES TO THEIR NEW HOME +6829-68771-0016-1930: SHE WAS VERY FOND OF THE YOUNG LADIES WHOM SHE HAD KNOWN WHEN AUNT JANE WAS (THE->THEIR) MISTRESS HERE AND BETH WAS HER ESPECIAL (FAVORITE->FAVOURITE) +8455-210777-0053-2409: WERE I TO COMPLY WITH YOUR ORDERS WITHOUT EXPRESSING MY OWN OPINION I SHOULD SEEM TO HAVE DONE SO WILLINGLY HEREAFTER +4992-41797-0010-1375: ALWAYS IRRITABLE COLD INDIFFERENT HE HAD GROWN RAPIDLY MORE SO AS YEARS WENT ON +8224-274384-0006-2304: THE KING STOOD UP AND CALLED FOR THAT PSALM WHICH BEGINS WITH THESE WORDS +61-70970-0036-1780: ROBIN FITZOOTH SAW THAT HIS DOUBTS OF WARRENTON HAD BEEN UNFAIR AND HE BECAME ASHAMED OF HIMSELF FOR HARBORING THEM +1284-1181-0020-265: DEAR ME WHAT A CHATTERBOX YOU'RE GETTING TO BE (UNC->ONK) REMARKED THE MAGICIAN WHO WAS PLEASED WITH THE COMPLIMENT +121-121726-0007-116: HORSE SENSE A DEGREE OF WISDOM THAT KEEPS ONE FROM BETTING ON THE RACES +1995-1836-0009-475: BUT (CRESSWELL->CRASWELL) ADDED SIGNIFICANTLY CAPACITY DIFFERS ENORMOUSLY BETWEEN RACES +237-126133-0011-625: ISN'T HE SPLENDID CRIED JASPER IN INTENSE PRIDE SWELLING UP FATHER KNEW HOW TO DO IT +8463-287645-0013-2440: AS TO HIS AGE AND ALSO THE NAME OF HIS MASTER JACOB'S STATEMENT VARIED SOMEWHAT FROM THE ADVERTISEMENT +1089-134686-0025-25: A GENTLE KICK FROM THE TALL BOY (IN->ON) THE BENCH BEHIND URGED STEPHEN TO ASK A DIFFICULT QUESTION +237-134493-0015-655: THERE WAS SOMETHING INDIVIDUAL ABOUT THE GREAT FARM A MOST UNUSUAL TRIMNESS AND CARE FOR DETAIL +5683-32879-0020-1675: AND SHE THREW HER ARMS ROUND HER COUSIN'S NECK AND BRAVE RACHEL AT LAST BURST INTO TEARS +3575-170457-0045-1015: I AM NOT GOOD ENOUGH FOR YOU AND YOU MUST BE KEPT FROM THE CONTAMINATION OF TOO INTIMATE SOCIETY +7021-85628-0020-2080: HE DARTED LIKE AN ARROW THROUGH ALL THE HALLS DOWN ALL THE STAIRS AND ACROSS THE YARD +1284-1181-0006-251: WELL THAT MAY BE TRUE AGREED MARGOLOTTE BUT ON THE CONTRARY A SERVANT WITH TOO MUCH BRAINS IS SURE TO BECOME INDEPENDENT AND HIGH AND MIGHTY AND FEEL ABOVE HER WORK +7127-75947-0038-2156: LET US RUN THEN SAID ALL THREE AND GRACEFULLY LIFTING UP THE LONG SKIRTS OF THEIR SILK DRESSES THEY LIGHTLY RAN ACROSS THE OPEN SPACE BETWEEN THE LAKE AND THE THICKEST COVERT OF THE PARK +7729-102255-0043-2276: IN A FEW DAYS AN OFFICER CAME WITH A REQUISITION FROM GOVERNOR (SHANNON->SHANON) AND TOOK THE PRISONER BY LAND TO WESTPORT AND AFTERWARDS FROM THERE TO KANSAS CITY AND LEAVENWORTH +7127-75946-0022-2110: SIRE HE SAID YOUR MAJESTY'S MOST DEVOTED SERVANT APPROACHES TO PERFORM A SERVICE ON THIS OCCASION WITH SIMILAR ZEAL (THAT->THAN) HE HAS ALREADY SHOWN ON THE FIELD OF BATTLE +237-134500-0012-671: IN A FEW MOMENTS HE HEARD THE CHERRIES DROPPING SMARTLY INTO THE PAIL AND HE BEGAN TO SWING HIS SCYTHE WITH THAT LONG EVEN STROKE THAT FEW AMERICAN BOYS EVER LEARN +4970-29095-0026-1331: IF I GO TO MEETING AT ALL I LIKE BEST TO SIT IN THE QUIET OLD HOUSE IN GERMANTOWN WHERE THE WINDOWS ARE ALL OPEN AND I CAN SEE THE TREES AND HEAR THE STIR OF THE LEAVES +3570-5694-0021-941: WHENEVER AS IN THESE CASES THE MENIAL SERVICE IN QUESTION HAS TO DO DIRECTLY WITH THE PRIMARY LEISURE EMPLOYMENTS OF FIGHTING AND HUNTING IT EASILY ACQUIRES A REFLECTED HONORIFIC CHARACTER 
+7021-79740-0011-2050: SO SAYING SHE LED THE WAY ON TIPTOE FOLLOWED BY THE CHILDREN OUT OF THE ROOM AND ROUND BY A CIRCUITOUS ROUTE TO THE PIAZZA THERE +908-157963-0012-2575: BUT (THEL->THOU) IS LIKE A FAINT CLOUD KINDLED AT THE RISING SUN I VANISH FROM MY PEARLY THRONE AND WHO SHALL FIND MY PLACE +237-134493-0016-656: ON EITHER SIDE OF THE ROAD FOR A MILE BEFORE YOU REACHED THE FOOT OF THE HILL STOOD TALL OSAGE ORANGE HEDGES THEIR GLOSSY GREEN MARKING OFF THE YELLOW FIELDS +2830-3980-0054-851: THAT CHRIST IS VERY GOD IS APPARENT IN THAT PAUL ASCRIBES TO HIM DIVINE POWERS EQUALLY WITH THE FATHER AS FOR INSTANCE THE POWER TO DISPENSE GRACE AND PEACE +7176-88083-0011-2170: IN FACT HE HAD JUST FINISHED IT THE LAST OF THE TROUT'S TAIL HAD JUST VANISHED WITH A SPASM DOWN HIS STRAINED GULLET WHEN THE BAFFLED HAWK CAUGHT SIGHT OF HIM AND SWOOPED +4077-13751-0002-1076: INSTEAD OF BUT SIX REGULARLY AFFILIATED MEMBERS AND AT MOST TWO SCORE OF ADHERENTS THE ORGANIZATION NUMBERS (TODAY->TO DAY) MANY HUNDRED THOUSAND SOULS +1188-133604-0022-86: YOU MUST LOOK (AT->*) HIM IN THE FACE FIGHT HIM CONQUER HIM WITH WHAT SCATHE YOU MAY YOU NEED NOT THINK TO KEEP OUT OF THE WAY OF HIM +2300-131720-0009-581: FOR SOME YEARS IT WAS NOT FOUND FEASIBLE TO OPERATE MOTORS ON ALTERNATING CURRENT CIRCUITS AND THAT REASON WAS OFTEN URGED AGAINST IT SERIOUSLY +7729-102255-0028-2261: PRIVATE PERSONS WHO HAD LEASED THE FREE STATE HOTEL VAINLY BESOUGHT THE VARIOUS AUTHORITIES TO (PREVENT->PRESENT) THE DESTRUCTION OF THEIR PROPERTY +2961-961-0014-911: NINE THOUSAND YEARS HAVE ELAPSED SINCE SHE (FOUNDED->FOUND IT) YOURS AND EIGHT THOUSAND SINCE SHE (FOUNDED->FOUND IT) OURS AS OUR ANNALS RECORD +8463-294828-0033-2495: I WAS WELL SATISFIED WITH MY CABIN WHICH WAS LOCATED IN THE STERN AND OPENED INTO THE OFFICERS MESS +4446-2275-0000-1175: THE STOP AT QUEENSTOWN THE TEDIOUS PASSAGE UP THE (MERSEY->MERCY) WERE THINGS THAT HE NOTED DIMLY THROUGH HIS GROWING IMPATIENCE +7176-92135-0023-2210: YOU GAVE ME DOUBLE FIVE I WANT DOUBLE NINE HALLO IS THAT YOU HORATIO HAMLET SPEAKING +4507-16021-0059-1280: TO BURN WITHOUT CEASING TO FLY THEREIN LIES THE MARVEL OF GENIUS +61-70970-0001-1745: THERE BEFELL AN ANXIOUS INTERVIEW MISTRESS FITZOOTH ARGUING FOR AND AGAINST THE SQUIRE'S PROJECT IN A BREATH +1995-1826-0017-456: THERE MIGHT BE A BIT OF POETRY HERE AND THERE BUT MOST OF THIS PLACE WAS SUCH DESPERATE PROSE +1580-141083-0002-336: MY FRIEND'S TEMPER HAD NOT IMPROVED SINCE HE HAD BEEN DEPRIVED OF THE CONGENIAL SURROUNDINGS OF BAKER STREET +4507-16021-0014-1235: NOW WHEN HAS HORROR EVER EXCLUDED STUDY +4446-2273-0023-1161: THE STRANGE WOMAN AND HER PASSIONATE SENTENCE THAT RANG OUT SO SHARPLY HAD FRIGHTENED THEM BOTH +8555-292519-0008-2555: OVER THE TRACK LINED CITY STREET THE YOUNG MEN THE GRINNING (MEN->MAN) PASS +4992-41806-0013-1401: APPROACHING THE DINING TABLE HE CAREFULLY PLACED THE ARTICLE IN THE CENTRE AND REMOVED THE CLOTH +7021-79740-0006-2045: I EXPECT YOU HAVE BEEN A VERY GOOD GIRL ANDELLA SINCE YOU WERE HERE LAST +908-31957-0006-2600: OPEN THY HEART WIDE AND FOLD WITHIN THE WET WINGS OF THY DOVE +5639-40744-0017-1581: CHOKING WITH EMOTION (LEOCADI->THE ARCADIA) MADE A SIGN TO HER PARENTS THAT SHE WISHED TO BE ALONE WITH THEM +4446-2273-0022-1160: THEY WERE BOTH REMEMBERING WHAT THE WOMAN HAD SAID WHEN SHE TOOK THE MONEY GOD GIVE YOU A HAPPY LOVE +260-123286-0009-711: I TAKE THIS AS MY ANSWER AND I LEAVE THE PROFESSOR TO BITE HIS LIPS WITH IMPATIENCE +121-127105-0007-141: TO THIS HIS ANSWER WAS PROMPT OH THANK GOD NO AND IS THE 
RECORD YOURS +4446-2271-0018-1131: SHE CONSIDERED (*->FOR) A MOMENT AND THEN SAID NO I THINK NOT THOUGH I AM GLAD YOU (ASK->ASKED) ME +3729-6852-0014-1041: HERE GO AND GET ME (CHANGE->CHANGED) FOR A LOUIS I HAVE IT SIR +2961-961-0009-906: TELL US SAID THE OTHER THE WHOLE STORY AND WHERE (SOLON->SOLOMON) HEARD THE STORY +3570-5694-0001-921: THE UTILITY OF CONSUMPTION AS AN EVIDENCE OF WEALTH IS TO BE CLASSED AS A DERIVATIVE GROWTH +6829-68771-0011-1925: TABLES WERE SPREAD ON THE LAWN AND A DAINTY BUT SUBSTANTIAL REPAST WAS TO BE SERVED +5142-36377-0004-1535: SHE SIGNED TO ME WITH A GHOSTLY SOLEMNITY TO TAKE THE VACANT PLACE ON THE LEFT OF HER FATHER +61-70968-0019-1700: IT IS ENOUGH SAID GEORGE GAMEWELL SHARPLY (AND->AS) HE TURNED UPON THE CROWD +5142-33396-0059-1521: YES AND WITH ALL YOUR FINGERS IT TOOK YOU A YEAR TO CATCH ME THE KING FROWNED MORE ANGRILY +5105-28241-0019-1461: NOTHING WAS TO BE DONE BUT TO PUT ABOUT AND RETURN IN DISAPPOINTMENT (TOWARDS->TOWARD) THE NORTH +4970-29095-0036-1341: HAS THEE CONSULTED THY MOTHER ABOUT A CAREER I SUPPOSE IT IS A CAREER (*->OF) THEE WANTS +237-134500-0022-681: WHEN SHE USED TO TELL ME ABOUT HIM I ALWAYS WONDERED WHETHER SHE WASN'T A LITTLE IN LOVE WITH HIM +61-70970-0031-1775: CRIED HE WAVING THE (LANTHORN->LANTERN) BEFORE HIM TO MAKE SURE THAT THESE WERE NO GHOSTS IN FRONT OF HIM +6829-68769-0020-1880: SIT DOWN PLEASE SAID GATES IN A CHEERFUL AND PLEASANT VOICE THERE'S A BENCH HERE +5683-32866-0002-1626: BUT DON'T THESE VERY WISE THINGS SOMETIMES TURN OUT VERY FOOLISHLY +4446-2271-0009-1122: (MAINHALL->MAYHALL) VOUCHED FOR HER CONSTANCY WITH A LOFTINESS THAT MADE ALEXANDER SMILE EVEN WHILE A KIND OF RAPID EXCITEMENT WAS TINGLING THROUGH HIM +672-122797-0012-1797: I WOULD FAIN KNOW IF I AM DESTINED FOR SO GLORIOUS A CAREER CRIED THE TREE REJOICING +2961-960-0007-881: BUT THEY HAVE NOTHING TO DO WITH THE INTERPRETATION OF PLATO AND IN SPIRIT THEY ARE OPPOSED TO HIM +908-31957-0012-2606: IF HE TO KEEP ONE OATH MUST LOSE ONE JOY BY HIS LIFE'S STAR FORETOLD +4507-16021-0035-1256: TRUE HISTORY BEING A MIXTURE OF ALL THINGS THE TRUE HISTORIAN MINGLES IN EVERYTHING +1580-141083-0038-372: I UNDERSTAND YOU TO SAY THAT THERE ARE THREE STUDENTS WHO USE THIS (STAIR->STARE) AND ARE IN THE HABIT OF PASSING YOUR DOOR YES THERE ARE +260-123286-0030-732: SUDDENLY THE (ICHTHYOSAURUS->ICHDEOSAURUS) AND THE (PLESIOSAURUS->PLECEOSAURUS) DISAPPEAR BELOW LEAVING A WHIRLPOOL EDDYING IN THE WATER +2961-960-0022-896: PLATO HAD NOT THE COMMAND OF HIS MATERIALS WHICH WOULD HAVE ENABLED HIM TO PRODUCE A PERFECT WORK OF ART +5105-28240-0005-1422: HE RECKONED THEREFORE NOT ONLY UPON ASCERTAINING THE EXTENT OF THE LATE CATASTROPHE BUT UPON LEARNING ITS CAUSE +2830-3980-0055-852: TO BESTOW PEACE AND GRACE LIES IN THE PROVINCE OF GOD WHO ALONE CAN CREATE THESE BLESSINGS THE ANGELS CANNOT +260-123286-0000-702: SATURDAY AUGUST FIFTEENTH THE SEA UNBROKEN ALL ROUND NO LAND IN SIGHT +5105-28241-0009-1451: AND WHAT DEMONSTRATION DO YOU OFFER ASKED SERVADAC EAGERLY THAT IT WILL NOT HAPPEN +61-70968-0041-1722: I LIKE YOU WILL YOU ARE THE SECOND WILL THAT I HAVE MET AND LIKED WITHIN TWO DAYS IS THERE A SIGN IN THAT +237-126133-0013-627: I KNOW GASPED POLLY CONTROLLING HER SOBS I WON'T ONLY I CAN'T THANK YOU +2961-961-0015-912: MANY LAWS EXIST AMONG US WHICH ARE THE COUNTERPART OF YOURS AS THEY WERE IN THE OLDEN TIME +6930-81414-0006-2007: WHAT THEN A HUMAN HAND LARGE AND SHAPELY APPEARED DISTINCTLY ON THE SURFACE OF THE POND +5683-32879-0007-1662: RACHEL'S PALE AND SHARPENED FEATURES 
AND DILATED EYE STRUCK HER WITH A PAINFUL SURPRISE +121-127105-0028-162: THE AWKWARD THING WAS THAT THEY HAD PRACTICALLY NO OTHER RELATIONS AND THAT HIS OWN AFFAIRS TOOK UP ALL HIS TIME +5142-33396-0034-1496: THEN I DRANK HALF OF THE HORNFUL AND (SENT->SET) THE REST ACROSS THE FIRE TO THE FARMER HE TOOK IT AND SMILED SAYING +4992-23283-0018-1362: TO RELIEVE HER FROM BOTH HE LAID HIS HAND WITH FORCE UPON HIS HEART AND SAID DO YOU BELIEVE ME +672-122797-0057-1842: YES IN REALITY THOSE WERE HAPPY TIMES +6829-68771-0032-1946: HOWEVER HER FEATURES AND FORM MIGHT REPRESS ANY EVIDENCE OF NERVOUSNESS THESE HANDS TOLD A DIFFERENT STORY +2830-3980-0010-807: EITHER HE CALLS MINISTERS THROUGH THE AGENCY OF MEN OR HE CALLS THEM DIRECTLY AS HE CALLED THE PROPHETS AND APOSTLES +3575-170457-0047-1017: TABBY HAD LIVED WITH THEM FOR TEN OR TWELVE YEARS AND WAS AS CHARLOTTE EXPRESSED IT ONE OF THE FAMILY +8224-274381-0011-2291: HIS CONDUCT AND PRESENCE OF MIND IN THIS EMERGENCE APPEARED CONSPICUOUS +2830-3980-0051-848: WHEN YOU ARGUE ABOUT THE NATURE OF GOD APART FROM THE QUESTION OF JUSTIFICATION YOU MAY BE AS PROFOUND AS YOU LIKE +672-122797-0007-1792: IN AUTUMN THE WOOD CUTTERS ALWAYS CAME AND FELLED SOME OF THE LARGEST TREES +2830-3980-0006-803: AGAINST THESE BOASTING FALSE APOSTLES PAUL BOLDLY DEFENDS HIS APOSTOLIC AUTHORITY AND MINISTRY +1320-122617-0031-323: BRAVELY AND GENEROUSLY HAS HE BATTLED IN MY BEHALF AND THIS AND MORE WILL I DARE IN HIS SERVICE +2830-3980-0066-863: SINCE CHRIST WAS GIVEN FOR OUR SINS IT STANDS TO REASON THAT THEY CANNOT BE PUT AWAY BY OUR OWN EFFORTS +1580-141084-0040-428: HE RETURNED CARRYING HIS JUMPING SHOES WHICH ARE PROVIDED AS YOU (ARE->*) AWARE WITH SEVERAL SHARP SPIKES +7127-75947-0003-2121: YES THE CHARACTER WHICH YOUR ROYAL HIGHNESS ASSUMED IS IN PERFECT HARMONY WITH YOUR OWN +8463-294828-0020-2482: YES WE ARE CERTAINLY I REPLIED EVASIVELY BUT AFTER WE MAKE A DETOUR +8555-284449-0016-2542: AS A PRIVATE CITIZEN I SHALL BE A MODEL OF DEPORTMENT BECAUSE IT WOULD BE DANGEROUS TO BE OTHERWISE +1284-1180-0020-232: THE FIRST LOT WE TESTED ON OUR GLASS CAT WHICH NOT ONLY BEGAN TO LIVE BUT HAS LIVED EVER SINCE +3575-170457-0012-982: YOU WILL SAY THAT A WOMAN HAS NO NEED OF SUCH A CAUTION THERE CAN BE NO PERIL IN IT FOR HER +6930-75918-0006-1957: THIS HAS INDEED BEEN A HARASSING DAY CONTINUED THE YOUNG MAN HIS EYES FIXED UPON HIS FRIEND +908-31957-0007-2601: COULD IT MEAN TO LAST A LOVE SET PENDULOUS BETWEEN SORROW AND SORROW +8555-292519-0010-2557: OLD DANCES ARE SIMPLIFIED OF THEIR YEARNING BLEACHED BY TIME +2830-3980-0036-833: WHEREVER THE MEANS OF GRACE ARE FOUND THERE IS THE HOLY CHURCH EVEN THOUGH ANTICHRIST REIGNS THERE +8455-210777-0005-2361: WE HAVE OUR LITTLE STRUGGLES HERE AS ELSEWHERE AND ALL THINGS CANNOT BE DONE BY ROSE WATER +61-70968-0050-1731: HE MADE AN EFFORT TO HIDE HIS CONDITION FROM THEM ALL AND ROBIN FELT HIS FINGERS TIGHTEN UPON HIS ARM +6829-68769-0036-1896: THEY LEFT HIM THEN FOR THE JAILER ARRIVED TO UNLOCK THE DOOR AND ESCORT THEM TO THE OFFICE +260-123288-0008-742: THERE'S A HEAVY STORM COMING ON I CRIED POINTING TOWARDS THE HORIZON +7729-102255-0023-2256: THEIR DISTINCTIVE CHARACTERS HOWEVER DISPLAY ONE BROAD AND UNFAILING DIFFERENCE +5105-28240-0000-1417: FAST AS HIS LEGS COULD CARRY HIM SERVADAC HAD MADE HIS WAY TO THE TOP OF THE CLIFF +5683-32866-0018-1642: TO MY MIND THERE HAS ALWAYS BEEN SOMETHING INEXPRESSIBLY AWFUL IN FAMILY FEUDS +672-122797-0036-1821: HUMPY DUMPY FELL DOWNSTAIRS AND YET HE MARRIED THE PRINCESS +2094-142345-0021-532: 
THAT'S THE WAY WITH YOU THAT'S THE ROAD YOU'D ALL LIKE TO GO HEADLONGS TO RUIN +6829-68771-0027-1941: THEY THEY EXCITE ME IN SOME WAY AND I I CAN'T BEAR THEM YOU MUST EXCUSE ME +2094-142345-0051-562: NO THANK YOU I'LL JUST LOOK AT THE WHELPS AND LEAVE A MESSAGE ABOUT THEM WITH YOUR SHEPHERD +1995-1826-0019-458: COTTON IS A WONDERFUL THING IS IT NOT BOYS SHE SAID RATHER PRIMLY +1089-134686-0037-37: IN THE SILENCE THEIR DARK FIRE KINDLED THE DUSK INTO A TAWNY GLOW +1320-122617-0015-307: BUT THE BEAR INSTEAD OF OBEYING MAINTAINED THE SEAT IT HAD TAKEN AND GROWLED +8555-284449-0015-2541: I'LL NOT BE WICKED ANY MORE SIGHED THE OLD BOOLOOROO I'LL REFORM +61-70968-0020-1701: SHAME ON YOU CITIZENS CRIED HE I BLUSH FOR MY FELLOWS OF NOTTINGHAM +1320-122612-0004-279: DISTRUSTING HIS OWN JUDGMENT HIS APPEALS TO THE OPINION OF CHINGACHGOOK WERE FREQUENT AND EARNEST +2830-3980-0037-834: SO MUCH FOR THE TITLE OF THE EPISTLE NOW FOLLOWS THE (GREETING->READING) OF THE APOSTLE VERSE THREE +8463-287645-0011-2438: SHE WAS A LARGE HOMELY WOMAN THEY WERE COMMON WHITE PEOPLE WITH NO REPUTATION IN THE COMMUNITY +6829-68769-0053-1913: AND TO THINK WE CAN SAVE ALL THAT MISERY AND DESPAIR BY THE PAYMENT OF A HUNDRED AND FIFTY DOLLARS +5683-32879-0019-1674: I HAVE VERY FEW TO LOVE ME NOW AND I THOUGHT YOU MIGHT LOVE ME AS I HAVE BEGUN TO LOVE YOU +908-157963-0010-2573: SHE (CEASD->CEASED) AND (SMILD->SMILED) IN TEARS THEN SAT DOWN IN HER SILVER SHRINE +1284-1180-0007-219: HE KNEW IT WOULD TAKE THEM TO THE HOUSE OF THE CROOKED MAGICIAN WHOM HE HAD NEVER SEEN BUT WHO WAS THEIR NEAREST NEIGHBOR +237-126133-0010-624: OH YOU ARE THE DEAREST AND BEST MISTER KING I EVER SAW BUT HOW DID YOU MAKE MAMMY LET HER COME +6829-68771-0030-1944: HER MANNER WAS NEITHER INDEPENDENT NOR ASSERTIVE BUT RATHER ONE OF WELL BRED COMPOSURE AND CALM RELIANCE +5142-36377-0007-1538: A LITTLE CRACKED THAT IN THE POPULAR PHRASE WAS MY IMPRESSION OF THE STRANGER WHO NOW MADE HIS APPEARANCE IN THE SUPPER ROOM +1995-1826-0020-459: MISS TAYLOR DID NOT KNOW MUCH ABOUT COTTON BUT AT LEAST ONE MORE (REMARK->REMARKED) SEEMED CALLED FOR +5683-32865-0008-1614: I BELIEVE I HAVE A LITTLE TASTE THAT WAY THOSE ARE ALL REAL YOU KNOW THOSE JEWELS +5683-32865-0007-1613: I'M GLAD YOU LIKE IT SAYS WYLDER CHUCKLING BENIGNANTLY ON IT OVER HIS SHOULDER +3575-170457-0029-999: YOUR LETTER HAS GIVEN ME GREAT PLEASURE AND I SHOULD NOT FORGIVE MYSELF IF I DID NOT TELL YOU SO +4970-29095-0024-1329: IT HAS OCCUPIED MOTHER A LONG TIME TO FIND AT THE SHOPS THE EXACT SHADE FOR HER NEW BONNET +5142-36377-0023-1554: YOU WERE QUITE RIGHT TO SAY NO AMBROSE BEGAN NEVER SMOKE WITH JOHN (JAGO->IAGO) HIS CIGARS WILL POISON YOU +5105-28240-0017-1434: SOME MYSTERIOUS FORCE SEEMED TO HAVE BROUGHT ABOUT A CONVULSION OF THE ELEMENTS +1320-122617-0017-309: THEN AS IF SATISFIED OF THEIR SAFETY THE SCOUT LEFT HIS POSITION AND SLOWLY ENTERED THE PLACE +5639-40744-0005-1569: FINALLY THE ONE PARTY WENT OFF EXULTING AND THE OTHER WAS LEFT IN DESOLATION AND WOE +4970-29095-0009-1314: MARGARET BOLTON ALMOST LOST FOR A MOMENT HER HABITUAL PLACIDITY +6930-76324-0002-1974: THE POOR LITTLE THINGS CRIED CYNTHIA THINK OF THEM HAVING BEEN TURNED TO THE WALL ALL THESE YEARS +4507-16021-0033-1254: DO WE REALLY KNOW THE MOUNTAIN WELL WHEN WE ARE NOT ACQUAINTED WITH THE CAVERN +3575-170457-0028-998: KESWICK MARCH TWENTY SECOND EIGHTEEN THIRTY SEVEN DEAR (MADAM->MADAME) +7729-102255-0025-2258: THEIR ASSUMED CHARACTER CHANGED WITH THEIR CHANGING OPPORTUNITIES OR NECESSITIES +1089-134691-0001-39: FOR A FULL HOUR 
HE HAD PACED UP AND DOWN WAITING BUT HE COULD WAIT NO LONGER +6930-76324-0017-1989: OH LET HIM COME ALONG SHE URGED I DO LOVE TO SEE HIM ABOUT THAT OLD HOUSE +3570-5694-0004-924: IN THE NATURE OF THINGS LUXURIES AND THE COMFORTS OF LIFE BELONG TO THE LEISURE CLASS +4507-16021-0048-1269: THIS WITHOUT RECKONING IN THE PAINS OF THE HEART AND SO IT GOES ON +2094-142345-0023-534: (YOU'RE->YOU ARE) A RARE (UN->ONE) FOR SITTING DOWN TO YOUR WORK A LITTLE WHILE AFTER IT'S TIME TO PUT BY +1580-141083-0020-354: THEN HE APPROACHED IT AND STANDING ON TIPTOE WITH HIS NECK CRANED HE LOOKED INTO THE ROOM +1995-1826-0005-444: BUT JOHN THERE'S NO SOCIETY JUST ELEMENTARY WORK +121-123859-0002-131: BUT RECKONING TIME WHOSE (MILLION'D->MILLIONED) ACCIDENTS CREEP IN TWIXT VOWS AND CHANGE DECREES OF KINGS (TAN->TAN'S) SACRED BEAUTY BLUNT THE (SHARP'ST INTENTS->SHARPEST INTENSE) DIVERT STRONG MINDS TO THE COURSE OF ALTERING THINGS ALAS WHY FEARING OF TIME'S TYRANNY MIGHT I NOT THEN SAY NOW I LOVE YOU BEST WHEN I WAS CERTAIN O'ER (INCERTAINTY->IN CERTAINTY) CROWNING THE PRESENT DOUBTING OF THE REST +3729-6852-0033-1060: YOU ARE NOW IN THE ONLY COUNTRY IN THE WORLD WHERE WIT CAN MAKE A FORTUNE BY SELLING EITHER A GENUINE OR A FALSE ARTICLE IN THE FIRST CASE IT RECEIVES THE WELCOME OF INTELLIGENT AND TALENTED PEOPLE AND IN THE SECOND FOOLS ARE ALWAYS READY TO REWARD IT FOR SILLINESS IS TRULY A CHARACTERISTIC OF THE PEOPLE HERE AND HOWEVER WONDERFUL IT MAY APPEAR SILLINESS IS THE DAUGHTER OF WIT +121-123859-0001-130: O TIS THE FIRST TIS FLATTERY IN MY SEEING AND MY GREAT MIND MOST KINGLY DRINKS IT UP MINE EYE WELL KNOWS WHAT WITH HIS GUST IS (GREEING->GREEN) AND TO HIS PALATE DOTH PREPARE THE CUP IF IT BE (POISON'D->POISONED) TIS THE LESSER SIN THAT MINE EYE LOVES IT AND DOTH FIRST BEGIN +8230-279154-0038-2350: WE ARE OF COURSE IN FACT ABLE TO JUDGE WHEN WE RECOGNIZE AN OBJECT THAT WE HAVE SEEN IT BEFORE BUT THIS (JUDGMENT->JUDGEMENT) IS SOMETHING OVER AND ABOVE RECOGNITION IN THIS FIRST SENSE AND MAY VERY PROBABLY BE IMPOSSIBLE TO ANIMALS THAT NEVERTHELESS HAVE THE EXPERIENCE OF RECOGNITION IN THIS FIRST SENSE OF THE WORD +3729-6852-0003-1030: (MADAM->MADAME) CORRECTED ME WITH A PATRONIZING AIR FOR MY PRONUNCIATION OF THE WORD (SCEVRA->SCEVERA) WHICH MEANS DIVIDED SAYING THAT IT OUGHT TO BE PRONOUNCED (SCEURA->SCURA) AND SHE ADDED THAT I OUGHT TO BE VERY GLAD TO HAVE LEARNED SO MUCH ON THE FIRST DAY OF MY ARRIVAL IN PARIS TELLING ME THAT IT WOULD BE AN IMPORTANT DAY IN MY LIFE +908-31957-0025-2619: I LOVE THEE WITH A LOVE I SEEMED TO LOSE WITH MY LOST SAINTS I LOVE THEE WITH THE BREATH SMILES TEARS OF ALL MY LIFE AND IF GOD CHOOSE I SHALL BUT LOVE THEE BETTER AFTER DEATH +8230-279154-0022-2334: SOME POINTS MAY BE TAKEN AS FIXED AND SUCH AS ANY THEORY OF MEMORY MUST ARRIVE AT +1320-122617-0003-295: THERE WAS SOMETHING IN HIS AIR AND MANNER THAT BETRAYED TO THE SCOUT THE UTTER CONFUSION OF THE STATE OF HIS MIND +2094-142345-0039-550: I'VE STRONG ASSURANCE THAT NO EVIL WILL HAPPEN TO YOU AND MY UNCLE AND THE CHILDREN FROM ANYTHING I'VE DONE +672-122797-0070-1855: THE GOLDEN STAR OF TINSEL WAS STILL ON THE TOP OF THE TREE AND GLITTERED IN THE SUNSHINE +3729-6852-0018-1045: I SIT DOWN AT A SMALL TABLE A WAITER COMES IMMEDIATELY TO (ENQUIRE->INQUIRE) MY WISHES +2830-3980-0023-820: THESE PERVERTERS OF THE RIGHTEOUSNESS OF CHRIST RESIST THE FATHER AND THE SON AND THE WORKS OF THEM BOTH +5683-32879-0005-1660: THIS TRANSIENT SPRING AND LIGHTING UP ARE BEAUTIFUL A GLAMOUR BEGUILING OUR SENSES +61-70968-0024-1705: SPOKE THE 
SQUIRE LOSING ALL PATIENCE AND IT WAS TO YOU THAT I GAVE ANOTHER (PURSE IN->PERSON) CONSOLATION +8463-294825-0012-2454: THE NAUTILUS NEARLY PERISHES IN THE ANTARCTIC AND NEMO SINKS INTO A GROWING DEPRESSION +1320-122612-0005-280: YET HERE ARE WE WITHIN A SHORT RANGE OF THE SCAROONS AND NOT A SIGN OF A TRAIL HAVE WE CROSSED +4507-16021-0049-1270: THERE IS HARDLY ONE DAY OUT OF A HUNDRED WHICH IS WHOLLY JOYOUS AND SUNNY +121-127105-0011-145: SHE WAS THE MOST AGREEABLE WOMAN I'VE EVER KNOWN IN HER POSITION SHE WOULD HAVE BEEN WORTHY OF ANY WHATEVER +5142-36377-0024-1555: (NAOMI->THEY ONLY) SHOOK HER FOREFINGER REPROACHFULLY AT THEM AS IF THE TWO STURDY YOUNG FARMERS HAD BEEN TWO CHILDREN +6829-68769-0025-1885: THEN ROGERS WOULDN'T DO ANYTHING BUT LEAD HER AROUND AND WAIT UPON HER AND THE PLACE WENT TO RACK AND RUIN +2094-142345-0054-565: OH SIR SAID MISSUS POYSER RATHER ALARMED YOU WOULDN'T LIKE IT AT ALL +1284-1180-0023-235: YOU SEE I'VE LIVED ALL MY LIFE WITH UNC NUNKIE THE SILENT ONE AND THERE WAS NO ONE TO TELL ME ANYTHING +2300-131720-0038-610: HE FELT HE WAS UP AGAINST IT AND THAT PERHAPS ANOTHER KIND OF A JOB WOULD SUIT HIM BETTER +2830-3980-0038-835: GRACE BE TO YOU AND PEACE FROM GOD THE FATHER AND FROM OUR LORD JESUS CHRIST +5105-28240-0003-1420: SHE IS UNDER SAIL BUT SHE IS COUNT TIMASCHEFF'S YACHT HE WAS RIGHT +7127-75947-0007-2125: SHE THEN ROSE HUMMING THE AIR TO WHICH SHE WAS PRESENTLY GOING TO DANCE +8463-287645-0012-2439: SUBSTANTIALLY THIS WAS JACOB'S UNVARNISHED DESCRIPTION OF HIS MASTER AND MISTRESS +61-70970-0021-1765: THEY THEN RENEWED THEIR JOURNEY AND UNDER THE BETTER LIGHT MADE A SAFE CROSSING OF THE STABLE ROOFS +1995-1837-0024-505: FOR A WHILE SHE LAY IN HER CHAIR IN HAPPY DREAMY PLEASURE AT SUN AND BIRD AND TREE +8463-294828-0037-2499: DEPARTING FROM FIVE HUNDRED THOUSAND THROATS THREE CHEERS BURST FORTH IN SUCCESSION +5142-33396-0033-1495: YOU WOULD NOT EAT WITH US YOU CANNOT SAY NO TO HALF OF MY ALE I DRINK THIS TO YOUR HEALTH +2094-142345-0024-535: (MUNNY->MONEY) MY IRON'S (TWITE->TWICE) TOLD (PEASE->PLEASE) PUT IT DOWN TO WARM +260-123440-0012-775: IT'LL BE NO USE THEIR PUTTING THEIR HEADS DOWN AND SAYING COME UP AGAIN DEAR +8555-292519-0012-2559: THROUGH THE BLACK NIGHT RAIN HE SANG TO HER WINDOW BARS +5142-36377-0009-1540: PHILIP (LEFRANK->FRANK) THIS IS MY OVERLOOKER MISTER (JAGO->JAAGO) SAID THE OLD MAN FORMALLY PRESENTING US +672-122797-0040-1825: AND THE WHOLE NIGHT THE TREE STOOD STILL AND IN DEEP THOUGHT +2961-961-0013-910: (SOLON->SOLOMON) MARVELLED AND DESIRED TO BE INFORMED OF THE PARTICULARS +260-123286-0008-710: THEREFORE DON'T TALK TO ME ABOUT VIEWS AND PROSPECTS +1580-141083-0046-380: BUT I HAVE OCCASIONALLY DONE THE SAME THING AT OTHER TIMES +5683-32865-0003-1609: THEY ARE COUSINS YOU KNOW WE ARE ALL COUSINS +1089-134686-0035-35: HE HAD THE FAITH IN HIM THAT MOVES MOUNTAINS +8455-210777-0048-2404: WHAT WOULD BECOME OF YOUR GUN WERE I TO KIDNAP YOU +2830-3980-0018-815: I DID NOT THEN REALIZE THE IMPORTANCE OF THE MINISTRY +260-123440-0007-770: I ALMOST THINK I CAN REMEMBER FEELING A LITTLE DIFFERENT +3729-6852-0013-1040: YOU DO ME A GREAT (HONOUR->HONOR) +7176-92135-0007-2194: ANYHOW IT'S JOLLY EXCITING AND I CAN DO THE DIALOGUE ALL RIGHT +4507-16021-0028-1249: EVEN DIALECT LET THAT PASS +61-70970-0030-1774: SAVE ME MASTERS BUT YOU STARTLED ME RARELY +4446-2273-0036-1174: ALEXANDER (UNCLENCHED->CLENCHED) THE TWO HANDS AT HIS SIDES +4507-16021-0058-1279: THE FLAME IS THE ENEMY OF THE WING +4446-2273-0007-1145: THOSE FELLOWS ARE ALL VERY LOYAL EVEN 
MAINHALL +5142-33396-0057-1519: WE SUNK HIS SHIP AND MEN BUT HIM WE BROUGHT TO YOU +61-70968-0048-1729: AND HENRY MIGHT RETURN TO ENGLAND AT ANY MOMENT +3729-6852-0027-1054: THAT IS TRUE (BADAUDERIE->BADDELT GRI) +4446-2275-0015-1190: HE PULLED UP A WINDOW AS IF THE AIR WERE HEAVY +1580-141084-0037-425: WHEN I APPROACHED YOUR ROOM I EXAMINED THE WINDOW +5105-28240-0013-1430: NOTHING MORE THAN YOU KNOW YOURSELF +4446-2273-0006-1144: THEY ARE ALL SKETCHES MADE ABOUT THE (VILLA D'ESTE->VILIDESTA) YOU SEE +237-134493-0010-650: I NEVER SEE (LOU'S->LOSE) SCYTHE OVER HERE +3729-6852-0012-1039: I SHALL CALL YOU (ESPRIT->A SPREE) +7176-92135-0037-2224: THEN IS THE TIME TO INTRODUCE A MEAL ON THE STAGE +5683-32866-0015-1639: WHAT I SAY IS ALTOGETHER ON YOUR OWN ACCOUNT +4970-29095-0019-1324: AND THEN THERE WAS BROAD STREET +260-123288-0021-755: THE WAVES RISE ABOVE OUR HEADS +2830-3980-0033-830: NOT ALL THE (GALATIANS->GLACIERS) HAD BECOME PERVERTED +6829-68769-0019-1879: SORRY WE HAVEN'T ANY RECEPTION ROOM IN THE JAIL +4446-2275-0030-1205: YES HILDA I KNOW THAT HE SAID SIMPLY +2094-142345-0019-530: COMB THE WOOL FOR THE (WHITTAWS->WIDOWS) INDEED +5683-32866-0000-1624: MISS LAKE DECLINED THE CARRIAGE TO NIGHT +2094-142345-0004-515: AND WHAT THROUGH THE LEFT HAND WINDOW +4446-2275-0045-1220: WE'VE TORTURED EACH OTHER ENOUGH FOR (TONIGHT->TO NIGHT) +4446-2275-0044-1219: (*->AH) DON'T CRY DON'T CRY HE WHISPERED +4446-2275-0014-1189: I CAN'T STAND SEEING YOU MISERABLE +2830-3980-0003-800: PAUL CAME LATER AND IS BENEATH US +7021-79730-0000-2029: THE THREE MODES OF MANAGEMENT +61-70968-0018-1699: SO I DID PUSH THIS FELLOW +4446-2271-0002-1115: IT'S TREMENDOUSLY WELL PUT ON TOO +7021-85628-0014-2074: HE ONLY SHOOK HIS HEAD +8455-210777-0063-2419: TO DAY I SHOUTED +5142-33396-0027-1489: HE ACTS AS THOUGH HE HAD NOT EXPECTED US +4507-16021-0044-1265: LOOK CLOSELY AT LIFE +7127-75946-0001-2089: AH VERY WELL +7021-79740-0005-2044: I AM VERY GLAD +5142-33396-0013-1475: HE IS BUT A BOY THE (MEN->MAN) SAID +1580-141084-0022-410: AND ONE MORE THIS MORNING +672-122797-0049-1834: (SQUEAK SQUEAK->SQUICK QUICK) +1580-141084-0007-395: TO MORROW IS THE EXAMINATION +121-127105-0021-155: WON'T YOU TELL DOUGLAS +7127-75947-0031-2149: YOU ARE POSITIVE THEN +121-123852-0001-125: AY ME +1221-135766-0006-177: THEY WERE NOW ILLUMINATED BY THE MORNING RADIANCE OF A YOUNG CHILD'S DISPOSITION BUT LATER IN THE DAY OF EARTHLY EXISTENCE MIGHT BE PROLIFIC OF THE STORM AND WHIRLWIND +237-134493-0002-642: FROM THE NORWEGIAN GRAVEYARD ONE LOOKS OUT OVER A VAST (CHECKER->CHEQUER) BOARD MARKED OFF IN SQUARES OF WHEAT AND CORN LIGHT AND DARK DARK AND LIGHT +260-123288-0013-747: THE PILED UP VAPOURS (CONDENSE->CONDENSED) INTO WATER AND THE AIR PUT INTO VIOLENT ACTION TO SUPPLY THE VACUUM LEFT BY THE CONDENSATION OF THE (MISTS->MIST) ROUSES ITSELF INTO A WHIRLWIND +121-123859-0003-132: LOVE IS A BABE THEN MIGHT I NOT SAY SO TO GIVE FULL GROWTH TO THAT WHICH STILL DOTH GROW +2300-131720-0010-582: IT COULD NOT BE USED FOR ELECTROPLATING OR DEPOSITION NOR COULD IT CHARGE STORAGE BATTERIES ALL OF WHICH ARE EASILY WITHIN THE ABILITY OF THE DIRECT CURRENT +237-134500-0013-672: MARIE PICKED CHERRIES AND SANG SOFTLY TO HERSELF STRIPPING ONE GLITTERING BRANCH AFTER ANOTHER SHIVERING WHEN SHE CAUGHT A SHOWER OF RAINDROPS ON HER NECK AND HAIR +3570-5694-0007-927: IT HAS EVEN HAPPENED THAT THE NAME FOR CERTAIN DISEASED CONDITIONS OF THE BODY ARISING FROM SUCH AN ORIGIN HAS PASSED INTO EVERYDAY SPEECH AS A SYNONYM FOR NOBLE OR GENTLE +3570-5695-0014-957: 
AMONG THE COUNTRY POPULATION ITS PLACE IS TO SOME EXTENT TAKEN BY SAVINGS AND HOME COMFORTS KNOWN THROUGH THE MEDIUM OF (NEIGHBORHOOD->NEIGHBOURHOOD) GOSSIP SUFFICIENTLY TO SERVE THE LIKE GENERAL PURPOSE OF PECUNIARY REPUTE +61-70970-0038-1782: THE OLD SERVANT TOLD HIM QUIETLY AS THEY CREPT BACK TO GAMEWELL THAT THIS (PASSAGE WAY->PASSAGEWAY) LED FROM THE HUT IN THE (PLEASANCE->PLEASANTS) TO SHERWOOD AND THAT (GEOFFREY->JEFFREY) FOR THE TIME WAS HIDING WITH THE OUTLAWS IN THE FOREST +1284-134647-0001-268: THE EDICT OF MILAN THE GREAT CHARTER OF TOLERATION HAD CONFIRMED TO EACH INDIVIDUAL OF THE ROMAN WORLD THE PRIVILEGE OF CHOOSING AND PROFESSING HIS OWN RELIGION +2094-142345-0057-568: I (KNOW HIS->KNOW'S) FARM IS IN BETTER ORDER THAN ANY OTHER WITHIN TEN MILES OF US AND AS FOR THE KITCHEN HE ADDED SMILING I DON'T BELIEVE THERE'S ONE IN THE KINGDOM TO BEAT IT +7127-75947-0039-2157: IN FACT THE SOUND OF MADAME'S AND THE QUEEN'S CARRIAGES COULD BE HEARD IN THE DISTANCE UPON THE HARD DRY GROUND OF THE ROADS FOLLOWED BY THE (MOUNTED->MOUNTAIN) CAVALIERS +5683-32866-0024-1648: I SHAN'T TROUBLE YOU ABOUT MY TRAIN OF THOUGHTS OR FANCIES BUT I BEGAN TO FEEL VERY LIKE A GENTLEMAN IN A GHOST STORY WATCHING EXPERIMENTALLY IN A HAUNTED CHAMBER +4992-41797-0013-1378: SHE MAKES EFFORT AFTER EFFORT TREMBLING WITH EAGERNESS AND WHEN SHE FAILS TO REPRODUCE WHAT SHE SEES SHE WORKS HERSELF INTO A FRENZY OF GRIEF AND DISAPPOINTMENT +4970-29095-0027-1332: IT'S SUCH A CRUSH AT THE YEARLY MEETING AT ARCH STREET AND THEN THERE'S THE ROW OF SLEEK LOOKING YOUNG MEN WHO (LINE->LIE IN) THE CURBSTONE AND STARE AT US AS WE COME OUT +7176-92135-0031-2218: AND THERE YOU ARE YOU WILL OF COURSE APPRECIATE THAT THE UNFINISHED SENTENCES NOT ONLY SAVE TIME BUT ALSO MAKE THE MANOEUVRING VERY MUCH MORE NATURAL +8555-284447-0000-2501: THEN HE RUSHED (DOWN STAIRS->DOWNSTAIRS) INTO THE COURTYARD SHOUTING LOUDLY FOR HIS SOLDIERS AND THREATENING TO PATCH EVERYBODY IN HIS DOMINIONS IF THE SAILORMAN WAS NOT RECAPTURED +2094-142345-0010-521: (HETTY SORREL->HETTY'S SYREL) OFTEN TOOK THE OPPORTUNITY WHEN HER AUNT'S BACK WAS TURNED OF LOOKING AT THE PLEASING REFLECTION OF HERSELF IN THOSE POLISHED (SURFACES->SERVICES) FOR THE OAK TABLE WAS USUALLY TURNED UP LIKE A SCREEN AND WAS MORE FOR ORNAMENT THAN FOR USE AND SHE COULD SEE HERSELF SOMETIMES IN THE GREAT ROUND PEWTER DISHES THAT WERE RANGED ON THE SHELVES ABOVE THE LONG DEAL DINNER TABLE OR IN THE HOBS OF THE GRATE WHICH ALWAYS SHONE LIKE JASPER +3575-170457-0046-1016: A GOOD NEIGHBOUR OF THE BRONTES A CLEVER INTELLIGENT YORKSHIRE WOMAN WHO KEEPS A (DRUGGIST'S->DRUGGIST) SHOP IN (HAWORTH->HAWWORTH) AND FROM HER OCCUPATION HER EXPERIENCE (AND->IN) EXCELLENT SENSE HOLDS THE POSITION OF VILLAGE DOCTRESS AND NURSE AND AS SUCH HAS BEEN A FRIEND IN MANY A TIME OF TRIAL AND SICKNESS AND DEATH IN THE HOUSEHOLDS ROUND TOLD ME A CHARACTERISTIC LITTLE INCIDENT CONNECTED WITH TABBY'S FRACTURED LEG +1995-1836-0010-476: THE (VANDERPOOLS->VAN DERPOOLS) WERE SURE OF THIS AND THE ENGLISHMAN INSTANCING INDIA BECAME QUITE ELOQUENT MISSUS (GREY->GRAY) WAS MYSTIFIED BUT HARDLY DARED ADMIT IT THE GENERAL TREND OF THE CONVERSATION SEEMED TO BE THAT MOST INDIVIDUALS NEEDED TO BE SUBMITTED TO THE SHARPEST SCRUTINY BEFORE BEING ALLOWED MUCH EDUCATION AND AS FOR THE LOWER RACES IT WAS SIMPLY CRIMINAL TO OPEN SUCH USELESS OPPORTUNITIES TO THEM +4507-16021-0020-1241: WE MAY BE STOPPED THE FACT MAY BE PUT TO US IN GENERAL TERMS WHICH IS ONE WAY OF ATTENUATING IT WE MAY BE TOLD THAT ALL TRADES PROFESSIONS IT MAY BE 
ADDED ALL THE ACCIDENTS OF THE SOCIAL HIERARCHY AND ALL FORMS OF INTELLIGENCE HAVE THEIR OWN SLANG +4970-29093-0005-1286: SOMETIMES HE THOUGHT HE WOULD LIKE TO STAND IN A CONSPICUOUS PULPIT AND HUMBLY PREACH THE GOSPEL OF REPENTANCE AND IT EVEN CROSSED HIS MIND THAT IT WOULD BE NOBLE TO GIVE HIMSELF TO A MISSIONARY LIFE TO SOME BENIGHTED REGION WHERE THE DATE PALM GROWS AND THE NIGHTINGALE'S VOICE IS IN TUNE AND THE (BUL BUL->BULBUL) SINGS ON THE OFF NIGHTS +1188-133604-0008-72: FOR BELIEVE ME THE FINAL PHILOSOPHY OF ART CAN ONLY RATIFY THEIR OPINION THAT THE BEAUTY OF A COCK ROBIN IS TO BE RED AND OF A GRASS PLOT TO BE GREEN AND THE BEST SKILL OF ART IS IN INSTANTLY SEIZING ON THE MANIFOLD DELICIOUSNESS OF LIGHT WHICH YOU CAN ONLY SEIZE BY PRECISION OF INSTANTANEOUS TOUCH +1089-134686-0028-28: THE RETREAT WILL BEGIN ON WEDNESDAY AFTERNOON IN (HONOUR->HONOR) OF SAINT FRANCIS (XAVIER->ZAVIOR) WHOSE FEAST DAY IS SATURDAY +2961-961-0016-913: I WILL BRIEFLY DESCRIBE THEM TO YOU AND YOU SHALL READ THE ACCOUNT OF THEM AT YOUR LEISURE IN THE SACRED REGISTERS +3575-170457-0003-973: SURELY IT MUST BE BECAUSE WE ARE IN DANGER OF LOVING EACH OTHER TOO WELL OF LOSING SIGHT OF THE CREATOR (IN->AND) IDOLATRY OF THE CREATURE +1995-1826-0009-448: YOU OUGHT TO KNOW JOHN IF I TEACH NEGROES I'LL SCARCELY SEE MUCH OF PEOPLE IN MY OWN CLASS +7176-92135-0001-2188: IN SHORT HE BECOMES A PROMINENT FIGURE IN LONDON SOCIETY AND IF HE IS NOT CAREFUL SOMEBODY WILL SAY SO +6829-68771-0019-1933: SHE WAS DRESSED IN THE REGULATION COSTUME OF THE MAIDS AT ELMHURST A PLAIN BLACK GOWN WITH (*->A) WHITE APRON AND CAP +7021-79740-0013-2052: PUT THESE PLAYTHINGS ALL AWAY QUICK AND CAREFULLY AND WE WILL NOT LET THEM KNOW (ANY THING->ANYTHING) ABOUT YOUR LEAVING THEM OUT +6930-76324-0021-1993: SURFACE DUST AT LEAST HAD BEEN REMOVED AND THE FINE OLD FURNITURE GAVE A HINT OF ITS REAL ELEGANCE AND POLISH +7127-75947-0024-2142: LOOK YONDER DO YOU NOT SEE THE MOON SLOWLY RISING SILVERING THE TOPMOST BRANCHES OF THE CHESTNUTS AND THE OAKS +121-127105-0029-163: THERE WERE PLENTY OF PEOPLE TO HELP BUT OF COURSE THE YOUNG LADY WHO SHOULD GO DOWN AS GOVERNESS WOULD BE IN SUPREME AUTHORITY +121-121726-0009-118: HOTEL A PLACE WHERE A GUEST OFTEN GIVES UP GOOD DOLLARS FOR POOR QUARTERS +61-70970-0024-1768: THEY MOVED THEREAFTER CAUTIOUSLY ABOUT THE HUT GROPING BEFORE AND ABOUT THEM TO FIND SOMETHING TO SHOW THAT WARRENTON HAD FULFILLED HIS MISSION +7021-85628-0008-2068: FOR LIKE AS NOT THEY MUST HAVE THOUGHT HIM A PRINCE WHEN THEY SAW HIS FINE CAP +7729-102255-0029-2262: TEN DAYS WERE CONSUMED IN THESE NEGOTIATIONS BUT THE SPIRIT OF VENGEANCE REFUSED TO YIELD +1580-141083-0009-343: THE ALTERNATIVE WAS THAT (SOMEONE->SOME ONE) PASSING HAD OBSERVED THE KEY IN THE DOOR HAD KNOWN THAT I WAS OUT AND HAD ENTERED TO LOOK AT THE PAPERS +260-123286-0016-718: THESE THOUGHTS AGITATED ME ALL DAY AND MY IMAGINATION SCARCELY CALMED DOWN AFTER SEVERAL HOURS SLEEP +4970-29093-0007-1288: IT IS SUCH A NOBLE AMBITION THAT IT IS A PITY IT HAS USUALLY SUCH A SHALLOW FOUNDATION +61-70968-0027-1708: ROBIN AND THE LITTLE TUMBLER BETWEEN THEM TRIED TO FORCE THE SQUIRE TO STAND BACK AND VERY VALIANTLY DID THESE TWO COMPORT THEMSELVES +237-126133-0014-628: ASKED PHRONSIE IN INTENSE INTEREST SLIPPING DOWN OUT OF POLLY'S ARMS AND CROWDING UP CLOSE TO JASPER'S SIDE +7176-92135-0015-2202: AND SO ON TILL YOU GET TO THE END WHEN OPHELIA MIGHT SAY AH YES OR SOMETHING NON COMMITTAL OF THAT SORT +1995-1837-0027-508: ON SHE HURRIED UNTIL SWEEPING DOWN TO THE LAGOON AND THE 
ISLAND LO THE COTTON LAY BEFORE HER +61-70970-0039-1783: HE IMPLORES US TO BE DISCREET AS THE GRAVE IN THIS MATTER FOR IN SOOTH HIS LIFE IS IN THE HOLLOW OF OUR HANDS +8455-210777-0011-2367: I DID NOT MEAN SAID CAPTAIN (BATTLEAX->BATTLE AX) TO TOUCH UPON PUBLIC SUBJECTS AT SUCH A MOMENT AS THIS +1188-133604-0039-103: IT HAS NO BEAUTY WHATSOEVER NO SPECIALTY OF PICTURESQUENESS AND ALL ITS LINES ARE CRAMPED AND POOR +4992-23283-0019-1363: I WILL MAKE NO UNJUST USE OF WHAT I KNOW HE REPLIED WITH FIRMNESS I BELIEVE YOU MY LORD +5105-28233-0010-1416: UNLIKE HIS MASTER HE MADE NO PRETENSION TO ANY GIFT OF POETIC POWER BUT HIS INEXHAUSTIBLE MEMORY MADE HIM A LIVING ENCYCLOPAEDIA AND FOR HIS STOCK OF ANECDOTES AND (TROOPER'S->TROOPERS) TALES HE WAS MATCHLESS +6930-75918-0005-1956: THE COUNT HAD THROWN HIMSELF BACK ON HIS SEAT LEANING HIS SHOULDERS AGAINST THE PARTITION OF THE TENT AND REMAINED THUS HIS FACE BURIED IN HIS HANDS WITH HEAVING CHEST AND RESTLESS LIMBS +8555-284447-0010-2511: CONTROL YOURSELVES MY DEARS REPLIED THE BOOLOOROO THE WORST PUNISHMENT I KNOW HOW TO INFLICT ON (ANYONE->ANY ONE) THIS PRISONER IS ABOUT TO SUFFER (YOU'LL->YOU WILL) SEE A VERY PRETTY PATCHING MY ROYAL DAUGHTERS +6930-81414-0000-2001: NO WORDS WERE SPOKEN NO LANGUAGE WAS UTTERED SAVE THAT OF WAILING AND HISSING AND THAT SOMEHOW WAS INDISTINCT AS IF IT EXISTED IN FANCY AND NOT IN REALITY +4077-13754-0006-1102: WHAT THE LATTER DAY SAINTS CALL CELESTIAL MARRIAGE IS CHARACTERISTIC OF THE CHURCH AND IS IN VERY GENERAL (PRACTISE->PRACTICE) BUT OF CELESTIAL MARRIAGE PLURALITY OF WIVES WAS AN INCIDENT NEVER AN ESSENTIAL +1188-133604-0003-67: MY FIRST AND PRINCIPAL REASON WAS THAT THEY ENFORCED BEYOND ALL RESISTANCE ON ANY STUDENT WHO MIGHT ATTEMPT TO COPY THEM THIS METHOD OF LAYING PORTIONS OF DISTINCT HUE SIDE BY SIDE +6829-68771-0012-1926: THIS WAS THE FIRST OCCASION WITHIN A GENERATION WHEN SUCH AN ENTERTAINMENT HAD BEEN GIVEN AT ELMHURST AND THE ONLY (ONE->WHEN) WITHIN THE MEMORY OF MAN WHERE THE NEIGHBORS AND COUNTRY PEOPLE HAD BEEN (*->THE) INVITED GUESTS +3575-170457-0042-1012: UNFORTUNATELY THE FRACTURE COULD NOT BE SET TILL SIX O'CLOCK THE NEXT MORNING AS NO SURGEON WAS TO BE HAD BEFORE THAT TIME AND SHE NOW LIES AT OUR HOUSE IN A VERY DOUBTFUL AND DANGEROUS STATE +8555-284449-0000-2526: SO THEY WERE QUITE WILLING TO OBEY THE ORDERS OF THEIR GIRL QUEEN AND IN A SHORT TIME THE (BLASTS->BLAST) OF TRUMPETS AND ROLL OF DRUMS AND CLASHING OF CYMBALS TOLD TROT AND CAP'N BILL THAT THE BLUE BANDS HAD ASSEMBLED BEFORE THE PALACE +4077-13751-0012-1086: THE LIEUTENANT GOVERNOR LILBURN W BOGGS AFTERWARD GOVERNOR WAS A PRONOUNCED MORMON HATER AND THROUGHOUT THE PERIOD OF THE TROUBLES HE (MANIFESTED->MANIFESTS HIS) SYMPATHY WITH THE PERSECUTORS +4446-2271-0004-1117: DO YOU KNOW ALEXANDER (MAINHALL->MAYHALL) LOOKED WITH PERPLEXITY UP INTO THE TOP OF THE HANSOM AND RUBBED HIS PINK CHEEK WITH HIS GLOVED FINGER DO YOU KNOW I SOMETIMES THINK OF TAKING TO CRITICISM SERIOUSLY MYSELF +4970-29093-0001-1282: TO THE YOUNG AMERICAN HERE OR ELSEWHERE THE PATHS TO FORTUNE ARE INNUMERABLE AND ALL OPEN THERE IS INVITATION IN THE AIR AND SUCCESS IN ALL HIS WIDE HORIZON +1284-1181-0017-262: THE WIZARD OF OZ WHO USED TO BE A HUMBUG AND KNEW NO MAGIC AT ALL HAS BEEN TAKING LESSONS OF GLINDA AND I'M TOLD HE IS GETTING TO BE A PRETTY GOOD WIZARD BUT HE IS MERELY THE ASSISTANT OF THE GREAT SORCERESS +1188-133604-0018-82: IN ALL EARLY GOTHIC ART INDEED YOU WILL FIND FAILURE OF THIS KIND ESPECIALLY DISTORTION AND RIGIDITY WHICH ARE IN MANY RESPECTS 
PAINFULLY TO BE COMPARED WITH THE SPLENDID REPOSE OF CLASSIC ART +260-123440-0008-771: I'LL TRY IF I KNOW ALL THE THINGS I USED TO KNOW +6930-81414-0015-2016: I DO NOT KNOW I AM DAZED BEWILDERED +8463-287645-0009-2436: I NEVER KNEW OF BUT ONE MAN WHO COULD EVER PLEASE HIM +260-123288-0022-756: THEY SEEM TO BE WE ARE LOST BUT I AM NOT SURE +4446-2271-0003-1116: IT'S BEEN ON ONLY TWO WEEKS AND I'VE BEEN HALF A DOZEN TIMES ALREADY +5683-32879-0001-1656: WELL SHE WAS BETTER THOUGH SHE HAD HAD A BAD NIGHT +2094-142345-0035-546: BUT NOT MORE THAN WHAT'S IN THE BIBLE AUNT SAID DINAH +6829-68769-0051-1911: THERE WAS A GRIM SMILE OF AMUSEMENT ON HIS SHREWD FACE +1320-122612-0016-291: RUN BACK UNCAS AND BRING ME THE SIZE OF THE SINGER'S FOOT +5683-32866-0001-1625: AND HE ADDED SOMETHING STILL LESS COMPLIMENTARY +2830-3980-0049-846: EMBRACE HIM AND FORGET ABOUT THE NATURE OF GOD +3575-170457-0056-1026: I DOUBT WHETHER BRANWELL WAS MAINTAINING HIMSELF AT THIS TIME +8463-287645-0008-2435: AS USUAL NOTHING WAS DONE IN THE WAY OF PUNISHMENT +4970-29093-0015-1296: YOU CAN BEGIN BY CARRYING A ROD AND PUTTING DOWN THE FIGURES +8555-284447-0009-2510: (MORNIN->MORNING) GIRLS HOPE (YE->YOU) FEEL AS WELL AS (YE->YOU) LOOK +672-122797-0005-1790: OH THAT MADE HIM SO ANGRY +1089-134686-0036-36: A GREAT SAINT SAINT FRANCIS (XAVIER->ZAVIOUR) +5142-33396-0014-1476: THIRTY MEN ONE AFTER ANOTHER RAISED THEIR HORNS AND SAID +7127-75947-0002-2120: DO YOU THINK SO SHE REPLIED WITH INDIFFERENCE +5142-33396-0058-1520: A ROBBER VIKING SAID THE KING AND (*->HE) SCOWLED AT ME +1995-1837-0020-501: THE YEARS OF THE DAYS OF HER DYING WERE TEN +8230-279154-0003-2315: AND WHAT SORT OF EVIDENCE IS LOGICALLY POSSIBLE +8455-210777-0020-2376: OH YES SAID JACK (AND->THEN) I'M NOWHERE +61-70970-0032-1776: ENQUIRED ROBIN WITH HIS (SUSPICIONS->SUSPICION) STILL UPON HIM +7176-92135-0039-2226: TEA PLEASE MATTHEWS BUTLER IMPASSIVELY +5105-28240-0014-1431: ARE YOU CERTAIN THAT THIS IS THE MEDITERRANEAN +4446-2275-0031-1206: I UNDERSTAND BARTLEY I WAS WRONG +260-123286-0024-726: THERE'S A WHALE A WHALE CRIED THE PROFESSOR +672-122797-0065-1850: NOW THAT TOO IS OVER +4970-29093-0000-1281: YOU'LL NEVER DIG IT OUT OF THE ASTOR LIBRARY +7021-85628-0000-2060: BUT (ANDERS->ANDREWS) CARED NOTHING ABOUT THAT +2830-3980-0034-831: THESE MEANS CANNOT BE CONTAMINATED +1580-141084-0038-426: NO ONE LESS THAN THAT WOULD HAVE A CHANCE +8463-294828-0018-2480: WE'LL DEAL WITH THEM LATER WHAT +6829-68769-0050-1910: BETH UNEASY AT HIS SILENCE NUDGED HIM +672-122797-0035-1820: BUT I SHALL TELL ONLY ONE STORY +2830-3980-0064-861: HOW MAY WE OBTAIN REMISSION OF OUR SINS +6829-68769-0035-1895: IT WON'T BE MUCH BUT I'M GRATEFUL TO FIND A FRIEND +7021-79730-0002-2031: BY REASON AND AFFECTION +7127-75947-0017-2135: WHAT ALREADY HERE THEY SAID TO HER +1995-1837-0005-486: SHE WAS SO STRANGE AND HUMAN A CREATURE +1221-135766-0015-186: IF SPOKEN TO SHE WOULD NOT SPEAK AGAIN +1580-141083-0047-381: DID YOU LOOK AT THESE PAPERS ON THE TABLE +4507-16021-0000-1221: CHAPTER ONE ORIGIN +61-70970-0017-1761: REST AND BE STILL UNTIL I WARN YOU +2094-142345-0020-531: THAT'S WHAT YOU'D LIKE TO BE DOING IS IT +5142-36377-0019-1550: WHEN I ADDRESSED HIM HE ANSWERED CONSTRAINEDLY +237-134500-0007-666: WE NEVER HAD SO MANY OF THEM IN HERE BEFORE +1580-141083-0017-351: SO IT SEEMS TO ME +8555-284449-0014-2540: THE FORMER BOOLOOROO GROANED +5142-33396-0029-1491: BRING IN THE TABLE WE ARE HUNGRY +6829-68771-0026-1940: ELIZA PARSONS SHOOK HER HEAD +5683-32879-0015-1670: YES SAID RACHEL 
+237-134493-0012-652: I GET WET TO MY KNEES WHEN I GO DOWN TO PICK CHERRIES +6829-68771-0028-1942: SHE EVEN SEEMED MILDLY AMUSED AT THE ATTENTION SHE ATTRACTED +8463-294828-0034-2496: WE'LL BE QUITE COMFORTABLE HERE I TOLD CONSEIL +2830-3980-0020-817: THIS IS NO SINFUL PRIDE IT IS HOLY PRIDE +908-157963-0024-2587: IMAGE OF WEAKNESS ART THOU BUT A WORM +61-70970-0033-1777: TRULY SUCH A HORSE (SHOULD->WOULD) BE WORTH MUCH IN NOTTINGHAM FAIR +1089-134691-0015-53: WORDS WAS IT THEIR (COLOURS->COLORS) +5683-32865-0006-1612: AT DINNER LAKE WAS EASY AND AMUSING +2830-3980-0035-832: THEY REMAIN DIVINE REGARDLESS OF MEN'S OPINION +7176-88083-0008-2167: IN DESPAIR HE HURLED HIMSELF DOWNWARD TOO SOON +4446-2275-0032-1207: BUT I DIDN'T KNOW YOU'VE ONLY TO TELL ME NOW +1284-1181-0018-263: IT TRULY IS ASSERTED THE MAGICIAN +61-70968-0051-1732: BEG ME A ROOM OF THE SHERIFF CHILD QUICKLY +260-123440-0009-772: I SHALL NEVER GET TO TWENTY AT THAT RATE +1995-1826-0003-442: BETTER GO HE HAD COUNSELLED SENTENTIOUSLY +1995-1837-0021-502: THE HOPE AND DREAM OF HARVEST WAS UPON THE LAND +237-134500-0023-682: IT WOULD SERVE YOU ALL RIGHT IF SHE WALKED OFF WITH (CARL->KARL) +6930-76324-0000-1972: GOLIATH MAKES ANOTHER DISCOVERY +7127-75947-0034-2152: IT SEEMS THE KING WILL NOT CONSENT TO IT +1995-1837-0006-487: THE WORLD WAS WATER VEILED IN MISTS +61-70968-0006-1687: BUT THEN THE PICTURE WAS GONE AS QUICKLY AS IT CAME +61-70968-0036-1717: GEORGE MONTFICHET WILL NEVER FORGET THIS DAY +5683-32866-0003-1627: IN THE MEANTIME I HAD FORMED A NEW IDEA OF HER +1221-135767-0015-202: YE MAY NOT SEE HIS WORSHIP NOW +3729-6852-0030-1057: IS IT BETTER THAN ANYWHERE ELSE +1580-141083-0048-382: HOW CAME YOU TO LEAVE THE KEY IN THE DOOR +5142-33396-0030-1492: THE THRALLS WERE BRINGING IN A GREAT POT OF MEAT +121-127105-0008-142: HE HUNG FIRE AGAIN A WOMAN'S +8455-210777-0066-2422: THEY OF COURSE MUST ALL BE ALTERED +8455-210777-0021-2377: BUT I MEAN TO HAVE MY INNINGS BEFORE LONG +1580-141083-0019-353: ABOVE WERE THREE STUDENTS ONE ON EACH STORY +61-70968-0021-1702: SURELY WE CAN SUBMIT WITH GOOD GRACE +1580-141083-0018-352: NOW MISTER (SOAMES->SOLMES) AT YOUR DISPOSAL +7127-75946-0019-2107: YES IT IS SUPPRESSED +5142-33396-0060-1522: TAKE HIM OUT (THORKEL->TURKLE) AND LET HIM TASTE YOUR SWORD +3729-6852-0015-1042: AT YOUR SERVICE SIR +260-123286-0010-712: SUNDAY AUGUST SIXTEENTH +6829-68769-0037-1897: I'VE SEEN LOTS OF THAT KIND IN MY DAY +4446-2275-0017-1192: BUT IT'S WORSE NOW IT'S UNBEARABLE +8463-294828-0005-2467: CONSEIL WAS MY MANSERVANT +237-134500-0038-697: AND ANYHOW THERE'S NOTHING TO UNDERSTAND +260-123288-0023-757: HE NODS HIS CONSENT +5142-33396-0045-1507: THIS IS OUR LAST FEAST WITH YOU I SAID +8463-294828-0004-2466: BUT NOW NOTHING COULD HOLD ME BACK +121-127105-0009-143: SHE HAS BEEN DEAD THESE TWENTY YEARS +4970-29093-0016-1297: NO (ITS->IT'S) NOT TOO SOON +6930-81414-0016-2017: BUT THAT IS (KAFFAR'S->KAFFIR'S) KNIFE +7176-92135-0010-2197: (HAM->HIM) TO BE OR NOT TO BE +7021-85628-0017-2077: SO IT IS SAID (ANDERS->ANDREWS) +1089-134691-0000-38: HE COULD WAIT NO LONGER +5142-33396-0000-1462: AT ANOTHER TIME (HARALD->HAROLD) ASKED +672-122797-0037-1822: THAT'S THE WAY OF THE WORLD +8555-284447-0011-2512: SUPPOSE IT'S A FRIEND +7127-75947-0004-2122: EXPLAIN YOURSELF +8230-279154-0024-2336: THE FIRST OF OUR VAGUE BUT INDUBITABLE DATA IS THAT THERE IS KNOWLEDGE OF THE PAST +5683-32865-0010-1616: I WAS THINKING IT'S VERY LIKE THE ACE OF HEARTS ANSWERED THE CAPTAIN SOFTLY SMILING ON +260-123288-0027-761: A SUFFOCATING 
SMELL OF NITROGEN FILLS THE AIR IT ENTERS THE THROAT IT FILLS THE LUNGS +1284-1180-0009-221: THEN THEY STARTED ON AGAIN AND TWO HOURS LATER CAME IN SIGHT OF THE HOUSE OF DOCTOR PIPT +8230-279154-0023-2335: IN THIS CASE AS IN MOST OTHERS WHAT MAY BE TAKEN AS CERTAIN IN ADVANCE IS RATHER VAGUE +5105-28240-0019-1436: MY YACHT IS AT YOUR SERVICE SIR EVEN SHOULD YOU REQUIRE TO MAKE A TOUR ROUND THE WORLD +8224-274384-0007-2305: HAVE MERCY LORD ON ME I PRAY FOR MEN WOULD ME DEVOUR +8455-210777-0040-2396: THEN SAID SIR FERDINANDO THERE IS NOTHING FOR IT BUT THAT (HE->WE) MUST TAKE YOU WITH HIM +6930-76324-0004-1976: BUT JOYCE HAD NOT BEEN LISTENING ALL AT ONCE SHE PUT DOWN HER CANDLE ON THE TABLE AND FACED HER COMPANION +5105-28240-0004-1421: IF THE COUNT WERE ON BOARD A STRANGE FATALITY WAS BRINGING HIM TO THE PRESENCE OF HIS RIVAL +61-70970-0037-1781: HIS TONES RANG PLEASANTLY ON WARRENTON'S EARS AND FORTHWITH A GOOD FELLOWSHIP WAS HERALDED BETWEEN THEM +8455-210777-0070-2426: AND THIS PLAN WAS ADOPTED TOO IN ORDER TO EXTRACT FROM ME A PROMISE THAT I WOULD DEPART IN PEACE +7729-102255-0027-2260: FOOTNOTE SUMNER TO SHANNON MAY TWELFTH EIGHTEEN FIFTY SIX +5639-40744-0007-1571: MEANWHILE (RODOLFO->RUDOLPHO) HAD (LEOCADIA->LOCALIA) SAFE IN HIS CUSTODY AND IN HIS OWN APARTMENT +1580-141083-0037-371: WHAT COULD HE DO HE CAUGHT UP EVERYTHING WHICH WOULD BETRAY HIM AND HE RUSHED INTO YOUR BEDROOM TO CONCEAL HIMSELF +8455-210777-0039-2395: I CAN ASSURE YOU HE HAS NOT EVEN ALLOWED ME TO SEE THE TRIGGER SINCE I HAVE BEEN ON BOARD +4970-29093-0020-1301: WHY IT'S IN MISSOURI SOMEWHERE ON THE FRONTIER I THINK WE'LL GET A MAP +2830-3980-0069-866: THE VICIOUS CHARACTER OF SIN IS BROUGHT OUT BY THE WORDS WHO GAVE HIMSELF FOR OUR SINS +7176-88083-0026-2185: THE DRAG UPON HIS BEAK AND THE LIGHT CHECK UPON HIS WINGS WERE INEXPLICABLE TO HIM AND APPALLING +4446-2271-0008-1121: IRENE (BURGOYNE->BURGOIN) ONE OF HER FAMILY TOLD ME IN CONFIDENCE THAT THERE WAS A ROMANCE SOMEWHERE BACK IN THE BEGINNING +4992-41797-0011-1376: WHATEVER APPEALED TO HER SENSE OF BEAUTY WAS STRAIGHTWAY TRANSFERRED TO PAPER OR CANVAS +4446-2273-0028-1166: NONSENSE OF COURSE I CAN'T REALLY SING EXCEPT THE WAY MY MOTHER AND GRANDMOTHER DID BEFORE ME +7021-85628-0021-2081: HE STILL HELD ON TO IT WITH BOTH HANDS AS HE RUSHED INTO HIS MOTHER'S COTTAGE +1284-1180-0024-236: THAT IS ONE REASON YOU ARE OJO THE UNLUCKY SAID THE WOMAN IN A SYMPATHETIC TONE +908-157963-0027-2590: AND LAY ME DOWN IN THY COLD BED AND LEAVE MY SHINING LOT +672-122797-0056-1841: SAID THE FIR TREE THINKING OVER WHAT HE HAD HIMSELF RELATED +2830-3980-0039-836: THE TERMS OF GRACE AND PEACE ARE COMMON TERMS WITH PAUL AND ARE NOW PRETTY WELL UNDERSTOOD +7176-92135-0044-2231: BUT IT IS (THE->A) CIGARETTE WHICH CHIEFLY HAS BROUGHT THE MODERN DRAMA TO ITS PRESENT STATE OF PERFECTION +7127-75947-0023-2141: IT IS TOO DIFFICULT REPLIED MADEMOISELLE DE (TONNAY CHARENTE->TUNNICHAVENT) LAUGHING LOUDLY +4077-13751-0017-1091: OH WHAT A RECORD TO READ WHAT A PICTURE TO GAZE UPON HOW AWFUL THE FACT +8555-284449-0020-2546: THE COMBINED BANDS OF BOTH THE COUNTRIES PLAYED THE MUSIC AND A FINE SUPPER WAS SERVED +61-70968-0011-1692: HE GAVE WAY TO THE OTHERS VERY READILY AND RETREATED UNPERCEIVED BY THE SQUIRE AND MISTRESS FITZOOTH TO THE REAR OF THE TENT +8455-210777-0041-2397: THERE CAME UPON ME A SUDDEN SHOCK WHEN I HEARD THESE WORDS WHICH EXCEEDED ANYTHING WHICH I HAD YET FELT +5683-32865-0011-1617: WHEREUPON LAKE LAUGHED QUIETLY STILL LOOKING ON THE ACE OF HEARTS WITH HIS SLY EYES 
+6930-76324-0020-1992: YET LITTLE AS IT WAS IT HAD ALREADY MADE A VAST DIFFERENCE IN THE ASPECT OF THE ROOM +260-123440-0015-778: I WISH I HADN'T CRIED SO MUCH SAID ALICE AS SHE SWAM ABOUT TRYING TO FIND HER WAY OUT +908-31957-0013-2607: SLOW TO WORLD GREETINGS QUICK WITH ITS O LIST WHEN THE ANGELS SPEAK +4507-16021-0051-1272: IN THIS (WORLD->WORLD'S) EVIDENTLY THE VESTIBULE OF ANOTHER THERE ARE NO FORTUNATE +1284-1181-0008-253: I THINK THAT WILL DO SHE CONTINUED FOR THE OTHER QUALITIES ARE NOT NEEDED IN A SERVANT +121-127105-0013-147: YOU'LL EASILY JUDGE WHY WHEN YOU HEAR BECAUSE THE THING HAD BEEN SUCH A SCARE HE CONTINUED TO FIX ME +1221-135767-0005-192: IT WAS THE SCARLET LETTER IN ANOTHER FORM THE SCARLET LETTER ENDOWED WITH LIFE +1320-122617-0006-298: CAN THESE THINGS BE RETURNED DAVID BREATHING MORE FREELY AS THE TRUTH BEGAN TO DAWN UPON HIM +2094-142345-0027-538: (MUNNY->MONEY) I (TOULD IKE->DID LIKE) TO DO INTO (DE->THE) BARN TO TOMMY TO SEE (DE WHITTAWD->THE WIDOWED) +3575-170457-0048-1018: HE REFUSED AT FIRST TO LISTEN TO THE CAREFUL ADVICE IT WAS REPUGNANT TO HIS LIBERAL NATURE +8455-210777-0056-2412: THE PECULIAR CIRCUMSTANCES OF THE COLONY ARE WITHIN YOUR EXCELLENCY'S KNOWLEDGE +1320-122612-0007-282: CHINGACHGOOK HAD CAUGHT THE LOOK AND MOTIONING WITH HIS HAND HE BADE HIM SPEAK +2830-3980-0011-808: PAUL DECLARES THAT THE FALSE APOSTLES WERE CALLED OR SENT NEITHER BY MEN NOR BY MAN +2300-131720-0040-612: WE WERE MORE INTERESTED IN THE TECHNICAL CONDITION OF THE STATION THAN IN THE COMMERCIAL PART +6829-68771-0033-1947: SHE ROSE QUICKLY TO HER FEET WITH AN IMPETUOUS GESTURE THAT MADE HER VISITOR CATCH HER BREATH +4507-16021-0036-1257: FACTS FORM ONE OF THESE AND IDEAS THE OTHER +1188-133604-0038-102: BUT NOW HERE IS A SUBJECT OF WHICH YOU WILL WONDER AT FIRST WHY TURNER DREW IT AT ALL +1089-134691-0005-43: WHOSE FEET ARE AS THE FEET OF (HARTS->HEARTS) AND UNDERNEATH THE EVERLASTING ARMS +2830-3980-0056-853: OTHERWISE PAUL SHOULD HAVE WRITTEN GRACE FROM GOD THE FATHER AND PEACE FROM OUR LORD JESUS CHRIST +1320-122617-0021-313: WHAT SHALL WE DO WITH THE MINGOES AT THE DOOR THEY COUNT SIX AND (THIS->THE) SINGER IS AS GOOD AS NOTHING +7729-102255-0014-2247: THE LEADERS OF THE CONSPIRACY BECAME DISTRUSTFUL OF THEIR POWER TO CRUSH THE TOWN +3729-6852-0036-1063: WHEN THE KING COMES TO PARIS EVERYBODY CALLS OUT VIVE (LE ROI->LA ROY) +7176-92135-0045-2232: LORD JOHN TAKING (OUT->A) GOLD CIGARETTE CASE FROM HIS LEFT HAND UPPER WAISTCOAT POCKET +260-123286-0015-717: IT MUST BE AS WIDE AS THE MEDITERRANEAN OR THE ATLANTIC AND WHY NOT +5142-33396-0065-1527: SOFT HEART HE SAID GENTLY TO HER THEN TO (THORKEL->TURKLE) WELL LET HIM GO (THORKEL->TURKLE) +1089-134691-0004-42: PRIDE AFTER SATISFACTION UPLIFTED HIM LIKE LONG SLOW WAVES +7021-85628-0022-2082: AND ALL HIS BROTHERS AND SISTERS STOOD ROUND AND LISTENED WITH THEIR MOUTHS OPEN +1995-1826-0024-463: THE GOLDEN FLEECE IT'S THE SILVER FLEECE HE (HARKENED->HEARKENED) +5142-33396-0046-1508: BY THE BEARD OF ODIN I CRIED YOU HAVE TAKEN OUR JOKE LIKE A MAN +237-134493-0014-654: THEY THINK (YOU'RE->YOU ARE) PROUD BECAUSE YOU'VE BEEN AWAY TO SCHOOL OR SOMETHING +7021-79740-0009-2048: THEY WERE NOW PLAYING WITH THEIR DOLLS IN THE PARLOR +61-70968-0007-1688: SISTER NELL DO YOU HEAR THESE MARVELS +1995-1836-0007-473: (BUT->DO) YOU BELIEVE IN SOME EDUCATION ASKED MARY TAYLOR +260-123288-0009-743: THOSE CLOUDS SEEM AS IF THEY WERE GOING TO CRUSH THE SEA +1995-1837-0022-503: UP IN THE SICK ROOM (ZORA->TSORA) LAY ON THE LITTLE WHITE BED 
+4446-2271-0005-1118: SHE SAVES HER HAND TOO SHE'S AT HER BEST IN THE SECOND ACT +2094-142345-0053-564: FOR IF HE'S ANYWHERE ON THE FARM WE CAN SEND FOR HIM IN A MINUTE +6930-75918-0007-1958: YOU WILL BE FRANK WITH ME I ALWAYS AM +6930-81414-0002-2003: ONWARD SAID A DISTANT VOICE +2830-3980-0022-819: THE CLAUSE (SEEMS->SEEMED) SUPERFLUOUS ON FIRST SIGHT +8230-279154-0036-2348: A FURTHER STAGE IS RECOGNITION +6930-76324-0001-1973: (THEY->THERE) WERE CERTAINLY NO NEARER THE SOLUTION OF THEIR PROBLEM +1284-1181-0019-264: I NOW USE THEM AS ORNAMENTAL STATUARY IN MY GARDEN +4507-16021-0002-1223: THUS IDLENESS IS THE MOTHER +121-121726-0005-114: HEDGE A FENCE +1580-141083-0050-384: I REALLY DON'T THINK HE KNEW MUCH ABOUT IT MISTER HOLMES +8555-292519-0011-2558: HE HAD GOT INTO HER COURTYARD +4970-29095-0008-1313: MOTHER (I'M->I AM) GOING TO STUDY MEDICINE +1995-1826-0004-443: MIGHT LEARN SOMETHING USEFUL DOWN THERE +672-122797-0053-1838: THEY WERE SO EXTREMELY CURIOUS +7176-92135-0026-2213: ENTER HAMLET WITH HIS FAVOURITE (BOAR HOUND->BOARHOUND) +1188-133604-0035-99: THUS IN CHAUCER'S DREAM +2830-3980-0021-818: AND GOD THE FATHER WHO RAISED HIM FROM THE DEAD +7176-92135-0011-2198: NOW THE OBJECT OF THIS SOLILOQUY IS PLAIN +1284-1180-0022-234: (I'M->I AM) AFRAID I DON'T KNOW MUCH ABOUT THE LAND OF OZ +4970-29093-0017-1298: I'VE BEEN READY TO GO ANYWHERE FOR SIX MONTHS +121-127105-0010-144: SHE SENT ME THE PAGES IN QUESTION BEFORE SHE DIED +1580-141083-0005-339: I WAS ABSENT RATHER MORE THAN AN HOUR +8463-294828-0021-2483: A ROUTE SLIGHTLY LESS DIRECT THAT'S ALL +2094-142345-0007-518: THE HISTORY OF THE HOUSE IS PLAIN NOW +7127-75946-0005-2093: WHAT DO YOU MEAN INQUIRED (LOUIS->LOUISE) +8455-210777-0007-2363: QUITE SATISFIED SAID EVA +61-70968-0052-1733: BUT WHO IS THIS FELLOW PLUCKING AT YOUR SLEEVE +6829-68769-0038-1898: AND IT RUINS A MAN'S DISPOSITION +260-123286-0012-714: BUT THERE SEEMED NO REASON TO FEAR +1580-141084-0025-413: YOU KNOW HIM I THINK SO +6930-81414-0017-2018: I KNOW HE HAD IT THIS VERY EVENING +4446-2275-0018-1193: I GET NOTHING BUT MISERY OUT OF EITHER +2094-142345-0022-533: MISTER (OTTLEY'S->OAKLEY'S) INDEED +8555-284447-0012-2513: THE CAPTAIN SHOOK HIS HEAD +237-134500-0009-668: I SUPPOSE THAT'S THE WET SEASON TOO THEN +8463-294825-0011-2453: (HE'S->HE IS) SWIFTLY PUNISHED +7127-75947-0020-2138: NO MORE THAN THE DANCING +5142-33396-0016-1478: SO WE HARRIED THE COAST OF NORWAY +7127-75947-0005-2123: I ALLUDE TO THE GODDESS +5142-33396-0061-1523: YOUR MOTHER THE QUEEN WAS STANDING BY +2830-3979-0005-789: THE WORK HAD TO BE CONDENSED +4446-2275-0003-1178: THE ROOM WAS EMPTY WHEN HE ENTERED +1995-1837-0008-489: WHERE WAS THE USE OF IMAGINING +2830-3979-0004-788: IT WAS WRITTEN IN LATIN +237-134500-0025-684: OH (EMIL->AMY) +4970-29093-0004-1285: HE WAS UNABLE TO DECIDE EXACTLY WHAT IT SHOULD BE +1580-141083-0021-355: THERE IS NO OPENING EXCEPT THE ONE PANE SAID OUR LEARNED GUIDE +4992-23283-0015-1359: IS SHE NOT AFRAID THAT I WILL THWART HER INCLINATIONS +5142-33396-0002-1464: TWO HUNDRED WARRIORS FEASTED IN HIS HALL AND FOLLOWED HIM TO BATTLE +4446-2271-0021-1134: SHE MUST CARE ABOUT THE THEATRE A GREAT DEAL MORE THAN SHE USED TO +61-70968-0008-1689: TAKE YOUR PLACE AND LET US SEE WHAT THE CRYSTAL CAN SHOW TO YOU +5142-33396-0003-1465: THE REST OF YOU OFF A VIKING HE HAD THREE SHIPS +6930-76324-0003-1975: NOW WHAT (WAS->IS) THE SENSE OF IT TWO INNOCENT BABIES LIKE THAT +4507-16021-0018-1239: WHAT IS SLANG PROPERLY SPEAKING +1580-141084-0027-415: NO SIR CERTAINLY NOT 
+7127-75946-0021-2109: YES SIRE AND READY DRESSED FOR THE BALLET +237-134500-0011-670: AREN'T YOU SPLASHED LOOK AT THE SPIDER WEBS ALL OVER THE GRASS +7127-75947-0006-2124: THE PRINCESS INQUIRED NO +6930-81414-0003-2004: NO SOUND BROKE THE STILLNESS OF THE NIGHT +7021-85628-0019-2079: WITH ONE JUMP (ANDERS->ANDREWS) GOT OUT OF HIS CHAIR +6829-68769-0010-1870: WE WISH TO TALK WITH HIM ANSWERED KENNETH TALK +4446-2275-0034-1209: KEEP AWAY IF YOU WISH WHEN HAVE I EVER FOLLOWED YOU +237-134500-0026-685: SURELY YOU ARE NOT THINKING OF GOING OFF THERE +4446-2273-0012-1150: THANK YOU BUT I DON'T LIKE IT SO WELL AS THIS +6930-81414-0018-2019: I (REMEMBER->REMEMBERED) SAYING HAVE WE BEEN TOGETHER +4507-16021-0019-1240: IT IS THE LANGUAGE OF WRETCHEDNESS +4446-2271-0006-1119: HE'S BEEN WANTING TO MARRY HILDA THESE THREE YEARS AND MORE +5142-33396-0062-1524: NOW SHE PUT HER HAND ON HIS ARM AND SMILED AND SAID +5105-28240-0018-1435: YOU WILL TAKE ME ON BOARD COUNT WILL YOU NOT +7176-92135-0027-2214: LADY (LARKSPUR STARTS->LARCHBUR START) SUDDENLY AND (TURNS->TURNED) TOWARDS HIM +7021-85628-0004-2064: YES WHY NOT THOUGHT ANDERS +4446-2273-0011-1149: (THERE IS->THERE'S) NOTHING ELSE THAT LOOKS SO JOLLY +672-122797-0009-1794: HAVE YOU NOT MET THEM ANYWHERE +4446-2275-0020-1195: IT WAS MYSELF I WAS DEFYING HILDA +4992-23283-0001-1345: MISS MILNER'S HEALTH IS NOT GOOD +4970-29095-0010-1315: (THEE->THE) STUDY MEDICINE +5142-33396-0017-1479: WE ATE AT MANY MEN'S TABLES UNINVITED +8455-210777-0068-2424: YOUR POWER IS SUFFICIENT I SAID +5142-33396-0047-1509: MY MEN POUNDED THE TABLE WITH THEIR FISTS +5142-33396-0063-1525: AND WOULD HE NOT BE A GOOD GIFT FOR OUR BABY +6829-68771-0031-1945: HER EYES WANDERED TO THE MAID'S HANDS +908-31957-0010-2604: O LOVE (O->OH) TROTH +1580-141084-0012-400: THE FOUL MOUTHED FELLOW AT THE TOP +1995-1826-0006-445: BEEN LOOKING UP (TOOMS->TOMBS) COUNTY +7127-75947-0036-2154: I GIVE MY CONSENT +1188-133604-0006-70: THEN HE COMES TO THE BEAK OF IT +8463-294828-0022-2484: (WE'RE->WERE) LEAVING ON THE ABRAHAM LINCOLN +61-70970-0006-1750: NEVER THAT SIR HE HAD SAID +4446-2271-0022-1135: I'M GLAD SHE'S HELD HER OWN (SINCE->SENSE) +6930-76324-0018-1990: HE MAKES IT SORT OF (COZIER->COSIER) +4507-16021-0004-1225: WHAT IS SLANG +4446-2275-0004-1179: ALEXANDER DID NOT SIT DOWN +4446-2271-0007-1120: SHE DOESN'T TAKE UP WITH ANYBODY YOU KNOW +61-70968-0038-1719: ROBIN FITZOOTH +61-70970-0005-1749: THE LAD HAD CHECKED HIM THEN +1580-141083-0051-385: ONLY FOR A MINUTE OR SO +4446-2273-0026-1164: HAVE I TOLD YOU ABOUT MY NEW PLAY +5105-28241-0007-1449: (THERE IS->THERE'S) NO FEAR OF THAT SIR +4970-29093-0006-1287: LAW SEEMED TO HIM WELL ENOUGH AS A SCIENCE BUT HE NEVER COULD DISCOVER A PRACTICAL CASE WHERE IT APPEARED TO HIM WORTH WHILE TO GO TO LAW AND ALL THE CLIENTS WHO STOPPED WITH THIS NEW CLERK IN THE ANTE ROOM OF THE LAW OFFICE WHERE HE WAS WRITING PHILIP INVARIABLY ADVISED TO SETTLE NO MATTER HOW BUT SETTLE GREATLY TO THE DISGUST OF HIS EMPLOYER WHO KNEW THAT JUSTICE BETWEEN MAN AND MAN COULD ONLY BE ATTAINED BY THE RECOGNIZED PROCESSES WITH THE ATTENDANT FEES +4077-13751-0018-1092: AMERICAN (SCHOOL BOYS->SCHOOLBOYS) READ WITH EMOTIONS OF HORROR OF THE (ALBIGENSES->ALBIGINZAS) DRIVEN BEATEN AND KILLED WITH A (PAPAL->PAPEL) LEGATE DIRECTING THE BUTCHERY AND OF THE VAUDOIS HUNTED AND HOUNDED LIKE BEASTS AS THE EFFECT OF A ROYAL DECREE AND THEY YET SHALL READ IN THE HISTORY OF THEIR OWN COUNTRY OF SCENES AS TERRIBLE AS THESE IN THE EXHIBITION OF INJUSTICE AND INHUMAN HATE 
+1188-133604-0023-87: THE COLORIST SAYS FIRST OF ALL AS MY DELICIOUS PAROQUET WAS RUBY SO THIS NASTY VIPER SHALL BE BLACK AND THEN IS THE QUESTION CAN I ROUND HIM OFF EVEN THOUGH HE IS BLACK AND MAKE HIM SLIMY AND YET SPRINGY AND CLOSE DOWN CLOTTED LIKE A POOL OF BLACK BLOOD ON THE EARTH ALL THE SAME +7127-75947-0009-2127: A QUARTER OF AN HOUR AFTERWARDS HE RETURNED TO THE (THEATER->THEATRE) BUT IT WILL BE READILY BELIEVED THAT IT WAS ONLY A POWERFUL EFFORT OF REASON OVER HIS GREAT EXCITEMENT THAT ENABLED HIM TO GO BACK OR PERHAPS FOR LOVE IS THUS STRANGELY CONSTITUTED HE FOUND IT IMPOSSIBLE EVEN TO REMAIN MUCH LONGER SEPARATED FROM THE PRESENCE OF ONE WHO HAD BROKEN HIS HEART +8230-279154-0025-2337: WE MIGHT PROVISIONALLY THOUGH PERHAPS NOT QUITE CORRECTLY DEFINE MEMORY AS THAT WAY OF KNOWING ABOUT THE PAST WHICH HAS NO ANALOGUE IN OUR KNOWLEDGE OF THE FUTURE SUCH A DEFINITION WOULD AT LEAST SERVE TO MARK THE PROBLEM WITH WHICH WE ARE CONCERNED THOUGH SOME EXPECTATIONS MAY DESERVE TO RANK WITH MEMORY AS REGARDS IMMEDIACY +3729-6852-0035-1062: IT SEEMS TO ME I REPLIED THAT SUCH APPROVAL SUCH RATIFICATION OF THE OPINION EXPRESSED BY THE KING THE PRINCES OF THE BLOOD ET CETERA IS RATHER A PROOF OF THE AFFECTION FELT FOR THEM BY THE NATION FOR THE FRENCH CARRY THAT AFFECTION TO SUCH AN EXTENT THAT THEY BELIEVE THEM INFALLIBLE +1221-135767-0001-188: ANOTHER AND FAR MORE IMPORTANT REASON THAN THE DELIVERY OF A PAIR OF EMBROIDERED GLOVES IMPELLED HESTER AT THIS TIME TO SEEK AN INTERVIEW WITH A PERSONAGE OF SO MUCH POWER AND ACTIVITY IN THE AFFAIRS OF THE SETTLEMENT +1089-134686-0023-23: WHY WAS THE SACRAMENT OF THE EUCHARIST INSTITUTED UNDER THE TWO SPECIES OF BREAD AND WINE IF JESUS CHRIST BE PRESENT BODY AND BLOOD SOUL AND DIVINITY IN THE BREAD ALONE AND IN THE WINE ALONE +5639-40744-0034-1598: ALL ROSE TO DO HER REVERENCE AS IF SOMETHING FROM HEAVEN HAD MIRACULOUSLY APPEARED BEFORE THEM BUT GAZING ON HER ENTRANCED WITH ADMIRATION NOT ONE OF THEM WAS ABLE TO ADDRESS A SINGLE WORD TO HER +672-122797-0067-1852: THE TRUNKS WERE MOVED THE TREE WAS PULLED OUT AND THROWN RATHER HARD IT IS TRUE DOWN ON THE FLOOR BUT A MAN DREW HIM TOWARDS THE STAIRS WHERE THE DAYLIGHT SHONE +4077-13751-0014-1088: MAKING THEIR WAY ACROSS THE RIVER MOST OF THE REFUGEES FOUND SHELTER AMONG THE MORE HOSPITABLE PEOPLE OF CLAY COUNTY AND AFTERWARD ESTABLISHED THEMSELVES IN CALDWELL COUNTY THEREIN FOUNDING THE CITY (OF->A) FAR WEST +3729-6852-0046-1073: HIS HOUSEKEEPER HAD THE MANAGEMENT OF EVERYTHING SHE NEVER ALLOWED HIM TO BE IN NEED OF ANYTHING AND SHE GAVE NO ACCOUNT OF HIS MONEY WHICH SHE KEPT ALTOGETHER BECAUSE HE NEVER ASKED HER TO RENDER ANY ACCOUNTS +5639-40744-0004-1568: THEY DREW THEIR SWORDS HID THEIR FACES IN THE FLAPS OF THEIR CLOAKS TURNED BACK AND SOON CAME IN FRONT OF THE LITTLE PARTY WHO HAD NOT YET DONE GIVING THANKS TO GOD FOR THEIR ESCAPE FROM THOSE AUDACIOUS MEN +4446-2273-0010-1148: THERE WAS WATERCRESS SOUP AND SOLE AND A DELIGHTFUL (OMELETTE->OMELET) STUFFED WITH MUSHROOMS AND TRUFFLES AND TWO SMALL RARE DUCKLINGS AND ARTICHOKES AND A DRY YELLOW RHONE WINE OF WHICH BARTLEY HAD ALWAYS BEEN VERY FOND +4992-41806-0015-1403: MISSUS HARMON THOUGHT HE SANG TOO MUCH AND TOLD HER HUSBAND PRIVATELY THAT IF HE WAS A CANARY BIRD SHE SHOULD WANT TO KEEP A TABLE COVER OVER HIS HEAD MOST OF THE TIME BUT HE WAS IMMENSELY POPULAR WITH THE REST OF HIS AUDIENCE +6930-76324-0015-1987: SMUGGLING THE HOUSE CLEANING PARAPHERNALIA INTO THE CELLAR WINDOW UNOBSERVED THAT AFTERNOON PROVED NO EASY TASK FOR CYNTHIA HAD ADDED 
A WHISK BROOM AND DUST PAN TO THE OUTFIT +3729-6852-0001-1028: WITHOUT SAYING IT POSITIVELY SHE MADE ME UNDERSTAND THAT BEING HERSELF AN ILLUSTRIOUS MEMBER OF THE REPUBLIC OF LETTERS SHE WAS WELL AWARE THAT SHE WAS SPEAKING TO AN INSECT +2961-960-0019-893: HE COULD WRITE IN ONE STYLE BUT NOT IN ANOTHER AND THE GREEK LANGUAGE HAD NOT AS YET BEEN FASHIONED BY ANY POET OR PHILOSOPHER TO DESCRIBE PHYSICAL PHENOMENA +5639-40744-0019-1583: WHAT YOU HAD BEST DO MY CHILD IS TO KEEP IT AND PRAY TO IT THAT SINCE IT WAS A WITNESS TO YOUR UNDOING IT WILL DEIGN TO VINDICATE YOUR CAUSE BY ITS RIGHTEOUS JUDGMENT +7729-102255-0040-2273: IN THIS INCIDENT CONTRASTING THE CREATIVE AND THE DESTRUCTIVE SPIRIT OF THE FACTIONS THE EMIGRANT AID SOCIETY OF MASSACHUSETTS FINDS ITS MOST HONORABLE AND TRIUMPHANT VINDICATION +121-123859-0004-133: SO I RETURN (REBUK'D->REBUKED) TO MY CONTENT AND GAIN BY ILL THRICE MORE THAN I HAVE SPENT +3570-5694-0008-928: THE CONSUMPTION OF LUXURIES IN THE TRUE SENSE IS A CONSUMPTION DIRECTED TO THE COMFORT OF THE CONSUMER HIMSELF AND IS THEREFORE A MARK OF THE MASTER +2830-3979-0008-792: IN OTHER WORDS THESE THREE MEN TOOK DOWN THE LECTURES WHICH LUTHER ADDRESSED TO HIS STUDENTS IN THE COURSE OF GALATIANS AND (ROERER->RUER) PREPARED THE MANUSCRIPT FOR THE PRINTER +2961-961-0001-898: AND NOW HE DESIRES TO SEE THE IDEAL STATE SET IN MOTION HE WOULD LIKE TO KNOW HOW SHE BEHAVED IN SOME GREAT STRUGGLE +4446-2273-0000-1138: HILDA WAS VERY NICE TO HIM AND HE SAT ON THE EDGE OF HIS CHAIR FLUSHED WITH HIS CONVERSATIONAL EFFORTS AND MOVING HIS CHIN ABOUT NERVOUSLY OVER HIS HIGH COLLAR +4446-2275-0007-1182: SHE PUSHED HIM TOWARD THE BIG CHAIR BY THE FIRE AND SAT DOWN ON A STOOL AT THE OPPOSITE SIDE OF THE HEARTH HER KNEES DRAWN UP TO HER CHIN LAUGHING LIKE A HAPPY LITTLE GIRL +2830-3980-0025-822: BY HIS RESURRECTION CHRIST WON THE VICTORY OVER LAW SIN FLESH WORLD DEVIL DEATH HELL AND EVERY EVIL +1221-135766-0007-178: HESTER PRYNNE NEVERTHELESS THE LOVING MOTHER OF THIS ONE CHILD RAN LITTLE RISK OF ERRING ON THE SIDE OF UNDUE SEVERITY +8555-284447-0001-2502: HOLD HIM FAST MY (MEN->MAN) AND AS SOON AS I'VE HAD MY COFFEE AND OATMEAL I'LL TAKE HIM TO THE ROOM OF THE GREAT KNIFE AND PATCH HIM +3575-170457-0033-1003: SATURDAY AFTER SATURDAY COMES ROUND AND I CAN HAVE NO HOPE OF HEARING YOUR KNOCK AT THE DOOR AND THEN BEING TOLD THAT MISS E IS COME OH DEAR +6829-68771-0018-1932: FOR A MOMENT BETH STOOD STARING WHILE THE NEW MAID REGARDED HER WITH COMPOSURE AND A SLIGHT SMILE UPON HER BEAUTIFUL FACE +2830-3979-0009-793: IT PRESENTS LIKE NO OTHER OF LUTHER'S WRITINGS THE CENTRAL THOUGHT OF CHRISTIANITY THE JUSTIFICATION OF THE SINNER FOR THE SAKE OF CHRIST'S MERITS ALONE +8463-294828-0010-2472: NEVER DID HE OBJECT TO BUCKLING UP HIS SUITCASE FOR ANY COUNTRY WHATEVER CHINA OR THE CONGO NO MATTER HOW FAR OFF IT WAS +1284-1180-0026-238: BUT FIRST I WILL TELL YOU THAT FOR MANY YEARS I HAVE LONGED FOR A SERVANT TO HELP ME WITH THE HOUSEWORK AND TO COOK THE MEALS AND WASH THE DISHES +5105-28233-0002-1408: IT MUST BE OWNED AND NO ONE WAS MORE READY TO CONFESS IT THAN HIMSELF THAT HIS LITERARY ATTAINMENTS WERE BY NO MEANS OF A HIGH ORDER +1995-1837-0012-493: HE SPLASHED AND STAMPED ALONG FARTHER AND FARTHER ONWARD UNTIL HE NEARED THE RAMPART OF THE CLEARING AND PUT FOOT UPON THE TREE BRIDGE +4992-23283-0004-1348: AND YET YOU MUST OWN HER BEHAVIOUR HAS WARRANTED THEM HAS IT NOT BEEN IN THIS PARTICULAR INCOHERENT AND UNACCOUNTABLE +3570-5695-0015-958: THE RESULT IS A GREAT MOBILITY OF THE LABOR EMPLOYED IN 
PRINTING PERHAPS GREATER THAN IN ANY OTHER EQUALLY WELL DEFINED AND CONSIDERABLE BODY OF WORKMEN +672-122797-0072-1857: AND THE GARDENER'S BOY CHOPPED THE TREE INTO SMALL PIECES THERE WAS A WHOLE HEAP LYING THERE +1089-134686-0013-13: IF EVER HE WAS IMPELLED TO CAST SIN FROM HIM AND TO REPENT THE IMPULSE THAT MOVED HIM WAS THE WISH TO BE HER KNIGHT +1320-122612-0008-283: THE EYES OF THE WHOLE PARTY FOLLOWED THE UNEXPECTED MOVEMENT AND READ THEIR SUCCESS IN THE AIR OF TRIUMPH THAT THE YOUTH ASSUMED +7127-75946-0023-2111: THE KING SEEMED ONLY PLEASED WITH (EVERY ONE->EVERYONE) PRESENT +8455-210777-0025-2381: WHAT COULD I DO NOW BUT JUST LAY MYSELF DOWN AND DIE +8230-279154-0008-2320: BUT I DO NOT THINK SUCH AN INFERENCE IS WARRANTED +2094-142345-0025-536: COLD IS IT MY DARLING BLESS YOUR SWEET FACE +7021-85628-0006-2066: I AM GOING TO THE COURT BALL ANSWERED (ANDERS->ANDREWS) +260-123288-0012-746: THAT WILL BE (*->THE) SAFEST NO NO NEVER +4446-2275-0006-1181: I THOUGHT IT MIGHT BE SISTER KATE OR COUSIN MIKE WOULD BE HAPPENING ALONG +1995-1837-0010-491: PERHAPS SHE TOO MIGHT BE THERE WAITING WEEPING +260-123440-0013-776: I AM SO VERY TIRED OF BEING ALL ALONE HERE +1580-141083-0052-386: OH I WOULD NOT VENTURE TO SAY SIR +6930-81414-0019-2020: (VOLTAIRE->VOLCHERRE) PICKED UP SOMETHING FROM THE GROUND AND LOOKED AT IT +4970-29095-0011-1316: DOES THEE THINK THEE COULD STAND IT SIX MONTHS +7176-92135-0028-2215: (LARKSPUR->LARKS FOR) BIT ME AGAIN THIS MORNING FOR THE THIRD TIME +3729-6852-0019-1046: I TELL HIM TO GIVE ME SOME COFFEE IF IT IS GOOD +5142-33396-0049-1511: HERE FRIEND TAKE IT AND HE THRUST IT INTO THE FARMER'S HAND +5142-33396-0064-1526: YOUR FATHER THOUGHT A MOMENT THEN LOOKED AT YOUR MOTHER AND SMILED +1580-141083-0022-356: I AM AFRAID THERE ARE NO SIGNS HERE SAID HE +237-134500-0027-686: MARIE'S FACE FELL UNDER HIS BROODING GAZE +7021-79740-0012-2051: SAID SHE POINTING TO THE PLAYTHINGS SEE +5683-32866-0022-1646: ITS CURTAINS WERE OF THICK AND FADED TAPESTRY +7176-92135-0013-2200: WE MODERNS HOWEVER SEE THE ABSURDITY OF IT +4992-23283-0002-1346: SAID MISSUS HORTON A FEW MINUTES AFTER +8455-210777-0010-2366: THEIR MASTERS SAID MISSUS NEVERBEND +1089-134691-0018-56: AGAIN AGAIN +6930-75918-0010-1961: I CAN PERCEIVE LOVE CLEARLY ENOUGH +8463-287645-0014-2441: OF STARTING I DIDN'T KNOW THE WAY TO COME +3575-170457-0001-971: WHY ARE WE TO BE DENIED EACH OTHER'S SOCIETY +7176-92135-0029-2216: I WANT TO GET AWAY FROM IT ALL (SWOONS->SWOON) +260-123286-0014-716: TRULY (THIS->THE) SEA IS OF INFINITE WIDTH +4446-2275-0036-1211: HE TOOK HER ROUGHLY IN HIS ARMS DO YOU KNOW WHAT I MEAN +1284-1181-0021-266: ASKED THE VOICE IN SCORNFUL ACCENTS +7729-102255-0013-2246: IT WAS IN FACT THE BEST WEAPON OF ITS DAY +6829-68769-0041-1901: I'M NOT ELECTIONEERING JUST NOW +1580-141084-0028-416: THERE WAS NO MAN SIR +2094-142345-0055-566: BUT YOU KNOW MORE ABOUT THAT THAN I DO SIR +8463-294828-0008-2470: AND YET WHAT A FINE GALLANT LAD +4446-2273-0013-1151: HAVE YOU BEEN IN PARIS MUCH THESE LATE YEARS +237-134500-0042-701: THEN ALL OUR GOOD TIMES ARE OVER +6930-76324-0019-1991: NOW LET'S DUST THE FURNITURE AND PICTURES +672-122797-0011-1796: AND THEN WHAT HAPPENS THEN +8455-210777-0054-2410: THE LETTER RAN AS FOLLOWS +6829-68771-0017-1931: THE HOUSEKEEPER LED THE WAY AND BETH FOLLOWED +2094-142345-0040-551: I DIDN'T PREACH WITHOUT DIRECTION +2300-131720-0039-611: THE PROBLEM WAS SOLVED +908-31957-0011-2605: AND LOVE BE FALSE +4446-2271-0023-1136: AFTER ALL WE WERE AWFULLY YOUNG +1580-141084-0013-401: HE IS 
THE ONE WITH THE WORST RECORD +5142-33396-0004-1466: THESE HE GAVE TO THREE OF MY BROTHERS +1089-134691-0003-41: THE UNIVERSITY +7176-88083-0027-2186: THEN THE LEADER PARTED FROM THE LINE +1580-141084-0043-431: HE PUT HIS SHOES ON THE TABLE +6930-81414-0005-2006: WHAT WAS THAT +3575-170457-0016-986: FAREWELL (MADAM->MADAME) +3729-6852-0006-1033: (SILVIA->SYLVIA) DID NOT THINK THAT HER GOOD CONDUCT WAS A MERIT FOR SHE KNEW THAT SHE WAS VIRTUOUS ONLY BECAUSE HER SELF LOVE COMPELLED HER TO BE SO AND SHE NEVER EXHIBITED ANY PRIDE OR ASSUMED ANY SUPERIORITY TOWARDS HER THEATRICAL SISTERS ALTHOUGH SATISFIED TO SHINE BY THEIR TALENT OR THEIR BEAUTY THEY CARED LITTLE ABOUT RENDERING THEMSELVES CONSPICUOUS BY THEIR VIRTUE +1188-133604-0009-73: NOW YOU WILL SEE IN THESE STUDIES THAT THE MOMENT THE WHITE IS (INCLOSED->ENCLOSED) PROPERLY AND HARMONIZED WITH THE OTHER HUES IT BECOMES SOMEHOW MORE PRECIOUS AND PEARLY THAN THE WHITE PAPER AND THAT I AM NOT AFRAID TO LEAVE A WHOLE FIELD OF UNTREATED WHITE PAPER ALL ROUND IT BEING SURE THAT EVEN THE LITTLE DIAMONDS IN THE ROUND WINDOW WILL TELL AS JEWELS IF THEY ARE GRADATED JUSTLY +4077-13754-0012-1108: THIS MEANT THAT FOR AN ALLEGED MISDEMEANOR FOR WHICH CONGRESS PRESCRIBED A MAXIMUM PENALTY OF SIX MONTHS IMPRISONMENT AND A FINE OF THREE HUNDRED DOLLARS A MAN MIGHT BE IMPRISONED FOR LIFE (AYE->AY) FOR MANY TERMS OF A MAN'S NATURAL LIFE DID THE COURT'S POWER TO ENFORCE ITS SENTENCES EXTEND SO FAR AND MIGHT BE FINED MILLIONS OF DOLLARS +5683-32865-0012-1618: AND WYLDER LAUGHED TOO MORE SUDDENLY AND NOISILY THAN THE HUMOUR OF THE JOKE SEEMED QUITE TO CALL FOR AND GLANCED A GRIM LOOK FROM THE CORNERS OF HIS EYES ON LAKE BUT THE GALLANT CAPTAIN DID NOT SEEM TO PERCEIVE IT AND AFTER A FEW SECONDS MORE HE HANDED IT VERY INNOCENTLY BACK TO MISSUS DOROTHY ONLY REMARKING +260-123288-0015-749: FROM THE UNDER SURFACE OF THE CLOUDS THERE ARE CONTINUAL EMISSIONS OF LURID LIGHT ELECTRIC MATTER IS IN CONTINUAL EVOLUTION FROM THEIR COMPONENT MOLECULES THE GASEOUS ELEMENTS OF THE AIR NEED TO BE SLAKED WITH MOISTURE FOR INNUMERABLE COLUMNS OF WATER RUSH UPWARDS INTO THE AIR AND FALL BACK AGAIN IN WHITE FOAM +8555-292519-0001-2548: GUIDED BY YOU HOW WE MIGHT STROLL TOWARDS DEATH OUR ONLY MUSIC ONE ANOTHER'S BREATH THROUGH GARDENS INTIMATE WITH HOLLYHOCKS WHERE SILENT POPPIES BURN BETWEEN THE ROCKS BY POOLS WHERE BIRCHES BEND TO CONFIDANTS ABOVE GREEN WATERS (SCUMMED->SKUMMED) WITH LILY PLANTS +1221-135767-0006-193: THE MOTHER HERSELF AS IF THE RED IGNOMINY WERE SO DEEPLY SCORCHED INTO HER BRAIN THAT ALL HER CONCEPTIONS ASSUMED ITS FORM HAD CAREFULLY WROUGHT OUT THE SIMILITUDE LAVISHING MANY HOURS OF MORBID INGENUITY TO CREATE AN ANALOGY BETWEEN THE OBJECT OF HER AFFECTION AND THE EMBLEM OF HER GUILT AND TORTURE +2961-960-0010-884: BUT HE HAS NOT AS YET DEFINED THIS INTERMEDIATE TERRITORY WHICH LIES SOMEWHERE BETWEEN MEDICINE AND MATHEMATICS AND HE WOULD HAVE FELT THAT THERE WAS AS GREAT AN IMPIETY IN RANKING THEORIES OF PHYSICS FIRST IN THE ORDER OF KNOWLEDGE AS IN PLACING THE BODY BEFORE THE SOUL +2300-131720-0022-594: MEANWHILE HE HAD CALLED UPON ME TO MAKE A REPORT OF THE THREE WIRE SYSTEM KNOWN IN ENGLAND AS THE HOPKINSON BOTH DOCTOR JOHN HOPKINSON AND MISTER EDISON BEING INDEPENDENT INVENTORS AT PRACTICALLY THE SAME TIME +3729-6852-0032-1059: SIMPLY BY STOPPING HER CARRIAGE TWO OR THREE TIMES BEFORE THE SHOP TO HAVE HER SNUFF BOX FILLED AND BY SAYING ALOUD TO THE YOUNG GIRL WHO HANDED BACK THE BOX THAT HER SNUFF WAS THE VERY BEST IN PARIS +1320-122617-0002-294: IN 
OTHER WORDS WHILE HE HAD IMPLICIT FAITH IN THE ABILITY OF (BALAAM'S->BAILIM'S) ASS TO SPEAK HE WAS SOMEWHAT (SKEPTICAL->SCEPTICAL) ON THE SUBJECT OF A BEAR'S SINGING AND YET HE HAD BEEN ASSURED OF THE LATTER ON THE TESTIMONY OF HIS OWN EXQUISITE ORGANS +4077-13751-0015-1089: A SMALL SETTLEMENT HAD BEEN FOUNDED BY MORMON FAMILIES ON SHOAL CREEK AND HERE ON THE THIRTIETH OF OCTOBER EIGHTEEN THIRTY EIGHT A COMPANY OF TWO HUNDRED AND FORTY FELL UPON THE HAPLESS SETTLERS AND BUTCHERED A SCORE +2300-131720-0007-579: BROAD AS THE PRAIRIES AND FREE IN THOUGHT AS THE WINDS THAT (SWEEP->SWEPT) THEM HE IS IDIOSYNCRATICALLY OPPOSED TO LOOSE AND WASTEFUL METHODS TO PLANS OF EMPIRE THAT NEGLECT THE POOR AT THE GATE +260-123288-0025-759: HERE IT COMES THERE IT GLIDES NOW IT IS UP THE RAGGED STUMP OF THE MAST THENCE IT LIGHTLY LEAPS ON THE PROVISION BAG DESCENDS WITH A LIGHT BOUND AND JUST SKIMS THE POWDER MAGAZINE HORRIBLE +8555-284449-0017-2543: WHEN FIRST THEY ENTERED THE THRONE ROOM THEY TRIED TO BE AS HAUGHTY AND SCORNFUL AS EVER BUT THE BLUES WHO WERE ASSEMBLED THERE ALL LAUGHED AT THEM AND JEERED THEM FOR THERE WAS NOT A SINGLE PERSON IN ALL THE BLUE COUNTRY WHO LOVED THE PRINCESSES THE LEAST LITTLE BIT +6829-68771-0000-1914: SO TO THE SURPRISE OF THE DEMOCRATIC COMMITTEE AND ALL HIS FRIENDS MISTER HOPKINS ANNOUNCED THAT HE WOULD OPPOSE (FORBES'S->FORD'S) AGGRESSIVE CAMPAIGN WITH AN EQUAL AGGRESSIVENESS AND SPEND AS MANY DOLLARS IN DOING SO AS MIGHT BE NECESSARY +4992-41797-0009-1374: HENRY LORD WITH THE DEGREE OF (PH->P H) D TO HIS CREDIT HAD BEEN PROFESSOR OF ZOOLOGY AT A NEW ENGLAND COLLEGE BUT HAD RESIGNED HIS POST IN ORDER TO WRITE A SERIES OF SCIENTIFIC TEXT BOOKS +2961-961-0012-909: FOR IN THE TIMES BEFORE THE GREAT FLOOD ATHENS WAS THE GREATEST AND BEST OF CITIES AND DID THE NOBLEST DEEDS AND HAD THE BEST CONSTITUTION OF ANY UNDER THE FACE OF HEAVEN +7729-102255-0041-2274: THE WHOLE PROCEEDING WAS SO CHILDISH THE MISERABLE PLOT SO TRANSPARENT THE OUTRAGE SO GROSS AS TO BRING DISGUST TO THE BETTER CLASS OF BORDER RUFFIANS WHO WERE WITNESSES AND ACCESSORIES +7021-79740-0010-2049: (DELIA->DELLIA) CAME TO THE PARLOR AND WITH AN AIR OF GREAT MYSTERY BECKONED THE CHILDREN ASIDE AND SAID TO THEM IN A WHISPER LEAVE (ANDELLA->ANDELA) AND ROSALIE HERE AND DON'T SAY A WORD TO THEM +8455-210777-0008-2364: THE LADIES IN COMPLIANCE WITH THAT SOFTNESS OF HEART WHICH IS THEIR CHARACTERISTIC ARE ON ONE SIDE AND THE MEN BY WHOM THE WORLD HAS TO BE MANAGED ARE ON THE OTHER +1089-134686-0024-24: IF THE WINE CHANGE INTO VINEGAR AND THE HOST CRUMBLE INTO CORRUPTION AFTER THEY HAVE BEEN CONSECRATED IS JESUS CHRIST STILL PRESENT UNDER THEIR SPECIES AS GOD AND AS MAN +5639-40744-0022-1586: TIME ROLLED ON THE HOUR OF HER DELIVERY ARRIVED IT TOOK PLACE IN THE UTMOST SECRECY HER MOTHER TAKING UPON HER THE OFFICE OF MIDWIFE AND SHE GAVE BIRTH TO A SON ONE OF THE MOST BEAUTIFUL EVER SEEN +8224-274381-0010-2290: BY A QUICK AND UNEXPECTED MARCH MONTROSE HASTENED TO (INNERLOCHY->INNILOCKI) AND PRESENTED HIMSELF IN ORDER OF BATTLE BEFORE THE SURPRISED BUT NOT AFFRIGHTENED COVENANTERS +7176-92135-0043-2230: TWO BITES ARE MADE AND THE BREAD IS CRUMBLED WITH AN AIR OF GREAT EAGERNESS INDEED ONE FEELS THAT IN REAL LIFE THE GUEST WOULD CLUTCH HOLD OF THE FOOTMAN AND SAY HALF A MO OLD CHAP I HAVEN'T NEARLY FINISHED BUT THE ACTOR IS BETTER SCHOOLED THAN THIS +8463-294828-0038-2500: THOUSANDS OF HANDKERCHIEFS WERE WAVING ABOVE THESE TIGHTLY PACKED MASSES HAILING THE ABRAHAM LINCOLN UNTIL IT REACHED THE WATERS OF THE HUDSON RIVER AT 
THE TIP OF THE LONG PENINSULA THAT FORMS NEW YORK CITY +8463-294828-0007-2469: CLASSIFYING WAS EVERYTHING TO HIM SO HE KNEW NOTHING ELSE WELL VERSED IN THE THEORY OF CLASSIFICATION HE WAS POORLY VERSED IN ITS PRACTICAL APPLICATION AND I DOUBT THAT HE COULD TELL A SPERM WHALE FROM A BALEEN WHALE +1089-134686-0011-11: ON SATURDAY MORNINGS WHEN THE SODALITY MET IN THE CHAPEL TO RECITE THE LITTLE OFFICE HIS PLACE WAS A CUSHIONED KNEELING DESK AT THE RIGHT OF THE ALTAR FROM WHICH HE LED HIS WING OF BOYS THROUGH THE RESPONSES +7127-75947-0022-2140: I AM A WOMAN AND THERE ARE FEW LIKE ME WHOEVER LOVES ME FLATTERS ME WHOEVER FLATTERS ME PLEASES ME AND WHOEVER PLEASES WELL SAID MONTALAIS YOU DO NOT FINISH +1320-122617-0004-296: THE INGENIOUS HAWKEYE WHO RECALLED THE HASTY MANNER IN WHICH THE OTHER HAD ABANDONED HIS POST AT THE BEDSIDE OF THE SICK WOMAN WAS NOT WITHOUT HIS SUSPICIONS CONCERNING THE SUBJECT OF SO MUCH SOLEMN DELIBERATION +4077-13754-0010-1106: IN EIGHTEEN SIXTY TWO A LAW WAS ENACTED WITH THE PURPOSE OF SUPPRESSING PLURAL MARRIAGE AND AS HAD BEEN PREDICTED IN THE NATIONAL SENATE PRIOR TO ITS PASSAGE IT LAY FOR MANY YEARS A DEAD LETTER +5142-36377-0025-1556: SILAS SLUNK AWAY WITHOUT A WORD OF PROTEST AMBROSE STOOD HIS GROUND EVIDENTLY BENT ON MAKING HIS PEACE WITH NAOMI BEFORE HE LEFT HER SEEING THAT I WAS IN THE WAY I WALKED ASIDE TOWARD A GLASS DOOR AT THE LOWER END OF THE ROOM +1089-134691-0017-55: THE EUROPE THEY HAD COME FROM LAY OUT THERE BEYOND THE IRISH SEA EUROPE OF STRANGE TONGUES AND VALLEYED AND (WOODBEGIRT->WOOD BEGIRT) AND (CITADELLED->CITADELED) AND OF ENTRENCHED AND MARSHALLED RACES +1089-134691-0002-40: HE SET OFF ABRUPTLY FOR THE BULL WALKING RAPIDLY LEST HIS FATHER'S SHRILL WHISTLE MIGHT CALL HIM BACK AND IN A FEW MOMENTS HE HAD ROUNDED THE CURVE AT THE POLICE BARRACK AND WAS SAFE +5639-40744-0036-1600: HIS MOTHER HAD LEFT HER TO HIM AS BEING HER DESTINED PROTECTOR BUT WHEN SHE SAW THAT HE TOO WAS INSENSIBLE SHE WAS NEAR MAKING A THIRD AND WOULD HAVE DONE SO HAD HE NOT COME TO HIMSELF +5639-40744-0037-1601: KNOW THEN SON OF MY HEART THAT THIS FAINTING LADY IS YOUR REAL BRIDE I SAY REAL BECAUSE SHE IS THE ONE WHOM YOUR FATHER AND I HAVE CHOSEN FOR YOU AND THE PORTRAIT WAS A PRETENCE +5105-28241-0011-1453: ALL THE IMAGES OF HIS PAST LIFE FLOATED UPON HIS MEMORY HIS THOUGHTS SPED AWAY TO HIS NATIVE FRANCE ONLY TO RETURN AGAIN TO WONDER WHETHER THE DEPTHS OF OCEAN WOULD REVEAL ANY TRACES OF THE ALGERIAN METROPOLIS +8224-274384-0008-2306: THE GOOD NATURED AUDIENCE IN PITY TO FALLEN MAJESTY SHOWED FOR ONCE GREATER DEFERENCE TO THE KING THAN TO THE MINISTER AND SUNG THE PSALM WHICH THE FORMER HAD CALLED FOR +5683-32866-0009-1633: I DON'T KNOW AND CAN'T SAY HOW YOU FINE GENTLEMEN DEFINE WICKEDNESS ONLY AS AN OBSCURE FEMALE I SPEAK ACCORDING TO MY LIGHTS AND HE IS GENERALLY THOUGHT THE WICKEDEST MAN IN THIS COUNTY +3729-6852-0020-1047: THEN TURNING TOWARDS ME HE SAYS THAT I LOOK LIKE A FOREIGNER AND WHEN I SAY THAT I AM AN ITALIAN HE BEGINS TO SPEAK TO ME OF THE COURT (OF->*) THE CITY OF THE THEATRES AND AT LAST HE OFFERS TO ACCOMPANY ME EVERYWHERE +237-134493-0003-643: FROM THE GRAVEYARD GATE ONE CAN COUNT A DOZEN (GAYLY->GAILY) PAINTED FARMHOUSES THE GILDED WEATHER VANES ON THE BIG RED BARNS WINK AT EACH OTHER ACROSS THE GREEN AND BROWN AND YELLOW FIELDS +3575-170457-0017-987: THOUGH I MAY BE BUT AN UNGRACIOUS ADVISER YOU WILL ALLOW ME THEREFORE TO SUBSCRIBE MYSELF WITH THE BEST WISHES FOR YOUR HAPPINESS HERE AND HEREAFTER YOUR TRUE FRIEND ROBERT (SOUTHEY->SELVEY) 
+1221-135767-0021-208: PEARL ACCORDINGLY RAN TO THE BOW WINDOW AT THE FURTHER END OF THE HALL AND LOOKED ALONG THE VISTA OF A GARDEN WALK CARPETED WITH CLOSELY SHAVEN GRASS AND BORDERED WITH SOME RUDE AND IMMATURE ATTEMPT AT SHRUBBERY +7021-79730-0007-2036: IF YOU SHOULD NOT BE A GOOD GIRL BUT SHOULD SHOW SIGNS OF MAKING US ANY TROUBLE I SHALL HAVE TO SEND YOU OUT SOMEWHERE TO THE BACK PART OF THE HOUSE UNTIL WE ARE GONE +5105-28240-0021-1438: BEFORE STARTING IT WAS INDISPENSABLE THAT THE ENGINE OF THE DOBRYNA SHOULD BE REPAIRED TO SAIL UNDER CANVAS ONLY WOULD IN CONTRARY WINDS AND ROUGH SEAS BE BOTH TEDIOUS AND DIFFICULT +1320-122617-0036-328: GOD BLESS YOU FRIEND I DO BELIEVE YOUR SCENT IS NOT GREATLY WRONG WHEN THE MATTER IS DULY CONSIDERED AND KEEPING ETERNITY BEFORE THE EYES THOUGH MUCH DEPENDS ON THE NATURAL GIFTS AND THE FORCE OF TEMPTATION +5683-32879-0006-1661: THERE WAS SOMETHING OF SWEETNESS AND FONDNESS IN HER TONES AND MANNER WHICH WAS NEW TO RACHEL AND COMFORTING AND SHE RETURNED THE GREETING AS KINDLY AND FELT MORE LIKE HER FORMER SELF +2961-960-0009-883: THE DIALOGUE IS PRIMARILY CONCERNED WITH THE ANIMAL CREATION INCLUDING UNDER THIS TERM THE HEAVENLY BODIES AND WITH MAN ONLY AS ONE AMONG THE ANIMALS +7021-79730-0008-2037: BUT THIS LAST SUPPOSITION IS ALMOST ALWAYS UNNECESSARY FOR IF MARY HAS BEEN HABITUALLY MANAGED ON THIS PRINCIPLE SHE WILL NOT MAKE ANY TROUBLE +1089-134686-0012-12: HER EYES SEEMED TO REGARD HIM WITH MILD PITY HER HOLINESS A STRANGE LIGHT GLOWING FAINTLY UPON HER FRAIL FLESH DID NOT HUMILIATE THE SINNER WHO APPROACHED HER +8224-274381-0013-2293: FROM THE SAME MEN NEW REGIMENTS AND NEW COMPANIES WERE FORMED DIFFERENT OFFICERS APPOINTED AND THE WHOLE MILITARY FORCE PUT INTO SUCH HANDS AS THE INDEPENDENTS COULD RELY ON +1089-134691-0021-59: THEIR DIVING STONE POISED ON ITS RUDE SUPPORTS AND ROCKING UNDER THEIR PLUNGES AND THE ROUGH HEWN STONES OF THE SLOPING BREAKWATER OVER WHICH THEY SCRAMBLED IN THEIR (HORSEPLAY->HORSE PLAY) GLEAMED WITH COLD WET LUSTRE +237-126133-0000-614: HERE SHE WOULD STAY COMFORTED AND SOOTHED AMONG THE LOVELY PLANTS AND RICH EXOTICS REJOICING THE HEART OF OLD TURNER THE GARDENER WHO SINCE POLLY'S FIRST RAPTUROUS ENTRANCE HAD TAKEN HER INTO HIS GOOD GRACES FOR ALL TIME +4077-13754-0013-1109: BEFORE THIS TRAVESTY ON THE ADMINISTRATION OF LAW COULD BE BROUGHT BEFORE THE COURT OF LAST RESORT AND THERE (MEET->MET) WITH THE REVERSAL AND REBUKE IT DESERVED MEN WERE IMPRISONED UNDER (SENTENCES->SENTENCE) OF MANY YEARS DURATION +1221-135767-0007-194: BUT IN TRUTH PEARL WAS THE ONE AS WELL AS THE OTHER AND ONLY IN CONSEQUENCE OF THAT IDENTITY HAD HESTER CONTRIVED SO PERFECTLY TO REPRESENT THE SCARLET LETTER IN HER APPEARANCE +8455-210777-0027-2383: WHEN THIS CAPTAIN SHOULD HAVE TAKEN HIMSELF AND HIS VESSEL BACK TO ENGLAND I WOULD RETIRE TO A SMALL FARM WHICH I POSSESSED AT THE (FARTHEST->FURTHEST) SIDE OF THE ISLAND AND THERE IN SECLUSION WOULD I END MY DAYS +908-157963-0000-2563: TO FADE AWAY LIKE MORNING BEAUTY FROM HER MORTAL DAY DOWN BY THE RIVER OF (ADONA->ADONNA) HER SOFT VOICE IS HEARD AND THUS HER GENTLE LAMENTATION FALLS LIKE MORNING DEW +3570-5694-0009-929: WITH MANY QUALIFICATIONS WITH MORE QUALIFICATIONS AS THE PATRIARCHAL TRADITION HAS GRADUALLY WEAKENED THE GENERAL RULE IS FELT TO BE RIGHT AND BINDING THAT WOMEN SHOULD CONSUME ONLY FOR THE BENEFIT OF THEIR MASTERS +7021-79730-0009-2038: IT IS INDEED TRUE THAT THE IMPORTANCE OF TACT AND SKILL IN THE TRAINING OF THE YOUNG AND OF CULTIVATING THEIR REASON AND SECURING THEIR 
AFFECTION (CAN NOT->CANNOT) BE OVERRATED +5639-40744-0040-1604: THIS WAS DONE FOR THE EVENT TOOK PLACE AT A TIME WHEN THE CONSENT OF THE PARTIES WAS SUFFICIENT FOR THE CELEBRATION OF (A->THE) MARRIAGE WITHOUT ANY OF THE PRELIMINARY FORMALITIES WHICH ARE NOW SO PROPERLY REQUIRED +3729-6852-0007-1034: TWO YEARS BEFORE HER DEATH I SAW HER PERFORM THE CHARACTER OF (MARIANNE->MARY ANNE) IN THE COMEDY OF (MARIVAUX->MARAVAUX) AND IN SPITE OF HER AGE AND DECLINING HEALTH THE ILLUSION WAS COMPLETE +4507-16021-0023-1244: THE SUGAR MANUFACTURER WHO SAYS LOAF CLARIFIED LUMPS BASTARD COMMON BURNT THIS HONEST MANUFACTURER TALKS SLANG +8555-284447-0017-2518: WHEN THIS HAD BEEN ACCOMPLISHED THE BOOLOOROO LEANED OVER TO TRY TO DISCOVER WHY THE FRAME ROLLED AWAY SEEMINGLY OF ITS OWN ACCORD AND HE WAS THE MORE PUZZLED BECAUSE IT HAD NEVER DONE SUCH A THING BEFORE +7729-102255-0016-2249: THE GOVERNOR ON HIS PART BECOMING DOUBTFUL OF THE LEGALITY OF EMPLOYING MISSOURI MILITIA TO ENFORCE KANSAS LAWS WAS ALSO EAGER TO SECURE THE HELP OF FEDERAL TROOPS +2830-3980-0070-867: BUT WE ARE CARELESS WE MAKE LIGHT OF SIN +61-70970-0023-1767: BE NOT SO FOOLISH FRIEND SAID FITZOOTH CROSSLY +4992-41806-0004-1392: BURN FIRE BURN FLICKER FLICKER FLAME +5142-36586-0000-1557: IT IS MANIFEST THAT MAN IS NOW SUBJECT TO MUCH VARIABILITY +908-157963-0029-2592: WHY A TONGUE (IMPRESS'D->IMPRESSED) WITH HONEY FROM EVERY WIND +61-70968-0056-1737: THE WINE DID CERTAINLY BRING BACK THE COLOR TO THE SQUIRE'S CHEEKS +1995-1837-0011-492: HE STARTED AT THE THOUGHT HE HURRIED FORTH SADLY +1221-135767-0020-207: IN TRUTH SHE SEEMED ABSOLUTELY HIDDEN BEHIND IT +1580-141083-0023-357: ONE COULD HARDLY HOPE FOR ANY UPON SO DRY A DAY +5142-33396-0020-1482: A HOUSE SMELLS OF SMOKE A SHIP SMELLS OF FROLIC +4446-2273-0029-1167: IT'S REALLY TOO WARM IN THIS ROOM TO SING DON'T YOU FEEL IT +5683-32866-0008-1632: BRACTON'S A VERY GOOD FELLOW I CAN ASSURE YOU +4446-2275-0022-1197: BUT WHY DIDN'T YOU TELL ME WHEN YOU WERE HERE IN THE SUMMER +8463-294825-0015-2457: (GRAM->GRAHAM) ROUGHLY (ONE->WON) TWENTY EIGHTH OF AN OUNCE +6930-81414-0021-2022: A TERRIBLE THOUGHT FLASHED INTO MY MIND +6930-75918-0011-1962: I AM CONVINCED OF WHAT I SAY SAID THE COUNT +4446-2271-0024-1137: I SHOULDN'T WONDER IF SHE COULD LAUGH ABOUT IT WITH ME NOW +1089-134691-0019-57: A VOICE FROM BEYOND THE WORLD WAS CALLING +6829-68769-0027-1887: WHOSE NAME DID YOU SIGN TO THE CHECK ASKED KENNETH +3575-170457-0032-1002: COME COME I AM GETTING REALLY TIRED OF YOUR ABSENCE +8455-210777-0026-2382: AND THE DEATH OF WHICH I DREAMT COULD NOT ALAS +1580-141084-0044-432: GLOVES SAID THE YOUNG MAN +1995-1826-0008-447: SOME OTHERS TOO BIG COTTON COUNTY +5142-33396-0050-1512: MAY YOU DRINK HEART'S EASE FROM IT FOR MANY YEARS +8555-292519-0015-2562: HE HAD BROKEN INTO HER COURTYARD +2094-142345-0026-537: SHE'S GOING TO PUT THE IRONING THINGS AWAY +5683-32866-0023-1647: ALL THE FURNITURE BELONGED TO OTHER TIMES +1089-134686-0027-27: HE CLASPED HIS HANDS ON THE DESK AND SAID +4992-41797-0012-1377: SHE IS WILD TO KNOW HOW TO DO THINGS +8463-294825-0000-2442: IT'S ALMOST BEYOND CONJECTURE +2830-3980-0040-837: THE GREETING OF THE APOSTLE IS REFRESHING +260-123288-0028-762: WE SUFFER STIFLING PAINS +8555-284449-0005-2531: WHEN HE FINISHED SHE SAID CHEERFULLY +8555-292519-0014-2561: SHE WAS ALONE THAT NIGHT +4970-29093-0021-1302: I WAS AFRAID IT WAS NEARER HOME +8463-294825-0014-2456: FATHOM SIX FEET +4446-2273-0014-1152: THERE ARE (*->A) FEW CHANGES IN THE OLD QUARTER +5105-28241-0010-1452: OCEAN REIGNED 
SUPREME +5105-28240-0020-1437: THE COUNT SHOOK HIS HEAD +121-127105-0014-148: YOU ARE ACUTE +7176-92135-0030-2217: (ENTER->ENTERED) LORD ARTHUR (FLUFFINOSE->FLOPENNO'S) +5142-33396-0005-1467: BUT I STAYED THAT SPRING AND BUILT ME A BOAT +5639-40744-0008-1572: WHO TOUCHES ME AM I IN BED +6829-68769-0042-1902: OH WELL SIR WHAT ABOUT HIM +5142-36377-0011-1542: MISTER (JAGO->IAGO) IS AN AMERICAN PHILIP +61-70970-0008-1752: NOW TO BED BOY +3575-170457-0002-972: WHY ARE WE TO BE DIVIDED +4507-16021-0006-1227: SLANG IS ODIOUS +8555-284447-0016-2517: FINE GLORIOUS +4446-2275-0037-1212: OH BARTLEY WHAT AM I TO DO +8463-294828-0025-2487: BUT WE'RE GOING JUST THE SAME +8463-294828-0024-2486: WE DON'T KNOW WHERE IT WILL TAKE US +2094-142345-0041-552: DIRECTION +2300-131720-0041-613: WE HAD METERS IN WHICH THERE WERE TWO BOTTLES OF LIQUID +1580-141083-0040-374: ONE HARDLY LIKES TO THROW SUSPICION WHERE THERE ARE NO PROOFS +4992-41806-0005-1393: NEXT CAME OLIVE'S TURN TO HELP IN THE CEREMONIES +1580-141083-0039-373: AND THEY ARE ALL IN FOR THIS EXAMINATION YES +1995-1836-0012-478: BUT WE'RE NOT (ER->A) EXACTLY (WELCOMED->WELCOME) +1580-141084-0045-433: SUDDENLY HE HEARD HIM AT THE VERY DOOR THERE WAS NO POSSIBLE ESCAPE +7127-75946-0010-2098: YOUR MAJESTY'S PLAN THEN IN THIS AFFAIR IS +8463-287645-0001-2428: IT IS HARDLY NECESSARY TO SAY MORE OF THEM HERE +8455-210777-0012-2368: MISSUS NEVERBEND YOU MUST INDEED BE PROUD OF YOUR SON +5142-33396-0021-1483: UP AND DOWN THE WATER WE WENT TO GET MUCH WEALTH AND MUCH FROLIC +1580-141084-0015-403: HE IMPRESSED ME AS BEING A PERFECTLY HONEST MAN +61-70970-0009-1753: TIS LATE AND I GO MYSELF WITHIN A SHORT SPACE +1995-1826-0025-464: (SOME TIME->SOMETIME) YOU'LL TELL ME PLEASE WON'T YOU +6829-68769-0028-1888: HE IS SUPPOSED TO SIGN ALL THE CHECKS OF THE CONCERN +7729-102255-0000-2233: THE BOGUS LEGISLATURE NUMBERED THIRTY SIX MEMBERS +1284-1180-0027-239: YET THAT TASK WAS NOT SO EASY AS YOU MAY SUPPOSE +1580-141084-0001-389: HE WAS PACING SWIFTLY UP AND DOWN HIS ROOM +1188-133604-0040-104: THE (CRAMPNESS->CRAMPEDNESS) AND THE POVERTY ARE ALL INTENDED +1995-1837-0013-494: THEN HE LOOKED DOWN THE LAGOON WAS DRY +4446-2271-0010-1123: HE'S ANOTHER WHO'S AWFULLY KEEN ABOUT HER LET ME INTRODUCE YOU +260-123286-0001-703: THE HORIZON SEEMS EXTREMELY DISTANT +3729-6852-0021-1048: I THANK HIM AND TAKE MY LEAVE +121-127105-0015-149: HE QUITTED THE FIRE AND DROPPED BACK INTO HIS CHAIR +5683-32879-0008-1663: YOU HAVE BEEN SO ILL MY POOR RACHEL +3575-170457-0018-988: SIR MARCH SIXTEENTH +4077-13751-0019-1093: WHO BEGAN THE QUARREL WAS IT THE MORMONS +4446-2273-0030-1168: ALEXANDER WENT OVER AND OPENED THE WINDOW FOR HER +61-70968-0042-1723: MONTFICHET CALLED OUT FOR ROBIN TO GIVE HIM AN ARM +237-134500-0030-689: I WANT TO BE DOING SOMETHING ON MY OWN ACCOUNT +8463-294828-0026-2488: WE HAVE A COMMANDER WHO'S GAME FOR ANYTHING +6829-68769-0013-1873: MAY WE SEE GATES AT ONCE ASKED KENNETH +5142-36377-0012-1543: MAKE ACQUAINTANCE WITH (MISTER JAGO->MISS CHIAGO) SIT TOGETHER +4507-16021-0007-1228: SLANG MAKES ONE SHUDDER +61-70968-0012-1693: CRIES OF A NOTTINGHAM A NOTTINGHAM +672-122797-0028-1813: THIS EVENING THEY ALL SAID +6829-68769-0043-1903: AND HE DESERVES A TERM IN STATE'S PRISON +6829-68771-0034-1948: I WISH I KNEW MYSELF SHE CRIED FIERCELY +2094-142345-0042-553: I (HANNA->HAD A) COMMON PATIENCE WITH YOU +1995-1826-0010-449: AT ANY RATE I SAY GO +5639-40744-0009-1573: MOTHER DEAR FATHER DO YOU HEAR ME +1580-141084-0046-434: HAVE I TOLD THE TRUTH MISTER (GILCHRIST->GILCRIST) 
+1995-1837-0028-509: THE CHAIR WAS EMPTY BUT HE KNEW +1089-134686-0014-14: HE TRIED TO THINK HOW IT COULD BE +5142-33396-0066-1528: THEN HE TURNED TO ME AGAIN FROWNING +5142-36586-0001-1558: SO IT IS WITH THE LOWER ANIMALS +121-127105-0030-164: I DON'T ANTICIPATE +260-123440-0000-763: AND HOW ODD THE DIRECTIONS WILL LOOK +1580-141084-0030-418: JUST CLOSE THE DOOR SAID HOLMES +237-134500-0029-688: I DON'T WANT TO STAND AROUND AND LOOK ON +4970-29095-0013-1318: I WILL PRACTICE IT +6930-75918-0012-1963: IT IS ANNOYANCE THEN +260-123288-0014-748: HANS STIRS NOT +2830-3980-0026-823: (VERSE->FIRST) TWO +3570-5696-0000-959: UNDER THE SIMPLE TEST OF EFFECTIVENESS FOR ADVERTISING WE SHOULD EXPECT TO FIND LEISURE AND THE CONSPICUOUS CONSUMPTION OF GOODS DIVIDING THE FIELD OF PECUNIARY EMULATION PRETTY EVENLY BETWEEN THEM AT THE OUTSET +2300-131720-0012-584: EDISON WAS INTOLERANT OF SHAM AND SHODDY AND NOTHING WOULD SATISFY HIM THAT COULD NOT STAND CROSS EXAMINATION BY MICROSCOPE TEST TUBE AND GALVANOMETER +8230-279154-0027-2339: A GRAMOPHONE BY THE HELP OF SUITABLE RECORDS MIGHT RELATE TO US THE INCIDENTS OF ITS PAST AND PEOPLE ARE NOT SO DIFFERENT FROM GRAMOPHONES AS THEY LIKE TO BELIEVE +7021-79740-0000-2039: TO SUCH PERSONS THESE INDIRECT MODES OF TRAINING CHILDREN IN HABITS OF SUBORDINATION TO THEIR WILL OR RATHER OF YIELDING TO THEIR INFLUENCE ARE SPECIALLY USEFUL +237-134500-0015-674: THAT SUMMER THE RAINS HAD BEEN SO MANY AND OPPORTUNE THAT IT WAS ALMOST MORE THAN (SHABATA->CHABATA) AND HIS MAN COULD DO TO KEEP UP WITH THE CORN THE ORCHARD WAS A NEGLECTED WILDERNESS +8455-210777-0057-2413: BUT IN THEIR SELECTION OF A CONSTITUTION THE (BRITANNULISTS->BRITON ULISTS) HAVE UNFORTUNATELY ALLOWED THEMSELVES BUT ONE (DELIBERATIVE->DELIBERATE) ASSEMBLY AND HENCE HAVE SPRUNG THEIR PRESENT DIFFICULTIES +1221-135766-0008-179: MINDFUL HOWEVER OF HER OWN ERRORS AND MISFORTUNES SHE EARLY SOUGHT TO IMPOSE A TENDER BUT STRICT CONTROL OVER THE INFANT IMMORTALITY THAT WAS COMMITTED TO HER CHARGE +4992-23283-0005-1349: NOT THAT I KNOW OF NOT ONE MORE THAT I KNOW OF HE REPLIED WITH ASTONISHMENT AT WHAT SHE HAD INSINUATED AND YET WITH A PERFECT ASSURANCE THAT SHE WAS IN THE WRONG +8463-294825-0002-2444: FIRST AS A PARIS STOCKBROKER LATER AS A CELEBRATED AUTHOR AND YACHTSMAN HE WENT ON FREQUENT VOYAGES TO BRITAIN AMERICA THE MEDITERRANEAN +4992-23283-0020-1364: I HAVE NEVER YET HOWEVER BEEN VANQUISHED BY THEM AND EVEN UPON THIS OCCASION MY REASON SHALL COMBAT THEM TO THE LAST AND MY REASON SHALL FAIL ME BEFORE I DO WRONG +4992-41806-0006-1394: RALPH THURSTON HAD FOUND A LINE OF LATIN FOR THEM IN HIS BELOVED (HORACE TIBI SPLENDET->HORRACE TIBBY SPLENDID) FOCUS FOR YOU THE HEARTH FIRE SHINES +4077-13751-0020-1094: AS A SAMPLE OF THE PRESS COMMENTS AGAINST THE BRUTALITY OF THE MISSOURIANS I QUOTE A PARAGRAPH FROM THE QUINCY ARGUS MARCH SIXTEENTH EIGHTEEN THIRTY NINE +260-123286-0002-704: ALL MY DANGER AND SUFFERINGS WERE NEEDED TO STRIKE A SPARK OF HUMAN FEELING OUT OF HIM BUT NOW THAT I AM WELL HIS NATURE HAS RESUMED ITS SWAY +121-127105-0000-134: IT WAS THIS OBSERVATION THAT DREW FROM DOUGLAS NOT IMMEDIATELY BUT LATER IN THE EVENING A REPLY THAT HAD THE INTERESTING CONSEQUENCE TO WHICH I CALL ATTENTION +121-121726-0010-119: (HOUSECLEANING->HOUSE CLEANING) A DOMESTIC UPHEAVAL THAT MAKES IT EASY FOR THE GOVERNMENT TO ENLIST ALL THE SOLDIERS IT NEEDS +2961-961-0017-914: OBSERVE AGAIN WHAT CARE THE LAW TOOK IN THE PURSUIT OF WISDOM SEARCHING OUT THE DEEP THINGS OF THE WORLD AND APPLYING THEM TO THE USE OF (MAN->MEN) 
+4446-2273-0016-1154: HER HAIR IS STILL LIKE FLAX AND HER BLUE EYES ARE JUST LIKE A BABY'S AND SHE HAS THE SAME THREE FRECKLES ON HER LITTLE NOSE AND TALKS ABOUT GOING BACK TO HER (BAINS DE->BAND O) MER +8463-294828-0012-2474: PLEASE FORGIVE ME FOR THIS UNDERHANDED WAY OF ADMITTING (*->THAT) I HAD TURNED FORTY +6930-81414-0023-2024: PERCHANCE TOO (KAFFAR'S->KAFFIRS) DEATH MIGHT SERVE HIM IN GOOD STEAD +7021-79759-0000-2054: NATURE OF THE EFFECT PRODUCED BY EARLY IMPRESSIONS +5142-33396-0022-1484: WHAT OF THE FARM OLAF NOT YET I ANSWERED VIKING IS BETTER FOR SUMMER +6829-68771-0020-1934: THEN SHE GAVE A LITTLE LAUGH AND REPLIED NO MISS BETH (I'M->I AM) ELIZABETH PARSONS +61-70968-0013-1694: BEFORE THEM FLED THE STROLLER AND HIS THREE SONS CAPLESS AND TERRIFIED +8555-284447-0003-2504: BUT CAP'N BILL MADE NO SUCH ATTEMPT KNOWING IT WOULD BE USELESS +6829-68771-0035-1949: WILL YOU LEAVE ME ALONE IN MY OWN ROOM OR MUST I GO AWAY TO ESCAPE YOU +61-70970-0040-1784: THEY REGAINED THEIR APARTMENT APPARENTLY WITHOUT DISTURBING THE HOUSEHOLD OF GAMEWELL +5639-40744-0010-1574: IT IS THE ONLY AMENDS I ASK OF YOU FOR THE WRONG YOU HAVE DONE ME +8224-274384-0011-2309: THE ENGLISH IT IS EVIDENT HAD THEY NOT BEEN PREVIOUSLY ASSURED OF RECEIVING THE KING WOULD NEVER HAVE PARTED WITH SO CONSIDERABLE A SUM AND WHILE THEY WEAKENED THEMSELVES BY THE SAME MEASURE HAVE STRENGTHENED A PEOPLE WITH WHOM THEY MUST AFTERWARDS HAVE SO MATERIAL AN INTEREST TO DISCUSS +908-31957-0015-2609: THAT WAS THE CHRISM OF LOVE WHICH LOVE'S OWN CROWN WITH SANCTIFYING SWEETNESS DID (PRECEDE->PROCEED) THE THIRD UPON MY LIPS WAS FOLDED DOWN IN PERFECT PURPLE STATE SINCE WHEN INDEED I HAVE BEEN PROUD AND SAID MY LOVE MY OWN +8463-287645-0002-2429: FROM THE MANNER IN WHICH HE EXPRESSED HIMSELF WITH REGARD TO ROBERT (HOLLAN->HOLLAND) NO MAN IN THE WHOLE RANGE OF HIS RECOLLECTIONS WILL BE LONGER REMEMBERED THAN HE HIS (ENTHRALMENT->ENTHRALLMENT) WHILE UNDER (HOLLAN->HOLLAND) WILL HARDLY EVER BE FORGOTTEN +7127-75946-0026-2114: WHEN THE MUSIC BY ITS BURSTS OF MELODY CARRIED AWAY THESE ILLUSTRIOUS DANCERS WHEN THE SIMPLE UNTUTORED PANTOMIME OF THAT PERIOD ONLY THE MORE NATURAL ON ACCOUNT OF THE VERY INDIFFERENT ACTING OF THE AUGUST ACTORS HAD REACHED ITS CULMINATING POINT OF TRIUMPH THE (THEATER->THEATRE) SHOOK WITH TUMULTUOUS APPLAUSE +7729-102255-0046-2279: IN THE EARLY MORNING OF THE NEXT DAY MAY TWENTY NINTH A COMPANY OF DRAGOONS WITH ONE EMPTY SADDLE CAME DOWN FROM THE FORT AND WHILE THE PRO SLAVERY MEN STILL SLEPT THE PRISONER AND HIS ESCORT WERE ON THEIR WAY ACROSS THE PRAIRIES TO (LECOMPTON->LA COMPTON) IN THE CHARGE OF OFFICERS OF THE UNITED STATES ARMY +4077-13751-0005-1079: NEVERTHELESS THE MUSTARD SEED AMONG THE SMALLEST OF ALL SEEDS HAS ATTAINED THE PROPORTIONS OF A TREE AND THE BIRDS OF THE AIR ARE NESTING IN ITS BRANCHES THE ACORN IS NOW AN OAK OFFERING PROTECTION AND THE SWEETS OF SATISFACTION TO EVERY EARNEST PILGRIM JOURNEYING ITS WAY FOR TRUTH +8230-279154-0042-2354: THUS IF I RECOGNIZE A THING THE OCCASION OF ITS PREVIOUS EXISTENCE IN VIRTUE OF WHICH I RECOGNIZE IT FORMS PART OF MY EXPERIENCE BY DEFINITION RECOGNITION WILL BE ONE OF THE MARKS BY WHICH MY EXPERIENCE IS SINGLED OUT FROM THE REST OF THE WORLD +8224-274381-0014-2294: BESIDES MEMBERS OF PARLIAMENT WHO WERE EXCLUDED MANY OFFICERS UNWILLING TO SERVE UNDER THE NEW GENERALS THREW UP THEIR COMMISSIONS AND (UNWARILY->AND WARILY) FACILITATED THE PROJECT OF PUTTING THE ARMY ENTIRELY INTO THE HANDS OF THAT FACTION +8463-294825-0001-2443: THIS REALITY BEGINS TO EXPLAIN 
THE DARK POWER AND (OTHERWORLDLY->OTHER WORLDDLY) FASCINATION OF TWENTY THOUSAND LEAGUES UNDER THE SEAS +8455-210777-0028-2384: JACK WOULD BECOME EVA'S HAPPY HUSBAND AND WOULD REMAIN AMIDST THE HURRIED DUTIES OF THE EAGER WORLD +908-31957-0014-2608: A RING OF AMETHYST I COULD NOT WEAR HERE PLAINER TO MY SIGHT THAN THAT FIRST KISS +7176-92135-0016-2203: THIS WOULD BE AN EASY WAY OF DOING IT BUT IT WOULD NOT BE THE BEST WAY FOR THE REASON THAT IT IS TOO EASY TO CALL ATTENTION TO ITSELF +1188-133604-0025-89: YOU KNOW I HAVE JUST BEEN TELLING YOU HOW THIS SCHOOL OF MATERIALISM AND CLAY INVOLVED ITSELF AT LAST IN CLOUD AND FIRE +61-70970-0025-1769: THEY WERE UPON THE VERGE OF AN OPEN TRAP IN THE FAR CORNER OF THE HUT AND STUTELEY HAD TRIPPED OVER THE EDGE OF THE REVERSED (FLAP->FLAT) MOUTH OF THIS PIT +8455-210777-0013-2369: JACK HAD BEEN STANDING IN THE FAR CORNER OF THE ROOM TALKING TO EVA AND WAS NOW REDUCED TO SILENCE BY HIS PRAISES +2094-142345-0043-554: BY THIS TIME THE TWO GENTLEMEN HAD REACHED THE PALINGS AND HAD GOT DOWN FROM THEIR HORSES IT WAS PLAIN THEY MEANT TO COME IN +7729-102255-0030-2263: HE SUMMONED HALF A DOZEN CITIZENS TO JOIN HIS POSSE WHO FOLLOWED OBEYED AND ASSISTED HIM +4992-41797-0014-1379: WHEN SHE COULD NOT MAKE A RABBIT OR A BIRD LOOK REAL ON PAPER SHE SEARCHED IN HER FATHER'S BOOKS FOR PICTURES OF ITS BONES +1320-122617-0037-329: THE DELAWARE DOG HE SAID LEANING FORWARD AND PEERING THROUGH THE DIM LIGHT TO CATCH THE EXPRESSION OF THE OTHER'S FEATURES IS HE AFRAID +7176-92135-0017-2204: IN THE OLD BADLY MADE PLAY IT WAS FREQUENTLY NECESSARY FOR ONE OF THE CHARACTERS TO TAKE THE AUDIENCE INTO HIS CONFIDENCE +8455-210777-0058-2414: IT IS FOUNDED ON THE ACKNOWLEDGED WEAKNESS OF THOSE WHO SURVIVE THAT PERIOD OF LIFE AT WHICH MEN CEASE TO WORK +5683-32865-0013-1619: DO YOU KNOW LAKE OH I REALLY CAN'T TELL BUT HE'LL SOON TIRE OF COUNTRY LIFE +2094-142345-0028-539: NO NO NO TOTTY (UD->HAD) GET HER FEET WET SAID MISSUS POYSER CARRYING AWAY HER IRON +237-134493-0004-644: THE AIR AND THE EARTH ARE CURIOUSLY MATED AND INTERMINGLED AS IF THE ONE WERE THE BREATH OF THE OTHER +7729-102255-0045-2278: CAPTAIN MARTIN SAID I SHALL GIVE YOU A PISTOL TO HELP PROTECT YOURSELF IF WORSE COMES TO WORST +5105-28241-0012-1454: IS IT NOT IMPOSSIBLE HE MURMURED ALOUD THAT ANY CITY SHOULD DISAPPEAR SO COMPLETELY +7729-102255-0031-2264: HE CONTINUED HIS PRETENDED SEARCH AND TO GIVE COLOR TO HIS ERRAND MADE (TWO ARRESTS->TO ARREST) +61-70968-0043-1724: FRIENDS SAID MONTFICHET FAINTLY TO THE WRESTLERS BEAR US ESCORT SO FAR AS THE SHERIFF'S HOUSE +5142-36377-0013-1544: THEY POINTEDLY DREW BACK FROM JOHN JAGO AS HE APPROACHED THE EMPTY CHAIR NEXT TO ME AND MOVED ROUND TO THE OPPOSITE SIDE OF THE TABLE +7127-75947-0026-2144: WELL SAID MADEMOISELLE DE (TONNAY CHARENTE->TOURNACHEANT) I ALSO THINK A GOOD DEAL BUT I TAKE CARE +1580-141084-0031-419: WE WANT TO KNOW MISTER (GILCHRIST->GILCREST) HOW YOU AN (HONOURABLE->HONORABLE) MAN EVER CAME TO COMMIT SUCH AN ACTION AS THAT OF YESTERDAY +4507-16021-0052-1273: THE REAL HUMAN DIVISION IS THIS THE LUMINOUS AND THE SHADY +8230-279154-0011-2323: SOME IMAGES LIKE SOME SENSATIONS FEEL VERY FAMILIAR WHILE OTHERS FEEL STRANGE +5142-33396-0006-1468: I MADE HER FOR ONLY TWENTY OARS BECAUSE I THOUGHT FEW MEN WOULD FOLLOW ME FOR I WAS YOUNG FIFTEEN YEARS OLD +4970-29093-0022-1303: HE KNEW HIS UNCLE WOULD BE GLAD TO HEAR THAT HE HAD AT LAST TURNED HIS THOUGHTS TO A PRACTICAL MATTER +4507-16021-0022-1243: THERE IS THE SLANG OF THE AFFECTED LADY AS WELL AS OF THE 
(PRECIEUSES->PRECIUSEES) +6829-68771-0005-1919: THE ONLY THING NECESSARY WAS TO FIX SETH REYNOLDS AND THIS HOPKINS ARRANGED PERSONALLY +3575-170457-0019-989: I (HAD->HAVE) NOT VENTURED TO HOPE FOR SUCH A REPLY SO CONSIDERATE IN ITS TONE SO NOBLE IN ITS SPIRIT +8555-284449-0008-2534: THEN SHE GAVE ROSALIE BACK HER MAGIC RING THANKING THE KIND WITCH FOR ALL SHE HAD DONE FOR THEM +237-134500-0000-659: FRANK READ ENGLISH SLOWLY AND THE MORE HE READ ABOUT THIS DIVORCE CASE THE ANGRIER HE GREW +1188-133604-0010-74: BUT IN THIS VIGNETTE COPIED FROM TURNER YOU HAVE THE TWO PRINCIPLES BROUGHT OUT PERFECTLY +6930-81414-0008-2009: IT DID NOT BECKON OR INDEED MOVE AT ALL IT WAS AS STILL AS THE HAND OF DEATH +8463-294828-0027-2489: I LEFT INSTRUCTIONS FOR SHIPPING MY CONTAINERS OF STUFFED ANIMALS AND DRIED PLANTS TO PARIS FRANCE +1580-141084-0016-404: MY FRIEND DID NOT APPEAR TO BE DEPRESSED BY HIS FAILURE BUT SHRUGGED HIS SHOULDERS IN HALF HUMOROUS RESIGNATION +1089-134691-0006-44: THE PRIDE OF THAT DIM IMAGE BROUGHT BACK TO HIS MIND THE DIGNITY OF THE OFFICE HE HAD REFUSED +3729-6852-0037-1064: SHE INTRODUCED ME TO ALL HER GUESTS AND GAVE ME SOME PARTICULARS RESPECTING EVERY ONE OF THEM +5142-33396-0052-1514: HERE IS A RING FOR (SIF->SITH) THE FRIENDLY AND HERE IS A BRACELET (*->AND) A SWORD WOULD NOT BE ASHAMED TO HANG AT YOUR SIDE +7176-88083-0000-2159: ALL ABOUT HIM WAS A TUMULT OF BRIGHT AND BROKEN COLOR SCATTERED IN BROAD SPLASHES +5142-33396-0051-1513: AND WITH IT I LEAVE YOU A NAME (SIF->SITH) THE FRIENDLY I SHALL HOPE TO DRINK WITH YOU (SOMETIME->SOME TIME) IN VALHALLA +7127-75947-0025-2143: EXQUISITE SOFT TURF OF THE WOODS THE HAPPINESS WHICH YOUR FRIENDSHIP CONFERS UPON ME +5142-33396-0067-1529: BUT YOUNG SHARP TONGUE NOW THAT WE HAVE CAUGHT YOU WE WILL PUT YOU INTO A TRAP THAT YOU CANNOT GET OUT OF +8555-284447-0018-2519: AT ONCE THE GOAT GAVE A LEAP ESCAPED FROM THE SOLDIERS AND WITH BOWED HEAD RUSHED UPON THE BOOLOOROO +61-70968-0028-1709: THE HEAD (AND->IN) CHIEF OF THE RIOT THE NOTTINGHAM APPRENTICE WITH CLENCHED FISTS THREATENED MONTFICHET +4507-16021-0037-1258: THERE IT CLOTHES ITSELF IN WORD MASKS IN METAPHOR RAGS +1580-141083-0010-344: I GAVE HIM A LITTLE BRANDY AND LEFT HIM COLLAPSED IN A CHAIR WHILE I MADE A MOST CAREFUL EXAMINATION OF THE ROOM +1995-1836-0013-479: MARY TAYLOR HOWEVER RELATED THE TALE OF ZORA TO MISSUS (GREY'S->GRAY'S) PRIVATE EAR LATER +1284-1181-0009-254: SHE RAN TO HER HUSBAND'S SIDE AT ONCE AND HELPED HIM LIFT THE FOUR KETTLES FROM THE FIRE +5683-32879-0009-1664: ILL AND TROUBLED DEAR TROUBLED IN MIND AND MISERABLY NERVOUS +908-157963-0016-2579: I PASS AWAY YET I COMPLAIN AND NO ONE HEARS MY VOICE +672-122797-0044-1829: AND HE LEANED AGAINST THE WALL LOST IN REVERIE +6829-68769-0014-1874: THEY FOLLOWED THE JAILER ALONG A SUCCESSION OF PASSAGES +8230-279154-0012-2324: FAMILIARITY IS A FEELING CAPABLE OF DEGREES +7127-75947-0011-2129: REMAIN I IMPLORE YOU THE EVENING IS MOST LOVELY +4507-16021-0008-1229: WHO DENIES THAT OF COURSE IT DOES +4970-29093-0008-1289: HE WANTED TO BEGIN AT THE TOP OF THE LADDER +5142-33396-0037-1499: (HAKON->HAWKIN) THERE SHALL BE YOUR CONSTANT COMPANION FRIEND FARMER +672-122797-0059-1844: ONLY THAT ONE ANSWERED THE TREE +3575-170457-0034-1004: IN THIS MONOTONOUS LIFE OF MINE THAT WAS A PLEASANT EVENT +5683-32866-0025-1649: I DID NOT EVEN TAKE THE PRECAUTION OF SMOKING UP THE CHIMNEY +7729-102255-0001-2234: THIS WAS AT THE MARCH ELECTION EIGHTEEN FIFTY FIVE +2830-3980-0012-809: THE MOST THEY COULD CLAIM IS THAT THEY WERE SENT BY 
OTHERS +7176-92135-0002-2189: BUT EVEN THE UNSUCCESSFUL DRAMATIST HAS HIS MOMENTS +2830-3979-0010-794: BUT THE ESSENCE OF LUTHER'S LECTURES IS THERE +7176-92135-0032-2219: HOW YOU MAY BE WONDERING ARE YOU TO BEGIN YOUR MASTERPIECE +4970-29095-0014-1319: WHERE THEE AND THY FAMILY ARE KNOWN +6829-68769-0044-1904: IT HAS COST ME TWICE SIXTY DOLLARS (IN->AN) ANNOYANCE +7127-75946-0011-2099: YOU WILL TAKE THEM FROM MY PRIVATE TREASURE +8455-210777-0043-2399: BUT WHAT IS THE DELICATE MISSION I ASKED +3575-170457-0004-974: WE USED TO DISPUTE ABOUT POLITICS AND RELIGION +672-122797-0029-1814: HOW IT WILL SHINE THIS EVENING +61-70970-0010-1754: DISMISS YOUR SQUIRE ROBIN AND BID ME GOOD (E E N->EATIN) +260-123288-0000-734: THE ROARINGS BECOME LOST IN THE DISTANCE +2830-3980-0042-839: THE WORLD BRANDS THIS A PERNICIOUS DOCTRINE +6930-75918-0013-1964: IN THOSE VERY TERMS I EVEN ADDED MORE +908-157963-0001-2564: O LIFE OF THIS OUR SPRING +6930-76324-0007-1979: NOW WHAT HAVE YOU TO SAY CYNTHIA (SPRAGUE->SPROGU) +3575-170457-0049-1019: THIS DECISION WAS COMMUNICATED TO THE GIRLS +672-122797-0014-1799: WERE I BUT ALREADY ON THE CART +4446-2275-0024-1199: SHE PRESSED HIS HAND GENTLY IN GRATITUDE +5683-32879-0024-1679: YES RACHEL I DO LOVE YOU +8463-294825-0017-2459: (LITER->LEADER) ROUGHLY (ONE QUART->WON COURT) +5142-36586-0002-1559: THE VARIABILITY OF MULTIPLE PARTS +7176-88083-0015-2174: ALMOST INSTANTLY HE WAS FORCED TO THE TOP +2830-3980-0027-824: AND ALL THE BRETHREN WHICH ARE WITH ME +7021-85628-0024-2084: (ANDERS->ANDREW'S) FACE GREW RED +908-31957-0000-2594: ALL IS SAID WITHOUT A WORD +4446-2275-0009-1184: I GOT IN ABOUT TEN MINUTES AGO +6829-68769-0029-1889: IT'S A STOCK COMPANY (AND->IN) RICH +4446-2275-0039-1214: I MUST KNOW ABOUT YOU +6930-76324-0022-1994: THEN SHE SUDDENLY REMARKED +61-70968-0058-1739: WILL YOU FORGIVE ME NOW +8555-292519-0002-2549: VENICE +4446-2273-0031-1169: THERE JUST IN FRONT +260-123440-0001-764: POOR ALICE +237-126133-0015-629: YES ALL ALONE BY HIMSELF ASSERTED JASPER VEHEMENTLY AND WINKING FURIOUSLY TO THE OTHERS TO STOP THEIR LAUGHING HE DID NOW TRULY PHRONSIE +8555-284449-0007-2533: NOW THEN LET'S ENTER THE CITY (AN->AND) ENJOY THE (GRAND->GREAT) FEAST THAT'S BEING COOKED I'M NEARLY STARVED MYSELF FOR THIS (CONQUERIN KINGDOMS->CONQUERING KINGDOM'S) IS HARD WORK +8230-279154-0026-2338: THIS DISTINCTION IS VITAL TO THE UNDERSTANDING OF MEMORY BUT IT IS NOT SO EASY TO CARRY OUT IN PRACTICE AS IT IS TO DRAW IN THEORY +7021-85628-0023-2083: BUT WHEN HIS BIG BROTHER HEARD THAT HE HAD REFUSED TO GIVE HIS CAP FOR A KING'S GOLDEN CROWN HE SAID THAT ANDERS WAS A STUPID +7127-75947-0010-2128: WHEN SHE PERCEIVED THE YOUNG MAN SHE ROSE LIKE A WOMAN SURPRISED IN THE MIDST OF IDEAS SHE WAS DESIROUS OF CONCEALING FROM HERSELF +5639-40744-0024-1588: ONE DAY WHEN THE BOY WAS SENT BY HIS GRANDFATHER WITH A MESSAGE TO A RELATION HE PASSED ALONG A STREET IN WHICH THERE WAS A GREAT CONCOURSE OF HORSEMEN +5639-40744-0025-1589: THE BED SHE TOO WELL REMEMBERED WAS THERE AND ABOVE ALL THE CABINET ON WHICH HAD STOOD THE IMAGE SHE HAD TAKEN AWAY WAS STILL ON THE SAME SPOT +672-122797-0074-1859: HOWEVER THAT WAS OVER NOW THE TREE GONE THE STORY AT AN END +672-122797-0013-1798: I AM NOW TALL AND MY BRANCHES SPREAD LIKE THE OTHERS THAT WERE CARRIED OFF LAST YEAR OH +2300-131720-0027-599: EDISON HELD THAT THE ELECTRICITY SOLD MUST BE MEASURED JUST LIKE GAS OR WATER AND HE PROCEEDED TO DEVELOP A METER +7021-85628-0009-2069: AT THE FARTHER END OF THE LARGEST HALL A TABLE WAS SET WITH GOLDEN CUPS AND 
GOLDEN PLATES IN LONG ROWS +6829-68771-0004-1918: UNDER ORDINARY CONDITIONS REYNOLDS WAS SURE TO BE ELECTED BUT THE COMMITTEE PROPOSED TO SACRIFICE HIM IN ORDER TO ELECT HOPKINS +3729-6852-0022-1049: I ADDRESS HIM IN ITALIAN AND HE ANSWERS VERY WITTILY BUT HIS WAY OF SPEAKING MAKES ME SMILE AND I TELL HIM WHY +672-122797-0073-1858: THE WOOD FLAMED UP SPLENDIDLY UNDER THE LARGE BREWING COPPER AND IT SIGHED SO DEEPLY +4507-16021-0053-1274: TO DIMINISH THE NUMBER OF THE SHADY TO AUGMENT THE NUMBER OF THE LUMINOUS THAT IS THE OBJECT +2830-3980-0057-854: THE ARIANS TOOK CHRIST FOR A NOBLE AND PERFECT CREATURE SUPERIOR EVEN TO THE ANGELS BECAUSE BY HIM GOD CREATED HEAVEN AND EARTH +4970-29093-0023-1304: HE WELL KNEW THE PERILS OF THE FRONTIER THE SAVAGE STATE OF SOCIETY THE LURKING INDIANS AND THE DANGERS OF FEVER +8555-284447-0002-2503: I WOULDN'T MIND A CUP (O->OF) COFFEE MYSELF SAID CAP'N BILL I'VE HAD (CONSID'BLE->CONSRABLE) EXERCISE THIS MORNIN AND I'M ALL READY FOR (BREAKFAS->BREAKFAST) +1284-134647-0002-269: BUT THIS INESTIMABLE PRIVILEGE WAS SOON VIOLATED WITH (THE->A) KNOWLEDGE OF TRUTH THE EMPEROR IMBIBED THE MAXIMS OF PERSECUTION AND THE SECTS WHICH DISSENTED FROM THE CATHOLIC CHURCH WERE AFFLICTED AND OPPRESSED BY THE TRIUMPH OF CHRISTIANITY +5683-32866-0010-1634: WELL YOU KNOW RADIE WOMEN LIKE WICKED FELLOWS IT IS CONTRAST I SUPPOSE BUT THEY DO AND I'M SURE FROM WHAT BRACTON HAS SAID TO ME I KNOW HIM INTIMATELY THAT DORCAS LIKES HIM AND I CAN'T CONCEIVE WHY THEY ARE NOT MARRIED +7729-102255-0015-2248: ONE OF HIS MILITIA GENERALS SUGGESTED THAT THE GOVERNOR SHOULD REQUIRE THE OUTLAWS AT LAWRENCE AND ELSEWHERE TO SURRENDER THE (SHARPS->SHARP'S) RIFLES ANOTHER WROTE ASKING HIM TO CALL OUT THE GOVERNMENT TROOPS AT FORT LEAVENWORTH +8230-279154-0041-2353: THE DEFINITION OF MY EXPERIENCE IS DIFFICULT BROADLY SPEAKING IT IS EVERYTHING THAT IS CONNECTED WITH WHAT I AM EXPERIENCING NOW BY CERTAIN LINKS OF WHICH THE VARIOUS FORMS OF MEMORY ARE AMONG THE MOST IMPORTANT +5105-28233-0003-1409: WE DON'T SPIN TOPS IS A FAVORITE SAYING AMONGST ARTILLERY OFFICERS INDICATING THAT THEY DO NOT SHIRK THEIR DUTY BY FRIVOLOUS PURSUITS BUT IT MUST BE CONFESSED THAT SERVADAC BEING NATURALLY IDLE WAS VERY MUCH GIVEN TO SPINNING TOPS +3570-5695-0001-944: THE DEPENDENT WHO WAS FIRST DELEGATED FOR THESE DUTIES WAS THE WIFE OR THE CHIEF WIFE AND AS WOULD BE EXPECTED IN THE LATER DEVELOPMENT OF THE INSTITUTION WHEN THE NUMBER OF PERSONS BY WHOM THESE DUTIES ARE CUSTOMARILY PERFORMED GRADUALLY NARROWS THE WIFE REMAINS THE LAST +8224-274384-0010-2308: BEFORE THE SETTLEMENT OF TERMS THE ADMINISTRATION MUST BE POSSESSED ENTIRELY BY THE PARLIAMENTS OF BOTH KINGDOMS AND HOW INCOMPATIBLE THAT SCHEME WITH THE LIBERTY OF THE KING IS EASILY IMAGINED +908-157963-0015-2578: O LITTLE CLOUD THE VIRGIN SAID I CHARGE THEE TO TELL ME WHY THOU COMPLAINEST NOW WHEN IN ONE HOUR THOU FADE AWAY THEN WE SHALL SEEK THEE BUT NOT FIND AH (THEL->FELL) IS LIKE TO THEE +1221-135767-0022-209: BUT THE PROPRIETOR APPEARED ALREADY TO HAVE RELINQUISHED AS HOPELESS THE EFFORT TO PERPETUATE ON THIS SIDE OF THE ATLANTIC IN A HARD SOIL AND AMID THE CLOSE STRUGGLE FOR SUBSISTENCE THE NATIVE ENGLISH TASTE FOR ORNAMENTAL GARDENING +2094-142345-0013-524: HER TONGUE WAS NOT LESS KEEN THAN HER EYE AND WHENEVER A DAMSEL CAME WITHIN EARSHOT SEEMED TO TAKE UP AN UNFINISHED LECTURE AS A (BARREL->BERYL) ORGAN TAKES UP A TUNE PRECISELY AT THE POINT WHERE IT HAD LEFT OFF +7127-75947-0040-2158: IN THIS WAY THE FETE OF THE WHOLE COURT WAS A FETE ALSO FOR THE 
MYSTERIOUS INHABITANTS OF THE FOREST FOR CERTAINLY THE DEER IN THE BRAKE THE PHEASANT ON THE BRANCH THE FOX IN ITS HOLE WERE ALL LISTENING + +SUBSTITUTIONS: count ref -> hyp +11 AND -> IN +9 A -> THE +7 AN -> AND +6 RODOLFO -> RUDOLPHO +5 THIS -> THE +5 THE -> A +5 SOAMES -> SOLMES +5 SILVIA -> SYLVIA +5 O -> OF +5 I'M -> I +5 ANYONE -> ANY +4 METER -> METRE +4 IN -> AND +4 I'VE -> I +4 EMIL -> AMY +4 CRESSWELL -> CRASWELL +4 BATTLEAX -> BATTLE +4 ANDERS -> ANDREWS +3 VALLIERE -> VALLIER +3 TODAY -> TO +3 THORKEL -> TURKLE +3 THEIR -> THE +3 THEATER -> THEATRE +3 THAT -> THE +3 TABU -> TABOO +3 SOMEONE -> SOME +3 PRACTISE -> PRACTICE +3 PLATONISTS -> PLATINISTS +3 MEN -> MAN +3 MAINHALL -> MAYHALL +3 MADAM -> MADAME +3 LEOCADIA -> THE +3 HOLLAN -> HOLLAND +3 HE'S -> HE +3 EVERYONE -> EVERY +2 YOU'RE -> YOU +2 YE -> YOU +2 XAVIER -> ZAVIOUR +2 WHITTAWS -> WIDOWS +2 WHICH -> WITCH +2 VANDERPOOL -> VANERPOLE +2 TOWARDS -> TOWARD +2 TONIGHT -> TO +2 THERE -> THERE'S +2 THEL -> THOU +2 THEL -> FELL +2 THE -> DE +2 THAT -> IT +2 SOLON -> SOLOMON +2 SIF -> SITH +2 RODOLFO -> UDOLPHO +2 READ -> RED +2 ONE -> WON +2 OF -> A +2 O -> OH +2 MUNNY -> MONEY +2 MAN -> MEN +2 KAFFAR -> KAFFIR +2 JAGO -> IAGO +2 IN -> ON +2 HONOURABLE -> HONORABLE +2 HONOUR -> HONOR +2 HONOR -> HONOUR +2 HER -> THE +2 HELLO -> HALLO +2 HAS -> HAD +2 GREY -> GRAY +2 GRAY -> GREY +2 GILCHRIST -> GILCRIST +2 FOUNDED -> FOUND +2 FERDINANDO -> FERDINAND +2 ESTAFANIA -> STEPHANIA +2 DE -> THE +2 CRITIAS -> CRITIUS +2 COLOUR -> COLOR +2 COLORS -> COLOURS +2 COLORED -> COLOURED +2 CHAISE -> CHASE +2 CAN -> COULD +2 BEHAVIOURIST -> BEHAVIORIST +2 BARTLEY -> PARTLEY +2 AYE -> AY +2 ANDERS -> ANDREW'S +2 ANDELLA -> ANDELA +1 ZORA'S -> TSORA'S +1 ZORA -> TSORA +1 ZORA -> SORA +1 YOU'LL -> YOU +1 YOU -> HE +1 YEARNING -> YEARNIN +1 XAVIER -> ZAVIOR +1 WRIST -> RISK +1 WORST -> WORSE +1 WORLD -> WORLDWIDE +1 WORLD -> WORLD'S +1 WOODS -> WOOD +1 WOODBEGIRT -> WOOD +1 WOMAN'S -> WOMEN'S +1 WITH -> WHICH +1 WINIFRED -> WINNIFRED +1 WHOLLY -> HOLY +1 WHITTAWD -> WIDOWED +1 WHITTAW -> WIDOW +1 WHATEVER -> WHATSOEVER +1 WHAT'S -> WHAT +1 WERE -> WHERE +1 WERE -> ARE +1 WELCOMED -> WELCOME +1 WE'RE -> WERE +1 WATRY -> WATERYED +1 WATRY -> WATERY +1 WATERMILL -> WATER +1 WAS -> IS +1 VOLTAIRE -> VOLCHERRE +1 VILLEROY -> VILLEROI +1 VILLA -> VILIDESTA +1 VIADUCT -> VIADUC +1 VERSE -> FIRST +1 VERNE'S -> VERRNE'S +1 VEILED -> BALED +1 VAPOURS -> VAPORS +1 VANDERPOOLS -> VAN +1 UPTOWN -> UP +1 UPON -> ON +1 UP -> UPHILL +1 UNWARILY -> AND +1 UNDERGROUND -> ON +1 UNCLENCHED -> CLENCHED +1 UNC -> ONK +1 UN -> ONE +1 UD -> HAD +1 TWO -> TO +1 TWITE -> TWICE +1 TURNS -> TURNED +1 TUPPENY -> TUPPENNY +1 TRY -> TRIED +1 TROOPER'S -> TROOPERS +1 TREBLE -> TROUBLE +1 TRAVELERS -> TRAVELLERS +1 TRAVELER -> TRAVELLER +1 TOWNE -> TOWN +1 TOWELLING -> TOWELING +1 TOULD -> DID +1 TOPMASTS -> TOP +1 TOOMS -> TOMBS +1 TONNAY -> TUNNICHAVENT +1 TONNAY -> TOURNACHEANT +1 TONNAY -> TONNICHERANT +1 TONNAY -> TONAICHERANT +1 TONNAY -> TENNICHANT +1 TOILETTE -> TOILET +1 TO -> TWO +1 TO -> TONIGHT +1 TINTINT -> TINTENT +1 TIMAEUS -> TIMY +1 TIBI -> TIBBY +1 THRO -> THROUGH +1 THOUSAND -> THOUSANDTH +1 THORLEIF -> TOAR +1 THIS -> ITS +1 THINKS -> THINKSIDE +1 THING -> THANK +1 THEY -> THERE +1 THEM -> HIM +1 THEL -> THELL +1 THEE -> THEY +1 THEE -> THE +1 THE -> WITH +1 THE -> THIS +1 THE -> THEIR +1 THE -> HIS +1 THAT -> THAN +1 THAN -> THAT +1 TEACHERY -> TEACHER +1 TAN -> TAN'S +1 TAIL -> TALE +1 SWOONS -> SWOON +1 SWEEP -> SWEPT +1 SWAN -> SWAIN +1 SUSPICIONS -> 
SUSPICION +1 SURFACES -> SERVICES +1 SUMMERS -> SUMMER'S +1 SUCKLING -> SUCKING +1 STORY'S -> STORIES +1 STEPHANOS -> STEPHANO'S +1 STEEL'D -> STEELED +1 STATE'S -> THE +1 STARTS -> START +1 STARLIT -> STARLET +1 STAIR -> STARE +1 STAGECRAFT -> STAGE +1 STAGE -> STEED +1 SQUEAK -> SQUICK +1 SQUEAK -> QUICK +1 SPRAGUE -> SPROGU +1 SPLENDORS -> SPLENDOURS +1 SPLENDET -> SPLENDID +1 SPECIALISED -> SPECIALIZED +1 SOUTHEY -> SELVEY +1 SOUTH -> SOUTHEAST +1 SOU -> SOUS +1 SORREL -> SYREL +1 SOMETIME -> SOME +1 SOME -> SOMETIME +1 SOME -> SOMEONE +1 SOLON'S -> SOLONS +1 SOLON -> SONG +1 SMILD -> SMILED +1 SKILLFUL -> SKILFUL +1 SKEPTICAL -> SCEPTICAL +1 SINCE -> SENSE +1 SIGHT -> SIGHTSEERS +1 SHOWRING -> SHOWERING +1 SHOULD -> WOULD +1 SHOULD -> SHOULDST +1 SHEWD -> SHOWED +1 SHE'S -> SHE +1 SHE -> HE +1 SHARPS -> SHARP'S +1 SHARP'ST -> SHARPEST +1 SHANNON -> SHANON +1 SHALL -> SHALT +1 SHABATA -> CHABATA +1 SERVANT -> SERVANTS +1 SENTENCES -> SENTENCE +1 SENT -> SET +1 SENCE -> SINCE +1 SEMON'S -> SYMONDS +1 SEEMS -> SEEMED +1 SEEM -> SEEMED +1 SCUTCHEON -> DUCHEON +1 SCUSE -> EXCUSE +1 SCUMMED -> SKUMMED +1 SCRAPBOOKS -> SCRAP +1 SCOUTING -> SCOUT +1 SCHOOL -> SCHOOLBOYS +1 SCEVRA -> SCEVERA +1 SCEURA -> SCURA +1 SCEPTICISM -> SKEPTICISM +1 SCENE -> SEEN +1 SAVED -> SEE +1 SAUVEUR -> SOUVERT +1 SATE -> SAT +1 SAILD -> SAILED +1 SAID -> SENT +1 RUTH -> RUOTH +1 RUFUS -> RUFFUS +1 ROUND -> AROUND +1 ROSSETER -> ROSSITER +1 ROI -> ROY +1 ROERER -> RUER +1 RODOLFO'S -> RUDOLPHO'S +1 REWEIGHED -> RE +1 REMOVE -> MOVE +1 REMOV'D -> REMOVED +1 REMEMBER -> REMEMBERED +1 REMARK -> REMARKED +1 REMAINED -> REMAINING +1 REFUSED -> REFUSE +1 REENFORCEMENTS -> REINFORCEMENTS +1 REEDER -> READER +1 RECORD -> RECORDS +1 RECOGNISED -> RECOGNIZED +1 REBUK'D -> REBUKED +1 REBEL -> REBBLE +1 RE -> REENTRE +1 RAPHAEL -> RAFAELLE +1 RAOUL -> RALPH +1 RANCOR -> RANCOUR +1 RABBLE -> REBEL +1 QUINSON -> QUINCON +1 QUASI -> QUARSAI +1 QUART -> COURT +1 PUTTIN -> PUTTING +1 PURSE -> PERSON +1 PURPOSED -> PURPOSE +1 PURIST -> PUREST +1 PROVOCATION -> PROVOCATIONS +1 PROBABLY -> PROB'BLY +1 PREVENT -> PRESENT +1 PRETENSE -> PRETENCE +1 PRECIEUSES -> PRECIUSEES +1 PRECEDE -> PROCEED +1 PRE -> PRESOCRATIC +1 PRACTISED -> PRACTICED +1 POPHAM -> PAPA +1 POISON'D -> POISONED +1 PLURAL -> BORAL +1 PLESIOSAURUS -> PLECEOSAURUS +1 PLEASANCE -> PLEASANTS +1 PLANT -> PLAT +1 PLAITS -> PLATES +1 PIERC'D -> PIERCED +1 PHILADELPHIAN -> PHILADELPHIIAN +1 PHAEDRUS -> PHEDROS +1 PH -> P +1 PEGRENNE -> PEGREN +1 PEASE -> PLEASE +1 PATIENTS -> PATIENCE +1 PASSAGE -> PASSAGEWAY +1 PASCHAL -> PATIAL +1 PARASITES -> PARRICIDE +1 PAPAL -> PAPEL +1 PAIN -> PAINS +1 OVERWROUGHT -> OVER +1 OUT -> A +1 OTTLEY'S -> OAKLEY'S +1 OTHERWORLDLY -> OTHER +1 OTHER -> ARE +1 OR -> OF +1 ONTO -> ON +1 ONE -> WHEN +1 ON -> UPON +1 ON -> UNGULATIONS +1 OMELETTE -> OMELET +1 OF -> IS +1 ODORS -> ODOURS +1 NOTHIN -> NOTHING +1 NORTHWEST -> NORTH +1 NORTHWARDS -> NORTHWARD +1 NORTH -> NORTHWEST +1 NOR -> OR +1 NINE -> NOT +1 NEWCOMER -> NEW +1 NEO -> NEOP +1 NEIGHBORS -> NEIGHBOURS +1 NEIGHBORHOOD -> NEIGHBOURHOOD +1 NE'ER -> NEVER +1 NATTY -> NATTIE +1 NARES -> NAYERS +1 NAOMI -> THEY +1 NAOMI -> NAROWMY +1 N -> THAN +1 MY -> MIGALATIONS +1 MY -> A +1 MUMMERIES -> MEMORIES +1 MOUNTED -> MOUNTAIN +1 MOTHERS -> MOTHER'S +1 MORNIN -> MORNING +1 MONTMARTRE -> MONTMARCHRE +1 MONTMARTRE -> MONT +1 MOMBI -> MOMBY +1 MISTS -> MIST +1 MISTER -> MISS +1 MISSOURIANS -> MISSOURIIANS +1 MILLION'D -> MILLIONED +1 MILITATED -> MITIGATED +1 MERSEY -> MERCY +1 MERCHISTON -> 
MURCHESTON +1 MEET -> MET +1 MEALYBACK -> MEALLY +1 MEADOWCROFT'S -> MEDICROFT'S +1 MEADOWCROFT -> MEDICROFT +1 MATE -> MADE +1 MARVELOUS -> MARVELLOUS +1 MARSHALL -> MARTIAL +1 MARIVAUX -> MAREVAUX +1 MARIVAUX -> MARAVAUX +1 MARIANNE -> MARY +1 MANIFESTED -> MANIFESTS +1 MAID -> MAIDEN +1 MAC -> MICAWAL +1 LUTHER'S -> LUTHERS +1 LUIS -> LEWIS +1 LOUIS -> LOUISE +1 LOU'S -> LOSE +1 LORNE -> LORN +1 LIVES -> LIES +1 LITER -> LEADER +1 LINK'D -> LINKED +1 LINE -> LIE +1 LILLYS -> LILY'S +1 LILLY -> LILY +1 LEOCADIA'S -> LEIRCADIAS +1 LEOCADIA -> LOCALIA +1 LEOCADIA -> LOCADIA +1 LEOCADIA -> LEOKADIA +1 LEOCADI -> THE +1 LEFRANK -> FRANK +1 LECOMPTON -> LA +1 LECOMPTE -> LEC +1 LECOMPTE -> LE +1 LE -> LA +1 LARKSPUR -> LARKS +1 LARKSPUR -> LARCHBUR +1 LANTHORN -> LANTERN +1 LAID -> LADEN +1 LABOUR -> LABOR +1 KNOW -> KNOW'S +1 KNEED -> NEED +1 KIRTLAND -> PORTLAND +1 KINGDOMS -> KINGDOM'S +1 KICK -> KICKAPOOS +1 KEOGH -> KIOV +1 KAFFAR'S -> KAFFIRS +1 KAFFAR'S -> KAFFIR'S +1 KAFFAR -> CAFFER +1 JUS -> JUST +1 JUDGMENT -> JUDGEMENT +1 JEWELER'S -> JEWELLERS +1 JASPER -> JAPSER +1 JAGO -> JAAGO +1 JAGO -> CHIAGO +1 JACK -> JACKKNIFE +1 ITS -> IT'S +1 IT -> IT'LL +1 IT -> AND +1 IS -> WAS +1 IS -> IT +1 IS -> AS +1 INTRODUCE -> INTRODUCED +1 INTRENCHMENT -> ENTRENCHMENT +1 INTO -> AND +1 INTENTS -> INTENSE +1 INNERLOCHY -> INNILOCKI +1 INNERLOCHY -> INNER +1 INFANTS -> INFANT'S +1 INCLOSED -> ENCLOSED +1 INCERTAINTY -> IN +1 IN -> OF +1 IN -> AN +1 IMPRESSES -> IMPRESS +1 IMPRESS'D -> IMPRESSED +1 IMPEARLED -> EMPEARLED +1 IKE -> LIKE +1 ICHTHYOSAURUS -> ITHIOSAURUS +1 ICHTHYOSAURUS -> ICHTHIOSAURUS +1 ICHTHYOSAURUS -> ICHDEOSAURUS +1 I'D -> I +1 I -> OF +1 I -> I'VE +1 I -> I'M +1 HOUSECLEANING -> HOUSE +1 HOTBED -> HOT +1 HORSEPLAY -> HORSE +1 HORACE -> HORRACE +1 HOPKINS'S -> HOPKINS +1 HONOURABLY -> HONOROURABLY +1 HON -> HONOURABLE +1 HIT -> HID +1 HIDALGO -> HEDALGO +1 HICKEY -> HICKIE +1 HETTY -> HETTY'S +1 HERE -> THERE +1 HERACLEITUS -> HERACLITUS +1 HEN -> HAND +1 HELLENES -> HELLENS +1 HEART -> HEARTBROKEN +1 HE'D -> HE +1 HE -> WE +1 HAZEWRAPPED -> HAZE +1 HAWTREY -> HOULTREE +1 HAWORTH -> HAWWORTH +1 HAS -> IS +1 HAS -> AS +1 HARTS -> HEARTS +1 HARKENED -> HEARKENED +1 HARALD -> HAROLD +1 HAR -> HARR +1 HANNA -> HAD +1 HANGINGS -> HANGING +1 HAM -> HIM +1 HALLOA -> HALLO +1 HALLO -> HULLO +1 HAL -> HALLOA +1 HAKON -> HAWKIN +1 HAD -> HAVE +1 GUESTS -> GUEST +1 GUEST -> GUESTS +1 GRINGO -> GRINGE +1 GREY'S -> GRAY'S +1 GREETING -> READING +1 GREEING -> GREEN +1 GRAPEVINE -> GRAPE +1 GRAND -> GREAT +1 GRAM -> GRAHAM +1 GOVERNMENT -> GOVERNOR +1 GOVERNED -> GOVERN +1 GOOBERS -> GOOBBLES +1 GIVE -> GAVE +1 GIRARD -> GERARD +1 GILLIKINS -> GILLEKINS +1 GILLIKINS -> GILLAKANS +1 GILCHRIST'S -> GILKERTH'S +1 GILCHRIST -> GILCREST +1 GIER -> GEAR +1 GIAOURS -> GUYORES +1 GEOFFREY -> JEFFREY +1 GENTLEWOMAN -> GENTLE +1 GENTLEMEN -> GENTLEMAN +1 GENTLE -> GENLE +1 GENERAL -> GENERALSHIP +1 GAYLY -> GAILY +1 GALATIANS -> GLACIERS +1 GALATIANS -> GALLATIONS +1 FULNESS -> FULLNESS +1 FRISKILY -> FRISKLY +1 FOREVER -> FOR +1 FORBES'S -> FORD'S +1 FOR -> FOREVER +1 FLUFFINOSE -> FLOPENNO'S +1 FLAP -> FLAT +1 FIREBALL -> FIRE +1 FETE -> FIGHT +1 FEELIN'S -> FEELINGS +1 FAVORITE -> FAVOURITE +1 FAVORABLY -> FAVOURABLY +1 FARTHEST -> FURTHEST +1 EYED -> EY'D +1 EVOLUTION -> REVOLUTION +1 EVERY -> EVERYONE +1 EVENIN'S -> EVENINGS +1 ESTAFANIA -> ESTEPHANIA +1 ESPRIT -> A +1 ESCHEATED -> INITIATED +1 ER -> A +1 ENTRUSTING -> INTRUSTING +1 ENTRUSTED -> INTRUSTED +1 ENTHRALMENT -> ENTHRALLMENT +1 ENTER 
-> ENTERED +1 ENSURE -> INSURE +1 ENQUIRIES -> INQUIRIES +1 ENQUIRE -> INQUIRE +1 EM -> THEM +1 ELSINORE -> ELZINOR +1 ELMO'S -> ELBEL'S +1 ELCHO -> ELCO +1 E -> EATIN +1 DYKES -> DIKES +1 DUNNO -> DON'TO +1 DUMAS -> DE +1 DUERER -> DIURE +1 DRUGGIST'S -> DRUGGIST +1 DROPIDAS -> DROPIDUS +1 DRAWS -> DRAWLS +1 DOWN -> DOWNSTAIRS +1 DOVES -> DOVE'S +1 DOUZE -> DUSPORT +1 DOOR -> DOORSTEP +1 DISTRICTS -> DISTRICT +1 DIAS -> DAIS +1 DETERMINED -> DETERMINE +1 DEMEANOUR -> DEMEANOR +1 DEMEANOR -> DEMEANOUR +1 DELIBERATIVE -> DELIBERATE +1 DELIA -> DELLIA +1 DEFENCE -> DEFENSE +1 DEDALUS -> DAEDALUS +1 DEDALOS -> DEAD +1 DE -> O +1 DE -> DES +1 CYN -> SYM +1 CUSTOMER'S -> CUSTOMERS +1 CURVED -> CARVED +1 CRESSWELLS -> CROSS +1 CREIGHTON -> CRYIGHTON +1 CREIGHTON -> CRIGHTON +1 CRASWELLERS -> CRESSWELLERS +1 CRAMPNESS -> CRAMPEDNESS +1 COZIER -> COSIER +1 COURT -> COURTYARD +1 COUNTRY'S -> COUNTRY +1 COTTON -> CARTON +1 COSTS -> COST +1 COST -> CAUSED +1 CORRELATES -> CORELETS +1 CORMORANT -> CORMERANT +1 CONTAINED -> CONTAINING +1 CONSID'BLE -> CONSRABLE +1 CONQUERIN -> CONQUERING +1 CONNECTIONS -> CONNECTION +1 CONJURER -> CONJUROR +1 CONDENSE -> CONDENSED +1 COMPOSSER -> COMPOSOR +1 COMMANDMENTS -> COMMAND +1 COMING -> COMMON +1 COLOURS -> COLORS +1 CO -> COEXIST +1 CLEW -> CLUE +1 CLASS -> CLASSES +1 CIVET -> CEVETTE +1 CITADELLED -> CITADELED +1 CHRISTAIN -> CHRISTIAN +1 CHIAROSCURIST -> CHIOSCURIST +1 CHEROOT -> CERUT +1 CHECKER -> CHEQUER +1 CHECK -> CHEQUE +1 CHARLES -> CHARLESS +1 CHANGE -> CHANGED +1 CENTRED -> CENTERED +1 CENTER -> CENTRE +1 CENDENARIES -> SENDIARIES +1 CEASD -> CEASED +1 CAUGHT -> GOT +1 CASTS -> CAST +1 CARPACCIO'S -> CARPATIOS +1 CARL -> KARL +1 CANDLE -> CANDLELIGHT +1 CAN -> CANNOT +1 BYE -> BY +1 BY -> BYE +1 BUT -> DO +1 BURGOYNE -> BURGOIN +1 BUNNIT -> BENNETT +1 BUL -> BULBUL +1 BROWN -> O'BROWN +1 BRITANNULISTS -> BRITON +1 BRITANNULA -> BRITANULA +1 BREAKFAS -> BREAKFAST +1 BRAKE -> BREAK +1 BRAGELONNE -> BRAGELONE +1 BOLLS -> BOWLS +1 BOAR -> BOARHOUND +1 BLESSINGS -> BLESSING +1 BLASTS -> BLAST +1 BILLYGOAT -> SPILLY +1 BERGSON -> BERKSON +1 BEIN -> BEING +1 BEHAVIOUR -> BEHAVIOR +1 BEGGAR'S -> BEGGARS +1 BEFORE -> FOR +1 BEFAL -> BEFALL +1 BEEDER -> READER +1 BEEBE -> B +1 BEDIMMED -> BE +1 BEATER -> BEAER +1 BATTLEAX -> ATTILAX +1 BARTLEY -> BERTLEY +1 BARREL -> BERYL +1 BALAAM'S -> BAILIM'S +1 BAINS -> BAND +1 BADAUDERIE -> BADDELT +1 BABIRUSA -> BARBAROOSA +1 AY -> AYE +1 AWHILE -> A +1 AU -> ACCORANT +1 ATTENDANTS -> ATTENDANCE +1 ATHOLEMEN -> ATHOL +1 ATHENAIS -> ETHONAY +1 ATHENAIS -> ETHINAE +1 ASK -> ASKED +1 ASCENDENCY -> ASCENDANCY +1 AS -> TO +1 ARRONDISSEMENT -> ARONNDISSIMON +1 ARRESTS -> ARREST +1 ARMOUR -> ARMOR +1 ARDOUR -> ARDOR +1 ANYMORE -> ANY +1 ANY -> ANYTHING +1 ANY -> ANYONE +1 ANY -> ANYBODY +1 ANSWERD -> ANSWERED +1 ANOTHER -> TOTHER +1 ANOTHER -> THE +1 ANNALS -> ANNAL +1 ANGOR -> ANGER +1 ANDERS -> ANDER +1 ANDELLA -> AND +1 AND -> THEN +1 AND -> AT +1 AND -> AS +1 AND -> ANDUCADIA'S +1 ANAXAGORAS -> ANXAGARIS +1 AN -> ON +1 AMPHITHEATER -> AMPHITHEATRE +1 ALLUVION -> ALLUVIAN +1 ALL -> ALTOGETHER +1 ALBIGENSES -> ALBIGINZAS +1 AIGNAN -> ANNON +1 AFFECT -> EFFECT +1 ADONA -> ADONNA +1 ACKNOWLEDGEMENT -> ACKNOWLEDGMENT +1 ABOLITIONISTS -> ABOLITIONIST +1 ABDUCTION -> ADOCTION +1 A -> LAMOT +1 A -> ESPECIAL +1 A -> AWING +1 A -> ACCORD + +DELETIONS: count ref +5 CHARENTE +3 THE +3 ONE +2 IS +2 IN +2 GALATIANS +2 AND +2 A +1 YARD +1 WING +1 WILL +1 WIDE +1 WEST +1 WEED +1 WAY +1 TOGETHER +1 TO +1 TIME +1 THING +1 T +1 STEP +1 
STAIRS +1 SPECIAL +1 SOCRATIC +1 SHIP +1 SEERS +1 PORTES +1 POOS +1 OF +1 NOT +1 NIGHT +1 N +1 LO +1 LIGHT +1 LEOCADIA'S +1 LAMBENT +1 KNIFE +1 HOUND +1 HIS +1 HILL +1 HAD +1 EXIST +1 EVER +1 ENTER +1 EAST +1 E +1 DO +1 D'ESTE +1 COURANT +1 CHORD +1 BUL +1 BROKEN +1 BOYS +1 BODY +1 AT +1 ARE +1 ARDLE +1 AM + +INSERTIONS: count hyp +11 ONE +5 THE +5 IS +5 AM +5 A +3 IT +3 IN +3 DAY +3 ARCADIA +2 THAT +2 OF +2 NIGHT +2 HAVE +2 FOR +2 COMTE +2 AXE +2 AX +2 AS +2 ARE +1 WROUGHT +1 WRAPT +1 WORLDDLY +1 WOMAN +1 WILL +1 WHILE +1 WEST +1 WELLS +1 WEIGHED +1 WARILY +1 VINE +1 ULISTS +1 TOWN +1 TO +1 TIME +1 STATES +1 SPREE +1 PLAY +1 OTHER +1 ONLY +1 OCADIA +1 O' +1 MORE +1 MILL +1 MEN +1 MAU +1 MASTS +1 MARTRE +1 LOS +1 LOCKI +1 LEAF +1 HIS +1 HE +1 HAD +1 H +1 GROUN +1 GRI +1 GOAT +1 EVER +1 DIMMED +1 DERPOOLS +1 DELA +1 DE +1 CRAFT +1 COMPTON +1 COMER +1 CLEANING +1 CERTAINTY +1 BOOKS +1 BEGIRT +1 BED +1 BALL +1 BACK +1 B +1 ANNE +1 AND +1 AN +1 AH + +PER-WORD STATS: word corr tot_errs count_in_ref count_in_hyp +THE 3447 50 3461 3483 +AND 1770 33 1787 1786 +A 1151 32 1166 1168 +IN 895 25 905 910 +ONE 185 18 191 197 +OF 1795 14 1799 1805 +IS 463 13 468 471 +I 708 13 711 718 +TO 1337 11 1340 1345 +AN 157 10 165 159 +THAT 604 9 610 607 +ANY 82 9 85 88 +RODOLFO 0 8 8 0 +ON 277 8 279 283 +O 7 8 14 8 +IT 556 8 558 562 +HE 525 8 526 532 +DE 6 8 10 10 +THIS 257 7 263 258 +HAD 319 7 321 324 +ANDERS 4 7 11 4 +YOU 417 6 418 422 +SOME 85 6 87 89 +RUDOLPHO 0 6 0 6 +MEN 59 6 62 62 +LEOCADIA 0 6 6 0 +I'M 28 6 33 29 +AS 382 6 383 387 +ANYONE 1 6 6 2 +AM 58 6 59 63 +TONNAY 0 5 5 0 +THEL 0 5 5 0 +SYLVIA 0 5 0 5 +SOLMES 0 5 0 5 +SOAMES 0 5 5 0 +SILVIA 0 5 5 0 +MAN 65 5 67 68 +I'VE 19 5 23 20 +FOR 419 5 420 423 +CHARENTE 0 5 5 0 +BATTLEAX 0 5 5 0 +ARE 181 5 182 185 +THERE 135 4 137 137 +THEIR 170 4 173 171 +SOMEONE 0 4 3 1 +METRE 0 4 0 4 +METER 7 4 11 7 +JAGO 1 4 5 1 +HONOUR 0 4 2 2 +HONOR 2 4 4 4 +HAS 104 4 108 104 +HALLO 1 4 2 4 +GREY 1 4 3 3 +GRAY 3 4 5 5 +GALATIANS 2 4 6 2 +EVERYONE 0 4 3 1 +EVERY 30 4 31 33 +EMIL 0 4 4 0 +CRESSWELL 0 4 4 0 +CRASWELL 0 4 0 4 +BATTLE 6 4 6 10 +ANDREWS 0 4 0 4 +AMY 0 4 0 4 +XAVIER 0 3 3 0 +WHICH 214 3 216 215 +WERE 184 3 186 185 +VALLIERE 0 3 3 0 +VALLIER 0 3 0 3 +TURKLE 0 3 0 3 +TONIGHT 0 3 2 1 +TODAY 0 3 3 0 +THORKEL 0 3 3 0 +THEY 208 3 209 210 +THEATRE 2 3 2 5 +THEATER 0 3 3 0 +THAN 87 3 88 89 +TABU 0 3 3 0 +TABOO 0 3 0 3 +SOLON 1 3 4 1 +PRACTISE 1 3 4 1 +PRACTICE 7 3 7 10 +PLATONISTS 0 3 3 0 +PLATINISTS 0 3 0 3 +OTHER 64 3 65 66 +NIGHT 23 3 24 25 +MAYHALL 0 3 0 3 +MAINHALL 1 3 4 1 +MADAME 4 3 4 7 +MADAM 0 3 3 0 +KAFFAR 0 3 3 0 +ICHTHYOSAURUS 0 3 3 0 +HONOURABLE 1 3 3 2 +HOLLAND 0 3 0 3 +HOLLAN 0 3 3 0 +HIS 472 3 473 474 +HE'S 6 3 9 6 +HAVE 215 3 215 218 +GILCHRIST 0 3 3 0 +ESTAFANIA 0 3 3 0 +DAY 50 3 50 53 +COLOURS 0 3 1 2 +COLORS 1 3 3 2 +CAN 63 3 66 63 +BARTLEY 11 3 14 11 +AYE 0 3 2 1 +AY 1 3 2 3 +ARCADIA 0 3 0 3 +ANDELLA 1 3 4 1 +ZORA 1 2 3 1 +ZAVIOUR 0 2 0 2 +YOU'RE 3 2 5 3 +YE 6 2 8 6 +WORLD 35 2 37 35 +WOOD 4 2 4 6 +WON 2 2 2 4 +WITH 423 2 424 424 +WITCH 3 2 3 5 +WILL 142 2 143 143 +WIDOWS 0 2 0 2 +WHITTAWS 0 2 2 0 +WEST 6 2 7 7 +WATRY 0 2 2 0 +WAS 578 2 579 579 +VANERPOLE 0 2 0 2 +VANDERPOOL 0 2 2 0 +UPON 93 2 94 94 +UP 109 2 110 110 +UDOLPHO 0 2 0 2 +TWO 70 2 71 71 +TOWN 6 2 6 8 +TOWARDS 17 2 19 17 +TOWARD 8 2 8 10 +TIME 86 2 87 87 +THOU 18 2 18 20 +THING 20 2 22 20 +THERE'S 12 2 12 14 +THEM 121 2 122 122 +THEE 28 2 30 28 +STEPHANIA 0 2 0 2 +STAGE 5 2 6 6 +SQUEAK 0 2 2 0 +SOMETIME 1 2 2 2 +SOLOMON 0 2 0 2 +SITH 0 2 0 2 +SINCE 24 2 25 25 +SIF 0 2 2 0 +SHOULD 59 2 61 
59 +SHE 280 2 281 281 +SENT 5 2 6 6 +SEEMED 30 2 30 32 +RED 19 2 19 21 +REBEL 0 2 1 1 +READER 1 2 1 3 +READ 16 2 18 16 +RE 0 2 1 1 +PARTLEY 0 2 0 2 +OR 175 2 176 176 +OH 33 2 33 35 +NOT 337 2 338 338 +NORTHWEST 0 2 1 1 +NORTH 7 2 8 8 +NAOMI 3 2 5 3 +N 2 2 4 2 +MY 223 2 225 223 +MUNNY 0 2 2 0 +MONTMARTRE 0 2 2 0 +MONEY 5 2 5 7 +MARIVAUX 0 2 2 0 +LEOCADIA'S 0 2 2 0 +LECOMPTE 0 2 2 0 +LE 0 2 1 1 +LARKSPUR 0 2 2 0 +LA 5 2 5 7 +KAFFIR 0 2 0 2 +KAFFAR'S 0 2 2 0 +ITS 81 2 82 82 +INNERLOCHY 0 2 2 0 +IAGO 0 2 0 2 +HONORABLE 2 2 2 4 +HIM 215 2 215 217 +HER 322 2 324 322 +HELLO 0 2 2 0 +HALLOA 0 2 1 1 +GUESTS 4 2 5 5 +GUEST 3 2 4 4 +GILLIKINS 0 2 2 0 +GILCRIST 0 2 0 2 +GENTLE 6 2 7 7 +FOUNDED 3 2 5 3 +FOUND 21 2 21 23 +FOREVER 1 2 2 2 +FERDINANDO 3 2 5 3 +FERDINAND 0 2 0 2 +FELL 16 2 16 18 +EVER 34 2 35 35 +ENTER 7 2 9 7 +E 1 2 3 1 +DO 93 2 94 94 +DEMEANOUR 0 2 1 1 +DEMEANOR 0 2 1 1 +CRITIUS 0 2 0 2 +CRITIAS 0 2 2 0 +CREIGHTON 0 2 2 0 +COURT 11 2 12 12 +COULD 95 2 95 97 +COST 3 2 4 4 +COMTE 1 2 1 3 +COLOURED 1 2 1 3 +COLOUR 0 2 2 0 +COLORED 0 2 2 0 +COLOR 10 2 10 12 +CHASE 1 2 1 3 +CHAISE 0 2 2 0 +BYE 1 2 2 2 +BY 247 2 248 248 +BUL 0 2 2 0 +BEHAVIOURIST 0 2 2 0 +BEHAVIORIST 0 2 0 2 +B 2 2 2 4 +AXE 1 2 1 3 +AX 0 2 0 2 +ATHENAIS 0 2 2 0 +AT 283 2 284 284 +ANOTHER 34 2 36 34 +ANDREW'S 0 2 0 2 +ANDELA 0 2 0 2 +ZORA'S 0 1 1 0 +ZAVIOR 0 1 0 1 +YOU'LL 8 1 9 8 +YEARNING 1 1 2 1 +YEARNIN 0 1 0 1 +YARD 4 1 5 4 +WROUGHT 2 1 2 3 +WRIST 0 1 1 0 +WRAPT 0 1 0 1 +WOULD 141 1 141 142 +WORST 3 1 4 3 +WORSE 6 1 6 7 +WORLDWIDE 0 1 0 1 +WORLDDLY 0 1 0 1 +WORLD'S 2 1 2 3 +WOODS 2 1 3 2 +WOODBEGIRT 0 1 1 0 +WOMEN'S 1 1 1 2 +WOMAN'S 1 1 2 1 +WOMAN 28 1 28 29 +WINNIFRED 0 1 0 1 +WINIFRED 0 1 1 0 +WING 2 1 3 2 +WIDOWED 0 1 0 1 +WIDOW 1 1 1 2 +WIDE 8 1 9 8 +WHOLLY 8 1 9 8 +WHITTAWD 0 1 1 0 +WHITTAW 0 1 1 0 +WHILE 34 1 34 35 +WHERE 49 1 49 50 +WHEN 133 1 133 134 +WHATSOEVER 1 1 1 2 +WHATEVER 12 1 13 12 +WHAT'S 4 1 5 4 +WHAT 113 1 113 114 +WELLS 0 1 0 1 +WELCOMED 0 1 1 0 +WELCOME 6 1 6 7 +WEIGHED 3 1 3 4 +WEED 0 1 1 0 +WE'RE 2 1 3 2 +WE 152 1 152 153 +WAY 72 1 73 72 +WATERYED 0 1 0 1 +WATERY 2 1 2 3 +WATERMILL 0 1 1 0 +WATER 19 1 19 20 +WARILY 0 1 0 1 +VOLTAIRE 0 1 1 0 +VOLCHERRE 0 1 0 1 +VINE 0 1 0 1 +VILLEROY 0 1 1 0 +VILLEROI 0 1 0 1 +VILLA 0 1 1 0 +VILIDESTA 0 1 0 1 +VIADUCT 0 1 1 0 +VIADUC 0 1 0 1 +VERSE 1 1 2 1 +VERRNE'S 0 1 0 1 +VERNE'S 0 1 1 0 +VEILED 1 1 2 1 +VAPOURS 1 1 2 1 +VAPORS 0 1 0 1 +VANDERPOOLS 0 1 1 0 +VAN 2 1 2 3 +UPTOWN 0 1 1 0 +UPHILL 0 1 0 1 +UNWARILY 0 1 1 0 +UNGULATIONS 0 1 0 1 +UNDERGROUND 0 1 1 0 +UNCLENCHED 0 1 1 0 +UNC 4 1 5 4 +UN 0 1 1 0 +ULISTS 0 1 0 1 +UD 0 1 1 0 +TWITE 0 1 1 0 +TWICE 3 1 3 4 +TURNS 0 1 1 0 +TURNED 21 1 21 22 +TUPPENY 0 1 1 0 +TUPPENNY 0 1 0 1 +TUNNICHAVENT 0 1 0 1 +TSORA'S 0 1 0 1 +TSORA 0 1 0 1 +TRY 3 1 4 3 +TROUBLE 8 1 8 9 +TROOPERS 0 1 0 1 +TROOPER'S 0 1 1 0 +TRIED 9 1 9 10 +TREBLE 0 1 1 0 +TRAVELLERS 0 1 0 1 +TRAVELLER 0 1 0 1 +TRAVELERS 0 1 1 0 +TRAVELER 0 1 1 0 +TOWNE 0 1 1 0 +TOWELLING 0 1 1 0 +TOWELING 0 1 0 1 +TOURNACHEANT 0 1 0 1 +TOULD 0 1 1 0 +TOTHER 0 1 0 1 +TOPMASTS 0 1 1 0 +TOP 11 1 11 12 +TOOMS 0 1 1 0 +TONNICHERANT 0 1 0 1 +TONAICHERANT 0 1 0 1 +TOMBS 0 1 0 1 +TOILETTE 0 1 1 0 +TOILET 0 1 0 1 +TOGETHER 15 1 16 15 +TOAR 0 1 0 1 +TINTINT 0 1 1 0 +TINTENT 0 1 0 1 +TIMY 0 1 0 1 +TIMAEUS 8 1 9 8 +TIBI 0 1 1 0 +TIBBY 0 1 0 1 +THROUGH 42 1 42 43 +THRO 0 1 1 0 +THOUSANDTH 0 1 0 1 +THOUSAND 12 1 13 12 +THORLEIF 0 1 1 0 +THINKSIDE 0 1 0 1 +THINKS 2 1 3 2 +THEN 125 1 125 126 +THELL 0 1 0 1 +THANK 13 1 13 14 +TENNICHANT 0 1 0 1 +TEACHERY 0 1 1 0 +TEACHER 3 1 3 4 +TAN'S 0 1 0 
1 +TAN 0 1 1 0 +TALE 4 1 4 5 +TAIL 2 1 3 2 +T 0 1 1 0 +SYREL 0 1 0 1 +SYMONDS 0 1 0 1 +SYM 0 1 0 1 +SWOONS 0 1 1 0 +SWOON 3 1 3 4 +SWEPT 1 1 1 2 +SWEEP 1 1 2 1 +SWAN 0 1 1 0 +SWAIN 0 1 0 1 +SUSPICIONS 2 1 3 2 +SUSPICION 4 1 4 5 +SURFACES 0 1 1 0 +SUMMERS 0 1 1 0 +SUMMER'S 2 1 2 3 +SUCKLING 0 1 1 0 +SUCKING 0 1 0 1 +STORY'S 0 1 1 0 +STORIES 3 1 3 4 +STEPHANOS 1 1 2 1 +STEPHANO'S 0 1 0 1 +STEP 6 1 7 6 +STEELED 0 1 0 1 +STEEL'D 0 1 1 0 +STEED 1 1 1 2 +STATES 6 1 6 7 +STATE'S 1 1 2 1 +STARTS 0 1 1 0 +START 3 1 3 4 +STARLIT 0 1 1 0 +STARLET 0 1 0 1 +STARE 1 1 1 2 +STAIRS 7 1 8 7 +STAIR 0 1 1 0 +STAGECRAFT 0 1 1 0 +SQUICK 0 1 0 1 +SPROGU 0 1 0 1 +SPREE 0 1 0 1 +SPRAGUE 0 1 1 0 +SPLENDOURS 0 1 0 1 +SPLENDORS 0 1 1 0 +SPLENDID 9 1 9 10 +SPLENDET 0 1 1 0 +SPILLY 0 1 0 1 +SPECIALIZED 0 1 0 1 +SPECIALISED 0 1 1 0 +SPECIAL 1 1 2 1 +SOUVERT 0 1 0 1 +SOUTHEY 1 1 2 1 +SOUTHEAST 0 1 0 1 +SOUTH 6 1 7 6 +SOUS 0 1 0 1 +SOU 0 1 1 0 +SORREL 0 1 1 0 +SORA 0 1 0 1 +SONG 2 1 2 3 +SOLONS 0 1 0 1 +SOLON'S 0 1 1 0 +SOCRATIC 0 1 1 0 +SMILED 4 1 4 5 +SMILD 0 1 1 0 +SKUMMED 0 1 0 1 +SKILLFUL 0 1 1 0 +SKILFUL 0 1 0 1 +SKEPTICISM 1 1 1 2 +SKEPTICAL 0 1 1 0 +SIGHTSEERS 0 1 0 1 +SIGHT 19 1 20 19 +SHOWRING 0 1 1 0 +SHOWERING 0 1 0 1 +SHOWED 5 1 5 6 +SHOULDST 0 1 0 1 +SHIP 8 1 9 8 +SHEWD 0 1 1 0 +SHE'S 5 1 6 5 +SHARPS 0 1 1 0 +SHARPEST 1 1 1 2 +SHARP'ST 0 1 1 0 +SHARP'S 0 1 0 1 +SHANON 0 1 0 1 +SHANNON 2 1 3 2 +SHALT 2 1 2 3 +SHALL 43 1 44 43 +SHABATA 0 1 1 0 +SET 19 1 19 20 +SERVICES 1 1 1 2 +SERVANTS 4 1 4 5 +SERVANT 11 1 12 11 +SENTENCES 2 1 3 2 +SENTENCE 3 1 3 4 +SENSE 16 1 16 17 +SENDIARIES 0 1 0 1 +SENCE 0 1 1 0 +SEMON'S 0 1 1 0 +SELVEY 0 1 0 1 +SEERS 0 1 1 0 +SEEN 16 1 16 17 +SEEMS 11 1 12 11 +SEEM 12 1 13 12 +SEE 65 1 65 66 +SCUTCHEON 0 1 1 0 +SCUSE 0 1 1 0 +SCURA 0 1 0 1 +SCUMMED 0 1 1 0 +SCRAPBOOKS 0 1 1 0 +SCRAP 0 1 0 1 +SCOUTING 0 1 1 0 +SCOUT 5 1 5 6 +SCHOOLBOYS 0 1 0 1 +SCHOOL 10 1 11 10 +SCEVRA 0 1 1 0 +SCEVERA 0 1 0 1 +SCEURA 0 1 1 0 +SCEPTICISM 0 1 1 0 +SCEPTICAL 0 1 0 1 +SCENE 2 1 3 2 +SAVED 4 1 5 4 +SAUVEUR 0 1 1 0 +SATE 0 1 1 0 +SAT 18 1 18 19 +SAILED 0 1 0 1 +SAILD 0 1 1 0 +SAID 160 1 161 160 +RUTH 9 1 10 9 +RUOTH 0 1 0 1 +RUFUS 0 1 1 0 +RUFFUS 0 1 0 1 +RUER 0 1 0 1 +RUDOLPHO'S 0 1 0 1 +ROY 0 1 0 1 +ROUND 16 1 17 16 +ROSSITER 0 1 0 1 +ROSSETER 0 1 1 0 +ROI 0 1 1 0 +ROERER 0 1 1 0 +RODOLFO'S 0 1 1 0 +RISK 2 1 2 3 +REWEIGHED 0 1 1 0 +REVOLUTION 0 1 0 1 +REMOVED 4 1 4 5 +REMOVE 3 1 4 3 +REMOV'D 0 1 1 0 +REMEMBERED 11 1 11 12 +REMEMBER 9 1 10 9 +REMARKED 4 1 4 5 +REMARK 2 1 3 2 +REMAINING 0 1 0 1 +REMAINED 5 1 6 5 +REINFORCEMENTS 0 1 0 1 +REFUSED 7 1 8 7 +REFUSE 0 1 0 1 +REENTRE 0 1 0 1 +REENFORCEMENTS 0 1 1 0 +REEDER 0 1 1 0 +RECORDS 2 1 2 3 +RECORD 6 1 7 6 +RECOGNIZED 3 1 3 4 +RECOGNISED 0 1 1 0 +REBUKED 0 1 0 1 +REBUK'D 0 1 1 0 +REBBLE 0 1 0 1 +READING 4 1 4 5 +RAPHAEL 0 1 1 0 +RAOUL 2 1 3 2 +RANCOUR 0 1 0 1 +RANCOR 0 1 1 0 +RALPH 2 1 2 3 +RAFAELLE 0 1 0 1 +RABBLE 0 1 1 0 +QUINSON 0 1 1 0 +QUINCON 0 1 0 1 +QUICK 6 1 6 7 +QUASI 0 1 1 0 +QUART 0 1 1 0 +QUARSAI 0 1 0 1 +PUTTING 7 1 7 8 +PUTTIN 0 1 1 0 +PURSE 1 1 2 1 +PURPOSED 0 1 1 0 +PURPOSE 10 1 10 11 +PURIST 0 1 1 0 +PUREST 0 1 0 1 +PROVOCATIONS 0 1 0 1 +PROVOCATION 0 1 1 0 +PROCEED 1 1 1 2 +PROBABLY 9 1 10 9 +PROB'BLY 0 1 0 1 +PREVENT 0 1 1 0 +PRETENSE 0 1 1 0 +PRETENCE 1 1 1 2 +PRESOCRATIC 0 1 0 1 +PRESENT 20 1 20 21 +PRECIUSEES 0 1 0 1 +PRECIEUSES 0 1 1 0 +PRECEDE 0 1 1 0 +PRE 0 1 1 0 +PRACTISED 0 1 1 0 +PRACTICED 0 1 0 1 +PORTLAND 0 1 0 1 +PORTES 0 1 1 0 +POPHAM 4 1 5 4 +POOS 0 1 1 0 +POISONED 0 1 0 1 +POISON'D 0 1 1 0 +PLURAL 1 1 2 1 +PLESIOSAURUS 0 
1 1 0 +PLECEOSAURUS 0 1 0 1 +PLEASE 11 1 11 12 +PLEASANTS 0 1 0 1 +PLEASANCE 0 1 1 0 +PLAY 12 1 12 13 +PLATES 2 1 2 3 +PLAT 0 1 0 1 +PLANT 6 1 7 6 +PLAITS 0 1 1 0 +PIERCED 1 1 1 2 +PIERC'D 0 1 1 0 +PHILADELPHIIAN 0 1 0 1 +PHILADELPHIAN 0 1 1 0 +PHEDROS 0 1 0 1 +PHAEDRUS 0 1 1 0 +PH 0 1 1 0 +PERSON 13 1 13 14 +PEGRENNE 0 1 1 0 +PEGREN 0 1 0 1 +PEASE 0 1 1 0 +PATIENTS 0 1 1 0 +PATIENCE 2 1 2 3 +PATIAL 0 1 0 1 +PASSAGEWAY 0 1 0 1 +PASSAGE 8 1 9 8 +PASCHAL 0 1 1 0 +PARRICIDE 0 1 0 1 +PARASITES 0 1 1 0 +PAPEL 0 1 0 1 +PAPAL 0 1 1 0 +PAPA 0 1 0 1 +PAINS 2 1 2 3 +PAIN 6 1 7 6 +P 1 1 1 2 +OVERWROUGHT 0 1 1 0 +OVER 59 1 59 60 +OUT 99 1 100 99 +OTTLEY'S 0 1 1 0 +OTHERWORLDLY 0 1 1 0 +ONTO 0 1 1 0 +ONLY 77 1 77 78 +ONK 0 1 0 1 +OMELETTE 0 1 1 0 +OMELET 0 1 0 1 +ODOURS 0 1 0 1 +ODORS 0 1 1 0 +OCADIA 0 1 0 1 +OAKLEY'S 0 1 0 1 +O'BROWN 0 1 0 1 +O' 0 1 0 1 +NOTHING 33 1 33 34 +NOTHIN 0 1 1 0 +NORTHWARDS 0 1 1 0 +NORTHWARD 1 1 1 2 +NOR 20 1 21 20 +NINE 10 1 11 10 +NEWCOMER 0 1 1 0 +NEW 35 1 35 36 +NEVER 63 1 63 64 +NEOP 0 1 0 1 +NEO 2 1 3 2 +NEIGHBOURS 0 1 0 1 +NEIGHBOURHOOD 0 1 0 1 +NEIGHBORS 1 1 2 1 +NEIGHBORHOOD 0 1 1 0 +NEED 12 1 12 13 +NE'ER 0 1 1 0 +NAYERS 0 1 0 1 +NATTY 1 1 2 1 +NATTIE 0 1 0 1 +NAROWMY 0 1 0 1 +NARES 0 1 1 0 +MURCHESTON 0 1 0 1 +MUMMERIES 0 1 1 0 +MOVE 4 1 4 5 +MOUNTED 0 1 1 0 +MOUNTAIN 5 1 5 6 +MOTHERS 1 1 2 1 +MOTHER'S 4 1 4 5 +MORNING 21 1 21 22 +MORNIN 1 1 2 1 +MORE 119 1 119 120 +MONTMARCHRE 0 1 0 1 +MONT 0 1 0 1 +MOMBY 0 1 0 1 +MOMBI 0 1 1 0 +MITIGATED 0 1 0 1 +MISTS 2 1 3 2 +MISTER 47 1 48 47 +MIST 5 1 5 6 +MISSOURIIANS 0 1 0 1 +MISSOURIANS 1 1 2 1 +MISS 18 1 18 19 +MILLIONED 0 1 0 1 +MILLION'D 0 1 1 0 +MILL 0 1 0 1 +MILITATED 0 1 1 0 +MIGALATIONS 0 1 0 1 +MICAWAL 0 1 0 1 +MET 10 1 10 11 +MERSEY 0 1 1 0 +MERCY 2 1 2 3 +MERCHISTON 0 1 1 0 +MEMORIES 0 1 0 1 +MEET 6 1 7 6 +MEDICROFT'S 0 1 0 1 +MEDICROFT 0 1 0 1 +MEALYBACK 0 1 1 0 +MEALLY 0 1 0 1 +MEADOWCROFT'S 0 1 1 0 +MEADOWCROFT 0 1 1 0 +MAU 0 1 0 1 +MATE 1 1 2 1 +MASTS 0 1 0 1 +MARY 6 1 6 7 +MARVELOUS 0 1 1 0 +MARVELLOUS 0 1 0 1 +MARTRE 0 1 0 1 +MARTIAL 1 1 1 2 +MARSHALL 2 1 3 2 +MARIANNE 0 1 1 0 +MAREVAUX 0 1 0 1 +MARAVAUX 0 1 0 1 +MANIFESTS 0 1 0 1 +MANIFESTED 0 1 1 0 +MAIDEN 0 1 0 1 +MAID 5 1 6 5 +MADE 61 1 61 62 +MAC 0 1 1 0 +LUTHERS 0 1 0 1 +LUTHER'S 3 1 4 3 +LUIS 0 1 1 0 +LOUISE 4 1 4 5 +LOUIS 1 1 2 1 +LOU'S 0 1 1 0 +LOSE 3 1 3 4 +LOS 0 1 0 1 +LORNE 0 1 1 0 +LORN 0 1 0 1 +LOCKI 0 1 0 1 +LOCALIA 0 1 0 1 +LOCADIA 0 1 0 1 +LO 1 1 2 1 +LIVES 5 1 6 5 +LITER 0 1 1 0 +LINKED 0 1 0 1 +LINK'D 0 1 1 0 +LINE 12 1 13 12 +LILY'S 0 1 0 1 +LILY 2 1 2 3 +LILLYS 0 1 1 0 +LILLY 0 1 1 0 +LIKE 105 1 105 106 +LIGHT 38 1 39 38 +LIES 8 1 8 9 +LIE 1 1 1 2 +LEWIS 0 1 0 1 +LEOKADIA 0 1 0 1 +LEOCADI 0 1 1 0 +LEIRCADIAS 0 1 0 1 +LEFRANK 0 1 1 0 +LECOMPTON 0 1 1 0 +LEC 0 1 0 1 +LEAF 3 1 3 4 +LEADER 2 1 2 3 +LARKS 0 1 0 1 +LARCHBUR 0 1 0 1 +LANTHORN 0 1 1 0 +LANTERN 0 1 0 1 +LAMOT 0 1 0 1 +LAMBENT 0 1 1 0 +LAID 7 1 8 7 +LADEN 0 1 0 1 +LABOUR 0 1 1 0 +LABOR 1 1 1 2 +KNOW'S 0 1 0 1 +KNOW 75 1 76 75 +KNIFE 9 1 10 9 +KNEED 0 1 1 0 +KIRTLAND 0 1 1 0 +KIOV 0 1 0 1 +KINGDOMS 1 1 2 1 +KINGDOM'S 0 1 0 1 +KICKAPOOS 0 1 0 1 +KICK 1 1 2 1 +KEOGH 0 1 1 0 +KARL 0 1 0 1 +KAFFIRS 0 1 0 1 +KAFFIR'S 0 1 0 1 +JUST 42 1 42 43 +JUS 0 1 1 0 +JUDGMENT 5 1 6 5 +JUDGEMENT 0 1 0 1 +JEWELLERS 0 1 0 1 +JEWELER'S 0 1 1 0 +JEFFREY 0 1 0 1 +JASPER 5 1 6 5 +JAPSER 0 1 0 1 +JACKKNIFE 0 1 0 1 +JACK 5 1 6 5 +JAAGO 0 1 0 1 +ITHIOSAURUS 0 1 0 1 +IT'S 29 1 29 30 +IT'LL 2 1 2 3 +INTRUSTING 0 1 0 1 +INTRUSTED 0 1 0 1 +INTRODUCED 3 1 3 4 +INTRODUCE 2 1 3 2 +INTRENCHMENT 0 1 1 0 +INTO 103 
1 104 103 +INTENTS 0 1 1 0 +INTENSE 2 1 2 3 +INSURE 0 1 0 1 +INQUIRIES 1 1 1 2 +INQUIRE 0 1 0 1 +INNILOCKI 0 1 0 1 +INNER 2 1 2 3 +INITIATED 0 1 0 1 +INFANTS 2 1 3 2 +INFANT'S 0 1 0 1 +INCLOSED 0 1 1 0 +INCERTAINTY 0 1 1 0 +IMPRESSES 0 1 1 0 +IMPRESSED 4 1 4 5 +IMPRESS'D 0 1 1 0 +IMPRESS 0 1 0 1 +IMPEARLED 0 1 1 0 +IKE 0 1 1 0 +ICHTHIOSAURUS 0 1 0 1 +ICHDEOSAURUS 0 1 0 1 +I'D 2 1 3 2 +HULLO 0 1 0 1 +HOUSECLEANING 0 1 1 0 +HOUSE 34 1 34 35 +HOUND 0 1 1 0 +HOULTREE 0 1 0 1 +HOTBED 0 1 1 0 +HOT 3 1 3 4 +HORSEPLAY 0 1 1 0 +HORSE 6 1 6 7 +HORRACE 0 1 0 1 +HORACE 0 1 1 0 +HOPKINS'S 0 1 1 0 +HOPKINS 4 1 4 5 +HONOURABLY 0 1 1 0 +HONOROURABLY 0 1 0 1 +HON 0 1 1 0 +HOLY 3 1 3 4 +HIT 1 1 2 1 +HILL 3 1 4 3 +HIDALGO 0 1 1 0 +HID 1 1 1 2 +HICKIE 0 1 0 1 +HICKEY 0 1 1 0 +HETTY'S 0 1 0 1 +HETTY 0 1 1 0 +HERE 69 1 70 69 +HERACLITUS 0 1 0 1 +HERACLEITUS 0 1 1 0 +HEN 1 1 2 1 +HELLENS 0 1 0 1 +HELLENES 0 1 1 0 +HEDALGO 0 1 0 1 +HEARTS 8 1 8 9 +HEARTBROKEN 0 1 0 1 +HEART 27 1 28 27 +HEARKENED 0 1 0 1 +HE'D 2 1 3 2 +HAZEWRAPPED 0 1 1 0 +HAZE 0 1 0 1 +HAWWORTH 0 1 0 1 +HAWTREY 0 1 1 0 +HAWORTH 0 1 1 0 +HAWKIN 0 1 0 1 +HARTS 0 1 1 0 +HARR 0 1 0 1 +HAROLD 0 1 0 1 +HARKENED 0 1 1 0 +HARALD 0 1 1 0 +HAR 0 1 1 0 +HANNA 0 1 1 0 +HANGINGS 0 1 1 0 +HANGING 2 1 2 3 +HAND 29 1 29 30 +HAM 0 1 1 0 +HAL 0 1 1 0 +HAKON 0 1 1 0 +H 0 1 0 1 +GUYORES 0 1 0 1 +GROUN 0 1 0 1 +GRINGO 0 1 1 0 +GRINGE 0 1 0 1 +GRI 0 1 0 1 +GREY'S 0 1 1 0 +GREETING 3 1 4 3 +GREEN 12 1 12 13 +GREEING 0 1 1 0 +GREAT 74 1 74 75 +GRAY'S 0 1 0 1 +GRAPEVINE 0 1 1 0 +GRAPE 0 1 0 1 +GRAND 1 1 2 1 +GRAM 0 1 1 0 +GRAHAM 0 1 0 1 +GOVERNOR 14 1 14 15 +GOVERNMENT 7 1 8 7 +GOVERNED 0 1 1 0 +GOVERN 0 1 0 1 +GOT 13 1 13 14 +GOOBERS 0 1 1 0 +GOOBBLES 0 1 0 1 +GOAT 7 1 7 8 +GLACIERS 0 1 0 1 +GIVE 29 1 30 29 +GIRARD 0 1 1 0 +GILLEKINS 0 1 0 1 +GILLAKANS 0 1 0 1 +GILKERTH'S 0 1 0 1 +GILCREST 0 1 0 1 +GILCHRIST'S 0 1 1 0 +GIER 0 1 1 0 +GIAOURS 0 1 1 0 +GERARD 0 1 0 1 +GEOFFREY 0 1 1 0 +GENTLEWOMAN 0 1 1 0 +GENTLEMEN 5 1 6 5 +GENTLEMAN 8 1 8 9 +GENLE 0 1 0 1 +GENERALSHIP 0 1 0 1 +GENERAL 16 1 17 16 +GEAR 0 1 0 1 +GAYLY 0 1 1 0 +GAVE 31 1 31 32 +GALLATIONS 0 1 0 1 +GAILY 0 1 0 1 +FURTHEST 0 1 0 1 +FULNESS 0 1 1 0 +FULLNESS 0 1 0 1 +FRISKLY 0 1 0 1 +FRISKILY 0 1 1 0 +FRANK 2 1 2 3 +FORD'S 0 1 0 1 +FORBES'S 0 1 1 0 +FLUFFINOSE 0 1 1 0 +FLOPENNO'S 0 1 0 1 +FLAT 1 1 1 2 +FLAP 1 1 2 1 +FIRST 67 1 67 68 +FIREBALL 0 1 1 0 +FIRE 22 1 22 23 +FIGHT 3 1 3 4 +FETE 2 1 3 2 +FEELINGS 3 1 3 4 +FEELIN'S 0 1 1 0 +FAVOURITE 1 1 1 2 +FAVOURABLY 0 1 0 1 +FAVORITE 3 1 4 3 +FAVORABLY 0 1 1 0 +FARTHEST 1 1 2 1 +EYED 0 1 1 0 +EY'D 0 1 0 1 +EXIST 2 1 3 2 +EXCUSE 3 1 3 4 +EVOLUTION 2 1 3 2 +EVENINGS 1 1 1 2 +EVENIN'S 0 1 1 0 +ETHONAY 0 1 0 1 +ETHINAE 0 1 0 1 +ESTEPHANIA 0 1 0 1 +ESPRIT 0 1 1 0 +ESPECIAL 1 1 1 2 +ESCHEATED 0 1 1 0 +ER 0 1 1 0 +ENTRUSTING 0 1 1 0 +ENTRUSTED 0 1 1 0 +ENTRENCHMENT 0 1 0 1 +ENTHRALMENT 0 1 1 0 +ENTHRALLMENT 0 1 0 1 +ENTERED 21 1 21 22 +ENSURE 0 1 1 0 +ENQUIRIES 0 1 1 0 +ENQUIRE 0 1 1 0 +ENCLOSED 0 1 0 1 +EMPEARLED 0 1 0 1 +EM 0 1 1 0 +ELZINOR 0 1 0 1 +ELSINORE 0 1 1 0 +ELMO'S 0 1 1 0 +ELCO 0 1 0 1 +ELCHO 0 1 1 0 +ELBEL'S 0 1 0 1 +EFFECT 9 1 9 10 +EATIN 0 1 0 1 +EAST 3 1 4 3 +DYKES 0 1 1 0 +DUSPORT 0 1 0 1 +DUNNO 0 1 1 0 +DUMAS 0 1 1 0 +DUERER 0 1 1 0 +DUCHEON 0 1 0 1 +DRUGGIST'S 0 1 1 0 +DRUGGIST 0 1 0 1 +DROPIDUS 0 1 0 1 +DROPIDAS 0 1 1 0 +DRAWS 1 1 2 1 +DRAWLS 0 1 0 1 +DOWNSTAIRS 1 1 1 2 +DOWN 72 1 73 72 +DOVES 0 1 1 0 +DOVE'S 0 1 0 1 +DOUZE 0 1 1 0 +DOORSTEP 0 1 0 1 +DOOR 35 1 36 35 +DON'TO 0 1 0 1 +DIURE 0 1 0 1 +DISTRICTS 1 1 2 1 +DISTRICT 1 1 1 2 +DIMMED 0 1 0 1 
+DIKES 0 1 0 1 +DID 67 1 67 68 +DIAS 0 1 1 0 +DETERMINED 4 1 5 4 +DETERMINE 1 1 1 2 +DES 0 1 0 1 +DERPOOLS 0 1 0 1 +DELLIA 0 1 0 1 +DELIBERATIVE 0 1 1 0 +DELIBERATE 2 1 2 3 +DELIA 1 1 2 1 +DELA 0 1 0 1 +DEFENSE 0 1 0 1 +DEFENCE 0 1 1 0 +DEDALUS 1 1 2 1 +DEDALOS 0 1 1 0 +DEAD 5 1 5 6 +DAIS 0 1 0 1 +DAEDALUS 0 1 0 1 +D'ESTE 0 1 1 0 +CYN 0 1 1 0 +CUSTOMERS 0 1 0 1 +CUSTOMER'S 0 1 1 0 +CURVED 0 1 1 0 +CRYIGHTON 0 1 0 1 +CROSS 5 1 5 6 +CRIGHTON 0 1 0 1 +CRESSWELLS 0 1 1 0 +CRESSWELLERS 0 1 0 1 +CRASWELLERS 0 1 1 0 +CRAMPNESS 0 1 1 0 +CRAMPEDNESS 0 1 0 1 +CRAFT 0 1 0 1 +COZIER 0 1 1 0 +COURTYARD 4 1 4 5 +COURANT 0 1 1 0 +COUNTRY'S 1 1 2 1 +COUNTRY 25 1 25 26 +COTTON 11 1 12 11 +COSTS 0 1 1 0 +COSIER 0 1 0 1 +CORRELATES 0 1 1 0 +CORMORANT 0 1 1 0 +CORMERANT 0 1 0 1 +CORELETS 0 1 0 1 +CONTAINING 0 1 0 1 +CONTAINED 2 1 3 2 +CONSRABLE 0 1 0 1 +CONSID'BLE 0 1 1 0 +CONQUERING 0 1 0 1 +CONQUERIN 0 1 1 0 +CONNECTIONS 0 1 1 0 +CONNECTION 1 1 1 2 +CONJUROR 0 1 0 1 +CONJURER 0 1 1 0 +CONDENSED 1 1 1 2 +CONDENSE 0 1 1 0 +COMPTON 0 1 0 1 +COMPOSSER 0 1 1 0 +COMPOSOR 0 1 0 1 +COMMON 8 1 8 9 +COMMANDMENTS 0 1 1 0 +COMMAND 2 1 2 3 +COMING 6 1 7 6 +COMER 0 1 0 1 +COEXIST 0 1 0 1 +CO 0 1 1 0 +CLUE 1 1 1 2 +CLEW 0 1 1 0 +CLENCHED 1 1 1 2 +CLEANING 2 1 2 3 +CLASSES 1 1 1 2 +CLASS 8 1 9 8 +CIVET 0 1 1 0 +CITADELLED 0 1 1 0 +CITADELED 0 1 0 1 +CHRISTIAN 6 1 6 7 +CHRISTAIN 0 1 1 0 +CHORD 0 1 1 0 +CHIOSCURIST 0 1 0 1 +CHIAROSCURIST 0 1 1 0 +CHIAGO 0 1 0 1 +CHEROOT 0 1 1 0 +CHEQUER 0 1 0 1 +CHEQUE 0 1 0 1 +CHECKER 0 1 1 0 +CHECK 7 1 8 7 +CHARLESS 0 1 0 1 +CHARLES 2 1 3 2 +CHANGED 6 1 6 7 +CHANGE 8 1 9 8 +CHABATA 0 1 0 1 +CEVETTE 0 1 0 1 +CERUT 0 1 0 1 +CERTAINTY 2 1 2 3 +CENTRED 0 1 1 0 +CENTRE 1 1 1 2 +CENTERED 0 1 0 1 +CENTER 1 1 2 1 +CENDENARIES 0 1 1 0 +CEASED 1 1 1 2 +CEASD 0 1 1 0 +CAUSED 5 1 5 6 +CAUGHT 9 1 10 9 +CASTS 0 1 1 0 +CAST 8 1 8 9 +CARVED 1 1 1 2 +CARTON 0 1 0 1 +CARPATIOS 0 1 0 1 +CARPACCIO'S 0 1 1 0 +CARL 1 1 2 1 +CANNOT 16 1 16 17 +CANDLELIGHT 0 1 0 1 +CANDLE 1 1 2 1 +CAFFER 0 1 0 1 +BUT 343 1 344 343 +BURGOYNE 0 1 1 0 +BURGOIN 0 1 0 1 +BUNNIT 0 1 1 0 +BULBUL 0 1 0 1 +BROWN 9 1 10 9 +BROKEN 6 1 7 6 +BRITON 0 1 0 1 +BRITANULA 0 1 0 1 +BRITANNULISTS 0 1 1 0 +BRITANNULA 2 1 3 2 +BREAKFAST 1 1 1 2 +BREAKFAS 0 1 1 0 +BREAK 3 1 3 4 +BRAKE 2 1 3 2 +BRAGELONNE 1 1 2 1 +BRAGELONE 0 1 0 1 +BOYS 5 1 6 5 +BOWLS 0 1 0 1 +BORAL 0 1 0 1 +BOOKS 8 1 8 9 +BOLLS 0 1 1 0 +BODY 7 1 8 7 +BOARHOUND 0 1 0 1 +BOAR 0 1 1 0 +BLESSINGS 2 1 3 2 +BLESSING 2 1 2 3 +BLASTS 0 1 1 0 +BLAST 0 1 0 1 +BILLYGOAT 0 1 1 0 +BERYL 0 1 0 1 +BERTLEY 0 1 0 1 +BERKSON 0 1 0 1 +BERGSON 1 1 2 1 +BENNETT 0 1 0 1 +BEING 40 1 40 41 +BEIN 0 1 1 0 +BEHAVIOUR 1 1 2 1 +BEHAVIOR 0 1 0 1 +BEGIRT 0 1 0 1 +BEGGARS 0 1 0 1 +BEGGAR'S 0 1 1 0 +BEFORE 74 1 75 74 +BEFALL 0 1 0 1 +BEFAL 0 1 1 0 +BEEDER 0 1 1 0 +BEEBE 0 1 1 0 +BEDIMMED 0 1 1 0 +BED 12 1 12 13 +BEATER 0 1 1 0 +BEAER 0 1 0 1 +BE 314 1 314 315 +BARREL 0 1 1 0 +BARBAROOSA 0 1 0 1 +BAND 2 1 2 3 +BALL 4 1 4 5 +BALED 0 1 0 1 +BALAAM'S 0 1 1 0 +BAINS 0 1 1 0 +BAILIM'S 0 1 0 1 +BADDELT 0 1 0 1 +BADAUDERIE 0 1 1 0 +BACK 45 1 45 46 +BABIRUSA 0 1 1 0 +AWING 0 1 0 1 +AWHILE 0 1 1 0 +AU 0 1 1 0 +ATTILAX 0 1 0 1 +ATTENDANTS 0 1 1 0 +ATTENDANCE 1 1 1 2 +ATHOLEMEN 0 1 1 0 +ATHOL 0 1 0 1 +ASKED 22 1 22 23 +ASK 9 1 10 9 +ASCENDENCY 0 1 1 0 +ASCENDANCY 0 1 0 1 +ARRONDISSEMENT 0 1 1 0 +ARRESTS 0 1 1 0 +ARREST 1 1 1 2 +AROUND 12 1 12 13 +ARONNDISSIMON 0 1 0 1 +ARMOUR 0 1 1 0 +ARMOR 0 1 0 1 +ARDOUR 0 1 1 0 +ARDOR 0 1 0 1 +ARDLE 0 1 1 0 +ANYTHING 17 1 17 18 +ANYMORE 0 1 1 0 +ANYBODY 3 1 3 4 +ANXAGARIS 0 1 0 1 +ANSWERED 14 
1 14 15 +ANSWERD 0 1 1 0 +ANNON 0 1 0 1 +ANNE 2 1 2 3 +ANNALS 2 1 3 2 +ANNAL 0 1 0 1 +ANGOR 0 1 1 0 +ANGER 1 1 1 2 +ANDUCADIA'S 0 1 0 1 +ANDER 0 1 0 1 +ANAXAGORAS 0 1 1 0 +AMPHITHEATRE 0 1 0 1 +AMPHITHEATER 0 1 1 0 +ALTOGETHER 6 1 6 7 +ALLUVION 0 1 1 0 +ALLUVIAN 0 1 0 1 +ALL 224 1 225 224 +ALBIGINZAS 0 1 0 1 +ALBIGENSES 0 1 1 0 +AIGNAN 0 1 1 0 +AH 7 1 7 8 +AFFECT 0 1 1 0 +ADONNA 0 1 0 1 +ADONA 0 1 1 0 +ADOCTION 0 1 0 1 +ACKNOWLEDGMENT 0 1 0 1 +ACKNOWLEDGEMENT 0 1 1 0 +ACCORD 2 1 2 3 +ACCORANT 0 1 0 1 +ABOLITIONISTS 0 1 1 0 +ABOLITIONIST 0 1 0 1 +ABDUCTION 0 1 1 0 +ZOOLOGY 1 0 1 1 +ZOOF'S 2 0 2 2 +ZOOF 1 0 1 1 +ZION 1 0 1 1 +ZEST 1 0 1 1 +ZEAL 2 0 2 2 +YOUTH 5 0 5 5 +YOURSELVES 1 0 1 1 +YOURSELF 8 0 8 8 +YOURS 3 0 3 3 +YOUR 109 0 109 109 +YOUNGER 1 0 1 1 +YOUNG 43 0 43 43 +YOU'VE 4 0 4 4 +YOU'D 3 0 3 3 +YORKSHIRE 2 0 2 2 +YORK 6 0 6 6 +YONDER 1 0 1 1 +YOKE 1 0 1 1 +YIELDING 3 0 3 3 +YIELDED 2 0 2 2 +YIELD 3 0 3 3 +YET 43 0 43 43 +YESTERDAY 3 0 3 3 +YES 33 0 33 33 +YELLOW 9 0 9 9 +YELL 1 0 1 1 +YEARS 34 0 34 34 +YEARLY 2 0 2 2 +YEAR 5 0 5 5 +YEA 1 0 1 1 +YARN 2 0 2 2 +YAMS 1 0 1 1 +YACHTSMAN 1 0 1 1 +YACHT 3 0 3 3 +WYLDER 5 0 5 5 +WROTE 6 0 6 6 +WRONGS 1 0 1 1 +WRONG 10 0 10 10 +WRITTEN 7 0 7 7 +WRITS 1 0 1 1 +WRITINGS 2 0 2 2 +WRITING 6 0 6 6 +WRITHING 1 0 1 1 +WRITES 1 0 1 1 +WRITER 2 0 2 2 +WRITE 4 0 4 4 +WRIT 1 0 1 1 +WRETCHEDNESS 2 0 2 2 +WRESTLERS 1 0 1 1 +WRESTLED 1 0 1 1 +WOUNDED 1 0 1 1 +WOUND 1 0 1 1 +WOULDN'T 5 0 5 5 +WORTHY 6 0 6 6 +WORTH 4 0 4 4 +WORSTED 1 0 1 1 +WORSHIP 3 0 3 3 +WORRY 3 0 3 3 +WORN 1 0 1 1 +WORM 4 0 4 4 +WORKS 8 0 8 8 +WORKMEN 1 0 1 1 +WORKING 3 0 3 3 +WORKERS 1 0 1 1 +WORKED 5 0 5 5 +WORK 34 0 34 34 +WORE 3 0 3 3 +WORDS 20 0 20 20 +WORD 20 0 20 20 +WOOL 3 0 3 3 +WOODLEY 3 0 3 3 +WOODEN 3 0 3 3 +WONDERS 1 0 1 1 +WONDERINGLY 1 0 1 1 +WONDERING 2 0 2 2 +WONDERFUL 7 0 7 7 +WONDERED 2 0 2 2 +WONDER 7 0 7 7 +WON'T 15 0 15 15 +WOMEN 8 0 8 8 +WOLF 1 0 1 1 +WOKE 1 0 1 1 +WOE 2 0 2 2 +WIZARD'S 1 0 1 1 +WIZARD 3 0 3 3 +WIVES 3 0 3 3 +WITTY 1 0 1 1 +WITTILY 1 0 1 1 +WITS 1 0 1 1 +WITNESSING 1 0 1 1 +WITNESSES 1 0 1 1 +WITNESS 1 0 1 1 +WITHOUT 37 0 37 37 +WITHIN 23 0 23 23 +WITHES 1 0 1 1 +WITHERING 1 0 1 1 +WITHERED 1 0 1 1 +WITHDRAWN 2 0 2 2 +WITHDRAW 1 0 1 1 +WITHAL 1 0 1 1 +WIT 3 0 3 3 +WISHES 3 0 3 3 +WISHERS 1 0 1 1 +WISHED 6 0 6 6 +WISH 11 0 11 11 +WISE 5 0 5 5 +WISDOM 3 0 3 3 +WIRE 4 0 4 4 +WIPED 1 0 1 1 +WINTER 5 0 5 5 +WINNING 1 0 1 1 +WINKING 1 0 1 1 +WINK 1 0 1 1 +WINGS 5 0 5 5 +WINE 7 0 7 7 +WINDY 1 0 1 1 +WINDS 3 0 3 3 +WINDOWS 7 0 7 7 +WINDOW 16 0 16 16 +WINDING 1 0 1 1 +WIND 8 0 8 8 +WIN 2 0 2 2 +WILY 1 0 1 1 +WILSON 1 0 1 1 +WILLS 1 0 1 1 +WILLOWY 1 0 1 1 +WILLINGLY 2 0 2 2 +WILLING 2 0 2 2 +WILLIAM 1 0 1 1 +WILLED 1 0 1 1 +WILDERNESS 1 0 1 1 +WILD 9 0 9 9 +WIFE 17 0 17 17 +WIDTH 1 0 1 1 +WIDEST 1 0 1 1 +WIDENING 1 0 1 1 +WIDELY 1 0 1 1 +WICKET 1 0 1 1 +WICKEDNESS 1 0 1 1 +WICKEDEST 1 0 1 1 +WICKED 3 0 3 3 +WHY 44 0 44 44 +WHOSE 14 0 14 14 +WHOM 18 0 18 18 +WHOLESOME 1 0 1 1 +WHOLE 25 0 25 25 +WHOEVER 3 0 3 3 +WHO'S 2 0 2 2 +WHO 154 0 154 154 +WHITNEY 1 0 1 1 +WHITE 23 0 23 23 +WHISTLING 1 0 1 1 +WHISTLE 2 0 2 2 +WHISPERED 7 0 7 7 +WHISPER 1 0 1 1 +WHISKERS 1 0 1 1 +WHISK 1 0 1 1 +WHIRLWIND 3 0 3 3 +WHIRLPOOL 2 0 2 2 +WHIPPED 2 0 2 2 +WHIM 1 0 1 1 +WHILST 3 0 3 3 +WHETHER 23 0 23 23 +WHEREVER 3 0 3 3 +WHEREUPON 3 0 3 3 +WHEREON 1 0 1 1 +WHEREFORE 1 0 1 1 +WHEREBY 1 0 1 1 +WHENEVER 3 0 3 3 +WHELPS 1 0 1 1 +WHEELING 1 0 1 1 +WHEELER 1 0 1 1 +WHEELED 3 0 3 3 +WHEEL 1 0 1 1 +WHEAT 2 0 2 2 +WHARVES 1 0 1 1 +WHALE 4 0 4 4 +WETTING 1 0 1 1 +WET 9 0 9 9 +WESTWARD 
1 0 1 1 +WESTPORT 2 0 2 2 +WESTMERE 1 0 1 1 +WESTERN 1 0 1 1 +WESLEY 2 0 2 2 +WEREN'T 2 0 2 2 +WENT 25 0 25 25 +WELL 75 0 75 75 +WELFARE 2 0 2 2 +WEIGHT 2 0 2 2 +WEIGH 1 0 1 1 +WEEPING 4 0 4 4 +WEEP 1 0 1 1 +WEEKS 4 0 4 4 +WEEK 2 0 2 2 +WEEDS 3 0 3 3 +WEDNESDAY 2 0 2 2 +WEBS 1 0 1 1 +WEB 1 0 1 1 +WEATHER 6 0 6 6 +WEASEL 1 0 1 1 +WEARY 1 0 1 1 +WEARING 2 0 2 2 +WEARINESS 2 0 2 2 +WEARILY 2 0 2 2 +WEARERS 1 0 1 1 +WEAR 5 0 5 5 +WEAPON 2 0 2 2 +WEALTH 5 0 5 5 +WEAKNESS 3 0 3 3 +WEAKLY 1 0 1 1 +WEAKENED 2 0 2 2 +WEAK 6 0 6 6 +WE'VE 2 0 2 2 +WE'LL 6 0 6 6 +WAYS 1 0 1 1 +WAX 1 0 1 1 +WAVING 2 0 2 2 +WAVES 7 0 7 7 +WAVERING 2 0 2 2 +WAVED 1 0 1 1 +WATSON 5 0 5 5 +WATERS 6 0 6 6 +WATERCRESS 1 0 1 1 +WATCHING 1 0 1 1 +WATCHFULNESS 1 0 1 1 +WATCHFUL 1 0 1 1 +WATCHED 7 0 7 7 +WATCH 2 0 2 2 +WASTEFUL 4 0 4 4 +WASTED 2 0 2 2 +WASTE 5 0 5 5 +WASN'T 2 0 2 2 +WASHINGTON 1 0 1 1 +WASHING 1 0 1 1 +WASH 1 0 1 1 +WARY 1 0 1 1 +WARRIORS 2 0 2 2 +WARRENTON'S 2 0 2 2 +WARRENTON 4 0 4 4 +WARRANTED 2 0 2 2 +WARRANT 1 0 1 1 +WARN 1 0 1 1 +WARMEST 1 0 1 1 +WARMED 1 0 1 1 +WARM 4 0 4 4 +WARLIKE 1 0 1 1 +WARDS 1 0 1 1 +WARD 1 0 1 1 +WAR 5 0 5 5 +WANTS 3 0 3 3 +WANTING 3 0 3 3 +WANTED 8 0 8 8 +WANT 19 0 19 19 +WANDERING 2 0 2 2 +WANDERED 2 0 2 2 +WANDER 2 0 2 2 +WAN 1 0 1 1 +WALNUT 1 0 1 1 +WALLS 2 0 2 2 +WALL 6 0 6 6 +WALKS 1 0 1 1 +WALKING 2 0 2 2 +WALKETH 1 0 1 1 +WALKED 6 0 6 6 +WALK 5 0 5 5 +WAITING 7 0 7 7 +WAITERS 1 0 1 1 +WAITER 1 0 1 1 +WAITED 1 0 1 1 +WAIT 8 0 8 8 +WAISTCOAT 1 0 1 1 +WAIST 1 0 1 1 +WAILING 1 0 1 1 +WAGED 1 0 1 1 +WADDLING 1 0 1 1 +W 3 0 3 3 +VULGAR 1 0 1 1 +VOYAGING 2 0 2 2 +VOYAGES 1 0 1 1 +VOYAGE 2 0 2 2 +VOWS 1 0 1 1 +VOUCHED 1 0 1 1 +VOTES 1 0 1 1 +VOTERS 1 0 1 1 +VOLUME 1 0 1 1 +VOLTAIRE'S 1 0 1 1 +VOICES 2 0 2 2 +VOICE 18 0 18 18 +VIVIDLY 2 0 2 2 +VIVID 2 0 2 2 +VIVE 1 0 1 1 +VIVACITY 1 0 1 1 +VITALITY 1 0 1 1 +VITAL 1 0 1 1 +VISTA 1 0 1 1 +VISITORS 5 0 5 5 +VISITOR 2 0 2 2 +VISITED 4 0 4 4 +VISIT 4 0 4 4 +VISION 2 0 2 2 +VISIBLE 2 0 2 2 +VIRTUOUS 1 0 1 1 +VIRTUE 3 0 3 3 +VIRTUALLY 2 0 2 2 +VIRGINS 1 0 1 1 +VIRGIN 2 0 2 2 +VIPER 2 0 2 2 +VIOLET 1 0 1 1 +VIOLENT 5 0 5 5 +VIOLENCE 5 0 5 5 +VIOLATED 1 0 1 1 +VINES 1 0 1 1 +VINEGAR 1 0 1 1 +VINDICATION 1 0 1 1 +VINDICATE 1 0 1 1 +VILLAGE 4 0 4 4 +VIKING 3 0 3 3 +VIGOROUSLY 1 0 1 1 +VIGOROUS 1 0 1 1 +VIGNETTE 1 0 1 1 +VIGILANCE 1 0 1 1 +VIEWS 1 0 1 1 +VIEWED 1 0 1 1 +VIEW 2 0 2 2 +VIE 1 0 1 1 +VICTUALS 1 0 1 1 +VICTORY 1 0 1 1 +VICTORIES 1 0 1 1 +VICTIM 2 0 2 2 +VICOMTE 1 0 1 1 +VICISSITUDES 1 0 1 1 +VICIOUS 1 0 1 1 +VICINITY 1 0 1 1 +VICARIOUS 4 0 4 4 +VEXED 1 0 1 1 +VEXATION 1 0 1 1 +VESTURE 1 0 1 1 +VESTIBULE 1 0 1 1 +VESSEL 2 0 2 2 +VERY 83 0 83 83 +VERTEBRAL 1 0 1 1 +VERSES 1 0 1 1 +VERSED 3 0 3 3 +VERIFY 1 0 1 1 +VERGE 1 0 1 1 +VENTURED 1 0 1 1 +VENTURE 1 0 1 1 +VENICE 1 0 1 1 +VENGEANCE 2 0 2 2 +VENERABLE 1 0 1 1 +VELVET 1 0 1 1 +VELOCITY 2 0 2 2 +VEILS 1 0 1 1 +VEHICLE 1 0 1 1 +VEHEMENTLY 1 0 1 1 +VAULT 1 0 1 1 +VAUDOIS 1 0 1 1 +VASTLY 1 0 1 1 +VAST 5 0 5 5 +VASSALS 1 0 1 1 +VARYING 2 0 2 2 +VARIOUS 7 0 7 7 +VARIETY 2 0 2 2 +VARIETIES 1 0 1 1 +VARIED 1 0 1 1 +VARIATIONS 1 0 1 1 +VARIANCE 1 0 1 1 +VARIABILITY 2 0 2 2 +VANQUISHED 2 0 2 2 +VANITY 1 0 1 1 +VANISHED 2 0 2 2 +VANISH 2 0 2 2 +VANES 1 0 1 1 +VALUE 3 0 3 3 +VALUABLE 2 0 2 2 +VALOR 3 0 3 3 +VALLEYS 2 0 2 2 +VALLEYED 1 0 1 1 +VALLEY 4 0 4 4 +VALIANTLY 1 0 1 1 +VALHALLA 1 0 1 1 +VALES 3 0 3 3 +VALE 1 0 1 1 +VAINLY 1 0 1 1 +VAIN 1 0 1 1 +VAGUELY 1 0 1 1 +VAGUE 3 0 3 3 +VACUUM 1 0 1 1 +VACANT 1 0 1 1 +UTTERLY 4 0 4 4 +UTTERED 1 0 1 1 +UTTERANCE 1 0 1 1 +UTTER 4 0 4 4 +UTMOST 3 0 3 
3 +UTILITY 3 0 3 3 +UTAH 2 0 2 2 +USUALLY 4 0 4 4 +USUAL 5 0 5 5 +USING 3 0 3 3 +USELESS 4 0 4 4 +USEFUL 5 0 5 5 +USED 17 0 17 17 +USE 31 0 31 31 +US 60 0 60 60 +URGING 1 0 1 1 +URGED 3 0 3 3 +UPWARDS 1 0 1 1 +UPSTAIRS 3 0 3 3 +UPRIGHT 1 0 1 1 +UPRAISED 1 0 1 1 +UPPERMOST 1 0 1 1 +UPPER 2 0 2 2 +UPLIFTED 1 0 1 1 +UPHOLSTERED 1 0 1 1 +UPHEAVAL 1 0 1 1 +UPBRAIDED 1 0 1 1 +UNWORTHY 1 0 1 1 +UNWILLING 2 0 2 2 +UNVARNISHED 1 0 1 1 +UNUSUAL 4 0 4 4 +UNTUTORED 1 0 1 1 +UNTRIED 1 0 1 1 +UNTREATED 1 0 1 1 +UNTOUCHED 1 0 1 1 +UNTO 3 0 3 3 +UNTIL 16 0 16 16 +UNTIDINESS 1 0 1 1 +UNTASTED 1 0 1 1 +UNSUCCESSFUL 1 0 1 1 +UNSEPARATED 1 0 1 1 +UNSEEN 1 0 1 1 +UNSAID 1 0 1 1 +UNREAL 1 0 1 1 +UNPRECEDENTED 1 0 1 1 +UNPOPULAR 1 0 1 1 +UNPLEASANT 3 0 3 3 +UNPERCEIVED 1 0 1 1 +UNPARALLELED 1 0 1 1 +UNOBSERVED 1 0 1 1 +UNNECESSARY 1 0 1 1 +UNNATURAL 1 0 1 1 +UNMOVED 1 0 1 1 +UNLUCKY 2 0 2 2 +UNLUCKILY 1 0 1 1 +UNLOCKED 1 0 1 1 +UNLOCK 1 0 1 1 +UNLOADED 1 0 1 1 +UNLIKELY 1 0 1 1 +UNLIKE 1 0 1 1 +UNLESS 5 0 5 5 +UNKNOWN 1 0 1 1 +UNJUST 2 0 2 2 +UNIVERSITY 1 0 1 1 +UNIVERSE 1 0 1 1 +UNIVERSAL 3 0 3 3 +UNITED 8 0 8 8 +UNITE 1 0 1 1 +UNION 3 0 3 3 +UNINVITED 1 0 1 1 +UNINTELLIGIBLE 1 0 1 1 +UNIFORMS 2 0 2 2 +UNIFORM 1 0 1 1 +UNICORN 1 0 1 1 +UNHEEDED 1 0 1 1 +UNHAPPY 4 0 4 4 +UNHAPPINESS 1 0 1 1 +UNGRACIOUSLY 1 0 1 1 +UNGRACIOUS 1 0 1 1 +UNFORTUNATELY 2 0 2 2 +UNFORTUNATE 1 0 1 1 +UNFOLD 1 0 1 1 +UNFINISHED 2 0 2 2 +UNFEELING 1 0 1 1 +UNFAMILIAR 1 0 1 1 +UNFAIRLY 1 0 1 1 +UNFAIR 2 0 2 2 +UNFAILING 1 0 1 1 +UNEXPECTEDLY 2 0 2 2 +UNEXPECTED 3 0 3 3 +UNEXCEPTIONABLY 1 0 1 1 +UNEASY 4 0 4 4 +UNEASINESS 1 0 1 1 +UNEASILY 1 0 1 1 +UNEARTHLY 1 0 1 1 +UNDUE 1 0 1 1 +UNDOUBTEDLY 1 0 1 1 +UNDOING 1 0 1 1 +UNDERWATER 1 0 1 1 +UNDERTONE 1 0 1 1 +UNDERTAKING 2 0 2 2 +UNDERSTOOD 6 0 6 6 +UNDERSTANDING 4 0 4 4 +UNDERSTAND 9 0 9 9 +UNDERSCORE 1 0 1 1 +UNDERNEATH 1 0 1 1 +UNDERMINE 1 0 1 1 +UNDERHANDED 1 0 1 1 +UNDER 40 0 40 40 +UNDECEIVED 1 0 1 1 +UNCOUTH 1 0 1 1 +UNCOURTEOUS 1 0 1 1 +UNCONTROLLABLE 1 0 1 1 +UNCONSTITUTIONALITY 1 0 1 1 +UNCOMPROMISING 1 0 1 1 +UNCOMFORTABLE 1 0 1 1 +UNCLE 6 0 6 6 +UNCIVIL 1 0 1 1 +UNCHARITABLENESS 1 0 1 1 +UNCHANGED 1 0 1 1 +UNCERTAIN 2 0 2 2 +UNCASING 1 0 1 1 +UNCAS 10 0 10 10 +UNBUTTONING 1 0 1 1 +UNBROKEN 1 0 1 1 +UNBEARABLE 2 0 2 2 +UNAVOIDABLE 1 0 1 1 +UNAVERRED 1 0 1 1 +UNANIMOUSLY 1 0 1 1 +UNANIMOUS 1 0 1 1 +UNAFFECTED 1 0 1 1 +UNACCOUNTABLE 1 0 1 1 +UNABLE 1 0 1 1 +UGLY 3 0 3 3 +TYRANNY 1 0 1 1 +TYPICAL 1 0 1 1 +TYPES 1 0 1 1 +TYPE 1 0 1 1 +TWIXT 1 0 1 1 +TWIRLING 1 0 1 1 +TWIN 1 0 1 1 +TWILIGHT 2 0 2 2 +TWENTY 15 0 15 15 +TWELVE 2 0 2 2 +TWELFTH 1 0 1 1 +TWASN'T 1 0 1 1 +TURNOVER 1 0 1 1 +TURNIPS 1 0 1 1 +TURNING 6 0 6 6 +TURNER'S 1 0 1 1 +TURNER 4 0 4 4 +TURN 18 0 18 18 +TURF 1 0 1 1 +TUNE 2 0 2 2 +TUMULTUOUS 1 0 1 1 +TUMULT 3 0 3 3 +TUMBLER 1 0 1 1 +TUMBLED 3 0 3 3 +TUFT 1 0 1 1 +TUESDAY 1 0 1 1 +TUCKED 1 0 1 1 +TUBE 1 0 1 1 +TRYING 5 0 5 5 +TRUTH 13 0 13 13 +TRUSTY 1 0 1 1 +TRUSTS 1 0 1 1 +TRUST 5 0 5 5 +TRUNKS 2 0 2 2 +TRUNK 2 0 2 2 +TRUMPETS 1 0 1 1 +TRULY 9 0 9 9 +TRUFFLES 1 0 1 1 +TRUE 21 0 21 21 +TRUDGED 1 0 1 1 +TROUT'S 1 0 1 1 +TROUT 1 0 1 1 +TROUBLESOME 1 0 1 1 +TROUBLES 2 0 2 2 +TROUBLED 4 0 4 4 +TROTTING 1 0 1 1 +TROTTED 1 0 1 1 +TROTH 1 0 1 1 +TROT 5 0 5 5 +TROPHIES 1 0 1 1 +TROOPS 3 0 3 3 +TRIUMPHANTLY 1 0 1 1 +TRIUMPHANT 1 0 1 1 +TRIUMPH 3 0 3 3 +TRIPPED 1 0 1 1 +TRINKET 1 0 1 1 +TRIMNESS 1 0 1 1 +TRIM 1 0 1 1 +TRILOGIES 1 0 1 1 +TRIGGER 1 0 1 1 +TRICKS 2 0 2 2 +TRIBUTE 1 0 1 1 +TRIBES 1 0 1 1 +TRIAL 2 0 2 2 +TREND 2 0 2 2 +TREMULOUSLY 1 0 1 1 +TREMULOUS 1 0 1 1 +TREMOR 1 0 
1 1 +TREMENDOUSLY 1 0 1 1 +TREMBLING 5 0 5 5 +TREMBLED 1 0 1 1 +TREMBLE 2 0 2 2 +TREES 19 0 19 19 +TREE 35 0 35 35 +TREDDLESTON 1 0 1 1 +TREATY 1 0 1 1 +TREATS 1 0 1 1 +TREATING 1 0 1 1 +TREATED 2 0 2 2 +TREAT 1 0 1 1 +TREASURE 2 0 2 2 +TREAD 1 0 1 1 +TRAY 1 0 1 1 +TRAVESTY 1 0 1 1 +TRAVERSED 1 0 1 1 +TRAVELING 3 0 3 3 +TRAVEL 1 0 1 1 +TRASH 1 0 1 1 +TRAP 2 0 2 2 +TRANSPARENT 2 0 2 2 +TRANSLATION 1 0 1 1 +TRANSLATE 1 0 1 1 +TRANSIENT 2 0 2 2 +TRANSFERRED 1 0 1 1 +TRANSCRIPT 1 0 1 1 +TRANQUILLITY 1 0 1 1 +TRANQUIL 1 0 1 1 +TRAINS 1 0 1 1 +TRAINING 3 0 3 3 +TRAINED 1 0 1 1 +TRAIN 1 0 1 1 +TRAIL 2 0 2 2 +TRAFFIC 1 0 1 1 +TRADITIONS 3 0 3 3 +TRADITION 1 0 1 1 +TRADES 1 0 1 1 +TRACK 1 0 1 1 +TRACES 2 0 2 2 +TRACE 1 0 1 1 +TOYS 1 0 1 1 +TOWNS 3 0 3 3 +TOWERS 1 0 1 1 +TOWER 1 0 1 1 +TOUR 1 0 1 1 +TOUCHING 1 0 1 1 +TOUCHES 4 0 4 4 +TOUCH 8 0 8 8 +TOTTY 3 0 3 3 +TOTAL 1 0 1 1 +TOSSING 1 0 1 1 +TOSSED 1 0 1 1 +TORY 1 0 1 1 +TORTURED 2 0 2 2 +TORTURE 1 0 1 1 +TORTOISE 1 0 1 1 +TORRENT 3 0 3 3 +TORN 1 0 1 1 +TORCH 1 0 1 1 +TOPSAILS 1 0 1 1 +TOPS 4 0 4 4 +TOPMOST 1 0 1 1 +TOPEKA 1 0 1 1 +TOOTHED 1 0 1 1 +TOOTH 1 0 1 1 +TOOK 33 0 33 33 +TOO 61 0 61 61 +TONGUES 1 0 1 1 +TONGUE 8 0 8 8 +TONES 3 0 3 3 +TONED 1 0 1 1 +TONE 5 0 5 5 +TOMMY 1 0 1 1 +TOMB 1 0 1 1 +TOM 4 0 4 4 +TOLERATION 1 0 1 1 +TOLEDANS 1 0 1 1 +TOLD 32 0 32 32 +TOKEN 2 0 2 2 +TOE 1 0 1 1 +TITLE 3 0 3 3 +TITIAN 1 0 1 1 +TIS 8 0 8 8 +TIRESOME 2 0 2 2 +TIRELESS 1 0 1 1 +TIRED 6 0 6 6 +TIRE 1 0 1 1 +TIPTOE 2 0 2 2 +TIPPED 1 0 1 1 +TIP 3 0 3 3 +TINY 3 0 3 3 +TINTS 1 0 1 1 +TINTORET 1 0 1 1 +TINT 1 0 1 1 +TINSEL 1 0 1 1 +TINKLED 1 0 1 1 +TINGLING 1 0 1 1 +TINGE 1 0 1 1 +TIN 1 0 1 1 +TIMES 21 0 21 21 +TIME'S 2 0 2 2 +TIMASCHEFF'S 1 0 1 1 +TILL 8 0 8 8 +TILES 1 0 1 1 +TIGHTLY 1 0 1 1 +TIGHTEN 1 0 1 1 +TIGHT 1 0 1 1 +TIED 2 0 2 2 +TIE 1 0 1 1 +TIDING 1 0 1 1 +TICKET 1 0 1 1 +THY 17 0 17 17 +THWART 1 0 1 1 +THUS 21 0 21 21 +THURSTON 2 0 2 2 +THURSDAY 1 0 1 1 +THRUSTING 1 0 1 1 +THRUST 5 0 5 5 +THROWN 4 0 4 4 +THROW 2 0 2 2 +THROUGHOUT 5 0 5 5 +THRONE 4 0 4 4 +THROATS 1 0 1 1 +THROAT 1 0 1 1 +THRIVING 1 0 1 1 +THRILLING 1 0 1 1 +THRILLED 1 0 1 1 +THRILL 1 0 1 1 +THRICE 1 0 1 1 +THREW 5 0 5 5 +THREE 41 0 41 41 +THREATS 1 0 1 1 +THREATENS 2 0 2 2 +THREATENING 3 0 3 3 +THREATENED 1 0 1 1 +THRALLS 2 0 2 2 +THRALL'S 1 0 1 1 +THRALL 2 0 2 2 +THOUSANDS 2 0 2 2 +THOUGHTS 13 0 13 13 +THOUGHTLESS 1 0 1 1 +THOUGHTFUL 1 0 1 1 +THOUGHT 54 0 54 54 +THOUGH 33 0 33 33 +THOSE 37 0 37 37 +THOROUGH 1 0 1 1 +THOR 1 0 1 1 +THOMAS 1 0 1 1 +THIRTY 12 0 12 12 +THIRTIETH 1 0 1 1 +THIRTEENTH 1 0 1 1 +THIRD 7 0 7 7 +THINKING 8 0 8 8 +THINK 52 0 52 52 +THINGS 34 0 34 34 +THIN 2 0 2 2 +THICKEST 1 0 1 1 +THICK 5 0 5 5 +THEY'VE 1 0 1 1 +THEY'RE 3 0 3 3 +THESE 68 0 68 68 +THERMOMETER 1 0 1 1 +THEREOF 1 0 1 1 +THEREIN 3 0 3 3 +THEREFORE 20 0 20 20 +THEREAFTER 1 0 1 1 +THEREABOUTS 1 0 1 1 +THEORY 5 0 5 5 +THEORIES 1 0 1 1 +THEORETICAL 1 0 1 1 +THEOLOGY 1 0 1 1 +THENCE 1 0 1 1 +THEMSELVES 12 0 12 12 +THEME 1 0 1 1 +THEIRS 2 0 2 2 +THEFT 4 0 4 4 +THEE'S 1 0 1 1 +THEATRICAL 1 0 1 1 +THEATRES 1 0 1 1 +THAT'S 14 0 14 14 +THANKS 3 0 3 3 +THANKING 3 0 3 3 +THANKFUL 1 0 1 1 +THANKED 1 0 1 1 +TEXTURES 1 0 1 1 +TEXT 2 0 2 2 +TESTIMONY 1 0 1 1 +TESTIMONIES 1 0 1 1 +TESTED 1 0 1 1 +TEST 2 0 2 2 +TERROR 2 0 2 2 +TERRITORY 2 0 2 2 +TERRITORIAL 4 0 4 4 +TERRIFIED 2 0 2 2 +TERRIFIC 1 0 1 1 +TERRIBLY 2 0 2 2 +TERRIBLE 8 0 8 8 +TERRACED 1 0 1 1 +TERMS 9 0 9 9 +TERM 6 0 6 6 +TENTS 2 0 2 2 +TENT 5 0 5 5 +TENFOLD 2 0 2 2 +TENDERLY 1 0 1 1 +TENDER 3 0 3 3 +TENDED 1 0 1 1 +TEND 2 0 2 2 +TENANTED 1 0 1 1 
+TENABILITY 1 0 1 1 +TEN 14 0 14 14 +TEMPTATION 2 0 2 2 +TEMPORARY 2 0 2 2 +TEMPORAL 2 0 2 2 +TEMPLES 1 0 1 1 +TEMPLE 2 0 2 2 +TEMPEST 2 0 2 2 +TEMPERATURE 1 0 1 1 +TEMPER 3 0 3 3 +TELLS 1 0 1 1 +TELLING 3 0 3 3 +TELL 34 0 34 34 +TELESCOPE 2 0 2 2 +TEETH 1 0 1 1 +TEDIOUS 2 0 2 2 +TECHNOLOGY 1 0 1 1 +TECHNIQUE 1 0 1 1 +TECHNICAL 2 0 2 2 +TEARS 11 0 11 11 +TEAPOT 1 0 1 1 +TEAL 1 0 1 1 +TEACHING 2 0 2 2 +TEACH 6 0 6 6 +TEA 3 0 3 3 +TAYLOR 7 0 7 7 +TAXED 1 0 1 1 +TAWNY 1 0 1 1 +TAUGHT 5 0 5 5 +TASTE 5 0 5 5 +TASKS 1 0 1 1 +TASK 9 0 9 9 +TARTS 3 0 3 3 +TARRY 1 0 1 1 +TARANTULA 1 0 1 1 +TAPESTRY 1 0 1 1 +TAPESTRIES 1 0 1 1 +TAPE 1 0 1 1 +TANKARD 1 0 1 1 +TANGLE 1 0 1 1 +TAMPERING 1 0 1 1 +TAMPERED 1 0 1 1 +TAMES 1 0 1 1 +TAME 1 0 1 1 +TALONS 1 0 1 1 +TALLOW 1 0 1 1 +TALL 9 0 9 9 +TALKS 4 0 4 4 +TALKING 10 0 10 10 +TALKERS 1 0 1 1 +TALKED 1 0 1 1 +TALK 19 0 19 19 +TALES 2 0 2 2 +TALENTED 1 0 1 1 +TALENT 5 0 5 5 +TAKING 7 0 7 7 +TAKES 3 0 3 3 +TAKEN 15 0 15 15 +TAKE 34 0 34 34 +TAILORS 1 0 1 1 +TAG 1 0 1 1 +TACT 1 0 1 1 +TACK 1 0 1 1 +TABLES 3 0 3 3 +TABLE 24 0 24 24 +TABBY'S 1 0 1 1 +TABBY 2 0 2 2 +SYSTEM 8 0 8 8 +SYNONYM 1 0 1 1 +SYMPOSIUM 2 0 2 2 +SYMPATHY 3 0 3 3 +SYMPATHETIC 2 0 2 2 +SYLLABLE 1 0 1 1 +SWORDS 1 0 1 1 +SWORD 5 0 5 5 +SWOOPED 1 0 1 1 +SWOLLEN 1 0 1 1 +SWIRLING 1 0 1 1 +SWIRL 1 0 1 1 +SWING 1 0 1 1 +SWIMMING 2 0 2 2 +SWIFTNESS 1 0 1 1 +SWIFTLY 4 0 4 4 +SWIFT 1 0 1 1 +SWELLING 1 0 1 1 +SWELL 1 0 1 1 +SWEETS 1 0 1 1 +SWEETNESS 2 0 2 2 +SWEET 6 0 6 6 +SWEEPING 1 0 1 1 +SWAY 1 0 1 1 +SWARMING 1 0 1 1 +SWAMP 3 0 3 3 +SWAM 1 0 1 1 +SUSPICIOUS 1 0 1 1 +SUSPENDED 1 0 1 1 +SUSPECT 2 0 2 2 +SURVIVE 2 0 2 2 +SURVEYOR 1 0 1 1 +SURVEYED 1 0 1 1 +SURROUNDINGS 1 0 1 1 +SURROUNDING 2 0 2 2 +SURROUNDED 2 0 2 2 +SURRENDER 2 0 2 2 +SURPRISED 6 0 6 6 +SURPRISE 4 0 4 4 +SURPASSED 1 0 1 1 +SURMISED 1 0 1 1 +SURGEON 1 0 1 1 +SURGE 1 0 1 1 +SURFACE 8 0 8 8 +SURELY 5 0 5 5 +SURE 16 0 16 16 +SURCHARGED 1 0 1 1 +SUPREME 2 0 2 2 +SUPPRESSING 1 0 1 1 +SUPPRESSED 2 0 2 2 +SUPPOSITION 1 0 1 1 +SUPPOSING 2 0 2 2 +SUPPOSES 1 0 1 1 +SUPPOSED 3 0 3 3 +SUPPOSE 19 0 19 19 +SUPPORTS 1 0 1 1 +SUPPORTING 2 0 2 2 +SUPPORTED 2 0 2 2 +SUPPORT 2 0 2 2 +SUPPLYING 1 0 1 1 +SUPPLY 1 0 1 1 +SUPPLIES 1 0 1 1 +SUPPER 7 0 7 7 +SUPERIORITY 1 0 1 1 +SUPERIOR 8 0 8 8 +SUPERINTENDENCE 1 0 1 1 +SUPERFLUOUS 1 0 1 1 +SUPERFLUITIES 1 0 1 1 +SUNSHINE 3 0 3 3 +SUNSETS 1 0 1 1 +SUNSET 1 0 1 1 +SUNRISE 1 0 1 1 +SUNNY 1 0 1 1 +SUNLIGHT 2 0 2 2 +SUNK 1 0 1 1 +SUNG 2 0 2 2 +SUNDAY 2 0 2 2 +SUNBEAMS 1 0 1 1 +SUN 15 0 15 15 +SUMNER 1 0 1 1 +SUMMONS 2 0 2 2 +SUMMONED 3 0 3 3 +SUMMON 1 0 1 1 +SUMMIT 1 0 1 1 +SUMMER 6 0 6 6 +SUMMARY 1 0 1 1 +SUM 1 0 1 1 +SULLIED 1 0 1 1 +SULLEN 1 0 1 1 +SUITS 1 0 1 1 +SUITED 1 0 1 1 +SUITCASE 1 0 1 1 +SUITABLE 2 0 2 2 +SUIT 4 0 4 4 +SUGGESTIONS 1 0 1 1 +SUGGESTION 1 0 1 1 +SUGGESTED 3 0 3 3 +SUGGEST 1 0 1 1 +SUGAR 1 0 1 1 +SUFFOCATING 1 0 1 1 +SUFFICIENTLY 1 0 1 1 +SUFFICIENT 3 0 3 3 +SUFFICED 1 0 1 1 +SUFFICE 1 0 1 1 +SUFFERINGS 2 0 2 2 +SUFFERING 2 0 2 2 +SUFFERED 3 0 3 3 +SUFFER 5 0 5 5 +SUDDENLY 15 0 15 15 +SUDDEN 7 0 7 7 +SUCH 67 0 67 67 +SUCCESSION 3 0 3 3 +SUCCESSFUL 3 0 3 3 +SUCCESS 9 0 9 9 +SUCCEEDED 3 0 3 3 +SUCCEED 1 0 1 1 +SUBURB 1 0 1 1 +SUBTLETIES 1 0 1 1 +SUBSTITUTION 1 0 1 1 +SUBSTITUTED 1 0 1 1 +SUBSTANTIALLY 1 0 1 1 +SUBSTANTIAL 3 0 3 3 +SUBSTANCE 3 0 3 3 +SUBSISTENCE 1 0 1 1 +SUBSIDED 1 0 1 1 +SUBSCRIBE 1 0 1 1 +SUBORDINATION 1 0 1 1 +SUBMITTED 1 0 1 1 +SUBMIT 2 0 2 2 +SUBMISSIVELY 1 0 1 1 +SUBMARINE 3 0 3 3 +SUBJECTS 3 0 3 3 +SUBJECTIVELY 1 0 1 1 +SUBJECTED 1 0 1 1 +SUBJECT 16 0 16 16 +SUBDUING 1 
0 1 1 +SUBDUED 2 0 2 2 +STYLE 5 0 5 5 +STUTELEY 4 0 4 4 +STURDY 1 0 1 1 +STUPID 4 0 4 4 +STUPEFIED 2 0 2 2 +STUNG 1 0 1 1 +STUMPED 1 0 1 1 +STUMP 1 0 1 1 +STUFFED 3 0 3 3 +STUFF 1 0 1 1 +STUDYING 2 0 2 2 +STUDY 12 0 12 12 +STUDIOUS 2 0 2 2 +STUDIES 1 0 1 1 +STUDENTS 3 0 3 3 +STUDENT 2 0 2 2 +STUCCO 1 0 1 1 +STRUGGLES 1 0 1 1 +STRUGGLED 1 0 1 1 +STRUGGLE 6 0 6 6 +STRUCTURE 2 0 2 2 +STRUCK 4 0 4 4 +STROVE 2 0 2 2 +STRONGLY 2 0 2 2 +STRONGHOLD 1 0 1 1 +STRONGEST 2 0 2 2 +STRONGER 1 0 1 1 +STRONG 13 0 13 13 +STROLLERS 1 0 1 1 +STROLLER'S 1 0 1 1 +STROLLER 3 0 3 3 +STROLL 3 0 3 3 +STROKE 1 0 1 1 +STRIVING 1 0 1 1 +STRIVE 3 0 3 3 +STRIPPING 1 0 1 1 +STRIPPED 1 0 1 1 +STRIPLING 1 0 1 1 +STRIKING 1 0 1 1 +STRIKE 3 0 3 3 +STRIFE 1 0 1 1 +STRICTLY 1 0 1 1 +STRICTEST 1 0 1 1 +STRICT 2 0 2 2 +STRETCHING 1 0 1 1 +STRETCHED 1 0 1 1 +STRETCH 1 0 1 1 +STRENUOUS 1 0 1 1 +STRENGTHENING 1 0 1 1 +STRENGTHENED 2 0 2 2 +STRENGTH 7 0 7 7 +STREETS 1 0 1 1 +STREET 14 0 14 14 +STREAMLINE 1 0 1 1 +STREAM 1 0 1 1 +STREAKED 1 0 1 1 +STRAWBERRIES 1 0 1 1 +STRAW 1 0 1 1 +STRANGERS 2 0 2 2 +STRANGER 1 0 1 1 +STRANGELY 2 0 2 2 +STRANGE 12 0 12 12 +STRAITS 1 0 1 1 +STRAINED 1 0 1 1 +STRAIN 1 0 1 1 +STRAIGHTWAY 2 0 2 2 +STRAIGHT 3 0 3 3 +STORY 25 0 25 25 +STORMY 1 0 1 1 +STORMS 1 0 1 1 +STORM 3 0 3 3 +STORES 1 0 1 1 +STORAGE 1 0 1 1 +STOPPING 2 0 2 2 +STOPPED 6 0 6 6 +STOP 8 0 8 8 +STOOPED 1 0 1 1 +STOOP 1 0 1 1 +STOOL 2 0 2 2 +STOOD 22 0 22 22 +STONES 3 0 3 3 +STONE 3 0 3 3 +STOLEN 2 0 2 2 +STOICAL 1 0 1 1 +STOCKINGS 1 0 1 1 +STOCKBROKER 1 0 1 1 +STOCK 2 0 2 2 +STIRS 1 0 1 1 +STIRRED 1 0 1 1 +STIR 1 0 1 1 +STINGY 2 0 2 2 +STING 1 0 1 1 +STIMULANTS 1 0 1 1 +STILLNESS 1 0 1 1 +STILL 55 0 55 55 +STIFLING 1 0 1 1 +STIFFNESS 1 0 1 1 +STIFF 1 0 1 1 +STICKS 1 0 1 1 +STICKING 1 0 1 1 +STICK 1 0 1 1 +STEW 1 0 1 1 +STERNEST 1 0 1 1 +STERN 2 0 2 2 +STEPS 1 0 1 1 +STEPPED 1 0 1 1 +STEPHEN'S 1 0 1 1 +STEPHEN 2 0 2 2 +STEM 1 0 1 1 +STEEP 1 0 1 1 +STEEL 1 0 1 1 +STEAMING 1 0 1 1 +STEAMED 1 0 1 1 +STEAMBOAT 1 0 1 1 +STEAM 1 0 1 1 +STEAL 1 0 1 1 +STEADY 5 0 5 5 +STEADILY 2 0 2 2 +STEAD 1 0 1 1 +STAYS 1 0 1 1 +STAYED 2 0 2 2 +STAY 11 0 11 11 +STATUS 1 0 1 1 +STATUARY 1 0 1 1 +STATIONS 2 0 2 2 +STATION 6 0 6 6 +STATESMAN 1 0 1 1 +STATEMENT 3 0 3 3 +STATELY 1 0 1 1 +STATE 27 0 27 27 +STARVED 1 0 1 1 +STARTLING 1 0 1 1 +STARTLED 2 0 2 2 +STARTING 2 0 2 2 +STARTED 9 0 9 9 +STARS 1 0 1 1 +STARING 1 0 1 1 +STARED 1 0 1 1 +STAR 2 0 2 2 +STANLEY 1 0 1 1 +STANDS 3 0 3 3 +STANDING 8 0 8 8 +STANDARD 4 0 4 4 +STAND 13 0 13 13 +STAMPING 1 0 1 1 +STAMPED 1 0 1 1 +STALKS 1 0 1 1 +STAKES 1 0 1 1 +STAKE 1 0 1 1 +STAIRCASE 1 0 1 1 +STAINED 1 0 1 1 +STAIN 1 0 1 1 +STAID 1 0 1 1 +STAGES 2 0 2 2 +STAFF 1 0 1 1 +STACKED 1 0 1 1 +STABLE 1 0 1 1 +SQUIRE'S 3 0 3 3 +SQUIRE 8 0 8 8 +SQUEEZE 1 0 1 1 +SQUARES 2 0 2 2 +SQUARE 2 0 2 2 +SQUALOR 1 0 1 1 +SQUALID 1 0 1 1 +SPUR 1 0 1 1 +SPRUNG 2 0 2 2 +SPRINKLING 1 0 1 1 +SPRINKLED 1 0 1 1 +SPRINGY 1 0 1 1 +SPRINGS 3 0 3 3 +SPRINGING 1 0 1 1 +SPRING 8 0 8 8 +SPREADS 1 0 1 1 +SPREAD 5 0 5 5 +SPRANG 3 0 3 3 +SPOTLESS 1 0 1 1 +SPOT 4 0 4 4 +SPORTING 1 0 1 1 +SPOON 1 0 1 1 +SPOKEN 11 0 11 11 +SPOKE 15 0 15 15 +SPOILS 2 0 2 2 +SPLENDOR 1 0 1 1 +SPLENDIDLY 2 0 2 2 +SPLASHES 1 0 1 1 +SPLASHED 2 0 2 2 +SPITE 2 0 2 2 +SPIRITUAL 4 0 4 4 +SPIRITS 3 0 3 3 +SPIRIT 11 0 11 11 +SPINNING 4 0 4 4 +SPIN 1 0 1 1 +SPIKES 1 0 1 1 +SPIDER 1 0 1 1 +SPICY 1 0 1 1 +SPERM 1 0 1 1 +SPENT 5 0 5 5 +SPENDING 1 0 1 1 +SPEND 2 0 2 2 +SPELLED 1 0 1 1 +SPELL 1 0 1 1 +SPEEDS 1 0 1 1 +SPEED 3 0 3 3 +SPEECHLESS 1 0 1 1 +SPEECH 6 0 6 6 +SPED 2 0 2 2 +SPECULATE 1 
0 1 1 +SPECTATORS 1 0 1 1 +SPECKS 1 0 1 1 +SPECIOUS 1 0 1 1 +SPECIFICATIONS 1 0 1 1 +SPECIFIC 1 0 1 1 +SPECIES 3 0 3 3 +SPECIALTY 1 0 1 1 +SPECIALLY 2 0 2 2 +SPEAR 1 0 1 1 +SPEAKS 1 0 1 1 +SPEAKING 10 0 10 10 +SPEAK 15 0 15 15 +SPASM 1 0 1 1 +SPARKS 1 0 1 1 +SPARKLING 3 0 3 3 +SPARKLES 2 0 2 2 +SPARKLED 1 0 1 1 +SPARK 1 0 1 1 +SPARE 3 0 3 3 +SPACE 5 0 5 5 +SOUTHEY'S 1 0 1 1 +SOUTHERNERS 2 0 2 2 +SOUTHBRIDGE 1 0 1 1 +SOURCE 1 0 1 1 +SOUP 1 0 1 1 +SOUNDING 3 0 3 3 +SOUNDED 2 0 2 2 +SOUND 7 0 7 7 +SOULS 4 0 4 4 +SOUL'S 1 0 1 1 +SOUL 8 0 8 8 +SOUGHT 6 0 6 6 +SORTS 2 0 2 2 +SORT 8 0 8 8 +SORRY 5 0 5 5 +SORROWS 1 0 1 1 +SORROWFULLY 1 0 1 1 +SORROWFUL 1 0 1 1 +SORROW 5 0 5 5 +SORE 1 0 1 1 +SORCERESS 1 0 1 1 +SOPHISTRY 1 0 1 1 +SOOTHINGLY 1 0 1 1 +SOOTHED 1 0 1 1 +SOOTHE 1 0 1 1 +SOOTH 1 0 1 1 +SOON 28 0 28 28 +SONS 3 0 3 3 +SONOROUS 1 0 1 1 +SON 15 0 15 15 +SOMEWHERE 6 0 6 6 +SOMEWHAT 5 0 5 5 +SOMETIMES 18 0 18 18 +SOMETHING 37 0 37 37 +SOMEHOW 6 0 6 6 +SOMEBODY 3 0 3 3 +SOMBRE 1 0 1 1 +SOLVED 2 0 2 2 +SOLUTION 1 0 1 1 +SOLILOQUY 5 0 5 5 +SOLID 2 0 2 2 +SOLEMNITY 1 0 1 1 +SOLEMN 1 0 1 1 +SOLELY 1 0 1 1 +SOLE 3 0 3 3 +SOLDIERS 6 0 6 6 +SOLD 4 0 4 4 +SOIL 2 0 2 2 +SOFTNESS 2 0 2 2 +SOFTLY 4 0 4 4 +SOFTENED 1 0 1 1 +SOFT 7 0 7 7 +SOFAS 1 0 1 1 +SODALITY 1 0 1 1 +SOCRATES 2 0 2 2 +SOCKS 1 0 1 1 +SOCIETY 7 0 7 7 +SOCIETIES 1 0 1 1 +SOCIAL 8 0 8 8 +SOCIABLE 1 0 1 1 +SOBS 1 0 1 1 +SOARED 1 0 1 1 +SOAR 1 0 1 1 +SO 197 0 197 197 +SNUFFED 1 0 1 1 +SNUFF 4 0 4 4 +SNUBNOSED 1 0 1 1 +SNOW 1 0 1 1 +SNORED 1 0 1 1 +SNEER 1 0 1 1 +SNATCHED 1 0 1 1 +SNATCH 1 0 1 1 +SMUGGLING 1 0 1 1 +SMOOTHER 1 0 1 1 +SMOOTH 1 0 1 1 +SMOKING 1 0 1 1 +SMOKE 5 0 5 5 +SMITTEN 2 0 2 2 +SMITH 2 0 2 2 +SMILING 3 0 3 3 +SMILES 3 0 3 3 +SMILE 12 0 12 12 +SMELLS 2 0 2 2 +SMELL 3 0 3 3 +SMARTLY 1 0 1 1 +SMART 1 0 1 1 +SMALLEST 2 0 2 2 +SMALLER 1 0 1 1 +SMALL 20 0 20 20 +SLY 2 0 2 2 +SLUNK 1 0 1 1 +SLUMS 1 0 1 1 +SLUMBERS 1 0 1 1 +SLOWLY 14 0 14 14 +SLOW 4 0 4 4 +SLOPING 1 0 1 1 +SLIPS 1 0 1 1 +SLIPPING 1 0 1 1 +SLIPPED 4 0 4 4 +SLINGS 1 0 1 1 +SLIMY 1 0 1 1 +SLIMLY 1 0 1 1 +SLIGHTLY 4 0 4 4 +SLIGHTEST 1 0 1 1 +SLIGHTER 1 0 1 1 +SLIGHT 4 0 4 4 +SLEPT 2 0 2 2 +SLENDER 2 0 2 2 +SLEEVE 1 0 1 1 +SLEEPING 2 0 2 2 +SLEEP 5 0 5 5 +SLEEK 2 0 2 2 +SLAVES 1 0 1 1 +SLAVERY 3 0 3 3 +SLAVE 1 0 1 1 +SLATED 1 0 1 1 +SLAP 1 0 1 1 +SLANG 12 0 12 12 +SLANDERER 1 0 1 1 +SLAM 1 0 1 1 +SLAKED 1 0 1 1 +SKY 5 0 5 5 +SKIRTS 1 0 1 1 +SKIRT 1 0 1 1 +SKIRMISHES 1 0 1 1 +SKIP 1 0 1 1 +SKINNER 1 0 1 1 +SKINNED 1 0 1 1 +SKIN 3 0 3 3 +SKIMS 1 0 1 1 +SKILL 2 0 2 2 +SKETCHES 1 0 1 1 +SKETCH 1 0 1 1 +SKELETON 1 0 1 1 +SIZZLE 1 0 1 1 +SIZED 1 0 1 1 +SIZE 5 0 5 5 +SIXTY 3 0 3 3 +SIXTH 1 0 1 1 +SIXTEENTH 4 0 4 4 +SIXTEEN 1 0 1 1 +SIX 14 0 14 14 +SITUATION 2 0 2 2 +SITTING 4 0 4 4 +SITE 1 0 1 1 +SIT 11 0 11 11 +SISTERS 5 0 5 5 +SISTER'S 1 0 1 1 +SISTER 8 0 8 8 +SIRE 4 0 4 4 +SIR 36 0 36 36 +SINS 9 0 9 9 +SINNER 2 0 2 2 +SINKS 1 0 1 1 +SINK 1 0 1 1 +SINGS 1 0 1 1 +SINGLED 1 0 1 1 +SINGLE 5 0 5 5 +SINGING 2 0 2 2 +SINGER'S 1 0 1 1 +SINGER 2 0 2 2 +SING 2 0 2 2 +SINFUL 2 0 2 2 +SIN 13 0 13 13 +SIMPLY 10 0 10 10 +SIMPLIFIED 1 0 1 1 +SIMPLICITY 2 0 2 2 +SIMPLE 9 0 9 9 +SIMON 1 0 1 1 +SIMILITUDE 1 0 1 1 +SIMILARLY 1 0 1 1 +SIMILAR 3 0 3 3 +SILVERING 1 0 1 1 +SILVER 8 0 8 8 +SILLINESS 2 0 2 2 +SILKEN 2 0 2 2 +SILK 6 0 6 6 +SILHOUETTE 1 0 1 1 +SILENT 11 0 11 11 +SILENCES 1 0 1 1 +SILENCE 7 0 7 7 +SILAS 1 0 1 1 +SIGNS 4 0 4 4 +SIGNING 1 0 1 1 +SIGNIFICANTLY 1 0 1 1 +SIGNIFICANCE 3 0 3 3 +SIGNED 1 0 1 1 +SIGNATURE 1 0 1 1 +SIGN 5 0 5 5 +SIGHED 4 0 4 4 +SIGH 3 0 3 3 +SIDEWAYS 1 0 1 1 
+SIDES 6 0 6 6 +SIDE 23 0 23 23 +SICKNESS 2 0 2 2 +SICK 2 0 2 2 +SHY 1 0 1 1 +SHUTTING 2 0 2 2 +SHUTTERS 1 0 1 1 +SHUT 3 0 3 3 +SHUNNING 1 0 1 1 +SHUDDER 2 0 2 2 +SHRUGGED 1 0 1 1 +SHRUBBERY 1 0 1 1 +SHRIVELLED 1 0 1 1 +SHRINE 1 0 1 1 +SHRILL 1 0 1 1 +SHRIEKED 1 0 1 1 +SHREWISH 1 0 1 1 +SHREWDLY 1 0 1 1 +SHREWD 1 0 1 1 +SHOWN 1 0 1 1 +SHOWERED 1 0 1 1 +SHOWER 1 0 1 1 +SHOW 10 0 10 10 +SHOUTINGS 1 0 1 1 +SHOUTING 1 0 1 1 +SHOUTED 3 0 3 3 +SHOUT 3 0 3 3 +SHOULDN'T 1 0 1 1 +SHOULDERS 5 0 5 5 +SHOULDER 5 0 5 5 +SHOT 2 0 2 2 +SHORTLY 1 0 1 1 +SHORT 11 0 11 11 +SHORES 1 0 1 1 +SHORE 4 0 4 4 +SHOPS 1 0 1 1 +SHOP 2 0 2 2 +SHOOTING 1 0 1 1 +SHOOT 1 0 1 1 +SHOOK 10 0 10 10 +SHONE 7 0 7 7 +SHOES 3 0 3 3 +SHODDY 1 0 1 1 +SHOCK 4 0 4 4 +SHOAL 1 0 1 1 +SHIVERING 1 0 1 1 +SHIVER 2 0 2 2 +SHIRTS 1 0 1 1 +SHIRK 1 0 1 1 +SHIPS 2 0 2 2 +SHIPPING 1 0 1 1 +SHINING 4 0 4 4 +SHINES 2 0 2 2 +SHINE 2 0 2 2 +SHIFTED 1 0 1 1 +SHIELD 1 0 1 1 +SHERWOOD 1 0 1 1 +SHERIFF'S 3 0 3 3 +SHERIFF 4 0 4 4 +SHEPHERD 2 0 2 2 +SHELVES 1 0 1 1 +SHELTER 1 0 1 1 +SHELLEY'S 1 0 1 1 +SHELL 1 0 1 1 +SHEETING 1 0 1 1 +SHEET 4 0 4 4 +SHEEP 1 0 1 1 +SHEDDING 2 0 2 2 +SHED 1 0 1 1 +SHEAF 1 0 1 1 +SHAWL 1 0 1 1 +SHAVINGS 1 0 1 1 +SHAVEN 1 0 1 1 +SHARPLY 2 0 2 2 +SHARPENED 1 0 1 1 +SHARP 8 0 8 8 +SHARING 1 0 1 1 +SHARED 1 0 1 1 +SHARE 1 0 1 1 +SHAPEN 1 0 1 1 +SHAPELY 1 0 1 1 +SHAPED 1 0 1 1 +SHAPE 1 0 1 1 +SHAN'T 1 0 1 1 +SHAMES 1 0 1 1 +SHAME 3 0 3 3 +SHAM 1 0 1 1 +SHALLOWS 1 0 1 1 +SHALLOW 2 0 2 2 +SHAKING 2 0 2 2 +SHAKEN 1 0 1 1 +SHAGGY 2 0 2 2 +SHAFT 1 0 1 1 +SHADY 2 0 2 2 +SHADOWS 7 0 7 7 +SHADOW 2 0 2 2 +SHADES 1 0 1 1 +SHADE 4 0 4 4 +SHACKLETON 1 0 1 1 +SEXTANT 1 0 1 1 +SEX 2 0 2 2 +SEWING 1 0 1 1 +SEWED 1 0 1 1 +SEVERITY 4 0 4 4 +SEVERITIES 1 0 1 1 +SEVERED 2 0 2 2 +SEVERE 1 0 1 1 +SEVERAL 9 0 9 9 +SEVENTY 2 0 2 2 +SEVENTH 1 0 1 1 +SEVENTEEN 2 0 2 2 +SEVEN 6 0 6 6 +SETTLERS 1 0 1 1 +SETTLER 1 0 1 1 +SETTLEMENTS 1 0 1 1 +SETTLEMENT 3 0 3 3 +SETTLED 1 0 1 1 +SETTLE 3 0 3 3 +SETH 1 0 1 1 +SESSION 1 0 1 1 +SERVITUDE 1 0 1 1 +SERVILE 1 0 1 1 +SERVICEABILITY 1 0 1 1 +SERVICE 12 0 12 12 +SERVED 4 0 4 4 +SERVE 12 0 12 12 +SERVADAC'S 1 0 1 1 +SERVADAC 7 0 7 7 +SERIOUSLY 4 0 4 4 +SERIOUS 3 0 3 3 +SERIES 3 0 3 3 +SERENE 1 0 1 1 +SERAPHIC 1 0 1 1 +SEQUEL 1 0 1 1 +SEPARATION 1 0 1 1 +SEPARATED 3 0 3 3 +SENTIMENTS 1 0 1 1 +SENTIMENTAL 1 0 1 1 +SENTIMENT 1 0 1 1 +SENTENTIOUSLY 1 0 1 1 +SENSITIVE 1 0 1 1 +SENSIBLE 1 0 1 1 +SENSES 2 0 2 2 +SENSELESS 1 0 1 1 +SENSATIONS 1 0 1 1 +SENSATIONAL 1 0 1 1 +SENSATION 2 0 2 2 +SENORA 1 0 1 1 +SENOR 1 0 1 1 +SENIOR 1 0 1 1 +SEND 3 0 3 3 +SENATOR 1 0 1 1 +SENATE 2 0 2 2 +SELLING 2 0 2 2 +SELF 5 0 5 5 +SELECTION 2 0 2 2 +SELECTED 1 0 1 1 +SELDOM 3 0 3 3 +SEIZING 2 0 2 2 +SEIZED 3 0 3 3 +SEIZE 1 0 1 1 +SEGMENT 1 0 1 1 +SEES 1 0 1 1 +SEEMINGLY 3 0 3 3 +SEEKERS 1 0 1 1 +SEEK 4 0 4 4 +SEEING 12 0 12 12 +SEEDS 1 0 1 1 +SEED 2 0 2 2 +SECURITY 2 0 2 2 +SECURING 1 0 1 1 +SECURED 3 0 3 3 +SECURE 4 0 4 4 +SECTS 1 0 1 1 +SECRETLY 3 0 3 3 +SECRET 3 0 3 3 +SECRECY 1 0 1 1 +SECONDS 2 0 2 2 +SECONDLY 1 0 1 1 +SECONDED 1 0 1 1 +SECONDARY 1 0 1 1 +SECOND 10 0 10 10 +SECLUSION 1 0 1 1 +SECESSIONISTS 1 0 1 1 +SEATS 3 0 3 3 +SEATING 2 0 2 2 +SEATED 3 0 3 3 +SEAT 4 0 4 4 +SEASONS 2 0 2 2 +SEASONABLE 1 0 1 1 +SEASON 3 0 3 3 +SEAS 2 0 2 2 +SEARCHING 1 0 1 1 +SEARCHED 2 0 2 2 +SEARCH 4 0 4 4 +SEA 18 0 18 18 +SCYTHE 2 0 2 2 +SCURRIED 1 0 1 1 +SCULPTURE 1 0 1 1 +SCRUTINY 1 0 1 1 +SCRUTINIZE 1 0 1 1 +SCRUPLES 1 0 1 1 +SCRUB 1 0 1 1 +SCRIPTURES 1 0 1 1 +SCRIBE 1 0 1 1 +SCRIBBLING 1 0 1 1 +SCRIBBLER 1 0 1 1 +SCREEN 1 0 1 1 +SCREAMED 3 0 3 3 
+SCRAMBLED 1 0 1 1 +SCOWLED 1 0 1 1 +SCOTTISH 2 0 2 2 +SCOTS 1 0 1 1 +SCOTLAND 1 0 1 1 +SCORPION 1 0 1 1 +SCORNFUL 2 0 2 2 +SCORE 2 0 2 2 +SCORCHED 1 0 1 1 +SCOPE 1 0 1 1 +SCOLD 1 0 1 1 +SCIENTISTS 2 0 2 2 +SCIENTIST 1 0 1 1 +SCIENTIFIC 1 0 1 1 +SCIENCE 2 0 2 2 +SCHOONER 2 0 2 2 +SCHOOLS 1 0 1 1 +SCHOOLROOM 1 0 1 1 +SCHOOLED 1 0 1 1 +SCHOLARSHIP 1 0 1 1 +SCHOLAR 1 0 1 1 +SCHISM 1 0 1 1 +SCHEME 5 0 5 5 +SCENT 1 0 1 1 +SCENES 1 0 1 1 +SCATTERS 1 0 1 1 +SCATTERED 3 0 3 3 +SCATTER 1 0 1 1 +SCATHE 1 0 1 1 +SCAROONS 1 0 1 1 +SCARLET 3 0 3 3 +SCARE 1 0 1 1 +SCARCELY 9 0 9 9 +SCARCE 1 0 1 1 +SCALP 1 0 1 1 +SCALE 2 0 2 2 +SAYS 12 0 12 12 +SAYING 15 0 15 15 +SAY 51 0 51 51 +SAWDUST 1 0 1 1 +SAW 23 0 23 23 +SAVIOUR 1 0 1 1 +SAVINGS 1 0 1 1 +SAVING 1 0 1 1 +SAVES 1 0 1 1 +SAVE 9 0 9 9 +SAVAGE 5 0 5 5 +SAUNTERED 1 0 1 1 +SAUCE 1 0 1 1 +SATURDAY 5 0 5 5 +SATISFY 1 0 1 1 +SATISFIED 8 0 8 8 +SATISFACTION 5 0 5 5 +SATANIC 1 0 1 1 +SARCASTIC 1 0 1 1 +SARAH'S 1 0 1 1 +SANK 1 0 1 1 +SANGUINARY 1 0 1 1 +SANG 5 0 5 5 +SANDY 2 0 2 2 +SANDWICHES 1 0 1 1 +SANDFORD 1 0 1 1 +SANCTIFYING 1 0 1 1 +SANCTIFIED 1 0 1 1 +SAMPLE 1 0 1 1 +SAME 35 0 35 35 +SALVATION 1 0 1 1 +SALUTE 1 0 1 1 +SALUTATION 1 0 1 1 +SALON 1 0 1 1 +SALINE 1 0 1 1 +SALIENT 2 0 2 2 +SAKE 4 0 4 4 +SAINTS 5 0 5 5 +SAINT 14 0 14 14 +SAILS 2 0 2 2 +SAILORS 1 0 1 1 +SAILORMAN 1 0 1 1 +SAILOR 1 0 1 1 +SAIL 6 0 6 6 +SAFETY 2 0 2 2 +SAFEST 1 0 1 1 +SAFE 5 0 5 5 +SADLY 2 0 2 2 +SADDLER 1 0 1 1 +SADDLE 1 0 1 1 +SAD 3 0 3 3 +SACRIFICE 2 0 2 2 +SACRED 2 0 2 2 +SACRAMENT 1 0 1 1 +S 1 0 1 1 +RUSTY 1 0 1 1 +RUSTLING 2 0 2 2 +RUSTLED 1 0 1 1 +RUST 1 0 1 1 +RUSHING 1 0 1 1 +RUSHED 7 0 7 7 +RUSH 4 0 4 4 +RUNS 2 0 2 2 +RUNNING 8 0 8 8 +RUNG 1 0 1 1 +RUN 9 0 9 9 +RUMMAGED 1 0 1 1 +RUMINATED 1 0 1 1 +RULER 1 0 1 1 +RULED 1 0 1 1 +RULE 2 0 2 2 +RUINS 1 0 1 1 +RUINED 1 0 1 1 +RUIN 2 0 2 2 +RUFFLED 1 0 1 1 +RUFFIANS 1 0 1 1 +RUFFIAN 2 0 2 2 +RUE 1 0 1 1 +RUDELY 1 0 1 1 +RUDE 2 0 2 2 +RUBY 1 0 1 1 +RUBBING 1 0 1 1 +RUBBED 1 0 1 1 +ROYALISTS 2 0 2 2 +ROYAL 9 0 9 9 +ROWS 2 0 2 2 +ROW 5 0 5 5 +ROVING 1 0 1 1 +ROUTINE 1 0 1 1 +ROUTE 2 0 2 2 +ROUT 1 0 1 1 +ROUSES 1 0 1 1 +ROUSED 1 0 1 1 +ROUSE 1 0 1 1 +ROUNDED 1 0 1 1 +ROUGHLY 7 0 7 7 +ROUGHEST 1 0 1 1 +ROUGH 3 0 3 3 +ROSES 2 0 2 2 +ROSE 14 0 14 14 +ROSALIE 4 0 4 4 +ROOTS 1 0 1 1 +ROOMS 3 0 3 3 +ROOM 41 0 41 41 +ROOFS 1 0 1 1 +ROMANCE 1 0 1 1 +ROMAN 1 0 1 1 +ROLLERS 2 0 2 2 +ROLLED 3 0 3 3 +ROLL 1 0 1 1 +ROGERS'S 1 0 1 1 +ROGERS 2 0 2 2 +ROD 1 0 1 1 +ROCKY 1 0 1 1 +ROCKS 2 0 2 2 +ROCKING 2 0 2 2 +ROCKED 1 0 1 1 +ROCK 1 0 1 1 +ROBUST 1 0 1 1 +ROBINSON 1 0 1 1 +ROBIN'S 1 0 1 1 +ROBIN 19 0 19 19 +ROBERT 2 0 2 2 +ROBBING 1 0 1 1 +ROBBER 1 0 1 1 +ROARINGS 1 0 1 1 +ROARED 1 0 1 1 +ROADS 1 0 1 1 +ROAD 4 0 4 4 +RIVULET 2 0 2 2 +RIVER 6 0 6 6 +RIVAL 2 0 2 2 +RISING 3 0 3 3 +RISEN 1 0 1 1 +RISE 1 0 1 1 +RIPPLING 1 0 1 1 +RIOTING 2 0 2 2 +RIOT 1 0 1 1 +RINGS 1 0 1 1 +RING 3 0 3 3 +RIGOROUSLY 1 0 1 1 +RIGOROUS 1 0 1 1 +RIGIDLY 1 0 1 1 +RIGIDITY 2 0 2 2 +RIGID 1 0 1 1 +RIGHTS 1 0 1 1 +RIGHTLY 1 0 1 1 +RIGHTEOUSNESS 1 0 1 1 +RIGHTEOUS 1 0 1 1 +RIGHT 25 0 25 25 +RIGGING 1 0 1 1 +RIFLES 1 0 1 1 +RIDGE 1 0 1 1 +RIDES 1 0 1 1 +RIDE 1 0 1 1 +RIDDLE 1 0 1 1 +RICHLY 1 0 1 1 +RICHEST 1 0 1 1 +RICHER 1 0 1 1 +RICH 11 0 11 11 +RHYTHM 1 0 1 1 +RHONE 1 0 1 1 +REYNOLDS 2 0 2 2 +REWARDED 2 0 2 2 +REWARD 3 0 3 3 +REVOLVING 1 0 1 1 +REVIVES 1 0 1 1 +REVERT 1 0 1 1 +REVERSED 1 0 1 1 +REVERSAL 1 0 1 1 +REVERIE 2 0 2 2 +REVEREND 1 0 1 1 +REVERENCE 1 0 1 1 +REVENGE 3 0 3 3 +REVELATION 1 0 1 1 +REVEAL 1 0 1 1 +RETURNING 1 0 1 1 +RETURNED 18 0 18 18 +RETURN 9 0 9 9 
+RETRIEVE 1 0 1 1 +RETREATED 1 0 1 1 +RETREAT 3 0 3 3 +RETRACE 1 0 1 1 +RETIREMENT 2 0 2 2 +RETIRED 2 0 2 2 +RETIRE 1 0 1 1 +RETAINERS 1 0 1 1 +RETAINER 1 0 1 1 +RETAINED 3 0 3 3 +RETAIN 1 0 1 1 +RESURRECTION 3 0 3 3 +RESUMED 2 0 2 2 +RESULTS 4 0 4 4 +RESULTED 3 0 3 3 +RESULT 5 0 5 5 +RESTRAINED 1 0 1 1 +RESTORING 1 0 1 1 +RESTORED 2 0 2 2 +RESTLESS 4 0 4 4 +RESTED 2 0 2 2 +REST 14 0 14 14 +RESPONSIBLE 2 0 2 2 +RESPONSES 1 0 1 1 +RESPONDED 3 0 3 3 +RESPECTS 1 0 1 1 +RESPECTING 1 0 1 1 +RESPECT 3 0 3 3 +RESOURCES 2 0 2 2 +RESOUNDING 1 0 1 1 +RESORT 2 0 2 2 +RESOLVED 3 0 3 3 +RESOLVE 2 0 2 2 +RESOLUTIONS 1 0 1 1 +RESOLUTION 1 0 1 1 +RESOLUTE 1 0 1 1 +RESISTANCE 1 0 1 1 +RESIST 1 0 1 1 +RESIGNED 2 0 2 2 +RESIGNATION 2 0 2 2 +RESIDENCES 1 0 1 1 +RESIDENCE 3 0 3 3 +RESIDE 1 0 1 1 +RESERVOIR 1 0 1 1 +RESERVED 1 0 1 1 +RESERVE 2 0 2 2 +RESENTFUL 1 0 1 1 +RESENTED 1 0 1 1 +RESEMBLING 2 0 2 2 +RESEMBLED 1 0 1 1 +RESEMBLE 1 0 1 1 +RESEMBLANCE 2 0 2 2 +RESCUE 2 0 2 2 +REQUISITION 1 0 1 1 +REQUIRING 1 0 1 1 +REQUIREMENTS 1 0 1 1 +REQUIRED 3 0 3 3 +REQUIRE 4 0 4 4 +REQUEST 1 0 1 1 +REPUTE 2 0 2 2 +REPUTATION 1 0 1 1 +REPUGNANT 1 0 1 1 +REPUBLISH 1 0 1 1 +REPUBLICAN 1 0 1 1 +REPUBLIC 5 0 5 5 +REPTILES 2 0 2 2 +REPROOF 1 0 1 1 +REPRODUCE 1 0 1 1 +REPROACHING 1 0 1 1 +REPROACHFULLY 1 0 1 1 +REPROACH 2 0 2 2 +REPRESS 2 0 2 2 +REPRESENTS 1 0 1 1 +REPRESENTING 1 0 1 1 +REPRESENTATIVE 1 0 1 1 +REPRESENTATION 1 0 1 1 +REPRESENT 2 0 2 2 +REPOSE 2 0 2 2 +REPORT 2 0 2 2 +REPLY 7 0 7 7 +REPLIED 20 0 20 20 +REPLACES 1 0 1 1 +REPLACE 1 0 1 1 +REPETITION 1 0 1 1 +REPENTING 1 0 1 1 +REPENTANCE 1 0 1 1 +REPENT 1 0 1 1 +REPELLENT 1 0 1 1 +REPELLED 1 0 1 1 +REPEATED 1 0 1 1 +REPAST 1 0 1 1 +REPARTEES 1 0 1 1 +REPAIRS 1 0 1 1 +REPAIRED 1 0 1 1 +RENTED 1 0 1 1 +RENTAL 1 0 1 1 +RENT 1 0 1 1 +RENEWED 2 0 2 2 +RENEWABLE 1 0 1 1 +RENDING 1 0 1 1 +RENDERING 2 0 2 2 +RENDERED 2 0 2 2 +RENDER 3 0 3 3 +REMOTENESS 1 0 1 1 +REMOTE 2 0 2 2 +REMONSTRANCE 1 0 1 1 +REMNANT 2 0 2 2 +REMISSION 2 0 2 2 +REMIND 1 0 1 1 +REMEMBRANCE 2 0 2 2 +REMEMBERS 1 0 1 1 +REMEMBERING 3 0 3 3 +REMEDY 1 0 1 1 +REMARKS 1 0 1 1 +REMARKING 1 0 1 1 +REMARKABLE 2 0 2 2 +REMAINS 3 0 3 3 +REMAIN 6 0 6 6 +RELY 1 0 1 1 +RELUCTANTLY 1 0 1 1 +RELOCATED 1 0 1 1 +RELINQUISHED 1 0 1 1 +RELIGIOUS 1 0 1 1 +RELIGION 4 0 4 4 +RELIEVE 2 0 2 2 +RELIES 1 0 1 1 +RELIEF 1 0 1 1 +RELIANCE 1 0 1 1 +RELIABLE 1 0 1 1 +RELATIVES 1 0 1 1 +RELATIVE 1 0 1 1 +RELATIONSHIP 1 0 1 1 +RELATIONS 2 0 2 2 +RELATION 4 0 4 4 +RELATED 3 0 3 3 +RELATE 2 0 2 2 +RELAPSES 1 0 1 1 +REJOICING 2 0 2 2 +REJOICED 1 0 1 1 +REJOICE 5 0 5 5 +REINS 1 0 1 1 +REIGNS 1 0 1 1 +REIGNED 2 0 2 2 +REIGN 1 0 1 1 +REGULATOR 1 0 1 1 +REGULATIONS 1 0 1 1 +REGULATION 1 0 1 1 +REGULATED 1 0 1 1 +REGULARLY 1 0 1 1 +REGULARITY 1 0 1 1 +REGULAR 1 0 1 1 +REGRET 1 0 1 1 +REGISTRATION 1 0 1 1 +REGISTERS 1 0 1 1 +REGISTERED 1 0 1 1 +REGIONS 1 0 1 1 +REGION 1 0 1 1 +REGIMENTS 1 0 1 1 +REGIMENT 1 0 1 1 +REGGIE 2 0 2 2 +REGARDS 2 0 2 2 +REGARDLESS 1 0 1 1 +REGARDED 2 0 2 2 +REGARD 5 0 5 5 +REGAINING 1 0 1 1 +REGAINED 1 0 1 1 +REFUSING 2 0 2 2 +REFUSAL 1 0 1 1 +REFUGEES 1 0 1 1 +REFUGE 1 0 1 1 +REFRESHING 1 0 1 1 +REFRESH 2 0 2 2 +REFRAINED 1 0 1 1 +REFORMS 1 0 1 1 +REFORM 1 0 1 1 +REFLECTIONS 1 0 1 1 +REFLECTION 4 0 4 4 +REFLECTED 3 0 3 3 +REFLECT 1 0 1 1 +REFINEMENTS 1 0 1 1 +REFINEMENT 1 0 1 1 +REFINED 1 0 1 1 +REFERRING 2 0 2 2 +REFER 2 0 2 2 +REED 1 0 1 1 +REDUCED 1 0 1 1 +REDOUBLES 1 0 1 1 +REDOUBLED 1 0 1 1 +REDMAN'S 1 0 1 1 +REDEEMER 1 0 1 1 +RECUR 1 0 1 1 +RECTOR 2 0 2 2 +RECREATION 2 0 2 2 +RECOVERY 1 0 1 1 +RECOVERING 1 0 
1 1 +RECOVERED 1 0 1 1 +RECOVER 1 0 1 1 +RECONCILIATION 1 0 1 1 +RECOLLECTIONS 1 0 1 1 +RECOILED 2 0 2 2 +RECOGNIZE 4 0 4 4 +RECOGNITION 9 0 9 9 +RECLINING 1 0 1 1 +RECKONING 2 0 2 2 +RECKONED 1 0 1 1 +RECKON 1 0 1 1 +RECKLESS 1 0 1 1 +RECITED 5 0 5 5 +RECITE 2 0 2 2 +RECESSES 1 0 1 1 +RECEPTION 4 0 4 4 +RECENTLY 1 0 1 1 +RECENT 2 0 2 2 +RECEIVING 2 0 2 2 +RECEIVES 1 0 1 1 +RECEIVER 1 0 1 1 +RECEIVED 9 0 9 9 +RECEIVE 3 0 3 3 +RECAPTURED 1 0 1 1 +RECALLING 1 0 1 1 +RECALLED 4 0 4 4 +RECALL 1 0 1 1 +REBUKES 1 0 1 1 +REBUKE 1 0 1 1 +REBELLION 2 0 2 2 +REBATE 1 0 1 1 +REASSURED 1 0 1 1 +REASONS 1 0 1 1 +REASONING 1 0 1 1 +REASON 19 0 19 19 +REAR 1 0 1 1 +REALLY 10 0 10 10 +REALIZED 2 0 2 2 +REALIZE 4 0 4 4 +REALITY 8 0 8 8 +REAL 16 0 16 16 +READY 10 0 10 10 +READINESS 1 0 1 1 +READILY 2 0 2 2 +REACHING 2 0 2 2 +REACHED 12 0 12 12 +REACH 3 0 3 3 +RAVISHING 1 0 1 1 +RAVING 1 0 1 1 +RAVINES 1 0 1 1 +RATS 1 0 1 1 +RATIFY 1 0 1 1 +RATIFICATION 1 0 1 1 +RATHER 23 0 23 23 +RATED 2 0 2 2 +RATE 7 0 7 7 +RASH 1 0 1 1 +RASCAL 1 0 1 1 +RARELY 1 0 1 1 +RARE 5 0 5 5 +RAPTUROUS 1 0 1 1 +RAPTURES 1 0 1 1 +RAPIDS 3 0 3 3 +RAPIDLY 3 0 3 3 +RAPIDITY 2 0 2 2 +RAPID 3 0 3 3 +RANSOM 1 0 1 1 +RANKING 1 0 1 1 +RANKED 1 0 1 1 +RANK 5 0 5 5 +RANGERS 1 0 1 1 +RANGED 1 0 1 1 +RANGE 4 0 4 4 +RANG 4 0 4 4 +RAN 12 0 12 12 +RAMPART 1 0 1 1 +RAISED 6 0 6 6 +RAISE 1 0 1 1 +RAINS 1 0 1 1 +RAINDROPS 1 0 1 1 +RAINBOW 1 0 1 1 +RAIN 3 0 3 3 +RAILROADS 1 0 1 1 +RAGS 2 0 2 2 +RAGGED 1 0 1 1 +RAGE 3 0 3 3 +RAFT 7 0 7 7 +RADIE 2 0 2 2 +RADICALS 1 0 1 1 +RADICALISM 1 0 1 1 +RADIANCE 1 0 1 1 +RACKED 1 0 1 1 +RACK 1 0 1 1 +RACHEL'S 1 0 1 1 +RACHEL 16 0 16 16 +RACES 6 0 6 6 +RACE 1 0 1 1 +RABBIT 2 0 2 2 +QUOTE 2 0 2 2 +QUIVERING 2 0 2 2 +QUIVERED 1 0 1 1 +QUITTING 1 0 1 1 +QUITTED 1 0 1 1 +QUITE 29 0 29 29 +QUINCY 1 0 1 1 +QUILT 3 0 3 3 +QUIETLY 5 0 5 5 +QUIET 4 0 4 4 +QUICKLY 5 0 5 5 +QUESTIONS 4 0 4 4 +QUESTIONING 1 0 1 1 +QUESTIONED 2 0 2 2 +QUESTION 12 0 12 12 +QUEST 1 0 1 1 +QUERIED 1 0 1 1 +QUEER 3 0 3 3 +QUEENSTOWN 1 0 1 1 +QUEENS 1 0 1 1 +QUEEN'S 1 0 1 1 +QUEEN 8 0 8 8 +QUASH 1 0 1 1 +QUARTERS 3 0 3 3 +QUARTER 7 0 7 7 +QUARRY 2 0 2 2 +QUARREL 1 0 1 1 +QUANTITY 4 0 4 4 +QUANTITIES 1 0 1 1 +QUALITY 1 0 1 1 +QUALITIES 3 0 3 3 +QUALIFICATIONS 2 0 2 2 +QUALIFICATION 1 0 1 1 +QUAKE 1 0 1 1 +QUAINT 1 0 1 1 +QUADRILLE 2 0 2 2 +QUADRANGLE 1 0 1 1 +PYTHON 1 0 1 1 +PYTHAGOREANS 1 0 1 1 +PYRAMIDS 2 0 2 2 +PUZZLED 1 0 1 1 +PUT 32 0 32 32 +PUSHED 2 0 2 2 +PUSH 2 0 2 2 +PURSUITS 3 0 3 3 +PURSUIT 2 0 2 2 +PURSUER 1 0 1 1 +PURSUED 1 0 1 1 +PURSUE 1 0 1 1 +PURRING 1 0 1 1 +PURPOSES 2 0 2 2 +PURPLE 1 0 1 1 +PURITY 2 0 2 2 +PURELY 1 0 1 1 +PURCHASED 1 0 1 1 +PUPIL 1 0 1 1 +PUNISHMENT 4 0 4 4 +PUNISHED 4 0 4 4 +PUNCH 1 0 1 1 +PUMP 1 0 1 1 +PULPIT 1 0 1 1 +PULLING 2 0 2 2 +PULLED 2 0 2 2 +PULL 1 0 1 1 +PUFFY 1 0 1 1 +PUBLISH 1 0 1 1 +PUBLIC 13 0 13 13 +PSYCHOLOGY 1 0 1 1 +PSYCHE 2 0 2 2 +PSALM 3 0 3 3 +PRYNNE 4 0 4 4 +PRUDENT 2 0 2 2 +PROWESS 1 0 1 1 +PROW 1 0 1 1 +PROVISIONALLY 1 0 1 1 +PROVISION 2 0 2 2 +PROVINCES 2 0 2 2 +PROVINCE 2 0 2 2 +PROVIDING 1 0 1 1 +PROVIDED 3 0 3 3 +PROVES 2 0 2 2 +PROVEN 1 0 1 1 +PROVED 6 0 6 6 +PROVE 4 0 4 4 +PROUDLY 1 0 1 1 +PROUD 5 0 5 5 +PROTESTED 1 0 1 1 +PROTEST 1 0 1 1 +PROTECTOR 1 0 1 1 +PROTECTION 3 0 3 3 +PROTECTING 1 0 1 1 +PROTECTED 1 0 1 1 +PROTECT 4 0 4 4 +PROSTRATION 2 0 2 2 +PROSPECTS 1 0 1 1 +PROSELYTING 1 0 1 1 +PROSELYTES 1 0 1 1 +PROSECUTION 1 0 1 1 +PROSECUTE 1 0 1 1 +PROSE 1 0 1 1 +PROSCRIPTION 1 0 1 1 +PROSCRIBED 1 0 1 1 +PROPRIETY 1 0 1 1 +PROPRIETORS 1 0 1 1 +PROPRIETOR 1 0 1 1 +PROPOSED 2 0 2 2 
+PROPOSE 1 0 1 1 +PROPOSALS 1 0 1 1 +PROPORTIONS 3 0 3 3 +PROPHETS 1 0 1 1 +PROPHET 1 0 1 1 +PROPERTY 6 0 6 6 +PROPERTIES 1 0 1 1 +PROPERLY 4 0 4 4 +PROPER 3 0 3 3 +PROPENSITIES 1 0 1 1 +PROOFS 4 0 4 4 +PROOF 3 0 3 3 +PRONUNCIATION 1 0 1 1 +PRONOUNCED 4 0 4 4 +PROMPTLY 1 0 1 1 +PROMPT 1 0 1 1 +PROMOTING 1 0 1 1 +PROMOTED 1 0 1 1 +PROMISES 2 0 2 2 +PROMISED 4 0 4 4 +PROMISE 4 0 4 4 +PROMINENT 2 0 2 2 +PROLIFIC 1 0 1 1 +PROJECTION 1 0 1 1 +PROJECT 2 0 2 2 +PROGRESSING 1 0 1 1 +PROGRESS 6 0 6 6 +PROGRAMME 1 0 1 1 +PROFOUND 2 0 2 2 +PROFITABLE 1 0 1 1 +PROFESSOR 6 0 6 6 +PROFESSIONS 1 0 1 1 +PROFESSION 1 0 1 1 +PROFESSING 1 0 1 1 +PROFESSED 1 0 1 1 +PRODUCTIVE 1 0 1 1 +PRODUCTION 1 0 1 1 +PRODUCT 1 0 1 1 +PRODUCING 2 0 2 2 +PRODUCED 5 0 5 5 +PRODUCE 3 0 3 3 +PRODIGAL 1 0 1 1 +PROCOPE 2 0 2 2 +PROCESSION 1 0 1 1 +PROCESSES 1 0 1 1 +PROCESS 3 0 3 3 +PROCEEDINGS 2 0 2 2 +PROCEEDING 2 0 2 2 +PROCEEDED 2 0 2 2 +PROBLEM 4 0 4 4 +PROBING 1 0 1 1 +PROBABLE 2 0 2 2 +PRO 2 0 2 2 +PRIZE 1 0 1 1 +PRIVILEGE 3 0 3 3 +PRIVATION 1 0 1 1 +PRIVATELY 1 0 1 1 +PRIVATE 11 0 11 11 +PRIVACY 1 0 1 1 +PRISTINE 1 0 1 1 +PRISONER 4 0 4 4 +PRISON 4 0 4 4 +PRIOR 1 0 1 1 +PRINTING 1 0 1 1 +PRINTER 2 0 2 2 +PRINCIPLES 2 0 2 2 +PRINCIPLE 4 0 4 4 +PRINCIPAL 4 0 4 4 +PRINCESSES 2 0 2 2 +PRINCESS 9 0 9 9 +PRINCES 2 0 2 2 +PRINCE 3 0 3 3 +PRIMLY 1 0 1 1 +PRIMITIVE 2 0 2 2 +PRIMATE 1 0 1 1 +PRIMARY 1 0 1 1 +PRIMARILY 1 0 1 1 +PRIESTHOOD 1 0 1 1 +PRIEST 1 0 1 1 +PRIDE 9 0 9 9 +PRICE 2 0 2 2 +PREY 2 0 2 2 +PREVIOUSLY 2 0 2 2 +PREVIOUS 2 0 2 2 +PREVENTS 1 0 1 1 +PREVENTING 1 0 1 1 +PREVAILING 1 0 1 1 +PREVAILED 3 0 3 3 +PREVAIL 1 0 1 1 +PRETTY 10 0 10 10 +PRETTIEST 1 0 1 1 +PRETENSION 1 0 1 1 +PRETENDED 1 0 1 1 +PRESUMED 1 0 1 1 +PRESUMABLY 1 0 1 1 +PRESSURE 4 0 4 4 +PRESSING 1 0 1 1 +PRESSED 2 0 2 2 +PRESS 2 0 2 2 +PRESIDENT 5 0 5 5 +PRESIDED 1 0 1 1 +PRESERVED 1 0 1 1 +PRESERVE 1 0 1 1 +PRESENTS 3 0 3 3 +PRESENTLY 7 0 7 7 +PRESENTING 2 0 2 2 +PRESENTED 1 0 1 1 +PRESENCE 6 0 6 6 +PRESCRIBED 1 0 1 1 +PREROGATIVES 1 0 1 1 +PREPARING 1 0 1 1 +PREPARED 3 0 3 3 +PREPARE 1 0 1 1 +PREOCCUPIED 1 0 1 1 +PREOCCUPATION 1 0 1 1 +PREMISES 1 0 1 1 +PRELIMINARY 1 0 1 1 +PREFERENCE 1 0 1 1 +PREDOMINATE 1 0 1 1 +PREDOMINANCE 1 0 1 1 +PREDICTED 1 0 1 1 +PREDATORY 2 0 2 2 +PRECONCEIVED 1 0 1 1 +PRECISION 1 0 1 1 +PRECISELY 2 0 2 2 +PRECIPITATED 1 0 1 1 +PRECIPITATE 1 0 1 1 +PRECIOUS 3 0 3 3 +PRECAUTION 1 0 1 1 +PREACHER 1 0 1 1 +PREACH 3 0 3 3 +PRAYERS 1 0 1 1 +PRAYED 1 0 1 1 +PRAY 7 0 7 7 +PRATTLED 1 0 1 1 +PRAISES 2 0 2 2 +PRAISED 1 0 1 1 +PRAISE 3 0 3 3 +PRAIRIES 2 0 2 2 +PRAIRIE 2 0 2 2 +PRACTICALLY 4 0 4 4 +PRACTICAL 6 0 6 6 +POYSER 9 0 9 9 +POWERS 3 0 3 3 +POWERLESS 1 0 1 1 +POWERFUL 2 0 2 2 +POWER 22 0 22 22 +POWDERY 1 0 1 1 +POWDER 8 0 8 8 +POVERTY 2 0 2 2 +POURING 1 0 1 1 +POURED 3 0 3 3 +POUR 2 0 2 2 +POUNDED 1 0 1 1 +POUND 4 0 4 4 +POTFULS 1 0 1 1 +POTATOES 1 0 1 1 +POT 2 0 2 2 +POSTS 1 0 1 1 +POSTPONEMENT 1 0 1 1 +POSTERITY 2 0 2 2 +POSTED 1 0 1 1 +POST 3 0 3 3 +POSSIBLY 1 0 1 1 +POSSIBLE 6 0 6 6 +POSSESSING 1 0 1 1 +POSSESSED 3 0 3 3 +POSSESS 3 0 3 3 +POSSE 2 0 2 2 +POSITIVELY 3 0 3 3 +POSITIVE 1 0 1 1 +POSITION 9 0 9 9 +PORTRAIT 1 0 1 1 +PORTIONS 3 0 3 3 +PORTION 2 0 2 2 +PORTAL 1 0 1 1 +PORT 1 0 1 1 +POPULATION 3 0 3 3 +POPULATED 1 0 1 1 +POPULAR 7 0 7 7 +POPPIES 1 0 1 1 +POPPED 1 0 1 1 +POORLY 1 0 1 1 +POOR 15 0 15 15 +POOLS 1 0 1 1 +POOL 1 0 1 1 +PONY 1 0 1 1 +PONDS 1 0 1 1 +PONDERING 1 0 1 1 +POND 3 0 3 3 +POLLY'S 3 0 3 3 +POLLY 4 0 4 4 +POLITICS 1 0 1 1 +POLITICAL 4 0 4 4 +POLITELY 2 0 2 2 +POLITE 1 0 1 1 +POLISHED 3 0 3 3 +POLISH 
1 0 1 1 +POLICE 1 0 1 1 +POLE 2 0 2 2 +POLAR 1 0 1 1 +POISON 1 0 1 1 +POISED 1 0 1 1 +POINTS 3 0 3 3 +POINTING 4 0 4 4 +POINTEDLY 1 0 1 1 +POINTED 3 0 3 3 +POINT 14 0 14 14 +POETRY 1 0 1 1 +POETIC 1 0 1 1 +POETESS 1 0 1 1 +POET 2 0 2 2 +POEMS 1 0 1 1 +POEM 3 0 3 3 +POCKETS 3 0 3 3 +POCKET 3 0 3 3 +PLURALITY 1 0 1 1 +PLUNGES 1 0 1 1 +PLUNDER 2 0 2 2 +PLUMES 1 0 1 1 +PLUCKING 1 0 1 1 +PLOT 3 0 3 3 +PLIABLE 1 0 1 1 +PLENTY 1 0 1 1 +PLENTIFUL 1 0 1 1 +PLEASURE 5 0 5 5 +PLEASING 1 0 1 1 +PLEASES 3 0 3 3 +PLEASED 4 0 4 4 +PLEASANTLY 2 0 2 2 +PLEASANT 5 0 5 5 +PLEADED 1 0 1 1 +PLEAD 1 0 1 1 +PLEA 1 0 1 1 +PLAYTHINGS 3 0 3 3 +PLAYS 1 0 1 1 +PLAYING 5 0 5 5 +PLAYER 2 0 2 2 +PLAYED 3 0 3 3 +PLATTERS 1 0 1 1 +PLATONIC 1 0 1 1 +PLATO'S 1 0 1 1 +PLATO 6 0 6 6 +PLATFORM 3 0 3 3 +PLATE 1 0 1 1 +PLASTER 1 0 1 1 +PLANTS 4 0 4 4 +PLANTED 3 0 3 3 +PLANTATIONS 1 0 1 1 +PLANS 2 0 2 2 +PLANNED 2 0 2 2 +PLAN 6 0 6 6 +PLAINER 1 0 1 1 +PLAIN 4 0 4 4 +PLACING 2 0 2 2 +PLACIDITY 1 0 1 1 +PLACID 1 0 1 1 +PLACES 1 0 1 1 +PLACED 8 0 8 8 +PLACE 39 0 39 39 +PITYING 1 0 1 1 +PITY 6 0 6 6 +PITIABLE 1 0 1 1 +PITH 1 0 1 1 +PITCHED 1 0 1 1 +PIT 1 0 1 1 +PISTOL 1 0 1 1 +PIPT 1 0 1 1 +PIPE 2 0 2 2 +PINKIES 3 0 3 3 +PINK 2 0 2 2 +PINIONED 1 0 1 1 +PINING 1 0 1 1 +PINES 1 0 1 1 +PINE 1 0 1 1 +PILLION 1 0 1 1 +PILLARS 1 0 1 1 +PILGRIM'S 1 0 1 1 +PILGRIM 1 0 1 1 +PILED 1 0 1 1 +PIGMENT 1 0 1 1 +PIG 2 0 2 2 +PIETY 1 0 1 1 +PIECES 3 0 3 3 +PIECE 1 0 1 1 +PICTURESQUENESS 1 0 1 1 +PICTURES 3 0 3 3 +PICTURE 6 0 6 6 +PICNIC 1 0 1 1 +PICKED 3 0 3 3 +PICK 3 0 3 3 +PIAZZA 3 0 3 3 +PIANO 4 0 4 4 +PHYSIOLOGY 1 0 1 1 +PHYSIOLOGICAL 1 0 1 1 +PHYSICS 1 0 1 1 +PHYSICAL 2 0 2 2 +PHRONSIE 6 0 6 6 +PHRASE 3 0 3 3 +PHOENICIAN 1 0 1 1 +PHILOSOPHY 2 0 2 2 +PHILOSOPHICAL 1 0 1 1 +PHILOSOPHER 5 0 5 5 +PHILOLOGIST 1 0 1 1 +PHILIP'S 1 0 1 1 +PHILIP 9 0 9 9 +PHILANTHROPIES 1 0 1 1 +PHENOMENON 1 0 1 1 +PHENOMENA 1 0 1 1 +PHEASANT 1 0 1 1 +PHASES 1 0 1 1 +PHASE 1 0 1 1 +PHANTOM 1 0 1 1 +PEWTER 2 0 2 2 +PET 1 0 1 1 +PERVERTERS 1 0 1 1 +PERVERTED 1 0 1 1 +PERVERSE 1 0 1 1 +PERVADED 1 0 1 1 +PERTH 1 0 1 1 +PERSUASIVE 1 0 1 1 +PERSUADED 1 0 1 1 +PERSUADE 1 0 1 1 +PERSPIRATION 1 0 1 1 +PERSONS 13 0 13 13 +PERSONALLY 2 0 2 2 +PERSONAL 7 0 7 7 +PERSONAGE 2 0 2 2 +PERSEVERED 1 0 1 1 +PERSECUTORS 1 0 1 1 +PERSECUTION 2 0 2 2 +PERSECUTED 1 0 1 1 +PERPLEXITY 1 0 1 1 +PERPLEXED 2 0 2 2 +PERPETUATE 1 0 1 1 +PERPETUAL 1 0 1 1 +PERNICIOUS 1 0 1 1 +PERMITTING 1 0 1 1 +PERMITTED 2 0 2 2 +PERMIT 1 0 1 1 +PERMISSION 1 0 1 1 +PERMANENT 2 0 2 2 +PERISHES 1 0 1 1 +PERIODICALS 1 0 1 1 +PERIOD 8 0 8 8 +PERILS 1 0 1 1 +PERIL 2 0 2 2 +PERHAPS 17 0 17 17 +PERFORMING 1 0 1 1 +PERFORMED 1 0 1 1 +PERFORM 3 0 3 3 +PERFECTLY 8 0 8 8 +PERFECTION 4 0 4 4 +PERFECT 7 0 7 7 +PERCY 1 0 1 1 +PERCHES 1 0 1 1 +PERCHANCE 1 0 1 1 +PERCH 2 0 2 2 +PERCEPTION 2 0 2 2 +PERCEIVING 1 0 1 1 +PERCEIVES 1 0 1 1 +PERCEIVED 2 0 2 2 +PERCEIVE 2 0 2 2 +PEPPERS 1 0 1 1 +PEPPERED 1 0 1 1 +PEPPER 1 0 1 1 +PEOPLE 36 0 36 36 +PENSIVE 1 0 1 1 +PENSION 1 0 1 1 +PENINSULA 2 0 2 2 +PENETRATING 1 0 1 1 +PENETRATE 2 0 2 2 +PENDULOUS 1 0 1 1 +PENCILLED 1 0 1 1 +PENCIL 2 0 2 2 +PENANCE 1 0 1 1 +PENALTY 2 0 2 2 +PENAL 1 0 1 1 +PELL 1 0 1 1 +PEGRE 1 0 1 1 +PEERING 3 0 3 3 +PEEPED 2 0 2 2 +PECUNIARY 6 0 6 6 +PECULIARLY 1 0 1 1 +PECULIAR 1 0 1 1 +PEARLY 2 0 2 2 +PEARL'S 1 0 1 1 +PEARL 12 0 12 12 +PEAKED 1 0 1 1 +PEACEFUL 1 0 1 1 +PEACEABLE 1 0 1 1 +PEACE 14 0 14 14 +PAYMENT 1 0 1 1 +PAYING 1 0 1 1 +PAY 3 0 3 3 +PAVEMENT 1 0 1 1 +PAUSED 4 0 4 4 +PAUSE 1 0 1 1 +PAUL 15 0 15 15 +PATTING 1 0 1 1 +PATRONIZING 1 0 1 1 +PATRON 1 0 1 1 
+PATRIARCHAL 1 0 1 1 +PATHS 1 0 1 1 +PATHOLOGICAL 1 0 1 1 +PATH 4 0 4 4 +PATCHWORK 4 0 4 4 +PATCHING 2 0 2 2 +PATCHES 2 0 2 2 +PATCHED 1 0 1 1 +PATCH 4 0 4 4 +PASTNESS 1 0 1 1 +PASTEBOARD 1 0 1 1 +PAST 12 0 12 12 +PASSIONS 1 0 1 1 +PASSIONATE 1 0 1 1 +PASSION 3 0 3 3 +PASSING 3 0 3 3 +PASSES 1 0 1 1 +PASSER 1 0 1 1 +PASSED 15 0 15 15 +PASSAGES 1 0 1 1 +PASS 5 0 5 5 +PARTY 9 0 9 9 +PARTS 7 0 7 7 +PARTOOK 2 0 2 2 +PARTNER 1 0 1 1 +PARTLY 7 0 7 7 +PARTITION 1 0 1 1 +PARTISAN 1 0 1 1 +PARTING 2 0 2 2 +PARTIES 4 0 4 4 +PARTICULARS 2 0 2 2 +PARTICULARLY 6 0 6 6 +PARTICULAR 4 0 4 4 +PARTICLES 1 0 1 1 +PARTIALLY 1 0 1 1 +PARTED 2 0 2 2 +PART 22 0 22 22 +PARSONS 2 0 2 2 +PARSONAGE 1 0 1 1 +PARROT 2 0 2 2 +PARRIED 1 0 1 1 +PAROQUET 1 0 1 1 +PARLOR 2 0 2 2 +PARLIAMENTS 1 0 1 1 +PARLIAMENTARY 1 0 1 1 +PARLIAMENT 5 0 5 5 +PARK 1 0 1 1 +PARIS 9 0 9 9 +PARENTS 4 0 4 4 +PARENT 2 0 2 2 +PARDON 1 0 1 1 +PARAPHERNALIA 2 0 2 2 +PARALLELOGRAM 1 0 1 1 +PARAGRAPH 1 0 1 1 +PAPERS 8 0 8 8 +PAPER 8 0 8 8 +PANTS 1 0 1 1 +PANTOMIME 1 0 1 1 +PANTING 1 0 1 1 +PANTED 1 0 1 1 +PANS 1 0 1 1 +PANGS 1 0 1 1 +PANG 1 0 1 1 +PANES 1 0 1 1 +PANEL 1 0 1 1 +PANE 1 0 1 1 +PAN 1 0 1 1 +PALM 3 0 3 3 +PALINGS 1 0 1 1 +PALE 8 0 8 8 +PALATE 1 0 1 1 +PALAIS 1 0 1 1 +PALACE 4 0 4 4 +PAIR 5 0 5 5 +PAINTING 2 0 2 2 +PAINTER 2 0 2 2 +PAINTED 4 0 4 4 +PAINFULLY 1 0 1 1 +PAINFUL 3 0 3 3 +PAIL 1 0 1 1 +PAID 7 0 7 7 +PAGES 1 0 1 1 +PAGE 2 0 2 2 +PACKING 1 0 1 1 +PACKET 1 0 1 1 +PACKED 1 0 1 1 +PACK 1 0 1 1 +PACING 1 0 1 1 +PACIFIED 1 0 1 1 +PACED 3 0 3 3 +OZMA 1 0 1 1 +OZ 4 0 4 4 +OWNER 1 0 1 1 +OWNED 2 0 2 2 +OWN 69 0 69 69 +OWLS 1 0 1 1 +OWING 3 0 3 3 +OWEN 1 0 1 1 +OVERWHELMED 1 0 1 1 +OVERWHELM 1 0 1 1 +OVERTHROW 1 0 1 1 +OVERSTATEMENT 1 0 1 1 +OVERRATED 1 0 1 1 +OVERLOOKER 1 0 1 1 +OVERLOOKED 1 0 1 1 +OVERLEAN 1 0 1 1 +OVERHEAD 1 0 1 1 +OVERHANGING 2 0 2 2 +OVERFLOWING 1 0 1 1 +OVERCOAT 1 0 1 1 +OVERBEARING 1 0 1 1 +OVAL 2 0 2 2 +OUTWARD 1 0 1 1 +OUTSTRIP 1 0 1 1 +OUTSTRETCHED 2 0 2 2 +OUTSKIRTS 1 0 1 1 +OUTSIDE 4 0 4 4 +OUTSET 1 0 1 1 +OUTRIGHT 1 0 1 1 +OUTRAGE 1 0 1 1 +OUTLINED 1 0 1 1 +OUTLINE 2 0 2 2 +OUTLAWS 3 0 3 3 +OUTFIT 1 0 1 1 +OUTER 2 0 2 2 +OUTCRY 1 0 1 1 +OUTCAST 1 0 1 1 +OURSELVES 6 0 6 6 +OURS 2 0 2 2 +OUR 81 0 81 81 +OUNCE 2 0 2 2 +OUGHTER 1 0 1 1 +OUGHT 10 0 10 10 +OTHERWISE 5 0 5 5 +OTHERS 23 0 23 23 +OTHER'S 2 0 2 2 +OSTENSIBLY 2 0 2 2 +OSTENSIBLE 1 0 1 1 +OSH 1 0 1 1 +OSCILLATION 1 0 1 1 +OSAGE 1 0 1 1 +ORNAMENTS 1 0 1 1 +ORNAMENTAL 2 0 2 2 +ORNAMENT 3 0 3 3 +ORLEANS 1 0 1 1 +ORIGINATED 1 0 1 1 +ORIGINAL 1 0 1 1 +ORIGIN 7 0 7 7 +ORGANS 1 0 1 1 +ORGANIZED 5 0 5 5 +ORGANIZATIONS 4 0 4 4 +ORGANIZATION 3 0 3 3 +ORGAN 1 0 1 1 +ORDINARY 3 0 3 3 +ORDERS 3 0 3 3 +ORDERLY 1 0 1 1 +ORDERED 3 0 3 3 +ORDER 22 0 22 22 +ORCHARD 4 0 4 4 +ORBIT 2 0 2 2 +ORANGE 1 0 1 1 +ORACLE 1 0 1 1 +OPPRESSOR 1 0 1 1 +OPPRESSIVE 1 0 1 1 +OPPRESSION 1 0 1 1 +OPPRESSED 1 0 1 1 +OPPOSITION 4 0 4 4 +OPPOSITE 2 0 2 2 +OPPOSED 2 0 2 2 +OPPOSE 4 0 4 4 +OPPORTUNITY 4 0 4 4 +OPPORTUNITIES 2 0 2 2 +OPPORTUNE 1 0 1 1 +OPPONENT 2 0 2 2 +OPINIONS 1 0 1 1 +OPINION'S 1 0 1 1 +OPINION 9 0 9 9 +OPHELIA 1 0 1 1 +OPERATOR 1 0 1 1 +OPERATIONS 1 0 1 1 +OPERATION 2 0 2 2 +OPERATE 1 0 1 1 +OPENING 6 0 6 6 +OPENED 11 0 11 11 +OPEN 23 0 23 23 +OPAQUE 1 0 1 1 +ONWARD 4 0 4 4 +ONES 2 0 2 2 +ONE'S 1 0 1 1 +ONCE 22 0 22 22 +OLIVE'S 3 0 3 3 +OLIVE 4 0 4 4 +OLDEST 1 0 1 1 +OLDER 3 0 3 3 +OLDEN 1 0 1 1 +OLD 40 0 40 40 +OLAF 2 0 2 2 +OJO 7 0 7 7 +OHIO 1 0 1 1 +OFTEN 13 0 13 13 +OFFICIALS 3 0 3 3 +OFFICIAL 1 0 1 1 +OFFICES 1 0 1 1 +OFFICERS 10 0 10 10 +OFFICER 4 0 4 4 +OFFICE 11 0 11 
11 +OFFERS 1 0 1 1 +OFFERING 1 0 1 1 +OFFER 1 0 1 1 +OFFENSES 1 0 1 1 +OFFENDS 1 0 1 1 +OFFENDED 1 0 1 1 +OFFALS 1 0 1 1 +OFF 25 0 25 25 +ODIOUS 1 0 1 1 +ODIN 1 0 1 1 +ODDLY 1 0 1 1 +ODD 3 0 3 3 +OCTOBER 1 0 1 1 +OCEANOGRAPHER 1 0 1 1 +OCEAN 5 0 5 5 +OCCURS 3 0 3 3 +OCCURRENCES 2 0 2 2 +OCCURRENCE 3 0 3 3 +OCCURRED 2 0 2 2 +OCCUR 1 0 1 1 +OCCUPY 1 0 1 1 +OCCUPIED 6 0 6 6 +OCCUPATION 2 0 2 2 +OCCUPANTS 1 0 1 1 +OCCASIONS 1 0 1 1 +OCCASIONALLY 1 0 1 1 +OCCASIONAL 1 0 1 1 +OCCASION 13 0 13 13 +OBVIOUS 3 0 3 3 +OBTAINED 1 0 1 1 +OBTAIN 3 0 3 3 +OBSTINATE 1 0 1 1 +OBSTINACY 2 0 2 2 +OBSTACLES 1 0 1 1 +OBSERVING 2 0 2 2 +OBSERVERS 1 0 1 1 +OBSERVED 6 0 6 6 +OBSERVE 4 0 4 4 +OBSERVATION 3 0 3 3 +OBSERVANCES 1 0 1 1 +OBSCURE 3 0 3 3 +OBNOXIOUS 1 0 1 1 +OBLIVION 1 0 1 1 +OBLITERATED 1 0 1 1 +OBLITERATE 1 0 1 1 +OBLIGED 1 0 1 1 +OBJECTIONS 1 0 1 1 +OBJECTION 1 0 1 1 +OBJECT 16 0 16 16 +OBEYING 2 0 2 2 +OBEYED 2 0 2 2 +OBEY 1 0 1 1 +OBEDIENCE 1 0 1 1 +OATMEAL 1 0 1 1 +OATH 1 0 1 1 +OARS 1 0 1 1 +OAKS 1 0 1 1 +OAK 2 0 2 2 +O'ER 1 0 1 1 +O'CLOCK 6 0 6 6 +NURSED 1 0 1 1 +NURSE 1 0 1 1 +NUNKIE 1 0 1 1 +NUMIDIA 1 0 1 1 +NUMERICAL 1 0 1 1 +NUMBERS 4 0 4 4 +NUMBERED 1 0 1 1 +NUMBER 6 0 6 6 +NUDITY 1 0 1 1 +NUDGED 1 0 1 1 +NOWHERE 1 0 1 1 +NOW 94 0 94 94 +NOVELS 2 0 2 2 +NOVEL'S 1 0 1 1 +NOVEL 1 0 1 1 +NOURISHING 1 0 1 1 +NOUGHT 1 0 1 1 +NOTWITHSTANDING 1 0 1 1 +NOTTINGHAM 7 0 7 7 +NOTORIOUS 1 0 1 1 +NOTIONS 1 0 1 1 +NOTION 1 0 1 1 +NOTING 1 0 1 1 +NOTICING 1 0 1 1 +NOTICED 1 0 1 1 +NOTICEABLE 1 0 1 1 +NOTICE 3 0 3 3 +NOTED 1 0 1 1 +NOTE 4 0 4 4 +NOTARY 1 0 1 1 +NOTABLY 1 0 1 1 +NOTABLE 3 0 3 3 +NOSE 2 0 2 2 +NORWEGIAN 1 0 1 1 +NORWAY 1 0 1 1 +NORTHERNERS 1 0 1 1 +NORTHERN 1 0 1 1 +NOON 3 0 3 3 +NONSENSE 1 0 1 1 +NONE 12 0 12 12 +NON 4 0 4 4 +NOMADS 1 0 1 1 +NOISILY 1 0 1 1 +NOISE 2 0 2 2 +NODS 1 0 1 1 +NOD 1 0 1 1 +NOBODY 6 0 6 6 +NOBLEST 1 0 1 1 +NOBLER 2 0 2 2 +NOBLEMAN'S 1 0 1 1 +NOBLE 10 0 10 10 +NO 169 0 169 169 +NITROGEN 1 0 1 1 +NINTH 1 0 1 1 +NINETY 2 0 2 2 +NIMBLY 1 0 1 1 +NIMBLE 1 0 1 1 +NIGHTS 3 0 3 3 +NIGHTLY 1 0 1 1 +NIGHTINGALE'S 1 0 1 1 +NIGHTFALL 1 0 1 1 +NIECE 1 0 1 1 +NICEST 1 0 1 1 +NICER 1 0 1 1 +NICELY 1 0 1 1 +NICE 3 0 3 3 +NEXT 12 0 12 12 +NEWSPAPER 4 0 4 4 +NEWS 2 0 2 2 +NEWLY 1 0 1 1 +NEVERTHELESS 3 0 3 3 +NEVERBEND 6 0 6 6 +NETTLES 1 0 1 1 +NETTLED 1 0 1 1 +NET 1 0 1 1 +NESTING 1 0 1 1 +NERVOUSNESS 1 0 1 1 +NERVOUSLY 1 0 1 1 +NERVOUS 4 0 4 4 +NERVES 2 0 2 2 +NEMO 4 0 4 4 +NELLY 1 0 1 1 +NELL 1 0 1 1 +NEITHER 9 0 9 9 +NEIGHBOUR 2 0 2 2 +NEIGHBOR 1 0 1 1 +NEGROES 2 0 2 2 +NEGRO 1 0 1 1 +NEGOTIATIONS 1 0 1 1 +NEGLIGENT 1 0 1 1 +NEGLECTED 2 0 2 2 +NEGLECT 1 0 1 1 +NEEDS 2 0 2 2 +NEEDING 1 0 1 1 +NEEDED 5 0 5 5 +NED 1 0 1 1 +NECK 6 0 6 6 +NECESSITY 2 0 2 2 +NECESSITIES 1 0 1 1 +NECESSARY 9 0 9 9 +NECESSARILY 2 0 2 2 +NEATLY 2 0 2 2 +NEAT 1 0 1 1 +NEARLY 10 0 10 10 +NEAREST 1 0 1 1 +NEARER 4 0 4 4 +NEARED 1 0 1 1 +NEAR 6 0 6 6 +NAY 5 0 5 5 +NAVY 1 0 1 1 +NAUTILUS 2 0 2 2 +NATURES 1 0 1 1 +NATURED 1 0 1 1 +NATURE 17 0 17 17 +NATURALLY 1 0 1 1 +NATURALISTS 2 0 2 2 +NATURALIST 1 0 1 1 +NATURAL 10 0 10 10 +NATIVE 5 0 5 5 +NATIONS 2 0 2 2 +NATIONAL 3 0 3 3 +NATION 2 0 2 2 +NASTY 1 0 1 1 +NARWHALE 1 0 1 1 +NARROWS 1 0 1 1 +NARROW 6 0 6 6 +NARRATIVE 2 0 2 2 +NAPIER 1 0 1 1 +NAPE 1 0 1 1 +NANCY'S 2 0 2 2 +NANCY 1 0 1 1 +NAMES 2 0 2 2 +NAMELY 2 0 2 2 +NAMED 4 0 4 4 +NAME 14 0 14 14 +NAKEDNESS 1 0 1 1 +NAKED 1 0 1 1 +MYTHOLOGICAL 1 0 1 1 +MYTHICAL 1 0 1 1 +MYSTIFIED 1 0 1 1 +MYSTERY 5 0 5 5 +MYSTERIOUSLY 1 0 1 1 +MYSTERIOUS 3 0 3 3 +MYSELF 25 0 25 25 +MUTUAL 2 0 2 2 +MUTTON 1 0 1 1 +MUTTERING 1 0 
1 1 +MUTILATION 1 0 1 1 +MUTABILITY 1 0 1 1 +MUSTARD 1 0 1 1 +MUST 66 0 66 66 +MUSSULMANS 1 0 1 1 +MUSICIANS 1 0 1 1 +MUSIC 6 0 6 6 +MUSHROOMS 1 0 1 1 +MUSEUM 1 0 1 1 +MURMURED 4 0 4 4 +MURMUR 2 0 2 2 +MURDERS 1 0 1 1 +MURDERERS 1 0 1 1 +MUNCHKINS 2 0 2 2 +MUNCHKIN 1 0 1 1 +MULTIPLE 2 0 2 2 +MUFFLED 1 0 1 1 +MUDDY 1 0 1 1 +MUD 1 0 1 1 +MUCH 69 0 69 69 +MOWED 2 0 2 2 +MOW 1 0 1 1 +MOVING 1 0 1 1 +MOVES 1 0 1 1 +MOVEMENTS 3 0 3 3 +MOVEMENT 5 0 5 5 +MOVED 10 0 10 10 +MOUTHS 3 0 3 3 +MOUTHED 2 0 2 2 +MOUTH 5 0 5 5 +MOUSE 2 0 2 2 +MOURNFUL 1 0 1 1 +MOURN 1 0 1 1 +MOUNTAINS 2 0 2 2 +MOTTO 1 0 1 1 +MOTTLED 1 0 1 1 +MOTORS 1 0 1 1 +MOTIVES 5 0 5 5 +MOTIONLESS 1 0 1 1 +MOTIONING 1 0 1 1 +MOTIONED 2 0 2 2 +MOTION 1 0 1 1 +MOTHER 32 0 32 32 +MOST 51 0 51 51 +MOSSY 1 0 1 1 +MOSS 1 0 1 1 +MOSAIC 1 0 1 1 +MORTALS 1 0 1 1 +MORTALLY 1 0 1 1 +MORTAL 1 0 1 1 +MORROW 6 0 6 6 +MORRIS 1 0 1 1 +MORNINGS 1 0 1 1 +MORN 1 0 1 1 +MORMONS 4 0 4 4 +MORMONISM 3 0 3 3 +MORMON 5 0 5 5 +MOREOVER 1 0 1 1 +MORE'S 1 0 1 1 +MORBID 1 0 1 1 +MORAL 1 0 1 1 +MOONLIGHT 2 0 2 2 +MOONBEAMS 1 0 1 1 +MOON 4 0 4 4 +MOOD 2 0 2 2 +MONTROSE'S 1 0 1 1 +MONTROSE 6 0 6 6 +MONTHS 4 0 4 4 +MONTH 4 0 4 4 +MONTFICHET'S 1 0 1 1 +MONTFICHET 9 0 9 9 +MONTALAIS 4 0 4 4 +MONSTERS 2 0 2 2 +MONSTER 1 0 1 1 +MONSIEUR 1 0 1 1 +MONOTONOUS 1 0 1 1 +MONGOOSE 1 0 1 1 +MONCEUX 1 0 1 1 +MOMENTS 5 0 5 5 +MOMENTOUS 1 0 1 1 +MOMENTARY 1 0 1 1 +MOMENT 32 0 32 32 +MOLLY 3 0 3 3 +MOLECULES 1 0 1 1 +MOLDED 1 0 1 1 +MOISTURE 1 0 1 1 +MOIST 1 0 1 1 +MOHICAN 1 0 1 1 +MOHAMMED 1 0 1 1 +MODIFICATION 1 0 1 1 +MODEST 3 0 3 3 +MODES 2 0 2 2 +MODERNS 1 0 1 1 +MODERN 8 0 8 8 +MODERATE 2 0 2 2 +MODEL 1 0 1 1 +MODE 2 0 2 2 +MOCCASIN 1 0 1 1 +MOBS 1 0 1 1 +MOBILITY 2 0 2 2 +MOB 3 0 3 3 +MOANING 1 0 1 1 +MOAN 1 0 1 1 +MO 1 0 1 1 +MIXTURE 2 0 2 2 +MIXED 4 0 4 4 +MITIGATE 1 0 1 1 +MISUNDERSTANDING 1 0 1 1 +MISTY 1 0 1 1 +MISTRESS 10 0 10 10 +MISTAKEN 2 0 2 2 +MISTAKE 2 0 2 2 +MISSUS 23 0 23 23 +MISSOURI 6 0 6 6 +MISSIONARY 1 0 1 1 +MISSIONARIES 1 0 1 1 +MISSION 3 0 3 3 +MISSED 2 0 2 2 +MISGOVERNMENT 1 0 1 1 +MISFORTUNES 1 0 1 1 +MISFORTUNE 4 0 4 4 +MISERY 3 0 3 3 +MISERABLY 1 0 1 1 +MISERABLE 2 0 2 2 +MISDEMEANOR 1 0 1 1 +MISCHIEF 1 0 1 1 +MISCHANCE 1 0 1 1 +MIRROR 2 0 2 2 +MIRACULOUSLY 1 0 1 1 +MIRACLE 2 0 2 2 +MINUTES 6 0 6 6 +MINUTE 2 0 2 2 +MINT 1 0 1 1 +MINORITY 1 0 1 1 +MINISTRY 3 0 3 3 +MINISTERS 1 0 1 1 +MINISTER 3 0 3 3 +MINIATURE 1 0 1 1 +MINGOES 1 0 1 1 +MINGLES 1 0 1 1 +MINGLED 1 0 1 1 +MINE 7 0 7 7 +MINDS 3 0 3 3 +MINDFUL 1 0 1 1 +MINDED 1 0 1 1 +MIND 29 0 29 29 +MILNER'S 3 0 3 3 +MILLIONS 1 0 1 1 +MILLIMETER 1 0 1 1 +MILLIGRAM 1 0 1 1 +MILKING 1 0 1 1 +MILKED 1 0 1 1 +MILK 1 0 1 1 +MILITIA 3 0 3 3 +MILITARY 7 0 7 7 +MILES 7 0 7 7 +MILE 1 0 1 1 +MILDLY 1 0 1 1 +MILD 2 0 2 2 +MILAN 1 0 1 1 +MIKE 2 0 2 2 +MIGHTY 4 0 4 4 +MIGHTILY 1 0 1 1 +MIGHT 48 0 48 48 +MIDWIFE 1 0 1 1 +MIDST 2 0 2 2 +MIDDLE 4 0 4 4 +MIDDAY 1 0 1 1 +MICROSCOPE 1 0 1 1 +MICE 5 0 5 5 +METROPOLIS 1 0 1 1 +METHODS 3 0 3 3 +METHOD 3 0 3 3 +METERS 1 0 1 1 +METAPHOR 1 0 1 1 +METAMORPHOSIS 1 0 1 1 +METALLIC 1 0 1 1 +METAL 1 0 1 1 +MESSRS 1 0 1 1 +MESSAGE 2 0 2 2 +MESS 1 0 1 1 +MESHES 1 0 1 1 +MERRY 1 0 1 1 +MERITS 2 0 2 2 +MERIT 2 0 2 2 +MERIDIAN 2 0 2 2 +MERGANSER 1 0 1 1 +MERELY 5 0 5 5 +MERE 4 0 4 4 +MERCHANT 1 0 1 1 +MER 1 0 1 1 +MENTIONS 1 0 1 1 +MENTIONED 5 0 5 5 +MENTION 1 0 1 1 +MENTAL 2 0 2 2 +MENIAL 1 0 1 1 +MENDING 2 0 2 2 +MEND 2 0 2 2 +MENAGERIE 1 0 1 1 +MEN'S 2 0 2 2 +MEMORY 21 0 21 21 +MEMBERS 4 0 4 4 +MEMBER 2 0 2 2 +MELTS 1 0 1 1 +MELODY 1 0 1 1 +MELL 1 0 1 1 +MELANCHOLY 2 0 2 2 +MEETING 5 0 
5 5 +MEEK 1 0 1 1 +MEDIUM 1 0 1 1 +MEDITERRANEAN 4 0 4 4 +MEDITATIVE 1 0 1 1 +MEDITATION 1 0 1 1 +MEDICINE 6 0 6 6 +MECHANICS 1 0 1 1 +MEAT 1 0 1 1 +MEASURES 2 0 2 2 +MEASURED 2 0 2 2 +MEASURE 6 0 6 6 +MEANWHILE 4 0 4 4 +MEANTIME 2 0 2 2 +MEANT 5 0 5 5 +MEANS 17 0 17 17 +MEANINGS 1 0 1 1 +MEANING 4 0 4 4 +MEAN 9 0 9 9 +MEALS 4 0 4 4 +MEAL 5 0 5 5 +MEADOWS 1 0 1 1 +ME 184 0 184 184 +MC 1 0 1 1 +MAY 56 0 56 56 +MAXIMUM 1 0 1 1 +MAXIMS 1 0 1 1 +MAUSOLEUM 1 0 1 1 +MATURE 1 0 1 1 +MATTHEWS 1 0 1 1 +MATTERS 5 0 5 5 +MATTERED 1 0 1 1 +MATTER 20 0 20 20 +MATHEMATICS 1 0 1 1 +MATERIALS 2 0 2 2 +MATERIALLY 1 0 1 1 +MATERIALISM 1 0 1 1 +MATERIAL 3 0 3 3 +MATED 1 0 1 1 +MATCHLESS 1 0 1 1 +MASTERY 1 0 1 1 +MASTERS 3 0 3 3 +MASTERPIECE 1 0 1 1 +MASTERLY 1 0 1 1 +MASTER'S 2 0 2 2 +MASTER 14 0 14 14 +MAST 2 0 2 2 +MASSES 1 0 1 1 +MASSACHUSETTS 1 0 1 1 +MASS 2 0 2 2 +MASKS 1 0 1 1 +MARVELS 1 0 1 1 +MARVELLED 1 0 1 1 +MARVEL 2 0 2 2 +MARTIN 2 0 2 2 +MARTHA 2 0 2 2 +MARSHALLED 1 0 1 1 +MARSHAL 1 0 1 1 +MARRY 1 0 1 1 +MARRIED 2 0 2 2 +MARRIAGE 5 0 5 5 +MARQUIS 1 0 1 1 +MARKS 4 0 4 4 +MARKING 1 0 1 1 +MARKHAM 2 0 2 2 +MARKET 1 0 1 1 +MARKED 3 0 3 3 +MARK 6 0 6 6 +MARINE 2 0 2 2 +MARIE'S 1 0 1 1 +MARIE 6 0 6 6 +MARIA 1 0 1 1 +MARGOLOTTE 5 0 5 5 +MARGIN 1 0 1 1 +MARGARET 1 0 1 1 +MARCHES 1 0 1 1 +MARCHED 2 0 2 2 +MARCH 7 0 7 7 +MARBLE 2 0 2 2 +MARAIS 1 0 1 1 +MAP 2 0 2 2 +MANY 41 0 41 41 +MANUSCRIPT 2 0 2 2 +MANUFACTURER 3 0 3 3 +MANSION 1 0 1 1 +MANSERVANT 1 0 1 1 +MANOEUVRING 1 0 1 1 +MANNERS 1 0 1 1 +MANNER 14 0 14 14 +MANNA 1 0 1 1 +MANKIND 2 0 2 2 +MANIFOLD 1 0 1 1 +MANIFESTLY 1 0 1 1 +MANIFEST 2 0 2 2 +MANICAMP 1 0 1 1 +MANHOOD 1 0 1 1 +MANDIBLE 1 0 1 1 +MANAGING 1 0 1 1 +MANAGERS 1 0 1 1 +MANAGEMENT 3 0 3 3 +MANAGED 4 0 4 4 +MANAGE 1 0 1 1 +MAN'S 5 0 5 5 +MAMMY 1 0 1 1 +MALIGNITIES 1 0 1 1 +MALIGNED 1 0 1 1 +MALICIOUS 1 0 1 1 +MALICE 1 0 1 1 +MALADY 1 0 1 1 +MAKING 13 0 13 13 +MAKES 10 0 10 10 +MAKE 40 0 40 40 +MAJESTY'S 2 0 2 2 +MAJESTY 6 0 6 6 +MAINTAINING 1 0 1 1 +MAINTAINED 4 0 4 4 +MAINSAIL 1 0 1 1 +MAINLY 1 0 1 1 +MAIN 3 0 3 3 +MAIDS 4 0 4 4 +MAID'S 1 0 1 1 +MAHOGANY 1 0 1 1 +MAGNIFIED 1 0 1 1 +MAGNIFICENT 3 0 3 3 +MAGNIFICENCE 1 0 1 1 +MAGISTRACY 1 0 1 1 +MAGICIAN 5 0 5 5 +MAGIC 4 0 4 4 +MAGAZINE 1 0 1 1 +MADNESS 1 0 1 1 +MADEMOISELLE 5 0 5 5 +MADAME'S 1 0 1 1 +MAD 3 0 3 3 +MACHINES 1 0 1 1 +MACHINE 1 0 1 1 +MACDONALDS 1 0 1 1 +MABEL 1 0 1 1 +LYNCHINGS 1 0 1 1 +LYING 4 0 4 4 +LUXURIES 2 0 2 2 +LUXURIANT 1 0 1 1 +LUTHERAN 2 0 2 2 +LUTHER 3 0 3 3 +LUSTROUS 1 0 1 1 +LUSTRE 1 0 1 1 +LURKING 1 0 1 1 +LURID 1 0 1 1 +LURE 1 0 1 1 +LUNGS 1 0 1 1 +LUMPS 1 0 1 1 +LUMP 1 0 1 1 +LUMINOUS 2 0 2 2 +LULLS 1 0 1 1 +LUKE 1 0 1 1 +LUGUBRIOUS 1 0 1 1 +LUCY 1 0 1 1 +LUCRETIUS 1 0 1 1 +LUCK 3 0 3 3 +LUCID 1 0 1 1 +LUBRICATE 1 0 1 1 +LOYALLY 1 0 1 1 +LOYAL 1 0 1 1 +LOWLY 2 0 2 2 +LOWER 6 0 6 6 +LOW 6 0 6 6 +LOVING 4 0 4 4 +LOVES 3 0 3 3 +LOVERS 2 0 2 2 +LOVER 1 0 1 1 +LOVELY 7 0 7 7 +LOVED 6 0 6 6 +LOVE'S 1 0 1 1 +LOVE 48 0 48 48 +LOUDNESS 1 0 1 1 +LOUDLY 2 0 2 2 +LOUDER 1 0 1 1 +LOUD 2 0 2 2 +LOTUS 1 0 1 1 +LOTS 2 0 2 2 +LOT 6 0 6 6 +LOST 12 0 12 12 +LOSS 6 0 6 6 +LOSING 3 0 3 3 +LOSES 2 0 2 2 +LORDS 1 0 1 1 +LORDLY 1 0 1 1 +LORDING 2 0 2 2 +LORD 23 0 23 23 +LOPPED 1 0 1 1 +LOOSELY 1 0 1 1 +LOOSE 5 0 5 5 +LOOKS 7 0 7 7 +LOOKING 16 0 16 16 +LOOKED 24 0 24 24 +LOOK 32 0 32 32 +LONGING 2 0 2 2 +LONGER 9 0 9 9 +LONGED 1 0 1 1 +LONG 29 0 29 29 +LONELY 2 0 2 2 +LONELINESS 1 0 1 1 +LONELIER 2 0 2 2 +LONDON 3 0 3 3 +LOGICALLY 1 0 1 1 +LOGICAL 2 0 2 2 +LOGARITHMS 1 0 1 1 +LOG 2 0 2 2 +LOFTINESS 1 0 1 1 +LOFTIEST 1 0 1 1 +LOFT 2 
0 2 2 +LODGING 1 0 1 1 +LODGE 2 0 2 2 +LOCRIS 1 0 1 1 +LOCKED 3 0 3 3 +LOCK 1 0 1 1 +LOCATED 1 0 1 1 +LOAF 1 0 1 1 +LOADS 1 0 1 1 +LOADED 1 0 1 1 +LOAD 1 0 1 1 +LIVING 5 0 5 5 +LIVID 1 0 1 1 +LIVERY 1 0 1 1 +LIVERIES 2 0 2 2 +LIVERIED 1 0 1 1 +LIVELIEST 1 0 1 1 +LIVED 8 0 8 8 +LIVE 9 0 9 9 +LITTLE 101 0 101 101 +LITERATURE 1 0 1 1 +LITERARY 4 0 4 4 +LITERALLY 2 0 2 2 +LITERAL 2 0 2 2 +LISTLESSLY 1 0 1 1 +LISTENING 3 0 3 3 +LISTENED 5 0 5 5 +LISTEN 3 0 3 3 +LIST 1 0 1 1 +LIQUID 2 0 2 2 +LIPS 4 0 4 4 +LINKS 2 0 2 2 +LINGERED 1 0 1 1 +LINES 7 0 7 7 +LINEN 2 0 2 2 +LINED 2 0 2 2 +LINDENS 1 0 1 1 +LINCOLN 2 0 2 2 +LIMITATION 1 0 1 1 +LIMIT 1 0 1 1 +LIMESTONE 1 0 1 1 +LIMBS 2 0 2 2 +LILBURN 1 0 1 1 +LIKEWISE 1 0 1 1 +LIKES 2 0 2 2 +LIKENESS 1 0 1 1 +LIKELY 2 0 2 2 +LIKED 4 0 4 4 +LIGHTS 5 0 5 5 +LIGHTNING 1 0 1 1 +LIGHTLY 3 0 3 3 +LIGHTING 6 0 6 6 +LIGHTED 5 0 5 5 +LIFTING 2 0 2 2 +LIFTED 1 0 1 1 +LIFT 2 0 2 2 +LIFE'S 1 0 1 1 +LIFE 47 0 47 47 +LIEUTENANT 6 0 6 6 +LIEDENBROCK 1 0 1 1 +LICHEN 1 0 1 1 +LIBRARY 3 0 3 3 +LIBERTY 3 0 3 3 +LIBERAL 1 0 1 1 +LIABLE 2 0 2 2 +LEXINGTON 1 0 1 1 +LEVIED 2 0 2 2 +LEVELS 1 0 1 1 +LEVEL 3 0 3 3 +LETTING 1 0 1 1 +LETTERS 4 0 4 4 +LETTER 12 0 12 12 +LETS 1 0 1 1 +LET'S 2 0 2 2 +LET 27 0 27 27 +LEST 2 0 2 2 +LESSONS 1 0 1 1 +LESSON 1 0 1 1 +LESSER 2 0 2 2 +LESS 28 0 28 28 +LENGTHY 1 0 1 1 +LENGTHS 1 0 1 1 +LENGTH 4 0 4 4 +LEND 2 0 2 2 +LEISURELY 1 0 1 1 +LEISURE 11 0 11 11 +LEGS 3 0 3 3 +LEGISLATURE 4 0 4 4 +LEGISLATORS 1 0 1 1 +LEGISLATIVE 1 0 1 1 +LEGATE 1 0 1 1 +LEGALITY 2 0 2 2 +LEGAL 1 0 1 1 +LEG 1 0 1 1 +LEFT 34 0 34 34 +LEECH 2 0 2 2 +LED 7 0 7 7 +LECTURES 2 0 2 2 +LECTURE 3 0 3 3 +LEAVING 6 0 6 6 +LEAVES 5 0 5 5 +LEAVENWORTH 4 0 4 4 +LEAVE 16 0 16 16 +LEATHER 1 0 1 1 +LEAST 15 0 15 15 +LEASH 1 0 1 1 +LEASED 1 0 1 1 +LEARNING 1 0 1 1 +LEARNED 4 0 4 4 +LEARN 4 0 4 4 +LEAPS 2 0 2 2 +LEAPING 3 0 3 3 +LEAP 2 0 2 2 +LEANING 3 0 3 3 +LEANED 5 0 5 5 +LEAN 1 0 1 1 +LEAGUES 1 0 1 1 +LEAGUE 1 0 1 1 +LEADS 3 0 3 3 +LEADING 3 0 3 3 +LEADERS 1 0 1 1 +LEAD 8 0 8 8 +LAZILY 1 0 1 1 +LAYS 1 0 1 1 +LAYMAN 1 0 1 1 +LAYING 1 0 1 1 +LAY 16 0 16 16 +LAWYER 1 0 1 1 +LAWS 9 0 9 9 +LAWRENCE 2 0 2 2 +LAWN 1 0 1 1 +LAWFUL 1 0 1 1 +LAW 13 0 13 13 +LAVISHING 1 0 1 1 +LAUGHTER 2 0 2 2 +LAUGHING 6 0 6 6 +LAUGHED 6 0 6 6 +LAUGH 4 0 4 4 +LATTICE 1 0 1 1 +LATTER 9 0 9 9 +LATIN 3 0 3 3 +LATEST 1 0 1 1 +LATER 14 0 14 14 +LATENT 1 0 1 1 +LATE 6 0 6 6 +LATCHED 1 0 1 1 +LAST 41 0 41 41 +LASHED 1 0 1 1 +LARGEST 2 0 2 2 +LARGER 3 0 3 3 +LARGE 16 0 16 16 +LAREN 1 0 1 1 +LARDER 1 0 1 1 +LAPSE 1 0 1 1 +LAP 3 0 3 3 +LANGUISHINGLY 1 0 1 1 +LANGUID 1 0 1 1 +LANGUAGE 11 0 11 11 +LANE 1 0 1 1 +LANDSCAPE 1 0 1 1 +LANDS 2 0 2 2 +LAND 13 0 13 13 +LAMPS 3 0 3 3 +LAMP 4 0 4 4 +LAMENTATION 1 0 1 1 +LAMBS 1 0 1 1 +LAMB 1 0 1 1 +LALLIE 1 0 1 1 +LAKES 1 0 1 1 +LAKE'S 1 0 1 1 +LAKE 13 0 13 13 +LAGOON 4 0 4 4 +LADY 9 0 9 9 +LADLED 1 0 1 1 +LADIES 11 0 11 11 +LADDER 3 0 3 3 +LAD 3 0 3 3 +LACKEY 1 0 1 1 +LACK 1 0 1 1 +L 2 0 2 2 +KNOWS 6 0 6 6 +KNOWN 15 0 15 15 +KNOWLEDGE 15 0 15 15 +KNOWING 5 0 5 5 +KNOT 1 0 1 1 +KNOCKING 1 0 1 1 +KNOCKED 4 0 4 4 +KNOCK 1 0 1 1 +KNITTED 1 0 1 1 +KNIGHT 1 0 1 1 +KNEW 25 0 25 25 +KNEES 3 0 3 3 +KNEELS 1 0 1 1 +KNEELING 1 0 1 1 +KNEE 1 0 1 1 +KNAVE 1 0 1 1 +KITTEN 1 0 1 1 +KITES 1 0 1 1 +KITE 1 0 1 1 +KITCHEN 4 0 4 4 +KIT 1 0 1 1 +KISSING 1 0 1 1 +KISSES 1 0 1 1 +KISSED 2 0 2 2 +KISS 2 0 2 2 +KINGS 1 0 1 1 +KINGLY 1 0 1 1 +KINGDOM 4 0 4 4 +KING'S 4 0 4 4 +KING 26 0 26 26 +KINDS 1 0 1 1 +KINDNESS 1 0 1 1 +KINDLY 3 0 3 3 +KINDLED 3 0 3 3 +KINDER 1 0 1 1 +KIND 14 0 14 14 +KILLS 1 0 1 1 
+KILLED 1 0 1 1 +KIDNAP 2 0 2 2 +KID 1 0 1 1 +KICKAPOO 1 0 1 1 +KEYNOTE 1 0 1 1 +KEY 5 0 5 5 +KETTLES 2 0 2 2 +KETTLE 1 0 1 1 +KESWICK 1 0 1 1 +KERCHIEFS 1 0 1 1 +KEPT 5 0 5 5 +KENNINGTON 2 0 2 2 +KENNETH 9 0 9 9 +KEEPS 3 0 3 3 +KEEPING 4 0 4 4 +KEEPER 1 0 1 1 +KEEP 10 0 10 10 +KEENNESS 2 0 2 2 +KEENER 1 0 1 1 +KEEN 3 0 3 3 +KATHLEEN 1 0 1 1 +KATE 1 0 1 1 +KANSAS 3 0 3 3 +KANE 1 0 1 1 +JUSTLY 2 0 2 2 +JUSTIFICATION 2 0 2 2 +JUSTICE 3 0 3 3 +JURISDICTION 1 0 1 1 +JUMPING 1 0 1 1 +JUMPED 1 0 1 1 +JUMP 3 0 3 3 +JUDGES 1 0 1 1 +JUDGE 5 0 5 5 +JUDAH 1 0 1 1 +JOYOUS 1 0 1 1 +JOYCE 2 0 2 2 +JOY 4 0 4 4 +JOURNEYING 1 0 1 1 +JOURNEY 5 0 5 5 +JONES 3 0 3 3 +JOLLY 5 0 5 5 +JOKED 1 0 1 1 +JOKE 2 0 2 2 +JOINED 1 0 1 1 +JOIN 2 0 2 2 +JOHNSON 1 0 1 1 +JOHN 16 0 16 16 +JOB 4 0 4 4 +JIB 1 0 1 1 +JEWISH 1 0 1 1 +JEWELS 3 0 3 3 +JET 1 0 1 1 +JESUS 7 0 7 7 +JERSEY 1 0 1 1 +JERK 1 0 1 1 +JENKS 1 0 1 1 +JELLIES 1 0 1 1 +JEHOVAH 1 0 1 1 +JEERED 1 0 1 1 +JEALOUS 1 0 1 1 +JAWS 2 0 2 2 +JAW 2 0 2 2 +JASPER'S 2 0 2 2 +JAP 1 0 1 1 +JANUARY 2 0 2 2 +JANE'S 1 0 1 1 +JANE 4 0 4 4 +JAMES 2 0 2 2 +JAILER 5 0 5 5 +JAIL 4 0 4 4 +JACOB'S 2 0 2 2 +JACOB 1 0 1 1 +JACKSON 1 0 1 1 +JACKET 1 0 1 1 +J 2 0 2 2 +IVORY 1 0 1 1 +ITSELF 21 0 21 21 +ITCH 1 0 1 1 +ITALIAN 2 0 2 2 +ISSUED 2 0 2 2 +ISSUE 1 0 1 1 +ISRAEL 1 0 1 1 +ISOLATED 1 0 1 1 +ISN'T 5 0 5 5 +ISLAND 5 0 5 5 +IRWINE 1 0 1 1 +IRRITABLE 1 0 1 1 +IRRESOLUTION 1 0 1 1 +IRREPARABLE 1 0 1 1 +IRREGULARITY 2 0 2 2 +IRONING 1 0 1 1 +IRON'S 1 0 1 1 +IRON 2 0 2 2 +IRISH 2 0 2 2 +IRIDESCENT 1 0 1 1 +IRENE 1 0 1 1 +IRELAND 1 0 1 1 +INWARDLY 1 0 1 1 +INWARD 1 0 1 1 +INVOLVING 1 0 1 1 +INVOLVES 1 0 1 1 +INVOLVED 1 0 1 1 +INVOLVE 1 0 1 1 +INVITED 4 0 4 4 +INVITATION 3 0 3 3 +INVISIBLE 1 0 1 1 +INVIOLATE 1 0 1 1 +INVIDIOUS 1 0 1 1 +INVESTIGATION 1 0 1 1 +INVENTORS 2 0 2 2 +INVENTOR 1 0 1 1 +INVENTION 1 0 1 1 +INVENTED 1 0 1 1 +INVASION 1 0 1 1 +INVARIABLY 4 0 4 4 +INVARIABLE 1 0 1 1 +INVALID 1 0 1 1 +INVADER 1 0 1 1 +INVADE 1 0 1 1 +INTRODUCTION 4 0 4 4 +INTRODUCING 1 0 1 1 +INTRINSIC 1 0 1 1 +INTRICATE 1 0 1 1 +INTOLERANT 1 0 1 1 +INTOLERANCY 1 0 1 1 +INTOLERABLE 1 0 1 1 +INTIMATELY 2 0 2 2 +INTIMATE 2 0 2 2 +INTERVIEWS 1 0 1 1 +INTERVIEW 3 0 3 3 +INTERSECTED 1 0 1 1 +INTERRUPTED 2 0 2 2 +INTERPRETED 1 0 1 1 +INTERPRETATION 1 0 1 1 +INTERPOSED 1 0 1 1 +INTERNAL 1 0 1 1 +INTERMINGLED 1 0 1 1 +INTERMEDIATE 1 0 1 1 +INTERLACED 1 0 1 1 +INTERFERE 2 0 2 2 +INTERESTS 2 0 2 2 +INTERESTING 3 0 3 3 +INTERESTED 4 0 4 4 +INTEREST 10 0 10 10 +INTENTLY 2 0 2 2 +INTENTIONS 1 0 1 1 +INTENTION 1 0 1 1 +INTENT 1 0 1 1 +INTENSITY 3 0 3 3 +INTENSIFICATION 1 0 1 1 +INTENSELY 1 0 1 1 +INTENDED 1 0 1 1 +INTEND 1 0 1 1 +INTELLIGENT 5 0 5 5 +INTELLIGENCE 7 0 7 7 +INTELLECTS 1 0 1 1 +INTELLECT 1 0 1 1 +INTEGRITY 1 0 1 1 +INTANGIBLE 1 0 1 1 +INSURRECTIONISTS 1 0 1 1 +INSULT 1 0 1 1 +INSTRUMENT 1 0 1 1 +INSTRUCTIONS 4 0 4 4 +INSTITUTION 1 0 1 1 +INSTITUTED 1 0 1 1 +INSTITUTE 1 0 1 1 +INSTINCT 1 0 1 1 +INSTEAD 11 0 11 11 +INSTANTLY 6 0 6 6 +INSTANTANEOUS 1 0 1 1 +INSTANT 3 0 3 3 +INSTANCING 1 0 1 1 +INSTANCE 3 0 3 3 +INSTALLED 5 0 5 5 +INSTALL 1 0 1 1 +INSPIRED 1 0 1 1 +INSPIRATION 1 0 1 1 +INSOLENTLY 1 0 1 1 +INSISTS 1 0 1 1 +INSISTENCE 2 0 2 2 +INSISTED 1 0 1 1 +INSIST 1 0 1 1 +INSIPID 1 0 1 1 +INSINUATED 1 0 1 1 +INSIGNIFICANT 2 0 2 2 +INSIGHT 1 0 1 1 +INSIDE 2 0 2 2 +INSERTING 1 0 1 1 +INSENSIBLE 1 0 1 1 +INSECT 1 0 1 1 +INSATIABLE 2 0 2 2 +INQUIRED 2 0 2 2 +INNUMERABLE 2 0 2 2 +INNOCENTLY 1 0 1 1 +INNOCENT 2 0 2 2 +INNINGS 1 0 1 1 +INMATES 1 0 1 1 +INJUSTICE 4 0 4 4 +INJURY 2 0 2 2 +INJURED 2 0 2 2 +INIQUITY 1 
0 1 1 +INHUMAN 1 0 1 1 +INHERITANCE 2 0 2 2 +INHABITANTS 3 0 3 3 +INGREDIENTS 1 0 1 1 +INGENUITY 2 0 2 2 +INGENIOUS 2 0 2 2 +INFUSE 1 0 1 1 +INFORMING 1 0 1 1 +INFORMED 3 0 3 3 +INFORMATION 3 0 3 3 +INFORM 1 0 1 1 +INFLUENTIAL 2 0 2 2 +INFLUENCES 2 0 2 2 +INFLUENCE 8 0 8 8 +INFLICT 2 0 2 2 +INFLEXIBLE 1 0 1 1 +INFIRMITY 1 0 1 1 +INFIRMITIES 1 0 1 1 +INFIRMARY 1 0 1 1 +INFINITE 4 0 4 4 +INFERIOR 2 0 2 2 +INFERENCE 1 0 1 1 +INFECTED 1 0 1 1 +INFANTRY 2 0 2 2 +INFANTILE 2 0 2 2 +INFANT 1 0 1 1 +INFANCY 1 0 1 1 +INFALLIBLE 1 0 1 1 +INEXPRESSIBLY 1 0 1 1 +INEXPLICABLE 2 0 2 2 +INEXPERIENCE 1 0 1 1 +INEXHAUSTIBLE 1 0 1 1 +INESTIMABLE 1 0 1 1 +INEFFECTUALLY 1 0 1 1 +INDUSTRY 1 0 1 1 +INDUSTRIOUS 1 0 1 1 +INDULGENCE 1 0 1 1 +INDULGED 1 0 1 1 +INDUCED 1 0 1 1 +INDUCE 1 0 1 1 +INDUBITABLE 1 0 1 1 +INDIVIDUALS 1 0 1 1 +INDIVIDUAL 5 0 5 5 +INDISTINGUISHABLE 1 0 1 1 +INDISTINCT 1 0 1 1 +INDISPENSABLE 1 0 1 1 +INDISCREET 1 0 1 1 +INDIRECT 1 0 1 1 +INDIGENCE 1 0 1 1 +INDIFFERENT 3 0 3 3 +INDIFFERENCE 4 0 4 4 +INDIES 1 0 1 1 +INDICATOR 1 0 1 1 +INDICATING 1 0 1 1 +INDICATES 2 0 2 2 +INDICATED 3 0 3 3 +INDICATE 2 0 2 2 +INDIANS 4 0 4 4 +INDIAN 4 0 4 4 +INDIA 1 0 1 1 +INDEPENDENTS 1 0 1 1 +INDEPENDENT 4 0 4 4 +INDEPENDENCE 1 0 1 1 +INDEED 29 0 29 29 +INCURRING 1 0 1 1 +INCURRED 1 0 1 1 +INCUR 1 0 1 1 +INCULCATED 1 0 1 1 +INCREASED 2 0 2 2 +INCREASE 2 0 2 2 +INCONVENIENT 1 0 1 1 +INCONCEIVABLE 1 0 1 1 +INCOMPREHENSIBLE 1 0 1 1 +INCOMPATIBLE 1 0 1 1 +INCOHERENT 1 0 1 1 +INCLUDING 1 0 1 1 +INCLUDED 2 0 2 2 +INCLINES 1 0 1 1 +INCLINED 2 0 2 2 +INCLINATIONS 2 0 2 2 +INCITED 1 0 1 1 +INCIDENTS 1 0 1 1 +INCIDENTAL 1 0 1 1 +INCIDENT 6 0 6 6 +INCHES 1 0 1 1 +INCH 2 0 2 2 +INCESSANTLY 1 0 1 1 +INCEPTION 1 0 1 1 +INCAPABLE 2 0 2 2 +INCANDESCENT 1 0 1 1 +INASMUCH 1 0 1 1 +INADEQUATE 2 0 2 2 +INADEQUACY 1 0 1 1 +INACTION 1 0 1 1 +INACCURACY 1 0 1 1 +INACCESSIBLE 1 0 1 1 +IMPULSIVELY 1 0 1 1 +IMPULSE 3 0 3 3 +IMPROVING 1 0 1 1 +IMPROVED 3 0 3 3 +IMPRISONMENT 1 0 1 1 +IMPRISONED 3 0 3 3 +IMPRESSIONS 6 0 6 6 +IMPRESSION 2 0 2 2 +IMPOSSIBLE 11 0 11 11 +IMPOSSIBILITY 1 0 1 1 +IMPOSED 1 0 1 1 +IMPOSE 1 0 1 1 +IMPORTANT 7 0 7 7 +IMPORTANCE 5 0 5 5 +IMPORT 1 0 1 1 +IMPLY 1 0 1 1 +IMPLORES 1 0 1 1 +IMPLORE 1 0 1 1 +IMPLIES 3 0 3 3 +IMPLICIT 1 0 1 1 +IMPLICATION 1 0 1 1 +IMPIETY 1 0 1 1 +IMPETUS 1 0 1 1 +IMPETUOUS 3 0 3 3 +IMPERIOUSLY 1 0 1 1 +IMPERIALIST 1 0 1 1 +IMPERIALISM 1 0 1 1 +IMPERIAL 1 0 1 1 +IMPERFECTLY 1 0 1 1 +IMPERATIVE 1 0 1 1 +IMPENETRABLE 2 0 2 2 +IMPELLED 2 0 2 2 +IMPEDIMENT 1 0 1 1 +IMPATIENT 1 0 1 1 +IMPATIENCE 3 0 3 3 +IMPASSIVELY 1 0 1 1 +IMMUNITY 1 0 1 1 +IMMORTALITY 1 0 1 1 +IMMORTAL 1 0 1 1 +IMMENSELY 1 0 1 1 +IMMENSE 1 0 1 1 +IMMEDIATELY 4 0 4 4 +IMMEDIATE 2 0 2 2 +IMMEDIACY 1 0 1 1 +IMMATURE 1 0 1 1 +IMBIBING 1 0 1 1 +IMBIBED 1 0 1 1 +IMAGINING 1 0 1 1 +IMAGINED 2 0 2 2 +IMAGINE 2 0 2 2 +IMAGINATIVE 1 0 1 1 +IMAGINATION 3 0 3 3 +IMAGINARY 1 0 1 1 +IMAGINABLE 2 0 2 2 +IMAGES 8 0 8 8 +IMAGE 9 0 9 9 +ILLUSTRIOUS 2 0 2 2 +ILLUSTRATION 1 0 1 1 +ILLUSION 2 0 2 2 +ILLUMINATION 1 0 1 1 +ILLUMINATING 1 0 1 1 +ILLUMINATED 1 0 1 1 +ILLS 1 0 1 1 +ILLNESS 1 0 1 1 +ILL 6 0 6 6 +IGNORANCE 2 0 2 2 +IGNOMINY 1 0 1 1 +IGNOBLE 1 0 1 1 +IF 131 0 131 131 +IDOLATRY 1 0 1 1 +IDLY 1 0 1 1 +IDLENESS 1 0 1 1 +IDLE 6 0 6 6 +IDIOSYNCRATICALLY 1 0 1 1 +IDENTITY 1 0 1 1 +IDEAS 11 0 11 11 +IDEAL 3 0 3 3 +IDEA 7 0 7 7 +ICE 1 0 1 1 +I'LL 14 0 14 14 +HYPOTHESIS 1 0 1 1 +HYPOCRITE 1 0 1 1 +HYPOCRISY 1 0 1 1 +HYMN 1 0 1 1 +HYDRAS 1 0 1 1 +HUT 4 0 4 4 +HUSSY 1 0 1 1 +HUSHED 1 0 1 1 +HUSBAND'S 1 0 1 1 +HUSBAND 9 0 9 9 +HURT 1 0 1 1 
+HURRYING 2 0 2 2 +HURRY 4 0 4 4 +HURRIEDLY 3 0 3 3 +HURRIED 6 0 6 6 +HURONS 1 0 1 1 +HURLED 2 0 2 2 +HUNTLEY 1 0 1 1 +HUNTING 2 0 2 2 +HUNTER 1 0 1 1 +HUNTED 1 0 1 1 +HUNGRY 1 0 1 1 +HUNGER 2 0 2 2 +HUNG 10 0 10 10 +HUNDREDTH 1 0 1 1 +HUNDREDS 2 0 2 2 +HUNDRED 18 0 18 18 +HUMPY 2 0 2 2 +HUMPH 1 0 1 1 +HUMOUR 1 0 1 1 +HUMOROUS 3 0 3 3 +HUMOR 1 0 1 1 +HUMMING 1 0 1 1 +HUMILITY 1 0 1 1 +HUMILIATE 1 0 1 1 +HUMID 1 0 1 1 +HUMBUG 1 0 1 1 +HUMBLY 2 0 2 2 +HUMBLE 5 0 5 5 +HUMANITY 2 0 2 2 +HUMANE 1 0 1 1 +HUMAN 15 0 15 15 +HUGGED 1 0 1 1 +HUGE 7 0 7 7 +HUES 1 0 1 1 +HUE 1 0 1 1 +HUDSON 1 0 1 1 +HUDDLED 1 0 1 1 +HOWL 1 0 1 1 +HOWEVER 29 0 29 29 +HOW 50 0 50 50 +HOVER 1 0 1 1 +HOUSEWORK 1 0 1 1 +HOUSES 1 0 1 1 +HOUSEMAID 2 0 2 2 +HOUSEKEEPER 2 0 2 2 +HOUSEHOLDS 1 0 1 1 +HOUSEHOLD'S 1 0 1 1 +HOUSEHOLD 4 0 4 4 +HOURS 13 0 13 13 +HOUR 12 0 12 12 +HOUNDED 1 0 1 1 +HOTELS 1 0 1 1 +HOTEL 7 0 7 7 +HOSTILITY 1 0 1 1 +HOSTESS 3 0 3 3 +HOST 3 0 3 3 +HOSPITALITY 4 0 4 4 +HOSPITABLY 1 0 1 1 +HOSPITABLE 1 0 1 1 +HOSE 2 0 2 2 +HORTON 1 0 1 1 +HORSES 6 0 6 6 +HORSEMEN 1 0 1 1 +HORROR 2 0 2 2 +HORRID 1 0 1 1 +HORRIBLY 2 0 2 2 +HORRIBLE 3 0 3 3 +HORNS 2 0 2 2 +HORNFUL 1 0 1 1 +HORIZON 3 0 3 3 +HORATIO 2 0 2 2 +HOPKINSON 2 0 2 2 +HOPING 1 0 1 1 +HOPES 6 0 6 6 +HOPELESS 1 0 1 1 +HOPED 2 0 2 2 +HOPE 10 0 10 10 +HOOKING 1 0 1 1 +HOOKED 1 0 1 1 +HONOURED 1 0 1 1 +HONORS 1 0 1 1 +HONORIFIC 2 0 2 2 +HONORED 1 0 1 1 +HONEY 1 0 1 1 +HONESTY 1 0 1 1 +HONESTLY 2 0 2 2 +HONEST 5 0 5 5 +HOMILY 1 0 1 1 +HOMES 2 0 2 2 +HOMELY 3 0 3 3 +HOME 23 0 23 23 +HOLOCAUST 1 0 1 1 +HOLMES 10 0 10 10 +HOLLYHOCKS 1 0 1 1 +HOLLOW 3 0 3 3 +HOLINESS 2 0 2 2 +HOLIDAYS 3 0 3 3 +HOLES 1 0 1 1 +HOLE 1 0 1 1 +HOLDS 2 0 2 2 +HOLDING 1 0 1 1 +HOLD 8 0 8 8 +HOLBORN 1 0 1 1 +HOLBEIN 2 0 2 2 +HOBSON'S 1 0 1 1 +HOBS 1 0 1 1 +HO 1 0 1 1 +HITHERTO 1 0 1 1 +HISTORY 5 0 5 5 +HISTORIC 1 0 1 1 +HISTORIANS 1 0 1 1 +HISTORIAN 1 0 1 1 +HISSING 1 0 1 1 +HISS 1 0 1 1 +HIRE 1 0 1 1 +HINT 2 0 2 2 +HINDERED 1 0 1 1 +HIND 1 0 1 1 +HIMSELF 49 0 49 49 +HILLY 1 0 1 1 +HILDA'S 2 0 2 2 +HILDA 9 0 9 9 +HIGHNESS 1 0 1 1 +HIGHLY 2 0 2 2 +HIGHEST 3 0 3 3 +HIGHER 2 0 2 2 +HIGH 18 0 18 18 +HIERARCHY 1 0 1 1 +HIDING 1 0 1 1 +HIDEOUS 1 0 1 1 +HIDE 3 0 3 3 +HIDDEN 3 0 3 3 +HEWN 1 0 1 1 +HESTER 11 0 11 11 +HESITATION 1 0 1 1 +HESITATING 2 0 2 2 +HESITATED 1 0 1 1 +HERSELF 20 0 20 20 +HERS 2 0 2 2 +HERON 1 0 1 1 +HEROINE 1 0 1 1 +HEROIC 2 0 2 2 +HERO 3 0 3 3 +HERMOCRATES 1 0 1 1 +HERETICS 2 0 2 2 +HEREDITY 1 0 1 1 +HEREAFTER 3 0 3 3 +HERE'S 1 0 1 1 +HERALDED 1 0 1 1 +HENRY'S 1 0 1 1 +HENRY 2 0 2 2 +HENLEY 1 0 1 1 +HENCHMEN 1 0 1 1 +HENCE 4 0 4 4 +HEMMED 1 0 1 1 +HELPLESS 3 0 3 3 +HELPING 1 0 1 1 +HELPED 3 0 3 3 +HELP 18 0 18 18 +HELMSMAN 1 0 1 1 +HELL 2 0 2 2 +HELD 15 0 15 15 +HEIGHTS 1 0 1 1 +HEIGHTENING 1 0 1 1 +HEIGHT 1 0 1 1 +HEELS 1 0 1 1 +HEDGES 1 0 1 1 +HEDGE 1 0 1 1 +HEAVY 13 0 13 13 +HEAVING 2 0 2 2 +HEAVILY 1 0 1 1 +HEAVENS 1 0 1 1 +HEAVENLY 1 0 1 1 +HEAVEN 14 0 14 14 +HEAVED 1 0 1 1 +HEAT 2 0 2 2 +HEARTY 1 0 1 1 +HEARTILY 2 0 2 2 +HEARTHSTONES 1 0 1 1 +HEARTH 3 0 3 3 +HEARTED 1 0 1 1 +HEART'S 1 0 1 1 +HEARSE 2 0 2 2 +HEARS 2 0 2 2 +HEARING 1 0 1 1 +HEARD 19 0 19 19 +HEAR 20 0 20 20 +HEAP 2 0 2 2 +HEALTH 6 0 6 6 +HEADS 3 0 3 3 +HEADQUARTERS 1 0 1 1 +HEADLONGS 1 0 1 1 +HEADLONG 1 0 1 1 +HEADING 1 0 1 1 +HEAD 36 0 36 36 +HE'LL 1 0 1 1 +HAY 1 0 1 1 +HAWKS 1 0 1 1 +HAWKEYE 5 0 5 5 +HAWK'S 1 0 1 1 +HAWK 7 0 7 7 +HAVING 12 0 12 12 +HAVEN'T 6 0 6 6 +HAUNTED 1 0 1 1 +HAUGHTY 4 0 4 4 +HATS 1 0 1 1 +HATRED 3 0 3 3 +HATH 4 0 4 4 +HATER 1 0 1 1 +HATEFUL 1 0 1 1 +HATED 1 0 1 1 +HATE 1 0 1 1 
+HAT 1 0 1 1 +HASTY 2 0 2 2 +HASTILY 2 0 2 2 +HASTENED 4 0 4 4 +HASTE 1 0 1 1 +HARVEST 1 0 1 1 +HARRYING 1 0 1 1 +HARRY 3 0 3 3 +HARROW 1 0 1 1 +HARRIED 1 0 1 1 +HARPOONER 1 0 1 1 +HARNESS 1 0 1 1 +HARMONY 2 0 2 2 +HARMONIZED 2 0 2 2 +HARMON 4 0 4 4 +HARM 2 0 2 2 +HARE 1 0 1 1 +HARDSHIPS 1 0 1 1 +HARDLY 14 0 14 14 +HARDER 1 0 1 1 +HARD 12 0 12 12 +HARBORING 1 0 1 1 +HARASSING 1 0 1 1 +HARANGUING 1 0 1 1 +HARANGUE 1 0 1 1 +HAPPY 16 0 16 16 +HAPPINESS 6 0 6 6 +HAPPILY 1 0 1 1 +HAPPENS 3 0 3 3 +HAPPENING 2 0 2 2 +HAPPENED 6 0 6 6 +HAPPEN 5 0 5 5 +HAPLESS 1 0 1 1 +HANSOM 1 0 1 1 +HANS 2 0 2 2 +HANGS 1 0 1 1 +HANGERS 1 0 1 1 +HANG 1 0 1 1 +HANDSOMEST 1 0 1 1 +HANDSOMELY 1 0 1 1 +HANDSOME 3 0 3 3 +HANDS 17 0 17 17 +HANDLE 1 0 1 1 +HANDKERCHIEFS 1 0 1 1 +HANDFUL 1 0 1 1 +HANDED 3 0 3 3 +HAMMER 3 0 3 3 +HAMLET'S 1 0 1 1 +HAMLET 6 0 6 6 +HAMBURG 1 0 1 1 +HALT 1 0 1 1 +HALLWAY 1 0 1 1 +HALLS 3 0 3 3 +HALL 9 0 9 9 +HALF 19 0 19 19 +HAIRS 1 0 1 1 +HAIRDRESSER 1 0 1 1 +HAIR 6 0 6 6 +HAILING 1 0 1 1 +HADN'T 3 0 3 3 +HABITUALLY 1 0 1 1 +HABITUAL 2 0 2 2 +HABITS 4 0 4 4 +HABITATION 1 0 1 1 +HABIT 7 0 7 7 +GUTTER 1 0 1 1 +GUSTS 2 0 2 2 +GUST 1 0 1 1 +GUNS 1 0 1 1 +GUN 1 0 1 1 +GULLET 1 0 1 1 +GULF 2 0 2 2 +GUISE 1 0 1 1 +GUILTY 2 0 2 2 +GUILT 2 0 2 2 +GUIDED 2 0 2 2 +GUIDE 2 0 2 2 +GUERRILLA 1 0 1 1 +GUARDS 3 0 3 3 +GUARD 1 0 1 1 +GRUDGE 1 0 1 1 +GROWTH 5 0 5 5 +GROWS 2 0 2 2 +GROWN 7 0 7 7 +GROWLED 2 0 2 2 +GROWING 4 0 4 4 +GROW 4 0 4 4 +GROUPS 2 0 2 2 +GROUP 2 0 2 2 +GROUNDS 2 0 2 2 +GROUND 10 0 10 10 +GROSS 1 0 1 1 +GROPING 1 0 1 1 +GROPE 1 0 1 1 +GROANS 2 0 2 2 +GROANING 1 0 1 1 +GROANED 2 0 2 2 +GROAN 1 0 1 1 +GRINNING 1 0 1 1 +GRINDER 1 0 1 1 +GRIN 1 0 1 1 +GRIM 3 0 3 3 +GRIFFIN 1 0 1 1 +GRIEVED 1 0 1 1 +GRIEFS 1 0 1 1 +GRIEF 2 0 2 2 +GREW 5 0 5 5 +GREETINGS 1 0 1 1 +GREETED 1 0 1 1 +GREET 1 0 1 1 +GREEK 4 0 4 4 +GREATNESS 2 0 2 2 +GREATLY 10 0 10 10 +GREATEST 7 0 7 7 +GREATER 9 0 9 9 +GRAVEYARD 3 0 3 3 +GRAVES 1 0 1 1 +GRAVE 4 0 4 4 +GRATITUDE 2 0 2 2 +GRATIFICATION 3 0 3 3 +GRATEFUL 2 0 2 2 +GRATE 1 0 1 1 +GRASS 11 0 11 11 +GRASPING 3 0 3 3 +GRANTED 3 0 3 3 +GRANDSON 1 0 1 1 +GRANDMOTHER 2 0 2 2 +GRANDFATHER 4 0 4 4 +GRANDER 2 0 2 2 +GRAMOPHONES 1 0 1 1 +GRAMOPHONE 1 0 1 1 +GRAINS 1 0 1 1 +GRAF 1 0 1 1 +GRADUALLY 5 0 5 5 +GRADES 1 0 1 1 +GRADATED 1 0 1 1 +GRACIOUSLY 1 0 1 1 +GRACIOUS 2 0 2 2 +GRACES 2 0 2 2 +GRACEFULLY 1 0 1 1 +GRACEFUL 1 0 1 1 +GRACE 12 0 12 12 +GOWN 1 0 1 1 +GOVERNOR'S 1 0 1 1 +GOVERNESS 2 0 2 2 +GOTHIC 3 0 3 3 +GOSSIP 2 0 2 2 +GOSPEL 2 0 2 2 +GORGEOUS 1 0 1 1 +GORDONS 1 0 1 1 +GORDON 1 0 1 1 +GOODS 5 0 5 5 +GOODNESS 1 0 1 1 +GOODLY 1 0 1 1 +GOOD 69 0 69 69 +GONE 14 0 14 14 +GOLIATH 2 0 2 2 +GOLF 1 0 1 1 +GOLDEN 15 0 15 15 +GOLD 15 0 15 15 +GOING 26 0 26 26 +GOES 2 0 2 2 +GODLY 1 0 1 1 +GODLESS 1 0 1 1 +GODDESS 1 0 1 1 +GOD'S 1 0 1 1 +GOD 33 0 33 33 +GOBY 1 0 1 1 +GOAT'S 2 0 2 2 +GO 37 0 37 37 +GNARLED 1 0 1 1 +GLUE 1 0 1 1 +GLOWING 3 0 3 3 +GLOW 3 0 3 3 +GLOVES 5 0 5 5 +GLOVED 1 0 1 1 +GLOSSY 2 0 2 2 +GLORY 1 0 1 1 +GLORIOUS 2 0 2 2 +GLORIES 1 0 1 1 +GLOOMY 1 0 1 1 +GLOOMILY 2 0 2 2 +GLOBE 1 0 1 1 +GLITTERING 4 0 4 4 +GLITTERED 2 0 2 2 +GLINDA 1 0 1 1 +GLIMMERING 1 0 1 1 +GLIDING 1 0 1 1 +GLIDES 1 0 1 1 +GLIDED 2 0 2 2 +GLEANER 1 0 1 1 +GLEAMS 1 0 1 1 +GLEAMING 4 0 4 4 +GLEAMED 1 0 1 1 +GLEAM 1 0 1 1 +GLASS 6 0 6 6 +GLARE 2 0 2 2 +GLANCED 2 0 2 2 +GLANCE 5 0 5 5 +GLAMOUR 1 0 1 1 +GLADNESS 1 0 1 1 +GLADLY 1 0 1 1 +GLAD 12 0 12 12 +GIVING 7 0 7 7 +GIVES 3 0 3 3 +GIVEN 15 0 15 15 +GIRLS 7 0 7 7 +GIRL'S 2 0 2 2 +GIRL 25 0 25 25 +GILDED 3 0 3 3 +GILD 1 0 1 1 +GIGANTIC 1 0 1 1 +GIFTS 
2 0 2 2 +GIFT 6 0 6 6 +GHOSTS 2 0 2 2 +GHOSTLY 1 0 1 1 +GHOST 2 0 2 2 +GHISIZZLE 4 0 4 4 +GHIP 4 0 4 4 +GHASTLY 1 0 1 1 +GETTING 6 0 6 6 +GET 30 0 30 30 +GESTURES 1 0 1 1 +GESTURE 2 0 2 2 +GESTATION 1 0 1 1 +GERMANTOWN 1 0 1 1 +GERM 1 0 1 1 +GEORGE 3 0 3 3 +GEOMETRICAL 1 0 1 1 +GEOFFREY'S 1 0 1 1 +GENUINE 1 0 1 1 +GENTLY 5 0 5 5 +GENTLENESS 1 0 1 1 +GENTLEMAN'S 1 0 1 1 +GENTILITY 1 0 1 1 +GENIUS 2 0 2 2 +GENEROUSLY 1 0 1 1 +GENEROUS 4 0 4 4 +GENERATION 2 0 2 2 +GENERATED 1 0 1 1 +GENERALS 3 0 3 3 +GENERALLY 8 0 8 8 +GENEALOGIES 1 0 1 1 +GAZING 2 0 2 2 +GAZED 2 0 2 2 +GAZE 3 0 3 3 +GAY 1 0 1 1 +GATHERINGS 1 0 1 1 +GATHERING 2 0 2 2 +GATHERED 2 0 2 2 +GATES 6 0 6 6 +GATE 3 0 3 3 +GASPED 1 0 1 1 +GASEOUS 1 0 1 1 +GAS 1 0 1 1 +GARDENS 3 0 3 3 +GARDENING 1 0 1 1 +GARDENER'S 1 0 1 1 +GARDENER 1 0 1 1 +GARDEN 6 0 6 6 +GARB 1 0 1 1 +GAMEWELL'S 1 0 1 1 +GAMEWELL 7 0 7 7 +GAME 4 0 4 4 +GALVANOMETER 1 0 1 1 +GALLOWSNESS 1 0 1 1 +GALLEY 1 0 1 1 +GALLANT 4 0 4 4 +GAIN 2 0 2 2 +GABLES 1 0 1 1 +FUZZY 1 0 1 1 +FUTURISTIC 1 0 1 1 +FUTURE 5 0 5 5 +FURTHERED 1 0 1 1 +FURTHER 9 0 9 9 +FURNITURE 4 0 4 4 +FURNISHING 1 0 1 1 +FURNISHED 1 0 1 1 +FURNISH 1 0 1 1 +FURLED 1 0 1 1 +FURIOUSLY 2 0 2 2 +FUNERAL 1 0 1 1 +FUNDAMENTAL 1 0 1 1 +FUNCTIONS 1 0 1 1 +FUNCTION 1 0 1 1 +FUMBLED 1 0 1 1 +FULLY 3 0 3 3 +FULL 18 0 18 18 +FULFILLED 2 0 2 2 +FUGITIVES 1 0 1 1 +FUGITIVE'S 1 0 1 1 +FRUSTRATED 1 0 1 1 +FRUITS 1 0 1 1 +FRUIT 1 0 1 1 +FROZEN 2 0 2 2 +FROWNINGLY 1 0 1 1 +FROWNING 2 0 2 2 +FROWNED 2 0 2 2 +FROWN 1 0 1 1 +FROTHY 1 0 1 1 +FROST 1 0 1 1 +FRONTISPIECE 1 0 1 1 +FRONTIER 3 0 3 3 +FRONT 6 0 6 6 +FROM 187 0 187 187 +FROLIC 2 0 2 2 +FRO 1 0 1 1 +FRIVOLOUS 2 0 2 2 +FRINGED 2 0 2 2 +FRIGHTFUL 1 0 1 1 +FRIGHTENED 2 0 2 2 +FRIGATE 2 0 2 2 +FRIENDSHIP 1 0 1 1 +FRIENDS 8 0 8 8 +FRIENDLY 5 0 5 5 +FRIEND'S 2 0 2 2 +FRIEND 21 0 21 21 +FRIDAY 1 0 1 1 +FRICTION 1 0 1 1 +FRETTING 1 0 1 1 +FRESHENS 1 0 1 1 +FRESH 6 0 6 6 +FREQUENTLY 3 0 3 3 +FREQUENTER 1 0 1 1 +FREQUENT 3 0 3 3 +FRENZY 1 0 1 1 +FRENCH 11 0 11 11 +FREELY 2 0 2 2 +FREEDOM 4 0 4 4 +FREED 1 0 1 1 +FREE 18 0 18 18 +FRECKLES 1 0 1 1 +FRANKNESS 1 0 1 1 +FRANKLY 1 0 1 1 +FRANCS 1 0 1 1 +FRANCIS 3 0 3 3 +FRANCE 6 0 6 6 +FRAMEWORK 1 0 1 1 +FRAMED 1 0 1 1 +FRAME 4 0 4 4 +FRAIL 1 0 1 1 +FRAGRANCE 1 0 1 1 +FRAGMENT 2 0 2 2 +FRACTURED 1 0 1 1 +FRACTURE 1 0 1 1 +FOX 1 0 1 1 +FOURTH 2 0 2 2 +FOURTEEN 1 0 1 1 +FOUR 12 0 12 12 +FOUNTAINS 1 0 1 1 +FOUNDING 1 0 1 1 +FOUNDATION 1 0 1 1 +FOUL 1 0 1 1 +FOUGHT 1 0 1 1 +FORWARDED 1 0 1 1 +FORWARD 5 0 5 5 +FORTY 2 0 2 2 +FORTUNES 2 0 2 2 +FORTUNE 8 0 8 8 +FORTUNATELY 1 0 1 1 +FORTUNATE 2 0 2 2 +FORTUITOUS 1 0 1 1 +FORTNIGHT 1 0 1 1 +FORTIFIED 2 0 2 2 +FORTHWITH 3 0 3 3 +FORTH 9 0 9 9 +FORT 2 0 2 2 +FORSAKE 1 0 1 1 +FORMS 6 0 6 6 +FORMING 1 0 1 1 +FORMIDABLE 1 0 1 1 +FORMER 7 0 7 7 +FORMED 7 0 7 7 +FORMALLY 3 0 3 3 +FORMALITY 1 0 1 1 +FORMALITIES 1 0 1 1 +FORMAL 1 0 1 1 +FORM 12 0 12 12 +FORKED 1 0 1 1 +FORGOTTEN 4 0 4 4 +FORGOT 1 0 1 1 +FORGIVE 6 0 6 6 +FORGING 1 0 1 1 +FORGETTING 1 0 1 1 +FORGETFULNESS 1 0 1 1 +FORGET 6 0 6 6 +FORGED 3 0 3 3 +FORGE 1 0 1 1 +FORETOLD 2 0 2 2 +FOREST 6 0 6 6 +FORESEEING 1 0 1 1 +FORESAW 1 0 1 1 +FOREIGNER 1 0 1 1 +FOREIGN 1 0 1 1 +FOREHEAD 1 0 1 1 +FOREFINGER 1 0 1 1 +FORCIBLE 1 0 1 1 +FORCES 2 0 2 2 +FORCED 1 0 1 1 +FORCE 17 0 17 17 +FORBES 1 0 1 1 +FOOTNOTE 2 0 2 2 +FOOTMEN 1 0 1 1 +FOOTMAN 1 0 1 1 +FOOTED 1 0 1 1 +FOOT 9 0 9 9 +FOOLS 1 0 1 1 +FOOLISHLY 2 0 2 2 +FOOLISH 3 0 3 3 +FOOD 1 0 1 1 +FONDNESS 1 0 1 1 +FOND 5 0 5 5 +FOLLOWS 3 0 3 3 +FOLLOWING 4 0 4 4 +FOLLOWER 1 0 1 1 +FOLLOWED 15 0 15 15 +FOLLOW 
7 0 7 7 +FOLLIES 1 0 1 1 +FOLIAGE 1 0 1 1 +FOLDED 1 0 1 1 +FOLD 2 0 2 2 +FOES 2 0 2 2 +FOCUS 1 0 1 1 +FOAM 4 0 4 4 +FLY 4 0 4 4 +FLUX 2 0 2 2 +FLUSHED 3 0 3 3 +FLUKES 1 0 1 1 +FLOYD'S 1 0 1 1 +FLOWERS 11 0 11 11 +FLOWER 4 0 4 4 +FLOWED 1 0 1 1 +FLOW 2 0 2 2 +FLOURISH 1 0 1 1 +FLOUR 1 0 1 1 +FLOORS 1 0 1 1 +FLOOR 10 0 10 10 +FLOOD 2 0 2 2 +FLOATED 1 0 1 1 +FLOAT 1 0 1 1 +FLINT 1 0 1 1 +FLING 1 0 1 1 +FLIGHT 3 0 3 3 +FLIES 1 0 1 1 +FLICKER 2 0 2 2 +FLEW 1 0 1 1 +FLESH 5 0 5 5 +FLEETING 1 0 1 1 +FLEECY 1 0 1 1 +FLEECES 1 0 1 1 +FLEECE 3 0 3 3 +FLED 3 0 3 3 +FLAX 1 0 1 1 +FLATTERY 1 0 1 1 +FLATTERS 2 0 2 2 +FLATTERING 1 0 1 1 +FLATTERED 1 0 1 1 +FLASHED 3 0 3 3 +FLASH 3 0 3 3 +FLAPS 1 0 1 1 +FLANKED 1 0 1 1 +FLAMES 1 0 1 1 +FLAMED 2 0 2 2 +FLAME 3 0 3 3 +FLAGSTONES 1 0 1 1 +FLAG 1 0 1 1 +FIXES 1 0 1 1 +FIXED 3 0 3 3 +FIX 2 0 2 2 +FIVE 15 0 15 15 +FITZOOTH'S 1 0 1 1 +FITZOOTH 7 0 7 7 +FITTED 2 0 2 2 +FITS 1 0 1 1 +FITLY 1 0 1 1 +FISTS 2 0 2 2 +FISHES 3 0 3 3 +FISHERMAN 1 0 1 1 +FISH 1 0 1 1 +FIRS 1 0 1 1 +FIRMNESS 1 0 1 1 +FIRMLY 2 0 2 2 +FIRM 1 0 1 1 +FIRESIDES 1 0 1 1 +FIRESIDE 1 0 1 1 +FIRES 1 0 1 1 +FIREPLACE 2 0 2 2 +FIRED 1 0 1 1 +FIREBUGS 1 0 1 1 +FIR 9 0 9 9 +FINISHED 4 0 4 4 +FINISH 1 0 1 1 +FINGERS 6 0 6 6 +FINGER 2 0 2 2 +FINEST 1 0 1 1 +FINER 1 0 1 1 +FINELY 1 0 1 1 +FINED 1 0 1 1 +FINE 17 0 17 17 +FINDS 2 0 2 2 +FINDING 3 0 3 3 +FIND 22 0 22 22 +FINANCIAL 1 0 1 1 +FINALLY 8 0 8 8 +FINALE 1 0 1 1 +FINAL 5 0 5 5 +FILLS 2 0 2 2 +FILLED 8 0 8 8 +FILL 2 0 2 2 +FILE 1 0 1 1 +FIGURES 4 0 4 4 +FIGURED 1 0 1 1 +FIGURE 6 0 6 6 +FIGHTING 4 0 4 4 +FIGHTER 1 0 1 1 +FIFTY 6 0 6 6 +FIFTH 1 0 1 1 +FIFTEENTH 2 0 2 2 +FIFTEEN 1 0 1 1 +FIERCELY 1 0 1 1 +FIERCE 4 0 4 4 +FIELDS 4 0 4 4 +FIELD 6 0 6 6 +FEWER 1 0 1 1 +FEW 28 0 28 28 +FEVER 2 0 2 2 +FEUDS 1 0 1 1 +FETCH 1 0 1 1 +FESTIVE 1 0 1 1 +FERVENT 1 0 1 1 +FENCE 1 0 1 1 +FEMININE 1 0 1 1 +FEMALE 1 0 1 1 +FELT 19 0 19 19 +FELLOWSHIP 1 0 1 1 +FELLOWS 3 0 3 3 +FELLOW'S 1 0 1 1 +FELLOW 9 0 9 9 +FELLER 1 0 1 1 +FELLED 1 0 1 1 +FELICITY 2 0 2 2 +FEET 11 0 11 11 +FEES 1 0 1 1 +FEELS 2 0 2 2 +FEELING 11 0 11 11 +FEEL 18 0 18 18 +FEEDING 1 0 1 1 +FEEDER 1 0 1 1 +FEED 2 0 2 2 +FEEBLE 2 0 2 2 +FEDERAL 3 0 3 3 +FED 1 0 1 1 +FEBRUARY 1 0 1 1 +FEATURES 9 0 9 9 +FEATURE 1 0 1 1 +FEATHERS 1 0 1 1 +FEASTED 1 0 1 1 +FEAST 3 0 3 3 +FEASIBLE 1 0 1 1 +FEARS 3 0 3 3 +FEARLESS 1 0 1 1 +FEARING 2 0 2 2 +FEARFUL 1 0 1 1 +FEAREST 1 0 1 1 +FEARED 3 0 3 3 +FEAR 12 0 12 12 +FAVOR 2 0 2 2 +FAULTS 1 0 1 1 +FAULTLESS 1 0 1 1 +FAULT 2 0 2 2 +FATTENED 1 0 1 1 +FATIGUE 2 0 2 2 +FATHOMS 6 0 6 6 +FATHOM 1 0 1 1 +FATHERS 2 0 2 2 +FATHER'S 4 0 4 4 +FATHER 28 0 28 28 +FATALITY 2 0 2 2 +FAT 3 0 3 3 +FASTEST 1 0 1 1 +FASTEN 1 0 1 1 +FAST 7 0 7 7 +FASHIONED 1 0 1 1 +FASHIONABLE 1 0 1 1 +FASCINATION 2 0 2 2 +FARTHER 6 0 6 6 +FARRAGUT 1 0 1 1 +FARMS 1 0 1 1 +FARMHOUSES 1 0 1 1 +FARMERS 1 0 1 1 +FARMER'S 1 0 1 1 +FARMER 5 0 5 5 +FARM 8 0 8 8 +FAREWELL 2 0 2 2 +FAR 30 0 30 30 +FANTASY 1 0 1 1 +FANNING 1 0 1 1 +FANCY 3 0 3 3 +FANCIES 2 0 2 2 +FANCIED 2 0 2 2 +FANATIC 1 0 1 1 +FAN 2 0 2 2 +FAMOUSLY 2 0 2 2 +FAMOUS 3 0 3 3 +FAMILY 16 0 16 16 +FAMILIES 3 0 3 3 +FAMILIARITY 3 0 3 3 +FAMILIAR 4 0 4 4 +FAME 2 0 2 2 +FALSE 6 0 6 6 +FALLS 5 0 5 5 +FALLING 2 0 2 2 +FALLEN 3 0 3 3 +FALL 2 0 2 2 +FALCONS 1 0 1 1 +FALCON 1 0 1 1 +FAITHFUL 1 0 1 1 +FAITH 9 0 9 9 +FAIRVIEW 2 0 2 2 +FAIRLY 4 0 4 4 +FAIR 7 0 7 7 +FAINTNESS 1 0 1 1 +FAINTLY 3 0 3 3 +FAINTING 4 0 4 4 +FAINT 3 0 3 3 +FAIN 1 0 1 1 +FAILURE 2 0 2 2 +FAILS 1 0 1 1 +FAILING 1 0 1 1 +FAILED 2 0 2 2 +FAIL 4 0 4 4 +FADES 1 0 1 1 +FADED 1 0 1 1 +FADE 4 0 4 
4 +FACULTIES 1 0 1 1 +FACTS 4 0 4 4 +FACTORS 1 0 1 1 +FACTOR 1 0 1 1 +FACTIONS 2 0 2 2 +FACTION 1 0 1 1 +FACT 23 0 23 23 +FACILITATED 1 0 1 1 +FACILITATE 1 0 1 1 +FACES 4 0 4 4 +FACED 3 0 3 3 +FACE 29 0 29 29 +FABULOUS 1 0 1 1 +EYES 44 0 44 44 +EYELIDS 1 0 1 1 +EYE 15 0 15 15 +EXULTING 1 0 1 1 +EXULTATION 1 0 1 1 +EXTREMELY 2 0 2 2 +EXTREME 1 0 1 1 +EXTRAORDINARY 2 0 2 2 +EXTRACT 1 0 1 1 +EXTRA 1 0 1 1 +EXTINGUISHED 2 0 2 2 +EXTINCTION 1 0 1 1 +EXTINCT 1 0 1 1 +EXTERIOR 1 0 1 1 +EXTENT 6 0 6 6 +EXTENSIVE 1 0 1 1 +EXTENDED 3 0 3 3 +EXTEND 2 0 2 2 +EXQUISITE 3 0 3 3 +EXPULSION 1 0 1 1 +EXPRESSLY 1 0 1 1 +EXPRESSIVE 1 0 1 1 +EXPRESSIONS 1 0 1 1 +EXPRESSION 4 0 4 4 +EXPRESSING 2 0 2 2 +EXPRESSED 4 0 4 4 +EXPRESS 4 0 4 4 +EXPOSURE 1 0 1 1 +EXPONENT 2 0 2 2 +EXPLOSION 1 0 1 1 +EXPLORE 2 0 2 2 +EXPLOITS 1 0 1 1 +EXPLANATION 1 0 1 1 +EXPLAINED 1 0 1 1 +EXPLAIN 4 0 4 4 +EXPERIMENTALLY 1 0 1 1 +EXPERIENCING 1 0 1 1 +EXPERIENCED 1 0 1 1 +EXPERIENCE 7 0 7 7 +EXPENSIVE 1 0 1 1 +EXPENDITURE 4 0 4 4 +EXPEDITION 4 0 4 4 +EXPECTED 3 0 3 3 +EXPECTATIONS 2 0 2 2 +EXPECT 4 0 4 4 +EXPANDED 1 0 1 1 +EXOTICS 1 0 1 1 +EXISTING 3 0 3 3 +EXISTENT 1 0 1 1 +EXISTENCE 5 0 5 5 +EXISTED 4 0 4 4 +EXILE 1 0 1 1 +EXHORT 1 0 1 1 +EXHIBITS 1 0 1 1 +EXHIBITION 2 0 2 2 +EXHIBITED 1 0 1 1 +EXHIBIT 2 0 2 2 +EXHAUSTED 1 0 1 1 +EXHALE 1 0 1 1 +EXERTIONS 1 0 1 1 +EXERTED 1 0 1 1 +EXERCISING 1 0 1 1 +EXERCISED 1 0 1 1 +EXERCISE 1 0 1 1 +EXEMPLIFIES 1 0 1 1 +EXEMPLARY 1 0 1 1 +EXECUTIVE 1 0 1 1 +EXECUTED 1 0 1 1 +EXCLUDED 2 0 2 2 +EXCLAIMED 3 0 3 3 +EXCITING 2 0 2 2 +EXCITEMENT 3 0 3 3 +EXCITE 1 0 1 1 +EXCESSIVELY 1 0 1 1 +EXCESS 1 0 1 1 +EXCEPTIONS 1 0 1 1 +EXCEPTION 2 0 2 2 +EXCEPT 6 0 6 6 +EXCELLENT 5 0 5 5 +EXCELLENCY'S 1 0 1 1 +EXCELLENCY 2 0 2 2 +EXCELLENCE 1 0 1 1 +EXCEEDING 1 0 1 1 +EXCEEDED 1 0 1 1 +EXCEED 1 0 1 1 +EXAMPLE 2 0 2 2 +EXAMINED 4 0 4 4 +EXAMINE 4 0 4 4 +EXAMINATION 8 0 8 8 +EXALTED 1 0 1 1 +EXALT 1 0 1 1 +EXAGGERATED 1 0 1 1 +EXACTLY 8 0 8 8 +EXACT 5 0 5 5 +EX 2 0 2 2 +EVOLVING 1 0 1 1 +EVOLVED 1 0 1 1 +EVOKED 1 0 1 1 +EVOKE 1 0 1 1 +EVIL 6 0 6 6 +EVIDENTLY 4 0 4 4 +EVIDENT 5 0 5 5 +EVIDENCE 5 0 5 5 +EVERYWHERE 7 0 7 7 +EVERYTHING 16 0 16 16 +EVERYDAY 3 0 3 3 +EVERYBODY 7 0 7 7 +EVERLASTING 2 0 2 2 +EVENTS 8 0 8 8 +EVENT 4 0 4 4 +EVENLY 2 0 2 2 +EVENING 15 0 15 15 +EVEN 51 0 51 51 +EVASIVELY 1 0 1 1 +EVAPORATION 1 0 1 1 +EVADED 1 0 1 1 +EVA'S 1 0 1 1 +EVA 3 0 3 3 +EUROPE 3 0 3 3 +EUCHARIST 1 0 1 1 +ETHICAL 1 0 1 1 +ETERNITY 2 0 2 2 +ETERNAL 2 0 2 2 +ETCHINGS 1 0 1 1 +ET 3 0 3 3 +ESTRANGEMENT 1 0 1 1 +ESTIMATE 1 0 1 1 +ESTATE 3 0 3 3 +ESTABLISHED 3 0 3 3 +ESSEX 1 0 1 1 +ESSENTIALLY 1 0 1 1 +ESSENTIAL 2 0 2 2 +ESSENCE 1 0 1 1 +ESQUIRE 2 0 2 2 +ESPECIALLY 6 0 6 6 +ESCORT 4 0 4 4 +ESCAPED 1 0 1 1 +ESCAPE 4 0 4 4 +ESCAPADES 1 0 1 1 +ERRORS 1 0 1 1 +ERRONEOUS 2 0 2 2 +ERRING 1 0 1 1 +ERRAND 2 0 2 2 +ERR 1 0 1 1 +ERNEST 1 0 1 1 +ERIE 1 0 1 1 +ERECTED 1 0 1 1 +ERECT 1 0 1 1 +ERA 1 0 1 1 +EQUIP 1 0 1 1 +EQUATION 1 0 1 1 +EQUALS 1 0 1 1 +EQUALLY 4 0 4 4 +EQUAL 2 0 2 2 +EPOCH 1 0 1 1 +EPISTLE 3 0 3 3 +EPISODE 1 0 1 1 +ENVY 1 0 1 1 +ENVIRONMENT 1 0 1 1 +ENVELOPMENT 1 0 1 1 +ENTRENCHED 1 0 1 1 +ENTREATIES 1 0 1 1 +ENTRANCED 1 0 1 1 +ENTRANCE 4 0 4 4 +ENTIRELY 6 0 6 6 +ENTIRE 2 0 2 2 +ENTHUSIASTS 1 0 1 1 +ENTHUSIASTIC 1 0 1 1 +ENTHUSIASM 1 0 1 1 +ENTERTAINS 1 0 1 1 +ENTERTAINMENT 3 0 3 3 +ENTERTAIN 2 0 2 2 +ENTERS 1 0 1 1 +ENTERPRISE 2 0 2 2 +ENTERING 2 0 2 2 +ENTANGLED 1 0 1 1 +ENQUIRED 1 0 1 1 +ENOUGH 20 0 20 20 +ENORMOUSLY 2 0 2 2 +ENORMOUS 1 0 1 1 +ENNIS 1 0 1 1 +ENLISTED 1 0 1 1 +ENLIST 1 0 1 1 +ENJOYMENT 1 0 1 1 
+ENJOYED 1 0 1 1 +ENJOY 2 0 2 2 +ENIGMA 1 0 1 1 +ENGORGED 1 0 1 1 +ENGLISHMAN 3 0 3 3 +ENGLISH 12 0 12 12 +ENGLAND 10 0 10 10 +ENGINEERS 2 0 2 2 +ENGINEER 4 0 4 4 +ENGINE 6 0 6 6 +ENGENDERS 1 0 1 1 +ENGAGEMENTS 1 0 1 1 +ENGAGED 5 0 5 5 +ENGAGE 1 0 1 1 +ENFORCED 1 0 1 1 +ENFORCE 3 0 3 3 +ENERGY 3 0 3 3 +ENEMY'S 1 0 1 1 +ENEMY 3 0 3 3 +ENEMIES 3 0 3 3 +ENDURES 1 0 1 1 +ENDURED 1 0 1 1 +ENDURE 1 0 1 1 +ENDS 1 0 1 1 +ENDOWED 1 0 1 1 +ENDLESS 1 0 1 1 +ENDEAVOURED 1 0 1 1 +ENDEAVOUR 1 0 1 1 +ENDEAVORING 1 0 1 1 +ENDEAVOR 1 0 1 1 +END 18 0 18 18 +ENCYCLOPAEDIA 1 0 1 1 +ENCOURAGED 1 0 1 1 +ENCOURAGE 2 0 2 2 +ENCLOSE 1 0 1 1 +ENACTED 1 0 1 1 +ENABLES 2 0 2 2 +ENABLED 3 0 3 3 +EMULATION 1 0 1 1 +EMPTY 7 0 7 7 +EMPLOYMENTS 2 0 2 2 +EMPLOYMENT 1 0 1 1 +EMPLOYING 1 0 1 1 +EMPLOYERS 1 0 1 1 +EMPLOYER 1 0 1 1 +EMPLOYED 2 0 2 2 +EMPIRE 2 0 2 2 +EMPEROR 1 0 1 1 +EMOTIONS 2 0 2 2 +EMOTIONLESS 1 0 1 1 +EMOTION 1 0 1 1 +EMISSIONS 1 0 1 1 +EMINENCES 1 0 1 1 +EMIGRATION 1 0 1 1 +EMIGRANT 1 0 1 1 +EMERGENCY 1 0 1 1 +EMERGENCE 2 0 2 2 +EMERG'D 1 0 1 1 +EMERALD 1 0 1 1 +EMBROIDERED 2 0 2 2 +EMBRACING 2 0 2 2 +EMBRACE 2 0 2 2 +EMBODIED 1 0 1 1 +EMBLEM 1 0 1 1 +EMBITTERING 1 0 1 1 +EMBITTERED 1 0 1 1 +EMBERS 1 0 1 1 +EMBARRASS 1 0 1 1 +ELSEWHERE 4 0 4 4 +ELSE 7 0 7 7 +ELOQUENT 1 0 1 1 +ELMHURST 2 0 2 2 +ELIZABETH 1 0 1 1 +ELIZA 3 0 3 3 +ELICITED 1 0 1 1 +ELF 1 0 1 1 +ELEVEN 4 0 4 4 +ELEMENTS 7 0 7 7 +ELEMENTARY 3 0 3 3 +ELEMENT 4 0 4 4 +ELEGANT 1 0 1 1 +ELEGANCE 2 0 2 2 +ELECTROPLATING 1 0 1 1 +ELECTROLYTIC 2 0 2 2 +ELECTRICITY 5 0 5 5 +ELECTRICAL 2 0 2 2 +ELECTRIC 4 0 4 4 +ELECTIONEERING 1 0 1 1 +ELECTION 3 0 3 3 +ELECTING 1 0 1 1 +ELECTED 1 0 1 1 +ELECT 2 0 2 2 +ELDERS 1 0 1 1 +ELDERLY 1 0 1 1 +ELDER 2 0 2 2 +ELAPSED 1 0 1 1 +ELABORATE 3 0 3 3 +EJACULATIONS 1 0 1 1 +EJACULATED 2 0 2 2 +EITHER'S 1 0 1 1 +EITHER 8 0 8 8 +EIGHTY 3 0 3 3 +EIGHTH 3 0 3 3 +EIGHTEENTH 2 0 2 2 +EIGHTEEN 14 0 14 14 +EIGHT 7 0 7 7 +EGYPTIAN 1 0 1 1 +EGYPT 1 0 1 1 +EFFORTS 4 0 4 4 +EFFORT 11 0 11 11 +EFFICIENCY 1 0 1 1 +EFFECTUALLY 1 0 1 1 +EFFECTUAL 2 0 2 2 +EFFECTS 3 0 3 3 +EFFECTIVENESS 1 0 1 1 +EFFECTIVELY 1 0 1 1 +EFFECTIVE 2 0 2 2 +EFFECTING 1 0 1 1 +EFFECTED 2 0 2 2 +EDWARD 3 0 3 3 +EDUCATION 5 0 5 5 +EDUCATED 2 0 2 2 +EDITORS 1 0 1 1 +EDITOR 1 0 1 1 +EDITION 1 0 1 1 +EDISONIA 1 0 1 1 +EDISON 16 0 16 16 +EDIFICE 1 0 1 1 +EDICTS 1 0 1 1 +EDICT 1 0 1 1 +EDGES 1 0 1 1 +EDGED 1 0 1 1 +EDGE 5 0 5 5 +EDDYING 1 0 1 1 +ECONOMY 2 0 2 2 +ECONOMIC 3 0 3 3 +ECHOING 1 0 1 1 +ECHOES 1 0 1 1 +ECCLESIASTICAL 1 0 1 1 +ECCENTRICITY 1 0 1 1 +EATEN 1 0 1 1 +EAT 5 0 5 5 +EASY 14 0 14 14 +EASTWARDS 1 0 1 1 +EASTERLY'S 1 0 1 1 +EASILY 10 0 10 10 +EASE 3 0 3 3 +EARTHLY 1 0 1 1 +EARTH'S 3 0 3 3 +EARTH 17 0 17 17 +EARSHOT 1 0 1 1 +EARS 5 0 5 5 +EARNESTLY 1 0 1 1 +EARNEST 4 0 4 4 +EARNED 1 0 1 1 +EARLY 13 0 13 13 +EARLIER 6 0 6 6 +EAR 6 0 6 6 +EAGLES 1 0 1 1 +EAGLE 1 0 1 1 +EAGERNESS 2 0 2 2 +EAGERLY 1 0 1 1 +EAGER 4 0 4 4 +EACH 24 0 24 24 +DYNAMO 2 0 2 2 +DYING 2 0 2 2 +DYIN 1 0 1 1 +DYE 1 0 1 1 +DWELLINGS 3 0 3 3 +DWELL 1 0 1 1 +DUTY 7 0 7 7 +DUTIES 8 0 8 8 +DUST 4 0 4 4 +DUSK 2 0 2 2 +DURING 12 0 12 12 +DURATION 1 0 1 1 +DURABLE 1 0 1 1 +DUPLICATE 1 0 1 1 +DUMPY 2 0 2 2 +DUMBFOUNDED 1 0 1 1 +DUMB 1 0 1 1 +DULY 1 0 1 1 +DULL 2 0 2 2 +DUG 1 0 1 1 +DUE 5 0 5 5 +DUDLEY 1 0 1 1 +DUDGEON 1 0 1 1 +DUCKS 1 0 1 1 +DUCKLINGS 1 0 1 1 +DUCHESS 3 0 3 3 +DUBIOUSLY 1 0 1 1 +DRY 4 0 4 4 +DRUNKENNESS 1 0 1 1 +DRUMS 1 0 1 1 +DROWNED 2 0 2 2 +DROPS 3 0 3 3 +DROPPING 1 0 1 1 +DROPPED 5 0 5 5 +DROP 3 0 3 3 +DROOPED 1 0 1 1 +DRIVING 1 0 1 1 +DRIVES 1 0 1 1 +DRIVEN 1 0 1 1 
+DRIPPING 1 0 1 1 +DRINKS 1 0 1 1 +DRINK 4 0 4 4 +DRIFTS 1 0 1 1 +DRIED 1 0 1 1 +DREW 10 0 10 10 +DRESSES 1 0 1 1 +DRESSED 6 0 6 6 +DRESS 1 0 1 1 +DREDGED 1 0 1 1 +DREARY 1 0 1 1 +DREAMY 1 0 1 1 +DREAMT 1 0 1 1 +DREAMS 2 0 2 2 +DREAMING 2 0 2 2 +DREAMED 1 0 1 1 +DREAM 5 0 5 5 +DREADING 1 0 1 1 +DREADFUL 3 0 3 3 +DRAWN 7 0 7 7 +DRAWING 2 0 2 2 +DRAW 4 0 4 4 +DRAUGHT 1 0 1 1 +DRAPERIES 1 0 1 1 +DRANK 1 0 1 1 +DRAMATIST'S 1 0 1 1 +DRAMATIST 1 0 1 1 +DRAMATIC 2 0 2 2 +DRAMA 1 0 1 1 +DRAINED 1 0 1 1 +DRAIN 1 0 1 1 +DRAGOONS 1 0 1 1 +DRAGON'S 1 0 1 1 +DRAGGING 1 0 1 1 +DRAGGED 2 0 2 2 +DRAG 1 0 1 1 +DOZEN 4 0 4 4 +DOWNWARD 2 0 2 2 +DOVE 1 0 1 1 +DOUGLAS 4 0 4 4 +DOUGHY 1 0 1 1 +DOUGHNUTS 1 0 1 1 +DOUGH 1 0 1 1 +DOUBTS 2 0 2 2 +DOUBTLESS 2 0 2 2 +DOUBTINGLY 1 0 1 1 +DOUBTING 1 0 1 1 +DOUBTFULLY 1 0 1 1 +DOUBTFUL 2 0 2 2 +DOUBT 11 0 11 11 +DOUBLE 6 0 6 6 +DOTH 5 0 5 5 +DOST 3 0 3 3 +DOROTHY 1 0 1 1 +DORKING 1 0 1 1 +DORCAS 6 0 6 6 +DOORS 3 0 3 3 +DOOM 1 0 1 1 +DONNITHORNE 1 0 1 1 +DONKEY 1 0 1 1 +DONE 24 0 24 24 +DONATISTS 2 0 2 2 +DONA 2 0 2 2 +DON'T 38 0 38 38 +DOMINIONS 1 0 1 1 +DOMINION 1 0 1 1 +DOMESTIC 2 0 2 2 +DOME 1 0 1 1 +DOLLS 2 0 2 2 +DOLLARS 7 0 7 7 +DOLL 2 0 2 2 +DOING 12 0 12 12 +DOGS 1 0 1 1 +DOGGED 1 0 1 1 +DOG 2 0 2 2 +DOESN'T 3 0 3 3 +DOES 14 0 14 14 +DOCTRINES 3 0 3 3 +DOCTRINE 4 0 4 4 +DOCTRESS 1 0 1 1 +DOCTORS 1 0 1 1 +DOCTOR 7 0 7 7 +DOBRYNA 3 0 3 3 +DIZZILY 1 0 1 1 +DIVORCE 1 0 1 1 +DIVISIONS 1 0 1 1 +DIVISION 2 0 2 2 +DIVINITY 1 0 1 1 +DIVING 4 0 4 4 +DIVINE 3 0 3 3 +DIVIDING 1 0 1 1 +DIVIDEND 1 0 1 1 +DIVIDED 4 0 4 4 +DIVIDE 2 0 2 2 +DIVERTING 1 0 1 1 +DIVERT 1 0 1 1 +DIVERSITY 1 0 1 1 +DISUSE 1 0 1 1 +DISUNITED 1 0 1 1 +DISTURBING 1 0 1 1 +DISTURBANCE 1 0 1 1 +DISTURB 2 0 2 2 +DISTRUSTING 2 0 2 2 +DISTRUSTFUL 1 0 1 1 +DISTRUST 1 0 1 1 +DISTRESSED 1 0 1 1 +DISTRESS 1 0 1 1 +DISTORTION 1 0 1 1 +DISTORTED 1 0 1 1 +DISTINGUISH 3 0 3 3 +DISTINCTLY 3 0 3 3 +DISTINCTIVE 1 0 1 1 +DISTINCTION 5 0 5 5 +DISTINCT 2 0 2 2 +DISTANT 4 0 4 4 +DISTANCE 6 0 6 6 +DISSENTERS 1 0 1 1 +DISSENTED 1 0 1 1 +DISSENT 2 0 2 2 +DISQUIETUDE 1 0 1 1 +DISPUTE 3 0 3 3 +DISPOSITIONS 1 0 1 1 +DISPOSITION 3 0 3 3 +DISPOSES 1 0 1 1 +DISPOSED 3 0 3 3 +DISPOSE 1 0 1 1 +DISPOSAL 1 0 1 1 +DISPLEASURE 1 0 1 1 +DISPLAYING 1 0 1 1 +DISPLAYED 1 0 1 1 +DISPLAY 1 0 1 1 +DISPERSED 3 0 3 3 +DISPENSE 1 0 1 1 +DISPENSATION 1 0 1 1 +DISPATCH 1 0 1 1 +DISPASSIONATE 1 0 1 1 +DISPARAGE 1 0 1 1 +DISOWN 1 0 1 1 +DISMISS 2 0 2 2 +DISMAYED 1 0 1 1 +DISMAY 1 0 1 1 +DISLOYAL 1 0 1 1 +DISLIKE 1 0 1 1 +DISK 1 0 1 1 +DISINCORPORATED 1 0 1 1 +DISHONOURED 1 0 1 1 +DISHES 2 0 2 2 +DISH 3 0 3 3 +DISGUSTED 1 0 1 1 +DISGUST 3 0 3 3 +DISGUISE 2 0 2 2 +DISGRACE 3 0 3 3 +DISENGAGED 1 0 1 1 +DISEASED 1 0 1 1 +DISDAINFUL 1 0 1 1 +DISCUSSION 1 0 1 1 +DISCUSSED 1 0 1 1 +DISCUSS 2 0 2 2 +DISCREETLY 1 0 1 1 +DISCREET 1 0 1 1 +DISCOVERY 2 0 2 2 +DISCOVERERS 1 0 1 1 +DISCOVERED 3 0 3 3 +DISCOVER 3 0 3 3 +DISCOURSE 2 0 2 2 +DISCOURAGED 1 0 1 1 +DISCOURAGE 2 0 2 2 +DISCOMFORT 1 0 1 1 +DISCOLOURED 1 0 1 1 +DISCLOSES 1 0 1 1 +DISCIPLINE 5 0 5 5 +DISCERN 1 0 1 1 +DISBURDENED 1 0 1 1 +DISASTER 1 0 1 1 +DISAPPOINTMENT 6 0 6 6 +DISAPPEARS 1 0 1 1 +DISAPPEARED 1 0 1 1 +DISAPPEAR 2 0 2 2 +DISADVANTAGE 1 0 1 1 +DISABILITIES 1 0 1 1 +DIRTY 1 0 1 1 +DIRECTLY 4 0 4 4 +DIRECTIONS 2 0 2 2 +DIRECTION 6 0 6 6 +DIRECTING 1 0 1 1 +DIRECTED 2 0 2 2 +DIRECT 8 0 8 8 +DIP 1 0 1 1 +DIOCLETIAN 1 0 1 1 +DINNER 8 0 8 8 +DINING 1 0 1 1 +DINE 1 0 1 1 +DINAH'S 1 0 1 1 +DINAH 2 0 2 2 +DIMLY 1 0 1 1 +DIMINUTION 2 0 2 2 +DIMINISH 1 0 1 1 +DIMENSIONS 1 0 1 1 +DIM 2 0 2 2 +DILIGENTLY 1 
0 1 1 +DILEMMA 1 0 1 1 +DILATED 1 0 1 1 +DIGNITY 4 0 4 4 +DIGNITARIES 1 0 1 1 +DIGNIFIED 4 0 4 4 +DIGBY 1 0 1 1 +DIG 1 0 1 1 +DIFFUSED 1 0 1 1 +DIFFICULTIES 3 0 3 3 +DIFFICULT 11 0 11 11 +DIFFERS 2 0 2 2 +DIFFERENTLY 1 0 1 1 +DIFFERENTIATION 1 0 1 1 +DIFFERENT 15 0 15 15 +DIFFERENCES 2 0 2 2 +DIFFERENCE 5 0 5 5 +DIFFER 1 0 1 1 +DIES 1 0 1 1 +DIED 5 0 5 5 +DIE 4 0 4 4 +DIDN'T 12 0 12 12 +DICE 1 0 1 1 +DIATRIBE 1 0 1 1 +DIAMONDS 1 0 1 1 +DIALOGUES 2 0 2 2 +DIALOGUE 3 0 3 3 +DIALECT 1 0 1 1 +DIAGRAMS 1 0 1 1 +DEWS 2 0 2 2 +DEW 2 0 2 2 +DEVOUR 3 0 3 3 +DEVOTION 1 0 1 1 +DEVOTES 1 0 1 1 +DEVOTED 2 0 2 2 +DEVOTE 1 0 1 1 +DEVOLVE 1 0 1 1 +DEVOID 1 0 1 1 +DEVISING 1 0 1 1 +DEVIL 1 0 1 1 +DEVICES 2 0 2 2 +DEVELOPMENTS 1 0 1 1 +DEVELOPMENT 6 0 6 6 +DEVELOPED 2 0 2 2 +DEVELOP 1 0 1 1 +DETOUR 1 0 1 1 +DETESTS 1 0 1 1 +DETESTED 1 0 1 1 +DETESTABLE 1 0 1 1 +DETERMINING 1 0 1 1 +DETERMINATION 1 0 1 1 +DETECT 1 0 1 1 +DETAINED 2 0 2 2 +DETAILS 2 0 2 2 +DETAIL 1 0 1 1 +DETACHMENT 1 0 1 1 +DESTRUCTIVE 1 0 1 1 +DESTRUCTION 2 0 2 2 +DESTROYED 1 0 1 1 +DESTINY 1 0 1 1 +DESTINED 2 0 2 2 +DESPITE 1 0 1 1 +DESPISE 1 0 1 1 +DESPERATELY 1 0 1 1 +DESPERATE 3 0 3 3 +DESPAIRING 1 0 1 1 +DESPAIR 4 0 4 4 +DESOLATION 1 0 1 1 +DESOLATE 1 0 1 1 +DESK 2 0 2 2 +DESIROUS 1 0 1 1 +DESIRES 1 0 1 1 +DESIRED 3 0 3 3 +DESIRE 4 0 4 4 +DESIRABLE 1 0 1 1 +DESIGNERS 1 0 1 1 +DESIGNATED 1 0 1 1 +DESIGN 3 0 3 3 +DESERVES 1 0 1 1 +DESERVED 1 0 1 1 +DESERVE 1 0 1 1 +DESERTS 1 0 1 1 +DESERTED 1 0 1 1 +DESERT 1 0 1 1 +DESCRIPTIONS 1 0 1 1 +DESCRIPTION 2 0 2 2 +DESCRIBING 1 0 1 1 +DESCRIBED 2 0 2 2 +DESCRIBE 3 0 3 3 +DESCENT 2 0 2 2 +DESCENDS 1 0 1 1 +DESCENDING 2 0 2 2 +DESCENDED 2 0 2 2 +DESCENDANTS 1 0 1 1 +DESCEND 4 0 4 4 +DERIVED 1 0 1 1 +DERIVATIVE 1 0 1 1 +DEPUTY 1 0 1 1 +DEPTHS 2 0 2 2 +DEPTH 4 0 4 4 +DEPRIVED 1 0 1 1 +DEPRESSION 1 0 1 1 +DEPRESSED 1 0 1 1 +DEPRECIATING 1 0 1 1 +DEPRECATION 3 0 3 3 +DEPRAVED 1 0 1 1 +DEPOSITION 1 0 1 1 +DEPORTMENT 1 0 1 1 +DEPENDS 1 0 1 1 +DEPENDENT 2 0 2 2 +DEPENDENCE 1 0 1 1 +DEPEND 2 0 2 2 +DEPARTURE 4 0 4 4 +DEPARTMENT 1 0 1 1 +DEPARTING 2 0 2 2 +DEPARTED 1 0 1 1 +DEPART 1 0 1 1 +DENY 5 0 5 5 +DENUNCIATION 1 0 1 1 +DENSITY 1 0 1 1 +DENSELY 1 0 1 1 +DENSE 2 0 2 2 +DENOTE 1 0 1 1 +DENIES 1 0 1 1 +DENIED 2 0 2 2 +DEMURELY 1 0 1 1 +DEMURE 1 0 1 1 +DEMONSTRATION 1 0 1 1 +DEMOCRATIC 2 0 2 2 +DEMANDS 1 0 1 1 +DEMANDED 2 0 2 2 +DELUSIVE 1 0 1 1 +DELLA 1 0 1 1 +DELIVERY 2 0 2 2 +DELIVERING 1 0 1 1 +DELIVERED 2 0 2 2 +DELIGHTFUL 4 0 4 4 +DELIGHTED 5 0 5 5 +DELIGHT 4 0 4 4 +DELICIOUSNESS 1 0 1 1 +DELICIOUS 1 0 1 1 +DELICATE 4 0 4 4 +DELICACY 1 0 1 1 +DELIBERATIONS 1 0 1 1 +DELIBERATION 1 0 1 1 +DELEGATED 1 0 1 1 +DELAYED 2 0 2 2 +DELAY 3 0 3 3 +DELAWARES 1 0 1 1 +DELAWARE 1 0 1 1 +DEIGNED 1 0 1 1 +DEIGN 1 0 1 1 +DEGREES 3 0 3 3 +DEGREE 6 0 6 6 +DEFYING 1 0 1 1 +DEFTLY 1 0 1 1 +DEFORMITIES 1 0 1 1 +DEFINITION 3 0 3 3 +DEFINITE 2 0 2 2 +DEFINES 1 0 1 1 +DEFINED 2 0 2 2 +DEFINE 5 0 5 5 +DEFIED 1 0 1 1 +DEFIANCE 2 0 2 2 +DEFERENCE 2 0 2 2 +DEFENDS 1 0 1 1 +DEFENDERS 1 0 1 1 +DEFENDED 1 0 1 1 +DEFENDANT 1 0 1 1 +DEFECT 1 0 1 1 +DEER 3 0 3 3 +DEEPLY 4 0 4 4 +DEEPENING 1 0 1 1 +DEEP 11 0 11 11 +DEEDS 1 0 1 1 +DEED 3 0 3 3 +DECREES 1 0 1 1 +DECREED 1 0 1 1 +DECREE 1 0 1 1 +DECORATIVE 1 0 1 1 +DECORATED 3 0 3 3 +DECOMPOSE 1 0 1 1 +DECLINING 2 0 2 2 +DECLINED 2 0 2 2 +DECLARES 1 0 1 1 +DECLARED 2 0 2 2 +DECLARE 2 0 2 2 +DECLARATION 1 0 1 1 +DECK 1 0 1 1 +DECISION 3 0 3 3 +DECIDEDLY 1 0 1 1 +DECIDED 3 0 3 3 +DECIDE 4 0 4 4 +DECEPTIVE 1 0 1 1 +DECENCY 4 0 4 4 +DECEMBER 1 0 1 1 +DECEIVING 1 0 1 1 +DECEIVED 1 0 1 1 
+DECEIT 1 0 1 1 +DECANTERS 1 0 1 1 +DECADES 1 0 1 1 +DEBATE 1 0 1 1 +DEATH 19 0 19 19 +DEARS 1 0 1 1 +DEARLY 2 0 2 2 +DEAREST 2 0 2 2 +DEAR 22 0 22 22 +DEALER 1 0 1 1 +DEAL 10 0 10 10 +DEAF 1 0 1 1 +DAZZLING 2 0 2 2 +DAZED 1 0 1 1 +DAYS 16 0 16 16 +DAYLIGHT 2 0 2 2 +DAWN'S 1 0 1 1 +DAWN 2 0 2 2 +DAVID 8 0 8 8 +DAUNTLESS 1 0 1 1 +DAUGHTERS 1 0 1 1 +DAUGHTER 9 0 9 9 +DATING 1 0 1 1 +DATE 1 0 1 1 +DATA 2 0 2 2 +DARTED 3 0 3 3 +DARLING 1 0 1 1 +DARKNESS 3 0 3 3 +DARKENED 1 0 1 1 +DARK 17 0 17 17 +DARING 2 0 2 2 +DARED 3 0 3 3 +DARE 3 0 3 3 +DANGERS 1 0 1 1 +DANGEROUS 4 0 4 4 +DANGER 9 0 9 9 +DANES 1 0 1 1 +DANCING 2 0 2 2 +DANCES 1 0 1 1 +DANCERS 1 0 1 1 +DANCER 1 0 1 1 +DANCED 2 0 2 2 +DANCE 4 0 4 4 +DAMSEL 1 0 1 1 +DAMNED 1 0 1 1 +DAMNABLE 1 0 1 1 +DAMASK 1 0 1 1 +DAMAGING 1 0 1 1 +DAMAGE 2 0 2 2 +DAIRY 5 0 5 5 +DAINTY 1 0 1 1 +DAILY 3 0 3 3 +DAFT 1 0 1 1 +D 2 0 2 2 +CYRIL 2 0 2 2 +CYPRESS 1 0 1 1 +CYNTHIA'S 1 0 1 1 +CYNTHIA 3 0 3 3 +CYMBALS 1 0 1 1 +CUTTINGS 1 0 1 1 +CUTTERS 1 0 1 1 +CUT 5 0 5 5 +CUSTOMS 1 0 1 1 +CUSTOMARILY 1 0 1 1 +CUSTOM 2 0 2 2 +CUSTODY 2 0 2 2 +CUSHIONED 1 0 1 1 +CURVE 2 0 2 2 +CURTAINS 1 0 1 1 +CURTAIN 3 0 3 3 +CURSE 1 0 1 1 +CURRENT 8 0 8 8 +CURLY 1 0 1 1 +CURIOUSLY 1 0 1 1 +CURIOUS 4 0 4 4 +CURIOSITY 2 0 2 2 +CURBSTONE 1 0 1 1 +CUPS 1 0 1 1 +CUPBOARD 2 0 2 2 +CUP 3 0 3 3 +CUNNING 3 0 3 3 +CUMBERLAND'S 1 0 1 1 +CULTURE 4 0 4 4 +CULTIVATING 1 0 1 1 +CULTIVATE 1 0 1 1 +CULPRIT 1 0 1 1 +CULMINATING 2 0 2 2 +CUFFS 1 0 1 1 +CUB 1 0 1 1 +CRYSTALLIZE 1 0 1 1 +CRYSTAL 4 0 4 4 +CRYING 1 0 1 1 +CRY 5 0 5 5 +CRUSHED 1 0 1 1 +CRUSH 3 0 3 3 +CRUMBLED 1 0 1 1 +CRUMBLE 1 0 1 1 +CRUISING 1 0 1 1 +CRUELTY 2 0 2 2 +CRUEL 1 0 1 1 +CRUCIFIX 2 0 2 2 +CRUCIFIED 1 0 1 1 +CROWNS 1 0 1 1 +CROWNING 2 0 2 2 +CROWN 6 0 6 6 +CROWDING 1 0 1 1 +CROWDED 2 0 2 2 +CROWD 5 0 5 5 +CROSSTREES 1 0 1 1 +CROSSLY 1 0 1 1 +CROSSING 1 0 1 1 +CROSSED 3 0 3 3 +CROPS 1 0 1 1 +CROP 1 0 1 1 +CROOKED 3 0 3 3 +CRITICISM 1 0 1 1 +CRITICALLY 1 0 1 1 +CRISIS 1 0 1 1 +CRIMSON 1 0 1 1 +CRIMINAL 3 0 3 3 +CRIME 3 0 3 3 +CRIES 3 0 3 3 +CRIED 23 0 23 23 +CRESTED 1 0 1 1 +CREPT 1 0 1 1 +CREEPING 2 0 2 2 +CREEP 1 0 1 1 +CREEK 2 0 2 2 +CREED 1 0 1 1 +CREDIT 2 0 2 2 +CREATURES 2 0 2 2 +CREATURE 8 0 8 8 +CREATOR 1 0 1 1 +CREATIVE 1 0 1 1 +CREATIONS 1 0 1 1 +CREATION 2 0 2 2 +CREATING 1 0 1 1 +CREATES 1 0 1 1 +CREATED 2 0 2 2 +CREATE 3 0 3 3 +CREAM 1 0 1 1 +CREAKED 1 0 1 1 +CRAZY 2 0 2 2 +CRAWLING 1 0 1 1 +CRAWL 1 0 1 1 +CRASWELLER 1 0 1 1 +CRANED 1 0 1 1 +CRANE 1 0 1 1 +CRAMPED 1 0 1 1 +CRACKING 1 0 1 1 +CRACKED 2 0 2 2 +COWARDLY 1 0 1 1 +COWARD 1 0 1 1 +COW 2 0 2 2 +COVERT 1 0 1 1 +COVERING 1 0 1 1 +COVERED 2 0 2 2 +COVER 2 0 2 2 +COVENANTERS 5 0 5 5 +COUSINS 3 0 3 3 +COUSIN'S 2 0 2 2 +COUSIN 7 0 7 7 +COURTS 2 0 2 2 +COURTIERS 2 0 2 2 +COURTESY 2 0 2 2 +COURT'S 2 0 2 2 +COURSE 19 0 19 19 +COURAGEOUS 1 0 1 1 +COURAGE 6 0 6 6 +COUPLE 1 0 1 1 +COUNTY 7 0 7 7 +COUNTRIES 1 0 1 1 +COUNTING 1 0 1 1 +COUNTERPART 1 0 1 1 +COUNTERFEITED 1 0 1 1 +COUNTERACT 1 0 1 1 +COUNTENANCE 3 0 3 3 +COUNT 15 0 15 15 +COUNSELS 1 0 1 1 +COUNSELLED 2 0 2 2 +COUNSEL 1 0 1 1 +COULDN'T 6 0 6 6 +COUCH 1 0 1 1 +COTTAGE 2 0 2 2 +COSTUME 2 0 2 2 +CORRUPTION 1 0 1 1 +CORRIDOR 1 0 1 1 +CORRESPONDING 1 0 1 1 +CORRESPOND 1 0 1 1 +CORRECTLY 1 0 1 1 +CORRECTED 1 0 1 1 +CORRECT 3 0 3 3 +CORNERS 4 0 4 4 +CORNER 13 0 13 13 +CORN 5 0 5 5 +CORDIALLY 1 0 1 1 +CORDIALITY 1 0 1 1 +CORALIE 1 0 1 1 +CORAL 1 0 1 1 +COQUETRY 1 0 1 1 +COPY 2 0 2 2 +COPPER 1 0 1 1 +COPIED 2 0 2 2 +COOKERY 2 0 2 2 +COOKED 1 0 1 1 +COOK 2 0 2 2 +CONVULSION 1 0 1 1 +CONVIVIALITY 1 0 1 1 +CONVINCING 2 
0 2 2 +CONVINCED 2 0 2 2 +CONVICTIONS 2 0 2 2 +CONVICTION 2 0 2 2 +CONVEYED 1 0 1 1 +CONVEXITY 1 0 1 1 +CONVERTS 1 0 1 1 +CONVERSION 1 0 1 1 +CONVERSING 1 0 1 1 +CONVERSE 2 0 2 2 +CONVERSATIONS 1 0 1 1 +CONVERSATIONAL 1 0 1 1 +CONVERSATION 6 0 6 6 +CONVENTIONALITY 1 0 1 1 +CONVENTIONAL 1 0 1 1 +CONVENTION 1 0 1 1 +CONTROLLING 1 0 1 1 +CONTROL 4 0 4 4 +CONTRIVED 2 0 2 2 +CONTRIVANCE 2 0 2 2 +CONTRITION 1 0 1 1 +CONTRITE 1 0 1 1 +CONTRIBUTE 1 0 1 1 +CONTRASTING 1 0 1 1 +CONTRAST 4 0 4 4 +CONTRARY 5 0 5 5 +CONTRADICTIONS 1 0 1 1 +CONTRACTION 2 0 2 2 +CONTINUOUSLY 2 0 2 2 +CONTINUOUS 1 0 1 1 +CONTINUED 14 0 14 14 +CONTINUE 1 0 1 1 +CONTINUALLY 2 0 2 2 +CONTINUAL 3 0 3 3 +CONTINGENCY 1 0 1 1 +CONTINENT 1 0 1 1 +CONTI 1 0 1 1 +CONTESTED 1 0 1 1 +CONTEST 1 0 1 1 +CONTENTS 1 0 1 1 +CONTENTMENT 1 0 1 1 +CONTENTEDLY 1 0 1 1 +CONTENT 1 0 1 1 +CONTEMPTIBLE 1 0 1 1 +CONTEMPT 2 0 2 2 +CONTEMPORARY 1 0 1 1 +CONTEMPLATION 1 0 1 1 +CONTEMPLATED 1 0 1 1 +CONTAMINATION 1 0 1 1 +CONTAMINATED 1 0 1 1 +CONTAINS 1 0 1 1 +CONTAINERS 1 0 1 1 +CONTAGION 1 0 1 1 +CONTACT 1 0 1 1 +CONSUMPTION 13 0 13 13 +CONSUMERS 2 0 2 2 +CONSUMER'S 1 0 1 1 +CONSUMER 5 0 5 5 +CONSUMED 1 0 1 1 +CONSUME 2 0 2 2 +CONSULTED 1 0 1 1 +CONSULTATION 1 0 1 1 +CONSULT 1 0 1 1 +CONSTRUCTION 4 0 4 4 +CONSTRUCTED 1 0 1 1 +CONSTRAINEDLY 1 0 1 1 +CONSTRAINED 1 0 1 1 +CONSTITUTION 3 0 3 3 +CONSTITUTES 1 0 1 1 +CONSTITUTED 1 0 1 1 +CONSTITUTE 1 0 1 1 +CONSTANTINE 1 0 1 1 +CONSTANT 3 0 3 3 +CONSTANCY 1 0 1 1 +CONSPIRACY 2 0 2 2 +CONSPICUOUS 8 0 8 8 +CONSOLE 1 0 1 1 +CONSOLATION 1 0 1 1 +CONSISTENTLY 1 0 1 1 +CONSIDERING 1 0 1 1 +CONSIDERED 5 0 5 5 +CONSIDERATIONS 1 0 1 1 +CONSIDERATE 1 0 1 1 +CONSIDERABLY 1 0 1 1 +CONSIDERABLE 6 0 6 6 +CONSIDER 1 0 1 1 +CONSERVATION 1 0 1 1 +CONSEQUENTLY 1 0 1 1 +CONSEQUENT 2 0 2 2 +CONSEQUENCES 1 0 1 1 +CONSEQUENCE 5 0 5 5 +CONSENT 4 0 4 4 +CONSEIL 6 0 6 6 +CONSECRATED 2 0 2 2 +CONSCIOUSNESS 2 0 2 2 +CONSCIOUS 3 0 3 3 +CONSCIENCES 1 0 1 1 +CONSCIENCE 4 0 4 4 +CONQUERED 2 0 2 2 +CONQUER 1 0 1 1 +CONNECTED 3 0 3 3 +CONNECT 2 0 2 2 +CONJURATION 1 0 1 1 +CONJUNCTURE 1 0 1 1 +CONJECTURE 1 0 1 1 +CONGRESS 1 0 1 1 +CONGREGATED 1 0 1 1 +CONGRATULATIONS 1 0 1 1 +CONGRATULATION 1 0 1 1 +CONGRATULATE 1 0 1 1 +CONGO 1 0 1 1 +CONGENIAL 1 0 1 1 +CONFUSION 2 0 2 2 +CONFUSES 1 0 1 1 +CONFUSED 3 0 3 3 +CONFOUNDEDLY 1 0 1 1 +CONFLICTING 1 0 1 1 +CONFLICT 3 0 3 3 +CONFISCATED 1 0 1 1 +CONFIRMS 1 0 1 1 +CONFIRMED 3 0 3 3 +CONFINED 2 0 2 2 +CONFIDENT 1 0 1 1 +CONFIDENCE 7 0 7 7 +CONFIDE 1 0 1 1 +CONFIDANTS 1 0 1 1 +CONFESSION 1 0 1 1 +CONFESSED 1 0 1 1 +CONFESS 4 0 4 4 +CONFERS 1 0 1 1 +CONFEDERATE 1 0 1 1 +CONDUCTS 1 0 1 1 +CONDUCTORS 2 0 2 2 +CONDUCTED 1 0 1 1 +CONDUCT 6 0 6 6 +CONDUCIVE 1 0 1 1 +CONDITIONS 3 0 3 3 +CONDITION 11 0 11 11 +CONDENSATION 1 0 1 1 +CONDEMNATION 2 0 2 2 +CONCUR 1 0 1 1 +CONCOURSE 1 0 1 1 +CONCORD 1 0 1 1 +CONCLUSION 2 0 2 2 +CONCERTING 1 0 1 1 +CONCERNING 4 0 4 4 +CONCERNED 8 0 8 8 +CONCERN 1 0 1 1 +CONCEPTIONS 2 0 2 2 +CONCEPTION 2 0 2 2 +CONCEPT 1 0 1 1 +CONCEIVED 1 0 1 1 +CONCEIVE 2 0 2 2 +CONCEALING 1 0 1 1 +CONCEALED 1 0 1 1 +CONCEAL 1 0 1 1 +COMRADES 3 0 3 3 +COMPULSIVE 1 0 1 1 +COMPULSION 1 0 1 1 +COMPREHENSIVE 1 0 1 1 +COMPREHENDED 1 0 1 1 +COMPREHEND 1 0 1 1 +COMPOUND 1 0 1 1 +COMPOSURE 2 0 2 2 +COMPOSED 1 0 1 1 +COMPOSE 1 0 1 1 +COMPORT 1 0 1 1 +COMPONENT 1 0 1 1 +COMPLY 2 0 2 2 +COMPLIMENTARY 1 0 1 1 +COMPLIMENT 1 0 1 1 +COMPLICATED 1 0 1 1 +COMPLIANCE 2 0 2 2 +COMPLEXION 2 0 2 2 +COMPLETELY 3 0 3 3 +COMPLETED 2 0 2 2 +COMPLETE 2 0 2 2 +COMPLEMENT 1 0 1 1 +COMPLAINTS 1 0 1 1 +COMPLAINT 1 0 1 1 
+COMPLAINING 1 0 1 1 +COMPLAINEST 1 0 1 1 +COMPLAIN 2 0 2 2 +COMPLACENTLY 1 0 1 1 +COMPLACENCY 1 0 1 1 +COMPETITION 1 0 1 1 +COMPETE 1 0 1 1 +COMPENSATION 1 0 1 1 +COMPELS 1 0 1 1 +COMPELLED 2 0 2 2 +COMPEL 1 0 1 1 +COMPASS 1 0 1 1 +COMPARISON 1 0 1 1 +COMPARED 3 0 3 3 +COMPARE 1 0 1 1 +COMPARATIVELY 2 0 2 2 +COMPARATIVE 1 0 1 1 +COMPANY 11 0 11 11 +COMPANIONSHIP 2 0 2 2 +COMPANIONS 2 0 2 2 +COMPANIONLESS 1 0 1 1 +COMPANION 5 0 5 5 +COMPANIES 3 0 3 3 +COMPACT 1 0 1 1 +COMMUNITY 3 0 3 3 +COMMUNITIES 1 0 1 1 +COMMUNION 1 0 1 1 +COMMUNICATED 2 0 2 2 +COMMUNICATE 1 0 1 1 +COMMOTION 1 0 1 1 +COMMONLY 1 0 1 1 +COMMITTING 1 0 1 1 +COMMITTEE 3 0 3 3 +COMMITTED 2 0 2 2 +COMMITTAL 1 0 1 1 +COMMITS 1 0 1 1 +COMMIT 1 0 1 1 +COMMISSIONS 1 0 1 1 +COMMISSIONERS 1 0 1 1 +COMMISSION 1 0 1 1 +COMMISERATION 1 0 1 1 +COMMERCIAL 2 0 2 2 +COMMENTS 1 0 1 1 +COMMENTED 1 0 1 1 +COMMENTATORS 1 0 1 1 +COMMENTARY 2 0 2 2 +COMMENT 1 0 1 1 +COMMENDED 1 0 1 1 +COMMEND 1 0 1 1 +COMMENCEMENT 1 0 1 1 +COMMENCE 1 0 1 1 +COMMANDS 1 0 1 1 +COMMANDMENT 2 0 2 2 +COMMANDERS 1 0 1 1 +COMMANDER 2 0 2 2 +COMMANDED 1 0 1 1 +COMFORTS 2 0 2 2 +COMFORTING 1 0 1 1 +COMFORTED 2 0 2 2 +COMFORTABLE 3 0 3 3 +COMFORT 8 0 8 8 +COMETH 1 0 1 1 +COMES 10 0 10 10 +COMELY 1 0 1 1 +COMEDY 1 0 1 1 +COMEDIES 2 0 2 2 +COME 51 0 51 51 +COMBINED 2 0 2 2 +COMBINE 1 0 1 1 +COMBINATIONS 1 0 1 1 +COMBINATION 2 0 2 2 +COMBAT 2 0 2 2 +COMBASH 1 0 1 1 +COMB 1 0 1 1 +COLUMNS 1 0 1 1 +COLUMN 1 0 1 1 +COLORISTS 2 0 2 2 +COLORIST 2 0 2 2 +COLONY 3 0 3 3 +COLONIAL 1 0 1 1 +COLONEL 1 0 1 1 +COLLEGE 4 0 4 4 +COLLECTIONS 1 0 1 1 +COLLECTION 1 0 1 1 +COLLECTING 1 0 1 1 +COLLARS 1 0 1 1 +COLLAR 2 0 2 2 +COLLAPSED 1 0 1 1 +COLD 9 0 9 9 +COLBERT 1 0 1 1 +COINED 1 0 1 1 +COINCIDE 1 0 1 1 +COFFEE 6 0 6 6 +COCK 1 0 1 1 +COBBLER 1 0 1 1 +COAXED 1 0 1 1 +COAT 1 0 1 1 +COASTS 1 0 1 1 +COAST 3 0 3 3 +COAL 1 0 1 1 +COACHMAN 1 0 1 1 +COACH 1 0 1 1 +CLUTCHING 1 0 1 1 +CLUTCH 1 0 1 1 +CLUNG 1 0 1 1 +CLUMSINESS 1 0 1 1 +CLOUDS 6 0 6 6 +CLOUD 11 0 11 11 +CLOTTED 1 0 1 1 +CLOTHING 1 0 1 1 +CLOTHES 5 0 5 5 +CLOTHED 1 0 1 1 +CLOTH 4 0 4 4 +CLOSET 1 0 1 1 +CLOSER 1 0 1 1 +CLOSELY 6 0 6 6 +CLOSED 2 0 2 2 +CLOSE 10 0 10 10 +CLOAKS 2 0 2 2 +CLIMBING 1 0 1 1 +CLIMATE 2 0 2 2 +CLIFF 2 0 2 2 +CLIENTS 1 0 1 1 +CLICKED 1 0 1 1 +CLEVERNESS 3 0 3 3 +CLEVER 2 0 2 2 +CLERK 2 0 2 2 +CLERICAL 1 0 1 1 +CLERGYMAN'S 2 0 2 2 +CLERGY 2 0 2 2 +CLEARNESS 1 0 1 1 +CLEARLY 5 0 5 5 +CLEARING 1 0 1 1 +CLEAREST 1 0 1 1 +CLEAR 10 0 10 10 +CLEANED 1 0 1 1 +CLAY 5 0 5 5 +CLAWS 2 0 2 2 +CLAUSE 1 0 1 1 +CLASSIFYING 1 0 1 1 +CLASSIFIER 1 0 1 1 +CLASSIFICATION 1 0 1 1 +CLASSIC 2 0 2 2 +CLASSED 3 0 3 3 +CLASPING 1 0 1 1 +CLASPED 2 0 2 2 +CLASHING 2 0 2 2 +CLARIFIED 1 0 1 1 +CLAP 1 0 1 1 +CLAMOROUS 1 0 1 1 +CLAIMS 1 0 1 1 +CLAIMED 1 0 1 1 +CLAIM 2 0 2 2 +CIVILIZATION 2 0 2 2 +CIVIL 3 0 3 3 +CITY 15 0 15 15 +CITIZENS 4 0 4 4 +CITIZEN 2 0 2 2 +CITIES 2 0 2 2 +CIRCUMVENTION 1 0 1 1 +CIRCUMSTANCES 4 0 4 4 +CIRCUMSTANCE 3 0 3 3 +CIRCUMNAVIGATION 1 0 1 1 +CIRCUMFERENCE 1 0 1 1 +CIRCULATED 1 0 1 1 +CIRCUITS 1 0 1 1 +CIRCUITOUS 1 0 1 1 +CIRCUIT 1 0 1 1 +CIRCLE 7 0 7 7 +CIGARS 1 0 1 1 +CIGARETTE 2 0 2 2 +CHURNING 1 0 1 1 +CHURCHES 1 0 1 1 +CHURCH 17 0 17 17 +CHUCKLING 1 0 1 1 +CHUBBY 1 0 1 1 +CHRONICLED 1 0 1 1 +CHRISTMAS 4 0 4 4 +CHRISTIANS 1 0 1 1 +CHRISTIANITY 2 0 2 2 +CHRIST'S 1 0 1 1 +CHRIST 22 0 22 22 +CHRISM 1 0 1 1 +CHOSEN 3 0 3 3 +CHOPPED 1 0 1 1 +CHOOSING 1 0 1 1 +CHOOSE 3 0 3 3 +CHOKING 1 0 1 1 +CHOICE 2 0 2 2 +CHOCOLATE 1 0 1 1 +CHIP 1 0 1 1 +CHINGACHGOOK 2 0 2 2 +CHINA 1 0 1 1 +CHIN 2 0 2 2 +CHIMNEY 2 0 2 2 +CHILDREN'S 
1 0 1 1 +CHILDREN 18 0 18 18 +CHILDISH 2 0 2 2 +CHILDHOOD'S 1 0 1 1 +CHILDHOOD 3 0 3 3 +CHILD'S 2 0 2 2 +CHILD 19 0 19 19 +CHIEFTAIN 1 0 1 1 +CHIEFLY 4 0 4 4 +CHIEF 3 0 3 3 +CHIAROSCURISTS 1 0 1 1 +CHESTNUTS 1 0 1 1 +CHESTNUT 3 0 3 3 +CHEST 2 0 2 2 +CHERRY 1 0 1 1 +CHERRIES 3 0 3 3 +CHERISH 1 0 1 1 +CHEMICALS 1 0 1 1 +CHEMICAL 1 0 1 1 +CHELSEA 1 0 1 1 +CHELFORD 4 0 4 4 +CHEERS 1 0 1 1 +CHEERFULLY 2 0 2 2 +CHEERFUL 2 0 2 2 +CHEEKS 1 0 1 1 +CHEEK 2 0 2 2 +CHECKS 1 0 1 1 +CHECKED 3 0 3 3 +CHAUCER'S 1 0 1 1 +CHAUCER 1 0 1 1 +CHATTERBOX 1 0 1 1 +CHASING 1 0 1 1 +CHARTER 1 0 1 1 +CHARMING 1 0 1 1 +CHARMED 1 0 1 1 +CHARM 1 0 1 1 +CHARLOTTE 2 0 2 2 +CHARLESTOWN 1 0 1 1 +CHARLESTON 1 0 1 1 +CHARITY 1 0 1 1 +CHARGED 2 0 2 2 +CHARGE 8 0 8 8 +CHARACTERS 2 0 2 2 +CHARACTERIZES 1 0 1 1 +CHARACTERIZED 1 0 1 1 +CHARACTERISTIC 10 0 10 10 +CHARACTER 14 0 14 14 +CHAPTERS 1 0 1 1 +CHAPTER 3 0 3 3 +CHAPEL 1 0 1 1 +CHAP 1 0 1 1 +CHAOS 1 0 1 1 +CHANNEL 2 0 2 2 +CHANGING 2 0 2 2 +CHANGES 3 0 3 3 +CHANCES 1 0 1 1 +CHANCE 6 0 6 6 +CHAMBERS 2 0 2 2 +CHAMBER 4 0 4 4 +CHALICE 1 0 1 1 +CHAIRS 4 0 4 4 +CHAIR 15 0 15 15 +CHAIN 1 0 1 1 +CETERA 3 0 3 3 +CERTITUDE 1 0 1 1 +CERTAINLY 8 0 8 8 +CERTAIN 12 0 12 12 +CEREMONIES 2 0 2 2 +CEREMONIAL 1 0 1 1 +CENTURY 1 0 1 1 +CENTURIES 1 0 1 1 +CENTRAL 5 0 5 5 +CENTIPEDE 1 0 1 1 +CELLS 1 0 1 1 +CELLAR 1 0 1 1 +CELL 1 0 1 1 +CELESTIAL 2 0 2 2 +CELEBRITY 1 0 1 1 +CELEBRATION 1 0 1 1 +CELEBRATED 4 0 4 4 +CELEBRATE 1 0 1 1 +CEDAR 1 0 1 1 +CEASING 1 0 1 1 +CEASE 2 0 2 2 +CAVERN 2 0 2 2 +CAVALRY 2 0 2 2 +CAVALIERS 1 0 1 1 +CAUTIOUSLY 1 0 1 1 +CAUTION 1 0 1 1 +CAUSES 1 0 1 1 +CAUSE 9 0 9 9 +CATTLE 1 0 1 1 +CATS 1 0 1 1 +CATHOLIC 3 0 3 3 +CATHEDRAL 1 0 1 1 +CATECHISM 2 0 2 2 +CATCHING 1 0 1 1 +CATCHES 1 0 1 1 +CATCH 3 0 3 3 +CATASTROPHE 1 0 1 1 +CATAPULT 1 0 1 1 +CAT 7 0 7 7 +CASTLE 1 0 1 1 +CASES 6 0 6 6 +CASEMATES 1 0 1 1 +CASE 16 0 16 16 +CARTS 1 0 1 1 +CART 1 0 1 1 +CARRYING 4 0 4 4 +CARRY 7 0 7 7 +CARROTS 1 0 1 1 +CARRIES 1 0 1 1 +CARRIED 13 0 13 13 +CARRIAGES 1 0 1 1 +CARRIAGE 8 0 8 8 +CARPETED 1 0 1 1 +CARING 1 0 1 1 +CAREY 3 0 3 3 +CARELESSNESS 1 0 1 1 +CARELESS 1 0 1 1 +CAREFULLY 7 0 7 7 +CAREFUL 5 0 5 5 +CAREER 4 0 4 4 +CARED 4 0 4 4 +CARE 13 0 13 13 +CARD 1 0 1 1 +CAPTURED 1 0 1 1 +CAPTIVE 2 0 2 2 +CAPTIVATE 1 0 1 1 +CAPTAIN 27 0 27 27 +CAPSIZE 1 0 1 1 +CAPRICE 1 0 1 1 +CAPLESS 1 0 1 1 +CAPITAL 1 0 1 1 +CAPACITY 3 0 3 3 +CAPABLE 3 0 3 3 +CAP'N 4 0 4 4 +CAP 7 0 7 7 +CANVASS 1 0 1 1 +CANVAS 4 0 4 4 +CANST 1 0 1 1 +CANS 1 0 1 1 +CANOPY 1 0 1 1 +CANON 1 0 1 1 +CANNON 1 0 1 1 +CANE 1 0 1 1 +CANDLESTICKS 1 0 1 1 +CANDLES 2 0 2 2 +CANARY 1 0 1 1 +CANAL 1 0 1 1 +CAN'T 21 0 21 21 +CAMPS 1 0 1 1 +CAMPAIGN 2 0 2 2 +CAMP 1 0 1 1 +CAME 44 0 44 44 +CALVINISTIC 1 0 1 1 +CALMNESS 2 0 2 2 +CALMED 1 0 1 1 +CALM 5 0 5 5 +CALLS 5 0 5 5 +CALLOUS 1 0 1 1 +CALLING 2 0 2 2 +CALLED 24 0 24 24 +CALL 10 0 10 10 +CALHOUN 1 0 1 1 +CALDWELL 1 0 1 1 +CAKES 2 0 2 2 +CAKE 1 0 1 1 +CABINET 3 0 3 3 +CABIN 2 0 2 2 +CABALISTIC 1 0 1 1 +C 1 0 1 1 +BUTTONING 1 0 1 1 +BUTTON 1 0 1 1 +BUTTERFLY 1 0 1 1 +BUTTED 1 0 1 1 +BUTT 1 0 1 1 +BUTLER 2 0 2 2 +BUTCHERY 2 0 2 2 +BUTCHERED 1 0 1 1 +BUSY 1 0 1 1 +BUSINESS 5 0 5 5 +BUSHES 4 0 4 4 +BUSHEL 1 0 1 1 +BURSTS 1 0 1 1 +BURST 5 0 5 5 +BURNT 1 0 1 1 +BURNS 1 0 1 1 +BURNING 2 0 2 2 +BURNED 1 0 1 1 +BURNE 1 0 1 1 +BURN 4 0 4 4 +BURIED 2 0 2 2 +BURGOS 1 0 1 1 +BURGLARS 1 0 1 1 +BURDEN 1 0 1 1 +BUOYANT 2 0 2 2 +BUNDLE 1 0 1 1 +BULL 2 0 2 2 +BUILT 2 0 2 2 +BUILDS 1 0 1 1 +BUILDING 5 0 5 5 +BUILD 1 0 1 1 +BUGGY 1 0 1 1 +BUDDING 1 0 1 1 +BUCKLING 1 0 1 1 +BUCKLES 1 0 1 1 +BUCKINGHAM 1 
0 1 1 +BUCHANAN 1 0 1 1 +BUBBLING 1 0 1 1 +BUBBLE'S 1 0 1 1 +BRUTE 1 0 1 1 +BRUTALITY 1 0 1 1 +BRUTAL 1 0 1 1 +BRUSH 1 0 1 1 +BRUISED 1 0 1 1 +BROWSED 1 0 1 1 +BROW 1 0 1 1 +BROUGHT 14 0 14 14 +BROTHERS 5 0 5 5 +BROTHER 8 0 8 8 +BROTHELS 1 0 1 1 +BROOM 1 0 1 1 +BROOKS 1 0 1 1 +BROOKLYN 1 0 1 1 +BROODING 3 0 3 3 +BRONTES 1 0 1 1 +BROKE 1 0 1 1 +BROADLY 1 0 1 1 +BROADEST 1 0 1 1 +BROAD 11 0 11 11 +BRITISH 2 0 2 2 +BRITAIN 1 0 1 1 +BRISTLING 1 0 1 1 +BRISK 1 0 1 1 +BRINGING 5 0 5 5 +BRING 9 0 9 9 +BRIM 2 0 2 2 +BRILLIANT 5 0 5 5 +BRILLIANCY 1 0 1 1 +BRIGHTNESS 1 0 1 1 +BRIGHTLY 1 0 1 1 +BRIGHTEST 1 0 1 1 +BRIGHTER 1 0 1 1 +BRIGHTENED 2 0 2 2 +BRIGHT 16 0 16 16 +BRIGANTINE 1 0 1 1 +BRIEFLY 1 0 1 1 +BRIDGE 4 0 4 4 +BRIDE 1 0 1 1 +BRICK 2 0 2 2 +BREWING 1 0 1 1 +BRETHREN 2 0 2 2 +BREEZE 1 0 1 1 +BRED 1 0 1 1 +BREATHING 4 0 4 4 +BREATH 10 0 10 10 +BREASTPLATE 1 0 1 1 +BREAST 2 0 2 2 +BREAKWATER 1 0 1 1 +BREAKING 2 0 2 2 +BREAKFASTING 1 0 1 1 +BREAKERS 1 0 1 1 +BREAD 5 0 5 5 +BRAVELY 1 0 1 1 +BRAVE 2 0 2 2 +BRANWELL 1 0 1 1 +BRANDY 1 0 1 1 +BRANDS 2 0 2 2 +BRANDON 4 0 4 4 +BRANDED 1 0 1 1 +BRANCHES 8 0 8 8 +BRANCH 4 0 4 4 +BRAINS 3 0 3 3 +BRAIN 2 0 2 2 +BRAIDS 1 0 1 1 +BRAIDED 1 0 1 1 +BRAID 1 0 1 1 +BRACTON'S 1 0 1 1 +BRACTON 1 0 1 1 +BRACELETS 1 0 1 1 +BRACELET 1 0 1 1 +BOY'S 3 0 3 3 +BOY 17 0 17 17 +BOXES 1 0 1 1 +BOX 8 0 8 8 +BOWING 1 0 1 1 +BOWER 1 0 1 1 +BOWED 1 0 1 1 +BOW 4 0 4 4 +BOUT 1 0 1 1 +BOUQUETS 1 0 1 1 +BOUND 6 0 6 6 +BOUGHS 1 0 1 1 +BOTTOMS 1 0 1 1 +BOTTOM 7 0 7 7 +BOTTLES 2 0 2 2 +BOTTLE 1 0 1 1 +BOTHER 1 0 1 1 +BOTH 34 0 34 34 +BOTANY 1 0 1 1 +BOTANICAL 2 0 2 2 +BOSOM 2 0 2 2 +BORN 8 0 8 8 +BORE 2 0 2 2 +BORDERS 4 0 4 4 +BORDERING 1 0 1 1 +BORDERED 1 0 1 1 +BORDER 3 0 3 3 +BOOTS 2 0 2 2 +BOOLOOROO 12 0 12 12 +BOOKKEEPER 1 0 1 1 +BOOK 4 0 4 4 +BONY 1 0 1 1 +BONNET 1 0 1 1 +BONES 2 0 2 2 +BONDAGE 1 0 1 1 +BOND 3 0 3 3 +BOLTON 1 0 1 1 +BOLDLY 3 0 3 3 +BOLDEST 1 0 1 1 +BOILED 1 0 1 1 +BOIL 1 0 1 1 +BOGUS 3 0 3 3 +BOGGS 2 0 2 2 +BODILY 3 0 3 3 +BODIES 3 0 3 3 +BOAT 2 0 2 2 +BOASTING 2 0 2 2 +BOARDED 2 0 2 2 +BOARD 9 0 9 9 +BLUSHING 2 0 2 2 +BLUSHED 1 0 1 1 +BLUSH 1 0 1 1 +BLUNT 1 0 1 1 +BLUFF 1 0 1 1 +BLUESKINS 2 0 2 2 +BLUES 1 0 1 1 +BLUE 21 0 21 21 +BLOWN 2 0 2 2 +BLOWING 1 0 1 1 +BLOW 2 0 2 2 +BLOOM 1 0 1 1 +BLOODY 1 0 1 1 +BLOODSHED 1 0 1 1 +BLOODED 1 0 1 1 +BLOOD 6 0 6 6 +BLOCKS 1 0 1 1 +BLISS 1 0 1 1 +BLIND 1 0 1 1 +BLEW 1 0 1 1 +BLESSED 3 0 3 3 +BLESS 2 0 2 2 +BLEED 1 0 1 1 +BLEACHED 1 0 1 1 +BLAZING 1 0 1 1 +BLAZED 1 0 1 1 +BLAZE 2 0 2 2 +BLANK 2 0 2 2 +BLAME 1 0 1 1 +BLADE 2 0 2 2 +BLACKSTONE 1 0 1 1 +BLACKNESSES 1 0 1 1 +BLACKNESS 1 0 1 1 +BLACKER 2 0 2 2 +BLACK 22 0 22 22 +BITTER 1 0 1 1 +BITS 1 0 1 1 +BITES 1 0 1 1 +BITE 1 0 1 1 +BIT 9 0 9 9 +BISHOPS 5 0 5 5 +BIRTH 2 0 2 2 +BIRMINGHAM 1 0 1 1 +BIRDS 4 0 4 4 +BIRD 4 0 4 4 +BIRCHES 1 0 1 1 +BINDING 1 0 1 1 +BIND 1 0 1 1 +BILLED 1 0 1 1 +BILL 6 0 6 6 +BIG 12 0 12 12 +BIDDING 1 0 1 1 +BID 1 0 1 1 +BIBLE 1 0 1 1 +BEYOND 6 0 6 6 +BEWILDERMENT 1 0 1 1 +BEWILDERED 6 0 6 6 +BEWARE 1 0 1 1 +BEVERAGES 1 0 1 1 +BETWEEN 25 0 25 25 +BETTING 1 0 1 1 +BETTER 25 0 25 25 +BETRAYED 1 0 1 1 +BETRAY 1 0 1 1 +BETH 12 0 12 12 +BESTOWED 1 0 1 1 +BESTOW 1 0 1 1 +BEST 22 0 22 22 +BESOUGHT 1 0 1 1 +BESIEGED 1 0 1 1 +BESIDES 8 0 8 8 +BESIDE 5 0 5 5 +BERTIE 1 0 1 1 +BERRIES 1 0 1 1 +BENT 4 0 4 4 +BENIGNANTLY 1 0 1 1 +BENIGHTED 1 0 1 1 +BENEFIT 1 0 1 1 +BENEATH 6 0 6 6 +BEND 1 0 1 1 +BENCHES 3 0 3 3 +BENCH 4 0 4 4 +BEN 3 0 3 3 +BELT 2 0 2 2 +BELOW 1 0 1 1 +BELOVED 3 0 3 3 +BELONGS 1 0 1 1 +BELONGING 1 0 1 1 +BELONGED 3 0 3 3 +BELONG 2 0 2 2 +BELLY 3 0 
3 3 +BELLS 1 0 1 1 +BELLINGHAM 2 0 2 2 +BELL 3 0 3 3 +BELIEVING 2 0 2 2 +BELIEVERS 1 0 1 1 +BELIEVED 5 0 5 5 +BELIEVE 21 0 21 21 +BELIEF 3 0 3 3 +BEINGS 1 0 1 1 +BEHOLDING 1 0 1 1 +BEHOLDERS 1 0 1 1 +BEHOLDER 1 0 1 1 +BEHIND 10 0 10 10 +BEHELD 1 0 1 1 +BEHAVED 1 0 1 1 +BEHAVE 1 0 1 1 +BEHALF 1 0 1 1 +BEGUN 5 0 5 5 +BEGUILING 1 0 1 1 +BEGOT 1 0 1 1 +BEGINS 4 0 4 4 +BEGINNING 4 0 4 4 +BEGIN 9 0 9 9 +BEGGED 1 0 1 1 +BEGGAR 2 0 2 2 +BEGAN 22 0 22 22 +BEG 2 0 2 2 +BEFITS 1 0 1 1 +BEFIT 1 0 1 1 +BEFELL 1 0 1 1 +BEEN 137 0 137 137 +BEELZEBUB 1 0 1 1 +BEEHIVES 1 0 1 1 +BEEF 1 0 1 1 +BEDSIDE 1 0 1 1 +BEDROOM 2 0 2 2 +BEDFORD 1 0 1 1 +BECOMING 1 0 1 1 +BECOMES 8 0 8 8 +BECOME 14 0 14 14 +BECKONED 1 0 1 1 +BECKON 1 0 1 1 +BECAUSE 30 0 30 30 +BECAME 12 0 12 12 +BEAUTY 21 0 21 21 +BEAUTIFUL 13 0 13 13 +BEAUTIES 2 0 2 2 +BEATITUDE 2 0 2 2 +BEATING 2 0 2 2 +BEATERS 1 0 1 1 +BEATEN 2 0 2 2 +BEAT 1 0 1 1 +BEASTS 2 0 2 2 +BEARS 4 0 4 4 +BEARING 3 0 3 3 +BEARD 1 0 1 1 +BEAR'S 1 0 1 1 +BEAR 11 0 11 11 +BEAMS 1 0 1 1 +BEAK 6 0 6 6 +BEADS 1 0 1 1 +BEACHED 1 0 1 1 +BATTLED 1 0 1 1 +BATTERIES 1 0 1 1 +BATTERED 1 0 1 1 +BAT 1 0 1 1 +BASTARD 1 0 1 1 +BASKETS 1 0 1 1 +BASKET 2 0 2 2 +BASIS 2 0 2 2 +BASED 1 0 1 1 +BARS 1 0 1 1 +BARRICADED 1 0 1 1 +BARREN 1 0 1 1 +BARRACK 1 0 1 1 +BARNS 1 0 1 1 +BARN 4 0 4 4 +BARGAINS 1 0 1 1 +BAREFOOT 1 0 1 1 +BARE 2 0 2 2 +BARBARITY 1 0 1 1 +BAR 1 0 1 1 +BAPTIZED 1 0 1 1 +BAPTISM 1 0 1 1 +BANTER 1 0 1 1 +BANQUET 1 0 1 1 +BANNISTER 3 0 3 3 +BANKS 1 0 1 1 +BANK 3 0 3 3 +BANISHED 1 0 1 1 +BANG 1 0 1 1 +BANDS 4 0 4 4 +BALMY 1 0 1 1 +BALLS 2 0 2 2 +BALLET 2 0 2 2 +BALEEN 1 0 1 1 +BAKER 1 0 1 1 +BAGS 1 0 1 1 +BAGGAGE 1 0 1 1 +BAG 1 0 1 1 +BAFFLED 2 0 2 2 +BADLY 2 0 2 2 +BADGES 1 0 1 1 +BADE 3 0 3 3 +BAD 6 0 6 6 +BACON 1 0 1 1 +BACKWARD 1 0 1 1 +BABY'S 1 0 1 1 +BABY 1 0 1 1 +BABIES 1 0 1 1 +BABE 1 0 1 1 +AZURE 1 0 1 1 +AXIS 1 0 1 1 +AWOKE 3 0 3 3 +AWKWARD 1 0 1 1 +AWFULLY 2 0 2 2 +AWFUL 4 0 4 4 +AWAY 50 0 50 50 +AWARE 6 0 6 6 +AWAKE 1 0 1 1 +AWAITING 1 0 1 1 +AWAITED 2 0 2 2 +AVOIDING 1 0 1 1 +AVOIDED 1 0 1 1 +AVOID 5 0 5 5 +AVERSION 1 0 1 1 +AVERSE 1 0 1 1 +AVAILABLE 1 0 1 1 +AUTUMN 1 0 1 1 +AUTHORS 1 0 1 1 +AUTHORIZED 1 0 1 1 +AUTHORITY 6 0 6 6 +AUTHORITIES 1 0 1 1 +AUTHORITATIVELY 1 0 1 1 +AUTHOR 1 0 1 1 +AUTHENTICATED 1 0 1 1 +AUNT'S 1 0 1 1 +AUNT 5 0 5 5 +AUGUST 5 0 5 5 +AUGMENT 1 0 1 1 +AUDITORY 1 0 1 1 +AUDITORS 1 0 1 1 +AUDIENCE 6 0 6 6 +AUDACIOUS 1 0 1 1 +AUCTION 1 0 1 1 +ATTRIBUTED 1 0 1 1 +ATTRACTIVE 1 0 1 1 +ATTRACTION 1 0 1 1 +ATTRACTED 3 0 3 3 +ATTORNEYS 1 0 1 1 +ATTITUDE 4 0 4 4 +ATTIRE 1 0 1 1 +ATTENUATING 1 0 1 1 +ATTENTIVELY 2 0 2 2 +ATTENTION 11 0 11 11 +ATTENDED 1 0 1 1 +ATTENDANT 1 0 1 1 +ATTEND 3 0 3 3 +ATTEMPTS 1 0 1 1 +ATTEMPT 5 0 5 5 +ATTAINMENTS 1 0 1 1 +ATTAINMENT 1 0 1 1 +ATTAINED 3 0 3 3 +ATTACKED 1 0 1 1 +ATTACK 4 0 4 4 +ATTACHED 2 0 2 2 +ATROCIOUS 1 0 1 1 +ATMOSPHERIC 1 0 1 1 +ATMOSPHERE 2 0 2 2 +ATLANTIS 1 0 1 1 +ATLANTIC 3 0 3 3 +ATHLETE 2 0 2 2 +ATHENS 1 0 1 1 +ATHENIANS 1 0 1 1 +ATHENIAN 2 0 2 2 +ATE 2 0 2 2 +ATCHISON 1 0 1 1 +ASTRONOMY 1 0 1 1 +ASTOUNDING 1 0 1 1 +ASTOR 2 0 2 2 +ASTONISHMENT 2 0 2 2 +ASTONISHING 1 0 1 1 +ASTONISHED 1 0 1 1 +ASSURES 1 0 1 1 +ASSUREDLY 1 0 1 1 +ASSURED 5 0 5 5 +ASSURE 5 0 5 5 +ASSURANCES 1 0 1 1 +ASSURANCE 3 0 3 3 +ASSUMED 5 0 5 5 +ASSOCIATION 2 0 2 2 +ASSOCIATES 1 0 1 1 +ASSOCIATED 3 0 3 3 +ASSISTED 2 0 2 2 +ASSISTANT 1 0 1 1 +ASSIST 2 0 2 2 +ASSIDUOUSLY 1 0 1 1 +ASSERTS 1 0 1 1 +ASSERTIVE 1 0 1 1 +ASSERTED 3 0 3 3 +ASSENT 1 0 1 1 +ASSEMBLY 2 0 2 2 +ASSEMBLED 3 0 3 3 +ASS 1 0 1 1 +ASPECT 1 0 1 1 +ASLEEP 1 0 1 1 +ASKING 2 
0 2 2 +ASIDE 3 0 3 3 +ASIA 1 0 1 1 +ASHORE 1 0 1 1 +ASHAMED 2 0 2 2 +ASCRIBES 1 0 1 1 +ASCERTAINING 1 0 1 1 +ASCERTAIN 2 0 2 2 +ARTILLERY 1 0 1 1 +ARTIFICE 1 0 1 1 +ARTICULATE 2 0 2 2 +ARTICLE 3 0 3 3 +ARTICHOKES 1 0 1 1 +ARTHUR 1 0 1 1 +ART 14 0 14 14 +ARROWS 1 0 1 1 +ARROW 2 0 2 2 +ARRIVING 2 0 2 2 +ARRIVED 4 0 4 4 +ARRIVE 1 0 1 1 +ARRIVAL 4 0 4 4 +ARRESTING 1 0 1 1 +ARRAY 1 0 1 1 +ARRANGING 2 0 2 2 +ARRANGEMENTS 1 0 1 1 +ARRANGEMENT 2 0 2 2 +ARRANGED 2 0 2 2 +AROUSE 1 0 1 1 +AROSE 2 0 2 2 +ARONNAX 1 0 1 1 +ARMY 9 0 9 9 +ARMS 15 0 15 15 +ARMED 2 0 2 2 +ARM 5 0 5 5 +ARISTOCRACY 1 0 1 1 +ARISING 1 0 1 1 +ARISE 1 0 1 1 +ARID 1 0 1 1 +ARIANS 1 0 1 1 +ARGYLE'S 1 0 1 1 +ARGYLE 2 0 2 2 +ARGUS 1 0 1 1 +ARGUING 1 0 1 1 +ARGUE 2 0 2 2 +AREN'T 1 0 1 1 +AREA 1 0 1 1 +ARDUOUS 1 0 1 1 +ARCHIVES 1 0 1 1 +ARCHITECTURAL 1 0 1 1 +ARCHED 1 0 1 1 +ARCH 1 0 1 1 +ARCADIAN 1 0 1 1 +ARC 2 0 2 2 +APRON 2 0 2 2 +APRIL 2 0 2 2 +APPROXIMATELY 1 0 1 1 +APPROVING 2 0 2 2 +APPROVES 1 0 1 1 +APPROVE 2 0 2 2 +APPROVAL 1 0 1 1 +APPROPRIATE 1 0 1 1 +APPROBATION 1 0 1 1 +APPROACHING 3 0 3 3 +APPROACHES 2 0 2 2 +APPROACHED 6 0 6 6 +APPROACH 1 0 1 1 +APPRENTICESHIP 1 0 1 1 +APPRENTICE 3 0 3 3 +APPREHENSION 1 0 1 1 +APPRECIATIVE 1 0 1 1 +APPRECIATE 1 0 1 1 +APPOSITION 1 0 1 1 +APPOINTED 7 0 7 7 +APPLYING 1 0 1 1 +APPLICATION 2 0 2 2 +APPLE 1 0 1 1 +APPLAUSE 2 0 2 2 +APPLAUDED 1 0 1 1 +APPETITES 1 0 1 1 +APPETITE 1 0 1 1 +APPEARS 1 0 1 1 +APPEARED 10 0 10 10 +APPEARANCES 3 0 3 3 +APPEARANCE 9 0 9 9 +APPEAR 3 0 3 3 +APPEALS 1 0 1 1 +APPEALED 1 0 1 1 +APPEAL 1 0 1 1 +APPARENTLY 1 0 1 1 +APPARENT 2 0 2 2 +APPARATUS 1 0 1 1 +APPALLING 1 0 1 1 +APOSTOLICAL 1 0 1 1 +APOSTOLIC 1 0 1 1 +APOSTLES 6 0 6 6 +APOSTLE 4 0 4 4 +APOLLO 1 0 1 1 +APARTMENT 2 0 2 2 +APART 1 0 1 1 +ANYWHERE 6 0 6 6 +ANYHOW 3 0 3 3 +ANXIOUS 3 0 3 3 +ANXIETY 4 0 4 4 +ANTIPATHY 2 0 2 2 +ANTICIPATION 1 0 1 1 +ANTICIPATE 1 0 1 1 +ANTICHRIST 1 0 1 1 +ANTI 1 0 1 1 +ANTEDATING 1 0 1 1 +ANTE 1 0 1 1 +ANTARCTIC 1 0 1 1 +ANSWERS 2 0 2 2 +ANSWER 6 0 6 6 +ANOTHER'S 1 0 1 1 +ANNOYANCE 2 0 2 2 +ANNOUNCED 2 0 2 2 +ANNE'S 2 0 2 2 +ANIMOSITY 2 0 2 2 +ANIMATED 2 0 2 2 +ANIMALS 5 0 5 5 +ANIMAL 8 0 8 8 +ANGRY 5 0 5 5 +ANGRILY 3 0 3 3 +ANGRIER 1 0 1 1 +ANGELS 3 0 3 3 +ANGEL 1 0 1 1 +ANECDOTES 1 0 1 1 +ANCIENT 3 0 3 3 +ANALYSIS 2 0 2 2 +ANALOGY 1 0 1 1 +ANALOGUE 1 0 1 1 +AMUSING 2 0 2 2 +AMUSEMENT 3 0 3 3 +AMUSED 1 0 1 1 +AMUSE 2 0 2 2 +AMPLY 2 0 2 2 +AMOUNT 3 0 3 3 +AMONGST 3 0 3 3 +AMONG 29 0 29 29 +AMISS 1 0 1 1 +AMIDST 2 0 2 2 +AMID 1 0 1 1 +AMETHYST 1 0 1 1 +AMERICANS 2 0 2 2 +AMERICAN 10 0 10 10 +AMERICA 2 0 2 2 +AMENDS 2 0 2 2 +AMENDMENT 1 0 1 1 +AMELIORATION 1 0 1 1 +AMBROSE 4 0 4 4 +AMBITIOUS 1 0 1 1 +AMBITION 2 0 2 2 +AMBASSADOR 1 0 1 1 +AMAZEMENT 2 0 2 2 +AMASS 1 0 1 1 +AMALGAMATED 1 0 1 1 +ALWAYS 36 0 36 36 +ALTHOUGH 10 0 10 10 +ALTERNATIVE 1 0 1 1 +ALTERNATING 3 0 3 3 +ALTERING 2 0 2 2 +ALTERED 2 0 2 2 +ALTERATION 1 0 1 1 +ALTER 1 0 1 1 +ALTAR 1 0 1 1 +ALSO 36 0 36 36 +ALREADY 22 0 22 22 +ALOUD 3 0 3 3 +ALONG 15 0 15 15 +ALONE 23 0 23 23 +ALMS 1 0 1 1 +ALMOST 19 0 19 19 +ALLY 1 0 1 1 +ALLUDE 1 0 1 1 +ALLOWING 2 0 2 2 +ALLOWED 7 0 7 7 +ALLOW 5 0 5 5 +ALLIES 1 0 1 1 +ALLIED 1 0 1 1 +ALLERS 1 0 1 1 +ALLEGED 2 0 2 2 +ALIVE 1 0 1 1 +ALIKE 1 0 1 1 +ALIGHTED 1 0 1 1 +ALICE 4 0 4 4 +ALGERIAN 1 0 1 1 +ALGERIA 2 0 2 2 +ALGEBRA 1 0 1 1 +ALEXANDRA 3 0 3 3 +ALEXANDER 13 0 13 13 +ALERTNESS 1 0 1 1 +ALE 2 0 2 2 +ALBANS 1 0 1 1 +ALAS 3 0 3 3 +ALARMED 1 0 1 1 +AKIN 1 0 1 1 +AIR 25 0 25 25 +AIN'T 2 0 2 2 +AIMED 1 0 1 1 +AIDED 1 0 1 1 +AID 2 0 2 2 +AHEAD 1 0 1 1 +AGREEMENT 1 0 1 1 +AGREED 2 
0 2 2 +AGREEABLY 2 0 2 2 +AGREEABLE 5 0 5 5 +AGREE 2 0 2 2 +AGO 4 0 4 4 +AGITATION 4 0 4 4 +AGITATED 2 0 2 2 +AGGRESSIVENESS 1 0 1 1 +AGGRESSIVE 1 0 1 1 +AGGREGATE 1 0 1 1 +AGENCY 1 0 1 1 +AGE 6 0 6 6 +AGAPE 1 0 1 1 +AGAINST 23 0 23 23 +AGAIN 39 0 39 39 +AFTERWARDS 5 0 5 5 +AFTERWARD 2 0 2 2 +AFTERNOON 4 0 4 4 +AFTERDECK 1 0 1 1 +AFTER 58 0 58 58 +AFRICAN 1 0 1 1 +AFRAID 9 0 9 9 +AFLOAT 1 0 1 1 +AFFRIGHTENED 1 0 1 1 +AFFORD 4 0 4 4 +AFFLICTED 1 0 1 1 +AFFIRMATIVE 1 0 1 1 +AFFILIATED 2 0 2 2 +AFFECTIONS 1 0 1 1 +AFFECTIONATE 1 0 1 1 +AFFECTION 7 0 7 7 +AFFECTED 3 0 3 3 +AFFAIRS 3 0 3 3 +AFFAIR 2 0 2 2 +ADVISER 1 0 1 1 +ADVISED 1 0 1 1 +ADVISABLE 1 0 1 1 +ADVICE 4 0 4 4 +ADVERTISING 1 0 1 1 +ADVERTISEMENT 1 0 1 1 +ADVERSE 2 0 2 2 +ADVENTURE 1 0 1 1 +ADVANTAGES 2 0 2 2 +ADVANTAGE 3 0 3 3 +ADVANCING 1 0 1 1 +ADVANCE 5 0 5 5 +ADORE 1 0 1 1 +ADORATION 3 0 3 3 +ADOPTED 2 0 2 2 +ADOLESCENCE 1 0 1 1 +ADMITTING 2 0 2 2 +ADMITTED 3 0 3 3 +ADMITTANCE 1 0 1 1 +ADMIT 2 0 2 2 +ADMIRING 1 0 1 1 +ADMIRED 1 0 1 1 +ADMIRATION 2 0 2 2 +ADMINISTRATION 3 0 3 3 +ADJUST 1 0 1 1 +ADHERENTS 1 0 1 1 +ADDRESSING 1 0 1 1 +ADDRESSED 6 0 6 6 +ADDRESS 3 0 3 3 +ADDITIONAL 1 0 1 1 +ADDED 11 0 11 11 +ACUTE 2 0 2 2 +ACTUALLY 3 0 3 3 +ACTUAL 4 0 4 4 +ACTS 2 0 2 2 +ACTRESS 1 0 1 1 +ACTORS 4 0 4 4 +ACTOR 2 0 2 2 +ACTIVITY 1 0 1 1 +ACTIVELY 1 0 1 1 +ACTIVE 2 0 2 2 +ACTION 11 0 11 11 +ACTING 2 0 2 2 +ACT 6 0 6 6 +ACROSS 13 0 13 13 +ACQUIRES 1 0 1 1 +ACQUIRE 1 0 1 1 +ACQUAINTED 1 0 1 1 +ACQUAINTANCE 3 0 3 3 +ACQUAINT 1 0 1 1 +ACORN 1 0 1 1 +ACKNOWLEDGES 1 0 1 1 +ACKNOWLEDGED 2 0 2 2 +ACKNOWLEDGE 2 0 2 2 +ACHIEVEMENTS 1 0 1 1 +ACHIEVEMENT 1 0 1 1 +ACHIEVED 2 0 2 2 +ACE 2 0 2 2 +ACCUSTOMED 3 0 3 3 +ACCUSE 1 0 1 1 +ACCURATELY 1 0 1 1 +ACCURATE 2 0 2 2 +ACCURACY 3 0 3 3 +ACCRUING 1 0 1 1 +ACCOUTREMENTS 1 0 1 1 +ACCOUNTS 1 0 1 1 +ACCOUNTED 1 0 1 1 +ACCOUNT 9 0 9 9 +ACCORDINGLY 1 0 1 1 +ACCORDING 4 0 4 4 +ACCORDANCE 2 0 2 2 +ACCOMPLISHMENT 1 0 1 1 +ACCOMPLISHED 5 0 5 5 +ACCOMPANY 2 0 2 2 +ACCOMPANIED 3 0 3 3 +ACCOMMODATIONS 1 0 1 1 +ACCOMMODATION 1 0 1 1 +ACCLAMATIONS 1 0 1 1 +ACCIDENTS 2 0 2 2 +ACCIDENT 2 0 2 2 +ACCESSORIES 1 0 1 1 +ACCEPTING 1 0 1 1 +ACCEPTED 1 0 1 1 +ACCEPTABLE 1 0 1 1 +ACCENTS 2 0 2 2 +ACCENT 3 0 3 3 +ABSURDITY 1 0 1 1 +ABSURDITIES 1 0 1 1 +ABSURD 1 0 1 1 +ABSTRACTIONS 1 0 1 1 +ABSTRACTION 1 0 1 1 +ABSORBED 1 0 1 1 +ABSOLUTELY 6 0 6 6 +ABSOLUTE 1 0 1 1 +ABSENT 2 0 2 2 +ABSENCE 1 0 1 1 +ABRUPTLY 2 0 2 2 +ABROAD 4 0 4 4 +ABRAHAM 2 0 2 2 +ABOVE 17 0 17 17 +ABOUT 85 0 85 85 +ABOLITIONISM 1 0 1 1 +ABOARD 2 0 2 2 +ABNER 1 0 1 1 +ABLE 7 0 7 7 +ABJECTLY 1 0 1 1 +ABILITY 2 0 2 2 +ABBE 1 0 1 1 +ABANDONED 2 0 2 2 diff --git a/decoding_results/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt b/decoding_results/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt new file mode 100644 index 0000000000000000000000000000000000000000..218568b7e7e6cc328d6b3c8be8d86b2d494e639e --- /dev/null +++ b/decoding_results/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt @@ -0,0 +1,13012 @@ +%WER = 4.08 +Errors: 225 insertions, 178 deletions, 1734 substitutions, over 52343 reference words (50431 correct) +Search below for sections starting with PER-UTT DETAILS:, SUBSTITUTIONS:, DELETIONS:, INSERTIONS:, PER-WORD STATS: + +PER-UTT DETAILS: corr or (ref->hyp) +2033-164914-0014-225: BUT THE EUNUCH SAID I WILL NOT LEAVE THEE TILL THOU SHOW ME WHO IT WAS THAT RECITED THE VERSES FOR I DREAD RETURNING TO MY LADY WITHOUT HIM 
+8131-117017-0008-2625: ONE LOOK WAS ENOUGH THE WORK PAPERS HAD THE (TELLTALE->TELL TALE) OVER THICKENING OF THE SIGNATURE THAT HAD SHOWED UP ON OTHER PAPERS OBVIOUSLY FORGERIES +8280-266249-0034-2835: NOW THE VOICE CAME FROM THE SKYLIGHT OVERHEAD APPARENTLY AND WITH A FIERCE IMPRECATION THE IRATE GAMESTER RUSHED UPON DECK AND RAN HITHER AND THITHER IN SEARCH OF HIS TORMENTOR +2609-157645-0006-406: (PARR->PAR) WAS SUCH A (CONTINUAL->CONTINUOUS) SMOKER THAT (ANYONE->ANY ONE) WHO CAME INTO HIS COMPANY IF HE HAD NEVER SMOKED BEFORE HAD TO LEARN THE USE OF A PIPE AS A MEANS OF SELF DEFENCE +4294-9934-0002-1320: HE AFFIRMED TO HIMSELF THAT HE WOULD NOT HE DECLARED TO HIMSELF THAT HE WOULD NOT DOUBT AND HE BEGAN TO DOUBT IN SPITE OF HIMSELF +3538-163624-0023-916: (THEN BRYNHILD'S->LIM BRUNHILD'S) FATHER TOLD (GUNNAR->GUNNER) THAT SHE WOULD MARRY NONE BUT HIM WHO COULD RIDE THE FLAME IN FRONT OF HER ENCHANTED TOWER AND THITHER THEY RODE AND (GUNNAR->GUNNER) SET HIS HORSE (AT->TO) THE FLAME BUT HE WOULD NOT FACE IT +2609-169640-0021-436: ON OUR SIDE WE GAVE THE GENTLEMEN THE FOUR (SIXES->SIXXES) TWO (AT->OUT) THE NEAREST AND TWO AT THE (STERN MOST PROA->STERNMOST PROW) WHICH WAS STILL NEAR A CABLE'S LENGTH DISTANT +3538-163622-0018-886: (CINDERLAD->SINDERLAD) TRIED BUT COULD NOT DO IT SO HE HAD TO TAKE A (DRAUGHT->DROP) FROM THE PITCHER AND THEN ONE MORE AND AFTER THAT STILL ANOTHER AND THEN HE WAS ABLE TO WIELD THE SWORD WITH PERFECT EASE +6432-63723-0031-2130: IT WOULDN'T DO YOU KNOW AFTER THAT STORY CAME OUT FOR ME AND THE VICE CHANCELLOR WHO SAT IN THE CASE AS WELL AS OTHER JUDGES AND MEMBERS OF THE BAR TO BE SEEN THERE KENNETH EXPLAINED TO THE COLONEL +4350-9170-0028-1410: BUT THERE IS NOT ONLY ONE GOVERNMENT THERE ARE OTHER GOVERNMENTS EXPLOITING THEIR SUBJECTS BY VIOLENCE IN THE SAME WAY AND (*->ARE) ALWAYS READY TO POUNCE DOWN ON ANY OTHER GOVERNMENT AND CARRY OFF THE FRUITS OF THE TOIL OF ITS ENSLAVED SUBJECTS +3331-159609-0004-660: TOM WROTE ONCE A WEEK TO HIS MOTHER BUT THE LETTERS WERE SHORT AND NOT VERY SATISFACTORY FOR MEN NEVER DO TELL THE INTERESTING LITTLE THINGS THAT WOMEN BEST LIKE TO HEAR +4350-9170-0043-1425: THEY ARE NEEDED PRINCIPALLY AGAINST THEIR SUBJECTS AND EVERY MAN UNDER UNIVERSAL MILITARY SERVICE BECOMES AN ACCOMPLICE IN ALL THE ACTS OF VIOLENCE OF THE GOVERNMENT AGAINST THE CITIZENS WITHOUT ANY CHOICE OF HIS OWN +8461-258277-0013-2880: SO I WENT ROUND ABOUT THE HIGHWAYS OF THE CITY TILL I MET A SWEETMEAT SELLER AND BUYING HIS CLOTHES AND STOCK IN TRADE AND GEAR FOR TEN DINARS DID WHAT WAS DONE +1688-142285-0060-60: BUT SURELY SAID MARGARET FACING ROUND YOU BELIEVE IN WHAT I SAID THAT GOD GAVE HER LIFE AND ORDERED WHAT KIND OF LIFE IT WAS TO BE +3538-142836-0015-841: MARMALADES AND JAMS DIFFER LITTLE FROM EACH OTHER THEY ARE PRESERVES OF (A->*) HALF LIQUID CONSISTENCY MADE BY BOILING THE PULP OF FRUITS AND SOMETIMES PART OF THE RINDS WITH SUGAR +2414-159411-0022-345: THE BRAHMAN TOLD HIS STORY AND SAID TO HIM UNCLE JACKAL UNCLE JACKAL SAY WHAT IS YOUR JUDGMENT +3764-168670-0015-990: THAT THE NUN WHO HAD DIED THAT MORNING HAD REQUESTED TO BE BURIED IN THE COFFIN WHICH HAD SERVED HER FOR A BED AND INTERRED IN THE VAULT UNDER THE ALTAR OF THE CHAPEL +5442-41168-0010-1682: BUT LEVIN FORGOT ALL THAT AND IT WAS PAINFUL TO HIM TO SEE ALL THESE EXCELLENT PERSONS FOR WHOM HE HAD A RESPECT IN SUCH AN UNPLEASANT AND VICIOUS STATE OF EXCITEMENT +3005-163390-0009-468: WE STRUCK THE RAFT AT THE SAME TIME AND IN LESS THAN TWO SECONDS WE WAS GLIDING DOWN STREAM ALL DARK AND STILL AND 
EDGING TOWARDS THE MIDDLE OF THE RIVER NOBODY SAYING A WORD +4198-12259-0027-1202: (HEYDAY->HEY THEY) HERE ARE TRIPES FIT FOR OUR SPORT AND IN EARNEST EXCELLENT (GODEBILLIOS->GOTA BILLIOS) OF THE DUN OX YOU KNOW WITH THE BLACK STREAK +6938-70848-0019-2176: ON THE TWENTY SEVENTH OCCURRED THE DEBATE ON THE LAND QUESTION WHICH REVEALED THE DIFFERENCES BETWEEN THE (AGRARIAN->AGRIEAN) PROGRAMME OF THE BOLSHEVIKI AND THE LEFT SOCIALIST REVOLUTIONARIES +8461-258277-0015-2882: SO ALI RELATED TO HIM ALL THAT HAD PASSED FROM FIRST TO LAST AND THE CALIPH SAID I HAD NOT THOUGHT THOU WOULDST KILL HIM FOR THAT HE WAS A SORCERER +2609-156975-0002-363: LET US HAVE FAITH THAT RIGHT (MAKES->MATRON) MIGHT AND IN THAT FAITH LET US DARE TO DO OUR DUTY AS WE UNDERSTAND IT (LINCOLN->LINTON) +3080-5032-0024-572: AND BESIDES THERE WAS A TIME WHEN WE OURSELVES WERE INDIFFERENT TO ONE ANOTHER DID I DO SO THEN OR HAVE I LEARNED IT SINCE +3331-159609-0021-677: SHE KEPT MUCH AT HOME WHEN THE DAY'S WORK WAS DONE FINDING IT PLEASANTER TO SIT DREAMING (OVER->OF A) BOOK OR SEWING ALONE THAN TO EXERT HERSELF EVEN TO GO TO THE (SHAWS->SHORES) +7975-280076-0003-2492: NO WARRANT WAS ISSUED FOR THE YOUNGERS BUT SUBSEQUENT HISTORIANS HAVE INFERENTIALLY AT LEAST ACCUSED US OF TAKING PART BUT AS I SAID BEFORE THERE IS NO TRUTH IN THE ACCUSATION +5764-299665-0073-1877: IT FOLLOWS THAT NOTHING HAS BEEN OR CAN BE CREATED THAT THERE NEVER HAS BEEN OR CAN BE A CREATOR +8131-117016-0011-2566: YOUR JOB IS TO PROTECT THE CITIZENS HERE AND THAT MEANS EVERYONE NOT BREAKING THE LAWS WHETHER YOU FEEL LIKE IT OR NOT NO GRAFT +2033-164915-0008-242: I BELIEVE HE HATH DENOUNCED ME TO THE EUNUCH HENCE THESE PAGES (ET->AT) ABOUT ME AND HE HATH MADE ME AN ACCOMPLICE IN HIS CRIME +7902-96592-0012-2357: WHAT DID THEY SAY FALSE ALARM TELL (SIR RISDON->SERVICE AND) THEY WOULD CLEAR ALL AWAY TO NIGHT SEE IF ANYTHING HAD BEEN LEFT ABOUT LOBSTER BOAT +4294-35475-0001-1292: THROWING HIMSELF ON HIS KNEES BEFORE THE KING HE CRIED (OH->O) ROYAL SIRE BESTOW UPON ME ALSO A SWORD AND A STEED THAT I MAY UP AND AWAY TO FOLLOW MY BRETHREN +3080-5040-0027-602: (WELL->WHILE) IN SOBER EARNEST NOW I WOULD NOT LIVE THUS A TWELVEMONTH TO GAIN ALL THAT THE KING HAS LOST UNLESS IT WERE TO GIVE IT HIM AGAIN +1688-142285-0093-93: TAKE NOTICE THAT (*->THIS) IS NOT MY KIND OF HAUGHTINESS PAPA IF I HAVE ANY AT ALL WHICH I DON'T AGREE TO THOUGH YOU'RE ALWAYS ACCUSING ME OF IT +6432-63722-0018-2057: AND THE DETECTIVE'S PROFESSIONAL INSTINCTS GOT THE UPPER HAND OF HIS FRIENDLINESS NOT THE LEAST IN THE WORLD NOT AS MUCH AS YOU DO WAS THE COOL ANSWER +3005-163399-0023-540: HE MUST A COME AND YOU'VE MISSED HIM ALONG THE ROAD +3005-163390-0006-465: JUST A WORD GENTLEMEN THEY STOPPED TO LISTEN +3997-182399-0016-1170: YES SAH SHE (SHO'LY->SURELY) WAS (PLUMB->PLUM) SCARED +1998-29454-0042-166: YOU ARE GOOD SAID DICKIE I DO LIKE YOU +4198-61336-0025-1260: THE REMNANT OF THE (PHILISTINES->PHILISTINE) SHALL PERISH +2414-159411-0008-331: (IS->*) IT (JUST->IS JEALOUS) THAT HE SHOULD DO SO (OR NO->I KNOW) +3997-180297-0018-1140: NEVER (DO THEY->DID HE) GIVE YOU ADVICE WHICH IS NOT LUCRATIVE +7902-96592-0010-2355: AND I'M HUNGRY TOO TIME I WAS UP I SUPPOSE +7975-280084-0017-2535: THE FIRST ONE MISSED HIM BUT THE SECOND WENT THROUGH HIS RIGHT SHOULDER +3528-168656-0008-690: SOME UNIQUE CHAPLET SOME AUTHENTIC RELIC +3331-159605-0006-615: LET ME SEE HOW CAN I BEGIN +1688-142285-0061-61: I BELIEVE WHAT I SEE AND NO MORE +7975-280057-0005-2460: MY ELDEST BROTHER RICHARD DIED IN EIGHTEEN SIXTY 
+3538-163622-0017-885: IT LOOKS LIKE THE TRUNK OF A GREAT THICK BIRCH TREE +5442-41169-0010-1710: THEN TOO ONE MUST KEEP UP CONNECTIONS +533-131564-0011-1635: NOW DON'T BURST INTO A TEMPEST AT THAT +8131-117017-0023-2640: (HUH IZZY->HA AS HE) TURNED IT OVER AND SHOOK HIS HEAD +2414-128291-0008-271: THOU ALSO THOU ALSO +3005-163399-0008-525: YOUR UNCLE'S BEEN UP TO (THE->*) TOWN EVERY DAY TO FETCH YOU +2609-156975-0014-375: THE MAKING OF A LOYAL PATRIOT +5442-41169-0025-1725: YES IT'S RATHER STRANGE HE WENT ON +1688-142285-0015-15: HOW TAINTED ASKED HER FATHER +1688-142285-0075-75: (I WILL->OH) COME TO MORROW SAID MARGARET +8188-269290-0044-2775: GOD ANSWERS (TO->*) EACH OF YOU YOU ARE +3764-168671-0017-1050: WHAT REMAINED TO BE DONE WAS A MERE NOTHING +2609-169640-0020-435: NOT (A SOUL->SO) ON BOARD THE (JOHN->JON) WAS (HURT->HEART) +1688-142285-0046-46: (NOUGHT->NOT) WORSE THAN MANY OTHERS I RECKON +3528-168656-0009-691: THEY LOST THEMSELVES IN CONJECTURES +7902-96595-0001-2430: MISTER (RAYSTOKE->GREYSTOKE) SIR DON'T BE A FOOL +1998-29455-0025-196: WHICH THIS (AIN'T->END) NOT BY NO MEANS +3764-168670-0030-1005: HAVE YOU THE KEYS TO THOSE TWO DOORS +6432-63722-0031-2070: AND DONOVAN'S VOICE WAS PLAINLY SKEPTICAL +6432-63722-0016-2055: THE VERY PAIR I WAS GOING TO BUY +1998-29454-0012-136: AND THE (TEA->TEENO) AND (ALL AN->*) THE EGG +3528-168669-0130-825: I WILL MAKE THAT MY SPECIAL BUSINESS +6070-63485-0018-1920: THE (FIACRE->THEACCUS) STARTED +8280-266249-0064-2865: YE ARE ALL ONE IN CHRIST JESUS +8188-269290-0014-2745: BUT DON'T (LOCK->LOOK) ME OUT PLEASE ANNIE +7902-96594-0004-2400: SOON AS THE SIGNAL COMES WE SHALL PUSH OFF +3005-163391-0020-510: WAS PETER (WILKS->WILKES) WELL OFF +2414-128292-0025-315: SAID HE AT LAST SADLY +3528-168669-0040-735: IT CUT MORE OFTEN SHORT +4852-28330-0000-1530: THEY WENT DOWN TO THEIR QUARTERS FIRST +3528-168669-0025-720: YOU KNOW THAT (A->HER) MOTHER DIED THIS MORNING +1998-29455-0039-210: WHAT'S THAT THERE SAID DICKIE +3997-182399-0001-1155: THIS SOUNDED LIKE ANOTHER STORY +3005-163391-0005-495: (GIT->GET) ABOARD SAYS THE KING +4350-10919-0017-1365: AND THEN THE MOTHER WISHES IT HE ADDED +4852-28311-0012-1455: MIKE WAS STANDING ON THE CORNER +2414-159411-0023-346: SHOW ME THE (PLACE->PACE) +3528-168669-0100-795: THE OFFICE FOR THE DEAD WILL THEN BE (SAID->SET) +3005-163389-0011-451: YOU CAN'T BE TOO CAREFUL +8188-269288-0012-2685: LESLIE OPENED THE DOOR +367-130732-0025-945: (BISQUE->FISK) OF CRAWFISH +3005-163390-0022-481: WELL THEY ALL DO JIM +367-130732-0010-930: KING OF (SHELL FISH->SHELLFISH) +8188-269288-0042-2715: DON'T SAID ANNIE +3528-168669-0070-765: BUT IT IS FORBIDDEN +8131-117029-0005-2655: THEY HAD BEEN LUCKY +6432-63722-0001-2040: AND (SHAG->SHAGG) WITH THE FREEDOM OF AN OLD SERVANT STOOD LOOKING AT HIS MASTER AS IF NOT QUITE UNDERSTANDING THE NEW TWIST THE AFFAIRS HAD TAKEN +2609-156975-0015-376: THE STORY OF (MOSES->MOSES'S) BIRTH (AND->IN) EARLY CHILDHOOD IS ONE OF THE MOST INTERESTING CHAPTERS IN BIBLICAL HISTORY +2609-156975-0029-390: AFTER THE CAPTURE OF JERICHO CERTAIN OF THEM WENT UP WITH THE SOUTHERN TRIBES TO CONQUER SOUTHERN PALESTINE +1688-142285-0090-90: SHE SOUNDED TO BE SUCH A CAREFUL ECONOMICAL PERSON THAT I SHOULD LIKE ANY ONE OUT OF THE SAME FAMILY +4294-32859-0005-1290: BUT HONEST MARK FORGOT THAT YOUNG LADIES DO NOT ALWAYS COME OUT QUITE ALONE AND JUMP UNASSISTED INTO THEIR VEHICLES +1998-29455-0010-181: THIS LIFE OF THE RABBIT AS DESCRIBED BY MISTER BEALE WAS THE CHILD'S FIRST GLIMPSE OF FREEDOM I'D LIKE TO BE A 
RABBIT +3764-168671-0002-1035: A MOURNING COACH IN WHICH COULD BE SEEN A PRIEST IN HIS SURPLICE AND A CHOIR BOY IN HIS RED CAP FOLLOWED +1998-15444-0009-105: APOMORPHINE IS NOT ALLIED IN PHYSIOLOGICAL ACTION TO MORPHINE AND MAY BE GIVEN IN CASES OF NARCOTIC POISONING +4198-12281-0011-1230: BELIEVE THAT IT WAS THE MOST HORRIBLE SPECTACLE THAT EVER ONE SAW +8188-269290-0029-2760: I AM SURE SHE IS ILL SHE WORKS TOO HARD AND SHE BUT THERE I DON'T KNOW THAT I OUGHT TO SAY ANY MORE +6128-63240-0009-1980: AND HE TOOK UP HIS HAT VAGUELY A SOFT BLACK HAT WITH A LOW CROWN AND AN IMMENSE STRAIGHT BRIM +8461-281231-0010-2910: THE BLACK KNIGHT WITH PORTENTOUS STRENGTH FORCED HIS WAY INWARD IN DESPITE OF DE BRACY AND HIS FOLLOWERS +5484-24317-0008-1740: THEN HE WENT DIRECTLY TO THE NEIGHBOURING PALACE THE QUEEN MIGHT HAVE APPEARED ALREADY AND IT WOULD NOT DO TO KEEP HER WAITING +7105-2330-0025-2265: THIS IS NOT A MOMENT FOR STANDING ON DIGNITY HE OBSERVED BLUNTLY MUSICIANS MUST BE SUPPLIED AT ONCE +533-131562-0013-1620: NOT IN MONEY MATTERS EXACTLY IT SEEMS BUT IT'S BEST TO KEEP OUT OF THE WAY OF TEMPTATION +3331-159605-0021-630: SHE REALLY COULD (N'T->NOT) HELP IT IT WAS SO PLEASANT TO SEE HIM AGAIN JUST WHEN SHE WAS FEELING SO LONELY +7105-2330-0010-2250: AND MAY THE LORD HAVE MERCY ON THE (POLL->POLE) A JUNIOR (COUNSEL->CONSUL) EXCLAIMED IRREVERENTLY +2609-169640-0005-420: AT THAT MOMENT I HEARD A NOISE (ONE->WHEN) FAMILIAR TO SEAMEN THAT OF AN OAR FALLING IN (A->THE) BOAT +5764-299665-0011-1815: CAN WE CONCEIVE OF A DEVIL BASE ENOUGH TO PREFER HIS ENEMIES TO HIS FRIENDS +8461-281231-0025-2925: DE BRACY BOWED LOW AND IN SILENCE THREW HIMSELF UPON A HORSE AND GALLOPED OFF THROUGH THE WOOD +2414-128291-0023-286: NOW HOWEVER TAKE LEAVE AT ONCE OF THY (KINE->KIND) THOU STRANGE ONE +1998-15444-0024-120: FIVE A PART OF THIS ETHEREAL SOLUTION IS POURED INTO A WATCH GLASS AND ALLOWED TO EVAPORATE +2414-128291-0007-270: WHO HATH NOT AT PRESENT HIS HEART HIS MOUTH AND HIS EYES FULL OF DISGUST +2033-164916-0004-256: THEN HE ADDED O MY UNCLE HOW SHALL I DO WITH MY BROTHER (SHARRKAN->SHARKAN) +6128-63240-0024-1995: HER VOICE WAS LOW (AND->AN) AGREEABLE A CULTIVATED VOICE AND SHE EXTENDED A SLENDER WHITE HAND TO HER VISITOR WHO REMARKED WITH SOME SOLEMNITY HE FELT A CERTAIN GUILT OF PARTICIPATION IN MISSUS (LUNA'S->LUNER'S) INDISCRETION THAT HE WAS INTENSELY HAPPY TO MAKE HER ACQUAINTANCE +7018-75788-0017-2205: BUT IT BURNETH IN THEIR BELLIES SO THEY CAST IT UP AGAIN AND IT CONGEALETH ON THE SURFACE OF THE WATER WHEREBY ITS (COLOR->COLOUR) AND QUANTITIES ARE CHANGED AND AT LAST THE WAVES CAST IT ASHORE AND THE TRAVELLERS AND MERCHANTS WHO KNOW IT COLLECT IT AND SELL IT +6070-63485-0003-1905: SARAH AND HER BROTHER HAVING AGAIN PASSED BY THE (TAPIS FRANC->TAPI FRANCS) ARRIVED CLOSE TO THE DILAPIDATED HOUSE WHICH WAS PARTLY IN RUINS AND ITS (OPENED->OPEN) CELLARS FORMED A KIND OF GULF ALONG WHICH THE STREET RAN IN THAT DIRECTION +1688-142285-0031-31: ONCE MARGARET HAD GONE INTO THE CHAMBER SOON AFTER DIXON LEFT IT AND FOUND HER MOTHER ON HER KNEES AND AS MARGARET STOLE OUT SHE CAUGHT A FEW WORDS WHICH WERE EVIDENTLY A PRAYER FOR STRENGTH AND PATIENCE TO ENDURE SEVERE BODILY SUFFERING +7018-75789-0027-2235: SO HAVING MADE UP MY MIND I PACKED UP IN BALES A QUANTITY OF PRECIOUS STUFFS SUITED FOR SEA TRADE AND REPAIRED WITH THEM FROM BAGHDAD CITY TO BASSORAH TOWN WHERE I FOUND A SHIP READY FOR SEA AND IN HER A COMPANY OF CONSIDERABLE MERCHANTS +2609-157645-0005-405: ANOTHER EIGHTEENTH CENTURY CLERICAL WORTHY THE FAMOUS 
DOCTOR PARR AN INVETERATE SMOKER WAS ACCUSTOMED TO DO WHAT (MISTER DISNEY->MITZTER DNEY) PREVENTED ARCHBISHOP BLACKBURN FROM DOING HE SMOKED IN HIS (VESTRY->VETXRY) AT HATTON +3764-168670-0045-1020: TO HAVE HIMSELF NAILED UP IN A CASE AND CARRIED OFF LIKE A BALE OF GOODS TO LIVE FOR A LONG TIME IN A BOX TO FIND AIR WHERE THERE IS NONE TO ECONOMIZE HIS BREATH FOR HOURS TO KNOW HOW TO STIFLE WITHOUT DYING THIS WAS ONE OF JEAN VALJEAN'S GLOOMY TALENTS +4350-9170-0013-1395: EVEN IF THERE WAS ONCE A TIME WHEN OWING TO THE LOW (STANDARD->STANDARDS) OF MORALS AND THE DISPOSITION OF MEN TO VIOLENCE THE EXISTENCE OF AN AUTHORITY TO RESTRAIN SUCH VIOLENCE WAS AN ADVANTAGE BECAUSE THE VIOLENCE OF (*->THE) GOVERNMENT WAS LESS THAN THE VIOLENCE OF INDIVIDUALS ONE CANNOT BUT SEE THAT THIS ADVANTAGE COULD NOT BE LASTING +2414-159411-0007-330: THIS TIGER SAID THE BRAHMAN BEGGED ME TO LET HIM OUT OF HIS CAGE TO DRINK A LITTLE WATER AND HE PROMISED NOT TO HURT ME IF I DID SO BUT NOW THAT I HAVE (LET->LEFT) HIM OUT HE WISHES TO EAT ME +1688-142285-0000-0: THERE'S IRON THEY SAY IN ALL OUR BLOOD AND A GRAIN OR TWO PERHAPS IS GOOD BUT HIS HE MAKES ME HARSHLY FEEL HAS GOT A LITTLE TOO MUCH OF STEEL ANON +3080-5032-0007-555: MY BEST QUALITIES IF I HAVE ANY THAT ARE GOOD SERVED BUT FOR AGGRAVATIONS OF MY FAULT AND I WAS ALLOWED TO HAVE WIT AND UNDERSTANDING AND DISCRETION IN OTHER THINGS THAT IT MIGHT APPEAR I HAD NONE IN THIS +2033-164914-0015-226: NOW WHEN THE FIREMAN HEARD THESE WORDS HE FEARED FOR ZAU AL MAKAN AND WEPT WITH EXCEEDING WEEPING AND SAID TO THE EUNUCH BY ALLAH IT WAS NOT I AND I KNOW HIM NOT +2033-164915-0007-241: SHE SAID IT HATH REACHED ME O AUSPICIOUS KING THAT WHEN THE (STOKER GIRTHED->STALKER GIRDED) HIS ASS FOR FLIGHT AND BESPAKE HIMSELF SAYING (OH->O) WOULD I KNEW WHAT IS BECOME OF HIM +3528-168669-0010-705: BECAUSE (DOM MABILLON->DONEBYON) GIVES FOUR HUNDRED AND SEVENTEEN EPISTLES OF SAINT BERNARD WHILE (MERLONUS HORSTIUS->MERELONA'S HORSES) ONLY GIVES THREE HUNDRED (AND->*) SIXTY SEVEN I DO NOT DESPISE (MERLONUS HORSTIUS->MELONUS HORSES) NEITHER DO I +3080-5040-0025-600: THEY LAUGH TO HEAR ME SAY THAT ONE UNKIND WORD WOULD DESTROY ALL THE SATISFACTION OF MY LIFE AND THAT I SHOULD EXPECT OUR KINDNESS SHOULD INCREASE EVERY DAY IF IT WERE POSSIBLE BUT NEVER LESSEN +7902-96595-0016-2445: GURR TURNED AWAY IMPATIENTLY AGAIN AND SIGNING TO HIS MEN TO FOLLOW THEY ALL BEGAN TO TRAMP UP THE STEEP TRACK LEADING TOWARD THE (HOZE->HOSE) WITH THE RABBITS SCUTTLING AWAY AMONG THE (FURZE->FIRS) AND SHOWING THEIR WHITE COTTONY TAILS FOR A MOMENT AS THEY DARTED DOWN INTO THEIR HOLES +1688-142285-0001-1: MARGARET SAID MISTER HALE AS HE RETURNED FROM SHOWING HIS GUEST (DOWNSTAIRS->DOWN STAIRS) I COULD NOT HELP WATCHING YOUR FACE WITH SOME ANXIETY WHEN MISTER THORNTON MADE HIS CONFESSION OF HAVING BEEN A SHOP BOY +7105-2340-0013-2295: OH WE HAVEN'T SHOWN YOU THE SILVER WEDDING PRESENTS SAID MISSUS PETER SUDDENLY AS THOUGH STRUCK BY A BRILLIANT IDEA FOR ENTERTAINING THE GUEST HERE THEY ALL ARE +1688-142285-0030-30: SHE AND DIXON HELD MYSTERIOUS CONSULTATIONS IN HER BEDROOM FROM WHICH DIXON WOULD COME OUT CRYING AND CROSS AS WAS HER CUSTOM WHEN ANY DISTRESS OF HER MISTRESS CALLED UPON HER SYMPATHY +3005-163389-0010-450: THE CROWD WASHED BACK SUDDEN AND THEN BROKE ALL APART AND WENT TEARING OFF EVERY WHICH WAY AND BUCK HARKNESS HE HEELED IT AFTER THEM LOOKING TOLERABLE CHEAP +6938-70848-0018-2175: BUT THIS WORKED BOTH WAYS THE (LEFT->LAST) SOCIALIST (REVOLUTIONISTS->REVOLUTIONIST) DECIDED THAT IF THE CONGRESS 
HAD NO POWER OVER THE EXECUTIVE COMMITTEE THEN THE EXECUTIVE COMMITTEE HAD NO POWER OVER THE CONGRESS +5442-41168-0008-1680: LEVIN DID NOT IN THE LEAST UNDERSTAND WHAT WAS THE MATTER AND HE (MARVELED->MARVELLED) AT THE PASSION WITH WHICH IT WAS DISPUTED WHETHER OR NOT THE DECISION ABOUT (FLEROV->FLAIROFF) SHOULD BE PUT TO THE VOTE +1998-15444-0025-121: TO PURIFY IT ADD A SMALL QUANTITY OF DILUTE SULPHURIC ACID AND AFTER EVAPORATING TO THREE QUARTERS OF ITS BULK ADD A SATURATED SOLUTION OF CARBONATE OF POTASH OR SODA +7105-2340-0028-2310: LADY ERNESTINE (PIGEONCOTE->PIECOTE) HIS MOTHER MOVED IN CIRCLES WHICH WERE ENTIRELY BEYOND THEIR COMPASS OR AMBITIONS AND THE SON WOULD PROBABLY ONE DAY BE AN AMBASSADOR +6432-63723-0001-2100: AND WHEN THE FISH WAS LANDED PANTING ON THE GRASS AND (SHAG->SHAGG) HAD BEEN ROUSED FROM HIS SLUMBER TO SLIP THE NOW LIMP FISH INTO THE CREEL COLONEL ASHLEY GAVE A SIGH OF RELIEF AND REMARKED I THINK I SEE IT NOW +7105-2340-0001-2283: WELL THE FAILING STILL EXISTS DOESN'T IT SAID (HER->THE) HUSBAND OR DO YOU SUPPOSE A REFORM OF CHARACTER IS ENTAILED ALONG WITH THE ESTATE +8280-266249-0052-2853: A DEEP GROAN STARTLED THEM AND THEY SPRANG TO THEIR FEET PALE AND TREMBLING WITH SUDDEN TERROR EACH HOLDING HIS BREATH AND STRAINING HIS EAR TO CATCH A REPETITION OF THE DREAD SOUND +3538-163622-0006-874: WHEN HE HAD RUN AFTER THE FOALS FOR A LONG LONG TIME AND WAS HOT AND TIRED HE PASSED BY A CLEFT IN THE ROCK WHERE AN OLD WOMAN WAS SITTING SPINNING WITH A DISTAFF AND SHE CALLED TO HIM +2414-128291-0011-274: WANTON (AVIDITY->AID DUTY) BILIOUS ENVY CAREWORN REVENGE (POPULACE->POPULOUS) PRIDE ALL (THESE->ALWAYS) STRUCK MINE EYE +4294-14317-0012-1278: WHEN HE HAD HEARD THIS SPEECH THE DUKE ROSE UP IN ANGER AND SENT FOR BERNARDONE WHO WAS FORCED TO TAKE FLIGHT AS FAR AS VENICE HE AND ANTONIO LANDI WITH HIM +8461-258277-0001-2868: BUT HE ANSWERED NEEDS MUST (I->THY) HAVE (ZAYNAB->THY NAB) ALSO NOW SUDDENLY THERE CAME A RAP AT THE DOOR AND THE MAID SAID WHO IS AT THE DOOR +6128-63241-0000-1999: POOR RANSOM ANNOUNCED THIS FACT TO HIMSELF AS IF HE HAD MADE A GREAT DISCOVERY BUT IN REALITY HE HAD NEVER BEEN SO (BOEOTIAN->BEE OTIAN) AS AT THAT MOMENT +7975-280085-0002-2538: BOB'S SHATTERED ELBOW WAS REQUIRING FREQUENT ATTENTION AND THAT NIGHT WE MADE ONLY NINE MILES (AND->ON) MONDAY MONDAY NIGHT AND TUESDAY WE SPENT IN A DESERTED (FARM HOUSE->FARMHOUSE) CLOSE TO (MANKATO->MAN CATO) +6938-70848-0022-2179: THE FIRST STAGE WAS THE CRUSHING OF AUTOCRACY AND THE CRUSHING OF THE POWER OF THE INDUSTRIAL (CAPITALISTS->CAPITALIST) AND (LAND OWNERS->THE LANDOWNERS) WHOSE INTERESTS ARE CLOSELY RELATED +3538-142836-0017-843: IN ALL THE OPERATIONS FOR PRESERVE MAKING WHEN THE PRESERVING PAN IS USED IT SHOULD NOT BE PLACED ON THE FIRE BUT ON A TRIVET UNLESS THE JAM IS MADE ON A HOT PLATE WHEN THIS IS NOT NECESSARY +3331-159605-0009-618: I DON'T THINK IT WAS HIS WEALTH ACCOMPLISHMENTS OR POSITION THAT MOST ATTRACTED POLLY THOUGH THESE DOUBTLESS POSSESSED A GREATER INFLUENCE THAN SHE SUSPECTED +3080-5032-0011-559: I CAN NO SOONER GIVE YOU SOME LITTLE HINTS (WHEREABOUTS->WHEREABOUT) THEY LIVE BUT YOU KNOW THEM PRESENTLY AND I MEANT YOU SHOULD BE BEHOLDING TO ME FOR YOUR ACQUAINTANCE +5442-41168-0026-1698: TWO NOBLE GENTLEMEN WHO HAD A WEAKNESS FOR STRONG DRINK HAD BEEN MADE DRUNK BY THE PARTISANS OF SNETKOV AND A THIRD HAD BEEN ROBBED OF HIS UNIFORM +3997-180294-0026-1114: AT THE BEGINNING OF THE THIRD ACT I HEARD THE DOOR OF THE BOX ON WHICH MY EYES HAD BEEN ALMOST CONSTANTLY FIXED OPEN AND 
MARGUERITE APPEARED +4350-9170-0031-1413: EVERY INCREASE IN THE ARMY OF ONE STATE WITH THE AIM OF SELF DEFENSE AGAINST ITS SUBJECTS BECOMES A SOURCE OF DANGER FOR NEIGHBORING STATES AND CALLS FOR A SIMILAR INCREASE IN THEIR ARMIES +5484-24318-0008-1774: BUT IF HE WERE DESTINED TO MEET HIS (MYRTILUS->MERTALUS) AND HIS MOTHER IN THE WORLD BEYOND THE GRAVE WHAT HAD HE NOT TO TELL THEM HOW SURE HE WAS OF FINDING A JOYFUL RECEPTION THERE FROM BOTH +4294-35475-0003-1294: IN SOOTH THOU SHALT HAVE ONE BUT IT SHALL BE ONE BEFITTING THY MAIDEN SIZE AND COURAGE IF SO SMALL A WEAPON CAN BE FOUND IN ALL MY KINGDOM +2609-156975-0031-392: DO THE EARLIEST HEBREW TRADITIONS IMPLY THAT THE ANCESTORS OF THE ISRAELITES WERE WORSHIPPERS OF JEHOVAH +5764-299665-0088-1892: NATURE PRODUCES (*->IT) WITHOUT PURPOSE SUSTAINS WITHOUT INTENTION AND DESTROYS WITHOUT THOUGHT +8188-269288-0014-2687: THESE LETTERS HAVE JUST COME FOR YOU AND ANNIE COLCHESTER SHE SAID AND AS I WAS COMING (UPSTAIRS->UP STAIRS) I THOUGHT I WOULD LEAVE THEM WITH YOU +8461-281231-0011-2911: TWO OF THE FOREMOST INSTANTLY FELL AND THE REST GAVE WAY NOTWITHSTANDING ALL (THEIR LEADERS->THE LEADER'S) EFFORTS TO STOP THEM +3764-168670-0017-992: THAT HE FAUCHELEVENT WAS TO NAIL UP THE COFFIN IN THE CELL RAISE THE STONE IN THE CHAPEL AND (LOWER->BLOWER) THE CORPSE INTO THE VAULT +1998-15444-0012-108: THE TUBE SHOULD BE EXAMINED TO SEE THAT IT IS NOT BROKEN OR CRACKED AS ACCIDENTS HAVE HAPPENED FROM NEGLECTING THIS PRECAUTION +8280-266249-0050-2851: AN INTENSE VOICELESS EXCITEMENT POSSESSED THE PLAYERS FOR THE GAME WAS A CLOSE ONE AND THE STAKES WERE VERY HEAVY +6432-63723-0048-2147: I BEG YOUR PARDON HE SAID IN THE CULTURED TONES HE KNEW SO WELL HOW TO USE YET OF WHICH HE MADE SO LITTLE USE OF LATE +6070-63485-0005-1907: NO SAID THE OLD BRUTE GRUMBLINGLY NO NOT ONE RING WHAT A SHAME +3997-180297-0020-1142: SUCH A MAN I FOUND IN THE DUKE BUT THE DUKE IS OLD AND OLD AGE NEITHER PROTECTS NOR CONSOLES +3331-159609-0006-662: FORGIVE ME POLLY BUT I CAN'T HELP SAYING IT FOR IT IS THERE AND I WANT TO BE AS TRUE TO YOU AS YOU WERE TO ME IF I CAN +3528-168669-0042-737: IN HER LIFETIME MOTHER CRUCIFIXION MADE CONVERTS AFTER HER DEATH SHE WILL PERFORM MIRACLES SHE WILL +7902-96591-0007-2327: THEN AS ARCHY STOOD IN THE DARK LITERALLY AGHAST WITH ASTONISHMENT HE HEARD THE FAINT RUSTLING ONCE MORE AND AGAIN ALL WAS SILENT +6128-63240-0010-1981: WELL SO IT IS THEY ARE ALL WITCHES AND WIZARDS MEDIUMS AND SPIRIT RAPPERS AND ROARING RADICALS +4198-61336-0012-1247: ITS FALL MAY NOT HAVE BEEN UNCONNECTED WITH THE TREND OF EVENTS IN ASSYRIA DURING THE CLOSING YEARS OF THE MIDDLE EMPIRE +2609-169640-0007-422: HE WAS TOO MUCH OF A SEAMAN TO REQUIRE A SECOND LOOK IN ORDER TO ASCERTAIN WHAT WAS TO BE DONE +3005-163389-0012-452: THEY ARGUED AND TRIED TO KEEP HIM OUT BUT HE WOULDN'T LISTEN AND THE WHOLE SHOW COME TO A STANDSTILL +8280-266249-0021-2822: AND THEN SUCH POWERFUL ACIDS PRODUCE INTENSE IRRITATION AND THIRST THIRST WHICH WATER DOES NOT QUENCH +6432-63722-0048-2087: AT BEST HE WOULD GET OFF WITH A SCOTCH VERDICT OF NOT (PROVEN->PROVING) BUT HE DOESN'T WANT THAT NOR DO I +3080-5040-0012-587: I TOLD YOU IN MY LAST THAT MY (SUFFOLK->SUFFOLED) JOURNEY WAS LAID ASIDE AND THAT INTO KENT HASTENED +7975-280084-0003-2521: J S ALLEN WHOSE HARDWARE STORE WAS NEAR TRIED TO GO INTO THE BANK BUT MILLER ORDERED HIM AWAY AND HE RAN AROUND THE CORNER SHOUTING +7018-75788-0004-2192: DO AS THEY DO AND BELIKE THOU SHALT GAIN WHAT MAY FURTHER THY RETURN VOYAGE TO THY NATIVE LAND 
+4350-9170-0000-1382: EDUCATED PEOPLE OF THE UPPER CLASSES ARE TRYING TO STIFLE THE EVER GROWING SENSE OF THE NECESSITY OF TRANSFORMING THE EXISTING SOCIAL ORDER +7902-96592-0011-2356: NO HE WAS NOT DREAMING FOR HE WAS LOOKING OUT ON THE SEA OVER WHICH A FAINT MIST HUNG LIKE WREATHS OF SMOKE +5484-24317-0024-1756: HE WINKED AT HER AND MADE A SIGNIFICANT GESTURE AS HE SPOKE AND THEN INFORMED THE BLIND ARTIST HOW GRACIOUSLY (ARSINOE->ARSENAL) HAD REMEMBERED HIM WHEN SHE HEARD OF THE REMEDY BY WHOSE AID MANY A WONDERFUL CURE OF BLIND (EYES->EYE) HAD BEEN MADE IN RHODES +7105-2330-0041-2281: THE LOCAL TRADE UNIONISTS TOOK OFFENCE AT THE FACT OF CABINET MINISTERS HAVING PERSONALLY ACTED AS STRIKE BREAKERS AND EVEN THE RELEASE OF PLATTERBAFF FAILED TO PACIFY THEM +7975-280085-0015-2551: ONE OF THE FELLOWS IN THE OUTER LINE NOT BRAVE ENOUGH HIMSELF TO JOIN THE VOLUNTEERS WHO HAD COME IN TO BEAT US OUT WAS NOT DISPOSED TO BELIEVE IN THE SURRENDER AND HAD HIS GUN LEVELLED ON BOB IN SPITE OF THE HANDKERCHIEF WHICH WAS WAVING AS A FLAG OF TRUCE +7018-75788-0003-2191: UPON THIS HE BROUGHT ME A COTTON BAG AND GIVING IT TO ME SAID TAKE THIS BAG AND FILL IT WITH PEBBLES FROM THE BEACH AND GO FORTH WITH A COMPANY OF THE TOWNSFOLK TO WHOM I WILL GIVE A CHARGE RESPECTING THEE +533-131562-0014-1621: HERE (BENSON->BASSOM) ENTERED (WITH->*) THE CANDLES AND THERE FOLLOWED A BRIEF INTERVAL OF SILENCE I SITTING STILL IN MY CHAIR AND HE STANDING WITH HIS BACK TO THE FIRE SILENTLY TRIUMPHING IN MY DESPAIR +367-130732-0027-947: MINCE OR CUT INTO SMALL DICE A CARROT AN ONION ONE HEAD OF CELERY AND A FEW PARSLEY ROOTS AND TO THESE ADD A BAY LEAF A SPRIG OF THYME A LITTLE (MINIONETTE->MANONET) PEPPER AND TWO OUNCES OF BUTTER +6432-63723-0032-2131: MEANWHILE COLONEL ASHLEY WAS A VERY BUSY MAN AND TO NO ONE DID HE TELL VERY MUCH ABOUT HIS ACTIVITIES HE SAW DARCY FREQUENTLY AT THE JAIL AND TO THAT YOUNG MAN'S PLEADINGS THAT SOMETHING (*->TO) BE DONE ALWAYS RETURNED THE ANSWER +3764-168670-0046-1021: YOU SURELY MUST HAVE A GIMLET YOU WILL MAKE A FEW HOLES HERE AND THERE AROUND MY MOUTH AND YOU WILL NAIL THE TOP PLANK ON LOOSELY GOOD AND WHAT IF YOU SHOULD HAPPEN TO COUGH OR TO SNEEZE +1998-29455-0026-197: THE NIGHT IS FULL OF INTERESTING LITTLE SOUNDS THAT WILL NOT AT FIRST LET YOU SLEEP THE RUSTLE OF LITTLE (WILD->WHITE) THINGS IN THE HEDGES THE BARKING OF DOGS IN DISTANT FARMS THE (CHIRP->CHIRRUP) OF CRICKETS AND THE CROAKING OF FROGS +7018-75789-0013-2221: SO I ROSE WITHOUT STAY OR DELAY AND KISSED THE KING'S HAND AND ACQUAINTED HIM WITH MY LONGING TO SET OUT WITH THE MERCHANTS FOR THAT I PINED AFTER MY PEOPLE AND MINE OWN LAND +6070-86744-0015-1936: I WILL CONFESS TO YOU ALBERT REPLIED FRANZ THE COUNT IS A VERY SINGULAR PERSON AND THE APPOINTMENT YOU HAVE MADE TO MEET HIM IN PARIS FILLS ME WITH A THOUSAND APPREHENSIONS +1998-15444-0011-107: IN USING THE ELASTIC STOMACH TUBE SOME FLUID SHOULD BE INTRODUCED INTO THE STOMACH BEFORE ATTEMPTING TO EMPTY IT OR A PORTION OF THE MUCOUS MEMBRANE MAY BE SUCKED INTO THE APERTURE +4198-12281-0012-1231: O THE HOLY LADY (NYTOUCH->KNIGHTS) SAID ONE THE GOOD (SANCTESS->SANCTUS) O OUR LADY OF (SUCCOURS->SUCCOURUS) SAID ANOTHER HELP HELP +1998-15444-0026-122: BOIL THE FINELY DIVIDED SUBSTANCE WITH ABOUT ONE EIGHTH ITS BULK OF PURE HYDROCHLORIC ACID ADD FROM TIME TO TIME POTASSIC CHLORATE UNTIL THE SOLIDS ARE REDUCED TO A STRAW YELLOW FLUID +3080-5040-0010-585: BY THE NEXT I SHALL BE GONE INTO KENT AND MY OTHER JOURNEY IS LAID ASIDE WHICH I AM NOT DISPLEASED AT BECAUSE IT WOULD HAVE 
BROKEN OUR INTERCOURSE VERY MUCH +3080-5032-0022-570: I KNOW NOT HOW MY BROTHER COMES TO BE SO WELL INFORMED AS YOU SAY BUT I AM CERTAIN HE KNOWS THE UTMOST OF THE INJURIES YOU HAVE RECEIVED FROM HER +7975-280076-0001-2490: IT WAS CLAIMED BY PEOPLE OF LIBERTY THAT THEY POSITIVELY RECOGNIZED AMONG THE ROBBERS (OLL->ALL) SHEPHERD RED (MONKERS->MOCKERS) AND BUD (PENCE->PANTS) WHO HAD SEEN SERVICE WITH (QUANTRELL->QUANTRAILLE) +5764-299665-0012-1816: HOW CAN WE ACCOUNT FOR THE WILD BEASTS THAT DEVOUR HUMAN BEINGS FOR THE FANGED SERPENTS WHOSE BITE IS DEATH +2033-164914-0000-211: REPLIED HE OF A TRUTH I HEARD HIM NOT AND I WOT HIM NOT AND FOLKS ARE ALL SLEEPING +533-1066-0004-1560: OUT OF DEFERENCE TO LIDDY THEY WASHED (THEIR->HER) DISHES ONCE A DAY AND THEY (CONCOCTED->CONCLUDED) QUEER MESSES ACCORDING TO THEIR SEVERAL ABILITIES +533-1066-0005-1561: MISS (INNES->EANES) HE SAID STOPPING ME AS I WAS ABOUT TO GO TO MY ROOM (UP STAIRS->UPSTAIRS) HOW ARE YOUR NERVES (TONIGHT->TO NIGHT) +3080-5032-0008-556: TIS A STRANGE CHANGE AND I AM VERY SORRY FOR IT BUT I'LL SWEAR I KNOW NOT HOW TO HELP IT +3764-168671-0048-1081: BUT HE HAD REVERSES HE HAD LOSSES ON CHANGE I WAS OBLIGED TO RENOUNCE THE PROFESSION OF AUTHOR BUT I AM STILL A PUBLIC WRITER +3764-168670-0000-975: THE STRIDES OF A LAME MAN ARE LIKE THE OGLING GLANCES OF A ONE EYED MAN THEY DO NOT REACH THEIR GOAL VERY PROMPTLY +7902-96591-0020-2340: SYMPATHY AND PITY FOR THE DWELLERS IN THE (HOZE->HOES) WERE COMPLETELY GONE NOW AND HE SET HIS TEETH FAST AND MENTALLY CALLED HIMSELF A WEAK IDIOT FOR EVER THINKING ABOUT SUCH PEOPLE +4350-9170-0014-1396: BETWEEN THE MEMBERS OF ONE STATE SUBJECT TO A SINGLE AUTHORITY THE STRIFE BETWEEN (*->THE) INDIVIDUALS SEEMS STILL LESS AND THE LIFE OF THE STATE SEEMS EVEN MORE SECURE +6070-86744-0014-1935: WHAT IS THE MATTER ASKED ALBERT OF FRANZ WHEN THEY HAD RETURNED TO THEIR OWN APARTMENTS YOU SEEM MORE THAN COMMONLY THOUGHTFUL +8188-274364-0001-2790: IN THE GOVERNMENT OF IRELAND HIS ADMINISTRATION HAD BEEN EQUALLY PROMOTIVE OF HIS MASTER'S INTEREST AND THAT OF THE SUBJECTS COMMITTED TO HIS CARE +4852-28312-0000-1470: OF THE MANY TIMES HE HAD EXAMINED MISTER WICKER'S WINDOW AND PORED OVER THE ROPE (THE->TO) SHIP AND THE NUBIAN BOY HE HAD NEVER GONE INTO MISTER WICKER'S SHOP +1998-29454-0011-135: WHEN IT WAS OVER THE MAN ASKED DICKIE IF HE COULD WALK A LITTLE WAY AND WHEN DICKIE SAID HE COULD THEY SET OUT IN THE MOST FRIENDLY WAY SIDE BY SIDE +5764-299665-0056-1860: THE QUEEN RECEIVED THE BIBLE KISSED IT AND PLEDGED HERSELF TO DILIGENTLY READ THEREIN +5764-299665-0057-1861: IN OTHER WORDS IT WAS JUST AS FIENDISH JUST AS INFAMOUS AS THE CATHOLIC SPIRIT +5764-299665-0071-1875: THE FIRST STONE IS THAT MATTER SUBSTANCE CANNOT BE DESTROYED CANNOT BE ANNIHILATED +8461-278226-0011-2895: SHE TURNED TO THE FRENCH ARTIST PRESENTLY AND ASKED HIM WHERE THE ELDER MISTER (KERSTALL->CRISTEL) LIVED AND IF THERE WAS ANY POSSIBILITY OF SEEING HIM +3764-168670-0031-1006: NO I HAVE THE KEY TO THE DOOR WHICH COMMUNICATES WITH THE CONVENT THE PORTER HAS THE KEY TO THE DOOR WHICH COMMUNICATES WITH THE CHURCH +2033-164915-0006-240: THE CHAMBERLAIN CALLED (THE->*) CASTRATO AND CHARGED HIM TO DO ACCORDINGLY SO HE REPLIED I HEAR AND I OBEY AND HE TOOK HIS PAGES WITH HIM AND WENT OUT IN SEARCH OF THE (STOKER->STALKER) TILL HE FOUND HIM IN THE REAR OF THE CARAVAN (GIRTHING->GIRDING) HIS ASS AND PREPARING FOR FLIGHT +3997-180294-0007-1095: EDUCATION FAMILY FEELING THE SENSE OF DUTY THE FAMILY ARE STRONG SENTINELS BUT THERE ARE NO SENTINELS SO 
VIGILANT AS NOT TO BE DECEIVED BY A GIRL OF SIXTEEN TO WHOM NATURE BY THE VOICE OF THE MAN SHE LOVES GIVES THE FIRST (COUNSELS->COUNSEL) OF LOVE ALL THE MORE ARDENT BECAUSE THEY SEEM SO PURE +5484-24318-0019-1785: HE MIGHT NOW HAVE BEEN PERMITTED TO BIND FOREVER TO HIS LIFE THE WOMAN WHO HAD JUST RESCUED HIM FROM THE GREATEST DANGER BUT THE CONFESSION HE MUST MAKE TO HIS FELLOW ARTISTS IN THE (PALAESTRA->PILASTER OF) THE FOLLOWING MORNING STILL SEALED HIS LIPS YET IN THIS HOUR HE FELT THAT HE WAS UNITED TO HER AND OUGHT NOT TO CONCEAL WHAT AWAITED HIM SO OBEYING A STRONG IMPULSE HE EXCLAIMED YOU KNOW THAT I LOVE YOU +2414-165385-0001-360: (*->AND) THAT HIS SOUL MIGHT AFTERWARDS OCCUPY SUCH A STATION AS WOULD BE MOST SUITABLE TO HIS CHARACTER IT WAS (SENTENCED->INTENSE) TO INHABIT THE BODY OF THAT (FINICAL->FINNICAL) GRINNING AND MISCHIEVOUS LITTLE (MIMICK->MIMIC) WITH FOUR LEGS WHICH YOU NOW BEHOLD BEFORE YOU +2033-164916-0003-255: (ZAU AL->ZAUAM) MAKAN BOWED HIS HEAD AWHILE AND THEN SAID I ACCEPT (THIS->THE) POSITION FOR INDEED THERE WAS NO REFUSING AND HE WAS CERTIFIED THAT THE CHAMBERLAIN HAD COUNSELLED HIM WELL AND WISELY AND SET HIM ON THE RIGHT WAY +5442-32873-0013-1665: HE SLACKENED HIS PACE AND TAPPED SHARPLY AT THE LITTLE WINDOW OF THAT MODEST POST OFFICE AT WHICH THE YOUNG LADIES IN THE PONY CARRIAGE HAD PULLED UP THE DAY BEFORE AND WITHIN WHICH LUKE (WAGGOT->WAGGETT) WAS WONT TO SLEEP IN A SORT OF WOODEN BOX THAT FOLDED UP (AND->WHAT) APPEARED TO BE A CHEST OF DRAWERS ALL DAY +4294-14317-0009-1275: I SAID MY LORD I THANK YOU AND BEG YOU TO CONDESCEND SO FAR AS TO LISTEN TO FOUR WORDS IT IS TRUE THAT HE LENT ME A PAIR OF OLD SCALES TWO ANVILS AND THREE LITTLE HAMMERS WHICH ARTICLES I BEGGED HIS WORKMAN GIORGIO DA CORTONA FIFTEEN DAYS AGO TO FETCH BACK +3005-163390-0007-466: WHAT WE WANT IS TO GO OUT OF HERE QUIET AND TALK THIS SHOW UP AND SELL THE REST OF THE TOWN +8280-266249-0049-2850: THEN MISTER (LILBURN->LOWBORN) WAKING FROM HIS FIRST SLEEP IN A STATEROOM NEAR BY THOUGHT HE WOULD BREAK IT UP ONCE MORE +1998-29454-0026-150: HE TOOK OUT OF HIS POCKET (A NEW->IN YOUR) ENVELOPE A NEW SHEET OF PAPER AND A NEW PENCIL READY SHARPENED BY MACHINERY +3331-159609-0005-661: (NO->NOW) I (M->AM) ONLY TIRED HAD A GOOD DEAL TO DO LATELY AND THE DULL WEATHER MAKES ME JUST A (TRIFLE->TRAVEL) BLUE +4294-35475-0000-1291: BUT THE MIDDLE SON WAS LITTLE AND LORN HE WAS NEITHER DARK NOR FAIR HE WAS NEITHER HANDSOME NOR STRONG +3331-159605-0036-645: (MINNIE->MINNY) SAID THE OTHER DAY SHE WISHED SHE WAS A PIGEON SO SHE COULD PADDLE IN THE (PUDDLES->BOTTLES) AND NOT FUSS ABOUT RUBBERS +6432-63723-0046-2145: THERE WERE THREE OF THEM THE CENTER FIGURE BEING THAT OF HARRY KING AND HE WAS VERY MUCH INTOXICATED +6432-63722-0047-2086: RATHER A HYPOTHETICAL QUESTION COLONEL BUT I SHOULD SAY IT MIGHT BE A FIFTY FIFTY PROPOSITION +3538-142836-0014-840: MARMALADES JAMS AND FRUIT (PASTES->PASTE) ARE OF THE SAME NATURE AND ARE NOW IN VERY GENERAL REQUEST +6128-63241-0012-2011: HE WAS TOO SIMPLE TOO (MISSISSIPPIAN->MISSISSIPPIENT) FOR THAT SHE WAS ALMOST DISAPPOINTED +367-293981-0007-961: IF THEY (DON'T->DO NOT) LET THEMSELVES BE SEEN THEY LET THEMSELVES BE FELT SAID SANCHO IF NOT LET MY SHOULDERS SPEAK TO THE POINT +4852-28312-0001-1471: SO NOW ALONE UNTIL SOMEONE SHOULD ANSWER THE BELL HE LOOKED EAGERLY IF UNEASILY AROUND HIM +3997-182399-0002-1156: HE WAS CURIOUS ABOUT THAT BLACK HEADED COUSIN OF (OL MISTAH->OLD MISTER) BUZZARD VERY CURIOUS INDEED +3080-5040-0026-601: WE GO ABROAD ALL DAY AND PLAY ALL NIGHT AND 
SAY OUR PRAYERS WHEN WE HAVE TIME +4350-9170-0059-1441: SO MUCH FOR THE ADVANTAGES AND DISADVANTAGES OF BOTH LINES OF CONDUCT FOR A MAN OF THE WEALTHY (CLASSES AN->CLASS AND) OPPRESSOR +1998-29454-0041-165: IF YOU'RE CLEAN THEY SAY HONEST POVERTY (AN->AND) IF YOU'RE DIRTY THEY SAY SERVE YOU RIGHT +3538-163622-0002-870: HAST THOU WATCHED FAITHFULLY AND WELL THE WHOLE DAY LONG SAID THE KING WHEN THE LAD CAME INTO HIS PRESENCE IN THE EVENING +367-293981-0006-960: IT CANNOT BE THE (MOOR->MORE) ANSWERED DON QUIXOTE FOR THOSE UNDER ENCHANTMENT DO NOT LET THEMSELVES BE SEEN BY ANYONE +4852-28330-0001-1531: GUESS MISTER FINNEY WENT TO HIS QUARTERS I DON'T REMEMBER SEEING HIM CROSS THE DECK OR COME OVER THAT WAY AT ALL +5764-299665-0086-1890: THEY LIVE BY FRAUD AND VIOLENCE AND BEQUEATH THEIR VICES TO THEIR CHILDREN +7018-75788-0002-2190: HEARING THIS I WAS SORE TROUBLED REMEMBERING WHAT I HAD BEFORE SUFFERED FROM THE APE KIND +4294-35475-0014-1305: HE LIFTED THE SCISSORS AND WITH ONE STROKE DESTROYED THE WEB AND GAVE THE FLY ITS FREEDOM +8280-266249-0004-2805: THERE ARE SOME NICE LOOKING PEOPLE ON BOARD REMARKED MISTER TRAVILLA IN AN UNDERTONE TO HIS WIFE +3764-168671-0003-1036: BEHIND IT CAME AN OLD MAN IN THE GARMENTS OF A LABORER WHO LIMPED ALONG +1688-142285-0045-45: BESSY WAS SILENT IN HER TURN FOR A MINUTE OR TWO THEN SHE REPLIED +6432-63722-0046-2085: WHAT ARE THE CHANCES OF GETTING HIM OFF LEGALLY IF WE GO AT IT FROM A NEGATIVE STANDPOINT ASKED THE COLONEL +367-130732-0011-931: ONE HAS TO COME TO SAN FRANCISCO TO PARTAKE OF THE KING OF (SHELL FISH->SHELLFISH) THE MAMMOTH PACIFIC CRAB +5764-299665-0026-1830: MAN ADVANCES AND NECESSARILY ADVANCES THROUGH EXPERIENCE +3331-159605-0037-646: NOW DON'T BE AFFECTED POLLY BUT JUST TELL ME LIKE A DEAR HAS (N'T->NOT) HE PROPOSED +8280-266249-0035-2836: HIS VICTIM WHO HAD BEEN LOOKING ON DURING THE LITTLE SCENE AND LISTENING TO THE MYSTERIOUS VOICE IN SILENT WIDE EYED WONDER AND FEAR NOW ROSE HASTILY HIS FACE DEATHLY PALE WITH TREMBLING HANDS GATHERED UP THE MONEY HE HAD STAKED AND HURRYING (INTO->TO) HIS (STATE ROOM->STATEROOM) LOCKED HIMSELF IN +8280-266249-0065-2866: WE FEEL MY HUSBAND AND I THAT WE ARE ONLY THE STEWARDS OF HIS BOUNTY AND (THAT->*) BECAUSE HE HAS SAID INASMUCH AS YE HAVE DONE IT UNTO ONE OF THE LEAST OF THESE MY BRETHREN YE HAVE DONE IT UNTO ME IT IS THE GREATEST PRIVILEGE AND DELIGHT TO DO ANYTHING FOR HIS PEOPLE +6070-63485-0004-1906: IN AN INSTANT THE SCHOOLMASTER WITH A LEAP RESEMBLING IN STRENGTH AND AGILITY THE SPRING OF A TIGER SEIZED (SEYTON->SETAN) WITH ONE HAND BY THE THROAT AND EXCLAIMED YOUR MONEY OR I WILL FLING YOU INTO THIS HOLE +2033-164914-0016-227: SO GO THOU TO THY STATION AND IF THOU AGAIN MEET ANY ONE AFTER THIS HOUR RECITING AUGHT OF POETRY WHETHER HE BE NEAR OR FAR IT WILL BE I OR SOME ONE I KNOW AND THOU SHALT NOT LEARN OF HIM BUT BY ME +1688-142285-0032-32: BUT THOUGH SHE RECEIVED CARESSES AND FOND WORDS BACK AGAIN IN SUCH PROFUSION AS WOULD HAVE GLADDENED HER FORMERLY YET SHE FELT THAT THERE WAS A SECRET WITHHELD FROM HER AND SHE BELIEVED IT BORE SERIOUS REFERENCE TO HER MOTHER'S HEALTH +6070-86745-0000-1951: THEN SHOULD ANYTHING APPEAR TO MERIT A MORE MINUTE EXAMINATION ALBERT DE MORCERF COULD FOLLOW UP HIS RESEARCHES BY MEANS OF A SMALL GATE SIMILAR TO THAT CLOSE TO THE CONCIERGE'S DOOR AND WHICH MERITS A PARTICULAR DESCRIPTION +4294-9934-0003-1321: TO STAND BETWEEN TWO RELIGIONS FROM ONE OF WHICH YOU HAVE NOT AS YET EMERGED AND ANOTHER INTO WHICH YOU HAVE NOT YET ENTERED IS INTOLERABLE AND TWILIGHT IS 
PLEASING ONLY TO BAT LIKE SOULS +5484-24317-0009-1741: HITHERTO THE MERCHANT HAD BEEN INDUCED IT IS TRUE TO ADVANCE LARGE SUMS OF MONEY TO THE QUEEN BUT THE LOYAL DEVOTION WHICH HE SHOWED TO HER ROYAL HUSBAND HAD RENDERED IT IMPOSSIBLE TO GIVE HIM EVEN A HINT OF THE CONSPIRACY +3997-180297-0019-1141: IT MEANS LITTLE ENOUGH TO THEM THAT WE SHOULD HAVE TEN LOVERS EXTRA AS LONG AS THEY GET DRESSES OR A BRACELET OUT OF THEM AND THAT THEY CAN DRIVE IN OUR CARRIAGE FROM TIME TO TIME OR COME TO OUR BOX AT THE THEATRE +8280-266249-0020-2821: IN A SMALL COUNTRY TOWN SEVEN OF THESE MYSTERIOUS PROVIDENCES OCCURRED WITHIN THE CIRCUIT OF A MILE ALL DIRECTLY TRACEABLE TO TOBACCO AND ANY PHYSICIAN ON A FEW MOMENTS REFLECTION CAN MATCH THIS FACT BY HIS OWN OBSERVATION +5764-299665-0072-1876: IF THESE (CORNER STONES->CORNERSTONES) ARE FACTS IT FOLLOWS AS A NECESSITY THAT MATTER AND FORCE ARE FROM (AND->END) TO ETERNITY THAT THEY CAN NEITHER BE INCREASED NOR DIMINISHED +3528-168669-0115-810: I CANNOT WORK VERY FAST I AM INFIRM THAT IS WHY I REQUIRE AN ASSISTANT I LIMP +3528-168669-0011-706: (MERIT->MARRIAGE) CONSISTS IN WORKING ACCORDING TO ONE'S STRENGTH A CLOISTER IS NOT A (DOCK YARD->DOCKYARD) +2414-128292-0026-316: THY DANGER IS (NOT->BUT) SMALL THOU FREE SPIRIT AND WANDERER +6432-63723-0016-2115: NO BUT HE WILL OR I'LL SUE HIM AND GET JUDGMENT OH HE'LL PAY ALL RIGHT +4198-12259-0026-1201: I WAS WONT HERETOFORE TO DRINK OUT ALL BUT NOW I LEAVE NOTHING +3764-168670-0016-991: THAT THE PRIORESS AND THE VOCAL MOTHERS INTENDED TO FULFIL THE WISH OF THE DECEASED +4852-28319-0013-1515: THE FIGURE'S SHOES CARVED IN SOME EASTERN STYLE HAD CURVED UP POINTING TOES +8188-269288-0057-2730: OH THIS WILL KILL ME MY HEART WILL BREAK THIS WILL KILL ME +1688-142285-0091-91: MY DEAR SAID MISTER HALE ALARMED PRAY DON'T GO OFF ON THAT IDEA +7018-75789-0012-2220: SO I CONSORTED WITH THE CHIEF OF THE ISLANDERS AND THEY PAID ME THE UTMOST RESPECT +4198-61336-0010-1245: ARPAD WAS CAPTURED AND (MATI->MEANT TO) ILU DEPOSED AND PROBABLY PUT TO DEATH +3005-163390-0021-480: TAKE (THEM->EM) ALL AROUND THEY'RE A MIGHTY ORNERY LOT IT'S THE WAY THEY'RE RAISED +3538-142836-0000-826: GENERAL OBSERVATIONS ON PRESERVES (CONFECTIONARY->CONFECTIONERY) ICES AND DESSERT DISHES +3005-163391-0021-511: WHEN WE STRUCK THE BOAT SHE WAS ABOUT DONE LOADING AND PRETTY SOON SHE GOT OFF +8280-266249-0019-2820: NOTICE THE MULTITUDE OF SUDDEN DEATHS AND SEE HOW MANY ARE SMOKERS AND CHEWERS +8131-117016-0010-2565: I'VE GOT A FREE HAND AND WE'RE GOING TO RUN THIS THE WAY WE WOULD ON EARTH +3538-163624-0008-901: SO (REGIN->REGAN) MADE A SWORD AND SIGURD TRIED IT WITH A BLOW ON A LUMP OF IRON AND THE SWORD BROKE +3997-180297-0004-1126: MY DEAR PRUDENCE I ANSWERED YOU DO NOT KNOW WHAT YOU ARE SAYING +7975-280084-0002-2520: WHEN WE CAME UP I TOLD MILLER TO SHUT THE BANK DOOR WHICH THEY HAD LEFT OPEN IN THEIR HURRY +4350-10919-0002-1350: THE PRINCESS WAS DISTRACTED AND DID NOT KNOW WHAT TO DO SHE FELT SHE HAD SINNED AGAINST KITTY +6070-86745-0014-1965: BESIDES THAT DOES NOT CONCERN THE HOME BUT THE FINANCIAL DEPARTMENT +1688-142285-0016-16: OH PAPA BY THAT TESTING EVERYTHING BY THE STANDARD OF WEALTH +7902-96592-0040-2385: NOT YOU LOOK LIKE A WILD BEAST IN A CAGE LIKE A MONKEY YOU INSOLENT +5442-41168-0023-1695: IF THERE ARE MEN YOUNGER AND MORE DESERVING THAN I LET THEM SERVE +3538-163624-0007-900: ONLY ONE RING WAS LEFT WHICH THE DWARF WORE AND EVEN THAT WAS TAKEN FROM HIM +3538-163624-0022-915: THEN SIGURD RODE AWAY AND HE CAME TO THE HOUSE OF A KING WHO HAD A 
FAIR DAUGHTER +8131-117016-0025-2580: HE SWALLOWED THE SENTIMENT HIS OWN CLUB WAS MOVING NOW +8131-117029-0020-2670: DID YOU THINK WE'D LET YOU GO WITHOUT SEEING YOU OFF (COBBER->COWBER) HE ASKED +6128-63244-0012-2025: OUR MOVEMENT IS FOR ALL IT APPEALS TO THE MOST DELICATE LADIES +8131-117016-0040-2595: IN THE THIRD ONE BRUCE GORDON SPOTTED ONE OF THE MEN (WHO'D->WHO HAD) BEEN BEATEN BEFORE +4198-12259-0040-1215: MY (PRENTICESHIP->APPRENTICESHIP) IS (OUT I AM->I'M) A FREE MAN AT THIS TRADE +4852-28330-0015-1545: THIS SHIP ITS CARGO AND ITS MEN WILL BE IN YOUR HANDS +2414-128291-0022-285: AND (TALK->TALKED) TO MINE ANIMALS OF THE HAPPINESS OF ANIMALS +3331-159605-0007-616: HE HAS KNOWN HER ALL HER LIFE AND HAS A GOOD INFLUENCE OVER HER +2609-156975-0000-361: (THEN->THE) MOSES WAS AFRAID AND SAID SURELY THE THING IS KNOWN +2414-128292-0028-318: BEWARE LEST IN THE END A NARROW FAITH CAPTURE THEE A HARD (RIGOROUS->REGOROUS) DELUSION +6432-63723-0019-2118: THE MURDER OF MISSUS DARCY HAD SOME TIME AGO BEEN SHIFTED OFF THE FRONT PAGE THOUGH IT WOULD GET BACK THERE WHEN THE YOUNG JEWELER WAS TRIED +3005-163399-0012-529: I HAD MY MIND ON THE CHILDREN ALL THE TIME I WANTED TO GET THEM OUT TO ONE SIDE AND PUMP THEM A LITTLE AND FIND OUT WHO I WAS +2609-156975-0017-378: IS (PEONAGE->OPINION IS) ALWAYS DISASTROUS NOT ONLY TO ITS VICTIMS BUT ALSO TO THE GOVERNMENT IMPOSING IT +7975-280084-0005-2523: AND I CALLED TO HIM TO GET INSIDE AT THE SAME TIME FIRING A PISTOL SHOT IN THE AIR AS A SIGNAL TO THE THREE BOYS AT THE BRIDGE THAT WE HAD BEEN DISCOVERED +6432-63722-0019-2058: I HAPPENED TO SEE THOSE CANDLESTICKS IN THE WINDOW OF SINGA PHUT'S SHOP THE OTHER DAY AND I MADE UP MY MIND TO BUY THEM WHEN I HAD A CHANCE +4350-10919-0020-1368: THE CELEBRATED DOCTOR ANNOUNCED TO THE PRINCESS A FEELING OF WHAT WAS DUE FROM HIM DICTATED HIS DOING SO THAT HE OUGHT TO SEE THE PATIENT ONCE MORE +4294-35475-0017-1308: AS HE UTTERED THE WORDS THE SCISSORS LEAPED OUT OF HIS HAND AND BEGAN TO CUT THROUGH THE WOODEN SHUTTERS AS EASILY AS THROUGH A CHEESE +1688-142285-0064-64: BUT (HOO'S->WHO'S) COME AT LAST AND (HOO'S->WHO'S) WELCOME AS LONG AS (HOO'LL->HE'LL) KEEP FROM PREACHING ON WHAT (HOO->HE) KNOWS NOUGHT ABOUT +5764-299665-0059-1863: RELIGION HAS BEEN TRIED AND IN ALL COUNTRIES IN ALL TIMES HAS FAILED +7975-280057-0008-2463: MISSUS WASHINGTON WELLS AND HER SON SAMUEL ON THE ROAD HOME FROM KANSAS CITY TO (LEE'S->LEE) SUMMIT RECOGNIZED THE BODY AS THAT OF MY FATHER +6128-63244-0000-2013: MISS CHANCELLOR HERSELF HAD THOUGHT SO MUCH ON THE VITAL SUBJECT WOULD NOT SHE MAKE A FEW REMARKS AND GIVE THEM SOME OF HER EXPERIENCES +2033-164914-0002-213: THEN SAID THE EUNUCH ART THOU HE WHO REPEATED POETRY BUT NOW AND MY LADY HEARD HIM +4350-9170-0016-1398: AFTER CONQUEST THE POWER OF THE EMPEROR PUTS AN END TO INTERNAL DISSENSIONS AND SO THE STATE CONCEPTION OF LIFE JUSTIFIES ITSELF +1688-142285-0094-94: I DON'T KNOW POSITIVELY THAT IT IS HERS EITHER BUT FROM LITTLE THINGS I HAVE GATHERED FROM HIM I FANCY SO +7975-280063-0002-2478: ACCORDINGLY I WAS SHORTLY AWAKENED TO ACCOMPANY HIM TO (LONE->LONG) JACK WHERE HE WOULD PERSONALLY MAKE KNOWN THE SITUATION TO THE OTHER COLONELS +2609-169640-0009-424: AS OUR (SHEETS->SEATS) WERE ALL FLYING FORWARD AND REMAINED SO FOR A FEW MINUTES IT GAVE ME (*->A) LEISURE TO LOOK ABOUT +7105-2330-0028-2268: EIGHT O'CLOCK STRUCK THE CROWD OUTSIDE CHANTED WITH AN INCREASING VOLUME OF SOUND (WILL->WE'LL) VOTE THE OTHER WAY +3528-168656-0011-693: HE IS RESISTING FLUTTERING HIS TINY WINGS AND STILL 
MAKING AN EFFORT TO FLY BUT THE DANCER IS LAUGHING WITH A SATANICAL AIR +3080-5032-0026-574: HOW KINDLY DO I TAKE (THESE->THE) CIVILITIES OF YOUR (FATHER'S->FATHERS) IN EARNEST YOU CANNOT IMAGINE HOW HIS LETTER PLEASED ME +6070-86745-0017-1968: THEY SAY THAT IT IS QUITE FAIR AND THAT (SOWING->SEWING) SO MUCH RED YOU OUGHT TO REAP A LITTLE BLUE +5764-299665-0029-1833: A CHILD CHARMED BY THE BEAUTY OF THE FLAME (GRASPS IT->GRASPED) WITH (ITS->HIS) DIMPLED HAND +7105-2330-0013-2253: SEVEN THIRTY AMENDED THE PRIME MINISTER WE MUST AVOID ANY APPEARANCE OF PRECIPITANCY +2414-159411-0010-333: SIR CAMEL SIR CAMEL CRIED THE BRAHMAN HEAR AND GIVE JUDGMENT +3538-163624-0009-902: THEN SIGURD WENT TO HIS MOTHER AND ASKED FOR THE BROKEN PIECES OF HIS FATHER'S BLADE AND GAVE THEM TO (REGIN->REGAN) +7902-96594-0020-2416: THEN WHAT DID YOU MEAN SPEAK OUT AND DON'T DO THE DOUBLE SHUFFLE ALL OVER MY CLEAN DECK NO SIR +5764-299665-0027-1831: A MAN WISHING TO GO TO A CERTAIN PLACE (COMES->COME) TO WHERE THE ROAD DIVIDES +2414-159411-0009-332: LET THE TIGER EAT THE MAN FOR MEN ARE AN UNGRATEFUL RACE +3005-163391-0007-497: THE YOUNG CHAP WAS MIGHTY THANKFUL SAID IT WAS TOUGH WORK (TOTING->TOADING) HIS BAGGAGE SUCH WEATHER +4198-12259-0042-1217: O THE DRINKERS THOSE THAT ARE (A DRY->ADRY) O POOR THIRSTY SOULS +6432-63722-0002-2041: I'M GOING OFF FISHING I MAY NOT CATCH ANYTHING (I->AND) MAY NOT WANT TO AFTER I GET THERE +4852-28330-0016-1546: YES SIR I THINK I CAN DO IT SAFELY OR I SHOULD NOT TRY SIR +4350-10919-0004-1352: IS THERE HOPE SHE MEANT TO SAY BUT HER LIPS QUIVERED AND SHE COULD NOT UTTER THE QUESTION WELL DOCTOR +8131-117029-0021-2671: I I OH DRAT IT I'M GETTING OLD (IZZY->IASY) YOU TELL HIM +5484-24318-0035-1801: SHOUTED HIS FRIEND (SOTELES->SOCULUS) IN JOYFUL SURPRISE IN THE MIDST OF (THIS->HIS) PAINFUL WALK (HERMON->HAREMON) +4852-28312-0031-1501: MY WINDOW HAS A POWER FOR THOSE FEW WHO ARE TO SEE +3538-163622-0004-872: HE HAD GONE OUT ONCE TO SEEK A PLACE HE SAID BUT NEVER WOULD HE DO SUCH A THING AGAIN +1998-29454-0043-167: I KNOW YOU WILL SAID DICKIE WITH ENTHUSIASM I KNOW (OW->HOW) GOOD YOU ARE +2609-156975-0016-377: (WAS MOSES JUSTIFIED IN->WHICH MOVES IT JUSTFIED AND) RESISTING THE (EGYPTIAN TASKMASTER->EGIPSIAN TAX MASTER) +3331-159605-0023-632: SHE DID NOT MEAN TO TELL BUT HIS FRANKNESS WAS SO AGREEABLE SHE FORGOT HERSELF +3005-163399-0025-542: WHY SILAS LOOK YONDER UP THE ROAD AIN'T THAT SOMEBODY COMING +5484-24318-0005-1771: DID IT NOT SPARE HIM A HUMILIATION AS GREAT AND PAINFUL AS COULD BE IMAGINED +7105-2340-0014-2296: SUCH NICE USEFUL GIFTS A FEW DUPLICATES OF COURSE +533-131562-0015-1622: I KNOW THAT DAY AFTER DAY SUCH FEELINGS WILL RETURN UPON ME +2609-156975-0001-362: (HOLD->O) ON (HOLD FAST HOLD OUT PATIENCE IS->HOFAX HODOBT PATESENCES HIS) GENIUS +4350-10919-0033-1381: THE MOTHER WAS MUCH MORE CHEERFUL WHEN SHE WENT BACK TO HER DAUGHTER AND KITTY PRETENDED TO BE MORE CHEERFUL +8188-269290-0045-2776: THE WORLD SAYS NO I AM NOT BUT GOD SAYS YES YOU ARE +7902-96592-0026-2371: HE HAS GONE TO GIVE THE ALARM THOUGHT THE PRISONER AND HE LOOKED EXCITEDLY ROUND FOR A WAY OF ESCAPE +2414-128292-0027-317: THEY SLEEP QUIETLY THEY ENJOY THEIR NEW SECURITY +4294-14317-0011-1277: I HOPE TO PROVE ON WHAT ACCOUNT THAT SCOUNDREL TRIES TO BRING ME INTO DISGRACE +4294-9934-0004-1322: MARIUS WAS CLEAR EYED AND HE REQUIRED THE TRUE LIGHT +3005-163390-0023-482: NOW DE (DUKE->DUPE) HE'S A (TOLERBLE LIKELY->TOLERABLE LIKE) MAN IN SOME WAYS +4294-35475-0015-1306: A FAINT GLIMMER OF LIGHT ON THE OPPOSITE 
WALL SHOWS ME THE KEYHOLE +1688-142285-0092-92: I AM SURE AT ANY RATE SHE WOULD NOT LIKE STRANGERS TO KNOW ANYTHING ABOUT IT +1998-29455-0011-182: (OW'M->HOW AM) I TO (WHEEL->WEAR) THE BLOOMIN PRAM IF YOU GOES ON LIKE AS IF YOU WAS A (BAG->PACK) OF (EELS->FIELDS) +533-1066-0020-1576: IT WAS ALEX ARMED WITH TWO LONG HANDLED SPADES +4350-10919-0018-1366: AH WELL IN THAT CASE TO BE SURE LET THEM GO ONLY THOSE GERMAN QUACKS ARE MISCHIEVOUS +5764-299665-0087-1891: FAILURE SEEMS TO BE THE TRADEMARK OF NATURE WHY +8131-117016-0056-2611: BECAUSE (IZZY->ISSY) IS ALWAYS HONEST ACCORDING TO HOW HE SEES IT +8188-269288-0027-2700: JUST AFTER MIDNIGHT SHE ROSE WITH A SIGH TO PREPARE FOR BED +533-131556-0024-1605: YOU USE HARD WORDS MISSUS HUNTINGDON BUT I CAN PARDON YOU +1998-15444-0010-106: TICKLING THE (FAUCES->PHOCES) WITH A FEATHER (MAY EXCITE->MAKES OUT) VOMITING +3005-163399-0009-526: YOU MUST A MET HIM ON THE ROAD DIDN'T YOU OLDISH MAN WITH A +3764-168671-0032-1065: ARE YOU NOT COMRADE WE'LL GO AND HAVE A DRINK TOGETHER PRESENTLY +2414-128292-0010-300: ASKED (ZARATHUSTRA->THEIR T EXTRA) VEHEMENTLY WHAT DOEST THOU HERE +533-1066-0019-1575: IN SPITE OF MYSELF I DREW MY BREATH IN SHARPLY +5764-299665-0041-1845: THE SENSE OF DUTY (BECOMES->BECAME) STRONGER MORE IMPERATIVE +3997-180297-0003-1125: DURING THIS REMARK MARGUERITE LOOKED AT ME ATTENTIVELY +6938-70848-0003-2160: WELL DIDN'T THEY SHOOT US ONE MAN EXHIBITED HIS ARM IN A SLING +3080-5032-0023-571: WE HAVE HAD ANOTHER DEBATE BUT MUCH MORE CALMLY +4350-9170-0058-1440: TO RESIST WOULD NEED INDEPENDENT THOUGHT AND EFFORT OF WHICH EVERY MAN IS NOT CAPABLE +4350-10919-0032-1380: IT SEEMED AS THOUGH SOME PIECE OF GOOD FORTUNE HAD COME TO PASS AFTER THE DOCTOR HAD GONE +7902-96594-0019-2415: WHAT FOR THERE AREN'T A PUBLIC HOUSE FOR TEN MILES DIDN'T MEAN THAT +4852-28312-0015-1485: HE GUESSED THERE MUST BE A LIVELY FIRE IN THAT ROOM BEYOND +4198-12259-0025-1200: I APPEAL FROM THIRST AND DISCLAIM ITS JURISDICTION +1998-29455-0024-195: BUT YOU SAID THE BED WITH THE GREEN CURTAINS URGED DICKIE +3997-180294-0022-1110: HERE ARE MY ORDERS TO NIGHT AT (THE->A) VAUDEVILLE +7975-280076-0016-2505: THERE I TOOK DINNER AND THEN WENT TO DOCTOR L W (TWYMAN'S->TWIMMANS) +533-131556-0009-1590: WILL YOU OBLIGE ME (HELEN->ELLEN) CONTINUED SHE +533-131564-0026-1650: YOU NEVER (TRIED->TRIVE) ME (MILLY->MERELY) SAID HE +3528-168669-0055-750: THE DEAD MUST BE OBEYED SO BE IT +3764-168671-0047-1080: MY FATHER WAS A PORTER AT THE (PRYTANEUM->PRETINNIUM) TOWN HALL +4198-12259-0010-1185: IT IS ENOUGH TO BREAK BOTH (GIRDS->GURGE) AND (PETREL->PETROL) +3538-163619-0002-855: BUT HIS SISTER WHO WAS STILL AT HOME FARED WORSE AND WORSE +2414-128292-0011-301: AND WHY CALLEST THOU THYSELF MY SHADOW +1688-142285-0076-76: MARGARET WENT AWAY VERY SAD AND THOUGHTFUL +2609-169640-0006-421: I (SANG->SING) OUT SAIL HO AND CLOSE (ABOARD->ABROAD) +5484-24318-0004-1770: OUGHT HE NOT TO GREET (THIS->HIS) SUDDEN END AS A BOON FROM THE IMMORTALS +3005-163391-0006-496: I DONE SO AND THEN WE ALL THREE STARTED ON AGAIN +1998-29454-0027-151: (AN->AND) I (ASKS->ASK) YOU LET ME COME (ALONGER->ALONG O) YOU GOT THAT +1998-29455-0009-180: HOW BEAUTIFUL SAID DICKIE WRIGGLING WITH DELIGHT +3005-163399-0024-541: OH DON'T DISTRESS ME ANY (MORE'N->MORE AND) I'M ALREADY DISTRESSED +8131-117016-0055-2610: COST EM MORE BUT THEY'D BE RESPECTABLE +6070-86744-0029-1950: AND NOW MY DEAR FRANZ LET US TALK OF SOMETHING ELSE +4294-9934-0017-1335: NO IT IS NOT GOOD WHAT WILL YOU DO AFTER THAT +3331-159609-0019-675: IT WAS 
A VERY DIFFERENT WINTER FROM THE LAST FOR BOTH THE GIRLS +4852-28312-0030-1500: THEY MOVED INTO THE INN THE COACH RATTLED OFF TO THE STABLE +7105-2330-0040-2280: THE SEAT WAS LOST AFTER ALL BY A NARROW (MAJORITY->MATURITY) +3528-168669-0085-780: NO ONE DOUBTS THE RIGHT OF THE MONASTERY TO SEPULTURE +7975-280057-0020-2475: WHEN I WAS TAKEN PRISONER I EXPECTED TO BE SHOT WITHOUT CEREMONY +5484-24317-0023-1755: THAT IS GOING TOO FAR REPLIED (HERMON->HARMON) +7975-280085-0014-2550: I GOT UP AS (THE->A) SIGNAL FOR THE CHARGE AND WE FIRED ONE VOLLEY +7902-96592-0025-2370: (ARCHY->ARCHIE) STEPPED BACK TO THE DOOR LISTENING BUT THERE WAS NOT A SOUND +7902-96591-0005-2325: HE COULD NOT HELP IT HE HATES THE SMUGGLERS YOU SHALL NOT TELL +5484-24318-0034-1800: THE EGYPTIAN OBEYED AND HIS MASTER CROSSED THE WIDE SPACE STREWN WITH SAND AND APPROACHED THE STAGE WHICH HAD BEEN ERECTED FOR THE FESTAL PERFORMANCES EVEN HAD HIS EYES RETAINED THE POWER OF SIGHT HIS BLOOD WAS COURSING SO WILDLY THROUGH HIS VEINS THAT HE MIGHT PERHAPS HAVE BEEN UNABLE TO DISTINGUISH THE STATUES AROUND HIM AND THE THOUSANDS OF SPECTATORS WHO CROWDED CLOSELY TOGETHER RICHLY GARLANDED THEIR CHEEKS GLOWING WITH ENTHUSIASM SURROUNDED THE ARENA HERMON +3331-159609-0020-676: IF FANNY WANTED TO SHOW HIM WHAT SHE COULD DO TOWARD MAKING A PLEASANT HOME SHE CERTAINLY SUCCEEDED BETTER THAN SHE SUSPECTED FOR IN SPITE OF MANY FAILURES AND DISCOURAGEMENTS BEHIND THE SCENES THE LITTLE HOUSE BECAME A MOST ATTRACTIVE PLACE TO MISTER SYDNEY AT LEAST FOR HE WAS MORE THE HOUSE FRIEND THAN EVER AND SEEMED DETERMINED TO PROVE THAT CHANGE OF FORTUNE MADE NO DIFFERENCE TO HIM +3997-180294-0008-1096: THE MORE A GIRL BELIEVES IN GOODNESS THE MORE EASILY WILL SHE GIVE WAY IF NOT TO (HER->A) LOVER AT LEAST TO LOVE FOR BEING WITHOUT MISTRUST SHE IS WITHOUT FORCE AND TO WIN HER LOVE IS A TRIUMPH THAT CAN BE GAINED BY ANY YOUNG (MAN->MEN) OF FIVE AND TWENTY SEE HOW YOUNG GIRLS ARE WATCHED AND GUARDED +5442-32873-0014-1666: LUKE TOOK CARE OF MISTER LARKIN'S DOGS AND GROOMED MISTER WYLDER'S HORSE AND CLEANED UP HIS DOG CART FOR MARK BEING CLOSE ABOUT MONEY AND FINDING THAT THE THING WAS TO BE DONE MORE CHEAPLY THAT WAY PUT UP HIS HORSE AND DOG CART IN THE POST OFFICE PREMISES AND SO EVADED THE LIVERY CHARGES OF THE BRANDON ARMS +6128-63241-0011-2010: SHE WAS PERFECTLY SAFE AFTER WRITING TO (BASIL->BAISIL) RANSOM AND INDEED IT WAS DIFFICULT TO SEE WHAT HE COULD HAVE DONE TO HER EXCEPT THANK HER HE WAS ONLY EXCEPTIONALLY SUPERLATIVE FOR HER LETTER AND ASSURE HER THAT HE WOULD COME AND SEE HER THE FIRST TIME HIS BUSINESS HE WAS BEGINNING TO GET A LITTLE SHOULD TAKE HIM TO BOSTON +367-130732-0026-946: TAKE THIRTY CRAWFISH FROM WHICH REMOVE THE GUT CONTAINING THE GALL IN THE FOLLOWING MANNER TAKE FIRM HOLD OF THE CRAWFISH WITH THE LEFT HAND SO AS TO AVOID BEING PINCHED BY ITS CLAWS WITH THE THUMB AND FOREFINGER OF THE RIGHT HAND PINCH THE EXTREME END OF THE CENTRAL FIN OF THE TAIL AND WITH A SUDDEN JERK THE GUT WILL BE WITHDRAWN +2609-156975-0030-391: MANY MODERN SCHOLARS DRAW THE CONCLUSION FROM THE BIBLICAL NARRATIVE THAT IT WAS FROM THE (KENITES->CANAITES) THAT MOSES FIRST LEARNED OF (YAHWEH OR AS->YAHWAY ORAS) THE DISTINCTIVE NAME OF ISRAEL'S GOD WAS TRANSLATED BY LATER JEWISH SCRIBES JEHOVAH +7975-280076-0005-2494: IT WAS CHARGED THAT ARTHUR MC COY OR A C (MC->MAC) COY AND MYSELF HAD BEEN PARTICIPANTS IN THE GAD'S HILL AFFAIR AND THE TWO STAGE ROBBERIES +7018-75789-0016-2224: THEN I TOOK LEAVE OF HIM AND OF ALL MY INTIMATES AND ACQUAINTANCES IN THE ISLAND AND 
EMBARKED WITH THE MERCHANTS AFORESAID +8461-258277-0002-2869: THE KNOCKER REPLIED (KAMAR->KAMA) DAUGHTER OF (AZARIAH->AZARAIAH) THE JEW SAY ME IS ALI OF CAIRO WITH YOU +8131-117016-0014-2569: THE REST OF YOU CAN TEAM UP ANY WAY YOU WANT (TONIGHT->TO NIGHT) PICK ANY ROUTE THAT'S OPEN (OKAY MEN->O CAMEN) LET'S GO +3331-159605-0011-620: THIS HAD AN ESPECIAL CHARM TO POLLY FOR SHE SOON FOUND THAT THIS SIDE OF HIS CHARACTER WAS NOT SHOWN TO EVERY ONE +2414-128292-0015-305: WHEN THE DEVIL CASTETH HIS SKIN DOTH NOT HIS NAME ALSO FALL AWAY IT IS ALSO (SKIN->SKINNED) +5442-41169-0030-1730: THERE'S THE PEASANTS TOO I WONDER AT THEM SOMETIMES ANY GOOD PEASANT TRIES TO TAKE ALL THE LAND HE CAN +2609-156975-0019-380: THE WILDERNESS TO THE EAST OF EGYPT HAD FOR CENTURIES BEEN THE PLACE OF REFUGE FOR EGYPTIAN FUGITIVES +7105-2340-0002-2284: BESIDES CYNICISM APART HIS BEING RICH WILL MAKE A DIFFERENCE IN THE WAY PEOPLE WILL LOOK AT HIS FAILING +6070-86745-0003-1954: WAIT THEN DURING THE DAY TELL ROSA THAT WHEN I LEAVE THE OPERA I WILL SUP WITH HER AS SHE WISHES +4294-35475-0004-1295: FORTHWITH THE GRINNING JESTER BEGAN SHRIEKING WITH LAUGHTER SO THAT THE BELLS UPON HIS MOTLEY CAP WERE ALL SET A JANGLING +533-131556-0013-1594: SHE COLOURED AGAIN EXCESSIVELY AND REMAINED SILENT PRESSING HER FINGER AGAINST HER (TEETH->CHEEK) AND GAZING INTO THE FIRE +2609-169640-0010-425: I SOON SAW BOTH (PROAS->P PROCKS) AND (GLAD->GREAT) ENOUGH WAS I TO PERCEIVE THAT THEY HAD NOT APPROACHED MATERIALLY (NEARER->NEAR) +7975-280063-0003-2479: FOSTER HAD NEARLY ONE THOUSAND (CAVALRYMEN->CAVERNMEN) AND TWO PIECES OF (RABB'S->RAB'S) INDIANA BATTERY THAT HAD ALREADY MADE FOR ITSELF A NAME FOR HARD FIGHTING +3080-5040-0030-605: YOU MUST GIVE ME LEAVE TO ENTERTAIN YOU THUS WITH DISCOURSES OF THE FAMILY FOR I CAN TELL YOU NOTHING ELSE FROM HENCE +3764-168671-0021-1054: HE WAS A SORT OF (LABORING->LABOURING) MAN WHO WORE A WAISTCOAT WITH LARGE POCKETS AND CARRIED A MATTOCK UNDER HIS ARM +4350-9170-0047-1429: WE (KNOW->*) NOW (*->KNOW) THAT THREATS AND PUNISHMENTS CANNOT DIMINISH THEIR NUMBER THAT THAT CAN ONLY BE DONE BY CHANGE OF ENVIRONMENT AND MORAL INFLUENCE +4350-9170-0018-1400: INTERNAL DISSENSIONS DISAPPEAR ONLY IN PROPORTION TO THE DEGREE OF OPPRESSION EXERTED BY THE AUTHORITY OVER THE DISSENTIENT INDIVIDUALS +2033-164915-0011-245: TWAS AS I FEARED THE (COMING ILLS->CAMEN EILS) DISCERNING BUT UNTO ALLAH WE ARE ALL RETURNING +6432-63722-0035-2074: BEFORE THE BIG WIND IN IRELAND SUGGESTED THONG WITH A NOD AT HIS IRISH COMPATRIOT SLIGHTLY LAUGHED THE COLONEL +5764-299665-0060-1864: RELIGION HAS ALWAYS BEEN THE ENEMY OF SCIENCE OF INVESTIGATION AND THOUGHT +3080-5040-0000-575: (WOULD->WHAT) IT WOULD LEAVE ME AND THEN I COULD BELIEVE I SHALL NOT ALWAYS HAVE OCCASION FOR IT +3538-163619-0007-860: WELL IF MY BROTHER SAYS SO I MUST DO IT SAID THE MAN'S DAUGHTER AND SHE FLUNG HER CASKET INTO THE SEA +5484-24317-0028-1760: YET WHAT MATTERED IT EVEN IF THESE MISERABLE PEOPLE CONSIDERED THEMSELVES DECEIVED AND POINTED THE FINGER OF SCORN AT HIM +7105-2340-0032-2314: THE (PIGEONCOTES->PIGEON COATS) HAD TURNED PALER THAN EVER MISSUS PETER HAD A FINAL INSPIRATION +6070-86744-0002-1923: MY FATHER THE COMTE DE MORCERF ALTHOUGH OF SPANISH ORIGIN POSSESSES CONSIDERABLE INFLUENCE BOTH AT THE COURT OF FRANCE AND MADRID AND I UNHESITATINGLY PLACE THE BEST SERVICES OF MYSELF AND ALL TO WHOM MY LIFE IS DEAR AT YOUR DISPOSAL +1998-29455-0027-198: THE NEW GAME OF BEGGING AND INVENTING STORIES TO INTEREST THE PEOPLE FROM WHOM IT WAS WORTH WHILE TO BEG 
WENT ON GAILY DAY BY DAY AND WEEK BY WEEK AND DICKIE BY CONSTANT PRACTICE GREW SO CLEVER AT TAKING HIS PART IN THE ACTING THAT MISTER BEALE WAS QUITE DAZED WITH ADMIRATION +3080-5040-0013-588: IF I DROWN BY THE WAY THIS WILL BE MY LAST LETTER AND LIKE A WILL I BEQUEATH ALL MY KINDNESS TO YOU IN IT WITH A CHARGE NEVER TO BESTOW IT ALL UPON ANOTHER MISTRESS LEST MY GHOST RISE AGAIN AND HAUNT YOU +6128-63241-0013-2012: OF ALL THINGS IN THE WORLD CONTENTION WAS MOST SWEET TO HER THOUGH WHY IT IS HARD TO IMAGINE FOR IT ALWAYS COST HER TEARS HEADACHES A DAY OR TWO IN BED ACUTE EMOTION AND IT WAS VERY POSSIBLE BASIL RANSOM WOULD NOT CARE TO CONTEND +5442-32873-0016-1668: HERE WERE THE FLOW OF (SOUL->SOLE) AND OF STOUT LONG PIPES LONG YARNS AND TOLERABLY LONG CREDITS AND THE HUMBLE SCAPEGRACES OF THE TOWN RESORTED THITHER FOR THE PLEASURES OF A CLUB LIFE AND OFTEN REVELLED DEEP INTO THE SMALL HOURS OF THE MORNING +7105-2340-0030-2312: IT WAS MISSUS PETER WHO ARRIVED FIRST AT AN INSPIRATION HOW DREADFUL TO THINK THERE ARE THIEVES IN THE HOUSE WE KEEP THE DRAWING ROOM LOCKED UP AT NIGHT OF COURSE BUT ANYTHING MIGHT BE CARRIED OFF WHILE WE ARE AT BREAKFAST +8280-266249-0051-2852: THEY BENT EAGERLY OVER THE BOARD EACH WATCHING WITH FEVERISH ANXIETY HIS COMPANION'S MOVEMENTS EACH CASTING NOW AND AGAIN A GLOATING EYE UPON THE HEAP OF GOLD AND (GREENBACKS->GREEN BACKS) THAT LAY BETWEEN THEM AND AT TIMES HALF STRETCHING OUT HIS HAND TO CLUTCH IT +5764-299665-0014-1818: DID INFINITE WISDOM (INTENTIONALLY->INTENTIALLY) PRODUCE THE MICROSCOPIC BEASTS THAT FEED UPON THE OPTIC (NERVE->NURSE) THINK OF BLINDING A MAN TO SATISFY THE APPETITE OF A MICROBE +6070-86745-0001-1952: SHRUBS AND CREEPING PLANTS COVERED THE WINDOWS AND HID FROM THE GARDEN AND COURT THESE TWO APARTMENTS THE ONLY ROOMS INTO WHICH AS THEY WERE ON THE GROUND FLOOR THE PRYING EYES OF THE CURIOUS COULD PENETRATE +7902-96591-0008-2328: HE LAUGHED BUT IT WAS A CURIOUS KIND OF LAUGH FULL OF VEXATION INJURED AMOUR (PROPRE->PROPERA) AS THE FRENCH CALL OUR LOVE OF OUR OWN DIGNITY OF WHICH ARCHIBALD (RAYSTOKE->REYSTROKE) IN THE FULL FLUSH OF HIS YOUNG BELIEF IN HIS IMPORTANCE AS A BRITISH OFFICER HAD A PRETTY GOOD STOCK +5442-32873-0015-1667: BUT LUKE WAS NOT THERE AND CAPTAIN LAKE RECOLLECTING HIS HABITS AND HIS HAUNT HURRIED ON TO THE SILVER LION WHICH HAS ITS (GABLE->CABLE) TOWARDS THE COMMON ONLY ABOUT A HUNDRED STEPS AWAY FOR DISTANCES ARE NOT GREAT IN GYLINGDEN +6070-86744-0000-1921: FRANZ WHO SEEMED ATTRACTED BY SOME INVISIBLE INFLUENCE TOWARDS THE COUNT IN WHICH TERROR WAS STRANGELY MINGLED FELT AN EXTREME RELUCTANCE TO PERMIT HIS FRIEND TO BE EXPOSED ALONE TO THE SINGULAR FASCINATION THAT THIS MYSTERIOUS PERSONAGE SEEMED TO EXERCISE OVER HIM AND THEREFORE MADE NO OBJECTION TO ALBERT'S REQUEST BUT AT ONCE ACCOMPANIED HIM TO THE DESIRED SPOT AND AFTER A SHORT DELAY THE COUNT JOINED THEM IN THE SALON +5442-41168-0009-1681: HE FORGOT AS SERGEY IVANOVITCH EXPLAINED TO HIM AFTERWARDS THIS SYLLOGISM THAT IT WAS NECESSARY FOR THE PUBLIC GOOD TO GET RID OF THE MARSHAL OF THE PROVINCE THAT TO GET (RID OF->IT TO) THE MARSHAL IT WAS NECESSARY TO HAVE A MAJORITY OF VOTES THAT TO GET A MAJORITY OF VOTES IT WAS NECESSARY TO SECURE (FLEROV'S->FLIROV'S) RIGHT TO VOTE THAT TO SECURE THE RECOGNITION OF (FLEROV'S->FLAEROFF'S) RIGHT TO VOTE THEY MUST DECIDE ON THE INTERPRETATION TO BE PUT ON THE ACT +7018-75788-0018-2206: EACH THAT DIED WE WASHED AND SHROUDED IN SOME OF THE CLOTHES AND LINEN CAST ASHORE BY THE TIDES AND AFTER A LITTLE THE REST OF MY FELLOWS 
PERISHED ONE BY ONE TILL I HAD BURIED THE LAST OF THE PARTY AND ABODE ALONE ON THE ISLAND WITH BUT A LITTLE PROVISION LEFT I WHO WAS WONT TO HAVE SO MUCH +2033-164916-0005-257: AFTER (AWHILE->A WHILE) THE DUST DISPERSED AND THERE APPEARED UNDER IT THE ARMY OF BAGHDAD AND KHORASAN A CONQUERING HOST LIKE THE FULL TIDE SEA AND SHAHRAZAD PERCEIVED THE DAWN OF DAY AND CEASED TO SAY HER PERMITTED SAY +7018-75789-0028-2236: BUT THE CAPTAIN AROSE AND TIGHTENING HIS GIRDLE TUCKED UP HIS SKIRTS AND AFTER TAKING REFUGE WITH ALLAH FROM SATAN THE STONED (CLOMB->CLIMBED) TO THE MAST HEAD WHENCE HE LOOKED OUT RIGHT AND LEFT AND GAZING AT THE PASSENGERS AND CREW FELL TO BUFFETING HIS FACE AND PLUCKING OUT HIS BEARD +8131-117016-0043-2598: NO THE (COPS->COPSE) THEY'RE GIVING ME WE'RE COVERED GORDON +6128-63244-0015-2028: SAID OLIVE CHANCELLOR WITH A FACE WHICH SEEMED TO PLEAD FOR A (REMISSION OF->REMISSIONER'S) RESPONSIBILITY +6938-70848-0006-2163: WHO ARE YOU A COUNTER REVOLUTIONIST A PROVOCATOR THEY (BELLOWED->BELOWED) AT HIM +533-1066-0022-1578: THERE'S ONE THING SURE I'LL NOT BE SUSPECTED OF COMPLICITY +6432-63723-0004-2103: HOWEVER DON'T THINK I'M NOT INTERESTED IN YOUR CASE (I'VE FISHED->I HAVE FINISHED) ENOUGH FOR TO DAY +3538-163619-0005-858: THE YOUTH PROMISED TO MAKE ALL THE HASTE HE COULD AND SET FORTH FROM THE KING'S PALACE +3997-182399-0004-1158: PLEASE MISTER BUZZARD PLEASE TELL US THE STORY HE BEGGED +6128-63240-0011-1982: IF YOU ARE GOING TO DINE WITH HER YOU HAD BETTER KNOW IT OH MURDER +4198-61336-0028-1263: THE PHILISTINES AND THE ARABIANS OF THE DESERT WERE ALSO SUBDUED +3997-180297-0021-1143: I THOUGHT I COULD ACCEPT THE LIFE WHICH HE OFFERED ME BUT WHAT WOULD YOU HAVE +6070-86745-0016-1967: IN THE ENTIRE POLITICAL WORLD OF WHICH YOU ARE ONE OF THE LEADERS +3997-182399-0019-1173: A LITTLE SIGH OF SATISFACTION WENT (AROUND->ROUND) THE CIRCLE OF LISTENERS +1688-142285-0003-3: I REALLY LIKED THAT ACCOUNT OF HIMSELF BETTER THAN ANYTHING ELSE HE SAID +7975-280085-0001-2537: FRIDAY WE MOVED TOWARD WATERVILLE AND FRIDAY NIGHT WE CAMPED BETWEEN ELYSIAN AND GERMAN LAKE +7975-280076-0018-2507: WE CROSSED ON THE BRIDGE STAYED IN THE CITY ALL NIGHT AND THE NEXT MORNING WE RODE UP (THROUGH->TO) THE CITY +1688-142285-0063-63: I DON'T BELIEVE ALL I HEAR NO NOT BY A BIG DEAL +4198-12259-0028-1203: (O->OH) FOR GOD'S SAKE LET US (LASH->LAST) THEM SOUNDLY YET THRIFTILY +7902-96592-0013-2358: ONCE OUT OF THAT ROOM HE COULD RAN AND BY DAYLIGHT THE SMUGGLERS DARE NOT HUNT HIM DOWN +6432-63722-0033-2072: AND I'VE READ ENOUGH ABOUT (GERMS->TERMS) TO KNOW THE DANGER I'D ADVISE YOU TO BE CAREFUL +3538-163624-0025-918: FOR HER HUSBAND SHE SAID HAD RIDDEN THROUGH THE FLAME WHEN NO OTHER MAN DARED FACE IT +8188-269288-0029-2702: NOW I REMEMBER SHE GOT A LETTER WHICH UPSET HER VERY MUCH AND WENT OUT +3538-163622-0020-888: WHEN THEY HAD (TRAVELLED->TRAVELED) A LONG LONG WAY THE FOAL SAID DOST THOU SEE ANYTHING +8131-117016-0057-2612: BUT YOU GOT EARTH IDEAS OF THE STUFF LIKE I HAD ONCE +4294-35475-0002-1293: BUT THE KING (LAUGHED->LAUGH'D) HIM TO SCORN THOU A SWORD HE QUOTH +3764-168670-0018-993: AND THEN THAT THERE WAS ANOTHER THE EMPTY COFFIN +5764-299665-0043-1847: IN ALL THIS THERE IS NOTHING SUPERNATURAL +7105-2330-0027-2267: I'LL TRY SAID THE HOME SECRETARY AND WENT TO THE TELEPHONE +3764-168671-0050-1083: RETURNED FAUCHELEVENT CLUTCHING AT THIS BRANCH FEEBLE AS IT WAS +6938-70848-0021-2178: FOLLOWED HIM LENIN LISTENED TO NOW WITH ABSORBING INTENSITY +6938-70848-0020-2177: THE CONSTITUENT ASSEMBLY WILL NOT 
DARE TO BREAK WITH THE WILL OF THE PEOPLE +7902-96595-0019-2448: A LAD LOOKING LIKE A COMMON SAILOR AND WEARING A RED CAP NO SAID SIR RISDON +2414-128291-0025-288: FOR THEY ARE THY WARMEST FRIENDS AND (PRECEPTORS->PERSEPTORS) +3764-168670-0003-978: YOU WILL WAIT FOR ME AT A LADY'S HOUSE I SHALL COME TO FETCH YOU +5764-299665-0028-1832: HE HAS TRIED THAT ROAD AND KNOWS THAT IT IS THE WRONG ROAD +7975-280063-0001-2477: (BOONE MUIR->BOOMEUER) AND MYSELF (MET->MAKE) COFFEE (AND->IN) THE REST BELOW ROSE HILL ON GRAND RIVER +3528-168669-0046-741: IF YOU HAD A LITTLE MORE FAITH AND IF YOU COULD HAVE BEEN IN HER CELL SHE WOULD HAVE CURED YOUR LEG MERELY BY TOUCHING IT SHE SMILED +7975-280084-0007-2525: CHADWELL WOODS AND JIM RODE UP AND JOINED US SHOUTING TO (*->THE) PEOPLE IN THE STREET TO GET INSIDE AND FIRING THEIR PISTOLS TO EMPHASIZE THEIR COMMANDS +6070-63485-0009-1911: DID YOU SEE IN THE CABARET WE HAVE JUST LEFT FOR I KNOW YOU AGAIN THE MAN WHOM THE CHARCOAL MAN CAME TO SEEK +2609-169640-0011-426: MISTER (KITE->KAIGHT) OBSERVED THIS ALSO AND REMARKED THAT OUR MOVEMENTS HAD BEEN SO PROMPT AS TO TAKE THE RASCALS ABACK +3005-163390-0027-486: HE WAS OFTEN MOANING AND MOURNING THAT WAY NIGHTS WHEN HE JUDGED I WAS ASLEEP AND SAYING (PO->POOR) LITTLE (LIZABETH->LIZ'BETH) +3331-159609-0025-681: BUT IF WORK BASKETS WERE GIFTED WITH POWERS OF SPEECH THEY COULD TELL STORIES MORE TRUE AND TENDER THAN ANY WE READ +1998-29454-0003-127: HE GOT IT UP AND PUSHED HIS TREASURES AS FAR IN AS HE COULD ALONG THE ROUGH CRUMBLY SURFACE OF THE (LATH->GLASS) AND PLASTER +3997-180294-0028-1116: WHAT IS THE MATTER WITH YOU TO NIGHT SAID MARGUERITE RISING AND COMING TO THE BACK OF THE BOX AND KISSING ME ON THE FOREHEAD +4294-14317-0000-1266: AS I THOUGHT THAT THIS WAS DUE TO SOME FAULT IN THE EARTH I WANTED TO MAKE THESE FIRST EXPERIMENTS BEFORE I UNDERTOOK MY PERSEUS +3331-159605-0012-621: LATELY THIS HAD CHANGED ESPECIALLY TOWARDS POLLY AND IT FLATTERED HER MORE THAN SHE WOULD CONFESS EVEN TO HERSELF +6432-63722-0036-2075: THAT'S RIGHT AGREED THE COLONEL AS HE CONTINUED TO MOVE HIS MAGNIFYING GLASS OVER THE SURFACE OF THE STILL TICKING WATCH +8188-269288-0033-2706: LESLIE LEFT THE ROOM BUT SHE HAD SCARCELY GONE A DOZEN PACES DOWN THE CORRIDOR BEFORE SHE MET ANNIE RETURNING +6128-63241-0002-2001: (RANSOM->RUNSEN) WAS PLEASED WITH THE VISION OF THAT REMEDY IT MUST BE REPEATED THAT HE WAS VERY PROVINCIAL +5484-24318-0009-1775: THE POWER WHICH DELIVERED HIM OVER TO DEATH JUST AT THAT MOMENT WAS NOT NEMESIS NO IT WAS A KINDLY DEITY +4350-9170-0034-1416: AND BY THIS MEANS ALL CITIZENS ARE UNDER ARMS TO SUPPORT THE INIQUITIES (PRACTICED->PRACTISED) UPON THEM ALL CITIZENS HAVE BECOME THEIR OWN OPPRESSORS +3764-168671-0008-1041: TO BE BURIED IN (PERE LACHAISE->PERELACHASE) IS EQUIVALENT TO HAVING FURNITURE OF MAHOGANY IT IS RECOGNIZED AS ELEGANT +8188-269290-0004-2735: IT DOESN'T MATTER REPLIED (ANNIE->ENNY) WHETHER IT IS AN ORDER OR NOT I'M NOT COMING SAY NOTHING ABOUT ME PLEASE +1688-142285-0052-52: AS THEY TURNED UP INTO A SMALL COURT OPENING OUT (OF->INTO) A SQUALID STREET BESSY SAID +6938-70848-0024-2181: HE KNEW THAT AN AGREEMENT WITH THE BOLSHEVIKI WAS BEING DISCUSSED BUT HE DID NOT KNOW THAT IT HAD BEEN CONCLUDED +1688-142285-0007-7: I REALLY WAS VERY MUCH AFRAID OF SHOWING HIM HOW MUCH SHOCKED I WAS AT SOME (PARTS->PART) OF WHAT HE SAID +6128-63244-0003-2016: WITH HER IMMENSE SYMPATHY FOR REFORM SHE FOUND HERSELF SO OFTEN WISHING THAT REFORMERS WERE A LITTLE DIFFERENT +3538-163622-0008-876: THE YOUTH LIKED THE 
THOUGHT OF THIS LET THE FOALS RUN WHERE THEY CHOSE AND SEATED HIMSELF IN THE CLEFT OF THE ROCK BY THE SIDE OF THE OLD HAG +5764-299665-0092-1896: LAW CAN PUNISH BUT IT CAN NEITHER REFORM CRIMINALS NOR PREVENT CRIME +7902-96591-0010-2330: COLD WATER CAME ON THIS IDEA DIRECTLY AS HE RECALLED THE FACT THAT THE DARKNESS WAS INTENSE AND CELIA COULD NOT HAVE SEEN HIM +4350-9170-0029-1411: AND SO EVERY GOVERNMENT NEEDS AN ARMY ALSO TO PROTECT ITS BOOTY FROM ITS (NEIGHBOR->NEIGHBOUR) BRIGANDS +7902-96594-0005-2401: AWKWARD BIT (O->OF) COUNTRY SIR SIX MILES ROW BEFORE YOU CAN FIND A PLACE TO LAND +7105-2340-0029-2311: HUSBAND AND WIFE LOOKED BLANKLY AND DESPERATELY AT ONE ANOTHER +3764-168671-0019-1052: HE DID WHAT HE LIKED WITH HIM HE MADE HIM DANCE ACCORDING TO HIS WHIM +6938-70848-0005-2162: WHO ARE YOU TO DESTROY THE LEGAL GOVERNMENT WHO IS (LENIN->LENDING) A GERMAN +8131-117016-0041-2596: GET A STRETCHER AND TAKE HIM WHEREVER HE BELONGS HE ORDERED +533-1066-0021-1577: THE DOCTOR KEPT A KEEN (LOOKOUT->LOOK OUT) BUT NO ONE APPEARED +3528-168669-0027-722: NOTHING CAN BE HEARD AT THE BOTTOM OF THE GARDEN REALLY +5764-299665-0058-1862: HAS THE BIBLE MADE THE PEOPLE OF GEORGIA KIND AND MERCIFUL +6432-63723-0047-2146: THAT IS NOT ALWAYS BUT SOMETIMES IT HAPPENED TO BE SO NOW +3764-168670-0047-1022: A MAN WHO IS MAKING HIS ESCAPE DOES NOT COUGH OR SNEEZE +3005-163390-0008-467: YOU BET IT IS THE (JEDGE->JUDGE) IS RIGHT EVERYBODY SINGS OUT +4350-9170-0030-1412: THIS INCREASE IS CONTAGIOUS AS MONTESQUIEU POINTED OUT (ONE->A) HUNDRED FIFTY YEARS AGO +4852-28312-0016-1486: WOULD THAT INTERFERE WITH (JAKEY'S->JAKIE GINK'S) GETTING THE JOB SIR +6128-63244-0013-2026: RAISE THE STANDARD AMONG THEM AND BRING ME A THOUSAND NAMES +4852-28330-0017-1547: CAPTAIN BLIZZARD'S ROUND PINK FACE CREASED IN (HIS->ITS) WINNING SMILE +3764-168670-0032-1007: ONLY TO ALLOW THE UNDERTAKER'S MEN TO ENTER WHEN THEY COME TO GET THE COFFIN +3764-168671-0034-1067: HE LIMPED MORE OUT OF ANXIETY THAN FROM INFIRMITY +4198-61336-0011-1246: ONCE AGAIN THE HEBREWS CAME INTO CONTACT WITH ASSYRIA +8131-117017-0024-2641: NOW SHOW ME WHERE I SIGNED ANY AGREEMENT SAYING I'D PAY YOU BACK +3080-5040-0011-586: HERE ARE SOME VERSES OF (COWLEY'S->COLLEIES) TELL ME HOW YOU LIKE THEM +5442-32873-0000-1652: CAPTAIN LAKE DID NOT LOOK AT ALL LIKE A LONDON DANDY NOW +533-1066-0006-1562: I HAVE NONE I SAID HAPPILY +3528-168669-0012-707: AND A WOMAN IS NOT A MAN BUT MY BROTHER IS THE STRONG ONE THOUGH +7902-96591-0006-2326: PRAY PRAY SAY YOU WILL NOT (ARCHY->ARCHIE) WAS SILENT +6432-63723-0002-2101: THE REASON SHE ASKED NO ALIMONY INQUIRED KENNETH +8461-258277-0014-2881: QUOTH AL RASHID WHOSE HEAD IS THIS +3528-168669-0116-811: EVERYTHING MUST HAVE BEEN COMPLETED A GOOD QUARTER OF AN HOUR BEFORE THAT +7902-96595-0017-2446: I DUNNO MUTTERED DICK AND A (MAN->MEN) CAN'T BE SURE +6938-70848-0004-2161: HAVEN'T I GOT SOMETHING TO REMEMBER THEM BY THE DEVILS +4350-10919-0003-1351: WELL DOCTOR DECIDE OUR FATE SAID THE PRINCESS TELL ME EVERYTHING +4852-28319-0015-1517: IF HE WAS TO BE A MAGICIAN COULD HE MAKE THIS BOY COME TO LIFE +5442-41169-0012-1712: AND THEN TO TELL THE TRUTH THERE'S ONE'S OWN (INTERESTS->INTEREST) +8188-269290-0015-2746: OH I WON'T LOCK YOU OUT SHE SAID BUT I MUST HAVE THE KEY +7975-280085-0000-2536: THAT NIGHT IT STARTED TO RAIN AND WE WORE OUT OUR HORSES +4198-61336-0027-1262: HE SWEPT THROUGH ISRAEL LIKE A HURRICANE +3528-168669-0102-797: BESIDES WHAT THE CLOISTER KNOWS THE WORLD LEARNS NOT +7105-2330-0026-2266: CAN'T YOU GET A STRIKE 
PERMIT ASKED THE (ORGANISER->ORGANISR) +3331-159605-0022-631: THAT IS THE WAY I GET TO THE (ROTHS->WARS) ANSWERED POLLY +5442-41169-0026-1726: THE LANDOWNER CHUCKLED UNDER HIS WHITE (MUSTACHES->MOUSTACHES) +2609-169640-0022-437: THEY WERE LIKE THE YELLS OF FIENDS IN ANGUISH +4294-9934-0019-1337: IT IS BADLY PAID WORK BUT ONE CAN LIVE BY IT +3997-182399-0017-1171: THEY LIKE TO CHOKE THAT NO (COUNT BUZZARD->CON BUZZER) TO DEATH +4198-12259-0011-1186: WHAT DIFFERENCE IS THERE BETWEEN A BOTTLE AND A FLAGON +8280-266249-0005-2806: (BESIDE->BESIDES) OURSELVES ADDED COUSIN RONALD LAUGHING +7902-96595-0003-2432: GURR GLANCED ROUND TO SEE IF THE MEN WERE LOOKING AND THEN SAID RATHER HUSKILY BUT KINDLY +7975-280085-0017-2553: INCLUDING THOSE RECEIVED IN AND ON THE WAY FROM NORTHFIELD I HAD ELEVEN WOUNDS +3005-163390-0010-469: WE NEVER SHOWED A LIGHT TILL WE WAS ABOUT TEN MILE BELOW THE VILLAGE +8131-117017-0025-2642: FOR A SECOND (IZZY'S->ISEY'S) FACE WENT BLANK THEN HE CHUCKLED +3528-168669-0073-768: WHAT A GLORY OF GOD FOR THE COMMUNITY AND MIRACLES ISSUE FROM TOMBS +8188-269288-0044-2717: ANNIE STARED VACANTLY AT THE COCOA THEN SHE UTTERED A LAUGH +7105-2340-0015-2297: SEVEN CREAM JUGS PUT IN PETER +8131-117017-0010-2627: WHEN IT WAS OVER THE TWO PICKED UP THEIR WHIMPERING CAPTIVE +7902-96594-0007-2403: YOU DON'T THINK MISTER GURR THAT THEY WOULD DARE TO INJURE HIM IF HE WAS SO UNLUCKY AS TO BE CAUGHT +5764-299665-0089-1893: MUST THE WORLD FOREVER REMAIN THE VICTIM OF IGNORANT PASSION +8131-117016-0028-2583: THEY ROUNDED UP THE MEN OF THE GANG AND ONE OF THE (COPS->COUPS) STARTED OFF +3528-168669-0043-738: FATHER FAUVENT THE COMMUNITY HAS BEEN BLESSED IN MOTHER (CRUCIFIXION->CRUCIFICTION) +8188-269288-0015-2688: LESLIE THANKED HER AND EAGERLY GRASPED THE LITTLE PARCEL +8131-117029-0008-2658: HE REACHED AUTOMATICALLY FOR THE GLASS OF ETHER (NEEDLED->NEEDLE) BEER +5442-41169-0013-1713: THEY'RE PROPRIETORS OF A SORT BUT (WE'RE->WE ARE) THE LANDOWNERS +6432-63723-0049-2148: I SAID WHERE HAVE YOU BEEN REMARKED THE OTHER WE'VE MISSED YOU +8461-258277-0016-2883: HE REPLIED I HAVE FORTY LADS BUT THEY ARE IN CAIRO +4852-28312-0003-1473: HEAVY HAND HEWN BEAMS CROSSED IT FROM ONE SIDE TO THE OTHER +3764-168670-0048-1023: WHO IS THERE WHO HAS NOT SAID TO A CAT DO COME IN +8188-269290-0017-2748: AS SHE WALKED DOWN THE CORRIDOR SHE HEARD IT BEING TURNED (IN->TO) THE LOCK +2414-159411-0025-348: EXACTLY THERE WAS IT ASKED THE JACKAL +7105-2330-0012-2252: OUR MAJORITY LAST TIME WAS ONLY A THOUSAND AND SEVEN +8131-117016-0027-2582: HE BROUGHT HIM TO THE GROUND WITH A SINGLE BLOW ACROSS THE KIDNEYS +8461-278226-0014-2898: I DON'T THINK YOU WILL HAVE ANY DIFFICULTY IN FINDING THE HOUSE +8131-117017-0011-2628: JENKINS THE OTHER COP HAD BEEN HOLDING THE WALLET +7105-2340-0016-2298: WE FEEL THAT WE MUST LIVE (ON CREAM->UNCREAM) FOR THE REST OF OUR LIVES +1688-142285-0079-79: NO MAMMA THAT ANNE BUCKLEY WOULD NEVER HAVE DONE +6128-63240-0012-1983: HE LOOKED AT MISSUS LUNA WITH INTELLIGENT INCREDULITY +5442-41169-0028-1728: SAID LEVIN RETURNING TO A THOUGHT THAT HAD STRUCK HIM +1998-29455-0012-183: I LIKE YOU (NEXTER->NEXT TO) MY OWN DADDY AND MISTER BAXTER NEXT DOOR +8131-117016-0042-2597: BUT THE CAPTAIN STIRRED FINALLY SIGHING +4198-12281-0014-1233: CAN YOU TELL WITH WHAT INSTRUMENTS THEY DID IT +1998-29454-0045-169: STEP OUT (SONNY->SANNY) OR WE'LL NEVER GET THERE THIS SIDE (*->OF) CHRISTMAS +8461-258277-0000-2867: WHEN IT WAS THE SEVEN HUNDRED AND EIGHTEENTH NIGHT +1998-29454-0044-168: BLESS ME SAID MISTER 
(BEALE->BELL) UNCOMFORTABLY WELL THERE +7975-280076-0019-2508: I MET SEVERAL OF MY FRIENDS AMONG THEM WAS BOB HUDSPETH +2609-169640-0023-438: I DOUBT IF WE (TOUCHED A MAN->TOCH THE MEN) IN THE (NEAREST PROA->NEAR EXPRARA) +2609-157645-0008-408: (LET->THEM) THEM (SING->SINGING AN) ANOTHER (PSALM SAID->OTHER SONG) THE CURATE +3528-168669-0028-723: AND THEN THE WIND IS NOT BLOWING IN MY DIRECTION THIS MORNING +4198-12259-0043-1218: CLEAR OFF NEAT SUPERNACULUM +4198-12259-0013-1188: OUR FATHERS DRANK LUSTILY AND EMPTIED THEIR CANS +3997-180294-0025-1113: ONLY ONE REMAINED EMPTY THE STAGE BOX +7902-96592-0042-2387: IT WAS YOUR TURN YESTERDAY IT'S MINE TO DAY WHAT A GAME +8188-269288-0000-2673: ANNIE COLCHESTER HAD BEGUN TO MAKE FRIENDS WITH LESLIE +2414-159411-0026-349: EXACTLY HERE REPLIED THE BRAHMAN +3764-168671-0004-1037: THE GRAVE DIGGERS BEING THUS BOUND TO SERVICE IN THE EVENING IN SUMMER AND AT NIGHT IN WINTER IN THIS CEMETERY THEY WERE SUBJECTED TO A SPECIAL DISCIPLINE +8131-117016-0012-2567: THE FIRST MAN MAKING A SHAKEDOWN WILL GET THE SAME TREATMENT WE'RE GOING TO USE ON THE STONEWALL BOYS YOU'LL GET DOUBLE PAY HERE AND YOU CAN LIVE ON IT +7975-280063-0000-2476: WE TOOK THE OATH PERHAPS THREE HUNDRED OF US DOWN ON LUTHER MASON'S FARM A FEW MILES FROM WHERE I NOW WRITE WHERE COLONEL (HAYS->HAYES) HAD ENCAMPED AFTER INDEPENDENCE +6070-86744-0001-1922: MY VERY GOOD FRIEND AND EXCELLENT NEIGHBOR REPLIED THE COUNT WITH A SMILE YOU REALLY EXAGGERATE MY TRIFLING EXERTIONS +2609-157645-0007-407: ONE SUNDAY SAYS MISTER DITCHFIELD HE HAD (AN EXTRA->ANPERAL) PIPE AND (JOSHUA->JONCEWA) THE CLERK TOLD HIM THAT THE PEOPLE WERE GETTING IMPATIENT +6128-63240-0025-1996: HE OBSERVED THAT MISS CHANCELLOR'S HAND WAS AT ONCE COLD AND LIMP SHE MERELY PLACED IT IN HIS WITHOUT EXERTING THE SMALLEST PRESSURE +3538-163622-0019-887: FOR WE ARE BROTHERS OF THE PRINCESS WHOM THOU ART TO HAVE WHEN THOU CANST TELL THE KING WHAT WE EAT AND DRINK BUT THERE IS A MIGHTY TROLL WHO HAS CAST A SPELL OVER US +3538-142836-0001-827: THE EXPENSE OF PRESERVING THEM WITH SUGAR IS A SERIOUS OBJECTION FOR EXCEPT THE SUGAR IS USED IN CONSIDERABLE (QUANTITIES->QUALITIES) THE SUCCESS IS VERY UNCERTAIN +3538-163624-0024-917: FOR ONE DAY WHEN (BRYNHILD->BEURNHILD) AND (GUDRUN->GUNDRAN) WERE BATHING (BRYNHILD->BURNHILD) WADED FARTHEST OUT INTO THE RIVER AND SAID SHE DID THAT TO SHOW SHE WAS (GUIRUN'S->GUNDERN'S) SUPERIOR +4350-9170-0045-1427: I AM EXPECTED FOR THE SAKE OF THE STATE TO MAKE THESE SACRIFICES TO RENOUNCE EVERYTHING THAT CAN BE PRECIOUS TO MAN PEACE FAMILY SECURITY AND HUMAN DIGNITY +8188-274364-0002-2791: THE CASE OF LORD (MOUNTNORRIS->MONTNORRIS) OF ALL THOSE WHICH WERE (COLLECTED->CONNECTED) WITH SO MUCH INDUSTRY IS THE MOST FLAGRANT AND THE LEAST EXCUSABLE +3538-163619-0004-857: WHEN THE KING ENTERED AND SAW IT HE STOOD STILL AS IF HE WERE IN FETTERS AND COULD NOT STIR FROM THE SPOT FOR THE PICTURE SEEMED TO HIM SO BEAUTIFUL +3997-182399-0018-1172: WHEN HE GET HOME HE TRY (AN->AND) TRY TO BRUSH (THAT SOOT->THE SUIT) OFF BUT IT (DONE->DOESN'T) GET INTO THE SKIN (AN->AND) IT (STAY->STAYED) THERE +5484-24317-0025-1757: THE ROYAL LADY HAD INQUIRED ABOUT HIM AND HIS SUFFERINGS WITH ALMOST SISTERLY INTEREST AND ALTHEA EAGERLY CONFIRMED THE STATEMENT +3528-168669-0057-752: FATHER (FAUVENT->PROUVENT) MOTHER CRUCIFIXION WILL BE INTERRED IN THE COFFIN IN WHICH SHE HAS SLEPT FOR THE LAST TWENTY YEARS THAT IS JUST +367-130732-0012-932: I SAY COME TO SAN FRANCISCO ADVISEDLY FOR WHILE THE CRAB IS FOUND ALL ALONG THE COAST IT IS 
PREPARED NOWHERE SO DELICIOUSLY AS IN SAN FRANCISCO +1688-142285-0017-17: WHEN HE SPOKE OF THE MECHANICAL POWERS HE EVIDENTLY LOOKED UPON THEM ONLY AS NEW WAYS OF EXTENDING TRADE AND MAKING MONEY +7902-96595-0018-2447: GURR SALUTED AND STATED HIS BUSINESS WHILE THE BARONET WHO HAD TURNED SALLOWER AND MORE CAREWORN THAN HIS LOT DREW A BREATH (*->OF) FULL OF RELIEF ONE OF YOUR SHIP BOYS HE SAID +8461-281231-0026-2926: HERE IS A BUGLE WHICH AN ENGLISH YEOMAN HAS ONCE WORN I PRAY YOU TO KEEP IT AS A MEMORIAL OF YOUR GALLANT BEARING +8461-278226-0012-2896: THEY HAVE SAID THAT HE IS EVEN A LITTLE IMBECILE THAT HE DOES NOT REMEMBER HIMSELF OF THE MOST COMMON EVENTS OF HIS LIFE +3997-180297-0005-1127: YES BUT BESIDES NOT WISHING TO PUT YOU OUT I WAS SURE THAT IF YOU CAME AS FAR AS MY DOOR YOU WOULD WANT TO COME UP AND AS I COULD NOT LET YOU I DID NOT WISH TO LET YOU GO AWAY BLAMING ME FOR SAYING NO +6432-63722-0004-2043: AND HAVING PUT HIMSELF IN A FAIR WAY AS HE HOPED TO SOLVE SOME OF THE PROBLEMS CONNECTED WITH THE DARCY CASE COLONEL ASHLEY WENT DOWN TO POLICE HEADQUARTERS TO LEARN MORE FACTS IN (*->THE) CONNECTION WITH THE MURDER OF THE EAST INDIAN +7018-75788-0005-2193: THEN HE CARRIED ME TO THE BEACH WHERE I FILLED MY BAG WITH PEBBLES LARGE AND SMALL AND PRESENTLY WE SAW A COMPANY OF FOLK ISSUE FROM THE TOWN EACH BEARING A BAG LIKE MINE FILLED WITH PEBBLES +3005-163391-0008-498: HE ASKED THE KING WHERE HE WAS GOING AND THE KING TOLD HIM HE'D COME DOWN THE RIVER AND LANDED AT THE OTHER VILLAGE THIS MORNING AND NOW HE WAS GOING UP A FEW MILE TO SEE AN OLD FRIEND ON A FARM UP THERE THE YOUNG FELLOW SAYS +4852-28312-0017-1487: BUT EVEN AS HE SLOWLY TURNED THE THOUGHT PIERCED HIS MIND WHY HAD HE NOT SEEN THE REFLECTION OF THE HEADLIGHTS OF THE CARS MOVING UP (AROUND->A ROUND) THE CORNER OF (WATER->WALUTTER) STREET AND UP THE HILL TOWARD THE (TRAFFIC->TRAPHIC) SIGNALS +6128-63244-0014-2027: (I->AND) LOOK AFTER THE DETAILS AS WELL AS THE (BIG CURRENTS->BOOK CURRANTS) MISSUS (FARRINDER->FARLANDER) ADDED IN A TONE AS EXPLANATORY AS COULD BE EXPECTED OF SUCH A WOMAN AND WITH A SMILE OF WHICH THE SWEETNESS WAS THRILLING TO HER LISTENER +3331-159605-0024-633: BUT I KNOW HER BETTER AND I ASSURE YOU THAT SHE DOES IMPROVE SHE TRIES TO MEND HER FAULTS THOUGH SHE WON'T OWN IT AND WILL SURPRISE YOU SOME DAY BY THE AMOUNT OF HEART AND SENSE AND GOODNESS SHE HAS GOT +7018-75789-0014-2222: QUOTH HE THOU ART THINE OWN MASTER YET IF IT BE THY WILL TO ABIDE WITH US ON OUR HEAD AND EYES BE IT FOR THOU GLADDENEST US WITH THY COMPANY +1688-142285-0018-18: AND THE POOR MEN AROUND HIM THEY WERE POOR BECAUSE THEY WERE VICIOUS OUT OF THE PALE OF HIS SYMPATHIES BECAUSE THEY HAD NOT HIS IRON NATURE AND THE CAPABILITIES THAT IT GIVES HIM FOR BEING RICH +7018-75789-0015-2223: BY ALLAH O MY LORD ANSWERED I THOU HAST INDEED OVERWHELMED ME WITH THY FAVOURS AND WELL DOINGS BUT I WEARY FOR A SIGHT OF MY FRIENDS AND FAMILY AND NATIVE COUNTRY +5484-24318-0036-1802: EVEN WHILE HE BELIEVED HIMSELF TO BE THE CREATOR OF THE (DEMETER->DEMEANOR) HE HAD BEEN SERIOUSLY TROUBLED BY THE PRAISE OF SO MANY CRITICS BECAUSE IT HAD EXPOSED HIM TO THE SUSPICION OF HAVING BECOME FAITHLESS TO HIS ART AND HIS NATURE +533-131556-0012-1593: I ENJOY (A->YOUR) MOONLIGHT RAMBLE AS WELL AS YOU I ANSWERED STEADILY FIXING MY EYES UPON HER AND THE SHRUBBERY HAPPENS TO BE ONE OF MY FAVOURITE RESORTS +4198-12281-0013-1232: SOME DIED WITHOUT SPEAKING OTHERS SPOKE WITHOUT DYING SOME DIED IN SPEAKING OTHERS SPOKE IN DYING +7975-280057-0007-2462: HE HAD STARTED BACK 
TO HARRISONVILLE IN A BUGGY BUT WAS WAYLAID ONE MILE SOUTH OF WESTPORT A SUBURB OF KANSAS CITY AND BRUTALLY MURDERED FALLING OUT OF HIS BUGGY INTO THE ROAD WITH THREE MORTAL BULLET WOUNDS +5764-299665-0000-1804: (AFTERWARD->AFTERWARDS) IT WAS SUPPOSED THAT HE WAS SATISFIED WITH THE BLOOD OF OXEN LAMBS AND DOVES AND THAT IN EXCHANGE FOR OR (ON->IN) ACCOUNT OF THESE SACRIFICES THIS GOD GAVE RAIN SUNSHINE AND HARVEST +7018-75789-0031-2239: WHEN SUDDENLY A VIOLENT SQUALL OF WIND AROSE AND SMOTE THE SHIP WHICH ROSE OUT OF THE WATER AND SETTLED UPON A GREAT REEF THE HAUNT OF SEA MONSTERS WHERE IT BROKE UP AND FELL ASUNDER INTO PLANKS AND ALL AND EVERYTHING ON BOARD WERE PLUNGED INTO THE SEA +533-131564-0001-1625: MISTER AND MISSUS (HATTERSLEY->HAUTTERSLEY) HAVE BEEN (STAYING->SEEING) AT THE GROVE A FORTNIGHT AND AS (MISTER->MISSUS) HARGRAVE IS STILL ABSENT AND THE WEATHER WAS REMARKABLY FINE I NEVER (PASSED A->PASS THE) DAY WITHOUT SEEING MY TWO FRIENDS (MILICENT->MILICON) AND ESTHER EITHER THERE OR HERE +7018-75788-0006-2194: TO THESE HE COMMITTED ME COMMENDING ME TO THEIR CARE AND SAYING THIS MAN IS A STRANGER SO TAKE HIM WITH YOU AND TEACH HIM HOW TO GATHER THAT HE MAY GET HIS DAILY BREAD AND YOU WILL EARN YOUR REWARD AND RECOMPENSE IN HEAVEN +7902-96592-0030-2375: THE RESULT WAS NOT VERY SATISFACTORY BUT SUFFICIENTLY SO TO MAKE HIM ESSAY THE BAR OF THE WINDOW ONCE MORE PRODUCING A GRATING (EAR ASSAILING->IRASCELLING) SOUND AS HE FOUND THAT NOW HE DID MAKE A LITTLE IMPRESSION SO LITTLE THOUGH THAT THE PROBABILITY WAS IF HE KEPT ON WORKING WELL FOR TWENTY FOUR HOURS HE WOULD NOT GET THROUGH +1688-142285-0034-34: A SERVANT TO GIVE DIXON PERMANENT ASSISTANCE SHOULD BE GOT IF SHE GAVE UP (HER->THE) WHOLE TIME TO THE SEARCH AND THEN AT ANY RATE HER MOTHER MIGHT HAVE ALL THE PERSONAL (ATTENTION->ATTENTIONS) SHE REQUIRED AND HAD BEEN ACCUSTOMED TO HER WHOLE LIFE +4198-12281-0015-1234: IN THE MEANTIME FRIAR JOHN WITH HIS FORMIDABLE BATON OF THE CROSS GOT TO THE BREACH WHICH THE ENEMIES HAD MADE AND THERE STOOD TO SNATCH UP THOSE THAT (ENDEAVOURED->ENDEAVORED) TO ESCAPE +2033-164914-0018-229: I SAY WHAT MADE MY IGNOMY (WHATE'ER->WHATEVER) THE BITTER CUP I DRAIN FAR BE (FRO->FROM) ME (THAT->THY) LAND TO FLEE NOR WILL I BOW TO THOSE WHO BLAME AND FOR SUCH LOVE WOULD DEAL ME SHAME +5442-41168-0027-1699: ON LEARNING THIS THE NEW PARTY HAD MADE HASTE DURING THE DISPUTE ABOUT (FLEROV->FLEROFF) TO SEND SOME OF THEIR MEN IN A SLEDGE TO CLOTHE THE STRIPPED (GENTLEMAN->GENTLEMEN) AND TO BRING ALONG ONE OF THE INTOXICATED TO THE MEETING +2033-164916-0008-260: LASTLY THE MINISTER WENT IN AND KISSED THE GROUND BEFORE ZAU AL MAKAN WHO ROSE TO MEET HIM SAYING WELCOME O WAZIR AND (SIRE SANS PEER->SIRE'S SON SPEAR) +4852-28311-0016-1459: THE LONGER WING (TOWARD->TOWARDS) THE BACK (HAD->GOT) A BACK DOOR THAT OPENED (ONTO->ON A) WATER STREET THE SPACE BETWEEN THE HOUSE AND WISCONSIN AVENUE HAD BEEN MADE INTO A NEAT OBLONG FLOWER GARDEN FENCED OFF FROM THE SIDEWALK BY BOX (SHRUBS->SHRUGS) AND A WHITE PICKET FENCE +2033-164914-0004-215: BY ALLAH REPLIED THE FIREMAN I TELL THEE THE TRUTH +1688-142285-0005-5: YOU WHO WERE ALWAYS ACCUSING PEOPLE OF BEING SHOPPY AT HELSTONE +4294-14317-0013-1279: YOU HAD BETTER PUT THIS TO THE PROOF AND I WILL GO AT ONCE TO THE (BARGELLO->BARGELO) +3764-168671-0036-1069: FAUCHELEVENT PASSED THE UNEXPECTED (GRIBIER->CRIBIER) ONCE MORE IN REVIEW +8461-278226-0015-2899: YOU WILL BE DOING ME SUCH A (FAVOUR->FAVOR) PHILIP IF YOU'LL SAY YES +4350-10919-0021-1369: OH NO ONLY A FEW DETAILS PRINCESS 
COME THIS WAY +1688-142285-0065-65: IT'S SIMPLE AND NOT FAR TO FETCH NOR HARD TO WORK +7902-96594-0008-2404: WELL SIR SAID THE MASTER HESITATING SMUGGLERS ARE SMUGGLERS +6432-63722-0020-2059: NOW I'M AFRAID I WON'T BUT HOW DID IT HAPPEN +3528-168669-0000-695: THE PRIORESS RETURNED AND SEATED HERSELF ONCE MORE ON HER CHAIR +6128-63244-0001-2014: HOW DID THE LADIES ON BEACON STREET FEEL ABOUT THE (BALLOT->BANNET) +3528-168669-0074-769: BUT REVEREND MOTHER IF THE AGENT OF THE SANITARY COMMISSION +8131-117016-0059-2614: IT WASN'T EXACTLY LEGAL BUT NOTHING WAS HERE +7902-96591-0009-2329: (IT->AND) ALL COMES OF DRESSING UP IN THIS STUPID WAY LIKE A ROUGH FISHER LAD +4198-12259-0015-1190: COME LET US DRINK WILL YOU SEND NOTHING TO THE RIVER +7902-96592-0044-2389: I SAY YOU DO LOOK (*->LIKE) A (RUM UN->ROMAN) JUST LIKE A BIG MONKEY IN A SHOW +2414-128291-0012-275: IT IS NO LONGER TRUE THAT THE POOR ARE BLESSED +3764-168671-0006-1039: DAMPNESS WAS INVADING IT THE FLOWERS WERE DESERTING IT +7975-280076-0020-2509: WE WERE NOT ON GOOD TERMS AT THE TIME NOR HAVE WE BEEN FOR SEVERAL YEARS +1998-29454-0001-125: PERUSAL SAID THE PAWNBROKER THAT'S THE WAY TO (PERNOUNCE->PRONOUNCE) IT +7105-2340-0017-2299: OF COURSE SOME OF THEM CAN BE CHANGED +1998-29455-0029-200: CLEVER AS A (TRAINDAWG E->TRAINED DOG) IS (AN ALL OUTER IS->AND WHILE OUT OF HIS) OWN (EAD->ATT) +8188-269290-0003-2734: EVERY STUDENT IS TO BE IN EAST HALL AT HALF PAST EIGHT +3538-142836-0018-844: (CONFECTIONARY->CONFECTIONERY) FIFTEEN O EIGHT +4294-35475-0018-1309: IN A VERY SHORT TIME THE PRINCE HAD CRAWLED THROUGH THE OPENING +6432-63723-0035-2134: IT'S HARD FOR MISS MASON TOO ALTHOUGH SHE'S BEARING UP LIKE A MAJOR +8461-281231-0014-2914: TELL ME THY NAME OR WORK THY PLEASURE ON ME +3538-163622-0007-875: COME HITHER COME HITHER MY HANDSOME SON AND LET ME COMB YOUR HAIR +6432-63722-0050-2089: BUT I WANT TO KNOW JUST WHERE WE STAND NOW I KNOW +3331-159605-0040-649: I DON'T MEAN TO BE PRYING BUT I REALLY THOUGHT HE DID +3528-168669-0059-754: SO I SHALL HAVE TO NAIL UP THAT COFFIN YES +3538-163622-0022-890: NOW THEN SAID THE FOAL DOST THOU NOT SEE ANYTHING NOW +1688-142285-0020-20: IMPROVIDENT AND SELF INDULGENT WERE HIS WORDS +7902-96594-0023-2419: BEG PARDON DIDN'T MEAN (NOWT->IT OUT) SIR SAID THE SAILOR TOUCHING HIS FORELOCK +3331-159609-0009-665: CRIED POLLY WITH THE HEARTIEST SATISFACTION IN HER VOICE +4350-10919-0006-1354: THE FAMILY DOCTOR RESPECTFULLY CEASED IN THE MIDDLE OF HIS OBSERVATIONS +6432-63722-0005-2044: PINKUS AND DONOVAN HAVEN'T THEY CARROLL YEP +7975-280085-0003-2539: THAT DAY A MAN NAMED DUNNING DISCOVERED US AND WE TOOK HIM PRISONER +2414-159411-0012-335: IS IT FAIR THAT HE SHOULD DO SO OR NOT +8131-117017-0027-2644: THANKS IZZY THANKS YOURSELF +1688-142285-0080-80: (SUPPOSE->S'POSE) I TRY SAID MISTER HALE +8188-269290-0018-2749: WHAT CAN THIS MEAN SHE SAID TO HERSELF +3538-163622-0021-889: AND NOW INQUIRED THE (FOAL->POLE) SEEST THOU NOTHING NOW +6128-63244-0016-2029: I WANT TO BE NEAR TO THEM TO HELP THEM +5764-299665-0075-1879: I SAY WHAT I THINK +6070-86745-0018-1969: COME COME THAT IS NOT BAD SAID LUCIEN +8131-117029-0007-2657: (FATS->FATT'S) PLACE WAS STILL OPEN THOUGH THE CROOKED TABLES HAD BEEN REMOVED GORDON DROPPED TO A STOOL SLIPPING OFF HIS HELMET +1998-15444-0027-123: THE RESIDUE OF THE MATERIAL AFTER DIGESTION WITH HYDROCHLORIC ACID AND POTASSIUM (CHLORATE->CHLORIDE) MAY HAVE TO BE EXAMINED FOR SILVER LEAD AND BARIUM +4852-28319-0016-1518: HE SQUATTED ON HIS HAUNCHES (EXAMINING->EXAMINED) THE CARVED WOODEN 
FIGURE ATTENTIVELY AND FELT CONVINCED THAT ONCE ALIVE THE BOY WOULD BE AN IDEAL AND HAPPY COMPANION +3080-5032-0025-573: I HAVE BEEN STUDYING HOW TOM (CHEEKE->CHEEK) MIGHT COME BY HIS INTELLIGENCE AND I VERILY BELIEVE HE HAS IT FROM MY COUSIN PETERS +2033-164915-0009-243: WHY DIDST THOU SAY I NEVER REPEATED THESE COUPLETS NOR DO I KNOW WHO REPEATED THEM WHEN IT WAS THY COMPANION +3764-168671-0005-1038: THESE GATES THEREFORE SWUNG INEXORABLY ON THEIR HINGES AT THE INSTANT WHEN THE SUN DISAPPEARED BEHIND THE DOME OF THE INVALIDES +367-130732-0013-933: (GOBEY'S->GOBIES) PASSED WITH THE FIRE AND THE LITTLE RESTAURANT BEARING HIS NAME AND IN CHARGE OF HIS WIDOW IN UNION SQUARE AVENUE HAS NOT ATTAINED THE FAME OF THE OLD PLACE +5442-41168-0011-1683: TO ESCAPE FROM THIS PAINFUL FEELING HE WENT AWAY INTO THE OTHER ROOM WHERE THERE WAS NOBODY EXCEPT THE WAITERS AT THE REFRESHMENT BAR +1688-142285-0033-33: SHE LAY AWAKE VERY LONG THIS NIGHT PLANNING HOW TO LESSEN THE EVIL INFLUENCE OF THEIR MILTON LIFE ON HER MOTHER +3997-180294-0010-1098: WITH THEM THE BODY HAS WORN OUT THE SOUL THE SENSES HAVE BURNED UP THE HEART DISSIPATION HAS BLUNTED THE FEELINGS +4852-28311-0015-1458: AN EMPTY LOT CUT (*->IN) INTO BY CHURCH LANE GAVE A LOOK OF ISOLATION TO THE L SHAPED BRICK BUILDING THAT SERVED MISTER WICKER AS BOTH HOUSE AND PLACE OF BUSINESS +8461-281231-0012-2912: THE BLACK KNIGHT WAS SOON ENGAGED IN DESPERATE COMBAT WITH THE NORMAN CHIEF AND THE VAULTED ROOF OF THE HALL RUNG WITH (THEIR->*) FURIOUS BLOWS +367-130732-0028-948: PUT THESE INGREDIENTS INTO A STEWPAN AND FRY THEM TEN MINUTES THEN THROW IN THE CRAWFISH AND POUR ON THEM HALF A BOTTLE OF FRENCH WHITE WINE +8461-278226-0013-2897: BUT THERE ARE SOME OTHERS WHO SAY THAT HIS MEMORY HAS NOT ALTOGETHER FAILED AND THAT HE IS STILL ENOUGH HARSHLY CRITICAL TOWARDS THE WORKS OF OTHERS +533-131562-0001-1608: THE KEYS OF YOUR CABINET DESK (DRAWERS->DRAWER) AND WHATEVER ELSE YOU POSSESS SAID HE RISING AND HOLDING OUT HIS HAND +2609-156975-0032-393: THE TITLE OF HIS FATHER IN LAW IMPLIES THAT THIS (PRIEST->PREACH) MINISTERED AT SOME (WILDERNESS->MOTHER) SANCTUARY +5484-24318-0007-1773: IF HE HAD PASSED INTO ANNIHILATION HE (HERMON->HERMOD) WISHED TO FOLLOW HIM THITHER AND ANNIHILATION CERTAINLY MEANT REDEMPTION FROM PAIN AND MISERY +8188-274364-0003-2792: THE COURT WHICH CONSISTED OF THE CHIEF (OFFICERS->OFFICIALS) OF THE ARMY FOUND THE CRIME TO BE CAPITAL AND CONDEMNED THAT NOBLEMAN TO LOSE HIS HEAD +533-1066-0007-1563: I MEAN HE PERSISTED DO YOU FEEL AS THOUGH YOU COULD GO THROUGH WITH SOMETHING RATHER UNUSUAL +1688-142285-0048-48: YOU KNOW I'M A STRANGER HERE SO PERHAPS I'M NOT SO QUICK AT UNDERSTANDING WHAT YOU MEAN AS IF I'D LIVED ALL MY LIFE (AT->IN) MILTON +5442-32873-0001-1653: THERE WAS A VERY NATURAL SAVAGERY AND DEJECTION THERE AND A WILD LEER IN HIS YELLOW EYES RACHEL SAT DOWN +3764-168671-0037-1070: FAUCHELEVENT WHO WAS ILLITERATE BUT VERY SHARP UNDERSTOOD THAT HE HAD TO DEAL WITH A FORMIDABLE SPECIES OF MAN WITH A FINE TALKER HE MUTTERED +3538-142836-0004-830: IF YOU DIP THE FINGER INTO THE SYRUP AND APPLY IT TO THE THUMB THE TENACITY OF THE SYRUP WILL ON SEPARATING THE FINGER AND THUMB AFFORD A THREAD WHICH SHORTLY BREAKS THIS IS THE LITTLE THREAD +4852-28312-0005-1475: THE DOUBLE FANS OF MINUTE WRINKLES BREAKING FROM EYE CORNER TO TEMPLE AND JOINING WITH THOSE OVER THE (CHEEKBONES->CHEEK BONES) WERE DRAWN INTO THE HORIZONTAL LINES ACROSS THE DOMED FOREHEAD +1688-142285-0035-35: VISITING REGISTER OFFICES SEEING ALL MANNER OF UNLIKELY PEOPLE AND 
VERY FEW IN THE LEAST LIKELY ABSORBED MARGARET'S TIME AND THOUGHTS FOR SEVERAL DAYS +4198-61336-0030-1265: (UKINZER->AKENJER) TOOK REFUGE IN HIS CAPITAL SHAPIA WHICH HELD OUT SUCCESSFULLY ALTHOUGH THE SURROUNDING COUNTRY WAS RAVAGED AND DESPOILED +5442-41168-0013-1685: LEVIN ADVANCED BUT UTTERLY FORGETTING WHAT HE WAS TO DO AND MUCH EMBARRASSED HE TURNED TO SERGEY IVANOVITCH WITH THE QUESTION WHERE AM I TO PUT IT +3005-163391-0010-500: NO MY NAME'S (BLODGETT ELEXANDER BLODGETT->BLODGET ALEXANDER BLAGET) REVEREND (ELEXANDER BLODGETT->ALEXANDER BLDGET) I S'POSE I MUST SAY AS I'M ONE (O->OF) THE (LORD'S->LARGE) POOR SERVANTS +2609-156975-0034-395: THE CRUEL FATE OF HIS PEOPLE AND THE PAINFUL EXPERIENCE IN EGYPT THAT HAD DRIVEN HIM INTO THE WILDERNESS PREPARED HIS MIND TO RECEIVE THIS TRAINING +8280-266249-0053-2854: BUT ALL WAS SILENT AND AFTER A MOMENT OF ANXIOUS WAITING THEY SAT DOWN TO THEIR GAME AGAIN TRYING TO CONCEAL AND SHAKE OFF THEIR FEARS WITH A FORCED UNNATURAL LAUGH +6070-86744-0004-1925: THEN IT IS SETTLED SAID THE COUNT AND I GIVE YOU MY SOLEMN ASSURANCE THAT I ONLY WAITED AN OPPORTUNITY LIKE THE PRESENT TO (REALIZE->REALISE) PLANS THAT I HAVE LONG MEDITATED +6432-63723-0021-2120: GRAVE AND EVEN REVEREND (*->THE) CONVENTIONS ASSEMBLED IN ITS (BALLROOM->BALL ROOM) AND POLITICIANS OF THE UPPER IF NOT BETTER CLASS WERE FREQUENTLY SEEN IN ITS DINING ROOM OR CAFE +5484-24317-0013-1745: A STRANGER OUT OF HIS OWN SPHERE HE (FELT->FELL) CHILLED AMONG THESE CLOSELY UNITED MEN AND WOMEN TO WHOM NO TIE BOUND HIM SAVE THE PRESENCE OF THE SAME HOST +7018-75789-0017-2225: HE ASKED ME WHENCE THEY CAME AND I SAID TO HIM BY ALLAH O COMMANDER OF THE FAITHFUL I KNOW NOT THE NAME OF THE CITY NOR THE WAY THITHER +8188-269288-0016-2689: HER EYES SHONE WITH PLEASURE AT THE ANTICIPATION OF THE DELIGHTFUL TIME SHE WOULD HAVE (REVELING->REVELLING) IN THE HOME NEWS THE OTHER LETTER WAS DIRECTED TO ANNIE COLCHESTER +8188-274364-0005-2794: IT IS NOW FULL TWO HUNDRED AND FORTY YEARS SINCE TREASONS WERE DEFINED AND SO LONG HAS IT BEEN SINCE ANY MAN WAS TOUCHED TO THIS EXTENT UPON THIS CRIME BEFORE MYSELF +6128-63244-0017-2030: IT WAS ONE THING TO CHOOSE FOR HERSELF BUT NOW THE GREAT REPRESENTATIVE OF THE ENFRANCHISEMENT OF THEIR SEX FROM EVERY FORM OF BONDAGE HAD CHOSEN FOR HER +4350-9170-0048-1430: SO THAT THE JUSTIFICATION OF STATE VIOLENCE ON THE GROUND OF THE PROTECTION IT GIVES US FROM EVIL DISPOSED PERSONS EVEN IF IT HAD SOME FOUNDATION THREE OR FOUR CENTURIES AGO HAS NONE WHATEVER (NOW->KNOWN) +533-131556-0014-1595: I WATCHED HER A FEW MOMENTS WITH A FEELING OF MALEVOLENT GRATIFICATION THEN MOVING TOWARDS THE DOOR I CALMLY ASKED IF SHE HAD ANYTHING MORE TO SAY +4350-9170-0002-1384: IN THE SOCIAL CONCEPTION OF LIFE IT IS SUPPOSED THAT SINCE THE AIM OF LIFE IS FOUND IN GROUPS OF INDIVIDUALS INDIVIDUALS WILL VOLUNTARILY SACRIFICE THEIR OWN INTERESTS FOR THE (INTERESTS->INTEREST) OF THE GROUP +5484-24318-0037-1803: (HONOUR->HONOR) TO (MYRTILUS->MERTILLUS) AND HIS ART BUT HE TRUSTED THIS NOBLE (FESTAL->FESTALE) ASSEMBLAGE WOULD PARDON THE UNINTENTIONAL DECEPTION AND AID HIS PRAYER FOR RECOVERY +3331-159605-0010-619: IT WAS THAT INDESCRIBABLE SOMETHING WHICH WOMEN ARE QUICK TO SEE AND FEEL IN MEN WHO HAVE BEEN BLESSED WITH WISE AND GOOD MOTHERS +2033-164915-0010-244: BUT NOW I WILL NOT LEAVE THEE BETWEEN THIS PLACE AND BAGHDAD AND WHAT BETIDETH THY COMRADE SHALL BETIDE THEE +2609-156975-0033-394: MOSES IN THE HOME OF THE MIDIAN (PRIEST->PRIESTS) WAS BROUGHT INTO DIRECT AND CONSTANT CONTACT WITH THE JEHOVAH 
WORSHIP +4350-9170-0032-1414: THE DESPOTISM OF (A->THE) GOVERNMENT ALWAYS INCREASES WITH THE STRENGTH OF THE ARMY AND ITS EXTERNAL SUCCESSES AND THE AGGRESSIVENESS OF (A->THE) GOVERNMENT INCREASES WITH ITS INTERNAL DESPOTISM +8188-269290-0033-2764: BUT MARJORIE AND (EILEEN->AILEEN) HAD ALREADY DEPARTED AND LESLIE AND JANE FOUND THEMSELVES AMONG THE LAST STUDENTS TO ARRIVE AT THE GREAT EAST HALL +3005-163391-0009-499: BUT THEN I SAYS AGAIN NO I RECKON IT AIN'T HIM OR ELSE HE WOULDN'T BE (PADDLING->PADDLIN) UP THE RIVER YOU AIN'T HIM ARE YOU +6070-63485-0006-1908: TOM SEYTON DID NOT LOSE HIS PRESENCE OF MIND DURING THIS SCENE RAPIDLY AND UNEXPECTEDLY AS IT HAD OCCURRED +5764-299665-0090-1894: WHY SHOULD MEN AND WOMEN HAVE CHILDREN THAT THEY CANNOT TAKE CARE OF CHILDREN THAT ARE (BURDENS->A BURDEN) AND CURSES WHY +367-130732-0014-934: IT IS POSSIBLE THAT SHE KNOWS THE SECRET OF PREPARING CRAB AS IT WAS PREPARED IN THE (GOBEY'S->GOBIES) OF BEFORE THE FIRE BUT HIS (PRESTIGE->PRESGE) DID NOT DESCEND TO HER +6128-63240-0027-1998: MISSUS (LUNA'S->LUNNY'S) FAMILIARITY EXTENDED EVEN TO HER SISTER SHE REMARKED TO MISS CHANCELLOR THAT SHE LOOKED AS IF SHE WERE GOT UP FOR A SEA VOYAGE +4198-12259-0029-1204: SPARROWS WILL NOT EAT UNLESS YOU BOB THEM ON THE TAIL NOR CAN I DRINK IF I BE NOT FAIRLY SPOKE TO +5764-299665-0074-1878: IT (FOLLOWS->FOLLOWED) THAT THERE COULD NOT HAVE BEEN ANY INTELLIGENCE ANY DESIGN BACK OF MATTER AND FORCE +2609-169640-0024-439: IN THIS STATE (THE SHIP PASSED->THESHIP POUCHED) AHEAD ALL (*->OF) HER (CANVAS BEING FULL->CANVATES BEEN FOR) LEAVING THE (PROA MOTIONLESS->PROW MOTION IT) IN HER WAKE +7975-280057-0009-2464: (MISSUS->MISS) WELLS STAYED TO GUARD THE REMAINS WHILE HER SON CARRIED THE NEWS OF THE MURDER TO COLONEL PEABODY OF THE FEDERAL COMMAND WHO WAS THEN IN CAMP AT KANSAS CITY +3005-163390-0025-484: WHEN I WAKED UP JUST AT DAYBREAK HE WAS SITTING THERE WITH HIS HEAD DOWN BETWIXT HIS KNEES MOANING AND MOURNING TO HIMSELF +7975-280076-0004-2493: JUNE THIRD EIGHTEEN SEVENTY ONE (OBOCOCK BROTHERS->OBACOCK BROTHER'S) BANK AT (CORYDON->CROYDON) IOWA WAS ROBBED OF FORTY THOUSAND DOLLARS BY SEVEN MEN IN BROAD DAYLIGHT +3764-168670-0049-1024: THE OVER PRUDENT CATS AS THEY ARE AND BECAUSE THEY ARE CATS SOMETIMES INCUR MORE DANGER THAN THE AUDACIOUS +3538-163624-0011-904: THEN HE SAW THE TRACK WHICH THE DRAGON (*->HAD) MADE WHEN HE WENT TO A CLIFF TO DRINK AND THE TRACK WAS AS IF A GREAT RIVER HAD ROLLED ALONG AND LEFT A DEEP VALLEY +8461-281231-0028-2928: DURING ALL THIS TIME ISAAC OF (YORK->YORKE) SAT MOURNFULLY APART GRIEVING FOR THE LOSS OF HIS DEARLY LOVED DAUGHTER REBECCA +7018-75789-0029-2237: THIS HE SET IN A SAUCER WETTED WITH A LITTLE WATER AND AFTER WAITING A SHORT TIME SMELT AND TASTED IT AND THEN HE TOOK OUT OF THE CHEST A BOOKLET WHEREIN HE READ (AWHILE->A WHILE) AND SAID WEEPING KNOW O YE PASSENGERS THAT IN THIS BOOK IS A MARVELLOUS MATTER DENOTING THAT WHOSO COMETH HITHER SHALL SURELY DIE WITHOUT HOPE OF ESCAPE FOR THAT THIS OCEAN IS CALLED THE SEA OF THE CLIME OF THE KING WHEREIN IS (THE->A) SEPULCHRE OF OUR LORD SOLOMON SON OF DAVID ON BOTH BE PEACE +3080-5032-0009-557: MISTER FISH IS THE SQUIRE OF DAMES AND HAS SO MANY MISTRESSES THAT ANYBODY MAY PRETEND (A->TO) SHARE IN HIM AND BE BELIEVED BUT THOUGH I HAVE THE HONOUR TO BE HIS NEAR NEIGHBOUR TO SPEAK FREELY I CANNOT BRAG MUCH THAT HE MAKES ANY COURT TO ME AND I KNOW NO YOUNG WOMAN IN THE COUNTRY THAT HE DOES NOT VISIT OFTEN +3331-159605-0008-617: NOW AS POLLY WAS BY NO MEANS A PERFECT CREATURE I AM FREE TO 
CONFESS THAT THE OLD TEMPTATION ASSAILED HER MORE THAN ONCE THAT WEEK FOR WHEN THE FIRST EXCITEMENT OF THE DODGING REFORM HAD SUBSIDED SHE MISSED THE PLEASANT LITTLE INTERVIEWS THAT USED TO PUT A CERTAIN (FLAVOR->FLAVOUR) OF ROMANCE INTO HER DULL HARD WORKING DAYS +3997-180294-0009-1097: THEN HOW SURELY MUST THEY DESIRE THE WORLD WHICH IS HIDDEN FROM THEM HOW SURELY MUST THEY FIND IT TEMPTING HOW SURELY MUST THEY LISTEN TO THE FIRST VOICE WHICH COMES TO TELL ITS SECRETS THROUGH THEIR BARS AND BLESS THE HAND WHICH IS THE FIRST TO RAISE A CORNER OF THE (MYSTERIOUS->MYSTERY) VEIL +533-131562-0000-1607: IT SEEMS VERY INTERESTING LOVE SAID HE LIFTING HIS HEAD AND TURNING TO WHERE I STOOD WRINGING MY (HANDS->HAND) IN SILENT RAGE AND ANGUISH BUT IT'S RATHER LONG I'LL LOOK AT IT SOME OTHER TIME AND MEANWHILE I'LL TROUBLE YOU FOR YOUR KEYS MY DEAR WHAT KEYS +367-293981-0008-962: (MINE->I) COULD SPEAK TOO SAID DON QUIXOTE BUT THAT IS NOT A SUFFICIENT REASON FOR BELIEVING THAT WHAT WE SEE IS THE ENCHANTED MOOR +2033-164914-0001-212: BUT SHE SAID WHOMSOEVER THOU SEEST AWAKE HE IS THE RECITER +4350-9170-0060-1442: FOR A MAN OF THE (POOR->POORER) WORKING CLASS THE ADVANTAGES AND DISADVANTAGES WILL BE THE SAME BUT WITH A GREAT INCREASE OF DISADVANTAGES +4852-28311-0014-1457: CHRIS STARTED OFF ONCE MORE PASSING THE BLEAK LITTLE VICTORIAN CHURCH PERCHED ON THE HILL ABOVE MISTER WICKER'S HOUSE +533-131556-0011-1592: IF I WERE SUSPICIOUS I REPLIED I SHOULD HAVE DISCOVERED YOUR INFAMY LONG BEFORE +8188-269290-0001-2732: IMMEDIATELY AFTER DINNER THAT EVENING LESLIE RAN UP TO HER ROOM TO MAKE PREPARATIONS FOR HER VISIT TO EAST HALL +4294-35475-0016-1307: THE PRINCE SPENT ALL THE FOLLOWING TIME UNTIL MIDNIGHT TRYING TO THINK OF A SUITABLE VERSE TO SAY TO THE SCISSORS +7105-2330-0011-2251: FIFTEEN HUNDRED SAID THE PRIME MINISTER WITH A SHUDDER IT'S TOO HORRIBLE TO THINK OF +4350-9170-0015-1397: IT WAS PRODUCED ON ONE HAND BY THE NATURAL GROWTH OF POPULATION AND ON THE OTHER BY STRUGGLE AND CONQUEST +7105-2340-0000-2282: WITH THAT NOTORIOUS FAILING OF HIS HE WAS NOT THE SORT OF PERSON ONE WANTED IN ONE'S HOUSE +2414-159411-0024-347: WHEN THEY GOT THERE THE JACKAL SAID NOW (BRAHMAN->BROWMAN) SHOW ME EXACTLY WHERE YOU STOOD +8131-117017-0009-2626: SOME TURNED AWAY AS GORDON AND THE OTHER COP WENT TO WORK BUT MOST OF THEM WEREN'T SQUEAMISH +3528-168656-0010-692: WHEN THE POOR OLD WOMAN DIED THEY RUSHED TO HER CUPBOARD MORE HASTILY THAN WAS FITTING PERHAPS AND OPENED IT +4852-28330-0002-1532: NEXT NED CILLEY WAS RELIEVED AT THE HELM BY ELBERT JONES WHO TOOK OVER NED WENT ON DOWN +5484-24317-0010-1742: WHEN (HERMON->HERMANN) ENTERED THE RESIDENCE OF THE (GRAMMATEUS->GRAMMATIUS) IN THE PALACE THE GUESTS HAD ALREADY ASSEMBLED +7975-280085-0016-2552: SHERIFF GLISPIN OF (WATONWAN->WATERWAN) COUNTY WHO WAS TAKING BOB'S PISTOL FROM HIM WAS ALSO SHOUTING TO THE FELLOW +7975-280076-0002-2491: THIS RAID WAS ACCOMPANIED BY BLOODSHED JUDGE MC (LAIN->LANE) THE BANKER BEING SHOT THOUGH NOT FATALLY +3538-142836-0016-842: THAT THEY MAY KEEP IT IS NECESSARY NOT TO BE SPARING OF SUGAR FIFTEEN O THREE +3528-168669-0117-812: I WILL DO ANYTHING TO PROVE MY ZEAL TOWARDS THE COMMUNITY THESE ARE MY ORDERS I AM TO NAIL UP THE COFFIN +7018-75788-0019-2207: BUT THERE IS MAJESTY AND THERE IS NO MIGHT SAVE IN ALLAH THE GLORIOUS THE GREAT +7902-96591-0022-2342: HE WENT AND TRIED TO FORCE HIS HEAD THROUGH RECALLING AS HE DID THAT WHERE A PERSON'S HEAD WOULD GO THE REST OF THE BODY WOULD PASS +5764-299665-0013-1817: HOW CAN WE ACCOUNT FOR A WORLD 
WHERE LIFE FEEDS ON LIFE +8131-117029-0022-2672: HE GRABBED GORDON'S HAND AND WADDLED DOWN THE LANDING PLANK (IZZY->ISEY) SHOOK HIS HEAD +8188-269288-0043-2716: NOW DRINK THIS AT ONCE SHE SAID IN A VOICE OF AUTHORITY IF YOU REALLY WISH TO SLEEP +3005-163391-0022-512: NOW HUSTLE BACK RIGHT OFF AND FETCH THE DUKE UP HERE AND THE NEW CARPET BAGS +6128-63240-0026-1997: I SHALL BE BACK VERY LATE (WE ARE->WE'RE) GOING TO A THEATRE PARTY THAT'S WHY WE DINE SO EARLY +7975-280057-0006-2461: MY FATHER WAS IN THE EMPLOY OF THE UNITED STATES GOVERNMENT AND HAD THE MAIL CONTRACT FOR FIVE HUNDRED MILES +4852-28312-0002-1472: WHAT WITH (THE->*) ONE WINDOW AND THE LOWERING DAY OUTSIDE THE LONG NARROW SHOP WAS SOMBER +7975-280076-0017-2506: OUR BUSINESS THERE WAS TO SEE E P WEST HE WAS NOT AT HOME BUT THE FAMILY WILL REMEMBER THAT WE WERE THERE +6070-86744-0005-1926: SHALL WE MAKE A POSITIVE APPOINTMENT FOR A PARTICULAR DAY AND HOUR INQUIRED THE COUNT ONLY LET ME WARN YOU THAT I AM PROVERBIAL FOR MY PUNCTILIOUS EXACTITUDE IN KEEPING MY ENGAGEMENTS DAY FOR DAY HOUR FOR HOUR SAID ALBERT THAT WILL SUIT ME TO A DOT +2033-164914-0006-217: WHAT AILS THEE THEN THAT THOU MUST NEEDS RECITE VERSES SEEING THAT WE ARE TIRED OUT WITH WALKING AND WATCHING AND ALL THE FOLK ARE ASLEEP FOR THEY REQUIRE SLEEP TO REST THEM OF THEIR FATIGUE +8461-281231-0030-2930: THE TEMPLAR IS FLED SAID DE BRACY IN ANSWER TO THE PRINCE'S EAGER QUESTIONS FRONT DE BOEUF YOU WILL NEVER SEE MORE AND HE ADDED IN A LOW AND EMPHATIC TONE RICHARD IS IN ENGLAND I HAVE SEEN HIM AND SPOKEN WITH HIM +2033-164916-0009-261: MOREOVER THE SULTAN COMMANDED HIS WAZIR DANDAN (*->TO) CALL (A->AT) TEN DAYS HALT OF THE ARMY THAT HE MIGHT BE PRIVATE WITH HIM AND LEARN FROM HIM HOW AND WHEREFORE HIS FATHER HAD BEEN SLAIN +5764-299665-0032-1836: IT IS INSISTED BY THESE THEOLOGIANS AND BY MANY OF THE SO CALLED PHILOSOPHERS THAT THIS MORAL SENSE THIS SENSE OF DUTY OF OBLIGATION WAS IMPORTED AND THAT CONSCIENCE IS AN EXOTIC +7105-2330-0001-2241: HE HAD NOT ONLY PLEADED GUILTY BUT HAD EXPRESSED HIS INTENTION OF REPEATING HIS ESCAPADE IN OTHER DIRECTIONS AS SOON AS CIRCUMSTANCES PERMITTED THROUGHOUT THE TRIAL HE WAS BUSY EXAMINING A SMALL MODEL OF THE FREE TRADE HALL IN MANCHESTER +7018-75789-0018-2226: FOR STATE PROCESSIONS A THRONE IS SET FOR HIM UPON A HUGE ELEPHANT ELEVEN CUBITS HIGH AND UPON THIS HE SITTETH HAVING HIS GREAT LORDS AND OFFICERS AND GUESTS STANDING IN TWO RANKS ON HIS RIGHT HAND AND ON HIS LEFT +7105-2330-0016-2256: DESPITE THE EARLINESS OF THE HOUR A SMALL CROWD HAD GATHERED IN THE STREET OUTSIDE AND THE HORRIBLE MENACING (TRELAWNEY->TRE LONE) REFRAIN OF THE FIFTEEN HUNDRED VOTING MEN CAME IN A STEADY MONOTONOUS CHANT +6128-63240-0000-1971: THE GENTLEMAN HAD NOT EVEN NEEDED TO SIT DOWN TO BECOME INTERESTED APPARENTLY HE HAD TAKEN UP THE VOLUME FROM (A->THE) TABLE AS SOON AS HE CAME IN AND STANDING THERE AFTER A SINGLE GLANCE ROUND THE APARTMENT HAD LOST HIMSELF IN ITS PAGES +3005-163391-0011-501: YOU SEE HE WAS PRETTY (OLD->OLE) AND GEORGE'S (G'YIRLS->GO GIRLS) WAS TOO YOUNG TO BE MUCH COMPANY FOR HIM EXCEPT MARY JANE THE RED HEADED ONE AND SO HE WAS KINDER LONESOME AFTER GEORGE AND HIS WIFE DIED AND DIDN'T SEEM TO CARE MUCH TO LIVE +5484-24317-0014-1746: CRATES HAD REALLY BEEN INVITED IN ORDER TO WIN HIM OVER TO THE QUEEN'S CAUSE BUT CHARMING FAIR HAIRED (NICO->NICHO) HAD BEEN COMMISSIONED BY THE CONSPIRATORS TO PERSUADE HIM TO SING (ARSINOE'S->ARSENAL'S) PRAISES AMONG HIS PROFESSIONAL ASSOCIATES +4294-35475-0019-1310: WHILE HE STOOD LOOKING AROUND 
HIM IN BEWILDERMENT A FIREFLY ALIGHTED ON HIS ARM FLASHING ITS LITTLE LANTERN IN THE PRINCE'S FACE IT CRIED THIS WAY MY FRIEND THE FLY SENT ME TO GUIDE YOU TO A PLACE OF SAFETY +4198-61336-0000-1235: IT IS SIGNIFICANT TO NOTE IN THIS CONNECTION THAT THE NEW KING WAS AN UNSWERVING ADHERENT OF THE CULT OF (ASHUR->AESRE) BY THE (ADHERENTS->ADHERENCE) OF WHICH HE WAS PROBABLY STRONGLY SUPPORTED +3005-163389-0014-454: SO THEN THE (RINGMASTER->RING MASTER) HE MADE A LITTLE SPEECH AND SAID HE HOPED THERE WOULDN'T BE NO DISTURBANCE AND IF THE MAN WOULD PROMISE HE WOULDN'T MAKE NO MORE TROUBLE HE WOULD LET HIM RIDE IF HE THOUGHT HE COULD STAY ON THE HORSE +4294-9934-0006-1324: WHATEVER MAY HAVE BEEN HIS DESIRE TO REMAIN WHERE HE WAS HE COULD NOT HALT THERE HE WAS IRRESISTIBLY CONSTRAINED TO CONTINUE TO ADVANCE TO EXAMINE TO THINK TO MARCH FURTHER +7105-2330-0014-2254: NOT LATER THAN SEVEN THIRTY THEN SAID THE CHIEF ORGANISER I HAVE PROMISED THE AGENT DOWN THERE THAT HE SHALL BE ABLE TO DISPLAY POSTERS ANNOUNCING (PLATTERBAFF->PLATTER BAFF) IS OUT BEFORE THE POLL OPENS +3080-5032-0012-560: BUT IT SEEMS THIS GENTLEMAN IS NOT SO EASY ACCESS BUT YOU MAY ACKNOWLEDGE SOMETHING DUE TO ME IF I INCLINE HIM TO LOOK GRACIOUSLY UPON YOU AND THEREFORE THERE IS NOT MUCH HARM DONE +3080-5040-0029-604: HE HAS ONE SON AND TIS THE FINEST BOY THAT (E'ER->EVER) YOU SAW AND HAS A NOBLE SPIRIT BUT YET STANDS IN THAT AWE OF HIS FATHER THAT ONE WORD FROM HIM IS AS MUCH AS TWENTY WHIPPINGS +3997-180297-0023-1145: MARGUERITE TIRED OUT WITH THIS LONG CONFESSION THREW HERSELF BACK ON THE SOFA AND TO STIFLE A SLIGHT COUGH PUT UP HER HANDKERCHIEF TO HER LIPS AND FROM THAT TO HER EYES +7105-2340-0031-2313: SHE ROSE AND WENT OUT HURRIEDLY AS THOUGH TO ASSURE HERSELF THAT THE DRAWING ROOM WAS NOT BEING STRIPPED OF ITS SILVERWARE AND RETURNED A MOMENT LATER BEARING A CREAM JUG IN HER HANDS +7902-96592-0028-2373: A HAPPY INSPIRATION HAD COME AND PLACING ONE HAND UPON HIS (BREAST->CHEST) HE THRUST IN THE OTHER GAVE A TUG AND DREW OUT HIS LITTLE CURVED DIRK GLANCED AT THE EDGE RAN TO THE WINDOW AND BEGAN TO CUT AT ONE OF THE BARS (LABOUR->LABOR) IN VAIN +8188-269288-0030-2703: LESLIE WENT TO THE WINDOW AND FLUNG IT OPEN SHE PUT HER HEAD OUT AND TRIED TO PEER INTO THE DARKNESS BUT THE MOON HAD ALREADY SET AND SHE COULD NOT SEE MORE THAN A COUPLE OF YARDS IN FRONT OF HER +3997-182399-0005-1159: NOW (OL MISTAH->OLD MISTER) BUZZARD IS NATURALLY GOOD NATURED AND ACCOMMODATING AND WHEN PETER BEGGED SO HARD HE JUST COULDN'T FIND IT IN HIS HEART TO REFUSE +5442-41168-0012-1684: HE PARTICULARLY LIKED THE WAY ONE GRAY WHISKERED WAITER WHO SHOWED (HIS->A) SCORN FOR THE OTHER YOUNGER ONES AND WAS JEERED AT BY THEM WAS TEACHING THEM HOW TO FOLD UP NAPKINS PROPERLY +3080-5040-0014-589: INDEED I LIKE HIM EXTREMELY AND HE IS COMMENDED TO ME BY PEOPLE THAT KNOW HIM VERY WELL AND ARE ABLE TO JUDGE FOR A MOST EXCELLENT SERVANT AND FAITHFUL AS POSSIBLE +7018-75789-0002-2210: LAND AFTER LAND SHALT THOU (SEEK AND FIND->SEE CONFINED) BUT NO OTHER LIFE ON THY WISH SHALL WAIT FRET NOT THY SOUL (IN->AND) THY THOUGHTS O NIGHT ALL WOES SHALL END OR SOONER OR LATE +1998-15444-0000-96: IF CALLED TO A CASE SUPPOSED OR SUSPECTED TO BE ONE OF POISONING THE MEDICAL MAN HAS TWO DUTIES TO PERFORM TO SAVE THE PATIENT'S LIFE AND TO PLACE HIMSELF IN A POSITION TO GIVE EVIDENCE IF CALLED ON TO DO SO +4350-9170-0033-1415: THE RIVALRY OF THE EUROPEAN STATES IN CONSTANTLY INCREASING THEIR FORCES HAS REDUCED THEM TO THE NECESSITY OF HAVING RECOURSE TO UNIVERSAL MILITARY SERVICE 
SINCE BY THAT MEANS THE GREATEST POSSIBLE NUMBER OF SOLDIERS IS OBTAINED AT THE LEAST POSSIBLE EXPENSE +3528-168669-0076-771: (CHONODEMAIRE->SHONAU DE MER) ONE OF THE SEVEN GERMAN KINGS WHO ENTERED AMONG THE GAULS UNDER THE EMPIRE OF CONSTANTIUS EXPRESSLY RECOGNIZED THE RIGHT OF NUNS TO BE BURIED IN RELIGION THAT IS TO SAY BENEATH THE ALTAR +6128-63241-0001-2000: THE WOMEN HE HAD HITHERTO KNOWN HAD BEEN MAINLY OF HIS OWN SOFT CLIME AND IT WAS NOT OFTEN THEY EXHIBITED THE TENDENCY HE DETECTED AND CURSORILY DEPLORED IN MISSUS (LUNA'S->LUNNY'S) SISTER +3080-5040-0001-576: MY POOR LADY (VAVASOUR->VAVASOR) IS CARRIED TO THE TOWER AND HER GREAT BELLY COULD NOT EXCUSE HER BECAUSE SHE WAS ACQUAINTED BY SOMEBODY THAT THERE WAS A PLOT AGAINST THE PROTECTOR AND DID NOT DISCOVER IT +4198-61336-0016-1251: NO RESISTANCE WAS POSSIBLE ON THE PART OF (MENAHEM->MANAHEM) THE USURPER WHO WAS PROBABLY READY TO WELCOME THE ASSYRIAN CONQUEROR SO THAT BY ARRANGING AN ALLIANCE HE MIGHT SECURE HIS OWN POSITION +3080-5040-0031-606: NOT TO KNOW WHEN YOU WOULD COME HOME I CAN ASSURE YOU NOR FOR ANY OTHER OCCASION OF MY OWN BUT WITH A COUSIN OF MINE THAT HAD LONG DESIGNED TO MAKE HERSELF SPORT WITH HIM AND DID NOT MISS OF HER AIM +3997-180297-0024-1146: MARGUERITE DO WITH ME AS YOU WILL I AM YOUR SLAVE YOUR DOG BUT IN THE NAME OF HEAVEN TEAR UP THE LETTER WHICH I WROTE TO YOU AND DO NOT MAKE ME LEAVE YOU TO MORROW IT WOULD KILL ME +4852-28319-0003-1505: ONE AFTERNOON WHEN HE (*->HAD) RETURNED AFTER A REST TO MISTER WICKER'S STUDY HE SAW THAT THERE WAS SOMETHING NEW IN THE ROOM A BOWL WITH A GOLDFISH IN IT STOOD ON THE TABLE BUT MISTER WICKER WAS NOT TO BE SEEN +4852-28312-0006-1476: LITTLE TUFTS OF WHITE FUZZ ABOVE THE EARS WERE ALL THAT REMAINED OF THE ANTIQUARIAN'S HAIR BUT WHAT DREW AND HELD CHRIS'S GAZE WERE THE OLD MAN'S EYES +2609-157645-0011-411: SIX ARMS THE NEAREST WITHIN REACH PRESENTED WITH AN OBEDIENT START AS MANY TOBACCO (POUCHES->PIROUCHES) TO THE MAN OF OFFICE +4350-9170-0019-1401: GOVERNMENT AUTHORITY EVEN IF IT DOES SUPPRESS PRIVATE VIOLENCE ALWAYS INTRODUCES INTO THE LIFE OF MEN FRESH FORMS OF VIOLENCE WHICH TEND TO BECOME GREATER AND GREATER IN PROPORTION TO THE DURATION AND STRENGTH OF THE GOVERNMENT +1998-29455-0000-171: THE SINGING AND LAUGHING WENT ON LONG AFTER HE HAD FALLEN ASLEEP AND IF LATER IN THE EVENING (THERE->THEY) WERE LOUD VOICED ARGUMENTS OR QUARRELS EVEN DICKIE DID NOT HEAR THEM +3080-5040-0033-608: EVER SINCE THIS ADVENTURE I HAVE HAD SO GREAT A BELIEF IN ALL THINGS OF THIS NATURE THAT I COULD NOT FORBEAR LAYING A (PEAS COD->PEASE CARD) WITH NINE PEAS (IN'T->IN IT) UNDER MY DOOR YESTERDAY AND WAS INFORMED BY IT THAT MY HUSBAND'S NAME SHOULD BE THOMAS HOW DO YOU LIKE THAT +2033-164914-0008-219: WHEN NUZHAT AL ZAMAN HEARD THE FIRST (IMPROVISATION->IMPROCISATION) SHE CALLED TO MIND HER FATHER AND HER MOTHER AND HER BROTHER AND THEIR (WHILOME->WILLOW) HOME THEN SHE WEPT AND CRIED (AT->TO) THE EUNUCH AND SAID TO HIM WOE TO THEE +6128-63244-0020-2033: THIS WAS THE ONLY SACRED CAUSE THIS WAS THE GREAT THE JUST REVOLUTION IT MUST TRIUMPH IT MUST SWEEP EVERYTHING BEFORE IT IT MUST EXACT FROM THE OTHER THE BRUTAL (BLOOD STAINED->BLOODSTAINED) RAVENING RACE THE LAST PARTICLE OF EXPIATION +7105-2330-0003-2243: OF COURSE ANY SENTENCE WHICH THE LAW MIGHT FEEL COMPELLED TO INFLICT WOULD BE FOLLOWED BY AN IMMEDIATE PARDON BUT IT WAS HIGHLY DESIRABLE FROM THE GOVERNMENT'S POINT OF VIEW THAT THE NECESSITY FOR SUCH AN EXERCISE OF CLEMENCY SHOULD NOT ARISE +3997-182399-0008-1162: WASN'T ANYTHING GOING 
ON THAT THIS TRIFLING MEMBER OF THE BUZZARD (FAM'LY->FAMILY) DIDN'T FIND OUT ABOUT AND MEDDLE IN HE COULD ASK (MO->MORE) QUESTIONS THAN PETER RABBIT CAN (AN->AND) ANYBODY THAT CAN DO THAT HAS GOT TO ASK A LOT +5442-32873-0006-1658: BRIGHT AND NATTY WERE THE CHINTZ CURTAINS AND THE LITTLE TOILET SET OUT NOT INELEGANTLY AND HER PET PIPING GOLDFINCH ASLEEP ON HIS PERCH WITH HIS BIT OF SUGAR BETWEEN THE (WIRES->WINDS) OF HIS CAGE HER PILLOW SO WHITE AND UNPRESSED WITH ITS LITTLE EDGING OF LACE +7105-2330-0002-2242: THE JURY COULD NOT POSSIBLY FIND THAT THE PRISONER HAD NOT DELIBERATELY AND INTENTIONALLY BLOWN UP THE ALBERT HALL THE QUESTION WAS COULD THEY FIND ANY EXTENUATING CIRCUMSTANCES WHICH WOULD PERMIT OF AN ACQUITTAL +367-130732-0033-953: THIS BUTTER IS MADE AS FOLLOWS PLACE THE SHELLS (ON->IN) A BAKING SHEET IN THE OVEN TO DRY LET THE SHELLS COOL AND THEN POUND THEM IN A MORTAR WITH A LITTLE LOBSTER (CORAL->COAL) AND FOUR OUNCES OF FRESH BUTTER THOROUGHLY BRUISING THE WHOLE TOGETHER SO AS TO MAKE A FINE PASTE +7975-280076-0024-2513: (HELVIN->HELVAN) FICKLE AND WIFE OF GREENTON VALLEY WERE ATTENDING THE SPRINGS AT THAT TIME AND EITHER OF THEM WILL TESTIFY TO THE ABOVE FOR JOHN AND I SAT IN FRONT OF MISTER SMITH WHILE HE WAS PREACHING AND WAS IN HIS COMPANY FOR A FEW MOMENTS TOGETHER WITH HIS WIFE AND MISTER (AND MISSUS->MISS) FICKLE AFTER (*->THE) SERVICE +367-130732-0003-923: THE PACIFIC CRAYFISH HOWEVER SERVES EVERY PURPOSE AND WHILE MANY CONTEND THAT ITS MEAT IS NOT SO DELICATE IN FLAVOR AS THAT OF ITS EASTERN COUSIN THE CALIFORNIAN (WILL AS->WALLA) STRENUOUSLY (INSIST->INSISTS) THAT IT IS BETTER BUT OF COURSE SOMETHING MUST ALWAYS BE ALLOWED FOR THE PATRIOTISM OF THE CALIFORNIAN +7018-75789-0020-2228: PRESENTLY MY FRIENDS CAME TO ME AND I DISTRIBUTED PRESENTS AMONG MY FAMILY AND GAVE ALMS AND LARGESSE AFTER WHICH I YIELDED MYSELF TO JOYANCE AND ENJOYMENT MIRTH AND (MERRY MAKING->MERRYMAKING) AND FORGOT ALL THAT I HAD SUFFERED +367-293981-0013-967: DON QUIXOTE CONSENTED AND HE TAKING IT WITH BOTH HANDS IN GOOD FAITH AND WITH A BETTER WILL GULPED (*->IT) DOWN AND DRAINED (OFF->OUT) VERY LITTLE LESS THAN HIS MASTER +6432-63723-0053-2152: THERE WAS A RATTLE OF COINS ON THE MAHOGANY BAR AS KING SOUGHT TO DISENTANGLE A SINGLE BILL FROM THE WADDED UP CURRENCY IN HIS POCKET +3764-168670-0052-1027: AN OLD FELLOW OF THE OLD SCHOOL THE (GRAVE DIGGER->GRAVEDIGGER) PUTS THE CORPSES IN THE GRAVE AND I PUT THE (GRAVE DIGGER->GRAVEDIGGER) IN MY POCKET +7018-75789-0019-2227: HIS LETTER HATH SHOWN ME THIS AND AS FOR THE MIGHTINESS OF HIS DOMINION THOU HAST TOLD US WHAT THOU HAST EYE WITNESSED +3997-180294-0029-1117: (YOU->HE) SHOULD GO TO BED SHE REPLIED WITH THAT (IRONICAL->IRONIC) AIR WHICH WENT SO WELL WITH HER DELICATE AND WITTY FACE +4198-61336-0002-1237: WELL MIGHT (SHARDURIS->JODURIS) EXCLAIM IN THE WORDS OF THE PROPHET WHERE IS THE KING OF (ARPAD->ARPAT) +3538-142836-0021-847: HOWEVER AS LATE AS THE (REIGNS->REIGN) OF OUR TWO LAST GEORGES FABULOUS SUMS WERE OFTEN EXPENDED UPON FANCIFUL (DESSERTS->DESERTS) +4852-28319-0005-1507: HOW YOU HAVE IMPROVED MY BOY HE EXCLAIMED IT IS NOW TIME FOR YOU TO TRY (AND->*) THIS IS AS GOOD A CHANGE AS ANY +4350-9170-0020-1402: AND THEREFORE THE OPPRESSION OF THE OPPRESSED ALWAYS GOES ON GROWING UP TO THE FURTHEST LIMIT BEYOND WHICH IT CANNOT GO WITHOUT KILLING THE GOOSE WITH THE GOLDEN (EGGS->AXE) +6432-63723-0008-2107: YOU DON'T MEAN THAT LARCH STRUCK HER THAT THERE WAS PHYSICAL ABUSE DO YOU ASKED THE COLONEL THAT'S WHAT HE DID +5764-299665-0063-1867: ARE 
CHRISTIANS MORE TEMPERATE NEARER VIRTUOUS NEARER HONEST THAN SAVAGES +5484-24318-0011-1777: AGAIN HE HEARD THE BELOVED VOICE AND THIS TIME IT ADDRESSED NOT ONLY HIM BUT WITH THE UTMOST HASTE THE COMMANDER OF THE SOLDIERS +2414-159411-0029-352: VERY GOOD SAID THE JACKAL BUT I CANNOT JUDGE WITHOUT UNDERSTANDING THE WHOLE MATTER EXACTLY +4294-35475-0006-1297: ONE NIGHT AS HE LAY IN A DEEP FOREST TOO UNHAPPY TO SLEEP HE HEARD A NOISE NEAR AT HAND IN THE BUSHES +7902-96591-0012-2332: FOR IT SUDDENLY OCCURRED TO HIM THAT HE WAS NOT ONLY A PRISONER BUT A PRISONER IN THE POWER OF A VERY RECKLESS SET OF PEOPLE WHO WOULD STOP AT NOTHING +3331-159605-0043-652: HE MEANT TO GO AWAY BEFORE THAT SO DON'T THINK HIS HEART IS BROKEN OR MIND WHAT (SILLY TATTLERS->SIDDY TATLERS) SAY +8461-281231-0017-2917: THE LIFE OF EVERY MAN IN THE CASTLE SHALL ANSWER IT IF A HAIR OF HIS HEAD BE SINGED SHOW ME HIS CHAMBER +8280-266249-0041-2842: THEY HEARD HIM IN SILENCE WITH A COOL PHLEGMATIC INDIFFERENCE MOST EXASPERATING TO ONE IN HIS PRESENT MOOD +8188-269290-0021-2752: YOU SEE ALL THE GIRLS EXCEPT EILEEN AND MARJORIE LAUGH AT HER AND THAT SEEMS TO ME TO MAKE HER WORSE +3764-168670-0022-997: FAUCHELEVENT WHO WAS SEATED SPRANG UP AS THOUGH A BOMB HAD BURST UNDER HIS CHAIR YOU +8280-266249-0011-2812: I DO INDEED THOUGH PROBABLY COMPARATIVELY FEW ARE AWARE THAT TOBACCO IS THE CAUSE OF THEIR AILMENTS +533-1066-0011-1567: LIDDY WAS SLEEPING THE SLEEP OF THE JUST WHEN I WENT (UP STAIRS->UPSTAIRS) AND I HUNTED OUT MY THINGS CAUTIOUSLY +3005-163389-0017-457: HE SHED THEM SO THICK THEY KIND OF CLOGGED UP THE AIR AND ALTOGETHER HE SHED SEVENTEEN SUITS +4294-9934-0009-1327: IN THE TROUBLED STATE OF HIS CONSCIENCE HE NO LONGER THOUGHT OF CERTAIN SERIOUS SIDES OF EXISTENCE +5764-299665-0078-1882: IN THE INFINITE CHAIN THERE IS AND THERE CAN BE NO BROKEN NO MISSING LINK +533-131564-0027-1651: AFTER THAT THEY WILL REPAIR TO THEIR COUNTRY HOME +4198-12259-0012-1187: BRAVELY AND WELL PLAYED UPON THE WORDS +5442-41168-0024-1696: AND THE MARSHAL DISAPPEARED THROUGH A SIDE DOOR +8131-117016-0026-2581: THE OTHER FOUR COPS HAD COME IN RELUCTANTLY +533-131564-0012-1636: BUT HANG IT THAT'S NOT MY FAULT +3528-168669-0072-767: THINK FATHER (FAUVENT->FAUVAIN) IF SHE WERE TO WORK MIRACLES HERE +5764-299665-0042-1846: MAN (JUDGES->JUDGETH) HIMSELF +6432-63722-0017-2056: LOOK HERE COLONEL DO YOU KNOW ANYTHING ABOUT THIS +3538-163619-0003-856: KISS ME GIRL SAID THE HEAD +4198-12259-0041-1216: I SHOULD SAY MASTER (PAST->PASS) +3528-168669-0101-796: BUT SHE WILL HEAR SHE WILL NOT LISTEN +4294-9934-0018-1336: DO YOU KNOW GERMAN NO +3528-168669-0026-721: NO DID YOU NOT HEAR THE BELL +7902-96591-0021-2341: A NARROW TABLE AGAINST THE WALL IN TWO PLACES +4852-28319-0014-1516: THEN ALL AT ONCE THE IDEA CAME TO CHRIS +3528-168669-0086-781: ONLY FANATICS AND THOSE IN ERROR DENY IT +8131-117029-0006-2656: (SCHULBERG'S->SCHOLBURG'S) VOLUNTEERS WERE OFFICIAL NOW +1688-142285-0002-2: YOU DON'T MEAN THAT YOU THOUGHT ME SO SILLY +533-131556-0010-1591: AH YOU ARE SUSPICIOUS +1688-142285-0062-62: THAT'S WHAT I BELIEVE YOUNG WOMAN +3764-168670-0002-977: JEAN VALJEAN HAD PLACED HER NEAR THE FIRE +4198-61336-0026-1261: ISRAEL WAS ALSO DEALT WITH +3997-180294-0024-1112: THE BOXES FILLED ONE AFTER ANOTHER +3528-168669-0071-766: OH I AM A STONE IN YOUR WALLS +533-131556-0025-1606: HOW DARE YOU MENTION HIS NAME TO ME +2414-128292-0012-302: THOU ART NOT PLEASING UNTO ME +4350-10919-0019-1367: OH TIME'S UP ALREADY AND HE WENT TO THE DOOR +3528-168669-0056-751: FOR THAT MATTER 
NO REVEREND MOTHER +6432-63723-0017-2116: AND IT TAKES ALL SORTS OF PERSONS TO MAKE IT UP +6432-63722-0032-2071: YES IT MAY HAVE SOME ROUGH EDGES ON IT +1998-29454-0013-137: AND THIS IS THE PRETTIEST PLACE EVER I SEE +3997-180294-0023-1111: COME (DURING->DUN) THE THIRD (ENTR'ACTE->ENTRANCE) +7902-96595-0002-2431: WHAT CHUCKED HIM OFF YONDER +1688-142285-0077-77: SHE WAS LATE FOR TEA AT HOME +7902-96592-0041-2386: ARCHY CHECKED HIMSELF AND THE BOY LAUGHED +2414-128291-0009-272: BUT BEHOLD (THESE KINE->THIS KIND) +3764-168671-0049-1082: SO YOU ARE NOT A GRAVE DIGGER THEN +8188-269288-0013-2686: JANE HERIOT STOOD WITHOUT +3528-168669-0087-782: WE LIVE IN TIMES OF TERRIBLE CONFUSION +3528-168669-0041-736: FAUCHELEVENT MOPPED HIS FOREHEAD +4350-9170-0044-1426: AND FOR THE SAKE OF WHAT AM I MAKING THEM +5484-24318-0020-1786: I LOVE YOU AND HAVE LOVED YOU ALWAYS +5442-41169-0011-1711: IT'S A MORAL OBLIGATION OF A SORT +6070-86745-0015-1966: ABOUT WHAT ABOUT THE PAPERS +4852-28311-0013-1456: (AW->OH) SHUCKS +3997-182399-0003-1157: ANYWAY HE WOULD FIND OUT +8188-269290-0030-2761: I'LL WAIT FOR YOU HERE SAID LESLIE +1998-29454-0028-152: GET (IT WROTE->US WELL) DOWN THEN DONE +4294-14317-0010-1276: (GIORGIO->GEORGE O) CAME FOR THEM HIMSELF +3538-163622-0003-871: YES THAT I HAVE SAID THE YOUTH +2414-128291-0024-287: THOU AMIABLE ONE +3005-163399-0010-527: WHY CHILD (IT LL->IT'LL) BE STOLE +8188-269290-0000-2731: THE (GUILD->GIRL) OF SAINT ELIZABETH +3764-168670-0001-976: COSETTE HAD WAKED UP +3331-159605-0038-647: DON'T YOU THINK HE MEANS TO +3764-168671-0018-1051: HE PLAYED WITH FATHER (MESTIENNE->MISTIAN) +8188-269288-0028-2701: SHE LOOKED ROUND THE ROOM +1688-142285-0047-47: BUT WHAT WAS IT +3764-168671-0033-1066: THE MAN REPLIED +7018-75788-0011-2199: THERE I ABODE A LITTLE AND THEN WENT ON TO BAGHDAD WHERE I ENTERED MY QUARTER AND FOUND MY HOUSE AND FOREGATHERED WITH MY FAMILY AND SALUTED MY FRIENDS WHO GAVE ME JOY OF MY SAFE RETURN AND I LAID UP ALL MY GOODS AND VALUABLES IN MY STOREHOUSES +5484-24317-0017-1749: WE WOMEN ARE ONLY AS OLD AS WE LOOK AND THE LEECHES AND TIRING WOMEN OF THIS BEAUTY OF FORTY PRACTISE ARTS WHICH GIVE HER THE APPEARANCE OF TWENTY FIVE YET PERHAPS THE KING VALUES HER INTELLECT MORE THAN HER PERSON AND THE WISDOM OF A HUNDRED SERPENTS IS CERTAINLY UNITED IN THIS WOMAN'S HEAD +533-1066-0013-1569: THERE WERE A FEW PREPARATIONS TO BE MADE (THE LOCKS->LOGS) TO BE GONE OVER WINTERS TO BE INSTRUCTED AS TO RENEWED VIGILANCE AND THEN AFTER EXTINGUISHING THE (HALL->WHOLE) LIGHT WE CREPT IN THE DARKNESS THROUGH THE FRONT DOOR AND INTO THE NIGHT +3080-5040-0004-579: I NEVER SAW ANY ONE YET THAT DID NOT LOOK SIMPLY AND OUT OF COUNTENANCE NOR EVER KNEW A WEDDING WELL DESIGNED BUT ONE AND THAT WAS OF TWO PERSONS WHO HAD TIME ENOUGH I CONFESS TO CONTRIVE IT AND NOBODY TO PLEASE (IN'T->IN) BUT THEMSELVES +3005-163390-0015-474: AND LOOK AT CHARLES SECOND AND LOUIS FOURTEEN AND LOUIS (FIFTEEN->FIFTEENTH) AND JAMES SECOND AND EDWARD SECOND AND RICHARD THIRD AND FORTY MORE BESIDES ALL THEM SAXON (HEPTARCHIES->HEPTARKEYS) THAT USED TO RIP AROUND SO IN OLD TIMES AND RAISE (CAIN->CANE) +3331-159605-0030-639: SHE THOUGHT SHE HAD A GOOD DEAL OF THE COQUETTE IN HER AND (I VE->I'VE) NO DOUBT THAT WITH TIME AND TRAINING SHE WOULD HAVE BECOME A VERY DANGEROUS LITTLE PERSON BUT NOW SHE WAS FAR TOO TRANSPARENT AND STRAIGHTFORWARD BY NATURE EVEN TO TELL A (WHITE LIE CLEVERLY->WI LIKE LEVERLY) +5764-299665-0020-1824: DO WE PROVE HIS GOODNESS BY SHOWING THAT HE HAS OPENED THE EARTH AND SWALLOWED 
THOUSANDS OF HIS HELPLESS CHILDREN OR THAT WITH THE VOLCANOES HE HAS OVERWHELMED THEM WITH RIVERS OF FIRE +4294-35475-0023-1314: TO HIM WHO COULD BRING HER BACK TO HER FATHER'S CASTLE SHOULD BE GIVEN THE THRONE AND KINGDOM AS WELL AS THE PRINCESS HERSELF SO FROM FAR AND NEAR INDEED FROM ALMOST EVERY COUNTRY UNDER THE SUN CAME KNIGHTS AND PRINCES TO FIGHT THE OGRE +8461-278226-0005-2889: SHE RETURNED IN A LITTLE MORE THAN TEN MINUTES IN THE FRESHEST TOILETTE ALL PALE SHIMMERING BLUE LIKE THE SPRING SKY WITH (PEARL GREY->PURLE GRAY) GLOVES AND BOOTS AND PARASOL AND A BONNET THAT SEEMED MADE OF AZURE BUTTERFLIES +3331-159605-0015-624: THIS FINISHED POLLY'S INDECISION AND AFTER THAT NIGHT SHE NEVER ALLOWED HERSELF TO DWELL UPON THE PLEASANT TEMPTATION WHICH CAME IN A GUISE PARTICULARLY ATTRACTIVE TO A YOUNG GIRL WITH (A SPICE->THE SPIES) OF THE OLD EVE IN HER COMPOSITION +7105-2330-0004-2244: A HEADLONG PARDON ON THE EVE OF A (BYE->BY) ELECTION WITH THREATS OF A HEAVY VOTING DEFECTION IF IT WERE WITHHELD OR EVEN DELAYED WOULD NOT NECESSARILY BE A SURRENDER BUT IT WOULD LOOK LIKE ONE +5484-24318-0006-1772: WHATEVER MIGHT AWAIT HIM HE DESIRED NO BETTER FATE +7902-96591-0023-2343: BUT THERE WAS NO CHANCE FOR HIS BODY THERE (*->AND) THE HEAD WOULD NOT GO FIRST +4852-28319-0000-1502: THE LEARNING OF MAGIC WAS BY NO MEANS EASY +6070-86744-0016-1937: DID YOU EVER MEET HIM PREVIOUSLY TO COMING HITHER +533-131564-0013-1637: NOT YEARS FOR (SHE'S->SHE IS) ONLY FIVE AND TWENTY +367-293981-0009-963: THE (OFFICER->OFFICERS) TURNED TO HIM AND SAID WELL HOW GOES IT GOOD MAN +6432-63723-0034-2133: I'M GOING TO RECTIFY THEM BUT IT WILL TAKE TIME +6432-63722-0034-2073: IF YOU DON'T MIND I SHOULD LIKE TO EXAMINE THIS A BIT +5764-299665-0044-1848: MAN HAS DECEIVED HIMSELF +5484-24318-0021-1787: DAPHNE EXCLAIMED TENDERLY WHAT MORE IS NEEDED +3005-163399-0011-528: IT WAS (KINDER->KIND OF) THIN ICE BUT I SAYS +5442-41168-0025-1697: THEY WERE TO PROCEED IMMEDIATELY TO THE ELECTION +4350-10919-0005-1353: AS YOU PLEASE THE PRINCESS WENT OUT WITH A SIGH +6070-86744-0017-1938: UPON MY HONOR THEN LISTEN TO ME +3764-168671-0020-1053: THE PERMISSION FOR INTERMENT MUST BE EXHIBITED +4294-9934-0005-1323: THE HALF LIGHTS OF DOUBT PAINED HIM +6432-63722-0003-2042: GET READY (SHAG->SHAD) YES (SAH->A) COLONEL +7018-75789-0000-2208: WHEN IT WAS THE FIVE HUNDRED AND SIXTY FIRST NIGHT +5442-41169-0027-1727: WHY DON'T WE CUT DOWN OUR (PARKS->BOGS) FOR TIMBER +1998-29454-0029-153: THEN HE FOLDED IT AND PUT IT IN HIS POCKET +3764-168671-0035-1068: THE GRAVE DIGGER WALKED ON IN FRONT OF HIM +3080-5032-0010-558: I THINK MY YOUNGEST BROTHER COMES DOWN WITH HIM +5484-24317-0026-1758: (HERMON->HERMANN) LISTENED TO THE PAIR IN SILENCE +3764-168670-0033-1008: WHO NAILS UP THE COFFIN I DO +8188-269290-0016-2747: JANE (HERIOT'S->HERRIOT'S) VOICE WAS HEARD IN THE PASSAGE +3005-163389-0013-453: AND ONE OR TWO WOMEN (BEGUN->BEGIN) TO SCREAM +3331-159605-0039-648: TRULY (TRULY FAN->JULIE FANN) +3528-168669-0118-813: AT ELEVEN O'CLOCK EXACTLY I AM TO BE IN THE CHAPEL +7902-96592-0043-2388: YOU LAUGHED AND FLEERED AT ME WHEN I WAS ON THE CUTTER'S DECK +2033-164916-0006-258: WHEN IT WAS THE SEVENTY EIGHTH NIGHT +3005-163390-0024-483: THIS (ONE'S->WAS) A (MIDDLING->MIDDLIN) HARD LOT FOR A (DUKE->DUPE) +7902-96594-0022-2418: NOW THEN WHY DO YOU WANT TO GO ASHORE +1688-142285-0078-78: HAVE YOU MET WITH A SERVANT DEAR +3538-163624-0010-903: SO SIGURD SAID THAT SWORD WOULD DO +4852-28312-0018-1488: THE ROOM SEEMED OVERLY STILL +3005-163391-0023-513: SO THEN 
THEY WAITED FOR A STEAMBOAT +3528-168669-0058-753: IT IS A CONTINUATION OF HER SLUMBER +3080-5040-0028-603: WILL YOU BE SO GOOD NATURED +7902-96592-0027-2372: NOTHING BUT THE CHIMNEY PRESENTED ITSELF +3528-168669-0088-783: WE ARE IGNORANT AND IMPIOUS +6432-63723-0033-2132: DON'T WORRY IT WILL COME OUT ALL RIGHT +7975-280084-0004-2522: GET YOUR GUNS BOYS THEY'RE ROBBING THE BANK +2414-128292-0013-303: MUST I EVER BE ON THE WAY +3005-163399-0026-543: THE OLD GENTLEMAN STARED AND SAYS +6432-63723-0018-2117: STILL I WOULD LIKE TO KNOW +8280-266249-0036-2837: WHAT DOES IT MEAN CRIED ONE +7902-96595-0004-2433: AH EJACULATED DICK SADLY +7902-96594-0021-2417: HOPPING ABOUT LIKE A CAT ON HOT BRICKS +6432-63722-0049-2088: AND YOU I DON'T WANT IT EITHER +7902-96594-0006-2402: SO SHALL WE YET SIR +1998-29454-0014-138: I SHALL CATCH IT A FAIR TREAT AS IT IS +6432-63723-0003-2102: NO I WASN'T THINKING OF THAT +8188-269290-0031-2762: DO COME ANNIE DO +4294-9934-0020-1338: THE CLOTHES DEALER WAS SENT FOR +8188-269290-0046-2777: ALL MEN ARE YOUR BROTHERS +4350-9170-0001-1383: THIS IS ABSOLUTELY INCORRECT +3528-168669-0013-708: AND CAN YOU GET A (LEVER->LOVER) +4852-28311-0000-1443: SAY YOU KNOW (SUMTHIN->SUPPER) +3528-168669-0103-798: A PAUSE ENSUED +4350-9170-0004-1386: THE MAN WHO IS CONTROLLED BY MORAL INFLUENCE ACTS IN ACCORDANCE WITH HIS OWN DESIRES +6070-86745-0019-1970: WITH YOUR TALENTS YOU WOULD MAKE YOUR FORTUNE IN THREE OR FOUR YEARS +367-130732-0001-921: WHEN IS A LOBSTER NOT A LOBSTER WHEN IT IS A CRAYFISH +3764-168670-0036-1011: COULD YOU HIDE ME IN THAT ROOM TO NIGHT WHEN (EVERY ONE->EVERYONE) IS ASLEEP +4198-61336-0015-1250: THIS USURPER HELD SWAY AT (SAMARIA->SUMERIA) FOR ONLY A MONTH +3538-163624-0013-906: SIGURD SAID I WOULD TOUCH NONE OF IT IF BY LOSING IT I SHOULD NEVER DIE +533-131562-0004-1611: AND PUTTING THE KEYS INTO HIS POCKET HE WALKED INTO THE LIBRARY +7105-2330-0031-2271: WITHOUT A BAND HE WOULD NOT GO AND THEY HAD NO BAND +6432-63722-0052-2091: IF YOU'LL EXCUSE ME I'LL PRETEND I'M FISHING AND I MAY CATCH SOMETHING +7902-96592-0031-2376: BUT AT THE END OF FIVE MINUTES HE STOPPED AND THRUST BACK THE DIRK INTO ITS SHEATH +4852-28330-0020-1550: WHAT CAN BE SAID DURING THAT TIME SIR CHRIS THOUGHT TO ASK +7902-96592-0000-2345: SURE YOU'VE LOOKED ROUND EVERYWHERE BOY YES FATHER QUITE +8131-117016-0030-2585: WE'RE NOT USING WAGONS (MURDOCH->MURDOCK) TOLD HIM LINE THEM UP +7902-96595-0006-2435: I HOPE NOT DICK I HOPE NOT BUT SMUGGLERS DON'T STAND AT ANYTHING SOMETIMES +8461-258277-0003-2870: REPLIED THE BROKER'S DAUGHTER O THOU DAUGHTER OF A DOG +8131-117016-0061-2616: BUT THERE PROBABLY WOULDN'T BE TIME FOR IT IF MAYOR (WAYNE->WAIN) WAS RE ELECTED +4350-10919-0023-1371: WHEN THE DOCTOR CAME IN SHE FLUSHED CRIMSON AND HER EYES FILLED WITH TEARS +4350-10919-0022-1370: AND THE MOTHER ACCOMPANIED BY THE DOCTOR WENT INTO THE DRAWING ROOM TO KITTY +5764-299665-0091-1895: PASSION IS AND ALWAYS HAS BEEN (DEAF->DEATH) +7105-2340-0034-2316: MISSUS PETER TURNED TO HER (GUEST->GUESTS) WITH CONFIDENTIAL (COYNESS->KINDNESS) +5764-299665-0002-1806: WHETHER ANY PRAYER WAS EVER ANSWERED +1998-29455-0015-186: POOR LITTLE MAN SAID THE LADY YOU MISS YOUR MOTHER DON'T YOU +4350-10919-0007-1355: AND THERE ARE INDICATIONS (MALNUTRITION->MAL NUTRITION) NERVOUS EXCITABILITY AND SO ON +8131-117017-0028-2645: THE KID POCKETED THE MONEY CHEERFULLY NODDING +533-1066-0009-1565: SOMETHING IS GOING TO OCCUR HE SAID +533-131564-0002-1626: NO UNLESS YOU CAN TELL ME WHEN TO EXPECT HIM HOME +5764-299665-0001-1805: 
WHETHER HE WAS THE CREATOR OF YOURSELF AND MYSELF +4852-28311-0002-1445: KNOW WHO NEEDS A JOB (BAD->BAN) THAT'S (JAKEY->JIKI) HARRIS +4852-28312-0021-1491: ACROSS THE WATER WHERE WAS THE FREEWAY +3764-168671-0023-1056: THE GRAVE DIGGER YES +5764-299665-0077-1881: THAT WHICH HAS NOT HAPPENED COULD NOT +6432-63722-0022-2061: ANYHOW HE AND (PHUT->FLUT) DIDN'T GET ALONG VERY WELL IT SEEMS +8131-117016-0060-2615: THIS COULD LEAD TO ABUSES AS HE'D SEEN ON EARTH +8131-117017-0013-2630: THERE MUST HAVE BEEN OVER TWO THOUSAND CREDITS IN THE WALLET +3764-168671-0053-1086: THE GRAVE DIGGER WENT ON WITH A SUPERIOR SMILE +6128-63244-0002-2015: PERHAPS SHE COULD SPEAK FOR THEM MORE THAN FOR SOME OTHERS +3538-163622-0023-891: THAT IS A RIVER SAID THE FOAL AND WE HAVE TO CROSS IT +3538-163619-0008-861: WHAT IS MY BROTHER SAYING ASKED HIS SISTER AGAIN +7105-2330-0030-2270: IT WAS FROM THE CENTRAL COMMITTEE ROOMS AT NEMESIS +2033-164914-0005-216: TELL ME WHAT HAPPENED QUOTH (ZAU AL->ZUL) MAKAN +5442-32873-0003-1655: OH FRIGHTFUL FRIGHTFUL IS IT A DREAM +3005-163391-0026-516: THEN ONE OF THEM SAYS KIND OF SOFT AND GENTLE +8131-117016-0001-2556: BUT MARSPORT HAD FLOURISHED ENOUGH TO KILL IT OFF +6938-70848-0008-2165: THE (PEASANTS ARE THE->PEACE AND OTHER) PEOPLE OF RUSSIA WAIT UNTIL THE PEASANTS +4294-9934-0023-1341: (HELLO->HALLO) I HAD FORGOTTEN THAT SAID MARIUS +8461-278226-0002-2886: ONE MORNING LAURA TOLD HER HUSBAND WITH A GAY LAUGH THAT SHE WAS GOING TO VICTIMIZE HIM BUT HE WAS TO PROMISE TO BE PATIENT AND BEAR WITH HER FOR ONCE IN A WAY +8188-274364-0006-2795: LET US NOT TO OUR OWN DESTRUCTION AWAKE THOSE SLEEPING LIONS BY RATTLING UP A COMPANY OF OLD RECORDS WHICH HAVE LAIN FOR SO MANY AGES BY THE WALL FORGOTTEN AND NEGLECTED +8131-117029-0010-2660: THAT'S MARS GORDON ECHOED THE OTHER'S COMMENT WHY DON'T YOU PULL OFF THE PLANET (FATS->THATS) YOU COULD GO BACK TO EARTH I'D GUESS THE OTHER NODDED +6432-63722-0037-2076: AND A CLOSE OBSERVER MIGHT HAVE OBSERVED THAT HE DID NOT TOUCH HIS BARE FINGERS TO THE TIMEPIECE BUT POKED IT ABOUT AND TOUCHED IT HERE AND THERE WITH THE END OF A (LEADPENCIL->LEAD PENCIL) +8131-117029-0011-2661: GUESS A MAN GETS USED TO ANYTHING HELL MAYBE I CAN HIRE SOME BUMS TO SIT AROUND AND WHOOP IT UP WHEN THE SHIPS COME IN AND (BILL->BUILD) THIS AS A REAL OLD MARTIAN DEN OF SIN +7105-2340-0003-2285: WHEN A MAN IS ABSOLUTELY WEALTHY NOT MERELY WELL TO DO ALL SUSPICION OF SORDID MOTIVE NATURALLY DISAPPEARS THE THING BECOMES MERELY A TIRESOME MALADY +3997-182399-0007-1161: LIKE MOST (NO COUNT->NOCOMN) PEOPLE HE USED TO MAKE A REGULAR NUISANCE OF (HISSELF->HIMSELF) POKING HIS NOSE INTO (EV'YBODY'S->EVERYBODY'S) BUSINESS AND NEVER TENDING TO HIS OWN +8461-281231-0000-2900: HIS FOLLOWERS RUSHED FORWARD TO WHERE HE LAY AND THEIR UNITED FORCE COMPELLING THE BLACK KNIGHT TO PAUSE THEY DRAGGED (THEIR->THE) WOUNDED LEADER WITHIN THE WALLS +3080-5032-0013-561: I HAVE MISSED FOUR FITS AND (*->HAVE) HAD BUT FIVE AND HAVE RECOVERED SO MUCH STRENGTH AS MADE ME VENTURE TO MEET YOUR LETTER ON WEDNESDAY A MILE FROM HOME +7105-2340-0033-2315: PETER DASHED OUT OF THE ROOM WITH GLAD RELIEF HE HAD LIVED SO LONG DURING THE LAST FEW MINUTES THAT A GOLDEN WEDDING SEEMED WITHIN MEASURABLE DISTANCE +6432-63723-0052-2151: BECAUSE DEAR FRIEND REPLIED KING SOFTLY HE SOMEWHAT RESEMBLES A CERTAIN PERSON HERE WHO TALKS TOO MUCH BUT WHO IS NOT SO WISE AS HE THINKS +8188-269288-0002-2675: HER TASTES ALL LAY IN THIS DIRECTION HER IDEA BEING BY AND BY TO FOLLOW HER MOTHER'S PROFESSION OF JOURNALISM FOR WHICH SHE 
ALREADY SHOWED CONSIDERABLE APTITUDE +2033-164914-0020-231: O MY LORD CONTINUED THE EUNUCH AND SHAHRAZAD PERCEIVED THE DAWN OF DAY AND CEASED TO SAY HER PERMITTED SAY +8461-258277-0004-2871: AND HAVING THUS ISLAMISED SHE ASKED HIM (DO->TWO) MEN IN THE FAITH OF AL ISLAM GIVE MARRIAGE PORTIONS TO WOMEN OR DO WOMEN (DOWER->TO OUR) MEN +2414-128291-0013-276: THE KINGDOM OF HEAVEN HOWEVER IS WITH THE (KINE->KIND) AND WHY IS IT NOT WITH THE RICH +4198-12259-0001-1176: SO MY FRIEND SO WHIP ME OFF THIS GLASS NEATLY BRING ME HITHER SOME CLARET A FULL WEEPING GLASS TILL IT RUN OVER +5764-299665-0017-1821: LIPS RELIGIOUS AND FEARFUL TREMBLINGLY REPEAT THIS PASSAGE THOUGH HE SLAY ME YET WILL I TRUST HIM +4852-28312-0019-1489: THEN IN THAT SECOND HE TURNED AND FACED ABOUT +2609-156975-0018-379: NATURALLY HE WENT TO THE LAND OF (MIDIAN->MEDIAN) +8131-117017-0026-2643: HE PULLED OUT THE BILLS AND HANDED THEM OVER +2609-156975-0003-364: THE EGYPTIAN BACKGROUND OF THE BONDAGE +3997-180294-0011-1099: THEY LOVE BY PROFESSION AND NOT BY INSTINCT +1998-29454-0046-170: WELL (YOU'LL->YOU) KNOW ALL ABOUT IT PRESENTLY +7975-280084-0006-2524: ALMOST AT THIS INSTANT I HEARD A PISTOL SHOT IN THE BANK +3528-168669-0044-739: SHE RETAINED HER CONSCIOUSNESS TO THE VERY LAST MOMENT +1688-142285-0019-19: NOT VICIOUS HE NEVER SAID THAT +8280-266249-0008-2809: DO YOU SON WAS THE SMILING REJOINDER +1998-29455-0013-184: THAT'S ALL RIGHT SAID MISTER (BEALE->BELE) AWKWARDLY +3538-163619-0006-859: AT LAST THEY CAME IN SIGHT OF LAND +3528-168656-0012-694: MORAL LOVE CONQUERED BY THE COLIC +3005-163399-0027-544: I HAIN'T NO IDEA WHO IS IT +8461-278226-0000-2884: AND LAURA HAD HER OWN PET PLANS +3528-168669-0089-784: AND THEN RELIGION IS ATTACKED WHY +3528-168669-0119-814: MOTHER (ASCENSION->ASCENSON) WILL BE THERE TWO MEN WOULD BE BETTER +5442-32873-0002-1654: A SLAVE ONLY THINK A SLAVE +8188-269288-0046-2719: DO YOU WANT TO KILL ME DON'T TALK ANY MORE +4852-28319-0017-1519: BUT HOW DID ONE CHANGE INANIMATE TO ANIMATE +8461-281231-0013-2913: AT LENGTH (DE BRACY->THE BRACELEY) FELL +8131-117016-0044-2599: BUT THE (STONEWALL->STERNWALL) GANG IS BACKING (WAYNE->WAIN) +8131-117016-0029-2584: TO FIND A PHONE AND CALL THE WAGON +5764-299665-0045-1849: HAS CHRISTIANITY DONE GOOD +1998-29455-0014-185: DICKIE QUICK TO IMITATE TOUCHED HIS +4350-9170-0017-1399: BUT THIS JUSTIFICATION IS NEVER MORE THAN TEMPORARY +6432-63723-0020-2119: IT HAD A DOUBLE REPUTATION SO TO SPEAK +1998-29454-0031-155: THEY COULD PUT A MAN AWAY FOR LESS THAN THAT +3005-163391-0024-514: BUT THE KING WAS (CA'M->CALM) HE SAYS +533-131564-0015-1639: I KNOW THEY ARE BLESS THEM +5442-32873-0017-1669: LOSE NO TIME AND I'LL GIVE YOU HALF A CROWN +3764-168671-0051-1084: HERE A REMARK BECOMES NECESSARY +8280-266249-0022-2823: HENCE A RESORT TO CIDER AND BEER +4198-61336-0029-1264: HE INVADED BABYLONIA +2609-157645-0009-409: THEY HAVE SIR REPLIED THE CLERK +3997-180297-0007-1129: IS THAT REALLY THE ONLY REASON +8188-269290-0002-2733: I'M NOT COMING SAID ANNIE +7902-96595-0020-2449: I HAVE SEEN NO ONE ANSWERING TO THE DESCRIPTION HERE +4198-12259-0014-1189: WELL (CACKED->KACKLED) WELL SUNG +1998-29454-0016-140: (AIN'T->AND) BAD WHEN SHE'S IN A GOOD TEMPER +6070-86744-0003-1924: I CAN SCARCELY CREDIT IT +3764-168670-0019-994: WHAT IS THAT EMPTY COFFIN +7105-2330-0029-2269: A TELEGRAM WAS BROUGHT IN +1998-29454-0000-124: A THOUSAND BLESSINGS FROM A GRATEFUL HEART +2414-128292-0030-320: THOU HAST LOST THY (GOAL->GOLD) +8188-269288-0045-2718: DRINK THAT SHE SAID 
+8188-274364-0004-2793: (WHERE->WERE) THE TOKEN BY WHICH I (SHOULD->SHALL) DISCOVER IT +2414-128291-0026-289: THOU (EVIL->AVIOUS) FLATTERER +8131-117029-0009-2659: THOUGHT YOU'D BE IN THE CHIPS +6432-63723-0005-2104: WELL I DON'T KNOW THAT YOU CAN +4852-28330-0004-1534: THE MIRABELLE WAS NEARING (TAHITI->TITTI) +3331-159609-0008-664: THANK HEAVEN FOR THAT +533-131564-0000-1624: VAIN HOPE I FEAR +3528-168669-0014-709: THERE IS A RING IN THE STONE +1998-29454-0030-154: NOW WE'RE (SQUARE->SQUEER) HE SAID +3764-168670-0034-1009: WHO SPREADS THE (PALL->PAW) OVER IT +3528-168669-0029-724: IT WAS MOTHER (CRUCIFIXION->CROSS FICTION) +3331-159605-0025-634: THANK YOU NO +3528-168669-0104-799: YOU WILL REMOVE YOUR (BELL->BELT) +367-293981-0010-964: SANCHO GOT UP WITH PAIN ENOUGH IN HIS BONES AND WENT AFTER THE INNKEEPER IN THE DARK AND MEETING THE OFFICER WHO WAS LOOKING TO SEE WHAT HAD BECOME OF HIS ENEMY HE SAID TO HIM SENOR WHOEVER YOU ARE DO US THE FAVOUR AND KINDNESS TO GIVE US A LITTLE ROSEMARY OIL SALT AND WINE FOR IT IS WANTED TO CURE ONE OF (THE->OUR) BEST KNIGHTS ERRANT ON EARTH WHO LIES ON YONDER BED WOUNDED BY THE HANDS OF THE ENCHANTED MOOR THAT IS IN THIS INN +5484-24317-0012-1744: TRUE AN INTERESTING CONVERSATION STILL HAD POWER TO CHARM HIM BUT OFTEN DURING ITS CONTINUANCE THE FULL CONSCIOUSNESS OF HIS MISFORTUNE FORCED ITSELF UPON HIS MIND FOR THE MAJORITY OF THE SUBJECTS DISCUSSED BY THE ARTISTS CAME TO THEM THROUGH THE MEDIUM OF SIGHT AND REFERRED TO NEW CREATIONS OF ARCHITECTURE SCULPTURE AND PAINTING FROM WHOSE ENJOYMENT HIS BLINDNESS DEBARRED HIM +5484-24317-0027-1759: THE (RHODIAN->ROUDIAN) WAS JUST BEGINNING TO PRAISE (ARSINOE->ARSENAL) ALSO AS A SPECIAL FRIEND AND CONNOISSEUR OF THE (SCULPTOR'S->SCULPT'S) ART WHEN CRATES (HERMON'S->HERMANN'S) FELLOW STUDENT ASKED THE BLIND ARTIST IN BEHALF OF HIS BEAUTIFUL COMPANION WHY (HIS DEMETER->DEMEANOUR) WAS PLACED UPON A PEDESTAL WHICH TO OTHERS AS WELL AS HIMSELF SEEMED TOO HIGH FOR THE SIZE OF THE STATUE +4198-12281-0000-1219: ALTHOUGH THE PLAGUE WAS THERE IN THE MOST PART OF ALL THE HOUSES THEY NEVERTHELESS ENTERED EVERYWHERE THEN PLUNDERED AND CARRIED AWAY ALL THAT WAS WITHIN AND YET FOR ALL THIS NOT ONE OF THEM TOOK ANY HURT WHICH IS A MOST WONDERFUL CASE +6070-86745-0002-1953: AT A QUARTER TO TEN A VALET ENTERED HE COMPOSED WITH A LITTLE GROOM NAMED JOHN AND WHO ONLY SPOKE ENGLISH ALL ALBERT'S ESTABLISHMENT ALTHOUGH THE COOK OF THE HOTEL WAS ALWAYS AT HIS SERVICE AND ON GREAT OCCASIONS THE COUNT'S CHASSEUR ALSO +2033-164914-0017-228: THEN HE KISSED THE EUNUCH'S HEAD AND SPAKE HIM (FAIR->FARE) TILL HE WENT AWAY BUT THE CASTRATO FETCHED A ROUND AND RETURNING SECRETLY CAME AND STOOD BEHIND THE FIREMAN FEARING TO GO BACK TO HIS MISTRESS WITHOUT TIDINGS +533-131564-0017-1641: I SOUGHT (OUT AND->HOUGHTON) PUT INTO HIS HANDS TWO OF (MILICENT'S->MILICON'S) LETTERS ONE (DATED->THEY DID) FROM LONDON AND WRITTEN DURING ONE OF HIS WILDEST SEASONS OF RECKLESS DISSIPATION THE OTHER IN THE COUNTRY DURING A LUCID INTERVAL +1688-142285-0022-22: JUST AS SHE WAS LEAVING THE ROOM SHE HESITATED SHE WAS INCLINED TO MAKE AN ACKNOWLEDGMENT WHICH SHE THOUGHT WOULD PLEASE HER FATHER BUT WHICH TO BE FULL AND TRUE MUST INCLUDE A LITTLE ANNOYANCE +7105-2340-0004-2286: WILFRID PIGEONCOTE HAD SUDDENLY BECOME HEIR TO HIS UNCLE SIR WILFRID (PIGEONCOTE->PIGEON COTE) ON THE DEATH OF HIS COUSIN MAJOR WILFRID PIGEONCOTE WHO HAD SUCCUMBED TO THE AFTER EFFECTS OF A POLO ACCIDENT +4852-28319-0019-1521: HE HEARD (THE->THAT) MAGICIAN GOING UP THE SPIRAL STAIRCASE TO HIS 
ROOM ABOVE AND AFTER CHANGING HIMSELF TO A MOUSE TO SLIP UNDER THE DOOR AND SEE THAT THE ROOM WAS REALLY EMPTY (CHRIS->IT) RESUMED (HIS->ITS) PROPER SHAPE AND OPENED THE DOORS OF THE CUPBOARD AT THE FAR END OF THE ROOM +2609-156975-0036-397: THE WILDERNESS WITH ITS LURKING FOES AND THE EVER PRESENT DREAD OF HUNGER AND THIRST (DEEPENED HIS->DEEPENS A) SENSE OF NEED AND OF DEPENDENCE UPON A POWER ABLE TO GUIDE THE (DESTINIES->DEST NEEDS) OF MEN +3080-5040-0002-577: SHE HAS TOLD NOW ALL THAT WAS TOLD HER BUT VOWS SHE WILL NEVER SAY FROM WHENCE SHE HAD IT WE SHALL SEE WHETHER HER RESOLUTIONS ARE AS UNALTERABLE AS THOSE OF MY LADY TALMASH +3080-5040-0032-607: IN MY LIFE I NEVER HEARD SO RIDICULOUS A DISCOURSE AS HE MADE US AND NO OLD WOMAN WHO PASSES FOR A WITCH COULD HAVE BEEN MORE PUZZLED TO SEEK WHAT TO SAY TO REASONABLE PEOPLE THAN HE WAS +8131-117017-0000-2617: IT WAS NIGHT OUTSIDE AND THE (PHOSPHOR BULBS->PHOSPHORE BOBS) AT THE CORNERS GLOWED DIMLY GIVING HIM BARELY ENOUGH LIGHT BY WHICH TO LOCATE THE WAY TO THE (EXTEMPORIZED->EXTEMPORISED) PRECINCT HOUSE +4294-14317-0001-1267: WHEN I SAW THAT THIS BUST CAME OUT SHARP AND CLEAN I SET AT ONCE TO CONSTRUCT A LITTLE FURNACE IN THE WORKSHOP ERECTED FOR ME BY THE DUKE AFTER MY OWN PLANS AND DESIGN IN THE HOUSE WHICH THE DUKE HAD GIVEN ME +8461-281231-0031-2931: HE APPEALED TO (DE BRACY->THE BRAVELEY) TO ASSIST HIM IN THIS PROJECT AND BECAME AT ONCE DEEPLY SUSPICIOUS OF THE KNIGHT'S LOYALTY TOWARDS HIM WHEN HE DECLINED TO LIFT HAND AGAINST THE MAN WHO HAD SPARED HIS OWN LIFE +3997-180294-0014-1102: IN ORDER TO DISTURB THE LABOURERS IN THE (FIELD->FIELDS) WAS ONE DAY DEVOURED BY A WOLF BECAUSE THOSE WHOM HE HAD SO OFTEN DECEIVED NO LONGER BELIEVED IN HIS CRIES FOR HELP +4198-12259-0032-1207: LET US WIND OUR HORNS BY THE SOUND OF FLAGONS AND BOTTLES AND CRY ALOUD THAT WHOEVER HATH LOST HIS THIRST COME NOT HITHER TO SEEK IT +8188-274364-0007-2796: HOWEVER THESE GENTLEMEN AT THE BAR SAY THEY SPEAK FOR THE COMMONWEALTH AND THEY BELIEVE SO YET UNDER (FAVOR->FAVOUR) IT IS I WHO IN THIS PARTICULAR SPEAK FOR THE COMMONWEALTH +3331-159609-0011-667: NONE WERE NEEDED HER TELLTALE FACE ANSWERED FOR HER AS WELL AS THE IMPULSE WHICH MADE HER HIDE HER HEAD IN THE SOFA CUSHION LIKE A FOOLISH OSTRICH WHEN THE (HUNTERS->HANDERS) ARE AFTER IT +367-293981-0011-965: TO BE BRIEF HE TOOK THE MATERIALS OF WHICH HE MADE A COMPOUND MIXING THEM (ALL->WELL) AND BOILING THEM A GOOD (WHILE->WALLET) UNTIL IT SEEMED TO HIM THEY HAD COME TO PERFECTION +3528-168669-0090-785: BECAUSE THERE HAVE BEEN BAD PRIESTS BECAUSE (SAGITTAIRE->SAGOTARE) BISHOP OF GAP WAS THE BROTHER OF (SALONE->SALOON) BISHOP OF (EMBRUN->EMBRO) AND BECAUSE BOTH OF THEM FOLLOWED (MOMMOL->MAMAL) +8280-266249-0023-2824: NO SIR WHAT KNOW YE NOT THAT YOUR BODY IS THE TEMPLE OF THE HOLY GHOST WHICH IS IN YOU WHICH YE HAVE OF GOD AND YE ARE NOT YOUR OWN +4350-9170-0003-1385: THE CHAMPIONS OF THE SOCIAL CONCEPTION OF LIFE USUALLY TRY TO CONNECT THE IDEA OF AUTHORITY THAT IS OF VIOLENCE WITH THE IDEA OF MORAL INFLUENCE BUT THIS CONNECTION IS QUITE IMPOSSIBLE +3005-163399-0029-546: BEING TOM SAWYER WAS EASY AND COMFORTABLE AND IT STAYED EASY AND COMFORTABLE TILL BY AND BY I HEAR A STEAMBOAT COUGHING ALONG DOWN THE RIVER +3528-168669-0030-725: THREE YEARS AGO MADAME DE (BETHUNE->BESSOON) A JANSENIST TURNED ORTHODOX MERELY FROM HAVING SEEN MOTHER CRUCIFIXION AT PRAYER AH +4294-9934-0007-1325: HE FEARED AFTER HAVING TAKEN SO MANY STEPS WHICH HAD BROUGHT HIM NEARER TO HIS FATHER TO NOW TAKE A STEP WHICH SHOULD ESTRANGE 
HIM FROM THAT FATHER +3080-5040-0015-590: BECAUSE YOU FIND FAULT WITH MY OTHER LETTERS THIS IS LIKE TO BE SHORTER THAN THEY I DID NOT INTEND IT SO THOUGH I CAN ASSURE YOU +3764-168671-0052-1085: FAUCHELEVENT WHATEVER HIS ANGUISH OFFERED A DRINK BUT HE DID NOT EXPLAIN HIMSELF ON ONE POINT WHO WAS TO PAY +2414-159411-0028-351: WHY I STOOD SO SAID THE (TIGER->DRIVER) JUMPING INTO THE CAGE AND MY HEAD WAS ON THIS SIDE +3005-163389-0000-440: THEY SWARMED UP IN FRONT OF SHERBURN'S PALINGS AS THICK AS THEY COULD JAM TOGETHER AND YOU COULDN'T HEAR YOURSELF THINK FOR THE NOISE +8280-266249-0039-2840: THAT FELLOW NICK WARD IS A NOTED (BLACKLEG->BLACK LEG) AND RUFFIAN HAD HIS NOSE BROKEN IN A FIGHT AND IS SENSITIVE ON THE SUBJECT WAS CHEATING OF COURSE +8131-117016-0015-2570: BRUCE GORDON GRINNED SLOWLY AS HE SWUNG THE STICK AND (MURDOCH'S->MURDOCK'S) EYES FELL ON HIM EARTH COP +367-130732-0030-950: PICK OUT THE CRAWFISH AND STRAIN THE BROTH THROUGH A NAPKIN BY PRESSURE INTO A BASIN IN ORDER TO EXTRACT ALL THE ESSENCE FROM THE VEGETABLES +3538-142836-0020-846: THE THOUSAND AND ONE ORNAMENTAL DISHES THAT ADORN THE TABLES OF THE WEALTHY SHOULD BE PURCHASED FROM THE CONFECTIONER THEY (CANNOT->CAN NOT) PROFITABLY BE MADE AT HOME +3331-159609-0024-680: FANNY CAME WALKING IN UPON HER ONE DAY LOOKING AS IF SHE BROUGHT TIDINGS OF SUCH GREAT JOY THAT SHE HARDLY KNEW HOW TO TELL THEM +3764-168671-0007-1040: THE BOURGEOIS DID NOT CARE MUCH ABOUT BEING BURIED IN THE (VAUGIRARD->ROUGE HOISS) IT HINTED AT POVERTY (PERE LACHAISE->PARLAISE) IF YOU PLEASE +1688-142285-0006-6: I DON'T THINK MISTER HALE YOU HAVE DONE QUITE RIGHT IN INTRODUCING SUCH A PERSON TO US WITHOUT TELLING US WHAT HE HAD BEEN +4852-28312-0020-1490: THE WIDE BOW WINDOW WAS THERE BEFORE HIM THE THREE OBJECTS HE LIKED BEST SHOWING FROSTY IN THE MOONLIGHT THAT POURED IN FROM ACROSS THE WATER +1998-29454-0002-126: HIS BOOKS TOLD HIM (THAT->THE) TREASURE IS BEST HIDDEN UNDER LOOSE BOARDS (UNLESS->AND AS) OF COURSE YOUR HOUSE (HAS->HAD) A SECRET (PANEL->PENNEL) WHICH HIS HAD NOT +367-293981-0012-966: SANCHO PANZA WHO ALSO REGARDED THE AMENDMENT OF HIS MASTER AS MIRACULOUS BEGGED HIM TO GIVE HIM WHAT WAS LEFT IN THE (PIGSKIN->PIG SKIN) WHICH WAS NO SMALL QUANTITY +4852-28330-0018-1548: HE THEN WENT ON TO DESCRIBE WHAT ELSE WAS TO FOLLOW THE COVERING OF THE SHIP WITH LEAVES TO MAKE IT BLEND WITH ITS SURROUNDINGS +5484-24317-0011-1743: THE PLACE BY (HERMON'S->HERMONT'S) SIDE WHICH (ALTHEA->ALPHIE) HAD CHOSEN FOR HERSELF WOULD THEN BE GIVEN UP TO (ARSINOE->ARSENO) +4852-28330-0003-1533: IT LOOKS TO ME AS IF IT (COULD HAVE->HAD) BEEN ONE OF SEVERAL PEOPLE AND I'LL BE SWITCHED IF I KNOW WHO I'LL KEEP MY EYES OPEN +3331-159609-0022-678: POLLY WAS NOT AT ALL LIKE HERSELF THAT WINTER AND THOSE NEAREST TO HER SAW AND WONDERED AT IT MOST +2609-169640-0008-423: ALTHOUGH THEY WENT THREE FEET TO OUR TWO THIS GAVE US A MOMENT OF BREATHING TIME +1998-15444-0013-109: ANTIDOTES ARE USUALLY GIVEN HYPODERMICALLY OR IF BY MOUTH IN THE FORM OF TABLETS +8131-117016-0013-2568: HE PICKED OUT FIVE OF THE MEN INCLUDING GORDON YOU FIVE WILL COME WITH ME +3538-163624-0026-919: NOT LONG TO WAIT HE SAID TILL THE BITTER SWORD STANDS FAST IN MY HEART AND THOU (WILL->WILT) NOT LIVE LONG WHEN I AM DEAD +3331-159609-0023-679: FOR NED WAS SO ABSORBED IN BUSINESS THAT HE IGNORED THE WHOLE BAILEY QUESTION AND LEFT THEM IN UTTER DARKNESS +2414-128292-0029-319: FOR NOW EVERYTHING THAT IS NARROW AND FIXED SEDUCETH AND TEMPTETH THEE +533-131562-0016-1623: I TRY TO LOOK TO HIM AND RAISE MY HEART 
TO HEAVEN BUT IT WILL CLEAVE TO THE DUST +533-131564-0014-1638: WHAT (WOULD->DID) YOU MAKE OF ME AND THE CHILDREN TO BE SURE THAT (WORRY HER TO->WERE A HURT) DEATH BETWEEN THEM +3538-142836-0003-829: BUT TO DISTINGUISH THESE PROPERLY REQUIRES VERY GREAT ATTENTION AND CONSIDERABLE EXPERIENCE +4198-61336-0013-1248: JEHOASH THE GRANDSON OF JEHU HAD ACHIEVED SUCCESSES IN CONFLICT WITH DAMASCUS +8280-266249-0006-2807: YES SHE ANSWERED THAT LITTLE GROUP YONDER A YOUNG MINISTER AND HIS WIFE AND CHILD I SUPPOSE +4350-9170-0046-1428: EXCEPT FOR THE STATE THEY SAY WE SHOULD BE EXPOSED TO THE ATTACKS OF EVIL DISPOSED PERSONS IN OUR OWN COUNTRY +2033-164914-0003-214: REJOINED THE EUNUCH WHO THEN WAS THE RECITER POINT HIM OUT TO ME +4852-28319-0001-1503: HE HAD TOLD HIS MASTER AT ONCE ABOUT SIMON (GOSLER->GOSTLER) HIS (HORDE->HOARD) OF MONEY AND HIS HIDING PLACES FOR IT +5484-24318-0022-1788: BUT (HERMON->HAREMON) WITH DROOPING HEAD MURMURED TO MORROW I SHALL NO LONGER BE WHAT I AM NOW +8461-281231-0027-2927: SO SAYING HE MOUNTED HIS STRONG WAR HORSE AND RODE OFF THROUGH THE FOREST +8188-269290-0032-2763: SCARCELY LIKELY REPLIED LESLIE SHE TOLD ME SHE WAS DETERMINED NOT TO COME TO THE MEETING +3538-163622-0005-873: THEN THE KING PROMISED HIM THE SAME PUNISHMENT AND THE SAME REWARD THAT HE HAD PROMISED HIS BROTHER +3331-159609-0007-663: I TRY NOT TO DECEIVE MYSELF BUT IT DOES SEEM AS IF THERE WAS A CHANCE OF HAPPINESS FOR ME +7018-75789-0030-2238: A SECOND FISH MADE ITS APPEARANCE (THAN->AND) WHICH WE HAD SEEN (NAUGHT->NOUGHT) MORE MONSTROUS +8131-117016-0058-2613: THE GROUPS GREW MORE EXPERIENCED AND (MURDOCH->MURDOCK) WAS TRAINING A NEW SQUAD EVERY NIGHT +2033-164916-0007-259: AND IN IT ALL REJOICED AT THE ACCESSION OF THE LIGHT OF THE PLACE +2414-128291-0010-273: THE (KINE->KIND) HOWEVER GAZED AT IT ALL AND WONDERED +3538-142836-0002-828: FRUIT GATHERED IN WET OR FOGGY WEATHER WILL SOON BE MILDEWED AND BE OF NO SERVICE FOR PRESERVES +3997-180297-0006-1128: BECAUSE I AM WATCHED AND THE LEAST SUSPICION MIGHT DO ME THE GREATEST HARM +3997-180297-0010-1132: I FANCIED FOR A MOMENT THAT I MIGHT GIVE MYSELF THAT HAPPINESS FOR SIX MONTHS YOU WOULD NOT HAVE IT YOU INSISTED ON KNOWING THE MEANS +6432-63723-0007-2106: IT WAS ONE OF WHAT AT FIRST MIGHT BE CALLED REFINED CRUELTY ON HER HUSBAND'S PART DEGENERATING GRADUALLY INTO THAT OF (THE->A) BASER SORT +4294-35475-0021-1312: MY GRAIN MUST FALL AND ROT IN THE FIELD FROM (OVERRIPENESS->OVER RIPENESS) BECAUSE I HAVE NOT THE STRENGTH TO RISE AND HARVEST IT THEN INDEED MUST WE ALL STARVE +1998-29455-0001-172: WHAT'S (ALL->ON) THAT THERE DICKIE ASKED POINTING TO THE ODD (KNOBBLY->KNOBBY) BUNDLES OF ALL SORTS AND SHAPES TIED ON TO THE PERAMBULATOR'S FRONT +3538-163619-0009-862: ON THE FIRST THURSDAY NIGHT AFTER THIS A BEAUTIFUL MAIDEN CAME INTO THE KITCHEN OF THE PALACE AND BEGGED THE KITCHEN MAID WHO SLEPT THERE TO LEND HER A BRUSH +7975-280085-0005-2541: NO SOONER HOWEVER WAS HE RELEASED THAN HE MADE (POSTHASTE->POST HASTE) INTO (MANKATO->MAN CATO) TO ANNOUNCE OUR PRESENCE AND IN A FEW MINUTES ANOTHER POSSE WAS LOOKING FOR US +7018-75789-0003-2211: I ROWED MY CONVEYANCE INTO THE PLACE WHICH WAS INTENSELY DARK AND THE CURRENT CARRIED THE RAFT WITH IT DOWN THE UNDERGROUND CHANNEL +5484-24317-0030-1762: ONLY EVEN THOUGH LOVE HAS WHOLLY DISAPPEARED SHE STILL CLAIMS CONSIDERATION AND ALTHEA DID NOT WISH TO LOSE (HERMON'S->HARMONT'S) REGARD +4294-35475-0005-1296: I DID BUT LAUGH TO THINK THE SWORD OF ETHELRIED HAD BEEN SO QUICKLY FOUND RESPONDED THE JESTER AND HE POINTED 
TO THE SCISSORS HANGING FROM THE TAILOR'S GIRDLE +6128-63244-0004-2017: OLIVE HATED TO HEAR THAT FINE AVENUE TALKED ABOUT AS IF IT WERE SUCH A REMARKABLE PLACE AND TO LIVE THERE WERE A PROOF OF WORLDLY GLORY +7105-2340-0020-2302: AFTER THEY HAD SAID GOOD NIGHT TO THEIR VISITOR MISSUS PETER EXPRESSED HER CONVICTION THAT HE HAD TAKEN SOMETHING +7975-280085-0006-2542: THE WHISTLE ON THE OIL MILL BLEW AND WE FEARED THAT IT WAS A SIGNAL THAT HAD BEEN AGREED UPON TO ALARM THE TOWN IN CASE WE WERE OBSERVED BUT WE WERE NOT MOLESTED +6128-63241-0003-2002: HE WAS SORRY FOR HER BUT HE SAW IN A FLASH THAT NO ONE COULD HELP HER THAT WAS WHAT MADE HER TRAGIC +2609-156975-0021-382: ON THE BORDERS OF THE WILDERNESS HE FOUND CERTAIN BEDOUIN HERDSMEN WHO RECEIVED HIM HOSPITABLY +8131-117016-0002-2557: SOME OF MARS LAWS DATED FROM THE TIME WHEN LAW ENFORCEMENT HAD BEEN HAMPERED BY LACK OF MEN RATHER THAN BY THE TYPE OF MEN +5484-24317-0029-1761: A WOMAN WHO YEARNS FOR THE REGARD OF ALL MEN AND MAKES LOVE A TOY EASILY LESSENS THE DEMANDS SHE IMPOSES UPON INDIVIDUALS +2033-164916-0010-262: HE THEN REPAIRED TO THE HEART OF THE ENCAMPMENT AND ORDERED THE HOST TO HALT TEN DAYS +5484-24317-0015-1747: HIS SON HAD BEEN (THIS->THE) ROYAL DAME'S FIRST HUSBAND AND SHE HAD DESERTED HIM TO MARRY (LYSIMACHUS->LISUMACHUS) THE AGED KING OF THRACE +4198-12281-0002-1221: NEVERTHELESS AT ALL (ADVENTURES->VENTURES) THEY RANG THE BELLS (AD->ED) CAPITULUM (CAPITULANTES->CAPITULANT DAYS) +4198-12281-0003-1222: BY THE VIRTUE OF GOD WHY DO NOT YOU SING PANNIERS FAREWELL VINTAGE IS DONE +7018-75788-0008-2196: WE WEIGHED ANCHOR AND SHAHRAZAD PERCEIVED THE DAWN OF DAY AND CEASED SAYING HER PERMITTED SAY +3538-163622-0009-877: SO THERE HE SAT WITH HIS HEAD ON HER LAP TAKING HIS EASE THE LIVELONG DAY +3538-163624-0014-907: BUT ALL MEN DIE AND NO BRAVE MAN LETS DEATH FRIGHTEN HIM FROM HIS DESIRE +8188-269288-0019-2692: I HAVE BEEN STARVING OR RATHER I HAVE BEEN THIRSTING +2609-157645-0012-412: DAVID (DEANS->DENES) HOWEVER DID NOT AT ALL (APPROVE->PROVE) THIS IRREVERENCE +6128-63240-0001-1972: THAT HAS AN UNFLATTERING SOUND FOR ME SAID THE YOUNG MAN +7902-96592-0032-2377: NO I CAN'T PART WITH THAT HA HA HA LAUGHED THE BOY JEERINGLY +8461-278226-0003-2887: I WANT TO SEE ALL THE PICTURES THE MODERN PICTURES ESPECIALLY +3764-168670-0007-982: IT IS EASY ENOUGH WITH THE CHILD YOU WILL CARRY HER OUT +7902-96594-0011-2407: I'M GETTING VERY ANXIOUS ABOUT MISTER (RAYSTOKE->RAYSTROKE) START AT ONCE SIR +1688-142285-0083-83: WHAT WOULD YOU DO PAPA HOW WOULD YOU SET ABOUT IT +1688-142285-0038-38: BETTER AND NOT BETTER IF (YO->YOU) KNOW WHAT THAT MEANS +1688-142285-0008-8: HIS FATHER DYING IN MISERABLE CIRCUMSTANCES +5764-299665-0033-1837: WE LIVE TOGETHER IN FAMILIES TRIBES AND NATIONS +4852-28312-0007-1477: CHRIS BLINKED AND LOOKED AGAIN YES THEY WERE STILL THERE +4294-9934-0024-1342: THE LANDLORD PRESENTED HIS BILL WHICH HAD TO BE PAID ON THE SPOT +8131-117016-0047-2602: (MURDOCH->MURDOCK) BLINKED HE DROPPED HIS EYES SLOWLY +7902-96594-0026-2422: KEEP A SHARP (LOOK OUT->LOOKOUT) ON THE CLIFF TO SEE IF MISTER (RAYSTOKE->RAYSTROKE) IS MAKING SIGNALS FOR A BOAT +1998-29455-0002-173: TELL (YER->YOU) WHAT MATE LOOKS TO ME AS IF (I'D->I) TOOK A FANCY TO YOU +3528-168669-0107-802: THE PEAL WHICH ORDERS THE DOCTOR FOR THE DEAD TO BE SUMMONED HAS ALREADY BEEN RUNG +3005-163390-0028-487: (DOAN->DON'T) YOU HEAR ME (SHET->SHUT) DE DO +4350-9170-0035-1417: THIS INCONSISTENCY HAS BECOME OBVIOUS IN UNIVERSAL MILITARY SERVICE +8461-281231-0016-2916: EXCLAIMED THE 
BLACK KNIGHT PRISONER AND PERISH +1688-142285-0067-67: DON'T THINK HARDLY ON HIM HE'S A GOOD MAN HE IS +367-130732-0017-937: SOAK THE CRAB MEAT IN THE SHERRY TWO HOURS BEFORE COOKING +8188-269290-0051-2782: (ANNIE->ENNIE) COLCHESTER IS YOUR ROOMFELLOW IS SHE NOT SHE SAID +5764-299665-0003-1807: WHY DID HE CREATE THE (INTELLECTUALLY->INTELLECTUAL) INFERIOR +6432-63722-0008-2047: BUT HE HADN'T ANY MORE TO DO WITH IT COLONEL THAN THAT CAT +8461-258277-0005-2872: AND SHE THREW DOWN THE JEW'S HEAD BEFORE HIM +4852-28330-0022-1552: I SHALL SAY THAT YOU ARE IN MY OWN CABIN SO THAT I CAN CARE FOR YOU +2414-128292-0032-322: WILT THOU HAVE A REST AND A HOME THIS EVENING +533-131564-0003-1627: I CAN'T YOU DON'T WANT HIM DO YOU +6432-63722-0053-2092: IN FACT I HAVE A FEELING THAT (I'LL->I WILL) LAND MY FISH +5764-299665-0093-1897: THIS CANNOT BE DONE BY TALK OR EXAMPLE +1998-29455-0016-187: OH WELL DONE LITTLE (UN->ONE) SAID MISTER BEALE TO HIMSELF +3005-163389-0002-442: THE STILLNESS WAS AWFUL CREEPY AND UNCOMFORTABLE +5484-24317-0000-1732: WHEN HE CAME FROM THE BATH (PROCLUS->PROCLAS) VISITED HIM AGAIN +6070-86744-0021-1942: WHY REALLY THE THING SEEMS TO ME SIMPLE ENOUGH +533-131556-0001-1582: FOR NONE COULD (INJURE->ENDURE) ME AS HE HAS DONE OH +1688-142285-0082-82: I MAY BE THE CINDERELLA TO PUT ON THE SLIPPER AFTER ALL +3528-168669-0092-787: THEY SHUT THEIR EYES TO THE TRUTH DARKNESS IS THE RULE +4852-28319-0020-1522: THE AFTERNOON RAINY BEFORE INCREASED IN STORM +8188-269290-0020-2751: OH I SHALL NEVER DO THAT REPLIED LESLIE +8188-269290-0050-2781: JUST AS SHE WAS DOING SO MISS FRERE CAME UP +7018-75788-0009-2197: WHEN IT WAS THE FIVE HUNDRED AND FIFTY NINTH NIGHT +4350-10919-0024-1372: SHE ANSWERED HIM AND ALL AT ONCE GOT UP FURIOUS +6432-63723-0050-2149: I SAID I WAS GOLFING HE WENT ON EXCEEDINGLY DISTINCTLY THOUGH WITH AN EFFORT +8188-269288-0031-2704: SHE IS A VERY QUEER ERRATIC CREATURE AND THAT LETTER THERE (WAS->IS) BAD NEWS IN THAT LETTER +6432-63722-0006-2045: (CARROLL->CARL) WAS TOO MUCH ENGAGED IN WATCHING THE BLUE SMOKE CURL LAZILY UPWARD FROM HIS CIGAR JUST THEN TO SAY MORE +3538-163624-0012-905: BUT SIGURD WAITED TILL HALF OF HIM HAD CRAWLED OVER THE PIT AND THEN HE THRUST THE SWORD (GRAM->GRAHAM) RIGHT INTO HIS VERY HEART +6432-63722-0021-2060: (PHUT->BUT) I DON'T KNOW WHETHER THAT'S HIS FIRST OR HIS LAST NAME ANYHOW HE HAD A PARTNER NAMED (SHERE ALI->SHERLLY) +5764-299665-0016-1820: FEAR ERECTS THE CATHEDRAL AND BOWS THE HEAD OF MAN IN WORSHIP +8280-266249-0038-2839: I WONDER WHICH OF US IT IS REMARKED THE FIRST LOOKING HARD AT OUR PARTY I DON'T KNOW BUT COME ON +3764-168670-0005-980: I HAVE PERMISSION TO BRING YOU IN BUT BEFORE BRINGING YOU IN YOU MUST BE GOT OUT +3764-168670-0035-1010: NOT ANOTHER MAN EXCEPT THE POLICE DOCTOR CAN ENTER THE (DEAD ROOM->DEDROOM) THAT IS EVEN WRITTEN ON THE WALL +3997-180297-0008-1130: IF THERE WERE ANY OTHER I WOULD TELL YOU FOR WE ARE NOT TO HAVE ANY SECRETS FROM ONE ANOTHER NOW +8188-269290-0048-2779: AFTER THE ADDRESS THE GIRLS THEMSELVES WERE ENCOURAGED TO SPEAK AND A VERY ANIMATED DISCUSSION FOLLOWED +7902-96595-0005-2434: SAY (MESTER GURR->MISTER GORE) SIR WHICH THANKFUL I AM (TO->FOR) YOU FOR SPEAKING SO BUT YOU DON'T REALLY THINK AS HE HAS COME TO HARM +533-1066-0023-1579: A DOCTOR IS GENERALLY SUPPOSED TO BE (*->A) HANDIER AT BURYING FOLKS THAN AT DIGGING THEM UP +367-130732-0029-949: ALLOW THIS TO BOIL AND THEN ADD A QUART OF STRONG (CONSOMME->CONSUMM) AND LET ALL CONTINUE BOILING FOR HALF AN HOUR +533-131562-0002-1609: THE KEY OF MY 
DESK IN FACT WAS AT THAT MOMENT IN (THE->*) LOCK AND THE OTHERS WERE ATTACHED TO IT +7105-2340-0018-2300: I PUT IT DOWN BY THE (CLARET->CLARY) JUG SAID WILFRID BUSY WITH ANOTHER OBJECT +2033-164914-0019-230: THEN SAID THE EUNUCH TO ZAU AL MAKAN PEACE BE WITH THEE O MY LORD +533-1066-0024-1580: I HELD ON TO HIM FRANTICALLY AND SOMEHOW I GOT THERE AND LOOKED DOWN +7975-280085-0018-2554: AND SHERIFF (GLISPIN'S->LISPIN'S) ORDER NOT TO SHOOT WAS THE BEGINNING OF THE (PROTECTORATE THAT->PROTECTORY THE) MINNESOTA PEOPLE ESTABLISHED OVER US +5764-299665-0030-1834: THE POWER THAT WORKS FOR RIGHTEOUSNESS (HAS->HAD) TAUGHT THE CHILD A LESSON +3005-163391-0025-515: THEY GIVE A GLANCE AT ONE ANOTHER AND NODDED THEIR HEADS AS MUCH AS TO SAY (WHAT D I->WOULD THEY) TELL YOU +5484-24318-0023-1789: THEN (DAPHNE->DAPHANE) RAISED HER FACE TO HIS ASKING SO (THE->THAT) DEMETER IS THE WORK OF (MYRTILUS->MYRTLES) +5442-41169-0014-1714: THAT IT MAY BE BUT STILL IT OUGHT TO BE TREATED A LITTLE MORE RESPECTFULLY +8461-281231-0029-2929: AND WITH THIS EPISTLE THE UNHAPPY OLD MAN SET OUT TO PROCURE HIS DAUGHTER'S LIBERATION +3764-168670-0050-1025: BUT JEAN VALJEAN'S COOLNESS PREVAILED OVER HIM IN SPITE OF HIMSELF HE GRUMBLED +533-131562-0003-1610: NOW THEN SNEERED HE WE MUST HAVE A CONFISCATION OF PROPERTY +7975-280076-0021-2510: POOR JOHN HE HAS BEEN HUNTED DOWN AND SHOT LIKE A WILD BEAST AND NEVER WAS A BOY MORE INNOCENT +4198-12259-0000-1175: DRAW REACH FILL MIX GIVE IT ME WITHOUT WATER +1998-15444-0014-110: IN THE ABSENCE OF A HYPODERMIC SYRINGE THE REMEDY MAY BE GIVEN BY THE RECTUM +6070-86744-0018-1939: HE DWELT WITH CONSIDERABLE FORCE AND ENERGY ON THE ALMOST MAGICAL HOSPITALITY HE HAD RECEIVED FROM THE COUNT AND THE MAGNIFICENCE OF HIS ENTERTAINMENT IN THE GROTTO OF THE THOUSAND AND ONE NIGHTS HE RECOUNTED WITH CIRCUMSTANTIAL EXACTITUDE ALL THE PARTICULARS OF THE SUPPER THE HASHISH THE STATUES THE DREAM AND HOW AT HIS AWAKENING THERE REMAINED NO PROOF OR TRACE OF ALL THESE EVENTS SAVE THE SMALL YACHT SEEN IN THE DISTANT HORIZON DRIVING UNDER FULL SAIL TOWARD PORTO VECCHIO +3538-142836-0019-845: IN SPEAKING OF (CONFECTIONARY->CONFECTIONERY) IT SHOULD BE REMARKED THAT ALL THE VARIOUS PREPARATIONS ABOVE NAMED COME STRICTLY SPEAKING UNDER THAT HEAD FOR THE VARIOUS FRUITS FLOWERS HERBS ROOTS AND JUICES WHICH WHEN BOILED WITH SUGAR WERE FORMERLY EMPLOYED IN PHARMACY AS WELL AS FOR SWEETMEATS WERE CALLED CONFECTIONS FROM THE LATIN WORD (CONFICERE->CONFIRCET) TO MAKE UP BUT THE TERM (CONFECTIONARY->CONFECTIONERY) EMBRACES A (VERY->*) LARGE CLASS INDEED OF SWEET FOOD MANY KINDS OF WHICH SHOULD NOT BE ATTEMPTED IN THE ORDINARY CUISINE +7018-75789-0001-2209: THEN (SIGHING->SEWING) FOR MYSELF I SET TO WORK COLLECTING A NUMBER OF PIECES OF CHINESE AND (COMORIN->CORMERAN) ALOES WOOD AND I BOUND THEM TOGETHER WITH ROPES FROM THE WRECKAGE THEN I CHOSE OUT FROM THE BROKEN UP (SHIPS->SHIP) STRAIGHT PLANKS OF EVEN SIZE AND FIXED THEM FIRMLY UPON THE ALOES WOOD MAKING ME A BOAT RAFT A LITTLE NARROWER THAN THE CHANNEL OF THE STREAM AND I TIED IT TIGHTLY AND FIRMLY AS THOUGH IT WERE NAILED +7902-96592-0014-2359: OH THOSE BARS HE MENTALLY EXCLAIMED AND HE WAS ADVANCING (TOWARD->TOWARDS) THEM (WHEN->BUT) JUST AS HE DREW NEAR THERE WAS A RUSTLING NOISE UNDER THE (WINDOW->WINDOWS) A COUPLE OF HANDS SEIZED THE BARS THERE WAS A SCRATCHING OF BOOT TOES AGAINST STONE WORK AND (RAM'S->RAHAM'S) FACE APPEARED TO GAZE INTO THE ROOM BY INTENTION BUT INTO THE ASTONISHED COUNTENANCE OF THE YOUNG MIDSHIPMAN INSTEAD +2609-156975-0004-365: EVERY ONE 
(WHO IS TURBULENT->WHOSE TURBRIMENT) HAS BEEN FOUND BY (KING MERNEPTAH->GHEIMURNETH PATH) THE TESTIMONY OF THE OLDEST BIBLICAL NARRATIVES REGARDING THE SOJOURN OF THE HEBREWS IN EGYPT IS ALSO IN PERFECT ACCORD WITH THE PICTURE WHICH THE CONTEMPORARY EGYPTIAN INSCRIPTIONS GIVE OF THE PERIOD +533-1066-0010-1566: PUT ON HEAVY SHOES AND SOME OLD DARK CLOTHES AND MAKE UP YOUR MIND NOT TO BE SURPRISED AT ANYTHING +8188-269290-0035-2766: THEN A (ROLL->RAW) CALL WAS GONE THROUGH BY ONE OF THE TUTORS THE ONLY ABSENTEE WAS ANNIE COLCHESTER +367-130732-0031-951: PICK THE SHELLS OFF TWENTY FIVE OF THE CRAWFISH TAILS TRIM THEM NEATLY AND SET THEM ASIDE UNTIL WANTED +5442-41169-0001-1701: HE WENT TO THE WINDOW AND SAT DOWN SCANNING THE GROUPS AND LISTENING TO WHAT WAS BEING SAID AROUND HIM +4294-14317-0015-1281: AFTER SEVERAL MONTHS WERE WASTED AND PIERO WOULD NEITHER WORK NOR PUT MEN TO WORK UPON THE PIECE I MADE HIM GIVE IT BACK +8131-117016-0031-2586: IF THEY TRIED TO RUN THEY WERE HIT FROM BEHIND IF THEY STOOD STILL THEY WERE CLUBBED CAREFULLY +367-130732-0032-952: RESERVE SOME OF THE SPAWN ALSO HALF OF THE BODY SHELLS WITH WHICH TO MAKE THE CRAWFISH BUTTER TO FINISH THE SOUP +7975-280085-0004-2540: FINALLY WE ADMINISTERED TO HIM AN OATH NOT TO BETRAY OUR WHEREABOUTS UNTIL WE HAD TIME TO MAKE OUR ESCAPE AND HE AGREED NOT TO +1688-142285-0051-51: THE SHARPNESS IN HER EYE TURNED TO A WISTFUL LONGING AS SHE MET MARGARET'S SOFT AND FRIENDLY GAZE +8280-266249-0024-2825: FOR (YE->YOU) ARE BOUGHT WITH A PRICE THEREFORE GLORIFY GOD IN YOUR BODY AND IN YOUR SPIRIT WHICH ARE (GOD'S->GODS) +8188-269290-0034-2765: (MISS LAUDERDALE->MISSUS LORDADALE) WAS STANDING WITH THE OTHER TUTORS AND PRINCIPALS OF THE DIFFERENT HALLS ON A RAISED PLATFORM +2609-156975-0005-366: THE ABSENCE OF DETAILED REFERENCE TO THE HEBREWS IS THEREFORE PERFECTLY NATURAL +2033-164915-0012-246: THEN THE EUNUCH CRIED UPON THE PAGES SAYING TAKE HIM OFF THE ASS +4350-9170-0049-1431: EXCEPT FOR THE STATE THEY TELL US WE SHOULD NOT HAVE ANY RELIGION EDUCATION CULTURE MEANS OF COMMUNICATION AND SO ON +4350-10919-0008-1356: THE QUESTION STANDS THUS IN PRESENCE OF INDICATIONS OF TUBERCULOUS PROCESS WHAT IS TO BE DONE TO MAINTAIN NUTRITION +533-131556-0000-1581: BUT HOW AM I TO GET OVER THE TEN OR TWELVE DAYS THAT MUST YET ELAPSE BEFORE THEY GO +5764-299665-0062-1866: IT HAS NEVER MADE MAN MORAL TEMPERATE INDUSTRIOUS AND HONEST +3005-163399-0000-517: (PHELPS->PHELPS'S) WAS ONE OF THESE LITTLE ONE HORSE COTTON PLANTATIONS AND THEY ALL LOOK ALIKE +1998-15444-0001-97: HE SHOULD MAKE INQUIRIES AS TO SYMPTOMS AND TIME AT WHICH FOOD OR MEDICINE WAS LAST TAKEN +4852-28330-0006-1536: CHRIS LOST NO TIME AS SOON AS HE COULD DO IT WITHOUT BEING NOTICED IN HURRYING DOWN TO HIS CABIN +8280-266249-0054-2855: IT CAME FROM UNDER THE TABLE GASPED (WARD->HOWARD) LOOK WHAT'S THERE (LOOK->LOOKED) YOURSELF +4852-28330-0021-1551: I AM SOMEWHAT SKILLED IN MEDICAMENTS I HAVE TO BE AS (*->THE) CAPTAIN OF A SHIP AND (THE->*) CREW KNOW IT +8131-117017-0014-2631: WHEN GORDON AND JENKINS CAME BACK (MURDOCH->MURDOCK) TOSSED THE MONEY TO THEM SPLIT IT +1688-142285-0081-81: EVERYBODY ELSE HAS HAD THEIR TURN AT THIS GREAT DIFFICULTY NOW LET ME TRY +4198-61336-0001-1236: AT THE BEGINNING OF HIS REIGN THERE WAS MUCH SOCIAL DISCONTENT AND SUFFERING +8280-266249-0025-2826: WE CERTAINLY HAVE NO RIGHT TO INJURE OUR BODIES EITHER BY NEGLECT OR SELF INDULGENCE +8461-281231-0015-2915: YET FIRST LET ME SAY SAID DE BRACY WHAT IT IMPORTS THEE TO KNOW +3080-5040-0016-591: I DO NOT FIND IT 
THOUGH I AM TOLD I WAS SO EXTREMELY WHEN I BELIEVED YOU LOVED ME +8131-117017-0029-2646: THE LITTLE GUY KNEW MARS AS FEW OTHERS DID APPARENTLY FROM ALL SIDES +3997-180297-0025-1147: MARGUERITE DREW THE LETTER FROM HER BOSOM AND HANDING IT TO ME WITH A SMILE OF INFINITE SWEETNESS SAID +367-293981-0014-968: IF YOUR WORSHIP KNEW THAT RETURNED SANCHO WOE BETIDE ME AND ALL MY KINDRED WHY DID YOU LET ME TASTE IT +1688-142285-0068-68: I SOMETIMES THINK I SHALL BE (MOPED WI->MILKED WITH) SORROW EVEN IN THE CITY OF GOD IF FATHER IS NOT THERE +7975-280085-0007-2543: HE HAD TO SLEEP WITH (IT PILLOWED->THAT PILLOW) ON MY BREAST JIM BEING ALSO (CRIPPLED->A CRIPPLE) WITH A WOUND IN HIS SHOULDER AND WE COULD NOT GET MUCH SLEEP +5442-41168-0001-1673: HE WENT UP TO THE TABLE AND STRIKING IT WITH HIS FINGER RING HE SHOUTED LOUDLY A BALLOT +1998-29454-0004-128: WHEN DICKIE CAME DOWN HIS AUNT SLIGHTLY SLAPPED HIM AND HE TOOK THE HALFPENNY AND LIMPED OFF OBEDIENTLY +4852-28330-0023-1553: NOT SINCE HE HAD LEFT MISTER WICKER (HAD->AND) CHRIS FELT SUCH CONFIDENCE AS HE DID IN THE WORDS AND ACTIONS OF CAPTAIN BLIZZARD +4198-61336-0017-1252: (TIGLATH PILESER->TIGLAS POLYSER) NEXT OPERATED AGAINST THE (MEDIAN->MEDEAN) AND OTHER (HILL->HI) TRIBES IN THE (NORTH EAST->NORTHEAST) +4294-35475-0007-1298: THOU SHALT HAVE THY LIBERTY HE CRIED EVEN THOUGH THOU SHOULDST (REND->RUN) ME IN PIECES THE MOMENT THOU ART FREE +533-131556-0017-1598: SHE PAUSED IN EVIDENT DISCONCERTION AND PERPLEXITY MINGLED WITH ANGER SHE DARED NOT SHOW +8188-269288-0034-2707: ANNIE'S EYES WERE VERY BRIGHT HER CHEEKS WERE NO LONGER PALE AND THERE WAS A BRILLIANT (COLOR->COLOUR) IN THEM +4294-14317-0016-1282: AMONG ARTISTS CERTAIN ENRAGED SCULPTORS LAUGHED AT ME AND CALLED ME THE NEW SCULPTOR +2609-157645-0013-413: GOING TO CHURCH AT HAYES IN THOSE DAYS MUST HAVE BEEN (QUITE AN EXCITING EXPERIENCE->A THE SOUNDTING SPIRITENCE) +4852-28312-0022-1492: IT WAS NO LONGER THERE NOR WERE THE HIGH WALLS AND SMOKESTACKS OF FACTORIES TO BE SEEN +7975-280076-0008-2497: I WILL GIVE YOU THIS OUTLINE AND SKETCH OF MY WHEREABOUTS AND ACTIONS AT THE TIME OF CERTAIN ROBBERIES WITH WHICH I AM CHARGED +8188-269288-0035-2708: SHE DID NOT TAKE THE LEAST NOTICE OF LESLIE BUT GOING INTO THE ROOM SHUT THE DOOR +3538-163619-0010-863: SHE BEGGED VERY PRETTILY AND GOT IT AND THEN SHE BRUSHED HER HAIR AND THE GOLD DROPPED FROM IT +3331-159609-0012-668: ONCE OR TWICE BUT SORT OF (JOKINGLY->CHOKINGLY) AND I THOUGHT IT WAS ONLY SOME LITTLE FLIRTATION +6432-63722-0023-2062: NEIGHBORS OFTEN HEARD EM (SCRAPPIN->SCRAPPING) A LOT AND THIS AFTERNOON THEY WENT AT IT AGAIN HOT AND HEAVY +8131-117016-0032-2587: (MURDOCH->MURDOCK) INDICATED ONE WHO STOOD WITH HIS SHOULDERS SHAKING AND TEARS RUNNING DOWN HIS CHEEKS +3005-163391-0013-503: I'M GOING IN A SHIP NEXT WEDNESDAY FOR (RYO JANEERO->RIO JANEIRO) WHERE MY UNCLE LIVES +6432-63723-0024-2123: AND IT WAS THIS MAN RICH IT WAS SAID HANDSOME CERTAINLY THAT CYNTHIA (RATCHFORD->RACHFORD) HAD MARRIED +6128-63244-0019-2032: THEY WERE HER SISTERS THEY WERE HER OWN AND THE DAY OF THEIR DELIVERY HAD DAWNED +8131-117017-0001-2618: IT HAD PROBABLY BEEN YEARS SINCE ANY HAD DARED RISK IT AFTER THE SUN WENT DOWN +1688-142285-0053-53: (YO'LL->YOU'LL) NOT BE DAUNTED IF FATHER'S AT HOME AND SPEAKS A BIT GRUFFISH AT FIRST +3005-163399-0001-518: I WENT AROUND AND (CLUMB->CLIMBED) OVER THE BACK STILE BY THE ASH HOPPER AND STARTED FOR THE KITCHEN +5764-299665-0049-1853: COULD THEY HAVE BEEN WORSE HAD THEY HAD ANY OTHER RELIGION THAN CHRISTIANITY 
+7105-2340-0035-2317: PETER'S LITTLE WEAKNESS IT RUNS IN THE FAMILY GOOD LORD +5442-32873-0005-1657: STANLEY STANLEY IT WOULD BE MERCY TO KILL ME SHE BROKE OUT AGAIN +8188-269290-0047-2778: FOR ALL WHO SIN ALL WHO SUFFER YOU ARE TO A CERTAIN EXTENT RESPONSIBLE +4852-28330-0019-1549: (*->THE) CAMOUFLAGE WAS NOT A WORD THE CAPTAIN OR ANYONE ELSE OF HIS TIME YET UNDERSTOOD +3997-180297-0022-1144: WHAT I LOVED IN YOU WAS NOT THE MAN WHO WAS BUT THE MAN WHO WAS GOING TO BE +1688-142285-0050-50: I SHOULD HAVE THOUGHT OF IT AGAIN WHEN I WAS LESS BUSY MAY I GO WITH YOU NOW +3005-163399-0013-530: PRETTY SOON SHE MADE THE COLD (CHILLS->CHILL) STREAK ALL DOWN MY BACK BECAUSE SHE SAYS +7902-96592-0029-2374: HE DIVIDED THE PAINT AND PRODUCED A FEW SQUEAKS AND GRATING SOUNDS AS HE (REALISED->REALIZED) THAT THE ATTEMPT WAS MADNESS +4198-61336-0014-1249: SIX MONTHS (AFTERWARDS->AFTERWARD) HE WAS ASSASSINATED BY (SHALLUM->SCHILUM) +1688-142285-0095-95: THEY CARED TOO LITTLE TO ASK IN WHAT MANNER HER SON HAD SPOKEN ABOUT HER +3005-163389-0015-455: IT WARN'T FUNNY TO ME THOUGH I WAS ALL OF A TREMBLE TO SEE HIS DANGER +5764-299665-0015-1819: FEAR BUILDS THE ALTAR AND OFFERS THE SACRIFICE +2414-159411-0011-334: AT A LITTLE DISTANCE THEY FOUND A BULLOCK LYING BY THE ROADSIDE +6070-63485-0007-1909: OH AH TO LAY A TRAP TO CATCH US REPLIED THE THIEF +7902-96591-0024-2344: A FELLOW WHO WAS SHUT UP IN PRISON FOR LIFE MIGHT DO IT HE SAID BUT NOT IN A CASE LIKE THIS +8280-266249-0007-2808: AND WHAT A DEAR LITTLE FELLOW HE IS JUST ABOUT THE AGE OF OUR HAROLD I SHOULD JUDGE +1688-142285-0049-49: I HAD FORGOTTEN WHAT I SAID FOR THE TIME CONTINUED MARGARET QUIETLY +5442-41169-0029-1729: (THERE'S->THERE IS) A CLASS INSTINCT TOO OF WHAT ONE OUGHT AND (OUGHTN'T->OUGHT NOT) TO DO +533-1066-0008-1564: THE MOST UNUSUAL THING I CAN THINK OF WOULD BE A PEACEFUL NIGHT +4852-28311-0001-1444: CHRIS LOOKED FROM A NICKEL PLATED FLASHLIGHT TO A CAR JACK AND SPARK PLUG +8280-266249-0037-2838: A (VENTRILOQUIST ABOARD->VENTRILOQUEST OF BOARD) OF COURSE RETURNED ANOTHER LET'S FOLLOW AND SEE THE FUN +1998-29455-0028-199: (BLESSED->BLEST) IF I EVER SEE SUCH A (NIPPER->NIBBER) HE SAID OVER AND OVER AGAIN +6938-70848-0007-2164: YOU CALL YOURSELVES THE PEOPLE OF RUSSIA BUT (YOU'RE->YOU ARE) NOT THE PEOPLE OF RUSSIA +4294-9934-0021-1339: HE PAID TWENTY FRANCS FOR THE CAST OFF GARMENTS THEY WENT TO THE WATCHMAKER'S +8131-117017-0012-2629: MUST (OF->HAVE) BEEN MAKING A BIG CONTACT IN SOMETHING FIFTY FIFTY +6128-63240-0013-1984: SHE WAS ATTRACTIVE AND IMPERTINENT ESPECIALLY THE LATTER +4852-28312-0004-1474: MISTER WICKER'S BACK BEING (TOWARD->TOWARDS) THE SOURCE OF LIGHT CHRIS COULD NOT SEE HIS FACE +8188-269288-0001-2674: LESLIE DETERMINED TO (TRY FOR->TRIFLE) HONORS IN ENGLISH LANGUAGE AND LITERATURE +2609-157645-0010-410: THEN LET THEM SING THE HUNDRED AND NINETEENTH REPLIED THE CURATE +2414-128292-0014-304: O (EARTH->ART) THOU HAST BECOME TOO ROUND FOR ME +2414-128292-0000-290: WHITHER HATH MY LONESOMENESS GONE SPAKE HE +3331-159605-0026-635: HOW LOVELY THE PARK LOOKS SHE SAID IN GREAT CONFUSION +4852-28319-0002-1504: CHRIS THEREFORE THREW HIMSELF INTO ALL THE PRELIMINARIES OF HIS TASK +1688-142285-0004-4: HIS STATEMENT OF HAVING BEEN A (SHOP BOY->SHOPBOY) WAS THE THING I LIKED BEST OF ALL +1998-29454-0015-139: SHE WAS (WAITIN->WAITING) FOR THE WOOD TO BOIL THE (KETTLE->CATTLE) WHEN I COME OUT MOTHER +3997-182399-0020-1174: IT WAS JUST AS GOOD AS ONE OF GRANDFATHER (FROG'S->FROGS) +3764-168670-0004-979: EVERYTHING IS ARRANGED AND NOTHING IS 
SAID FAUCHELEVENT +5764-299665-0066-1870: RELIGION RESTS ON THE IDEA THAT NATURE HAS A MASTER AND THAT THIS MASTER WILL LISTEN TO PRAYER THAT (THIS->HIS) MASTER PUNISHES AND REWARDS THAT HE LOVES PRAISE AND FLATTERY AND HATES THE BRAVE AND FREE +5484-24318-0029-1795: HIS HEART CONTRACTED PAINFULLY AND HIS SOLICITUDE ABOUT HIS UNCLE'S FATE INCREASED WHEN PHILIPPUS INFORMED HIM THAT THE CONSPIRATORS HAD BEEN ARRESTED AT THE BANQUET AND HEADED BY (AMYNTAS THE RHODIAN->AMENTUS HERRODIAN) CHRYSIPPUS AND (PROCLUS->PROCLIS) HAD PERISHED BY THE EXECUTIONER'S SWORD AT SUNRISE +4294-32859-0001-1286: IT WAS HIS FANCY I SUPPOSE TO REVIVE CERTAIN SENTIMENTAL RELATIONS WHICH HAD IT MAY BE ONCE EXISTED BETWEEN HIM AND MISS LAKE AND HE WAS A PERSON OF THAT COMBATIVE TEMPERAMENT THAT MAGNIFIES AN OBJECT IN PROPORTION AS ITS PURSUIT IS THWARTED +3005-163390-0001-460: THE PEOPLE MOST KILLED THEMSELVES LAUGHING AND WHEN THE KING GOT DONE CAPERING AND CAPERED OFF BEHIND THE SCENES THEY ROARED AND CLAPPED AND STORMED AND (HAW HAWED->JAWHAWED) TILL HE COME BACK AND DONE IT OVER AGAIN AND AFTER THAT THEY MADE HIM DO IT ANOTHER TIME +6070-86744-0024-1945: MY DEAR FRANZ REPLIED ALBERT WHEN UPON RECEIPT OF MY LETTER YOU FOUND THE NECESSITY OF ASKING THE COUNT'S ASSISTANCE YOU PROMPTLY WENT TO HIM SAYING MY FRIEND ALBERT DE MORCERF IS IN DANGER HELP ME TO DELIVER HIM +6128-63240-0004-1975: HE WAS TALL AND LEAN AND DRESSED THROUGHOUT IN BLACK HIS SHIRT COLLAR WAS LOW AND WIDE AND THE TRIANGLE OF LINEN A LITTLE CRUMPLED EXHIBITED BY THE OPENING OF HIS WAISTCOAT WAS ADORNED BY A PIN CONTAINING A SMALL RED STONE +367-130732-0005-925: IT WAS HERE THAT MOST MAGNIFICENT DINNERS WERE ARRANGED IT WAS HERE THAT EXTRAORDINARY DISHES WERE CONCOCTED BY CHEFS OF WORLD WIDE FAME IT WAS HERE THAT LOBSTER (A LA NEWBERG->AEUBERG) REACHED ITS HIGHEST PERFECTION AND THIS IS THE RECIPE THAT WAS FOLLOWED WHEN IT WAS PREPARED IN THE (DELMONICO->DOMONICO) +3331-159605-0032-641: POLLY DID NOT RETURN TO HER (FAVORITE->FAVOURITE) WALK TILL SHE LEARNED FROM MINNIE THAT UNCLE HAD REALLY LEFT TOWN AND THEN SHE FOUND THAT HIS FRIENDLY COMPANY AND CONVERSATION WAS WHAT HAD MADE THE WAY SO PLEASANT AFTER ALL +533-131556-0019-1600: IF YOU ARE GENEROUS HERE IS A FITTING OPPORTUNITY FOR THE EXERCISE OF YOUR MAGNANIMITY IF YOU ARE PROUD HERE AM I YOUR RIVAL (READY->RAY) TO (ACKNOWLEDGE->PRONOUNCE) MYSELF YOUR (DEBTOR->ADEPTOR) FOR AN ACT OF THE MOST NOBLE FORBEARANCE +3331-159605-0031-640: HE WAS GONE BEFORE SHE COULD DO ANYTHING BUT LOOK UP AT HIM WITH A REMORSEFUL FACE AND SHE WALKED ON FEELING THAT THE FIRST AND PERHAPS THE ONLY LOVER SHE WOULD EVER HAVE HAD READ HIS ANSWER AND ACCEPTED IT IN SILENCE +6128-63244-0022-2035: IT WAS NOT CLEAR TO THIS INTERESTING GIRL IN WHAT MANNER SUCH A SACRIFICE AS THIS LAST WOULD BE REQUIRED OF HER BUT SHE SAW THE MATTER THROUGH A KIND OF SUNRISE MIST OF EMOTION WHICH MADE DANGER AS ROSY (AS->IS) SUCCESS +2033-164915-0015-249: THEN HE BADE HIM BE SEATED AND QUESTIONED HIM AND HE REPLIED THAT HE WAS CHAMBERLAIN TO THE EMIR OF DAMASCUS AND WAS BOUND TO KING OMAR WITH PRESENTS AND THE TRIBUTE OF SYRIA +4350-9170-0036-1418: IN FACT THE WHOLE SIGNIFICANCE OF THE SOCIAL CONCEPTION OF LIFE CONSISTS IN MAN'S RECOGNITION OF THE BARBARITY OF STRIFE BETWEEN INDIVIDUALS AND THE TRANSITORINESS OF PERSONAL LIFE ITSELF AND THE TRANSFERENCE OF THE AIM OF LIFE TO GROUPS OF PERSONS +2609-156975-0022-383: THESE SAND WANDERERS SENT HIM ON FROM (TRIBE->TIME) TO (TRIBE->TIME) UNTIL HE REACHED THE LAND OF (KEDEM->KEIDAM) EAST OF THE DEAD 
SEA WHERE HE REMAINED FOR A YEAR AND A HALF +5442-41168-0016-1688: HAVING PUT IT IN HE RECOLLECTED THAT HE OUGHT TO HAVE THRUST HIS LEFT HAND TOO AND SO HE THRUST IT IN THOUGH TOO LATE AND STILL MORE OVERCOME WITH CONFUSION HE BEAT A HASTY RETREAT INTO THE BACKGROUND +8461-281231-0018-2918: RAISING THE WOUNDED MAN WITH EASE THE BLACK KNIGHT RUSHED WITH HIM TO THE POSTERN GATE AND HAVING THERE DELIVERED HIS BURDEN TO THE CARE OF TWO YEOMEN HE AGAIN ENTERED THE CASTLE TO ASSIST IN THE RESCUE OF THE OTHER PRISONERS +533-131564-0019-1643: I'VE BEEN A CURSED RASCAL GOD KNOWS SAID HE AS HE GAVE IT A (HEARTY->EARTHY) SQUEEZE BUT YOU SEE IF I DON'T MAKE AMENDS FOR IT (D N->DAMN) ME IF I DON'T +7018-75789-0005-2213: WHEN I AWOKE AT LAST I FOUND MYSELF IN THE LIGHT OF HEAVEN AND OPENING MY EYES I SAW MYSELF IN A BROAD STREAM AND THE RAFT MOORED TO AN ISLAND IN THE MIDST OF A NUMBER OF INDIANS AND ABYSSINIANS +2414-159411-0016-339: THE (BRAHMAN->BRAHMIN) STATED THE CASE AND THE EAGLE ANSWERED WHENEVER MEN SEE ME THEY TRY TO SHOOT ME THEY CLIMB THE ROCKS AND STEAL AWAY MY LITTLE ONES +2609-156975-0023-384: LATER HE FOUND HIS WAY TO THE COURT OF ONE OF THE LOCAL KINGS IN CENTRAL PALESTINE WHERE HE MARRIED AND BECAME IN (*->THE) TIME A PROSPEROUS LOCAL PRINCE +2414-159411-0000-323: ONCE UPON A TIME A BRAHMAN WHO WAS WALKING ALONG THE ROAD CAME UPON AN IRON CAGE IN WHICH A GREAT TIGER HAD BEEN SHUT UP BY THE VILLAGERS WHO CAUGHT HIM +4198-61336-0018-1253: HE OVERTHREW BUILDINGS DESTROYED ORCHARDS AND TRANSPORTED TO NINEVEH THOSE OF THE INHABITANTS HE HAD NOT PUT TO THE (SWORD->SWARD) WITH ALL THE LIVE STOCK HE COULD LAY HANDS ON +7902-96592-0003-2348: STOP HERE TILL SIR RISDON COMES DOWN AND TELL HIM I'M VERY SORRY THAT WE SHOULD HAVE CLEARED OUT LAST NIGHT ONLY A BORN FOOL SAW JERRY (NANDY'S->AND ANDY'S) LOBSTER BOAT COMING INTO THE COVE AND CAME RUNNING TO SAY IT WAS A PARTY FROM THE CUTTER YES FATHER +8188-269290-0036-2767: THE PHYSICAL PART OF YOUR TRAINING AND ALSO THE MENTAL PART ARE ABUNDANTLY SUPPLIED IN THIS GREAT HOUSE OF LEARNING SHE CONTINUED BUT THE SPIRITUAL PART IT SEEMS TO ME OUGHT NOW TO BE STRENGTHENED +3005-163389-0003-443: (SHERBURN->SHERBIN) RUN HIS EYE SLOW ALONG THE CROWD AND WHEREVER IT STRUCK THE PEOPLE TRIED A LITTLE TO (OUT GAZE->OUTGAZE) HIM BUT THEY COULDN'T THEY DROPPED THEIR EYES AND LOOKED SNEAKY +3528-168656-0001-683: IT WAS HER PLEASURE AND HER VANITY TO DRAG IN THESE NAMES ON EVERY PRETEXT +4198-12259-0033-1208: THE GREAT GOD MADE THE PLANETS AND WE MAKE THE PLATTERS NEAT +1998-29455-0032-203: IF (E'S->HE'S) STRAIGHT (E'LL->HE'LL) DO FOR ME AND IF HE AIN'T I'LL DO FOR (IM->HIM) SEE +5764-299665-0034-1838: THEY ARE PRAISED ADMIRED AND RESPECTED +7105-2330-0033-2273: HAVE YOU ANY BAND INSTRUMENTS OF AN EASY NATURE TO PLAY +367-130732-0018-938: CHOP FINE THE ONION SWEET PEPPER AND TOMATO WITH THE ROSEMARY +3997-180297-0011-1133: WELL GOOD HEAVENS THE MEANS WERE EASY ENOUGH TO GUESS +6938-70848-0011-2168: THESE LAST WERE THE YOUNG GENERATION WHO HAD BEEN SERVING IN THE ARMY +8131-117029-0013-2663: FINALLY GOT OUR ORDERS FOR YOU IT'S MERCURY +7018-75788-0010-2198: AND CEASED NOT SAILING TILL WE ARRIVED SAFELY AT (BASSORAH->PASORAH) +6432-63722-0054-2093: I'D RECOMMEND HIM TO YOU INSTEAD OF BLACKSTONE THANKS LAUGHED KENNETH +2414-128292-0004-294: THUS (SPAKE ZARATHUSTRA->SAYING THE TWO STRIKE) TO HIS HEART AND RAN AWAY +1998-29454-0005-129: HE HAD NEVER SEEN ONE BEFORE AND IT INTERESTED HIM EXTREMELY +7902-96595-0009-2438: BOY (BOUT->ABOUT) SEVENTEEN WITH A RED CAP NO SIR 
INDEED (I'VE->HAVE) NOT +8461-258277-0006-2873: NOW THE CAUSE OF HER SLAYING HER SIRE WAS AS FOLLOWS +3538-163624-0015-908: DIE THOU (FAFNIR->FAFNER) AND THEN (FAFNIR->FAFNER) DIED +2414-128292-0003-293: LET IT RUN AFTER ME I (RUN->RAN) AWAY FROM IT +4198-12259-0003-1178: YOU HAVE (CATCHED->CAST) A COLD GAMMER YEA FORSOOTH SIR +5442-41169-0018-1718: OH WELL ONE DOES IT WHAT WOULD YOU HAVE +1688-142285-0039-39: NOT EXACTLY REPLIED MARGARET SMILING +7902-96592-0033-2378: BUT I'LL YES I'LL GIVE YOU A GUINEA IF YOU WILL LET ME OUT +533-131564-0004-1628: IT IS A RESOLUTION YOU (OUGHT TO->*) HAVE (FORMED->REFORMED) LONG AGO +5764-299665-0094-1898: THIS IS THE SOLUTION OF THE WHOLE QUESTION +3764-168670-0008-983: AND SHE WILL HOLD HER TONGUE I ANSWER FOR THAT +3331-159605-0044-653: HE UNDERSTOOD AND BEING A GENTLEMAN MADE NO FUSS +1998-29454-0020-144: NO SAID DICKIE OH NO NO I NEVER +4852-28312-0008-1478: CHRIS SWALLOWED (AND->THEN) HIS VOICE CAME BACK TO HIM +3528-168669-0123-818: THE EMPTY COFFIN REMAINS THIS PRODUCED A PAUSE +3005-163391-0014-504: BUT IT'LL BE LOVELY (WISHT->WISHED) I WAS A (GOING->GOIN) +2414-159411-0015-338: ON WHAT MUST I GIVE JUDGMENT ASKED THE EAGLE +5764-299665-0064-1868: CAN WE CURE DISEASE BY SUPPLICATION +1998-29455-0018-189: NO I NEVER SAID DICKIE (ERE'S->YES) THE (STEEVER->STEVER) +3080-5040-0003-578: I WONDER HOW SHE BEHAVED HERSELF WHEN SHE WAS MARRIED +8131-117016-0003-2558: THE STONEWALL GANG NUMBERED PERHAPS FIVE HUNDRED +6432-63723-0009-2108: THE COLONEL DID NOT DISCLOSE THE FACT THAT IT WAS NO NEWS TO HIM +2414-128292-0018-308: (THEN->THE NO) ONLY DID I (HIT THE->HATE THAT) TRUTH +1688-142285-0009-9: WHY IT MIGHT HAVE BEEN IN THE WORKHOUSE +8131-117016-0033-2588: THE CAPTAIN'S FACE WAS AS SICK AS (GORDON->GORDON'S) FELT +3528-168669-0003-698: REVEREND MOTHER DO YOU KNOW THE CHAPEL +3005-163399-0002-519: I OUT WITH A (YES'M BEFORE->YASSEM FORE) I THOUGHT +4350-9170-0051-1433: THIS ARGUMENT TOO WAS WELL FOUNDED ONLY SOME CENTURIES AGO +6432-63722-0024-2063: TOWARD DARK A MAN WENT IN TO BUY A LAMP +7975-280063-0007-2483: WE WERE OUT OF AMMUNITION AND WERE HELPLESS HAD THE FIGHT BEEN PRESSED +8280-266249-0012-2813: DOUBTLESS THAT IS THE CASE REMARKED MISTER DINSMORE +3764-168670-0038-1013: I SHALL BE HUNGRY I WILL BRING YOU SOMETHING +8131-117017-0016-2633: LIKE THIS SOCIAL CALL GORDON ASKED HIM +5442-41168-0018-1690: SANG (OUT->*) THE VOICE OF THE SECRETARY WHO COULD NOT PRONOUNCE THE LETTER R +3764-168670-0024-999: AND I ADD AND FATHER MADELEINE IS BURIED AH +533-131564-0021-1645: GOD HELP ME THEN FOR I'M SURE I NEED IT +5764-299665-0065-1869: CAN WE RECEIVE VIRTUE OR HONOR AS ALMS +7902-96592-0004-2349: TELL HIM NOT TO BE UNEASY TIS ALL RIGHT AND I'LL HAVE EVERYTHING CLEAR AWAY TO NIGHT +2609-156975-0009-370: THE LATER TRADITIONS TEND TO EXTEND THE PERIOD +4198-61336-0020-1255: IN THE FOLLOWING YEAR TIGLATH (PILESER->BELEASER) RETURNED TO SYRIA +5442-41169-0020-1720: SO THERE'LL BE NO ONE TO KEEP IT UP AND YET ONE DOES IT +3538-163619-0012-865: I SHALL COME TWICE MORE AND THEN NEVER AGAIN SAID SHE +8131-117016-0049-2604: NOBODY WANTS HIM EXCEPT A GANG OF CROOKS AND THOSE IN POWER +1998-29454-0036-160: AH SAID (DICKIE->DICKY) AND A (FULL->SHORT) SILENCE FELL BETWEEN THEM +533-131564-0006-1630: NO I'D RATHER BE LIKE MYSELF (BAD->THAN) AS I AM +5484-24317-0003-1735: SO THE ARTIST FOUND HIMSELF OBLIGED TO RELINQUISH HIS OPPOSITION +6432-63722-0040-2079: DON'T SCRATCH YOURSELF ON IT WHATEVER YOU DO WHY NOT +6432-63723-0040-2139: ONLY THAT I DARCY HESITATED AND GREW 
RED +3764-168670-0009-984: FAUCHELEVENT GRUMBLED MORE TO HIMSELF THAN TO JEAN VALJEAN +3764-168670-0010-985: YOU UNDERSTAND FATHER MADELEINE THE GOVERNMENT WILL NOTICE IT +7902-96592-0049-2394: ONCE MORE WILL YOU COME AND LET ME OUT NO +1688-142285-0070-70: BUT (YOU WILL->YOU'LL) BE THERE FATHER YOU SHALL OH MY HEART +3997-180294-0017-1105: BUT TO RETURN TO THE FIRST DAY OF MY (LIAISON->LIYER SONG) +1998-29454-0006-130: HE LOOKED ABOUT HIM AND KNEW THAT HE DID NOT AT ALL KNOW WHERE HE WAS +4852-28312-0025-1495: NO ELECTRIC SIGNS NO LAMPLIT STREETS +3997-180297-0012-1134: I LISTENED AND I GAZED AT MARGUERITE WITH ADMIRATION +5764-299665-0021-1825: WAS THERE GOODNESS WAS THERE WISDOM IN THIS +6432-63723-0010-2109: AARON GRAFTON'S STATEMENT WAS BEING UNEXPECTEDLY CONFIRMED +5442-41169-0005-1705: SHALL WE GO ON YOUR EXCELLENCY FINE CHAMPAGNE +1688-142285-0085-85: VERY GOOD BUT WE MUST FIRST CATCH OUR HOUSE MOTHER +7902-96592-0034-2379: GUINEA SAID THE BOY THINK (I'D->I'LL) DO IT FOR A GUINEA WELL THEN (TWO->TOO) +5764-299665-0096-1900: POVERTY AND CRIME WILL BE CHILDLESS +3331-159605-0045-654: BUT POLLY IT WOULD HAVE BEEN A GRAND THING FOR YOU +3528-168669-0035-730: I SAY MORE OFTEN MORE OFTEN THAN WHAT +3528-168669-0064-759: FAUCHELEVENT STARTED THE VAULT UNDER THE ALTAR +4294-14317-0018-1284: HAVING THIS EXCELLENT RESOLVE IN HEART I REACHED MY HOME +4294-9934-0011-1329: REQUEST COURFEYRAC TO COME AND TALK WITH ME SAID MARIUS +4852-28319-0022-1524: CERTAIN ELEMENTS WERE TO BE MIXED AND POURED AT THE PROPER TIME +6128-63240-0018-1989: ARE YOU VERY AMBITIOUS YOU LOOK AS IF YOU WERE +4852-28312-0010-1480: I SAW YOUR SIGN AND I KNOW A BOY WHO NEEDS THE JOB +3764-168671-0042-1075: YOU ARE A PEASANT I AM A PARISIAN +8131-117029-0014-2664: WE SENT TWENTY OTHERS THE SAME WAY AND THEY FAILED +4350-9170-0008-1390: THE ARMY HAS ALWAYS BEEN AND STILL IS THE BASIS OF POWER +3764-168670-0025-1000: YOU ARE NOT LIKE OTHER MEN FATHER MADELEINE +3528-168669-0005-700: THERE IS A STONE TO BE RAISED HEAVY +3528-168669-0049-744: FAUCHELEVENT HELD HIS PEACE SHE WENT ON +1688-142285-0025-25: AND I DO SAID HER FATHER LAUGHING +3538-163622-0012-880: (I AM->I'M) WALKING ABOUT IN SEARCH OF A PLACE SAID (CINDERLAD->SINDA LAD) +8461-281231-0002-2902: AS HE LAY UPON HIS BED (RACKED->WRAPPED) WITH PAIN AND MENTAL AGONY AND FILLED WITH THE FEAR OF RAPIDLY APPROACHING DEATH HE HEARD A VOICE ADDRESS HIM +5764-299665-0004-1808: WHY DID HE CREATE THE DEFORMED AND HELPLESS WHY DID HE CREATE THE CRIMINAL THE IDIOTIC THE INSANE +6432-63722-0025-2064: HE FOUND THE PLACE WITHOUT A LIGHT IN IT STUMBLED OVER SOMETHING ON THE FLOOR AND THERE WAS ALI'S BODY WITH THE HEAD BUSTED IN AND THIS HEAVY CANDLESTICK NEAR IT +7105-2330-0019-2259: SAID THE PRIME MINISTER WE CAN HARDLY BE SUPPOSED TO SUPPLY A RELEASED PRISONER WITH A BRASS BAND HOW ON EARTH COULD WE DEFEND IT ON THE ESTIMATES +5442-41169-0019-1719: AND WHAT'S MORE THE LANDOWNER WENT ON LEANING HIS ELBOWS ON THE WINDOW AND CHATTING ON MY SON I MUST TELL YOU HAS NO TASTE FOR IT +5484-24317-0032-1764: THOUGH SO LOUD A DENIAL IS WRITTEN ON YOUR FACE I PERSIST IN MY CONVICTION AND THAT NO IDLE DELUSION (ENSNARES->ENSNAS) ME I CAN PROVE +1998-15444-0002-98: HE SHOULD NOTICE THE POSITION AND TEMPERATURE OF THE BODY THE CONDITION OF RIGOR MORTIS MARKS OF VIOLENCE APPEARANCE OF LIPS AND MOUTH +3764-168671-0040-1073: THE GOOD GOD CONSULTED HIS NOTE BOOK WHICH SHOWS WHEN THE TIME IS UP IT WAS FATHER (MESTIENNE'S->MAIAN'S) TURN FATHER MESTIENNE DIED +1688-142285-0024-24: PAPA I DO THINK MISTER 
THORNTON A VERY REMARKABLE MAN BUT PERSONALLY I DON'T LIKE HIM AT ALL +7105-2340-0021-2303: HOW ON EARTH ARE WE TO KNOW SAID PETER THE MEAN PIG HASN'T BROUGHT US A PRESENT AND I'M HANGED IF HE SHALL CARRY ONE OFF +6070-86744-0007-1928: I RESIDE IN MY FATHER'S HOUSE BUT OCCUPY A PAVILION AT THE FARTHER SIDE OF THE (COURT YARD->COURTYARD) ENTIRELY SEPARATED FROM THE MAIN BUILDING +1998-15444-0017-113: THIS PROCESS IS BASED UPON THE PRINCIPLE THAT THE SALTS OF THE (ALKALOIDS->AKALITES) ARE SOLUBLE IN (ALCOHOL AND->ALKALINE) WATER AND INSOLUBLE IN ETHER +6128-63244-0005-2018: ALL SORTS OF INFERIOR PEOPLE LIVED THERE AND SO BRILLIANT A WOMAN AS MISSUS (FARRINDER->FARINNDER) WHO LIVED AT (ROXBURY->BRAXBURY) OUGHT NOT TO MIX THINGS UP +2414-128291-0001-264: HE ASKED HIMSELF SOMETHING WARM AND LIVING QUICKENETH ME IT MUST BE IN (THE NEIGHBOURHOOD->THAT NEIGHBORHOOD) +3538-163622-0011-879: THE TWO BROTHERS LAUGHED AT HIM AND HIS FATHER AND MOTHER BEGGED HIM NOT TO GO BUT ALL TO NO PURPOSE AND (CINDERLAD->SINDERLAD) SET OUT ON HIS WAY +3528-168669-0078-773: MARTIN THE ELEVENTH GENERAL OF THE CARTHUSIANS GAVE TO HIS ORDER THIS DEVICE STAT (CRUX DUM VOLVITUR->CREW DUME VOLVETER) ORBIS +4198-61336-0019-1254: THUS WAS (URARTU->URYTU) CRIPPLED AND HUMILIATED IT NEVER REGAINED ITS FORMER PRESTIGE AMONG THE NORTHERN STATES +3997-182399-0010-1164: SO WE UNS SIT ON THE CHIMNEY TOPS WHENEVER (OL->OLE) JACK FROST GETS TO STRAYING DOWN WHERE HE HAVE NO BUSINESS +4294-14317-0002-1268: IT WAS AN EXTREMELY DIFFICULT TASK AND I WAS ANXIOUS TO OBSERVE ALL THE NICETIES OF ART WHICH I HAD LEARNED SO AS NOT TO LAPSE INTO SOME ERROR +4350-9170-0006-1388: THE POSSIBILITY OF APPLYING BODILY VIOLENCE TO PEOPLE IS PROVIDED ABOVE ALL BY AN ORGANIZATION OF ARMED MEN TRAINED TO ACT IN UNISON (IN->AND) SUBMISSION TO ONE WILL +6432-63723-0055-2154: I WENT OVER THEM THE OTHER DAY AND NOTICED SOME WERE MISSING THOUGH I SAW THEM ALL WHEN I PAID A VISIT TO HER JUST A SHORT TIME BEFORE SHE WAS KILLED +7018-75789-0008-2216: O MY BROTHER ANSWERED HE WE ARE HUSBANDMEN AND TILLERS OF THE SOIL WHO CAME OUT TO WATER OUR FIELDS AND PLANTATIONS AND FINDING THEE ASLEEP ON THIS RAFT LAID HOLD OF IT AND MADE IT FAST BY US AGAINST THOU SHOULDST AWAKE AT THY LEISURE +4350-9170-0025-1407: THE REASON TO WHICH HE GAVE EXPRESSION IS ESSENTIALLY THE SAME AS THAT WHICH MADE THE FRENCH KINGS AND THE POPES ENGAGE SWISS AND SCOTCH GUARDS AND MAKES THE RUSSIAN AUTHORITIES OF TO DAY SO CAREFULLY DISTRIBUTE THE RECRUITS SO THAT THE REGIMENTS FROM THE FRONTIERS ARE STATIONED IN CENTRAL DISTRICTS AND THE REGIMENTS FROM THE CENTER ARE STATIONED ON THE FRONTIERS +8461-281231-0021-2921: BEFORE LONG THE TOWERING FLAMES HAD SURMOUNTED EVERY OBSTRUCTION AND ROSE TO THE EVENING SKIES ONE HUGE AND BURNING BEACON SEEN FAR AND WIDE THROUGH THE ADJACENT COUNTRY TOWER AFTER TOWER CRASHED DOWN WITH BLAZING ROOF AND RAFTER +5764-299665-0007-1811: IS HE RESPONSIBLE FOR THE CENTURIES OF SLAVERY FOR THE BACKS THAT HAVE BEEN SCARRED WITH (THE->A) LASH FOR THE BABES THAT HAVE BEEN SOLD FROM THE BREASTS OF MOTHERS FOR THE FAMILIES THAT HAVE BEEN SEPARATED AND DESTROYED +3331-159609-0001-657: FORTUNATELY EVERY ONE WAS SO BUSY WITH THE NECESSARY PREPARATIONS THAT THERE WAS NO TIME FOR ROMANCE OF ANY SORT AND THE FOUR YOUNG PEOPLE WORKED TOGETHER AS SOBERLY AND SENSIBLY AS IF ALL SORTS OF EMOTIONS WERE NOT BOTTLED UP IN THEIR RESPECTIVE HEARTS +6070-86744-0026-1947: I CONFESS HE ASKED ME NONE NO HE MERELY CAME AND FREED ME FROM THE HANDS OF SIGNOR VAMPA WHERE I CAN ASSURE YOU IN 
SPITE OF ALL MY OUTWARD APPEARANCE OF EASE AND UNCONCERN I DID NOT VERY PARTICULARLY CARE TO REMAIN +2033-164915-0003-237: AFTER (A WHILE->AWHILE) THEY CAME TO THEMSELVES AND NUZHAT AL ZAMAN REJOICED WITH EXCEEDING JOY OPPRESSION AND DEPRESSION LEFT HER AND GLADNESS TOOK THE MASTERY OF HER AND SHE REPEATED THESE VERSES +8280-266249-0060-2861: OH HOW KIND HOW VERY KIND MISSUS DALY SAID WITH TEARS OF JOY AND GRATITUDE WE HAVE HARDLY KNOWN HOW WE SHOULD MEET THE MOST NECESSARY EXPENSES OF THIS TRIP BUT HAVE BEEN TRYING TO CAST OUR CARE UPON THE LORD ASKING HIM TO PROVIDE +533-131562-0009-1616: I SHALL PUT YOU UPON A SMALL MONTHLY ALLOWANCE IN FUTURE FOR YOUR OWN PRIVATE EXPENSES AND YOU NEEDN'T TROUBLE YOURSELF ANY MORE ABOUT MY CONCERNS I SHALL LOOK OUT FOR A STEWARD MY DEAR I WON'T EXPOSE YOU TO (THE->*) TEMPTATION +2414-159411-0004-327: THEN THE BRAHMAN TOOK PITY ON HIM AND OPENED THE CAGE DOOR BUT NO SOONER HAD HE (DONE->TURNED) SO THAN THE TIGER JUMPING OUT SAID NOW I WILL EAT YOU FIRST AND DRINK THE WATER AFTERWARDS +7018-75789-0024-2232: SHE SAID IT HATH REACHED ME O AUSPICIOUS KING THAT WHEN SINDBAD THE SEAMAN HAD RELATED THE HISTORY OF WHAT (BEFEL->BEFELL) HIM IN HIS SIXTH VOYAGE AND ALL THE COMPANY HAD DISPERSED SINDBAD THE LANDSMAN WENT HOME AND SLEPT AS OF WONT +3331-159605-0000-609: SHE PULLED HER HAIR DOWN TURNED HER SKIRT BACK PUT HER FEET ON THE FENDER AND TOOK (PUTTEL->PATTER) INTO HER LAP ALL OF WHICH ARRANGEMENTS SIGNIFIED THAT SOMETHING VERY IMPORTANT HAD GOT TO BE THOUGHT OVER AND SETTLED +8280-266249-0028-2829: NO DOUBT NO DOUBT RETURNED MISTER (LILBURN->LOWBURN) BUT IF THY RIGHT EYE OFFEND THEE PLUCK IT OUT AND CAST IT FROM THEE FOR IT IS PROFITABLE FOR THEE THAT ONE OF THY MEMBERS SHOULD PERISH AND NOT THAT THY WHOLE BODY SHOULD BE CAST INTO HELL +3080-5040-0019-594: I SEE YOU CAN (CHIDE->CHIT) WHEN YOU PLEASE AND WITH AUTHORITY BUT I DESERVE IT I CONFESS AND ALL I CAN SAY FOR MYSELF IS THAT MY FAULT PROCEEDED FROM A VERY GOOD PRINCIPLE IN ME +2609-156975-0038-399: THE CONSTANT NECESSITY OF MEETING THE DANGERS OF THE WILDERNESS AND OF DEFENDING THE FLOCKS (ENTRUSTED->INTRUSTED) TO MOSES CARE DEVELOPED HIS COURAGE AND POWER OF LEADERSHIP AND ACTION +3538-163619-0011-864: OUT ON (THEE->ME) UGLY BUSHY BRIDE SLEEPING SO SOFT BY THE YOUNG KING'S SIDE ON SAND AND STONES MY BED I MAKE AND MY BROTHER SLEEPS WITH (THE->A) COLD SNAKE UNPITIED AND UNWEPT +7105-2340-0007-2289: FROM HIS LATE (SCHOOLDAYS->SCHOOL DAYS) ONWARD HE HAD BEEN POSSESSED BY AN ACUTE AND OBSTINATE FORM OF (KLEPTOMANIA->CLAPTOMANIA) HE HAD THE ACQUISITIVE INSTINCT OF THE COLLECTOR WITHOUT ANY OF THE COLLECTOR'S DISCRIMINATION +5484-24318-0013-1779: LASTLY WITH EARNEST WARMTH SHE BESOUGHT HIM BEFORE TAKING THE PRISONERS AWAY TO PERMIT HER TO SPEAK TO THE COMMANDING GENERAL PHILIPPUS HER FATHER'S GUEST WHO SHE WAS CERTAIN WAS IN THE PALACE +8461-281231-0019-2919: BUT IN OTHER PARTS THE BESIEGERS PURSUED THE DEFENDERS OF THE CASTLE FROM CHAMBER TO CHAMBER AND SATIATED IN THEIR BLOOD THE VENGEANCE WHICH HAD LONG ANIMATED THEM AGAINST THE SOLDIERS OF THE TYRANT FRONT DE BOEUF +6070-86744-0008-1929: NOW THEN SAID THE COUNT RETURNING HIS TABLETS TO HIS POCKET MAKE YOURSELF PERFECTLY EASY THE HAND OF YOUR TIME PIECE WILL NOT BE MORE ACCURATE IN MARKING THE TIME THAN MYSELF +4350-9170-0052-1434: THE GREAT EXTENSION OF MEANS OF COMMUNICATION AND INTERCHANGE OF IDEAS HAS MADE MEN COMPLETELY ABLE TO DISPENSE WITH STATE AID IN FORMING SOCIETIES ASSOCIATIONS CORPORATIONS AND CONGRESSES FOR SCIENTIFIC ECONOMIC AND POLITICAL 
OBJECTS +6128-63244-0006-2019: SHE KNEW HER PLACE IN THE BOSTON HIERARCHY AND IT WAS NOT WHAT MISSUS (FARRINDER->FARRINGERS) SUPPOSED SO THAT THERE WAS A WANT OF PERSPECTIVE IN TALKING TO HER AS IF SHE HAD BEEN A REPRESENTATIVE OF THE ARISTOCRACY +533-131556-0003-1584: I SOMETIMES THINK I OUGHT TO GIVE HIM CREDIT FOR THE GOOD FEELING HE SIMULATES SO WELL AND THEN AGAIN I THINK IT IS MY DUTY TO SUSPECT HIM UNDER THE PECULIAR CIRCUMSTANCES IN WHICH I AM PLACED +2609-169640-0000-415: (PROAS->PEROCKS) IN THAT QUARTER WERE USUALLY (DISTRUSTED->DESTRUCTED) BY (*->THE) SHIPS IT IS TRUE BUT THE SEA IS FULL OF THEM (AND->*) FAR MORE ARE INNOCENT THAN ARE GUILTY OF ANY ACTS OF VIOLENCE +4350-9170-0037-1419: BUT WITH UNIVERSAL MILITARY SERVICE IT COMES TO PASS THAT MEN AFTER MAKING EVERY SACRIFICE TO GET RID OF THE CRUELTY OF STRIFE AND THE INSECURITY OF EXISTENCE ARE CALLED UPON TO FACE ALL THE PERILS THEY HAD MEANT TO AVOID +3080-5040-0021-596: YOU ARE SATISFIED I HOPE (ERE->AT) THIS THAT I (SCAPED->ESCAPED) DROWNING +7975-280076-0012-2501: I THEN WENT TO ARKANSAS AND RETURNED TO SAINT CLAIR COUNTY ABOUT THE FIRST OF MAY +3538-142836-0025-851: THE (SPADDLE->SPATTLE) IS GENERALLY MADE OF COPPER KEPT BRIGHT AND CLEAN +4198-12259-0021-1196: IT IS THE COMPULSORY OF DRINKERS IT IS A (PULLEY->PULLY) +7975-280063-0009-2485: MAJOR FOSTER IN A LETTER TO JUDGE GEORGE M (BENNETT->BENARD) OF MINNEAPOLIS SAID +2414-128292-0007-297: BUT (DOTH->DO) ZARATHUSTRA NEED TO BE FRIGHTENED BY HIS SHADOW +1998-29455-0020-191: THEY WENT ON UP THE HILL AS HAPPY AS (ANY ONE->ANYONE) NEED WISH TO BE +1998-29454-0007-131: WHAT'S UP (MATEY->MAITIE) LOST YOUR WAY DICKIE EXPLAINED +3005-163391-0017-507: OLD PETER HAD FRIENDS AND THEY AIN'T GOING TO LET THEM COME TO NO HARM +2609-169640-0016-431: THE STILLNESS THAT REIGNED ON BOTH SIDES WAS LIKE THAT OF DEATH +3997-180294-0018-1106: WHEN I REACHED HOME I WAS IN A STATE OF MAD GAIETY +3528-168669-0021-716: AND TO HOLD YOUR PEACE ABOUT EVERYTHING YES (REVEREND->REVERED) MOTHER +2609-169640-0017-432: THE (JOHN->GIAN) BEHAVED BEAUTIFULLY (AND->HE) CAME (ROUND->AROUND) LIKE A (TOP->TART) +1688-142285-0027-27: I DON'T SET HIM UP FOR A HERO OR ANYTHING OF THAT KIND +4294-35475-0010-1301: SCISSORS GROW A GIANT'S HEIGHT AND SAVE US FROM THE OGRE'S MIGHT +6938-70848-0028-2185: THE WILL OF MILLIONS OF WORKERS IS NOW CONCENTRATED IN (THIS->THE) HALL +3080-5032-0003-551: WHY (DID->DIDN'T) YOU NOT SEND ME THAT NEWS AND A GARLAND +6432-63723-0011-2110: HE REMEMBERED THAT CYNTHIA AND GRAFTON HAD ONCE BEEN IN LOVE WITH EACH OTHER +4198-12259-0006-1181: WHAT IT SEEMS I DO NOT DRINK BUT (BY->BUY) AN ATTORNEY +6432-63723-0012-2111: SHE SAID HE HAD STRUCK HER MORE THAN ONCE AND SHE COULD STAND IT NO LONGER +3005-163399-0005-522: WELL IT'S LUCKY BECAUSE SOMETIMES PEOPLE DO GET HURT +1998-29455-0036-207: DICKIE WAS FULL OF QUESTIONS BUT MISTER (BEALE->BELL) HAD NO ANSWERS FOR THEM +5442-41169-0006-1706: LAST YEAR AT OUR DISTRICT MARSHAL NIKOLAY IVANOVITCH'S +6070-86745-0009-1960: NO NO MY DEAR FELLOW DO NOT CONFOUND OUR PLANS +1688-142285-0071-71: SHE PUT HER HAND TO IT AND BECAME GHASTLY PALE +3528-168669-0051-746: FORTUNATELY THE PRIORESS COMPLETELY ABSORBED IN HER OWN THOUGHTS DID NOT HEAR IT +6432-63722-0011-2050: YOU'RE ON THE DARCY CASE THEY TELL ME IN A WAY YES +3528-168669-0126-821: AH (THE DE->VIDA) EXCLAIMED FAUCHELEVENT +3764-168671-0043-1076: FAUCHELEVENT THOUGHT I AM LOST +6938-70848-0014-2171: FEARFUL TUMULT (CRIES->CHRIST) DOWN WITH THE BOLSHEVIKI +3538-163622-0013-881: I WOULD MUCH 
RATHER HAVE THE PRINCESS SAID CINDERLAD +3528-168669-0066-761: YOU WILL HAVE AN IRON BAR YES BUT +7902-96595-0011-2440: IF SHE KNEW EVIL HAD COME TO THE POOR LAD HER FACE WOULD TELL TALES LIKE PRINT +7902-96594-0029-2425: PIPE AWAY (THE MEN TO->THEM INTO) THAT BOAT THERE HE SAID (AND->*) AS THE CREW SPRANG IN +8188-269290-0040-2771: I WILL TALK WITH YOU (BELLE ACHESON->BELL ARCHISON) PRESENTLY SHE SAID +1688-142285-0026-26: PERSONALLY AS YOU CALL IT AND ALL +3005-163399-0020-537: HAS HE COME NO SAYS HER HUSBAND +7018-75789-0023-2231: WHEN IT WAS THE FIVE HUNDRED AND SIXTY THIRD NIGHT +7975-280076-0026-2515: I STAYED THERE UNTIL THE EIGHTH OF FEBRUARY EIGHTEEN SEVENTY FOUR +7105-2330-0036-2276: LEND US THE INSTRUMENTS SAID THE CHIEF (ORGANISER->ORGANIZER) +3528-168669-0006-701: (THE->THIS) SLAB OF THE PAVEMENT WHICH IS AT THE SIDE OF THE ALTAR +6938-70848-0029-2186: A NEW HUMANITY WILL BE BORN OF THIS WAR +4350-10919-0013-1361: AND HOW ABOUT A TOUR ABROAD ASKED THE FAMILY DOCTOR +8131-117016-0035-2590: COLONEL THEY'D KILL ME I DON'T KNOW +5764-299665-0052-1856: THEY MUFFLED ALL THE BELLS OF GLADNESS +2414-159411-0001-324: THE BRAHMAN ANSWERED NO I WILL NOT FOR IF I LET YOU OUT OF THE CAGE YOU WILL EAT ME +1998-29455-0017-188: THE TWO TRAVELLERS WERE LEFT FACING EACH OTHER THE RICHER BY A PENNY AND OH WONDERFUL GOOD FORTUNE A WHOLE HALF CROWN +533-131556-0002-1583: THE WORD STARES ME IN THE FACE LIKE A GUILTY CONFESSION BUT IT IS TRUE I HATE HIM I HATE HIM +3005-163390-0014-473: WELL THAT'S WHAT I'M A SAYING ALL KINGS IS MOSTLY RAPSCALLIONS AS FUR AS I CAN MAKE OUT IS (DAT->THAT) SO +3997-180294-0000-1088: THE DUKE COMES EVERY MORNING THEY WILL TELL HIM WHEN HE COMES THAT I AM ASLEEP AND PERHAPS HE WILL WAIT UNTIL I (WAKE->AWAKE) +4852-28311-0005-1448: MIKE BECAME UNEASY AND FISHED AN ELASTIC BAND OUT OF HIS POCKET MADE A FLICK OF PAPER AND SENT IT SOARING OUT INTO (M->EM) STREET +3538-142836-0022-848: THE SHAPE OF THE DISHES VARIES AT DIFFERENT PERIODS THE PREVAILING FASHION AT PRESENT BEING OVAL AND CIRCULAR DISHES ON STEMS +3080-5032-0001-549: I KNEW YOU COULD NOT CHOOSE BUT LIKE HER BUT YET LET ME TELL YOU YOU HAVE SEEN BUT THE WORST OF HER +8461-278226-0004-2888: I REMEMBER ALL THE RUBENSES AT THE LOUVRE FOR I SAW THEM THREE YEARS AGO WHEN I WAS STAYING IN PARIS WITH GRANDPAPA +367-130732-0019-939: HEAT THIS IN A STEWPAN AND WHEN SIMMERING ADD THE SHERRY AND CRAB MEAT AND LET ALL COOK TOGETHER WITH A SLOW FIRE FOR EIGHT MINUTES +7902-96594-0027-2423: HE SWUNG ROUND WALKED AFT AND BEGAN SWEEPING (THE SHORE->ASHORE) AGAIN WITH HIS GLASS WHILE THE MASTER AND DICK EXCHANGED GLANCES WHICH MEANT A GREAT DEAL +1688-142285-0084-84: (WHY->WELL) I (*->I) WOULD APPLY (*->IT) TO SOME GOOD (HOUSE MOTHER->HOUSEMOTHER) TO RECOMMEND ME ONE KNOWN TO HERSELF OR HER SERVANTS +2609-169640-0014-429: I HEARD THE RATTLING OF THE BOARDING (PIKES->PIPES) TOO AS THEY WERE CUT ADRIFT FROM THE SPANKER BOOM AND FELL UPON THE DECKS +3538-142836-0007-833: BEFORE SUGAR WAS IN USE HONEY WAS EMPLOYED TO PRESERVE MANY VEGETABLE PRODUCTIONS THOUGH THIS SUBSTANCE HAS NOW GIVEN WAY TO THE JUICE OF THE SUGAR CANE +7975-280084-0010-2528: EVERY TIME I SAW (ANY ONE->ANYONE) WITH A BEAD ON ME I WOULD DROP OFF MY HORSE AND TRY TO DRIVE THE SHOOTER INSIDE BUT I COULD NOT SEE IN EVERY DIRECTION +8280-266249-0027-2828: IT MUST REQUIRE A GOOD DEAL OF RESOLUTION FOR ONE WHO HAS BECOME FOND OF THE INDULGENCE TO GIVE IT UP REMARKED MISTER (DALY->DALEY) +6128-63240-0017-1988: THIS LAST REMARK HE MADE AT (A->HER) VENTURE FOR HE HAD 
NATURALLY NOT DEVOTED ANY SUPPOSITION WHATEVER TO MISSUS (LUNA->LOINER) +1998-15444-0003-99: IN MAKING A POST MORTEM EXAMINATION THE (ALIMENTARY->ELEMENTARY) CANAL SHOULD BE REMOVED AND PRESERVED FOR FURTHER INVESTIGATION +7975-280076-0009-2498: AT THE TIME OF THE (GALLATIN->GLLOTON) BANK ROBBERY I WAS GATHERING CATTLE IN ELLIS COUNTY TEXAS CATTLE (THAT->HAD) I BOUGHT FROM (PLEAS->PLEDS) TAYLOR AND RECTOR +7105-2330-0018-2258: HE SAYS HE NEVER HAS LEFT PRISON WITHOUT A (BRASS->BREAST) BAND TO PLAY HIM OUT AND HE'S NOT GOING TO GO WITHOUT ONE NOW +2033-164915-0014-248: SO FARE YE FORWARDS NO HARM SHALL (BEFAL->BEFALL) YOU TILL YOU JOIN HIS GRAND WAZIR DANDAN +2609-156975-0008-369: THE BIBLICAL NARRATIVES APPARENTLY DISAGREE REGARDING THE DURATION OF THE SOJOURN IN EGYPT +3005-163399-0016-533: I OPENED MY MOUTH TO BEGIN BUT SHE GRABBED ME AND HUSTLED ME IN BEHIND THE BED AND SAYS HERE (HE->IT) COMES +6070-86745-0007-1958: GOOD MORNING LUCIEN GOOD MORNING SAID ALBERT YOUR PUNCTUALITY REALLY ALARMS ME +4198-12259-0008-1183: IF I DRINK NOT I AM A GROUND DRY GRAVELLED AND SPENT I AM STARK DEAD WITHOUT DRINK AND MY SOUL READY TO FLY INTO SOME MARSH AMONGST FROGS THE SOUL NEVER DWELLS IN A DRY PLACE (DROUTH KILLS->DROUGHTH KILLETH) IT +8280-266249-0016-2817: THE HALF DOZEN CIGARS WHICH MOST SMOKERS USE A DAY CONTAIN SIX OR SEVEN GRAINS ENOUGH IF CONCENTRATED AND ABSORBED TO KILL THREE MEN AND A POUND OF TOBACCO ACCORDING TO ITS QUALITY CONTAINS FROM ONE QUARTER TO ONE AND A QUARTER OUNCES +5442-41168-0021-1693: IN REPLY SNETKOV SPOKE OF THE TRUST THE NOBLEMEN OF THE PROVINCE HAD PLACED IN HIM THE AFFECTION THEY HAD SHOWN HIM WHICH HE DID NOT DESERVE AS HIS ONLY MERIT HAD BEEN HIS ATTACHMENT TO THE NOBILITY TO WHOM HE HAD DEVOTED TWELVE YEARS OF SERVICE +4294-9934-0000-1318: HE FELT WHAT THE EARTH MAY POSSIBLY FEEL AT THE MOMENT WHEN IT IS TORN OPEN WITH THE IRON IN ORDER THAT GRAIN MAY BE DEPOSITED WITHIN IT IT FEELS ONLY THE WOUND THE QUIVER OF THE GERM (AND->*) THE JOY OF THE FRUIT ONLY ARRIVE LATER +6128-63240-0007-1978: THESE THINGS THE EYES ESPECIALLY WITH THEIR SMOULDERING FIRE MIGHT HAVE INDICATED THAT HE WAS TO BE A GREAT AMERICAN STATESMAN OR ON THE OTHER HAND THEY MIGHT SIMPLY HAVE PROVED THAT HE CAME FROM CAROLINA OR ALABAMA +6432-63723-0029-2128: THEN IT WAS SAID OF (LARCH->LARGE) THAT SOON AFTER THE ECHOES OF THE WEDDING CHIMES HAD DIED AWAY HE HAD BEGUN TO TREAT HIS WIFE WITH (*->A) REFINED CRUELTY THAT HIDDEN AWAY FROM THE PUBLIC UNDERNEATH HIS HABITUAL MANNER THERE WAS THE RAWNESS OF THE BRUTE +5484-24318-0017-1783: AS SOON AS THE CAPTIVE ARTIST WAS ALONE WITH THE WOMAN HE LOVED HE CLASPED HER HAND POURING FORTH INCOHERENT WORDS OF THE MOST ARDENT GRATITUDE AND WHEN HE FELT HER WARMLY RETURN THE PRESSURE HE COULD NOT RESTRAIN THE DESIRE TO CLASP HER TO HIS HEART +7105-2330-0038-2278: IT WAS A TUNE THEY HAD ALL HEARD HUNDREDS OF TIMES SO THERE WAS NO DIFFICULTY IN TURNING OUT A PASSABLE IMITATION OF IT TO THE IMPROVISED (STRAINS->TRAINS) OF I (DIDN'T->DON'T) WANT TO DO IT THE (PRISONER STRODE->PRISONERS STROLLED) FORTH TO FREEDOM +3764-168671-0000-1033: ON THE FOLLOWING DAY AS THE SUN WAS DECLINING THE VERY RARE PASSERS BY ON THE BOULEVARD (DU MAINE->DES MAIN) PULLED OFF THEIR HATS TO AN OLD FASHIONED HEARSE ORNAMENTED WITH SKULLS CROSS BONES AND TEARS +3331-159609-0002-658: PITY THAT THE END SHOULD COME SO SOON BUT THE HOUR DID ITS WORK AND WENT ITS WAY LEAVING A CLEARER ATMOSPHERE BEHIND THOUGH THE YOUNG FOLKS DID NOT SEE IT THEN FOR THEIR EYES WERE DIM BECAUSE OF THE PARTINGS 
THAT MUST BE +2033-164916-0002-254: WHEN THE MINISTER HEARD THESE WORDS HE REJOICED WITH GREAT JOY AND SAID O CHAMBERLAIN TELL ME THE TALE OF THE TWAIN AND WHAT (BEFEL->BEFELL) THEM AND THE CAUSE OF THEIR LONG ABSENCE +4198-12259-0004-1179: BY THE BELLY OF (SANCT->SAINT) BUFF LET US TALK OF OUR DRINK I NEVER DRINK BUT AT MY HOURS LIKE THE POPE'S MULE +7018-75789-0021-2229: SUCH THEN O MY BROTHERS IS THE HISTORY OF WHAT (BEFEL->BEFELL) ME IN MY SIXTH VOYAGE AND TO MORROW INSHALLAH +3005-163389-0005-445: BECAUSE THEY'RE AFRAID THE MAN'S FRIENDS WILL SHOOT THEM IN THE BACK IN THE (DARKAND->DARK AND) IT'S JUST WHAT THEY WOULD DO +3080-5032-0017-565: IF MARRIAGE AGREES NO BETTER WITH OTHER PEOPLE THAN IT DOES WITH HIM I SHALL PRAY THAT ALL MY FRIENDS MAY (SCAPE->ESCAPE) IT +4350-9170-0038-1420: BUT INSTEAD OF DOING THAT THEY EXPOSE THE INDIVIDUALS TO THE SAME NECESSITY OF STRIFE SUBSTITUTING STRIFE WITH INDIVIDUALS OF OTHER STATES FOR STRIFE WITH NEIGHBORS +2414-159411-0032-355: WHEN THE BRAHMAN HAD DONE THIS THE JACKAL SAID OH YOU WICKED AND UNGRATEFUL TIGER +1688-142285-0010-10: HIS FATHER SPECULATED WILDLY FAILED AND THEN KILLED HIMSELF BECAUSE HE COULD NOT BEAR THE DISGRACE +1998-15444-0019-115: TWO COOL THE MIXTURE AND FILTER WASH THE RESIDUE WITH STRONG ALCOHOL AND MIX THE (FILTRATES->FULL TRADES) +6070-86745-0008-1959: YOU WHOM I EXPECTED LAST YOU ARRIVE AT FIVE MINUTES TO TEN WHEN THE TIME FIXED WAS HALF PAST +1998-29455-0019-190: YOU STICK TO THAT SAID BEALE RADIANT WITH DELIGHT YOU'RE A FAIR MASTERPIECE YOU ARE YOU EARNED IT HONEST IF EVER (A KID->KIT) DONE +8188-269288-0051-2724: SHE TURNED OFF THE LIGHT AND LIT A CANDLE WHICH SHE PUT BEHIND HER SCREEN THEN PREPARED TO GET INTO BED +4198-12259-0035-1210: I HAVE A REMEDY AGAINST THIRST QUITE CONTRARY TO THAT WHICH IS GOOD AGAINST THE BITING OF A MAD DOG +4294-14317-0003-1269: I IN MY TURN FEEL THE SAME DESIRE AND HOPE TO PLAY MY PART LIKE THEM THEREFORE MY LORD GIVE ME THE LEAVE TO GO +4294-35475-0009-1300: AT THIS MOMENT THERE WAS A DISTANT RUMBLING AS OF THUNDER TIS THE OGRE CRIED THE FAIRY WE MUST HASTEN +3538-142836-0009-835: BOIL THEM UP THREE DAYS SUCCESSIVELY SKIMMING EACH TIME AND THEY WILL THEN BE FINISHED AND IN A STATE FIT TO BE PUT INTO POTS FOR USE +7105-2330-0034-2274: DEMANDED THE CHIEF ORGANISER OF THE PRISON GOVERNOR DRUMS CYMBALS THOSE SORT OF THINGS +4198-61336-0004-1239: A FIERCE BATTLE ENSUED AND ONE OF ITS DRAMATIC INCIDENTS WAS A SINGLE COMBAT BETWEEN THE RIVAL KINGS +3080-5040-0020-595: WE DARE NOT LET OUR TONGUES LIE MORE ON ONE SIDE OF OUR (MOUTHS->MOUTH) THAN (T'OTHER->THE OTHER) FOR FEAR OF OVERTURNING IT +6128-63240-0003-1974: JUST AS I AM THE VISITOR INQUIRED PRESENTING HIMSELF WITH RATHER A (WORK A DAY->WORKADAY) ASPECT +3764-168671-0011-1044: THE NUNS HAD COMMITTED IT NOT ONLY WITHOUT DIFFICULTY BUT EVEN WITH THE APPLAUSE OF THEIR OWN CONSCIENCES +4852-28330-0009-1539: HIS FACE (FROZE->ROSE) WITH NERVOUSNESS THAT THIS MIGHT (NOT->*) DO (*->OUTDO) AS AN ANSWER AND HE STOOD STIFF AND STILL BEFORE CAPTAIN BLIZZARD +2033-164914-0009-220: HE WHO RECITED THE FIRST TIME HATH RECITED A SECOND TIME AND I HEARD HIM HARD BY +533-131564-0020-1644: IF YOU INTEND TO REFORM INVOKE GOD'S BLESSING (HIS->IS) MERCY (AND HIS AID->IN THIS APE) NOT (HIS CURSE->DISCURSE) +2414-159411-0017-340: THEN THE TIGER BEGAN TO ROAR AND SAID (THE->*) JUDGMENT OF ALL IS AGAINST YOU O BRAHMAN +4852-28312-0028-1498: THE POST WAS PLANTED AT THE EDGE OF WHAT WAS NOW A BROAD AND MUDDY ROAD +3997-180294-0004-1092: I DON'T KNOW HOW IT IS BUT IT 
SEEMS TO ME AS IF I DO +367-130732-0008-928: PUT THIS IN A DOUBLE BOILER AND LET COOK UNTIL THICK STIRRING CONSTANTLY +367-130732-0023-943: ALL (OF->*) THE BETTER CLASS RESTAURANTS HOWEVER WILL SERVE THEM IF YOU ORDER THEM +7902-96595-0013-2442: THE MAN SHOOK HIS HEAD AND STARED AS IF HE DIDN'T HALF UNDERSTAND THE DRIFT OF WHAT WAS SAID +3331-159605-0033-642: (WAGGING->WORKING) TO AND FRO AS USUAL WHAT'S THE NEWS WITH YOU +4198-61336-0023-1258: HE CONDEMNED ISRAEL FOR ITS IDOLATRIES AND CRIED +533-131556-0021-1602: GIVE ME NO THANKS IT IS NOT FOR YOUR SAKE THAT I REFRAIN +3528-168669-0128-823: HE MADE HASTE TO IMPROVISE AN EXPEDIENT TO MAKE HER FORGET THE OATH +3997-180297-0031-1153: I EMBRACED MARGUERITE UNTIL SHE WAS ALMOST STIFLED +6938-70848-0030-2187: I GREET YOU WITH THE CHRISTENING OF A NEW RUSSIAN LIFE AND FREEDOM +7975-280057-0017-2472: FROM HARRISONVILLE SHE WENT TO (WAVERLY->WAVERLEY) WHERE SHE WAS (HOUNDED->HOUNDY) CONTINUALLY +7975-280076-0013-2502: (I->AND) WENT TO KANSAS WHERE OUR CATTLE WERE IN WOODSON COUNTY AT COLONEL (RIDGE'S->RIDGES) +5764-299665-0023-1827: MOST PEOPLE CLING TO THE SUPERNATURAL +3764-168671-0014-1047: A PRINCE IS NOTHING IN THE PRESENCE OF A PRINCIPLE +1688-142285-0058-58: REMEMBER WHO GAVE IT (*->TO) YOU AND MADE IT WHAT IT IS +3005-163391-0003-493: JIM CLEANED UP THE CANOE AND I GOT MY PADDLE READY +3764-168670-0057-1032: THAT IS SETTLED FATHER FAUCHELEVENT ALL WILL GO WELL +5442-41168-0020-1692: BUT THE OLD PARTY DID NOT CONSIDER THEMSELVES CONQUERED +3080-5040-0023-598: HOW WELCOME YOU WILL BE BUT ALAS +7902-96592-0037-2382: WHY YOUR CLOTHES DON'T FIT YOU AND YOUR (CAP'S->CAPS) PUT ON ALL (SKEW REW->SKEWER) +4852-28312-0013-1483: I I JUST WONDERED IF THE PLACE WAS STILL OPEN +4852-28311-0025-1468: HOW THEN DID HE LIVE AND WHAT DID HE EVER SELL +4350-9170-0011-1393: ONLY UNDER THOSE CONDITIONS COULD THE SOCIAL ORGANIZATION BE JUSTIFIED +4852-28311-0024-1467: HE HAD NEVER SEEN (ANYONE->ANY ONE) GO INTO MISTER WICKER'S SHOP NOW HE THOUGHT OF (IT->THAT) +2414-159411-0035-358: YOUR ROAD LIES THAT WAY AND (MINE->MIND) THIS +3528-168669-0022-717: WHEN THE VAULT IS OPEN I WILL CLOSE IT AGAIN +3528-168669-0007-702: THE (SLAB->SLAP) WHICH CLOSES THE VAULT YES +3080-5032-0020-568: IF I HAD A PICTURE THAT WERE FIT FOR YOU YOU SHOULD HAVE IT +4198-61336-0008-1243: (TIGLATH PILESER->TIGLAS BELEISURE) HAD THEREFORE TO MARCH WESTWARD AGAIN +4294-32859-0002-1287: THE STORY OF (FRIDOLIN->FRIEDOLIN) AND (RETZCH'S->WRETCH'S) PRETTY OUTLINES +4852-28312-0012-1482: (JAKEY HARRIS HIS->JAKIE HARRIS'S) NAME (IS AND->ISN'T) HE REALLY NEEDS THE JOB +8188-269288-0024-2697: I MUST GO INTO THE GROUNDS THE AIR IS STIFLING +3764-168670-0042-1017: JEAN VALJEAN HAD BEEN IN WORSE STRAITS THAN THIS +1998-29454-0024-148: THE SUN SHOT LONG GOLDEN BEAMS THROUGH THE GAPS IN THE HEDGE +3005-163399-0006-523: AND I THINK HE DIED AFTERWARDS HE WAS A BAPTIST +3764-168670-0027-1002: HOW LONG IS THE COFFIN SIX FEET +8280-266249-0031-2832: THE EYES OF THE WHOLE PARTY WERE AT ONCE TURNED IN THAT DIRECTION +3538-163622-0000-868: WILT THOU SERVE ME AND WATCH MY SEVEN FOALS ASKED THE KING +3528-168669-0023-718: BUT BEFORE THAT WHAT REVEREND MOTHER +3997-180294-0019-1107: THE WOMAN BECOMES THE MAN'S MISTRESS AND LOVES HIM +4350-10919-0015-1363: THE FAMILY DOCTOR LISTENED ATTENTIVELY AND RESPECTFULLY +3538-163622-0014-882: AND THUS THEY JOURNEYED ONWARDS A LONG LONG WAY +1688-142285-0043-43: AT LAST SHE SAID IN A LOW VOICE +3528-168656-0005-687: THE RULE OF (FONTEVRAULT->FONTREVALLE) DID NOT FORBID 
THIS +4294-14317-0014-1280: I AM WILLING TO ENTER INTO COMPETITION WITH THE ANCIENTS AND FEEL ABLE TO SURPASS THEM FOR SINCE THOSE EARLY DAYS IN WHICH I MADE THE MEDALS OF POPE CLEMENT I HAVE LEARNED SO MUCH THAT I CAN NOW PRODUCE FAR BETTER PIECES OF THE KIND I THINK I CAN ALSO OUTDO THE COINS I STRUCK FOR DUKE (ALESSANDRO->ALISANDRO) WHICH ARE STILL HELD IN HIGH ESTEEM IN LIKE MANNER I COULD MAKE FOR YOU LARGE PIECES OF GOLD AND SILVER PLATE AS I DID SO OFTEN FOR THAT NOBLE MONARCH KING FRANCIS OF FRANCE THANKS TO THE GREAT CONVENIENCES HE ALLOWED ME WITHOUT EVER LOSING TIME FOR THE EXECUTION OF COLOSSAL STATUES OR OTHER WORKS OF THE (SCULPTORS->SCULPTOR'S) CRAFT +3997-182399-0006-1160: WAY BACK IN THE DAYS WHEN GRANDPAP BUZZARD HAD HIS (LIL->LITTLE) FALLING OUT WITH (OL->OLD) KING EAGLE AND DONE (FLY->FLIES) SO HIGH HE (SCO'TCH->SCORCHED) THE FEATHERS (OFFEN->OFF) HIS (HAID->HEAD) HE HAD (A->*) COUSIN DID GRANDPAP BUZZARD AND THIS COUSIN WAS (JES->JUST) NATURALLY LAZY AND NO COUNT +3997-180294-0012-1100: WHEN A CREATURE WHO HAS ALL HER PAST TO REPROACH HERSELF WITH IS TAKEN ALL AT ONCE BY A PROFOUND SINCERE IRRESISTIBLE LOVE OF WHICH SHE HAD NEVER FELT HERSELF CAPABLE WHEN SHE HAS CONFESSED HER LOVE HOW ABSOLUTELY THE MAN WHOM SHE LOVES DOMINATES HER +5442-41169-0000-1700: LEVIN DID NOT CARE TO EAT AND HE WAS NOT SMOKING HE DID NOT WANT TO JOIN HIS OWN FRIENDS THAT IS SERGEY IVANOVITCH STEPAN ARKADYEVITCH SVIAZHSKY AND THE REST BECAUSE VRONSKY IN HIS EQUERRY'S UNIFORM WAS STANDING WITH THEM IN EAGER CONVERSATION +5442-32873-0018-1670: LUKE STUCK (ON->HOME) HIS GREASY (WIDEAWAKE->WIDE AWAKE) AND IN A FEW MINUTES MORE THE DOG CART WAS TRUNDLED OUT INTO THE LANE AND THE HORSE HARNESSED WENT BETWEEN THE SHAFTS WITH THAT WONDERFUL CHEERFULNESS WITH WHICH THEY BEAR TO BE CALLED UP UNDER STARTLING CIRCUMSTANCES AT UNSEASONABLE HOURS +367-130732-0002-922: THIS QUESTION (AND->IN) ANSWER MIGHT WELL GO INTO THE (PRIMER->PRIMARY) OF INFORMATION FOR THOSE WHO COME TO SAN FRANCISCO FROM THE EAST FOR WHAT IS CALLED A LOBSTER IN SAN FRANCISCO IS NOT A LOBSTER AT ALL BUT A CRAYFISH +8280-266249-0026-2827: AND AGAIN I BESEECH YOU THEREFORE BRETHREN BY THE MERCIES OF GOD THAT YE PRESENT YOUR BODIES A LIVING SACRIFICE (HOLY->WHOLLY) ACCEPTABLE UNTO GOD WHICH IS YOUR REASONABLE SERVICE +2609-156975-0006-367: IT SEEMS PROBABLE THAT NOT ALL BUT ONLY PART OF THE TRIBES WHICH (ULTIMATELY COALESCED->OUGHT TO MAKE COROTS) INTO THE HEBREW NATION FOUND THEIR WAY TO EGYPT +3331-159605-0028-637: HE WAS QUICKER TO TAKE A (HINT->HAND) THAN SHE HAD EXPECTED AND BEING BOTH PROUD AND GENEROUS RESOLVED TO SETTLE THE MATTER AT ONCE FOR POLLY'S SAKE AS WELL AS HIS OWN +8461-281231-0001-2901: IT WAS ON THEIR JOURNEY TO THAT TOWN THAT THEY WERE OVERTAKEN ON THE ROAD BY CEDRIC AND HIS PARTY IN WHOSE COMPANY THEY WERE AFTERWARDS CARRIED CAPTIVE TO THE CASTLE OF (TORQUILSTONE->TORKELSTONE) +5484-24318-0012-1778: SOMETIMES WITH TOUCHING ENTREATY SOMETIMES WITH IMPERIOUS COMMAND SHE PROTESTED AFTER GIVING HIM HER NAME THAT THIS MATTER COULD BE NOTHING BUT AN UNFORTUNATE MISTAKE +3331-159605-0013-622: AT FIRST SHE TRIED TO THINK SHE COULD BUT UNFORTUNATELY HEARTS ARE SO CONTRARY THAT THEY WON'T BE OBEDIENT TO REASON WILL OR EVEN GRATITUDE +5484-24317-0031-1763: HOW INDIFFERENT YOU LOOK BUT I TELL YOU HER DEEP BLUE EYES FLASHED AS SHE SPOKE THAT SO LONG AS YOU WERE STILL A GENUINE CREATING ARTIST THE CASE WAS DIFFERENT +2414-159411-0014-337: THREE OUT OF THE SIX HAD (GIVEN->KEEPN) JUDGMENT AGAINST THE BRAHMAN BUT STILL HE DID NOT 
LOSE ALL HOPE AND DETERMINED TO ASK THE OTHER THREE +4294-14317-0017-1283: NOW I HOPE TO SHOW THEM THAT I AM AN OLD SCULPTOR IF GOD SHALL GRANT ME THE BOON OF FINISHING MY PERSEUS FOR THAT NOBLE PIAZZA OF HIS MOST ILLUSTRIOUS EXCELLENCY +7975-280063-0006-2482: JACKMAN WITH A PARTY OF THIRTY SEASONED MEN CHARGED THE INDIANA GUNS AND CAPTURED THEM BUT MAJOR FOSTER LED A GALLANT CHARGE AGAINST THE INVADERS AND RECAPTURED THE PIECES +4198-61336-0003-1238: TIGLATH (PILESER->POLESU) HOWEVER CROSSED THE EUPHRATES AND MOVING NORTHWARD DELIVERED AN UNEXPECTED ATTACK ON THE (URARTIAN->GORACIAN) ARMY IN (QUMMUKH->CUMAC) +3331-159605-0014-623: POLLY FELT A VERY CORDIAL FRIENDSHIP FOR MISTER SYDNEY BUT NOT ONE PARTICLE OF THE LOVE WHICH IS THE ONLY COIN IN WHICH LOVE CAN BE TRULY PAID +2609-156975-0037-398: THE PEASANTS OF THE (VAST ANTOLIAN->VATS ANTONIAN) PLAIN (IN->OF) CENTRAL ASIA MINOR STILL (*->WILL) CALL EVERY LIFE (GIVING->GIING) SPRING GOD HATH GIVEN +8461-281231-0032-2932: BEFORE REACHING HIS DESTINATION HE WAS TOLD THAT (LUCAS->LYCAS) DE BEAUMANOIR THE GRAND MASTER OF THE ORDER OF THE TEMPLARS WAS THEN ON VISIT TO THE (PRECEPTORY->PRECEPTARY) +5484-24318-0026-1792: BRING THIS BEFORE YOUR MIND AND EVERYTHING ELSE THAT YOU MUST ACCEPT WITH IT IF YOU CONSENT WHEN THE TIME ARRIVES TO BECOME MINE CONCEAL AND PALLIATE NOTHING +4350-9170-0021-1403: THE MOST CONVINCING EXAMPLE OF THIS IS TO BE FOUND IN THE CONDITION OF THE WORKING CLASSES OF OUR EPOCH WHO ARE IN REALITY NO BETTER THAN THE SLAVES OF ANCIENT TIMES SUBDUED BY CONQUEST +8461-278226-0001-2885: SHE MEANT TO BE SCRUPULOUSLY CONSCIENTIOUS IN THE ADMINISTRATION OF HER TALENTS AND SOMETIMES AT CHURCH ON A SUNDAY WHEN THE SERMON WAS PARTICULARLY AWAKENING SHE MENTALLY DEBATED (THE->A) SERIOUS QUESTION AS TO WHETHER NEW BONNETS AND A PAIR OF (JOUVIN'S->ROUVNENES) GLOVES DAILY WERE NOT SINFUL BUT I THINK SHE DECIDED THAT THE NEW BONNETS AND GLOVES WERE ON THE WHOLE A PARDONABLE WEAKNESS AS BEING GOOD FOR TRADE +3764-168671-0009-1042: THE INTERMENT OF MOTHER CRUCIFIXION IN THE VAULT UNDER THE ALTAR THE EXIT OF COSETTE THE INTRODUCTION OF JEAN VALJEAN (TO->INTO) THE DEAD ROOM ALL HAD BEEN EXECUTED WITHOUT DIFFICULTY AND THERE HAD BEEN NO HITCH LET US REMARK IN PASSING THAT THE BURIAL OF MOTHER CRUCIFIXION UNDER THE ALTAR OF THE CONVENT IS A PERFECTLY (VENIAL->VENAL) OFFENCE IN OUR SIGHT +2033-164914-0021-232: WE WILL DO THEE NO UPRIGHT O MY SON NOR WRONG THEE IN AUGHT BUT OUR OBJECT IS THAT THOU BEND THY GRACIOUS STEPS WITH ME TO MY MISTRESS TO RECEIVE HER ANSWER AND RETURN IN WEAL AND SAFETY AND THOU SHALT HAVE A HANDSOME PRESENT AS ONE WHO BRINGETH GOOD NEWS +5442-41169-0015-1715: IF (WE'RE->WE ARE) LAYING OUT A GARDEN (PLANNING->PLANTING) ONE BEFORE THE HOUSE YOU KNOW AND THERE (YOU'VE->YOU HAVE) A TREE (THAT'S->THAT) STOOD FOR CENTURIES IN THE VERY SPOT OLD AND GNARLED IT MAY BE AND YET YOU DON'T CUT DOWN THE OLD FELLOW TO MAKE ROOM FOR THE (FLOWERBEDS->FLOWER BEDS) BUT LAY OUT YOUR BEDS SO AS TO TAKE ADVANTAGE OF THE TREE +7018-75788-0007-2195: NOW SLEEPING UNDER THESE TREES WERE MANY APES WHICH WHEN THEY SAW US ROSE AND FLED FROM US AND SWARMED UP AMONG THE BRANCHES WHEREUPON MY COMPANIONS BEGAN TO PELT THEM WITH WHAT THEY HAD IN THEIR BAGS AND THE APES FELL TO PLUCKING OF THE FRUIT OF THE TREES AND CASTING THEM AT THE FOLK +6070-86744-0019-1940: THEN HE DETAILED THE CONVERSATION OVERHEARD BY HIM AT THE (COLOSSEUM->COLISEUM) BETWEEN THE COUNT AND VAMPA IN WHICH THE COUNT HAD PROMISED TO OBTAIN THE RELEASE OF THE BANDIT PEPPINO AN ENGAGEMENT 
WHICH AS OUR READERS ARE AWARE HE MOST FAITHFULLY FULFILLED +2033-164915-0013-247: AND HE ANSWERED I AM THE CHAMBERLAIN OF THE EMIR OF DAMASCUS KING (SHARRKAN->SHARKAN) SON OF OMAR BIN AL (NU'UMAN->NUMAN) LORD OF BAGHDAD AND OF THE LAND OF KHORASAN AND I BRING TRIBUTE AND PRESENTS FROM HIM TO HIS FATHER IN BAGHDAD +5484-24318-0014-1780: CRIED (HERMON->HAREMAN) IN GRATEFUL AGITATION BUT SHE WOULD NOT LISTEN TO HIM AND FOLLOWED THE SOLDIER WHOM THE CAPTAIN DETAILED TO GUIDE HER INTO THE PALACE +6070-63485-0014-1916: WELL MY WIFE SHALL BE THERE SAID THE SCHOOLMASTER YOU WILL TELL HER WHAT YOU WANT AND I SHALL SEE +8280-266249-0044-2845: I DINKS NO (I->*) DINKS I (DEACH->DID) YOU (VON->VUN) LESSON RETURNED HIS CAPTOR NOT RELAXING HIS GRASP IN THE LEAST +7018-75788-0012-2200: AFTER WHICH I RETURNED TO MY OLD MERRY WAY OF LIFE AND FORGOT ALL I HAD SUFFERED IN THE GREAT PROFIT AND GAIN I HAD MADE +8280-266249-0045-2846: THE GERMAN RELEASED HIS PRISONER AND THE LATTER SLUNK AWAY WITH MUTTERED THREATS AND IMPRECATIONS UPON THE HEAD OF HIS TORMENTOR +7018-75789-0022-2230: I WILL TELL YOU THE STORY OF MY SEVENTH AND LAST VOYAGE WHICH IS STILL MORE WONDROUS AND (MARVELLOUS->MARVELOUS) THAN THAT OF THE FIRST SIX +2609-169640-0001-416: AN HOUR AFTER THE SUN HAD SET THE WIND FELL TO A LIGHT AIR (THAT JUST->THE HAGITTS) KEPT STEERAGE WAY ON THE SHIP +7105-2330-0035-2275: THE WARDERS HAVE A PRIVATE BAND OF THEIR OWN SAID THE GOVERNOR BUT OF COURSE I COULDN'T ALLOW THE MEN THEMSELVES +3997-180297-0014-1136: TRULY SHE CONTINUED WE POOR CREATURES OF CHANCE HAVE FANTASTIC DESIRES AND INCONCEIVABLE LOVES +7902-96592-0050-2395: TO HIS ASTONISHMENT THE BOY DID NOT FLINCH BUT THRUST HIS OWN ARMS THROUGH PLACING THEM ABOUT THE MIDDY'S WAIST CLENCHING HIS (HANDS->HAND) BEHIND AND UTTERING A SHARP WHISTLE +8131-117016-0006-2561: THE SHOPKEEPERS AND SOME OF THE LESS UNFORTUNATE PEOPLE THERE HAD PROTESTED LOUD ENOUGH TO REACH CLEAR BACK TO EARTH +8461-281231-0020-2920: AS THE FIRE COMMENCED TO SPREAD RAPIDLY THROUGH ALL PARTS OF THE CASTLE (ULRICA->EUREKA) APPEARED ON ONE OF THE TURRETS +8280-266249-0029-2830: THERE WAS A PAUSE BROKEN BY YOUNG HORACE WHO HAD BEEN WATCHING A GROUP OF MEN GATHERED ABOUT A TABLE AT THE FURTHER END OF THE ROOM +4852-28311-0023-1466: BUT THE NAME STILL SHOWED AT THE PROW AND MANY A TIME CHRIS SAFE AT HOME (IN->AND) BED HAD SAILED IMAGINARY VOYAGES IN THE MIRABELLE +2414-159411-0003-326: I WILL NEVER BE SO UNGRATEFUL ONLY LET ME OUT THAT I MAY DRINK SOME WATER AND RETURN +4852-28312-0026-1496: WHERE THE PEOPLE'S (DRUGSTORE->DRUG STORE) HAD STOOD BUT (A->*) HALF (*->AN) HOUR BEFORE ROSE THE ROOFS OF WHAT WAS EVIDENTLY AN INN +8461-258277-0009-2876: PRESENTLY (HASAN->HASSAN) SHUMAN CAME OUT OF A CLOSET AND SAID TO HIM HAST THOU GOTTEN THE GEAR O ALI +4852-28319-0024-1526: WITH INFINITE CAUTION CHRIS CLOSED THE DOOR SILENTLY BEHIND HIM AND RUNNING LIGHTLY FORWARD REACHED THE FIGURE OF THE NEGRO BOY +5442-32873-0008-1660: THERE WAS NOTHING VERY BROTHERLY IN HIS LOOK AS HE PEERED INTO HER PALE SHARP FEATURES DURING THE PROCESS +1688-142285-0041-41: MARGARET TURNED ROUND TO WALK ALONGSIDE OF THE GIRL IN HER FEEBLE PROGRESS (HOMEWARD->HOMEWARDS) +4350-9170-0039-1421: THE TAXES RAISED FROM THE PEOPLE FOR WAR PREPARATIONS ABSORB THE GREATER PART OF THE PRODUCE OF LABOR WHICH THE ARMY OUGHT TO DEFEND +8280-266249-0059-2860: TO ELSIE'S OBSERVANT EYES IT PRESENTLY BECAME EVIDENT THAT THE (DALYS->DAILIES) WERE IN VERY STRAITENED CIRCUMSTANCES +3331-159609-0015-671: I HOPE MARIA BAILEY IS ALL HE THINKS 
HER SHE ADDED SOFTLY FOR I COULD (N'T->NOT) BEAR TO HAVE HIM DISAPPOINTED AGAIN +1688-142285-0056-56: BESSY TOOK A LONG AND FEVERISH DRAUGHT AND THEN FELL BACK AND SHUT HER EYES +6070-86744-0006-1927: SO BE IT THEN REPLIED THE COUNT AND EXTENDING HIS HAND TOWARDS (A->THE) CALENDAR SUSPENDED NEAR THE CHIMNEY PIECE HE SAID TO DAY IS THE TWENTY FIRST OF FEBRUARY AND DRAWING OUT HIS WATCH ADDED IT IS EXACTLY HALF PAST TEN O'CLOCK NOW PROMISE ME TO REMEMBER THIS AND EXPECT ME THE TWENTY FIRST OF MAY AT THE SAME HOUR IN THE FORENOON +7105-2340-0005-2287: A (WILFRID PIGEONCOTE->WILFRED PICHKOTE) HAD COVERED HIMSELF WITH HONOURS IN THE COURSE OF MARLBOROUGH'S CAMPAIGNS AND THE NAME WILFRID HAD BEEN A BAPTISMAL WEAKNESS IN THE FAMILY EVER SINCE THE NEW HEIR TO THE FAMILY DIGNITY AND ESTATES WAS A YOUNG MAN OF ABOUT FIVE AND TWENTY WHO WAS KNOWN MORE BY REPUTATION THAN BY PERSON TO A WIDE CIRCLE OF COUSINS AND KINSFOLK +533-131564-0018-1642: THE FORMER WAS FULL OF TROUBLE AND ANGUISH NOT ACCUSING HIM BUT DEEPLY REGRETTING HIS CONNECTION WITH HIS PROFLIGATE COMPANIONS ABUSING MISTER GRIMSBY AND OTHERS INSINUATING BITTER THINGS AGAINST MISTER HUNTINGDON AND MOST (INGENIOUSLY->INGENUOUSLY) THROWING THE BLAME OF HER HUSBAND'S MISCONDUCT ON (TO->THE) OTHER (MEN'S->MAN'S) SHOULDERS +3080-5032-0015-563: I AM HERE MUCH MORE OUT OF PEOPLE'S WAY THAN IN TOWN WHERE MY AUNT AND SUCH AS PRETEND AN INTEREST IN ME AND A POWER OVER ME DO SO PERSECUTE ME WITH (THEIR->DEAR) GOOD NATURE AND TAKE IT SO ILL THAT THEY ARE NOT ACCEPTED AS I WOULD LIVE IN A HOLLOW TREE TO AVOID THEM +2609-156975-0007-368: THE STORIES REGARDING JOSEPH (THE->THEIR) TRADITIONAL FATHER (OF EPHRAIM->EPH FROM) AND MANASSEH IMPLY THAT THESE STRONG (CENTRAL->CENTAL) TRIBES POSSIBLY TOGETHER WITH THE SOUTHERN TRIBES OF BENJAMIN AND JUDAH WERE THE CHIEF (ACTORS IN->FACTORS OF) THIS OPENING SCENE IN ISRAEL'S HISTORY +8131-117017-0030-2647: AND IF ANY OF THE OTHER COPS HAD PRIVATE RACKETS OF THEIR OWN (IZZY->ISSEY) WAS UNDOUBTEDLY THE MAN TO FIND IT OUT AND USE THE INFORMATION WITH A BEAT SUCH AS THAT EVEN GOING HALVES AND WITH ALL THE GRAFT TO THE UPPER BRACKETS HE'D STILL BE ABLE TO MAKE HIS PILE IN A MATTER OF MONTHS +7018-75789-0004-2212: AND I THREW MYSELF DOWN UPON MY FACE ON THE RAFT BY REASON OF THE NARROWNESS OF THE CHANNEL WHILST THE STREAM CEASED NOT TO CARRY ME ALONG KNOWING NOT NIGHT FROM DAY FOR THE EXCESS OF THE GLOOM WHICH ENCOMPASSED ME ABOUT (AND->IN) MY TERROR AND CONCERN FOR MYSELF LEST I SHOULD PERISH +2033-164914-0022-233: THEN THE EUNUCH WENT OUT TO ZAU AL MAKAN AND SAID TO HIM RECITE WHAT VERSES THOU KNOWEST FOR MY LADY IS HERE HARD BY LISTENING TO THEE AND AFTER I WILL ASK THEE OF THY NAME AND THY NATIVE COUNTRY AND THY CONDITION +4350-9170-0023-1405: FOOTNOTE THE FACT THAT IN AMERICA THE ABUSES OF AUTHORITY EXIST IN SPITE OF THE SMALL NUMBER OF THEIR TROOPS NOT ONLY FAILS TO DISPROVE THIS POSITION BUT POSITIVELY CONFIRMS IT +7975-280057-0015-2470: TWO MONTHS AFTER THIS INCIDENT THE SAME PERSECUTORS AGAIN ENTERED OUR HOME IN THE DEAD OF THE NIGHT AND AT THE POINT OF A PISTOL TRIED TO FORCE MY MOTHER TO SET FIRE TO HER OWN HOME +8131-117016-0004-2559: EVEN DERELICTS AND FAILURES HAD TO EAT THERE WERE (STORES->STORIES) AND SHOPS THROUGHOUT THE DISTRICT WHICH EKED OUT SOME KIND OF A MARGINAL LIVING +8188-269288-0007-2680: IT IS THIS IF BY ANY CHANCE YOU DON'T LEAVE SAINT WODE'S ANNIE I HOPE YOU WILL ALLOW ME TO BE YOUR ROOMFELLOW AGAIN NEXT TERM +1998-15444-0020-116: THE RESIDUE MAY BE SET ASIDE FOR THE DETECTION OF THE METALLIC 
POISONS IF SUSPECTED EXPEL THE ALCOHOL BY CAREFUL EVAPORATION +7018-75789-0007-2215: WHEN THEY SAW I UNDERSTOOD THEM NOT AND MADE THEM NO ANSWER ONE OF THEM CAME FORWARD AND SAID TO ME IN ARABIC PEACE BE WITH THEE O MY BROTHER +8461-281231-0034-2934: POOR ISAAC WAS HURRIED OFF ACCORDINGLY AND EXPELLED FROM THE PRECEPTORY ALL HIS ENTREATIES AND EVEN HIS OFFERS UNHEARD AND DISREGARDED +3080-5040-0005-580: THE TRUTH IS I COULD NOT ENDURE TO BE MISSUS BRIDE IN A PUBLIC WEDDING TO BE MADE THE HAPPIEST PERSON ON EARTH +2033-164915-0001-235: THEN SHE THREW HERSELF UPON HIM AND HE GATHERED HER TO HIS BOSOM AND THE TWAIN FELL DOWN IN A FAINTING FIT +5484-24317-0018-1750: THE THREE MOST TRUSTWORTHY ONES ARE HERE (AMYNTAS->AMUNTUS) THE (LEECH->LIEGE) CHRYSIPPUS (AND->IN) THE ADMIRABLE (PROCLUS->PROCLYS) +8188-274364-0011-2800: YOUR MAJESTY HAVING TRIED THE AFFECTIONS OF YOUR PEOPLE YOU ARE ABSOLVED AND LOOSE FROM ALL RULES OF GOVERNMENT AND MAY DO WHAT POWER WILL ADMIT +5764-299665-0006-1810: IS HE RESPONSIBLE FOR ALL THE WARS THAT HAVE BEEN WAGED FOR ALL THE INNOCENT BLOOD THAT HAS BEEN SHED +5764-299665-0036-1840: THE MEMBERS WHO ADD TO THE MISERY OF THE FAMILY THE TRIBE OR THE NATION ARE CONSIDERED BAD MEMBERS +3528-168656-0003-685: AND THE (SCHOOL GIRLS->SCHOOLGIRLS) WOULD BEGIN TO LAUGH NOT IN THEIR SLEEVES BUT UNDER THEIR VEILS CHARMING LITTLE STIFLED LAUGHS WHICH MADE THE VOCAL MOTHERS FROWN +7902-96592-0005-2350: THE DULL SOUND OF DEPARTING STEPS AND A LOW WHISTLING SOUND COMING DOWN THROUGH THE SKYLIGHT WINDOW INTO THE CABIN WHERE ARCHY (RAYSTOKE->RAYSTROKE) LAY WITH HIS HEAVY EYELIDS PRESSED DOWN BY SLEEP +8131-117017-0017-2634: THE LITTLE MAN SHOOK HIS HEAD HIS ANCIENT EIGHTEEN YEAR OLD FACE TURNING SOBER NOPE +3764-168671-0012-1045: IN THE CLOISTER WHAT IS CALLED THE GOVERNMENT IS ONLY AN INTERMEDDLING WITH AUTHORITY AN INTERFERENCE WHICH IS ALWAYS QUESTIONABLE +8280-266249-0058-2859: THE CAPTAIN COMING IN SHORTLY AFTER THE SUDDEN FLIGHT OF THE GAMBLERS TOOK CHARGE OF THE MONEY AND THE NEXT DAY RESTORED IT TO THE OWNERS +1998-15444-0005-101: IF THE MEDICAL PRACTITIONER IS IN DOUBT ON ANY POINT HE SHOULD OBTAIN TECHNICAL ASSISTANCE FROM (SOMEONE->SOME ONE) WHO HAS PAID ATTENTION TO THE SUBJECT +1998-15444-0004-100: THE GUT AND THE GULLET BEING CUT ACROSS BETWEEN THESE LIGATURES THE STOMACH MAY BE REMOVED ENTIRE WITHOUT (SPILLING->SPINNING) ITS CONTENTS +3538-163624-0002-895: BUT IN THE NIGHT WHEN THE BATTLE WAS OVER HIS YOUNG WIFE CAME OUT AND SEARCHED FOR HIM AMONG THE SLAIN AND AT LAST SHE FOUND HIM AND ASKED WHETHER HE MIGHT BE HEALED +367-293981-0020-974: SANCHO TOOK IT AND AS HE WAS RAISING IT TO HIS MOUTH HE WAS STOPPED BY THE CRIES OF HIS MASTER EXCLAIMING SANCHO MY SON DRINK NOT WATER DRINK IT NOT MY SON FOR IT WILL KILL THEE SEE HERE I HAVE THE BLESSED BALSAM AND HE HELD UP THE FLASK OF LIQUOR AND WITH DRINKING TWO DROPS OF IT THOU WILT CERTAINLY BE RESTORED +3080-5032-0006-554: ALL THE PEOPLE THAT I HAD EVER IN MY LIFE REFUSED WERE BROUGHT AGAIN UPON THE STAGE LIKE RICHARD THE (THREE S->THIRD'S) GHOSTS TO REPROACH ME (WITHAL->WITH A) AND ALL THE KINDNESS HIS DISCOVERIES COULD MAKE I HAD FOR YOU WAS LAID TO MY CHARGE +6128-63240-0023-1994: SHE STOOD THERE LOOKING CONSCIOUSLY AND RATHER SERIOUSLY AT MISTER (RANSOM->RAMSON) A SMILE OF EXCEEDING FAINTNESS PLAYED ABOUT HER LIPS IT WAS JUST PERCEPTIBLE ENOUGH TO LIGHT UP THE NATIVE GRAVITY OF HER FACE +4198-12281-0010-1229: TO SOME WITH A SMART (SOUSE->SOULS) ON (THE EPIGASTER->THEIR EBERGASTER) HE WOULD MAKE THEIR (MIDRIFF 
SWAG->MIDDRIFTS WAAG) THEN REDOUBLING THE BLOW GAVE THEM SUCH A (HOMEPUSH->HOME PUSH) ON THE NAVEL THAT HE MADE THEIR PUDDINGS TO GUSH OUT +5442-41168-0022-1694: THIS EXPRESSION IN THE MARSHAL'S FACE WAS PARTICULARLY TOUCHING TO LEVIN BECAUSE ONLY THE DAY BEFORE HE HAD BEEN AT HIS HOUSE ABOUT HIS (TRUSTEE->TRUSTY) BUSINESS AND HAD SEEN HIM IN ALL HIS GRANDEUR A KIND HEARTED FATHERLY MAN +3528-168669-0054-749: SAINT TERENTIUS BISHOP OF PORT WHERE THE MOUTH OF THE TIBER EMPTIES INTO THE SEA REQUESTED THAT ON HIS TOMB MIGHT BE ENGRAVED THE SIGN WHICH WAS PLACED ON THE GRAVES OF PARRICIDES IN THE HOPE THAT PASSERS BY WOULD SPIT ON HIS TOMB THIS WAS DONE +3331-159605-0020-629: HOW HE GOT THERE WAS NEVER VERY CLEAR TO POLLY BUT THERE HE WAS FLUSHED AND A LITTLE OUT OF BREATH BUT LOOKING SO GLAD TO SEE HER THAT SHE HAD (N'T->NOT) THE HEART TO BE STIFF AND COOL AS SHE HAD FULLY INTENDED TO BE WHEN THEY MET +6432-63723-0030-2129: BUT IT WAS NOTICED THAT THE OLDER AND MORE CONSERVATIVE FAMILIES WERE LESS OFTEN REPRESENTED AND WHEN THEY WERE IT WAS BY SOME OF THE YOUNGER MEMBERS WHOSE REPUTATIONS WERE ALREADY SMIRCHED OR WHO HAD NOT YET ACQUIRED ANY AND WERE WILLING TO TAKE A CHANCE +4350-9170-0057-1439: EVEN LOOKING AT IT PRACTICALLY WEIGHING THAT IS TO SAY ALL THE BURDENS LAID ON HIM BY THE (STATE->STATES) NO MAN CAN FAIL TO SEE THAT FOR HIM PERSONALLY TO COMPLY WITH (*->THE) STATE DEMANDS AND SERVE IN THE ARMY WOULD IN THE MAJORITY OF CASES BE MORE DISADVANTAGEOUS THAN TO REFUSE TO DO SO +3538-163619-0001-854: FROM THE VERY DAY THAT THE NEW WIFE CAME INTO THE HOUSE THERE WAS NO PEACE FOR THE MAN'S CHILDREN AND NOT A CORNER TO BE FOUND WHERE THEY COULD GET ANY REST SO THE BOY THOUGHT THAT THE BEST THING HE COULD DO WAS TO GO OUT INTO THE WORLD AND TRY TO EARN HIS OWN BREAD +3997-180294-0006-1094: IT SEEMED TO ME AS IF THIS SLEEPING CITY BELONGED TO ME I SEARCHED MY MEMORY FOR THE NAMES OF THOSE WHOSE HAPPINESS I HAD ONCE ENVIED AND I COULD NOT RECALL ONE WITHOUT FINDING MYSELF THE HAPPIER +4852-28311-0017-1460: A LIVID YELLOW STAINED THE HORIZON BEYOND THE FACTORIES AND GRAY CLOUDS LOWERED AND TUMBLED ABOVE +5442-32873-0019-1671: IF I THOUGHT YOU'D FAIL ME NOW (TAMAR->TO MORROW) I SHOULD NEVER COME BACK GOOD NIGHT TAMAR +3528-168669-0001-696: WE WILL PRESENT A STENOGRAPHIC REPORT OF THE DIALOGUE WHICH THEN ENSUED TO THE BEST OF OUR ABILITY +1998-29455-0030-201: I (AIN'T->AM) SURE AS I (ADN'T->HADN'T) BETTER STICK TO THE ROAD AND KEEP AWAY FROM OLD (ANDS->ENDS) LIKE (YOU->EU) JIM +6070-63485-0008-1910: THEN ADDRESSING THOMAS SEYTON YOU KNOW THE PLAIN OF SAINT DENIS +7105-2330-0000-2240: UNFORTUNATELY THERE COULD BE NO DOUBT OR MISCONCEPTION AS TO PLATTERBAFF'S GUILT +2609-156975-0035-396: HIS (QUEST->PLACE) WAS (FOR A JUST->FULL OF JUTS) AND STRONG (GOD->GUNS) ABLE TO DELIVER (*->WITH) THE OPPRESSED +3005-163389-0001-441: SOME SUNG OUT TEAR DOWN THE FENCE TEAR DOWN THE FENCE +1998-15444-0015-111: NOTICE THE SMELL (COLOUR->COLOR) AND GENERAL APPEARANCE OF THE MATTER SUBMITTED FOR EXAMINATION +3528-168669-0031-726: THE MOTHERS HAVE TAKEN HER TO THE DEAD ROOM WHICH OPENS ON THE CHURCH I KNOW +7902-96595-0021-2450: BEG PARDON SIR BUT CAN YOU AS A GENTLEMAN ASSURE ME THAT HE IS NOT HERE CERTAINLY SAID SIR RISDON +3997-180294-0027-1115: DID SHE LOVE ME ENOUGH TO BELIEVE THAT THE MORE BEAUTIFUL SHE LOOKED THE HAPPIER I SHOULD BE +2414-128292-0031-321: THOU POOR ROVER AND RAMBLER THOU TIRED (BUTTERFLY->BUT TO FLY) +3005-163399-0014-531: I SEE IT WARN'T A BIT OF USE TO TRY TO GO AHEAD I'D GOT TO THROW UP MY 
HAND +3331-159609-0010-666: POOR POLLY WAS SO TAKEN BY SURPRISE THAT SHE HAD NOT A WORD TO SAY +3331-159605-0041-650: WELL I ALWAYS MEANT TO TRY IT IF I GOT A CHANCE AND I HAVE +7105-2330-0015-2255: HE SAID IT WAS OUR ONLY CHANCE OF GETTING A TELEGRAM (RADPROP IS->RHYDROPPIST) IN TO NIGHT +6070-86744-0020-1941: BUT SAID FRANZ THE CORSICAN BANDITS THAT WERE AMONG THE CREW OF HIS VESSEL +1688-142285-0021-21: MARGARET WAS COLLECTING HER MOTHER'S WORKING MATERIALS AND PREPARING TO GO TO BED +1688-142285-0036-36: ONE AFTERNOON SHE MET BESSY HIGGINS IN THE STREET AND STOPPED TO SPEAK TO HER +6432-63723-0006-2105: IT ISN'T GENERALLY KNOWN WENT ON THE LAWYER THAT THE HOTEL KEEPER'S WIFE HAS LEFT HIM +4294-9934-0008-1326: HIS DISCOMFORT WAS AUGMENTED BY ALL THE REFLECTIONS WHICH OCCURRED TO HIM +3764-168670-0051-1026: IF YOU ARE SURE OF COMING OUT OF THE COFFIN ALL RIGHT I AM SURE OF GETTING YOU OUT OF THE GRAVE +3538-142836-0005-831: LET IT BOIL UP AGAIN THEN TAKE IT OFF AND REMOVE CAREFULLY THE SCUM THAT HAS RISEN +3997-180297-0009-1131: HONESTLY DO YOU CARE FOR ME A LITTLE A GREAT DEAL +7105-2340-0019-2301: VIGILANCE WAS NOT COMPLETELY CROWNED WITH A SENSE OF VICTORY +5484-24318-0025-1791: AND I FOOL BLINDED ALSO IN MIND COULD BE VEXED WITH YOU FOR IT +3331-159605-0027-636: ASKED THE ARTFUL YOUNG MAN LAYING A TRAP INTO WHICH POLLY IMMEDIATELY FELL +5764-299665-0046-1850: WHEN THE CHURCH HAD CONTROL WERE MEN MADE BETTER AND HAPPIER +367-130732-0016-936: TAKE THE MEAT OF ONE LARGE CRAB SCRAPING OUT ALL (OF->*) THE FAT FROM THE SHELL +5764-299665-0047-1851: WHAT HAS RELIGION DONE FOR HUNGARY OR AUSTRIA +7902-96592-0045-2390: (RAM->GRAHAM) SHOWED HIS WHITE TEETH AS HE BURST OUT WITH A LONG LOW FIT OF LAUGHTER +2414-159411-0013-336: LET THE TIGER EAT THE MAN FOR MEN HAVE NO PITY +6938-70848-0009-2166: WE KNOW WHAT THE PEASANTS WILL SAY AREN'T THEY (WORKINGMEN->WORKING MEN) LIKE OURSELVES +6432-63722-0051-2090: BUT I NEED TO DO A LITTLE MORE SMOKING OUT FIRST NOW I WANT TO THINK +1688-142285-0011-11: ALL HIS FORMER FRIENDS SHRUNK FROM THE DISCLOSURES THAT HAD TO BE MADE OF HIS DISHONEST GAMBLING WILD HOPELESS STRUGGLES MADE WITH OTHER PEOPLE'S MONEY TO REGAIN HIS OWN MODERATE PORTION OF WEALTH +4350-9170-0009-1391: POWER IS ALWAYS IN THE HANDS OF THOSE WHO CONTROL THE ARMY AND ALL MEN IN POWER FROM THE ROMAN CAESARS TO THE RUSSIAN AND GERMAN EMPERORS TAKE MORE INTEREST IN THEIR ARMY THAN IN ANYTHING AND COURT POPULARITY IN THE ARMY KNOWING THAT IF THAT IS ON THEIR SIDE THEIR POWER IS SECURE +4294-14317-0004-1270: BUT BEWARE OF LETTING (BANDINELLO->BEND NELLO) QUIT YOU RATHER BESTOW UPON HIM ALWAYS MORE THAN HE DEMANDS FOR IF HE GOES INTO FOREIGN PARTS HIS IGNORANCE IS SO PRESUMPTUOUS THAT HE IS JUST THE MAN TO DISGRACE OUR MOST ILLUSTRIOUS SCHOOL +7018-75788-0013-2201: NEXT MORNING AS SOON AS IT WAS LIGHT HE PRAYED THE DAWN PRAYER AND AFTER BLESSING MOHAMMED THE CREAM OF ALL CREATURES BETOOK HIMSELF TO THE HOUSE OF SINDBAD THE SEAMAN AND WISHED HIM A GOOD DAY +8461-258277-0008-2875: SO HE ATE AND FELL DOWN SENSELESS FOR THE SWEETMEATS WERE DRUGGED WITH (BHANG->BANG) WHEREUPON THE KAZI BUNDLED HIM INTO THE SACK AND MADE OFF WITH HIM CHARGER AND CHEST AND ALL TO THE BARRACK OF THE FORTY +4198-61336-0005-1240: AN ATTEMPT WAS MADE TO CAPTURE KING (SHARDURIS->CHADORIS) WHO (LEAPT->LEAPED) FROM HIS CHARIOT AND MADE HASTY ESCAPE ON HORSEBACK HOTLY PURSUED IN THE GATHERING DARKNESS BY AN ASSYRIAN CONTINGENT OF CAVALRY +3331-159605-0016-625: WHEN SATURDAY CAME POLLY STARTED AS USUAL FOR A VISIT TO BECKY AND 
BESS BUT COULD (N'T->NOT) RESIST STOPPING AT THE (SHAWS->SHORES) TO LEAVE A LITTLE PARCEL FOR FAN THOUGH IT WAS CALLING TIME +3997-180294-0033-1121: (NONETHELESS->NEVERTHELESS) I WAS VERY UNHAPPY ALL THE REST OF THE EVENING AND WENT AWAY VERY SADLY AFTER HAVING SEEN PRUDENCE THE COUNT AND MARGUERITE GET INTO THE CARRIAGE WHICH WAS WAITING FOR THEM AT THE DOOR +8131-117016-0020-2575: THERE WAS A CRUDE LIGHTING SYSTEM HERE PUT UP BY THE CITIZENS AT THE FRONT OF EACH BUILDING A DIM (PHOSPHOR BULB->PHOSPHORIBULB) GLOWED WHEN DARKNESS FELL THEY WOULD HAVE NOTHING ELSE TO SEE BY +4852-28311-0022-1465: ON THE LEFT THE COIL OF ROPE IN THE CENTER THE MODEL OF A SAILING SHIP IN A (GREEN->GRAY) GLASS BOTTLE AND ON THE RIGHT THE WOODEN STATUE OF A NEGRO BOY IN BAGGY TROUSERS TURKISH JACKET AND WHITE TURBAN +5442-32873-0009-1661: THERE DON'T MIND ME SHE SAID SHARPLY AND GETTING UP SHE LOOKED DOWN AT HER DRESS AND THIN SHOES AND SEEMING TO RECOLLECT HERSELF SHE TOOK THE CANDLE HE HAD JUST SET DOWN AND WENT SWIFTLY TO HER ROOM +8461-278226-0006-2890: (IT->HE) WAS DRAWING TOWARDS THE CLOSE OF THIS DELIGHTFUL HONEYMOON TOUR AND IT WAS A BRIGHT SUNSHINY MORNING EARLY IN FEBRUARY BUT FEBRUARY IN PARIS IS SOMETIMES BETTER THAN APRIL IN LONDON +4198-12281-0007-1226: WHEREFORE IS IT THAT OUR DEVOTIONS WERE INSTITUTED TO BE SHORT IN THE TIME OF HARVEST AND VINTAGE AND LONG IN THE ADVENT (AND->IN) ALL THE WINTER +8280-266249-0014-2815: HE LED THE WAY THE OTHERS ALL FOLLOWING AND TAKING OUT A SLIP OF PAPER READ FROM IT IN A DISTINCT TONE LOUD ENOUGH TO BE HEARD BY THOSE (*->ALL) ABOUT HIM WITHOUT DISTURBING THE OTHER PASSENGERS +8188-274364-0009-2798: YOUNG (VANE->VAIN) FALLING UPON THIS PAPER OF NOTES DEEMED THE MATTER OF THE UTMOST IMPORTANCE AND IMMEDIATELY COMMUNICATED IT TO PYM WHO NOW PRODUCED THE PAPER BEFORE THE HOUSE OF COMMONS +8461-281231-0033-2933: HE HAD NOT UNTIL THEN BEEN INFORMED OF THE PRESENCE OF THE JEWISH MAIDEN IN THE ABODE OF THE TEMPLARS AND GREAT WAS HIS FURY AND INDIGNATION ON LEARNING THAT SHE WAS AMONGST THEM +367-293981-0015-969: SEARCH YOUR MEMORY AND IF YOU FIND ANYTHING OF THIS KIND YOU NEED ONLY TELL ME OF IT AND I PROMISE YOU BY THE ORDER OF KNIGHTHOOD WHICH I HAVE RECEIVED TO PROCURE YOU SATISFACTION AND REPARATION TO THE UTMOST OF YOUR DESIRE +6128-63241-0004-2003: SHE COULD NOT DEFEND HERSELF AGAINST (A->THE) RICH ADMIRATION A KIND OF TENDERNESS OF ENVY OF ANY ONE WHO HAD BEEN SO HAPPY AS TO HAVE THAT OPPORTUNITY +6128-63244-0021-2034: (THEY->THERE) WOULD BE NAMES OF WOMEN WEAK INSULTED PERSECUTED BUT DEVOTED IN EVERY PULSE OF THEIR BEING TO THE CAUSE AND ASKING NO BETTER FATE THAN TO DIE FOR IT +6128-63241-0005-2004: HIS FAMILY WAS RUINED THEY HAD LOST THEIR SLAVES THEIR PROPERTY THEIR FRIENDS AND RELATIONS THEIR HOME HAD TASTED OF ALL THE CRUELTY OF DEFEAT +2033-164915-0016-250: SO IT WAS AGREED THAT WE GO TO DAMASCUS AND FETCH THENCE THE KING'S SON SHARRKAN AND MAKE HIM SULTAN OVER HIS FATHER'S REALM +8131-117017-0002-2619: AND THE SLOW DOUBTFUL RESPECT ON THE FACES OF THE CITIZENS AS THEY NODDED TO HIM WAS EVEN MORE PROOF THAT HALEY'S SYSTEM WAS WORKING +6432-63723-0054-2153: IT'S IT'S AN ODD COIN AN OLD ROMAN ONE THAT MISSUS DARCY HAD IN HER PRIVATE COLLECTION KEPT IN THE JEWELRY STORE SAFE WAS THE WHISPERED ANSWER +8280-266249-0042-2843: A MAN OF GIANT SIZE AND HERCULEAN STRENGTH HAD LAID ASIDE HIS PIPE AND SLOWLY RISING TO HIS FEET SEIZED THE SCOUNDREL IN HIS POWERFUL GRASP +5764-299665-0080-1884: WE NOW KNOW IF WE KNOW ANYTHING THAT THE UNIVERSE IS NATURAL AND THAT MEN 
AND WOMEN HAVE BEEN NATURALLY PRODUCED +8461-281231-0004-2904: LEAVE ME AND SEEK THE SAXON WITCH (ULRICA->EUREKA) WHO WAS MY TEMPTRESS LET HER AS WELL AS I TASTE THE TORTURES WHICH ANTICIPATE HELL +3528-168669-0079-774: THE PRIORESS WHO WAS USUALLY SUBJECTED TO THE BARRIER OF SILENCE AND WHOSE RESERVOIR WAS (OVERFULL->OVER FULL) ROSE AND EXCLAIMED WITH THE LOQUACITY OF A DAM WHICH HAS BROKEN AWAY +5442-32873-0007-1659: WHEN HE CAME BACK TO THE DRAWING ROOM A (TOILET->TOURID) BOTTLE (OF EAU DE->OFAU THE) COLOGNE IN HIS HAND WITH HER LACE HANDKERCHIEF HE BATHED HER TEMPLES AND FOREHEAD +7902-96591-0014-2334: THE KICK HE HAD RECEIVED WAS A FORETASTE OF WHAT HE MIGHT EXPECT AND AFTER A LITTLE CONSIDERATION HE CAME TO THE CONCLUSION THAT HIS DUTY WAS TO ESCAPE AND GET BACK TO THE CUTTER AS QUICKLY AS HE COULD +2609-157645-0014-414: WHEN THESE MEN IN THE COURSE OF MY REMONSTRANCE FOUND (*->OUT) THAT I WAS NOT GOING TO CONTINUE THE CUSTOM THEY NO LONGER CARED TO BE COMMUNICANTS +4294-35475-0008-1299: IT HAD SUDDENLY DISAPPEARED AND IN ITS PLACE STOOD A BEAUTIFUL FAIRY WITH FILMY WINGS WHICH SHONE LIKE RAINBOWS IN THE MOONLIGHT +6070-63485-0001-1903: THE TWO MONSTERS TOOK OFF THEIR SHOES AND MOVED STEALTHILY ALONG KEEPING IN THE SHADOWS OF THE HOUSES +7975-280085-0011-2547: FORMING IN LINE FOUR PACES APART HE ORDERED THEM TO ADVANCE RAPIDLY AND CONCENTRATE THE FIRE OF THE WHOLE LINE THE INSTANT THE ROBBERS WERE DISCOVERED +4350-9170-0056-1438: THE POWER OF THE STATE FAR FROM BEING A SECURITY AGAINST THE ATTACKS OF OUR NEIGHBORS EXPOSES US ON THE CONTRARY TO MUCH GREATER DANGER OF SUCH ATTACKS +2033-164916-0000-252: SO HE TURNED TO THE WAZIR DANDAN AND SAID TO HIM VERILY YOUR TALE IS A WONDER OF WONDERS +4294-14317-0006-1272: THEN I THANKED HIM AND SAID I HAD NO GREATER DESIRE THAN TO SHOW THOSE ENVIOUS FOLK THAT I HAD IT IN (ME->NEED) TO EXECUTE THE PROMISED WORK +3538-163624-0020-913: THERE MUST SHE SLEEP TILL THOU (COMEST->COMES) FOR HER WAKING RISE UP AND RIDE FOR NOW SURE SHE WILL SWEAR THE VOW FEARLESS OF BREAKING +5484-24317-0005-1737: HE LONGED WITH ARDENT YEARNING FOR THE SOUND OF HER VOICE AND STILL MORE TO UNBURDEN HIS SORELY TROUBLED SOUL TO HER +1998-29454-0023-147: THE MAN'S MANNER WAS SO KIND AND HEARTY THE WHOLE ADVENTURE WAS SO WONDERFUL AND NEW IS IT COUNTRY WHERE (YOU->YOU'RE) GOING +5484-24317-0006-1738: SINCE HIS RETURN FROM THE ORACLE THE FEAR THAT THE (RESCUED->RESCUE) DEMETER MIGHT YET BE THE WORK OF (MYRTILUS->MERTILLUS) HAD AGAIN MASTERED HIM +4350-9170-0055-1437: AND IF (DEFENSE->DEFENCE) AGAINST BARBAROUS NATIONS IS MEANT ONE THOUSANDTH PART OF THE TROOPS NOW UNDER ARMS WOULD BE AMPLY SUFFICIENT FOR THAT PURPOSE +5442-41168-0005-1677: THE CONFIDENCE OF THE MONARCH NO CHECKING THE ACCOUNTS OF THE (MARSHAL HE'S->MARTIAN HE IS) NOT A CASHIER BUT THAT'S NOT THE POINT +5484-24317-0020-1752: YOUR UNCLE TOO HAS OFTEN WITH PRAISEWORTHY GENEROSITY HELPED (ARSINOE->ARSENAL) IN MANY AN EMBARRASSMENT +3997-182399-0013-1167: WHY HE (JES->JUST) STRETCH HIS (FOOL HAID->FULL HEAD) AS FAR DOWN (THAT->THE) CHIMNEY AS HE CAN (AN->AND) LISTEN (AN->AND) LISTEN +1688-142285-0073-73: PRESENTLY THE SPASM THAT FORESHADOWED DEATH HAD PASSED AWAY AND BESSY ROUSED HERSELF AND SAID +367-130732-0022-942: SO FAR IT HAS BEEN USED MOSTLY FOR GARNISHMENT OF OTHER DISHES AND IT IS ONLY RECENTLY THAT THE (HOF BRAU->WHOLE BROW) HAS BEEN MAKING A SPECIALTY OF THEM +8461-281231-0037-2937: AT HIS FEET WAS PLACED A TABLE OCCUPIED BY TWO SCRIBES WHOSE DUTY IT WAS TO RECORD THE PROCEEDINGS OF THE DAY 
+5484-24317-0021-1753: HOW LONG HE KEPT YOU WAITING (FOR->FROM) THE FIRST WORD CONCERNING A WORK WHICH JUSTLY TRANSPORTED THE WHOLE CITY WITH DELIGHT +3764-168670-0043-1018: ANY MAN WHO HAS BEEN A PRISONER UNDERSTANDS HOW TO CONTRACT HIMSELF TO FIT THE DIAMETER OF THE ESCAPE +6432-63723-0014-2113: LARCH BY REFUSING TO APPEAR PRACTICALLY ADMITTED THE CHARGES AGAINST HIM AND DID NOT OPPOSE THE SEPARATION +367-293981-0019-973: HE SAW HIM RISING AND FALLING IN THE AIR WITH SUCH GRACE AND NIMBLENESS THAT HAD HIS RAGE ALLOWED HIM IT IS MY BELIEF HE WOULD HAVE LAUGHED +4852-28312-0027-1497: A COURTYARD WAS (SPARSELY->FIRSTLY) LIT BY A FLARING TORCH OR TWO SHOWING A SWINGING SIGN HUNG ON A POST +7902-96594-0032-2428: STEADY MY (LADS->LAD) STEADY CRIED THE MASTER KEEP STROKE AND THEN HE BEGAN TO MAKE PLANS AS TO HIS FIRST PROCEEDINGS ON GETTING ASHORE +3331-159605-0018-627: TAKE HOLD OF (MASTER CHARLEY'S->MASSA CHARLIE'S) HAND MISS MAMIE AND WALK PRETTY LIKE (WILLY->WILLIE) AND (FLOSSY->FLOSSIE) SAID THE MAID +1688-142285-0087-87: (MISSUS->MISTER) THORNTON THE ONLY MOTHER HE HAS I BELIEVE SAID MISTER HALE QUIETLY +8188-274364-0010-2799: THE KING PROPOSES THIS DIFFICULTY BUT HOW CAN I UNDERTAKE OFFENSIVE WAR IF I HAVE NO MORE MONEY +3005-163390-0000-459: (ANDBUT->AN BUT) NEVER MIND THE REST OF HIS OUTFIT IT WAS (JUST->JEST) WILD BUT IT WAS AWFUL FUNNY +4852-28312-0024-1494: FLABBERGASTED AND BREATHLESS CHRIS WAS UNAWARE THAT HE HAD MOVED CLOSER TO PEER OUT THE WINDOW IN EVERY DIRECTION +2609-157645-0000-400: EVIDENTLY THE INTENTION WAS (TO MAKE->MADE) THINGS (PLEASANT->PRESENT) FOR THE ROYAL (FOE->FOLK) OF TOBACCO DURING HIS VISIT +5484-24317-0002-1734: SHE WOULD APPEAR HERSELF AT DESSERT AND THE BANQUET MUST THEREFORE BEGIN AT AN UNUSUALLY EARLY HOUR +4852-28330-0024-1554: HE KNEW NOW THAT HIS ABSENCE FOR AS LONG AS HE HAD TO BE AWAY WOULD BE COVERED UP AND (SATISFACTORILY->SATISFACTIONILY) ACCOUNTED FOR +7105-2340-0037-2319: BRAVE LITTLE WOMAN SAID PETER WITH A GASP OF RELIEF I COULD NEVER HAVE DONE IT +3997-182399-0011-1165: ONE DAY THIS (NO COUNT->NOCCOUNT) TRIFLING COUSIN OF (GRANDPAP->GRANPAP) BUZZARD GET COLD IN HIS FEET +2414-159411-0002-325: (OH->O) FATHER OF MERCY ANSWERED THE TIGER IN TRUTH THAT I WILL NOT +1998-15444-0018-114: THE PURE (ALKALOIDS->AKALOIDS) WITH THE EXCEPTION OF MORPHINE IN ITS CRYSTALLINE FORM ARE SOLUBLE IN ETHER +533-131556-0018-1599: I CANNOT RENOUNCE WHAT IS DEARER THAN LIFE SHE MUTTERED IN A LOW HURRIED TONE +3005-163391-0000-490: WHICH WAS SOUND ENOUGH JUDGMENT BUT YOU TAKE THE AVERAGE MAN AND HE WOULDN'T WAIT FOR HIM TO (HOWL->HIRE) +3331-159605-0046-655: (I M ODD->I'M ART) YOU KNOW AND PREFER TO BE AN INDEPENDENT SPINSTER AND TEACH MUSIC ALL MY DAYS +5442-41169-0004-1704: THESE PERSONS WERE UNMISTAKABLY SEEKING A PLACE WHERE THEY COULD TALK WITHOUT BEING OVERHEARD +3997-180294-0002-1090: WELL DO IT FOR ME FOR I SWEAR TO YOU THAT I DON'T LOVE YOU AS THE OTHERS HAVE LOVED YOU +4198-12259-0034-1209: APPETITE COMES WITH EATING SAYS (ANGESTON->ANGISTON) BUT THE THIRST GOES AWAY WITH DRINKING +3005-163390-0016-475: MY YOU OUGHT TO SEEN OLD HENRY THE (EIGHT->EIGHTH) WHEN HE WAS IN BLOOM HE WAS A BLOSSOM +3080-5032-0016-564: YOU WILL THINK HIM ALTERED AND IF IT BE POSSIBLE MORE MELANCHOLY THAN HE WAS +3538-163624-0001-894: THE OLD KING WENT OUT AND FOUGHT BRAVELY BUT AT LAST HIS SWORD BROKE AND HE WAS WOUNDED AND HIS MEN FLED +4294-32859-0000-1285: WYLDER WAS RATHER SURLY AFTER THE LADIES HAD FLOATED AWAY FROM THE SCENE AND HE DRANK HIS LIQUOR DOGGEDLY 
+7975-280076-0010-2499: THIS CAN BE PROVED BY BOTH OF THEM ALSO BY SHERIFF (BARKLEY->BARKELEY) AND FIFTY OTHER RESPECTABLE MEN OF THAT COUNTY +8188-269290-0008-2739: I KNOW YOU DON'T QUITE MEAN WHAT YOU SAY SAID LESLIE BUT OF COURSE IF YOU REALLY WISH ME +3764-168670-0055-1030: THE PRIEST SAYS THE PRAYERS MAKES THE SIGN OF THE CROSS SPRINKLES THE HOLY WATER AND TAKES HIS DEPARTURE +367-130732-0004-924: A BOOK COULD BE WRITTEN ABOUT THIS RESTAURANT AND THEN ALL WOULD NOT BE TOLD FOR ALL ITS SECRETS CAN NEVER BE KNOWN +2609-169640-0015-430: KITE WENT AFT AND RETURNED WITH THREE OR FOUR MUSKETS AND AS MANY PIKES +533-131564-0005-1629: WE ALL HAVE A BIT OF A LIKING FOR HIM AT THE BOTTOM OF OUR HEARTS THOUGH WE CAN'T RESPECT HIM +3538-163624-0016-909: THEN SIGURD RODE BACK AND MET (REGIN->REGEN) AND (REGIN->REGAN) ASKED HIM TO ROAST (FAFNIR'S->FAFFNER'S) HEART AND LET HIM TASTE OF IT +3997-180294-0001-1089: YES BUT IF I SHOULD ALREADY ASK FOR SOMETHING WHAT +3764-168670-0054-1029: THE (HEARSE->HOUSE) HALTS THE UNDERTAKER'S MEN KNOT A ROPE (AROUND->ROUND) YOUR COFFIN AND LOWER YOU DOWN +6432-63722-0038-2077: AND (DONOVAN->DONOMAN) TAKE A FRIEND'S ADVICE AND DON'T BE TOO FREE WITH THAT WATCH TOO FREE WITH IT +533-131562-0005-1612: THAT AND ALL REPLIED THE MASTER AND THE THINGS WERE CLEARED AWAY +4852-28330-0007-1537: CERTAINLY MY BOY BOOMED OUT THE CAPTAIN HIS BLUE EYES ABRUPTLY KEEN AND PENETRATING +5442-41168-0000-1672: THE ACT SAID THAT IN CASE OF DIFFERENCE OF OPINION THERE MUST BE A BALLOT +8131-117016-0046-2601: BRUCE GORDON GRIMACED I'VE GOT A YELLOW TICKET FROM SECURITY +8280-266249-0040-2841: WHO ASKED THE MATE I'VE SEEN (NONE->NO ONE) UP HERE THOUGH THERE ARE SOME IN THE STEERAGE +7975-280076-0023-2512: THERE WERE FIFTY OR A HUNDRED PERSONS THERE WHO WILL TESTIFY IN ANY COURT THAT JOHN AND I WERE THERE +3538-142836-0006-832: IT IS CONSIDERED TO BE SUFFICIENTLY BOILED WHEN SOME TAKEN UP IN A SPOON POURS OUT LIKE OIL +5764-299665-0048-1852: COULD THESE COUNTRIES HAVE BEEN WORSE WITHOUT RELIGION +1998-15444-0016-112: FOR THE SEPARATION OF AN ALKALOID THE FOLLOWING IS THE PROCESS OF (STAS->STARS) OTTO +1998-29454-0018-142: (I->I'VE) GOT TO STICK IT SAID (DICKIE->DICKI) SADLY I'D BEST BE GETTING HOME +1998-29454-0033-157: HE (REWARD->REVORED) THE WAKE THE LAST OF THE ENGLISH AND I (WUNNERED->WONDERED) WHAT (IT->I) STOOD FOR +3005-163399-0015-532: SO I SAYS TO MYSELF (HERE'S->HERE IS) ANOTHER PLACE WHERE I GOT TO (RESK->REST) THE TRUTH +5442-41168-0015-1687: THAT IS A MATTER FOR EACH MAN'S OWN DECISION HE SAID SEVERELY +8188-269288-0048-2721: SHE GOT INTO BED AS SHE SPOKE AND WRAPPED THE CLOTHES TIGHTLY ROUND HER +1998-29455-0031-202: I (OPE E'S->HOPE HE'S) CLEVER ENOUGH TO DO (WOT E'S->WHAT HE'S) TOLD KEEP IS (MUG SHUT->MUCH AT) THAT'S ALL +8188-274364-0008-2797: MY LORDS I HAVE NOW TROUBLED YOUR (LORDSHIPS->LORDSHIP) A GREAT DEAL LONGER THAN I SHOULD HAVE DONE +4852-28311-0019-1462: ALL AT ONCE HE WONDERED HOW HIS MOTHER WAS AND EVERYTHING IN HIM PINCHED AND TIGHTENED ITSELF +4350-9170-0050-1432: WITHOUT THE STATE MEN WOULD NOT HAVE BEEN ABLE TO FORM THE SOCIAL INSTITUTIONS NEEDED FOR DOING (ANY THING->ANYTHING) +7975-280057-0012-2467: NOW OLD LADY CALL ON YOUR PROTECTORS WHY DON'T YOU CALL ON (COLE->CO) YOUNGER NOW +7902-96595-0008-2437: IF (YOU'D->YOU) LET ME FINISH YOU'D KNOW SAID GURR GRUFFLY ONE OF OUR BOYS IS MISSING SEEN HIM UP HERE +3528-168669-0032-727: A FINE SIGHT IT WOULD BE TO SEE A MAN ENTER THE (DEAD->BED) ROOM MORE OFTEN +7902-96592-0047-2392: WHY I COULD TIE YOU UP IN A 
KNOT AND HEAVE YOU OFF THE CLIFF ANY DAY WHAT A GAME +3005-163390-0013-472: WELL IT DON'T BECAUSE IT'S IN THE BREED I RECKON THEY'RE ALL ALIKE +4350-10919-0009-1357: YES THAT'S AN UNDERSTOOD THING RESPONDED THE CELEBRATED PHYSICIAN AGAIN GLANCING AT HIS WATCH +3005-163399-0030-547: THEN I SAYS TO MYSELF S'POSE TOM SAWYER COMES DOWN ON THAT BOAT +8188-269288-0018-2691: HER FACE GREW SUDDENLY WHITE AS DEATH WHAT IS IT DEAR +2609-169640-0013-428: THE (PROAS->PROETS) DID NOT ALTER (THEIR->THE) COURSE BUT NEARED US FAST +3005-163391-0012-502: TOO BAD TOO BAD HE COULDN'T (A->HAVE) LIVED TO SEE HIS BROTHERS POOR SOUL +6070-63485-0010-1912: CRIED THE SCHOOLMASTER A THOUSAND FRANCS AND I'LL KILL HIM +3080-5032-0000-548: BUT I AM HUGELY PLEASED THAT YOU HAVE SEEN MY LADY +7975-280063-0005-2481: I THINK HE'LL BE RATHER TOUGH MEAT FOR BREAKFAST I REPLIED HE MIGHT BE ALL RIGHT FOR DINNER +6938-70848-0010-2167: (THESE MEN ESPECIALLY->THIS MAN HAD SPECIALLY) WELCOMED THE CALL TO A CONGRESS OF PEASANTS +2033-164914-0007-218: AND HE ALSO IMPROVISED THE TWO FOLLOWING (DISTICHS->DISTICHES) +3528-168669-0048-743: FAUCHELEVENT THOUGHT THAT IT WAS AN ORISON WHICH SHE WAS FINISHING +3764-168671-0026-1059: FAUCHELEVENT HAD EXPECTED ANYTHING BUT THIS THAT A GRAVE DIGGER COULD DIE +8188-269290-0052-2783: I SEE BY YOUR FACE (MISS->MISSUS) GILROY THAT YOU ARE DISTRESSED ABOUT SOMETHING ARE YOU KEEPING ANYTHING BACK +5484-24317-0016-1748: THE KING'S SISTER THE OBJECT OF HIS LOVE CRIED (HERMON->HARMONT) INCREDULOUSLY +3528-168669-0004-699: AND YOU HAVE BEEN IN THE CHOIR IN PURSUANCE OF YOUR DUTIES TWO OR THREE TIMES +4294-9934-0026-1344: THAT WILL BE SWALLOWING A TONGUE VERY FAST OR A HUNDRED SOUS VERY SLOWLY +7902-96591-0013-2333: NO HE THOUGHT TO HIMSELF I DON'T BELIEVE THEY WOULD KILL ME BUT THEY WOULD KNOCK ME ABOUT +4198-12281-0004-1223: BY THE BELLY OF (SANCT->SAINT) JAMES WHAT SHALL WE POOR DEVILS DRINK THE WHILE +8131-117017-0031-2648: THE CAPTAIN LOOKED COMPLETELY BEATEN AS HE CAME INTO THE ROOM AND DROPPED (ONTO->ON) THE BENCH +1688-142285-0069-69: THE FEVERISH COLOUR CAME INTO HER (CHEEK->CHEEKS) AND THE FEVERISH FLAME INTO HER EYE +3997-180297-0027-1149: I TORE THE LETTER INTO FRAGMENTS AND KISSED WITH TEARS THE HAND THAT GAVE IT TO ME +533-1066-0012-1568: THEY WERE TALKING (CONFIDENTIALLY->CONFIDENTLY) TOGETHER BUT WHEN I CAME DOWN THEY CEASED +5764-299665-0035-1839: THEY ARE REGARDED AS GOOD THAT IS TO SAY AS MORAL +5764-299665-0019-1823: CAN WE SAY THAT HIS MERCY (ENDURETH->ENDURED) FOREVER +3764-168671-0025-1058: FATHER (MESTIENNE->MUSTHIENNE) IS THE GRAVE DIGGER HE WAS +3538-163624-0000-893: ONCE UPON A TIME THERE WAS A KING IN THE NORTH WHO HAD WON MANY WARS BUT NOW HE WAS OLD +7902-96592-0048-2393: BIT OF A (MIDDY->MITTEE) FED ON (*->A) SALT TACK (AND WEEVILLY->IN WEEVILY) BISCUIT TALK OF GIVING ME (ROPE'S->ROPES) END +6070-63485-0011-1913: (WRETCH->VETCH) I DO NOT (SEEK->SEE) HIS LIFE REPLIED SARAH TO THE SCHOOLMASTER +5484-24318-0028-1794: HE HIMSELF ON THE WAY TO EXPOSE HIMSELF TO THE MALICE AND MOCKERY OF THE WHOLE CITY +5442-41169-0003-1703: I HAVE TOLD HIM SO BUT IT MAKES NO DIFFERENCE ONLY THINK OF IT +3997-180294-0015-1103: (IT->THIS) IS THE SAME WITH THESE UNHAPPY WOMEN WHEN THEY LOVE SERIOUSLY +5442-41168-0017-1689: A HUNDRED AND TWENTY SIX FOR ADMISSION NINETY EIGHT AGAINST +4852-28319-0021-1523: DUSK CAME TWO HOURS BEFORE ITS TIME THUNDER (SNARLED->SNARLS) IN THE SKY +5484-24318-0027-1793: (SO ARCHIAS->SORCHAUS) INTENDED TO LEAVE THE CITY ON ONE OF HIS OWN SHIPS THAT VERY DAY 
+8188-269288-0020-2693: WELL READ IT IN PEACE SAID (LESLIE->LESLEY) I WON'T DISTURB YOU +4350-9170-0007-1389: THESE BANDS OF ARMED MEN SUBMISSIVE TO A SINGLE WILL ARE WHAT CONSTITUTE THE ARMY +5764-299665-0005-1809: ARE THE FAILURES UNDER OBLIGATION TO THEIR CREATOR +533-131562-0007-1614: MUTTERED HE STARTING BACK SHE'S THE VERY DEVIL FOR SPITE +4852-28330-0008-1538: MISTER FINNEY WILL BE SOME TIME ON DECK WE CANNOT BE OVERHEARD IN HERE +4852-28311-0021-1464: THERE WERE THREE THINGS THAT ALWAYS CAUGHT HIS EYE AMID THE LITTER OF DUSTY PIECES +3764-168670-0023-998: YOU KNOW FAUCHELEVENT WHAT YOU HAVE SAID MOTHER CRUCIFIXION IS DEAD +4852-28319-0007-1509: MISTER WICKER WAITED PATIENTLY BESIDE HIM FOR A FEW MOMENTS FOR CHRIS TO GET UP HIS COURAGE +2414-128291-0016-279: THOU KNOWEST IT THYSELF BETTER EVEN THAN I +5764-299665-0079-1883: WE NOW KNOW THAT OUR FIRST PARENTS WERE NOT FOREIGNERS +1998-29455-0033-204: SEE THAT BLOKE JUST NOW SAID MISTER (BEALE YUSS->BELE YES) SAID DICKIE +7105-2340-0036-2318: DO YOU MEAN TO SAY HE'S (A KLEPTOMANIAC->ACLEPTOMANIA) LIKE COUSIN SNATCHER +3997-180294-0016-1104: BUT WHEN THE MAN WHO INSPIRES THIS REDEEMING LOVE IS GREAT ENOUGH IN SOUL TO RECEIVE IT WITHOUT REMEMBERING THE PAST WHEN HE GIVES HIMSELF UP TO IT WHEN IN SHORT HE LOVES AS HE IS LOVED THIS MAN DRAINS AT ONE DRAUGHT ALL EARTHLY EMOTIONS AND AFTER SUCH A LOVE HIS HEART WILL BE CLOSED TO EVERY OTHER +4294-35475-0022-1313: THE (GRANDAME->GRAND DAME) WHOM HE SUPPLIED WITH (FAGOTS->FAGGOTS) THE MERCHANT WHOM HE RESCUED FROM ROBBERS THE KING'S (COUNCILLOR->COUNSELLOR) TO WHOM HE GAVE AID ALL BECAME HIS FRIENDS UP AND DOWN THE LAND (TO->*) BEGGAR OR LORD HOMELESS WANDERER OR HIGH BORN DAME HE GLADLY GAVE UNSELFISH SERVICE ALL UNSOUGHT AND SUCH AS HE HELPED STRAIGHTWAY BECAME HIS FRIENDS +3528-168656-0002-684: EVERY YEAR SHE SOLEMNLY RENEWED HER VOWS AND AT THE MOMENT OF TAKING THE OATH SHE SAID TO THE PRIEST MONSEIGNEUR SAINT FRANCOIS GAVE IT TO (MONSEIGNEUR->MONSEIGNOR) SAINT (JULIEN MONSEIGNEUR->JULIAN MONSEIGNOR) SAINT (JULIEN->JULIAN) GAVE IT TO MONSEIGNEUR SAINT (EUSEBIUS MONSEIGNEUR->CUPIUS A SIGNOR) SAINT (EUSEBIUS->JOSEPIUS) GAVE IT (TO MONSEIGNEUR->MONSEIGNOR) SAINT PROCOPIUS ET CETERA ET CETERA +1688-142285-0040-40: I'M BETTER IN NOT BEING TORN TO PIECES BY COUGHING (O'NIGHTS->O NIGHTS) BUT I'M WEARY AND TIRED O MILTON AND LONGING TO GET AWAY TO THE LAND (O BEULAH->OF BOOLA) AND WHEN I THINK I'M FARTHER AND FARTHER OFF MY HEART SINKS AND I'M NO BETTER I'M WORSE +6070-86744-0022-1943: TALKING OF COUNTRIES REPLIED FRANZ OF WHAT COUNTRY IS THE COUNT WHAT IS HIS NATIVE TONGUE WHENCE DOES HE DERIVE HIS IMMENSE FORTUNE AND WHAT WERE THOSE EVENTS OF HIS EARLY LIFE A LIFE AS MARVELLOUS AS UNKNOWN THAT HAVE TINCTURED HIS SUCCEEDING YEARS WITH SO DARK AND GLOOMY A MISANTHROPY +5442-41168-0002-1674: HE WAS SHOUTING FOR THE VERY COURSE SERGEY IVANOVITCH HAD PROPOSED BUT IT WAS EVIDENT THAT HE HATED HIM AND ALL HIS PARTY AND THIS FEELING OF HATRED SPREAD THROUGH THE WHOLE PARTY AND ROUSED IN OPPOSITION TO IT THE SAME VINDICTIVENESS THOUGH IN A MORE SEEMLY FORM ON THE OTHER SIDE +3331-159605-0029-638: SO WHEN SHE MADE HER LAST (BRILLIANT->BUOYANT) REMARK HE SAID QUIETLY WATCHING HER FACE KEENLY ALL THE WHILE I THOUGHT SO WELL (I M->I'M) GOING OUT OF TOWN ON BUSINESS FOR SEVERAL WEEKS SO YOU CAN ENJOY YOUR LITTLE BIT OF COUNTRY WITHOUT BEING ANNOYED BY ME ANNOYED +5484-24317-0001-1733: BUT (HERMON->HARE) WAS NOT IN THE MOOD TO SHARE A JOYOUS REVEL AND HE FRANKLY SAID SO ALTHOUGH IMMEDIATELY AFTER HIS RETURN 
HE HAD ACCEPTED THE INVITATION TO THE FESTIVAL WHICH THE WHOLE FELLOWSHIP OF ARTISTS WOULD GIVE THE FOLLOWING DAY IN (HONOUR->HONOR) OF THE SEVENTIETH BIRTHDAY OF THE OLD SCULPTOR (EUPHRANOR->EUPHRANER) +7975-280057-0014-2469: BUT SHE FAILED TO FIND THE COMFORT SHE SOUGHT FOR (ANNOYANCES->ANNOYANCE WAS) CONTINUED IN A MORE AGGRAVATED FORM +7975-280076-0025-2514: ABOUT THE LAST OF DECEMBER EIGHTEEN SEVENTY THREE I ARRIVED IN CARROLL PARISH LOUISIANA +3005-163399-0003-520: SO THEN SHE STARTED FOR THE HOUSE LEADING ME BY THE HAND AND THE CHILDREN TAGGING AFTER +3997-180297-0028-1150: LOOK HERE PRUDENCE DO YOU KNOW WHAT HE WANTS SAID MARGUERITE +3528-168669-0080-775: I HAVE ON MY RIGHT (BENOIT->BENOIS) AND ON MY LEFT BERNARD WHO WAS BERNARD +7975-280084-0011-2529: DOCTOR WHEELER WHO HAD GONE UPSTAIRS IN THE HOTEL SHOT MILLER AND HE LAY DYING IN THE STREET +3528-168669-0095-790: BY ORDER OF THE KING SIGNIFIES TO DAY BY ORDER OF THE REVOLUTION +8280-266249-0043-2844: LET ME GO YELLED WARD MAKING A DESPERATE EFFORT TO FREE HIS ARMS +3538-142836-0024-850: AT DESSERTS OR AT SOME EVENING PARTIES ICES ARE SCARCELY TO BE DISPENSED WITH +6938-70848-0027-2184: BUT THE PRESENT (MOVEMENT->MOMENT) IS INTERNATIONAL AND THAT IS WHY IT IS INVINCIBLE +7975-280076-0011-2500: I BROUGHT THE CATTLE TO KANSAS THAT FALL AND REMAINED IN SAINT CLAIR COUNTY UNTIL FEBRUARY +7902-96592-0019-2364: IT WAS (YOU->YOUR) FATHER KICKED FOR SHIRKING AND MY WELL I HARDLY KNOWED YOU +3764-168670-0040-1015: FAUCHELEVENT RECOILED AND CRACKED HIS FINGER JOINTS BUT THAT IS IMPOSSIBLE +6432-63723-0041-2140: GOOD EVENING COLONEL HE CALLED GENIALLY WILL YOU JOIN ME IN A WELSH RABBIT +367-293981-0001-955: I SAY SO CONTINUED DON QUIXOTE BECAUSE I HATE TAKING AWAY (ANYONE'S->ANY ONE'S) GOOD NAME +7902-96595-0010-2439: DON'T KNOW AS HE HAS BEEN SEEN ABOUT HERE DO YOU SAID (GURR->GIRL) LOOKING AT HER SEARCHINGLY NO SIR +8131-117016-0034-2589: I WANT THE NAME OF EVERY MAN IN THE GANG YOU CAN REMEMBER HE TOLD THE MAN +8131-117017-0003-2620: GORDON HIT THE SIGNAL SWITCH AND THE (MARSPEAKER->MAR SPEAKER) LET OUT A SHRILL WHISTLE +6128-63240-0019-1990: AND MISSUS (LUNA->LENA) ADDED THAT NOW SHE WAS BACK SHE DIDN'T KNOW WHAT SHE SHOULD DO +7975-280057-0000-2455: THESE HATREDS WERE SOON TO MAKE TROUBLE FOR ME OF WHICH I HAD NEVER DREAMED +8280-266249-0013-2814: WITH ALL MY HEART IF YOU WILL STEP INTO THE (GENTLEMEN'S->GENTLEMAN'S) CABIN WHERE THERE'S A LIGHT +2414-128292-0005-295: VERILY MY FOLLY HATH GROWN BIG IN THE MOUNTAINS +8188-269290-0053-2784: (I AM->I'M) AFRAID I AM REPLIED (LESLIE DISTRESS->LIZZIE DISTRESSED) NOW IN HER TONE +5484-24317-0033-1765: IT WAS NAY IT COULD HAVE BEEN NOTHING ELSE THAT VERY SPIDER +7975-280084-0012-2530: CHANGING HIS PISTOL TO HIS LEFT HAND BOB RAN OUT AND MOUNTED MILLER'S MARE +7975-280085-0008-2544: BUT THEY SOON AFTER GOT CLOSE ENOUGH SO THAT ONE OF THEM BROKE MY WALKING STICK WITH A SHOT +4852-28319-0008-1510: THEN AS NOTHING HAPPENED WITH A VOICE LIKE A WHIP MISTER WICKER SAID START AT ONCE +3764-168671-0027-1060: IT IS TRUE NEVERTHELESS THAT GRAVE DIGGERS DO DIE THEMSELVES +6070-86744-0023-1944: CERTAINLY THESE ARE QUESTIONS THAT IN YOUR PLACE I SHOULD LIKE TO HAVE ANSWERED +4852-28330-0010-1540: THE CAPTAIN SAT FORWARD IN HIS CHAIR LOOKING AT HIM FOR A LONG MOMENT CONSIDERING +533-131562-0008-1615: I DIDN'T SAY (I'D->I) BROKEN IT DID I RETURNED HE +367-130732-0020-940: SERVE IN A CHAFING DISH WITH TOASTED CRACKERS OR THIN SLICES OF TOASTED BREAD +8188-269290-0023-2754: IF YOU ARE IN LONDON DURING THE SUMMER 
YOU MUST COME AND PAY US A VISIT WILL YOU +8131-117029-0000-2650: THERE WAS A MAN COMING FROM EARTH ON A SECOND SHIP WHO WOULD SEE HIM +8461-258277-0007-2874: THEN HE SET OUT REJOICING TO RETURN TO THE (BARRACK->BARRA) OF THE FORTY +4198-12281-0006-1225: LET HIM BE CARRIED TO PRISON FOR TROUBLING THE DIVINE SERVICE +3528-168669-0060-755: I AM AT THE ORDERS OF THE VERY REVEREND (COMMUNITY->KUNITY) +3764-168671-0022-1055: THE MAN REPLIED THE GRAVE DIGGER +3764-168670-0020-995: ASKED JEAN VALJEAN FAUCHELEVENT REPLIED +7902-96594-0009-2405: CERTAINLY SIR SMUGGLERS ARE SMUGGLERS INDEED +7975-280076-0006-2495: THE PARTS OF THIS LETTER NOW RELEVANT ARE AS FOLLOWS +3528-168669-0016-711: THAT IS GOOD REVEREND MOTHER I WILL OPEN THE VAULT +7975-280057-0010-2465: (MISSUS->MISS) MC (CORKLE->CORAL) JUMPED FROM THE WINDOW OF THE HOUSE AND ESCAPED +8188-269290-0019-2750: OH I WON'T PRESS YOU REPLIED JANE +3764-168670-0021-996: WHAT COFFIN WHAT ADMINISTRATION +7902-96594-0024-2420: YES SIR SAID THE MAN HUMBLY SHALL I GO AT ONCE SIR +3528-168669-0061-756: THE FOUR MOTHER (PRECENTORS->PRESENTERS) WILL ASSIST YOU +1688-142285-0066-66: BUT THE GIRL ONLY PLEADED THE MORE WITH MARGARET +3005-163389-0016-456: AND THE HORSE A GOING LIKE A HOUSE AFIRE TOO +3005-163390-0026-485: IT DON'T SEEM NATURAL BUT I RECKON IT'S SO +8188-269290-0049-2780: IT WAS PAST TEN O'CLOCK WHEN SHE LEFT THE HALL +2414-128292-0016-306: THE DEVIL HIMSELF IS PERHAPS (SKIN->KIN) +4852-28319-0018-1520: CHRIS GOT UP AND STOLE BACK TO MISTER WICKER'S DOOR +3331-159605-0042-651: I JUST GAVE HIM A HINT AND HE TOOK IT +6432-63723-0036-2135: SO KING GOT BAIL WHO PUT IT UP +8188-269288-0017-2690: HERE IS A LETTER FOR YOU ANNIE CRIED LESLIE +3528-168669-0121-816: AFTER WHICH THERE WILL BE NO TRACE OF ANYTHING +3528-168669-0105-800: HAS THE DOCTOR FOR THE DEAD PAID HIS VISIT +1998-29454-0032-156: I SEE THAT THERE IN A BOOK SAID DICKIE CHARMED +8280-266249-0009-2810: HE CERTAINLY LOOKS LIKE A VERY NICE LITTLE BOY +8131-117016-0045-2600: BUT IT'S GOING TO BE TOUGH ON THEM +4852-28330-0005-1535: WE'VE WATER AND FRESH STORES TO TAKE ON THERE +2609-156975-0020-381: FROM ABOUT TWO THOUSAND B C +4294-9934-0022-1340: HE BOUGHT THE WATCH FOR FORTY FIVE FRANCS +8188-269288-0032-2705: WHAT CAN SHE BE DOING OUT BY HERSELF +5764-299665-0061-1865: RELIGION HAS NEVER MADE (MAN->MEN) FREE +6938-70848-0023-2180: THE (DUMAS->TUMICE) AND (ZEMSTVOS->THEM STOVES) WERE DROPPED +1998-29454-0017-141: THAT (AIN'T WHAT->AIRN'T BUT) SHE'LL BE IN WHEN (YOU->HE) GETS BACK +7902-96592-0015-2360: (RAM->GRIM) WAS THE FIRST TO RECOVER FROM HIS SURPRISE +3528-168669-0120-815: HOWEVER NEVER MIND I SHALL HAVE MY (LEVER->LOVER) +4198-12259-0030-1205: (HO->OH) THIS (WILL->WAS) BANG IT SOUNDLY +533-131564-0016-1640: HE FOLLOWED ME INTO THE LIBRARY +3005-163390-0011-470: GREENHORNS FLATHEADS +4198-12281-0001-1220: I BESEECH YOU THINK UPON IT +3528-168669-0075-770: BUT THE COMMISSARY OF POLICE +2414-128292-0001-291: MY SHADOW CALLETH ME +5484-24318-0024-1790: WHAT A TERRIBLE ORDEAL AGAIN AWAITS YOU +3528-168669-0045-740: SHE GAVE US HER LAST COMMANDS +6070-86745-0004-1955: VERY WELL AT HALF PAST TEN +7975-280063-0004-2480: COME IN COLONEL (HAYS->HAYES) EXCLAIMED COLONEL (COCKRELL->CONCRELL) +8188-269288-0047-2720: I AM SLEEPY I SHALL SLEEP +5764-299665-0076-1880: EVERY EVENT HAS PARENTS +367-130732-0000-920: LOBSTERS AND LOBSTERS +367-130732-0015-935: (GOBEY'S CRAB STEW->GOBIES CRABS DO) +3764-168670-0006-981: THAT'S WHERE THE DIFFICULTY LIES +6432-63723-0051-2150: WHY POLONIUS SOME ONE 
(ASKED->ASKS) +3528-168669-0106-801: HE WILL PAY IT AT FOUR O'CLOCK TO DAY +8131-117016-0000-2555: CAPTAIN (MURDOCH->MURDOCK) +5764-299665-0031-1835: IT IS A RESULT +3005-163390-0012-471: NO I SAYS IT DON'T +2414-159411-0027-350: (WHERE->THERE) WAS THE (TIGER->CHILD) THEN +3005-163399-0028-545: (IT'S->IS) TOM SAWYER +3528-168669-0015-710: I WILL PUT THE (LEVER->LOVER) THROUGH IT +3528-168669-0091-786: THEY PERSECUTE THE SAINTS +6128-63240-0014-1985: HAVE YOU BEEN IN EUROPE +4294-9934-0028-1346: MARIUS SENT BACK THE THIRTY LOUIS TO HIS AUNT WITH A RESPECTFUL LETTER IN WHICH HE STATED THAT HE HAD SUFFICIENT MEANS OF SUBSISTENCE AND THAT HE SHOULD BE ABLE THENCEFORTH TO SUPPLY ALL HIS NEEDS +7105-2330-0005-2245: HENCE THE ANXIETY IN THE CROWDED COURT AND IN THE LITTLE GROUPS GATHERED ROUND THE TAPE MACHINES IN WHITEHALL AND (DOWNING->DAWNING) STREET AND OTHER AFFECTED CENTRES +8188-269290-0054-2785: I MUST SEE HER MYSELF EARLY IN THE MORNING AND I AM QUITE SURE THAT NOTHING WILL SATISFY (MISS LAUDERDALE->MISSUS LARDADAE) EXCEPT A VERY AMPLE APOLOGY AND A FULL EXPLANATION OF THE REASON WHY SHE ABSENTED HERSELF +3005-163391-0015-505: MARY JANE'S NINETEEN SUSAN'S FIFTEEN (AND->AN) JOANNA'S ABOUT (FOURTEENTHAT'S->FOURTEEN THAT'S) THE ONE THAT GIVES HERSELF TO GOOD WORKS (AND->AN) HAS A (HARE->HAIR) LIP POOR THINGS +2414-128291-0017-280: THUS SPAKE THE (PEACEFUL->B YOUTHFUL) ONE AND PUFFED HIMSELF AND PERSPIRED WITH HIS WORDS SO (THAT->IN) THE (KINE WONDERED->KIND WANDERED) ANEW +4294-9934-0027-1345: ONE MORNING ON HIS RETURN FROM THE LAW SCHOOL MARIUS FOUND A LETTER FROM HIS AUNT AND THE SIXTY PISTOLES THAT IS TO SAY SIX HUNDRED FRANCS IN GOLD IN A SEALED BOX +2033-164914-0010-221: BY ALLAH AN THOU FETCH HIM NOT TO ME I WILL ASSUREDLY ROUSE THE CHAMBERLAIN ON THEE AND HE SHALL BEAT THEE AND CAST THEE OUT +2033-164915-0002-236: WHEN THE EUNUCH SAW THIS CASE HE WONDERED AT THEM AND THROWING OVER THEM SOMEWHAT TO COVER THEM WAITED TILL THEY SHOULD RECOVER +3080-5032-0002-550: HER CONVERSATION HAS MORE CHARMS THAN CAN BE IN MERE BEAUTY AND (HER->A) HUMOUR AND DISPOSITION WOULD MAKE A DEFORMED PERSON APPEAR LOVELY +3005-163389-0006-446: SO THEY ALWAYS ACQUIT AND THEN A MAN GOES IN THE NIGHT WITH A HUNDRED (MASKED->MASTED) COWARDS AT HIS BACK AND LYNCHES THE RASCAL +5442-41168-0003-1675: SHOUTS WERE RAISED AND FOR A MOMENT ALL WAS CONFUSION SO THAT THE MARSHAL OF THE PROVINCE HAD TO CALL FOR ORDER A BALLOT +4852-28319-0023-1525: MISTER WICKER BEGAN MOVING ABOUT UPSTAIRS THE (FLOORBOARDS->FLOOR BOARDS) CREAKED (*->CREAKERY) AND STILL CHRIS COULD NOT LEAVE UNTIL THE (POTION->FOCCETION) FUMED AND GLOWED +3538-142836-0010-836: THE REASON WHY THE FRUIT IS EMPTIED OUT OF THE PRESERVING PAN INTO AN EARTHEN PAN IS THAT THE ACID OF THE FRUIT ACTS UPON THE COPPER OF WHICH THE PRESERVING PANS ARE USUALLY MADE +3080-5040-0006-581: DO NOT TAKE IT ILL FOR I WOULD ENDURE IT IF I COULD RATHER THAN FAIL BUT IN EARNEST I DO NOT THINK IT WERE POSSIBLE FOR ME +8461-281231-0035-2935: THE ASSURANCE THAT SHE POSSESSED SOME FRIEND IN THIS AWFUL ASSEMBLY GAVE HER COURAGE TO LOOK (AROUND->ROUND) AND TO MARK INTO WHOSE PRESENCE SHE HAD BEEN CONDUCTED +3528-168669-0050-745: I HAVE CONSULTED UPON THIS POINT MANY ECCLESIASTICS (LABORING->LABOURING) IN OUR LORD WHO OCCUPY THEMSELVES IN THE EXERCISES OF THE CLERICAL LIFE AND WHO BEAR WONDERFUL FRUIT +7902-96591-0015-2335: TO DO THIS HE MUST SCHEME LIE HID TILL MORNING THEN MAKE FOR THE NEAREST POINT AND SIGNAL FOR HELP UNLESS (A->THE) BOAT'S CREW WERE ALREADY SEARCHING FOR HIM HOW TO 
ESCAPE +7975-280076-0027-2516: I HAD NOT HEARD OF THAT WHEN I WROTE THE LETTER OF EIGHTEEN SEVENTY FOUR AND TO CORRECT ANY MISAPPREHENSION THAT MIGHT BE CREATED BY OMITTING IT I WILL SAY THAT AT (THAT->THE) TIME I WAS AT (NEOSHO->NEOTIO) KANSAS WITH A DROVE OF CATTLE WHICH I SOLD TO MAJOR RAY +5484-24318-0001-1767: TO OFFER RESISTANCE WOULD HAVE BEEN MADNESS FOR EVEN (HERMON->HERMONN) PERCEIVED BY THE LOUD CLANKING OF WEAPONS AROUND THEM THE GREATLY SUPERIOR POWER OF THE ENEMY AND THEY WERE ACTING BY THE ORDERS OF THE KING TO THE PRISON NEAR THE PLACE OF EXECUTION +8280-266249-0015-2816: ONE DROP OF NICOTINE (EXTRACT OF->EXTRACTED) TOBACCO PLACED ON THE TONGUE OF A DOG WILL KILL HIM IN A MINUTE THE HUNDREDTH PART OF A GRAIN (PICKED->PRICKED) UNDER THE SKIN OF A MAN'S ARM WILL PRODUCE NAUSEA AND FAINTING +2414-159411-0005-328: SO THE BRAHMAN AND THE TIGER WALKED ON TILL THEY CAME TO A (BANYAN->BANDON) TREE AND THE BRAHMAN SAID TO IT (BANYAN->BANNON) TREE (BANYAN->BANNON) TREE HEAR AND GIVE (JUDGMENT->JURGMENT) +3764-168670-0028-1003: IT IS A CHAMBER ON THE GROUND FLOOR WHICH HAS A GRATED WINDOW OPENING ON THE GARDEN WHICH IS CLOSED ON THE OUTSIDE BY A SHUTTER AND TWO DOORS ONE LEADS INTO THE CONVENT THE OTHER INTO THE CHURCH WHAT CHURCH +3080-5040-0022-597: BUT I AM TROUBLED MUCH YOU SHOULD MAKE SO ILL A JOURNEY TO SO LITTLE PURPOSE INDEED I (WRIT->WROTE) BY THE FIRST POST AFTER MY ARRIVAL HERE AND CANNOT IMAGINE HOW YOU CAME TO MISS OF MY LETTERS +3080-5040-0008-583: MY AUNT TOLD ME NO LONGER AGONE THAN YESTERDAY THAT I WAS THE MOST WILFUL WOMAN THAT EVER SHE KNEW AND HAD AN OBSTINACY OF SPIRIT NOTHING COULD OVERCOME TAKE HEED +367-293981-0018-972: THE CRIES OF THE POOR (BLANKETED->BLANDED) WRETCH WERE SO LOUD THAT THEY REACHED THE EARS OF HIS MASTER WHO HALTING TO LISTEN ATTENTIVELY WAS PERSUADED THAT SOME NEW ADVENTURE WAS COMING UNTIL HE CLEARLY PERCEIVED THAT IT WAS HIS SQUIRE WHO UTTERED THEM +4852-28311-0009-1452: HE WOULD HAVE LIKED TO GET THE JOB FOR (JAKEY->JAKIE) WHO NEEDED IT BUT SOMEHOW THE TASK OF FACING MISTER WICKER ESPECIALLY NOW THAT THE LIGHT WAS GOING AND DUSK (EDGING->EDGED) INTO THE STREETS WAS NOT WHAT CHRIS HAD INTENDED FOR ENDING THE AFTERNOON +5764-299665-0008-1812: IS THIS GOD RESPONSIBLE FOR RELIGIOUS PERSECUTION FOR THE INQUISITION FOR THE THUMB SCREW AND (RACK->RAG) AND FOR ALL THE INSTRUMENTS OF TORTURE +7975-280057-0002-2457: MY MOTHER WHO (WAS BURSHEBA FRISTOE->WASURCEBAH FOR STOW) OF INDEPENDENCE WAS (THE->A) DAUGHTER OF RICHARD (FRISTOE->FRISTOW) WHO FOUGHT UNDER GENERAL ANDREW JACKSON AT NEW ORLEANS JACKSON COUNTY HAVING BEEN SO NAMED (AT->AND) MY GRANDFATHER (FRISTOE'S->FORSTOW'S) INSISTENCE +6938-70848-0015-2172: UPON MY RETURN I VISITED (SMOLNY->SMOLNEY) NO SUCH ACCUSATION WAS MADE AGAINST ME THERE AFTER A BRIEF CONVERSATION I LEFT AND THAT'S ALL LET ANY ONE PRESENT MAKE SUCH AN ACCUSATION +7105-2340-0024-2306: IT'S AN UNPLEASANT THING TO HAVE TO SAY HE BLURTED OUT PRESENTLY BUT I'M AFRAID YOU MUST HAVE A THIEF AMONG YOUR SERVANTS SOMETHING'S BEEN TAKEN OUT OF MY PORTMANTEAU +4852-28319-0011-1513: HE THOUGHT NOT WITHOUT A FEELING OF PRIDE AND COMMENCED EXPERIMENTING WITH HIS TAIL AND FINS WITH SUCH ENTHUSIASM AND DELIGHT THAT SOME LITTLE TIME ELAPSED BEFORE MISTER WICKER'S VOICE BOOMED CLOSE BY +4198-61336-0006-1241: DESPITE THE BLOW DEALT AGAINST (URARTU->URTU) ASSYRIA DID NOT IMMEDIATELY REGAIN POSSESSION OF NORTH SYRIA +2414-159411-0033-356: WHEN (THE->A) GOOD BRAHMAN OPENED YOUR (CAGE->CASE) DOOR IS TO EAT HIM THE ONLY RETURN YOU (WOULD->WILL) MAKE 
+6128-63244-0007-2020: SHE WISHED TO WORK IN ANOTHER FIELD SHE HAD LONG BEEN PREOCCUPIED WITH THE ROMANCE OF THE PEOPLE +4294-14317-0005-1271: I ASK NO FURTHER REWARD FOR MY LABOURS UP TO THIS TIME THAN THE GRACIOUS FAVOUR OF YOUR MOST ILLUSTRIOUS EXCELLENCY +5764-299665-0037-1841: THE GREATEST OF HUMAN BEINGS (HAS->HAD) SAID CONSCIENCE IS BORN OF LOVE +7105-2340-0023-2305: WILFRID WAS LATE IN COMING DOWN TO BREAKFAST AND HIS MANNER SHOWED PLAINLY THAT SOMETHING WAS AMISS +2414-128292-0006-296: NOW DO I HEAR SIX OLD (FOOLS->FOOL'S) LEGS RATTLING BEHIND ONE ANOTHER +4198-61336-0021-1256: (MENAHEM->MANAHIM) KING OF ISRAEL HAD DIED AND WAS SUCCEEDED BY HIS SON PEKAHIAH +3538-163624-0003-896: SO HE ASKED THE QUEEN HOW DO YOU KNOW IN THE DARK OF NIGHT WHETHER THE HOURS ARE WEARING TO THE MORNING AND SHE SAID +533-131564-0007-1631: NEVER MIND MY PLAIN SPEAKING SAID I IT IS FROM THE BEST OF MOTIVES +6128-63244-0008-2021: THIS MIGHT SEEM ONE OF THE MOST ACCESSIBLE OF PLEASURES BUT IN POINT OF FACT SHE HAD NOT FOUND IT SO +8131-117016-0005-2560: THEY WERE SAFE FROM PROTECTION RACKETEERS THERE NONE BOTHERED TO COME SO FAR OUT +5484-24318-0030-1796: BESIDES HE KNEW THAT THE OBJECT OF HIS LOVE WOULD NOT PART FROM HIM WITHOUT GRANTING HIM ONE LAST WORD +3331-159605-0002-611: (I VE->I'VE) TRIED IT AND LIKED IT AND MAYBE THIS IS THE CONSEQUENCE OF THAT NIGHT'S FUN +4294-35475-0024-1315: AMONG THOSE WHO DREW BACK WERE ETHELRIED'S BROTHERS THE THREE THAT WERE DARK AND THE THREE THAT WERE FAIR +7902-96594-0030-2426: NOW MISTER GURR HE SAID I'M ONLY GOING TO SAY ONE THING TO YOU IN THE WAY OF INSTRUCTIONS YES SIR +3538-163624-0018-911: THERE IS SIGURD ROASTING FAFNIR'S HEART FOR ANOTHER WHEN HE SHOULD TASTE OF IT HIMSELF AND LEARN ALL WISDOM +5484-24317-0019-1751: LET US HOPE THAT YOU WILL MAKE THIS THREE LEAVED CLOVER THE LUCK PROMISING FOUR LEAVED ONE +7902-96591-0016-2336: THE WINDOW WAS BARRED BUT HE WENT TO IT AND TRIED THE BARS ONE BY ONE TO FIND THEM ALL SOLIDLY FITTED INTO THE STONE SILL +7975-280085-0009-2545: WE WERE IN SIGHT OF OUR LONG (SOUGHT->SAWED) HORSES WHEN THEY CUT US OFF FROM THE ANIMALS AND OUR LAST HOPE WAS GONE +6070-86745-0010-1961: YES HE HAS NOT MUCH TO COMPLAIN OF BOURGES IS THE CAPITAL OF CHARLES (SEVEN->THE SEVENTH) +4350-10919-0028-1376: NERVOUS IRRITABILITY HE SAID TO THE PRINCESS WHEN KITTY HAD LEFT THE ROOM HOWEVER I HAD FINISHED +5764-299665-0022-1826: OUGHT THE SUPERIOR (RACES->RACE) TO THANK GOD THAT THEY ARE NOT THE INFERIOR +1998-29455-0035-206: IF ANY ONE (ARSTS->ASKS) YOU IF YOU EVER SEE (IM->HIM) YOU NEVER SET EYES ON (IM->HIM) IN ALL (YOUR->YOU'RE) BORN NOT TO REMEMBER (IM->HIM) +3528-168669-0096-791: ONE NO LONGER KNOWS WHAT IS DUE TO THE LIVING OR TO THE DEAD A HOLY DEATH IS PROHIBITED +2609-156975-0025-386: THE STORY OF MOSES IS IN MANY WAYS CLOSELY PARALLEL TO THAT OF (SINUHIT->SINEWET) +8188-269288-0022-2695: LESLIE SEATED HERSELF WITH HER BACK TO HER COMPANION AND OPENED HER OWN LETTERS +6070-86744-0010-1931: FOR FRANCE NO FOR VENICE I SHALL REMAIN IN ITALY FOR ANOTHER YEAR OR TWO +4852-28311-0007-1450: CHRIS ASKED AND FOR THE FIRST TIME THAT DAY THE HEAVY WEIGHT HE CARRIED WITHIN HIM LIFTED AND LIGHTENED A LITTLE +2609-157645-0001-401: THE PROHIBITION IN THE REGULATION (QUOTED->QUOTE) OF SMOKING IN SAINT MARY'S CHURCH REFERRED IT MAY BE NOTED TO THE ACT WHICH WAS HELD THEREIN +6128-63240-0005-1976: IN SPITE OF THIS DECORATION THE YOUNG MAN LOOKED POOR AS POOR AS A YOUNG MAN COULD LOOK WHO HAD SUCH A FINE HEAD AND SUCH MAGNIFICENT EYES +7105-2330-0006-2246: THE 
JURY RETURNED FROM CONSIDERING THEIR VERDICT THERE WAS A FLUTTER AN EXCITED MURMUR A (DEATHLIKE->DEATH LIKE) HUSH +3005-163399-0004-521: WHEN WE GOT THERE SHE SET ME DOWN IN A SPLIT (BOTTOMED->BOTTOM) CHAIR AND SET HERSELF DOWN ON A LITTLE LOW STOOL IN FRONT OF ME HOLDING BOTH OF MY HANDS AND SAYS +7975-280085-0010-2546: SIX (STEPPED->STEPS) TO THE FRONT SHERIFF (GLISPIN->LISPON) COLONEL T (L VOUGHT->O VAULT) B M RICE G A BRADFORD C A (POMEROY->POLMROY) AND S J (SEVERSON->CEVERSON) +533-1066-0015-1571: ONCE ONLY SOMEBODY SPOKE AND THEN IT WAS AN EMPHATIC BIT OF PROFANITY FROM DOCTOR STEWART WHEN HE RAN INTO A WIRE FENCE +3331-159605-0017-626: A FOOLISH LITTLE SPEECH TO MAKE TO A (DOG->DARK) BUT YOU SEE POLLY WAS ONLY A TENDER HEARTED GIRL TRYING TO DO HER DUTY +533-1066-0001-1557: I KNEW WELL ENOUGH THAT HE MIGHT BE CARRIED THOUSANDS OF MILES IN THE BOX CAR LOCKED IN PERHAPS WITHOUT WATER OR FOOD +2033-164914-0011-222: BUT TAKE THESE HUNDRED (DINERS->DINARS) AND GIVE THEM TO THE SINGER AND BRING HIM TO ME GENTLY AND DO HIM NO HURT +1688-142285-0057-57: MARGARET BENT OVER AND SAID BESSY DON'T BE IMPATIENT WITH YOUR LIFE WHATEVER IT IS OR MAY HAVE BEEN +2414-159411-0018-341: AFTER THIS THEY SAW AN ALLIGATOR AND THE BRAHMAN RELATED THE MATTER TO HIM HOPING FOR A MORE (FAVORABLE->FAVOURABLE) VERDICT +3538-163619-0014-867: NO ONE CAN TELL HOW DELIGHTED THE KING WAS TO GET RID OF THAT HIDEOUS BUSHY BRIDE AND GET A QUEEN WHO WAS BRIGHT AND BEAUTIFUL AS DAY ITSELF +2609-156975-0010-371: HERE WERE FOUND SEVERAL INSCRIPTIONS BEARING THE EGYPTIAN NAME OF THE CITY (P ATUM->PATUM) HOUSE OF THE GOD (ATUM->ATOM) +7975-280057-0016-2471: I HAVE ALWAYS FELT THAT THE EXPOSURE TO WHICH SHE WAS SUBJECTED ON THIS CRUEL JOURNEY TOO HARD EVEN FOR A MAN TO TAKE WAS THE DIRECT CAUSE OF HER DEATH +2414-128291-0018-281: THOU DOEST (VIOLENCE->VICE) TO THYSELF THOU PREACHER (ON->OF) THE MOUNT (WHEN->AND) THOU USEST SUCH SEVERE WORDS +2414-159411-0019-342: BUT THE ALLIGATOR SAID WHENEVER I PUT MY NOSE OUT OF THE WATER MEN TORMENT ME AND TRY TO KILL ME +2609-156975-0026-387: THE (PRIEST->PRIESTS) OF THE SUB TRIBE OF THE (KENITES->CANAITES) RECEIVED HIM INTO HIS HOME AND GAVE HIM HIS DAUGHTER IN MARRIAGE +367-293981-0002-956: I SAY REPLIED SANCHO THAT I SWEAR TO HOLD MY TONGUE ABOUT IT TILL THE END OF YOUR (WORSHIP'S DAYS->WORSHIP STAYS) AND (GOD GRANT->GON GRAT) I MAY BE ABLE TO LET IT OUT (TOMORROW->TO MORROW) +5484-24317-0004-1736: THE BANQUET WAS TO BEGIN IN A FEW HOURS YET HE COULD NOT LET THE DAY PASS WITHOUT SEEING DAPHNE AND TELLING HER THE WORDS OF THE ORACLE +6432-63722-0027-2066: MAYBE THE FIGHT WAS ABOUT WHO OWNED THE WATCH FOR THE (DAGOS->DAGOES) TALKED IN THEIR FOREIGN LINGO AND NONE OF THE NEIGHBORS COULD TELL WHAT THEY WERE (SAYIN->SAYING) I SEE +7975-280057-0001-2456: HENRY WASHINGTON YOUNGER MY FATHER REPRESENTED JACKSON COUNTY THREE TIMES IN THE LEGISLATURE AND WAS ALSO (*->A) JUDGE OF THE COUNTY COURT +3528-168656-0000-682: SHE HAD EVEN BEEN IN SOCIETY BEFORE THE REVOLUTION +4852-28311-0018-1461: THE AIR WAS GROWING CHILL (AND->WHEN) CHRIS DECIDED TO FINISH (HIS->THE) JOB +2414-128291-0014-277: WHY DOST THOU TEMPT ME +3080-5032-0014-562: BUT BESIDES I CAN GIVE YOU OTHERS +5442-41169-0016-1716: WELL AND (HOW IS->HOW'S) YOUR LAND DOING +4294-35475-0020-1311: WHAT IS TO BECOME OF ME CRIED THE POOR PEASANT +3764-168671-0024-1057: YOU I +5442-41169-0031-1731: WITHOUT A RETURN TOO AT A SIMPLE LOSS +2414-128292-0017-307: SOMETIMES I MEANT TO LIE AND BEHOLD +3764-168671-0038-1071: (*->BUT) SO FATHER MESTIENNE 
IS DEAD +8188-269288-0003-2676: SHE HAD NO IDEA OF ALLOWING HERSELF TO BREAK DOWN +7975-280084-0008-2526: IF ANY OF OUR PARTY SHOT HIM IT MUST HAVE BEEN WOODS +8280-266249-0055-2856: WHAT CAN IT HAVE BEEN THEY ASKED EACH OTHER +4198-12259-0002-1177: A CESSATION AND (TRUCE->TRUTH) WITH THIRST +7902-96591-0011-2331: I'LL SOON SHOW THEM THAT I AM NOT GOING TO BE PLAYED WITH +3764-168671-0039-1072: THE MAN REPLIED COMPLETELY +3997-180294-0013-1101: THEY KNOW NOT WHAT PROOF TO GIVE +7975-280057-0011-2466: AS THE RAIDERS LEFT ONE OF THEM SHOUTED +7902-96594-0010-2406: BEG PARDON SIR DIDN'T MEAN ANY HARM +6432-63723-0022-2121: LARCH HIMSELF WAS A PECULIAR CHARACTER +8280-266249-0010-2811: SUPPOSE YOU AND HE SHAKE HANDS FRANK +5442-32873-0004-1656: OH FRIGHTFUL FRIGHTFUL +3528-168669-0047-742: THERE WAS SOMETHING OF PARADISE IN THAT DEATH +7902-96595-0007-2436: I DO ASSURE YOU THERE'S NOTHING HERE BUT WHAT YOU MAY SEE +3528-168669-0017-712: WILL THAT BE ALL NO +4198-12259-0031-1206: BUT (THIS->THEY) SHALL BANISH IT UTTERLY +3764-168670-0037-1012: ABOUT THREE O'CLOCK IN THE AFTERNOON +6128-63240-0015-1986: NO (I->*) HAVEN'T BEEN ANYWHERE +3528-168669-0077-772: THE WORLD IS NOTHING IN THE PRESENCE OF THE CROSS +7902-96595-0022-2451: SURELY CRIED SIR RISDON EXCITEDLY +6432-63722-0007-2046: ARE YOU GOING TO WORK ON THAT CASE COLONEL +2414-128292-0002-292: WHAT MATTER ABOUT MY SHADOW +4350-9170-0005-1387: THE BASIS OF AUTHORITY IS BODILY VIOLENCE +7975-280076-0007-2496: YOU MAY USE THIS LETTER IN YOUR OWN WAY +2609-169640-0012-427: A BREATHLESS STILLNESS SUCCEEDED +8188-269290-0005-2736: IT BURNED AS IF WITH FEVER +8131-117016-0016-2571: TWO YEARS GORDON ADMITTED +4852-28311-0003-1446: O K HE SAID +6128-63244-0018-2031: THE UNHAPPINESS OF WOMEN +7975-280076-0022-2511: DOCTOR L LEWIS WAS HIS PHYSICIAN +3528-168669-0062-757: NO IN LOWERING THE COFFIN +3538-163622-0024-892: I HAVE DONE MY BEST REPLIED (CINDERLAD->SIN LAD) +6432-63723-0037-2136: IT WAS (HIGH->TIME) LARCH +4198-12259-0016-1191: I DRINK NO MORE THAN (A SPONGE->THE SPONNES) +6070-86745-0005-1956: IS THE COUNTESS UP YET +4852-28319-0004-1506: WHAT (SHALL->SHOULD ALL) I DO FIRST +4198-12259-0017-1192: I DRINK LIKE A (TEMPLAR KNIGHT->TEMPT LAW NIGHT) +5442-41168-0014-1686: SERGEY IVANOVITCH FROWNED +533-131556-0015-1596: YES YES +5484-24318-0010-1776: YET IT WAS NO ILLUSION THAT DECEIVED HIM +3528-168669-0122-817: THE GOVERNMENT WILL HAVE NO SUSPICION +7902-96592-0016-2361: HULLO HE SAID WHO ARE YOU +1688-142285-0037-37: WELL BESSY HOW ARE YOU +3528-168669-0002-697: FATHER FAUVENT +3080-5040-0017-592: BUT I AM CALLED UPON +7902-96592-0046-2391: (YOU ROPE'S->YOUR ROPES) END ME HE SAID +7902-96592-0001-2346: I'M GOING HOME TO BREAKFAST +7902-96594-0025-2421: NO WAIT +3764-168671-0054-1087: ONE MUST EAT +6128-63240-0016-1987: SHE HATES IT SHE WOULD LIKE TO ABOLISH IT +8188-269288-0049-2722: CAN'T YOU MANAGE WITH A CANDLE JUST FOR ONCE +3528-168669-0093-788: THE MOST FEROCIOUS BEASTS ARE BEASTS WHICH ARE BLIND +5764-299665-0018-1822: CAN WE SAY THAT HE CARED FOR THE CHILDREN OF MEN +7105-2330-0032-2272: A QUARTER PAST TEN HALF PAST +4852-28319-0006-1508: SUPPOSE I CHANGE AND CAN'T CHANGE BACK +7902-96592-0017-2362: GO ROUND AND OPEN THE DOOR I WAS SHUT IN LAST NIGHT BY MISTAKE +4350-10919-0010-1358: BEG PARDON (*->IT) IS THE (YAUSKY->YOZKI) BRIDGE DONE YET OR SHALL I HAVE TO DRIVE (AROUND->ROUND) +2414-159411-0030-353: SHUT AND BOLTED SAID THE BRAHMAN +3538-163622-0010-878: ON THE THIRD DAY (CINDERLAD->SAID THE LAD) WANTED TO SET OUT 
+3997-180297-0026-1148: HERE IT IS I HAVE BROUGHT IT BACK +533-131562-0006-1613: MISTER HUNTINGDON THEN WENT (UP STAIRS->UPSTAIRS) +6432-63723-0039-2138: BUT (HIS ARE->HE HAS A) PRETTY UNCERTAIN SHOES TO BE IN JUST THE SAME +1998-29454-0019-143: I WOULDN'T GO (OME->HOME) NOT IF I WAS YOU SAID THE MAN +4350-10919-0025-1373: EXCUSE ME DOCTOR BUT THERE IS REALLY NO OBJECT IN THIS +3764-168671-0010-1043: IT IS ONE OF THE FAULTS WHICH RESEMBLE A DUTY +6432-63723-0023-2122: IN A SMALLER PLACE HE WOULD HAVE BEEN CALLED A SALOON KEEPER +8188-269290-0006-2737: YOU DON'T KNOW WHAT A TRIAL IT IS FOR ME TO HAVE YOU HERE +3005-163389-0018-458: WHY IT WAS ONE OF HIS OWN MEN +4198-12259-0018-1193: (AND->AN) I (TANQUAM SPONSUS->TAM CORRESPONSES) +5442-41169-0017-1717: BUT ONE'S WORK IS THROWN IN FOR NOTHING +7105-2340-0006-2288: AND THE REPUTATION WAS AN UNPLEASANT ONE +4294-9934-0010-1328: THEY SOON ELBOWED HIM ABRUPTLY +3528-168669-0108-803: BUT YOU DO NOT UNDERSTAND ANY OF (THE PEALS->APPEALS) +8131-117016-0017-2572: FOR A SECOND GORDON CURSED HIMSELF +2414-128291-0000-263: WHAT HATH HAPPENED UNTO ME +3528-168669-0018-713: GIVE ME YOUR ORDERS VERY REVEREND MOTHER +6432-63722-0039-2078: ASKED THE SURPRISED DETECTIVE YES +8188-269288-0004-2677: WHAT DO YOU MEAN REPLIED LESLIE +4852-28311-0004-1447: ONLY WHY DIDN'T YOU ASK HIM YOURSELF +3997-182399-0009-1163: EVERYBODY LOOKED AT PETER AND LAUGHED +7902-96592-0018-2363: I SAW YOU LAST NIGHT AND WONDERED WHOSE BOY (YOU->HE) WAS +7902-96592-0002-2347: SHALL I COME TOO FATHER NO +3005-163390-0029-488: I LAY I MAKE YOU MINE +3997-180294-0030-1118: WHERE AT HOME +4852-28311-0020-1463: AT THE FOOT OF THE HILL HE REACHED THE HOUSE +3528-168669-0063-758: WHERE INTO THE VAULT +8131-117029-0012-2662: THERE WAS A GRIN ON THE OTHER'S FACE +3764-168670-0053-1028: I SHALL FOLLOW THAT IS MY BUSINESS +6432-63722-0009-2048: PERHAPS NOT ADMITTED COLONEL ASHLEY +4294-9934-0025-1343: I HAVE TEN FRANCS LEFT SAID MARIUS +3080-5040-0018-593: DIRECTED FOR YOUR MASTER +8280-266249-0056-2857: OH NONSENSE WHAT FOOLS WE ARE +7975-280084-0009-2527: MEANTIME THE STREET WAS GETTING UNCOMFORTABLY HOT +1688-142285-0023-23: HOWEVER OUT IT CAME +7105-2330-0017-2257: HE EXCLAIMED WON'T GO +8131-117017-0015-2632: WHATEVER COMES TO HAND (GOV'NOR->GUV'NOR) +1998-29454-0034-158: WILD ONES (AIN'T ALF->AND A HALF) THE SIZE I LAY +6938-70848-0026-2183: THE VILLAGES WILL SAVE US IN THE END +3528-168669-0033-728: HEY MORE OFTEN +6128-63240-0002-1973: SHE IS WILLING TO RISK THAT +6938-70848-0025-2182: HE SPOKE TO THE (RUMP->RUM) CONVENTION +6432-63723-0038-2137: THEY TOOK HARRY AWAY A WHILE AGO +7902-96595-0023-2452: SIR (RISDON->RICHMOND) WAS SILENT +5442-41169-0002-1702: HE'S SUCH A BLACKGUARD +533-131556-0016-1597: SUPPOSE I DO +2414-128291-0015-278: ANSWERED THE OTHER +6070-86745-0006-1957: THE VALET LEFT THE ROOM +4852-28312-0023-1493: THE WAREHOUSES WERE STILL THERE +3331-159609-0014-670: IT WAS SO TENDER EARNEST AND DEFIANT THAT FANNY FORGOT THE DEFENCE OF HER OWN LOVER IN ADMIRATION OF POLLY'S LOYALTY TO HERS FOR THIS FAITHFUL ALL ABSORBING LOVE WAS A NEW REVELATION TO FANNY WHO WAS USED TO HEARING HER FRIENDS BOAST OF TWO OR THREE LOVERS A YEAR AND CALCULATE THEIR RESPECTIVE VALUES WITH ALMOST AS MUCH COOLNESS AS THE YOUNG MEN DISCUSSED THE FORTUNES OF THE GIRLS THEY WISHED FOR BUT COULD NOT AFFORD TO MARRY +2033-164915-0017-251: AND AMONGST THEM WERE SOME WHO WOULD HAVE CHOSEN THE CADET ZAU AL MAKAN FOR QUOTH THEY HIS NAME BE LIGHT OF THE PLACE AND HE HATH A SISTER NUZHAT AL ZAMAN HIGHS THE 
DELIGHT OF THE TIME BUT THEY SET OUT FIVE YEARS AGO FOR (AL HIJAZ->ALHIJAZ) AND NONE WOTTETH WHAT IS BECOME OF THEM +3997-180297-0013-1135: WHEN I THOUGHT THAT THIS MARVELLOUS CREATURE WHOSE FEET I HAD ONCE LONGED TO KISS WAS WILLING TO LET ME TAKE MY PLACE IN HER THOUGHTS MY PART IN HER LIFE AND THAT I WAS NOT YET CONTENT WITH WHAT SHE GAVE ME I ASKED IF MAN'S DESIRE (HAS->HAD) INDEED LIMITS WHEN SATISFIED AS PROMPTLY AS MINE HAD BEEN IT REACHED AFTER SOMETHING FURTHER +6128-63241-0006-2005: THE STATE OF MISSISSIPPI SEEMED TO HIM THE STATE OF DESPAIR SO HE SURRENDERED THE REMNANTS OF HIS PATRIMONY TO HIS MOTHER AND SISTERS AND AT NEARLY THIRTY YEARS OF AGE ALIGHTED FOR THE FIRST TIME IN NEW YORK IN THE COSTUME OF HIS PROVINCE WITH FIFTY DOLLARS IN HIS POCKET AND A GNAWING HUNGER IN HIS HEART +2414-128291-0002-265: WHEN HOWEVER ZARATHUSTRA WAS QUITE NIGH UNTO THEM THEN DID HE HEAR PLAINLY (THAT A->*) HUMAN VOICE SPAKE IN THE MIDST OF THE KINE AND APPARENTLY ALL OF THEM HAD TURNED THEIR HEADS TOWARDS THE SPEAKER +7105-2340-0008-2290: THE SEARCH USUALLY PRODUCED A LARGE AND VARIED YIELD THIS IS FUNNY SAID PETER (PIGEONCOTE->PIGEONBUL) TO HIS WIFE SOME HALF HOUR AFTER THEIR CONVERSATION HERE'S A TELEGRAM FROM (WILFRID->WILFRED) SAYING (HE'S->HE IS) PASSING THROUGH HERE IN HIS MOTOR AND WOULD LIKE TO STOP AND PAY US HIS RESPECTS +8131-117016-0036-2591: (MURDOCH->MURDOCK) TOOK HIS NOD AS EVIDENCE ENOUGH AND TURNED TO THE WRETCHED TOUGHS +8131-117029-0015-2665: LET'S SAY YOU'VE SHIFTED SOME OF THE MISERY AROUND A BIT AND GIVEN THEM A CHANCE TO DO BETTER +3331-159605-0003-612: JUST SUPPOSE IT IS TRUE THAT HE DOES ASK ME AND I SAY YES +6070-86744-0025-1946: WHAT ARE HIS MEANS OF EXISTENCE WHAT IS HIS BIRTHPLACE OF WHAT COUNTRY IS HE A NATIVE +1998-29454-0037-161: THAT WAS CHARMING BUT IT WAS PLEASANT TOO TO WASH THE MUD OFF ON THE WET GRASS +3005-163390-0018-477: WELL HENRY HE TAKES A NOTION HE WANTS TO (GET->GIT) UP SOME TROUBLE WITH THIS COUNTRY +4350-9170-0054-1436: THE GOVERNMENT THEY TELL US WITH ITS ARMY IS NECESSARY TO DEFEND US FROM NEIGHBORING STATES WHO MIGHT ENSLAVE US +3764-168670-0056-1031: ONE OF TWO THINGS WILL HAPPEN HE WILL EITHER BE SOBER OR HE WILL NOT BE SOBER +4852-28330-0011-1541: THEN HE SAID WELL I DO NOT CARE FOR IT I CANNOT SAY (*->THAT) I DO +3997-182399-0012-1166: IT WAS (ON->IN) A (LIL OL->LITTLE OLD) HOUSE A (LIL OL->LITTLE OLD) TUMBLE DOWN HOUSE +3005-163391-0001-491: THE KING'S DUDS WAS ALL BLACK AND HE DID LOOK REAL SWELL AND STARCHY +3538-163624-0004-897: THEN THE OLD MAN SAID DRIVE ALL THE HORSES INTO THE RIVER AND CHOOSE THE ONE THAT SWIMS ACROSS +6432-63723-0027-2126: SHE ALSO SAW AN OPPORTUNITY OF PAYING OLD DEBTS AND REAPING SOME REVENGES +3764-168670-0011-986: JEAN VALJEAN STARED HIM STRAIGHT IN THE EYE AND THOUGHT THAT HE WAS RAVING +3538-142836-0011-837: FROM THIS EXAMPLE THE PROCESS OF PRESERVING FRUITS BY SYRUP (WILL->WOULD) BE EASILY COMPREHENDED +5764-299665-0038-1842: AS PEOPLE ADVANCE THE REMOTE CONSEQUENCES ARE PERCEIVED +5484-24318-0015-1781: TO MORROW YOU SHALL CONFESS TO ME WHO TREACHEROUSLY DIRECTED YOU TO THIS DANGEROUS PATH +5442-41168-0019-1691: THEN THERE WAS A LAUGH A (BUTTON->BOTTOM) AND TWO (NUTS->KNOTS) WERE FOUND (IN->ON) THE BOX +8131-117016-0051-2606: NO (YOU'RE->YOU ARE) A FIRSTER HE CAN'T LOSE +4852-28319-0010-1512: HIS HEAD SWAM AND HE FELT FAINT AND A LITTLE SICK BUT HE PERSISTED THROUGH THE FINAL WORDS +3528-168669-0036-731: REVEREND MOTHER I DID NOT SAY MORE OFTEN THAN WHAT I SAID MORE OFTEN +1998-15444-0021-117: ON THE 
EVAPORATION OF THE ALCOHOL THE (RESINOUS->VESINOUS) AND (FATTY MATTERS->FATIMATAS) SEPARATE +3538-163624-0019-912: THAT LET HIM DO (AND->*) THEN RIDE OVER (HINDFELL->HENFELD) TO THE PLACE WHERE (BRYNHILD->BRUNHILD) SLEEPS +3528-168669-0067-762: YOU WILL RAISE THE STONE WITH THE BAR BY MEANS OF THE RING BUT +3997-180297-0030-1152: ONE HAS TO BUT HE WANTS MORE THAN THAT WHAT THEN +4198-12259-0037-1212: O (LACHRYMA->LACHRYMAL) CHRISTI IT IS OF THE BEST GRAPE +3528-168656-0004-686: IT WAS A CENTURY WHICH SPOKE THROUGH HER BUT IT WAS THE EIGHTEENTH CENTURY +3764-168671-0013-1046: MAKE AS MANY (LAWS->NOISE) AS YOU PLEASE MEN BUT KEEP THEM FOR YOURSELVES +7902-96595-0012-2441: I SAID A LAD (BOUT->ABOUT) SEVENTEEN IN A RED CAP LIKE YOURS SAID GURR VERY SHORTLY +8188-269288-0008-2681: SAID ANNIE A FLASH OF LIGHT COMING INTO HER EYES AND THEN LEAVING THEM +8188-269290-0010-2741: WRAPPING A PRETTY BLUE SHAWL (ROUND->AROUND) HER HEAD AND SHOULDERS SHE TURNED TO ANNIE +3764-168670-0026-1001: THIS OFFERS THE MEANS (BUT->*) GIVE ME SOME INFORMATION IN THE FIRST PLACE +5764-299665-0082-1886: WE KNOW THE FOOTSTEPS OF ADVANCE THEY HAVE BEEN TRACED +3764-168670-0041-1016: BAH IMPOSSIBLE TO TAKE A HAMMER AND DRIVE SOME NAILS IN A PLANK +4852-28330-0012-1542: THIS SHIP IS MORE TO ME THAN WIFE OR MOTHER OR FAMILY +6938-70848-0016-2173: MEANWHILE THE QUESTION OF THE STATUS OF THE EXECUTIVE COMMITTEE WAS AGITATING ALL MINDS +533-1066-0017-1573: THE DOCTOR WAS PUFFING SOMEWHAT WHEN WE FINALLY CAME TO A HALT +6128-63240-0021-1992: BESIDES OLIVE DIDN'T WANT HER IN BOSTON AND DIDN'T GO THROUGH THE FORM OF SAYING SO +3764-168671-0015-1048: FAUCHELEVENT LIMPED ALONG BEHIND THE HEARSE IN A VERY CONTENTED FRAME OF MIND +5764-299665-0083-1887: FOR THOUSANDS OF YEARS MEN AND WOMEN HAVE BEEN TRYING TO REFORM THE WORLD +2609-169640-0003-418: I NEVER WAS IN A BETTER STEERING SHIP (MOST ESPECIALLY->MOSTLY SPECTREE) IN MODERATE WEATHER +7105-2340-0010-2292: I SUPPOSE HE'S BRINGING US A PRESENT FOR THE SILVER WEDDING GOOD GRACIOUS +7975-280084-0014-2532: AS TO THE REST OF THE AFFAIR INSIDE THE BANK I TAKE THE ACCOUNT OF A NORTHFIELD NARRATOR +3005-163399-0021-538: I CAN'T IMAGINE SAYS THE OLD GENTLEMAN AND I MUST SAY IT MAKES ME DREADFUL UNEASY +7105-2330-0008-2248: THE JURY FIND THE PRISONER GUILTY OF BLOWING UP THE ALBERT HALL +6432-63723-0043-2142: I'M AFRAID MY DIGESTION ISN'T QUITE UP TO THAT AS I'VE HAD TO CUT OUT MY FISHING OF LATE +4350-10919-0000-1348: HE PERCEIVED THAT IT WAS NO GOOD TALKING TO THE OLD MAN AND THAT THE PRINCIPAL PERSON IN THE HOUSE WAS THE MOTHER +7902-96592-0007-2352: BUT HOW QUEER FOR MISTER (GURR->GOURR) TO BE TALKING LIKE THAT TO ANDREW (TEAL->TEALE) THE BOY WHO HELPED THE COOK +3005-163389-0008-448: BUT A MOB WITHOUT ANY MAN AT THE HEAD OF IT IS BENEATH PITIFULNESS +7018-75788-0015-2203: (HAPLY->HAPPILY) AMONGST YOU IS ONE RIGHTEOUS WHOSE PRAYERS THE LORD WILL ACCEPT +8188-269288-0039-2712: I WALKED UP AND DOWN AS FAST AS EVER I COULD OUTSIDE IN ORDER TO MAKE MYSELF SLEEPY +5764-299665-0053-1857: THE RELIGION OF THE PURITAN WAS AN UNADULTERATED CURSE +7902-96591-0002-2322: AND AND YOU HAVE NOT FOUND OUT ANYTHING CAME IN QUICK FRIGHTENED TONES +8188-269290-0041-2772: THE NAMES OF (*->THE) PROPOSED MEMBERS ARE TO BE SUBMITTED TO ME BEFORE THIS DAY WEEK +5442-41169-0023-1723: HERE (YOU'VE->YOU HAVE) THOUSANDS OF LIMES AND EACH WOULD MAKE TWO GOOD BUNDLES OF BARK +4198-12259-0038-1213: (I'FAITH->I FAITH) PURE GREEK GREEK O THE FINE WHITE WINE +8131-117017-0020-2637: YOU DIDN'T PAY UP YOUR PLEDGE TO 
THE (CAMPAIGN->CAPTAIN) FUND SO I (HADDA->HAD A) FILL IN +3538-163619-0000-853: THERE WAS ONCE ON A TIME A WIDOWER WHO HAD A SON AND A DAUGHTER BY HIS FIRST WIFE +3528-168669-0083-778: ON ONE SIDE SAINT BERNARD ON THE OTHER THE AGENT OF THE SANITARY DEPARTMENT +8188-269288-0009-2682: BUT SHE ADDED ABRUPTLY YOU SPEAK OF SOMETHING WHICH MUST NOT TAKE PLACE +6432-63722-0058-2097: NO ALIMONY (REPEATED->REPLIED) THE COLONEL PUZZLED YES JUST THAT +6128-63244-0010-2023: OLIVE CHANCELLOR WONDERED HOW MISSUS (FARRINDER->VERNDER) WOULD TREAT THAT BRANCH OF THE QUESTION +2609-157645-0003-403: BLACKBURN ARCHBISHOP OF YORK WAS A GREAT SMOKER +7105-2340-0025-2307: IT WAS A LITTLE PRESENT FROM MY MOTHER AND MYSELF FOR YOUR SILVER WEDDING +8188-269290-0011-2742: LESLIE WAS JUST CLOSING THE DOOR BEHIND HER WHEN ANNIE CALLED AFTER HER +3764-168671-0030-1063: DO YOU KNOW WHO LITTLE FATHER (LENOIR->LE NOIR) IS HE IS A JUG OF RED WINE +3764-168670-0013-988: IT IS TO MORROW THAT I AM TO BRING YOU IN THE PRIORESS EXPECTS YOU +2609-156975-0027-388: NOTE THE (CHARACTERISTIC ORIENTAL IDEA->CHAAVERALISTIC ORIENT RE) OF (MARRIAGE->MARYS) +6128-63240-0022-1993: THAT WAS ONE COMFORT WITH (OLIVE->ALIVE) SHE NEVER WENT THROUGH ANY FORMS +8131-117016-0022-2577: THERE WAS NO CHANCE TO SAVE THE CITIZEN WHO WAS DYING FROM LACK OF AIR +7902-96594-0028-2424: AT LAST THE LITTLE LIEUTENANT COULD BEAR THE ANXIETY NO LONGER +4198-12259-0019-1194: AND I SICUT (TERRA SINE AQUA->TERIS INAQUA) +6938-70848-0012-2169: WHEREUPON THE OLD EXECUTIVE COMMITTEE LEFT THE HALL +8188-269288-0006-2679: I (WON'T->WOULD) BE THE CONSTANT WORRY AND PLAGUE OF YOUR LIFE +2414-159411-0031-354: THEN SHUT AND BOLT IT SAID THE JACKAL +4198-12281-0005-1224: LORD GOD (DA MIHI POTUM->DOMIHIPPOTEM) +5764-299665-0050-1854: WHAT DID CHRISTIANITY DO FOR THEM +3764-168671-0041-1074: STAMMERED FAUCHELEVENT IT IS MADE +8461-281231-0003-2903: WHAT ART THOU HE EXCLAIMED IN TERROR +6432-63723-0025-2124: TO THIS WAS THE ANSWER WHISPERED MONEY +3997-180294-0031-1119: YOU STILL LOVE ME CAN YOU ASK +3764-168670-0039-1014: YOU CAN COME AND NAIL ME UP IN THE COFFIN AT TWO O'CLOCK +1688-142285-0054-54: BUT NICHOLAS WAS NOT AT HOME WHEN THEY ENTERED +2414-128292-0019-309: HOW HAVE I STILL (INCLINATION->INCLINATIONS) +8188-269288-0005-2678: WHY YOU WILL BE PARTING FROM ME YOU KNOW +8131-117016-0048-2603: WHAT MAKES YOU THINK (WAYNE->WAIN) WILL BE RE ELECTED +8131-117016-0018-2573: HE BEGAN WONDERING ABOUT SECURITY THEN +3528-168669-0124-819: WHAT IS TO BE DONE WITH THAT COFFIN FATHER (FAUVENT->PREVENT) +7975-280063-0008-2484: THEY DID MARK MY CLOTHES IN ONE OR TWO PLACES HOWEVER +8280-266249-0057-2858: IT WAS THE LAST GAME OF CARDS FOR THAT TRIP +3331-159609-0013-669: IT WAS SO STUPID OF ME NOT TO GUESS BEFORE +8188-269290-0038-2769: SHE UTTERED HER (STRANGE->STREAM) REMARK STANDING UP +7018-75789-0006-2214: BUT I WAS DELIGHTED AT MY ESCAPE FROM THE RIVER +6432-63722-0010-2049: WE'VE GOT OUR MAN AND THAT'S ALL WE WANT +1998-29454-0035-159: ADVENTURES I SHOULD THINK SO +7902-96595-0025-2454: YOU DO NOT KNOW NO +4350-10919-0026-1374: THIS IS THE THIRD TIME (YOU'VE->YOU HAVE) ASKED ME THE SAME THING +3528-168669-0019-714: (FAUVENT->FOR THAT) WE HAVE CONFIDENCE IN YOU +7975-280057-0013-2468: EVERY KNOT REPRESENTED A HUMAN LIFE +8131-117016-0019-2574: NOBODY HAD TRIED TO GET IN TOUCH WITH HIM +4852-28311-0006-1449: WELL HE ADMITTED I DID +8188-269288-0050-2723: CERTAINLY SAID (LESLIE->E) +8188-269290-0037-2768: (HEAR HEAR->HARE HERE) AND ONCE AGAIN (HEAR->HARE) 
+8188-269290-0022-2753: SOME DAY JANE YOU MUST SEE HER +8188-269288-0036-2709: DON'T BEGIN SAID ANNIE +3005-163389-0004-444: THE AVERAGE MAN'S A COWARD +3005-163390-0030-489: (JIS->JUST) AS LOUD AS I COULD YELL +3005-163399-0017-534: CHILDREN DON'T YOU SAY A WORD +8188-269290-0007-2738: I WANT TO BE ALONE GO +8188-269288-0021-2694: I AM TRULY GLAD IT HAS COME +6432-63722-0055-2094: WHAT IS IT PERHAPS I CAN HELP YOU +8131-117017-0032-2649: GO ON (ACCEPT->EXCEPT) DAMN IT +367-293981-0000-954: I SWEAR IT ANSWERED SANCHO +1998-29455-0003-174: (SWELP->SWAP) ME HE SAID HELPLESSLY +3538-142836-0008-834: FOURTEEN NINETY NINE +7902-96595-0024-2453: LADY (GRAEME->GRAHAM) LOOKED GHASTLY +4350-10919-0011-1359: HE ASKED AH IT IS +3528-168669-0094-789: OH HOW WICKED PEOPLE ARE +7105-2340-0022-2304: (IT'S->IS) THE ONLY THING TO DO +3528-168669-0109-804: THAT IS WELL FATHER (FAUVENT->FERVEN) +6070-63485-0012-1914: LET'S GO AND MEET HIM +7902-96594-0012-2408: NO WAIT ANOTHER HALF HOUR +2033-164915-0000-234: AND ALSO THESE +5764-299665-0095-1899: (THIS FREES WOMAN->THESE FREESWOMEN) +7902-96594-0013-2409: VERY ILL ADVISED THING TO DO +4852-28312-0009-1479: YES SIR HE SAID +4350-9170-0022-1404: SO IT HAS ALWAYS BEEN +3528-168669-0034-729: WHAT DO YOU SAY +3538-142836-0023-849: (ICES->ISIS) +4198-12259-0020-1195: GIVE ME A (SYNONYMON->SYNONYM) FOR A (GAMMON->GAMIN) OF BACON +4294-9934-0012-1330: WHAT IS TO BECOME OF YOU SAID COURFEYRAC +7902-96594-0014-2410: THEN I MUST REQUEST THAT YOU WILL NOT MAKE IT AGAIN VERY TRUE +3528-168669-0125-820: IT WILL BE GIVEN TO THE EARTH EMPTY +1688-142285-0086-86: THE MOTHER OF WHOM (HE->YOU) SPOKE TO US SAID MARGARET +3538-163624-0017-910: SO SIGURD PUT THE HEART OF (FAFNIR->FAFNER) ON A (STAKE->STEAK) AND ROASTED IT +4350-9170-0053-1435: WITHOUT GOVERNMENTS NATIONS WOULD BE ENSLAVED BY THEIR NEIGHBORS +7902-96592-0035-2380: BE QUICK THERE'S A GOOD FELLOW I WANT TO GET AWAY AT ONCE +8188-269290-0024-2755: THAT IS IF YOU CARE TO CONFIDE IN ME +367-293981-0016-970: THEN THIS IS AN INN SAID DON QUIXOTE +3005-163399-0019-536: MISSUS PHELPS SHE JUMPS FOR HIM AND SAYS +3331-159605-0001-610: THE MORE PROPOSALS THE MORE CREDIT +3528-168669-0111-806: I HAVE MY HEAP OF OLD IRON AT THE BOTTOM OF THE GARDEN +6432-63722-0026-2065: (SURE->SHORE) HELD SO TIGHT WE COULD HARDLY GET IT OUT +5764-299665-0081-1885: WE KNOW THE PATHS THAT LIFE HAS (TRAVELED->TRAVELLED) +8188-269288-0037-2710: DON'T BEGIN WHAT DO YOU MEAN +8188-269288-0052-2725: ANNIE'S MANNER WAS VERY MYSTERIOUS +7105-2330-0020-2260: ANYWAY HE WON'T GO UNLESS HE HAS A BAND +2414-128292-0021-311: A HAVEN TOWARDS WHICH MY (SAIL IS->SAILORS) SET +6070-63485-0013-1915: (OLD BOY->ALL BY) IT (WILL->WOT) PAY FOR LOOKING AFTER +1998-29454-0022-146: WELL THAT'LL SHOW YOU THE SORT OF MAN I AM +1998-29455-0004-175: OH LOOK SAID DICKIE THE FLOWERS +4198-12259-0005-1180: WHICH WAS FIRST THIRST (OR->A) DRINKING +3528-168669-0065-760: UNDER THE ALTAR BUT +533-131556-0004-1585: I HAVE DONE WELL TO RECORD THEM SO MINUTELY +8188-269290-0009-2740: YOU FRET ME BEYOND ENDURANCE +4350-10919-0027-1375: THE CELEBRATED DOCTOR DID NOT TAKE (OFFENSE->OFFENCE) +2414-128291-0003-266: (WHAT->OR) DO I HERE SEEK +4852-28330-0025-1555: THEIR CONVERSATION HAD TAKEN SOME LITTLE WHILE +1998-29454-0021-145: I AIN'T IT (YER->HERE) HAVE I LIKE WHAT (YER->YOU') AUNT (DO->TOO) +6070-86744-0009-1930: THAT DEPENDS WHEN DO YOU LEAVE +3005-163391-0016-506: WELL THEY COULD BE WORSE OFF +2609-156975-0024-385: THE (SCHOOL->SCORE) OF THE WILDERNESS +8131-117016-0050-2605: 
EVER SEE A MARTIAN ELECTION +3005-163399-0018-535: I SEE I WAS IN A FIX NOW +1998-29455-0005-176: THEY'RE ONLY WEEDS SAID (BEALE->BEAL) +3528-168669-0081-776: THE FIRST ABBOT OF (CLAIRVAUX->CLERVAL) +533-1066-0014-1570: I ASKED NO QUESTIONS +8461-281231-0005-2905: EXCLAIMED THE NORMAN (HO->OH) +5764-299665-0051-1855: THEY HATED PLEASURE +3528-168669-0020-715: I AM HERE TO DO ANYTHING YOU WISH +1688-142285-0055-55: GASPED BESSY AT LAST +367-130732-0006-926: LOBSTER A (LA NEWBERG->NEWBURG) +6432-63722-0041-2080: SIMPLY BECAUSE THIS WATCH +8131-117017-0018-2635: YOU OWE ME SOME BILLS (GOV'NOR->GUV'NOR) +6432-63722-0056-2095: THE OLD ADAGE OF TWO HEADS YOU KNOW +3997-180294-0032-1120: BECAUSE YOU DON'T LIKE SEEING HIM +6938-70848-0013-2170: DOWN WITH HIM THEY SHRIEKED +3005-163390-0002-461: TWENTY PEOPLE SINGS OUT +2414-128292-0020-310: (HAVE->AM) I STILL A (GOAL->GOLD) +6432-63723-0026-2125: AND IN A WAY IT WAS TRUE +8188-269290-0039-2770: MARJORIE AND (EILEEN->AILEEN) WERE CLOSE TO HER +3005-163390-0017-476: (RING->BRING) UP FAIR (ROSAMUN->ROSAMOND) +6432-63723-0056-2155: THAT WAS HERS WENT ON THE (JEWELER->JAWER) +4350-10919-0012-1360: OH WELL THEN I CAN DO IT IN TWENTY MINUTES +7902-96591-0000-2320: I AM FROM THE CUTTER LYING OFF THE COAST +3528-168669-0110-805: WHERE WILL YOU OBTAIN IT +1998-29455-0034-205: WELL YOU NEVER SEE (IM->HIM) +7902-96592-0020-2365: NONSENSE +3080-5032-0018-566: WELL IN EARNEST IF I WERE A PRINCE THAT LADY SHOULD BE MY MISTRESS BUT I CAN GIVE NO RULE TO ANY ONE ELSE AND PERHAPS THOSE THAT ARE IN NO DANGER OF LOSING THEIR HEARTS TO HER MAY BE INFINITELY TAKEN WITH ONE I SHOULD NOT VALUE AT ALL FOR SO SAYS (THE->*) JUSTINIAN WISE PROVIDENCE HAS ORDAINED IT THAT BY THEIR DIFFERENT HUMOURS EVERYBODY MIGHT FIND SOMETHING TO PLEASE THEMSELVES (WITHAL->WITH OF) WITHOUT ENVYING THEIR NEIGHBOURS +3080-5032-0019-567: THE MATTER IS NOT GREAT FOR I CONFESS I DO NATURALLY HATE THE NOISE AND TALK OF THE WORLD AND SHOULD BE BEST PLEASED NEVER TO BE KNOWN (IN'T->IN IT) UPON ANY OCCASION WHATSOEVER YET SINCE IT CAN NEVER BE WHOLLY AVOIDED ONE MUST SATISFY ONESELF BY DOING NOTHING THAT ONE NEED CARE (WHO->*) KNOWS +3080-5040-0007-582: YET IN EARNEST YOUR FATHER WILL NOT FIND MY BROTHER PEYTON WANTING IN CIVILITY THOUGH HE IS NOT A MAN OF MUCH COMPLIMENT UNLESS IT BE IN HIS LETTERS TO ME NOR AN UNREASONABLE PERSON IN ANYTHING SO HE WILL ALLOW HIM OUT OF HIS KINDNESS TO HIS WIFE TO SET A HIGHER VALUE UPON HER SISTER THAN SHE DESERVES +3538-163619-0013-866: THIS TIME ALSO AS BEFORE SHE BORROWED A BRUSH AND BRUSHED HER HAIR WITH IT AND THE GOLD DROPPED DOWN AS SHE DID IT AND AGAIN SHE SENT THE DOG OUT THREE TIMES AND WHEN DAY DAWNED SHE DEPARTED BUT AS SHE WAS GOING SHE SAID AS SHE HAD SAID BEFORE I SHALL COME ONCE MORE AND THEN NEVER AGAIN +3005-163391-0002-492: WHY BEFORE HE LOOKED LIKE THE ORNERIEST OLD RIP THAT EVER WAS BUT NOW WHEN HE'D TAKE OFF HIS NEW WHITE BEAVER AND MAKE A BOW AND (DO->DEW) A SMILE HE LOOKED THAT GRAND AND GOOD AND PIOUS THAT YOU'D SAY (HE HAD->HE'D) WALKED RIGHT OUT OF THE ARK AND MAYBE WAS OLD LEVITICUS HIMSELF +3331-159609-0000-656: NEVER MIND WHAT THE BUSINESS WAS IT SUFFICES TO SAY THAT IT WAS A GOOD BEGINNING FOR A YOUNG MAN LIKE TOM WHO HAVING BEEN BORN AND BRED IN THE MOST CONSERVATIVE CLASS OF THE MOST CONCEITED CITY IN NEW ENGLAND NEEDED JUST THE HEALTHY HEARTY SOCIAL INFLUENCES OF THE WEST TO WIDEN HIS VIEWS AND MAKE A MAN OF HIM +4198-61336-0007-1242: THE SHIFTY (MATI ILU->MATIILLIU) EITHER CHERISHED THE HOPE THAT (SHARDURIS->CHARDURIS) WOULD 
RECOVER STRENGTH AND AGAIN INVADE NORTH (SYRIA->ASSYRIA) OR THAT HE MIGHT HIMSELF ESTABLISH AN EMPIRE IN THAT REGION +533-131556-0006-1587: I AM TOO WELL ACQUAINTED WITH YOUR CHARACTER AND CONDUCT TO FEEL ANY REAL FRIENDSHIP FOR YOU AND AS I AM WITHOUT YOUR TALENT FOR DISSIMULATION I CANNOT ASSUME THE APPEARANCE OF IT +5764-299665-0097-1901: IT IS FAR BETTER TO BE FREE TO LEAVE THE (FORTS->FAULTS) AND BARRICADES OF FEAR TO STAND ERECT AND FACE THE FUTURE WITH A SMILE +3538-142836-0026-852: THEY SHOULD BE TAKEN IMMEDIATELY AFTER THE REPAST OR SOME HOURS AFTER BECAUSE THE TAKING (*->OF) THESE SUBSTANCES DURING THE PROCESS OF DIGESTION IS APT TO PROVOKE INDISPOSITION +2609-156975-0011-372: A CONTEMPORARY (INSCRIPTION->INSCRIPTS AND) ALSO STATES THAT HE FOUNDED NEAR (PITHUM->PITTHAM) THE HOUSE OF (RAMSES->RAMESES) A CITY WITH A ROYAL RESIDENCE AND TEMPLES +6128-63241-0007-2006: IT WAS IN THE FEMALE LINE AS (BASIL->BEILS AND) RANSOM HAD WRITTEN IN ANSWERING HER LETTER WITH A GOOD DEAL OF FORM AND FLOURISH HE SPOKE AS IF THEY HAD BEEN ROYAL HOUSES +4350-10919-0029-1377: AND THE DOCTOR BEGAN SCIENTIFICALLY EXPLAINING TO THE PRINCESS AS AN EXCEPTIONALLY INTELLIGENT WOMAN THE CONDITION OF THE YOUNG PRINCESS AND CONCLUDED BY INSISTING ON THE DRINKING OF THE WATERS WHICH (WERE->WAS) CERTAINLY HARMLESS +2414-128291-0004-267: ANSWERED HE THE SAME THAT THOU SEEKEST THOU MISCHIEF MAKER THAT IS TO SAY HAPPINESS UPON EARTH +6128-63244-0023-2036: WHEN MISS (BIRDSEYE->BIRD'S EYE) APPROACHED IT TRANSFIGURED HER FAMILIAR HER COMICAL SHAPE AND MADE THE POOR LITTLE HUMANITARY HACK SEEM ALREADY A MARTYR +533-131556-0005-1586: THEY HAD BETAKEN THEMSELVES TO THEIR WORK I LESS TO DIVERT MY MIND THAN TO DEPRECATE CONVERSATION (HAD->I) PROVIDED MYSELF WITH A BOOK +2609-169640-0002-417: FORTUNATELY THE JOHN WAS NOT ONLY (FAST->FAT) BUT SHE MINDED HER (HELM->HAM) AS A LIGHT FOOTED GIRL TURNS IN A LIVELY DANCE +4350-9170-0024-1406: THE UPPER CLASSES KNOW THAT AN ARMY OF FIFTY THOUSAND WILL SOON BE INSUFFICIENT AND NO LONGER RELYING ON PINKERTON'S MEN THEY FEEL THAT THE SECURITY OF THEIR POSITION DEPENDS ON THE INCREASED STRENGTH OF THE ARMY +367-130732-0007-927: ONE (POUND->POUNDS) OF LOBSTER MEAT ONE TEASPOONFUL OF BUTTER ONE HALF PINT OF CREAM YOLKS OF FOUR EGGS ONE WINE GLASS OF SHERRY LOBSTER FAT +4294-35475-0011-1302: HE COULD SEE THE OGRE STANDING POWERLESS TO HURT HIM ON THE OTHER SIDE OF THE CHASM AND GNASHING HIS TEETH EACH ONE OF WHICH WAS AS BIG AS A (MILLSTON->MILLSTONE) +367-293981-0003-957: THOUGH YOUR WORSHIP WAS NOT SO BADLY OFF HAVING IN YOUR ARMS (THAT->THE) INCOMPARABLE BEAUTY YOU SPOKE OF BUT (I->AYE) WHAT DID I HAVE EXCEPT THE HEAVIEST (WHACKS->WAX THAT) I THINK I HAD IN ALL MY LIFE +5442-41169-0021-1721: WE WALKED ABOUT THE FIELDS AND THE GARDEN NO SAID HE STEPAN (VASSILIEVITCH->MASLOVITCH) EVERYTHING'S WELL LOOKED AFTER BUT YOUR GARDEN'S NEGLECTED +8280-266249-0000-2801: OLD MISTER DINSMORE HAD ACCEPTED A PRESSING INVITATION FROM HIS GRANDDAUGHTER AND HER HUSBAND TO JOIN THE PARTY AND WITH THE ADDITION OF SERVANTS IT WAS A LARGE ONE +2033-164915-0004-238: ACCORDINGLY SHE TOLD HIM ALL THAT HAD COME TO HER SINCE THEIR SEPARATION AT THE KHAN AND WHAT HAD HAPPENED TO HER WITH THE BADAWI HOW THE MERCHANT HAD BOUGHT HER OF HIM AND HAD TAKEN HER TO HER BROTHER (SHARRKAN->SHARKAN) AND HAD SOLD HER TO HIM HOW HE HAD FREED HER AT THE TIME OF BUYING HOW HE HAD MADE A MARRIAGE CONTRACT WITH HER AND HAD GONE IN TO HER AND HOW THE KING THEIR SIRE HAD SENT AND ASKED FOR HER FROM (SHARRKAN->SHARKAN) 
+2033-164916-0001-253: KNOW O CHIEF WAZIR THAT HERE WHERE YOU HAVE ENCOUNTERED ME ALLAH HATH GIVEN YOU REST FROM FATIGUE AND BRINGETH YOU YOUR DESIRE AFTER THE EASIEST OF FASHIONS FOR THAT HIS ALMIGHTY WILL RESTORETH TO YOU ZAU (AL->*) MAKAN AND HIS SISTER NUZHAT AL ZAMAN WHEREBY WE WILL SETTLE THE MATTER AS WE EASILY CAN +3528-168669-0082-777: HIS ORDER HAS PRODUCED FORTY POPES TWO HUNDRED CARDINALS FIFTY PATRIARCHS SIXTEEN HUNDRED ARCHBISHOPS FOUR THOUSAND SIX HUNDRED BISHOPS FOUR EMPERORS TWELVE EMPRESSES FORTY SIX KINGS FORTY ONE QUEENS THREE THOUSAND SIX HUNDRED CANONIZED SAINTS AND HAS BEEN IN EXISTENCE FOR FOURTEEN HUNDRED YEARS +4198-12281-0008-1227: HARK YOU MY MASTERS YOU THAT LOVE THE WINE COP'S BODY FOLLOW ME FOR (SANCT->SAINT) ANTHONY (BURN->BURNED) ME AS FREELY AS A FAGGOT (IF->DID) THEY GET LEAVE TO TASTE ONE DROP OF THE LIQUOR THAT (WILL->WOULD) NOT NOW COME AND FIGHT FOR RELIEF OF THE VINE +6128-63240-0006-1977: THOSE OF BASIL RANSOM WERE DARK DEEP AND GLOWING HIS HEAD HAD A CHARACTER OF ELEVATION WHICH FAIRLY ADDED TO HIS STATURE IT WAS A HEAD TO BE SEEN ABOVE THE LEVEL OF (A->THE) CROWD ON SOME JUDICIAL BENCH OR POLITICAL PLATFORM OR EVEN ON A BRONZE MEDAL +3005-163391-0018-508: BLAMED IF HE DIDN'T INQUIRE ABOUT EVERYBODY AND EVERYTHING IN THAT BLESSED TOWN AND ALL ABOUT THE (WILKSES->WILKESES) AND ABOUT PETER'S (BUSINESSWHICH->BUSINESS WHICH) WAS A TANNER AND ABOUT (GEORGE'SWHICH->GEORGE'S WHICH) WAS A CARPENTER AND ABOUT (HARVEY'SWHICH->HARVEY'S WHICH) WAS A DISSENTERING MINISTER AND SO ON AND SO ON THEN HE SAYS +7975-280076-0028-2517: IT WAS IMMEDIATELY FOLLOWING THE ROCK ISLAND ROBBERY AT (ADAIR->EIGHT AIR) IOWA THAT THERE FIRST APPEARED A DELIBERATE ENLISTMENT OF SOME LOCAL PAPERS IN MISSOURI TO CONNECT US WITH THIS ROBBERY +6070-63485-0000-1902: (THEY'RE->THERE) DONE (FOR->FAR) SAID THE SCHOOLMASTER IN A LOW KEY TO THE (CHOUETTE->SHUETTE) OUT WITH YOUR VITRIOL AND MIND YOUR EYE +6128-63244-0025-2038: SHE HAD BEEN CONSUMED BY THE PASSION OF SYMPATHY IT HAD CRUMPLED HER INTO AS MANY CREASES AS AN OLD GLAZED DISTENDED GLOVE +533-1066-0002-1558: (I AM->I'M) SURE I KISSED LIDDY AND (I HAVE->I'VE) HAD TERRIBLE MOMENTS SINCE WHEN I (SEEM->SEEMED) TO REMEMBER KISSING MISTER JAMIESON TOO IN THE EXCITEMENT +8280-266249-0001-2802: AS THEY WERE IN NO HASTE AND THE CONFINEMENT OF A RAILROAD CAR WOULD BE VERY IRKSOME TO THE YOUNGER CHILDREN IT HAD BEEN DECIDED TO MAKE THE JOURNEY BY WATER +533-131562-0010-1617: AND AS FOR THE HOUSEHOLD MATTERS MISSUS (GREAVES->GRIEBS) MUST BE VERY PARTICULAR IN KEEPING HER ACCOUNTS WE MUST GO (UPON->UP ON) AN ENTIRELY NEW PLAN +4198-12259-0023-1198: BUT IF THERE CAME SUCH LIQUOR (FROM->FOR) MY (BALLOCK WOULD->BALLOT WILL) YOU NOT WILLINGLY THEREAFTER SUCK THE (UDDER->UTTER) WHENCE IT ISSUED +8461-258277-0010-2877: SO HE TOLD HIM WHAT HAD BEFALLEN HIM AND ADDED IF I KNOW WHITHER THE RASCAL IS GONE AND WHERE TO FIND THE KNAVE I (WOULD->WILL) PAY HIM OUT +8280-266249-0046-2847: MISTER LILBURN AND MISTER DALY EACH AT A DIFFERENT TIME SOUGHT OUT THE YOUNG MAN (WARD'S->WORD'S) INTENDED VICTIM AND TRIED TO INFLUENCE HIM FOR GOOD +6128-63244-0009-2022: (CHARLIE->CHARLEY) WAS A YOUNG MAN IN A WHITE OVERCOAT AND A PAPER COLLAR IT WAS FOR HIM IN THE LAST ANALYSIS THAT THEY CARED MUCH THE MOST +6070-86745-0012-1963: I RETURNED HOME AT DAYBREAK AND STROVE TO SLEEP BUT MY HEAD ACHED AND I GOT UP TO HAVE A RIDE FOR AN HOUR +8461-281231-0022-2922: AT LENGTH WITH A TERRIFIC CRASH THE WHOLE TURRET GAVE WAY AND SHE PERISHED IN THE FLAMES WHICH HAD CONSUMED HER 
TYRANT +4350-9170-0041-1423: BUT THE FATAL SIGNIFICANCE OF UNIVERSAL MILITARY SERVICE AS THE MANIFESTATION OF THE CONTRADICTION INHERENT IN THE SOCIAL CONCEPTION OF LIFE IS NOT ONLY APPARENT IN THAT +4294-14317-0007-1273: I HAD BETTER LOOK TO MY CONDUCT FOR IT HAD COME TO HIS EARS THAT I RELIED UPON HIS FAVOUR TO TAKE IN FIRST ONE MAN AND THEN ANOTHER +8461-281231-0023-2923: WHEN THE OUTLAWS HAD DIVIDED THE SPOILS WHICH THEY HAD TAKEN FROM THE CASTLE OF (TORQUILSTONE->TORKELSTONE) CEDRIC PREPARED TO TAKE HIS DEPARTURE +3080-5032-0005-553: IN EARNEST WE HAVE HAD SUCH A SKIRMISH AND UPON SO FOOLISH AN OCCASION AS I CANNOT TELL WHICH (IS STRANGEST->OF STRANGERS) +5484-24318-0016-1782: DAPHNE AGAIN PLEADED FOR THE LIBERATION OF THE PRISONERS BUT (PHILIPPUS->PHILIPPA) SILENCED HER WITH THE GRAVE EXCLAMATION THE ORDER OF THE KING +7975-280076-0014-2503: DURING THE SUMMER I WAS EITHER IN SAINT CLAIR JACKSON OR KANSAS BUT AS THERE WAS NO ROBBERY COMMITTED THAT SUMMER IT MAKES NO DIFFERENCE WHERE I WAS +3331-159609-0017-673: SUPPOSE I SAY A WORD TO TOM JUST INQUIRE AFTER HIS HEART IN A GENERAL WAY YOU KNOW AND GIVE HIM A CHANCE TO TELL ME IF (THERE IS->THERE'S) ANYTHING TO TELL +2609-156975-0028-389: HERE MOSES LEARNED THE LESSONS THAT WERE ESSENTIAL FOR HIS TRAINING AS THE LEADER AND DELIVERER OF HIS PEOPLE +5442-41169-0007-1707: OH STILL JUST THE SAME ALWAYS AT A LOSS THE LANDOWNER ANSWERED WITH A RESIGNED SMILE BUT WITH AN EXPRESSION OF SERENITY AND CONVICTION THAT SO IT MUST BE +2414-159411-0020-343: THE BRAHMAN GAVE HIMSELF UP AS LOST BUT AGAIN HE PRAYED THE TIGER TO HAVE PATIENCE AND LET HIM ASK THE OPINION OF THE SIXTH JUDGE +7018-75788-0014-2202: HERE I FOUND A GREAT SHIP READY FOR SEA AND FULL OF MERCHANTS AND NOTABLES WHO HAD WITH THEM GOODS OF PRICE SO I EMBARKED MY BALES THEREIN +7018-75789-0009-2217: I ANSWERED FOR ALLAH'S SAKE O MY LORD ERE I SPEAK GIVE ME SOMEWHAT TO EAT FOR I AM STARVING AND AFTER ASK ME WHAT THOU WILT +7018-75788-0000-2188: THEN I TOOK UP A GREAT STONE FROM AMONG THE TREES AND COMING UP TO HIM SMOTE HIM THEREWITH ON THE HEAD WITH ALL MY MIGHT AND CRUSHED IN HIS SKULL AS HE LAY DEAD DRUNK +6128-63244-0024-2037: OLIVE CHANCELLOR LOOKED AT HER WITH LOVE REMEMBERED THAT SHE HAD NEVER IN HER LONG UNREWARDED WEARY LIFE HAD A THOUGHT OR AN IMPULSE FOR HERSELF +5764-299665-0054-1858: THE PURITAN BELIEVED THE BIBLE TO BE THE WORD OF GOD AND THIS BELIEF HAS ALWAYS MADE THOSE WHO HELD IT CRUEL AND WRETCHED +2414-128291-0005-268: FOR I TELL THEE THAT I HAVE ALREADY TALKED HALF A MORNING UNTO THEM AND JUST NOW WERE THEY ABOUT TO GIVE ME THEIR ANSWER +3528-168669-0068-763: THE DEAD MUST BE OBEYED TO BE BURIED IN THE VAULT UNDER THE ALTAR OF THE CHAPEL NOT TO GO TO PROFANE EARTH TO REMAIN THERE IN DEATH WHERE SHE PRAYED WHILE LIVING SUCH WAS THE LAST WISH OF MOTHER CRUCIFIXION +6070-86744-0027-1948: AND THIS TIME IT MUST BE CONFESSED THAT CONTRARY TO THE USUAL STATE OF AFFAIRS (IN->AND) DISCUSSIONS BETWEEN THE YOUNG MEN THE EFFECTIVE ARGUMENTS WERE ALL ON ALBERT'S SIDE +4350-9170-0026-1408: THE MEANING OF (CAPRIVI'S->CAPRIVI) SPEECH PUT INTO PLAIN LANGUAGE IS THAT FUNDS ARE NEEDED NOT TO RESIST FOREIGN FOES BUT TO BUY UNDER OFFICERS TO BE READY TO ACT AGAINST THE (ENSLAVED->ENSLAVE) TOILING MASSES +6432-63723-0028-2127: AFTER THE MARRIAGE WHICH WAS A BRILLIANT AND GAY ONE IF NOT HAPPY THE LARCH HOTEL IT COULD HARDLY BE CALLED A HOME BECAME THE SCENE OF MANY FESTIVE OCCASIONS +8461-281231-0007-2907: THEY ARE FAST RISING AT LEAST SAID (ULRICA->EUREKA) AND (A->THE) SIGNAL SHALL SOON 
WAVE TO WARN THE BESIEGERS TO PRESS HARD UPON THOSE WHO WOULD EXTINGUISH THEM +1688-142285-0013-13: AT LEAST NO FRIEND CAME FORWARDS IMMEDIATELY AND (MISSUS->MISTER) THORNTON IS NOT ONE I FANCY TO WAIT TILL TARDY KINDNESS COMES TO FIND HER OUT +4852-28319-0026-1528: UNDER HIS EYES (THE->*) WOODEN FOLDS OF CLOTH BECAME RICH SILK EMBROIDERY GLEAMED IN ITS REALITY UPON THE COAT AND OH THE FACE +6128-63241-0008-2007: IF IT HAD BEEN POSSIBLE TO SEND MISSUS RANSOM MONEY OR EVEN CLOTHES SHE WOULD HAVE LIKED THAT BUT SHE HAD NO MEANS OF ASCERTAINING HOW SUCH AN OFFERING WOULD BE TAKEN +3331-159605-0019-628: AT A STREET CORNER A BLACK EYED (SCHOOL BOY->SCHOOLBOY) WAS PARTING FROM A ROSY FACED SCHOOL GIRL WHOSE MUSIC ROLL HE WAS RELUCTANTLY SURRENDERING +3538-142836-0012-838: THEY SHOULD BE DRIED IN (THE->A) STOVE OR OVEN ON A SIEVE AND TURNED EVERY SIX OR EIGHT HOURS FRESH POWDERED SUGAR BEING SIFTED OVER THEM EVERY TIME THEY ARE TURNED +8131-117029-0001-2651: THE LITTLE PUBLISHER WAS BACK AT THE CRUSADER AGAIN +8131-117017-0004-2621: GUNS SUDDENLY SEEMED TO BE FLOURISHING EVERYWHERE +4350-10919-0014-1362: WHAT IS WANTED IS (*->THE) MEANS OF IMPROVING NUTRITION AND NOT FOR LOWERING IT +8188-269290-0025-2756: I BELIEVE POOR ANNIE IS DREADFULLY UNHAPPY +1688-142285-0012-12: NO ONE CAME FORWARDS TO HELP THE MOTHER AND THIS BOY +8188-269288-0038-2711: I MEAN THAT I DON'T WANT YOU TO BEGIN TO ASK QUESTIONS +1688-142285-0042-42: BUT FOR A MINUTE OR TWO SHE DID NOT SPEAK +3997-180294-0003-1091: THERE ARE BOLTS (ON->IN) THE DOOR WRETCH +6128-63240-0020-1991: ONE DIDN'T EVEN KNOW WHAT ONE HAD COME BACK FOR +4852-28319-0009-1511: THE SENSATION SPREAD FASTER AND FASTER +3997-180297-0000-1122: I HAVE NOT COME TO HINDER YOU FROM LEAVING PARIS +3528-168669-0052-747: SHE CONTINUED (FATHER FAUVENT->FOR THE PROUVENT) +4294-9934-0014-1332: SILVER GOLD HERE IT IS +3528-168669-0127-822: THE (VIL->VEIL) STUCK FAST IN HIS THROAT +4294-9934-0013-1331: WHAT ARE YOU GOING TO DO I DO NOT KNOW +4294-9934-0029-1347: AT THAT MOMENT HE HAD THREE FRANCS LEFT +6432-63723-0057-2156: NOW HARRY KING HAS IT EXCLAIMED COLONEL ASHLEY +2414-159411-0034-357: PROCEED ON YOUR JOURNEY FRIEND (BRAHMAN->RAMAN) +7902-96594-0000-2396: SEEMED IN GOOD SPIRITS LAST NIGHT MISTER GURR EH +4198-12259-0007-1182: DRINK ALWAYS AND YOU SHALL NEVER DIE +1998-29454-0008-132: WHEN HE SAID (AVE->HAVE) I (BIN->BEEN) ASLEEP +367-293981-0017-971: AND A VERY RESPECTABLE ONE SAID THE INNKEEPER +4198-12259-0036-1211: WHITE WINE HERE WINE BOYS +3331-159609-0016-672: SAID FANNY TURNING HOPEFUL ALL AT ONCE +4852-28311-0008-1451: THINK HE REALLY NEEDS IT HE PURSUED +5484-24318-0000-1766: NOT A SOUND IF YOU VALUE YOUR LIVES +4294-35475-0025-1316: BUT (ETHELRIED->ETHEL REED) HEEDED NOT THEIR TAUNTS +7105-2340-0009-2291: (SIGNED WILFRID->SIGHING WILFRED) PIGEONCOTE +7902-96594-0015-2411: (AWK WARD->AWKWARD) MISTER GURR AWKWARD +533-1066-0000-1556: WHEN CHURCHYARDS YAWN +6432-63722-0012-2051: I'M WORKING IN THE (INTERESTS->INTEREST) OF THE YOUNG MAN +7105-2330-0021-2261: (POLL->ALL) OPENS IN FIVE MINUTES +7902-96591-0001-2321: DON'T CRY HE SAID I WAS OBLIGED TO COME +8188-269288-0023-2696: DON'T NOTICE ME REPLIED ANNIE +1998-29454-0038-162: DICKIE ALWAYS REMEMBERED THAT MOMENT +3528-168669-0112-807: REVEREND MOTHER WHAT +367-130732-0021-941: LOBSTER IN MINIATURE +8131-117017-0019-2636: ELEVEN HUNDRED FIFTY CREDITS +3764-168671-0028-1061: HE HAD HARDLY THE STRENGTH TO STAMMER +7902-96592-0006-2351: WHAT A QUEER DREAM HE THOUGHT TO HIMSELF +5764-299665-0067-1871: WE MUST 
HAVE (CORNER->CORN) STONES +3528-168669-0037-732: BUT I DID NOT SAY MORE OFTEN +7975-280084-0013-2531: WHAT KEPT YOU SO LONG (I->*) ASKED (PITTS->PITT) +5442-41168-0004-1676: WE SHED OUR BLOOD FOR OUR COUNTRY +3997-180297-0029-1151: HE WANTS YOU TO FORGIVE HIM +7902-96592-0036-2381: NOT YOU ONLY A SHAM +3005-163390-0003-462: THE DUKE SAYS YES +6432-63722-0042-2081: SOME ONE OUT HERE TO SEE YOU +6432-63722-0057-2096: YES IT STILL HOLDS GOOD +8188-269290-0055-2786: EXCUSES MAKE NO DIFFERENCE +3005-163389-0007-447: YOU DIDN'T WANT TO COME +533-131556-0020-1601: I SHALL NOT TELL HIM +7902-96592-0021-2366: WON'T DO SAID RAM GRINNING +533-131564-0022-1646: WHERE'S MILICENT +1998-29455-0021-192: PLEASE DO NOT BE TOO SHOCKED +4852-28312-0011-1481: HE'S A SCHOOLMATE OF MINE +3764-168670-0012-987: FAUCHELEVENT WENT ON +8131-117029-0016-2666: YOU CAN'T STAY HERE +6432-63723-0042-2141: THANK YOU NO +6070-86745-0011-1962: IT IS FOR THAT REASON YOU SEE ME SO EARLY +3528-168669-0098-793: THE PRIORESS TOOK BREATH THEN TURNED TO FAUCHELEVENT +7105-2330-0007-2247: THE FOREMAN DELIVERED HIS MESSAGE +5484-24318-0031-1797: ON THE WAY HIS HEART THROBBED ALMOST TO BURSTING +6938-70848-0000-2157: EVEN THE SUN CAME OUT PALE AND WATERY AT NOON +3997-180297-0001-1123: YOU IN THE WAY MARGUERITE BUT HOW +8188-269288-0054-2727: TIRED OUT LESLIE HERSELF DROPPED ASLEEP +1998-29455-0022-193: REMEMBER THAT NEITHER OF THEM KNEW ANY BETTER +6432-63722-0028-2067: AND THE WATCH HAVE YOU IT YES IT'S HERE +3997-180294-0005-1093: NOW GO I CAN'T KEEP MY EYES OPEN +1998-15444-0007-103: THE BEST (EMETIC->AMATIC) IS THAT WHICH IS AT HAND +3005-163390-0019-478: S'POSE HE OPENED HIS (MOUTHWHAT->MOUTH WHAT) THEN +2033-164914-0012-223: RETURN QUICKLY AND LINGER NOT +8280-266249-0061-2862: AND HOW WONDERFULLY HE HAS ANSWERED OUR PETITIONS +5764-299665-0039-1843: THE IMAGINATION IS CULTIVATED +533-131564-0009-1633: OH NO I COULDN'T STAND THAT +5442-41169-0022-1722: TO MY THINKING I'D CUT DOWN (THAT LIME TREE->THE LINERY) +7902-96594-0001-2397: YES SIR BUT HE MAY TURN UP ON THE CLIFF AT ANY MOMENT +8188-269290-0056-2787: THE GIRL WHO BREAKS THE RULES HAS TO BE PUNISHED +8131-117029-0002-2652: ONLY GORDON AND SHEILA WERE LEFT +5484-24318-0002-1768: WAS HE TO BE LED TO THE EXECUTIONER'S BLOCK +3528-168669-0008-703: IT WOULD BE A GOOD THING TO HAVE TWO MEN FOR IT +6432-63722-0013-2052: IT'S JUST ONE OF THEM COINCIDENCES LIKE +4294-32859-0003-1288: SIT DOWN BESIDE ME AND I'LL TELL YOU THE STORY +5442-41169-0008-1708: WHY WHAT IS THERE TO UNDERSTAND +7902-96594-0031-2427: BEG PARDON SIR SAID THE MASTER DEPRECATINGLY +7975-280063-0011-2487: ABOUT NINE THIRTY A M I WAS SHOT DOWN +6432-63722-0044-2083: SINGA PHUT WAS THE PANTING ANSWER +3005-163390-0004-463: EVERYBODY SINGS OUT SOLD +5442-41168-0006-1678: VOTES PLEASE (BEASTLY->PEASLEY) +533-131556-0022-1603: AND (MILICENT->ME LISTEN) WILL YOU TELL HER +5764-299665-0068-1872: THE STRUCTURE MUST HAVE A BASEMENT +5764-299665-0084-1888: WHY HAVE THE REFORMERS (FAILED->SA) +4852-28319-0025-1527: IT WAS AS IF THE STIFFNESS MELTED +6070-86744-0011-1932: THEN WE SHALL NOT MEET IN PARIS +6432-63722-0043-2082: ALL RIGHT BE THERE IN A SECOND +3997-180297-0016-1138: WE NO LONGER BELONG TO OURSELVES +6070-86744-0012-1933: I FEAR I SHALL NOT HAVE THAT HONOR +3528-168656-0006-688: SHE WOULD NOT SHOW (THIS OBJECT->THE SUBJECT) TO (ANYONE->ANY ONE) +533-1066-0016-1572: I HARDLY KNOW WHAT I EXPECTED +3528-168669-0038-733: AT THAT MOMENT NINE O'CLOCK STRUCK +3997-180294-0020-1108: HOW WHY +6432-63723-0013-2112: 
BECAUSE LARCH MADE NO (DEFENSE->DEFENCE) +367-293981-0004-958: UNLUCKY (ME AND->MAN TO) THE MOTHER THAT BORE ME +4350-9170-0010-1392: INDEED IT COULD NOT BE OTHERWISE +6432-63722-0059-2098: AND THERE'S NO REASON YOU SHOULDN'T KNOW +7902-96592-0022-2367: THINK I DON'T KNOW YOU MISTER ORFICER +7105-2330-0022-2262: IS (PLATTERBAFF->FLATTERBUFF) OUT YET +6070-63485-0015-1917: IN THE PLAIN OF SAINT DENIS +3331-159605-0004-613: WHAT A SPITEFUL THING I AM +3331-159605-0034-643: PERHAPS SHE (LL JILT->HAD TOLD) HIM +3997-182399-0014-1168: BUT (HE->YOU) DON'T MIND THAT +1998-29455-0007-178: HI THERE GOES A RABBIT +1688-142285-0028-28: BUT GOOD NIGHT CHILD +1998-29454-0009-133: HERE WE ARE SAID THE MAN +7902-96594-0016-2412: YES SIR OF COURSE +8131-117017-0005-2622: YOU CAN'T DO IT TO ME +3528-168669-0053-748: YES REVEREND MOTHER +2414-128291-0020-283: WELL +8461-281231-0036-2936: SHE GAZED ACCORDINGLY UPON A SCENE WHICH MIGHT WELL HAVE STRUCK TERROR INTO A BOLDER HEART THAN HERS +8131-117016-0021-2576: MOVING IN TWO GROUPS OF THREES AT OPPOSITE SIDES OF THE STREET THEY BEGAN THEIR BEAT +8280-266249-0030-2831: THEY ARE GAMBLING YONDER AND I'M AFRAID THAT YOUNG FELLOW IS BEING BADLY FLEECED BY (THAT->THE) MIDDLE AGED MAN OPPOSITE +7902-96591-0017-2337: NEXT MOMENT AS HE FELT HIS WAY ABOUT HIS HAND TOUCHED AN OLD FASHIONED MARBLE MANTELPIECE FIREPLACE CHIMNEY +8461-281231-0006-2906: REMEMBEREST THOU THE MAGAZINE OF FUEL THAT IS STORED BENEATH THESE APARTMENTS WOMAN +5442-32873-0010-1662: AND SHE THREW BACK HER VEIL AND GOING HURRIEDLY TO THE TOILET MECHANICALLY SURVEYED HERSELF IN THE GLASS +1998-15444-0006-102: IN A CASE OF ATTEMPTED SUICIDE BY POISONING IS IT THE DUTY OF THE DOCTOR TO INFORM THE POLICE +4198-61336-0022-1257: JUDAH HAD TAKEN ADVANTAGE OF THE DISTURBED CONDITIONS IN ISRAEL TO ASSERT ITS INDEPENDENCE +7975-280063-0010-2486: I WAS TOLD BY SOME OF OUR MEN FROM THE WESTERN BORDER OF THE STATE THAT THEY RECOGNIZED THE DARING YOUNG (RIDER->RITER) AS (COLE->CO) YOUNGER +8461-278226-0007-2891: BUT SHE FIXED UPON A PICTURE WHICH SHE SAID SHE PREFERRED TO ANYTHING SHE HAD SEEN IN THE GALLERY +4350-9170-0040-1422: THE DANGER OF WAR EVER READY TO BREAK OUT RENDERS ALL REFORMS OF (LIFE->LIFE'S) SOCIAL LIFE VAIN AND FRUITLESS +2414-128292-0022-312: FOR IT (DO->TOO) I ASK AND SEEK AND HAVE SOUGHT BUT HAVE NOT FOUND IT +533-131564-0008-1632: BUT TELL ME SHOULD YOU WISH YOUR SONS TO BE LIKE MISTER HUNTINGDON OR EVEN LIKE YOURSELF +533-131564-0023-1647: NAY NOT I SAID HE TURNING (HER->*) ROUND AND PUSHING (HER->IT) TOWARDS ME +2414-128291-0019-282: THEY ALSO (ABSTAIN->ABSTAINED) FROM ALL HEAVY THOUGHTS WHICH INFLATE THE HEART +4198-12259-0022-1197: A LITTLE RAIN ALLAYS A GREAT DEAL OF WIND LONG TIPPLING BREAKS THE THUNDER +2609-157645-0002-402: SOMETIMES TOBACCO (WAS->IS) USED IN CHURCH FOR DISINFECTING OR (DEODORIZING->DEORTERIZING) PURPOSES +4294-35475-0026-1317: SO THEY ALL CRIED OUT LONG AND LOUD LONG (LIVE->LOVE) THE PRINCE PRINCE (CISEAUX->IS ALL) +1998-29455-0006-177: BUT I SHALL HAVE THEM WHILE (THEY'RE ALIVE->THEY ARE LIVE) SAID DICKIE AS HE HAD SAID TO THE PAWNBROKER ABOUT THE MOONFLOWERS +3528-168669-0097-792: (GAUTHIER->GATHIER) BISHOP OF (CHALONS->CHELAN) HELD HIS OWN IN THIS MATTER AGAINST OTHO DUKE OF BURGUNDY +8188-269288-0053-2726: ANNIE DID NOT MEAN TO (CONFIDE->CONFINE) IN (ANYONE->ANY ONE) THAT NIGHT AND THE KINDEST THING WAS TO LEAVE HER ALONE +3764-168671-0044-1077: THEY WERE ONLY A FEW TURNS OF THE WHEEL DISTANT FROM THE SMALL ALLEY LEADING TO THE (NUNS->NUN'S) CORNER 
+3080-5032-0004-552: WELL THE BEST (ON'T->HONOUR) IS (*->THAT) I HAVE A SQUIRE NOW THAT IS AS GOOD AS A KNIGHT +7105-2330-0037-2277: THE POPULAR SONG OF THE MOMENT REPLIED THE AGITATOR AFTER A MOMENT'S REFLECTION +1688-142285-0072-72: MARGARET HELD HER IN HER ARMS AND PUT THE WEARY HEAD TO REST UPON HER BOSOM +3997-180297-0015-1137: WE ARE NOT ALLOWED TO HAVE HEARTS UNDER PENALTY OF BEING HOOTED DOWN AND OF RUINING OUR CREDIT +8131-117016-0007-2562: CAPTAIN (MURDOCH->MURDOCK) WAS AN UNKNOWN FACTOR AND NOW WAS ASKING FOR MORE MEN +3764-168671-0029-1062: BUT HE PERSISTED FEEBLY FATHER (MESTIENNE->MASTIENNE) IS THE GRAVE DIGGER +8131-117016-0037-2592: IF HE SHOULD TURN UP DEAD I'LL KNOW YOU BOYS ARE RESPONSIBLE AND I'LL FIND YOU +5484-24318-0032-1798: EVEN DAPHNE'S IMAGE AND WHAT THREATENED HER FATHER AND HER WITH HIM RECEDED FAR INTO THE BACKGROUND +8131-117029-0017-2667: THERE'S A ROCKET WAITING TO (TRANSSHIP->TRANSHIP) YOU TO THE MOON ON THE WAY TO MERCURY RIGHT NOW GORDON SIGHED +1998-29455-0037-208: NOR WAS IT SUNDAY ON WHICH THEY TOOK A REST AND WASHED THEIR SHIRTS ACCORDING TO MISTER BEALE'S RULE OF LIFE +8131-117029-0018-2668: AND (I'VE->I) PAID HER THE PAY WE OWE YOU FROM THE TIME YOU BEGAN USING YOUR BADGE SHE'S OUT SHOPPING +2414-128292-0023-313: (O->OR) ETERNAL EVERYWHERE (O ETERNAL->OR E TURNED) NOWHERE (O ETERNAL->OR TURNEDETH) IN VAIN +1998-29454-0039-163: SO YOU SHALL SAID MISTER BEALE A (REG'LER->REG'LAR) WASH ALL OVER THIS VERY NIGHT I ALWAYS LIKE A WASH MESELF +4294-9934-0015-1333: YOU WILL THEN HAVE ONLY A PAIR OF TROUSERS A WAISTCOAT A HAT AND A COAT AND MY BOOTS +3528-168669-0113-808: IF YOU WERE EVER TO HAVE ANY OTHER JOBS OF THIS SORT MY BROTHER IS THE STRONG MAN FOR YOU A PERFECT TURK +8461-278226-0008-2892: PHILIP JOCELYN WAS EXAMINING SOME PICTURES ON THE OTHER SIDE OF THE ROOM WHEN HIS WIFE MADE (THIS->THE) DISCOVERY +4350-10919-0030-1378: (AT->BUT) THE QUESTION SHOULD THEY GO ABROAD THE DOCTOR PLUNGED INTO DEEP MEDITATION AS THOUGH RESOLVING A WEIGHTY PROBLEM +1688-142285-0088-88: I SHALL LIKE TO SEE HER SHE MUST BE AN UNCOMMON PERSON HER MOTHER ADDED +1998-15444-0022-118: EVAPORATE THE (FILTRATE->FILDRATE) TO A SYRUP AND EXTRACT WITH SUCCESSIVE PORTIONS OF ABSOLUTE ALCOHOL +2609-169640-0018-433: THE QUESTION WAS NOW WHETHER WE COULD PASS THEM OR NOT BEFORE THEY GOT NEAR ENOUGH TO GRAPPLE +8131-117016-0052-2607: AND THEN HELL IS GOING TO POP AND THIS WHOLE PLANET MAY BE BLOWN WIDE OPEN +5764-299665-0009-1813: DID THIS GOD (ALLOW->ALOAD) THE CRUEL AND VILE TO DESTROY THE BRAVE AND VIRTUOUS +3538-163624-0005-898: HE IS NO BIGGER THAN OTHER DRAGONS SAID THE TUTOR AND IF YOU WERE AS BRAVE AS YOUR FATHER YOU WOULD NOT FEAR HIM +4852-28330-0013-1543: HE PAUSED FINGERING HIS LOWER LIP AND LOOKING SIDEWAYS IN A REFLECTIVE FASHION AT CHRIS STANDING BEFORE HIM +8280-266249-0047-2848: YET THERE WAS GAMBLING AGAIN THE SECOND NIGHT BETWEEN WARD AND SEVERAL OTHERS OF HIS PROFESSION +6128-63241-0009-2008: OLIVE HAD A FEAR OF EVERYTHING BUT HER GREATEST FEAR WAS OF BEING AFRAID +2414-128292-0008-298: ALSO (METHINKETH->METHINK IT) THAT AFTER ALL IT HATH LONGER (LEGS->LESS) THAN MINE +8131-117016-0023-2578: GORDON FELT THE SOLID PLEASURE OF THE FINELY TURNED CLUB IN HIS HANDS +3538-163622-0015-883: WHEN THEY HAD GONE THUS FOR A LONG LONG WAY THE FOAL AGAIN ASKED DOST THOU SEE ANYTHING NOW +6432-63722-0029-2068: THAT'S THE WATCH ANNOUNCED THE HEADQUARTERS DETECTIVE REACHING IN FOR IT GOING (YET->IT) SEE +3764-168671-0045-1078: AND HE ADDED WITH THE SATISFACTION OF A SERIOUS MAN WHO 
IS TURNING A PHRASE WELL +6070-63485-0016-1918: BETWEEN SAINT (OUEN->JUIN) AND THE ROAD OF LA (REVOLTE->REVOLT) AT THE END OF THE ROAD AGREED +4294-35475-0012-1303: THE SIGHT WAS SO TERRIBLE THAT HE TURNED ON HIS HEEL AND FLED AWAY AS FAST AS HIS FEET COULD CARRY HIM +4852-28311-0010-1453: MIKE'S EXPRESSION CHANGED AT ONCE TO ONE OF TRIUMPH BUT CHRIS WAS ONLY (PARTLY->PARTIALLY) ENCOURAGED +8188-269290-0026-2757: THAT'S JUST IT JANE THAT IS WHAT FRIGHTENS ME SHE REFUSES TO COME +2609-156975-0012-373: THAT THE HEBREWS WERE RESTIVE UNDER THIS (TYRANNY->CEREMONY) WAS (NATURAL->NATURALLY) INEVITABLE +4198-61336-0024-1259: FOR THUS SAITH THE LORD UNTO THE HOUSE OF ISRAEL SEEK YE ME AND YE SHALL LIVE HAVE YE OFFERED UNTO ME SACRIFICES AND OFFERINGS IN THE WILDERNESS FORTY YEARS O HOUSE OF ISRAEL +8280-266249-0002-2803: THERE WERE NO SAD LEAVE TAKINGS TO MAR THEIR PLEASURE THE CHILDREN WERE IN WILD SPIRITS AND ALL SEEMED CHEERFUL AND HAPPY AS THEY SAT OR STOOD UPON THE DECK WATCHING THE RECEDING SHORE AS THE VESSEL STEAMED OUT OF THE HARBOR +2414-128292-0009-299: FOR WHEN (ZARATHUSTRA SCRUTINISED->THEIR TOESTRAS SCRUTINIZED) HIM WITH HIS GLANCE HE WAS FRIGHTENED AS BY A SUDDEN APPARITION SO SLENDER SWARTHY HOLLOW AND WORN OUT DID (THIS->HIS) FOLLOWER APPEAR +7105-2340-0026-2308: I SHOULD HAVE GIVEN IT TO YOU LAST NIGHT AFTER DINNER ONLY IT HAPPENED TO BE A CREAM JUG AND YOU SEEMED ANNOYED AT HAVING SO MANY DUPLICATES SO I FELT RATHER AWKWARD ABOUT GIVING YOU ANOTHER +2414-128291-0021-284: SAID ZARATHUSTRA THOU SHOULDST ALSO SEE MINE ANIMALS MINE EAGLE AND MY SERPENT (THEIR LIKE->THEY ALIKE) DO NOT AT PRESENT EXIST ON EARTH +3080-5040-0024-599: FOR MY LIFE I CANNOT BEAT INTO THEIR HEADS A PASSION THAT MUST BE SUBJECT TO NO DECAY AN EVEN PERFECT KINDNESS THAT MUST LAST PERPETUALLY WITHOUT THE LEAST INTERMISSION +8280-266249-0003-2804: AT LENGTH THE LAND HAD QUITE DISAPPEARED NOTHING COULD BE SEEN BUT THE SKY OVERHEAD AND A VAST EXPANSE OF WATER ALL AROUND AND THE PASSENGERS FOUND LEISURE TO TURN THEIR ATTENTION UPON EACH OTHER +8188-274364-0000-2789: THE COMMONS ALSO VOTED THAT THE NEW CREATED PEERS OUGHT TO HAVE NO VOICE IN THIS TRIAL BECAUSE THE ACCUSATION BEING AGREED TO WHILE THEY WERE COMMONERS THEIR CONSENT TO IT WAS IMPLIED WITH THAT OF ALL THE COMMONS OF ENGLAND +3331-159609-0003-659: IF IT HAD NOT BEEN FOR TWO THINGS I FEAR SHE NEVER WOULD HAVE STOOD A SUMMER IN TOWN BUT SYDNEY OFTEN CALLED TILL HIS VACATION CAME AND A VOLUMINOUS CORRESPONDENCE WITH POLLY BEGUILED THE LONG DAYS +7975-280076-0000-2489: ALTHOUGH EVERY BOOK PURPORTING TO NARRATE THE LIVES OF THE YOUNGER BROTHERS HAS TOLD OF THE LIBERTY ROBBERY AND IMPLIED THAT WE HAD A PART IN IT THE YOUNGERS WERE NOT SUSPECTED AT THAT TIME NOR FOR A LONG TIME AFTERWARD +6128-63241-0010-2009: SHE HAD ERECTED IT INTO A SORT OF RULE OF CONDUCT THAT WHENEVER SHE SAW A RISK SHE WAS TO TAKE IT AND SHE HAD FREQUENT HUMILIATIONS AT FINDING HERSELF (SAFE->SAVED) AFTER ALL +4852-28312-0014-1484: WHAT HE SAW WAS A FRESH CHEEKED LAD TALL FOR THIRTEEN STURDY WITH SINCERITY AND GOOD (HUMOR->HUMOUR) IN HIS FACE AND SOMETHING SENSITIVE AND APPEALING ABOUT HIS EYES +8461-281231-0009-2909: THE DEFENDERS FINDING THE CASTLE TO BE ON FIRE NOW DETERMINED TO SELL THEIR LIVES AS DEARLY AS THEY COULD AND HEADED BY (DE->THE) BRACY THEY THREW OPEN THE GATE AND WERE AT ONCE INVOLVED IN A TERRIFIC CONFLICT WITH THOSE OUTSIDE +3005-163389-0009-449: NOW LEAVE AND TAKE YOUR HALF A MAN WITH YOU TOSSING HIS GUN UP ACROSS HIS LEFT ARM AND COCKING IT WHEN HE SAYS THIS 
+6432-63722-0045-2084: I WANT TO TALK OVER DARCY'S CASE WITH YOU THE COLONEL HAD SAID AND THE TWO HAD TALKED HAD THOUGHT HAD TALKED AGAIN AND NOW WERE SILENT FOR A TIME +3997-180297-0002-1124: WELL YOU MIGHT HAVE HAD A WOMAN HERE SAID PRUDENCE AND IT WOULD HARDLY HAVE BEEN AMUSING FOR HER TO SEE TWO MORE ARRIVE +3764-168670-0014-989: THEN HE EXPLAINED TO JEAN VALJEAN THAT THIS WAS HIS RECOMPENSE FOR A SERVICE WHICH HE FAUCHELEVENT WAS TO RENDER TO THE COMMUNITY +8461-258277-0012-2879: ANSWERED HASAN I KNOW WHERE HE IS AND OPENING THE DOOR OF THE CLOSET SHOWED HIM THE SWEETMEAT (SELLER->CELLAR) WITHIN DRUGGED AND SENSELESS +6070-86744-0013-1934: WELL SINCE WE MUST PART SAID THE COUNT HOLDING OUT A HAND TO EACH OF THE YOUNG MEN ALLOW ME TO WISH YOU BOTH A SAFE AND PLEASANT JOURNEY +3080-5032-0021-569: HOW CAN YOU TALK OF DEFYING FORTUNE NOBODY LIVES WITHOUT IT AND THEREFORE WHY SHOULD YOU IMAGINE YOU COULD +2609-169640-0019-434: THE CAPTAIN BEHAVED PERFECTLY WELL IN THIS CRITICAL (INSTANT->INCIDENT) COMMANDING A DEAD SILENCE (AND->IN) THE CLOSEST (ATTENTION->INTENTION) TO HIS ORDERS +6070-86744-0028-1949: WELL SAID FRANZ WITH A SIGH DO AS YOU PLEASE MY DEAR VISCOUNT FOR YOUR ARGUMENTS ARE BEYOND MY POWERS OF REFUTATION +7105-2330-0039-2279: THE WORD OF THE SONG HAD REFERENCE IT WAS UNDERSTOOD TO THE INCARCERATING GOVERNMENT AND NOT TO THE DESTROYER OF THE ALBERT HALL +8131-117029-0004-2654: GORDON CAME TO A ROW OF TEMPORARY BUBBLES INDIVIDUAL DWELLINGS BUILT LIKE THE DOME BUT OPAQUE FOR PRIVACY +6070-86745-0013-1964: (PESTE->PESTES) I WILL DO NOTHING OF THE KIND THE MOMENT THEY COME FROM GOVERNMENT YOU WOULD FIND THEM EXECRABLE +7975-280063-0012-2488: THE WOUNDED OF BOTH FORCES WERE GATHERED UP AND WERE PLACED IN HOUSES +4852-28311-0011-1454: (BETCHA->BITCHER) AREN'T (GOIN->GOING) AFTER ALL CHRIS TURNED ON HIM +8188-269288-0040-2713: DON'T TALK TO ME LESLIE DON'T SAY A SINGLE WORD +3528-168669-0099-794: YOU WILL CLOSE THE COFFIN THE SISTERS WILL CARRY IT TO THE CHAPEL +3764-168670-0044-1019: WHAT DOES NOT A MAN UNDERGO FOR THE SAKE OF A CURE +5764-299665-0040-1844: A MAN PUTS HIMSELF IN THE PLACE OF ANOTHER +5442-41168-0007-1679: THEY EXPRESSED THE MOST IMPLACABLE HATRED +3528-168669-0024-719: FATHER (FAUVENT->FOUVET) REVEREND MOTHER +3331-159605-0035-644: UTTERLY DONE WITH AND LAID UPON THE SHELF +7902-96591-0018-2338: YES IF OTHER WAYS FAILED HE COULD ESCAPE UP THE CHIMNEY +3005-163399-0007-524: YES IT WAS (MORTIFICATIONTHAT->MORTIFICATION THAT) WAS IT +7902-96594-0002-2398: YES MEN QUITE READY YES SIR +7018-75789-0025-2233: THE SEVENTH VOYAGE OF (SINDBAD->SINBAD) THE SEAMAN +8131-117016-0038-2593: TROUBLE BEGAN BREWING SHORTLY AFTER THOUGH +7975-280085-0012-2548: MAKE FOR THE HORSES I SAID EVERY MAN FOR HIMSELF +4350-9170-0027-1409: AND THIS ABNORMAL ORDER OF THINGS IS MAINTAINED BY THE ARMY +1998-15444-0008-104: THE DOSE FOR AN ADULT IS TEN MINIMS +4350-10919-0001-1349: BEFORE HER HE DECIDED TO SCATTER HIS PEARLS +1998-29455-0008-179: (SEE IM CROST THE ROAD THERE SEE HIM->SEEM CCHOSTAWTE SEEM) +1998-29454-0010-134: NOT (EXACKLY->EXACTLY) SAID THE MAN BUT IT'S ALL RIGHT +3005-163391-0004-494: (WHER->WHERE ARE) YOU BOUND FOR YOUNG MAN +7105-2330-0024-2264: THE CHIEF ORGANISER RANG OFF +8131-117017-0006-2623: I'M REFORMED I'M GOING STRAIGHT +7975-280084-0015-2533: WHERE'S THE MONEY OUTSIDE THE SAFE BOB ASKED +7975-280057-0003-2458: I CANNOT REMEMBER WHEN I DID NOT KNOW HOW TO SHOOT +7902-96592-0038-2383: NEVER MIND ABOUT THAT LET ME OUT OF THIS PLACE +4852-28311-0026-1469: A SUDDEN (CAR 
HORN WOKE HIM->CARHORN WELCOMED) FROM HIS DREAM +8131-117016-0008-2563: THE PRESSURE WAS ENOUGH TO GET THEM FOR HIM +7902-96592-0008-2353: AND WHY DID ANDY CALL MISTER (GURR FATHER->GURRFATHER) +7902-96592-0023-2368: WON'T DO SAID (RAM->GRAHAM) QUICKLY I KNOW YOU +3528-168669-0114-809: YOU WILL DO IT AS SPEEDILY AS POSSIBLE +3080-5040-0009-584: YOU SEE I GIVE YOU FAIR WARNING +2033-164914-0013-224: WHEN IT WAS THE SEVENTY THIRD NIGHT +3005-163391-0019-509: WHEN THEY'RE DEEP THEY WON'T STOP FOR A HAIL +7902-96594-0017-2413: SAY (AWK WARD->AWKWARD) IN FUTURE NOT (AWK'ARD->AWKWARD) +8188-269290-0027-2758: REFUSES TO COME SHE CRIED +3997-182399-0000-1154: (OL MISTAH->OLD MISTER) BUZZARD GRINNED +2414-159411-0021-344: (NOW->ON) THE SIXTH WAS A JACKAL +7902-96594-0003-2399: THAT'S RIGHT OF COURSE WELL ARMED +2414-128292-0024-314: THOU ART MY SHADOW +5442-41169-0009-1709: (THERE'S->THERE IS) NO MEANING IN IT AT ALL +8461-258277-0011-2878: KNOWEST THOU WHITHER HE WENT +5764-299665-0025-1829: WHAT IS THIS POWER +533-131562-0012-1619: (HAVE->IF) I ATTEMPTED TO DEFRAUD YOU +7902-96591-0019-2339: NO THAT WAS TOO BAD HE COULD NOT DO THAT +7105-2330-0023-2263: IN HEAVEN'S NAME WHY +7902-96595-0014-2443: HERE MY LAD WHERE'S YOUR MASTER +7902-96594-0018-2414: I MEAN ALL ALONE BY MYSELF SIR +8280-266249-0032-2833: NO SIR HE IS NOT HERE +8188-269288-0025-2698: BUT THEY ARE JUST SHUTTING UP +4198-12259-0024-1199: HERE PAGE FILL +533-131564-0010-1634: FIRE AND FURY +3764-168671-0031-1064: BUT (YOU ARE->YOU'RE) A JOLLY FELLOW TOO +6432-63722-0015-2054: GAD (EXCLAIMED->EXPLAINED) THE COLONEL +3528-168669-0009-704: A WOMAN IS NEVER A MAN +1688-142285-0014-14: SO THEY LEFT MILTON +8188-269290-0042-2773: AM I MY BROTHER'S KEEPER +4294-9934-0016-1334: THAT WILL BE ENOUGH +8188-269290-0057-2788: I WILL TELL HER +7975-280057-0018-2473: ONE OF THE CONDITIONS UPON WHICH HER LIFE WAS SPARED WAS THAT SHE WOULD REPORT AT (LEXINGTON->LESSINGTON) WEEKLY +8280-266249-0062-2863: ELSIE ANSWERED PRESSING HER HAND AFFECTIONATELY (ART->ARE) WE NOT SISTERS IN CHRIST +8131-117016-0039-2594: (MURDOCH->MURDOCK) SENT ONE OF THE MEN TO PICK UP A SECOND SQUAD OF SIX AND THEN A THIRD +1688-142285-0089-89: PERHAPS SHE MAY HAVE A RELATION WHO MIGHT SUIT US AND BE GLAD OF OUR PLACE +3538-163624-0021-914: THEN HE TOOK THE HELMET OFF THE HEAD OF THE SLEEPER AND BEHOLD SHE WAS A MOST BEAUTIFUL LADY +7105-2340-0012-2294: IN THE DRAWING ROOM AFTER DINNER THEIR NERVOUSNESS AND AWKWARDNESS INCREASED +6128-63244-0011-2024: IF IT BE NECESSARY WE ARE PREPARED TO TAKE CERTAIN STEPS TO CONCILIATE THE SHRINKING +5442-32873-0012-1664: I'LL STAY HERE THAT IS IN THE DRAWING ROOM SHE ANSWERED AND THE FACE WAS WITHDRAWN +4294-9934-0001-1319: HE HAD BUT JUST ACQUIRED A FAITH MUST HE THEN REJECT IT ALREADY +7105-2340-0011-2293: THE TALK FLITTED NERVOUSLY AND HURRIEDLY FROM ONE IMPERSONAL TOPIC TO ANOTHER +533-131556-0008-1589: YOU MAY GO (MILICENT->MILLICENT) AND (SHE'LL->YOU'LL) FOLLOW IN A WHILE (MILICENT->MELLICENT) WENT +4198-12259-0039-1214: THERE IS NO ENCHANTMENT NOR CHARM THERE EVERY ONE OF YOU HATH SEEN IT +6432-63722-0030-2069: YOU'RE NOT AS SQUEAMISH AS ALL THAT ARE YOU JUST BECAUSE IT WAS IN A DEAD MAN'S (HAND->HANDS) AND (IN->*) A WOMAN'S +7902-96595-0000-2429: SAY (MESTER GURR->MISTER GIRK) SAID DICK AFTER ONE OF THESE SEARCHES HE WOULDN'T RUN AWAY WHAT +3005-163390-0020-479: ALL I SAY IS KINGS IS KINGS AND YOU GOT TO MAKE ALLOWANCES +5484-24317-0022-1754: WHEN HE DID FINALLY SUMMON YOU HE SAID THINGS WHICH MUST HAVE WOUNDED YOU 
+5484-24318-0018-1784: IN SPITE OF HIS DEEP MENTAL DISTRESS HE COULD HAVE SHOUTED ALOUD IN HIS DELIGHT AND GRATITUDE +5484-24318-0003-1769: WHAT PLEASURE HAD LIFE TO OFFER HIM THE BLIND MAN WHO WAS ALREADY DEAD TO HIS ART +5484-24318-0033-1799: HE WAS APPEARING BEFORE HIS COMPANIONS ONLY TO GIVE TRUTH ITS JUST DUE +7902-96592-0009-2354: THERE WAS AN INTERVAL OF THINKING OVER THIS KNOTTY QUESTION DURING WHICH THE LOW WHISTLING WENT ON +5764-299665-0070-1874: I HAVE A THEORY AND I HAVE FOUR CORNER STONES +5764-299665-0085-1889: THEY DEPEND ON THE (LORD->LOT) ON LUCK AND CHARITY +3528-168669-0129-824: I WILL PUT EARTH IN THE COFFIN REVEREND MOTHER THAT WILL PRODUCE THE EFFECT OF A CORPSE +3331-159605-0005-614: I COULD DO SO MUCH FOR ALL AT HOME HOW I SHOULD ENJOY THAT +533-131556-0023-1604: I WOULD NOT FOR MUCH THAT SHE SHOULD KNOW THE INFAMY AND DISGRACE OF HER RELATION +4294-35475-0013-1304: THOU SHALT NOT BE LEFT A PRISONER IN THIS DISMAL SPOT WHILE I HAVE THE POWER TO HELP THEE +5764-299665-0010-1814: DID HE ALLOW TYRANTS TO SHED THE BLOOD OF PATRIOTS +1998-29455-0023-194: TO THE (ELDER->OTHER) TRAMP LIES AND BEGGING WERE NATURAL MEANS OF LIVELIHOOD +4350-9170-0042-1424: GOVERNMENTS ASSERT THAT ARMIES ARE NEEDED ABOVE ALL FOR EXTERNAL DEFENSE BUT THAT IS NOT TRUE +2414-159411-0006-329: ON WHAT MUST I GIVE JUDGMENT ASKED THE (BANYAN->BANDED) TREE +3538-163622-0001-869: THE YOUTH THOUGHT THAT IT WAS VERY EASY WORK TO WATCH THE (FOALS->HOLES) AND THAT HE COULD DO IT WELL ENOUGH +2033-164915-0005-239: BUT NOW GO TO THY MASTER AND BRING HIM QUICKLY TO ME +1998-29454-0025-149: A BIRD PAUSED IN ITS FLIGHT ON A BRANCH QUITE CLOSE AND CLUNG THERE (SWAYING->SWING) +8131-117029-0003-2653: CREDIT HAD BEEN ESTABLISHED AGAIN AND THE BUSINESSES WERE OPEN +1688-142285-0029-29: THERE WERE SEVERAL OTHER SIGNS OF SOMETHING WRONG ABOUT MISSUS HALE +8188-269288-0056-2729: THERE WAS NO REPLY BUT THE SOUND OF HURRYING STEPS CAME QUICKER AND QUICKER NOW AND THEN THEY WERE INTERRUPTED BY A GROAN +5764-299665-0055-1859: LET ME REFER TO JUST ONE FACT SHOWING THE INFLUENCE OF A BELIEF IN THE BIBLE ON HUMAN BEINGS +7975-280057-0004-2459: MY BROTHER JAMES WAS BORN JANUARY FIFTEENTH EIGHTEEN FORTY EIGHT JOHN IN EIGHTEEN FIFTY ONE AND ROBERT IN DECEMBER EIGHTEEN FIFTY THREE +6432-63723-0015-2114: SO I HAD TO LET HER HAVE HER WAY AND WE DID NOT ASK THE COURT FOR MONEY THOUGH I HAD NO SUCH SQUEAMISH FEELINGS WHEN IT CAME TO MY COUNSEL FEE +7902-96592-0039-2384: I TOLD YOU A FISHER BOY CRIED ARCHY IMPATIENTLY BUT TRYING NOT TO OFFEND HIS VISITOR WHO POSSESSED THE POWER OF CONFERRING FREEDOM BY SPEAKING SHARPLY +3764-168671-0046-1079: FORTUNATELY THE SOIL WHICH WAS LIGHT AND WET WITH THE WINTER RAINS CLOGGED THE WHEELS AND RETARDED ITS SPEED +4350-10919-0031-1379: FINALLY HIS DECISION WAS PRONOUNCED THEY WERE TO GO ABROAD BUT TO PUT NO FAITH IN FOREIGN QUACKS AND TO APPLY TO HIM IN ANY NEED +3997-180294-0021-1109: MY WHOLE BEING WAS EXALTED INTO JOY AT THE MEMORY OF THE WORDS WE HAD EXCHANGED DURING THAT FIRST NIGHT +6938-70848-0017-2174: BY DECLARING THE ASSEMBLY EXTRAORDINARY CONFERENCE IT HAD BEEN PLANNED TO BLOCK THE REELECTION OF THE EXECUTIVE COMMITTEE +7018-75788-0016-2204: PRESENTLY THE SHIP STRUCK THE MOUNTAIN AND BROKE UP AND ALL AND EVERYTHING ON BOARD OF HER WERE PLUNGED INTO THE SEA +8280-266249-0018-2819: THAT THE FRENCH POLYTECHNIC INSTITUTE HAD TO PROHIBIT ITS (*->ILL) USE ON ACCOUNT OF ITS EFFECTS (ON->UPON) THE (MIND->MINE) +8131-117016-0053-2608: IT FITTED WITH THE DIRE PREDICTIONS OF SECURITY AND 
(WITH->WHAT) THE SPYING GORDON WAS GOING TO DO ACCORDING TO THEM +3528-168669-0039-734: AT NINE O'CLOCK IN THE MORNING AND AT ALL HOURS PRAISED AND ADORED BE THE MOST HOLY SACRAMENT OF THE ALTAR SAID THE PRIORESS +3331-159609-0018-674: BEAR IT PEOPLE ALWAYS DO (BEAR->BARE) THINGS SOMEHOW ANSWERED POLLY LOOKING AS IF SENTENCE HAD BEEN PASSED UPON HER +4198-61336-0009-1244: FOR THREE YEARS HE CONDUCTED VIGOROUS CAMPAIGNS IN THE (WESTERN LAND->WESTERNLAND) WHERE HE MET WITH VIGOROUS RESISTANCE +7975-280085-0013-2549: (THERE IS->THERE'S) NO USE STOPPING TO PICK UP A COMRADE HERE FOR WE CAN'T GET HIM THROUGH THE LINE JUST CHARGE (THEM->HIM) AND MAKE IT IF WE CAN +8131-117017-0022-2639: GORDON HAD HEARD OF THE FRIENDLY INTEREST CHARGED ON THE SIDE HERE BUT HE SHOOK HIS HEAD WRONG (IZZY->IS HE) +2609-156975-0013-374: WAS ANY OTHER PROCEDURE TO BE EXPECTED FROM (A->IT THE) DESPOTIC (RULER->ROAR) OF THAT LAND AND DAY +8188-269288-0041-2714: I SHALL GO OFF TO SLEEP THAT IS ALL I CARE FOR +7975-280076-0015-2504: I WENT THROUGH INDEPENDENCE AND FROM THERE TO ACE (WEBB'S->WEBBS) +7902-96595-0015-2444: EH I SAY WHERE'S YOUR MASTER +8188-269288-0026-2699: I SHALL GO I KNOW A WAY +8131-117029-0019-2669: BUT HIS OLD EYES WERE GLINTING +8131-117016-0024-2579: GORDON'S EYES POPPED AT THAT +8188-269290-0013-2744: HAVE THE GOODNESS TO FIND IT AND PUT IT BACK +8280-266249-0033-2834: AND THE DOOR WAS SLAMMED VIOLENTLY TO +8280-266249-0048-2849: THEY KEPT IT UP TILL AFTER MIDNIGHT +8188-269290-0043-2774: YOU ASK SHE CONTINUED +8188-269290-0028-2759: (SHE WILL->SHE'LL) GET (INTO->IN) AN AWFUL SCRAPE +8461-281231-0008-2908: MEANWHILE THE BLACK KNIGHT HAD LED HIS FORCES AGAIN TO THE ATTACK AND SO VIGOROUS WAS THEIR ASSAULT THAT BEFORE LONG THE GATE OF THE CASTLE ALONE SEPARATED THEM FROM THOSE WITHIN +1688-142285-0074-74: I'LL GO TO BED (IT'S->ITS) BEST PLACE BUT CATCHING AT MARGARET'S GOWN (YO'LL->YOU'LL) COME AGAIN I KNOW (YO->YOU) WILL BUT JUST SAY IT +1998-29455-0038-209: THEY DID NOT STAY THERE BUT WALKED OUT ACROSS THE DOWNS WHERE THE SKYLARKS WERE SINGING AND ON A DIP OF THE DOWNS CAME UPON GREAT STONE WALLS AND TOWERS VERY STRONG AND (GRAY->GREY) +4294-32859-0004-1289: HE ASSISTED AT IT BUT TOOK NO PART AND IN FACT WAS LISTENING TO THAT OTHER CONVERSATION WHICH SOUNDED WITH ITS PLEASANT GABBLE AND LAUGHTER LIKE A LITTLE MUSICAL TINKLE OF BELLS IN THE DISTANCE +2609-169640-0004-419: MISTER (MARBLE->MARRBLE) HE (I DO->OUGHED TO) BELIEVE WAS FAIRLY SNOOZING (ON->OF) THE (HEN COOPS->HINGOOPS) BEING LIKE (*->TO) THE SAILS AS ONE MIGHT SAY (BARELY->VERY) ASLEEP +5484-24317-0007-1739: THE APPROVAL AS WELL AS THE DOUBTS WHICH (IT->HAD) AROUSED IN OTHERS STRENGTHENED HIS OPINION ALTHOUGH EVEN NOW HE COULD NOT SUCCEED IN BRINGING IT INTO HARMONY WITH THE FACTS +4350-9170-0012-1394: BUT SINCE THIS IS NOT THE CASE AND ON THE CONTRARY MEN IN POWER ARE ALWAYS FAR FROM BEING SAINTS THROUGH THE VERY FACT OF THEIR POSSESSION OF POWER THE SOCIAL ORGANIZATION BASED ON POWER HAS NO JUSTIFICATION +6070-63485-0002-1904: BY MEANS OF THIS STRATAGEM THEY FOLLOWED SO CLOSELY THAT ALTHOUGH WITHIN A FEW STEPS OF SARAH AND TOM THEY DID NOT HEAR THEM +8461-281231-0038-2938: THE PRECEPTORS OF WHOM THERE WERE FOUR PRESENT OCCUPIED SEATS BEHIND (THEIR->THE) SUPERIORS AND BEHIND THEM STOOD THE ESQUIRES OF THE ORDER ROBED IN WHITE +3764-168671-0001-1034: THIS HEARSE CONTAINED A COFFIN COVERED WITH A WHITE CLOTH OVER WHICH SPREAD A LARGE BLACK CROSS LIKE A HUGE CORPSE WITH DROOPING ARMS +7975-280084-0001-2519: WHEN MILLER AND I CROSSED THE 
BRIDGE THE THREE WERE ON SOME DRY (GOODS->GOOD) BOXES AT THE CORNER NEAR THE BANK AND AS SOON AS THEY SAW US WENT RIGHT INTO THE BANK INSTEAD OF WAITING FOR US TO GET THERE +7105-2330-0009-2249: THE JURY (WISH->WISHED) TO ADD A (RIDER->WRITER) DRAWING ATTENTION TO THE FACT THAT A (BY->BIE) ELECTION IS (PENDING->SPENDING) IN THE PARLIAMENTARY DIVISION OF NEMESIS ON HAND +4852-28319-0027-1529: THE WOODEN GRIN LOOSENED THE LARGE EYES TURNED THE HAND HOLDING THE HARD BOUQUET OF CARVED FLOWERS MOVED (AND->*) LET THE BOUQUET FALL +7975-280057-0019-2474: ONE OF MY OLD SCHOOL TEACHERS WHOM I HAVE NEVER SEEN SINCE THE SPRING (OR->*) SUMMER OF EIGHTEEN SIXTY TWO IS STEPHEN B ELKINS SENATOR FROM WEST VIRGINIA +8131-117016-0054-2609: HE WAS GETTING EVEN FATTER NOW THAT HE WAS EATING BETTER FOOD FROM THE FAIR RESTAURANT AROUND THE CORNER +3997-182399-0015-1169: WILL (YO' ALLS->YOU ALL) PLEASE SPEAK A (LIL->LITTLE) LOUDER HE (HOLLER->HOLLERED) DOWN THE CHIMNEY (JES->JUST) LIKE THAT +7975-280084-0016-2534: THE (SHUTTERS->SHOULDERS) WERE CLOSED AND THIS CAUSED BUNKER AN INSTANT'S DELAY THAT WAS ALMOST FATAL (PITTS->FITZ) CHASED HIM WITH A BULLET +4350-10919-0016-1364: BUT IN (FAVOR->FAVOUR) OF FOREIGN TRAVEL I WOULD URGE THE CHANGE OF HABITS THE REMOVAL FROM CONDITIONS CALLING UP REMINISCENCES +6432-63723-0000-2099: CHUCKLED THE COLONEL AS HE SKILFULLY PLAYED THE LUCKLESS TROUT NOW STRUGGLING TO GET LOOSE FROM THE HOOK +533-131564-0025-1649: CRIED SHE I COULDN'T HAVE INFLUENCED HIM (I'M->I AM) SURE BY ANYTHING THAT I COULD HAVE SAID +367-130732-0024-944: THIS IS THE RECIPE FOR EIGHT PEOPLE AND IT IS WELL (*->AND) WORTH TRYING IF YOU ARE GIVING A DINNER OF IMPORTANCE +8461-278226-0009-2893: HOW I WISH YOU COULD GET ME A COPY OF THAT PICTURE PHILIP (LAURA->LORA) SAID ENTREATINGLY +4294-14317-0008-1274: I BEGGED HIS MOST ILLUSTRIOUS EXCELLENCY TO NAME A SINGLE PERSON WHOM I HAD EVER TAKEN IN +533-1066-0018-1574: I CONFESS THAT JUST AT THAT MINUTE EVEN SUNNYSIDE SEEMED A CHEERFUL SPOT +7018-75788-0001-2189: BEHOLD A SHIP WAS MAKING FOR THE ISLAND THROUGH THE DASHING SEA AND CLASHING WAVES +3528-168669-0084-779: GOD SUBORDINATED TO THE COMMISSARY OF POLICE SUCH IS THE AGE SILENCE (FAUVENT->FAVANT) +533-1066-0003-1559: FORTUNATELY WARNER AND THE (DETECTIVES->DETECTIVE) WERE KEEPING BACHELOR (HALL IN THE->HALLAND) LODGE +4852-28312-0029-1499: A COACH WITH ITS TOP PILED HIGH WITH LUGGAGE STAMPED TO A HALT BESIDE THE FLAGGED COURTYARD +3764-168671-0016-1049: JEAN VALJEAN'S COMPOSURE WAS ONE OF THOSE POWERFUL TRANQUILLITIES WHICH ARE CONTAGIOUS +8131-117016-0009-2564: GORDON REPORTED FOR WORK WITH A SENSE OF THE BOTTOM FALLING OUT MIXED WITH A VAGUE RELIEF +3538-163624-0006-899: THEN THE PERSON WHO HAD KILLED OTTER WENT DOWN AND CAUGHT THE DWARF WHO OWNED ALL THE TREASURE AND TOOK IT FROM HIM +3005-163390-0005-464: BUT A BIG FINE LOOKING MAN JUMPS UP ON A BENCH AND SHOUTS HOLD ON +2609-157645-0004-404: ON ONE OCCASION HE WAS AT SAINT MARY'S CHURCH NOTTINGHAM FOR A (CONFIRMATION->CONFIRM MASON) +3528-168656-0007-689: THUS IT FURNISHED A SUBJECT OF COMMENT FOR ALL THOSE WHO WERE UNOCCUPIED OR BORED IN THE CONVENT +6070-63485-0017-1919: HE HAD FORGOTTEN THE ADDRESS OF THE SELF STYLED (FAN->PEN) PAINTER +8188-269288-0010-2683: I MUST PASS IN HONORS IF I DON'T I SHALL DIE +5764-299665-0024-1828: IF THEY GIVE UP ONE GOD THEY IMAGINE ANOTHER +533-131564-0024-1648: MILICENT FLEW TO THANK ME (OVERFLOWING WITH->OVERWHELMING HIS) GRATITUDE +8280-266249-0017-2818: IS IT STRANGE THEN THAT SMOKERS AND CHEWERS HAVE A THOUSAND 
AILMENTS +6432-63722-0000-2039: BUT (SCUSE->EXCUSE) ME DIDN'T (YO FIGGER->YOU FIG) ON DOIN SOME DETECTIN AN (GIVE->GIVIN) UP FISHIN +3538-142836-0013-839: IN THIS WAY IT IS ALSO THAT ORANGE AND (LEMON CHIPS->LEMONSHIPS) ARE PRESERVED +3764-168670-0029-1004: THE CHURCH IN THE STREET THE CHURCH WHICH ANY ONE CAN ENTER +8131-117017-0007-2624: YOU DAMNED COPS CAN'T (O'NEILL->O'NEIL) WAS BLUBBERING +533-131556-0007-1588: (UPON->UP IN) PERUSING THIS SHE TURNED SCARLET AND BIT HER LIP +8280-266249-0063-2864: YE ARE ALL THE CHILDREN OF GOD BY FAITH IN CHRIST JESUS +6938-70848-0001-2158: THE COLDS AND RHEUMATISM OF THE RAINY MONTHS VANISHED +4852-28330-0014-1544: WE SHALL SAY NO MORE BUT I TRUST YOU UNDERSTAND THE RESPONSIBILITY YOU HAVE +6432-63723-0045-2144: THE STOPPED CLOCKS FOR INSTANCE HAVE YOU ANY THEORY +6938-70848-0002-2159: (ASKED A->AS TO) WORKER LAST SUNDAY YOU DID IT WHEN THE YUNKERS +8188-269290-0012-2743: I TOOK IT OUT SAID LESLIE TOOK IT OUT +5442-32873-0011-1663: RACHEL (LAKE RACHEL LAKE->MEEK LACH TO LEEK) WHAT ARE YOU NOW +8131-117017-0021-2638: A THOUSAND INTEREST AT TEN PER CENT A WEEK STANDARD RIGHT +5442-41169-0024-1724: YOU'RE MARRIED (I'VE->I) HEARD SAID THE LANDOWNER +3997-180297-0017-1139: WE STAND FIRST IN THEIR SELF ESTEEM LAST IN THEIR ESTEEM +6432-63723-0044-2143: NOW AS TO CERTAIN MATTERS IN THE STORE ON THE MORNING OF THE MURDER +6432-63722-0014-2053: BUSTED HIS HEAD IN WITH A HEAVY CANDLESTICK ONE OF A PAIR +7902-96591-0003-2323: I WISH YOU WOULD BELIEVE ME THAT I AM IN AS GREAT TROUBLE ABOUT IT AS YOU ARE +3005-163399-0022-539: UNEASY SHE SAYS I'M READY TO GO DISTRACTED +7902-96592-0024-2369: BEEN PLAYING THE SPY THAT'S WHAT YOU'VE BEEN DOING WHO LOCKED YOU IN +7975-280084-0000-2518: I URGED ON THE BOYS THAT WHATEVER HAPPENED WE SHOULD NOT SHOOT ANY ONE +1998-29454-0040-164: SOME (BLOKES->LOLKS) THINK IT PAYS TO BE DIRTY BUT IT DON'T +367-130732-0009-929: SERVE IN A CHAFING DISH WITH THIN SLICES OF DRY TOAST +3528-168669-0069-764: SHE ASKED IT OF US THAT IS TO SAY COMMANDED US +7018-75789-0010-2218: WHEN IT WAS THE FIVE HUNDRED AND SIXTY SECOND NIGHT +8188-269288-0011-2684: A FEW MOMENTS LATER THERE CAME A TAP AT THE DOOR +533-131562-0011-1618: WHAT GREAT DISCOVERY HAVE YOU MADE NOW MISTER (HUNTINGDON->HINTING AN) +7902-96591-0004-2324: THAT MY FATHER SIR RISDON (GRAEME->GRAHAM) HAS SMUGGLED GOODS HERE +5764-299665-0069-1873: IF WE BUILD WE MUST BEGIN AT THE BOTTOM +1688-142285-0059-59: NOW I'LL NOT HAVE MY WENCH PREACHED TO +8188-269288-0055-2728: ANNIE IS THAT YOU SHE CALLED OUT +7105-2340-0027-2309: THE SNATCHER HAD BEEN AN ORPHAN THESE MANY YEARS +3538-163622-0016-884: YES NOW I SEE SOMETHING THAT IS WHITE SAID CINDERLAD +4852-28319-0012-1514: SEVENTY FOUR BOOK ONE THE RETURN +1998-15444-0023-119: SEPARATE THE ETHEREAL SOLUTION AND EVAPORATE +2414-128291-0006-269: HE WOULD NOT BE RID OF HIS AFFLICTION +4198-12259-0009-1184: HE DRINKS IN VAIN THAT (FEELS->FILLS) NOT THE PLEASURE OF IT +367-293981-0005-959: DIDN'T I SAY SO WORSE LUCK TO MY LINE SAID SANCHO +1688-142285-0044-44: BESSY DO YOU WISH TO DIE +8461-278226-0010-2894: I SHOULD SO LIKE ONE TO HANG IN MY MORNING ROOM AT (JOCELYN'S ROCK->JOSTLINGS FROCK) +6128-63240-0008-1979: AND YET THE READER WHO LIKES A COMPLETE IMAGE WHO DESIRES TO READ WITH THE SENSES AS WELL AS WITH THE REASON IS ENTREATED NOT TO FORGET THAT HE PROLONGED HIS (CONSONANTS->CONSONANCE) AND SWALLOWED HIS (VOWELS->VOWALS) THAT HE WAS GUILTY OF (ELISIONS->ALLEGIANCE) AND INTERPOLATIONS WHICH WERE EQUALLY UNEXPECTED AND THAT HIS DISCOURSE 
WAS PERVADED BY SOMETHING SULTRY AND VAST SOMETHING ALMOST AFRICAN IN ITS RICH BASKING TONE SOMETHING THAT SUGGESTED THE TEEMING EXPANSE OF THE (COTTON FIELD->COTTONFIELD) +7018-75789-0026-2234: KNOW O COMPANY THAT AFTER MY RETURN FROM MY SIXTH VOYAGE WHICH BROUGHT ME ABUNDANT PROFIT I RESUMED MY FORMER LIFE IN ALL POSSIBLE JOYANCE AND ENJOYMENT AND MIRTH AND MAKING MERRY DAY AND NIGHT AND I TARRIED SOME TIME IN THIS SOLACE AND SATISFACTION TILL MY SOUL BEGAN ONCE MORE TO LONG TO SAIL THE SEAS AND SEE FOREIGN COUNTRIES AND COMPANY WITH MERCHANTS AND HEAR NEW THINGS +7018-75789-0011-2219: SHE SAID IT HATH REACHED ME O AUSPICIOUS KING THAT SINDBAD THE SEAMAN CONTINUED WHEN I LANDED AND FOUND MYSELF AMONGST THE INDIANS AND ABYSSINIANS AND HAD TAKEN SOME REST THEY CONSULTED AMONG THEMSELVES AND SAID TO ONE ANOTHER THERE IS NO HELP FOR IT BUT WE CARRY HIM WITH US AND PRESENT HIM TO OUR KING THAT HE MAY ACQUAINT HIM WITH HIS ADVENTURES +8461-281231-0024-2924: HE LEFT THE GALLANT BAND OF FORESTERS SORROWING DEEPLY FOR HIS LOST FRIEND THE LORD OF (CONINGSBURGH->KENNINGSBURG) AND HE AND HIS FOLLOWERS HAD SCARCE DEPARTED WHEN A PROCESSION MOVED SLOWLY FROM UNDER THE (GREENWOOD->GREENOOD) BRANCHES IN THE DIRECTION WHICH HE HAD TAKEN IN THE CENTRE OF WHICH WAS THE CAR IN WHICH THE BODY OF (ATHELSTANE->ADELSTEIN) WAS LAID +2414-165385-0000-359: (*->AND) THUS ACCOMPLISHED HE EXCITED THE ADMIRATION OF EVERY SILLY (COQUETTE->COCKET) AND THE ENVY OF EVERY FLUTTERING (COXCOMB->PROCOMB) BUT BY ALL YOUNG GENTLEMEN AND LADIES OF UNDERSTANDING HE WAS HEARTILY DESPISED AS A MERE CIVILIZED MONKEY +4198-12281-0009-1228: TO OTHERS AGAIN HE UNJOINTED THE (SPONDYLES->SPINE NEULES) OR KNUCKLES OF THE NECK DISFIGURED THEIR CHAPS GASHED THEIR FACES MADE THEIR CHEEKS HANG FLAPPING ON THEIR CHIN AND SO SWINGED AND (BALAMMED->BELAMMED) THEM THAT THEY FELL DOWN BEFORE HIM LIKE HAY BEFORE A (MOWER->MOOR) + +SUBSTITUTIONS: count ref -> hyp +13 A -> THE +10 THE -> A +9 THAT -> THE +9 MURDOCH -> MURDOCK +9 AND -> IN +9 AN -> AND +6 OL -> OLD +5 THIS -> THE +5 N'T -> NOT +5 KINE -> KIND +5 IM -> HIM +5 I -> I'M +4 THIS -> HIS +4 THEIR -> THE +4 THE -> THAT +4 SHARRKAN -> SHARKAN +4 ON -> IN +4 MAN -> MEN +4 LIL -> LITTLE +4 IN -> AND +4 I -> I'VE +4 HAS -> HAD +4 CONFECTIONARY -> CONFECTIONERY +4 AROUND -> ROUND +4 ANYONE -> ANY +4 AND -> AN +3 YOU'VE -> YOU +3 YOU -> HE +3 YO -> YOU +3 WOULD -> WILL +3 WILFRID -> WILFRED +3 WAYNE -> WAIN +3 UP -> UPSTAIRS +3 ULRICA -> EUREKA +3 TOWARD -> TOWARDS +3 SANCT -> SAINT +3 REGIN -> REGAN +3 RAYSTOKE -> RAYSTROKE +3 OH -> O +3 O -> OR +3 O -> OF +3 MISTAH -> MISTER +3 MISS -> MISSUS +3 LEVER -> LOVER +3 JES -> JUST +3 INTERESTS -> INTEREST +3 IN'T -> IN +3 I'VE -> I +3 I -> AND +3 HER -> A +3 GOBEY'S -> GOBIES +3 FAFNIR -> FAFNER +3 E'S -> HE'S +3 DE -> THE +3 BEFEL -> BEFELL +3 ARSINOE -> ARSENAL +2 ZARATHUSTRA -> THEIR +2 YOU'RE -> YOU +2 YOU -> YOUR +2 YOU -> YOU'RE +2 YO'LL -> YOU'LL +2 WITHAL -> WITH +2 WILL -> WOULD +2 WE'RE -> WE +2 WAS -> IS +2 UPON -> UP +2 TRIBE -> TIME +2 TORQUILSTONE -> TORKELSTONE +2 TONIGHT -> TO +2 TIGLATH -> TIGLAS +2 THESE -> THIS +2 THERE'S -> THERE +2 THERE -> THERE'S +2 THEN -> THE +2 THE -> THEIR +2 STOKER -> STALKER +2 SHELL -> SHELLFISH +2 SHAWS -> SHORES +2 SHAG -> SHAGG +2 SEEK -> SEE +2 ROUND -> AROUND +2 ROPE'S -> ROPES +2 RAM -> GRAHAM +2 PRIEST -> PRIESTS +2 ONTO -> ON +2 ON -> OF +2 MYRTILUS -> MERTILLUS +2 MONSEIGNEUR -> MONSEIGNOR +2 MISSUS -> MISTER +2 MISSUS -> MISS +2 MESTER -> MISTER +2 MANKATO -> MAN +2 LUNA'S -> LUNNY'S +2 LABORING 
-> LABOURING +2 KENITES -> CANAITES +2 JULIEN -> JULIAN +2 JAKEY -> JAKIE +2 IT'S -> IS +2 IT -> THAT +2 IS -> OF +2 IN -> OF +2 I'D -> I +2 HORSTIUS -> HORSES +2 HOO'S -> WHO'S +2 HONOUR -> HONOR +2 HO -> OH +2 HIS -> ITS +2 HIS -> A +2 HERMON -> HERMANN +2 HERMON -> HAREMON +2 HER -> THE +2 HEAR -> HARE +2 HE'S -> HE +2 HE -> YOU +2 HAYS -> HAYES +2 HANDS -> HAND +2 HAID -> HEAD +2 GUNNAR -> GUNNER +2 GRAVE -> GRAVEDIGGER +2 GRAEME -> GRAHAM +2 GOV'NOR -> GUV'NOR +2 GOAL -> GOLD +2 FAVOR -> FAVOUR +2 ELEXANDER -> ALEXANDER +2 EILEEN -> AILEEN +2 DUKE -> DUPE +2 DO -> TOO +2 DEFENSE -> DEFENCE +2 COLOR -> COLOUR +2 COLE -> CO +2 CINDERLAD -> SINDERLAD +2 BOUT -> ABOUT +2 BEALE -> BELL +2 BEALE -> BELE +2 BANYAN -> BANNON +2 AWK -> AWKWARD +2 AWHILE -> A +2 AT -> TO +2 ARCHY -> ARCHIE +2 ANY -> ANYONE +2 AIN'T -> AND +2 A -> TO +2 A -> HER +1 ZEMSTVOS -> THEM +1 ZAYNAB -> THY +1 ZAU -> ZUL +1 ZAU -> ZAUAM +1 ZARATHUSTRA -> THE +1 YUSS -> YES +1 YOUR -> YOU'RE +1 YOU'LL -> YOU +1 YOU'D -> YOU +1 YOU -> YOU'LL +1 YOU -> EU +1 YORK -> YORKE +1 YO' -> YOU +1 YET -> IT +1 YES'M -> YASSEM +1 YER -> YOU' +1 YER -> YOU +1 YER -> HERE +1 YE -> YOU +1 YAUSKY -> YOZKI +1 YAHWEH -> YAHWAY +1 WUNNERED -> WONDERED +1 WROTE -> WELL +1 WRIT -> WROTE +1 WRETCH -> VETCH +1 WOULD -> WHAT +1 WOULD -> DID +1 WOT -> WHAT +1 WORSHIP'S -> WORSHIP +1 WORRY -> WERE +1 WORKINGMEN -> WORKING +1 WORK -> WORKADAY +1 WONDERED -> WANDERED +1 WON'T -> WOULD +1 WITH -> WHAT +1 WITH -> HIS +1 WISHT -> WISHED +1 WISH -> WISHED +1 WIRES -> WINDS +1 WINDOW -> WINDOWS +1 WILLY -> WILLIE +1 WILL -> WOT +1 WILL -> WILT +1 WILL -> WE'LL +1 WILL -> WAS +1 WILL -> WALLA +1 WILKSES -> WILKESES +1 WILKS -> WILKES +1 WILDERNESS -> MOTHER +1 WILD -> WHITE +1 WIDEAWAKE -> WIDE +1 WI -> WITH +1 WHY -> WELL +1 WHO'D -> WHO +1 WHO -> WHOSE +1 WHITE -> WI +1 WHILOME -> WILLOW +1 WHILE -> WALLET +1 WHEREABOUTS -> WHEREABOUT +1 WHERE -> WERE +1 WHERE -> THERE +1 WHER -> WHERE +1 WHEN -> BUT +1 WHEN -> AND +1 WHEEL -> WEAR +1 WHATE'ER -> WHATEVER +1 WHAT -> WOULD +1 WHAT -> OR +1 WHAT -> BUT +1 WHACKS -> WAX +1 WESTERN -> WESTERNLAND +1 WERE -> WAS +1 WELL -> WHILE +1 WEEVILLY -> WEEVILY +1 WEBB'S -> WEBBS +1 WE -> WE'RE +1 WAVERLY -> WAVERLEY +1 WATONWAN -> WATERWAN +1 WATER -> WALUTTER +1 WAS -> WHICH +1 WAS -> WASURCEBAH +1 WARD'S -> WORD'S +1 WARD -> HOWARD +1 WAKE -> AWAKE +1 WAITIN -> WAITING +1 WAGGOT -> WAGGETT +1 WAGGING -> WORKING +1 VOWELS -> VOWALS +1 VOUGHT -> VAULT +1 VON -> VUN +1 VOLVITUR -> VOLVETER +1 VIOLENCE -> VICE +1 VIL -> VEIL +1 VESTRY -> VETXRY +1 VENTRILOQUIST -> VENTRILOQUEST +1 VENIAL -> VENAL +1 VAVASOUR -> VAVASOR +1 VAUGIRARD -> ROUGE +1 VAST -> VATS +1 VASSILIEVITCH -> MASLOVITCH +1 VANE -> VAIN +1 URARTU -> URYTU +1 URARTU -> URTU +1 URARTIAN -> GORACIAN +1 UPSTAIRS -> UP +1 UNLESS -> AND +1 UN -> ONE +1 ULTIMATELY -> OUGHT +1 UKINZER -> AKENJER +1 UDDER -> UTTER +1 TYRANNY -> CEREMONY +1 TWYMAN'S -> TWIMMANS +1 TWO -> TOO +1 TRY -> TRIFLE +1 TRUSTEE -> TRUSTY +1 TRULY -> JULIE +1 TRUCE -> TRUTH +1 TRIFLE -> TRAVEL +1 TRIED -> TRIVE +1 TRELAWNEY -> TRE +1 TRAVELLED -> TRAVELED +1 TRAVELED -> TRAVELLED +1 TRANSSHIP -> TRANSHIP +1 TRAINDAWG -> TRAINED +1 TRAFFIC -> TRAPHIC +1 TOUCHED -> TOCH +1 TOTING -> TOADING +1 TOP -> TART +1 TOMORROW -> TO +1 TOLERBLE -> TOLERABLE +1 TOILET -> TOURID +1 TO -> THE +1 TO -> MONSEIGNOR +1 TO -> MADE +1 TO -> INTO +1 TO -> HURT +1 TO -> FOR +1 TIGER -> DRIVER +1 TIGER -> CHILD +1 THROUGH -> TO +1 THREE -> THIRD'S +1 THIS -> THEY +1 THIS -> THESE +1 THEY'RE -> THEY +1 THEY'RE -> 
THERE +1 THEY -> THERE +1 THEY -> HE +1 THESE -> THE +1 THESE -> ALWAYS +1 THERE -> THEY +1 THEN -> LIM +1 THEM -> HIM +1 THEM -> EM +1 THEIR -> THEY +1 THEIR -> HER +1 THEIR -> DEAR +1 THEE -> ME +1 THE -> VIDA +1 THE -> TO +1 THE -> THIS +1 THE -> THESHIP +1 THE -> THEM +1 THE -> OUR +1 THE -> OTHER +1 THE -> LOGS +1 THE -> HERRODIAN +1 THE -> ASHORE +1 THE -> APPEALS +1 THAT'S -> THAT +1 THAT -> THY +1 THAT -> IN +1 THAT -> HAD +1 THAN -> AND +1 TERRA -> TERIS +1 TEMPLAR -> TEMPT +1 TELLTALE -> TELL +1 TEETH -> CHEEK +1 TEAL -> TEALE +1 TEA -> TEENO +1 TATTLERS -> TATLERS +1 TASKMASTER -> TAX +1 TAPIS -> TAPI +1 TANQUAM -> TAM +1 TAMAR -> TO +1 TALK -> TALKED +1 TAHITI -> TITTI +1 T'OTHER -> THE +1 SYRIA -> ASSYRIA +1 SYNONYMON -> SYNONYM +1 SWORD -> SWARD +1 SWELP -> SWAP +1 SWAYING -> SWING +1 SWAG -> WAAG +1 SURE -> SHORE +1 SUPPOSE -> S'POSE +1 SUMTHIN -> SUPPER +1 SUFFOLK -> SUFFOLED +1 SUCCOURS -> SUCCOURUS +1 STRODE -> STROLLED +1 STRANGEST -> STRANGERS +1 STRANGE -> STREAM +1 STRAINS -> TRAINS +1 STORES -> STORIES +1 STONEWALL -> STERNWALL +1 STEW -> DO +1 STERN -> STERNMOST +1 STEPPED -> STEPS +1 STEEVER -> STEVER +1 STAYING -> SEEING +1 STAY -> STAYED +1 STATE -> STATES +1 STATE -> STATEROOM +1 STAS -> STARS +1 STANDARD -> STANDARDS +1 STAKE -> STEAK +1 SQUARE -> SQUEER +1 SPONSUS -> CORRESPONSES +1 SPONGE -> SPONNES +1 SPONDYLES -> SPINE +1 SPILLING -> SPINNING +1 SPICE -> SPIES +1 SPARSELY -> FIRSTLY +1 SPAKE -> SAYING +1 SPADDLE -> SPATTLE +1 SOWING -> SEWING +1 SOUSE -> SOULS +1 SOUL -> SOLE +1 SOUGHT -> SAWED +1 SOTELES -> SOCULUS +1 SOOT -> SUIT +1 SONNY -> SANNY +1 SOMEONE -> SOME +1 SO -> SORCHAUS +1 SNARLED -> SNARLS +1 SMOLNY -> SMOLNEY +1 SLAB -> SLAP +1 SKIN -> SKINNED +1 SKIN -> KIN +1 SKEW -> SKEWER +1 SIXES -> SIXXES +1 SIRE -> SIRE'S +1 SIR -> SERVICE +1 SINUHIT -> SINEWET +1 SING -> SINGING +1 SINE -> INAQUA +1 SINDBAD -> SINBAD +1 SILLY -> SIDDY +1 SIGNED -> SIGHING +1 SIGHING -> SEWING +1 SHUTTERS -> SHOULDERS +1 SHUT -> AT +1 SHRUBS -> SHRUGS +1 SHOULD -> SHALL +1 SHOP -> SHOPBOY +1 SHO'LY -> SURELY +1 SHIPS -> SHIP +1 SHIP -> POUCHED +1 SHET -> SHUT +1 SHERE -> SHERLLY +1 SHERBURN -> SHERBIN +1 SHEETS -> SEATS +1 SHE'S -> SHE +1 SHE'LL -> YOU'LL +1 SHE -> SHE'LL +1 SHARDURIS -> JODURIS +1 SHARDURIS -> CHARDURIS +1 SHARDURIS -> CHADORIS +1 SHALLUM -> SCHILUM +1 SHALL -> SHOULD +1 SHAG -> SHAD +1 SEYTON -> SETAN +1 SEVERSON -> CEVERSON +1 SEVEN -> THE +1 SENTENCED -> INTENSE +1 SELLER -> CELLAR +1 SEEM -> SEEMED +1 SEE -> SEEM +1 SCUSE -> EXCUSE +1 SCULPTORS -> SCULPTOR'S +1 SCULPTOR'S -> SCULPT'S +1 SCRUTINISED -> TOESTRAS +1 SCRAPPIN -> SCRAPPING +1 SCO'TCH -> SCORCHED +1 SCHULBERG'S -> SCHOLBURG'S +1 SCHOOLDAYS -> SCHOOL +1 SCHOOL -> SCORE +1 SCHOOL -> SCHOOLGIRLS +1 SCHOOL -> SCHOOLBOY +1 SCAPED -> ESCAPED +1 SCAPE -> ESCAPE +1 SAYIN -> SAYING +1 SATISFACTORILY -> SATISFACTIONILY +1 SANS -> SON +1 SANG -> SING +1 SANCTESS -> SANCTUS +1 SAMARIA -> SUMERIA +1 SALONE -> SALOON +1 SAIL -> SAILORS +1 SAID -> SONG +1 SAID -> SET +1 SAH -> A +1 SAGITTAIRE -> SAGOTARE +1 SAFE -> SAVED +1 RYO -> RIO +1 RUN -> RAN +1 RUMP -> RUM +1 RUM -> ROMAN +1 RULER -> ROAR +1 ROXBURY -> BRAXBURY +1 ROTHS -> WARS +1 ROSAMUN -> ROSAMOND +1 ROLL -> RAW +1 ROCK -> FROCK +1 RISDON -> RICHMOND +1 RISDON -> AND +1 RINGMASTER -> RING +1 RING -> BRING +1 RIGOROUS -> REGOROUS +1 RIDGE'S -> RIDGES +1 RIDER -> WRITER +1 RIDER -> RITER +1 RID -> IT +1 RHODIAN -> ROUDIAN +1 REWARD -> REVORED +1 REVOLUTIONISTS -> REVOLUTIONIST +1 REVOLTE -> REVOLT +1 REVEREND -> REVERED +1 REVELING -> 
REVELLING +1 RETZCH'S -> WRETCH'S +1 RESK -> REST +1 RESINOUS -> VESINOUS +1 RESCUED -> RESCUE +1 REPEATED -> REPLIED +1 REND -> RUN +1 REMISSION -> REMISSIONER'S +1 REIGNS -> REIGN +1 REGIN -> REGEN +1 REG'LER -> REG'LAR +1 REALIZE -> REALISE +1 REALISED -> REALIZED +1 READY -> RAY +1 RAYSTOKE -> REYSTROKE +1 RAYSTOKE -> GREYSTOKE +1 RATCHFORD -> RACHFORD +1 RANSOM -> RUNSEN +1 RANSOM -> RAMSON +1 RAMSES -> RAMESES +1 RAM'S -> RAHAM'S +1 RAM -> GRIM +1 RADPROP -> RHYDROPPIST +1 RACKED -> WRAPPED +1 RACK -> RAG +1 RACHEL -> LACH +1 RACES -> RACE +1 RABB'S -> RAB'S +1 QUOTED -> QUOTE +1 QUMMUKH -> CUMAC +1 QUITE -> A +1 QUEST -> PLACE +1 QUANTRELL -> QUANTRAILLE +1 QUANTITIES -> QUALITIES +1 PUTTEL -> PATTER +1 PULLEY -> PULLY +1 PUDDLES -> BOTTLES +1 PSALM -> OTHER +1 PRYTANEUM -> PRETINNIUM +1 PROVEN -> PROVING +1 PROTECTORATE -> PROTECTORY +1 PROPRE -> PROPERA +1 PROCLUS -> PROCLYS +1 PROCLUS -> PROCLIS +1 PROCLUS -> PROCLAS +1 PROAS -> PROETS +1 PROAS -> PEROCKS +1 PROAS -> P +1 PROA -> PROW +1 PROA -> EXPRARA +1 PRISONER -> PRISONERS +1 PRIMER -> PRIMARY +1 PRIEST -> PREACH +1 PRESTIGE -> PRESGE +1 PRENTICESHIP -> APPRENTICESHIP +1 PRECEPTORY -> PRECEPTARY +1 PRECEPTORS -> PERSEPTORS +1 PRECENTORS -> PRESENTERS +1 PRACTICED -> PRACTISED +1 POUND -> POUNDS +1 POUCHES -> PIROUCHES +1 POTION -> FOCCETION +1 POSTHASTE -> POST +1 POPULACE -> POPULOUS +1 POOR -> POORER +1 POMEROY -> POLMROY +1 POLL -> POLE +1 POLL -> ALL +1 PO -> POOR +1 PLUMB -> PLUM +1 PLEASANT -> PRESENT +1 PLEAS -> PLEDS +1 PLATTERBAFF -> PLATTER +1 PLATTERBAFF -> FLATTERBUFF +1 PLANNING -> PLANTING +1 PLACE -> PACE +1 PITTS -> PITT +1 PITTS -> FITZ +1 PITHUM -> PITTHAM +1 PILLOWED -> PILLOW +1 PILESER -> POLYSER +1 PILESER -> POLESU +1 PILESER -> BELEISURE +1 PILESER -> BELEASER +1 PIKES -> PIPES +1 PIGSKIN -> PIG +1 PIGEONCOTES -> PIGEON +1 PIGEONCOTE -> PIGEONBUL +1 PIGEONCOTE -> PIGEON +1 PIGEONCOTE -> PIECOTE +1 PIGEONCOTE -> PICHKOTE +1 PICKED -> PRICKED +1 PHUT -> FLUT +1 PHUT -> BUT +1 PHOSPHOR -> PHOSPHORIBULB +1 PHOSPHOR -> PHOSPHORE +1 PHILISTINES -> PHILISTINE +1 PHILIPPUS -> PHILIPPA +1 PHELPS -> PHELPS'S +1 PETREL -> PETROL +1 PESTE -> PESTES +1 PERNOUNCE -> PRONOUNCE +1 PERE -> PERELACHASE +1 PERE -> PARLAISE +1 PEONAGE -> OPINION +1 PENDING -> SPENDING +1 PENCE -> PANTS +1 PEER -> SPEAR +1 PEASANTS -> PEACE +1 PEAS -> PEASE +1 PEARL -> PURLE +1 PEACEFUL -> B +1 PASTES -> PASTE +1 PAST -> PASS +1 PASSED -> PASS +1 PARTS -> PART +1 PARTLY -> PARTIALLY +1 PARR -> PAR +1 PARKS -> BOGS +1 PANEL -> PENNEL +1 PALL -> PAW +1 PALAESTRA -> PILASTER +1 PADDLING -> PADDLIN +1 P -> PATUM +1 OWNERS -> LANDOWNERS +1 OW'M -> HOW +1 OW -> HOW +1 OVERRIPENESS -> OVER +1 OVERFULL -> OVER +1 OVERFLOWING -> OVERWHELMING +1 OVER -> OF +1 OUTER -> OUT +1 OUT -> OUTGAZE +1 OUT -> I'M +1 OUT -> HOUGHTON +1 OUT -> HIS +1 OUGHTN'T -> OUGHT +1 OUEN -> JUIN +1 ORIENTAL -> ORIENT +1 ORGANISER -> ORGANIZER +1 ORGANISER -> ORGANISR +1 OR -> ORAS +1 OR -> I +1 OR -> A +1 OPENED -> OPEN +1 OPE -> HOPE +1 ONE'S -> WAS +1 ONE -> WHEN +1 ONE -> A +1 ON'T -> HONOUR +1 ON -> UPON +1 ON -> UNCREAM +1 ON -> HOME +1 OME -> HOME +1 OLL -> ALL +1 OLIVE -> ALIVE +1 OLD -> OLE +1 OLD -> ALL +1 OL -> OLE +1 OKAY -> O +1 OFFICERS -> OFFICIALS +1 OFFICER -> OFFICERS +1 OFFENSE -> OFFENCE +1 OFFEN -> OFF +1 OFF -> OUT +1 OF -> TO +1 OF -> OFAU +1 OF -> INTO +1 OF -> HAVE +1 OF -> EPH +1 OBOCOCK -> OBACOCK +1 OBJECT -> SUBJECT +1 O'NIGHTS -> O +1 O'NEILL -> O'NEIL +1 O -> OH +1 NYTOUCH -> KNIGHTS +1 NUTS -> KNOTS +1 NUNS -> NUN'S +1 NU'UMAN -> NUMAN +1 
NOWT -> IT +1 NOW -> ON +1 NOW -> KNOWN +1 NOUGHT -> NOT +1 NOT -> BUT +1 NORTH -> NORTHEAST +1 NONETHELESS -> NEVERTHELESS +1 NONE -> NO +1 NO -> NOW +1 NO -> NOCOMN +1 NO -> NOCCOUNT +1 NO -> KNOW +1 NIPPER -> NIBBER +1 NICO -> NICHO +1 NEXTER -> NEXT +1 NEW -> YOUR +1 NERVE -> NURSE +1 NEOSHO -> NEOTIO +1 NEIGHBOURHOOD -> NEIGHBORHOOD +1 NEIGHBOR -> NEIGHBOUR +1 NEEDLED -> NEEDLE +1 NEAREST -> NEAR +1 NEARER -> NEAR +1 NAUGHT -> NOUGHT +1 NATURAL -> NATURALLY +1 NANDY'S -> AND +1 MYSTERIOUS -> MYSTERY +1 MYRTILUS -> MYRTLES +1 MYRTILUS -> MERTALUS +1 MUSTACHES -> MOUSTACHES +1 MURDOCH'S -> MURDOCK'S +1 MUG -> MUCH +1 MOWER -> MOOR +1 MOVEMENT -> MOMENT +1 MOUTHWHAT -> MOUTH +1 MOUTHS -> MOUTH +1 MOUNTNORRIS -> MONTNORRIS +1 MOTIONLESS -> MOTION +1 MOST -> PROW +1 MOST -> MOSTLY +1 MOSES -> MOVES +1 MOSES -> MOSES'S +1 MORTIFICATIONTHAT -> MORTIFICATION +1 MORE'N -> MORE +1 MOPED -> MILKED +1 MOOR -> MORE +1 MONSEIGNEUR -> A +1 MONKERS -> MOCKERS +1 MOMMOL -> MAMAL +1 MO -> MORE +1 MISTER -> MITZTER +1 MISTER -> MISSUS +1 MISSISSIPPIAN -> MISSISSIPPIENT +1 MINNIE -> MINNY +1 MINIONETTE -> MANONET +1 MINE -> MIND +1 MINE -> I +1 MIND -> MINE +1 MIMICK -> MIMIC +1 MILLY -> MERELY +1 MILLSTON -> MILLSTONE +1 MILICENT'S -> MILICON'S +1 MILICENT -> MILLICENT +1 MILICENT -> MILICON +1 MILICENT -> MELLICENT +1 MILICENT -> ME +1 MIDRIFF -> MIDDRIFTS +1 MIDIAN -> MEDIAN +1 MIDDY -> MITTEE +1 MIDDLING -> MIDDLIN +1 METHINKETH -> METHINK +1 MET -> MAKE +1 MESTIENNE'S -> MAIAN'S +1 MESTIENNE -> MUSTHIENNE +1 MESTIENNE -> MISTIAN +1 MESTIENNE -> MASTIENNE +1 MERRY -> MERRYMAKING +1 MERNEPTAH -> PATH +1 MERLONUS -> MERELONA'S +1 MERLONUS -> MELONUS +1 MERIT -> MARRIAGE +1 MENAHEM -> MANAHIM +1 MENAHEM -> MANAHEM +1 MEN'S -> MAN'S +1 MEN -> MAN +1 MEN -> INTO +1 MEN -> CAMEN +1 MEDIAN -> MEDEAN +1 ME -> NEED +1 ME -> MAN +1 MC -> MAC +1 MAY -> MAKES +1 MATI -> MEANT +1 MATI -> MATIILLIU +1 MATEY -> MAITIE +1 MASTER -> MASSA +1 MASKED -> MASTED +1 MARVELLOUS -> MARVELOUS +1 MARVELED -> MARVELLED +1 MARSPEAKER -> MAR +1 MARSHAL -> MARTIAN +1 MARRIAGE -> MARYS +1 MARBLE -> MARRBLE +1 MALNUTRITION -> MAL +1 MAKES -> MATRON +1 MAJORITY -> MATURITY +1 MAINE -> MAIN +1 M -> EM +1 M -> ART +1 M -> AM +1 LYSIMACHUS -> LISUMACHUS +1 LUNA'S -> LUNER'S +1 LUNA -> LOINER +1 LUNA -> LENA +1 LUCAS -> LYCAS +1 LOWER -> BLOWER +1 LORDSHIPS -> LORDSHIP +1 LORD'S -> LARGE +1 LORD -> LOT +1 LOOKOUT -> LOOK +1 LOOK -> LOOKOUT +1 LOOK -> LOOKED +1 LONE -> LONG +1 LOCK -> LOOK +1 LL -> HAD +1 LIZABETH -> LIZ'BETH +1 LIVE -> LOVE +1 LINCOLN -> LINTON +1 LIME -> LINERY +1 LILBURN -> LOWBURN +1 LILBURN -> LOWBORN +1 LIKELY -> LIKE +1 LIKE -> ALIKE +1 LIFE -> LIFE'S +1 LIE -> LIKE +1 LIAISON -> LIYER +1 LEXINGTON -> LESSINGTON +1 LET -> THEM +1 LET -> LEFT +1 LESLIE -> LIZZIE +1 LESLIE -> LESLEY +1 LESLIE -> E +1 LENOIR -> LE +1 LENIN -> LENDING +1 LEMON -> LEMONSHIPS +1 LEGS -> LESS +1 LEFT -> LAST +1 LEECH -> LIEGE +1 LEE'S -> LEE +1 LEAPT -> LEAPED +1 LEADPENCIL -> LEAD +1 LEADERS -> LEADER'S +1 LAWS -> NOISE +1 LAURA -> LORA +1 LAUGHED -> LAUGH'D +1 LAUDERDALE -> LORDADALE +1 LAUDERDALE -> LARDADAE +1 LATH -> GLASS +1 LASH -> LAST +1 LARCH -> LARGE +1 LAND -> THE +1 LAKE -> TO +1 LAKE -> MEEK +1 LAIN -> LANE +1 LADS -> LAD +1 LACHRYMA -> LACHRYMAL +1 LABOUR -> LABOR +1 LA -> NEWBURG +1 L -> O +1 KNOBBLY -> KNOBBY +1 KNIGHT -> LAW +1 KLEPTOMANIA -> CLAPTOMANIA +1 KITE -> KAIGHT +1 KING -> GHEIMURNETH +1 KINDER -> KIND +1 KILLS -> KILLETH +1 KETTLE -> CATTLE +1 KERSTALL -> CRISTEL +1 KEDEM -> KEIDAM +1 KAMAR -> KAMA +1 
JUSTIFIED -> IT +1 JUST -> JUTS +1 JUST -> JEST +1 JUST -> IS +1 JUST -> HAGITTS +1 JUDGMENT -> JURGMENT +1 JUDGES -> JUDGETH +1 JOUVIN'S -> ROUVNENES +1 JOSHUA -> JONCEWA +1 JOKINGLY -> CHOKINGLY +1 JOHN -> JON +1 JOHN -> GIAN +1 JOCELYN'S -> JOSTLINGS +1 JIS -> JUST +1 JILT -> TOLD +1 JEWELER -> JAWER +1 JEDGE -> JUDGE +1 JANEERO -> JANEIRO +1 JAKEY'S -> JAKIE +1 JAKEY -> JIKI +1 IZZY'S -> ISEY'S +1 IZZY -> ISSY +1 IZZY -> ISSEY +1 IZZY -> ISEY +1 IZZY -> IS +1 IZZY -> IASY +1 IZZY -> AS +1 ITS -> HIS +1 IT'S -> ITS +1 IT -> US +1 IT -> THIS +1 IT -> IT'LL +1 IT -> I +1 IT -> HE +1 IT -> HAD +1 IT -> AND +1 IS -> TURBRIMENT +1 IS -> ISN'T +1 IRONICAL -> IRONIC +1 INTO -> TO +1 INTO -> IN +1 INTENTIONALLY -> INTENTIALLY +1 INTELLECTUALLY -> INTELLECTUAL +1 INSTANT -> INCIDENT +1 INSIST -> INSISTS +1 INSCRIPTION -> INSCRIPTS +1 INNES -> EANES +1 INJURE -> ENDURE +1 INGENIOUSLY -> INGENUOUSLY +1 INCLINATION -> INCLINATIONS +1 IN -> TO +1 IN -> ON +1 IN -> JUSTFIED +1 IMPROVISATION -> IMPROCISATION +1 IM -> CCHOSTAWTE +1 ILLS -> EILS +1 IF -> DID +1 IDEA -> RE +1 ICES -> ISIS +1 I'VE -> HAVE +1 I'M -> I +1 I'LL -> I +1 I'FAITH -> I +1 I'D -> I'LL +1 I -> THY +1 I -> OUGHED +1 I -> OH +1 I -> AYE +1 HURT -> HEART +1 HUNTINGDON -> HINTING +1 HUNTERS -> HANDERS +1 HUMOR -> HUMOUR +1 HUH -> HA +1 HOZE -> HOSE +1 HOZE -> HOES +1 HOWL -> HIRE +1 HOW -> HOW'S +1 HOUSE -> HOUSEMOTHER +1 HOUNDED -> HOUNDY +1 HORN -> WELCOMED +1 HORDE -> HOARD +1 HOO'LL -> HE'LL +1 HOO -> HE +1 HOMEWARD -> HOMEWARDS +1 HOMEPUSH -> HOME +1 HOLY -> WHOLLY +1 HOLLER -> HOLLERED +1 HOLD -> PATESENCES +1 HOLD -> O +1 HOLD -> HOFAX +1 HOF -> WHOLE +1 HIT -> HATE +1 HISSELF -> HIMSELF +1 HIS -> THIS +1 HIS -> THE +1 HIS -> IS +1 HIS -> HE +1 HIS -> DISCURSE +1 HIS -> DEMEANOUR +1 HINT -> HAND +1 HINDFELL -> HENFELD +1 HILL -> HI +1 HIGH -> TIME +1 HEYDAY -> HEY +1 HERMON'S -> HERMONT'S +1 HERMON'S -> HERMANN'S +1 HERMON'S -> HARMONT'S +1 HERMON -> HERMONN +1 HERMON -> HERMOD +1 HERMON -> HARMONT +1 HERMON -> HARMON +1 HERMON -> HAREMAN +1 HERMON -> HARE +1 HERIOT'S -> HERRIOT'S +1 HERE'S -> HERE +1 HER -> IT +1 HEPTARCHIES -> HEPTARKEYS +1 HEN -> HINGOOPS +1 HELVIN -> HELVAN +1 HELM -> HAM +1 HELLO -> HALLO +1 HELEN -> ELLEN +1 HEARTY -> EARTHY +1 HEARSE -> HOUSE +1 HEAR -> HERE +1 HE -> IT +1 HE -> HE'D +1 HAW -> JAWHAWED +1 HAVE -> IF +1 HAVE -> AM +1 HATTERSLEY -> HAUTTERSLEY +1 HASAN -> HASSAN +1 HARVEY'SWHICH -> HARVEY'S +1 HARRIS -> HARRIS'S +1 HARE -> HAIR +1 HAPLY -> HAPPILY +1 HAND -> HANDS +1 HALL -> WHOLE +1 HALL -> HALLAND +1 HADDA -> HAD +1 HAD -> I +1 HAD -> GOT +1 HAD -> AND +1 GURR -> GURRFATHER +1 GURR -> GOURR +1 GURR -> GORE +1 GURR -> GIRL +1 GURR -> GIRK +1 GUIRUN'S -> GUNDERN'S +1 GUILD -> GIRL +1 GUEST -> GUESTS +1 GUDRUN -> GUNDRAN +1 GRIBIER -> CRIBIER +1 GREY -> GRAY +1 GREENWOOD -> GREENOOD +1 GREENBACKS -> GREEN +1 GREEN -> GRAY +1 GREAVES -> GRIEBS +1 GRAY -> GREY +1 GRASPS -> GRASPED +1 GRANT -> GRAT +1 GRANDPAP -> GRANPAP +1 GRANDAME -> GRAND +1 GRAMMATEUS -> GRAMMATIUS +1 GRAM -> GRAHAM +1 GOSLER -> GOSTLER +1 GORDON -> GORDON'S +1 GOODS -> GOOD +1 GOING -> GOIN +1 GOIN -> GOING +1 GODEBILLIOS -> GOTA +1 GOD'S -> GODS +1 GOD -> GUNS +1 GOD -> GON +1 GLISPIN'S -> LISPIN'S +1 GLISPIN -> LISPON +1 GLAD -> GREAT +1 GIVING -> GIING +1 GIVEN -> KEEPN +1 GIVE -> GIVIN +1 GIT -> GET +1 GIRTHING -> GIRDING +1 GIRTHED -> GIRDED +1 GIRDS -> GURGE +1 GIORGIO -> GEORGE +1 GET -> GIT +1 GERMS -> TERMS +1 GEORGE'SWHICH -> GEORGE'S +1 GENTLEMEN'S -> GENTLEMAN'S +1 GENTLEMAN -> GENTLEMEN +1 GAUTHIER -> GATHIER 
+1 GAMMON -> GAMIN +1 GALLATIN -> GLLOTON +1 GABLE -> CABLE +1 G'YIRLS -> GO +1 FURZE -> FIRS +1 FULL -> SHORT +1 FULL -> FOR +1 FROZE -> ROSE +1 FROM -> FOR +1 FROG'S -> FROGS +1 FRO -> FROM +1 FRISTOE'S -> FORSTOW'S +1 FRISTOE -> STOW +1 FRISTOE -> FRISTOW +1 FRIDOLIN -> FRIEDOLIN +1 FREES -> FREESWOMEN +1 FRANC -> FRANCS +1 FOURTEENTHAT'S -> FOURTEEN +1 FORTS -> FAULTS +1 FORMED -> REFORMED +1 FOR -> FULL +1 FOR -> FROM +1 FOR -> FAR +1 FOOLS -> FOOL'S +1 FOOL -> FULL +1 FONTEVRAULT -> FONTREVALLE +1 FOLLOWS -> FOLLOWED +1 FOE -> FOLK +1 FOALS -> HOLES +1 FOAL -> POLE +1 FLY -> FLIES +1 FLOWERBEDS -> FLOWER +1 FLOSSY -> FLOSSIE +1 FLOORBOARDS -> FLOOR +1 FLEROV'S -> FLIROV'S +1 FLEROV'S -> FLAEROFF'S +1 FLEROV -> FLEROFF +1 FLEROV -> FLAIROFF +1 FLAVOR -> FLAVOUR +1 FISHED -> HAVE +1 FINICAL -> FINNICAL +1 FILTRATES -> FULL +1 FILTRATE -> FILDRATE +1 FIGGER -> FIG +1 FIFTEEN -> FIFTEENTH +1 FIELD -> FIELDS +1 FIACRE -> THEACCUS +1 FESTAL -> FESTALE +1 FELT -> FELL +1 FEELS -> FILLS +1 FAVOUR -> FAVOR +1 FAVORITE -> FAVOURITE +1 FAVORABLE -> FAVOURABLE +1 FAUVENT -> THE +1 FAUVENT -> PROUVENT +1 FAUVENT -> PREVENT +1 FAUVENT -> FOUVET +1 FAUVENT -> FOR +1 FAUVENT -> FERVEN +1 FAUVENT -> FAVANT +1 FAUVENT -> FAUVAIN +1 FAUCES -> PHOCES +1 FATTY -> FATIMATAS +1 FATS -> THATS +1 FATS -> FATT'S +1 FATHER'S -> FATHERS +1 FATHER -> FOR +1 FAST -> HODOBT +1 FAST -> FAT +1 FARRINDER -> VERNDER +1 FARRINDER -> FARRINGERS +1 FARRINDER -> FARLANDER +1 FARRINDER -> FARINNDER +1 FARM -> FARMHOUSE +1 FAN -> PEN +1 FAN -> FANN +1 FAM'LY -> FAMILY +1 FAIR -> FARE +1 FAILED -> SA +1 FAGOTS -> FAGGOTS +1 FAFNIR'S -> FAFFNER'S +1 EYES -> EYE +1 EXTRACT -> EXTRACTED +1 EXTEMPORIZED -> EXTEMPORISED +1 EXPERIENCE -> SPIRITENCE +1 EXCLAIMED -> EXPLAINED +1 EXCITING -> SOUNDTING +1 EXCITE -> OUT +1 EXAMINING -> EXAMINED +1 EXACKLY -> EXACTLY +1 EVIL -> AVIOUS +1 EVERY -> EVERYONE +1 EV'YBODY'S -> EVERYBODY'S +1 EUSEBIUS -> JOSEPIUS +1 EUSEBIUS -> CUPIUS +1 EUPHRANOR -> EUPHRANER +1 ETHELRIED -> ETHEL +1 ETERNAL -> TURNEDETH +1 ETERNAL -> E +1 ET -> AT +1 ESPECIALLY -> SPECTREE +1 ESPECIALLY -> HAD +1 ERE'S -> YES +1 ERE -> AT +1 EPIGASTER -> EBERGASTER +1 EPHRAIM -> FROM +1 ENTRUSTED -> INTRUSTED +1 ENTR'ACTE -> ENTRANCE +1 ENSNARES -> ENSNAS +1 ENSLAVED -> ENSLAVE +1 ENDURETH -> ENDURED +1 ENDEAVOURED -> ENDEAVORED +1 EMETIC -> AMATIC +1 EMBRUN -> EMBRO +1 ELISIONS -> ALLEGIANCE +1 ELDER -> OTHER +1 EIGHT -> EIGHTH +1 EGYPTIAN -> EGIPSIAN +1 EGGS -> AXE +1 EELS -> FIELDS +1 EDGING -> EDGED +1 EAU -> THE +1 EARTH -> ART +1 EAR -> IRASCELLING +1 EAD -> ATT +1 E'LL -> HE'LL +1 E'ER -> EVER +1 E -> DOG +1 DURING -> DUN +1 DUMAS -> TUMICE +1 DUM -> DUME +1 DU -> DES +1 DRUGSTORE -> DRUG +1 DROUTH -> DROUGHTH +1 DRAWERS -> DRAWER +1 DRAUGHT -> DROP +1 DOWNSTAIRS -> DOWN +1 DOWNING -> DAWNING +1 DOWER -> TO +1 DOTH -> DO +1 DONOVAN -> DONOMAN +1 DONE -> TURNED +1 DONE -> DOESN'T +1 DON'T -> DO +1 DOM -> DONEBYON +1 DOG -> DARK +1 DOCK -> DOCKYARD +1 DOAN -> DON'T +1 DO -> TWO +1 DO -> TO +1 DO -> DID +1 DO -> DEW +1 DISTRUSTED -> DESTRUCTED +1 DISTRESS -> DISTRESSED +1 DISTICHS -> DISTICHES +1 DISNEY -> DNEY +1 DINERS -> DINARS +1 DIDN'T -> DON'T +1 DID -> DIDN'T +1 DICKIE -> DICKY +1 DICKIE -> DICKI +1 DETECTIVES -> DETECTIVE +1 DESTINIES -> DEST +1 DESSERTS -> DESERTS +1 DEODORIZING -> DEORTERIZING +1 DEMETER -> DEMEANOR +1 DELMONICO -> DOMONICO +1 DEEPENED -> DEEPENS +1 DEBTOR -> ADEPTOR +1 DEATHLIKE -> DEATH +1 DEANS -> DENES +1 DEAF -> DEATH +1 DEAD -> DEDROOM +1 DEAD -> BED +1 DEACH -> DID +1 DAYS -> STAYS +1 
DATED -> THEY +1 DAT -> THAT +1 DARKAND -> DARK +1 DAPHNE -> DAPHANE +1 DALYS -> DAILIES +1 DALY -> DALEY +1 DAGOS -> DAGOES +1 DA -> DOMIHIPPOTEM +1 D -> THEY +1 D -> DAMN +1 CURRENTS -> CURRANTS +1 CRUX -> CREW +1 CRUCIFIXION -> CRUCIFICTION +1 CRUCIFIXION -> CROSS +1 CROST -> SEEM +1 CRIPPLED -> A +1 CRIES -> CHRIST +1 CRAB -> CRABS +1 COYNESS -> KINDNESS +1 COXCOMB -> PROCOMB +1 COWLEY'S -> COLLEIES +1 COURT -> COURTYARD +1 COUNT -> CON +1 COUNSELS -> COUNSEL +1 COUNSEL -> CONSUL +1 COUNCILLOR -> COUNSELLOR +1 COULD -> HAD +1 COTTON -> COTTONFIELD +1 CORYDON -> CROYDON +1 CORNER -> CORNERSTONES +1 CORNER -> CORN +1 CORKLE -> CORAL +1 CORAL -> COAL +1 COQUETTE -> COCKET +1 COPS -> COUPS +1 COPS -> COPSE +1 CONTINUAL -> CONTINUOUS +1 CONSONANTS -> CONSONANCE +1 CONSOMME -> CONSUMM +1 CONINGSBURGH -> KENNINGSBURG +1 CONFIRMATION -> CONFIRM +1 CONFIDENTIALLY -> CONFIDENTLY +1 CONFIDE -> CONFINE +1 CONFICERE -> CONFIRCET +1 CONCOCTED -> CONCLUDED +1 COMORIN -> CORMERAN +1 COMMUNITY -> KUNITY +1 COMING -> CAMEN +1 COMEST -> COMES +1 COMES -> COME +1 COLOUR -> COLOR +1 COLOSSEUM -> COLISEUM +1 COLLECTED -> CONNECTED +1 COD -> CARD +1 COCKRELL -> CONCRELL +1 COBBER -> COWBER +1 COALESCED -> TO +1 CLUMB -> CLIMBED +1 CLOMB -> CLIMBED +1 CLEVERLY -> LEVERLY +1 CLASSES -> CLASS +1 CLARET -> CLARY +1 CLAIRVAUX -> CLERVAL +1 CISEAUX -> IS +1 CINDERLAD -> SINDA +1 CINDERLAD -> SIN +1 CINDERLAD -> SAID +1 CHRIS -> IT +1 CHOUETTE -> SHUETTE +1 CHONODEMAIRE -> SHONAU +1 CHLORATE -> CHLORIDE +1 CHIRP -> CHIRRUP +1 CHILLS -> CHILL +1 CHIDE -> CHIT +1 CHEEKE -> CHEEK +1 CHEEKBONES -> CHEEK +1 CHEEK -> CHEEKS +1 CHARLIE -> CHARLEY +1 CHARLEY'S -> CHARLIE'S +1 CHARACTERISTIC -> CHAAVERALISTIC +1 CHALONS -> CHELAN +1 CENTRAL -> CENTAL +1 CAVALRYMEN -> CAVERNMEN +1 CATCHED -> CAST +1 CARROLL -> CARL +1 CAR -> CARHORN +1 CAPRIVI'S -> CAPRIVI +1 CAPITULANTES -> CAPITULANT +1 CAPITALISTS -> CAPITALIST +1 CAP'S -> CAPS +1 CANVAS -> CANVATES +1 CANNOT -> CAN +1 CAMPAIGN -> CAPTAIN +1 CAIN -> CANE +1 CAGE -> CASE +1 CACKED -> KACKLED +1 CA'M -> CALM +1 BYE -> BY +1 BY -> BUY +1 BY -> BIE +1 BUZZARD -> BUZZER +1 BUTTON -> BOTTOM +1 BUTTERFLY -> BUT +1 BUSINESSWHICH -> BUSINESS +1 BURSHEBA -> FOR +1 BURN -> BURNED +1 BURDENS -> A +1 BULBS -> BOBS +1 BRYNHILD'S -> BRUNHILD'S +1 BRYNHILD -> BURNHILD +1 BRYNHILD -> BRUNHILD +1 BRYNHILD -> BEURNHILD +1 BROTHERS -> BROTHER'S +1 BRILLIANT -> BUOYANT +1 BREAST -> CHEST +1 BRAU -> BROW +1 BRASS -> BREAST +1 BRAHMAN -> RAMAN +1 BRAHMAN -> BROWMAN +1 BRAHMAN -> BRAHMIN +1 BRACY -> BRAVELEY +1 BRACY -> BRACELEY +1 BOY -> BY +1 BOTTOMED -> BOTTOM +1 BOONE -> BOOMEUER +1 BOEOTIAN -> BEE +1 BLOOD -> BLOODSTAINED +1 BLOKES -> LOLKS +1 BLODGETT -> BLODGET +1 BLODGETT -> BLDGET +1 BLODGETT -> BLAGET +1 BLESSED -> BLEST +1 BLANKETED -> BLANDED +1 BLACKLEG -> BLACK +1 BISQUE -> FISK +1 BIRDSEYE -> BIRD'S +1 BIN -> BEEN +1 BILL -> BUILD +1 BIG -> BOOK +1 BHANG -> BANG +1 BEULAH -> BOOLA +1 BETHUNE -> BESSOON +1 BETCHA -> BITCHER +1 BESIDE -> BESIDES +1 BENSON -> BASSOM +1 BENOIT -> BENOIS +1 BENNETT -> BENARD +1 BELLOWED -> BELOWED +1 BELLE -> BELL +1 BELL -> BELT +1 BEING -> BEEN +1 BEGUN -> BEGIN +1 BEFORE -> FORE +1 BEFAL -> BEFALL +1 BECOMES -> BECAME +1 BEASTLY -> PEASLEY +1 BEAR -> BARE +1 BEALE -> BEAL +1 BASSORAH -> PASORAH +1 BASIL -> BEILS +1 BASIL -> BAISIL +1 BARRACK -> BARRA +1 BARKLEY -> BARKELEY +1 BARGELLO -> BARGELO +1 BARELY -> VERY +1 BANYAN -> BANDON +1 BANYAN -> BANDED +1 BANDINELLO -> BEND +1 BALLROOM -> BALL +1 BALLOT -> BANNET +1 BALLOCK -> BALLOT +1 BALAMMED -> 
BELAMMED +1 BAG -> PACK +1 BAD -> THAN +1 BAD -> BAN +1 AZARIAH -> AZARAIAH +1 AWK'ARD -> AWKWARD +1 AW -> OH +1 AVIDITY -> AID +1 AVE -> HAVE +1 ATUM -> ATOM +1 ATTENTION -> INTENTION +1 ATTENTION -> ATTENTIONS +1 ATHELSTANE -> ADELSTEIN +1 AT -> OUT +1 AT -> IN +1 AT -> BUT +1 AT -> AND +1 ASKS -> ASK +1 ASKED -> ASKS +1 ASKED -> AS +1 ASHUR -> AESRE +1 ASCENSION -> ASCENSON +1 AS -> IS +1 ART -> ARE +1 ARSTS -> ASKS +1 ARSINOE'S -> ARSENAL'S +1 ARSINOE -> ARSENO +1 ARPAD -> ARPAT +1 AROUND -> A +1 ARE -> HAS +1 ARE -> AND +1 APPROVE -> PROVE +1 ANYONE'S -> ANY +1 ANY -> ANYTHING +1 ANTOLIAN -> ANTONIAN +1 ANNOYANCES -> ANNOYANCE +1 ANNIE -> ENNY +1 ANNIE -> ENNIE +1 ANGESTON -> ANGISTON +1 ANDS -> ENDS +1 ANDBUT -> AN +1 AND -> WHEN +1 AND -> WHAT +1 AND -> TO +1 AND -> THEN +1 AND -> ON +1 AND -> MISS +1 AND -> HE +1 AND -> END +1 AND -> CONFINED +1 AN -> THE +1 AN -> ANPERAL +1 AMYNTAS -> AMUNTUS +1 AMYNTAS -> AMENTUS +1 ALTHEA -> ALPHIE +1 ALONGER -> ALONG +1 ALLS -> ALL +1 ALLOW -> ALOAD +1 ALL -> WHILE +1 ALL -> WELL +1 ALL -> ON +1 ALKALOIDS -> AKALOIDS +1 ALKALOIDS -> AKALITES +1 ALIVE -> ARE +1 ALIMENTARY -> ELEMENTARY +1 ALF -> A +1 ALESSANDRO -> ALISANDRO +1 ALCOHOL -> ALKALINE +1 AL -> ALHIJAZ +1 AIN'T -> END +1 AIN'T -> AM +1 AIN'T -> AIRN'T +1 AID -> APE +1 AGRARIAN -> AGRIEAN +1 AFTERWARDS -> AFTERWARD +1 AFTERWARD -> AFTERWARDS +1 ADVENTURES -> VENTURES +1 ADN'T -> HADN'T +1 ADHERENTS -> ADHERENCE +1 ADAIR -> EIGHT +1 AD -> ED +1 ACTORS -> FACTORS +1 ACKNOWLEDGE -> PRONOUNCE +1 ACHESON -> ARCHISON +1 ACCEPT -> EXCEPT +1 ABSTAIN -> ABSTAINED +1 ABOARD -> OF +1 ABOARD -> ABROAD +1 A -> YOUR +1 A -> SO +1 A -> OF +1 A -> KIT +1 A -> IT +1 A -> IN +1 A -> HAVE +1 A -> AWHILE +1 A -> AT +1 A -> AEUBERG +1 A -> ADRY +1 A -> ACLEPTOMANIA + +DELETIONS: count ref +11 THE +10 AND +7 IS +5 I +5 A +4 TO +4 OF +4 AM +3 WILL +3 STAIRS +3 ONE +3 AL +2 YARD +2 WARD +2 VE +2 THAT +2 ROOM +2 OUT +2 NEWBERG +2 LACHAISE +2 IN +2 HIM +2 HAVE +2 FISH +2 DIGGER +2 DE +2 COUNT +2 BOY +2 AS +2 ARE +1 WOMAN +1 WOKE +1 WITH +1 WHO +1 WHILE +1 VERY +1 UN +1 TURBULENT +1 TREE +1 THING +1 THERE +1 THEIR +1 STONES +1 STAINED +1 SOUL +1 SHORE +1 SEE +1 S +1 ROAD +1 RHODIAN +1 REW +1 PROA +1 POTUM +1 PEALS +1 PATIENCE +1 PASSED +1 OUGHT +1 OR +1 ODD +1 NOT +1 N +1 MUIR +1 MOTHER +1 MONSEIGNEUR +1 MISSUS +1 MIHI +1 MATTERS +1 MAKING +1 MAKE +1 MABILLON +1 M +1 LOCKS +1 LL +1 LAND +1 LA +1 KNOW +1 KLEPTOMANIAC +1 KID +1 IT +1 ILU +1 HOUSE +1 HIS +1 HIJAZ +1 HER +1 HAWED +1 HAD +1 GIRLS +1 GAZE +1 FOR +1 FIND +1 FIELD +1 FATHER +1 EXTRA +1 EAST +1 DRY +1 DEMETER +1 DAY +1 CURSE +1 CREAM +1 COOPS +1 CHIPS +1 BUT +1 BULB +1 ATUM +1 ASSAILING +1 ARCHIAS +1 AQUA +1 AN +1 ALL +1 ALI + +INSERTIONS: count hyp +15 THE +10 A +9 AND +8 IT +7 TO +7 OF +7 IS +6 ONE +6 ARE +5 THAT +4 HAVE +3 WHICH +3 OUT +3 NOT +3 NIGHT +3 LAD +3 HAD +3 AN +3 ALL +2 WILL +2 WHILE +2 STAIRS +2 O +2 MORROW +2 MASTER +2 LIKE +2 IN +2 HE +2 DAYS +2 CATO +2 BUT +2 AM +1 YOUTHFUL +1 WITH +1 WHAT +1 WAS +1 TWO +1 TURNED +1 TRADES +1 THIS +1 THEY +1 THAT'S +1 TALE +1 T +1 STRIKE +1 STOVES +1 STORE +1 SPECIALLY +1 SPEAKER +1 SONG +1 SKIN +1 SIGNOR +1 SEVENTH +1 SCRUTINIZED +1 ROUND +1 ROOM +1 RIPENESS +1 REED +1 PUSH +1 PROUVENT +1 PROCKS +1 PENCIL +1 OUTDO +1 OUR +1 OTIAN +1 OTHER +1 ONE'S +1 ON +1 NUTRITION +1 NOIR +1 NO +1 NIGHTS +1 NEULES +1 NELLO +1 NEEDS +1 NAB +1 MER +1 MEN +1 MASON +1 MAKE +1 LONE +1 LIVE +1 LISTEN +1 LEG +1 LEEK +1 KNOW +1 JEALOUS +1 ILL +1 I +1 HOISS +1 HIS +1 HASTE +1 HALF +1 GIRLS +1 GINK'S +1 FULL +1 FLY +1 FINISHED 
+1 FICTION +1 FAITH +1 EYE +1 EXTRA +1 DUTY +1 DID +1 DE +1 DAME +1 CRIPPLE +1 CREAKERY +1 COTE +1 COROTS +1 COATS +1 BURDEN +1 BONES +1 BOARDS +1 BOARD +1 BILLIOS +1 BEDS +1 BAFF +1 BACKS +1 AWAKE +1 AS +1 ANDY'S +1 AIR + +PER-WORD STATS: word corr tot_errs count_in_ref count_in_hyp +THE 3096 101 3134 3159 +A 1111 70 1145 1147 +AND 1756 68 1788 1792 +TO 1434 35 1444 1459 +I 832 34 853 845 +IN 797 33 808 819 +OF 1377 28 1386 1396 +THAT 668 27 682 681 +IS 404 27 415 420 +IT 650 26 660 666 +YOU 504 24 513 519 +AN 113 20 125 121 +HIS 482 19 493 490 +THIS 235 17 246 241 +HAD 371 17 375 384 +ON 272 16 281 279 +O 34 16 41 43 +HE 689 16 693 701 +WILL 156 15 166 161 +OUT 160 13 166 167 +HAVE 229 13 233 238 +THEIR 104 12 112 108 +ONE 211 12 216 218 +ARE 155 12 159 163 +NOT 399 11 401 408 +ALL 232 11 236 239 +HERMON 1 10 11 1 +HER 282 10 289 285 +FOR 427 10 431 433 +AT 273 10 279 277 +WOULD 124 9 129 128 +THEY 237 9 239 244 +THERE 170 9 174 175 +MURDOCK 0 9 0 9 +MURDOCH 0 9 9 0 +MISTER 72 9 74 79 +MISSUS 26 9 31 30 +DO 147 9 153 150 +BUT 369 9 370 377 +AM 58 9 62 63 +WHAT 189 8 192 194 +WAS 649 8 653 653 +OR 105 8 109 109 +OLD 55 8 57 61 +OH 32 8 35 37 +MEN 63 8 66 68 +MAN 110 8 114 114 +JUST 53 8 57 57 +I'VE 6 8 10 10 +HIM 303 8 305 309 +FAUVENT 2 8 10 2 +ANY 73 8 76 78 +WITH 382 7 385 386 +ROUND 16 7 18 21 +OL 0 7 7 0 +I'M 33 7 34 39 +AROUND 15 7 20 17 +WHILE 20 6 22 24 +UP 142 6 145 145 +NO 186 6 190 188 +MISS 12 6 15 15 +KIND 19 6 19 25 +IZZY 1 6 7 1 +IM 0 6 6 0 +FULL 16 6 18 20 +DID 94 6 95 99 +DE 13 6 18 14 +AS 336 6 339 339 +ANYONE 2 6 6 4 +YOUR 103 5 104 107 +YOU'RE 7 5 9 10 +YOU'LL 3 5 4 7 +THESE 47 5 51 48 +THEM 142 5 144 145 +STAIRS 0 5 3 2 +RAYSTOKE 0 5 5 0 +N'T 0 5 5 0 +LIKE 88 5 89 92 +KINE 1 5 6 1 +INTO 115 5 117 118 +HE'S 7 5 9 10 +HAS 98 5 102 99 +GURR 9 5 14 9 +GRAHAM 0 5 0 5 +CINDERLAD 2 5 7 2 +BEALE 5 5 10 5 +AIN'T 6 5 11 6 +WHICH 191 4 191 195 +WHEN 160 4 162 162 +WELL 84 4 85 87 +UPSTAIRS 2 4 3 5 +THERE'S 9 4 11 11 +THEN 150 4 153 151 +SHARRKAN 1 4 5 1 +SHARKAN 0 4 0 4 +SEE 75 4 77 77 +SCHOOL 5 4 8 6 +REGIN 0 4 4 0 +PILESER 0 4 4 0 +PIGEONCOTE 3 4 7 3 +OTHER 69 4 69 73 +MYRTILUS 0 4 4 0 +MONSEIGNEUR 2 4 6 2 +MILICENT 2 4 6 2 +ME 258 4 260 260 +M 3 4 7 3 +LOOK 31 4 33 33 +LITTLE 91 4 91 95 +LIL 0 4 4 0 +LAD 7 4 7 11 +ITS 56 4 57 59 +HARE 0 4 1 3 +HAND 39 4 40 42 +FROM 178 4 179 181 +FARRINDER 0 4 4 0 +CONFECTIONERY 0 4 0 4 +CONFECTIONARY 0 4 4 0 +CHEEK 0 4 1 3 +BY 197 4 199 199 +BELL 2 4 3 5 +BANYAN 0 4 4 0 +ARSINOE 0 4 4 0 +AL 12 4 16 12 +ZARATHUSTRA 3 3 6 3 +YOU'VE 4 3 7 4 +YO 0 3 3 0 +YER 0 3 3 0 +WILFRID 6 3 9 6 +WILFRED 0 3 0 3 +WHO 149 3 151 150 +WHERE 55 3 57 56 +WERE 165 3 166 167 +WE'RE 5 3 7 6 +WE 150 3 151 152 +WAYNE 0 3 3 0 +WARD 3 3 6 3 +WAIN 0 3 0 3 +UPON 69 3 71 70 +ULRICA 0 3 3 0 +TWO 61 3 62 63 +TOWARDS 13 3 13 16 +TOWARD 6 3 9 6 +TOO 41 3 41 44 +TIME 83 3 83 86 +THY 28 3 28 31 +SKIN 3 3 5 4 +SHARDURIS 0 3 3 0 +SHAG 0 3 3 0 +SEEM 7 3 8 9 +SANCT 0 3 3 0 +SAINT 19 3 19 22 +SAID 250 3 252 251 +ROOM 35 3 37 36 +REGAN 0 3 0 3 +RAYSTROKE 0 3 0 3 +RAM 1 3 4 1 +PROCLUS 0 3 3 0 +PROAS 0 3 3 0 +PROA 0 3 3 0 +PRIEST 3 3 6 3 +OVER 48 3 49 50 +OUGHT 15 3 16 17 +NOW 114 3 116 115 +NIGHT 49 3 49 52 +MORE 99 3 99 102 +MONSEIGNOR 0 3 0 3 +MISTAH 0 3 3 0 +MINE 13 3 15 14 +MESTIENNE 2 3 5 2 +MASTER 22 3 23 24 +MAKE 65 3 66 67 +LUNA'S 0 3 3 0 +LOVER 3 3 3 6 +LEVER 0 3 3 0 +LESLIE 20 3 23 20 +KNOW 94 3 95 96 +JES 0 3 3 0 +JAKIE 0 3 0 3 +JAKEY 0 3 3 0 +IT'S 21 3 24 21 +INTERESTS 2 3 5 2 +INTEREST 7 3 7 10 +IN'T 0 3 3 0 +I'D 10 3 13 10 +HOW 80 3 81 82 +HOUSE 35 3 37 36 +HONOUR 1 3 3 2 
+HOME 34 3 34 37 +HOLD 7 3 10 7 +HERMON'S 0 3 3 0 +HERE 69 3 69 72 +HEAR 18 3 21 18 +HANDS 13 3 15 14 +GRAY 2 3 3 4 +GOBIES 0 3 0 3 +GOBEY'S 0 3 3 0 +FAVOUR 3 3 4 5 +FAVOR 0 3 2 1 +FAFNIR 0 3 3 0 +FAFNER 0 3 0 3 +EUREKA 0 3 0 3 +E'S 0 3 3 0 +E 1 3 2 3 +DON'T 72 3 73 74 +DAYS 12 3 13 14 +COUNT 15 3 18 15 +COLOUR 1 3 2 3 +COLOR 0 3 2 1 +BRYNHILD 0 3 3 0 +BRAHMAN 16 3 19 16 +BOY 24 3 27 24 +BLODGETT 0 3 3 0 +BEFELL 0 3 0 3 +BEFEL 0 3 3 0 +AWKWARD 3 3 3 6 +AWHILE 1 3 3 2 +ASKS 0 3 1 2 +ART 12 3 13 14 +ARSENAL 0 3 0 3 +ZAU 6 2 8 6 +YO'LL 0 2 2 0 +YES 42 2 42 44 +YARD 0 2 2 0 +WROTE 3 2 4 4 +WOT 1 2 2 2 +WORKING 8 2 8 10 +WONDERED 7 2 8 8 +WITHAL 0 2 2 0 +WISHED 5 2 5 7 +WI 0 2 1 1 +WHOLE 22 2 22 24 +WHO'S 0 2 0 2 +WHITE 17 2 18 18 +VERY 83 2 84 84 +VE 0 2 2 0 +URARTU 0 2 2 0 +UN 0 2 2 0 +TURNED 31 2 31 33 +TRIFLE 0 2 1 1 +TRIBE 2 2 4 2 +TRAVELLED 0 2 1 1 +TRAVELED 0 2 1 1 +TORQUILSTONE 0 2 2 0 +TORKELSTONE 0 2 0 2 +TONIGHT 0 2 2 0 +TIGLATH 2 2 4 2 +TIGLAS 0 2 0 2 +TIGER 11 2 13 11 +THEY'RE 9 2 11 9 +THAT'S 22 2 23 23 +THAN 73 2 74 74 +STOKER 0 2 2 0 +STATE 22 2 24 22 +STALKER 0 2 0 2 +SOUL 9 2 11 9 +SONG 2 2 2 4 +SO 210 2 211 211 +SING 3 2 4 4 +SINDERLAD 0 2 0 2 +SIGHING 1 2 2 2 +SHUT 9 2 10 10 +SHOULD 71 2 72 72 +SHORES 0 2 0 2 +SHORE 1 2 2 2 +SHIP 16 2 17 17 +SHELLFISH 0 2 0 2 +SHELL 1 2 3 1 +SHE'LL 1 2 2 2 +SHE 291 2 292 292 +SHAWS 0 2 2 0 +SHALL 61 2 62 62 +SHAGG 0 2 0 2 +SEWING 1 2 1 3 +SEEK 8 2 10 8 +SCULPTOR'S 0 2 1 1 +SAYING 17 2 17 19 +RUN 8 2 9 9 +RUM 0 2 1 1 +ROPES 1 2 1 3 +ROPE'S 0 2 2 0 +RISDON 5 2 7 5 +RING 5 2 6 6 +RIDER 0 2 2 0 +RHODIAN 0 2 2 0 +RANSOM 6 2 8 6 +PROW 1 2 1 3 +PROUVENT 0 2 0 2 +PRONOUNCE 1 2 1 3 +PRIESTS 1 2 1 3 +POOR 24 2 25 25 +POLL 1 2 3 1 +POLE 0 2 0 2 +PLATTERBAFF 1 2 3 1 +PLACE 38 2 39 39 +PITTS 0 2 2 0 +PIGEON 1 2 1 3 +PHUT 1 2 3 1 +PHOSPHOR 0 2 2 0 +PERE 0 2 2 0 +PASSED 8 2 10 8 +PASS 6 2 6 8 +P 1 2 2 2 +OUR 70 2 70 72 +ORGANISER 3 2 5 3 +ONTO 0 2 2 0 +ONE'S 4 2 5 5 +OLE 0 2 0 2 +OFFICERS 2 2 3 3 +OFF 52 2 53 53 +NOUGHT 1 2 2 2 +NEWBERG 0 2 2 0 +NEAR 16 2 16 18 +MOUTH 7 2 7 9 +MOTHER 51 2 52 52 +MOST 42 2 44 42 +MOSES 6 2 8 6 +MORROW 6 2 6 8 +MOOR 2 2 3 3 +MIND 23 2 24 24 +MESTER 0 2 2 0 +MERTILLUS 0 2 0 2 +MERLONUS 0 2 2 0 +MENAHEM 0 2 2 0 +MEDIAN 0 2 1 1 +MATI 0 2 2 0 +MARRIAGE 5 2 6 6 +MANKATO 0 2 2 0 +MAKES 10 2 11 11 +LUNNY'S 0 2 0 2 +LUNA 1 2 3 1 +LOOKOUT 0 2 1 1 +LONE 0 2 1 1 +LL 0 2 2 0 +LIVE 16 2 17 17 +LILBURN 1 2 3 1 +LET 62 2 64 62 +LEFT 37 2 38 38 +LAUDERDALE 0 2 2 0 +LAST 47 2 47 49 +LARGE 10 2 10 12 +LAND 18 2 20 18 +LAKE 4 2 6 4 +LACHAISE 0 2 2 0 +LABOURING 0 2 0 2 +LABORING 0 2 2 0 +LA 1 2 3 1 +KENITES 0 2 2 0 +JULIEN 0 2 2 0 +JULIAN 0 2 0 2 +JOHN 7 2 9 7 +IF 166 2 167 167 +I'LL 23 2 24 24 +HURT 5 2 6 6 +HOZE 0 2 2 0 +HORSTIUS 0 2 2 0 +HORSES 4 2 4 6 +HOO'S 0 2 2 0 +HONOR 3 2 3 5 +HO 1 2 3 1 +HERMANN 0 2 0 2 +HEAD 41 2 41 43 +HE'LL 2 2 2 4 +HAYS 0 2 2 0 +HAYES 1 2 1 3 +HAREMON 0 2 0 2 +HALL 12 2 14 12 +HAID 0 2 2 0 +GUV'NOR 0 2 0 2 +GUNNER 0 2 0 2 +GUNNAR 0 2 2 0 +GREY 0 2 1 1 +GREEN 1 2 2 2 +GRAVEDIGGER 0 2 0 2 +GRAVE 15 2 17 15 +GRAEME 0 2 2 0 +GOV'NOR 0 2 2 0 +GOLD 6 2 6 8 +GOING 38 2 39 39 +GOIN 0 2 1 1 +GOD 27 2 29 27 +GOAL 1 2 3 1 +GIT 0 2 1 1 +GIRLS 5 2 6 6 +GIRL 11 2 11 13 +GET 52 2 53 53 +FRISTOE 0 2 2 0 +FLY 4 2 5 5 +FLEROV'S 0 2 2 0 +FLEROV 0 2 2 0 +FISH 5 2 7 5 +FIELDS 2 2 2 4 +FIELD 2 2 4 2 +FATS 0 2 2 0 +FATHER 49 2 51 49 +FAST 10 2 12 10 +FAN 1 2 3 1 +EYE 11 2 11 13 +EXTRA 1 2 2 2 +EUSEBIUS 0 2 2 0 +ETERNAL 1 2 3 1 +ESPECIALLY 5 2 7 5 +END 15 2 15 17 +EM 2 2 2 4 +ELEXANDER 0 2 2 0 +EILEEN 1 2 3 1 +EIGHT 8 2 9 9 +DUPE 0 2 0 2 
+DUKE 10 2 12 10 +DONE 37 2 39 37 +DOG 8 2 9 9 +DIGGER 8 2 10 8 +DIDN'T 20 2 21 21 +DICKIE 21 2 23 21 +DEMETER 2 2 4 2 +DEFENSE 2 2 4 2 +DEFENCE 2 2 2 4 +DEATH 16 2 16 18 +DEAD 19 2 21 19 +DARK 11 2 11 13 +D 0 2 2 0 +CRUCIFIXION 7 2 9 7 +COUNSEL 1 2 2 2 +CORNER 11 2 13 11 +CORAL 0 2 1 1 +COPS 3 2 5 3 +COMES 14 2 15 15 +COLE 0 2 2 0 +CO 0 2 0 2 +CLIMBED 0 2 0 2 +CATO 0 2 0 2 +CANAITES 0 2 0 2 +CAMEN 0 2 0 2 +BREAST 1 2 2 2 +BRACY 5 2 7 5 +BOUT 0 2 2 0 +BOTTOM 5 2 5 7 +BELE 0 2 0 2 +BEEN 133 2 133 135 +BASIL 2 2 4 2 +BANNON 0 2 0 2 +BALLOT 3 2 4 4 +BAD 8 2 10 8 +AWK 0 2 2 0 +AWAKE 4 2 4 6 +ATUM 0 2 2 0 +ATTENTION 5 2 7 5 +ASKED 44 2 46 44 +ARCHY 4 2 6 4 +ARCHIE 0 2 0 2 +ANNIE 20 2 22 20 +AMYNTAS 0 2 2 0 +ALKALOIDS 0 2 2 0 +ALIVE 1 2 2 2 +ALEXANDER 0 2 0 2 +AILEEN 0 2 0 2 +AID 4 2 5 5 +AFTERWARDS 5 2 6 6 +AFTERWARD 1 2 2 2 +ABOUT 79 2 79 81 +ABOARD 1 2 3 1 +ZUL 0 1 0 1 +ZEMSTVOS 0 1 1 0 +ZAYNAB 0 1 1 0 +ZAUAM 0 1 0 1 +YUSS 0 1 1 0 +YOZKI 0 1 0 1 +YOUTHFUL 0 1 0 1 +YOU'D 4 1 5 4 +YOU' 0 1 0 1 +YORKE 0 1 0 1 +YORK 2 1 3 2 +YO' 0 1 1 0 +YET 33 1 34 33 +YES'M 0 1 1 0 +YE 13 1 14 13 +YAUSKY 0 1 1 0 +YASSEM 0 1 0 1 +YAHWEH 0 1 1 0 +YAHWAY 0 1 0 1 +WUNNERED 0 1 1 0 +WRITER 1 1 1 2 +WRIT 0 1 1 0 +WRETCH'S 0 1 0 1 +WRETCH 2 1 3 2 +WRAPPED 1 1 1 2 +WORSHIP'S 0 1 1 0 +WORSHIP 4 1 4 5 +WORRY 2 1 3 2 +WORKINGMEN 0 1 1 0 +WORKADAY 0 1 0 1 +WORK 25 1 26 25 +WORD'S 0 1 0 1 +WON'T 12 1 13 12 +WOMAN 18 1 19 18 +WOKE 0 1 1 0 +WISHT 0 1 1 0 +WISH 14 1 15 14 +WIRES 0 1 1 0 +WINDS 0 1 0 1 +WINDOWS 1 1 1 2 +WINDOW 16 1 17 16 +WILT 4 1 4 5 +WILLY 0 1 1 0 +WILLOW 0 1 0 1 +WILLIE 0 1 0 1 +WILKSES 0 1 1 0 +WILKS 0 1 1 0 +WILKESES 0 1 0 1 +WILKES 0 1 0 1 +WILDERNESS 7 1 8 7 +WILD 8 1 9 8 +WIDEAWAKE 0 1 1 0 +WIDE 8 1 8 9 +WHY 46 1 47 46 +WHOSE 16 1 16 17 +WHOLLY 2 1 2 3 +WHO'D 0 1 1 0 +WHILOME 0 1 1 0 +WHEREABOUTS 2 1 3 2 +WHEREABOUT 0 1 0 1 +WHER 0 1 1 0 +WHEEL 1 1 2 1 +WHATEVER 10 1 10 11 +WHATE'ER 0 1 1 0 +WHACKS 0 1 1 0 +WESTERNLAND 0 1 0 1 +WESTERN 1 1 2 1 +WELCOMED 1 1 1 2 +WEEVILY 0 1 0 1 +WEEVILLY 0 1 1 0 +WEBBS 0 1 0 1 +WEBB'S 0 1 1 0 +WEAR 0 1 0 1 +WE'LL 2 1 2 3 +WAX 0 1 0 1 +WAVERLY 0 1 1 0 +WAVERLEY 0 1 0 1 +WATONWAN 0 1 1 0 +WATERWAN 0 1 0 1 +WATER 21 1 22 21 +WASURCEBAH 0 1 0 1 +WARS 2 1 2 3 +WARD'S 0 1 1 0 +WANDERED 0 1 0 1 +WALUTTER 0 1 0 1 +WALLET 2 1 2 3 +WALLA 0 1 0 1 +WAKE 2 1 3 2 +WAITING 7 1 7 8 +WAITIN 0 1 1 0 +WAGGOT 0 1 1 0 +WAGGING 0 1 1 0 +WAGGETT 0 1 0 1 +WAAG 0 1 0 1 +VUN 0 1 0 1 +VOWELS 0 1 1 0 +VOWALS 0 1 0 1 +VOUGHT 0 1 1 0 +VON 0 1 1 0 +VOLVITUR 0 1 1 0 +VOLVETER 0 1 0 1 +VIOLENCE 15 1 16 15 +VIL 0 1 1 0 +VIDA 0 1 0 1 +VICE 1 1 1 2 +VETXRY 0 1 0 1 +VETCH 0 1 0 1 +VESTRY 0 1 1 0 +VESINOUS 0 1 0 1 +VERNDER 0 1 0 1 +VENTURES 0 1 0 1 +VENTRILOQUIST 0 1 1 0 +VENTRILOQUEST 0 1 0 1 +VENIAL 0 1 1 0 +VENAL 0 1 0 1 +VEIL 2 1 2 3 +VAVASOUR 0 1 1 0 +VAVASOR 0 1 0 1 +VAULT 9 1 9 10 +VAUGIRARD 0 1 1 0 +VATS 0 1 0 1 +VAST 2 1 3 2 +VASSILIEVITCH 0 1 1 0 +VANE 0 1 1 0 +VAIN 5 1 5 6 +UTTER 2 1 2 3 +US 58 1 58 59 +URYTU 0 1 0 1 +URTU 0 1 0 1 +URARTIAN 0 1 1 0 +UNLESS 7 1 8 7 +UNCREAM 0 1 0 1 +ULTIMATELY 0 1 1 0 +UKINZER 0 1 1 0 +UDDER 0 1 1 0 +TYRANNY 0 1 1 0 +TWYMAN'S 0 1 1 0 +TWIMMANS 0 1 0 1 +TURNEDETH 0 1 0 1 +TURBULENT 0 1 1 0 +TURBRIMENT 0 1 0 1 +TUMICE 0 1 0 1 +TRY 16 1 17 16 +TRUTH 10 1 10 11 +TRUSTY 0 1 0 1 +TRUSTEE 0 1 1 0 +TRULY 4 1 5 4 +TRUCE 1 1 2 1 +TRIVE 0 1 0 1 +TRIED 18 1 19 18 +TRELAWNEY 0 1 1 0 +TREE 8 1 9 8 +TRE 0 1 0 1 +TRAVEL 1 1 1 2 +TRAPHIC 0 1 0 1 +TRANSSHIP 0 1 1 0 +TRANSHIP 0 1 0 1 +TRAINS 0 1 0 1 +TRAINED 1 1 1 2 +TRAINDAWG 0 1 1 0 +TRAFFIC 0 1 1 0 +TRADES 0 1 0 1 +TOURID 0 1 0 1 
+TOUCHED 4 1 5 4 +TOTING 0 1 1 0 +TOP 2 1 3 2 +TOMORROW 0 1 1 0 +TOLERBLE 0 1 1 0 +TOLERABLE 1 1 1 2 +TOLD 26 1 26 27 +TOILET 2 1 3 2 +TOESTRAS 0 1 0 1 +TOCH 0 1 0 1 +TOADING 0 1 0 1 +TITTI 0 1 0 1 +THROUGH 36 1 37 36 +THREE 36 1 37 36 +THIRD'S 0 1 0 1 +THING 20 1 21 20 +THESHIP 0 1 0 1 +THEE 26 1 27 26 +THEACCUS 0 1 0 1 +THATS 0 1 0 1 +TERRA 0 1 1 0 +TERMS 1 1 1 2 +TERIS 0 1 0 1 +TEMPT 1 1 1 2 +TEMPLAR 1 1 2 1 +TELLTALE 1 1 2 1 +TELL 52 1 52 53 +TEETH 3 1 4 3 +TEENO 0 1 0 1 +TEALE 0 1 0 1 +TEAL 0 1 1 0 +TEA 1 1 2 1 +TAX 0 1 0 1 +TATTLERS 0 1 1 0 +TATLERS 0 1 0 1 +TASKMASTER 0 1 1 0 +TART 0 1 0 1 +TAPIS 0 1 1 0 +TAPI 0 1 0 1 +TANQUAM 0 1 1 0 +TAMAR 1 1 2 1 +TAM 0 1 0 1 +TALKED 5 1 5 6 +TALK 14 1 15 14 +TALE 2 1 2 3 +TAHITI 0 1 1 0 +T'OTHER 0 1 1 0 +T 1 1 1 2 +SYRIA 3 1 4 3 +SYNONYMON 0 1 1 0 +SYNONYM 0 1 0 1 +SWORD 11 1 12 11 +SWING 0 1 0 1 +SWELP 0 1 1 0 +SWAYING 0 1 1 0 +SWARD 0 1 0 1 +SWAP 0 1 0 1 +SWAG 0 1 1 0 +SURELY 8 1 8 9 +SURE 17 1 18 17 +SUPPOSE 10 1 11 10 +SUPPER 1 1 1 2 +SUMTHIN 0 1 1 0 +SUMERIA 0 1 0 1 +SUIT 2 1 2 3 +SUFFOLK 0 1 1 0 +SUFFOLED 0 1 0 1 +SUCCOURUS 0 1 0 1 +SUCCOURS 0 1 1 0 +SUBJECT 6 1 6 7 +STROLLED 0 1 0 1 +STRODE 0 1 1 0 +STRIKE 2 1 2 3 +STREAM 4 1 4 5 +STRANGEST 0 1 1 0 +STRANGERS 1 1 1 2 +STRANGE 4 1 5 4 +STRAINS 0 1 1 0 +STOW 0 1 0 1 +STOVES 0 1 0 1 +STORIES 3 1 3 4 +STORES 1 1 2 1 +STORE 3 1 3 4 +STONEWALL 2 1 3 2 +STONES 3 1 4 3 +STEW 0 1 1 0 +STEVER 0 1 0 1 +STERNWALL 0 1 0 1 +STERNMOST 0 1 0 1 +STERN 0 1 1 0 +STEPS 7 1 7 8 +STEPPED 1 1 2 1 +STEEVER 0 1 1 0 +STEAK 0 1 0 1 +STAYS 0 1 0 1 +STAYING 1 1 2 1 +STAYED 4 1 4 5 +STAY 5 1 6 5 +STATES 7 1 7 8 +STATEROOM 1 1 1 2 +STAS 0 1 1 0 +STARS 0 1 0 1 +STANDARDS 0 1 0 1 +STANDARD 3 1 4 3 +STAKE 0 1 1 0 +STAINED 1 1 2 1 +SQUEER 0 1 0 1 +SQUARE 1 1 2 1 +SPONSUS 0 1 1 0 +SPONNES 0 1 0 1 +SPONGE 0 1 1 0 +SPONDYLES 0 1 1 0 +SPIRITENCE 0 1 0 1 +SPINNING 1 1 1 2 +SPINE 0 1 0 1 +SPILLING 0 1 1 0 +SPIES 0 1 0 1 +SPICE 0 1 1 0 +SPENDING 0 1 0 1 +SPECTREE 0 1 0 1 +SPECIALLY 0 1 0 1 +SPEAR 0 1 0 1 +SPEAKER 1 1 1 2 +SPATTLE 0 1 0 1 +SPARSELY 0 1 1 0 +SPAKE 4 1 5 4 +SPADDLE 0 1 1 0 +SOWING 0 1 1 0 +SOUSE 0 1 1 0 +SOUNDTING 0 1 0 1 +SOULS 2 1 2 3 +SOUGHT 5 1 6 5 +SOTELES 0 1 1 0 +SORCHAUS 0 1 0 1 +SOOT 0 1 1 0 +SONNY 0 1 1 0 +SON 18 1 18 19 +SOMEONE 1 1 2 1 +SOME 77 1 77 78 +SOLE 0 1 0 1 +SOCULUS 0 1 0 1 +SNARLS 0 1 0 1 +SNARLED 0 1 1 0 +SMOLNY 0 1 1 0 +SMOLNEY 0 1 0 1 +SLAP 0 1 0 1 +SLAB 1 1 2 1 +SKINNED 0 1 0 1 +SKEWER 0 1 0 1 +SKEW 0 1 1 0 +SIXXES 0 1 0 1 +SIXES 0 1 1 0 +SIRE'S 0 1 0 1 +SIRE 3 1 4 3 +SIR 39 1 40 39 +SINUHIT 0 1 1 0 +SINGING 2 1 2 3 +SINEWET 0 1 0 1 +SINE 0 1 1 0 +SINDBAD 4 1 5 4 +SINDA 0 1 0 1 +SINBAD 0 1 0 1 +SIN 2 1 2 3 +SILLY 2 1 3 2 +SIGNOR 1 1 1 2 +SIGNED 1 1 2 1 +SIDDY 0 1 0 1 +SHUTTERS 1 1 2 1 +SHUETTE 0 1 0 1 +SHRUGS 0 1 0 1 +SHRUBS 1 1 2 1 +SHOULDERS 4 1 4 5 +SHORT 8 1 8 9 +SHOPBOY 0 1 0 1 +SHOP 5 1 6 5 +SHONAU 0 1 0 1 +SHO'LY 0 1 1 0 +SHIPS 3 1 4 3 +SHET 0 1 1 0 +SHERLLY 0 1 0 1 +SHERE 0 1 1 0 +SHERBURN 0 1 1 0 +SHERBIN 0 1 0 1 +SHEETS 0 1 1 0 +SHE'S 4 1 5 4 +SHALLUM 0 1 1 0 +SHAD 0 1 0 1 +SEYTON 2 1 3 2 +SEVERSON 0 1 1 0 +SEVENTH 3 1 3 4 +SEVEN 11 1 12 11 +SETAN 0 1 0 1 +SET 31 1 31 32 +SERVICE 15 1 15 16 +SENTENCED 0 1 1 0 +SELLER 1 1 2 1 +SEEMED 18 1 18 19 +SEEING 8 1 8 9 +SEATS 1 1 1 2 +SCUSE 0 1 1 0 +SCULPTORS 1 1 2 1 +SCULPT'S 0 1 0 1 +SCRUTINIZED 0 1 0 1 +SCRUTINISED 0 1 1 0 +SCRAPPING 0 1 0 1 +SCRAPPIN 0 1 1 0 +SCORE 0 1 0 1 +SCORCHED 0 1 0 1 +SCO'TCH 0 1 1 0 +SCHULBERG'S 0 1 1 0 +SCHOOLGIRLS 0 1 0 1 +SCHOOLDAYS 0 1 1 0 +SCHOOLBOY 0 1 0 1 +SCHOLBURG'S 0 1 0 1 +SCHILUM 0 1 0 1 +SCAPED 0 1 
1 0 +SCAPE 0 1 1 0 +SAYIN 0 1 1 0 +SAWED 0 1 0 1 +SAVED 0 1 0 1 +SATISFACTORILY 0 1 1 0 +SATISFACTIONILY 0 1 0 1 +SANS 0 1 1 0 +SANNY 0 1 0 1 +SANG 1 1 2 1 +SANCTUS 0 1 0 1 +SANCTESS 0 1 1 0 +SAMARIA 0 1 1 0 +SALOON 1 1 1 2 +SALONE 0 1 1 0 +SAILORS 0 1 0 1 +SAIL 3 1 4 3 +SAH 1 1 2 1 +SAGOTARE 0 1 0 1 +SAGITTAIRE 0 1 1 0 +SAFE 7 1 8 7 +SA 0 1 0 1 +S'POSE 3 1 3 4 +S 2 1 3 2 +RYO 0 1 1 0 +RUNSEN 0 1 0 1 +RUMP 0 1 1 0 +RULER 0 1 1 0 +ROXBURY 0 1 1 0 +ROUVNENES 0 1 0 1 +ROUGE 0 1 0 1 +ROUDIAN 0 1 0 1 +ROTHS 0 1 1 0 +ROSE 12 1 12 13 +ROSAMUN 0 1 1 0 +ROSAMOND 0 1 0 1 +ROMAN 2 1 2 3 +ROLL 1 1 2 1 +ROCK 3 1 4 3 +ROAR 1 1 1 2 +ROAD 15 1 16 15 +RITER 0 1 0 1 +RIPENESS 0 1 0 1 +RIO 0 1 0 1 +RINGMASTER 0 1 1 0 +RIGOROUS 0 1 1 0 +RIDGES 0 1 0 1 +RIDGE'S 0 1 1 0 +RID 4 1 5 4 +RICHMOND 0 1 0 1 +RHYDROPPIST 0 1 0 1 +REYSTROKE 0 1 0 1 +REWARD 3 1 4 3 +REW 0 1 1 0 +REVORED 0 1 0 1 +REVOLUTIONISTS 0 1 1 0 +REVOLUTIONIST 1 1 1 2 +REVOLTE 0 1 1 0 +REVOLT 0 1 0 1 +REVEREND 14 1 15 14 +REVERED 0 1 0 1 +REVELLING 0 1 0 1 +REVELING 0 1 1 0 +RETZCH'S 0 1 1 0 +REST 19 1 19 20 +RESK 0 1 1 0 +RESINOUS 0 1 1 0 +RESCUED 2 1 3 2 +RESCUE 1 1 1 2 +REPLIED 39 1 39 40 +REPEATED 5 1 6 5 +REND 0 1 1 0 +REMISSIONER'S 0 1 0 1 +REMISSION 0 1 1 0 +REIGNS 0 1 1 0 +REIGN 1 1 1 2 +REGOROUS 0 1 0 1 +REGEN 0 1 0 1 +REG'LER 0 1 1 0 +REG'LAR 0 1 0 1 +REFORMED 1 1 1 2 +REED 0 1 0 1 +REALIZED 0 1 0 1 +REALIZE 0 1 1 0 +REALISED 0 1 1 0 +REALISE 0 1 0 1 +READY 12 1 13 12 +RE 2 1 2 3 +RAY 1 1 1 2 +RAW 0 1 0 1 +RATCHFORD 0 1 1 0 +RAN 9 1 9 10 +RAMSON 0 1 0 1 +RAMSES 0 1 1 0 +RAMESES 0 1 0 1 +RAMAN 0 1 0 1 +RAM'S 0 1 1 0 +RAHAM'S 0 1 0 1 +RAG 0 1 0 1 +RADPROP 0 1 1 0 +RACKED 0 1 1 0 +RACK 0 1 1 0 +RACHFORD 0 1 0 1 +RACHEL 2 1 3 2 +RACES 0 1 1 0 +RACE 2 1 2 3 +RABB'S 0 1 1 0 +RAB'S 0 1 0 1 +QUOTED 0 1 1 0 +QUOTE 0 1 0 1 +QUMMUKH 0 1 1 0 +QUITE 15 1 16 15 +QUEST 0 1 1 0 +QUANTRELL 0 1 1 0 +QUANTRAILLE 0 1 0 1 +QUANTITIES 1 1 2 1 +QUALITIES 1 1 1 2 +PUTTEL 0 1 1 0 +PUSH 1 1 1 2 +PURLE 0 1 0 1 +PULLY 0 1 0 1 +PULLEY 0 1 1 0 +PUDDLES 0 1 1 0 +PSALM 0 1 1 0 +PRYTANEUM 0 1 1 0 +PROVING 0 1 0 1 +PROVEN 0 1 1 0 +PROVE 5 1 5 6 +PROTECTORY 0 1 0 1 +PROTECTORATE 0 1 1 0 +PROPRE 0 1 1 0 +PROPERA 0 1 0 1 +PROETS 0 1 0 1 +PROCOMB 0 1 0 1 +PROCLYS 0 1 0 1 +PROCLIS 0 1 0 1 +PROCLAS 0 1 0 1 +PROCKS 0 1 0 1 +PRISONERS 3 1 3 4 +PRISONER 12 1 13 12 +PRIMER 0 1 1 0 +PRIMARY 0 1 0 1 +PRICKED 0 1 0 1 +PREVENT 1 1 1 2 +PRETINNIUM 0 1 0 1 +PRESTIGE 1 1 2 1 +PRESGE 0 1 0 1 +PRESENTERS 0 1 0 1 +PRESENT 16 1 16 17 +PRENTICESHIP 0 1 1 0 +PRECEPTORY 1 1 2 1 +PRECEPTORS 1 1 2 1 +PRECEPTARY 0 1 0 1 +PRECENTORS 0 1 1 0 +PREACH 0 1 0 1 +PRACTISED 0 1 0 1 +PRACTICED 0 1 1 0 +POUNDS 0 1 0 1 +POUND 2 1 3 2 +POUCHES 0 1 1 0 +POUCHED 0 1 0 1 +POTUM 0 1 1 0 +POTION 0 1 1 0 +POSTHASTE 0 1 1 0 +POST 6 1 6 7 +POPULOUS 0 1 0 1 +POPULACE 0 1 1 0 +POORER 0 1 0 1 +POMEROY 0 1 1 0 +POLYSER 0 1 0 1 +POLMROY 0 1 0 1 +POLESU 0 1 0 1 +PO 0 1 1 0 +PLUMB 0 1 1 0 +PLUM 0 1 0 1 +PLEDS 0 1 0 1 +PLEASANT 8 1 9 8 +PLEAS 0 1 1 0 +PLATTER 0 1 0 1 +PLANTING 0 1 0 1 +PLANNING 1 1 2 1 +PITTHAM 0 1 0 1 +PITT 0 1 0 1 +PITHUM 0 1 1 0 +PIROUCHES 0 1 0 1 +PIPES 1 1 1 2 +PILLOWED 0 1 1 0 +PILLOW 1 1 1 2 +PILASTER 0 1 0 1 +PIKES 1 1 2 1 +PIGSKIN 0 1 1 0 +PIGEONCOTES 0 1 1 0 +PIGEONBUL 0 1 0 1 +PIG 1 1 1 2 +PIECOTE 0 1 0 1 +PICKED 2 1 3 2 +PICHKOTE 0 1 0 1 +PHOSPHORIBULB 0 1 0 1 +PHOSPHORE 0 1 0 1 +PHOCES 0 1 0 1 +PHILISTINES 1 1 2 1 +PHILISTINE 0 1 0 1 +PHILIPPUS 2 1 3 2 +PHILIPPA 0 1 0 1 +PHELPS'S 0 1 0 1 +PHELPS 1 1 2 1 +PETROL 0 1 0 1 +PETREL 0 1 1 0 +PESTES 0 1 0 1 +PESTE 0 1 1 0 +PERSEPTORS 0 1 0 1 +PEROCKS 0 
1 0 1 +PERNOUNCE 0 1 1 0 +PERELACHASE 0 1 0 1 +PEONAGE 0 1 1 0 +PENNEL 0 1 0 1 +PENDING 0 1 1 0 +PENCIL 1 1 1 2 +PENCE 0 1 1 0 +PEN 0 1 0 1 +PEER 2 1 3 2 +PEASLEY 0 1 0 1 +PEASE 0 1 0 1 +PEASANTS 5 1 6 5 +PEAS 1 1 2 1 +PEARL 0 1 1 0 +PEALS 0 1 1 0 +PEACEFUL 1 1 2 1 +PEACE 8 1 8 9 +PAW 0 1 0 1 +PATUM 0 1 0 1 +PATTER 0 1 0 1 +PATIENCE 2 1 3 2 +PATH 1 1 1 2 +PATESENCES 0 1 0 1 +PASTES 0 1 1 0 +PASTE 1 1 1 2 +PAST 9 1 10 9 +PASORAH 0 1 0 1 +PARTS 4 1 5 4 +PARTLY 1 1 2 1 +PARTIALLY 0 1 0 1 +PART 21 1 21 22 +PARR 1 1 2 1 +PARLAISE 0 1 0 1 +PARKS 0 1 1 0 +PAR 0 1 0 1 +PANTS 0 1 0 1 +PANEL 0 1 1 0 +PALL 0 1 1 0 +PALAESTRA 0 1 1 0 +PADDLING 0 1 1 0 +PADDLIN 0 1 0 1 +PACK 0 1 0 1 +PACE 1 1 1 2 +OWNERS 1 1 2 1 +OW'M 0 1 1 0 +OW 0 1 1 0 +OVERWHELMING 0 1 0 1 +OVERRIPENESS 0 1 1 0 +OVERFULL 0 1 1 0 +OVERFLOWING 0 1 1 0 +OUTGAZE 0 1 0 1 +OUTER 1 1 2 1 +OUTDO 1 1 1 2 +OUGHTN'T 0 1 1 0 +OUGHED 0 1 0 1 +OUEN 0 1 1 0 +OTIAN 0 1 0 1 +ORIENTAL 0 1 1 0 +ORIENT 0 1 0 1 +ORGANIZER 0 1 0 1 +ORGANISR 0 1 0 1 +ORAS 0 1 0 1 +OPINION 3 1 3 4 +OPENED 10 1 11 10 +OPEN 15 1 15 16 +OPE 0 1 1 0 +ON'T 0 1 1 0 +OME 0 1 1 0 +OLL 0 1 1 0 +OLIVE 6 1 7 6 +OKAY 0 1 1 0 +OFFICIALS 0 1 0 1 +OFFICER 2 1 3 2 +OFFENSE 0 1 1 0 +OFFENCE 2 1 2 3 +OFFEN 0 1 1 0 +OFAU 0 1 0 1 +ODD 2 1 3 2 +OBOCOCK 0 1 1 0 +OBJECT 6 1 7 6 +OBACOCK 0 1 0 1 +O'NIGHTS 0 1 1 0 +O'NEILL 0 1 1 0 +O'NEIL 0 1 0 1 +NYTOUCH 0 1 1 0 +NUTS 0 1 1 0 +NUTRITION 2 1 2 3 +NURSE 0 1 0 1 +NUNS 2 1 3 2 +NUN'S 0 1 0 1 +NUMAN 0 1 0 1 +NU'UMAN 0 1 1 0 +NOWT 0 1 1 0 +NORTHEAST 0 1 0 1 +NORTH 3 1 4 3 +NONETHELESS 0 1 1 0 +NONE 12 1 13 12 +NOISE 5 1 5 6 +NOIR 0 1 0 1 +NOCOMN 0 1 0 1 +NOCCOUNT 0 1 0 1 +NIPPER 0 1 1 0 +NIGHTS 2 1 2 3 +NICO 0 1 1 0 +NICHO 0 1 0 1 +NIBBER 0 1 0 1 +NEXTER 0 1 1 0 +NEXT 10 1 10 11 +NEWBURG 0 1 0 1 +NEW 29 1 30 29 +NEVERTHELESS 3 1 3 4 +NEULES 0 1 0 1 +NERVE 0 1 1 0 +NEOTIO 0 1 0 1 +NEOSHO 0 1 1 0 +NELLO 0 1 0 1 +NEIGHBOURHOOD 0 1 1 0 +NEIGHBOUR 1 1 1 2 +NEIGHBORHOOD 0 1 0 1 +NEIGHBOR 1 1 2 1 +NEEDS 8 1 8 9 +NEEDLED 0 1 1 0 +NEEDLE 0 1 0 1 +NEED 9 1 9 10 +NEAREST 4 1 5 4 +NEARER 3 1 4 3 +NAUGHT 0 1 1 0 +NATURALLY 7 1 7 8 +NATURAL 6 1 7 6 +NANDY'S 0 1 1 0 +NAB 0 1 0 1 +N 0 1 1 0 +MYSTERY 0 1 0 1 +MYSTERIOUS 5 1 6 5 +MYRTLES 0 1 0 1 +MUSTHIENNE 0 1 0 1 +MUSTACHES 0 1 1 0 +MURDOCK'S 0 1 0 1 +MURDOCH'S 0 1 1 0 +MUIR 0 1 1 0 +MUG 0 1 1 0 +MUCH 41 1 41 42 +MOWER 0 1 1 0 +MOVES 0 1 0 1 +MOVEMENT 1 1 2 1 +MOUTHWHAT 0 1 1 0 +MOUTHS 0 1 1 0 +MOUSTACHES 0 1 0 1 +MOUNTNORRIS 0 1 1 0 +MOTIONLESS 0 1 1 0 +MOTION 0 1 0 1 +MOSTLY 2 1 2 3 +MOSES'S 0 1 0 1 +MORTIFICATIONTHAT 0 1 1 0 +MORTIFICATION 0 1 0 1 +MORE'N 0 1 1 0 +MOPED 0 1 1 0 +MONTNORRIS 0 1 0 1 +MONKERS 0 1 1 0 +MOMMOL 0 1 1 0 +MOMENT 24 1 24 25 +MOCKERS 0 1 0 1 +MO 0 1 1 0 +MITZTER 0 1 0 1 +MITTEE 0 1 0 1 +MISTIAN 0 1 0 1 +MISSISSIPPIENT 0 1 0 1 +MISSISSIPPIAN 0 1 1 0 +MINNY 0 1 0 1 +MINNIE 1 1 2 1 +MINIONETTE 0 1 1 0 +MIMICK 0 1 1 0 +MIMIC 0 1 0 1 +MILLY 0 1 1 0 +MILLSTONE 0 1 0 1 +MILLSTON 0 1 1 0 +MILLICENT 0 1 0 1 +MILKED 0 1 0 1 +MILICON'S 0 1 0 1 +MILICON 0 1 0 1 +MILICENT'S 0 1 1 0 +MIHI 0 1 1 0 +MIDRIFF 0 1 1 0 +MIDIAN 1 1 2 1 +MIDDY 0 1 1 0 +MIDDRIFTS 0 1 0 1 +MIDDLING 0 1 1 0 +MIDDLIN 0 1 0 1 +METHINKETH 0 1 1 0 +METHINK 0 1 0 1 +MET 10 1 11 10 +MESTIENNE'S 0 1 1 0 +MERTALUS 0 1 0 1 +MERRYMAKING 0 1 0 1 +MERRY 2 1 3 2 +MERNEPTAH 0 1 1 0 +MERIT 2 1 3 2 +MERELY 6 1 6 7 +MERELONA'S 0 1 0 1 +MER 0 1 0 1 +MEN'S 0 1 1 0 +MELONUS 0 1 0 1 +MELLICENT 0 1 0 1 +MEEK 0 1 0 1 +MEDEAN 0 1 0 1 +MEANT 10 1 10 11 +MC 3 1 4 3 +MAY 42 1 43 42 +MATURITY 0 1 0 1 +MATTERS 3 1 4 3 +MATRON 0 1 0 1 +MATIILLIU 0 1 0 1 +MATEY 0 
1 1 0 +MASTIENNE 0 1 0 1 +MASTED 0 1 0 1 +MASSA 0 1 0 1 +MASON 1 1 1 2 +MASLOVITCH 0 1 0 1 +MASKED 0 1 1 0 +MARYS 0 1 0 1 +MARVELOUS 0 1 0 1 +MARVELLOUS 3 1 4 3 +MARVELLED 0 1 0 1 +MARVELED 0 1 1 0 +MARTIAN 2 1 2 3 +MARSPEAKER 0 1 1 0 +MARSHAL 5 1 6 5 +MARRBLE 0 1 0 1 +MARBLE 1 1 2 1 +MAR 1 1 1 2 +MANONET 0 1 0 1 +MANAHIM 0 1 0 1 +MANAHEM 0 1 0 1 +MAN'S 13 1 13 14 +MAMAL 0 1 0 1 +MALNUTRITION 0 1 1 0 +MAL 0 1 0 1 +MAKING 17 1 18 17 +MAJORITY 5 1 6 5 +MAITIE 0 1 0 1 +MAINE 0 1 1 0 +MAIN 1 1 1 2 +MAIAN'S 0 1 0 1 +MADE 77 1 77 78 +MAC 0 1 0 1 +MABILLON 0 1 1 0 +LYSIMACHUS 0 1 1 0 +LYCAS 0 1 0 1 +LUNER'S 0 1 0 1 +LUCAS 0 1 1 0 +LOWER 2 1 3 2 +LOWBURN 0 1 0 1 +LOWBORN 0 1 0 1 +LOVE 29 1 29 30 +LOT 6 1 6 7 +LORDSHIPS 0 1 1 0 +LORDSHIP 0 1 0 1 +LORDADALE 0 1 0 1 +LORD'S 0 1 1 0 +LORD 18 1 19 18 +LORA 0 1 0 1 +LOOKED 25 1 25 26 +LONG 52 1 52 53 +LOLKS 0 1 0 1 +LOINER 0 1 0 1 +LOGS 0 1 0 1 +LOCKS 0 1 1 0 +LOCK 3 1 4 3 +LIZZIE 0 1 0 1 +LIZABETH 0 1 1 0 +LIZ'BETH 0 1 0 1 +LIYER 0 1 0 1 +LISUMACHUS 0 1 0 1 +LISTEN 11 1 11 12 +LISPON 0 1 0 1 +LISPIN'S 0 1 0 1 +LINTON 0 1 0 1 +LINERY 0 1 0 1 +LINCOLN 0 1 1 0 +LIME 0 1 1 0 +LIM 0 1 0 1 +LIKELY 2 1 3 2 +LIFE'S 0 1 0 1 +LIFE 54 1 55 54 +LIEGE 0 1 0 1 +LIE 3 1 4 3 +LIAISON 0 1 1 0 +LEXINGTON 0 1 1 0 +LEVERLY 0 1 0 1 +LESSINGTON 0 1 0 1 +LESS 9 1 9 10 +LESLEY 0 1 0 1 +LENOIR 0 1 1 0 +LENIN 1 1 2 1 +LENDING 0 1 0 1 +LENA 0 1 0 1 +LEMONSHIPS 0 1 0 1 +LEMON 0 1 1 0 +LEGS 2 1 3 2 +LEG 1 1 1 2 +LEEK 0 1 0 1 +LEECH 0 1 1 0 +LEE'S 0 1 1 0 +LEE 0 1 0 1 +LEAPT 0 1 1 0 +LEAPED 1 1 1 2 +LEADPENCIL 0 1 1 0 +LEADERS 1 1 2 1 +LEADER'S 0 1 0 1 +LEAD 2 1 2 3 +LE 0 1 0 1 +LAWS 2 1 3 2 +LAW 5 1 5 6 +LAURA 2 1 3 2 +LAUGHED 10 1 11 10 +LAUGH'D 0 1 0 1 +LATH 0 1 1 0 +LASH 1 1 2 1 +LARDADAE 0 1 0 1 +LARCH 6 1 7 6 +LANE 2 1 2 3 +LANDOWNERS 1 1 1 2 +LAIN 1 1 2 1 +LADS 1 1 2 1 +LACHRYMAL 0 1 0 1 +LACHRYMA 0 1 1 0 +LACH 0 1 0 1 +LABOUR 0 1 1 0 +LABOR 1 1 1 2 +L 3 1 4 3 +KUNITY 0 1 0 1 +KNOWN 10 1 10 11 +KNOTS 0 1 0 1 +KNOBBY 0 1 0 1 +KNOBBLY 0 1 1 0 +KNIGHTS 2 1 2 3 +KNIGHT 7 1 8 7 +KLEPTOMANIAC 0 1 1 0 +KLEPTOMANIA 0 1 1 0 +KITE 1 1 2 1 +KIT 0 1 0 1 +KING 44 1 45 44 +KINDNESS 7 1 7 8 +KINDER 1 1 2 1 +KIN 0 1 0 1 +KILLS 0 1 1 0 +KILLETH 0 1 0 1 +KID 1 1 2 1 +KETTLE 0 1 1 0 +KERSTALL 0 1 1 0 +KENNINGSBURG 0 1 0 1 +KEIDAM 0 1 0 1 +KEEPN 0 1 0 1 +KEDEM 0 1 1 0 +KAMAR 0 1 1 0 +KAMA 0 1 0 1 +KAIGHT 0 1 0 1 +KACKLED 0 1 0 1 +JUTS 0 1 0 1 +JUSTIFIED 1 1 2 1 +JUSTFIED 0 1 0 1 +JURGMENT 0 1 0 1 +JULIE 0 1 0 1 +JUIN 0 1 0 1 +JUDGMENT 8 1 9 8 +JUDGETH 0 1 0 1 +JUDGES 1 1 2 1 +JUDGE 7 1 7 8 +JOUVIN'S 0 1 1 0 +JOSTLINGS 0 1 0 1 +JOSHUA 0 1 1 0 +JOSEPIUS 0 1 0 1 +JONCEWA 0 1 0 1 +JON 0 1 0 1 +JOKINGLY 0 1 1 0 +JODURIS 0 1 0 1 +JOCELYN'S 0 1 1 0 +JIS 0 1 1 0 +JILT 0 1 1 0 +JIKI 0 1 0 1 +JEWELER 1 1 2 1 +JEST 0 1 0 1 +JEDGE 0 1 1 0 +JEALOUS 0 1 0 1 +JAWHAWED 0 1 0 1 +JAWER 0 1 0 1 +JANEIRO 0 1 0 1 +JANEERO 0 1 1 0 +JAKEY'S 0 1 1 0 +IZZY'S 0 1 1 0 +IT'LL 1 1 1 2 +ISSY 0 1 0 1 +ISSEY 0 1 0 1 +ISN'T 2 1 2 3 +ISIS 0 1 0 1 +ISEY'S 0 1 0 1 +ISEY 0 1 0 1 +IRONICAL 0 1 1 0 +IRONIC 0 1 0 1 +IRASCELLING 0 1 0 1 +INTRUSTED 0 1 0 1 +INTENTIONALLY 1 1 2 1 +INTENTION 4 1 4 5 +INTENTIALLY 0 1 0 1 +INTENSE 3 1 3 4 +INTELLECTUALLY 0 1 1 0 +INTELLECTUAL 0 1 0 1 +INSTANT 4 1 5 4 +INSISTS 0 1 0 1 +INSIST 0 1 1 0 +INSCRIPTS 0 1 0 1 +INSCRIPTION 0 1 1 0 +INNES 0 1 1 0 +INJURE 2 1 3 2 +INGENUOUSLY 0 1 0 1 +INGENIOUSLY 0 1 1 0 +INCLINATIONS 0 1 0 1 +INCLINATION 0 1 1 0 +INCIDENT 1 1 1 2 +INAQUA 0 1 0 1 +IMPROVISATION 0 1 1 0 +IMPROCISATION 0 1 0 1 +ILU 1 1 2 1 +ILLS 0 1 1 0 +ILL 5 1 5 6 +IDEA 10 1 11 10 +ICES 2 1 3 2 +IASY 
0 1 0 1 +I'FAITH 0 1 1 0 +HUNTINGDON 4 1 5 4 +HUNTERS 0 1 1 0 +HUMOUR 1 1 1 2 +HUMOR 0 1 1 0 +HUH 0 1 1 0 +HOWL 0 1 1 0 +HOWARD 0 1 0 1 +HOW'S 0 1 0 1 +HOUSEMOTHER 0 1 0 1 +HOUNDY 0 1 0 1 +HOUNDED 0 1 1 0 +HOUGHTON 0 1 0 1 +HOSE 0 1 0 1 +HORN 0 1 1 0 +HORDE 0 1 1 0 +HOPE 16 1 16 17 +HOO'LL 0 1 1 0 +HOO 0 1 1 0 +HOMEWARDS 0 1 0 1 +HOMEWARD 0 1 1 0 +HOMEPUSH 0 1 1 0 +HOLY 5 1 6 5 +HOLLERED 0 1 0 1 +HOLLER 0 1 1 0 +HOLES 2 1 2 3 +HOISS 0 1 0 1 +HOFAX 0 1 0 1 +HOF 0 1 1 0 +HOES 0 1 0 1 +HODOBT 0 1 0 1 +HOARD 0 1 0 1 +HIT 2 1 3 2 +HISSELF 0 1 1 0 +HIRE 1 1 1 2 +HINTING 0 1 0 1 +HINT 2 1 3 2 +HINGOOPS 0 1 0 1 +HINDFELL 0 1 1 0 +HIMSELF 52 1 52 53 +HILL 6 1 7 6 +HIJAZ 0 1 1 0 +HIGH 7 1 8 7 +HI 1 1 1 2 +HEYDAY 0 1 1 0 +HEY 1 1 1 2 +HERRODIAN 0 1 0 1 +HERRIOT'S 0 1 0 1 +HERMONT'S 0 1 0 1 +HERMONN 0 1 0 1 +HERMOD 0 1 0 1 +HERMANN'S 0 1 0 1 +HERIOT'S 0 1 1 0 +HERE'S 1 1 2 1 +HEPTARKEYS 0 1 0 1 +HEPTARCHIES 0 1 1 0 +HENFELD 0 1 0 1 +HEN 0 1 1 0 +HELVIN 0 1 1 0 +HELVAN 0 1 0 1 +HELM 1 1 2 1 +HELLO 0 1 1 0 +HELEN 0 1 1 0 +HEARTY 2 1 3 2 +HEART 28 1 28 29 +HEARSE 3 1 4 3 +HE'D 4 1 4 5 +HAWED 0 1 1 0 +HAW 0 1 1 0 +HAUTTERSLEY 0 1 0 1 +HATTERSLEY 0 1 1 0 +HATE 4 1 4 5 +HASTE 5 1 5 6 +HASSAN 0 1 0 1 +HASAN 1 1 2 1 +HARVEY'SWHICH 0 1 1 0 +HARVEY'S 0 1 0 1 +HARRIS'S 0 1 0 1 +HARRIS 1 1 2 1 +HARMONT'S 0 1 0 1 +HARMONT 0 1 0 1 +HARMON 0 1 0 1 +HAREMAN 0 1 0 1 +HAPPILY 1 1 1 2 +HAPLY 0 1 1 0 +HANDERS 0 1 0 1 +HAM 0 1 0 1 +HALLO 0 1 0 1 +HALLAND 0 1 0 1 +HALF 23 1 23 24 +HAIR 6 1 6 7 +HAGITTS 0 1 0 1 +HADN'T 1 1 1 2 +HADDA 0 1 1 0 +HA 3 1 3 4 +GURRFATHER 0 1 0 1 +GURGE 0 1 0 1 +GUNS 3 1 3 4 +GUNDRAN 0 1 0 1 +GUNDERN'S 0 1 0 1 +GUIRUN'S 0 1 1 0 +GUILD 0 1 1 0 +GUESTS 2 1 2 3 +GUEST 3 1 4 3 +GUDRUN 0 1 1 0 +GRIM 0 1 0 1 +GRIEBS 0 1 0 1 +GRIBIER 0 1 1 0 +GREYSTOKE 0 1 0 1 +GREENWOOD 0 1 1 0 +GREENOOD 0 1 0 1 +GREENBACKS 0 1 1 0 +GREAVES 0 1 1 0 +GREAT 40 1 40 41 +GRAT 0 1 0 1 +GRASPS 0 1 1 0 +GRASPED 1 1 1 2 +GRANT 1 1 2 1 +GRANPAP 0 1 0 1 +GRANDPAP 2 1 3 2 +GRANDAME 0 1 1 0 +GRAND 5 1 5 6 +GRAMMATIUS 0 1 0 1 +GRAMMATEUS 0 1 1 0 +GRAM 0 1 1 0 +GOURR 0 1 0 1 +GOTA 0 1 0 1 +GOT 40 1 40 41 +GOSTLER 0 1 0 1 +GOSLER 0 1 1 0 +GORE 0 1 0 1 +GORDON'S 2 1 2 3 +GORDON 20 1 21 20 +GORACIAN 0 1 0 1 +GOODS 4 1 5 4 +GOOD 67 1 67 68 +GON 0 1 0 1 +GODS 0 1 0 1 +GODEBILLIOS 0 1 1 0 +GOD'S 2 1 3 2 +GO 61 1 61 62 +GLLOTON 0 1 0 1 +GLISPIN'S 0 1 1 0 +GLISPIN 1 1 2 1 +GLASS 9 1 9 10 +GLAD 4 1 5 4 +GIVING 7 1 8 7 +GIVIN 0 1 0 1 +GIVEN 12 1 13 12 +GIVE 46 1 47 46 +GIRTHING 0 1 1 0 +GIRTHED 0 1 1 0 +GIRK 0 1 0 1 +GIRDS 0 1 1 0 +GIRDING 0 1 0 1 +GIRDED 0 1 0 1 +GIORGIO 1 1 2 1 +GINK'S 0 1 0 1 +GIING 0 1 0 1 +GIAN 0 1 0 1 +GHEIMURNETH 0 1 0 1 +GERMS 0 1 1 0 +GEORGE'SWHICH 0 1 1 0 +GEORGE'S 1 1 1 2 +GEORGE 2 1 2 3 +GENTLEMEN'S 0 1 1 0 +GENTLEMEN 5 1 5 6 +GENTLEMAN'S 0 1 0 1 +GENTLEMAN 6 1 7 6 +GAZE 3 1 4 3 +GAUTHIER 0 1 1 0 +GATHIER 0 1 0 1 +GAMMON 0 1 1 0 +GAMIN 0 1 0 1 +GALLATIN 0 1 1 0 +GABLE 0 1 1 0 +G'YIRLS 0 1 1 0 +FURZE 0 1 1 0 +FROZE 0 1 1 0 +FROGS 2 1 2 3 +FROG'S 0 1 1 0 +FROCK 0 1 0 1 +FRO 1 1 2 1 +FRISTOW 0 1 0 1 +FRISTOE'S 0 1 1 0 +FRIEDOLIN 0 1 0 1 +FRIDOLIN 0 1 1 0 +FREESWOMEN 0 1 0 1 +FREES 0 1 1 0 +FRANCS 6 1 6 7 +FRANC 0 1 1 0 +FOUVET 0 1 0 1 +FOURTEENTHAT'S 0 1 1 0 +FOURTEEN 3 1 3 4 +FORTS 0 1 1 0 +FORSTOW'S 0 1 0 1 +FORMED 1 1 2 1 +FORE 0 1 0 1 +FOOLS 1 1 2 1 +FOOL'S 0 1 0 1 +FOOL 3 1 4 3 +FONTREVALLE 0 1 0 1 +FONTEVRAULT 0 1 1 0 +FOLLOWS 5 1 6 5 +FOLLOWED 10 1 10 11 +FOLK 4 1 4 5 +FOE 0 1 1 0 +FOCCETION 0 1 0 1 +FOALS 3 1 4 3 +FOAL 4 1 5 4 +FLUT 0 1 0 1 +FLOWERBEDS 0 1 1 0 +FLOWER 1 1 1 2 +FLOSSY 0 1 1 0 +FLOSSIE 0 1 0 1 
+FLOORBOARDS 0 1 1 0 +FLOOR 3 1 3 4 +FLIROV'S 0 1 0 1 +FLIES 0 1 0 1 +FLEROFF 0 1 0 1 +FLAVOUR 0 1 0 1 +FLAVOR 1 1 2 1 +FLATTERBUFF 0 1 0 1 +FLAIROFF 0 1 0 1 +FLAEROFF'S 0 1 0 1 +FITZ 0 1 0 1 +FISK 0 1 0 1 +FISHED 1 1 2 1 +FIRSTLY 0 1 0 1 +FIRS 0 1 0 1 +FINNICAL 0 1 0 1 +FINISHED 3 1 3 4 +FINICAL 0 1 1 0 +FIND 24 1 25 24 +FILTRATES 0 1 1 0 +FILTRATE 0 1 1 0 +FILLS 1 1 1 2 +FILDRATE 0 1 0 1 +FIGGER 0 1 1 0 +FIG 0 1 0 1 +FIFTEENTH 1 1 1 2 +FIFTEEN 6 1 7 6 +FICTION 0 1 0 1 +FIACRE 0 1 1 0 +FESTALE 0 1 0 1 +FESTAL 1 1 2 1 +FERVEN 0 1 0 1 +FELT 18 1 19 18 +FELL 15 1 15 16 +FEELS 1 1 2 1 +FAVOURITE 1 1 1 2 +FAVOURABLE 0 1 0 1 +FAVORITE 0 1 1 0 +FAVORABLE 0 1 1 0 +FAVANT 0 1 0 1 +FAUVAIN 0 1 0 1 +FAULTS 2 1 2 3 +FAUCES 0 1 1 0 +FATTY 0 1 1 0 +FATT'S 0 1 0 1 +FATIMATAS 0 1 0 1 +FATHERS 1 1 1 2 +FATHER'S 6 1 7 6 +FAT 2 1 2 3 +FARRINGERS 0 1 0 1 +FARMHOUSE 0 1 0 1 +FARM 2 1 3 2 +FARLANDER 0 1 0 1 +FARINNDER 0 1 0 1 +FARE 1 1 1 2 +FAR 21 1 21 22 +FANN 0 1 0 1 +FAMILY 18 1 18 19 +FAM'LY 0 1 1 0 +FAITH 9 1 9 10 +FAIR 12 1 13 12 +FAILED 7 1 8 7 +FAGOTS 0 1 1 0 +FAGGOTS 0 1 0 1 +FAFNIR'S 1 1 2 1 +FAFFNER'S 0 1 0 1 +FACTORS 0 1 0 1 +EYES 34 1 35 34 +EXTRACTED 0 1 0 1 +EXTRACT 2 1 3 2 +EXTEMPORIZED 0 1 1 0 +EXTEMPORISED 0 1 0 1 +EXPRARA 0 1 0 1 +EXPLAINED 4 1 4 5 +EXPERIENCE 3 1 4 3 +EXCUSE 3 1 3 4 +EXCLAIMED 14 1 15 14 +EXCITING 0 1 1 0 +EXCITE 0 1 1 0 +EXCEPT 11 1 11 12 +EXAMINING 2 1 3 2 +EXAMINED 3 1 3 4 +EXACTLY 9 1 9 10 +EXACKLY 0 1 1 0 +EVIL 4 1 5 4 +EVERYONE 1 1 1 2 +EVERYBODY'S 0 1 0 1 +EVERY 37 1 38 37 +EVER 27 1 27 28 +EV'YBODY'S 0 1 1 0 +EUPHRANOR 0 1 1 0 +EUPHRANER 0 1 0 1 +EU 0 1 0 1 +ETHELRIED 1 1 2 1 +ETHEL 0 1 0 1 +ET 2 1 3 2 +ESCAPED 1 1 1 2 +ESCAPE 12 1 12 13 +ERE'S 0 1 1 0 +ERE 1 1 2 1 +EPIGASTER 0 1 1 0 +EPHRAIM 0 1 1 0 +EPH 0 1 0 1 +ENTRUSTED 0 1 1 0 +ENTRANCE 0 1 0 1 +ENTR'ACTE 0 1 1 0 +ENSNAS 0 1 0 1 +ENSNARES 0 1 1 0 +ENSLAVED 2 1 3 2 +ENSLAVE 1 1 1 2 +ENNY 0 1 0 1 +ENNIE 0 1 0 1 +ENDURETH 0 1 1 0 +ENDURED 0 1 0 1 +ENDURE 3 1 3 4 +ENDS 0 1 0 1 +ENDEAVOURED 0 1 1 0 +ENDEAVORED 0 1 0 1 +EMETIC 0 1 1 0 +EMBRUN 0 1 1 0 +EMBRO 0 1 0 1 +ELLEN 0 1 0 1 +ELISIONS 0 1 1 0 +ELEMENTARY 0 1 0 1 +ELDER 1 1 2 1 +EILS 0 1 0 1 +EIGHTH 3 1 3 4 +EGYPTIAN 5 1 6 5 +EGIPSIAN 0 1 0 1 +EGGS 1 1 2 1 +EELS 0 1 1 0 +EDGING 2 1 3 2 +EDGED 0 1 0 1 +ED 0 1 0 1 +EBERGASTER 0 1 0 1 +EAU 0 1 1 0 +EAST 7 1 8 7 +EARTHY 0 1 0 1 +EARTH 19 1 20 19 +EAR 1 1 2 1 +EANES 0 1 0 1 +EAD 0 1 1 0 +E'LL 0 1 1 0 +E'ER 0 1 1 0 +DUTY 10 1 10 11 +DURING 19 1 20 19 +DUN 1 1 1 2 +DUME 0 1 0 1 +DUMAS 0 1 1 0 +DUM 0 1 1 0 +DU 0 1 1 0 +DRY 5 1 6 5 +DRUGSTORE 0 1 1 0 +DRUG 0 1 0 1 +DROUTH 0 1 1 0 +DROUGHTH 0 1 0 1 +DROP 3 1 3 4 +DRIVER 0 1 0 1 +DRAWERS 1 1 2 1 +DRAWER 0 1 0 1 +DRAUGHT 2 1 3 2 +DOWNSTAIRS 0 1 1 0 +DOWNING 0 1 1 0 +DOWN 68 1 68 69 +DOWER 0 1 1 0 +DOTH 1 1 2 1 +DONOVAN 1 1 2 1 +DONOMAN 0 1 0 1 +DONEBYON 0 1 0 1 +DOMONICO 0 1 0 1 +DOMIHIPPOTEM 0 1 0 1 +DOM 0 1 1 0 +DOESN'T 3 1 3 4 +DOCKYARD 0 1 0 1 +DOCK 0 1 1 0 +DOAN 0 1 1 0 +DNEY 0 1 0 1 +DISTRUSTED 0 1 1 0 +DISTRESSED 2 1 2 3 +DISTRESS 3 1 4 3 +DISTICHS 0 1 1 0 +DISTICHES 0 1 0 1 +DISNEY 0 1 1 0 +DISCURSE 0 1 0 1 +DINERS 0 1 1 0 +DINARS 1 1 1 2 +DICKY 0 1 0 1 +DICKI 0 1 0 1 +DEW 0 1 0 1 +DETECTIVES 0 1 1 0 +DETECTIVE 2 1 2 3 +DESTRUCTED 0 1 0 1 +DESTINIES 0 1 1 0 +DEST 0 1 0 1 +DESSERTS 1 1 2 1 +DESERTS 0 1 0 1 +DES 0 1 0 1 +DEORTERIZING 0 1 0 1 +DEODORIZING 0 1 1 0 +DENES 0 1 0 1 +DEMEANOUR 0 1 0 1 +DEMEANOR 0 1 0 1 +DELMONICO 0 1 1 0 +DEEPENS 0 1 0 1 +DEEPENED 0 1 1 0 +DEDROOM 0 1 0 1 +DEBTOR 0 1 1 0 +DEATHLIKE 0 1 1 0 +DEAR 14 1 14 15 +DEANS 0 1 1 0 +DEAF 0 1 1 0 +DEACH 
0 1 1 0 +DAY 60 1 61 60 +DAWNING 0 1 0 1 +DATED 1 1 2 1 +DAT 0 1 1 0 +DARKAND 0 1 1 0 +DAPHNE 3 1 4 3 +DAPHANE 0 1 0 1 +DAMN 1 1 1 2 +DAME 1 1 1 2 +DALYS 0 1 1 0 +DALY 2 1 3 2 +DALEY 0 1 0 1 +DAILIES 0 1 0 1 +DAGOS 0 1 1 0 +DAGOES 0 1 0 1 +DA 1 1 2 1 +CURSE 1 1 2 1 +CURRENTS 0 1 1 0 +CURRANTS 0 1 0 1 +CUPIUS 0 1 0 1 +CUMAC 0 1 0 1 +CRUX 0 1 1 0 +CRUCIFICTION 0 1 0 1 +CROYDON 0 1 0 1 +CROST 0 1 1 0 +CROSS 8 1 8 9 +CRISTEL 0 1 0 1 +CRIPPLED 1 1 2 1 +CRIPPLE 0 1 0 1 +CRIES 3 1 4 3 +CRIBIER 0 1 0 1 +CREW 5 1 5 6 +CREAM 5 1 6 5 +CREAKERY 0 1 0 1 +CRABS 0 1 0 1 +CRAB 6 1 7 6 +COYNESS 0 1 1 0 +COXCOMB 0 1 1 0 +COWLEY'S 0 1 1 0 +COWBER 0 1 0 1 +COURTYARD 2 1 2 3 +COURT 11 1 12 11 +COUPS 0 1 0 1 +COUNSELS 0 1 1 0 +COUNSELLOR 0 1 0 1 +COUNCILLOR 0 1 1 0 +COULD 113 1 114 113 +COTTONFIELD 0 1 0 1 +COTTON 2 1 3 2 +COTE 0 1 0 1 +CORYDON 0 1 1 0 +CORRESPONSES 0 1 0 1 +COROTS 0 1 0 1 +CORNERSTONES 0 1 0 1 +CORN 0 1 0 1 +CORMERAN 0 1 0 1 +CORKLE 0 1 1 0 +COQUETTE 1 1 2 1 +COPSE 0 1 0 1 +COOPS 0 1 1 0 +CONTINUOUS 0 1 0 1 +CONTINUAL 0 1 1 0 +CONSUMM 0 1 0 1 +CONSUL 0 1 0 1 +CONSONANTS 0 1 1 0 +CONSONANCE 0 1 0 1 +CONSOMME 0 1 1 0 +CONNECTED 1 1 1 2 +CONINGSBURGH 0 1 1 0 +CONFIRMATION 0 1 1 0 +CONFIRM 0 1 0 1 +CONFIRCET 0 1 0 1 +CONFINED 0 1 0 1 +CONFINE 0 1 0 1 +CONFIDENTLY 0 1 0 1 +CONFIDENTIALLY 0 1 1 0 +CONFIDE 1 1 2 1 +CONFICERE 0 1 1 0 +CONCRELL 0 1 0 1 +CONCOCTED 1 1 2 1 +CONCLUDED 2 1 2 3 +CON 0 1 0 1 +COMORIN 0 1 1 0 +COMMUNITY 4 1 5 4 +COMING 15 1 16 15 +COMEST 0 1 1 0 +COME 71 1 71 72 +COLOSSEUM 0 1 1 0 +COLLEIES 0 1 0 1 +COLLECTED 0 1 1 0 +COLISEUM 0 1 0 1 +COD 0 1 1 0 +COCKRELL 0 1 1 0 +COCKET 0 1 0 1 +COBBER 0 1 1 0 +COATS 0 1 0 1 +COALESCED 0 1 1 0 +COAL 0 1 0 1 +CLUMB 0 1 1 0 +CLOMB 0 1 1 0 +CLEVERLY 0 1 1 0 +CLERVAL 0 1 0 1 +CLASSES 3 1 4 3 +CLASS 6 1 6 7 +CLARY 0 1 0 1 +CLARET 1 1 2 1 +CLAPTOMANIA 0 1 0 1 +CLAIRVAUX 0 1 1 0 +CISEAUX 0 1 1 0 +CHRIST 3 1 3 4 +CHRIS 22 1 23 22 +CHOUETTE 0 1 1 0 +CHONODEMAIRE 0 1 1 0 +CHOKINGLY 0 1 0 1 +CHLORIDE 0 1 0 1 +CHLORATE 1 1 2 1 +CHIT 0 1 0 1 +CHIRRUP 0 1 0 1 +CHIRP 0 1 1 0 +CHIPS 1 1 2 1 +CHILLS 0 1 1 0 +CHILL 1 1 1 2 +CHILD 6 1 6 7 +CHIDE 0 1 1 0 +CHEST 3 1 3 4 +CHELAN 0 1 0 1 +CHEEKS 4 1 4 5 +CHEEKE 0 1 1 0 +CHEEKBONES 0 1 1 0 +CHARLIE'S 0 1 0 1 +CHARLIE 0 1 1 0 +CHARLEY'S 0 1 1 0 +CHARLEY 0 1 0 1 +CHARDURIS 0 1 0 1 +CHARACTERISTIC 0 1 1 0 +CHALONS 0 1 1 0 +CHADORIS 0 1 0 1 +CHAAVERALISTIC 0 1 0 1 +CEVERSON 0 1 0 1 +CEREMONY 1 1 1 2 +CENTRAL 5 1 6 5 +CENTAL 0 1 0 1 +CELLAR 0 1 0 1 +CCHOSTAWTE 0 1 0 1 +CAVERNMEN 0 1 0 1 +CAVALRYMEN 0 1 1 0 +CATTLE 5 1 5 6 +CATCHED 0 1 1 0 +CAST 9 1 9 10 +CASE 20 1 20 21 +CARROLL 2 1 3 2 +CARL 0 1 0 1 +CARHORN 0 1 0 1 +CARD 0 1 0 1 +CAR 4 1 5 4 +CAPTAIN 17 1 17 18 +CAPS 0 1 0 1 +CAPRIVI'S 0 1 1 0 +CAPRIVI 0 1 0 1 +CAPITULANTES 0 1 1 0 +CAPITULANT 0 1 0 1 +CAPITALISTS 0 1 1 0 +CAPITALIST 0 1 0 1 +CAP'S 0 1 1 0 +CANVATES 0 1 0 1 +CANVAS 0 1 1 0 +CANNOT 20 1 21 20 +CANE 1 1 1 2 +CAN 80 1 80 81 +CAMPAIGN 0 1 1 0 +CALM 0 1 0 1 +CAIN 0 1 1 0 +CAGE 7 1 8 7 +CACKED 0 1 1 0 +CABLE 0 1 0 1 +CA'M 0 1 1 0 +BYE 0 1 1 0 +BUZZER 0 1 0 1 +BUZZARD 8 1 9 8 +BUY 4 1 4 5 +BUTTON 0 1 1 0 +BUTTERFLY 0 1 1 0 +BUSINESSWHICH 0 1 1 0 +BUSINESS 12 1 12 13 +BURSHEBA 0 1 1 0 +BURNHILD 0 1 0 1 +BURNED 2 1 2 3 +BURN 0 1 1 0 +BURDENS 1 1 2 1 +BURDEN 1 1 1 2 +BUOYANT 0 1 0 1 +BULBS 0 1 1 0 +BULB 0 1 1 0 +BUILD 1 1 1 2 +BRYNHILD'S 0 1 1 0 +BRUNHILD'S 0 1 0 1 +BRUNHILD 0 1 0 1 +BROWMAN 0 1 0 1 +BROW 0 1 0 1 +BROTHERS 7 1 8 7 +BROTHER'S 1 1 1 2 +BRING 12 1 12 13 +BRILLIANT 4 1 5 4 +BRAXBURY 0 1 0 1 +BRAVELEY 0 1 0 1 +BRAU 0 1 1 0 +BRASS 1 1 2 1 +BRAHMIN 
0 1 0 1 +BRACELEY 0 1 0 1 +BOTTOMED 0 1 1 0 +BOTTLES 1 1 1 2 +BOONE 0 1 1 0 +BOOMEUER 0 1 0 1 +BOOLA 0 1 0 1 +BOOK 8 1 8 9 +BONES 2 1 2 3 +BOGS 0 1 0 1 +BOEOTIAN 0 1 1 0 +BOBS 0 1 0 1 +BOARDS 1 1 1 2 +BOARD 5 1 5 6 +BLOWER 0 1 0 1 +BLOODSTAINED 0 1 0 1 +BLOOD 7 1 8 7 +BLOKES 0 1 1 0 +BLODGET 0 1 0 1 +BLEST 0 1 0 1 +BLESSED 5 1 6 5 +BLDGET 0 1 0 1 +BLANKETED 0 1 1 0 +BLANDED 0 1 0 1 +BLAGET 0 1 0 1 +BLACKLEG 0 1 1 0 +BLACK 13 1 13 14 +BITCHER 0 1 0 1 +BISQUE 0 1 1 0 +BIRDSEYE 0 1 1 0 +BIRD'S 0 1 0 1 +BIN 1 1 2 1 +BILLIOS 0 1 0 1 +BILL 2 1 3 2 +BIG 7 1 8 7 +BIE 0 1 0 1 +BHANG 0 1 1 0 +BEURNHILD 0 1 0 1 +BEULAH 0 1 1 0 +BETHUNE 0 1 1 0 +BETCHA 0 1 1 0 +BESSOON 0 1 0 1 +BESIDES 9 1 9 10 +BESIDE 3 1 4 3 +BENSON 0 1 1 0 +BENOIT 0 1 1 0 +BENOIS 0 1 0 1 +BENNETT 0 1 1 0 +BEND 1 1 1 2 +BENARD 0 1 0 1 +BELT 0 1 0 1 +BELOWED 0 1 0 1 +BELLOWED 0 1 1 0 +BELLE 0 1 1 0 +BELEISURE 0 1 0 1 +BELEASER 0 1 0 1 +BELAMMED 0 1 0 1 +BEING 38 1 39 38 +BEILS 0 1 0 1 +BEGUN 2 1 3 2 +BEGIN 9 1 9 10 +BEFORE 55 1 56 55 +BEFALL 0 1 0 1 +BEFAL 0 1 1 0 +BEE 0 1 0 1 +BEDS 1 1 1 2 +BED 14 1 14 15 +BECOMES 5 1 6 5 +BECAME 10 1 10 11 +BEASTLY 0 1 1 0 +BEAR 7 1 8 7 +BEAL 0 1 0 1 +BASSORAH 1 1 2 1 +BASSOM 0 1 0 1 +BARRACK 1 1 2 1 +BARRA 0 1 0 1 +BARKLEY 0 1 1 0 +BARKELEY 0 1 0 1 +BARGELO 0 1 0 1 +BARGELLO 0 1 1 0 +BARELY 1 1 2 1 +BARE 1 1 1 2 +BANNET 0 1 0 1 +BANG 1 1 1 2 +BANDON 0 1 0 1 +BANDINELLO 0 1 1 0 +BANDED 0 1 0 1 +BAN 0 1 0 1 +BALLROOM 0 1 1 0 +BALLOCK 0 1 1 0 +BALL 0 1 0 1 +BALAMMED 0 1 1 0 +BAISIL 0 1 0 1 +BAG 4 1 5 4 +BAFF 0 1 0 1 +BACKS 1 1 1 2 +B 3 1 3 4 +AZARIAH 0 1 1 0 +AZARAIAH 0 1 0 1 +AYE 0 1 0 1 +AXE 0 1 0 1 +AWK'ARD 0 1 1 0 +AW 0 1 1 0 +AVIOUS 0 1 0 1 +AVIDITY 0 1 1 0 +AVE 0 1 1 0 +ATTENTIONS 0 1 0 1 +ATT 0 1 0 1 +ATOM 0 1 0 1 +ATHELSTANE 0 1 1 0 +ASSYRIA 3 1 3 4 +ASSAILING 0 1 1 0 +ASK 17 1 17 18 +ASHUR 0 1 1 0 +ASHORE 4 1 4 5 +ASCENSON 0 1 0 1 +ASCENSION 0 1 1 0 +ARSTS 0 1 1 0 +ARSINOE'S 0 1 1 0 +ARSENO 0 1 0 1 +ARSENAL'S 0 1 0 1 +ARPAT 0 1 0 1 +ARPAD 1 1 2 1 +ARCHISON 0 1 0 1 +ARCHIAS 0 1 1 0 +AQUA 0 1 1 0 +APPROVE 0 1 1 0 +APPRENTICESHIP 0 1 0 1 +APPEALS 1 1 1 2 +APE 1 1 1 2 +ANYTHING 31 1 31 32 +ANYONE'S 0 1 1 0 +ANTONIAN 0 1 0 1 +ANTOLIAN 0 1 1 0 +ANPERAL 0 1 0 1 +ANNOYANCES 0 1 1 0 +ANNOYANCE 1 1 1 2 +ANGISTON 0 1 0 1 +ANGESTON 0 1 1 0 +ANDY'S 0 1 0 1 +ANDS 0 1 1 0 +ANDBUT 0 1 1 0 +AMUNTUS 0 1 0 1 +AMENTUS 0 1 0 1 +AMATIC 0 1 0 1 +ALWAYS 34 1 34 35 +ALTHEA 2 1 3 2 +ALPHIE 0 1 0 1 +ALONGER 0 1 1 0 +ALONG 15 1 15 16 +ALOAD 0 1 0 1 +ALLS 0 1 1 0 +ALLOW 7 1 8 7 +ALLEGIANCE 0 1 0 1 +ALKALINE 0 1 0 1 +ALISANDRO 0 1 0 1 +ALIMENTARY 0 1 1 0 +ALIKE 2 1 2 3 +ALI 3 1 4 3 +ALHIJAZ 0 1 0 1 +ALF 0 1 1 0 +ALESSANDRO 0 1 1 0 +ALCOHOL 4 1 5 4 +AKENJER 0 1 0 1 +AKALOIDS 0 1 0 1 +AKALITES 0 1 0 1 +AIRN'T 0 1 0 1 +AIR 10 1 10 11 +AGRIEAN 0 1 0 1 +AGRARIAN 0 1 1 0 +AEUBERG 0 1 0 1 +AESRE 0 1 0 1 +ADVENTURES 2 1 3 2 +ADRY 0 1 0 1 +ADN'T 0 1 1 0 +ADHERENTS 0 1 1 0 +ADHERENCE 0 1 0 1 +ADEPTOR 0 1 0 1 +ADELSTEIN 0 1 0 1 +ADAIR 0 1 1 0 +AD 0 1 1 0 +ACTORS 0 1 1 0 +ACLEPTOMANIA 0 1 0 1 +ACKNOWLEDGE 1 1 2 1 +ACHESON 0 1 1 0 +ACCEPT 4 1 5 4 +ABSTAINED 0 1 0 1 +ABSTAIN 0 1 1 0 +ABROAD 4 1 4 5 +ZEAL 1 0 1 1 +ZAMAN 4 0 4 4 +YUNKERS 1 0 1 1 +YOUTH 4 0 4 4 +YOURSELVES 2 0 2 2 +YOURSELF 9 0 9 9 +YOURS 1 0 1 1 +YOUNGEST 1 0 1 1 +YOUNGERS 2 0 2 2 +YOUNGER 8 0 8 8 +YOUNG 39 0 39 39 +YONDER 5 0 5 5 +YOLKS 1 0 1 1 +YIELDED 1 0 1 1 +YIELD 1 0 1 1 +YESTERDAY 3 0 3 3 +YEP 1 0 1 1 +YEOMEN 1 0 1 1 +YEOMAN 1 0 1 1 +YELLS 1 0 1 1 +YELLOW 4 0 4 4 +YELLED 1 0 1 1 +YELL 1 0 1 1 +YEARS 20 0 20 20 +YEARNS 1 0 1 1 +YEARNING 1 0 1 1 +YEAR 7 0 7 7 +YEA 
1 0 1 1 +YAWN 1 0 1 1 +YARNS 1 0 1 1 +YARDS 1 0 1 1 +YACHT 1 0 1 1 +WYLDER'S 1 0 1 1 +WYLDER 1 0 1 1 +WRONG 4 0 4 4 +WRITTEN 5 0 5 5 +WRITING 1 0 1 1 +WRITE 1 0 1 1 +WRINKLES 1 0 1 1 +WRINGING 1 0 1 1 +WRIGGLING 1 0 1 1 +WRETCHED 2 0 2 2 +WRECKAGE 1 0 1 1 +WREATHS 1 0 1 1 +WRAPPING 1 0 1 1 +WOUNDS 2 0 2 2 +WOUNDED 6 0 6 6 +WOUND 2 0 2 2 +WOULDST 1 0 1 1 +WOULDN'T 9 0 9 9 +WOTTETH 1 0 1 1 +WORTHY 1 0 1 1 +WORTH 2 0 2 2 +WORST 1 0 1 1 +WORSHIPPERS 1 0 1 1 +WORSE 10 0 10 10 +WORN 3 0 3 3 +WORLDLY 1 0 1 1 +WORLD 14 0 14 14 +WORKSHOP 1 0 1 1 +WORKS 5 0 5 5 +WORKMAN 1 0 1 1 +WORKHOUSE 1 0 1 1 +WORKERS 1 0 1 1 +WORKER 1 0 1 1 +WORKED 2 0 2 2 +WORE 3 0 3 3 +WORDS 18 0 18 18 +WORD 15 0 15 15 +WOODSON 1 0 1 1 +WOODS 2 0 2 2 +WOODEN 6 0 6 6 +WOOD 4 0 4 4 +WONT 4 0 4 4 +WONDROUS 1 0 1 1 +WONDERS 1 0 1 1 +WONDERING 1 0 1 1 +WONDERFULLY 1 0 1 1 +WONDERFUL 6 0 6 6 +WONDER 5 0 5 5 +WON 1 0 1 1 +WOMEN 15 0 15 15 +WOMAN'S 2 0 2 2 +WOLF 1 0 1 1 +WOES 1 0 1 1 +WOE 2 0 2 2 +WODE'S 1 0 1 1 +WIZARDS 1 0 1 1 +WITTY 1 0 1 1 +WITNESSED 1 0 1 1 +WITHOUT 51 0 51 51 +WITHIN 11 0 11 11 +WITHHELD 2 0 2 2 +WITHDRAWN 2 0 2 2 +WITCHES 1 0 1 1 +WITCH 2 0 2 2 +WIT 1 0 1 1 +WISTFUL 1 0 1 1 +WISHING 3 0 3 3 +WISHES 3 0 3 3 +WISELY 1 0 1 1 +WISE 3 0 3 3 +WISDOM 4 0 4 4 +WISCONSIN 1 0 1 1 +WIRE 1 0 1 1 +WINTERS 1 0 1 1 +WINTER 5 0 5 5 +WINNING 1 0 1 1 +WINKED 1 0 1 1 +WINGS 2 0 2 2 +WING 1 0 1 1 +WINE 8 0 8 8 +WIND 6 0 6 6 +WIN 2 0 2 2 +WILLINGLY 1 0 1 1 +WILLING 4 0 4 4 +WILFUL 1 0 1 1 +WILDLY 2 0 2 2 +WILDEST 1 0 1 1 +WIFE 16 0 16 16 +WIELD 1 0 1 1 +WIDOWER 1 0 1 1 +WIDOW 1 0 1 1 +WIDEN 1 0 1 1 +WICKER'S 8 0 8 8 +WICKER 7 0 7 7 +WICKED 2 0 2 2 +WHOSO 1 0 1 1 +WHOOP 1 0 1 1 +WHOMSOEVER 1 0 1 1 +WHOM 20 0 20 20 +WHOEVER 2 0 2 2 +WHITHER 3 0 3 3 +WHITEHALL 1 0 1 1 +WHISTLING 2 0 2 2 +WHISTLE 3 0 3 3 +WHISPERED 2 0 2 2 +WHISKERED 1 0 1 1 +WHIPPINGS 1 0 1 1 +WHIP 2 0 2 2 +WHIMPERING 1 0 1 1 +WHIM 1 0 1 1 +WHILST 1 0 1 1 +WHETHER 12 0 12 12 +WHEREVER 2 0 2 2 +WHEREUPON 3 0 3 3 +WHEREIN 2 0 2 2 +WHEREFORE 2 0 2 2 +WHEREBY 2 0 2 2 +WHERE'S 4 0 4 4 +WHENEVER 4 0 4 4 +WHENCE 5 0 5 5 +WHEELS 1 0 1 1 +WHEELER 1 0 1 1 +WHATSOEVER 1 0 1 1 +WHAT'S 6 0 6 6 +WETTED 1 0 1 1 +WET 3 0 3 3 +WESTWARD 1 0 1 1 +WESTPORT 1 0 1 1 +WEST 3 0 3 3 +WEREN'T 1 0 1 1 +WEPT 2 0 2 2 +WENT 76 0 76 76 +WENCH 1 0 1 1 +WELSH 1 0 1 1 +WELLS 2 0 2 2 +WELCOME 4 0 4 4 +WEIGHTY 1 0 1 1 +WEIGHT 1 0 1 1 +WEIGHING 1 0 1 1 +WEIGHED 1 0 1 1 +WEEPING 3 0 3 3 +WEEKS 1 0 1 1 +WEEKLY 1 0 1 1 +WEEK 6 0 6 6 +WEEDS 1 0 1 1 +WEDNESDAY 2 0 2 2 +WEDDING 7 0 7 7 +WEB 1 0 1 1 +WEATHER 5 0 5 5 +WEARY 4 0 4 4 +WEARING 2 0 2 2 +WEAPONS 1 0 1 1 +WEAPON 1 0 1 1 +WEALTHY 3 0 3 3 +WEALTH 3 0 3 3 +WEAL 1 0 1 1 +WEAKNESS 4 0 4 4 +WEAK 2 0 2 2 +WE'VE 3 0 3 3 +WE'D 1 0 1 1 +WAZIR 5 0 5 5 +WAYS 5 0 5 5 +WAYLAID 1 0 1 1 +WAY 62 0 62 62 +WAVING 1 0 1 1 +WAVES 2 0 2 2 +WAVE 1 0 1 1 +WATERY 1 0 1 1 +WATERVILLE 1 0 1 1 +WATERS 1 0 1 1 +WATCHMAKER'S 1 0 1 1 +WATCHING 7 0 7 7 +WATCHED 4 0 4 4 +WATCH 12 0 12 12 +WASTED 1 0 1 1 +WASN'T 3 0 3 3 +WASHINGTON 2 0 2 2 +WASHED 4 0 4 4 +WASH 4 0 4 4 +WARRANT 1 0 1 1 +WARNING 1 0 1 1 +WARNER 1 0 1 1 +WARN'T 2 0 2 2 +WARN 2 0 2 2 +WARMTH 1 0 1 1 +WARMLY 1 0 1 1 +WARMEST 1 0 1 1 +WARM 1 0 1 1 +WAREHOUSES 1 0 1 1 +WARDERS 1 0 1 1 +WAR 5 0 5 5 +WANTS 5 0 5 5 +WANTON 1 0 1 1 +WANTING 1 0 1 1 +WANTED 8 0 8 8 +WANT 26 0 26 26 +WANDERERS 1 0 1 1 +WANDERER 2 0 2 2 +WALLS 4 0 4 4 +WALL 4 0 4 4 +WALKING 5 0 5 5 +WALKED 10 0 10 10 +WALK 5 0 5 5 +WAKING 2 0 2 2 +WAKED 2 0 2 2 +WAITERS 1 0 1 1 +WAITER 1 0 1 1 +WAITED 5 0 5 5 +WAIT 11 0 11 11 +WAISTCOAT 3 0 3 3 +WAIST 1 0 1 1 
+WAGONS 1 0 1 1 +WAGON 1 0 1 1 +WAGED 1 0 1 1 +WADED 1 0 1 1 +WADDLED 1 0 1 1 +WADDED 1 0 1 1 +W 1 0 1 1 +VRONSKY 1 0 1 1 +VOYAGES 1 0 1 1 +VOYAGE 7 0 7 7 +VOWS 2 0 2 2 +VOW 1 0 1 1 +VOTING 2 0 2 2 +VOTES 3 0 3 3 +VOTED 1 0 1 1 +VOTE 4 0 4 4 +VOMITING 1 0 1 1 +VOLUNTEERS 2 0 2 2 +VOLUNTARILY 1 0 1 1 +VOLUMINOUS 1 0 1 1 +VOLUME 2 0 2 2 +VOLLEY 1 0 1 1 +VOLCANOES 1 0 1 1 +VOICELESS 1 0 1 1 +VOICED 1 0 1 1 +VOICE 20 0 20 20 +VOCAL 2 0 2 2 +VITRIOL 1 0 1 1 +VITAL 1 0 1 1 +VISITOR 4 0 4 4 +VISITING 1 0 1 1 +VISITED 2 0 2 2 +VISIT 8 0 8 8 +VISION 1 0 1 1 +VISCOUNT 1 0 1 1 +VIRTUOUS 2 0 2 2 +VIRTUE 2 0 2 2 +VIRGINIA 1 0 1 1 +VIOLENTLY 1 0 1 1 +VIOLENT 1 0 1 1 +VINTAGE 2 0 2 2 +VINE 1 0 1 1 +VINDICTIVENESS 1 0 1 1 +VILLAGES 1 0 1 1 +VILLAGERS 1 0 1 1 +VILLAGE 2 0 2 2 +VILE 1 0 1 1 +VIGOROUS 3 0 3 3 +VIGILANT 1 0 1 1 +VIGILANCE 2 0 2 2 +VIEWS 1 0 1 1 +VIEW 1 0 1 1 +VICTORY 1 0 1 1 +VICTORIAN 1 0 1 1 +VICTIMS 1 0 1 1 +VICTIMIZE 1 0 1 1 +VICTIM 3 0 3 3 +VICIOUS 3 0 3 3 +VICES 1 0 1 1 +VEXED 1 0 1 1 +VEXATION 1 0 1 1 +VESSEL 2 0 2 2 +VERSES 5 0 5 5 +VERSE 1 0 1 1 +VERILY 3 0 3 3 +VERDICT 3 0 3 3 +VENTURE 2 0 2 2 +VENICE 2 0 2 2 +VENGEANCE 1 0 1 1 +VEINS 1 0 1 1 +VEILS 1 0 1 1 +VEHICLES 1 0 1 1 +VEHEMENTLY 1 0 1 1 +VEGETABLES 1 0 1 1 +VEGETABLE 1 0 1 1 +VECCHIO 1 0 1 1 +VAULTED 1 0 1 1 +VAUDEVILLE 1 0 1 1 +VARIOUS 2 0 2 2 +VARIES 1 0 1 1 +VARIED 1 0 1 1 +VANITY 1 0 1 1 +VANISHED 1 0 1 1 +VAMPA 2 0 2 2 +VALUES 2 0 2 2 +VALUE 3 0 3 3 +VALUABLES 1 0 1 1 +VALLEY 2 0 2 2 +VALJEAN'S 3 0 3 3 +VALJEAN 7 0 7 7 +VALET 2 0 2 2 +VAGUELY 1 0 1 1 +VAGUE 1 0 1 1 +VACATION 1 0 1 1 +VACANTLY 1 0 1 1 +UTTERLY 3 0 3 3 +UTTERING 1 0 1 1 +UTTERED 4 0 4 4 +UTMOST 5 0 5 5 +USURPER 2 0 2 2 +USUALLY 6 0 6 6 +USUAL 3 0 3 3 +USING 3 0 3 3 +USEST 1 0 1 1 +USEFUL 1 0 1 1 +USED 9 0 9 9 +USE 13 0 13 13 +URGED 2 0 2 2 +URGE 1 0 1 1 +UPWARD 1 0 1 1 +UPSET 1 0 1 1 +UPRIGHT 1 0 1 1 +UPPER 5 0 5 5 +UNWEPT 1 0 1 1 +UNUSUALLY 1 0 1 1 +UNUSUAL 2 0 2 2 +UNTO 10 0 10 10 +UNTIL 16 0 16 16 +UNSWERVING 1 0 1 1 +UNSOUGHT 1 0 1 1 +UNSELFISH 1 0 1 1 +UNSEASONABLE 1 0 1 1 +UNS 1 0 1 1 +UNREWARDED 1 0 1 1 +UNREASONABLE 1 0 1 1 +UNPRESSED 1 0 1 1 +UNPLEASANT 3 0 3 3 +UNPITIED 1 0 1 1 +UNOCCUPIED 1 0 1 1 +UNNATURAL 1 0 1 1 +UNMISTAKABLY 1 0 1 1 +UNLUCKY 2 0 2 2 +UNLIKELY 1 0 1 1 +UNKNOWN 2 0 2 2 +UNKIND 1 0 1 1 +UNJOINTED 1 0 1 1 +UNIVERSE 1 0 1 1 +UNIVERSAL 5 0 5 5 +UNITED 5 0 5 5 +UNISON 1 0 1 1 +UNIQUE 1 0 1 1 +UNIONISTS 1 0 1 1 +UNION 1 0 1 1 +UNINTENTIONAL 1 0 1 1 +UNIFORM 2 0 2 2 +UNHESITATINGLY 1 0 1 1 +UNHEARD 1 0 1 1 +UNHAPPY 5 0 5 5 +UNHAPPINESS 1 0 1 1 +UNGRATEFUL 3 0 3 3 +UNFORTUNATELY 2 0 2 2 +UNFORTUNATE 2 0 2 2 +UNFLATTERING 1 0 1 1 +UNEXPECTEDLY 2 0 2 2 +UNEXPECTED 3 0 3 3 +UNEASY 4 0 4 4 +UNEASILY 1 0 1 1 +UNDOUBTEDLY 1 0 1 1 +UNDERTOOK 1 0 1 1 +UNDERTONE 1 0 1 1 +UNDERTAKER'S 2 0 2 2 +UNDERTAKE 1 0 1 1 +UNDERSTOOD 6 0 6 6 +UNDERSTANDS 1 0 1 1 +UNDERSTANDING 5 0 5 5 +UNDERSTAND 7 0 7 7 +UNDERNEATH 1 0 1 1 +UNDERGROUND 1 0 1 1 +UNDERGO 1 0 1 1 +UNDER 37 0 37 37 +UNCONNECTED 1 0 1 1 +UNCONCERN 1 0 1 1 +UNCOMMON 1 0 1 1 +UNCOMFORTABLY 2 0 2 2 +UNCOMFORTABLE 1 0 1 1 +UNCLE'S 2 0 2 2 +UNCLE 7 0 7 7 +UNCERTAIN 2 0 2 2 +UNBURDEN 1 0 1 1 +UNAWARE 1 0 1 1 +UNASSISTED 1 0 1 1 +UNALTERABLE 1 0 1 1 +UNADULTERATED 1 0 1 1 +UNABLE 1 0 1 1 +UGLY 1 0 1 1 +TYRANTS 1 0 1 1 +TYRANT 2 0 2 2 +TYPE 1 0 1 1 +TWIST 1 0 1 1 +TWILIGHT 1 0 1 1 +TWICE 2 0 2 2 +TWENTY 16 0 16 16 +TWELVEMONTH 1 0 1 1 +TWELVE 3 0 3 3 +TWAS 1 0 1 1 +TWAIN 2 0 2 2 +TUTORS 2 0 2 2 +TUTOR 1 0 1 1 +TURRETS 1 0 1 1 +TURRET 1 0 1 1 +TURNS 2 0 2 2 +TURNING 6 0 6 6 +TURN 8 0 8 8 +TURKISH 1 0 
1 1 +TURK 1 0 1 1 +TURBAN 1 0 1 1 +TUNE 1 0 1 1 +TUMULT 1 0 1 1 +TUMBLED 1 0 1 1 +TUMBLE 1 0 1 1 +TUG 1 0 1 1 +TUFTS 1 0 1 1 +TUESDAY 1 0 1 1 +TUCKED 1 0 1 1 +TUBERCULOUS 1 0 1 1 +TUBE 2 0 2 2 +TRYING 8 0 8 8 +TRUSTWORTHY 1 0 1 1 +TRUSTED 1 0 1 1 +TRUST 3 0 3 3 +TRUNK 1 0 1 1 +TRUNDLED 1 0 1 1 +TRUE 15 0 15 15 +TROUT 1 0 1 1 +TROUSERS 2 0 2 2 +TROUBLING 1 0 1 1 +TROUBLED 6 0 6 6 +TROUBLE 8 0 8 8 +TROOPS 2 0 2 2 +TROLL 1 0 1 1 +TRIVET 1 0 1 1 +TRIUMPHING 1 0 1 1 +TRIUMPH 3 0 3 3 +TRIPES 1 0 1 1 +TRIP 2 0 2 2 +TRIM 1 0 1 1 +TRIFLING 3 0 3 3 +TRIES 3 0 3 3 +TRIBUTE 2 0 2 2 +TRIBES 6 0 6 6 +TRIANGLE 1 0 1 1 +TRIAL 3 0 3 3 +TREND 1 0 1 1 +TREMBLINGLY 1 0 1 1 +TREMBLING 2 0 2 2 +TREMBLE 1 0 1 1 +TREES 3 0 3 3 +TREATMENT 1 0 1 1 +TREATED 1 0 1 1 +TREAT 3 0 3 3 +TREASURES 1 0 1 1 +TREASURE 2 0 2 2 +TREASONS 1 0 1 1 +TREACHEROUSLY 1 0 1 1 +TRAVILLA 1 0 1 1 +TRAVELLERS 2 0 2 2 +TRAP 2 0 2 2 +TRANSPORTED 2 0 2 2 +TRANSPARENT 1 0 1 1 +TRANSLATED 1 0 1 1 +TRANSITORINESS 1 0 1 1 +TRANSFORMING 1 0 1 1 +TRANSFIGURED 1 0 1 1 +TRANSFERENCE 1 0 1 1 +TRANQUILLITIES 1 0 1 1 +TRAMP 2 0 2 2 +TRAINING 5 0 5 5 +TRAGIC 1 0 1 1 +TRADITIONS 2 0 2 2 +TRADITIONAL 1 0 1 1 +TRADEMARK 1 0 1 1 +TRADE 7 0 7 7 +TRACK 3 0 3 3 +TRACED 1 0 1 1 +TRACEABLE 1 0 1 1 +TRACE 2 0 2 2 +TOY 1 0 1 1 +TOWNSFOLK 1 0 1 1 +TOWN 14 0 14 14 +TOWERS 1 0 1 1 +TOWERING 1 0 1 1 +TOWER 4 0 4 4 +TOUR 2 0 2 2 +TOUGHS 1 0 1 1 +TOUGH 3 0 3 3 +TOUCHING 4 0 4 4 +TOUCH 3 0 3 3 +TOSSING 1 0 1 1 +TOSSED 1 0 1 1 +TORTURES 1 0 1 1 +TORTURE 1 0 1 1 +TORN 2 0 2 2 +TORMENTOR 2 0 2 2 +TORMENT 1 0 1 1 +TORE 1 0 1 1 +TORCH 1 0 1 1 +TOPS 1 0 1 1 +TOPIC 1 0 1 1 +TOOK 35 0 35 35 +TONGUES 1 0 1 1 +TONGUE 5 0 5 5 +TONES 2 0 2 2 +TONE 6 0 6 6 +TOMBS 1 0 1 1 +TOMB 2 0 2 2 +TOMATO 1 0 1 1 +TOM 9 0 9 9 +TOLERABLY 1 0 1 1 +TOKEN 1 0 1 1 +TOILING 1 0 1 1 +TOILETTE 1 0 1 1 +TOIL 1 0 1 1 +TOGETHER 11 0 11 11 +TOES 2 0 2 2 +TOBACCO 7 0 7 7 +TOASTED 2 0 2 2 +TOAST 1 0 1 1 +TITLE 1 0 1 1 +TIS 4 0 4 4 +TIRING 1 0 1 1 +TIRESOME 1 0 1 1 +TIRED 7 0 7 7 +TIPPLING 1 0 1 1 +TINY 1 0 1 1 +TINKLE 1 0 1 1 +TINCTURED 1 0 1 1 +TIMES 10 0 10 10 +TIMEPIECE 1 0 1 1 +TIME'S 1 0 1 1 +TIMBER 1 0 1 1 +TILLERS 1 0 1 1 +TILL 24 0 24 24 +TIGHTLY 2 0 2 2 +TIGHTENING 1 0 1 1 +TIGHTENED 1 0 1 1 +TIGHT 1 0 1 1 +TIED 2 0 2 2 +TIE 2 0 2 2 +TIDINGS 2 0 2 2 +TIDES 1 0 1 1 +TIDE 1 0 1 1 +TICKLING 1 0 1 1 +TICKING 1 0 1 1 +TICKET 1 0 1 1 +TIBER 1 0 1 1 +THYSELF 3 0 3 3 +THYME 1 0 1 1 +THWARTED 1 0 1 1 +THUS 13 0 13 13 +THURSDAY 1 0 1 1 +THUNDER 3 0 3 3 +THUMB 4 0 4 4 +THRUST 6 0 6 6 +THROWN 1 0 1 1 +THROWING 3 0 3 3 +THROW 2 0 2 2 +THROUGHOUT 3 0 3 3 +THRONE 2 0 2 2 +THROBBED 1 0 1 1 +THROAT 2 0 2 2 +THRILLING 1 0 1 1 +THRIFTILY 1 0 1 1 +THREW 8 0 8 8 +THREES 1 0 1 1 +THREATS 3 0 3 3 +THREATENED 1 0 1 1 +THREAD 2 0 2 2 +THRACE 1 0 1 1 +THOUSANDTH 1 0 1 1 +THOUSANDS 5 0 5 5 +THOUSAND 16 0 16 16 +THOUGHTS 5 0 5 5 +THOUGHTFUL 2 0 2 2 +THOUGHT 39 0 39 39 +THOUGH 39 0 39 39 +THOU 68 0 68 68 +THOSE 38 0 38 38 +THOROUGHLY 1 0 1 1 +THORNTON 4 0 4 4 +THONG 1 0 1 1 +THOMAS 2 0 2 2 +THITHER 5 0 5 5 +THIRTY 7 0 7 7 +THIRTEEN 1 0 1 1 +THIRSTY 1 0 1 1 +THIRSTING 1 0 1 1 +THIRST 9 0 9 9 +THIRD 11 0 11 11 +THINKS 2 0 2 2 +THINKING 4 0 4 4 +THINK 46 0 46 46 +THINGS 21 0 21 21 +THINE 1 0 1 1 +THIN 4 0 4 4 +THIEVES 1 0 1 1 +THIEF 2 0 2 2 +THICKENING 1 0 1 1 +THICK 4 0 4 4 +THEY'D 2 0 2 2 +THEREWITH 1 0 1 1 +THEREIN 3 0 3 3 +THEREFORE 12 0 12 12 +THEREAFTER 1 0 1 1 +THERE'LL 1 0 1 1 +THEORY 2 0 2 2 +THEOLOGIANS 1 0 1 1 +THENCEFORTH 1 0 1 1 +THENCE 1 0 1 1 +THEMSELVES 17 0 17 17 +THEATRE 2 0 2 2 +THAT'LL 1 0 1 1 +THANKS 5 0 5 5 
+THANKFUL 2 0 2 2 +THANKED 2 0 2 2 +THANK 7 0 7 7 +TEXAS 1 0 1 1 +TESTING 1 0 1 1 +TESTIMONY 1 0 1 1 +TESTIFY 2 0 2 2 +TERROR 5 0 5 5 +TERRIFIC 2 0 2 2 +TERRIBLE 4 0 4 4 +TERM 2 0 2 2 +TERENTIUS 1 0 1 1 +TENDING 1 0 1 1 +TENDERNESS 1 0 1 1 +TENDERLY 1 0 1 1 +TENDER 3 0 3 3 +TENDENCY 1 0 1 1 +TEND 2 0 2 2 +TENACITY 1 0 1 1 +TEN 18 0 18 18 +TEMPTRESS 1 0 1 1 +TEMPTING 1 0 1 1 +TEMPTETH 1 0 1 1 +TEMPTATION 4 0 4 4 +TEMPORARY 2 0 2 2 +TEMPLES 2 0 2 2 +TEMPLE 2 0 2 2 +TEMPLARS 2 0 2 2 +TEMPEST 1 0 1 1 +TEMPERATURE 1 0 1 1 +TEMPERATE 2 0 2 2 +TEMPERAMENT 1 0 1 1 +TEMPER 1 0 1 1 +TELLING 2 0 2 2 +TELEPHONE 1 0 1 1 +TELEGRAM 3 0 3 3 +TEEMING 1 0 1 1 +TECHNICAL 1 0 1 1 +TEASPOONFUL 1 0 1 1 +TEARS 6 0 6 6 +TEARING 1 0 1 1 +TEAR 3 0 3 3 +TEAM 1 0 1 1 +TEACHING 1 0 1 1 +TEACHERS 1 0 1 1 +TEACH 2 0 2 2 +TAYLOR 1 0 1 1 +TAXES 1 0 1 1 +TAUNTS 1 0 1 1 +TAUGHT 1 0 1 1 +TASTES 1 0 1 1 +TASTED 2 0 2 2 +TASTE 6 0 6 6 +TASK 3 0 3 3 +TARRIED 1 0 1 1 +TARDY 1 0 1 1 +TAPPED 1 0 1 1 +TAPE 1 0 1 1 +TAP 1 0 1 1 +TANNER 1 0 1 1 +TALMASH 1 0 1 1 +TALL 2 0 2 2 +TALKS 1 0 1 1 +TALKING 5 0 5 5 +TALKER 1 0 1 1 +TALES 1 0 1 1 +TALENTS 3 0 3 3 +TALENT 1 0 1 1 +TAKINGS 1 0 1 1 +TAKING 11 0 11 11 +TAKES 3 0 3 3 +TAKEN 22 0 22 22 +TAKE 45 0 45 45 +TAINTED 1 0 1 1 +TAILS 2 0 2 2 +TAILOR'S 1 0 1 1 +TAIL 3 0 3 3 +TAGGING 1 0 1 1 +TACK 1 0 1 1 +TABLETS 2 0 2 2 +TABLES 2 0 2 2 +TABLE 7 0 7 7 +SYSTEM 2 0 2 2 +SYRUP 4 0 4 4 +SYRINGE 1 0 1 1 +SYMPTOMS 1 0 1 1 +SYMPATHY 4 0 4 4 +SYMPATHIES 1 0 1 1 +SYLLOGISM 1 0 1 1 +SYDNEY 3 0 3 3 +SWUNG 3 0 3 3 +SWITCHED 1 0 1 1 +SWITCH 1 0 1 1 +SWISS 1 0 1 1 +SWINGING 1 0 1 1 +SWINGED 1 0 1 1 +SWIMS 1 0 1 1 +SWIFTLY 1 0 1 1 +SWEPT 1 0 1 1 +SWELL 1 0 1 1 +SWEETNESS 2 0 2 2 +SWEETMEATS 2 0 2 2 +SWEETMEAT 2 0 2 2 +SWEET 3 0 3 3 +SWEEPING 1 0 1 1 +SWEEP 1 0 1 1 +SWEAR 5 0 5 5 +SWAY 1 0 1 1 +SWARTHY 1 0 1 1 +SWARMED 2 0 2 2 +SWAM 1 0 1 1 +SWALLOWING 1 0 1 1 +SWALLOWED 4 0 4 4 +SVIAZHSKY 1 0 1 1 +SUSTAINS 1 0 1 1 +SUSPICIOUS 3 0 3 3 +SUSPICION 4 0 4 4 +SUSPENDED 1 0 1 1 +SUSPECTED 6 0 6 6 +SUSPECT 1 0 1 1 +SUSAN'S 1 0 1 1 +SURVEYED 1 0 1 1 +SURROUNDINGS 1 0 1 1 +SURROUNDING 1 0 1 1 +SURROUNDED 1 0 1 1 +SURRENDERING 1 0 1 1 +SURRENDERED 1 0 1 1 +SURRENDER 2 0 2 2 +SURPRISED 2 0 2 2 +SURPRISE 4 0 4 4 +SURPLICE 1 0 1 1 +SURPASS 1 0 1 1 +SURMOUNTED 1 0 1 1 +SURLY 1 0 1 1 +SURFACE 3 0 3 3 +SUPPRESS 1 0 1 1 +SUPPOSITION 1 0 1 1 +SUPPOSED 6 0 6 6 +SUPPORTED 1 0 1 1 +SUPPORT 1 0 1 1 +SUPPLY 2 0 2 2 +SUPPLIED 3 0 3 3 +SUPPLICATION 1 0 1 1 +SUPERNATURAL 2 0 2 2 +SUPERNACULUM 1 0 1 1 +SUPERLATIVE 1 0 1 1 +SUPERIORS 1 0 1 1 +SUPERIOR 4 0 4 4 +SUP 1 0 1 1 +SUNSHINY 1 0 1 1 +SUNSHINE 1 0 1 1 +SUNRISE 2 0 2 2 +SUNNYSIDE 1 0 1 1 +SUNG 2 0 2 2 +SUNDAY 4 0 4 4 +SUN 7 0 7 7 +SUMS 2 0 2 2 +SUMMONED 1 0 1 1 +SUMMON 1 0 1 1 +SUMMIT 1 0 1 1 +SUMMER 6 0 6 6 +SULTRY 1 0 1 1 +SULTAN 2 0 2 2 +SULPHURIC 1 0 1 1 +SUITS 1 0 1 1 +SUITED 1 0 1 1 +SUITABLE 2 0 2 2 +SUICIDE 1 0 1 1 +SUGGESTED 2 0 2 2 +SUGAR 9 0 9 9 +SUFFICIENTLY 2 0 2 2 +SUFFICIENT 3 0 3 3 +SUFFICES 1 0 1 1 +SUFFERINGS 1 0 1 1 +SUFFERING 2 0 2 2 +SUFFERED 3 0 3 3 +SUFFER 1 0 1 1 +SUE 1 0 1 1 +SUDDENLY 8 0 8 8 +SUDDEN 8 0 8 8 +SUCKED 1 0 1 1 +SUCK 1 0 1 1 +SUCH 44 0 44 44 +SUCCUMBED 1 0 1 1 +SUCCESSIVELY 1 0 1 1 +SUCCESSIVE 1 0 1 1 +SUCCESSFULLY 1 0 1 1 +SUCCESSES 2 0 2 2 +SUCCESS 2 0 2 2 +SUCCEEDING 1 0 1 1 +SUCCEEDED 3 0 3 3 +SUCCEED 1 0 1 1 +SUBURB 1 0 1 1 +SUBSTITUTING 1 0 1 1 +SUBSTANCES 1 0 1 1 +SUBSTANCE 3 0 3 3 +SUBSISTENCE 1 0 1 1 +SUBSIDED 1 0 1 1 +SUBSEQUENT 1 0 1 1 +SUBORDINATED 1 0 1 1 +SUBMITTED 2 0 2 2 +SUBMISSIVE 1 0 1 1 +SUBMISSION 1 0 1 1 +SUBJECTS 6 0 6 
6 +SUBJECTED 3 0 3 3 +SUBDUED 2 0 2 2 +SUB 1 0 1 1 +STYLED 1 0 1 1 +STYLE 1 0 1 1 +STURDY 1 0 1 1 +STUPID 2 0 2 2 +STUMBLED 1 0 1 1 +STUFFS 1 0 1 1 +STUFF 1 0 1 1 +STUDYING 1 0 1 1 +STUDY 1 0 1 1 +STUDENTS 1 0 1 1 +STUDENT 2 0 2 2 +STUCK 2 0 2 2 +STRUGGLING 1 0 1 1 +STRUGGLES 1 0 1 1 +STRUGGLE 1 0 1 1 +STRUCTURE 1 0 1 1 +STRUCK 13 0 13 13 +STROVE 1 0 1 1 +STRONGLY 1 0 1 1 +STRONGER 1 0 1 1 +STRONG 12 0 12 12 +STROKE 2 0 2 2 +STRIPPED 2 0 2 2 +STRIKING 1 0 1 1 +STRIFE 6 0 6 6 +STRIDES 1 0 1 1 +STRICTLY 1 0 1 1 +STREWN 1 0 1 1 +STRETCHING 1 0 1 1 +STRETCHER 1 0 1 1 +STRETCH 1 0 1 1 +STRENUOUSLY 1 0 1 1 +STRENGTHENED 2 0 2 2 +STRENGTH 12 0 12 12 +STREETS 2 0 2 2 +STREET 15 0 15 15 +STREAK 2 0 2 2 +STRAYING 1 0 1 1 +STRAW 1 0 1 1 +STRATAGEM 1 0 1 1 +STRANGER 3 0 3 3 +STRANGELY 1 0 1 1 +STRAITS 1 0 1 1 +STRAITENED 1 0 1 1 +STRAINING 1 0 1 1 +STRAIN 1 0 1 1 +STRAIGHTWAY 1 0 1 1 +STRAIGHTFORWARD 1 0 1 1 +STRAIGHT 5 0 5 5 +STOVE 1 0 1 1 +STOUT 1 0 1 1 +STORY 9 0 9 9 +STORMED 1 0 1 1 +STORM 1 0 1 1 +STOREHOUSES 1 0 1 1 +STORED 1 0 1 1 +STOPPING 3 0 3 3 +STOPPED 5 0 5 5 +STOP 5 0 5 5 +STOOL 2 0 2 2 +STOOD 22 0 22 22 +STONED 1 0 1 1 +STONE 11 0 11 11 +STOMACH 3 0 3 3 +STOLE 3 0 3 3 +STOCK 3 0 3 3 +STIRRING 1 0 1 1 +STIRRED 1 0 1 1 +STIR 1 0 1 1 +STILLNESS 3 0 3 3 +STILL 41 0 41 41 +STILE 1 0 1 1 +STIFLING 1 0 1 1 +STIFLED 2 0 2 2 +STIFLE 3 0 3 3 +STIFFNESS 1 0 1 1 +STIFF 2 0 2 2 +STICK 5 0 5 5 +STEWPAN 2 0 2 2 +STEWART 1 0 1 1 +STEWARDS 1 0 1 1 +STEWARD 1 0 1 1 +STEPHEN 1 0 1 1 +STEPAN 2 0 2 2 +STEP 3 0 3 3 +STENOGRAPHIC 1 0 1 1 +STEMS 1 0 1 1 +STEERING 1 0 1 1 +STEERAGE 2 0 2 2 +STEEP 1 0 1 1 +STEEL 1 0 1 1 +STEED 1 0 1 1 +STEAMED 1 0 1 1 +STEAMBOAT 2 0 2 2 +STEALTHILY 1 0 1 1 +STEAL 1 0 1 1 +STEADY 3 0 3 3 +STEADILY 1 0 1 1 +STATUS 1 0 1 1 +STATURE 1 0 1 1 +STATUES 3 0 3 3 +STATUE 2 0 2 2 +STATIONED 2 0 2 2 +STATION 2 0 2 2 +STATESMAN 1 0 1 1 +STATEMENT 3 0 3 3 +STATED 3 0 3 3 +STAT 1 0 1 1 +STARVING 2 0 2 2 +STARVE 1 0 1 1 +STARTLING 1 0 1 1 +STARTLED 1 0 1 1 +STARTING 1 0 1 1 +STARTED 10 0 10 10 +START 3 0 3 3 +STARK 1 0 1 1 +STARES 1 0 1 1 +STARED 4 0 4 4 +STARCHY 1 0 1 1 +STANLEY 2 0 2 2 +STANDSTILL 1 0 1 1 +STANDS 3 0 3 3 +STANDPOINT 1 0 1 1 +STANDING 10 0 10 10 +STAND 7 0 7 7 +STAMPED 1 0 1 1 +STAMMERED 1 0 1 1 +STAMMER 1 0 1 1 +STAKES 1 0 1 1 +STAKED 1 0 1 1 +STAIRCASE 1 0 1 1 +STAGE 5 0 5 5 +STABLE 1 0 1 1 +SQUIRE 3 0 3 3 +SQUEEZE 1 0 1 1 +SQUEAMISH 3 0 3 3 +SQUEAKS 1 0 1 1 +SQUATTED 1 0 1 1 +SQUALL 1 0 1 1 +SQUALID 1 0 1 1 +SQUAD 2 0 2 2 +SPYING 1 0 1 1 +SPY 1 0 1 1 +SPRINKLES 1 0 1 1 +SPRINGS 1 0 1 1 +SPRING 4 0 4 4 +SPRIG 1 0 1 1 +SPREADS 1 0 1 1 +SPREAD 4 0 4 4 +SPRANG 3 0 3 3 +SPOTTED 1 0 1 1 +SPOT 6 0 6 6 +SPORT 2 0 2 2 +SPOON 1 0 1 1 +SPOKEN 2 0 2 2 +SPOKE 15 0 15 15 +SPOILS 1 0 1 1 +SPLIT 2 0 2 2 +SPITEFUL 1 0 1 1 +SPITE 10 0 10 10 +SPIT 1 0 1 1 +SPIRITUAL 1 0 1 1 +SPIRITS 2 0 2 2 +SPIRIT 6 0 6 6 +SPIRAL 1 0 1 1 +SPINSTER 1 0 1 1 +SPIDER 1 0 1 1 +SPHERE 1 0 1 1 +SPENT 3 0 3 3 +SPELL 1 0 1 1 +SPEEDILY 1 0 1 1 +SPEED 1 0 1 1 +SPEECH 5 0 5 5 +SPECULATED 1 0 1 1 +SPECTATORS 1 0 1 1 +SPECTACLE 1 0 1 1 +SPECIES 1 0 1 1 +SPECIALTY 1 0 1 1 +SPECIAL 3 0 3 3 +SPEAKS 1 0 1 1 +SPEAKING 7 0 7 7 +SPEAK 15 0 15 15 +SPAWN 1 0 1 1 +SPASM 1 0 1 1 +SPARROWS 1 0 1 1 +SPARK 1 0 1 1 +SPARING 1 0 1 1 +SPARED 2 0 2 2 +SPARE 1 0 1 1 +SPANKER 1 0 1 1 +SPANISH 1 0 1 1 +SPADES 1 0 1 1 +SPACE 2 0 2 2 +SOUTHERN 3 0 3 3 +SOUTH 1 0 1 1 +SOUS 1 0 1 1 +SOURCE 2 0 2 2 +SOUP 1 0 1 1 +SOUNDS 2 0 2 2 +SOUNDLY 2 0 2 2 +SOUNDED 3 0 3 3 +SOUND 12 0 12 12 +SORTS 4 0 4 4 +SORT 12 0 12 12 +SORRY 3 0 3 3 +SORROWING 1 0 1 1 
+SORROW 1 0 1 1 +SORELY 1 0 1 1 +SORE 1 0 1 1 +SORDID 1 0 1 1 +SORCERER 1 0 1 1 +SOOTH 1 0 1 1 +SOONER 4 0 4 4 +SOON 22 0 22 22 +SONS 1 0 1 1 +SOMEWHAT 5 0 5 5 +SOMETIMES 14 0 14 14 +SOMETHING'S 1 0 1 1 +SOMETHING 29 0 29 29 +SOMEHOW 3 0 3 3 +SOMEBODY 3 0 3 3 +SOMBER 1 0 1 1 +SOLVE 1 0 1 1 +SOLUTION 4 0 4 4 +SOLUBLE 2 0 2 2 +SOLOMON 1 0 1 1 +SOLIDS 1 0 1 1 +SOLIDLY 1 0 1 1 +SOLID 1 0 1 1 +SOLICITUDE 1 0 1 1 +SOLEMNLY 1 0 1 1 +SOLEMNITY 1 0 1 1 +SOLEMN 1 0 1 1 +SOLDIERS 3 0 3 3 +SOLDIER 1 0 1 1 +SOLD 4 0 4 4 +SOLACE 1 0 1 1 +SOJOURN 2 0 2 2 +SOIL 2 0 2 2 +SOFTLY 2 0 2 2 +SOFT 5 0 5 5 +SOFA 2 0 2 2 +SODA 1 0 1 1 +SOCIETY 1 0 1 1 +SOCIETIES 1 0 1 1 +SOCIALIST 2 0 2 2 +SOCIAL 12 0 12 12 +SOBERLY 1 0 1 1 +SOBER 4 0 4 4 +SOARING 1 0 1 1 +SOAK 1 0 1 1 +SNOOZING 1 0 1 1 +SNETKOV 2 0 2 2 +SNEEZE 2 0 2 2 +SNEERED 1 0 1 1 +SNEAKY 1 0 1 1 +SNATCHER 2 0 2 2 +SNATCH 1 0 1 1 +SNAKE 1 0 1 1 +SMUGGLERS 7 0 7 7 +SMUGGLED 1 0 1 1 +SMOULDERING 1 0 1 1 +SMOTE 2 0 2 2 +SMOKING 3 0 3 3 +SMOKESTACKS 1 0 1 1 +SMOKERS 3 0 3 3 +SMOKER 3 0 3 3 +SMOKED 2 0 2 2 +SMOKE 2 0 2 2 +SMITH 1 0 1 1 +SMIRCHED 1 0 1 1 +SMILING 2 0 2 2 +SMILED 1 0 1 1 +SMILE 9 0 9 9 +SMELT 1 0 1 1 +SMELL 1 0 1 1 +SMART 1 0 1 1 +SMALLEST 1 0 1 1 +SMALLER 1 0 1 1 +SMALL 17 0 17 17 +SLUNK 1 0 1 1 +SLUMBER 2 0 2 2 +SLOWLY 6 0 6 6 +SLOW 3 0 3 3 +SLIPPING 1 0 1 1 +SLIPPER 1 0 1 1 +SLIP 3 0 3 3 +SLING 1 0 1 1 +SLIGHTLY 2 0 2 2 +SLIGHT 1 0 1 1 +SLICES 2 0 2 2 +SLEPT 3 0 3 3 +SLENDER 2 0 2 2 +SLEEVES 1 0 1 1 +SLEEPY 2 0 2 2 +SLEEPS 2 0 2 2 +SLEEPING 6 0 6 6 +SLEEPER 1 0 1 1 +SLEEP 15 0 15 15 +SLEDGE 1 0 1 1 +SLAYING 1 0 1 1 +SLAY 1 0 1 1 +SLAVES 2 0 2 2 +SLAVERY 1 0 1 1 +SLAVE 3 0 3 3 +SLAPPED 1 0 1 1 +SLAMMED 1 0 1 1 +SLAIN 2 0 2 2 +SLACKENED 1 0 1 1 +SKYLIGHT 2 0 2 2 +SKYLARKS 1 0 1 1 +SKY 3 0 3 3 +SKULLS 1 0 1 1 +SKULL 1 0 1 1 +SKIRTS 1 0 1 1 +SKIRT 1 0 1 1 +SKIRMISH 1 0 1 1 +SKIMMING 1 0 1 1 +SKILLED 1 0 1 1 +SKILFULLY 1 0 1 1 +SKIES 1 0 1 1 +SKETCH 1 0 1 1 +SKEPTICAL 1 0 1 1 +SIZE 5 0 5 5 +SIXTY 7 0 7 7 +SIXTH 5 0 5 5 +SIXTEEN 2 0 2 2 +SIX 17 0 17 17 +SITUATION 1 0 1 1 +SITTING 3 0 3 3 +SITTETH 1 0 1 1 +SIT 5 0 5 5 +SISTERS 4 0 4 4 +SISTERLY 1 0 1 1 +SISTER 8 0 8 8 +SINNED 1 0 1 1 +SINKS 1 0 1 1 +SINGULAR 2 0 2 2 +SINGS 3 0 3 3 +SINGLE 8 0 8 8 +SINGER 1 0 1 1 +SINGED 1 0 1 1 +SINGA 2 0 2 2 +SINFUL 1 0 1 1 +SINCERITY 1 0 1 1 +SINCERE 1 0 1 1 +SINCE 17 0 17 17 +SIMULATES 1 0 1 1 +SIMPLY 3 0 3 3 +SIMPLE 4 0 4 4 +SIMON 1 0 1 1 +SIMMERING 1 0 1 1 +SIMILAR 2 0 2 2 +SILVERWARE 1 0 1 1 +SILVER 7 0 7 7 +SILL 1 0 1 1 +SILK 1 0 1 1 +SILENTLY 2 0 2 2 +SILENT 9 0 9 9 +SILENCED 1 0 1 1 +SILENCE 9 0 9 9 +SILAS 1 0 1 1 +SIGURD 9 0 9 9 +SIGNS 2 0 2 2 +SIGNING 1 0 1 1 +SIGNIFIES 1 0 1 1 +SIGNIFIED 1 0 1 1 +SIGNIFICANT 2 0 2 2 +SIGNIFICANCE 2 0 2 2 +SIGNATURE 1 0 1 1 +SIGNALS 2 0 2 2 +SIGNAL 7 0 7 7 +SIGN 4 0 4 4 +SIGHT 8 0 8 8 +SIGHED 1 0 1 1 +SIGH 5 0 5 5 +SIFTED 1 0 1 1 +SIEVE 1 0 1 1 +SIDEWAYS 1 0 1 1 +SIDEWALK 1 0 1 1 +SIDES 4 0 4 4 +SIDE 22 0 22 22 +SICUT 1 0 1 1 +SICK 2 0 2 2 +SHUTTING 1 0 1 1 +SHUTTER 1 0 1 1 +SHUMAN 1 0 1 1 +SHUFFLE 1 0 1 1 +SHUDDER 1 0 1 1 +SHUCKS 1 0 1 1 +SHRUNK 1 0 1 1 +SHRUBBERY 1 0 1 1 +SHROUDED 1 0 1 1 +SHRINKING 1 0 1 1 +SHRILL 1 0 1 1 +SHRIEKING 1 0 1 1 +SHRIEKED 1 0 1 1 +SHOWS 2 0 2 2 +SHOWN 4 0 4 4 +SHOWING 7 0 7 7 +SHOWED 9 0 9 9 +SHOW 16 0 16 16 +SHOUTS 2 0 2 2 +SHOUTING 4 0 4 4 +SHOUTED 4 0 4 4 +SHOULDST 3 0 3 3 +SHOULDN'T 1 0 1 1 +SHOULDER 2 0 2 2 +SHOT 10 0 10 10 +SHORTLY 5 0 5 5 +SHORTER 1 0 1 1 +SHOPS 1 0 1 1 +SHOPPY 1 0 1 1 +SHOPPING 1 0 1 1 +SHOPKEEPERS 1 0 1 1 +SHOOTER 1 0 1 1 +SHOOT 6 0 6 6 +SHOOK 5 0 5 5 +SHONE 2 0 2 2 
+SHOES 5 0 5 5 +SHOCKED 2 0 2 2 +SHIRTS 1 0 1 1 +SHIRT 1 0 1 1 +SHIRKING 1 0 1 1 +SHIMMERING 1 0 1 1 +SHIFTY 1 0 1 1 +SHIFTED 2 0 2 2 +SHERRY 3 0 3 3 +SHERIFF 4 0 4 4 +SHERBURN'S 1 0 1 1 +SHEPHERD 1 0 1 1 +SHELLS 4 0 4 4 +SHELF 1 0 1 1 +SHEILA 1 0 1 1 +SHEET 2 0 2 2 +SHED 5 0 5 5 +SHEATH 1 0 1 1 +SHAWL 1 0 1 1 +SHATTERED 1 0 1 1 +SHARPNESS 1 0 1 1 +SHARPLY 4 0 4 4 +SHARPENED 1 0 1 1 +SHARP 5 0 5 5 +SHARE 2 0 2 2 +SHAPIA 1 0 1 1 +SHAPES 1 0 1 1 +SHAPED 1 0 1 1 +SHAPE 3 0 3 3 +SHAME 2 0 2 2 +SHAM 1 0 1 1 +SHALT 7 0 7 7 +SHAKING 1 0 1 1 +SHAKEDOWN 1 0 1 1 +SHAKE 2 0 2 2 +SHAHRAZAD 3 0 3 3 +SHAFTS 1 0 1 1 +SHADOWS 1 0 1 1 +SHADOW 5 0 5 5 +SEX 1 0 1 1 +SEVERELY 1 0 1 1 +SEVERE 2 0 2 2 +SEVERAL 10 0 10 10 +SEVENTY 7 0 7 7 +SEVENTIETH 1 0 1 1 +SEVENTEEN 4 0 4 4 +SETTLED 4 0 4 4 +SETTLE 2 0 2 2 +SERVING 1 0 1 1 +SERVICES 1 0 1 1 +SERVES 1 0 1 1 +SERVED 3 0 3 3 +SERVE 7 0 7 7 +SERVANTS 4 0 4 4 +SERVANT 4 0 4 4 +SERPENTS 2 0 2 2 +SERPENT 1 0 1 1 +SERMON 1 0 1 1 +SERIOUSLY 3 0 3 3 +SERIOUS 5 0 5 5 +SERGEY 5 0 5 5 +SERENITY 1 0 1 1 +SEPULTURE 1 0 1 1 +SEPULCHRE 1 0 1 1 +SEPARATION 3 0 3 3 +SEPARATING 1 0 1 1 +SEPARATED 3 0 3 3 +SEPARATE 2 0 2 2 +SENTINELS 2 0 2 2 +SENTIMENTAL 1 0 1 1 +SENTIMENT 1 0 1 1 +SENTENCE 2 0 2 2 +SENT 10 0 10 10 +SENSITIVE 2 0 2 2 +SENSIBLY 1 0 1 1 +SENSES 2 0 2 2 +SENSELESS 2 0 2 2 +SENSE 9 0 9 9 +SENSATION 1 0 1 1 +SENOR 1 0 1 1 +SEND 4 0 4 4 +SENATOR 1 0 1 1 +SELL 4 0 4 4 +SELF 6 0 6 6 +SEIZED 3 0 3 3 +SEEST 2 0 2 2 +SEES 1 0 1 1 +SEEN 32 0 32 32 +SEEMS 13 0 13 13 +SEEMLY 1 0 1 1 +SEEMING 1 0 1 1 +SEEKING 1 0 1 1 +SEEKEST 1 0 1 1 +SEDUCETH 1 0 1 1 +SECURITY 7 0 7 7 +SECURE 5 0 5 5 +SECRETS 3 0 3 3 +SECRETLY 1 0 1 1 +SECRETARY 2 0 2 2 +SECRET 3 0 3 3 +SECONDS 1 0 1 1 +SECOND 15 0 15 15 +SEATED 5 0 5 5 +SEAT 1 0 1 1 +SEASONS 1 0 1 1 +SEASONED 1 0 1 1 +SEAS 1 0 1 1 +SEARCHINGLY 1 0 1 1 +SEARCHING 1 0 1 1 +SEARCHES 1 0 1 1 +SEARCHED 2 0 2 2 +SEARCH 6 0 6 6 +SEAMEN 1 0 1 1 +SEAMAN 5 0 5 5 +SEALED 2 0 2 2 +SEA 15 0 15 15 +SCUTTLING 1 0 1 1 +SCUM 1 0 1 1 +SCULPTURE 1 0 1 1 +SCULPTOR 3 0 3 3 +SCRUPULOUSLY 1 0 1 1 +SCRIBES 2 0 2 2 +SCREW 1 0 1 1 +SCREEN 1 0 1 1 +SCREAM 1 0 1 1 +SCRATCHING 1 0 1 1 +SCRATCH 1 0 1 1 +SCRAPING 1 0 1 1 +SCRAPE 1 0 1 1 +SCOUNDREL 2 0 2 2 +SCOTCH 2 0 2 2 +SCORN 3 0 3 3 +SCISSORS 5 0 5 5 +SCIENTIFICALLY 1 0 1 1 +SCIENTIFIC 1 0 1 1 +SCIENCE 1 0 1 1 +SCHOOLMATE 1 0 1 1 +SCHOOLMASTER 5 0 5 5 +SCHOLARS 1 0 1 1 +SCHEME 1 0 1 1 +SCENES 2 0 2 2 +SCENE 6 0 6 6 +SCATTER 1 0 1 1 +SCARRED 1 0 1 1 +SCARLET 1 0 1 1 +SCARED 1 0 1 1 +SCARCELY 4 0 4 4 +SCARCE 1 0 1 1 +SCAPEGRACES 1 0 1 1 +SCANNING 1 0 1 1 +SCALES 1 0 1 1 +SAYS 28 0 28 28 +SAY 83 0 83 83 +SAXON 2 0 2 2 +SAWYER 3 0 3 3 +SAW 29 0 29 29 +SAVE 7 0 7 7 +SAVAGES 1 0 1 1 +SAVAGERY 1 0 1 1 +SAUCER 1 0 1 1 +SATURDAY 1 0 1 1 +SATURATED 1 0 1 1 +SATISFY 3 0 3 3 +SATISFIED 3 0 3 3 +SATISFACTORY 2 0 2 2 +SATISFACTION 6 0 6 6 +SATIATED 1 0 1 1 +SATANICAL 1 0 1 1 +SATAN 1 0 1 1 +SAT 9 0 9 9 +SARAH 3 0 3 3 +SANITARY 2 0 2 2 +SAND 3 0 3 3 +SANCTUARY 1 0 1 1 +SANCHO 9 0 9 9 +SAN 5 0 5 5 +SAMUEL 1 0 1 1 +SAME 22 0 22 22 +SALUTED 2 0 2 2 +SALTS 1 0 1 1 +SALT 2 0 2 2 +SALON 1 0 1 1 +SALLOWER 1 0 1 1 +SAKE 7 0 7 7 +SAITH 1 0 1 1 +SAINTS 3 0 3 3 +SAILS 1 0 1 1 +SAILOR 2 0 2 2 +SAILING 2 0 2 2 +SAILED 1 0 1 1 +SAFETY 2 0 2 2 +SAFELY 2 0 2 2 +SADLY 4 0 4 4 +SAD 2 0 2 2 +SACRIFICES 3 0 3 3 +SACRIFICE 5 0 5 5 +SACRED 1 0 1 1 +SACRAMENT 1 0 1 1 +SACK 1 0 1 1 +RUSTLING 2 0 2 2 +RUSTLE 1 0 1 1 +RUSSIAN 3 0 3 3 +RUSSIA 3 0 3 3 +RUSHED 4 0 4 4 +RUNS 1 0 1 1 +RUNNING 3 0 3 3 +RUNG 2 0 2 2 +RUMBLING 1 0 1 1 +RULES 2 0 2 2 +RULE 5 0 5 5 
+RUINS 1 0 1 1 +RUINING 1 0 1 1 +RUINED 1 0 1 1 +RUFFIAN 1 0 1 1 +RUBENSES 1 0 1 1 +RUBBERS 1 0 1 1 +ROYAL 7 0 7 7 +ROWED 1 0 1 1 +ROW 2 0 2 2 +ROVER 1 0 1 1 +ROUTE 1 0 1 1 +ROUSED 3 0 3 3 +ROUSE 1 0 1 1 +ROUNDED 1 0 1 1 +ROUGH 3 0 3 3 +ROT 1 0 1 1 +ROSY 2 0 2 2 +ROSEMARY 2 0 2 2 +ROSA 1 0 1 1 +ROPE 3 0 3 3 +ROOTS 2 0 2 2 +ROOMS 2 0 2 2 +ROOMFELLOW 2 0 2 2 +ROOFS 1 0 1 1 +ROOF 2 0 2 2 +RONALD 1 0 1 1 +ROMANCE 3 0 3 3 +ROLLED 1 0 1 1 +RODE 6 0 6 6 +ROCKS 1 0 1 1 +ROCKET 1 0 1 1 +ROBERT 1 0 1 1 +ROBED 1 0 1 1 +ROBBING 1 0 1 1 +ROBBERY 5 0 5 5 +ROBBERS 3 0 3 3 +ROBBERIES 2 0 2 2 +ROBBED 2 0 2 2 +ROASTING 1 0 1 1 +ROASTED 1 0 1 1 +ROAST 1 0 1 1 +ROARING 1 0 1 1 +ROARED 1 0 1 1 +ROADSIDE 1 0 1 1 +RIVERS 1 0 1 1 +RIVER 11 0 11 11 +RIVALRY 1 0 1 1 +RIVAL 2 0 2 2 +RISK 3 0 3 3 +RISING 5 0 5 5 +RISEN 1 0 1 1 +RISE 3 0 3 3 +RIP 2 0 2 2 +RINDS 1 0 1 1 +RIGOR 1 0 1 1 +RIGHTEOUSNESS 1 0 1 1 +RIGHTEOUS 1 0 1 1 +RIGHT 33 0 33 33 +RIDICULOUS 1 0 1 1 +RIDE 5 0 5 5 +RIDDEN 1 0 1 1 +RICHLY 1 0 1 1 +RICHER 1 0 1 1 +RICHARD 5 0 5 5 +RICH 7 0 7 7 +RICE 1 0 1 1 +RHODES 1 0 1 1 +RHEUMATISM 1 0 1 1 +REWARDS 1 0 1 1 +REVOLUTIONARIES 1 0 1 1 +REVOLUTION 3 0 3 3 +REVIVE 1 0 1 1 +REVIEW 1 0 1 1 +REVERSES 1 0 1 1 +REVENGES 1 0 1 1 +REVENGE 1 0 1 1 +REVELLED 1 0 1 1 +REVELATION 1 0 1 1 +REVEL 1 0 1 1 +REVEALED 1 0 1 1 +RETURNING 6 0 6 6 +RETURNED 19 0 19 19 +RETURN 18 0 18 18 +RETREAT 1 0 1 1 +RETARDED 1 0 1 1 +RETAINED 2 0 2 2 +RESUMED 2 0 2 2 +RESULT 2 0 2 2 +RESTS 1 0 1 1 +RESTRAIN 2 0 2 2 +RESTORETH 1 0 1 1 +RESTORED 2 0 2 2 +RESTIVE 1 0 1 1 +RESTAURANTS 1 0 1 1 +RESTAURANT 3 0 3 3 +RESPONSIBLE 5 0 5 5 +RESPONSIBILITY 2 0 2 2 +RESPONDED 2 0 2 2 +RESPECTS 1 0 1 1 +RESPECTIVE 2 0 2 2 +RESPECTING 1 0 1 1 +RESPECTFULLY 3 0 3 3 +RESPECTFUL 1 0 1 1 +RESPECTED 1 0 1 1 +RESPECTABLE 3 0 3 3 +RESPECT 4 0 4 4 +RESORTS 1 0 1 1 +RESORTED 1 0 1 1 +RESORT 1 0 1 1 +RESOLVING 1 0 1 1 +RESOLVED 1 0 1 1 +RESOLVE 1 0 1 1 +RESOLUTIONS 1 0 1 1 +RESOLUTION 2 0 2 2 +RESISTING 2 0 2 2 +RESISTANCE 3 0 3 3 +RESIST 3 0 3 3 +RESIGNED 1 0 1 1 +RESIDUE 3 0 3 3 +RESIDENCE 2 0 2 2 +RESIDE 1 0 1 1 +RESERVOIR 1 0 1 1 +RESERVE 1 0 1 1 +RESEMBLING 1 0 1 1 +RESEMBLES 1 0 1 1 +RESEMBLE 1 0 1 1 +RESEARCHES 1 0 1 1 +REQUIRING 1 0 1 1 +REQUIRES 1 0 1 1 +REQUIRED 3 0 3 3 +REQUIRE 4 0 4 4 +REQUESTED 2 0 2 2 +REQUEST 4 0 4 4 +REPUTATIONS 1 0 1 1 +REPUTATION 3 0 3 3 +REPROACH 2 0 2 2 +REPRESENTED 3 0 3 3 +REPRESENTATIVE 2 0 2 2 +REPORTED 1 0 1 1 +REPORT 2 0 2 2 +REPLY 2 0 2 2 +REPETITION 1 0 1 1 +REPEATING 1 0 1 1 +REPEAT 1 0 1 1 +REPAST 1 0 1 1 +REPARATION 1 0 1 1 +REPAIRED 2 0 2 2 +REPAIR 1 0 1 1 +RENOUNCE 3 0 3 3 +RENEWED 2 0 2 2 +RENDERS 1 0 1 1 +RENDERED 1 0 1 1 +RENDER 1 0 1 1 +REMOVED 3 0 3 3 +REMOVE 3 0 3 3 +REMOVAL 1 0 1 1 +REMOTE 1 0 1 1 +REMORSEFUL 1 0 1 1 +REMONSTRANCE 1 0 1 1 +REMNANTS 1 0 1 1 +REMNANT 1 0 1 1 +REMINISCENCES 1 0 1 1 +REMEMBERING 2 0 2 2 +REMEMBEREST 1 0 1 1 +REMEMBERED 4 0 4 4 +REMEMBER 13 0 13 13 +REMEDY 4 0 4 4 +REMARKS 1 0 1 1 +REMARKED 10 0 10 10 +REMARKABLY 1 0 1 1 +REMARKABLE 2 0 2 2 +REMARK 6 0 6 6 +REMAINS 2 0 2 2 +REMAINED 8 0 8 8 +REMAIN 5 0 5 5 +RELYING 1 0 1 1 +RELUCTANTLY 2 0 2 2 +RELUCTANCE 1 0 1 1 +RELINQUISH 1 0 1 1 +RELIGIOUS 2 0 2 2 +RELIGIONS 1 0 1 1 +RELIGION 11 0 11 11 +RELIEVED 1 0 1 1 +RELIEF 6 0 6 6 +RELIED 1 0 1 1 +RELIC 1 0 1 1 +RELEVANT 1 0 1 1 +RELEASED 3 0 3 3 +RELEASE 2 0 2 2 +RELAXING 1 0 1 1 +RELATIONS 2 0 2 2 +RELATION 2 0 2 2 +RELATED 4 0 4 4 +REJOINED 1 0 1 1 +REJOINDER 1 0 1 1 +REJOICING 1 0 1 1 +REJOICED 3 0 3 3 +REJECT 1 0 1 1 +REIGNED 1 0 1 1 +REGULATION 1 0 1 1 +REGULAR 1 0 1 1 +REGRETTING 1 
0 1 1 +REGISTER 1 0 1 1 +REGION 1 0 1 1 +REGIMENTS 2 0 2 2 +REGARDING 3 0 3 3 +REGARDED 2 0 2 2 +REGARD 2 0 2 2 +REGAINED 1 0 1 1 +REGAIN 2 0 2 2 +REFUTATION 1 0 1 1 +REFUSING 2 0 2 2 +REFUSES 2 0 2 2 +REFUSED 1 0 1 1 +REFUSE 2 0 2 2 +REFUGE 3 0 3 3 +REFRESHMENT 1 0 1 1 +REFRAIN 2 0 2 2 +REFORMS 1 0 1 1 +REFORMERS 2 0 2 2 +REFORM 6 0 6 6 +REFLECTIVE 1 0 1 1 +REFLECTIONS 1 0 1 1 +REFLECTION 3 0 3 3 +REFINED 2 0 2 2 +REFERRED 2 0 2 2 +REFERENCE 3 0 3 3 +REFER 1 0 1 1 +REELECTION 1 0 1 1 +REEF 1 0 1 1 +REDUCED 2 0 2 2 +REDOUBLING 1 0 1 1 +REDEMPTION 1 0 1 1 +REDEEMING 1 0 1 1 +RED 10 0 10 10 +RECTUM 1 0 1 1 +RECTOR 1 0 1 1 +RECTIFY 1 0 1 1 +RECRUITS 1 0 1 1 +RECOVERY 1 0 1 1 +RECOVERED 1 0 1 1 +RECOVER 3 0 3 3 +RECOURSE 1 0 1 1 +RECOUNTED 1 0 1 1 +RECORDS 1 0 1 1 +RECORD 2 0 2 2 +RECOMPENSE 2 0 2 2 +RECOMMEND 2 0 2 2 +RECOLLECTING 1 0 1 1 +RECOLLECTED 1 0 1 1 +RECOLLECT 1 0 1 1 +RECOILED 1 0 1 1 +RECOGNIZED 5 0 5 5 +RECOGNITION 2 0 2 2 +RECKON 4 0 4 4 +RECKLESS 2 0 2 2 +RECITING 1 0 1 1 +RECITER 2 0 2 2 +RECITED 3 0 3 3 +RECITE 2 0 2 2 +RECIPE 2 0 2 2 +RECEPTION 1 0 1 1 +RECENTLY 1 0 1 1 +RECEIVED 9 0 9 9 +RECEIVE 4 0 4 4 +RECEIPT 1 0 1 1 +RECEDING 1 0 1 1 +RECEDED 1 0 1 1 +RECAPTURED 1 0 1 1 +RECALLING 1 0 1 1 +RECALLED 1 0 1 1 +RECALL 1 0 1 1 +REBECCA 1 0 1 1 +REASONABLE 2 0 2 2 +REASON 11 0 11 11 +REAR 1 0 1 1 +REAPING 1 0 1 1 +REAP 1 0 1 1 +REALM 1 0 1 1 +REALLY 18 0 18 18 +REALITY 3 0 3 3 +REAL 3 0 3 3 +READERS 1 0 1 1 +READER 1 0 1 1 +READ 8 0 8 8 +REACHING 2 0 2 2 +REACHED 12 0 12 12 +REACH 4 0 4 4 +RAWNESS 1 0 1 1 +RAVING 1 0 1 1 +RAVENING 1 0 1 1 +RAVAGED 1 0 1 1 +RATTLING 3 0 3 3 +RATTLED 1 0 1 1 +RATTLE 1 0 1 1 +RATHER 16 0 16 16 +RATE 2 0 2 2 +RASHID 1 0 1 1 +RASCALS 1 0 1 1 +RASCAL 3 0 3 3 +RARE 1 0 1 1 +RAPSCALLIONS 1 0 1 1 +RAPPERS 1 0 1 1 +RAPIDLY 4 0 4 4 +RAP 1 0 1 1 +RANKS 1 0 1 1 +RANG 2 0 2 2 +RAMBLER 1 0 1 1 +RAMBLE 1 0 1 1 +RAISING 2 0 2 2 +RAISED 6 0 6 6 +RAISE 6 0 6 6 +RAINY 2 0 2 2 +RAINS 1 0 1 1 +RAINBOWS 1 0 1 1 +RAIN 3 0 3 3 +RAILROAD 1 0 1 1 +RAIDERS 1 0 1 1 +RAID 1 0 1 1 +RAGE 2 0 2 2 +RAFTER 1 0 1 1 +RAFT 6 0 6 6 +RADICALS 1 0 1 1 +RADIANT 1 0 1 1 +RACKETS 1 0 1 1 +RACKETEERS 1 0 1 1 +RABBITS 1 0 1 1 +RABBIT 5 0 5 5 +R 1 0 1 1 +QUOTH 5 0 5 5 +QUIXOTE 5 0 5 5 +QUIVERED 1 0 1 1 +QUIVER 1 0 1 1 +QUIT 1 0 1 1 +QUIETLY 4 0 4 4 +QUIET 1 0 1 1 +QUICKLY 5 0 5 5 +QUICKER 3 0 3 3 +QUICKENETH 1 0 1 1 +QUICK 5 0 5 5 +QUESTIONS 6 0 6 6 +QUESTIONED 1 0 1 1 +QUESTIONABLE 1 0 1 1 +QUESTION 15 0 15 15 +QUENCH 1 0 1 1 +QUEER 4 0 4 4 +QUEENS 1 0 1 1 +QUEEN'S 1 0 1 1 +QUEEN 5 0 5 5 +QUARTERS 3 0 3 3 +QUARTER 7 0 7 7 +QUART 1 0 1 1 +QUARRELS 1 0 1 1 +QUANTITY 3 0 3 3 +QUALITY 1 0 1 1 +QUACKS 2 0 2 2 +PYM 1 0 1 1 +PUZZLED 2 0 2 2 +PUTTING 1 0 1 1 +PUTS 3 0 3 3 +PUT 40 0 40 40 +PUSHING 1 0 1 1 +PUSHED 1 0 1 1 +PURSUIT 1 0 1 1 +PURSUED 3 0 3 3 +PURSUANCE 1 0 1 1 +PURPOSES 1 0 1 1 +PURPOSE 5 0 5 5 +PURPORTING 1 0 1 1 +PURITAN 2 0 2 2 +PURIFY 1 0 1 1 +PURE 4 0 4 4 +PURCHASED 1 0 1 1 +PUNISHMENTS 1 0 1 1 +PUNISHMENT 1 0 1 1 +PUNISHES 1 0 1 1 +PUNISHED 1 0 1 1 +PUNISH 1 0 1 1 +PUNCTUALITY 1 0 1 1 +PUNCTILIOUS 1 0 1 1 +PUMP 1 0 1 1 +PULSE 1 0 1 1 +PULP 1 0 1 1 +PULLED 4 0 4 4 +PULL 1 0 1 1 +PUFFING 1 0 1 1 +PUFFED 1 0 1 1 +PUDDINGS 1 0 1 1 +PUBLISHER 1 0 1 1 +PUBLIC 5 0 5 5 +PRYING 2 0 2 2 +PRUDENT 1 0 1 1 +PRUDENCE 4 0 4 4 +PROVOKE 1 0 1 1 +PROVOCATOR 1 0 1 1 +PROVISION 1 0 1 1 +PROVINCIAL 1 0 1 1 +PROVINCE 4 0 4 4 +PROVIDENCES 1 0 1 1 +PROVIDENCE 1 0 1 1 +PROVIDED 2 0 2 2 +PROVIDE 1 0 1 1 +PROVERBIAL 1 0 1 1 +PROVED 2 0 2 2 +PROUD 2 0 2 2 +PROTESTED 2 0 2 2 +PROTECTS 1 0 1 1 +PROTECTORS 1 
0 1 1 +PROTECTOR 1 0 1 1 +PROTECTION 2 0 2 2 +PROTECT 2 0 2 2 +PROSPEROUS 1 0 1 1 +PROPRIETORS 1 0 1 1 +PROPOSITION 1 0 1 1 +PROPOSES 1 0 1 1 +PROPOSED 3 0 3 3 +PROPOSALS 1 0 1 1 +PROPORTION 3 0 3 3 +PROPHET 1 0 1 1 +PROPERTY 2 0 2 2 +PROPERLY 2 0 2 2 +PROPER 2 0 2 2 +PROOF 5 0 5 5 +PRONOUNCED 1 0 1 1 +PROMPTLY 3 0 3 3 +PROMPT 1 0 1 1 +PROMOTIVE 1 0 1 1 +PROMISING 1 0 1 1 +PROMISED 7 0 7 7 +PROMISE 4 0 4 4 +PROLONGED 1 0 1 1 +PROJECT 1 0 1 1 +PROHIBITION 1 0 1 1 +PROHIBITED 1 0 1 1 +PROHIBIT 1 0 1 1 +PROGRESS 1 0 1 1 +PROGRAMME 1 0 1 1 +PROFUSION 1 0 1 1 +PROFOUND 1 0 1 1 +PROFLIGATE 1 0 1 1 +PROFITABLY 1 0 1 1 +PROFITABLE 1 0 1 1 +PROFIT 2 0 2 2 +PROFESSIONAL 2 0 2 2 +PROFESSION 4 0 4 4 +PROFANITY 1 0 1 1 +PROFANE 1 0 1 1 +PRODUCTIONS 1 0 1 1 +PRODUCING 1 0 1 1 +PRODUCES 1 0 1 1 +PRODUCED 7 0 7 7 +PRODUCE 6 0 6 6 +PROCURE 2 0 2 2 +PROCOPIUS 1 0 1 1 +PROCESSIONS 1 0 1 1 +PROCESSION 1 0 1 1 +PROCESS 6 0 6 6 +PROCEEDINGS 2 0 2 2 +PROCEEDED 1 0 1 1 +PROCEED 2 0 2 2 +PROCEDURE 1 0 1 1 +PROBLEMS 1 0 1 1 +PROBLEM 1 0 1 1 +PROBABLY 7 0 7 7 +PROBABLE 1 0 1 1 +PROBABILITY 1 0 1 1 +PRIVILEGE 1 0 1 1 +PRIVATE 6 0 6 6 +PRIVACY 1 0 1 1 +PRISON 5 0 5 5 +PRIORESS 7 0 7 7 +PRINT 1 0 1 1 +PRINCIPLE 3 0 3 3 +PRINCIPALS 1 0 1 1 +PRINCIPALLY 1 0 1 1 +PRINCIPAL 1 0 1 1 +PRINCESS 11 0 11 11 +PRINCES 1 0 1 1 +PRINCE'S 2 0 2 2 +PRINCE 7 0 7 7 +PRIME 3 0 3 3 +PRIDE 2 0 2 2 +PRICE 2 0 2 2 +PREVIOUSLY 1 0 1 1 +PREVENTED 1 0 1 1 +PREVAILING 1 0 1 1 +PREVAILED 1 0 1 1 +PRETTY 8 0 8 8 +PRETTILY 1 0 1 1 +PRETTIEST 1 0 1 1 +PRETEXT 1 0 1 1 +PRETENDED 1 0 1 1 +PRETEND 3 0 3 3 +PRESUMPTUOUS 1 0 1 1 +PRESSURE 4 0 4 4 +PRESSING 3 0 3 3 +PRESSED 2 0 2 2 +PRESS 2 0 2 2 +PRESERVING 5 0 5 5 +PRESERVES 3 0 3 3 +PRESERVED 2 0 2 2 +PRESERVE 2 0 2 2 +PRESENTS 4 0 4 4 +PRESENTLY 12 0 12 12 +PRESENTING 1 0 1 1 +PRESENTED 3 0 3 3 +PRESENCE 9 0 9 9 +PREPARING 3 0 3 3 +PREPARED 7 0 7 7 +PREPARE 1 0 1 1 +PREPARATIONS 5 0 5 5 +PREOCCUPIED 1 0 1 1 +PREMISES 1 0 1 1 +PRELIMINARIES 1 0 1 1 +PREFERRED 1 0 1 1 +PREFER 2 0 2 2 +PREDICTIONS 1 0 1 1 +PRECIPITANCY 1 0 1 1 +PRECIOUS 2 0 2 2 +PRECINCT 1 0 1 1 +PRECAUTION 1 0 1 1 +PREACHING 2 0 2 2 +PREACHER 1 0 1 1 +PREACHED 1 0 1 1 +PRAYERS 3 0 3 3 +PRAYER 6 0 6 6 +PRAYED 3 0 3 3 +PRAY 5 0 5 5 +PRAM 1 0 1 1 +PRAISEWORTHY 1 0 1 1 +PRAISES 1 0 1 1 +PRAISED 2 0 2 2 +PRAISE 3 0 3 3 +PRACTITIONER 1 0 1 1 +PRACTISE 1 0 1 1 +PRACTICE 1 0 1 1 +PRACTICALLY 2 0 2 2 +POWERS 3 0 3 3 +POWERLESS 1 0 1 1 +POWERFUL 3 0 3 3 +POWER 27 0 27 27 +POWDERED 1 0 1 1 +POVERTY 3 0 3 3 +POURS 1 0 1 1 +POURING 1 0 1 1 +POURED 3 0 3 3 +POUR 1 0 1 1 +POUNCE 1 0 1 1 +POTS 1 0 1 1 +POTASSIUM 1 0 1 1 +POTASSIC 1 0 1 1 +POTASH 1 0 1 1 +POSTERS 1 0 1 1 +POSTERN 1 0 1 1 +POSSIBLY 3 0 3 3 +POSSIBLE 12 0 12 12 +POSSIBILITY 2 0 2 2 +POSSESSION 2 0 2 2 +POSSESSES 1 0 1 1 +POSSESSED 5 0 5 5 +POSSESS 1 0 1 1 +POSSE 1 0 1 1 +POSITIVELY 3 0 3 3 +POSITIVE 1 0 1 1 +POSITION 7 0 7 7 +PORTO 1 0 1 1 +PORTMANTEAU 1 0 1 1 +PORTIONS 2 0 2 2 +PORTION 2 0 2 2 +PORTER 2 0 2 2 +PORTENTOUS 1 0 1 1 +PORT 1 0 1 1 +PORED 1 0 1 1 +POPULATION 1 0 1 1 +POPULARITY 1 0 1 1 +POPULAR 1 0 1 1 +POPPED 1 0 1 1 +POPES 2 0 2 2 +POPE'S 1 0 1 1 +POPE 1 0 1 1 +POP 1 0 1 1 +PONY 1 0 1 1 +POLYTECHNIC 1 0 1 1 +POLONIUS 1 0 1 1 +POLO 1 0 1 1 +POLLY'S 3 0 3 3 +POLLY 19 0 19 19 +POLITICIANS 1 0 1 1 +POLITICAL 3 0 3 3 +POLICE 5 0 5 5 +POKING 1 0 1 1 +POKED 1 0 1 1 +POISONS 1 0 1 1 +POISONING 3 0 3 3 +POINTING 2 0 2 2 +POINTED 3 0 3 3 +POINT 10 0 10 10 +POETRY 2 0 2 2 +POCKETS 1 0 1 1 +POCKETED 1 0 1 1 +POCKET 8 0 8 8 +PLUNGED 3 0 3 3 +PLUNDERED 1 0 1 1 +PLUG 1 0 1 1 +PLUCKING 2 0 
2 2 +PLUCK 1 0 1 1 +PLOT 1 0 1 1 +PLEDGED 1 0 1 1 +PLEDGE 1 0 1 1 +PLEASURES 2 0 2 2 +PLEASURE 8 0 8 8 +PLEASING 2 0 2 2 +PLEASED 4 0 4 4 +PLEASE 15 0 15 15 +PLEASANTER 1 0 1 1 +PLEADINGS 1 0 1 1 +PLEADED 3 0 3 3 +PLEAD 1 0 1 1 +PLAYING 1 0 1 1 +PLAYERS 1 0 1 1 +PLAYED 5 0 5 5 +PLAY 4 0 4 4 +PLATTERS 1 0 1 1 +PLATTERBAFF'S 1 0 1 1 +PLATFORM 2 0 2 2 +PLATED 1 0 1 1 +PLATE 2 0 2 2 +PLASTER 1 0 1 1 +PLANTS 1 0 1 1 +PLANTED 1 0 1 1 +PLANTATIONS 2 0 2 2 +PLANS 5 0 5 5 +PLANNED 1 0 1 1 +PLANKS 2 0 2 2 +PLANK 3 0 3 3 +PLANETS 1 0 1 1 +PLANET 2 0 2 2 +PLAN 1 0 1 1 +PLAINLY 3 0 3 3 +PLAIN 5 0 5 5 +PLAGUE 2 0 2 2 +PLACING 2 0 2 2 +PLACES 3 0 3 3 +PLACED 10 0 10 10 +PITY 4 0 4 4 +PITIFULNESS 1 0 1 1 +PITCHER 1 0 1 1 +PIT 1 0 1 1 +PISTOLS 1 0 1 1 +PISTOLES 1 0 1 1 +PISTOL 5 0 5 5 +PIPING 1 0 1 1 +PIPE 4 0 4 4 +PIOUS 1 0 1 1 +PINT 1 0 1 1 +PINKUS 1 0 1 1 +PINKERTON'S 1 0 1 1 +PINK 1 0 1 1 +PINED 1 0 1 1 +PINCHED 2 0 2 2 +PINCH 1 0 1 1 +PIN 1 0 1 1 +PILED 1 0 1 1 +PILE 1 0 1 1 +PIERO 1 0 1 1 +PIERCED 1 0 1 1 +PIECES 9 0 9 9 +PIECE 4 0 4 4 +PICTURES 3 0 3 3 +PICTURE 5 0 5 5 +PICKET 1 0 1 1 +PICK 5 0 5 5 +PIAZZA 1 0 1 1 +PHYSIOLOGICAL 1 0 1 1 +PHYSICIAN 3 0 3 3 +PHYSICAL 2 0 2 2 +PHUT'S 1 0 1 1 +PHRASE 1 0 1 1 +PHONE 1 0 1 1 +PHLEGMATIC 1 0 1 1 +PHILOSOPHERS 1 0 1 1 +PHILIP 3 0 3 3 +PHARMACY 1 0 1 1 +PEYTON 1 0 1 1 +PETITIONS 1 0 1 1 +PETERS 1 0 1 1 +PETER'S 2 0 2 2 +PETER 15 0 15 15 +PET 2 0 2 2 +PERVADED 1 0 1 1 +PERUSING 1 0 1 1 +PERUSAL 1 0 1 1 +PERSUADED 1 0 1 1 +PERSUADE 1 0 1 1 +PERSPIRED 1 0 1 1 +PERSPECTIVE 1 0 1 1 +PERSONS 8 0 8 8 +PERSONALLY 5 0 5 5 +PERSONAL 2 0 2 2 +PERSONAGE 1 0 1 1 +PERSON'S 1 0 1 1 +PERSON 16 0 16 16 +PERSISTED 3 0 3 3 +PERSIST 1 0 1 1 +PERSEUS 2 0 2 2 +PERSECUTORS 1 0 1 1 +PERSECUTION 1 0 1 1 +PERSECUTED 1 0 1 1 +PERSECUTE 2 0 2 2 +PERPLEXITY 1 0 1 1 +PERPETUALLY 1 0 1 1 +PERMITTED 5 0 5 5 +PERMIT 4 0 4 4 +PERMISSION 2 0 2 2 +PERMANENT 1 0 1 1 +PERISHED 3 0 3 3 +PERISH 4 0 4 4 +PERIODS 1 0 1 1 +PERIOD 2 0 2 2 +PERILS 1 0 1 1 +PERHAPS 17 0 17 17 +PERFORMANCES 1 0 1 1 +PERFORM 2 0 2 2 +PERFECTLY 5 0 5 5 +PERFECTION 2 0 2 2 +PERFECT 5 0 5 5 +PERCHED 1 0 1 1 +PERCH 1 0 1 1 +PERCEPTIBLE 1 0 1 1 +PERCEIVED 7 0 7 7 +PERCEIVE 1 0 1 1 +PERAMBULATOR'S 1 0 1 1 +PER 1 0 1 1 +PEPPINO 1 0 1 1 +PEPPER 2 0 2 2 +PEOPLE'S 3 0 3 3 +PEOPLE 44 0 44 44 +PENNY 1 0 1 1 +PENETRATING 1 0 1 1 +PENETRATE 1 0 1 1 +PENALTY 1 0 1 1 +PELT 1 0 1 1 +PEKAHIAH 1 0 1 1 +PEERS 1 0 1 1 +PEERED 1 0 1 1 +PEDESTAL 1 0 1 1 +PECULIAR 2 0 2 2 +PEBBLES 3 0 3 3 +PEASANT 3 0 3 3 +PEARLS 1 0 1 1 +PEAL 1 0 1 1 +PEABODY 1 0 1 1 +PAYS 1 0 1 1 +PAYING 1 0 1 1 +PAY 11 0 11 11 +PAWNBROKER 2 0 2 2 +PAVILION 1 0 1 1 +PAVEMENT 1 0 1 1 +PAUSED 3 0 3 3 +PAUSE 4 0 4 4 +PATRIOTS 1 0 1 1 +PATRIOTISM 1 0 1 1 +PATRIOT 1 0 1 1 +PATRIMONY 1 0 1 1 +PATRIARCHS 1 0 1 1 +PATIENTLY 1 0 1 1 +PATIENT'S 1 0 1 1 +PATIENT 2 0 2 2 +PATHS 1 0 1 1 +PASSION 5 0 5 5 +PASSING 3 0 3 3 +PASSES 1 0 1 1 +PASSERS 2 0 2 2 +PASSENGERS 4 0 4 4 +PASSAGE 2 0 2 2 +PASSABLE 1 0 1 1 +PARTY 13 0 13 13 +PARTNER 1 0 1 1 +PARTISANS 1 0 1 1 +PARTINGS 1 0 1 1 +PARTING 2 0 2 2 +PARTIES 1 0 1 1 +PARTICULARS 1 0 1 1 +PARTICULARLY 5 0 5 5 +PARTICULAR 4 0 4 4 +PARTICLE 2 0 2 2 +PARTICIPATION 1 0 1 1 +PARTICIPANTS 1 0 1 1 +PARTAKE 1 0 1 1 +PARSLEY 1 0 1 1 +PARRICIDES 1 0 1 1 +PARLIAMENTARY 1 0 1 1 +PARK 1 0 1 1 +PARISIAN 1 0 1 1 +PARISH 1 0 1 1 +PARIS 5 0 5 5 +PARENTS 2 0 2 2 +PARDONABLE 1 0 1 1 +PARDON 10 0 10 10 +PARCEL 2 0 2 2 +PARASOL 1 0 1 1 +PARALLEL 1 0 1 1 +PARADISE 1 0 1 1 +PAPERS 4 0 4 4 +PAPER 6 0 6 6 +PAPA 4 0 4 4 +PANZA 1 0 1 1 +PANTING 2 0 2 2 +PANS 1 0 1 1 
+PANNIERS 1 0 1 1 +PAN 3 0 3 3 +PALLIATE 1 0 1 1 +PALINGS 1 0 1 1 +PALESTINE 2 0 2 2 +PALER 1 0 1 1 +PALE 8 0 8 8 +PALACE 6 0 6 6 +PAIR 6 0 6 6 +PAINTING 1 0 1 1 +PAINTER 1 0 1 1 +PAINT 1 0 1 1 +PAINFULLY 1 0 1 1 +PAINFUL 5 0 5 5 +PAINED 1 0 1 1 +PAIN 3 0 3 3 +PAID 9 0 9 9 +PAGES 4 0 4 4 +PAGE 2 0 2 2 +PADDLE 2 0 2 2 +PACKED 1 0 1 1 +PACIFY 1 0 1 1 +PACIFIC 2 0 2 2 +PACES 2 0 2 2 +OXEN 1 0 1 1 +OX 1 0 1 1 +OWNED 2 0 2 2 +OWN 45 0 45 45 +OWING 1 0 1 1 +OWE 2 0 2 2 +OVERWHELMED 2 0 2 2 +OVERTURNING 1 0 1 1 +OVERTHREW 1 0 1 1 +OVERTAKEN 1 0 1 1 +OVERLY 1 0 1 1 +OVERHEARD 3 0 3 3 +OVERHEAD 2 0 2 2 +OVERCOME 2 0 2 2 +OVERCOAT 1 0 1 1 +OVEN 2 0 2 2 +OVAL 1 0 1 1 +OUTWARD 1 0 1 1 +OUTSIDE 8 0 8 8 +OUTLINES 1 0 1 1 +OUTLINE 1 0 1 1 +OUTLAWS 1 0 1 1 +OUTFIT 1 0 1 1 +OURSELVES 4 0 4 4 +OUNCES 3 0 3 3 +OTTO 1 0 1 1 +OTTER 1 0 1 1 +OTHO 1 0 1 1 +OTHERWISE 1 0 1 1 +OTHERS 17 0 17 17 +OTHER'S 2 0 2 2 +OSTRICH 1 0 1 1 +ORTHODOX 1 0 1 1 +ORPHAN 1 0 1 1 +ORNERY 1 0 1 1 +ORNERIEST 1 0 1 1 +ORNAMENTED 1 0 1 1 +ORNAMENTAL 1 0 1 1 +ORLEANS 1 0 1 1 +ORISON 1 0 1 1 +ORIGIN 1 0 1 1 +ORGANIZATION 3 0 3 3 +ORFICER 1 0 1 1 +ORDINARY 1 0 1 1 +ORDERS 8 0 8 8 +ORDERED 5 0 5 5 +ORDER 20 0 20 20 +ORDEAL 1 0 1 1 +ORDAINED 1 0 1 1 +ORCHARDS 1 0 1 1 +ORBIS 1 0 1 1 +ORANGE 1 0 1 1 +ORACLE 2 0 2 2 +OPTIC 1 0 1 1 +OPPRESSORS 1 0 1 1 +OPPRESSOR 1 0 1 1 +OPPRESSION 3 0 3 3 +OPPRESSED 2 0 2 2 +OPPOSITION 2 0 2 2 +OPPOSITE 3 0 3 3 +OPPOSE 1 0 1 1 +OPPORTUNITY 4 0 4 4 +OPERATIONS 1 0 1 1 +OPERATED 1 0 1 1 +OPERA 1 0 1 1 +OPENS 3 0 3 3 +OPENING 7 0 7 7 +OPAQUE 1 0 1 1 +ONWARDS 1 0 1 1 +ONWARD 1 0 1 1 +ONLY 73 0 73 73 +ONION 2 0 2 2 +ONESELF 1 0 1 1 +ONES 4 0 4 4 +ONCE 56 0 56 56 +OMITTING 1 0 1 1 +OMAR 2 0 2 2 +OLDISH 1 0 1 1 +OLDEST 1 0 1 1 +OLDER 1 0 1 1 +OIL 3 0 3 3 +OGRE'S 1 0 1 1 +OGRE 3 0 3 3 +OGLING 1 0 1 1 +OFTEN 21 0 21 21 +OFFICIAL 1 0 1 1 +OFFICES 1 0 1 1 +OFFICE 4 0 4 4 +OFFERS 3 0 3 3 +OFFERINGS 1 0 1 1 +OFFERING 1 0 1 1 +OFFERED 3 0 3 3 +OFFER 2 0 2 2 +OFFENSIVE 1 0 1 1 +OFFEND 2 0 2 2 +OCEAN 1 0 1 1 +OCCURRED 5 0 5 5 +OCCUR 1 0 1 1 +OCCUPY 3 0 3 3 +OCCUPIED 2 0 2 2 +OCCASIONS 2 0 2 2 +OCCASION 5 0 5 5 +OBVIOUSLY 1 0 1 1 +OBVIOUS 1 0 1 1 +OBTAINED 1 0 1 1 +OBTAIN 3 0 3 3 +OBSTRUCTION 1 0 1 1 +OBSTINATE 1 0 1 1 +OBSTINACY 1 0 1 1 +OBSERVER 1 0 1 1 +OBSERVED 5 0 5 5 +OBSERVE 1 0 1 1 +OBSERVATIONS 2 0 2 2 +OBSERVATION 1 0 1 1 +OBSERVANT 1 0 1 1 +OBLONG 1 0 1 1 +OBLIGED 3 0 3 3 +OBLIGE 1 0 1 1 +OBLIGATION 3 0 3 3 +OBJECTS 2 0 2 2 +OBJECTION 2 0 2 2 +OBEYING 1 0 1 1 +OBEYED 3 0 3 3 +OBEY 1 0 1 1 +OBEDIENTLY 1 0 1 1 +OBEDIENT 2 0 2 2 +OATH 4 0 4 4 +OAR 1 0 1 1 +O'CLOCK 9 0 9 9 +NUZHAT 4 0 4 4 +NUN 1 0 1 1 +NUMBERED 1 0 1 1 +NUMBER 5 0 5 5 +NUISANCE 1 0 1 1 +NUBIAN 1 0 1 1 +NOWHERE 2 0 2 2 +NOTWITHSTANDING 1 0 1 1 +NOTTINGHAM 1 0 1 1 +NOTORIOUS 1 0 1 1 +NOTION 1 0 1 1 +NOTICED 3 0 3 3 +NOTICE 7 0 7 7 +NOTHING 28 0 28 28 +NOTES 1 0 1 1 +NOTED 2 0 2 2 +NOTE 3 0 3 3 +NOTABLES 1 0 1 1 +NOSE 3 0 3 3 +NORTHWARD 1 0 1 1 +NORTHFIELD 2 0 2 2 +NORTHERN 1 0 1 1 +NORMAN 2 0 2 2 +NOR 21 0 21 21 +NOPE 1 0 1 1 +NOON 1 0 1 1 +NONSENSE 2 0 2 2 +NODDING 1 0 1 1 +NODDED 3 0 3 3 +NOD 2 0 2 2 +NOBODY 6 0 6 6 +NOBLEMEN 1 0 1 1 +NOBLEMAN 1 0 1 1 +NOBLE 6 0 6 6 +NOBILITY 1 0 1 1 +NINTH 1 0 1 1 +NINEVEH 1 0 1 1 +NINETY 2 0 2 2 +NINETEENTH 1 0 1 1 +NINETEEN 1 0 1 1 +NINE 6 0 6 6 +NIMBLENESS 1 0 1 1 +NIKOLAY 1 0 1 1 +NIGHT'S 1 0 1 1 +NIGH 1 0 1 1 +NICOTINE 1 0 1 1 +NICKEL 1 0 1 1 +NICK 1 0 1 1 +NICHOLAS 1 0 1 1 +NICETIES 1 0 1 1 +NICE 3 0 3 3 +NEWS 7 0 7 7 +NEVER 61 0 61 61 +NERVOUSNESS 2 0 2 2 +NERVOUSLY 1 0 1 1 +NERVOUS 2 0 2 2 +NERVES 1 0 1 1 
+NEMESIS 3 0 3 3 +NEITHER 8 0 8 8 +NEIGHBOURS 1 0 1 1 +NEIGHBOURING 1 0 1 1 +NEIGHBORS 5 0 5 5 +NEIGHBORING 2 0 2 2 +NEGRO 2 0 2 2 +NEGLECTING 1 0 1 1 +NEGLECTED 2 0 2 2 +NEGLECT 1 0 1 1 +NEGATIVE 1 0 1 1 +NEEDN'T 1 0 1 1 +NEEDED 9 0 9 9 +NED 3 0 3 3 +NECK 1 0 1 1 +NECESSITY 7 0 7 7 +NECESSARY 10 0 10 10 +NECESSARILY 2 0 2 2 +NEATLY 2 0 2 2 +NEAT 3 0 3 3 +NEARLY 2 0 2 2 +NEARING 1 0 1 1 +NEARED 1 0 1 1 +NAY 2 0 2 2 +NAVEL 1 0 1 1 +NAUSEA 1 0 1 1 +NATURED 2 0 2 2 +NATURE 11 0 11 11 +NATTY 1 0 1 1 +NATIVE 6 0 6 6 +NATIONS 3 0 3 3 +NATION 2 0 2 2 +NARROWNESS 1 0 1 1 +NARROWER 1 0 1 1 +NARROW 5 0 5 5 +NARRATOR 1 0 1 1 +NARRATIVES 2 0 2 2 +NARRATIVE 1 0 1 1 +NARRATE 1 0 1 1 +NARCOTIC 1 0 1 1 +NAPKINS 1 0 1 1 +NAPKIN 1 0 1 1 +NAMES 5 0 5 5 +NAMED 5 0 5 5 +NAME'S 1 0 1 1 +NAME 21 0 21 21 +NAILS 2 0 2 2 +NAILED 2 0 2 2 +NAIL 5 0 5 5 +MYSELF 27 0 27 27 +MY 248 0 248 248 +MUTTERED 5 0 5 5 +MUST 77 0 77 77 +MUSKETS 1 0 1 1 +MUSICIANS 1 0 1 1 +MUSICAL 1 0 1 1 +MUSIC 2 0 2 2 +MURMURED 1 0 1 1 +MURMUR 1 0 1 1 +MURDERED 1 0 1 1 +MURDER 5 0 5 5 +MULTITUDE 1 0 1 1 +MULE 1 0 1 1 +MUFFLED 1 0 1 1 +MUDDY 1 0 1 1 +MUD 1 0 1 1 +MUCOUS 1 0 1 1 +MOVING 6 0 6 6 +MOVEMENTS 2 0 2 2 +MOVED 7 0 7 7 +MOVE 1 0 1 1 +MOUSE 1 0 1 1 +MOURNING 3 0 3 3 +MOURNFULLY 1 0 1 1 +MOUNTED 2 0 2 2 +MOUNTAINS 1 0 1 1 +MOUNTAIN 1 0 1 1 +MOUNT 1 0 1 1 +MOTOR 1 0 1 1 +MOTLEY 1 0 1 1 +MOTIVES 1 0 1 1 +MOTIVE 1 0 1 1 +MOTHERS 5 0 5 5 +MOTHER'S 3 0 3 3 +MORTIS 1 0 1 1 +MORTEM 1 0 1 1 +MORTAR 1 0 1 1 +MORTAL 1 0 1 1 +MORPHINE 2 0 2 2 +MORNING 21 0 21 21 +MOREOVER 1 0 1 1 +MORCERF 3 0 3 3 +MORALS 1 0 1 1 +MORAL 8 0 8 8 +MOPPED 1 0 1 1 +MOORED 1 0 1 1 +MOONLIGHT 3 0 3 3 +MOONFLOWERS 1 0 1 1 +MOON 2 0 2 2 +MOOD 2 0 2 2 +MONTHS 6 0 6 6 +MONTHLY 1 0 1 1 +MONTH 1 0 1 1 +MONTESQUIEU 1 0 1 1 +MONSTROUS 1 0 1 1 +MONSTERS 2 0 2 2 +MONOTONOUS 1 0 1 1 +MONKEY 3 0 3 3 +MONEY 16 0 16 16 +MONDAY 2 0 2 2 +MONASTERY 1 0 1 1 +MONARCH 2 0 2 2 +MOMENTS 6 0 6 6 +MOMENT'S 1 0 1 1 +MOLESTED 1 0 1 1 +MOHAMMED 1 0 1 1 +MODEST 1 0 1 1 +MODERN 2 0 2 2 +MODERATE 2 0 2 2 +MODEL 2 0 2 2 +MOCKERY 1 0 1 1 +MOB 1 0 1 1 +MOANING 2 0 2 2 +MIXTURE 1 0 1 1 +MIXING 1 0 1 1 +MIXED 2 0 2 2 +MIX 3 0 3 3 +MISTRUST 1 0 1 1 +MISTRESSES 1 0 1 1 +MISTRESS 6 0 6 6 +MISTAKE 2 0 2 2 +MIST 2 0 2 2 +MISSOURI 1 0 1 1 +MISSISSIPPI 1 0 1 1 +MISSING 3 0 3 3 +MISSED 5 0 5 5 +MISFORTUNE 1 0 1 1 +MISERY 3 0 3 3 +MISERABLE 2 0 2 2 +MISCONDUCT 1 0 1 1 +MISCONCEPTION 1 0 1 1 +MISCHIEVOUS 2 0 2 2 +MISCHIEF 1 0 1 1 +MISAPPREHENSION 1 0 1 1 +MISANTHROPY 1 0 1 1 +MIRTH 2 0 2 2 +MIRACULOUS 1 0 1 1 +MIRACLES 3 0 3 3 +MIRABELLE 2 0 2 2 +MINUTES 11 0 11 11 +MINUTELY 1 0 1 1 +MINUTE 6 0 6 6 +MINOR 1 0 1 1 +MINNESOTA 1 0 1 1 +MINNEAPOLIS 1 0 1 1 +MINISTERS 1 0 1 1 +MINISTERED 1 0 1 1 +MINISTER 7 0 7 7 +MINIMS 1 0 1 1 +MINIATURE 1 0 1 1 +MINGLED 2 0 2 2 +MINDS 1 0 1 1 +MINDED 1 0 1 1 +MINCE 1 0 1 1 +MILTON 4 0 4 4 +MILLIONS 1 0 1 1 +MILLER'S 1 0 1 1 +MILLER 4 0 4 4 +MILL 1 0 1 1 +MILITARY 5 0 5 5 +MILES 6 0 6 6 +MILE 5 0 5 5 +MILDEWED 1 0 1 1 +MIKE'S 1 0 1 1 +MIKE 2 0 2 2 +MIGHTY 3 0 3 3 +MIGHTINESS 1 0 1 1 +MIGHT 43 0 43 43 +MIDST 3 0 3 3 +MIDSHIPMAN 1 0 1 1 +MIDNIGHT 3 0 3 3 +MIDDY'S 1 0 1 1 +MIDDLE 5 0 5 5 +MICROSCOPIC 1 0 1 1 +MICROBE 1 0 1 1 +METALLIC 1 0 1 1 +MESSES 1 0 1 1 +MESSAGE 1 0 1 1 +MESELF 1 0 1 1 +MERITS 1 0 1 1 +MERE 3 0 3 3 +MERCY 5 0 5 5 +MERCURY 2 0 2 2 +MERCIFUL 1 0 1 1 +MERCIES 1 0 1 1 +MERCHANTS 6 0 6 6 +MERCHANT 3 0 3 3 +MENTION 1 0 1 1 +MENTALLY 3 0 3 3 +MENTAL 3 0 3 3 +MEND 1 0 1 1 +MENACING 1 0 1 1 +MEMORY 4 0 4 4 +MEMORIAL 1 0 1 1 +MEMBRANE 1 0 1 1 +MEMBERS 7 0 7 7 +MEMBER 1 0 1 1 
+MELTED 1 0 1 1 +MELANCHOLY 1 0 1 1 +MEETING 4 0 4 4 +MEET 9 0 9 9 +MEDIUMS 1 0 1 1 +MEDIUM 1 0 1 1 +MEDITATION 1 0 1 1 +MEDITATED 1 0 1 1 +MEDICINE 1 0 1 1 +MEDICAMENTS 1 0 1 1 +MEDICAL 2 0 2 2 +MEDDLE 1 0 1 1 +MEDALS 1 0 1 1 +MEDAL 1 0 1 1 +MECHANICALLY 1 0 1 1 +MECHANICAL 1 0 1 1 +MEAT 6 0 6 6 +MEASURABLE 1 0 1 1 +MEANWHILE 4 0 4 4 +MEANTIME 2 0 2 2 +MEANS 23 0 23 23 +MEANING 2 0 2 2 +MEAN 20 0 20 20 +MAYOR 1 0 1 1 +MAYBE 4 0 4 4 +MATTOCK 1 0 1 1 +MATTERED 1 0 1 1 +MATTER 22 0 22 22 +MATERIALS 2 0 2 2 +MATERIALLY 1 0 1 1 +MATERIAL 1 0 1 1 +MATE 2 0 2 2 +MATCH 1 0 1 1 +MASTERY 1 0 1 1 +MASTERS 1 0 1 1 +MASTERPIECE 1 0 1 1 +MASTERED 1 0 1 1 +MASTER'S 1 0 1 1 +MAST 1 0 1 1 +MASSES 1 0 1 1 +MASON'S 1 0 1 1 +MARY'S 2 0 2 2 +MARY 2 0 2 2 +MARTYR 1 0 1 1 +MARTIN 1 0 1 1 +MARSPORT 1 0 1 1 +MARSHAL'S 1 0 1 1 +MARSH 1 0 1 1 +MARS 3 0 3 3 +MARRY 3 0 3 3 +MARRIED 4 0 4 4 +MARMALADES 2 0 2 2 +MARLBOROUGH'S 1 0 1 1 +MARKS 1 0 1 1 +MARKING 1 0 1 1 +MARK 4 0 4 4 +MARJORIE 3 0 3 3 +MARIUS 6 0 6 6 +MARIA 1 0 1 1 +MARGUERITE 11 0 11 11 +MARGINAL 1 0 1 1 +MARGARET'S 3 0 3 3 +MARGARET 14 0 14 14 +MARE 1 0 1 1 +MARCH 2 0 2 2 +MANY 27 0 27 27 +MANTELPIECE 1 0 1 1 +MANNER 9 0 9 9 +MANIFESTATION 1 0 1 1 +MANCHESTER 1 0 1 1 +MANASSEH 1 0 1 1 +MANAGE 1 0 1 1 +MAMMOTH 1 0 1 1 +MAMMA 1 0 1 1 +MAMIE 1 0 1 1 +MALICE 1 0 1 1 +MALEVOLENT 1 0 1 1 +MALADY 1 0 1 1 +MAKER 1 0 1 1 +MAKAN 8 0 8 8 +MAJOR 5 0 5 5 +MAJESTY 2 0 2 2 +MAINTAINED 1 0 1 1 +MAINTAIN 1 0 1 1 +MAINLY 1 0 1 1 +MAIL 1 0 1 1 +MAIDEN 3 0 3 3 +MAID 3 0 3 3 +MAHOGANY 2 0 2 2 +MAGNIFYING 1 0 1 1 +MAGNIFIES 1 0 1 1 +MAGNIFICENT 2 0 2 2 +MAGNIFICENCE 1 0 1 1 +MAGNANIMITY 1 0 1 1 +MAGICIAN 2 0 2 2 +MAGICAL 1 0 1 1 +MAGIC 1 0 1 1 +MAGAZINE 1 0 1 1 +MADRID 1 0 1 1 +MADNESS 2 0 2 2 +MADELEINE 3 0 3 3 +MADAME 1 0 1 1 +MAD 2 0 2 2 +MACHINES 1 0 1 1 +MACHINERY 1 0 1 1 +LYNCHES 1 0 1 1 +LYING 2 0 2 2 +LUTHER 1 0 1 1 +LUSTILY 1 0 1 1 +LURKING 1 0 1 1 +LUMP 1 0 1 1 +LUKE 4 0 4 4 +LUGGAGE 1 0 1 1 +LUCRATIVE 1 0 1 1 +LUCKY 2 0 2 2 +LUCKLESS 1 0 1 1 +LUCK 3 0 3 3 +LUCIEN 2 0 2 2 +LUCID 1 0 1 1 +LOYALTY 2 0 2 2 +LOYAL 2 0 2 2 +LOWERING 3 0 3 3 +LOWERED 1 0 1 1 +LOW 13 0 13 13 +LOVES 6 0 6 6 +LOVERS 2 0 2 2 +LOVELY 3 0 3 3 +LOVED 7 0 7 7 +LOUVRE 1 0 1 1 +LOUISIANA 1 0 1 1 +LOUIS 3 0 3 3 +LOUDLY 1 0 1 1 +LOUDER 1 0 1 1 +LOUD 8 0 8 8 +LOST 12 0 12 12 +LOSSES 1 0 1 1 +LOSS 3 0 3 3 +LOSING 3 0 3 3 +LOSE 6 0 6 6 +LORN 1 0 1 1 +LORDS 2 0 2 2 +LOQUACITY 1 0 1 1 +LOOSENED 1 0 1 1 +LOOSELY 1 0 1 1 +LOOSE 3 0 3 3 +LOOKS 5 0 5 5 +LOOKING 21 0 21 21 +LONGING 3 0 3 3 +LONGER 16 0 16 16 +LONGED 2 0 2 2 +LONESOMENESS 1 0 1 1 +LONESOME 1 0 1 1 +LONELY 1 0 1 1 +LONDON 4 0 4 4 +LODGE 1 0 1 1 +LOCKED 4 0 4 4 +LOCATE 1 0 1 1 +LOCAL 4 0 4 4 +LOBSTERS 2 0 2 2 +LOBSTER 12 0 12 12 +LOADING 1 0 1 1 +LIVING 5 0 5 5 +LIVID 1 0 1 1 +LIVES 6 0 6 6 +LIVERY 1 0 1 1 +LIVELY 2 0 2 2 +LIVELONG 1 0 1 1 +LIVELIHOOD 1 0 1 1 +LIVED 6 0 6 6 +LITTER 1 0 1 1 +LITERATURE 1 0 1 1 +LITERALLY 1 0 1 1 +LIT 2 0 2 2 +LISTENING 5 0 5 5 +LISTENERS 1 0 1 1 +LISTENER 1 0 1 1 +LISTENED 4 0 4 4 +LIQUOR 4 0 4 4 +LIQUID 1 0 1 1 +LIPS 6 0 6 6 +LIP 3 0 3 3 +LIONS 1 0 1 1 +LION 1 0 1 1 +LINK 1 0 1 1 +LINGO 1 0 1 1 +LINGER 1 0 1 1 +LINES 2 0 2 2 +LINEN 2 0 2 2 +LINE 7 0 7 7 +LIMPED 4 0 4 4 +LIMP 3 0 3 3 +LIMITS 1 0 1 1 +LIMIT 1 0 1 1 +LIMES 1 0 1 1 +LIKING 1 0 1 1 +LIKES 1 0 1 1 +LIKED 9 0 9 9 +LIGHTS 1 0 1 1 +LIGHTLY 1 0 1 1 +LIGHTING 1 0 1 1 +LIGHTENED 1 0 1 1 +LIGHT 19 0 19 19 +LIGATURES 1 0 1 1 +LIFTING 1 0 1 1 +LIFTED 2 0 2 2 +LIFT 1 0 1 1 +LIFETIME 1 0 1 1 +LIEUTENANT 1 0 1 1 +LIES 4 0 4 4 +LIDDY 3 0 3 3 +LIBRARY 2 0 2 2 
+LIBERTY 3 0 3 3 +LIBERATION 2 0 2 2 +LEWIS 1 0 1 1 +LEVITICUS 1 0 1 1 +LEVIN 6 0 6 6 +LEVELLED 1 0 1 1 +LEVEL 1 0 1 1 +LETTING 1 0 1 1 +LETTERS 7 0 7 7 +LETTER 22 0 22 22 +LETS 1 0 1 1 +LET'S 4 0 4 4 +LEST 3 0 3 3 +LESSONS 1 0 1 1 +LESSON 2 0 2 2 +LESSENS 1 0 1 1 +LESSEN 2 0 2 2 +LENT 1 0 1 1 +LENGTH 4 0 4 4 +LEND 2 0 2 2 +LEISURE 3 0 3 3 +LEGISLATURE 1 0 1 1 +LEGALLY 1 0 1 1 +LEGAL 2 0 2 2 +LEER 1 0 1 1 +LEECHES 1 0 1 1 +LED 4 0 4 4 +LEAVING 5 0 5 5 +LEAVES 1 0 1 1 +LEAVED 2 0 2 2 +LEAVE 22 0 22 22 +LEAST 15 0 15 15 +LEARNS 1 0 1 1 +LEARNING 4 0 4 4 +LEARNED 6 0 6 6 +LEARN 5 0 5 5 +LEAP 1 0 1 1 +LEANING 1 0 1 1 +LEAN 1 0 1 1 +LEAF 1 0 1 1 +LEADS 1 0 1 1 +LEADING 3 0 3 3 +LEADERSHIP 1 0 1 1 +LEADER 2 0 2 2 +LAZY 1 0 1 1 +LAZILY 1 0 1 1 +LAYING 3 0 3 3 +LAY 14 0 14 14 +LAWYER 1 0 1 1 +LAUGHTER 3 0 3 3 +LAUGHS 1 0 1 1 +LAUGHING 5 0 5 5 +LAUGH 9 0 9 9 +LATTER 2 0 2 2 +LATIN 1 0 1 1 +LATER 8 0 8 8 +LATELY 2 0 2 2 +LATE 9 0 9 9 +LASTLY 2 0 2 2 +LASTING 1 0 1 1 +LARKIN'S 1 0 1 1 +LARGESSE 1 0 1 1 +LAPSE 1 0 1 1 +LAP 2 0 2 2 +LANTERN 1 0 1 1 +LANGUAGE 2 0 2 2 +LANDSMAN 1 0 1 1 +LANDOWNER 4 0 4 4 +LANDLORD 1 0 1 1 +LANDING 1 0 1 1 +LANDI 1 0 1 1 +LANDED 3 0 3 3 +LAMPLIT 1 0 1 1 +LAMP 1 0 1 1 +LAME 1 0 1 1 +LAMBS 1 0 1 1 +LAID 9 0 9 9 +LADY'S 1 0 1 1 +LADY 15 0 15 15 +LADIES 6 0 6 6 +LACK 2 0 2 2 +LACE 2 0 2 2 +LABOURS 1 0 1 1 +LABOURERS 1 0 1 1 +LABORER 1 0 1 1 +KNUCKLES 1 0 1 1 +KNOWS 8 0 8 8 +KNOWING 3 0 3 3 +KNOWEST 3 0 3 3 +KNOWED 1 0 1 1 +KNOTTY 1 0 1 1 +KNOT 3 0 3 3 +KNOCKER 1 0 1 1 +KNOCK 1 0 1 1 +KNIGHTHOOD 1 0 1 1 +KNIGHT'S 1 0 1 1 +KNEW 16 0 16 16 +KNEES 3 0 3 3 +KNAVE 1 0 1 1 +KITTY 4 0 4 4 +KITCHEN 3 0 3 3 +KISSING 2 0 2 2 +KISSED 6 0 6 6 +KISS 2 0 2 2 +KINSFOLK 1 0 1 1 +KINGS 8 0 8 8 +KINGDOM 3 0 3 3 +KING'S 7 0 7 7 +KINDS 1 0 1 1 +KINDRED 1 0 1 1 +KINDLY 3 0 3 3 +KINDEST 1 0 1 1 +KILLING 1 0 1 1 +KILLED 4 0 4 4 +KILL 14 0 14 14 +KIDNEYS 1 0 1 1 +KICKED 1 0 1 1 +KICK 1 0 1 1 +KHORASAN 2 0 2 2 +KHAN 1 0 1 1 +KEYS 5 0 5 5 +KEYHOLE 1 0 1 1 +KEY 5 0 5 5 +KEPT 9 0 9 9 +KENT 2 0 2 2 +KENNETH 3 0 3 3 +KEEPING 5 0 5 5 +KEEPER'S 1 0 1 1 +KEEPER 2 0 2 2 +KEEP 16 0 16 16 +KEENLY 1 0 1 1 +KEEN 2 0 2 2 +KAZI 1 0 1 1 +KANSAS 7 0 7 7 +K 1 0 1 1 +JUSTLY 1 0 1 1 +JUSTINIAN 1 0 1 1 +JUSTIFIES 1 0 1 1 +JUSTIFICATION 3 0 3 3 +JURY 4 0 4 4 +JURISDICTION 1 0 1 1 +JUNIOR 1 0 1 1 +JUNE 1 0 1 1 +JUMPS 2 0 2 2 +JUMPING 2 0 2 2 +JUMPED 1 0 1 1 +JUMP 1 0 1 1 +JUICES 1 0 1 1 +JUICE 1 0 1 1 +JUGS 1 0 1 1 +JUG 4 0 4 4 +JUDICIAL 1 0 1 1 +JUDGED 1 0 1 1 +JUDAH 2 0 2 2 +JOYOUS 1 0 1 1 +JOYFUL 2 0 2 2 +JOYANCE 2 0 2 2 +JOY 7 0 7 7 +JOURNEYED 1 0 1 1 +JOURNEY 8 0 8 8 +JOURNALISM 1 0 1 1 +JOSEPH 1 0 1 1 +JONES 1 0 1 1 +JOLLY 1 0 1 1 +JOINTS 1 0 1 1 +JOINING 1 0 1 1 +JOINED 2 0 2 2 +JOIN 5 0 5 5 +JOCELYN 1 0 1 1 +JOBS 1 0 1 1 +JOB 7 0 7 7 +JOANNA'S 1 0 1 1 +JIM 5 0 5 5 +JEWISH 2 0 2 2 +JEWELRY 1 0 1 1 +JEW'S 1 0 1 1 +JEW 1 0 1 1 +JESUS 2 0 2 2 +JESTER 2 0 2 2 +JERRY 1 0 1 1 +JERK 1 0 1 1 +JERICHO 1 0 1 1 +JENKINS 2 0 2 2 +JEHU 1 0 1 1 +JEHOVAH 3 0 3 3 +JEHOASH 1 0 1 1 +JEERINGLY 1 0 1 1 +JEERED 1 0 1 1 +JEAN 10 0 10 10 +JANUARY 1 0 1 1 +JANSENIST 1 0 1 1 +JANGLING 1 0 1 1 +JANE'S 1 0 1 1 +JANE 7 0 7 7 +JAMS 2 0 2 2 +JAMIESON 1 0 1 1 +JAMES 3 0 3 3 +JAM 2 0 2 2 +JAIL 1 0 1 1 +JACKSON 4 0 4 4 +JACKMAN 1 0 1 1 +JACKET 1 0 1 1 +JACKAL 8 0 8 8 +JACK 3 0 3 3 +J 2 0 2 2 +IVANOVITCH'S 1 0 1 1 +IVANOVITCH 5 0 5 5 +ITSELF 7 0 7 7 +ITALY 1 0 1 1 +ISSUED 2 0 2 2 +ISSUE 2 0 2 2 +ISRAELITES 1 0 1 1 +ISRAEL'S 2 0 2 2 +ISRAEL 7 0 7 7 +ISOLATION 1 0 1 1 +ISLANDERS 1 0 1 1 +ISLAND 5 0 5 5 +ISLAMISED 1 0 1 1 +ISLAM 1 0 1 1 +ISAAC 2 0 2 2 
+IRRITATION 1 0 1 1 +IRRITABILITY 1 0 1 1 +IRREVERENTLY 1 0 1 1 +IRREVERENCE 1 0 1 1 +IRRESISTIBLY 1 0 1 1 +IRRESISTIBLE 1 0 1 1 +IRON 7 0 7 7 +IRKSOME 1 0 1 1 +IRISH 1 0 1 1 +IRELAND 2 0 2 2 +IRATE 1 0 1 1 +IOWA 2 0 2 2 +INWARD 1 0 1 1 +INVOLVED 1 0 1 1 +INVOKE 1 0 1 1 +INVITED 1 0 1 1 +INVITATION 2 0 2 2 +INVISIBLE 1 0 1 1 +INVINCIBLE 1 0 1 1 +INVETERATE 1 0 1 1 +INVESTIGATION 2 0 2 2 +INVENTING 1 0 1 1 +INVALIDES 1 0 1 1 +INVADING 1 0 1 1 +INVADERS 1 0 1 1 +INVADED 1 0 1 1 +INVADE 1 0 1 1 +INTRODUCTION 1 0 1 1 +INTRODUCING 1 0 1 1 +INTRODUCES 1 0 1 1 +INTRODUCED 1 0 1 1 +INTOXICATED 2 0 2 2 +INTOLERABLE 1 0 1 1 +INTIMATES 1 0 1 1 +INTERVIEWS 1 0 1 1 +INTERVAL 3 0 3 3 +INTERRUPTED 1 0 1 1 +INTERRED 2 0 2 2 +INTERPRETATION 1 0 1 1 +INTERPOLATIONS 1 0 1 1 +INTERNATIONAL 1 0 1 1 +INTERNAL 3 0 3 3 +INTERMISSION 1 0 1 1 +INTERMENT 2 0 2 2 +INTERMEDDLING 1 0 1 1 +INTERFERENCE 1 0 1 1 +INTERFERE 1 0 1 1 +INTERESTING 6 0 6 6 +INTERESTED 3 0 3 3 +INTERCOURSE 1 0 1 1 +INTERCHANGE 1 0 1 1 +INTENSITY 1 0 1 1 +INTENSELY 2 0 2 2 +INTENDED 5 0 5 5 +INTEND 2 0 2 2 +INTELLIGENT 2 0 2 2 +INTELLIGENCE 2 0 2 2 +INTELLECT 1 0 1 1 +INSULTED 1 0 1 1 +INSUFFICIENT 1 0 1 1 +INSTRUMENTS 4 0 4 4 +INSTRUCTIONS 1 0 1 1 +INSTRUCTED 1 0 1 1 +INSTITUTIONS 1 0 1 1 +INSTITUTED 1 0 1 1 +INSTITUTE 1 0 1 1 +INSTINCTS 1 0 1 1 +INSTINCT 3 0 3 3 +INSTEAD 4 0 4 4 +INSTANTLY 1 0 1 1 +INSTANT'S 1 0 1 1 +INSTANCE 1 0 1 1 +INSPIRES 1 0 1 1 +INSPIRATION 3 0 3 3 +INSOLUBLE 1 0 1 1 +INSOLENT 1 0 1 1 +INSISTING 1 0 1 1 +INSISTENCE 1 0 1 1 +INSISTED 2 0 2 2 +INSINUATING 1 0 1 1 +INSIDE 4 0 4 4 +INSHALLAH 1 0 1 1 +INSECURITY 1 0 1 1 +INSCRIPTIONS 2 0 2 2 +INSANE 1 0 1 1 +INQUISITION 1 0 1 1 +INQUIRIES 1 0 1 1 +INQUIRED 5 0 5 5 +INQUIRE 2 0 2 2 +INNOCENT 3 0 3 3 +INNKEEPER 2 0 2 2 +INN 4 0 4 4 +INJURIES 1 0 1 1 +INJURED 1 0 1 1 +INIQUITIES 1 0 1 1 +INHERENT 1 0 1 1 +INHABITANTS 1 0 1 1 +INHABIT 1 0 1 1 +INGREDIENTS 1 0 1 1 +INFORMED 5 0 5 5 +INFORMATION 3 0 3 3 +INFORM 1 0 1 1 +INFLUENCES 1 0 1 1 +INFLUENCED 1 0 1 1 +INFLUENCE 10 0 10 10 +INFLICT 1 0 1 1 +INFLATE 1 0 1 1 +INFIRMITY 1 0 1 1 +INFIRM 1 0 1 1 +INFINITELY 1 0 1 1 +INFINITE 4 0 4 4 +INFERIOR 3 0 3 3 +INFERENTIALLY 1 0 1 1 +INFAMY 2 0 2 2 +INFAMOUS 1 0 1 1 +INEXORABLY 1 0 1 1 +INEVITABLE 1 0 1 1 +INELEGANTLY 1 0 1 1 +INDUSTRY 1 0 1 1 +INDUSTRIOUS 1 0 1 1 +INDUSTRIAL 1 0 1 1 +INDULGENT 1 0 1 1 +INDULGENCE 2 0 2 2 +INDUCED 1 0 1 1 +INDIVIDUALS 9 0 9 9 +INDIVIDUAL 1 0 1 1 +INDISPOSITION 1 0 1 1 +INDISCRETION 1 0 1 1 +INDIGNATION 1 0 1 1 +INDIFFERENT 2 0 2 2 +INDIFFERENCE 1 0 1 1 +INDICATIONS 2 0 2 2 +INDICATED 2 0 2 2 +INDIANS 2 0 2 2 +INDIANA 2 0 2 2 +INDIAN 1 0 1 1 +INDESCRIBABLE 1 0 1 1 +INDEPENDENT 2 0 2 2 +INDEPENDENCE 4 0 4 4 +INDEED 14 0 14 14 +INDECISION 1 0 1 1 +INCUR 1 0 1 1 +INCREDULOUSLY 1 0 1 1 +INCREDULITY 1 0 1 1 +INCREASING 2 0 2 2 +INCREASES 2 0 2 2 +INCREASED 5 0 5 5 +INCREASE 5 0 5 5 +INCORRECT 1 0 1 1 +INCONSISTENCY 1 0 1 1 +INCONCEIVABLE 1 0 1 1 +INCOMPARABLE 1 0 1 1 +INCOHERENT 1 0 1 1 +INCLUDING 2 0 2 2 +INCLUDE 1 0 1 1 +INCLINED 1 0 1 1 +INCLINE 1 0 1 1 +INCIDENTS 1 0 1 1 +INCARCERATING 1 0 1 1 +INASMUCH 1 0 1 1 +INANIMATE 1 0 1 1 +IMPULSE 3 0 3 3 +IMPROVISED 2 0 2 2 +IMPROVISE 1 0 1 1 +IMPROVING 1 0 1 1 +IMPROVIDENT 1 0 1 1 +IMPROVED 1 0 1 1 +IMPROVE 1 0 1 1 +IMPRESSION 1 0 1 1 +IMPRECATIONS 1 0 1 1 +IMPRECATION 1 0 1 1 +IMPOSSIBLE 4 0 4 4 +IMPOSING 1 0 1 1 +IMPOSES 1 0 1 1 +IMPORTS 1 0 1 1 +IMPORTED 1 0 1 1 +IMPORTANT 1 0 1 1 +IMPORTANCE 3 0 3 3 +IMPLY 2 0 2 2 +IMPLIES 1 0 1 1 +IMPLIED 2 0 2 2 +IMPLACABLE 1 0 1 1 +IMPIOUS 1 0 1 1 +IMPERTINENT 1 0 1 1 
+IMPERSONAL 1 0 1 1 +IMPERIOUS 1 0 1 1 +IMPERATIVE 1 0 1 1 +IMPATIENTLY 2 0 2 2 +IMPATIENT 2 0 2 2 +IMMORTALS 1 0 1 1 +IMMENSE 3 0 3 3 +IMMEDIATELY 9 0 9 9 +IMMEDIATE 1 0 1 1 +IMITATION 1 0 1 1 +IMITATE 1 0 1 1 +IMBECILE 1 0 1 1 +IMAGINED 1 0 1 1 +IMAGINE 6 0 6 6 +IMAGINATION 1 0 1 1 +IMAGINARY 1 0 1 1 +IMAGE 2 0 2 2 +ILLUSTRIOUS 4 0 4 4 +ILLUSION 1 0 1 1 +ILLITERATE 1 0 1 1 +IGNORED 1 0 1 1 +IGNORANT 2 0 2 2 +IGNORANCE 1 0 1 1 +IGNOMY 1 0 1 1 +IDOLATRIES 1 0 1 1 +IDLE 1 0 1 1 +IDIOTIC 1 0 1 1 +IDIOT 1 0 1 1 +IDEAS 2 0 2 2 +IDEAL 1 0 1 1 +ICE 1 0 1 1 +HYPOTHETICAL 1 0 1 1 +HYPODERMICALLY 1 0 1 1 +HYPODERMIC 1 0 1 1 +HYDROCHLORIC 2 0 2 2 +HUSTLED 1 0 1 1 +HUSTLE 1 0 1 1 +HUSKILY 1 0 1 1 +HUSH 1 0 1 1 +HUSBANDMEN 1 0 1 1 +HUSBAND'S 3 0 3 3 +HUSBAND 9 0 9 9 +HURRYING 3 0 3 3 +HURRY 1 0 1 1 +HURRIEDLY 3 0 3 3 +HURRIED 3 0 3 3 +HURRICANE 1 0 1 1 +HUNTED 2 0 2 2 +HUNT 1 0 1 1 +HUNGRY 2 0 2 2 +HUNGER 2 0 2 2 +HUNGARY 1 0 1 1 +HUNG 2 0 2 2 +HUNDREDTH 1 0 1 1 +HUNDREDS 1 0 1 1 +HUNDRED 29 0 29 29 +HUMOURS 1 0 1 1 +HUMILIATIONS 1 0 1 1 +HUMILIATION 1 0 1 1 +HUMILIATED 1 0 1 1 +HUMBLY 1 0 1 1 +HUMBLE 1 0 1 1 +HUMANITY 1 0 1 1 +HUMANITARY 1 0 1 1 +HUMAN 6 0 6 6 +HULLO 1 0 1 1 +HUGELY 1 0 1 1 +HUGE 3 0 3 3 +HUDSPETH 1 0 1 1 +HOWEVER 16 0 16 16 +HOUSES 4 0 4 4 +HOUSEHOLD 1 0 1 1 +HOURS 12 0 12 12 +HOUR 16 0 16 16 +HOTLY 1 0 1 1 +HOTEL 4 0 4 4 +HOT 5 0 5 5 +HOST 3 0 3 3 +HOSPITALITY 1 0 1 1 +HOSPITABLY 1 0 1 1 +HORSEBACK 1 0 1 1 +HORSE 10 0 10 10 +HORRIBLE 3 0 3 3 +HORNS 1 0 1 1 +HORIZONTAL 1 0 1 1 +HORIZON 2 0 2 2 +HORACE 1 0 1 1 +HOPPING 1 0 1 1 +HOPPER 1 0 1 1 +HOPING 1 0 1 1 +HOPELESS 1 0 1 1 +HOPEFUL 1 0 1 1 +HOPED 2 0 2 2 +HOOTED 1 0 1 1 +HOOK 1 0 1 1 +HONOURS 1 0 1 1 +HONORS 2 0 2 2 +HONEYMOON 1 0 1 1 +HONEY 1 0 1 1 +HONESTLY 1 0 1 1 +HONEST 6 0 6 6 +HOMELESS 1 0 1 1 +HOLLOW 2 0 2 2 +HOLE 1 0 1 1 +HOLDS 1 0 1 1 +HOLDING 6 0 6 6 +HITHERTO 2 0 2 2 +HITHER 7 0 7 7 +HITCH 1 0 1 1 +HISTORY 4 0 4 4 +HISTORIANS 1 0 1 1 +HINTS 1 0 1 1 +HINTED 1 0 1 1 +HINGES 1 0 1 1 +HINDER 1 0 1 1 +HIGHWAYS 1 0 1 1 +HIGHS 1 0 1 1 +HIGHLY 1 0 1 1 +HIGHEST 1 0 1 1 +HIGHER 1 0 1 1 +HIGGINS 1 0 1 1 +HIERARCHY 1 0 1 1 +HIDING 1 0 1 1 +HIDEOUS 1 0 1 1 +HIDE 2 0 2 2 +HIDDEN 3 0 3 3 +HID 2 0 2 2 +HEWN 1 0 1 1 +HESITATING 1 0 1 1 +HESITATED 2 0 2 2 +HERSELF 35 0 35 35 +HERS 4 0 4 4 +HERO 1 0 1 1 +HERIOT 1 0 1 1 +HERETOFORE 1 0 1 1 +HERDSMEN 1 0 1 1 +HERCULEAN 1 0 1 1 +HERBS 1 0 1 1 +HENRY 3 0 3 3 +HENCE 4 0 4 4 +HELSTONE 1 0 1 1 +HELPLESSLY 1 0 1 1 +HELPLESS 3 0 3 3 +HELPED 3 0 3 3 +HELP 17 0 17 17 +HELMET 2 0 2 2 +HELL 4 0 4 4 +HELD 13 0 13 13 +HEIR 2 0 2 2 +HEIGHT 1 0 1 1 +HEELED 1 0 1 1 +HEEL 1 0 1 1 +HEEDED 1 0 1 1 +HEED 1 0 1 1 +HEDGES 1 0 1 1 +HEDGE 1 0 1 1 +HEBREWS 4 0 4 4 +HEBREW 2 0 2 2 +HEAVY 11 0 11 11 +HEAVIEST 1 0 1 1 +HEAVENS 1 0 1 1 +HEAVEN'S 1 0 1 1 +HEAVEN 6 0 6 6 +HEAVE 1 0 1 1 +HEAT 1 0 1 1 +HEARTS 5 0 5 5 +HEARTILY 1 0 1 1 +HEARTIEST 1 0 1 1 +HEARTED 2 0 2 2 +HEARING 2 0 2 2 +HEARD 28 0 28 28 +HEAP 2 0 2 2 +HEALTHY 1 0 1 1 +HEALTH 1 0 1 1 +HEALED 1 0 1 1 +HEADS 4 0 4 4 +HEADQUARTERS 2 0 2 2 +HEADLONG 1 0 1 1 +HEADLIGHTS 1 0 1 1 +HEADED 4 0 4 4 +HEADACHES 1 0 1 1 +HAY 1 0 1 1 +HAVING 22 0 22 22 +HAVEN'T 4 0 4 4 +HAVEN 1 0 1 1 +HAUNT 3 0 3 3 +HAUNCHES 1 0 1 1 +HAUGHTINESS 1 0 1 1 +HATTON 1 0 1 1 +HATS 1 0 1 1 +HATREDS 1 0 1 1 +HATRED 2 0 2 2 +HATH 17 0 17 17 +HATES 3 0 3 3 +HATED 3 0 3 3 +HAT 3 0 3 3 +HASTY 2 0 2 2 +HASTILY 2 0 2 2 +HASTENED 1 0 1 1 +HASTEN 1 0 1 1 +HAST 7 0 7 7 +HASN'T 1 0 1 1 +HASHISH 1 0 1 1 +HARVEST 3 0 3 3 +HARSHLY 2 0 2 2 +HARRY 3 0 3 3 +HARRISONVILLE 2 0 2 2 +HAROLD 1 0 1 1 +HARNESSED 1 0 
1 1 +HARMONY 1 0 1 1 +HARMLESS 1 0 1 1 +HARM 6 0 6 6 +HARKNESS 1 0 1 1 +HARK 1 0 1 1 +HARGRAVE 1 0 1 1 +HARDWARE 1 0 1 1 +HARDLY 10 0 10 10 +HARD 16 0 16 16 +HARBOR 1 0 1 1 +HAPPY 7 0 7 7 +HAPPINESS 5 0 5 5 +HAPPIEST 1 0 1 1 +HAPPIER 3 0 3 3 +HAPPENS 1 0 1 1 +HAPPENED 10 0 10 10 +HAPPEN 3 0 3 3 +HANGING 1 0 1 1 +HANGED 1 0 1 1 +HANG 3 0 3 3 +HANDSOME 4 0 4 4 +HANDLED 1 0 1 1 +HANDKERCHIEF 3 0 3 3 +HANDING 1 0 1 1 +HANDIER 1 0 1 1 +HANDED 1 0 1 1 +HAMPERED 1 0 1 1 +HAMMERS 1 0 1 1 +HAMMER 1 0 1 1 +HALVES 1 0 1 1 +HALTS 1 0 1 1 +HALTING 1 0 1 1 +HALT 5 0 5 5 +HALLS 1 0 1 1 +HALFPENNY 1 0 1 1 +HALEY'S 1 0 1 1 +HALE 6 0 6 6 +HAIRED 1 0 1 1 +HAIN'T 1 0 1 1 +HAIL 1 0 1 1 +HAG 1 0 1 1 +HACK 1 0 1 1 +HABITUAL 1 0 1 1 +HABITS 2 0 2 2 +GYLINGDEN 1 0 1 1 +GUY 1 0 1 1 +GUT 3 0 3 3 +GUSH 1 0 1 1 +GUN 2 0 2 2 +GULPED 1 0 1 1 +GULLET 1 0 1 1 +GULF 1 0 1 1 +GUISE 1 0 1 1 +GUINEA 3 0 3 3 +GUILTY 5 0 5 5 +GUILT 2 0 2 2 +GUIDE 3 0 3 3 +GUESSED 1 0 1 1 +GUESS 5 0 5 5 +GUARDS 1 0 1 1 +GUARDED 1 0 1 1 +GUARD 1 0 1 1 +GRUMBLINGLY 1 0 1 1 +GRUMBLED 2 0 2 2 +GRUFFLY 1 0 1 1 +GRUFFISH 1 0 1 1 +GROWTH 1 0 1 1 +GROWN 1 0 1 1 +GROWING 3 0 3 3 +GROW 1 0 1 1 +GROVE 1 0 1 1 +GROUPS 6 0 6 6 +GROUP 3 0 3 3 +GROUNDS 1 0 1 1 +GROUND 6 0 6 6 +GROTTO 1 0 1 1 +GROOMED 1 0 1 1 +GROOM 1 0 1 1 +GROAN 2 0 2 2 +GRINNING 3 0 3 3 +GRINNED 2 0 2 2 +GRIN 2 0 2 2 +GRIMSBY 1 0 1 1 +GRIMACED 1 0 1 1 +GRIEVING 1 0 1 1 +GREW 4 0 4 4 +GREET 2 0 2 2 +GREENTON 1 0 1 1 +GREENHORNS 1 0 1 1 +GREEK 2 0 2 2 +GREATLY 1 0 1 1 +GREATEST 6 0 6 6 +GREATER 6 0 6 6 +GREASY 1 0 1 1 +GRAVITY 1 0 1 1 +GRAVES 1 0 1 1 +GRAVELLED 1 0 1 1 +GRATITUDE 5 0 5 5 +GRATING 2 0 2 2 +GRATIFICATION 1 0 1 1 +GRATEFUL 2 0 2 2 +GRATED 1 0 1 1 +GRASS 2 0 2 2 +GRASP 2 0 2 2 +GRAPPLE 1 0 1 1 +GRAPE 1 0 1 1 +GRANTING 1 0 1 1 +GRANDSON 1 0 1 1 +GRANDPAPA 1 0 1 1 +GRANDFATHER 2 0 2 2 +GRANDEUR 1 0 1 1 +GRANDDAUGHTER 1 0 1 1 +GRAINS 1 0 1 1 +GRAIN 4 0 4 4 +GRAFTON'S 1 0 1 1 +GRAFTON 1 0 1 1 +GRAFT 2 0 2 2 +GRADUALLY 1 0 1 1 +GRACIOUSLY 2 0 2 2 +GRACIOUS 3 0 3 3 +GRACE 1 0 1 1 +GRABBED 2 0 2 2 +GOWN 1 0 1 1 +GOVERNOR 2 0 2 2 +GOVERNMENTS 3 0 3 3 +GOVERNMENT'S 1 0 1 1 +GOVERNMENT 20 0 20 20 +GOTTEN 1 0 1 1 +GOOSE 1 0 1 1 +GOODNESS 5 0 5 5 +GONE 17 0 17 17 +GOLFING 1 0 1 1 +GOLDFISH 1 0 1 1 +GOLDFINCH 1 0 1 1 +GOLDEN 3 0 3 3 +GOES 7 0 7 7 +GNAWING 1 0 1 1 +GNASHING 1 0 1 1 +GNARLED 1 0 1 1 +GLOWING 2 0 2 2 +GLOWED 3 0 3 3 +GLOVES 3 0 3 3 +GLOVE 1 0 1 1 +GLORY 2 0 2 2 +GLORIOUS 1 0 1 1 +GLORIFY 1 0 1 1 +GLOOMY 2 0 2 2 +GLOOM 1 0 1 1 +GLOATING 1 0 1 1 +GLINTING 1 0 1 1 +GLIMPSE 1 0 1 1 +GLIMMER 1 0 1 1 +GLIDING 1 0 1 1 +GLEAMED 1 0 1 1 +GLAZED 1 0 1 1 +GLANCING 1 0 1 1 +GLANCES 2 0 2 2 +GLANCED 2 0 2 2 +GLANCE 3 0 3 3 +GLADNESS 2 0 2 2 +GLADLY 1 0 1 1 +GLADDENEST 1 0 1 1 +GLADDENED 1 0 1 1 +GIVES 7 0 7 7 +GIRDLE 2 0 2 2 +GIMLET 1 0 1 1 +GILROY 1 0 1 1 +GIFTS 1 0 1 1 +GIFTED 1 0 1 1 +GIANT'S 1 0 1 1 +GIANT 1 0 1 1 +GHOSTS 1 0 1 1 +GHOST 2 0 2 2 +GHASTLY 2 0 2 2 +GETTING 12 0 12 12 +GETS 3 0 3 3 +GESTURE 1 0 1 1 +GERMAN 7 0 7 7 +GERM 1 0 1 1 +GEORGIA 1 0 1 1 +GEORGES 1 0 1 1 +GENUINE 1 0 1 1 +GENTLY 1 0 1 1 +GENTLE 1 0 1 1 +GENIUS 1 0 1 1 +GENIALLY 1 0 1 1 +GENEROUS 2 0 2 2 +GENEROSITY 1 0 1 1 +GENERATION 1 0 1 1 +GENERALLY 3 0 3 3 +GENERAL 7 0 7 7 +GEAR 2 0 2 2 +GAZING 2 0 2 2 +GAZED 3 0 3 3 +GAY 2 0 2 2 +GAVE 32 0 32 32 +GAULS 1 0 1 1 +GATHERING 2 0 2 2 +GATHERED 8 0 8 8 +GATHER 1 0 1 1 +GATES 1 0 1 1 +GATE 4 0 4 4 +GASPED 2 0 2 2 +GASP 1 0 1 1 +GASHED 1 0 1 1 +GARNISHMENT 1 0 1 1 +GARMENTS 2 0 2 2 +GARLANDED 1 0 1 1 +GARLAND 1 0 1 1 +GARDEN'S 1 0 1 1 +GARDEN 7 0 7 7 +GAPS 1 0 1 1 +GAP 1 
0 1 1 +GANG 5 0 5 5 +GAMMER 1 0 1 1 +GAMESTER 1 0 1 1 +GAME 6 0 6 6 +GAMBLING 3 0 3 3 +GAMBLERS 1 0 1 1 +GALLOPED 1 0 1 1 +GALLERY 1 0 1 1 +GALLANT 3 0 3 3 +GALL 1 0 1 1 +GAINED 1 0 1 1 +GAIN 3 0 3 3 +GAILY 1 0 1 1 +GAIETY 1 0 1 1 +GAD'S 1 0 1 1 +GAD 1 0 1 1 +GABBLE 1 0 1 1 +G 1 0 1 1 +FUZZ 1 0 1 1 +FUTURE 3 0 3 3 +FUSS 2 0 2 2 +FURY 2 0 2 2 +FURTHEST 1 0 1 1 +FURTHER 6 0 6 6 +FURNITURE 1 0 1 1 +FURNISHED 1 0 1 1 +FURNACE 1 0 1 1 +FURIOUS 2 0 2 2 +FUR 1 0 1 1 +FUNNY 3 0 3 3 +FUNDS 1 0 1 1 +FUND 1 0 1 1 +FUN 2 0 2 2 +FUMED 1 0 1 1 +FULLY 1 0 1 1 +FULFILLED 1 0 1 1 +FULFIL 1 0 1 1 +FUGITIVES 1 0 1 1 +FUEL 1 0 1 1 +FRY 1 0 1 1 +FRUITS 4 0 4 4 +FRUITLESS 1 0 1 1 +FRUIT 7 0 7 7 +FROWNED 1 0 1 1 +FROWN 1 0 1 1 +FROSTY 1 0 1 1 +FROST 1 0 1 1 +FRONTIERS 2 0 2 2 +FRONT 13 0 13 13 +FRIGHTFUL 4 0 4 4 +FRIGHTENS 1 0 1 1 +FRIGHTENED 3 0 3 3 +FRIGHTEN 1 0 1 1 +FRIENDSHIP 2 0 2 2 +FRIENDS 17 0 17 17 +FRIENDLY 4 0 4 4 +FRIENDLINESS 1 0 1 1 +FRIEND'S 1 0 1 1 +FRIEND 14 0 14 14 +FRIDAY 2 0 2 2 +FRIAR 1 0 1 1 +FRET 2 0 2 2 +FRESHEST 1 0 1 1 +FRESH 5 0 5 5 +FRERE 1 0 1 1 +FREQUENTLY 2 0 2 2 +FREQUENT 2 0 2 2 +FRENCH 5 0 5 5 +FREEWAY 1 0 1 1 +FREELY 2 0 2 2 +FREEDOM 6 0 6 6 +FREED 2 0 2 2 +FREE 12 0 12 12 +FRAUD 1 0 1 1 +FRANZ 8 0 8 8 +FRANTICALLY 1 0 1 1 +FRANKNESS 1 0 1 1 +FRANKLY 1 0 1 1 +FRANK 1 0 1 1 +FRANCOIS 1 0 1 1 +FRANCISCO 5 0 5 5 +FRANCIS 1 0 1 1 +FRANCE 3 0 3 3 +FRAME 1 0 1 1 +FRAGMENTS 1 0 1 1 +FOUR 24 0 24 24 +FOUNDED 2 0 2 2 +FOUNDATION 1 0 1 1 +FOUND 37 0 37 37 +FOUGHT 2 0 2 2 +FOSTER 3 0 3 3 +FORWARDS 3 0 3 3 +FORWARD 5 0 5 5 +FORTY 13 0 13 13 +FORTUNES 1 0 1 1 +FORTUNE 6 0 6 6 +FORTUNATELY 5 0 5 5 +FORTNIGHT 1 0 1 1 +FORTHWITH 1 0 1 1 +FORTH 4 0 4 4 +FORSOOTH 1 0 1 1 +FORMS 2 0 2 2 +FORMING 2 0 2 2 +FORMIDABLE 2 0 2 2 +FORMERLY 2 0 2 2 +FORMER 4 0 4 4 +FORM 9 0 9 9 +FORGOTTEN 4 0 4 4 +FORGOT 7 0 7 7 +FORGIVE 2 0 2 2 +FORGETTING 1 0 1 1 +FORGET 2 0 2 2 +FORGERIES 1 0 1 1 +FOREVER 3 0 3 3 +FORETASTE 1 0 1 1 +FORESTERS 1 0 1 1 +FOREST 2 0 2 2 +FORESHADOWED 1 0 1 1 +FORENOON 1 0 1 1 +FOREMOST 1 0 1 1 +FOREMAN 1 0 1 1 +FORELOCK 1 0 1 1 +FOREIGNERS 1 0 1 1 +FOREIGN 6 0 6 6 +FOREHEAD 4 0 4 4 +FOREGATHERED 1 0 1 1 +FOREFINGER 1 0 1 1 +FORCES 3 0 3 3 +FORCED 4 0 4 4 +FORCE 7 0 7 7 +FORBIDDEN 1 0 1 1 +FORBID 1 0 1 1 +FORBEARANCE 1 0 1 1 +FORBEAR 1 0 1 1 +FOOTSTEPS 1 0 1 1 +FOOTNOTE 1 0 1 1 +FOOTED 1 0 1 1 +FOOT 1 0 1 1 +FOOLISH 3 0 3 3 +FOOD 4 0 4 4 +FOND 2 0 2 2 +FOLLY 1 0 1 1 +FOLLOWING 10 0 10 10 +FOLLOWERS 3 0 3 3 +FOLLOWER 1 0 1 1 +FOLLOW 10 0 10 10 +FOLKS 3 0 3 3 +FOLDS 1 0 1 1 +FOLDED 2 0 2 2 +FOLD 1 0 1 1 +FOGGY 1 0 1 1 +FOES 2 0 2 2 +FLYING 1 0 1 1 +FLUTTERING 2 0 2 2 +FLUTTER 1 0 1 1 +FLUSHED 2 0 2 2 +FLUSH 1 0 1 1 +FLUNG 2 0 2 2 +FLUID 2 0 2 2 +FLOWERS 4 0 4 4 +FLOW 1 0 1 1 +FLOURISHING 1 0 1 1 +FLOURISHED 1 0 1 1 +FLOURISH 1 0 1 1 +FLOCKS 1 0 1 1 +FLOATED 1 0 1 1 +FLITTED 1 0 1 1 +FLIRTATION 1 0 1 1 +FLING 1 0 1 1 +FLINCH 1 0 1 1 +FLIGHT 5 0 5 5 +FLICK 1 0 1 1 +FLEW 1 0 1 1 +FLEERED 1 0 1 1 +FLEECED 1 0 1 1 +FLEE 1 0 1 1 +FLED 4 0 4 4 +FLATTERY 1 0 1 1 +FLATTERER 1 0 1 1 +FLATTERED 1 0 1 1 +FLATHEADS 1 0 1 1 +FLASK 1 0 1 1 +FLASHLIGHT 1 0 1 1 +FLASHING 1 0 1 1 +FLASHED 1 0 1 1 +FLASH 2 0 2 2 +FLARING 1 0 1 1 +FLAPPING 1 0 1 1 +FLAMES 2 0 2 2 +FLAME 5 0 5 5 +FLAGRANT 1 0 1 1 +FLAGONS 1 0 1 1 +FLAGON 1 0 1 1 +FLAGGED 1 0 1 1 +FLAG 1 0 1 1 +FLABBERGASTED 1 0 1 1 +FIXING 1 0 1 1 +FIXED 5 0 5 5 +FIX 1 0 1 1 +FIVE 20 0 20 20 +FITTING 2 0 2 2 +FITTED 2 0 2 2 +FITS 1 0 1 1 +FIT 7 0 7 7 +FISHING 3 0 3 3 +FISHIN 1 0 1 1 +FISHER 2 0 2 2 +FIRSTER 1 0 1 1 +FIRST 54 0 54 54 +FIRMLY 2 0 2 2 +FIRM 1 0 1 1 
+FIRING 2 0 2 2 +FIREPLACE 1 0 1 1 +FIREMAN 3 0 3 3 +FIREFLY 1 0 1 1 +FIRED 1 0 1 1 +FIRE 15 0 15 15 +FINS 1 0 1 1 +FINNEY 2 0 2 2 +FINISHING 2 0 2 2 +FINISH 3 0 3 3 +FINGERS 1 0 1 1 +FINGERING 1 0 1 1 +FINGER 6 0 6 6 +FINEST 1 0 1 1 +FINELY 2 0 2 2 +FINE 10 0 10 10 +FINDING 8 0 8 8 +FINANCIAL 1 0 1 1 +FINALLY 6 0 6 6 +FINAL 2 0 2 2 +FIN 1 0 1 1 +FILTER 1 0 1 1 +FILMY 1 0 1 1 +FILLED 5 0 5 5 +FILL 4 0 4 4 +FIGURE'S 1 0 1 1 +FIGURE 3 0 3 3 +FIGHTING 1 0 1 1 +FIGHT 5 0 5 5 +FIFTY 14 0 14 14 +FIERCE 2 0 2 2 +FIENDS 1 0 1 1 +FIENDISH 1 0 1 1 +FICKLE 2 0 2 2 +FEW 26 0 26 26 +FEVERISH 4 0 4 4 +FEVER 1 0 1 1 +FETTERS 1 0 1 1 +FETCHED 1 0 1 1 +FETCH 7 0 7 7 +FESTIVE 1 0 1 1 +FESTIVAL 1 0 1 1 +FEROCIOUS 1 0 1 1 +FENDER 1 0 1 1 +FENCED 1 0 1 1 +FENCE 4 0 4 4 +FEMALE 1 0 1 1 +FELLOWSHIP 1 0 1 1 +FELLOWS 2 0 2 2 +FELLOW 13 0 13 13 +FEET 9 0 9 9 +FEELINGS 3 0 3 3 +FEELING 10 0 10 10 +FEEL 13 0 13 13 +FEEDS 1 0 1 1 +FEED 1 0 1 1 +FEEBLY 1 0 1 1 +FEEBLE 2 0 2 2 +FEE 1 0 1 1 +FEDERAL 1 0 1 1 +FED 1 0 1 1 +FEBRUARY 5 0 5 5 +FEATURES 1 0 1 1 +FEATHERS 1 0 1 1 +FEATHER 1 0 1 1 +FEARS 1 0 1 1 +FEARLESS 1 0 1 1 +FEARING 1 0 1 1 +FEARFUL 2 0 2 2 +FEARED 4 0 4 4 +FEAR 13 0 13 13 +FAVOURS 1 0 1 1 +FAULT 5 0 5 5 +FAUCHELEVENT 24 0 24 24 +FATTER 1 0 1 1 +FATIGUE 2 0 2 2 +FATHERLY 1 0 1 1 +FATE 5 0 5 5 +FATALLY 1 0 1 1 +FATAL 2 0 2 2 +FASTER 2 0 2 2 +FASHIONS 1 0 1 1 +FASHIONED 2 0 2 2 +FASHION 2 0 2 2 +FASCINATION 1 0 1 1 +FARTHEST 1 0 1 1 +FARTHER 3 0 3 3 +FARMS 1 0 1 1 +FAREWELL 1 0 1 1 +FARED 1 0 1 1 +FANTASTIC 1 0 1 1 +FANS 1 0 1 1 +FANNY 5 0 5 5 +FANGED 1 0 1 1 +FANCY 4 0 4 4 +FANCIFUL 1 0 1 1 +FANCIED 1 0 1 1 +FANATICS 1 0 1 1 +FAMOUS 1 0 1 1 +FAMILIES 3 0 3 3 +FAMILIARITY 1 0 1 1 +FAMILIAR 2 0 2 2 +FAME 2 0 2 2 +FALSE 1 0 1 1 +FALLING 6 0 6 6 +FALLEN 1 0 1 1 +FALL 5 0 5 5 +FAITHLESS 1 0 1 1 +FAITHFULLY 2 0 2 2 +FAITHFUL 3 0 3 3 +FAIRY 2 0 2 2 +FAIRLY 3 0 3 3 +FAINTNESS 1 0 1 1 +FAINTING 2 0 2 2 +FAINT 4 0 4 4 +FAILURES 3 0 3 3 +FAILURE 1 0 1 1 +FAILS 1 0 1 1 +FAILING 3 0 3 3 +FAIL 3 0 3 3 +FAGGOT 1 0 1 1 +FACTS 3 0 3 3 +FACTORIES 2 0 2 2 +FACTOR 1 0 1 1 +FACT 14 0 14 14 +FACING 3 0 3 3 +FACES 2 0 2 2 +FACED 2 0 2 2 +FACE 33 0 33 33 +FABULOUS 1 0 1 1 +EYELIDS 1 0 1 1 +EYED 4 0 4 4 +EXTREMELY 4 0 4 4 +EXTREME 2 0 2 2 +EXTRAORDINARY 2 0 2 2 +EXTINGUISHING 1 0 1 1 +EXTINGUISH 1 0 1 1 +EXTERNAL 2 0 2 2 +EXTENUATING 1 0 1 1 +EXTENT 2 0 2 2 +EXTENSION 1 0 1 1 +EXTENDING 2 0 2 2 +EXTENDED 2 0 2 2 +EXTEND 1 0 1 1 +EXPRESSLY 1 0 1 1 +EXPRESSION 4 0 4 4 +EXPRESSED 3 0 3 3 +EXPOSURE 1 0 1 1 +EXPOSES 1 0 1 1 +EXPOSED 3 0 3 3 +EXPOSE 3 0 3 3 +EXPLOITING 1 0 1 1 +EXPLANATORY 1 0 1 1 +EXPLANATION 1 0 1 1 +EXPLAINING 1 0 1 1 +EXPLAIN 1 0 1 1 +EXPIATION 1 0 1 1 +EXPERIMENTS 1 0 1 1 +EXPERIMENTING 1 0 1 1 +EXPERIENCES 1 0 1 1 +EXPERIENCED 1 0 1 1 +EXPENSES 2 0 2 2 +EXPENSE 2 0 2 2 +EXPENDED 1 0 1 1 +EXPELLED 1 0 1 1 +EXPEL 1 0 1 1 +EXPEDIENT 1 0 1 1 +EXPECTS 1 0 1 1 +EXPECTED 8 0 8 8 +EXPECT 4 0 4 4 +EXPANSE 2 0 2 2 +EXOTIC 1 0 1 1 +EXIT 1 0 1 1 +EXISTS 1 0 1 1 +EXISTING 1 0 1 1 +EXISTENCE 5 0 5 5 +EXISTED 1 0 1 1 +EXIST 2 0 2 2 +EXHIBITED 4 0 4 4 +EXERTIONS 1 0 1 1 +EXERTING 1 0 1 1 +EXERTED 1 0 1 1 +EXERT 1 0 1 1 +EXERCISES 1 0 1 1 +EXERCISE 3 0 3 3 +EXECUTIVE 5 0 5 5 +EXECUTIONER'S 2 0 2 2 +EXECUTION 2 0 2 2 +EXECUTED 1 0 1 1 +EXECUTE 1 0 1 1 +EXECRABLE 1 0 1 1 +EXCUSES 1 0 1 1 +EXCUSABLE 1 0 1 1 +EXCLAMATION 1 0 1 1 +EXCLAIMING 1 0 1 1 +EXCLAIM 1 0 1 1 +EXCITEMENT 4 0 4 4 +EXCITEDLY 2 0 2 2 +EXCITED 2 0 2 2 +EXCITABILITY 1 0 1 1 +EXCHANGED 2 0 2 2 +EXCHANGE 1 0 1 1 +EXCESSIVELY 1 0 1 1 +EXCESS 1 0 1 1 +EXCEPTIONALLY 2 0 2 
2 +EXCEPTION 1 0 1 1 +EXCELLENT 5 0 5 5 +EXCELLENCY 4 0 4 4 +EXCEEDINGLY 1 0 1 1 +EXCEEDING 3 0 3 3 +EXASPERATING 1 0 1 1 +EXAMPLE 3 0 3 3 +EXAMINE 2 0 2 2 +EXAMINATION 3 0 3 3 +EXALTED 1 0 1 1 +EXAGGERATE 1 0 1 1 +EXACTITUDE 2 0 2 2 +EXACT 1 0 1 1 +EVIDENTLY 4 0 4 4 +EVIDENT 3 0 3 3 +EVIDENCE 2 0 2 2 +EVERYWHERE 4 0 4 4 +EVERYTHING'S 1 0 1 1 +EVERYTHING 15 0 15 15 +EVERYBODY 6 0 6 6 +EVENTS 4 0 4 4 +EVENT 1 0 1 1 +EVENING 9 0 9 9 +EVEN 46 0 46 46 +EVE 2 0 2 2 +EVAPORATION 2 0 2 2 +EVAPORATING 1 0 1 1 +EVAPORATE 3 0 3 3 +EVADED 1 0 1 1 +EUROPEAN 1 0 1 1 +EUROPE 1 0 1 1 +EUPHRATES 1 0 1 1 +EUNUCH'S 1 0 1 1 +EUNUCH 11 0 11 11 +ETHEREAL 2 0 2 2 +ETHER 3 0 3 3 +ETHELRIED'S 1 0 1 1 +ETERNITY 1 0 1 1 +ESTRANGE 1 0 1 1 +ESTIMATES 1 0 1 1 +ESTHER 1 0 1 1 +ESTEEM 3 0 3 3 +ESTATES 1 0 1 1 +ESTATE 1 0 1 1 +ESTABLISHMENT 1 0 1 1 +ESTABLISHED 2 0 2 2 +ESTABLISH 1 0 1 1 +ESSENTIALLY 1 0 1 1 +ESSENTIAL 1 0 1 1 +ESSENCE 1 0 1 1 +ESSAY 1 0 1 1 +ESQUIRES 1 0 1 1 +ESPECIAL 1 0 1 1 +ESCAPADE 1 0 1 1 +ERROR 2 0 2 2 +ERRATIC 1 0 1 1 +ERRANT 1 0 1 1 +ERNESTINE 1 0 1 1 +ERECTS 1 0 1 1 +ERECTED 3 0 3 3 +ERECT 1 0 1 1 +EQUIVALENT 1 0 1 1 +EQUERRY'S 1 0 1 1 +EQUALLY 2 0 2 2 +EPOCH 1 0 1 1 +EPISTLES 1 0 1 1 +EPISTLE 1 0 1 1 +ENVYING 1 0 1 1 +ENVY 3 0 3 3 +ENVIRONMENT 1 0 1 1 +ENVIOUS 1 0 1 1 +ENVIED 1 0 1 1 +ENVELOPE 1 0 1 1 +ENTREATY 1 0 1 1 +ENTREATINGLY 1 0 1 1 +ENTREATIES 1 0 1 1 +ENTREATED 1 0 1 1 +ENTIRELY 3 0 3 3 +ENTIRE 2 0 2 2 +ENTHUSIASM 3 0 3 3 +ENTERTAINMENT 1 0 1 1 +ENTERTAINING 1 0 1 1 +ENTERTAIN 1 0 1 1 +ENTERED 11 0 11 11 +ENTER 5 0 5 5 +ENTAILED 1 0 1 1 +ENSUED 3 0 3 3 +ENRAGED 1 0 1 1 +ENOUGH 31 0 31 31 +ENLISTMENT 1 0 1 1 +ENJOYMENT 3 0 3 3 +ENJOY 4 0 4 4 +ENGRAVED 1 0 1 1 +ENGLISH 4 0 4 4 +ENGLAND 3 0 3 3 +ENGAGEMENTS 1 0 1 1 +ENGAGEMENT 1 0 1 1 +ENGAGED 2 0 2 2 +ENGAGE 1 0 1 1 +ENFRANCHISEMENT 1 0 1 1 +ENFORCEMENT 1 0 1 1 +ENERGY 1 0 1 1 +ENEMY 3 0 3 3 +ENEMIES 2 0 2 2 +ENDURANCE 1 0 1 1 +ENDING 1 0 1 1 +ENCOURAGED 2 0 2 2 +ENCOUNTERED 1 0 1 1 +ENCOMPASSED 1 0 1 1 +ENCHANTMENT 2 0 2 2 +ENCHANTED 3 0 3 3 +ENCAMPMENT 1 0 1 1 +ENCAMPED 1 0 1 1 +EMPTY 8 0 8 8 +EMPTIES 1 0 1 1 +EMPTIED 2 0 2 2 +EMPRESSES 1 0 1 1 +EMPLOYED 2 0 2 2 +EMPLOY 1 0 1 1 +EMPIRE 3 0 3 3 +EMPHATIC 2 0 2 2 +EMPHASIZE 1 0 1 1 +EMPERORS 2 0 2 2 +EMPEROR 1 0 1 1 +EMOTIONS 2 0 2 2 +EMOTION 2 0 2 2 +EMIR 2 0 2 2 +EMERGED 1 0 1 1 +EMBROIDERY 1 0 1 1 +EMBRACES 1 0 1 1 +EMBRACED 1 0 1 1 +EMBARRASSMENT 1 0 1 1 +EMBARRASSED 1 0 1 1 +EMBARKED 2 0 2 2 +ELYSIAN 1 0 1 1 +ELSIE'S 1 0 1 1 +ELSIE 1 0 1 1 +ELSE 12 0 12 12 +ELLIS 1 0 1 1 +ELKINS 1 0 1 1 +ELIZABETH 1 0 1 1 +ELEVENTH 1 0 1 1 +ELEVEN 4 0 4 4 +ELEVATION 1 0 1 1 +ELEPHANT 1 0 1 1 +ELEMENTS 1 0 1 1 +ELEGANT 1 0 1 1 +ELECTRIC 1 0 1 1 +ELECTION 4 0 4 4 +ELECTED 2 0 2 2 +ELDEST 1 0 1 1 +ELBOWS 1 0 1 1 +ELBOWED 1 0 1 1 +ELBOW 1 0 1 1 +ELBERT 1 0 1 1 +ELASTIC 2 0 2 2 +ELAPSED 1 0 1 1 +ELAPSE 1 0 1 1 +EKED 1 0 1 1 +EJACULATED 1 0 1 1 +EITHER 8 0 8 8 +EIGHTEENTH 3 0 3 3 +EIGHTEEN 10 0 10 10 +EH 2 0 2 2 +EGYPT 5 0 5 5 +EGG 1 0 1 1 +EFFORTS 1 0 1 1 +EFFORT 4 0 4 4 +EFFECTS 2 0 2 2 +EFFECTIVE 1 0 1 1 +EFFECT 1 0 1 1 +EDWARD 1 0 1 1 +EDUCATION 2 0 2 2 +EDUCATED 1 0 1 1 +EDGES 1 0 1 1 +EDGE 2 0 2 2 +ECONOMIZE 1 0 1 1 +ECONOMICAL 1 0 1 1 +ECONOMIC 1 0 1 1 +ECHOES 1 0 1 1 +ECHOED 1 0 1 1 +ECCLESIASTICS 1 0 1 1 +EATING 2 0 2 2 +EAT 12 0 12 12 +EASY 9 0 9 9 +EASTERN 2 0 2 2 +EASILY 5 0 5 5 +EASIEST 1 0 1 1 +EASE 4 0 4 4 +EARTHLY 1 0 1 1 +EARTHEN 1 0 1 1 +EARS 3 0 3 3 +EARNEST 9 0 9 9 +EARNED 1 0 1 1 +EARN 2 0 2 2 +EARLY 8 0 8 8 +EARLINESS 1 0 1 1 +EARLIEST 1 0 1 1 +EAGLE 4 0 4 4 +EAGERLY 4 0 4 4 +EAGER 
2 0 2 2 +EACH 18 0 18 18 +DYING 6 0 6 6 +DWELT 1 0 1 1 +DWELLS 1 0 1 1 +DWELLINGS 1 0 1 1 +DWELLERS 1 0 1 1 +DWELL 1 0 1 1 +DWARF 2 0 2 2 +DUTIES 2 0 2 2 +DUSTY 1 0 1 1 +DUST 2 0 2 2 +DUSK 2 0 2 2 +DURATION 2 0 2 2 +DUPLICATES 2 0 2 2 +DUNNO 1 0 1 1 +DUNNING 1 0 1 1 +DULL 3 0 3 3 +DUE 5 0 5 5 +DUDS 1 0 1 1 +DRUNK 2 0 2 2 +DRUMS 1 0 1 1 +DRUGGED 2 0 2 2 +DROWNING 1 0 1 1 +DROWN 1 0 1 1 +DROVE 1 0 1 1 +DROPS 1 0 1 1 +DROPPED 8 0 8 8 +DROOPING 2 0 2 2 +DRIVING 1 0 1 1 +DRIVEN 1 0 1 1 +DRIVE 5 0 5 5 +DRINKS 1 0 1 1 +DRINKING 4 0 4 4 +DRINKERS 2 0 2 2 +DRINK 24 0 24 24 +DRIFT 1 0 1 1 +DRIED 1 0 1 1 +DREW 7 0 7 7 +DRESSING 1 0 1 1 +DRESSES 1 0 1 1 +DRESSED 1 0 1 1 +DRESS 1 0 1 1 +DREAMING 2 0 2 2 +DREAMED 1 0 1 1 +DREAM 4 0 4 4 +DREADFULLY 1 0 1 1 +DREADFUL 2 0 2 2 +DREAD 3 0 3 3 +DRAWN 1 0 1 1 +DRAWING 9 0 9 9 +DRAW 2 0 2 2 +DRAT 1 0 1 1 +DRANK 2 0 2 2 +DRAMATIC 1 0 1 1 +DRAINS 1 0 1 1 +DRAINED 1 0 1 1 +DRAIN 1 0 1 1 +DRAGONS 1 0 1 1 +DRAGON 1 0 1 1 +DRAGGED 1 0 1 1 +DRAG 1 0 1 1 +DOZEN 2 0 2 2 +DOWNS 2 0 2 2 +DOVES 1 0 1 1 +DOUBTS 2 0 2 2 +DOUBTLESS 2 0 2 2 +DOUBTFUL 1 0 1 1 +DOUBT 9 0 9 9 +DOUBLE 5 0 5 5 +DOT 1 0 1 1 +DOST 4 0 4 4 +DOSE 1 0 1 1 +DOORS 3 0 3 3 +DOOR 30 0 30 30 +DONOVAN'S 1 0 1 1 +DON 5 0 5 5 +DOMINION 1 0 1 1 +DOMINATES 1 0 1 1 +DOMED 1 0 1 1 +DOME 2 0 2 2 +DOLLARS 2 0 2 2 +DOINGS 1 0 1 1 +DOING 10 0 10 10 +DOIN 1 0 1 1 +DOGS 2 0 2 2 +DOGGEDLY 1 0 1 1 +DOEST 2 0 2 2 +DOES 15 0 15 15 +DODGING 1 0 1 1 +DOCTOR 25 0 25 25 +DIXON 4 0 4 4 +DIVISION 1 0 1 1 +DIVINE 1 0 1 1 +DIVIDES 1 0 1 1 +DIVIDED 3 0 3 3 +DIVERT 1 0 1 1 +DITCHFIELD 1 0 1 1 +DISTURBING 1 0 1 1 +DISTURBED 1 0 1 1 +DISTURBANCE 1 0 1 1 +DISTURB 2 0 2 2 +DISTRICTS 1 0 1 1 +DISTRICT 2 0 2 2 +DISTRIBUTED 1 0 1 1 +DISTRIBUTE 1 0 1 1 +DISTRACTED 2 0 2 2 +DISTINGUISH 2 0 2 2 +DISTINCTLY 1 0 1 1 +DISTINCTIVE 1 0 1 1 +DISTINCT 1 0 1 1 +DISTENDED 1 0 1 1 +DISTANT 5 0 5 5 +DISTANCES 1 0 1 1 +DISTANCE 3 0 3 3 +DISTAFF 1 0 1 1 +DISSIPATION 2 0 2 2 +DISSIMULATION 1 0 1 1 +DISSENTIENT 1 0 1 1 +DISSENTERING 1 0 1 1 +DISSENSIONS 2 0 2 2 +DISREGARDED 1 0 1 1 +DISPUTED 1 0 1 1 +DISPUTE 1 0 1 1 +DISPROVE 1 0 1 1 +DISPOSITION 2 0 2 2 +DISPOSED 3 0 3 3 +DISPOSAL 1 0 1 1 +DISPLEASED 1 0 1 1 +DISPLAY 1 0 1 1 +DISPERSED 2 0 2 2 +DISPENSED 1 0 1 1 +DISPENSE 1 0 1 1 +DISMAL 1 0 1 1 +DISINFECTING 1 0 1 1 +DISHONEST 1 0 1 1 +DISHES 7 0 7 7 +DISH 2 0 2 2 +DISGUST 1 0 1 1 +DISGRACE 4 0 4 4 +DISFIGURED 1 0 1 1 +DISENTANGLE 1 0 1 1 +DISEASE 1 0 1 1 +DISCUSSIONS 1 0 1 1 +DISCUSSION 1 0 1 1 +DISCUSSED 3 0 3 3 +DISCRIMINATION 1 0 1 1 +DISCRETION 1 0 1 1 +DISCOVERY 3 0 3 3 +DISCOVERIES 1 0 1 1 +DISCOVERED 4 0 4 4 +DISCOVER 2 0 2 2 +DISCOURSES 1 0 1 1 +DISCOURSE 2 0 2 2 +DISCOURAGEMENTS 1 0 1 1 +DISCONTENT 1 0 1 1 +DISCONCERTION 1 0 1 1 +DISCOMFORT 1 0 1 1 +DISCLOSURES 1 0 1 1 +DISCLOSE 1 0 1 1 +DISCLAIM 1 0 1 1 +DISCIPLINE 1 0 1 1 +DISCERNING 1 0 1 1 +DISASTROUS 1 0 1 1 +DISAPPOINTED 2 0 2 2 +DISAPPEARS 1 0 1 1 +DISAPPEARED 5 0 5 5 +DISAPPEAR 1 0 1 1 +DISAGREE 1 0 1 1 +DISADVANTAGES 3 0 3 3 +DISADVANTAGEOUS 1 0 1 1 +DIRTY 2 0 2 2 +DIRK 2 0 2 2 +DIRECTLY 3 0 3 3 +DIRECTIONS 1 0 1 1 +DIRECTION 7 0 7 7 +DIRECTED 3 0 3 3 +DIRECT 2 0 2 2 +DIRE 1 0 1 1 +DIP 2 0 2 2 +DINSMORE 2 0 2 2 +DINNERS 1 0 1 1 +DINNER 6 0 6 6 +DINKS 2 0 2 2 +DINING 1 0 1 1 +DINE 2 0 2 2 +DIMPLED 1 0 1 1 +DIMLY 1 0 1 1 +DIMINISHED 1 0 1 1 +DIMINISH 1 0 1 1 +DIM 2 0 2 2 +DILUTE 1 0 1 1 +DILIGENTLY 1 0 1 1 +DILAPIDATED 1 0 1 1 +DIGNITY 4 0 4 4 +DIGGING 1 0 1 1 +DIGGERS 2 0 2 2 +DIGESTION 3 0 3 3 +DIFFICULTY 7 0 7 7 +DIFFICULT 2 0 2 2 +DIFFERENT 7 0 7 7 +DIFFERENCES 1 0 1 1 +DIFFERENCE 7 0 7 
7 +DIFFER 1 0 1 1 +DIED 13 0 13 13 +DIE 10 0 10 10 +DIDST 1 0 1 1 +DICTATED 1 0 1 1 +DICK 5 0 5 5 +DICE 1 0 1 1 +DIAMETER 1 0 1 1 +DIALOGUE 1 0 1 1 +DEVOURED 1 0 1 1 +DEVOUR 1 0 1 1 +DEVOTIONS 1 0 1 1 +DEVOTION 1 0 1 1 +DEVOTED 3 0 3 3 +DEVILS 2 0 2 2 +DEVIL 4 0 4 4 +DEVICE 1 0 1 1 +DEVELOPED 1 0 1 1 +DETERMINED 5 0 5 5 +DETECTIVE'S 1 0 1 1 +DETECTION 1 0 1 1 +DETECTIN 1 0 1 1 +DETECTED 1 0 1 1 +DETAILS 2 0 2 2 +DETAILED 3 0 3 3 +DESTRUCTION 1 0 1 1 +DESTROYS 1 0 1 1 +DESTROYER 1 0 1 1 +DESTROYED 4 0 4 4 +DESTROY 3 0 3 3 +DESTINED 1 0 1 1 +DESTINATION 1 0 1 1 +DESSERT 2 0 2 2 +DESPOTISM 2 0 2 2 +DESPOTIC 1 0 1 1 +DESPOILED 1 0 1 1 +DESPITE 3 0 3 3 +DESPISED 1 0 1 1 +DESPISE 1 0 1 1 +DESPERATELY 1 0 1 1 +DESPERATE 2 0 2 2 +DESPAIR 2 0 2 2 +DESK 2 0 2 2 +DESIRES 3 0 3 3 +DESIRED 2 0 2 2 +DESIRE 9 0 9 9 +DESIRABLE 1 0 1 1 +DESIGNED 2 0 2 2 +DESIGN 2 0 2 2 +DESERVING 1 0 1 1 +DESERVES 1 0 1 1 +DESERVE 2 0 2 2 +DESERTING 1 0 1 1 +DESERTED 2 0 2 2 +DESERT 1 0 1 1 +DESCRIPTION 2 0 2 2 +DESCRIBED 1 0 1 1 +DESCRIBE 1 0 1 1 +DESCEND 1 0 1 1 +DERIVE 1 0 1 1 +DERELICTS 1 0 1 1 +DEPRESSION 1 0 1 1 +DEPRECATINGLY 1 0 1 1 +DEPRECATE 1 0 1 1 +DEPOSITED 1 0 1 1 +DEPOSED 1 0 1 1 +DEPLORED 1 0 1 1 +DEPENDS 2 0 2 2 +DEPENDENCE 1 0 1 1 +DEPEND 1 0 1 1 +DEPARTURE 2 0 2 2 +DEPARTMENT 2 0 2 2 +DEPARTING 1 0 1 1 +DEPARTED 3 0 3 3 +DENY 1 0 1 1 +DENOUNCED 1 0 1 1 +DENOTING 1 0 1 1 +DENIS 2 0 2 2 +DENIAL 1 0 1 1 +DEN 1 0 1 1 +DEMANDS 3 0 3 3 +DEMANDED 1 0 1 1 +DELUSION 2 0 2 2 +DELIVERY 1 0 1 1 +DELIVERER 1 0 1 1 +DELIVERED 4 0 4 4 +DELIVER 2 0 2 2 +DELIGHTFUL 2 0 2 2 +DELIGHTED 2 0 2 2 +DELIGHT 7 0 7 7 +DELICIOUSLY 1 0 1 1 +DELICATE 3 0 3 3 +DELIBERATELY 1 0 1 1 +DELIBERATE 1 0 1 1 +DELAYED 1 0 1 1 +DELAY 3 0 3 3 +DEJECTION 1 0 1 1 +DEITY 1 0 1 1 +DEGREE 1 0 1 1 +DEGENERATING 1 0 1 1 +DEFYING 1 0 1 1 +DEFRAUD 1 0 1 1 +DEFORMED 2 0 2 2 +DEFINED 1 0 1 1 +DEFIANT 1 0 1 1 +DEFERENCE 1 0 1 1 +DEFENDING 1 0 1 1 +DEFENDERS 2 0 2 2 +DEFEND 4 0 4 4 +DEFECTION 1 0 1 1 +DEFEAT 1 0 1 1 +DEEPLY 3 0 3 3 +DEEP 9 0 9 9 +DEEMED 1 0 1 1 +DECORATION 1 0 1 1 +DECLINING 1 0 1 1 +DECLINED 1 0 1 1 +DECLARING 1 0 1 1 +DECLARED 1 0 1 1 +DECKS 1 0 1 1 +DECK 6 0 6 6 +DECISION 3 0 3 3 +DECIDED 5 0 5 5 +DECIDE 2 0 2 2 +DECEPTION 1 0 1 1 +DECEMBER 2 0 2 2 +DECEIVED 5 0 5 5 +DECEIVE 1 0 1 1 +DECEASED 1 0 1 1 +DECAY 1 0 1 1 +DEBTS 1 0 1 1 +DEBATED 1 0 1 1 +DEBATE 2 0 2 2 +DEBARRED 1 0 1 1 +DEATHS 1 0 1 1 +DEATHLY 1 0 1 1 +DEARLY 2 0 2 2 +DEARER 1 0 1 1 +DEALT 2 0 2 2 +DEALER 1 0 1 1 +DEAL 11 0 11 11 +DAZED 1 0 1 1 +DAYLIGHT 2 0 2 2 +DAYBREAK 2 0 2 2 +DAY'S 1 0 1 1 +DAWNED 2 0 2 2 +DAWN 4 0 4 4 +DAVID 2 0 2 2 +DAUNTED 1 0 1 1 +DAUGHTER'S 1 0 1 1 +DAUGHTER 10 0 10 10 +DASHING 1 0 1 1 +DASHED 1 0 1 1 +DARTED 1 0 1 1 +DARKNESS 7 0 7 7 +DARING 1 0 1 1 +DARED 3 0 3 3 +DARE 6 0 6 6 +DARCY'S 1 0 1 1 +DARCY 6 0 6 6 +DAPHNE'S 1 0 1 1 +DANGERS 1 0 1 1 +DANGEROUS 2 0 2 2 +DANGER 11 0 11 11 +DANDY 1 0 1 1 +DANDAN 3 0 3 3 +DANCER 1 0 1 1 +DANCE 2 0 2 2 +DAMPNESS 1 0 1 1 +DAMNED 1 0 1 1 +DAMES 1 0 1 1 +DAME'S 1 0 1 1 +DAMASCUS 4 0 4 4 +DAM 1 0 1 1 +DAILY 2 0 2 2 +DADDY 1 0 1 1 +CYNTHIA 2 0 2 2 +CYNICISM 1 0 1 1 +CYMBALS 1 0 1 1 +CUTTER'S 1 0 1 1 +CUTTER 3 0 3 3 +CUT 12 0 12 12 +CUSTOM 2 0 2 2 +CUSHION 1 0 1 1 +CURVED 2 0 2 2 +CURTAINS 2 0 2 2 +CURSORILY 1 0 1 1 +CURSES 1 0 1 1 +CURSED 2 0 2 2 +CURRENT 1 0 1 1 +CURRENCY 1 0 1 1 +CURL 1 0 1 1 +CURIOUS 4 0 4 4 +CURED 1 0 1 1 +CURE 4 0 4 4 +CURATE 2 0 2 2 +CUPBOARD 2 0 2 2 +CUP 1 0 1 1 +CULTURED 1 0 1 1 +CULTURE 1 0 1 1 +CULTIVATED 2 0 2 2 +CULT 1 0 1 1 +CUISINE 1 0 1 1 +CUBITS 1 0 1 1 +CRYSTALLINE 1 0 1 1 +CRYING 
1 0 1 1 +CRY 2 0 2 2 +CRUSHING 2 0 2 2 +CRUSHED 1 0 1 1 +CRUSADER 1 0 1 1 +CRUMPLED 2 0 2 2 +CRUMBLY 1 0 1 1 +CRUELTY 4 0 4 4 +CRUEL 4 0 4 4 +CRUDE 1 0 1 1 +CROWNED 1 0 1 1 +CROWN 3 0 3 3 +CROWDED 2 0 2 2 +CROWD 5 0 5 5 +CROSSED 5 0 5 5 +CROOKS 1 0 1 1 +CROOKED 1 0 1 1 +CROAKING 1 0 1 1 +CRITICS 1 0 1 1 +CRITICAL 2 0 2 2 +CRIMSON 1 0 1 1 +CRIMINALS 1 0 1 1 +CRIMINAL 1 0 1 1 +CRIME 5 0 5 5 +CRIED 21 0 21 21 +CRICKETS 1 0 1 1 +CREPT 1 0 1 1 +CREEPY 1 0 1 1 +CREEPING 1 0 1 1 +CREEL 1 0 1 1 +CREDITS 3 0 3 3 +CREDIT 5 0 5 5 +CREATURES 2 0 2 2 +CREATURE 4 0 4 4 +CREATOR 4 0 4 4 +CREATIONS 1 0 1 1 +CREATING 1 0 1 1 +CREATED 3 0 3 3 +CREATE 3 0 3 3 +CREASES 1 0 1 1 +CREASED 1 0 1 1 +CREAKED 1 0 1 1 +CRAYFISH 3 0 3 3 +CRAWLED 2 0 2 2 +CRAWFISH 7 0 7 7 +CRATES 2 0 2 2 +CRASHED 1 0 1 1 +CRASH 1 0 1 1 +CRAFT 1 0 1 1 +CRACKERS 1 0 1 1 +CRACKED 2 0 2 2 +COY 2 0 2 2 +COWARDS 1 0 1 1 +COWARD 1 0 1 1 +COVERING 1 0 1 1 +COVERED 5 0 5 5 +COVER 1 0 1 1 +COVE 1 0 1 1 +COUSINS 1 0 1 1 +COUSIN 10 0 10 10 +COURSING 1 0 1 1 +COURSE 16 0 16 16 +COURFEYRAC 2 0 2 2 +COURAGE 4 0 4 4 +COUPLETS 1 0 1 1 +COUPLE 2 0 2 2 +COUNTY 9 0 9 9 +COUNTRY 17 0 17 17 +COUNTRIES 4 0 4 4 +COUNTESS 1 0 1 1 +COUNTER 1 0 1 1 +COUNTENANCE 2 0 2 2 +COUNT'S 2 0 2 2 +COUNSELLED 1 0 1 1 +COULDN'T 7 0 7 7 +COUGHING 2 0 2 2 +COUGH 3 0 3 3 +COTTONY 1 0 1 1 +COSTUME 1 0 1 1 +COST 2 0 2 2 +COSETTE 2 0 2 2 +CORTONA 1 0 1 1 +CORSICAN 1 0 1 1 +CORRIDOR 2 0 2 2 +CORRESPONDENCE 1 0 1 1 +CORRECT 1 0 1 1 +CORPSES 1 0 1 1 +CORPSE 3 0 3 3 +CORPORATIONS 1 0 1 1 +CORNERS 1 0 1 1 +CORDIAL 1 0 1 1 +COPY 1 0 1 1 +COPPER 2 0 2 2 +COP'S 1 0 1 1 +COP 3 0 3 3 +COOLNESS 2 0 2 2 +COOL 5 0 5 5 +COOKING 1 0 1 1 +COOK 4 0 4 4 +CONVINCING 1 0 1 1 +CONVINCED 1 0 1 1 +CONVICTION 3 0 3 3 +CONVEYANCE 1 0 1 1 +CONVERTS 1 0 1 1 +CONVERSATION 10 0 10 10 +CONVENTIONS 1 0 1 1 +CONVENTION 1 0 1 1 +CONVENT 4 0 4 4 +CONVENIENCES 1 0 1 1 +CONTROLLED 1 0 1 1 +CONTROL 2 0 2 2 +CONTRIVE 1 0 1 1 +CONTRARY 5 0 5 5 +CONTRADICTION 1 0 1 1 +CONTRACTED 1 0 1 1 +CONTRACT 3 0 3 3 +CONTINUED 11 0 11 11 +CONTINUE 3 0 3 3 +CONTINUATION 1 0 1 1 +CONTINUANCE 1 0 1 1 +CONTINUALLY 1 0 1 1 +CONTINGENT 1 0 1 1 +CONTENTS 1 0 1 1 +CONTENTION 1 0 1 1 +CONTENTED 1 0 1 1 +CONTENT 1 0 1 1 +CONTEND 2 0 2 2 +CONTEMPORARY 2 0 2 2 +CONTAINS 1 0 1 1 +CONTAINING 2 0 2 2 +CONTAINED 1 0 1 1 +CONTAIN 1 0 1 1 +CONTAGIOUS 2 0 2 2 +CONTACT 3 0 3 3 +CONSUMED 2 0 2 2 +CONSULTED 3 0 3 3 +CONSULTATIONS 1 0 1 1 +CONSTRUCT 1 0 1 1 +CONSTRAINED 1 0 1 1 +CONSTITUTE 1 0 1 1 +CONSTITUENT 1 0 1 1 +CONSTANTLY 3 0 3 3 +CONSTANTIUS 1 0 1 1 +CONSTANT 4 0 4 4 +CONSPIRATORS 2 0 2 2 +CONSPIRACY 1 0 1 1 +CONSORTED 1 0 1 1 +CONSOLES 1 0 1 1 +CONSISTS 2 0 2 2 +CONSISTENCY 1 0 1 1 +CONSISTED 1 0 1 1 +CONSIDERING 2 0 2 2 +CONSIDERED 3 0 3 3 +CONSIDERATION 2 0 2 2 +CONSIDERABLE 6 0 6 6 +CONSIDER 1 0 1 1 +CONSERVATIVE 2 0 2 2 +CONSEQUENCES 1 0 1 1 +CONSEQUENCE 1 0 1 1 +CONSENTED 1 0 1 1 +CONSENT 2 0 2 2 +CONSCIOUSNESS 2 0 2 2 +CONSCIOUSLY 1 0 1 1 +CONSCIENTIOUS 1 0 1 1 +CONSCIENCES 1 0 1 1 +CONSCIENCE 3 0 3 3 +CONQUEST 3 0 3 3 +CONQUEROR 1 0 1 1 +CONQUERING 1 0 1 1 +CONQUERED 2 0 2 2 +CONQUER 1 0 1 1 +CONNOISSEUR 1 0 1 1 +CONNECTIONS 1 0 1 1 +CONNECTION 4 0 4 4 +CONNECT 2 0 2 2 +CONJECTURES 1 0 1 1 +CONGRESSES 1 0 1 1 +CONGRESS 3 0 3 3 +CONGEALETH 1 0 1 1 +CONFUSION 4 0 4 4 +CONFOUND 1 0 1 1 +CONFLICT 2 0 2 2 +CONFISCATION 1 0 1 1 +CONFIRMS 1 0 1 1 +CONFIRMED 2 0 2 2 +CONFINEMENT 1 0 1 1 +CONFIDENTIAL 1 0 1 1 +CONFIDENCE 3 0 3 3 +CONFESSION 4 0 4 4 +CONFESSED 2 0 2 2 +CONFESS 9 0 9 9 +CONFERRING 1 0 1 1 +CONFERENCE 1 0 1 1 +CONFECTIONS 1 0 1 1 
+CONFECTIONER 1 0 1 1 +CONDUCTED 2 0 2 2 +CONDUCT 4 0 4 4 +CONDITIONS 4 0 4 4 +CONDITION 4 0 4 4 +CONDESCEND 1 0 1 1 +CONDEMNED 2 0 2 2 +CONCLUSION 2 0 2 2 +CONCILIATE 1 0 1 1 +CONCIERGE'S 1 0 1 1 +CONCERNS 1 0 1 1 +CONCERNING 1 0 1 1 +CONCERN 2 0 2 2 +CONCEPTION 5 0 5 5 +CONCENTRATED 2 0 2 2 +CONCENTRATE 1 0 1 1 +CONCEIVE 1 0 1 1 +CONCEITED 1 0 1 1 +CONCEAL 3 0 3 3 +COMTE 1 0 1 1 +COMRADE 3 0 3 3 +COMPULSORY 1 0 1 1 +COMPREHENDED 1 0 1 1 +COMPOUND 1 0 1 1 +COMPOSURE 1 0 1 1 +COMPOSITION 1 0 1 1 +COMPOSED 1 0 1 1 +COMPLY 1 0 1 1 +COMPLIMENT 1 0 1 1 +COMPLICITY 1 0 1 1 +COMPLETELY 6 0 6 6 +COMPLETED 1 0 1 1 +COMPLETE 1 0 1 1 +COMPLAIN 1 0 1 1 +COMPETITION 1 0 1 1 +COMPELLING 1 0 1 1 +COMPELLED 1 0 1 1 +COMPATRIOT 1 0 1 1 +COMPASS 1 0 1 1 +COMPARATIVELY 1 0 1 1 +COMPANY 13 0 13 13 +COMPANIONS 3 0 3 3 +COMPANION'S 1 0 1 1 +COMPANION 4 0 4 4 +COMMUNICATION 2 0 2 2 +COMMUNICATES 2 0 2 2 +COMMUNICATED 1 0 1 1 +COMMUNICANTS 1 0 1 1 +COMMONWEALTH 2 0 2 2 +COMMONS 3 0 3 3 +COMMONLY 1 0 1 1 +COMMONERS 1 0 1 1 +COMMON 3 0 3 3 +COMMITTEE 6 0 6 6 +COMMITTED 4 0 4 4 +COMMISSIONED 1 0 1 1 +COMMISSION 1 0 1 1 +COMMISSARY 2 0 2 2 +COMMENT 2 0 2 2 +COMMENDING 1 0 1 1 +COMMENDED 1 0 1 1 +COMMENCED 2 0 2 2 +COMMANDS 2 0 2 2 +COMMANDING 2 0 2 2 +COMMANDER 2 0 2 2 +COMMANDED 2 0 2 2 +COMMAND 2 0 2 2 +COMICAL 1 0 1 1 +COMFORTABLE 2 0 2 2 +COMFORT 2 0 2 2 +COMETH 1 0 1 1 +COMBATIVE 1 0 1 1 +COMBAT 2 0 2 2 +COMB 1 0 1 1 +COLOURED 1 0 1 1 +COLOSSAL 1 0 1 1 +COLONELS 1 0 1 1 +COLONEL 28 0 28 28 +COLOGNE 1 0 1 1 +COLLECTOR'S 1 0 1 1 +COLLECTOR 1 0 1 1 +COLLECTION 1 0 1 1 +COLLECTING 2 0 2 2 +COLLECT 1 0 1 1 +COLLAR 2 0 2 2 +COLIC 1 0 1 1 +COLDS 1 0 1 1 +COLD 6 0 6 6 +COLCHESTER 5 0 5 5 +COINS 2 0 2 2 +COINCIDENCES 1 0 1 1 +COIN 2 0 2 2 +COIL 1 0 1 1 +COFFIN 20 0 20 20 +COFFEE 1 0 1 1 +COCOA 1 0 1 1 +COCKING 1 0 1 1 +COAT 2 0 2 2 +COAST 2 0 2 2 +COACH 3 0 3 3 +CLUTCHING 1 0 1 1 +CLUTCH 1 0 1 1 +CLUNG 1 0 1 1 +CLUBBED 1 0 1 1 +CLUB 3 0 3 3 +CLOVER 1 0 1 1 +CLOUDS 1 0 1 1 +CLOTHES 8 0 8 8 +CLOTHE 1 0 1 1 +CLOTH 2 0 2 2 +CLOSING 2 0 2 2 +CLOSET 2 0 2 2 +CLOSEST 1 0 1 1 +CLOSES 1 0 1 1 +CLOSER 1 0 1 1 +CLOSELY 5 0 5 5 +CLOSED 4 0 4 4 +CLOSE 14 0 14 14 +CLOISTER 3 0 3 3 +CLOGGED 2 0 2 2 +CLOCKS 1 0 1 1 +CLING 1 0 1 1 +CLIME 2 0 2 2 +CLIMB 1 0 1 1 +CLIFF 4 0 4 4 +CLEVER 3 0 3 3 +CLERK 2 0 2 2 +CLERICAL 2 0 2 2 +CLENCHING 1 0 1 1 +CLEMENT 1 0 1 1 +CLEMENCY 1 0 1 1 +CLEFT 2 0 2 2 +CLEAVE 1 0 1 1 +CLEARLY 1 0 1 1 +CLEARER 1 0 1 1 +CLEARED 2 0 2 2 +CLEAR 7 0 7 7 +CLEANED 2 0 2 2 +CLEAN 4 0 4 4 +CLAWS 1 0 1 1 +CLASPED 1 0 1 1 +CLASP 1 0 1 1 +CLASHING 1 0 1 1 +CLAPPED 1 0 1 1 +CLANKING 1 0 1 1 +CLAIR 3 0 3 3 +CLAIMS 1 0 1 1 +CLAIMED 1 0 1 1 +CIVILIZED 1 0 1 1 +CIVILITY 1 0 1 1 +CIVILITIES 1 0 1 1 +CITY 16 0 16 16 +CITIZENS 6 0 6 6 +CITIZEN 1 0 1 1 +CIRCUMSTANTIAL 1 0 1 1 +CIRCUMSTANCES 6 0 6 6 +CIRCULAR 1 0 1 1 +CIRCUIT 1 0 1 1 +CIRCLES 1 0 1 1 +CIRCLE 2 0 2 2 +CINDERELLA 1 0 1 1 +CILLEY 1 0 1 1 +CIGARS 1 0 1 1 +CIGAR 1 0 1 1 +CIDER 1 0 1 1 +CHURCHYARDS 1 0 1 1 +CHURCH 14 0 14 14 +CHUCKLED 3 0 3 3 +CHUCKED 1 0 1 1 +CHRYSIPPUS 2 0 2 2 +CHRISTMAS 1 0 1 1 +CHRISTIANS 1 0 1 1 +CHRISTIANITY 3 0 3 3 +CHRISTI 1 0 1 1 +CHRISTENING 1 0 1 1 +CHRIS'S 1 0 1 1 +CHOSEN 3 0 3 3 +CHOSE 2 0 2 2 +CHOP 1 0 1 1 +CHOOSE 3 0 3 3 +CHOKE 1 0 1 1 +CHOIR 2 0 2 2 +CHOICE 1 0 1 1 +CHINTZ 1 0 1 1 +CHINESE 1 0 1 1 +CHIN 1 0 1 1 +CHIMNEY 7 0 7 7 +CHIMES 1 0 1 1 +CHILLED 1 0 1 1 +CHILDREN 13 0 13 13 +CHILDLESS 1 0 1 1 +CHILDHOOD 1 0 1 1 +CHILD'S 1 0 1 1 +CHIEF 9 0 9 9 +CHEWERS 2 0 2 2 +CHERISHED 1 0 1 1 +CHEFS 1 0 1 1 +CHEESE 1 0 1 1 +CHEERFULNESS 1 0 1 1 
+CHEERFULLY 1 0 1 1 +CHEERFUL 4 0 4 4 +CHEEKED 1 0 1 1 +CHECKING 1 0 1 1 +CHECKED 1 0 1 1 +CHEATING 1 0 1 1 +CHEAPLY 1 0 1 1 +CHEAP 1 0 1 1 +CHATTING 1 0 1 1 +CHASSEUR 1 0 1 1 +CHASM 1 0 1 1 +CHASED 1 0 1 1 +CHARMS 1 0 1 1 +CHARMING 3 0 3 3 +CHARMED 2 0 2 2 +CHARM 3 0 3 3 +CHARLES 2 0 2 2 +CHARITY 1 0 1 1 +CHARIOT 1 0 1 1 +CHARGES 2 0 2 2 +CHARGER 1 0 1 1 +CHARGED 5 0 5 5 +CHARGE 8 0 8 8 +CHARCOAL 1 0 1 1 +CHARACTER 6 0 6 6 +CHAPTERS 1 0 1 1 +CHAPS 1 0 1 1 +CHAPLET 1 0 1 1 +CHAPEL 6 0 6 6 +CHAP 1 0 1 1 +CHANTED 1 0 1 1 +CHANT 1 0 1 1 +CHANNEL 3 0 3 3 +CHANGING 2 0 2 2 +CHANGED 4 0 4 4 +CHANGE 9 0 9 9 +CHANCES 1 0 1 1 +CHANCELLOR'S 1 0 1 1 +CHANCELLOR 6 0 6 6 +CHANCE 11 0 11 11 +CHAMPIONS 1 0 1 1 +CHAMPAGNE 1 0 1 1 +CHAMBERLAIN 6 0 6 6 +CHAMBER 5 0 5 5 +CHAIR 5 0 5 5 +CHAIN 1 0 1 1 +CHAFING 2 0 2 2 +CHADWELL 1 0 1 1 +CETERA 2 0 2 2 +CESSATION 1 0 1 1 +CERTIFIED 1 0 1 1 +CERTAINLY 13 0 13 13 +CERTAIN 16 0 16 16 +CENTURY 3 0 3 3 +CENTURIES 5 0 5 5 +CENTRES 1 0 1 1 +CENTRE 1 0 1 1 +CENTER 3 0 3 3 +CENT 1 0 1 1 +CEMETERY 1 0 1 1 +CELLARS 1 0 1 1 +CELL 2 0 2 2 +CELIA 1 0 1 1 +CELERY 1 0 1 1 +CELEBRATED 3 0 3 3 +CEDRIC 2 0 2 2 +CEASED 7 0 7 7 +CAVALRY 1 0 1 1 +CAUTIOUSLY 1 0 1 1 +CAUTION 1 0 1 1 +CAUSED 1 0 1 1 +CAUSE 7 0 7 7 +CAUGHT 5 0 5 5 +CATS 2 0 2 2 +CATHOLIC 1 0 1 1 +CATHEDRAL 1 0 1 1 +CATCHING 1 0 1 1 +CATCH 6 0 6 6 +CAT 3 0 3 3 +CASTRATO 2 0 2 2 +CASTLE 9 0 9 9 +CASTING 2 0 2 2 +CASTETH 1 0 1 1 +CASKET 1 0 1 1 +CASHIER 1 0 1 1 +CASES 2 0 2 2 +CARVED 3 0 3 3 +CARTHUSIANS 1 0 1 1 +CART 3 0 3 3 +CARS 1 0 1 1 +CARRY 7 0 7 7 +CARROT 1 0 1 1 +CARRIED 12 0 12 12 +CARRIAGE 3 0 3 3 +CARPET 1 0 1 1 +CARPENTER 1 0 1 1 +CAROLINA 1 0 1 1 +CARGO 1 0 1 1 +CAREWORN 2 0 2 2 +CARESSES 1 0 1 1 +CAREFULLY 3 0 3 3 +CAREFUL 4 0 4 4 +CARED 4 0 4 4 +CARE 18 0 18 18 +CARDS 1 0 1 1 +CARDINALS 1 0 1 1 +CARBONATE 1 0 1 1 +CARAVAN 1 0 1 1 +CAPTURED 2 0 2 2 +CAPTURE 3 0 3 3 +CAPTOR 1 0 1 1 +CAPTIVE 3 0 3 3 +CAPTAIN'S 1 0 1 1 +CAPITULUM 1 0 1 1 +CAPITAL 3 0 3 3 +CAPERING 1 0 1 1 +CAPERED 1 0 1 1 +CAPABLE 2 0 2 2 +CAPABILITIES 1 0 1 1 +CAP 5 0 5 5 +CANST 1 0 1 1 +CANS 1 0 1 1 +CANONIZED 1 0 1 1 +CANOE 1 0 1 1 +CANDLESTICKS 1 0 1 1 +CANDLESTICK 2 0 2 2 +CANDLES 1 0 1 1 +CANDLE 3 0 3 3 +CANAL 1 0 1 1 +CAN'T 16 0 16 16 +CAMPED 1 0 1 1 +CAMPAIGNS 2 0 2 2 +CAMP 1 0 1 1 +CAMOUFLAGE 1 0 1 1 +CAMEL 2 0 2 2 +CAME 66 0 66 66 +CALMLY 2 0 2 2 +CALLS 1 0 1 1 +CALLING 2 0 2 2 +CALLETH 1 0 1 1 +CALLEST 1 0 1 1 +CALLED 24 0 24 24 +CALL 13 0 13 13 +CALIPH 1 0 1 1 +CALIFORNIAN 2 0 2 2 +CALENDAR 1 0 1 1 +CALCULATE 1 0 1 1 +CAIRO 2 0 2 2 +CAFE 1 0 1 1 +CAESARS 1 0 1 1 +CADET 1 0 1 1 +CABLE'S 1 0 1 1 +CABINET 2 0 2 2 +CABIN 4 0 4 4 +CABARET 1 0 1 1 +C 3 0 3 3 +BUYING 2 0 2 2 +BUTTERFLIES 1 0 1 1 +BUTTER 5 0 5 5 +BUSY 5 0 5 5 +BUSTED 2 0 2 2 +BUST 1 0 1 1 +BUSINESSES 1 0 1 1 +BUSHY 2 0 2 2 +BUSHES 1 0 1 1 +BURYING 1 0 1 1 +BURSTING 1 0 1 1 +BURST 3 0 3 3 +BURNING 1 0 1 1 +BURNETH 1 0 1 1 +BURIED 7 0 7 7 +BURIAL 1 0 1 1 +BURGUNDY 1 0 1 1 +BUNKER 1 0 1 1 +BUNDLES 2 0 2 2 +BUNDLED 1 0 1 1 +BUMS 1 0 1 1 +BULLOCK 1 0 1 1 +BULLET 2 0 2 2 +BULK 2 0 2 2 +BUILT 1 0 1 1 +BUILDS 1 0 1 1 +BUILDINGS 1 0 1 1 +BUILDING 3 0 3 3 +BUGLE 1 0 1 1 +BUGGY 2 0 2 2 +BUFFETING 1 0 1 1 +BUFF 1 0 1 1 +BUD 1 0 1 1 +BUCKLEY 1 0 1 1 +BUCK 1 0 1 1 +BUBBLES 1 0 1 1 +BRUTE 2 0 2 2 +BRUTALLY 1 0 1 1 +BRUTAL 1 0 1 1 +BRUSHED 2 0 2 2 +BRUSH 3 0 3 3 +BRUISING 1 0 1 1 +BRUCE 3 0 3 3 +BROUGHT 11 0 11 11 +BROTHERLY 1 0 1 1 +BROTHER 18 0 18 18 +BROTH 1 0 1 1 +BRONZE 1 0 1 1 +BROKER'S 1 0 1 1 +BROKEN 10 0 10 10 +BROKE 7 0 7 7 +BROAD 3 0 3 3 +BRITISH 1 0 1 1 +BRINGING 3 0 3 3 
+BRINGETH 2 0 2 2 +BRIM 1 0 1 1 +BRIGHT 5 0 5 5 +BRIGANDS 1 0 1 1 +BRIEF 3 0 3 3 +BRIDGE 4 0 4 4 +BRIDE 3 0 3 3 +BRICKS 1 0 1 1 +BRICK 1 0 1 1 +BREWING 1 0 1 1 +BRETHREN 3 0 3 3 +BREED 1 0 1 1 +BRED 1 0 1 1 +BREATHLESS 2 0 2 2 +BREATHING 1 0 1 1 +BREATH 6 0 6 6 +BREASTS 1 0 1 1 +BREAKS 3 0 3 3 +BREAKING 3 0 3 3 +BREAKFAST 4 0 4 4 +BREAKERS 1 0 1 1 +BREAK 6 0 6 6 +BREAD 3 0 3 3 +BREACH 1 0 1 1 +BRAVELY 2 0 2 2 +BRAVE 6 0 6 6 +BRANDON 1 0 1 1 +BRANCHES 2 0 2 2 +BRANCH 3 0 3 3 +BRAG 1 0 1 1 +BRADFORD 1 0 1 1 +BRACKETS 1 0 1 1 +BRACELET 1 0 1 1 +BOYS 8 0 8 8 +BOXES 2 0 2 2 +BOX 10 0 10 10 +BOWS 1 0 1 1 +BOWL 1 0 1 1 +BOWED 2 0 2 2 +BOW 3 0 3 3 +BOURGES 1 0 1 1 +BOURGEOIS 1 0 1 1 +BOUQUET 2 0 2 2 +BOUNTY 1 0 1 1 +BOUND 5 0 5 5 +BOULEVARD 1 0 1 1 +BOUGHT 4 0 4 4 +BOTTLED 1 0 1 1 +BOTTLE 4 0 4 4 +BOTHERED 1 0 1 1 +BOTH 17 0 17 17 +BOSTON 3 0 3 3 +BOSOM 3 0 3 3 +BORROWED 1 0 1 1 +BORN 7 0 7 7 +BORED 1 0 1 1 +BORE 2 0 2 2 +BORDERS 1 0 1 1 +BORDER 1 0 1 1 +BOOTY 1 0 1 1 +BOOTS 2 0 2 2 +BOOT 1 0 1 1 +BOON 2 0 2 2 +BOOMED 2 0 2 2 +BOOM 1 0 1 1 +BOOKS 1 0 1 1 +BOOKLET 1 0 1 1 +BONNETS 2 0 2 2 +BONNET 1 0 1 1 +BONDAGE 2 0 2 2 +BOMB 1 0 1 1 +BOLTS 1 0 1 1 +BOLTED 1 0 1 1 +BOLT 1 0 1 1 +BOLSHEVIKI 3 0 3 3 +BOLDER 1 0 1 1 +BOILING 3 0 3 3 +BOILER 1 0 1 1 +BOILED 2 0 2 2 +BOIL 5 0 5 5 +BOEUF 2 0 2 2 +BODY 13 0 13 13 +BODILY 3 0 3 3 +BODIES 2 0 2 2 +BOB'S 2 0 2 2 +BOB 5 0 5 5 +BOAT'S 1 0 1 1 +BOAT 8 0 8 8 +BOAST 1 0 1 1 +BOARDING 1 0 1 1 +BLURTED 1 0 1 1 +BLUNTLY 1 0 1 1 +BLUNTED 1 0 1 1 +BLUE 7 0 7 7 +BLUBBERING 1 0 1 1 +BLOWS 1 0 1 1 +BLOWN 2 0 2 2 +BLOWING 2 0 2 2 +BLOW 4 0 4 4 +BLOSSOM 1 0 1 1 +BLOOMIN 1 0 1 1 +BLOOM 1 0 1 1 +BLOODSHED 1 0 1 1 +BLOKE 1 0 1 1 +BLOCK 2 0 2 2 +BLIZZARD'S 1 0 1 1 +BLIZZARD 2 0 2 2 +BLINKED 2 0 2 2 +BLINDNESS 1 0 1 1 +BLINDING 1 0 1 1 +BLINDED 1 0 1 1 +BLIND 5 0 5 5 +BLEW 1 0 1 1 +BLESSINGS 1 0 1 1 +BLESSING 2 0 2 2 +BLESS 3 0 3 3 +BLEND 1 0 1 1 +BLEAK 1 0 1 1 +BLAZING 1 0 1 1 +BLANKLY 1 0 1 1 +BLANK 1 0 1 1 +BLAMING 1 0 1 1 +BLAMED 1 0 1 1 +BLAME 2 0 2 2 +BLADE 1 0 1 1 +BLACKSTONE 1 0 1 1 +BLACKGUARD 1 0 1 1 +BLACKBURN 2 0 2 2 +BITTER 3 0 3 3 +BITING 1 0 1 1 +BITE 1 0 1 1 +BIT 11 0 11 11 +BISHOPS 1 0 1 1 +BISHOP 4 0 4 4 +BISCUIT 1 0 1 1 +BIRTHPLACE 1 0 1 1 +BIRTHDAY 1 0 1 1 +BIRTH 1 0 1 1 +BIRD 1 0 1 1 +BIRCH 1 0 1 1 +BIND 1 0 1 1 +BILLS 2 0 2 2 +BILIOUS 1 0 1 1 +BIGGER 1 0 1 1 +BIBLICAL 4 0 4 4 +BIBLE 4 0 4 4 +BEYOND 7 0 7 7 +BEWILDERMENT 1 0 1 1 +BEWARE 2 0 2 2 +BETWIXT 1 0 1 1 +BETWEEN 21 0 21 21 +BETTER 29 0 29 29 +BETRAY 1 0 1 1 +BETOOK 1 0 1 1 +BETIDETH 1 0 1 1 +BETIDE 2 0 2 2 +BETAKEN 1 0 1 1 +BET 1 0 1 1 +BESTOW 3 0 3 3 +BEST 19 0 19 19 +BESSY 9 0 9 9 +BESS 1 0 1 1 +BESPAKE 1 0 1 1 +BESOUGHT 1 0 1 1 +BESIEGERS 2 0 2 2 +BESEECH 2 0 2 2 +BERNARDONE 1 0 1 1 +BERNARD 4 0 4 4 +BEQUEATH 2 0 2 2 +BENT 2 0 2 2 +BENJAMIN 1 0 1 1 +BENEATH 3 0 3 3 +BENCH 3 0 3 3 +BELOW 2 0 2 2 +BELOVED 1 0 1 1 +BELONGS 1 0 1 1 +BELONGED 1 0 1 1 +BELONG 1 0 1 1 +BELLY 3 0 3 3 +BELLS 4 0 4 4 +BELLIES 1 0 1 1 +BELIKE 1 0 1 1 +BELIEVING 1 0 1 1 +BELIEVES 1 0 1 1 +BELIEVED 6 0 6 6 +BELIEVE 16 0 16 16 +BELIEF 5 0 5 5 +BEINGS 3 0 3 3 +BEHOLDING 1 0 1 1 +BEHOLD 5 0 5 5 +BEHIND 16 0 16 16 +BEHAVED 3 0 3 3 +BEHALF 1 0 1 1 +BEGUILED 1 0 1 1 +BEGINNING 6 0 6 6 +BEGGING 2 0 2 2 +BEGGED 9 0 9 9 +BEGGAR 1 0 1 1 +BEGAN 16 0 16 16 +BEG 8 0 8 8 +BEFITTING 1 0 1 1 +BEFALLEN 1 0 1 1 +BEER 2 0 2 2 +BEDROOM 1 0 1 1 +BEDOUIN 1 0 1 1 +BECOME 15 0 15 15 +BECKY 1 0 1 1 +BECAUSE 34 0 34 34 +BEAVER 1 0 1 1 +BEAUTY 4 0 4 4 +BEAUTIFULLY 1 0 1 1 +BEAUTIFUL 8 0 8 8 +BEAUMANOIR 1 0 1 1 +BEATEN 2 0 2 2 +BEAT 6 0 6 6 +BEASTS 4 0 
4 4 +BEAST 2 0 2 2 +BEARING 6 0 6 6 +BEARD 1 0 1 1 +BEAMS 2 0 2 2 +BEALE'S 1 0 1 1 +BEAD 1 0 1 1 +BEACON 2 0 2 2 +BEACH 2 0 2 2 +BE 313 0 313 313 +BAY 1 0 1 1 +BAXTER 1 0 1 1 +BATTLE 2 0 2 2 +BATTERY 1 0 1 1 +BATON 1 0 1 1 +BATHING 1 0 1 1 +BATHED 1 0 1 1 +BATH 1 0 1 1 +BAT 1 0 1 1 +BASKING 1 0 1 1 +BASKETS 1 0 1 1 +BASIS 2 0 2 2 +BASIN 1 0 1 1 +BASER 1 0 1 1 +BASEMENT 1 0 1 1 +BASED 2 0 2 2 +BASE 1 0 1 1 +BARS 5 0 5 5 +BARRIER 1 0 1 1 +BARRICADES 1 0 1 1 +BARRED 1 0 1 1 +BARONET 1 0 1 1 +BARKING 1 0 1 1 +BARK 1 0 1 1 +BARIUM 1 0 1 1 +BARBAROUS 1 0 1 1 +BARBARITY 1 0 1 1 +BAR 7 0 7 7 +BAPTIST 1 0 1 1 +BAPTISMAL 1 0 1 1 +BANQUET 3 0 3 3 +BANKER 1 0 1 1 +BANK 9 0 9 9 +BANISH 1 0 1 1 +BANDS 1 0 1 1 +BANDITS 1 0 1 1 +BANDIT 1 0 1 1 +BAND 9 0 9 9 +BALSAM 1 0 1 1 +BALES 2 0 2 2 +BALE 1 0 1 1 +BAKING 1 0 1 1 +BAILEY 2 0 2 2 +BAIL 1 0 1 1 +BAH 1 0 1 1 +BAGS 2 0 2 2 +BAGHDAD 6 0 6 6 +BAGGY 1 0 1 1 +BAGGAGE 1 0 1 1 +BADLY 3 0 3 3 +BADGE 1 0 1 1 +BADE 1 0 1 1 +BADAWI 1 0 1 1 +BACON 1 0 1 1 +BACKING 1 0 1 1 +BACKGROUND 3 0 3 3 +BACK 51 0 51 51 +BACHELOR 1 0 1 1 +BABYLONIA 1 0 1 1 +BABES 1 0 1 1 +AZURE 1 0 1 1 +AWOKE 1 0 1 1 +AWKWARDNESS 1 0 1 1 +AWKWARDLY 1 0 1 1 +AWFUL 4 0 4 4 +AWE 1 0 1 1 +AWAY 39 0 39 39 +AWARE 2 0 2 2 +AWAKENING 2 0 2 2 +AWAKENED 1 0 1 1 +AWAITS 1 0 1 1 +AWAITED 1 0 1 1 +AWAIT 1 0 1 1 +AVOIDED 1 0 1 1 +AVOID 4 0 4 4 +AVERAGE 2 0 2 2 +AVENUE 3 0 3 3 +AUTOMATICALLY 1 0 1 1 +AUTOCRACY 1 0 1 1 +AUTHORITY 10 0 10 10 +AUTHORITIES 1 0 1 1 +AUTHOR 1 0 1 1 +AUTHENTIC 1 0 1 1 +AUSTRIA 1 0 1 1 +AUSPICIOUS 3 0 3 3 +AUNT 6 0 6 6 +AUGMENTED 1 0 1 1 +AUGHT 2 0 2 2 +AUDACIOUS 1 0 1 1 +ATTRACTIVE 3 0 3 3 +ATTRACTED 2 0 2 2 +ATTORNEY 1 0 1 1 +ATTENTIVELY 4 0 4 4 +ATTENDING 1 0 1 1 +ATTEMPTING 1 0 1 1 +ATTEMPTED 3 0 3 3 +ATTEMPT 2 0 2 2 +ATTAINED 1 0 1 1 +ATTACKS 3 0 3 3 +ATTACKED 1 0 1 1 +ATTACK 2 0 2 2 +ATTACHMENT 1 0 1 1 +ATTACHED 1 0 1 1 +ATMOSPHERE 1 0 1 1 +ATE 1 0 1 1 +ASUNDER 1 0 1 1 +ASTONISHMENT 2 0 2 2 +ASTONISHED 1 0 1 1 +ASSYRIAN 2 0 2 2 +ASSUREDLY 1 0 1 1 +ASSURE 8 0 8 8 +ASSURANCE 2 0 2 2 +ASSUME 1 0 1 1 +ASSOCIATIONS 1 0 1 1 +ASSOCIATES 1 0 1 1 +ASSISTED 1 0 1 1 +ASSISTANT 1 0 1 1 +ASSISTANCE 3 0 3 3 +ASSIST 3 0 3 3 +ASSERT 2 0 2 2 +ASSEMBLY 3 0 3 3 +ASSEMBLED 2 0 2 2 +ASSEMBLAGE 1 0 1 1 +ASSAULT 1 0 1 1 +ASSASSINATED 1 0 1 1 +ASSAILED 1 0 1 1 +ASS 3 0 3 3 +ASPECT 1 0 1 1 +ASLEEP 10 0 10 10 +ASKING 5 0 5 5 +ASIDE 5 0 5 5 +ASIA 1 0 1 1 +ASHLEY 5 0 5 5 +ASH 1 0 1 1 +ASCERTAINING 1 0 1 1 +ASCERTAIN 1 0 1 1 +ARTS 1 0 1 1 +ARTISTS 4 0 4 4 +ARTIST 6 0 6 6 +ARTICLES 1 0 1 1 +ARTHUR 1 0 1 1 +ARTFUL 1 0 1 1 +ARRIVES 1 0 1 1 +ARRIVED 4 0 4 4 +ARRIVE 4 0 4 4 +ARRIVAL 1 0 1 1 +ARRESTED 1 0 1 1 +ARRANGING 1 0 1 1 +ARRANGEMENTS 1 0 1 1 +ARRANGED 2 0 2 2 +AROUSED 1 0 1 1 +AROSE 2 0 2 2 +ARMY 19 0 19 19 +ARMS 9 0 9 9 +ARMIES 2 0 2 2 +ARMED 4 0 4 4 +ARM 5 0 5 5 +ARKANSAS 1 0 1 1 +ARKADYEVITCH 1 0 1 1 +ARK 1 0 1 1 +ARISTOCRACY 1 0 1 1 +ARISE 1 0 1 1 +ARGUMENTS 3 0 3 3 +ARGUMENT 1 0 1 1 +ARGUED 1 0 1 1 +ARENA 1 0 1 1 +AREN'T 3 0 3 3 +ARDENT 3 0 3 3 +ARCHITECTURE 1 0 1 1 +ARCHIBALD 1 0 1 1 +ARCHBISHOPS 1 0 1 1 +ARCHBISHOP 2 0 2 2 +ARABIC 1 0 1 1 +ARABIANS 1 0 1 1 +APTITUDE 1 0 1 1 +APT 1 0 1 1 +APRIL 1 0 1 1 +APPROVAL 1 0 1 1 +APPROACHING 1 0 1 1 +APPROACHED 3 0 3 3 +APPREHENSIONS 1 0 1 1 +APPOINTMENT 2 0 2 2 +APPLYING 1 0 1 1 +APPLY 3 0 3 3 +APPLAUSE 1 0 1 1 +APPETITE 2 0 2 2 +APPEARING 1 0 1 1 +APPEARED 8 0 8 8 +APPEARANCE 7 0 7 7 +APPEAR 6 0 6 6 +APPEALING 1 0 1 1 +APPEALED 1 0 1 1 +APPEAL 1 0 1 1 +APPARITION 1 0 1 1 +APPARENTLY 5 0 5 5 +APPARENT 1 0 1 1 +APOMORPHINE 1 0 1 1 +APOLOGY 1 0 1 1 +APES 2 0 
2 2 +APERTURE 1 0 1 1 +APARTMENTS 3 0 3 3 +APARTMENT 1 0 1 1 +APART 4 0 4 4 +ANYWHERE 1 0 1 1 +ANYWAY 2 0 2 2 +ANYHOW 2 0 2 2 +ANYBODY 2 0 2 2 +ANXIOUS 3 0 3 3 +ANXIETY 5 0 5 5 +ANVILS 1 0 1 1 +ANTONIO 1 0 1 1 +ANTIQUARIAN'S 1 0 1 1 +ANTIDOTES 1 0 1 1 +ANTICIPATION 1 0 1 1 +ANTICIPATE 1 0 1 1 +ANTHONY 1 0 1 1 +ANSWERS 2 0 2 2 +ANSWERING 2 0 2 2 +ANSWERED 26 0 26 26 +ANSWER 15 0 15 15 +ANOTHER 31 0 31 31 +ANON 1 0 1 1 +ANNOYED 3 0 3 3 +ANNOUNCING 1 0 1 1 +ANNOUNCED 3 0 3 3 +ANNOUNCE 1 0 1 1 +ANNIHILATION 2 0 2 2 +ANNIHILATED 1 0 1 1 +ANNIE'S 2 0 2 2 +ANNE 1 0 1 1 +ANIMATED 2 0 2 2 +ANIMATE 1 0 1 1 +ANIMALS 4 0 4 4 +ANGUISH 4 0 4 4 +ANGER 2 0 2 2 +ANEW 1 0 1 1 +ANDY 1 0 1 1 +ANDREW 2 0 2 2 +ANCIENTS 1 0 1 1 +ANCIENT 2 0 2 2 +ANCHOR 1 0 1 1 +ANCESTORS 1 0 1 1 +ANALYSIS 1 0 1 1 +AMUSING 1 0 1 1 +AMPLY 1 0 1 1 +AMPLE 1 0 1 1 +AMOUR 1 0 1 1 +AMOUNT 1 0 1 1 +AMONGST 5 0 5 5 +AMONG 18 0 18 18 +AMMUNITION 1 0 1 1 +AMISS 1 0 1 1 +AMID 1 0 1 1 +AMIABLE 1 0 1 1 +AMERICAN 1 0 1 1 +AMERICA 1 0 1 1 +AMENDS 1 0 1 1 +AMENDMENT 1 0 1 1 +AMENDED 1 0 1 1 +AMBITIOUS 1 0 1 1 +AMBITIONS 1 0 1 1 +AMBASSADOR 1 0 1 1 +ALTOGETHER 2 0 2 2 +ALTHOUGH 10 0 10 10 +ALTERED 1 0 1 1 +ALTER 1 0 1 1 +ALTAR 10 0 10 10 +ALSO 33 0 33 33 +ALREADY 16 0 16 16 +ALOUD 2 0 2 2 +ALONGSIDE 1 0 1 1 +ALONE 10 0 10 10 +ALOES 2 0 2 2 +ALMS 2 0 2 2 +ALMOST 11 0 11 11 +ALMIGHTY 1 0 1 1 +ALLOWING 1 0 1 1 +ALLOWED 7 0 7 7 +ALLOWANCES 1 0 1 1 +ALLOWANCE 1 0 1 1 +ALLIGATOR 2 0 2 2 +ALLIED 1 0 1 1 +ALLIANCE 1 0 1 1 +ALLEY 1 0 1 1 +ALLEN 1 0 1 1 +ALLAYS 1 0 1 1 +ALLAH'S 1 0 1 1 +ALLAH 9 0 9 9 +ALKALOID 1 0 1 1 +ALIMONY 2 0 2 2 +ALIGHTED 2 0 2 2 +ALI'S 1 0 1 1 +ALEX 1 0 1 1 +ALBERT'S 3 0 3 3 +ALBERT 10 0 10 10 +ALAS 1 0 1 1 +ALARMS 1 0 1 1 +ALARMED 1 0 1 1 +ALARM 3 0 3 3 +ALABAMA 1 0 1 1 +AIM 4 0 4 4 +AILS 1 0 1 1 +AILMENTS 2 0 2 2 +AHEAD 2 0 2 2 +AH 9 0 9 9 +AGREES 1 0 1 1 +AGREEMENT 2 0 2 2 +AGREED 6 0 6 6 +AGREEABLE 2 0 2 2 +AGREE 1 0 1 1 +AGONY 1 0 1 1 +AGONE 1 0 1 1 +AGO 10 0 10 10 +AGITATOR 1 0 1 1 +AGITATION 1 0 1 1 +AGITATING 1 0 1 1 +AGILITY 1 0 1 1 +AGHAST 1 0 1 1 +AGGRESSIVENESS 1 0 1 1 +AGGRAVATIONS 1 0 1 1 +AGGRAVATED 1 0 1 1 +AGES 1 0 1 1 +AGENT 3 0 3 3 +AGED 2 0 2 2 +AGE 4 0 4 4 +AGAINST 27 0 27 27 +AGAIN 56 0 56 56 +AFTERNOON 6 0 6 6 +AFTER 97 0 97 97 +AFT 2 0 2 2 +AFRICAN 1 0 1 1 +AFRAID 9 0 9 9 +AFORESAID 1 0 1 1 +AFIRE 1 0 1 1 +AFFORD 2 0 2 2 +AFFLICTION 1 0 1 1 +AFFIRMED 1 0 1 1 +AFFECTIONS 1 0 1 1 +AFFECTIONATELY 1 0 1 1 +AFFECTION 1 0 1 1 +AFFECTED 2 0 2 2 +AFFAIRS 2 0 2 2 +AFFAIR 2 0 2 2 +ADVISEDLY 1 0 1 1 +ADVISED 1 0 1 1 +ADVISE 1 0 1 1 +ADVICE 2 0 2 2 +ADVENTURE 3 0 3 3 +ADVENT 1 0 1 1 +ADVANTAGES 2 0 2 2 +ADVANTAGE 4 0 4 4 +ADVANCING 1 0 1 1 +ADVANCES 2 0 2 2 +ADVANCED 1 0 1 1 +ADVANCE 5 0 5 5 +ADULT 1 0 1 1 +ADRIFT 1 0 1 1 +ADORNED 1 0 1 1 +ADORN 1 0 1 1 +ADORED 1 0 1 1 +ADMITTED 4 0 4 4 +ADMIT 1 0 1 1 +ADMISSION 1 0 1 1 +ADMIRED 1 0 1 1 +ADMIRATION 5 0 5 5 +ADMIRABLE 1 0 1 1 +ADMINISTRATION 3 0 3 3 +ADMINISTERED 1 0 1 1 +ADJACENT 1 0 1 1 +ADHERENT 1 0 1 1 +ADDRESSING 1 0 1 1 +ADDRESSED 1 0 1 1 +ADDRESS 3 0 3 3 +ADDITION 1 0 1 1 +ADDED 13 0 13 13 +ADD 9 0 9 9 +ADAGE 1 0 1 1 +ACUTE 2 0 2 2 +ACTS 4 0 4 4 +ACTIVITIES 1 0 1 1 +ACTIONS 2 0 2 2 +ACTION 2 0 2 2 +ACTING 2 0 2 2 +ACTED 1 0 1 1 +ACT 7 0 7 7 +ACROSS 8 0 8 8 +ACQUITTAL 1 0 1 1 +ACQUIT 1 0 1 1 +ACQUISITIVE 1 0 1 1 +ACQUIRED 2 0 2 2 +ACQUAINTED 3 0 3 3 +ACQUAINTANCES 1 0 1 1 +ACQUAINTANCE 2 0 2 2 +ACQUAINT 1 0 1 1 +ACKNOWLEDGMENT 1 0 1 1 +ACIDS 1 0 1 1 +ACID 4 0 4 4 +ACHIEVED 1 0 1 1 +ACHED 1 0 1 1 +ACE 1 0 1 1 +ACCUSTOMED 2 0 2 2 +ACCUSING 3 0 3 3 +ACCUSED 1 0 1 1 
+ACCUSATION 4 0 4 4 +ACCURATE 1 0 1 1 +ACCOUNTS 2 0 2 2 +ACCOUNTED 1 0 1 1 +ACCOUNT 7 0 7 7 +ACCORDINGLY 5 0 5 5 +ACCORDING 7 0 7 7 +ACCORDANCE 1 0 1 1 +ACCORD 1 0 1 1 +ACCOMPLISHMENTS 1 0 1 1 +ACCOMPLISHED 1 0 1 1 +ACCOMPLICE 2 0 2 2 +ACCOMPANY 1 0 1 1 +ACCOMPANIED 3 0 3 3 +ACCOMMODATING 1 0 1 1 +ACCIDENTS 1 0 1 1 +ACCIDENT 1 0 1 1 +ACCESSION 1 0 1 1 +ACCESSIBLE 1 0 1 1 +ACCESS 1 0 1 1 +ACCEPTED 4 0 4 4 +ACCEPTABLE 1 0 1 1 +ABYSSINIANS 2 0 2 2 +ABUSING 1 0 1 1 +ABUSES 2 0 2 2 +ABUSE 1 0 1 1 +ABUNDANTLY 1 0 1 1 +ABUNDANT 1 0 1 1 +ABSORBING 2 0 2 2 +ABSORBED 4 0 4 4 +ABSORB 1 0 1 1 +ABSOLVED 1 0 1 1 +ABSOLUTELY 3 0 3 3 +ABSOLUTE 1 0 1 1 +ABSENTEE 1 0 1 1 +ABSENTED 1 0 1 1 +ABSENT 1 0 1 1 +ABSENCE 4 0 4 4 +ABRUPTLY 3 0 3 3 +ABOVE 9 0 9 9 +ABOLISH 1 0 1 1 +ABODE 3 0 3 3 +ABNORMAL 1 0 1 1 +ABLE 11 0 11 11 +ABILITY 1 0 1 1 +ABILITIES 1 0 1 1 +ABIDE 1 0 1 1 +ABBOT 1 0 1 1 +ABACK 1 0 1 1 +AARON 1 0 1 1 diff --git a/decoding_results/attention-decoder-rescoring-no-ngram/log-decode-epoch-50_avg-20_use-averaged-model-2024-09-21-17-57-57 b/decoding_results/attention-decoder-rescoring-no-ngram/log-decode-epoch-50_avg-20_use-averaged-model-2024-09-21-17-57-57 new file mode 100644 index 0000000000000000000000000000000000000000..93c541e0b08ed7e3191670a279fe0df6e9270f86 --- /dev/null +++ b/decoding_results/attention-decoder-rescoring-no-ngram/log-decode-epoch-50_avg-20_use-averaged-model-2024-09-21-17-57-57 @@ -0,0 +1,251 @@ +2024-09-21 17:57:57,911 INFO [ctc_decode.py:769] Decoding started +2024-09-21 17:57:57,911 INFO [ctc_decode.py:775] Device: cuda:0 +2024-09-21 17:57:57,911 INFO [ctc_decode.py:776] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'ignore_id': -1, 'label_smoothing': 0.1, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '44a9d5682af9fd3ef77074777e15278ec6d390eb', 'k2-git-date': 'Wed Sep 27 11:22:55 2023', 'lhotse-version': '1.17.0.dev+git.ccfc5b2c.dirty', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'cr-ctc', 'icefall-git-sha1': 'a6eead6c-clean', 'icefall-git-date': 'Mon Sep 9 10:10:08 2024', 'icefall-path': '/star-zw/workspace/zipformer/icefall_cr_ctc', 'k2-path': '/star-zw/workspace/k2/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-zw/workspace/lhotse/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0904151501-7d58788f57-7cktm', 'IP address': '10.30.14.169'}, 'frame_shift_ms': 10, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 50, 'iter': 0, 'avg': 20, 'use_averaged_model': True, 'exp_dir': PosixPath('zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_bpe_500'), 'context_size': 2, 'decoding_method': 'attention-decoder-rescoring-no-ngram', 'num_paths': 100, 'nbest_scale': 1.0, 'hlg_scale': 0.6, 'lm_dir': PosixPath('data/lm'), 'skip_scoring': False, 'num_encoder_layers': '2,2,4,5,4,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1536,2048,1536,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,512,768,512,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': 
'4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,320,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'attention_decoder_dim': 512, 'attention_decoder_num_layers': 6, 'attention_decoder_attention_dim': 512, 'attention_decoder_num_heads': 8, 'attention_decoder_feedforward_dim': 2048, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'use_transducer': False, 'use_ctc': True, 'use_attention_decoder': True, 'use_cr_ctc': True, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'res_dir': PosixPath('zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram'), 'suffix': 'epoch-50_avg-20_use-averaged-model'} +2024-09-21 17:57:58,323 INFO [lexicon.py:168] Loading pre-compiled data/lang_bpe_500/Linv.pt +2024-09-21 17:58:03,090 INFO [ctc_decode.py:861] About to create model +2024-09-21 17:58:04,386 INFO [ctc_decode.py:928] Calculating the averaged model over epoch range from 30 (excluded) to 50 +2024-09-21 17:58:28,955 INFO [ctc_decode.py:945] Number of model parameters: 174319650 +2024-09-21 17:58:28,955 INFO [asr_datamodule.py:467] About to get test-clean cuts +2024-09-21 17:58:29,094 INFO [asr_datamodule.py:474] About to get test-other cuts +2024-09-21 17:58:32,173 INFO [ctc_decode.py:653] batch 0/?, cuts processed until now is 14 +2024-09-21 17:58:55,303 INFO [zipformer.py:1858] name=None, attn_weights_entropy = tensor([5.2234, 4.6148, 4.6277, 4.7056], device='cuda:0') +2024-09-21 18:01:56,602 INFO [ctc_decode.py:653] batch 100/?, cuts processed until now is 2298 +2024-09-21 18:02:22,203 INFO [ctc_decode.py:674] The transcripts are stored in zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:22,241 INFO [ctc_decode.py:674] The transcripts are stored in zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:22,277 INFO [ctc_decode.py:674] The transcripts are stored in zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:22,313 INFO [ctc_decode.py:674] The transcripts are stored in zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:22,347 INFO [ctc_decode.py:674] The transcripts are stored in zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:22,383 
INFO [ctc_decode.py:674] The transcripts are stored in zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:23,262 INFO [utils.py:657] [test-clean_attention_scale_0.01] %WER 2.53% [1329 / 52576, 151 ins, 144 del, 1034 sub ] +2024-09-21 18:02:23,476 INFO [ctc_decode.py:701] Wrote detailed error stats to
zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:23,572 INFO [utils.py:657] [test-clean_attention_scale_0.05] %WER 2.48% [1302 / 52576, 149 ins, 133 del, 1020 sub ] +2024-09-21 18:02:23,784 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:23,879 INFO [utils.py:657] [test-clean_attention_scale_0.08] %WER 2.43% [1278 / 52576, 147 ins, 126 del, 1005 sub ] +2024-09-21 18:02:24,086 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:24,425 INFO [utils.py:657] [test-clean_attention_scale_0.1] %WER 2.39% [1259 / 52576, 143 ins, 124 del, 992 sub ] +2024-09-21 18:02:24,632 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:24,724 INFO [utils.py:657] [test-clean_attention_scale_0.3] %WER 2.28% [1197 / 52576, 130 ins, 113 del, 954 sub ] +2024-09-21 18:02:24,931 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:25,030 INFO [utils.py:657] [test-clean_attention_scale_0.5] %WER 2.17% [1143 / 52576, 124 ins, 102 del, 917 sub ] +2024-09-21 18:02:25,238 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:25,332 INFO [utils.py:657] [test-clean_attention_scale_0.6] %WER 2.13% [1119 / 52576, 125 ins, 96 del, 898 sub ] +2024-09-21 18:02:25,537 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:25,629 INFO [utils.py:657] [test-clean_attention_scale_0.7] %WER 2.08% [1096 / 52576, 124 ins, 88 del, 884 sub ] +2024-09-21 18:02:25,836 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:25,933 INFO [utils.py:657] [test-clean_attention_scale_0.9] %WER 2.05% [1078 / 52576, 122 ins, 85 del, 871 sub ] +2024-09-21 18:02:26,137 INFO [ctc_decode.py:701] Wrote detailed error stats to 
zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:26,229 INFO [utils.py:657] [test-clean_attention_scale_1.0] %WER 2.04% [1073 / 52576, 123 ins, 84 del, 866 sub ] +2024-09-21 18:02:26,433 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:26,524 INFO [utils.py:657] [test-clean_attention_scale_1.1] %WER 2.04% [1070 / 52576, 123 ins, 83 del, 864 sub ] +2024-09-21 18:02:26,730 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:26,824 INFO [utils.py:657] [test-clean_attention_scale_1.2] %WER 2.02% [1060 / 52576, 121 ins, 83 del, 856 sub ] +2024-09-21 18:02:27,030 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:27,123 INFO [utils.py:657] [test-clean_attention_scale_1.3] %WER 2.01% [1055 / 52576, 121 ins, 82 del, 852 sub ] +2024-09-21 18:02:27,364 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:27,456 INFO [utils.py:657] [test-clean_attention_scale_1.5] %WER 2.00% [1052 / 52576, 121 ins, 79 del, 852 sub ] +2024-09-21 18:02:27,661 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:27,752 INFO [utils.py:657] [test-clean_attention_scale_1.7] %WER 1.98% [1040 / 52576, 122 ins, 75 del, 843 sub ] +2024-09-21 18:02:27,960 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:28,052 INFO [utils.py:657] [test-clean_attention_scale_1.9] %WER 1.98% [1039 / 52576, 122 ins, 74 del, 843 sub ] +2024-09-21 18:02:28,520 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:28,613 INFO [utils.py:657] [test-clean_attention_scale_2.0] %WER 1.98% [1039 / 52576, 122 ins, 73 del, 844 sub ] +2024-09-21 18:02:28,818 INFO [ctc_decode.py:701] Wrote detailed error stats to 
zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:28,910 INFO [utils.py:657] [test-clean_attention_scale_2.1] %WER 1.97% [1037 / 52576, 121 ins, 72 del, 844 sub ] +2024-09-21 18:02:29,122 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:29,214 INFO [utils.py:657] [test-clean_attention_scale_2.2] %WER 1.97% [1037 / 52576, 122 ins, 71 del, 844 sub ] +2024-09-21 18:02:29,425 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:29,523 INFO [utils.py:657] [test-clean_attention_scale_2.3] %WER 1.97% [1037 / 52576, 122 ins, 71 del, 844 sub ] +2024-09-21 18:02:29,733 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:29,836 INFO [utils.py:657] [test-clean_attention_scale_2.5] %WER 1.97% [1035 / 52576, 122 ins, 70 del, 843 sub ] +2024-09-21 18:02:30,052 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:30,144 INFO [utils.py:657] [test-clean_attention_scale_3.0] %WER 1.96% [1033 / 52576, 122 ins, 70 del, 841 sub ] +2024-09-21 18:02:30,366 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:30,461 INFO [utils.py:657] [test-clean_attention_scale_4.0] %WER 1.97% [1037 / 52576, 122 ins, 71 del, 844 sub ] +2024-09-21 18:02:30,670 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:30,770 INFO [utils.py:657] [test-clean_attention_scale_5.0] %WER 1.96% [1030 / 52576, 119 ins, 71 del, 840 sub ] +2024-09-21 18:02:30,985 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:31,077 INFO [utils.py:657] [test-clean_attention_scale_6.0] %WER 1.96% [1028 / 52576, 119 ins, 71 del, 838 sub ] +2024-09-21 18:02:31,300 INFO [ctc_decode.py:701] Wrote detailed error stats to 
zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:31,395 INFO [utils.py:657] [test-clean_attention_scale_7.0] %WER 1.96% [1030 / 52576, 120 ins, 71 del, 839 sub ] +2024-09-21 18:02:31,604 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:31,698 INFO [utils.py:657] [test-clean_attention_scale_8.0] %WER 1.96% [1030 / 52576, 120 ins, 71 del, 839 sub ] +2024-09-21 18:02:31,906 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:32,241 INFO [utils.py:657] [test-clean_attention_scale_9.0] %WER 1.96% [1031 / 52576, 121 ins, 71 del, 839 sub ] +2024-09-21 18:02:32,449 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-clean-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:02:32,486 INFO [ctc_decode.py:717] +For test-clean, WER of different settings are: +attention_scale_3.0 1.96 best for test-clean +attention_scale_5.0 1.96 +attention_scale_6.0 1.96 +attention_scale_7.0 1.96 +attention_scale_8.0 1.96 +attention_scale_9.0 1.96 +attention_scale_2.1 1.97 +attention_scale_2.2 1.97 +attention_scale_2.3 1.97 +attention_scale_2.5 1.97 +attention_scale_4.0 1.97 +attention_scale_1.7 1.98 +attention_scale_1.9 1.98 +attention_scale_2.0 1.98 +attention_scale_1.5 2.0 +attention_scale_1.3 2.01 +attention_scale_1.2 2.02 +attention_scale_1.0 2.04 +attention_scale_1.1 2.04 +attention_scale_0.9 2.05 +attention_scale_0.7 2.08 +attention_scale_0.6 2.13 +attention_scale_0.5 2.17 +attention_scale_0.3 2.28 +attention_scale_0.1 2.39 +attention_scale_0.08 2.43 +attention_scale_0.05 2.48 +attention_scale_0.01 2.53 + +2024-09-21 18:02:32,913 INFO [zipformer.py:1858] name=None, attn_weights_entropy = tensor([5.1365, 4.4428, 4.9126, 5.0717], device='cuda:0') +2024-09-21 18:02:35,087 INFO [ctc_decode.py:653] batch 0/?, cuts processed until now is 17 +2024-09-21 18:04:48,388 INFO [zipformer.py:1858] name=None, attn_weights_entropy = tensor([4.7769, 4.1399, 4.5622, 4.7112], device='cuda:0') +2024-09-21 18:05:13,242 INFO [zipformer.py:1858] name=None, attn_weights_entropy = tensor([5.8741, 5.7811, 5.1177, 5.4814], device='cuda:0') +2024-09-21 18:05:41,706 INFO [zipformer.py:1858] name=None, attn_weights_entropy = tensor([3.8281, 4.7939, 5.1590, 5.1042], device='cuda:0') +2024-09-21 18:05:48,184 INFO [zipformer.py:1858] name=None, attn_weights_entropy = tensor([2.5137, 2.8956, 2.5775, 2.2306], device='cuda:0') +2024-09-21 18:05:53,840 INFO [zipformer.py:1858] name=None, attn_weights_entropy = tensor([4.7181, 3.8914, 4.0782, 4.2465], device='cuda:0') +2024-09-21 18:06:02,472 INFO [ctc_decode.py:653] batch 100/?, cuts processed until now is 2530 +2024-09-21 18:06:26,746 INFO [ctc_decode.py:674] The transcripts are stored in 
zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/recogs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:28,232 INFO [utils.py:657] [test-other_attention_scale_0.01] %WER 4.93% [2579 / 52343, 256 ins, 268 del, 2055 sub ] +2024-09-21 18:06:28,478 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:28,582 INFO [utils.py:657] [test-other_attention_scale_0.05] %WER 4.83% [2530 / 52343, 257 ins, 260 del, 2013 sub ] +2024-09-21 18:06:28,824 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:28,931 INFO [utils.py:657] [test-other_attention_scale_0.08] %WER 4.77% [2499 / 52343, 255 ins, 253 del, 1991 sub ] +2024-09-21 18:06:29,169 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:29,284 INFO [utils.py:657] [test-other_attention_scale_0.1] %WER 4.74% [2481 / 52343, 252 ins, 247 del, 1982 sub ] +2024-09-21 18:06:29,507 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:29,612 INFO [utils.py:657] [test-other_attention_scale_0.3] %WER 4.50% [2353 / 52343, 234 ins, 218 del, 1901 sub ] +2024-09-21 18:06:29,853 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:29,956 INFO [utils.py:657] [test-other_attention_scale_0.5] %WER 4.33% [2269 / 52343, 221 ins, 195 del, 1853 sub ] +2024-09-21 18:06:30,175 INFO [ctc_decode.py:701] Wrote detailed error stats to
zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:30,275 INFO [utils.py:657] [test-other_attention_scale_0.6] %WER 4.31% [2254 / 52343, 221 ins, 193 del, 1840 sub ] +2024-09-21 18:06:30,497 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:30,600 INFO [utils.py:657] [test-other_attention_scale_0.7] %WER 4.26% [2230 / 52343, 220 ins, 187 del, 1823 sub ] +2024-09-21 18:06:30,820 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:30,920 INFO [utils.py:657] [test-other_attention_scale_0.9] %WER 4.21% [2202 / 52343, 220 ins, 182 del, 1800 sub ] +2024-09-21 18:06:31,136 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:31,239 INFO [utils.py:657] [test-other_attention_scale_1.0] %WER 4.20% [2197 / 52343, 219 ins, 183 del, 1795 sub ] +2024-09-21 18:06:31,455 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:31,837 INFO [utils.py:657] [test-other_attention_scale_1.1] %WER 4.18% [2190 / 52343, 218 ins, 182 del, 1790 sub ] +2024-09-21 18:06:32,069 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:32,171 INFO [utils.py:657] [test-other_attention_scale_1.2] %WER 4.17% [2182 / 52343, 217 ins, 182 del, 1783 sub ] +2024-09-21 18:06:32,392 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:32,493 INFO [utils.py:657] [test-other_attention_scale_1.3] %WER 4.17% [2182 / 52343, 219 ins, 181 del, 1782 sub ] +2024-09-21 18:06:32,712 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:32,812 INFO [utils.py:657] [test-other_attention_scale_1.5] %WER 4.15% [2173 / 52343, 219 ins, 180 del, 1774 sub ] +2024-09-21 18:06:33,036 INFO [ctc_decode.py:701] Wrote detailed error stats to 
zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:33,135 INFO [utils.py:657] [test-other_attention_scale_1.7] %WER 4.14% [2169 / 52343, 222 ins, 179 del, 1768 sub ] +2024-09-21 18:06:33,352 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:33,452 INFO [utils.py:657] [test-other_attention_scale_1.9] %WER 4.13% [2161 / 52343, 221 ins, 180 del, 1760 sub ] +2024-09-21 18:06:33,690 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:33,797 INFO [utils.py:657] [test-other_attention_scale_2.0] %WER 4.13% [2160 / 52343, 221 ins, 180 del, 1759 sub ] +2024-09-21 18:06:34,015 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:34,114 INFO [utils.py:657] [test-other_attention_scale_2.1] %WER 4.13% [2161 / 52343, 222 ins, 181 del, 1758 sub ] +2024-09-21 18:06:34,332 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:34,431 INFO [utils.py:657] [test-other_attention_scale_2.2] %WER 4.13% [2161 / 52343, 221 ins, 182 del, 1758 sub ] +2024-09-21 18:06:34,650 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:34,750 INFO [utils.py:657] [test-other_attention_scale_2.3] %WER 4.12% [2159 / 52343, 221 ins, 181 del, 1757 sub ] +2024-09-21 18:06:34,965 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:35,065 INFO [utils.py:657] [test-other_attention_scale_2.5] %WER 4.12% [2156 / 52343, 223 ins, 181 del, 1752 sub ] +2024-09-21 18:06:35,287 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:35,389 INFO [utils.py:657] [test-other_attention_scale_3.0] %WER 4.11% [2153 / 52343, 225 ins, 179 del, 1749 sub ] +2024-09-21 18:06:35,612 INFO [ctc_decode.py:701] Wrote detailed error stats to 
zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:35,711 INFO [utils.py:657] [test-other_attention_scale_4.0] %WER 4.10% [2145 / 52343, 228 ins, 180 del, 1737 sub ] +2024-09-21 18:06:35,928 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:36,276 INFO [utils.py:657] [test-other_attention_scale_5.0] %WER 4.11% [2153 / 52343, 231 ins, 179 del, 1743 sub ] +2024-09-21 18:06:36,492 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:36,591 INFO [utils.py:657] [test-other_attention_scale_6.0] %WER 4.10% [2146 / 52343, 228 ins, 180 del, 1738 sub ] +2024-09-21 18:06:36,812 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:36,913 INFO [utils.py:657] [test-other_attention_scale_7.0] %WER 4.09% [2143 / 52343, 227 ins, 180 del, 1736 sub ] +2024-09-21 18:06:37,135 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:37,234 INFO [utils.py:657] [test-other_attention_scale_8.0] %WER 4.08% [2138 / 52343, 225 ins, 179 del, 1734 sub ] +2024-09-21 18:06:37,454 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:37,554 INFO [utils.py:657] [test-other_attention_scale_9.0] %WER 4.08% [2137 / 52343, 225 ins, 178 del, 1734 sub ] +2024-09-21 18:06:37,775 INFO [ctc_decode.py:701] Wrote detailed error stats to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/attention-decoder-rescoring-no-ngram/errs-test-other-epoch-50_avg-20_use-averaged-model.txt +2024-09-21 18:06:37,782 INFO [ctc_decode.py:717] +For test-other, WER of different settings are: +attention_scale_8.0 4.08 best for test-other +attention_scale_9.0 4.08 +attention_scale_7.0 4.09 +attention_scale_4.0 4.1 +attention_scale_6.0 4.1 +attention_scale_3.0 4.11 +attention_scale_5.0 4.11 +attention_scale_2.3 4.12 +attention_scale_2.5 4.12 +attention_scale_1.9 4.13 +attention_scale_2.0 4.13 +attention_scale_2.1 4.13 +attention_scale_2.2 4.13 +attention_scale_1.7 4.14 +attention_scale_1.5 4.15 +attention_scale_1.2 4.17 +attention_scale_1.3 4.17 +attention_scale_1.1 4.18 +attention_scale_1.0 4.2 +attention_scale_0.9 4.21 +attention_scale_0.7 4.26 +attention_scale_0.6 4.31 +attention_scale_0.5 4.33 +attention_scale_0.3 4.5 +attention_scale_0.1 4.74 +attention_scale_0.08 
4.77 +attention_scale_0.05 4.83 +attention_scale_0.01 4.93 + +2024-09-21 18:06:37,782 INFO [ctc_decode.py:985] Done! diff --git a/decoding_results/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt b/decoding_results/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt new file mode 100644 index 0000000000000000000000000000000000000000..d39ce20d68bad50e794551b9f59ea105d8ade099 --- /dev/null +++ b/decoding_results/attention-decoder-rescoring-no-ngram/recogs-test-clean-epoch-50_avg-20_use-averaged-model.txt @@ -0,0 +1,5240 @@ +1089-134686-0000-0: ref=['HE', 'HOPED', 'THERE', 'WOULD', 'BE', 'STEW', 'FOR', 'DINNER', 'TURNIPS', 'AND', 'CARROTS', 'AND', 'BRUISED', 'POTATOES', 'AND', 'FAT', 'MUTTON', 'PIECES', 'TO', 'BE', 'LADLED', 'OUT', 'IN', 'THICK', 'PEPPERED', 'FLOUR', 'FATTENED', 'SAUCE'] +1089-134686-0000-0: hyp=['HE', 'HOPED', 'THERE', 'WOULD', 'BE', 'STEW', 'FOR', 'DINNER', 'TURNIPS', 'AND', 'CARROTS', 'AND', 'BRUISED', 'POTATOES', 'AND', 'FAT', 'MUTTON', 'PIECES', 'TO', 'BE', 'LADLED', 'OUT', 'IN', 'THICK', 'PEPPERED', 'FLOUR', 'FATTENED', 'SAUCE'] +1089-134686-0001-1: ref=['STUFF', 'IT', 'INTO', 'YOU', 'HIS', 'BELLY', 'COUNSELLED', 'HIM'] +1089-134686-0001-1: hyp=['STUFF', 'IT', 'INTO', 'YOU', 'HIS', 'BELLY', 'COUNSELLED', 'HIM'] +1089-134686-0002-2: ref=['AFTER', 'EARLY', 'NIGHTFALL', 'THE', 'YELLOW', 'LAMPS', 'WOULD', 'LIGHT', 'UP', 'HERE', 'AND', 'THERE', 'THE', 'SQUALID', 'QUARTER', 'OF', 'THE', 'BROTHELS'] +1089-134686-0002-2: hyp=['AFTER', 'EARLY', 'NIGHTFALL', 'THE', 'YELLOW', 'LAMPS', 'WOULD', 'LIGHT', 'UP', 'HERE', 'AND', 'THERE', 'THE', 'SQUALID', 'QUARTER', 'OF', 'THE', 'BROTHELS'] +1089-134686-0003-3: ref=['HELLO', 'BERTIE', 'ANY', 'GOOD', 'IN', 'YOUR', 'MIND'] +1089-134686-0003-3: hyp=['HALLO', 'BERTIE', 'ANY', 'GOOD', 'IN', 'YOUR', 'MIND'] +1089-134686-0004-4: ref=['NUMBER', 'TEN', 'FRESH', 'NELLY', 'IS', 'WAITING', 'ON', 'YOU', 'GOOD', 'NIGHT', 'HUSBAND'] +1089-134686-0004-4: hyp=['NUMBER', 'TEN', 'FRESH', 'NELLY', 'IS', 'WAITING', 'ON', 'YOU', 'GOOD', 'NIGHT', 'HUSBAND'] +1089-134686-0005-5: ref=['THE', 'MUSIC', 'CAME', 'NEARER', 'AND', 'HE', 'RECALLED', 'THE', 'WORDS', 'THE', 'WORDS', 'OF', "SHELLEY'S", 'FRAGMENT', 'UPON', 'THE', 'MOON', 'WANDERING', 'COMPANIONLESS', 'PALE', 'FOR', 'WEARINESS'] +1089-134686-0005-5: hyp=['THE', 'MUSIC', 'CAME', 'NEARER', 'AND', 'HE', 'RECALLED', 'THE', 'WORDS', 'THE', 'WORDS', 'OF', "SHELLEY'S", 'FRAGMENT', 'UPON', 'THE', 'MOON', 'WANDERING', 'COMPANIONLESS', 'PALE', 'FOR', 'WEARINESS'] +1089-134686-0006-6: ref=['THE', 'DULL', 'LIGHT', 'FELL', 'MORE', 'FAINTLY', 'UPON', 'THE', 'PAGE', 'WHEREON', 'ANOTHER', 'EQUATION', 'BEGAN', 'TO', 'UNFOLD', 'ITSELF', 'SLOWLY', 'AND', 'TO', 'SPREAD', 'ABROAD', 'ITS', 'WIDENING', 'TAIL'] +1089-134686-0006-6: hyp=['THE', 'DULL', 'LIGHT', 'FELL', 'MORE', 'FAINTLY', 'UPON', 'THE', 'PAGE', 'WHEREON', 'ANOTHER', 'EQUATION', 'BEGAN', 'TO', 'UNFOLD', 'ITSELF', 'SLOWLY', 'AND', 'TO', 'SPREAD', 'ABROAD', 'ITS', 'WIDENING', 'TALE'] +1089-134686-0007-7: ref=['A', 'COLD', 'LUCID', 'INDIFFERENCE', 'REIGNED', 'IN', 'HIS', 'SOUL'] +1089-134686-0007-7: hyp=['A', 'COLD', 'LUCID', 'INDIFFERENCE', 'REIGNED', 'IN', 'HIS', 'SOUL'] +1089-134686-0008-8: ref=['THE', 'CHAOS', 'IN', 'WHICH', 'HIS', 'ARDOUR', 'EXTINGUISHED', 'ITSELF', 'WAS', 'A', 'COLD', 'INDIFFERENT', 'KNOWLEDGE', 'OF', 'HIMSELF'] +1089-134686-0008-8: hyp=['THE', 'CHAOS', 'IN', 'WHICH', 'HIS', 'ARDOR', 'EXTINGUISHED', 'ITSELF', 'WAS', 'A', 'COLD', 'INDIFFERENT', 'KNOWLEDGE', 
'OF', 'HIMSELF'] +1089-134686-0009-9: ref=['AT', 'MOST', 'BY', 'AN', 'ALMS', 'GIVEN', 'TO', 'A', 'BEGGAR', 'WHOSE', 'BLESSING', 'HE', 'FLED', 'FROM', 'HE', 'MIGHT', 'HOPE', 'WEARILY', 'TO', 'WIN', 'FOR', 'HIMSELF', 'SOME', 'MEASURE', 'OF', 'ACTUAL', 'GRACE'] +1089-134686-0009-9: hyp=['AT', 'MOST', 'BY', 'AN', 'ALMS', 'GIVEN', 'TO', 'A', 'BEGGAR', 'WHOSE', 'BLESSING', 'HE', 'FLED', 'FROM', 'HE', 'MIGHT', 'HOPE', 'WEARILY', 'TO', 'WIN', 'FOR', 'HIMSELF', 'SOME', 'MEASURE', 'OF', 'ACTUAL', 'GRACE'] +1089-134686-0010-10: ref=['WELL', 'NOW', 'ENNIS', 'I', 'DECLARE', 'YOU', 'HAVE', 'A', 'HEAD', 'AND', 'SO', 'HAS', 'MY', 'STICK'] +1089-134686-0010-10: hyp=['WELL', 'NOW', 'ENNIS', 'I', 'DECLARE', 'YOU', 'HAVE', 'A', 'HEAD', 'AND', 'SO', 'HAS', 'MY', 'STICK'] +1089-134686-0011-11: ref=['ON', 'SATURDAY', 'MORNINGS', 'WHEN', 'THE', 'SODALITY', 'MET', 'IN', 'THE', 'CHAPEL', 'TO', 'RECITE', 'THE', 'LITTLE', 'OFFICE', 'HIS', 'PLACE', 'WAS', 'A', 'CUSHIONED', 'KNEELING', 'DESK', 'AT', 'THE', 'RIGHT', 'OF', 'THE', 'ALTAR', 'FROM', 'WHICH', 'HE', 'LED', 'HIS', 'WING', 'OF', 'BOYS', 'THROUGH', 'THE', 'RESPONSES'] +1089-134686-0011-11: hyp=['ON', 'SATURDAY', 'MORNINGS', 'WHEN', 'THE', 'SODALITY', 'MET', 'IN', 'THE', 'CHAPEL', 'TO', 'RECITE', 'THE', 'LITTLE', 'OFFICE', 'HIS', 'PLACE', 'WAS', 'A', 'CUSHIONED', 'KNEELING', 'DESK', 'AT', 'THE', 'RIGHT', 'OF', 'THE', 'ALTAR', 'FROM', 'WHICH', 'HE', 'LED', 'HIS', 'WING', 'OF', 'BOYS', 'THROUGH', 'THE', 'RESPONSES'] +1089-134686-0012-12: ref=['HER', 'EYES', 'SEEMED', 'TO', 'REGARD', 'HIM', 'WITH', 'MILD', 'PITY', 'HER', 'HOLINESS', 'A', 'STRANGE', 'LIGHT', 'GLOWING', 'FAINTLY', 'UPON', 'HER', 'FRAIL', 'FLESH', 'DID', 'NOT', 'HUMILIATE', 'THE', 'SINNER', 'WHO', 'APPROACHED', 'HER'] +1089-134686-0012-12: hyp=['HER', 'EYES', 'SEEMED', 'TO', 'REGARD', 'HIM', 'WITH', 'MILD', 'PITY', 'HER', 'HOLINESS', 'A', 'STRANGE', 'LIGHT', 'GLOWING', 'FAINTLY', 'UPON', 'HER', 'FRAIL', 'FLESH', 'DID', 'NOT', 'HUMILIATE', 'THE', 'SINNER', 'WHO', 'APPROACHED', 'HER'] +1089-134686-0013-13: ref=['IF', 'EVER', 'HE', 'WAS', 'IMPELLED', 'TO', 'CAST', 'SIN', 'FROM', 'HIM', 'AND', 'TO', 'REPENT', 'THE', 'IMPULSE', 'THAT', 'MOVED', 'HIM', 'WAS', 'THE', 'WISH', 'TO', 'BE', 'HER', 'KNIGHT'] +1089-134686-0013-13: hyp=['IF', 'EVER', 'HE', 'WAS', 'IMPELLED', 'TO', 'CAST', 'SIN', 'FROM', 'HIM', 'AND', 'TO', 'REPENT', 'THE', 'IMPULSE', 'THAT', 'MOVED', 'HIM', 'WAS', 'THE', 'WISH', 'TO', 'BE', 'HER', 'KNIGHT'] +1089-134686-0014-14: ref=['HE', 'TRIED', 'TO', 'THINK', 'HOW', 'IT', 'COULD', 'BE'] +1089-134686-0014-14: hyp=['HE', 'TRIED', 'TO', 'THINK', 'HOW', 'IT', 'COULD', 'BE'] +1089-134686-0015-15: ref=['BUT', 'THE', 'DUSK', 'DEEPENING', 'IN', 'THE', 'SCHOOLROOM', 'COVERED', 'OVER', 'HIS', 'THOUGHTS', 'THE', 'BELL', 'RANG'] +1089-134686-0015-15: hyp=['BUT', 'THE', 'DUSK', 'DEEPENING', 'IN', 'THE', 'SCHOOLROOM', 'COVERED', 'OVER', 'HIS', 'THOUGHTS', 'THE', 'BELL', 'RANG'] +1089-134686-0016-16: ref=['THEN', 'YOU', 'CAN', 'ASK', 'HIM', 'QUESTIONS', 'ON', 'THE', 'CATECHISM', 'DEDALUS'] +1089-134686-0016-16: hyp=['THEN', 'YOU', 'CAN', 'ASK', 'HIM', 'QUESTIONS', 'ON', 'THE', 'CATECHISM', 'DEDALUS'] +1089-134686-0017-17: ref=['STEPHEN', 'LEANING', 'BACK', 'AND', 'DRAWING', 'IDLY', 'ON', 'HIS', 'SCRIBBLER', 'LISTENED', 'TO', 'THE', 'TALK', 'ABOUT', 'HIM', 'WHICH', 'HERON', 'CHECKED', 'FROM', 'TIME', 'TO', 'TIME', 'BY', 'SAYING'] +1089-134686-0017-17: hyp=['STEPHEN', 'LEANING', 'BACK', 'AND', 'DRAWING', 'IDLY', 'ON', 'HIS', 'SCRIBBLER', 'LISTENED', 'TO', 'THE', 'TALK', 'ABOUT', 'HIM', 'WHICH', 'HERON', 
'CHECKED', 'FROM', 'TIME', 'TO', 'TIME', 'BY', 'SAYING'] +1089-134686-0018-18: ref=['IT', 'WAS', 'STRANGE', 'TOO', 'THAT', 'HE', 'FOUND', 'AN', 'ARID', 'PLEASURE', 'IN', 'FOLLOWING', 'UP', 'TO', 'THE', 'END', 'THE', 'RIGID', 'LINES', 'OF', 'THE', 'DOCTRINES', 'OF', 'THE', 'CHURCH', 'AND', 'PENETRATING', 'INTO', 'OBSCURE', 'SILENCES', 'ONLY', 'TO', 'HEAR', 'AND', 'FEEL', 'THE', 'MORE', 'DEEPLY', 'HIS', 'OWN', 'CONDEMNATION'] +1089-134686-0018-18: hyp=['IT', 'WAS', 'STRANGE', 'TOO', 'THAT', 'HE', 'FOUND', 'AN', 'ARID', 'PLEASURE', 'IN', 'FOLLOWING', 'UP', 'TO', 'THE', 'END', 'THE', 'RIGID', 'LINES', 'OF', 'THE', 'DOCTRINES', 'OF', 'THE', 'CHURCH', 'AND', 'PENETRATING', 'INTO', 'OBSCURE', 'SILENCES', 'ONLY', 'TO', 'HEAR', 'AND', 'FEEL', 'THE', 'MORE', 'DEEPLY', 'HIS', 'OWN', 'CONDEMNATION'] +1089-134686-0019-19: ref=['THE', 'SENTENCE', 'OF', 'SAINT', 'JAMES', 'WHICH', 'SAYS', 'THAT', 'HE', 'WHO', 'OFFENDS', 'AGAINST', 'ONE', 'COMMANDMENT', 'BECOMES', 'GUILTY', 'OF', 'ALL', 'HAD', 'SEEMED', 'TO', 'HIM', 'FIRST', 'A', 'SWOLLEN', 'PHRASE', 'UNTIL', 'HE', 'HAD', 'BEGUN', 'TO', 'GROPE', 'IN', 'THE', 'DARKNESS', 'OF', 'HIS', 'OWN', 'STATE'] +1089-134686-0019-19: hyp=['THE', 'SENTENCE', 'OF', 'SAINT', 'JAMES', 'WHICH', 'SAYS', 'THAT', 'HE', 'WHO', 'OFFENDS', 'AGAINST', 'ONE', 'COMMANDMENT', 'BECOMES', 'GUILTY', 'OF', 'ALL', 'HAD', 'SEEMED', 'TO', 'HIM', 'FIRST', 'A', 'SWOLLEN', 'PHRASE', 'UNTIL', 'HE', 'HAD', 'BEGUN', 'TO', 'GROPE', 'IN', 'THE', 'DARKNESS', 'OF', 'HIS', 'OWN', 'STATE'] +1089-134686-0020-20: ref=['IF', 'A', 'MAN', 'HAD', 'STOLEN', 'A', 'POUND', 'IN', 'HIS', 'YOUTH', 'AND', 'HAD', 'USED', 'THAT', 'POUND', 'TO', 'AMASS', 'A', 'HUGE', 'FORTUNE', 'HOW', 'MUCH', 'WAS', 'HE', 'OBLIGED', 'TO', 'GIVE', 'BACK', 'THE', 'POUND', 'HE', 'HAD', 'STOLEN', 'ONLY', 'OR', 'THE', 'POUND', 'TOGETHER', 'WITH', 'THE', 'COMPOUND', 'INTEREST', 'ACCRUING', 'UPON', 'IT', 'OR', 'ALL', 'HIS', 'HUGE', 'FORTUNE'] +1089-134686-0020-20: hyp=['IF', 'A', 'MAN', 'HAD', 'STOLEN', 'A', 'POUND', 'IN', 'HIS', 'YOUTH', 'AND', 'HAD', 'USED', 'THAT', 'POUND', 'TO', 'AMASS', 'A', 'HUGE', 'FORTUNE', 'HOW', 'MUCH', 'WAS', 'HE', 'OBLIGED', 'TO', 'GIVE', 'BACK', 'THE', 'POUND', 'HE', 'HAD', 'STOLEN', 'ONLY', 'OR', 'THE', 'POUND', 'TOGETHER', 'WITH', 'THE', 'COMPOUND', 'INTEREST', 'ACCRUING', 'UPON', 'IT', 'OR', 'ALL', 'HIS', 'HUGE', 'FORTUNE'] +1089-134686-0021-21: ref=['IF', 'A', 'LAYMAN', 'IN', 'GIVING', 'BAPTISM', 'POUR', 'THE', 'WATER', 'BEFORE', 'SAYING', 'THE', 'WORDS', 'IS', 'THE', 'CHILD', 'BAPTIZED'] +1089-134686-0021-21: hyp=['IF', 'A', 'LAYMAN', 'IN', 'GIVING', 'BAPTISM', 'POUR', 'THE', 'WATER', 'BEFORE', 'SAYING', 'THE', 'WORDS', 'IS', 'THE', 'CHILD', 'BAPTIZED'] +1089-134686-0022-22: ref=['HOW', 'COMES', 'IT', 'THAT', 'WHILE', 'THE', 'FIRST', 'BEATITUDE', 'PROMISES', 'THE', 'KINGDOM', 'OF', 'HEAVEN', 'TO', 'THE', 'POOR', 'OF', 'HEART', 'THE', 'SECOND', 'BEATITUDE', 'PROMISES', 'ALSO', 'TO', 'THE', 'MEEK', 'THAT', 'THEY', 'SHALL', 'POSSESS', 'THE', 'LAND'] +1089-134686-0022-22: hyp=['HOW', 'COMES', 'IT', 'THAT', 'WHILE', 'THE', 'FIRST', 'BEATITUDE', 'PROMISES', 'THE', 'KINGDOM', 'OF', 'HEAVEN', 'TO', 'THE', 'POOR', 'OF', 'HEART', 'THE', 'SECOND', 'BEATITUDE', 'PROMISES', 'ALSO', 'TO', 'THE', 'MEEK', 'THAT', 'THEY', 'SHALL', 'POSSESS', 'THE', 'LAND'] +1089-134686-0023-23: ref=['WHY', 'WAS', 'THE', 'SACRAMENT', 'OF', 'THE', 'EUCHARIST', 'INSTITUTED', 'UNDER', 'THE', 'TWO', 'SPECIES', 'OF', 'BREAD', 'AND', 'WINE', 'IF', 'JESUS', 'CHRIST', 'BE', 'PRESENT', 'BODY', 'AND', 'BLOOD', 'SOUL', 'AND', 'DIVINITY', 'IN', 'THE', 
'BREAD', 'ALONE', 'AND', 'IN', 'THE', 'WINE', 'ALONE'] +1089-134686-0023-23: hyp=['WHY', 'WAS', 'THE', 'SACRAMENT', 'OF', 'THE', 'EUCHARIST', 'INSTITUTED', 'UNDER', 'THE', 'TWO', 'SPECIES', 'OF', 'BREAD', 'AND', 'WINE', 'IF', 'JESUS', 'CHRIST', 'BE', 'PRESENT', 'BODY', 'AND', 'BLOOD', 'SOUL', 'AND', 'DIVINITY', 'IN', 'THE', 'BREAD', 'ALONE', 'AND', 'IN', 'THE', 'WINE', 'ALONE'] +1089-134686-0024-24: ref=['IF', 'THE', 'WINE', 'CHANGE', 'INTO', 'VINEGAR', 'AND', 'THE', 'HOST', 'CRUMBLE', 'INTO', 'CORRUPTION', 'AFTER', 'THEY', 'HAVE', 'BEEN', 'CONSECRATED', 'IS', 'JESUS', 'CHRIST', 'STILL', 'PRESENT', 'UNDER', 'THEIR', 'SPECIES', 'AS', 'GOD', 'AND', 'AS', 'MAN'] +1089-134686-0024-24: hyp=['IF', 'THE', 'WINE', 'CHANGE', 'INTO', 'VINEGAR', 'AND', 'THE', 'HOST', 'CRUMBLE', 'INTO', 'CORRUPTION', 'AFTER', 'THEY', 'HAVE', 'BEEN', 'CONSECRATED', 'IS', 'JESUS', 'CHRIST', 'STILL', 'PRESENT', 'UNDER', 'THEIR', 'SPECIES', 'AS', 'GOD', 'AND', 'AS', 'MAN'] +1089-134686-0025-25: ref=['A', 'GENTLE', 'KICK', 'FROM', 'THE', 'TALL', 'BOY', 'IN', 'THE', 'BENCH', 'BEHIND', 'URGED', 'STEPHEN', 'TO', 'ASK', 'A', 'DIFFICULT', 'QUESTION'] +1089-134686-0025-25: hyp=['A', 'GENTLE', 'KICK', 'FROM', 'THE', 'TALL', 'BOY', 'ON', 'THE', 'BENCH', 'BEHIND', 'URGED', 'STEPHEN', 'TO', 'ASK', 'A', 'DIFFICULT', 'QUESTION'] +1089-134686-0026-26: ref=['THE', 'RECTOR', 'DID', 'NOT', 'ASK', 'FOR', 'A', 'CATECHISM', 'TO', 'HEAR', 'THE', 'LESSON', 'FROM'] +1089-134686-0026-26: hyp=['THE', 'RECTOR', 'DID', 'NOT', 'ASK', 'FOR', 'A', 'CATECHISM', 'TO', 'HEAR', 'THE', 'LESSON', 'FROM'] +1089-134686-0027-27: ref=['HE', 'CLASPED', 'HIS', 'HANDS', 'ON', 'THE', 'DESK', 'AND', 'SAID'] +1089-134686-0027-27: hyp=['HE', 'CLASPED', 'HIS', 'HANDS', 'ON', 'THE', 'DESK', 'AND', 'SAID'] +1089-134686-0028-28: ref=['THE', 'RETREAT', 'WILL', 'BEGIN', 'ON', 'WEDNESDAY', 'AFTERNOON', 'IN', 'HONOUR', 'OF', 'SAINT', 'FRANCIS', 'XAVIER', 'WHOSE', 'FEAST', 'DAY', 'IS', 'SATURDAY'] +1089-134686-0028-28: hyp=['THE', 'RETREAT', 'WILL', 'BEGIN', 'ON', 'WEDNESDAY', 'AFTERNOON', 'IN', 'HONOR', 'OF', 'SAINT', 'FRANCIS', 'ZAVIOR', 'WHOSE', 'FEAST', 'DAY', 'IS', 'SATURDAY'] +1089-134686-0029-29: ref=['ON', 'FRIDAY', 'CONFESSION', 'WILL', 'BE', 'HEARD', 'ALL', 'THE', 'AFTERNOON', 'AFTER', 'BEADS'] +1089-134686-0029-29: hyp=['ON', 'FRIDAY', 'CONFESSION', 'WILL', 'BE', 'HEARD', 'ALL', 'THE', 'AFTERNOON', 'AFTER', 'BEADS'] +1089-134686-0030-30: ref=['BEWARE', 'OF', 'MAKING', 'THAT', 'MISTAKE'] +1089-134686-0030-30: hyp=['BEWARE', 'OF', 'MAKING', 'THAT', 'MISTAKE'] +1089-134686-0031-31: ref=["STEPHEN'S", 'HEART', 'BEGAN', 'SLOWLY', 'TO', 'FOLD', 'AND', 'FADE', 'WITH', 'FEAR', 'LIKE', 'A', 'WITHERING', 'FLOWER'] +1089-134686-0031-31: hyp=["STEPHEN'S", 'HEART', 'BEGAN', 'SLOWLY', 'TO', 'FOLD', 'AND', 'FADE', 'WITH', 'FEAR', 'LIKE', 'A', 'WITHERING', 'FLOWER'] +1089-134686-0032-32: ref=['HE', 'IS', 'CALLED', 'AS', 'YOU', 'KNOW', 'THE', 'APOSTLE', 'OF', 'THE', 'INDIES'] +1089-134686-0032-32: hyp=['HE', 'IS', 'CALLED', 'AS', 'YOU', 'KNOW', 'THE', 'APOSTLE', 'OF', 'THE', 'INDIES'] +1089-134686-0033-33: ref=['A', 'GREAT', 'SAINT', 'SAINT', 'FRANCIS', 'XAVIER'] +1089-134686-0033-33: hyp=['A', 'GREAT', 'SAINT', 'SAINT', 'FRANCIS', 'ZAVIOUR'] +1089-134686-0034-34: ref=['THE', 'RECTOR', 'PAUSED', 'AND', 'THEN', 'SHAKING', 'HIS', 'CLASPED', 'HANDS', 'BEFORE', 'HIM', 'WENT', 'ON'] +1089-134686-0034-34: hyp=['THE', 'RECTOR', 'PAUSED', 'AND', 'THEN', 'SHAKING', 'HIS', 'CLASPED', 'HANDS', 'BEFORE', 'HIM', 'WENT', 'ON'] +1089-134686-0035-35: ref=['HE', 'HAD', 'THE', 'FAITH', 'IN', 'HIM', 
'THAT', 'MOVES', 'MOUNTAINS'] +1089-134686-0035-35: hyp=['HE', 'HAD', 'THE', 'FAITH', 'IN', 'HIM', 'THAT', 'MOVES', 'MOUNTAINS'] +1089-134686-0036-36: ref=['A', 'GREAT', 'SAINT', 'SAINT', 'FRANCIS', 'XAVIER'] +1089-134686-0036-36: hyp=['A', 'GREAT', 'SAINT', 'SAINT', 'FRANCIS', 'ZAVIOUR'] +1089-134686-0037-37: ref=['IN', 'THE', 'SILENCE', 'THEIR', 'DARK', 'FIRE', 'KINDLED', 'THE', 'DUSK', 'INTO', 'A', 'TAWNY', 'GLOW'] +1089-134686-0037-37: hyp=['IN', 'THE', 'SILENCE', 'THEIR', 'DARK', 'FIRE', 'KINDLED', 'THE', 'DUSK', 'INTO', 'A', 'TAWNY', 'GLOW'] +1089-134691-0000-38: ref=['HE', 'COULD', 'WAIT', 'NO', 'LONGER'] +1089-134691-0000-38: hyp=['HE', 'COULD', 'WAIT', 'NO', 'LONGER'] +1089-134691-0001-39: ref=['FOR', 'A', 'FULL', 'HOUR', 'HE', 'HAD', 'PACED', 'UP', 'AND', 'DOWN', 'WAITING', 'BUT', 'HE', 'COULD', 'WAIT', 'NO', 'LONGER'] +1089-134691-0001-39: hyp=['FOR', 'A', 'FULL', 'HOUR', 'HE', 'HAD', 'PACED', 'UP', 'AND', 'DOWN', 'WAITING', 'BUT', 'HE', 'COULD', 'WAIT', 'NO', 'LONGER'] +1089-134691-0002-40: ref=['HE', 'SET', 'OFF', 'ABRUPTLY', 'FOR', 'THE', 'BULL', 'WALKING', 'RAPIDLY', 'LEST', 'HIS', "FATHER'S", 'SHRILL', 'WHISTLE', 'MIGHT', 'CALL', 'HIM', 'BACK', 'AND', 'IN', 'A', 'FEW', 'MOMENTS', 'HE', 'HAD', 'ROUNDED', 'THE', 'CURVE', 'AT', 'THE', 'POLICE', 'BARRACK', 'AND', 'WAS', 'SAFE'] +1089-134691-0002-40: hyp=['HE', 'SET', 'OFF', 'ABRUPTLY', 'FOR', 'THE', 'BULL', 'WALKING', 'RAPIDLY', 'LEST', 'HIS', "FATHER'S", 'SHRILL', 'WHISTLE', 'MIGHT', 'CALL', 'HIM', 'BACK', 'AND', 'IN', 'A', 'FEW', 'MOMENTS', 'HE', 'HAD', 'ROUNDED', 'THE', 'CURVE', 'AT', 'THE', 'POLICE', 'BARRACK', 'AND', 'WAS', 'SAFE'] +1089-134691-0003-41: ref=['THE', 'UNIVERSITY'] +1089-134691-0003-41: hyp=['THE', 'UNIVERSITY'] +1089-134691-0004-42: ref=['PRIDE', 'AFTER', 'SATISFACTION', 'UPLIFTED', 'HIM', 'LIKE', 'LONG', 'SLOW', 'WAVES'] +1089-134691-0004-42: hyp=['PRIDE', 'AFTER', 'SATISFACTION', 'UPLIFTED', 'HIM', 'LIKE', 'LONG', 'SLOW', 'WAVES'] +1089-134691-0005-43: ref=['WHOSE', 'FEET', 'ARE', 'AS', 'THE', 'FEET', 'OF', 'HARTS', 'AND', 'UNDERNEATH', 'THE', 'EVERLASTING', 'ARMS'] +1089-134691-0005-43: hyp=['WHOSE', 'FEET', 'ARE', 'AS', 'THE', 'FEET', 'OF', 'HEARTS', 'AND', 'UNDERNEATH', 'THE', 'EVERLASTING', 'ARMS'] +1089-134691-0006-44: ref=['THE', 'PRIDE', 'OF', 'THAT', 'DIM', 'IMAGE', 'BROUGHT', 'BACK', 'TO', 'HIS', 'MIND', 'THE', 'DIGNITY', 'OF', 'THE', 'OFFICE', 'HE', 'HAD', 'REFUSED'] +1089-134691-0006-44: hyp=['THE', 'PRIDE', 'OF', 'THAT', 'DIM', 'IMAGE', 'BROUGHT', 'BACK', 'TO', 'HIS', 'MIND', 'THE', 'DIGNITY', 'OF', 'THE', 'OFFICE', 'HE', 'HAD', 'REFUSED'] +1089-134691-0007-45: ref=['SOON', 'THE', 'WHOLE', 'BRIDGE', 'WAS', 'TREMBLING', 'AND', 'RESOUNDING'] +1089-134691-0007-45: hyp=['SOON', 'THE', 'WHOLE', 'BRIDGE', 'WAS', 'TREMBLING', 'AND', 'RESOUNDING'] +1089-134691-0008-46: ref=['THE', 'UNCOUTH', 'FACES', 'PASSED', 'HIM', 'TWO', 'BY', 'TWO', 'STAINED', 'YELLOW', 'OR', 'RED', 'OR', 'LIVID', 'BY', 'THE', 'SEA', 'AND', 'AS', 'HE', 'STROVE', 'TO', 'LOOK', 'AT', 'THEM', 'WITH', 'EASE', 'AND', 'INDIFFERENCE', 'A', 'FAINT', 'STAIN', 'OF', 'PERSONAL', 'SHAME', 'AND', 'COMMISERATION', 'ROSE', 'TO', 'HIS', 'OWN', 'FACE'] +1089-134691-0008-46: hyp=['THE', 'UNCOUTH', 'FACES', 'PASSED', 'HIM', 'TWO', 'BY', 'TWO', 'STAINED', 'YELLOW', 'OR', 'RED', 'OR', 'LIVID', 'BY', 'THE', 'SEA', 'AND', 'AS', 'HE', 'STROVE', 'TO', 'LOOK', 'AT', 'THEM', 'WITH', 'EASE', 'AND', 'INDIFFERENCE', 'A', 'FAINT', 'STAIN', 'OF', 'PERSONAL', 'SHAME', 'AND', 'COMMISERATION', 'ROSE', 'TO', 'HIS', 'OWN', 'FACE'] +1089-134691-0009-47: ref=['ANGRY', 
'WITH', 'HIMSELF', 'HE', 'TRIED', 'TO', 'HIDE', 'HIS', 'FACE', 'FROM', 'THEIR', 'EYES', 'BY', 'GAZING', 'DOWN', 'SIDEWAYS', 'INTO', 'THE', 'SHALLOW', 'SWIRLING', 'WATER', 'UNDER', 'THE', 'BRIDGE', 'BUT', 'HE', 'STILL', 'SAW', 'A', 'REFLECTION', 'THEREIN', 'OF', 'THEIR', 'TOP', 'HEAVY', 'SILK', 'HATS', 'AND', 'HUMBLE', 'TAPE', 'LIKE', 'COLLARS', 'AND', 'LOOSELY', 'HANGING', 'CLERICAL', 'CLOTHES', 'BROTHER', 'HICKEY'] +1089-134691-0009-47: hyp=['ANGRY', 'WITH', 'HIMSELF', 'HE', 'TRIED', 'TO', 'HIDE', 'HIS', 'FACE', 'FROM', 'THEIR', 'EYES', 'BY', 'GAZING', 'DOWN', 'SIDEWAYS', 'INTO', 'THE', 'SHALLOW', 'SWIRLING', 'WATER', 'UNDER', 'THE', 'BRIDGE', 'BUT', 'HE', 'STILL', 'SAW', 'A', 'REFLECTION', 'THEREIN', 'OF', 'THEIR', 'TOP', 'HEAVY', 'SILK', 'HATS', 'AND', 'HUMBLE', 'TAPE', 'LIKE', 'COLLARS', 'AND', 'LOOSELY', 'HANGING', 'CLERICAL', 'CLOTHES', 'BROTHER', 'HICKIE'] +1089-134691-0010-48: ref=['BROTHER', 'MAC', 'ARDLE', 'BROTHER', 'KEOGH'] +1089-134691-0010-48: hyp=['BROTHER', 'MICAWAL', 'BROTHER', 'KIOV'] +1089-134691-0011-49: ref=['THEIR', 'PIETY', 'WOULD', 'BE', 'LIKE', 'THEIR', 'NAMES', 'LIKE', 'THEIR', 'FACES', 'LIKE', 'THEIR', 'CLOTHES', 'AND', 'IT', 'WAS', 'IDLE', 'FOR', 'HIM', 'TO', 'TELL', 'HIMSELF', 'THAT', 'THEIR', 'HUMBLE', 'AND', 'CONTRITE', 'HEARTS', 'IT', 'MIGHT', 'BE', 'PAID', 'A', 'FAR', 'RICHER', 'TRIBUTE', 'OF', 'DEVOTION', 'THAN', 'HIS', 'HAD', 'EVER', 'BEEN', 'A', 'GIFT', 'TENFOLD', 'MORE', 'ACCEPTABLE', 'THAN', 'HIS', 'ELABORATE', 'ADORATION'] +1089-134691-0011-49: hyp=['THEIR', 'PIETY', 'WOULD', 'BE', 'LIKE', 'THEIR', 'NAMES', 'LIKE', 'THEIR', 'FACES', 'LIKE', 'THEIR', 'CLOTHES', 'AND', 'IT', 'WAS', 'IDLE', 'FOR', 'HIM', 'TO', 'TELL', 'HIMSELF', 'THAT', 'THEIR', 'HUMBLE', 'AND', 'CONTRITE', 'HEARTS', 'IT', 'MIGHT', 'BE', 'PAID', 'A', 'FAR', 'RICHER', 'TRIBUTE', 'OF', 'DEVOTION', 'THAN', 'HIS', 'HAD', 'EVER', 'BEEN', 'A', 'GIFT', 'TENFOLD', 'MORE', 'ACCEPTABLE', 'THAN', 'HIS', 'ELABORATE', 'ADORATION'] +1089-134691-0012-50: ref=['IT', 'WAS', 'IDLE', 'FOR', 'HIM', 'TO', 'MOVE', 'HIMSELF', 'TO', 'BE', 'GENEROUS', 'TOWARDS', 'THEM', 'TO', 'TELL', 'HIMSELF', 'THAT', 'IF', 'HE', 'EVER', 'CAME', 'TO', 'THEIR', 'GATES', 'STRIPPED', 'OF', 'HIS', 'PRIDE', 'BEATEN', 'AND', 'IN', "BEGGAR'S", 'WEEDS', 'THAT', 'THEY', 'WOULD', 'BE', 'GENEROUS', 'TOWARDS', 'HIM', 'LOVING', 'HIM', 'AS', 'THEMSELVES'] +1089-134691-0012-50: hyp=['IT', 'WAS', 'IDLE', 'FOR', 'HIM', 'TO', 'MOVE', 'HIMSELF', 'TO', 'BE', 'GENEROUS', 'TOWARDS', 'THEM', 'TO', 'TELL', 'HIMSELF', 'THAT', 'IF', 'HE', 'EVER', 'CAME', 'TO', 'THEIR', 'GATES', 'STRIPPED', 'OF', 'HIS', 'PRIDE', 'BEATEN', 'AND', 'IN', 'BEGGARS', 'WEEDS', 'THAT', 'THEY', 'WOULD', 'BE', 'GENEROUS', 'TOWARDS', 'HIM', 'LOVING', 'HIM', 'AS', 'THEMSELVES'] +1089-134691-0013-51: ref=['IDLE', 'AND', 'EMBITTERING', 'FINALLY', 'TO', 'ARGUE', 'AGAINST', 'HIS', 'OWN', 'DISPASSIONATE', 'CERTITUDE', 'THAT', 'THE', 'COMMANDMENT', 'OF', 'LOVE', 'BADE', 'US', 'NOT', 'TO', 'LOVE', 'OUR', 'NEIGHBOUR', 'AS', 'OURSELVES', 'WITH', 'THE', 'SAME', 'AMOUNT', 'AND', 'INTENSITY', 'OF', 'LOVE', 'BUT', 'TO', 'LOVE', 'HIM', 'AS', 'OURSELVES', 'WITH', 'THE', 'SAME', 'KIND', 'OF', 'LOVE'] +1089-134691-0013-51: hyp=['IDLE', 'AND', 'EMBITTERING', 'FINALLY', 'TO', 'ARGUE', 'AGAINST', 'HIS', 'OWN', 'DISPASSIONATE', 'CERTITUDE', 'THAT', 'THE', 'COMMANDMENT', 'OF', 'LOVE', 'BADE', 'US', 'NOT', 'TO', 'LOVE', 'OUR', 'NEIGHBOUR', 'AS', 'OURSELVES', 'WITH', 'THE', 'SAME', 'AMOUNT', 'AND', 'INTENSITY', 'OF', 'LOVE', 'BUT', 'TO', 'LOVE', 'HIM', 'AS', 'OURSELVES', 'WITH', 'THE', 'SAME', 'KIND', 
'OF', 'LOVE'] +1089-134691-0014-52: ref=['THE', 'PHRASE', 'AND', 'THE', 'DAY', 'AND', 'THE', 'SCENE', 'HARMONIZED', 'IN', 'A', 'CHORD'] +1089-134691-0014-52: hyp=['THE', 'PHRASE', 'AND', 'THE', 'DAY', 'AND', 'THE', 'SCENE', 'HARMONIZED', 'IN', 'ACCORD'] +1089-134691-0015-53: ref=['WORDS', 'WAS', 'IT', 'THEIR', 'COLOURS'] +1089-134691-0015-53: hyp=['WORDS', 'WAS', 'IT', 'THEIR', 'COLORS'] +1089-134691-0016-54: ref=['THEY', 'WERE', 'VOYAGING', 'ACROSS', 'THE', 'DESERTS', 'OF', 'THE', 'SKY', 'A', 'HOST', 'OF', 'NOMADS', 'ON', 'THE', 'MARCH', 'VOYAGING', 'HIGH', 'OVER', 'IRELAND', 'WESTWARD', 'BOUND'] +1089-134691-0016-54: hyp=['THEY', 'WERE', 'VOYAGING', 'ACROSS', 'THE', 'DESERTS', 'OF', 'THE', 'SKY', 'A', 'HOST', 'OF', 'NOMADS', 'ON', 'THE', 'MARCH', 'VOYAGING', 'HIGH', 'OVER', 'IRELAND', 'WESTWARD', 'BOUND'] +1089-134691-0017-55: ref=['THE', 'EUROPE', 'THEY', 'HAD', 'COME', 'FROM', 'LAY', 'OUT', 'THERE', 'BEYOND', 'THE', 'IRISH', 'SEA', 'EUROPE', 'OF', 'STRANGE', 'TONGUES', 'AND', 'VALLEYED', 'AND', 'WOODBEGIRT', 'AND', 'CITADELLED', 'AND', 'OF', 'ENTRENCHED', 'AND', 'MARSHALLED', 'RACES'] +1089-134691-0017-55: hyp=['THE', 'EUROPE', 'THEY', 'HAD', 'COME', 'FROM', 'LAY', 'OUT', 'THERE', 'BEYOND', 'THE', 'IRISH', 'SEA', 'EUROPE', 'OF', 'STRANGE', 'TONGUES', 'AND', 'VALLEYED', 'AND', 'WOOD', 'BEGIRT', 'AND', 'CITADELED', 'AND', 'OF', 'ENTRENCHED', 'AND', 'MARSHALLED', 'RACES'] +1089-134691-0018-56: ref=['AGAIN', 'AGAIN'] +1089-134691-0018-56: hyp=['AGAIN', 'AGAIN'] +1089-134691-0019-57: ref=['A', 'VOICE', 'FROM', 'BEYOND', 'THE', 'WORLD', 'WAS', 'CALLING'] +1089-134691-0019-57: hyp=['A', 'VOICE', 'FROM', 'BEYOND', 'THE', 'WORLD', 'WAS', 'CALLING'] +1089-134691-0020-58: ref=['HELLO', 'STEPHANOS', 'HERE', 'COMES', 'THE', 'DEDALUS'] +1089-134691-0020-58: hyp=['HALLO', 'STEPHANOS', 'HERE', 'COMES', 'THE', 'DAEDALUS'] +1089-134691-0021-59: ref=['THEIR', 'DIVING', 'STONE', 'POISED', 'ON', 'ITS', 'RUDE', 'SUPPORTS', 'AND', 'ROCKING', 'UNDER', 'THEIR', 'PLUNGES', 'AND', 'THE', 'ROUGH', 'HEWN', 'STONES', 'OF', 'THE', 'SLOPING', 'BREAKWATER', 'OVER', 'WHICH', 'THEY', 'SCRAMBLED', 'IN', 'THEIR', 'HORSEPLAY', 'GLEAMED', 'WITH', 'COLD', 'WET', 'LUSTRE'] +1089-134691-0021-59: hyp=['THEIR', 'DIVING', 'STONE', 'POISED', 'ON', 'ITS', 'RUDE', 'SUPPORTS', 'AND', 'ROCKING', 'UNDER', 'THEIR', 'PLUNGES', 'AND', 'THE', 'ROUGH', 'HEWN', 'STONES', 'OF', 'THE', 'SLOPING', 'BREAKWATER', 'OVER', 'WHICH', 'THEY', 'SCRAMBLED', 'IN', 'THEIR', 'HORSE', 'PLAY', 'GLEAMED', 'WITH', 'COLD', 'WET', 'LUSTRE'] +1089-134691-0022-60: ref=['HE', 'STOOD', 'STILL', 'IN', 'DEFERENCE', 'TO', 'THEIR', 'CALLS', 'AND', 'PARRIED', 'THEIR', 'BANTER', 'WITH', 'EASY', 'WORDS'] +1089-134691-0022-60: hyp=['HE', 'STOOD', 'STILL', 'IN', 'DEFERENCE', 'TO', 'THEIR', 'CALLS', 'AND', 'PARRIED', 'THEIR', 'BANTER', 'WITH', 'EASY', 'WORDS'] +1089-134691-0023-61: ref=['IT', 'WAS', 'A', 'PAIN', 'TO', 'SEE', 'THEM', 'AND', 'A', 'SWORD', 'LIKE', 'PAIN', 'TO', 'SEE', 'THE', 'SIGNS', 'OF', 'ADOLESCENCE', 'THAT', 'MADE', 'REPELLENT', 'THEIR', 'PITIABLE', 'NAKEDNESS'] +1089-134691-0023-61: hyp=['IT', 'WAS', 'A', 'PAIN', 'TO', 'SEE', 'THEM', 'AND', 'A', 'SWORD', 'LIKE', 'PAIN', 'TO', 'SEE', 'THE', 'SIGNS', 'OF', 'ADOLESCENCE', 'THAT', 'MADE', 'REPELLENT', 'THEIR', 'PITIABLE', 'NAKEDNESS'] +1089-134691-0024-62: ref=['STEPHANOS', 'DEDALOS'] +1089-134691-0024-62: hyp=["STEPHANO'S", 'DEAD', 'LOS'] +1089-134691-0025-63: ref=['A', 'MOMENT', 'BEFORE', 'THE', 'GHOST', 'OF', 'THE', 'ANCIENT', 'KINGDOM', 'OF', 'THE', 'DANES', 'HAD', 'LOOKED', 'FORTH', 'THROUGH', 'THE', 
'VESTURE', 'OF', 'THE', 'HAZEWRAPPED', 'CITY'] +1089-134691-0025-63: hyp=['A', 'MOMENT', 'BEFORE', 'THE', 'GHOST', 'OF', 'THE', 'ANCIENT', 'KINGDOM', 'OF', 'THE', 'DANES', 'HAD', 'LOOKED', 'FORTH', 'THROUGH', 'THE', 'VESTURE', 'OF', 'THE', 'HAZE', 'WRAPT', 'CITY'] +1188-133604-0000-64: ref=['YOU', 'WILL', 'FIND', 'ME', 'CONTINUALLY', 'SPEAKING', 'OF', 'FOUR', 'MEN', 'TITIAN', 'HOLBEIN', 'TURNER', 'AND', 'TINTORET', 'IN', 'ALMOST', 'THE', 'SAME', 'TERMS'] +1188-133604-0000-64: hyp=['YOU', 'WILL', 'FIND', 'ME', 'CONTINUALLY', 'SPEAKING', 'OF', 'FOUR', 'MEN', 'TITIAN', 'HOLBEIN', 'TURNER', 'AND', 'TINTORET', 'IN', 'ALMOST', 'THE', 'SAME', 'TERMS'] +1188-133604-0001-65: ref=['THEY', 'UNITE', 'EVERY', 'QUALITY', 'AND', 'SOMETIMES', 'YOU', 'WILL', 'FIND', 'ME', 'REFERRING', 'TO', 'THEM', 'AS', 'COLORISTS', 'SOMETIMES', 'AS', 'CHIAROSCURISTS'] +1188-133604-0001-65: hyp=['THEY', 'UNITE', 'EVERY', 'QUALITY', 'AND', 'SOMETIMES', 'YOU', 'WILL', 'FIND', 'ME', 'REFERRING', 'TO', 'THEM', 'AS', 'COLORISTS', 'SOMETIMES', 'AS', 'CHIAROSCURISTS'] +1188-133604-0002-66: ref=['BY', 'BEING', 'STUDIOUS', 'OF', 'COLOR', 'THEY', 'ARE', 'STUDIOUS', 'OF', 'DIVISION', 'AND', 'WHILE', 'THE', 'CHIAROSCURIST', 'DEVOTES', 'HIMSELF', 'TO', 'THE', 'REPRESENTATION', 'OF', 'DEGREES', 'OF', 'FORCE', 'IN', 'ONE', 'THING', 'UNSEPARATED', 'LIGHT', 'THE', 'COLORISTS', 'HAVE', 'FOR', 'THEIR', 'FUNCTION', 'THE', 'ATTAINMENT', 'OF', 'BEAUTY', 'BY', 'ARRANGEMENT', 'OF', 'THE', 'DIVISIONS', 'OF', 'LIGHT'] +1188-133604-0002-66: hyp=['BY', 'BEING', 'STUDIOUS', 'OF', 'COLOR', 'THEY', 'ARE', 'STUDIOUS', 'OF', 'DIVISION', 'AND', 'WHILE', 'THE', 'CHIOSCURIST', 'DEVOTES', 'HIMSELF', 'TO', 'THE', 'REPRESENTATION', 'OF', 'DEGREES', 'OF', 'FORCE', 'IN', 'ONE', 'THING', 'UNSEPARATED', 'LIGHT', 'THE', 'COLORISTS', 'HAVE', 'FOR', 'THEIR', 'FUNCTION', 'THE', 'ATTAINMENT', 'OF', 'BEAUTY', 'BY', 'ARRANGEMENT', 'OF', 'THE', 'DIVISIONS', 'OF', 'LIGHT'] +1188-133604-0003-67: ref=['MY', 'FIRST', 'AND', 'PRINCIPAL', 'REASON', 'WAS', 'THAT', 'THEY', 'ENFORCED', 'BEYOND', 'ALL', 'RESISTANCE', 'ON', 'ANY', 'STUDENT', 'WHO', 'MIGHT', 'ATTEMPT', 'TO', 'COPY', 'THEM', 'THIS', 'METHOD', 'OF', 'LAYING', 'PORTIONS', 'OF', 'DISTINCT', 'HUE', 'SIDE', 'BY', 'SIDE'] +1188-133604-0003-67: hyp=['MY', 'FIRST', 'AND', 'PRINCIPAL', 'REASON', 'WAS', 'THAT', 'THEY', 'ENFORCED', 'BEYOND', 'ALL', 'RESISTANCE', 'ON', 'ANY', 'STUDENT', 'WHO', 'MIGHT', 'ATTEMPT', 'TO', 'COPY', 'THEM', 'THIS', 'METHOD', 'OF', 'LAYING', 'PORTIONS', 'OF', 'DISTINCT', 'HUE', 'SIDE', 'BY', 'SIDE'] +1188-133604-0004-68: ref=['SOME', 'OF', 'THE', 'TOUCHES', 'INDEED', 'WHEN', 'THE', 'TINT', 'HAS', 'BEEN', 'MIXED', 'WITH', 'MUCH', 'WATER', 'HAVE', 'BEEN', 'LAID', 'IN', 'LITTLE', 'DROPS', 'OR', 'PONDS', 'SO', 'THAT', 'THE', 'PIGMENT', 'MIGHT', 'CRYSTALLIZE', 'HARD', 'AT', 'THE', 'EDGE'] +1188-133604-0004-68: hyp=['SOME', 'OF', 'THE', 'TOUCHES', 'INDEED', 'WHEN', 'THE', 'TINT', 'HAS', 'BEEN', 'MIXED', 'WITH', 'MUCH', 'WATER', 'HAVE', 'BEEN', 'LAID', 'IN', 'LITTLE', 'DROPS', 'OR', 'PONDS', 'SO', 'THAT', 'THE', 'PIGMENT', 'MIGHT', 'CRYSTALLIZE', 'HARD', 'AT', 'THE', 'EDGE'] +1188-133604-0005-69: ref=['IT', 'IS', 'THE', 'HEAD', 'OF', 'A', 'PARROT', 'WITH', 'A', 'LITTLE', 'FLOWER', 'IN', 'HIS', 'BEAK', 'FROM', 'A', 'PICTURE', 'OF', "CARPACCIO'S", 'ONE', 'OF', 'HIS', 'SERIES', 'OF', 'THE', 'LIFE', 'OF', 'SAINT', 'GEORGE'] +1188-133604-0005-69: hyp=['IT', 'IS', 'THE', 'HEAD', 'OF', 'A', 'PARROT', 'WITH', 'A', 'LITTLE', 'FLOWER', 'IN', 'HIS', 'BEAK', 'FROM', 'A', 'PICTURE', 'OF', 'CARPATIOS', 'ONE', 'OF', 'HIS', 
'SERIES', 'OF', 'THE', 'LIFE', 'OF', 'SAINT', 'GEORGE'] +1188-133604-0006-70: ref=['THEN', 'HE', 'COMES', 'TO', 'THE', 'BEAK', 'OF', 'IT'] +1188-133604-0006-70: hyp=['THEN', 'HE', 'COMES', 'TO', 'THE', 'BEAK', 'OF', 'IT'] +1188-133604-0007-71: ref=['THE', 'BROWN', 'GROUND', 'BENEATH', 'IS', 'LEFT', 'FOR', 'THE', 'MOST', 'PART', 'ONE', 'TOUCH', 'OF', 'BLACK', 'IS', 'PUT', 'FOR', 'THE', 'HOLLOW', 'TWO', 'DELICATE', 'LINES', 'OF', 'DARK', 'GRAY', 'DEFINE', 'THE', 'OUTER', 'CURVE', 'AND', 'ONE', 'LITTLE', 'QUIVERING', 'TOUCH', 'OF', 'WHITE', 'DRAWS', 'THE', 'INNER', 'EDGE', 'OF', 'THE', 'MANDIBLE'] +1188-133604-0007-71: hyp=['THE', 'BROWN', 'GROUND', 'BENEATH', 'IS', 'LEFT', 'FOR', 'THE', 'MOST', 'PART', 'ONE', 'TOUCH', 'OF', 'BLACK', 'IS', 'PUT', 'FOR', 'THE', 'HOLLOW', 'TWO', 'DELICATE', 'LINES', 'OF', 'DARK', 'GRAY', 'DEFINE', 'THE', 'OUTER', 'CURVE', 'AND', 'ONE', 'LITTLE', 'QUIVERING', 'TOUCH', 'OF', 'WHITE', 'DRAWS', 'THE', 'INNER', 'EDGE', 'OF', 'THE', 'MANDIBLE'] +1188-133604-0008-72: ref=['FOR', 'BELIEVE', 'ME', 'THE', 'FINAL', 'PHILOSOPHY', 'OF', 'ART', 'CAN', 'ONLY', 'RATIFY', 'THEIR', 'OPINION', 'THAT', 'THE', 'BEAUTY', 'OF', 'A', 'COCK', 'ROBIN', 'IS', 'TO', 'BE', 'RED', 'AND', 'OF', 'A', 'GRASS', 'PLOT', 'TO', 'BE', 'GREEN', 'AND', 'THE', 'BEST', 'SKILL', 'OF', 'ART', 'IS', 'IN', 'INSTANTLY', 'SEIZING', 'ON', 'THE', 'MANIFOLD', 'DELICIOUSNESS', 'OF', 'LIGHT', 'WHICH', 'YOU', 'CAN', 'ONLY', 'SEIZE', 'BY', 'PRECISION', 'OF', 'INSTANTANEOUS', 'TOUCH'] +1188-133604-0008-72: hyp=['FOR', 'BELIEVE', 'ME', 'THE', 'FINAL', 'PHILOSOPHY', 'OF', 'ART', 'CAN', 'ONLY', 'RATIFY', 'THEIR', 'OPINION', 'THAT', 'THE', 'BEAUTY', 'OF', 'A', 'COCK', 'ROBIN', 'IS', 'TO', 'BE', 'RED', 'AND', 'OF', 'A', 'GRASS', 'PLOT', 'TO', 'BE', 'GREEN', 'AND', 'THE', 'BEST', 'SKILL', 'OF', 'ART', 'IS', 'IN', 'INSTANTLY', 'SEIZING', 'ON', 'THE', 'MANIFOLD', 'DELICIOUSNESS', 'OF', 'LIGHT', 'WHICH', 'YOU', 'CAN', 'ONLY', 'SEIZE', 'BY', 'PRECISION', 'OF', 'INSTANTANEOUS', 'TOUCH'] +1188-133604-0009-73: ref=['NOW', 'YOU', 'WILL', 'SEE', 'IN', 'THESE', 'STUDIES', 'THAT', 'THE', 'MOMENT', 'THE', 'WHITE', 'IS', 'INCLOSED', 'PROPERLY', 'AND', 'HARMONIZED', 'WITH', 'THE', 'OTHER', 'HUES', 'IT', 'BECOMES', 'SOMEHOW', 'MORE', 'PRECIOUS', 'AND', 'PEARLY', 'THAN', 'THE', 'WHITE', 'PAPER', 'AND', 'THAT', 'I', 'AM', 'NOT', 'AFRAID', 'TO', 'LEAVE', 'A', 'WHOLE', 'FIELD', 'OF', 'UNTREATED', 'WHITE', 'PAPER', 'ALL', 'ROUND', 'IT', 'BEING', 'SURE', 'THAT', 'EVEN', 'THE', 'LITTLE', 'DIAMONDS', 'IN', 'THE', 'ROUND', 'WINDOW', 'WILL', 'TELL', 'AS', 'JEWELS', 'IF', 'THEY', 'ARE', 'GRADATED', 'JUSTLY'] +1188-133604-0009-73: hyp=['NOW', 'YOU', 'WILL', 'SEE', 'IN', 'THESE', 'STUDIES', 'THAT', 'THE', 'MOMENT', 'THE', 'WHITE', 'IS', 'ENCLOSED', 'PROPERLY', 'AND', 'HARMONIZED', 'WITH', 'THE', 'OTHER', 'HUES', 'IT', 'BECOMES', 'SOMEHOW', 'MORE', 'PRECIOUS', 'AND', 'PEARLY', 'THAN', 'THE', 'WHITE', 'PAPER', 'AND', 'THAT', 'I', 'AM', 'NOT', 'AFRAID', 'TO', 'LEAVE', 'A', 'WHOLE', 'FIELD', 'OF', 'UNTREATED', 'WHITE', 'PAPER', 'ALL', 'ROUND', 'IT', 'BEING', 'SURE', 'THAT', 'EVEN', 'THE', 'LITTLE', 'DIAMONDS', 'IN', 'THE', 'ROUND', 'WINDOW', 'WILL', 'TELL', 'AS', 'JEWELS', 'IF', 'THEY', 'ARE', 'GRADATED', 'JUSTLY'] +1188-133604-0010-74: ref=['BUT', 'IN', 'THIS', 'VIGNETTE', 'COPIED', 'FROM', 'TURNER', 'YOU', 'HAVE', 'THE', 'TWO', 'PRINCIPLES', 'BROUGHT', 'OUT', 'PERFECTLY'] +1188-133604-0010-74: hyp=['BUT', 'IN', 'THIS', 'VIGNETTE', 'COPIED', 'FROM', 'TURNER', 'YOU', 'HAVE', 'THE', 'TWO', 'PRINCIPLES', 'BROUGHT', 'OUT', 'PERFECTLY'] 
+1188-133604-0011-75: ref=['THEY', 'ARE', 'BEYOND', 'ALL', 'OTHER', 'WORKS', 'THAT', 'I', 'KNOW', 'EXISTING', 'DEPENDENT', 'FOR', 'THEIR', 'EFFECT', 'ON', 'LOW', 'SUBDUED', 'TONES', 'THEIR', 'FAVORITE', 'CHOICE', 'IN', 'TIME', 'OF', 'DAY', 'BEING', 'EITHER', 'DAWN', 'OR', 'TWILIGHT', 'AND', 'EVEN', 'THEIR', 'BRIGHTEST', 'SUNSETS', 'PRODUCED', 'CHIEFLY', 'OUT', 'OF', 'GRAY', 'PAPER'] +1188-133604-0011-75: hyp=['THEY', 'ARE', 'BEYOND', 'ALL', 'OTHER', 'WORKS', 'THAT', 'I', 'KNOW', 'EXISTING', 'DEPENDENT', 'FOR', 'THEIR', 'EFFECT', 'ON', 'LOW', 'SUBDUED', 'TONES', 'THEIR', 'FAVORITE', 'CHOICE', 'IN', 'TIME', 'OF', 'DAY', 'BEING', 'EITHER', 'DAWN', 'OR', 'TWILIGHT', 'AND', 'EVEN', 'THEIR', 'BRIGHTEST', 'SUNSETS', 'PRODUCED', 'CHIEFLY', 'OUT', 'OF', 'GREY', 'PAPER'] +1188-133604-0012-76: ref=['IT', 'MAY', 'BE', 'THAT', 'A', 'GREAT', 'COLORIST', 'WILL', 'USE', 'HIS', 'UTMOST', 'FORCE', 'OF', 'COLOR', 'AS', 'A', 'SINGER', 'HIS', 'FULL', 'POWER', 'OF', 'VOICE', 'BUT', 'LOUD', 'OR', 'LOW', 'THE', 'VIRTUE', 'IS', 'IN', 'BOTH', 'CASES', 'ALWAYS', 'IN', 'REFINEMENT', 'NEVER', 'IN', 'LOUDNESS'] +1188-133604-0012-76: hyp=['IT', 'MAY', 'BE', 'THAT', 'A', 'GREAT', 'COLORIST', 'WILL', 'USE', 'HIS', 'UTMOST', 'FORCE', 'OF', 'COLOR', 'AS', 'A', 'SINGER', 'HIS', 'FULL', 'POWER', 'OF', 'VOICE', 'BUT', 'LOUD', 'OR', 'LOW', 'THE', 'VIRTUE', 'IS', 'IN', 'BOTH', 'CASES', 'ALWAYS', 'IN', 'REFINEMENT', 'NEVER', 'IN', 'LOUDNESS'] +1188-133604-0013-77: ref=['IT', 'MUST', 'REMEMBER', 'BE', 'ONE', 'OR', 'THE', 'OTHER'] +1188-133604-0013-77: hyp=['IT', 'MUST', 'REMEMBER', 'BE', 'ONE', 'OR', 'THE', 'OTHER'] +1188-133604-0014-78: ref=['DO', 'NOT', 'THEREFORE', 'THINK', 'THAT', 'THE', 'GOTHIC', 'SCHOOL', 'IS', 'AN', 'EASY', 'ONE'] +1188-133604-0014-78: hyp=['DO', 'NOT', 'THEREFORE', 'THINK', 'THAT', 'THE', 'GOTHIC', 'SCHOOL', 'IS', 'AN', 'EASY', 'ONE'] +1188-133604-0015-79: ref=['THE', 'LAW', 'OF', 'THAT', 'SCHOOL', 'IS', 'THAT', 'EVERYTHING', 'SHALL', 'BE', 'SEEN', 'CLEARLY', 'OR', 'AT', 'LEAST', 'ONLY', 'IN', 'SUCH', 'MIST', 'OR', 'FAINTNESS', 'AS', 'SHALL', 'BE', 'DELIGHTFUL', 'AND', 'I', 'HAVE', 'NO', 'DOUBT', 'THAT', 'THE', 'BEST', 'INTRODUCTION', 'TO', 'IT', 'WOULD', 'BE', 'THE', 'ELEMENTARY', 'PRACTICE', 'OF', 'PAINTING', 'EVERY', 'STUDY', 'ON', 'A', 'GOLDEN', 'GROUND'] +1188-133604-0015-79: hyp=['THE', 'LAW', 'OF', 'THAT', 'SCHOOL', 'IS', 'THAT', 'EVERYTHING', 'SHALL', 'BE', 'SEEN', 'CLEARLY', 'OR', 'AT', 'LEAST', 'ONLY', 'IN', 'SUCH', 'MIST', 'OR', 'FAINTNESS', 'AS', 'SHALL', 'BE', 'DELIGHTFUL', 'AND', 'I', 'HAVE', 'NO', 'DOUBT', 'THAT', 'THE', 'BEST', 'INTRODUCTION', 'TO', 'IT', 'WOULD', 'BE', 'THE', 'ELEMENTARY', 'PRACTICE', 'OF', 'PAINTING', 'EVERY', 'STUDY', 'ON', 'A', 'GOLDEN', 'GROUND'] +1188-133604-0016-80: ref=['THIS', 'AT', 'ONCE', 'COMPELS', 'YOU', 'TO', 'UNDERSTAND', 'THAT', 'THE', 'WORK', 'IS', 'TO', 'BE', 'IMAGINATIVE', 'AND', 'DECORATIVE', 'THAT', 'IT', 'REPRESENTS', 'BEAUTIFUL', 'THINGS', 'IN', 'THE', 'CLEAREST', 'WAY', 'BUT', 'NOT', 'UNDER', 'EXISTING', 'CONDITIONS', 'AND', 'THAT', 'IN', 'FACT', 'YOU', 'ARE', 'PRODUCING', "JEWELER'S", 'WORK', 'RATHER', 'THAN', 'PICTURES'] +1188-133604-0016-80: hyp=['THIS', 'AT', 'ONCE', 'COMPELS', 'YOU', 'TO', 'UNDERSTAND', 'THAT', 'THE', 'WORK', 'IS', 'TO', 'BE', 'IMAGINATIVE', 'AND', 'DECORATIVE', 'THAT', 'IT', 'REPRESENTS', 'BEAUTIFUL', 'THINGS', 'IN', 'THE', 'CLEAREST', 'WAY', 'BUT', 'NOT', 'UNDER', 'EXISTING', 'CONDITIONS', 'AND', 'THAT', 'IN', 'FACT', 'YOU', 'ARE', 'PRODUCING', 'JEWELLERS', 'WORK', 'RATHER', 'THAN', 'PICTURES'] +1188-133604-0017-81: 
ref=['THAT', 'A', 'STYLE', 'IS', 'RESTRAINED', 'OR', 'SEVERE', 'DOES', 'NOT', 'MEAN', 'THAT', 'IT', 'IS', 'ALSO', 'ERRONEOUS'] +1188-133604-0017-81: hyp=['THAT', 'A', 'STYLE', 'IS', 'RESTRAINED', 'OR', 'SEVERE', 'DOES', 'NOT', 'MEAN', 'THAT', 'IT', 'IS', 'ALSO', 'ERRONEOUS'] +1188-133604-0018-82: ref=['IN', 'ALL', 'EARLY', 'GOTHIC', 'ART', 'INDEED', 'YOU', 'WILL', 'FIND', 'FAILURE', 'OF', 'THIS', 'KIND', 'ESPECIALLY', 'DISTORTION', 'AND', 'RIGIDITY', 'WHICH', 'ARE', 'IN', 'MANY', 'RESPECTS', 'PAINFULLY', 'TO', 'BE', 'COMPARED', 'WITH', 'THE', 'SPLENDID', 'REPOSE', 'OF', 'CLASSIC', 'ART'] +1188-133604-0018-82: hyp=['IN', 'ALL', 'EARLY', 'GOTHIC', 'ART', 'INDEED', 'YOU', 'WILL', 'FIND', 'FAILURE', 'OF', 'THIS', 'KIND', 'ESPECIALLY', 'DISTORTION', 'AND', 'RIGIDITY', 'WHICH', 'ARE', 'IN', 'MANY', 'RESPECTS', 'PAINFULLY', 'TO', 'BE', 'COMPARED', 'WITH', 'THE', 'SPLENDID', 'REPOSE', 'OF', 'CLASSIC', 'ART'] +1188-133604-0019-83: ref=['THE', 'LARGE', 'LETTER', 'CONTAINS', 'INDEED', 'ENTIRELY', 'FEEBLE', 'AND', 'ILL', 'DRAWN', 'FIGURES', 'THAT', 'IS', 'MERELY', 'CHILDISH', 'AND', 'FAILING', 'WORK', 'OF', 'AN', 'INFERIOR', 'HAND', 'IT', 'IS', 'NOT', 'CHARACTERISTIC', 'OF', 'GOTHIC', 'OR', 'ANY', 'OTHER', 'SCHOOL'] +1188-133604-0019-83: hyp=['THE', 'LARGE', 'LETTER', 'CONTAINS', 'INDEED', 'ENTIRELY', 'FEEBLE', 'AND', 'ILL', 'DRAWN', 'FIGURES', 'THAT', 'IS', 'MERELY', 'CHILDISH', 'AND', 'FAILING', 'WORK', 'OF', 'AN', 'INFERIOR', 'HAND', 'IT', 'IS', 'NOT', 'CHARACTERISTIC', 'OF', 'GOTHIC', 'OR', 'ANY', 'OTHER', 'SCHOOL'] +1188-133604-0020-84: ref=['BUT', 'OBSERVE', 'YOU', 'CAN', 'ONLY', 'DO', 'THIS', 'ON', 'ONE', 'CONDITION', 'THAT', 'OF', 'STRIVING', 'ALSO', 'TO', 'CREATE', 'IN', 'REALITY', 'THE', 'BEAUTY', 'WHICH', 'YOU', 'SEEK', 'IN', 'IMAGINATION'] +1188-133604-0020-84: hyp=['BUT', 'OBSERVE', 'YOU', 'CAN', 'ONLY', 'DO', 'THIS', 'ON', 'ONE', 'CONDITION', 'THAT', 'OF', 'STRIVING', 'ALSO', 'TO', 'CREATE', 'IN', 'REALITY', 'THE', 'BEAUTY', 'WHICH', 'YOU', 'SEEK', 'IN', 'IMAGINATION'] +1188-133604-0021-85: ref=['IT', 'WILL', 'BE', 'WHOLLY', 'IMPOSSIBLE', 'FOR', 'YOU', 'TO', 'RETAIN', 'THE', 'TRANQUILLITY', 'OF', 'TEMPER', 'AND', 'FELICITY', 'OF', 'FAITH', 'NECESSARY', 'FOR', 'NOBLE', 'PURIST', 'PAINTING', 'UNLESS', 'YOU', 'ARE', 'ACTIVELY', 'ENGAGED', 'IN', 'PROMOTING', 'THE', 'FELICITY', 'AND', 'PEACE', 'OF', 'PRACTICAL', 'LIFE'] +1188-133604-0021-85: hyp=['IT', 'WILL', 'BE', 'WHOLLY', 'IMPOSSIBLE', 'FOR', 'YOU', 'TO', 'RETAIN', 'THE', 'TRANQUILLITY', 'OF', 'TEMPER', 'AND', 'FELICITY', 'OF', 'FAITH', 'NECESSARY', 'FOR', 'NOBLE', 'PUREST', 'PAINTING', 'UNLESS', 'YOU', 'ARE', 'ACTIVELY', 'ENGAGED', 'IN', 'PROMOTING', 'THE', 'FELICITY', 'AND', 'PEACE', 'OF', 'PRACTICAL', 'LIFE'] +1188-133604-0022-86: ref=['YOU', 'MUST', 'LOOK', 'AT', 'HIM', 'IN', 'THE', 'FACE', 'FIGHT', 'HIM', 'CONQUER', 'HIM', 'WITH', 'WHAT', 'SCATHE', 'YOU', 'MAY', 'YOU', 'NEED', 'NOT', 'THINK', 'TO', 'KEEP', 'OUT', 'OF', 'THE', 'WAY', 'OF', 'HIM'] +1188-133604-0022-86: hyp=['YOU', 'MUST', 'LOOK', 'HIM', 'IN', 'THE', 'FACE', 'FIGHT', 'HIM', 'CONQUER', 'HIM', 'WITH', 'WHAT', 'SCATHE', 'YOU', 'MAY', 'YOU', 'NEED', 'NOT', 'THINK', 'TO', 'KEEP', 'OUT', 'OF', 'THE', 'WAY', 'OF', 'HIM'] +1188-133604-0023-87: ref=['THE', 'COLORIST', 'SAYS', 'FIRST', 'OF', 'ALL', 'AS', 'MY', 'DELICIOUS', 'PAROQUET', 'WAS', 'RUBY', 'SO', 'THIS', 'NASTY', 'VIPER', 'SHALL', 'BE', 'BLACK', 'AND', 'THEN', 'IS', 'THE', 'QUESTION', 'CAN', 'I', 'ROUND', 'HIM', 'OFF', 'EVEN', 'THOUGH', 'HE', 'IS', 'BLACK', 'AND', 'MAKE', 'HIM', 'SLIMY', 'AND', 'YET', 'SPRINGY', 'AND', 
'CLOSE', 'DOWN', 'CLOTTED', 'LIKE', 'A', 'POOL', 'OF', 'BLACK', 'BLOOD', 'ON', 'THE', 'EARTH', 'ALL', 'THE', 'SAME'] +1188-133604-0023-87: hyp=['THE', 'COLORIST', 'SAYS', 'FIRST', 'OF', 'ALL', 'AS', 'MY', 'DELICIOUS', 'PAROQUET', 'WAS', 'RUBY', 'SO', 'THIS', 'NASTY', 'VIPER', 'SHALL', 'BE', 'BLACK', 'AND', 'THEN', 'IS', 'THE', 'QUESTION', 'CAN', 'I', 'ROUND', 'HIM', 'OFF', 'EVEN', 'THOUGH', 'HE', 'IS', 'BLACK', 'AND', 'MAKE', 'HIM', 'SLIMY', 'AND', 'YET', 'SPRINGY', 'AND', 'CLOSE', 'DOWN', 'CLOTTED', 'LIKE', 'A', 'POOL', 'OF', 'BLACK', 'BLOOD', 'ON', 'THE', 'EARTH', 'ALL', 'THE', 'SAME'] +1188-133604-0024-88: ref=['NOTHING', 'WILL', 'BE', 'MORE', 'PRECIOUS', 'TO', 'YOU', 'I', 'THINK', 'IN', 'THE', 'PRACTICAL', 'STUDY', 'OF', 'ART', 'THAN', 'THE', 'CONVICTION', 'WHICH', 'WILL', 'FORCE', 'ITSELF', 'ON', 'YOU', 'MORE', 'AND', 'MORE', 'EVERY', 'HOUR', 'OF', 'THE', 'WAY', 'ALL', 'THINGS', 'ARE', 'BOUND', 'TOGETHER', 'LITTLE', 'AND', 'GREAT', 'IN', 'SPIRIT', 'AND', 'IN', 'MATTER'] +1188-133604-0024-88: hyp=['NOTHING', 'WILL', 'BE', 'MORE', 'PRECIOUS', 'TO', 'YOU', 'I', 'THINK', 'IN', 'THE', 'PRACTICAL', 'STUDY', 'OF', 'ART', 'THAN', 'THE', 'CONVICTION', 'WHICH', 'WILL', 'FORCE', 'ITSELF', 'ON', 'YOU', 'MORE', 'AND', 'MORE', 'EVERY', 'HOUR', 'OF', 'THE', 'WAY', 'ALL', 'THINGS', 'ARE', 'BOUND', 'TOGETHER', 'LITTLE', 'AND', 'GREAT', 'IN', 'SPIRIT', 'AND', 'IN', 'MATTER'] +1188-133604-0025-89: ref=['YOU', 'KNOW', 'I', 'HAVE', 'JUST', 'BEEN', 'TELLING', 'YOU', 'HOW', 'THIS', 'SCHOOL', 'OF', 'MATERIALISM', 'AND', 'CLAY', 'INVOLVED', 'ITSELF', 'AT', 'LAST', 'IN', 'CLOUD', 'AND', 'FIRE'] +1188-133604-0025-89: hyp=['YOU', 'KNOW', 'I', 'HAVE', 'JUST', 'BEEN', 'TELLING', 'YOU', 'HOW', 'THIS', 'SCHOOL', 'OF', 'MATERIALISM', 'AND', 'CLAY', 'INVOLVED', 'ITSELF', 'AT', 'LAST', 'IN', 'CLOUD', 'AND', 'FIRE'] +1188-133604-0026-90: ref=['HERE', 'IS', 'AN', 'EQUALLY', 'TYPICAL', 'GREEK', 'SCHOOL', 'LANDSCAPE', 'BY', 'WILSON', 'LOST', 'WHOLLY', 'IN', 'GOLDEN', 'MIST', 'THE', 'TREES', 'SO', 'SLIGHTLY', 'DRAWN', 'THAT', 'YOU', "DON'T", 'KNOW', 'IF', 'THEY', 'ARE', 'TREES', 'OR', 'TOWERS', 'AND', 'NO', 'CARE', 'FOR', 'COLOR', 'WHATEVER', 'PERFECTLY', 'DECEPTIVE', 'AND', 'MARVELOUS', 'EFFECT', 'OF', 'SUNSHINE', 'THROUGH', 'THE', 'MIST', 'APOLLO', 'AND', 'THE', 'PYTHON'] +1188-133604-0026-90: hyp=['HERE', 'IS', 'AN', 'EQUALLY', 'TYPICAL', 'GREEK', 'SCHOOL', 'LANDSCAPE', 'BY', 'WILSON', 'LOST', 'WHOLLY', 'IN', 'GOLDEN', 'MIST', 'THE', 'TREES', 'SO', 'SLIGHTLY', 'DRAWN', 'THAT', 'YOU', "DON'T", 'KNOW', 'IF', 'THEY', 'ARE', 'TREES', 'OR', 'TOWERS', 'AND', 'NO', 'CARE', 'FOR', 'COLOR', 'WHATSOEVER', 'PERFECTLY', 'DECEPTIVE', 'AND', 'MARVELLOUS', 'EFFECT', 'OF', 'SUNSHINE', 'THROUGH', 'THE', 'MIST', 'APOLLO', 'AND', 'THE', 'PYTHON'] +1188-133604-0027-91: ref=['NOW', 'HERE', 'IS', 'RAPHAEL', 'EXACTLY', 'BETWEEN', 'THE', 'TWO', 'TREES', 'STILL', 'DRAWN', 'LEAF', 'BY', 'LEAF', 'WHOLLY', 'FORMAL', 'BUT', 'BEAUTIFUL', 'MIST', 'COMING', 'GRADUALLY', 'INTO', 'THE', 'DISTANCE'] +1188-133604-0027-91: hyp=['NOW', 'HERE', 'IS', 'RAFAELLE', 'EXACTLY', 'BETWEEN', 'THE', 'TWO', 'TREES', 'STILL', 'DRAWN', 'LEAF', 'BY', 'LEAF', 'WHOLLY', 'FORMAL', 'BUT', 'BEAUTIFUL', 'MIST', 'COMING', 'GRADUALLY', 'INTO', 'THE', 'DISTANCE'] +1188-133604-0028-92: ref=['WELL', 'THEN', 'LAST', 'HERE', 'IS', "TURNER'S", 'GREEK', 'SCHOOL', 'OF', 'THE', 'HIGHEST', 'CLASS', 'AND', 'YOU', 'DEFINE', 'HIS', 'ART', 'ABSOLUTELY', 'AS', 'FIRST', 'THE', 'DISPLAYING', 'INTENSELY', 'AND', 'WITH', 'THE', 'STERNEST', 'INTELLECT', 'OF', 'NATURAL', 'FORM', 'AS', 'IT', 'IS', 
'AND', 'THEN', 'THE', 'ENVELOPMENT', 'OF', 'IT', 'WITH', 'CLOUD', 'AND', 'FIRE'] +1188-133604-0028-92: hyp=['WELL', 'THEN', 'LAST', 'HERE', 'IS', "TURNER'S", 'GREEK', 'SCHOOL', 'OF', 'THE', 'HIGHEST', 'CLASS', 'AND', 'YOU', 'DEFINE', 'HIS', 'ART', 'ABSOLUTELY', 'AS', 'FIRST', 'THE', 'DISPLAYING', 'INTENSELY', 'AND', 'WITH', 'THE', 'STERNEST', 'INTELLECT', 'OF', 'NATURAL', 'FORM', 'AS', 'IT', 'IS', 'AND', 'THEN', 'THE', 'ENVELOPMENT', 'OF', 'IT', 'WITH', 'CLOUD', 'AND', 'FIRE'] +1188-133604-0029-93: ref=['ONLY', 'THERE', 'ARE', 'TWO', 'SORTS', 'OF', 'CLOUD', 'AND', 'FIRE'] +1188-133604-0029-93: hyp=['ONLY', 'THERE', 'ARE', 'TWO', 'SORTS', 'OF', 'CLOUD', 'AND', 'FIRE'] +1188-133604-0030-94: ref=['HE', 'KNOWS', 'THEM', 'BOTH'] +1188-133604-0030-94: hyp=['HE', 'KNOWS', 'THEM', 'BOTH'] +1188-133604-0031-95: ref=["THERE'S", 'ONE', 'AND', "THERE'S", 'ANOTHER', 'THE', 'DUDLEY', 'AND', 'THE', 'FLINT'] +1188-133604-0031-95: hyp=["THERE'S", 'ONE', 'AND', "THERE'S", 'ANOTHER', 'THE', 'DUDLEY', 'AND', 'THE', 'FLINT'] +1188-133604-0032-96: ref=['IT', 'IS', 'ONLY', 'A', 'PENCIL', 'OUTLINE', 'BY', 'EDWARD', 'BURNE', 'JONES', 'IN', 'ILLUSTRATION', 'OF', 'THE', 'STORY', 'OF', 'PSYCHE', 'IT', 'IS', 'THE', 'INTRODUCTION', 'OF', 'PSYCHE', 'AFTER', 'ALL', 'HER', 'TROUBLES', 'INTO', 'HEAVEN'] +1188-133604-0032-96: hyp=['IT', 'IS', 'ONLY', 'A', 'PENCIL', 'OUTLINE', 'BY', 'EDWARD', 'BURNE', 'JONES', 'IN', 'ILLUSTRATION', 'OF', 'THE', 'STORY', 'OF', 'PSYCHE', 'IT', 'IS', 'THE', 'INTRODUCTION', 'OF', 'PSYCHE', 'AFTER', 'ALL', 'HER', 'TROUBLES', 'INTO', 'HEAVEN'] +1188-133604-0033-97: ref=['EVERY', 'PLANT', 'IN', 'THE', 'GRASS', 'IS', 'SET', 'FORMALLY', 'GROWS', 'PERFECTLY', 'AND', 'MAY', 'BE', 'REALIZED', 'COMPLETELY'] +1188-133604-0033-97: hyp=['EVERY', 'PLANT', 'IN', 'THE', 'GRASS', 'IS', 'SET', 'FORMALLY', 'GROWS', 'PERFECTLY', 'AND', 'MAY', 'BE', 'REALIZED', 'COMPLETELY'] +1188-133604-0034-98: ref=['EXQUISITE', 'ORDER', 'AND', 'UNIVERSAL', 'WITH', 'ETERNAL', 'LIFE', 'AND', 'LIGHT', 'THIS', 'IS', 'THE', 'FAITH', 'AND', 'EFFORT', 'OF', 'THE', 'SCHOOLS', 'OF', 'CRYSTAL', 'AND', 'YOU', 'MAY', 'DESCRIBE', 'AND', 'COMPLETE', 'THEIR', 'WORK', 'QUITE', 'LITERALLY', 'BY', 'TAKING', 'ANY', 'VERSES', 'OF', 'CHAUCER', 'IN', 'HIS', 'TENDER', 'MOOD', 'AND', 'OBSERVING', 'HOW', 'HE', 'INSISTS', 'ON', 'THE', 'CLEARNESS', 'AND', 'BRIGHTNESS', 'FIRST', 'AND', 'THEN', 'ON', 'THE', 'ORDER'] +1188-133604-0034-98: hyp=['EXQUISITE', 'ORDER', 'AND', 'UNIVERSAL', 'WITH', 'ETERNAL', 'LIFE', 'AND', 'LIGHT', 'THIS', 'IS', 'THE', 'FAITH', 'AND', 'EFFORT', 'OF', 'THE', 'SCHOOLS', 'OF', 'CRYSTAL', 'AND', 'YOU', 'MAY', 'DESCRIBE', 'AND', 'COMPLETE', 'THEIR', 'WORK', 'QUITE', 'LITERALLY', 'BY', 'TAKING', 'ANY', 'VERSES', 'OF', 'CHAUCER', 'IN', 'HIS', 'TENDER', 'MOOD', 'AND', 'OBSERVING', 'HOW', 'HE', 'INSISTS', 'ON', 'THE', 'CLEARNESS', 'AND', 'BRIGHTNESS', 'FIRST', 'AND', 'THEN', 'ON', 'THE', 'ORDER'] +1188-133604-0035-99: ref=['THUS', 'IN', "CHAUCER'S", 'DREAM'] +1188-133604-0035-99: hyp=['THUS', 'IN', "CHAUCER'S", 'DREAM'] +1188-133604-0036-100: ref=['IN', 'BOTH', 'THESE', 'HIGH', 'MYTHICAL', 'SUBJECTS', 'THE', 'SURROUNDING', 'NATURE', 'THOUGH', 'SUFFERING', 'IS', 'STILL', 'DIGNIFIED', 'AND', 'BEAUTIFUL'] +1188-133604-0036-100: hyp=['IN', 'BOTH', 'THESE', 'HIGH', 'MYTHICAL', 'SUBJECTS', 'THE', 'SURROUNDING', 'NATURE', 'THOUGH', 'SUFFERING', 'IS', 'STILL', 'DIGNIFIED', 'AND', 'BEAUTIFUL'] +1188-133604-0037-101: ref=['EVERY', 'LINE', 'IN', 'WHICH', 'THE', 'MASTER', 'TRACES', 'IT', 'EVEN', 'WHERE', 'SEEMINGLY', 'NEGLIGENT', 'IS', 'LOVELY', 
'AND', 'SET', 'DOWN', 'WITH', 'A', 'MEDITATIVE', 'CALMNESS', 'WHICH', 'MAKES', 'THESE', 'TWO', 'ETCHINGS', 'CAPABLE', 'OF', 'BEING', 'PLACED', 'BESIDE', 'THE', 'MOST', 'TRANQUIL', 'WORK', 'OF', 'HOLBEIN', 'OR', 'DUERER'] +1188-133604-0037-101: hyp=['EVERY', 'LINE', 'IN', 'WHICH', 'THE', 'MASTER', 'TRACES', 'IT', 'EVEN', 'WHERE', 'SEEMINGLY', 'NEGLIGENT', 'IS', 'LOVELY', 'AND', 'SET', 'DOWN', 'WITH', 'A', 'MEDITATIVE', 'CALMNESS', 'WHICH', 'MAKES', 'THESE', 'TWO', 'ETCHINGS', 'CAPABLE', 'OF', 'BEING', 'PLACED', 'BESIDE', 'THE', 'MOST', 'TRANQUIL', 'WORK', 'OF', 'HOLBEIN', 'OR', 'DIURE'] +1188-133604-0038-102: ref=['BUT', 'NOW', 'HERE', 'IS', 'A', 'SUBJECT', 'OF', 'WHICH', 'YOU', 'WILL', 'WONDER', 'AT', 'FIRST', 'WHY', 'TURNER', 'DREW', 'IT', 'AT', 'ALL'] +1188-133604-0038-102: hyp=['BUT', 'NOW', 'HERE', 'IS', 'A', 'SUBJECT', 'OF', 'WHICH', 'YOU', 'WILL', 'WONDER', 'AT', 'FIRST', 'WHY', 'TURNER', 'DREW', 'IT', 'AT', 'ALL'] +1188-133604-0039-103: ref=['IT', 'HAS', 'NO', 'BEAUTY', 'WHATSOEVER', 'NO', 'SPECIALTY', 'OF', 'PICTURESQUENESS', 'AND', 'ALL', 'ITS', 'LINES', 'ARE', 'CRAMPED', 'AND', 'POOR'] +1188-133604-0039-103: hyp=['IT', 'HAS', 'NO', 'BEAUTY', 'WHATSOEVER', 'NO', 'SPECIALTY', 'OF', 'PICTURESQUENESS', 'AND', 'ALL', 'ITS', 'LINES', 'ARE', 'CRAMPED', 'AND', 'POOR'] +1188-133604-0040-104: ref=['THE', 'CRAMPNESS', 'AND', 'THE', 'POVERTY', 'ARE', 'ALL', 'INTENDED'] +1188-133604-0040-104: hyp=['THE', 'CRAMPEDNESS', 'AND', 'THE', 'POVERTY', 'ARE', 'ALL', 'INTENDED'] +1188-133604-0041-105: ref=['IT', 'IS', 'A', 'GLEANER', 'BRINGING', 'DOWN', 'HER', 'ONE', 'SHEAF', 'OF', 'CORN', 'TO', 'AN', 'OLD', 'WATERMILL', 'ITSELF', 'MOSSY', 'AND', 'RENT', 'SCARCELY', 'ABLE', 'TO', 'GET', 'ITS', 'STONES', 'TO', 'TURN'] +1188-133604-0041-105: hyp=['IT', 'IS', 'A', 'GLEANER', 'BRINGING', 'DOWN', 'HER', 'ONE', 'SHEAF', 'OF', 'CORN', 'TO', 'AN', 'OLD', 'WATER', 'MILL', 'ITSELF', 'MOSSY', 'AND', 'RENT', 'SCARCELY', 'ABLE', 'TO', 'GET', 'ITS', 'STONES', 'TO', 'TURN'] +1188-133604-0042-106: ref=['THE', 'SCENE', 'IS', 'ABSOLUTELY', 'ARCADIAN'] +1188-133604-0042-106: hyp=['THE', 'SCENE', 'IS', 'ABSOLUTELY', 'ARCADIAN'] +1188-133604-0043-107: ref=['SEE', 'THAT', 'YOUR', 'LIVES', 'BE', 'IN', 'NOTHING', 'WORSE', 'THAN', 'A', "BOY'S", 'CLIMBING', 'FOR', 'HIS', 'ENTANGLED', 'KITE'] +1188-133604-0043-107: hyp=['SEE', 'THAT', 'YOUR', 'LIES', 'BE', 'IN', 'NOTHING', 'WORSE', 'THAN', 'A', "BOY'S", 'CLIMBING', 'FOR', 'HIS', 'ENTANGLED', 'KITE'] +1188-133604-0044-108: ref=['IT', 'WILL', 'BE', 'WELL', 'FOR', 'YOU', 'IF', 'YOU', 'JOIN', 'NOT', 'WITH', 'THOSE', 'WHO', 'INSTEAD', 'OF', 'KITES', 'FLY', 'FALCONS', 'WHO', 'INSTEAD', 'OF', 'OBEYING', 'THE', 'LAST', 'WORDS', 'OF', 'THE', 'GREAT', 'CLOUD', 'SHEPHERD', 'TO', 'FEED', 'HIS', 'SHEEP', 'LIVE', 'THE', 'LIVES', 'HOW', 'MUCH', 'LESS', 'THAN', 'VANITY', 'OF', 'THE', 'WAR', 'WOLF', 'AND', 'THE', 'GIER', 'EAGLE'] +1188-133604-0044-108: hyp=['IT', 'WILL', 'BE', 'WELL', 'FOR', 'YOU', 'IF', 'YOU', 'JOIN', 'NOT', 'WITH', 'THOSE', 'WHO', 'INSTEAD', 'OF', 'KITES', 'FLY', 'FALCONS', 'WHO', 'INSTEAD', 'OF', 'OBEYING', 'THE', 'LAST', 'WORDS', 'OF', 'THE', 'GREAT', 'CLOUD', 'SHEPHERD', 'TO', 'FEED', 'HIS', 'SHEEP', 'LIVE', 'THE', 'LIVES', 'HOW', 'MUCH', 'LESS', 'THAN', 'VANITY', 'OF', 'THE', 'WAR', 'WOLF', 'AND', 'THE', 'GEAR', 'EAGLE'] +121-121726-0000-109: ref=['ALSO', 'A', 'POPULAR', 'CONTRIVANCE', 'WHEREBY', 'LOVE', 'MAKING', 'MAY', 'BE', 'SUSPENDED', 'BUT', 'NOT', 'STOPPED', 'DURING', 'THE', 'PICNIC', 'SEASON'] +121-121726-0000-109: hyp=['ALSO', 'A', 'POPULAR', 'CONTRIVANCE', 
'WHEREBY', 'LOVE', 'MAKING', 'MAY', 'BE', 'SUSPENDED', 'BUT', 'NOT', 'STOPPED', 'DURING', 'THE', 'PICNIC', 'SEASON'] +121-121726-0001-110: ref=['HARANGUE', 'THE', 'TIRESOME', 'PRODUCT', 'OF', 'A', 'TIRELESS', 'TONGUE'] +121-121726-0001-110: hyp=['HARANGUE', 'THE', 'TIRESOME', 'PRODUCT', 'OF', 'A', 'TIRELESS', 'TONGUE'] +121-121726-0002-111: ref=['ANGOR', 'PAIN', 'PAINFUL', 'TO', 'HEAR'] +121-121726-0002-111: hyp=['ANGER', 'PAIN', 'PAINFUL', 'TO', 'HEAR'] +121-121726-0003-112: ref=['HAY', 'FEVER', 'A', 'HEART', 'TROUBLE', 'CAUSED', 'BY', 'FALLING', 'IN', 'LOVE', 'WITH', 'A', 'GRASS', 'WIDOW'] +121-121726-0003-112: hyp=['HAY', 'FEVER', 'A', 'HEART', 'TROUBLE', 'CAUSED', 'BY', 'FALLING', 'IN', 'LOVE', 'WITH', 'A', 'GRASS', 'WIDOW'] +121-121726-0004-113: ref=['HEAVEN', 'A', 'GOOD', 'PLACE', 'TO', 'BE', 'RAISED', 'TO'] +121-121726-0004-113: hyp=['HEAVEN', 'A', 'GOOD', 'PLACE', 'TO', 'BE', 'RAISED', 'TO'] +121-121726-0005-114: ref=['HEDGE', 'A', 'FENCE'] +121-121726-0005-114: hyp=['HEDGE', 'A', 'FENCE'] +121-121726-0006-115: ref=['HEREDITY', 'THE', 'CAUSE', 'OF', 'ALL', 'OUR', 'FAULTS'] +121-121726-0006-115: hyp=['HEREDITY', 'THE', 'CAUSE', 'OF', 'ALL', 'OUR', 'FAULTS'] +121-121726-0007-116: ref=['HORSE', 'SENSE', 'A', 'DEGREE', 'OF', 'WISDOM', 'THAT', 'KEEPS', 'ONE', 'FROM', 'BETTING', 'ON', 'THE', 'RACES'] +121-121726-0007-116: hyp=['HORSE', 'SENSE', 'A', 'DEGREE', 'OF', 'WISDOM', 'THAT', 'KEEPS', 'ONE', 'FROM', 'BETTING', 'ON', 'THE', 'RACES'] +121-121726-0008-117: ref=['HOSE', "MAN'S", 'EXCUSE', 'FOR', 'WETTING', 'THE', 'WALK'] +121-121726-0008-117: hyp=['HOSE', "MAN'S", 'EXCUSE', 'FOR', 'WETTING', 'THE', 'WALK'] +121-121726-0009-118: ref=['HOTEL', 'A', 'PLACE', 'WHERE', 'A', 'GUEST', 'OFTEN', 'GIVES', 'UP', 'GOOD', 'DOLLARS', 'FOR', 'POOR', 'QUARTERS'] +121-121726-0009-118: hyp=['HOTEL', 'A', 'PLACE', 'WHERE', 'A', 'GUEST', 'OFTEN', 'GIVES', 'UP', 'GOOD', 'DOLLARS', 'FOR', 'POOR', 'QUARTERS'] +121-121726-0010-119: ref=['HOUSECLEANING', 'A', 'DOMESTIC', 'UPHEAVAL', 'THAT', 'MAKES', 'IT', 'EASY', 'FOR', 'THE', 'GOVERNMENT', 'TO', 'ENLIST', 'ALL', 'THE', 'SOLDIERS', 'IT', 'NEEDS'] +121-121726-0010-119: hyp=['HOUSE', 'CLEANING', 'A', 'DOMESTIC', 'UPHEAVAL', 'THAT', 'MAKES', 'IT', 'EASY', 'FOR', 'THE', 'GOVERNMENT', 'TO', 'ENLIST', 'ALL', 'THE', 'SOLDIERS', 'IT', 'NEEDS'] +121-121726-0011-120: ref=['HUSBAND', 'THE', 'NEXT', 'THING', 'TO', 'A', 'WIFE'] +121-121726-0011-120: hyp=['HUSBAND', 'THE', 'NEXT', 'THING', 'TO', 'A', 'WIFE'] +121-121726-0012-121: ref=['HUSSY', 'WOMAN', 'AND', 'BOND', 'TIE'] +121-121726-0012-121: hyp=['HUSSY', 'WOMAN', 'AND', 'BOND', 'TIE'] +121-121726-0013-122: ref=['TIED', 'TO', 'A', 'WOMAN'] +121-121726-0013-122: hyp=['TIED', 'TO', 'A', 'WOMAN'] +121-121726-0014-123: ref=['HYPOCRITE', 'A', 'HORSE', 'DEALER'] +121-121726-0014-123: hyp=['HYPOCRITE', 'A', 'HORSE', 'DEALER'] +121-123852-0000-124: ref=['THOSE', 'PRETTY', 'WRONGS', 'THAT', 'LIBERTY', 'COMMITS', 'WHEN', 'I', 'AM', 'SOMETIME', 'ABSENT', 'FROM', 'THY', 'HEART', 'THY', 'BEAUTY', 'AND', 'THY', 'YEARS', 'FULL', 'WELL', 'BEFITS', 'FOR', 'STILL', 'TEMPTATION', 'FOLLOWS', 'WHERE', 'THOU', 'ART'] +121-123852-0000-124: hyp=['THOSE', 'PRETTY', 'WRONGS', 'THAT', 'LIBERTY', 'COMMITS', 'WHEN', 'I', 'AM', 'SOMETIME', 'ABSENT', 'FROM', 'THY', 'HEART', 'THY', 'BEAUTY', 'AND', 'THY', 'YEARS', 'FULL', 'WELL', 'BEFITS', 'FOR', 'STILL', 'TEMPTATION', 'FOLLOWS', 'WHERE', 'THOU', 'ART'] +121-123852-0001-125: ref=['AY', 'ME'] +121-123852-0001-125: hyp=['AY', 'ME'] +121-123852-0002-126: ref=['NO', 'MATTER', 'THEN', 'ALTHOUGH', 'MY', 
'FOOT', 'DID', 'STAND', 'UPON', 'THE', 'FARTHEST', 'EARTH', "REMOV'D", 'FROM', 'THEE', 'FOR', 'NIMBLE', 'THOUGHT', 'CAN', 'JUMP', 'BOTH', 'SEA', 'AND', 'LAND', 'AS', 'SOON', 'AS', 'THINK', 'THE', 'PLACE', 'WHERE', 'HE', 'WOULD', 'BE', 'BUT', 'AH'] +121-123852-0002-126: hyp=['NO', 'MATTER', 'THEN', 'ALTHOUGH', 'MY', 'FOOT', 'DID', 'STAND', 'UPON', 'THE', 'FARTHEST', 'EARTH', 'REMOVED', 'FROM', 'THEE', 'FOR', 'NIMBLE', 'THOUGHT', 'CAN', 'JUMP', 'BOTH', 'SEA', 'AND', 'LAND', 'AS', 'SOON', 'AS', 'THINK', 'THE', 'PLACE', 'WHERE', 'HE', 'WOULD', 'BE', 'BUT', 'AH'] +121-123852-0003-127: ref=['THOUGHT', 'KILLS', 'ME', 'THAT', 'I', 'AM', 'NOT', 'THOUGHT', 'TO', 'LEAP', 'LARGE', 'LENGTHS', 'OF', 'MILES', 'WHEN', 'THOU', 'ART', 'GONE', 'BUT', 'THAT', 'SO', 'MUCH', 'OF', 'EARTH', 'AND', 'WATER', 'WROUGHT', 'I', 'MUST', 'ATTEND', "TIME'S", 'LEISURE', 'WITH', 'MY', 'MOAN', 'RECEIVING', 'NOUGHT', 'BY', 'ELEMENTS', 'SO', 'SLOW', 'BUT', 'HEAVY', 'TEARS', 'BADGES', 'OF', "EITHER'S", 'WOE'] +121-123852-0003-127: hyp=['THOUGHT', 'KILLS', 'ME', 'THAT', 'I', 'AM', 'NOT', 'THOUGHT', 'TO', 'LEAP', 'LARGE', 'LENGTHS', 'OF', 'MILES', 'WHEN', 'THOU', 'ART', 'GONE', 'BUT', 'THAT', 'SO', 'MUCH', 'OF', 'EARTH', 'AND', 'WATER', 'WROUGHT', 'I', 'MUST', 'ATTEND', "TIME'S", 'LEISURE', 'WITH', 'MY', 'MOAN', 'RECEIVING', 'NOUGHT', 'BY', 'ELEMENTS', 'SO', 'SLOW', 'BUT', 'HEAVY', 'TEARS', 'BADGES', 'OF', "EITHER'S", 'WOE'] +121-123852-0004-128: ref=['MY', 'HEART', 'DOTH', 'PLEAD', 'THAT', 'THOU', 'IN', 'HIM', 'DOST', 'LIE', 'A', 'CLOSET', 'NEVER', "PIERC'D", 'WITH', 'CRYSTAL', 'EYES', 'BUT', 'THE', 'DEFENDANT', 'DOTH', 'THAT', 'PLEA', 'DENY', 'AND', 'SAYS', 'IN', 'HIM', 'THY', 'FAIR', 'APPEARANCE', 'LIES'] +121-123852-0004-128: hyp=['MY', 'HEART', 'DOTH', 'PLEAD', 'THAT', 'THOU', 'IN', 'HIM', 'DOST', 'LIE', 'A', 'CLOSET', 'NEVER', 'PIERCED', 'WITH', 'CRYSTAL', 'EYES', 'BUT', 'THE', 'DEFENDANT', 'DOTH', 'THAT', 'PLEA', 'DENY', 'AND', 'SAYS', 'IN', 'HIM', 'THY', 'FAIR', 'APPEARANCE', 'LIES'] +121-123859-0000-129: ref=['YOU', 'ARE', 'MY', 'ALL', 'THE', 'WORLD', 'AND', 'I', 'MUST', 'STRIVE', 'TO', 'KNOW', 'MY', 'SHAMES', 'AND', 'PRAISES', 'FROM', 'YOUR', 'TONGUE', 'NONE', 'ELSE', 'TO', 'ME', 'NOR', 'I', 'TO', 'NONE', 'ALIVE', 'THAT', 'MY', "STEEL'D", 'SENSE', 'OR', 'CHANGES', 'RIGHT', 'OR', 'WRONG'] +121-123859-0000-129: hyp=['YOU', 'ARE', 'MY', 'ALL', 'THE', 'WORLD', 'AND', 'I', 'MUST', 'STRIVE', 'TO', 'KNOW', 'MY', 'SHAMES', 'AND', 'PRAISES', 'FROM', 'YOUR', 'TONGUE', 'NONE', 'ELSE', 'TO', 'ME', 'NOR', 'I', 'TO', 'NONE', 'ALIVE', 'THAT', 'MY', 'STEELED', 'SENSE', 'OR', 'CHANGES', 'RIGHT', 'OR', 'WRONG'] +121-123859-0001-130: ref=['O', 'TIS', 'THE', 'FIRST', 'TIS', 'FLATTERY', 'IN', 'MY', 'SEEING', 'AND', 'MY', 'GREAT', 'MIND', 'MOST', 'KINGLY', 'DRINKS', 'IT', 'UP', 'MINE', 'EYE', 'WELL', 'KNOWS', 'WHAT', 'WITH', 'HIS', 'GUST', 'IS', 'GREEING', 'AND', 'TO', 'HIS', 'PALATE', 'DOTH', 'PREPARE', 'THE', 'CUP', 'IF', 'IT', 'BE', "POISON'D", 'TIS', 'THE', 'LESSER', 'SIN', 'THAT', 'MINE', 'EYE', 'LOVES', 'IT', 'AND', 'DOTH', 'FIRST', 'BEGIN'] +121-123859-0001-130: hyp=['O', 'TIS', 'THE', 'FIRST', 'TIS', 'FLATTERY', 'IN', 'MY', 'SEEING', 'AND', 'MY', 'GREAT', 'MIND', 'MOST', 'KINGLY', 'DRINKS', 'IT', 'UP', 'MINE', 'EYE', 'WELL', 'KNOWS', 'WHAT', 'WITH', 'HIS', 'GUST', 'IS', 'GREEN', 'AND', 'TO', 'HIS', 'PALATE', 'DOTH', 'PREPARE', 'THE', 'CUP', 'IF', 'IT', 'BE', 'POISONED', 'TIS', 'THE', 'LESSER', 'SIN', 'THAT', 'MINE', 'EYE', 'LOVES', 'IT', 'AND', 'DOTH', 'FIRST', 'BEGIN'] +121-123859-0002-131: ref=['BUT', 'RECKONING', 'TIME', 'WHOSE', 
"MILLION'D", 'ACCIDENTS', 'CREEP', 'IN', 'TWIXT', 'VOWS', 'AND', 'CHANGE', 'DECREES', 'OF', 'KINGS', 'TAN', 'SACRED', 'BEAUTY', 'BLUNT', 'THE', "SHARP'ST", 'INTENTS', 'DIVERT', 'STRONG', 'MINDS', 'TO', 'THE', 'COURSE', 'OF', 'ALTERING', 'THINGS', 'ALAS', 'WHY', 'FEARING', 'OF', "TIME'S", 'TYRANNY', 'MIGHT', 'I', 'NOT', 'THEN', 'SAY', 'NOW', 'I', 'LOVE', 'YOU', 'BEST', 'WHEN', 'I', 'WAS', 'CERTAIN', "O'ER", 'INCERTAINTY', 'CROWNING', 'THE', 'PRESENT', 'DOUBTING', 'OF', 'THE', 'REST'] +121-123859-0002-131: hyp=['BUT', 'RECKONING', 'TIME', 'WHOSE', 'MILLIONED', 'ACCIDENTS', 'CREEP', 'IN', 'TWIXT', 'VOWS', 'AND', 'CHANGE', 'DECREES', 'OF', 'KINGS', "TAN'S", 'SACRED', 'BEAUTY', 'BLUNT', 'THE', 'SHARPEST', 'INTENSE', 'DIVERT', 'STRONG', 'MINDS', 'TO', 'THE', 'COURSE', 'OF', 'ALTERING', 'THINGS', 'ALAS', 'WHY', 'FEARING', 'OF', "TIME'S", 'TYRANNY', 'MIGHT', 'I', 'NOT', 'THEN', 'SAY', 'NOW', 'I', 'LOVE', 'YOU', 'BEST', 'WHEN', 'I', 'WAS', 'CERTAIN', "O'ER", 'IN', 'CERTAINTY', 'CROWNING', 'THE', 'PRESENT', 'DOUBTING', 'OF', 'THE', 'REST'] +121-123859-0003-132: ref=['LOVE', 'IS', 'A', 'BABE', 'THEN', 'MIGHT', 'I', 'NOT', 'SAY', 'SO', 'TO', 'GIVE', 'FULL', 'GROWTH', 'TO', 'THAT', 'WHICH', 'STILL', 'DOTH', 'GROW'] +121-123859-0003-132: hyp=['LOVE', 'IS', 'A', 'BABE', 'THEN', 'MIGHT', 'I', 'NOT', 'SAY', 'SO', 'TO', 'GIVE', 'FULL', 'GROWTH', 'TO', 'THAT', 'WHICH', 'STILL', 'DOTH', 'GROW'] +121-123859-0004-133: ref=['SO', 'I', 'RETURN', "REBUK'D", 'TO', 'MY', 'CONTENT', 'AND', 'GAIN', 'BY', 'ILL', 'THRICE', 'MORE', 'THAN', 'I', 'HAVE', 'SPENT'] +121-123859-0004-133: hyp=['SO', 'I', 'RETURN', 'REBUKED', 'TO', 'MY', 'CONTENT', 'AND', 'GAIN', 'BY', 'ILL', 'THRICE', 'MORE', 'THAN', 'I', 'HAVE', 'SPENT'] +121-127105-0000-134: ref=['IT', 'WAS', 'THIS', 'OBSERVATION', 'THAT', 'DREW', 'FROM', 'DOUGLAS', 'NOT', 'IMMEDIATELY', 'BUT', 'LATER', 'IN', 'THE', 'EVENING', 'A', 'REPLY', 'THAT', 'HAD', 'THE', 'INTERESTING', 'CONSEQUENCE', 'TO', 'WHICH', 'I', 'CALL', 'ATTENTION'] +121-127105-0000-134: hyp=['IT', 'WAS', 'THIS', 'OBSERVATION', 'THAT', 'DREW', 'FROM', 'DOUGLAS', 'NOT', 'IMMEDIATELY', 'BUT', 'LATER', 'IN', 'THE', 'EVENING', 'A', 'REPLY', 'THAT', 'HAD', 'THE', 'INTERESTING', 'CONSEQUENCE', 'TO', 'WHICH', 'I', 'CALL', 'ATTENTION'] +121-127105-0001-135: ref=['SOMEONE', 'ELSE', 'TOLD', 'A', 'STORY', 'NOT', 'PARTICULARLY', 'EFFECTIVE', 'WHICH', 'I', 'SAW', 'HE', 'WAS', 'NOT', 'FOLLOWING'] +121-127105-0001-135: hyp=['SOME', 'ONE', 'ELSE', 'TOLD', 'A', 'STORY', 'NOT', 'PARTICULARLY', 'EFFECTIVE', 'WHICH', 'I', 'SAW', 'HE', 'WAS', 'NOT', 'FOLLOWING'] +121-127105-0002-136: ref=['CRIED', 'ONE', 'OF', 'THE', 'WOMEN', 'HE', 'TOOK', 'NO', 'NOTICE', 'OF', 'HER', 'HE', 'LOOKED', 'AT', 'ME', 'BUT', 'AS', 'IF', 'INSTEAD', 'OF', 'ME', 'HE', 'SAW', 'WHAT', 'HE', 'SPOKE', 'OF'] +121-127105-0002-136: hyp=['CRIED', 'ONE', 'OF', 'THE', 'WOMEN', 'HE', 'TOOK', 'NO', 'NOTICE', 'OF', 'HER', 'HE', 'LOOKED', 'AT', 'ME', 'BUT', 'AS', 'IF', 'INSTEAD', 'OF', 'ME', 'HE', 'SAW', 'WHAT', 'HE', 'SPOKE', 'OF'] +121-127105-0003-137: ref=['THERE', 'WAS', 'A', 'UNANIMOUS', 'GROAN', 'AT', 'THIS', 'AND', 'MUCH', 'REPROACH', 'AFTER', 'WHICH', 'IN', 'HIS', 'PREOCCUPIED', 'WAY', 'HE', 'EXPLAINED'] +121-127105-0003-137: hyp=['THERE', 'WAS', 'A', 'UNANIMOUS', 'GROAN', 'AT', 'THIS', 'AND', 'MUCH', 'REPROACH', 'AFTER', 'WHICH', 'IN', 'HIS', 'PREOCCUPIED', 'WAY', 'HE', 'EXPLAINED'] +121-127105-0004-138: ref=['THE', "STORY'S", 'WRITTEN'] +121-127105-0004-138: hyp=['THE', 'STORIES', 'WRITTEN'] +121-127105-0005-139: ref=['I', 'COULD', 'WRITE', 'TO', 'MY', 'MAN', 
'AND', 'ENCLOSE', 'THE', 'KEY', 'HE', 'COULD', 'SEND', 'DOWN', 'THE', 'PACKET', 'AS', 'HE', 'FINDS', 'IT'] +121-127105-0005-139: hyp=['I', 'COULD', 'WRITE', 'TO', 'MY', 'MAN', 'AND', 'ENCLOSE', 'THE', 'KEY', 'HE', 'COULD', 'SEND', 'DOWN', 'THE', 'PACKET', 'AS', 'HE', 'FINDS', 'IT'] +121-127105-0006-140: ref=['THE', 'OTHERS', 'RESENTED', 'POSTPONEMENT', 'BUT', 'IT', 'WAS', 'JUST', 'HIS', 'SCRUPLES', 'THAT', 'CHARMED', 'ME'] +121-127105-0006-140: hyp=['THE', 'OTHERS', 'RESENTED', 'POSTPONEMENT', 'BUT', 'IT', 'WAS', 'JUST', 'HIS', 'SCRUPLES', 'THAT', 'CHARMED', 'ME'] +121-127105-0007-141: ref=['TO', 'THIS', 'HIS', 'ANSWER', 'WAS', 'PROMPT', 'OH', 'THANK', 'GOD', 'NO', 'AND', 'IS', 'THE', 'RECORD', 'YOURS'] +121-127105-0007-141: hyp=['TO', 'THIS', 'HIS', 'ANSWER', 'WAS', 'PROMPT', 'OH', 'THANK', 'GOD', 'NO', 'AND', 'IS', 'THE', 'RECORD', 'YOURS'] +121-127105-0008-142: ref=['HE', 'HUNG', 'FIRE', 'AGAIN', 'A', "WOMAN'S"] +121-127105-0008-142: hyp=['HE', 'HUNG', 'FIRE', 'AGAIN', 'A', "WOMAN'S"] +121-127105-0009-143: ref=['SHE', 'HAS', 'BEEN', 'DEAD', 'THESE', 'TWENTY', 'YEARS'] +121-127105-0009-143: hyp=['SHE', 'HAS', 'BEEN', 'DEAD', 'THESE', 'TWENTY', 'YEARS'] +121-127105-0010-144: ref=['SHE', 'SENT', 'ME', 'THE', 'PAGES', 'IN', 'QUESTION', 'BEFORE', 'SHE', 'DIED'] +121-127105-0010-144: hyp=['SHE', 'SENT', 'ME', 'THE', 'PAGES', 'IN', 'QUESTION', 'BEFORE', 'SHE', 'DIED'] +121-127105-0011-145: ref=['SHE', 'WAS', 'THE', 'MOST', 'AGREEABLE', 'WOMAN', "I'VE", 'EVER', 'KNOWN', 'IN', 'HER', 'POSITION', 'SHE', 'WOULD', 'HAVE', 'BEEN', 'WORTHY', 'OF', 'ANY', 'WHATEVER'] +121-127105-0011-145: hyp=['SHE', 'WAS', 'THE', 'MOST', 'AGREEABLE', 'WOMAN', "I'VE", 'EVER', 'KNOWN', 'IN', 'HER', 'POSITION', 'SHE', 'WOULD', 'HAVE', 'BEEN', 'WORTHY', 'OF', 'ANY', 'WHATEVER'] +121-127105-0012-146: ref=['IT', "WASN'T", 'SIMPLY', 'THAT', 'SHE', 'SAID', 'SO', 'BUT', 'THAT', 'I', 'KNEW', 'SHE', "HADN'T", 'I', 'WAS', 'SURE', 'I', 'COULD', 'SEE'] +121-127105-0012-146: hyp=['IT', "WASN'T", 'SIMPLY', 'THAT', 'SHE', 'SAID', 'SO', 'BUT', 'THAT', 'I', 'KNEW', 'SHE', "HADN'T", 'I', 'WAS', 'SURE', 'I', 'COULD', 'SEE'] +121-127105-0013-147: ref=["YOU'LL", 'EASILY', 'JUDGE', 'WHY', 'WHEN', 'YOU', 'HEAR', 'BECAUSE', 'THE', 'THING', 'HAD', 'BEEN', 'SUCH', 'A', 'SCARE', 'HE', 'CONTINUED', 'TO', 'FIX', 'ME'] +121-127105-0013-147: hyp=["YOU'LL", 'EASILY', 'JUDGE', 'WHY', 'WHEN', 'YOU', 'HEAR', 'BECAUSE', 'THE', 'THING', 'HAD', 'BEEN', 'SUCH', 'A', 'SCARE', 'HE', 'CONTINUED', 'TO', 'FIX', 'ME'] +121-127105-0014-148: ref=['YOU', 'ARE', 'ACUTE'] +121-127105-0014-148: hyp=['YOU', 'ARE', 'ACUTE'] +121-127105-0015-149: ref=['HE', 'QUITTED', 'THE', 'FIRE', 'AND', 'DROPPED', 'BACK', 'INTO', 'HIS', 'CHAIR'] +121-127105-0015-149: hyp=['HE', 'QUITTED', 'THE', 'FIRE', 'AND', 'DROPPED', 'BACK', 'INTO', 'HIS', 'CHAIR'] +121-127105-0016-150: ref=['PROBABLY', 'NOT', 'TILL', 'THE', 'SECOND', 'POST'] +121-127105-0016-150: hyp=['PROBABLY', 'NOT', 'TILL', 'THE', 'SECOND', 'POST'] +121-127105-0017-151: ref=['IT', 'WAS', 'ALMOST', 'THE', 'TONE', 'OF', 'HOPE', 'EVERYBODY', 'WILL', 'STAY'] +121-127105-0017-151: hyp=['IT', 'WAS', 'ALMOST', 'THE', 'TONE', 'OF', 'HOPE', 'EVERYBODY', 'WILL', 'STAY'] +121-127105-0018-152: ref=['CRIED', 'THE', 'LADIES', 'WHOSE', 'DEPARTURE', 'HAD', 'BEEN', 'FIXED'] +121-127105-0018-152: hyp=['CRIED', 'THE', 'LADIES', 'WHOSE', 'DEPARTURE', 'HAD', 'BEEN', 'FIXED'] +121-127105-0019-153: ref=['MISSUS', 'GRIFFIN', 'HOWEVER', 'EXPRESSED', 'THE', 'NEED', 'FOR', 'A', 'LITTLE', 'MORE', 'LIGHT'] +121-127105-0019-153: hyp=['MISSUS', 'GRIFFIN', 
'HOWEVER', 'EXPRESSED', 'THE', 'NEED', 'FOR', 'A', 'LITTLE', 'MORE', 'LIGHT'] +121-127105-0020-154: ref=['WHO', 'WAS', 'IT', 'SHE', 'WAS', 'IN', 'LOVE', 'WITH', 'THE', 'STORY', 'WILL', 'TELL', 'I', 'TOOK', 'UPON', 'MYSELF', 'TO', 'REPLY', 'OH', 'I', "CAN'T", 'WAIT', 'FOR', 'THE', 'STORY', 'THE', 'STORY', "WON'T", 'TELL', 'SAID', 'DOUGLAS', 'NOT', 'IN', 'ANY', 'LITERAL', 'VULGAR', 'WAY', "MORE'S", 'THE', 'PITY', 'THEN'] +121-127105-0020-154: hyp=['WHO', 'WAS', 'IT', 'SHE', 'WAS', 'IN', 'LOVE', 'WITH', 'THE', 'STORY', 'WILL', 'TELL', 'I', 'TOOK', 'UPON', 'MYSELF', 'TO', 'REPLY', 'OH', 'I', "CAN'T", 'WAIT', 'FOR', 'THE', 'STORY', 'THE', 'STORY', "WON'T", 'TELL', 'SAID', 'DOUGLAS', 'NOT', 'IN', 'ANY', 'LITERAL', 'VULGAR', 'WAY', "MORE'S", 'THE', 'PITY', 'THEN'] +121-127105-0021-155: ref=["WON'T", 'YOU', 'TELL', 'DOUGLAS'] +121-127105-0021-155: hyp=["WON'T", 'YOU', 'TELL', 'DOUGLAS'] +121-127105-0022-156: ref=['WELL', 'IF', 'I', "DON'T", 'KNOW', 'WHO', 'SHE', 'WAS', 'IN', 'LOVE', 'WITH', 'I', 'KNOW', 'WHO', 'HE', 'WAS'] +121-127105-0022-156: hyp=['WELL', 'IF', 'I', "DON'T", 'KNOW', 'WHO', 'SHE', 'WAS', 'IN', 'LOVE', 'WITH', 'I', 'KNOW', 'WHO', 'HE', 'WAS'] +121-127105-0023-157: ref=['LET', 'ME', 'SAY', 'HERE', 'DISTINCTLY', 'TO', 'HAVE', 'DONE', 'WITH', 'IT', 'THAT', 'THIS', 'NARRATIVE', 'FROM', 'AN', 'EXACT', 'TRANSCRIPT', 'OF', 'MY', 'OWN', 'MADE', 'MUCH', 'LATER', 'IS', 'WHAT', 'I', 'SHALL', 'PRESENTLY', 'GIVE'] +121-127105-0023-157: hyp=['LET', 'ME', 'SAY', 'HERE', 'DISTINCTLY', 'TO', 'HAVE', 'DONE', 'WITH', 'IT', 'THAT', 'THIS', 'NARRATIVE', 'FROM', 'AN', 'EXACT', 'TRANSCRIPT', 'OF', 'MY', 'OWN', 'MADE', 'MUCH', 'LATER', 'IS', 'WHAT', 'I', 'SHALL', 'PRESENTLY', 'GIVE'] +121-127105-0024-158: ref=['POOR', 'DOUGLAS', 'BEFORE', 'HIS', 'DEATH', 'WHEN', 'IT', 'WAS', 'IN', 'SIGHT', 'COMMITTED', 'TO', 'ME', 'THE', 'MANUSCRIPT', 'THAT', 'REACHED', 'HIM', 'ON', 'THE', 'THIRD', 'OF', 'THESE', 'DAYS', 'AND', 'THAT', 'ON', 'THE', 'SAME', 'SPOT', 'WITH', 'IMMENSE', 'EFFECT', 'HE', 'BEGAN', 'TO', 'READ', 'TO', 'OUR', 'HUSHED', 'LITTLE', 'CIRCLE', 'ON', 'THE', 'NIGHT', 'OF', 'THE', 'FOURTH'] +121-127105-0024-158: hyp=['POOR', 'DOUGLAS', 'BEFORE', 'HIS', 'DEATH', 'WHEN', 'IT', 'WAS', 'IN', 'SIGHT', 'COMMITTED', 'TO', 'ME', 'THE', 'MANUSCRIPT', 'THAT', 'REACHED', 'HIM', 'ON', 'THE', 'THIRD', 'OF', 'THESE', 'DAYS', 'AND', 'THAT', 'ON', 'THE', 'SAME', 'SPOT', 'WITH', 'IMMENSE', 'EFFECT', 'HE', 'BEGAN', 'TO', 'READ', 'TO', 'OUR', 'HUSHED', 'LITTLE', 'CIRCLE', 'ON', 'THE', 'NIGHT', 'OF', 'THE', 'FOURTH'] +121-127105-0025-159: ref=['THE', 'DEPARTING', 'LADIES', 'WHO', 'HAD', 'SAID', 'THEY', 'WOULD', 'STAY', "DIDN'T", 'OF', 'COURSE', 'THANK', 'HEAVEN', 'STAY', 'THEY', 'DEPARTED', 'IN', 'CONSEQUENCE', 'OF', 'ARRANGEMENTS', 'MADE', 'IN', 'A', 'RAGE', 'OF', 'CURIOSITY', 'AS', 'THEY', 'PROFESSED', 'PRODUCED', 'BY', 'THE', 'TOUCHES', 'WITH', 'WHICH', 'HE', 'HAD', 'ALREADY', 'WORKED', 'US', 'UP'] +121-127105-0025-159: hyp=['THE', 'DEPARTING', 'LADIES', 'WHO', 'HAD', 'SAID', 'THEY', 'WOULD', 'STAY', "DIDN'T", 'OF', 'COURSE', 'THANK', 'HEAVEN', 'STAY', 'THEY', 'DEPARTED', 'IN', 'CONSEQUENCE', 'OF', 'ARRANGEMENTS', 'MADE', 'IN', 'A', 'RAGE', 'OF', 'CURIOSITY', 'AS', 'THEY', 'PROFESSED', 'PRODUCED', 'BY', 'THE', 'TOUCHES', 'WITH', 'WHICH', 'HE', 'HAD', 'ALREADY', 'WORKED', 'US', 'UP'] +121-127105-0026-160: ref=['THE', 'FIRST', 'OF', 'THESE', 'TOUCHES', 'CONVEYED', 'THAT', 'THE', 'WRITTEN', 'STATEMENT', 'TOOK', 'UP', 'THE', 'TALE', 'AT', 'A', 'POINT', 'AFTER', 'IT', 'HAD', 'IN', 'A', 'MANNER', 'BEGUN'] +121-127105-0026-160: 
hyp=['THE', 'FIRST', 'OF', 'THESE', 'TOUCHES', 'CONVEYED', 'THAT', 'THE', 'WRITTEN', 'STATEMENT', 'TOOK', 'UP', 'THE', 'TALE', 'AT', 'A', 'POINT', 'AFTER', 'IT', 'HAD', 'IN', 'A', 'MANNER', 'BEGUN'] +121-127105-0027-161: ref=['HE', 'HAD', 'FOR', 'HIS', 'OWN', 'TOWN', 'RESIDENCE', 'A', 'BIG', 'HOUSE', 'FILLED', 'WITH', 'THE', 'SPOILS', 'OF', 'TRAVEL', 'AND', 'THE', 'TROPHIES', 'OF', 'THE', 'CHASE', 'BUT', 'IT', 'WAS', 'TO', 'HIS', 'COUNTRY', 'HOME', 'AN', 'OLD', 'FAMILY', 'PLACE', 'IN', 'ESSEX', 'THAT', 'HE', 'WISHED', 'HER', 'IMMEDIATELY', 'TO', 'PROCEED'] +121-127105-0027-161: hyp=['HE', 'HAD', 'FOR', 'HIS', 'OWN', 'TOWN', 'RESIDENCE', 'A', 'BIG', 'HOUSE', 'FILLED', 'WITH', 'THE', 'SPOILS', 'OF', 'TRAVEL', 'AND', 'THE', 'TROPHIES', 'OF', 'THE', 'CHASE', 'BUT', 'IT', 'WAS', 'TO', 'HIS', 'COUNTRY', 'HOME', 'AN', 'OLD', 'FAMILY', 'PLACE', 'IN', 'ESSEX', 'THAT', 'HE', 'WISHED', 'HER', 'IMMEDIATELY', 'TO', 'PROCEED'] +121-127105-0028-162: ref=['THE', 'AWKWARD', 'THING', 'WAS', 'THAT', 'THEY', 'HAD', 'PRACTICALLY', 'NO', 'OTHER', 'RELATIONS', 'AND', 'THAT', 'HIS', 'OWN', 'AFFAIRS', 'TOOK', 'UP', 'ALL', 'HIS', 'TIME'] +121-127105-0028-162: hyp=['THE', 'AWKWARD', 'THING', 'WAS', 'THAT', 'THEY', 'HAD', 'PRACTICALLY', 'NO', 'OTHER', 'RELATIONS', 'AND', 'THAT', 'HIS', 'OWN', 'AFFAIRS', 'TOOK', 'UP', 'ALL', 'HIS', 'TIME'] +121-127105-0029-163: ref=['THERE', 'WERE', 'PLENTY', 'OF', 'PEOPLE', 'TO', 'HELP', 'BUT', 'OF', 'COURSE', 'THE', 'YOUNG', 'LADY', 'WHO', 'SHOULD', 'GO', 'DOWN', 'AS', 'GOVERNESS', 'WOULD', 'BE', 'IN', 'SUPREME', 'AUTHORITY'] +121-127105-0029-163: hyp=['THERE', 'WERE', 'PLENTY', 'OF', 'PEOPLE', 'TO', 'HELP', 'BUT', 'OF', 'COURSE', 'THE', 'YOUNG', 'LADY', 'WHO', 'SHOULD', 'GO', 'DOWN', 'AS', 'GOVERNESS', 'WOULD', 'BE', 'IN', 'SUPREME', 'AUTHORITY'] +121-127105-0030-164: ref=['I', "DON'T", 'ANTICIPATE'] +121-127105-0030-164: hyp=['I', "DON'T", 'ANTICIPATE'] +121-127105-0031-165: ref=['SHE', 'WAS', 'YOUNG', 'UNTRIED', 'NERVOUS', 'IT', 'WAS', 'A', 'VISION', 'OF', 'SERIOUS', 'DUTIES', 'AND', 'LITTLE', 'COMPANY', 'OF', 'REALLY', 'GREAT', 'LONELINESS'] +121-127105-0031-165: hyp=['SHE', 'WAS', 'YOUNG', 'UNTRIED', 'NERVOUS', 'IT', 'WAS', 'A', 'VISION', 'OF', 'SERIOUS', 'DUTIES', 'IN', 'LITTLE', 'COMPANY', 'OF', 'REALLY', 'GREAT', 'LONELINESS'] +121-127105-0032-166: ref=['YES', 'BUT', "THAT'S", 'JUST', 'THE', 'BEAUTY', 'OF', 'HER', 'PASSION'] +121-127105-0032-166: hyp=['YES', 'BUT', "THAT'S", 'JUST', 'THE', 'BEAUTY', 'OF', 'HER', 'PASSION'] +121-127105-0033-167: ref=['IT', 'WAS', 'THE', 'BEAUTY', 'OF', 'IT'] +121-127105-0033-167: hyp=['IT', 'WAS', 'THE', 'BEAUTY', 'OF', 'IT'] +121-127105-0034-168: ref=['IT', 'SOUNDED', 'DULL', 'IT', 'SOUNDED', 'STRANGE', 'AND', 'ALL', 'THE', 'MORE', 'SO', 'BECAUSE', 'OF', 'HIS', 'MAIN', 'CONDITION', 'WHICH', 'WAS'] +121-127105-0034-168: hyp=['IT', 'SOUNDED', 'DULL', 'IT', 'SOUNDED', 'STRANGE', 'AND', 'ALL', 'THE', 'MORE', 'SO', 'BECAUSE', 'OF', 'HIS', 'MAIN', 'CONDITION', 'WHICH', 'WAS'] +121-127105-0035-169: ref=['SHE', 'PROMISED', 'TO', 'DO', 'THIS', 'AND', 'SHE', 'MENTIONED', 'TO', 'ME', 'THAT', 'WHEN', 'FOR', 'A', 'MOMENT', 'DISBURDENED', 'DELIGHTED', 'HE', 'HELD', 'HER', 'HAND', 'THANKING', 'HER', 'FOR', 'THE', 'SACRIFICE', 'SHE', 'ALREADY', 'FELT', 'REWARDED'] +121-127105-0035-169: hyp=['SHE', 'PROMISED', 'TO', 'DO', 'THIS', 'AND', 'SHE', 'MENTIONED', 'TO', 'ME', 'THAT', 'WHEN', 'FOR', 'A', 'MOMENT', 'DISBURDENED', 'DELIGHTED', 'HE', 'HELD', 'HER', 'HAND', 'THANKING', 'HER', 'FOR', 'THE', 'SACRIFICE', 'SHE', 'ALREADY', 'FELT', 'REWARDED'] 
+121-127105-0036-170: ref=['BUT', 'WAS', 'THAT', 'ALL', 'HER', 'REWARD', 'ONE', 'OF', 'THE', 'LADIES', 'ASKED'] +121-127105-0036-170: hyp=['BUT', 'WAS', 'THAT', 'ALL', 'HER', 'REWARD', 'ONE', 'OF', 'THE', 'LADIES', 'ASKED'] +1221-135766-0000-171: ref=['HOW', 'STRANGE', 'IT', 'SEEMED', 'TO', 'THE', 'SAD', 'WOMAN', 'AS', 'SHE', 'WATCHED', 'THE', 'GROWTH', 'AND', 'THE', 'BEAUTY', 'THAT', 'BECAME', 'EVERY', 'DAY', 'MORE', 'BRILLIANT', 'AND', 'THE', 'INTELLIGENCE', 'THAT', 'THREW', 'ITS', 'QUIVERING', 'SUNSHINE', 'OVER', 'THE', 'TINY', 'FEATURES', 'OF', 'THIS', 'CHILD'] +1221-135766-0000-171: hyp=['HOW', 'STRANGE', 'IT', 'SEEMED', 'TO', 'THE', 'SAD', 'WOMAN', 'AS', 'SHE', 'WATCHED', 'THE', 'GROWTH', 'AND', 'THE', 'BEAUTY', 'THAT', 'BECAME', 'EVERY', 'DAY', 'MORE', 'BRILLIANT', 'AND', 'THE', 'INTELLIGENCE', 'THAT', 'THREW', 'ITS', 'QUIVERING', 'SUNSHINE', 'OVER', 'THE', 'TINY', 'FEATURES', 'OF', 'THIS', 'CHILD'] +1221-135766-0001-172: ref=['GOD', 'AS', 'A', 'DIRECT', 'CONSEQUENCE', 'OF', 'THE', 'SIN', 'WHICH', 'MAN', 'THUS', 'PUNISHED', 'HAD', 'GIVEN', 'HER', 'A', 'LOVELY', 'CHILD', 'WHOSE', 'PLACE', 'WAS', 'ON', 'THAT', 'SAME', 'DISHONOURED', 'BOSOM', 'TO', 'CONNECT', 'HER', 'PARENT', 'FOR', 'EVER', 'WITH', 'THE', 'RACE', 'AND', 'DESCENT', 'OF', 'MORTALS', 'AND', 'TO', 'BE', 'FINALLY', 'A', 'BLESSED', 'SOUL', 'IN', 'HEAVEN'] +1221-135766-0001-172: hyp=['GOD', 'AS', 'A', 'DIRECT', 'CONSEQUENCE', 'OF', 'THE', 'SIN', 'WHICH', 'MAN', 'THUS', 'PUNISHED', 'HAD', 'GIVEN', 'HER', 'A', 'LOVELY', 'CHILD', 'WHOSE', 'PLACE', 'WAS', 'ON', 'THAT', 'SAME', 'DISHONOURED', 'BOSOM', 'TO', 'CONNECT', 'HER', 'PARENT', 'FOREVER', 'WITH', 'THE', 'RACE', 'AND', 'DESCENT', 'OF', 'MORTALS', 'AND', 'TO', 'BE', 'FINALLY', 'A', 'BLESSED', 'SOUL', 'IN', 'HEAVEN'] +1221-135766-0002-173: ref=['YET', 'THESE', 'THOUGHTS', 'AFFECTED', 'HESTER', 'PRYNNE', 'LESS', 'WITH', 'HOPE', 'THAN', 'APPREHENSION'] +1221-135766-0002-173: hyp=['YET', 'THESE', 'THOUGHTS', 'AFFECTED', 'HESTER', 'PRYNNE', 'LESS', 'WITH', 'HOPE', 'THAN', 'APPREHENSION'] +1221-135766-0003-174: ref=['THE', 'CHILD', 'HAD', 'A', 'NATIVE', 'GRACE', 'WHICH', 'DOES', 'NOT', 'INVARIABLY', 'CO', 'EXIST', 'WITH', 'FAULTLESS', 'BEAUTY', 'ITS', 'ATTIRE', 'HOWEVER', 'SIMPLE', 'ALWAYS', 'IMPRESSED', 'THE', 'BEHOLDER', 'AS', 'IF', 'IT', 'WERE', 'THE', 'VERY', 'GARB', 'THAT', 'PRECISELY', 'BECAME', 'IT', 'BEST'] +1221-135766-0003-174: hyp=['THE', 'CHILD', 'HAD', 'A', 'NATIVE', 'GRACE', 'WHICH', 'DOES', 'NOT', 'INVARIABLY', 'COEXIST', 'WITH', 'FAULTLESS', 'BEAUTY', 'ITS', 'ATTIRE', 'HOWEVER', 'SIMPLE', 'ALWAYS', 'IMPRESSED', 'THE', 'BEHOLDER', 'AS', 'IF', 'IT', 'WERE', 'THE', 'VERY', 'GARB', 'THAT', 'PRECISELY', 'BECAME', 'IT', 'BEST'] +1221-135766-0004-175: ref=['THIS', 'OUTWARD', 'MUTABILITY', 'INDICATED', 'AND', 'DID', 'NOT', 'MORE', 'THAN', 'FAIRLY', 'EXPRESS', 'THE', 'VARIOUS', 'PROPERTIES', 'OF', 'HER', 'INNER', 'LIFE'] +1221-135766-0004-175: hyp=['THIS', 'OUTWARD', 'MUTABILITY', 'INDICATED', 'AND', 'DID', 'NOT', 'MORE', 'THAN', 'FAIRLY', 'EXPRESS', 'THE', 'VARIOUS', 'PROPERTIES', 'OF', 'HER', 'INNER', 'LIFE'] +1221-135766-0005-176: ref=['HESTER', 'COULD', 'ONLY', 'ACCOUNT', 'FOR', 'THE', "CHILD'S", 'CHARACTER', 'AND', 'EVEN', 'THEN', 'MOST', 'VAGUELY', 'AND', 'IMPERFECTLY', 'BY', 'RECALLING', 'WHAT', 'SHE', 'HERSELF', 'HAD', 'BEEN', 'DURING', 'THAT', 'MOMENTOUS', 'PERIOD', 'WHILE', 'PEARL', 'WAS', 'IMBIBING', 'HER', 'SOUL', 'FROM', 'THE', 'SPIRITUAL', 'WORLD', 'AND', 'HER', 'BODILY', 'FRAME', 'FROM', 'ITS', 'MATERIAL', 'OF', 'EARTH'] +1221-135766-0005-176: hyp=['HESTER', 
'COULD', 'ONLY', 'ACCOUNT', 'FOR', 'THE', "CHILD'S", 'CHARACTER', 'AND', 'EVEN', 'THEN', 'MOST', 'VAGUELY', 'AND', 'IMPERFECTLY', 'BY', 'RECALLING', 'WHAT', 'SHE', 'HERSELF', 'HAD', 'BEEN', 'DURING', 'THAT', 'MOMENTOUS', 'PERIOD', 'WHILE', 'PEARL', 'WAS', 'IMBIBING', 'HER', 'SOUL', 'FROM', 'THE', 'SPIRITUAL', 'WORLD', 'AND', 'HER', 'BODILY', 'FRAME', 'FROM', 'ITS', 'MATERIAL', 'OF', 'EARTH'] +1221-135766-0006-177: ref=['THEY', 'WERE', 'NOW', 'ILLUMINATED', 'BY', 'THE', 'MORNING', 'RADIANCE', 'OF', 'A', 'YOUNG', "CHILD'S", 'DISPOSITION', 'BUT', 'LATER', 'IN', 'THE', 'DAY', 'OF', 'EARTHLY', 'EXISTENCE', 'MIGHT', 'BE', 'PROLIFIC', 'OF', 'THE', 'STORM', 'AND', 'WHIRLWIND'] +1221-135766-0006-177: hyp=['THEY', 'WERE', 'NOW', 'ILLUMINATED', 'BY', 'THE', 'MORNING', 'RADIANCE', 'OF', 'A', 'YOUNG', "CHILD'S", 'DISPOSITION', 'BUT', 'LATER', 'IN', 'THE', 'DAY', 'OF', 'EARTHLY', 'EXISTENCE', 'MIGHT', 'BE', 'PROLIFIC', 'OF', 'THE', 'STORM', 'AND', 'WHIRLWIND'] +1221-135766-0007-178: ref=['HESTER', 'PRYNNE', 'NEVERTHELESS', 'THE', 'LOVING', 'MOTHER', 'OF', 'THIS', 'ONE', 'CHILD', 'RAN', 'LITTLE', 'RISK', 'OF', 'ERRING', 'ON', 'THE', 'SIDE', 'OF', 'UNDUE', 'SEVERITY'] +1221-135766-0007-178: hyp=['HESTER', 'PRYNNE', 'NEVERTHELESS', 'THE', 'LOVING', 'MOTHER', 'OF', 'THIS', 'ONE', 'CHILD', 'RAN', 'LITTLE', 'RISK', 'OF', 'ERRING', 'ON', 'THE', 'SIDE', 'OF', 'UNDUE', 'SEVERITY'] +1221-135766-0008-179: ref=['MINDFUL', 'HOWEVER', 'OF', 'HER', 'OWN', 'ERRORS', 'AND', 'MISFORTUNES', 'SHE', 'EARLY', 'SOUGHT', 'TO', 'IMPOSE', 'A', 'TENDER', 'BUT', 'STRICT', 'CONTROL', 'OVER', 'THE', 'INFANT', 'IMMORTALITY', 'THAT', 'WAS', 'COMMITTED', 'TO', 'HER', 'CHARGE'] +1221-135766-0008-179: hyp=['MINDFUL', 'HOWEVER', 'OF', 'HER', 'OWN', 'ERRORS', 'AND', 'MISFORTUNES', 'SHE', 'EARLY', 'SOUGHT', 'TO', 'IMPOSE', 'A', 'TENDER', 'BUT', 'STRICT', 'CONTROL', 'OVER', 'THE', 'INFANT', 'IMMORTALITY', 'THAT', 'WAS', 'COMMITTED', 'TO', 'HER', 'CHARGE'] +1221-135766-0009-180: ref=['AS', 'TO', 'ANY', 'OTHER', 'KIND', 'OF', 'DISCIPLINE', 'WHETHER', 'ADDRESSED', 'TO', 'HER', 'MIND', 'OR', 'HEART', 'LITTLE', 'PEARL', 'MIGHT', 'OR', 'MIGHT', 'NOT', 'BE', 'WITHIN', 'ITS', 'REACH', 'IN', 'ACCORDANCE', 'WITH', 'THE', 'CAPRICE', 'THAT', 'RULED', 'THE', 'MOMENT'] +1221-135766-0009-180: hyp=['AS', 'TO', 'ANY', 'OTHER', 'KIND', 'OF', 'DISCIPLINE', 'WHETHER', 'ADDRESSED', 'TO', 'HER', 'MIND', 'OR', 'HEART', 'LITTLE', 'PEARL', 'MIGHT', 'OR', 'MIGHT', 'NOT', 'BE', 'WITHIN', 'ITS', 'REACH', 'IN', 'ACCORDANCE', 'WITH', 'THE', 'CAPRICE', 'THAT', 'RULED', 'THE', 'MOMENT'] +1221-135766-0010-181: ref=['IT', 'WAS', 'A', 'LOOK', 'SO', 'INTELLIGENT', 'YET', 'INEXPLICABLE', 'PERVERSE', 'SOMETIMES', 'SO', 'MALICIOUS', 'BUT', 'GENERALLY', 'ACCOMPANIED', 'BY', 'A', 'WILD', 'FLOW', 'OF', 'SPIRITS', 'THAT', 'HESTER', 'COULD', 'NOT', 'HELP', 'QUESTIONING', 'AT', 'SUCH', 'MOMENTS', 'WHETHER', 'PEARL', 'WAS', 'A', 'HUMAN', 'CHILD'] +1221-135766-0010-181: hyp=['IT', 'WAS', 'A', 'LOOK', 'SO', 'INTELLIGENT', 'YET', 'INEXPLICABLE', 'PERVERSE', 'SOMETIMES', 'SO', 'MALICIOUS', 'BUT', 'GENERALLY', 'ACCOMPANIED', 'BY', 'A', 'WILD', 'FLOW', 'OF', 'SPIRITS', 'THAT', 'HESTER', 'COULD', 'NOT', 'HELP', 'QUESTIONING', 'AT', 'SUCH', 'MOMENTS', 'WHETHER', 'PEARL', 'WAS', 'A', 'HUMAN', 'CHILD'] +1221-135766-0011-182: ref=['BEHOLDING', 'IT', 'HESTER', 'WAS', 'CONSTRAINED', 'TO', 'RUSH', 'TOWARDS', 'THE', 'CHILD', 'TO', 'PURSUE', 'THE', 'LITTLE', 'ELF', 'IN', 'THE', 'FLIGHT', 'WHICH', 'SHE', 'INVARIABLY', 'BEGAN', 'TO', 'SNATCH', 'HER', 'TO', 'HER', 'BOSOM', 'WITH', 'A', 'CLOSE', 
'PRESSURE', 'AND', 'EARNEST', 'KISSES', 'NOT', 'SO', 'MUCH', 'FROM', 'OVERFLOWING', 'LOVE', 'AS', 'TO', 'ASSURE', 'HERSELF', 'THAT', 'PEARL', 'WAS', 'FLESH', 'AND', 'BLOOD', 'AND', 'NOT', 'UTTERLY', 'DELUSIVE'] +1221-135766-0011-182: hyp=['BEHOLDING', 'IT', 'HESTER', 'WAS', 'CONSTRAINED', 'TO', 'RUSH', 'TOWARDS', 'THE', 'CHILD', 'TO', 'PURSUE', 'THE', 'LITTLE', 'ELF', 'IN', 'THE', 'FLIGHT', 'WHICH', 'SHE', 'INVARIABLY', 'BEGAN', 'TO', 'SNATCH', 'HER', 'TO', 'HER', 'BOSOM', 'WITH', 'A', 'CLOSE', 'PRESSURE', 'AND', 'EARNEST', 'KISSES', 'NOT', 'SO', 'MUCH', 'FROM', 'OVERFLOWING', 'LOVE', 'AS', 'TO', 'ASSURE', 'HERSELF', 'THAT', 'PEARL', 'WAS', 'FLESH', 'AND', 'BLOOD', 'AND', 'NOT', 'UTTERLY', 'DELUSIVE'] +1221-135766-0012-183: ref=['BROODING', 'OVER', 'ALL', 'THESE', 'MATTERS', 'THE', 'MOTHER', 'FELT', 'LIKE', 'ONE', 'WHO', 'HAS', 'EVOKED', 'A', 'SPIRIT', 'BUT', 'BY', 'SOME', 'IRREGULARITY', 'IN', 'THE', 'PROCESS', 'OF', 'CONJURATION', 'HAS', 'FAILED', 'TO', 'WIN', 'THE', 'MASTER', 'WORD', 'THAT', 'SHOULD', 'CONTROL', 'THIS', 'NEW', 'AND', 'INCOMPREHENSIBLE', 'INTELLIGENCE'] +1221-135766-0012-183: hyp=['BROODING', 'OVER', 'ALL', 'THESE', 'MATTERS', 'THE', 'MOTHER', 'FELT', 'LIKE', 'ONE', 'WHO', 'HAS', 'EVOKED', 'A', 'SPIRIT', 'BUT', 'BY', 'SOME', 'IRREGULARITY', 'IN', 'THE', 'PROCESS', 'OF', 'CONJURATION', 'HAS', 'FAILED', 'TO', 'WIN', 'THE', 'MASTER', 'WORD', 'THAT', 'SHOULD', 'CONTROL', 'THIS', 'NEW', 'AND', 'INCOMPREHENSIBLE', 'INTELLIGENCE'] +1221-135766-0013-184: ref=['PEARL', 'WAS', 'A', 'BORN', 'OUTCAST', 'OF', 'THE', 'INFANTILE', 'WORLD'] +1221-135766-0013-184: hyp=['PEARL', 'WAS', 'A', 'BORN', 'OUTCAST', 'OF', 'THE', 'INFANTILE', 'WORLD'] +1221-135766-0014-185: ref=['PEARL', 'SAW', 'AND', 'GAZED', 'INTENTLY', 'BUT', 'NEVER', 'SOUGHT', 'TO', 'MAKE', 'ACQUAINTANCE'] +1221-135766-0014-185: hyp=['PEARL', 'SAW', 'AND', 'GAZED', 'INTENTLY', 'BUT', 'NEVER', 'SOUGHT', 'TO', 'MAKE', 'ACQUAINTANCE'] +1221-135766-0015-186: ref=['IF', 'SPOKEN', 'TO', 'SHE', 'WOULD', 'NOT', 'SPEAK', 'AGAIN'] +1221-135766-0015-186: hyp=['IF', 'SPOKEN', 'TO', 'SHE', 'WOULD', 'NOT', 'SPEAK', 'AGAIN'] +1221-135767-0000-187: ref=['HESTER', 'PRYNNE', 'WENT', 'ONE', 'DAY', 'TO', 'THE', 'MANSION', 'OF', 'GOVERNOR', 'BELLINGHAM', 'WITH', 'A', 'PAIR', 'OF', 'GLOVES', 'WHICH', 'SHE', 'HAD', 'FRINGED', 'AND', 'EMBROIDERED', 'TO', 'HIS', 'ORDER', 'AND', 'WHICH', 'WERE', 'TO', 'BE', 'WORN', 'ON', 'SOME', 'GREAT', 'OCCASION', 'OF', 'STATE', 'FOR', 'THOUGH', 'THE', 'CHANCES', 'OF', 'A', 'POPULAR', 'ELECTION', 'HAD', 'CAUSED', 'THIS', 'FORMER', 'RULER', 'TO', 'DESCEND', 'A', 'STEP', 'OR', 'TWO', 'FROM', 'THE', 'HIGHEST', 'RANK', 'HE', 'STILL', 'HELD', 'AN', 'HONOURABLE', 'AND', 'INFLUENTIAL', 'PLACE', 'AMONG', 'THE', 'COLONIAL', 'MAGISTRACY'] +1221-135767-0000-187: hyp=['HESTER', 'PRYNNE', 'WENT', 'ONE', 'DAY', 'TO', 'THE', 'MANSION', 'OF', 'GOVERNOR', 'BELLINGHAM', 'WITH', 'A', 'PAIR', 'OF', 'GLOVES', 'WHICH', 'HE', 'HAD', 'FRINGED', 'AND', 'EMBROIDERED', 'TO', 'HIS', 'ORDER', 'AND', 'WHICH', 'WERE', 'TO', 'BE', 'WORN', 'ON', 'SOME', 'GREAT', 'OCCASION', 'OF', 'STATE', 'FOR', 'THOUGH', 'THE', 'CHANCES', 'OF', 'A', 'POPULAR', 'ELECTION', 'HAD', 'CAUSED', 'THIS', 'FORMER', 'RULER', 'TO', 'DESCEND', 'A', 'STEP', 'OR', 'TWO', 'FROM', 'THE', 'HIGHEST', 'RANK', 'HE', 'STILL', 'HELD', 'AN', 'HONORABLE', 'AND', 'INFLUENTIAL', 'PLACE', 'AMONG', 'THE', 'COLONIAL', 'MAGISTRACY'] +1221-135767-0001-188: ref=['ANOTHER', 'AND', 'FAR', 'MORE', 'IMPORTANT', 'REASON', 'THAN', 'THE', 'DELIVERY', 'OF', 'A', 'PAIR', 'OF', 'EMBROIDERED', 'GLOVES', 
'IMPELLED', 'HESTER', 'AT', 'THIS', 'TIME', 'TO', 'SEEK', 'AN', 'INTERVIEW', 'WITH', 'A', 'PERSONAGE', 'OF', 'SO', 'MUCH', 'POWER', 'AND', 'ACTIVITY', 'IN', 'THE', 'AFFAIRS', 'OF', 'THE', 'SETTLEMENT'] +1221-135767-0001-188: hyp=['ANOTHER', 'AND', 'FAR', 'MORE', 'IMPORTANT', 'REASON', 'THAN', 'THE', 'DELIVERY', 'OF', 'A', 'PAIR', 'OF', 'EMBROIDERED', 'GLOVES', 'IMPELLED', 'HESTER', 'AT', 'THIS', 'TIME', 'TO', 'SEEK', 'AN', 'INTERVIEW', 'WITH', 'A', 'PERSONAGE', 'OF', 'SO', 'MUCH', 'POWER', 'AND', 'ACTIVITY', 'IN', 'THE', 'AFFAIRS', 'OF', 'THE', 'SETTLEMENT'] +1221-135767-0002-189: ref=['AT', 'THAT', 'EPOCH', 'OF', 'PRISTINE', 'SIMPLICITY', 'HOWEVER', 'MATTERS', 'OF', 'EVEN', 'SLIGHTER', 'PUBLIC', 'INTEREST', 'AND', 'OF', 'FAR', 'LESS', 'INTRINSIC', 'WEIGHT', 'THAN', 'THE', 'WELFARE', 'OF', 'HESTER', 'AND', 'HER', 'CHILD', 'WERE', 'STRANGELY', 'MIXED', 'UP', 'WITH', 'THE', 'DELIBERATIONS', 'OF', 'LEGISLATORS', 'AND', 'ACTS', 'OF', 'STATE'] +1221-135767-0002-189: hyp=['AT', 'THAT', 'EPOCH', 'OF', 'PRISTINE', 'SIMPLICITY', 'HOWEVER', 'MATTERS', 'OF', 'EVEN', 'SLIGHTER', 'PUBLIC', 'INTEREST', 'AND', 'OF', 'FAR', 'LESS', 'INTRINSIC', 'WEIGHT', 'THAN', 'THE', 'WELFARE', 'OF', 'HESTER', 'AND', 'HER', 'CHILD', 'WERE', 'STRANGELY', 'MIXED', 'UP', 'WITH', 'THE', 'DELIBERATIONS', 'OF', 'LEGISLATORS', 'AND', 'ACTS', 'OF', 'STATE'] +1221-135767-0003-190: ref=['THE', 'PERIOD', 'WAS', 'HARDLY', 'IF', 'AT', 'ALL', 'EARLIER', 'THAN', 'THAT', 'OF', 'OUR', 'STORY', 'WHEN', 'A', 'DISPUTE', 'CONCERNING', 'THE', 'RIGHT', 'OF', 'PROPERTY', 'IN', 'A', 'PIG', 'NOT', 'ONLY', 'CAUSED', 'A', 'FIERCE', 'AND', 'BITTER', 'CONTEST', 'IN', 'THE', 'LEGISLATIVE', 'BODY', 'OF', 'THE', 'COLONY', 'BUT', 'RESULTED', 'IN', 'AN', 'IMPORTANT', 'MODIFICATION', 'OF', 'THE', 'FRAMEWORK', 'ITSELF', 'OF', 'THE', 'LEGISLATURE'] +1221-135767-0003-190: hyp=['THE', 'PERIOD', 'WAS', 'HARDLY', 'IF', 'AT', 'ALL', 'EARLIER', 'THAN', 'THAT', 'OF', 'OUR', 'STORY', 'WHEN', 'A', 'DISPUTE', 'CONCERNING', 'THE', 'RIGHT', 'OF', 'PROPERTY', 'IN', 'A', 'PIG', 'NOT', 'ONLY', 'CAUSED', 'A', 'FIERCE', 'AND', 'BITTER', 'CONTEST', 'IN', 'THE', 'LEGISLATIVE', 'BODY', 'OF', 'THE', 'COLONY', 'BUT', 'RESULTED', 'IN', 'AN', 'IMPORTANT', 'MODIFICATION', 'OF', 'THE', 'FRAMEWORK', 'ITSELF', 'OF', 'THE', 'LEGISLATURE'] +1221-135767-0004-191: ref=['WE', 'HAVE', 'SPOKEN', 'OF', "PEARL'S", 'RICH', 'AND', 'LUXURIANT', 'BEAUTY', 'A', 'BEAUTY', 'THAT', 'SHONE', 'WITH', 'DEEP', 'AND', 'VIVID', 'TINTS', 'A', 'BRIGHT', 'COMPLEXION', 'EYES', 'POSSESSING', 'INTENSITY', 'BOTH', 'OF', 'DEPTH', 'AND', 'GLOW', 'AND', 'HAIR', 'ALREADY', 'OF', 'A', 'DEEP', 'GLOSSY', 'BROWN', 'AND', 'WHICH', 'IN', 'AFTER', 'YEARS', 'WOULD', 'BE', 'NEARLY', 'AKIN', 'TO', 'BLACK'] +1221-135767-0004-191: hyp=['WE', 'HAVE', 'SPOKEN', 'OF', "PEARL'S", 'RICH', 'AND', 'LUXURIANT', 'BEAUTY', 'A', 'BEAUTY', 'THAT', 'SHONE', 'WITH', 'DEEP', 'AND', 'VIVID', 'TINTS', 'A', 'BRIGHT', 'COMPLEXION', 'EYES', 'POSSESSING', 'INTENSITY', 'BOTH', 'OF', 'DEPTH', 'AND', 'GLOW', 'AND', 'HAIR', 'ALREADY', 'OF', 'A', 'DEEP', 'GLOSSY', 'BROWN', 'AND', 'WHICH', 'IN', 'AFTER', 'YEARS', 'WOULD', 'BE', 'NEARLY', 'AKIN', 'TO', 'BLACK'] +1221-135767-0005-192: ref=['IT', 'WAS', 'THE', 'SCARLET', 'LETTER', 'IN', 'ANOTHER', 'FORM', 'THE', 'SCARLET', 'LETTER', 'ENDOWED', 'WITH', 'LIFE'] +1221-135767-0005-192: hyp=['IT', 'WAS', 'THE', 'SCARLET', 'LETTER', 'IN', 'ANOTHER', 'FORM', 'THE', 'SCARLET', 'LETTER', 'ENDOWED', 'WITH', 'LIFE'] +1221-135767-0006-193: ref=['THE', 'MOTHER', 'HERSELF', 'AS', 'IF', 'THE', 'RED', 'IGNOMINY', 'WERE', 
'SO', 'DEEPLY', 'SCORCHED', 'INTO', 'HER', 'BRAIN', 'THAT', 'ALL', 'HER', 'CONCEPTIONS', 'ASSUMED', 'ITS', 'FORM', 'HAD', 'CAREFULLY', 'WROUGHT', 'OUT', 'THE', 'SIMILITUDE', 'LAVISHING', 'MANY', 'HOURS', 'OF', 'MORBID', 'INGENUITY', 'TO', 'CREATE', 'AN', 'ANALOGY', 'BETWEEN', 'THE', 'OBJECT', 'OF', 'HER', 'AFFECTION', 'AND', 'THE', 'EMBLEM', 'OF', 'HER', 'GUILT', 'AND', 'TORTURE'] +1221-135767-0006-193: hyp=['THE', 'MOTHER', 'HERSELF', 'AS', 'IF', 'THE', 'RED', 'IGNOMINY', 'WERE', 'SO', 'DEEPLY', 'SCORCHED', 'INTO', 'HER', 'BRAIN', 'THAT', 'ALL', 'HER', 'CONCEPTIONS', 'ASSUMED', 'ITS', 'FORM', 'HAD', 'CAREFULLY', 'WROUGHT', 'OUT', 'THE', 'SIMILITUDE', 'LAVISHING', 'MANY', 'HOURS', 'OF', 'MORBID', 'INGENUITY', 'TO', 'CREATE', 'AN', 'ANALOGY', 'BETWEEN', 'THE', 'OBJECT', 'OF', 'HER', 'AFFECTION', 'AND', 'THE', 'EMBLEM', 'OF', 'HER', 'GUILT', 'AND', 'TORTURE'] +1221-135767-0007-194: ref=['BUT', 'IN', 'TRUTH', 'PEARL', 'WAS', 'THE', 'ONE', 'AS', 'WELL', 'AS', 'THE', 'OTHER', 'AND', 'ONLY', 'IN', 'CONSEQUENCE', 'OF', 'THAT', 'IDENTITY', 'HAD', 'HESTER', 'CONTRIVED', 'SO', 'PERFECTLY', 'TO', 'REPRESENT', 'THE', 'SCARLET', 'LETTER', 'IN', 'HER', 'APPEARANCE'] +1221-135767-0007-194: hyp=['BUT', 'IN', 'TRUTH', 'PEARL', 'WAS', 'THE', 'ONE', 'AS', 'WELL', 'AS', 'THE', 'OTHER', 'AND', 'ONLY', 'IN', 'CONSEQUENCE', 'OF', 'THAT', 'IDENTITY', 'HAD', 'HESTER', 'CONTRIVED', 'SO', 'PERFECTLY', 'TO', 'REPRESENT', 'THE', 'SCARLET', 'LETTER', 'IN', 'HER', 'APPEARANCE'] +1221-135767-0008-195: ref=['COME', 'THEREFORE', 'AND', 'LET', 'US', 'FLING', 'MUD', 'AT', 'THEM'] +1221-135767-0008-195: hyp=['COME', 'THEREFORE', 'AND', 'LET', 'US', 'FLING', 'MUD', 'AT', 'THEM'] +1221-135767-0009-196: ref=['BUT', 'PEARL', 'WHO', 'WAS', 'A', 'DAUNTLESS', 'CHILD', 'AFTER', 'FROWNING', 'STAMPING', 'HER', 'FOOT', 'AND', 'SHAKING', 'HER', 'LITTLE', 'HAND', 'WITH', 'A', 'VARIETY', 'OF', 'THREATENING', 'GESTURES', 'SUDDENLY', 'MADE', 'A', 'RUSH', 'AT', 'THE', 'KNOT', 'OF', 'HER', 'ENEMIES', 'AND', 'PUT', 'THEM', 'ALL', 'TO', 'FLIGHT'] +1221-135767-0009-196: hyp=['BUT', 'PEARL', 'WHO', 'WAS', 'A', 'DAUNTLESS', 'CHILD', 'AFTER', 'FROWNING', 'STAMPING', 'HER', 'FOOT', 'AND', 'SHAKING', 'HER', 'LITTLE', 'HAND', 'WITH', 'A', 'VARIETY', 'OF', 'THREATENING', 'GESTURES', 'SUDDENLY', 'MADE', 'A', 'RUSH', 'AT', 'THE', 'KNOT', 'OF', 'HER', 'ENEMIES', 'AND', 'PUT', 'THEM', 'ALL', 'TO', 'FLIGHT'] +1221-135767-0010-197: ref=['SHE', 'SCREAMED', 'AND', 'SHOUTED', 'TOO', 'WITH', 'A', 'TERRIFIC', 'VOLUME', 'OF', 'SOUND', 'WHICH', 'DOUBTLESS', 'CAUSED', 'THE', 'HEARTS', 'OF', 'THE', 'FUGITIVES', 'TO', 'QUAKE', 'WITHIN', 'THEM'] +1221-135767-0010-197: hyp=['SHE', 'SCREAMED', 'AND', 'SHOUTED', 'TOO', 'WITH', 'A', 'TERRIFIC', 'VOLUME', 'OF', 'SOUND', 'WHICH', 'DOUBTLESS', 'CAUSED', 'THE', 'HEARTS', 'OF', 'THE', 'FUGITIVES', 'TO', 'QUAKE', 'WITHIN', 'THEM'] +1221-135767-0011-198: ref=['IT', 'WAS', 'FURTHER', 'DECORATED', 'WITH', 'STRANGE', 'AND', 'SEEMINGLY', 'CABALISTIC', 'FIGURES', 'AND', 'DIAGRAMS', 'SUITABLE', 'TO', 'THE', 'QUAINT', 'TASTE', 'OF', 'THE', 'AGE', 'WHICH', 'HAD', 'BEEN', 'DRAWN', 'IN', 'THE', 'STUCCO', 'WHEN', 'NEWLY', 'LAID', 'ON', 'AND', 'HAD', 'NOW', 'GROWN', 'HARD', 'AND', 'DURABLE', 'FOR', 'THE', 'ADMIRATION', 'OF', 'AFTER', 'TIMES'] +1221-135767-0011-198: hyp=['IT', 'WAS', 'FURTHER', 'DECORATED', 'WITH', 'STRANGE', 'AND', 'SEEMINGLY', 'CABALISTIC', 'FIGURES', 'AND', 'DIAGRAMS', 'SUITABLE', 'TO', 'THE', 'QUAINT', 'TASTE', 'OF', 'THE', 'AGE', 'WHICH', 'HAD', 'BEEN', 'DRAWN', 'IN', 'THE', 'STUCCO', 'WHEN', 'NEWLY', 'LAID', 'ON', 'AND', 
'HAD', 'NOW', 'GROWN', 'HARD', 'AND', 'DURABLE', 'FOR', 'THE', 'ADMIRATION', 'OF', 'AFTER', 'TIMES'] +1221-135767-0012-199: ref=['THEY', 'APPROACHED', 'THE', 'DOOR', 'WHICH', 'WAS', 'OF', 'AN', 'ARCHED', 'FORM', 'AND', 'FLANKED', 'ON', 'EACH', 'SIDE', 'BY', 'A', 'NARROW', 'TOWER', 'OR', 'PROJECTION', 'OF', 'THE', 'EDIFICE', 'IN', 'BOTH', 'OF', 'WHICH', 'WERE', 'LATTICE', 'WINDOWS', 'THE', 'WOODEN', 'SHUTTERS', 'TO', 'CLOSE', 'OVER', 'THEM', 'AT', 'NEED'] +1221-135767-0012-199: hyp=['THEY', 'APPROACHED', 'THE', 'DOOR', 'WHICH', 'WAS', 'OF', 'AN', 'ARCHED', 'FORM', 'AND', 'FLANKED', 'ON', 'EACH', 'SIDE', 'BY', 'A', 'NARROW', 'TOWER', 'OR', 'PROJECTION', 'OF', 'THE', 'EDIFICE', 'IN', 'BOTH', 'OF', 'WHICH', 'WERE', 'LATTICE', 'WINDOWS', 'WITH', 'WOODEN', 'SHUTTERS', 'TO', 'CLOSE', 'OVER', 'THEM', 'AT', 'NEED'] +1221-135767-0013-200: ref=['LIFTING', 'THE', 'IRON', 'HAMMER', 'THAT', 'HUNG', 'AT', 'THE', 'PORTAL', 'HESTER', 'PRYNNE', 'GAVE', 'A', 'SUMMONS', 'WHICH', 'WAS', 'ANSWERED', 'BY', 'ONE', 'OF', 'THE', "GOVERNOR'S", 'BOND', 'SERVANT', 'A', 'FREE', 'BORN', 'ENGLISHMAN', 'BUT', 'NOW', 'A', 'SEVEN', 'YEARS', 'SLAVE'] +1221-135767-0013-200: hyp=['LIFTING', 'THE', 'IRON', 'HAMMER', 'THAT', 'HUNG', 'AT', 'THE', 'PORTAL', 'HESTER', 'PRYNNE', 'GAVE', 'A', 'SUMMONS', 'WHICH', 'WAS', 'ANSWERED', 'BY', 'ONE', 'OF', 'THE', "GOVERNOR'S", 'BOND', 'SERVANTS', 'A', 'FREE', 'BORN', 'ENGLISHMAN', 'BUT', 'NOW', 'A', 'SEVEN', 'YEARS', 'SLAVE'] +1221-135767-0014-201: ref=['YEA', 'HIS', 'HONOURABLE', 'WORSHIP', 'IS', 'WITHIN', 'BUT', 'HE', 'HATH', 'A', 'GODLY', 'MINISTER', 'OR', 'TWO', 'WITH', 'HIM', 'AND', 'LIKEWISE', 'A', 'LEECH'] +1221-135767-0014-201: hyp=['YEA', 'HIS', 'HONOURABLE', 'WORSHIP', 'IS', 'WITHIN', 'BUT', 'HE', 'HATH', 'A', 'GODLY', 'MINISTER', 'OR', 'TWO', 'WITH', 'HIM', 'AND', 'LIKEWISE', 'A', 'LEECH'] +1221-135767-0015-202: ref=['YE', 'MAY', 'NOT', 'SEE', 'HIS', 'WORSHIP', 'NOW'] +1221-135767-0015-202: hyp=['YE', 'MAY', 'NOT', 'SEE', 'HIS', 'WORSHIP', 'NOW'] +1221-135767-0016-203: ref=['WITH', 'MANY', 'VARIATIONS', 'SUGGESTED', 'BY', 'THE', 'NATURE', 'OF', 'HIS', 'BUILDING', 'MATERIALS', 'DIVERSITY', 'OF', 'CLIMATE', 'AND', 'A', 'DIFFERENT', 'MODE', 'OF', 'SOCIAL', 'LIFE', 'GOVERNOR', 'BELLINGHAM', 'HAD', 'PLANNED', 'HIS', 'NEW', 'HABITATION', 'AFTER', 'THE', 'RESIDENCES', 'OF', 'GENTLEMEN', 'OF', 'FAIR', 'ESTATE', 'IN', 'HIS', 'NATIVE', 'LAND'] +1221-135767-0016-203: hyp=['WITH', 'MANY', 'VARIATIONS', 'SUGGESTED', 'BY', 'THE', 'NATURE', 'OF', 'HIS', 'BUILDING', 'MATERIALS', 'DIVERSITY', 'OF', 'CLIMATE', 'AND', 'A', 'DIFFERENT', 'MODE', 'OF', 'SOCIAL', 'LIFE', 'GOVERNOR', 'BELLINGHAM', 'HAD', 'PLANNED', 'HIS', 'NEW', 'HABITATION', 'AFTER', 'THE', 'RESIDENCES', 'OF', 'GENTLEMEN', 'OF', 'FAIR', 'ESTATE', 'IN', 'HIS', 'NATIVE', 'LAND'] +1221-135767-0017-204: ref=['ON', 'THE', 'TABLE', 'IN', 'TOKEN', 'THAT', 'THE', 'SENTIMENT', 'OF', 'OLD', 'ENGLISH', 'HOSPITALITY', 'HAD', 'NOT', 'BEEN', 'LEFT', 'BEHIND', 'STOOD', 'A', 'LARGE', 'PEWTER', 'TANKARD', 'AT', 'THE', 'BOTTOM', 'OF', 'WHICH', 'HAD', 'HESTER', 'OR', 'PEARL', 'PEEPED', 'INTO', 'IT', 'THEY', 'MIGHT', 'HAVE', 'SEEN', 'THE', 'FROTHY', 'REMNANT', 'OF', 'A', 'RECENT', 'DRAUGHT', 'OF', 'ALE'] +1221-135767-0017-204: hyp=['ON', 'THE', 'TABLE', 'IN', 'TOKEN', 'THAT', 'THE', 'SENTIMENT', 'OF', 'OLD', 'ENGLISH', 'HOSPITALITY', 'HAD', 'NOT', 'BEEN', 'LEFT', 'BEHIND', 'STOOD', 'A', 'LARGE', 'PEWTER', 'TANKARD', 'AT', 'THE', 'BOTTOM', 'OF', 'WHICH', 'HAD', 'HESTER', 'OR', 'PEARL', 'PEEPED', 'INTO', 'IT', 'THEY', 'MIGHT', 'HAVE', 'SEEN', 'THE', 
'FROTHY', 'REMNANT', 'OF', 'A', 'RECENT', 'DRAUGHT', 'OF', 'ALE'] +1221-135767-0018-205: ref=['LITTLE', 'PEARL', 'WHO', 'WAS', 'AS', 'GREATLY', 'PLEASED', 'WITH', 'THE', 'GLEAMING', 'ARMOUR', 'AS', 'SHE', 'HAD', 'BEEN', 'WITH', 'THE', 'GLITTERING', 'FRONTISPIECE', 'OF', 'THE', 'HOUSE', 'SPENT', 'SOME', 'TIME', 'LOOKING', 'INTO', 'THE', 'POLISHED', 'MIRROR', 'OF', 'THE', 'BREASTPLATE'] +1221-135767-0018-205: hyp=['LITTLE', 'PEARL', 'WHO', 'WAS', 'AS', 'GREATLY', 'PLEASED', 'WITH', 'THE', 'GLEAMING', 'ARMOR', 'AS', 'SHE', 'HAD', 'BEEN', 'WITH', 'THE', 'GLITTERING', 'FRONTISPIECE', 'OF', 'THE', 'HOUSE', 'SPENT', 'SOME', 'TIME', 'LOOKING', 'INTO', 'THE', 'POLISHED', 'MIRROR', 'OF', 'THE', 'BREASTPLATE'] +1221-135767-0019-206: ref=['MOTHER', 'CRIED', 'SHE', 'I', 'SEE', 'YOU', 'HERE', 'LOOK', 'LOOK'] +1221-135767-0019-206: hyp=['MOTHER', 'CRIED', 'SHE', 'I', 'SEE', 'YOU', 'HERE', 'LOOK', 'LOOK'] +1221-135767-0020-207: ref=['IN', 'TRUTH', 'SHE', 'SEEMED', 'ABSOLUTELY', 'HIDDEN', 'BEHIND', 'IT'] +1221-135767-0020-207: hyp=['IN', 'TRUTH', 'SHE', 'SEEMED', 'ABSOLUTELY', 'HIDDEN', 'BEHIND', 'IT'] +1221-135767-0021-208: ref=['PEARL', 'ACCORDINGLY', 'RAN', 'TO', 'THE', 'BOW', 'WINDOW', 'AT', 'THE', 'FURTHER', 'END', 'OF', 'THE', 'HALL', 'AND', 'LOOKED', 'ALONG', 'THE', 'VISTA', 'OF', 'A', 'GARDEN', 'WALK', 'CARPETED', 'WITH', 'CLOSELY', 'SHAVEN', 'GRASS', 'AND', 'BORDERED', 'WITH', 'SOME', 'RUDE', 'AND', 'IMMATURE', 'ATTEMPT', 'AT', 'SHRUBBERY'] +1221-135767-0021-208: hyp=['PEARL', 'ACCORDINGLY', 'RAN', 'TO', 'THE', 'BOW', 'WINDOW', 'AT', 'THE', 'FURTHER', 'END', 'OF', 'THE', 'HALL', 'AND', 'LOOKED', 'ALONG', 'THE', 'VISTA', 'OF', 'A', 'GARDEN', 'WALK', 'CARPETED', 'WITH', 'CLOSELY', 'SHAVEN', 'GRASS', 'AND', 'BORDERED', 'WITH', 'SOME', 'RUDE', 'AND', 'IMMATURE', 'ATTEMPT', 'AT', 'SHRUBBERY'] +1221-135767-0022-209: ref=['BUT', 'THE', 'PROPRIETOR', 'APPEARED', 'ALREADY', 'TO', 'HAVE', 'RELINQUISHED', 'AS', 'HOPELESS', 'THE', 'EFFORT', 'TO', 'PERPETUATE', 'ON', 'THIS', 'SIDE', 'OF', 'THE', 'ATLANTIC', 'IN', 'A', 'HARD', 'SOIL', 'AND', 'AMID', 'THE', 'CLOSE', 'STRUGGLE', 'FOR', 'SUBSISTENCE', 'THE', 'NATIVE', 'ENGLISH', 'TASTE', 'FOR', 'ORNAMENTAL', 'GARDENING'] +1221-135767-0022-209: hyp=['BUT', 'THE', 'PROPRIETOR', 'APPEARED', 'ALREADY', 'TO', 'HAVE', 'RELINQUISHED', 'AS', 'HOPELESS', 'THE', 'EFFORT', 'TO', 'PERPETUATE', 'ON', 'THIS', 'SIDE', 'OF', 'THE', 'ATLANTIC', 'IN', 'A', 'HARD', 'SOIL', 'AND', 'AMID', 'THE', 'CLOSE', 'STRUGGLE', 'FOR', 'SUBSISTENCE', 'THE', 'NATIVE', 'ENGLISH', 'TASTE', 'FOR', 'ORNAMENTAL', 'GARDENING'] +1221-135767-0023-210: ref=['THERE', 'WERE', 'A', 'FEW', 'ROSE', 'BUSHES', 'HOWEVER', 'AND', 'A', 'NUMBER', 'OF', 'APPLE', 'TREES', 'PROBABLY', 'THE', 'DESCENDANTS', 'OF', 'THOSE', 'PLANTED', 'BY', 'THE', 'REVEREND', 'MISTER', 'BLACKSTONE', 'THE', 'FIRST', 'SETTLER', 'OF', 'THE', 'PENINSULA', 'THAT', 'HALF', 'MYTHOLOGICAL', 'PERSONAGE', 'WHO', 'RIDES', 'THROUGH', 'OUR', 'EARLY', 'ANNALS', 'SEATED', 'ON', 'THE', 'BACK', 'OF', 'A', 'BULL'] +1221-135767-0023-210: hyp=['THERE', 'WERE', 'A', 'FEW', 'ROSE', 'BUSHES', 'HOWEVER', 'AND', 'A', 'NUMBER', 'OF', 'APPLE', 'TREES', 'PROBABLY', 'THE', 'DESCENDANTS', 'OF', 'THOSE', 'PLANTED', 'BY', 'THE', 'REVEREND', 'MISTER', 'BLACKSTONE', 'THE', 'FIRST', 'SETTLER', 'OF', 'THE', 'PENINSULA', 'THAT', 'HALF', 'MYTHOLOGICAL', 'PERSONAGE', 'WHO', 'RIDES', 'THROUGH', 'OUR', 'EARLY', 'ANNALS', 'SEATED', 'ON', 'THE', 'BACK', 'OF', 'A', 'BULL'] +1221-135767-0024-211: ref=['PEARL', 'SEEING', 'THE', 'ROSE', 'BUSHES', 'BEGAN', 'TO', 'CRY', 'FOR', 'A', 
'RED', 'ROSE', 'AND', 'WOULD', 'NOT', 'BE', 'PACIFIED'] +1221-135767-0024-211: hyp=['PEARL', 'SEEING', 'THE', 'ROSE', 'BUSHES', 'BEGAN', 'TO', 'CRY', 'FOR', 'A', 'RED', 'ROSE', 'AND', 'WOULD', 'NOT', 'BE', 'PACIFIED'] +1284-1180-0000-212: ref=['HE', 'WORE', 'BLUE', 'SILK', 'STOCKINGS', 'BLUE', 'KNEE', 'PANTS', 'WITH', 'GOLD', 'BUCKLES', 'A', 'BLUE', 'RUFFLED', 'WAIST', 'AND', 'A', 'JACKET', 'OF', 'BRIGHT', 'BLUE', 'BRAIDED', 'WITH', 'GOLD'] +1284-1180-0000-212: hyp=['HE', 'WORE', 'BLUE', 'SILK', 'STOCKINGS', 'BLUE', 'KNEE', 'PANTS', 'WITH', 'GOLD', 'BUCKLES', 'A', 'BLUE', 'RUFFLED', 'WAIST', 'AND', 'A', 'JACKET', 'OF', 'BRIGHT', 'BLUE', 'BRAIDED', 'WITH', 'GOLD'] +1284-1180-0001-213: ref=['HIS', 'HAT', 'HAD', 'A', 'PEAKED', 'CROWN', 'AND', 'A', 'FLAT', 'BRIM', 'AND', 'AROUND', 'THE', 'BRIM', 'WAS', 'A', 'ROW', 'OF', 'TINY', 'GOLDEN', 'BELLS', 'THAT', 'TINKLED', 'WHEN', 'HE', 'MOVED'] +1284-1180-0001-213: hyp=['HIS', 'HAT', 'HAD', 'A', 'PEAKED', 'CROWN', 'AT', 'A', 'FLAT', 'BRIM', 'AND', 'AROUND', 'THE', 'BRIM', 'WAS', 'A', 'ROW', 'OF', 'TINY', 'GOLDEN', 'BELLS', 'THAT', 'TINKLED', 'WHEN', 'HE', 'MOVED'] +1284-1180-0002-214: ref=['INSTEAD', 'OF', 'SHOES', 'THE', 'OLD', 'MAN', 'WORE', 'BOOTS', 'WITH', 'TURNOVER', 'TOPS', 'AND', 'HIS', 'BLUE', 'COAT', 'HAD', 'WIDE', 'CUFFS', 'OF', 'GOLD', 'BRAID'] +1284-1180-0002-214: hyp=['INSTEAD', 'OF', 'SHOES', 'THE', 'OLD', 'MAN', 'WORE', 'BOOTS', 'WITH', 'TURNOVER', 'TOPS', 'AND', 'HIS', 'BLUE', 'COAT', 'HAD', 'WIDE', 'CUFFS', 'OF', 'GOLD', 'BRAID'] +1284-1180-0003-215: ref=['FOR', 'A', 'LONG', 'TIME', 'HE', 'HAD', 'WISHED', 'TO', 'EXPLORE', 'THE', 'BEAUTIFUL', 'LAND', 'OF', 'OZ', 'IN', 'WHICH', 'THEY', 'LIVED'] +1284-1180-0003-215: hyp=['FOR', 'A', 'LONG', 'TIME', 'HE', 'HAD', 'WISHED', 'TO', 'EXPLORE', 'THE', 'BEAUTIFUL', 'LAND', 'OF', 'OZ', 'IN', 'WHICH', 'THEY', 'LIVED'] +1284-1180-0004-216: ref=['WHEN', 'THEY', 'WERE', 'OUTSIDE', 'UNC', 'SIMPLY', 'LATCHED', 'THE', 'DOOR', 'AND', 'STARTED', 'UP', 'THE', 'PATH'] +1284-1180-0004-216: hyp=['WHEN', 'THEY', 'WERE', 'OUTSIDE', 'UNC', 'SIMPLY', 'LATCHED', 'THE', 'DOOR', 'AND', 'STARTED', 'UP', 'THE', 'PATH'] +1284-1180-0005-217: ref=['NO', 'ONE', 'WOULD', 'DISTURB', 'THEIR', 'LITTLE', 'HOUSE', 'EVEN', 'IF', 'ANYONE', 'CAME', 'SO', 'FAR', 'INTO', 'THE', 'THICK', 'FOREST', 'WHILE', 'THEY', 'WERE', 'GONE'] +1284-1180-0005-217: hyp=['NO', 'ONE', 'WOULD', 'DISTURB', 'THEIR', 'LITTLE', 'HOUSE', 'EVEN', 'IF', 'ANY', 'ONE', 'CAME', 'SO', 'FAR', 'INTO', 'THE', 'THICK', 'FOREST', 'WHILE', 'THEY', 'WERE', 'GONE'] +1284-1180-0006-218: ref=['AT', 'THE', 'FOOT', 'OF', 'THE', 'MOUNTAIN', 'THAT', 'SEPARATED', 'THE', 'COUNTRY', 'OF', 'THE', 'MUNCHKINS', 'FROM', 'THE', 'COUNTRY', 'OF', 'THE', 'GILLIKINS', 'THE', 'PATH', 'DIVIDED'] +1284-1180-0006-218: hyp=['AT', 'THE', 'FOOT', 'OF', 'THE', 'MOUNTAIN', 'THAT', 'SEPARATED', 'THE', 'COUNTRY', 'OF', 'THE', 'MUNCHKINS', 'FROM', 'THE', 'COUNTRY', 'OF', 'THE', 'GILLAKANS', 'THE', 'PATH', 'DIVIDED'] +1284-1180-0007-219: ref=['HE', 'KNEW', 'IT', 'WOULD', 'TAKE', 'THEM', 'TO', 'THE', 'HOUSE', 'OF', 'THE', 'CROOKED', 'MAGICIAN', 'WHOM', 'HE', 'HAD', 'NEVER', 'SEEN', 'BUT', 'WHO', 'WAS', 'THEIR', 'NEAREST', 'NEIGHBOR'] +1284-1180-0007-219: hyp=['HE', 'KNEW', 'IT', 'WOULD', 'TAKE', 'THEM', 'TO', 'THE', 'HOUSE', 'OF', 'THE', 'CROOKED', 'MAGICIAN', 'WHOM', 'HE', 'HAD', 'NEVER', 'SEEN', 'BUT', 'WHO', 'WAS', 'THEIR', 'NEAREST', 'NEIGHBOR'] +1284-1180-0008-220: ref=['ALL', 'THE', 'MORNING', 'THEY', 'TRUDGED', 'UP', 'THE', 'MOUNTAIN', 'PATH', 'AND', 'AT', 'NOON', 'UNC', 'AND', 'OJO', 'SAT', 
'ON', 'A', 'FALLEN', 'TREE', 'TRUNK', 'AND', 'ATE', 'THE', 'LAST', 'OF', 'THE', 'BREAD', 'WHICH', 'THE', 'OLD', 'MUNCHKIN', 'HAD', 'PLACED', 'IN', 'HIS', 'POCKET'] +1284-1180-0008-220: hyp=['ALL', 'THE', 'MORNING', 'THEY', 'TRUDGED', 'UP', 'THE', 'MOUNTAIN', 'PATH', 'AND', 'AT', 'NOON', 'UNC', 'AND', 'OJO', 'SAT', 'ON', 'A', 'FALLEN', 'TREE', 'TRUNK', 'AND', 'ATE', 'THE', 'LAST', 'OF', 'THE', 'BREAD', 'WHICH', 'THE', 'OLD', 'MUNCHKIN', 'HAD', 'PLACED', 'IN', 'HIS', 'POCKET'] +1284-1180-0009-221: ref=['THEN', 'THEY', 'STARTED', 'ON', 'AGAIN', 'AND', 'TWO', 'HOURS', 'LATER', 'CAME', 'IN', 'SIGHT', 'OF', 'THE', 'HOUSE', 'OF', 'DOCTOR', 'PIPT'] +1284-1180-0009-221: hyp=['THEN', 'THEY', 'STARTED', 'ON', 'AGAIN', 'AND', 'TWO', 'HOURS', 'LATER', 'CAME', 'IN', 'SIGHT', 'OF', 'THE', 'HOUSE', 'OF', 'DOCTOR', 'PIPT'] +1284-1180-0010-222: ref=['UNC', 'KNOCKED', 'AT', 'THE', 'DOOR', 'OF', 'THE', 'HOUSE', 'AND', 'A', 'CHUBBY', 'PLEASANT', 'FACED', 'WOMAN', 'DRESSED', 'ALL', 'IN', 'BLUE', 'OPENED', 'IT', 'AND', 'GREETED', 'THE', 'VISITORS', 'WITH', 'A', 'SMILE'] +1284-1180-0010-222: hyp=['UNC', 'KNOCKED', 'AT', 'THE', 'DOOR', 'OF', 'THE', 'HOUSE', 'AND', 'A', 'CHUBBY', 'PLEASANT', 'FACED', 'WOMAN', 'DRESSED', 'ALL', 'IN', 'BLUE', 'OPENED', 'IT', 'AND', 'GREETED', 'THE', 'VISITORS', 'WITH', 'A', 'SMILE'] +1284-1180-0011-223: ref=['I', 'AM', 'MY', 'DEAR', 'AND', 'ALL', 'STRANGERS', 'ARE', 'WELCOME', 'TO', 'MY', 'HOME'] +1284-1180-0011-223: hyp=['I', 'AM', 'MY', 'DEAR', 'AND', 'ALL', 'STRANGERS', 'ARE', 'WELCOME', 'TO', 'MY', 'HOME'] +1284-1180-0012-224: ref=['WE', 'HAVE', 'COME', 'FROM', 'A', 'FAR', 'LONELIER', 'PLACE', 'THAN', 'THIS', 'A', 'LONELIER', 'PLACE'] +1284-1180-0012-224: hyp=['WE', 'HAVE', 'COME', 'FROM', 'A', 'FAR', 'LONELIER', 'PLACE', 'THAN', 'THIS', 'A', 'LONELIER', 'PLACE'] +1284-1180-0013-225: ref=['AND', 'YOU', 'MUST', 'BE', 'OJO', 'THE', 'UNLUCKY', 'SHE', 'ADDED'] +1284-1180-0013-225: hyp=['AND', 'YOU', 'MUST', 'BE', 'OJO', 'THE', 'UNLUCKY', 'SHE', 'ADDED'] +1284-1180-0014-226: ref=['OJO', 'HAD', 'NEVER', 'EATEN', 'SUCH', 'A', 'FINE', 'MEAL', 'IN', 'ALL', 'HIS', 'LIFE'] +1284-1180-0014-226: hyp=['OJO', 'HAD', 'NEVER', 'EATEN', 'SUCH', 'A', 'FINE', 'MEAL', 'IN', 'ALL', 'HIS', 'LIFE'] +1284-1180-0015-227: ref=['WE', 'ARE', 'TRAVELING', 'REPLIED', 'OJO', 'AND', 'WE', 'STOPPED', 'AT', 'YOUR', 'HOUSE', 'JUST', 'TO', 'REST', 'AND', 'REFRESH', 'OURSELVES'] +1284-1180-0015-227: hyp=['WE', 'ARE', 'TRAVELING', 'REPLIED', 'OJO', 'AND', 'WE', 'STOPPED', 'AT', 'YOUR', 'HOUSE', 'JUST', 'TO', 'REST', 'AND', 'REFRESH', 'OURSELVES'] +1284-1180-0016-228: ref=['THE', 'WOMAN', 'SEEMED', 'THOUGHTFUL'] +1284-1180-0016-228: hyp=['THE', 'WOMAN', 'SEEMED', 'THOUGHTFUL'] +1284-1180-0017-229: ref=['AT', 'ONE', 'END', 'STOOD', 'A', 'GREAT', 'FIREPLACE', 'IN', 'WHICH', 'A', 'BLUE', 'LOG', 'WAS', 'BLAZING', 'WITH', 'A', 'BLUE', 'FLAME', 'AND', 'OVER', 'THE', 'FIRE', 'HUNG', 'FOUR', 'KETTLES', 'IN', 'A', 'ROW', 'ALL', 'BUBBLING', 'AND', 'STEAMING', 'AT', 'A', 'GREAT', 'RATE'] +1284-1180-0017-229: hyp=['AT', 'ONE', 'END', 'STOOD', 'A', 'GREAT', 'FIREPLACE', 'IN', 'WHICH', 'A', 'BLUE', 'LOG', 'WAS', 'BLAZING', 'WITH', 'A', 'BLUE', 'FLAME', 'AND', 'OVER', 'THE', 'FIRE', 'HUNG', 'FOUR', 'KETTLES', 'IN', 'A', 'ROW', 'ALL', 'BUBBLING', 'AND', 'STEAMING', 'AT', 'A', 'GREAT', 'RATE'] +1284-1180-0018-230: ref=['IT', 'TAKES', 'ME', 'SEVERAL', 'YEARS', 'TO', 'MAKE', 'THIS', 'MAGIC', 'POWDER', 'BUT', 'AT', 'THIS', 'MOMENT', 'I', 'AM', 'PLEASED', 'TO', 'SAY', 'IT', 'IS', 'NEARLY', 'DONE', 'YOU', 'SEE', 'I', 'AM', 'MAKING', 'IT', 
'FOR', 'MY', 'GOOD', 'WIFE', 'MARGOLOTTE', 'WHO', 'WANTS', 'TO', 'USE', 'SOME', 'OF', 'IT', 'FOR', 'A', 'PURPOSE', 'OF', 'HER', 'OWN'] +1284-1180-0018-230: hyp=['IT', 'TAKES', 'ME', 'SEVERAL', 'YEARS', 'TO', 'MAKE', 'THIS', 'MAGIC', 'POWDER', 'BUT', 'AT', 'THIS', 'MOMENT', 'I', 'AM', 'PLEASED', 'TO', 'SAY', 'IT', 'IS', 'NEARLY', 'DONE', 'YOU', 'SEE', 'I', 'AM', 'MAKING', 'IT', 'FOR', 'MY', 'GOOD', 'WIFE', 'MARGOLOTTE', 'WHO', 'WANTS', 'TO', 'USE', 'SOME', 'OF', 'IT', 'FOR', 'A', 'PURPOSE', 'OF', 'HER', 'OWN'] +1284-1180-0019-231: ref=['YOU', 'MUST', 'KNOW', 'SAID', 'MARGOLOTTE', 'WHEN', 'THEY', 'WERE', 'ALL', 'SEATED', 'TOGETHER', 'ON', 'THE', 'BROAD', 'WINDOW', 'SEAT', 'THAT', 'MY', 'HUSBAND', 'FOOLISHLY', 'GAVE', 'AWAY', 'ALL', 'THE', 'POWDER', 'OF', 'LIFE', 'HE', 'FIRST', 'MADE', 'TO', 'OLD', 'MOMBI', 'THE', 'WITCH', 'WHO', 'USED', 'TO', 'LIVE', 'IN', 'THE', 'COUNTRY', 'OF', 'THE', 'GILLIKINS', 'TO', 'THE', 'NORTH', 'OF', 'HERE'] +1284-1180-0019-231: hyp=['YOU', 'MUST', 'KNOW', 'SAID', 'MARGOLOTTE', 'WHEN', 'THEY', 'WERE', 'ALL', 'SEATED', 'TOGETHER', 'ON', 'THE', 'BROAD', 'WINDOW', 'SEAT', 'THAT', 'MY', 'HUSBAND', 'FOOLISHLY', 'GAVE', 'AWAY', 'ALL', 'THE', 'POWDER', 'OF', 'LIFE', 'HE', 'FIRST', 'MADE', 'TO', 'OLD', 'MOMBY', 'THE', 'WITCH', 'WHO', 'USED', 'TO', 'LIVE', 'IN', 'THE', 'COUNTRY', 'OF', 'THE', 'GILLEKINS', 'TO', 'THE', 'NORTH', 'OF', 'HERE'] +1284-1180-0020-232: ref=['THE', 'FIRST', 'LOT', 'WE', 'TESTED', 'ON', 'OUR', 'GLASS', 'CAT', 'WHICH', 'NOT', 'ONLY', 'BEGAN', 'TO', 'LIVE', 'BUT', 'HAS', 'LIVED', 'EVER', 'SINCE'] +1284-1180-0020-232: hyp=['THE', 'FIRST', 'LOT', 'WE', 'TESTED', 'ON', 'OUR', 'GLASS', 'CAT', 'WHICH', 'NOT', 'ONLY', 'BEGAN', 'TO', 'LIVE', 'BUT', 'HAS', 'LIVED', 'EVER', 'SINCE'] +1284-1180-0021-233: ref=['I', 'THINK', 'THE', 'NEXT', 'GLASS', 'CAT', 'THE', 'MAGICIAN', 'MAKES', 'WILL', 'HAVE', 'NEITHER', 'BRAINS', 'NOR', 'HEART', 'FOR', 'THEN', 'IT', 'WILL', 'NOT', 'OBJECT', 'TO', 'CATCHING', 'MICE', 'AND', 'MAY', 'PROVE', 'OF', 'SOME', 'USE', 'TO', 'US'] +1284-1180-0021-233: hyp=['I', 'THINK', 'THE', 'NEXT', 'GLASS', 'CAT', 'THE', 'MAGICIAN', 'MAKES', 'WILL', 'HAVE', 'NEITHER', 'BRAINS', 'NOR', 'HEART', 'FOR', 'THEN', 'IT', 'WILL', 'NOT', 'OBJECT', 'TO', 'CATCHING', 'MICE', 'AND', 'MAY', 'PROVE', 'OF', 'SOME', 'USE', 'TO', 'US'] +1284-1180-0022-234: ref=["I'M", 'AFRAID', 'I', "DON'T", 'KNOW', 'MUCH', 'ABOUT', 'THE', 'LAND', 'OF', 'OZ'] +1284-1180-0022-234: hyp=['I', 'AM', 'AFRAID', 'I', "DON'T", 'KNOW', 'MUCH', 'ABOUT', 'THE', 'LAND', 'OF', 'OZ'] +1284-1180-0023-235: ref=['YOU', 'SEE', "I'VE", 'LIVED', 'ALL', 'MY', 'LIFE', 'WITH', 'UNC', 'NUNKIE', 'THE', 'SILENT', 'ONE', 'AND', 'THERE', 'WAS', 'NO', 'ONE', 'TO', 'TELL', 'ME', 'ANYTHING'] +1284-1180-0023-235: hyp=['YOU', 'SEE', "I'VE", 'LIVED', 'ALL', 'MY', 'LIFE', 'WITH', 'UNC', 'NUNKIE', 'THE', 'SILENT', 'ONE', 'AND', 'THERE', 'WAS', 'NO', 'ONE', 'TO', 'TELL', 'ME', 'ANYTHING'] +1284-1180-0024-236: ref=['THAT', 'IS', 'ONE', 'REASON', 'YOU', 'ARE', 'OJO', 'THE', 'UNLUCKY', 'SAID', 'THE', 'WOMAN', 'IN', 'A', 'SYMPATHETIC', 'TONE'] +1284-1180-0024-236: hyp=['THAT', 'IS', 'ONE', 'REASON', 'YOU', 'ARE', 'OJO', 'THE', 'UNLUCKY', 'SAID', 'THE', 'WOMAN', 'IN', 'A', 'SYMPATHETIC', 'TONE'] +1284-1180-0025-237: ref=['I', 'THINK', 'I', 'MUST', 'SHOW', 'YOU', 'MY', 'PATCHWORK', 'GIRL', 'SAID', 'MARGOLOTTE', 'LAUGHING', 'AT', 'THE', "BOY'S", 'ASTONISHMENT', 'FOR', 'SHE', 'IS', 'RATHER', 'DIFFICULT', 'TO', 'EXPLAIN'] +1284-1180-0025-237: hyp=['I', 'THINK', 'I', 'MUST', 'SHOW', 'YOU', 'MY', 'PATCHWORK', 'GIRL', 'SAID', 
'MARGOLOTTE', 'LAUGHING', 'AT', 'THE', "BOY'S", 'ASTONISHMENT', 'FOR', 'SHE', 'IS', 'RATHER', 'DIFFICULT', 'TO', 'EXPLAIN'] +1284-1180-0026-238: ref=['BUT', 'FIRST', 'I', 'WILL', 'TELL', 'YOU', 'THAT', 'FOR', 'MANY', 'YEARS', 'I', 'HAVE', 'LONGED', 'FOR', 'A', 'SERVANT', 'TO', 'HELP', 'ME', 'WITH', 'THE', 'HOUSEWORK', 'AND', 'TO', 'COOK', 'THE', 'MEALS', 'AND', 'WASH', 'THE', 'DISHES'] +1284-1180-0026-238: hyp=['BUT', 'FIRST', 'I', 'WILL', 'TELL', 'YOU', 'THAT', 'FOR', 'MANY', 'YEARS', 'I', 'HAVE', 'LONGED', 'FOR', 'A', 'SERVANT', 'TO', 'HELP', 'ME', 'WITH', 'THE', 'HOUSEWORK', 'AND', 'TO', 'COOK', 'THE', 'MEALS', 'AND', 'WASH', 'THE', 'DISHES'] +1284-1180-0027-239: ref=['YET', 'THAT', 'TASK', 'WAS', 'NOT', 'SO', 'EASY', 'AS', 'YOU', 'MAY', 'SUPPOSE'] +1284-1180-0027-239: hyp=['YET', 'THAT', 'TASK', 'WAS', 'NOT', 'SO', 'EASY', 'AS', 'YOU', 'MAY', 'SUPPOSE'] +1284-1180-0028-240: ref=['A', 'BED', 'QUILT', 'MADE', 'OF', 'PATCHES', 'OF', 'DIFFERENT', 'KINDS', 'AND', 'COLORS', 'OF', 'CLOTH', 'ALL', 'NEATLY', 'SEWED', 'TOGETHER'] +1284-1180-0028-240: hyp=['A', 'BED', 'QUILT', 'MADE', 'OF', 'PATCHES', 'OF', 'DIFFERENT', 'KINDS', 'AND', 'COLOURS', 'OF', 'CLOTH', 'ALL', 'NEATLY', 'SEWED', 'TOGETHER'] +1284-1180-0029-241: ref=['SOMETIMES', 'IT', 'IS', 'CALLED', 'A', 'CRAZY', 'QUILT', 'BECAUSE', 'THE', 'PATCHES', 'AND', 'COLORS', 'ARE', 'SO', 'MIXED', 'UP'] +1284-1180-0029-241: hyp=['SOMETIMES', 'IT', 'IS', 'CALLED', 'A', 'CRAZY', 'QUILT', 'BECAUSE', 'THE', 'PATCHES', 'AND', 'COLOURS', 'ARE', 'SO', 'MIXED', 'UP'] +1284-1180-0030-242: ref=['WHEN', 'I', 'FOUND', 'IT', 'I', 'SAID', 'TO', 'MYSELF', 'THAT', 'IT', 'WOULD', 'DO', 'NICELY', 'FOR', 'MY', 'SERVANT', 'GIRL', 'FOR', 'WHEN', 'SHE', 'WAS', 'BROUGHT', 'TO', 'LIFE', 'SHE', 'WOULD', 'NOT', 'BE', 'PROUD', 'NOR', 'HAUGHTY', 'AS', 'THE', 'GLASS', 'CAT', 'IS', 'FOR', 'SUCH', 'A', 'DREADFUL', 'MIXTURE', 'OF', 'COLORS', 'WOULD', 'DISCOURAGE', 'HER', 'FROM', 'TRYING', 'TO', 'BE', 'AS', 'DIGNIFIED', 'AS', 'THE', 'BLUE', 'MUNCHKINS', 'ARE'] +1284-1180-0030-242: hyp=['WHEN', 'I', 'FOUND', 'IT', 'I', 'SAID', 'TO', 'MYSELF', 'THAT', 'IT', 'WOULD', 'DO', 'NICELY', 'FOR', 'MY', 'SERVANT', 'GIRL', 'FOR', 'WHEN', 'SHE', 'WAS', 'BROUGHT', 'TO', 'LIFE', 'SHE', 'WOULD', 'NOT', 'BE', 'PROUD', 'NOR', 'HAUGHTY', 'AS', 'THE', 'GLASS', 'CAT', 'IS', 'FOR', 'SUCH', 'A', 'DREADFUL', 'MIXTURE', 'OF', 'COLORS', 'WOULD', 'DISCOURAGE', 'HER', 'FROM', 'TRYING', 'TO', 'BE', 'AS', 'DIGNIFIED', 'AS', 'THE', 'BLUE', 'MUNCHKINS', 'ARE'] +1284-1180-0031-243: ref=['AT', 'THE', 'EMERALD', 'CITY', 'WHERE', 'OUR', 'PRINCESS', 'OZMA', 'LIVES', 'GREEN', 'IS', 'THE', 'POPULAR', 'COLOR'] +1284-1180-0031-243: hyp=['AT', 'THE', 'EMERALD', 'CITY', 'WHERE', 'OUR', 'PRINCESS', 'OZMA', 'LIVES', 'GREEN', 'IS', 'THE', 'POPULAR', 'COLOR'] +1284-1180-0032-244: ref=['I', 'WILL', 'SHOW', 'YOU', 'WHAT', 'A', 'GOOD', 'JOB', 'I', 'DID', 'AND', 'SHE', 'WENT', 'TO', 'A', 'TALL', 'CUPBOARD', 'AND', 'THREW', 'OPEN', 'THE', 'DOORS'] +1284-1180-0032-244: hyp=['I', 'WILL', 'SHOW', 'YOU', 'WHAT', 'A', 'GOOD', 'JOB', 'I', 'DID', 'AND', 'SHE', 'WENT', 'TO', 'A', 'TALL', 'CUPBOARD', 'AND', 'THREW', 'OPEN', 'THE', 'DOORS'] +1284-1181-0000-245: ref=['OJO', 'EXAMINED', 'THIS', 'CURIOUS', 'CONTRIVANCE', 'WITH', 'WONDER'] +1284-1181-0000-245: hyp=['OJO', 'EXAMINED', 'THIS', 'CURIOUS', 'CONTRIVANCE', 'WITH', 'WONDER'] +1284-1181-0001-246: ref=['MARGOLOTTE', 'HAD', 'FIRST', 'MADE', 'THE', "GIRL'S", 'FORM', 'FROM', 'THE', 'PATCHWORK', 'QUILT', 'AND', 'THEN', 'SHE', 'HAD', 'DRESSED', 'IT', 'WITH', 'A', 'PATCHWORK', 'SKIRT', 
'AND', 'AN', 'APRON', 'WITH', 'POCKETS', 'IN', 'IT', 'USING', 'THE', 'SAME', 'GAY', 'MATERIAL', 'THROUGHOUT'] +1284-1181-0001-246: hyp=['MARGOLOTTE', 'HAD', 'FIRST', 'MADE', 'THE', "GIRL'S", 'FORM', 'FROM', 'THE', 'PATCHWORK', 'QUILT', 'AND', 'THEN', 'SHE', 'HAD', 'DRESSED', 'IT', 'WITH', 'A', 'PATCHWORK', 'SKIRT', 'AND', 'AN', 'APRON', 'WITH', 'POCKETS', 'IN', 'IT', 'USING', 'THE', 'SAME', 'GAY', 'MATERIAL', 'THROUGHOUT'] +1284-1181-0002-247: ref=['THE', 'HEAD', 'OF', 'THE', 'PATCHWORK', 'GIRL', 'WAS', 'THE', 'MOST', 'CURIOUS', 'PART', 'OF', 'HER'] +1284-1181-0002-247: hyp=['THE', 'HEAD', 'OF', 'THE', 'PATCHWORK', 'GIRL', 'WAS', 'THE', 'MOST', 'CURIOUS', 'PART', 'OF', 'HER'] +1284-1181-0003-248: ref=['THE', 'HAIR', 'WAS', 'OF', 'BROWN', 'YARN', 'AND', 'HUNG', 'DOWN', 'ON', 'HER', 'NECK', 'IN', 'SEVERAL', 'NEAT', 'BRAIDS'] +1284-1181-0003-248: hyp=['THE', 'HAIR', 'WAS', 'OF', 'BROWN', 'YARN', 'AND', 'HUNG', 'DOWN', 'ON', 'HER', 'NECK', 'IN', 'SEVERAL', 'NEAT', 'BRAIDS'] +1284-1181-0004-249: ref=['GOLD', 'IS', 'THE', 'MOST', 'COMMON', 'METAL', 'IN', 'THE', 'LAND', 'OF', 'OZ', 'AND', 'IS', 'USED', 'FOR', 'MANY', 'PURPOSES', 'BECAUSE', 'IT', 'IS', 'SOFT', 'AND', 'PLIABLE'] +1284-1181-0004-249: hyp=['GOLD', 'IS', 'THE', 'MOST', 'COMMON', 'METAL', 'IN', 'THE', 'LAND', 'OF', 'OZ', 'AND', 'IS', 'USED', 'FOR', 'MANY', 'PURPOSES', 'BECAUSE', 'IT', 'IS', 'SOFT', 'AND', 'PLIABLE'] +1284-1181-0005-250: ref=['NO', 'I', 'FORGOT', 'ALL', 'ABOUT', 'THE', 'BRAINS', 'EXCLAIMED', 'THE', 'WOMAN'] +1284-1181-0005-250: hyp=['NO', 'I', 'FORGOT', 'ALL', 'ABOUT', 'THE', 'BRAINS', 'EXCLAIMED', 'THE', 'WOMAN'] +1284-1181-0006-251: ref=['WELL', 'THAT', 'MAY', 'BE', 'TRUE', 'AGREED', 'MARGOLOTTE', 'BUT', 'ON', 'THE', 'CONTRARY', 'A', 'SERVANT', 'WITH', 'TOO', 'MUCH', 'BRAINS', 'IS', 'SURE', 'TO', 'BECOME', 'INDEPENDENT', 'AND', 'HIGH', 'AND', 'MIGHTY', 'AND', 'FEEL', 'ABOVE', 'HER', 'WORK'] +1284-1181-0006-251: hyp=['WELL', 'THAT', 'MAY', 'BE', 'TRUE', 'AGREED', 'MARGOLOTTE', 'BUT', 'ON', 'THE', 'CONTRARY', 'A', 'SERVANT', 'WITH', 'TOO', 'MUCH', 'BRAINS', 'IS', 'SURE', 'TO', 'BECOME', 'INDEPENDENT', 'AND', 'HIGH', 'AND', 'MIGHTY', 'AND', 'FEEL', 'ABOVE', 'HER', 'WORK'] +1284-1181-0007-252: ref=['SHE', 'POURED', 'INTO', 'THE', 'DISH', 'A', 'QUANTITY', 'FROM', 'EACH', 'OF', 'THESE', 'BOTTLES'] +1284-1181-0007-252: hyp=['SHE', 'POURED', 'INTO', 'THE', 'DISH', 'A', 'QUANTITY', 'FROM', 'EACH', 'OF', 'THESE', 'BOTTLES'] +1284-1181-0008-253: ref=['I', 'THINK', 'THAT', 'WILL', 'DO', 'SHE', 'CONTINUED', 'FOR', 'THE', 'OTHER', 'QUALITIES', 'ARE', 'NOT', 'NEEDED', 'IN', 'A', 'SERVANT'] +1284-1181-0008-253: hyp=['I', 'THINK', 'THAT', 'WILL', 'DO', 'SHE', 'CONTINUED', 'FOR', 'THE', 'OTHER', 'QUALITIES', 'ARE', 'NOT', 'NEEDED', 'IN', 'A', 'SERVANT'] +1284-1181-0009-254: ref=['SHE', 'RAN', 'TO', 'HER', "HUSBAND'S", 'SIDE', 'AT', 'ONCE', 'AND', 'HELPED', 'HIM', 'LIFT', 'THE', 'FOUR', 'KETTLES', 'FROM', 'THE', 'FIRE'] +1284-1181-0009-254: hyp=['SHE', 'RAN', 'TO', 'HER', "HUSBAND'S", 'SIDE', 'AT', 'ONCE', 'AND', 'HELPED', 'HIM', 'LIFT', 'THE', 'FOUR', 'KETTLES', 'FROM', 'THE', 'FIRE'] +1284-1181-0010-255: ref=['THEIR', 'CONTENTS', 'HAD', 'ALL', 'BOILED', 'AWAY', 'LEAVING', 'IN', 'THE', 'BOTTOM', 'OF', 'EACH', 'KETTLE', 'A', 'FEW', 'GRAINS', 'OF', 'FINE', 'WHITE', 'POWDER'] +1284-1181-0010-255: hyp=['THEIR', 'CONTENTS', 'HAD', 'ALL', 'BOILED', 'AWAY', 'LEAVING', 'IN', 'THE', 'BOTTOM', 'OF', 'EACH', 'KETTLE', 'A', 'FEW', 'GRAINS', 'OF', 'FINE', 'WHITE', 'POWDER'] +1284-1181-0011-256: ref=['VERY', 'CAREFULLY', 'THE', 'MAGICIAN', 
'REMOVED', 'THIS', 'POWDER', 'PLACING', 'IT', 'ALL', 'TOGETHER', 'IN', 'A', 'GOLDEN', 'DISH', 'WHERE', 'HE', 'MIXED', 'IT', 'WITH', 'A', 'GOLDEN', 'SPOON'] +1284-1181-0011-256: hyp=['VERY', 'CAREFULLY', 'THE', 'MAGICIAN', 'REMOVED', 'THIS', 'POWDER', 'PLACING', 'IT', 'ALTOGETHER', 'IN', 'A', 'GOLDEN', 'DISH', 'WHERE', 'HE', 'MIXED', 'IT', 'WITH', 'A', 'GOLDEN', 'SPOON'] +1284-1181-0012-257: ref=['NO', 'ONE', 'SAW', 'HIM', 'DO', 'THIS', 'FOR', 'ALL', 'WERE', 'LOOKING', 'AT', 'THE', 'POWDER', 'OF', 'LIFE', 'BUT', 'SOON', 'THE', 'WOMAN', 'REMEMBERED', 'WHAT', 'SHE', 'HAD', 'BEEN', 'DOING', 'AND', 'CAME', 'BACK', 'TO', 'THE', 'CUPBOARD'] +1284-1181-0012-257: hyp=['NO', 'ONE', 'SAW', 'HIM', 'DO', 'THIS', 'FOR', 'ALL', 'WERE', 'LOOKING', 'AT', 'THE', 'POWDER', 'OF', 'LIFE', 'BUT', 'SOON', 'THE', 'WOMAN', 'REMEMBERED', 'WHAT', 'SHE', 'HAD', 'BEEN', 'DOING', 'AND', 'CAME', 'BACK', 'TO', 'THE', 'CUPBOARD'] +1284-1181-0013-258: ref=['OJO', 'BECAME', 'A', 'BIT', 'UNEASY', 'AT', 'THIS', 'FOR', 'HE', 'HAD', 'ALREADY', 'PUT', 'QUITE', 'A', 'LOT', 'OF', 'THE', 'CLEVERNESS', 'POWDER', 'IN', 'THE', 'DISH', 'BUT', 'HE', 'DARED', 'NOT', 'INTERFERE', 'AND', 'SO', 'HE', 'COMFORTED', 'HIMSELF', 'WITH', 'THE', 'THOUGHT', 'THAT', 'ONE', 'CANNOT', 'HAVE', 'TOO', 'MUCH', 'CLEVERNESS'] +1284-1181-0013-258: hyp=['OJO', 'BECAME', 'A', 'BIT', 'UNEASY', 'AT', 'THIS', 'FOR', 'HE', 'HAD', 'ALREADY', 'PUT', 'QUITE', 'A', 'LOT', 'OF', 'THE', 'CLEVERNESS', 'POWDER', 'IN', 'THE', 'DISH', 'BUT', 'HE', 'DARED', 'NOT', 'INTERFERE', 'AND', 'SO', 'HE', 'COMFORTED', 'HIMSELF', 'WITH', 'THE', 'THOUGHT', 'THAT', 'ONE', 'CANNOT', 'HAVE', 'TOO', 'MUCH', 'CLEVERNESS'] +1284-1181-0014-259: ref=['HE', 'SELECTED', 'A', 'SMALL', 'GOLD', 'BOTTLE', 'WITH', 'A', 'PEPPER', 'BOX', 'TOP', 'SO', 'THAT', 'THE', 'POWDER', 'MIGHT', 'BE', 'SPRINKLED', 'ON', 'ANY', 'OBJECT', 'THROUGH', 'THE', 'SMALL', 'HOLES'] +1284-1181-0014-259: hyp=['HE', 'SELECTED', 'A', 'SMALL', 'GOLD', 'BOTTLE', 'WITH', 'A', 'PEPPER', 'BOX', 'TOP', 'SO', 'THAT', 'THE', 'POWDER', 'MIGHT', 'BE', 'SPRINKLED', 'ON', 'ANY', 'OBJECT', 'THROUGH', 'THE', 'SMALL', 'HOLES'] +1284-1181-0015-260: ref=['MOST', 'PEOPLE', 'TALK', 'TOO', 'MUCH', 'SO', 'IT', 'IS', 'A', 'RELIEF', 'TO', 'FIND', 'ONE', 'WHO', 'TALKS', 'TOO', 'LITTLE'] +1284-1181-0015-260: hyp=['MOST', 'PEOPLE', 'TALK', 'TOO', 'MUCH', 'SO', 'IT', 'IS', 'A', 'RELIEF', 'TO', 'FIND', 'ONE', 'WHO', 'TALKS', 'TOO', 'LITTLE'] +1284-1181-0016-261: ref=['I', 'AM', 'NOT', 'ALLOWED', 'TO', 'PERFORM', 'MAGIC', 'EXCEPT', 'FOR', 'MY', 'OWN', 'AMUSEMENT', 'HE', 'TOLD', 'HIS', 'VISITORS', 'AS', 'HE', 'LIGHTED', 'A', 'PIPE', 'WITH', 'A', 'CROOKED', 'STEM', 'AND', 'BEGAN', 'TO', 'SMOKE'] +1284-1181-0016-261: hyp=['I', 'AM', 'NOT', 'ALLOWED', 'TO', 'PERFORM', 'MAGIC', 'EXCEPT', 'FOR', 'MY', 'OWN', 'AMUSEMENT', 'HE', 'TOLD', 'HIS', 'VISITORS', 'AS', 'HE', 'LIGHTED', 'A', 'PIPE', 'WITH', 'A', 'CROOKED', 'STEM', 'AND', 'BEGAN', 'TO', 'SMOKE'] +1284-1181-0017-262: ref=['THE', 'WIZARD', 'OF', 'OZ', 'WHO', 'USED', 'TO', 'BE', 'A', 'HUMBUG', 'AND', 'KNEW', 'NO', 'MAGIC', 'AT', 'ALL', 'HAS', 'BEEN', 'TAKING', 'LESSONS', 'OF', 'GLINDA', 'AND', "I'M", 'TOLD', 'HE', 'IS', 'GETTING', 'TO', 'BE', 'A', 'PRETTY', 'GOOD', 'WIZARD', 'BUT', 'HE', 'IS', 'MERELY', 'THE', 'ASSISTANT', 'OF', 'THE', 'GREAT', 'SORCERESS'] +1284-1181-0017-262: hyp=['THE', 'WIZARD', 'OF', 'OZ', 'WHO', 'USED', 'TO', 'BE', 'A', 'HUMBUG', 'AND', 'KNEW', 'NO', 'MAGIC', 'AT', 'ALL', 'HAS', 'BEEN', 'TAKING', 'LESSONS', 'OF', 'GLINDA', 'AND', "I'M", 'TOLD', 'HE', 'IS', 'GETTING', 'TO', 'BE', 'A', 
'PRETTY', 'GOOD', 'WIZARD', 'BUT', 'HE', 'IS', 'MERELY', 'THE', 'ASSISTANT', 'OF', 'THE', 'GREAT', 'SORCERESS'] +1284-1181-0018-263: ref=['IT', 'TRULY', 'IS', 'ASSERTED', 'THE', 'MAGICIAN'] +1284-1181-0018-263: hyp=['IT', 'TRULY', 'IS', 'ASSERTED', 'THE', 'MAGICIAN'] +1284-1181-0019-264: ref=['I', 'NOW', 'USE', 'THEM', 'AS', 'ORNAMENTAL', 'STATUARY', 'IN', 'MY', 'GARDEN'] +1284-1181-0019-264: hyp=['I', 'NOW', 'USE', 'THEM', 'AS', 'ORNAMENTAL', 'STATUARY', 'IN', 'MY', 'GARDEN'] +1284-1181-0020-265: ref=['DEAR', 'ME', 'WHAT', 'A', 'CHATTERBOX', "YOU'RE", 'GETTING', 'TO', 'BE', 'UNC', 'REMARKED', 'THE', 'MAGICIAN', 'WHO', 'WAS', 'PLEASED', 'WITH', 'THE', 'COMPLIMENT'] +1284-1181-0020-265: hyp=['DEAR', 'ME', 'WHAT', 'A', 'CHATTERBOX', "YOU'RE", 'GETTING', 'TO', 'BE', 'ONK', 'REMARKED', 'THE', 'MAGICIAN', 'WHO', 'WAS', 'PLEASED', 'WITH', 'THE', 'COMPLIMENT'] +1284-1181-0021-266: ref=['ASKED', 'THE', 'VOICE', 'IN', 'SCORNFUL', 'ACCENTS'] +1284-1181-0021-266: hyp=['ASKED', 'THE', 'VOICE', 'IN', 'SCORNFUL', 'ACCENTS'] +1284-134647-0000-267: ref=['THE', 'GRATEFUL', 'APPLAUSE', 'OF', 'THE', 'CLERGY', 'HAS', 'CONSECRATED', 'THE', 'MEMORY', 'OF', 'A', 'PRINCE', 'WHO', 'INDULGED', 'THEIR', 'PASSIONS', 'AND', 'PROMOTED', 'THEIR', 'INTEREST'] +1284-134647-0000-267: hyp=['THE', 'GRATEFUL', 'APPLAUSE', 'OF', 'THE', 'CLERGY', 'HAS', 'CONSECRATED', 'THE', 'MEMORY', 'OF', 'A', 'PRINCE', 'WHO', 'INDULGED', 'THEIR', 'PASSIONS', 'AND', 'PROMOTED', 'THEIR', 'INTEREST'] +1284-134647-0001-268: ref=['THE', 'EDICT', 'OF', 'MILAN', 'THE', 'GREAT', 'CHARTER', 'OF', 'TOLERATION', 'HAD', 'CONFIRMED', 'TO', 'EACH', 'INDIVIDUAL', 'OF', 'THE', 'ROMAN', 'WORLD', 'THE', 'PRIVILEGE', 'OF', 'CHOOSING', 'AND', 'PROFESSING', 'HIS', 'OWN', 'RELIGION'] +1284-134647-0001-268: hyp=['THE', 'EDICT', 'OF', 'MILAN', 'THE', 'GREAT', 'CHARTER', 'OF', 'TOLERATION', 'HAD', 'CONFIRMED', 'TO', 'EACH', 'INDIVIDUAL', 'OF', 'THE', 'ROMAN', 'WORLD', 'THE', 'PRIVILEGE', 'OF', 'CHOOSING', 'AND', 'PROFESSING', 'HIS', 'OWN', 'RELIGION'] +1284-134647-0002-269: ref=['BUT', 'THIS', 'INESTIMABLE', 'PRIVILEGE', 'WAS', 'SOON', 'VIOLATED', 'WITH', 'THE', 'KNOWLEDGE', 'OF', 'TRUTH', 'THE', 'EMPEROR', 'IMBIBED', 'THE', 'MAXIMS', 'OF', 'PERSECUTION', 'AND', 'THE', 'SECTS', 'WHICH', 'DISSENTED', 'FROM', 'THE', 'CATHOLIC', 'CHURCH', 'WERE', 'AFFLICTED', 'AND', 'OPPRESSED', 'BY', 'THE', 'TRIUMPH', 'OF', 'CHRISTIANITY'] +1284-134647-0002-269: hyp=['BUT', 'THIS', 'INESTIMABLE', 'PRIVILEGE', 'WAS', 'SOON', 'VIOLATED', 'WITH', 'A', 'KNOWLEDGE', 'OF', 'TRUTH', 'THE', 'EMPEROR', 'IMBIBED', 'THE', 'MAXIMS', 'OF', 'PERSECUTION', 'AND', 'THE', 'SECTS', 'WHICH', 'DISSENTED', 'FROM', 'THE', 'CATHOLIC', 'CHURCH', 'WERE', 'AFFLICTED', 'AND', 'OPPRESSED', 'BY', 'THE', 'TRIUMPH', 'OF', 'CHRISTIANITY'] +1284-134647-0003-270: ref=['CONSTANTINE', 'EASILY', 'BELIEVED', 'THAT', 'THE', 'HERETICS', 'WHO', 'PRESUMED', 'TO', 'DISPUTE', 'HIS', 'OPINIONS', 'OR', 'TO', 'OPPOSE', 'HIS', 'COMMANDS', 'WERE', 'GUILTY', 'OF', 'THE', 'MOST', 'ABSURD', 'AND', 'CRIMINAL', 'OBSTINACY', 'AND', 'THAT', 'A', 'SEASONABLE', 'APPLICATION', 'OF', 'MODERATE', 'SEVERITIES', 'MIGHT', 'SAVE', 'THOSE', 'UNHAPPY', 'MEN', 'FROM', 'THE', 'DANGER', 'OF', 'AN', 'EVERLASTING', 'CONDEMNATION'] +1284-134647-0003-270: hyp=['CONSTANTINE', 'EASILY', 'BELIEVED', 'THAT', 'THE', 'HERETICS', 'WHO', 'PRESUMED', 'TO', 'DISPUTE', 'HIS', 'OPINIONS', 'OR', 'TO', 'OPPOSE', 'HIS', 'COMMANDS', 'WERE', 'GUILTY', 'OF', 'THE', 'MOST', 'ABSURD', 'AND', 'CRIMINAL', 'OBSTINACY', 'AND', 'THAT', 'A', 'SEASONABLE', 'APPLICATION', 'OF', 
'MODERATE', 'SEVERITIES', 'MIGHT', 'SAVE', 'THOSE', 'UNHAPPY', 'MEN', 'FROM', 'THE', 'DANGER', 'OF', 'AN', 'EVERLASTING', 'CONDEMNATION'] +1284-134647-0004-271: ref=['SOME', 'OF', 'THE', 'PENAL', 'REGULATIONS', 'WERE', 'COPIED', 'FROM', 'THE', 'EDICTS', 'OF', 'DIOCLETIAN', 'AND', 'THIS', 'METHOD', 'OF', 'CONVERSION', 'WAS', 'APPLAUDED', 'BY', 'THE', 'SAME', 'BISHOPS', 'WHO', 'HAD', 'FELT', 'THE', 'HAND', 'OF', 'OPPRESSION', 'AND', 'PLEADED', 'FOR', 'THE', 'RIGHTS', 'OF', 'HUMANITY'] +1284-134647-0004-271: hyp=['SOME', 'OF', 'THE', 'PENAL', 'REGULATIONS', 'WERE', 'COPIED', 'FROM', 'THE', 'EDICTS', 'OF', 'DIOCLETIAN', 'AND', 'THIS', 'METHOD', 'OF', 'CONVERSION', 'WAS', 'APPLAUDED', 'BY', 'THE', 'SAME', 'BISHOPS', 'WHO', 'HAD', 'FELT', 'THE', 'HAND', 'OF', 'OPPRESSION', 'AND', 'PLEADED', 'FOR', 'THE', 'RIGHTS', 'OF', 'HUMANITY'] +1284-134647-0005-272: ref=['THEY', 'ASSERTED', 'WITH', 'CONFIDENCE', 'AND', 'ALMOST', 'WITH', 'EXULTATION', 'THAT', 'THE', 'APOSTOLICAL', 'SUCCESSION', 'WAS', 'INTERRUPTED', 'THAT', 'ALL', 'THE', 'BISHOPS', 'OF', 'EUROPE', 'AND', 'ASIA', 'WERE', 'INFECTED', 'BY', 'THE', 'CONTAGION', 'OF', 'GUILT', 'AND', 'SCHISM', 'AND', 'THAT', 'THE', 'PREROGATIVES', 'OF', 'THE', 'CATHOLIC', 'CHURCH', 'WERE', 'CONFINED', 'TO', 'THE', 'CHOSEN', 'PORTION', 'OF', 'THE', 'AFRICAN', 'BELIEVERS', 'WHO', 'ALONE', 'HAD', 'PRESERVED', 'INVIOLATE', 'THE', 'INTEGRITY', 'OF', 'THEIR', 'FAITH', 'AND', 'DISCIPLINE'] +1284-134647-0005-272: hyp=['THEY', 'ASSERTED', 'WITH', 'CONFIDENCE', 'AND', 'ALMOST', 'WITH', 'EXULTATION', 'THAT', 'THE', 'APOSTOLICAL', 'SUCCESSION', 'WAS', 'INTERRUPTED', 'THAT', 'ALL', 'THE', 'BISHOPS', 'OF', 'EUROPE', 'AND', 'ASIA', 'WERE', 'INFECTED', 'BY', 'THE', 'CONTAGION', 'OF', 'GUILT', 'AND', 'SCHISM', 'AND', 'THAT', 'THE', 'PREROGATIVES', 'OF', 'THE', 'CATHOLIC', 'CHURCH', 'WERE', 'CONFINED', 'TO', 'THE', 'CHOSEN', 'PORTION', 'OF', 'THE', 'AFRICAN', 'BELIEVERS', 'WHO', 'ALONE', 'HAD', 'PRESERVED', 'INVIOLATE', 'THE', 'INTEGRITY', 'OF', 'THEIR', 'FAITH', 'AND', 'DISCIPLINE'] +1284-134647-0006-273: ref=['BISHOPS', 'VIRGINS', 'AND', 'EVEN', 'SPOTLESS', 'INFANTS', 'WERE', 'SUBJECTED', 'TO', 'THE', 'DISGRACE', 'OF', 'A', 'PUBLIC', 'PENANCE', 'BEFORE', 'THEY', 'COULD', 'BE', 'ADMITTED', 'TO', 'THE', 'COMMUNION', 'OF', 'THE', 'DONATISTS'] +1284-134647-0006-273: hyp=['BISHOPS', 'VIRGINS', 'AND', 'EVEN', 'SPOTLESS', 'INFANTS', 'WERE', 'SUBJECTED', 'TO', 'THE', 'DISGRACE', 'OF', 'A', 'PUBLIC', 'PENANCE', 'BEFORE', 'THEY', 'COULD', 'BE', 'ADMITTED', 'TO', 'THE', 'COMMUNION', 'OF', 'THE', 'DONATISTS'] +1284-134647-0007-274: ref=['PROSCRIBED', 'BY', 'THE', 'CIVIL', 'AND', 'ECCLESIASTICAL', 'POWERS', 'OF', 'THE', 'EMPIRE', 'THE', 'DONATISTS', 'STILL', 'MAINTAINED', 'IN', 'SOME', 'PROVINCES', 'PARTICULARLY', 'IN', 'NUMIDIA', 'THEIR', 'SUPERIOR', 'NUMBERS', 'AND', 'FOUR', 'HUNDRED', 'BISHOPS', 'ACKNOWLEDGED', 'THE', 'JURISDICTION', 'OF', 'THEIR', 'PRIMATE'] +1284-134647-0007-274: hyp=['PROSCRIBED', 'BY', 'THE', 'CIVIL', 'AND', 'ECCLESIASTICAL', 'POWERS', 'OF', 'THE', 'EMPIRE', 'THE', 'DONATISTS', 'STILL', 'MAINTAINED', 'IN', 'SOME', 'PROVINCES', 'PARTICULARLY', 'IN', 'NUMIDIA', 'THEIR', 'SUPERIOR', 'NUMBERS', 'AND', 'FOUR', 'HUNDRED', 'BISHOPS', 'ACKNOWLEDGED', 'THE', 'JURISDICTION', 'OF', 'THEIR', 'PRIMATE'] +1320-122612-0000-275: ref=['SINCE', 'THE', 'PERIOD', 'OF', 'OUR', 'TALE', 'THE', 'ACTIVE', 'SPIRIT', 'OF', 'THE', 'COUNTRY', 'HAS', 'SURROUNDED', 'IT', 'WITH', 'A', 'BELT', 'OF', 'RICH', 'AND', 'THRIVING', 'SETTLEMENTS', 'THOUGH', 'NONE', 'BUT', 'THE', 'HUNTER', 'OR', 'THE', 
'SAVAGE', 'IS', 'EVER', 'KNOWN', 'EVEN', 'NOW', 'TO', 'PENETRATE', 'ITS', 'WILD', 'RECESSES'] +1320-122612-0000-275: hyp=['SINCE', 'THE', 'PERIOD', 'OF', 'OUR', 'TALE', 'THE', 'ACTIVE', 'SPIRIT', 'OF', 'THE', 'COUNTRY', 'HAS', 'SURROUNDED', 'IT', 'WITH', 'A', 'BELT', 'OF', 'RICH', 'AND', 'THRIVING', 'SETTLEMENTS', 'THOUGH', 'NONE', 'BUT', 'THE', 'HUNTER', 'OR', 'THE', 'SAVAGE', 'IS', 'EVER', 'KNOWN', 'EVEN', 'NOW', 'TO', 'PENETRATE', 'ITS', 'WILD', 'RECESSES'] +1320-122612-0001-276: ref=['THE', 'DEWS', 'WERE', 'SUFFERED', 'TO', 'EXHALE', 'AND', 'THE', 'SUN', 'HAD', 'DISPERSED', 'THE', 'MISTS', 'AND', 'WAS', 'SHEDDING', 'A', 'STRONG', 'AND', 'CLEAR', 'LIGHT', 'IN', 'THE', 'FOREST', 'WHEN', 'THE', 'TRAVELERS', 'RESUMED', 'THEIR', 'JOURNEY'] +1320-122612-0001-276: hyp=['THE', 'DEWS', 'WERE', 'SUFFERED', 'TO', 'EXHALE', 'AND', 'THE', 'SUN', 'HAD', 'DISPERSED', 'THE', 'MISTS', 'AND', 'WAS', 'SHEDDING', 'A', 'STRONG', 'AND', 'CLEAR', 'LIGHT', 'IN', 'THE', 'FOREST', 'WHEN', 'THE', 'TRAVELLERS', 'RESUMED', 'THEIR', 'JOURNEY'] +1320-122612-0002-277: ref=['AFTER', 'PROCEEDING', 'A', 'FEW', 'MILES', 'THE', 'PROGRESS', 'OF', 'HAWKEYE', 'WHO', 'LED', 'THE', 'ADVANCE', 'BECAME', 'MORE', 'DELIBERATE', 'AND', 'WATCHFUL'] +1320-122612-0002-277: hyp=['AFTER', 'PROCEEDING', 'A', 'FEW', 'MILES', 'THE', 'PROGRESS', 'OF', 'HAWKEYE', 'WHO', 'LED', 'THE', 'ADVANCE', 'BECAME', 'MORE', 'DELIBERATE', 'AND', 'WATCHFUL'] +1320-122612-0003-278: ref=['HE', 'OFTEN', 'STOPPED', 'TO', 'EXAMINE', 'THE', 'TREES', 'NOR', 'DID', 'HE', 'CROSS', 'A', 'RIVULET', 'WITHOUT', 'ATTENTIVELY', 'CONSIDERING', 'THE', 'QUANTITY', 'THE', 'VELOCITY', 'AND', 'THE', 'COLOR', 'OF', 'ITS', 'WATERS'] +1320-122612-0003-278: hyp=['HE', 'OFTEN', 'STOPPED', 'TO', 'EXAMINE', 'THE', 'TREES', 'NOR', 'DID', 'HE', 'CROSS', 'A', 'RIVULET', 'WITHOUT', 'ATTENTIVELY', 'CONSIDERING', 'THE', 'QUANTITY', 'THE', 'VELOCITY', 'AND', 'THE', 'COLOR', 'OF', 'ITS', 'WATERS'] +1320-122612-0004-279: ref=['DISTRUSTING', 'HIS', 'OWN', 'JUDGMENT', 'HIS', 'APPEALS', 'TO', 'THE', 'OPINION', 'OF', 'CHINGACHGOOK', 'WERE', 'FREQUENT', 'AND', 'EARNEST'] +1320-122612-0004-279: hyp=['DISTRUSTING', 'HIS', 'OWN', 'JUDGMENT', 'HIS', 'APPEALS', 'TO', 'THE', 'OPINION', 'OF', 'CHINGACHGOOK', 'WERE', 'FREQUENT', 'AND', 'EARNEST'] +1320-122612-0005-280: ref=['YET', 'HERE', 'ARE', 'WE', 'WITHIN', 'A', 'SHORT', 'RANGE', 'OF', 'THE', 'SCAROONS', 'AND', 'NOT', 'A', 'SIGN', 'OF', 'A', 'TRAIL', 'HAVE', 'WE', 'CROSSED'] +1320-122612-0005-280: hyp=['YET', 'HERE', 'ARE', 'WE', 'WITHIN', 'A', 'SHORT', 'RANGE', 'OF', 'THE', 'SCAROONS', 'AND', 'NOT', 'A', 'SIGN', 'OF', 'A', 'TRAIL', 'HAVE', 'WE', 'CROSSED'] +1320-122612-0006-281: ref=['LET', 'US', 'RETRACE', 'OUR', 'STEPS', 'AND', 'EXAMINE', 'AS', 'WE', 'GO', 'WITH', 'KEENER', 'EYES'] +1320-122612-0006-281: hyp=['LET', 'US', 'RETRACE', 'OUR', 'STEPS', 'AND', 'EXAMINE', 'AS', 'WE', 'GO', 'WITH', 'KEENER', 'EYES'] +1320-122612-0007-282: ref=['CHINGACHGOOK', 'HAD', 'CAUGHT', 'THE', 'LOOK', 'AND', 'MOTIONING', 'WITH', 'HIS', 'HAND', 'HE', 'BADE', 'HIM', 'SPEAK'] +1320-122612-0007-282: hyp=['CHINGACHGOOK', 'HAD', 'CAUGHT', 'THE', 'LOOK', 'AND', 'MOTIONING', 'WITH', 'HIS', 'HAND', 'HE', 'BADE', 'HIM', 'SPEAK'] +1320-122612-0008-283: ref=['THE', 'EYES', 'OF', 'THE', 'WHOLE', 'PARTY', 'FOLLOWED', 'THE', 'UNEXPECTED', 'MOVEMENT', 'AND', 'READ', 'THEIR', 'SUCCESS', 'IN', 'THE', 'AIR', 'OF', 'TRIUMPH', 'THAT', 'THE', 'YOUTH', 'ASSUMED'] +1320-122612-0008-283: hyp=['THE', 'EYES', 'OF', 'THE', 'WHOLE', 'PARTY', 'FOLLOWED', 'THE', 'UNEXPECTED', 'MOVEMENT', 'AND', 
'READ', 'THEIR', 'SUCCESS', 'IN', 'THE', 'AIR', 'OF', 'TRIUMPH', 'THAT', 'THE', 'YOUTH', 'ASSUMED'] +1320-122612-0009-284: ref=['IT', 'WOULD', 'HAVE', 'BEEN', 'MORE', 'WONDERFUL', 'HAD', 'HE', 'SPOKEN', 'WITHOUT', 'A', 'BIDDING'] +1320-122612-0009-284: hyp=['IT', 'WOULD', 'HAVE', 'BEEN', 'MORE', 'WONDERFUL', 'HAD', 'HE', 'SPOKEN', 'WITHOUT', 'A', 'BIDDING'] +1320-122612-0010-285: ref=['SEE', 'SAID', 'UNCAS', 'POINTING', 'NORTH', 'AND', 'SOUTH', 'AT', 'THE', 'EVIDENT', 'MARKS', 'OF', 'THE', 'BROAD', 'TRAIL', 'ON', 'EITHER', 'SIDE', 'OF', 'HIM', 'THE', 'DARK', 'HAIR', 'HAS', 'GONE', 'TOWARD', 'THE', 'FOREST'] +1320-122612-0010-285: hyp=['SEE', 'SAID', 'UNCAS', 'POINTING', 'NORTH', 'AND', 'SOUTH', 'AT', 'THE', 'EVIDENT', 'MARKS', 'OF', 'THE', 'BROAD', 'TRAIL', 'ON', 'EITHER', 'SIDE', 'OF', 'HIM', 'THE', 'DARK', 'HAIR', 'HAS', 'GONE', 'TOWARD', 'THE', 'FOREST'] +1320-122612-0011-286: ref=['IF', 'A', 'ROCK', 'OR', 'A', 'RIVULET', 'OR', 'A', 'BIT', 'OF', 'EARTH', 'HARDER', 'THAN', 'COMMON', 'SEVERED', 'THE', 'LINKS', 'OF', 'THE', 'CLEW', 'THEY', 'FOLLOWED', 'THE', 'TRUE', 'EYE', 'OF', 'THE', 'SCOUT', 'RECOVERED', 'THEM', 'AT', 'A', 'DISTANCE', 'AND', 'SELDOM', 'RENDERED', 'THE', 'DELAY', 'OF', 'A', 'SINGLE', 'MOMENT', 'NECESSARY'] +1320-122612-0011-286: hyp=['IF', 'A', 'ROCK', 'OR', 'A', 'RIVULET', 'OR', 'A', 'BIT', 'OF', 'EARTH', 'HARDER', 'THAN', 'COMMON', 'SEVERED', 'THE', 'LINKS', 'OF', 'THE', 'CLUE', 'THEY', 'FOLLOWED', 'THE', 'TRUE', 'EYE', 'OF', 'THE', 'SCOUT', 'RECOVERED', 'THEM', 'AT', 'A', 'DISTANCE', 'AND', 'SELDOM', 'RENDERED', 'THE', 'DELAY', 'OF', 'A', 'SINGLE', 'MOMENT', 'NECESSARY'] +1320-122612-0012-287: ref=['EXTINGUISHED', 'BRANDS', 'WERE', 'LYING', 'AROUND', 'A', 'SPRING', 'THE', 'OFFALS', 'OF', 'A', 'DEER', 'WERE', 'SCATTERED', 'ABOUT', 'THE', 'PLACE', 'AND', 'THE', 'TREES', 'BORE', 'EVIDENT', 'MARKS', 'OF', 'HAVING', 'BEEN', 'BROWSED', 'BY', 'THE', 'HORSES'] +1320-122612-0012-287: hyp=['EXTINGUISHED', 'BRANDS', 'WERE', 'LYING', 'AROUND', 'A', 'SPRING', 'THE', 'OFFALS', 'OF', 'A', 'DEER', 'WERE', 'SCATTERED', 'ABOUT', 'THE', 'PLACE', 'AND', 'THE', 'TREES', 'BORE', 'EVIDENT', 'MARKS', 'OF', 'HAVING', 'BEEN', 'BROWSED', 'BY', 'THE', 'HORSES'] +1320-122612-0013-288: ref=['A', 'CIRCLE', 'OF', 'A', 'FEW', 'HUNDRED', 'FEET', 'IN', 'CIRCUMFERENCE', 'WAS', 'DRAWN', 'AND', 'EACH', 'OF', 'THE', 'PARTY', 'TOOK', 'A', 'SEGMENT', 'FOR', 'HIS', 'PORTION'] +1320-122612-0013-288: hyp=['A', 'CIRCLE', 'OF', 'A', 'FEW', 'HUNDRED', 'FEET', 'IN', 'CIRCUMFERENCE', 'WAS', 'DRAWN', 'AND', 'EACH', 'OF', 'THE', 'PARTY', 'TOOK', 'A', 'SEGMENT', 'FOR', 'HIS', 'PORTION'] +1320-122612-0014-289: ref=['THE', 'EXAMINATION', 'HOWEVER', 'RESULTED', 'IN', 'NO', 'DISCOVERY'] +1320-122612-0014-289: hyp=['THE', 'EXAMINATION', 'HOWEVER', 'RESULTED', 'IN', 'NO', 'DISCOVERY'] +1320-122612-0015-290: ref=['THE', 'WHOLE', 'PARTY', 'CROWDED', 'TO', 'THE', 'SPOT', 'WHERE', 'UNCAS', 'POINTED', 'OUT', 'THE', 'IMPRESSION', 'OF', 'A', 'MOCCASIN', 'IN', 'THE', 'MOIST', 'ALLUVION'] +1320-122612-0015-290: hyp=['THE', 'WHOLE', 'PARTY', 'CROWDED', 'TO', 'THE', 'SPOT', 'WHERE', 'UNCAS', 'POINTED', 'OUT', 'THE', 'IMPRESSION', 'OF', 'A', 'MOCCASIN', 'IN', 'THE', 'MOIST', 'ALLUVIAN'] +1320-122612-0016-291: ref=['RUN', 'BACK', 'UNCAS', 'AND', 'BRING', 'ME', 'THE', 'SIZE', 'OF', 'THE', "SINGER'S", 'FOOT'] +1320-122612-0016-291: hyp=['RUN', 'BACK', 'UNCAS', 'AND', 'BRING', 'ME', 'THE', 'SIZE', 'OF', 'THE', "SINGER'S", 'FOOT'] +1320-122617-0000-292: ref=['NOTWITHSTANDING', 'THE', 'HIGH', 'RESOLUTION', 'OF', 'HAWKEYE', 'HE', 'FULLY', 
'COMPREHENDED', 'ALL', 'THE', 'DIFFICULTIES', 'AND', 'DANGER', 'HE', 'WAS', 'ABOUT', 'TO', 'INCUR'] +1320-122617-0000-292: hyp=['NOTWITHSTANDING', 'THE', 'HIGH', 'RESOLUTION', 'OF', 'HAWKEYE', 'HE', 'FULLY', 'COMPREHENDED', 'ALL', 'THE', 'DIFFICULTIES', 'AND', 'DANGER', 'HE', 'WAS', 'ABOUT', 'TO', 'INCUR'] +1320-122617-0001-293: ref=['IN', 'HIS', 'RETURN', 'TO', 'THE', 'CAMP', 'HIS', 'ACUTE', 'AND', 'PRACTISED', 'INTELLECTS', 'WERE', 'INTENTLY', 'ENGAGED', 'IN', 'DEVISING', 'MEANS', 'TO', 'COUNTERACT', 'A', 'WATCHFULNESS', 'AND', 'SUSPICION', 'ON', 'THE', 'PART', 'OF', 'HIS', 'ENEMIES', 'THAT', 'HE', 'KNEW', 'WERE', 'IN', 'NO', 'DEGREE', 'INFERIOR', 'TO', 'HIS', 'OWN'] +1320-122617-0001-293: hyp=['IN', 'HIS', 'RETURN', 'TO', 'THE', 'CAMP', 'HIS', 'ACUTE', 'AND', 'PRACTICED', 'INTELLECTS', 'WERE', 'INTENTLY', 'ENGAGED', 'IN', 'DEVISING', 'MEANS', 'TO', 'COUNTERACT', 'A', 'WATCHFULNESS', 'AND', 'SUSPICION', 'ON', 'THE', 'PART', 'OF', 'HIS', 'ENEMIES', 'THAT', 'HE', 'KNEW', 'WERE', 'IN', 'NO', 'DEGREE', 'INFERIOR', 'TO', 'HIS', 'OWN'] +1320-122617-0002-294: ref=['IN', 'OTHER', 'WORDS', 'WHILE', 'HE', 'HAD', 'IMPLICIT', 'FAITH', 'IN', 'THE', 'ABILITY', 'OF', "BALAAM'S", 'ASS', 'TO', 'SPEAK', 'HE', 'WAS', 'SOMEWHAT', 'SKEPTICAL', 'ON', 'THE', 'SUBJECT', 'OF', 'A', "BEAR'S", 'SINGING', 'AND', 'YET', 'HE', 'HAD', 'BEEN', 'ASSURED', 'OF', 'THE', 'LATTER', 'ON', 'THE', 'TESTIMONY', 'OF', 'HIS', 'OWN', 'EXQUISITE', 'ORGANS'] +1320-122617-0002-294: hyp=['IN', 'OTHER', 'WORDS', 'WHILE', 'HE', 'HAD', 'IMPLICIT', 'FAITH', 'IN', 'THE', 'ABILITY', 'OF', "BAILIM'S", 'ASS', 'TO', 'SPEAK', 'HE', 'WAS', 'SOMEWHAT', 'SCEPTICAL', 'ON', 'THE', 'SUBJECT', 'OF', 'A', "BEAR'S", 'SINGING', 'AND', 'YET', 'HE', 'HAD', 'BEEN', 'ASSURED', 'OF', 'THE', 'LATTER', 'ON', 'THE', 'TESTIMONY', 'OF', 'HIS', 'OWN', 'EXQUISITE', 'ORGANS'] +1320-122617-0003-295: ref=['THERE', 'WAS', 'SOMETHING', 'IN', 'HIS', 'AIR', 'AND', 'MANNER', 'THAT', 'BETRAYED', 'TO', 'THE', 'SCOUT', 'THE', 'UTTER', 'CONFUSION', 'OF', 'THE', 'STATE', 'OF', 'HIS', 'MIND'] +1320-122617-0003-295: hyp=['THERE', 'WAS', 'SOMETHING', 'IN', 'HIS', 'AIR', 'AND', 'MANNER', 'THAT', 'BETRAYED', 'TO', 'THE', 'SCOUT', 'THE', 'UTTER', 'CONFUSION', 'OF', 'THE', 'STATE', 'OF', 'HIS', 'MIND'] +1320-122617-0004-296: ref=['THE', 'INGENIOUS', 'HAWKEYE', 'WHO', 'RECALLED', 'THE', 'HASTY', 'MANNER', 'IN', 'WHICH', 'THE', 'OTHER', 'HAD', 'ABANDONED', 'HIS', 'POST', 'AT', 'THE', 'BEDSIDE', 'OF', 'THE', 'SICK', 'WOMAN', 'WAS', 'NOT', 'WITHOUT', 'HIS', 'SUSPICIONS', 'CONCERNING', 'THE', 'SUBJECT', 'OF', 'SO', 'MUCH', 'SOLEMN', 'DELIBERATION'] +1320-122617-0004-296: hyp=['THE', 'INGENIOUS', 'HAWKEYE', 'WHO', 'RECALLED', 'THE', 'HASTY', 'MANNER', 'IN', 'WHICH', 'THE', 'OTHER', 'HAD', 'ABANDONED', 'HIS', 'POST', 'AT', 'THE', 'BEDSIDE', 'OF', 'THE', 'SICK', 'WOMAN', 'WAS', 'NOT', 'WITHOUT', 'HIS', 'SUSPICIONS', 'CONCERNING', 'THE', 'SUBJECT', 'OF', 'SO', 'MUCH', 'SOLEMN', 'DELIBERATION'] +1320-122617-0005-297: ref=['THE', 'BEAR', 'SHOOK', 'HIS', 'SHAGGY', 'SIDES', 'AND', 'THEN', 'A', 'WELL', 'KNOWN', 'VOICE', 'REPLIED'] +1320-122617-0005-297: hyp=['THE', 'BEAR', 'SHOOK', 'HIS', 'SHAGGY', 'SIDES', 'AND', 'THEN', 'A', 'WELL', 'KNOWN', 'VOICE', 'REPLIED'] +1320-122617-0006-298: ref=['CAN', 'THESE', 'THINGS', 'BE', 'RETURNED', 'DAVID', 'BREATHING', 'MORE', 'FREELY', 'AS', 'THE', 'TRUTH', 'BEGAN', 'TO', 'DAWN', 'UPON', 'HIM'] +1320-122617-0006-298: hyp=['CAN', 'THESE', 'THINGS', 'BE', 'RETURNED', 'DAVID', 'BREATHING', 'MORE', 'FREELY', 'AS', 'THE', 'TRUTH', 'BEGAN', 'TO', 'DAWN', 'UPON', 
'HIM'] +1320-122617-0007-299: ref=['COME', 'COME', 'RETURNED', 'HAWKEYE', 'UNCASING', 'HIS', 'HONEST', 'COUNTENANCE', 'THE', 'BETTER', 'TO', 'ASSURE', 'THE', 'WAVERING', 'CONFIDENCE', 'OF', 'HIS', 'COMPANION', 'YOU', 'MAY', 'SEE', 'A', 'SKIN', 'WHICH', 'IF', 'IT', 'BE', 'NOT', 'AS', 'WHITE', 'AS', 'ONE', 'OF', 'THE', 'GENTLE', 'ONES', 'HAS', 'NO', 'TINGE', 'OF', 'RED', 'TO', 'IT', 'THAT', 'THE', 'WINDS', 'OF', 'THE', 'HEAVEN', 'AND', 'THE', 'SUN', 'HAVE', 'NOT', 'BESTOWED', 'NOW', 'LET', 'US', 'TO', 'BUSINESS'] +1320-122617-0007-299: hyp=['COME', 'COME', 'RETURNED', 'HAWKEYE', 'UNCASING', 'HIS', 'HONEST', 'COUNTENANCE', 'THE', 'BETTER', 'TO', 'ASSURE', 'THE', 'WAVERING', 'CONFIDENCE', 'OF', 'HIS', 'COMPANION', 'YOU', 'MAY', 'SEE', 'A', 'SKIN', 'WHICH', 'IF', 'IT', 'BE', 'NOT', 'AS', 'WHITE', 'AS', 'ONE', 'OF', 'THE', 'GENTLE', 'ONES', 'HAS', 'NO', 'TINGE', 'OF', 'RED', 'TO', 'IT', 'THAT', 'THE', 'WINDS', 'OF', 'THE', 'HEAVEN', 'AND', 'THE', 'SUN', 'HAVE', 'NOT', 'BESTOWED', 'NOW', 'LET', 'US', 'TO', 'BUSINESS'] +1320-122617-0008-300: ref=['THE', 'YOUNG', 'MAN', 'IS', 'IN', 'BONDAGE', 'AND', 'MUCH', 'I', 'FEAR', 'HIS', 'DEATH', 'IS', 'DECREED'] +1320-122617-0008-300: hyp=['THE', 'YOUNG', 'MAN', 'IS', 'IN', 'BONDAGE', 'AND', 'MUCH', 'I', 'FEAR', 'HIS', 'DEATH', 'IS', 'DECREED'] +1320-122617-0009-301: ref=['I', 'GREATLY', 'MOURN', 'THAT', 'ONE', 'SO', 'WELL', 'DISPOSED', 'SHOULD', 'DIE', 'IN', 'HIS', 'IGNORANCE', 'AND', 'I', 'HAVE', 'SOUGHT', 'A', 'GOODLY', 'HYMN', 'CAN', 'YOU', 'LEAD', 'ME', 'TO', 'HIM'] +1320-122617-0009-301: hyp=['I', 'GREATLY', 'MOURN', 'THAT', 'ONE', 'SO', 'WELL', 'DISPOSED', 'SHOULD', 'DIE', 'IN', 'HIS', 'IGNORANCE', 'AND', 'I', 'HAVE', 'SOUGHT', 'A', 'GOODLY', 'HYMN', 'CAN', 'YOU', 'LEAD', 'ME', 'TO', 'HIM'] +1320-122617-0010-302: ref=['THE', 'TASK', 'WILL', 'NOT', 'BE', 'DIFFICULT', 'RETURNED', 'DAVID', 'HESITATING', 'THOUGH', 'I', 'GREATLY', 'FEAR', 'YOUR', 'PRESENCE', 'WOULD', 'RATHER', 'INCREASE', 'THAN', 'MITIGATE', 'HIS', 'UNHAPPY', 'FORTUNES'] +1320-122617-0010-302: hyp=['THE', 'TASK', 'WILL', 'NOT', 'BE', 'DIFFICULT', 'RETURNED', 'DAVID', 'HESITATING', 'THOUGH', 'I', 'GREATLY', 'FEAR', 'YOUR', 'PRESENCE', 'WOULD', 'RATHER', 'INCREASE', 'THAN', 'MITIGATE', 'HIS', 'UNHAPPY', 'FORTUNES'] +1320-122617-0011-303: ref=['THE', 'LODGE', 'IN', 'WHICH', 'UNCAS', 'WAS', 'CONFINED', 'WAS', 'IN', 'THE', 'VERY', 'CENTER', 'OF', 'THE', 'VILLAGE', 'AND', 'IN', 'A', 'SITUATION', 'PERHAPS', 'MORE', 'DIFFICULT', 'THAN', 'ANY', 'OTHER', 'TO', 'APPROACH', 'OR', 'LEAVE', 'WITHOUT', 'OBSERVATION'] +1320-122617-0011-303: hyp=['THE', 'LODGE', 'IN', 'WHICH', 'UNCAS', 'WAS', 'CONFINED', 'WAS', 'IN', 'THE', 'VERY', 'CENTER', 'OF', 'THE', 'VILLAGE', 'AND', 'IN', 'A', 'SITUATION', 'PERHAPS', 'MORE', 'DIFFICULT', 'THAN', 'ANY', 'OTHER', 'TO', 'APPROACH', 'OR', 'LEAVE', 'WITHOUT', 'OBSERVATION'] +1320-122617-0012-304: ref=['FOUR', 'OR', 'FIVE', 'OF', 'THE', 'LATTER', 'ONLY', 'LINGERED', 'ABOUT', 'THE', 'DOOR', 'OF', 'THE', 'PRISON', 'OF', 'UNCAS', 'WARY', 'BUT', 'CLOSE', 'OBSERVERS', 'OF', 'THE', 'MANNER', 'OF', 'THEIR', 'CAPTIVE'] +1320-122617-0012-304: hyp=['FOUR', 'OR', 'FIVE', 'OF', 'THE', 'LATTER', 'ONLY', 'LINGERED', 'ABOUT', 'THE', 'DOOR', 'OF', 'THE', 'PRISON', 'OF', 'UNCAS', 'WARY', 'BUT', 'CLOSE', 'OBSERVERS', 'OF', 'THE', 'MANNER', 'OF', 'THEIR', 'CAPTIVE'] +1320-122617-0013-305: ref=['DELIVERED', 'IN', 'A', 'STRONG', 'TONE', 'OF', 'ASSENT', 'ANNOUNCED', 'THE', 'GRATIFICATION', 'THE', 'SAVAGE', 'WOULD', 'RECEIVE', 'IN', 'WITNESSING', 'SUCH', 'AN', 'EXHIBITION', 'OF', 'WEAKNESS', 
'IN', 'AN', 'ENEMY', 'SO', 'LONG', 'HATED', 'AND', 'SO', 'MUCH', 'FEARED'] +1320-122617-0013-305: hyp=['DELIVERED', 'IN', 'A', 'STRONG', 'TONE', 'OF', 'ASSENT', 'ANNOUNCED', 'THE', 'GRATIFICATION', 'THE', 'SAVAGE', 'WOULD', 'RECEIVE', 'IN', 'WITNESSING', 'SUCH', 'AN', 'EXHIBITION', 'OF', 'WEAKNESS', 'IN', 'AN', 'ENEMY', 'SO', 'LONG', 'HATED', 'AND', 'SO', 'MUCH', 'FEARED'] +1320-122617-0014-306: ref=['THEY', 'DREW', 'BACK', 'A', 'LITTLE', 'FROM', 'THE', 'ENTRANCE', 'AND', 'MOTIONED', 'TO', 'THE', 'SUPPOSED', 'CONJURER', 'TO', 'ENTER'] +1320-122617-0014-306: hyp=['THEY', 'DREW', 'BACK', 'A', 'LITTLE', 'FROM', 'THE', 'ENTRANCE', 'AND', 'MOTIONED', 'TO', 'THE', 'SUPPOSED', 'CONJUROR', 'TO', 'ENTER'] +1320-122617-0015-307: ref=['BUT', 'THE', 'BEAR', 'INSTEAD', 'OF', 'OBEYING', 'MAINTAINED', 'THE', 'SEAT', 'IT', 'HAD', 'TAKEN', 'AND', 'GROWLED'] +1320-122617-0015-307: hyp=['BUT', 'THE', 'BEAR', 'INSTEAD', 'OF', 'OBEYING', 'MAINTAINED', 'THE', 'SEAT', 'IT', 'HAD', 'TAKEN', 'AND', 'GROWLED'] +1320-122617-0016-308: ref=['THE', 'CUNNING', 'MAN', 'IS', 'AFRAID', 'THAT', 'HIS', 'BREATH', 'WILL', 'BLOW', 'UPON', 'HIS', 'BROTHERS', 'AND', 'TAKE', 'AWAY', 'THEIR', 'COURAGE', 'TOO', 'CONTINUED', 'DAVID', 'IMPROVING', 'THE', 'HINT', 'HE', 'RECEIVED', 'THEY', 'MUST', 'STAND', 'FURTHER', 'OFF'] +1320-122617-0016-308: hyp=['THE', 'CUNNING', 'MAN', 'IS', 'AFRAID', 'THAT', 'HIS', 'BREATH', 'WILL', 'BLOW', 'UPON', 'HIS', 'BROTHERS', 'AND', 'TAKE', 'AWAY', 'THEIR', 'COURAGE', 'TOO', 'CONTINUED', 'DAVID', 'IMPROVING', 'THE', 'HINT', 'HE', 'RECEIVED', 'THEY', 'MUST', 'STAND', 'FURTHER', 'OFF'] +1320-122617-0017-309: ref=['THEN', 'AS', 'IF', 'SATISFIED', 'OF', 'THEIR', 'SAFETY', 'THE', 'SCOUT', 'LEFT', 'HIS', 'POSITION', 'AND', 'SLOWLY', 'ENTERED', 'THE', 'PLACE'] +1320-122617-0017-309: hyp=['THEN', 'AS', 'IF', 'SATISFIED', 'OF', 'THEIR', 'SAFETY', 'THE', 'SCOUT', 'LEFT', 'HIS', 'POSITION', 'AND', 'SLOWLY', 'ENTERED', 'THE', 'PLACE'] +1320-122617-0018-310: ref=['IT', 'WAS', 'SILENT', 'AND', 'GLOOMY', 'BEING', 'TENANTED', 'SOLELY', 'BY', 'THE', 'CAPTIVE', 'AND', 'LIGHTED', 'BY', 'THE', 'DYING', 'EMBERS', 'OF', 'A', 'FIRE', 'WHICH', 'HAD', 'BEEN', 'USED', 'FOR', 'THE', 'PURPOSED', 'OF', 'COOKERY'] +1320-122617-0018-310: hyp=['IT', 'WAS', 'SILENT', 'AND', 'GLOOMY', 'BEING', 'TENANTED', 'SOLELY', 'BY', 'THE', 'CAPTIVE', 'AND', 'LIGHTED', 'BY', 'THE', 'DYING', 'EMBERS', 'OF', 'A', 'FIRE', 'WHICH', 'HAD', 'BEEN', 'USED', 'FOR', 'THE', 'PURPOSE', 'OF', 'COOKERY'] +1320-122617-0019-311: ref=['UNCAS', 'OCCUPIED', 'A', 'DISTANT', 'CORNER', 'IN', 'A', 'RECLINING', 'ATTITUDE', 'BEING', 'RIGIDLY', 'BOUND', 'BOTH', 'HANDS', 'AND', 'FEET', 'BY', 'STRONG', 'AND', 'PAINFUL', 'WITHES'] +1320-122617-0019-311: hyp=['UNCAS', 'OCCUPIED', 'A', 'DISTANT', 'CORNER', 'IN', 'A', 'RECLINING', 'ATTITUDE', 'BEING', 'RIGIDLY', 'BOUND', 'BOTH', 'HANDS', 'AND', 'FEET', 'BY', 'STRONG', 'AND', 'PAINFUL', 'WITHES'] +1320-122617-0020-312: ref=['THE', 'SCOUT', 'WHO', 'HAD', 'LEFT', 'DAVID', 'AT', 'THE', 'DOOR', 'TO', 'ASCERTAIN', 'THEY', 'WERE', 'NOT', 'OBSERVED', 'THOUGHT', 'IT', 'PRUDENT', 'TO', 'PRESERVE', 'HIS', 'DISGUISE', 'UNTIL', 'ASSURED', 'OF', 'THEIR', 'PRIVACY'] +1320-122617-0020-312: hyp=['THE', 'SCOUT', 'WHO', 'HAD', 'LEFT', 'DAVID', 'AT', 'THE', 'DOOR', 'TO', 'ASCERTAIN', 'THEY', 'WERE', 'NOT', 'OBSERVED', 'THOUGHT', 'IT', 'PRUDENT', 'TO', 'PRESERVE', 'HIS', 'DISGUISE', 'UNTIL', 'ASSURED', 'OF', 'THEIR', 'PRIVACY'] +1320-122617-0021-313: ref=['WHAT', 'SHALL', 'WE', 'DO', 'WITH', 'THE', 'MINGOES', 'AT', 'THE', 'DOOR', 'THEY', 'COUNT', 
'SIX', 'AND', 'THIS', 'SINGER', 'IS', 'AS', 'GOOD', 'AS', 'NOTHING'] +1320-122617-0021-313: hyp=['WHAT', 'SHALL', 'WE', 'DO', 'WITH', 'THE', 'MINGOES', 'AT', 'THE', 'DOOR', 'THEY', 'COUNT', 'SIX', 'AND', 'THE', 'SINGER', 'IS', 'AS', 'GOOD', 'AS', 'NOTHING'] +1320-122617-0022-314: ref=['THE', 'DELAWARES', 'ARE', 'CHILDREN', 'OF', 'THE', 'TORTOISE', 'AND', 'THEY', 'OUTSTRIP', 'THE', 'DEER'] +1320-122617-0022-314: hyp=['THE', 'DELAWARES', 'ARE', 'CHILDREN', 'OF', 'THE', 'TORTOISE', 'AND', 'THEY', 'OUTSTRIP', 'THE', 'DEER'] +1320-122617-0023-315: ref=['UNCAS', 'WHO', 'HAD', 'ALREADY', 'APPROACHED', 'THE', 'DOOR', 'IN', 'READINESS', 'TO', 'LEAD', 'THE', 'WAY', 'NOW', 'RECOILED', 'AND', 'PLACED', 'HIMSELF', 'ONCE', 'MORE', 'IN', 'THE', 'BOTTOM', 'OF', 'THE', 'LODGE'] +1320-122617-0023-315: hyp=['UNCAS', 'WHO', 'HAD', 'ALREADY', 'APPROACHED', 'THE', 'DOOR', 'IN', 'READINESS', 'TO', 'LEAD', 'THE', 'WAY', 'NOW', 'RECOILED', 'AND', 'PLACED', 'HIMSELF', 'ONCE', 'MORE', 'IN', 'THE', 'BOTTOM', 'OF', 'THE', 'LODGE'] +1320-122617-0024-316: ref=['BUT', 'HAWKEYE', 'WHO', 'WAS', 'TOO', 'MUCH', 'OCCUPIED', 'WITH', 'HIS', 'OWN', 'THOUGHTS', 'TO', 'NOTE', 'THE', 'MOVEMENT', 'CONTINUED', 'SPEAKING', 'MORE', 'TO', 'HIMSELF', 'THAN', 'TO', 'HIS', 'COMPANION'] +1320-122617-0024-316: hyp=['BUT', 'HAWKEYE', 'WHO', 'WAS', 'TOO', 'MUCH', 'OCCUPIED', 'WITH', 'HIS', 'OWN', 'THOUGHTS', 'TO', 'NOTE', 'THE', 'MOVEMENT', 'CONTINUED', 'SPEAKING', 'MORE', 'TO', 'HIMSELF', 'THAN', 'TO', 'HIS', 'COMPANION'] +1320-122617-0025-317: ref=['SO', 'UNCAS', 'YOU', 'HAD', 'BETTER', 'TAKE', 'THE', 'LEAD', 'WHILE', 'I', 'WILL', 'PUT', 'ON', 'THE', 'SKIN', 'AGAIN', 'AND', 'TRUST', 'TO', 'CUNNING', 'FOR', 'WANT', 'OF', 'SPEED'] +1320-122617-0025-317: hyp=['SO', 'UNCAS', 'YOU', 'HAD', 'BETTER', 'TAKE', 'THE', 'LEAD', 'WHILE', 'I', 'WILL', 'PUT', 'ON', 'THE', 'SKIN', 'AGAIN', 'AND', 'TRUST', 'TO', 'CUNNING', 'FOR', 'WANT', 'OF', 'SPEED'] +1320-122617-0026-318: ref=['WELL', 'WHAT', "CAN'T", 'BE', 'DONE', 'BY', 'MAIN', 'COURAGE', 'IN', 'WAR', 'MUST', 'BE', 'DONE', 'BY', 'CIRCUMVENTION'] +1320-122617-0026-318: hyp=['WELL', 'WHAT', "CAN'T", 'BE', 'DONE', 'BY', 'MAIN', 'COURAGE', 'IN', 'WAR', 'MUST', 'BE', 'DONE', 'BY', 'CIRCUMVENTION'] +1320-122617-0027-319: ref=['AS', 'SOON', 'AS', 'THESE', 'DISPOSITIONS', 'WERE', 'MADE', 'THE', 'SCOUT', 'TURNED', 'TO', 'DAVID', 'AND', 'GAVE', 'HIM', 'HIS', 'PARTING', 'INSTRUCTIONS'] +1320-122617-0027-319: hyp=['AS', 'SOON', 'AS', 'THESE', 'DISPOSITIONS', 'WERE', 'MADE', 'THE', 'SCOUT', 'TURNED', 'TO', 'DAVID', 'AND', 'GAVE', 'HIM', 'HIS', 'PARTING', 'INSTRUCTIONS'] +1320-122617-0028-320: ref=['MY', 'PURSUITS', 'ARE', 'PEACEFUL', 'AND', 'MY', 'TEMPER', 'I', 'HUMBLY', 'TRUST', 'IS', 'GREATLY', 'GIVEN', 'TO', 'MERCY', 'AND', 'LOVE', 'RETURNED', 'DAVID', 'A', 'LITTLE', 'NETTLED', 'AT', 'SO', 'DIRECT', 'AN', 'ATTACK', 'ON', 'HIS', 'MANHOOD', 'BUT', 'THERE', 'ARE', 'NONE', 'WHO', 'CAN', 'SAY', 'THAT', 'I', 'HAVE', 'EVER', 'FORGOTTEN', 'MY', 'FAITH', 'IN', 'THE', 'LORD', 'EVEN', 'IN', 'THE', 'GREATEST', 'STRAITS'] +1320-122617-0028-320: hyp=['MY', 'PURSUITS', 'ARE', 'PEACEFUL', 'AND', 'MY', 'TEMPER', 'I', 'HUMBLY', 'TRUST', 'IS', 'GREATLY', 'GIVEN', 'TO', 'MERCY', 'AND', 'LOVE', 'RETURNED', 'DAVID', 'A', 'LITTLE', 'NETTLED', 'AT', 'SO', 'DIRECT', 'AN', 'ATTACK', 'ON', 'HIS', 'MANHOOD', 'BUT', 'THERE', 'ARE', 'NONE', 'WHO', 'CAN', 'SAY', 'THAT', 'I', 'HAVE', 'EVER', 'FORGOTTEN', 'MY', 'FAITH', 'IN', 'THE', 'LORD', 'EVEN', 'IN', 'THE', 'GREATEST', 'STRAITS'] +1320-122617-0029-321: ref=['IF', 'YOU', 'ARE', 'NOT', 'THEN', 
'KNOCKED', 'ON', 'THE', 'HEAD', 'YOUR', 'BEING', 'A', 'NON', 'COMPOSSER', 'WILL', 'PROTECT', 'YOU', 'AND', "YOU'LL", 'THEN', 'HAVE', 'A', 'GOOD', 'REASON', 'TO', 'EXPECT', 'TO', 'DIE', 'IN', 'YOUR', 'BED'] +1320-122617-0029-321: hyp=['IF', 'YOU', 'ARE', 'NOT', 'THEN', 'KNOCKED', 'ON', 'THE', 'HEAD', 'YOUR', 'BEING', 'A', 'NON', 'COMPOSOR', 'WILL', 'PROTECT', 'YOU', 'AND', "YOU'LL", 'THEN', 'HAVE', 'GOOD', 'REASON', 'TO', 'EXPECT', 'TO', 'DIE', 'IN', 'YOUR', 'BED'] +1320-122617-0030-322: ref=['SO', 'CHOOSE', 'FOR', 'YOURSELF', 'TO', 'MAKE', 'A', 'RUSH', 'OR', 'TARRY', 'HERE'] +1320-122617-0030-322: hyp=['SO', 'CHOOSE', 'FOR', 'YOURSELF', 'TO', 'MAKE', 'A', 'RUSH', 'OR', 'TARRY', 'HERE'] +1320-122617-0031-323: ref=['BRAVELY', 'AND', 'GENEROUSLY', 'HAS', 'HE', 'BATTLED', 'IN', 'MY', 'BEHALF', 'AND', 'THIS', 'AND', 'MORE', 'WILL', 'I', 'DARE', 'IN', 'HIS', 'SERVICE'] +1320-122617-0031-323: hyp=['BRAVELY', 'AND', 'GENEROUSLY', 'HAS', 'HE', 'BATTLED', 'IN', 'MY', 'BEHALF', 'AND', 'THIS', 'AND', 'MORE', 'WILL', 'I', 'DARE', 'IN', 'HIS', 'SERVICE'] +1320-122617-0032-324: ref=['KEEP', 'SILENT', 'AS', 'LONG', 'AS', 'MAY', 'BE', 'AND', 'IT', 'WOULD', 'BE', 'WISE', 'WHEN', 'YOU', 'DO', 'SPEAK', 'TO', 'BREAK', 'OUT', 'SUDDENLY', 'IN', 'ONE', 'OF', 'YOUR', 'SHOUTINGS', 'WHICH', 'WILL', 'SERVE', 'TO', 'REMIND', 'THE', 'INDIANS', 'THAT', 'YOU', 'ARE', 'NOT', 'ALTOGETHER', 'AS', 'RESPONSIBLE', 'AS', 'MEN', 'SHOULD', 'BE'] +1320-122617-0032-324: hyp=['KEEP', 'SILENT', 'AS', 'LONG', 'AS', 'MAY', 'BE', 'AND', 'IT', 'WOULD', 'BE', 'WISE', 'WHEN', 'YOU', 'DO', 'SPEAK', 'TO', 'BREAK', 'OUT', 'SUDDENLY', 'IN', 'ONE', 'OF', 'YOUR', 'SHOUTINGS', 'WHICH', 'WILL', 'SERVE', 'TO', 'REMIND', 'THE', 'INDIANS', 'THAT', 'YOU', 'ARE', 'NOT', 'ALTOGETHER', 'AS', 'RESPONSIBLE', 'AS', 'MEN', 'SHOULD', 'BE'] +1320-122617-0033-325: ref=['IF', 'HOWEVER', 'THEY', 'TAKE', 'YOUR', 'SCALP', 'AS', 'I', 'TRUST', 'AND', 'BELIEVE', 'THEY', 'WILL', 'NOT', 'DEPEND', 'ON', 'IT', 'UNCAS', 'AND', 'I', 'WILL', 'NOT', 'FORGET', 'THE', 'DEED', 'BUT', 'REVENGE', 'IT', 'AS', 'BECOMES', 'TRUE', 'WARRIORS', 'AND', 'TRUSTY', 'FRIENDS'] +1320-122617-0033-325: hyp=['IF', 'HOWEVER', 'THEY', 'TAKE', 'YOUR', 'SCALP', 'AS', 'I', 'TRUST', 'AND', 'BELIEVE', 'THEY', 'WILL', 'NOT', 'DEPEND', 'UPON', 'IT', 'UNCAS', 'AND', 'I', 'WILL', 'NOT', 'FORGET', 'THE', 'DEED', 'BUT', 'REVENGE', 'IT', 'AS', 'BECOMES', 'TRUE', 'WARRIORS', 'AND', 'TRUSTY', 'FRIENDS'] +1320-122617-0034-326: ref=['HOLD', 'SAID', 'DAVID', 'PERCEIVING', 'THAT', 'WITH', 'THIS', 'ASSURANCE', 'THEY', 'WERE', 'ABOUT', 'TO', 'LEAVE', 'HIM', 'I', 'AM', 'AN', 'UNWORTHY', 'AND', 'HUMBLE', 'FOLLOWER', 'OF', 'ONE', 'WHO', 'TAUGHT', 'NOT', 'THE', 'DAMNABLE', 'PRINCIPLE', 'OF', 'REVENGE'] +1320-122617-0034-326: hyp=['HOLD', 'SAID', 'DAVID', 'PERCEIVING', 'THAT', 'WITH', 'THIS', 'ASSURANCE', 'THEY', 'WERE', 'ABOUT', 'TO', 'LEAVE', 'HIM', 'I', 'AM', 'AN', 'UNWORTHY', 'AND', 'HUMBLE', 'FOLLOWER', 'OF', 'ONE', 'WHO', 'TAUGHT', 'NOT', 'THE', 'DAMNABLE', 'PRINCIPLE', 'OF', 'REVENGE'] +1320-122617-0035-327: ref=['THEN', 'HEAVING', 'A', 'HEAVY', 'SIGH', 'PROBABLY', 'AMONG', 'THE', 'LAST', 'HE', 'EVER', 'DREW', 'IN', 'PINING', 'FOR', 'A', 'CONDITION', 'HE', 'HAD', 'SO', 'LONG', 'ABANDONED', 'HE', 'ADDED', 'IT', 'IS', 'WHAT', 'I', 'WOULD', 'WISH', 'TO', 'PRACTISE', 'MYSELF', 'AS', 'ONE', 'WITHOUT', 'A', 'CROSS', 'OF', 'BLOOD', 'THOUGH', 'IT', 'IS', 'NOT', 'ALWAYS', 'EASY', 'TO', 'DEAL', 'WITH', 'AN', 'INDIAN', 'AS', 'YOU', 'WOULD', 'WITH', 'A', 'FELLOW', 'CHRISTIAN'] +1320-122617-0035-327: hyp=['THEN', 'HEAVING', 
'A', 'HEAVY', 'SIGH', 'PROBABLY', 'AMONG', 'THE', 'LAST', 'HE', 'EVER', 'DREW', 'AND', 'PINING', 'FOR', 'A', 'CONDITION', 'HE', 'HAD', 'SO', 'LONG', 'ABANDONED', 'HE', 'ADDED', 'IT', 'IS', 'WHAT', 'I', 'WOULD', 'WISH', 'TO', 'PRACTISE', 'MYSELF', 'AS', 'ONE', 'WITHOUT', 'A', 'CROSS', 'OF', 'BLOOD', 'THOUGH', 'IT', 'IS', 'NOT', 'ALWAYS', 'EASY', 'TO', 'DEAL', 'WITH', 'AN', 'INDIAN', 'AS', 'YOU', 'WOULD', 'WITH', 'A', 'FELLOW', 'CHRISTIAN'] +1320-122617-0036-328: ref=['GOD', 'BLESS', 'YOU', 'FRIEND', 'I', 'DO', 'BELIEVE', 'YOUR', 'SCENT', 'IS', 'NOT', 'GREATLY', 'WRONG', 'WHEN', 'THE', 'MATTER', 'IS', 'DULY', 'CONSIDERED', 'AND', 'KEEPING', 'ETERNITY', 'BEFORE', 'THE', 'EYES', 'THOUGH', 'MUCH', 'DEPENDS', 'ON', 'THE', 'NATURAL', 'GIFTS', 'AND', 'THE', 'FORCE', 'OF', 'TEMPTATION'] +1320-122617-0036-328: hyp=['GOD', 'BLESS', 'YOU', 'FRIEND', 'I', 'DO', 'BELIEVE', 'YOUR', 'SCENT', 'IS', 'NOT', 'GREATLY', 'WRONG', 'WHEN', 'THE', 'MATTER', 'IS', 'DULY', 'CONSIDERED', 'AND', 'KEEPING', 'ETERNITY', 'BEFORE', 'THE', 'EYES', 'THOUGH', 'MUCH', 'DEPENDS', 'ON', 'THE', 'NATURAL', 'GIFTS', 'AND', 'THE', 'FORCE', 'OF', 'TEMPTATION'] +1320-122617-0037-329: ref=['THE', 'DELAWARE', 'DOG', 'HE', 'SAID', 'LEANING', 'FORWARD', 'AND', 'PEERING', 'THROUGH', 'THE', 'DIM', 'LIGHT', 'TO', 'CATCH', 'THE', 'EXPRESSION', 'OF', 'THE', "OTHER'S", 'FEATURES', 'IS', 'HE', 'AFRAID'] +1320-122617-0037-329: hyp=['THE', 'DELAWARE', 'DOG', 'HE', 'SAID', 'LEANING', 'FORWARD', 'AND', 'PEERING', 'THROUGH', 'THE', 'DIM', 'LIGHT', 'TO', 'CATCH', 'THE', 'EXPRESSION', 'OF', 'THE', "OTHER'S", 'FEATURES', 'IS', 'HE', 'AFRAID'] +1320-122617-0038-330: ref=['WILL', 'THE', 'HURONS', 'HEAR', 'HIS', 'GROANS'] +1320-122617-0038-330: hyp=['WILL', 'THE', 'HURONS', 'HEAR', 'HIS', 'GROANS'] +1320-122617-0039-331: ref=['THE', 'MOHICAN', 'STARTED', 'ON', 'HIS', 'FEET', 'AND', 'SHOOK', 'HIS', 'SHAGGY', 'COVERING', 'AS', 'THOUGH', 'THE', 'ANIMAL', 'HE', 'COUNTERFEITED', 'WAS', 'ABOUT', 'TO', 'MAKE', 'SOME', 'DESPERATE', 'EFFORT'] +1320-122617-0039-331: hyp=['THE', 'MOHICAN', 'STARTED', 'ON', 'HIS', 'FEET', 'AND', 'SHOOK', 'HIS', 'SHAGGY', 'COVERING', 'AS', 'THOUGH', 'THE', 'ANIMAL', 'HE', 'COUNTERFEITED', 'WAS', 'ABOUT', 'TO', 'MAKE', 'SOME', 'DESPERATE', 'EFFORT'] +1320-122617-0040-332: ref=['HE', 'HAD', 'NO', 'OCCASION', 'TO', 'DELAY', 'FOR', 'AT', 'THE', 'NEXT', 'INSTANT', 'A', 'BURST', 'OF', 'CRIES', 'FILLED', 'THE', 'OUTER', 'AIR', 'AND', 'RAN', 'ALONG', 'THE', 'WHOLE', 'EXTENT', 'OF', 'THE', 'VILLAGE'] +1320-122617-0040-332: hyp=['HE', 'HAD', 'NO', 'OCCASION', 'TO', 'DELAY', 'FOR', 'AT', 'THE', 'NEXT', 'INSTANT', 'A', 'BURST', 'OF', 'CRIES', 'FILLED', 'THE', 'OUTER', 'AIR', 'AND', 'RAN', 'ALONG', 'THE', 'WHOLE', 'EXTENT', 'OF', 'THE', 'VILLAGE'] +1320-122617-0041-333: ref=['UNCAS', 'CAST', 'HIS', 'SKIN', 'AND', 'STEPPED', 'FORTH', 'IN', 'HIS', 'OWN', 'BEAUTIFUL', 'PROPORTIONS'] +1320-122617-0041-333: hyp=['UNCAS', 'CAST', 'HIS', 'SKIN', 'AND', 'STEPPED', 'FORTH', 'IN', 'HIS', 'OWN', 'BEAUTIFUL', 'PROPORTIONS'] +1580-141083-0000-334: ref=['I', 'WILL', 'ENDEAVOUR', 'IN', 'MY', 'STATEMENT', 'TO', 'AVOID', 'SUCH', 'TERMS', 'AS', 'WOULD', 'SERVE', 'TO', 'LIMIT', 'THE', 'EVENTS', 'TO', 'ANY', 'PARTICULAR', 'PLACE', 'OR', 'GIVE', 'A', 'CLUE', 'AS', 'TO', 'THE', 'PEOPLE', 'CONCERNED'] +1580-141083-0000-334: hyp=['I', 'WILL', 'ENDEAVOUR', 'IN', 'MY', 'STATEMENT', 'TO', 'AVOID', 'SUCH', 'TERMS', 'AS', 'WOULD', 'SERVE', 'TO', 'LIMIT', 'THE', 'EVENTS', 'TO', 'ANY', 'PARTICULAR', 'PLACE', 'OR', 'GIVE', 'A', 'CLUE', 'AS', 'TO', 'THE', 'PEOPLE', 'CONCERNED'] 
+1580-141083-0001-335: ref=['I', 'HAD', 'ALWAYS', 'KNOWN', 'HIM', 'TO', 'BE', 'RESTLESS', 'IN', 'HIS', 'MANNER', 'BUT', 'ON', 'THIS', 'PARTICULAR', 'OCCASION', 'HE', 'WAS', 'IN', 'SUCH', 'A', 'STATE', 'OF', 'UNCONTROLLABLE', 'AGITATION', 'THAT', 'IT', 'WAS', 'CLEAR', 'SOMETHING', 'VERY', 'UNUSUAL', 'HAD', 'OCCURRED'] +1580-141083-0001-335: hyp=['I', 'HAD', 'ALWAYS', 'KNOWN', 'HIM', 'TO', 'BE', 'RESTLESS', 'IN', 'HIS', 'MANNER', 'BUT', 'ON', 'THIS', 'PARTICULAR', 'OCCASION', 'HE', 'WAS', 'IN', 'SUCH', 'A', 'STATE', 'OF', 'UNCONTROLLABLE', 'AGITATION', 'THAT', 'IT', 'WAS', 'CLEAR', 'SOMETHING', 'VERY', 'UNUSUAL', 'HAD', 'OCCURRED'] +1580-141083-0002-336: ref=['MY', "FRIEND'S", 'TEMPER', 'HAD', 'NOT', 'IMPROVED', 'SINCE', 'HE', 'HAD', 'BEEN', 'DEPRIVED', 'OF', 'THE', 'CONGENIAL', 'SURROUNDINGS', 'OF', 'BAKER', 'STREET'] +1580-141083-0002-336: hyp=['MY', "FRIEND'S", 'TEMPER', 'HAD', 'NOT', 'IMPROVED', 'SINCE', 'HE', 'HAD', 'BEEN', 'DEPRIVED', 'OF', 'THE', 'CONGENIAL', 'SURROUNDINGS', 'OF', 'BAKER', 'STREET'] +1580-141083-0003-337: ref=['WITHOUT', 'HIS', 'SCRAPBOOKS', 'HIS', 'CHEMICALS', 'AND', 'HIS', 'HOMELY', 'UNTIDINESS', 'HE', 'WAS', 'AN', 'UNCOMFORTABLE', 'MAN'] +1580-141083-0003-337: hyp=['WITHOUT', 'HIS', 'SCRAP', 'BOOKS', 'HIS', 'CHEMICALS', 'AND', 'HIS', 'HOMELY', 'UNTIDINESS', 'HE', 'WAS', 'AN', 'UNCOMFORTABLE', 'MAN'] +1580-141083-0004-338: ref=['I', 'HAD', 'TO', 'READ', 'IT', 'OVER', 'CAREFULLY', 'AS', 'THE', 'TEXT', 'MUST', 'BE', 'ABSOLUTELY', 'CORRECT'] +1580-141083-0004-338: hyp=['I', 'HAD', 'TO', 'READ', 'IT', 'OVER', 'CAREFULLY', 'AS', 'THE', 'TEXT', 'MUST', 'BE', 'ABSOLUTELY', 'CORRECT'] +1580-141083-0005-339: ref=['I', 'WAS', 'ABSENT', 'RATHER', 'MORE', 'THAN', 'AN', 'HOUR'] +1580-141083-0005-339: hyp=['I', 'WAS', 'ABSENT', 'RATHER', 'MORE', 'THAN', 'AN', 'HOUR'] +1580-141083-0006-340: ref=['THE', 'ONLY', 'DUPLICATE', 'WHICH', 'EXISTED', 'SO', 'FAR', 'AS', 'I', 'KNEW', 'WAS', 'THAT', 'WHICH', 'BELONGED', 'TO', 'MY', 'SERVANT', 'BANNISTER', 'A', 'MAN', 'WHO', 'HAS', 'LOOKED', 'AFTER', 'MY', 'ROOM', 'FOR', 'TEN', 'YEARS', 'AND', 'WHOSE', 'HONESTY', 'IS', 'ABSOLUTELY', 'ABOVE', 'SUSPICION'] +1580-141083-0006-340: hyp=['THE', 'ONLY', 'DUPLICATE', 'WHICH', 'EXISTED', 'SO', 'FAR', 'AS', 'I', 'KNEW', 'WAS', 'THAT', 'WHICH', 'BELONGED', 'TO', 'MY', 'SERVANT', 'BANNISTER', 'A', 'MAN', 'WHO', 'HAS', 'LOOKED', 'AFTER', 'MY', 'ROOM', 'FOR', 'TEN', 'YEARS', 'AND', 'WHOSE', 'HONESTY', 'IS', 'ABSOLUTELY', 'ABOVE', 'SUSPICION'] +1580-141083-0007-341: ref=['THE', 'MOMENT', 'I', 'LOOKED', 'AT', 'MY', 'TABLE', 'I', 'WAS', 'AWARE', 'THAT', 'SOMEONE', 'HAD', 'RUMMAGED', 'AMONG', 'MY', 'PAPERS'] +1580-141083-0007-341: hyp=['THE', 'MOMENT', 'I', 'LOOKED', 'AT', 'MY', 'TABLE', 'I', 'WAS', 'AWARE', 'THAT', 'SOME', 'ONE', 'HAD', 'RUMMAGED', 'AMONG', 'MY', 'PAPERS'] +1580-141083-0008-342: ref=['THE', 'PROOF', 'WAS', 'IN', 'THREE', 'LONG', 'SLIPS', 'I', 'HAD', 'LEFT', 'THEM', 'ALL', 'TOGETHER'] +1580-141083-0008-342: hyp=['THE', 'PROOF', 'WAS', 'IN', 'THREE', 'LONG', 'SLIPS', 'I', 'HAD', 'LEFT', 'THEM', 'ALL', 'TOGETHER'] +1580-141083-0009-343: ref=['THE', 'ALTERNATIVE', 'WAS', 'THAT', 'SOMEONE', 'PASSING', 'HAD', 'OBSERVED', 'THE', 'KEY', 'IN', 'THE', 'DOOR', 'HAD', 'KNOWN', 'THAT', 'I', 'WAS', 'OUT', 'AND', 'HAD', 'ENTERED', 'TO', 'LOOK', 'AT', 'THE', 'PAPERS'] +1580-141083-0009-343: hyp=['THE', 'ALTERNATIVE', 'WAS', 'THAT', 'SOME', 'ONE', 'PASSING', 'HAD', 'OBSERVED', 'THE', 'KEY', 'IN', 'THE', 'DOOR', 'HAD', 'KNOWN', 'THAT', 'I', 'WAS', 'OUT', 'AND', 'HAD', 'ENTERED', 'TO', 'LOOK', 'AT', 'THE', 
'PAPERS'] +1580-141083-0010-344: ref=['I', 'GAVE', 'HIM', 'A', 'LITTLE', 'BRANDY', 'AND', 'LEFT', 'HIM', 'COLLAPSED', 'IN', 'A', 'CHAIR', 'WHILE', 'I', 'MADE', 'A', 'MOST', 'CAREFUL', 'EXAMINATION', 'OF', 'THE', 'ROOM'] +1580-141083-0010-344: hyp=['I', 'GAVE', 'HIM', 'A', 'LITTLE', 'BRANDY', 'AND', 'LEFT', 'HIM', 'COLLAPSED', 'IN', 'A', 'CHAIR', 'WHILE', 'I', 'MADE', 'A', 'MOST', 'CAREFUL', 'EXAMINATION', 'OF', 'THE', 'ROOM'] +1580-141083-0011-345: ref=['A', 'BROKEN', 'TIP', 'OF', 'LEAD', 'WAS', 'LYING', 'THERE', 'ALSO'] +1580-141083-0011-345: hyp=['A', 'BROKEN', 'TIP', 'OF', 'LEAD', 'WAS', 'LYING', 'THERE', 'ALSO'] +1580-141083-0012-346: ref=['NOT', 'ONLY', 'THIS', 'BUT', 'ON', 'THE', 'TABLE', 'I', 'FOUND', 'A', 'SMALL', 'BALL', 'OF', 'BLACK', 'DOUGH', 'OR', 'CLAY', 'WITH', 'SPECKS', 'OF', 'SOMETHING', 'WHICH', 'LOOKS', 'LIKE', 'SAWDUST', 'IN', 'IT'] +1580-141083-0012-346: hyp=['NOT', 'ONLY', 'THIS', 'BUT', 'ON', 'THE', 'TABLE', 'I', 'FOUND', 'A', 'SMALL', 'BALL', 'OF', 'BLACK', 'DOUGH', 'OR', 'CLAY', 'WITH', 'SPECKS', 'OF', 'SOMETHING', 'WHICH', 'LOOKS', 'LIKE', 'SAWDUST', 'IN', 'IT'] +1580-141083-0013-347: ref=['ABOVE', 'ALL', 'THINGS', 'I', 'DESIRE', 'TO', 'SETTLE', 'THE', 'MATTER', 'QUIETLY', 'AND', 'DISCREETLY'] +1580-141083-0013-347: hyp=['ABOVE', 'ALL', 'THINGS', 'I', 'DESIRE', 'TO', 'SETTLE', 'THE', 'MATTER', 'QUIETLY', 'AND', 'DISCREETLY'] +1580-141083-0014-348: ref=['TO', 'THE', 'BEST', 'OF', 'MY', 'BELIEF', 'THEY', 'WERE', 'ROLLED', 'UP'] +1580-141083-0014-348: hyp=['TO', 'THE', 'BEST', 'OF', 'MY', 'BELIEF', 'THEY', 'WERE', 'ROLLED', 'UP'] +1580-141083-0015-349: ref=['DID', 'ANYONE', 'KNOW', 'THAT', 'THESE', 'PROOFS', 'WOULD', 'BE', 'THERE', 'NO', 'ONE', 'SAVE', 'THE', 'PRINTER'] +1580-141083-0015-349: hyp=['DID', 'ANY', 'ONE', 'KNOW', 'THAT', 'THESE', 'PROOFS', 'WOULD', 'BE', 'THERE', 'NO', 'ONE', 'SAVE', 'THE', 'PRINTER'] +1580-141083-0016-350: ref=['I', 'WAS', 'IN', 'SUCH', 'A', 'HURRY', 'TO', 'COME', 'TO', 'YOU', 'YOU', 'LEFT', 'YOUR', 'DOOR', 'OPEN'] +1580-141083-0016-350: hyp=['I', 'WAS', 'IN', 'SUCH', 'A', 'HURRY', 'TO', 'COME', 'TO', 'YOU', 'YOU', 'LEFT', 'YOUR', 'DOOR', 'OPEN'] +1580-141083-0017-351: ref=['SO', 'IT', 'SEEMS', 'TO', 'ME'] +1580-141083-0017-351: hyp=['SO', 'IT', 'SEEMS', 'TO', 'ME'] +1580-141083-0018-352: ref=['NOW', 'MISTER', 'SOAMES', 'AT', 'YOUR', 'DISPOSAL'] +1580-141083-0018-352: hyp=['NOW', 'MISTER', 'SOLMES', 'AT', 'YOUR', 'DISPOSAL'] +1580-141083-0019-353: ref=['ABOVE', 'WERE', 'THREE', 'STUDENTS', 'ONE', 'ON', 'EACH', 'STORY'] +1580-141083-0019-353: hyp=['ABOVE', 'WERE', 'THREE', 'STUDENTS', 'ONE', 'ON', 'EACH', 'STORY'] +1580-141083-0020-354: ref=['THEN', 'HE', 'APPROACHED', 'IT', 'AND', 'STANDING', 'ON', 'TIPTOE', 'WITH', 'HIS', 'NECK', 'CRANED', 'HE', 'LOOKED', 'INTO', 'THE', 'ROOM'] +1580-141083-0020-354: hyp=['THEN', 'HE', 'APPROACHED', 'IT', 'AND', 'STANDING', 'ON', 'TIPTOE', 'WITH', 'HIS', 'NECK', 'CRANED', 'HE', 'LOOKED', 'INTO', 'THE', 'ROOM'] +1580-141083-0021-355: ref=['THERE', 'IS', 'NO', 'OPENING', 'EXCEPT', 'THE', 'ONE', 'PANE', 'SAID', 'OUR', 'LEARNED', 'GUIDE'] +1580-141083-0021-355: hyp=['THERE', 'IS', 'NO', 'OPENING', 'EXCEPT', 'THE', 'ONE', 'PANE', 'SAID', 'OUR', 'LEARNED', 'GUIDE'] +1580-141083-0022-356: ref=['I', 'AM', 'AFRAID', 'THERE', 'ARE', 'NO', 'SIGNS', 'HERE', 'SAID', 'HE'] +1580-141083-0022-356: hyp=['I', 'AM', 'AFRAID', 'THERE', 'ARE', 'NO', 'SIGNS', 'HERE', 'SAID', 'HE'] +1580-141083-0023-357: ref=['ONE', 'COULD', 'HARDLY', 'HOPE', 'FOR', 'ANY', 'UPON', 'SO', 'DRY', 'A', 'DAY'] +1580-141083-0023-357: hyp=['ONE', 
'COULD', 'HARDLY', 'HOPE', 'FOR', 'ANY', 'UPON', 'SO', 'DRY', 'A', 'DAY'] +1580-141083-0024-358: ref=['YOU', 'LEFT', 'HIM', 'IN', 'A', 'CHAIR', 'YOU', 'SAY', 'WHICH', 'CHAIR', 'BY', 'THE', 'WINDOW', 'THERE'] +1580-141083-0024-358: hyp=['YOU', 'LEFT', 'HIM', 'IN', 'A', 'CHAIR', 'YOU', 'SAY', 'WHICH', 'CHAIR', 'BY', 'THE', 'WINDOW', 'THERE'] +1580-141083-0025-359: ref=['THE', 'MAN', 'ENTERED', 'AND', 'TOOK', 'THE', 'PAPERS', 'SHEET', 'BY', 'SHEET', 'FROM', 'THE', 'CENTRAL', 'TABLE'] +1580-141083-0025-359: hyp=['THE', 'MAN', 'ENTERED', 'AND', 'TOOK', 'THE', 'PAPERS', 'SHEET', 'BY', 'SHEET', 'FROM', 'THE', 'CENTRAL', 'TABLE'] +1580-141083-0026-360: ref=['AS', 'A', 'MATTER', 'OF', 'FACT', 'HE', 'COULD', 'NOT', 'SAID', 'SOAMES', 'FOR', 'I', 'ENTERED', 'BY', 'THE', 'SIDE', 'DOOR'] +1580-141083-0026-360: hyp=['AS', 'A', 'MATTER', 'OF', 'FACT', 'HE', 'COULD', 'NOT', 'SAID', 'SOLMES', 'FOR', 'I', 'ENTERED', 'BY', 'THE', 'SIDE', 'DOOR'] +1580-141083-0027-361: ref=['HOW', 'LONG', 'WOULD', 'IT', 'TAKE', 'HIM', 'TO', 'DO', 'THAT', 'USING', 'EVERY', 'POSSIBLE', 'CONTRACTION', 'A', 'QUARTER', 'OF', 'AN', 'HOUR', 'NOT', 'LESS'] +1580-141083-0027-361: hyp=['HOW', 'LONG', 'WOULD', 'IT', 'TAKE', 'HIM', 'TO', 'DO', 'THAT', 'USING', 'EVERY', 'POSSIBLE', 'CONTRACTION', 'A', 'QUARTER', 'OF', 'AN', 'HOUR', 'NOT', 'LESS'] +1580-141083-0028-362: ref=['THEN', 'HE', 'TOSSED', 'IT', 'DOWN', 'AND', 'SEIZED', 'THE', 'NEXT'] +1580-141083-0028-362: hyp=['THEN', 'HE', 'TOSSED', 'IT', 'DOWN', 'AND', 'SEIZED', 'THE', 'NEXT'] +1580-141083-0029-363: ref=['HE', 'WAS', 'IN', 'THE', 'MIDST', 'OF', 'THAT', 'WHEN', 'YOUR', 'RETURN', 'CAUSED', 'HIM', 'TO', 'MAKE', 'A', 'VERY', 'HURRIED', 'RETREAT', 'VERY', 'HURRIED', 'SINCE', 'HE', 'HAD', 'NOT', 'TIME', 'TO', 'REPLACE', 'THE', 'PAPERS', 'WHICH', 'WOULD', 'TELL', 'YOU', 'THAT', 'HE', 'HAD', 'BEEN', 'THERE'] +1580-141083-0029-363: hyp=['HE', 'WAS', 'IN', 'THE', 'MIDST', 'OF', 'THAT', 'WHEN', 'YOUR', 'RETURN', 'CAUSED', 'HIM', 'TO', 'MAKE', 'A', 'VERY', 'HURRIED', 'RETREAT', 'VERY', 'HURRIED', 'SINCE', 'HE', 'HAD', 'NOT', 'TIME', 'TO', 'REPLACE', 'THE', 'PAPERS', 'WHICH', 'WOULD', 'TELL', 'YOU', 'THAT', 'HE', 'HAD', 'BEEN', 'THERE'] +1580-141083-0030-364: ref=['MISTER', 'SOAMES', 'WAS', 'SOMEWHAT', 'OVERWHELMED', 'BY', 'THIS', 'FLOOD', 'OF', 'INFORMATION'] +1580-141083-0030-364: hyp=['MISTER', 'SOLMES', 'WAS', 'SOMEWHAT', 'OVERWHELMED', 'BY', 'THIS', 'FLOOD', 'OF', 'INFORMATION'] +1580-141083-0031-365: ref=['HOLMES', 'HELD', 'OUT', 'A', 'SMALL', 'CHIP', 'WITH', 'THE', 'LETTERS', 'N', 'N', 'AND', 'A', 'SPACE', 'OF', 'CLEAR', 'WOOD', 'AFTER', 'THEM', 'YOU', 'SEE'] +1580-141083-0031-365: hyp=['HOLMES', 'HELD', 'OUT', 'A', 'SMALL', 'CHIP', 'WITH', 'THE', 'LETTERS', 'N', 'N', 'AND', 'A', 'SPACE', 'OF', 'CLEAR', 'WOOD', 'AFTER', 'THEM', 'YOU', 'SEE'] +1580-141083-0032-366: ref=['WATSON', 'I', 'HAVE', 'ALWAYS', 'DONE', 'YOU', 'AN', 'INJUSTICE', 'THERE', 'ARE', 'OTHERS'] +1580-141083-0032-366: hyp=['WATSON', 'I', 'HAVE', 'ALWAYS', 'DONE', 'YOU', 'AN', 'INJUSTICE', 'THERE', 'ARE', 'OTHERS'] +1580-141083-0033-367: ref=['I', 'WAS', 'HOPING', 'THAT', 'IF', 'THE', 'PAPER', 'ON', 'WHICH', 'HE', 'WROTE', 'WAS', 'THIN', 'SOME', 'TRACE', 'OF', 'IT', 'MIGHT', 'COME', 'THROUGH', 'UPON', 'THIS', 'POLISHED', 'SURFACE', 'NO', 'I', 'SEE', 'NOTHING'] +1580-141083-0033-367: hyp=['I', 'WAS', 'HOPING', 'THAT', 'IF', 'THE', 'PAPER', 'ON', 'WHICH', 'HE', 'WROTE', 'WAS', 'THIN', 'SOME', 'TRACE', 'OF', 'IT', 'MIGHT', 'COME', 'THROUGH', 'UPON', 'THIS', 'POLISHED', 'SURFACE', 'NO', 'I', 'SEE', 'NOTHING'] 
+1580-141083-0034-368: ref=['AS', 'HOLMES', 'DREW', 'THE', 'CURTAIN', 'I', 'WAS', 'AWARE', 'FROM', 'SOME', 'LITTLE', 'RIGIDITY', 'AND', 'ALERTNESS', 'OF', 'HIS', 'ATTITUDE', 'THAT', 'HE', 'WAS', 'PREPARED', 'FOR', 'AN', 'EMERGENCY'] +1580-141083-0034-368: hyp=['AS', 'HOLMES', 'DREW', 'THE', 'CURTAIN', 'I', 'WAS', 'AWARE', 'FROM', 'SOME', 'LITTLE', 'RIGIDITY', 'AND', 'AN', 'ALERTNESS', 'OF', 'HIS', 'ATTITUDE', 'THAT', 'HE', 'WAS', 'PREPARED', 'FOR', 'AN', 'EMERGENCY'] +1580-141083-0035-369: ref=['HOLMES', 'TURNED', 'AWAY', 'AND', 'STOOPED', 'SUDDENLY', 'TO', 'THE', 'FLOOR', 'HALLOA', "WHAT'S", 'THIS'] +1580-141083-0035-369: hyp=['HOLMES', 'TURNED', 'AWAY', 'AND', 'STOOPED', 'SUDDENLY', 'TO', 'THE', 'FLOOR', 'HALLO', 'WHAT', 'IS', 'THIS'] +1580-141083-0036-370: ref=['HOLMES', 'HELD', 'IT', 'OUT', 'ON', 'HIS', 'OPEN', 'PALM', 'IN', 'THE', 'GLARE', 'OF', 'THE', 'ELECTRIC', 'LIGHT'] +1580-141083-0036-370: hyp=['HOLMES', 'HELD', 'IT', 'OUT', 'ON', 'HIS', 'OPEN', 'PALM', 'IN', 'THE', 'GLARE', 'OF', 'THE', 'ELECTRIC', 'LIGHT'] +1580-141083-0037-371: ref=['WHAT', 'COULD', 'HE', 'DO', 'HE', 'CAUGHT', 'UP', 'EVERYTHING', 'WHICH', 'WOULD', 'BETRAY', 'HIM', 'AND', 'HE', 'RUSHED', 'INTO', 'YOUR', 'BEDROOM', 'TO', 'CONCEAL', 'HIMSELF'] +1580-141083-0037-371: hyp=['WHAT', 'COULD', 'HE', 'DO', 'HE', 'CAUGHT', 'UP', 'EVERYTHING', 'WHICH', 'WOULD', 'BETRAY', 'HIM', 'AND', 'HE', 'RUSHED', 'INTO', 'YOUR', 'BEDROOM', 'TO', 'CONCEAL', 'HIMSELF'] +1580-141083-0038-372: ref=['I', 'UNDERSTAND', 'YOU', 'TO', 'SAY', 'THAT', 'THERE', 'ARE', 'THREE', 'STUDENTS', 'WHO', 'USE', 'THIS', 'STAIR', 'AND', 'ARE', 'IN', 'THE', 'HABIT', 'OF', 'PASSING', 'YOUR', 'DOOR', 'YES', 'THERE', 'ARE'] +1580-141083-0038-372: hyp=['I', 'UNDERSTAND', 'YOU', 'TO', 'SAY', 'THAT', 'THERE', 'ARE', 'THREE', 'STUDENTS', 'WHO', 'USE', 'THIS', 'STARE', 'AND', 'ARE', 'IN', 'THE', 'HABIT', 'OF', 'PASSING', 'YOUR', 'DOOR', 'YES', 'THERE', 'ARE'] +1580-141083-0039-373: ref=['AND', 'THEY', 'ARE', 'ALL', 'IN', 'FOR', 'THIS', 'EXAMINATION', 'YES'] +1580-141083-0039-373: hyp=['AND', 'THEY', 'ARE', 'ALL', 'IN', 'FOR', 'THIS', 'EXAMINATION', 'YES'] +1580-141083-0040-374: ref=['ONE', 'HARDLY', 'LIKES', 'TO', 'THROW', 'SUSPICION', 'WHERE', 'THERE', 'ARE', 'NO', 'PROOFS'] +1580-141083-0040-374: hyp=['ONE', 'HARDLY', 'LIKES', 'TO', 'THROW', 'SUSPICION', 'WHERE', 'THERE', 'ARE', 'NO', 'PROOFS'] +1580-141083-0041-375: ref=['LET', 'US', 'HEAR', 'THE', 'SUSPICIONS', 'I', 'WILL', 'LOOK', 'AFTER', 'THE', 'PROOFS'] +1580-141083-0041-375: hyp=['LET', 'US', 'HEAR', 'THE', 'SUSPICIONS', 'I', 'WILL', 'LOOK', 'AFTER', 'THE', 'PROOFS'] +1580-141083-0042-376: ref=['MY', 'SCHOLAR', 'HAS', 'BEEN', 'LEFT', 'VERY', 'POOR', 'BUT', 'HE', 'IS', 'HARD', 'WORKING', 'AND', 'INDUSTRIOUS', 'HE', 'WILL', 'DO', 'WELL'] +1580-141083-0042-376: hyp=['MY', 'SCHOLAR', 'HAS', 'BEEN', 'LEFT', 'VERY', 'POOR', 'BUT', 'HE', 'IS', 'HARD', 'WORKING', 'AND', 'INDUSTRIOUS', 'HE', 'WILL', 'DO', 'WELL'] +1580-141083-0043-377: ref=['THE', 'TOP', 'FLOOR', 'BELONGS', 'TO', 'MILES', 'MC', 'LAREN'] +1580-141083-0043-377: hyp=['THE', 'TOP', 'FLOOR', 'BELONGS', 'TO', 'MILES', 'MC', 'LAREN'] +1580-141083-0044-378: ref=['I', 'DARE', 'NOT', 'GO', 'SO', 'FAR', 'AS', 'THAT', 'BUT', 'OF', 'THE', 'THREE', 'HE', 'IS', 'PERHAPS', 'THE', 'LEAST', 'UNLIKELY'] +1580-141083-0044-378: hyp=['I', 'DARE', 'NOT', 'GO', 'SO', 'FAR', 'AS', 'THAT', 'BUT', 'OF', 'THE', 'THREE', 'HE', 'IS', 'PERHAPS', 'THE', 'LEAST', 'UNLIKELY'] +1580-141083-0045-379: ref=['HE', 'WAS', 'STILL', 'SUFFERING', 'FROM', 'THIS', 'SUDDEN', 'DISTURBANCE', 
'OF', 'THE', 'QUIET', 'ROUTINE', 'OF', 'HIS', 'LIFE'] +1580-141083-0045-379: hyp=['HE', 'WAS', 'STILL', 'SUFFERING', 'FROM', 'THIS', 'SUDDEN', 'DISTURBANCE', 'OF', 'THE', 'QUIET', 'ROUTINE', 'OF', 'HIS', 'LIFE'] +1580-141083-0046-380: ref=['BUT', 'I', 'HAVE', 'OCCASIONALLY', 'DONE', 'THE', 'SAME', 'THING', 'AT', 'OTHER', 'TIMES'] +1580-141083-0046-380: hyp=['BUT', 'I', 'HAVE', 'OCCASIONALLY', 'DONE', 'THE', 'SAME', 'THING', 'AT', 'OTHER', 'TIMES'] +1580-141083-0047-381: ref=['DID', 'YOU', 'LOOK', 'AT', 'THESE', 'PAPERS', 'ON', 'THE', 'TABLE'] +1580-141083-0047-381: hyp=['DID', 'YOU', 'LOOK', 'AT', 'THESE', 'PAPERS', 'ON', 'THE', 'TABLE'] +1580-141083-0048-382: ref=['HOW', 'CAME', 'YOU', 'TO', 'LEAVE', 'THE', 'KEY', 'IN', 'THE', 'DOOR'] +1580-141083-0048-382: hyp=['HOW', 'CAME', 'YOU', 'TO', 'LEAVE', 'THE', 'KEY', 'IN', 'THE', 'DOOR'] +1580-141083-0049-383: ref=['ANYONE', 'IN', 'THE', 'ROOM', 'COULD', 'GET', 'OUT', 'YES', 'SIR'] +1580-141083-0049-383: hyp=['ANY', 'ONE', 'IN', 'THE', 'ROOM', 'COULD', 'GET', 'OUT', 'YES', 'SIR'] +1580-141083-0050-384: ref=['I', 'REALLY', "DON'T", 'THINK', 'HE', 'KNEW', 'MUCH', 'ABOUT', 'IT', 'MISTER', 'HOLMES'] +1580-141083-0050-384: hyp=['I', 'REALLY', "DON'T", 'THINK', 'HE', 'KNEW', 'MUCH', 'ABOUT', 'IT', 'MISTER', 'HOLMES'] +1580-141083-0051-385: ref=['ONLY', 'FOR', 'A', 'MINUTE', 'OR', 'SO'] +1580-141083-0051-385: hyp=['ONLY', 'FOR', 'A', 'MINUTE', 'OR', 'SO'] +1580-141083-0052-386: ref=['OH', 'I', 'WOULD', 'NOT', 'VENTURE', 'TO', 'SAY', 'SIR'] +1580-141083-0052-386: hyp=['OH', 'I', 'WOULD', 'NOT', 'VENTURE', 'TO', 'SAY', 'SIR'] +1580-141083-0053-387: ref=['YOU', "HAVEN'T", 'SEEN', 'ANY', 'OF', 'THEM', 'NO', 'SIR'] +1580-141083-0053-387: hyp=['YOU', "HAVEN'T", 'SEEN', 'ANY', 'OF', 'THEM', 'NO', 'SIR'] +1580-141084-0000-388: ref=['IT', 'WAS', 'THE', 'INDIAN', 'WHOSE', 'DARK', 'SILHOUETTE', 'APPEARED', 'SUDDENLY', 'UPON', 'HIS', 'BLIND'] +1580-141084-0000-388: hyp=['IT', 'WAS', 'THE', 'INDIAN', 'WHOSE', 'DARK', 'SILHOUETTE', 'APPEARED', 'SUDDENLY', 'UPON', 'HIS', 'BLIND'] +1580-141084-0001-389: ref=['HE', 'WAS', 'PACING', 'SWIFTLY', 'UP', 'AND', 'DOWN', 'HIS', 'ROOM'] +1580-141084-0001-389: hyp=['HE', 'WAS', 'PACING', 'SWIFTLY', 'UP', 'AND', 'DOWN', 'HIS', 'ROOM'] +1580-141084-0002-390: ref=['THIS', 'SET', 'OF', 'ROOMS', 'IS', 'QUITE', 'THE', 'OLDEST', 'IN', 'THE', 'COLLEGE', 'AND', 'IT', 'IS', 'NOT', 'UNUSUAL', 'FOR', 'VISITORS', 'TO', 'GO', 'OVER', 'THEM'] +1580-141084-0002-390: hyp=['THE', 'SET', 'OF', 'ROOMS', 'IS', 'QUITE', 'THE', 'OLDEST', 'IN', 'THE', 'COLLEGE', 'AND', 'IT', 'IS', 'NOT', 'UNUSUAL', 'FOR', 'VISITORS', 'TO', 'GO', 'OVER', 'THEM'] +1580-141084-0003-391: ref=['NO', 'NAMES', 'PLEASE', 'SAID', 'HOLMES', 'AS', 'WE', 'KNOCKED', 'AT', "GILCHRIST'S", 'DOOR'] +1580-141084-0003-391: hyp=['NO', 'NAMES', 'PLEASE', 'SAID', 'HOLMES', 'AS', 'WE', 'KNOCKED', 'AT', "GILKERTH'S", 'DOOR'] +1580-141084-0004-392: ref=['OF', 'COURSE', 'HE', 'DID', 'NOT', 'REALIZE', 'THAT', 'IT', 'WAS', 'I', 'WHO', 'WAS', 'KNOCKING', 'BUT', 'NONE', 'THE', 'LESS', 'HIS', 'CONDUCT', 'WAS', 'VERY', 'UNCOURTEOUS', 'AND', 'INDEED', 'UNDER', 'THE', 'CIRCUMSTANCES', 'RATHER', 'SUSPICIOUS'] +1580-141084-0004-392: hyp=['OF', 'COURSE', 'HE', 'DID', 'NOT', 'REALIZE', 'THAT', 'IT', 'WAS', 'I', 'WHO', 'WAS', 'KNOCKING', 'BUT', 'NONE', 'THE', 'LESS', 'HIS', 'CONDUCT', 'WAS', 'VERY', 'UNCOURTEOUS', 'AND', 'INDEED', 'UNDER', 'THE', 'CIRCUMSTANCES', 'RATHER', 'SUSPICIOUS'] +1580-141084-0005-393: ref=['THAT', 'IS', 'VERY', 'IMPORTANT', 'SAID', 'HOLMES'] +1580-141084-0005-393: hyp=['THAT', 
'IS', 'VERY', 'IMPORTANT', 'SAID', 'HOLMES'] +1580-141084-0006-394: ref=['YOU', "DON'T", 'SEEM', 'TO', 'REALIZE', 'THE', 'POSITION'] +1580-141084-0006-394: hyp=['YOU', "DON'T", 'SEEM', 'TO', 'REALIZE', 'THE', 'POSITION'] +1580-141084-0007-395: ref=['TO', 'MORROW', 'IS', 'THE', 'EXAMINATION'] +1580-141084-0007-395: hyp=['TO', 'MORROW', 'IS', 'THE', 'EXAMINATION'] +1580-141084-0008-396: ref=['I', 'CANNOT', 'ALLOW', 'THE', 'EXAMINATION', 'TO', 'BE', 'HELD', 'IF', 'ONE', 'OF', 'THE', 'PAPERS', 'HAS', 'BEEN', 'TAMPERED', 'WITH', 'THE', 'SITUATION', 'MUST', 'BE', 'FACED'] +1580-141084-0008-396: hyp=['I', 'CANNOT', 'ALLOW', 'THE', 'EXAMINATION', 'TO', 'BE', 'HELD', 'IF', 'ONE', 'OF', 'THE', 'PAPERS', 'HAS', 'BEEN', 'TAMPERED', 'WITH', 'THE', 'SITUATION', 'MUST', 'BE', 'FACED'] +1580-141084-0009-397: ref=['IT', 'IS', 'POSSIBLE', 'THAT', 'I', 'MAY', 'BE', 'IN', 'A', 'POSITION', 'THEN', 'TO', 'INDICATE', 'SOME', 'COURSE', 'OF', 'ACTION'] +1580-141084-0009-397: hyp=['IT', 'IS', 'POSSIBLE', 'THAT', 'I', 'MAY', 'BE', 'IN', 'A', 'POSITION', 'THEN', 'TO', 'INDICATE', 'SOME', 'COURSE', 'OF', 'ACTION'] +1580-141084-0010-398: ref=['I', 'WILL', 'TAKE', 'THE', 'BLACK', 'CLAY', 'WITH', 'ME', 'ALSO', 'THE', 'PENCIL', 'CUTTINGS', 'GOOD', 'BYE'] +1580-141084-0010-398: hyp=['I', 'WILL', 'TAKE', 'THE', 'BLACK', 'CLAY', 'WITH', 'ME', 'ALSO', 'THE', 'PENCIL', 'CUTTINGS', 'GOOD', 'BY'] +1580-141084-0011-399: ref=['WHEN', 'WE', 'WERE', 'OUT', 'IN', 'THE', 'DARKNESS', 'OF', 'THE', 'QUADRANGLE', 'WE', 'AGAIN', 'LOOKED', 'UP', 'AT', 'THE', 'WINDOWS'] +1580-141084-0011-399: hyp=['WHEN', 'WE', 'WERE', 'OUT', 'IN', 'THE', 'DARKNESS', 'OF', 'THE', 'QUADRANGLE', 'WE', 'AGAIN', 'LOOKED', 'UP', 'AT', 'THE', 'WINDOWS'] +1580-141084-0012-400: ref=['THE', 'FOUL', 'MOUTHED', 'FELLOW', 'AT', 'THE', 'TOP'] +1580-141084-0012-400: hyp=['THE', 'FOUL', 'MOUTHED', 'FELLOW', 'AT', 'THE', 'TOP'] +1580-141084-0013-401: ref=['HE', 'IS', 'THE', 'ONE', 'WITH', 'THE', 'WORST', 'RECORD'] +1580-141084-0013-401: hyp=['HE', 'IS', 'THE', 'ONE', 'WITH', 'THE', 'WORST', 'RECORD'] +1580-141084-0014-402: ref=['WHY', 'BANNISTER', 'THE', 'SERVANT', "WHAT'S", 'HIS', 'GAME', 'IN', 'THE', 'MATTER'] +1580-141084-0014-402: hyp=['WHY', 'BANNISTER', 'THE', 'SERVANT', "WHAT'S", 'HIS', 'GAME', 'IN', 'THE', 'MATTER'] +1580-141084-0015-403: ref=['HE', 'IMPRESSED', 'ME', 'AS', 'BEING', 'A', 'PERFECTLY', 'HONEST', 'MAN'] +1580-141084-0015-403: hyp=['HE', 'IMPRESSED', 'ME', 'AS', 'BEING', 'A', 'PERFECTLY', 'HONEST', 'MAN'] +1580-141084-0016-404: ref=['MY', 'FRIEND', 'DID', 'NOT', 'APPEAR', 'TO', 'BE', 'DEPRESSED', 'BY', 'HIS', 'FAILURE', 'BUT', 'SHRUGGED', 'HIS', 'SHOULDERS', 'IN', 'HALF', 'HUMOROUS', 'RESIGNATION'] +1580-141084-0016-404: hyp=['MY', 'FRIEND', 'DID', 'NOT', 'APPEAR', 'TO', 'BE', 'DEPRESSED', 'BY', 'HIS', 'FAILURE', 'BUT', 'SHRUGGED', 'HIS', 'SHOULDERS', 'IN', 'HALF', 'HUMOROUS', 'RESIGNATION'] +1580-141084-0017-405: ref=['NO', 'GOOD', 'MY', 'DEAR', 'WATSON'] +1580-141084-0017-405: hyp=['NO', 'GOOD', 'MY', 'DEAR', 'WATSON'] +1580-141084-0018-406: ref=['I', 'THINK', 'SO', 'YOU', 'HAVE', 'FORMED', 'A', 'CONCLUSION'] +1580-141084-0018-406: hyp=['I', 'THINK', 'SO', 'YOU', 'HAVE', 'FORMED', 'A', 'CONCLUSION'] +1580-141084-0019-407: ref=['YES', 'MY', 'DEAR', 'WATSON', 'I', 'HAVE', 'SOLVED', 'THE', 'MYSTERY'] +1580-141084-0019-407: hyp=['YES', 'MY', 'DEAR', 'WATSON', 'I', 'HAVE', 'SOLVED', 'THE', 'MYSTERY'] +1580-141084-0020-408: ref=['LOOK', 'AT', 'THAT', 'HE', 'HELD', 'OUT', 'HIS', 'HAND'] +1580-141084-0020-408: hyp=['LOOK', 'AT', 'THAT', 'HE', 'HELD', 'OUT', 
'HIS', 'HAND'] +1580-141084-0021-409: ref=['ON', 'THE', 'PALM', 'WERE', 'THREE', 'LITTLE', 'PYRAMIDS', 'OF', 'BLACK', 'DOUGHY', 'CLAY'] +1580-141084-0021-409: hyp=['ON', 'THE', 'PALM', 'WERE', 'THREE', 'LITTLE', 'PYRAMIDS', 'OF', 'BLACK', 'DOUGHY', 'CLAY'] +1580-141084-0022-410: ref=['AND', 'ONE', 'MORE', 'THIS', 'MORNING'] +1580-141084-0022-410: hyp=['AND', 'ONE', 'MORE', 'THIS', 'MORNING'] +1580-141084-0023-411: ref=['IN', 'A', 'FEW', 'HOURS', 'THE', 'EXAMINATION', 'WOULD', 'COMMENCE', 'AND', 'HE', 'WAS', 'STILL', 'IN', 'THE', 'DILEMMA', 'BETWEEN', 'MAKING', 'THE', 'FACTS', 'PUBLIC', 'AND', 'ALLOWING', 'THE', 'CULPRIT', 'TO', 'COMPETE', 'FOR', 'THE', 'VALUABLE', 'SCHOLARSHIP'] +1580-141084-0023-411: hyp=['IN', 'A', 'FEW', 'HOURS', 'THE', 'EXAMINATION', 'WOULD', 'COMMENCE', 'AND', 'HE', 'WAS', 'STILL', 'IN', 'THE', 'DILEMMA', 'BETWEEN', 'MAKING', 'THE', 'FACTS', 'PUBLIC', 'AND', 'ALLOWING', 'THE', 'CULPRIT', 'TO', 'COMPETE', 'FOR', 'THE', 'VALUABLE', 'SCHOLARSHIP'] +1580-141084-0024-412: ref=['HE', 'COULD', 'HARDLY', 'STAND', 'STILL', 'SO', 'GREAT', 'WAS', 'HIS', 'MENTAL', 'AGITATION', 'AND', 'HE', 'RAN', 'TOWARDS', 'HOLMES', 'WITH', 'TWO', 'EAGER', 'HANDS', 'OUTSTRETCHED', 'THANK', 'HEAVEN', 'THAT', 'YOU', 'HAVE', 'COME'] +1580-141084-0024-412: hyp=['HE', 'COULD', 'HARDLY', 'STAND', 'STILL', 'SO', 'GREAT', 'WAS', 'HIS', 'MENTAL', 'AGITATION', 'AND', 'HE', 'RAN', 'TOWARDS', 'HOLMES', 'WITH', 'TWO', 'EAGER', 'HANDS', 'OUTSTRETCHED', 'THANK', 'HEAVEN', 'THAT', 'YOU', 'HAVE', 'COME'] +1580-141084-0025-413: ref=['YOU', 'KNOW', 'HIM', 'I', 'THINK', 'SO'] +1580-141084-0025-413: hyp=['YOU', 'KNOW', 'HIM', 'I', 'THINK', 'SO'] +1580-141084-0026-414: ref=['IF', 'THIS', 'MATTER', 'IS', 'NOT', 'TO', 'BECOME', 'PUBLIC', 'WE', 'MUST', 'GIVE', 'OURSELVES', 'CERTAIN', 'POWERS', 'AND', 'RESOLVE', 'OURSELVES', 'INTO', 'A', 'SMALL', 'PRIVATE', 'COURT', 'MARTIAL'] +1580-141084-0026-414: hyp=['IF', 'THIS', 'MATTER', 'IS', 'NOT', 'TO', 'BECOME', 'PUBLIC', 'WE', 'MUST', 'GIVE', 'OURSELVES', 'CERTAIN', 'POWERS', 'AND', 'RESOLVE', 'OURSELVES', 'INTO', 'A', 'SMALL', 'PRIVATE', 'COURT', 'MARTIAL'] +1580-141084-0027-415: ref=['NO', 'SIR', 'CERTAINLY', 'NOT'] +1580-141084-0027-415: hyp=['NO', 'SIR', 'CERTAINLY', 'NOT'] +1580-141084-0028-416: ref=['THERE', 'WAS', 'NO', 'MAN', 'SIR'] +1580-141084-0028-416: hyp=['THERE', 'WAS', 'NO', 'MAN', 'SIR'] +1580-141084-0029-417: ref=['HIS', 'TROUBLED', 'BLUE', 'EYES', 'GLANCED', 'AT', 'EACH', 'OF', 'US', 'AND', 'FINALLY', 'RESTED', 'WITH', 'AN', 'EXPRESSION', 'OF', 'BLANK', 'DISMAY', 'UPON', 'BANNISTER', 'IN', 'THE', 'FARTHER', 'CORNER'] +1580-141084-0029-417: hyp=['HIS', 'TROUBLED', 'BLUE', 'EYES', 'GLANCED', 'AT', 'EACH', 'OF', 'US', 'AND', 'FINALLY', 'RESTED', 'WITH', 'AN', 'EXPRESSION', 'OF', 'BLANK', 'DISMAY', 'UPON', 'BANNISTER', 'IN', 'THE', 'FARTHER', 'CORNER'] +1580-141084-0030-418: ref=['JUST', 'CLOSE', 'THE', 'DOOR', 'SAID', 'HOLMES'] +1580-141084-0030-418: hyp=['JUST', 'CLOSE', 'THE', 'DOOR', 'SAID', 'HOLMES'] +1580-141084-0031-419: ref=['WE', 'WANT', 'TO', 'KNOW', 'MISTER', 'GILCHRIST', 'HOW', 'YOU', 'AN', 'HONOURABLE', 'MAN', 'EVER', 'CAME', 'TO', 'COMMIT', 'SUCH', 'AN', 'ACTION', 'AS', 'THAT', 'OF', 'YESTERDAY'] +1580-141084-0031-419: hyp=['WE', 'WANT', 'TO', 'KNOW', 'MISTER', 'GILCREST', 'HOW', 'YOU', 'AN', 'HONORABLE', 'MAN', 'EVER', 'CAME', 'TO', 'COMMIT', 'SUCH', 'AN', 'ACTION', 'AS', 'THAT', 'OF', 'YESTERDAY'] +1580-141084-0032-420: ref=['FOR', 'A', 'MOMENT', 'GILCHRIST', 'WITH', 'UPRAISED', 'HAND', 'TRIED', 'TO', 'CONTROL', 'HIS', 'WRITHING', 'FEATURES'] 
+1580-141084-0032-420: hyp=['FOR', 'A', 'MOMENT', 'GILCRIST', 'WITH', 'UPRAISED', 'HAND', 'TRIED', 'TO', 'CONTROL', 'HIS', 'WRITHING', 'FEATURES'] +1580-141084-0033-421: ref=['COME', 'COME', 'SAID', 'HOLMES', 'KINDLY', 'IT', 'IS', 'HUMAN', 'TO', 'ERR', 'AND', 'AT', 'LEAST', 'NO', 'ONE', 'CAN', 'ACCUSE', 'YOU', 'OF', 'BEING', 'A', 'CALLOUS', 'CRIMINAL'] +1580-141084-0033-421: hyp=['COME', 'COME', 'SAID', 'HOLMES', 'KINDLY', 'IT', 'IS', 'HUMAN', 'TO', 'ERR', 'AND', 'AT', 'LEAST', 'NO', 'ONE', 'CAN', 'ACCUSE', 'YOU', 'OF', 'BEING', 'A', 'CALLOUS', 'CRIMINAL'] +1580-141084-0034-422: ref=['WELL', 'WELL', "DON'T", 'TROUBLE', 'TO', 'ANSWER', 'LISTEN', 'AND', 'SEE', 'THAT', 'I', 'DO', 'YOU', 'NO', 'INJUSTICE'] +1580-141084-0034-422: hyp=['WELL', 'WELL', "DON'T", 'TROUBLE', 'TO', 'ANSWER', 'LISTEN', 'AND', 'SEE', 'THAT', 'I', 'DO', 'YOU', 'NO', 'INJUSTICE'] +1580-141084-0035-423: ref=['HE', 'COULD', 'EXAMINE', 'THE', 'PAPERS', 'IN', 'HIS', 'OWN', 'OFFICE'] +1580-141084-0035-423: hyp=['HE', 'COULD', 'EXAMINE', 'THE', 'PAPERS', 'IN', 'HIS', 'OWN', 'OFFICE'] +1580-141084-0036-424: ref=['THE', 'INDIAN', 'I', 'ALSO', 'THOUGHT', 'NOTHING', 'OF'] +1580-141084-0036-424: hyp=['THE', 'INDIAN', 'I', 'ALSO', 'THOUGHT', 'NOTHING', 'OF'] +1580-141084-0037-425: ref=['WHEN', 'I', 'APPROACHED', 'YOUR', 'ROOM', 'I', 'EXAMINED', 'THE', 'WINDOW'] +1580-141084-0037-425: hyp=['WHEN', 'I', 'APPROACHED', 'YOUR', 'ROOM', 'I', 'EXAMINED', 'THE', 'WINDOW'] +1580-141084-0038-426: ref=['NO', 'ONE', 'LESS', 'THAN', 'THAT', 'WOULD', 'HAVE', 'A', 'CHANCE'] +1580-141084-0038-426: hyp=['NO', 'ONE', 'LESS', 'THAN', 'THAT', 'WOULD', 'HAVE', 'A', 'CHANCE'] +1580-141084-0039-427: ref=['I', 'ENTERED', 'AND', 'I', 'TOOK', 'YOU', 'INTO', 'MY', 'CONFIDENCE', 'AS', 'TO', 'THE', 'SUGGESTIONS', 'OF', 'THE', 'SIDE', 'TABLE'] +1580-141084-0039-427: hyp=['I', 'ENTERED', 'AND', 'I', 'TOOK', 'YOU', 'INTO', 'MY', 'CONFIDENCE', 'AS', 'TO', 'THE', 'SUGGESTIONS', 'OF', 'THE', 'SIDE', 'TABLE'] +1580-141084-0040-428: ref=['HE', 'RETURNED', 'CARRYING', 'HIS', 'JUMPING', 'SHOES', 'WHICH', 'ARE', 'PROVIDED', 'AS', 'YOU', 'ARE', 'AWARE', 'WITH', 'SEVERAL', 'SHARP', 'SPIKES'] +1580-141084-0040-428: hyp=['HE', 'RETURNED', 'CARRYING', 'HIS', 'JUMPING', 'SHOES', 'WHICH', 'ARE', 'PROVIDED', 'AS', 'YOU', 'AWARE', 'WITH', 'SEVERAL', 'SHARP', 'SPIKES'] +1580-141084-0041-429: ref=['NO', 'HARM', 'WOULD', 'HAVE', 'BEEN', 'DONE', 'HAD', 'IT', 'NOT', 'BEEN', 'THAT', 'AS', 'HE', 'PASSED', 'YOUR', 'DOOR', 'HE', 'PERCEIVED', 'THE', 'KEY', 'WHICH', 'HAD', 'BEEN', 'LEFT', 'BY', 'THE', 'CARELESSNESS', 'OF', 'YOUR', 'SERVANT'] +1580-141084-0041-429: hyp=['NO', 'HARM', 'WOULD', 'HAVE', 'BEEN', 'DONE', 'HAD', 'IT', 'NOT', 'BEEN', 'THAT', 'AS', 'HE', 'PASSED', 'YOUR', 'DOOR', 'HE', 'PERCEIVED', 'THE', 'KEY', 'WHICH', 'HAD', 'BEEN', 'LEFT', 'BY', 'THE', 'CARELESSNESS', 'OF', 'YOUR', 'SERVANT'] +1580-141084-0042-430: ref=['A', 'SUDDEN', 'IMPULSE', 'CAME', 'OVER', 'HIM', 'TO', 'ENTER', 'AND', 'SEE', 'IF', 'THEY', 'WERE', 'INDEED', 'THE', 'PROOFS'] +1580-141084-0042-430: hyp=['A', 'SUDDEN', 'IMPULSE', 'CAME', 'OVER', 'HIM', 'TO', 'ENTER', 'AND', 'SEE', 'IF', 'THEY', 'WERE', 'INDEED', 'THE', 'PROOFS'] +1580-141084-0043-431: ref=['HE', 'PUT', 'HIS', 'SHOES', 'ON', 'THE', 'TABLE'] +1580-141084-0043-431: hyp=['HE', 'PUT', 'HIS', 'SHOES', 'ON', 'THE', 'TABLE'] +1580-141084-0044-432: ref=['GLOVES', 'SAID', 'THE', 'YOUNG', 'MAN'] +1580-141084-0044-432: hyp=['GLOVES', 'SAID', 'THE', 'YOUNG', 'MAN'] +1580-141084-0045-433: ref=['SUDDENLY', 'HE', 'HEARD', 'HIM', 'AT', 'THE', 'VERY', 'DOOR', 
'THERE', 'WAS', 'NO', 'POSSIBLE', 'ESCAPE'] +1580-141084-0045-433: hyp=['SUDDENLY', 'HE', 'HEARD', 'HIM', 'AT', 'THE', 'VERY', 'DOOR', 'THERE', 'WAS', 'NO', 'POSSIBLE', 'ESCAPE'] +1580-141084-0046-434: ref=['HAVE', 'I', 'TOLD', 'THE', 'TRUTH', 'MISTER', 'GILCHRIST'] +1580-141084-0046-434: hyp=['HAVE', 'I', 'TOLD', 'THE', 'TRUTH', 'MISTER', 'GILCRIST'] +1580-141084-0047-435: ref=['I', 'HAVE', 'A', 'LETTER', 'HERE', 'MISTER', 'SOAMES', 'WHICH', 'I', 'WROTE', 'TO', 'YOU', 'EARLY', 'THIS', 'MORNING', 'IN', 'THE', 'MIDDLE', 'OF', 'A', 'RESTLESS', 'NIGHT'] +1580-141084-0047-435: hyp=['I', 'HAVE', 'A', 'LETTER', 'HERE', 'MISTER', 'SOLMES', 'WHICH', 'I', 'WROTE', 'TO', 'YOU', 'EARLY', 'THIS', 'MORNING', 'IN', 'THE', 'MIDDLE', 'OF', 'A', 'RESTLESS', 'NIGHT'] +1580-141084-0048-436: ref=['IT', 'WILL', 'BE', 'CLEAR', 'TO', 'YOU', 'FROM', 'WHAT', 'I', 'HAVE', 'SAID', 'THAT', 'ONLY', 'YOU', 'COULD', 'HAVE', 'LET', 'THIS', 'YOUNG', 'MAN', 'OUT', 'SINCE', 'YOU', 'WERE', 'LEFT', 'IN', 'THE', 'ROOM', 'AND', 'MUST', 'HAVE', 'LOCKED', 'THE', 'DOOR', 'WHEN', 'YOU', 'WENT', 'OUT'] +1580-141084-0048-436: hyp=['IT', 'WILL', 'BE', 'CLEAR', 'TO', 'YOU', 'FROM', 'WHAT', 'I', 'HAVE', 'SAID', 'THAT', 'ONLY', 'YOU', 'COULD', 'HAVE', 'LET', 'THIS', 'YOUNG', 'MAN', 'OUT', 'SINCE', 'YOU', 'WERE', 'LEFT', 'IN', 'THE', 'ROOM', 'AND', 'MUST', 'HAVE', 'LOCKED', 'THE', 'DOOR', 'WHEN', 'YOU', 'WENT', 'OUT'] +1580-141084-0049-437: ref=['IT', 'WAS', 'SIMPLE', 'ENOUGH', 'SIR', 'IF', 'YOU', 'ONLY', 'HAD', 'KNOWN', 'BUT', 'WITH', 'ALL', 'YOUR', 'CLEVERNESS', 'IT', 'WAS', 'IMPOSSIBLE', 'THAT', 'YOU', 'COULD', 'KNOW'] +1580-141084-0049-437: hyp=['IT', 'WAS', 'SIMPLE', 'ENOUGH', 'SIR', 'IF', 'YOU', 'ONLY', 'HAD', 'KNOWN', 'BUT', 'WITH', 'ALL', 'YOUR', 'CLEVERNESS', 'IT', 'WAS', 'IMPOSSIBLE', 'THAT', 'YOU', 'COULD', 'KNOW'] +1580-141084-0050-438: ref=['IF', 'MISTER', 'SOAMES', 'SAW', 'THEM', 'THE', 'GAME', 'WAS', 'UP'] +1580-141084-0050-438: hyp=['IF', 'MISTER', 'SOLMES', 'SAW', 'THEM', 'THE', 'GAME', 'WAS', 'UP'] +1995-1826-0000-439: ref=['IN', 'THE', 'DEBATE', 'BETWEEN', 'THE', 'SENIOR', 'SOCIETIES', 'HER', 'DEFENCE', 'OF', 'THE', 'FIFTEENTH', 'AMENDMENT', 'HAD', 'BEEN', 'NOT', 'ONLY', 'A', 'NOTABLE', 'BIT', 'OF', 'REASONING', 'BUT', 'DELIVERED', 'WITH', 'REAL', 'ENTHUSIASM'] +1995-1826-0000-439: hyp=['IN', 'THE', 'DEBATE', 'BETWEEN', 'THE', 'SENIOR', 'SOCIETIES', 'HER', 'DEFENSE', 'OF', 'THE', 'FIFTEENTH', 'AMENDMENT', 'HAD', 'BEEN', 'NOT', 'ONLY', 'A', 'NOTABLE', 'BIT', 'OF', 'REASONING', 'BUT', 'DELIVERED', 'WITH', 'REAL', 'ENTHUSIASM'] +1995-1826-0001-440: ref=['THE', 'SOUTH', 'SHE', 'HAD', 'NOT', 'THOUGHT', 'OF', 'SERIOUSLY', 'AND', 'YET', 'KNOWING', 'OF', 'ITS', 'DELIGHTFUL', 'HOSPITALITY', 'AND', 'MILD', 'CLIMATE', 'SHE', 'WAS', 'NOT', 'AVERSE', 'TO', 'CHARLESTON', 'OR', 'NEW', 'ORLEANS'] +1995-1826-0001-440: hyp=['THE', 'SOUTH', 'SHE', 'HAD', 'NOT', 'THOUGHT', 'OF', 'SERIOUSLY', 'AND', 'YET', 'KNOWING', 'OF', 'ITS', 'DELIGHTFUL', 'HOSPITALITY', 'AND', 'MILD', 'CLIMATE', 'SHE', 'WAS', 'NOT', 'AVERSE', 'TO', 'CHARLESTON', 'OR', 'NEW', 'ORLEANS'] +1995-1826-0002-441: ref=['JOHN', 'TAYLOR', 'WHO', 'HAD', 'SUPPORTED', 'HER', 'THROUGH', 'COLLEGE', 'WAS', 'INTERESTED', 'IN', 'COTTON'] +1995-1826-0002-441: hyp=['JOHN', 'TAYLOR', 'WHO', 'HAD', 'SUPPORTED', 'HER', 'THROUGH', 'COLLEGE', 'WAS', 'INTERESTED', 'IN', 'COTTON'] +1995-1826-0003-442: ref=['BETTER', 'GO', 'HE', 'HAD', 'COUNSELLED', 'SENTENTIOUSLY'] +1995-1826-0003-442: hyp=['BETTER', 'GO', 'HE', 'HAD', 'COUNSELLED', 'SENTENTIOUSLY'] +1995-1826-0004-443: ref=['MIGHT', 'LEARN', 
'SOMETHING', 'USEFUL', 'DOWN', 'THERE'] +1995-1826-0004-443: hyp=['MIGHT', 'LEARN', 'SOMETHING', 'USEFUL', 'DOWN', 'THERE'] +1995-1826-0005-444: ref=['BUT', 'JOHN', "THERE'S", 'NO', 'SOCIETY', 'JUST', 'ELEMENTARY', 'WORK'] +1995-1826-0005-444: hyp=['BUT', 'JOHN', "THERE'S", 'NO', 'SOCIETY', 'JUST', 'ELEMENTARY', 'WORK'] +1995-1826-0006-445: ref=['BEEN', 'LOOKING', 'UP', 'TOOMS', 'COUNTY'] +1995-1826-0006-445: hyp=['BEEN', 'LOOKING', 'UP', 'TOMBS', 'COUNTY'] +1995-1826-0007-446: ref=['FIND', 'SOME', 'CRESSWELLS', 'THERE', 'BIG', 'PLANTATIONS', 'RATED', 'AT', 'TWO', 'HUNDRED', 'AND', 'FIFTY', 'THOUSAND', 'DOLLARS'] +1995-1826-0007-446: hyp=['FIND', 'SOME', 'CROSS', 'WELLS', 'THERE', 'BIG', 'PLANTATIONS', 'RATED', 'AT', 'TWO', 'HUNDRED', 'AND', 'FIFTY', 'THOUSAND', 'DOLLARS'] +1995-1826-0008-447: ref=['SOME', 'OTHERS', 'TOO', 'BIG', 'COTTON', 'COUNTY'] +1995-1826-0008-447: hyp=['SOME', 'OTHERS', 'TOO', 'BIG', 'COTTON', 'COUNTY'] +1995-1826-0009-448: ref=['YOU', 'OUGHT', 'TO', 'KNOW', 'JOHN', 'IF', 'I', 'TEACH', 'NEGROES', "I'LL", 'SCARCELY', 'SEE', 'MUCH', 'OF', 'PEOPLE', 'IN', 'MY', 'OWN', 'CLASS'] +1995-1826-0009-448: hyp=['YOU', 'OUGHT', 'TO', 'KNOW', 'JOHN', 'IF', 'I', 'TEACH', 'NEGROES', "I'LL", 'SCARCELY', 'SEE', 'MUCH', 'OF', 'PEOPLE', 'IN', 'MY', 'OWN', 'CLASS'] +1995-1826-0010-449: ref=['AT', 'ANY', 'RATE', 'I', 'SAY', 'GO'] +1995-1826-0010-449: hyp=['AT', 'ANY', 'RATE', 'I', 'SAY', 'GO'] +1995-1826-0011-450: ref=['HERE', 'SHE', 'WAS', 'TEACHING', 'DIRTY', 'CHILDREN', 'AND', 'THE', 'SMELL', 'OF', 'CONFUSED', 'ODORS', 'AND', 'BODILY', 'PERSPIRATION', 'WAS', 'TO', 'HER', 'AT', 'TIMES', 'UNBEARABLE'] +1995-1826-0011-450: hyp=['HERE', 'SHE', 'WAS', 'TEACHING', 'DIRTY', 'CHILDREN', 'AND', 'THE', 'SMELL', 'OF', 'CONFUSED', 'ODOURS', 'AND', 'BODILY', 'PERSPIRATION', 'WAS', 'TO', 'HER', 'AT', 'TIMES', 'UNBEARABLE'] +1995-1826-0012-451: ref=['SHE', 'WANTED', 'A', 'GLANCE', 'OF', 'THE', 'NEW', 'BOOKS', 'AND', 'PERIODICALS', 'AND', 'TALK', 'OF', 'GREAT', 'PHILANTHROPIES', 'AND', 'REFORMS'] +1995-1826-0012-451: hyp=['SHE', 'WANTED', 'A', 'GLANCE', 'OF', 'THE', 'NEW', 'BOOKS', 'AND', 'PERIODICALS', 'AND', 'TALK', 'OF', 'GREAT', 'PHILANTHROPIES', 'AND', 'REFORMS'] +1995-1826-0013-452: ref=['SO', 'FOR', 'THE', 'HUNDREDTH', 'TIME', 'SHE', 'WAS', 'THINKING', 'TODAY', 'AS', 'SHE', 'WALKED', 'ALONE', 'UP', 'THE', 'LANE', 'BACK', 'OF', 'THE', 'BARN', 'AND', 'THEN', 'SLOWLY', 'DOWN', 'THROUGH', 'THE', 'BOTTOMS'] +1995-1826-0013-452: hyp=['SO', 'FOR', 'THE', 'HUNDREDTH', 'TIME', 'SHE', 'WAS', 'THINKING', 'TO', 'DAY', 'AS', 'SHE', 'WALKED', 'ALONE', 'UP', 'THE', 'LANE', 'BACK', 'OF', 'THE', 'BARN', 'AND', 'THEN', 'SLOWLY', 'DOWN', 'THROUGH', 'THE', 'BOTTOMS'] +1995-1826-0014-453: ref=['COTTON', 'SHE', 'PAUSED'] +1995-1826-0014-453: hyp=['CARTON', 'SHE', 'PAUSED'] +1995-1826-0015-454: ref=['SHE', 'HAD', 'ALMOST', 'FORGOTTEN', 'THAT', 'IT', 'WAS', 'HERE', 'WITHIN', 'TOUCH', 'AND', 'SIGHT'] +1995-1826-0015-454: hyp=['SHE', 'HAD', 'ALMOST', 'FORGOTTEN', 'THAT', 'IT', 'WAS', 'HERE', 'WITHIN', 'TOUCH', 'AND', 'SIGHT'] +1995-1826-0016-455: ref=['THE', 'GLIMMERING', 'SEA', 'OF', 'DELICATE', 'LEAVES', 'WHISPERED', 'AND', 'MURMURED', 'BEFORE', 'HER', 'STRETCHING', 'AWAY', 'TO', 'THE', 'NORTHWARD'] +1995-1826-0016-455: hyp=['THE', 'GLIMMERING', 'SEA', 'OF', 'DELICATE', 'LEAVES', 'WHISPERED', 'AND', 'MURMURED', 'BEFORE', 'HER', 'STRETCHING', 'AWAY', 'TO', 'THE', 'NORTHWARD'] +1995-1826-0017-456: ref=['THERE', 'MIGHT', 'BE', 'A', 'BIT', 'OF', 'POETRY', 'HERE', 'AND', 'THERE', 'BUT', 'MOST', 'OF', 'THIS', 'PLACE', 
'WAS', 'SUCH', 'DESPERATE', 'PROSE'] +1995-1826-0017-456: hyp=['THERE', 'MIGHT', 'BE', 'A', 'BIT', 'OF', 'POETRY', 'HERE', 'AND', 'THERE', 'BUT', 'MOST', 'OF', 'THIS', 'PLACE', 'WAS', 'SUCH', 'DESPERATE', 'PROSE'] +1995-1826-0018-457: ref=['HER', 'REGARD', 'SHIFTED', 'TO', 'THE', 'GREEN', 'STALKS', 'AND', 'LEAVES', 'AGAIN', 'AND', 'SHE', 'STARTED', 'TO', 'MOVE', 'AWAY'] +1995-1826-0018-457: hyp=['HER', 'REGARD', 'SHIFTED', 'TO', 'THE', 'GREEN', 'STALKS', 'AND', 'LEAVES', 'AGAIN', 'AND', 'SHE', 'STARTED', 'TO', 'MOVE', 'AWAY'] +1995-1826-0019-458: ref=['COTTON', 'IS', 'A', 'WONDERFUL', 'THING', 'IS', 'IT', 'NOT', 'BOYS', 'SHE', 'SAID', 'RATHER', 'PRIMLY'] +1995-1826-0019-458: hyp=['COTTON', 'IS', 'A', 'WONDERFUL', 'THING', 'IS', 'IT', 'NOT', 'BOYS', 'SHE', 'SAID', 'RATHER', 'PRIMLY'] +1995-1826-0020-459: ref=['MISS', 'TAYLOR', 'DID', 'NOT', 'KNOW', 'MUCH', 'ABOUT', 'COTTON', 'BUT', 'AT', 'LEAST', 'ONE', 'MORE', 'REMARK', 'SEEMED', 'CALLED', 'FOR'] +1995-1826-0020-459: hyp=['MISS', 'TAYLOR', 'DID', 'NOT', 'KNOW', 'MUCH', 'ABOUT', 'COTTON', 'BUT', 'AT', 'LEAST', 'ONE', 'MORE', 'REMARKED', 'SEEMED', 'CALLED', 'FOR'] +1995-1826-0021-460: ref=["DON'T", 'KNOW', 'WELL', 'OF', 'ALL', 'THINGS', 'INWARDLY', 'COMMENTED', 'MISS', 'TAYLOR', 'LITERALLY', 'BORN', 'IN', 'COTTON', 'AND', 'OH', 'WELL', 'AS', 'MUCH', 'AS', 'TO', 'ASK', "WHAT'S", 'THE', 'USE', 'SHE', 'TURNED', 'AGAIN', 'TO', 'GO'] +1995-1826-0021-460: hyp=["DON'T", 'KNOW', 'WELL', 'OF', 'ALL', 'THINGS', 'INWARDLY', 'COMMENTED', 'MISS', 'TAYLOR', 'LITERALLY', 'BORN', 'IN', 'COTTON', 'AND', 'OH', 'WELL', 'AS', 'MUCH', 'AS', 'TO', 'ASK', "WHAT'S", 'THE', 'USE', 'SHE', 'TURNED', 'AGAIN', 'TO', 'GO'] +1995-1826-0022-461: ref=['I', 'SUPPOSE', 'THOUGH', "IT'S", 'TOO', 'EARLY', 'FOR', 'THEM', 'THEN', 'CAME', 'THE', 'EXPLOSION'] +1995-1826-0022-461: hyp=['I', 'SUPPOSE', 'THOUGH', "IT'S", 'TOO', 'EARLY', 'FOR', 'THEM', 'THEN', 'CAME', 'THE', 'EXPLOSION'] +1995-1826-0023-462: ref=['GOOBERS', "DON'T", 'GROW', 'ON', 'THE', 'TOPS', 'OF', 'VINES', 'BUT', 'UNDERGROUND', 'ON', 'THE', 'ROOTS', 'LIKE', 'YAMS', 'IS', 'THAT', 'SO'] +1995-1826-0023-462: hyp=['GOOBBLES', "DON'T", 'GROW', 'ON', 'DE', 'TOPS', 'OF', 'VINES', 'BUT', 'ON', 'DE', 'GROUN', 'ON', 'DE', 'ROOTS', 'LIKE', 'YAMS', 'IS', 'THAT', 'SO'] +1995-1826-0024-463: ref=['THE', 'GOLDEN', 'FLEECE', "IT'S", 'THE', 'SILVER', 'FLEECE', 'HE', 'HARKENED'] +1995-1826-0024-463: hyp=['THE', 'GOLDEN', 'FLEECE', "IT'S", 'THE', 'SILVER', 'FLEECE', 'HE', 'HEARKENED'] +1995-1826-0025-464: ref=['SOME', 'TIME', "YOU'LL", 'TELL', 'ME', 'PLEASE', "WON'T", 'YOU'] +1995-1826-0025-464: hyp=['SOMETIME', "YOU'LL", 'TELL', 'ME', 'PLEASE', "WON'T", 'YOU'] +1995-1826-0026-465: ref=['NOW', 'FOR', 'ONE', 'LITTLE', 'HALF', 'HOUR', 'SHE', 'HAD', 'BEEN', 'A', 'WOMAN', 'TALKING', 'TO', 'A', 'BOY', 'NO', 'NOT', 'EVEN', 'THAT', 'SHE', 'HAD', 'BEEN', 'TALKING', 'JUST', 'TALKING', 'THERE', 'WERE', 'NO', 'PERSONS', 'IN', 'THE', 'CONVERSATION', 'JUST', 'THINGS', 'ONE', 'THING', 'COTTON'] +1995-1826-0026-465: hyp=['NOW', 'FOR', 'ONE', 'LITTLE', 'HALF', 'HOUR', 'SHE', 'HAD', 'BEEN', 'A', 'WOMAN', 'TALKING', 'TO', 'A', 'BOY', 'NO', 'NOT', 'EVEN', 'THAT', 'SHE', 'HAD', 'BEEN', 'TALKING', 'JUST', 'TALKING', 'THERE', 'WERE', 'NO', 'PERSONS', 'IN', 'THE', 'CONVERSATION', 'JUST', 'THINGS', 'ONE', 'THING', 'COTTON'] +1995-1836-0000-466: ref=['THE', 'HON', 'CHARLES', 'SMITH', 'MISS', "SARAH'S", 'BROTHER', 'WAS', 'WALKING', 'SWIFTLY', 'UPTOWN', 'FROM', 'MISTER', "EASTERLY'S", 'WALL', 'STREET', 'OFFICE', 'AND', 'HIS', 'FACE', 'WAS', 'PALE'] 
+1995-1836-0000-466: hyp=['THE', 'HONOURABLE', 'CHARLES', 'SMITH', 'MISS', "SARAH'S", 'BROTHER', 'WAS', 'WALKING', 'SWIFTLY', 'UP', 'TOWN', 'FROM', 'MISTER', "EASTERLY'S", 'WALL', 'STREET', 'OFFICE', 'AND', 'HIS', 'FACE', 'WAS', 'PALE'] +1995-1836-0001-467: ref=['AT', 'LAST', 'THE', 'COTTON', 'COMBINE', 'WAS', 'TO', 'ALL', 'APPEARANCES', 'AN', 'ASSURED', 'FACT', 'AND', 'HE', 'WAS', 'SLATED', 'FOR', 'THE', 'SENATE'] +1995-1836-0001-467: hyp=['AT', 'LAST', 'THE', 'COTTON', 'COMBINE', 'WAS', 'TO', 'ALL', 'APPEARANCES', 'AN', 'ASSURED', 'FACT', 'AND', 'HE', 'WAS', 'SLATED', 'FOR', 'THE', 'SENATE'] +1995-1836-0002-468: ref=['WHY', 'SHOULD', 'HE', 'NOT', 'BE', 'AS', 'OTHER', 'MEN'] +1995-1836-0002-468: hyp=['WHY', 'SHOULD', 'HE', 'NOT', 'BE', 'AS', 'OTHER', 'MEN'] +1995-1836-0003-469: ref=['SHE', 'WAS', 'NOT', 'HERSELF', 'A', 'NOTABLY', 'INTELLIGENT', 'WOMAN', 'SHE', 'GREATLY', 'ADMIRED', 'INTELLIGENCE', 'OR', 'WHATEVER', 'LOOKED', 'TO', 'HER', 'LIKE', 'INTELLIGENCE', 'IN', 'OTHERS'] +1995-1836-0003-469: hyp=['SHE', 'WAS', 'NOT', 'HERSELF', 'A', 'NOTABLY', 'INTELLIGENT', 'WOMAN', 'SHE', 'GREATLY', 'ADMIRED', 'INTELLIGENCE', 'OR', 'WHATEVER', 'LOOKED', 'TO', 'HER', 'LIKE', 'INTELLIGENCE', 'IN', 'OTHERS'] +1995-1836-0004-470: ref=['AS', 'SHE', 'AWAITED', 'HER', 'GUESTS', 'SHE', 'SURVEYED', 'THE', 'TABLE', 'WITH', 'BOTH', 'SATISFACTION', 'AND', 'DISQUIETUDE', 'FOR', 'HER', 'SOCIAL', 'FUNCTIONS', 'WERE', 'FEW', 'TONIGHT', 'THERE', 'WERE', 'SHE', 'CHECKED', 'THEM', 'OFF', 'ON', 'HER', 'FINGERS', 'SIR', 'JAMES', 'CREIGHTON', 'THE', 'RICH', 'ENGLISH', 'MANUFACTURER', 'AND', 'LADY', 'CREIGHTON', 'MISTER', 'AND', 'MISSUS', 'VANDERPOOL', 'MISTER', 'HARRY', 'CRESSWELL', 'AND', 'HIS', 'SISTER', 'JOHN', 'TAYLOR', 'AND', 'HIS', 'SISTER', 'AND', 'MISTER', 'CHARLES', 'SMITH', 'WHOM', 'THE', 'EVENING', 'PAPERS', 'MENTIONED', 'AS', 'LIKELY', 'TO', 'BE', 'UNITED', 'STATES', 'SENATOR', 'FROM', 'NEW', 'JERSEY', 'A', 'SELECTION', 'OF', 'GUESTS', 'THAT', 'HAD', 'BEEN', 'DETERMINED', 'UNKNOWN', 'TO', 'THE', 'HOSTESS', 'BY', 'THE', 'MEETING', 'OF', 'COTTON', 'INTERESTS', 'EARLIER', 'IN', 'THE', 'DAY'] +1995-1836-0004-470: hyp=['AS', 'SHE', 'AWAITED', 'HER', 'GUEST', 'SHE', 'SURVEYED', 'THE', 'TABLE', 'WITH', 'BOTH', 'SATISFACTION', 'AND', 'DISQUIETUDE', 'FOR', 'HER', 'SOCIAL', 'FUNCTIONS', 'WERE', 'FEW', 'TO', 'NIGHT', 'THERE', 'WERE', 'SHE', 'CHECKED', 'THEM', 'OFF', 'ON', 'HER', 'FINGERS', 'SIR', 'JAMES', 'CRIGHTON', 'THE', 'RICH', 'ENGLISH', 'MANUFACTURER', 'AND', 'LADY', 'CRYIGHTON', 'MISTER', 'AND', 'MISSUS', 'VANERPOLE', 'MISTER', 'HARRY', 'CRASWELL', 'AND', 'HIS', 'SISTER', 'JOHN', 'TAYLOR', 'AND', 'HIS', 'SISTER', 'AND', 'MISTER', 'CHARLESS', 'SMITH', 'WHOM', 'THE', 'EVENING', 'PAPERS', 'MENTIONED', 'AS', 'LIKELY', 'TO', 'BE', 'UNITED', 'STATES', 'SENATOR', 'FROM', 'NEW', 'JERSEY', 'A', 'SELECTION', 'OF', 'GUESTS', 'THAT', 'HAD', 'BEEN', 'DETERMINED', 'UNKNOWN', 'TO', 'THE', 'HOSTESS', 'BY', 'THE', 'MEETING', 'OF', 'COTTON', 'INTERESTS', 'EARLIER', 'IN', 'THE', 'DAY'] +1995-1836-0005-471: ref=['MISSUS', 'GREY', 'HAD', 'MET', 'SOUTHERNERS', 'BEFORE', 'BUT', 'NOT', 'INTIMATELY', 'AND', 'SHE', 'ALWAYS', 'HAD', 'IN', 'MIND', 'VIVIDLY', 'THEIR', 'CRUELTY', 'TO', 'POOR', 'NEGROES', 'A', 'SUBJECT', 'SHE', 'MADE', 'A', 'POINT', 'OF', 'INTRODUCING', 'FORTHWITH'] +1995-1836-0005-471: hyp=['MISSUS', 'GRAY', 'HAD', 'MET', 'SOUTHERNERS', 'BEFORE', 'BUT', 'NOT', 'INTIMATELY', 'AND', 'SHE', 'ALWAYS', 'HAD', 'IN', 'MIND', 'VIVIDLY', 'THEIR', 'CRUELTY', 'TO', 'POOR', 'NEGROES', 'A', 'SUBJECT', 'SHE', 'MADE', 'A', 'POINT', 'OF', 
'INTRODUCING', 'FORTHWITH'] +1995-1836-0006-472: ref=['SHE', 'WAS', 'THEREFORE', 'MOST', 'AGREEABLY', 'SURPRISED', 'TO', 'HEAR', 'MISTER', 'CRESSWELL', 'EXPRESS', 'HIMSELF', 'SO', 'CORDIALLY', 'AS', 'APPROVING', 'OF', 'NEGRO', 'EDUCATION'] +1995-1836-0006-472: hyp=['SHE', 'WAS', 'THEREFORE', 'MOST', 'AGREEABLY', 'SURPRISED', 'TO', 'HEAR', 'MISTER', 'CRASWELL', 'EXPRESS', 'HIMSELF', 'SO', 'CORDIALLY', 'AS', 'APPROVING', 'OF', 'NEGRO', 'EDUCATION'] +1995-1836-0007-473: ref=['BUT', 'YOU', 'BELIEVE', 'IN', 'SOME', 'EDUCATION', 'ASKED', 'MARY', 'TAYLOR'] +1995-1836-0007-473: hyp=['DO', 'YOU', 'BELIEVE', 'IN', 'SOME', 'EDUCATION', 'ASKED', 'MARY', 'TAYLOR'] +1995-1836-0008-474: ref=['I', 'BELIEVE', 'IN', 'THE', 'TRAINING', 'OF', 'PEOPLE', 'TO', 'THEIR', 'HIGHEST', 'CAPACITY', 'THE', 'ENGLISHMAN', 'HERE', 'HEARTILY', 'SECONDED', 'HIM'] +1995-1836-0008-474: hyp=['I', 'BELIEVE', 'IN', 'THE', 'TRAINING', 'OF', 'PEOPLE', 'TO', 'THE', 'HIGHEST', 'CAPACITY', 'THE', 'ENGLISHMAN', 'HERE', 'HEARTILY', 'SECONDED', 'HIM'] +1995-1836-0009-475: ref=['BUT', 'CRESSWELL', 'ADDED', 'SIGNIFICANTLY', 'CAPACITY', 'DIFFERS', 'ENORMOUSLY', 'BETWEEN', 'RACES'] +1995-1836-0009-475: hyp=['BUT', 'CRASWELL', 'ADDED', 'SIGNIFICANTLY', 'CAPACITY', 'DIFFERS', 'ENORMOUSLY', 'BETWEEN', 'RACES'] +1995-1836-0010-476: ref=['THE', 'VANDERPOOLS', 'WERE', 'SURE', 'OF', 'THIS', 'AND', 'THE', 'ENGLISHMAN', 'INSTANCING', 'INDIA', 'BECAME', 'QUITE', 'ELOQUENT', 'MISSUS', 'GREY', 'WAS', 'MYSTIFIED', 'BUT', 'HARDLY', 'DARED', 'ADMIT', 'IT', 'THE', 'GENERAL', 'TREND', 'OF', 'THE', 'CONVERSATION', 'SEEMED', 'TO', 'BE', 'THAT', 'MOST', 'INDIVIDUALS', 'NEEDED', 'TO', 'BE', 'SUBMITTED', 'TO', 'THE', 'SHARPEST', 'SCRUTINY', 'BEFORE', 'BEING', 'ALLOWED', 'MUCH', 'EDUCATION', 'AND', 'AS', 'FOR', 'THE', 'LOWER', 'RACES', 'IT', 'WAS', 'SIMPLY', 'CRIMINAL', 'TO', 'OPEN', 'SUCH', 'USELESS', 'OPPORTUNITIES', 'TO', 'THEM'] +1995-1836-0010-476: hyp=['THE', 'VAN', 'DERPOOLS', 'WERE', 'SURE', 'OF', 'THIS', 'AND', 'THE', 'ENGLISHMAN', 'INSTANCING', 'INDIA', 'BECAME', 'QUITE', 'ELOQUENT', 'MISSUS', 'GRAY', 'WAS', 'MYSTIFIED', 'BUT', 'HARDLY', 'DARED', 'ADMIT', 'IT', 'THE', 'GENERAL', 'TREND', 'OF', 'THE', 'CONVERSATION', 'SEEMED', 'TO', 'BE', 'THAT', 'MOST', 'INDIVIDUALS', 'NEEDED', 'TO', 'BE', 'SUBMITTED', 'TO', 'THE', 'SHARPEST', 'SCRUTINY', 'BEFORE', 'BEING', 'ALLOWED', 'MUCH', 'EDUCATION', 'AND', 'AS', 'FOR', 'THE', 'LOWER', 'RACES', 'IT', 'WAS', 'SIMPLY', 'CRIMINAL', 'TO', 'OPEN', 'SUCH', 'USELESS', 'OPPORTUNITIES', 'TO', 'THEM'] +1995-1836-0011-477: ref=['POSITIVELY', 'HEROIC', 'ADDED', 'CRESSWELL', 'AVOIDING', 'HIS', "SISTER'S", 'EYES'] +1995-1836-0011-477: hyp=['POSITIVELY', 'HEROIC', 'ADDED', 'CRASWELL', 'AVOIDING', 'HIS', "SISTER'S", 'EYES'] +1995-1836-0012-478: ref=['BUT', "WE'RE", 'NOT', 'ER', 'EXACTLY', 'WELCOMED'] +1995-1836-0012-478: hyp=['BUT', "WE'RE", 'NOT', 'A', 'EXACTLY', 'WELCOME'] +1995-1836-0013-479: ref=['MARY', 'TAYLOR', 'HOWEVER', 'RELATED', 'THE', 'TALE', 'OF', 'ZORA', 'TO', 'MISSUS', "GREY'S", 'PRIVATE', 'EAR', 'LATER'] +1995-1836-0013-479: hyp=['MARY', 'TAYLOR', 'HOWEVER', 'RELATED', 'THE', 'TALE', 'OF', 'ZORA', 'TO', 'MISSUS', "GRAY'S", 'PRIVATE', 'EAR', 'LATER'] +1995-1836-0014-480: ref=['FORTUNATELY', 'SAID', 'MISTER', 'VANDERPOOL', 'NORTHERNERS', 'AND', 'SOUTHERNERS', 'ARE', 'ARRIVING', 'AT', 'A', 'BETTER', 'MUTUAL', 'UNDERSTANDING', 'ON', 'MOST', 'OF', 'THESE', 'MATTERS'] +1995-1836-0014-480: hyp=['FORTUNATELY', 'SAID', 'MISTER', 'VANERPOLE', 'NORTHERNERS', 'AND', 'SOUTHERNERS', 'ARE', 'ARRIVING', 'AT', 'A', 'BETTER', 
'MUTUAL', 'UNDERSTANDING', 'ON', 'MOST', 'OF', 'THESE', 'MATTERS'] +1995-1837-0000-481: ref=['HE', 'KNEW', 'THE', 'SILVER', 'FLEECE', 'HIS', 'AND', "ZORA'S", 'MUST', 'BE', 'RUINED'] +1995-1837-0000-481: hyp=['HE', 'KNEW', 'THE', 'SILVER', 'FLEECE', 'HIS', 'AND', "TSORA'S", 'MUST', 'BE', 'RUINED'] +1995-1837-0001-482: ref=['IT', 'WAS', 'THE', 'FIRST', 'GREAT', 'SORROW', 'OF', 'HIS', 'LIFE', 'IT', 'WAS', 'NOT', 'SO', 'MUCH', 'THE', 'LOSS', 'OF', 'THE', 'COTTON', 'ITSELF', 'BUT', 'THE', 'FANTASY', 'THE', 'HOPES', 'THE', 'DREAMS', 'BUILT', 'AROUND', 'IT'] +1995-1837-0001-482: hyp=['IT', 'WAS', 'THE', 'FIRST', 'GREAT', 'SORROW', 'OF', 'HIS', 'LIFE', 'IT', 'WAS', 'NOT', 'SO', 'MUCH', 'THE', 'LOSS', 'OF', 'THE', 'COTTON', 'ITSELF', 'BUT', 'THE', 'FANTASY', 'THE', 'HOPES', 'THE', 'DREAMS', 'BUILT', 'AROUND', 'IT'] +1995-1837-0002-483: ref=['AH', 'THE', 'SWAMP', 'THE', 'CRUEL', 'SWAMP'] +1995-1837-0002-483: hyp=['AH', 'THE', 'SWAMP', 'THE', 'CRUEL', 'SWAMP'] +1995-1837-0003-484: ref=['THE', 'REVELATION', 'OF', 'HIS', 'LOVE', 'LIGHTED', 'AND', 'BRIGHTENED', 'SLOWLY', 'TILL', 'IT', 'FLAMED', 'LIKE', 'A', 'SUNRISE', 'OVER', 'HIM', 'AND', 'LEFT', 'HIM', 'IN', 'BURNING', 'WONDER'] +1995-1837-0003-484: hyp=['THE', 'REVELATION', 'OF', 'HIS', 'LOVE', 'LIGHTED', 'AND', 'BRIGHTENED', 'SLOWLY', 'TILL', 'IT', 'FLAMED', 'LIKE', 'A', 'SUNRISE', 'OVER', 'HIM', 'AND', 'LEFT', 'HIM', 'IN', 'BURNING', 'WONDER'] +1995-1837-0004-485: ref=['HE', 'PANTED', 'TO', 'KNOW', 'IF', 'SHE', 'TOO', 'KNEW', 'OR', 'KNEW', 'AND', 'CARED', 'NOT', 'OR', 'CARED', 'AND', 'KNEW', 'NOT'] +1995-1837-0004-485: hyp=['HE', 'PANTED', 'TO', 'KNOW', 'IF', 'SHE', 'TOO', 'KNEW', 'OR', 'KNEW', 'AND', 'CARED', 'NOT', 'OR', 'CARED', 'AND', 'KNEW', 'NOT'] +1995-1837-0005-486: ref=['SHE', 'WAS', 'SO', 'STRANGE', 'AND', 'HUMAN', 'A', 'CREATURE'] +1995-1837-0005-486: hyp=['SHE', 'WAS', 'SO', 'STRANGE', 'AND', 'HUMAN', 'A', 'CREATURE'] +1995-1837-0006-487: ref=['THE', 'WORLD', 'WAS', 'WATER', 'VEILED', 'IN', 'MISTS'] +1995-1837-0006-487: hyp=['THE', 'WORLD', 'WAS', 'WATER', 'VEILED', 'IN', 'MISTS'] +1995-1837-0007-488: ref=['THEN', 'OF', 'A', 'SUDDEN', 'AT', 'MIDDAY', 'THE', 'SUN', 'SHOT', 'OUT', 'HOT', 'AND', 'STILL', 'NO', 'BREATH', 'OF', 'AIR', 'STIRRED', 'THE', 'SKY', 'WAS', 'LIKE', 'BLUE', 'STEEL', 'THE', 'EARTH', 'STEAMED'] +1995-1837-0007-488: hyp=['THEN', 'OF', 'A', 'SUDDEN', 'AT', 'MIDDAY', 'THE', 'SUN', 'SHOT', 'OUT', 'HOT', 'AND', 'STILL', 'NO', 'BREATH', 'OF', 'AIR', 'STIRRED', 'THE', 'SKY', 'WAS', 'LIKE', 'BLUE', 'STEEL', 'THE', 'EARTH', 'STEAMED'] +1995-1837-0008-489: ref=['WHERE', 'WAS', 'THE', 'USE', 'OF', 'IMAGINING'] +1995-1837-0008-489: hyp=['WHERE', 'WAS', 'THE', 'USE', 'OF', 'IMAGINING'] +1995-1837-0009-490: ref=['THE', 'LAGOON', 'HAD', 'BEEN', 'LEVEL', 'WITH', 'THE', 'DYKES', 'A', 'WEEK', 'AGO', 'AND', 'NOW'] +1995-1837-0009-490: hyp=['THE', 'LAGOON', 'HAD', 'BEEN', 'LEVEL', 'WITH', 'THE', 'DIKES', 'A', 'WEEK', 'AGO', 'AND', 'NOW'] +1995-1837-0010-491: ref=['PERHAPS', 'SHE', 'TOO', 'MIGHT', 'BE', 'THERE', 'WAITING', 'WEEPING'] +1995-1837-0010-491: hyp=['PERHAPS', 'SHE', 'TOO', 'MIGHT', 'BE', 'THERE', 'WAITING', 'WEEPING'] +1995-1837-0011-492: ref=['HE', 'STARTED', 'AT', 'THE', 'THOUGHT', 'HE', 'HURRIED', 'FORTH', 'SADLY'] +1995-1837-0011-492: hyp=['HE', 'STARTED', 'AT', 'THE', 'THOUGHT', 'HE', 'HURRIED', 'FORTH', 'SADLY'] +1995-1837-0012-493: ref=['HE', 'SPLASHED', 'AND', 'STAMPED', 'ALONG', 'FARTHER', 'AND', 'FARTHER', 'ONWARD', 'UNTIL', 'HE', 'NEARED', 'THE', 'RAMPART', 'OF', 'THE', 'CLEARING', 'AND', 'PUT', 'FOOT', 'UPON', 'THE', 
'TREE', 'BRIDGE'] +1995-1837-0012-493: hyp=['HE', 'SPLASHED', 'AND', 'STAMPED', 'ALONG', 'FARTHER', 'AND', 'FARTHER', 'ONWARD', 'UNTIL', 'HE', 'NEARED', 'THE', 'RAMPART', 'OF', 'THE', 'CLEARING', 'AND', 'PUT', 'FOOT', 'UPON', 'THE', 'TREE', 'BRIDGE'] +1995-1837-0013-494: ref=['THEN', 'HE', 'LOOKED', 'DOWN', 'THE', 'LAGOON', 'WAS', 'DRY'] +1995-1837-0013-494: hyp=['THEN', 'HE', 'LOOKED', 'DOWN', 'THE', 'LAGOON', 'WAS', 'DRY'] +1995-1837-0014-495: ref=['HE', 'STOOD', 'A', 'MOMENT', 'BEWILDERED', 'THEN', 'TURNED', 'AND', 'RUSHED', 'UPON', 'THE', 'ISLAND', 'A', 'GREAT', 'SHEET', 'OF', 'DAZZLING', 'SUNLIGHT', 'SWEPT', 'THE', 'PLACE', 'AND', 'BENEATH', 'LAY', 'A', 'MIGHTY', 'MASS', 'OF', 'OLIVE', 'GREEN', 'THICK', 'TALL', 'WET', 'AND', 'WILLOWY'] +1995-1837-0014-495: hyp=['HE', 'STOOD', 'A', 'MOMENT', 'BEWILDERED', 'THEN', 'TURNED', 'AND', 'RUSHED', 'UPON', 'THE', 'ISLAND', 'A', 'GREAT', 'SHEET', 'OF', 'DAZZLING', 'SUNLIGHT', 'SWEPT', 'THE', 'PLACE', 'AND', 'BENEATH', 'LAY', 'A', 'MIGHTY', 'MASS', 'OF', 'OLIVE', 'GREEN', 'THICK', 'TALL', 'WET', 'AND', 'WILLOWY'] +1995-1837-0015-496: ref=['THE', 'SQUARES', 'OF', 'COTTON', 'SHARP', 'EDGED', 'HEAVY', 'WERE', 'JUST', 'ABOUT', 'TO', 'BURST', 'TO', 'BOLLS'] +1995-1837-0015-496: hyp=['THE', 'SQUARES', 'OF', 'COTTON', 'SHARP', 'EDGED', 'HEAVY', 'WERE', 'JUST', 'ABOUT', 'TO', 'BURST', 'TO', 'BOWLS'] +1995-1837-0016-497: ref=['FOR', 'ONE', 'LONG', 'MOMENT', 'HE', 'PAUSED', 'STUPID', 'AGAPE', 'WITH', 'UTTER', 'AMAZEMENT', 'THEN', 'LEANED', 'DIZZILY', 'AGAINST', 'A', 'TREE'] +1995-1837-0016-497: hyp=['FOR', 'ONE', 'LONG', 'MOMENT', 'HE', 'PAUSED', 'STUPID', 'AGAPE', 'WITH', 'UTTER', 'AMAZEMENT', 'THEN', 'LEANED', 'DIZZILY', 'AGAINST', 'A', 'TREE'] +1995-1837-0017-498: ref=['HE', 'GAZED', 'ABOUT', 'PERPLEXED', 'ASTONISHED'] +1995-1837-0017-498: hyp=['HE', 'GAZED', 'ABOUT', 'PERPLEXED', 'ASTONISHED'] +1995-1837-0018-499: ref=['HERE', 'LAY', 'THE', 'READING', 'OF', 'THE', 'RIDDLE', 'WITH', 'INFINITE', 'WORK', 'AND', 'PAIN', 'SOME', 'ONE', 'HAD', 'DUG', 'A', 'CANAL', 'FROM', 'THE', 'LAGOON', 'TO', 'THE', 'CREEK', 'INTO', 'WHICH', 'THE', 'FORMER', 'HAD', 'DRAINED', 'BY', 'A', 'LONG', 'AND', 'CROOKED', 'WAY', 'THUS', 'ALLOWING', 'IT', 'TO', 'EMPTY', 'DIRECTLY'] +1995-1837-0018-499: hyp=['HERE', 'LAY', 'THE', 'READING', 'OF', 'THE', 'RIDDLE', 'WITH', 'INFINITE', 'WORK', 'AND', 'PAINS', 'SOME', 'ONE', 'HAD', 'DUG', 'A', 'CANAL', 'FROM', 'THE', 'LAGOON', 'TO', 'THE', 'CREEK', 'INTO', 'WHICH', 'THE', 'FORMER', 'HAD', 'DRAINED', 'BY', 'A', 'LONG', 'AND', 'CROOKED', 'WAY', 'THUS', 'ALLOWING', 'IT', 'TO', 'EMPTY', 'DIRECTLY'] +1995-1837-0019-500: ref=['HE', 'SAT', 'DOWN', 'WEAK', 'BEWILDERED', 'AND', 'ONE', 'THOUGHT', 'WAS', 'UPPERMOST', 'ZORA'] +1995-1837-0019-500: hyp=['HE', 'SAT', 'DOWN', 'WEAK', 'BEWILDERED', 'AND', 'ONE', 'THOUGHT', 'WAS', 'UPPERMOST', 'SORA'] +1995-1837-0020-501: ref=['THE', 'YEARS', 'OF', 'THE', 'DAYS', 'OF', 'HER', 'DYING', 'WERE', 'TEN'] +1995-1837-0020-501: hyp=['THE', 'YEARS', 'OF', 'THE', 'DAYS', 'OF', 'HER', 'DYING', 'WERE', 'TEN'] +1995-1837-0021-502: ref=['THE', 'HOPE', 'AND', 'DREAM', 'OF', 'HARVEST', 'WAS', 'UPON', 'THE', 'LAND'] +1995-1837-0021-502: hyp=['THE', 'HOPE', 'AND', 'DREAM', 'OF', 'HARVEST', 'WAS', 'UPON', 'THE', 'LAND'] +1995-1837-0022-503: ref=['UP', 'IN', 'THE', 'SICK', 'ROOM', 'ZORA', 'LAY', 'ON', 'THE', 'LITTLE', 'WHITE', 'BED'] +1995-1837-0022-503: hyp=['UP', 'IN', 'THE', 'SICK', 'ROOM', 'TSORA', 'LAY', 'ON', 'THE', 'LITTLE', 'WHITE', 'BED'] +1995-1837-0023-504: ref=['THE', 'NET', 'AND', 'WEB', 'OF', 'ENDLESS', 
'THINGS', 'HAD', 'BEEN', 'CRAWLING', 'AND', 'CREEPING', 'AROUND', 'HER', 'SHE', 'HAD', 'STRUGGLED', 'IN', 'DUMB', 'SPEECHLESS', 'TERROR', 'AGAINST', 'SOME', 'MIGHTY', 'GRASPING', 'THAT', 'STROVE', 'FOR', 'HER', 'LIFE', 'WITH', 'GNARLED', 'AND', 'CREEPING', 'FINGERS', 'BUT', 'NOW', 'AT', 'LAST', 'WEAKLY', 'SHE', 'OPENED', 'HER', 'EYES', 'AND', 'QUESTIONED'] +1995-1837-0023-504: hyp=['THE', 'NET', 'AND', 'WEB', 'OF', 'ENDLESS', 'THINGS', 'HAD', 'BEEN', 'CRAWLING', 'AND', 'CREEPING', 'AROUND', 'HER', 'SHE', 'HAD', 'STRUGGLED', 'IN', 'DUMB', 'SPEECHLESS', 'TERROR', 'AGAINST', 'SOME', 'MIGHTY', 'GRASPING', 'THAT', 'STROVE', 'FOR', 'HER', 'LIFE', 'WITH', 'GNARLED', 'AND', 'CREEPING', 'FINGERS', 'BUT', 'NOW', 'AT', 'LAST', 'WEAKLY', 'SHE', 'OPENED', 'HER', 'EYES', 'AND', 'QUESTIONED'] +1995-1837-0024-505: ref=['FOR', 'A', 'WHILE', 'SHE', 'LAY', 'IN', 'HER', 'CHAIR', 'IN', 'HAPPY', 'DREAMY', 'PLEASURE', 'AT', 'SUN', 'AND', 'BIRD', 'AND', 'TREE'] +1995-1837-0024-505: hyp=['FOR', 'A', 'WHILE', 'SHE', 'LAY', 'IN', 'HER', 'CHAIR', 'IN', 'HAPPY', 'DREAMY', 'PLEASURE', 'AT', 'SUN', 'AND', 'BIRD', 'AND', 'TREE'] +1995-1837-0025-506: ref=['SHE', 'ROSE', 'WITH', 'A', 'FLEETING', 'GLANCE', 'GATHERED', 'THE', 'SHAWL', 'ROUND', 'HER', 'THEN', 'GLIDING', 'FORWARD', 'WAVERING', 'TREMULOUS', 'SLIPPED', 'ACROSS', 'THE', 'ROAD', 'AND', 'INTO', 'THE', 'SWAMP'] +1995-1837-0025-506: hyp=['SHE', 'ROSE', 'WITH', 'A', 'FLEETING', 'GLANCE', 'GATHERED', 'THE', 'SHAWL', 'AROUND', 'HER', 'THEN', 'GLIDING', 'FORWARD', 'WAVERING', 'TREMULOUS', 'SLIPPED', 'ACROSS', 'THE', 'ROAD', 'AND', 'INTO', 'THE', 'SWAMP'] +1995-1837-0026-507: ref=['SHE', 'HAD', 'BEEN', 'BORN', 'WITHIN', 'ITS', 'BORDERS', 'WITHIN', 'ITS', 'BORDERS', 'SHE', 'HAD', 'LIVED', 'AND', 'GROWN', 'AND', 'WITHIN', 'ITS', 'BORDERS', 'SHE', 'HAD', 'MET', 'HER', 'LOVE'] +1995-1837-0026-507: hyp=['SHE', 'HAD', 'BEEN', 'BORN', 'WITHIN', 'ITS', 'BORDERS', 'WITHIN', 'ITS', 'BORDERS', 'SHE', 'HAD', 'LIVED', 'AND', 'GROWN', 'AND', 'WITHIN', 'ITS', 'BORDERS', 'SHE', 'HAD', 'MET', 'HER', 'LOVE'] +1995-1837-0027-508: ref=['ON', 'SHE', 'HURRIED', 'UNTIL', 'SWEEPING', 'DOWN', 'TO', 'THE', 'LAGOON', 'AND', 'THE', 'ISLAND', 'LO', 'THE', 'COTTON', 'LAY', 'BEFORE', 'HER'] +1995-1837-0027-508: hyp=['ON', 'SHE', 'HURRIED', 'UNTIL', 'SWEEPING', 'DOWN', 'TO', 'THE', 'LAGOON', 'AND', 'THE', 'ISLAND', 'LO', 'THE', 'COTTON', 'LAY', 'BEFORE', 'HER'] +1995-1837-0028-509: ref=['THE', 'CHAIR', 'WAS', 'EMPTY', 'BUT', 'HE', 'KNEW'] +1995-1837-0028-509: hyp=['THE', 'CHAIR', 'WAS', 'EMPTY', 'BUT', 'HE', 'KNEW'] +1995-1837-0029-510: ref=['HE', 'DARTED', 'THROUGH', 'THE', 'TREES', 'AND', 'PAUSED', 'A', 'TALL', 'MAN', 'STRONGLY', 'BUT', 'SLIMLY', 'MADE'] +1995-1837-0029-510: hyp=['HE', 'DARTED', 'THROUGH', 'THE', 'TREES', 'AND', 'PAUSED', 'A', 'TALL', 'MAN', 'STRONGLY', 'BUT', 'SLIMLY', 'MADE'] +2094-142345-0000-511: ref=['IT', 'IS', 'A', 'VERY', 'FINE', 'OLD', 'PLACE', 'OF', 'RED', 'BRICK', 'SOFTENED', 'BY', 'A', 'PALE', 'POWDERY', 'LICHEN', 'WHICH', 'HAS', 'DISPERSED', 'ITSELF', 'WITH', 'HAPPY', 'IRREGULARITY', 'SO', 'AS', 'TO', 'BRING', 'THE', 'RED', 'BRICK', 'INTO', 'TERMS', 'OF', 'FRIENDLY', 'COMPANIONSHIP', 'WITH', 'THE', 'LIMESTONE', 'ORNAMENTS', 'SURROUNDING', 'THE', 'THREE', 'GABLES', 'THE', 'WINDOWS', 'AND', 'THE', 'DOOR', 'PLACE'] +2094-142345-0000-511: hyp=['IT', 'IS', 'A', 'VERY', 'FINE', 'OLD', 'PLACE', 'OF', 'RED', 'BRICK', 'SOFTENED', 'BY', 'A', 'PALE', 'POWDERY', 'LICHEN', 'WHICH', 'HAS', 'DISPERSED', 'ITSELF', 'WITH', 'HAPPY', 'IRREGULARITY', 'SO', 'AS', 'TO', 'BRING', 'THE', 'RED', 
'BRICK', 'INTO', 'TERMS', 'OF', 'FRIENDLY', 'COMPANIONSHIP', 'WITH', 'THE', 'LIMESTONE', 'ORNAMENTS', 'SURROUNDING', 'THE', 'THREE', 'GABLES', 'THE', 'WINDOWS', 'AND', 'THE', 'DOOR', 'PLACE'] +2094-142345-0001-512: ref=['BUT', 'THE', 'WINDOWS', 'ARE', 'PATCHED', 'WITH', 'WOODEN', 'PANES', 'AND', 'THE', 'DOOR', 'I', 'THINK', 'IS', 'LIKE', 'THE', 'GATE', 'IT', 'IS', 'NEVER', 'OPENED'] +2094-142345-0001-512: hyp=['BUT', 'THE', 'WINDOWS', 'ARE', 'PATCHED', 'WITH', 'WOODEN', 'PANES', 'AND', 'THE', 'DOOR', 'I', 'THINK', 'IS', 'LIKE', 'THE', 'GATE', 'IT', 'IS', 'NEVER', 'OPENED'] +2094-142345-0002-513: ref=['FOR', 'IT', 'IS', 'A', 'SOLID', 'HEAVY', 'HANDSOME', 'DOOR', 'AND', 'MUST', 'ONCE', 'HAVE', 'BEEN', 'IN', 'THE', 'HABIT', 'OF', 'SHUTTING', 'WITH', 'A', 'SONOROUS', 'BANG', 'BEHIND', 'A', 'LIVERIED', 'LACKEY', 'WHO', 'HAD', 'JUST', 'SEEN', 'HIS', 'MASTER', 'AND', 'MISTRESS', 'OFF', 'THE', 'GROUNDS', 'IN', 'A', 'CARRIAGE', 'AND', 'PAIR'] +2094-142345-0002-513: hyp=['FOR', 'IT', 'IS', 'A', 'SOLID', 'HEAVY', 'HANDSOME', 'DOOR', 'AND', 'MUST', 'ONCE', 'HAVE', 'BEEN', 'IN', 'THE', 'HABIT', 'OF', 'SHUTTING', 'WITH', 'A', 'SONOROUS', 'BANG', 'BEHIND', 'THE', 'LIVERIED', 'LACKEY', 'WHO', 'HAD', 'JUST', 'SEEN', 'HIS', 'MASTER', 'AND', 'MISTRESS', 'OFF', 'THE', 'GROUNDS', 'IN', 'A', 'CARRIAGE', 'AND', 'PAIR'] +2094-142345-0003-514: ref=['A', 'LARGE', 'OPEN', 'FIREPLACE', 'WITH', 'RUSTY', 'DOGS', 'IN', 'IT', 'AND', 'A', 'BARE', 'BOARDED', 'FLOOR', 'AT', 'THE', 'FAR', 'END', 'FLEECES', 'OF', 'WOOL', 'STACKED', 'UP', 'IN', 'THE', 'MIDDLE', 'OF', 'THE', 'FLOOR', 'SOME', 'EMPTY', 'CORN', 'BAGS'] +2094-142345-0003-514: hyp=['A', 'LARGE', 'OPEN', 'FIREPLACE', 'WITH', 'RUSTY', 'DOGS', 'IN', 'IT', 'AND', 'A', 'BARE', 'BOARDED', 'FLOOR', 'AT', 'THE', 'FAR', 'END', 'FLEECES', 'OF', 'WOOL', 'STACKED', 'UP', 'IN', 'THE', 'MIDDLE', 'OF', 'THE', 'FLOOR', 'SOME', 'EMPTY', 'CORN', 'BAGS'] +2094-142345-0004-515: ref=['AND', 'WHAT', 'THROUGH', 'THE', 'LEFT', 'HAND', 'WINDOW'] +2094-142345-0004-515: hyp=['AND', 'WHAT', 'THROUGH', 'THE', 'LEFT', 'HAND', 'WINDOW'] +2094-142345-0005-516: ref=['SEVERAL', 'CLOTHES', 'HORSES', 'A', 'PILLION', 'A', 'SPINNING', 'WHEEL', 'AND', 'AN', 'OLD', 'BOX', 'WIDE', 'OPEN', 'AND', 'STUFFED', 'FULL', 'OF', 'COLOURED', 'RAGS'] +2094-142345-0005-516: hyp=['SEVERAL', 'CLOTHES', 'HORSES', 'A', 'PILLION', 'A', 'SPINNING', 'WHEEL', 'AND', 'AN', 'OLD', 'BOX', 'WIDE', 'OPEN', 'AND', 'STUFFED', 'FULL', 'OF', 'COLOURED', 'RAGS'] +2094-142345-0006-517: ref=['AT', 'THE', 'EDGE', 'OF', 'THIS', 'BOX', 'THERE', 'LIES', 'A', 'GREAT', 'WOODEN', 'DOLL', 'WHICH', 'SO', 'FAR', 'AS', 'MUTILATION', 'IS', 'CONCERNED', 'BEARS', 'A', 'STRONG', 'RESEMBLANCE', 'TO', 'THE', 'FINEST', 'GREEK', 'SCULPTURE', 'AND', 'ESPECIALLY', 'IN', 'THE', 'TOTAL', 'LOSS', 'OF', 'ITS', 'NOSE'] +2094-142345-0006-517: hyp=['AT', 'THE', 'EDGE', 'OF', 'THIS', 'BOX', 'THERE', 'LIES', 'A', 'GREAT', 'WOODEN', 'DOLL', 'WHICH', 'SO', 'FAR', 'AS', 'MUTILATION', 'IS', 'CONCERNED', 'BEARS', 'A', 'STRONG', 'RESEMBLANCE', 'TO', 'THE', 'FINEST', 'GREEK', 'SCULPTURE', 'AND', 'ESPECIALLY', 'IN', 'THE', 'TOTAL', 'LOSS', 'OF', 'ITS', 'NOSE'] +2094-142345-0007-518: ref=['THE', 'HISTORY', 'OF', 'THE', 'HOUSE', 'IS', 'PLAIN', 'NOW'] +2094-142345-0007-518: hyp=['THE', 'HISTORY', 'OF', 'THE', 'HOUSE', 'IS', 'PLAIN', 'NOW'] +2094-142345-0008-519: ref=['BUT', 'THERE', 'IS', 'ALWAYS', 'A', 'STRONGER', 'SENSE', 'OF', 'LIFE', 'WHEN', 'THE', 'SUN', 'IS', 'BRILLIANT', 'AFTER', 'RAIN', 'AND', 'NOW', 'HE', 'IS', 'POURING', 'DOWN', 'HIS', 'BEAMS', 'AND', 'MAKING', 
'SPARKLES', 'AMONG', 'THE', 'WET', 'STRAW', 'AND', 'LIGHTING', 'UP', 'EVERY', 'PATCH', 'OF', 'VIVID', 'GREEN', 'MOSS', 'ON', 'THE', 'RED', 'TILES', 'OF', 'THE', 'COW', 'SHED', 'AND', 'TURNING', 'EVEN', 'THE', 'MUDDY', 'WATER', 'THAT', 'IS', 'HURRYING', 'ALONG', 'THE', 'CHANNEL', 'TO', 'THE', 'DRAIN', 'INTO', 'A', 'MIRROR', 'FOR', 'THE', 'YELLOW', 'BILLED', 'DUCKS', 'WHO', 'ARE', 'SEIZING', 'THE', 'OPPORTUNITY', 'OF', 'GETTING', 'A', 'DRINK', 'WITH', 'AS', 'MUCH', 'BODY', 'IN', 'IT', 'AS', 'POSSIBLE'] +2094-142345-0008-519: hyp=['BUT', 'THERE', 'IS', 'ALWAYS', 'A', 'STRONGER', 'SENSE', 'OF', 'LIFE', 'WHEN', 'THE', 'SUN', 'IS', 'BRILLIANT', 'AFTER', 'RAIN', 'AND', 'NOW', 'HE', 'IS', 'POURING', 'DOWN', 'HIS', 'BEAMS', 'AND', 'MAKING', 'SPARKLES', 'AMONG', 'THE', 'WET', 'STRAW', 'AND', 'LIGHTING', 'UP', 'EVERY', 'PATCH', 'OF', 'VIVID', 'GREEN', 'MOSS', 'ON', 'THE', 'RED', 'TILES', 'OF', 'THE', 'COW', 'SHED', 'AND', 'TURNING', 'EVEN', 'THE', 'MUDDY', 'WATER', 'THAT', 'IS', 'HURRYING', 'ALONG', 'THE', 'CHANNEL', 'TO', 'THE', 'DRAIN', 'INTO', 'A', 'MIRROR', 'FOR', 'THE', 'YELLOW', 'BILLED', 'DUCKS', 'WHO', 'ARE', 'SEIZING', 'THE', 'OPPORTUNITY', 'OF', 'GETTING', 'A', 'DRINK', 'WITH', 'AS', 'MUCH', 'BODY', 'IN', 'IT', 'AS', 'POSSIBLE'] +2094-142345-0009-520: ref=['FOR', 'THE', 'GREAT', 'BARN', 'DOORS', 'ARE', 'THROWN', 'WIDE', 'OPEN', 'AND', 'MEN', 'ARE', 'BUSY', 'THERE', 'MENDING', 'THE', 'HARNESS', 'UNDER', 'THE', 'SUPERINTENDENCE', 'OF', 'MISTER', 'GOBY', 'THE', 'WHITTAW', 'OTHERWISE', 'SADDLER', 'WHO', 'ENTERTAINS', 'THEM', 'WITH', 'THE', 'LATEST', 'TREDDLESTON', 'GOSSIP'] +2094-142345-0009-520: hyp=['FOR', 'THE', 'GREAT', 'BARN', 'DOORS', 'ARE', 'THROWN', 'WIDE', 'OPEN', 'AND', 'MEN', 'ARE', 'BUSY', 'THERE', 'MENDING', 'THE', 'HARNESS', 'UNDER', 'THE', 'SUPERINTENDENCE', 'OF', 'MISTER', 'GOBY', 'THE', 'WIDOW', 'OTHERWISE', 'SADDLER', 'WHO', 'ENTERTAINS', 'THEM', 'WITH', 'THE', 'LATEST', 'TREDDLESTON', 'GOSSIP'] +2094-142345-0010-521: ref=['HETTY', 'SORREL', 'OFTEN', 'TOOK', 'THE', 'OPPORTUNITY', 'WHEN', 'HER', "AUNT'S", 'BACK', 'WAS', 'TURNED', 'OF', 'LOOKING', 'AT', 'THE', 'PLEASING', 'REFLECTION', 'OF', 'HERSELF', 'IN', 'THOSE', 'POLISHED', 'SURFACES', 'FOR', 'THE', 'OAK', 'TABLE', 'WAS', 'USUALLY', 'TURNED', 'UP', 'LIKE', 'A', 'SCREEN', 'AND', 'WAS', 'MORE', 'FOR', 'ORNAMENT', 'THAN', 'FOR', 'USE', 'AND', 'SHE', 'COULD', 'SEE', 'HERSELF', 'SOMETIMES', 'IN', 'THE', 'GREAT', 'ROUND', 'PEWTER', 'DISHES', 'THAT', 'WERE', 'RANGED', 'ON', 'THE', 'SHELVES', 'ABOVE', 'THE', 'LONG', 'DEAL', 'DINNER', 'TABLE', 'OR', 'IN', 'THE', 'HOBS', 'OF', 'THE', 'GRATE', 'WHICH', 'ALWAYS', 'SHONE', 'LIKE', 'JASPER'] +2094-142345-0010-521: hyp=["HETTY'S", 'SYREL', 'OFTEN', 'TOOK', 'THE', 'OPPORTUNITY', 'WHEN', 'HER', "AUNT'S", 'BACK', 'WAS', 'TURNED', 'OF', 'LOOKING', 'AT', 'THE', 'PLEASING', 'REFLECTION', 'OF', 'HERSELF', 'IN', 'THOSE', 'POLISHED', 'SERVICES', 'FOR', 'THE', 'OAK', 'TABLE', 'WAS', 'USUALLY', 'TURNED', 'UP', 'LIKE', 'A', 'SCREEN', 'AND', 'WAS', 'MORE', 'FOR', 'ORNAMENT', 'THAN', 'FOR', 'USE', 'AND', 'SHE', 'COULD', 'SEE', 'HERSELF', 'SOMETIMES', 'IN', 'THE', 'GREAT', 'ROUND', 'PEWTER', 'DISHES', 'THAT', 'WERE', 'RANGED', 'ON', 'THE', 'SHELVES', 'ABOVE', 'THE', 'LONG', 'DEAL', 'DINNER', 'TABLE', 'OR', 'IN', 'THE', 'HOBS', 'OF', 'THE', 'GRATE', 'WHICH', 'ALWAYS', 'SHONE', 'LIKE', 'JASPER'] +2094-142345-0011-522: ref=['DO', 'NOT', 'SUPPOSE', 'HOWEVER', 'THAT', 'MISSUS', 'POYSER', 'WAS', 'ELDERLY', 'OR', 'SHREWISH', 'IN', 'HER', 'APPEARANCE', 'SHE', 'WAS', 'A', 'GOOD', 'LOOKING', 'WOMAN', 'NOT', 
'MORE', 'THAN', 'EIGHT', 'AND', 'THIRTY', 'OF', 'FAIR', 'COMPLEXION', 'AND', 'SANDY', 'HAIR', 'WELL', 'SHAPEN', 'LIGHT', 'FOOTED'] +2094-142345-0011-522: hyp=['DO', 'NOT', 'SUPPOSE', 'HOWEVER', 'THAT', 'MISSUS', 'POYSER', 'WAS', 'ELDERLY', 'OR', 'SHREWISH', 'IN', 'HER', 'APPEARANCE', 'SHE', 'WAS', 'A', 'GOOD', 'LOOKING', 'WOMAN', 'NOT', 'MORE', 'THAN', 'EIGHT', 'AND', 'THIRTY', 'OF', 'FAIR', 'COMPLEXION', 'AND', 'SANDY', 'HAIR', 'WELL', 'SHAPEN', 'LIGHT', 'FOOTED'] +2094-142345-0012-523: ref=['THE', 'FAMILY', 'LIKENESS', 'BETWEEN', 'HER', 'AND', 'HER', 'NIECE', 'DINAH', 'MORRIS', 'WITH', 'THE', 'CONTRAST', 'BETWEEN', 'HER', 'KEENNESS', 'AND', "DINAH'S", 'SERAPHIC', 'GENTLENESS', 'OF', 'EXPRESSION', 'MIGHT', 'HAVE', 'SERVED', 'A', 'PAINTER', 'AS', 'AN', 'EXCELLENT', 'SUGGESTION', 'FOR', 'A', 'MARTHA', 'AND', 'MARY'] +2094-142345-0012-523: hyp=['THE', 'FAMILY', 'LIKENESS', 'BETWEEN', 'HER', 'AND', 'HER', 'NIECE', 'DINAH', 'MORRIS', 'WITH', 'THE', 'CONTRAST', 'BETWEEN', 'HER', 'KEENNESS', 'AND', "DINAH'S", 'SERAPHIC', 'GENTLENESS', 'OF', 'EXPRESSION', 'MIGHT', 'HAVE', 'SERVED', 'A', 'PAINTER', 'AS', 'AN', 'EXCELLENT', 'SUGGESTION', 'FOR', 'A', 'MARTHA', 'AND', 'MARY'] +2094-142345-0013-524: ref=['HER', 'TONGUE', 'WAS', 'NOT', 'LESS', 'KEEN', 'THAN', 'HER', 'EYE', 'AND', 'WHENEVER', 'A', 'DAMSEL', 'CAME', 'WITHIN', 'EARSHOT', 'SEEMED', 'TO', 'TAKE', 'UP', 'AN', 'UNFINISHED', 'LECTURE', 'AS', 'A', 'BARREL', 'ORGAN', 'TAKES', 'UP', 'A', 'TUNE', 'PRECISELY', 'AT', 'THE', 'POINT', 'WHERE', 'IT', 'HAD', 'LEFT', 'OFF'] +2094-142345-0013-524: hyp=['HER', 'TONGUE', 'WAS', 'NOT', 'LESS', 'KEEN', 'THAN', 'HER', 'EYE', 'AND', 'WHENEVER', 'A', 'DAMSEL', 'CAME', 'WITHIN', 'EARSHOT', 'SEEMED', 'TO', 'TAKE', 'UP', 'AN', 'UNFINISHED', 'LECTURE', 'AS', 'A', 'BERYL', 'ORGAN', 'TAKES', 'UP', 'A', 'TUNE', 'PRECISELY', 'AT', 'THE', 'POINT', 'WHERE', 'IT', 'HAD', 'LEFT', 'OFF'] +2094-142345-0014-525: ref=['THE', 'FACT', 'THAT', 'IT', 'WAS', 'CHURNING', 'DAY', 'WAS', 'ANOTHER', 'REASON', 'WHY', 'IT', 'WAS', 'INCONVENIENT', 'TO', 'HAVE', 'THE', 'WHITTAWS', 'AND', 'WHY', 'CONSEQUENTLY', 'MISSUS', 'POYSER', 'SHOULD', 'SCOLD', 'MOLLY', 'THE', 'HOUSEMAID', 'WITH', 'UNUSUAL', 'SEVERITY'] +2094-142345-0014-525: hyp=['THE', 'FACT', 'THAT', 'IT', 'WAS', 'CHURNING', 'DAY', 'WAS', 'ANOTHER', 'REASON', 'WHY', 'IT', 'WAS', 'INCONVENIENT', 'TO', 'HAVE', 'THE', 'WIDOWS', 'AND', 'WHY', 'CONSEQUENTLY', 'MISSUS', 'POYSER', 'SHOULD', 'SCOLD', 'MOLLY', 'THE', 'HOUSEMAID', 'WITH', 'UNUSUAL', 'SEVERITY'] +2094-142345-0015-526: ref=['TO', 'ALL', 'APPEARANCE', 'MOLLY', 'HAD', 'GOT', 'THROUGH', 'HER', 'AFTER', 'DINNER', 'WORK', 'IN', 'AN', 'EXEMPLARY', 'MANNER', 'HAD', 'CLEANED', 'HERSELF', 'WITH', 'GREAT', 'DISPATCH', 'AND', 'NOW', 'CAME', 'TO', 'ASK', 'SUBMISSIVELY', 'IF', 'SHE', 'SHOULD', 'SIT', 'DOWN', 'TO', 'HER', 'SPINNING', 'TILL', 'MILKING', 'TIME'] +2094-142345-0015-526: hyp=['TO', 'ALL', 'APPEARANCE', 'MOLLY', 'HAD', 'GOT', 'THROUGH', 'HER', 'AFTER', 'DINNER', 'WORK', 'IN', 'AN', 'EXEMPLARY', 'MANNER', 'HAD', 'CLEANED', 'HERSELF', 'WITH', 'GREAT', 'DISPATCH', 'AND', 'NOW', 'CAME', 'TO', 'ASK', 'SUBMISSIVELY', 'IF', 'SHE', 'SHOULD', 'SIT', 'DOWN', 'TO', 'HER', 'SPINNING', 'TILL', 'MILKING', 'TIME'] +2094-142345-0016-527: ref=['SPINNING', 'INDEED'] +2094-142345-0016-527: hyp=['SPINNING', 'INDEED'] +2094-142345-0017-528: ref=['I', 'NEVER', 'KNEW', 'YOUR', 'EQUALS', 'FOR', 'GALLOWSNESS'] +2094-142345-0017-528: hyp=['I', 'NEVER', 'KNEW', 'YOUR', 'EQUALS', 'FOR', 'GALLOWSNESS'] +2094-142345-0018-529: ref=['WHO', 'TAUGHT', 'YOU', 'TO', 
'SCRUB', 'A', 'FLOOR', 'I', 'SHOULD', 'LIKE', 'TO', 'KNOW'] +2094-142345-0018-529: hyp=['WHO', 'TAUGHT', 'YOU', 'TO', 'SCRUB', 'A', 'FLOOR', 'I', 'SHOULD', 'LIKE', 'TO', 'KNOW'] +2094-142345-0019-530: ref=['COMB', 'THE', 'WOOL', 'FOR', 'THE', 'WHITTAWS', 'INDEED'] +2094-142345-0019-530: hyp=['COMB', 'THE', 'WOOL', 'FOR', 'THE', 'WIDOWS', 'INDEED'] +2094-142345-0020-531: ref=["THAT'S", 'WHAT', "YOU'D", 'LIKE', 'TO', 'BE', 'DOING', 'IS', 'IT'] +2094-142345-0020-531: hyp=["THAT'S", 'WHAT', "YOU'D", 'LIKE', 'TO', 'BE', 'DOING', 'IS', 'IT'] +2094-142345-0021-532: ref=["THAT'S", 'THE', 'WAY', 'WITH', 'YOU', "THAT'S", 'THE', 'ROAD', "YOU'D", 'ALL', 'LIKE', 'TO', 'GO', 'HEADLONGS', 'TO', 'RUIN'] +2094-142345-0021-532: hyp=["THAT'S", 'THE', 'WAY', 'WITH', 'YOU', "THAT'S", 'THE', 'ROAD', "YOU'D", 'ALL', 'LIKE', 'TO', 'GO', 'HEADLONGS', 'TO', 'RUIN'] +2094-142345-0022-533: ref=['MISTER', "OTTLEY'S", 'INDEED'] +2094-142345-0022-533: hyp=['MISTER', "OAKLEY'S", 'INDEED'] +2094-142345-0023-534: ref=["YOU'RE", 'A', 'RARE', 'UN', 'FOR', 'SITTING', 'DOWN', 'TO', 'YOUR', 'WORK', 'A', 'LITTLE', 'WHILE', 'AFTER', "IT'S", 'TIME', 'TO', 'PUT', 'BY'] +2094-142345-0023-534: hyp=['YOU', 'ARE', 'A', 'RARE', 'ONE', 'FOR', 'SITTING', 'DOWN', 'TO', 'YOUR', 'WORK', 'A', 'LITTLE', 'WHILE', 'AFTER', "IT'S", 'TIME', 'TO', 'PUT', 'BY'] +2094-142345-0024-535: ref=['MUNNY', 'MY', "IRON'S", 'TWITE', 'TOLD', 'PEASE', 'PUT', 'IT', 'DOWN', 'TO', 'WARM'] +2094-142345-0024-535: hyp=['MONEY', 'MY', "IRON'S", 'TWICE', 'TOLD', 'PLEASE', 'PUT', 'IT', 'DOWN', 'TO', 'WARM'] +2094-142345-0025-536: ref=['COLD', 'IS', 'IT', 'MY', 'DARLING', 'BLESS', 'YOUR', 'SWEET', 'FACE'] +2094-142345-0025-536: hyp=['COLD', 'IS', 'IT', 'MY', 'DARLING', 'BLESS', 'YOUR', 'SWEET', 'FACE'] +2094-142345-0026-537: ref=["SHE'S", 'GOING', 'TO', 'PUT', 'THE', 'IRONING', 'THINGS', 'AWAY'] +2094-142345-0026-537: hyp=["SHE'S", 'GOING', 'TO', 'PUT', 'THE', 'IRONING', 'THINGS', 'AWAY'] +2094-142345-0027-538: ref=['MUNNY', 'I', 'TOULD', 'IKE', 'TO', 'DO', 'INTO', 'DE', 'BARN', 'TO', 'TOMMY', 'TO', 'SEE', 'DE', 'WHITTAWD'] +2094-142345-0027-538: hyp=['MONEY', 'I', 'DID', 'LIKE', 'TO', 'DO', 'INTO', 'THE', 'BARN', 'TO', 'TOMMY', 'TO', 'SEE', 'THE', 'WIDOWED'] +2094-142345-0028-539: ref=['NO', 'NO', 'NO', 'TOTTY', 'UD', 'GET', 'HER', 'FEET', 'WET', 'SAID', 'MISSUS', 'POYSER', 'CARRYING', 'AWAY', 'HER', 'IRON'] +2094-142345-0028-539: hyp=['NO', 'NO', 'NO', 'TOTTY', 'HAD', 'GET', 'HER', 'FEET', 'WET', 'SAID', 'MISSUS', 'POYSER', 'CARRYING', 'AWAY', 'HER', 'IRON'] +2094-142345-0029-540: ref=['DID', 'EVER', 'ANYBODY', 'SEE', 'THE', 'LIKE', 'SCREAMED', 'MISSUS', 'POYSER', 'RUNNING', 'TOWARDS', 'THE', 'TABLE', 'WHEN', 'HER', 'EYE', 'HAD', 'FALLEN', 'ON', 'THE', 'BLUE', 'STREAM'] +2094-142345-0029-540: hyp=['DID', 'EVER', 'ANYBODY', 'SEE', 'THE', 'LIKE', 'SCREAMED', 'MISSUS', 'POYSER', 'RUNNING', 'TOWARDS', 'THE', 'TABLE', 'WHEN', 'HER', 'EYE', 'HAD', 'FALLEN', 'ON', 'THE', 'BLUE', 'STREAM'] +2094-142345-0030-541: ref=['TOTTY', 'HOWEVER', 'HAD', 'DESCENDED', 'FROM', 'HER', 'CHAIR', 'WITH', 'GREAT', 'SWIFTNESS', 'AND', 'WAS', 'ALREADY', 'IN', 'RETREAT', 'TOWARDS', 'THE', 'DAIRY', 'WITH', 'A', 'SORT', 'OF', 'WADDLING', 'RUN', 'AND', 'AN', 'AMOUNT', 'OF', 'FAT', 'ON', 'THE', 'NAPE', 'OF', 'HER', 'NECK', 'WHICH', 'MADE', 'HER', 'LOOK', 'LIKE', 'THE', 'METAMORPHOSIS', 'OF', 'A', 'WHITE', 'SUCKLING', 'PIG'] +2094-142345-0030-541: hyp=['TOTTY', 'HOWEVER', 'HAD', 'DESCENDED', 'FROM', 'HER', 'CHAIR', 'WITH', 'GREAT', 'SWIFTNESS', 'AND', 'WAS', 'ALREADY', 'IN', 'RETREAT', 'TOWARDS', 'THE', 
'DAIRY', 'WITH', 'A', 'SORT', 'OF', 'WADDLING', 'RUN', 'AND', 'AN', 'AMOUNT', 'OF', 'FAT', 'ON', 'THE', 'NAPE', 'OF', 'HER', 'NECK', 'WHICH', 'MADE', 'HER', 'LOOK', 'LIKE', 'THE', 'METAMORPHOSIS', 'OF', 'A', 'WHITE', 'SUCKING', 'PIG'] +2094-142345-0031-542: ref=['AND', 'SHE', 'WAS', 'VERY', 'FOND', 'OF', 'YOU', 'TOO', 'AUNT', 'RACHEL'] +2094-142345-0031-542: hyp=['AND', 'SHE', 'WAS', 'VERY', 'FOND', 'OF', 'YOU', 'TOO', 'AUNT', 'RACHEL'] +2094-142345-0032-543: ref=['I', 'OFTEN', 'HEARD', 'HER', 'TALK', 'OF', 'YOU', 'IN', 'THE', 'SAME', 'SORT', 'OF', 'WAY'] +2094-142345-0032-543: hyp=['I', 'OFTEN', 'HEARD', 'HER', 'TALK', 'OF', 'YOU', 'IN', 'THE', 'SAME', 'SORT', 'OF', 'WAY'] +2094-142345-0033-544: ref=['WHEN', 'SHE', 'HAD', 'THAT', 'BAD', 'ILLNESS', 'AND', 'I', 'WAS', 'ONLY', 'ELEVEN', 'YEARS', 'OLD', 'SHE', 'USED', 'TO', 'SAY', "YOU'LL", 'HAVE', 'A', 'FRIEND', 'ON', 'EARTH', 'IN', 'YOUR', 'AUNT', 'RACHEL', 'IF', "I'M", 'TAKEN', 'FROM', 'YOU', 'FOR', 'SHE', 'HAS', 'A', 'KIND', 'HEART', 'AND', "I'M", 'SURE', "I'VE", 'FOUND', 'IT', 'SO'] +2094-142345-0033-544: hyp=['WHEN', 'SHE', 'HAD', 'THAT', 'BAD', 'ILLNESS', 'AND', 'I', 'WAS', 'ONLY', 'ELEVEN', 'YEARS', 'OLD', 'SHE', 'USED', 'TO', 'SAY', "YOU'LL", 'HAVE', 'A', 'FRIEND', 'ON', 'EARTH', 'AND', 'YOUR', 'AUNT', 'RACHEL', 'IF', "I'M", 'TAKEN', 'FROM', 'YOU', 'FOR', 'SHE', 'HAS', 'A', 'KIND', 'HEART', 'AND', "I'M", 'SURE', "I'VE", 'FOUND', 'IT', 'SO'] +2094-142345-0034-545: ref=['AND', "THERE'S", 'LINEN', 'IN', 'THE', 'HOUSE', 'AS', 'I', 'COULD', 'WELL', 'SPARE', 'YOU', 'FOR', "I'VE", 'GOT', 'LOTS', 'O', 'SHEETING', 'AND', 'TABLE', 'CLOTHING', 'AND', 'TOWELLING', 'AS', "ISN'T", 'MADE', 'UP'] +2094-142345-0034-545: hyp=['AND', "THERE'S", 'LINEN', 'IN', 'THE', 'HOUSE', 'AS', 'I', 'COULD', 'WELL', 'SPARE', 'YOU', 'FOR', 'I', 'GOT', 'LOTS', 'OF', 'SHEETING', 'AND', 'TABLE', 'CLOTHING', 'AND', 'TOWELING', 'AS', "ISN'T", 'MADE', 'UP'] +2094-142345-0035-546: ref=['BUT', 'NOT', 'MORE', 'THAN', "WHAT'S", 'IN', 'THE', 'BIBLE', 'AUNT', 'SAID', 'DINAH'] +2094-142345-0035-546: hyp=['BUT', 'NOT', 'MORE', 'THAN', "WHAT'S", 'IN', 'THE', 'BIBLE', 'AUNT', 'SAID', 'DINAH'] +2094-142345-0036-547: ref=['NAY', 'DEAR', 'AUNT', 'YOU', 'NEVER', 'HEARD', 'ME', 'SAY', 'THAT', 'ALL', 'PEOPLE', 'ARE', 'CALLED', 'TO', 'FORSAKE', 'THEIR', 'WORK', 'AND', 'THEIR', 'FAMILIES'] +2094-142345-0036-547: hyp=['NAY', 'DEAR', 'AUNT', 'YOU', 'NEVER', 'HEARD', 'ME', 'SAY', 'THAT', 'ALL', 'PEOPLE', 'ARE', 'CALLED', 'TO', 'FORSAKE', 'THEIR', 'WORK', 'AND', 'THEIR', 'FAMILIES'] +2094-142345-0037-548: ref=['WE', 'CAN', 'ALL', 'BE', 'SERVANTS', 'OF', 'GOD', 'WHEREVER', 'OUR', 'LOT', 'IS', 'CAST', 'BUT', 'HE', 'GIVES', 'US', 'DIFFERENT', 'SORTS', 'OF', 'WORK', 'ACCORDING', 'AS', 'HE', 'FITS', 'US', 'FOR', 'IT', 'AND', 'CALLS', 'US', 'TO', 'IT'] +2094-142345-0037-548: hyp=['WE', 'CAN', 'ALL', 'BE', 'SERVANTS', 'OF', 'GOD', 'WHEREVER', 'OUR', 'LOT', 'IS', 'CAST', 'BUT', 'HE', 'GIVES', 'US', 'DIFFERENT', 'SORTS', 'OF', 'WORK', 'ACCORDING', 'AS', 'HE', 'FITS', 'US', 'FOR', 'IT', 'AND', 'CALLS', 'US', 'TO', 'IT'] +2094-142345-0038-549: ref=['I', 'CAN', 'NO', 'MORE', 'HELP', 'SPENDING', 'MY', 'LIFE', 'IN', 'TRYING', 'TO', 'DO', 'WHAT', 'I', 'CAN', 'FOR', 'THE', 'SOULS', 'OF', 'OTHERS', 'THAN', 'YOU', 'COULD', 'HELP', 'RUNNING', 'IF', 'YOU', 'HEARD', 'LITTLE', 'TOTTY', 'CRYING', 'AT', 'THE', 'OTHER', 'END', 'OF', 'THE', 'HOUSE', 'THE', 'VOICE', 'WOULD', 'GO', 'TO', 'YOUR', 'HEART', 'YOU', 'WOULD', 'THINK', 'THE', 'DEAR', 'CHILD', 'WAS', 'IN', 'TROUBLE', 'OR', 'IN', 'DANGER', 'AND', 'YOU', 
"COULDN'T", 'REST', 'WITHOUT', 'RUNNING', 'TO', 'HELP', 'HER', 'AND', 'COMFORT', 'HER'] +2094-142345-0038-549: hyp=['I', 'CAN', 'NO', 'MORE', 'HELP', 'SPENDING', 'MY', 'LIFE', 'IN', 'TRYING', 'TO', 'DO', 'WHAT', 'I', 'CAN', 'FOR', 'THE', 'SOULS', 'OF', 'OTHERS', 'THAN', 'YOU', 'COULD', 'HELP', 'RUNNING', 'IF', 'YOU', 'HEARD', 'LITTLE', 'TOTTY', 'CRYING', 'AT', 'THE', 'OTHER', 'END', 'OF', 'THE', 'HOUSE', 'THE', 'VOICE', 'WOULD', 'GO', 'TO', 'YOUR', 'HEART', 'YOU', 'WOULD', 'THINK', 'THE', 'DEAR', 'CHILD', 'WAS', 'IN', 'TROUBLE', 'OR', 'IN', 'DANGER', 'AND', 'YOU', "COULDN'T", 'REST', 'WITHOUT', 'RUNNING', 'TO', 'HELP', 'HER', 'AND', 'COMFORT', 'HER'] +2094-142345-0039-550: ref=["I'VE", 'STRONG', 'ASSURANCE', 'THAT', 'NO', 'EVIL', 'WILL', 'HAPPEN', 'TO', 'YOU', 'AND', 'MY', 'UNCLE', 'AND', 'THE', 'CHILDREN', 'FROM', 'ANYTHING', "I'VE", 'DONE'] +2094-142345-0039-550: hyp=["I'VE", 'STRONG', 'ASSURANCE', 'THAT', 'NO', 'EVIL', 'WILL', 'HAPPEN', 'TO', 'YOU', 'AND', 'MY', 'UNCLE', 'AND', 'THE', 'CHILDREN', 'FROM', 'ANYTHING', "I'VE", 'DONE'] +2094-142345-0040-551: ref=['I', "DIDN'T", 'PREACH', 'WITHOUT', 'DIRECTION'] +2094-142345-0040-551: hyp=['I', "DIDN'T", 'PREACH', 'WITHOUT', 'DIRECTION'] +2094-142345-0041-552: ref=['DIRECTION'] +2094-142345-0041-552: hyp=['DIRECTION'] +2094-142345-0042-553: ref=['I', 'HANNA', 'COMMON', 'PATIENCE', 'WITH', 'YOU'] +2094-142345-0042-553: hyp=['I', 'HAD', 'A', 'COMMON', 'PATIENCE', 'WITH', 'YOU'] +2094-142345-0043-554: ref=['BY', 'THIS', 'TIME', 'THE', 'TWO', 'GENTLEMEN', 'HAD', 'REACHED', 'THE', 'PALINGS', 'AND', 'HAD', 'GOT', 'DOWN', 'FROM', 'THEIR', 'HORSES', 'IT', 'WAS', 'PLAIN', 'THEY', 'MEANT', 'TO', 'COME', 'IN'] +2094-142345-0043-554: hyp=['BY', 'THIS', 'TIME', 'THE', 'TWO', 'GENTLEMEN', 'HAD', 'REACHED', 'THE', 'PALINGS', 'AND', 'HAD', 'GOT', 'DOWN', 'FROM', 'THEIR', 'HORSES', 'IT', 'WAS', 'PLAIN', 'THEY', 'MEANT', 'TO', 'COME', 'IN'] +2094-142345-0044-555: ref=['SAID', 'MISTER', 'IRWINE', 'WITH', 'HIS', 'STATELY', 'CORDIALITY'] +2094-142345-0044-555: hyp=['SAID', 'MISTER', 'IRWINE', 'WITH', 'HIS', 'STATELY', 'CORDIALITY'] +2094-142345-0045-556: ref=['OH', 'SIR', "DON'T", 'MENTION', 'IT', 'SAID', 'MISSUS', 'POYSER'] +2094-142345-0045-556: hyp=['OH', 'SIR', "DON'T", 'MENTION', 'IT', 'SAID', 'MISSUS', 'POYSER'] +2094-142345-0046-557: ref=['I', 'DELIGHT', 'IN', 'YOUR', 'KITCHEN'] +2094-142345-0046-557: hyp=['I', 'DELIGHT', 'IN', 'YOUR', 'KITCHEN'] +2094-142345-0047-558: ref=['POYSER', 'IS', 'NOT', 'AT', 'HOME', 'IS', 'HE'] +2094-142345-0047-558: hyp=['POYSER', 'IS', 'NOT', 'AT', 'HOME', 'IS', 'HE'] +2094-142345-0048-559: ref=['SAID', 'CAPTAIN', 'DONNITHORNE', 'SEATING', 'HIMSELF', 'WHERE', 'HE', 'COULD', 'SEE', 'ALONG', 'THE', 'SHORT', 'PASSAGE', 'TO', 'THE', 'OPEN', 'DAIRY', 'DOOR'] +2094-142345-0048-559: hyp=['SAID', 'CAPTAIN', 'DONNITHORNE', 'SEATING', 'HIMSELF', 'WHERE', 'HE', 'COULD', 'SEE', 'ALONG', 'THE', 'SHORT', 'PASSAGE', 'TO', 'THE', 'OPEN', 'DAIRY', 'DOOR'] +2094-142345-0049-560: ref=['NO', 'SIR', 'HE', "ISN'T", "HE'S", 'GONE', 'TO', 'ROSSETER', 'TO', 'SEE', 'MISTER', 'WEST', 'THE', 'FACTOR', 'ABOUT', 'THE', 'WOOL'] +2094-142345-0049-560: hyp=['NO', 'SIR', 'HE', "ISN'T", "HE'S", 'GONE', 'TO', 'ROSSITER', 'TO', 'SEE', 'MISTER', 'WEST', 'THE', 'FACTOR', 'ABOUT', 'THE', 'WOOL'] +2094-142345-0050-561: ref=['BUT', "THERE'S", 'FATHER', 'THE', 'BARN', 'SIR', 'IF', "HE'D", 'BE', 'OF', 'ANY', 'USE'] +2094-142345-0050-561: hyp=['BUT', "THERE'S", 'FATHER', 'IN', 'THE', 'BARN', 'SIR', 'IF', "HE'D", 'BE', 'OF', 'ANY', 'USE'] +2094-142345-0051-562: 
ref=['NO', 'THANK', 'YOU', "I'LL", 'JUST', 'LOOK', 'AT', 'THE', 'WHELPS', 'AND', 'LEAVE', 'A', 'MESSAGE', 'ABOUT', 'THEM', 'WITH', 'YOUR', 'SHEPHERD'] +2094-142345-0051-562: hyp=['NO', 'THANK', 'YOU', "I'LL", 'JUST', 'LOOK', 'AT', 'THE', 'WHELPS', 'AND', 'LEAVE', 'A', 'MESSAGE', 'ABOUT', 'THEM', 'WITH', 'YOUR', 'SHEPHERD'] +2094-142345-0052-563: ref=['I', 'MUST', 'COME', 'ANOTHER', 'DAY', 'AND', 'SEE', 'YOUR', 'HUSBAND', 'I', 'WANT', 'TO', 'HAVE', 'A', 'CONSULTATION', 'WITH', 'HIM', 'ABOUT', 'HORSES'] +2094-142345-0052-563: hyp=['I', 'MUST', 'COME', 'ANOTHER', 'DAY', 'AND', 'SEE', 'YOUR', 'HUSBAND', 'I', 'WANT', 'TO', 'HAVE', 'A', 'CONSULTATION', 'WITH', 'HIM', 'ABOUT', 'HORSES'] +2094-142345-0053-564: ref=['FOR', 'IF', "HE'S", 'ANYWHERE', 'ON', 'THE', 'FARM', 'WE', 'CAN', 'SEND', 'FOR', 'HIM', 'IN', 'A', 'MINUTE'] +2094-142345-0053-564: hyp=['FOR', 'IF', "HE'S", 'ANYWHERE', 'ON', 'THE', 'FARM', 'WE', 'CAN', 'SEND', 'FOR', 'HIM', 'IN', 'A', 'MINUTE'] +2094-142345-0054-565: ref=['OH', 'SIR', 'SAID', 'MISSUS', 'POYSER', 'RATHER', 'ALARMED', 'YOU', "WOULDN'T", 'LIKE', 'IT', 'AT', 'ALL'] +2094-142345-0054-565: hyp=['OH', 'SIR', 'SAID', 'MISSUS', 'POYSER', 'RATHER', 'ALARMED', 'YOU', "WOULDN'T", 'LIKE', 'IT', 'AT', 'ALL'] +2094-142345-0055-566: ref=['BUT', 'YOU', 'KNOW', 'MORE', 'ABOUT', 'THAT', 'THAN', 'I', 'DO', 'SIR'] +2094-142345-0055-566: hyp=['BUT', 'YOU', 'KNOW', 'MORE', 'ABOUT', 'THAT', 'THAN', 'I', 'DO', 'SIR'] +2094-142345-0056-567: ref=['I', 'THINK', 'I', 'SHOULD', 'BE', 'DOING', 'YOU', 'A', 'SERVICE', 'TO', 'TURN', 'YOU', 'OUT', 'OF', 'SUCH', 'A', 'PLACE'] +2094-142345-0056-567: hyp=['I', 'THINK', 'I', 'SHOULD', 'BE', 'DOING', 'YOU', 'A', 'SERVICE', 'TO', 'TURN', 'YOU', 'OUT', 'OF', 'SUCH', 'A', 'PLACE'] +2094-142345-0057-568: ref=['I', 'KNOW', 'HIS', 'FARM', 'IS', 'IN', 'BETTER', 'ORDER', 'THAN', 'ANY', 'OTHER', 'WITHIN', 'TEN', 'MILES', 'OF', 'US', 'AND', 'AS', 'FOR', 'THE', 'KITCHEN', 'HE', 'ADDED', 'SMILING', 'I', "DON'T", 'BELIEVE', "THERE'S", 'ONE', 'IN', 'THE', 'KINGDOM', 'TO', 'BEAT', 'IT'] +2094-142345-0057-568: hyp=['I', "KNOW'S", 'FARM', 'IS', 'IN', 'BETTER', 'ORDER', 'THAN', 'ANY', 'OTHER', 'WITHIN', 'TEN', 'MILES', 'OF', 'US', 'AND', 'AS', 'FOR', 'THE', 'KITCHEN', 'HE', 'ADDED', 'SMILING', 'I', "DON'T", 'BELIEVE', "THERE'S", 'ONE', 'IN', 'THE', 'KINGDOM', 'TO', 'BEAT', 'IT'] +2094-142345-0058-569: ref=['BY', 'THE', 'BY', "I'VE", 'NEVER', 'SEEN', 'YOUR', 'DAIRY', 'I', 'MUST', 'SEE', 'YOUR', 'DAIRY', 'MISSUS', 'POYSER'] +2094-142345-0058-569: hyp=['BY', 'THE', 'BYE', 'I', 'HAVE', 'NEVER', 'SEEN', 'YOUR', 'DAIRY', 'I', 'MUST', 'SEE', 'YOUR', 'DAIRY', 'MISSUS', 'POYSER'] +2094-142345-0059-570: ref=['THIS', 'MISSUS', 'POYSER', 'SAID', 'BLUSHING', 'AND', 'BELIEVING', 'THAT', 'THE', 'CAPTAIN', 'WAS', 'REALLY', 'INTERESTED', 'IN', 'HER', 'MILK', 'PANS', 'AND', 'WOULD', 'ADJUST', 'HIS', 'OPINION', 'OF', 'HER', 'TO', 'THE', 'APPEARANCE', 'OF', 'HER', 'DAIRY'] +2094-142345-0059-570: hyp=['THIS', 'MISSUS', 'POYSER', 'SAID', 'BLUSHING', 'AND', 'BELIEVING', 'THAT', 'THE', 'CAPTAIN', 'WAS', 'REALLY', 'INTERESTED', 'IN', 'HER', 'MILK', 'PANS', 'AND', 'WOULD', 'ADJUST', 'HIS', 'OPINION', 'OF', 'HER', 'TO', 'THE', 'APPEARANCE', 'OF', 'HER', 'DAIRY'] +2094-142345-0060-571: ref=['OH', "I'VE", 'NO', 'DOUBT', "IT'S", 'IN', 'CAPITAL', 'ORDER'] +2094-142345-0060-571: hyp=['OH', "I'VE", 'NO', 'DOUBT', "IT'S", 'IN', 'CAPITAL', 'ORDER'] +2300-131720-0000-572: ref=['THE', 'PARIS', 'PLANT', 'LIKE', 'THAT', 'AT', 'THE', 'CRYSTAL', 'PALACE', 'WAS', 'A', 'TEMPORARY', 'EXHIBIT'] +2300-131720-0000-572: 
hyp=['THE', 'PARIS', 'PLANT', 'LIKE', 'THAT', 'AT', 'THE', 'CRYSTAL', 'PALACE', 'WAS', 'A', 'TEMPORARY', 'EXHIBIT'] +2300-131720-0001-573: ref=['THE', 'LONDON', 'PLANT', 'WAS', 'LESS', 'TEMPORARY', 'BUT', 'NOT', 'PERMANENT', 'SUPPLYING', 'BEFORE', 'IT', 'WAS', 'TORN', 'OUT', 'NO', 'FEWER', 'THAN', 'THREE', 'THOUSAND', 'LAMPS', 'IN', 'HOTELS', 'CHURCHES', 'STORES', 'AND', 'DWELLINGS', 'IN', 'THE', 'VICINITY', 'OF', 'HOLBORN', 'VIADUCT'] +2300-131720-0001-573: hyp=['THE', 'LONDON', 'PLANT', 'WAS', 'LESS', 'TEMPORARY', 'BUT', 'NOT', 'PERMANENT', 'SUPPLYING', 'BEFORE', 'IT', 'WAS', 'TORN', 'OUT', 'NO', 'FEWER', 'THAN', 'THREE', 'THOUSAND', 'LAMPS', 'IN', 'HOTELS', 'CHURCHES', 'STORES', 'AND', 'DWELLINGS', 'IN', 'THE', 'VICINITY', 'OF', 'HOLBORN', 'VIADUC'] +2300-131720-0002-574: ref=['THERE', 'MESSRS', 'JOHNSON', 'AND', 'HAMMER', 'PUT', 'INTO', 'PRACTICE', 'MANY', 'OF', 'THE', 'IDEAS', 'NOW', 'STANDARD', 'IN', 'THE', 'ART', 'AND', 'SECURED', 'MUCH', 'USEFUL', 'DATA', 'FOR', 'THE', 'WORK', 'IN', 'NEW', 'YORK', 'OF', 'WHICH', 'THE', 'STORY', 'HAS', 'JUST', 'BEEN', 'TOLD'] +2300-131720-0002-574: hyp=['THERE', 'MESSRS', 'JOHNSON', 'AND', 'HAMMER', 'PUT', 'INTO', 'PRACTICE', 'MANY', 'OF', 'THE', 'IDEAS', 'NOW', 'STANDARD', 'IN', 'THE', 'ART', 'AND', 'SECURED', 'MUCH', 'USEFUL', 'DATA', 'FOR', 'THE', 'WORK', 'IN', 'NEW', 'YORK', 'OF', 'WHICH', 'THE', 'STORY', 'HAS', 'JUST', 'BEEN', 'TOLD'] +2300-131720-0003-575: ref=['THE', 'DYNAMO', 'ELECTRIC', 'MACHINE', 'THOUGH', 'SMALL', 'WAS', 'ROBUST', 'FOR', 'UNDER', 'ALL', 'THE', 'VARYING', 'SPEEDS', 'OF', 'WATER', 'POWER', 'AND', 'THE', 'VICISSITUDES', 'OF', 'THE', 'PLANT', 'TO', 'WHICH', 'IT', 'BELONGED', 'IT', 'CONTINUED', 'IN', 'ACTIVE', 'USE', 'UNTIL', 'EIGHTEEN', 'NINETY', 'NINE', 'SEVENTEEN', 'YEARS'] +2300-131720-0003-575: hyp=['THE', 'DYNAMO', 'ELECTRIC', 'MACHINE', 'THOUGH', 'SMALL', 'WAS', 'ROBUST', 'FOR', 'UNDER', 'ALL', 'THE', 'VARYING', 'SPEEDS', 'OF', 'WATER', 'POWER', 'AND', 'THE', 'VICISSITUDES', 'OF', 'THE', 'PLANT', 'TO', 'WHICH', 'IT', 'BELONGED', 'IT', 'CONTINUED', 'IN', 'ACTIVE', 'USE', 'UNTIL', 'EIGHTEEN', 'NINETY', 'NINE', 'SEVENTEEN', 'YEARS'] +2300-131720-0004-576: ref=['OWING', 'TO', 'HIS', 'INSISTENCE', 'ON', 'LOW', 'PRESSURE', 'DIRECT', 'CURRENT', 'FOR', 'USE', 'IN', 'DENSELY', 'POPULATED', 'DISTRICTS', 'AS', 'THE', 'ONLY', 'SAFE', 'AND', 'TRULY', 'UNIVERSAL', 'PROFITABLE', 'WAY', 'OF', 'DELIVERING', 'ELECTRICAL', 'ENERGY', 'TO', 'THE', 'CONSUMERS', 'EDISON', 'HAS', 'BEEN', 'FREQUENTLY', 'SPOKEN', 'OF', 'AS', 'AN', 'OPPONENT', 'OF', 'THE', 'ALTERNATING', 'CURRENT'] +2300-131720-0004-576: hyp=['OWING', 'TO', 'HIS', 'INSISTENCE', 'ON', 'LOW', 'PRESSURE', 'DIRECT', 'CURRENT', 'FOR', 'USE', 'IN', 'DENSELY', 'POPULATED', 'DISTRICTS', 'AS', 'THE', 'ONLY', 'SAFE', 'AND', 'TRULY', 'UNIVERSAL', 'PROFITABLE', 'WAY', 'OF', 'DELIVERING', 'ELECTRICAL', 'ENERGY', 'TO', 'THE', 'CONSUMERS', 'EDISON', 'HAS', 'BEEN', 'FREQUENTLY', 'SPOKEN', 'OF', 'AS', 'AN', 'OPPONENT', 'OF', 'THE', 'ALTERNATING', 'CURRENT'] +2300-131720-0005-577: ref=['WHY', 'IF', 'WE', 'ERECT', 'A', 'STATION', 'AT', 'THE', 'FALLS', 'IT', 'IS', 'A', 'GREAT', 'ECONOMY', 'TO', 'GET', 'IT', 'UP', 'TO', 'THE', 'CITY'] +2300-131720-0005-577: hyp=['WHY', 'IF', 'WE', 'ERECT', 'A', 'STATION', 'AT', 'THE', 'FALLS', 'IT', 'IS', 'A', 'GREAT', 'ECONOMY', 'TO', 'GET', 'IT', 'UP', 'TO', 'THE', 'CITY'] +2300-131720-0006-578: ref=['THERE', 'SEEMS', 'NO', 'GOOD', 'REASON', 'FOR', 'BELIEVING', 'THAT', 'IT', 'WILL', 'CHANGE'] +2300-131720-0006-578: hyp=['THERE', 'SEEMS', 'NO', 'GOOD', 'REASON', 
'FOR', 'BELIEVING', 'THAT', 'IT', 'WILL', 'CHANGE'] +2300-131720-0007-579: ref=['BROAD', 'AS', 'THE', 'PRAIRIES', 'AND', 'FREE', 'IN', 'THOUGHT', 'AS', 'THE', 'WINDS', 'THAT', 'SWEEP', 'THEM', 'HE', 'IS', 'IDIOSYNCRATICALLY', 'OPPOSED', 'TO', 'LOOSE', 'AND', 'WASTEFUL', 'METHODS', 'TO', 'PLANS', 'OF', 'EMPIRE', 'THAT', 'NEGLECT', 'THE', 'POOR', 'AT', 'THE', 'GATE'] +2300-131720-0007-579: hyp=['BROAD', 'AS', 'THE', 'PRAIRIES', 'AND', 'FREE', 'IN', 'THOUGHT', 'AS', 'THE', 'WINDS', 'THAT', 'SWEPT', 'THEM', 'HE', 'IS', 'IDIOSYNCRATICALLY', 'OPPOSED', 'TO', 'LOOSE', 'AND', 'WASTEFUL', 'METHODS', 'TO', 'PLANS', 'OF', 'EMPIRE', 'THAT', 'NEGLECT', 'THE', 'POOR', 'AT', 'THE', 'GATE'] +2300-131720-0008-580: ref=['EVERYTHING', 'HE', 'HAS', 'DONE', 'HAS', 'BEEN', 'AIMED', 'AT', 'THE', 'CONSERVATION', 'OF', 'ENERGY', 'THE', 'CONTRACTION', 'OF', 'SPACE', 'THE', 'INTENSIFICATION', 'OF', 'CULTURE'] +2300-131720-0008-580: hyp=['EVERYTHING', 'HE', 'HAS', 'DONE', 'HAS', 'BEEN', 'AIMED', 'AT', 'THE', 'CONSERVATION', 'OF', 'ENERGY', 'THE', 'CONTRACTION', 'OF', 'SPACE', 'THE', 'INTENSIFICATION', 'OF', 'CULTURE'] +2300-131720-0009-581: ref=['FOR', 'SOME', 'YEARS', 'IT', 'WAS', 'NOT', 'FOUND', 'FEASIBLE', 'TO', 'OPERATE', 'MOTORS', 'ON', 'ALTERNATING', 'CURRENT', 'CIRCUITS', 'AND', 'THAT', 'REASON', 'WAS', 'OFTEN', 'URGED', 'AGAINST', 'IT', 'SERIOUSLY'] +2300-131720-0009-581: hyp=['FOR', 'SOME', 'YEARS', 'IT', 'WAS', 'NOT', 'FOUND', 'FEASIBLE', 'TO', 'OPERATE', 'MOTORS', 'ON', 'ALTERNATING', 'CURRENT', 'CIRCUITS', 'AND', 'THAT', 'REASON', 'WAS', 'OFTEN', 'URGED', 'AGAINST', 'IT', 'SERIOUSLY'] +2300-131720-0010-582: ref=['IT', 'COULD', 'NOT', 'BE', 'USED', 'FOR', 'ELECTROPLATING', 'OR', 'DEPOSITION', 'NOR', 'COULD', 'IT', 'CHARGE', 'STORAGE', 'BATTERIES', 'ALL', 'OF', 'WHICH', 'ARE', 'EASILY', 'WITHIN', 'THE', 'ABILITY', 'OF', 'THE', 'DIRECT', 'CURRENT'] +2300-131720-0010-582: hyp=['IT', 'COULD', 'NOT', 'BE', 'USED', 'FOR', 'ELECTROPLATING', 'OR', 'DEPOSITION', 'NOR', 'COULD', 'IT', 'CHARGE', 'STORAGE', 'BATTERIES', 'ALL', 'OF', 'WHICH', 'ARE', 'EASILY', 'WITHIN', 'THE', 'ABILITY', 'OF', 'THE', 'DIRECT', 'CURRENT'] +2300-131720-0011-583: ref=['BUT', 'WHEN', 'IT', 'CAME', 'TO', 'BE', 'A', 'QUESTION', 'OF', 'LIGHTING', 'A', 'SCATTERED', 'SUBURB', 'A', 'GROUP', 'OF', 'DWELLINGS', 'ON', 'THE', 'OUTSKIRTS', 'A', 'REMOTE', 'COUNTRY', 'RESIDENCE', 'OR', 'A', 'FARM', 'HOUSE', 'THE', 'ALTERNATING', 'CURRENT', 'IN', 'ALL', 'ELEMENTS', 'SAVE', 'ITS', 'DANGER', 'WAS', 'AND', 'IS', 'IDEAL'] +2300-131720-0011-583: hyp=['BUT', 'WHEN', 'IT', 'CAME', 'TO', 'BE', 'A', 'QUESTION', 'OF', 'LIGHTING', 'A', 'SCATTERED', 'SUBURB', 'A', 'GROUP', 'OF', 'DWELLINGS', 'ON', 'THE', 'OUTSKIRTS', 'A', 'REMOTE', 'COUNTRY', 'RESIDENCE', 'OR', 'A', 'FARM', 'HOUSE', 'THE', 'ALTERNATING', 'CURRENT', 'IN', 'ALL', 'ELEMENTS', 'SAVE', 'ITS', 'DANGER', 'WAS', 'AND', 'IS', 'IDEAL'] +2300-131720-0012-584: ref=['EDISON', 'WAS', 'INTOLERANT', 'OF', 'SHAM', 'AND', 'SHODDY', 'AND', 'NOTHING', 'WOULD', 'SATISFY', 'HIM', 'THAT', 'COULD', 'NOT', 'STAND', 'CROSS', 'EXAMINATION', 'BY', 'MICROSCOPE', 'TEST', 'TUBE', 'AND', 'GALVANOMETER'] +2300-131720-0012-584: hyp=['EDISON', 'WAS', 'INTOLERANT', 'OF', 'SHAM', 'AND', 'SHODDY', 'AND', 'NOTHING', 'WOULD', 'SATISFY', 'HIM', 'THAT', 'COULD', 'NOT', 'STAND', 'CROSS', 'EXAMINATION', 'BY', 'MICROSCOPE', 'TEST', 'TUBE', 'AND', 'GALVANOMETER'] +2300-131720-0013-585: ref=['UNLESS', 'HE', 'COULD', 'SECURE', 'AN', 'ENGINE', 'OF', 'SMOOTHER', 'RUNNING', 'AND', 'MORE', 'EXACTLY', 'GOVERNED', 'AND', 'REGULATED', 'THAN', 'THOSE', 
'AVAILABLE', 'FOR', 'HIS', 'DYNAMO', 'AND', 'LAMP', 'EDISON', 'REALIZED', 'THAT', 'HE', 'WOULD', 'FIND', 'IT', 'ALMOST', 'IMPOSSIBLE', 'TO', 'GIVE', 'A', 'STEADY', 'LIGHT'] +2300-131720-0013-585: hyp=['UNLESS', 'HE', 'COULD', 'SECURE', 'AN', 'ENGINE', 'OF', 'SMOOTHER', 'RUNNING', 'AND', 'MORE', 'EXACTLY', 'GOVERN', 'AND', 'REGULATED', 'THAN', 'THOSE', 'AVAILABLE', 'FOR', 'HIS', 'DYNAMO', 'AND', 'LAMP', 'EDISON', 'REALIZED', 'THAT', 'HE', 'WOULD', 'FIND', 'IT', 'ALMOST', 'IMPOSSIBLE', 'TO', 'GIVE', 'A', 'STEADY', 'LIGHT'] +2300-131720-0014-586: ref=['MISTER', 'EDISON', 'WAS', 'A', 'LEADER', 'FAR', 'AHEAD', 'OF', 'THE', 'TIME'] +2300-131720-0014-586: hyp=['MISTER', 'EDISON', 'WAS', 'A', 'LEADER', 'FAR', 'AHEAD', 'OF', 'THE', 'TIME'] +2300-131720-0015-587: ref=['HE', 'OBTAINED', 'THE', 'DESIRED', 'SPEED', 'AND', 'LOAD', 'WITH', 'A', 'FRICTION', 'BRAKE', 'ALSO', 'REGULATOR', 'OF', 'SPEED', 'BUT', 'WAITED', 'FOR', 'AN', 'INDICATOR', 'TO', 'VERIFY', 'IT'] +2300-131720-0015-587: hyp=['HE', 'OBTAINED', 'THE', 'DESIRED', 'SPEED', 'AND', 'LOAD', 'WITH', 'A', 'FRICTION', 'BRAKE', 'ALSO', 'REGULATOR', 'OF', 'SPEED', 'BUT', 'WAITED', 'FOR', 'AN', 'INDICATOR', 'TO', 'VERIFY', 'IT'] +2300-131720-0016-588: ref=['THEN', 'AGAIN', 'THERE', 'WAS', 'NO', 'KNOWN', 'WAY', 'TO', 'LUBRICATE', 'AN', 'ENGINE', 'FOR', 'CONTINUOUS', 'RUNNING', 'AND', 'MISTER', 'EDISON', 'INFORMED', 'ME', 'THAT', 'AS', 'A', 'MARINE', 'ENGINE', 'STARTED', 'BEFORE', 'THE', 'SHIP', 'LEFT', 'NEW', 'YORK', 'AND', 'CONTINUED', 'RUNNING', 'UNTIL', 'IT', 'REACHED', 'ITS', 'HOME', 'PORT', 'SO', 'AN', 'ENGINE', 'FOR', 'HIS', 'PURPOSES', 'MUST', 'PRODUCE', 'LIGHT', 'AT', 'ALL', 'TIMES'] +2300-131720-0016-588: hyp=['THEN', 'AGAIN', 'THERE', 'WAS', 'NO', 'KNOWN', 'WAY', 'TO', 'LUBRICATE', 'AN', 'ENGINE', 'FOR', 'CONTINUOUS', 'RUNNING', 'AND', 'MISTER', 'EDISON', 'INFORMED', 'ME', 'THAT', 'AS', 'A', 'MARINE', 'ENGINE', 'STARTED', 'BEFORE', 'THE', 'SHIP', 'LEFT', 'NEW', 'YORK', 'AND', 'CONTINUED', 'RUNNING', 'UNTIL', 'IT', 'REACHED', 'ITS', 'HOME', 'PORT', 'SO', 'AN', 'ENGINE', 'FOR', 'HIS', 'PURPOSES', 'MUST', 'PRODUCE', 'LIGHT', 'AT', 'ALL', 'TIMES'] +2300-131720-0017-589: ref=['EDISON', 'HAD', 'INSTALLED', 'HIS', 'HISTORIC', 'FIRST', 'GREAT', 'CENTRAL', 'STATION', 'SYSTEM', 'IN', 'NEW', 'YORK', 'ON', 'THE', 'MULTIPLE', 'ARC', 'SYSTEM', 'COVERED', 'BY', 'HIS', 'FEEDER', 'AND', 'MAIN', 'INVENTION', 'WHICH', 'RESULTED', 'IN', 'A', 'NOTABLE', 'SAVING', 'IN', 'THE', 'COST', 'OF', 'CONDUCTORS', 'AS', 'AGAINST', 'A', 'STRAIGHT', 'TWO', 'WIRE', 'SYSTEM', 'THROUGHOUT', 'OF', 'THE', 'TREE', 'KIND'] +2300-131720-0017-589: hyp=['EDISON', 'HAD', 'INSTALLED', 'HIS', 'HISTORIC', 'FIRST', 'GREAT', 'CENTRAL', 'STATION', 'SYSTEM', 'IN', 'NEW', 'YORK', 'ON', 'THE', 'MULTIPLE', 'ARC', 'SYSTEM', 'COVERED', 'BY', 'HIS', 'FEEDER', 'AND', 'MAIN', 'INVENTION', 'WHICH', 'RESULTED', 'IN', 'A', 'NOTABLE', 'SAVING', 'IN', 'THE', 'COST', 'OF', 'CONDUCTORS', 'AS', 'AGAINST', 'A', 'STRAIGHT', 'TWO', 'WIRE', 'SYSTEM', 'THROUGHOUT', 'OF', 'THE', 'TREE', 'KIND'] +2300-131720-0018-590: ref=['HE', 'SOON', 'FORESAW', 'THAT', 'STILL', 'GREATER', 'ECONOMY', 'WOULD', 'BE', 'NECESSARY', 'FOR', 'COMMERCIAL', 'SUCCESS', 'NOT', 'ALONE', 'FOR', 'THE', 'LARGER', 'TERRITORY', 'OPENING', 'BUT', 'FOR', 'THE', 'COMPACT', 'DISTRICTS', 'OF', 'LARGE', 'CITIES'] +2300-131720-0018-590: hyp=['HE', 'SOON', 'FORESAW', 'THAT', 'STILL', 'GREATER', 'ECONOMY', 'WOULD', 'BE', 'NECESSARY', 'FOR', 'COMMERCIAL', 'SUCCESS', 'NOT', 'ALONE', 'FOR', 'THE', 'LARGER', 'TERRITORY', 'OPENING', 'BUT', 'FOR', 'THE', 
'COMPACT', 'DISTRICT', 'OF', 'LARGE', 'CITIES'] +2300-131720-0019-591: ref=['THE', 'STRONG', 'POSITION', 'HELD', 'BY', 'THE', 'EDISON', 'SYSTEM', 'UNDER', 'THE', 'STRENUOUS', 'COMPETITION', 'THAT', 'WAS', 'ALREADY', 'SPRINGING', 'UP', 'WAS', 'ENORMOUSLY', 'IMPROVED', 'BY', 'THE', 'INTRODUCTION', 'OF', 'THE', 'THREE', 'WIRE', 'SYSTEM', 'AND', 'IT', 'GAVE', 'AN', 'IMMEDIATE', 'IMPETUS', 'TO', 'INCANDESCENT', 'LIGHTING'] +2300-131720-0019-591: hyp=['THE', 'STRONG', 'POSITION', 'HELD', 'BY', 'THE', 'EDISON', 'SYSTEM', 'UNDER', 'THE', 'STRENUOUS', 'COMPETITION', 'THAT', 'WAS', 'ALREADY', 'SPRINGING', 'UP', 'WAS', 'ENORMOUSLY', 'IMPROVED', 'BY', 'THE', 'INTRODUCTION', 'OF', 'THE', 'THREE', 'WIRE', 'SYSTEM', 'AND', 'IT', 'GAVE', 'AN', 'IMMEDIATE', 'IMPETUS', 'TO', 'INCANDESCENT', 'LIGHTING'] +2300-131720-0020-592: ref=['IT', 'WAS', 'SPECIALLY', 'SUITED', 'FOR', 'A', 'TRIAL', 'PLANT', 'ALSO', 'IN', 'THE', 'EARLY', 'DAYS', 'WHEN', 'A', 'YIELD', 'OF', 'SIX', 'OR', 'EIGHT', 'LAMPS', 'TO', 'THE', 'HORSE', 'POWER', 'WAS', 'CONSIDERED', 'SUBJECT', 'FOR', 'CONGRATULATION'] +2300-131720-0020-592: hyp=['IT', 'WAS', 'SPECIALLY', 'SUITED', 'FOR', 'A', 'TRIAL', 'PLAT', 'ALSO', 'IN', 'THE', 'EARLY', 'DAYS', 'WHEN', 'A', 'YIELD', 'OF', 'SIX', 'OR', 'EIGHT', 'LAMPS', 'TO', 'THE', 'HORSE', 'POWER', 'WAS', 'CONSIDERED', 'SUBJECT', 'FOR', 'CONGRATULATION'] +2300-131720-0021-593: ref=['THE', 'STREET', 'CONDUCTORS', 'WERE', 'OF', 'THE', 'OVERHEAD', 'POLE', 'LINE', 'CONSTRUCTION', 'AND', 'WERE', 'INSTALLED', 'BY', 'THE', 'CONSTRUCTION', 'COMPANY', 'THAT', 'HAD', 'BEEN', 'ORGANIZED', 'BY', 'EDISON', 'TO', 'BUILD', 'AND', 'EQUIP', 'CENTRAL', 'STATIONS'] +2300-131720-0021-593: hyp=['THE', 'STREET', 'CONDUCTORS', 'WERE', 'OF', 'THE', 'OVERHEAD', 'POLE', 'LINE', 'CONSTRUCTION', 'AND', 'WERE', 'INSTALLED', 'BY', 'THE', 'CONSTRUCTION', 'COMPANY', 'THAT', 'HAD', 'BEEN', 'ORGANIZED', 'BY', 'EDISON', 'TO', 'BUILD', 'AND', 'EQUIP', 'CENTRAL', 'STATIONS'] +2300-131720-0022-594: ref=['MEANWHILE', 'HE', 'HAD', 'CALLED', 'UPON', 'ME', 'TO', 'MAKE', 'A', 'REPORT', 'OF', 'THE', 'THREE', 'WIRE', 'SYSTEM', 'KNOWN', 'IN', 'ENGLAND', 'AS', 'THE', 'HOPKINSON', 'BOTH', 'DOCTOR', 'JOHN', 'HOPKINSON', 'AND', 'MISTER', 'EDISON', 'BEING', 'INDEPENDENT', 'INVENTORS', 'AT', 'PRACTICALLY', 'THE', 'SAME', 'TIME'] +2300-131720-0022-594: hyp=['MEANWHILE', 'HE', 'HAD', 'CALLED', 'UPON', 'ME', 'TO', 'MAKE', 'A', 'REPORT', 'OF', 'THE', 'THREE', 'WIRE', 'SYSTEM', 'KNOWN', 'IN', 'ENGLAND', 'AS', 'THE', 'HOPKINSON', 'BOTH', 'DOCTOR', 'JOHN', 'HOPKINSON', 'AND', 'MISTER', 'EDISON', 'BEING', 'INDEPENDENT', 'INVENTORS', 'AT', 'PRACTICALLY', 'THE', 'SAME', 'TIME'] +2300-131720-0023-595: ref=['I', 'THINK', 'HE', 'WAS', 'PERHAPS', 'MORE', 'APPRECIATIVE', 'THAN', 'I', 'WAS', 'OF', 'THE', 'DISCIPLINE', 'OF', 'THE', 'EDISON', 'CONSTRUCTION', 'DEPARTMENT', 'AND', 'THOUGHT', 'IT', 'WOULD', 'BE', 'WELL', 'FOR', 'US', 'TO', 'WAIT', 'UNTIL', 'THE', 'MORNING', 'OF', 'THE', 'FOURTH', 'BEFORE', 'WE', 'STARTED', 'UP'] +2300-131720-0023-595: hyp=['I', 'THINK', 'HE', 'WAS', 'PERHAPS', 'MORE', 'APPRECIATIVE', 'THAT', 'I', 'WAS', 'OF', 'THE', 'DISCIPLINE', 'OF', 'THE', 'EDISON', 'CONSTRUCTION', 'DEPARTMENT', 'AND', 'THOUGHT', 'IT', 'WOULD', 'BE', 'WELL', 'FOR', 'US', 'TO', 'WAIT', 'UNTIL', 'THE', 'MORNING', 'OF', 'THE', 'FOURTH', 'BEFORE', 'WE', 'STARTED', 'UP'] +2300-131720-0024-596: ref=['BUT', 'THE', 'PLANT', 'RAN', 'AND', 'IT', 'WAS', 'THE', 'FIRST', 'THREE', 'WIRE', 'STATION', 'IN', 'THIS', 'COUNTRY'] +2300-131720-0024-596: hyp=['BUT', 'THE', 'PLANT', 'RAN', 'AND', 'IT', 
'WAS', 'THE', 'FIRST', 'THREE', 'WIRE', 'STATION', 'IN', 'THIS', 'COUNTRY'] +2300-131720-0025-597: ref=['THEY', 'WERE', 'LATER', 'USED', 'AS', 'RESERVE', 'MACHINES', 'AND', 'FINALLY', 'WITH', 'THE', 'ENGINE', 'RETIRED', 'FROM', 'SERVICE', 'AS', 'PART', 'OF', 'THE', 'COLLECTION', 'OF', 'EDISONIA', 'BUT', 'THEY', 'REMAIN', 'IN', 'PRACTICALLY', 'AS', 'GOOD', 'CONDITION', 'AS', 'WHEN', 'INSTALLED', 'IN', 'EIGHTEEN', 'EIGHTY', 'THREE'] +2300-131720-0025-597: hyp=['THEY', 'WERE', 'LATER', 'USED', 'AS', 'RESERVE', 'MACHINES', 'AND', 'FINALLY', 'WITH', 'THE', 'ENGINE', 'RETIRED', 'FROM', 'SERVICE', 'AS', 'PART', 'OF', 'THE', 'COLLECTION', 'OF', 'EDISONIA', 'BUT', 'THEY', 'REMAIN', 'IN', 'PRACTICALLY', 'AS', 'GOOD', 'CONDITION', 'AS', 'WHEN', 'INSTALLED', 'IN', 'EIGHTEEN', 'EIGHTY', 'THREE'] +2300-131720-0026-598: ref=['THE', 'ARC', 'LAMP', 'INSTALLED', 'OUTSIDE', 'A', "CUSTOMER'S", 'PREMISES', 'OR', 'IN', 'A', 'CIRCUIT', 'FOR', 'PUBLIC', 'STREET', 'LIGHTING', 'BURNED', 'SO', 'MANY', 'HOURS', 'NIGHTLY', 'SO', 'MANY', 'NIGHTS', 'IN', 'THE', 'MONTH', 'AND', 'WAS', 'PAID', 'FOR', 'AT', 'THAT', 'RATE', 'SUBJECT', 'TO', 'REBATE', 'FOR', 'HOURS', 'WHEN', 'THE', 'LAMP', 'MIGHT', 'BE', 'OUT', 'THROUGH', 'ACCIDENT'] +2300-131720-0026-598: hyp=['THE', 'ARC', 'LAMP', 'INSTALLED', 'OUTSIDE', 'A', 'CUSTOMERS', 'PREMISES', 'OR', 'IN', 'A', 'CIRCUIT', 'FOR', 'PUBLIC', 'STREET', 'LIGHTING', 'BURNED', 'SO', 'MANY', 'HOURS', 'NIGHTLY', 'SO', 'MANY', 'NIGHTS', 'IN', 'THE', 'MONTH', 'AND', 'WAS', 'PAID', 'FOR', 'AT', 'THAT', 'RATE', 'SUBJECT', 'TO', 'REBATE', 'FOR', 'HOURS', 'WHEN', 'THE', 'LAMP', 'MIGHT', 'BE', 'OUT', 'THROUGH', 'ACCIDENT'] +2300-131720-0027-599: ref=['EDISON', 'HELD', 'THAT', 'THE', 'ELECTRICITY', 'SOLD', 'MUST', 'BE', 'MEASURED', 'JUST', 'LIKE', 'GAS', 'OR', 'WATER', 'AND', 'HE', 'PROCEEDED', 'TO', 'DEVELOP', 'A', 'METER'] +2300-131720-0027-599: hyp=['EDISON', 'HELD', 'THAT', 'THE', 'ELECTRICITY', 'SOLD', 'MUST', 'BE', 'MEASURED', 'JUST', 'LIKE', 'GAS', 'OR', 'WATER', 'AND', 'HE', 'PROCEEDED', 'TO', 'DEVELOP', 'A', 'METER'] +2300-131720-0028-600: ref=['THERE', 'WAS', 'INFINITE', 'SCEPTICISM', 'AROUND', 'HIM', 'ON', 'THE', 'SUBJECT', 'AND', 'WHILE', 'OTHER', 'INVENTORS', 'WERE', 'ALSO', 'GIVING', 'THE', 'SUBJECT', 'THEIR', 'THOUGHT', 'THE', 'PUBLIC', 'TOOK', 'IT', 'FOR', 'GRANTED', 'THAT', 'ANYTHING', 'SO', 'UTTERLY', 'INTANGIBLE', 'AS', 'ELECTRICITY', 'THAT', 'COULD', 'NOT', 'BE', 'SEEN', 'OR', 'WEIGHED', 'AND', 'ONLY', 'GAVE', 'SECONDARY', 'EVIDENCE', 'OF', 'ITSELF', 'AT', 'THE', 'EXACT', 'POINT', 'OF', 'USE', 'COULD', 'NOT', 'BE', 'BROUGHT', 'TO', 'ACCURATE', 'REGISTRATION'] +2300-131720-0028-600: hyp=['THERE', 'WAS', 'INFINITE', 'SKEPTICISM', 'AROUND', 'HIM', 'ON', 'THE', 'SUBJECT', 'AND', 'WHILE', 'OTHER', 'INVENTORS', 'WERE', 'ALSO', 'GIVING', 'THE', 'SUBJECT', 'THEIR', 'THOUGHT', 'THE', 'PUBLIC', 'TOOK', 'IT', 'FOR', 'GRANTED', 'THAT', 'ANYTHING', 'SO', 'UTTERLY', 'INTANGIBLE', 'AS', 'ELECTRICITY', 'THAT', 'COULD', 'NOT', 'BE', 'SEEN', 'OR', 'WEIGHED', 'AND', 'ONLY', 'GAVE', 'SECONDARY', 'EVIDENCE', 'OF', 'ITSELF', 'AT', 'THE', 'EXACT', 'POINT', 'OF', 'USE', 'COULD', 'NOT', 'BE', 'BROUGHT', 'TO', 'ACCURATE', 'REGISTRATION'] +2300-131720-0029-601: ref=['HENCE', 'THE', 'EDISON', 'ELECTROLYTIC', 'METER', 'IS', 'NO', 'LONGER', 'USED', 'DESPITE', 'ITS', 'EXCELLENT', 'QUALITIES'] +2300-131720-0029-601: hyp=['HENCE', 'THE', 'EDISON', 'ELECTROLYTIC', 'METRE', 'IS', 'NO', 'LONGER', 'USED', 'DESPITE', 'ITS', 'EXCELLENT', 'QUALITIES'] +2300-131720-0030-602: ref=['THE', 'PRINCIPLE', 'EMPLOYED', 'IN', 
'THE', 'EDISON', 'ELECTROLYTIC', 'METER', 'IS', 'THAT', 'WHICH', 'EXEMPLIFIES', 'THE', 'POWER', 'OF', 'ELECTRICITY', 'TO', 'DECOMPOSE', 'A', 'CHEMICAL', 'SUBSTANCE'] +2300-131720-0030-602: hyp=['THE', 'PRINCIPLE', 'EMPLOYED', 'IN', 'THE', 'EDISON', 'ELECTROLYTIC', 'METRE', 'IS', 'THAT', 'WHICH', 'EXEMPLIFIES', 'THE', 'POWER', 'OF', 'ELECTRICITY', 'TO', 'DECOMPOSE', 'A', 'CHEMICAL', 'SUBSTANCE'] +2300-131720-0031-603: ref=['ASSOCIATED', 'WITH', 'THIS', 'SIMPLE', 'FORM', 'OF', 'APPARATUS', 'WERE', 'VARIOUS', 'INGENIOUS', 'DETAILS', 'AND', 'REFINEMENTS', 'TO', 'SECURE', 'REGULARITY', 'OF', 'OPERATION', 'FREEDOM', 'FROM', 'INACCURACY', 'AND', 'IMMUNITY', 'FROM', 'SUCH', 'TAMPERING', 'AS', 'WOULD', 'PERMIT', 'THEFT', 'OF', 'CURRENT', 'OR', 'DAMAGE'] +2300-131720-0031-603: hyp=['ASSOCIATED', 'WITH', 'THIS', 'SIMPLE', 'FORM', 'OF', 'APPARATUS', 'WERE', 'VARIOUS', 'INGENIOUS', 'DETAILS', 'AND', 'REFINEMENTS', 'TO', 'SECURE', 'REGULARITY', 'OF', 'OPERATION', 'FREEDOM', 'FROM', 'INACCURACY', 'AND', 'IMMUNITY', 'FROM', 'SUCH', 'TAMPERING', 'AS', 'WOULD', 'PERMIT', 'THEFT', 'OF', 'CURRENT', 'OR', 'DAMAGE'] +2300-131720-0032-604: ref=['THE', 'STANDARD', 'EDISON', 'METER', 'PRACTICE', 'WAS', 'TO', 'REMOVE', 'THE', 'CELLS', 'ONCE', 'A', 'MONTH', 'TO', 'THE', 'METER', 'ROOM', 'OF', 'THE', 'CENTRAL', 'STATION', 'COMPANY', 'FOR', 'EXAMINATION', 'ANOTHER', 'SET', 'BEING', 'SUBSTITUTED'] +2300-131720-0032-604: hyp=['THE', 'STANDARD', 'EDISON', 'METER', 'PRACTICE', 'WAS', 'TO', 'REMOVE', 'THE', 'CELLS', 'ONCE', 'A', 'MONTH', 'TO', 'THE', 'METER', 'ROOM', 'OF', 'THE', 'CENTRAL', 'STATION', 'COMPANY', 'FOR', 'EXAMINATION', 'ANOTHER', 'SET', 'BEING', 'SUBSTITUTED'] +2300-131720-0033-605: ref=['IN', 'DECEMBER', 'EIGHTEEN', 'EIGHTY', 'EIGHT', 'MISTER', 'W', 'J', 'JENKS', 'READ', 'AN', 'INTERESTING', 'PAPER', 'BEFORE', 'THE', 'AMERICAN', 'INSTITUTE', 'OF', 'ELECTRICAL', 'ENGINEERS', 'ON', 'THE', 'SIX', 'YEARS', 'OF', 'PRACTICAL', 'EXPERIENCE', 'HAD', 'UP', 'TO', 'THAT', 'TIME', 'WITH', 'THE', 'METER', 'THEN', 'MORE', 'GENERALLY', 'IN', 'USE', 'THAN', 'ANY', 'OTHER'] +2300-131720-0033-605: hyp=['IN', 'DECEMBER', 'EIGHTEEN', 'EIGHTY', 'EIGHT', 'MISTER', 'W', 'J', 'JENKS', 'READ', 'AN', 'INTERESTING', 'PAPER', 'BEFORE', 'THE', 'AMERICAN', 'INSTITUTE', 'OF', 'ELECTRICAL', 'ENGINEERS', 'ON', 'THE', 'SIX', 'YEARS', 'OF', 'PRACTICAL', 'EXPERIENCE', 'HAD', 'UP', 'TO', 'THAT', 'TIME', 'WITH', 'THE', 'METER', 'THEN', 'MORE', 'GENERALLY', 'IN', 'USE', 'THAN', 'ANY', 'OTHER'] +2300-131720-0034-606: ref=['THE', 'OTHERS', 'HAVING', 'BEEN', 'IN', 'OPERATION', 'TOO', 'SHORT', 'A', 'TIME', 'TO', 'SHOW', 'DEFINITE', 'RESULTS', 'ALTHOUGH', 'THEY', 'ALSO', 'WENT', 'QUICKLY', 'TO', 'A', 'DIVIDEND', 'BASIS'] +2300-131720-0034-606: hyp=['THE', 'OTHERS', 'HAVING', 'BEEN', 'IN', 'OPERATION', 'TOO', 'SHORT', 'A', 'TIME', 'TO', 'SHOW', 'DEFINITE', 'RESULTS', 'ALTHOUGH', 'THEY', 'ALSO', 'WENT', 'QUICKLY', 'TO', 'A', 'DIVIDEND', 'BASIS'] +2300-131720-0035-607: ref=['IN', 'THIS', 'CONNECTION', 'IT', 'SHOULD', 'BE', 'MENTIONED', 'THAT', 'THE', 'ASSOCIATION', 'OF', 'EDISON', 'ILLUMINATING', 'COMPANIES', 'IN', 'THE', 'SAME', 'YEAR', 'ADOPTED', 'RESOLUTIONS', 'UNANIMOUSLY', 'TO', 'THE', 'EFFECT', 'THAT', 'THE', 'EDISON', 'METER', 'WAS', 'ACCURATE', 'AND', 'THAT', 'ITS', 'USE', 'WAS', 'NOT', 'EXPENSIVE', 'FOR', 'STATIONS', 'ABOVE', 'ONE', 'THOUSAND', 'LIGHTS', 'AND', 'THAT', 'THE', 'BEST', 'FINANCIAL', 'RESULTS', 'WERE', 'INVARIABLY', 'SECURED', 'IN', 'A', 'STATION', 'SELLING', 'CURRENT', 'BY', 'METER'] +2300-131720-0035-607: hyp=['IN', 'THIS', 
'CONNECTION', 'IT', 'SHOULD', 'BE', 'MENTIONED', 'THAT', 'THE', 'ASSOCIATION', 'OF', 'EDISON', 'ILLUMINATING', 'COMPANIES', 'IN', 'THE', 'SAME', 'YEAR', 'ADOPTED', 'RESOLUTIONS', 'UNANIMOUSLY', 'TO', 'THE', 'EFFECT', 'THAT', 'THE', 'EDISON', 'METER', 'WAS', 'ACCURATE', 'AND', 'THAT', 'ITS', 'USE', 'WAS', 'NOT', 'EXPENSIVE', 'FOR', 'STATIONS', 'ABOVE', 'ONE', 'THOUSAND', 'LIGHTS', 'AND', 'THAT', 'THE', 'BEST', 'FINANCIAL', 'RESULTS', 'WERE', 'INVARIABLY', 'SECURED', 'IN', 'A', 'STATION', 'SELLING', 'CURRENT', 'BY', 'METER'] +2300-131720-0036-608: ref=['THE', 'METER', 'CONTINUED', 'IN', 'GENERAL', 'SERVICE', 'DURING', 'EIGHTEEN', 'NINETY', 'NINE', 'AND', 'PROBABLY', 'UP', 'TO', 'THE', 'CLOSE', 'OF', 'THE', 'CENTURY'] +2300-131720-0036-608: hyp=['THE', 'METRE', 'CONTINUED', 'IN', 'GENERAL', 'SERVICE', 'DURING', 'EIGHTEEN', 'NINETY', 'NINE', 'AND', 'PROBABLY', 'UP', 'TO', 'THE', 'CLOSE', 'OF', 'THE', 'CENTURY'] +2300-131720-0037-609: ref=['HE', 'WEIGHED', 'AND', 'REWEIGHED', 'THE', 'METER', 'PLATES', 'AND', 'PURSUED', 'EVERY', 'LINE', 'OF', 'INVESTIGATION', 'IMAGINABLE', 'BUT', 'ALL', 'IN', 'VAIN'] +2300-131720-0037-609: hyp=['HE', 'WEIGHED', 'AND', 'RE', 'WEIGHED', 'THE', 'METER', 'PLATES', 'AND', 'PURSUED', 'EVERY', 'LINE', 'OF', 'INVESTIGATION', 'IMAGINABLE', 'BUT', 'ALL', 'IN', 'VAIN'] +2300-131720-0038-610: ref=['HE', 'FELT', 'HE', 'WAS', 'UP', 'AGAINST', 'IT', 'AND', 'THAT', 'PERHAPS', 'ANOTHER', 'KIND', 'OF', 'A', 'JOB', 'WOULD', 'SUIT', 'HIM', 'BETTER'] +2300-131720-0038-610: hyp=['HE', 'FELT', 'HE', 'WAS', 'UP', 'AGAINST', 'IT', 'AND', 'THAT', 'PERHAPS', 'ANOTHER', 'KIND', 'OF', 'A', 'JOB', 'WOULD', 'SUIT', 'HIM', 'BETTER'] +2300-131720-0039-611: ref=['THE', 'PROBLEM', 'WAS', 'SOLVED'] +2300-131720-0039-611: hyp=['THE', 'PROBLEM', 'WAS', 'SOLVED'] +2300-131720-0040-612: ref=['WE', 'WERE', 'MORE', 'INTERESTED', 'IN', 'THE', 'TECHNICAL', 'CONDITION', 'OF', 'THE', 'STATION', 'THAN', 'IN', 'THE', 'COMMERCIAL', 'PART'] +2300-131720-0040-612: hyp=['WE', 'WERE', 'MORE', 'INTERESTED', 'IN', 'THE', 'TECHNICAL', 'CONDITION', 'OF', 'THE', 'STATION', 'THAN', 'IN', 'THE', 'COMMERCIAL', 'PART'] +2300-131720-0041-613: ref=['WE', 'HAD', 'METERS', 'IN', 'WHICH', 'THERE', 'WERE', 'TWO', 'BOTTLES', 'OF', 'LIQUID'] +2300-131720-0041-613: hyp=['WE', 'HAD', 'METERS', 'IN', 'WHICH', 'THERE', 'WERE', 'TWO', 'BOTTLES', 'OF', 'LIQUID'] +237-126133-0000-614: ref=['HERE', 'SHE', 'WOULD', 'STAY', 'COMFORTED', 'AND', 'SOOTHED', 'AMONG', 'THE', 'LOVELY', 'PLANTS', 'AND', 'RICH', 'EXOTICS', 'REJOICING', 'THE', 'HEART', 'OF', 'OLD', 'TURNER', 'THE', 'GARDENER', 'WHO', 'SINCE', "POLLY'S", 'FIRST', 'RAPTUROUS', 'ENTRANCE', 'HAD', 'TAKEN', 'HER', 'INTO', 'HIS', 'GOOD', 'GRACES', 'FOR', 'ALL', 'TIME'] +237-126133-0000-614: hyp=['HERE', 'SHE', 'WOULD', 'STAY', 'COMFORTED', 'AND', 'SOOTHED', 'AMONG', 'THE', 'LOVELY', 'PLANTS', 'AND', 'RICH', 'EXOTICS', 'REJOICING', 'THE', 'HEART', 'OF', 'OLD', 'TURNER', 'THE', 'GARDENER', 'WHO', 'SINCE', "POLLY'S", 'FIRST', 'RAPTUROUS', 'ENTRANCE', 'HAD', 'TAKEN', 'HER', 'INTO', 'HIS', 'GOOD', 'GRACES', 'FOR', 'ALL', 'TIME'] +237-126133-0001-615: ref=['EVERY', 'CHANCE', 'SHE', 'COULD', 'STEAL', 'AFTER', 'PRACTICE', 'HOURS', 'WERE', 'OVER', 'AND', 'AFTER', 'THE', 'CLAMOROUS', 'DEMANDS', 'OF', 'THE', 'BOYS', 'UPON', 'HER', 'TIME', 'WERE', 'FULLY', 'SATISFIED', 'WAS', 'SEIZED', 'TO', 'FLY', 'ON', 'THE', 'WINGS', 'OF', 'THE', 'WIND', 'TO', 'THE', 'FLOWERS'] +237-126133-0001-615: hyp=['EVERY', 'CHANCE', 'SHE', 'COULD', 'STEAL', 'AFTER', 'PRACTICE', 'HOURS', 'WERE', 'OVER', 'AND', 'AFTER', 
'THE', 'CLAMOROUS', 'DEMANDS', 'OF', 'THE', 'BOYS', 'UPON', 'HER', 'TIME', 'WERE', 'FULLY', 'SATISFIED', 'WAS', 'SEIZED', 'TO', 'FLY', 'ON', 'THE', 'WINGS', 'OF', 'THE', 'WIND', 'TO', 'THE', 'FLOWERS'] +237-126133-0002-616: ref=['THEN', 'DEAR', 'SAID', 'MISSUS', 'WHITNEY', 'YOU', 'MUST', 'BE', 'KINDER', 'TO', 'HER', 'THAN', 'EVER', 'THINK', 'WHAT', 'IT', 'WOULD', 'BE', 'FOR', 'ONE', 'OF', 'YOU', 'TO', 'BE', 'AWAY', 'FROM', 'HOME', 'EVEN', 'AMONG', 'FRIENDS'] +237-126133-0002-616: hyp=['THEN', 'DEAR', 'SAID', 'MISSUS', 'WHITNEY', 'YOU', 'MUST', 'BE', 'KINDER', 'TO', 'HER', 'THAN', 'EVER', 'THINK', 'WHAT', 'IT', 'WOULD', 'BE', 'FOR', 'ONE', 'OF', 'YOU', 'TO', 'BE', 'AWAY', 'FROM', 'HOME', 'EVEN', 'AMONG', 'FRIENDS'] +237-126133-0003-617: ref=['SOMEHOW', 'OF', 'ALL', 'THE', 'DAYS', 'WHEN', 'THE', 'HOME', 'FEELING', 'WAS', 'THE', 'STRONGEST', 'THIS', 'DAY', 'IT', 'SEEMED', 'AS', 'IF', 'SHE', 'COULD', 'BEAR', 'IT', 'NO', 'LONGER'] +237-126133-0003-617: hyp=['SOMEHOW', 'OF', 'ALL', 'THE', 'DAYS', 'WHEN', 'THE', 'HOME', 'FEELING', 'WAS', 'THE', 'STRONGEST', 'THIS', 'DAY', 'IT', 'SEEMED', 'AS', 'IF', 'SHE', 'COULD', 'BEAR', 'IT', 'NO', 'LONGER'] +237-126133-0004-618: ref=['IF', 'SHE', 'COULD', 'ONLY', 'SEE', 'PHRONSIE', 'FOR', 'JUST', 'ONE', 'MOMENT'] +237-126133-0004-618: hyp=['IF', 'SHE', 'COULD', 'ONLY', 'SEE', 'PHRONSIE', 'FOR', 'JUST', 'ONE', 'MOMENT'] +237-126133-0005-619: ref=['OH', "SHE'S", 'ALWAYS', 'AT', 'THE', 'PIANO', 'SAID', 'VAN', 'SHE', 'MUST', 'BE', 'THERE', 'NOW', 'SOMEWHERE', 'AND', 'THEN', 'SOMEBODY', 'LAUGHED'] +237-126133-0005-619: hyp=['OH', "SHE'S", 'ALWAYS', 'AT', 'THE', 'PIANO', 'SAID', 'VAN', 'SHE', 'MUST', 'BE', 'THERE', 'NOW', 'SOMEWHERE', 'AND', 'THEN', 'SOMEBODY', 'LAUGHED'] +237-126133-0006-620: ref=['AT', 'THIS', 'THE', 'BUNDLE', 'OPENED', 'SUDDENLY', 'AND', 'OUT', 'POPPED', 'PHRONSIE'] +237-126133-0006-620: hyp=['AT', 'THIS', 'THE', 'BUNDLE', 'OPENED', 'SUDDENLY', 'AND', 'OUT', 'POPPED', 'PHRONSIE'] +237-126133-0007-621: ref=['BUT', 'POLLY', "COULDN'T", 'SPEAK', 'AND', 'IF', 'JASPER', "HADN'T", 'CAUGHT', 'HER', 'JUST', 'IN', 'TIME', 'SHE', 'WOULD', 'HAVE', 'TUMBLED', 'OVER', 'BACKWARD', 'FROM', 'THE', 'STOOL', 'PHRONSIE', 'AND', 'ALL'] +237-126133-0007-621: hyp=['BUT', 'POLLY', "COULDN'T", 'SPEAK', 'AND', 'IF', 'JASPER', "HADN'T", 'CAUGHT', 'HER', 'JUST', 'IN', 'TIME', 'SHE', 'WOULD', 'HAVE', 'TUMBLED', 'OVER', 'BACKWARD', 'FROM', 'THE', 'STOOL', 'PHRONSIE', 'AND', 'ALL'] +237-126133-0008-622: ref=['ASKED', 'PHRONSIE', 'WITH', 'HER', 'LITTLE', 'FACE', 'CLOSE', 'TO', "POLLY'S", 'OWN'] +237-126133-0008-622: hyp=['ASKED', 'PHRONSIE', 'WITH', 'HER', 'LITTLE', 'FACE', 'CLOSE', 'TO', "POLLY'S", 'OWN'] +237-126133-0009-623: ref=['NOW', "YOU'LL", 'STAY', 'CRIED', 'VAN', 'SAY', 'POLLY', "WON'T", 'YOU'] +237-126133-0009-623: hyp=['NOW', "YOU'LL", 'STAY', 'CRIED', 'VAN', 'SAY', 'POLLY', "WON'T", 'YOU'] +237-126133-0010-624: ref=['OH', 'YOU', 'ARE', 'THE', 'DEAREST', 'AND', 'BEST', 'MISTER', 'KING', 'I', 'EVER', 'SAW', 'BUT', 'HOW', 'DID', 'YOU', 'MAKE', 'MAMMY', 'LET', 'HER', 'COME'] +237-126133-0010-624: hyp=['OH', 'YOU', 'ARE', 'THE', 'DEAREST', 'AND', 'BEST', 'MISTER', 'KING', 'I', 'EVER', 'SAW', 'BUT', 'HOW', 'DID', 'YOU', 'MAKE', 'MAMMY', 'LET', 'HER', 'COME'] +237-126133-0011-625: ref=["ISN'T", 'HE', 'SPLENDID', 'CRIED', 'JASPER', 'IN', 'INTENSE', 'PRIDE', 'SWELLING', 'UP', 'FATHER', 'KNEW', 'HOW', 'TO', 'DO', 'IT'] +237-126133-0011-625: hyp=["ISN'T", 'HE', 'SPLENDID', 'CRIED', 'JASPER', 'IN', 'INTENSE', 'PRIDE', 'SWELLING', 'UP', 'FATHER', 'KNEW', 'HOW', 'TO', 'DO', 
'IT'] +237-126133-0012-626: ref=['THERE', 'THERE', 'HE', 'SAID', 'SOOTHINGLY', 'PATTING', 'HER', 'BROWN', 'FUZZY', 'HEAD'] +237-126133-0012-626: hyp=['THERE', 'THERE', 'HE', 'SAID', 'SOOTHINGLY', 'PATTING', 'HER', 'BROWN', 'FUZZY', 'HEAD'] +237-126133-0013-627: ref=['I', 'KNOW', 'GASPED', 'POLLY', 'CONTROLLING', 'HER', 'SOBS', 'I', "WON'T", 'ONLY', 'I', "CAN'T", 'THANK', 'YOU'] +237-126133-0013-627: hyp=['I', 'KNOW', 'GASPED', 'POLLY', 'CONTROLLING', 'HER', 'SOBS', 'I', "WON'T", 'ONLY', 'I', "CAN'T", 'THANK', 'YOU'] +237-126133-0014-628: ref=['ASKED', 'PHRONSIE', 'IN', 'INTENSE', 'INTEREST', 'SLIPPING', 'DOWN', 'OUT', 'OF', "POLLY'S", 'ARMS', 'AND', 'CROWDING', 'UP', 'CLOSE', 'TO', "JASPER'S", 'SIDE'] +237-126133-0014-628: hyp=['ASKED', 'PHRONSIE', 'IN', 'INTENSE', 'INTEREST', 'SLIPPING', 'DOWN', 'OUT', 'OF', "POLLY'S", 'ARMS', 'AND', 'CROWDING', 'UP', 'CLOSE', 'TO', "JASPER'S", 'SIDE'] +237-126133-0015-629: ref=['YES', 'ALL', 'ALONE', 'BY', 'HIMSELF', 'ASSERTED', 'JASPER', 'VEHEMENTLY', 'AND', 'WINKING', 'FURIOUSLY', 'TO', 'THE', 'OTHERS', 'TO', 'STOP', 'THEIR', 'LAUGHING', 'HE', 'DID', 'NOW', 'TRULY', 'PHRONSIE'] +237-126133-0015-629: hyp=['YES', 'ALL', 'ALONE', 'BY', 'HIMSELF', 'ASSERTED', 'JASPER', 'VEHEMENTLY', 'AND', 'WINKING', 'FURIOUSLY', 'TO', 'THE', 'OTHERS', 'TO', 'STOP', 'THEIR', 'LAUGHING', 'HE', 'DID', 'NOW', 'TRULY', 'PHRONSIE'] +237-126133-0016-630: ref=['OH', 'NO', 'JASPER', 'I', 'MUST', 'GO', 'BY', 'MY', 'VERY', 'OWN', 'SELF'] +237-126133-0016-630: hyp=['OH', 'NO', 'JAPSER', 'I', 'MUST', 'GO', 'BY', 'MY', 'VERY', 'OWN', 'SELF'] +237-126133-0017-631: ref=['THERE', 'JAP', "YOU'VE", 'CAUGHT', 'IT', 'LAUGHED', 'PERCY', 'WHILE', 'THE', 'OTHERS', 'SCREAMED', 'AT', 'THE', 'SIGHT', 'OF', "JASPER'S", 'FACE'] +237-126133-0017-631: hyp=['THERE', 'JAP', "YOU'VE", 'CAUGHT', 'IT', 'LAUGHED', 'PERCY', 'WHILE', 'THE', 'OTHERS', 'SCREAMED', 'AT', 'THE', 'SIGHT', 'OF', "JASPER'S", 'FACE'] +237-126133-0018-632: ref=["DON'T", 'MIND', 'IT', 'POLLY', 'WHISPERED', 'JASPER', "TWASN'T", 'HER', 'FAULT'] +237-126133-0018-632: hyp=["DON'T", 'MIND', 'IT', 'POLLY', 'WHISPERED', 'JASPER', "TWASN'T", 'HER', 'FAULT'] +237-126133-0019-633: ref=['DEAR', 'ME', 'EJACULATED', 'THE', 'OLD', 'GENTLEMAN', 'IN', 'THE', 'UTMOST', 'AMAZEMENT', 'AND', 'SUCH', 'A', 'TIME', 'AS', "I'VE", 'HAD', 'TO', 'GET', 'HER', 'HERE', 'TOO'] +237-126133-0019-633: hyp=['DEAR', 'ME', 'EJACULATED', 'THE', 'OLD', 'GENTLEMAN', 'IN', 'THE', 'UTMOST', 'AMAZEMENT', 'AND', 'SUCH', 'A', 'TIME', 'AS', "I'VE", 'HAD', 'TO', 'GET', 'HER', 'HERE', 'TOO'] +237-126133-0020-634: ref=['HOW', 'DID', 'HER', 'MOTHER', 'EVER', 'LET', 'HER', 'GO'] +237-126133-0020-634: hyp=['HOW', 'DID', 'HER', 'MOTHER', 'EVER', 'LET', 'HER', 'GO'] +237-126133-0021-635: ref=['SHE', 'ASKED', 'IMPULSIVELY', 'I', "DIDN'T", 'BELIEVE', 'YOU', 'COULD', 'PERSUADE', 'HER', 'FATHER'] +237-126133-0021-635: hyp=['SHE', 'ASKED', 'IMPULSIVELY', 'I', "DIDN'T", 'BELIEVE', 'YOU', 'COULD', 'PERSUADE', 'HER', 'FATHER'] +237-126133-0022-636: ref=['I', "DIDN'T", 'HAVE', 'ANY', 'FEARS', 'IF', 'I', 'WORKED', 'IT', 'RIGHTLY', 'SAID', 'THE', 'OLD', 'GENTLEMAN', 'COMPLACENTLY'] +237-126133-0022-636: hyp=['I', "DIDN'T", 'HAVE', 'ANY', 'FEARS', 'IF', 'I', 'WORKED', 'IT', 'RIGHTLY', 'SAID', 'THE', 'OLD', 'GENTLEMAN', 'COMPLACENTLY'] +237-126133-0023-637: ref=['HE', 'CRIED', 'IN', 'HIGH', 'DUDGEON', 'JUST', 'AS', 'IF', 'HE', 'OWNED', 'THE', 'WHOLE', 'OF', 'THE', 'PEPPERS', 'AND', 'COULD', 'DISPOSE', 'OF', 'THEM', 'ALL', 'TO', 'SUIT', 'HIS', 'FANCY'] +237-126133-0023-637: hyp=['HE', 'CRIED', 'IN', 
'HIGH', 'DUDGEON', 'JUST', 'AS', 'IF', 'HE', 'OWNED', 'THE', 'WHOLE', 'OF', 'THE', 'PEPPERS', 'AND', 'COULD', 'DISPOSE', 'OF', 'THEM', 'ALL', 'TO', 'SUIT', 'HIS', 'FANCY'] +237-126133-0024-638: ref=['AND', 'THE', 'OLD', 'GENTLEMAN', 'WAS', 'SO', 'DELIGHTED', 'WITH', 'HIS', 'SUCCESS', 'THAT', 'HE', 'HAD', 'TO', 'BURST', 'OUT', 'INTO', 'A', 'SERIES', 'OF', 'SHORT', 'HAPPY', 'BITS', 'OF', 'LAUGHTER', 'THAT', 'OCCUPIED', 'QUITE', 'A', 'SPACE', 'OF', 'TIME'] +237-126133-0024-638: hyp=['AND', 'THE', 'OLD', 'GENTLEMAN', 'WAS', 'SO', 'DELIGHTED', 'WITH', 'HIS', 'SUCCESS', 'THAT', 'HE', 'HAD', 'TO', 'BURST', 'OUT', 'INTO', 'A', 'SERIES', 'OF', 'SHORT', 'HAPPY', 'BITS', 'OF', 'LAUGHTER', 'THAT', 'OCCUPIED', 'QUITE', 'A', 'SPACE', 'OF', 'TIME'] +237-126133-0025-639: ref=['AT', 'LAST', 'HE', 'CAME', 'OUT', 'OF', 'THEM', 'AND', 'WIPED', 'HIS', 'FACE', 'VIGOROUSLY'] +237-126133-0025-639: hyp=['AT', 'LAST', 'HE', 'CAME', 'OUT', 'OF', 'THEM', 'AND', 'WIPED', 'HIS', 'FACE', 'VIGOROUSLY'] +237-134493-0000-640: ref=['IT', 'IS', 'SIXTEEN', 'YEARS', 'SINCE', 'JOHN', 'BERGSON', 'DIED'] +237-134493-0000-640: hyp=['IT', 'IS', 'SIXTEEN', 'YEARS', 'SINCE', 'JOHN', 'BERKSON', 'DIED'] +237-134493-0001-641: ref=['HIS', 'WIFE', 'NOW', 'LIES', 'BESIDE', 'HIM', 'AND', 'THE', 'WHITE', 'SHAFT', 'THAT', 'MARKS', 'THEIR', 'GRAVES', 'GLEAMS', 'ACROSS', 'THE', 'WHEAT', 'FIELDS'] +237-134493-0001-641: hyp=['HIS', 'WIFE', 'NOW', 'LIES', 'BESIDE', 'HIM', 'AND', 'THE', 'WHITE', 'SHAFT', 'THAT', 'MARKS', 'THEIR', 'GRAVES', 'GLEAMS', 'ACROSS', 'THE', 'WHEAT', 'FIELDS'] +237-134493-0002-642: ref=['FROM', 'THE', 'NORWEGIAN', 'GRAVEYARD', 'ONE', 'LOOKS', 'OUT', 'OVER', 'A', 'VAST', 'CHECKER', 'BOARD', 'MARKED', 'OFF', 'IN', 'SQUARES', 'OF', 'WHEAT', 'AND', 'CORN', 'LIGHT', 'AND', 'DARK', 'DARK', 'AND', 'LIGHT'] +237-134493-0002-642: hyp=['FROM', 'THE', 'NORWEGIAN', 'GRAVEYARD', 'ONE', 'LOOKS', 'OUT', 'OVER', 'A', 'VAST', 'CHEQUER', 'BOARD', 'MARKED', 'OFF', 'IN', 'SQUARES', 'OF', 'WHEAT', 'AND', 'CORN', 'LIGHT', 'AND', 'DARK', 'DARK', 'AND', 'LIGHT'] +237-134493-0003-643: ref=['FROM', 'THE', 'GRAVEYARD', 'GATE', 'ONE', 'CAN', 'COUNT', 'A', 'DOZEN', 'GAYLY', 'PAINTED', 'FARMHOUSES', 'THE', 'GILDED', 'WEATHER', 'VANES', 'ON', 'THE', 'BIG', 'RED', 'BARNS', 'WINK', 'AT', 'EACH', 'OTHER', 'ACROSS', 'THE', 'GREEN', 'AND', 'BROWN', 'AND', 'YELLOW', 'FIELDS'] +237-134493-0003-643: hyp=['FROM', 'THE', 'GRAVEYARD', 'GATE', 'ONE', 'CAN', 'COUNT', 'A', 'DOZEN', 'GAILY', 'PAINTED', 'FARMHOUSES', 'THE', 'GILDED', 'WEATHER', 'VANES', 'ON', 'THE', 'BIG', 'RED', 'BARNS', 'WINK', 'AT', 'EACH', 'OTHER', 'ACROSS', 'THE', 'GREEN', 'AND', 'BROWN', 'AND', 'YELLOW', 'FIELDS'] +237-134493-0004-644: ref=['THE', 'AIR', 'AND', 'THE', 'EARTH', 'ARE', 'CURIOUSLY', 'MATED', 'AND', 'INTERMINGLED', 'AS', 'IF', 'THE', 'ONE', 'WERE', 'THE', 'BREATH', 'OF', 'THE', 'OTHER'] +237-134493-0004-644: hyp=['THE', 'AIR', 'AND', 'THE', 'EARTH', 'ARE', 'CURIOUSLY', 'MATED', 'AND', 'INTERMINGLED', 'AS', 'IF', 'THE', 'ONE', 'WERE', 'THE', 'BREATH', 'OF', 'THE', 'OTHER'] +237-134493-0005-645: ref=['HE', 'WAS', 'A', 'SPLENDID', 'FIGURE', 'OF', 'A', 'BOY', 'TALL', 'AND', 'STRAIGHT', 'AS', 'A', 'YOUNG', 'PINE', 'TREE', 'WITH', 'A', 'HANDSOME', 'HEAD', 'AND', 'STORMY', 'GRAY', 'EYES', 'DEEPLY', 'SET', 'UNDER', 'A', 'SERIOUS', 'BROW'] +237-134493-0005-645: hyp=['HE', 'WAS', 'A', 'SPLENDID', 'FIGURE', 'OF', 'A', 'BOY', 'TALL', 'AND', 'STRAIGHT', 'AS', 'A', 'YOUNG', 'PINE', 'TREE', 'WITH', 'A', 'HANDSOME', 'HEAD', 'AND', 'STORMY', 'GRAY', 'EYES', 'DEEPLY', 'SET', 'UNDER', 'A', 'SERIOUS', 
'BROW'] +237-134493-0006-646: ref=["THAT'S", 'NOT', 'MUCH', 'OF', 'A', 'JOB', 'FOR', 'AN', 'ATHLETE', 'HERE', "I'VE", 'BEEN', 'TO', 'TOWN', 'AND', 'BACK'] +237-134493-0006-646: hyp=["THAT'S", 'NOT', 'MUCH', 'OF', 'A', 'JOB', 'FOR', 'AN', 'ATHLETE', 'HERE', "I'VE", 'BEEN', 'TO', 'TOWN', 'AND', 'BACK'] +237-134493-0007-647: ref=['ALEXANDRA', 'LETS', 'YOU', 'SLEEP', 'LATE'] +237-134493-0007-647: hyp=['ALEXANDRA', 'LETS', 'YOU', 'SLEEP', 'LATE'] +237-134493-0008-648: ref=['SHE', 'GATHERED', 'UP', 'HER', 'REINS'] +237-134493-0008-648: hyp=['SHE', 'GATHERED', 'UP', 'HER', 'REINS'] +237-134493-0009-649: ref=['PLEASE', 'WAIT', 'FOR', 'ME', 'MARIE', 'EMIL', 'COAXED'] +237-134493-0009-649: hyp=['PLEASE', 'WAIT', 'FOR', 'ME', 'MARIE', 'AMY', 'COAXED'] +237-134493-0010-650: ref=['I', 'NEVER', 'SEE', "LOU'S", 'SCYTHE', 'OVER', 'HERE'] +237-134493-0010-650: hyp=['I', 'NEVER', 'SEE', 'LOSE', 'SCYTHE', 'OVER', 'HERE'] +237-134493-0011-651: ref=['HOW', 'BROWN', "YOU'VE", 'GOT', 'SINCE', 'YOU', 'CAME', 'HOME', 'I', 'WISH', 'I', 'HAD', 'AN', 'ATHLETE', 'TO', 'MOW', 'MY', 'ORCHARD'] +237-134493-0011-651: hyp=['HOW', 'BROWN', "YOU'VE", 'GOT', 'SINCE', 'YOU', 'CAME', 'HOME', 'I', 'WISH', 'I', 'HAD', 'AN', 'ATHLETE', 'TO', 'MOW', 'MY', 'ORCHARD'] +237-134493-0012-652: ref=['I', 'GET', 'WET', 'TO', 'MY', 'KNEES', 'WHEN', 'I', 'GO', 'DOWN', 'TO', 'PICK', 'CHERRIES'] +237-134493-0012-652: hyp=['I', 'GET', 'WET', 'TO', 'MY', 'KNEES', 'WHEN', 'I', 'GO', 'DOWN', 'TO', 'PICK', 'CHERRIES'] +237-134493-0013-653: ref=['INDEED', 'HE', 'HAD', 'LOOKED', 'AWAY', 'WITH', 'THE', 'PURPOSE', 'OF', 'NOT', 'SEEING', 'IT'] +237-134493-0013-653: hyp=['INDEED', 'HE', 'HAD', 'LOOKED', 'AWAY', 'WITH', 'THE', 'PURPOSE', 'OF', 'NOT', 'SEEING', 'IT'] +237-134493-0014-654: ref=['THEY', 'THINK', "YOU'RE", 'PROUD', 'BECAUSE', "YOU'VE", 'BEEN', 'AWAY', 'TO', 'SCHOOL', 'OR', 'SOMETHING'] +237-134493-0014-654: hyp=['THEY', 'THINK', 'YOU', 'ARE', 'PROUD', 'BECAUSE', "YOU'VE", 'BEEN', 'AWAY', 'TO', 'SCHOOL', 'OR', 'SOMETHING'] +237-134493-0015-655: ref=['THERE', 'WAS', 'SOMETHING', 'INDIVIDUAL', 'ABOUT', 'THE', 'GREAT', 'FARM', 'A', 'MOST', 'UNUSUAL', 'TRIMNESS', 'AND', 'CARE', 'FOR', 'DETAIL'] +237-134493-0015-655: hyp=['THERE', 'WAS', 'SOMETHING', 'INDIVIDUAL', 'ABOUT', 'THE', 'GREAT', 'FARM', 'A', 'MOST', 'UNUSUAL', 'TRIMNESS', 'AND', 'CARE', 'FOR', 'DETAIL'] +237-134493-0016-656: ref=['ON', 'EITHER', 'SIDE', 'OF', 'THE', 'ROAD', 'FOR', 'A', 'MILE', 'BEFORE', 'YOU', 'REACHED', 'THE', 'FOOT', 'OF', 'THE', 'HILL', 'STOOD', 'TALL', 'OSAGE', 'ORANGE', 'HEDGES', 'THEIR', 'GLOSSY', 'GREEN', 'MARKING', 'OFF', 'THE', 'YELLOW', 'FIELDS'] +237-134493-0016-656: hyp=['ON', 'EITHER', 'SIDE', 'OF', 'THE', 'ROAD', 'FOR', 'A', 'MILE', 'BEFORE', 'YOU', 'REACHED', 'THE', 'FOOT', 'OF', 'THE', 'HILL', 'STOOD', 'TALL', 'OSAGE', 'ORANGE', 'HEDGES', 'THEIR', 'GLOSSY', 'GREEN', 'MARKING', 'OFF', 'THE', 'YELLOW', 'FIELDS'] +237-134493-0017-657: ref=['ANY', 'ONE', 'THEREABOUTS', 'WOULD', 'HAVE', 'TOLD', 'YOU', 'THAT', 'THIS', 'WAS', 'ONE', 'OF', 'THE', 'RICHEST', 'FARMS', 'ON', 'THE', 'DIVIDE', 'AND', 'THAT', 'THE', 'FARMER', 'WAS', 'A', 'WOMAN', 'ALEXANDRA', 'BERGSON'] +237-134493-0017-657: hyp=['ANY', 'ONE', 'THEREABOUTS', 'WOULD', 'HAVE', 'TOLD', 'YOU', 'THAT', 'THIS', 'WAS', 'ONE', 'OF', 'THE', 'RICHEST', 'FARMS', 'ON', 'THE', 'DIVIDE', 'AND', 'THAT', 'THE', 'FARMER', 'WAS', 'A', 'WOMAN', 'ALEXANDRA', 'BERGSON'] +237-134493-0018-658: ref=['THERE', 'IS', 'EVEN', 'A', 'WHITE', 'ROW', 'OF', 'BEEHIVES', 'IN', 'THE', 'ORCHARD', 'UNDER', 'THE', 'WALNUT', 'TREES'] 
+237-134493-0018-658: hyp=['THERE', 'IS', 'EVEN', 'A', 'WHITE', 'ROW', 'OF', 'BEEHIVES', 'IN', 'THE', 'ORCHARD', 'UNDER', 'THE', 'WALNUT', 'TREES'] +237-134500-0000-659: ref=['FRANK', 'READ', 'ENGLISH', 'SLOWLY', 'AND', 'THE', 'MORE', 'HE', 'READ', 'ABOUT', 'THIS', 'DIVORCE', 'CASE', 'THE', 'ANGRIER', 'HE', 'GREW'] +237-134500-0000-659: hyp=['FRANK', 'READ', 'ENGLISH', 'SLOWLY', 'AND', 'THE', 'MORE', 'HE', 'READ', 'ABOUT', 'THIS', 'DIVORCE', 'CASE', 'THE', 'ANGRIER', 'HE', 'GREW'] +237-134500-0001-660: ref=['MARIE', 'SIGHED'] +237-134500-0001-660: hyp=['MARIE', 'SIGHED'] +237-134500-0002-661: ref=['A', 'BRISK', 'WIND', 'HAD', 'COME', 'UP', 'AND', 'WAS', 'DRIVING', 'PUFFY', 'WHITE', 'CLOUDS', 'ACROSS', 'THE', 'SKY'] +237-134500-0002-661: hyp=['A', 'BRISK', 'WIND', 'HAD', 'COME', 'UP', 'AND', 'WAS', 'DRIVING', 'PUFFY', 'WHITE', 'CLOUDS', 'ACROSS', 'THE', 'SKY'] +237-134500-0003-662: ref=['THE', 'ORCHARD', 'WAS', 'SPARKLING', 'AND', 'RIPPLING', 'IN', 'THE', 'SUN'] +237-134500-0003-662: hyp=['THE', 'ORCHARD', 'WAS', 'SPARKLING', 'AND', 'RIPPLING', 'IN', 'THE', 'SUN'] +237-134500-0004-663: ref=['THAT', 'INVITATION', 'DECIDED', 'HER'] +237-134500-0004-663: hyp=['THAT', 'INVITATION', 'DECIDED', 'HER'] +237-134500-0005-664: ref=['OH', 'BUT', "I'M", 'GLAD', 'TO', 'GET', 'THIS', 'PLACE', 'MOWED'] +237-134500-0005-664: hyp=['OH', 'BUT', "I'M", 'GLAD', 'TO', 'GET', 'THIS', 'PLACE', 'MOWED'] +237-134500-0006-665: ref=['JUST', 'SMELL', 'THE', 'WILD', 'ROSES', 'THEY', 'ARE', 'ALWAYS', 'SO', 'SPICY', 'AFTER', 'A', 'RAIN'] +237-134500-0006-665: hyp=['JUST', 'SMELL', 'THE', 'WILD', 'ROSES', 'THEY', 'ARE', 'ALWAYS', 'SO', 'SPICY', 'AFTER', 'A', 'RAIN'] +237-134500-0007-666: ref=['WE', 'NEVER', 'HAD', 'SO', 'MANY', 'OF', 'THEM', 'IN', 'HERE', 'BEFORE'] +237-134500-0007-666: hyp=['WE', 'NEVER', 'HAD', 'SO', 'MANY', 'OF', 'THEM', 'IN', 'HERE', 'BEFORE'] +237-134500-0008-667: ref=['I', 'SUPPOSE', "IT'S", 'THE', 'WET', 'SEASON', 'WILL', 'YOU', 'HAVE', 'TO', 'CUT', 'THEM', 'TOO'] +237-134500-0008-667: hyp=['I', 'SUPPOSE', "IT'S", 'THE', 'WET', 'SEASON', 'WILL', 'YOU', 'HAVE', 'TO', 'CUT', 'THEM', 'TOO'] +237-134500-0009-668: ref=['I', 'SUPPOSE', "THAT'S", 'THE', 'WET', 'SEASON', 'TOO', 'THEN'] +237-134500-0009-668: hyp=['I', 'SUPPOSE', "THAT'S", 'THE', 'WET', 'SEASON', 'TOO', 'THEN'] +237-134500-0010-669: ref=["IT'S", 'EXCITING', 'TO', 'SEE', 'EVERYTHING', 'GROWING', 'SO', 'FAST', 'AND', 'TO', 'GET', 'THE', 'GRASS', 'CUT'] +237-134500-0010-669: hyp=["IT'S", 'EXCITING', 'TO', 'SEE', 'EVERYTHING', 'GROWING', 'SO', 'FAST', 'AND', 'TO', 'GET', 'THE', 'GRASS', 'CUT'] +237-134500-0011-670: ref=["AREN'T", 'YOU', 'SPLASHED', 'LOOK', 'AT', 'THE', 'SPIDER', 'WEBS', 'ALL', 'OVER', 'THE', 'GRASS'] +237-134500-0011-670: hyp=["AREN'T", 'YOU', 'SPLASHED', 'LOOK', 'AT', 'THE', 'SPIDER', 'WEBS', 'ALL', 'OVER', 'THE', 'GRASS'] +237-134500-0012-671: ref=['IN', 'A', 'FEW', 'MOMENTS', 'HE', 'HEARD', 'THE', 'CHERRIES', 'DROPPING', 'SMARTLY', 'INTO', 'THE', 'PAIL', 'AND', 'HE', 'BEGAN', 'TO', 'SWING', 'HIS', 'SCYTHE', 'WITH', 'THAT', 'LONG', 'EVEN', 'STROKE', 'THAT', 'FEW', 'AMERICAN', 'BOYS', 'EVER', 'LEARN'] +237-134500-0012-671: hyp=['IN', 'A', 'FEW', 'MOMENTS', 'HE', 'HEARD', 'THE', 'CHERRIES', 'DROPPING', 'SMARTLY', 'INTO', 'THE', 'PAIL', 'AND', 'HE', 'BEGAN', 'TO', 'SWING', 'HIS', 'SCYTHE', 'WITH', 'THAT', 'LONG', 'EVEN', 'STROKE', 'THAT', 'FEW', 'AMERICAN', 'BOYS', 'EVER', 'LEARN'] +237-134500-0013-672: ref=['MARIE', 'PICKED', 'CHERRIES', 'AND', 'SANG', 'SOFTLY', 'TO', 'HERSELF', 'STRIPPING', 'ONE', 'GLITTERING', 'BRANCH', 
'AFTER', 'ANOTHER', 'SHIVERING', 'WHEN', 'SHE', 'CAUGHT', 'A', 'SHOWER', 'OF', 'RAINDROPS', 'ON', 'HER', 'NECK', 'AND', 'HAIR'] +237-134500-0013-672: hyp=['MARIE', 'PICKED', 'CHERRIES', 'AND', 'SANG', 'SOFTLY', 'TO', 'HERSELF', 'STRIPPING', 'ONE', 'GLITTERING', 'BRANCH', 'AFTER', 'ANOTHER', 'SHIVERING', 'WHEN', 'SHE', 'CAUGHT', 'A', 'SHOWER', 'OF', 'RAINDROPS', 'ON', 'HER', 'NECK', 'AND', 'HAIR'] +237-134500-0014-673: ref=['AND', 'EMIL', 'MOWED', 'HIS', 'WAY', 'SLOWLY', 'DOWN', 'TOWARD', 'THE', 'CHERRY', 'TREES'] +237-134500-0014-673: hyp=['AND', 'AMY', 'MOWED', 'HIS', 'WAY', 'SLOWLY', 'DOWN', 'TOWARD', 'THE', 'CHERRY', 'TREES'] +237-134500-0015-674: ref=['THAT', 'SUMMER', 'THE', 'RAINS', 'HAD', 'BEEN', 'SO', 'MANY', 'AND', 'OPPORTUNE', 'THAT', 'IT', 'WAS', 'ALMOST', 'MORE', 'THAN', 'SHABATA', 'AND', 'HIS', 'MAN', 'COULD', 'DO', 'TO', 'KEEP', 'UP', 'WITH', 'THE', 'CORN', 'THE', 'ORCHARD', 'WAS', 'A', 'NEGLECTED', 'WILDERNESS'] +237-134500-0015-674: hyp=['THAT', 'SUMMER', 'THE', 'RAINS', 'HAD', 'BEEN', 'SO', 'MANY', 'AND', 'OPPORTUNE', 'THAT', 'IT', 'WAS', 'ALMOST', 'MORE', 'THAN', 'CHABATA', 'AND', 'HIS', 'MAN', 'COULD', 'DO', 'TO', 'KEEP', 'UP', 'WITH', 'THE', 'CORN', 'THE', 'ORCHARD', 'WAS', 'A', 'NEGLECTED', 'WILDERNESS'] +237-134500-0016-675: ref=['I', "DON'T", 'KNOW', 'ALL', 'OF', 'THEM', 'BUT', 'I', 'KNOW', 'LINDENS', 'ARE'] +237-134500-0016-675: hyp=['I', "DON'T", 'KNOW', 'ALL', 'OF', 'THEM', 'BUT', 'I', 'KNOW', 'LINDENS', 'ARE'] +237-134500-0017-676: ref=['IF', 'I', 'FEEL', 'THAT', 'WAY', 'I', 'FEEL', 'THAT', 'WAY'] +237-134500-0017-676: hyp=['IF', 'I', 'FEEL', 'THAT', 'WAY', 'I', 'FEEL', 'THAT', 'WAY'] +237-134500-0018-677: ref=['HE', 'REACHED', 'UP', 'AMONG', 'THE', 'BRANCHES', 'AND', 'BEGAN', 'TO', 'PICK', 'THE', 'SWEET', 'INSIPID', 'FRUIT', 'LONG', 'IVORY', 'COLORED', 'BERRIES', 'TIPPED', 'WITH', 'FAINT', 'PINK', 'LIKE', 'WHITE', 'CORAL', 'THAT', 'FALL', 'TO', 'THE', 'GROUND', 'UNHEEDED', 'ALL', 'SUMMER', 'THROUGH'] +237-134500-0018-677: hyp=['HE', 'REACHED', 'UP', 'AMONG', 'THE', 'BRANCHES', 'AND', 'BEGAN', 'TO', 'PICK', 'THE', 'SWEET', 'INSIPID', 'FRUIT', 'LONG', 'IVORY', 'COLOURED', 'BERRIES', 'TIPPED', 'WITH', 'FAINT', 'PINK', 'LIKE', 'WHITE', 'CORAL', 'THAT', 'FALL', 'TO', 'THE', 'GROUND', 'UNHEEDED', 'ALL', 'SUMMER', 'THROUGH'] +237-134500-0019-678: ref=['HE', 'DROPPED', 'A', 'HANDFUL', 'INTO', 'HER', 'LAP'] +237-134500-0019-678: hyp=['HE', 'DROPPED', 'A', 'HANDFUL', 'INTO', 'HER', 'LAP'] +237-134500-0020-679: ref=['YES', "DON'T", 'YOU'] +237-134500-0020-679: hyp=['YES', "DON'T", 'YOU'] +237-134500-0021-680: ref=['OH', 'EVER', 'SO', 'MUCH', 'ONLY', 'HE', 'SEEMS', 'KIND', 'OF', 'STAID', 'AND', 'SCHOOL', 'TEACHERY'] +237-134500-0021-680: hyp=['OH', 'EVER', 'SO', 'MUCH', 'ONLY', 'HE', 'SEEMS', 'KIND', 'OF', 'STAID', 'IN', 'SCHOOL', 'TEACHER'] +237-134500-0022-681: ref=['WHEN', 'SHE', 'USED', 'TO', 'TELL', 'ME', 'ABOUT', 'HIM', 'I', 'ALWAYS', 'WONDERED', 'WHETHER', 'SHE', "WASN'T", 'A', 'LITTLE', 'IN', 'LOVE', 'WITH', 'HIM'] +237-134500-0022-681: hyp=['WHEN', 'SHE', 'USED', 'TO', 'TELL', 'ME', 'ABOUT', 'HIM', 'I', 'ALWAYS', 'WONDERED', 'WHETHER', 'SHE', "WASN'T", 'A', 'LITTLE', 'IN', 'LOVE', 'WITH', 'HIM'] +237-134500-0023-682: ref=['IT', 'WOULD', 'SERVE', 'YOU', 'ALL', 'RIGHT', 'IF', 'SHE', 'WALKED', 'OFF', 'WITH', 'CARL'] +237-134500-0023-682: hyp=['IT', 'WOULD', 'SERVE', 'YOU', 'ALL', 'RIGHT', 'IF', 'SHE', 'WALKED', 'OFF', 'WITH', 'KARL'] +237-134500-0024-683: ref=['I', 'LIKE', 'TO', 'TALK', 'TO', 'CARL', 'ABOUT', 'NEW', 'YORK', 'AND', 'WHAT', 'A', 'FELLOW', 'CAN', 'DO', 
'THERE'] +237-134500-0024-683: hyp=['I', 'LIKE', 'TO', 'TALK', 'TO', 'CARL', 'ABOUT', 'NEW', 'YORK', 'AND', 'WHAT', 'A', 'FELLOW', 'CAN', 'DO', 'THERE'] +237-134500-0025-684: ref=['OH', 'EMIL'] +237-134500-0025-684: hyp=['OH', 'AMY'] +237-134500-0026-685: ref=['SURELY', 'YOU', 'ARE', 'NOT', 'THINKING', 'OF', 'GOING', 'OFF', 'THERE'] +237-134500-0026-685: hyp=['SURELY', 'YOU', 'ARE', 'NOT', 'THINKING', 'OF', 'GOING', 'OFF', 'THERE'] +237-134500-0027-686: ref=["MARIE'S", 'FACE', 'FELL', 'UNDER', 'HIS', 'BROODING', 'GAZE'] +237-134500-0027-686: hyp=["MARIE'S", 'FACE', 'FELL', 'UNDER', 'HIS', 'BROODING', 'GAZE'] +237-134500-0028-687: ref=["I'M", 'SURE', 'ALEXANDRA', 'HOPES', 'YOU', 'WILL', 'STAY', 'ON', 'HERE', 'SHE', 'MURMURED'] +237-134500-0028-687: hyp=['I', 'AM', 'SURE', 'ALEXANDRA', 'HOPES', 'YOU', 'WILL', 'STAY', 'ON', 'HERE', 'SHE', 'MURMURED'] +237-134500-0029-688: ref=['I', "DON'T", 'WANT', 'TO', 'STAND', 'AROUND', 'AND', 'LOOK', 'ON'] +237-134500-0029-688: hyp=['I', "DON'T", 'WANT', 'TO', 'STAND', 'AROUND', 'AND', 'LOOK', 'ON'] +237-134500-0030-689: ref=['I', 'WANT', 'TO', 'BE', 'DOING', 'SOMETHING', 'ON', 'MY', 'OWN', 'ACCOUNT'] +237-134500-0030-689: hyp=['I', 'WANT', 'TO', 'BE', 'DOING', 'SOMETHING', 'ON', 'MY', 'OWN', 'ACCOUNT'] +237-134500-0031-690: ref=['SOMETIMES', 'I', "DON'T", 'WANT', 'TO', 'DO', 'ANYTHING', 'AT', 'ALL', 'AND', 'SOMETIMES', 'I', 'WANT', 'TO', 'PULL', 'THE', 'FOUR', 'CORNERS', 'OF', 'THE', 'DIVIDE', 'TOGETHER', 'HE', 'THREW', 'OUT', 'HIS', 'ARM', 'AND', 'BROUGHT', 'IT', 'BACK', 'WITH', 'A', 'JERK', 'SO', 'LIKE', 'A', 'TABLE', 'CLOTH'] +237-134500-0031-690: hyp=['SOMETIMES', 'I', "DON'T", 'WANT', 'TO', 'DO', 'ANYTHING', 'AT', 'ALL', 'AND', 'SOMETIMES', 'I', 'WANT', 'TO', 'PULL', 'THE', 'FOUR', 'CORNERS', 'OF', 'THE', 'DIVIDE', 'TOGETHER', 'HE', 'THREW', 'OUT', 'HIS', 'ARM', 'AND', 'BROUGHT', 'IT', 'BACK', 'WITH', 'A', 'JERK', 'SO', 'LIKE', 'A', 'TABLE', 'CLOTH'] +237-134500-0032-691: ref=['I', 'GET', 'TIRED', 'OF', 'SEEING', 'MEN', 'AND', 'HORSES', 'GOING', 'UP', 'AND', 'DOWN', 'UP', 'AND', 'DOWN'] +237-134500-0032-691: hyp=['I', 'GET', 'TIRED', 'OF', 'SEEING', 'MEN', 'AND', 'HORSES', 'GOING', 'UP', 'AND', 'DOWN', 'UP', 'AND', 'DOWN'] +237-134500-0033-692: ref=['I', 'WISH', 'YOU', "WEREN'T", 'SO', 'RESTLESS', 'AND', "DIDN'T", 'GET', 'SO', 'WORKED', 'UP', 'OVER', 'THINGS', 'SHE', 'SAID', 'SADLY'] +237-134500-0033-692: hyp=['I', 'WISH', 'YOU', "WEREN'T", 'SO', 'RESTLESS', 'AND', "DIDN'T", 'GET', 'SO', 'WORKED', 'UP', 'OVER', 'THINGS', 'SHE', 'SAID', 'SADLY'] +237-134500-0034-693: ref=['THANK', 'YOU', 'HE', 'RETURNED', 'SHORTLY'] +237-134500-0034-693: hyp=['THANK', 'YOU', 'HE', 'RETURNED', 'SHORTLY'] +237-134500-0035-694: ref=['AND', 'YOU', 'NEVER', 'USED', 'TO', 'BE', 'CROSS', 'TO', 'ME'] +237-134500-0035-694: hyp=['AND', 'YOU', 'NEVER', 'USED', 'TO', 'BE', 'CROSS', 'TO', 'ME'] +237-134500-0036-695: ref=['I', "CAN'T", 'PLAY', 'WITH', 'YOU', 'LIKE', 'A', 'LITTLE', 'BOY', 'ANY', 'MORE', 'HE', 'SAID', 'SLOWLY', "THAT'S", 'WHAT', 'YOU', 'MISS', 'MARIE'] +237-134500-0036-695: hyp=['I', "CAN'T", 'PLAY', 'WITH', 'YOU', 'LIKE', 'A', 'LITTLE', 'BOY', 'ANY', 'MORE', 'HE', 'SAID', 'SLOWLY', "THAT'S", 'WHAT', 'YOU', 'MISS', 'MARIE'] +237-134500-0037-696: ref=['BUT', 'EMIL', 'IF', 'I', 'UNDERSTAND', 'THEN', 'ALL', 'OUR', 'GOOD', 'TIMES', 'ARE', 'OVER', 'WE', 'CAN', 'NEVER', 'DO', 'NICE', 'THINGS', 'TOGETHER', 'ANY', 'MORE'] +237-134500-0037-696: hyp=['BUT', 'AMY', 'IF', 'I', 'UNDERSTAND', 'THEN', 'ALL', 'OUR', 'GOOD', 'TIMES', 'ARE', 'OVER', 'WE', 'CAN', 'NEVER', 'DO', 
'NICE', 'THINGS', 'TOGETHER', 'ANY', 'MORE'] +237-134500-0038-697: ref=['AND', 'ANYHOW', "THERE'S", 'NOTHING', 'TO', 'UNDERSTAND'] +237-134500-0038-697: hyp=['AND', 'ANYHOW', "THERE'S", 'NOTHING', 'TO', 'UNDERSTAND'] +237-134500-0039-698: ref=['THAT', "WON'T", 'LAST', 'IT', 'WILL', 'GO', 'AWAY', 'AND', 'THINGS', 'WILL', 'BE', 'JUST', 'AS', 'THEY', 'USED', 'TO'] +237-134500-0039-698: hyp=['THAT', "WON'T", 'LAST', 'IT', 'WILL', 'GO', 'AWAY', 'AND', 'THINGS', 'WILL', 'BE', 'JUST', 'AS', 'THEY', 'USED', 'TO'] +237-134500-0040-699: ref=['I', 'PRAY', 'FOR', 'YOU', 'BUT', "THAT'S", 'NOT', 'THE', 'SAME', 'AS', 'IF', 'YOU', 'PRAYED', 'YOURSELF'] +237-134500-0040-699: hyp=['I', 'PRAY', 'FOR', 'YOU', 'BUT', "THAT'S", 'NOT', 'THE', 'SAME', 'AS', 'IF', 'YOU', 'PRAYED', 'YOURSELF'] +237-134500-0041-700: ref=['I', "CAN'T", 'PRAY', 'TO', 'HAVE', 'THE', 'THINGS', 'I', 'WANT', 'HE', 'SAID', 'SLOWLY', 'AND', 'I', "WON'T", 'PRAY', 'NOT', 'TO', 'HAVE', 'THEM', 'NOT', 'IF', "I'M", 'DAMNED', 'FOR', 'IT'] +237-134500-0041-700: hyp=['I', "CAN'T", 'PRAY', 'TO', 'HAVE', 'THE', 'THINGS', 'I', 'WANT', 'HE', 'SAID', 'SLOWLY', 'AND', 'I', "WON'T", 'PRAY', 'NOT', 'TO', 'HAVE', 'THEM', 'NOT', 'IF', "I'M", 'DAMNED', 'FOR', 'IT'] +237-134500-0042-701: ref=['THEN', 'ALL', 'OUR', 'GOOD', 'TIMES', 'ARE', 'OVER'] +237-134500-0042-701: hyp=['THEN', 'ALL', 'OUR', 'GOOD', 'TIMES', 'ARE', 'OVER'] +260-123286-0000-702: ref=['SATURDAY', 'AUGUST', 'FIFTEENTH', 'THE', 'SEA', 'UNBROKEN', 'ALL', 'ROUND', 'NO', 'LAND', 'IN', 'SIGHT'] +260-123286-0000-702: hyp=['SATURDAY', 'AUGUST', 'FIFTEENTH', 'THE', 'SEA', 'UNBROKEN', 'ALL', 'ROUND', 'NO', 'LAND', 'IN', 'SIGHT'] +260-123286-0001-703: ref=['THE', 'HORIZON', 'SEEMS', 'EXTREMELY', 'DISTANT'] +260-123286-0001-703: hyp=['THE', 'HORIZON', 'SEEMS', 'EXTREMELY', 'DISTANT'] +260-123286-0002-704: ref=['ALL', 'MY', 'DANGER', 'AND', 'SUFFERINGS', 'WERE', 'NEEDED', 'TO', 'STRIKE', 'A', 'SPARK', 'OF', 'HUMAN', 'FEELING', 'OUT', 'OF', 'HIM', 'BUT', 'NOW', 'THAT', 'I', 'AM', 'WELL', 'HIS', 'NATURE', 'HAS', 'RESUMED', 'ITS', 'SWAY'] +260-123286-0002-704: hyp=['ALL', 'MY', 'DANGER', 'AND', 'SUFFERINGS', 'WERE', 'NEEDED', 'TO', 'STRIKE', 'A', 'SPARK', 'OF', 'HUMAN', 'FEELING', 'OUT', 'OF', 'HIM', 'BUT', 'NOW', 'THAT', 'I', 'AM', 'WELL', 'HIS', 'NATURE', 'HAS', 'RESUMED', 'ITS', 'SWAY'] +260-123286-0003-705: ref=['YOU', 'SEEM', 'ANXIOUS', 'MY', 'UNCLE', 'I', 'SAID', 'SEEING', 'HIM', 'CONTINUALLY', 'WITH', 'HIS', 'GLASS', 'TO', 'HIS', 'EYE', 'ANXIOUS'] +260-123286-0003-705: hyp=['YOU', 'SEEM', 'ANXIOUS', 'MY', 'UNCLE', 'I', 'SAID', 'SEEING', 'HIM', 'CONTINUALLY', 'WITH', 'HIS', 'GLASS', 'TO', 'HIS', 'EYE', 'ANXIOUS'] +260-123286-0004-706: ref=['ONE', 'MIGHT', 'BE', 'WITH', 'LESS', 'REASON', 'THAN', 'NOW'] +260-123286-0004-706: hyp=['ONE', 'MIGHT', 'BE', 'WITH', 'LESS', 'REASON', 'THAN', 'NOW'] +260-123286-0005-707: ref=['I', 'AM', 'NOT', 'COMPLAINING', 'THAT', 'THE', 'RATE', 'IS', 'SLOW', 'BUT', 'THAT', 'THE', 'SEA', 'IS', 'SO', 'WIDE'] +260-123286-0005-707: hyp=['I', 'AM', 'NOT', 'COMPLAINING', 'THAT', 'THE', 'RATE', 'IS', 'SLOW', 'BUT', 'THAT', 'THE', 'SEA', 'IS', 'SO', 'WIDE'] +260-123286-0006-708: ref=['WE', 'ARE', 'LOSING', 'TIME', 'AND', 'THE', 'FACT', 'IS', 'I', 'HAVE', 'NOT', 'COME', 'ALL', 'THIS', 'WAY', 'TO', 'TAKE', 'A', 'LITTLE', 'SAIL', 'UPON', 'A', 'POND', 'ON', 'A', 'RAFT'] +260-123286-0006-708: hyp=['WE', 'ARE', 'LOSING', 'TIME', 'AND', 'THE', 'FACT', 'IS', 'I', 'HAVE', 'NOT', 'COME', 'ALL', 'THIS', 'WAY', 'TO', 'TAKE', 'A', 'LITTLE', 'SAIL', 'UPON', 'A', 'POND', 'ON', 'A', 'RAFT'] 
+260-123286-0007-709: ref=['HE', 'CALLED', 'THIS', 'SEA', 'A', 'POND', 'AND', 'OUR', 'LONG', 'VOYAGE', 'TAKING', 'A', 'LITTLE', 'SAIL'] +260-123286-0007-709: hyp=['HE', 'CALLED', 'THIS', 'SEA', 'A', 'POND', 'AND', 'OUR', 'LONG', 'VOYAGE', 'TAKING', 'A', 'LITTLE', 'SAIL'] +260-123286-0008-710: ref=['THEREFORE', "DON'T", 'TALK', 'TO', 'ME', 'ABOUT', 'VIEWS', 'AND', 'PROSPECTS'] +260-123286-0008-710: hyp=['THEREFORE', "DON'T", 'TALK', 'TO', 'ME', 'ABOUT', 'VIEWS', 'AND', 'PROSPECTS'] +260-123286-0009-711: ref=['I', 'TAKE', 'THIS', 'AS', 'MY', 'ANSWER', 'AND', 'I', 'LEAVE', 'THE', 'PROFESSOR', 'TO', 'BITE', 'HIS', 'LIPS', 'WITH', 'IMPATIENCE'] +260-123286-0009-711: hyp=['I', 'TAKE', 'THIS', 'AS', 'MY', 'ANSWER', 'AND', 'I', 'LEAVE', 'THE', 'PROFESSOR', 'TO', 'BITE', 'HIS', 'LIPS', 'WITH', 'IMPATIENCE'] +260-123286-0010-712: ref=['SUNDAY', 'AUGUST', 'SIXTEENTH'] +260-123286-0010-712: hyp=['SUNDAY', 'AUGUST', 'SIXTEENTH'] +260-123286-0011-713: ref=['NOTHING', 'NEW', 'WEATHER', 'UNCHANGED', 'THE', 'WIND', 'FRESHENS'] +260-123286-0011-713: hyp=['NOTHING', 'NEW', 'WEATHER', 'UNCHANGED', 'THE', 'WIND', 'FRESHENS'] +260-123286-0012-714: ref=['BUT', 'THERE', 'SEEMED', 'NO', 'REASON', 'TO', 'FEAR'] +260-123286-0012-714: hyp=['BUT', 'THERE', 'SEEMED', 'NO', 'REASON', 'TO', 'FEAR'] +260-123286-0013-715: ref=['THE', 'SHADOW', 'OF', 'THE', 'RAFT', 'WAS', 'CLEARLY', 'OUTLINED', 'UPON', 'THE', 'SURFACE', 'OF', 'THE', 'WAVES'] +260-123286-0013-715: hyp=['THE', 'SHADOW', 'OF', 'THE', 'RAFT', 'WAS', 'CLEARLY', 'OUTLINED', 'UPON', 'THE', 'SURFACE', 'OF', 'THE', 'WAVES'] +260-123286-0014-716: ref=['TRULY', 'THIS', 'SEA', 'IS', 'OF', 'INFINITE', 'WIDTH'] +260-123286-0014-716: hyp=['TRULY', 'THE', 'SEA', 'IS', 'OF', 'INFINITE', 'WIDTH'] +260-123286-0015-717: ref=['IT', 'MUST', 'BE', 'AS', 'WIDE', 'AS', 'THE', 'MEDITERRANEAN', 'OR', 'THE', 'ATLANTIC', 'AND', 'WHY', 'NOT'] +260-123286-0015-717: hyp=['IT', 'MUST', 'BE', 'AS', 'WIDE', 'AS', 'THE', 'MEDITERRANEAN', 'OR', 'THE', 'ATLANTIC', 'AND', 'WHY', 'NOT'] +260-123286-0016-718: ref=['THESE', 'THOUGHTS', 'AGITATED', 'ME', 'ALL', 'DAY', 'AND', 'MY', 'IMAGINATION', 'SCARCELY', 'CALMED', 'DOWN', 'AFTER', 'SEVERAL', 'HOURS', 'SLEEP'] +260-123286-0016-718: hyp=['THESE', 'THOUGHTS', 'AGITATED', 'ME', 'ALL', 'DAY', 'AND', 'MY', 'IMAGINATION', 'SCARCELY', 'CALMED', 'DOWN', 'AFTER', 'SEVERAL', 'HOURS', 'SLEEP'] +260-123286-0017-719: ref=['I', 'SHUDDER', 'AS', 'I', 'RECALL', 'THESE', 'MONSTERS', 'TO', 'MY', 'REMEMBRANCE'] +260-123286-0017-719: hyp=['I', 'SHUDDER', 'AS', 'I', 'RECALL', 'THESE', 'MONSTERS', 'TO', 'MY', 'REMEMBRANCE'] +260-123286-0018-720: ref=['I', 'SAW', 'AT', 'THE', 'HAMBURG', 'MUSEUM', 'THE', 'SKELETON', 'OF', 'ONE', 'OF', 'THESE', 'CREATURES', 'THIRTY', 'FEET', 'IN', 'LENGTH'] +260-123286-0018-720: hyp=['I', 'SAW', 'AT', 'THE', 'HAMBURG', 'MUSEUM', 'THE', 'SKELETON', 'OF', 'ONE', 'OF', 'THESE', 'CREATURES', 'THIRTY', 'FEET', 'IN', 'LENGTH'] +260-123286-0019-721: ref=['I', 'SUPPOSE', 'PROFESSOR', 'LIEDENBROCK', 'WAS', 'OF', 'MY', 'OPINION', 'TOO', 'AND', 'EVEN', 'SHARED', 'MY', 'FEARS', 'FOR', 'AFTER', 'HAVING', 'EXAMINED', 'THE', 'PICK', 'HIS', 'EYES', 'TRAVERSED', 'THE', 'OCEAN', 'FROM', 'SIDE', 'TO', 'SIDE'] +260-123286-0019-721: hyp=['I', 'SUPPOSE', 'PROFESSOR', 'LIEDENBROCK', 'WAS', 'OF', 'MY', 'OPINION', 'TOO', 'AND', 'EVEN', 'SHARED', 'MY', 'FEARS', 'FOR', 'AFTER', 'HAVING', 'EXAMINED', 'THE', 'PICK', 'HIS', 'EYES', 'TRAVERSED', 'THE', 'OCEAN', 'FROM', 'SIDE', 'TO', 'SIDE'] +260-123286-0020-722: ref=['TUESDAY', 'AUGUST', 'EIGHTEENTH'] 
+260-123286-0020-722: hyp=['TUESDAY', 'AUGUST', 'EIGHTEENTH'] +260-123286-0021-723: ref=['DURING', 'HIS', 'WATCH', 'I', 'SLEPT'] +260-123286-0021-723: hyp=['DURING', 'HIS', 'WATCH', 'I', 'SLEPT'] +260-123286-0022-724: ref=['TWO', 'HOURS', 'AFTERWARDS', 'A', 'TERRIBLE', 'SHOCK', 'AWOKE', 'ME'] +260-123286-0022-724: hyp=['TWO', 'HOURS', 'AFTERWARDS', 'A', 'TERRIBLE', 'SHOCK', 'AWOKE', 'ME'] +260-123286-0023-725: ref=['THE', 'RAFT', 'WAS', 'HEAVED', 'UP', 'ON', 'A', 'WATERY', 'MOUNTAIN', 'AND', 'PITCHED', 'DOWN', 'AGAIN', 'AT', 'A', 'DISTANCE', 'OF', 'TWENTY', 'FATHOMS'] +260-123286-0023-725: hyp=['THE', 'RAFT', 'WAS', 'HEAVED', 'UP', 'ON', 'A', 'WATERY', 'MOUNTAIN', 'AND', 'PITCHED', 'DOWN', 'AGAIN', 'AT', 'A', 'DISTANCE', 'OF', 'TWENTY', 'FATHOMS'] +260-123286-0024-726: ref=["THERE'S", 'A', 'WHALE', 'A', 'WHALE', 'CRIED', 'THE', 'PROFESSOR'] +260-123286-0024-726: hyp=["THERE'S", 'A', 'WHALE', 'A', 'WHALE', 'CRIED', 'THE', 'PROFESSOR'] +260-123286-0025-727: ref=['FLIGHT', 'WAS', 'OUT', 'OF', 'THE', 'QUESTION', 'NOW', 'THE', 'REPTILES', 'ROSE', 'THEY', 'WHEELED', 'AROUND', 'OUR', 'LITTLE', 'RAFT', 'WITH', 'A', 'RAPIDITY', 'GREATER', 'THAN', 'THAT', 'OF', 'EXPRESS', 'TRAINS'] +260-123286-0025-727: hyp=['FLIGHT', 'WAS', 'OUT', 'OF', 'THE', 'QUESTION', 'NOW', 'THE', 'REPTILES', 'ROSE', 'THEY', 'WHEELED', 'AROUND', 'OUR', 'LITTLE', 'RAFT', 'WITH', 'A', 'RAPIDITY', 'GREATER', 'THAN', 'THAT', 'OF', 'EXPRESS', 'TRAINS'] +260-123286-0026-728: ref=['TWO', 'MONSTERS', 'ONLY', 'WERE', 'CREATING', 'ALL', 'THIS', 'COMMOTION', 'AND', 'BEFORE', 'MY', 'EYES', 'ARE', 'TWO', 'REPTILES', 'OF', 'THE', 'PRIMITIVE', 'WORLD'] +260-123286-0026-728: hyp=['TWO', 'MONSTERS', 'ONLY', 'WERE', 'CREATING', 'ALL', 'THIS', 'COMMOTION', 'AND', 'BEFORE', 'MY', 'EYES', 'ARE', 'TWO', 'REPTILES', 'OF', 'THE', 'PRIMITIVE', 'WORLD'] +260-123286-0027-729: ref=['I', 'CAN', 'DISTINGUISH', 'THE', 'EYE', 'OF', 'THE', 'ICHTHYOSAURUS', 'GLOWING', 'LIKE', 'A', 'RED', 'HOT', 'COAL', 'AND', 'AS', 'LARGE', 'AS', 'A', "MAN'S", 'HEAD'] +260-123286-0027-729: hyp=['I', 'COULD', 'DISTINGUISH', 'THE', 'EYE', 'OF', 'THE', 'ICHTHIOSAURUS', 'GLOWING', 'LIKE', 'A', 'RED', 'HOT', 'COAL', 'AND', 'AS', 'LARGE', 'AS', 'A', "MAN'S", 'HEAD'] +260-123286-0028-730: ref=['ITS', 'JAW', 'IS', 'ENORMOUS', 'AND', 'ACCORDING', 'TO', 'NATURALISTS', 'IT', 'IS', 'ARMED', 'WITH', 'NO', 'LESS', 'THAN', 'ONE', 'HUNDRED', 'AND', 'EIGHTY', 'TWO', 'TEETH'] +260-123286-0028-730: hyp=['ITS', 'JAW', 'IS', 'ENORMOUS', 'AND', 'ACCORDING', 'TO', 'NATURALISTS', 'IT', 'IS', 'ARMED', 'WITH', 'NO', 'LESS', 'THAN', 'ONE', 'HUNDRED', 'AND', 'EIGHTY', 'TWO', 'TEETH'] +260-123286-0029-731: ref=['THOSE', 'HUGE', 'CREATURES', 'ATTACKED', 'EACH', 'OTHER', 'WITH', 'THE', 'GREATEST', 'ANIMOSITY'] +260-123286-0029-731: hyp=['THOSE', 'HUGE', 'CREATURES', 'ATTACKED', 'EACH', 'OTHER', 'WITH', 'THE', 'GREATEST', 'ANIMOSITY'] +260-123286-0030-732: ref=['SUDDENLY', 'THE', 'ICHTHYOSAURUS', 'AND', 'THE', 'PLESIOSAURUS', 'DISAPPEAR', 'BELOW', 'LEAVING', 'A', 'WHIRLPOOL', 'EDDYING', 'IN', 'THE', 'WATER'] +260-123286-0030-732: hyp=['SUDDENLY', 'THE', 'ICHDEOSAURUS', 'AND', 'THE', 'PLECEOSAURUS', 'DISAPPEAR', 'BELOW', 'LEAVING', 'A', 'WHIRLPOOL', 'EDDYING', 'IN', 'THE', 'WATER'] +260-123286-0031-733: ref=['AS', 'FOR', 'THE', 'ICHTHYOSAURUS', 'HAS', 'HE', 'RETURNED', 'TO', 'HIS', 'SUBMARINE', 'CAVERN'] +260-123286-0031-733: hyp=['AS', 'FOR', 'THE', 'ITHIOSAURUS', 'HAS', 'HE', 'RETURNED', 'TO', 'HIS', 'SUBMARINE', 'CAVERN'] +260-123288-0000-734: ref=['THE', 'ROARINGS', 'BECOME', 'LOST', 'IN', 'THE', 
'DISTANCE'] +260-123288-0000-734: hyp=['THE', 'ROARINGS', 'BECOME', 'LOST', 'IN', 'THE', 'DISTANCE'] +260-123288-0001-735: ref=['THE', 'WEATHER', 'IF', 'WE', 'MAY', 'USE', 'THAT', 'TERM', 'WILL', 'CHANGE', 'BEFORE', 'LONG'] +260-123288-0001-735: hyp=['THE', 'WEATHER', 'IF', 'WE', 'MAY', 'USE', 'THE', 'TERM', 'WILL', 'CHANGE', 'BEFORE', 'LONG'] +260-123288-0002-736: ref=['THE', 'ATMOSPHERE', 'IS', 'CHARGED', 'WITH', 'VAPOURS', 'PERVADED', 'WITH', 'THE', 'ELECTRICITY', 'GENERATED', 'BY', 'THE', 'EVAPORATION', 'OF', 'SALINE', 'WATERS'] +260-123288-0002-736: hyp=['THE', 'ATMOSPHERE', 'IS', 'CHARGED', 'WITH', 'VAPORS', 'PERVADED', 'WITH', 'THE', 'ELECTRICITY', 'GENERATED', 'BY', 'THE', 'EVAPORATION', 'OF', 'SALINE', 'WATERS'] +260-123288-0003-737: ref=['THE', 'ELECTRIC', 'LIGHT', 'CAN', 'SCARCELY', 'PENETRATE', 'THROUGH', 'THE', 'DENSE', 'CURTAIN', 'WHICH', 'HAS', 'DROPPED', 'OVER', 'THE', 'THEATRE', 'ON', 'WHICH', 'THE', 'BATTLE', 'OF', 'THE', 'ELEMENTS', 'IS', 'ABOUT', 'TO', 'BE', 'WAGED'] +260-123288-0003-737: hyp=['THE', 'ELECTRIC', 'LIGHT', 'CAN', 'SCARCELY', 'PENETRATE', 'THROUGH', 'THE', 'DENSE', 'CURTAIN', 'WHICH', 'IS', 'DROPPED', 'OVER', 'THE', 'THEATRE', 'ON', 'WHICH', 'THE', 'BATTLE', 'OF', 'THE', 'ELEMENTS', 'IS', 'ABOUT', 'TO', 'BE', 'WAGED'] +260-123288-0004-738: ref=['THE', 'AIR', 'IS', 'HEAVY', 'THE', 'SEA', 'IS', 'CALM'] +260-123288-0004-738: hyp=['THE', 'AIR', 'IS', 'HEAVY', 'THE', 'SEA', 'IS', 'CALM'] +260-123288-0005-739: ref=['FROM', 'TIME', 'TO', 'TIME', 'A', 'FLEECY', 'TUFT', 'OF', 'MIST', 'WITH', 'YET', 'SOME', 'GLEAMING', 'LIGHT', 'LEFT', 'UPON', 'IT', 'DROPS', 'DOWN', 'UPON', 'THE', 'DENSE', 'FLOOR', 'OF', 'GREY', 'AND', 'LOSES', 'ITSELF', 'IN', 'THE', 'OPAQUE', 'AND', 'IMPENETRABLE', 'MASS'] +260-123288-0005-739: hyp=['FROM', 'TIME', 'TO', 'TIME', 'A', 'FLEECY', 'TUFT', 'OF', 'MIST', 'WITH', 'YET', 'SOME', 'GLEAMING', 'LIGHT', 'LEFT', 'UPON', 'IT', 'DROPS', 'DOWN', 'UPON', 'THE', 'DENSE', 'FLOOR', 'OF', 'GREY', 'AND', 'LOSES', 'ITSELF', 'IN', 'THE', 'OPAQUE', 'AND', 'IMPENETRABLE', 'MASS'] +260-123288-0006-740: ref=['THE', 'ATMOSPHERE', 'IS', 'EVIDENTLY', 'CHARGED', 'AND', 'SURCHARGED', 'WITH', 'ELECTRICITY'] +260-123288-0006-740: hyp=['THE', 'ATMOSPHERE', 'IS', 'EVIDENTLY', 'CHARGED', 'AND', 'SURCHARGED', 'WITH', 'ELECTRICITY'] +260-123288-0007-741: ref=['THE', 'WIND', 'NEVER', 'LULLS', 'BUT', 'TO', 'ACQUIRE', 'INCREASED', 'STRENGTH', 'THE', 'VAST', 'BANK', 'OF', 'HEAVY', 'CLOUDS', 'IS', 'A', 'HUGE', 'RESERVOIR', 'OF', 'FEARFUL', 'WINDY', 'GUSTS', 'AND', 'RUSHING', 'STORMS'] +260-123288-0007-741: hyp=['THE', 'WIND', 'NEVER', 'LULLS', 'BUT', 'TO', 'ACQUIRE', 'INCREASED', 'STRENGTH', 'THE', 'VAST', 'BANK', 'OF', 'HEAVY', 'CLOUDS', 'IS', 'A', 'HUGE', 'RESERVOIR', 'OF', 'FEARFUL', 'WINDY', 'GUSTS', 'AND', 'RUSHING', 'STORMS'] +260-123288-0008-742: ref=["THERE'S", 'A', 'HEAVY', 'STORM', 'COMING', 'ON', 'I', 'CRIED', 'POINTING', 'TOWARDS', 'THE', 'HORIZON'] +260-123288-0008-742: hyp=["THERE'S", 'A', 'HEAVY', 'STORM', 'COMING', 'ON', 'I', 'CRIED', 'POINTING', 'TOWARDS', 'THE', 'HORIZON'] +260-123288-0009-743: ref=['THOSE', 'CLOUDS', 'SEEM', 'AS', 'IF', 'THEY', 'WERE', 'GOING', 'TO', 'CRUSH', 'THE', 'SEA'] +260-123288-0009-743: hyp=['THOSE', 'CLOUDS', 'SEEM', 'AS', 'IF', 'THEY', 'WERE', 'GOING', 'TO', 'CRUSH', 'THE', 'SEA'] +260-123288-0010-744: ref=['ON', 'THE', 'MAST', 'ALREADY', 'I', 'SEE', 'THE', 'LIGHT', 'PLAY', 'OF', 'A', 'LAMBENT', 'SAINT', "ELMO'S", 'FIRE', 'THE', 'OUTSTRETCHED', 'SAIL', 'CATCHES', 'NOT', 'A', 'BREATH', 'OF', 'WIND', 'AND', 'HANGS', 'LIKE', 'A', 
'SHEET', 'OF', 'LEAD'] +260-123288-0010-744: hyp=['ON', 'THE', 'MAST', 'ALREADY', 'I', 'SEE', 'THE', 'LIGHT', 'PLAY', 'OF', 'LAMOT', 'SAINT', "ELBEL'S", 'FIRE', 'THE', 'OUTSTRETCHED', 'SAIL', 'CATCHES', 'NOT', 'A', 'BREATH', 'OF', 'WIND', 'AND', 'HANGS', 'LIKE', 'A', 'SHEET', 'OF', 'LEAD'] +260-123288-0011-745: ref=['BUT', 'IF', 'WE', 'HAVE', 'NOW', 'CEASED', 'TO', 'ADVANCE', 'WHY', 'DO', 'WE', 'YET', 'LEAVE', 'THAT', 'SAIL', 'LOOSE', 'WHICH', 'AT', 'THE', 'FIRST', 'SHOCK', 'OF', 'THE', 'TEMPEST', 'MAY', 'CAPSIZE', 'US', 'IN', 'A', 'MOMENT'] +260-123288-0011-745: hyp=['BUT', 'IF', 'WE', 'HAVE', 'NOW', 'CEASED', 'TO', 'ADVANCE', 'WHY', 'DO', 'WE', 'YET', 'LEAVE', 'THAT', 'SAIL', 'LOOSE', 'WHICH', 'AT', 'THE', 'FIRST', 'SHOCK', 'OF', 'A', 'TEMPEST', 'MAY', 'CAPSIZE', 'US', 'IN', 'A', 'MOMENT'] +260-123288-0012-746: ref=['THAT', 'WILL', 'BE', 'SAFEST', 'NO', 'NO', 'NEVER'] +260-123288-0012-746: hyp=['THAT', 'WILL', 'BE', 'THE', 'SAFEST', 'NO', 'NO', 'NEVER'] +260-123288-0013-747: ref=['THE', 'PILED', 'UP', 'VAPOURS', 'CONDENSE', 'INTO', 'WATER', 'AND', 'THE', 'AIR', 'PUT', 'INTO', 'VIOLENT', 'ACTION', 'TO', 'SUPPLY', 'THE', 'VACUUM', 'LEFT', 'BY', 'THE', 'CONDENSATION', 'OF', 'THE', 'MISTS', 'ROUSES', 'ITSELF', 'INTO', 'A', 'WHIRLWIND'] +260-123288-0013-747: hyp=['THE', 'PILED', 'UP', 'VAPOURS', 'CONDENSED', 'INTO', 'WATER', 'AND', 'THE', 'AIR', 'PUT', 'INTO', 'VIOLENT', 'ACTION', 'TO', 'SUPPLY', 'THE', 'VACUUM', 'LEFT', 'BY', 'THE', 'CONDENSATION', 'OF', 'THE', 'MIST', 'ROUSES', 'ITSELF', 'INTO', 'A', 'WHIRLWIND'] +260-123288-0014-748: ref=['HANS', 'STIRS', 'NOT'] +260-123288-0014-748: hyp=['HANS', 'STIRS', 'NOT'] +260-123288-0015-749: ref=['FROM', 'THE', 'UNDER', 'SURFACE', 'OF', 'THE', 'CLOUDS', 'THERE', 'ARE', 'CONTINUAL', 'EMISSIONS', 'OF', 'LURID', 'LIGHT', 'ELECTRIC', 'MATTER', 'IS', 'IN', 'CONTINUAL', 'EVOLUTION', 'FROM', 'THEIR', 'COMPONENT', 'MOLECULES', 'THE', 'GASEOUS', 'ELEMENTS', 'OF', 'THE', 'AIR', 'NEED', 'TO', 'BE', 'SLAKED', 'WITH', 'MOISTURE', 'FOR', 'INNUMERABLE', 'COLUMNS', 'OF', 'WATER', 'RUSH', 'UPWARDS', 'INTO', 'THE', 'AIR', 'AND', 'FALL', 'BACK', 'AGAIN', 'IN', 'WHITE', 'FOAM'] +260-123288-0015-749: hyp=['FROM', 'THE', 'UNDER', 'SURFACE', 'OF', 'THE', 'CLOUDS', 'THERE', 'ARE', 'CONTINUAL', 'EMISSIONS', 'OF', 'LURID', 'LIGHT', 'ELECTRIC', 'MATTER', 'IS', 'IN', 'CONTINUAL', 'EVOLUTION', 'FROM', 'THEIR', 'COMPONENT', 'MOLECULES', 'THE', 'GASEOUS', 'ELEMENTS', 'OF', 'THE', 'AIR', 'NEED', 'TO', 'BE', 'SLAKED', 'WITH', 'MOISTURE', 'FOR', 'INNUMERABLE', 'COLUMNS', 'OF', 'WATER', 'RUSH', 'UPWARDS', 'INTO', 'THE', 'AIR', 'AND', 'FALL', 'BACK', 'AGAIN', 'IN', 'WHITE', 'FOAM'] +260-123288-0016-750: ref=['I', 'REFER', 'TO', 'THE', 'THERMOMETER', 'IT', 'INDICATES', 'THE', 'FIGURE', 'IS', 'OBLITERATED'] +260-123288-0016-750: hyp=['I', 'REFER', 'TO', 'THE', 'THERMOMETER', 'IT', 'INDICATES', 'THE', 'FIGURE', 'IS', 'OBLITERATED'] +260-123288-0017-751: ref=['IS', 'THE', 'ATMOSPHERIC', 'CONDITION', 'HAVING', 'ONCE', 'REACHED', 'THIS', 'DENSITY', 'TO', 'BECOME', 'FINAL'] +260-123288-0017-751: hyp=['IS', 'THE', 'ATMOSPHERIC', 'CONDITION', 'HAVING', 'ONCE', 'REACHED', 'ITS', 'DENSITY', 'TO', 'BECOME', 'FINAL'] +260-123288-0018-752: ref=['THE', 'RAFT', 'BEARS', 'ON', 'STILL', 'TO', 'THE', 'SOUTH', 'EAST'] +260-123288-0018-752: hyp=['THE', 'RAFT', 'BEARS', 'ON', 'STILL', 'TO', 'THE', 'SOUTHEAST'] +260-123288-0019-753: ref=['AT', 'NOON', 'THE', 'VIOLENCE', 'OF', 'THE', 'STORM', 'REDOUBLES'] +260-123288-0019-753: hyp=['AT', 'NOON', 'THE', 'VIOLENCE', 'OF', 'THE', 'STORM', 'REDOUBLES'] 
+260-123288-0020-754: ref=['EACH', 'OF', 'US', 'IS', 'LASHED', 'TO', 'SOME', 'PART', 'OF', 'THE', 'RAFT'] +260-123288-0020-754: hyp=['EACH', 'OF', 'US', 'IS', 'LASHED', 'TO', 'SOME', 'PART', 'OF', 'THE', 'RAFT'] +260-123288-0021-755: ref=['THE', 'WAVES', 'RISE', 'ABOVE', 'OUR', 'HEADS'] +260-123288-0021-755: hyp=['THE', 'WAVES', 'RISE', 'ABOVE', 'OUR', 'HEADS'] +260-123288-0022-756: ref=['THEY', 'SEEM', 'TO', 'BE', 'WE', 'ARE', 'LOST', 'BUT', 'I', 'AM', 'NOT', 'SURE'] +260-123288-0022-756: hyp=['THEY', 'SEEM', 'TO', 'BE', 'WE', 'ARE', 'LOST', 'BUT', 'I', 'AM', 'NOT', 'SURE'] +260-123288-0023-757: ref=['HE', 'NODS', 'HIS', 'CONSENT'] +260-123288-0023-757: hyp=['HE', 'NODS', 'HIS', 'CONSENT'] +260-123288-0024-758: ref=['THE', 'FIREBALL', 'HALF', 'OF', 'IT', 'WHITE', 'HALF', 'AZURE', 'BLUE', 'AND', 'THE', 'SIZE', 'OF', 'A', 'TEN', 'INCH', 'SHELL', 'MOVED', 'SLOWLY', 'ABOUT', 'THE', 'RAFT', 'BUT', 'REVOLVING', 'ON', 'ITS', 'OWN', 'AXIS', 'WITH', 'ASTONISHING', 'VELOCITY', 'AS', 'IF', 'WHIPPED', 'ROUND', 'BY', 'THE', 'FORCE', 'OF', 'THE', 'WHIRLWIND'] +260-123288-0024-758: hyp=['THE', 'FIRE', 'BALL', 'HALF', 'OF', 'IT', 'WHITE', 'HALF', 'AZURE', 'BLUE', 'AND', 'THE', 'SIZE', 'OF', 'A', 'TEN', 'INCH', 'SHELL', 'MOVED', 'SLOWLY', 'ABOUT', 'THE', 'RAFT', 'BUT', 'REVOLVING', 'ON', 'ITS', 'OWN', 'AXIS', 'WITH', 'ASTONISHING', 'VELOCITY', 'AS', 'IF', 'WHIPPED', 'ROUND', 'BY', 'THE', 'FORCE', 'OF', 'THE', 'WHIRLWIND'] +260-123288-0025-759: ref=['HERE', 'IT', 'COMES', 'THERE', 'IT', 'GLIDES', 'NOW', 'IT', 'IS', 'UP', 'THE', 'RAGGED', 'STUMP', 'OF', 'THE', 'MAST', 'THENCE', 'IT', 'LIGHTLY', 'LEAPS', 'ON', 'THE', 'PROVISION', 'BAG', 'DESCENDS', 'WITH', 'A', 'LIGHT', 'BOUND', 'AND', 'JUST', 'SKIMS', 'THE', 'POWDER', 'MAGAZINE', 'HORRIBLE'] +260-123288-0025-759: hyp=['HERE', 'IT', 'COMES', 'THERE', 'IT', 'GLIDES', 'NOW', 'IT', 'IS', 'UP', 'THE', 'RAGGED', 'STUMP', 'OF', 'THE', 'MAST', 'THENCE', 'IT', 'LIGHTLY', 'LEAPS', 'ON', 'THE', 'PROVISION', 'BAG', 'DESCENDS', 'WITH', 'A', 'LIGHT', 'BOUND', 'AND', 'JUST', 'SKIMS', 'THE', 'POWDER', 'MAGAZINE', 'HORRIBLE'] +260-123288-0026-760: ref=['WE', 'SHALL', 'BE', 'BLOWN', 'UP', 'BUT', 'NO', 'THE', 'DAZZLING', 'DISK', 'OF', 'MYSTERIOUS', 'LIGHT', 'NIMBLY', 'LEAPS', 'ASIDE', 'IT', 'APPROACHES', 'HANS', 'WHO', 'FIXES', 'HIS', 'BLUE', 'EYE', 'UPON', 'IT', 'STEADILY', 'IT', 'THREATENS', 'THE', 'HEAD', 'OF', 'MY', 'UNCLE', 'WHO', 'FALLS', 'UPON', 'HIS', 'KNEES', 'WITH', 'HIS', 'HEAD', 'DOWN', 'TO', 'AVOID', 'IT'] +260-123288-0026-760: hyp=['WE', 'SHALL', 'BE', 'BLOWN', 'UP', 'BUT', 'NO', 'THE', 'DAZZLING', 'DISK', 'OF', 'MYSTERIOUS', 'LIGHT', 'NIMBLY', 'LEAPS', 'ASIDE', 'IT', 'APPROACHES', 'HANS', 'WHO', 'FIXES', 'HIS', 'BLUE', 'EYE', 'UPON', 'IT', 'STEADILY', 'IT', 'THREATENS', 'THE', 'HEAD', 'OF', 'MY', 'UNCLE', 'WHO', 'FALLS', 'UPON', 'HIS', 'KNEES', 'WITH', 'HIS', 'HEAD', 'DOWN', 'TO', 'AVOID', 'IT'] +260-123288-0027-761: ref=['A', 'SUFFOCATING', 'SMELL', 'OF', 'NITROGEN', 'FILLS', 'THE', 'AIR', 'IT', 'ENTERS', 'THE', 'THROAT', 'IT', 'FILLS', 'THE', 'LUNGS'] +260-123288-0027-761: hyp=['A', 'SUFFOCATING', 'SMELL', 'OF', 'NITROGEN', 'FILLS', 'THE', 'AIR', 'IT', 'ENTERS', 'THE', 'THROAT', 'IT', 'FILLS', 'THE', 'LUNGS'] +260-123288-0028-762: ref=['WE', 'SUFFER', 'STIFLING', 'PAINS'] +260-123288-0028-762: hyp=['WE', 'SUFFER', 'STIFLING', 'PAINS'] +260-123440-0000-763: ref=['AND', 'HOW', 'ODD', 'THE', 'DIRECTIONS', 'WILL', 'LOOK'] +260-123440-0000-763: hyp=['AND', 'HOW', 'ODD', 'THE', 'DIRECTIONS', 'WILL', 'LOOK'] +260-123440-0001-764: ref=['POOR', 'ALICE'] 
+260-123440-0001-764: hyp=['POOR', 'ALICE'] +260-123440-0002-765: ref=['IT', 'WAS', 'THE', 'WHITE', 'RABBIT', 'RETURNING', 'SPLENDIDLY', 'DRESSED', 'WITH', 'A', 'PAIR', 'OF', 'WHITE', 'KID', 'GLOVES', 'IN', 'ONE', 'HAND', 'AND', 'A', 'LARGE', 'FAN', 'IN', 'THE', 'OTHER', 'HE', 'CAME', 'TROTTING', 'ALONG', 'IN', 'A', 'GREAT', 'HURRY', 'MUTTERING', 'TO', 'HIMSELF', 'AS', 'HE', 'CAME', 'OH', 'THE', 'DUCHESS', 'THE', 'DUCHESS'] +260-123440-0002-765: hyp=['IT', 'WAS', 'THE', 'WHITE', 'RABBIT', 'RETURNING', 'SPLENDIDLY', 'DRESSED', 'WITH', 'A', 'PAIR', 'OF', 'WHITE', 'KID', 'GLOVES', 'IN', 'ONE', 'HAND', 'AND', 'A', 'LARGE', 'FAN', 'IN', 'THE', 'OTHER', 'HE', 'CAME', 'TROTTING', 'ALONG', 'IN', 'A', 'GREAT', 'HURRY', 'MUTTERING', 'TO', 'HIMSELF', 'AS', 'HE', 'CAME', 'OH', 'THE', 'DUCHESS', 'THE', 'DUCHESS'] +260-123440-0003-766: ref=['OH', "WON'T", 'SHE', 'BE', 'SAVAGE', 'IF', "I'VE", 'KEPT', 'HER', 'WAITING'] +260-123440-0003-766: hyp=['OH', "WON'T", 'SHE', 'BE', 'SAVAGE', 'IF', "I'VE", 'KEPT', 'HER', 'WAITING'] +260-123440-0004-767: ref=['ALICE', 'TOOK', 'UP', 'THE', 'FAN', 'AND', 'GLOVES', 'AND', 'AS', 'THE', 'HALL', 'WAS', 'VERY', 'HOT', 'SHE', 'KEPT', 'FANNING', 'HERSELF', 'ALL', 'THE', 'TIME', 'SHE', 'WENT', 'ON', 'TALKING', 'DEAR', 'DEAR', 'HOW', 'QUEER', 'EVERYTHING', 'IS', 'TO', 'DAY'] +260-123440-0004-767: hyp=['ALICE', 'TOOK', 'UP', 'THE', 'FAN', 'AND', 'GLOVES', 'AND', 'AS', 'THE', 'HALL', 'WAS', 'VERY', 'HOT', 'SHE', 'KEPT', 'FANNING', 'HERSELF', 'ALL', 'THE', 'TIME', 'SHE', 'WENT', 'ON', 'TALKING', 'DEAR', 'DEAR', 'HOW', 'QUEER', 'EVERYTHING', 'IS', 'TO', 'DAY'] +260-123440-0005-768: ref=['AND', 'YESTERDAY', 'THINGS', 'WENT', 'ON', 'JUST', 'AS', 'USUAL'] +260-123440-0005-768: hyp=['AND', 'YESTERDAY', 'THINGS', 'WENT', 'ON', 'JUST', 'AS', 'USUAL'] +260-123440-0006-769: ref=['I', 'WONDER', 'IF', "I'VE", 'BEEN', 'CHANGED', 'IN', 'THE', 'NIGHT'] +260-123440-0006-769: hyp=['I', 'WONDER', 'IF', 'I', 'HAVE', 'BEEN', 'CHANGED', 'IN', 'THE', 'NIGHT'] +260-123440-0007-770: ref=['I', 'ALMOST', 'THINK', 'I', 'CAN', 'REMEMBER', 'FEELING', 'A', 'LITTLE', 'DIFFERENT'] +260-123440-0007-770: hyp=['I', 'ALMOST', 'THINK', 'I', 'CAN', 'REMEMBER', 'FEELING', 'A', 'LITTLE', 'DIFFERENT'] +260-123440-0008-771: ref=["I'LL", 'TRY', 'IF', 'I', 'KNOW', 'ALL', 'THE', 'THINGS', 'I', 'USED', 'TO', 'KNOW'] +260-123440-0008-771: hyp=["I'LL", 'TRY', 'IF', 'I', 'KNOW', 'ALL', 'THE', 'THINGS', 'I', 'USED', 'TO', 'KNOW'] +260-123440-0009-772: ref=['I', 'SHALL', 'NEVER', 'GET', 'TO', 'TWENTY', 'AT', 'THAT', 'RATE'] +260-123440-0009-772: hyp=['I', 'SHALL', 'NEVER', 'GET', 'TO', 'TWENTY', 'AT', 'THAT', 'RATE'] +260-123440-0010-773: ref=['HOW', 'CHEERFULLY', 'HE', 'SEEMS', 'TO', 'GRIN', 'HOW', 'NEATLY', 'SPREAD', 'HIS', 'CLAWS', 'AND', 'WELCOME', 'LITTLE', 'FISHES', 'IN', 'WITH', 'GENTLY', 'SMILING', 'JAWS'] +260-123440-0010-773: hyp=['HOW', 'CHEERFULLY', 'HE', 'SEEMS', 'TO', 'GRIN', 'HOW', 'NEATLY', 'SPREAD', 'HIS', 'CLAWS', 'AND', 'WELCOME', 'LITTLE', 'FISHES', 'IN', 'WITH', 'GENTLY', 'SMILING', 'JAWS'] +260-123440-0011-774: ref=['NO', "I'VE", 'MADE', 'UP', 'MY', 'MIND', 'ABOUT', 'IT', 'IF', "I'M", 'MABEL', "I'LL", 'STAY', 'DOWN', 'HERE'] +260-123440-0011-774: hyp=['NO', "I'VE", 'MADE', 'UP', 'MY', 'MIND', 'ABOUT', 'IT', 'IF', "I'M", 'MABEL', "I'LL", 'STAY', 'DOWN', 'HERE'] +260-123440-0012-775: ref=["IT'LL", 'BE', 'NO', 'USE', 'THEIR', 'PUTTING', 'THEIR', 'HEADS', 'DOWN', 'AND', 'SAYING', 'COME', 'UP', 'AGAIN', 'DEAR'] +260-123440-0012-775: hyp=["IT'LL", 'BE', 'NO', 'USE', 'THEIR', 'PUTTING', 'THEIR', 'HEADS', 'DOWN', 
'AND', 'SAYING', 'COME', 'UP', 'AGAIN', 'DEAR'] +260-123440-0013-776: ref=['I', 'AM', 'SO', 'VERY', 'TIRED', 'OF', 'BEING', 'ALL', 'ALONE', 'HERE'] +260-123440-0013-776: hyp=['I', 'AM', 'SO', 'VERY', 'TIRED', 'OF', 'BEING', 'ALL', 'ALONE', 'HERE'] +260-123440-0014-777: ref=['AND', 'I', 'DECLARE', "IT'S", 'TOO', 'BAD', 'THAT', 'IT', 'IS'] +260-123440-0014-777: hyp=['AND', 'I', 'DECLARE', "IT'S", 'TOO', 'BAD', 'THAT', 'IT', 'IS'] +260-123440-0015-778: ref=['I', 'WISH', 'I', "HADN'T", 'CRIED', 'SO', 'MUCH', 'SAID', 'ALICE', 'AS', 'SHE', 'SWAM', 'ABOUT', 'TRYING', 'TO', 'FIND', 'HER', 'WAY', 'OUT'] +260-123440-0015-778: hyp=['I', 'WISH', 'I', "HADN'T", 'CRIED', 'SO', 'MUCH', 'SAID', 'ALICE', 'AS', 'SHE', 'SWAM', 'ABOUT', 'TRYING', 'TO', 'FIND', 'HER', 'WAY', 'OUT'] +260-123440-0016-779: ref=['I', 'SHALL', 'BE', 'PUNISHED', 'FOR', 'IT', 'NOW', 'I', 'SUPPOSE', 'BY', 'BEING', 'DROWNED', 'IN', 'MY', 'OWN', 'TEARS'] +260-123440-0016-779: hyp=['I', 'SHALL', 'BE', 'PUNISHED', 'FOR', 'IT', 'NOW', 'I', 'SUPPOSE', 'BY', 'BEING', 'DROWNED', 'IN', 'MY', 'OWN', 'TEARS'] +260-123440-0017-780: ref=['THAT', 'WILL', 'BE', 'A', 'QUEER', 'THING', 'TO', 'BE', 'SURE'] +260-123440-0017-780: hyp=['THAT', 'WILL', 'BE', 'A', 'QUEER', 'THING', 'TO', 'BE', 'SURE'] +260-123440-0018-781: ref=['I', 'AM', 'VERY', 'TIRED', 'OF', 'SWIMMING', 'ABOUT', 'HERE', 'O', 'MOUSE'] +260-123440-0018-781: hyp=['I', 'AM', 'VERY', 'TIRED', 'OF', 'SWIMMING', 'ABOUT', 'HERE', 'O', 'MOUSE'] +260-123440-0019-782: ref=['CRIED', 'ALICE', 'AGAIN', 'FOR', 'THIS', 'TIME', 'THE', 'MOUSE', 'WAS', 'BRISTLING', 'ALL', 'OVER', 'AND', 'SHE', 'FELT', 'CERTAIN', 'IT', 'MUST', 'BE', 'REALLY', 'OFFENDED'] +260-123440-0019-782: hyp=['CRIED', 'ALICE', 'AGAIN', 'FOR', 'THIS', 'TIME', 'THE', 'MOUSE', 'WAS', 'BRISTLING', 'ALL', 'OVER', 'AND', 'SHE', 'FELT', 'CERTAIN', 'IT', 'MUST', 'BE', 'REALLY', 'OFFENDED'] +260-123440-0020-783: ref=['WE', "WON'T", 'TALK', 'ABOUT', 'HER', 'ANY', 'MORE', 'IF', "YOU'D", 'RATHER', 'NOT', 'WE', 'INDEED'] +260-123440-0020-783: hyp=['WE', "WON'T", 'TALK', 'ABOUT', 'HER', 'ANY', 'MORE', 'IF', "YOU'D", 'RATHER', 'NOT', 'WE', 'INDEED'] +2830-3979-0000-784: ref=['WE', 'WANT', 'YOU', 'TO', 'HELP', 'US', 'PUBLISH', 'SOME', 'LEADING', 'WORK', 'OF', "LUTHER'S", 'FOR', 'THE', 'GENERAL', 'AMERICAN', 'MARKET', 'WILL', 'YOU', 'DO', 'IT'] +2830-3979-0000-784: hyp=['WE', 'WANT', 'YOU', 'TO', 'HELP', 'US', 'PUBLISH', 'SOME', 'LEADING', 'WORK', 'OF', 'LUTHERS', 'FOR', 'THE', 'GENERAL', 'AMERICAN', 'MARKET', 'WILL', 'YOU', 'DO', 'IT'] +2830-3979-0001-785: ref=['THE', 'CONDITION', 'IS', 'THAT', 'I', 'WILL', 'BE', 'PERMITTED', 'TO', 'MAKE', 'LUTHER', 'TALK', 'AMERICAN', 'STREAMLINE', 'HIM', 'SO', 'TO', 'SPEAK', 'BECAUSE', 'YOU', 'WILL', 'NEVER', 'GET', 'PEOPLE', 'WHETHER', 'IN', 'OR', 'OUTSIDE', 'THE', 'LUTHERAN', 'CHURCH', 'ACTUALLY', 'TO', 'READ', 'LUTHER', 'UNLESS', 'WE', 'MAKE', 'HIM', 'TALK', 'AS', 'HE', 'WOULD', 'TALK', 'TODAY', 'TO', 'AMERICANS'] +2830-3979-0001-785: hyp=['THE', 'CONDITION', 'IS', 'THAT', 'I', 'WILL', 'BE', 'PERMITTED', 'TO', 'MAKE', 'LUTHER', 'TALK', 'AMERICAN', 'STREAMLINE', 'HIM', 'SO', 'TO', 'SPEAK', 'BECAUSE', 'HE', 'WILL', 'NEVER', 'GET', 'PEOPLE', 'WHETHER', 'IN', 'OR', 'OUTSIDE', 'THE', 'LUTHERAN', 'CHURCH', 'ACTUALLY', 'TO', 'READ', 'LUTHER', 'UNLESS', 'WE', 'MAKE', 'HIM', 'TALK', 'AS', 'HE', 'WOULD', 'TALK', 'TO', 'DAY', 'TO', 'AMERICANS'] +2830-3979-0002-786: ref=['LET', 'US', 'BEGIN', 'WITH', 'THAT', 'HIS', 'COMMENTARY', 'ON', 'GALATIANS'] +2830-3979-0002-786: hyp=['LET', 'US', 'BEGIN', 'WITH', 'THAT', 'HIS', 
'COMMENTARY', 'ON', 'GALLATIONS'] +2830-3979-0003-787: ref=['THE', 'UNDERTAKING', 'WHICH', 'SEEMED', 'SO', 'ATTRACTIVE', 'WHEN', 'VIEWED', 'AS', 'A', 'LITERARY', 'TASK', 'PROVED', 'A', 'MOST', 'DIFFICULT', 'ONE', 'AND', 'AT', 'TIMES', 'BECAME', 'OPPRESSIVE'] +2830-3979-0003-787: hyp=['THE', 'UNDERTAKING', 'WHICH', 'SEEMED', 'SO', 'ATTRACTIVE', 'WHEN', 'VIEWED', 'AS', 'A', 'LITERARY', 'TASK', 'PROVED', 'A', 'MOST', 'DIFFICULT', 'ONE', 'AND', 'AT', 'TIMES', 'BECAME', 'OPPRESSIVE'] +2830-3979-0004-788: ref=['IT', 'WAS', 'WRITTEN', 'IN', 'LATIN'] +2830-3979-0004-788: hyp=['IT', 'WAS', 'WRITTEN', 'IN', 'LATIN'] +2830-3979-0005-789: ref=['THE', 'WORK', 'HAD', 'TO', 'BE', 'CONDENSED'] +2830-3979-0005-789: hyp=['THE', 'WORK', 'HAD', 'TO', 'BE', 'CONDENSED'] +2830-3979-0006-790: ref=['A', 'WORD', 'SHOULD', 'NOW', 'BE', 'SAID', 'ABOUT', 'THE', 'ORIGIN', 'OF', "LUTHER'S", 'COMMENTARY', 'ON', 'GALATIANS'] +2830-3979-0006-790: hyp=['A', 'WORD', 'SHOULD', 'NOW', 'BE', 'SAID', 'ABOUT', 'THE', 'ORIGIN', 'OF', "LUTHER'S", 'COMMENTARY', 'UNGULATIONS'] +2830-3979-0007-791: ref=['MUCH', 'LATER', 'WHEN', 'A', 'FRIEND', 'OF', 'HIS', 'WAS', 'PREPARING', 'AN', 'EDITION', 'OF', 'ALL', 'HIS', 'LATIN', 'WORKS', 'HE', 'REMARKED', 'TO', 'HIS', 'HOME', 'CIRCLE', 'IF', 'I', 'HAD', 'MY', 'WAY', 'ABOUT', 'IT', 'THEY', 'WOULD', 'REPUBLISH', 'ONLY', 'THOSE', 'OF', 'MY', 'BOOKS', 'WHICH', 'HAVE', 'DOCTRINE', 'MY', 'GALATIANS', 'FOR', 'INSTANCE'] +2830-3979-0007-791: hyp=['MUCH', 'LATER', 'WHEN', 'A', 'FRIEND', 'OF', 'HIS', 'WAS', 'PREPARING', 'AN', 'EDITION', 'OF', 'ALL', 'HIS', 'LATIN', 'WORKS', 'HE', 'REMARKED', 'TO', 'HIS', 'HOME', 'CIRCLE', 'IF', 'I', 'HAD', 'MY', 'WAY', 'ABOUT', 'IT', 'THEY', 'WOULD', 'REPUBLISH', 'ONLY', 'THOSE', 'OF', 'MY', 'BOOKS', 'WHICH', 'HAVE', 'DOCTRINE', 'MIGALATIONS', 'FOR', 'INSTANCE'] +2830-3979-0008-792: ref=['IN', 'OTHER', 'WORDS', 'THESE', 'THREE', 'MEN', 'TOOK', 'DOWN', 'THE', 'LECTURES', 'WHICH', 'LUTHER', 'ADDRESSED', 'TO', 'HIS', 'STUDENTS', 'IN', 'THE', 'COURSE', 'OF', 'GALATIANS', 'AND', 'ROERER', 'PREPARED', 'THE', 'MANUSCRIPT', 'FOR', 'THE', 'PRINTER'] +2830-3979-0008-792: hyp=['IN', 'OTHER', 'WORDS', 'THESE', 'THREE', 'MEN', 'TOOK', 'DOWN', 'THE', 'LECTURES', 'WHICH', 'LUTHER', 'ADDRESSED', 'TO', 'HIS', 'STUDENTS', 'IN', 'THE', 'COURSE', 'OF', 'GALATIANS', 'AND', 'RUER', 'PREPARED', 'THE', 'MANUSCRIPT', 'FOR', 'THE', 'PRINTER'] +2830-3979-0009-793: ref=['IT', 'PRESENTS', 'LIKE', 'NO', 'OTHER', 'OF', "LUTHER'S", 'WRITINGS', 'THE', 'CENTRAL', 'THOUGHT', 'OF', 'CHRISTIANITY', 'THE', 'JUSTIFICATION', 'OF', 'THE', 'SINNER', 'FOR', 'THE', 'SAKE', 'OF', "CHRIST'S", 'MERITS', 'ALONE'] +2830-3979-0009-793: hyp=['IT', 'PRESENTS', 'LIKE', 'NO', 'OTHER', 'OF', "LUTHER'S", 'WRITINGS', 'THE', 'CENTRAL', 'THOUGHT', 'OF', 'CHRISTIANITY', 'THE', 'JUSTIFICATION', 'OF', 'THE', 'SINNER', 'FOR', 'THE', 'SAKE', 'OF', "CHRIST'S", 'MERITS', 'ALONE'] +2830-3979-0010-794: ref=['BUT', 'THE', 'ESSENCE', 'OF', "LUTHER'S", 'LECTURES', 'IS', 'THERE'] +2830-3979-0010-794: hyp=['BUT', 'THE', 'ESSENCE', 'OF', "LUTHER'S", 'LECTURES', 'IS', 'THERE'] +2830-3979-0011-795: ref=['THE', 'LORD', 'WHO', 'HAS', 'GIVEN', 'US', 'POWER', 'TO', 'TEACH', 'AND', 'TO', 'HEAR', 'LET', 'HIM', 'ALSO', 'GIVE', 'US', 'THE', 'POWER', 'TO', 'SERVE', 'AND', 'TO', 'DO', 'LUKE', 'TWO'] +2830-3979-0011-795: hyp=['THE', 'LORD', 'WHO', 'HAS', 'GIVEN', 'US', 'POWER', 'TO', 'TEACH', 'AND', 'TO', 'HEAR', 'LET', 'HIM', 'ALSO', 'GIVE', 'US', 'THE', 'POWER', 'TO', 'SERVE', 'AND', 'TO', 'DO', 'LUKE', 'TWO'] +2830-3979-0012-796: ref=['THE', 'WORD', 
'OF', 'OUR', 'GOD', 'SHALL', 'STAND', 'FOREVER'] +2830-3979-0012-796: hyp=['THE', 'WORD', 'OF', 'OUR', 'GOD', 'SHALL', 'STAND', 'FOREVER'] +2830-3980-0000-797: ref=['IN', 'EVERY', 'WAY', 'THEY', 'SOUGHT', 'TO', 'UNDERMINE', 'THE', 'AUTHORITY', 'OF', 'SAINT', 'PAUL'] +2830-3980-0000-797: hyp=['IN', 'EVERY', 'WAY', 'THEY', 'SOUGHT', 'TO', 'UNDERMINE', 'THE', 'AUTHORITY', 'OF', 'SAINT', 'PAUL'] +2830-3980-0001-798: ref=['THEY', 'SAID', 'TO', 'THE', 'GALATIANS', 'YOU', 'HAVE', 'NO', 'RIGHT', 'TO', 'THINK', 'HIGHLY', 'OF', 'PAUL'] +2830-3980-0001-798: hyp=['THEY', 'SAID', 'TO', 'THE', 'GALATIANS', 'YOU', 'HAVE', 'NO', 'RIGHT', 'TO', 'THINK', 'HIGHLY', 'OF', 'PAUL'] +2830-3980-0002-799: ref=['HE', 'WAS', 'THE', 'LAST', 'TO', 'TURN', 'TO', 'CHRIST'] +2830-3980-0002-799: hyp=['HE', 'WAS', 'THE', 'LAST', 'TO', 'TURN', 'TO', 'CHRIST'] +2830-3980-0003-800: ref=['PAUL', 'CAME', 'LATER', 'AND', 'IS', 'BENEATH', 'US'] +2830-3980-0003-800: hyp=['PAUL', 'CAME', 'LATER', 'AND', 'IS', 'BENEATH', 'US'] +2830-3980-0004-801: ref=['INDEED', 'HE', 'PERSECUTED', 'THE', 'CHURCH', 'OF', 'CHRIST', 'FOR', 'A', 'LONG', 'TIME'] +2830-3980-0004-801: hyp=['INDEED', 'HE', 'PERSECUTED', 'THE', 'CHURCH', 'OF', 'CHRIST', 'FOR', 'A', 'LONG', 'TIME'] +2830-3980-0005-802: ref=['DO', 'YOU', 'SUPPOSE', 'THAT', 'GOD', 'FOR', 'THE', 'SAKE', 'OF', 'A', 'FEW', 'LUTHERAN', 'HERETICS', 'WOULD', 'DISOWN', 'HIS', 'ENTIRE', 'CHURCH'] +2830-3980-0005-802: hyp=['DO', 'YOU', 'SUPPOSE', 'THAT', 'GOD', 'FOR', 'THE', 'SAKE', 'OF', 'A', 'FEW', 'LUTHERAN', 'HERETICS', 'WOULD', 'DISOWN', 'HIS', 'ENTIRE', 'CHURCH'] +2830-3980-0006-803: ref=['AGAINST', 'THESE', 'BOASTING', 'FALSE', 'APOSTLES', 'PAUL', 'BOLDLY', 'DEFENDS', 'HIS', 'APOSTOLIC', 'AUTHORITY', 'AND', 'MINISTRY'] +2830-3980-0006-803: hyp=['AGAINST', 'THESE', 'BOASTING', 'FALSE', 'APOSTLES', 'PAUL', 'BOLDLY', 'DEFENDS', 'HIS', 'APOSTOLIC', 'AUTHORITY', 'AND', 'MINISTRY'] +2830-3980-0007-804: ref=['AS', 'THE', 'AMBASSADOR', 'OF', 'A', 'GOVERNMENT', 'IS', 'HONORED', 'FOR', 'HIS', 'OFFICE', 'AND', 'NOT', 'FOR', 'HIS', 'PRIVATE', 'PERSON', 'SO', 'THE', 'MINISTER', 'OF', 'CHRIST', 'SHOULD', 'EXALT', 'HIS', 'OFFICE', 'IN', 'ORDER', 'TO', 'GAIN', 'AUTHORITY', 'AMONG', 'MEN'] +2830-3980-0007-804: hyp=['AS', 'THE', 'AMBASSADOR', 'OF', 'A', 'GOVERNMENT', 'IS', 'HONORED', 'FOR', 'HIS', 'OFFICE', 'AND', 'NOT', 'FOR', 'HIS', 'PRIVATE', 'PERSON', 'SO', 'THE', 'MINISTER', 'OF', 'CHRIST', 'SHOULD', 'EXALT', 'HIS', 'OFFICE', 'IN', 'ORDER', 'TO', 'GAIN', 'AUTHORITY', 'AMONG', 'MEN'] +2830-3980-0008-805: ref=['PAUL', 'TAKES', 'PRIDE', 'IN', 'HIS', 'MINISTRY', 'NOT', 'TO', 'HIS', 'OWN', 'PRAISE', 'BUT', 'TO', 'THE', 'PRAISE', 'OF', 'GOD'] +2830-3980-0008-805: hyp=['PAUL', 'TAKES', 'PRIDE', 'IN', 'HIS', 'MINISTRY', 'NOT', 'TO', 'HIS', 'OWN', 'PRAISE', 'BUT', 'TO', 'THE', 'PRAISE', 'OF', 'GOD'] +2830-3980-0009-806: ref=['PAUL', 'AN', 'APOSTLE', 'NOT', 'OF', 'MEN', 'ET', 'CETERA'] +2830-3980-0009-806: hyp=['PAUL', 'AN', 'APOSTLE', 'NOT', 'OF', 'MEN', 'ET', 'CETERA'] +2830-3980-0010-807: ref=['EITHER', 'HE', 'CALLS', 'MINISTERS', 'THROUGH', 'THE', 'AGENCY', 'OF', 'MEN', 'OR', 'HE', 'CALLS', 'THEM', 'DIRECTLY', 'AS', 'HE', 'CALLED', 'THE', 'PROPHETS', 'AND', 'APOSTLES'] +2830-3980-0010-807: hyp=['EITHER', 'HE', 'CALLS', 'MINISTERS', 'THROUGH', 'THE', 'AGENCY', 'OF', 'MEN', 'OR', 'HE', 'CALLS', 'THEM', 'DIRECTLY', 'AS', 'HE', 'CALLED', 'THE', 'PROPHETS', 'AND', 'APOSTLES'] +2830-3980-0011-808: ref=['PAUL', 'DECLARES', 'THAT', 'THE', 'FALSE', 'APOSTLES', 'WERE', 'CALLED', 'OR', 'SENT', 'NEITHER', 'BY', 'MEN', 'NOR', 
'BY', 'MAN'] +2830-3980-0011-808: hyp=['PAUL', 'DECLARES', 'THAT', 'THE', 'FALSE', 'APOSTLES', 'WERE', 'CALLED', 'OR', 'SENT', 'NEITHER', 'BY', 'MEN', 'NOR', 'BY', 'MAN'] +2830-3980-0012-809: ref=['THE', 'MOST', 'THEY', 'COULD', 'CLAIM', 'IS', 'THAT', 'THEY', 'WERE', 'SENT', 'BY', 'OTHERS'] +2830-3980-0012-809: hyp=['THE', 'MOST', 'THEY', 'COULD', 'CLAIM', 'IS', 'THAT', 'THEY', 'WERE', 'SENT', 'BY', 'OTHERS'] +2830-3980-0013-810: ref=['HE', 'MENTIONS', 'THE', 'APOSTLES', 'FIRST', 'BECAUSE', 'THEY', 'WERE', 'APPOINTED', 'DIRECTLY', 'BY', 'GOD'] +2830-3980-0013-810: hyp=['HE', 'MENTIONS', 'THE', 'APOSTLES', 'FIRST', 'BECAUSE', 'THEY', 'WERE', 'APPOINTED', 'DIRECTLY', 'BY', 'GOD'] +2830-3980-0014-811: ref=['THE', 'CALL', 'IS', 'NOT', 'TO', 'BE', 'TAKEN', 'LIGHTLY'] +2830-3980-0014-811: hyp=['THE', 'CALL', 'IS', 'NOT', 'TO', 'BE', 'TAKEN', 'LIGHTLY'] +2830-3980-0015-812: ref=['FOR', 'A', 'PERSON', 'TO', 'POSSESS', 'KNOWLEDGE', 'IS', 'NOT', 'ENOUGH'] +2830-3980-0015-812: hyp=['FOR', 'A', 'PERSON', 'TO', 'POSSESS', 'KNOWLEDGE', 'IS', 'NOT', 'ENOUGH'] +2830-3980-0016-813: ref=['IT', 'SPOILS', "ONE'S", 'BEST', 'WORK'] +2830-3980-0016-813: hyp=['IT', 'SPOILS', "ONE'S", 'BEST', 'WORK'] +2830-3980-0017-814: ref=['WHEN', 'I', 'WAS', 'A', 'YOUNG', 'MAN', 'I', 'THOUGHT', 'PAUL', 'WAS', 'MAKING', 'TOO', 'MUCH', 'OF', 'HIS', 'CALL'] +2830-3980-0017-814: hyp=['WHEN', 'I', 'WAS', 'A', 'YOUNG', 'MAN', 'I', 'THOUGHT', 'PAUL', 'WAS', 'MAKING', 'TOO', 'MUCH', 'OF', 'HIS', 'CALL'] +2830-3980-0018-815: ref=['I', 'DID', 'NOT', 'THEN', 'REALIZE', 'THE', 'IMPORTANCE', 'OF', 'THE', 'MINISTRY'] +2830-3980-0018-815: hyp=['I', 'DID', 'NOT', 'THEN', 'REALIZE', 'THE', 'IMPORTANCE', 'OF', 'THE', 'MINISTRY'] +2830-3980-0019-816: ref=['I', 'KNEW', 'NOTHING', 'OF', 'THE', 'DOCTRINE', 'OF', 'FAITH', 'BECAUSE', 'WE', 'WERE', 'TAUGHT', 'SOPHISTRY', 'INSTEAD', 'OF', 'CERTAINTY', 'AND', 'NOBODY', 'UNDERSTOOD', 'SPIRITUAL', 'BOASTING'] +2830-3980-0019-816: hyp=['I', 'KNEW', 'NOTHING', 'OF', 'THE', 'DOCTRINE', 'OF', 'FAITH', 'BECAUSE', 'WE', 'WERE', 'TAUGHT', 'SOPHISTRY', 'INSTEAD', 'OF', 'CERTAINTY', 'AND', 'NOBODY', 'UNDERSTOOD', 'SPIRITUAL', 'BOASTING'] +2830-3980-0020-817: ref=['THIS', 'IS', 'NO', 'SINFUL', 'PRIDE', 'IT', 'IS', 'HOLY', 'PRIDE'] +2830-3980-0020-817: hyp=['THIS', 'IS', 'NO', 'SINFUL', 'PRIDE', 'IT', 'IS', 'HOLY', 'PRIDE'] +2830-3980-0021-818: ref=['AND', 'GOD', 'THE', 'FATHER', 'WHO', 'RAISED', 'HIM', 'FROM', 'THE', 'DEAD'] +2830-3980-0021-818: hyp=['AND', 'GOD', 'THE', 'FATHER', 'WHO', 'RAISED', 'HIM', 'FROM', 'THE', 'DEAD'] +2830-3980-0022-819: ref=['THE', 'CLAUSE', 'SEEMS', 'SUPERFLUOUS', 'ON', 'FIRST', 'SIGHT'] +2830-3980-0022-819: hyp=['THE', 'CLAUSE', 'SEEMED', 'SUPERFLUOUS', 'ON', 'FIRST', 'SIGHT'] +2830-3980-0023-820: ref=['THESE', 'PERVERTERS', 'OF', 'THE', 'RIGHTEOUSNESS', 'OF', 'CHRIST', 'RESIST', 'THE', 'FATHER', 'AND', 'THE', 'SON', 'AND', 'THE', 'WORKS', 'OF', 'THEM', 'BOTH'] +2830-3980-0023-820: hyp=['THESE', 'PERVERTERS', 'OF', 'THE', 'RIGHTEOUSNESS', 'OF', 'CHRIST', 'RESIST', 'THE', 'FATHER', 'AND', 'THE', 'SON', 'AND', 'THE', 'WORKS', 'OF', 'THEM', 'BOTH'] +2830-3980-0024-821: ref=['IN', 'THIS', 'WHOLE', 'EPISTLE', 'PAUL', 'TREATS', 'OF', 'THE', 'RESURRECTION', 'OF', 'CHRIST'] +2830-3980-0024-821: hyp=['IN', 'THIS', 'WHOLE', 'EPISTLE', 'PAUL', 'TREATS', 'OF', 'THE', 'RESURRECTION', 'OF', 'CHRIST'] +2830-3980-0025-822: ref=['BY', 'HIS', 'RESURRECTION', 'CHRIST', 'WON', 'THE', 'VICTORY', 'OVER', 'LAW', 'SIN', 'FLESH', 'WORLD', 'DEVIL', 'DEATH', 'HELL', 'AND', 'EVERY', 'EVIL'] +2830-3980-0025-822: 
hyp=['BY', 'HIS', 'RESURRECTION', 'CHRIST', 'WON', 'THE', 'VICTORY', 'OVER', 'LAW', 'SIN', 'FLESH', 'WORLD', 'DEVIL', 'DEATH', 'HELL', 'AND', 'EVERY', 'EVIL'] +2830-3980-0026-823: ref=['VERSE', 'TWO'] +2830-3980-0026-823: hyp=['FIRST', 'TWO'] +2830-3980-0027-824: ref=['AND', 'ALL', 'THE', 'BRETHREN', 'WHICH', 'ARE', 'WITH', 'ME'] +2830-3980-0027-824: hyp=['AND', 'ALL', 'THE', 'BRETHREN', 'WHICH', 'ARE', 'WITH', 'ME'] +2830-3980-0028-825: ref=['THIS', 'SHOULD', 'GO', 'FAR', 'IN', 'SHUTTING', 'THE', 'MOUTHS', 'OF', 'THE', 'FALSE', 'APOSTLES'] +2830-3980-0028-825: hyp=['THIS', 'SHOULD', 'GO', 'FAR', 'IN', 'SHUTTING', 'THE', 'MOUTHS', 'OF', 'THE', 'FALSE', 'APOSTLES'] +2830-3980-0029-826: ref=['ALTHOUGH', 'THE', 'BRETHREN', 'WITH', 'ME', 'ARE', 'NOT', 'APOSTLES', 'LIKE', 'MYSELF', 'YET', 'THEY', 'ARE', 'ALL', 'OF', 'ONE', 'MIND', 'WITH', 'ME', 'THINK', 'WRITE', 'AND', 'TEACH', 'AS', 'I', 'DO'] +2830-3980-0029-826: hyp=['ALTHOUGH', 'THE', 'BRETHREN', 'WITH', 'ME', 'ARE', 'NOT', 'APOSTLES', 'LIKE', 'MYSELF', 'YET', 'THEY', 'ARE', 'ALL', 'OF', 'ONE', 'MIND', 'WITH', 'ME', 'THINK', 'WRITE', 'AND', 'TEACH', 'AS', 'I', 'DO'] +2830-3980-0030-827: ref=['THEY', 'DO', 'NOT', 'GO', 'WHERE', 'THE', 'ENEMIES', 'OF', 'THE', 'GOSPEL', 'PREDOMINATE', 'THEY', 'GO', 'WHERE', 'THE', 'CHRISTIANS', 'ARE'] +2830-3980-0030-827: hyp=['THEY', 'DO', 'NOT', 'GO', 'WHERE', 'THE', 'ENEMIES', 'OF', 'THE', 'GOSPEL', 'PREDOMINATE', 'THEY', 'GO', 'WHERE', 'THE', 'CHRISTIANS', 'ARE'] +2830-3980-0031-828: ref=['WHY', 'DO', 'THEY', 'NOT', 'INVADE', 'THE', 'CATHOLIC', 'PROVINCES', 'AND', 'PREACH', 'THEIR', 'DOCTRINE', 'TO', 'GODLESS', 'PRINCES', 'BISHOPS', 'AND', 'DOCTORS', 'AS', 'WE', 'HAVE', 'DONE', 'BY', 'THE', 'HELP', 'OF', 'GOD'] +2830-3980-0031-828: hyp=['WHY', 'DO', 'THEY', 'NOT', 'INVADE', 'THE', 'CATHOLIC', 'PROVINCES', 'AND', 'PREACH', 'THEIR', 'DOCTRINE', 'TO', 'GODLESS', 'PRINCES', 'BISHOPS', 'AND', 'DOCTORS', 'AS', 'WE', 'HAVE', 'DONE', 'BY', 'THE', 'HELP', 'OF', 'GOD'] +2830-3980-0032-829: ref=['WE', 'LOOK', 'FOR', 'THAT', 'REWARD', 'WHICH', 'EYE', 'HATH', 'NOT', 'SEEN', 'NOR', 'EAR', 'HEARD', 'NEITHER', 'HATH', 'ENTERED', 'INTO', 'THE', 'HEART', 'OF', 'MAN'] +2830-3980-0032-829: hyp=['WE', 'LOOK', 'FOR', 'THAT', 'REWARD', 'WHICH', 'EYE', 'HATH', 'NOT', 'SEEN', 'NOR', 'EAR', 'HEARD', 'NEITHER', 'HATH', 'ENTERED', 'INTO', 'THE', 'HEART', 'OF', 'MAN'] +2830-3980-0033-830: ref=['NOT', 'ALL', 'THE', 'GALATIANS', 'HAD', 'BECOME', 'PERVERTED'] +2830-3980-0033-830: hyp=['NOT', 'ALL', 'THE', 'GLACIERS', 'HAD', 'BECOME', 'PERVERTED'] +2830-3980-0034-831: ref=['THESE', 'MEANS', 'CANNOT', 'BE', 'CONTAMINATED'] +2830-3980-0034-831: hyp=['THESE', 'MEANS', 'CANNOT', 'BE', 'CONTAMINATED'] +2830-3980-0035-832: ref=['THEY', 'REMAIN', 'DIVINE', 'REGARDLESS', 'OF', "MEN'S", 'OPINION'] +2830-3980-0035-832: hyp=['THEY', 'REMAIN', 'DIVINE', 'REGARDLESS', 'OF', "MEN'S", 'OPINION'] +2830-3980-0036-833: ref=['WHEREVER', 'THE', 'MEANS', 'OF', 'GRACE', 'ARE', 'FOUND', 'THERE', 'IS', 'THE', 'HOLY', 'CHURCH', 'EVEN', 'THOUGH', 'ANTICHRIST', 'REIGNS', 'THERE'] +2830-3980-0036-833: hyp=['WHEREVER', 'THE', 'MEANS', 'OF', 'GRACE', 'ARE', 'FOUND', 'THERE', 'IS', 'THE', 'HOLY', 'CHURCH', 'EVEN', 'THOUGH', 'ANTICHRIST', 'REIGNS', 'THERE'] +2830-3980-0037-834: ref=['SO', 'MUCH', 'FOR', 'THE', 'TITLE', 'OF', 'THE', 'EPISTLE', 'NOW', 'FOLLOWS', 'THE', 'GREETING', 'OF', 'THE', 'APOSTLE', 'VERSE', 'THREE'] +2830-3980-0037-834: hyp=['SO', 'MUCH', 'FOR', 'THE', 'TITLE', 'OF', 'THE', 'EPISTLE', 'NOW', 'FOLLOWS', 'THE', 'READING', 'OF', 'THE', 'APOSTLE', 
'VERSE', 'THREE'] +2830-3980-0038-835: ref=['GRACE', 'BE', 'TO', 'YOU', 'AND', 'PEACE', 'FROM', 'GOD', 'THE', 'FATHER', 'AND', 'FROM', 'OUR', 'LORD', 'JESUS', 'CHRIST'] +2830-3980-0038-835: hyp=['GRACE', 'BE', 'TO', 'YOU', 'AND', 'PEACE', 'FROM', 'GOD', 'THE', 'FATHER', 'AND', 'FROM', 'OUR', 'LORD', 'JESUS', 'CHRIST'] +2830-3980-0039-836: ref=['THE', 'TERMS', 'OF', 'GRACE', 'AND', 'PEACE', 'ARE', 'COMMON', 'TERMS', 'WITH', 'PAUL', 'AND', 'ARE', 'NOW', 'PRETTY', 'WELL', 'UNDERSTOOD'] +2830-3980-0039-836: hyp=['THE', 'TERMS', 'OF', 'GRACE', 'AND', 'PEACE', 'ARE', 'COMMON', 'TERMS', 'WITH', 'PAUL', 'AND', 'ARE', 'NOW', 'PRETTY', 'WELL', 'UNDERSTOOD'] +2830-3980-0040-837: ref=['THE', 'GREETING', 'OF', 'THE', 'APOSTLE', 'IS', 'REFRESHING'] +2830-3980-0040-837: hyp=['THE', 'GREETING', 'OF', 'THE', 'APOSTLE', 'IS', 'REFRESHING'] +2830-3980-0041-838: ref=['GRACE', 'INVOLVES', 'THE', 'REMISSION', 'OF', 'SINS', 'PEACE', 'AND', 'A', 'HAPPY', 'CONSCIENCE'] +2830-3980-0041-838: hyp=['GRACE', 'INVOLVES', 'THE', 'REMISSION', 'OF', 'SINS', 'PEACE', 'AND', 'A', 'HAPPY', 'CONSCIENCE'] +2830-3980-0042-839: ref=['THE', 'WORLD', 'BRANDS', 'THIS', 'A', 'PERNICIOUS', 'DOCTRINE'] +2830-3980-0042-839: hyp=['THE', 'WORLD', 'BRANDS', 'THIS', 'A', 'PERNICIOUS', 'DOCTRINE'] +2830-3980-0043-840: ref=['EXPERIENCE', 'PROVES', 'THIS'] +2830-3980-0043-840: hyp=['EXPERIENCE', 'PROVES', 'THIS'] +2830-3980-0044-841: ref=['HOWEVER', 'THE', 'GRACE', 'AND', 'PEACE', 'OF', 'GOD', 'WILL'] +2830-3980-0044-841: hyp=['HOWEVER', 'THE', 'GRACE', 'AND', 'PEACE', 'OF', 'GOD', 'WILL'] +2830-3980-0045-842: ref=['MEN', 'SHOULD', 'NOT', 'SPECULATE', 'ABOUT', 'THE', 'NATURE', 'OF', 'GOD'] +2830-3980-0045-842: hyp=['MEN', 'SHOULD', 'NOT', 'SPECULATE', 'ABOUT', 'THE', 'NATURE', 'OF', 'GOD'] +2830-3980-0046-843: ref=['WAS', 'IT', 'NOT', 'ENOUGH', 'TO', 'SAY', 'FROM', 'GOD', 'THE', 'FATHER'] +2830-3980-0046-843: hyp=['WAS', 'IT', 'NOT', 'ENOUGH', 'TO', 'SAY', 'FROM', 'GOD', 'THE', 'FATHER'] +2830-3980-0047-844: ref=['TO', 'DO', 'SO', 'IS', 'TO', 'LOSE', 'GOD', 'ALTOGETHER', 'BECAUSE', 'GOD', 'BECOMES', 'INTOLERABLE', 'WHEN', 'WE', 'SEEK', 'TO', 'MEASURE', 'AND', 'TO', 'COMPREHEND', 'HIS', 'INFINITE', 'MAJESTY'] +2830-3980-0047-844: hyp=['TO', 'DO', 'SO', 'IS', 'TO', 'LOSE', 'GOD', 'ALTOGETHER', 'BECAUSE', 'GOD', 'BECOMES', 'INTOLERABLE', 'WHEN', 'WE', 'SEEK', 'TO', 'MEASURE', 'AND', 'TO', 'COMPREHEND', 'HIS', 'INFINITE', 'MAJESTY'] +2830-3980-0048-845: ref=['HE', 'CAME', 'DOWN', 'TO', 'EARTH', 'LIVED', 'AMONG', 'MEN', 'SUFFERED', 'WAS', 'CRUCIFIED', 'AND', 'THEN', 'HE', 'DIED', 'STANDING', 'CLEARLY', 'BEFORE', 'US', 'SO', 'THAT', 'OUR', 'HEARTS', 'AND', 'EYES', 'MAY', 'FASTEN', 'UPON', 'HIM'] +2830-3980-0048-845: hyp=['HE', 'CAME', 'DOWN', 'TO', 'EARTH', 'LIVED', 'AMONG', 'MEN', 'SUFFERED', 'WAS', 'CRUCIFIED', 'AND', 'THEN', 'HE', 'DIED', 'STANDING', 'CLEARLY', 'BEFORE', 'US', 'SO', 'THAT', 'OUR', 'HEARTS', 'AND', 'EYES', 'MAY', 'FASTEN', 'UPON', 'HIM'] +2830-3980-0049-846: ref=['EMBRACE', 'HIM', 'AND', 'FORGET', 'ABOUT', 'THE', 'NATURE', 'OF', 'GOD'] +2830-3980-0049-846: hyp=['EMBRACE', 'HIM', 'AND', 'FORGET', 'ABOUT', 'THE', 'NATURE', 'OF', 'GOD'] +2830-3980-0050-847: ref=['DID', 'NOT', 'CHRIST', 'HIMSELF', 'SAY', 'I', 'AM', 'THE', 'WAY', 'AND', 'THE', 'TRUTH', 'AND', 'THE', 'LIFE', 'NO', 'MAN', 'COMETH', 'UNTO', 'THE', 'FATHER', 'BUT', 'BY', 'ME'] +2830-3980-0050-847: hyp=['DID', 'NOT', 'CHRIST', 'HIMSELF', 'SAY', 'I', 'AM', 'THE', 'WAY', 'AND', 'THE', 'TRUTH', 'AND', 'THE', 'LIFE', 'NO', 'MAN', 'COMETH', 'UNTO', 'THE', 'FATHER', 'BUT', 'BY', 
'ME'] +2830-3980-0051-848: ref=['WHEN', 'YOU', 'ARGUE', 'ABOUT', 'THE', 'NATURE', 'OF', 'GOD', 'APART', 'FROM', 'THE', 'QUESTION', 'OF', 'JUSTIFICATION', 'YOU', 'MAY', 'BE', 'AS', 'PROFOUND', 'AS', 'YOU', 'LIKE'] +2830-3980-0051-848: hyp=['WHEN', 'YOU', 'ARGUE', 'ABOUT', 'THE', 'NATURE', 'OF', 'GOD', 'APART', 'FROM', 'THE', 'QUESTION', 'OF', 'JUSTIFICATION', 'YOU', 'MAY', 'BE', 'AS', 'PROFOUND', 'AS', 'YOU', 'LIKE'] +2830-3980-0052-849: ref=['WE', 'ARE', 'TO', 'HEAR', 'CHRIST', 'WHO', 'HAS', 'BEEN', 'APPOINTED', 'BY', 'THE', 'FATHER', 'AS', 'OUR', 'DIVINE', 'TEACHER'] +2830-3980-0052-849: hyp=['WE', 'ARE', 'TO', 'HEAR', 'CHRIST', 'WHO', 'HAS', 'BEEN', 'APPOINTED', 'BY', 'THE', 'FATHER', 'AS', 'OUR', 'DIVINE', 'TEACHER'] +2830-3980-0053-850: ref=['AT', 'THE', 'SAME', 'TIME', 'PAUL', 'CONFIRMS', 'OUR', 'CREED', 'THAT', 'CHRIST', 'IS', 'VERY', 'GOD'] +2830-3980-0053-850: hyp=['AT', 'THE', 'SAME', 'TIME', 'PAUL', 'CONFIRMS', 'OUR', 'CREED', 'THAT', 'CHRIST', 'IS', 'VERY', 'GOD'] +2830-3980-0054-851: ref=['THAT', 'CHRIST', 'IS', 'VERY', 'GOD', 'IS', 'APPARENT', 'IN', 'THAT', 'PAUL', 'ASCRIBES', 'TO', 'HIM', 'DIVINE', 'POWERS', 'EQUALLY', 'WITH', 'THE', 'FATHER', 'AS', 'FOR', 'INSTANCE', 'THE', 'POWER', 'TO', 'DISPENSE', 'GRACE', 'AND', 'PEACE'] +2830-3980-0054-851: hyp=['THAT', 'CHRIST', 'IS', 'VERY', 'GOD', 'IS', 'APPARENT', 'IN', 'THAT', 'PAUL', 'ASCRIBES', 'TO', 'HIM', 'DIVINE', 'POWERS', 'EQUALLY', 'WITH', 'THE', 'FATHER', 'AS', 'FOR', 'INSTANCE', 'THE', 'POWER', 'TO', 'DISPENSE', 'GRACE', 'AND', 'PEACE'] +2830-3980-0055-852: ref=['TO', 'BESTOW', 'PEACE', 'AND', 'GRACE', 'LIES', 'IN', 'THE', 'PROVINCE', 'OF', 'GOD', 'WHO', 'ALONE', 'CAN', 'CREATE', 'THESE', 'BLESSINGS', 'THE', 'ANGELS', 'CANNOT'] +2830-3980-0055-852: hyp=['TO', 'BESTOW', 'PEACE', 'AND', 'GRACE', 'LIES', 'IN', 'THE', 'PROVINCE', 'OF', 'GOD', 'WHO', 'ALONE', 'CAN', 'CREATE', 'THESE', 'BLESSINGS', 'THE', 'ANGELS', 'CANNOT'] +2830-3980-0056-853: ref=['OTHERWISE', 'PAUL', 'SHOULD', 'HAVE', 'WRITTEN', 'GRACE', 'FROM', 'GOD', 'THE', 'FATHER', 'AND', 'PEACE', 'FROM', 'OUR', 'LORD', 'JESUS', 'CHRIST'] +2830-3980-0056-853: hyp=['OTHERWISE', 'PAUL', 'SHOULD', 'HAVE', 'WRITTEN', 'GRACE', 'FROM', 'GOD', 'THE', 'FATHER', 'AND', 'PEACE', 'FROM', 'OUR', 'LORD', 'JESUS', 'CHRIST'] +2830-3980-0057-854: ref=['THE', 'ARIANS', 'TOOK', 'CHRIST', 'FOR', 'A', 'NOBLE', 'AND', 'PERFECT', 'CREATURE', 'SUPERIOR', 'EVEN', 'TO', 'THE', 'ANGELS', 'BECAUSE', 'BY', 'HIM', 'GOD', 'CREATED', 'HEAVEN', 'AND', 'EARTH'] +2830-3980-0057-854: hyp=['THE', 'ARIANS', 'TOOK', 'CHRIST', 'FOR', 'A', 'NOBLE', 'AND', 'PERFECT', 'CREATURE', 'SUPERIOR', 'EVEN', 'TO', 'THE', 'ANGELS', 'BECAUSE', 'BY', 'HIM', 'GOD', 'CREATED', 'HEAVEN', 'AND', 'EARTH'] +2830-3980-0058-855: ref=['MOHAMMED', 'ALSO', 'SPEAKS', 'HIGHLY', 'OF', 'CHRIST'] +2830-3980-0058-855: hyp=['MOHAMMED', 'ALSO', 'SPEAKS', 'HIGHLY', 'OF', 'CHRIST'] +2830-3980-0059-856: ref=['PAUL', 'STICKS', 'TO', 'HIS', 'THEME'] +2830-3980-0059-856: hyp=['PAUL', 'STICKS', 'TO', 'HIS', 'THEME'] +2830-3980-0060-857: ref=['HE', 'NEVER', 'LOSES', 'SIGHT', 'OF', 'THE', 'PURPOSE', 'OF', 'HIS', 'EPISTLE'] +2830-3980-0060-857: hyp=['HE', 'NEVER', 'LOSES', 'SIGHT', 'OF', 'THE', 'PURPOSE', 'OF', 'HIS', 'EPISTLE'] +2830-3980-0061-858: ref=['NOT', 'GOLD', 'OR', 'SILVER', 'OR', 'PASCHAL', 'LAMBS', 'OR', 'AN', 'ANGEL', 'BUT', 'HIMSELF', 'WHAT', 'FOR'] +2830-3980-0061-858: hyp=['NOT', 'GOLD', 'OR', 'SILVER', 'OR', 'PATIAL', 'LAMBS', 'OR', 'AN', 'ANGEL', 'BUT', 'HIMSELF', 'WHAT', 'FOR'] +2830-3980-0062-859: ref=['NOT', 'FOR', 'A', 'CROWN', 
'OR', 'A', 'KINGDOM', 'OR', 'OUR', 'GOODNESS', 'BUT', 'FOR', 'OUR', 'SINS'] +2830-3980-0062-859: hyp=['NOT', 'FOR', 'A', 'CROWN', 'OR', 'A', 'KINGDOM', 'OR', 'OUR', 'GOODNESS', 'BUT', 'FOR', 'OUR', 'SINS'] +2830-3980-0063-860: ref=['UNDERSCORE', 'THESE', 'WORDS', 'FOR', 'THEY', 'ARE', 'FULL', 'OF', 'COMFORT', 'FOR', 'SORE', 'CONSCIENCES'] +2830-3980-0063-860: hyp=['UNDERSCORE', 'THESE', 'WORDS', 'FOR', 'THEY', 'ARE', 'FULL', 'OF', 'COMFORT', 'FOR', 'SORE', 'CONSCIENCES'] +2830-3980-0064-861: ref=['HOW', 'MAY', 'WE', 'OBTAIN', 'REMISSION', 'OF', 'OUR', 'SINS'] +2830-3980-0064-861: hyp=['HOW', 'MAY', 'WE', 'OBTAIN', 'REMISSION', 'OF', 'OUR', 'SINS'] +2830-3980-0065-862: ref=['PAUL', 'ANSWERS', 'THE', 'MAN', 'WHO', 'IS', 'NAMED', 'JESUS', 'CHRIST', 'AND', 'THE', 'SON', 'OF', 'GOD', 'GAVE', 'HIMSELF', 'FOR', 'OUR', 'SINS'] +2830-3980-0065-862: hyp=['PAUL', 'ANSWERS', 'THE', 'MAN', 'WHO', 'IS', 'NAMED', 'JESUS', 'CHRIST', 'AND', 'THE', 'SON', 'OF', 'GOD', 'GAVE', 'HIMSELF', 'FOR', 'OUR', 'SINS'] +2830-3980-0066-863: ref=['SINCE', 'CHRIST', 'WAS', 'GIVEN', 'FOR', 'OUR', 'SINS', 'IT', 'STANDS', 'TO', 'REASON', 'THAT', 'THEY', 'CANNOT', 'BE', 'PUT', 'AWAY', 'BY', 'OUR', 'OWN', 'EFFORTS'] +2830-3980-0066-863: hyp=['SINCE', 'CHRIST', 'WAS', 'GIVEN', 'FOR', 'OUR', 'SINS', 'IT', 'STANDS', 'TO', 'REASON', 'THAT', 'THEY', 'CANNOT', 'BE', 'PUT', 'AWAY', 'BY', 'OUR', 'OWN', 'EFFORTS'] +2830-3980-0067-864: ref=['THIS', 'SENTENCE', 'ALSO', 'DEFINES', 'OUR', 'SINS', 'AS', 'GREAT', 'SO', 'GREAT', 'IN', 'FACT', 'THAT', 'THE', 'WHOLE', 'WORLD', 'COULD', 'NOT', 'MAKE', 'AMENDS', 'FOR', 'A', 'SINGLE', 'SIN'] +2830-3980-0067-864: hyp=['THIS', 'SENTENCE', 'ALSO', 'DEFINES', 'OUR', 'SINS', 'AS', 'GREAT', 'SO', 'GREAT', 'IN', 'FACT', 'THAT', 'THE', 'WHOLE', 'WORLD', 'COULD', 'NOT', 'MAKE', 'AMENDS', 'FOR', 'A', 'SINGLE', 'SIN'] +2830-3980-0068-865: ref=['THE', 'GREATNESS', 'OF', 'THE', 'RANSOM', 'CHRIST', 'THE', 'SON', 'OF', 'GOD', 'INDICATES', 'THIS'] +2830-3980-0068-865: hyp=['THE', 'GREATNESS', 'OF', 'THE', 'RANSOM', 'CHRIST', 'THE', 'SON', 'OF', 'GOD', 'INDICATES', 'THIS'] +2830-3980-0069-866: ref=['THE', 'VICIOUS', 'CHARACTER', 'OF', 'SIN', 'IS', 'BROUGHT', 'OUT', 'BY', 'THE', 'WORDS', 'WHO', 'GAVE', 'HIMSELF', 'FOR', 'OUR', 'SINS'] +2830-3980-0069-866: hyp=['THE', 'VICIOUS', 'CHARACTER', 'OF', 'SIN', 'IS', 'BROUGHT', 'OUT', 'BY', 'THE', 'WORDS', 'WHO', 'GAVE', 'HIMSELF', 'FOR', 'OUR', 'SINS'] +2830-3980-0070-867: ref=['BUT', 'WE', 'ARE', 'CARELESS', 'WE', 'MAKE', 'LIGHT', 'OF', 'SIN'] +2830-3980-0070-867: hyp=['BUT', 'WE', 'ARE', 'CARELESS', 'WE', 'MAKE', 'LIGHT', 'OF', 'SIN'] +2830-3980-0071-868: ref=['WE', 'THINK', 'THAT', 'BY', 'SOME', 'LITTLE', 'WORK', 'OR', 'MERIT', 'WE', 'CAN', 'DISMISS', 'SIN'] +2830-3980-0071-868: hyp=['WE', 'THINK', 'THAT', 'BY', 'SOME', 'LITTLE', 'WORK', 'OR', 'MERIT', 'WE', 'CAN', 'DISMISS', 'SIN'] +2830-3980-0072-869: ref=['THIS', 'PASSAGE', 'THEN', 'BEARS', 'OUT', 'THE', 'FACT', 'THAT', 'ALL', 'MEN', 'ARE', 'SOLD', 'UNDER', 'SIN'] +2830-3980-0072-869: hyp=['THIS', 'PASSAGE', 'THEN', 'BEARS', 'OUT', 'THE', 'FACT', 'THAT', 'ALL', 'MEN', 'ARE', 'SOLD', 'UNDER', 'SIN'] +2830-3980-0073-870: ref=['THIS', 'ATTITUDE', 'SPRINGS', 'FROM', 'A', 'FALSE', 'CONCEPTION', 'OF', 'SIN', 'THE', 'CONCEPTION', 'THAT', 'SIN', 'IS', 'A', 'SMALL', 'MATTER', 'EASILY', 'TAKEN', 'CARE', 'OF', 'BY', 'GOOD', 'WORKS', 'THAT', 'WE', 'MUST', 'PRESENT', 'OURSELVES', 'UNTO', 'GOD', 'WITH', 'A', 'GOOD', 'CONSCIENCE', 'THAT', 'WE', 'MUST', 'FEEL', 'NO', 'SIN', 'BEFORE', 'WE', 'MAY', 'FEEL', 'THAT', 'CHRIST', 'WAS', 
'GIVEN', 'FOR', 'OUR', 'SINS'] +2830-3980-0073-870: hyp=['THIS', 'ATTITUDE', 'SPRINGS', 'FROM', 'A', 'FALSE', 'CONCEPTION', 'OF', 'SIN', 'THE', 'CONCEPTION', 'THAT', 'SIN', 'IS', 'A', 'SMALL', 'MATTER', 'EASILY', 'TAKEN', 'CARE', 'OF', 'BY', 'GOOD', 'WORKS', 'THAT', 'WE', 'MUST', 'PRESENT', 'OURSELVES', 'UNTO', 'GOD', 'WITH', 'A', 'GOOD', 'CONSCIENCE', 'THAT', 'WE', 'MUST', 'FEEL', 'NO', 'SIN', 'BEFORE', 'WE', 'MAY', 'FEEL', 'THAT', 'CHRIST', 'WAS', 'GIVEN', 'FOR', 'OUR', 'SINS'] +2830-3980-0074-871: ref=['THIS', 'ATTITUDE', 'IS', 'UNIVERSAL', 'AND', 'PARTICULARLY', 'DEVELOPED', 'IN', 'THOSE', 'WHO', 'CONSIDER', 'THEMSELVES', 'BETTER', 'THAN', 'OTHERS'] +2830-3980-0074-871: hyp=['THIS', 'ATTITUDE', 'IS', 'UNIVERSAL', 'AND', 'PARTICULARLY', 'DEVELOPED', 'IN', 'THOSE', 'WHO', 'CONSIDER', 'THEMSELVES', 'BETTER', 'THAN', 'OTHERS'] +2830-3980-0075-872: ref=['BUT', 'THE', 'REAL', 'SIGNIFICANCE', 'AND', 'COMFORT', 'OF', 'THE', 'WORDS', 'FOR', 'OUR', 'SINS', 'IS', 'LOST', 'UPON', 'THEM'] +2830-3980-0075-872: hyp=['BUT', 'THE', 'REAL', 'SIGNIFICANCE', 'AND', 'COMFORT', 'OF', 'THE', 'WORDS', 'FOR', 'OUR', 'SINS', 'IS', 'LOST', 'UPON', 'THEM'] +2830-3980-0076-873: ref=['ON', 'THE', 'OTHER', 'HAND', 'WE', 'ARE', 'NOT', 'TO', 'REGARD', 'THEM', 'AS', 'SO', 'TERRIBLE', 'THAT', 'WE', 'MUST', 'DESPAIR'] +2830-3980-0076-873: hyp=['ON', 'THE', 'OTHER', 'HAND', 'WE', 'ARE', 'NOT', 'TO', 'REGARD', 'THEM', 'AS', 'SO', 'TERRIBLE', 'THAT', 'WE', 'MUST', 'DESPAIR'] +2961-960-0000-874: ref=['HE', 'PASSES', 'ABRUPTLY', 'FROM', 'PERSONS', 'TO', 'IDEAS', 'AND', 'NUMBERS', 'AND', 'FROM', 'IDEAS', 'AND', 'NUMBERS', 'TO', 'PERSONS', 'FROM', 'THE', 'HEAVENS', 'TO', 'MAN', 'FROM', 'ASTRONOMY', 'TO', 'PHYSIOLOGY', 'HE', 'CONFUSES', 'OR', 'RATHER', 'DOES', 'NOT', 'DISTINGUISH', 'SUBJECT', 'AND', 'OBJECT', 'FIRST', 'AND', 'FINAL', 'CAUSES', 'AND', 'IS', 'DREAMING', 'OF', 'GEOMETRICAL', 'FIGURES', 'LOST', 'IN', 'A', 'FLUX', 'OF', 'SENSE'] +2961-960-0000-874: hyp=['HE', 'PASSES', 'ABRUPTLY', 'FROM', 'PERSONS', 'TO', 'IDEAS', 'AND', 'NUMBERS', 'AND', 'FROM', 'IDEAS', 'AND', 'NUMBERS', 'TO', 'PERSONS', 'FROM', 'THE', 'HEAVENS', 'TO', 'MAN', 'FROM', 'ASTRONOMY', 'TO', 'PHYSIOLOGY', 'HE', 'CONFUSES', 'OR', 'RATHER', 'DOES', 'NOT', 'DISTINGUISH', 'SUBJECT', 'AND', 'OBJECT', 'FIRST', 'AND', 'FINAL', 'CAUSES', 'AND', 'IS', 'DREAMING', 'OF', 'GEOMETRICAL', 'FIGURES', 'LOST', 'IN', 'A', 'FLUX', 'OF', 'SENSE'] +2961-960-0001-875: ref=['THE', 'INFLUENCE', 'WITH', 'THE', 'TIMAEUS', 'HAS', 'EXERCISED', 'UPON', 'POSTERITY', 'IS', 'DUE', 'PARTLY', 'TO', 'A', 'MISUNDERSTANDING'] +2961-960-0001-875: hyp=['THE', 'INFLUENCE', 'WHICH', 'THE', 'TIMAEUS', 'HAS', 'EXERCISED', 'UPON', 'POSTERITY', 'IS', 'DUE', 'PARTLY', 'TO', 'A', 'MISUNDERSTANDING'] +2961-960-0002-876: ref=['IN', 'THE', 'SUPPOSED', 'DEPTHS', 'OF', 'THIS', 'DIALOGUE', 'THE', 'NEO', 'PLATONISTS', 'FOUND', 'HIDDEN', 'MEANINGS', 'AND', 'CONNECTIONS', 'WITH', 'THE', 'JEWISH', 'AND', 'CHRISTIAN', 'SCRIPTURES', 'AND', 'OUT', 'OF', 'THEM', 'THEY', 'ELICITED', 'DOCTRINES', 'QUITE', 'AT', 'VARIANCE', 'WITH', 'THE', 'SPIRIT', 'OF', 'PLATO'] +2961-960-0002-876: hyp=['IN', 'THE', 'SUPPOSED', 'DEPTHS', 'OF', 'THIS', 'DIALOGUE', 'THE', 'NEO', 'PLATINISTS', 'FOUND', 'HIDDEN', 'MEANINGS', 'IN', 'CONNECTION', 'WITH', 'THE', 'JEWISH', 'AND', 'CHRISTIAN', 'SCRIPTURES', 'AND', 'OUT', 'OF', 'THEM', 'THEY', 'ELICITED', 'DOCTRINES', 'QUITE', 'AT', 'VARIANCE', 'WITH', 'THE', 'SPIRIT', 'OF', 'PLATO'] +2961-960-0003-877: ref=['THEY', 'WERE', 'ABSORBED', 'IN', 'HIS', 'THEOLOGY', 'AND', 'WERE', 'UNDER', 'THE', 
'DOMINION', 'OF', 'HIS', 'NAME', 'WHILE', 'THAT', 'WHICH', 'WAS', 'TRULY', 'GREAT', 'AND', 'TRULY', 'CHARACTERISTIC', 'IN', 'HIM', 'HIS', 'EFFORT', 'TO', 'REALIZE', 'AND', 'CONNECT', 'ABSTRACTIONS', 'WAS', 'NOT', 'UNDERSTOOD', 'BY', 'THEM', 'AT', 'ALL'] +2961-960-0003-877: hyp=['THEY', 'WERE', 'ABSORBED', 'IN', 'HIS', 'THEOLOGY', 'AND', 'WERE', 'UNDER', 'THE', 'DOMINION', 'OF', 'HIS', 'NAME', 'WHILE', 'THAT', 'WHICH', 'WAS', 'TRULY', 'GREAT', 'AND', 'TRULY', 'CHARACTERISTIC', 'IN', 'HIM', 'HIS', 'EFFORT', 'TO', 'REALIZE', 'AND', 'CONNECT', 'ABSTRACTIONS', 'WAS', 'NOT', 'UNDERSTOOD', 'BY', 'THEM', 'AT', 'ALL'] +2961-960-0004-878: ref=['THERE', 'IS', 'NO', 'DANGER', 'OF', 'THE', 'MODERN', 'COMMENTATORS', 'ON', 'THE', 'TIMAEUS', 'FALLING', 'INTO', 'THE', 'ABSURDITIES', 'OF', 'THE', 'NEO', 'PLATONISTS'] +2961-960-0004-878: hyp=['THERE', 'IS', 'NO', 'DANGER', 'OF', 'THE', 'MODERN', 'COMMENTATORS', 'ON', 'THE', 'TIMAEUS', 'FALLING', 'INTO', 'THE', 'ABSURDITIES', 'OF', 'THE', 'NEOP', 'PLATINISTS'] +2961-960-0005-879: ref=['IN', 'THE', 'PRESENT', 'DAY', 'WE', 'ARE', 'WELL', 'AWARE', 'THAT', 'AN', 'ANCIENT', 'PHILOSOPHER', 'IS', 'TO', 'BE', 'INTERPRETED', 'FROM', 'HIMSELF', 'AND', 'BY', 'THE', 'CONTEMPORARY', 'HISTORY', 'OF', 'THOUGHT'] +2961-960-0005-879: hyp=['IN', 'THE', 'PRESENT', 'DAY', 'WE', 'ARE', 'WELL', 'AWARE', 'THAT', 'AN', 'ANCIENT', 'PHILOSOPHER', 'IS', 'TO', 'BE', 'INTERPRETED', 'FROM', 'HIMSELF', 'AND', 'BY', 'THE', 'CONTEMPORARY', 'HISTORY', 'OF', 'THOUGHT'] +2961-960-0006-880: ref=['THE', 'FANCIES', 'OF', 'THE', 'NEO', 'PLATONISTS', 'ARE', 'ONLY', 'INTERESTING', 'TO', 'US', 'BECAUSE', 'THEY', 'EXHIBIT', 'A', 'PHASE', 'OF', 'THE', 'HUMAN', 'MIND', 'WHICH', 'PREVAILED', 'WIDELY', 'IN', 'THE', 'FIRST', 'CENTURIES', 'OF', 'THE', 'CHRISTIAN', 'ERA', 'AND', 'IS', 'NOT', 'WHOLLY', 'EXTINCT', 'IN', 'OUR', 'OWN', 'DAY'] +2961-960-0006-880: hyp=['THE', 'FANCIES', 'OF', 'THE', 'NEO', 'PLATINISTS', 'ARE', 'ONLY', 'INTERESTING', 'TO', 'US', 'BECAUSE', 'THEY', 'EXHIBIT', 'A', 'PHASE', 'OF', 'THE', 'HUMAN', 'MIND', 'WHICH', 'PREVAILED', 'WIDELY', 'IN', 'THE', 'FIRST', 'CENTURIES', 'OF', 'THE', 'CHRISTIAN', 'ERA', 'AND', 'IS', 'NOT', 'WHOLLY', 'EXTINCT', 'IN', 'OUR', 'OWN', 'DAY'] +2961-960-0007-881: ref=['BUT', 'THEY', 'HAVE', 'NOTHING', 'TO', 'DO', 'WITH', 'THE', 'INTERPRETATION', 'OF', 'PLATO', 'AND', 'IN', 'SPIRIT', 'THEY', 'ARE', 'OPPOSED', 'TO', 'HIM'] +2961-960-0007-881: hyp=['BUT', 'THEY', 'HAVE', 'NOTHING', 'TO', 'DO', 'WITH', 'THE', 'INTERPRETATION', 'OF', 'PLATO', 'AND', 'IN', 'SPIRIT', 'THEY', 'ARE', 'OPPOSED', 'TO', 'HIM'] +2961-960-0008-882: ref=['WE', 'DO', 'NOT', 'KNOW', 'HOW', 'PLATO', 'WOULD', 'HAVE', 'ARRANGED', 'HIS', 'OWN', 'DIALOGUES', 'OR', 'WHETHER', 'THE', 'THOUGHT', 'OF', 'ARRANGING', 'ANY', 'OF', 'THEM', 'BESIDES', 'THE', 'TWO', 'TRILOGIES', 'WHICH', 'HE', 'HAS', 'EXPRESSLY', 'CONNECTED', 'WAS', 'EVER', 'PRESENT', 'TO', 'HIS', 'MIND'] +2961-960-0008-882: hyp=['WE', 'DO', 'NOT', 'KNOW', 'HOW', 'PLATO', 'WOULD', 'HAVE', 'ARRANGED', 'HIS', 'OWN', 'DIALOGUES', 'OR', 'WHETHER', 'THE', 'THOUGHT', 'OF', 'ARRANGING', 'ANY', 'OF', 'THEM', 'BESIDES', 'THE', 'TWO', 'TRILOGIES', 'WHICH', 'HE', 'HAS', 'EXPRESSLY', 'CONNECTED', 'WAS', 'EVER', 'PRESENT', 'TO', 'HIS', 'MIND'] +2961-960-0009-883: ref=['THE', 'DIALOGUE', 'IS', 'PRIMARILY', 'CONCERNED', 'WITH', 'THE', 'ANIMAL', 'CREATION', 'INCLUDING', 'UNDER', 'THIS', 'TERM', 'THE', 'HEAVENLY', 'BODIES', 'AND', 'WITH', 'MAN', 'ONLY', 'AS', 'ONE', 'AMONG', 'THE', 'ANIMALS'] +2961-960-0009-883: hyp=['THE', 'DIALOGUE', 'IS', 'PRIMARILY', 
'CONCERNED', 'WITH', 'THE', 'ANIMAL', 'CREATION', 'INCLUDING', 'UNDER', 'THIS', 'TERM', 'THE', 'HEAVENLY', 'BODIES', 'AND', 'WITH', 'MAN', 'ONLY', 'AS', 'ONE', 'AMONG', 'THE', 'ANIMALS'] +2961-960-0010-884: ref=['BUT', 'HE', 'HAS', 'NOT', 'AS', 'YET', 'DEFINED', 'THIS', 'INTERMEDIATE', 'TERRITORY', 'WHICH', 'LIES', 'SOMEWHERE', 'BETWEEN', 'MEDICINE', 'AND', 'MATHEMATICS', 'AND', 'HE', 'WOULD', 'HAVE', 'FELT', 'THAT', 'THERE', 'WAS', 'AS', 'GREAT', 'AN', 'IMPIETY', 'IN', 'RANKING', 'THEORIES', 'OF', 'PHYSICS', 'FIRST', 'IN', 'THE', 'ORDER', 'OF', 'KNOWLEDGE', 'AS', 'IN', 'PLACING', 'THE', 'BODY', 'BEFORE', 'THE', 'SOUL'] +2961-960-0010-884: hyp=['BUT', 'HE', 'HAS', 'NOT', 'AS', 'YET', 'DEFINED', 'THIS', 'INTERMEDIATE', 'TERRITORY', 'WHICH', 'LIES', 'SOMEWHERE', 'BETWEEN', 'MEDICINE', 'AND', 'MATHEMATICS', 'AND', 'HE', 'WOULD', 'HAVE', 'FELT', 'THAT', 'THERE', 'WAS', 'AS', 'GREAT', 'AN', 'IMPIETY', 'IN', 'RANKING', 'THEORIES', 'OF', 'PHYSICS', 'FIRST', 'IN', 'THE', 'ORDER', 'OF', 'KNOWLEDGE', 'AS', 'IN', 'PLACING', 'THE', 'BODY', 'BEFORE', 'THE', 'SOUL'] +2961-960-0011-885: ref=['WITH', 'HERACLEITUS', 'HE', 'ACKNOWLEDGES', 'THE', 'PERPETUAL', 'FLUX', 'LIKE', 'ANAXAGORAS', 'HE', 'ASSERTS', 'THE', 'PREDOMINANCE', 'OF', 'MIND', 'ALTHOUGH', 'ADMITTING', 'AN', 'ELEMENT', 'OF', 'NECESSITY', 'WHICH', 'REASON', 'IS', 'INCAPABLE', 'OF', 'SUBDUING', 'LIKE', 'THE', 'PYTHAGOREANS', 'HE', 'SUPPOSES', 'THE', 'MYSTERY', 'OF', 'THE', 'WORLD', 'TO', 'BE', 'CONTAINED', 'IN', 'NUMBER'] +2961-960-0011-885: hyp=['WITH', 'HERACLITUS', 'HE', 'ACKNOWLEDGES', 'THE', 'PERPETUAL', 'FLUX', 'LIKE', 'ANXAGARIS', 'HE', 'ASSERTS', 'THE', 'PREDOMINANCE', 'OF', 'MIND', 'ALTHOUGH', 'ADMITTING', 'AN', 'ELEMENT', 'OF', 'NECESSITY', 'WHICH', 'REASON', 'IS', 'INCAPABLE', 'OF', 'SUBDUING', 'LIKE', 'THE', 'PYTHAGOREANS', 'HE', 'SUPPOSES', 'THE', 'MYSTERY', 'OF', 'THE', 'WORLD', 'TO', 'BE', 'CONTAINED', 'IN', 'NUMBER'] +2961-960-0012-886: ref=['MANY', 'IF', 'NOT', 'ALL', 'THE', 'ELEMENTS', 'OF', 'THE', 'PRE', 'SOCRATIC', 'PHILOSOPHY', 'ARE', 'INCLUDED', 'IN', 'THE', 'TIMAEUS'] +2961-960-0012-886: hyp=['MANY', 'IF', 'NOT', 'ALL', 'THE', 'ELEMENTS', 'OF', 'THE', 'PRESOCRATIC', 'PHILOSOPHY', 'ARE', 'INCLUDED', 'IN', 'THE', 'TIMAEUS'] +2961-960-0013-887: ref=['IT', 'IS', 'PROBABLE', 'THAT', 'THE', 'RELATION', 'OF', 'THE', 'IDEAS', 'TO', 'GOD', 'OR', 'OF', 'GOD', 'TO', 'THE', 'WORLD', 'WAS', 'DIFFERENTLY', 'CONCEIVED', 'BY', 'HIM', 'AT', 'DIFFERENT', 'TIMES', 'OF', 'HIS', 'LIFE'] +2961-960-0013-887: hyp=['IT', 'IS', 'PROBABLE', 'THAT', 'THE', 'RELATION', 'OF', 'THE', 'IDEAS', 'TO', 'GOD', 'OR', 'OF', 'GOD', 'TO', 'THE', 'WORLD', 'WAS', 'DIFFERENTLY', 'CONCEIVED', 'BY', 'HIM', 'AT', 'DIFFERENT', 'TIMES', 'OF', 'HIS', 'LIFE'] +2961-960-0014-888: ref=['THE', 'IDEAS', 'ALSO', 'REMAIN', 'BUT', 'THEY', 'HAVE', 'BECOME', 'TYPES', 'IN', 'NATURE', 'FORMS', 'OF', 'MEN', 'ANIMALS', 'BIRDS', 'FISHES'] +2961-960-0014-888: hyp=['THE', 'IDEAS', 'ALSO', 'REMAIN', 'BUT', 'THEY', 'HAVE', 'BECOME', 'TYPES', 'IN', 'NATURE', 'FORMS', 'OF', 'MEN', 'ANIMALS', 'BIRDS', 'FISHES'] +2961-960-0015-889: ref=['THE', 'STYLE', 'AND', 'PLAN', 'OF', 'THE', 'TIMAEUS', 'DIFFER', 'GREATLY', 'FROM', 'THAT', 'OF', 'ANY', 'OTHER', 'OF', 'THE', 'PLATONIC', 'DIALOGUES'] +2961-960-0015-889: hyp=['THE', 'STYLE', 'AND', 'PLAN', 'OF', 'THE', 'TIMAEUS', 'DIFFER', 'GREATLY', 'FROM', 'THAT', 'OF', 'ANY', 'OTHER', 'OF', 'THE', 'PLATONIC', 'DIALOGUES'] +2961-960-0016-890: ref=['BUT', 'PLATO', 'HAS', 'NOT', 'THE', 'SAME', 'MASTERY', 'OVER', 'HIS', 'INSTRUMENT', 'WHICH', 'HE', 'EXHIBITS', 
'IN', 'THE', 'PHAEDRUS', 'OR', 'SYMPOSIUM'] +2961-960-0016-890: hyp=['BUT', 'PLATO', 'HAS', 'NOT', 'THE', 'SAME', 'MASTERY', 'OVER', 'HIS', 'INSTRUMENT', 'WHICH', 'HE', 'EXHIBITS', 'IN', 'THE', 'PHEDROS', 'OR', 'SYMPOSIUM'] +2961-960-0017-891: ref=['NOTHING', 'CAN', 'EXCEED', 'THE', 'BEAUTY', 'OR', 'ART', 'OF', 'THE', 'INTRODUCTION', 'IN', 'WHICH', 'HE', 'IS', 'USING', 'WORDS', 'AFTER', 'HIS', 'ACCUSTOMED', 'MANNER'] +2961-960-0017-891: hyp=['NOTHING', 'CAN', 'EXCEED', 'THE', 'BEAUTY', 'OR', 'ART', 'OF', 'INTRODUCTION', 'IN', 'WHICH', 'HE', 'IS', 'USING', 'WORDS', 'AFTER', 'HIS', 'ACCUSTOMED', 'MANNER'] +2961-960-0018-892: ref=['BUT', 'IN', 'THE', 'REST', 'OF', 'THE', 'WORK', 'THE', 'POWER', 'OF', 'LANGUAGE', 'SEEMS', 'TO', 'FAIL', 'HIM', 'AND', 'THE', 'DRAMATIC', 'FORM', 'IS', 'WHOLLY', 'GIVEN', 'UP'] +2961-960-0018-892: hyp=['BUT', 'IN', 'THE', 'REST', 'OF', 'THE', 'WORK', 'THE', 'POWER', 'OF', 'LANGUAGE', 'SEEMS', 'TO', 'FAIL', 'HIM', 'AND', 'THE', 'DRAMATIC', 'FORM', 'IS', 'WHOLLY', 'GIVEN', 'UP'] +2961-960-0019-893: ref=['HE', 'COULD', 'WRITE', 'IN', 'ONE', 'STYLE', 'BUT', 'NOT', 'IN', 'ANOTHER', 'AND', 'THE', 'GREEK', 'LANGUAGE', 'HAD', 'NOT', 'AS', 'YET', 'BEEN', 'FASHIONED', 'BY', 'ANY', 'POET', 'OR', 'PHILOSOPHER', 'TO', 'DESCRIBE', 'PHYSICAL', 'PHENOMENA'] +2961-960-0019-893: hyp=['HE', 'COULD', 'WRITE', 'IN', 'ONE', 'STYLE', 'BUT', 'NOT', 'IN', 'ANOTHER', 'AND', 'THE', 'GREEK', 'LANGUAGE', 'HAD', 'NOT', 'AS', 'YET', 'BEEN', 'FASHIONED', 'BY', 'ANY', 'POET', 'OR', 'PHILOSOPHER', 'TO', 'DESCRIBE', 'PHYSICAL', 'PHENOMENA'] +2961-960-0020-894: ref=['AND', 'HENCE', 'WE', 'FIND', 'THE', 'SAME', 'SORT', 'OF', 'CLUMSINESS', 'IN', 'THE', 'TIMAEUS', 'OF', 'PLATO', 'WHICH', 'CHARACTERIZES', 'THE', 'PHILOSOPHICAL', 'POEM', 'OF', 'LUCRETIUS'] +2961-960-0020-894: hyp=['AND', 'HENCE', 'WE', 'FIND', 'THE', 'SAME', 'SORT', 'OF', 'CLUMSINESS', 'IN', 'THE', 'TIMAEUS', 'OF', 'PLATO', 'WHICH', 'CHARACTERIZES', 'THE', 'PHILOSOPHICAL', 'POEM', 'OF', 'LUCRETIUS'] +2961-960-0021-895: ref=['THERE', 'IS', 'A', 'WANT', 'OF', 'FLOW', 'AND', 'OFTEN', 'A', 'DEFECT', 'OF', 'RHYTHM', 'THE', 'MEANING', 'IS', 'SOMETIMES', 'OBSCURE', 'AND', 'THERE', 'IS', 'A', 'GREATER', 'USE', 'OF', 'APPOSITION', 'AND', 'MORE', 'OF', 'REPETITION', 'THAN', 'OCCURS', 'IN', "PLATO'S", 'EARLIER', 'WRITINGS'] +2961-960-0021-895: hyp=['THERE', 'IS', 'A', 'WANT', 'OF', 'FLOW', 'AND', 'OFTEN', 'A', 'DEFECT', 'OF', 'RHYTHM', 'THE', 'MEANING', 'IS', 'SOMETIMES', 'OBSCURE', 'AND', 'THERE', 'IS', 'A', 'GREATER', 'USE', 'OF', 'APPOSITION', 'AND', 'MORE', 'OF', 'REPETITION', 'THAN', 'OCCURS', 'IN', "PLATO'S", 'EARLIER', 'WRITINGS'] +2961-960-0022-896: ref=['PLATO', 'HAD', 'NOT', 'THE', 'COMMAND', 'OF', 'HIS', 'MATERIALS', 'WHICH', 'WOULD', 'HAVE', 'ENABLED', 'HIM', 'TO', 'PRODUCE', 'A', 'PERFECT', 'WORK', 'OF', 'ART'] +2961-960-0022-896: hyp=['PLATO', 'HAD', 'NOT', 'THE', 'COMMAND', 'OF', 'HIS', 'MATERIALS', 'WHICH', 'WOULD', 'HAVE', 'ENABLED', 'HIM', 'TO', 'PRODUCE', 'A', 'PERFECT', 'WORK', 'OF', 'ART'] +2961-961-0000-897: ref=['SOCRATES', 'BEGINS', 'THE', 'TIMAEUS', 'WITH', 'A', 'SUMMARY', 'OF', 'THE', 'REPUBLIC'] +2961-961-0000-897: hyp=['SOCRATES', 'BEGINS', 'THE', 'TIMAEUS', 'WITH', 'THE', 'SUMMARY', 'OF', 'THE', 'REPUBLIC'] +2961-961-0001-898: ref=['AND', 'NOW', 'HE', 'DESIRES', 'TO', 'SEE', 'THE', 'IDEAL', 'STATE', 'SET', 'IN', 'MOTION', 'HE', 'WOULD', 'LIKE', 'TO', 'KNOW', 'HOW', 'SHE', 'BEHAVED', 'IN', 'SOME', 'GREAT', 'STRUGGLE'] +2961-961-0001-898: hyp=['AND', 'NOW', 'HE', 'DESIRES', 'TO', 'SEE', 'THE', 'IDEAL', 'STATE', 'SET', 
'IN', 'MOTION', 'HE', 'WOULD', 'LIKE', 'TO', 'KNOW', 'HOW', 'SHE', 'BEHAVED', 'IN', 'SOME', 'GREAT', 'STRUGGLE'] +2961-961-0002-899: ref=['AND', 'THEREFORE', 'TO', 'YOU', 'I', 'TURN', 'TIMAEUS', 'CITIZEN', 'OF', 'LOCRIS', 'WHO', 'ARE', 'AT', 'ONCE', 'A', 'PHILOSOPHER', 'AND', 'A', 'STATESMAN', 'AND', 'TO', 'YOU', 'CRITIAS', 'WHOM', 'ALL', 'ATHENIANS', 'KNOW', 'TO', 'BE', 'SIMILARLY', 'ACCOMPLISHED', 'AND', 'TO', 'HERMOCRATES', 'WHO', 'IS', 'ALSO', 'FITTED', 'BY', 'NATURE', 'AND', 'EDUCATION', 'TO', 'SHARE', 'IN', 'OUR', 'DISCOURSE'] +2961-961-0002-899: hyp=['AND', 'THEREFORE', 'TO', 'YOU', 'I', 'TURN', 'TIMAEUS', 'CITIZEN', 'OF', 'LOCRIS', 'WHO', 'ARE', 'AT', 'ONCE', 'A', 'PHILOSOPHER', 'AND', 'A', 'STATESMAN', 'AND', 'TO', 'YOU', 'CRITIUS', 'WHOM', 'ALL', 'ATHENIANS', 'KNOW', 'TO', 'BE', 'SIMILARLY', 'ACCOMPLISHED', 'AND', 'TO', 'HERMOCRATES', 'WHO', 'IS', 'ALSO', 'FITTED', 'BY', 'NATURE', 'AND', 'EDUCATION', 'TO', 'SHARE', 'IN', 'OUR', 'DISCOURSE'] +2961-961-0003-900: ref=['I', 'WILL', 'IF', 'TIMAEUS', 'APPROVES', 'I', 'APPROVE'] +2961-961-0003-900: hyp=['I', 'WILL', 'IF', 'TIMY', 'AS', 'APPROVES', 'I', 'APPROVE'] +2961-961-0004-901: ref=['LISTEN', 'THEN', 'SOCRATES', 'TO', 'A', 'TALE', 'OF', "SOLON'S", 'WHO', 'BEING', 'THE', 'FRIEND', 'OF', 'DROPIDAS', 'MY', 'GREAT', 'GRANDFATHER', 'TOLD', 'IT', 'TO', 'MY', 'GRANDFATHER', 'CRITIAS', 'AND', 'HE', 'TOLD', 'ME'] +2961-961-0004-901: hyp=['LISTEN', 'THEN', 'SOCRATES', 'TO', 'A', 'TALE', 'OF', 'SOLONS', 'WHO', 'BEING', 'THE', 'FRIEND', 'OF', 'DROPIDUS', 'MY', 'GREAT', 'GRANDFATHER', 'TOLD', 'IT', 'TO', 'MY', 'GRANDFATHER', 'CRITIUS', 'AND', 'HE', 'TOLD', 'ME'] +2961-961-0005-902: ref=['SOME', 'POEMS', 'OF', 'SOLON', 'WERE', 'RECITED', 'BY', 'THE', 'BOYS'] +2961-961-0005-902: hyp=['SOME', 'POEMS', 'OF', 'SOLON', 'WERE', 'RECITED', 'BY', 'THE', 'BOYS'] +2961-961-0006-903: ref=['AND', 'WHAT', 'WAS', 'THE', 'SUBJECT', 'OF', 'THE', 'POEM', 'SAID', 'THE', 'PERSON', 'WHO', 'MADE', 'THE', 'REMARK'] +2961-961-0006-903: hyp=['AND', 'WHAT', 'WAS', 'THE', 'SUBJECT', 'OF', 'THE', 'POEM', 'SAID', 'THE', 'PERSON', 'WHO', 'MADE', 'THE', 'REMARK'] +2961-961-0007-904: ref=['THE', 'SUBJECT', 'WAS', 'A', 'VERY', 'NOBLE', 'ONE', 'HE', 'DESCRIBED', 'THE', 'MOST', 'FAMOUS', 'ACTION', 'IN', 'WHICH', 'THE', 'ATHENIAN', 'PEOPLE', 'WERE', 'EVER', 'ENGAGED'] +2961-961-0007-904: hyp=['HIS', 'SUBJECT', 'WAS', 'A', 'VERY', 'NOBLE', 'ONE', 'HE', 'DESCRIBED', 'THE', 'MOST', 'FAMOUS', 'ACTION', 'IN', 'WHICH', 'THE', 'ATHENIAN', 'PEOPLE', 'WERE', 'EVER', 'ENGAGED'] +2961-961-0008-905: ref=['BUT', 'THE', 'MEMORY', 'OF', 'THEIR', 'EXPLOITS', 'HAS', 'PASSED', 'AWAY', 'OWING', 'TO', 'THE', 'LAPSE', 'OF', 'TIME', 'AND', 'THE', 'EXTINCTION', 'OF', 'THE', 'ACTORS'] +2961-961-0008-905: hyp=['BUT', 'THE', 'MEMORY', 'OF', 'THEIR', 'EXPLOITS', 'HAD', 'PASSED', 'AWAY', 'OWING', 'TO', 'THE', 'LAPSE', 'OF', 'TIME', 'AND', 'THE', 'EXTINCTION', 'OF', 'THE', 'ACTORS'] +2961-961-0009-906: ref=['TELL', 'US', 'SAID', 'THE', 'OTHER', 'THE', 'WHOLE', 'STORY', 'AND', 'WHERE', 'SOLON', 'HEARD', 'THE', 'STORY'] +2961-961-0009-906: hyp=['TELL', 'US', 'SAID', 'THE', 'OTHER', 'THE', 'WHOLE', 'STORY', 'AND', 'WHERE', 'SOLOMON', 'HEARD', 'THE', 'STORY'] +2961-961-0010-907: ref=['BUT', 'IN', 'EGYPT', 'THE', 'TRADITIONS', 'OF', 'OUR', 'OWN', 'AND', 'OTHER', 'LANDS', 'ARE', 'BY', 'US', 'REGISTERED', 'FOR', 'EVER', 'IN', 'OUR', 'TEMPLES'] +2961-961-0010-907: hyp=['BUT', 'IN', 'EGYPT', 'THE', 'TRADITIONS', 'OF', 'OUR', 'OWN', 'AND', 'OTHER', 'LANDS', 'ARE', 'BY', 'US', 'REGISTERED', 'FOR', 'EVER', 'IN', 'OUR', 
'TEMPLES'] +2961-961-0011-908: ref=['THE', 'GENEALOGIES', 'WHICH', 'YOU', 'HAVE', 'RECITED', 'TO', 'US', 'OUT', 'OF', 'YOUR', 'OWN', 'ANNALS', 'SOLON', 'ARE', 'A', 'MERE', "CHILDREN'S", 'STORY'] +2961-961-0011-908: hyp=['THE', 'GENEALOGIES', 'WHICH', 'YOU', 'HAVE', 'RECITED', 'TO', 'US', 'OUT', 'OF', 'YOUR', 'OWN', 'ANNAL', 'SONG', 'ARE', 'A', 'MERE', "CHILDREN'S", 'STORY'] +2961-961-0012-909: ref=['FOR', 'IN', 'THE', 'TIMES', 'BEFORE', 'THE', 'GREAT', 'FLOOD', 'ATHENS', 'WAS', 'THE', 'GREATEST', 'AND', 'BEST', 'OF', 'CITIES', 'AND', 'DID', 'THE', 'NOBLEST', 'DEEDS', 'AND', 'HAD', 'THE', 'BEST', 'CONSTITUTION', 'OF', 'ANY', 'UNDER', 'THE', 'FACE', 'OF', 'HEAVEN'] +2961-961-0012-909: hyp=['FOR', 'IN', 'THE', 'TIMES', 'BEFORE', 'THE', 'GREAT', 'FLOOD', 'ATHENS', 'WAS', 'THE', 'GREATEST', 'AND', 'BEST', 'OF', 'CITIES', 'AND', 'DID', 'THE', 'NOBLEST', 'DEEDS', 'AND', 'HAD', 'THE', 'BEST', 'CONSTITUTION', 'OF', 'ANY', 'UNDER', 'THE', 'FACE', 'OF', 'HEAVEN'] +2961-961-0013-910: ref=['SOLON', 'MARVELLED', 'AND', 'DESIRED', 'TO', 'BE', 'INFORMED', 'OF', 'THE', 'PARTICULARS'] +2961-961-0013-910: hyp=['SOLOMON', 'MARVELLED', 'AND', 'DESIRED', 'TO', 'BE', 'INFORMED', 'OF', 'THE', 'PARTICULARS'] +2961-961-0014-911: ref=['NINE', 'THOUSAND', 'YEARS', 'HAVE', 'ELAPSED', 'SINCE', 'SHE', 'FOUNDED', 'YOURS', 'AND', 'EIGHT', 'THOUSAND', 'SINCE', 'SHE', 'FOUNDED', 'OURS', 'AS', 'OUR', 'ANNALS', 'RECORD'] +2961-961-0014-911: hyp=['NINE', 'THOUSAND', 'YEARS', 'HAVE', 'ELAPSED', 'SINCE', 'SHE', 'FOUND', 'IT', 'YOURS', 'AND', 'EIGHT', 'THOUSAND', 'SINCE', 'SHE', 'FOUND', 'IT', 'OURS', 'AS', 'OUR', 'ANNALS', 'RECORD'] +2961-961-0015-912: ref=['MANY', 'LAWS', 'EXIST', 'AMONG', 'US', 'WHICH', 'ARE', 'THE', 'COUNTERPART', 'OF', 'YOURS', 'AS', 'THEY', 'WERE', 'IN', 'THE', 'OLDEN', 'TIME'] +2961-961-0015-912: hyp=['MANY', 'LAWS', 'EXIST', 'AMONG', 'US', 'WHICH', 'ARE', 'THE', 'COUNTERPART', 'OF', 'YOURS', 'AS', 'THEY', 'WERE', 'IN', 'THE', 'OLDEN', 'TIME'] +2961-961-0016-913: ref=['I', 'WILL', 'BRIEFLY', 'DESCRIBE', 'THEM', 'TO', 'YOU', 'AND', 'YOU', 'SHALL', 'READ', 'THE', 'ACCOUNT', 'OF', 'THEM', 'AT', 'YOUR', 'LEISURE', 'IN', 'THE', 'SACRED', 'REGISTERS'] +2961-961-0016-913: hyp=['I', 'WILL', 'BRIEFLY', 'DESCRIBE', 'THEM', 'TO', 'YOU', 'AND', 'YOU', 'SHALL', 'READ', 'THE', 'ACCOUNT', 'OF', 'THEM', 'AT', 'YOUR', 'LEISURE', 'IN', 'THE', 'SACRED', 'REGISTERS'] +2961-961-0017-914: ref=['OBSERVE', 'AGAIN', 'WHAT', 'CARE', 'THE', 'LAW', 'TOOK', 'IN', 'THE', 'PURSUIT', 'OF', 'WISDOM', 'SEARCHING', 'OUT', 'THE', 'DEEP', 'THINGS', 'OF', 'THE', 'WORLD', 'AND', 'APPLYING', 'THEM', 'TO', 'THE', 'USE', 'OF', 'MAN'] +2961-961-0017-914: hyp=['OBSERVE', 'AGAIN', 'WHAT', 'CARE', 'THE', 'LAW', 'TOOK', 'IN', 'THE', 'PURSUIT', 'OF', 'WISDOM', 'SEARCHING', 'OUT', 'THE', 'DEEP', 'THINGS', 'OF', 'THE', 'WORLD', 'AND', 'APPLYING', 'THEM', 'TO', 'THE', 'USE', 'OF', 'MEN'] +2961-961-0018-915: ref=['THE', 'MOST', 'FAMOUS', 'OF', 'THEM', 'ALL', 'WAS', 'THE', 'OVERTHROW', 'OF', 'THE', 'ISLAND', 'OF', 'ATLANTIS'] +2961-961-0018-915: hyp=['THE', 'MOST', 'FAMOUS', 'OF', 'THEM', 'ALL', 'WAS', 'THE', 'OVERTHROW', 'OF', 'THE', 'ISLAND', 'OF', 'ATLANTIS'] +2961-961-0019-916: ref=['FOR', 'AT', 'THE', 'PERIL', 'OF', 'HER', 'OWN', 'EXISTENCE', 'AND', 'WHEN', 'THE', 'OTHER', 'HELLENES', 'HAD', 'DESERTED', 'HER', 'SHE', 'REPELLED', 'THE', 'INVADER', 'AND', 'OF', 'HER', 'OWN', 'ACCORD', 'GAVE', 'LIBERTY', 'TO', 'ALL', 'THE', 'NATIONS', 'WITHIN', 'THE', 'PILLARS'] +2961-961-0019-916: hyp=['FOR', 'AT', 'THE', 'PERIL', 'OF', 'HER', 'OWN', 'EXISTENCE', 'AND', 
'WHEN', 'THE', 'OTHER', 'HELLENS', 'HAD', 'DESERTED', 'HER', 'SHE', 'REPELLED', 'THE', 'INVADER', 'AND', 'OF', 'HER', 'OWN', 'ACCORD', 'GAVE', 'LIBERTY', 'TO', 'ALL', 'THE', 'NATIONS', 'WITHIN', 'THE', 'PILLARS'] +2961-961-0020-917: ref=['THIS', 'IS', 'THE', 'EXPLANATION', 'OF', 'THE', 'SHALLOWS', 'WHICH', 'ARE', 'FOUND', 'IN', 'THAT', 'PART', 'OF', 'THE', 'ATLANTIC', 'OCEAN'] +2961-961-0020-917: hyp=['THIS', 'IS', 'THE', 'EXPLANATION', 'OF', 'THE', 'SHALLOWS', 'WHICH', 'ARE', 'FOUND', 'IN', 'THAT', 'PART', 'OF', 'THE', 'ATLANTIC', 'OCEAN'] +2961-961-0021-918: ref=['BUT', 'I', 'WOULD', 'NOT', 'SPEAK', 'AT', 'THE', 'TIME', 'BECAUSE', 'I', 'WANTED', 'TO', 'REFRESH', 'MY', 'MEMORY'] +2961-961-0021-918: hyp=['BUT', 'I', 'WOULD', 'NOT', 'SPEAK', 'AT', 'THE', 'TIME', 'BECAUSE', 'I', 'WANTED', 'TO', 'REFRESH', 'MY', 'MEMORY'] +2961-961-0022-919: ref=['THEN', 'NOW', 'LET', 'ME', 'EXPLAIN', 'TO', 'YOU', 'THE', 'ORDER', 'OF', 'OUR', 'ENTERTAINMENT', 'FIRST', 'TIMAEUS', 'WHO', 'IS', 'A', 'NATURAL', 'PHILOSOPHER', 'WILL', 'SPEAK', 'OF', 'THE', 'ORIGIN', 'OF', 'THE', 'WORLD', 'GOING', 'DOWN', 'TO', 'THE', 'CREATION', 'OF', 'MAN', 'AND', 'THEN', 'I', 'SHALL', 'RECEIVE', 'THE', 'MEN', 'WHOM', 'HE', 'HAS', 'CREATED', 'AND', 'SOME', 'OF', 'WHOM', 'WILL', 'HAVE', 'BEEN', 'EDUCATED', 'BY', 'YOU', 'AND', 'INTRODUCE', 'THEM', 'TO', 'YOU', 'AS', 'THE', 'LOST', 'ATHENIAN', 'CITIZENS', 'OF', 'WHOM', 'THE', 'EGYPTIAN', 'RECORD', 'SPOKE'] +2961-961-0022-919: hyp=['THEN', 'NOW', 'LET', 'ME', 'EXPLAIN', 'TO', 'YOU', 'THE', 'ORDER', 'OF', 'OUR', 'ENTERTAINMENT', 'FIRST', 'TIMAEUS', 'WHO', 'IS', 'A', 'NATURAL', 'PHILOSOPHER', 'WILL', 'SPEAK', 'OF', 'THE', 'ORIGIN', 'OF', 'THE', 'WORLD', 'GOING', 'DOWN', 'TO', 'THE', 'CREATION', 'OF', 'MEN', 'AND', 'THEN', 'I', 'SHALL', 'RECEIVE', 'THE', 'MEN', 'WHOM', 'HE', 'HAS', 'CREATED', 'AND', 'SOME', 'OF', 'WHOM', 'WILL', 'HAVE', 'BEEN', 'EDUCATED', 'BY', 'YOU', 'AND', 'INTRODUCED', 'THEM', 'TO', 'YOU', 'AS', 'THE', 'LOST', 'ATHENIAN', 'CITIZENS', 'OF', 'WHOM', 'THE', 'EGYPTIAN', 'RECORDS', 'SPOKE'] +3570-5694-0000-920: ref=['BUT', 'ALREADY', 'AT', 'A', 'POINT', 'IN', 'ECONOMIC', 'EVOLUTION', 'FAR', 'ANTEDATING', 'THE', 'EMERGENCE', 'OF', 'THE', 'LADY', 'SPECIALISED', 'CONSUMPTION', 'OF', 'GOODS', 'AS', 'AN', 'EVIDENCE', 'OF', 'PECUNIARY', 'STRENGTH', 'HAD', 'BEGUN', 'TO', 'WORK', 'OUT', 'IN', 'A', 'MORE', 'OR', 'LESS', 'ELABORATE', 'SYSTEM'] +3570-5694-0000-920: hyp=['BUT', 'ALREADY', 'AT', 'A', 'POINT', 'IN', 'ECONOMIC', 'EVOLUTION', 'FAR', 'ANTEDATING', 'THE', 'EMERGENCE', 'OF', 'THE', 'LADY', 'SPECIALIZED', 'CONSUMPTION', 'OF', 'GOODS', 'AS', 'AN', 'EVIDENCE', 'OF', 'PECUNIARY', 'STRENGTH', 'HAD', 'BEGUN', 'TO', 'WORK', 'OUT', 'IN', 'A', 'MORE', 'OR', 'LESS', 'ELABORATE', 'SYSTEM'] +3570-5694-0001-921: ref=['THE', 'UTILITY', 'OF', 'CONSUMPTION', 'AS', 'AN', 'EVIDENCE', 'OF', 'WEALTH', 'IS', 'TO', 'BE', 'CLASSED', 'AS', 'A', 'DERIVATIVE', 'GROWTH'] +3570-5694-0001-921: hyp=['THE', 'UTILITY', 'OF', 'CONSUMPTION', 'AS', 'AN', 'EVIDENCE', 'OF', 'WEALTH', 'IS', 'TO', 'BE', 'CLASSED', 'AS', 'A', 'DERIVATIVE', 'GROWTH'] +3570-5694-0002-922: ref=['SUCH', 'CONSUMPTION', 'AS', 'FALLS', 'TO', 'THE', 'WOMEN', 'IS', 'MERELY', 'INCIDENTAL', 'TO', 'THEIR', 'WORK', 'IT', 'IS', 'A', 'MEANS', 'TO', 'THEIR', 'CONTINUED', 'LABOUR', 'AND', 'NOT', 'A', 'CONSUMPTION', 'DIRECTED', 'TO', 'THEIR', 'OWN', 'COMFORT', 'AND', 'FULNESS', 'OF', 'LIFE'] +3570-5694-0002-922: hyp=['SUCH', 'CONSUMPTION', 'AS', 'FALLS', 'TO', 'THE', 'WOMEN', 'IS', 'MERELY', 'INCIDENTAL', 'TO', 'THEIR', 'WORK', 'IT', 'IS', 'A', 
'MEANS', 'TO', 'THEIR', 'CONTINUED', 'LABOR', 'AND', 'NOT', 'A', 'CONSUMPTION', 'DIRECTED', 'TO', 'THEIR', 'OWN', 'COMFORT', 'AND', 'FULLNESS', 'OF', 'LIFE'] +3570-5694-0003-923: ref=['WITH', 'A', 'FURTHER', 'ADVANCE', 'IN', 'CULTURE', 'THIS', 'TABU', 'MAY', 'CHANGE', 'INTO', 'SIMPLE', 'CUSTOM', 'OF', 'A', 'MORE', 'OR', 'LESS', 'RIGOROUS', 'CHARACTER', 'BUT', 'WHATEVER', 'BE', 'THE', 'THEORETICAL', 'BASIS', 'OF', 'THE', 'DISTINCTION', 'WHICH', 'IS', 'MAINTAINED', 'WHETHER', 'IT', 'BE', 'A', 'TABU', 'OR', 'A', 'LARGER', 'CONVENTIONALITY', 'THE', 'FEATURES', 'OF', 'THE', 'CONVENTIONAL', 'SCHEME', 'OF', 'CONSUMPTION', 'DO', 'NOT', 'CHANGE', 'EASILY'] +3570-5694-0003-923: hyp=['WITH', 'A', 'FURTHER', 'ADVANCE', 'IN', 'CULTURE', 'THIS', 'TABOO', 'MAY', 'CHANGE', 'INTO', 'SIMPLE', 'CUSTOM', 'OF', 'A', 'MORE', 'OR', 'LESS', 'RIGOROUS', 'CHARACTER', 'BUT', 'WHATEVER', 'BE', 'THE', 'THEORETICAL', 'BASIS', 'OF', 'THE', 'DISTINCTION', 'WHICH', 'IS', 'MAINTAINED', 'WHETHER', 'IT', 'BE', 'A', 'TABOO', 'OR', 'A', 'LARGER', 'CONVENTIONALITY', 'THE', 'FEATURES', 'OF', 'THE', 'CONVENTIONAL', 'SCHEME', 'OF', 'CONSUMPTION', 'DO', 'NOT', 'CHANGE', 'EASILY'] +3570-5694-0004-924: ref=['IN', 'THE', 'NATURE', 'OF', 'THINGS', 'LUXURIES', 'AND', 'THE', 'COMFORTS', 'OF', 'LIFE', 'BELONG', 'TO', 'THE', 'LEISURE', 'CLASS'] +3570-5694-0004-924: hyp=['IN', 'THE', 'NATURE', 'OF', 'THINGS', 'LUXURIES', 'AND', 'THE', 'COMFORTS', 'OF', 'LIFE', 'BELONG', 'TO', 'THE', 'LEISURE', 'CLASS'] +3570-5694-0005-925: ref=['UNDER', 'THE', 'TABU', 'CERTAIN', 'VICTUALS', 'AND', 'MORE', 'PARTICULARLY', 'CERTAIN', 'BEVERAGES', 'ARE', 'STRICTLY', 'RESERVED', 'FOR', 'THE', 'USE', 'OF', 'THE', 'SUPERIOR', 'CLASS'] +3570-5694-0005-925: hyp=['UNDER', 'THE', 'TABOO', 'CERTAIN', 'VICTUALS', 'AND', 'MORE', 'PARTICULARLY', 'CERTAIN', 'BEVERAGES', 'ARE', 'STRICTLY', 'RESERVED', 'FOR', 'THE', 'USE', 'OF', 'THE', 'SUPERIOR', 'CLASS'] +3570-5694-0006-926: ref=['DRUNKENNESS', 'AND', 'THE', 'OTHER', 'PATHOLOGICAL', 'CONSEQUENCES', 'OF', 'THE', 'FREE', 'USE', 'OF', 'STIMULANTS', 'THEREFORE', 'TEND', 'IN', 'THEIR', 'TURN', 'TO', 'BECOME', 'HONORIFIC', 'AS', 'BEING', 'A', 'MARK', 'AT', 'THE', 'SECOND', 'REMOVE', 'OF', 'THE', 'SUPERIOR', 'STATUS', 'OF', 'THOSE', 'WHO', 'ARE', 'ABLE', 'TO', 'AFFORD', 'THE', 'INDULGENCE'] +3570-5694-0006-926: hyp=['DRUNKENNESS', 'AND', 'THE', 'OTHER', 'PATHOLOGICAL', 'CONSEQUENCES', 'OF', 'THE', 'FREE', 'USE', 'OF', 'STIMULANTS', 'THEREFORE', 'TEND', 'IN', 'THEIR', 'TURN', 'TO', 'BECOME', 'HONORIFIC', 'AS', 'BEING', 'A', 'MARK', 'AT', 'THE', 'SECOND', 'REMOVE', 'OF', 'THE', 'SUPERIOR', 'STATUS', 'OF', 'THOSE', 'WHO', 'ARE', 'ABLE', 'TO', 'AFFORD', 'THE', 'INDULGENCE'] +3570-5694-0007-927: ref=['IT', 'HAS', 'EVEN', 'HAPPENED', 'THAT', 'THE', 'NAME', 'FOR', 'CERTAIN', 'DISEASED', 'CONDITIONS', 'OF', 'THE', 'BODY', 'ARISING', 'FROM', 'SUCH', 'AN', 'ORIGIN', 'HAS', 'PASSED', 'INTO', 'EVERYDAY', 'SPEECH', 'AS', 'A', 'SYNONYM', 'FOR', 'NOBLE', 'OR', 'GENTLE'] +3570-5694-0007-927: hyp=['IT', 'HAS', 'EVEN', 'HAPPENED', 'THAT', 'THE', 'NAME', 'FOR', 'CERTAIN', 'DISEASED', 'CONDITIONS', 'OF', 'THE', 'BODY', 'ARISING', 'FROM', 'SUCH', 'AN', 'ORIGIN', 'HAS', 'PASSED', 'INTO', 'EVERYDAY', 'SPEECH', 'AS', 'A', 'SYNONYM', 'FOR', 'NOBLE', 'OR', 'GENTLE'] +3570-5694-0008-928: ref=['THE', 'CONSUMPTION', 'OF', 'LUXURIES', 'IN', 'THE', 'TRUE', 'SENSE', 'IS', 'A', 'CONSUMPTION', 'DIRECTED', 'TO', 'THE', 'COMFORT', 'OF', 'THE', 'CONSUMER', 'HIMSELF', 'AND', 'IS', 'THEREFORE', 'A', 'MARK', 'OF', 'THE', 'MASTER'] +3570-5694-0008-928: hyp=['THE', 
'CONSUMPTION', 'OF', 'LUXURIES', 'IN', 'THE', 'TRUE', 'SENSE', 'IS', 'A', 'CONSUMPTION', 'DIRECTED', 'TO', 'THE', 'COMFORT', 'OF', 'THE', 'CONSUMER', 'HIMSELF', 'AND', 'IS', 'THEREFORE', 'A', 'MARK', 'OF', 'THE', 'MASTER'] +3570-5694-0009-929: ref=['WITH', 'MANY', 'QUALIFICATIONS', 'WITH', 'MORE', 'QUALIFICATIONS', 'AS', 'THE', 'PATRIARCHAL', 'TRADITION', 'HAS', 'GRADUALLY', 'WEAKENED', 'THE', 'GENERAL', 'RULE', 'IS', 'FELT', 'TO', 'BE', 'RIGHT', 'AND', 'BINDING', 'THAT', 'WOMEN', 'SHOULD', 'CONSUME', 'ONLY', 'FOR', 'THE', 'BENEFIT', 'OF', 'THEIR', 'MASTERS'] +3570-5694-0009-929: hyp=['WITH', 'MANY', 'QUALIFICATIONS', 'WITH', 'MORE', 'QUALIFICATIONS', 'AS', 'THE', 'PATRIARCHAL', 'TRADITION', 'HAS', 'GRADUALLY', 'WEAKENED', 'THE', 'GENERAL', 'RULE', 'IS', 'FELT', 'TO', 'BE', 'RIGHT', 'AND', 'BINDING', 'THAT', 'WOMEN', 'SHOULD', 'CONSUME', 'ONLY', 'FOR', 'THE', 'BENEFIT', 'OF', 'THEIR', 'MASTERS'] +3570-5694-0010-930: ref=['THE', 'OBJECTION', 'OF', 'COURSE', 'PRESENTS', 'ITSELF', 'THAT', 'EXPENDITURE', 'ON', "WOMEN'S", 'DRESS', 'AND', 'HOUSEHOLD', 'PARAPHERNALIA', 'IS', 'AN', 'OBVIOUS', 'EXCEPTION', 'TO', 'THIS', 'RULE', 'BUT', 'IT', 'WILL', 'APPEAR', 'IN', 'THE', 'SEQUEL', 'THAT', 'THIS', 'EXCEPTION', 'IS', 'MUCH', 'MORE', 'OBVIOUS', 'THAN', 'SUBSTANTIAL'] +3570-5694-0010-930: hyp=['THE', 'OBJECTION', 'OF', 'COURSE', 'PRESENTS', 'ITSELF', 'THAT', 'EXPENDITURE', 'ON', "WOMEN'S", 'DRESS', 'AND', 'HOUSEHOLD', 'PARAPHERNALIA', 'IS', 'AN', 'OBVIOUS', 'EXCEPTION', 'TO', 'THIS', 'RULE', 'BUT', 'IT', 'WILL', 'APPEAR', 'IN', 'THE', 'SEQUEL', 'THAT', 'THIS', 'EXCEPTION', 'IS', 'MUCH', 'MORE', 'OBVIOUS', 'THAN', 'SUBSTANTIAL'] +3570-5694-0011-931: ref=['THE', 'CUSTOM', 'OF', 'FESTIVE', 'GATHERINGS', 'PROBABLY', 'ORIGINATED', 'IN', 'MOTIVES', 'OF', 'CONVIVIALITY', 'AND', 'RELIGION', 'THESE', 'MOTIVES', 'ARE', 'ALSO', 'PRESENT', 'IN', 'THE', 'LATER', 'DEVELOPMENT', 'BUT', 'THEY', 'DO', 'NOT', 'CONTINUE', 'TO', 'BE', 'THE', 'SOLE', 'MOTIVES'] +3570-5694-0011-931: hyp=['THE', 'CUSTOM', 'OF', 'FESTIVE', 'GATHERINGS', 'PROBABLY', 'ORIGINATED', 'IN', 'MOTIVES', 'OF', 'CONVIVIALITY', 'AND', 'RELIGION', 'THESE', 'MOTIVES', 'ARE', 'ALSO', 'PRESENT', 'IN', 'THE', 'LATER', 'DEVELOPMENT', 'BUT', 'THEY', 'DO', 'NOT', 'CONTINUE', 'TO', 'BE', 'THE', 'SOLE', 'MOTIVES'] +3570-5694-0012-932: ref=['THERE', 'IS', 'A', 'MORE', 'OR', 'LESS', 'ELABORATE', 'SYSTEM', 'OF', 'RANK', 'AND', 'GRADES'] +3570-5694-0012-932: hyp=['THERE', 'IS', 'A', 'MORE', 'OR', 'LESS', 'ELABORATE', 'SYSTEM', 'OF', 'RANK', 'AND', 'GRADES'] +3570-5694-0013-933: ref=['THIS', 'DIFFERENTIATION', 'IS', 'FURTHERED', 'BY', 'THE', 'INHERITANCE', 'OF', 'WEALTH', 'AND', 'THE', 'CONSEQUENT', 'INHERITANCE', 'OF', 'GENTILITY'] +3570-5694-0013-933: hyp=['THIS', 'DIFFERENTIATION', 'IS', 'FURTHERED', 'BY', 'THE', 'INHERITANCE', 'OF', 'WEALTH', 'AND', 'THE', 'CONSEQUENT', 'INHERITANCE', 'OF', 'GENTILITY'] +3570-5694-0014-934: ref=['MANY', 'OF', 'THESE', 'AFFILIATED', 'GENTLEMEN', 'OF', 'LEISURE', 'ARE', 'AT', 'THE', 'SAME', 'TIME', 'LESSER', 'MEN', 'OF', 'SUBSTANCE', 'IN', 'THEIR', 'OWN', 'RIGHT', 'SO', 'THAT', 'SOME', 'OF', 'THEM', 'ARE', 'SCARCELY', 'AT', 'ALL', 'OTHERS', 'ONLY', 'PARTIALLY', 'TO', 'BE', 'RATED', 'AS', 'VICARIOUS', 'CONSUMERS'] +3570-5694-0014-934: hyp=['MANY', 'OF', 'THESE', 'AFFILIATED', 'GENTLEMEN', 'OF', 'LEISURE', 'ARE', 'AT', 'THE', 'SAME', 'TIME', 'LESSER', 'MEN', 'OF', 'SUBSTANCE', 'IN', 'THEIR', 'OWN', 'RIGHT', 'SO', 'THAT', 'SOME', 'OF', 'THEM', 'ARE', 'SCARCELY', 'AT', 'ALL', 'OTHERS', 'ONLY', 'PARTIALLY', 'TO', 'BE', 'RATED', 'AS', 
'VICARIOUS', 'CONSUMERS'] +3570-5694-0015-935: ref=['SO', 'MANY', 'OF', 'THEM', 'HOWEVER', 'AS', 'MAKE', 'UP', 'THE', 'RETAINER', 'AND', 'HANGERS', 'ON', 'OF', 'THE', 'PATRON', 'MAY', 'BE', 'CLASSED', 'AS', 'VICARIOUS', 'CONSUMER', 'WITHOUT', 'QUALIFICATION'] +3570-5694-0015-935: hyp=['SO', 'MANY', 'OF', 'THEM', 'HOWEVER', 'AS', 'MAKE', 'UP', 'THE', 'RETAINER', 'AND', 'HANGERS', 'ON', 'OF', 'THE', 'PATRON', 'MAY', 'BE', 'CLASSED', 'AS', 'VICARIOUS', 'CONSUMER', 'WITHOUT', 'QUALIFICATION'] +3570-5694-0016-936: ref=['MANY', 'OF', 'THESE', 'AGAIN', 'AND', 'ALSO', 'MANY', 'OF', 'THE', 'OTHER', 'ARISTOCRACY', 'OF', 'LESS', 'DEGREE', 'HAVE', 'IN', 'TURN', 'ATTACHED', 'TO', 'THEIR', 'PERSONS', 'A', 'MORE', 'OR', 'LESS', 'COMPREHENSIVE', 'GROUP', 'OF', 'VICARIOUS', 'CONSUMER', 'IN', 'THE', 'PERSONS', 'OF', 'THEIR', 'WIVES', 'AND', 'CHILDREN', 'THEIR', 'SERVANTS', 'RETAINERS', 'ET', 'CETERA'] +3570-5694-0016-936: hyp=['MANY', 'OF', 'THESE', 'AGAIN', 'AND', 'ALSO', 'MANY', 'OF', 'THE', 'OTHER', 'ARISTOCRACY', 'OF', 'LESS', 'DEGREE', 'HAVE', 'IN', 'TURN', 'ATTACHED', 'TO', 'THEIR', 'PERSONS', 'A', 'MORE', 'OR', 'LESS', 'COMPREHENSIVE', 'GROUP', 'OF', 'VICARIOUS', 'CONSUMER', 'IN', 'THE', 'PERSONS', 'OF', 'THEIR', 'WIVES', 'AND', 'CHILDREN', 'THEIR', 'SERVANTS', 'RETAINERS', 'ET', 'CETERA'] +3570-5694-0017-937: ref=['THE', 'WEARING', 'OF', 'UNIFORMS', 'OR', 'LIVERIES', 'IMPLIES', 'A', 'CONSIDERABLE', 'DEGREE', 'OF', 'DEPENDENCE', 'AND', 'MAY', 'EVEN', 'BE', 'SAID', 'TO', 'BE', 'A', 'MARK', 'OF', 'SERVITUDE', 'REAL', 'OR', 'OSTENSIBLE'] +3570-5694-0017-937: hyp=['THE', 'WEARING', 'OF', 'UNIFORMS', 'OR', 'LIVERIES', 'IMPLIES', 'A', 'CONSIDERABLE', 'DEGREE', 'OF', 'DEPENDENCE', 'AND', 'MAY', 'EVEN', 'BE', 'SAID', 'TO', 'BE', 'A', 'MARK', 'OF', 'SERVITUDE', 'REAL', 'OR', 'OSTENSIBLE'] +3570-5694-0018-938: ref=['THE', 'WEARERS', 'OF', 'UNIFORMS', 'AND', 'LIVERIES', 'MAY', 'BE', 'ROUGHLY', 'DIVIDED', 'INTO', 'TWO', 'CLASSES', 'THE', 'FREE', 'AND', 'THE', 'SERVILE', 'OR', 'THE', 'NOBLE', 'AND', 'THE', 'IGNOBLE'] +3570-5694-0018-938: hyp=['THE', 'WEARERS', 'OF', 'UNIFORMS', 'AND', 'LIVERIES', 'MAY', 'BE', 'ROUGHLY', 'DIVIDED', 'INTO', 'TWO', 'CLASSES', 'THE', 'FREE', 'AND', 'THE', 'SERVILE', 'OR', 'THE', 'NOBLE', 'AND', 'THE', 'IGNOBLE'] +3570-5694-0019-939: ref=['BUT', 'THE', 'GENERAL', 'DISTINCTION', 'IS', 'NOT', 'ON', 'THAT', 'ACCOUNT', 'TO', 'BE', 'OVERLOOKED'] +3570-5694-0019-939: hyp=['BUT', 'THE', 'GENERAL', 'DISTINCTION', 'IS', 'NOT', 'ON', 'THAT', 'ACCOUNT', 'TO', 'BE', 'OVERLOOKED'] +3570-5694-0020-940: ref=['SO', 'THOSE', 'OFFICES', 'WHICH', 'ARE', 'BY', 'RIGHT', 'THE', 'PROPER', 'EMPLOYMENT', 'OF', 'THE', 'LEISURE', 'CLASS', 'ARE', 'NOBLE', 'SUCH', 'AS', 'GOVERNMENT', 'FIGHTING', 'HUNTING', 'THE', 'CARE', 'OF', 'ARMS', 'AND', 'ACCOUTREMENTS', 'AND', 'THE', 'LIKE', 'IN', 'SHORT', 'THOSE', 'WHICH', 'MAY', 'BE', 'CLASSED', 'AS', 'OSTENSIBLY', 'PREDATORY', 'EMPLOYMENTS'] +3570-5694-0020-940: hyp=['SO', 'THOSE', 'OFFICES', 'WHICH', 'ARE', 'BY', 'RIGHT', 'THE', 'PROPER', 'EMPLOYMENT', 'OF', 'THE', 'LEISURE', 'CLASS', 'ARE', 'NOBLE', 'SUCH', 'AS', 'GOVERNMENT', 'FIGHTING', 'HUNTING', 'THE', 'CARE', 'OF', 'ARMS', 'AND', 'ACCOUTREMENTS', 'AND', 'THE', 'LIKE', 'IN', 'SHORT', 'THOSE', 'WHICH', 'MAY', 'BE', 'CLASSED', 'AS', 'OSTENSIBLY', 'PREDATORY', 'EMPLOYMENTS'] +3570-5694-0021-941: ref=['WHENEVER', 'AS', 'IN', 'THESE', 'CASES', 'THE', 'MENIAL', 'SERVICE', 'IN', 'QUESTION', 'HAS', 'TO', 'DO', 'DIRECTLY', 'WITH', 'THE', 'PRIMARY', 'LEISURE', 'EMPLOYMENTS', 'OF', 'FIGHTING', 'AND', 'HUNTING', 'IT', 'EASILY', 
'ACQUIRES', 'A', 'REFLECTED', 'HONORIFIC', 'CHARACTER'] +3570-5694-0021-941: hyp=['WHENEVER', 'AS', 'IN', 'THESE', 'CASES', 'THE', 'MENIAL', 'SERVICE', 'IN', 'QUESTION', 'HAS', 'TO', 'DO', 'DIRECTLY', 'WITH', 'THE', 'PRIMARY', 'LEISURE', 'EMPLOYMENTS', 'OF', 'FIGHTING', 'AND', 'HUNTING', 'IT', 'EASILY', 'ACQUIRES', 'A', 'REFLECTED', 'HONORIFIC', 'CHARACTER'] +3570-5694-0022-942: ref=['THE', 'LIVERY', 'BECOMES', 'OBNOXIOUS', 'TO', 'NEARLY', 'ALL', 'WHO', 'ARE', 'REQUIRED', 'TO', 'WEAR', 'IT'] +3570-5694-0022-942: hyp=['THE', 'LIVERY', 'BECOMES', 'OBNOXIOUS', 'TO', 'NEARLY', 'ALL', 'WHO', 'ARE', 'REQUIRED', 'TO', 'WEAR', 'IT'] +3570-5695-0000-943: ref=['IN', 'A', 'GENERAL', 'WAY', 'THOUGH', 'NOT', 'WHOLLY', 'NOR', 'CONSISTENTLY', 'THESE', 'TWO', 'GROUPS', 'COINCIDE'] +3570-5695-0000-943: hyp=['IN', 'A', 'GENERAL', 'WAY', 'THOUGH', 'NOT', 'WHOLLY', 'NOR', 'CONSISTENTLY', 'THESE', 'TWO', 'GROUPS', 'COINCIDE'] +3570-5695-0001-944: ref=['THE', 'DEPENDENT', 'WHO', 'WAS', 'FIRST', 'DELEGATED', 'FOR', 'THESE', 'DUTIES', 'WAS', 'THE', 'WIFE', 'OR', 'THE', 'CHIEF', 'WIFE', 'AND', 'AS', 'WOULD', 'BE', 'EXPECTED', 'IN', 'THE', 'LATER', 'DEVELOPMENT', 'OF', 'THE', 'INSTITUTION', 'WHEN', 'THE', 'NUMBER', 'OF', 'PERSONS', 'BY', 'WHOM', 'THESE', 'DUTIES', 'ARE', 'CUSTOMARILY', 'PERFORMED', 'GRADUALLY', 'NARROWS', 'THE', 'WIFE', 'REMAINS', 'THE', 'LAST'] +3570-5695-0001-944: hyp=['THE', 'DEPENDENT', 'WHO', 'WAS', 'FIRST', 'DELEGATED', 'FOR', 'THESE', 'DUTIES', 'WAS', 'THE', 'WIFE', 'OR', 'THE', 'CHIEF', 'WIFE', 'AND', 'AS', 'WOULD', 'BE', 'EXPECTED', 'IN', 'THE', 'LATER', 'DEVELOPMENT', 'OF', 'THE', 'INSTITUTION', 'WHEN', 'THE', 'NUMBER', 'OF', 'PERSONS', 'BY', 'WHOM', 'THESE', 'DUTIES', 'ARE', 'CUSTOMARILY', 'PERFORMED', 'GRADUALLY', 'NARROWS', 'THE', 'WIFE', 'REMAINS', 'THE', 'LAST'] +3570-5695-0002-945: ref=['BUT', 'AS', 'WE', 'DESCEND', 'THE', 'SOCIAL', 'SCALE', 'THE', 'POINT', 'IS', 'PRESENTLY', 'REACHED', 'WHERE', 'THE', 'DUTIES', 'OF', 'VICARIOUS', 'LEISURE', 'AND', 'CONSUMPTION', 'DEVOLVE', 'UPON', 'THE', 'WIFE', 'ALONE'] +3570-5695-0002-945: hyp=['BUT', 'AS', 'WE', 'DESCEND', 'THE', 'SOCIAL', 'SCALE', 'THE', 'POINT', 'IS', 'PRESENTLY', 'REACHED', 'WHERE', 'THE', 'DUTIES', 'OF', 'VICARIOUS', 'LEISURE', 'AND', 'CONSUMPTION', 'DEVOLVE', 'UPON', 'THE', 'WIFE', 'ALONE'] +3570-5695-0003-946: ref=['IN', 'THE', 'COMMUNITIES', 'OF', 'THE', 'WESTERN', 'CULTURE', 'THIS', 'POINT', 'IS', 'AT', 'PRESENT', 'FOUND', 'AMONG', 'THE', 'LOWER', 'MIDDLE', 'CLASS'] +3570-5695-0003-946: hyp=['IN', 'THE', 'COMMUNITIES', 'OF', 'THE', 'WESTERN', 'CULTURE', 'THIS', 'POINT', 'IS', 'AT', 'PRESENT', 'FOUND', 'AMONG', 'THE', 'LOWER', 'MIDDLE', 'CLASSES'] +3570-5695-0004-947: ref=['IF', 'BEAUTY', 'OR', 'COMFORT', 'IS', 'ACHIEVED', 'AND', 'IT', 'IS', 'A', 'MORE', 'OR', 'LESS', 'FORTUITOUS', 'CIRCUMSTANCE', 'IF', 'THEY', 'ARE', 'THEY', 'MUST', 'BE', 'ACHIEVED', 'BY', 'MEANS', 'AND', 'METHODS', 'THAT', 'COMMEND', 'THEMSELVES', 'TO', 'THE', 'GREAT', 'ECONOMIC', 'LAW', 'OF', 'WASTED', 'EFFORT'] +3570-5695-0004-947: hyp=['IF', 'BEAUTY', 'OR', 'COMFORT', 'IS', 'ACHIEVED', 'AND', 'IT', 'IS', 'A', 'MORE', 'OR', 'LESS', 'FORTUITOUS', 'CIRCUMSTANCE', 'IF', 'THEY', 'ARE', 'THEY', 'MUST', 'BE', 'ACHIEVED', 'BY', 'MEANS', 'AND', 'METHODS', 'THAT', 'COMMEND', 'THEMSELVES', 'TO', 'THE', 'GREAT', 'ECONOMIC', 'LAW', 'OF', 'WASTED', 'EFFORT'] +3570-5695-0005-948: ref=['THE', 'MAN', 'OF', 'THE', 'HOUSEHOLD', 'ALSO', 'CAN', 'DO', 'SOMETHING', 'IN', 'THIS', 'DIRECTION', 'AND', 'INDEED', 'HE', 'COMMONLY', 'DOES', 'BUT', 'WITH', 'A', 'STILL', 'LOWER', 
'DESCENT', 'INTO', 'THE', 'LEVELS', 'OF', 'INDIGENCE', 'ALONG', 'THE', 'MARGIN', 'OF', 'THE', 'SLUMS', 'THE', 'MAN', 'AND', 'PRESENTLY', 'ALSO', 'THE', 'CHILDREN', 'VIRTUALLY', 'CEASE', 'TO', 'CONSUME', 'VALUABLE', 'GOODS', 'FOR', 'APPEARANCES', 'AND', 'THE', 'WOMAN', 'REMAINS', 'VIRTUALLY', 'THE', 'SOLE', 'EXPONENT', 'OF', 'THE', "HOUSEHOLD'S", 'PECUNIARY', 'DECENCY'] +3570-5695-0005-948: hyp=['THE', 'MAN', 'OF', 'THE', 'HOUSEHOLD', 'ALSO', 'CAN', 'DO', 'SOMETHING', 'IN', 'THIS', 'DIRECTION', 'AND', 'INDEED', 'HE', 'COMMONLY', 'DOES', 'BUT', 'WITH', 'A', 'STILL', 'LOWER', 'DESCENT', 'INTO', 'THE', 'LEVELS', 'OF', 'INDIGENCE', 'ALONG', 'THE', 'MARGIN', 'OF', 'THE', 'SLUMS', 'THE', 'MAN', 'AND', 'PRESENTLY', 'ALSO', 'THE', 'CHILDREN', 'VIRTUALLY', 'CEASE', 'TO', 'CONSUME', 'VALUABLE', 'GOODS', 'FOR', 'APPEARANCES', 'AND', 'THE', 'WOMAN', 'REMAINS', 'VIRTUALLY', 'THE', 'SOLE', 'EXPONENT', 'OF', 'THE', "HOUSEHOLD'S", 'PECUNIARY', 'DECENCY'] +3570-5695-0006-949: ref=['VERY', 'MUCH', 'OF', 'SQUALOR', 'AND', 'DISCOMFORT', 'WILL', 'BE', 'ENDURED', 'BEFORE', 'THE', 'LAST', 'TRINKET', 'OR', 'THE', 'LAST', 'PRETENSE', 'OF', 'PECUNIARY', 'DECENCY', 'IS', 'PUT', 'AWAY'] +3570-5695-0006-949: hyp=['VERY', 'MUCH', 'OF', 'SQUALOR', 'AND', 'DISCOMFORT', 'WILL', 'BE', 'ENDURED', 'BEFORE', 'THE', 'LAST', 'TRINKET', 'OR', 'THE', 'LAST', 'PRETENCE', 'OF', 'PECUNIARY', 'DECENCY', 'IS', 'PUT', 'AWAY'] +3570-5695-0007-950: ref=['THERE', 'IS', 'NO', 'CLASS', 'AND', 'NO', 'COUNTRY', 'THAT', 'HAS', 'YIELDED', 'SO', 'ABJECTLY', 'BEFORE', 'THE', 'PRESSURE', 'OF', 'PHYSICAL', 'WANT', 'AS', 'TO', 'DENY', 'THEMSELVES', 'ALL', 'GRATIFICATION', 'OF', 'THIS', 'HIGHER', 'OR', 'SPIRITUAL', 'NEED'] +3570-5695-0007-950: hyp=['THERE', 'IS', 'NO', 'CLASS', 'IN', 'NO', 'COUNTRY', 'THAT', 'HAS', 'YIELDED', 'SO', 'ABJECTLY', 'BEFORE', 'THE', 'PRESSURE', 'OF', 'PHYSICAL', 'WANT', 'AS', 'TO', 'DENY', 'THEMSELVES', 'ALL', 'GRATIFICATION', 'OF', 'THIS', 'HIGHER', 'OR', 'SPIRITUAL', 'NEED'] +3570-5695-0008-951: ref=['THE', 'QUESTION', 'IS', 'WHICH', 'OF', 'THE', 'TWO', 'METHODS', 'WILL', 'MOST', 'EFFECTIVELY', 'REACH', 'THE', 'PERSONS', 'WHOSE', 'CONVICTIONS', 'IT', 'IS', 'DESIRED', 'TO', 'AFFECT'] +3570-5695-0008-951: hyp=['THE', 'QUESTION', 'IS', 'WHICH', 'OF', 'THE', 'TWO', 'METHODS', 'WILL', 'MOST', 'EFFECTIVELY', 'REACH', 'THE', 'PERSONS', 'WHOSE', 'CONVICTIONS', 'IT', 'IS', 'DESIRED', 'TO', 'EFFECT'] +3570-5695-0009-952: ref=['EACH', 'WILL', 'THEREFORE', 'SERVE', 'ABOUT', 'EQUALLY', 'WELL', 'DURING', 'THE', 'EARLIER', 'STAGES', 'OF', 'SOCIAL', 'GROWTH'] +3570-5695-0009-952: hyp=['EACH', 'WILL', 'THEREFORE', 'SERVE', 'ABOUT', 'EQUALLY', 'WELL', 'DURING', 'THE', 'EARLIER', 'STAGES', 'OF', 'SOCIAL', 'GROWTH'] +3570-5695-0010-953: ref=['THE', 'MODERN', 'ORGANIZATION', 'OF', 'INDUSTRY', 'WORKS', 'IN', 'THE', 'SAME', 'DIRECTION', 'ALSO', 'BY', 'ANOTHER', 'LINE'] +3570-5695-0010-953: hyp=['THE', 'MODERN', 'ORGANIZATION', 'OF', 'INDUSTRY', 'WORKS', 'IN', 'THE', 'SAME', 'DIRECTION', 'ALSO', 'BY', 'ANOTHER', 'LINE'] +3570-5695-0011-954: ref=['IT', 'IS', 'EVIDENT', 'THEREFORE', 'THAT', 'THE', 'PRESENT', 'TREND', 'OF', 'THE', 'DEVELOPMENT', 'IS', 'IN', 'THE', 'DIRECTION', 'OF', 'HEIGHTENING', 'THE', 'UTILITY', 'OF', 'CONSPICUOUS', 'CONSUMPTION', 'AS', 'COMPARED', 'WITH', 'LEISURE'] +3570-5695-0011-954: hyp=['IT', 'IS', 'EVIDENT', 'THEREFORE', 'THAT', 'THE', 'PRESENT', 'TREND', 'OF', 'THE', 'DEVELOPMENT', 'IS', 'IN', 'THE', 'DIRECTION', 'OF', 'HEIGHTENING', 'THE', 'UTILITY', 'OF', 'CONSPICUOUS', 'CONSUMPTION', 'AS', 'COMPARED', 'WITH', 
'LEISURE'] +3570-5695-0012-955: ref=['IT', 'IS', 'ALSO', 'NOTICEABLE', 'THAT', 'THE', 'SERVICEABILITY', 'OF', 'CONSUMPTION', 'AS', 'A', 'MEANS', 'OF', 'REPUTE', 'AS', 'WELL', 'AS', 'THE', 'INSISTENCE', 'ON', 'IT', 'AS', 'AN', 'ELEMENT', 'OF', 'DECENCY', 'IS', 'AT', 'ITS', 'BEST', 'IN', 'THOSE', 'PORTIONS', 'OF', 'THE', 'COMMUNITY', 'WHERE', 'THE', 'HUMAN', 'CONTACT', 'OF', 'THE', 'INDIVIDUAL', 'IS', 'WIDEST', 'AND', 'THE', 'MOBILITY', 'OF', 'THE', 'POPULATION', 'IS', 'GREATEST'] +3570-5695-0012-955: hyp=['IT', 'IS', 'ALSO', 'NOTICEABLE', 'THAT', 'THE', 'SERVICEABILITY', 'OF', 'CONSUMPTION', 'AS', 'A', 'MEANS', 'OF', 'REPUTE', 'AS', 'WELL', 'AS', 'THE', 'INSISTENCE', 'ON', 'IT', 'AS', 'AN', 'ELEMENT', 'OF', 'DECENCY', 'IS', 'AT', 'ITS', 'BEST', 'IN', 'THOSE', 'PORTIONS', 'OF', 'THE', 'COMMUNITY', 'WHERE', 'THE', 'HUMAN', 'CONTACT', 'OF', 'THE', 'INDIVIDUAL', 'IS', 'WIDEST', 'AND', 'THE', 'MOBILITY', 'OF', 'THE', 'POPULATION', 'IS', 'GREATEST'] +3570-5695-0013-956: ref=['CONSUMPTION', 'BECOMES', 'A', 'LARGER', 'ELEMENT', 'IN', 'THE', 'STANDARD', 'OF', 'LIVING', 'IN', 'THE', 'CITY', 'THAN', 'IN', 'THE', 'COUNTRY'] +3570-5695-0013-956: hyp=['CONSUMPTION', 'BECOMES', 'A', 'LARGER', 'ELEMENT', 'IN', 'THE', 'STANDARD', 'OF', 'LIVING', 'IN', 'THE', 'CITY', 'THAN', 'IN', 'THE', 'COUNTRY'] +3570-5695-0014-957: ref=['AMONG', 'THE', 'COUNTRY', 'POPULATION', 'ITS', 'PLACE', 'IS', 'TO', 'SOME', 'EXTENT', 'TAKEN', 'BY', 'SAVINGS', 'AND', 'HOME', 'COMFORTS', 'KNOWN', 'THROUGH', 'THE', 'MEDIUM', 'OF', 'NEIGHBORHOOD', 'GOSSIP', 'SUFFICIENTLY', 'TO', 'SERVE', 'THE', 'LIKE', 'GENERAL', 'PURPOSE', 'OF', 'PECUNIARY', 'REPUTE'] +3570-5695-0014-957: hyp=['AMONG', 'THE', 'COUNTRY', 'POPULATION', 'ITS', 'PLACE', 'IS', 'TO', 'SOME', 'EXTENT', 'TAKEN', 'BY', 'SAVINGS', 'AND', 'HOME', 'COMFORTS', 'KNOWN', 'THROUGH', 'THE', 'MEDIUM', 'OF', 'NEIGHBOURHOOD', 'GOSSIP', 'SUFFICIENTLY', 'TO', 'SERVE', 'THE', 'LIKE', 'GENERAL', 'PURPOSE', 'OF', 'PECUNIARY', 'REPUTE'] +3570-5695-0015-958: ref=['THE', 'RESULT', 'IS', 'A', 'GREAT', 'MOBILITY', 'OF', 'THE', 'LABOR', 'EMPLOYED', 'IN', 'PRINTING', 'PERHAPS', 'GREATER', 'THAN', 'IN', 'ANY', 'OTHER', 'EQUALLY', 'WELL', 'DEFINED', 'AND', 'CONSIDERABLE', 'BODY', 'OF', 'WORKMEN'] +3570-5695-0015-958: hyp=['THE', 'RESULT', 'IS', 'A', 'GREAT', 'MOBILITY', 'OF', 'THE', 'LABOR', 'EMPLOYED', 'IN', 'PRINTING', 'PERHAPS', 'GREATER', 'THAN', 'IN', 'ANY', 'OTHER', 'EQUALLY', 'WELL', 'DEFINED', 'AND', 'CONSIDERABLE', 'BODY', 'OF', 'WORKMEN'] +3570-5696-0000-959: ref=['UNDER', 'THE', 'SIMPLE', 'TEST', 'OF', 'EFFECTIVENESS', 'FOR', 'ADVERTISING', 'WE', 'SHOULD', 'EXPECT', 'TO', 'FIND', 'LEISURE', 'AND', 'THE', 'CONSPICUOUS', 'CONSUMPTION', 'OF', 'GOODS', 'DIVIDING', 'THE', 'FIELD', 'OF', 'PECUNIARY', 'EMULATION', 'PRETTY', 'EVENLY', 'BETWEEN', 'THEM', 'AT', 'THE', 'OUTSET'] +3570-5696-0000-959: hyp=['UNDER', 'THE', 'SIMPLE', 'TEST', 'OF', 'EFFECTIVENESS', 'FOR', 'ADVERTISING', 'WE', 'SHOULD', 'EXPECT', 'TO', 'FIND', 'LEISURE', 'AND', 'THE', 'CONSPICUOUS', 'CONSUMPTION', 'OF', 'GOODS', 'DIVIDING', 'THE', 'FIELD', 'OF', 'PECUNIARY', 'EMULATION', 'PRETTY', 'EVENLY', 'BETWEEN', 'THEM', 'AT', 'THE', 'OUTSET'] +3570-5696-0001-960: ref=['BUT', 'THE', 'ACTUAL', 'COURSE', 'OF', 'DEVELOPMENT', 'HAS', 'BEEN', 'SOMEWHAT', 'DIFFERENT', 'FROM', 'THIS', 'IDEAL', 'SCHEME', 'LEISURE', 'HELD', 'THE', 'FIRST', 'PLACE', 'AT', 'THE', 'START', 'AND', 'CAME', 'TO', 'HOLD', 'A', 'RANK', 'VERY', 'MUCH', 'ABOVE', 'WASTEFUL', 'CONSUMPTION', 'OF', 'GOODS', 'BOTH', 'AS', 'A', 'DIRECT', 'EXPONENT', 'OF', 'WEALTH', 'AND', 'AS', 
'AN', 'ELEMENT', 'IN', 'THE', 'STANDARD', 'OF', 'DECENCY', 'DURING', 'THE', 'QUASI', 'PEACEABLE', 'CULTURE'] +3570-5696-0001-960: hyp=['BUT', 'THE', 'ACTUAL', 'COURSE', 'OF', 'DEVELOPMENT', 'HAS', 'BEEN', 'SOMEWHAT', 'DIFFERENT', 'FROM', 'THIS', 'IDEAL', 'SCHEME', 'LEISURE', 'HELD', 'THE', 'FIRST', 'PLACE', 'AT', 'THE', 'START', 'AND', 'CAME', 'TO', 'HOLD', 'A', 'RANK', 'VERY', 'MUCH', 'ABOVE', 'WASTEFUL', 'CONSUMPTION', 'OF', 'GOODS', 'BOTH', 'AS', 'A', 'DIRECT', 'EXPONENT', 'OF', 'WEALTH', 'AND', 'AS', 'AN', 'ELEMENT', 'IN', 'THE', 'STANDARD', 'OF', 'DECENCY', 'DURING', 'THE', 'QUARSAI', 'PEACEABLE', 'CULTURE'] +3570-5696-0002-961: ref=['OTHER', 'CIRCUMSTANCES', 'PERMITTING', 'THAT', 'INSTINCT', 'DISPOSES', 'MEN', 'TO', 'LOOK', 'WITH', 'FAVOR', 'UPON', 'PRODUCTIVE', 'EFFICIENCY', 'AND', 'ON', 'WHATEVER', 'IS', 'OF', 'HUMAN', 'USE'] +3570-5696-0002-961: hyp=['ARE', 'THE', 'CIRCUMSTANCES', 'PERMITTING', 'THAT', 'INSTINCT', 'DISPOSES', 'MEN', 'TO', 'LOOK', 'WITH', 'FAVOR', 'UPON', 'PRODUCTIVE', 'EFFICIENCY', 'AND', 'ON', 'WHATEVER', 'IS', 'OF', 'HUMAN', 'USE'] +3570-5696-0003-962: ref=['A', 'RECONCILIATION', 'BETWEEN', 'THE', 'TWO', 'CONFLICTING', 'REQUIREMENTS', 'IS', 'EFFECTED', 'BY', 'A', 'RESORT', 'TO', 'MAKE', 'BELIEVE', 'MANY', 'AND', 'INTRICATE', 'POLITE', 'OBSERVANCES', 'AND', 'SOCIAL', 'DUTIES', 'OF', 'A', 'CEREMONIAL', 'NATURE', 'ARE', 'DEVELOPED', 'MANY', 'ORGANIZATIONS', 'ARE', 'FOUNDED', 'WITH', 'SOME', 'SPECIOUS', 'OBJECT', 'OF', 'AMELIORATION', 'EMBODIED', 'IN', 'THEIR', 'OFFICIAL', 'STYLE', 'AND', 'TITLE', 'THERE', 'IS', 'MUCH', 'COMING', 'AND', 'GOING', 'AND', 'A', 'DEAL', 'OF', 'TALK', 'TO', 'THE', 'END', 'THAT', 'THE', 'TALKERS', 'MAY', 'NOT', 'HAVE', 'OCCASION', 'TO', 'REFLECT', 'ON', 'WHAT', 'IS', 'THE', 'EFFECTUAL', 'ECONOMIC', 'VALUE', 'OF', 'THEIR', 'TRAFFIC'] +3570-5696-0003-962: hyp=['A', 'RECONCILIATION', 'BETWEEN', 'THE', 'TWO', 'CONFLICTING', 'REQUIREMENTS', 'IS', 'EFFECTED', 'BY', 'A', 'RESORT', 'TO', 'MAKE', 'BELIEVE', 'MANY', 'AND', 'INTRICATE', 'POLITE', 'OBSERVANCES', 'AND', 'SOCIAL', 'DUTIES', 'OF', 'A', 'CEREMONIAL', 'NATURE', 'ARE', 'DEVELOPED', 'MANY', 'ORGANIZATIONS', 'ARE', 'FOUNDED', 'WITH', 'SOME', 'SPECIOUS', 'OBJECT', 'OF', 'AMELIORATION', 'EMBODIED', 'IN', 'THEIR', 'OFFICIAL', 'STYLE', 'AND', 'TITLE', 'THERE', 'IS', 'MUCH', 'COMING', 'AND', 'GOING', 'AND', 'A', 'DEAL', 'OF', 'TALK', 'TO', 'THE', 'END', 'THAT', 'THE', 'TALKERS', 'MAY', 'NOT', 'HAVE', 'OCCASION', 'TO', 'REFLECT', 'ON', 'WHAT', 'IS', 'THE', 'EFFECTUAL', 'ECONOMIC', 'VALUE', 'OF', 'THEIR', 'TRAFFIC'] +3570-5696-0004-963: ref=['THE', 'SALIENT', 'FEATURES', 'OF', 'THIS', 'DEVELOPMENT', 'OF', 'DOMESTIC', 'SERVICE', 'HAVE', 'ALREADY', 'BEEN', 'INDICATED'] +3570-5696-0004-963: hyp=['THE', 'SALIENT', 'FEATURES', 'OF', 'THIS', 'DEVELOPMENT', 'OF', 'DOMESTIC', 'SERVICE', 'HAVE', 'ALREADY', 'BEEN', 'INDICATED'] +3570-5696-0005-964: ref=['THROUGHOUT', 'THE', 'ENTIRE', 'EVOLUTION', 'OF', 'CONSPICUOUS', 'EXPENDITURE', 'WHETHER', 'OF', 'GOODS', 'OR', 'OF', 'SERVICES', 'OR', 'HUMAN', 'LIFE', 'RUNS', 'THE', 'OBVIOUS', 'IMPLICATION', 'THAT', 'IN', 'ORDER', 'TO', 'EFFECTUALLY', 'MEND', 'THE', "CONSUMER'S", 'GOOD', 'FAME', 'IT', 'MUST', 'BE', 'AN', 'EXPENDITURE', 'OF', 'SUPERFLUITIES'] +3570-5696-0005-964: hyp=['THROUGHOUT', 'THE', 'ENTIRE', 'REVOLUTION', 'OF', 'CONSPICUOUS', 'EXPENDITURE', 'WHETHER', 'OF', 'GOODS', 'OR', 'OF', 'SERVICES', 'OR', 'HUMAN', 'LIFE', 'RUNS', 'THE', 'OBVIOUS', 'IMPLICATION', 'THAT', 'IN', 'ORDER', 'TO', 'EFFECTUALLY', 'MEND', 'THE', "CONSUMER'S", 'GOOD', 'FAME', 'IT', 
'MUST', 'BE', 'AN', 'EXPENDITURE', 'OF', 'SUPERFLUITIES'] +3570-5696-0006-965: ref=['AS', 'USED', 'IN', 'THE', 'SPEECH', 'OF', 'EVERYDAY', 'LIFE', 'THE', 'WORD', 'CARRIES', 'AN', 'UNDERTONE', 'OF', 'DEPRECATION'] +3570-5696-0006-965: hyp=['AS', 'USED', 'IN', 'THE', 'SPEECH', 'OF', 'EVERYDAY', 'LIFE', 'THE', 'WORD', 'CARRIES', 'AN', 'UNDERTONE', 'OF', 'DEPRECATION'] +3570-5696-0007-966: ref=['THE', 'USE', 'OF', 'THE', 'WORD', 'WASTE', 'AS', 'A', 'TECHNICAL', 'TERM', 'THEREFORE', 'IMPLIES', 'NO', 'DEPRECATION', 'OF', 'THE', 'MOTIVES', 'OR', 'OF', 'THE', 'ENDS', 'SOUGHT', 'BY', 'THE', 'CONSUMER', 'UNDER', 'THIS', 'CANON', 'OF', 'CONSPICUOUS', 'WASTE'] +3570-5696-0007-966: hyp=['THE', 'USE', 'OF', 'THE', 'WORD', 'WASTE', 'AS', 'A', 'TECHNICAL', 'TERM', 'THEREFORE', 'IMPLIES', 'NO', 'DEPRECATION', 'OF', 'THE', 'MOTIVES', 'OR', 'OF', 'THE', 'ENDS', 'SOUGHT', 'BY', 'THE', 'CONSUMER', 'UNDER', 'THIS', 'CANON', 'OF', 'CONSPICUOUS', 'WASTE'] +3570-5696-0008-967: ref=['BUT', 'IT', 'IS', 'ON', 'OTHER', 'GROUNDS', 'WORTH', 'NOTING', 'THAT', 'THE', 'TERM', 'WASTE', 'IN', 'THE', 'LANGUAGE', 'OF', 'EVERYDAY', 'LIFE', 'IMPLIES', 'DEPRECATION', 'OF', 'WHAT', 'IS', 'CHARACTERIZED', 'AS', 'WASTEFUL'] +3570-5696-0008-967: hyp=['BUT', 'IT', 'IS', 'ON', 'OTHER', 'GROUNDS', 'WORTH', 'NOTING', 'THAT', 'THE', 'TERM', 'WASTE', 'IN', 'THE', 'LANGUAGE', 'OF', 'EVERYDAY', 'LIFE', 'IMPLIES', 'DEPRECATION', 'OF', 'WHAT', 'IS', 'CHARACTERIZED', 'AS', 'WASTEFUL'] +3570-5696-0009-968: ref=['IN', 'STRICT', 'ACCURACY', 'NOTHING', 'SHOULD', 'BE', 'INCLUDED', 'UNDER', 'THE', 'HEAD', 'OF', 'CONSPICUOUS', 'WASTE', 'BUT', 'SUCH', 'EXPENDITURE', 'AS', 'IS', 'INCURRED', 'ON', 'THE', 'GROUND', 'OF', 'AN', 'INVIDIOUS', 'PECUNIARY', 'COMPARISON'] +3570-5696-0009-968: hyp=['IN', 'STRICT', 'ACCURACY', 'NOTHING', 'SHOULD', 'BE', 'INCLUDED', 'UNDER', 'THE', 'HEAD', 'OF', 'CONSPICUOUS', 'WASTE', 'BUT', 'SUCH', 'EXPENDITURE', 'AS', 'IS', 'INCURRED', 'ON', 'THE', 'GROUND', 'OF', 'AN', 'INVIDIOUS', 'PECUNIARY', 'COMPARISON'] +3570-5696-0010-969: ref=['AN', 'ARTICLE', 'MAY', 'BE', 'USEFUL', 'AND', 'WASTEFUL', 'BOTH', 'AND', 'ITS', 'UTILITY', 'TO', 'THE', 'CONSUMER', 'MAY', 'BE', 'MADE', 'UP', 'OF', 'USE', 'AND', 'WASTE', 'IN', 'THE', 'MOST', 'VARYING', 'PROPORTIONS'] +3570-5696-0010-969: hyp=['AN', 'ARTICLE', 'MAY', 'BE', 'USEFUL', 'AND', 'WASTEFUL', 'BOTH', 'AND', 'ITS', 'UTILITY', 'TO', 'THE', 'CONSUMER', 'MAY', 'BE', 'MADE', 'UP', 'OF', 'USE', 'AND', 'WASTE', 'IN', 'THE', 'MOST', 'VARYING', 'PROPORTIONS'] +3575-170457-0000-970: ref=['AND', 'OFTEN', 'HAS', 'MY', 'MOTHER', 'SAID', 'WHILE', 'ON', 'HER', 'LAP', 'I', 'LAID', 'MY', 'HEAD', 'SHE', 'FEARED', 'FOR', 'TIME', 'I', 'WAS', 'NOT', 'MADE', 'BUT', 'FOR', 'ETERNITY'] +3575-170457-0000-970: hyp=['AND', 'OFTEN', 'AS', 'MY', 'MOTHER', 'SAID', 'WHILE', 'ON', 'HER', 'LAP', 'I', 'LAID', 'MY', 'HEAD', 'SHE', 'FEARED', 'FOR', 'TIME', 'I', 'WAS', 'NOT', 'MADE', 'BUT', 'FOR', 'ETERNITY'] +3575-170457-0001-971: ref=['WHY', 'ARE', 'WE', 'TO', 'BE', 'DENIED', 'EACH', "OTHER'S", 'SOCIETY'] +3575-170457-0001-971: hyp=['WHY', 'ARE', 'WE', 'TO', 'BE', 'DENIED', 'EACH', "OTHER'S", 'SOCIETY'] +3575-170457-0002-972: ref=['WHY', 'ARE', 'WE', 'TO', 'BE', 'DIVIDED'] +3575-170457-0002-972: hyp=['WHY', 'ARE', 'WE', 'TO', 'BE', 'DIVIDED'] +3575-170457-0003-973: ref=['SURELY', 'IT', 'MUST', 'BE', 'BECAUSE', 'WE', 'ARE', 'IN', 'DANGER', 'OF', 'LOVING', 'EACH', 'OTHER', 'TOO', 'WELL', 'OF', 'LOSING', 'SIGHT', 'OF', 'THE', 'CREATOR', 'IN', 'IDOLATRY', 'OF', 'THE', 'CREATURE'] +3575-170457-0003-973: hyp=['SURELY', 'IT', 
'MUST', 'BE', 'BECAUSE', 'WE', 'ARE', 'IN', 'DANGER', 'OF', 'LOVING', 'EACH', 'OTHER', 'TOO', 'WELL', 'OF', 'LOSING', 'SIGHT', 'OF', 'THE', 'CREATOR', 'AND', 'IDOLATRY', 'OF', 'THE', 'CREATURE'] +3575-170457-0004-974: ref=['WE', 'USED', 'TO', 'DISPUTE', 'ABOUT', 'POLITICS', 'AND', 'RELIGION'] +3575-170457-0004-974: hyp=['WE', 'USED', 'TO', 'DISPUTE', 'ABOUT', 'POLITICS', 'AND', 'RELIGION'] +3575-170457-0005-975: ref=['SHE', 'A', 'TORY', 'AND', "CLERGYMAN'S", 'DAUGHTER', 'WAS', 'ALWAYS', 'IN', 'A', 'MINORITY', 'OF', 'ONE', 'IN', 'OUR', 'HOUSE', 'OF', 'VIOLENT', 'DISSENT', 'AND', 'RADICALISM'] +3575-170457-0005-975: hyp=['SHE', 'A', 'TORY', 'AND', "CLERGYMAN'S", 'DAUGHTER', 'WAS', 'ALWAYS', 'IN', 'A', 'MINORITY', 'OF', 'ONE', 'IN', 'OUR', 'HOUSE', 'OF', 'VIOLENT', 'DISSENT', 'AND', 'RADICALISM'] +3575-170457-0006-976: ref=['HER', 'FEEBLE', 'HEALTH', 'GAVE', 'HER', 'HER', 'YIELDING', 'MANNER', 'FOR', 'SHE', 'COULD', 'NEVER', 'OPPOSE', 'ANY', 'ONE', 'WITHOUT', 'GATHERING', 'UP', 'ALL', 'HER', 'STRENGTH', 'FOR', 'THE', 'STRUGGLE'] +3575-170457-0006-976: hyp=['HER', 'FEEBLE', 'HEALTH', 'GAVE', 'HER', 'HER', 'YIELDING', 'MANNER', 'FOR', 'SHE', 'COULD', 'NEVER', 'OPPOSE', 'ANY', 'ONE', 'WITHOUT', 'GATHERING', 'UP', 'ALL', 'HER', 'STRENGTH', 'FOR', 'THE', 'STRUGGLE'] +3575-170457-0007-977: ref=['HE', 'SPOKE', 'FRENCH', 'PERFECTLY', 'I', 'HAVE', 'BEEN', 'TOLD', 'WHEN', 'NEED', 'WAS', 'BUT', 'DELIGHTED', 'USUALLY', 'IN', 'TALKING', 'THE', 'BROADEST', 'YORKSHIRE'] +3575-170457-0007-977: hyp=['HE', 'SPOKE', 'FRENCH', 'PERFECTLY', 'I', 'HAVE', 'BEEN', 'TOLD', 'WHEN', 'NEED', 'WAS', 'BUT', 'DELIGHTED', 'USUALLY', 'IN', 'TALKING', 'THE', 'BROADEST', 'YORKSHIRE'] +3575-170457-0008-978: ref=['AND', 'SO', 'LIFE', 'AND', 'DEATH', 'HAVE', 'DISPERSED', 'THE', 'CIRCLE', 'OF', 'VIOLENT', 'RADICALS', 'AND', 'DISSENTERS', 'INTO', 'WHICH', 'TWENTY', 'YEARS', 'AGO', 'THE', 'LITTLE', 'QUIET', 'RESOLUTE', "CLERGYMAN'S", 'DAUGHTER', 'WAS', 'RECEIVED', 'AND', 'BY', 'WHOM', 'SHE', 'WAS', 'TRULY', 'LOVED', 'AND', 'HONOURED'] +3575-170457-0008-978: hyp=['AND', 'SO', 'LIFE', 'AND', 'DEATH', 'HAVE', 'DISPERSED', 'THE', 'CIRCLE', 'OF', 'VIOLENT', 'RADICALS', 'AND', 'DISSENTERS', 'INTO', 'WHICH', 'TWENTY', 'YEARS', 'AGO', 'THE', 'LITTLE', 'QUIET', 'RESOLUTE', "CLERGYMAN'S", 'DAUGHTER', 'WAS', 'RECEIVED', 'AND', 'BY', 'WHOM', 'SHE', 'WAS', 'TRULY', 'LOVED', 'AND', 'HONOURED'] +3575-170457-0009-979: ref=['JANUARY', 'AND', 'FEBRUARY', 'OF', 'EIGHTEEN', 'THIRTY', 'SEVEN', 'HAD', 'PASSED', 'AWAY', 'AND', 'STILL', 'THERE', 'WAS', 'NO', 'REPLY', 'FROM', 'SOUTHEY'] +3575-170457-0009-979: hyp=['JANUARY', 'AND', 'FEBRUARY', 'OF', 'EIGHTEEN', 'THIRTY', 'SEVEN', 'HAD', 'PASSED', 'AWAY', 'AND', 'STILL', 'THERE', 'WAS', 'NO', 'REPLY', 'FROM', 'SOUTHEY'] +3575-170457-0010-980: ref=['I', 'AM', 'NOT', 'DEPRECIATING', 'IT', 'WHEN', 'I', 'SAY', 'THAT', 'IN', 'THESE', 'TIMES', 'IT', 'IS', 'NOT', 'RARE'] +3575-170457-0010-980: hyp=['I', 'AM', 'NOT', 'DEPRECIATING', 'IT', 'WHEN', 'I', 'SAY', 'THAT', 'IN', 'THESE', 'TIMES', 'IT', 'IS', 'NOT', 'RARE'] +3575-170457-0011-981: ref=['BUT', 'IT', 'IS', 'NOT', 'WITH', 'A', 'VIEW', 'TO', 'DISTINCTION', 'THAT', 'YOU', 'SHOULD', 'CULTIVATE', 'THIS', 'TALENT', 'IF', 'YOU', 'CONSULT', 'YOUR', 'OWN', 'HAPPINESS'] +3575-170457-0011-981: hyp=['BUT', 'IT', 'IS', 'NOT', 'WITH', 'A', 'VIEW', 'TO', 'DISTINCTION', 'THAT', 'YOU', 'SHOULD', 'CULTIVATE', 'THIS', 'TALENT', 'IF', 'YOU', 'CONSULT', 'YOUR', 'OWN', 'HAPPINESS'] +3575-170457-0012-982: ref=['YOU', 'WILL', 'SAY', 'THAT', 'A', 'WOMAN', 'HAS', 'NO', 'NEED', 'OF', 
'SUCH', 'A', 'CAUTION', 'THERE', 'CAN', 'BE', 'NO', 'PERIL', 'IN', 'IT', 'FOR', 'HER'] +3575-170457-0012-982: hyp=['YOU', 'WILL', 'SAY', 'THAT', 'A', 'WOMAN', 'HAS', 'NO', 'NEED', 'OF', 'SUCH', 'A', 'CAUTION', 'THERE', 'CAN', 'BE', 'NO', 'PERIL', 'IN', 'IT', 'FOR', 'HER'] +3575-170457-0013-983: ref=['THE', 'MORE', 'SHE', 'IS', 'ENGAGED', 'IN', 'HER', 'PROPER', 'DUTIES', 'THE', 'LESS', 'LEISURE', 'WILL', 'SHE', 'HAVE', 'FOR', 'IT', 'EVEN', 'AS', 'AN', 'ACCOMPLISHMENT', 'AND', 'A', 'RECREATION'] +3575-170457-0013-983: hyp=['THE', 'MORE', 'SHE', 'IS', 'ENGAGED', 'IN', 'HER', 'PROPER', 'DUTIES', 'THE', 'LESS', 'LEISURE', 'WILL', 'SHE', 'HAVE', 'FOR', 'IT', 'EVEN', 'AS', 'AN', 'ACCOMPLISHMENT', 'AND', 'A', 'RECREATION'] +3575-170457-0014-984: ref=['TO', 'THOSE', 'DUTIES', 'YOU', 'HAVE', 'NOT', 'YET', 'BEEN', 'CALLED', 'AND', 'WHEN', 'YOU', 'ARE', 'YOU', 'WILL', 'BE', 'LESS', 'EAGER', 'FOR', 'CELEBRITY'] +3575-170457-0014-984: hyp=['TO', 'THOSE', 'DUTIES', 'YOU', 'HAVE', 'NOT', 'YET', 'BEEN', 'CALLED', 'AND', 'WHEN', 'YOU', 'ARE', 'YOU', 'WILL', 'BE', 'LESS', 'EAGER', 'FOR', 'CELEBRITY'] +3575-170457-0015-985: ref=['BUT', 'DO', 'NOT', 'SUPPOSE', 'THAT', 'I', 'DISPARAGE', 'THE', 'GIFT', 'WHICH', 'YOU', 'POSSESS', 'NOR', 'THAT', 'I', 'WOULD', 'DISCOURAGE', 'YOU', 'FROM', 'EXERCISING', 'IT', 'I', 'ONLY', 'EXHORT', 'YOU', 'SO', 'TO', 'THINK', 'OF', 'IT', 'AND', 'SO', 'TO', 'USE', 'IT', 'AS', 'TO', 'RENDER', 'IT', 'CONDUCIVE', 'TO', 'YOUR', 'OWN', 'PERMANENT', 'GOOD'] +3575-170457-0015-985: hyp=['BUT', 'DO', 'NOT', 'SUPPOSE', 'THAT', 'I', 'DISPARAGE', 'THE', 'GIFT', 'WHICH', 'YOU', 'POSSESS', 'NOR', 'THAT', 'I', 'WOULD', 'DISCOURAGE', 'YOU', 'FROM', 'EXERCISING', 'IT', 'I', 'ONLY', 'EXHORT', 'YOU', 'SO', 'TO', 'THINK', 'OF', 'IT', 'AND', 'SO', 'TO', 'USE', 'IT', 'AS', 'TO', 'RENDER', 'IT', 'CONDUCIVE', 'TO', 'YOUR', 'OWN', 'PERMANENT', 'GOOD'] +3575-170457-0016-986: ref=['FAREWELL', 'MADAM'] +3575-170457-0016-986: hyp=['FAREWELL', 'MADAME'] +3575-170457-0017-987: ref=['THOUGH', 'I', 'MAY', 'BE', 'BUT', 'AN', 'UNGRACIOUS', 'ADVISER', 'YOU', 'WILL', 'ALLOW', 'ME', 'THEREFORE', 'TO', 'SUBSCRIBE', 'MYSELF', 'WITH', 'THE', 'BEST', 'WISHES', 'FOR', 'YOUR', 'HAPPINESS', 'HERE', 'AND', 'HEREAFTER', 'YOUR', 'TRUE', 'FRIEND', 'ROBERT', 'SOUTHEY'] +3575-170457-0017-987: hyp=['THOUGH', 'I', 'MAY', 'BE', 'BUT', 'AN', 'UNGRACIOUS', 'ADVISER', 'YOU', 'WILL', 'ALLOW', 'ME', 'THEREFORE', 'TO', 'SUBSCRIBE', 'MYSELF', 'WITH', 'THE', 'BEST', 'WISHES', 'FOR', 'YOUR', 'HAPPINESS', 'HERE', 'AND', 'HEREAFTER', 'YOUR', 'TRUE', 'FRIEND', 'ROBERT', 'SELVEY'] +3575-170457-0018-988: ref=['SIR', 'MARCH', 'SIXTEENTH'] +3575-170457-0018-988: hyp=['SIR', 'MARCH', 'SIXTEENTH'] +3575-170457-0019-989: ref=['I', 'HAD', 'NOT', 'VENTURED', 'TO', 'HOPE', 'FOR', 'SUCH', 'A', 'REPLY', 'SO', 'CONSIDERATE', 'IN', 'ITS', 'TONE', 'SO', 'NOBLE', 'IN', 'ITS', 'SPIRIT'] +3575-170457-0019-989: hyp=['I', 'HAVE', 'NOT', 'VENTURED', 'TO', 'HOPE', 'FOR', 'SUCH', 'A', 'REPLY', 'SO', 'CONSIDERATE', 'IN', 'ITS', 'TONE', 'SO', 'NOBLE', 'IN', 'ITS', 'SPIRIT'] +3575-170457-0020-990: ref=['I', 'KNOW', 'THE', 'FIRST', 'LETTER', 'I', 'WROTE', 'TO', 'YOU', 'WAS', 'ALL', 'SENSELESS', 'TRASH', 'FROM', 'BEGINNING', 'TO', 'END', 'BUT', 'I', 'AM', 'NOT', 'ALTOGETHER', 'THE', 'IDLE', 'DREAMING', 'BEING', 'IT', 'WOULD', 'SEEM', 'TO', 'DENOTE'] +3575-170457-0020-990: hyp=['I', 'KNOW', 'THE', 'FIRST', 'LETTER', 'I', 'WROTE', 'TO', 'YOU', 'WAS', 'ALL', 'SENSELESS', 'TRASH', 'FROM', 'BEGINNING', 'TO', 'END', 'BUT', 'I', 'AM', 'NOT', 'ALTOGETHER', 'THE', 'IDLE', 'DREAMING', 
'BEING', 'IT', 'WOULD', 'SEEM', 'TO', 'DENOTE'] +3575-170457-0021-991: ref=['I', 'THOUGHT', 'IT', 'THEREFORE', 'MY', 'DUTY', 'WHEN', 'I', 'LEFT', 'SCHOOL', 'TO', 'BECOME', 'A', 'GOVERNESS'] +3575-170457-0021-991: hyp=['I', 'THOUGHT', 'IT', 'THEREFORE', 'MY', 'DUTY', 'WHEN', 'I', 'LEFT', 'SCHOOL', 'TO', 'BECOME', 'A', 'GOVERNESS'] +3575-170457-0022-992: ref=['IN', 'THE', 'EVENINGS', 'I', 'CONFESS', 'I', 'DO', 'THINK', 'BUT', 'I', 'NEVER', 'TROUBLE', 'ANY', 'ONE', 'ELSE', 'WITH', 'MY', 'THOUGHTS'] +3575-170457-0022-992: hyp=['IN', 'THE', 'EVENINGS', 'I', 'CONFESS', 'I', 'DO', 'THINK', 'BUT', 'I', 'NEVER', 'TROUBLE', 'ANY', 'ONE', 'ELSE', 'WITH', 'MY', 'THOUGHTS'] +3575-170457-0023-993: ref=['I', 'CAREFULLY', 'AVOID', 'ANY', 'APPEARANCE', 'OF', 'PREOCCUPATION', 'AND', 'ECCENTRICITY', 'WHICH', 'MIGHT', 'LEAD', 'THOSE', 'I', 'LIVE', 'AMONGST', 'TO', 'SUSPECT', 'THE', 'NATURE', 'OF', 'MY', 'PURSUITS'] +3575-170457-0023-993: hyp=['I', 'CAREFULLY', 'AVOID', 'ANY', 'APPEARANCE', 'OF', 'PREOCCUPATION', 'AND', 'ECCENTRICITY', 'WHICH', 'MIGHT', 'LEAD', 'THOSE', 'I', 'LIVE', 'AMONGST', 'TO', 'SUSPECT', 'THE', 'NATURE', 'OF', 'MY', 'PURSUITS'] +3575-170457-0024-994: ref=['I', "DON'T", 'ALWAYS', 'SUCCEED', 'FOR', 'SOMETIMES', 'WHEN', "I'M", 'TEACHING', 'OR', 'SEWING', 'I', 'WOULD', 'RATHER', 'BE', 'READING', 'OR', 'WRITING', 'BUT', 'I', 'TRY', 'TO', 'DENY', 'MYSELF', 'AND', 'MY', "FATHER'S", 'APPROBATION', 'AMPLY', 'REWARDED', 'ME', 'FOR', 'THE', 'PRIVATION'] +3575-170457-0024-994: hyp=['I', "DON'T", 'ALWAYS', 'SUCCEED', 'FOR', 'SOMETIMES', 'WHEN', "I'M", 'TEACHING', 'OR', 'SEWING', 'I', 'WOULD', 'RATHER', 'BE', 'READING', 'OR', 'WRITING', 'BUT', 'I', 'TRIED', 'TO', 'DENY', 'MYSELF', 'AND', 'MY', "FATHER'S", 'APPROBATION', 'AMPLY', 'REWARDED', 'ME', 'FOR', 'THE', 'PRIVATION'] +3575-170457-0025-995: ref=['AGAIN', 'I', 'THANK', 'YOU', 'THIS', 'INCIDENT', 'I', 'SUPPOSE', 'WILL', 'BE', 'RENEWED', 'NO', 'MORE', 'IF', 'I', 'LIVE', 'TO', 'BE', 'AN', 'OLD', 'WOMAN', 'I', 'SHALL', 'REMEMBER', 'IT', 'THIRTY', 'YEARS', 'HENCE', 'AS', 'A', 'BRIGHT', 'DREAM'] +3575-170457-0025-995: hyp=['AGAIN', 'I', 'THANK', 'YOU', 'THIS', 'INCIDENT', 'I', 'SUPPOSE', 'WILL', 'BE', 'RENEWED', 'NO', 'MORE', 'IF', 'I', 'LIVE', 'TO', 'BE', 'AN', 'OLD', 'WOMAN', 'I', 'SHALL', 'REMEMBER', 'IT', 'THIRTY', 'YEARS', 'HENCE', 'AS', 'A', 'BRIGHT', 'DREAM'] +3575-170457-0026-996: ref=['P', 'S', 'PRAY', 'SIR', 'EXCUSE', 'ME', 'FOR', 'WRITING', 'TO', 'YOU', 'A', 'SECOND', 'TIME', 'I', 'COULD', 'NOT', 'HELP', 'WRITING', 'PARTLY', 'TO', 'TELL', 'YOU', 'HOW', 'THANKFUL', 'I', 'AM', 'FOR', 'YOUR', 'KINDNESS', 'AND', 'PARTLY', 'TO', 'LET', 'YOU', 'KNOW', 'THAT', 'YOUR', 'ADVICE', 'SHALL', 'NOT', 'BE', 'WASTED', 'HOWEVER', 'SORROWFULLY', 'AND', 'RELUCTANTLY', 'IT', 'MAY', 'BE', 'AT', 'FIRST', 'FOLLOWED', 'C', 'B'] +3575-170457-0026-996: hyp=['P', 'S', 'PRAY', 'SIR', 'EXCUSE', 'ME', 'FOR', 'WRITING', 'TO', 'YOU', 'A', 'SECOND', 'TIME', 'I', 'COULD', 'NOT', 'HELP', 'WRITING', 'PARTLY', 'TO', 'TELL', 'YOU', 'HOW', 'THANKFUL', 'I', 'AM', 'FOR', 'YOUR', 'KINDNESS', 'AND', 'PARTLY', 'TO', 'LET', 'YOU', 'KNOW', 'THAT', 'YOUR', 'ADVICE', 'SHALL', 'NOT', 'BE', 'WASTED', 'HOWEVER', 'SORROWFULLY', 'AND', 'RELUCTANTLY', 'IT', 'MAY', 'BE', 'AT', 'FIRST', 'FOLLOWED', 'C', 'B'] +3575-170457-0027-997: ref=['I', 'CANNOT', 'DENY', 'MYSELF', 'THE', 'GRATIFICATION', 'OF', 'INSERTING', "SOUTHEY'S", 'REPLY'] +3575-170457-0027-997: hyp=['I', 'CANNOT', 'DENY', 'MYSELF', 'THE', 'GRATIFICATION', 'OF', 'INSERTING', "SOUTHEY'S", 'REPLY'] +3575-170457-0028-998: ref=['KESWICK', 
'MARCH', 'TWENTY', 'SECOND', 'EIGHTEEN', 'THIRTY', 'SEVEN', 'DEAR', 'MADAM'] +3575-170457-0028-998: hyp=['KESWICK', 'MARCH', 'TWENTY', 'SECOND', 'EIGHTEEN', 'THIRTY', 'SEVEN', 'DEAR', 'MADAME'] +3575-170457-0029-999: ref=['YOUR', 'LETTER', 'HAS', 'GIVEN', 'ME', 'GREAT', 'PLEASURE', 'AND', 'I', 'SHOULD', 'NOT', 'FORGIVE', 'MYSELF', 'IF', 'I', 'DID', 'NOT', 'TELL', 'YOU', 'SO'] +3575-170457-0029-999: hyp=['YOUR', 'LETTER', 'HAS', 'GIVEN', 'ME', 'GREAT', 'PLEASURE', 'AND', 'I', 'SHOULD', 'NOT', 'FORGIVE', 'MYSELF', 'IF', 'I', 'DID', 'NOT', 'TELL', 'YOU', 'SO'] +3575-170457-0030-1000: ref=['OF', 'THIS', 'SECOND', 'LETTER', 'ALSO', 'SHE', 'SPOKE', 'AND', 'TOLD', 'ME', 'THAT', 'IT', 'CONTAINED', 'AN', 'INVITATION', 'FOR', 'HER', 'TO', 'GO', 'AND', 'SEE', 'THE', 'POET', 'IF', 'EVER', 'SHE', 'VISITED', 'THE', 'LAKES'] +3575-170457-0030-1000: hyp=['OF', 'THIS', 'SECOND', 'LETTER', 'ALSO', 'SHE', 'SPOKE', 'AND', 'TOLD', 'ME', 'THAT', 'IT', 'CONTAINED', 'AN', 'INVITATION', 'FOR', 'HER', 'TO', 'GO', 'AND', 'SEE', 'THE', 'POET', 'IF', 'EVER', 'SHE', 'VISITED', 'THE', 'LAKES'] +3575-170457-0031-1001: ref=['ON', 'AUGUST', 'TWENTY', 'SEVENTH', 'EIGHTEEN', 'THIRTY', 'SEVEN', 'SHE', 'WRITES'] +3575-170457-0031-1001: hyp=['ON', 'AUGUST', 'TWENTY', 'SEVENTH', 'EIGHTEEN', 'THIRTY', 'SEVEN', 'SHE', 'WRITES'] +3575-170457-0032-1002: ref=['COME', 'COME', 'I', 'AM', 'GETTING', 'REALLY', 'TIRED', 'OF', 'YOUR', 'ABSENCE'] +3575-170457-0032-1002: hyp=['COME', 'COME', 'I', 'AM', 'GETTING', 'REALLY', 'TIRED', 'OF', 'YOUR', 'ABSENCE'] +3575-170457-0033-1003: ref=['SATURDAY', 'AFTER', 'SATURDAY', 'COMES', 'ROUND', 'AND', 'I', 'CAN', 'HAVE', 'NO', 'HOPE', 'OF', 'HEARING', 'YOUR', 'KNOCK', 'AT', 'THE', 'DOOR', 'AND', 'THEN', 'BEING', 'TOLD', 'THAT', 'MISS', 'E', 'IS', 'COME', 'OH', 'DEAR'] +3575-170457-0033-1003: hyp=['SATURDAY', 'AFTER', 'SATURDAY', 'COMES', 'ROUND', 'AND', 'I', 'CAN', 'HAVE', 'NO', 'HOPE', 'OF', 'HEARING', 'YOUR', 'KNOCK', 'AT', 'THE', 'DOOR', 'AND', 'THEN', 'BEING', 'TOLD', 'THAT', 'MISS', 'E', 'IS', 'COME', 'OH', 'DEAR'] +3575-170457-0034-1004: ref=['IN', 'THIS', 'MONOTONOUS', 'LIFE', 'OF', 'MINE', 'THAT', 'WAS', 'A', 'PLEASANT', 'EVENT'] +3575-170457-0034-1004: hyp=['IN', 'THIS', 'MONOTONOUS', 'LIFE', 'OF', 'MINE', 'THAT', 'WAS', 'A', 'PLEASANT', 'EVENT'] +3575-170457-0035-1005: ref=['I', 'WISH', 'IT', 'WOULD', 'RECUR', 'AGAIN', 'BUT', 'IT', 'WILL', 'TAKE', 'TWO', 'OR', 'THREE', 'INTERVIEWS', 'BEFORE', 'THE', 'STIFFNESS', 'THE', 'ESTRANGEMENT', 'OF', 'THIS', 'LONG', 'SEPARATION', 'WILL', 'WEAR', 'AWAY'] +3575-170457-0035-1005: hyp=['I', 'WISH', 'IT', 'WOULD', 'RECUR', 'AGAIN', 'BUT', 'IT', 'WILL', 'TAKE', 'TWO', 'OR', 'THREE', 'INTERVIEWS', 'BEFORE', 'THE', 'STIFFNESS', 'THE', 'ESTRANGEMENT', 'OF', 'THIS', 'LONG', 'SEPARATION', 'WILL', 'WEAR', 'AWAY'] +3575-170457-0036-1006: ref=['MY', 'EYES', 'FILL', 'WITH', 'TEARS', 'WHEN', 'I', 'CONTRAST', 'THE', 'BLISS', 'OF', 'SUCH', 'A', 'STATE', 'BRIGHTENED', 'BY', 'HOPES', 'OF', 'THE', 'FUTURE', 'WITH', 'THE', 'MELANCHOLY', 'STATE', 'I', 'NOW', 'LIVE', 'IN', 'UNCERTAIN', 'THAT', 'I', 'EVER', 'FELT', 'TRUE', 'CONTRITION', 'WANDERING', 'IN', 'THOUGHT', 'AND', 'DEED', 'LONGING', 'FOR', 'HOLINESS', 'WHICH', 'I', 'SHALL', 'NEVER', 'NEVER', 'OBTAIN', 'SMITTEN', 'AT', 'TIMES', 'TO', 'THE', 'HEART', 'WITH', 'THE', 'CONVICTION', 'THAT', 'GHASTLY', 'CALVINISTIC', 'DOCTRINES', 'ARE', 'TRUE', 'DARKENED', 'IN', 'SHORT', 'BY', 'THE', 'VERY', 'SHADOWS', 'OF', 'SPIRITUAL', 'DEATH'] +3575-170457-0036-1006: hyp=['MY', 'EYES', 'FILL', 'WITH', 'TEARS', 'WHEN', 'I', 'CONTRAST', 
'THE', 'BLISS', 'OF', 'SUCH', 'A', 'STATE', 'BRIGHTENED', 'BY', 'HOPES', 'OF', 'THE', 'FUTURE', 'WITH', 'THE', 'MELANCHOLY', 'STATE', 'I', 'NOW', 'LIVE', 'IN', 'UNCERTAIN', 'THAT', 'I', 'EVER', 'FELT', 'TRUE', 'CONTRITION', 'WANDERING', 'IN', 'THOUGHT', 'AND', 'DEED', 'LONGING', 'FOR', 'HOLINESS', 'WHICH', 'I', 'SHALL', 'NEVER', 'NEVER', 'OBTAIN', 'SMITTEN', 'AT', 'TIMES', 'TO', 'THE', 'HEART', 'WITH', 'THE', 'CONVICTION', 'THAT', 'GHASTLY', 'CALVINISTIC', 'DOCTRINES', 'ARE', 'TRUE', 'DARKENED', 'IN', 'SHORT', 'BY', 'THE', 'VERY', 'SHADOWS', 'OF', 'SPIRITUAL', 'DEATH'] +3575-170457-0037-1007: ref=['IF', 'CHRISTIAN', 'PERFECTION', 'BE', 'NECESSARY', 'TO', 'SALVATION', 'I', 'SHALL', 'NEVER', 'BE', 'SAVED', 'MY', 'HEART', 'IS', 'A', 'VERY', 'HOTBED', 'FOR', 'SINFUL', 'THOUGHTS', 'AND', 'WHEN', 'I', 'DECIDE', 'ON', 'AN', 'ACTION', 'I', 'SCARCELY', 'REMEMBER', 'TO', 'LOOK', 'TO', 'MY', 'REDEEMER', 'FOR', 'DIRECTION'] +3575-170457-0037-1007: hyp=['IF', 'CHRISTIAN', 'PERFECTION', 'BE', 'NECESSARY', 'TO', 'SALVATION', 'I', 'SHALL', 'NEVER', 'BE', 'SAVED', 'MY', 'HEART', 'IS', 'A', 'VERY', 'HOT', 'BED', 'FOR', 'SINFUL', 'THOUGHTS', 'AND', 'WHEN', 'I', 'DECIDE', 'ON', 'AN', 'ACTION', 'I', 'SCARCELY', 'REMEMBER', 'TO', 'LOOK', 'TO', 'MY', 'REDEEMER', 'FOR', 'DIRECTION'] +3575-170457-0038-1008: ref=['AND', 'MEANTIME', 'I', 'KNOW', 'THE', 'GREATNESS', 'OF', 'JEHOVAH', 'I', 'ACKNOWLEDGE', 'THE', 'PERFECTION', 'OF', 'HIS', 'WORD', 'I', 'ADORE', 'THE', 'PURITY', 'OF', 'THE', 'CHRISTIAN', 'FAITH', 'MY', 'THEORY', 'IS', 'RIGHT', 'MY', 'PRACTICE', 'HORRIBLY', 'WRONG'] +3575-170457-0038-1008: hyp=['AND', 'MEANTIME', 'I', 'KNOW', 'THE', 'GREATNESS', 'OF', 'JEHOVAH', 'I', 'ACKNOWLEDGE', 'THE', 'PERFECTION', 'OF', 'HIS', 'WORD', 'I', 'ADORE', 'THE', 'PURITY', 'OF', 'THE', 'CHRISTIAN', 'FAITH', 'MY', 'THEORY', 'IS', 'RIGHT', 'MY', 'PRACTICE', 'HORRIBLY', 'WRONG'] +3575-170457-0039-1009: ref=['THE', 'CHRISTMAS', 'HOLIDAYS', 'CAME', 'AND', 'SHE', 'AND', 'ANNE', 'RETURNED', 'TO', 'THE', 'PARSONAGE', 'AND', 'TO', 'THAT', 'HAPPY', 'HOME', 'CIRCLE', 'IN', 'WHICH', 'ALONE', 'THEIR', 'NATURES', 'EXPANDED', 'AMONGST', 'ALL', 'OTHER', 'PEOPLE', 'THEY', 'SHRIVELLED', 'UP', 'MORE', 'OR', 'LESS'] +3575-170457-0039-1009: hyp=['THE', 'CHRISTMAS', 'HOLIDAYS', 'CAME', 'AND', 'SHE', 'AND', 'ANNE', 'RETURNED', 'TO', 'THE', 'PARSONAGE', 'AND', 'TO', 'THAT', 'HAPPY', 'HOME', 'CIRCLE', 'IN', 'WHICH', 'ALONE', 'THEIR', 'NATURES', 'EXPANDED', 'AMONGST', 'ALL', 'OTHER', 'PEOPLE', 'THEY', 'SHRIVELLED', 'UP', 'MORE', 'OR', 'LESS'] +3575-170457-0040-1010: ref=['INDEED', 'THERE', 'WERE', 'ONLY', 'ONE', 'OR', 'TWO', 'STRANGERS', 'WHO', 'COULD', 'BE', 'ADMITTED', 'AMONG', 'THE', 'SISTERS', 'WITHOUT', 'PRODUCING', 'THE', 'SAME', 'RESULT'] +3575-170457-0040-1010: hyp=['INDEED', 'THERE', 'WERE', 'ONLY', 'ONE', 'OR', 'TWO', 'STRANGERS', 'WHO', 'COULD', 'BE', 'ADMITTED', 'AMONG', 'THE', 'SISTERS', 'WITHOUT', 'PRODUCING', 'THE', 'SAME', 'RESULT'] +3575-170457-0041-1011: ref=['SHE', 'WAS', 'GONE', 'OUT', 'INTO', 'THE', 'VILLAGE', 'ON', 'SOME', 'ERRAND', 'WHEN', 'AS', 'SHE', 'WAS', 'DESCENDING', 'THE', 'STEEP', 'STREET', 'HER', 'FOOT', 'SLIPPED', 'ON', 'THE', 'ICE', 'AND', 'SHE', 'FELL', 'IT', 'WAS', 'DARK', 'AND', 'NO', 'ONE', 'SAW', 'HER', 'MISCHANCE', 'TILL', 'AFTER', 'A', 'TIME', 'HER', 'GROANS', 'ATTRACTED', 'THE', 'ATTENTION', 'OF', 'A', 'PASSER', 'BY'] +3575-170457-0041-1011: hyp=['SHE', 'WAS', 'GONE', 'OUT', 'INTO', 'THE', 'VILLAGE', 'ON', 'SOME', 'ERRAND', 'WHEN', 'AS', 'SHE', 'WAS', 'DESCENDING', 'THE', 'STEEP', 'STREET', 'HER', 'FOOT', 
'SLIPPED', 'ON', 'THE', 'ICE', 'AND', 'SHE', 'FELL', 'IT', 'WAS', 'DARK', 'AND', 'NO', 'ONE', 'SAW', 'HER', 'MISCHANCE', 'TILL', 'AFTER', 'A', 'TIME', 'HER', 'GROANS', 'ATTRACTED', 'THE', 'ATTENTION', 'OF', 'A', 'PASSER', 'BY'] +3575-170457-0042-1012: ref=['UNFORTUNATELY', 'THE', 'FRACTURE', 'COULD', 'NOT', 'BE', 'SET', 'TILL', 'SIX', "O'CLOCK", 'THE', 'NEXT', 'MORNING', 'AS', 'NO', 'SURGEON', 'WAS', 'TO', 'BE', 'HAD', 'BEFORE', 'THAT', 'TIME', 'AND', 'SHE', 'NOW', 'LIES', 'AT', 'OUR', 'HOUSE', 'IN', 'A', 'VERY', 'DOUBTFUL', 'AND', 'DANGEROUS', 'STATE'] +3575-170457-0042-1012: hyp=['UNFORTUNATELY', 'THE', 'FRACTURE', 'COULD', 'NOT', 'BE', 'SET', 'TILL', 'SIX', "O'CLOCK", 'THE', 'NEXT', 'MORNING', 'AS', 'NO', 'SURGEON', 'WAS', 'TO', 'BE', 'HAD', 'BEFORE', 'THAT', 'TIME', 'AND', 'SHE', 'NOW', 'LIES', 'AT', 'OUR', 'HOUSE', 'IN', 'A', 'VERY', 'DOUBTFUL', 'AND', 'DANGEROUS', 'STATE'] +3575-170457-0043-1013: ref=['HOWEVER', 'REMEMBERING', 'WHAT', 'YOU', 'TOLD', 'ME', 'NAMELY', 'THAT', 'YOU', 'HAD', 'COMMENDED', 'THE', 'MATTER', 'TO', 'A', 'HIGHER', 'DECISION', 'THAN', 'OURS', 'AND', 'THAT', 'YOU', 'WERE', 'RESOLVED', 'TO', 'SUBMIT', 'WITH', 'RESIGNATION', 'TO', 'THAT', 'DECISION', 'WHATEVER', 'IT', 'MIGHT', 'BE', 'I', 'HOLD', 'IT', 'MY', 'DUTY', 'TO', 'YIELD', 'ALSO', 'AND', 'TO', 'BE', 'SILENT', 'IT', 'MAY', 'BE', 'ALL', 'FOR', 'THE', 'BEST'] +3575-170457-0043-1013: hyp=['HOWEVER', 'REMEMBERING', 'WHAT', 'YOU', 'TOLD', 'ME', 'NAMELY', 'THAT', 'YOU', 'HAD', 'COMMENDED', 'THE', 'MATTER', 'TO', 'A', 'HIGHER', 'DECISION', 'THAN', 'OURS', 'AND', 'THAT', 'YOU', 'WERE', 'RESOLVED', 'TO', 'SUBMIT', 'WITH', 'RESIGNATION', 'TO', 'THAT', 'DECISION', 'WHATEVER', 'IT', 'MIGHT', 'BE', 'I', 'HOLD', 'IT', 'MY', 'DUTY', 'TO', 'YIELD', 'ALSO', 'AND', 'TO', 'BE', 'SILENT', 'AND', 'MAY', 'BE', 'ALL', 'FOR', 'THE', 'BEST'] +3575-170457-0044-1014: ref=['AFTER', 'THIS', 'DISAPPOINTMENT', 'I', 'NEVER', 'DARE', 'RECKON', 'WITH', 'CERTAINTY', 'ON', 'THE', 'ENJOYMENT', 'OF', 'A', 'PLEASURE', 'AGAIN', 'IT', 'SEEMS', 'AS', 'IF', 'SOME', 'FATALITY', 'STOOD', 'BETWEEN', 'YOU', 'AND', 'ME'] +3575-170457-0044-1014: hyp=['AFTER', 'THIS', 'DISAPPOINTMENT', 'I', 'NEVER', 'DARE', 'RECKON', 'WITH', 'CERTAINTY', 'ON', 'THE', 'ENJOYMENT', 'OF', 'A', 'PLEASURE', 'AGAIN', 'IT', 'SEEMS', 'AS', 'IF', 'SOME', 'FATALITY', 'STOOD', 'BETWEEN', 'YOU', 'AND', 'ME'] +3575-170457-0045-1015: ref=['I', 'AM', 'NOT', 'GOOD', 'ENOUGH', 'FOR', 'YOU', 'AND', 'YOU', 'MUST', 'BE', 'KEPT', 'FROM', 'THE', 'CONTAMINATION', 'OF', 'TOO', 'INTIMATE', 'SOCIETY'] +3575-170457-0045-1015: hyp=['I', 'AM', 'NOT', 'GOOD', 'ENOUGH', 'FOR', 'YOU', 'AND', 'YOU', 'MUST', 'BE', 'KEPT', 'FROM', 'THE', 'CONTAMINATION', 'OF', 'TOO', 'INTIMATE', 'SOCIETY'] +3575-170457-0046-1016: ref=['A', 'GOOD', 'NEIGHBOUR', 'OF', 'THE', 'BRONTES', 'A', 'CLEVER', 'INTELLIGENT', 'YORKSHIRE', 'WOMAN', 'WHO', 'KEEPS', 'A', "DRUGGIST'S", 'SHOP', 'IN', 'HAWORTH', 'AND', 'FROM', 'HER', 'OCCUPATION', 'HER', 'EXPERIENCE', 'AND', 'EXCELLENT', 'SENSE', 'HOLDS', 'THE', 'POSITION', 'OF', 'VILLAGE', 'DOCTRESS', 'AND', 'NURSE', 'AND', 'AS', 'SUCH', 'HAS', 'BEEN', 'A', 'FRIEND', 'IN', 'MANY', 'A', 'TIME', 'OF', 'TRIAL', 'AND', 'SICKNESS', 'AND', 'DEATH', 'IN', 'THE', 'HOUSEHOLDS', 'ROUND', 'TOLD', 'ME', 'A', 'CHARACTERISTIC', 'LITTLE', 'INCIDENT', 'CONNECTED', 'WITH', "TABBY'S", 'FRACTURED', 'LEG'] +3575-170457-0046-1016: hyp=['A', 'GOOD', 'NEIGHBOUR', 'OF', 'THE', 'BRONTES', 'A', 'CLEVER', 'INTELLIGENT', 'YORKSHIRE', 'WOMAN', 'WHO', 'KEEPS', 'A', 'DRUGGIST', 'SHOP', 'IN', 'HAWWORTH', 'AND', 'FROM', 
'HER', 'OCCUPATION', 'HER', 'EXPERIENCE', 'IN', 'EXCELLENT', 'SENSE', 'HOLDS', 'THE', 'POSITION', 'OF', 'VILLAGE', 'DOCTRESS', 'AND', 'NURSE', 'AND', 'AS', 'SUCH', 'HAS', 'BEEN', 'A', 'FRIEND', 'IN', 'MANY', 'A', 'TIME', 'OF', 'TRIAL', 'AND', 'SICKNESS', 'AND', 'DEATH', 'IN', 'THE', 'HOUSEHOLDS', 'ROUND', 'TOLD', 'ME', 'A', 'CHARACTERISTIC', 'LITTLE', 'INCIDENT', 'CONNECTED', 'WITH', "TABBY'S", 'FRACTURED', 'LEG'] +3575-170457-0047-1017: ref=['TABBY', 'HAD', 'LIVED', 'WITH', 'THEM', 'FOR', 'TEN', 'OR', 'TWELVE', 'YEARS', 'AND', 'WAS', 'AS', 'CHARLOTTE', 'EXPRESSED', 'IT', 'ONE', 'OF', 'THE', 'FAMILY'] +3575-170457-0047-1017: hyp=['TABBY', 'HAD', 'LIVED', 'WITH', 'THEM', 'FOR', 'TEN', 'OR', 'TWELVE', 'YEARS', 'AND', 'WAS', 'AS', 'CHARLOTTE', 'EXPRESSED', 'IT', 'ONE', 'OF', 'THE', 'FAMILY'] +3575-170457-0048-1018: ref=['HE', 'REFUSED', 'AT', 'FIRST', 'TO', 'LISTEN', 'TO', 'THE', 'CAREFUL', 'ADVICE', 'IT', 'WAS', 'REPUGNANT', 'TO', 'HIS', 'LIBERAL', 'NATURE'] +3575-170457-0048-1018: hyp=['HE', 'REFUSED', 'AT', 'FIRST', 'TO', 'LISTEN', 'TO', 'THE', 'CAREFUL', 'ADVICE', 'IT', 'WAS', 'REPUGNANT', 'TO', 'HIS', 'LIBERAL', 'NATURE'] +3575-170457-0049-1019: ref=['THIS', 'DECISION', 'WAS', 'COMMUNICATED', 'TO', 'THE', 'GIRLS'] +3575-170457-0049-1019: hyp=['THIS', 'DECISION', 'WAS', 'COMMUNICATED', 'TO', 'THE', 'GIRLS'] +3575-170457-0050-1020: ref=['TABBY', 'HAD', 'TENDED', 'THEM', 'IN', 'THEIR', 'CHILDHOOD', 'THEY', 'AND', 'NONE', 'OTHER', 'SHOULD', 'TEND', 'HER', 'IN', 'HER', 'INFIRMITY', 'AND', 'AGE'] +3575-170457-0050-1020: hyp=['TABBY', 'HAD', 'TENDED', 'THEM', 'IN', 'THEIR', 'CHILDHOOD', 'THEY', 'AND', 'NONE', 'OTHER', 'SHOULD', 'TEND', 'HER', 'IN', 'HER', 'INFIRMITY', 'AND', 'AGE'] +3575-170457-0051-1021: ref=['AT', 'TEA', 'TIME', 'THEY', 'WERE', 'SAD', 'AND', 'SILENT', 'AND', 'THE', 'MEAL', 'WENT', 'AWAY', 'UNTOUCHED', 'BY', 'ANY', 'OF', 'THE', 'THREE'] +3575-170457-0051-1021: hyp=['AT', 'TEA', 'TIME', 'THEY', 'WERE', 'SAD', 'AND', 'SILENT', 'AND', 'THE', 'MEAL', 'WENT', 'AWAY', 'UNTOUCHED', 'BY', 'ANY', 'OF', 'THE', 'THREE'] +3575-170457-0052-1022: ref=['SHE', 'HAD', 'ANOTHER', 'WEIGHT', 'ON', 'HER', 'MIND', 'THIS', 'CHRISTMAS'] +3575-170457-0052-1022: hyp=['SHE', 'HAD', 'ANOTHER', 'WEIGHT', 'ON', 'HER', 'MIND', 'THIS', 'CHRISTMAS'] +3575-170457-0053-1023: ref=['BUT', 'ANNE', 'HAD', 'BEGUN', 'TO', 'SUFFER', 'JUST', 'BEFORE', 'THE', 'HOLIDAYS', 'AND', 'CHARLOTTE', 'WATCHED', 'OVER', 'HER', 'YOUNGER', 'SISTERS', 'WITH', 'THE', 'JEALOUS', 'VIGILANCE', 'OF', 'SOME', 'WILD', 'CREATURE', 'THAT', 'CHANGES', 'HER', 'VERY', 'NATURE', 'IF', 'DANGER', 'THREATENS', 'HER', 'YOUNG'] +3575-170457-0053-1023: hyp=['BUT', 'ANNE', 'HAD', 'BEGUN', 'TO', 'SUFFER', 'JUST', 'BEFORE', 'THE', 'HOLIDAYS', 'AND', 'CHARLOTTE', 'WATCHED', 'OVER', 'HER', 'YOUNGER', 'SISTERS', 'WITH', 'THE', 'JEALOUS', 'VIGILANCE', 'OF', 'SOME', 'WILD', 'CREATURE', 'THAT', 'CHANGES', 'HER', 'VERY', 'NATURE', 'IF', 'DANGER', 'THREATENS', 'HER', 'YOUNG'] +3575-170457-0054-1024: ref=['STUNG', 'BY', 'ANXIETY', 'FOR', 'THIS', 'LITTLE', 'SISTER', 'SHE', 'UPBRAIDED', 'MISS', 'W', 'FOR', 'HER', 'FANCIED', 'INDIFFERENCE', 'TO', "ANNE'S", 'STATE', 'OF', 'HEALTH'] +3575-170457-0054-1024: hyp=['STUNG', 'BY', 'ANXIETY', 'FOR', 'THIS', 'LITTLE', 'SISTER', 'SHE', 'UPBRAIDED', 'MISS', 'W', 'FOR', 'HER', 'FANCIED', 'INDIFFERENCE', 'TO', "ANNE'S", 'STATE', 'OF', 'HEALTH'] +3575-170457-0055-1025: ref=['STILL', 'HER', 'HEART', 'HAD', 'RECEIVED', 'A', 'SHOCK', 'IN', 'THE', 'PERCEPTION', 'OF', "ANNE'S", 'DELICACY', 'AND', 'ALL', 'THESE', 'HOLIDAYS', 'SHE', 
'WATCHED', 'OVER', 'HER', 'WITH', 'THE', 'LONGING', 'FOND', 'ANXIETY', 'WHICH', 'IS', 'SO', 'FULL', 'OF', 'SUDDEN', 'PANGS', 'OF', 'FEAR'] +3575-170457-0055-1025: hyp=['STILL', 'HER', 'HEART', 'HAD', 'RECEIVED', 'A', 'SHOCK', 'IN', 'THE', 'PERCEPTION', 'OF', "ANNE'S", 'DELICACY', 'AND', 'ALL', 'THESE', 'HOLIDAYS', 'SHE', 'WATCHED', 'OVER', 'HER', 'WITH', 'THE', 'LONGING', 'FOND', 'ANXIETY', 'WHICH', 'IS', 'SO', 'FULL', 'OF', 'SUDDEN', 'PANGS', 'OF', 'FEAR'] +3575-170457-0056-1026: ref=['I', 'DOUBT', 'WHETHER', 'BRANWELL', 'WAS', 'MAINTAINING', 'HIMSELF', 'AT', 'THIS', 'TIME'] +3575-170457-0056-1026: hyp=['I', 'DOUBT', 'WHETHER', 'BRANWELL', 'WAS', 'MAINTAINING', 'HIMSELF', 'AT', 'THIS', 'TIME'] +3729-6852-0000-1027: ref=['TO', 'CELEBRATE', 'THE', 'ARRIVAL', 'OF', 'HER', 'SON', 'SILVIA', 'GAVE', 'A', 'SPLENDID', 'SUPPER', 'TO', 'WHICH', 'SHE', 'HAD', 'INVITED', 'ALL', 'HER', 'RELATIVES', 'AND', 'IT', 'WAS', 'A', 'GOOD', 'OPPORTUNITY', 'FOR', 'ME', 'TO', 'MAKE', 'THEIR', 'ACQUAINTANCE'] +3729-6852-0000-1027: hyp=['TO', 'CELEBRATE', 'THE', 'ARRIVAL', 'OF', 'HER', 'SON', 'SYLVIA', 'GAVE', 'A', 'SPLENDID', 'SUPPER', 'TO', 'WHICH', 'SHE', 'HAD', 'INVITED', 'ALL', 'HER', 'RELATIVES', 'AND', 'IT', 'WAS', 'A', 'GOOD', 'OPPORTUNITY', 'FOR', 'ME', 'TO', 'MAKE', 'THEIR', 'ACQUAINTANCE'] +3729-6852-0001-1028: ref=['WITHOUT', 'SAYING', 'IT', 'POSITIVELY', 'SHE', 'MADE', 'ME', 'UNDERSTAND', 'THAT', 'BEING', 'HERSELF', 'AN', 'ILLUSTRIOUS', 'MEMBER', 'OF', 'THE', 'REPUBLIC', 'OF', 'LETTERS', 'SHE', 'WAS', 'WELL', 'AWARE', 'THAT', 'SHE', 'WAS', 'SPEAKING', 'TO', 'AN', 'INSECT'] +3729-6852-0001-1028: hyp=['WITHOUT', 'SAYING', 'IT', 'POSITIVELY', 'SHE', 'MADE', 'ME', 'UNDERSTAND', 'THAT', 'BEING', 'HERSELF', 'AN', 'ILLUSTRIOUS', 'MEMBER', 'OF', 'THE', 'REPUBLIC', 'OF', 'LETTERS', 'SHE', 'WAS', 'WELL', 'AWARE', 'THAT', 'SHE', 'WAS', 'SPEAKING', 'TO', 'AN', 'INSECT'] +3729-6852-0002-1029: ref=['IN', 'ORDER', 'TO', 'PLEASE', 'HER', 'I', 'SPOKE', 'TO', 'HER', 'OF', 'THE', 'ABBE', 'CONTI', 'AND', 'I', 'HAD', 'OCCASION', 'TO', 'QUOTE', 'TWO', 'LINES', 'OF', 'THAT', 'PROFOUND', 'WRITER'] +3729-6852-0002-1029: hyp=['IN', 'ORDER', 'TO', 'PLEASE', 'HER', 'I', 'SPOKE', 'TO', 'HER', 'OF', 'THE', 'ABBE', 'CONTI', 'AND', 'I', 'HAD', 'OCCASION', 'TO', 'QUOTE', 'TWO', 'LINES', 'OF', 'THAT', 'PROFOUND', 'WRITER'] +3729-6852-0003-1030: ref=['MADAM', 'CORRECTED', 'ME', 'WITH', 'A', 'PATRONIZING', 'AIR', 'FOR', 'MY', 'PRONUNCIATION', 'OF', 'THE', 'WORD', 'SCEVRA', 'WHICH', 'MEANS', 'DIVIDED', 'SAYING', 'THAT', 'IT', 'OUGHT', 'TO', 'BE', 'PRONOUNCED', 'SCEURA', 'AND', 'SHE', 'ADDED', 'THAT', 'I', 'OUGHT', 'TO', 'BE', 'VERY', 'GLAD', 'TO', 'HAVE', 'LEARNED', 'SO', 'MUCH', 'ON', 'THE', 'FIRST', 'DAY', 'OF', 'MY', 'ARRIVAL', 'IN', 'PARIS', 'TELLING', 'ME', 'THAT', 'IT', 'WOULD', 'BE', 'AN', 'IMPORTANT', 'DAY', 'IN', 'MY', 'LIFE'] +3729-6852-0003-1030: hyp=['MADAME', 'CORRECTED', 'ME', 'WITH', 'A', 'PATRONIZING', 'AIR', 'FOR', 'MY', 'PRONUNCIATION', 'OF', 'THE', 'WORD', 'SCEVERA', 'WHICH', 'MEANS', 'DIVIDED', 'SAYING', 'THAT', 'IT', 'OUGHT', 'TO', 'BE', 'PRONOUNCED', 'SCURA', 'AND', 'SHE', 'ADDED', 'THAT', 'I', 'OUGHT', 'TO', 'BE', 'VERY', 'GLAD', 'TO', 'HAVE', 'LEARNED', 'SO', 'MUCH', 'ON', 'THE', 'FIRST', 'DAY', 'OF', 'MY', 'ARRIVAL', 'IN', 'PARIS', 'TELLING', 'ME', 'THAT', 'IT', 'WOULD', 'BE', 'AN', 'IMPORTANT', 'DAY', 'IN', 'MY', 'LIFE'] +3729-6852-0004-1031: ref=['HER', 'FACE', 'WAS', 'AN', 'ENIGMA', 'FOR', 'IT', 'INSPIRED', 'EVERYONE', 'WITH', 'THE', 'WARMEST', 'SYMPATHY', 'AND', 'YET', 'IF', 'YOU', 'EXAMINED', 'IT', 
'ATTENTIVELY', 'THERE', 'WAS', 'NOT', 'ONE', 'BEAUTIFUL', 'FEATURE', 'SHE', 'COULD', 'NOT', 'BE', 'CALLED', 'HANDSOME', 'BUT', 'NO', 'ONE', 'COULD', 'HAVE', 'THOUGHT', 'HER', 'UGLY'] +3729-6852-0004-1031: hyp=['HER', 'FACE', 'WAS', 'AN', 'ENIGMA', 'FOR', 'IT', 'INSPIRED', 'EVERY', 'ONE', 'WITH', 'THE', 'WARMEST', 'SYMPATHY', 'AND', 'YET', 'IF', 'YOU', 'EXAMINED', 'IT', 'ATTENTIVELY', 'THERE', 'WAS', 'NOT', 'ONE', 'BEAUTIFUL', 'FEATURE', 'SHE', 'COULD', 'NOT', 'BE', 'CALLED', 'HANDSOME', 'BUT', 'NO', 'ONE', 'COULD', 'HAVE', 'THOUGHT', 'HER', 'UGLY'] +3729-6852-0005-1032: ref=['SILVIA', 'WAS', 'THE', 'ADORATION', 'OF', 'FRANCE', 'AND', 'HER', 'TALENT', 'WAS', 'THE', 'REAL', 'SUPPORT', 'OF', 'ALL', 'THE', 'COMEDIES', 'WHICH', 'THE', 'GREATEST', 'AUTHORS', 'WROTE', 'FOR', 'HER', 'ESPECIALLY', 'OF', 'THE', 'PLAYS', 'OF', 'MARIVAUX', 'FOR', 'WITHOUT', 'HER', 'HIS', 'COMEDIES', 'WOULD', 'NEVER', 'HAVE', 'GONE', 'TO', 'POSTERITY'] +3729-6852-0005-1032: hyp=['SYLVIA', 'WAS', 'THE', 'ADORATION', 'OF', 'FRANCE', 'AND', 'HER', 'TALENT', 'WAS', 'THE', 'REAL', 'SUPPORT', 'OF', 'ALL', 'THE', 'COMEDIES', 'WHICH', 'THE', 'GREATEST', 'AUTHORS', 'WROTE', 'FOR', 'HER', 'ESPECIALLY', 'OF', 'THE', 'PLAYS', 'OF', 'MAREVAUX', 'FOR', 'WITHOUT', 'HER', 'HIS', 'COMEDIES', 'WOULD', 'NEVER', 'HAVE', 'GONE', 'TO', 'POSTERITY'] +3729-6852-0006-1033: ref=['SILVIA', 'DID', 'NOT', 'THINK', 'THAT', 'HER', 'GOOD', 'CONDUCT', 'WAS', 'A', 'MERIT', 'FOR', 'SHE', 'KNEW', 'THAT', 'SHE', 'WAS', 'VIRTUOUS', 'ONLY', 'BECAUSE', 'HER', 'SELF', 'LOVE', 'COMPELLED', 'HER', 'TO', 'BE', 'SO', 'AND', 'SHE', 'NEVER', 'EXHIBITED', 'ANY', 'PRIDE', 'OR', 'ASSUMED', 'ANY', 'SUPERIORITY', 'TOWARDS', 'HER', 'THEATRICAL', 'SISTERS', 'ALTHOUGH', 'SATISFIED', 'TO', 'SHINE', 'BY', 'THEIR', 'TALENT', 'OR', 'THEIR', 'BEAUTY', 'THEY', 'CARED', 'LITTLE', 'ABOUT', 'RENDERING', 'THEMSELVES', 'CONSPICUOUS', 'BY', 'THEIR', 'VIRTUE'] +3729-6852-0006-1033: hyp=['SYLVIA', 'DID', 'NOT', 'THINK', 'THAT', 'HER', 'GOOD', 'CONDUCT', 'WAS', 'A', 'MERIT', 'FOR', 'SHE', 'KNEW', 'THAT', 'SHE', 'WAS', 'VIRTUOUS', 'ONLY', 'BECAUSE', 'HER', 'SELF', 'LOVE', 'COMPELLED', 'HER', 'TO', 'BE', 'SO', 'AND', 'SHE', 'NEVER', 'EXHIBITED', 'ANY', 'PRIDE', 'OR', 'ASSUMED', 'ANY', 'SUPERIORITY', 'TOWARDS', 'HER', 'THEATRICAL', 'SISTERS', 'ALTHOUGH', 'SATISFIED', 'TO', 'SHINE', 'BY', 'THEIR', 'TALENT', 'OR', 'THEIR', 'BEAUTY', 'THEY', 'CARED', 'LITTLE', 'ABOUT', 'RENDERING', 'THEMSELVES', 'CONSPICUOUS', 'BY', 'THEIR', 'VIRTUE'] +3729-6852-0007-1034: ref=['TWO', 'YEARS', 'BEFORE', 'HER', 'DEATH', 'I', 'SAW', 'HER', 'PERFORM', 'THE', 'CHARACTER', 'OF', 'MARIANNE', 'IN', 'THE', 'COMEDY', 'OF', 'MARIVAUX', 'AND', 'IN', 'SPITE', 'OF', 'HER', 'AGE', 'AND', 'DECLINING', 'HEALTH', 'THE', 'ILLUSION', 'WAS', 'COMPLETE'] +3729-6852-0007-1034: hyp=['TWO', 'YEARS', 'BEFORE', 'HER', 'DEATH', 'I', 'SAW', 'HER', 'PERFORM', 'THE', 'CHARACTER', 'OF', 'MARY', 'ANNE', 'IN', 'THE', 'COMEDY', 'OF', 'MARAVAUX', 'AND', 'IN', 'SPITE', 'OF', 'HER', 'AGE', 'AND', 'DECLINING', 'HEALTH', 'THE', 'ILLUSION', 'WAS', 'COMPLETE'] +3729-6852-0008-1035: ref=['SHE', 'WAS', 'HONOURABLY', 'BURIED', 'IN', 'THE', 'CHURCH', 'OF', 'SAINT', 'SAUVEUR', 'WITHOUT', 'THE', 'SLIGHTEST', 'OPPOSITION', 'FROM', 'THE', 'VENERABLE', 'PRIEST', 'WHO', 'FAR', 'FROM', 'SHARING', 'THE', 'ANTI', 'CHRISTAIN', 'INTOLERANCY', 'OF', 'THE', 'CLERGY', 'IN', 'GENERAL', 'SAID', 'THAT', 'HER', 'PROFESSION', 'AS', 'AN', 'ACTRESS', 'HAD', 'NOT', 'HINDERED', 'HER', 'FROM', 'BEING', 'A', 'GOOD', 'CHRISTIAN', 'AND', 'THAT', 'THE', 'EARTH', 'WAS', 'THE', 
'COMMON', 'MOTHER', 'OF', 'ALL', 'HUMAN', 'BEINGS', 'AS', 'JESUS', 'CHRIST', 'HAD', 'BEEN', 'THE', 'SAVIOUR', 'OF', 'ALL', 'MANKIND'] +3729-6852-0008-1035: hyp=['SHE', 'WAS', 'HONOROURABLY', 'BURIED', 'IN', 'THE', 'CHURCH', 'OF', 'SAINT', 'SOUVERT', 'WITHOUT', 'THE', 'SLIGHTEST', 'OPPOSITION', 'FROM', 'THE', 'VENERABLE', 'PRIEST', 'WHO', 'FAR', 'FROM', 'SHARING', 'THE', 'ANTI', 'CHRISTIAN', 'INTOLERANCY', 'OF', 'THE', 'CLERGY', 'IN', 'GENERAL', 'SAID', 'THAT', 'HER', 'PROFESSION', 'AS', 'AN', 'ACTRESS', 'HAD', 'NOT', 'HINDERED', 'HER', 'FROM', 'BEING', 'A', 'GOOD', 'CHRISTIAN', 'AND', 'THAT', 'THE', 'EARTH', 'WAS', 'A', 'COMMON', 'MOTHER', 'OF', 'ALL', 'HUMAN', 'BEINGS', 'AS', 'JESUS', 'CHRIST', 'HAD', 'BEEN', 'THE', 'SAVIOUR', 'OF', 'ALL', 'MANKIND'] +3729-6852-0009-1036: ref=['YOU', 'WILL', 'FORGIVE', 'ME', 'DEAR', 'READER', 'IF', 'I', 'HAVE', 'MADE', 'YOU', 'ATTEND', 'THE', 'FUNERAL', 'OF', 'SILVIA', 'TEN', 'YEARS', 'BEFORE', 'HER', 'DEATH', 'BELIEVE', 'ME', 'I', 'HAVE', 'NO', 'INTENTION', 'OF', 'PERFORMING', 'A', 'MIRACLE', 'YOU', 'MAY', 'CONSOLE', 'YOURSELF', 'WITH', 'THE', 'IDEA', 'THAT', 'I', 'SHALL', 'SPARE', 'YOU', 'THAT', 'UNPLEASANT', 'TASK', 'WHEN', 'POOR', 'SILVIA', 'DIES'] +3729-6852-0009-1036: hyp=['YOU', 'WILL', 'FORGIVE', 'ME', 'DEAR', 'READER', 'IF', 'I', 'HAVE', 'MADE', 'YOU', 'ATTEND', 'THE', 'FUNERAL', 'OF', 'SYLVIA', 'TEN', 'YEARS', 'BEFORE', 'HER', 'DEATH', 'BELIEVE', 'ME', 'I', 'HAVE', 'NO', 'INTENTION', 'OF', 'PERFORMING', 'A', 'MIRACLE', 'YOU', 'MAY', 'CONSOLE', 'YOURSELF', 'WITH', 'THE', 'IDEA', 'THAT', 'I', 'SHALL', 'SPARE', 'YOU', 'THAT', 'UNPLEASANT', 'TASK', 'WHEN', 'POOR', 'SYLVIA', 'DIES'] +3729-6852-0010-1037: ref=['I', 'NEVER', 'HAD', 'ANY', 'FAMILY'] +3729-6852-0010-1037: hyp=['I', 'NEVER', 'HAD', 'ANY', 'FAMILY'] +3729-6852-0011-1038: ref=['I', 'HAD', 'A', 'NAME', 'I', 'BELIEVE', 'IN', 'MY', 'YOUNG', 'DAYS', 'BUT', 'I', 'HAVE', 'FORGOTTEN', 'IT', 'SINCE', 'I', 'HAVE', 'BEEN', 'IN', 'SERVICE'] +3729-6852-0011-1038: hyp=['I', 'HAD', 'A', 'NAME', 'I', 'BELIEVE', 'IN', 'MY', 'YOUNG', 'DAYS', 'BUT', 'I', 'HAVE', 'FORGOTTEN', 'IT', 'SINCE', 'I', 'HAVE', 'BEEN', 'IN', 'SERVICE'] +3729-6852-0012-1039: ref=['I', 'SHALL', 'CALL', 'YOU', 'ESPRIT'] +3729-6852-0012-1039: hyp=['I', 'SHALL', 'CALL', 'YOU', 'A', 'SPREE'] +3729-6852-0013-1040: ref=['YOU', 'DO', 'ME', 'A', 'GREAT', 'HONOUR'] +3729-6852-0013-1040: hyp=['YOU', 'DO', 'ME', 'A', 'GREAT', 'HONOR'] +3729-6852-0014-1041: ref=['HERE', 'GO', 'AND', 'GET', 'ME', 'CHANGE', 'FOR', 'A', 'LOUIS', 'I', 'HAVE', 'IT', 'SIR'] +3729-6852-0014-1041: hyp=['HERE', 'GO', 'AND', 'GET', 'ME', 'CHANGED', 'FOR', 'A', 'LOUIS', 'I', 'HAVE', 'IT', 'SIR'] +3729-6852-0015-1042: ref=['AT', 'YOUR', 'SERVICE', 'SIR'] +3729-6852-0015-1042: hyp=['AT', 'YOUR', 'SERVICE', 'SIR'] +3729-6852-0016-1043: ref=['MADAME', 'QUINSON', 'BESIDES', 'CAN', 'ANSWER', 'YOUR', 'ENQUIRIES'] +3729-6852-0016-1043: hyp=['MADAME', 'QUINCON', 'BESIDES', 'CAN', 'ANSWER', 'YOUR', 'INQUIRIES'] +3729-6852-0017-1044: ref=['I', 'SEE', 'A', 'QUANTITY', 'OF', 'CHAIRS', 'FOR', 'HIRE', 'AT', 'THE', 'RATE', 'OF', 'ONE', 'SOU', 'MEN', 'READING', 'THE', 'NEWSPAPER', 'UNDER', 'THE', 'SHADE', 'OF', 'THE', 'TREES', 'GIRLS', 'AND', 'MEN', 'BREAKFASTING', 'EITHER', 'ALONE', 'OR', 'IN', 'COMPANY', 'WAITERS', 'WHO', 'WERE', 'RAPIDLY', 'GOING', 'UP', 'AND', 'DOWN', 'A', 'NARROW', 'STAIRCASE', 'HIDDEN', 'UNDER', 'THE', 'FOLIAGE'] +3729-6852-0017-1044: hyp=['I', 'SEE', 'A', 'QUANTITY', 'OF', 'CHAIRS', 'FOR', 'HIRE', 'AT', 'THE', 'RATE', 'OF', 'ONE', 'SOUS', 'MEN', 'READING', 'THE', 
'NEWSPAPER', 'UNDER', 'THE', 'SHADE', 'OF', 'THE', 'TREES', 'GIRLS', 'AND', 'MEN', 'BREAKFASTING', 'EITHER', 'ALONE', 'OR', 'IN', 'COMPANY', 'WAITERS', 'WHO', 'WERE', 'RAPIDLY', 'GOING', 'UP', 'AND', 'DOWN', 'A', 'NARROW', 'STAIRCASE', 'HIDDEN', 'UNDER', 'THE', 'FOLIAGE'] +3729-6852-0018-1045: ref=['I', 'SIT', 'DOWN', 'AT', 'A', 'SMALL', 'TABLE', 'A', 'WAITER', 'COMES', 'IMMEDIATELY', 'TO', 'ENQUIRE', 'MY', 'WISHES'] +3729-6852-0018-1045: hyp=['I', 'SIT', 'DOWN', 'AT', 'A', 'SMALL', 'TABLE', 'A', 'WAITER', 'COMES', 'IMMEDIATELY', 'TO', 'INQUIRE', 'MY', 'WISHES'] +3729-6852-0019-1046: ref=['I', 'TELL', 'HIM', 'TO', 'GIVE', 'ME', 'SOME', 'COFFEE', 'IF', 'IT', 'IS', 'GOOD'] +3729-6852-0019-1046: hyp=['I', 'TELL', 'HIM', 'TO', 'GIVE', 'ME', 'SOME', 'COFFEE', 'IF', 'IT', 'IS', 'GOOD'] +3729-6852-0020-1047: ref=['THEN', 'TURNING', 'TOWARDS', 'ME', 'HE', 'SAYS', 'THAT', 'I', 'LOOK', 'LIKE', 'A', 'FOREIGNER', 'AND', 'WHEN', 'I', 'SAY', 'THAT', 'I', 'AM', 'AN', 'ITALIAN', 'HE', 'BEGINS', 'TO', 'SPEAK', 'TO', 'ME', 'OF', 'THE', 'COURT', 'OF', 'THE', 'CITY', 'OF', 'THE', 'THEATRES', 'AND', 'AT', 'LAST', 'HE', 'OFFERS', 'TO', 'ACCOMPANY', 'ME', 'EVERYWHERE'] +3729-6852-0020-1047: hyp=['THEN', 'TURNING', 'TOWARDS', 'ME', 'HE', 'SAYS', 'THAT', 'I', 'LOOK', 'LIKE', 'A', 'FOREIGNER', 'AND', 'WHEN', 'I', 'SAY', 'THAT', 'I', 'AM', 'AN', 'ITALIAN', 'HE', 'BEGINS', 'TO', 'SPEAK', 'TO', 'ME', 'OF', 'THE', 'COURT', 'THE', 'CITY', 'OF', 'THE', 'THEATRES', 'AND', 'AT', 'LAST', 'HE', 'OFFERS', 'TO', 'ACCOMPANY', 'ME', 'EVERYWHERE'] +3729-6852-0021-1048: ref=['I', 'THANK', 'HIM', 'AND', 'TAKE', 'MY', 'LEAVE'] +3729-6852-0021-1048: hyp=['I', 'THANK', 'HIM', 'AND', 'TAKE', 'MY', 'LEAVE'] +3729-6852-0022-1049: ref=['I', 'ADDRESS', 'HIM', 'IN', 'ITALIAN', 'AND', 'HE', 'ANSWERS', 'VERY', 'WITTILY', 'BUT', 'HIS', 'WAY', 'OF', 'SPEAKING', 'MAKES', 'ME', 'SMILE', 'AND', 'I', 'TELL', 'HIM', 'WHY'] +3729-6852-0022-1049: hyp=['I', 'ADDRESS', 'HIM', 'IN', 'ITALIAN', 'AND', 'HE', 'ANSWERS', 'VERY', 'WITTILY', 'BUT', 'HIS', 'WAY', 'OF', 'SPEAKING', 'MAKES', 'ME', 'SMILE', 'AND', 'I', 'TELL', 'HIM', 'WHY'] +3729-6852-0023-1050: ref=['MY', 'REMARK', 'PLEASES', 'HIM', 'BUT', 'I', 'SOON', 'PROVE', 'TO', 'HIM', 'THAT', 'IT', 'IS', 'NOT', 'THE', 'RIGHT', 'WAY', 'TO', 'SPEAK', 'HOWEVER', 'PERFECT', 'MAY', 'HAVE', 'BEEN', 'THE', 'LANGUAGE', 'OF', 'THAT', 'ANCIENT', 'WRITER'] +3729-6852-0023-1050: hyp=['MY', 'REMARK', 'PLEASES', 'HIM', 'BUT', 'I', 'SOON', 'PROVE', 'TO', 'HIM', 'THAT', 'IT', 'IS', 'NOT', 'THE', 'RIGHT', 'WAY', 'TO', 'SPEAK', 'HOWEVER', 'PERFECT', 'MAY', 'HAVE', 'BEEN', 'THE', 'LANGUAGE', 'OF', 'THAT', 'ANCIENT', 'WRITER'] +3729-6852-0024-1051: ref=['I', 'SEE', 'A', 'CROWD', 'IN', 'ONE', 'CORNER', 'OF', 'THE', 'GARDEN', 'EVERYBODY', 'STANDING', 'STILL', 'AND', 'LOOKING', 'UP'] +3729-6852-0024-1051: hyp=['I', 'SEE', 'A', 'CROWD', 'IN', 'ONE', 'CORNER', 'OF', 'THE', 'GARDEN', 'EVERYBODY', 'STANDING', 'STILL', 'AND', 'LOOKING', 'UP'] +3729-6852-0025-1052: ref=['IS', 'THERE', 'NOT', 'A', 'MERIDIAN', 'EVERYWHERE'] +3729-6852-0025-1052: hyp=['IS', 'THERE', 'NOT', 'A', 'MERIDIAN', 'EVERYWHERE'] +3729-6852-0026-1053: ref=['YES', 'BUT', 'THE', 'MERIDIAN', 'OF', 'THE', 'PALAIS', 'ROYAL', 'IS', 'THE', 'MOST', 'EXACT'] +3729-6852-0026-1053: hyp=['YES', 'BUT', 'THE', 'MERIDIAN', 'OF', 'THE', 'PALAIS', 'ROYAL', 'IS', 'THE', 'MOST', 'EXACT'] +3729-6852-0027-1054: ref=['THAT', 'IS', 'TRUE', 'BADAUDERIE'] +3729-6852-0027-1054: hyp=['THAT', 'IS', 'TRUE', 'BADDELT', 'GRI'] +3729-6852-0028-1055: ref=['ALL', 'THESE', 'HONEST', 'PERSONS', 
'ARE', 'WAITING', 'THEIR', 'TURN', 'TO', 'GET', 'THEIR', 'SNUFF', 'BOXES', 'FILLED'] +3729-6852-0028-1055: hyp=['ALL', 'THESE', 'HONEST', 'PERSONS', 'ARE', 'WAITING', 'THEIR', 'TURN', 'TO', 'GET', 'THEIR', 'SNUFF', 'BOXES', 'FILLED'] +3729-6852-0029-1056: ref=['IT', 'IS', 'SOLD', 'EVERYWHERE', 'BUT', 'FOR', 'THE', 'LAST', 'THREE', 'WEEKS', 'NOBODY', 'WILL', 'USE', 'ANY', 'SNUFF', 'BUT', 'THAT', 'SOLD', 'AT', 'THE', 'CIVET', 'CAT'] +3729-6852-0029-1056: hyp=['IT', 'IS', 'SOLD', 'EVERYWHERE', 'BUT', 'FOR', 'THE', 'LAST', 'THREE', 'WEEKS', 'NOBODY', 'WILL', 'USE', 'ANY', 'SNUFF', 'BUT', 'THAT', 'SOLD', 'AT', 'THE', 'CEVETTE', 'CAT'] +3729-6852-0030-1057: ref=['IS', 'IT', 'BETTER', 'THAN', 'ANYWHERE', 'ELSE'] +3729-6852-0030-1057: hyp=['IS', 'IT', 'BETTER', 'THAN', 'ANYWHERE', 'ELSE'] +3729-6852-0031-1058: ref=['BUT', 'HOW', 'DID', 'SHE', 'MANAGE', 'TO', 'RENDER', 'IT', 'SO', 'FASHIONABLE'] +3729-6852-0031-1058: hyp=['BUT', 'HOW', 'DID', 'SHE', 'MANAGE', 'TO', 'RENDER', 'IT', 'SO', 'FASHIONABLE'] +3729-6852-0032-1059: ref=['SIMPLY', 'BY', 'STOPPING', 'HER', 'CARRIAGE', 'TWO', 'OR', 'THREE', 'TIMES', 'BEFORE', 'THE', 'SHOP', 'TO', 'HAVE', 'HER', 'SNUFF', 'BOX', 'FILLED', 'AND', 'BY', 'SAYING', 'ALOUD', 'TO', 'THE', 'YOUNG', 'GIRL', 'WHO', 'HANDED', 'BACK', 'THE', 'BOX', 'THAT', 'HER', 'SNUFF', 'WAS', 'THE', 'VERY', 'BEST', 'IN', 'PARIS'] +3729-6852-0032-1059: hyp=['SIMPLY', 'BY', 'STOPPING', 'HER', 'CARRIAGE', 'TWO', 'OR', 'THREE', 'TIMES', 'BEFORE', 'THE', 'SHOP', 'TO', 'HAVE', 'HER', 'SNUFF', 'BOX', 'FILLED', 'AND', 'BY', 'SAYING', 'ALOUD', 'TO', 'THE', 'YOUNG', 'GIRL', 'WHO', 'HANDED', 'BACK', 'THE', 'BOX', 'THAT', 'HER', 'SNUFF', 'WAS', 'THE', 'VERY', 'BEST', 'IN', 'PARIS'] +3729-6852-0033-1060: ref=['YOU', 'ARE', 'NOW', 'IN', 'THE', 'ONLY', 'COUNTRY', 'IN', 'THE', 'WORLD', 'WHERE', 'WIT', 'CAN', 'MAKE', 'A', 'FORTUNE', 'BY', 'SELLING', 'EITHER', 'A', 'GENUINE', 'OR', 'A', 'FALSE', 'ARTICLE', 'IN', 'THE', 'FIRST', 'CASE', 'IT', 'RECEIVES', 'THE', 'WELCOME', 'OF', 'INTELLIGENT', 'AND', 'TALENTED', 'PEOPLE', 'AND', 'IN', 'THE', 'SECOND', 'FOOLS', 'ARE', 'ALWAYS', 'READY', 'TO', 'REWARD', 'IT', 'FOR', 'SILLINESS', 'IS', 'TRULY', 'A', 'CHARACTERISTIC', 'OF', 'THE', 'PEOPLE', 'HERE', 'AND', 'HOWEVER', 'WONDERFUL', 'IT', 'MAY', 'APPEAR', 'SILLINESS', 'IS', 'THE', 'DAUGHTER', 'OF', 'WIT'] +3729-6852-0033-1060: hyp=['YOU', 'ARE', 'NOW', 'IN', 'THE', 'ONLY', 'COUNTRY', 'IN', 'THE', 'WORLD', 'WHERE', 'WIT', 'CAN', 'MAKE', 'A', 'FORTUNE', 'BY', 'SELLING', 'EITHER', 'A', 'GENUINE', 'OR', 'A', 'FALSE', 'ARTICLE', 'IN', 'THE', 'FIRST', 'CASE', 'IT', 'RECEIVES', 'THE', 'WELCOME', 'OF', 'INTELLIGENT', 'AND', 'TALENTED', 'PEOPLE', 'AND', 'IN', 'THE', 'SECOND', 'FOOLS', 'ARE', 'ALWAYS', 'READY', 'TO', 'REWARD', 'IT', 'FOR', 'SILLINESS', 'IS', 'TRULY', 'A', 'CHARACTERISTIC', 'OF', 'THE', 'PEOPLE', 'HERE', 'AND', 'HOWEVER', 'WONDERFUL', 'IT', 'MAY', 'APPEAR', 'SILLINESS', 'IS', 'THE', 'DAUGHTER', 'OF', 'WIT'] +3729-6852-0034-1061: ref=['LET', 'A', 'MAN', 'RUN', 'AND', 'EVERYBODY', 'WILL', 'RUN', 'AFTER', 'HIM', 'THE', 'CROWD', 'WILL', 'NOT', 'STOP', 'UNLESS', 'THE', 'MAN', 'IS', 'PROVED', 'TO', 'BE', 'MAD', 'BUT', 'TO', 'PROVE', 'IT', 'IS', 'INDEED', 'A', 'DIFFICULT', 'TASK', 'BECAUSE', 'WE', 'HAVE', 'A', 'CROWD', 'OF', 'MEN', 'WHO', 'MAD', 'FROM', 'THEIR', 'BIRTH', 'ARE', 'STILL', 'CONSIDERED', 'WISE'] +3729-6852-0034-1061: hyp=['LET', 'A', 'MAN', 'RUN', 'AND', 'EVERYBODY', 'WILL', 'RUN', 'AFTER', 'HIM', 'THE', 'CROWD', 'WILL', 'NOT', 'STOP', 'UNLESS', 'THE', 'MAN', 'IS', 'PROVED', 'TO', 'BE', 'MAD', 'BUT', 
'TO', 'PROVE', 'IT', 'IS', 'INDEED', 'A', 'DIFFICULT', 'TASK', 'BECAUSE', 'WE', 'HAVE', 'A', 'CROWD', 'OF', 'MEN', 'WHO', 'MAD', 'FROM', 'THEIR', 'BIRTH', 'ARE', 'STILL', 'CONSIDERED', 'WISE'] +3729-6852-0035-1062: ref=['IT', 'SEEMS', 'TO', 'ME', 'I', 'REPLIED', 'THAT', 'SUCH', 'APPROVAL', 'SUCH', 'RATIFICATION', 'OF', 'THE', 'OPINION', 'EXPRESSED', 'BY', 'THE', 'KING', 'THE', 'PRINCES', 'OF', 'THE', 'BLOOD', 'ET', 'CETERA', 'IS', 'RATHER', 'A', 'PROOF', 'OF', 'THE', 'AFFECTION', 'FELT', 'FOR', 'THEM', 'BY', 'THE', 'NATION', 'FOR', 'THE', 'FRENCH', 'CARRY', 'THAT', 'AFFECTION', 'TO', 'SUCH', 'AN', 'EXTENT', 'THAT', 'THEY', 'BELIEVE', 'THEM', 'INFALLIBLE'] +3729-6852-0035-1062: hyp=['IT', 'SEEMS', 'TO', 'ME', 'I', 'REPLIED', 'THAT', 'SUCH', 'APPROVAL', 'SUCH', 'RATIFICATION', 'OF', 'THE', 'OPINION', 'EXPRESSED', 'BY', 'THE', 'KING', 'THE', 'PRINCES', 'OF', 'THE', 'BLOOD', 'ET', 'CETERA', 'IS', 'RATHER', 'A', 'PROOF', 'OF', 'THE', 'AFFECTION', 'FELT', 'FOR', 'THEM', 'BY', 'THE', 'NATION', 'FOR', 'THE', 'FRENCH', 'CARRY', 'THAT', 'AFFECTION', 'TO', 'SUCH', 'AN', 'EXTENT', 'THAT', 'THEY', 'BELIEVE', 'THEM', 'INFALLIBLE'] +3729-6852-0036-1063: ref=['WHEN', 'THE', 'KING', 'COMES', 'TO', 'PARIS', 'EVERYBODY', 'CALLS', 'OUT', 'VIVE', 'LE', 'ROI'] +3729-6852-0036-1063: hyp=['WHEN', 'THE', 'KING', 'COMES', 'TO', 'PARIS', 'EVERYBODY', 'CALLS', 'OUT', 'VIVE', 'LA', 'ROY'] +3729-6852-0037-1064: ref=['SHE', 'INTRODUCED', 'ME', 'TO', 'ALL', 'HER', 'GUESTS', 'AND', 'GAVE', 'ME', 'SOME', 'PARTICULARS', 'RESPECTING', 'EVERY', 'ONE', 'OF', 'THEM'] +3729-6852-0037-1064: hyp=['SHE', 'INTRODUCED', 'ME', 'TO', 'ALL', 'HER', 'GUESTS', 'AND', 'GAVE', 'ME', 'SOME', 'PARTICULARS', 'RESPECTING', 'EVERY', 'ONE', 'OF', 'THEM'] +3729-6852-0038-1065: ref=['WHAT', 'SIR', 'I', 'SAID', 'TO', 'HIM', 'AM', 'I', 'FORTUNATE', 'ENOUGH', 'TO', 'SEE', 'YOU'] +3729-6852-0038-1065: hyp=['WHAT', 'SIR', 'I', 'SAID', 'TO', 'HIM', 'AM', 'I', 'FORTUNATE', 'ENOUGH', 'TO', 'SEE', 'YOU'] +3729-6852-0039-1066: ref=['HE', 'HIMSELF', 'RECITED', 'THE', 'SAME', 'PASSAGE', 'IN', 'FRENCH', 'AND', 'POLITELY', 'POINTED', 'OUT', 'THE', 'PARTS', 'IN', 'WHICH', 'HE', 'THOUGHT', 'THAT', 'I', 'HAD', 'IMPROVED', 'ON', 'THE', 'ORIGINAL'] +3729-6852-0039-1066: hyp=['HE', 'HIMSELF', 'RECITED', 'THE', 'SAME', 'PASSAGE', 'IN', 'FRENCH', 'AND', 'POLITELY', 'POINTED', 'OUT', 'THE', 'PARTS', 'IN', 'WHICH', 'HE', 'THOUGHT', 'THAT', 'I', 'HAD', 'IMPROVED', 'ON', 'THE', 'ORIGINAL'] +3729-6852-0040-1067: ref=['FOR', 'THE', 'FIRST', 'DAY', 'SIR', 'I', 'THINK', 'THAT', 'WHAT', 'YOU', 'HAVE', 'DONE', 'GIVES', 'GREAT', 'HOPES', 'OF', 'YOU', 'AND', 'WITHOUT', 'ANY', 'DOUBT', 'YOU', 'WILL', 'MAKE', 'RAPID', 'PROGRESS'] +3729-6852-0040-1067: hyp=['FOR', 'THE', 'FIRST', 'DAY', 'SIR', 'I', 'THINK', 'THAT', 'WHAT', 'YOU', 'HAVE', 'DONE', 'GIVES', 'GREAT', 'HOPES', 'OF', 'YOU', 'AND', 'WITHOUT', 'ANY', 'DOUBT', 'YOU', 'WILL', 'MAKE', 'RAPID', 'PROGRESS'] +3729-6852-0041-1068: ref=['I', 'BELIEVE', 'IT', 'SIR', 'AND', 'THAT', 'IS', 'WHAT', 'I', 'FEAR', 'THEREFORE', 'THE', 'PRINCIPAL', 'OBJECT', 'OF', 'MY', 'VISIT', 'HERE', 'IS', 'TO', 'DEVOTE', 'MYSELF', 'ENTIRELY', 'TO', 'THE', 'STUDY', 'OF', 'THE', 'FRENCH', 'LANGUAGE'] +3729-6852-0041-1068: hyp=['I', 'BELIEVE', 'IT', 'SIR', 'AND', 'THAT', 'IS', 'WHAT', 'I', 'FEAR', 'THEREFORE', 'THE', 'PRINCIPAL', 'OBJECT', 'OF', 'MY', 'VISIT', 'HERE', 'IS', 'TO', 'DEVOTE', 'MYSELF', 'ENTIRELY', 'TO', 'THE', 'STUDY', 'OF', 'THE', 'FRENCH', 'LANGUAGE'] +3729-6852-0042-1069: ref=['I', 'AM', 'A', 'VERY', 'UNPLEASANT', 'PUPIL', 'ALWAYS', 'ASKING', 
'QUESTIONS', 'CURIOUS', 'TROUBLESOME', 'INSATIABLE', 'AND', 'EVEN', 'SUPPOSING', 'THAT', 'I', 'COULD', 'MEET', 'WITH', 'THE', 'TEACHER', 'I', 'REQUIRE', 'I', 'AM', 'AFRAID', 'I', 'AM', 'NOT', 'RICH', 'ENOUGH', 'TO', 'PAY', 'HIM'] +3729-6852-0042-1069: hyp=['I', 'AM', 'A', 'VERY', 'UNPLEASANT', 'PUPIL', 'ALWAYS', 'ASKING', 'QUESTIONS', 'CURIOUS', 'TROUBLESOME', 'INSATIABLE', 'AND', 'EVEN', 'SUPPOSING', 'THAT', 'I', 'COULD', 'MEET', 'WITH', 'THE', 'TEACHER', 'I', 'REQUIRE', 'I', 'AM', 'AFRAID', 'I', 'AM', 'NOT', 'RICH', 'ENOUGH', 'TO', 'PAY', 'HIM'] +3729-6852-0043-1070: ref=['I', 'RESIDE', 'IN', 'THE', 'MARAIS', 'RUE', 'DE', 'DOUZE', 'PORTES'] +3729-6852-0043-1070: hyp=['I', 'RESIDE', 'IN', 'THE', 'MARAIS', 'RUE', 'DES', 'DUSPORT'] +3729-6852-0044-1071: ref=['I', 'WILL', 'MAKE', 'YOU', 'TRANSLATE', 'THEM', 'INTO', 'FRENCH', 'AND', 'YOU', 'NEED', 'NOT', 'BE', 'AFRAID', 'OF', 'MY', 'FINDING', 'YOU', 'INSATIABLE'] +3729-6852-0044-1071: hyp=['I', 'WILL', 'MAKE', 'YOU', 'TRANSLATE', 'THEM', 'INTO', 'FRENCH', 'AND', 'YOU', 'NEED', 'NOT', 'BE', 'AFRAID', 'OF', 'MY', 'FINDING', 'YOU', 'INSATIABLE'] +3729-6852-0045-1072: ref=['HE', 'HAD', 'A', 'GOOD', 'APPETITE', 'COULD', 'TELL', 'A', 'GOOD', 'STORY', 'WITHOUT', 'LAUGHING', 'WAS', 'CELEBRATED', 'FOR', 'HIS', 'WITTY', 'REPARTEES', 'AND', 'HIS', 'SOCIABLE', 'MANNERS', 'BUT', 'HE', 'SPENT', 'HIS', 'LIFE', 'AT', 'HOME', 'SELDOM', 'GOING', 'OUT', 'AND', 'SEEING', 'HARDLY', 'ANYONE', 'BECAUSE', 'HE', 'ALWAYS', 'HAD', 'A', 'PIPE', 'IN', 'HIS', 'MOUTH', 'AND', 'WAS', 'SURROUNDED', 'BY', 'AT', 'LEAST', 'TWENTY', 'CATS', 'WITH', 'WHICH', 'HE', 'WOULD', 'AMUSE', 'HIMSELF', 'ALL', 'DAY'] +3729-6852-0045-1072: hyp=['HE', 'HAD', 'A', 'GOOD', 'APPETITE', 'COULD', 'TELL', 'A', 'GOOD', 'STORY', 'WITHOUT', 'LAUGHING', 'WAS', 'CELEBRATED', 'FOR', 'HIS', 'WITTY', 'REPARTEES', 'AND', 'HIS', 'SOCIABLE', 'MANNERS', 'BUT', 'HE', 'SPENT', 'HIS', 'LIFE', 'AT', 'HOME', 'SELDOM', 'GOING', 'OUT', 'AND', 'SEEING', 'HARDLY', 'ANY', 'ONE', 'BECAUSE', 'HE', 'ALWAYS', 'HAD', 'A', 'PIPE', 'IN', 'HIS', 'MOUTH', 'AND', 'WAS', 'SURROUNDED', 'BY', 'AT', 'LEAST', 'TWENTY', 'CATS', 'WITH', 'WHICH', 'HE', 'WOULD', 'AMUSE', 'HIMSELF', 'ALL', 'DAY'] +3729-6852-0046-1073: ref=['HIS', 'HOUSEKEEPER', 'HAD', 'THE', 'MANAGEMENT', 'OF', 'EVERYTHING', 'SHE', 'NEVER', 'ALLOWED', 'HIM', 'TO', 'BE', 'IN', 'NEED', 'OF', 'ANYTHING', 'AND', 'SHE', 'GAVE', 'NO', 'ACCOUNT', 'OF', 'HIS', 'MONEY', 'WHICH', 'SHE', 'KEPT', 'ALTOGETHER', 'BECAUSE', 'HE', 'NEVER', 'ASKED', 'HER', 'TO', 'RENDER', 'ANY', 'ACCOUNTS'] +3729-6852-0046-1073: hyp=['HIS', 'HOUSEKEEPER', 'HAD', 'THE', 'MANAGEMENT', 'OF', 'EVERYTHING', 'SHE', 'NEVER', 'ALLOWED', 'HIM', 'TO', 'BE', 'IN', 'NEED', 'OF', 'ANYTHING', 'AND', 'SHE', 'GAVE', 'NO', 'ACCOUNT', 'OF', 'HIS', 'MONEY', 'WHICH', 'SHE', 'KEPT', 'ALTOGETHER', 'BECAUSE', 'HE', 'NEVER', 'ASKED', 'HER', 'TO', 'RENDER', 'ANY', 'ACCOUNTS'] +4077-13751-0000-1074: ref=['ON', 'THE', 'SIXTH', 'OF', 'APRIL', 'EIGHTEEN', 'THIRTY', 'THE', 'CHURCH', 'OF', 'JESUS', 'CHRIST', 'OF', 'LATTER', 'DAY', 'SAINTS', 'WAS', 'FORMALLY', 'ORGANIZED', 'AND', 'THUS', 'TOOK', 'ON', 'A', 'LEGAL', 'EXISTENCE'] +4077-13751-0000-1074: hyp=['ON', 'THE', 'SIXTH', 'OF', 'APRIL', 'EIGHTEEN', 'THIRTY', 'THE', 'CHURCH', 'OF', 'JESUS', 'CHRIST', 'OF', 'LATTER', 'DAY', 'SAINTS', 'WAS', 'FORMALLY', 'ORGANIZED', 'AND', 'THUS', 'TOOK', 'ON', 'A', 'LEGAL', 'EXISTENCE'] +4077-13751-0001-1075: ref=['ITS', 'ORIGIN', 'WAS', 'SMALL', 'A', 'GERM', 'AN', 'INSIGNIFICANT', 'SEED', 'HARDLY', 'TO', 'BE', 'THOUGHT', 'OF', 'AS', 'LIKELY', 'TO', 
'AROUSE', 'OPPOSITION'] +4077-13751-0001-1075: hyp=['ITS', 'ORIGIN', 'WAS', 'SMALL', 'A', 'GERM', 'AN', 'INSIGNIFICANT', 'SEED', 'HARDLY', 'TO', 'BE', 'THOUGHT', 'OF', 'AS', 'LIKELY', 'TO', 'AROUSE', 'OPPOSITION'] +4077-13751-0002-1076: ref=['INSTEAD', 'OF', 'BUT', 'SIX', 'REGULARLY', 'AFFILIATED', 'MEMBERS', 'AND', 'AT', 'MOST', 'TWO', 'SCORE', 'OF', 'ADHERENTS', 'THE', 'ORGANIZATION', 'NUMBERS', 'TODAY', 'MANY', 'HUNDRED', 'THOUSAND', 'SOULS'] +4077-13751-0002-1076: hyp=['INSTEAD', 'OF', 'BUT', 'SIX', 'REGULARLY', 'AFFILIATED', 'MEMBERS', 'AND', 'AT', 'MOST', 'TWO', 'SCORE', 'OF', 'ADHERENTS', 'THE', 'ORGANIZATION', 'NUMBERS', 'TO', 'DAY', 'MANY', 'HUNDRED', 'THOUSAND', 'SOULS'] +4077-13751-0003-1077: ref=['IN', 'PLACE', 'OF', 'A', 'SINGLE', 'HAMLET', 'IN', 'THE', 'SMALLEST', 'CORNER', 'OF', 'WHICH', 'THE', 'MEMBERS', 'COULD', 'HAVE', 'CONGREGATED', 'THERE', 'NOW', 'ARE', 'ABOUT', 'SEVENTY', 'STAKES', 'OF', 'ZION', 'AND', 'ABOUT', 'SEVEN', 'HUNDRED', 'ORGANIZED', 'WARDS', 'EACH', 'WARD', 'AND', 'STAKE', 'WITH', 'ITS', 'FULL', 'COMPLEMENT', 'OF', 'OFFICERS', 'AND', 'PRIESTHOOD', 'ORGANIZATIONS'] +4077-13751-0003-1077: hyp=['IN', 'PLACE', 'OF', 'A', 'SINGLE', 'HAMLET', 'IN', 'THE', 'SMALLEST', 'CORNER', 'OF', 'WHICH', 'THE', 'MEMBERS', 'COULD', 'HAVE', 'CONGREGATED', 'THERE', 'NOW', 'ARE', 'ABOUT', 'SEVENTY', 'STAKES', 'OF', 'ZION', 'AND', 'ABOUT', 'SEVEN', 'HUNDRED', 'ORGANIZED', 'WARDS', 'EACH', 'WARD', 'AND', 'STAKE', 'WITH', 'ITS', 'FULL', 'COMPLEMENT', 'OF', 'OFFICERS', 'AND', 'PRIESTHOOD', 'ORGANIZATIONS'] +4077-13751-0004-1078: ref=['THE', 'PRACTISE', 'OF', 'GATHERING', 'ITS', 'PROSELYTES', 'INTO', 'ONE', 'PLACE', 'PREVENTS', 'THE', 'BUILDING', 'UP', 'AND', 'STRENGTHENING', 'OF', 'FOREIGN', 'BRANCHES', 'AND', 'INASMUCH', 'AS', 'EXTENSIVE', 'AND', 'STRONG', 'ORGANIZATIONS', 'ARE', 'SELDOM', 'MET', 'WITH', 'ABROAD', 'VERY', 'ERRONEOUS', 'IDEAS', 'EXIST', 'CONCERNING', 'THE', 'STRENGTH', 'OF', 'THE', 'CHURCH'] +4077-13751-0004-1078: hyp=['THE', 'PRACTICE', 'OF', 'GATHERING', 'ITS', 'PROSELYTES', 'INTO', 'ONE', 'PLACE', 'PREVENTS', 'THE', 'BUILDING', 'UP', 'AND', 'STRENGTHENING', 'OF', 'FOREIGN', 'BRANCHES', 'AND', 'INASMUCH', 'AS', 'EXTENSIVE', 'AND', 'STRONG', 'ORGANIZATIONS', 'ARE', 'SELDOM', 'MET', 'WITH', 'ABROAD', 'VERY', 'ERRONEOUS', 'IDEAS', 'EXIST', 'CONCERNING', 'THE', 'STRENGTH', 'OF', 'THE', 'CHURCH'] +4077-13751-0005-1079: ref=['NEVERTHELESS', 'THE', 'MUSTARD', 'SEED', 'AMONG', 'THE', 'SMALLEST', 'OF', 'ALL', 'SEEDS', 'HAS', 'ATTAINED', 'THE', 'PROPORTIONS', 'OF', 'A', 'TREE', 'AND', 'THE', 'BIRDS', 'OF', 'THE', 'AIR', 'ARE', 'NESTING', 'IN', 'ITS', 'BRANCHES', 'THE', 'ACORN', 'IS', 'NOW', 'AN', 'OAK', 'OFFERING', 'PROTECTION', 'AND', 'THE', 'SWEETS', 'OF', 'SATISFACTION', 'TO', 'EVERY', 'EARNEST', 'PILGRIM', 'JOURNEYING', 'ITS', 'WAY', 'FOR', 'TRUTH'] +4077-13751-0005-1079: hyp=['NEVERTHELESS', 'THE', 'MUSTARD', 'SEED', 'AMONG', 'THE', 'SMALLEST', 'OF', 'ALL', 'SEEDS', 'HAS', 'ATTAINED', 'THE', 'PROPORTIONS', 'OF', 'A', 'TREE', 'AND', 'THE', 'BIRDS', 'OF', 'THE', 'AIR', 'ARE', 'NESTING', 'IN', 'ITS', 'BRANCHES', 'THE', 'ACORN', 'IS', 'NOW', 'AN', 'OAK', 'OFFERING', 'PROTECTION', 'AND', 'THE', 'SWEETS', 'OF', 'SATISFACTION', 'TO', 'EVERY', 'EARNEST', 'PILGRIM', 'JOURNEYING', 'ITS', 'WAY', 'FOR', 'TRUTH'] +4077-13751-0006-1080: ref=['THEIR', 'EYES', 'WERE', 'FROM', 'THE', 'FIRST', 'TURNED', 'IN', 'ANTICIPATION', 'TOWARD', 'THE', 'EVENING', 'SUN', 'NOT', 'MERELY', 'THAT', 'THE', 'WORK', 'OF', 'PROSELYTING', 'SHOULD', 'BE', 'CARRIED', 'ON', 'IN', 'THE', 'WEST', 'BUT', 'THAT', 
'THE', 'HEADQUARTERS', 'OF', 'THE', 'CHURCH', 'SHOULD', 'BE', 'THERE', 'ESTABLISHED'] +4077-13751-0006-1080: hyp=['THEIR', 'EYES', 'WERE', 'FROM', 'THE', 'FIRST', 'TURNED', 'IN', 'ANTICIPATION', 'TOWARD', 'THE', 'EVENING', 'SUN', 'NOT', 'MERELY', 'THAT', 'THE', 'WORK', 'OF', 'PROSELYTING', 'SHOULD', 'BE', 'CARRIED', 'ON', 'IN', 'THE', 'WEST', 'BUT', 'THAT', 'THE', 'HEADQUARTERS', 'OF', 'THE', 'CHURCH', 'SHOULD', 'BE', 'THERE', 'ESTABLISHED'] +4077-13751-0007-1081: ref=['THE', 'BOOK', 'OF', 'MORMON', 'HAD', 'TAUGHT', 'THE', 'PEOPLE', 'THE', 'TRUE', 'ORIGIN', 'AND', 'DESTINY', 'OF', 'THE', 'AMERICAN', 'INDIANS', 'AND', 'TOWARD', 'THIS', 'DARK', 'SKINNED', 'REMNANT', 'OF', 'A', 'ONCE', 'MIGHTY', 'PEOPLE', 'THE', 'MISSIONARIES', 'OF', 'MORMONISM', 'EARLY', 'TURNED', 'THEIR', 'EYES', 'AND', 'WITH', 'THEIR', 'EYES', 'WENT', 'THEIR', 'HEARTS', 'AND', 'THEIR', 'HOPES'] +4077-13751-0007-1081: hyp=['THE', 'BOOK', 'A', 'MORMON', 'HAD', 'TAUGHT', 'THE', 'PEOPLE', 'THE', 'TRUE', 'ORIGIN', 'AND', 'DESTINY', 'OF', 'THE', 'AMERICAN', 'INDIANS', 'AND', 'TOWARD', 'THIS', 'DARK', 'SKINNED', 'REMNANT', 'OF', 'A', 'ONCE', 'MIGHTY', 'PEOPLE', 'THE', 'MISSIONARIES', 'OF', 'MORMONISM', 'EARLY', 'TURNED', 'THEIR', 'EYES', 'AND', 'WITH', 'THEIR', 'EYES', 'WENT', 'THEIR', 'HEARTS', 'AND', 'THEIR', 'HOPES'] +4077-13751-0008-1082: ref=['IT', 'IS', 'NOTABLE', 'THAT', 'THE', 'INDIAN', 'TRIBES', 'HAVE', 'GENERALLY', 'REGARDED', 'THE', 'RELIGION', 'OF', 'THE', 'LATTER', 'DAY', 'SAINTS', 'WITH', 'FAVOR', 'SEEING', 'IN', 'THE', 'BOOK', 'OF', 'MORMON', 'STRIKING', 'AGREEMENT', 'WITH', 'THEIR', 'OWN', 'TRADITIONS'] +4077-13751-0008-1082: hyp=['IT', 'IS', 'NOTABLE', 'THAT', 'THE', 'INDIAN', 'TRIBES', 'HAVE', 'GENERALLY', 'REGARDED', 'THE', 'RELIGION', 'OF', 'THE', 'LATTER', 'DAY', 'SAINTS', 'WITH', 'FAVOR', 'SEEING', 'IN', 'THE', 'BOOK', 'OF', 'MORMON', 'STRIKING', 'AGREEMENT', 'WITH', 'THEIR', 'OWN', 'TRADITIONS'] +4077-13751-0009-1083: ref=['THE', 'FIRST', 'WELL', 'ESTABLISHED', 'SEAT', 'OF', 'THE', 'CHURCH', 'WAS', 'IN', 'THE', 'PRETTY', 'LITTLE', 'TOWN', 'OF', 'KIRTLAND', 'OHIO', 'ALMOST', 'WITHIN', 'SIGHT', 'OF', 'LAKE', 'ERIE', 'AND', 'HERE', 'SOON', 'ROSE', 'THE', 'FIRST', 'TEMPLE', 'OF', 'MODERN', 'TIMES'] +4077-13751-0009-1083: hyp=['THE', 'FIRST', 'WELL', 'ESTABLISHED', 'SEAT', 'OF', 'THE', 'CHURCH', 'WAS', 'IN', 'THE', 'PRETTY', 'LITTLE', 'TOWN', 'OF', 'PORTLAND', 'OHIO', 'ALMOST', 'WITHIN', 'SIGHT', 'OF', 'LAKE', 'ERIE', 'AND', 'HERE', 'SOON', 'ROSE', 'THE', 'FIRST', 'TEMPLE', 'OF', 'MODERN', 'TIMES'] +4077-13751-0010-1084: ref=['TO', 'THE', 'FERVENT', 'LATTER', 'DAY', 'SAINT', 'A', 'TEMPLE', 'IS', 'NOT', 'SIMPLY', 'A', 'CHURCH', 'BUILDING', 'A', 'HOUSE', 'FOR', 'RELIGIOUS', 'ASSEMBLY'] +4077-13751-0010-1084: hyp=['TO', 'THE', 'FERVENT', 'LATTER', 'DAY', 'SAINT', 'A', 'TEMPLE', 'IS', 'NOT', 'SIMPLY', 'A', 'CHURCH', 'BUILDING', 'A', 'HOUSE', 'FOR', 'A', 'RELIGIOUS', 'ASSEMBLY'] +4077-13751-0011-1085: ref=['SOON', 'THOUSANDS', 'OF', 'CONVERTS', 'HAD', 'RENTED', 'OR', 'PURCHASED', 'HOMES', 'IN', 'MISSOURI', 'INDEPENDENCE', 'JACKSON', 'COUNTY', 'BEING', 'THEIR', 'CENTER', 'BUT', 'FROM', 'THE', 'FIRST', 'THEY', 'WERE', 'UNPOPULAR', 'AMONG', 'THE', 'MISSOURIANS'] +4077-13751-0011-1085: hyp=['SOON', 'THOUSANDS', 'OF', 'CONVERTS', 'HAD', 'RENTED', 'OR', 'PURCHASED', 'HOMES', 'IN', 'MISSOURI', 'INDEPENDENCE', 'JACKSON', 'COUNTY', 'BEING', 'THEIR', 'CENTRE', 'BUT', 'FROM', 'THE', 'FIRST', 'THEY', 'WERE', 'UNPOPULAR', 'AMONG', 'THE', 'MISSOURIIANS'] +4077-13751-0012-1086: ref=['THE', 'LIEUTENANT', 'GOVERNOR', 'LILBURN', 'W', 
'BOGGS', 'AFTERWARD', 'GOVERNOR', 'WAS', 'A', 'PRONOUNCED', 'MORMON', 'HATER', 'AND', 'THROUGHOUT', 'THE', 'PERIOD', 'OF', 'THE', 'TROUBLES', 'HE', 'MANIFESTED', 'SYMPATHY', 'WITH', 'THE', 'PERSECUTORS'] +4077-13751-0012-1086: hyp=['THE', 'LIEUTENANT', 'GOVERNOR', 'LILBURN', 'W', 'BOGGS', 'AFTERWARD', 'GOVERNOR', 'WAS', 'A', 'PRONOUNCED', 'MORMON', 'HATER', 'AND', 'THROUGHOUT', 'THE', 'PERIOD', 'OF', 'THE', 'TROUBLES', 'HE', 'MANIFESTS', 'HIS', 'SYMPATHY', 'WITH', 'THE', 'PERSECUTORS'] +4077-13751-0013-1087: ref=['THEIR', 'SUFFERINGS', 'HAVE', 'NEVER', 'YET', 'BEEN', 'FITLY', 'CHRONICLED', 'BY', 'HUMAN', 'SCRIBE'] +4077-13751-0013-1087: hyp=['THEIR', 'SUFFERINGS', 'HAVE', 'NEVER', 'YET', 'BEEN', 'FITLY', 'CHRONICLED', 'BY', 'HUMAN', 'SCRIBE'] +4077-13751-0014-1088: ref=['MAKING', 'THEIR', 'WAY', 'ACROSS', 'THE', 'RIVER', 'MOST', 'OF', 'THE', 'REFUGEES', 'FOUND', 'SHELTER', 'AMONG', 'THE', 'MORE', 'HOSPITABLE', 'PEOPLE', 'OF', 'CLAY', 'COUNTY', 'AND', 'AFTERWARD', 'ESTABLISHED', 'THEMSELVES', 'IN', 'CALDWELL', 'COUNTY', 'THEREIN', 'FOUNDING', 'THE', 'CITY', 'OF', 'FAR', 'WEST'] +4077-13751-0014-1088: hyp=['MAKING', 'THEIR', 'WAY', 'ACROSS', 'THE', 'RIVER', 'MOST', 'OF', 'THE', 'REFUGEES', 'FOUND', 'SHELTER', 'AMONG', 'THE', 'MORE', 'HOSPITABLE', 'PEOPLE', 'OF', 'CLAY', 'COUNTY', 'AND', 'AFTERWARD', 'ESTABLISHED', 'THEMSELVES', 'IN', 'CALDWELL', 'COUNTY', 'THEREIN', 'FOUNDING', 'THE', 'CITY', 'A', 'FAR', 'WEST'] +4077-13751-0015-1089: ref=['A', 'SMALL', 'SETTLEMENT', 'HAD', 'BEEN', 'FOUNDED', 'BY', 'MORMON', 'FAMILIES', 'ON', 'SHOAL', 'CREEK', 'AND', 'HERE', 'ON', 'THE', 'THIRTIETH', 'OF', 'OCTOBER', 'EIGHTEEN', 'THIRTY', 'EIGHT', 'A', 'COMPANY', 'OF', 'TWO', 'HUNDRED', 'AND', 'FORTY', 'FELL', 'UPON', 'THE', 'HAPLESS', 'SETTLERS', 'AND', 'BUTCHERED', 'A', 'SCORE'] +4077-13751-0015-1089: hyp=['A', 'SMALL', 'SETTLEMENT', 'HAD', 'BEEN', 'FOUNDED', 'BY', 'MORMON', 'FAMILIES', 'ON', 'SHOAL', 'CREEK', 'AND', 'HERE', 'ON', 'THE', 'THIRTIETH', 'OF', 'OCTOBER', 'EIGHTEEN', 'THIRTY', 'EIGHT', 'A', 'COMPANY', 'OF', 'TWO', 'HUNDRED', 'AND', 'FORTY', 'FELL', 'UPON', 'THE', 'HAPLESS', 'SETTLERS', 'AND', 'BUTCHERED', 'A', 'SCORE'] +4077-13751-0016-1090: ref=['BE', 'IT', 'SAID', 'TO', 'THE', 'HONOR', 'OF', 'SOME', 'OF', 'THE', 'OFFICERS', 'ENTRUSTED', 'WITH', 'THE', 'TERRIBLE', 'COMMISSION', 'THAT', 'WHEN', 'THEY', 'LEARNED', 'ITS', 'TRUE', 'SIGNIFICANCE', 'THEY', 'RESIGNED', 'THEIR', 'AUTHORITY', 'RATHER', 'THAN', 'HAVE', 'ANYTHING', 'TO', 'DO', 'WITH', 'WHAT', 'THEY', 'DESIGNATED', 'A', 'COLD', 'BLOODED', 'BUTCHERY'] +4077-13751-0016-1090: hyp=['BE', 'IT', 'SAID', 'TO', 'THE', 'HONOR', 'OF', 'SOME', 'OF', 'THE', 'OFFICERS', 'INTRUSTED', 'WITH', 'THE', 'TERRIBLE', 'COMMISSION', 'THAT', 'WHEN', 'THEY', 'LEARNED', 'ITS', 'TRUE', 'SIGNIFICANCE', 'THEY', 'RESIGNED', 'THEIR', 'AUTHORITY', 'RATHER', 'THAN', 'HAVE', 'ANYTHING', 'TO', 'DO', 'WITH', 'WHAT', 'THEY', 'DESIGNATED', 'A', 'COLD', 'BLOODED', 'BUTCHERY'] +4077-13751-0017-1091: ref=['OH', 'WHAT', 'A', 'RECORD', 'TO', 'READ', 'WHAT', 'A', 'PICTURE', 'TO', 'GAZE', 'UPON', 'HOW', 'AWFUL', 'THE', 'FACT'] +4077-13751-0017-1091: hyp=['OH', 'WHAT', 'A', 'RECORD', 'TO', 'READ', 'WHAT', 'A', 'PICTURE', 'TO', 'GAZE', 'UPON', 'HOW', 'AWFUL', 'THE', 'FACT'] +4077-13751-0018-1092: ref=['AMERICAN', 'SCHOOL', 'BOYS', 'READ', 'WITH', 'EMOTIONS', 'OF', 'HORROR', 'OF', 'THE', 'ALBIGENSES', 'DRIVEN', 'BEATEN', 'AND', 'KILLED', 'WITH', 'A', 'PAPAL', 'LEGATE', 'DIRECTING', 'THE', 'BUTCHERY', 'AND', 'OF', 'THE', 'VAUDOIS', 'HUNTED', 'AND', 'HOUNDED', 'LIKE', 'BEASTS', 'AS', 
'THE', 'EFFECT', 'OF', 'A', 'ROYAL', 'DECREE', 'AND', 'THEY', 'YET', 'SHALL', 'READ', 'IN', 'THE', 'HISTORY', 'OF', 'THEIR', 'OWN', 'COUNTRY', 'OF', 'SCENES', 'AS', 'TERRIBLE', 'AS', 'THESE', 'IN', 'THE', 'EXHIBITION', 'OF', 'INJUSTICE', 'AND', 'INHUMAN', 'HATE'] +4077-13751-0018-1092: hyp=['AMERICAN', 'SCHOOLBOYS', 'READ', 'WITH', 'EMOTIONS', 'OF', 'HORROR', 'OF', 'THE', 'ALBIGINZAS', 'DRIVEN', 'BEATEN', 'AND', 'KILLED', 'WITH', 'A', 'PAPEL', 'LEGATE', 'DIRECTING', 'THE', 'BUTCHERY', 'AND', 'OF', 'THE', 'VAUDOIS', 'HUNTED', 'AND', 'HOUNDED', 'LIKE', 'BEASTS', 'AS', 'THE', 'EFFECT', 'OF', 'A', 'ROYAL', 'DECREE', 'AND', 'THEY', 'YET', 'SHALL', 'READ', 'IN', 'THE', 'HISTORY', 'OF', 'THEIR', 'OWN', 'COUNTRY', 'OF', 'SCENES', 'AS', 'TERRIBLE', 'AS', 'THESE', 'IN', 'THE', 'EXHIBITION', 'OF', 'INJUSTICE', 'AND', 'INHUMAN', 'HATE'] +4077-13751-0019-1093: ref=['WHO', 'BEGAN', 'THE', 'QUARREL', 'WAS', 'IT', 'THE', 'MORMONS'] +4077-13751-0019-1093: hyp=['WHO', 'BEGAN', 'THE', 'QUARREL', 'WAS', 'IT', 'THE', 'MORMONS'] +4077-13751-0020-1094: ref=['AS', 'A', 'SAMPLE', 'OF', 'THE', 'PRESS', 'COMMENTS', 'AGAINST', 'THE', 'BRUTALITY', 'OF', 'THE', 'MISSOURIANS', 'I', 'QUOTE', 'A', 'PARAGRAPH', 'FROM', 'THE', 'QUINCY', 'ARGUS', 'MARCH', 'SIXTEENTH', 'EIGHTEEN', 'THIRTY', 'NINE'] +4077-13751-0020-1094: hyp=['AS', 'A', 'SAMPLE', 'OF', 'THE', 'PRESS', 'COMMENTS', 'AGAINST', 'THE', 'BRUTALITY', 'OF', 'THE', 'MISSOURIANS', 'I', 'QUOTE', 'A', 'PARAGRAPH', 'FROM', 'THE', 'QUINCY', 'ARGUS', 'MARCH', 'SIXTEENTH', 'EIGHTEEN', 'THIRTY', 'NINE'] +4077-13751-0021-1095: ref=['IT', 'WILL', 'BE', 'OBSERVED', 'THAT', 'AN', 'ORGANIZED', 'MOB', 'AIDED', 'BY', 'MANY', 'OF', 'THE', 'CIVIL', 'AND', 'MILITARY', 'OFFICERS', 'OF', 'MISSOURI', 'WITH', 'GOVERNOR', 'BOGGS', 'AT', 'THEIR', 'HEAD', 'HAVE', 'BEEN', 'THE', 'PROMINENT', 'ACTORS', 'IN', 'THIS', 'BUSINESS', 'INCITED', 'TOO', 'IT', 'APPEARS', 'AGAINST', 'THE', 'MORMONS', 'BY', 'POLITICAL', 'HATRED', 'AND', 'BY', 'THE', 'ADDITIONAL', 'MOTIVES', 'OF', 'PLUNDER', 'AND', 'REVENGE'] +4077-13751-0021-1095: hyp=['IT', 'WILL', 'BE', 'OBSERVED', 'THAT', 'AN', 'ORGANIZED', 'MOB', 'AIDED', 'BY', 'MANY', 'OF', 'THE', 'CIVIL', 'AND', 'MILITARY', 'OFFICERS', 'OF', 'MISSOURI', 'WITH', 'GOVERNOR', 'BOGGS', 'AT', 'THEIR', 'HEAD', 'HAVE', 'BEEN', 'THE', 'PROMINENT', 'ACTORS', 'IN', 'THIS', 'BUSINESS', 'INCITED', 'TOO', 'IT', 'APPEARS', 'AGAINST', 'THE', 'MORMONS', 'BY', 'POLITICAL', 'HATRED', 'AND', 'BY', 'THE', 'ADDITIONAL', 'MOTIVES', 'OF', 'PLUNDER', 'AND', 'REVENGE'] +4077-13754-0000-1096: ref=['THE', 'ARMY', 'FOUND', 'THE', 'PEOPLE', 'IN', 'POVERTY', 'AND', 'LEFT', 'THEM', 'IN', 'COMPARATIVE', 'WEALTH'] +4077-13754-0000-1096: hyp=['THE', 'ARMY', 'FOUND', 'THE', 'PEOPLE', 'IN', 'POVERTY', 'AND', 'LEFT', 'THEM', 'IN', 'COMPARATIVE', 'WEALTH'] +4077-13754-0001-1097: ref=['BUT', 'A', 'WORD', 'FURTHER', 'CONCERNING', 'THE', 'EXPEDITION', 'IN', 'GENERAL'] +4077-13754-0001-1097: hyp=['BUT', 'A', 'WORD', 'FURTHER', 'CONCERNING', 'THE', 'EXPEDITION', 'IN', 'GENERAL'] +4077-13754-0002-1098: ref=['IT', 'WAS', 'THROUGH', "FLOYD'S", 'ADVICE', 'THAT', 'BUCHANAN', 'ORDERED', 'THE', 'MILITARY', 'EXPEDITION', 'TO', 'UTAH', 'OSTENSIBLY', 'TO', 'INSTALL', 'CERTAIN', 'FEDERAL', 'OFFICIALS', 'AND', 'TO', 'REPRESS', 'AN', 'ALLEGED', 'INFANTILE', 'REBELLION', 'WHICH', 'IN', 'FACT', 'HAD', 'NEVER', 'COME', 'INTO', 'EXISTENCE', 'BUT', 'IN', 'REALITY', 'TO', 'FURTHER', 'THE', 'INTERESTS', 'OF', 'THE', 'SECESSIONISTS'] +4077-13754-0002-1098: hyp=['IT', 'WAS', 'THROUGH', "FLOYD'S", 'ADVICE', 'THE', 'BUCHANAN', 
'ORDERED', 'THE', 'MILITARY', 'EXPEDITION', 'TO', 'UTAH', 'OSTENSIBLY', 'TO', 'INSTALL', 'CERTAIN', 'FEDERAL', 'OFFICIALS', 'AND', 'TO', 'REPRESS', 'AN', 'ALLEGED', 'INFANTILE', 'REBELLION', 'WHICH', 'IN', 'FACT', 'HAD', 'NEVER', 'COME', 'INTO', 'EXISTENCE', 'BUT', 'IN', 'REALITY', 'TO', 'FURTHER', 'THE', 'INTERESTS', 'OF', 'THE', 'SECESSIONISTS'] +4077-13754-0003-1099: ref=['MOREOVER', 'HAD', 'THE', 'PEOPLE', 'BEEN', 'INCLINED', 'TO', 'REBELLION', 'WHAT', 'GREATER', 'OPPORTUNITY', 'COULD', 'THEY', 'HAVE', 'WISHED'] +4077-13754-0003-1099: hyp=['MOREOVER', 'HAD', 'THE', 'PEOPLE', 'BEEN', 'INCLINED', 'TO', 'REBELLION', 'WHAT', 'GREATER', 'OPPORTUNITY', 'COULD', 'THEY', 'HAVE', 'WISHED'] +4077-13754-0004-1100: ref=['ALREADY', 'A', 'NORTH', 'AND', 'A', 'SOUTH', 'WERE', 'TALKED', 'OF', 'WHY', 'NOT', 'SET', 'UP', 'ALSO', 'A', 'WEST'] +4077-13754-0004-1100: hyp=['ALREADY', 'A', 'NORTH', 'AND', 'A', 'SOUTH', 'WERE', 'TALKED', 'OF', 'WHY', 'NOT', 'SET', 'UP', 'ALSO', 'A', 'WEST'] +4077-13754-0005-1101: ref=['THEY', 'KNEW', 'NO', 'NORTH', 'NO', 'SOUTH', 'NO', 'EAST', 'NO', 'WEST', 'THEY', 'STOOD', 'POSITIVELY', 'BY', 'THE', 'CONSTITUTION', 'AND', 'WOULD', 'HAVE', 'NOTHING', 'TO', 'DO', 'IN', 'THE', 'BLOODY', 'STRIFE', 'BETWEEN', 'BROTHERS', 'UNLESS', 'INDEED', 'THEY', 'WERE', 'SUMMONED', 'BY', 'THE', 'AUTHORITY', 'TO', 'WHICH', 'THEY', 'HAD', 'ALREADY', 'ONCE', 'LOYALLY', 'RESPONDED', 'TO', 'FURNISH', 'MEN', 'AND', 'ARMS', 'FOR', 'THEIR', "COUNTRY'S", 'NEED'] +4077-13754-0005-1101: hyp=['THEY', 'KNEW', 'NO', 'NORTH', 'NO', 'SOUTH', 'NO', 'EAST', 'NO', 'WEST', 'THEY', 'STOOD', 'POSITIVELY', 'BY', 'THE', 'CONSTITUTION', 'AND', 'WOULD', 'HAVE', 'NOTHING', 'TO', 'DO', 'IN', 'THE', 'BLOODY', 'STRIFE', 'BETWEEN', 'BROTHERS', 'UNLESS', 'INDEED', 'THEY', 'WERE', 'SUMMONED', 'BY', 'THE', 'AUTHORITY', 'TO', 'WHICH', 'THEY', 'HAD', 'ALREADY', 'ONCE', 'LOYALLY', 'RESPONDED', 'TO', 'FURNISH', 'MEN', 'AND', 'ARMS', 'FOR', 'THEIR', "COUNTRY'S", 'NEED'] +4077-13754-0006-1102: ref=['WHAT', 'THE', 'LATTER', 'DAY', 'SAINTS', 'CALL', 'CELESTIAL', 'MARRIAGE', 'IS', 'CHARACTERISTIC', 'OF', 'THE', 'CHURCH', 'AND', 'IS', 'IN', 'VERY', 'GENERAL', 'PRACTISE', 'BUT', 'OF', 'CELESTIAL', 'MARRIAGE', 'PLURALITY', 'OF', 'WIVES', 'WAS', 'AN', 'INCIDENT', 'NEVER', 'AN', 'ESSENTIAL'] +4077-13754-0006-1102: hyp=['WHAT', 'THE', 'LATTER', 'DAY', 'SAINTS', 'CALL', 'CELESTIAL', 'MARRIAGE', 'IS', 'CHARACTERISTIC', 'OF', 'THE', 'CHURCH', 'AND', 'IS', 'IN', 'VERY', 'GENERAL', 'PRACTICE', 'BUT', 'OF', 'CELESTIAL', 'MARRIAGE', 'PLURALITY', 'OF', 'WIVES', 'WAS', 'AN', 'INCIDENT', 'NEVER', 'AN', 'ESSENTIAL'] +4077-13754-0007-1103: ref=['WE', 'BELIEVE', 'IN', 'A', 'LITERAL', 'RESURRECTION', 'AND', 'AN', 'ACTUAL', 'HEREAFTER', 'IN', 'WHICH', 'FUTURE', 'STATE', 'SHALL', 'BE', 'RECOGNIZED', 'EVERY', 'SANCTIFIED', 'AND', 'AUTHORIZED', 'RELATIONSHIP', 'EXISTING', 'HERE', 'ON', 'EARTH', 'OF', 'PARENT', 'AND', 'CHILD', 'BROTHER', 'AND', 'SISTER', 'HUSBAND', 'AND', 'WIFE'] +4077-13754-0007-1103: hyp=['WE', 'BELIEVE', 'IN', 'A', 'LITERAL', 'RESURRECTION', 'AND', 'AN', 'ACTUAL', 'HEREAFTER', 'IN', 'WHICH', 'FUTURE', 'STATE', 'SHALL', 'BE', 'RECOGNIZED', 'EVERY', 'SANCTIFIED', 'AND', 'AUTHORIZED', 'RELATIONSHIP', 'EXISTING', 'HERE', 'ON', 'EARTH', 'OF', 'PARENT', 'AND', 'CHILD', 'BROTHER', 'AND', 'SISTER', 'HUSBAND', 'AND', 'WIFE'] +4077-13754-0008-1104: ref=['IT', 'HAS', 'BEEN', 'MY', 'PRIVILEGE', 'TO', 'TREAD', 'THE', 'SOIL', 'OF', 'MANY', 'LANDS', 'TO', 'OBSERVE', 'THE', 'CUSTOMS', 'AND', 'STUDY', 'THE', 'HABITS', 'OF', 'MORE', 'NATIONS', 'THAN', 'ONE', 
'AND', 'I', 'HAVE', 'YET', 'TO', 'FIND', 'THE', 'PLACE', 'AND', 'MEET', 'THE', 'PEOPLE', 'WHERE', 'AND', 'WITH', 'WHOM', 'THE', 'PURITY', 'OF', 'MAN', 'AND', 'WOMAN', 'IS', 'HELD', 'MORE', 'PRECIOUS', 'THAN', 'AMONG', 'THE', 'MALIGNED', 'MORMONS', 'IN', 'THE', 'MOUNTAIN', 'VALLEYS', 'OF', 'THE', 'WEST'] +4077-13754-0008-1104: hyp=['IT', 'HAS', 'BEEN', 'MY', 'PRIVILEGE', 'TO', 'TREAD', 'THE', 'SOIL', 'OF', 'MANY', 'LANDS', 'TO', 'OBSERVE', 'THE', 'CUSTOMS', 'AND', 'STUDY', 'THE', 'HABITS', 'OF', 'MORE', 'NATIONS', 'THAN', 'ONE', 'AND', 'I', 'HAVE', 'YET', 'TO', 'FIND', 'THE', 'PLACE', 'AND', 'MEET', 'THE', 'PEOPLE', 'WHERE', 'AND', 'WITH', 'WHOM', 'THE', 'PURITY', 'OF', 'MAN', 'AND', 'WOMAN', 'IS', 'HELD', 'MORE', 'PRECIOUS', 'THAN', 'AMONG', 'THE', 'MALIGNED', 'MORMONS', 'IN', 'THE', 'MOUNTAIN', 'VALLEYS', 'OF', 'THE', 'WEST'] +4077-13754-0009-1105: ref=['AT', 'THE', 'INCEPTION', 'OF', 'PLURAL', 'MARRIAGE', 'AMONG', 'THE', 'LATTER', 'DAY', 'SAINTS', 'THERE', 'WAS', 'NO', 'LAW', 'NATIONAL', 'OR', 'STATE', 'AGAINST', 'ITS', 'PRACTISE'] +4077-13754-0009-1105: hyp=['AT', 'THE', 'INCEPTION', 'OF', 'BORAL', 'MARRIAGE', 'AMONG', 'THE', 'LATTER', 'DAY', 'SAINTS', 'THERE', 'WAS', 'NO', 'LAW', 'NATIONAL', 'OR', 'STATE', 'AGAINST', 'ITS', 'PRACTICE'] +4077-13754-0010-1106: ref=['IN', 'EIGHTEEN', 'SIXTY', 'TWO', 'A', 'LAW', 'WAS', 'ENACTED', 'WITH', 'THE', 'PURPOSE', 'OF', 'SUPPRESSING', 'PLURAL', 'MARRIAGE', 'AND', 'AS', 'HAD', 'BEEN', 'PREDICTED', 'IN', 'THE', 'NATIONAL', 'SENATE', 'PRIOR', 'TO', 'ITS', 'PASSAGE', 'IT', 'LAY', 'FOR', 'MANY', 'YEARS', 'A', 'DEAD', 'LETTER'] +4077-13754-0010-1106: hyp=['IN', 'EIGHTEEN', 'SIXTY', 'TWO', 'A', 'LAW', 'WAS', 'ENACTED', 'WITH', 'THE', 'PURPOSE', 'OF', 'SUPPRESSING', 'PLURAL', 'MARRIAGE', 'AND', 'AS', 'HAD', 'BEEN', 'PREDICTED', 'IN', 'THE', 'NATIONAL', 'SENATE', 'PRIOR', 'TO', 'ITS', 'PASSAGE', 'IT', 'LAY', 'FOR', 'MANY', 'YEARS', 'A', 'DEAD', 'LETTER'] +4077-13754-0011-1107: ref=['FEDERAL', 'JUDGES', 'AND', 'UNITED', 'STATES', 'ATTORNEYS', 'IN', 'UTAH', 'WHO', 'WERE', 'NOT', 'MORMONS', 'NOR', 'LOVERS', 'OF', 'MORMONISM', 'REFUSED', 'TO', 'ENTERTAIN', 'COMPLAINTS', 'OR', 'PROSECUTE', 'CASES', 'UNDER', 'THE', 'LAW', 'BECAUSE', 'OF', 'ITS', 'MANIFEST', 'INJUSTICE', 'AND', 'INADEQUACY'] +4077-13754-0011-1107: hyp=['FEDERAL', 'JUDGES', 'AND', 'UNITED', 'STATES', 'ATTORNEYS', 'IN', 'UTAH', 'WHO', 'WERE', 'NOT', 'MORMONS', 'NOR', 'LOVERS', 'OF', 'MORMONISM', 'REFUSED', 'TO', 'ENTERTAIN', 'COMPLAINTS', 'OR', 'PROSECUTE', 'CASES', 'UNDER', 'THE', 'LAW', 'BECAUSE', 'OF', 'ITS', 'MANIFEST', 'INJUSTICE', 'AND', 'INADEQUACY'] +4077-13754-0012-1108: ref=['THIS', 'MEANT', 'THAT', 'FOR', 'AN', 'ALLEGED', 'MISDEMEANOR', 'FOR', 'WHICH', 'CONGRESS', 'PRESCRIBED', 'A', 'MAXIMUM', 'PENALTY', 'OF', 'SIX', 'MONTHS', 'IMPRISONMENT', 'AND', 'A', 'FINE', 'OF', 'THREE', 'HUNDRED', 'DOLLARS', 'A', 'MAN', 'MIGHT', 'BE', 'IMPRISONED', 'FOR', 'LIFE', 'AYE', 'FOR', 'MANY', 'TERMS', 'OF', 'A', "MAN'S", 'NATURAL', 'LIFE', 'DID', 'THE', "COURT'S", 'POWER', 'TO', 'ENFORCE', 'ITS', 'SENTENCES', 'EXTEND', 'SO', 'FAR', 'AND', 'MIGHT', 'BE', 'FINED', 'MILLIONS', 'OF', 'DOLLARS'] +4077-13754-0012-1108: hyp=['THIS', 'MEANT', 'THAT', 'FOR', 'AN', 'ALLEGED', 'MISDEMEANOR', 'FOR', 'WHICH', 'CONGRESS', 'PRESCRIBED', 'A', 'MAXIMUM', 'PENALTY', 'OF', 'SIX', 'MONTHS', 'IMPRISONMENT', 'AND', 'A', 'FINE', 'OF', 'THREE', 'HUNDRED', 'DOLLARS', 'A', 'MAN', 'MIGHT', 'BE', 'IMPRISONED', 'FOR', 'LIFE', 'AY', 'FOR', 'MANY', 'TERMS', 'OF', 'A', "MAN'S", 'NATURAL', 'LIFE', 'DID', 'THE', "COURT'S", 'POWER', 'TO', 
'ENFORCE', 'ITS', 'SENTENCES', 'EXTEND', 'SO', 'FAR', 'AND', 'MIGHT', 'BE', 'FINED', 'MILLIONS', 'OF', 'DOLLARS'] +4077-13754-0013-1109: ref=['BEFORE', 'THIS', 'TRAVESTY', 'ON', 'THE', 'ADMINISTRATION', 'OF', 'LAW', 'COULD', 'BE', 'BROUGHT', 'BEFORE', 'THE', 'COURT', 'OF', 'LAST', 'RESORT', 'AND', 'THERE', 'MEET', 'WITH', 'THE', 'REVERSAL', 'AND', 'REBUKE', 'IT', 'DESERVED', 'MEN', 'WERE', 'IMPRISONED', 'UNDER', 'SENTENCES', 'OF', 'MANY', 'YEARS', 'DURATION'] +4077-13754-0013-1109: hyp=['BEFORE', 'THIS', 'TRAVESTY', 'ON', 'THE', 'ADMINISTRATION', 'OF', 'LAW', 'COULD', 'BE', 'BROUGHT', 'BEFORE', 'THE', 'COURT', 'OF', 'LAST', 'RESORT', 'AND', 'THERE', 'MET', 'WITH', 'THE', 'REVERSAL', 'AND', 'REBUKE', 'IT', 'DESERVED', 'MEN', 'WERE', 'IMPRISONED', 'UNDER', 'SENTENCE', 'OF', 'MANY', 'YEARS', 'DURATION'] +4077-13754-0014-1110: ref=['THE', 'PEOPLE', 'CONTESTED', 'THESE', 'MEASURES', 'ONE', 'BY', 'ONE', 'IN', 'THE', 'COURTS', 'PRESENTING', 'IN', 'CASE', 'AFTER', 'CASE', 'THE', 'DIFFERENT', 'PHASES', 'OF', 'THE', 'SUBJECT', 'AND', 'URGING', 'THE', 'UNCONSTITUTIONALITY', 'OF', 'THE', 'MEASURE'] +4077-13754-0014-1110: hyp=['THE', 'PEOPLE', 'CONTESTED', 'THESE', 'MEASURES', 'ONE', 'BY', 'ONE', 'IN', 'THE', 'COURTS', 'PRESENTING', 'IN', 'CASE', 'AFTER', 'CASE', 'THE', 'DIFFERENT', 'PHASES', 'OF', 'THE', 'SUBJECT', 'AND', 'URGING', 'THE', 'UNCONSTITUTIONALITY', 'OF', 'THE', 'MEASURE'] +4077-13754-0015-1111: ref=['THEN', 'THE', 'CHURCH', 'WAS', 'DISINCORPORATED', 'AND', 'ITS', 'PROPERTY', 'BOTH', 'REAL', 'AND', 'PERSONAL', 'CONFISCATED', 'AND', 'ESCHEATED', 'TO', 'THE', 'GOVERNMENT', 'OF', 'THE', 'UNITED', 'STATES', 'AND', 'ALTHOUGH', 'THE', 'PERSONAL', 'PROPERTY', 'WAS', 'SOON', 'RESTORED', 'REAL', 'ESTATE', 'OF', 'GREAT', 'VALUE', 'LONG', 'LAY', 'IN', 'THE', 'HANDS', 'OF', 'THE', "COURT'S", 'RECEIVER', 'AND', 'THE', 'MORMON', 'CHURCH', 'HAD', 'TO', 'PAY', 'THE', 'NATIONAL', 'GOVERNMENT', 'HIGH', 'RENTAL', 'ON', 'ITS', 'OWN', 'PROPERTY'] +4077-13754-0015-1111: hyp=['THEN', 'THE', 'CHURCH', 'WAS', 'DISINCORPORATED', 'AND', 'ITS', 'PROPERTY', 'BOTH', 'REAL', 'AND', 'PERSONAL', 'CONFISCATED', 'AND', 'INITIATED', 'TO', 'THE', 'GOVERNMENT', 'OF', 'THE', 'UNITED', 'STATES', 'AND', 'ALTHOUGH', 'THE', 'PERSONAL', 'PROPERTY', 'WAS', 'SOON', 'RESTORED', 'REAL', 'ESTATE', 'OF', 'GREAT', 'VALUE', 'LONG', 'LAY', 'IN', 'THE', 'HANDS', 'OF', 'THE', "COURT'S", 'RECEIVER', 'AND', 'THE', 'MORMON', 'CHURCH', 'HAD', 'TO', 'PAY', 'THE', 'NATIONAL', 'GOVERNMENT', 'HIGH', 'RENTAL', 'ON', 'ITS', 'OWN', 'PROPERTY'] +4077-13754-0016-1112: ref=['AND', 'SO', 'THE', 'STORY', 'OF', 'MORMONISM', 'RUNS', 'ON', 'ITS', 'FINALE', 'HAS', 'NOT', 'YET', 'BEEN', 'WRITTEN', 'THE', 'CURRENT', 'PRESS', 'PRESENTS', 'CONTINUOUSLY', 'NEW', 'STAGES', 'OF', 'ITS', 'PROGRESS', 'NEW', 'DEVELOPMENTS', 'OF', 'ITS', 'PLAN'] +4077-13754-0016-1112: hyp=['AND', 'SO', 'THE', 'STORY', 'OF', 'MORMONISM', 'RUNS', 'ON', 'ITS', 'FINALE', 'HAS', 'NOT', 'YET', 'BEEN', 'WRITTEN', 'THE', 'CURRENT', 'PRESS', 'PRESENTS', 'CONTINUOUSLY', 'NEW', 'STAGES', 'OF', 'ITS', 'PROGRESS', 'NEW', 'DEVELOPMENTS', 'OF', 'ITS', 'PLAN'] +4446-2271-0000-1113: ref=['MAINHALL', 'LIKED', 'ALEXANDER', 'BECAUSE', 'HE', 'WAS', 'AN', 'ENGINEER'] +4446-2271-0000-1113: hyp=['MAYHALL', 'LIKED', 'ALEXANDER', 'BECAUSE', 'HE', 'WAS', 'AN', 'ENGINEER'] +4446-2271-0001-1114: ref=['HE', 'HAD', 'PRECONCEIVED', 'IDEAS', 'ABOUT', 'EVERYTHING', 'AND', 'HIS', 'IDEA', 'ABOUT', 'AMERICANS', 'WAS', 'THAT', 'THEY', 'SHOULD', 'BE', 'ENGINEERS', 'OR', 'MECHANICS'] +4446-2271-0001-1114: hyp=['HE', 'HAD', 
'PRECONCEIVED', 'IDEAS', 'ABOUT', 'EVERYTHING', 'AND', 'HIS', 'IDEA', 'ABOUT', 'AMERICANS', 'WAS', 'THAT', 'THEY', 'SHOULD', 'BE', 'ENGINEERS', 'OR', 'MECHANICS'] +4446-2271-0002-1115: ref=["IT'S", 'TREMENDOUSLY', 'WELL', 'PUT', 'ON', 'TOO'] +4446-2271-0002-1115: hyp=["IT'S", 'TREMENDOUSLY', 'WELL', 'PUT', 'ON', 'TOO'] +4446-2271-0003-1116: ref=["IT'S", 'BEEN', 'ON', 'ONLY', 'TWO', 'WEEKS', 'AND', "I'VE", 'BEEN', 'HALF', 'A', 'DOZEN', 'TIMES', 'ALREADY'] +4446-2271-0003-1116: hyp=["IT'S", 'BEEN', 'ON', 'ONLY', 'TWO', 'WEEKS', 'AND', "I'VE", 'BEEN', 'HALF', 'A', 'DOZEN', 'TIMES', 'ALREADY'] +4446-2271-0004-1117: ref=['DO', 'YOU', 'KNOW', 'ALEXANDER', 'MAINHALL', 'LOOKED', 'WITH', 'PERPLEXITY', 'UP', 'INTO', 'THE', 'TOP', 'OF', 'THE', 'HANSOM', 'AND', 'RUBBED', 'HIS', 'PINK', 'CHEEK', 'WITH', 'HIS', 'GLOVED', 'FINGER', 'DO', 'YOU', 'KNOW', 'I', 'SOMETIMES', 'THINK', 'OF', 'TAKING', 'TO', 'CRITICISM', 'SERIOUSLY', 'MYSELF'] +4446-2271-0004-1117: hyp=['DO', 'YOU', 'KNOW', 'ALEXANDER', 'MAYHALL', 'LOOKED', 'WITH', 'PERPLEXITY', 'UP', 'INTO', 'THE', 'TOP', 'OF', 'THE', 'HANSOM', 'AND', 'RUBBED', 'HIS', 'PINK', 'CHEEK', 'WITH', 'HIS', 'GLOVED', 'FINGER', 'DO', 'YOU', 'KNOW', 'I', 'SOMETIMES', 'THINK', 'OF', 'TAKING', 'TO', 'CRITICISM', 'SERIOUSLY', 'MYSELF'] +4446-2271-0005-1118: ref=['SHE', 'SAVES', 'HER', 'HAND', 'TOO', "SHE'S", 'AT', 'HER', 'BEST', 'IN', 'THE', 'SECOND', 'ACT'] +4446-2271-0005-1118: hyp=['SHE', 'SAVES', 'HER', 'HAND', 'TOO', "SHE'S", 'AT', 'HER', 'BEST', 'IN', 'THE', 'SECOND', 'ACT'] +4446-2271-0006-1119: ref=["HE'S", 'BEEN', 'WANTING', 'TO', 'MARRY', 'HILDA', 'THESE', 'THREE', 'YEARS', 'AND', 'MORE'] +4446-2271-0006-1119: hyp=["HE'S", 'BEEN', 'WANTING', 'TO', 'MARRY', 'HILDA', 'THESE', 'THREE', 'YEARS', 'AND', 'MORE'] +4446-2271-0007-1120: ref=['SHE', "DOESN'T", 'TAKE', 'UP', 'WITH', 'ANYBODY', 'YOU', 'KNOW'] +4446-2271-0007-1120: hyp=['SHE', "DOESN'T", 'TAKE', 'UP', 'WITH', 'ANYBODY', 'YOU', 'KNOW'] +4446-2271-0008-1121: ref=['IRENE', 'BURGOYNE', 'ONE', 'OF', 'HER', 'FAMILY', 'TOLD', 'ME', 'IN', 'CONFIDENCE', 'THAT', 'THERE', 'WAS', 'A', 'ROMANCE', 'SOMEWHERE', 'BACK', 'IN', 'THE', 'BEGINNING'] +4446-2271-0008-1121: hyp=['IRENE', 'BURGOIN', 'ONE', 'OF', 'HER', 'FAMILY', 'TOLD', 'ME', 'IN', 'CONFIDENCE', 'THAT', 'THERE', 'WAS', 'A', 'ROMANCE', 'SOMEWHERE', 'BACK', 'IN', 'THE', 'BEGINNING'] +4446-2271-0009-1122: ref=['MAINHALL', 'VOUCHED', 'FOR', 'HER', 'CONSTANCY', 'WITH', 'A', 'LOFTINESS', 'THAT', 'MADE', 'ALEXANDER', 'SMILE', 'EVEN', 'WHILE', 'A', 'KIND', 'OF', 'RAPID', 'EXCITEMENT', 'WAS', 'TINGLING', 'THROUGH', 'HIM'] +4446-2271-0009-1122: hyp=['MAYHALL', 'VOUCHED', 'FOR', 'HER', 'CONSTANCY', 'WITH', 'A', 'LOFTINESS', 'THAT', 'MADE', 'ALEXANDER', 'SMILE', 'EVEN', 'WHILE', 'A', 'KIND', 'OF', 'RAPID', 'EXCITEMENT', 'WAS', 'TINGLING', 'THROUGH', 'HIM'] +4446-2271-0010-1123: ref=["HE'S", 'ANOTHER', "WHO'S", 'AWFULLY', 'KEEN', 'ABOUT', 'HER', 'LET', 'ME', 'INTRODUCE', 'YOU'] +4446-2271-0010-1123: hyp=["HE'S", 'ANOTHER', "WHO'S", 'AWFULLY', 'KEEN', 'ABOUT', 'HER', 'LET', 'ME', 'INTRODUCE', 'YOU'] +4446-2271-0011-1124: ref=['SIR', 'HARRY', 'TOWNE', 'MISTER', 'BARTLEY', 'ALEXANDER', 'THE', 'AMERICAN', 'ENGINEER'] +4446-2271-0011-1124: hyp=['SIR', 'HARRY', 'TOWN', 'MISTER', 'BERTLEY', 'ALEXANDER', 'THE', 'AMERICAN', 'ENGINEER'] +4446-2271-0012-1125: ref=['I', 'SAY', 'SIR', 'HARRY', 'THE', 'LITTLE', "GIRL'S", 'GOING', 'FAMOUSLY', 'TO', 'NIGHT', "ISN'T", 'SHE'] +4446-2271-0012-1125: hyp=['I', 'SAY', 'SIR', 'HARRY', 'THE', 'LITTLE', "GIRL'S", 'GOING', 'FAMOUSLY', 'TO', 'NIGHT', 
"ISN'T", 'SHE'] +4446-2271-0013-1126: ref=['DO', 'YOU', 'KNOW', 'I', 'THOUGHT', 'THE', 'DANCE', 'A', 'BIT', 'CONSCIOUS', 'TO', 'NIGHT', 'FOR', 'THE', 'FIRST', 'TIME'] +4446-2271-0013-1126: hyp=['YOU', 'KNOW', 'I', 'THOUGHT', 'THE', 'DANCE', 'A', 'BIT', 'CONSCIOUS', 'TONIGHT', 'FOR', 'THE', 'FIRST', 'TIME'] +4446-2271-0014-1127: ref=['WESTMERE', 'AND', 'I', 'WERE', 'BACK', 'AFTER', 'THE', 'FIRST', 'ACT', 'AND', 'WE', 'THOUGHT', 'SHE', 'SEEMED', 'QUITE', 'UNCERTAIN', 'OF', 'HERSELF'] +4446-2271-0014-1127: hyp=['WESTMERE', 'AND', 'I', 'WERE', 'BACK', 'AFTER', 'THE', 'FIRST', 'ACT', 'AND', 'WE', 'THOUGHT', 'SHE', 'SEEMED', 'QUITE', 'UNCERTAIN', 'OF', 'HERSELF'] +4446-2271-0015-1128: ref=['A', 'LITTLE', 'ATTACK', 'OF', 'NERVES', 'POSSIBLY'] +4446-2271-0015-1128: hyp=['A', 'LITTLE', 'ATTACK', 'OF', 'NERVES', 'POSSIBLY'] +4446-2271-0016-1129: ref=['HE', 'WAS', 'BEGINNING', 'TO', 'FEEL', 'A', 'KEEN', 'INTEREST', 'IN', 'THE', 'SLENDER', 'BAREFOOT', 'DONKEY', 'GIRL', 'WHO', 'SLIPPED', 'IN', 'AND', 'OUT', 'OF', 'THE', 'PLAY', 'SINGING', 'LIKE', 'SOME', 'ONE', 'WINDING', 'THROUGH', 'A', 'HILLY', 'FIELD'] +4446-2271-0016-1129: hyp=['HE', 'WAS', 'BEGINNING', 'TO', 'FEEL', 'A', 'KEEN', 'INTEREST', 'IN', 'THE', 'SLENDER', 'BAREFOOT', 'DONKEY', 'GIRL', 'WHO', 'SLIPPED', 'IN', 'AND', 'OUT', 'OF', 'THE', 'PLAY', 'SINGING', 'LIKE', 'SOME', 'ONE', 'WINDING', 'THROUGH', 'A', 'HILLY', 'FIELD'] +4446-2271-0017-1130: ref=['ONE', 'NIGHT', 'WHEN', 'HE', 'AND', 'WINIFRED', 'WERE', 'SITTING', 'TOGETHER', 'ON', 'THE', 'BRIDGE', 'HE', 'TOLD', 'HER', 'THAT', 'THINGS', 'HAD', 'HAPPENED', 'WHILE', 'HE', 'WAS', 'STUDYING', 'ABROAD', 'THAT', 'HE', 'WAS', 'SORRY', 'FOR', 'ONE', 'THING', 'IN', 'PARTICULAR', 'AND', 'HE', 'ASKED', 'HER', 'WHETHER', 'SHE', 'THOUGHT', 'SHE', 'OUGHT', 'TO', 'KNOW', 'ABOUT', 'THEM'] +4446-2271-0017-1130: hyp=['ONE', 'NIGHT', 'WHEN', 'HE', 'AND', 'WINNIFRED', 'WERE', 'SITTING', 'TOGETHER', 'ON', 'THE', 'BRIDGE', 'HE', 'TOLD', 'HER', 'THAT', 'THINGS', 'HAD', 'HAPPENED', 'WHILE', 'HE', 'WAS', 'STUDYING', 'ABROAD', 'THAT', 'HE', 'WAS', 'SORRY', 'FOR', 'ONE', 'THING', 'IN', 'PARTICULAR', 'AND', 'HE', 'ASKED', 'HER', 'WHETHER', 'SHE', 'THOUGHT', 'SHE', 'OUGHT', 'TO', 'KNOW', 'ABOUT', 'THEM'] +4446-2271-0018-1131: ref=['SHE', 'CONSIDERED', 'A', 'MOMENT', 'AND', 'THEN', 'SAID', 'NO', 'I', 'THINK', 'NOT', 'THOUGH', 'I', 'AM', 'GLAD', 'YOU', 'ASK', 'ME'] +4446-2271-0018-1131: hyp=['SHE', 'CONSIDERED', 'FOR', 'A', 'MOMENT', 'AND', 'THEN', 'SAID', 'NO', 'I', 'THINK', 'NOT', 'THOUGH', 'I', 'AM', 'GLAD', 'YOU', 'ASKED', 'ME'] +4446-2271-0019-1132: ref=['AFTER', 'THAT', 'IT', 'WAS', 'EASY', 'TO', 'FORGET', 'ACTUALLY', 'TO', 'FORGET'] +4446-2271-0019-1132: hyp=['AFTER', 'THAT', 'IT', 'WAS', 'EASY', 'TO', 'FORGET', 'ACTUALLY', 'TO', 'FORGET'] +4446-2271-0020-1133: ref=['OF', 'COURSE', 'HE', 'REFLECTED', 'SHE', 'ALWAYS', 'HAD', 'THAT', 'COMBINATION', 'OF', 'SOMETHING', 'HOMELY', 'AND', 'SENSIBLE', 'AND', 'SOMETHING', 'UTTERLY', 'WILD', 'AND', 'DAFT'] +4446-2271-0020-1133: hyp=['OF', 'COURSE', 'HE', 'REFLECTED', 'SHE', 'ALWAYS', 'HAD', 'THAT', 'COMBINATION', 'OF', 'SOMETHING', 'HOMELY', 'AND', 'SENSIBLE', 'AND', 'SOMETHING', 'UTTERLY', 'WILD', 'AND', 'DAFT'] +4446-2271-0021-1134: ref=['SHE', 'MUST', 'CARE', 'ABOUT', 'THE', 'THEATRE', 'A', 'GREAT', 'DEAL', 'MORE', 'THAN', 'SHE', 'USED', 'TO'] +4446-2271-0021-1134: hyp=['SHE', 'MUST', 'CARE', 'ABOUT', 'THE', 'THEATRE', 'A', 'GREAT', 'DEAL', 'MORE', 'THAN', 'SHE', 'USED', 'TO'] +4446-2271-0022-1135: ref=["I'M", 'GLAD', "SHE'S", 'HELD', 'HER', 'OWN', 'SINCE'] 
+4446-2271-0022-1135: hyp=["I'M", 'GLAD', "SHE'S", 'HELD', 'HER', 'OWN', 'SENSE'] +4446-2271-0023-1136: ref=['AFTER', 'ALL', 'WE', 'WERE', 'AWFULLY', 'YOUNG'] +4446-2271-0023-1136: hyp=['AFTER', 'ALL', 'WE', 'WERE', 'AWFULLY', 'YOUNG'] +4446-2271-0024-1137: ref=['I', "SHOULDN'T", 'WONDER', 'IF', 'SHE', 'COULD', 'LAUGH', 'ABOUT', 'IT', 'WITH', 'ME', 'NOW'] +4446-2271-0024-1137: hyp=['I', "SHOULDN'T", 'WONDER', 'IF', 'SHE', 'COULD', 'LAUGH', 'ABOUT', 'IT', 'WITH', 'ME', 'NOW'] +4446-2273-0000-1138: ref=['HILDA', 'WAS', 'VERY', 'NICE', 'TO', 'HIM', 'AND', 'HE', 'SAT', 'ON', 'THE', 'EDGE', 'OF', 'HIS', 'CHAIR', 'FLUSHED', 'WITH', 'HIS', 'CONVERSATIONAL', 'EFFORTS', 'AND', 'MOVING', 'HIS', 'CHIN', 'ABOUT', 'NERVOUSLY', 'OVER', 'HIS', 'HIGH', 'COLLAR'] +4446-2273-0000-1138: hyp=['HILDA', 'WAS', 'VERY', 'NICE', 'TO', 'HIM', 'AND', 'HE', 'SAT', 'ON', 'THE', 'EDGE', 'OF', 'HIS', 'CHAIR', 'FLUSHED', 'WITH', 'HIS', 'CONVERSATIONAL', 'EFFORTS', 'AND', 'MOVING', 'HIS', 'CHIN', 'ABOUT', 'NERVOUSLY', 'OVER', 'HIS', 'HIGH', 'COLLAR'] +4446-2273-0001-1139: ref=['THEY', 'ASKED', 'HIM', 'TO', 'COME', 'TO', 'SEE', 'THEM', 'IN', 'CHELSEA', 'AND', 'THEY', 'SPOKE', 'VERY', 'TENDERLY', 'OF', 'HILDA'] +4446-2273-0001-1139: hyp=['THEY', 'ASKED', 'HIM', 'TO', 'COME', 'TO', 'SEE', 'THEM', 'IN', 'CHELSEA', 'AND', 'THEY', 'SPOKE', 'VERY', 'TENDERLY', 'OF', 'HILDA'] +4446-2273-0002-1140: ref=['LAMB', "WOULDN'T", 'CARE', 'A', 'GREAT', 'DEAL', 'ABOUT', 'MANY', 'OF', 'THEM', 'I', 'FANCY'] +4446-2273-0002-1140: hyp=['LAMB', "WOULDN'T", 'CARE', 'A', 'GREAT', 'DEAL', 'ABOUT', 'MANY', 'OF', 'THEM', 'I', 'FANCY'] +4446-2273-0003-1141: ref=['WHEN', 'BARTLEY', 'ARRIVED', 'AT', 'BEDFORD', 'SQUARE', 'ON', 'SUNDAY', 'EVENING', 'MARIE', 'THE', 'PRETTY', 'LITTLE', 'FRENCH', 'GIRL', 'MET', 'HIM', 'AT', 'THE', 'DOOR', 'AND', 'CONDUCTED', 'HIM', 'UPSTAIRS'] +4446-2273-0003-1141: hyp=['WHEN', 'BARTLEY', 'ARRIVED', 'AT', 'BEDFORD', 'SQUARE', 'ON', 'SUNDAY', 'EVENING', 'MARIE', 'THE', 'PRETTY', 'LITTLE', 'FRENCH', 'GIRL', 'MET', 'HIM', 'AT', 'THE', 'DOOR', 'AND', 'CONDUCTED', 'HIM', 'UPSTAIRS'] +4446-2273-0004-1142: ref=['I', 'SHOULD', 'NEVER', 'HAVE', 'ASKED', 'YOU', 'IF', 'MOLLY', 'HAD', 'BEEN', 'HERE', 'FOR', 'I', 'REMEMBER', 'YOU', "DON'T", 'LIKE', 'ENGLISH', 'COOKERY'] +4446-2273-0004-1142: hyp=['I', 'SHOULD', 'NEVER', 'HAVE', 'ASKED', 'YOU', 'IF', 'MOLLY', 'HAD', 'BEEN', 'HERE', 'FOR', 'I', 'REMEMBER', 'YOU', "DON'T", 'LIKE', 'ENGLISH', 'COOKERY'] +4446-2273-0005-1143: ref=['I', "HAVEN'T", 'HAD', 'A', 'CHANCE', 'YET', 'TO', 'TELL', 'YOU', 'WHAT', 'A', 'JOLLY', 'LITTLE', 'PLACE', 'I', 'THINK', 'THIS', 'IS'] +4446-2273-0005-1143: hyp=['I', "HAVEN'T", 'HAD', 'A', 'CHANCE', 'YET', 'TO', 'TELL', 'YOU', 'WHAT', 'A', 'JOLLY', 'LITTLE', 'PLACE', 'I', 'THINK', 'THIS', 'IS'] +4446-2273-0006-1144: ref=['THEY', 'ARE', 'ALL', 'SKETCHES', 'MADE', 'ABOUT', 'THE', 'VILLA', "D'ESTE", 'YOU', 'SEE'] +4446-2273-0006-1144: hyp=['THEY', 'ARE', 'ALL', 'SKETCHES', 'MADE', 'ABOUT', 'THE', 'VILIDESTA', 'YOU', 'SEE'] +4446-2273-0007-1145: ref=['THOSE', 'FELLOWS', 'ARE', 'ALL', 'VERY', 'LOYAL', 'EVEN', 'MAINHALL'] +4446-2273-0007-1145: hyp=['THOSE', 'FELLOWS', 'ARE', 'ALL', 'VERY', 'LOYAL', 'EVEN', 'MAINHALL'] +4446-2273-0008-1146: ref=["I'VE", 'MANAGED', 'TO', 'SAVE', 'SOMETHING', 'EVERY', 'YEAR', 'AND', 'THAT', 'WITH', 'HELPING', 'MY', 'THREE', 'SISTERS', 'NOW', 'AND', 'THEN', 'AND', 'TIDING', 'POOR', 'COUSIN', 'MIKE', 'OVER', 'BAD', 'SEASONS'] +4446-2273-0008-1146: hyp=["I'VE", 'MANAGED', 'TO', 'SAVE', 'SOMETHING', 'EVERY', 'YEAR', 'AND', 'THAT', 'WITH', 
'HELPING', 'MY', 'THREE', 'SISTERS', 'NOW', 'AND', 'THEN', 'AND', 'TIDING', 'POOR', 'COUSIN', 'MIKE', 'OVER', 'BAD', 'SEASONS'] +4446-2273-0009-1147: ref=["IT'S", 'NOT', 'PARTICULARLY', 'RARE', 'SHE', 'SAID', 'BUT', 'SOME', 'OF', 'IT', 'WAS', 'MY', "MOTHER'S"] +4446-2273-0009-1147: hyp=["IT'S", 'NOT', 'PARTICULARLY', 'RARE', 'SHE', 'SAID', 'BUT', 'SOME', 'OF', 'IT', 'WAS', 'MY', "MOTHER'S"] +4446-2273-0010-1148: ref=['THERE', 'WAS', 'WATERCRESS', 'SOUP', 'AND', 'SOLE', 'AND', 'A', 'DELIGHTFUL', 'OMELETTE', 'STUFFED', 'WITH', 'MUSHROOMS', 'AND', 'TRUFFLES', 'AND', 'TWO', 'SMALL', 'RARE', 'DUCKLINGS', 'AND', 'ARTICHOKES', 'AND', 'A', 'DRY', 'YELLOW', 'RHONE', 'WINE', 'OF', 'WHICH', 'BARTLEY', 'HAD', 'ALWAYS', 'BEEN', 'VERY', 'FOND'] +4446-2273-0010-1148: hyp=['THERE', 'WAS', 'WATERCRESS', 'SOUP', 'AND', 'SOLE', 'AND', 'A', 'DELIGHTFUL', 'OMELET', 'STUFFED', 'WITH', 'MUSHROOMS', 'AND', 'TRUFFLES', 'AND', 'TWO', 'SMALL', 'RARE', 'DUCKLINGS', 'AND', 'ARTICHOKES', 'AND', 'A', 'DRY', 'YELLOW', 'RHONE', 'WINE', 'OF', 'WHICH', 'BARTLEY', 'HAD', 'ALWAYS', 'BEEN', 'VERY', 'FOND'] +4446-2273-0011-1149: ref=['THERE', 'IS', 'NOTHING', 'ELSE', 'THAT', 'LOOKS', 'SO', 'JOLLY'] +4446-2273-0011-1149: hyp=["THERE'S", 'NOTHING', 'ELSE', 'THAT', 'LOOKS', 'SO', 'JOLLY'] +4446-2273-0012-1150: ref=['THANK', 'YOU', 'BUT', 'I', "DON'T", 'LIKE', 'IT', 'SO', 'WELL', 'AS', 'THIS'] +4446-2273-0012-1150: hyp=['THANK', 'YOU', 'BUT', 'I', "DON'T", 'LIKE', 'IT', 'SO', 'WELL', 'AS', 'THIS'] +4446-2273-0013-1151: ref=['HAVE', 'YOU', 'BEEN', 'IN', 'PARIS', 'MUCH', 'THESE', 'LATE', 'YEARS'] +4446-2273-0013-1151: hyp=['HAVE', 'YOU', 'BEEN', 'IN', 'PARIS', 'MUCH', 'THESE', 'LATE', 'YEARS'] +4446-2273-0014-1152: ref=['THERE', 'ARE', 'FEW', 'CHANGES', 'IN', 'THE', 'OLD', 'QUARTER'] +4446-2273-0014-1152: hyp=['THERE', 'ARE', 'A', 'FEW', 'CHANGES', 'IN', 'THE', 'OLD', 'QUARTER'] +4446-2273-0015-1153: ref=["DON'T", 'I', 'THOUGH', "I'M", 'SO', 'SORRY', 'TO', 'HEAR', 'IT', 'HOW', 'DID', 'HER', 'SON', 'TURN', 'OUT'] +4446-2273-0015-1153: hyp=["DON'T", 'I', 'THOUGH', "I'M", 'SO', 'SORRY', 'TO', 'HEAR', 'IT', 'HOW', 'DID', 'HER', 'SON', 'TURN', 'OUT'] +4446-2273-0016-1154: ref=['HER', 'HAIR', 'IS', 'STILL', 'LIKE', 'FLAX', 'AND', 'HER', 'BLUE', 'EYES', 'ARE', 'JUST', 'LIKE', 'A', "BABY'S", 'AND', 'SHE', 'HAS', 'THE', 'SAME', 'THREE', 'FRECKLES', 'ON', 'HER', 'LITTLE', 'NOSE', 'AND', 'TALKS', 'ABOUT', 'GOING', 'BACK', 'TO', 'HER', 'BAINS', 'DE', 'MER'] +4446-2273-0016-1154: hyp=['HER', 'HAIR', 'IS', 'STILL', 'LIKE', 'FLAX', 'AND', 'HER', 'BLUE', 'EYES', 'ARE', 'JUST', 'LIKE', 'A', "BABY'S", 'AND', 'SHE', 'HAS', 'THE', 'SAME', 'THREE', 'FRECKLES', 'ON', 'HER', 'LITTLE', 'NOSE', 'AND', 'TALKS', 'ABOUT', 'GOING', 'BACK', 'TO', 'HER', 'BAND', 'O', 'MER'] +4446-2273-0017-1155: ref=['HOW', 'JOLLY', 'IT', 'WAS', 'BEING', 'YOUNG', 'HILDA'] +4446-2273-0017-1155: hyp=['HOW', 'JOLLY', 'IT', 'WAS', 'BEING', 'YOUNG', 'HILDA'] +4446-2273-0018-1156: ref=['DO', 'YOU', 'REMEMBER', 'THAT', 'FIRST', 'WALK', 'WE', 'TOOK', 'TOGETHER', 'IN', 'PARIS'] +4446-2273-0018-1156: hyp=['DO', 'YOU', 'REMEMBER', 'THAT', 'FIRST', 'WALK', 'WE', 'TOOK', 'TOGETHER', 'IN', 'PARIS'] +4446-2273-0019-1157: ref=['COME', "WE'LL", 'HAVE', 'OUR', 'COFFEE', 'IN', 'THE', 'OTHER', 'ROOM', 'AND', 'YOU', 'CAN', 'SMOKE'] +4446-2273-0019-1157: hyp=['COME', "WE'LL", 'HAVE', 'OUR', 'COFFEE', 'IN', 'THE', 'OTHER', 'ROOM', 'AND', 'YOU', 'CAN', 'SMOKE'] +4446-2273-0020-1158: ref=['I', 'THINK', 'WE', 'DID', 'SHE', 'ANSWERED', 'DEMURELY'] +4446-2273-0020-1158: hyp=['I', 'THINK', 'WE', 'DID', 'SHE', 
'ANSWERED', 'DEMURELY'] +4446-2273-0021-1159: ref=['WHAT', 'SHE', 'WANTED', 'FROM', 'US', 'WAS', 'NEITHER', 'OUR', 'FLOWERS', 'NOR', 'OUR', 'FRANCS', 'BUT', 'JUST', 'OUR', 'YOUTH'] +4446-2273-0021-1159: hyp=['WHAT', 'SHE', 'WANTED', 'FROM', 'US', 'WAS', 'NEITHER', 'OUR', 'FLOWERS', 'NOR', 'OUR', 'FRANCS', 'BUT', 'JUST', 'OUR', 'YOUTH'] +4446-2273-0022-1160: ref=['THEY', 'WERE', 'BOTH', 'REMEMBERING', 'WHAT', 'THE', 'WOMAN', 'HAD', 'SAID', 'WHEN', 'SHE', 'TOOK', 'THE', 'MONEY', 'GOD', 'GIVE', 'YOU', 'A', 'HAPPY', 'LOVE'] +4446-2273-0022-1160: hyp=['THEY', 'WERE', 'BOTH', 'REMEMBERING', 'WHAT', 'THE', 'WOMAN', 'HAD', 'SAID', 'WHEN', 'SHE', 'TOOK', 'THE', 'MONEY', 'GOD', 'GIVE', 'YOU', 'A', 'HAPPY', 'LOVE'] +4446-2273-0023-1161: ref=['THE', 'STRANGE', 'WOMAN', 'AND', 'HER', 'PASSIONATE', 'SENTENCE', 'THAT', 'RANG', 'OUT', 'SO', 'SHARPLY', 'HAD', 'FRIGHTENED', 'THEM', 'BOTH'] +4446-2273-0023-1161: hyp=['THE', 'STRANGE', 'WOMAN', 'AND', 'HER', 'PASSIONATE', 'SENTENCE', 'THAT', 'RANG', 'OUT', 'SO', 'SHARPLY', 'HAD', 'FRIGHTENED', 'THEM', 'BOTH'] +4446-2273-0024-1162: ref=['BARTLEY', 'STARTED', 'WHEN', 'HILDA', 'RANG', 'THE', 'LITTLE', 'BELL', 'BESIDE', 'HER', 'DEAR', 'ME', 'WHY', 'DID', 'YOU', 'DO', 'THAT'] +4446-2273-0024-1162: hyp=['PARTLEY', 'STARTED', 'WHEN', 'HILDA', 'RANG', 'THE', 'LITTLE', 'BELL', 'BESIDE', 'HER', 'DEAR', 'ME', 'WHY', 'DID', 'YOU', 'DO', 'THAT'] +4446-2273-0025-1163: ref=['IT', 'WAS', 'VERY', 'JOLLY', 'HE', 'MURMURED', 'LAZILY', 'AS', 'MARIE', 'CAME', 'IN', 'TO', 'TAKE', 'AWAY', 'THE', 'COFFEE'] +4446-2273-0025-1163: hyp=['IT', 'WAS', 'VERY', 'JOLLY', 'HE', 'MURMURED', 'LAZILY', 'AS', 'MARIE', 'CAME', 'IN', 'TO', 'TAKE', 'AWAY', 'THE', 'COFFEE'] +4446-2273-0026-1164: ref=['HAVE', 'I', 'TOLD', 'YOU', 'ABOUT', 'MY', 'NEW', 'PLAY'] +4446-2273-0026-1164: hyp=['HAVE', 'I', 'TOLD', 'YOU', 'ABOUT', 'MY', 'NEW', 'PLAY'] +4446-2273-0027-1165: ref=['WHEN', 'SHE', 'FINISHED', 'ALEXANDER', 'SHOOK', 'HIMSELF', 'OUT', 'OF', 'A', 'REVERIE'] +4446-2273-0027-1165: hyp=['WHEN', 'SHE', 'FINISHED', 'ALEXANDER', 'SHOOK', 'HIMSELF', 'OUT', 'OF', 'A', 'REVERIE'] +4446-2273-0028-1166: ref=['NONSENSE', 'OF', 'COURSE', 'I', "CAN'T", 'REALLY', 'SING', 'EXCEPT', 'THE', 'WAY', 'MY', 'MOTHER', 'AND', 'GRANDMOTHER', 'DID', 'BEFORE', 'ME'] +4446-2273-0028-1166: hyp=['NONSENSE', 'OF', 'COURSE', 'I', "CAN'T", 'REALLY', 'SING', 'EXCEPT', 'THE', 'WAY', 'MY', 'MOTHER', 'AND', 'GRANDMOTHER', 'DID', 'BEFORE', 'ME'] +4446-2273-0029-1167: ref=["IT'S", 'REALLY', 'TOO', 'WARM', 'IN', 'THIS', 'ROOM', 'TO', 'SING', "DON'T", 'YOU', 'FEEL', 'IT'] +4446-2273-0029-1167: hyp=["IT'S", 'REALLY', 'TOO', 'WARM', 'IN', 'THIS', 'ROOM', 'TO', 'SING', "DON'T", 'YOU', 'FEEL', 'IT'] +4446-2273-0030-1168: ref=['ALEXANDER', 'WENT', 'OVER', 'AND', 'OPENED', 'THE', 'WINDOW', 'FOR', 'HER'] +4446-2273-0030-1168: hyp=['ALEXANDER', 'WENT', 'OVER', 'AND', 'OPENED', 'THE', 'WINDOW', 'FOR', 'HER'] +4446-2273-0031-1169: ref=['THERE', 'JUST', 'IN', 'FRONT'] +4446-2273-0031-1169: hyp=['THERE', 'JUST', 'IN', 'FRONT'] +4446-2273-0032-1170: ref=['HE', 'STOOD', 'A', 'LITTLE', 'BEHIND', 'HER', 'AND', 'TRIED', 'TO', 'STEADY', 'HIMSELF', 'AS', 'HE', 'SAID', "IT'S", 'SOFT', 'AND', 'MISTY', 'SEE', 'HOW', 'WHITE', 'THE', 'STARS', 'ARE'] +4446-2273-0032-1170: hyp=['HE', 'STOOD', 'A', 'LITTLE', 'BEHIND', 'HER', 'AND', 'TRIED', 'TO', 'STEADY', 'HIMSELF', 'AS', 'HE', 'SAID', "IT'S", 'SOFT', 'AND', 'MISTY', 'SEE', 'HOW', 'WHITE', 'THE', 'STARS', 'ARE'] +4446-2273-0033-1171: ref=['FOR', 'A', 'LONG', 'TIME', 'NEITHER', 'HILDA', 'NOR', 'BARTLEY', 'SPOKE'] 
+4446-2273-0033-1171: hyp=['FOR', 'A', 'LONG', 'TIME', 'NEITHER', 'HILDA', 'NOR', 'BARTLEY', 'SPOKE'] +4446-2273-0034-1172: ref=['HE', 'FELT', 'A', 'TREMOR', 'RUN', 'THROUGH', 'THE', 'SLENDER', 'YELLOW', 'FIGURE', 'IN', 'FRONT', 'OF', 'HIM'] +4446-2273-0034-1172: hyp=['HE', 'FELT', 'A', 'TREMOR', 'RUN', 'THROUGH', 'THE', 'SLENDER', 'YELLOW', 'FIGURE', 'IN', 'FRONT', 'OF', 'HIM'] +4446-2273-0035-1173: ref=['BARTLEY', 'LEANED', 'OVER', 'HER', 'SHOULDER', 'WITHOUT', 'TOUCHING', 'HER', 'AND', 'WHISPERED', 'IN', 'HER', 'EAR', 'YOU', 'ARE', 'GIVING', 'ME', 'A', 'CHANCE', 'YES'] +4446-2273-0035-1173: hyp=['BARTLEY', 'LEANED', 'OVER', 'HER', 'SHOULDER', 'WITHOUT', 'TOUCHING', 'HER', 'AND', 'WHISPERED', 'IN', 'HER', 'EAR', 'YOU', 'ARE', 'GIVING', 'ME', 'A', 'CHANCE', 'YES'] +4446-2273-0036-1174: ref=['ALEXANDER', 'UNCLENCHED', 'THE', 'TWO', 'HANDS', 'AT', 'HIS', 'SIDES'] +4446-2273-0036-1174: hyp=['ALEXANDER', 'CLENCHED', 'THE', 'TWO', 'HANDS', 'AT', 'HIS', 'SIDES'] +4446-2275-0000-1175: ref=['THE', 'STOP', 'AT', 'QUEENSTOWN', 'THE', 'TEDIOUS', 'PASSAGE', 'UP', 'THE', 'MERSEY', 'WERE', 'THINGS', 'THAT', 'HE', 'NOTED', 'DIMLY', 'THROUGH', 'HIS', 'GROWING', 'IMPATIENCE'] +4446-2275-0000-1175: hyp=['THE', 'STOP', 'AT', 'QUEENSTOWN', 'THE', 'TEDIOUS', 'PASSAGE', 'UP', 'THE', 'MERCY', 'WERE', 'THINGS', 'THAT', 'HE', 'NOTED', 'DIMLY', 'THROUGH', 'HIS', 'GROWING', 'IMPATIENCE'] +4446-2275-0001-1176: ref=['SHE', 'BLUSHED', 'AND', 'SMILED', 'AND', 'FUMBLED', 'HIS', 'CARD', 'IN', 'HER', 'CONFUSION', 'BEFORE', 'SHE', 'RAN', 'UPSTAIRS'] +4446-2275-0001-1176: hyp=['SHE', 'BLUSHED', 'AND', 'SMILED', 'AND', 'FUMBLED', 'HIS', 'CARD', 'IN', 'HER', 'CONFUSION', 'BEFORE', 'SHE', 'RAN', 'UPSTAIRS'] +4446-2275-0002-1177: ref=['ALEXANDER', 'PACED', 'UP', 'AND', 'DOWN', 'THE', 'HALLWAY', 'BUTTONING', 'AND', 'UNBUTTONING', 'HIS', 'OVERCOAT', 'UNTIL', 'SHE', 'RETURNED', 'AND', 'TOOK', 'HIM', 'UP', 'TO', "HILDA'S", 'LIVING', 'ROOM'] +4446-2275-0002-1177: hyp=['ALEXANDER', 'PACED', 'UP', 'AND', 'DOWN', 'THE', 'HALLWAY', 'BUTTONING', 'AND', 'UNBUTTONING', 'HIS', 'OVERCOAT', 'UNTIL', 'SHE', 'RETURNED', 'AND', 'TOOK', 'HIM', 'UP', 'TO', "HILDA'S", 'LIVING', 'ROOM'] +4446-2275-0003-1178: ref=['THE', 'ROOM', 'WAS', 'EMPTY', 'WHEN', 'HE', 'ENTERED'] +4446-2275-0003-1178: hyp=['THE', 'ROOM', 'WAS', 'EMPTY', 'WHEN', 'HE', 'ENTERED'] +4446-2275-0004-1179: ref=['ALEXANDER', 'DID', 'NOT', 'SIT', 'DOWN'] +4446-2275-0004-1179: hyp=['ALEXANDER', 'DID', 'NOT', 'SIT', 'DOWN'] +4446-2275-0005-1180: ref=['I', 'FELT', 'IT', 'IN', 'MY', 'BONES', 'WHEN', 'I', 'WOKE', 'THIS', 'MORNING', 'THAT', 'SOMETHING', 'SPLENDID', 'WAS', 'GOING', 'TO', 'TURN', 'UP'] +4446-2275-0005-1180: hyp=['I', 'FELT', 'IT', 'IN', 'MY', 'BONES', 'WHEN', 'I', 'WOKE', 'THIS', 'MORNING', 'THAT', 'SOMETHING', 'SPLENDID', 'WAS', 'GOING', 'TO', 'TURN', 'UP'] +4446-2275-0006-1181: ref=['I', 'THOUGHT', 'IT', 'MIGHT', 'BE', 'SISTER', 'KATE', 'OR', 'COUSIN', 'MIKE', 'WOULD', 'BE', 'HAPPENING', 'ALONG'] +4446-2275-0006-1181: hyp=['I', 'THOUGHT', 'IT', 'MIGHT', 'BE', 'SISTER', 'KATE', 'OR', 'COUSIN', 'MIKE', 'WOULD', 'BE', 'HAPPENING', 'ALONG'] +4446-2275-0007-1182: ref=['SHE', 'PUSHED', 'HIM', 'TOWARD', 'THE', 'BIG', 'CHAIR', 'BY', 'THE', 'FIRE', 'AND', 'SAT', 'DOWN', 'ON', 'A', 'STOOL', 'AT', 'THE', 'OPPOSITE', 'SIDE', 'OF', 'THE', 'HEARTH', 'HER', 'KNEES', 'DRAWN', 'UP', 'TO', 'HER', 'CHIN', 'LAUGHING', 'LIKE', 'A', 'HAPPY', 'LITTLE', 'GIRL'] +4446-2275-0007-1182: hyp=['SHE', 'PUSHED', 'HIM', 'TOWARD', 'THE', 'BIG', 'CHAIR', 'BY', 'THE', 'FIRE', 'AND', 'SAT', 'DOWN', 'ON', 'A', 
'STOOL', 'AT', 'THE', 'OPPOSITE', 'SIDE', 'OF', 'THE', 'HEARTH', 'HER', 'KNEES', 'DRAWN', 'UP', 'TO', 'HER', 'CHIN', 'LAUGHING', 'LIKE', 'A', 'HAPPY', 'LITTLE', 'GIRL'] +4446-2275-0008-1183: ref=['WHEN', 'DID', 'YOU', 'COME', 'BARTLEY', 'AND', 'HOW', 'DID', 'IT', 'HAPPEN', 'YOU', "HAVEN'T", 'SPOKEN', 'A', 'WORD'] +4446-2275-0008-1183: hyp=['WHEN', 'DID', 'YOU', 'COME', 'BARTLEY', 'AND', 'HOW', 'DID', 'IT', 'HAPPEN', 'YOU', "HAVEN'T", 'SPOKEN', 'A', 'WORD'] +4446-2275-0009-1184: ref=['I', 'GOT', 'IN', 'ABOUT', 'TEN', 'MINUTES', 'AGO'] +4446-2275-0009-1184: hyp=['I', 'GOT', 'IN', 'ABOUT', 'TEN', 'MINUTES', 'AGO'] +4446-2275-0010-1185: ref=['ALEXANDER', 'LEANED', 'FORWARD', 'AND', 'WARMED', 'HIS', 'HANDS', 'BEFORE', 'THE', 'BLAZE'] +4446-2275-0010-1185: hyp=['ALEXANDER', 'LEANED', 'FORWARD', 'AND', 'WARMED', 'HIS', 'HANDS', 'BEFORE', 'THE', 'BLAZE'] +4446-2275-0011-1186: ref=['BARTLEY', 'BENT', 'LOWER', 'OVER', 'THE', 'FIRE'] +4446-2275-0011-1186: hyp=['PARTLEY', 'BENT', 'LOWER', 'OVER', 'THE', 'FIRE'] +4446-2275-0012-1187: ref=['SHE', 'LOOKED', 'AT', 'HIS', 'HEAVY', 'SHOULDERS', 'AND', 'BIG', 'DETERMINED', 'HEAD', 'THRUST', 'FORWARD', 'LIKE', 'A', 'CATAPULT', 'IN', 'LEASH'] +4446-2275-0012-1187: hyp=['SHE', 'LOOKED', 'AT', 'HIS', 'HEAVY', 'SHOULDERS', 'AND', 'BIG', 'DETERMINED', 'HEAD', 'THRUST', 'FORWARD', 'LIKE', 'A', 'CATAPULT', 'IN', 'LEASH'] +4446-2275-0013-1188: ref=["I'LL", 'DO', 'ANYTHING', 'YOU', 'WISH', 'ME', 'TO', 'BARTLEY', 'SHE', 'SAID', 'TREMULOUSLY'] +4446-2275-0013-1188: hyp=["I'LL", 'DO', 'ANYTHING', 'YOU', 'WISH', 'ME', 'TO', 'BARTLEY', 'SHE', 'SAID', 'TREMULOUSLY'] +4446-2275-0014-1189: ref=['I', "CAN'T", 'STAND', 'SEEING', 'YOU', 'MISERABLE'] +4446-2275-0014-1189: hyp=['I', "CAN'T", 'STAND', 'SEEING', 'YOU', 'MISERABLE'] +4446-2275-0015-1190: ref=['HE', 'PULLED', 'UP', 'A', 'WINDOW', 'AS', 'IF', 'THE', 'AIR', 'WERE', 'HEAVY'] +4446-2275-0015-1190: hyp=['HE', 'PULLED', 'UP', 'A', 'WINDOW', 'AS', 'IF', 'THE', 'AIR', 'WERE', 'HEAVY'] +4446-2275-0016-1191: ref=['HILDA', 'WATCHED', 'HIM', 'FROM', 'HER', 'CORNER', 'TREMBLING', 'AND', 'SCARCELY', 'BREATHING', 'DARK', 'SHADOWS', 'GROWING', 'ABOUT', 'HER', 'EYES', 'IT'] +4446-2275-0016-1191: hyp=['HILDA', 'WATCHED', 'HIM', 'FROM', 'THE', 'CORNER', 'TREMBLING', 'AND', 'SCARCELY', 'BREATHING', 'DARK', 'SHADOWS', 'GROWING', 'ABOUT', 'HER', 'EYES', 'IT'] +4446-2275-0017-1192: ref=['BUT', "IT'S", 'WORSE', 'NOW', "IT'S", 'UNBEARABLE'] +4446-2275-0017-1192: hyp=['BUT', "IT'S", 'WORSE', 'NOW', "IT'S", 'UNBEARABLE'] +4446-2275-0018-1193: ref=['I', 'GET', 'NOTHING', 'BUT', 'MISERY', 'OUT', 'OF', 'EITHER'] +4446-2275-0018-1193: hyp=['I', 'GET', 'NOTHING', 'BUT', 'MISERY', 'OUT', 'OF', 'EITHER'] +4446-2275-0019-1194: ref=['THE', 'WORLD', 'IS', 'ALL', 'THERE', 'JUST', 'AS', 'IT', 'USED', 'TO', 'BE', 'BUT', 'I', "CAN'T", 'GET', 'AT', 'IT', 'ANY', 'MORE'] +4446-2275-0019-1194: hyp=['THE', 'WORLD', 'IS', 'ALL', 'THERE', 'JUST', 'AS', 'IT', 'USED', 'TO', 'BE', 'BUT', 'I', "CAN'T", 'GET', 'AT', 'IT', 'ANY', 'MORE'] +4446-2275-0020-1195: ref=['IT', 'WAS', 'MYSELF', 'I', 'WAS', 'DEFYING', 'HILDA'] +4446-2275-0020-1195: hyp=['IT', 'WAS', 'MYSELF', 'I', 'WAS', 'DEFYING', 'HILDA'] +4446-2275-0021-1196: ref=["HILDA'S", 'FACE', 'QUIVERED', 'BUT', 'SHE', 'WHISPERED', 'YES', 'I', 'THINK', 'IT', 'MUST', 'HAVE', 'BEEN'] +4446-2275-0021-1196: hyp=["HILDA'S", 'FACE', 'QUIVERED', 'BUT', 'SHE', 'WHISPERED', 'YES', 'I', 'THINK', 'IT', 'MUST', 'HAVE', 'BEEN'] +4446-2275-0022-1197: ref=['BUT', 'WHY', "DIDN'T", 'YOU', 'TELL', 'ME', 'WHEN', 'YOU', 'WERE', 'HERE', 
'IN', 'THE', 'SUMMER'] +4446-2275-0022-1197: hyp=['BUT', 'WHY', "DIDN'T", 'YOU', 'TELL', 'ME', 'WHEN', 'YOU', 'WERE', 'HERE', 'IN', 'THE', 'SUMMER'] +4446-2275-0023-1198: ref=['ALEXANDER', 'GROANED', 'I', 'MEANT', 'TO', 'BUT', 'SOMEHOW', 'I', "COULDN'T"] +4446-2275-0023-1198: hyp=['ALEXANDER', 'GROANED', 'I', 'MEANT', 'TO', 'BUT', 'SOMEHOW', 'I', "COULDN'T"] +4446-2275-0024-1199: ref=['SHE', 'PRESSED', 'HIS', 'HAND', 'GENTLY', 'IN', 'GRATITUDE'] +4446-2275-0024-1199: hyp=['SHE', 'PRESSED', 'HIS', 'HAND', 'GENTLY', 'IN', 'GRATITUDE'] +4446-2275-0025-1200: ref=["WEREN'T", 'YOU', 'HAPPY', 'THEN', 'AT', 'ALL'] +4446-2275-0025-1200: hyp=["WEREN'T", 'YOU', 'HAPPY', 'THEN', 'AT', 'ALL'] +4446-2275-0026-1201: ref=['SHE', 'CLOSED', 'HER', 'EYES', 'AND', 'TOOK', 'A', 'DEEP', 'BREATH', 'AS', 'IF', 'TO', 'DRAW', 'IN', 'AGAIN', 'THE', 'FRAGRANCE', 'OF', 'THOSE', 'DAYS'] +4446-2275-0026-1201: hyp=['SHE', 'CLOSED', 'HER', 'EYES', 'AND', 'TOOK', 'A', 'DEEP', 'BREATH', 'AS', 'IF', 'TO', 'DRAW', 'IN', 'AGAIN', 'THE', 'FRAGRANCE', 'OF', 'THOSE', 'DAYS'] +4446-2275-0027-1202: ref=['HE', 'MOVED', 'UNEASILY', 'AND', 'HIS', 'CHAIR', 'CREAKED'] +4446-2275-0027-1202: hyp=['HE', 'MOVED', 'UNEASILY', 'AND', 'HIS', 'CHAIR', 'CREAKED'] +4446-2275-0028-1203: ref=['YES', 'YES', 'SHE', 'HURRIED', 'PULLING', 'HER', 'HAND', 'GENTLY', 'AWAY', 'FROM', 'HIM'] +4446-2275-0028-1203: hyp=['YES', 'YES', 'SHE', 'HURRIED', 'PULLING', 'HER', 'HAND', 'GENTLY', 'AWAY', 'FROM', 'HIM'] +4446-2275-0029-1204: ref=['PLEASE', 'TELL', 'ME', 'ONE', 'THING', 'BARTLEY', 'AT', 'LEAST', 'TELL', 'ME', 'THAT', 'YOU', 'BELIEVE', 'I', 'THOUGHT', 'I', 'WAS', 'MAKING', 'YOU', 'HAPPY'] +4446-2275-0029-1204: hyp=['PLEASE', 'TELL', 'ME', 'ONE', 'THING', 'BARTLEY', 'AT', 'LEAST', 'TELL', 'ME', 'THAT', 'YOU', 'BELIEVE', 'I', 'THOUGHT', 'I', 'WAS', 'MAKING', 'YOU', 'HAPPY'] +4446-2275-0030-1205: ref=['YES', 'HILDA', 'I', 'KNOW', 'THAT', 'HE', 'SAID', 'SIMPLY'] +4446-2275-0030-1205: hyp=['YES', 'HILDA', 'I', 'KNOW', 'THAT', 'HE', 'SAID', 'SIMPLY'] +4446-2275-0031-1206: ref=['I', 'UNDERSTAND', 'BARTLEY', 'I', 'WAS', 'WRONG'] +4446-2275-0031-1206: hyp=['I', 'UNDERSTAND', 'BARTLEY', 'I', 'WAS', 'WRONG'] +4446-2275-0032-1207: ref=['BUT', 'I', "DIDN'T", 'KNOW', "YOU'VE", 'ONLY', 'TO', 'TELL', 'ME', 'NOW'] +4446-2275-0032-1207: hyp=['BUT', 'I', "DIDN'T", 'KNOW', "YOU'VE", 'ONLY', 'TO', 'TELL', 'ME', 'NOW'] +4446-2275-0033-1208: ref=['WHAT', 'I', 'MEAN', 'IS', 'THAT', 'I', 'WANT', 'YOU', 'TO', 'PROMISE', 'NEVER', 'TO', 'SEE', 'ME', 'AGAIN', 'NO', 'MATTER', 'HOW', 'OFTEN', 'I', 'COME', 'NO', 'MATTER', 'HOW', 'HARD', 'I', 'BEG'] +4446-2275-0033-1208: hyp=['WHAT', 'I', 'MEAN', 'IS', 'THAT', 'I', 'WANT', 'YOU', 'TO', 'PROMISE', 'NEVER', 'TO', 'SEE', 'ME', 'AGAIN', 'NO', 'MATTER', 'HOW', 'OFTEN', 'I', 'COME', 'NO', 'MATTER', 'HOW', 'HARD', 'I', 'BEG'] +4446-2275-0034-1209: ref=['KEEP', 'AWAY', 'IF', 'YOU', 'WISH', 'WHEN', 'HAVE', 'I', 'EVER', 'FOLLOWED', 'YOU'] +4446-2275-0034-1209: hyp=['KEEP', 'AWAY', 'IF', 'YOU', 'WISH', 'WHEN', 'HAVE', 'I', 'EVER', 'FOLLOWED', 'YOU'] +4446-2275-0035-1210: ref=['ALEXANDER', 'ROSE', 'AND', 'SHOOK', 'HIMSELF', 'ANGRILY', 'YES', 'I', 'KNOW', "I'M", 'COWARDLY'] +4446-2275-0035-1210: hyp=['ALEXANDER', 'ROSE', 'AND', 'SHOOK', 'HIMSELF', 'ANGRILY', 'YES', 'I', 'KNOW', 'I', 'AM', 'COWARDLY'] +4446-2275-0036-1211: ref=['HE', 'TOOK', 'HER', 'ROUGHLY', 'IN', 'HIS', 'ARMS', 'DO', 'YOU', 'KNOW', 'WHAT', 'I', 'MEAN'] +4446-2275-0036-1211: hyp=['HE', 'TOOK', 'HER', 'ROUGHLY', 'IN', 'HIS', 'ARMS', 'DO', 'YOU', 'KNOW', 'WHAT', 'I', 'MEAN'] 
+4446-2275-0037-1212: ref=['OH', 'BARTLEY', 'WHAT', 'AM', 'I', 'TO', 'DO'] +4446-2275-0037-1212: hyp=['OH', 'BARTLEY', 'WHAT', 'AM', 'I', 'TO', 'DO'] +4446-2275-0038-1213: ref=['I', 'WILL', 'ASK', 'THE', 'LEAST', 'IMAGINABLE', 'BUT', 'I', 'MUST', 'HAVE', 'SOMETHING'] +4446-2275-0038-1213: hyp=['I', 'WILL', 'ASK', 'THE', 'LEAST', 'IMAGINABLE', 'BUT', 'I', 'MUST', 'HAVE', 'SOMETHING'] +4446-2275-0039-1214: ref=['I', 'MUST', 'KNOW', 'ABOUT', 'YOU'] +4446-2275-0039-1214: hyp=['I', 'MUST', 'KNOW', 'ABOUT', 'YOU'] +4446-2275-0040-1215: ref=['THE', 'SIGHT', 'OF', 'YOU', 'BARTLEY', 'TO', 'SEE', 'YOU', 'LIVING', 'AND', 'HAPPY', 'AND', 'SUCCESSFUL', 'CAN', 'I', 'NEVER', 'MAKE', 'YOU', 'UNDERSTAND', 'WHAT', 'THAT', 'MEANS', 'TO', 'ME'] +4446-2275-0040-1215: hyp=['THE', 'SIGHT', 'OF', 'YOU', 'BARTLEY', 'TO', 'SEE', 'YOU', 'LIVING', 'AND', 'HAPPY', 'AND', 'SUCCESSFUL', 'CAN', 'I', 'NEVER', 'MAKE', 'YOU', 'UNDERSTAND', 'WHAT', 'THAT', 'MEANS', 'TO', 'ME'] +4446-2275-0041-1216: ref=['YOU', 'SEE', 'LOVING', 'SOME', 'ONE', 'AS', 'I', 'LOVE', 'YOU', 'MAKES', 'THE', 'WHOLE', 'WORLD', 'DIFFERENT'] +4446-2275-0041-1216: hyp=['YOU', 'SEE', 'LOVING', 'SOMEONE', 'AS', 'I', 'LOVE', 'YOU', 'MAKES', 'THE', 'WHOLE', 'WORLD', 'DIFFERENT'] +4446-2275-0042-1217: ref=['AND', 'THEN', 'YOU', 'CAME', 'BACK', 'NOT', 'CARING', 'VERY', 'MUCH', 'BUT', 'IT', 'MADE', 'NO', 'DIFFERENCE'] +4446-2275-0042-1217: hyp=['AND', 'THEN', 'YOU', 'CAME', 'BACK', 'NOT', 'CARING', 'VERY', 'MUCH', 'BUT', 'IT', 'MADE', 'NO', 'DIFFERENCE'] +4446-2275-0043-1218: ref=['BARTLEY', 'BENT', 'OVER', 'AND', 'TOOK', 'HER', 'IN', 'HIS', 'ARMS', 'KISSING', 'HER', 'MOUTH', 'AND', 'HER', 'WET', 'TIRED', 'EYES'] +4446-2275-0043-1218: hyp=['BARTLEY', 'BENT', 'OVER', 'AND', 'TOOK', 'HER', 'IN', 'HIS', 'ARMS', 'KISSING', 'HER', 'MOUTH', 'AND', 'HER', 'WET', 'TIRED', 'EYES'] +4446-2275-0044-1219: ref=["DON'T", 'CRY', "DON'T", 'CRY', 'HE', 'WHISPERED'] +4446-2275-0044-1219: hyp=['AH', "DON'T", 'CRY', "DON'T", 'CRY', 'HE', 'WHISPERED'] +4446-2275-0045-1220: ref=["WE'VE", 'TORTURED', 'EACH', 'OTHER', 'ENOUGH', 'FOR', 'TONIGHT'] +4446-2275-0045-1220: hyp=["WE'VE", 'TORTURED', 'EACH', 'OTHER', 'ENOUGH', 'FOR', 'TO', 'NIGHT'] +4507-16021-0000-1221: ref=['CHAPTER', 'ONE', 'ORIGIN'] +4507-16021-0000-1221: hyp=['CHAPTER', 'ONE', 'ORIGIN'] +4507-16021-0001-1222: ref=['IT', 'ENGENDERS', 'A', 'WHOLE', 'WORLD', 'LA', 'PEGRE', 'FOR', 'WHICH', 'READ', 'THEFT', 'AND', 'A', 'HELL', 'LA', 'PEGRENNE', 'FOR', 'WHICH', 'READ', 'HUNGER'] +4507-16021-0001-1222: hyp=['IT', 'ENGENDERS', 'A', 'WHOLE', 'WORLD', 'LA', 'PEGRE', 'FOR', 'WITCH', 'RED', 'THEFT', 'AND', 'A', 'HELL', 'LA', 'PEGREN', 'FOR', 'WITCH', 'RED', 'HUNGER'] +4507-16021-0002-1223: ref=['THUS', 'IDLENESS', 'IS', 'THE', 'MOTHER'] +4507-16021-0002-1223: hyp=['THUS', 'IDLENESS', 'IS', 'THE', 'MOTHER'] +4507-16021-0003-1224: ref=['SHE', 'HAS', 'A', 'SON', 'THEFT', 'AND', 'A', 'DAUGHTER', 'HUNGER'] +4507-16021-0003-1224: hyp=['SHE', 'HAS', 'A', 'SON', 'THEFT', 'AND', 'A', 'DAUGHTER', 'HUNGER'] +4507-16021-0004-1225: ref=['WHAT', 'IS', 'SLANG'] +4507-16021-0004-1225: hyp=['WHAT', 'IS', 'SLANG'] +4507-16021-0005-1226: ref=['WE', 'HAVE', 'NEVER', 'UNDERSTOOD', 'THIS', 'SORT', 'OF', 'OBJECTIONS'] +4507-16021-0005-1226: hyp=['WE', 'HAVE', 'NEVER', 'UNDERSTOOD', 'THIS', 'SORT', 'OF', 'OBJECTIONS'] +4507-16021-0006-1227: ref=['SLANG', 'IS', 'ODIOUS'] +4507-16021-0006-1227: hyp=['SLANG', 'IS', 'ODIOUS'] +4507-16021-0007-1228: ref=['SLANG', 'MAKES', 'ONE', 'SHUDDER'] +4507-16021-0007-1228: hyp=['SLANG', 'MAKES', 'ONE', 'SHUDDER'] 
+4507-16021-0008-1229: ref=['WHO', 'DENIES', 'THAT', 'OF', 'COURSE', 'IT', 'DOES'] +4507-16021-0008-1229: hyp=['WHO', 'DENIES', 'THAT', 'OF', 'COURSE', 'IT', 'DOES'] +4507-16021-0009-1230: ref=['WHEN', 'IT', 'IS', 'A', 'QUESTION', 'OF', 'PROBING', 'A', 'WOUND', 'A', 'GULF', 'A', 'SOCIETY', 'SINCE', 'WHEN', 'HAS', 'IT', 'BEEN', 'CONSIDERED', 'WRONG', 'TO', 'GO', 'TOO', 'FAR', 'TO', 'GO', 'TO', 'THE', 'BOTTOM'] +4507-16021-0009-1230: hyp=['WHEN', 'IT', 'IS', 'A', 'QUESTION', 'OF', 'PROBING', 'A', 'WOUND', 'A', 'GULF', 'A', 'SOCIETY', 'SINCE', 'WHEN', 'HAS', 'IT', 'BEEN', 'CONSIDERED', 'WRONG', 'TO', 'GO', 'TOO', 'FAR', 'TO', 'GO', 'TO', 'THE', 'BOTTOM'] +4507-16021-0010-1231: ref=['WE', 'HAVE', 'ALWAYS', 'THOUGHT', 'THAT', 'IT', 'WAS', 'SOMETIMES', 'A', 'COURAGEOUS', 'ACT', 'AND', 'AT', 'LEAST', 'A', 'SIMPLE', 'AND', 'USEFUL', 'DEED', 'WORTHY', 'OF', 'THE', 'SYMPATHETIC', 'ATTENTION', 'WHICH', 'DUTY', 'ACCEPTED', 'AND', 'FULFILLED', 'MERITS'] +4507-16021-0010-1231: hyp=['WE', 'HAVE', 'ALWAYS', 'THOUGHT', 'THAT', 'IT', 'WAS', 'SOMETIMES', 'A', 'COURAGEOUS', 'ACT', 'AND', 'AT', 'LEAST', 'A', 'SIMPLE', 'AND', 'USEFUL', 'DEED', 'WORTHY', 'OF', 'THE', 'SYMPATHETIC', 'ATTENTION', 'WHICH', 'DUTY', 'ACCEPTED', 'IN', 'FULFILLED', 'MERITS'] +4507-16021-0011-1232: ref=['WHY', 'SHOULD', 'ONE', 'NOT', 'EXPLORE', 'EVERYTHING', 'AND', 'STUDY', 'EVERYTHING'] +4507-16021-0011-1232: hyp=['WHY', 'SHOULD', 'ONE', 'NOT', 'EXPLORE', 'EVERYTHING', 'AND', 'STUDY', 'EVERYTHING'] +4507-16021-0012-1233: ref=['WHY', 'SHOULD', 'ONE', 'HALT', 'ON', 'THE', 'WAY'] +4507-16021-0012-1233: hyp=['WHY', 'SHOULD', 'ONE', 'HALT', 'ON', 'THE', 'WAY'] +4507-16021-0013-1234: ref=['NOTHING', 'IS', 'MORE', 'LUGUBRIOUS', 'THAN', 'THE', 'CONTEMPLATION', 'THUS', 'IN', 'ITS', 'NUDITY', 'IN', 'THE', 'BROAD', 'LIGHT', 'OF', 'THOUGHT', 'OF', 'THE', 'HORRIBLE', 'SWARMING', 'OF', 'SLANG'] +4507-16021-0013-1234: hyp=['NOTHING', 'IS', 'MORE', 'LUGUBRIOUS', 'THAN', 'THE', 'CONTEMPLATION', 'THUS', 'IN', 'ITS', 'NUDITY', 'IN', 'THE', 'BROAD', 'LIGHT', 'OF', 'THOUGHT', 'OF', 'THE', 'HORRIBLE', 'SWARMING', 'OF', 'SLANG'] +4507-16021-0014-1235: ref=['NOW', 'WHEN', 'HAS', 'HORROR', 'EVER', 'EXCLUDED', 'STUDY'] +4507-16021-0014-1235: hyp=['NOW', 'WHEN', 'HAS', 'HORROR', 'EVER', 'EXCLUDED', 'STUDY'] +4507-16021-0015-1236: ref=['SINCE', 'WHEN', 'HAS', 'MALADY', 'BANISHED', 'MEDICINE'] +4507-16021-0015-1236: hyp=['SINCE', 'WHEN', 'HAS', 'MALADY', 'BANISHED', 'MEDICINE'] +4507-16021-0016-1237: ref=['CAN', 'ONE', 'IMAGINE', 'A', 'NATURALIST', 'REFUSING', 'TO', 'STUDY', 'THE', 'VIPER', 'THE', 'BAT', 'THE', 'SCORPION', 'THE', 'CENTIPEDE', 'THE', 'TARANTULA', 'AND', 'ONE', 'WHO', 'WOULD', 'CAST', 'THEM', 'BACK', 'INTO', 'THEIR', 'DARKNESS', 'SAYING', 'OH', 'HOW', 'UGLY', 'THAT', 'IS'] +4507-16021-0016-1237: hyp=['CAN', 'ONE', 'IMAGINE', 'A', 'NATURALIST', 'REFUSING', 'TO', 'STUDY', 'THE', 'VIPER', 'THE', 'BAT', 'THE', 'SCORPION', 'THE', 'CENTIPEDE', 'THE', 'TARANTULA', 'AND', 'ONE', 'WHO', 'WOULD', 'CAST', 'THEM', 'BACK', 'INTO', 'THEIR', 'DARKNESS', 'SAYING', 'OH', 'HOW', 'UGLY', 'THAT', 'IS'] +4507-16021-0017-1238: ref=['HE', 'WOULD', 'BE', 'LIKE', 'A', 'PHILOLOGIST', 'REFUSING', 'TO', 'EXAMINE', 'A', 'FACT', 'IN', 'LANGUAGE', 'A', 'PHILOSOPHER', 'HESITATING', 'TO', 'SCRUTINIZE', 'A', 'FACT', 'IN', 'HUMANITY'] +4507-16021-0017-1238: hyp=['HE', 'WOULD', 'BE', 'LIKE', 'A', 'PHILOLOGIST', 'REFUSING', 'TO', 'EXAMINE', 'A', 'FACT', 'IN', 'LANGUAGE', 'A', 'PHILOSOPHER', 'HESITATING', 'TO', 'SCRUTINIZE', 'A', 'FACT', 'IN', 'HUMANITY'] +4507-16021-0018-1239: 
ref=['WHAT', 'IS', 'SLANG', 'PROPERLY', 'SPEAKING'] +4507-16021-0018-1239: hyp=['WHAT', 'IS', 'SLANG', 'PROPERLY', 'SPEAKING'] +4507-16021-0019-1240: ref=['IT', 'IS', 'THE', 'LANGUAGE', 'OF', 'WRETCHEDNESS'] +4507-16021-0019-1240: hyp=['IT', 'IS', 'THE', 'LANGUAGE', 'OF', 'WRETCHEDNESS'] +4507-16021-0020-1241: ref=['WE', 'MAY', 'BE', 'STOPPED', 'THE', 'FACT', 'MAY', 'BE', 'PUT', 'TO', 'US', 'IN', 'GENERAL', 'TERMS', 'WHICH', 'IS', 'ONE', 'WAY', 'OF', 'ATTENUATING', 'IT', 'WE', 'MAY', 'BE', 'TOLD', 'THAT', 'ALL', 'TRADES', 'PROFESSIONS', 'IT', 'MAY', 'BE', 'ADDED', 'ALL', 'THE', 'ACCIDENTS', 'OF', 'THE', 'SOCIAL', 'HIERARCHY', 'AND', 'ALL', 'FORMS', 'OF', 'INTELLIGENCE', 'HAVE', 'THEIR', 'OWN', 'SLANG'] +4507-16021-0020-1241: hyp=['WE', 'MAY', 'BE', 'STOPPED', 'THE', 'FACT', 'MAY', 'BE', 'PUT', 'TO', 'US', 'IN', 'GENERAL', 'TERMS', 'WHICH', 'IS', 'ONE', 'WAY', 'OF', 'ATTENUATING', 'IT', 'WE', 'MAY', 'BE', 'TOLD', 'THAT', 'ALL', 'TRADES', 'PROFESSIONS', 'IT', 'MAY', 'BE', 'ADDED', 'ALL', 'THE', 'ACCIDENTS', 'OF', 'THE', 'SOCIAL', 'HIERARCHY', 'AND', 'ALL', 'FORMS', 'OF', 'INTELLIGENCE', 'HAVE', 'THEIR', 'OWN', 'SLANG'] +4507-16021-0021-1242: ref=['THE', 'PAINTER', 'WHO', 'SAYS', 'MY', 'GRINDER', 'THE', 'NOTARY', 'WHO', 'SAYS', 'MY', 'SKIP', 'THE', 'GUTTER', 'THE', 'HAIRDRESSER', 'WHO', 'SAYS', 'MY', 'MEALYBACK', 'THE', 'COBBLER', 'WHO', 'SAYS', 'MY', 'CUB', 'TALKS', 'SLANG'] +4507-16021-0021-1242: hyp=['THE', 'PAINTER', 'WHO', 'SAYS', 'MY', 'GRINDER', 'THE', 'NOTARY', 'WHO', 'SAYS', 'MY', 'SKIP', 'THE', 'GUTTER', 'THE', 'HAIRDRESSER', 'WHO', 'SAYS', 'MY', 'MEALLY', 'BACK', 'THE', 'COBBLER', 'WHO', 'SAYS', 'MY', 'CUB', 'TALKS', 'SLANG'] +4507-16021-0022-1243: ref=['THERE', 'IS', 'THE', 'SLANG', 'OF', 'THE', 'AFFECTED', 'LADY', 'AS', 'WELL', 'AS', 'OF', 'THE', 'PRECIEUSES'] +4507-16021-0022-1243: hyp=['THERE', 'IS', 'THE', 'SLANG', 'OF', 'THE', 'AFFECTED', 'LADY', 'AS', 'WELL', 'AS', 'OF', 'THE', 'PRECIUSEES'] +4507-16021-0023-1244: ref=['THE', 'SUGAR', 'MANUFACTURER', 'WHO', 'SAYS', 'LOAF', 'CLARIFIED', 'LUMPS', 'BASTARD', 'COMMON', 'BURNT', 'THIS', 'HONEST', 'MANUFACTURER', 'TALKS', 'SLANG'] +4507-16021-0023-1244: hyp=['THE', 'SUGAR', 'MANUFACTURER', 'WHO', 'SAYS', 'LOAF', 'CLARIFIED', 'LUMPS', 'BASTARD', 'COMMON', 'BURNT', 'THIS', 'HONEST', 'MANUFACTURER', 'TALKS', 'SLANG'] +4507-16021-0024-1245: ref=['ALGEBRA', 'MEDICINE', 'BOTANY', 'HAVE', 'EACH', 'THEIR', 'SLANG'] +4507-16021-0024-1245: hyp=['ALGEBRA', 'MEDICINE', 'BOTANY', 'HAVE', 'EACH', 'THEIR', 'SLANG'] +4507-16021-0025-1246: ref=['TO', 'MEET', 'THE', 'NEEDS', 'OF', 'THIS', 'CONFLICT', 'WRETCHEDNESS', 'HAS', 'INVENTED', 'A', 'LANGUAGE', 'OF', 'COMBAT', 'WHICH', 'IS', 'SLANG'] +4507-16021-0025-1246: hyp=['TO', 'MEET', 'THE', 'NEEDS', 'OF', 'THIS', 'CONFLICT', 'WRETCHEDNESS', 'HAS', 'INVENTED', 'THE', 'LANGUAGE', 'OF', 'COMBAT', 'WHICH', 'IS', 'SLANG'] +4507-16021-0026-1247: ref=['TO', 'KEEP', 'AFLOAT', 'AND', 'TO', 'RESCUE', 'FROM', 'OBLIVION', 'TO', 'HOLD', 'ABOVE', 'THE', 'GULF', 'WERE', 'IT', 'BUT', 'A', 'FRAGMENT', 'OF', 'SOME', 'LANGUAGE', 'WHICH', 'MAN', 'HAS', 'SPOKEN', 'AND', 'WHICH', 'WOULD', 'OTHERWISE', 'BE', 'LOST', 'THAT', 'IS', 'TO', 'SAY', 'ONE', 'OF', 'THE', 'ELEMENTS', 'GOOD', 'OR', 'BAD', 'OF', 'WHICH', 'CIVILIZATION', 'IS', 'COMPOSED', 'OR', 'BY', 'WHICH', 'IT', 'IS', 'COMPLICATED', 'TO', 'EXTEND', 'THE', 'RECORDS', 'OF', 'SOCIAL', 'OBSERVATION', 'IS', 'TO', 'SERVE', 'CIVILIZATION', 'ITSELF'] +4507-16021-0026-1247: hyp=['TO', 'KEEP', 'AFLOAT', 'AND', 'TO', 'RESCUE', 'FROM', 'OBLIVION', 'TO', 'HOLD', 'ABOVE', 'THE', 
'GULF', 'WERE', 'IT', 'BUT', 'A', 'FRAGMENT', 'OF', 'SOME', 'LANGUAGE', 'WHICH', 'MAN', 'HAS', 'SPOKEN', 'AND', 'WHICH', 'WOULD', 'OTHERWISE', 'BE', 'LOST', 'THAT', 'IS', 'TO', 'SAY', 'ONE', 'OF', 'THE', 'ELEMENTS', 'GOOD', 'OR', 'BAD', 'OF', 'WHICH', 'CIVILIZATION', 'IS', 'COMPOSED', 'OR', 'BY', 'WHICH', 'IT', 'IS', 'COMPLICATED', 'TO', 'EXTEND', 'THE', 'RECORDS', 'OF', 'SOCIAL', 'OBSERVATION', 'IS', 'TO', 'SERVE', 'CIVILIZATION', 'ITSELF'] +4507-16021-0027-1248: ref=['PHOENICIAN', 'VERY', 'GOOD'] +4507-16021-0027-1248: hyp=['PHOENICIAN', 'VERY', 'GOOD'] +4507-16021-0028-1249: ref=['EVEN', 'DIALECT', 'LET', 'THAT', 'PASS'] +4507-16021-0028-1249: hyp=['EVEN', 'DIALECT', 'LET', 'THAT', 'PASS'] +4507-16021-0029-1250: ref=['TO', 'THIS', 'WE', 'REPLY', 'IN', 'ONE', 'WORD', 'ONLY'] +4507-16021-0029-1250: hyp=['TO', 'THIS', 'WE', 'REPLY', 'IN', 'ONE', 'WORD', 'ONLY'] +4507-16021-0030-1251: ref=['ASSUREDLY', 'IF', 'THE', 'TONGUE', 'WHICH', 'A', 'NATION', 'OR', 'A', 'PROVINCE', 'HAS', 'SPOKEN', 'IS', 'WORTHY', 'OF', 'INTEREST', 'THE', 'LANGUAGE', 'WHICH', 'HAS', 'BEEN', 'SPOKEN', 'BY', 'A', 'MISERY', 'IS', 'STILL', 'MORE', 'WORTHY', 'OF', 'ATTENTION', 'AND', 'STUDY'] +4507-16021-0030-1251: hyp=['ASSUREDLY', 'IF', 'THE', 'TONGUE', 'WHICH', 'A', 'NATION', 'OR', 'A', 'PROVINCE', 'HAS', 'SPOKEN', 'IS', 'WORTHY', 'OF', 'INTEREST', 'THE', 'LANGUAGE', 'WHICH', 'HAS', 'BEEN', 'SPOKEN', 'BY', 'A', 'MISERY', 'IS', 'STILL', 'MORE', 'WORTHY', 'OF', 'ATTENTION', 'AND', 'STUDY'] +4507-16021-0031-1252: ref=['AND', 'THEN', 'WE', 'INSIST', 'UPON', 'IT', 'THE', 'STUDY', 'OF', 'SOCIAL', 'DEFORMITIES', 'AND', 'INFIRMITIES', 'AND', 'THE', 'TASK', 'OF', 'POINTING', 'THEM', 'OUT', 'WITH', 'A', 'VIEW', 'TO', 'REMEDY', 'IS', 'NOT', 'A', 'BUSINESS', 'IN', 'WHICH', 'CHOICE', 'IS', 'PERMITTED'] +4507-16021-0031-1252: hyp=['AND', 'THEN', 'WE', 'INSIST', 'UPON', 'IT', 'THE', 'STUDY', 'OF', 'SOCIAL', 'DEFORMITIES', 'AND', 'INFIRMITIES', 'AND', 'THE', 'TASK', 'OF', 'POINTING', 'THEM', 'OUT', 'WITH', 'A', 'VIEW', 'TO', 'REMEDY', 'IS', 'NOT', 'A', 'BUSINESS', 'IN', 'WHICH', 'CHOICE', 'IS', 'PERMITTED'] +4507-16021-0032-1253: ref=['HE', 'MUST', 'DESCEND', 'WITH', 'HIS', 'HEART', 'FULL', 'OF', 'CHARITY', 'AND', 'SEVERITY', 'AT', 'THE', 'SAME', 'TIME', 'AS', 'A', 'BROTHER', 'AND', 'AS', 'A', 'JUDGE', 'TO', 'THOSE', 'IMPENETRABLE', 'CASEMATES', 'WHERE', 'CRAWL', 'PELL', 'MELL', 'THOSE', 'WHO', 'BLEED', 'AND', 'THOSE', 'WHO', 'DEAL', 'THE', 'BLOW', 'THOSE', 'WHO', 'WEEP', 'AND', 'THOSE', 'WHO', 'CURSE', 'THOSE', 'WHO', 'FAST', 'AND', 'THOSE', 'WHO', 'DEVOUR', 'THOSE', 'WHO', 'ENDURE', 'EVIL', 'AND', 'THOSE', 'WHO', 'INFLICT', 'IT'] +4507-16021-0032-1253: hyp=['HE', 'MUST', 'DESCEND', 'WITH', 'HIS', 'HEART', 'FULL', 'OF', 'CHARITY', 'AND', 'SEVERITY', 'AT', 'THE', 'SAME', 'TIME', 'AS', 'A', 'BROTHER', 'AND', 'AS', 'A', 'JUDGE', 'TO', 'THOSE', 'IMPENETRABLE', 'CASEMATES', 'WHERE', 'CRAWL', 'PELL', 'MELL', 'THOSE', 'WHO', 'BLEED', 'AND', 'THOSE', 'WHO', 'DEAL', 'THE', 'BLOW', 'THOSE', 'WHO', 'WEEP', 'AND', 'THOSE', 'WHO', 'CURSE', 'THOSE', 'WHO', 'FAST', 'AND', 'THOSE', 'WHO', 'DEVOUR', 'THOSE', 'WHO', 'ENDURE', 'EVIL', 'AND', 'THOSE', 'WHO', 'INFLICT', 'IT'] +4507-16021-0033-1254: ref=['DO', 'WE', 'REALLY', 'KNOW', 'THE', 'MOUNTAIN', 'WELL', 'WHEN', 'WE', 'ARE', 'NOT', 'ACQUAINTED', 'WITH', 'THE', 'CAVERN'] +4507-16021-0033-1254: hyp=['DO', 'WE', 'REALLY', 'KNOW', 'THE', 'MOUNTAIN', 'WELL', 'WHEN', 'WE', 'ARE', 'NOT', 'ACQUAINTED', 'WITH', 'THE', 'CAVERN'] +4507-16021-0034-1255: ref=['THEY', 'CONSTITUTE', 'TWO', 'DIFFERENT', 'ORDERS', 
'OF', 'FACTS', 'WHICH', 'CORRESPOND', 'TO', 'EACH', 'OTHER', 'WHICH', 'ARE', 'ALWAYS', 'INTERLACED', 'AND', 'WHICH', 'OFTEN', 'BRING', 'FORTH', 'RESULTS'] +4507-16021-0034-1255: hyp=['THEY', 'CONSTITUTE', 'TWO', 'DIFFERENT', 'ORDERS', 'OF', 'FACTS', 'WHICH', 'CORRESPOND', 'TO', 'EACH', 'OTHER', 'WHICH', 'ARE', 'ALWAYS', 'INTERLACED', 'AND', 'WHICH', 'OFTEN', 'BRING', 'FORTH', 'RESULTS'] +4507-16021-0035-1256: ref=['TRUE', 'HISTORY', 'BEING', 'A', 'MIXTURE', 'OF', 'ALL', 'THINGS', 'THE', 'TRUE', 'HISTORIAN', 'MINGLES', 'IN', 'EVERYTHING'] +4507-16021-0035-1256: hyp=['TRUE', 'HISTORY', 'BEING', 'A', 'MIXTURE', 'OF', 'ALL', 'THINGS', 'THE', 'TRUE', 'HISTORIAN', 'MINGLES', 'IN', 'EVERYTHING'] +4507-16021-0036-1257: ref=['FACTS', 'FORM', 'ONE', 'OF', 'THESE', 'AND', 'IDEAS', 'THE', 'OTHER'] +4507-16021-0036-1257: hyp=['FACTS', 'FORM', 'ONE', 'OF', 'THESE', 'AND', 'IDEAS', 'THE', 'OTHER'] +4507-16021-0037-1258: ref=['THERE', 'IT', 'CLOTHES', 'ITSELF', 'IN', 'WORD', 'MASKS', 'IN', 'METAPHOR', 'RAGS'] +4507-16021-0037-1258: hyp=['THERE', 'IT', 'CLOTHES', 'ITSELF', 'IN', 'WORD', 'MASKS', 'IN', 'METAPHOR', 'RAGS'] +4507-16021-0038-1259: ref=['IN', 'THIS', 'GUISE', 'IT', 'BECOMES', 'HORRIBLE'] +4507-16021-0038-1259: hyp=['IN', 'THIS', 'GUISE', 'IT', 'BECOMES', 'HORRIBLE'] +4507-16021-0039-1260: ref=['ONE', 'PERCEIVES', 'WITHOUT', 'UNDERSTANDING', 'IT', 'A', 'HIDEOUS', 'MURMUR', 'SOUNDING', 'ALMOST', 'LIKE', 'HUMAN', 'ACCENTS', 'BUT', 'MORE', 'NEARLY', 'RESEMBLING', 'A', 'HOWL', 'THAN', 'AN', 'ARTICULATE', 'WORD'] +4507-16021-0039-1260: hyp=['ONE', 'PERCEIVES', 'WITHOUT', 'UNDERSTANDING', 'IT', 'A', 'HIDEOUS', 'MURMUR', 'SOUNDING', 'ALMOST', 'LIKE', 'HUMAN', 'ACCENTS', 'BUT', 'MORE', 'NEARLY', 'RESEMBLING', 'A', 'HOWL', 'THAN', 'AN', 'ARTICULATE', 'WORD'] +4507-16021-0040-1261: ref=['ONE', 'THINKS', 'ONE', 'HEARS', 'HYDRAS', 'TALKING'] +4507-16021-0040-1261: hyp=['ONE', 'THINKS', 'ONE', 'HEARS', 'HYDRAS', 'TALKING'] +4507-16021-0041-1262: ref=['IT', 'IS', 'UNINTELLIGIBLE', 'IN', 'THE', 'DARK'] +4507-16021-0041-1262: hyp=['IT', 'IS', 'UNINTELLIGIBLE', 'IN', 'THE', 'DARK'] +4507-16021-0042-1263: ref=['IT', 'IS', 'BLACK', 'IN', 'MISFORTUNE', 'IT', 'IS', 'BLACKER', 'STILL', 'IN', 'CRIME', 'THESE', 'TWO', 'BLACKNESSES', 'AMALGAMATED', 'COMPOSE', 'SLANG'] +4507-16021-0042-1263: hyp=['IT', 'IS', 'BLACK', 'IN', 'MISFORTUNE', 'IT', 'IS', 'BLACKER', 'STILL', 'IN', 'CRIME', 'THESE', 'TWO', 'BLACKNESSES', 'AMALGAMATED', 'COMPOSE', 'SLANG'] +4507-16021-0043-1264: ref=['THE', 'EARTH', 'IS', 'NOT', 'DEVOID', 'OF', 'RESEMBLANCE', 'TO', 'A', 'JAIL'] +4507-16021-0043-1264: hyp=['THE', 'EARTH', 'IS', 'NOT', 'DEVOID', 'OF', 'RESEMBLANCE', 'TO', 'A', 'JAIL'] +4507-16021-0044-1265: ref=['LOOK', 'CLOSELY', 'AT', 'LIFE'] +4507-16021-0044-1265: hyp=['LOOK', 'CLOSELY', 'AT', 'LIFE'] +4507-16021-0045-1266: ref=['IT', 'IS', 'SO', 'MADE', 'THAT', 'EVERYWHERE', 'WE', 'FEEL', 'THE', 'SENSE', 'OF', 'PUNISHMENT'] +4507-16021-0045-1266: hyp=['IT', 'IS', 'SO', 'MADE', 'THAT', 'EVERYWHERE', 'WE', 'FEEL', 'THE', 'SENSE', 'OF', 'PUNISHMENT'] +4507-16021-0046-1267: ref=['EACH', 'DAY', 'HAS', 'ITS', 'OWN', 'GREAT', 'GRIEF', 'OR', 'ITS', 'LITTLE', 'CARE'] +4507-16021-0046-1267: hyp=['EACH', 'DAY', 'HAS', 'ITS', 'OWN', 'GREAT', 'GRIEF', 'OR', 'ITS', 'LITTLE', 'CARE'] +4507-16021-0047-1268: ref=['YESTERDAY', 'YOU', 'WERE', 'TREMBLING', 'FOR', 'A', 'HEALTH', 'THAT', 'IS', 'DEAR', 'TO', 'YOU', 'TO', 'DAY', 'YOU', 'FEAR', 'FOR', 'YOUR', 'OWN', 'TO', 'MORROW', 'IT', 'WILL', 'BE', 'ANXIETY', 'ABOUT', 'MONEY', 'THE', 'DAY', 'AFTER', 'TO', 'MORROW', 
'THE', 'DIATRIBE', 'OF', 'A', 'SLANDERER', 'THE', 'DAY', 'AFTER', 'THAT', 'THE', 'MISFORTUNE', 'OF', 'SOME', 'FRIEND', 'THEN', 'THE', 'PREVAILING', 'WEATHER', 'THEN', 'SOMETHING', 'THAT', 'HAS', 'BEEN', 'BROKEN', 'OR', 'LOST', 'THEN', 'A', 'PLEASURE', 'WITH', 'WHICH', 'YOUR', 'CONSCIENCE', 'AND', 'YOUR', 'VERTEBRAL', 'COLUMN', 'REPROACH', 'YOU', 'AGAIN', 'THE', 'COURSE', 'OF', 'PUBLIC', 'AFFAIRS'] +4507-16021-0047-1268: hyp=['YESTERDAY', 'YOU', 'WERE', 'TREMBLING', 'FOR', 'A', 'HEALTH', 'THAT', 'IS', 'DEAR', 'TO', 'YOU', 'TO', 'DAY', 'YOU', 'FEAR', 'FOR', 'YOUR', 'OWN', 'TO', 'MORROW', 'IT', 'WILL', 'BE', 'ANXIETY', 'ABOUT', 'MONEY', 'THE', 'DAY', 'AFTER', 'TO', 'MORROW', 'THE', 'DIATRIBE', 'OF', 'A', 'SLANDERER', 'THE', 'DAY', 'AFTER', 'THAT', 'THE', 'MISFORTUNE', 'OF', 'SOME', 'FRIEND', 'THEN', 'THE', 'PREVAILING', 'WEATHER', 'THEN', 'SOMETHING', 'THAT', 'HAS', 'BEEN', 'BROKEN', 'OR', 'LOST', 'THEN', 'A', 'PLEASURE', 'WITH', 'WHICH', 'YOUR', 'CONSCIENCE', 'AND', 'YOUR', 'VERTEBRAL', 'COLUMN', 'REPROACH', 'YOU', 'AGAIN', 'THE', 'COURSE', 'OF', 'PUBLIC', 'AFFAIRS'] +4507-16021-0048-1269: ref=['THIS', 'WITHOUT', 'RECKONING', 'IN', 'THE', 'PAINS', 'OF', 'THE', 'HEART', 'AND', 'SO', 'IT', 'GOES', 'ON'] +4507-16021-0048-1269: hyp=['THIS', 'WITHOUT', 'RECKONING', 'IN', 'THE', 'PAINS', 'OF', 'THE', 'HEART', 'AND', 'SO', 'IT', 'GOES', 'ON'] +4507-16021-0049-1270: ref=['THERE', 'IS', 'HARDLY', 'ONE', 'DAY', 'OUT', 'OF', 'A', 'HUNDRED', 'WHICH', 'IS', 'WHOLLY', 'JOYOUS', 'AND', 'SUNNY'] +4507-16021-0049-1270: hyp=['THERE', 'IS', 'HARDLY', 'ONE', 'DAY', 'OUT', 'OF', 'A', 'HUNDRED', 'WHICH', 'IS', 'WHOLLY', 'JOYOUS', 'AND', 'SUNNY'] +4507-16021-0050-1271: ref=['AND', 'YOU', 'BELONG', 'TO', 'THAT', 'SMALL', 'CLASS', 'WHO', 'ARE', 'HAPPY'] +4507-16021-0050-1271: hyp=['AND', 'YOU', 'BELONG', 'TO', 'THAT', 'SMALL', 'CLASS', 'WHO', 'ARE', 'HAPPY'] +4507-16021-0051-1272: ref=['IN', 'THIS', 'WORLD', 'EVIDENTLY', 'THE', 'VESTIBULE', 'OF', 'ANOTHER', 'THERE', 'ARE', 'NO', 'FORTUNATE'] +4507-16021-0051-1272: hyp=['IN', 'THIS', "WORLD'S", 'EVIDENTLY', 'THE', 'VESTIBULE', 'OF', 'ANOTHER', 'THERE', 'ARE', 'NO', 'FORTUNATE'] +4507-16021-0052-1273: ref=['THE', 'REAL', 'HUMAN', 'DIVISION', 'IS', 'THIS', 'THE', 'LUMINOUS', 'AND', 'THE', 'SHADY'] +4507-16021-0052-1273: hyp=['THE', 'REAL', 'HUMAN', 'DIVISION', 'IS', 'THIS', 'THE', 'LUMINOUS', 'AND', 'THE', 'SHADY'] +4507-16021-0053-1274: ref=['TO', 'DIMINISH', 'THE', 'NUMBER', 'OF', 'THE', 'SHADY', 'TO', 'AUGMENT', 'THE', 'NUMBER', 'OF', 'THE', 'LUMINOUS', 'THAT', 'IS', 'THE', 'OBJECT'] +4507-16021-0053-1274: hyp=['TO', 'DIMINISH', 'THE', 'NUMBER', 'OF', 'THE', 'SHADY', 'TO', 'AUGMENT', 'THE', 'NUMBER', 'OF', 'THE', 'LUMINOUS', 'THAT', 'IS', 'THE', 'OBJECT'] +4507-16021-0054-1275: ref=['THAT', 'IS', 'WHY', 'WE', 'CRY', 'EDUCATION', 'SCIENCE'] +4507-16021-0054-1275: hyp=['THAT', 'IS', 'WHY', 'WE', 'CRY', 'EDUCATION', 'SCIENCE'] +4507-16021-0055-1276: ref=['TO', 'TEACH', 'READING', 'MEANS', 'TO', 'LIGHT', 'THE', 'FIRE', 'EVERY', 'SYLLABLE', 'SPELLED', 'OUT', 'SPARKLES'] +4507-16021-0055-1276: hyp=['TO', 'TEACH', 'READING', 'MEANS', 'TO', 'LIGHT', 'THE', 'FIRE', 'EVERY', 'SYLLABLE', 'SPELLED', 'OUT', 'SPARKLES'] +4507-16021-0056-1277: ref=['HOWEVER', 'HE', 'WHO', 'SAYS', 'LIGHT', 'DOES', 'NOT', 'NECESSARILY', 'SAY', 'JOY'] +4507-16021-0056-1277: hyp=['HOWEVER', 'HE', 'WHO', 'SAYS', 'LIGHT', 'DOES', 'NOT', 'NECESSARILY', 'SAY', 'JOY'] +4507-16021-0057-1278: ref=['PEOPLE', 'SUFFER', 'IN', 'THE', 'LIGHT', 'EXCESS', 'BURNS'] +4507-16021-0057-1278: hyp=['PEOPLE', 'SUFFER', 
'IN', 'THE', 'LIGHT', 'EXCESS', 'BURNS'] +4507-16021-0058-1279: ref=['THE', 'FLAME', 'IS', 'THE', 'ENEMY', 'OF', 'THE', 'WING'] +4507-16021-0058-1279: hyp=['THE', 'FLAME', 'IS', 'THE', 'ENEMY', 'OF', 'THE', 'WING'] +4507-16021-0059-1280: ref=['TO', 'BURN', 'WITHOUT', 'CEASING', 'TO', 'FLY', 'THEREIN', 'LIES', 'THE', 'MARVEL', 'OF', 'GENIUS'] +4507-16021-0059-1280: hyp=['TO', 'BURN', 'WITHOUT', 'CEASING', 'TO', 'FLY', 'THEREIN', 'LIES', 'THE', 'MARVEL', 'OF', 'GENIUS'] +4970-29093-0000-1281: ref=["YOU'LL", 'NEVER', 'DIG', 'IT', 'OUT', 'OF', 'THE', 'ASTOR', 'LIBRARY'] +4970-29093-0000-1281: hyp=["YOU'LL", 'NEVER', 'DIG', 'IT', 'OUT', 'OF', 'THE', 'ASTOR', 'LIBRARY'] +4970-29093-0001-1282: ref=['TO', 'THE', 'YOUNG', 'AMERICAN', 'HERE', 'OR', 'ELSEWHERE', 'THE', 'PATHS', 'TO', 'FORTUNE', 'ARE', 'INNUMERABLE', 'AND', 'ALL', 'OPEN', 'THERE', 'IS', 'INVITATION', 'IN', 'THE', 'AIR', 'AND', 'SUCCESS', 'IN', 'ALL', 'HIS', 'WIDE', 'HORIZON'] +4970-29093-0001-1282: hyp=['TO', 'THE', 'YOUNG', 'AMERICAN', 'HERE', 'OR', 'ELSEWHERE', 'THE', 'PATHS', 'TO', 'FORTUNE', 'ARE', 'INNUMERABLE', 'AND', 'ALL', 'OPEN', 'THERE', 'IS', 'INVITATION', 'IN', 'THE', 'AIR', 'AND', 'SUCCESS', 'IN', 'ALL', 'HIS', 'WIDE', 'HORIZON'] +4970-29093-0002-1283: ref=['HE', 'HAS', 'NO', 'TRADITIONS', 'TO', 'BIND', 'HIM', 'OR', 'GUIDE', 'HIM', 'AND', 'HIS', 'IMPULSE', 'IS', 'TO', 'BREAK', 'AWAY', 'FROM', 'THE', 'OCCUPATION', 'HIS', 'FATHER', 'HAS', 'FOLLOWED', 'AND', 'MAKE', 'A', 'NEW', 'WAY', 'FOR', 'HIMSELF'] +4970-29093-0002-1283: hyp=['HE', 'HAS', 'NO', 'TRADITIONS', 'TO', 'BIND', 'HIM', 'OR', 'GUIDE', 'HIM', 'AND', 'HIS', 'IMPULSE', 'IS', 'TO', 'BREAK', 'AWAY', 'FROM', 'THE', 'OCCUPATION', 'HIS', 'FATHER', 'HAS', 'FOLLOWED', 'AND', 'MAKE', 'A', 'NEW', 'WAY', 'FOR', 'HIMSELF'] +4970-29093-0003-1284: ref=['THE', 'MODEST', 'FELLOW', 'WOULD', 'HAVE', 'LIKED', 'FAME', 'THRUST', 'UPON', 'HIM', 'FOR', 'SOME', 'WORTHY', 'ACHIEVEMENT', 'IT', 'MIGHT', 'BE', 'FOR', 'A', 'BOOK', 'OR', 'FOR', 'THE', 'SKILLFUL', 'MANAGEMENT', 'OF', 'SOME', 'GREAT', 'NEWSPAPER', 'OR', 'FOR', 'SOME', 'DARING', 'EXPEDITION', 'LIKE', 'THAT', 'OF', 'LIEUTENANT', 'STRAIN', 'OR', 'DOCTOR', 'KANE'] +4970-29093-0003-1284: hyp=['THE', 'MODEST', 'FELLOW', 'WOULD', 'HAVE', 'LIKED', 'FAME', 'THRUST', 'UPON', 'HIM', 'FOR', 'SOME', 'WORTHY', 'ACHIEVEMENT', 'IT', 'MIGHT', 'BE', 'FOR', 'A', 'BOOK', 'OR', 'FOR', 'THE', 'SKILFUL', 'MANAGEMENT', 'OF', 'SOME', 'GREAT', 'NEWSPAPER', 'OR', 'FOR', 'SOME', 'DARING', 'EXPEDITION', 'LIKE', 'THAT', 'OF', 'LIEUTENANT', 'STRAIN', 'OR', 'DOCTOR', 'KANE'] +4970-29093-0004-1285: ref=['HE', 'WAS', 'UNABLE', 'TO', 'DECIDE', 'EXACTLY', 'WHAT', 'IT', 'SHOULD', 'BE'] +4970-29093-0004-1285: hyp=['HE', 'WAS', 'UNABLE', 'TO', 'DECIDE', 'EXACTLY', 'WHAT', 'IT', 'SHOULD', 'BE'] +4970-29093-0005-1286: ref=['SOMETIMES', 'HE', 'THOUGHT', 'HE', 'WOULD', 'LIKE', 'TO', 'STAND', 'IN', 'A', 'CONSPICUOUS', 'PULPIT', 'AND', 'HUMBLY', 'PREACH', 'THE', 'GOSPEL', 'OF', 'REPENTANCE', 'AND', 'IT', 'EVEN', 'CROSSED', 'HIS', 'MIND', 'THAT', 'IT', 'WOULD', 'BE', 'NOBLE', 'TO', 'GIVE', 'HIMSELF', 'TO', 'A', 'MISSIONARY', 'LIFE', 'TO', 'SOME', 'BENIGHTED', 'REGION', 'WHERE', 'THE', 'DATE', 'PALM', 'GROWS', 'AND', 'THE', "NIGHTINGALE'S", 'VOICE', 'IS', 'IN', 'TUNE', 'AND', 'THE', 'BUL', 'BUL', 'SINGS', 'ON', 'THE', 'OFF', 'NIGHTS'] +4970-29093-0005-1286: hyp=['SOMETIMES', 'HE', 'THOUGHT', 'HE', 'WOULD', 'LIKE', 'TO', 'STAND', 'IN', 'A', 'CONSPICUOUS', 'PULPIT', 'AND', 'HUMBLY', 'PREACH', 'THE', 'GOSPEL', 'OF', 'REPENTANCE', 'AND', 'IT', 'EVEN', 'CROSSED', 'HIS', 'MIND', 
'THAT', 'IT', 'WOULD', 'BE', 'NOBLE', 'TO', 'GIVE', 'HIMSELF', 'TO', 'A', 'MISSIONARY', 'LIFE', 'TO', 'SOME', 'BENIGHTED', 'REGION', 'WHERE', 'THE', 'DATE', 'PALM', 'GROWS', 'AND', 'THE', "NIGHTINGALE'S", 'VOICE', 'IS', 'IN', 'TUNE', 'AND', 'THE', 'BULBUL', 'SINGS', 'ON', 'THE', 'OFF', 'NIGHTS'] +4970-29093-0006-1287: ref=['LAW', 'SEEMED', 'TO', 'HIM', 'WELL', 'ENOUGH', 'AS', 'A', 'SCIENCE', 'BUT', 'HE', 'NEVER', 'COULD', 'DISCOVER', 'A', 'PRACTICAL', 'CASE', 'WHERE', 'IT', 'APPEARED', 'TO', 'HIM', 'WORTH', 'WHILE', 'TO', 'GO', 'TO', 'LAW', 'AND', 'ALL', 'THE', 'CLIENTS', 'WHO', 'STOPPED', 'WITH', 'THIS', 'NEW', 'CLERK', 'IN', 'THE', 'ANTE', 'ROOM', 'OF', 'THE', 'LAW', 'OFFICE', 'WHERE', 'HE', 'WAS', 'WRITING', 'PHILIP', 'INVARIABLY', 'ADVISED', 'TO', 'SETTLE', 'NO', 'MATTER', 'HOW', 'BUT', 'SETTLE', 'GREATLY', 'TO', 'THE', 'DISGUST', 'OF', 'HIS', 'EMPLOYER', 'WHO', 'KNEW', 'THAT', 'JUSTICE', 'BETWEEN', 'MAN', 'AND', 'MAN', 'COULD', 'ONLY', 'BE', 'ATTAINED', 'BY', 'THE', 'RECOGNIZED', 'PROCESSES', 'WITH', 'THE', 'ATTENDANT', 'FEES'] +4970-29093-0006-1287: hyp=['LAW', 'SEEMED', 'TO', 'HIM', 'WELL', 'ENOUGH', 'AS', 'A', 'SCIENCE', 'BUT', 'HE', 'NEVER', 'COULD', 'DISCOVER', 'A', 'PRACTICAL', 'CASE', 'WHERE', 'IT', 'APPEARED', 'TO', 'HIM', 'WORTH', 'WHILE', 'TO', 'GO', 'TO', 'LAW', 'AND', 'ALL', 'THE', 'CLIENTS', 'WHO', 'STOPPED', 'WITH', 'THIS', 'NEW', 'CLERK', 'IN', 'THE', 'ANTE', 'ROOM', 'OF', 'THE', 'LAW', 'OFFICE', 'WHERE', 'HE', 'WAS', 'WRITING', 'PHILIP', 'INVARIABLY', 'ADVISED', 'TO', 'SETTLE', 'NO', 'MATTER', 'HOW', 'BUT', 'SETTLE', 'GREATLY', 'TO', 'THE', 'DISGUST', 'OF', 'HIS', 'EMPLOYER', 'WHO', 'KNEW', 'THAT', 'JUSTICE', 'BETWEEN', 'MAN', 'AND', 'MAN', 'COULD', 'ONLY', 'BE', 'ATTAINED', 'BY', 'THE', 'RECOGNIZED', 'PROCESSES', 'WITH', 'THE', 'ATTENDANT', 'FEES'] +4970-29093-0007-1288: ref=['IT', 'IS', 'SUCH', 'A', 'NOBLE', 'AMBITION', 'THAT', 'IT', 'IS', 'A', 'PITY', 'IT', 'HAS', 'USUALLY', 'SUCH', 'A', 'SHALLOW', 'FOUNDATION'] +4970-29093-0007-1288: hyp=['IT', 'IS', 'SUCH', 'A', 'NOBLE', 'AMBITION', 'THAT', 'IT', 'IS', 'A', 'PITY', 'IT', 'HAS', 'USUALLY', 'SUCH', 'A', 'SHALLOW', 'FOUNDATION'] +4970-29093-0008-1289: ref=['HE', 'WANTED', 'TO', 'BEGIN', 'AT', 'THE', 'TOP', 'OF', 'THE', 'LADDER'] +4970-29093-0008-1289: hyp=['HE', 'WANTED', 'TO', 'BEGIN', 'AT', 'THE', 'TOP', 'OF', 'THE', 'LADDER'] +4970-29093-0009-1290: ref=['PHILIP', 'THEREFORE', 'READ', 'DILIGENTLY', 'IN', 'THE', 'ASTOR', 'LIBRARY', 'PLANNED', 'LITERARY', 'WORKS', 'THAT', 'SHOULD', 'COMPEL', 'ATTENTION', 'AND', 'NURSED', 'HIS', 'GENIUS'] +4970-29093-0009-1290: hyp=['PHILIP', 'THEREFORE', 'READ', 'DILIGENTLY', 'IN', 'THE', 'ASTOR', 'LIBRARY', 'PLANNED', 'LITERARY', 'WORKS', 'THAT', 'SHOULD', 'COMPEL', 'ATTENTION', 'AND', 'NURSED', 'HIS', 'GENIUS'] +4970-29093-0010-1291: ref=['HE', 'HAD', 'NO', 'FRIEND', 'WISE', 'ENOUGH', 'TO', 'TELL', 'HIM', 'TO', 'STEP', 'INTO', 'THE', 'DORKING', 'CONVENTION', 'THEN', 'IN', 'SESSION', 'MAKE', 'A', 'SKETCH', 'OF', 'THE', 'MEN', 'AND', 'WOMEN', 'ON', 'THE', 'PLATFORM', 'AND', 'TAKE', 'IT', 'TO', 'THE', 'EDITOR', 'OF', 'THE', 'DAILY', 'GRAPEVINE', 'AND', 'SEE', 'WHAT', 'HE', 'COULD', 'GET', 'A', 'LINE', 'FOR', 'IT'] +4970-29093-0010-1291: hyp=['HE', 'HAD', 'NO', 'FRIEND', 'WISE', 'ENOUGH', 'TO', 'TELL', 'HIM', 'TO', 'STEP', 'INTO', 'THE', 'DORKING', 'CONVENTION', 'THEN', 'IN', 'SESSION', 'MAKE', 'A', 'SKETCH', 'OF', 'THE', 'MEN', 'AND', 'WOMEN', 'ON', 'THE', 'PLATFORM', 'AND', 'TAKE', 'IT', 'TO', 'THE', 'EDITOR', 'OF', 'THE', 'DAILY', 'GRAPE', 'VINE', 'AND', 'SEE', 'WHAT', 'HE', 
'COULD', 'GET', 'A', 'LINE', 'FOR', 'IT'] +4970-29093-0011-1292: ref=['O', 'VERY', 'WELL', 'SAID', 'GRINGO', 'TURNING', 'AWAY', 'WITH', 'A', 'SHADE', 'OF', 'CONTEMPT', "YOU'LL", 'FIND', 'IF', 'YOU', 'ARE', 'GOING', 'INTO', 'LITERATURE', 'AND', 'NEWSPAPER', 'WORK', 'THAT', 'YOU', "CAN'T", 'AFFORD', 'A', 'CONSCIENCE', 'LIKE', 'THAT'] +4970-29093-0011-1292: hyp=['OH', 'VERY', 'WELL', 'SAID', 'GRINGE', 'TURNING', 'AWAY', 'WITH', 'A', 'SHADE', 'OF', 'CONTEMPT', "YOU'LL", 'FIND', 'IF', 'YOU', 'ARE', 'GOING', 'INTO', 'LITERATURE', 'AND', 'NEWSPAPER', 'WORK', 'THAT', 'YOU', "CAN'T", 'AFFORD', 'A', 'CONSCIENCE', 'LIKE', 'THAT'] +4970-29093-0012-1293: ref=['BUT', 'PHILIP', 'DID', 'AFFORD', 'IT', 'AND', 'HE', 'WROTE', 'THANKING', 'HIS', 'FRIENDS', 'AND', 'DECLINING', 'BECAUSE', 'HE', 'SAID', 'THE', 'POLITICAL', 'SCHEME', 'WOULD', 'FAIL', 'AND', 'OUGHT', 'TO', 'FAIL'] +4970-29093-0012-1293: hyp=['BUT', 'PHILIP', 'DID', 'AFFORD', 'IT', 'AND', 'HE', 'WROTE', 'THANKING', 'HIS', 'FRIENDS', 'AND', 'DECLINING', 'BECAUSE', 'HE', 'SAID', 'THE', 'POLITICAL', 'SCHEME', 'WOULD', 'FAIL', 'AND', 'OUGHT', 'TO', 'FAIL'] +4970-29093-0013-1294: ref=['AND', 'HE', 'WENT', 'BACK', 'TO', 'HIS', 'BOOKS', 'AND', 'TO', 'HIS', 'WAITING', 'FOR', 'AN', 'OPENING', 'LARGE', 'ENOUGH', 'FOR', 'HIS', 'DIGNIFIED', 'ENTRANCE', 'INTO', 'THE', 'LITERARY', 'WORLD'] +4970-29093-0013-1294: hyp=['AND', 'HE', 'WENT', 'BACK', 'TO', 'HIS', 'BOOKS', 'AND', 'TO', 'HIS', 'WAITING', 'FOR', 'AN', 'OPENING', 'LARGE', 'ENOUGH', 'FOR', 'HIS', 'DIGNIFIED', 'ENTRANCE', 'INTO', 'THE', 'LITERARY', 'WORLD'] +4970-29093-0014-1295: ref=['WELL', "I'M", 'GOING', 'AS', 'AN', 'ENGINEER', 'YOU', 'CAN', 'GO', 'AS', 'ONE'] +4970-29093-0014-1295: hyp=['WELL', "I'M", 'GOING', 'AS', 'AN', 'ENGINEER', 'YOU', 'COULD', 'GO', 'AS', 'ONE'] +4970-29093-0015-1296: ref=['YOU', 'CAN', 'BEGIN', 'BY', 'CARRYING', 'A', 'ROD', 'AND', 'PUTTING', 'DOWN', 'THE', 'FIGURES'] +4970-29093-0015-1296: hyp=['YOU', 'CAN', 'BEGIN', 'BY', 'CARRYING', 'A', 'ROD', 'AND', 'PUTTING', 'DOWN', 'THE', 'FIGURES'] +4970-29093-0016-1297: ref=['NO', 'ITS', 'NOT', 'TOO', 'SOON'] +4970-29093-0016-1297: hyp=['NO', "IT'S", 'NOT', 'TOO', 'SOON'] +4970-29093-0017-1298: ref=["I'VE", 'BEEN', 'READY', 'TO', 'GO', 'ANYWHERE', 'FOR', 'SIX', 'MONTHS'] +4970-29093-0017-1298: hyp=["I'VE", 'BEEN', 'READY', 'TO', 'GO', 'ANYWHERE', 'FOR', 'SIX', 'MONTHS'] +4970-29093-0018-1299: ref=['THE', 'TWO', 'YOUNG', 'MEN', 'WHO', 'WERE', 'BY', 'THIS', 'TIME', 'FULL', 'OF', 'THE', 'ADVENTURE', 'WENT', 'DOWN', 'TO', 'THE', 'WALL', 'STREET', 'OFFICE', 'OF', "HENRY'S", 'UNCLE', 'AND', 'HAD', 'A', 'TALK', 'WITH', 'THAT', 'WILY', 'OPERATOR'] +4970-29093-0018-1299: hyp=['THE', 'TWO', 'YOUNG', 'MEN', 'WHO', 'WERE', 'BY', 'THIS', 'TIME', 'FULL', 'OF', 'THE', 'ADVENTURE', 'WENT', 'DOWN', 'TO', 'THE', 'WALL', 'STREET', 'OFFICE', 'OF', "HENRY'S", 'UNCLE', 'AND', 'HAD', 'A', 'TALK', 'WITH', 'THAT', 'WILY', 'OPERATOR'] +4970-29093-0019-1300: ref=['THE', 'NIGHT', 'WAS', 'SPENT', 'IN', 'PACKING', 'UP', 'AND', 'WRITING', 'LETTERS', 'FOR', 'PHILIP', 'WOULD', 'NOT', 'TAKE', 'SUCH', 'AN', 'IMPORTANT', 'STEP', 'WITHOUT', 'INFORMING', 'HIS', 'FRIENDS'] +4970-29093-0019-1300: hyp=['THE', 'NIGHT', 'WAS', 'SPENT', 'IN', 'PACKING', 'UP', 'AND', 'WRITING', 'LETTERS', 'FOR', 'PHILIP', 'WOULD', 'NOT', 'TAKE', 'SUCH', 'AN', 'IMPORTANT', 'STEP', 'WITHOUT', 'INFORMING', 'HIS', 'FRIENDS'] +4970-29093-0020-1301: ref=['WHY', "IT'S", 'IN', 'MISSOURI', 'SOMEWHERE', 'ON', 'THE', 'FRONTIER', 'I', 'THINK', "WE'LL", 'GET', 'A', 'MAP'] +4970-29093-0020-1301: hyp=['WHY', 
"IT'S", 'IN', 'MISSOURI', 'SOMEWHERE', 'ON', 'THE', 'FRONTIER', 'I', 'THINK', "WE'LL", 'GET', 'A', 'MAP'] +4970-29093-0021-1302: ref=['I', 'WAS', 'AFRAID', 'IT', 'WAS', 'NEARER', 'HOME'] +4970-29093-0021-1302: hyp=['I', 'WAS', 'AFRAID', 'IT', 'WAS', 'NEARER', 'HOME'] +4970-29093-0022-1303: ref=['HE', 'KNEW', 'HIS', 'UNCLE', 'WOULD', 'BE', 'GLAD', 'TO', 'HEAR', 'THAT', 'HE', 'HAD', 'AT', 'LAST', 'TURNED', 'HIS', 'THOUGHTS', 'TO', 'A', 'PRACTICAL', 'MATTER'] +4970-29093-0022-1303: hyp=['HE', 'KNEW', 'HIS', 'UNCLE', 'WOULD', 'BE', 'GLAD', 'TO', 'HEAR', 'THAT', 'HE', 'HAD', 'AT', 'LAST', 'TURNED', 'HIS', 'THOUGHTS', 'TO', 'A', 'PRACTICAL', 'MATTER'] +4970-29093-0023-1304: ref=['HE', 'WELL', 'KNEW', 'THE', 'PERILS', 'OF', 'THE', 'FRONTIER', 'THE', 'SAVAGE', 'STATE', 'OF', 'SOCIETY', 'THE', 'LURKING', 'INDIANS', 'AND', 'THE', 'DANGERS', 'OF', 'FEVER'] +4970-29093-0023-1304: hyp=['HE', 'WELL', 'KNEW', 'THE', 'PERILS', 'OF', 'THE', 'FRONTIER', 'THE', 'SAVAGE', 'STATE', 'OF', 'SOCIETY', 'THE', 'LURKING', 'INDIANS', 'AND', 'THE', 'DANGERS', 'OF', 'FEVER'] +4970-29095-0000-1305: ref=['SHE', 'WAS', 'TIRED', 'OF', 'OTHER', 'THINGS'] +4970-29095-0000-1305: hyp=['SHE', 'WAS', 'TIRED', 'OF', 'OTHER', 'THINGS'] +4970-29095-0001-1306: ref=['SHE', 'TRIED', 'THIS', 'MORNING', 'AN', 'AIR', 'OR', 'TWO', 'UPON', 'THE', 'PIANO', 'SANG', 'A', 'SIMPLE', 'SONG', 'IN', 'A', 'SWEET', 'BUT', 'SLIGHTLY', 'METALLIC', 'VOICE', 'AND', 'THEN', 'SEATING', 'HERSELF', 'BY', 'THE', 'OPEN', 'WINDOW', 'READ', "PHILIP'S", 'LETTER'] +4970-29095-0001-1306: hyp=['SHE', 'TRIED', 'THIS', 'MORNING', 'AN', 'AIR', 'OR', 'TWO', 'UPON', 'THE', 'PIANO', 'SANG', 'A', 'SIMPLE', 'SONG', 'IN', 'A', 'SWEET', 'BUT', 'SLIGHTLY', 'METALLIC', 'VOICE', 'AND', 'THEN', 'SEATING', 'HERSELF', 'BY', 'THE', 'OPEN', 'WINDOW', 'READ', "PHILIP'S", 'LETTER'] +4970-29095-0002-1307: ref=['WELL', 'MOTHER', 'SAID', 'THE', 'YOUNG', 'STUDENT', 'LOOKING', 'UP', 'WITH', 'A', 'SHADE', 'OF', 'IMPATIENCE'] +4970-29095-0002-1307: hyp=['WELL', 'MOTHER', 'SAID', 'THE', 'YOUNG', 'STUDENT', 'LOOKING', 'UP', 'WITH', 'A', 'SHADE', 'OF', 'IMPATIENCE'] +4970-29095-0003-1308: ref=['I', 'HOPE', 'THEE', 'TOLD', 'THE', 'ELDERS', 'THAT', 'FATHER', 'AND', 'I', 'ARE', 'RESPONSIBLE', 'FOR', 'THE', 'PIANO', 'AND', 'THAT', 'MUCH', 'AS', 'THEE', 'LOVES', 'MUSIC', 'THEE', 'IS', 'NEVER', 'IN', 'THE', 'ROOM', 'WHEN', 'IT', 'IS', 'PLAYED'] +4970-29095-0003-1308: hyp=['I', 'HOPE', 'THEE', 'TOLD', 'THE', 'ELDERS', 'THAT', 'FATHER', 'AND', 'I', 'ARE', 'RESPONSIBLE', 'FOR', 'THE', 'PIANO', 'AND', 'THAT', 'MUCH', 'AS', 'THEE', 'LOVES', 'MUSIC', 'THEE', 'IS', 'NEVER', 'IN', 'THE', 'ROOM', 'WHEN', 'IT', 'IS', 'PLAYED'] +4970-29095-0004-1309: ref=['I', 'HEARD', 'FATHER', 'TELL', 'COUSIN', 'ABNER', 'THAT', 'HE', 'WAS', 'WHIPPED', 'SO', 'OFTEN', 'FOR', 'WHISTLING', 'WHEN', 'HE', 'WAS', 'A', 'BOY', 'THAT', 'HE', 'WAS', 'DETERMINED', 'TO', 'HAVE', 'WHAT', 'COMPENSATION', 'HE', 'COULD', 'GET', 'NOW'] +4970-29095-0004-1309: hyp=['I', 'HEARD', 'FATHER', 'TELL', 'COUSIN', 'ABNER', 'THAT', 'HE', 'WAS', 'WHIPPED', 'SO', 'OFTEN', 'FOR', 'WHISTLING', 'WHEN', 'HE', 'WAS', 'A', 'BOY', 'THAT', 'HE', 'WAS', 'DETERMINED', 'TO', 'HAVE', 'WHAT', 'COMPENSATION', 'HE', 'COULD', 'GET', 'NOW'] +4970-29095-0005-1310: ref=['THY', 'WAYS', 'GREATLY', 'TRY', 'ME', 'RUTH', 'AND', 'ALL', 'THY', 'RELATIONS'] +4970-29095-0005-1310: hyp=['THY', 'WAYS', 'GREATLY', 'TRY', 'ME', 'RUTH', 'AND', 'ALL', 'THY', 'RELATIONS'] +4970-29095-0006-1311: ref=['IS', 'THY', 'FATHER', 'WILLING', 'THEE', 'SHOULD', 'GO', 'AWAY', 'TO', 'A', 'SCHOOL', 'OF', 
'THE', "WORLD'S", 'PEOPLE'] +4970-29095-0006-1311: hyp=['IS', 'THY', 'FATHER', 'WILLING', 'THEE', 'SHOULD', 'GO', 'AWAY', 'TO', 'A', 'SCHOOL', 'OF', 'THE', "WORLD'S", 'PEOPLE'] +4970-29095-0007-1312: ref=['I', 'HAVE', 'NOT', 'ASKED', 'HIM', 'RUTH', 'REPLIED', 'WITH', 'A', 'LOOK', 'THAT', 'MIGHT', 'IMPLY', 'THAT', 'SHE', 'WAS', 'ONE', 'OF', 'THOSE', 'DETERMINED', 'LITTLE', 'BODIES', 'WHO', 'FIRST', 'MADE', 'UP', 'HER', 'OWN', 'MIND', 'AND', 'THEN', 'COMPELLED', 'OTHERS', 'TO', 'MAKE', 'UP', 'THEIRS', 'IN', 'ACCORDANCE', 'WITH', 'HERS'] +4970-29095-0007-1312: hyp=['I', 'HAVE', 'NOT', 'ASKED', 'HIM', 'RUTH', 'REPLIED', 'WITH', 'A', 'LOOK', 'THAT', 'MIGHT', 'IMPLY', 'THAT', 'SHE', 'WAS', 'ONE', 'OF', 'THOSE', 'DETERMINED', 'LITTLE', 'BODIES', 'WHO', 'FIRST', 'MADE', 'UP', 'HER', 'OWN', 'MIND', 'AND', 'THEN', 'COMPELLED', 'OTHERS', 'TO', 'MAKE', 'UP', 'THEIRS', 'IN', 'ACCORDANCE', 'WITH', 'HERS'] +4970-29095-0008-1313: ref=['MOTHER', "I'M", 'GOING', 'TO', 'STUDY', 'MEDICINE'] +4970-29095-0008-1313: hyp=['MOTHER', 'I', 'AM', 'GOING', 'TO', 'STUDY', 'MEDICINE'] +4970-29095-0009-1314: ref=['MARGARET', 'BOLTON', 'ALMOST', 'LOST', 'FOR', 'A', 'MOMENT', 'HER', 'HABITUAL', 'PLACIDITY'] +4970-29095-0009-1314: hyp=['MARGARET', 'BOLTON', 'ALMOST', 'LOST', 'FOR', 'A', 'MOMENT', 'HER', 'HABITUAL', 'PLACIDITY'] +4970-29095-0010-1315: ref=['THEE', 'STUDY', 'MEDICINE'] +4970-29095-0010-1315: hyp=['THE', 'STUDY', 'MEDICINE'] +4970-29095-0011-1316: ref=['DOES', 'THEE', 'THINK', 'THEE', 'COULD', 'STAND', 'IT', 'SIX', 'MONTHS'] +4970-29095-0011-1316: hyp=['DOES', 'THEE', 'THINK', 'THEE', 'COULD', 'STAND', 'IT', 'SIX', 'MONTHS'] +4970-29095-0012-1317: ref=['AND', 'BESIDES', 'SUPPOSE', 'THEE', 'DOES', 'LEARN', 'MEDICINE'] +4970-29095-0012-1317: hyp=['AND', 'BESIDES', 'SUPPOSE', 'THEY', 'DOES', 'LEARN', 'MEDICINE'] +4970-29095-0013-1318: ref=['I', 'WILL', 'PRACTICE', 'IT'] +4970-29095-0013-1318: hyp=['I', 'WILL', 'PRACTICE', 'IT'] +4970-29095-0014-1319: ref=['WHERE', 'THEE', 'AND', 'THY', 'FAMILY', 'ARE', 'KNOWN'] +4970-29095-0014-1319: hyp=['WHERE', 'THEE', 'AND', 'THY', 'FAMILY', 'ARE', 'KNOWN'] +4970-29095-0015-1320: ref=['IF', 'I', 'CAN', 'GET', 'PATIENTS'] +4970-29095-0015-1320: hyp=['IF', 'I', 'CAN', 'GET', 'PATIENCE'] +4970-29095-0016-1321: ref=['RUTH', 'SAT', 'QUITE', 'STILL', 'FOR', 'A', 'TIME', 'WITH', 'FACE', 'INTENT', 'AND', 'FLUSHED', 'IT', 'WAS', 'OUT', 'NOW'] +4970-29095-0016-1321: hyp=['RUTH', 'SAT', 'QUITE', 'STILL', 'FOR', 'A', 'TIME', 'WITH', 'FACE', 'INTENT', 'AND', 'FLUSHED', 'IT', 'WAS', 'OUT', 'NOW'] +4970-29095-0017-1322: ref=['THE', 'SIGHT', 'SEERS', 'RETURNED', 'IN', 'HIGH', 'SPIRITS', 'FROM', 'THE', 'CITY'] +4970-29095-0017-1322: hyp=['THE', 'SIGHTSEERS', 'RETURNED', 'IN', 'HIGH', 'SPIRITS', 'FROM', 'THE', 'CITY'] +4970-29095-0018-1323: ref=['RUTH', 'ASKED', 'THE', 'ENTHUSIASTS', 'IF', 'THEY', 'WOULD', 'LIKE', 'TO', 'LIVE', 'IN', 'SUCH', 'A', 'SOUNDING', 'MAUSOLEUM', 'WITH', 'ITS', 'GREAT', 'HALLS', 'AND', 'ECHOING', 'ROOMS', 'AND', 'NO', 'COMFORTABLE', 'PLACE', 'IN', 'IT', 'FOR', 'THE', 'ACCOMMODATION', 'OF', 'ANY', 'BODY'] +4970-29095-0018-1323: hyp=['RUTH', 'ASKED', 'THE', 'ENTHUSIASTS', 'IF', 'THEY', 'WOULD', 'LIKE', 'TO', 'LIVE', 'IN', 'SUCH', 'A', 'SOUNDING', 'MAUSOLEUM', 'WITH', 'ITS', 'GREAT', 'HALLS', 'AND', 'ECHOING', 'ROOMS', 'AND', 'NO', 'COMFORTABLE', 'PLACE', 'IN', 'IT', 'FOR', 'THE', 'ACCOMMODATION', 'OF', 'ANYBODY'] +4970-29095-0019-1324: ref=['AND', 'THEN', 'THERE', 'WAS', 'BROAD', 'STREET'] +4970-29095-0019-1324: hyp=['AND', 'THEN', 'THERE', 'WAS', 'BROAD', 'STREET'] 
+4970-29095-0020-1325: ref=['THERE', 'CERTAINLY', 'WAS', 'NO', 'END', 'TO', 'IT', 'AND', 'EVEN', 'RUTH', 'WAS', 'PHILADELPHIAN', 'ENOUGH', 'TO', 'BELIEVE', 'THAT', 'A', 'STREET', 'OUGHT', 'NOT', 'TO', 'HAVE', 'ANY', 'END', 'OR', 'ARCHITECTURAL', 'POINT', 'UPON', 'WHICH', 'THE', 'WEARY', 'EYE', 'COULD', 'REST'] +4970-29095-0020-1325: hyp=['THERE', 'CERTAINLY', 'WAS', 'NO', 'END', 'TO', 'IT', 'AND', 'EVEN', 'RUTH', 'WAS', 'PHILADELPHIIAN', 'ENOUGH', 'TO', 'BELIEVE', 'THAT', 'A', 'STREET', 'OUGHT', 'NOT', 'TO', 'HAVE', 'ANY', 'END', 'OR', 'ARCHITECTURAL', 'POINT', 'UPON', 'WHICH', 'THE', 'WEARY', 'EYE', 'COULD', 'REST'] +4970-29095-0021-1326: ref=['BUT', 'NEITHER', 'SAINT', 'GIRARD', 'NOR', 'BROAD', 'STREET', 'NEITHER', 'WONDERS', 'OF', 'THE', 'MINT', 'NOR', 'THE', 'GLORIES', 'OF', 'THE', 'HALL', 'WHERE', 'THE', 'GHOSTS', 'OF', 'OUR', 'FATHERS', 'SIT', 'ALWAYS', 'SIGNING', 'THE', 'DECLARATION', 'IMPRESSED', 'THE', 'VISITORS', 'SO', 'MUCH', 'AS', 'THE', 'SPLENDORS', 'OF', 'THE', 'CHESTNUT', 'STREET', 'WINDOWS', 'AND', 'THE', 'BARGAINS', 'ON', 'EIGHTH', 'STREET'] +4970-29095-0021-1326: hyp=['BUT', 'NEITHER', 'SAINT', 'GERARD', 'NOR', 'BROAD', 'STREET', 'NEITHER', 'WONDERS', 'OF', 'THE', 'MINT', 'NOR', 'THE', 'GLORIES', 'OF', 'THE', 'HALL', 'WHERE', 'THE', 'GHOSTS', 'OF', 'OUR', 'FATHERS', 'SIT', 'ALWAYS', 'SIGNING', 'THE', 'DECLARATION', 'IMPRESSED', 'THE', 'VISITORS', 'SO', 'MUCH', 'AS', 'THE', 'SPLENDOURS', 'OF', 'THE', 'CHESTNUT', 'STREET', 'WINDOWS', 'AND', 'THE', 'BARGAINS', 'ON', 'EIGHTH', 'STREET'] +4970-29095-0022-1327: ref=['IS', 'THEE', 'GOING', 'TO', 'THE', 'YEARLY', 'MEETING', 'RUTH', 'ASKED', 'ONE', 'OF', 'THE', 'GIRLS'] +4970-29095-0022-1327: hyp=['IS', 'THEE', 'GOING', 'TO', 'THE', 'YEARLY', 'MEETING', 'RUTH', 'ASKED', 'ONE', 'OF', 'THE', 'GIRLS'] +4970-29095-0023-1328: ref=['I', 'HAVE', 'NOTHING', 'TO', 'WEAR', 'REPLIED', 'THAT', 'DEMURE', 'PERSON'] +4970-29095-0023-1328: hyp=['I', 'HAVE', 'NOTHING', 'TO', 'WEAR', 'REPLIED', 'THAT', 'DEMURE', 'PERSON'] +4970-29095-0024-1329: ref=['IT', 'HAS', 'OCCUPIED', 'MOTHER', 'A', 'LONG', 'TIME', 'TO', 'FIND', 'AT', 'THE', 'SHOPS', 'THE', 'EXACT', 'SHADE', 'FOR', 'HER', 'NEW', 'BONNET'] +4970-29095-0024-1329: hyp=['IT', 'HAS', 'OCCUPIED', 'MOTHER', 'A', 'LONG', 'TIME', 'TO', 'FIND', 'AT', 'THE', 'SHOPS', 'THE', 'EXACT', 'SHADE', 'FOR', 'HER', 'NEW', 'BONNET'] +4970-29095-0025-1330: ref=['AND', 'THEE', "WON'T", 'GO', 'WHY', 'SHOULD', 'I'] +4970-29095-0025-1330: hyp=['AND', 'THEE', "WON'T", 'GO', 'WHY', 'SHOULD', 'I'] +4970-29095-0026-1331: ref=['IF', 'I', 'GO', 'TO', 'MEETING', 'AT', 'ALL', 'I', 'LIKE', 'BEST', 'TO', 'SIT', 'IN', 'THE', 'QUIET', 'OLD', 'HOUSE', 'IN', 'GERMANTOWN', 'WHERE', 'THE', 'WINDOWS', 'ARE', 'ALL', 'OPEN', 'AND', 'I', 'CAN', 'SEE', 'THE', 'TREES', 'AND', 'HEAR', 'THE', 'STIR', 'OF', 'THE', 'LEAVES'] +4970-29095-0026-1331: hyp=['IF', 'I', 'GO', 'TO', 'MEETING', 'AT', 'ALL', 'I', 'LIKE', 'BEST', 'TO', 'SIT', 'IN', 'THE', 'QUIET', 'OLD', 'HOUSE', 'IN', 'GERMANTOWN', 'WHERE', 'THE', 'WINDOWS', 'ARE', 'ALL', 'OPEN', 'AND', 'I', 'CAN', 'SEE', 'THE', 'TREES', 'AND', 'HEAR', 'THE', 'STIR', 'OF', 'THE', 'LEAVES'] +4970-29095-0027-1332: ref=["IT'S", 'SUCH', 'A', 'CRUSH', 'AT', 'THE', 'YEARLY', 'MEETING', 'AT', 'ARCH', 'STREET', 'AND', 'THEN', "THERE'S", 'THE', 'ROW', 'OF', 'SLEEK', 'LOOKING', 'YOUNG', 'MEN', 'WHO', 'LINE', 'THE', 'CURBSTONE', 'AND', 'STARE', 'AT', 'US', 'AS', 'WE', 'COME', 'OUT'] +4970-29095-0027-1332: hyp=["IT'S", 'SUCH', 'A', 'CRUSH', 'AT', 'THE', 'YEARLY', 'MEETING', 'AT', 'ARCH', 'STREET', 'AND', 'THEN', 
"THERE'S", 'THE', 'ROW', 'OF', 'SLEEK', 'LOOKING', 'YOUNG', 'MEN', 'WHO', 'LIE', 'IN', 'THE', 'CURBSTONE', 'AND', 'STARE', 'AT', 'US', 'AS', 'WE', 'COME', 'OUT'] +4970-29095-0028-1333: ref=['HE', "DOESN'T", 'SAY', 'BUT', "IT'S", 'ON', 'THE', 'FRONTIER', 'AND', 'ON', 'THE', 'MAP', 'EVERYTHING', 'BEYOND', 'IT', 'IS', 'MARKED', 'INDIANS', 'AND', 'DESERT', 'AND', 'LOOKS', 'AS', 'DESOLATE', 'AS', 'A', 'WEDNESDAY', 'MEETING', 'HUMPH', 'IT', 'WAS', 'TIME', 'FOR', 'HIM', 'TO', 'DO', 'SOMETHING'] +4970-29095-0028-1333: hyp=['HE', "DOESN'T", 'SAY', 'BUT', "IT'S", 'ON', 'THE', 'FRONTIER', 'AND', 'ON', 'THE', 'MAP', 'EVERYTHING', 'BEYOND', 'IT', 'IS', 'MARKED', 'INDIANS', 'AND', 'DESERT', 'AND', 'LOOKS', 'AS', 'DESOLATE', 'AS', 'A', 'WEDNESDAY', 'MEETING', 'HUMPH', 'IT', 'WAS', 'TIME', 'FOR', 'HIM', 'TO', 'DO', 'SOMETHING'] +4970-29095-0029-1334: ref=['IS', 'HE', 'GOING', 'TO', 'START', 'A', 'DAILY', 'NEWSPAPER', 'AMONG', 'THE', 'KICK', 'A', 'POOS'] +4970-29095-0029-1334: hyp=['IS', 'HE', 'GOING', 'TO', 'START', 'A', 'DAILY', 'NEWSPAPER', 'AMONG', 'THE', 'KICKAPOOS'] +4970-29095-0030-1335: ref=['FATHER', "THEE'S", 'UNJUST', 'TO', 'PHILIP', "HE'S", 'GOING', 'INTO', 'BUSINESS'] +4970-29095-0030-1335: hyp=['FATHER', "THEE'S", 'UNJUST', 'TO', 'PHILIP', "HE'S", 'GOING', 'INTO', 'BUSINESS'] +4970-29095-0031-1336: ref=['HE', "DOESN'T", 'SAY', 'EXACTLY', 'WHAT', 'IT', 'IS', 'SAID', 'RUTH', 'A', 'LITTLE', 'DUBIOUSLY', 'BUT', "IT'S", 'SOMETHING', 'ABOUT', 'LAND', 'AND', 'RAILROADS', 'AND', 'THEE', 'KNOWS', 'FATHER', 'THAT', 'FORTUNES', 'ARE', 'MADE', 'NOBODY', 'KNOWS', 'EXACTLY', 'HOW', 'IN', 'A', 'NEW', 'COUNTRY'] +4970-29095-0031-1336: hyp=['HE', "DOESN'T", 'SAY', 'EXACTLY', 'WHAT', 'IT', 'IS', 'SAID', 'RUTH', 'A', 'LITTLE', 'DUBIOUSLY', 'BUT', "IT'S", 'SOMETHING', 'ABOUT', 'LAND', 'AND', 'RAILROADS', 'AND', 'THEE', 'KNOWS', 'FATHER', 'THAT', 'FORTUNES', 'ARE', 'MADE', 'NOBODY', 'KNOWS', 'EXACTLY', 'HOW', 'IN', 'A', 'NEW', 'COUNTRY'] +4970-29095-0032-1337: ref=['BUT', 'PHILIP', 'IS', 'HONEST', 'AND', 'HE', 'HAS', 'TALENT', 'ENOUGH', 'IF', 'HE', 'WILL', 'STOP', 'SCRIBBLING', 'TO', 'MAKE', 'HIS', 'WAY'] +4970-29095-0032-1337: hyp=['BUT', 'PHILIP', 'IS', 'HONEST', 'AND', 'HE', 'HAS', 'TALENT', 'ENOUGH', 'IF', 'HE', 'WILL', 'STOP', 'SCRIBBLING', 'TO', 'MAKE', 'HIS', 'WAY'] +4970-29095-0033-1338: ref=['WHAT', 'A', 'BOX', 'WOMEN', 'ARE', 'PUT', 'INTO', 'MEASURED', 'FOR', 'IT', 'AND', 'PUT', 'IN', 'YOUNG', 'IF', 'WE', 'GO', 'ANYWHERE', "IT'S", 'IN', 'A', 'BOX', 'VEILED', 'AND', 'PINIONED', 'AND', 'SHUT', 'IN', 'BY', 'DISABILITIES'] +4970-29095-0033-1338: hyp=['WHAT', 'A', 'BOX', 'WOMEN', 'ARE', 'PUT', 'INTO', 'MEASURED', 'FOR', 'IT', 'AND', 'PUT', 'IN', 'YOUNG', 'IF', 'WE', 'GO', 'ANYWHERE', "IT'S", 'IN', 'A', 'BOX', 'BALED', 'AND', 'PINIONED', 'AND', 'SHUT', 'IN', 'BY', 'DISABILITIES'] +4970-29095-0034-1339: ref=['WHY', 'SHOULD', 'I', 'RUST', 'AND', 'BE', 'STUPID', 'AND', 'SIT', 'IN', 'INACTION', 'BECAUSE', 'I', 'AM', 'A', 'GIRL'] +4970-29095-0034-1339: hyp=['WHY', 'SHOULD', 'I', 'RUST', 'AND', 'BE', 'STUPID', 'AND', 'SIT', 'IN', 'INACTION', 'BECAUSE', 'I', 'AM', 'A', 'GIRL'] +4970-29095-0035-1340: ref=['AND', 'IF', 'I', 'HAD', 'A', 'FORTUNE', 'WOULD', 'THEE', 'WANT', 'ME', 'TO', 'LEAD', 'A', 'USELESS', 'LIFE'] +4970-29095-0035-1340: hyp=['AND', 'IF', 'I', 'HAD', 'A', 'FORTUNE', 'WOULD', 'THEE', 'WANT', 'ME', 'TO', 'LEAD', 'A', 'USELESS', 'LIFE'] +4970-29095-0036-1341: ref=['HAS', 'THEE', 'CONSULTED', 'THY', 'MOTHER', 'ABOUT', 'A', 'CAREER', 'I', 'SUPPOSE', 'IT', 'IS', 'A', 'CAREER', 'THEE', 'WANTS'] 
+4970-29095-0036-1341: hyp=['HAS', 'THEE', 'CONSULTED', 'THY', 'MOTHER', 'ABOUT', 'A', 'CAREER', 'I', 'SUPPOSE', 'IT', 'IS', 'A', 'CAREER', 'OF', 'THEE', 'WANTS'] +4970-29095-0037-1342: ref=['BUT', 'THAT', 'WISE', 'AND', 'PLACID', 'WOMAN', 'UNDERSTOOD', 'THE', 'SWEET', 'REBEL', 'A', 'GREAT', 'DEAL', 'BETTER', 'THAN', 'RUTH', 'UNDERSTOOD', 'HERSELF'] +4970-29095-0037-1342: hyp=['BUT', 'THAT', 'WISE', 'AND', 'PLACID', 'WOMAN', 'UNDERSTOOD', 'THE', 'SWEET', 'REBBLE', 'A', 'GREAT', 'DEAL', 'BETTER', 'THAN', 'RUTH', 'UNDERSTOOD', 'HERSELF'] +4970-29095-0038-1343: ref=['RUTH', 'WAS', 'GLAD', 'TO', 'HEAR', 'THAT', 'PHILIP', 'HAD', 'MADE', 'A', 'PUSH', 'INTO', 'THE', 'WORLD', 'AND', 'SHE', 'WAS', 'SURE', 'THAT', 'HIS', 'TALENT', 'AND', 'COURAGE', 'WOULD', 'MAKE', 'A', 'WAY', 'FOR', 'HIM'] +4970-29095-0038-1343: hyp=['RUTH', 'WAS', 'GLAD', 'TO', 'HEAR', 'THAT', 'PHILIP', 'HAD', 'MADE', 'A', 'PUSH', 'INTO', 'THE', 'WORLD', 'AND', 'SHE', 'WAS', 'SURE', 'THAT', 'HIS', 'TALENT', 'AND', 'COURAGE', 'WOULD', 'MAKE', 'A', 'WAY', 'FOR', 'HIM'] +4992-23283-0000-1344: ref=['BUT', 'THE', 'MORE', 'FORGETFULNESS', 'HAD', 'THEN', 'PREVAILED', 'THE', 'MORE', 'POWERFUL', 'WAS', 'THE', 'FORCE', 'OF', 'REMEMBRANCE', 'WHEN', 'SHE', 'AWOKE'] +4992-23283-0000-1344: hyp=['BUT', 'THE', 'MORE', 'FORGETFULNESS', 'HAD', 'THEN', 'PREVAILED', 'THE', 'MORE', 'POWERFUL', 'WAS', 'THE', 'FORCE', 'OF', 'REMEMBRANCE', 'WHEN', 'SHE', 'AWOKE'] +4992-23283-0001-1345: ref=['MISS', "MILNER'S", 'HEALTH', 'IS', 'NOT', 'GOOD'] +4992-23283-0001-1345: hyp=['MISS', "MILNER'S", 'HEALTH', 'IS', 'NOT', 'GOOD'] +4992-23283-0002-1346: ref=['SAID', 'MISSUS', 'HORTON', 'A', 'FEW', 'MINUTES', 'AFTER'] +4992-23283-0002-1346: hyp=['SAID', 'MISSUS', 'HORTON', 'A', 'FEW', 'MINUTES', 'AFTER'] +4992-23283-0003-1347: ref=['SO', 'THERE', 'IS', 'TO', 'ME', 'ADDED', 'SANDFORD', 'WITH', 'A', 'SARCASTIC', 'SNEER'] +4992-23283-0003-1347: hyp=['SO', 'THERE', 'IS', 'TO', 'ME', 'ADDED', 'SANDFORD', 'WITH', 'A', 'SARCASTIC', 'SNEER'] +4992-23283-0004-1348: ref=['AND', 'YET', 'YOU', 'MUST', 'OWN', 'HER', 'BEHAVIOUR', 'HAS', 'WARRANTED', 'THEM', 'HAS', 'IT', 'NOT', 'BEEN', 'IN', 'THIS', 'PARTICULAR', 'INCOHERENT', 'AND', 'UNACCOUNTABLE'] +4992-23283-0004-1348: hyp=['AND', 'YET', 'YOU', 'MUST', 'OWN', 'HER', 'BEHAVIOUR', 'HAS', 'WARRANTED', 'THEM', 'HAS', 'IT', 'NOT', 'BEEN', 'IN', 'THIS', 'PARTICULAR', 'INCOHERENT', 'AND', 'UNACCOUNTABLE'] +4992-23283-0005-1349: ref=['NOT', 'THAT', 'I', 'KNOW', 'OF', 'NOT', 'ONE', 'MORE', 'THAT', 'I', 'KNOW', 'OF', 'HE', 'REPLIED', 'WITH', 'ASTONISHMENT', 'AT', 'WHAT', 'SHE', 'HAD', 'INSINUATED', 'AND', 'YET', 'WITH', 'A', 'PERFECT', 'ASSURANCE', 'THAT', 'SHE', 'WAS', 'IN', 'THE', 'WRONG'] +4992-23283-0005-1349: hyp=['NOT', 'THAT', 'I', 'KNOW', 'OF', 'NOT', 'ONE', 'MORE', 'THAT', 'I', 'KNOW', 'OF', 'HE', 'REPLIED', 'WITH', 'ASTONISHMENT', 'AT', 'WHAT', 'SHE', 'HAD', 'INSINUATED', 'AND', 'YET', 'WITH', 'A', 'PERFECT', 'ASSURANCE', 'THAT', 'SHE', 'WAS', 'IN', 'THE', 'WRONG'] +4992-23283-0006-1350: ref=['PERHAPS', 'I', 'AM', 'MISTAKEN', 'ANSWERED', 'SHE'] +4992-23283-0006-1350: hyp=['PERHAPS', 'I', 'AM', 'MISTAKEN', 'ANSWERED', 'SHE'] +4992-23283-0007-1351: ref=['TO', 'ASK', 'ANY', 'MORE', 'QUESTIONS', 'OF', 'YOU', 'I', 'BELIEVE', 'WOULD', 'BE', 'UNFAIR'] +4992-23283-0007-1351: hyp=['TO', 'ASK', 'ANY', 'MORE', 'QUESTIONS', 'OF', 'YOU', 'I', 'BELIEVE', 'WOULD', 'BE', 'UNFAIR'] +4992-23283-0008-1352: ref=['HE', 'SEEMED', 'TO', 'WAIT', 'FOR', 'HER', 'REPLY', 'BUT', 'AS', 'SHE', 'MADE', 'NONE', 'HE', 'PROCEEDED'] +4992-23283-0008-1352: 
hyp=['HE', 'SEEMED', 'TO', 'WAIT', 'FOR', 'HER', 'REPLY', 'BUT', 'AS', 'SHE', 'MADE', 'NONE', 'HE', 'PROCEEDED'] +4992-23283-0009-1353: ref=['OH', 'MY', 'LORD', 'CRIED', 'MISS', 'WOODLEY', 'WITH', 'A', 'MOST', 'FORCIBLE', 'ACCENT', 'YOU', 'ARE', 'THE', 'LAST', 'PERSON', 'ON', 'EARTH', 'SHE', 'WOULD', 'PARDON', 'ME', 'FOR', 'ENTRUSTING'] +4992-23283-0009-1353: hyp=['OH', 'MY', 'LORD', 'CRIED', 'MISS', 'WOODLEY', 'WITH', 'A', 'MOST', 'FORCIBLE', 'ACCENT', 'YOU', 'ARE', 'THE', 'LAST', 'PERSON', 'ON', 'EARTH', 'SHE', 'WOULD', 'PARDON', 'ME', 'FOR', 'INTRUSTING'] +4992-23283-0010-1354: ref=['BUT', 'IN', 'SUCH', 'A', 'CASE', 'MISS', "MILNER'S", 'ELECTION', 'OF', 'A', 'HUSBAND', 'SHALL', 'NOT', 'DIRECT', 'MINE'] +4992-23283-0010-1354: hyp=['BUT', 'IN', 'SUCH', 'A', 'CASE', 'MISS', "MILNER'S", 'ELECTION', 'OF', 'A', 'HUSBAND', 'SHALL', 'NOT', 'DIRECT', 'MINE'] +4992-23283-0011-1355: ref=['IF', 'SHE', 'DOES', 'NOT', 'KNOW', 'HOW', 'TO', 'ESTIMATE', 'HER', 'OWN', 'VALUE', 'I', 'DO'] +4992-23283-0011-1355: hyp=['IF', 'SHE', 'DOES', 'NOT', 'KNOW', 'HOW', 'TO', 'ESTIMATE', 'HER', 'OWN', 'VALUE', 'I', 'DO'] +4992-23283-0012-1356: ref=['INDEPENDENT', 'OF', 'HER', 'FORTUNE', 'SHE', 'HAS', 'BEAUTY', 'TO', 'CAPTIVATE', 'THE', 'HEART', 'OF', 'ANY', 'MAN', 'AND', 'WITH', 'ALL', 'HER', 'FOLLIES', 'SHE', 'HAS', 'A', 'FRANKNESS', 'IN', 'HER', 'MANNER', 'AN', 'UNAFFECTED', 'WISDOM', 'IN', 'HER', 'THOUGHTS', 'A', 'VIVACITY', 'IN', 'HER', 'CONVERSATION', 'AND', 'WITHAL', 'A', 'SOFTNESS', 'IN', 'HER', 'DEMEANOUR', 'THAT', 'MIGHT', 'ALONE', 'ENGAGE', 'THE', 'AFFECTIONS', 'OF', 'A', 'MAN', 'OF', 'THE', 'NICEST', 'SENTIMENTS', 'AND', 'THE', 'STRONGEST', 'UNDERSTANDING'] +4992-23283-0012-1356: hyp=['INDEPENDENT', 'OF', 'HER', 'FORTUNE', 'SHE', 'HAS', 'BEAUTY', 'TO', 'CAPTIVATE', 'THE', 'HEART', 'OF', 'ANY', 'MAN', 'AND', 'WITH', 'ALL', 'HER', 'FOLLIES', 'SHE', 'HAS', 'A', 'FRANKNESS', 'IN', 'HER', 'MANNER', 'AN', 'UNAFFECTED', 'WISDOM', 'IN', 'HER', 'THOUGHTS', 'A', 'VIVACITY', 'IN', 'HER', 'CONVERSATION', 'AND', 'WITHAL', 'A', 'SOFTNESS', 'IN', 'HER', 'DEMEANOR', 'THAT', 'MIGHT', 'ALONE', 'ENGAGE', 'THE', 'AFFECTIONS', 'OF', 'A', 'MAN', 'OF', 'THE', 'NICEST', 'SENTIMENTS', 'AND', 'THE', 'STRONGEST', 'UNDERSTANDING'] +4992-23283-0013-1357: ref=['MY', 'LORD', 'MISS', "MILNER'S", 'TASTE', 'IS', 'NOT', 'A', 'DEPRAVED', 'ONE', 'IT', 'IS', 'BUT', 'TOO', 'REFINED'] +4992-23283-0013-1357: hyp=['MY', 'LORD', 'MISS', "MILNER'S", 'TASTE', 'IS', 'NOT', 'A', 'DEPRAVED', 'ONE', 'IT', 'IS', 'BUT', 'TOO', 'REFINED'] +4992-23283-0014-1358: ref=['WHAT', 'CAN', 'YOU', 'MEAN', 'BY', 'THAT', 'MISS', 'WOODLEY', 'YOU', 'TALK', 'MYSTERIOUSLY'] +4992-23283-0014-1358: hyp=['WHAT', 'CAN', 'YOU', 'MEAN', 'BY', 'THAT', 'MISS', 'WOODLEY', 'YOU', 'TALK', 'MYSTERIOUSLY'] +4992-23283-0015-1359: ref=['IS', 'SHE', 'NOT', 'AFRAID', 'THAT', 'I', 'WILL', 'THWART', 'HER', 'INCLINATIONS'] +4992-23283-0015-1359: hyp=['IS', 'SHE', 'NOT', 'AFRAID', 'THAT', 'I', 'WILL', 'THWART', 'HER', 'INCLINATIONS'] +4992-23283-0016-1360: ref=['AGAIN', 'HE', 'SEARCHED', 'HIS', 'OWN', 'THOUGHTS', 'NOR', 'INEFFECTUALLY', 'AS', 'BEFORE'] +4992-23283-0016-1360: hyp=['AGAIN', 'HE', 'SEARCHED', 'HIS', 'OWN', 'THOUGHTS', 'NOR', 'INEFFECTUALLY', 'AS', 'BEFORE'] +4992-23283-0017-1361: ref=['MISS', 'WOODLEY', 'WAS', 'TOO', 'LITTLE', 'VERSED', 'IN', 'THE', 'SUBJECT', 'TO', 'KNOW', 'THIS', 'WOULD', 'HAVE', 'BEEN', 'NOT', 'TO', 'LOVE', 'AT', 'ALL', 'AT', 'LEAST', 'NOT', 'TO', 'THE', 'EXTENT', 'OF', 'BREAKING', 'THROUGH', 'ENGAGEMENTS', 'AND', 'ALL', 'THE', 'VARIOUS', 'OBSTACLES', 'THAT', 
'STILL', 'MILITATED', 'AGAINST', 'THEIR', 'UNION'] +4992-23283-0017-1361: hyp=['MISS', 'WOODLEY', 'WAS', 'TOO', 'LITTLE', 'VERSED', 'IN', 'THE', 'SUBJECT', 'TO', 'KNOW', 'THIS', 'WOULD', 'HAVE', 'BEEN', 'NOT', 'TO', 'LOVE', 'AT', 'ALL', 'AT', 'LEAST', 'NOT', 'TO', 'THE', 'EXTENT', 'OF', 'BREAKING', 'THROUGH', 'ENGAGEMENTS', 'AND', 'ALL', 'THE', 'VARIOUS', 'OBSTACLES', 'THAT', 'STILL', 'MITIGATED', 'AGAINST', 'THEIR', 'UNION'] +4992-23283-0018-1362: ref=['TO', 'RELIEVE', 'HER', 'FROM', 'BOTH', 'HE', 'LAID', 'HIS', 'HAND', 'WITH', 'FORCE', 'UPON', 'HIS', 'HEART', 'AND', 'SAID', 'DO', 'YOU', 'BELIEVE', 'ME'] +4992-23283-0018-1362: hyp=['TO', 'RELIEVE', 'HER', 'FROM', 'BOTH', 'HE', 'LAID', 'HIS', 'HAND', 'WITH', 'FORCE', 'UPON', 'HIS', 'HEART', 'AND', 'SAID', 'DO', 'YOU', 'BELIEVE', 'ME'] +4992-23283-0019-1363: ref=['I', 'WILL', 'MAKE', 'NO', 'UNJUST', 'USE', 'OF', 'WHAT', 'I', 'KNOW', 'HE', 'REPLIED', 'WITH', 'FIRMNESS', 'I', 'BELIEVE', 'YOU', 'MY', 'LORD'] +4992-23283-0019-1363: hyp=['I', 'WILL', 'MAKE', 'NO', 'UNJUST', 'USE', 'OF', 'WHAT', 'I', 'KNOW', 'HE', 'REPLIED', 'WITH', 'FIRMNESS', 'I', 'BELIEVE', 'YOU', 'MY', 'LORD'] +4992-23283-0020-1364: ref=['I', 'HAVE', 'NEVER', 'YET', 'HOWEVER', 'BEEN', 'VANQUISHED', 'BY', 'THEM', 'AND', 'EVEN', 'UPON', 'THIS', 'OCCASION', 'MY', 'REASON', 'SHALL', 'COMBAT', 'THEM', 'TO', 'THE', 'LAST', 'AND', 'MY', 'REASON', 'SHALL', 'FAIL', 'ME', 'BEFORE', 'I', 'DO', 'WRONG'] +4992-23283-0020-1364: hyp=['I', 'HAVE', 'NEVER', 'YET', 'HOWEVER', 'BEEN', 'VANQUISHED', 'BY', 'THEM', 'AND', 'EVEN', 'UPON', 'THIS', 'OCCASION', 'MY', 'REASON', 'SHALL', 'COMBAT', 'THEM', 'TO', 'THE', 'LAST', 'AND', 'MY', 'REASON', 'SHALL', 'FAIL', 'ME', 'BEFORE', 'I', 'DO', 'WRONG'] +4992-41797-0000-1365: ref=['YES', 'DEAD', 'THESE', 'FOUR', 'YEARS', 'AN', 'A', 'GOOD', 'JOB', 'FOR', 'HER', 'TOO'] +4992-41797-0000-1365: hyp=['YES', 'DEAD', 'THESE', 'FOUR', 'YEARS', 'AND', 'A', 'GOOD', 'JOB', 'FOR', 'HER', 'TOO'] +4992-41797-0001-1366: ref=['WELL', 'AS', 'I', 'SAY', "IT'S", 'AN', 'AWFUL', 'QUEER', 'WORLD', 'THEY', 'CLAP', 'ALL', 'THE', 'BURGLARS', 'INTO', 'JAIL', 'AND', 'THE', 'MURDERERS', 'AND', 'THE', 'WIFE', 'BEATERS', "I'VE", 'ALLERS', 'THOUGHT', 'A', 'GENTLE', 'REPROOF', 'WOULD', 'BE', 'ENOUGH', 'PUNISHMENT', 'FOR', 'A', 'WIFE', 'BEATER', 'CAUSE', 'HE', 'PROBABLY', 'HAS', 'A', 'LOT', 'O', 'PROVOCATION', 'THAT', 'NOBODY', 'KNOWS', 'AND', 'THE', 'FIREBUGS', "CAN'T", 'THINK', 'O', 'THE', 'RIGHT', 'NAME', 'SOMETHING', 'LIKE', 'CENDENARIES', 'AN', 'THE', 'BREAKERS', 'O', 'THE', 'PEACE', 'AN', 'WHAT', 'NOT', 'AN', 'YET', 'THE', 'LAW', 'HAS', 'NOTHIN', 'TO', 'SAY', 'TO', 'A', 'MAN', 'LIKE', 'HEN', 'LORD'] +4992-41797-0001-1366: hyp=['WELL', 'AS', 'I', 'SAY', "IT'S", 'AN', 'AWFUL', 'QUEER', 'WORLD', 'THEY', 'CLAP', 'ALL', 'THE', 'BURGLARS', 'AND', 'JAIL', 'THE', 'MURDERERS', 'AND', 'THE', 'WIFE', 'BEATERS', 'I', 'ALLERS', 'THOUGHT', 'A', 'GENLE', 'REPROOF', 'WOULD', 'BE', 'ENOUGH', 'PUNISHMENT', 'FOR', 'A', 'WIFE', 'BEAER', 'CAUSE', 'HE', "PROB'BLY", 'HAS', 'A', 'LOT', 'OF', 'PROVOCATIONS', 'THAT', 'NOBODY', 'KNOWS', 'AND', 'THE', 'FIREBUGS', "CAN'T", 'THINK', 'OF', 'THE', 'RIGHT', 'NAME', 'SOMETHING', 'LIKE', 'SENDIARIES', 'AND', 'THE', 'BREAKERS', 'OF', 'THE', 'PEACE', 'AND', 'WHAT', 'NOT', 'AND', 'YET', 'THE', 'LAW', 'HAS', 'NOTHING', 'TO', 'SAY', 'TO', 'A', 'MAN', 'LIKE', 'HAND', 'LORD'] +4992-41797-0002-1367: ref=['GRANDFATHER', 'WAS', 'ALEXANDER', 'CAREY', 'L', 'L', 'D', 'DOCTOR', 'OF', 'LAWS', 'THAT', 'IS'] +4992-41797-0002-1367: hyp=['GRANDFATHER', 'WAS', 'ALEXANDER', 'CAREY', 'L', 
'L', 'D', 'DOCTOR', 'OF', 'LAWS', 'THAT', 'IS'] +4992-41797-0003-1368: ref=['MISTER', 'POPHAM', 'LAID', 'DOWN', 'HIS', 'BRUSH'] +4992-41797-0003-1368: hyp=['MISTER', 'POPHAM', 'LAID', 'DOWN', 'HIS', 'BRUSH'] +4992-41797-0004-1369: ref=['I', 'SWAN', 'TO', 'MAN', 'HE', 'EJACULATED', 'IF', 'YOU', "DON'T", 'WORK', 'HARD', 'YOU', "CAN'T", 'KEEP', 'UP', 'WITH', 'THE', 'TIMES', 'DOCTOR', 'OF', 'LAWS'] +4992-41797-0004-1369: hyp=['I', 'SWAIN', 'TO', 'MAN', 'HE', 'EJACULATED', 'IF', 'YOU', "DON'T", 'WORK', 'HARD', 'YOU', "CAN'T", 'KEEP', 'UP', 'WITH', 'THE', 'TIMES', 'DOCTOR', 'OF', 'LAWS'] +4992-41797-0005-1370: ref=['DONE', 'HE', "AIN'T", 'DONE', 'A', 'THING', "HE'D", 'OUGHTER', 'SENCE', 'HE', 'WAS', 'BORN'] +4992-41797-0005-1370: hyp=['DONE', 'HE', "AIN'T", 'DONE', 'A', 'THANK', 'HE', 'OUGHTER', 'SINCE', 'HE', 'WAS', 'BORN'] +4992-41797-0006-1371: ref=['HE', 'KEEPS', 'THE', 'THOU', 'SHALT', 'NOT', 'COMMANDMENTS', 'FIRST', 'RATE', 'HEN', 'LORD', 'DOES'] +4992-41797-0006-1371: hyp=['HE', 'KEEPS', 'THE', 'THOU', 'SHALT', 'NOT', 'COMMAND', 'AS', 'FIRST', 'RATE', 'HEN', 'LORD', 'DOES'] +4992-41797-0007-1372: ref=['HE', 'GIVE', 'UP', 'HIS', 'POSITION', 'AND', 'SHUT', 'THE', 'FAMILY', 'UP', 'IN', 'THAT', 'TOMB', 'OF', 'A', 'HOUSE', 'SO', 'T', 'HE', 'COULD', 'STUDY', 'HIS', 'BOOKS'] +4992-41797-0007-1372: hyp=['HE', 'GAVE', 'UP', 'HIS', 'POSITION', 'AND', 'SHUT', 'THE', 'FAMILY', 'UP', 'IN', 'THAT', 'TOMB', 'OF', 'A', 'HOUSE', 'SO', 'HE', 'COULD', 'STUDY', 'HIS', 'BOOKS'] +4992-41797-0008-1373: ref=['MISTER', 'POPHAM', 'EXAGGERATED', 'NOTHING', 'BUT', 'ON', 'THE', 'CONTRARY', 'LEFT', 'MUCH', 'UNSAID', 'IN', 'HIS', 'NARRATIVE', 'OF', 'THE', 'FAMILY', 'AT', 'THE', 'HOUSE', 'OF', 'LORDS'] +4992-41797-0008-1373: hyp=['MISTER', 'POPHAM', 'EXAGGERATED', 'NOTHING', 'BUT', 'ON', 'THE', 'CONTRARY', 'LEFT', 'MUCH', 'UNSAID', 'IN', 'HIS', 'NARRATIVE', 'OF', 'THE', 'FAMILY', 'AT', 'THE', 'HOUSE', 'OF', 'LORDS'] +4992-41797-0009-1374: ref=['HENRY', 'LORD', 'WITH', 'THE', 'DEGREE', 'OF', 'PH', 'D', 'TO', 'HIS', 'CREDIT', 'HAD', 'BEEN', 'PROFESSOR', 'OF', 'ZOOLOGY', 'AT', 'A', 'NEW', 'ENGLAND', 'COLLEGE', 'BUT', 'HAD', 'RESIGNED', 'HIS', 'POST', 'IN', 'ORDER', 'TO', 'WRITE', 'A', 'SERIES', 'OF', 'SCIENTIFIC', 'TEXT', 'BOOKS'] +4992-41797-0009-1374: hyp=['HENRY', 'LORD', 'WITH', 'THE', 'DEGREE', 'OF', 'P', 'H', 'D', 'TO', 'HIS', 'CREDIT', 'HAD', 'BEEN', 'PROFESSOR', 'OF', 'ZOOLOGY', 'AT', 'A', 'NEW', 'ENGLAND', 'COLLEGE', 'BUT', 'HAD', 'RESIGNED', 'HIS', 'POST', 'IN', 'ORDER', 'TO', 'WRITE', 'A', 'SERIES', 'OF', 'SCIENTIFIC', 'TEXT', 'BOOKS'] +4992-41797-0010-1375: ref=['ALWAYS', 'IRRITABLE', 'COLD', 'INDIFFERENT', 'HE', 'HAD', 'GROWN', 'RAPIDLY', 'MORE', 'SO', 'AS', 'YEARS', 'WENT', 'ON'] +4992-41797-0010-1375: hyp=['ALWAYS', 'IRRITABLE', 'COLD', 'INDIFFERENT', 'HE', 'HAD', 'GROWN', 'RAPIDLY', 'MORE', 'SO', 'AS', 'YEARS', 'WENT', 'ON'] +4992-41797-0011-1376: ref=['WHATEVER', 'APPEALED', 'TO', 'HER', 'SENSE', 'OF', 'BEAUTY', 'WAS', 'STRAIGHTWAY', 'TRANSFERRED', 'TO', 'PAPER', 'OR', 'CANVAS'] +4992-41797-0011-1376: hyp=['WHATEVER', 'APPEALED', 'TO', 'HER', 'SENSE', 'OF', 'BEAUTY', 'WAS', 'STRAIGHTWAY', 'TRANSFERRED', 'TO', 'PAPER', 'OR', 'CANVAS'] +4992-41797-0012-1377: ref=['SHE', 'IS', 'WILD', 'TO', 'KNOW', 'HOW', 'TO', 'DO', 'THINGS'] +4992-41797-0012-1377: hyp=['SHE', 'IS', 'WILD', 'TO', 'KNOW', 'HOW', 'TO', 'DO', 'THINGS'] +4992-41797-0013-1378: ref=['SHE', 'MAKES', 'EFFORT', 'AFTER', 'EFFORT', 'TREMBLING', 'WITH', 'EAGERNESS', 'AND', 'WHEN', 'SHE', 'FAILS', 'TO', 'REPRODUCE', 'WHAT', 'SHE', 'SEES', 'SHE', 'WORKS', 
'HERSELF', 'INTO', 'A', 'FRENZY', 'OF', 'GRIEF', 'AND', 'DISAPPOINTMENT'] +4992-41797-0013-1378: hyp=['SHE', 'MAKES', 'EFFORT', 'AFTER', 'EFFORT', 'TREMBLING', 'WITH', 'EAGERNESS', 'AND', 'WHEN', 'SHE', 'FAILS', 'TO', 'REPRODUCE', 'WHAT', 'SHE', 'SEES', 'SHE', 'WORKS', 'HERSELF', 'INTO', 'A', 'FRENZY', 'OF', 'GRIEF', 'AND', 'DISAPPOINTMENT'] +4992-41797-0014-1379: ref=['WHEN', 'SHE', 'COULD', 'NOT', 'MAKE', 'A', 'RABBIT', 'OR', 'A', 'BIRD', 'LOOK', 'REAL', 'ON', 'PAPER', 'SHE', 'SEARCHED', 'IN', 'HER', "FATHER'S", 'BOOKS', 'FOR', 'PICTURES', 'OF', 'ITS', 'BONES'] +4992-41797-0014-1379: hyp=['WHEN', 'SHE', 'COULD', 'NOT', 'MAKE', 'A', 'RABBIT', 'OR', 'A', 'BIRD', 'LOOK', 'REAL', 'ON', 'PAPER', 'SHE', 'SEARCHED', 'IN', 'HER', "FATHER'S", 'BOOKS', 'FOR', 'PICTURES', 'OF', 'ITS', 'BONES'] +4992-41797-0015-1380: ref=['CYRIL', 'THERE', 'MUST', 'BE', 'SOME', 'BETTER', 'WAY', 'OF', 'DOING', 'I', 'JUST', 'DRAW', 'THE', 'OUTLINE', 'OF', 'AN', 'ANIMAL', 'AND', 'THEN', 'I', 'PUT', 'HAIRS', 'OR', 'FEATHERS', 'ON', 'IT', 'THEY', 'HAVE', 'NO', 'BODIES'] +4992-41797-0015-1380: hyp=['CYRIL', 'THERE', 'MUST', 'BE', 'SOME', 'BETTER', 'WAY', 'OF', 'DOING', 'I', 'JUST', 'DRAW', 'THE', 'OUTLINE', 'OF', 'AN', 'ANIMAL', 'AND', 'THEN', 'I', 'PUT', 'HAIRS', 'OR', 'FEATHERS', 'ON', 'IT', 'THEY', 'HAVE', 'NO', 'BODIES'] +4992-41797-0016-1381: ref=['THEY', "COULDN'T", 'RUN', 'NOR', 'MOVE', "THEY'RE", 'JUST', 'PASTEBOARD'] +4992-41797-0016-1381: hyp=['THEY', "COULDN'T", 'RUN', 'OR', 'MOVE', "THEY'RE", 'JUST', 'PASTEBOARD'] +4992-41797-0017-1382: ref=['HE', "WOULDN'T", 'SEARCH', 'SO', "DON'T", 'WORRY', 'REPLIED', 'CYRIL', 'QUIETLY', 'AND', 'THE', 'TWO', 'LOOKED', 'AT', 'EACH', 'OTHER', 'AND', 'KNEW', 'THAT', 'IT', 'WAS', 'SO'] +4992-41797-0017-1382: hyp=['HE', "WOULDN'T", 'SEARCH', 'SO', "DON'T", 'WORRY', 'REPLIED', 'CYRIL', 'QUIETLY', 'AND', 'THE', 'TWO', 'LOOKED', 'AT', 'EACH', 'OTHER', 'AND', 'KNEW', 'THAT', 'IT', 'WAS', 'SO'] +4992-41797-0018-1383: ref=['THERE', 'IN', 'THE', 'CEDAR', 'HOLLOW', 'THEN', 'LIVED', 'OLIVE', 'LORD', 'AN', 'ANGRY', 'RESENTFUL', 'LITTLE', 'CREATURE', 'WEIGHED', 'DOWN', 'BY', 'A', 'FIERCE', 'SENSE', 'OF', 'INJURY'] +4992-41797-0018-1383: hyp=['THERE', 'IN', 'THE', 'CEDAR', 'HOLLOW', 'THEN', 'LIVED', 'OLIVE', 'LORD', 'AN', 'ANGRY', 'RESENTFUL', 'LITTLE', 'CREATURE', 'WEIGHED', 'DOWN', 'BY', 'A', 'FIERCE', 'SENSE', 'OF', 'INJURY'] +4992-41797-0019-1384: ref=["OLIVE'S", 'MOURNFUL', 'BLACK', 'EYES', 'MET', "NANCY'S", 'SPARKLING', 'BROWN', 'ONES'] +4992-41797-0019-1384: hyp=["OLIVE'S", 'MOURNFUL', 'BLACK', 'EYES', 'MET', "NANCY'S", 'SPARKLING', 'BROWN', 'ONES'] +4992-41797-0020-1385: ref=["NANCY'S", 'CURLY', 'CHESTNUT', 'CROP', 'SHONE', 'IN', 'THE', 'SUN', 'AND', "OLIVE'S", 'THICK', 'BLACK', 'PLAITS', 'LOOKED', 'BLACKER', 'BY', 'CONTRAST'] +4992-41797-0020-1385: hyp=["NANCY'S", 'CURLY', 'CHESTNUT', 'CROP', 'SHONE', 'IN', 'THE', 'SUN', 'AND', "OLIVE'S", 'THICK', 'BLACK', 'PLATES', 'LOOKED', 'BLACKER', 'BY', 'CONTRAST'] +4992-41797-0021-1386: ref=["SHE'S", 'WONDERFUL', 'MORE', 'WONDERFUL', 'THAN', 'ANYBODY', "WE'VE", 'EVER', 'SEEN', 'ANYWHERE', 'AND', 'SHE', 'DRAWS', 'BETTER', 'THAN', 'THE', 'TEACHER', 'IN', 'CHARLESTOWN'] +4992-41797-0021-1386: hyp=['SHE', 'IS', 'WONDERFUL', 'MORE', 'WONDERFUL', 'THAN', 'ANYBODY', "WE'VE", 'EVER', 'SEEN', 'ANYWHERE', 'AND', 'SHE', 'DRAWLS', 'BETTER', 'THAN', 'THE', 'TEACHER', 'IN', 'CHARLESTOWN'] +4992-41797-0022-1387: ref=["SHE'S", 'OLDER', 'THAN', 'I', 'AM', 'BUT', 'SO', 'TINY', 'AND', 'SAD', 'AND', 'SHY', 'THAT', 'SHE', 'SEEMS', 'LIKE', 'A', 'CHILD'] 
+4992-41797-0022-1387: hyp=["SHE'S", 'OLDER', 'THAN', 'I', 'AM', 'BUT', 'SO', 'TINY', 'AND', 'SAD', 'AND', 'SHY', 'THAT', 'SHE', 'SEEMS', 'LIKE', 'A', 'CHILD'] +4992-41806-0000-1388: ref=['NATTY', 'HARMON', 'TRIED', 'THE', 'KITCHEN', 'PUMP', 'SECRETLY', 'SEVERAL', 'TIMES', 'DURING', 'THE', 'EVENING', 'FOR', 'THE', 'WATER', 'HAD', 'TO', 'RUN', 'UP', 'HILL', 'ALL', 'THE', 'WAY', 'FROM', 'THE', 'WELL', 'TO', 'THE', 'KITCHEN', 'SINK', 'AND', 'HE', 'BELIEVED', 'THIS', 'TO', 'BE', 'A', 'CONTINUAL', 'MIRACLE', 'THAT', 'MIGHT', 'GIVE', 'OUT', 'AT', 'ANY', 'MOMENT'] +4992-41806-0000-1388: hyp=['NATTY', 'HARMON', 'TRIED', 'THE', 'KITCHEN', 'PUMP', 'SECRETLY', 'SEVERAL', 'TIMES', 'DURING', 'THE', 'EVENING', 'FOR', 'THE', 'WATER', 'HAD', 'TO', 'RUN', 'UPHILL', 'ALL', 'THE', 'WAY', 'FROM', 'THE', 'WELL', 'TO', 'THE', 'KITCHEN', 'SINK', 'AND', 'HE', 'BELIEVED', 'THIS', 'TO', 'BE', 'A', 'CONTINUAL', 'MIRACLE', 'THAT', 'MIGHT', 'GIVE', 'OUT', 'AT', 'ANY', 'MOMENT'] +4992-41806-0001-1389: ref=['TO', 'NIGHT', 'THERE', 'WAS', 'NO', 'NEED', 'OF', 'EXTRA', 'HEAT', 'AND', 'THERE', 'WERE', 'GREAT', 'CEREMONIES', 'TO', 'BE', 'OBSERVED', 'IN', 'LIGHTING', 'THE', 'FIRES', 'ON', 'THE', 'HEARTHSTONES'] +4992-41806-0001-1389: hyp=['TO', 'NIGHT', 'THERE', 'WAS', 'NO', 'NEED', 'OF', 'EXTRA', 'HEAT', 'AND', 'THERE', 'WERE', 'GREAT', 'CEREMONIES', 'TO', 'BE', 'OBSERVED', 'IN', 'LIGHTING', 'THE', 'FIRES', 'ON', 'THE', 'HEARTHSTONES'] +4992-41806-0002-1390: ref=['THEY', 'BEGAN', 'WITH', 'THE', 'ONE', 'IN', 'THE', 'FAMILY', 'SITTING', 'ROOM', 'COLONEL', 'WHEELER', 'RALPH', 'THURSTON', 'MISTER', 'AND', 'MISSUS', 'BILL', 'HARMON', 'WITH', 'NATTY', 'AND', 'RUFUS', 'MISTER', 'AND', 'MISSUS', 'POPHAM', 'WITH', 'DIGBY', 'AND', 'LALLIE', 'JOY', 'ALL', 'STANDING', 'IN', 'ADMIRING', 'GROUPS', 'AND', 'THRILLING', 'WITH', 'DELIGHT', 'AT', 'THE', 'ORDER', 'OF', 'EVENTS'] +4992-41806-0002-1390: hyp=['THEY', 'BEGAN', 'WITH', 'THE', 'ONE', 'IN', 'THE', 'FAMILY', 'SITTING', 'ROOM', 'COLONEL', 'WHEELER', 'RALPH', 'THURSTON', 'MISTER', 'AND', 'MISSUS', 'BILL', 'HARMON', 'WITH', 'NATTIE', 'AND', 'RUFFUS', 'MISTER', 'AND', 'MISSUS', 'POPHAM', 'WITH', 'DIGBY', 'AND', 'LALLIE', 'JOY', 'ALL', 'STANDING', 'IN', 'ADMIRING', 'GROUPS', 'AND', 'THRILLING', 'WITH', 'DELIGHT', 'AT', 'THE', 'ORDER', 'OF', 'EVENTS'] +4992-41806-0003-1391: ref=['KATHLEEN', 'WAVED', 'THE', 'TORCH', 'TO', 'AND', 'FRO', 'AS', 'SHE', 'RECITED', 'SOME', 'BEAUTIFUL', 'LINES', 'WRITTEN', 'FOR', 'SOME', 'SUCH', 'PURPOSE', 'AS', 'THAT', 'WHICH', 'CALLED', 'THEM', 'TOGETHER', 'TO', 'NIGHT'] +4992-41806-0003-1391: hyp=['KATHLEEN', 'WAVED', 'THE', 'TORCH', 'TO', 'AND', 'FRO', 'AS', 'SHE', 'RECITED', 'SOME', 'BEAUTIFUL', 'LINES', 'WRITTEN', 'FOR', 'SOME', 'SUCH', 'PURPOSE', 'AS', 'THAT', 'WHICH', 'CALLED', 'THEM', 'TOGETHER', 'TO', 'NIGHT'] +4992-41806-0004-1392: ref=['BURN', 'FIRE', 'BURN', 'FLICKER', 'FLICKER', 'FLAME'] +4992-41806-0004-1392: hyp=['BURN', 'FIRE', 'BURN', 'FLICKER', 'FLICKER', 'FLAME'] +4992-41806-0005-1393: ref=['NEXT', 'CAME', "OLIVE'S", 'TURN', 'TO', 'HELP', 'IN', 'THE', 'CEREMONIES'] +4992-41806-0005-1393: hyp=['NEXT', 'CAME', "OLIVE'S", 'TURN', 'TO', 'HELP', 'IN', 'THE', 'CEREMONIES'] +4992-41806-0006-1394: ref=['RALPH', 'THURSTON', 'HAD', 'FOUND', 'A', 'LINE', 'OF', 'LATIN', 'FOR', 'THEM', 'IN', 'HIS', 'BELOVED', 'HORACE', 'TIBI', 'SPLENDET', 'FOCUS', 'FOR', 'YOU', 'THE', 'HEARTH', 'FIRE', 'SHINES'] +4992-41806-0006-1394: hyp=['RALPH', 'THURSTON', 'HAD', 'FOUND', 'A', 'LINE', 'OF', 'LATIN', 'FOR', 'THEM', 'IN', 'HIS', 'BELOVED', 'HORRACE', 'TIBBY', 'SPLENDID', 
'FOCUS', 'FOR', 'YOU', 'THE', 'HEARTH', 'FIRE', 'SHINES'] +4992-41806-0007-1395: ref=['OLIVE', 'HAD', 'PAINTED', 'THE', 'MOTTO', 'ON', 'A', 'LONG', 'NARROW', 'PANEL', 'OF', 'CANVAS', 'AND', 'GIVING', 'IT', 'TO', 'MISTER', 'POPHAM', 'STOOD', 'BY', 'THE', 'FIRESIDE', 'WHILE', 'HE', 'DEFTLY', 'FITTED', 'IT', 'INTO', 'THE', 'PLACE', 'PREPARED', 'FOR', 'IT'] +4992-41806-0007-1395: hyp=['OLIVE', 'HAD', 'PAINTED', 'THE', 'MOTTO', 'ON', 'A', 'LONG', 'NARROW', 'PANEL', 'OF', 'CANVAS', 'AND', 'GIVING', 'IT', 'TO', 'MISTER', 'POPHAM', 'STOOD', 'BY', 'THE', 'FIRESIDE', 'WHILE', 'HE', 'DEFTLY', 'FITTED', 'IT', 'INTO', 'THE', 'PLACE', 'PREPARED', 'FOR', 'IT'] +4992-41806-0008-1396: ref=['OLIVE', 'HAS', 'ANOTHER', 'LOVELY', 'GIFT', 'FOR', 'THE', 'YELLOW', 'HOUSE', 'SAID', 'MOTHER', 'CAREY', 'RISING', 'AND', 'TO', 'CARRY', 'OUT', 'THE', 'NEXT', 'PART', 'OF', 'THE', 'PROGRAMME', 'WE', 'SHALL', 'HAVE', 'TO', 'GO', 'IN', 'PROCESSION', 'UPSTAIRS', 'TO', 'MY', 'BEDROOM'] +4992-41806-0008-1396: hyp=['OLIVE', 'HAS', 'ANOTHER', 'LOVELY', 'GIFT', 'FOR', 'THE', 'YELLOW', 'HOUSE', 'SAID', 'MOTHER', 'CAREY', 'RISING', 'AND', 'TO', 'CARRY', 'OUT', 'THE', 'NEXT', 'PART', 'OF', 'THE', 'PROGRAMME', 'WE', 'SHALL', 'HAVE', 'TO', 'GO', 'IN', 'PROCESSION', 'UPSTAIRS', 'TO', 'MY', 'BEDROOM'] +4992-41806-0009-1397: ref=['EXCLAIMED', 'BILL', 'HARMON', 'TO', 'HIS', 'WIFE', 'AS', 'THEY', 'WENT', 'THROUGH', 'THE', 'LIGHTED', 'HALL'] +4992-41806-0009-1397: hyp=['EXCLAIMED', 'BILL', 'HARMON', 'TO', 'HIS', 'WIFE', 'AS', 'THEY', 'WENT', 'THROUGH', 'THE', 'LIGHTED', 'HALL'] +4992-41806-0010-1398: ref=["AIN'T", 'THEY', 'THE', 'GREATEST'] +4992-41806-0010-1398: hyp=["AIN'T", 'THEY', 'THE', 'GREATEST'] +4992-41806-0011-1399: ref=['MOTHER', 'CAREY', 'POURED', 'COFFEE', 'NANCY', 'CHOCOLATE', 'AND', 'THE', 'OTHERS', 'HELPED', 'SERVE', 'THE', 'SANDWICHES', 'AND', 'CAKE', 'DOUGHNUTS', 'AND', 'TARTS'] +4992-41806-0011-1399: hyp=['MOTHER', 'CAREY', 'POURED', 'COFFEE', 'NANCY', 'CHOCOLATE', 'AND', 'THE', 'OTHERS', 'HELPED', 'SERVE', 'THE', 'SANDWICHES', 'AND', 'CAKE', 'DOUGHNUTS', 'AND', 'TARTS'] +4992-41806-0012-1400: ref=['AT', 'THAT', 'MOMENT', 'THE', 'GENTLEMAN', 'ENTERED', 'BEARING', 'A', 'HUGE', 'OBJECT', 'CONCEALED', 'BY', 'A', 'PIECE', 'OF', 'GREEN', 'FELT'] +4992-41806-0012-1400: hyp=['AT', 'THAT', 'MOMENT', 'THE', 'GENTLEMAN', 'ENTERED', 'BEARING', 'A', 'HUGE', 'OBJECT', 'CONCEALED', 'BY', 'A', 'PIECE', 'OF', 'GREEN', 'FELT'] +4992-41806-0013-1401: ref=['APPROACHING', 'THE', 'DINING', 'TABLE', 'HE', 'CAREFULLY', 'PLACED', 'THE', 'ARTICLE', 'IN', 'THE', 'CENTRE', 'AND', 'REMOVED', 'THE', 'CLOTH'] +4992-41806-0013-1401: hyp=['APPROACHING', 'THE', 'DINING', 'TABLE', 'HE', 'CAREFULLY', 'PLACED', 'THE', 'ARTICLE', 'IN', 'THE', 'CENTRE', 'AND', 'REMOVED', 'THE', 'CLOTH'] +4992-41806-0014-1402: ref=['THINKS', 'I', 'TO', 'MYSELF', 'I', 'NEVER', 'SEEN', 'ANYTHING', 'OSH', 'POPHAM', "COULDN'T", 'MEND', 'IF', 'HE', 'TOOK', 'TIME', 'ENOUGH', 'AND', 'GLUE', 'ENOUGH', 'SO', 'I', 'CARRIED', 'THIS', 'LITTLE', 'FELLER', 'HOME', 'IN', 'A', 'BUSHEL', 'BASKET', 'ONE', 'NIGHT', 'LAST', 'MONTH', 'AN', "I'VE", 'SPENT', 'ELEVEN', "EVENIN'S", 'PUTTIN', 'HIM', 'TOGETHER'] +4992-41806-0014-1402: hyp=['THINKSIDE', 'OF', 'MYSELF', "I'VE", 'NEVER', 'SEEN', 'ANYTHING', 'OSH', 'PAPA', "COULDN'T", 'MEND', 'IF', 'HE', 'TOOK', 'TIME', 'ENOUGH', 'AND', 'GLUE', 'ENOUGH', 'SO', 'I', 'CARRIED', 'THIS', 'LITTLE', 'FELLER', 'HOME', 'IN', 'A', 'BUSHEL', 'BASKET', 'ONE', 'NIGHT', 'LAST', 'MONTH', 'AND', "I'VE", 'SPENT', 'ELEVEN', 'EVENINGS', 'PUTTING', 'HIM', 'TOGETHER'] 
+4992-41806-0015-1403: ref=['MISSUS', 'HARMON', 'THOUGHT', 'HE', 'SANG', 'TOO', 'MUCH', 'AND', 'TOLD', 'HER', 'HUSBAND', 'PRIVATELY', 'THAT', 'IF', 'HE', 'WAS', 'A', 'CANARY', 'BIRD', 'SHE', 'SHOULD', 'WANT', 'TO', 'KEEP', 'A', 'TABLE', 'COVER', 'OVER', 'HIS', 'HEAD', 'MOST', 'OF', 'THE', 'TIME', 'BUT', 'HE', 'WAS', 'IMMENSELY', 'POPULAR', 'WITH', 'THE', 'REST', 'OF', 'HIS', 'AUDIENCE'] +4992-41806-0015-1403: hyp=['MISSUS', 'HARMON', 'THOUGHT', 'HE', 'SANG', 'TOO', 'MUCH', 'AND', 'TOLD', 'HER', 'HUSBAND', 'PRIVATELY', 'THAT', 'IF', 'HE', 'WAS', 'A', 'CANARY', 'BIRD', 'SHE', 'SHOULD', 'WANT', 'TO', 'KEEP', 'A', 'TABLE', 'COVER', 'OVER', 'HIS', 'HEAD', 'MOST', 'OF', 'THE', 'TIME', 'BUT', 'HE', 'WAS', 'IMMENSELY', 'POPULAR', 'WITH', 'THE', 'REST', 'OF', 'HIS', 'AUDIENCE'] +4992-41806-0016-1404: ref=['THE', 'FACE', 'OF', 'THE', 'MAHOGANY', 'SHONE', 'WITH', 'DELIGHT', 'AND', 'WHY', 'NOT', 'WHEN', 'IT', 'WAS', 'DOING', 'EVERYTHING', 'ALMOST', 'EVERYTHING', 'WITHIN', 'THE', 'SCOPE', 'OF', 'A', 'PIANO', 'AND', 'YET', 'THE', 'FAMILY', 'HAD', 'ENJOYED', 'WEEKS', 'OF', 'GOOD', 'NOURISHING', 'MEALS', 'ON', 'WHAT', 'HAD', 'BEEN', 'SAVED', 'BY', 'ITS', 'EXERTIONS'] +4992-41806-0016-1404: hyp=['THE', 'FACE', 'OF', 'THE', 'MAHOGANY', 'SHONE', 'WITH', 'DELIGHT', 'AND', 'WHY', 'NOT', 'WHEN', 'IT', 'WAS', 'DOING', 'EVERYTHING', 'ALMOST', 'EVERYTHING', 'WITHIN', 'THE', 'SCOPE', 'OF', 'A', 'PIANO', 'AND', 'YET', 'THE', 'FAMILY', 'HAD', 'ENJOYED', 'WEEKS', 'OF', 'GOOD', 'NOURISHING', 'MEALS', 'ON', 'WHAT', 'HAD', 'BEEN', 'SAVED', 'BY', 'ITS', 'EXERTIONS'] +4992-41806-0017-1405: ref=['WE', 'SHUT', 'OUR', 'EYES', 'THE', 'FLOWERS', 'BLOOM', 'ON', 'WE', 'MURMUR', 'BUT', 'THE', 'CORN', 'EARS', 'FILL', 'WE', 'CHOOSE', 'THE', 'SHADOW', 'BUT', 'THE', 'SUN', 'THAT', 'CASTS', 'IT', 'SHINES', 'BEHIND', 'US', 'STILL'] +4992-41806-0017-1405: hyp=['WE', 'SHUT', 'OUR', 'EYES', 'THE', 'FLOWERS', 'BLOOM', 'ON', 'WE', 'MURMUR', 'BUT', 'THE', 'CORN', 'EARS', 'FILL', 'WE', 'CHOOSE', 'THE', 'SHADOW', 'BUT', 'THE', 'SUN', 'THAT', 'CAST', 'IT', 'SHINES', 'BEHIND', 'US', 'STILL'] +5105-28233-0000-1406: ref=['LENGTH', 'OF', 'SERVICE', 'FOURTEEN', 'YEARS', 'THREE', 'MONTHS', 'AND', 'FIVE', 'DAYS'] +5105-28233-0000-1406: hyp=['LENGTH', 'OF', 'SERVICE', 'FOURTEEN', 'YEARS', 'THREE', 'MONTHS', 'AND', 'FIVE', 'DAYS'] +5105-28233-0001-1407: ref=['HE', 'SEEMED', 'BORN', 'TO', 'PLEASE', 'WITHOUT', 'BEING', 'CONSCIOUS', 'OF', 'THE', 'POWER', 'HE', 'POSSESSED'] +5105-28233-0001-1407: hyp=['HE', 'SEEMED', 'BORN', 'TO', 'PLEASE', 'WITHOUT', 'BEING', 'CONSCIOUS', 'OF', 'THE', 'POWER', 'HE', 'POSSESSED'] +5105-28233-0002-1408: ref=['IT', 'MUST', 'BE', 'OWNED', 'AND', 'NO', 'ONE', 'WAS', 'MORE', 'READY', 'TO', 'CONFESS', 'IT', 'THAN', 'HIMSELF', 'THAT', 'HIS', 'LITERARY', 'ATTAINMENTS', 'WERE', 'BY', 'NO', 'MEANS', 'OF', 'A', 'HIGH', 'ORDER'] +5105-28233-0002-1408: hyp=['IT', 'MUST', 'BE', 'OWNED', 'AND', 'NO', 'ONE', 'WAS', 'MORE', 'READY', 'TO', 'CONFESS', 'IT', 'THAN', 'HIMSELF', 'THAT', 'HIS', 'LITERARY', 'ATTAINMENTS', 'WERE', 'BY', 'NO', 'MEANS', 'OF', 'A', 'HIGH', 'ORDER'] +5105-28233-0003-1409: ref=['WE', "DON'T", 'SPIN', 'TOPS', 'IS', 'A', 'FAVORITE', 'SAYING', 'AMONGST', 'ARTILLERY', 'OFFICERS', 'INDICATING', 'THAT', 'THEY', 'DO', 'NOT', 'SHIRK', 'THEIR', 'DUTY', 'BY', 'FRIVOLOUS', 'PURSUITS', 'BUT', 'IT', 'MUST', 'BE', 'CONFESSED', 'THAT', 'SERVADAC', 'BEING', 'NATURALLY', 'IDLE', 'WAS', 'VERY', 'MUCH', 'GIVEN', 'TO', 'SPINNING', 'TOPS'] +5105-28233-0003-1409: hyp=['WE', "DON'T", 'SPIN', 'TOPS', 'IS', 'A', 'FAVORITE', 'SAYING', 'AMONGST', 
'ARTILLERY', 'OFFICERS', 'INDICATING', 'THAT', 'THEY', 'DO', 'NOT', 'SHIRK', 'THEIR', 'DUTY', 'BY', 'FRIVOLOUS', 'PURSUITS', 'BUT', 'IT', 'MUST', 'BE', 'CONFESSED', 'THAT', 'SERVADAC', 'BEING', 'NATURALLY', 'IDLE', 'WAS', 'VERY', 'MUCH', 'GIVEN', 'TO', 'SPINNING', 'TOPS'] +5105-28233-0004-1410: ref=['ONCE', 'IN', 'ACTION', 'HE', 'WAS', 'LEADING', 'A', 'DETACHMENT', 'OF', 'INFANTRY', 'THROUGH', 'AN', 'INTRENCHMENT'] +5105-28233-0004-1410: hyp=['ONCE', 'IN', 'ACTION', 'HE', 'WAS', 'LEADING', 'A', 'DETACHMENT', 'OF', 'INFANTRY', 'THROUGH', 'AN', 'ENTRENCHMENT'] +5105-28233-0005-1411: ref=['SOMETIMES', 'HE', 'WOULD', 'WANDER', 'ON', 'FOOT', 'UPON', 'THE', 'SANDY', 'SHORE', 'AND', 'SOMETIMES', 'HE', 'WOULD', 'ENJOY', 'A', 'RIDE', 'ALONG', 'THE', 'SUMMIT', 'OF', 'THE', 'CLIFF', 'ALTOGETHER', 'BEING', 'IN', 'NO', 'HURRY', 'AT', 'ALL', 'TO', 'BRING', 'HIS', 'TASK', 'TO', 'AN', 'END'] +5105-28233-0005-1411: hyp=['SOMETIMES', 'HE', 'WOULD', 'WANDER', 'ON', 'FOOT', 'UPON', 'THE', 'SANDY', 'SHORE', 'AND', 'SOMETIMES', 'HE', 'WOULD', 'ENJOY', 'A', 'RIDE', 'ALONG', 'THE', 'SUMMIT', 'OF', 'THE', 'CLIFF', 'ALTOGETHER', 'BEING', 'IN', 'NO', 'HURRY', 'AT', 'ALL', 'TO', 'BRING', 'HIS', 'TASK', 'TO', 'AN', 'END'] +5105-28233-0006-1412: ref=['NO', 'CATHEDRAL', 'NOT', 'EVEN', 'BURGOS', 'ITSELF', 'COULD', 'VIE', 'WITH', 'THE', 'CHURCH', 'AT', 'MONTMARTRE'] +5105-28233-0006-1412: hyp=['NO', 'CATHEDRAL', 'NOT', 'EVEN', 'BURGOS', 'ITSELF', 'COULD', 'VIE', 'WITH', 'THE', 'CHURCH', 'AT', 'MONT', 'MARTRE'] +5105-28233-0007-1413: ref=['BEN', "ZOOF'S", 'MOST', 'AMBITIOUS', 'DESIRE', 'WAS', 'TO', 'INDUCE', 'THE', 'CAPTAIN', 'TO', 'GO', 'WITH', 'HIM', 'AND', 'END', 'HIS', 'DAYS', 'IN', 'HIS', 'MUCH', 'LOVED', 'HOME', 'AND', 'SO', 'INCESSANTLY', 'WERE', "SERVADAC'S", 'EARS', 'BESIEGED', 'WITH', 'DESCRIPTIONS', 'OF', 'THE', 'UNPARALLELED', 'BEAUTIES', 'AND', 'ADVANTAGES', 'OF', 'THIS', 'EIGHTEENTH', 'ARRONDISSEMENT', 'OF', 'PARIS', 'THAT', 'HE', 'COULD', 'SCARCELY', 'HEAR', 'THE', 'NAME', 'OF', 'MONTMARTRE', 'WITHOUT', 'A', 'CONSCIOUS', 'THRILL', 'OF', 'AVERSION'] +5105-28233-0007-1413: hyp=['BEN', "ZOOF'S", 'MOST', 'AMBITIOUS', 'DESIRE', 'WAS', 'TO', 'INDUCE', 'THE', 'CAPTAIN', 'TO', 'GO', 'WITH', 'HIM', 'AND', 'END', 'HIS', 'DAYS', 'IN', 'HIS', 'MUCH', 'LOVED', 'HOME', 'AND', 'SO', 'INCESSANTLY', 'WERE', "SERVADAC'S", 'EARS', 'BESIEGED', 'WITH', 'DESCRIPTIONS', 'OF', 'THE', 'UNPARALLELED', 'BEAUTIES', 'AND', 'ADVANTAGES', 'OF', 'THIS', 'EIGHTEENTH', 'ARONNDISSIMON', 'OF', 'PARIS', 'THAT', 'HE', 'COULD', 'SCARCELY', 'HEAR', 'THE', 'NAME', 'OF', 'MONTMARCHRE', 'WITHOUT', 'A', 'CONSCIOUS', 'THRILL', 'OF', 'AVERSION'] +5105-28233-0008-1414: ref=['WHEN', 'A', 'PRIVATE', 'IN', 'THE', 'EIGHTH', 'CAVALRY', 'HE', 'HAD', 'BEEN', 'ON', 'THE', 'POINT', 'OF', 'QUITTING', 'THE', 'ARMY', 'AT', 'TWENTY', 'EIGHT', 'YEARS', 'OF', 'AGE', 'BUT', 'UNEXPECTEDLY', 'HE', 'HAD', 'BEEN', 'APPOINTED', 'ORDERLY', 'TO', 'CAPTAIN', 'SERVADAC'] +5105-28233-0008-1414: hyp=['WHEN', 'A', 'PRIVATE', 'IN', 'THE', 'EIGHTH', 'CAVALRY', 'HE', 'HAD', 'BEEN', 'ON', 'THE', 'POINT', 'OF', 'QUITTING', 'THE', 'ARMY', 'AT', 'TWENTY', 'EIGHT', 'YEARS', 'OF', 'AGE', 'BUT', 'UNEXPECTEDLY', 'HE', 'HAD', 'BEEN', 'APPOINTED', 'ORDERLY', 'TO', 'CAPTAIN', 'SERVADAC'] +5105-28233-0009-1415: ref=['THE', 'BOND', 'OF', 'UNION', 'THUS', 'EFFECTED', 'COULD', 'NEVER', 'BE', 'SEVERED', 'AND', 'ALTHOUGH', 'BEN', "ZOOF'S", 'ACHIEVEMENTS', 'HAD', 'FAIRLY', 'EARNED', 'HIM', 'THE', 'RIGHT', 'OF', 'RETIREMENT', 'HE', 'FIRMLY', 'DECLINED', 'ALL', 'HONORS', 'OR', 'ANY', 'PENSION', 'THAT', 
'MIGHT', 'PART', 'HIM', 'FROM', 'HIS', 'SUPERIOR', 'OFFICER'] +5105-28233-0009-1415: hyp=['THE', 'BOND', 'OF', 'UNION', 'THUS', 'EFFECTED', 'COULD', 'NEVER', 'BE', 'SEVERED', 'AND', 'ALTHOUGH', 'BEN', "ZOOF'S", 'ACHIEVEMENTS', 'HAD', 'FAIRLY', 'EARNED', 'HIM', 'THE', 'RIGHT', 'OF', 'RETIREMENT', 'HE', 'FIRMLY', 'DECLINED', 'ALL', 'HONORS', 'OR', 'ANY', 'PENSION', 'THAT', 'MIGHT', 'PART', 'HIM', 'FROM', 'HIS', 'SUPERIOR', 'OFFICER'] +5105-28233-0010-1416: ref=['UNLIKE', 'HIS', 'MASTER', 'HE', 'MADE', 'NO', 'PRETENSION', 'TO', 'ANY', 'GIFT', 'OF', 'POETIC', 'POWER', 'BUT', 'HIS', 'INEXHAUSTIBLE', 'MEMORY', 'MADE', 'HIM', 'A', 'LIVING', 'ENCYCLOPAEDIA', 'AND', 'FOR', 'HIS', 'STOCK', 'OF', 'ANECDOTES', 'AND', "TROOPER'S", 'TALES', 'HE', 'WAS', 'MATCHLESS'] +5105-28233-0010-1416: hyp=['UNLIKE', 'HIS', 'MASTER', 'HE', 'MADE', 'NO', 'PRETENSION', 'TO', 'ANY', 'GIFT', 'OF', 'POETIC', 'POWER', 'BUT', 'HIS', 'INEXHAUSTIBLE', 'MEMORY', 'MADE', 'HIM', 'A', 'LIVING', 'ENCYCLOPAEDIA', 'AND', 'FOR', 'HIS', 'STOCK', 'OF', 'ANECDOTES', 'AND', 'TROOPERS', 'TALES', 'HE', 'WAS', 'MATCHLESS'] +5105-28240-0000-1417: ref=['FAST', 'AS', 'HIS', 'LEGS', 'COULD', 'CARRY', 'HIM', 'SERVADAC', 'HAD', 'MADE', 'HIS', 'WAY', 'TO', 'THE', 'TOP', 'OF', 'THE', 'CLIFF'] +5105-28240-0000-1417: hyp=['FAST', 'AS', 'HIS', 'LEGS', 'COULD', 'CARRY', 'HIM', 'SERVADAC', 'HAD', 'MADE', 'HIS', 'WAY', 'TO', 'THE', 'TOP', 'OF', 'THE', 'CLIFF'] +5105-28240-0001-1418: ref=['IT', 'WAS', 'QUITE', 'TRUE', 'THAT', 'A', 'VESSEL', 'WAS', 'IN', 'SIGHT', 'HARDLY', 'MORE', 'THAN', 'SIX', 'MILES', 'FROM', 'THE', 'SHORE', 'BUT', 'OWING', 'TO', 'THE', 'INCREASE', 'IN', 'THE', "EARTH'S", 'CONVEXITY', 'AND', 'THE', 'CONSEQUENT', 'LIMITATION', 'OF', 'THE', 'RANGE', 'OF', 'VISION', 'THE', 'RIGGING', 'OF', 'THE', 'TOPMASTS', 'ALONE', 'WAS', 'VISIBLE', 'ABOVE', 'THE', 'WATER'] +5105-28240-0001-1418: hyp=['IT', 'WAS', 'QUITE', 'TRUE', 'THAT', 'A', 'VESSEL', 'WAS', 'IN', 'SIGHT', 'HARDLY', 'MORE', 'THAN', 'SIX', 'MILES', 'FROM', 'THE', 'SHORE', 'BUT', 'OWING', 'TO', 'THE', 'INCREASE', 'IN', 'THE', "EARTH'S", 'CONVEXITY', 'AND', 'THE', 'CONSEQUENT', 'LIMITATION', 'OF', 'THE', 'RANGE', 'OF', 'VISION', 'THE', 'RIGGING', 'OF', 'THE', 'TOP', 'MASTS', 'ALONE', 'WAS', 'VISIBLE', 'ABOVE', 'THE', 'WATER'] +5105-28240-0002-1419: ref=['EXCLAIMED', 'SERVADAC', 'KEEPING', 'HIS', 'EYE', 'UNMOVED', 'AT', 'HIS', 'TELESCOPE'] +5105-28240-0002-1419: hyp=['EXCLAIMED', 'SERVADAC', 'KEEPING', 'HIS', 'EYE', 'UNMOVED', 'AT', 'HIS', 'TELESCOPE'] +5105-28240-0003-1420: ref=['SHE', 'IS', 'UNDER', 'SAIL', 'BUT', 'SHE', 'IS', 'COUNT', "TIMASCHEFF'S", 'YACHT', 'HE', 'WAS', 'RIGHT'] +5105-28240-0003-1420: hyp=['SHE', 'IS', 'UNDER', 'SAIL', 'BUT', 'SHE', 'IS', 'COUNT', "TIMASCHEFF'S", 'YACHT', 'HE', 'WAS', 'RIGHT'] +5105-28240-0004-1421: ref=['IF', 'THE', 'COUNT', 'WERE', 'ON', 'BOARD', 'A', 'STRANGE', 'FATALITY', 'WAS', 'BRINGING', 'HIM', 'TO', 'THE', 'PRESENCE', 'OF', 'HIS', 'RIVAL'] +5105-28240-0004-1421: hyp=['IF', 'THE', 'COUNT', 'WERE', 'ON', 'BOARD', 'A', 'STRANGE', 'FATALITY', 'WAS', 'BRINGING', 'HIM', 'TO', 'THE', 'PRESENCE', 'OF', 'HIS', 'RIVAL'] +5105-28240-0005-1422: ref=['HE', 'RECKONED', 'THEREFORE', 'NOT', 'ONLY', 'UPON', 'ASCERTAINING', 'THE', 'EXTENT', 'OF', 'THE', 'LATE', 'CATASTROPHE', 'BUT', 'UPON', 'LEARNING', 'ITS', 'CAUSE'] +5105-28240-0005-1422: hyp=['HE', 'RECKONED', 'THEREFORE', 'NOT', 'ONLY', 'UPON', 'ASCERTAINING', 'THE', 'EXTENT', 'OF', 'THE', 'LATE', 'CATASTROPHE', 'BUT', 'UPON', 'LEARNING', 'ITS', 'CAUSE'] +5105-28240-0006-1423: ref=['THE', 'WIND', 'BEING', 
'ADVERSE', 'THE', 'DOBRYNA', 'DID', 'NOT', 'MAKE', 'VERY', 'RAPID', 'PROGRESS', 'BUT', 'AS', 'THE', 'WEATHER', 'IN', 'SPITE', 'OF', 'A', 'FEW', 'CLOUDS', 'REMAINED', 'CALM', 'AND', 'THE', 'SEA', 'WAS', 'QUITE', 'SMOOTH', 'SHE', 'WAS', 'ENABLED', 'TO', 'HOLD', 'A', 'STEADY', 'COURSE'] +5105-28240-0006-1423: hyp=['THE', 'WIND', 'BEING', 'ADVERSE', 'THE', 'DOBRYNA', 'DID', 'NOT', 'MAKE', 'VERY', 'RAPID', 'PROGRESS', 'BUT', 'AS', 'THE', 'WEATHER', 'IN', 'SPITE', 'OF', 'A', 'FEW', 'CLOUDS', 'REMAINED', 'CALM', 'AND', 'THE', 'SEA', 'WAS', 'QUITE', 'SMOOTH', 'SHE', 'WAS', 'ENABLED', 'TO', 'HOLD', 'A', 'STEADY', 'COURSE'] +5105-28240-0007-1424: ref=['SERVADAC', 'TOOK', 'IT', 'FOR', 'GRANTED', 'THAT', 'THE', 'DOBRYNA', 'WAS', 'ENDEAVORING', 'TO', 'PUT', 'IN'] +5105-28240-0007-1424: hyp=['SERVADAC', 'TOOK', 'IT', 'FOR', 'GRANTED', 'THAT', 'THE', 'DOBRYNA', 'WAS', 'ENDEAVORING', 'TO', 'PUT', 'IN'] +5105-28240-0008-1425: ref=['A', 'NARROW', 'CHANNEL', 'FORMED', 'A', 'PASSAGE', 'THROUGH', 'THE', 'RIDGE', 'OF', 'ROCKS', 'THAT', 'PROTECTED', 'IT', 'FROM', 'THE', 'OPEN', 'SEA', 'AND', 'WHICH', 'EVEN', 'IN', 'THE', 'ROUGHEST', 'WEATHER', 'WOULD', 'ENSURE', 'THE', 'CALMNESS', 'OF', 'ITS', 'WATERS'] +5105-28240-0008-1425: hyp=['A', 'NARROW', 'CHANNEL', 'FORMED', 'A', 'PASSAGE', 'THROUGH', 'THE', 'RIDGE', 'OF', 'ROCKS', 'THAT', 'PROTECTED', 'IT', 'FROM', 'THE', 'OPEN', 'SEA', 'AND', 'WHICH', 'EVEN', 'IN', 'THE', 'ROUGHEST', 'WEATHER', 'WOULD', 'INSURE', 'THE', 'CALMNESS', 'OF', 'ITS', 'WATERS'] +5105-28240-0009-1426: ref=['SLIGHTLY', 'CHANGING', 'HER', 'COURSE', 'SHE', 'FIRST', 'STRUCK', 'HER', 'MAINSAIL', 'AND', 'IN', 'ORDER', 'TO', 'FACILITATE', 'THE', 'MOVEMENTS', 'OF', 'HER', 'HELMSMAN', 'SOON', 'CARRIED', 'NOTHING', 'BUT', 'HER', 'TWO', 'TOPSAILS', 'BRIGANTINE', 'AND', 'JIB'] +5105-28240-0009-1426: hyp=['SLIGHTLY', 'CHANGING', 'HER', 'COURSE', 'SHE', 'FIRST', 'STRUCK', 'HER', 'MAINSAIL', 'AND', 'IN', 'ORDER', 'TO', 'FACILITATE', 'THE', 'MOVEMENTS', 'OF', 'HER', 'HELMSMAN', 'SOON', 'CARRIED', 'NOTHING', 'BUT', 'HER', 'TWO', 'TOPSAILS', 'BRIGANTINE', 'AND', 'JIB'] +5105-28240-0010-1427: ref=['CAPTAIN', 'SERVADAC', 'HASTENED', 'TOWARDS', 'HIM'] +5105-28240-0010-1427: hyp=['CAPTAIN', 'SERVADAC', 'HASTENED', 'TOWARD', 'HIM'] +5105-28240-0011-1428: ref=['I', 'LEFT', 'YOU', 'ON', 'A', 'CONTINENT', 'AND', 'HERE', 'I', 'HAVE', 'THE', 'HONOR', 'OF', 'FINDING', 'YOU', 'ON', 'AN', 'ISLAND'] +5105-28240-0011-1428: hyp=['I', 'LEFT', 'YOU', 'ON', 'A', 'CONTINENT', 'AND', 'HERE', 'I', 'HAVE', 'THE', 'HONOUR', 'OF', 'FINDING', 'YOU', 'ON', 'AN', 'ISLAND'] +5105-28240-0012-1429: ref=['NEVER', 'MIND', 'NOW', 'INTERPOSED', 'THE', 'CAPTAIN', 'WE', 'WILL', 'TALK', 'OF', 'THAT', 'BY', 'AND', 'BY'] +5105-28240-0012-1429: hyp=['NEVER', 'MIND', 'NOW', 'INTERPOSED', 'THE', 'CAPTAIN', 'WE', 'WILL', 'TALK', 'OF', 'THAT', 'BY', 'AND', 'BY'] +5105-28240-0013-1430: ref=['NOTHING', 'MORE', 'THAN', 'YOU', 'KNOW', 'YOURSELF'] +5105-28240-0013-1430: hyp=['NOTHING', 'MORE', 'THAN', 'YOU', 'KNOW', 'YOURSELF'] +5105-28240-0014-1431: ref=['ARE', 'YOU', 'CERTAIN', 'THAT', 'THIS', 'IS', 'THE', 'MEDITERRANEAN'] +5105-28240-0014-1431: hyp=['ARE', 'YOU', 'CERTAIN', 'THAT', 'THIS', 'IS', 'THE', 'MEDITERRANEAN'] +5105-28240-0015-1432: ref=['FOR', 'SOME', 'MOMENTS', 'HE', 'SEEMED', 'PERFECTLY', 'STUPEFIED', 'THEN', 'RECOVERING', 'HIMSELF', 'HE', 'BEGAN', 'TO', 'OVERWHELM', 'THE', 'COUNT', 'WITH', 'A', 'TORRENT', 'OF', 'QUESTIONS'] +5105-28240-0015-1432: hyp=['FOR', 'SOME', 'MOMENTS', 'HE', 'SEEMED', 'PERFECTLY', 'STUPEFIED', 'THEN', 'RECOVERING', 
'HIMSELF', 'HE', 'BEGAN', 'TO', 'OVERWHELM', 'THE', 'COUNT', 'WITH', 'A', 'TORRENT', 'OF', 'QUESTIONS'] +5105-28240-0016-1433: ref=['TO', 'ALL', 'THESE', 'INQUIRIES', 'THE', 'COUNT', 'RESPONDED', 'IN', 'THE', 'AFFIRMATIVE'] +5105-28240-0016-1433: hyp=['TO', 'ALL', 'THESE', 'INQUIRIES', 'THE', 'COUNT', 'RESPONDED', 'IN', 'THE', 'AFFIRMATIVE'] +5105-28240-0017-1434: ref=['SOME', 'MYSTERIOUS', 'FORCE', 'SEEMED', 'TO', 'HAVE', 'BROUGHT', 'ABOUT', 'A', 'CONVULSION', 'OF', 'THE', 'ELEMENTS'] +5105-28240-0017-1434: hyp=['SOME', 'MYSTERIOUS', 'FORCE', 'SEEMED', 'TO', 'HAVE', 'BROUGHT', 'ABOUT', 'A', 'CONVULSION', 'OF', 'THE', 'ELEMENTS'] +5105-28240-0018-1435: ref=['YOU', 'WILL', 'TAKE', 'ME', 'ON', 'BOARD', 'COUNT', 'WILL', 'YOU', 'NOT'] +5105-28240-0018-1435: hyp=['YOU', 'WILL', 'TAKE', 'ME', 'ON', 'BOARD', 'COUNT', 'WILL', 'YOU', 'NOT'] +5105-28240-0019-1436: ref=['MY', 'YACHT', 'IS', 'AT', 'YOUR', 'SERVICE', 'SIR', 'EVEN', 'SHOULD', 'YOU', 'REQUIRE', 'TO', 'MAKE', 'A', 'TOUR', 'ROUND', 'THE', 'WORLD'] +5105-28240-0019-1436: hyp=['MY', 'YACHT', 'IS', 'AT', 'YOUR', 'SERVICE', 'SIR', 'EVEN', 'SHOULD', 'YOU', 'REQUIRE', 'TO', 'MAKE', 'A', 'TOUR', 'ROUND', 'THE', 'WORLD'] +5105-28240-0020-1437: ref=['THE', 'COUNT', 'SHOOK', 'HIS', 'HEAD'] +5105-28240-0020-1437: hyp=['THE', 'COUNT', 'SHOOK', 'HIS', 'HEAD'] +5105-28240-0021-1438: ref=['BEFORE', 'STARTING', 'IT', 'WAS', 'INDISPENSABLE', 'THAT', 'THE', 'ENGINE', 'OF', 'THE', 'DOBRYNA', 'SHOULD', 'BE', 'REPAIRED', 'TO', 'SAIL', 'UNDER', 'CANVAS', 'ONLY', 'WOULD', 'IN', 'CONTRARY', 'WINDS', 'AND', 'ROUGH', 'SEAS', 'BE', 'BOTH', 'TEDIOUS', 'AND', 'DIFFICULT'] +5105-28240-0021-1438: hyp=['BEFORE', 'STARTING', 'IT', 'WAS', 'INDISPENSABLE', 'THAT', 'THE', 'ENGINE', 'OF', 'THE', 'DOBRYNA', 'SHOULD', 'BE', 'REPAIRED', 'TO', 'SAIL', 'UNDER', 'CANVAS', 'ONLY', 'WOULD', 'IN', 'CONTRARY', 'WINDS', 'AND', 'ROUGH', 'SEAS', 'BE', 'BOTH', 'TEDIOUS', 'AND', 'DIFFICULT'] +5105-28240-0022-1439: ref=['IT', 'WAS', 'ON', 'THE', 'LAST', 'DAY', 'OF', 'JANUARY', 'THAT', 'THE', 'REPAIRS', 'OF', 'THE', 'SCHOONER', 'WERE', 'COMPLETED'] +5105-28240-0022-1439: hyp=['IT', 'WAS', 'ON', 'THE', 'LAST', 'DAY', 'OF', 'JANUARY', 'THAT', 'THE', 'REPAIRS', 'OF', 'THE', 'SCHOONER', 'WERE', 'COMPLETED'] +5105-28240-0023-1440: ref=['A', 'SLIGHT', 'DIMINUTION', 'IN', 'THE', 'EXCESSIVELY', 'HIGH', 'TEMPERATURE', 'WHICH', 'HAD', 'PREVAILED', 'FOR', 'THE', 'LAST', 'FEW', 'WEEKS', 'WAS', 'THE', 'ONLY', 'APPARENT', 'CHANGE', 'IN', 'THE', 'GENERAL', 'ORDER', 'OF', 'THINGS', 'BUT', 'WHETHER', 'THIS', 'WAS', 'TO', 'BE', 'ATTRIBUTED', 'TO', 'ANY', 'ALTERATION', 'IN', 'THE', "EARTH'S", 'ORBIT', 'WAS', 'A', 'QUESTION', 'WHICH', 'WOULD', 'STILL', 'REQUIRE', 'SEVERAL', 'DAYS', 'TO', 'DECIDE'] +5105-28240-0023-1440: hyp=['A', 'SLIGHT', 'DIMINUTION', 'IN', 'THE', 'EXCESSIVELY', 'HIGH', 'TEMPERATURE', 'WHICH', 'HAD', 'PREVAILED', 'FOR', 'THE', 'LAST', 'FEW', 'WEEKS', 'WAS', 'THE', 'ONLY', 'APPARENT', 'CHANGE', 'IN', 'THE', 'GENERAL', 'ORDER', 'OF', 'THINGS', 'BUT', 'WHETHER', 'THIS', 'WAS', 'TO', 'BE', 'ATTRIBUTED', 'TO', 'ANY', 'ALTERATION', 'IN', 'THE', "EARTH'S", 'ORBIT', 'WAS', 'A', 'QUESTION', 'WHICH', 'WOULD', 'STILL', 'REQUIRE', 'SEVERAL', 'DAYS', 'TO', 'DECIDE'] +5105-28240-0024-1441: ref=['DOUBTS', 'NOW', 'AROSE', 'AND', 'SOME', 'DISCUSSION', 'FOLLOWED', 'WHETHER', 'OR', 'NOT', 'IT', 'WAS', 'DESIRABLE', 'FOR', 'BEN', 'ZOOF', 'TO', 'ACCOMPANY', 'HIS', 'MASTER'] +5105-28240-0024-1441: hyp=['DOUBTS', 'NOW', 'AROSE', 'AND', 'SOME', 'DISCUSSION', 'FOLLOWED', 'WHETHER', 'OR', 'NOT', 'IT', 'WAS', 
'DESIRABLE', 'FOR', 'BEN', 'ZOOF', 'TO', 'ACCOMPANY', 'HIS', 'MASTER'] +5105-28241-0000-1442: ref=['HER', 'SEA', 'GOING', 'QUALITIES', 'WERE', 'EXCELLENT', 'AND', 'WOULD', 'HAVE', 'AMPLY', 'SUFFICED', 'FOR', 'A', 'CIRCUMNAVIGATION', 'OF', 'THE', 'GLOBE'] +5105-28241-0000-1442: hyp=['HER', 'SEA', 'GOING', 'QUALITIES', 'WERE', 'EXCELLENT', 'AND', 'WOULD', 'HAVE', 'AMPLY', 'SUFFICED', 'FOR', 'A', 'CIRCUMNAVIGATION', 'OF', 'THE', 'GLOBE'] +5105-28241-0001-1443: ref=['AFTER', 'AN', 'APPRENTICESHIP', 'ON', 'A', 'MERCHANT', 'SHIP', 'HE', 'HAD', 'ENTERED', 'THE', 'IMPERIAL', 'NAVY', 'AND', 'HAD', 'ALREADY', 'REACHED', 'THE', 'RANK', 'OF', 'LIEUTENANT', 'WHEN', 'THE', 'COUNT', 'APPOINTED', 'HIM', 'TO', 'THE', 'CHARGE', 'OF', 'HIS', 'OWN', 'PRIVATE', 'YACHT', 'IN', 'WHICH', 'HE', 'WAS', 'ACCUSTOMED', 'TO', 'SPEND', 'BY', 'FAR', 'THE', 'GREATER', 'PART', 'OF', 'HIS', 'TIME', 'THROUGHOUT', 'THE', 'WINTER', 'GENERALLY', 'CRUISING', 'IN', 'THE', 'MEDITERRANEAN', 'WHILST', 'IN', 'THE', 'SUMMER', 'HE', 'VISITED', 'MORE', 'NORTHERN', 'WATERS'] +5105-28241-0001-1443: hyp=['AFTER', 'AN', 'APPRENTICESHIP', 'ON', 'A', 'MERCHANT', 'SHIP', 'HE', 'HAD', 'ENTERED', 'THE', 'IMPERIAL', 'NAVY', 'AND', 'HAD', 'ALREADY', 'REACHED', 'THE', 'RANK', 'OF', 'LIEUTENANT', 'WHEN', 'THE', 'COUNT', 'APPOINTED', 'HIM', 'TO', 'THE', 'CHARGE', 'OF', 'HIS', 'OWN', 'PRIVATE', 'YACHT', 'IN', 'WHICH', 'HE', 'WAS', 'ACCUSTOMED', 'TO', 'SPEND', 'BY', 'FAR', 'THE', 'GREATER', 'PART', 'OF', 'HIS', 'TIME', 'THROUGHOUT', 'THE', 'WINTER', 'GENERALLY', 'CRUISING', 'IN', 'THE', 'MEDITERRANEAN', 'WHILST', 'IN', 'THE', 'SUMMER', 'HE', 'VISITED', 'MORE', 'NORTHERN', 'WATERS'] +5105-28241-0002-1444: ref=['THE', 'LATE', 'ASTOUNDING', 'EVENTS', 'HOWEVER', 'HAD', 'RENDERED', 'PROCOPE', 'MANIFESTLY', 'UNEASY', 'AND', 'NOT', 'THE', 'LESS', 'SO', 'FROM', 'HIS', 'CONSCIOUSNESS', 'THAT', 'THE', 'COUNT', 'SECRETLY', 'PARTOOK', 'OF', 'HIS', 'OWN', 'ANXIETY'] +5105-28241-0002-1444: hyp=['THE', 'LATE', 'ASTOUNDING', 'EVENTS', 'HOWEVER', 'HAD', 'RENDERED', 'PROCOPE', 'MANIFESTLY', 'UNEASY', 'AND', 'NOT', 'THE', 'LESS', 'SO', 'FROM', 'HIS', 'CONSCIOUSNESS', 'THAT', 'THE', 'COUNT', 'SECRETLY', 'PARTOOK', 'OF', 'HIS', 'OWN', 'ANXIETY'] +5105-28241-0003-1445: ref=['STEAM', 'UP', 'AND', 'CANVAS', 'SPREAD', 'THE', 'SCHOONER', 'STARTED', 'EASTWARDS'] +5105-28241-0003-1445: hyp=['STEAM', 'UP', 'AND', 'CANVAS', 'SPREAD', 'THE', 'SCHOONER', 'STARTED', 'EASTWARDS'] +5105-28241-0004-1446: ref=['ALTHOUGH', 'ONLY', 'A', 'MODERATE', 'BREEZE', 'WAS', 'BLOWING', 'THE', 'SEA', 'WAS', 'ROUGH', 'A', 'CIRCUMSTANCE', 'TO', 'BE', 'ACCOUNTED', 'FOR', 'ONLY', 'BY', 'THE', 'DIMINUTION', 'IN', 'THE', 'FORCE', 'OF', 'THE', "EARTH'S", 'ATTRACTION', 'RENDERING', 'THE', 'LIQUID', 'PARTICLES', 'SO', 'BUOYANT', 'THAT', 'BY', 'THE', 'MERE', 'EFFECT', 'OF', 'OSCILLATION', 'THEY', 'WERE', 'CARRIED', 'TO', 'A', 'HEIGHT', 'THAT', 'WAS', 'QUITE', 'UNPRECEDENTED'] +5105-28241-0004-1446: hyp=['ALTHOUGH', 'ONLY', 'A', 'MODERATE', 'BREEZE', 'WAS', 'BLOWING', 'THE', 'SEA', 'WAS', 'ROUGH', 'A', 'CIRCUMSTANCE', 'TO', 'BE', 'ACCOUNTED', 'FOR', 'ONLY', 'BY', 'THE', 'DIMINUTION', 'IN', 'THE', 'FORCE', 'OF', 'THE', "EARTH'S", 'ATTRACTION', 'RENDERING', 'THE', 'LIQUID', 'PARTICLES', 'SO', 'BUOYANT', 'THAT', 'BY', 'THE', 'MERE', 'EFFECT', 'OF', 'OSCILLATION', 'THEY', 'WERE', 'CARRIED', 'TO', 'A', 'HEIGHT', 'THAT', 'WAS', 'QUITE', 'UNPRECEDENTED'] +5105-28241-0005-1447: ref=['FOR', 'A', 'FEW', 'MILES', 'SHE', 'FOLLOWED', 'THE', 'LINE', 'HITHERTO', 'PRESUMABLY', 'OCCUPIED', 'BY', 'THE', 'COAST', 'OF', 'ALGERIA', 
'BUT', 'NO', 'LAND', 'APPEARED', 'TO', 'THE', 'SOUTH'] +5105-28241-0005-1447: hyp=['FOR', 'A', 'FEW', 'MILES', 'SHE', 'FOLLOWED', 'THE', 'LINE', 'HITHERTO', 'PRESUMABLY', 'OCCUPIED', 'BY', 'THE', 'COAST', 'OF', 'ALGERIA', 'BUT', 'NO', 'LAND', 'APPEARED', 'TO', 'THE', 'SOUTH'] +5105-28241-0006-1448: ref=['THE', 'LOG', 'AND', 'THE', 'COMPASS', 'THEREFORE', 'WERE', 'ABLE', 'TO', 'BE', 'CALLED', 'UPON', 'TO', 'DO', 'THE', 'WORK', 'OF', 'THE', 'SEXTANT', 'WHICH', 'HAD', 'BECOME', 'UTTERLY', 'USELESS'] +5105-28241-0006-1448: hyp=['THE', 'LOG', 'AND', 'THE', 'COMPASS', 'THEREFORE', 'WERE', 'ABLE', 'TO', 'BE', 'CALLED', 'UPON', 'TO', 'DO', 'THE', 'WORK', 'OF', 'THE', 'SEXTANT', 'WHICH', 'HAD', 'BECOME', 'UTTERLY', 'USELESS'] +5105-28241-0007-1449: ref=['THERE', 'IS', 'NO', 'FEAR', 'OF', 'THAT', 'SIR'] +5105-28241-0007-1449: hyp=["THERE'S", 'NO', 'FEAR', 'OF', 'THAT', 'SIR'] +5105-28241-0008-1450: ref=['THE', 'EARTH', 'HAS', 'UNDOUBTEDLY', 'ENTERED', 'UPON', 'A', 'NEW', 'ORBIT', 'BUT', 'SHE', 'IS', 'NOT', 'INCURRING', 'ANY', 'PROBABLE', 'RISK', 'OF', 'BEING', 'PRECIPITATED', 'ONTO', 'THE', 'SUN'] +5105-28241-0008-1450: hyp=['THAT', 'THE', 'EARTH', 'HAS', 'UNDOUBTEDLY', 'ENTERED', 'UPON', 'A', 'NEW', 'ORBIT', 'BUT', 'SHE', 'IS', 'NOT', 'INCURRING', 'ANY', 'PROBABLE', 'RISK', 'OF', 'BEING', 'PRECIPITATED', 'ON', 'TO', 'THE', 'SUN'] +5105-28241-0009-1451: ref=['AND', 'WHAT', 'DEMONSTRATION', 'DO', 'YOU', 'OFFER', 'ASKED', 'SERVADAC', 'EAGERLY', 'THAT', 'IT', 'WILL', 'NOT', 'HAPPEN'] +5105-28241-0009-1451: hyp=['AND', 'WHAT', 'DEMONSTRATION', 'DO', 'YOU', 'OFFER', 'ASKED', 'SERVADAC', 'EAGERLY', 'THAT', 'IT', 'WILL', 'NOT', 'HAPPEN'] +5105-28241-0010-1452: ref=['OCEAN', 'REIGNED', 'SUPREME'] +5105-28241-0010-1452: hyp=['OCEAN', 'REIGNED', 'SUPREME'] +5105-28241-0011-1453: ref=['ALL', 'THE', 'IMAGES', 'OF', 'HIS', 'PAST', 'LIFE', 'FLOATED', 'UPON', 'HIS', 'MEMORY', 'HIS', 'THOUGHTS', 'SPED', 'AWAY', 'TO', 'HIS', 'NATIVE', 'FRANCE', 'ONLY', 'TO', 'RETURN', 'AGAIN', 'TO', 'WONDER', 'WHETHER', 'THE', 'DEPTHS', 'OF', 'OCEAN', 'WOULD', 'REVEAL', 'ANY', 'TRACES', 'OF', 'THE', 'ALGERIAN', 'METROPOLIS'] +5105-28241-0011-1453: hyp=['ALL', 'THE', 'IMAGES', 'OF', 'HIS', 'PAST', 'LIFE', 'FLOATED', 'UPON', 'HIS', 'MEMORY', 'HIS', 'THOUGHTS', 'SPED', 'AWAY', 'TO', 'HIS', 'NATIVE', 'FRANCE', 'ONLY', 'TO', 'RETURN', 'AGAIN', 'TO', 'WONDER', 'WHETHER', 'THE', 'DEPTHS', 'OF', 'OCEAN', 'WOULD', 'REVEAL', 'ANY', 'TRACES', 'OF', 'THE', 'ALGERIAN', 'METROPOLIS'] +5105-28241-0012-1454: ref=['IS', 'IT', 'NOT', 'IMPOSSIBLE', 'HE', 'MURMURED', 'ALOUD', 'THAT', 'ANY', 'CITY', 'SHOULD', 'DISAPPEAR', 'SO', 'COMPLETELY'] +5105-28241-0012-1454: hyp=['IS', 'IT', 'NOT', 'IMPOSSIBLE', 'HE', 'MURMURED', 'ALOUD', 'THAT', 'ANY', 'CITY', 'SHOULD', 'DISAPPEAR', 'SO', 'COMPLETELY'] +5105-28241-0013-1455: ref=['WOULD', 'NOT', 'THE', 'LOFTIEST', 'EMINENCES', 'OF', 'THE', 'CITY', 'AT', 'LEAST', 'BE', 'VISIBLE'] +5105-28241-0013-1455: hyp=['WOULD', 'NOT', 'THE', 'LOFTIEST', 'EMINENCES', 'OF', 'THE', 'CITY', 'AT', 'LEAST', 'BE', 'VISIBLE'] +5105-28241-0014-1456: ref=['ANOTHER', 'CIRCUMSTANCE', 'WAS', 'MOST', 'REMARKABLE'] +5105-28241-0014-1456: hyp=['ANOTHER', 'CIRCUMSTANCE', 'WAS', 'MOST', 'REMARKABLE'] +5105-28241-0015-1457: ref=['TO', 'THE', 'SURPRISE', 'OF', 'ALL', 'AND', 'ESPECIALLY', 'OF', 'LIEUTENANT', 'PROCOPE', 'THE', 'LINE', 'INDICATED', 'A', 'BOTTOM', 'AT', 'A', 'NEARLY', 'UNIFORM', 'DEPTH', 'OF', 'FROM', 'FOUR', 'TO', 'FIVE', 'FATHOMS', 'AND', 'ALTHOUGH', 'THE', 'SOUNDING', 'WAS', 'PERSEVERED', 'WITH', 'CONTINUOUSLY', 'FOR', 'MORE', 
'THAN', 'TWO', 'HOURS', 'OVER', 'A', 'CONSIDERABLE', 'AREA', 'THE', 'DIFFERENCES', 'OF', 'LEVEL', 'WERE', 'INSIGNIFICANT', 'NOT', 'CORRESPONDING', 'IN', 'ANY', 'DEGREE', 'TO', 'WHAT', 'WOULD', 'BE', 'EXPECTED', 'OVER', 'THE', 'SITE', 'OF', 'A', 'CITY', 'THAT', 'HAD', 'BEEN', 'TERRACED', 'LIKE', 'THE', 'SEATS', 'OF', 'AN', 'AMPHITHEATER'] +5105-28241-0015-1457: hyp=['TO', 'THE', 'SURPRISE', 'OF', 'ALL', 'AND', 'ESPECIALLY', 'OF', 'LIEUTENANT', 'PROCOPE', 'THE', 'LINE', 'INDICATED', 'A', 'BOTTOM', 'AT', 'A', 'NEARLY', 'UNIFORM', 'DEPTH', 'OF', 'FROM', 'FOUR', 'TO', 'FIVE', 'FATHOMS', 'AND', 'ALTHOUGH', 'THE', 'SOUNDING', 'WAS', 'PERSEVERED', 'WITH', 'CONTINUOUSLY', 'FOR', 'MORE', 'THAN', 'TWO', 'HOURS', 'OVER', 'A', 'CONSIDERABLE', 'AREA', 'THE', 'DIFFERENCES', 'OF', 'LEVEL', 'WERE', 'INSIGNIFICANT', 'NOT', 'CORRESPONDING', 'IN', 'ANY', 'DEGREE', 'TO', 'WHAT', 'WOULD', 'BE', 'EXPECTED', 'OVER', 'THE', 'SITE', 'OF', 'A', 'CITY', 'THAT', 'HAD', 'BEEN', 'TERRACED', 'LIKE', 'THE', 'SEATS', 'OF', 'AN', 'AMPHITHEATRE'] +5105-28241-0016-1458: ref=['YOU', 'MUST', 'SEE', 'LIEUTENANT', 'I', 'SHOULD', 'THINK', 'THAT', 'WE', 'ARE', 'NOT', 'SO', 'NEAR', 'THE', 'COAST', 'OF', 'ALGERIA', 'AS', 'YOU', 'IMAGINED'] +5105-28241-0016-1458: hyp=['YOU', 'MUST', 'SEE', 'LIEUTENANT', 'I', 'SHOULD', 'THINK', 'THAT', 'WE', 'ARE', 'NOT', 'SO', 'NEAR', 'THE', 'COAST', 'OF', 'ALGERIA', 'AS', 'YOU', 'IMAGINED'] +5105-28241-0017-1459: ref=['AFTER', 'PONDERING', 'AWHILE', 'HE', 'SAID', 'IF', 'WE', 'WERE', 'FARTHER', 'AWAY', 'I', 'SHOULD', 'EXPECT', 'TO', 'FIND', 'A', 'DEPTH', 'OF', 'TWO', 'OR', 'THREE', 'HUNDRED', 'FATHOMS', 'INSTEAD', 'OF', 'FIVE', 'FATHOMS', 'FIVE', 'FATHOMS'] +5105-28241-0017-1459: hyp=['AFTER', 'PONDERING', 'A', 'WHILE', 'HE', 'SAID', 'IF', 'WE', 'WERE', 'FARTHER', 'AWAY', 'I', 'SHOULD', 'EXPECT', 'TO', 'FIND', 'A', 'DEPTH', 'OF', 'TWO', 'OR', 'THREE', 'HUNDRED', 'FATHOMS', 'INSTEAD', 'OF', 'FIVE', 'FATHOMS', 'FIVE', 'FATHOMS'] +5105-28241-0018-1460: ref=['ITS', 'DEPTH', 'REMAINED', 'INVARIABLE', 'STILL', 'FOUR', 'OR', 'AT', 'MOST', 'FIVE', 'FATHOMS', 'AND', 'ALTHOUGH', 'ITS', 'BOTTOM', 'WAS', 'ASSIDUOUSLY', 'DREDGED', 'IT', 'WAS', 'ONLY', 'TO', 'PROVE', 'IT', 'BARREN', 'OF', 'MARINE', 'PRODUCTION', 'OF', 'ANY', 'TYPE'] +5105-28241-0018-1460: hyp=['ITS', 'DEPTH', 'REMAINED', 'INVARIABLE', 'STILL', 'FOUR', 'OR', 'AT', 'MOST', 'FIVE', 'FATHOMS', 'AND', 'ALTHOUGH', 'ITS', 'BOTTOM', 'WAS', 'ASSIDUOUSLY', 'DREDGED', 'IT', 'WAS', 'ONLY', 'TO', 'PROVE', 'IT', 'BARREN', 'OF', 'MARINE', 'PRODUCTION', 'OF', 'ANY', 'TYPE'] +5105-28241-0019-1461: ref=['NOTHING', 'WAS', 'TO', 'BE', 'DONE', 'BUT', 'TO', 'PUT', 'ABOUT', 'AND', 'RETURN', 'IN', 'DISAPPOINTMENT', 'TOWARDS', 'THE', 'NORTH'] +5105-28241-0019-1461: hyp=['NOTHING', 'WAS', 'TO', 'BE', 'DONE', 'BUT', 'TO', 'PUT', 'ABOUT', 'AND', 'RETURN', 'IN', 'DISAPPOINTMENT', 'TOWARD', 'THE', 'NORTH'] +5142-33396-0000-1462: ref=['AT', 'ANOTHER', 'TIME', 'HARALD', 'ASKED'] +5142-33396-0000-1462: hyp=['AT', 'ANOTHER', 'TIME', 'HAROLD', 'ASKED'] +5142-33396-0001-1463: ref=['WHAT', 'IS', 'YOUR', 'COUNTRY', 'OLAF', 'HAVE', 'YOU', 'ALWAYS', 'BEEN', 'A', 'THRALL', 'THE', "THRALL'S", 'EYES', 'FLASHED'] +5142-33396-0001-1463: hyp=['WHAT', 'IS', 'YOUR', 'COUNTRY', 'OLAF', 'HAVE', 'YOU', 'ALWAYS', 'BEEN', 'A', 'THRALL', 'THE', "THRALL'S", 'EYES', 'FLASHED'] +5142-33396-0002-1464: ref=['TWO', 'HUNDRED', 'WARRIORS', 'FEASTED', 'IN', 'HIS', 'HALL', 'AND', 'FOLLOWED', 'HIM', 'TO', 'BATTLE'] +5142-33396-0002-1464: hyp=['TWO', 'HUNDRED', 'WARRIORS', 'FEASTED', 'IN', 'HIS', 'HALL', 'AND', 
'FOLLOWED', 'HIM', 'TO', 'BATTLE'] +5142-33396-0003-1465: ref=['THE', 'REST', 'OF', 'YOU', 'OFF', 'A', 'VIKING', 'HE', 'HAD', 'THREE', 'SHIPS'] +5142-33396-0003-1465: hyp=['THE', 'REST', 'OF', 'YOU', 'OFF', 'A', 'VIKING', 'HE', 'HAD', 'THREE', 'SHIPS'] +5142-33396-0004-1466: ref=['THESE', 'HE', 'GAVE', 'TO', 'THREE', 'OF', 'MY', 'BROTHERS'] +5142-33396-0004-1466: hyp=['THESE', 'HE', 'GAVE', 'TO', 'THREE', 'OF', 'MY', 'BROTHERS'] +5142-33396-0005-1467: ref=['BUT', 'I', 'STAYED', 'THAT', 'SPRING', 'AND', 'BUILT', 'ME', 'A', 'BOAT'] +5142-33396-0005-1467: hyp=['BUT', 'I', 'STAYED', 'THAT', 'SPRING', 'AND', 'BUILT', 'ME', 'A', 'BOAT'] +5142-33396-0006-1468: ref=['I', 'MADE', 'HER', 'FOR', 'ONLY', 'TWENTY', 'OARS', 'BECAUSE', 'I', 'THOUGHT', 'FEW', 'MEN', 'WOULD', 'FOLLOW', 'ME', 'FOR', 'I', 'WAS', 'YOUNG', 'FIFTEEN', 'YEARS', 'OLD'] +5142-33396-0006-1468: hyp=['I', 'MADE', 'HER', 'FOR', 'ONLY', 'TWENTY', 'OARS', 'BECAUSE', 'I', 'THOUGHT', 'FEW', 'MEN', 'WOULD', 'FOLLOW', 'ME', 'FOR', 'I', 'WAS', 'YOUNG', 'FIFTEEN', 'YEARS', 'OLD'] +5142-33396-0007-1469: ref=['AT', 'THE', 'PROW', 'I', 'CARVED', 'THE', 'HEAD', 'WITH', 'OPEN', 'MOUTH', 'AND', 'FORKED', 'TONGUE', 'THRUST', 'OUT'] +5142-33396-0007-1469: hyp=['AT', 'THE', 'PROW', 'I', 'CARVED', 'THE', 'HEAD', 'WITH', 'OPEN', 'MOUTH', 'AND', 'FORKED', 'TONGUE', 'THRUST', 'OUT'] +5142-33396-0008-1470: ref=['I', 'PAINTED', 'THE', 'EYES', 'RED', 'FOR', 'ANGER'] +5142-33396-0008-1470: hyp=['I', 'PAINTED', 'THE', 'EYES', 'RED', 'FOR', 'ANGER'] +5142-33396-0009-1471: ref=['THERE', 'STAND', 'SO', 'I', 'SAID', 'AND', 'GLARE', 'AND', 'HISS', 'AT', 'MY', 'FOES'] +5142-33396-0009-1471: hyp=['THERE', 'STAND', 'SO', 'I', 'SAID', 'AND', 'GLARE', 'AND', 'HISS', 'AT', 'MY', 'FOES'] +5142-33396-0010-1472: ref=['IN', 'THE', 'STERN', 'I', 'CURVED', 'THE', 'TAIL', 'UP', 'ALMOST', 'AS', 'HIGH', 'AS', 'THE', 'HEAD'] +5142-33396-0010-1472: hyp=['IN', 'THE', 'STERN', 'I', 'CARVED', 'THE', 'TAIL', 'UP', 'ALMOST', 'AS', 'HIGH', 'AS', 'THE', 'HEAD'] +5142-33396-0011-1473: ref=['THERE', 'SHE', 'SAT', 'ON', 'THE', 'ROLLERS', 'AS', 'FAIR', 'A', 'SHIP', 'AS', 'I', 'EVER', 'SAW'] +5142-33396-0011-1473: hyp=['THERE', 'SHE', 'SAT', 'ON', 'THE', 'ROLLERS', 'AS', 'FAIR', 'A', 'SHIP', 'AS', 'I', 'EVER', 'SAW'] +5142-33396-0012-1474: ref=['THEN', 'I', 'WILL', 'GET', 'ME', 'A', 'FARM', 'AND', 'WILL', 'WINTER', 'IN', 'THAT', 'LAND', 'NOW', 'WHO', 'WILL', 'FOLLOW', 'ME'] +5142-33396-0012-1474: hyp=['THEN', 'I', 'WILL', 'GET', 'ME', 'A', 'FARM', 'AND', 'WILL', 'WINTER', 'IN', 'THAT', 'LAND', 'NOW', 'WHO', 'WILL', 'FOLLOW', 'ME'] +5142-33396-0013-1475: ref=['HE', 'IS', 'BUT', 'A', 'BOY', 'THE', 'MEN', 'SAID'] +5142-33396-0013-1475: hyp=['HE', 'IS', 'BUT', 'A', 'BOY', 'THE', 'MAN', 'SAID'] +5142-33396-0014-1476: ref=['THIRTY', 'MEN', 'ONE', 'AFTER', 'ANOTHER', 'RAISED', 'THEIR', 'HORNS', 'AND', 'SAID'] +5142-33396-0014-1476: hyp=['THIRTY', 'MEN', 'ONE', 'AFTER', 'ANOTHER', 'RAISED', 'THEIR', 'HORNS', 'AND', 'SAID'] +5142-33396-0015-1477: ref=['AS', 'OUR', 'BOAT', 'FLASHED', 'DOWN', 'THE', 'ROLLERS', 'INTO', 'THE', 'WATER', 'I', 'MADE', 'THIS', 'SONG', 'AND', 'SANG', 'IT'] +5142-33396-0015-1477: hyp=['AS', 'OUR', 'BOAT', 'FLASHED', 'DOWN', 'THE', 'ROLLERS', 'INTO', 'THE', 'WATER', 'I', 'MADE', 'THIS', 'SONG', 'AND', 'SANG', 'IT'] +5142-33396-0016-1478: ref=['SO', 'WE', 'HARRIED', 'THE', 'COAST', 'OF', 'NORWAY'] +5142-33396-0016-1478: hyp=['SO', 'WE', 'HARRIED', 'THE', 'COAST', 'OF', 'NORWAY'] +5142-33396-0017-1479: ref=['WE', 'ATE', 'AT', 'MANY', "MEN'S", 'TABLES', 'UNINVITED'] 
+5142-33396-0017-1479: hyp=['WE', 'ATE', 'AT', 'MANY', "MEN'S", 'TABLES', 'UNINVITED'] +5142-33396-0018-1480: ref=['MY', "DRAGON'S", 'BELLY', 'IS', 'NEVER', 'FULL', 'AND', 'ON', 'BOARD', 'WENT', 'THE', 'GOLD'] +5142-33396-0018-1480: hyp=['A', "DRAGON'S", 'BELLY', 'IS', 'NEVER', 'FULL', 'AND', 'ON', 'BOARD', 'WENT', 'THE', 'GOLD'] +5142-33396-0019-1481: ref=['OH', 'IT', 'IS', 'BETTER', 'TO', 'LIVE', 'ON', 'THE', 'SEA', 'AND', 'LET', 'OTHER', 'MEN', 'RAISE', 'YOUR', 'CROPS', 'AND', 'COOK', 'YOUR', 'MEALS'] +5142-33396-0019-1481: hyp=['OH', 'IT', 'IS', 'BETTER', 'TO', 'LIVE', 'ON', 'THE', 'SEA', 'AND', 'LET', 'OTHER', 'MEN', 'RAISE', 'YOUR', 'CROPS', 'AND', 'COOK', 'YOUR', 'MEALS'] +5142-33396-0020-1482: ref=['A', 'HOUSE', 'SMELLS', 'OF', 'SMOKE', 'A', 'SHIP', 'SMELLS', 'OF', 'FROLIC'] +5142-33396-0020-1482: hyp=['A', 'HOUSE', 'SMELLS', 'OF', 'SMOKE', 'A', 'SHIP', 'SMELLS', 'OF', 'FROLIC'] +5142-33396-0021-1483: ref=['UP', 'AND', 'DOWN', 'THE', 'WATER', 'WE', 'WENT', 'TO', 'GET', 'MUCH', 'WEALTH', 'AND', 'MUCH', 'FROLIC'] +5142-33396-0021-1483: hyp=['UP', 'AND', 'DOWN', 'THE', 'WATER', 'WE', 'WENT', 'TO', 'GET', 'MUCH', 'WEALTH', 'AND', 'MUCH', 'FROLIC'] +5142-33396-0022-1484: ref=['WHAT', 'OF', 'THE', 'FARM', 'OLAF', 'NOT', 'YET', 'I', 'ANSWERED', 'VIKING', 'IS', 'BETTER', 'FOR', 'SUMMER'] +5142-33396-0022-1484: hyp=['WHAT', 'OF', 'THE', 'FARM', 'OLAF', 'NOT', 'YET', 'I', 'ANSWERED', 'VIKING', 'IS', 'BETTER', 'FOR', 'SUMMER'] +5142-33396-0023-1485: ref=['IT', 'WAS', 'SO', 'DARK', 'THAT', 'I', 'COULD', 'SEE', 'NOTHING', 'BUT', 'A', 'FEW', 'SPARKS', 'ON', 'THE', 'HEARTH'] +5142-33396-0023-1485: hyp=['IT', 'WAS', 'SO', 'DARK', 'THAT', 'I', 'COULD', 'SEE', 'NOTHING', 'BUT', 'A', 'FEW', 'SPARKS', 'ON', 'THE', 'HEARTH'] +5142-33396-0024-1486: ref=['I', 'STOOD', 'WITH', 'MY', 'BACK', 'TO', 'THE', 'WALL', 'FOR', 'I', 'WANTED', 'NO', 'SWORD', 'REACHING', 'OUT', 'OF', 'THE', 'DARK', 'FOR', 'ME'] +5142-33396-0024-1486: hyp=['I', 'STOOD', 'WITH', 'MY', 'BACK', 'TO', 'THE', 'WALL', 'FOR', 'I', 'WANTED', 'NO', 'SWORD', 'REACHING', 'OUT', 'OF', 'THE', 'DARK', 'FOR', 'ME'] +5142-33396-0025-1487: ref=['COME', 'COME', 'I', 'CALLED', 'WHEN', 'NO', 'ONE', 'OBEYED', 'A', 'FIRE'] +5142-33396-0025-1487: hyp=['COME', 'COME', 'I', 'CALLED', 'WHEN', 'NO', 'ONE', 'OBEYED', 'A', 'FIRE'] +5142-33396-0026-1488: ref=['MY', 'MEN', 'LAUGHED', 'YES', 'A', 'STINGY', 'HOST'] +5142-33396-0026-1488: hyp=['MY', 'MEN', 'LAUGHED', 'YES', 'A', 'STINGY', 'HOST'] +5142-33396-0027-1489: ref=['HE', 'ACTS', 'AS', 'THOUGH', 'HE', 'HAD', 'NOT', 'EXPECTED', 'US'] +5142-33396-0027-1489: hyp=['HE', 'ACTS', 'AS', 'THOUGH', 'HE', 'HAD', 'NOT', 'EXPECTED', 'US'] +5142-33396-0028-1490: ref=['ON', 'A', 'BENCH', 'IN', 'A', 'FAR', 'CORNER', 'WERE', 'A', 'DOZEN', 'PEOPLE', 'HUDDLED', 'TOGETHER'] +5142-33396-0028-1490: hyp=['ON', 'A', 'BENCH', 'IN', 'A', 'FAR', 'CORNER', 'WERE', 'A', 'DOZEN', 'PEOPLE', 'HUDDLED', 'TOGETHER'] +5142-33396-0029-1491: ref=['BRING', 'IN', 'THE', 'TABLE', 'WE', 'ARE', 'HUNGRY'] +5142-33396-0029-1491: hyp=['BRING', 'IN', 'THE', 'TABLE', 'WE', 'ARE', 'HUNGRY'] +5142-33396-0030-1492: ref=['THE', 'THRALLS', 'WERE', 'BRINGING', 'IN', 'A', 'GREAT', 'POT', 'OF', 'MEAT'] +5142-33396-0030-1492: hyp=['THE', 'THRALLS', 'WERE', 'BRINGING', 'IN', 'A', 'GREAT', 'POT', 'OF', 'MEAT'] +5142-33396-0031-1493: ref=['THEY', 'SET', 'UP', 'A', 'CRANE', 'OVER', 'THE', 'FIRE', 'AND', 'HUNG', 'THE', 'POT', 'UPON', 'IT', 'AND', 'WE', 'SAT', 'AND', 'WATCHED', 'IT', 'BOIL', 'WHILE', 'WE', 'JOKED', 'AT', 'LAST', 'THE', 'SUPPER', 'BEGAN'] 
+5142-33396-0031-1493: hyp=['THEY', 'SET', 'UP', 'A', 'CRANE', 'OVER', 'THE', 'FIRE', 'AND', 'HUNG', 'THE', 'POT', 'UPON', 'IT', 'AND', 'WE', 'SAT', 'AND', 'WATCHED', 'IT', 'BOIL', 'WHILE', 'WE', 'JOKED', 'AT', 'LAST', 'THE', 'SUPPER', 'BEGAN'] +5142-33396-0032-1494: ref=['THE', 'FARMER', 'SAT', 'GLOOMILY', 'ON', 'THE', 'BENCH', 'AND', 'WOULD', 'NOT', 'EAT', 'AND', 'YOU', 'CANNOT', 'WONDER', 'FOR', 'HE', 'SAW', 'US', 'PUTTING', 'POTFULS', 'OF', 'HIS', 'GOOD', 'BEEF', 'AND', 'BASKET', 'LOADS', 'OF', 'BREAD', 'INTO', 'OUR', 'BIG', 'MOUTHS'] +5142-33396-0032-1494: hyp=['THE', 'FARMER', 'SAT', 'GLOOMILY', 'ON', 'THE', 'BENCH', 'AND', 'WOULD', 'NOT', 'EAT', 'AND', 'YOU', 'CANNOT', 'WONDER', 'FOR', 'HE', 'SAW', 'US', 'PUTTING', 'POTFULS', 'OF', 'HIS', 'GOOD', 'BEEF', 'AND', 'BASKET', 'LOADS', 'OF', 'BREAD', 'INTO', 'OUR', 'BIG', 'MOUTHS'] +5142-33396-0033-1495: ref=['YOU', 'WOULD', 'NOT', 'EAT', 'WITH', 'US', 'YOU', 'CANNOT', 'SAY', 'NO', 'TO', 'HALF', 'OF', 'MY', 'ALE', 'I', 'DRINK', 'THIS', 'TO', 'YOUR', 'HEALTH'] +5142-33396-0033-1495: hyp=['YOU', 'WOULD', 'NOT', 'EAT', 'WITH', 'US', 'YOU', 'CANNOT', 'SAY', 'NO', 'TO', 'HALF', 'OF', 'MY', 'ALE', 'I', 'DRINK', 'THIS', 'TO', 'YOUR', 'HEALTH'] +5142-33396-0034-1496: ref=['THEN', 'I', 'DRANK', 'HALF', 'OF', 'THE', 'HORNFUL', 'AND', 'SENT', 'THE', 'REST', 'ACROSS', 'THE', 'FIRE', 'TO', 'THE', 'FARMER', 'HE', 'TOOK', 'IT', 'AND', 'SMILED', 'SAYING'] +5142-33396-0034-1496: hyp=['THEN', 'I', 'DRANK', 'HALF', 'OF', 'THE', 'HORNFUL', 'AND', 'SET', 'THE', 'REST', 'ACROSS', 'THE', 'FIRE', 'TO', 'THE', 'FARMER', 'HE', 'TOOK', 'IT', 'AND', 'SMILED', 'SAYING'] +5142-33396-0035-1497: ref=['DID', 'YOU', 'EVER', 'HAVE', 'SUCH', 'A', 'LORDLY', 'GUEST', 'BEFORE', 'I', 'WENT', 'ON'] +5142-33396-0035-1497: hyp=['DID', 'YOU', 'EVER', 'HAVE', 'SUCH', 'A', 'LORDLY', 'GUEST', 'BEFORE', 'I', 'WENT', 'ON'] +5142-33396-0036-1498: ref=['SO', 'I', 'WILL', 'GIVE', 'OUT', 'THIS', 'LAW', 'THAT', 'MY', 'MEN', 'SHALL', 'NEVER', 'LEAVE', 'YOU', 'ALONE'] +5142-33396-0036-1498: hyp=['SO', 'I', 'WILL', 'GIVE', 'OUT', 'THIS', 'LAW', 'THAT', 'MY', 'MEN', 'SHALL', 'NEVER', 'LEAVE', 'YOU', 'ALONE'] +5142-33396-0037-1499: ref=['HAKON', 'THERE', 'SHALL', 'BE', 'YOUR', 'CONSTANT', 'COMPANION', 'FRIEND', 'FARMER'] +5142-33396-0037-1499: hyp=['HAWKIN', 'THERE', 'SHALL', 'BE', 'YOUR', 'CONSTANT', 'COMPANION', 'FRIEND', 'FARMER'] +5142-33396-0038-1500: ref=['HE', 'SHALL', 'NOT', 'LEAVE', 'YOU', 'DAY', 'OR', 'NIGHT', 'WHETHER', 'YOU', 'ARE', 'WORKING', 'OR', 'PLAYING', 'OR', 'SLEEPING'] +5142-33396-0038-1500: hyp=['HE', 'SHALL', 'NOT', 'LEAVE', 'YOU', 'DAY', 'OR', 'NIGHT', 'WHETHER', 'YOU', 'ARE', 'WORKING', 'OR', 'PLAYING', 'OR', 'SLEEPING'] +5142-33396-0039-1501: ref=['I', 'NAMED', 'NINE', 'OTHERS', 'AND', 'SAID'] +5142-33396-0039-1501: hyp=['I', 'NAMED', 'NINE', 'OTHERS', 'AND', 'SAID'] +5142-33396-0040-1502: ref=['AND', 'THESE', 'SHALL', 'FOLLOW', 'YOUR', 'THRALLS', 'IN', 'THE', 'SAME', 'WAY'] +5142-33396-0040-1502: hyp=['AND', 'THESE', 'SHALL', 'FOLLOW', 'YOUR', 'THRALLS', 'IN', 'THE', 'SAME', 'WAY'] +5142-33396-0041-1503: ref=['SO', 'I', 'SET', 'GUARDS', 'OVER', 'EVERY', 'ONE', 'IN', 'THAT', 'HOUSE'] +5142-33396-0041-1503: hyp=['SO', 'I', 'SET', 'GUARDS', 'OVER', 'EVERY', 'ONE', 'IN', 'THAT', 'HOUSE'] +5142-33396-0042-1504: ref=['SO', 'NO', 'TALES', 'GOT', 'OUT', 'TO', 'THE', 'NEIGHBORS', 'BESIDES', 'IT', 'WAS', 'A', 'LONELY', 'PLACE', 'AND', 'BY', 'GOOD', 'LUCK', 'NO', 'ONE', 'CAME', 'THAT', 'WAY'] +5142-33396-0042-1504: hyp=['SO', 'NO', 'TALES', 'GOT', 'OUT', 'TO', 'THE', 'NEIGHBOURS', 
'BESIDES', 'IT', 'WAS', 'A', 'LONELY', 'PLACE', 'AND', 'BY', 'GOOD', 'LUCK', 'NO', 'ONE', 'CAME', 'THAT', 'WAY'] +5142-33396-0043-1505: ref=['THEIR', 'EYES', 'DANCED', 'BIG', 'THORLEIF', 'STOOD', 'UP', 'AND', 'STRETCHED', 'HIMSELF'] +5142-33396-0043-1505: hyp=['THEIR', 'EYES', 'DANCED', 'BIG', 'TOAR', 'LEAF', 'STOOD', 'UP', 'AND', 'STRETCHED', 'HIMSELF'] +5142-33396-0044-1506: ref=['I', 'AM', 'STIFF', 'WITH', 'LONG', 'SITTING', 'HE', 'SAID', 'I', 'ITCH', 'FOR', 'A', 'FIGHT', 'I', 'TURNED', 'TO', 'THE', 'FARMER'] +5142-33396-0044-1506: hyp=["I'M", 'STIFF', 'WITH', 'LONG', 'SITTING', 'HE', 'SAID', 'I', 'ITCH', 'FOR', 'A', 'FIGHT', 'I', 'TURNED', 'TO', 'THE', 'FARMER'] +5142-33396-0045-1507: ref=['THIS', 'IS', 'OUR', 'LAST', 'FEAST', 'WITH', 'YOU', 'I', 'SAID'] +5142-33396-0045-1507: hyp=['THIS', 'IS', 'OUR', 'LAST', 'FEAST', 'WITH', 'YOU', 'I', 'SAID'] +5142-33396-0046-1508: ref=['BY', 'THE', 'BEARD', 'OF', 'ODIN', 'I', 'CRIED', 'YOU', 'HAVE', 'TAKEN', 'OUR', 'JOKE', 'LIKE', 'A', 'MAN'] +5142-33396-0046-1508: hyp=['BY', 'THE', 'BEARD', 'OF', 'ODIN', 'I', 'CRIED', 'YOU', 'HAVE', 'TAKEN', 'OUR', 'JOKE', 'LIKE', 'A', 'MAN'] +5142-33396-0047-1509: ref=['MY', 'MEN', 'POUNDED', 'THE', 'TABLE', 'WITH', 'THEIR', 'FISTS'] +5142-33396-0047-1509: hyp=['MY', 'MEN', 'POUNDED', 'THE', 'TABLE', 'WITH', 'THEIR', 'FISTS'] +5142-33396-0048-1510: ref=['BY', 'THE', 'HAMMER', 'OF', 'THOR', 'SHOUTED', 'GRIM', 'HERE', 'IS', 'NO', 'STINGY', 'COWARD'] +5142-33396-0048-1510: hyp=['BY', 'THE', 'HAMMER', 'OF', 'THOR', 'SHOUTED', 'GRIM', 'THERE', 'IS', 'NO', 'STINGY', 'COWARD'] +5142-33396-0049-1511: ref=['HERE', 'FRIEND', 'TAKE', 'IT', 'AND', 'HE', 'THRUST', 'IT', 'INTO', 'THE', "FARMER'S", 'HAND'] +5142-33396-0049-1511: hyp=['HERE', 'FRIEND', 'TAKE', 'IT', 'AND', 'HE', 'THRUST', 'IT', 'INTO', 'THE', "FARMER'S", 'HAND'] +5142-33396-0050-1512: ref=['MAY', 'YOU', 'DRINK', "HEART'S", 'EASE', 'FROM', 'IT', 'FOR', 'MANY', 'YEARS'] +5142-33396-0050-1512: hyp=['MAY', 'YOU', 'DRINK', "HEART'S", 'EASE', 'FROM', 'IT', 'FOR', 'MANY', 'YEARS'] +5142-33396-0051-1513: ref=['AND', 'WITH', 'IT', 'I', 'LEAVE', 'YOU', 'A', 'NAME', 'SIF', 'THE', 'FRIENDLY', 'I', 'SHALL', 'HOPE', 'TO', 'DRINK', 'WITH', 'YOU', 'SOMETIME', 'IN', 'VALHALLA'] +5142-33396-0051-1513: hyp=['AND', 'WITH', 'IT', 'I', 'LEAVE', 'YOU', 'A', 'NAME', 'SITH', 'THE', 'FRIENDLY', 'I', 'SHALL', 'HOPE', 'TO', 'DRINK', 'WITH', 'YOU', 'SOME', 'TIME', 'IN', 'VALHALLA'] +5142-33396-0052-1514: ref=['HERE', 'IS', 'A', 'RING', 'FOR', 'SIF', 'THE', 'FRIENDLY', 'AND', 'HERE', 'IS', 'A', 'BRACELET', 'A', 'SWORD', 'WOULD', 'NOT', 'BE', 'ASHAMED', 'TO', 'HANG', 'AT', 'YOUR', 'SIDE'] +5142-33396-0052-1514: hyp=['HERE', 'IS', 'A', 'RING', 'FOR', 'SITH', 'THE', 'FRIENDLY', 'AND', 'HERE', 'IS', 'A', 'BRACELET', 'AND', 'A', 'SWORD', 'WOULD', 'NOT', 'BE', 'ASHAMED', 'TO', 'HANG', 'AT', 'YOUR', 'SIDE'] +5142-33396-0053-1515: ref=['I', 'TOOK', 'FIVE', 'GREAT', 'BRACELETS', 'OF', 'GOLD', 'FROM', 'OUR', 'TREASURE', 'CHEST', 'AND', 'GAVE', 'THEM', 'TO', 'HIM'] +5142-33396-0053-1515: hyp=['I', 'TOOK', 'FIVE', 'GREAT', 'BRACELETS', 'OF', 'GOLD', 'FROM', 'OUR', 'TREASURE', 'CHEST', 'AND', 'GAVE', 'THEM', 'TO', 'HIM'] +5142-33396-0054-1516: ref=['THAT', 'IS', 'THE', 'BEST', 'WAY', 'TO', 'DECIDE', 'FOR', 'THE', 'SPEAR', 'WILL', 'ALWAYS', 'POINT', 'SOMEWHERE', 'AND', 'ONE', 'THING', 'IS', 'AS', 'GOOD', 'AS', 'ANOTHER'] +5142-33396-0054-1516: hyp=['THAT', 'IS', 'THE', 'BEST', 'WAY', 'TO', 'DECIDE', 'FOR', 'THE', 'SPEAR', 'WILL', 'ALWAYS', 'POINT', 'SOMEWHERE', 'AND', 'ONE', 'THING', 'IS', 'AS', 'GOOD', 
'AS', 'ANOTHER'] +5142-33396-0055-1517: ref=['THAT', 'TIME', 'IT', 'POINTED', 'US', 'INTO', 'YOUR', "FATHER'S", 'SHIPS'] +5142-33396-0055-1517: hyp=['THAT', 'TIME', 'IT', 'POINTED', 'US', 'INTO', 'YOUR', "FATHER'S", 'SHIPS'] +5142-33396-0056-1518: ref=['HERE', 'THEY', 'SAID', 'IS', 'A', 'RASCAL', 'WHO', 'HAS', 'BEEN', 'HARRYING', 'OUR', 'COASTS'] +5142-33396-0056-1518: hyp=['HERE', 'THEY', 'SAID', 'IS', 'A', 'RASCAL', 'WHO', 'HAS', 'BEEN', 'HARRYING', 'OUR', 'COASTS'] +5142-33396-0057-1519: ref=['WE', 'SUNK', 'HIS', 'SHIP', 'AND', 'MEN', 'BUT', 'HIM', 'WE', 'BROUGHT', 'TO', 'YOU'] +5142-33396-0057-1519: hyp=['WE', 'SUNK', 'HIS', 'SHIP', 'AND', 'MEN', 'BUT', 'HIM', 'WE', 'BROUGHT', 'TO', 'YOU'] +5142-33396-0058-1520: ref=['A', 'ROBBER', 'VIKING', 'SAID', 'THE', 'KING', 'AND', 'SCOWLED', 'AT', 'ME'] +5142-33396-0058-1520: hyp=['A', 'ROBBER', 'VIKING', 'SAID', 'THE', 'KING', 'AND', 'HE', 'SCOWLED', 'AT', 'ME'] +5142-33396-0059-1521: ref=['YES', 'AND', 'WITH', 'ALL', 'YOUR', 'FINGERS', 'IT', 'TOOK', 'YOU', 'A', 'YEAR', 'TO', 'CATCH', 'ME', 'THE', 'KING', 'FROWNED', 'MORE', 'ANGRILY'] +5142-33396-0059-1521: hyp=['YES', 'AND', 'WITH', 'ALL', 'YOUR', 'FINGERS', 'IT', 'TOOK', 'YOU', 'A', 'YEAR', 'TO', 'CATCH', 'ME', 'THE', 'KING', 'FROWNED', 'MORE', 'ANGRILY'] +5142-33396-0060-1522: ref=['TAKE', 'HIM', 'OUT', 'THORKEL', 'AND', 'LET', 'HIM', 'TASTE', 'YOUR', 'SWORD'] +5142-33396-0060-1522: hyp=['TAKE', 'HIM', 'OUT', 'TURKLE', 'AND', 'LET', 'HIM', 'TASTE', 'YOUR', 'SWORD'] +5142-33396-0061-1523: ref=['YOUR', 'MOTHER', 'THE', 'QUEEN', 'WAS', 'STANDING', 'BY'] +5142-33396-0061-1523: hyp=['YOUR', 'MOTHER', 'THE', 'QUEEN', 'WAS', 'STANDING', 'BY'] +5142-33396-0062-1524: ref=['NOW', 'SHE', 'PUT', 'HER', 'HAND', 'ON', 'HIS', 'ARM', 'AND', 'SMILED', 'AND', 'SAID'] +5142-33396-0062-1524: hyp=['NOW', 'SHE', 'PUT', 'HER', 'HAND', 'ON', 'HIS', 'ARM', 'AND', 'SMILED', 'AND', 'SAID'] +5142-33396-0063-1525: ref=['AND', 'WOULD', 'HE', 'NOT', 'BE', 'A', 'GOOD', 'GIFT', 'FOR', 'OUR', 'BABY'] +5142-33396-0063-1525: hyp=['AND', 'WOULD', 'HE', 'NOT', 'BE', 'A', 'GOOD', 'GIFT', 'FOR', 'OUR', 'BABY'] +5142-33396-0064-1526: ref=['YOUR', 'FATHER', 'THOUGHT', 'A', 'MOMENT', 'THEN', 'LOOKED', 'AT', 'YOUR', 'MOTHER', 'AND', 'SMILED'] +5142-33396-0064-1526: hyp=['YOUR', 'FATHER', 'THOUGHT', 'A', 'MOMENT', 'THEN', 'LOOKED', 'AT', 'YOUR', 'MOTHER', 'AND', 'SMILED'] +5142-33396-0065-1527: ref=['SOFT', 'HEART', 'HE', 'SAID', 'GENTLY', 'TO', 'HER', 'THEN', 'TO', 'THORKEL', 'WELL', 'LET', 'HIM', 'GO', 'THORKEL'] +5142-33396-0065-1527: hyp=['SOFT', 'HEART', 'HE', 'SAID', 'GENTLY', 'TO', 'HER', 'THEN', 'TO', 'TURKLE', 'WELL', 'LET', 'HIM', 'GO', 'TURKLE'] +5142-33396-0066-1528: ref=['THEN', 'HE', 'TURNED', 'TO', 'ME', 'AGAIN', 'FROWNING'] +5142-33396-0066-1528: hyp=['THEN', 'HE', 'TURNED', 'TO', 'ME', 'AGAIN', 'FROWNING'] +5142-33396-0067-1529: ref=['BUT', 'YOUNG', 'SHARP', 'TONGUE', 'NOW', 'THAT', 'WE', 'HAVE', 'CAUGHT', 'YOU', 'WE', 'WILL', 'PUT', 'YOU', 'INTO', 'A', 'TRAP', 'THAT', 'YOU', 'CANNOT', 'GET', 'OUT', 'OF'] +5142-33396-0067-1529: hyp=['BUT', 'YOUNG', 'SHARP', 'TONGUE', 'NOW', 'THAT', 'WE', 'HAVE', 'CAUGHT', 'YOU', 'WE', 'WILL', 'PUT', 'YOU', 'INTO', 'A', 'TRAP', 'THAT', 'YOU', 'CANNOT', 'GET', 'OUT', 'OF'] +5142-33396-0068-1530: ref=['SO', 'I', 'LIVED', 'AND', 'NOW', 'AM', 'YOUR', 'TOOTH', 'THRALL', 'WELL', 'IT', 'IS', 'THE', 'LUCK', 'OF', 'WAR'] +5142-33396-0068-1530: hyp=['SO', 'I', 'LIVED', 'AND', 'NOW', 'AM', 'YOUR', 'TOOTH', 'THRALL', 'WELL', 'IT', 'IS', 'THE', 'LUCK', 'OF', 'WAR'] +5142-36377-0000-1531: ref=['IT', 
'WAS', 'ONE', 'OF', 'THE', 'MASTERLY', 'AND', 'CHARMING', 'STORIES', 'OF', 'DUMAS', 'THE', 'ELDER'] +5142-36377-0000-1531: hyp=['IT', 'WAS', 'ONE', 'OF', 'THE', 'MASTERLY', 'AND', 'CHARMING', 'STORIES', 'OF', 'DE', 'MAU', 'THE', 'ELDER'] +5142-36377-0001-1532: ref=['IN', 'FIVE', 'MINUTES', 'I', 'WAS', 'IN', 'A', 'NEW', 'WORLD', 'AND', 'MY', 'MELANCHOLY', 'ROOM', 'WAS', 'FULL', 'OF', 'THE', 'LIVELIEST', 'FRENCH', 'COMPANY'] +5142-36377-0001-1532: hyp=['IN', 'FIVE', 'MINUTES', 'I', 'WAS', 'IN', 'A', 'NEW', 'WORLD', 'AND', 'MY', 'MELANCHOLY', 'ROOM', 'WAS', 'FULL', 'OF', 'THE', 'LIVELIEST', 'FRENCH', 'COMPANY'] +5142-36377-0002-1533: ref=['THE', 'SOUND', 'OF', 'AN', 'IMPERATIVE', 'AND', 'UNCOMPROMISING', 'BELL', 'RECALLED', 'ME', 'IN', 'DUE', 'TIME', 'TO', 'THE', 'REGIONS', 'OF', 'REALITY'] +5142-36377-0002-1533: hyp=['THE', 'SOUND', 'OF', 'AN', 'IMPERATIVE', 'AND', 'UNCOMPROMISING', 'BELL', 'RECALLED', 'ME', 'IN', 'DUE', 'TIME', 'TO', 'THE', 'REGIONS', 'OF', 'REALITY'] +5142-36377-0003-1534: ref=['AMBROSE', 'MET', 'ME', 'AT', 'THE', 'BOTTOM', 'OF', 'THE', 'STAIRS', 'AND', 'SHOWED', 'ME', 'THE', 'WAY', 'TO', 'THE', 'SUPPER', 'ROOM'] +5142-36377-0003-1534: hyp=['AMBROSE', 'MET', 'ME', 'AT', 'THE', 'BOTTOM', 'OF', 'THE', 'STAIRS', 'AND', 'SHOWED', 'ME', 'THE', 'WAY', 'TO', 'THE', 'SUPPER', 'ROOM'] +5142-36377-0004-1535: ref=['SHE', 'SIGNED', 'TO', 'ME', 'WITH', 'A', 'GHOSTLY', 'SOLEMNITY', 'TO', 'TAKE', 'THE', 'VACANT', 'PLACE', 'ON', 'THE', 'LEFT', 'OF', 'HER', 'FATHER'] +5142-36377-0004-1535: hyp=['SHE', 'SIGNED', 'TO', 'ME', 'WITH', 'A', 'GHOSTLY', 'SOLEMNITY', 'TO', 'TAKE', 'THE', 'VACANT', 'PLACE', 'ON', 'THE', 'LEFT', 'OF', 'HER', 'FATHER'] +5142-36377-0005-1536: ref=['THE', 'DOOR', 'OPENED', 'AGAIN', 'WHILE', 'I', 'WAS', 'STILL', 'STUDYING', 'THE', 'TWO', 'BROTHERS', 'WITHOUT', 'I', 'HONESTLY', 'CONFESS', 'BEING', 'VERY', 'FAVORABLY', 'IMPRESSED', 'BY', 'EITHER', 'OF', 'THEM'] +5142-36377-0005-1536: hyp=['THE', 'DOOR', 'OPENED', 'AGAIN', 'WHILE', 'I', 'WAS', 'STILL', 'STUDYING', 'THE', 'TWO', 'BROTHERS', 'WITHOUT', 'I', 'HONESTLY', 'CONFESS', 'BEING', 'VERY', 'FAVOURABLY', 'IMPRESSED', 'BY', 'EITHER', 'OF', 'THEM'] +5142-36377-0006-1537: ref=['A', 'NEW', 'MEMBER', 'OF', 'THE', 'FAMILY', 'CIRCLE', 'WHO', 'INSTANTLY', 'ATTRACTED', 'MY', 'ATTENTION', 'ENTERED', 'THE', 'ROOM'] +5142-36377-0006-1537: hyp=['A', 'NEW', 'MEMBER', 'OF', 'THE', 'FAMILY', 'CIRCLE', 'WHO', 'INSTANTLY', 'ATTRACTED', 'MY', 'ATTENTION', 'ENTERED', 'THE', 'ROOM'] +5142-36377-0007-1538: ref=['A', 'LITTLE', 'CRACKED', 'THAT', 'IN', 'THE', 'POPULAR', 'PHRASE', 'WAS', 'MY', 'IMPRESSION', 'OF', 'THE', 'STRANGER', 'WHO', 'NOW', 'MADE', 'HIS', 'APPEARANCE', 'IN', 'THE', 'SUPPER', 'ROOM'] +5142-36377-0007-1538: hyp=['A', 'LITTLE', 'CRACKED', 'THAT', 'IN', 'THE', 'POPULAR', 'PHRASE', 'WAS', 'MY', 'IMPRESSION', 'OF', 'THE', 'STRANGER', 'WHO', 'NOW', 'MADE', 'HIS', 'APPEARANCE', 'IN', 'THE', 'SUPPER', 'ROOM'] +5142-36377-0008-1539: ref=['MISTER', 'MEADOWCROFT', 'THE', 'ELDER', 'HAVING', 'NOT', 'SPOKEN', 'ONE', 'WORD', 'THUS', 'FAR', 'HIMSELF', 'INTRODUCED', 'THE', 'NEWCOMER', 'TO', 'ME', 'WITH', 'A', 'SIDE', 'GLANCE', 'AT', 'HIS', 'SONS', 'WHICH', 'HAD', 'SOMETHING', 'LIKE', 'DEFIANCE', 'IN', 'IT', 'A', 'GLANCE', 'WHICH', 'AS', 'I', 'WAS', 'SORRY', 'TO', 'NOTICE', 'WAS', 'RETURNED', 'WITH', 'THE', 'DEFIANCE', 'ON', 'THEIR', 'SIDE', 'BY', 'THE', 'TWO', 'YOUNG', 'MEN'] +5142-36377-0008-1539: hyp=['MISTER', 'MEDICROFT', 'THE', 'ELDER', 'HAVING', 'NOT', 'SPOKEN', 'ONE', 'WORD', 'THUS', 'FAR', 'HIMSELF', 'INTRODUCED', 'THE', 'NEW', 
'COMER', 'TO', 'ME', 'WITH', 'A', 'SIDE', 'GLANCE', 'AT', 'HIS', 'SONS', 'WHICH', 'HAD', 'SOMETHING', 'LIKE', 'DEFIANCE', 'IN', 'IT', 'A', 'GLANCE', 'WHICH', 'AS', 'I', 'WAS', 'SORRY', 'TO', 'NOTICE', 'WAS', 'RETURNED', 'WITH', 'A', 'DEFIANCE', 'ON', 'THEIR', 'SIDE', 'BY', 'THE', 'TWO', 'YOUNG', 'MEN'] +5142-36377-0009-1540: ref=['PHILIP', 'LEFRANK', 'THIS', 'IS', 'MY', 'OVERLOOKER', 'MISTER', 'JAGO', 'SAID', 'THE', 'OLD', 'MAN', 'FORMALLY', 'PRESENTING', 'US'] +5142-36377-0009-1540: hyp=['PHILIP', 'FRANK', 'THIS', 'IS', 'MY', 'OVERLOOKER', 'MISTER', 'JAAGO', 'SAID', 'THE', 'OLD', 'MAN', 'FORMALLY', 'PRESENTING', 'US'] +5142-36377-0010-1541: ref=['HE', 'IS', 'NOT', 'WELL', 'HE', 'HAS', 'COME', 'OVER', 'THE', 'OCEAN', 'FOR', 'REST', 'AND', 'CHANGE', 'OF', 'SCENE'] +5142-36377-0010-1541: hyp=['HE', 'IS', 'NOT', 'WELL', 'HE', 'HAS', 'COME', 'OVER', 'THE', 'OCEAN', 'FOR', 'REST', 'AND', 'CHANGE', 'IS', 'SEEN'] +5142-36377-0011-1542: ref=['MISTER', 'JAGO', 'IS', 'AN', 'AMERICAN', 'PHILIP'] +5142-36377-0011-1542: hyp=['MISTER', 'IAGO', 'IS', 'AN', 'AMERICAN', 'PHILIP'] +5142-36377-0012-1543: ref=['MAKE', 'ACQUAINTANCE', 'WITH', 'MISTER', 'JAGO', 'SIT', 'TOGETHER'] +5142-36377-0012-1543: hyp=['MAKE', 'ACQUAINTANCE', 'WITH', 'MISS', 'CHIAGO', 'SIT', 'TOGETHER'] +5142-36377-0013-1544: ref=['THEY', 'POINTEDLY', 'DREW', 'BACK', 'FROM', 'JOHN', 'JAGO', 'AS', 'HE', 'APPROACHED', 'THE', 'EMPTY', 'CHAIR', 'NEXT', 'TO', 'ME', 'AND', 'MOVED', 'ROUND', 'TO', 'THE', 'OPPOSITE', 'SIDE', 'OF', 'THE', 'TABLE'] +5142-36377-0013-1544: hyp=['THEY', 'POINTEDLY', 'DREW', 'BACK', 'FROM', 'JOHN', 'JAGO', 'AS', 'HE', 'APPROACHED', 'THE', 'EMPTY', 'CHAIR', 'NEXT', 'TO', 'ME', 'AND', 'MOVED', 'ROUND', 'TO', 'THE', 'OPPOSITE', 'SIDE', 'OF', 'THE', 'TABLE'] +5142-36377-0014-1545: ref=['A', 'PRETTY', 'GIRL', 'AND', 'SO', 'FAR', 'AS', 'I', 'COULD', 'JUDGE', 'BY', 'APPEARANCES', 'A', 'GOOD', 'GIRL', 'TOO', 'DESCRIBING', 'HER', 'GENERALLY', 'I', 'MAY', 'SAY', 'THAT', 'SHE', 'HAD', 'A', 'SMALL', 'HEAD', 'WELL', 'CARRIED', 'AND', 'WELL', 'SET', 'ON', 'HER', 'SHOULDERS', 'BRIGHT', 'GRAY', 'EYES', 'THAT', 'LOOKED', 'AT', 'YOU', 'HONESTLY', 'AND', 'MEANT', 'WHAT', 'THEY', 'LOOKED', 'A', 'TRIM', 'SLIGHT', 'LITTLE', 'FIGURE', 'TOO', 'SLIGHT', 'FOR', 'OUR', 'ENGLISH', 'NOTIONS', 'OF', 'BEAUTY', 'A', 'STRONG', 'AMERICAN', 'ACCENT', 'AND', 'A', 'RARE', 'THING', 'IN', 'AMERICA', 'A', 'PLEASANTLY', 'TONED', 'VOICE', 'WHICH', 'MADE', 'THE', 'ACCENT', 'AGREEABLE', 'TO', 'ENGLISH', 'EARS'] +5142-36377-0014-1545: hyp=['A', 'PRETTY', 'GIRL', 'AND', 'SO', 'FAR', 'AS', 'I', 'COULD', 'JUDGE', 'BY', 'APPEARANCES', 'A', 'GOOD', 'GIRL', 'TOO', 'DESCRIBING', 'HER', 'GENERALLY', 'I', 'MAY', 'SAY', 'THAT', 'SHE', 'HAD', 'A', 'SMALL', 'HEAD', 'WELL', 'CARRIED', 'AND', 'WELL', 'SET', 'ON', 'HER', 'SHOULDERS', 'BRIGHT', 'GRAY', 'EYES', 'THAT', 'LOOKED', 'AT', 'YOU', 'HONESTLY', 'AND', 'MEANT', 'WHAT', 'THEY', 'LOOKED', 'A', 'TRIM', 'SLIGHT', 'LITTLE', 'FIGURE', 'TOO', 'SLIGHT', 'FOR', 'OUR', 'ENGLISH', 'NOTIONS', 'OF', 'BEAUTY', 'A', 'STRONG', 'AMERICAN', 'ACCENT', 'AND', 'A', 'RARE', 'THING', 'IN', 'AMERICA', 'A', 'PLEASANTLY', 'TONED', 'VOICE', 'WHICH', 'MADE', 'THE', 'ACCENT', 'AGREEABLE', 'TO', 'ENGLISH', 'EARS'] +5142-36377-0015-1546: ref=['OUR', 'FIRST', 'IMPRESSIONS', 'OF', 'PEOPLE', 'ARE', 'IN', 'NINE', 'CASES', 'OUT', 'OF', 'TEN', 'THE', 'RIGHT', 'IMPRESSIONS'] +5142-36377-0015-1546: hyp=['OUR', 'FIRST', 'IMPRESSIONS', 'OF', 'PEOPLE', 'ARE', 'IN', 'NINE', 'CASES', 'OUT', 'OF', 'TEN', 'THE', 'RIGHT', 'IMPRESSIONS'] +5142-36377-0016-1547: 
ref=['FOR', 'ONCE', 'IN', 'A', 'WAY', 'I', 'PROVED', 'A', 'TRUE', 'PROPHET'] +5142-36377-0016-1547: hyp=['FOR', 'ONCE', 'IN', 'A', 'WAY', 'I', 'PROVED', 'A', 'TRUE', 'PROPHET'] +5142-36377-0017-1548: ref=['THE', 'ONLY', 'CHEERFUL', 'CONVERSATION', 'WAS', 'THE', 'CONVERSATION', 'ACROSS', 'THE', 'TABLE', 'BETWEEN', 'NAOMI', 'AND', 'ME'] +5142-36377-0017-1548: hyp=['THE', 'ONLY', 'CHEERFUL', 'CONVERSATION', 'WAS', 'THE', 'CONVERSATION', 'ACROSS', 'THE', 'TABLE', 'BETWEEN', 'NAOMI', 'AND', 'ME'] +5142-36377-0018-1549: ref=['HE', 'LOOKED', 'UP', 'AT', 'NAOMI', 'DOUBTINGLY', 'FROM', 'HIS', 'PLATE', 'AND', 'LOOKED', 'DOWN', 'AGAIN', 'SLOWLY', 'WITH', 'A', 'FROWN'] +5142-36377-0018-1549: hyp=['HE', 'LOOKED', 'UP', 'AT', 'NAROWMY', 'DOUBTINGLY', 'FROM', 'HIS', 'PLATE', 'AND', 'LOOKED', 'DOWN', 'AGAIN', 'SLOWLY', 'WITH', 'A', 'FROWN'] +5142-36377-0019-1550: ref=['WHEN', 'I', 'ADDRESSED', 'HIM', 'HE', 'ANSWERED', 'CONSTRAINEDLY'] +5142-36377-0019-1550: hyp=['WHEN', 'I', 'ADDRESSED', 'HIM', 'HE', 'ANSWERED', 'CONSTRAINEDLY'] +5142-36377-0020-1551: ref=['A', 'MORE', 'DREARY', 'AND', 'MORE', 'DISUNITED', 'FAMILY', 'PARTY', 'I', 'NEVER', 'SAT', 'AT', 'THE', 'TABLE', 'WITH'] +5142-36377-0020-1551: hyp=['A', 'MORE', 'DREARY', 'AND', 'MORE', 'DISUNITED', 'FAMILY', 'PARTY', 'I', 'NEVER', 'SAT', 'AT', 'TABLE', 'WITH'] +5142-36377-0021-1552: ref=['ENVY', 'HATRED', 'MALICE', 'AND', 'UNCHARITABLENESS', 'ARE', 'NEVER', 'SO', 'ESSENTIALLY', 'DETESTABLE', 'TO', 'MY', 'MIND', 'AS', 'WHEN', 'THEY', 'ARE', 'ANIMATED', 'BY', 'A', 'SENSE', 'OF', 'PROPRIETY', 'AND', 'WORK', 'UNDER', 'THE', 'SURFACE', 'BUT', 'FOR', 'MY', 'INTEREST', 'IN', 'NAOMI', 'AND', 'MY', 'OTHER', 'INTEREST', 'IN', 'THE', 'LITTLE', 'LOVE', 'LOOKS', 'WHICH', 'I', 'NOW', 'AND', 'THEN', 'SURPRISED', 'PASSING', 'BETWEEN', 'HER', 'AND', 'AMBROSE', 'I', 'SHOULD', 'NEVER', 'HAVE', 'SAT', 'THROUGH', 'THAT', 'SUPPER'] +5142-36377-0021-1552: hyp=['ENVY', 'HATRED', 'MALICE', 'AND', 'UNCHARITABLENESS', 'ARE', 'NEVER', 'SO', 'ESSENTIALLY', 'DETESTABLE', 'TO', 'MY', 'MIND', 'AS', 'WHEN', 'THEY', 'ARE', 'ANIMATED', 'BY', 'THE', 'SENSE', 'OF', 'PROPRIETY', 'AND', 'WORK', 'UNDER', 'THE', 'SURFACE', 'BUT', 'FOR', 'MY', 'INTEREST', 'IN', 'NAOMI', 'AND', 'MY', 'OTHER', 'INTEREST', 'IN', 'THE', 'LITTLE', 'LOVE', 'LOOKS', 'WHICH', 'I', 'NOW', 'AND', 'THEN', 'SURPRISED', 'PASSING', 'BETWEEN', 'HER', 'AND', 'AMBROSE', 'I', 'SHOULD', 'NEVER', 'HAVE', 'SAT', 'THROUGH', 'THAT', 'SUPPER'] +5142-36377-0022-1553: ref=['I', 'WISH', 'YOU', 'GOOD', 'NIGHT', 'SHE', 'LAID', 'HER', 'BONY', 'HANDS', 'ON', 'THE', 'BACK', 'OF', 'MISTER', "MEADOWCROFT'S", 'INVALID', 'CHAIR', 'CUT', 'HIM', 'SHORT', 'IN', 'HIS', 'FAREWELL', 'SALUTATION', 'TO', 'ME', 'AND', 'WHEELED', 'HIM', 'OUT', 'TO', 'HIS', 'BED', 'AS', 'IF', 'SHE', 'WERE', 'WHEELING', 'HIM', 'OUT', 'TO', 'HIS', 'GRAVE'] +5142-36377-0022-1553: hyp=['I', 'WISH', 'YOU', 'GOOD', 'NIGHT', 'SHE', 'LAID', 'HER', 'BONY', 'HANDS', 'ON', 'THE', 'BACK', 'OF', 'MISTER', "MEDICROFT'S", 'INVALID', 'CHAIR', 'CUT', 'HIM', 'SHORT', 'IN', 'HIS', 'FAREWELL', 'SALUTATION', 'TO', 'ME', 'AND', 'WHEELED', 'HIM', 'OUT', 'TO', 'HIS', 'BED', 'AS', 'IF', 'SHE', 'WERE', 'WHEELING', 'HIM', 'OUT', 'TO', 'HIS', 'GRAVE'] +5142-36377-0023-1554: ref=['YOU', 'WERE', 'QUITE', 'RIGHT', 'TO', 'SAY', 'NO', 'AMBROSE', 'BEGAN', 'NEVER', 'SMOKE', 'WITH', 'JOHN', 'JAGO', 'HIS', 'CIGARS', 'WILL', 'POISON', 'YOU'] +5142-36377-0023-1554: hyp=['YOU', 'WERE', 'QUITE', 'RIGHT', 'TO', 'SAY', 'NO', 'AMBROSE', 'BEGAN', 'NEVER', 'SMOKE', 'WITH', 'JOHN', 'IAGO', 'HIS', 'CIGARS', 'WILL', 
'POISON', 'YOU'] +5142-36377-0024-1555: ref=['NAOMI', 'SHOOK', 'HER', 'FOREFINGER', 'REPROACHFULLY', 'AT', 'THEM', 'AS', 'IF', 'THE', 'TWO', 'STURDY', 'YOUNG', 'FARMERS', 'HAD', 'BEEN', 'TWO', 'CHILDREN'] +5142-36377-0024-1555: hyp=['THEY', 'ONLY', 'SHOOK', 'HER', 'FOREFINGER', 'REPROACHFULLY', 'AT', 'THEM', 'AS', 'IF', 'THE', 'TWO', 'STURDY', 'YOUNG', 'FARMERS', 'HAD', 'BEEN', 'TWO', 'CHILDREN'] +5142-36377-0025-1556: ref=['SILAS', 'SLUNK', 'AWAY', 'WITHOUT', 'A', 'WORD', 'OF', 'PROTEST', 'AMBROSE', 'STOOD', 'HIS', 'GROUND', 'EVIDENTLY', 'BENT', 'ON', 'MAKING', 'HIS', 'PEACE', 'WITH', 'NAOMI', 'BEFORE', 'HE', 'LEFT', 'HER', 'SEEING', 'THAT', 'I', 'WAS', 'IN', 'THE', 'WAY', 'I', 'WALKED', 'ASIDE', 'TOWARD', 'A', 'GLASS', 'DOOR', 'AT', 'THE', 'LOWER', 'END', 'OF', 'THE', 'ROOM'] +5142-36377-0025-1556: hyp=['SILAS', 'SLUNK', 'AWAY', 'WITHOUT', 'A', 'WORD', 'OF', 'PROTEST', 'AMBROSE', 'STOOD', 'HIS', 'GROUND', 'EVIDENTLY', 'BENT', 'ON', 'MAKING', 'HIS', 'PEACE', 'WITH', 'NAOMI', 'BEFORE', 'HE', 'LEFT', 'HER', 'SEEING', 'THAT', 'I', 'WAS', 'IN', 'THE', 'WAY', 'I', 'WALKED', 'ASIDE', 'TOWARD', 'A', 'GLASS', 'DOOR', 'AT', 'THE', 'LOWER', 'END', 'OF', 'THE', 'ROOM'] +5142-36586-0000-1557: ref=['IT', 'IS', 'MANIFEST', 'THAT', 'MAN', 'IS', 'NOW', 'SUBJECT', 'TO', 'MUCH', 'VARIABILITY'] +5142-36586-0000-1557: hyp=['IT', 'IS', 'MANIFEST', 'THAT', 'MAN', 'IS', 'NOW', 'SUBJECT', 'TO', 'MUCH', 'VARIABILITY'] +5142-36586-0001-1558: ref=['SO', 'IT', 'IS', 'WITH', 'THE', 'LOWER', 'ANIMALS'] +5142-36586-0001-1558: hyp=['SO', 'IT', 'IS', 'WITH', 'THE', 'LOWER', 'ANIMALS'] +5142-36586-0002-1559: ref=['THE', 'VARIABILITY', 'OF', 'MULTIPLE', 'PARTS'] +5142-36586-0002-1559: hyp=['THE', 'VARIABILITY', 'OF', 'MULTIPLE', 'PARTS'] +5142-36586-0003-1560: ref=['BUT', 'THIS', 'SUBJECT', 'WILL', 'BE', 'MORE', 'PROPERLY', 'DISCUSSED', 'WHEN', 'WE', 'TREAT', 'OF', 'THE', 'DIFFERENT', 'RACES', 'OF', 'MANKIND'] +5142-36586-0003-1560: hyp=['BUT', 'THIS', 'SUBJECT', 'WILL', 'BE', 'MORE', 'PROPERLY', 'DISCUSSED', 'WHEN', 'WE', 'TREAT', 'OF', 'THE', 'DIFFERENT', 'RACES', 'OF', 'MANKIND'] +5142-36586-0004-1561: ref=['EFFECTS', 'OF', 'THE', 'INCREASED', 'USE', 'AND', 'DISUSE', 'OF', 'PARTS'] +5142-36586-0004-1561: hyp=['EFFECTS', 'OF', 'THE', 'INCREASED', 'USE', 'AND', 'DISUSE', 'OF', 'PARTS'] +5142-36600-0000-1562: ref=['CHAPTER', 'SEVEN', 'ON', 'THE', 'RACES', 'OF', 'MAN'] +5142-36600-0000-1562: hyp=['CHAPTER', 'SEVEN', 'ON', 'THE', 'RACES', 'OF', 'MAN'] +5142-36600-0001-1563: ref=['IN', 'DETERMINING', 'WHETHER', 'TWO', 'OR', 'MORE', 'ALLIED', 'FORMS', 'OUGHT', 'TO', 'BE', 'RANKED', 'AS', 'SPECIES', 'OR', 'VARIETIES', 'NATURALISTS', 'ARE', 'PRACTICALLY', 'GUIDED', 'BY', 'THE', 'FOLLOWING', 'CONSIDERATIONS', 'NAMELY', 'THE', 'AMOUNT', 'OF', 'DIFFERENCE', 'BETWEEN', 'THEM', 'AND', 'WHETHER', 'SUCH', 'DIFFERENCES', 'RELATE', 'TO', 'FEW', 'OR', 'MANY', 'POINTS', 'OF', 'STRUCTURE', 'AND', 'WHETHER', 'THEY', 'ARE', 'OF', 'PHYSIOLOGICAL', 'IMPORTANCE', 'BUT', 'MORE', 'ESPECIALLY', 'WHETHER', 'THEY', 'ARE', 'CONSTANT'] +5142-36600-0001-1563: hyp=['IN', 'DETERMINING', 'WHETHER', 'TWO', 'OR', 'MORE', 'ALLIED', 'FORMS', 'OUGHT', 'TO', 'BE', 'RANKED', 'AS', 'SPECIES', 'OR', 'VARIETIES', 'NATURALISTS', 'ARE', 'PRACTICALLY', 'GUIDED', 'BY', 'THE', 'FOLLOWING', 'CONSIDERATIONS', 'NAMELY', 'THE', 'AMOUNT', 'OF', 'DIFFERENCE', 'BETWEEN', 'THEM', 'AND', 'WHETHER', 'SUCH', 'DIFFERENCES', 'RELATE', 'TO', 'FEW', 'OR', 'MANY', 'POINTS', 'OF', 'STRUCTURE', 'AND', 'WHETHER', 'THEY', 'ARE', 'OF', 'PHYSIOLOGICAL', 'IMPORTANCE', 'BUT', 'MORE', 
'ESPECIALLY', 'WHETHER', 'THEY', 'ARE', 'CONSTANT'] +5639-40744-0000-1564: ref=['ELEVEN', "O'CLOCK", 'HAD', 'STRUCK', 'IT', 'WAS', 'A', 'FINE', 'CLEAR', 'NIGHT', 'THEY', 'WERE', 'THE', 'ONLY', 'PERSONS', 'ON', 'THE', 'ROAD', 'AND', 'THEY', 'SAUNTERED', 'LEISURELY', 'ALONG', 'TO', 'AVOID', 'PAYING', 'THE', 'PRICE', 'OF', 'FATIGUE', 'FOR', 'THE', 'RECREATION', 'PROVIDED', 'FOR', 'THE', 'TOLEDANS', 'IN', 'THEIR', 'VALLEY', 'OR', 'ON', 'THE', 'BANKS', 'OF', 'THEIR', 'RIVER'] +5639-40744-0000-1564: hyp=['ELEVEN', "O'CLOCK", 'HAD', 'STRUCK', 'IT', 'WAS', 'A', 'FINE', 'CLEAR', 'NIGHT', 'THEY', 'WERE', 'THE', 'ONLY', 'PERSONS', 'ON', 'THE', 'ROAD', 'AND', 'THEY', 'SAUNTERED', 'LEISURELY', 'ALONG', 'TO', 'AVOID', 'PAYING', 'THE', 'PRICE', 'OF', 'FATIGUE', 'FOR', 'THE', 'RECREATION', 'PROVIDED', 'FOR', 'THE', 'TOLEDANS', 'IN', 'THE', 'VALLEY', 'OR', 'ON', 'THE', 'BANKS', 'OF', 'THE', 'RIVER'] +5639-40744-0001-1565: ref=['SECURE', 'AS', 'HE', 'THOUGHT', 'IN', 'THE', 'CAREFUL', 'ADMINISTRATION', 'OF', 'JUSTICE', 'IN', 'THAT', 'CITY', 'AND', 'THE', 'CHARACTER', 'OF', 'ITS', 'WELL', 'DISPOSED', 'INHABITANTS', 'THE', 'GOOD', 'HIDALGO', 'WAS', 'FAR', 'FROM', 'THINKING', 'THAT', 'ANY', 'DISASTER', 'COULD', 'BEFAL', 'HIS', 'FAMILY'] +5639-40744-0001-1565: hyp=['SECURE', 'AS', 'HE', 'THOUGHT', 'IN', 'THE', 'CAREFUL', 'ADMINISTRATION', 'OF', 'JUSTICE', 'IN', 'THAT', 'CITY', 'AND', 'THE', 'CHARACTER', 'OF', 'ITS', 'WELL', 'DISPOSED', 'INHABITANTS', 'THE', 'GOOD', 'HEDALGO', 'WAS', 'FAR', 'FROM', 'THINKING', 'THAT', 'ANY', 'DISASTER', 'COULD', 'BEFALL', 'HIS', 'FAMILY'] +5639-40744-0002-1566: ref=['RODOLFO', 'AND', 'HIS', 'COMPANIONS', 'WITH', 'THEIR', 'FACES', 'MUFFLED', 'IN', 'THEIR', 'CLOAKS', 'STARED', 'RUDELY', 'AND', 'INSOLENTLY', 'AT', 'THE', 'MOTHER', 'THE', 'DAUGHTER', 'AND', 'THE', 'SERVANT', 'MAID'] +5639-40744-0002-1566: hyp=['RUDOLPHO', 'AND', 'HIS', 'COMPANIONS', 'WITH', 'THEIR', 'FACES', 'MUFFLED', 'IN', 'THEIR', 'CLOAKS', 'STARED', 'RUDELY', 'AND', 'INSOLENTLY', 'AT', 'THE', 'MOTHER', 'THE', 'DAUGHTER', 'AND', 'THE', 'SERVANT', 'MAID'] +5639-40744-0003-1567: ref=['IN', 'A', 'MOMENT', 'HE', 'COMMUNICATED', 'HIS', 'THOUGHTS', 'TO', 'HIS', 'COMPANIONS', 'AND', 'IN', 'THE', 'NEXT', 'MOMENT', 'THEY', 'RESOLVED', 'TO', 'TURN', 'BACK', 'AND', 'CARRY', 'HER', 'OFF', 'TO', 'PLEASE', 'RODOLFO', 'FOR', 'THE', 'RICH', 'WHO', 'ARE', 'OPEN', 'HANDED', 'ALWAYS', 'FIND', 'PARASITES', 'READY', 'TO', 'ENCOURAGE', 'THEIR', 'BAD', 'PROPENSITIES', 'AND', 'THUS', 'TO', 'CONCEIVE', 'THIS', 'WICKED', 'DESIGN', 'TO', 'COMMUNICATE', 'IT', 'APPROVE', 'IT', 'RESOLVE', 'ON', 'RAVISHING', 'LEOCADIA', 'AND', 'TO', 'CARRY', 'THAT', 'DESIGN', 'INTO', 'EFFECT', 'WAS', 'THE', 'WORK', 'OF', 'A', 'MOMENT'] +5639-40744-0003-1567: hyp=['IN', 'A', 'MOMENT', 'HE', 'COMMUNICATED', 'HIS', 'THOUGHTS', 'TO', 'HIS', 'COMPANIONS', 'AND', 'IN', 'THE', 'NEXT', 'MOMENT', 'THEY', 'RESOLVED', 'TO', 'TURN', 'BACK', 'AND', 'CARRY', 'HER', 'OFF', 'TO', 'PLEASE', 'RUDOLPHO', 'FOR', 'THE', 'RICH', 'WHO', 'ARE', 'OPEN', 'HANDED', 'ALWAYS', 'FIND', 'PARRICIDE', 'READY', 'TO', 'ENCOURAGE', 'THEIR', 'BAD', 'PROPENSITIES', 'AND', 'THUS', 'TO', 'CONCEIVE', 'THIS', 'WICKED', 'DESIGN', 'TO', 'COMMUNICATE', 'IT', 'APPROVE', 'IT', 'RESOLVE', 'ON', 'RAVISHING', 'THE', 'ARCADIA', 'AND', 'TO', 'CARRY', 'THAT', 'DESIGN', 'INTO', 'EFFECT', 'WAS', 'THE', 'WORK', 'OF', 'A', 'MOMENT'] +5639-40744-0004-1568: ref=['THEY', 'DREW', 'THEIR', 'SWORDS', 'HID', 'THEIR', 'FACES', 'IN', 'THE', 'FLAPS', 'OF', 'THEIR', 'CLOAKS', 'TURNED', 'BACK', 'AND', 'SOON', 'CAME', 'IN', 
'FRONT', 'OF', 'THE', 'LITTLE', 'PARTY', 'WHO', 'HAD', 'NOT', 'YET', 'DONE', 'GIVING', 'THANKS', 'TO', 'GOD', 'FOR', 'THEIR', 'ESCAPE', 'FROM', 'THOSE', 'AUDACIOUS', 'MEN'] +5639-40744-0004-1568: hyp=['THEY', 'DREW', 'THEIR', 'SWORDS', 'HID', 'THEIR', 'FACES', 'IN', 'THE', 'FLAPS', 'OF', 'THEIR', 'CLOAKS', 'TURNED', 'BACK', 'AND', 'SOON', 'CAME', 'IN', 'FRONT', 'OF', 'THE', 'LITTLE', 'PARTY', 'WHO', 'HAD', 'NOT', 'YET', 'DONE', 'GIVING', 'THANKS', 'TO', 'GOD', 'FOR', 'THEIR', 'ESCAPE', 'FROM', 'THOSE', 'AUDACIOUS', 'MEN'] +5639-40744-0005-1569: ref=['FINALLY', 'THE', 'ONE', 'PARTY', 'WENT', 'OFF', 'EXULTING', 'AND', 'THE', 'OTHER', 'WAS', 'LEFT', 'IN', 'DESOLATION', 'AND', 'WOE'] +5639-40744-0005-1569: hyp=['FINALLY', 'THE', 'ONE', 'PARTY', 'WENT', 'OFF', 'EXULTING', 'AND', 'THE', 'OTHER', 'WAS', 'LEFT', 'IN', 'DESOLATION', 'AND', 'WOE'] +5639-40744-0006-1570: ref=['RODOLFO', 'ARRIVED', 'AT', 'HIS', 'OWN', 'HOUSE', 'WITHOUT', 'ANY', 'IMPEDIMENT', 'AND', "LEOCADIA'S", 'PARENTS', 'REACHED', 'THEIRS', 'HEART', 'BROKEN', 'AND', 'DESPAIRING'] +5639-40744-0006-1570: hyp=['UDOLPHO', 'ARRIVED', 'AT', 'HIS', 'OWN', 'HOUSE', 'WITHOUT', 'ANY', 'IMPEDIMENT', "ANDUCADIA'S", 'PARENTS', 'REACHED', 'THEIRS', 'HEARTBROKEN', 'AND', 'DESPAIRING'] +5639-40744-0007-1571: ref=['MEANWHILE', 'RODOLFO', 'HAD', 'LEOCADIA', 'SAFE', 'IN', 'HIS', 'CUSTODY', 'AND', 'IN', 'HIS', 'OWN', 'APARTMENT'] +5639-40744-0007-1571: hyp=['MEANWHILE', 'RUDOLPHO', 'HAD', 'LOCALIA', 'SAFE', 'IN', 'HIS', 'CUSTODY', 'AND', 'IN', 'HIS', 'OWN', 'APARTMENT'] +5639-40744-0008-1572: ref=['WHO', 'TOUCHES', 'ME', 'AM', 'I', 'IN', 'BED'] +5639-40744-0008-1572: hyp=['WHO', 'TOUCHES', 'ME', 'AM', 'I', 'IN', 'BED'] +5639-40744-0009-1573: ref=['MOTHER', 'DEAR', 'FATHER', 'DO', 'YOU', 'HEAR', 'ME'] +5639-40744-0009-1573: hyp=['MOTHER', 'DEAR', 'FATHER', 'DO', 'YOU', 'HEAR', 'ME'] +5639-40744-0010-1574: ref=['IT', 'IS', 'THE', 'ONLY', 'AMENDS', 'I', 'ASK', 'OF', 'YOU', 'FOR', 'THE', 'WRONG', 'YOU', 'HAVE', 'DONE', 'ME'] +5639-40744-0010-1574: hyp=['IT', 'IS', 'THE', 'ONLY', 'AMENDS', 'I', 'ASK', 'OF', 'YOU', 'FOR', 'THE', 'WRONG', 'YOU', 'HAVE', 'DONE', 'ME'] +5639-40744-0011-1575: ref=['SHE', 'FOUND', 'THE', 'DOOR', 'BUT', 'IT', 'WAS', 'LOCKED', 'OUTSIDE'] +5639-40744-0011-1575: hyp=['SHE', 'FOUND', 'THE', 'DOOR', 'BUT', 'IT', 'WAS', 'LOCKED', 'OUTSIDE'] +5639-40744-0012-1576: ref=['SHE', 'SUCCEEDED', 'IN', 'OPENING', 'THE', 'WINDOW', 'AND', 'THE', 'MOONLIGHT', 'SHONE', 'IN', 'SO', 'BRIGHTLY', 'THAT', 'SHE', 'COULD', 'DISTINGUISH', 'THE', 'COLOUR', 'OF', 'SOME', 'DAMASK', 'HANGINGS', 'IN', 'THE', 'ROOM'] +5639-40744-0012-1576: hyp=['SHE', 'SUCCEEDED', 'IN', 'OPENING', 'THE', 'WINDOW', 'AND', 'THE', 'MOONLIGHT', 'SHONE', 'IN', 'SO', 'BRIGHTLY', 'THAT', 'SHE', 'COULD', 'DISTINGUISH', 'THE', 'COLOR', 'OF', 'SOME', 'DAMASK', 'HANGING', 'IN', 'THE', 'ROOM'] +5639-40744-0013-1577: ref=['SHE', 'SAW', 'THAT', 'THE', 'BED', 'WAS', 'GILDED', 'AND', 'SO', 'RICH', 'THAT', 'IT', 'SEEMED', 'THAT', 'OF', 'A', 'PRINCE', 'RATHER', 'THAN', 'OF', 'A', 'PRIVATE', 'GENTLEMAN'] +5639-40744-0013-1577: hyp=['SHE', 'SAW', 'THAT', 'THE', 'BED', 'WAS', 'GILDED', 'AND', 'SO', 'RICH', 'THAT', 'IT', 'SEEMED', 'THAT', 'OF', 'A', 'PRINCE', 'RATHER', 'THAN', 'OF', 'A', 'PRIVATE', 'GENTLEMAN'] +5639-40744-0014-1578: ref=['AMONG', 'OTHER', 'THINGS', 'ON', 'WHICH', 'SHE', 'CAST', 'HER', 'EYES', 'WAS', 'A', 'SMALL', 'CRUCIFIX', 'OF', 'SOLID', 'SILVER', 'STANDING', 'ON', 'A', 'CABINET', 'NEAR', 'THE', 'WINDOW'] +5639-40744-0014-1578: hyp=['AMONG', 'OTHER', 'THINGS', 'ON', 'WHICH', 
'SHE', 'CAST', 'HER', 'EYES', 'WAS', 'A', 'SMALL', 'CRUCIFIX', 'OF', 'SOLID', 'SILVER', 'STANDING', 'ON', 'A', 'CABINET', 'NEAR', 'THE', 'WINDOW'] +5639-40744-0015-1579: ref=['THIS', 'PERSON', 'WAS', 'RODOLFO', 'WHO', 'THOUGH', 'HE', 'HAD', 'GONE', 'TO', 'LOOK', 'FOR', 'HIS', 'FRIENDS', 'HAD', 'CHANGED', 'HIS', 'MIND', 'IN', 'THAT', 'RESPECT', 'NOT', 'THINKING', 'IT', 'ADVISABLE', 'TO', 'ACQUAINT', 'THEM', 'WITH', 'WHAT', 'HAD', 'PASSED', 'BETWEEN', 'HIM', 'AND', 'THE', 'GIRL'] +5639-40744-0015-1579: hyp=['THIS', 'PERSON', 'WAS', 'RUDOLPHO', 'WHO', 'THOUGH', 'HE', 'HAD', 'GONE', 'TO', 'LOOK', 'FOR', 'HIS', 'FRIENDS', 'HAD', 'CHANGED', 'HIS', 'MIND', 'IN', 'THAT', 'RESPECT', 'NOT', 'THINKING', 'IT', 'ADVISABLE', 'TO', 'ACQUAINT', 'THEM', 'WITH', 'WHAT', 'HAD', 'PASSED', 'BETWEEN', 'HIM', 'AND', 'THE', 'GIRL'] +5639-40744-0016-1580: ref=['ON', 'THE', 'CONTRARY', 'HE', 'RESOLVED', 'TO', 'TELL', 'THEM', 'THAT', 'REPENTING', 'OF', 'HIS', 'VIOLENCE', 'AND', 'MOVED', 'BY', 'HER', 'TEARS', 'HE', 'HAD', 'ONLY', 'CARRIED', 'HER', 'HALF', 'WAY', 'TOWARDS', 'HIS', 'HOUSE', 'AND', 'THEN', 'LET', 'HER', 'GO'] +5639-40744-0016-1580: hyp=['ON', 'THE', 'CONTRARY', 'HE', 'RESOLVED', 'TO', 'TELL', 'THEM', 'THAT', 'REPENTING', 'OF', 'HIS', 'VIOLENCE', 'AND', 'MOVED', 'BY', 'HER', 'TEARS', 'HE', 'HAD', 'ONLY', 'CARRIED', 'HER', 'HALF', 'WAY', 'TOWARDS', 'HIS', 'HOUSE', 'AND', 'THEN', 'LET', 'HER', 'GO'] +5639-40744-0017-1581: ref=['CHOKING', 'WITH', 'EMOTION', 'LEOCADI', 'MADE', 'A', 'SIGN', 'TO', 'HER', 'PARENTS', 'THAT', 'SHE', 'WISHED', 'TO', 'BE', 'ALONE', 'WITH', 'THEM'] +5639-40744-0017-1581: hyp=['CHOKING', 'WITH', 'EMOTION', 'THE', 'ARCADIA', 'MADE', 'A', 'SIGN', 'TO', 'HER', 'PARENTS', 'THAT', 'SHE', 'WISHED', 'TO', 'BE', 'ALONE', 'WITH', 'THEM'] +5639-40744-0018-1582: ref=['THAT', 'WOULD', 'BE', 'VERY', 'WELL', 'MY', 'CHILD', 'REPLIED', 'HER', 'FATHER', 'IF', 'YOUR', 'PLAN', 'WERE', 'NOT', 'LIABLE', 'TO', 'BE', 'FRUSTRATED', 'BY', 'ORDINARY', 'CUNNING', 'BUT', 'NO', 'DOUBT', 'THIS', 'IMAGE', 'HAS', 'BEEN', 'ALREADY', 'MISSED', 'BY', 'ITS', 'OWNER', 'AND', 'HE', 'WILL', 'HAVE', 'SET', 'IT', 'DOWN', 'FOR', 'CERTAIN', 'THAT', 'IT', 'WAS', 'TAKEN', 'OUT', 'OF', 'THE', 'ROOM', 'BY', 'THE', 'PERSON', 'HE', 'LOCKED', 'UP', 'THERE'] +5639-40744-0018-1582: hyp=['THAT', 'WOULD', 'BE', 'VERY', 'WELL', 'MY', 'CHILD', 'REPLIED', 'HER', 'FATHER', 'IF', 'YOUR', 'PLAN', 'WERE', 'NOT', 'LIABLE', 'TO', 'BE', 'FRUSTRATED', 'BY', 'ORDINARY', 'CUNNING', 'BUT', 'NO', 'DOUBT', 'THIS', 'IMAGE', 'HAD', 'BEEN', 'ALREADY', 'MISSED', 'BY', 'ITS', 'OWNER', 'AND', 'HE', 'WILL', 'HAVE', 'SET', 'IT', 'DOWN', 'FOR', 'CERTAIN', 'THAT', 'IT', 'WAS', 'TAKEN', 'OUT', 'OF', 'THE', 'ROOM', 'BY', 'THE', 'PERSON', 'HE', 'LOCKED', 'UP', 'THERE'] +5639-40744-0019-1583: ref=['WHAT', 'YOU', 'HAD', 'BEST', 'DO', 'MY', 'CHILD', 'IS', 'TO', 'KEEP', 'IT', 'AND', 'PRAY', 'TO', 'IT', 'THAT', 'SINCE', 'IT', 'WAS', 'A', 'WITNESS', 'TO', 'YOUR', 'UNDOING', 'IT', 'WILL', 'DEIGN', 'TO', 'VINDICATE', 'YOUR', 'CAUSE', 'BY', 'ITS', 'RIGHTEOUS', 'JUDGMENT'] +5639-40744-0019-1583: hyp=['WHAT', 'YOU', 'HAD', 'BEST', 'DO', 'MY', 'CHILD', 'IS', 'TO', 'KEEP', 'IT', 'AND', 'PRAY', 'TO', 'IT', 'THAT', 'SINCE', 'IT', 'WAS', 'A', 'WITNESS', 'TO', 'YOUR', 'UNDOING', 'IT', 'WILL', 'DEIGN', 'TO', 'VINDICATE', 'YOUR', 'CAUSE', 'BY', 'ITS', 'RIGHTEOUS', 'JUDGMENT'] +5639-40744-0020-1584: ref=['THUS', 'DID', 'THIS', 'HUMANE', 'AND', 'RIGHT', 'MINDED', 'FATHER', 'COMFORT', 'HIS', 'UNHAPPY', 'DAUGHTER', 'AND', 'HER', 'MOTHER', 'EMBRACING', 'HER', 'AGAIN', 'DID', 'ALL', 'SHE', 
'COULD', 'TO', 'SOOTHE', 'HER', 'FEELINGS'] +5639-40744-0020-1584: hyp=['THUS', 'DID', 'THE', 'HUMANE', 'AND', 'RIGHT', 'MINDED', 'FATHER', 'COMFORT', 'HIS', 'UNHAPPY', 'DAUGHTER', 'AND', 'HER', 'MOTHER', 'EMBRACING', 'HER', 'AGAIN', 'DID', 'ALL', 'SHE', 'COULD', 'TO', 'SOOTHE', 'THE', 'FEELINGS'] +5639-40744-0021-1585: ref=['SHE', 'MEANWHILE', 'PASSED', 'HER', 'LIFE', 'WITH', 'HER', 'PARENTS', 'IN', 'THE', 'STRICTEST', 'RETIREMENT', 'NEVER', 'LETTING', 'HERSELF', 'BE', 'SEEN', 'BUT', 'SHUNNING', 'EVERY', 'EYE', 'LEST', 'IT', 'SHOULD', 'READ', 'HER', 'MISFORTUNE', 'IN', 'HER', 'FACE'] +5639-40744-0021-1585: hyp=['SHE', 'MEANWHILE', 'PASSED', 'HER', 'LIFE', 'WITH', 'HER', 'PARENTS', 'IN', 'THE', 'STRICTEST', 'RETIREMENT', 'NEVER', 'LETTING', 'HERSELF', 'BE', 'SEEN', 'BUT', 'SHUNNING', 'EVERY', 'EYE', 'LEST', 'IT', 'SHOULD', 'READ', 'HER', 'MISFORTUNE', 'IN', 'HER', 'FACE'] +5639-40744-0022-1586: ref=['TIME', 'ROLLED', 'ON', 'THE', 'HOUR', 'OF', 'HER', 'DELIVERY', 'ARRIVED', 'IT', 'TOOK', 'PLACE', 'IN', 'THE', 'UTMOST', 'SECRECY', 'HER', 'MOTHER', 'TAKING', 'UPON', 'HER', 'THE', 'OFFICE', 'OF', 'MIDWIFE', 'AND', 'SHE', 'GAVE', 'BIRTH', 'TO', 'A', 'SON', 'ONE', 'OF', 'THE', 'MOST', 'BEAUTIFUL', 'EVER', 'SEEN'] +5639-40744-0022-1586: hyp=['TIME', 'ROLLED', 'ON', 'THE', 'HOUR', 'OF', 'HER', 'DELIVERY', 'ARRIVED', 'IT', 'TOOK', 'PLACE', 'IN', 'THE', 'UTMOST', 'SECRECY', 'HER', 'MOTHER', 'TAKING', 'UPON', 'HER', 'THE', 'OFFICE', 'OF', 'MIDWIFE', 'AND', 'SHE', 'GAVE', 'BIRTH', 'TO', 'A', 'SON', 'ONE', 'OF', 'THE', 'MOST', 'BEAUTIFUL', 'EVER', 'SEEN'] +5639-40744-0023-1587: ref=['WHEN', 'THE', 'BOY', 'WALKED', 'THROUGH', 'THE', 'STREETS', 'BLESSINGS', 'WERE', 'SHOWERED', 'UPON', 'HIM', 'BY', 'ALL', 'WHO', 'SAW', 'HIM', 'BLESSINGS', 'UPON', 'HIS', 'BEAUTY', 'UPON', 'THE', 'MOTHER', 'THAT', 'BORE', 'HIM', 'UPON', 'THE', 'FATHER', 'THAT', 'BEGOT', 'HIM', 'UPON', 'THOSE', 'WHO', 'BROUGHT', 'HIM', 'UP', 'SO', 'WELL'] +5639-40744-0023-1587: hyp=['WHEN', 'THE', 'BOY', 'WALKED', 'THROUGH', 'THE', 'STREETS', 'BLESSINGS', 'WERE', 'SHOWERED', 'UPON', 'HIM', 'BY', 'ALL', 'WHO', 'SAW', 'HIM', 'BLESSING', 'UPON', 'HIS', 'BEAUTY', 'UPON', 'THE', 'MOTHER', 'THAT', 'BORE', 'HIM', 'UPON', 'THE', 'FATHER', 'THAT', 'BEGOT', 'HIM', 'UPON', 'THOSE', 'WHO', 'BROUGHT', 'HIM', 'UP', 'SO', 'WELL'] +5639-40744-0024-1588: ref=['ONE', 'DAY', 'WHEN', 'THE', 'BOY', 'WAS', 'SENT', 'BY', 'HIS', 'GRANDFATHER', 'WITH', 'A', 'MESSAGE', 'TO', 'A', 'RELATION', 'HE', 'PASSED', 'ALONG', 'A', 'STREET', 'IN', 'WHICH', 'THERE', 'WAS', 'A', 'GREAT', 'CONCOURSE', 'OF', 'HORSEMEN'] +5639-40744-0024-1588: hyp=['ONE', 'DAY', 'WHEN', 'THE', 'BOY', 'WAS', 'SENT', 'BY', 'HIS', 'GRANDFATHER', 'WITH', 'A', 'MESSAGE', 'TO', 'A', 'RELATION', 'HE', 'PASSED', 'ALONG', 'A', 'STREET', 'IN', 'WHICH', 'THERE', 'WAS', 'A', 'GREAT', 'CONCOURSE', 'OF', 'HORSEMEN'] +5639-40744-0025-1589: ref=['THE', 'BED', 'SHE', 'TOO', 'WELL', 'REMEMBERED', 'WAS', 'THERE', 'AND', 'ABOVE', 'ALL', 'THE', 'CABINET', 'ON', 'WHICH', 'HAD', 'STOOD', 'THE', 'IMAGE', 'SHE', 'HAD', 'TAKEN', 'AWAY', 'WAS', 'STILL', 'ON', 'THE', 'SAME', 'SPOT'] +5639-40744-0025-1589: hyp=['THE', 'BED', 'SHE', 'TOO', 'WELL', 'REMEMBERED', 'WAS', 'THERE', 'AND', 'ABOVE', 'ALL', 'THE', 'CABINET', 'ON', 'WHICH', 'HAD', 'STOOD', 'THE', 'IMAGE', 'SHE', 'HAD', 'TAKEN', 'AWAY', 'WAS', 'STILL', 'ON', 'THE', 'SAME', 'SPOT'] +5639-40744-0026-1590: ref=['LUIS', 'WAS', 'OUT', 'OF', 'DANGER', 'IN', 'A', 'FORTNIGHT', 'IN', 'A', 'MONTH', 'HE', 'ROSE', 'FROM', 'HIS', 'BED', 'AND', 'DURING', 'ALL', 'THAT', 'TIME', 'HE', 
'WAS', 'VISITED', 'DAILY', 'BY', 'HIS', 'MOTHER', 'AND', 'GRANDMOTHER', 'AND', 'TREATED', 'BY', 'THE', 'MASTER', 'AND', 'MISTRESS', 'OF', 'THE', 'HOUSE', 'AS', 'IF', 'HE', 'WAS', 'THEIR', 'OWN', 'CHILD'] +5639-40744-0026-1590: hyp=['LEWIS', 'WAS', 'OUT', 'OF', 'DANGER', 'IN', 'A', 'FORTNIGHT', 'IN', 'A', 'MONTH', 'HE', 'ROSE', 'FROM', 'HIS', 'BED', 'AND', 'DURING', 'ALL', 'THAT', 'TIME', 'HE', 'WAS', 'VISITED', 'DAILY', 'BY', 'HIS', 'MOTHER', 'AND', 'GRANDMOTHER', 'AND', 'TREATED', 'BY', 'THE', 'MASTER', 'AND', 'MISTRESS', 'OF', 'THE', 'HOUSE', 'AS', 'IF', 'HE', 'WAS', 'THEIR', 'OWN', 'CHILD'] +5639-40744-0027-1591: ref=['THUS', 'SAYING', 'AND', 'PRESSING', 'THE', 'CRUCIFIX', 'TO', 'HER', 'BREAST', 'SHE', 'FELL', 'FAINTING', 'INTO', 'THE', 'ARMS', 'OF', 'DONA', 'ESTAFANIA', 'WHO', 'AS', 'A', 'GENTLEWOMAN', 'TO', 'WHOSE', 'SEX', 'PITY', 'IS', 'AS', 'NATURAL', 'AS', 'CRUELTY', 'IS', 'TO', 'MAN', 'INSTANTLY', 'PRESSED', 'HER', 'LIPS', 'TO', 'THOSE', 'OF', 'THE', 'FAINTING', 'GIRL', 'SHEDDING', 'OVER', 'HER', 'SO', 'MANY', 'TEARS', 'THAT', 'THERE', 'NEEDED', 'NO', 'OTHER', 'SPRINKLING', 'OF', 'WATER', 'TO', 'RECOVER', 'LEOCADIA', 'FROM', 'HER', 'SWOON'] +5639-40744-0027-1591: hyp=['THUS', 'SAYING', 'AND', 'PRESSING', 'THE', 'CRUCIFIX', 'TO', 'HER', 'BREAST', 'SHE', 'FELL', 'FAINTING', 'INTO', 'THE', 'ARMS', 'OF', 'DONA', 'ESTEPHANIA', 'WHO', 'AS', 'A', 'GENTLE', 'WOMAN', 'TO', 'WHOSE', 'SEX', 'PITY', 'IS', 'AS', 'NATURAL', 'AS', 'CRUELTY', 'AS', 'TO', 'MAN', 'INSTANTLY', 'PRESSED', 'HER', 'LIPS', 'TO', 'THOSE', 'OF', 'THE', 'FAINTING', 'GIRL', 'SHEDDING', 'OVER', 'HER', 'SO', 'MANY', 'TEARS', 'THAT', 'THERE', 'NEEDED', 'NO', 'OTHER', 'SPRINKLING', 'OF', 'WATER', 'TO', 'RECOVER', 'THE', 'OCADIA', 'FROM', 'HER', 'SWOON'] +5639-40744-0028-1592: ref=['I', 'HAVE', 'GREAT', 'THINGS', 'TO', 'TELL', 'YOU', 'SENOR', 'SAID', 'DONA', 'ESTAFANIA', 'TO', 'HER', 'HUSBAND', 'THE', 'CREAM', 'AND', 'SUBSTANCE', 'OF', 'WHICH', 'IS', 'THIS', 'THE', 'FAINTING', 'GIRL', 'BEFORE', 'YOU', 'IS', 'YOUR', 'DAUGHTER', 'AND', 'THAT', 'BOY', 'IS', 'YOUR', 'GRANDSON'] +5639-40744-0028-1592: hyp=['I', 'HAVE', 'GREAT', 'THINGS', 'TO', 'TELL', 'YOU', 'SENOR', 'SAID', 'DONA', 'STEPHANIA', 'TO', 'HER', 'HUSBAND', 'THE', 'CREAM', 'AND', 'SUBSTANCE', 'OF', 'WHICH', 'IS', 'THIS', 'THE', 'FAINTING', 'GIRL', 'BEFORE', 'YOU', 'IS', 'YOUR', 'DAUGHTER', 'AND', 'THE', 'BOY', 'IS', 'YOUR', 'GRANDSON'] +5639-40744-0029-1593: ref=['THIS', 'TRUTH', 'WHICH', 'I', 'HAVE', 'LEARNED', 'FROM', 'HER', 'LIPS', 'IS', 'CONFIRMED', 'BY', 'HIS', 'FACE', 'IN', 'WHICH', 'WE', 'HAVE', 'BOTH', 'BEHELD', 'THAT', 'OF', 'OUR', 'SON'] +5639-40744-0029-1593: hyp=['THIS', 'TRUTH', 'WHICH', 'I', 'HAVE', 'LEARNED', 'FROM', 'HER', 'LIPS', 'IS', 'CONFIRMED', 'BY', 'HIS', 'FACE', 'IN', 'WHICH', 'WE', 'HAVE', 'BOTH', 'BEHELD', 'THAT', 'OF', 'OUR', 'SON'] +5639-40744-0030-1594: ref=['JUST', 'THEN', 'LEOCADIA', 'CAME', 'TO', 'HERSELF', 'AND', 'EMBRACING', 'THE', 'CROSS', 'SEEMED', 'CHANGED', 'INTO', 'A', 'SEA', 'OF', 'TEARS', 'AND', 'THE', 'GENTLEMAN', 'REMAINED', 'IN', 'UTTER', 'BEWILDERMENT', 'UNTIL', 'HIS', 'WIFE', 'HAD', 'REPEATED', 'TO', 'HIM', 'FROM', 'BEGINNING', 'TO', 'END', "LEOCADIA'S", 'WHOLE', 'STORY', 'AND', 'HE', 'BELIEVED', 'IT', 'THROUGH', 'THE', 'BLESSED', 'DISPENSATION', 'OF', 'HEAVEN', 'WHICH', 'HAD', 'CONFIRMED', 'IT', 'BY', 'SO', 'MANY', 'CONVINCING', 'TESTIMONIES'] +5639-40744-0030-1594: hyp=['JUST', 'THEN', 'LEOKADIA', 'CAME', 'TO', 'HERSELF', 'AND', 'EMBRACING', 'THE', 'CROSS', 'SEEMED', 'CHANGED', 'INTO', 'A', 'SEA', 'OF', 'TEARS', 'AND', 
'THE', 'GENTLEMAN', 'REMAINING', 'IN', 'UTTER', 'BEWILDERMENT', 'UNTIL', 'HIS', 'WIFE', 'HAD', 'REPEATED', 'TO', 'HIM', 'FROM', 'BEGINNING', 'TO', 'END', 'LEIRCADIAS', 'WHOLE', 'STORY', 'AND', 'HE', 'BELIEVED', 'IT', 'THROUGH', 'THE', 'BLESSED', 'DISPENSATION', 'OF', 'HEAVEN', 'WHICH', 'HAD', 'CONFIRMED', 'IT', 'BY', 'SO', 'MANY', 'CONVINCING', 'TESTIMONIES'] +5639-40744-0031-1595: ref=['SO', 'PERSUASIVE', 'WERE', 'HER', 'ENTREATIES', 'AND', 'SO', 'STRONG', 'HER', 'ASSURANCES', 'THAT', 'NO', 'HARM', 'WHATEVER', 'COULD', 'RESULT', 'TO', 'THEM', 'FROM', 'THE', 'INFORMATION', 'SHE', 'SOUGHT', 'THEY', 'WERE', 'INDUCED', 'TO', 'CONFESS', 'THAT', 'ONE', "SUMMER'S", 'NIGHT', 'THE', 'SAME', 'SHE', 'HAD', 'MENTIONED', 'THEMSELVES', 'AND', 'ANOTHER', 'FRIEND', 'BEING', 'OUT', 'ON', 'A', 'STROLL', 'WITH', 'RODOLFO', 'THEY', 'HAD', 'BEEN', 'CONCERNED', 'IN', 'THE', 'ABDUCTION', 'OF', 'A', 'GIRL', 'WHOM', 'RODOLFO', 'CARRIED', 'OFF', 'WHILST', 'THE', 'REST', 'OF', 'THEM', 'DETAINED', 'HER', 'FAMILY', 'WHO', 'MADE', 'A', 'GREAT', 'OUTCRY', 'AND', 'WOULD', 'HAVE', 'DEFENDED', 'HER', 'IF', 'THEY', 'COULD'] +5639-40744-0031-1595: hyp=['SO', 'PERSUASIVE', 'WERE', 'HER', 'ENTREATIES', 'AND', 'SO', 'STRONG', 'HER', 'ASSURANCES', 'THAT', 'NO', 'HARM', 'WHATEVER', 'COULD', 'RESULT', 'TO', 'THEM', 'FROM', 'THE', 'INFORMATION', 'SHE', 'SOUGHT', 'THEY', 'WERE', 'INDUCED', 'TO', 'CONFESS', 'THAT', 'ONE', "SUMMER'S", 'NIGHT', 'THE', 'SAME', 'SHE', 'HAD', 'MENTIONED', 'THEMSELVES', 'AND', 'ANOTHER', 'FRIEND', 'BEING', 'OUT', 'ON', 'A', 'STROLL', 'WITH', 'UDOLPHO', 'THEY', 'HAD', 'BEEN', 'CONCERNED', 'IN', 'THE', 'ADOCTION', 'OF', 'A', 'GIRL', 'WHOM', 'RUDOLPHO', 'CARRIED', 'OFF', 'WHILST', 'THE', 'REST', 'OF', 'THEM', 'DETAINED', 'HER', 'FAMILY', 'WHO', 'MADE', 'A', 'GREAT', 'OUTCRY', 'AND', 'WOULD', 'HAVE', 'DEFENDED', 'HER', 'IF', 'THEY', 'COULD'] +5639-40744-0032-1596: ref=['FOR', "GOD'S", 'SAKE', 'MY', 'LADY', 'MOTHER', 'GIVE', 'ME', 'A', 'WIFE', 'WHO', 'WOULD', 'BE', 'AN', 'AGREEABLE', 'COMPANION', 'NOT', 'ONE', 'WHO', 'WILL', 'DISGUST', 'ME', 'SO', 'THAT', 'WE', 'MAY', 'BOTH', 'BEAR', 'EVENLY', 'AND', 'WITH', 'MUTUAL', 'GOOD', 'WILL', 'THE', 'YOKE', 'IMPOSED', 'ON', 'US', 'BY', 'HEAVEN', 'INSTEAD', 'OF', 'PULLING', 'THIS', 'WAY', 'AND', 'THAT', 'WAY', 'AND', 'FRETTING', 'EACH', 'OTHER', 'TO', 'DEATH'] +5639-40744-0032-1596: hyp=['FOR', "GOD'S", 'SAKE', 'MY', 'LADY', 'MOTHER', 'GIVE', 'ME', 'A', 'WIFE', 'WHO', 'WOULD', 'BE', 'AN', 'AGREEABLE', 'COMPANION', 'NOT', 'ONE', 'WHO', 'WILL', 'DISGUST', 'ME', 'SO', 'THAT', 'WE', 'MAY', 'BOTH', 'BEAR', 'EVENLY', 'AND', 'WITH', 'MUTUAL', 'GOOD', 'WILL', 'THE', 'YOKE', 'IMPOSED', 'ON', 'US', 'BY', 'HEAVEN', 'INSTEAD', 'OF', 'PULLING', 'THIS', 'WAY', 'AND', 'THAT', 'WAY', 'AND', 'FRETTING', 'EACH', 'OTHER', 'TO', 'DEATH'] +5639-40744-0033-1597: ref=['HER', 'BEARING', 'WAS', 'GRACEFUL', 'AND', 'ANIMATED', 'SHE', 'LED', 'HER', 'SON', 'BY', 'THE', 'HAND', 'AND', 'BEFORE', 'HER', 'WALKED', 'TWO', 'MAIDS', 'WITH', 'WAX', 'LIGHTS', 'AND', 'SILVER', 'CANDLESTICKS'] +5639-40744-0033-1597: hyp=['HER', 'BEARING', 'WAS', 'GRACEFUL', 'AND', 'ANIMATED', 'SHE', 'LED', 'HER', 'SON', 'BY', 'THE', 'HAND', 'AND', 'BEFORE', 'HER', 'WALKED', 'TWO', 'MAIDS', 'WITH', 'WAX', 'LIGHTS', 'AND', 'SILVER', 'CANDLESTICKS'] +5639-40744-0034-1598: ref=['ALL', 'ROSE', 'TO', 'DO', 'HER', 'REVERENCE', 'AS', 'IF', 'SOMETHING', 'FROM', 'HEAVEN', 'HAD', 'MIRACULOUSLY', 'APPEARED', 'BEFORE', 'THEM', 'BUT', 'GAZING', 'ON', 'HER', 'ENTRANCED', 'WITH', 'ADMIRATION', 'NOT', 'ONE', 'OF', 'THEM', 'WAS', 'ABLE', 'TO', 
'ADDRESS', 'A', 'SINGLE', 'WORD', 'TO', 'HER'] +5639-40744-0034-1598: hyp=['ALL', 'ROSE', 'TO', 'DO', 'HER', 'REVERENCE', 'AS', 'IF', 'SOMETHING', 'FROM', 'HEAVEN', 'HAD', 'MIRACULOUSLY', 'APPEARED', 'BEFORE', 'THEM', 'BUT', 'GAZING', 'ON', 'HER', 'ENTRANCED', 'WITH', 'ADMIRATION', 'NOT', 'ONE', 'OF', 'THEM', 'WAS', 'ABLE', 'TO', 'ADDRESS', 'A', 'SINGLE', 'WORD', 'TO', 'HER'] +5639-40744-0035-1599: ref=['SHE', 'REFLECTED', 'HOW', 'NEAR', 'SHE', 'STOOD', 'TO', 'THE', 'CRISIS', 'WHICH', 'WAS', 'TO', 'DETERMINE', 'WHETHER', 'SHE', 'WAS', 'TO', 'BE', 'BLESSED', 'OR', 'UNHAPPY', 'FOR', 'EVER', 'AND', 'RACKED', 'BY', 'THE', 'INTENSITY', 'OF', 'HER', 'EMOTIONS', 'SHE', 'SUDDENLY', 'CHANGED', 'COLOUR', 'HER', 'HEAD', 'DROPPED', 'AND', 'SHE', 'FELL', 'FORWARD', 'IN', 'A', 'SWOON', 'INTO', 'THE', 'ARMS', 'OF', 'THE', 'DISMAYED', 'ESTAFANIA'] +5639-40744-0035-1599: hyp=['SHE', 'REFLECTED', 'HOW', 'NEAR', 'SHE', 'STOOD', 'TO', 'THE', 'CRISIS', 'WHICH', 'WAS', 'TO', 'DETERMINE', 'WHETHER', 'SHE', 'WAS', 'TO', 'BE', 'BLESSED', 'OR', 'UNHAPPY', 'FOR', 'EVER', 'AND', 'RACKED', 'BY', 'THE', 'INTENSITY', 'OF', 'HER', 'EMOTIONS', 'SHE', 'SUDDENLY', 'CHANGED', 'COLOR', 'HER', 'HEAD', 'DROPPED', 'AND', 'SHE', 'FELL', 'FORWARD', 'IN', 'A', 'SWOON', 'INTO', 'THE', 'ARMS', 'OF', 'THE', 'DISMAYED', 'STEPHANIA'] +5639-40744-0036-1600: ref=['HIS', 'MOTHER', 'HAD', 'LEFT', 'HER', 'TO', 'HIM', 'AS', 'BEING', 'HER', 'DESTINED', 'PROTECTOR', 'BUT', 'WHEN', 'SHE', 'SAW', 'THAT', 'HE', 'TOO', 'WAS', 'INSENSIBLE', 'SHE', 'WAS', 'NEAR', 'MAKING', 'A', 'THIRD', 'AND', 'WOULD', 'HAVE', 'DONE', 'SO', 'HAD', 'HE', 'NOT', 'COME', 'TO', 'HIMSELF'] +5639-40744-0036-1600: hyp=['HIS', 'MOTHER', 'HAD', 'LEFT', 'HER', 'TO', 'HIM', 'AS', 'BEING', 'HER', 'DESTINED', 'PROTECTOR', 'BUT', 'WHEN', 'SHE', 'SAW', 'THAT', 'HE', 'TOO', 'WAS', 'INSENSIBLE', 'SHE', 'WAS', 'NEAR', 'MAKING', 'A', 'THIRD', 'AND', 'WOULD', 'HAVE', 'DONE', 'SO', 'HAD', 'HE', 'NOT', 'COME', 'TO', 'HIMSELF'] +5639-40744-0037-1601: ref=['KNOW', 'THEN', 'SON', 'OF', 'MY', 'HEART', 'THAT', 'THIS', 'FAINTING', 'LADY', 'IS', 'YOUR', 'REAL', 'BRIDE', 'I', 'SAY', 'REAL', 'BECAUSE', 'SHE', 'IS', 'THE', 'ONE', 'WHOM', 'YOUR', 'FATHER', 'AND', 'I', 'HAVE', 'CHOSEN', 'FOR', 'YOU', 'AND', 'THE', 'PORTRAIT', 'WAS', 'A', 'PRETENCE'] +5639-40744-0037-1601: hyp=['KNOW', 'THEN', 'SON', 'OF', 'MY', 'HEART', 'THAT', 'THIS', 'FAINTING', 'LADY', 'IS', 'YOUR', 'REAL', 'BRIDE', 'I', 'SAY', 'REAL', 'BECAUSE', 'SHE', 'IS', 'THE', 'ONE', 'WHOM', 'YOUR', 'FATHER', 'AND', 'I', 'HAVE', 'CHOSEN', 'FOR', 'YOU', 'AND', 'THE', 'PORTRAIT', 'WAS', 'A', 'PRETENCE'] +5639-40744-0038-1602: ref=['JUST', 'AT', 'THE', 'MOMENT', 'WHEN', 'THE', 'TEARS', 'OF', 'THE', 'PITYING', 'BEHOLDERS', 'FLOWED', 'FASTEST', 'AND', 'THEIR', 'EJACULATIONS', 'WERE', 'MOST', 'EXPRESSIVE', 'OF', 'DESPAIR', 'LEOCADIA', 'GAVE', 'SIGNS', 'OF', 'RECOVERY', 'AND', 'BROUGHT', 'BACK', 'GLADNESS', 'TO', 'THE', 'HEARTS', 'OF', 'ALL'] +5639-40744-0038-1602: hyp=['JUST', 'AT', 'THE', 'MOMENT', 'WHEN', 'THE', 'TEARS', 'OF', 'THE', 'PITYING', 'BEHOLDERS', 'FLOWED', 'FASTEST', 'AND', 'THEIR', 'EJACULATIONS', 'WERE', 'MOST', 'EXPRESSIVE', 'OF', 'DESPAIR', 'THE', 'ARCADIA', 'GAVE', 'SIGNS', 'OF', 'RECOVERY', 'AND', 'BROUGHT', 'BACK', 'GLADNESS', 'TO', 'THE', 'HEARTS', 'OF', 'ALL'] +5639-40744-0039-1603: ref=['WHEN', 'SHE', 'CAME', 'TO', 'HER', 'SENSES', 'AND', 'BLUSHING', 'TO', 'FIND', 'HERSELF', 'IN', "RODOLFO'S", 'ARMS', 'WOULD', 'HAVE', 'DISENGAGED', 'HERSELF', 'NO', 'SENORA', 'HE', 'SAID', 'THAT', 'MUST', 'NOT', 'BE', 'STRIVE', 'NOT', 'TO', 
'WITHDRAW', 'FROM', 'THE', 'ARMS', 'OF', 'HIM', 'WHO', 'HOLDS', 'YOU', 'IN', 'HIS', 'SOUL'] +5639-40744-0039-1603: hyp=['WHEN', 'SHE', 'CAME', 'TO', 'HER', 'SENSES', 'AND', 'BLUSHING', 'TO', 'FIND', 'HERSELF', 'IN', "RUDOLPHO'S", 'ARMS', 'WOULD', 'HAVE', 'DISENGAGED', 'HERSELF', 'NO', 'SENORA', 'HE', 'SAID', 'THAT', 'MUST', 'NOT', 'BE', 'STRIVE', 'NOT', 'TO', 'WITHDRAW', 'FROM', 'THE', 'ARMS', 'OF', 'HIM', 'WHO', 'HOLDS', 'YOU', 'IN', 'HIS', 'SOUL'] +5639-40744-0040-1604: ref=['THIS', 'WAS', 'DONE', 'FOR', 'THE', 'EVENT', 'TOOK', 'PLACE', 'AT', 'A', 'TIME', 'WHEN', 'THE', 'CONSENT', 'OF', 'THE', 'PARTIES', 'WAS', 'SUFFICIENT', 'FOR', 'THE', 'CELEBRATION', 'OF', 'A', 'MARRIAGE', 'WITHOUT', 'ANY', 'OF', 'THE', 'PRELIMINARY', 'FORMALITIES', 'WHICH', 'ARE', 'NOW', 'SO', 'PROPERLY', 'REQUIRED'] +5639-40744-0040-1604: hyp=['THIS', 'WAS', 'DONE', 'FOR', 'THE', 'EVENT', 'TOOK', 'PLACE', 'AT', 'A', 'TIME', 'WHEN', 'THE', 'CONSENT', 'OF', 'THE', 'PARTIES', 'WAS', 'SUFFICIENT', 'FOR', 'THE', 'CELEBRATION', 'OF', 'THE', 'MARRIAGE', 'WITHOUT', 'ANY', 'OF', 'THE', 'PRELIMINARY', 'FORMALITIES', 'WHICH', 'ARE', 'NOW', 'SO', 'PROPERLY', 'REQUIRED'] +5639-40744-0041-1605: ref=['NOR', 'WAS', 'RODOLFO', 'LESS', 'SURPRISED', 'THAN', 'THEY', 'AND', 'THE', 'BETTER', 'TO', 'ASSURE', 'HIMSELF', 'OF', 'SO', 'WONDERFUL', 'A', 'FACT', 'HE', 'BEGGED', 'LEOCADIA', 'TO', 'GIVE', 'HIM', 'SOME', 'TOKEN', 'WHICH', 'SHOULD', 'MAKE', 'PERFECTLY', 'CLEAR', 'TO', 'HIM', 'THAT', 'WHICH', 'INDEED', 'HE', 'DID', 'NOT', 'DOUBT', 'SINCE', 'IT', 'WAS', 'AUTHENTICATED', 'BY', 'HIS', 'PARENTS'] +5639-40744-0041-1605: hyp=['NOR', 'WAS', 'RUDOLPHO', 'LESS', 'SURPRISED', 'THAN', 'THEY', 'AND', 'THE', 'BETTER', 'TO', 'ASSURE', 'HIMSELF', 'OF', 'SO', 'WONDERFUL', 'A', 'FACT', 'HE', 'BEGGED', 'LOCADIA', 'TO', 'GIVE', 'HIM', 'SOME', 'TOKEN', 'WHICH', 'SHOULD', 'MAKE', 'PERFECTLY', 'CLEAR', 'TO', 'HIM', 'THAT', 'WHICH', 'INDEED', 'HE', 'DID', 'NOT', 'DOUBT', 'SINCE', 'IT', 'WAS', 'AUTHENTICATED', 'BY', 'HIS', 'PARENTS'] +5683-32865-0000-1606: ref=['YOU', 'KNOW', 'CAPTAIN', 'LAKE'] +5683-32865-0000-1606: hyp=['YOU', 'KNOW', 'CAPTAIN', 'LAKE'] +5683-32865-0001-1607: ref=['SAID', 'LORD', 'CHELFORD', 'ADDRESSING', 'ME'] +5683-32865-0001-1607: hyp=['SAID', 'LORD', 'CHELFORD', 'ADDRESSING', 'ME'] +5683-32865-0002-1608: ref=['HE', 'HAD', 'HIS', 'HAND', 'UPON', "LAKE'S", 'SHOULDER'] +5683-32865-0002-1608: hyp=['HE', 'HAD', 'HIS', 'HAND', 'UPON', "LAKE'S", 'SHOULDER'] +5683-32865-0003-1609: ref=['THEY', 'ARE', 'COUSINS', 'YOU', 'KNOW', 'WE', 'ARE', 'ALL', 'COUSINS'] +5683-32865-0003-1609: hyp=['THEY', 'ARE', 'COUSINS', 'YOU', 'KNOW', 'WE', 'ARE', 'ALL', 'COUSINS'] +5683-32865-0004-1610: ref=['WHATEVER', 'LORD', 'CHELFORD', 'SAID', 'MISS', 'BRANDON', 'RECEIVED', 'IT', 'VERY', 'GRACIOUSLY', 'AND', 'EVEN', 'WITH', 'A', 'MOMENTARY', 'SMILE'] +5683-32865-0004-1610: hyp=['WHATEVER', 'LORD', 'CHELFORD', 'SAID', 'MISS', 'BRANDON', 'RECEIVED', 'IT', 'VERY', 'GRACIOUSLY', 'AND', 'EVEN', 'WITH', 'A', 'MOMENTARY', 'SMILE'] +5683-32865-0005-1611: ref=['BUT', 'HER', 'GREETING', 'TO', 'CAPTAIN', 'LAKE', 'WAS', 'MORE', 'THAN', 'USUALLY', 'HAUGHTY', 'AND', 'FROZEN', 'AND', 'HER', 'FEATURES', 'I', 'FANCIED', 'PARTICULARLY', 'PROUD', 'AND', 'PALE'] +5683-32865-0005-1611: hyp=['BUT', 'HER', 'GREETING', 'TO', 'CAPTAIN', 'LAKE', 'WAS', 'MORE', 'THAN', 'USUALLY', 'HAUGHTY', 'AND', 'FROZEN', 'AND', 'HER', 'FEATURES', 'I', 'FANCIED', 'PARTICULARLY', 'PROUD', 'AND', 'PALE'] +5683-32865-0006-1612: ref=['AT', 'DINNER', 'LAKE', 'WAS', 'EASY', 'AND', 'AMUSING'] 
+5683-32865-0006-1612: hyp=['AT', 'DINNER', 'LAKE', 'WAS', 'EASY', 'AND', 'AMUSING'] +5683-32865-0007-1613: ref=["I'M", 'GLAD', 'YOU', 'LIKE', 'IT', 'SAYS', 'WYLDER', 'CHUCKLING', 'BENIGNANTLY', 'ON', 'IT', 'OVER', 'HIS', 'SHOULDER'] +5683-32865-0007-1613: hyp=["I'M", 'GLAD', 'YOU', 'LIKE', 'IT', 'SAYS', 'WYLDER', 'CHUCKLING', 'BENIGNANTLY', 'ON', 'IT', 'OVER', 'HIS', 'SHOULDER'] +5683-32865-0008-1614: ref=['I', 'BELIEVE', 'I', 'HAVE', 'A', 'LITTLE', 'TASTE', 'THAT', 'WAY', 'THOSE', 'ARE', 'ALL', 'REAL', 'YOU', 'KNOW', 'THOSE', 'JEWELS'] +5683-32865-0008-1614: hyp=['I', 'BELIEVE', 'I', 'HAVE', 'A', 'LITTLE', 'TASTE', 'THAT', 'WAY', 'THOSE', 'ARE', 'ALL', 'REAL', 'YOU', 'KNOW', 'THOSE', 'JEWELS'] +5683-32865-0009-1615: ref=['AND', 'HE', 'PLACED', 'IT', 'IN', 'THAT', "GENTLEMAN'S", 'FINGERS', 'WHO', 'NOW', 'TOOK', 'HIS', 'TURN', 'AT', 'THE', 'LAMP', 'AND', 'CONTEMPLATED', 'THE', 'LITTLE', 'PARALLELOGRAM', 'WITH', 'A', 'GLEAM', 'OF', 'SLY', 'AMUSEMENT'] +5683-32865-0009-1615: hyp=['AND', 'HE', 'PLACED', 'IT', 'IN', 'THAT', "GENTLEMAN'S", 'FINGERS', 'WHO', 'NOW', 'TOOK', 'HIS', 'TURN', 'AT', 'THE', 'LAMP', 'AND', 'CONTEMPLATED', 'THE', 'LITTLE', 'PARALLELOGRAM', 'WITH', 'A', 'GLEAM', 'OF', 'SLY', 'AMUSEMENT'] +5683-32865-0010-1616: ref=['I', 'WAS', 'THINKING', "IT'S", 'VERY', 'LIKE', 'THE', 'ACE', 'OF', 'HEARTS', 'ANSWERED', 'THE', 'CAPTAIN', 'SOFTLY', 'SMILING', 'ON'] +5683-32865-0010-1616: hyp=['I', 'WAS', 'THINKING', "IT'S", 'VERY', 'LIKE', 'THE', 'ACE', 'OF', 'HEARTS', 'ANSWERED', 'THE', 'CAPTAIN', 'SOFTLY', 'SMILING', 'ON'] +5683-32865-0011-1617: ref=['WHEREUPON', 'LAKE', 'LAUGHED', 'QUIETLY', 'STILL', 'LOOKING', 'ON', 'THE', 'ACE', 'OF', 'HEARTS', 'WITH', 'HIS', 'SLY', 'EYES'] +5683-32865-0011-1617: hyp=['WHEREUPON', 'LAKE', 'LAUGHED', 'QUIETLY', 'STILL', 'LOOKING', 'ON', 'THE', 'ACE', 'OF', 'HEARTS', 'WITH', 'HIS', 'SLY', 'EYES'] +5683-32865-0012-1618: ref=['AND', 'WYLDER', 'LAUGHED', 'TOO', 'MORE', 'SUDDENLY', 'AND', 'NOISILY', 'THAN', 'THE', 'HUMOUR', 'OF', 'THE', 'JOKE', 'SEEMED', 'QUITE', 'TO', 'CALL', 'FOR', 'AND', 'GLANCED', 'A', 'GRIM', 'LOOK', 'FROM', 'THE', 'CORNERS', 'OF', 'HIS', 'EYES', 'ON', 'LAKE', 'BUT', 'THE', 'GALLANT', 'CAPTAIN', 'DID', 'NOT', 'SEEM', 'TO', 'PERCEIVE', 'IT', 'AND', 'AFTER', 'A', 'FEW', 'SECONDS', 'MORE', 'HE', 'HANDED', 'IT', 'VERY', 'INNOCENTLY', 'BACK', 'TO', 'MISSUS', 'DOROTHY', 'ONLY', 'REMARKING'] +5683-32865-0012-1618: hyp=['AND', 'WYLDER', 'LAUGHED', 'TOO', 'MORE', 'SUDDENLY', 'AND', 'NOISILY', 'THAN', 'THE', 'HUMOUR', 'OF', 'THE', 'JOKE', 'SEEMED', 'QUITE', 'TO', 'CALL', 'FOR', 'AND', 'GLANCED', 'A', 'GRIM', 'LOOK', 'FROM', 'THE', 'CORNERS', 'OF', 'HIS', 'EYES', 'ON', 'LAKE', 'BUT', 'THE', 'GALLANT', 'CAPTAIN', 'DID', 'NOT', 'SEEM', 'TO', 'PERCEIVE', 'IT', 'AND', 'AFTER', 'A', 'FEW', 'SECONDS', 'MORE', 'HE', 'HANDED', 'IT', 'VERY', 'INNOCENTLY', 'BACK', 'TO', 'MISSUS', 'DOROTHY', 'ONLY', 'REMARKING'] +5683-32865-0013-1619: ref=['DO', 'YOU', 'KNOW', 'LAKE', 'OH', 'I', 'REALLY', "CAN'T", 'TELL', 'BUT', "HE'LL", 'SOON', 'TIRE', 'OF', 'COUNTRY', 'LIFE'] +5683-32865-0013-1619: hyp=['DO', 'YOU', 'KNOW', 'LAKE', 'OH', 'I', 'REALLY', "CAN'T", 'TELL', 'BUT', "HE'LL", 'SOON', 'TIRE', 'OF', 'COUNTRY', 'LIFE'] +5683-32865-0014-1620: ref=["HE'S", 'NOT', 'A', 'MAN', 'FOR', 'COUNTRY', 'QUARTERS'] +5683-32865-0014-1620: hyp=["HE'S", 'NOT', 'A', 'MAN', 'FOR', 'COUNTRY', 'QUARTERS'] +5683-32865-0015-1621: ref=['I', 'HAD', 'A', 'HORRID', 'DREAM', 'ABOUT', 'HIM', 'LAST', 'NIGHT', 'THAT'] +5683-32865-0015-1621: hyp=['I', 'HAD', 'A', 'HORRID', 'DREAM', 'ABOUT', 
'HIM', 'LAST', 'NIGHT', 'THAT'] +5683-32865-0016-1622: ref=['OH', 'I', 'KNOW', "THAT'S", 'LORNE', 'BRANDON'] +5683-32865-0016-1622: hyp=['OH', 'I', 'KNOW', "THAT'S", 'LORN', 'BRANDON'] +5683-32865-0017-1623: ref=['ALL', 'THE', 'TIME', 'HE', 'WAS', 'TALKING', 'TO', 'ME', 'HIS', 'ANGRY', 'LITTLE', 'EYES', 'WERE', 'FOLLOWING', 'LAKE'] +5683-32865-0017-1623: hyp=['ALL', 'THE', 'TIME', 'HE', 'WAS', 'TALKING', 'TO', 'ME', 'HIS', 'ANGRY', 'LITTLE', 'EYES', 'WERE', 'FOLLOWING', 'LAKE'] +5683-32866-0000-1624: ref=['MISS', 'LAKE', 'DECLINED', 'THE', 'CARRIAGE', 'TO', 'NIGHT'] +5683-32866-0000-1624: hyp=['MISS', 'LAKE', 'DECLINED', 'THE', 'CARRIAGE', 'TO', 'NIGHT'] +5683-32866-0001-1625: ref=['AND', 'HE', 'ADDED', 'SOMETHING', 'STILL', 'LESS', 'COMPLIMENTARY'] +5683-32866-0001-1625: hyp=['AND', 'HE', 'ADDED', 'SOMETHING', 'STILL', 'LESS', 'COMPLIMENTARY'] +5683-32866-0002-1626: ref=['BUT', "DON'T", 'THESE', 'VERY', 'WISE', 'THINGS', 'SOMETIMES', 'TURN', 'OUT', 'VERY', 'FOOLISHLY'] +5683-32866-0002-1626: hyp=['BUT', "DON'T", 'THESE', 'VERY', 'WISE', 'THINGS', 'SOMETIMES', 'TURN', 'OUT', 'VERY', 'FOOLISHLY'] +5683-32866-0003-1627: ref=['IN', 'THE', 'MEANTIME', 'I', 'HAD', 'FORMED', 'A', 'NEW', 'IDEA', 'OF', 'HER'] +5683-32866-0003-1627: hyp=['IN', 'THE', 'MEANTIME', 'I', 'HAD', 'FORMED', 'A', 'NEW', 'IDEA', 'OF', 'HER'] +5683-32866-0004-1628: ref=['BY', 'THIS', 'TIME', 'LORD', 'CHELFORD', 'AND', 'WYLDER', 'RETURNED', 'AND', 'DISGUSTED', 'RATHER', 'WITH', 'MYSELF', 'I', 'RUMINATED', 'ON', 'MY', 'WANT', 'OF', 'GENERAL', 'SHIP'] +5683-32866-0004-1628: hyp=['BY', 'THIS', 'TIME', 'LORD', 'CHELFORD', 'AND', 'WYLDER', 'RETURNED', 'AND', 'DISGUSTED', 'RATHER', 'WITH', 'MYSELF', 'I', 'RUMINATED', 'ON', 'MY', 'WANT', 'OF', 'GENERALSHIP'] +5683-32866-0005-1629: ref=['AND', 'HE', 'MADE', 'A', 'LITTLE', 'DIP', 'OF', 'HIS', 'CANE', 'TOWARDS', 'BRANDON', 'HALL', 'OVER', 'HIS', 'SHOULDER'] +5683-32866-0005-1629: hyp=['AND', 'HE', 'MADE', 'A', 'LITTLE', 'DIP', 'OF', 'HIS', 'CANE', 'TOWARDS', 'BRANDON', 'HALL', 'OVER', 'HIS', 'SHOULDER'] +5683-32866-0006-1630: ref=['YES', 'SO', 'THEY', 'SAID', 'BUT', 'THAT', 'WOULD', 'I', 'THINK', 'HAVE', 'BEEN', 'WORSE'] +5683-32866-0006-1630: hyp=['YES', 'SO', 'THEY', 'SAID', 'BUT', 'THAT', 'WOULD', 'I', 'THINK', 'HAVE', 'BEEN', 'WORSE'] +5683-32866-0007-1631: ref=['IF', 'A', "FELLOW'S", 'BEEN', 'A', 'LITTLE', 'BIT', 'WILD', "HE'S", 'BEELZEBUB', 'AT', 'ONCE'] +5683-32866-0007-1631: hyp=['IF', 'A', "FELLOW'S", 'BEEN', 'A', 'LITTLE', 'BIT', 'WILD', 'HE', 'IS', 'BEELZEBUB', 'AT', 'ONCE'] +5683-32866-0008-1632: ref=["BRACTON'S", 'A', 'VERY', 'GOOD', 'FELLOW', 'I', 'CAN', 'ASSURE', 'YOU'] +5683-32866-0008-1632: hyp=["BRACTON'S", 'A', 'VERY', 'GOOD', 'FELLOW', 'I', 'CAN', 'ASSURE', 'YOU'] +5683-32866-0009-1633: ref=['I', "DON'T", 'KNOW', 'AND', "CAN'T", 'SAY', 'HOW', 'YOU', 'FINE', 'GENTLEMEN', 'DEFINE', 'WICKEDNESS', 'ONLY', 'AS', 'AN', 'OBSCURE', 'FEMALE', 'I', 'SPEAK', 'ACCORDING', 'TO', 'MY', 'LIGHTS', 'AND', 'HE', 'IS', 'GENERALLY', 'THOUGHT', 'THE', 'WICKEDEST', 'MAN', 'IN', 'THIS', 'COUNTY'] +5683-32866-0009-1633: hyp=['I', "DON'T", 'KNOW', 'AND', "CAN'T", 'SAY', 'HOW', 'YOU', 'FINE', 'GENTLEMEN', 'DEFINE', 'WICKEDNESS', 'ONLY', 'AS', 'AN', 'OBSCURE', 'FEMALE', 'I', 'SPEAK', 'ACCORDING', 'TO', 'MY', 'LIGHTS', 'AND', 'HE', 'IS', 'GENERALLY', 'THOUGHT', 'THE', 'WICKEDEST', 'MAN', 'IN', 'THIS', 'COUNTY'] +5683-32866-0010-1634: ref=['WELL', 'YOU', 'KNOW', 'RADIE', 'WOMEN', 'LIKE', 'WICKED', 'FELLOWS', 'IT', 'IS', 'CONTRAST', 'I', 'SUPPOSE', 'BUT', 'THEY', 'DO', 'AND', "I'M", 'SURE', 
'FROM', 'WHAT', 'BRACTON', 'HAS', 'SAID', 'TO', 'ME', 'I', 'KNOW', 'HIM', 'INTIMATELY', 'THAT', 'DORCAS', 'LIKES', 'HIM', 'AND', 'I', "CAN'T", 'CONCEIVE', 'WHY', 'THEY', 'ARE', 'NOT', 'MARRIED'] +5683-32866-0010-1634: hyp=['WELL', 'YOU', 'KNOW', 'RADIE', 'WOMEN', 'LIKE', 'WICKED', 'FELLOWS', 'IT', 'IS', 'CONTRAST', 'I', 'SUPPOSE', 'BUT', 'THEY', 'DO', 'AND', "I'M", 'SURE', 'FROM', 'WHAT', 'BRACTON', 'HAS', 'SAID', 'TO', 'ME', 'I', 'KNOW', 'HIM', 'INTIMATELY', 'THAT', 'DORCAS', 'LIKES', 'HIM', 'AND', 'I', "CAN'T", 'CONCEIVE', 'WHY', 'THEY', 'ARE', 'NOT', 'MARRIED'] +5683-32866-0011-1635: ref=['THEIR', 'WALK', 'CONTINUED', 'SILENT', 'FOR', 'THE', 'GREATER', 'PART', 'NEITHER', 'WAS', 'QUITE', 'SATISFIED', 'WITH', 'THE', 'OTHER', 'BUT', 'RACHEL', 'AT', 'LAST', 'SAID'] +5683-32866-0011-1635: hyp=['THEIR', 'WALK', 'CONTINUED', 'SILENT', 'FOR', 'THE', 'GREATER', 'PART', 'NEITHER', 'WAS', 'QUITE', 'SATISFIED', 'WITH', 'THE', 'OTHER', 'BUT', 'RACHEL', 'AT', 'LAST', 'SAID'] +5683-32866-0012-1636: ref=['NOW', "THAT'S", 'IMPOSSIBLE', 'RADIE', 'FOR', 'I', 'REALLY', "DON'T", 'THINK', 'I', 'ONCE', 'THOUGHT', 'OF', 'HIM', 'ALL', 'THIS', 'EVENING', 'EXCEPT', 'JUST', 'WHILE', 'WE', 'WERE', 'TALKING'] +5683-32866-0012-1636: hyp=['NOW', "THAT'S", 'IMPOSSIBLE', 'RADIE', 'FOR', 'I', 'REALLY', "DON'T", 'THINK', 'I', 'ONCE', 'THOUGHT', 'OF', 'HIM', 'ALL', 'THIS', 'EVENING', 'EXCEPT', 'JUST', 'WHILE', 'WE', 'WERE', 'TALKING'] +5683-32866-0013-1637: ref=['THERE', 'WAS', 'A', 'BRIGHT', 'MOONLIGHT', 'BROKEN', 'BY', 'THE', 'SHADOWS', 'OF', 'OVERHANGING', 'BOUGHS', 'AND', 'WITHERED', 'LEAVES', 'AND', 'THE', 'MOTTLED', 'LIGHTS', 'AND', 'SHADOWS', 'GLIDED', 'ODDLY', 'ACROSS', 'HIS', 'PALE', 'FEATURES'] +5683-32866-0013-1637: hyp=['THERE', 'WAS', 'A', 'BRIGHT', 'MOONLIGHT', 'BROKEN', 'BY', 'THE', 'SHADOWS', 'OF', 'OVERHANGING', 'BOUGHS', 'AND', 'WITHERED', 'LEAVES', 'AND', 'THE', 'MOTTLED', 'LIGHTS', 'AND', 'SHADOWS', 'GLIDED', 'ODDLY', 'ACROSS', 'HIS', 'PALE', 'FEATURES'] +5683-32866-0014-1638: ref=["DON'T", 'INSULT', 'ME', 'STANLEY', 'BY', 'TALKING', 'AGAIN', 'AS', 'YOU', 'DID', 'THIS', 'MORNING'] +5683-32866-0014-1638: hyp=["DON'T", 'INSULT', 'ME', 'STANLEY', 'BY', 'TALKING', 'AGAIN', 'AS', 'YOU', 'DID', 'THIS', 'MORNING'] +5683-32866-0015-1639: ref=['WHAT', 'I', 'SAY', 'IS', 'ALTOGETHER', 'ON', 'YOUR', 'OWN', 'ACCOUNT'] +5683-32866-0015-1639: hyp=['WHAT', 'I', 'SAY', 'IS', 'ALTOGETHER', 'ON', 'YOUR', 'OWN', 'ACCOUNT'] +5683-32866-0016-1640: ref=['MARK', 'MY', 'WORDS', "YOU'LL", 'FIND', 'HIM', 'TOO', 'STRONG', 'FOR', 'YOU', 'AYE', 'AND', 'TOO', 'DEEP'] +5683-32866-0016-1640: hyp=['MARK', 'MY', 'WORDS', "YOU'LL", 'FIND', 'HIM', 'TOO', 'STRONG', 'FOR', 'YOU', 'AY', 'AND', 'TOO', 'DEEP'] +5683-32866-0017-1641: ref=['I', 'AM', 'VERY', 'UNEASY', 'ABOUT', 'IT', 'WHATEVER', 'IT', 'IS', 'I', "CAN'T", 'HELP', 'IT'] +5683-32866-0017-1641: hyp=['I', 'AM', 'VERY', 'UNEASY', 'ABOUT', 'IT', 'WHATEVER', 'IT', 'IS', 'I', "CAN'T", 'HELP', 'IT'] +5683-32866-0018-1642: ref=['TO', 'MY', 'MIND', 'THERE', 'HAS', 'ALWAYS', 'BEEN', 'SOMETHING', 'INEXPRESSIBLY', 'AWFUL', 'IN', 'FAMILY', 'FEUDS'] +5683-32866-0018-1642: hyp=['TO', 'MY', 'MIND', 'THERE', 'HAS', 'ALWAYS', 'BEEN', 'SOMETHING', 'INEXPRESSIBLY', 'AWFUL', 'IN', 'FAMILY', 'FEUDS'] +5683-32866-0019-1643: ref=['THE', 'MYSTERY', 'OF', 'THEIR', 'ORIGIN', 'THEIR', 'CAPACITY', 'FOR', 'EVOLVING', 'LATENT', 'FACULTIES', 'OF', 'CRIME', 'AND', 'THE', 'STEADY', 'VITALITY', 'WITH', 'WHICH', 'THEY', 'SURVIVE', 'THE', 'HEARSE', 'AND', 'SPEAK', 'THEIR', 'DEEP', 'MOUTHED', 'MALIGNITIES', 'IN', 
'EVERY', 'NEW', 'BORN', 'GENERATION', 'HAVE', 'ASSOCIATED', 'THEM', 'SOMEHOW', 'IN', 'MY', 'MIND', 'WITH', 'A', 'SPELL', 'OF', 'LIFE', 'EXCEEDING', 'AND', 'DISTINCT', 'FROM', 'HUMAN', 'AND', 'A', 'SPECIAL', 'SATANIC', 'ACTION'] +5683-32866-0019-1643: hyp=['THE', 'MYSTERY', 'OF', 'THEIR', 'ORIGIN', 'THEIR', 'CAPACITY', 'FOR', 'EVOLVING', 'LATENT', 'FACULTIES', 'OF', 'CRIME', 'AND', 'THE', 'STEADY', 'VITALITY', 'WITH', 'WHICH', 'THEY', 'SURVIVE', 'THE', 'HEARSE', 'AND', 'SPEAK', 'THEIR', 'DEEP', 'MOUTHED', 'MALIGNITIES', 'IN', 'EVERY', 'NEW', 'BORN', 'GENERATION', 'HAVE', 'ASSOCIATED', 'THEM', 'SOMEHOW', 'IN', 'MY', 'MIND', 'WITH', 'A', 'SPELL', 'OF', 'LIFE', 'EXCEEDING', 'AND', 'DISTINCT', 'FROM', 'HUMAN', 'AND', 'ESPECIAL', 'SATANIC', 'ACTION'] +5683-32866-0020-1644: ref=['THE', 'FLOOR', 'MORE', 'THAN', 'ANYTHING', 'ELSE', 'SHOWED', 'THE', 'GREAT', 'AGE', 'OF', 'THE', 'ROOM'] +5683-32866-0020-1644: hyp=['THE', 'FLOOR', 'MORE', 'THAN', 'ANYTHING', 'ELSE', 'SHOWED', 'THE', 'GREAT', 'AGE', 'OF', 'THE', 'ROOM'] +5683-32866-0021-1645: ref=['MY', 'BED', 'WAS', 'UNEXCEPTIONABLY', 'COMFORTABLE', 'BUT', 'IN', 'MY', 'THEN', 'MOOD', 'I', 'COULD', 'HAVE', 'WISHED', 'IT', 'A', 'GREAT', 'DEAL', 'MORE', 'MODERN'] +5683-32866-0021-1645: hyp=['MY', 'BED', 'WAS', 'UNEXCEPTIONABLY', 'COMFORTABLE', 'BUT', 'IN', 'MY', 'THEN', 'MOOD', 'I', 'COULD', 'HAVE', 'WISHED', 'IT', 'A', 'GREAT', 'DEAL', 'MORE', 'MODERN'] +5683-32866-0022-1646: ref=['ITS', 'CURTAINS', 'WERE', 'OF', 'THICK', 'AND', 'FADED', 'TAPESTRY'] +5683-32866-0022-1646: hyp=['ITS', 'CURTAINS', 'WERE', 'OF', 'THICK', 'AND', 'FADED', 'TAPESTRY'] +5683-32866-0023-1647: ref=['ALL', 'THE', 'FURNITURE', 'BELONGED', 'TO', 'OTHER', 'TIMES'] +5683-32866-0023-1647: hyp=['ALL', 'THE', 'FURNITURE', 'BELONGED', 'TO', 'OTHER', 'TIMES'] +5683-32866-0024-1648: ref=['I', "SHAN'T", 'TROUBLE', 'YOU', 'ABOUT', 'MY', 'TRAIN', 'OF', 'THOUGHTS', 'OR', 'FANCIES', 'BUT', 'I', 'BEGAN', 'TO', 'FEEL', 'VERY', 'LIKE', 'A', 'GENTLEMAN', 'IN', 'A', 'GHOST', 'STORY', 'WATCHING', 'EXPERIMENTALLY', 'IN', 'A', 'HAUNTED', 'CHAMBER'] +5683-32866-0024-1648: hyp=['I', "SHAN'T", 'TROUBLE', 'YOU', 'ABOUT', 'MY', 'TRAIN', 'OF', 'THOUGHTS', 'OR', 'FANCIES', 'BUT', 'I', 'BEGAN', 'TO', 'FEEL', 'VERY', 'LIKE', 'A', 'GENTLEMAN', 'IN', 'A', 'GHOST', 'STORY', 'WATCHING', 'EXPERIMENTALLY', 'IN', 'A', 'HAUNTED', 'CHAMBER'] +5683-32866-0025-1649: ref=['I', 'DID', 'NOT', 'EVEN', 'TAKE', 'THE', 'PRECAUTION', 'OF', 'SMOKING', 'UP', 'THE', 'CHIMNEY'] +5683-32866-0025-1649: hyp=['I', 'DID', 'NOT', 'EVEN', 'TAKE', 'THE', 'PRECAUTION', 'OF', 'SMOKING', 'UP', 'THE', 'CHIMNEY'] +5683-32866-0026-1650: ref=['I', 'BOLDLY', 'LIGHTED', 'MY', 'CHEROOT'] +5683-32866-0026-1650: hyp=['I', 'BOLDLY', 'LIGHTED', 'MY', 'CERUT'] +5683-32866-0027-1651: ref=['A', 'COLD', 'BRIGHT', 'MOON', 'WAS', 'SHINING', 'WITH', 'CLEAR', 'SHARP', 'LIGHTS', 'AND', 'SHADOWS'] +5683-32866-0027-1651: hyp=['A', 'COLD', 'BRIGHT', 'MOON', 'WAS', 'SHINING', 'WITH', 'CLEAR', 'SHARP', 'LIGHTS', 'AND', 'SHADOWS'] +5683-32866-0028-1652: ref=['THE', 'SOMBRE', 'OLD', 'TREES', 'LIKE', 'GIGANTIC', 'HEARSE', 'PLUMES', 'BLACK', 'AND', 'AWFUL'] +5683-32866-0028-1652: hyp=['THE', 'SOMBRE', 'OLD', 'TREES', 'LIKE', 'GIGANTIC', 'HEARSE', 'PLUMES', 'BLACK', 'AND', 'AWFUL'] +5683-32866-0029-1653: ref=['SOMEHOW', 'I', 'HAD', 'GROWN', 'NERVOUS'] +5683-32866-0029-1653: hyp=['SOMEHOW', 'I', 'HAD', 'GROWN', 'NERVOUS'] +5683-32866-0030-1654: ref=['A', 'LITTLE', 'BIT', 'OF', 'PLASTER', 'TUMBLED', 'DOWN', 'THE', 'CHIMNEY', 'AND', 'STARTLED', 'ME', 'CONFOUNDEDLY'] 
+5683-32866-0030-1654: hyp=['A', 'LITTLE', 'BIT', 'OF', 'PLASTER', 'TUMBLED', 'DOWN', 'THE', 'CHIMNEY', 'AND', 'STARTLED', 'ME', 'CONFOUNDEDLY'] +5683-32879-0000-1655: ref=['IT', 'WAS', 'NOT', 'VERY', 'MUCH', 'PAST', 'ELEVEN', 'THAT', 'MORNING', 'WHEN', 'THE', 'PONY', 'CARRIAGE', 'FROM', 'BRANDON', 'DREW', 'UP', 'BEFORE', 'THE', 'LITTLE', 'GARDEN', 'WICKET', 'OF', "REDMAN'S", 'FARM'] +5683-32879-0000-1655: hyp=['IT', 'WAS', 'NOT', 'VERY', 'MUCH', 'PAST', 'ELEVEN', 'THAT', 'MORNING', 'WHEN', 'THE', 'PONY', 'CARRIAGE', 'FROM', 'BRANDON', 'DREW', 'UP', 'BEFORE', 'THE', 'LITTLE', 'GARDEN', 'WICKET', 'OF', "REDMAN'S", 'FARM'] +5683-32879-0001-1656: ref=['WELL', 'SHE', 'WAS', 'BETTER', 'THOUGH', 'SHE', 'HAD', 'HAD', 'A', 'BAD', 'NIGHT'] +5683-32879-0001-1656: hyp=['WELL', 'SHE', 'WAS', 'BETTER', 'THOUGH', 'SHE', 'HAD', 'HAD', 'A', 'BAD', 'NIGHT'] +5683-32879-0002-1657: ref=['SO', 'THERE', 'CAME', 'A', 'STEP', 'AND', 'A', 'LITTLE', 'RUSTLING', 'OF', 'FEMININE', 'DRAPERIES', 'THE', 'SMALL', 'DOOR', 'OPENED', 'AND', 'RACHEL', 'ENTERED', 'WITH', 'HER', 'HAND', 'EXTENDED', 'AND', 'A', 'PALE', 'SMILE', 'OF', 'WELCOME'] +5683-32879-0002-1657: hyp=['SO', 'THERE', 'CAME', 'A', 'STEP', 'AND', 'A', 'LITTLE', 'RUSTLING', 'OF', 'FEMININE', 'DRAPERIES', 'THE', 'SMALL', 'DOOR', 'OPENED', 'AND', 'RACHEL', 'ENTERED', 'WITH', 'HER', 'HAND', 'EXTENDED', 'AND', 'A', 'PALE', 'SMILE', 'OF', 'WELCOME'] +5683-32879-0003-1658: ref=['WOMEN', 'CAN', 'HIDE', 'THEIR', 'PAIN', 'BETTER', 'THAN', 'WE', 'MEN', 'AND', 'BEAR', 'IT', 'BETTER', 'TOO', 'EXCEPT', 'WHEN', 'SHAME', 'DROPS', 'FIRE', 'INTO', 'THE', 'DREADFUL', 'CHALICE'] +5683-32879-0003-1658: hyp=['WOMEN', 'CAN', 'HIDE', 'THEIR', 'PAIN', 'BETTER', 'THAN', 'WE', 'MEN', 'AND', 'BEAR', 'IT', 'BETTER', 'TOO', 'EXCEPT', 'WHEN', 'SHAME', 'DROPS', 'FIRE', 'INTO', 'THE', 'DREADFUL', 'CHALICE'] +5683-32879-0004-1659: ref=['BUT', 'POOR', 'RACHEL', 'LAKE', 'HAD', 'MORE', 'THAN', 'THAT', 'STOICAL', 'HYPOCRISY', 'WHICH', 'ENABLES', 'THE', 'TORTURED', 'SPIRITS', 'OF', 'HER', 'SEX', 'TO', 'LIFT', 'A', 'PALE', 'FACE', 'THROUGH', 'THE', 'FLAMES', 'AND', 'SMILE'] +5683-32879-0004-1659: hyp=['BUT', 'POOR', 'RACHEL', 'LAKE', 'HAD', 'MORE', 'THAN', 'THAT', 'STOICAL', 'HYPOCRISY', 'WHICH', 'ENABLES', 'THE', 'TORTURED', 'SPIRITS', 'OF', 'HER', 'SEX', 'TO', 'LIFT', 'A', 'PALE', 'FACE', 'THROUGH', 'THE', 'FLAMES', 'AND', 'SMILE'] +5683-32879-0005-1660: ref=['THIS', 'TRANSIENT', 'SPRING', 'AND', 'LIGHTING', 'UP', 'ARE', 'BEAUTIFUL', 'A', 'GLAMOUR', 'BEGUILING', 'OUR', 'SENSES'] +5683-32879-0005-1660: hyp=['THIS', 'TRANSIENT', 'SPRING', 'AND', 'LIGHTING', 'UP', 'ARE', 'BEAUTIFUL', 'A', 'GLAMOUR', 'BEGUILING', 'OUR', 'SENSES'] +5683-32879-0006-1661: ref=['THERE', 'WAS', 'SOMETHING', 'OF', 'SWEETNESS', 'AND', 'FONDNESS', 'IN', 'HER', 'TONES', 'AND', 'MANNER', 'WHICH', 'WAS', 'NEW', 'TO', 'RACHEL', 'AND', 'COMFORTING', 'AND', 'SHE', 'RETURNED', 'THE', 'GREETING', 'AS', 'KINDLY', 'AND', 'FELT', 'MORE', 'LIKE', 'HER', 'FORMER', 'SELF'] +5683-32879-0006-1661: hyp=['THERE', 'WAS', 'SOMETHING', 'OF', 'SWEETNESS', 'AND', 'FONDNESS', 'IN', 'HER', 'TONES', 'AND', 'MANNER', 'WHICH', 'WAS', 'NEW', 'TO', 'RACHEL', 'AND', 'COMFORTING', 'AND', 'SHE', 'RETURNED', 'THE', 'GREETING', 'AS', 'KINDLY', 'AND', 'FELT', 'MORE', 'LIKE', 'HER', 'FORMER', 'SELF'] +5683-32879-0007-1662: ref=["RACHEL'S", 'PALE', 'AND', 'SHARPENED', 'FEATURES', 'AND', 'DILATED', 'EYE', 'STRUCK', 'HER', 'WITH', 'A', 'PAINFUL', 'SURPRISE'] +5683-32879-0007-1662: hyp=["RACHEL'S", 'PALE', 'AND', 'SHARPENED', 'FEATURES', 'AND', 'DILATED', 'EYE', 
'STRUCK', 'HER', 'WITH', 'A', 'PAINFUL', 'SURPRISE'] +5683-32879-0008-1663: ref=['YOU', 'HAVE', 'BEEN', 'SO', 'ILL', 'MY', 'POOR', 'RACHEL'] +5683-32879-0008-1663: hyp=['YOU', 'HAVE', 'BEEN', 'SO', 'ILL', 'MY', 'POOR', 'RACHEL'] +5683-32879-0009-1664: ref=['ILL', 'AND', 'TROUBLED', 'DEAR', 'TROUBLED', 'IN', 'MIND', 'AND', 'MISERABLY', 'NERVOUS'] +5683-32879-0009-1664: hyp=['ILL', 'AND', 'TROUBLED', 'DEAR', 'TROUBLED', 'IN', 'MIND', 'AND', 'MISERABLY', 'NERVOUS'] +5683-32879-0010-1665: ref=['POOR', 'RACHEL', 'HER', 'NATURE', 'RECOILED', 'FROM', 'DECEIT', 'AND', 'SHE', 'TOLD', 'AT', 'ALL', 'EVENTS', 'AS', 'MUCH', 'OF', 'THE', 'TRUTH', 'AS', 'SHE', 'DARED'] +5683-32879-0010-1665: hyp=['POOR', 'RACHEL', 'HER', 'NATURE', 'RECOILED', 'FROM', 'DECEIT', 'AND', 'SHE', 'TOLD', 'AT', 'ALL', 'EVENTS', 'AS', 'MUCH', 'OF', 'THE', 'TRUTH', 'AS', 'SHE', 'DARED'] +5683-32879-0011-1666: ref=['SHE', 'SPOKE', 'WITH', 'A', 'SUDDEN', 'ENERGY', 'WHICH', 'PARTOOK', 'OF', 'FEAR', 'AND', 'PASSION', 'AND', 'FLUSHED', 'HER', 'THIN', 'CHEEK', 'AND', 'MADE', 'HER', 'LANGUID', 'EYES', 'FLASH'] +5683-32879-0011-1666: hyp=['SHE', 'SPOKE', 'WITH', 'A', 'SUDDEN', 'ENERGY', 'WHICH', 'PARTOOK', 'OF', 'FEAR', 'AND', 'PASSION', 'AND', 'FLUSHED', 'HER', 'THIN', 'CHEEK', 'AND', 'MADE', 'HER', 'LANGUID', 'EYES', 'FLASH'] +5683-32879-0012-1667: ref=['THANK', 'YOU', 'RACHEL', 'MY', 'COUSIN', 'RACHEL', 'MY', 'ONLY', 'FRIEND'] +5683-32879-0012-1667: hyp=['THANK', 'YOU', 'RACHEL', 'MY', 'COUSIN', 'RACHEL', 'MY', 'ONLY', 'FRIEND'] +5683-32879-0013-1668: ref=['CHELFORD', 'HAD', 'A', 'NOTE', 'FROM', 'MISTER', 'WYLDER', 'THIS', 'MORNING', 'ANOTHER', 'NOTE', 'HIS', 'COMING', 'DELAYED', 'AND', 'SOMETHING', 'OF', 'HIS', 'HAVING', 'TO', 'SEE', 'SOME', 'PERSON', 'WHO', 'IS', 'ABROAD', 'CONTINUED', 'DORCAS', 'AFTER', 'A', 'LITTLE', 'PAUSE'] +5683-32879-0013-1668: hyp=['CHELFORD', 'HAD', 'A', 'NOTE', 'FROM', 'MISTER', 'WYLDER', 'THIS', 'MORNING', 'ANOTHER', 'NOTE', 'HIS', 'COMING', 'DELAYED', 'AND', 'SOMETHING', 'OF', 'HIS', 'HAVING', 'TO', 'SEE', 'SOME', 'PERSON', 'WHO', 'WAS', 'ABROAD', 'CONTINUED', 'DORCAS', 'AFTER', 'A', 'LITTLE', 'PAUSE'] +5683-32879-0014-1669: ref=['YES', 'SOMETHING', 'EVERYTHING', 'SAID', 'RACHEL', 'HURRIEDLY', 'LOOKING', 'FROWNINGLY', 'AT', 'A', 'FLOWER', 'WHICH', 'SHE', 'WAS', 'TWIRLING', 'IN', 'HER', 'FINGERS'] +5683-32879-0014-1669: hyp=['YES', 'SOMETHING', 'EVERYTHING', 'SAID', 'RACHEL', 'HURRIEDLY', 'LOOKING', 'FROWNINGLY', 'AT', 'A', 'FLOWER', 'WHICH', 'SHE', 'WAS', 'TWIRLING', 'IN', 'HER', 'FINGERS'] +5683-32879-0015-1670: ref=['YES', 'SAID', 'RACHEL'] +5683-32879-0015-1670: hyp=['YES', 'SAID', 'RACHEL'] +5683-32879-0016-1671: ref=['AND', 'THE', 'WAN', 'ORACLE', 'HAVING', 'SPOKEN', 'SHE', 'SATE', 'DOWN', 'IN', 'THE', 'SAME', 'SORT', 'OF', 'ABSTRACTION', 'AGAIN', 'BESIDE', 'DORCAS', 'AND', 'SHE', 'LOOKED', 'FULL', 'IN', 'HER', "COUSIN'S", 'EYES'] +5683-32879-0016-1671: hyp=['AND', 'THE', 'WAN', 'ORACLE', 'HAVING', 'SPOKEN', 'SHE', 'SAT', 'DOWN', 'IN', 'THE', 'SAME', 'SORT', 'OF', 'ABSTRACTION', 'AGAIN', 'BESIDE', 'DORCAS', 'AND', 'SHE', 'LOOKED', 'FULL', 'IN', 'HER', "COUSIN'S", 'EYES'] +5683-32879-0017-1672: ref=['OF', 'MARK', 'WYLDER', 'I', 'SAY', 'THIS', 'HIS', 'NAME', 'HAS', 'BEEN', 'FOR', 'YEARS', 'HATEFUL', 'TO', 'ME', 'AND', 'RECENTLY', 'IT', 'HAS', 'BECOME', 'FRIGHTFUL', 'AND', 'YOU', 'WILL', 'PROMISE', 'ME', 'SIMPLY', 'THIS', 'THAT', 'YOU', 'WILL', 'NEVER', 'ASK', 'ME', 'TO', 'SPEAK', 'AGAIN', 'ABOUT', 'HIM'] +5683-32879-0017-1672: hyp=['OF', 'MARK', 'WYLDER', 'I', 'SAY', 'THIS', 'HIS', 'NAME', 'HAS', 
'BEEN', 'FOR', 'YEARS', 'HATEFUL', 'TO', 'ME', 'AND', 'RECENTLY', 'IT', 'HAS', 'BECOME', 'FRIGHTFUL', 'AND', 'YOU', 'WILL', 'PROMISE', 'ME', 'SIMPLY', 'THIS', 'THAT', 'YOU', 'WILL', 'NEVER', 'ASK', 'ME', 'TO', 'SPEAK', 'AGAIN', 'ABOUT', 'HIM'] +5683-32879-0018-1673: ref=['IT', 'IS', 'AN', 'ANTIPATHY', 'AN', 'ANTIPATHY', 'I', 'CANNOT', 'GET', 'OVER', 'DEAR', 'DORCAS', 'YOU', 'MAY', 'THINK', 'IT', 'A', 'MADNESS', 'BUT', "DON'T", 'BLAME', 'ME'] +5683-32879-0018-1673: hyp=['IT', 'IS', 'AN', 'ANTIPATHY', 'AN', 'ANTIPATHY', 'I', 'CANNOT', 'GET', 'OVER', 'DEAR', 'DORCAS', 'YOU', 'MAY', 'THINK', 'IT', 'A', 'MADNESS', 'BUT', "DON'T", 'BLAME', 'ME'] +5683-32879-0019-1674: ref=['I', 'HAVE', 'VERY', 'FEW', 'TO', 'LOVE', 'ME', 'NOW', 'AND', 'I', 'THOUGHT', 'YOU', 'MIGHT', 'LOVE', 'ME', 'AS', 'I', 'HAVE', 'BEGUN', 'TO', 'LOVE', 'YOU'] +5683-32879-0019-1674: hyp=['I', 'HAVE', 'VERY', 'FEW', 'TO', 'LOVE', 'ME', 'NOW', 'AND', 'I', 'THOUGHT', 'YOU', 'MIGHT', 'LOVE', 'ME', 'AS', 'I', 'HAVE', 'BEGUN', 'TO', 'LOVE', 'YOU'] +5683-32879-0020-1675: ref=['AND', 'SHE', 'THREW', 'HER', 'ARMS', 'ROUND', 'HER', "COUSIN'S", 'NECK', 'AND', 'BRAVE', 'RACHEL', 'AT', 'LAST', 'BURST', 'INTO', 'TEARS'] +5683-32879-0020-1675: hyp=['AND', 'SHE', 'THREW', 'HER', 'ARMS', 'ROUND', 'HER', "COUSIN'S", 'NECK', 'AND', 'BRAVE', 'RACHEL', 'AT', 'LAST', 'BURST', 'INTO', 'TEARS'] +5683-32879-0021-1676: ref=['DORCAS', 'IN', 'HER', 'STRANGE', 'WAY', 'WAS', 'MOVED'] +5683-32879-0021-1676: hyp=['DORCAS', 'IN', 'HER', 'STRANGE', 'WAY', 'WAS', 'MOVED'] +5683-32879-0022-1677: ref=['I', 'LIKE', 'YOU', 'STILL', 'RACHEL', "I'M", 'SURE', "I'LL", 'ALWAYS', 'LIKE', 'YOU'] +5683-32879-0022-1677: hyp=['I', 'LIKE', 'YOU', 'STILL', 'RACHEL', "I'M", 'SURE', "I'LL", 'ALWAYS', 'LIKE', 'YOU'] +5683-32879-0023-1678: ref=['YOU', 'RESEMBLE', 'ME', 'RACHEL', 'YOU', 'ARE', 'FEARLESS', 'AND', 'INFLEXIBLE', 'AND', 'GENEROUS'] +5683-32879-0023-1678: hyp=['YOU', 'RESEMBLE', 'ME', 'RACHEL', 'YOU', 'ARE', 'FEARLESS', 'AND', 'INFLEXIBLE', 'AND', 'GENEROUS'] +5683-32879-0024-1679: ref=['YES', 'RACHEL', 'I', 'DO', 'LOVE', 'YOU'] +5683-32879-0024-1679: hyp=['YES', 'RACHEL', 'I', 'DO', 'LOVE', 'YOU'] +5683-32879-0025-1680: ref=['THANK', 'YOU', 'DORCAS', 'DEAR'] +5683-32879-0025-1680: hyp=['THANK', 'YOU', 'DORCAS', 'DEAR'] +61-70968-0000-1681: ref=['HE', 'BEGAN', 'A', 'CONFUSED', 'COMPLAINT', 'AGAINST', 'THE', 'WIZARD', 'WHO', 'HAD', 'VANISHED', 'BEHIND', 'THE', 'CURTAIN', 'ON', 'THE', 'LEFT'] +61-70968-0000-1681: hyp=['HE', 'BEGAN', 'A', 'CONFUSED', 'COMPLAINT', 'AGAINST', 'THE', 'WIZARD', 'WHO', 'HAD', 'VANISHED', 'BEHIND', 'THE', 'CURTAIN', 'ON', 'THE', 'LEFT'] +61-70968-0001-1682: ref=['GIVE', 'NOT', 'SO', 'EARNEST', 'A', 'MIND', 'TO', 'THESE', 'MUMMERIES', 'CHILD'] +61-70968-0001-1682: hyp=['GIVE', 'NOT', 'SO', 'EARNEST', 'A', 'MIND', 'TO', 'THESE', 'MEMORIES', 'CHILD'] +61-70968-0002-1683: ref=['A', 'GOLDEN', 'FORTUNE', 'AND', 'A', 'HAPPY', 'LIFE'] +61-70968-0002-1683: hyp=['A', 'GOLDEN', 'FORTUNE', 'AND', 'A', 'HAPPY', 'LIFE'] +61-70968-0003-1684: ref=['HE', 'WAS', 'LIKE', 'UNTO', 'MY', 'FATHER', 'IN', 'A', 'WAY', 'AND', 'YET', 'WAS', 'NOT', 'MY', 'FATHER'] +61-70968-0003-1684: hyp=['HE', 'WAS', 'LIKE', 'UNTO', 'MY', 'FATHER', 'IN', 'A', 'WAY', 'AND', 'YET', 'WAS', 'NOT', 'MY', 'FATHER'] +61-70968-0004-1685: ref=['ALSO', 'THERE', 'WAS', 'A', 'STRIPLING', 'PAGE', 'WHO', 'TURNED', 'INTO', 'A', 'MAID'] +61-70968-0004-1685: hyp=['ALSO', 'THERE', 'WAS', 'A', 'STRIPLING', 'PAGE', 'WHO', 'TURNED', 'INTO', 'A', 'MAID'] +61-70968-0005-1686: ref=['THIS', 'WAS', 'SO', 'SWEET', 
'A', 'LADY', 'SIR', 'AND', 'IN', 'SOME', 'MANNER', 'I', 'DO', 'THINK', 'SHE', 'DIED'] +61-70968-0005-1686: hyp=['THIS', 'WAS', 'SO', 'SWEET', 'A', 'LADY', 'SIR', 'AND', 'IN', 'SOME', 'MANNER', 'I', 'DO', 'THINK', 'SHE', 'DIED'] +61-70968-0006-1687: ref=['BUT', 'THEN', 'THE', 'PICTURE', 'WAS', 'GONE', 'AS', 'QUICKLY', 'AS', 'IT', 'CAME'] +61-70968-0006-1687: hyp=['BUT', 'THEN', 'THE', 'PICTURE', 'WAS', 'GONE', 'AS', 'QUICKLY', 'AS', 'IT', 'CAME'] +61-70968-0007-1688: ref=['SISTER', 'NELL', 'DO', 'YOU', 'HEAR', 'THESE', 'MARVELS'] +61-70968-0007-1688: hyp=['SISTER', 'NELL', 'DO', 'YOU', 'HEAR', 'THESE', 'MARVELS'] +61-70968-0008-1689: ref=['TAKE', 'YOUR', 'PLACE', 'AND', 'LET', 'US', 'SEE', 'WHAT', 'THE', 'CRYSTAL', 'CAN', 'SHOW', 'TO', 'YOU'] +61-70968-0008-1689: hyp=['TAKE', 'YOUR', 'PLACE', 'AND', 'LET', 'US', 'SEE', 'WHAT', 'THE', 'CRYSTAL', 'CAN', 'SHOW', 'TO', 'YOU'] +61-70968-0009-1690: ref=['LIKE', 'AS', 'NOT', 'YOUNG', 'MASTER', 'THOUGH', 'I', 'AM', 'AN', 'OLD', 'MAN'] +61-70968-0009-1690: hyp=['LIKE', 'AS', 'NOT', 'YOUNG', 'MASTER', 'THOUGH', 'I', 'AM', 'AN', 'OLD', 'MAN'] +61-70968-0010-1691: ref=['FORTHWITH', 'ALL', 'RAN', 'TO', 'THE', 'OPENING', 'OF', 'THE', 'TENT', 'TO', 'SEE', 'WHAT', 'MIGHT', 'BE', 'AMISS', 'BUT', 'MASTER', 'WILL', 'WHO', 'PEEPED', 'OUT', 'FIRST', 'NEEDED', 'NO', 'MORE', 'THAN', 'ONE', 'GLANCE'] +61-70968-0010-1691: hyp=['FORTHWITH', 'ALL', 'RAN', 'TO', 'THE', 'OPENING', 'OF', 'THE', 'TENT', 'TO', 'SEE', 'WHAT', 'MIGHT', 'BE', 'AMISS', 'BUT', 'MASTER', 'WILL', 'WHO', 'PEEPED', 'OUT', 'FIRST', 'NEEDED', 'NO', 'MORE', 'THAN', 'ONE', 'GLANCE'] +61-70968-0011-1692: ref=['HE', 'GAVE', 'WAY', 'TO', 'THE', 'OTHERS', 'VERY', 'READILY', 'AND', 'RETREATED', 'UNPERCEIVED', 'BY', 'THE', 'SQUIRE', 'AND', 'MISTRESS', 'FITZOOTH', 'TO', 'THE', 'REAR', 'OF', 'THE', 'TENT'] +61-70968-0011-1692: hyp=['HE', 'GAVE', 'WAY', 'TO', 'THE', 'OTHERS', 'VERY', 'READILY', 'AND', 'RETREATED', 'UNPERCEIVED', 'BY', 'THE', 'SQUIRE', 'AND', 'MISTRESS', 'FITZOOTH', 'TO', 'THE', 'REAR', 'OF', 'THE', 'TENT'] +61-70968-0012-1693: ref=['CRIES', 'OF', 'A', 'NOTTINGHAM', 'A', 'NOTTINGHAM'] +61-70968-0012-1693: hyp=['CRIES', 'OF', 'A', 'NOTTINGHAM', 'A', 'NOTTINGHAM'] +61-70968-0013-1694: ref=['BEFORE', 'THEM', 'FLED', 'THE', 'STROLLER', 'AND', 'HIS', 'THREE', 'SONS', 'CAPLESS', 'AND', 'TERRIFIED'] +61-70968-0013-1694: hyp=['BEFORE', 'THEM', 'FLED', 'THE', 'STROLLER', 'AND', 'HIS', 'THREE', 'SONS', 'CAPLESS', 'AND', 'TERRIFIED'] +61-70968-0014-1695: ref=['WHAT', 'IS', 'THE', 'TUMULT', 'AND', 'RIOTING', 'CRIED', 'OUT', 'THE', 'SQUIRE', 'AUTHORITATIVELY', 'AND', 'HE', 'BLEW', 'TWICE', 'ON', 'A', 'SILVER', 'WHISTLE', 'WHICH', 'HUNG', 'AT', 'HIS', 'BELT'] +61-70968-0014-1695: hyp=['WHAT', 'IS', 'THE', 'TUMULT', 'AND', 'RIOTING', 'CRIED', 'OUT', 'THE', 'SQUIRE', 'AUTHORITATIVELY', 'AND', 'HE', 'BLEW', 'TWICE', 'ON', 'THE', 'SILVER', 'WHISTLE', 'WHICH', 'HUNG', 'AT', 'HIS', 'BELT'] +61-70968-0015-1696: ref=['NAY', 'WE', 'REFUSED', 'THEIR', 'REQUEST', 'MOST', 'POLITELY', 'MOST', 'NOBLE', 'SAID', 'THE', 'LITTLE', 'STROLLER'] +61-70968-0015-1696: hyp=['NAY', 'WE', 'REFUSE', 'THEIR', 'REQUEST', 'MOST', 'POLITELY', 'MOST', 'NOBLE', 'SAID', 'THE', 'LITTLE', 'STROLLER'] +61-70968-0016-1697: ref=['AND', 'THEN', 'THEY', 'BECAME', 'VEXED', 'AND', 'WOULD', 'HAVE', 'SNATCHED', 'YOUR', 'PURSE', 'FROM', 'US'] +61-70968-0016-1697: hyp=['AND', 'THEN', 'THEY', 'BECAME', 'VEXED', 'AND', 'WOULD', 'HAVE', 'SNATCHED', 'YOUR', 'PURSE', 'FROM', 'US'] +61-70968-0017-1698: ref=['I', 'COULD', 'NOT', 'SEE', 'MY', 'BOY', 'INJURED', 
'EXCELLENCE', 'FOR', 'BUT', 'DOING', 'HIS', 'DUTY', 'AS', 'ONE', 'OF', "CUMBERLAND'S", 'SONS'] +61-70968-0017-1698: hyp=['I', 'COULD', 'NOT', 'SEE', 'MY', 'BOY', 'INJURED', 'EXCELLENCE', 'FOR', 'BUT', 'DOING', 'HIS', 'DUTY', 'AS', 'ONE', 'OF', "CUMBERLAND'S", 'SONS'] +61-70968-0018-1699: ref=['SO', 'I', 'DID', 'PUSH', 'THIS', 'FELLOW'] +61-70968-0018-1699: hyp=['SO', 'I', 'DID', 'PUSH', 'THIS', 'FELLOW'] +61-70968-0019-1700: ref=['IT', 'IS', 'ENOUGH', 'SAID', 'GEORGE', 'GAMEWELL', 'SHARPLY', 'AND', 'HE', 'TURNED', 'UPON', 'THE', 'CROWD'] +61-70968-0019-1700: hyp=['IT', 'IS', 'ENOUGH', 'SAID', 'GEORGE', 'GAMEWELL', 'SHARPLY', 'AS', 'HE', 'TURNED', 'UPON', 'THE', 'CROWD'] +61-70968-0020-1701: ref=['SHAME', 'ON', 'YOU', 'CITIZENS', 'CRIED', 'HE', 'I', 'BLUSH', 'FOR', 'MY', 'FELLOWS', 'OF', 'NOTTINGHAM'] +61-70968-0020-1701: hyp=['SHAME', 'ON', 'YOU', 'CITIZENS', 'CRIED', 'HE', 'I', 'BLUSH', 'FOR', 'MY', 'FELLOWS', 'OF', 'NOTTINGHAM'] +61-70968-0021-1702: ref=['SURELY', 'WE', 'CAN', 'SUBMIT', 'WITH', 'GOOD', 'GRACE'] +61-70968-0021-1702: hyp=['SURELY', 'WE', 'CAN', 'SUBMIT', 'WITH', 'GOOD', 'GRACE'] +61-70968-0022-1703: ref=['TIS', 'FINE', 'FOR', 'YOU', 'TO', 'TALK', 'OLD', 'MAN', 'ANSWERED', 'THE', 'LEAN', 'SULLEN', 'APPRENTICE'] +61-70968-0022-1703: hyp=['TIS', 'FINE', 'FOR', 'YOU', 'TO', 'TALK', 'OLD', 'MAN', 'ANSWERED', 'THE', 'LEAN', 'SULLEN', 'APPRENTICE'] +61-70968-0023-1704: ref=['BUT', 'I', 'WRESTLED', 'WITH', 'THIS', 'FELLOW', 'AND', 'DO', 'KNOW', 'THAT', 'HE', 'PLAYED', 'UNFAIRLY', 'IN', 'THE', 'SECOND', 'BOUT'] +61-70968-0023-1704: hyp=['BUT', 'I', 'WRESTLED', 'WITH', 'THIS', 'FELLOW', 'AND', 'DO', 'KNOW', 'THAT', 'HE', 'PLAYED', 'UNFAIRLY', 'IN', 'THE', 'SECOND', 'BOUT'] +61-70968-0024-1705: ref=['SPOKE', 'THE', 'SQUIRE', 'LOSING', 'ALL', 'PATIENCE', 'AND', 'IT', 'WAS', 'TO', 'YOU', 'THAT', 'I', 'GAVE', 'ANOTHER', 'PURSE', 'IN', 'CONSOLATION'] +61-70968-0024-1705: hyp=['SPOKE', 'THE', 'SQUIRE', 'LOSING', 'ALL', 'PATIENCE', 'AND', 'IT', 'WAS', 'TO', 'YOU', 'THAT', 'I', 'GAVE', 'ANOTHER', 'PERSON', 'CONSOLATION'] +61-70968-0025-1706: ref=['COME', 'TO', 'ME', 'MEN', 'HERE', 'HERE', 'HE', 'RAISED', 'HIS', 'VOICE', 'STILL', 'LOUDER'] +61-70968-0025-1706: hyp=['COME', 'TO', 'ME', 'MEN', 'HERE', 'HERE', 'HE', 'RAISED', 'HIS', 'VOICE', 'STILL', 'LOUDER'] +61-70968-0026-1707: ref=['THE', 'STROLLERS', 'TOOK', 'THEIR', 'PART', 'IN', 'IT', 'WITH', 'HEARTY', 'ZEST', 'NOW', 'THAT', 'THEY', 'HAD', 'SOME', 'CHANCE', 'OF', 'BEATING', 'OFF', 'THEIR', 'FOES'] +61-70968-0026-1707: hyp=['THE', 'STROLLERS', 'TOOK', 'THEIR', 'PART', 'IN', 'IT', 'WITH', 'HEARTY', 'ZEST', 'NOW', 'THAT', 'THEY', 'HAD', 'SOME', 'CHANCE', 'OF', 'BEATING', 'OFF', 'THEIR', 'FOES'] +61-70968-0027-1708: ref=['ROBIN', 'AND', 'THE', 'LITTLE', 'TUMBLER', 'BETWEEN', 'THEM', 'TRIED', 'TO', 'FORCE', 'THE', 'SQUIRE', 'TO', 'STAND', 'BACK', 'AND', 'VERY', 'VALIANTLY', 'DID', 'THESE', 'TWO', 'COMPORT', 'THEMSELVES'] +61-70968-0027-1708: hyp=['ROBIN', 'AND', 'THE', 'LITTLE', 'TUMBLER', 'BETWEEN', 'THEM', 'TRIED', 'TO', 'FORCE', 'THE', 'SQUIRE', 'TO', 'STAND', 'BACK', 'AND', 'VERY', 'VALIANTLY', 'DID', 'THESE', 'TWO', 'COMPORT', 'THEMSELVES'] +61-70968-0028-1709: ref=['THE', 'HEAD', 'AND', 'CHIEF', 'OF', 'THE', 'RIOT', 'THE', 'NOTTINGHAM', 'APPRENTICE', 'WITH', 'CLENCHED', 'FISTS', 'THREATENED', 'MONTFICHET'] +61-70968-0028-1709: hyp=['THE', 'HEAD', 'IN', 'CHIEF', 'OF', 'THE', 'RIOT', 'THE', 'NOTTINGHAM', 'APPRENTICE', 'WITH', 'CLENCHED', 'FISTS', 'THREATENED', 'MONTFICHET'] +61-70968-0029-1710: ref=['THE', 'SQUIRE', 'HELPED', 'TO', 
'THRUST', 'THEM', 'ALL', 'IN', 'AND', 'ENTERED', 'SWIFTLY', 'HIMSELF'] +61-70968-0029-1710: hyp=['THE', 'SQUIRE', 'HELPED', 'TO', 'THRUST', 'THEM', 'ALL', 'IN', 'AND', 'ENTERED', 'SWIFTLY', 'HIMSELF'] +61-70968-0030-1711: ref=['NOW', 'BE', 'SILENT', 'ON', 'YOUR', 'LIVES', 'HE', 'BEGAN', 'BUT', 'THE', 'CAPTURED', 'APPRENTICE', 'SET', 'UP', 'AN', 'INSTANT', 'SHOUT'] +61-70968-0030-1711: hyp=['NOW', 'BE', 'SILENT', 'ON', 'YOUR', 'LIVES', 'HE', 'BEGAN', 'BUT', 'THE', 'CAPTURED', 'APPRENTICE', 'SET', 'UP', 'AN', 'INSTANT', 'SHOUT'] +61-70968-0031-1712: ref=['SILENCE', 'YOU', 'KNAVE', 'CRIED', 'MONTFICHET'] +61-70968-0031-1712: hyp=['SILENCE', 'YOU', 'KNAVE', 'CRIED', 'MONTFICHET'] +61-70968-0032-1713: ref=['HE', 'FELT', 'FOR', 'AND', 'FOUND', 'THE', "WIZARD'S", 'BLACK', 'CLOTH', 'THE', 'SQUIRE', 'WAS', 'QUITE', 'OUT', 'OF', 'BREATH'] +61-70968-0032-1713: hyp=['HE', 'FELT', 'FOR', 'AND', 'FOUND', 'THE', "WIZARD'S", 'BLACK', 'CLOTH', 'THE', 'SQUIRE', 'WAS', 'QUITE', 'OUT', 'OF', 'BREATH'] +61-70968-0033-1714: ref=['THRUSTING', 'OPEN', 'THE', 'PROPER', 'ENTRANCE', 'OF', 'THE', 'TENT', 'ROBIN', 'SUDDENLY', 'RUSHED', 'FORTH', 'WITH', 'HIS', 'BURDEN', 'WITH', 'A', 'GREAT', 'SHOUT'] +61-70968-0033-1714: hyp=['THRUSTING', 'OPEN', 'THE', 'PROPER', 'ENTRANCE', 'OF', 'THE', 'TENT', 'ROBIN', 'SUDDENLY', 'RUSHED', 'FORTH', 'WITH', 'HIS', 'BURDEN', 'WITH', 'A', 'GREAT', 'SHOUT'] +61-70968-0034-1715: ref=['A', 'MONTFICHET', 'A', 'MONTFICHET', 'GAMEWELL', 'TO', 'THE', 'RESCUE'] +61-70968-0034-1715: hyp=['A', 'MONTFICHET', 'A', 'MONTFICHET', 'GAMEWELL', 'TO', 'THE', 'RESCUE'] +61-70968-0035-1716: ref=['TAKING', 'ADVANTAGE', 'OF', 'THIS', 'THE', "SQUIRE'S", 'FEW', 'MEN', 'REDOUBLED', 'THEIR', 'EFFORTS', 'AND', 'ENCOURAGED', 'BY', "ROBIN'S", 'AND', 'THE', 'LITTLE', "STROLLER'S", 'CRIES', 'FOUGHT', 'THEIR', 'WAY', 'TO', 'HIM'] +61-70968-0035-1716: hyp=['TAKING', 'ADVANTAGE', 'OF', 'THIS', 'THE', "SQUIRE'S", 'FEW', 'MEN', 'REDOUBLED', 'THEIR', 'EFFORTS', 'AND', 'ENCOURAGED', 'BY', "ROBIN'S", 'AND', 'THE', 'LITTLE', "STROLLER'S", 'CRIES', 'FOUGHT', 'THEIR', 'WAY', 'TO', 'HIM'] +61-70968-0036-1717: ref=['GEORGE', 'MONTFICHET', 'WILL', 'NEVER', 'FORGET', 'THIS', 'DAY'] +61-70968-0036-1717: hyp=['GEORGE', 'MONTFICHET', 'WILL', 'NEVER', 'FORGET', 'THIS', 'DAY'] +61-70968-0037-1718: ref=['WHAT', 'IS', 'YOUR', 'NAME', 'LORDING', 'ASKED', 'THE', 'LITTLE', 'STROLLER', 'PRESENTLY'] +61-70968-0037-1718: hyp=['WHAT', 'IS', 'YOUR', 'NAME', 'LORDING', 'ASKED', 'THE', 'LITTLE', 'STROLLER', 'PRESENTLY'] +61-70968-0038-1719: ref=['ROBIN', 'FITZOOTH'] +61-70968-0038-1719: hyp=['ROBIN', 'FITZOOTH'] +61-70968-0039-1720: ref=['AND', 'MINE', 'IS', 'WILL', 'STUTELEY', 'SHALL', 'WE', 'BE', 'COMRADES'] +61-70968-0039-1720: hyp=['AND', 'MINE', 'IS', 'WILL', 'STUTELEY', 'SHALL', 'WE', 'BE', 'COMRADES'] +61-70968-0040-1721: ref=['RIGHT', 'WILLINGLY', 'FOR', 'BETWEEN', 'US', 'WE', 'HAVE', 'WON', 'THE', 'BATTLE', 'ANSWERED', 'ROBIN'] +61-70968-0040-1721: hyp=['RIGHT', 'WILLINGLY', 'FOR', 'BETWEEN', 'US', 'WE', 'HAVE', 'WON', 'THE', 'BATTLE', 'ANSWERED', 'ROBIN'] +61-70968-0041-1722: ref=['I', 'LIKE', 'YOU', 'WILL', 'YOU', 'ARE', 'THE', 'SECOND', 'WILL', 'THAT', 'I', 'HAVE', 'MET', 'AND', 'LIKED', 'WITHIN', 'TWO', 'DAYS', 'IS', 'THERE', 'A', 'SIGN', 'IN', 'THAT'] +61-70968-0041-1722: hyp=['I', 'LIKE', 'YOU', 'WILL', 'YOU', 'ARE', 'THE', 'SECOND', 'WILL', 'THAT', 'I', 'HAVE', 'MET', 'AND', 'LIKED', 'WITHIN', 'TWO', 'DAYS', 'IS', 'THERE', 'A', 'SIGN', 'IN', 'THAT'] +61-70968-0042-1723: ref=['MONTFICHET', 'CALLED', 'OUT', 'FOR', 'ROBIN', 'TO', 
'GIVE', 'HIM', 'AN', 'ARM'] +61-70968-0042-1723: hyp=['MONTFICHET', 'CALLED', 'OUT', 'FOR', 'ROBIN', 'TO', 'GIVE', 'HIM', 'AN', 'ARM'] +61-70968-0043-1724: ref=['FRIENDS', 'SAID', 'MONTFICHET', 'FAINTLY', 'TO', 'THE', 'WRESTLERS', 'BEAR', 'US', 'ESCORT', 'SO', 'FAR', 'AS', 'THE', "SHERIFF'S", 'HOUSE'] +61-70968-0043-1724: hyp=['FRIENDS', 'SAID', 'MONTFICHET', 'FAINTLY', 'TO', 'THE', 'WRESTLERS', 'BEAR', 'US', 'ESCORT', 'SO', 'FAR', 'AS', 'THE', "SHERIFF'S", 'HOUSE'] +61-70968-0044-1725: ref=['IT', 'WILL', 'NOT', 'BE', 'SAFE', 'FOR', 'YOU', 'TO', 'STAY', 'HERE', 'NOW'] +61-70968-0044-1725: hyp=['IT', 'WILL', 'NOT', 'BE', 'SAFE', 'FOR', 'YOU', 'TO', 'STAY', 'HERE', 'NOW'] +61-70968-0045-1726: ref=['PRAY', 'FOLLOW', 'US', 'WITH', 'MINE', 'AND', 'MY', 'LORD', "SHERIFF'S", 'MEN'] +61-70968-0045-1726: hyp=['PRAY', 'FOLLOW', 'US', 'WITH', 'MINE', 'AND', 'MY', 'LORD', "SHERIFF'S", 'MEN'] +61-70968-0046-1727: ref=['NOTTINGHAM', 'CASTLE', 'WAS', 'REACHED', 'AND', 'ADMITTANCE', 'WAS', 'DEMANDED'] +61-70968-0046-1727: hyp=['NOTTINGHAM', 'CASTLE', 'WAS', 'REACHED', 'AND', 'ADMITTANCE', 'WAS', 'DEMANDED'] +61-70968-0047-1728: ref=['MASTER', 'MONCEUX', 'THE', 'SHERIFF', 'OF', 'NOTTINGHAM', 'WAS', 'MIGHTILY', 'PUT', 'ABOUT', 'WHEN', 'TOLD', 'OF', 'THE', 'RIOTING'] +61-70968-0047-1728: hyp=['MASTER', 'MONCEUX', 'THE', 'SHERIFF', 'OF', 'NOTTINGHAM', 'WAS', 'MIGHTILY', 'PUT', 'ABOUT', 'WHEN', 'TOLD', 'OF', 'THE', 'RIOTING'] +61-70968-0048-1729: ref=['AND', 'HENRY', 'MIGHT', 'RETURN', 'TO', 'ENGLAND', 'AT', 'ANY', 'MOMENT'] +61-70968-0048-1729: hyp=['AND', 'HENRY', 'MIGHT', 'RETURN', 'TO', 'ENGLAND', 'AT', 'ANY', 'MOMENT'] +61-70968-0049-1730: ref=['HAVE', 'YOUR', 'WILL', 'CHILD', 'IF', 'THE', 'BOY', 'ALSO', 'WILLS', 'IT', 'MONTFICHET', 'ANSWERED', 'FEELING', 'TOO', 'ILL', 'TO', 'OPPOSE', 'ANYTHING', 'VERY', 'STRONGLY', 'JUST', 'THEN'] +61-70968-0049-1730: hyp=['HAVE', 'YOUR', 'WILL', 'CHILD', 'IF', 'THE', 'BOY', 'ALSO', 'WILLS', 'IT', 'MONTFICHET', 'ANSWERED', 'FEELING', 'TOO', 'ILL', 'TO', 'OPPOSE', 'ANYTHING', 'VERY', 'STRONGLY', 'JUST', 'THEN'] +61-70968-0050-1731: ref=['HE', 'MADE', 'AN', 'EFFORT', 'TO', 'HIDE', 'HIS', 'CONDITION', 'FROM', 'THEM', 'ALL', 'AND', 'ROBIN', 'FELT', 'HIS', 'FINGERS', 'TIGHTEN', 'UPON', 'HIS', 'ARM'] +61-70968-0050-1731: hyp=['HE', 'MADE', 'AN', 'EFFORT', 'TO', 'HIDE', 'HIS', 'CONDITION', 'FROM', 'THEM', 'ALL', 'AND', 'ROBIN', 'FELT', 'HIS', 'FINGERS', 'TIGHTEN', 'UPON', 'HIS', 'ARM'] +61-70968-0051-1732: ref=['BEG', 'ME', 'A', 'ROOM', 'OF', 'THE', 'SHERIFF', 'CHILD', 'QUICKLY'] +61-70968-0051-1732: hyp=['BEG', 'ME', 'A', 'ROOM', 'OF', 'THE', 'SHERIFF', 'CHILD', 'QUICKLY'] +61-70968-0052-1733: ref=['BUT', 'WHO', 'IS', 'THIS', 'FELLOW', 'PLUCKING', 'AT', 'YOUR', 'SLEEVE'] +61-70968-0052-1733: hyp=['BUT', 'WHO', 'IS', 'THIS', 'FELLOW', 'PLUCKING', 'AT', 'YOUR', 'SLEEVE'] +61-70968-0053-1734: ref=['HE', 'IS', 'MY', 'ESQUIRE', 'EXCELLENCY', 'RETURNED', 'ROBIN', 'WITH', 'DIGNITY'] +61-70968-0053-1734: hyp=['HE', 'IS', 'MY', 'ESQUIRE', 'EXCELLENCY', 'RETURNED', 'ROBIN', 'WITH', 'DIGNITY'] +61-70968-0054-1735: ref=['MISTRESS', 'FITZOOTH', 'HAD', 'BEEN', 'CARRIED', 'OFF', 'BY', 'THE', "SHERIFF'S", 'DAUGHTER', 'AND', 'HER', 'MAIDS', 'AS', 'SOON', 'AS', 'THEY', 'HAD', 'ENTERED', 'THE', 'HOUSE', 'SO', 'THAT', 'ROBIN', 'ALONE', 'HAD', 'THE', 'CARE', 'OF', 'MONTFICHET'] +61-70968-0054-1735: hyp=['MISTRESS', 'FITZOOTH', 'HAD', 'BEEN', 'CARRIED', 'OFF', 'BY', 'THE', "SHERIFF'S", 'DAUGHTER', 'AND', 'HER', 'MAIDS', 'AS', 'SOON', 'AS', 'THEY', 'ENTERED', 'THE', 'HOUSE', 'SO', 'THAT', 'ROBIN', 
'ALONE', 'HAD', 'THE', 'CARE', 'OF', 'MONTFICHET'] +61-70968-0055-1736: ref=['ROBIN', 'WAS', 'GLAD', 'WHEN', 'AT', 'LENGTH', 'THEY', 'WERE', 'LEFT', 'TO', 'THEIR', 'OWN', 'DEVICES'] +61-70968-0055-1736: hyp=['ROBIN', 'WAS', 'GLAD', 'WHEN', 'AT', 'LENGTH', 'THEY', 'WERE', 'LEFT', 'TO', 'THEIR', 'OWN', 'DEVICES'] +61-70968-0056-1737: ref=['THE', 'WINE', 'DID', 'CERTAINLY', 'BRING', 'BACK', 'THE', 'COLOR', 'TO', 'THE', "SQUIRE'S", 'CHEEKS'] +61-70968-0056-1737: hyp=['THE', 'WINE', 'DID', 'CERTAINLY', 'BRING', 'BACK', 'THE', 'COLOR', 'TO', 'THE', "SQUIRE'S", 'CHEEKS'] +61-70968-0057-1738: ref=['THESE', 'ESCAPADES', 'ARE', 'NOT', 'FOR', 'OLD', 'GAMEWELL', 'LAD', 'HIS', 'DAY', 'HAS', 'COME', 'TO', 'TWILIGHT'] +61-70968-0057-1738: hyp=['THESE', 'ESCAPADES', 'ARE', 'NOT', 'FOR', 'OLD', 'GAMEWELL', 'LAD', 'HIS', 'DAY', 'HAS', 'COME', 'TO', 'TWILIGHT'] +61-70968-0058-1739: ref=['WILL', 'YOU', 'FORGIVE', 'ME', 'NOW'] +61-70968-0058-1739: hyp=['WILL', 'YOU', 'FORGIVE', 'ME', 'NOW'] +61-70968-0059-1740: ref=['IT', 'WILL', 'BE', 'NO', 'DISAPPOINTMENT', 'TO', 'ME'] +61-70968-0059-1740: hyp=["IT'LL", 'BE', 'NO', 'DISAPPOINTMENT', 'TO', 'ME'] +61-70968-0060-1741: ref=['NO', 'THANKS', 'I', 'AM', 'GLAD', 'TO', 'GIVE', 'YOU', 'SUCH', 'EASY', 'HAPPINESS'] +61-70968-0060-1741: hyp=['NO', 'THANKS', 'I', 'AM', 'GLAD', 'TO', 'GIVE', 'YOU', 'SUCH', 'EASY', 'HAPPINESS'] +61-70968-0061-1742: ref=['YOU', 'ARE', 'A', 'WORTHY', 'LEECH', 'WILL', 'PRESENTLY', 'WHISPERED', 'ROBIN', 'THE', 'WINE', 'HAS', 'WORKED', 'A', 'MARVEL'] +61-70968-0061-1742: hyp=['YOU', 'ARE', 'A', 'WORTHY', 'LEECH', 'WILL', 'PRESENTLY', 'WHISPERED', 'ROBIN', 'THE', 'WINE', 'HAS', 'WORKED', 'A', 'MARVEL'] +61-70968-0062-1743: ref=['AY', 'AND', 'SHOW', 'YOU', 'SOME', 'PRETTY', 'TRICKS'] +61-70968-0062-1743: hyp=['AYE', 'AND', 'SHOW', 'YOU', 'SOME', 'PRETTY', 'TRICKS'] +61-70970-0000-1744: ref=['YOUNG', 'FITZOOTH', 'HAD', 'BEEN', 'COMMANDED', 'TO', 'HIS', "MOTHER'S", 'CHAMBER', 'SO', 'SOON', 'AS', 'HE', 'HAD', 'COME', 'OUT', 'FROM', 'HIS', 'CONVERSE', 'WITH', 'THE', 'SQUIRE'] +61-70970-0000-1744: hyp=['YOUNG', 'FITZOOTH', 'HAD', 'BEEN', 'COMMANDED', 'TO', 'HIS', "MOTHER'S", 'CHAMBER', 'SO', 'SOON', 'AS', 'HE', 'HAD', 'COME', 'OUT', 'FROM', 'HIS', 'CONVERSE', 'WITH', 'THE', 'SQUIRE'] +61-70970-0001-1745: ref=['THERE', 'BEFELL', 'AN', 'ANXIOUS', 'INTERVIEW', 'MISTRESS', 'FITZOOTH', 'ARGUING', 'FOR', 'AND', 'AGAINST', 'THE', "SQUIRE'S", 'PROJECT', 'IN', 'A', 'BREATH'] +61-70970-0001-1745: hyp=['THERE', 'BEFELL', 'AN', 'ANXIOUS', 'INTERVIEW', 'MISTRESS', 'FITZOOTH', 'ARGUING', 'FOR', 'AND', 'AGAINST', 'THE', "SQUIRE'S", 'PROJECT', 'IN', 'A', 'BREATH'] +61-70970-0002-1746: ref=['MOST', 'OF', 'ALL', 'ROBIN', 'THOUGHT', 'OF', 'HIS', 'FATHER', 'WHAT', 'WOULD', 'HE', 'COUNSEL'] +61-70970-0002-1746: hyp=['MOST', 'OF', 'ALL', 'ROBIN', 'THOUGHT', 'OF', 'HIS', 'FATHER', 'WHAT', 'WOULD', 'HE', 'COUNSEL'] +61-70970-0003-1747: ref=['IF', 'FOR', 'A', 'WHIM', 'YOU', 'BEGGAR', 'YOURSELF', 'I', 'CANNOT', 'STAY', 'YOU'] +61-70970-0003-1747: hyp=['IF', 'FOR', 'A', 'WHIM', 'YOU', 'BEGGAR', 'YOURSELF', 'I', 'CANNOT', 'STAY', 'YOU'] +61-70970-0004-1748: ref=['BUT', 'TAKE', 'IT', 'WHILST', 'I', 'LIVE', 'AND', 'WEAR', "MONTFICHET'S", 'SHIELD', 'IN', 'THE', 'DAYS', 'WHEN', 'MY', 'EYES', 'CAN', 'BE', 'REJOICED', 'BY', 'SO', 'BRAVE', 'A', 'SIGHT', 'FOR', 'YOU', 'WILL', "NE'ER", 'DISGRACE', 'OUR', 'SCUTCHEON', 'I', 'WARRANT', 'ME'] +61-70970-0004-1748: hyp=['BUT', 'TAKE', 'IT', 'WHILST', 'I', 'LIVE', 'AND', 'WEAR', "MONTFICHET'S", 'SHIELD', 'IN', 'THE', 'DAYS', 'WHEN', 'MY', 
'EYES', 'CAN', 'BE', 'REJOICED', 'BY', 'SO', 'BRAVE', 'A', 'SIGHT', 'FOR', 'YOU', 'WILL', 'NEVER', 'DISGRACE', 'OUR', 'DUCHEON', 'I', 'WARRANT', 'ME'] +61-70970-0005-1749: ref=['THE', 'LAD', 'HAD', 'CHECKED', 'HIM', 'THEN'] +61-70970-0005-1749: hyp=['THE', 'LAD', 'HAD', 'CHECKED', 'HIM', 'THEN'] +61-70970-0006-1750: ref=['NEVER', 'THAT', 'SIR', 'HE', 'HAD', 'SAID'] +61-70970-0006-1750: hyp=['NEVER', 'THAT', 'SIR', 'HE', 'HAD', 'SAID'] +61-70970-0007-1751: ref=['HE', 'WAS', 'IN', 'DEEP', 'CONVERSE', 'WITH', 'THE', 'CLERK', 'AND', 'ENTERED', 'THE', 'HALL', 'HOLDING', 'HIM', 'BY', 'THE', 'ARM'] +61-70970-0007-1751: hyp=['HE', 'WAS', 'IN', 'DEEP', 'CONVERSE', 'WITH', 'THE', 'CLERK', 'AND', 'ENTERED', 'THE', 'HALL', 'HOLDING', 'HIM', 'BY', 'THE', 'ARM'] +61-70970-0008-1752: ref=['NOW', 'TO', 'BED', 'BOY'] +61-70970-0008-1752: hyp=['NOW', 'TO', 'BED', 'BOY'] +61-70970-0009-1753: ref=['TIS', 'LATE', 'AND', 'I', 'GO', 'MYSELF', 'WITHIN', 'A', 'SHORT', 'SPACE'] +61-70970-0009-1753: hyp=['TIS', 'LATE', 'AND', 'I', 'GO', 'MYSELF', 'WITHIN', 'A', 'SHORT', 'SPACE'] +61-70970-0010-1754: ref=['DISMISS', 'YOUR', 'SQUIRE', 'ROBIN', 'AND', 'BID', 'ME', 'GOOD', 'E', 'E', 'N'] +61-70970-0010-1754: hyp=['DISMISS', 'YOUR', 'SQUIRE', 'ROBIN', 'AND', 'BID', 'ME', 'GOOD', 'EATIN'] +61-70970-0011-1755: ref=['AS', 'ANY', 'IN', 'ENGLAND', 'I', 'WOULD', 'SAY', 'SAID', 'GAMEWELL', 'PROUDLY', 'THAT', 'IS', 'IN', 'HIS', 'DAY'] +61-70970-0011-1755: hyp=['AS', 'ANY', 'IN', 'ENGLAND', 'I', 'WOULD', 'SAY', 'SAID', 'GAMEWELL', 'PROUDLY', 'THAT', 'IS', 'IN', 'HIS', 'DAY'] +61-70970-0012-1756: ref=['YET', 'HE', 'WILL', 'TEACH', 'YOU', 'A', 'FEW', 'TRICKS', 'WHEN', 'MORNING', 'IS', 'COME'] +61-70970-0012-1756: hyp=['YET', 'HE', 'WILL', 'TEACH', 'YOU', 'A', 'FEW', 'TRICKS', 'WHEN', 'MORNING', 'IS', 'COME'] +61-70970-0013-1757: ref=['THERE', 'WAS', 'NO', 'CHANCE', 'TO', 'ALTER', 'HIS', 'SLEEPING', 'ROOM', 'TO', 'ONE', 'NEARER', 'TO', "GAMEWELL'S", 'CHAMBER'] +61-70970-0013-1757: hyp=['THERE', 'WAS', 'NO', 'CHANCE', 'TO', 'ALTER', 'HIS', 'SLEEPING', 'ROOM', 'TO', 'ONE', 'NEARER', 'TO', "GAMEWELL'S", 'CHAMBER'] +61-70970-0014-1758: ref=['PRESENTLY', 'HE', 'CROSSED', 'THE', 'FLOOR', 'OF', 'HIS', 'ROOM', 'WITH', 'DECIDED', 'STEP'] +61-70970-0014-1758: hyp=['PRESENTLY', 'HE', 'CROSSED', 'THE', 'FLOOR', 'OF', 'HIS', 'ROOM', 'WITH', 'DECIDED', 'STEP'] +61-70970-0015-1759: ref=['WILL', 'CRIED', 'HE', 'SOFTLY', 'AND', 'STUTELEY', 'WHO', 'HAD', 'CHOSEN', 'HIS', 'COUCH', 'ACROSS', 'THE', 'DOOR', 'OF', 'HIS', 'YOUNG', "MASTER'S", 'CHAMBER', 'SPRANG', 'UP', 'AT', 'ONCE', 'IN', 'ANSWER'] +61-70970-0015-1759: hyp=['WILL', 'CRIED', 'HE', 'SOFTLY', 'AND', 'STUTELEY', 'WHO', 'HAD', 'CHOSEN', 'HIS', 'COUCH', 'ACROSS', 'THE', 'DOOR', 'OF', 'HIS', 'YOUNG', "MASTER'S", 'CHAMBER', 'SPRANG', 'UP', 'AT', 'ONCE', 'IN', 'ANSWER'] +61-70970-0016-1760: ref=['WE', 'WILL', 'GO', 'OUT', 'TOGETHER', 'TO', 'THE', 'BOWER', 'THERE', 'IS', 'A', 'WAY', 'DOWN', 'TO', 'THE', 'COURT', 'FROM', 'MY', 'WINDOW'] +61-70970-0016-1760: hyp=['WE', 'WILL', 'GO', 'OUT', 'TOGETHER', 'TO', 'THE', 'BOWER', 'THERE', 'IS', 'A', 'WAY', 'DOWN', 'TO', 'THE', 'COURT', 'FROM', 'MY', 'WINDOW'] +61-70970-0017-1761: ref=['REST', 'AND', 'BE', 'STILL', 'UNTIL', 'I', 'WARN', 'YOU'] +61-70970-0017-1761: hyp=['REST', 'AND', 'BE', 'STILL', 'UNTIL', 'I', 'WARN', 'YOU'] +61-70970-0018-1762: ref=['THE', 'HOURS', 'PASSED', 'WEARILY', 'BY', 'AND', 'MOVEMENT', 'COULD', 'YET', 'BE', 'HEARD', 'ABOUT', 'THE', 'HALL'] +61-70970-0018-1762: hyp=['THE', 'HOURS', 'PASSED', 'WEARILY', 'BY', 'AND', 'MOVEMENT', 
'COULD', 'YET', 'BE', 'HEARD', 'ABOUT', 'THE', 'HALL'] +61-70970-0019-1763: ref=['AT', 'LAST', 'ALL', 'WAS', 'QUIET', 'AND', 'BLACK', 'IN', 'THE', 'COURTYARD', 'OF', 'GAMEWELL'] +61-70970-0019-1763: hyp=['AT', 'LAST', 'ALL', 'WAS', 'QUIET', 'AND', 'BLACK', 'IN', 'THE', 'COURTYARD', 'OF', 'GAMEWELL'] +61-70970-0020-1764: ref=['WILL', 'WHISPERED', 'ROBIN', 'OPENING', 'HIS', 'DOOR', 'AS', 'HE', 'SPOKE', 'ARE', 'YOU', 'READY'] +61-70970-0020-1764: hyp=['WILL', 'WHISPERED', 'ROBIN', 'OPENING', 'HIS', 'DOOR', 'AS', 'HE', 'SPOKE', 'ARE', 'YOU', 'READY'] +61-70970-0021-1765: ref=['THEY', 'THEN', 'RENEWED', 'THEIR', 'JOURNEY', 'AND', 'UNDER', 'THE', 'BETTER', 'LIGHT', 'MADE', 'A', 'SAFE', 'CROSSING', 'OF', 'THE', 'STABLE', 'ROOFS'] +61-70970-0021-1765: hyp=['THEY', 'THEN', 'RENEWED', 'THEIR', 'JOURNEY', 'AND', 'UNDER', 'THE', 'BETTER', 'LIGHT', 'MADE', 'A', 'SAFE', 'CROSSING', 'OF', 'THE', 'STABLE', 'ROOFS'] +61-70970-0022-1766: ref=['ROBIN', 'ENTERED', 'THE', 'HUT', 'DRAGGING', 'THE', 'UNWILLING', 'ESQUIRE', 'AFTER', 'HIM'] +61-70970-0022-1766: hyp=['ROBIN', 'ENTERED', 'THE', 'HUT', 'DRAGGING', 'THE', 'UNWILLING', 'ESQUIRE', 'AFTER', 'HIM'] +61-70970-0023-1767: ref=['BE', 'NOT', 'SO', 'FOOLISH', 'FRIEND', 'SAID', 'FITZOOTH', 'CROSSLY'] +61-70970-0023-1767: hyp=['BE', 'NOT', 'SO', 'FOOLISH', 'FRIEND', 'SAID', 'FITZOOTH', 'CROSSLY'] +61-70970-0024-1768: ref=['THEY', 'MOVED', 'THEREAFTER', 'CAUTIOUSLY', 'ABOUT', 'THE', 'HUT', 'GROPING', 'BEFORE', 'AND', 'ABOUT', 'THEM', 'TO', 'FIND', 'SOMETHING', 'TO', 'SHOW', 'THAT', 'WARRENTON', 'HAD', 'FULFILLED', 'HIS', 'MISSION'] +61-70970-0024-1768: hyp=['THEY', 'MOVED', 'THEREAFTER', 'CAUTIOUSLY', 'ABOUT', 'THE', 'HUT', 'GROPING', 'BEFORE', 'AND', 'ABOUT', 'THEM', 'TO', 'FIND', 'SOMETHING', 'TO', 'SHOW', 'THAT', 'WARRENTON', 'HAD', 'FULFILLED', 'HIS', 'MISSION'] +61-70970-0025-1769: ref=['THEY', 'WERE', 'UPON', 'THE', 'VERGE', 'OF', 'AN', 'OPEN', 'TRAP', 'IN', 'THE', 'FAR', 'CORNER', 'OF', 'THE', 'HUT', 'AND', 'STUTELEY', 'HAD', 'TRIPPED', 'OVER', 'THE', 'EDGE', 'OF', 'THE', 'REVERSED', 'FLAP', 'MOUTH', 'OF', 'THIS', 'PIT'] +61-70970-0025-1769: hyp=['THEY', 'WERE', 'UPON', 'THE', 'VERGE', 'OF', 'AN', 'OPEN', 'TRAP', 'IN', 'THE', 'FAR', 'CORNER', 'OF', 'THE', 'HUT', 'AND', 'STUTELEY', 'HAD', 'TRIPPED', 'OVER', 'THE', 'EDGE', 'OF', 'THE', 'REVERSED', 'FLAT', 'MOUTH', 'OF', 'THIS', 'PIT'] +61-70970-0026-1770: ref=["FITZOOTH'S", 'HAND', 'RESTED', 'AT', 'LAST', 'UPON', 'THE', 'TOP', 'RUNG', 'OF', 'A', 'LADDER', 'AND', 'SLOWLY', 'THE', 'TRUTH', 'CAME', 'TO', 'HIM'] +61-70970-0026-1770: hyp=["FITZOOTH'S", 'HAND', 'RESTED', 'AT', 'LAST', 'UPON', 'THE', 'TOP', 'RUNG', 'OF', 'THE', 'LADDER', 'AND', 'SLOWLY', 'THE', 'TRUTH', 'CAME', 'TO', 'HIM'] +61-70970-0027-1771: ref=['ROBIN', 'CAREFULLY', 'DESCENDED', 'THE', 'LADDER', 'AND', 'FOUND', 'HIMSELF', 'SOON', 'UPON', 'FIRM', 'ROCKY', 'GROUND'] +61-70970-0027-1771: hyp=['ROBIN', 'CAREFULLY', 'DESCENDED', 'THE', 'LADDER', 'AND', 'FOUND', 'HIMSELF', 'SOON', 'UPON', 'FIRM', 'ROCKY', 'GROUND'] +61-70970-0028-1772: ref=['STUTELEY', 'WAS', 'BY', 'HIS', 'SIDE', 'IN', 'A', 'FLASH', 'AND', 'THEN', 'THEY', 'BOTH', 'BEGAN', 'FEELING', 'ABOUT', 'THEM', 'TO', 'ASCERTAIN', 'THE', 'SHAPE', 'AND', 'CHARACTER', 'OF', 'THIS', 'VAULT'] +61-70970-0028-1772: hyp=['STUTELEY', 'WAS', 'BY', 'HIS', 'SIDE', 'IN', 'A', 'FLASH', 'AND', 'THEN', 'THEY', 'BOTH', 'BEGAN', 'FEELING', 'ABOUT', 'THEM', 'TO', 'ASCERTAIN', 'THE', 'SHAPE', 'AND', 'CHARACTER', 'OF', 'THIS', 'VAULT'] +61-70970-0029-1773: ref=['FROM', 'THE', 'BLACKNESS', 'BEHIND', 'THE', 'LIGHT', 
'THEY', 'HEARD', 'A', 'VOICE', "WARRENTON'S"] +61-70970-0029-1773: hyp=['FROM', 'THE', 'BLACKNESS', 'BEHIND', 'THE', 'LIGHT', 'THEY', 'HEARD', 'A', 'VOICE', "WARRENTON'S"] +61-70970-0030-1774: ref=['SAVE', 'ME', 'MASTERS', 'BUT', 'YOU', 'STARTLED', 'ME', 'RARELY'] +61-70970-0030-1774: hyp=['SAVE', 'ME', 'MASTERS', 'BUT', 'YOU', 'STARTLED', 'ME', 'RARELY'] +61-70970-0031-1775: ref=['CRIED', 'HE', 'WAVING', 'THE', 'LANTHORN', 'BEFORE', 'HIM', 'TO', 'MAKE', 'SURE', 'THAT', 'THESE', 'WERE', 'NO', 'GHOSTS', 'IN', 'FRONT', 'OF', 'HIM'] +61-70970-0031-1775: hyp=['CRIED', 'HE', 'WAVING', 'THE', 'LANTERN', 'BEFORE', 'HIM', 'TO', 'MAKE', 'SURE', 'THAT', 'THESE', 'WERE', 'NO', 'GHOSTS', 'IN', 'FRONT', 'OF', 'HIM'] +61-70970-0032-1776: ref=['ENQUIRED', 'ROBIN', 'WITH', 'HIS', 'SUSPICIONS', 'STILL', 'UPON', 'HIM'] +61-70970-0032-1776: hyp=['ENQUIRED', 'ROBIN', 'WITH', 'HIS', 'SUSPICION', 'STILL', 'UPON', 'HIM'] +61-70970-0033-1777: ref=['TRULY', 'SUCH', 'A', 'HORSE', 'SHOULD', 'BE', 'WORTH', 'MUCH', 'IN', 'NOTTINGHAM', 'FAIR'] +61-70970-0033-1777: hyp=['TRULY', 'SUCH', 'A', 'HORSE', 'WOULD', 'BE', 'WORTH', 'MUCH', 'IN', 'NOTTINGHAM', 'FAIR'] +61-70970-0034-1778: ref=['NAY', 'NAY', 'LORDING', 'ANSWERED', 'WARRENTON', 'WITH', 'A', 'HALF', 'LAUGH'] +61-70970-0034-1778: hyp=['NAY', 'NAY', 'LORDING', 'ANSWERED', 'WARRENTON', 'WITH', 'A', 'HALF', 'LAUGH'] +61-70970-0035-1779: ref=['WARRENTON', 'SPOKE', 'THUS', 'WITH', 'SIGNIFICANCE', 'TO', 'SHOW', 'ROBIN', 'THAT', 'HE', 'WAS', 'NOT', 'TO', 'THINK', "GEOFFREY'S", 'CLAIMS', 'TO', 'THE', 'ESTATE', 'WOULD', 'BE', 'PASSED', 'BY'] +61-70970-0035-1779: hyp=['WARRENTON', 'SPOKE', 'THUS', 'WITH', 'SIGNIFICANCE', 'TO', 'SHOW', 'ROBIN', 'THAT', 'HE', 'WAS', 'NOT', 'TO', 'THINK', "GEOFFREY'S", 'CLAIMS', 'TO', 'THE', 'ESTATE', 'WOULD', 'BE', 'PASSED', 'BY'] +61-70970-0036-1780: ref=['ROBIN', 'FITZOOTH', 'SAW', 'THAT', 'HIS', 'DOUBTS', 'OF', 'WARRENTON', 'HAD', 'BEEN', 'UNFAIR', 'AND', 'HE', 'BECAME', 'ASHAMED', 'OF', 'HIMSELF', 'FOR', 'HARBORING', 'THEM'] +61-70970-0036-1780: hyp=['ROBIN', 'FITZOOTH', 'SAW', 'THAT', 'HIS', 'DOUBTS', 'OF', 'WARRENTON', 'HAD', 'BEEN', 'UNFAIR', 'AND', 'HE', 'BECAME', 'ASHAMED', 'OF', 'HIMSELF', 'FOR', 'HARBORING', 'THEM'] +61-70970-0037-1781: ref=['HIS', 'TONES', 'RANG', 'PLEASANTLY', 'ON', "WARRENTON'S", 'EARS', 'AND', 'FORTHWITH', 'A', 'GOOD', 'FELLOWSHIP', 'WAS', 'HERALDED', 'BETWEEN', 'THEM'] +61-70970-0037-1781: hyp=['HIS', 'TONES', 'RANG', 'PLEASANTLY', 'ON', "WARRENTON'S", 'EARS', 'AND', 'FORTHWITH', 'A', 'GOOD', 'FELLOWSHIP', 'WAS', 'HERALDED', 'BETWEEN', 'THEM'] +61-70970-0038-1782: ref=['THE', 'OLD', 'SERVANT', 'TOLD', 'HIM', 'QUIETLY', 'AS', 'THEY', 'CREPT', 'BACK', 'TO', 'GAMEWELL', 'THAT', 'THIS', 'PASSAGE', 'WAY', 'LED', 'FROM', 'THE', 'HUT', 'IN', 'THE', 'PLEASANCE', 'TO', 'SHERWOOD', 'AND', 'THAT', 'GEOFFREY', 'FOR', 'THE', 'TIME', 'WAS', 'HIDING', 'WITH', 'THE', 'OUTLAWS', 'IN', 'THE', 'FOREST'] +61-70970-0038-1782: hyp=['THE', 'OLD', 'SERVANT', 'TOLD', 'HIM', 'QUIETLY', 'AS', 'THEY', 'CREPT', 'BACK', 'TO', 'GAMEWELL', 'THAT', 'THIS', 'PASSAGEWAY', 'LED', 'FROM', 'THE', 'HUT', 'IN', 'THE', 'PLEASANTS', 'TO', 'SHERWOOD', 'AND', 'THAT', 'JEFFREY', 'FOR', 'THE', 'TIME', 'WAS', 'HIDING', 'WITH', 'THE', 'OUTLAWS', 'IN', 'THE', 'FOREST'] +61-70970-0039-1783: ref=['HE', 'IMPLORES', 'US', 'TO', 'BE', 'DISCREET', 'AS', 'THE', 'GRAVE', 'IN', 'THIS', 'MATTER', 'FOR', 'IN', 'SOOTH', 'HIS', 'LIFE', 'IS', 'IN', 'THE', 'HOLLOW', 'OF', 'OUR', 'HANDS'] +61-70970-0039-1783: hyp=['HE', 'IMPLORES', 'US', 'TO', 'BE', 'DISCREET', 'AS', 'THE', 
'GRAVE', 'IN', 'THIS', 'MATTER', 'FOR', 'IN', 'SOOTH', 'HIS', 'LIFE', 'IS', 'IN', 'THE', 'HOLLOW', 'OF', 'OUR', 'HANDS'] +61-70970-0040-1784: ref=['THEY', 'REGAINED', 'THEIR', 'APARTMENT', 'APPARENTLY', 'WITHOUT', 'DISTURBING', 'THE', 'HOUSEHOLD', 'OF', 'GAMEWELL'] +61-70970-0040-1784: hyp=['THEY', 'REGAINED', 'THEIR', 'APARTMENT', 'APPARENTLY', 'WITHOUT', 'DISTURBING', 'THE', 'HOUSEHOLD', 'OF', 'GAMEWELL'] +672-122797-0000-1785: ref=['OUT', 'IN', 'THE', 'WOODS', 'STOOD', 'A', 'NICE', 'LITTLE', 'FIR', 'TREE'] +672-122797-0000-1785: hyp=['OUT', 'IN', 'THE', 'WOOD', 'STOOD', 'A', 'NICE', 'LITTLE', 'FIR', 'TREE'] +672-122797-0001-1786: ref=['THE', 'PLACE', 'HE', 'HAD', 'WAS', 'A', 'VERY', 'GOOD', 'ONE', 'THE', 'SUN', 'SHONE', 'ON', 'HIM', 'AS', 'TO', 'FRESH', 'AIR', 'THERE', 'WAS', 'ENOUGH', 'OF', 'THAT', 'AND', 'ROUND', 'HIM', 'GREW', 'MANY', 'LARGE', 'SIZED', 'COMRADES', 'PINES', 'AS', 'WELL', 'AS', 'FIRS'] +672-122797-0001-1786: hyp=['THE', 'PLACE', 'HE', 'HAD', 'WAS', 'A', 'VERY', 'GOOD', 'ONE', 'THE', 'SUN', 'SHONE', 'ON', 'HIM', 'AS', 'TO', 'FRESH', 'AIR', 'THERE', 'WAS', 'ENOUGH', 'OF', 'THAT', 'AND', 'ROUND', 'HIM', 'GREW', 'MANY', 'LARGE', 'SIZED', 'COMRADES', 'PINES', 'AS', 'WELL', 'AS', 'FIRS'] +672-122797-0002-1787: ref=['HE', 'DID', 'NOT', 'THINK', 'OF', 'THE', 'WARM', 'SUN', 'AND', 'OF', 'THE', 'FRESH', 'AIR', 'HE', 'DID', 'NOT', 'CARE', 'FOR', 'THE', 'LITTLE', 'COTTAGE', 'CHILDREN', 'THAT', 'RAN', 'ABOUT', 'AND', 'PRATTLED', 'WHEN', 'THEY', 'WERE', 'IN', 'THE', 'WOODS', 'LOOKING', 'FOR', 'WILD', 'STRAWBERRIES'] +672-122797-0002-1787: hyp=['HE', 'DID', 'NOT', 'THINK', 'OF', 'THE', 'WARM', 'SUN', 'AND', 'OF', 'THE', 'FRESH', 'AIR', 'HE', 'DID', 'NOT', 'CARE', 'FOR', 'THE', 'LITTLE', 'COTTAGE', 'CHILDREN', 'THAT', 'RAN', 'ABOUT', 'AND', 'PRATTLED', 'WHEN', 'THEY', 'WERE', 'IN', 'THE', 'WOODS', 'LOOKING', 'FOR', 'WILD', 'STRAWBERRIES'] +672-122797-0003-1788: ref=['BUT', 'THIS', 'WAS', 'WHAT', 'THE', 'TREE', 'COULD', 'NOT', 'BEAR', 'TO', 'HEAR'] +672-122797-0003-1788: hyp=['BUT', 'THIS', 'WAS', 'WHAT', 'THE', 'TREE', 'COULD', 'NOT', 'BEAR', 'TO', 'HEAR'] +672-122797-0004-1789: ref=['IN', 'WINTER', 'WHEN', 'THE', 'SNOW', 'LAY', 'GLITTERING', 'ON', 'THE', 'GROUND', 'A', 'HARE', 'WOULD', 'OFTEN', 'COME', 'LEAPING', 'ALONG', 'AND', 'JUMP', 'RIGHT', 'OVER', 'THE', 'LITTLE', 'TREE'] +672-122797-0004-1789: hyp=['IN', 'WINTER', 'WHEN', 'THE', 'SNOW', 'LAY', 'GLITTERING', 'ON', 'THE', 'GROUND', 'A', 'HARE', 'WOULD', 'OFTEN', 'COME', 'LEAPING', 'ALONG', 'AND', 'JUMP', 'RIGHT', 'OVER', 'THE', 'LITTLE', 'TREE'] +672-122797-0005-1790: ref=['OH', 'THAT', 'MADE', 'HIM', 'SO', 'ANGRY'] +672-122797-0005-1790: hyp=['OH', 'THAT', 'MADE', 'HIM', 'SO', 'ANGRY'] +672-122797-0006-1791: ref=['TO', 'GROW', 'AND', 'GROW', 'TO', 'GET', 'OLDER', 'AND', 'BE', 'TALL', 'THOUGHT', 'THE', 'TREE', 'THAT', 'AFTER', 'ALL', 'IS', 'THE', 'MOST', 'DELIGHTFUL', 'THING', 'IN', 'THE', 'WORLD'] +672-122797-0006-1791: hyp=['TO', 'GROW', 'AND', 'GROW', 'TO', 'GET', 'OLDER', 'AND', 'BE', 'TALL', 'THOUGHT', 'THE', 'TREE', 'THAT', 'AFTER', 'ALL', 'IS', 'THE', 'MOST', 'DELIGHTFUL', 'THING', 'IN', 'THE', 'WORLD'] +672-122797-0007-1792: ref=['IN', 'AUTUMN', 'THE', 'WOOD', 'CUTTERS', 'ALWAYS', 'CAME', 'AND', 'FELLED', 'SOME', 'OF', 'THE', 'LARGEST', 'TREES'] +672-122797-0007-1792: hyp=['IN', 'AUTUMN', 'THE', 'WOOD', 'CUTTERS', 'ALWAYS', 'CAME', 'AND', 'FELLED', 'SOME', 'OF', 'THE', 'LARGEST', 'TREES'] +672-122797-0008-1793: ref=['THIS', 'HAPPENED', 'EVERY', 'YEAR', 'AND', 'THE', 'YOUNG', 'FIR', 'TREE', 'THAT', 'HAD', 'NOW', 'GROWN', 
'TO', 'A', 'VERY', 'COMELY', 'SIZE', 'TREMBLED', 'AT', 'THE', 'SIGHT', 'FOR', 'THE', 'MAGNIFICENT', 'GREAT', 'TREES', 'FELL', 'TO', 'THE', 'EARTH', 'WITH', 'NOISE', 'AND', 'CRACKING', 'THE', 'BRANCHES', 'WERE', 'LOPPED', 'OFF', 'AND', 'THE', 'TREES', 'LOOKED', 'LONG', 'AND', 'BARE', 'THEY', 'WERE', 'HARDLY', 'TO', 'BE', 'RECOGNISED', 'AND', 'THEN', 'THEY', 'WERE', 'LAID', 'IN', 'CARTS', 'AND', 'THE', 'HORSES', 'DRAGGED', 'THEM', 'OUT', 'OF', 'THE', 'WOOD'] +672-122797-0008-1793: hyp=['THIS', 'HAPPENED', 'EVERY', 'YEAR', 'AND', 'THE', 'YOUNG', 'FIR', 'TREE', 'THAT', 'HAD', 'NOW', 'GROWN', 'TO', 'A', 'VERY', 'COMELY', 'SIZE', 'TREMBLED', 'AT', 'THE', 'SIGHT', 'FOR', 'THE', 'MAGNIFICENT', 'GREAT', 'TREES', 'FELL', 'TO', 'THE', 'EARTH', 'WITH', 'NOISE', 'AND', 'CRACKING', 'THE', 'BRANCHES', 'WERE', 'LOPPED', 'OFF', 'AND', 'THE', 'TREES', 'LOOKED', 'LONG', 'AND', 'BARE', 'THEY', 'WERE', 'HARDLY', 'TO', 'BE', 'RECOGNIZED', 'AND', 'THEN', 'THEY', 'WERE', 'LADEN', 'CARTS', 'AND', 'THE', 'HORSES', 'DRAGGED', 'THEM', 'OUT', 'OF', 'THE', 'WOOD'] +672-122797-0009-1794: ref=['HAVE', 'YOU', 'NOT', 'MET', 'THEM', 'ANYWHERE'] +672-122797-0009-1794: hyp=['HAVE', 'YOU', 'NOT', 'MET', 'THEM', 'ANYWHERE'] +672-122797-0010-1795: ref=['REJOICE', 'IN', 'THY', 'GROWTH', 'SAID', 'THE', 'SUNBEAMS'] +672-122797-0010-1795: hyp=['REJOICE', 'IN', 'THY', 'GROWTH', 'SAID', 'THE', 'SUNBEAMS'] +672-122797-0011-1796: ref=['AND', 'THEN', 'WHAT', 'HAPPENS', 'THEN'] +672-122797-0011-1796: hyp=['AND', 'THEN', 'WHAT', 'HAPPENS', 'THEN'] +672-122797-0012-1797: ref=['I', 'WOULD', 'FAIN', 'KNOW', 'IF', 'I', 'AM', 'DESTINED', 'FOR', 'SO', 'GLORIOUS', 'A', 'CAREER', 'CRIED', 'THE', 'TREE', 'REJOICING'] +672-122797-0012-1797: hyp=['I', 'WOULD', 'FAIN', 'KNOW', 'IF', 'I', 'AM', 'DESTINED', 'FOR', 'SO', 'GLORIOUS', 'A', 'CAREER', 'CRIED', 'THE', 'TREE', 'REJOICING'] +672-122797-0013-1798: ref=['I', 'AM', 'NOW', 'TALL', 'AND', 'MY', 'BRANCHES', 'SPREAD', 'LIKE', 'THE', 'OTHERS', 'THAT', 'WERE', 'CARRIED', 'OFF', 'LAST', 'YEAR', 'OH'] +672-122797-0013-1798: hyp=['I', 'AM', 'NOW', 'TALL', 'AND', 'MY', 'BRANCHES', 'SPREAD', 'LIKE', 'THE', 'OTHERS', 'THAT', 'WERE', 'CARRIED', 'OFF', 'LAST', 'YEAR', 'OH'] +672-122797-0014-1799: ref=['WERE', 'I', 'BUT', 'ALREADY', 'ON', 'THE', 'CART'] +672-122797-0014-1799: hyp=['WERE', 'I', 'BUT', 'ALREADY', 'ON', 'THE', 'CART'] +672-122797-0015-1800: ref=['WERE', 'I', 'IN', 'THE', 'WARM', 'ROOM', 'WITH', 'ALL', 'THE', 'SPLENDOR', 'AND', 'MAGNIFICENCE'] +672-122797-0015-1800: hyp=['WHERE', 'I', 'IN', 'THE', 'WARM', 'ROOM', 'WITH', 'ALL', 'THE', 'SPLENDOR', 'AND', 'MAGNIFICENCE'] +672-122797-0016-1801: ref=['YES', 'THEN', 'SOMETHING', 'BETTER', 'SOMETHING', 'STILL', 'GRANDER', 'WILL', 'SURELY', 'FOLLOW', 'OR', 'WHEREFORE', 'SHOULD', 'THEY', 'THUS', 'ORNAMENT', 'ME'] +672-122797-0016-1801: hyp=['YES', 'THEN', 'SOMETHING', 'BETTER', 'SOMETHING', 'STILL', 'GRANDER', 'WILL', 'SURELY', 'FOLLOW', 'OR', 'WHEREFORE', 'SHOULD', 'THEY', 'THUS', 'ORNAMENT', 'ME'] +672-122797-0017-1802: ref=['SOMETHING', 'BETTER', 'SOMETHING', 'STILL', 'GRANDER', 'MUST', 'FOLLOW', 'BUT', 'WHAT'] +672-122797-0017-1802: hyp=['SOMETHING', 'BETTER', 'SOMETHING', 'STILL', 'GRANDER', 'MUST', 'FOLLOW', 'BUT', 'WHAT'] +672-122797-0018-1803: ref=['REJOICE', 'IN', 'OUR', 'PRESENCE', 'SAID', 'THE', 'AIR', 'AND', 'THE', 'SUNLIGHT'] +672-122797-0018-1803: hyp=['REJOICE', 'IN', 'OUR', 'PRESENCE', 'SAID', 'THE', 'AIR', 'AND', 'THE', 'SUNLIGHT'] +672-122797-0019-1804: ref=['REJOICE', 'IN', 'THY', 'OWN', 'FRESH', 'YOUTH'] +672-122797-0019-1804: 
hyp=['REJOICE', 'IN', 'THY', 'OWN', 'FRESH', 'YOUTH'] +672-122797-0020-1805: ref=['BUT', 'THE', 'TREE', 'DID', 'NOT', 'REJOICE', 'AT', 'ALL', 'HE', 'GREW', 'AND', 'GREW', 'AND', 'WAS', 'GREEN', 'BOTH', 'WINTER', 'AND', 'SUMMER'] +672-122797-0020-1805: hyp=['BUT', 'THE', 'TREE', 'DID', 'NOT', 'REJOICE', 'AT', 'ALL', 'HE', 'GREW', 'AND', 'GREW', 'AND', 'WAS', 'GREEN', 'BOTH', 'WINTER', 'AND', 'SUMMER'] +672-122797-0021-1806: ref=['AND', 'TOWARDS', 'CHRISTMAS', 'HE', 'WAS', 'ONE', 'OF', 'THE', 'FIRST', 'THAT', 'WAS', 'CUT', 'DOWN'] +672-122797-0021-1806: hyp=['AND', 'TOWARDS', 'CHRISTMAS', 'HE', 'WAS', 'ONE', 'OF', 'THE', 'FIRST', 'THAT', 'WAS', 'CUT', 'DOWN'] +672-122797-0022-1807: ref=['THE', 'AXE', 'STRUCK', 'DEEP', 'INTO', 'THE', 'VERY', 'PITH', 'THE', 'TREE', 'FELL', 'TO', 'THE', 'EARTH', 'WITH', 'A', 'SIGH', 'HE', 'FELT', 'A', 'PANG', 'IT', 'WAS', 'LIKE', 'A', 'SWOON', 'HE', 'COULD', 'NOT', 'THINK', 'OF', 'HAPPINESS', 'FOR', 'HE', 'WAS', 'SORROWFUL', 'AT', 'BEING', 'SEPARATED', 'FROM', 'HIS', 'HOME', 'FROM', 'THE', 'PLACE', 'WHERE', 'HE', 'HAD', 'SPRUNG', 'UP'] +672-122797-0022-1807: hyp=['THE', 'AXE', 'STRUCK', 'DEEP', 'INTO', 'THE', 'VERY', 'PITH', 'THE', 'TREE', 'FELL', 'TO', 'THE', 'EARTH', 'WITH', 'A', 'SIGH', 'HE', 'FELT', 'A', 'PANG', 'IT', 'WAS', 'LIKE', 'A', 'SWOON', 'HE', 'COULD', 'NOT', 'THINK', 'OF', 'HAPPINESS', 'FOR', 'HE', 'WAS', 'SORROWFUL', 'AT', 'BEING', 'SEPARATED', 'FROM', 'HIS', 'HOME', 'FROM', 'THE', 'PLACE', 'WHERE', 'HE', 'HAD', 'SPRUNG', 'UP'] +672-122797-0023-1808: ref=['HE', 'WELL', 'KNEW', 'THAT', 'HE', 'SHOULD', 'NEVER', 'SEE', 'HIS', 'DEAR', 'OLD', 'COMRADES', 'THE', 'LITTLE', 'BUSHES', 'AND', 'FLOWERS', 'AROUND', 'HIM', 'ANYMORE', 'PERHAPS', 'NOT', 'EVEN', 'THE', 'BIRDS'] +672-122797-0023-1808: hyp=['HE', 'WELL', 'KNEW', 'THAT', 'HE', 'SHOULD', 'NEVER', 'SEE', 'HIS', 'DEAR', 'OLD', 'COMRADES', 'THE', 'LITTLE', 'BUSHES', 'AND', 'FLOWERS', 'AROUND', 'HIM', 'ANY', 'MORE', 'PERHAPS', 'NOT', 'EVEN', 'THE', 'BIRDS'] +672-122797-0024-1809: ref=['THE', 'DEPARTURE', 'WAS', 'NOT', 'AT', 'ALL', 'AGREEABLE'] +672-122797-0024-1809: hyp=['THE', 'DEPARTURE', 'WAS', 'NOT', 'AT', 'ALL', 'AGREEABLE'] +672-122797-0025-1810: ref=['THE', 'TREE', 'ONLY', 'CAME', 'TO', 'HIMSELF', 'WHEN', 'HE', 'WAS', 'UNLOADED', 'IN', 'A', 'COURT', 'YARD', 'WITH', 'THE', 'OTHER', 'TREES', 'AND', 'HEARD', 'A', 'MAN', 'SAY', 'THAT', 'ONE', 'IS', 'SPLENDID', 'WE', "DON'T", 'WANT', 'THE', 'OTHERS'] +672-122797-0025-1810: hyp=['THE', 'TREE', 'ONLY', 'CAME', 'TO', 'HIMSELF', 'WHEN', 'HE', 'WAS', 'UNLOADED', 'IN', 'A', 'COURTYARD', 'WITH', 'THE', 'OTHER', 'TREES', 'AND', 'HEARD', 'A', 'MAN', 'SAY', 'THAT', 'ONE', 'IS', 'SPLENDID', 'WE', "DON'T", 'WANT', 'THE', 'OTHERS'] +672-122797-0026-1811: ref=['THERE', 'TOO', 'WERE', 'LARGE', 'EASY', 'CHAIRS', 'SILKEN', 'SOFAS', 'LARGE', 'TABLES', 'FULL', 'OF', 'PICTURE', 'BOOKS', 'AND', 'FULL', 'OF', 'TOYS', 'WORTH', 'HUNDREDS', 'AND', 'HUNDREDS', 'OF', 'CROWNS', 'AT', 'LEAST', 'THE', 'CHILDREN', 'SAID', 'SO'] +672-122797-0026-1811: hyp=['THERE', 'TOO', 'WERE', 'LARGE', 'EASY', 'CHAIRS', 'SILKEN', 'SOFAS', 'LARGE', 'TABLES', 'FULL', 'OF', 'PICTURE', 'BOOKS', 'AND', 'FULL', 'OF', 'TOYS', 'WORTH', 'HUNDREDS', 'AND', 'HUNDREDS', 'OF', 'CROWNS', 'AT', 'LEAST', 'THE', 'CHILDREN', 'SAID', 'SO'] +672-122797-0027-1812: ref=['THE', 'SERVANTS', 'AS', 'WELL', 'AS', 'THE', 'YOUNG', 'LADIES', 'DECORATED', 'IT'] +672-122797-0027-1812: hyp=['THE', 'SERVANTS', 'AS', 'WELL', 'AS', 'THE', 'YOUNG', 'LADIES', 'DECORATED', 'IT'] +672-122797-0028-1813: ref=['THIS', 'EVENING', 'THEY', 
'ALL', 'SAID'] +672-122797-0028-1813: hyp=['THIS', 'EVENING', 'THEY', 'ALL', 'SAID'] +672-122797-0029-1814: ref=['HOW', 'IT', 'WILL', 'SHINE', 'THIS', 'EVENING'] +672-122797-0029-1814: hyp=['HOW', 'IT', 'WILL', 'SHINE', 'THIS', 'EVENING'] +672-122797-0030-1815: ref=['PERHAPS', 'THE', 'OTHER', 'TREES', 'FROM', 'THE', 'FOREST', 'WILL', 'COME', 'TO', 'LOOK', 'AT', 'ME'] +672-122797-0030-1815: hyp=['PERHAPS', 'THE', 'OTHER', 'TREES', 'FROM', 'THE', 'FOREST', 'WILL', 'COME', 'TO', 'LOOK', 'AT', 'ME'] +672-122797-0031-1816: ref=['IT', 'BLAZED', 'UP', 'FAMOUSLY', 'HELP', 'HELP'] +672-122797-0031-1816: hyp=['IT', 'BLAZED', 'UP', 'FAMOUSLY', 'HELP', 'HELP'] +672-122797-0032-1817: ref=['CRIED', 'THE', 'YOUNG', 'LADIES', 'AND', 'THEY', 'QUICKLY', 'PUT', 'OUT', 'THE', 'FIRE'] +672-122797-0032-1817: hyp=['CRIED', 'THE', 'YOUNG', 'LADIES', 'AND', 'THEY', 'QUICKLY', 'PUT', 'OUT', 'THE', 'FIRE'] +672-122797-0033-1818: ref=['A', 'STORY'] +672-122797-0033-1818: hyp=['A', 'STORY'] +672-122797-0034-1819: ref=['A', 'STORY', 'CRIED', 'THE', 'CHILDREN', 'DRAWING', 'A', 'LITTLE', 'FAT', 'MAN', 'TOWARDS', 'THE', 'TREE'] +672-122797-0034-1819: hyp=['A', 'STORY', 'CRIED', 'THE', 'CHILDREN', 'DRAWING', 'A', 'LITTLE', 'FAT', 'MAN', 'TOWARDS', 'THE', 'TREE'] +672-122797-0035-1820: ref=['BUT', 'I', 'SHALL', 'TELL', 'ONLY', 'ONE', 'STORY'] +672-122797-0035-1820: hyp=['BUT', 'I', 'SHALL', 'TELL', 'ONLY', 'ONE', 'STORY'] +672-122797-0036-1821: ref=['HUMPY', 'DUMPY', 'FELL', 'DOWNSTAIRS', 'AND', 'YET', 'HE', 'MARRIED', 'THE', 'PRINCESS'] +672-122797-0036-1821: hyp=['HUMPY', 'DUMPY', 'FELL', 'DOWNSTAIRS', 'AND', 'YET', 'HE', 'MARRIED', 'THE', 'PRINCESS'] +672-122797-0037-1822: ref=["THAT'S", 'THE', 'WAY', 'OF', 'THE', 'WORLD'] +672-122797-0037-1822: hyp=["THAT'S", 'THE', 'WAY', 'OF', 'THE', 'WORLD'] +672-122797-0038-1823: ref=['THOUGHT', 'THE', 'FIR', 'TREE', 'AND', 'BELIEVED', 'IT', 'ALL', 'BECAUSE', 'THE', 'MAN', 'WHO', 'TOLD', 'THE', 'STORY', 'WAS', 'SO', 'GOOD', 'LOOKING', 'WELL', 'WELL'] +672-122797-0038-1823: hyp=['THOUGHT', 'THE', 'FIR', 'TREE', 'AND', 'BELIEVED', 'IT', 'ALL', 'BECAUSE', 'THE', 'MAN', 'WHO', 'TOLD', 'THE', 'STORY', 'WAS', 'SO', 'GOOD', 'LOOKING', 'WELL', 'WELL'] +672-122797-0039-1824: ref=['I', "WON'T", 'TREMBLE', 'TO', 'MORROW', 'THOUGHT', 'THE', 'FIR', 'TREE'] +672-122797-0039-1824: hyp=['I', "WON'T", 'TREMBLE', 'TO', 'MORROW', 'THOUGHT', 'THE', 'FIR', 'TREE'] +672-122797-0040-1825: ref=['AND', 'THE', 'WHOLE', 'NIGHT', 'THE', 'TREE', 'STOOD', 'STILL', 'AND', 'IN', 'DEEP', 'THOUGHT'] +672-122797-0040-1825: hyp=['AND', 'THE', 'WHOLE', 'NIGHT', 'THE', 'TREE', 'STOOD', 'STILL', 'AND', 'IN', 'DEEP', 'THOUGHT'] +672-122797-0041-1826: ref=['IN', 'THE', 'MORNING', 'THE', 'SERVANT', 'AND', 'THE', 'HOUSEMAID', 'CAME', 'IN'] +672-122797-0041-1826: hyp=['IN', 'THE', 'MORNING', 'THE', 'SERVANT', 'AND', 'THE', 'HOUSEMAID', 'CAME', 'IN'] +672-122797-0042-1827: ref=['BUT', 'THEY', 'DRAGGED', 'HIM', 'OUT', 'OF', 'THE', 'ROOM', 'AND', 'UP', 'THE', 'STAIRS', 'INTO', 'THE', 'LOFT', 'AND', 'HERE', 'IN', 'A', 'DARK', 'CORNER', 'WHERE', 'NO', 'DAYLIGHT', 'COULD', 'ENTER', 'THEY', 'LEFT', 'HIM'] +672-122797-0042-1827: hyp=['BUT', 'THEY', 'DRAGGED', 'HIM', 'OUT', 'OF', 'THE', 'ROOM', 'AND', 'UP', 'THE', 'STAIRS', 'INTO', 'THE', 'LOFT', 'AND', 'HERE', 'IN', 'A', 'DARK', 'CORNER', 'WHERE', 'NO', 'DAYLIGHT', 'COULD', 'ENTER', 'THEY', 'LEFT', 'HIM'] +672-122797-0043-1828: ref=["WHAT'S", 'THE', 'MEANING', 'OF', 'THIS', 'THOUGHT', 'THE', 'TREE'] +672-122797-0043-1828: hyp=["WHAT'S", 'THE', 'MEANING', 'OF', 'THIS', 'THOUGHT', 
'THE', 'TREE'] +672-122797-0044-1829: ref=['AND', 'HE', 'LEANED', 'AGAINST', 'THE', 'WALL', 'LOST', 'IN', 'REVERIE'] +672-122797-0044-1829: hyp=['AND', 'HE', 'LEANED', 'AGAINST', 'THE', 'WALL', 'LOST', 'IN', 'REVERIE'] +672-122797-0045-1830: ref=['TIME', 'ENOUGH', 'HAD', 'HE', 'TOO', 'FOR', 'HIS', 'REFLECTIONS', 'FOR', 'DAYS', 'AND', 'NIGHTS', 'PASSED', 'ON', 'AND', 'NOBODY', 'CAME', 'UP', 'AND', 'WHEN', 'AT', 'LAST', 'SOMEBODY', 'DID', 'COME', 'IT', 'WAS', 'ONLY', 'TO', 'PUT', 'SOME', 'GREAT', 'TRUNKS', 'IN', 'A', 'CORNER', 'OUT', 'OF', 'THE', 'WAY'] +672-122797-0045-1830: hyp=['TIME', 'ENOUGH', 'HAD', 'HE', 'TOO', 'FOR', 'HIS', 'REFLECTIONS', 'FOR', 'DAYS', 'AND', 'NIGHTS', 'PASSED', 'ON', 'AND', 'NOBODY', 'CAME', 'UP', 'AND', 'WHEN', 'AT', 'LAST', 'SOMEBODY', 'DID', 'COME', 'IT', 'WAS', 'ONLY', 'TO', 'PUT', 'SOME', 'GREAT', 'TRUNKS', 'IN', 'A', 'CORNER', 'OUT', 'OF', 'THE', 'WAY'] +672-122797-0046-1831: ref=['TIS', 'NOW', 'WINTER', 'OUT', 'OF', 'DOORS', 'THOUGHT', 'THE', 'TREE'] +672-122797-0046-1831: hyp=['TIS', 'NOW', 'WINTER', 'OUT', 'OF', 'DOORS', 'THOUGHT', 'THE', 'TREE'] +672-122797-0047-1832: ref=['HOW', 'KIND', 'MAN', 'IS', 'AFTER', 'ALL'] +672-122797-0047-1832: hyp=['HOW', 'KIND', 'MAN', 'IS', 'AFTER', 'ALL'] +672-122797-0048-1833: ref=['IF', 'IT', 'ONLY', 'WERE', 'NOT', 'SO', 'DARK', 'HERE', 'AND', 'SO', 'TERRIBLY', 'LONELY'] +672-122797-0048-1833: hyp=['IF', 'IT', 'ONLY', 'WERE', 'NOT', 'SO', 'DARK', 'HERE', 'AND', 'SO', 'TERRIBLY', 'LONELY'] +672-122797-0049-1834: ref=['SQUEAK', 'SQUEAK'] +672-122797-0049-1834: hyp=['SQUICK', 'QUICK'] +672-122797-0050-1835: ref=['THEY', 'SNUFFED', 'ABOUT', 'THE', 'FIR', 'TREE', 'AND', 'RUSTLED', 'AMONG', 'THE', 'BRANCHES'] +672-122797-0050-1835: hyp=['THEY', 'SNUFFED', 'ABOUT', 'THE', 'FIR', 'TREE', 'AND', 'RUSTLED', 'AMONG', 'THE', 'BRANCHES'] +672-122797-0051-1836: ref=['I', 'AM', 'BY', 'NO', 'MEANS', 'OLD', 'SAID', 'THE', 'FIR', 'TREE'] +672-122797-0051-1836: hyp=['I', 'AM', 'BY', 'NO', 'MEANS', 'OLD', 'SAID', 'THE', 'FIR', 'TREE'] +672-122797-0052-1837: ref=["THERE'S", 'MANY', 'A', 'ONE', 'CONSIDERABLY', 'OLDER', 'THAN', 'I', 'AM'] +672-122797-0052-1837: hyp=["THERE'S", 'MANY', 'A', 'ONE', 'CONSIDERABLY', 'OLDER', 'THAN', 'I', 'AM'] +672-122797-0053-1838: ref=['THEY', 'WERE', 'SO', 'EXTREMELY', 'CURIOUS'] +672-122797-0053-1838: hyp=['THEY', 'WERE', 'SO', 'EXTREMELY', 'CURIOUS'] +672-122797-0054-1839: ref=['I', 'KNOW', 'NO', 'SUCH', 'PLACE', 'SAID', 'THE', 'TREE'] +672-122797-0054-1839: hyp=['I', 'KNOW', 'NO', 'SUCH', 'PLACE', 'SAID', 'THE', 'TREE'] +672-122797-0055-1840: ref=['AND', 'THEN', 'HE', 'TOLD', 'ALL', 'ABOUT', 'HIS', 'YOUTH', 'AND', 'THE', 'LITTLE', 'MICE', 'HAD', 'NEVER', 'HEARD', 'THE', 'LIKE', 'BEFORE', 'AND', 'THEY', 'LISTENED', 'AND', 'SAID'] +672-122797-0055-1840: hyp=['AND', 'THEN', 'HE', 'TOLD', 'ALL', 'ABOUT', 'HIS', 'YOUTH', 'AND', 'THE', 'LITTLE', 'MICE', 'HAD', 'NEVER', 'HEARD', 'THE', 'LIKE', 'BEFORE', 'AND', 'THEY', 'LISTENED', 'AND', 'SAID'] +672-122797-0056-1841: ref=['SAID', 'THE', 'FIR', 'TREE', 'THINKING', 'OVER', 'WHAT', 'HE', 'HAD', 'HIMSELF', 'RELATED'] +672-122797-0056-1841: hyp=['SAID', 'THE', 'FIR', 'TREE', 'THINKING', 'OVER', 'WHAT', 'HE', 'HAD', 'HIMSELF', 'RELATED'] +672-122797-0057-1842: ref=['YES', 'IN', 'REALITY', 'THOSE', 'WERE', 'HAPPY', 'TIMES'] +672-122797-0057-1842: hyp=['YES', 'IN', 'REALITY', 'THOSE', 'WERE', 'HAPPY', 'TIMES'] +672-122797-0058-1843: ref=['WHO', 'IS', 'HUMPY', 'DUMPY', 'ASKED', 'THE', 'MICE'] +672-122797-0058-1843: hyp=['WHO', 'IS', 'HUMPY', 'DUMPY', 'ASKED', 'THE', 'MICE'] 
+672-122797-0059-1844: ref=['ONLY', 'THAT', 'ONE', 'ANSWERED', 'THE', 'TREE'] +672-122797-0059-1844: hyp=['ONLY', 'THAT', 'ONE', 'ANSWERED', 'THE', 'TREE'] +672-122797-0060-1845: ref=['IT', 'IS', 'A', 'VERY', 'STUPID', 'STORY'] +672-122797-0060-1845: hyp=['IT', 'IS', 'A', 'VERY', 'STUPID', 'STORY'] +672-122797-0061-1846: ref=["DON'T", 'YOU', 'KNOW', 'ONE', 'ABOUT', 'BACON', 'AND', 'TALLOW', 'CANDLES', "CAN'T", 'YOU', 'TELL', 'ANY', 'LARDER', 'STORIES'] +672-122797-0061-1846: hyp=["DON'T", 'YOU', 'KNOW', 'ONE', 'ABOUT', 'BACON', 'AND', 'TALLOW', 'CANDLES', "CAN'T", 'YOU', 'TELL', 'ANY', 'LARDER', 'STORIES'] +672-122797-0062-1847: ref=['NO', 'SAID', 'THE', 'TREE'] +672-122797-0062-1847: hyp=['NO', 'SAID', 'THE', 'TREE'] +672-122797-0063-1848: ref=['THEN', 'GOOD', 'BYE', 'SAID', 'THE', 'RATS', 'AND', 'THEY', 'WENT', 'HOME'] +672-122797-0063-1848: hyp=['THEN', 'GOOD', 'BYE', 'SAID', 'THE', 'RATS', 'AND', 'THEY', 'WENT', 'HOME'] +672-122797-0064-1849: ref=['AT', 'LAST', 'THE', 'LITTLE', 'MICE', 'STAYED', 'AWAY', 'ALSO', 'AND', 'THE', 'TREE', 'SIGHED', 'AFTER', 'ALL', 'IT', 'WAS', 'VERY', 'PLEASANT', 'WHEN', 'THE', 'SLEEK', 'LITTLE', 'MICE', 'SAT', 'ROUND', 'ME', 'AND', 'LISTENED', 'TO', 'WHAT', 'I', 'TOLD', 'THEM'] +672-122797-0064-1849: hyp=['AT', 'LAST', 'THE', 'LITTLE', 'MICE', 'STAYED', 'AWAY', 'ALSO', 'AND', 'THE', 'TREE', 'SIGHED', 'AFTER', 'ALL', 'IT', 'WAS', 'VERY', 'PLEASANT', 'WHEN', 'THE', 'SLEEK', 'LITTLE', 'MICE', 'SAT', 'ROUND', 'ME', 'AND', 'LISTENED', 'TO', 'WHAT', 'I', 'TOLD', 'THEM'] +672-122797-0065-1850: ref=['NOW', 'THAT', 'TOO', 'IS', 'OVER'] +672-122797-0065-1850: hyp=['NOW', 'THAT', 'TOO', 'IS', 'OVER'] +672-122797-0066-1851: ref=['WHY', 'ONE', 'MORNING', 'THERE', 'CAME', 'A', 'QUANTITY', 'OF', 'PEOPLE', 'AND', 'SET', 'TO', 'WORK', 'IN', 'THE', 'LOFT'] +672-122797-0066-1851: hyp=['WHY', 'ONE', 'MORNING', 'THERE', 'CAME', 'A', 'QUANTITY', 'OF', 'PEOPLE', 'AND', 'SET', 'TO', 'WORK', 'IN', 'THE', 'LOFT'] +672-122797-0067-1852: ref=['THE', 'TRUNKS', 'WERE', 'MOVED', 'THE', 'TREE', 'WAS', 'PULLED', 'OUT', 'AND', 'THROWN', 'RATHER', 'HARD', 'IT', 'IS', 'TRUE', 'DOWN', 'ON', 'THE', 'FLOOR', 'BUT', 'A', 'MAN', 'DREW', 'HIM', 'TOWARDS', 'THE', 'STAIRS', 'WHERE', 'THE', 'DAYLIGHT', 'SHONE'] +672-122797-0067-1852: hyp=['THE', 'TRUNKS', 'WERE', 'MOVED', 'THE', 'TREE', 'WAS', 'PULLED', 'OUT', 'AND', 'THROWN', 'RATHER', 'HARD', 'IT', 'IS', 'TRUE', 'DOWN', 'ON', 'THE', 'FLOOR', 'BUT', 'A', 'MAN', 'DREW', 'HIM', 'TOWARDS', 'THE', 'STAIRS', 'WHERE', 'THE', 'DAYLIGHT', 'SHONE'] +672-122797-0068-1853: ref=['BUT', 'IT', 'WAS', 'NOT', 'THE', 'FIR', 'TREE', 'THAT', 'THEY', 'MEANT'] +672-122797-0068-1853: hyp=['BUT', 'IT', 'WAS', 'NOT', 'THE', 'FIR', 'TREE', 'THAT', 'THEY', 'MEANT'] +672-122797-0069-1854: ref=['IT', 'WAS', 'IN', 'A', 'CORNER', 'THAT', 'HE', 'LAY', 'AMONG', 'WEEDS', 'AND', 'NETTLES'] +672-122797-0069-1854: hyp=['IT', 'WAS', 'IN', 'A', 'CORNER', 'THAT', 'HE', 'LAY', 'AMONG', 'WEEDS', 'AND', 'NETTLES'] +672-122797-0070-1855: ref=['THE', 'GOLDEN', 'STAR', 'OF', 'TINSEL', 'WAS', 'STILL', 'ON', 'THE', 'TOP', 'OF', 'THE', 'TREE', 'AND', 'GLITTERED', 'IN', 'THE', 'SUNSHINE'] +672-122797-0070-1855: hyp=['THE', 'GOLDEN', 'STAR', 'OF', 'TINSEL', 'WAS', 'STILL', 'ON', 'THE', 'TOP', 'OF', 'THE', 'TREE', 'AND', 'GLITTERED', 'IN', 'THE', 'SUNSHINE'] +672-122797-0071-1856: ref=['IN', 'THE', 'COURT', 'YARD', 'SOME', 'OF', 'THE', 'MERRY', 'CHILDREN', 'WERE', 'PLAYING', 'WHO', 'HAD', 'DANCED', 'AT', 'CHRISTMAS', 'ROUND', 'THE', 'FIR', 'TREE', 'AND', 'WERE', 'SO', 'GLAD', 'AT', 'THE', 'SIGHT', 
'OF', 'HIM'] +672-122797-0071-1856: hyp=['IN', 'THE', 'COURT', 'YARD', 'SOME', 'OF', 'THE', 'MERRY', 'CHILDREN', 'WERE', 'PLAYING', 'WHO', 'HAD', 'DANCED', 'AT', 'CHRISTMAS', 'ROUND', 'THE', 'FIR', 'TREE', 'AND', 'WERE', 'SO', 'GLAD', 'AT', 'THE', 'SIGHT', 'OF', 'HIM'] +672-122797-0072-1857: ref=['AND', 'THE', "GARDENER'S", 'BOY', 'CHOPPED', 'THE', 'TREE', 'INTO', 'SMALL', 'PIECES', 'THERE', 'WAS', 'A', 'WHOLE', 'HEAP', 'LYING', 'THERE'] +672-122797-0072-1857: hyp=['AND', 'THE', "GARDENER'S", 'BOY', 'CHOPPED', 'THE', 'TREE', 'INTO', 'SMALL', 'PIECES', 'THERE', 'WAS', 'A', 'WHOLE', 'HEAP', 'LYING', 'THERE'] +672-122797-0073-1858: ref=['THE', 'WOOD', 'FLAMED', 'UP', 'SPLENDIDLY', 'UNDER', 'THE', 'LARGE', 'BREWING', 'COPPER', 'AND', 'IT', 'SIGHED', 'SO', 'DEEPLY'] +672-122797-0073-1858: hyp=['THE', 'WOOD', 'FLAMED', 'UP', 'SPLENDIDLY', 'UNDER', 'THE', 'LARGE', 'BREWING', 'COPPER', 'AND', 'IT', 'SIGHED', 'SO', 'DEEPLY'] +672-122797-0074-1859: ref=['HOWEVER', 'THAT', 'WAS', 'OVER', 'NOW', 'THE', 'TREE', 'GONE', 'THE', 'STORY', 'AT', 'AN', 'END'] +672-122797-0074-1859: hyp=['HOWEVER', 'THAT', 'WAS', 'OVER', 'NOW', 'THE', 'TREE', 'GONE', 'THE', 'STORY', 'AT', 'AN', 'END'] +6829-68769-0000-1860: ref=['KENNETH', 'AND', 'BETH', 'REFRAINED', 'FROM', 'TELLING', 'THE', 'OTHER', 'GIRLS', 'OR', 'UNCLE', 'JOHN', 'OF', 'OLD', 'WILL', "ROGERS'S", 'VISIT', 'BUT', 'THEY', 'GOT', 'MISTER', 'WATSON', 'IN', 'THE', 'LIBRARY', 'AND', 'QUESTIONED', 'HIM', 'CLOSELY', 'ABOUT', 'THE', 'PENALTY', 'FOR', 'FORGING', 'A', 'CHECK'] +6829-68769-0000-1860: hyp=['KENNETH', 'AND', 'BETH', 'REFRAINED', 'FROM', 'TELLING', 'THE', 'OTHER', 'GIRLS', 'OR', 'UNCLE', 'JOHN', 'OF', 'OLD', 'WILL', "ROGERS'S", 'VISIT', 'BUT', 'THEY', 'GOT', 'MISTER', 'WATSON', 'IN', 'THE', 'LIBRARY', 'AND', 'QUESTIONED', 'HIM', 'CLOSELY', 'ABOUT', 'THE', 'PENALTY', 'FOR', 'FORGING', 'A', 'CHECK'] +6829-68769-0001-1861: ref=['IT', 'WAS', 'A', 'SERIOUS', 'CRIME', 'INDEED', 'MISTER', 'WATSON', 'TOLD', 'THEM', 'AND', 'TOM', 'GATES', 'BADE', 'FAIR', 'TO', 'SERVE', 'A', 'LENGTHY', 'TERM', 'IN', "STATE'S", 'PRISON', 'AS', 'A', 'CONSEQUENCE', 'OF', 'HIS', 'RASH', 'ACT'] +6829-68769-0001-1861: hyp=['IT', 'WAS', 'A', 'SERIOUS', 'CRIME', 'INDEED', 'MISTER', 'WATSON', 'TOLD', 'THEM', 'AND', 'TOM', 'GATES', 'BADE', 'FAIR', 'TO', 'SERVE', 'A', 'LENGTHY', 'TERM', 'IN', 'THE', 'STATES', 'PRISON', 'AS', 'A', 'CONSEQUENCE', 'OF', 'HIS', 'RASH', 'ACT'] +6829-68769-0002-1862: ref=['I', "CAN'T", 'SEE', 'IT', 'IN', 'THAT', 'LIGHT', 'SAID', 'THE', 'OLD', 'LAWYER'] +6829-68769-0002-1862: hyp=['I', "CAN'T", 'SEE', 'IT', 'IN', 'THAT', 'LIGHT', 'SAID', 'THE', 'OLD', 'LAWYER'] +6829-68769-0003-1863: ref=['IT', 'WAS', 'A', 'DELIBERATE', 'THEFT', 'FROM', 'HIS', 'EMPLOYERS', 'TO', 'PROTECT', 'A', 'GIRL', 'HE', 'LOVED'] +6829-68769-0003-1863: hyp=['IT', 'WAS', 'A', 'DELIBERATE', 'THEFT', 'FROM', 'HIS', 'EMPLOYERS', 'TO', 'PROTECT', 'A', 'GIRL', 'HE', 'LOVED'] +6829-68769-0004-1864: ref=['BUT', 'THEY', 'COULD', 'NOT', 'HAVE', 'PROVEN', 'A', 'CASE', 'AGAINST', 'LUCY', 'IF', 'SHE', 'WAS', 'INNOCENT', 'AND', 'ALL', 'THEIR', 'THREATS', 'OF', 'ARRESTING', 'HER', 'WERE', 'PROBABLY', 'MERE', 'BLUFF'] +6829-68769-0004-1864: hyp=['BUT', 'THEY', 'COULD', 'NOT', 'HAVE', 'PROVEN', 'A', 'CASE', 'AGAINST', 'LUCY', 'IF', 'SHE', 'WAS', 'INNOCENT', 'AND', 'ALL', 'THEIR', 'THREATS', 'OF', 'ARRESTING', 'HER', 'WERE', 'PROBABLY', 'A', 'MERE', 'BLUFF'] +6829-68769-0005-1865: ref=['HE', 'WAS', 'SOFT', 'HEARTED', 'AND', 'IMPETUOUS', 'SAID', 'BETH', 'AND', 'BEING', 'IN', 'LOVE', 'HE', "DIDN'T", 'STOP', 'TO', 
'COUNT', 'THE', 'COST'] +6829-68769-0005-1865: hyp=['HE', 'WAS', 'SOFT', 'HEARTED', 'AND', 'IMPETUOUS', 'SAID', 'BETH', 'AND', 'BEING', 'IN', 'LOVE', 'HE', "DIDN'T", 'STOP', 'TO', 'COUNT', 'THE', 'COST'] +6829-68769-0006-1866: ref=['IF', 'THE', 'PROSECUTION', 'WERE', 'WITHDRAWN', 'AND', 'THE', 'CASE', 'SETTLED', 'WITH', 'THE', 'VICTIM', 'OF', 'THE', 'FORGED', 'CHECK', 'THEN', 'THE', 'YOUNG', 'MAN', 'WOULD', 'BE', 'ALLOWED', 'HIS', 'FREEDOM'] +6829-68769-0006-1866: hyp=['IF', 'THE', 'PROSECUTION', 'WERE', 'WITHDRAWN', 'AND', 'THE', 'CASE', 'SETTLED', 'WITH', 'THE', 'VICTIM', 'OF', 'THE', 'FORGED', 'CHECK', 'THEN', 'THE', 'YOUNG', 'MAN', 'WOULD', 'BE', 'ALLOWED', 'HIS', 'FREEDOM'] +6829-68769-0007-1867: ref=['BUT', 'UNDER', 'THE', 'CIRCUMSTANCES', 'I', 'DOUBT', 'IF', 'SUCH', 'AN', 'ARRANGEMENT', 'COULD', 'BE', 'MADE'] +6829-68769-0007-1867: hyp=['BUT', 'UNDER', 'THE', 'CIRCUMSTANCES', 'I', 'DOUBT', 'IF', 'SUCH', 'AN', 'ARRANGEMENT', 'COULD', 'BE', 'MADE'] +6829-68769-0008-1868: ref=['FAIRVIEW', 'WAS', 'TWELVE', 'MILES', 'AWAY', 'BUT', 'BY', 'TEN', "O'CLOCK", 'THEY', 'DREW', 'UP', 'AT', 'THE', 'COUNTY', 'JAIL'] +6829-68769-0008-1868: hyp=['FAIRVIEW', 'WAS', 'TWELVE', 'MILES', 'AWAY', 'BUT', 'BY', 'TEN', "O'CLOCK", 'THEY', 'DREW', 'UP', 'AT', 'THE', 'COUNTY', 'JAIL'] +6829-68769-0009-1869: ref=['THEY', 'WERE', 'RECEIVED', 'IN', 'THE', 'LITTLE', 'OFFICE', 'BY', 'A', 'MAN', 'NAMED', 'MARKHAM', 'WHO', 'WAS', 'THE', 'JAILER'] +6829-68769-0009-1869: hyp=['THEY', 'WERE', 'RECEIVED', 'IN', 'THE', 'LITTLE', 'OFFICE', 'BY', 'A', 'MAN', 'NAMED', 'MARKHAM', 'WHO', 'WAS', 'THE', 'JAILER'] +6829-68769-0010-1870: ref=['WE', 'WISH', 'TO', 'TALK', 'WITH', 'HIM', 'ANSWERED', 'KENNETH', 'TALK'] +6829-68769-0010-1870: hyp=['WE', 'WISH', 'TO', 'TALK', 'WITH', 'HIM', 'ANSWERED', 'KENNETH', 'TALK'] +6829-68769-0011-1871: ref=["I'M", 'RUNNING', 'FOR', 'REPRESENTATIVE', 'ON', 'THE', 'REPUBLICAN', 'TICKET', 'SAID', 'KENNETH', 'QUIETLY'] +6829-68769-0011-1871: hyp=["I'M", 'RUNNING', 'FOR', 'REPRESENTATIVE', 'ON', 'THE', 'REPUBLICAN', 'TICKET', 'SAID', 'KENNETH', 'QUIETLY'] +6829-68769-0012-1872: ref=['OH', 'SAY', "THAT'S", 'DIFFERENT', 'OBSERVED', 'MARKHAM', 'ALTERING', 'HIS', 'DEMEANOR'] +6829-68769-0012-1872: hyp=['OH', 'SAY', "THAT'S", 'DIFFERENT', 'OBSERVED', 'MARKHAM', 'ALTERING', 'HIS', 'DEMEANOUR'] +6829-68769-0013-1873: ref=['MAY', 'WE', 'SEE', 'GATES', 'AT', 'ONCE', 'ASKED', 'KENNETH'] +6829-68769-0013-1873: hyp=['MAY', 'WE', 'SEE', 'GATES', 'AT', 'ONCE', 'ASKED', 'KENNETH'] +6829-68769-0014-1874: ref=['THEY', 'FOLLOWED', 'THE', 'JAILER', 'ALONG', 'A', 'SUCCESSION', 'OF', 'PASSAGES'] +6829-68769-0014-1874: hyp=['THEY', 'FOLLOWED', 'THE', 'JAILER', 'ALONG', 'A', 'SUCCESSION', 'OF', 'PASSAGES'] +6829-68769-0015-1875: ref=['SOMETIMES', "I'M", 'THAT', 'YEARNING', 'FOR', 'A', 'SMOKE', "I'M", 'NEARLY', 'CRAZY', 'AN', 'I', 'DUNNO', 'WHICH', 'IS', 'WORST', 'DYIN', 'ONE', 'WAY', 'OR', 'ANOTHER'] +6829-68769-0015-1875: hyp=['SOMETIMES', "I'M", 'THAT', 'YEARNIN', 'FOR', 'A', 'SMOKE', "I'M", 'NEARLY', 'CRAZY', 'AND', 'I', "DON'TO", 'WHICH', 'IS', 'WORSE', 'DYIN', 'ONE', 'WAY', 'OR', 'TOTHER'] +6829-68769-0016-1876: ref=['HE', 'UNLOCKED', 'THE', 'DOOR', 'AND', 'CALLED', "HERE'S", 'VISITORS', 'TOM'] +6829-68769-0016-1876: hyp=['HE', 'UNLOCKED', 'THE', 'DOOR', 'AND', 'CALLED', "HERE'S", 'VISITORS', 'TOM'] +6829-68769-0017-1877: ref=['WORSE', 'TOM', 'WORSE', 'N', 'EVER', 'REPLIED', 'THE', 'JAILER', 'GLOOMILY'] +6829-68769-0017-1877: hyp=['WORSE', 'TOM', 'WORSE', 'THAN', 'EVER', 'REPLIED', 'THE', 'JAILER', 'GLOOMILY'] 
+6829-68769-0018-1878: ref=['MISS', 'DE', 'GRAF', 'SAID', 'KENNETH', 'NOTICING', 'THE', "BOY'S", 'FACE', 'CRITICALLY', 'AS', 'HE', 'STOOD', 'WHERE', 'THE', 'LIGHT', 'FROM', 'THE', 'PASSAGE', 'FELL', 'UPON', 'IT'] +6829-68769-0018-1878: hyp=['MISS', 'DE', 'GRAF', 'SAID', 'KENNETH', 'NOTICING', 'THE', "BOY'S", 'FACE', 'CRITICALLY', 'AS', 'HE', 'STOOD', 'WHERE', 'THE', 'LIGHT', 'FROM', 'THE', 'PASSAGE', 'FELL', 'UPON', 'IT'] +6829-68769-0019-1879: ref=['SORRY', 'WE', "HAVEN'T", 'ANY', 'RECEPTION', 'ROOM', 'IN', 'THE', 'JAIL'] +6829-68769-0019-1879: hyp=['SORRY', 'WE', "HAVEN'T", 'ANY', 'RECEPTION', 'ROOM', 'IN', 'THE', 'JAIL'] +6829-68769-0020-1880: ref=['SIT', 'DOWN', 'PLEASE', 'SAID', 'GATES', 'IN', 'A', 'CHEERFUL', 'AND', 'PLEASANT', 'VOICE', "THERE'S", 'A', 'BENCH', 'HERE'] +6829-68769-0020-1880: hyp=['SIT', 'DOWN', 'PLEASE', 'SAID', 'GATES', 'IN', 'A', 'CHEERFUL', 'AND', 'PLEASANT', 'VOICE', "THERE'S", 'A', 'BENCH', 'HERE'] +6829-68769-0021-1881: ref=['A', 'FRESH', 'WHOLESOME', 'LOOKING', 'BOY', 'WAS', 'TOM', 'GATES', 'WITH', 'STEADY', 'GRAY', 'EYES', 'AN', 'INTELLIGENT', 'FOREHEAD', 'BUT', 'A', 'SENSITIVE', 'RATHER', 'WEAK', 'MOUTH'] +6829-68769-0021-1881: hyp=['A', 'FRESH', 'WHOLESOME', 'LOOKING', 'BOY', 'WAS', 'TOM', 'GATES', 'WITH', 'STEADY', 'GREY', 'EYES', 'AN', 'INTELLIGENT', 'FOREHEAD', 'BUT', 'A', 'SENSITIVE', 'RATHER', 'WEAK', 'MOUTH'] +6829-68769-0022-1882: ref=['WE', 'HAVE', 'HEARD', 'SOMETHING', 'OF', 'YOUR', 'STORY', 'SAID', 'KENNETH', 'AND', 'ARE', 'INTERESTED', 'IN', 'IT'] +6829-68769-0022-1882: hyp=['WE', 'HAVE', 'HEARD', 'SOMETHING', 'OF', 'YOUR', 'STORY', 'SAID', 'KENNETH', 'AND', 'ARE', 'INTERESTED', 'IN', 'IT'] +6829-68769-0023-1883: ref=['I', "DIDN'T", 'STOP', 'TO', 'THINK', 'WHETHER', 'IT', 'WAS', 'FOOLISH', 'OR', 'NOT', 'I', 'DID', 'IT', 'AND', "I'M", 'GLAD', 'I', 'DID'] +6829-68769-0023-1883: hyp=['I', "DIDN'T", 'STOP', 'TO', 'THINK', 'WHETHER', 'IT', 'WAS', 'FOOLISH', 'OR', 'NOT', 'I', 'DID', 'IT', 'AND', "I'M", 'GLAD', 'I', 'DID', 'IT'] +6829-68769-0024-1884: ref=['OLD', 'WILL', 'IS', 'A', 'FINE', 'FELLOW', 'BUT', 'POOR', 'AND', 'HELPLESS', 'SINCE', 'MISSUS', 'ROGERS', 'HAD', 'HER', 'ACCIDENT'] +6829-68769-0024-1884: hyp=['OLD', 'WILL', 'IS', 'A', 'FINE', 'FELLOW', 'BUT', 'POOR', 'AND', 'HELPLESS', 'SINCE', 'MISSUS', 'ROGERS', 'HAD', 'HER', 'ACCIDENT'] +6829-68769-0025-1885: ref=['THEN', 'ROGERS', "WOULDN'T", 'DO', 'ANYTHING', 'BUT', 'LEAD', 'HER', 'AROUND', 'AND', 'WAIT', 'UPON', 'HER', 'AND', 'THE', 'PLACE', 'WENT', 'TO', 'RACK', 'AND', 'RUIN'] +6829-68769-0025-1885: hyp=['THEN', 'ROGERS', "WOULDN'T", 'DO', 'ANYTHING', 'BUT', 'LEAD', 'HER', 'AROUND', 'AND', 'WAIT', 'UPON', 'HER', 'AND', 'THE', 'PLACE', 'WENT', 'TO', 'RACK', 'AND', 'RUIN'] +6829-68769-0026-1886: ref=['HE', 'SPOKE', 'SIMPLY', 'BUT', 'PACED', 'UP', 'AND', 'DOWN', 'THE', 'NARROW', 'CELL', 'IN', 'FRONT', 'OF', 'THEM'] +6829-68769-0026-1886: hyp=['HE', 'SPOKE', 'SIMPLY', 'BUT', 'PACED', 'UP', 'AND', 'DOWN', 'THE', 'NARROW', 'CELL', 'IN', 'FRONT', 'OF', 'THEM'] +6829-68769-0027-1887: ref=['WHOSE', 'NAME', 'DID', 'YOU', 'SIGN', 'TO', 'THE', 'CHECK', 'ASKED', 'KENNETH'] +6829-68769-0027-1887: hyp=['WHOSE', 'NAME', 'DID', 'YOU', 'SIGN', 'TO', 'THE', 'CHECK', 'ASKED', 'KENNETH'] +6829-68769-0028-1888: ref=['HE', 'IS', 'SUPPOSED', 'TO', 'SIGN', 'ALL', 'THE', 'CHECKS', 'OF', 'THE', 'CONCERN'] +6829-68769-0028-1888: hyp=['HE', 'IS', 'SUPPOSED', 'TO', 'SIGN', 'ALL', 'THE', 'CHECKS', 'OF', 'THE', 'CONCERN'] +6829-68769-0029-1889: ref=["IT'S", 'A', 'STOCK', 'COMPANY', 'AND', 'RICH'] +6829-68769-0029-1889: 
hyp=["IT'S", 'A', 'STOCK', 'COMPANY', 'IN', 'RICH'] +6829-68769-0030-1890: ref=['I', 'WAS', 'BOOKKEEPER', 'SO', 'IT', 'WAS', 'EASY', 'TO', 'GET', 'A', 'BLANK', 'CHECK', 'AND', 'FORGE', 'THE', 'SIGNATURE'] +6829-68769-0030-1890: hyp=['I', 'WAS', 'BOOKKEEPER', 'SO', 'IT', 'WAS', 'EASY', 'TO', 'GET', 'A', 'BLANK', 'CHEQUE', 'AND', 'FORGE', 'THE', 'SIGNATURE'] +6829-68769-0031-1891: ref=['AS', 'REGARDS', 'MY', 'ROBBING', 'THE', 'COMPANY', "I'LL", 'SAY', 'THAT', 'I', 'SAVED', 'THEM', 'A', 'HEAVY', 'LOSS', 'ONE', 'DAY'] +6829-68769-0031-1891: hyp=['AS', 'REGARDS', 'MY', 'ROBBING', 'THE', 'COMPANY', "I'LL", 'SAY', 'THAT', 'I', 'SAVED', 'HIM', 'A', 'HEAVY', 'LOSS', 'ONE', 'DAY'] +6829-68769-0032-1892: ref=['I', 'DISCOVERED', 'AND', 'PUT', 'OUT', 'A', 'FIRE', 'THAT', 'WOULD', 'HAVE', 'DESTROYED', 'THE', 'WHOLE', 'PLANT', 'BUT', 'MARSHALL', 'NEVER', 'EVEN', 'THANKED', 'ME'] +6829-68769-0032-1892: hyp=['I', 'DISCOVERED', 'AND', 'PUT', 'OUT', 'A', 'FIRE', 'THAT', 'WOULD', 'HAVE', 'DESTROYED', 'THE', 'WHOLE', 'PLANT', 'BUT', 'MARTIAL', 'NEVER', 'EVEN', 'THANKED', 'ME'] +6829-68769-0033-1893: ref=['IT', 'WAS', 'BETTER', 'FOR', 'HIM', 'TO', 'THINK', 'THE', 'GIRL', 'UNFEELING', 'THAN', 'TO', 'KNOW', 'THE', 'TRUTH'] +6829-68769-0033-1893: hyp=['IT', 'WAS', 'BETTER', 'FOR', 'HIM', 'TO', 'THINK', 'THE', 'GIRL', 'UNFEELING', 'THAN', 'TO', 'KNOW', 'THE', 'TRUTH'] +6829-68769-0034-1894: ref=["I'M", 'GOING', 'TO', 'SEE', 'MISTER', 'MARSHALL', 'SAID', 'KENNETH', 'AND', 'DISCOVER', 'WHAT', 'I', 'CAN', 'DO', 'TO', 'ASSIST', 'YOU', 'THANK', 'YOU', 'SIR'] +6829-68769-0034-1894: hyp=["I'M", 'GOING', 'TO', 'SEE', 'MISTER', 'MARSHALL', 'SAID', 'KENNETH', 'AND', 'DISCOVER', 'WHAT', 'I', 'CAN', 'DO', 'TO', 'ASSIST', 'YOU', 'THANK', 'YOU', 'SIR'] +6829-68769-0035-1895: ref=['IT', "WON'T", 'BE', 'MUCH', 'BUT', "I'M", 'GRATEFUL', 'TO', 'FIND', 'A', 'FRIEND'] +6829-68769-0035-1895: hyp=['IT', "WON'T", 'BE', 'MUCH', 'BUT', "I'M", 'GRATEFUL', 'TO', 'FIND', 'A', 'FRIEND'] +6829-68769-0036-1896: ref=['THEY', 'LEFT', 'HIM', 'THEN', 'FOR', 'THE', 'JAILER', 'ARRIVED', 'TO', 'UNLOCK', 'THE', 'DOOR', 'AND', 'ESCORT', 'THEM', 'TO', 'THE', 'OFFICE'] +6829-68769-0036-1896: hyp=['THEY', 'LEFT', 'HIM', 'THEN', 'FOR', 'THE', 'JAILER', 'ARRIVED', 'TO', 'UNLOCK', 'THE', 'DOOR', 'AND', 'ESCORT', 'THEM', 'TO', 'THE', 'OFFICE'] +6829-68769-0037-1897: ref=["I'VE", 'SEEN', 'LOTS', 'OF', 'THAT', 'KIND', 'IN', 'MY', 'DAY'] +6829-68769-0037-1897: hyp=["I'VE", 'SEEN', 'LOTS', 'OF', 'THAT', 'KIND', 'IN', 'MY', 'DAY'] +6829-68769-0038-1898: ref=['AND', 'IT', 'RUINS', 'A', "MAN'S", 'DISPOSITION'] +6829-68769-0038-1898: hyp=['AND', 'IT', 'RUINS', 'A', "MAN'S", 'DISPOSITION'] +6829-68769-0039-1899: ref=['HE', 'LOOKED', 'UP', 'RATHER', 'UNGRACIOUSLY', 'BUT', 'MOTIONED', 'THEM', 'TO', 'BE', 'SEATED'] +6829-68769-0039-1899: hyp=['HE', 'LOOKED', 'UP', 'RATHER', 'UNGRACIOUSLY', 'BUT', 'MOTIONED', 'THEM', 'TO', 'BE', 'SEATED'] +6829-68769-0040-1900: ref=['SOME', 'GIRL', 'HAS', 'BEEN', 'HERE', 'TWICE', 'TO', 'INTERVIEW', 'MY', 'MEN', 'AND', 'I', 'HAVE', 'REFUSED', 'TO', 'ADMIT', 'HER'] +6829-68769-0040-1900: hyp=['SOME', 'GIRL', 'HAS', 'BEEN', 'IN', 'HERE', 'TWICE', 'TO', 'INTERVIEW', 'MY', 'MEN', 'AND', 'I', 'HAVE', 'REFUSED', 'TO', 'ADMIT', 'HER'] +6829-68769-0041-1901: ref=["I'M", 'NOT', 'ELECTIONEERING', 'JUST', 'NOW'] +6829-68769-0041-1901: hyp=["I'M", 'NOT', 'ELECTIONEERING', 'JUST', 'NOW'] +6829-68769-0042-1902: ref=['OH', 'WELL', 'SIR', 'WHAT', 'ABOUT', 'HIM'] +6829-68769-0042-1902: hyp=['OH', 'WELL', 'SIR', 'WHAT', 'ABOUT', 'HIM'] +6829-68769-0043-1903: 
ref=['AND', 'HE', 'DESERVES', 'A', 'TERM', 'IN', "STATE'S", 'PRISON'] +6829-68769-0043-1903: hyp=['AND', 'HE', 'DESERVES', 'A', 'TERM', 'IN', "STATE'S", 'PRISON'] +6829-68769-0044-1904: ref=['IT', 'HAS', 'COST', 'ME', 'TWICE', 'SIXTY', 'DOLLARS', 'IN', 'ANNOYANCE'] +6829-68769-0044-1904: hyp=['IT', 'HAS', 'COST', 'ME', 'TWICE', 'SIXTY', 'DOLLARS', 'AN', 'ANNOYANCE'] +6829-68769-0045-1905: ref=["I'LL", 'PAY', 'ALL', 'THE', 'COSTS', 'BESIDES'] +6829-68769-0045-1905: hyp=["I'LL", 'PAY', 'ALL', 'THE', 'COST', 'BESIDES'] +6829-68769-0046-1906: ref=["YOU'RE", 'FOOLISH', 'WHY', 'SHOULD', 'YOU', 'DO', 'ALL', 'THIS'] +6829-68769-0046-1906: hyp=["YOU'RE", 'FOOLISH', 'WHY', 'SHOULD', 'YOU', 'DO', 'ALL', 'THIS'] +6829-68769-0047-1907: ref=['I', 'HAVE', 'MY', 'OWN', 'REASONS', 'MISTER', 'MARSHALL'] +6829-68769-0047-1907: hyp=['I', 'HAVE', 'MY', 'OWN', 'REASONS', 'MISTER', 'MARSHALL'] +6829-68769-0048-1908: ref=['GIVE', 'ME', 'A', 'CHECK', 'FOR', 'A', 'HUNDRED', 'AND', 'FIFTY', 'AND', "I'LL", 'TURN', 'OVER', 'TO', 'YOU', 'THE', 'FORGED', 'CHECK', 'AND', 'QUASH', 'FURTHER', 'PROCEEDINGS'] +6829-68769-0048-1908: hyp=['GIVE', 'ME', 'A', 'CHECK', 'FOR', 'A', 'HUNDRED', 'AND', 'FIFTY', 'AND', "I'LL", 'TURN', 'OVER', 'TO', 'YOU', 'THE', 'FORGED', 'CHECK', 'AND', 'QUASH', 'FURTHER', 'PROCEEDINGS'] +6829-68769-0049-1909: ref=['HE', 'DETESTED', 'THE', 'GRASPING', 'DISPOSITION', 'THAT', 'WOULD', 'ENDEAVOR', 'TO', 'TAKE', 'ADVANTAGE', 'OF', 'HIS', 'EVIDENT', 'DESIRE', 'TO', 'HELP', 'YOUNG', 'GATES'] +6829-68769-0049-1909: hyp=['HE', 'DETESTED', 'THE', 'GRASPING', 'DISPOSITION', 'THAT', 'WOULD', 'ENDEAVOR', 'TO', 'TAKE', 'ADVANTAGE', 'OF', 'HIS', 'EVIDENT', 'DESIRE', 'TO', 'HELP', 'YOUNG', 'GATES'] +6829-68769-0050-1910: ref=['BETH', 'UNEASY', 'AT', 'HIS', 'SILENCE', 'NUDGED', 'HIM'] +6829-68769-0050-1910: hyp=['BETH', 'UNEASY', 'AT', 'HIS', 'SILENCE', 'NUDGED', 'HIM'] +6829-68769-0051-1911: ref=['THERE', 'WAS', 'A', 'GRIM', 'SMILE', 'OF', 'AMUSEMENT', 'ON', 'HIS', 'SHREWD', 'FACE'] +6829-68769-0051-1911: hyp=['THERE', 'WAS', 'A', 'GRIM', 'SMILE', 'OF', 'AMUSEMENT', 'ON', 'HIS', 'SHREWD', 'FACE'] +6829-68769-0052-1912: ref=['HE', 'MIGHT', 'HAVE', 'HAD', 'THAT', 'FORGED', 'CHECK', 'FOR', 'THE', 'FACE', 'OF', 'IT', 'IF', "HE'D", 'BEEN', 'SHARP'] +6829-68769-0052-1912: hyp=['HE', 'MIGHT', 'HAVE', 'HAD', 'THAT', 'FORGED', 'CHECK', 'FOR', 'THE', 'FACE', 'OF', 'IT', 'IF', "HE'D", 'BEEN', 'SHARP'] +6829-68769-0053-1913: ref=['AND', 'TO', 'THINK', 'WE', 'CAN', 'SAVE', 'ALL', 'THAT', 'MISERY', 'AND', 'DESPAIR', 'BY', 'THE', 'PAYMENT', 'OF', 'A', 'HUNDRED', 'AND', 'FIFTY', 'DOLLARS'] +6829-68769-0053-1913: hyp=['AND', 'TO', 'THINK', 'WE', 'CAN', 'SAVE', 'ALL', 'THAT', 'MISERY', 'AND', 'DESPAIR', 'BY', 'THE', 'PAYMENT', 'OF', 'A', 'HUNDRED', 'AND', 'FIFTY', 'DOLLARS'] +6829-68771-0000-1914: ref=['SO', 'TO', 'THE', 'SURPRISE', 'OF', 'THE', 'DEMOCRATIC', 'COMMITTEE', 'AND', 'ALL', 'HIS', 'FRIENDS', 'MISTER', 'HOPKINS', 'ANNOUNCED', 'THAT', 'HE', 'WOULD', 'OPPOSE', "FORBES'S", 'AGGRESSIVE', 'CAMPAIGN', 'WITH', 'AN', 'EQUAL', 'AGGRESSIVENESS', 'AND', 'SPEND', 'AS', 'MANY', 'DOLLARS', 'IN', 'DOING', 'SO', 'AS', 'MIGHT', 'BE', 'NECESSARY'] +6829-68771-0000-1914: hyp=['SO', 'TO', 'THE', 'SURPRISE', 'OF', 'THE', 'DEMOCRATIC', 'COMMITTEE', 'AND', 'ALL', 'HIS', 'FRIENDS', 'MISTER', 'HOPKINS', 'ANNOUNCED', 'THAT', 'HE', 'WOULD', 'OPPOSE', "FORD'S", 'AGGRESSIVE', 'CAMPAIGN', 'WITH', 'AN', 'EQUAL', 'AGGRESSIVENESS', 'AND', 'SPEND', 'AS', 'MANY', 'DOLLARS', 'IN', 'DOING', 'SO', 'AS', 'MIGHT', 'BE', 'NECESSARY'] +6829-68771-0001-1915: 
ref=['ONE', 'OF', 'MISTER', "HOPKINS'S", 'FIRST', 'TASKS', 'AFTER', 'CALLING', 'HIS', 'FAITHFUL', 'HENCHMEN', 'AROUND', 'HIM', 'WAS', 'TO', 'MAKE', 'A', 'CAREFUL', 'CANVASS', 'OF', 'THE', 'VOTERS', 'OF', 'HIS', 'DISTRICT', 'TO', 'SEE', 'WHAT', 'WAS', 'STILL', 'TO', 'BE', 'ACCOMPLISHED'] +6829-68771-0001-1915: hyp=['ONE', 'OF', 'MISTER', 'HOPKINS', 'FIRST', 'TASKS', 'AFTER', 'CALLING', 'HIS', 'FAITHFUL', 'HENCHMEN', 'AROUND', 'HIM', 'WAS', 'TO', 'MAKE', 'A', 'CAREFUL', 'CANVASS', 'OF', 'THE', 'VOTERS', 'OF', 'HIS', 'DISTRICT', 'TO', 'SEE', 'WHAT', 'WAS', 'STILL', 'TO', 'BE', 'ACCOMPLISHED'] +6829-68771-0002-1916: ref=['THE', 'WEAK', 'KNEED', 'CONTINGENCY', 'MUST', 'BE', 'STRENGTHENED', 'AND', 'FORTIFIED', 'AND', 'A', 'COUPLE', 'OF', 'HUNDRED', 'VOTES', 'IN', 'ONE', 'WAY', 'OR', 'ANOTHER', 'SECURED', 'FROM', 'THE', 'OPPOSITION'] +6829-68771-0002-1916: hyp=['THE', 'WEAK', 'NEED', 'CONTINGENCY', 'MUST', 'BE', 'STRENGTHENED', 'AND', 'FORTIFIED', 'AND', 'A', 'COUPLE', 'OF', 'HUNDRED', 'VOTES', 'IN', 'ONE', 'WAY', 'OR', 'THE', 'OTHER', 'SECURED', 'FROM', 'OPPOSITION'] +6829-68771-0003-1917: ref=['THE', 'DEMOCRATIC', 'COMMITTEE', 'FIGURED', 'OUT', 'A', 'WAY', 'TO', 'DO', 'THIS'] +6829-68771-0003-1917: hyp=['THE', 'DEMOCRATIC', 'COMMITTEE', 'FIGURED', 'OUT', 'A', 'WAY', 'TO', 'DO', 'THIS'] +6829-68771-0004-1918: ref=['UNDER', 'ORDINARY', 'CONDITIONS', 'REYNOLDS', 'WAS', 'SURE', 'TO', 'BE', 'ELECTED', 'BUT', 'THE', 'COMMITTEE', 'PROPOSED', 'TO', 'SACRIFICE', 'HIM', 'IN', 'ORDER', 'TO', 'ELECT', 'HOPKINS'] +6829-68771-0004-1918: hyp=['UNDER', 'ORDINARY', 'CONDITIONS', 'REYNOLDS', 'WAS', 'SURE', 'TO', 'BE', 'ELECTED', 'BUT', 'THE', 'COMMITTEE', 'PROPOSED', 'TO', 'SACRIFICE', 'HIM', 'IN', 'ORDER', 'TO', 'ELECT', 'HOPKINS'] +6829-68771-0005-1919: ref=['THE', 'ONLY', 'THING', 'NECESSARY', 'WAS', 'TO', 'FIX', 'SETH', 'REYNOLDS', 'AND', 'THIS', 'HOPKINS', 'ARRANGED', 'PERSONALLY'] +6829-68771-0005-1919: hyp=['THE', 'ONLY', 'THING', 'NECESSARY', 'WAS', 'TO', 'FIX', 'SETH', 'REYNOLDS', 'AND', 'THIS', 'HOPKINS', 'ARRANGED', 'PERSONALLY'] +6829-68771-0006-1920: ref=['AND', 'THIS', 'WAS', 'WHY', 'KENNETH', 'AND', 'BETH', 'DISCOVERED', 'HIM', 'CONVERSING', 'WITH', 'THE', 'YOUNG', 'WOMAN', 'IN', 'THE', 'BUGGY'] +6829-68771-0006-1920: hyp=['AND', 'THIS', 'WAS', 'WHY', 'KENNETH', 'AND', 'BETH', 'DISCOVERED', 'HIM', 'CONVERSING', 'WITH', 'THE', 'YOUNG', 'WOMAN', 'IN', 'THE', 'BUGGY'] +6829-68771-0007-1921: ref=['THE', 'DESCRIPTION', 'SHE', 'GAVE', 'OF', 'THE', 'COMING', 'RECEPTION', 'TO', 'THE', "WOMAN'S", 'POLITICAL', 'LEAGUE', 'WAS', 'SO', 'HUMOROUS', 'AND', 'DIVERTING', 'THAT', 'THEY', 'WERE', 'BOTH', 'LAUGHING', 'HEARTILY', 'OVER', 'THE', 'THING', 'WHEN', 'THE', 'YOUNG', 'PEOPLE', 'PASSED', 'THEM', 'AND', 'THUS', 'MISTER', 'HOPKINS', 'FAILED', 'TO', 'NOTICE', 'WHO', 'THE', 'OCCUPANTS', 'OF', 'THE', 'OTHER', 'VEHICLE', 'WERE'] +6829-68771-0007-1921: hyp=['THE', 'DESCRIPTION', 'SHE', 'GAVE', 'OF', 'THE', 'COMING', 'RECEPTION', 'TO', 'THE', "WOMEN'S", 'POLITICAL', 'LEAGUE', 'WAS', 'SO', 'HUMOROUS', 'AND', 'DIVERTING', 'THAT', 'THEY', 'WERE', 'BOTH', 'LAUGHING', 'HEARTILY', 'OVER', 'THE', 'THING', 'WHEN', 'THE', 'YOUNG', 'PEOPLE', 'PASSED', 'THEM', 'AND', 'THUS', 'MISTER', 'HOPKINS', 'FAILED', 'TO', 'NOTICE', 'WHO', 'THE', 'OCCUPANTS', 'OF', 'THE', 'OTHER', 'VEHICLE', 'WERE'] +6829-68771-0008-1922: ref=['THESE', 'WOMEN', 'WERE', 'FLATTERED', 'BY', 'THE', 'ATTENTION', 'OF', 'THE', 'YOUNG', 'LADY', 'AND', 'HAD', 'PROMISED', 'TO', 'ASSIST', 'IN', 'ELECTING', 'MISTER', 'FORBES'] +6829-68771-0008-1922: hyp=['THESE', 
'WOMEN', 'WERE', 'FLATTERED', 'BY', 'THE', 'ATTENTION', 'OF', 'THE', 'YOUNG', 'LADY', 'AND', 'HAD', 'PROMISED', 'TO', 'ASSIST', 'IN', 'ELECTING', 'MISTER', 'FORBES'] +6829-68771-0009-1923: ref=['LOUISE', 'HOPED', 'FOR', 'EXCELLENT', 'RESULTS', 'FROM', 'THIS', 'ORGANIZATION', 'AND', 'WISHED', 'THE', 'ENTERTAINMENT', 'TO', 'BE', 'SO', 'EFFECTIVE', 'IN', 'WINNING', 'THEIR', 'GOOD', 'WILL', 'THAT', 'THEY', 'WOULD', 'WORK', 'EARNESTLY', 'FOR', 'THE', 'CAUSE', 'IN', 'WHICH', 'THEY', 'WERE', 'ENLISTED'] +6829-68771-0009-1923: hyp=['LOUISE', 'HOPED', 'FOR', 'EXCELLENT', 'RESULTS', 'FROM', 'THIS', 'ORGANIZATION', 'AND', 'WISHED', 'THE', 'ENTERTAINMENT', 'TO', 'BE', 'SO', 'EFFECTIVE', 'IN', 'WINNING', 'THEIR', 'GOOD', 'WILL', 'THAT', 'THEY', 'WOULD', 'WORK', 'EARNESTLY', 'FOR', 'THE', 'CAUSE', 'IN', 'WHICH', 'THEY', 'WERE', 'ENLISTED'] +6829-68771-0010-1924: ref=['THE', 'FAIRVIEW', 'BAND', 'WAS', 'ENGAGED', 'TO', 'DISCOURSE', 'AS', 'MUCH', 'HARMONY', 'AS', 'IT', 'COULD', 'PRODUCE', 'AND', 'THE', 'RESOURCES', 'OF', 'THE', 'GREAT', 'HOUSE', 'WERE', 'TAXED', 'TO', 'ENTERTAIN', 'THE', 'GUESTS'] +6829-68771-0010-1924: hyp=['THE', 'FAIRVIEW', 'BAND', 'WAS', 'ENGAGED', 'TO', 'DISCOURSE', 'AS', 'MUCH', 'HARMONY', 'AS', 'IT', 'COULD', 'PRODUCE', 'AND', 'THE', 'RESOURCES', 'OF', 'THE', 'GREAT', 'HOUSE', 'WERE', 'TAXED', 'TO', 'ENTERTAIN', 'THE', 'GUESTS'] +6829-68771-0011-1925: ref=['TABLES', 'WERE', 'SPREAD', 'ON', 'THE', 'LAWN', 'AND', 'A', 'DAINTY', 'BUT', 'SUBSTANTIAL', 'REPAST', 'WAS', 'TO', 'BE', 'SERVED'] +6829-68771-0011-1925: hyp=['TABLES', 'WERE', 'SPREAD', 'ON', 'THE', 'LAWN', 'AND', 'A', 'DAINTY', 'BUT', 'SUBSTANTIAL', 'REPAST', 'WAS', 'TO', 'BE', 'SERVED'] +6829-68771-0012-1926: ref=['THIS', 'WAS', 'THE', 'FIRST', 'OCCASION', 'WITHIN', 'A', 'GENERATION', 'WHEN', 'SUCH', 'AN', 'ENTERTAINMENT', 'HAD', 'BEEN', 'GIVEN', 'AT', 'ELMHURST', 'AND', 'THE', 'ONLY', 'ONE', 'WITHIN', 'THE', 'MEMORY', 'OF', 'MAN', 'WHERE', 'THE', 'NEIGHBORS', 'AND', 'COUNTRY', 'PEOPLE', 'HAD', 'BEEN', 'INVITED', 'GUESTS'] +6829-68771-0012-1926: hyp=['THIS', 'WAS', 'THE', 'FIRST', 'OCCASION', 'WITHIN', 'A', 'GENERATION', 'WHEN', 'SUCH', 'AN', 'ENTERTAINMENT', 'HAD', 'BEEN', 'GIVEN', 'AT', 'ELMHURST', 'AND', 'THE', 'ONLY', 'WHEN', 'WITHIN', 'THE', 'MEMORY', 'OF', 'MAN', 'WHERE', 'THE', 'NEIGHBORS', 'AND', 'COUNTRY', 'PEOPLE', 'HAD', 'BEEN', 'THE', 'INVITED', 'GUESTS'] +6829-68771-0013-1927: ref=['THE', 'ATTENDANCE', 'WAS', 'UNEXPECTEDLY', 'LARGE', 'AND', 'THE', 'GIRLS', 'WERE', 'DELIGHTED', 'FORESEEING', 'GREAT', 'SUCCESS', 'FOR', 'THEIR', 'FETE'] +6829-68771-0013-1927: hyp=['THE', 'ATTENDANCE', 'WAS', 'UNEXPECTEDLY', 'LARGE', 'AND', 'THE', 'GIRLS', 'WERE', 'DELIGHTED', 'FORESEEING', 'GREAT', 'SUCCESS', 'FOR', 'THEIR', 'FIGHT'] +6829-68771-0014-1928: ref=['WE', 'OUGHT', 'TO', 'HAVE', 'MORE', 'ATTENDANTS', 'BETH', 'SAID', 'LOUISE', 'APPROACHING', 'HER', 'COUSIN'] +6829-68771-0014-1928: hyp=['WE', 'OUGHT', 'TO', 'HAVE', 'MORE', 'ATTENDANCE', 'BETH', 'SAID', 'LOUISE', 'APPROACHING', 'HER', 'COUSIN'] +6829-68771-0015-1929: ref=["WON'T", 'YOU', 'RUN', 'INTO', 'THE', 'HOUSE', 'AND', 'SEE', 'IF', 'MARTHA', "CAN'T", 'SPARE', 'ONE', 'OR', 'TWO', 'MORE', 'MAIDS'] +6829-68771-0015-1929: hyp=["WON'T", 'YOU', 'RUN', 'INTO', 'THE', 'HOUSE', 'AND', 'SEE', 'IF', 'MARTHA', "CAN'T", 'SPARE', 'ONE', 'OR', 'TWO', 'MORE', 'MAIDS'] +6829-68771-0016-1930: ref=['SHE', 'WAS', 'VERY', 'FOND', 'OF', 'THE', 'YOUNG', 'LADIES', 'WHOM', 'SHE', 'HAD', 'KNOWN', 'WHEN', 'AUNT', 'JANE', 'WAS', 'THE', 'MISTRESS', 'HERE', 'AND', 'BETH', 'WAS', 'HER', 'ESPECIAL', 
'FAVORITE'] +6829-68771-0016-1930: hyp=['SHE', 'WAS', 'VERY', 'FOND', 'OF', 'THE', 'YOUNG', 'LADIES', 'WHOM', 'SHE', 'HAD', 'KNOWN', 'WHEN', 'AUNT', 'JANE', 'WAS', 'THEIR', 'MISTRESS', 'HERE', 'AND', 'BETH', 'WAS', 'HER', 'ESPECIAL', 'FAVOURITE'] +6829-68771-0017-1931: ref=['THE', 'HOUSEKEEPER', 'LED', 'THE', 'WAY', 'AND', 'BETH', 'FOLLOWED'] +6829-68771-0017-1931: hyp=['THE', 'HOUSEKEEPER', 'LED', 'THE', 'WAY', 'AND', 'BETH', 'FOLLOWED'] +6829-68771-0018-1932: ref=['FOR', 'A', 'MOMENT', 'BETH', 'STOOD', 'STARING', 'WHILE', 'THE', 'NEW', 'MAID', 'REGARDED', 'HER', 'WITH', 'COMPOSURE', 'AND', 'A', 'SLIGHT', 'SMILE', 'UPON', 'HER', 'BEAUTIFUL', 'FACE'] +6829-68771-0018-1932: hyp=['FOR', 'A', 'MOMENT', 'BETH', 'STOOD', 'STARING', 'WHILE', 'THE', 'NEW', 'MAID', 'REGARDED', 'HER', 'WITH', 'COMPOSURE', 'AND', 'A', 'SLIGHT', 'SMILE', 'UPON', 'HER', 'BEAUTIFUL', 'FACE'] +6829-68771-0019-1933: ref=['SHE', 'WAS', 'DRESSED', 'IN', 'THE', 'REGULATION', 'COSTUME', 'OF', 'THE', 'MAIDS', 'AT', 'ELMHURST', 'A', 'PLAIN', 'BLACK', 'GOWN', 'WITH', 'WHITE', 'APRON', 'AND', 'CAP'] +6829-68771-0019-1933: hyp=['SHE', 'WAS', 'DRESSED', 'IN', 'THE', 'REGULATION', 'COSTUME', 'OF', 'THE', 'MAIDS', 'AT', 'ELMHURST', 'A', 'PLAIN', 'BLACK', 'GOWN', 'WITH', 'A', 'WHITE', 'APRON', 'AND', 'CAP'] +6829-68771-0020-1934: ref=['THEN', 'SHE', 'GAVE', 'A', 'LITTLE', 'LAUGH', 'AND', 'REPLIED', 'NO', 'MISS', 'BETH', "I'M", 'ELIZABETH', 'PARSONS'] +6829-68771-0020-1934: hyp=['THEN', 'SHE', 'GAVE', 'A', 'LITTLE', 'LAUGH', 'AND', 'REPLIED', 'NO', 'MISS', 'BETH', 'I', 'AM', 'ELIZABETH', 'PARSONS'] +6829-68771-0021-1935: ref=['BUT', 'IT', "CAN'T", 'BE', 'PROTESTED', 'THE', 'GIRL'] +6829-68771-0021-1935: hyp=['BUT', 'IT', "CAN'T", 'BE', 'PROTESTED', 'THE', 'GIRL'] +6829-68771-0022-1936: ref=['I', 'ATTEND', 'TO', 'THE', 'HOUSEHOLD', 'MENDING', 'YOU', 'KNOW', 'AND', 'CARE', 'FOR', 'THE', 'LINEN'] +6829-68771-0022-1936: hyp=['I', 'ATTEND', 'TO', 'THE', 'HOUSEHOLD', 'MENDING', 'YOU', 'KNOW', 'AND', 'CARE', 'FOR', 'THE', 'LINEN'] +6829-68771-0023-1937: ref=['YOU', 'SPEAK', 'LIKE', 'AN', 'EDUCATED', 'PERSON', 'SAID', 'BETH', 'WONDERINGLY', 'WHERE', 'IS', 'YOUR', 'HOME'] +6829-68771-0023-1937: hyp=['YOU', 'SPEAK', 'LIKE', 'AN', 'EDUCATED', 'PERSON', 'SAID', 'BETH', 'WONDERINGLY', 'WHERE', 'IS', 'YOUR', 'HOME'] +6829-68771-0024-1938: ref=['FOR', 'THE', 'FIRST', 'TIME', 'THE', 'MAID', 'SEEMED', 'A', 'LITTLE', 'CONFUSED', 'AND', 'HER', 'GAZE', 'WANDERED', 'FROM', 'THE', 'FACE', 'OF', 'HER', 'VISITOR'] +6829-68771-0024-1938: hyp=['FOR', 'THE', 'FIRST', 'TIME', 'THE', 'MAID', 'SEEMED', 'A', 'LITTLE', 'CONFUSED', 'AND', 'HER', 'GAZE', 'WANDERED', 'FROM', 'THE', 'FACE', 'OF', 'HER', 'VISITOR'] +6829-68771-0025-1939: ref=['SHE', 'SAT', 'DOWN', 'IN', 'A', 'ROCKING', 'CHAIR', 'AND', 'CLASPING', 'HER', 'HANDS', 'IN', 'HER', 'LAP', 'ROCKED', 'SLOWLY', 'BACK', 'AND', 'FORTH', "I'M", 'SORRY', 'SAID', 'BETH'] +6829-68771-0025-1939: hyp=['SHE', 'SAT', 'DOWN', 'IN', 'A', 'ROCKING', 'CHAIR', 'AND', 'CLASPING', 'HER', 'HANDS', 'IN', 'HER', 'LAP', 'ROCKED', 'SLOWLY', 'BACK', 'AND', 'FORTH', "I'M", 'SORRY', 'SAID', 'BETH'] +6829-68771-0026-1940: ref=['ELIZA', 'PARSONS', 'SHOOK', 'HER', 'HEAD'] +6829-68771-0026-1940: hyp=['ELIZA', 'PARSONS', 'SHOOK', 'HER', 'HEAD'] +6829-68771-0027-1941: ref=['THEY', 'THEY', 'EXCITE', 'ME', 'IN', 'SOME', 'WAY', 'AND', 'I', 'I', "CAN'T", 'BEAR', 'THEM', 'YOU', 'MUST', 'EXCUSE', 'ME'] +6829-68771-0027-1941: hyp=['THEY', 'THEY', 'EXCITE', 'ME', 'IN', 'SOME', 'WAY', 'AND', 'I', 'I', "CAN'T", 'BEAR', 'THEM', 'YOU', 'MUST', 'EXCUSE', 
'ME'] +6829-68771-0028-1942: ref=['SHE', 'EVEN', 'SEEMED', 'MILDLY', 'AMUSED', 'AT', 'THE', 'ATTENTION', 'SHE', 'ATTRACTED'] +6829-68771-0028-1942: hyp=['SHE', 'EVEN', 'SEEMED', 'MILDLY', 'AMUSED', 'AT', 'THE', 'ATTENTION', 'SHE', 'ATTRACTED'] +6829-68771-0029-1943: ref=['BETH', 'WAS', 'A', 'BEAUTIFUL', 'GIRL', 'THE', 'HANDSOMEST', 'OF', 'THE', 'THREE', 'COUSINS', 'BY', 'FAR', 'YET', 'ELIZA', 'SURPASSED', 'HER', 'IN', 'NATURAL', 'CHARM', 'AND', 'SEEMED', 'WELL', 'AWARE', 'OF', 'THE', 'FACT'] +6829-68771-0029-1943: hyp=['BETH', 'WAS', 'A', 'BEAUTIFUL', 'GIRL', 'THE', 'HANDSOMEST', 'OF', 'THE', 'THREE', 'COUSINS', 'BY', 'FAR', 'YET', 'ELIZA', 'SURPASSED', 'HER', 'IN', 'NATURAL', 'CHARM', 'AND', 'SEEMED', 'WELL', 'AWARE', 'OF', 'THE', 'FACT'] +6829-68771-0030-1944: ref=['HER', 'MANNER', 'WAS', 'NEITHER', 'INDEPENDENT', 'NOR', 'ASSERTIVE', 'BUT', 'RATHER', 'ONE', 'OF', 'WELL', 'BRED', 'COMPOSURE', 'AND', 'CALM', 'RELIANCE'] +6829-68771-0030-1944: hyp=['HER', 'MANNER', 'WAS', 'NEITHER', 'INDEPENDENT', 'NOR', 'ASSERTIVE', 'BUT', 'RATHER', 'ONE', 'OF', 'WELL', 'BRED', 'COMPOSURE', 'AND', 'CALM', 'RELIANCE'] +6829-68771-0031-1945: ref=['HER', 'EYES', 'WANDERED', 'TO', 'THE', "MAID'S", 'HANDS'] +6829-68771-0031-1945: hyp=['HER', 'EYES', 'WANDERED', 'TO', 'THE', "MAID'S", 'HANDS'] +6829-68771-0032-1946: ref=['HOWEVER', 'HER', 'FEATURES', 'AND', 'FORM', 'MIGHT', 'REPRESS', 'ANY', 'EVIDENCE', 'OF', 'NERVOUSNESS', 'THESE', 'HANDS', 'TOLD', 'A', 'DIFFERENT', 'STORY'] +6829-68771-0032-1946: hyp=['HOWEVER', 'HER', 'FEATURES', 'AND', 'FORM', 'MIGHT', 'REPRESS', 'ANY', 'EVIDENCE', 'OF', 'NERVOUSNESS', 'THESE', 'HANDS', 'TOLD', 'A', 'DIFFERENT', 'STORY'] +6829-68771-0033-1947: ref=['SHE', 'ROSE', 'QUICKLY', 'TO', 'HER', 'FEET', 'WITH', 'AN', 'IMPETUOUS', 'GESTURE', 'THAT', 'MADE', 'HER', 'VISITOR', 'CATCH', 'HER', 'BREATH'] +6829-68771-0033-1947: hyp=['SHE', 'ROSE', 'QUICKLY', 'TO', 'HER', 'FEET', 'WITH', 'AN', 'IMPETUOUS', 'GESTURE', 'THAT', 'MADE', 'HER', 'VISITOR', 'CATCH', 'HER', 'BREATH'] +6829-68771-0034-1948: ref=['I', 'WISH', 'I', 'KNEW', 'MYSELF', 'SHE', 'CRIED', 'FIERCELY'] +6829-68771-0034-1948: hyp=['I', 'WISH', 'I', 'KNEW', 'MYSELF', 'SHE', 'CRIED', 'FIERCELY'] +6829-68771-0035-1949: ref=['WILL', 'YOU', 'LEAVE', 'ME', 'ALONE', 'IN', 'MY', 'OWN', 'ROOM', 'OR', 'MUST', 'I', 'GO', 'AWAY', 'TO', 'ESCAPE', 'YOU'] +6829-68771-0035-1949: hyp=['WILL', 'YOU', 'LEAVE', 'ME', 'ALONE', 'IN', 'MY', 'OWN', 'ROOM', 'OR', 'MUST', 'I', 'GO', 'AWAY', 'TO', 'ESCAPE', 'YOU'] +6829-68771-0036-1950: ref=['ELIZA', 'CLOSED', 'THE', 'DOOR', 'BEHIND', 'HER', 'WITH', 'A', 'DECIDED', 'SLAM', 'AND', 'A', 'KEY', 'CLICKED', 'IN', 'THE', 'LOCK'] +6829-68771-0036-1950: hyp=['ELIZA', 'CLOSED', 'THE', 'DOOR', 'BEHIND', 'HER', 'WITH', 'A', 'DECIDED', 'SLAM', 'AND', 'A', 'KEY', 'CLICKED', 'IN', 'THE', 'LOCK'] +6930-75918-0000-1951: ref=['CONCORD', 'RETURNED', 'TO', 'ITS', 'PLACE', 'AMIDST', 'THE', 'TENTS'] +6930-75918-0000-1951: hyp=['CONCORD', 'RETURNED', 'TO', 'ITS', 'PLACE', 'AMIDST', 'THE', 'TENTS'] +6930-75918-0001-1952: ref=['THE', 'ENGLISH', 'FORWARDED', 'TO', 'THE', 'FRENCH', 'BASKETS', 'OF', 'FLOWERS', 'OF', 'WHICH', 'THEY', 'HAD', 'MADE', 'A', 'PLENTIFUL', 'PROVISION', 'TO', 'GREET', 'THE', 'ARRIVAL', 'OF', 'THE', 'YOUNG', 'PRINCESS', 'THE', 'FRENCH', 'IN', 'RETURN', 'INVITED', 'THE', 'ENGLISH', 'TO', 'A', 'SUPPER', 'WHICH', 'WAS', 'TO', 'BE', 'GIVEN', 'THE', 'NEXT', 'DAY'] +6930-75918-0001-1952: hyp=['THE', 'ENGLISH', 'FORWARDED', 'TO', 'THE', 'FRENCH', 'BASKETS', 'OF', 'FLOWERS', 'OF', 'WHICH', 'THEY', 'HAD', 'MADE', 
'A', 'PLENTIFUL', 'PROVISION', 'TO', 'GREET', 'THE', 'ARRIVAL', 'OF', 'THE', 'YOUNG', 'PRINCESS', 'THE', 'FRENCH', 'IN', 'RETURN', 'INVITED', 'THE', 'ENGLISH', 'TO', 'A', 'SUPPER', 'WHICH', 'WAS', 'TO', 'BE', 'GIVEN', 'THE', 'NEXT', 'DAY'] +6930-75918-0002-1953: ref=['CONGRATULATIONS', 'WERE', 'POURED', 'IN', 'UPON', 'THE', 'PRINCESS', 'EVERYWHERE', 'DURING', 'HER', 'JOURNEY'] +6930-75918-0002-1953: hyp=['CONGRATULATIONS', 'WERE', 'POURED', 'IN', 'UPON', 'THE', 'PRINCESS', 'EVERYWHERE', 'DURING', 'HER', 'JOURNEY'] +6930-75918-0003-1954: ref=['FROM', 'THE', 'RESPECT', 'PAID', 'HER', 'ON', 'ALL', 'SIDES', 'SHE', 'SEEMED', 'LIKE', 'A', 'QUEEN', 'AND', 'FROM', 'THE', 'ADORATION', 'WITH', 'WHICH', 'SHE', 'WAS', 'TREATED', 'BY', 'TWO', 'OR', 'THREE', 'SHE', 'APPEARED', 'AN', 'OBJECT', 'OF', 'WORSHIP', 'THE', 'QUEEN', 'MOTHER', 'GAVE', 'THE', 'FRENCH', 'THE', 'MOST', 'AFFECTIONATE', 'RECEPTION', 'FRANCE', 'WAS', 'HER', 'NATIVE', 'COUNTRY', 'AND', 'SHE', 'HAD', 'SUFFERED', 'TOO', 'MUCH', 'UNHAPPINESS', 'IN', 'ENGLAND', 'FOR', 'ENGLAND', 'TO', 'HAVE', 'MADE', 'HER', 'FORGET', 'FRANCE'] +6930-75918-0003-1954: hyp=['FROM', 'THE', 'RESPECT', 'PAID', 'HER', 'ON', 'ALL', 'SIDES', 'SHE', 'SEEMED', 'LIKE', 'A', 'QUEEN', 'AND', 'FROM', 'THE', 'ADORATION', 'WITH', 'WHICH', 'SHE', 'WAS', 'TREATED', 'BY', 'TWO', 'OR', 'THREE', 'SHE', 'APPEARED', 'AN', 'OBJECT', 'OF', 'WORSHIP', 'THE', 'QUEEN', 'MOTHER', 'GAVE', 'THE', 'FRENCH', 'THE', 'MOST', 'AFFECTIONATE', 'RECEPTION', 'FRANCE', 'WAS', 'HER', 'NATIVE', 'COUNTRY', 'AND', 'SHE', 'HAD', 'SUFFERED', 'TOO', 'MUCH', 'UNHAPPINESS', 'IN', 'ENGLAND', 'FOR', 'ENGLAND', 'TO', 'HAVE', 'MADE', 'HER', 'FORGET', 'FRANCE'] +6930-75918-0004-1955: ref=['SHE', 'TAUGHT', 'HER', 'DAUGHTER', 'THEN', 'BY', 'HER', 'OWN', 'AFFECTION', 'FOR', 'IT', 'THAT', 'LOVE', 'FOR', 'A', 'COUNTRY', 'WHERE', 'THEY', 'HAD', 'BOTH', 'BEEN', 'HOSPITABLY', 'RECEIVED', 'AND', 'WHERE', 'A', 'BRILLIANT', 'FUTURE', 'OPENED', 'BEFORE', 'THEM'] +6930-75918-0004-1955: hyp=['SHE', 'TAUGHT', 'HER', 'DAUGHTER', 'THEN', 'BY', 'HER', 'OWN', 'AFFECTION', 'FOR', 'IT', 'THAT', 'LOVE', 'FOR', 'A', 'COUNTRY', 'WHERE', 'THEY', 'HAD', 'BOTH', 'BEEN', 'HOSPITABLY', 'RECEIVED', 'AND', 'WHERE', 'A', 'BRILLIANT', 'FUTURE', 'OPENED', 'FOR', 'THEM'] +6930-75918-0005-1956: ref=['THE', 'COUNT', 'HAD', 'THROWN', 'HIMSELF', 'BACK', 'ON', 'HIS', 'SEAT', 'LEANING', 'HIS', 'SHOULDERS', 'AGAINST', 'THE', 'PARTITION', 'OF', 'THE', 'TENT', 'AND', 'REMAINED', 'THUS', 'HIS', 'FACE', 'BURIED', 'IN', 'HIS', 'HANDS', 'WITH', 'HEAVING', 'CHEST', 'AND', 'RESTLESS', 'LIMBS'] +6930-75918-0005-1956: hyp=['THE', 'COUNT', 'HAD', 'THROWN', 'HIMSELF', 'BACK', 'ON', 'HIS', 'SEAT', 'LEANING', 'HIS', 'SHOULDERS', 'AGAINST', 'THE', 'PARTITION', 'OF', 'THE', 'TENT', 'AND', 'REMAINED', 'THUS', 'HIS', 'FACE', 'BURIED', 'IN', 'HIS', 'HANDS', 'WITH', 'HEAVING', 'CHEST', 'AND', 'RESTLESS', 'LIMBS'] +6930-75918-0006-1957: ref=['THIS', 'HAS', 'INDEED', 'BEEN', 'A', 'HARASSING', 'DAY', 'CONTINUED', 'THE', 'YOUNG', 'MAN', 'HIS', 'EYES', 'FIXED', 'UPON', 'HIS', 'FRIEND'] +6930-75918-0006-1957: hyp=['THIS', 'HAS', 'INDEED', 'BEEN', 'A', 'HARASSING', 'DAY', 'CONTINUED', 'THE', 'YOUNG', 'MAN', 'HIS', 'EYES', 'FIXED', 'UPON', 'HIS', 'FRIEND'] +6930-75918-0007-1958: ref=['YOU', 'WILL', 'BE', 'FRANK', 'WITH', 'ME', 'I', 'ALWAYS', 'AM'] +6930-75918-0007-1958: hyp=['YOU', 'WILL', 'BE', 'FRANK', 'WITH', 'ME', 'I', 'ALWAYS', 'AM'] +6930-75918-0008-1959: ref=['CAN', 'YOU', 'IMAGINE', 'WHY', 'BUCKINGHAM', 'HAS', 'BEEN', 'SO', 'VIOLENT', 'I', 'SUSPECT'] 
+6930-75918-0008-1959: hyp=['CAN', 'YOU', 'IMAGINE', 'WHY', 'BUCKINGHAM', 'HAS', 'BEEN', 'SO', 'VIOLENT', 'I', 'SUSPECT'] +6930-75918-0009-1960: ref=['IT', 'IS', 'YOU', 'WHO', 'ARE', 'MISTAKEN', 'RAOUL', 'I', 'HAVE', 'READ', 'HIS', 'DISTRESS', 'IN', 'HIS', 'EYES', 'IN', 'HIS', 'EVERY', 'GESTURE', 'AND', 'ACTION', 'THE', 'WHOLE', 'DAY'] +6930-75918-0009-1960: hyp=['IT', 'IS', 'YOU', 'WHO', 'ARE', 'MISTAKEN', 'RAOUL', 'I', 'HAVE', 'READ', 'HIS', 'DISTRESS', 'IN', 'HIS', 'EYES', 'IN', 'HIS', 'EVERY', 'GESTURE', 'AND', 'ACTION', 'THE', 'WHOLE', 'DAY'] +6930-75918-0010-1961: ref=['I', 'CAN', 'PERCEIVE', 'LOVE', 'CLEARLY', 'ENOUGH'] +6930-75918-0010-1961: hyp=['I', 'CAN', 'PERCEIVE', 'LOVE', 'CLEARLY', 'ENOUGH'] +6930-75918-0011-1962: ref=['I', 'AM', 'CONVINCED', 'OF', 'WHAT', 'I', 'SAY', 'SAID', 'THE', 'COUNT'] +6930-75918-0011-1962: hyp=['I', 'AM', 'CONVINCED', 'OF', 'WHAT', 'I', 'SAY', 'SAID', 'THE', 'COUNT'] +6930-75918-0012-1963: ref=['IT', 'IS', 'ANNOYANCE', 'THEN'] +6930-75918-0012-1963: hyp=['IT', 'IS', 'ANNOYANCE', 'THEN'] +6930-75918-0013-1964: ref=['IN', 'THOSE', 'VERY', 'TERMS', 'I', 'EVEN', 'ADDED', 'MORE'] +6930-75918-0013-1964: hyp=['IN', 'THOSE', 'VERY', 'TERMS', 'I', 'EVEN', 'ADDED', 'MORE'] +6930-75918-0014-1965: ref=['BUT', 'CONTINUED', 'RAOUL', 'NOT', 'INTERRUPTED', 'BY', 'THIS', 'MOVEMENT', 'OF', 'HIS', 'FRIEND', 'HEAVEN', 'BE', 'PRAISED', 'THE', 'FRENCH', 'WHO', 'ARE', 'PRONOUNCED', 'TO', 'BE', 'THOUGHTLESS', 'AND', 'INDISCREET', 'RECKLESS', 'EVEN', 'ARE', 'CAPABLE', 'OF', 'BRINGING', 'A', 'CALM', 'AND', 'SOUND', 'JUDGMENT', 'TO', 'BEAR', 'ON', 'MATTERS', 'OF', 'SUCH', 'HIGH', 'IMPORTANCE'] +6930-75918-0014-1965: hyp=['BUT', 'CONTINUED', 'RAOUL', 'NOT', 'INTERRUPTED', 'BY', 'THIS', 'MOVEMENT', 'OF', 'HIS', 'FRIEND', 'HEAVEN', 'BE', 'PRAISED', 'THE', 'FRENCH', 'WHO', 'ARE', 'PRONOUNCED', 'TO', 'BE', 'THOUGHTLESS', 'AND', 'INDISCREET', 'RECKLESS', 'EVEN', 'ARE', 'CAPABLE', 'OF', 'BRINGING', 'A', 'CALM', 'AND', 'SOUND', 'JUDGMENT', 'TO', 'BEAR', 'ON', 'MATTERS', 'OF', 'SUCH', 'HIGH', 'IMPORTANCE'] +6930-75918-0015-1966: ref=['THUS', 'IT', 'IS', 'THAT', 'THE', 'HONOR', 'OF', 'THREE', 'IS', 'SAVED', 'OUR', "COUNTRY'S", 'OUR', "MASTER'S", 'AND', 'OUR', 'OWN'] +6930-75918-0015-1966: hyp=['THUS', 'IT', 'IS', 'THAT', 'THE', 'HONOUR', 'OF', 'THREE', 'IS', 'SAVED', 'OUR', 'COUNTRY', 'OUR', "MASTER'S", 'AND', 'OUR', 'OWN'] +6930-75918-0016-1967: ref=['YES', 'I', 'NEED', 'REPOSE', 'MANY', 'THINGS', 'HAVE', 'AGITATED', 'ME', 'TO', 'DAY', 'BOTH', 'IN', 'MIND', 'AND', 'BODY', 'WHEN', 'YOU', 'RETURN', 'TO', 'MORROW', 'I', 'SHALL', 'NO', 'LONGER', 'BE', 'THE', 'SAME', 'MAN'] +6930-75918-0016-1967: hyp=['YES', 'I', 'NEED', 'REPOSE', 'MANY', 'THINGS', 'HAVE', 'AGITATED', 'ME', 'TO', 'DAY', 'BOTH', 'IN', 'MIND', 'AND', 'BODY', 'WHEN', 'YOU', 'RETURN', 'TO', 'MORROW', 'I', 'SHALL', 'NO', 'LONGER', 'BE', 'THE', 'SAME', 'MAN'] +6930-75918-0017-1968: ref=['BUT', 'IN', 'THIS', 'FRIENDLY', 'PRESSURE', 'RAOUL', 'COULD', 'DETECT', 'THE', 'NERVOUS', 'AGITATION', 'OF', 'A', 'GREAT', 'INTERNAL', 'CONFLICT'] +6930-75918-0017-1968: hyp=['BUT', 'IN', 'THIS', 'FRIENDLY', 'PRESSURE', 'RALPH', 'COULD', 'DETECT', 'THE', 'NERVOUS', 'AGITATION', 'OF', 'A', 'GREAT', 'INTERNAL', 'CONFLICT'] +6930-75918-0018-1969: ref=['THE', 'NIGHT', 'WAS', 'CLEAR', 'STARLIT', 'AND', 'SPLENDID', 'THE', 'TEMPEST', 'HAD', 'PASSED', 'AWAY', 'AND', 'THE', 'SWEET', 'INFLUENCES', 'OF', 'THE', 'EVENING', 'HAD', 'RESTORED', 'LIFE', 'PEACE', 'AND', 'SECURITY', 'EVERYWHERE'] +6930-75918-0018-1969: hyp=['THE', 'NIGHT', 'WAS', 'CLEAR', 
'STARLET', 'AND', 'SPLENDID', 'THE', 'TEMPEST', 'HAD', 'PASSED', 'AWAY', 'AND', 'THE', 'SWEET', 'INFLUENCES', 'OF', 'THE', 'EVENING', 'HAD', 'RESTORED', 'LIFE', 'PEACE', 'AND', 'SECURITY', 'EVERYWHERE'] +6930-75918-0019-1970: ref=['UPON', 'THE', 'LARGE', 'SQUARE', 'IN', 'FRONT', 'OF', 'THE', 'HOTEL', 'THE', 'SHADOWS', 'OF', 'THE', 'TENTS', 'INTERSECTED', 'BY', 'THE', 'GOLDEN', 'MOONBEAMS', 'FORMED', 'AS', 'IT', 'WERE', 'A', 'HUGE', 'MOSAIC', 'OF', 'JET', 'AND', 'YELLOW', 'FLAGSTONES'] +6930-75918-0019-1970: hyp=['UPON', 'THE', 'LARGE', 'SQUARE', 'IN', 'FRONT', 'OF', 'THE', 'HOTEL', 'THE', 'SHADOWS', 'OF', 'THE', 'TENTS', 'INTERSECTED', 'BY', 'THE', 'GOLDEN', 'MOONBEAMS', 'FORMED', 'AS', 'IT', 'WERE', 'A', 'HUGE', 'MOSAIC', 'OF', 'JET', 'AND', 'YELLOW', 'FLAGSTONES'] +6930-75918-0020-1971: ref=['BRAGELONNE', 'WATCHED', 'FOR', 'SOME', 'TIME', 'THE', 'CONDUCT', 'OF', 'THE', 'TWO', 'LOVERS', 'LISTENED', 'TO', 'THE', 'LOUD', 'AND', 'UNCIVIL', 'SLUMBERS', 'OF', 'MANICAMP', 'WHO', 'SNORED', 'AS', 'IMPERIOUSLY', 'AS', 'THOUGH', 'HE', 'WAS', 'WEARING', 'HIS', 'BLUE', 'AND', 'GOLD', 'INSTEAD', 'OF', 'HIS', 'VIOLET', 'SUIT'] +6930-75918-0020-1971: hyp=['BRAGELONNE', 'WATCHED', 'FOR', 'SOME', 'TIME', 'THE', 'CONDUCT', 'OF', 'THE', 'TWO', 'LOVERS', 'LISTENED', 'TO', 'THE', 'LOUD', 'AND', 'UNCIVIL', 'SLUMBERS', 'OF', 'MANICAMP', 'WHO', 'SNORED', 'AS', 'IMPERIOUSLY', 'AS', 'THOUGH', 'HE', 'WAS', 'WEARING', 'HIS', 'BLUE', 'AND', 'GOLD', 'INSTEAD', 'OF', 'HIS', 'VIOLET', 'SUIT'] +6930-76324-0000-1972: ref=['GOLIATH', 'MAKES', 'ANOTHER', 'DISCOVERY'] +6930-76324-0000-1972: hyp=['GOLIATH', 'MAKES', 'ANOTHER', 'DISCOVERY'] +6930-76324-0001-1973: ref=['THEY', 'WERE', 'CERTAINLY', 'NO', 'NEARER', 'THE', 'SOLUTION', 'OF', 'THEIR', 'PROBLEM'] +6930-76324-0001-1973: hyp=['THERE', 'WERE', 'CERTAINLY', 'NO', 'NEARER', 'THE', 'SOLUTION', 'OF', 'THEIR', 'PROBLEM'] +6930-76324-0002-1974: ref=['THE', 'POOR', 'LITTLE', 'THINGS', 'CRIED', 'CYNTHIA', 'THINK', 'OF', 'THEM', 'HAVING', 'BEEN', 'TURNED', 'TO', 'THE', 'WALL', 'ALL', 'THESE', 'YEARS'] +6930-76324-0002-1974: hyp=['THE', 'POOR', 'LITTLE', 'THINGS', 'CRIED', 'CYNTHIA', 'THINK', 'OF', 'THEM', 'HAVING', 'BEEN', 'TURNED', 'TO', 'THE', 'WALL', 'ALL', 'THESE', 'YEARS'] +6930-76324-0003-1975: ref=['NOW', 'WHAT', 'WAS', 'THE', 'SENSE', 'OF', 'IT', 'TWO', 'INNOCENT', 'BABIES', 'LIKE', 'THAT'] +6930-76324-0003-1975: hyp=['NOW', 'WHAT', 'IS', 'THE', 'SENSE', 'OF', 'IT', 'TWO', 'INNOCENT', 'BABIES', 'LIKE', 'THAT'] +6930-76324-0004-1976: ref=['BUT', 'JOYCE', 'HAD', 'NOT', 'BEEN', 'LISTENING', 'ALL', 'AT', 'ONCE', 'SHE', 'PUT', 'DOWN', 'HER', 'CANDLE', 'ON', 'THE', 'TABLE', 'AND', 'FACED', 'HER', 'COMPANION'] +6930-76324-0004-1976: hyp=['BUT', 'JOYCE', 'HAD', 'NOT', 'BEEN', 'LISTENING', 'ALL', 'AT', 'ONCE', 'SHE', 'PUT', 'DOWN', 'HER', 'CANDLE', 'ON', 'THE', 'TABLE', 'AND', 'FACED', 'HER', 'COMPANION'] +6930-76324-0005-1977: ref=['THE', 'TWIN', 'BROTHER', 'DID', 'SOMETHING', 'SHE', "DIDN'T", 'LIKE', 'AND', 'SHE', 'TURNED', 'HIS', 'PICTURE', 'TO', 'THE', 'WALL'] +6930-76324-0005-1977: hyp=['THE', 'TWIN', 'BROTHER', 'DID', 'SOMETHING', 'SHE', "DIDN'T", 'LIKE', 'AND', 'SHE', 'TURNED', 'HIS', 'PICTURE', 'TO', 'THE', 'WALL'] +6930-76324-0006-1978: ref=['HERS', 'HAPPENED', 'TO', 'BE', 'IN', 'THE', 'SAME', 'FRAME', 'TOO', 'BUT', 'SHE', 'EVIDENTLY', "DIDN'T", 'CARE', 'ABOUT', 'THAT'] +6930-76324-0006-1978: hyp=['HERS', 'HAPPENED', 'TO', 'BE', 'ON', 'THE', 'SAME', 'FRAME', 'TOO', 'BUT', 'SHE', 'EVIDENTLY', "DIDN'T", 'CARE', 'ABOUT', 'IT'] +6930-76324-0007-1979: ref=['NOW', 'WHAT', 
'HAVE', 'YOU', 'TO', 'SAY', 'CYNTHIA', 'SPRAGUE'] +6930-76324-0007-1979: hyp=['NOW', 'WHAT', 'HAVE', 'YOU', 'TO', 'SAY', 'CYNTHIA', 'SPROGU'] +6930-76324-0008-1980: ref=['I', 'THOUGHT', 'WE', 'WERE', 'STUMPED', 'AGAIN', 'WHEN', 'I', 'FIRST', 'SAW', 'THAT', 'PICTURE', 'BUT', "IT'S", 'BEEN', 'OF', 'SOME', 'USE', 'AFTER', 'ALL'] +6930-76324-0008-1980: hyp=['I', 'THOUGHT', 'WE', 'WERE', 'STUMPED', 'AGAIN', 'WHEN', 'I', 'FIRST', 'SAW', 'THAT', 'PICTURE', 'BUT', "IT'S", 'BEEN', 'OF', 'SOME', 'USE', 'AFTER', 'ALL'] +6930-76324-0009-1981: ref=['DO', 'YOU', 'SUPPOSE', 'THE', 'MINIATURE', 'WAS', 'A', 'COPY', 'OF', 'THE', 'SAME', 'THING'] +6930-76324-0009-1981: hyp=['DO', 'YOU', 'SUPPOSE', 'THE', 'MINIATURE', 'WAS', 'A', 'COPY', 'OF', 'THE', 'SAME', 'THING'] +6930-76324-0010-1982: ref=['WHAT', 'IN', 'THE', 'WORLD', 'IS', 'THAT', 'QUERIED', 'JOYCE'] +6930-76324-0010-1982: hyp=['WHAT', 'IN', 'THE', 'WORLD', 'IS', 'IT', 'QUERIED', 'JOYCE'] +6930-76324-0011-1983: ref=['THEY', 'WORRY', 'ME', 'TERRIBLY', 'AND', 'BESIDES', "I'D", 'LIKE', 'TO', 'SEE', 'WHAT', 'THIS', 'LOVELY', 'FURNITURE', 'LOOKS', 'LIKE', 'WITHOUT', 'SUCH', 'QUANTITIES', 'OF', 'DUST', 'ALL', 'OVER', 'IT', 'GOOD', 'SCHEME', 'CYN'] +6930-76324-0011-1983: hyp=['THEY', 'WORRY', 'ME', 'TERRIBLY', 'AND', 'BESIDES', "I'D", 'LIKE', 'TO', 'SEE', 'WHAT', 'THIS', 'LOVELY', 'FURNITURE', 'LOOKS', 'LIKE', 'WITHOUT', 'SUCH', 'QUANTITIES', 'OF', 'DUST', 'ALL', 'OVER', 'IT', 'GOOD', 'SCHEME', 'SYM'] +6930-76324-0012-1984: ref=["WE'LL", 'COME', 'IN', 'HERE', 'THIS', 'AFTERNOON', 'WITH', 'OLD', 'CLOTHES', 'ON', 'AND', 'HAVE', 'A', 'REGULAR', 'HOUSE', 'CLEANING'] +6930-76324-0012-1984: hyp=["WE'LL", 'COME', 'IN', 'HERE', 'THIS', 'AFTERNOON', 'WITH', 'OLD', 'CLOTHES', 'ON', 'AND', 'HAVE', 'A', 'REGULAR', 'HOUSE', 'CLEANING'] +6930-76324-0013-1985: ref=['IT', "CAN'T", 'HURT', 'ANYTHING', "I'M", 'SURE', 'FOR', 'WE', "WON'T", 'DISTURB', 'THINGS', 'AT', 'ALL'] +6930-76324-0013-1985: hyp=['IT', "CAN'T", 'HURT', 'ANYTHING', "I'M", 'SURE', 'FOR', 'WE', "WON'T", 'DISTURB', 'THINGS', 'AT', 'ALL'] +6930-76324-0014-1986: ref=['THIS', 'THOUGHT', 'HOWEVER', 'DID', 'NOT', 'ENTER', 'THE', 'HEADS', 'OF', 'THE', 'ENTHUSIASTIC', 'PAIR'] +6930-76324-0014-1986: hyp=['THIS', 'THOUGHT', 'HOWEVER', 'DID', 'NOT', 'ENTER', 'THE', 'HEADS', 'OF', 'THE', 'ENTHUSIASTIC', 'PAIR'] +6930-76324-0015-1987: ref=['SMUGGLING', 'THE', 'HOUSE', 'CLEANING', 'PARAPHERNALIA', 'INTO', 'THE', 'CELLAR', 'WINDOW', 'UNOBSERVED', 'THAT', 'AFTERNOON', 'PROVED', 'NO', 'EASY', 'TASK', 'FOR', 'CYNTHIA', 'HAD', 'ADDED', 'A', 'WHISK', 'BROOM', 'AND', 'DUST', 'PAN', 'TO', 'THE', 'OUTFIT'] +6930-76324-0015-1987: hyp=['SMUGGLING', 'THE', 'HOUSE', 'CLEANING', 'PARAPHERNALIA', 'INTO', 'THE', 'CELLAR', 'WINDOW', 'UNOBSERVED', 'THAT', 'AFTERNOON', 'PROVED', 'NO', 'EASY', 'TASK', 'FOR', 'CYNTHIA', 'HAD', 'ADDED', 'A', 'WHISK', 'BROOM', 'AND', 'DUST', 'PAN', 'TO', 'THE', 'OUTFIT'] +6930-76324-0016-1988: ref=['THE', 'LURE', 'PROVED', 'TOO', 'MUCH', 'FOR', 'HIM', 'AND', 'HE', 'CAME', 'SPORTING', 'AFTER', 'IT', 'AS', 'FRISKILY', 'AS', 'A', 'YOUNG', 'KITTEN', 'MUCH', 'TO', "CYNTHIA'S", 'DELIGHT', 'WHEN', 'SHE', 'CAUGHT', 'SIGHT', 'OF', 'HIM'] +6930-76324-0016-1988: hyp=['THE', 'LURE', 'PROVED', 'TOO', 'MUCH', 'FOR', 'HIM', 'AND', 'HE', 'CAME', 'SPORTING', 'AFTER', 'IT', 'AS', 'FRISKLY', 'AS', 'A', 'YOUNG', 'KITTEN', 'MUCH', 'TO', "CYNTHIA'S", 'DELIGHT', 'WHEN', 'SHE', 'CAUGHT', 'SIGHT', 'OF', 'HIM'] +6930-76324-0017-1989: ref=['OH', 'LET', 'HIM', 'COME', 'ALONG', 'SHE', 'URGED', 'I', 'DO', 'LOVE', 'TO', 'SEE', 'HIM', 'ABOUT', 
'THAT', 'OLD', 'HOUSE'] +6930-76324-0017-1989: hyp=['OH', 'LET', 'HIM', 'COME', 'ALONG', 'SHE', 'URGED', 'I', 'DO', 'LOVE', 'TO', 'SEE', 'HIM', 'ABOUT', 'THAT', 'OLD', 'HOUSE'] +6930-76324-0018-1990: ref=['HE', 'MAKES', 'IT', 'SORT', 'OF', 'COZIER'] +6930-76324-0018-1990: hyp=['HE', 'MAKES', 'IT', 'SORT', 'OF', 'COSIER'] +6930-76324-0019-1991: ref=['NOW', "LET'S", 'DUST', 'THE', 'FURNITURE', 'AND', 'PICTURES'] +6930-76324-0019-1991: hyp=['NOW', "LET'S", 'DUST', 'THE', 'FURNITURE', 'AND', 'PICTURES'] +6930-76324-0020-1992: ref=['YET', 'LITTLE', 'AS', 'IT', 'WAS', 'IT', 'HAD', 'ALREADY', 'MADE', 'A', 'VAST', 'DIFFERENCE', 'IN', 'THE', 'ASPECT', 'OF', 'THE', 'ROOM'] +6930-76324-0020-1992: hyp=['YET', 'LITTLE', 'AS', 'IT', 'WAS', 'IT', 'HAD', 'ALREADY', 'MADE', 'A', 'VAST', 'DIFFERENCE', 'IN', 'THE', 'ASPECT', 'OF', 'THE', 'ROOM'] +6930-76324-0021-1993: ref=['SURFACE', 'DUST', 'AT', 'LEAST', 'HAD', 'BEEN', 'REMOVED', 'AND', 'THE', 'FINE', 'OLD', 'FURNITURE', 'GAVE', 'A', 'HINT', 'OF', 'ITS', 'REAL', 'ELEGANCE', 'AND', 'POLISH'] +6930-76324-0021-1993: hyp=['SURFACE', 'DUST', 'AT', 'LEAST', 'HAD', 'BEEN', 'REMOVED', 'AND', 'THE', 'FINE', 'OLD', 'FURNITURE', 'GAVE', 'A', 'HINT', 'OF', 'ITS', 'REAL', 'ELEGANCE', 'AND', 'POLISH'] +6930-76324-0022-1994: ref=['THEN', 'SHE', 'SUDDENLY', 'REMARKED'] +6930-76324-0022-1994: hyp=['THEN', 'SHE', 'SUDDENLY', 'REMARKED'] +6930-76324-0023-1995: ref=['AND', 'MY', 'POCKET', 'MONEY', 'IS', 'GETTING', 'LOW', 'AGAIN', 'AND', 'YOU', "HAVEN'T", 'ANY', 'LEFT', 'AS', 'USUAL'] +6930-76324-0023-1995: hyp=['AND', 'MY', 'POCKET', 'MONEY', 'IS', 'GETTING', 'LOW', 'AGAIN', 'AND', 'YOU', "HAVEN'T", 'ANY', 'LEFT', 'AS', 'USUAL'] +6930-76324-0024-1996: ref=['THEY', 'SAY', 'ILLUMINATION', 'BY', 'CANDLE', 'LIGHT', 'IS', 'THE', 'PRETTIEST', 'IN', 'THE', 'WORLD'] +6930-76324-0024-1996: hyp=['THEY', 'SAY', 'ILLUMINATION', 'BY', 'CANDLELIGHT', 'IS', 'THE', 'PRETTIEST', 'IN', 'THE', 'WORLD'] +6930-76324-0025-1997: ref=['WHY', "IT'S", 'GOLIATH', 'AS', 'USUAL', 'THEY', 'BOTH', 'CRIED', 'PEERING', 'IN'] +6930-76324-0025-1997: hyp=['WHY', "IT'S", 'GOLIATH', 'AS', 'USUAL', 'THEY', 'BOTH', 'CRIED', 'PEERING', 'IN'] +6930-76324-0026-1998: ref=["ISN'T", 'HE', 'THE', 'GREATEST', 'FOR', 'GETTING', 'INTO', 'ODD', 'CORNERS'] +6930-76324-0026-1998: hyp=["ISN'T", 'HE', 'THE', 'GREATEST', 'FOR', 'GETTING', 'INTO', 'ODD', 'CORNERS'] +6930-76324-0027-1999: ref=['FORGETTING', 'ALL', 'THEIR', 'WEARINESS', 'THEY', 'SEIZED', 'THEIR', 'CANDLES', 'AND', 'SCURRIED', 'THROUGH', 'THE', 'HOUSE', 'FINDING', 'AN', 'OCCASIONAL', 'PAPER', 'TUCKED', 'AWAY', 'IN', 'SOME', 'ODD', 'CORNER'] +6930-76324-0027-1999: hyp=['FORGETTING', 'ALL', 'THEIR', 'WEARINESS', 'THEY', 'SEIZED', 'THEIR', 'CANDLES', 'AND', 'SCURRIED', 'THROUGH', 'THE', 'HOUSE', 'FINDING', 'ON', 'OCCASIONAL', 'PAPER', 'TUCKED', 'AWAY', 'IN', 'SOME', 'ODD', 'CORNER'] +6930-76324-0028-2000: ref=['WELL', "I'M", 'CONVINCED', 'THAT', 'THE', 'BOARDED', 'UP', 'HOUSE', 'MYSTERY', 'HAPPENED', 'NOT', 'EARLIER', 'THAN', 'APRIL', 'SIXTEENTH', 'EIGHTEEN', 'SIXTY', 'ONE', 'AND', 'PROBABLY', 'NOT', 'MUCH', 'LATER'] +6930-76324-0028-2000: hyp=['WELL', "I'M", 'CONVINCED', 'THAT', 'THE', 'BOARDED', 'UP', 'HOUSE', 'MYSTERY', 'HAPPENED', 'NOT', 'EARLIER', 'THAN', 'APRIL', 'SIXTEENTH', 'EIGHTEEN', 'SIXTY', 'ONE', 'AND', 'PROBABLY', 'NOT', 'MUCH', 'LATER'] +6930-81414-0000-2001: ref=['NO', 'WORDS', 'WERE', 'SPOKEN', 'NO', 'LANGUAGE', 'WAS', 'UTTERED', 'SAVE', 'THAT', 'OF', 'WAILING', 'AND', 'HISSING', 'AND', 'THAT', 'SOMEHOW', 'WAS', 'INDISTINCT', 'AS', 'IF', 'IT', 'EXISTED', 
'IN', 'FANCY', 'AND', 'NOT', 'IN', 'REALITY'] +6930-81414-0000-2001: hyp=['NO', 'WORDS', 'WERE', 'SPOKEN', 'NO', 'LANGUAGE', 'WAS', 'UTTERED', 'SAVE', 'THAT', 'OF', 'WAILING', 'AND', 'HISSING', 'AND', 'THAT', 'SOMEHOW', 'WAS', 'INDISTINCT', 'AS', 'IF', 'IT', 'EXISTED', 'IN', 'FANCY', 'AND', 'NOT', 'IN', 'REALITY'] +6930-81414-0001-2002: ref=['I', 'HEARD', 'A', 'NOISE', 'BEHIND', 'I', 'TURNED', 'AND', 'SAW', 'KAFFAR', 'HIS', 'BLACK', 'EYES', 'SHINING', 'WHILE', 'IN', 'HIS', 'HAND', 'HE', 'HELD', 'A', 'GLEAMING', 'KNIFE', 'HE', 'LIFTED', 'IT', 'ABOVE', 'HIS', 'HEAD', 'AS', 'IF', 'TO', 'STRIKE', 'BUT', 'I', 'HAD', 'THE', 'STRENGTH', 'OF', 'TEN', 'MEN', 'AND', 'I', 'HURLED', 'HIM', 'FROM', 'ME'] +6930-81414-0001-2002: hyp=['I', 'HEARD', 'A', 'NOISE', 'BEHIND', 'I', 'TURNED', 'AND', 'SAW', 'KAFFIR', 'HIS', 'BLACK', 'EYES', 'SHINING', 'WHILE', 'IN', 'HIS', 'HAND', 'HE', 'HELD', 'A', 'GLEAMING', 'KNIFE', 'HE', 'LIFTED', 'IT', 'ABOVE', 'HIS', 'HEAD', 'AS', 'IF', 'TO', 'STRIKE', 'BUT', 'I', 'HAD', 'THE', 'STRENGTH', 'OF', 'TEN', 'MEN', 'AND', 'I', 'HURLED', 'HIM', 'FROM', 'ME'] +6930-81414-0002-2003: ref=['ONWARD', 'SAID', 'A', 'DISTANT', 'VOICE'] +6930-81414-0002-2003: hyp=['ONWARD', 'SAID', 'A', 'DISTANT', 'VOICE'] +6930-81414-0003-2004: ref=['NO', 'SOUND', 'BROKE', 'THE', 'STILLNESS', 'OF', 'THE', 'NIGHT'] +6930-81414-0003-2004: hyp=['NO', 'SOUND', 'BROKE', 'THE', 'STILLNESS', 'OF', 'THE', 'NIGHT'] +6930-81414-0004-2005: ref=['THE', 'STORY', 'OF', 'ITS', 'EVIL', 'INFLUENCE', 'CAME', 'BACK', 'TO', 'ME', 'AND', 'IN', 'MY', 'BEWILDERED', 'CONDITION', 'I', 'WONDERED', 'WHETHER', 'THERE', 'WAS', 'NOT', 'SOME', 'TRUTH', 'IN', 'WHAT', 'HAD', 'BEEN', 'SAID'] +6930-81414-0004-2005: hyp=['THE', 'STORY', 'OF', 'ITS', 'EVIL', 'INFLUENCE', 'CAME', 'BACK', 'TO', 'ME', 'AND', 'IN', 'MY', 'BEWILDERED', 'CONDITION', 'I', 'WONDERED', 'WHETHER', 'THERE', 'WAS', 'NOT', 'SOME', 'TRUTH', 'IN', 'WHAT', 'HAD', 'BEEN', 'SAID'] +6930-81414-0005-2006: ref=['WHAT', 'WAS', 'THAT'] +6930-81414-0005-2006: hyp=['WHAT', 'WAS', 'THAT'] +6930-81414-0006-2007: ref=['WHAT', 'THEN', 'A', 'HUMAN', 'HAND', 'LARGE', 'AND', 'SHAPELY', 'APPEARED', 'DISTINCTLY', 'ON', 'THE', 'SURFACE', 'OF', 'THE', 'POND'] +6930-81414-0006-2007: hyp=['WHAT', 'THEN', 'A', 'HUMAN', 'HAND', 'LARGE', 'AND', 'SHAPELY', 'APPEARED', 'DISTINCTLY', 'ON', 'THE', 'SURFACE', 'OF', 'THE', 'POND'] +6930-81414-0007-2008: ref=['NOTHING', 'MORE', 'NOT', 'EVEN', 'THE', 'WRIST', 'TO', 'WHICH', 'IT', 'MIGHT', 'BE', 'ATTACHED'] +6930-81414-0007-2008: hyp=['NOTHING', 'MORE', 'NOT', 'EVEN', 'THE', 'RISK', 'TO', 'WHICH', 'IT', 'MIGHT', 'BE', 'ATTACHED'] +6930-81414-0008-2009: ref=['IT', 'DID', 'NOT', 'BECKON', 'OR', 'INDEED', 'MOVE', 'AT', 'ALL', 'IT', 'WAS', 'AS', 'STILL', 'AS', 'THE', 'HAND', 'OF', 'DEATH'] +6930-81414-0008-2009: hyp=['IT', 'DID', 'NOT', 'BECKON', 'OR', 'INDEED', 'MOVE', 'AT', 'ALL', 'IT', 'WAS', 'AS', 'STILL', 'AS', 'THE', 'HAND', 'OF', 'DEATH'] +6930-81414-0009-2010: ref=['I', 'AWOKE', 'TO', 'CONSCIOUSNESS', 'FIGHTING', 'AT', 'FIRST', 'IT', 'SEEMED', 'AS', 'IF', 'I', 'WAS', 'FIGHTING', 'WITH', 'A', 'PHANTOM', 'BUT', 'GRADUALLY', 'MY', 'OPPONENT', 'BECAME', 'MORE', 'REAL', 'TO', 'ME', 'IT', 'WAS', 'KAFFAR'] +6930-81414-0009-2010: hyp=['I', 'AWOKE', 'TO', 'CONSCIOUSNESS', 'FIGHTING', 'AT', 'FIRST', 'IT', 'SEEMED', 'AS', 'IF', 'I', 'WAS', 'FIGHTING', 'WITH', 'A', 'PHANTOM', 'BUT', 'GRADUALLY', 'MY', 'OPPONENT', 'BECAME', 'MORE', 'REAL', 'TO', 'ME', 'IT', 'WAS', 'KAFFIR'] +6930-81414-0010-2011: ref=['A', 'SOUND', 'OF', 'VOICES', 'A', 'FLASH', 'OF', 'LIGHT'] 
+6930-81414-0010-2011: hyp=['A', 'SOUND', 'OF', 'VOICES', 'A', 'FLASH', 'OF', 'LIGHT'] +6930-81414-0011-2012: ref=['A', 'FEELING', 'OF', 'FREEDOM', 'AND', 'I', 'WAS', 'AWAKE', 'WHERE'] +6930-81414-0011-2012: hyp=['A', 'FEELING', 'OF', 'FREEDOM', 'AND', 'I', 'WAS', 'AWAKE', 'WHERE'] +6930-81414-0012-2013: ref=['SAID', 'ANOTHER', 'VOICE', 'WHICH', 'I', 'RECOGNIZED', 'AS', "VOLTAIRE'S", 'KAFFAR'] +6930-81414-0012-2013: hyp=['SAID', 'ANOTHER', 'VOICE', 'WHICH', 'I', 'RECOGNIZED', 'AS', "VOLTAIRE'S", 'CAFFER'] +6930-81414-0013-2014: ref=['I', 'HAD', 'SCARCELY', 'KNOWN', 'WHAT', 'I', 'HAD', 'BEEN', 'SAYING', 'OR', 'DOING', 'UP', 'TO', 'THIS', 'TIME', 'BUT', 'AS', 'HE', 'SPOKE', 'I', 'LOOKED', 'AT', 'MY', 'HAND'] +6930-81414-0013-2014: hyp=['I', 'HAD', 'SCARCELY', 'KNOWN', 'WHAT', 'I', 'HAD', 'BEEN', 'SAYING', 'OR', 'DOING', 'UP', 'TO', 'THIS', 'TIME', 'BUT', 'AS', 'HE', 'SPOKE', 'I', 'LOOKED', 'AT', 'MY', 'HAND'] +6930-81414-0014-2015: ref=['IN', 'THE', 'LIGHT', 'OF', 'THE', 'MOON', 'I', 'SAW', 'A', 'KNIFE', 'RED', 'WITH', 'BLOOD', 'AND', 'MY', 'HAND', 'TOO', 'WAS', 'ALSO', 'DISCOLOURED'] +6930-81414-0014-2015: hyp=['IN', 'THE', 'LIGHT', 'OF', 'THE', 'MOON', 'I', 'SAW', 'A', 'KNIFE', 'RED', 'WITH', 'BLOOD', 'AND', 'MY', 'HAND', 'TOO', 'WAS', 'ALSO', 'DISCOLOURED'] +6930-81414-0015-2016: ref=['I', 'DO', 'NOT', 'KNOW', 'I', 'AM', 'DAZED', 'BEWILDERED'] +6930-81414-0015-2016: hyp=['I', 'DO', 'NOT', 'KNOW', 'I', 'AM', 'DAZED', 'BEWILDERED'] +6930-81414-0016-2017: ref=['BUT', 'THAT', 'IS', "KAFFAR'S", 'KNIFE'] +6930-81414-0016-2017: hyp=['BUT', 'THAT', 'IS', "KAFFIR'S", 'KNIFE'] +6930-81414-0017-2018: ref=['I', 'KNOW', 'HE', 'HAD', 'IT', 'THIS', 'VERY', 'EVENING'] +6930-81414-0017-2018: hyp=['I', 'KNOW', 'HE', 'HAD', 'IT', 'THIS', 'VERY', 'EVENING'] +6930-81414-0018-2019: ref=['I', 'REMEMBER', 'SAYING', 'HAVE', 'WE', 'BEEN', 'TOGETHER'] +6930-81414-0018-2019: hyp=['I', 'REMEMBERED', 'SAYING', 'HAVE', 'WE', 'BEEN', 'TOGETHER'] +6930-81414-0019-2020: ref=['VOLTAIRE', 'PICKED', 'UP', 'SOMETHING', 'FROM', 'THE', 'GROUND', 'AND', 'LOOKED', 'AT', 'IT'] +6930-81414-0019-2020: hyp=['VOLCHERRE', 'PICKED', 'UP', 'SOMETHING', 'FROM', 'THE', 'GROUND', 'AND', 'LOOKED', 'AT', 'IT'] +6930-81414-0020-2021: ref=['I', 'SAY', 'YOU', 'DO', 'KNOW', 'WHAT', 'THIS', 'MEANS', 'AND', 'YOU', 'MUST', 'TELL', 'US'] +6930-81414-0020-2021: hyp=['I', 'SAY', 'YOU', 'DO', 'KNOW', 'WHAT', 'THIS', 'MEANS', 'AND', 'YOU', 'MUST', 'TELL', 'US'] +6930-81414-0021-2022: ref=['A', 'TERRIBLE', 'THOUGHT', 'FLASHED', 'INTO', 'MY', 'MIND'] +6930-81414-0021-2022: hyp=['A', 'TERRIBLE', 'THOUGHT', 'FLASHED', 'INTO', 'MY', 'MIND'] +6930-81414-0022-2023: ref=['I', 'HAD', 'AGAIN', 'BEEN', 'ACTING', 'UNDER', 'THE', 'INFLUENCE', 'OF', 'THIS', "MAN'S", 'POWER'] +6930-81414-0022-2023: hyp=['I', 'HAD', 'AGAIN', 'BEEN', 'ACTING', 'UNDER', 'THE', 'INFLUENCE', 'OF', 'THIS', "MAN'S", 'POWER'] +6930-81414-0023-2024: ref=['PERCHANCE', 'TOO', "KAFFAR'S", 'DEATH', 'MIGHT', 'SERVE', 'HIM', 'IN', 'GOOD', 'STEAD'] +6930-81414-0023-2024: hyp=['PERCHANCE', 'TOO', 'KAFFIRS', 'DEATH', 'MIGHT', 'SERVE', 'HIM', 'IN', 'GOOD', 'STEAD'] +6930-81414-0024-2025: ref=['MY', 'TONGUE', 'REFUSED', 'TO', 'ARTICULATE', 'MY', 'POWER', 'OF', 'SPEECH', 'LEFT', 'ME'] +6930-81414-0024-2025: hyp=['MY', 'TONGUE', 'REFUSED', 'TO', 'ARTICULATE', 'MY', 'POWER', 'OF', 'SPEECH', 'LEFT', 'ME'] +6930-81414-0025-2026: ref=['MY', 'POSITION', 'WAS', 'TOO', 'TERRIBLE'] +6930-81414-0025-2026: hyp=['MY', 'POSITION', 'WAS', 'TOO', 'TERRIBLE'] +6930-81414-0026-2027: ref=['MY', 'OVERWROUGHT', 'NERVES', 
'YIELDED', 'AT', 'LAST'] +6930-81414-0026-2027: hyp=['MY', 'OVER', 'WROUGHT', 'NERVES', 'YIELDED', 'AT', 'LAST'] +6930-81414-0027-2028: ref=['FOR', 'SOME', 'TIME', 'AFTER', 'THAT', 'I', 'REMEMBERED', 'NOTHING', 'DISTINCTLY'] +6930-81414-0027-2028: hyp=['FOR', 'SOME', 'TIME', 'AFTER', 'THAT', 'I', 'REMEMBERED', 'NOTHING', 'DISTINCTLY'] +7021-79730-0000-2029: ref=['THE', 'THREE', 'MODES', 'OF', 'MANAGEMENT'] +7021-79730-0000-2029: hyp=['THE', 'THREE', 'MODES', 'OF', 'MANAGEMENT'] +7021-79730-0001-2030: ref=['TO', 'SUPPOSE', 'THAT', 'THE', 'OBJECT', 'OF', 'THIS', 'WORK', 'IS', 'TO', 'AID', 'IN', 'EFFECTING', 'SUCH', 'A', 'SUBSTITUTION', 'AS', 'THAT', 'IS', 'ENTIRELY', 'TO', 'MISTAKE', 'ITS', 'NATURE', 'AND', 'DESIGN'] +7021-79730-0001-2030: hyp=['TO', 'SUPPOSE', 'THAT', 'THE', 'OBJECT', 'OF', 'THIS', 'WORK', 'IS', 'TO', 'AID', 'IN', 'EFFECTING', 'SUCH', 'A', 'SUBSTITUTION', 'AS', 'THAT', 'IS', 'ENTIRELY', 'TO', 'MISTAKE', 'ITS', 'NATURE', 'AND', 'DESIGN'] +7021-79730-0002-2031: ref=['BY', 'REASON', 'AND', 'AFFECTION'] +7021-79730-0002-2031: hyp=['BY', 'REASON', 'AND', 'AFFECTION'] +7021-79730-0003-2032: ref=['AS', 'THE', 'CHAISE', 'DRIVES', 'AWAY', 'MARY', 'STANDS', 'BEWILDERED', 'AND', 'PERPLEXED', 'ON', 'THE', 'DOOR', 'STEP', 'HER', 'MIND', 'IN', 'A', 'TUMULT', 'OF', 'EXCITEMENT', 'IN', 'WHICH', 'HATRED', 'OF', 'THE', 'DOCTOR', 'DISTRUST', 'AND', 'SUSPICION', 'OF', 'HER', 'MOTHER', 'DISAPPOINTMENT', 'VEXATION', 'AND', 'ILL', 'HUMOR', 'SURGE', 'AND', 'SWELL', 'AMONG', 'THOSE', 'DELICATE', 'ORGANIZATIONS', 'ON', 'WHICH', 'THE', 'STRUCTURE', 'AND', 'DEVELOPMENT', 'OF', 'THE', 'SOUL', 'SO', 'CLOSELY', 'DEPEND', 'DOING', 'PERHAPS', 'AN', 'IRREPARABLE', 'INJURY'] +7021-79730-0003-2032: hyp=['AS', 'THE', 'CHASE', 'DRIVES', 'AWAY', 'MARY', 'STANDS', 'BEWILDERED', 'AND', 'PERPLEXED', 'ON', 'THE', 'DOORSTEP', 'HER', 'MIND', 'IN', 'A', 'TUMULT', 'OF', 'EXCITEMENT', 'IN', 'WHICH', 'HATRED', 'OF', 'THE', 'DOCTOR', 'DISTRUST', 'AND', 'SUSPICION', 'OF', 'HER', 'MOTHER', 'DISAPPOINTMENT', 'VEXATION', 'AND', 'ILL', 'HUMOR', 'SURGE', 'AND', 'SWELL', 'AMONG', 'THOSE', 'DELICATE', 'ORGANIZATIONS', 'ON', 'WHICH', 'THE', 'STRUCTURE', 'AND', 'DEVELOPMENT', 'OF', 'THE', 'SOUL', 'SO', 'CLOSELY', 'DEPEND', 'DOING', 'PERHAPS', 'AN', 'IRREPARABLE', 'INJURY'] +7021-79730-0004-2033: ref=['THE', 'MOTHER', 'AS', 'SOON', 'AS', 'THE', 'CHAISE', 'IS', 'SO', 'FAR', 'TURNED', 'THAT', 'MARY', 'CAN', 'NO', 'LONGER', 'WATCH', 'THE', 'EXPRESSION', 'OF', 'HER', 'COUNTENANCE', 'GOES', 'AWAY', 'FROM', 'THE', 'DOOR', 'WITH', 'A', 'SMILE', 'OF', 'COMPLACENCY', 'AND', 'SATISFACTION', 'UPON', 'HER', 'FACE', 'AT', 'THE', 'INGENUITY', 'AND', 'SUCCESS', 'OF', 'HER', 'LITTLE', 'ARTIFICE'] +7021-79730-0004-2033: hyp=['THE', 'MOTHER', 'AS', 'SOON', 'AS', 'THE', 'CHASE', 'IS', 'SO', 'FAR', 'TURNED', 'THAT', 'MARY', 'CAN', 'NO', 'LONGER', 'WATCH', 'THE', 'EXPRESSION', 'OF', 'HER', 'COUNTENANCE', 'GOES', 'AWAY', 'FROM', 'THE', 'DOOR', 'WITH', 'A', 'SMILE', 'OF', 'COMPLACENCY', 'AND', 'SATISFACTION', 'ON', 'HER', 'FACE', 'AT', 'THE', 'INGENUITY', 'AND', 'SUCCESS', 'OF', 'HER', 'LITTLE', 'ARTIFICE'] +7021-79730-0005-2034: ref=['SO', 'YOU', 'WILL', 'BE', 'A', 'GOOD', 'GIRL', 'I', 'KNOW', 'AND', 'NOT', 'MAKE', 'ANY', 'TROUBLE', 'BUT', 'WILL', 'STAY', 'AT', 'HOME', 'CONTENTEDLY', "WON'T", 'YOU'] +7021-79730-0005-2034: hyp=['SO', 'YOU', 'WILL', 'BE', 'A', 'GOOD', 'GIRL', 'I', 'KNOW', 'AND', 'NOT', 'MAKE', 'ANY', 'TROUBLE', 'BUT', 'WILL', 'STAY', 'AT', 'HOME', 'CONTENTEDLY', "WON'T", 'YOU'] +7021-79730-0006-2035: ref=['THE', 'MOTHER', 'IN', 'MANAGING', 
'THE', 'CASE', 'IN', 'THIS', 'WAY', 'RELIES', 'PARTLY', 'ON', 'CONVINCING', 'THE', 'REASON', 'OF', 'THE', 'CHILD', 'AND', 'PARTLY', 'ON', 'AN', 'APPEAL', 'TO', 'HER', 'AFFECTION'] +7021-79730-0006-2035: hyp=['THE', 'MOTHER', 'IN', 'MANAGING', 'THE', 'CASE', 'IN', 'THIS', 'WAY', 'RELIES', 'PARTLY', 'ON', 'CONVINCING', 'THE', 'REASON', 'OF', 'THE', 'CHILD', 'AND', 'PARTLY', 'ON', 'AN', 'APPEAL', 'TO', 'HER', 'AFFECTION'] +7021-79730-0007-2036: ref=['IF', 'YOU', 'SHOULD', 'NOT', 'BE', 'A', 'GOOD', 'GIRL', 'BUT', 'SHOULD', 'SHOW', 'SIGNS', 'OF', 'MAKING', 'US', 'ANY', 'TROUBLE', 'I', 'SHALL', 'HAVE', 'TO', 'SEND', 'YOU', 'OUT', 'SOMEWHERE', 'TO', 'THE', 'BACK', 'PART', 'OF', 'THE', 'HOUSE', 'UNTIL', 'WE', 'ARE', 'GONE'] +7021-79730-0007-2036: hyp=['IF', 'YOU', 'SHOULD', 'NOT', 'BE', 'A', 'GOOD', 'GIRL', 'BUT', 'SHOULD', 'SHOW', 'SIGNS', 'OF', 'MAKING', 'US', 'ANY', 'TROUBLE', 'I', 'SHALL', 'HAVE', 'TO', 'SEND', 'YOU', 'OUT', 'SOMEWHERE', 'TO', 'THE', 'BACK', 'PART', 'OF', 'THE', 'HOUSE', 'UNTIL', 'WE', 'ARE', 'GONE'] +7021-79730-0008-2037: ref=['BUT', 'THIS', 'LAST', 'SUPPOSITION', 'IS', 'ALMOST', 'ALWAYS', 'UNNECESSARY', 'FOR', 'IF', 'MARY', 'HAS', 'BEEN', 'HABITUALLY', 'MANAGED', 'ON', 'THIS', 'PRINCIPLE', 'SHE', 'WILL', 'NOT', 'MAKE', 'ANY', 'TROUBLE'] +7021-79730-0008-2037: hyp=['BUT', 'THIS', 'LAST', 'SUPPOSITION', 'IS', 'ALMOST', 'ALWAYS', 'UNNECESSARY', 'FOR', 'IF', 'MARY', 'HAS', 'BEEN', 'HABITUALLY', 'MANAGED', 'ON', 'THIS', 'PRINCIPLE', 'SHE', 'WILL', 'NOT', 'MAKE', 'ANY', 'TROUBLE'] +7021-79730-0009-2038: ref=['IT', 'IS', 'INDEED', 'TRUE', 'THAT', 'THE', 'IMPORTANCE', 'OF', 'TACT', 'AND', 'SKILL', 'IN', 'THE', 'TRAINING', 'OF', 'THE', 'YOUNG', 'AND', 'OF', 'CULTIVATING', 'THEIR', 'REASON', 'AND', 'SECURING', 'THEIR', 'AFFECTION', 'CAN', 'NOT', 'BE', 'OVERRATED'] +7021-79730-0009-2038: hyp=['IT', 'IS', 'INDEED', 'TRUE', 'THAT', 'THE', 'IMPORTANCE', 'OF', 'TACT', 'AND', 'SKILL', 'IN', 'THE', 'TRAINING', 'OF', 'THE', 'YOUNG', 'AND', 'OF', 'CULTIVATING', 'THEIR', 'REASON', 'AND', 'SECURING', 'THEIR', 'AFFECTION', 'CANNOT', 'BE', 'OVERRATED'] +7021-79740-0000-2039: ref=['TO', 'SUCH', 'PERSONS', 'THESE', 'INDIRECT', 'MODES', 'OF', 'TRAINING', 'CHILDREN', 'IN', 'HABITS', 'OF', 'SUBORDINATION', 'TO', 'THEIR', 'WILL', 'OR', 'RATHER', 'OF', 'YIELDING', 'TO', 'THEIR', 'INFLUENCE', 'ARE', 'SPECIALLY', 'USEFUL'] +7021-79740-0000-2039: hyp=['TO', 'SUCH', 'PERSONS', 'THESE', 'INDIRECT', 'MODES', 'OF', 'TRAINING', 'CHILDREN', 'IN', 'HABITS', 'OF', 'SUBORDINATION', 'TO', 'THEIR', 'WILL', 'OR', 'RATHER', 'OF', 'YIELDING', 'TO', 'THEIR', 'INFLUENCE', 'ARE', 'SPECIALLY', 'USEFUL'] +7021-79740-0001-2040: ref=['DELLA', 'HAD', 'A', 'YOUNG', 'SISTER', 'NAMED', 'MARIA', 'AND', 'A', 'COUSIN', 'WHOSE', 'NAME', 'WAS', 'JANE'] +7021-79740-0001-2040: hyp=['DELLA', 'HAD', 'A', 'YOUNG', 'SISTER', 'NAMED', 'MARIA', 'AND', 'A', 'COUSIN', 'WHOSE', 'NAME', 'WAS', 'JANE'] +7021-79740-0002-2041: ref=['NOW', 'DELIA', 'CONTRIVED', 'TO', 'OBTAIN', 'A', 'GREAT', 'INFLUENCE', 'AND', 'ASCENDENCY', 'OVER', 'THE', 'MINDS', 'OF', 'THE', 'CHILDREN', 'BY', 'MEANS', 'OF', 'THESE', 'DOLLS'] +7021-79740-0002-2041: hyp=['NOW', 'DELIA', 'CONTRIVED', 'TO', 'OBTAIN', 'A', 'GREAT', 'INFLUENCE', 'AND', 'ASCENDANCY', 'OVER', 'THE', 'MINDS', 'OF', 'THE', 'CHILDREN', 'BY', 'MEANS', 'OF', 'THESE', 'DOLLS'] +7021-79740-0003-2042: ref=['TO', 'GIVE', 'AN', 'IDEA', 'OF', 'THESE', 'CONVERSATIONS', 'I', 'WILL', 'REPORT', 'ONE', 'OF', 'THEM', 'IN', 'FULL'] +7021-79740-0003-2042: hyp=['TO', 'GIVE', 'AN', 'IDEA', 'OF', 'THESE', 'CONVERSATIONS', 'I', 
'WILL', 'REPORT', 'ONE', 'OF', 'THEM', 'IN', 'FULL'] +7021-79740-0004-2043: ref=['YOU', 'HAVE', 'COME', 'ANDELLA', 'ANDELLA', 'WAS', 'THE', 'NAME', 'OF', "JANE'S", 'DOLL', 'TO', 'MAKE', 'ROSALIE', 'A', 'VISIT'] +7021-79740-0004-2043: hyp=['YOU', 'HAVE', 'COME', 'ANDELA', 'AND', 'DELA', 'WAS', 'THE', 'NAME', 'OF', "JANE'S", 'DOLL', 'TO', 'MAKE', 'ROSALIE', 'A', 'VISIT'] +7021-79740-0005-2044: ref=['I', 'AM', 'VERY', 'GLAD'] +7021-79740-0005-2044: hyp=['I', 'AM', 'VERY', 'GLAD'] +7021-79740-0006-2045: ref=['I', 'EXPECT', 'YOU', 'HAVE', 'BEEN', 'A', 'VERY', 'GOOD', 'GIRL', 'ANDELLA', 'SINCE', 'YOU', 'WERE', 'HERE', 'LAST'] +7021-79740-0006-2045: hyp=['I', 'EXPECT', 'YOU', 'HAVE', 'BEEN', 'A', 'VERY', 'GOOD', 'GIRL', 'ANDELLA', 'SINCE', 'YOU', 'WERE', 'HERE', 'LAST'] +7021-79740-0007-2046: ref=['THEN', 'TURNING', 'TO', 'JANE', 'SHE', 'ASKED', 'IN', 'A', 'SOMEWHAT', 'ALTERED', 'TONE', 'HAS', 'SHE', 'BEEN', 'A', 'GOOD', 'GIRL', 'JANE'] +7021-79740-0007-2046: hyp=['THEN', 'TURNING', 'TO', 'JANE', 'SHE', 'ASKED', 'IN', 'A', 'SOMEWHAT', 'ALTERED', 'TONE', 'HAS', 'SHE', 'BEEN', 'A', 'GOOD', 'GIRL', 'JANE'] +7021-79740-0008-2047: ref=['FOR', 'INSTANCE', 'ONE', 'DAY', 'THE', 'CHILDREN', 'HAD', 'BEEN', 'PLAYING', 'UPON', 'THE', 'PIAZZA', 'WITH', 'BLOCKS', 'AND', 'OTHER', 'PLAYTHINGS', 'AND', 'FINALLY', 'HAD', 'GONE', 'INTO', 'THE', 'HOUSE', 'LEAVING', 'ALL', 'THE', 'THINGS', 'ON', 'THE', 'FLOOR', 'OF', 'THE', 'PIAZZA', 'INSTEAD', 'OF', 'PUTTING', 'THEM', 'AWAY', 'IN', 'THEIR', 'PLACES', 'AS', 'THEY', 'OUGHT', 'TO', 'HAVE', 'DONE'] +7021-79740-0008-2047: hyp=['FOR', 'INSTANCE', 'ONE', 'DAY', 'THE', 'CHILDREN', 'HAD', 'BEEN', 'PLAYING', 'UPON', 'THE', 'PIAZZA', 'WITH', 'BLOCKS', 'AND', 'OTHER', 'PLAYTHINGS', 'AND', 'FINALLY', 'HAD', 'GONE', 'INTO', 'THE', 'HOUSE', 'LEAVING', 'ALL', 'THE', 'THINGS', 'ON', 'THE', 'FLOOR', 'OF', 'THE', 'PIAZZA', 'INSTEAD', 'OF', 'PUTTING', 'THEM', 'AWAY', 'IN', 'THEIR', 'PLACES', 'AS', 'THEY', 'OUGHT', 'TO', 'HAVE', 'DONE'] +7021-79740-0009-2048: ref=['THEY', 'WERE', 'NOW', 'PLAYING', 'WITH', 'THEIR', 'DOLLS', 'IN', 'THE', 'PARLOR'] +7021-79740-0009-2048: hyp=['THEY', 'WERE', 'NOW', 'PLAYING', 'WITH', 'THEIR', 'DOLLS', 'IN', 'THE', 'PARLOR'] +7021-79740-0010-2049: ref=['DELIA', 'CAME', 'TO', 'THE', 'PARLOR', 'AND', 'WITH', 'AN', 'AIR', 'OF', 'GREAT', 'MYSTERY', 'BECKONED', 'THE', 'CHILDREN', 'ASIDE', 'AND', 'SAID', 'TO', 'THEM', 'IN', 'A', 'WHISPER', 'LEAVE', 'ANDELLA', 'AND', 'ROSALIE', 'HERE', 'AND', "DON'T", 'SAY', 'A', 'WORD', 'TO', 'THEM'] +7021-79740-0010-2049: hyp=['DELLIA', 'CAME', 'TO', 'THE', 'PARLOR', 'AND', 'WITH', 'AN', 'AIR', 'OF', 'GREAT', 'MYSTERY', 'BECKONED', 'THE', 'CHILDREN', 'ASIDE', 'AND', 'SAID', 'TO', 'THEM', 'IN', 'A', 'WHISPER', 'LEAVE', 'ANDELA', 'AND', 'ROSALIE', 'HERE', 'AND', "DON'T", 'SAY', 'A', 'WORD', 'TO', 'THEM'] +7021-79740-0011-2050: ref=['SO', 'SAYING', 'SHE', 'LED', 'THE', 'WAY', 'ON', 'TIPTOE', 'FOLLOWED', 'BY', 'THE', 'CHILDREN', 'OUT', 'OF', 'THE', 'ROOM', 'AND', 'ROUND', 'BY', 'A', 'CIRCUITOUS', 'ROUTE', 'TO', 'THE', 'PIAZZA', 'THERE'] +7021-79740-0011-2050: hyp=['SO', 'SAYING', 'SHE', 'LED', 'THE', 'WAY', 'ON', 'TIPTOE', 'FOLLOWED', 'BY', 'THE', 'CHILDREN', 'OUT', 'OF', 'THE', 'ROOM', 'AND', 'ROUND', 'BY', 'A', 'CIRCUITOUS', 'ROUTE', 'TO', 'THE', 'PIAZZA', 'THERE'] +7021-79740-0012-2051: ref=['SAID', 'SHE', 'POINTING', 'TO', 'THE', 'PLAYTHINGS', 'SEE'] +7021-79740-0012-2051: hyp=['SAID', 'SHE', 'POINTING', 'TO', 'THE', 'PLAYTHINGS', 'SEE'] +7021-79740-0013-2052: ref=['PUT', 'THESE', 'PLAYTHINGS', 'ALL', 'AWAY', 'QUICK', 'AND', 
'CAREFULLY', 'AND', 'WE', 'WILL', 'NOT', 'LET', 'THEM', 'KNOW', 'ANY', 'THING', 'ABOUT', 'YOUR', 'LEAVING', 'THEM', 'OUT'] +7021-79740-0013-2052: hyp=['PUT', 'THESE', 'PLAYTHINGS', 'ALL', 'AWAY', 'QUICK', 'AND', 'CAREFULLY', 'AND', 'WE', 'WILL', 'NOT', 'LET', 'THEM', 'KNOW', 'ANYTHING', 'ABOUT', 'YOUR', 'LEAVING', 'THEM', 'OUT'] +7021-79740-0014-2053: ref=['AND', 'THIS', 'METHOD', 'OF', 'TREATING', 'THE', 'CASE', 'WAS', 'MUCH', 'MORE', 'EFFECTUAL', 'IN', 'MAKING', 'THEM', 'DISPOSED', 'TO', 'AVOID', 'COMMITTING', 'A', 'SIMILAR', 'FAULT', 'ANOTHER', 'TIME', 'THAN', 'ANY', 'DIRECT', 'REBUKES', 'OR', 'EXPRESSIONS', 'OF', 'DISPLEASURE', 'ADDRESSED', 'PERSONALLY', 'TO', 'THEM', 'WOULD', 'HAVE', 'BEEN'] +7021-79740-0014-2053: hyp=['AND', 'THIS', 'METHOD', 'OF', 'TREATING', 'THE', 'CASE', 'WAS', 'MUCH', 'MORE', 'EFFECTUAL', 'IN', 'MAKING', 'THEM', 'DISPOSED', 'TO', 'AVOID', 'COMMITTING', 'A', 'SIMILAR', 'FAULT', 'ANOTHER', 'TIME', 'THAN', 'ANY', 'DIRECT', 'REBUKES', 'OR', 'EXPRESSIONS', 'OF', 'DISPLEASURE', 'ADDRESSED', 'PERSONALLY', 'TO', 'THEM', 'WOULD', 'HAVE', 'BEEN'] +7021-79759-0000-2054: ref=['NATURE', 'OF', 'THE', 'EFFECT', 'PRODUCED', 'BY', 'EARLY', 'IMPRESSIONS'] +7021-79759-0000-2054: hyp=['NATURE', 'OF', 'THE', 'EFFECT', 'PRODUCED', 'BY', 'EARLY', 'IMPRESSIONS'] +7021-79759-0001-2055: ref=['THAT', 'IS', 'COMPARATIVELY', 'NOTHING'] +7021-79759-0001-2055: hyp=['THAT', 'IS', 'COMPARATIVELY', 'NOTHING'] +7021-79759-0002-2056: ref=['THEY', 'ARE', 'CHIEFLY', 'FORMED', 'FROM', 'COMBINATIONS', 'OF', 'THE', 'IMPRESSIONS', 'MADE', 'IN', 'CHILDHOOD'] +7021-79759-0002-2056: hyp=['THEY', 'ARE', 'CHIEFLY', 'FORMED', 'FROM', 'COMBINATIONS', 'OF', 'THE', 'IMPRESSIONS', 'MADE', 'IN', 'CHILDHOOD'] +7021-79759-0003-2057: ref=['VAST', 'IMPORTANCE', 'AND', 'INFLUENCE', 'OF', 'THIS', 'MENTAL', 'FURNISHING'] +7021-79759-0003-2057: hyp=['VAST', 'IMPORTANCE', 'AND', 'INFLUENCE', 'OF', 'THIS', 'MENTAL', 'FURNISHING'] +7021-79759-0004-2058: ref=['WITHOUT', 'GOING', 'TO', 'ANY', 'SUCH', 'EXTREME', 'AS', 'THIS', 'WE', 'CAN', 'EASILY', 'SEE', 'ON', 'REFLECTION', 'HOW', 'VAST', 'AN', 'INFLUENCE', 'ON', 'THE', 'IDEAS', 'AND', 'CONCEPTIONS', 'AS', 'WELL', 'AS', 'ON', 'THE', 'PRINCIPLES', 'OF', 'ACTION', 'IN', 'MATURE', 'YEARS', 'MUST', 'BE', 'EXERTED', 'BY', 'THE', 'NATURE', 'AND', 'CHARACTER', 'OF', 'THE', 'IMAGES', 'WHICH', 'THE', 'PERIOD', 'OF', 'INFANCY', 'AND', 'CHILDHOOD', 'IMPRESSES', 'UPON', 'THE', 'MIND'] +7021-79759-0004-2058: hyp=['WITHOUT', 'GOING', 'TO', 'ANY', 'SUCH', 'EXTREME', 'AS', 'THIS', 'WE', 'CAN', 'EASILY', 'SEE', 'ON', 'REFLECTION', 'HOW', 'VAST', 'AN', 'INFLUENCE', 'ON', 'THE', 'IDEAS', 'AND', 'CONCEPTIONS', 'AS', 'WELL', 'AS', 'ON', 'THE', 'PRINCIPLES', 'OF', 'ACTION', 'IN', 'MATURE', 'YEARS', 'MUST', 'BE', 'EXERTED', 'BY', 'THE', 'NATURE', 'AND', 'CHARACTER', 'OF', 'THE', 'IMAGES', 'WHICH', 'THE', 'PERIOD', 'OF', 'INFANCY', 'AND', 'CHILDHOOD', 'IMPRESS', 'UPON', 'THE', 'MIND'] +7021-79759-0005-2059: ref=['THE', 'PAIN', 'PRODUCED', 'BY', 'AN', 'ACT', 'OF', 'HASTY', 'AND', 'ANGRY', 'VIOLENCE', 'TO', 'WHICH', 'A', 'FATHER', 'SUBJECTS', 'HIS', 'SON', 'MAY', 'SOON', 'PASS', 'AWAY', 'BUT', 'THE', 'MEMORY', 'OF', 'IT', 'DOES', 'NOT', 'PASS', 'AWAY', 'WITH', 'THE', 'PAIN'] +7021-79759-0005-2059: hyp=['THE', 'PAIN', 'PRODUCED', 'BY', 'AN', 'ACT', 'OF', 'HASTY', 'AND', 'ANGRY', 'VIOLENCE', 'TO', 'WHICH', 'A', 'FATHER', 'SUBJECTS', 'HIS', 'SON', 'MAY', 'SOON', 'PASS', 'AWAY', 'BUT', 'THE', 'MEMORY', 'OF', 'IT', 'DOES', 'NOT', 'PASS', 'AWAY', 'WITH', 'THE', 'PAIN'] +7021-85628-0000-2060: ref=['BUT', 
'ANDERS', 'CARED', 'NOTHING', 'ABOUT', 'THAT'] +7021-85628-0000-2060: hyp=['BUT', 'ANDREWS', 'CARED', 'NOTHING', 'ABOUT', 'THAT'] +7021-85628-0001-2061: ref=['HE', 'MADE', 'A', 'BOW', 'SO', 'DEEP', 'THAT', 'HIS', 'BACK', 'CAME', 'NEAR', 'BREAKING', 'AND', 'HE', 'WAS', 'DUMBFOUNDED', 'I', 'CAN', 'TELL', 'YOU', 'WHEN', 'HE', 'SAW', 'IT', 'WAS', 'NOBODY', 'BUT', 'ANDERS'] +7021-85628-0001-2061: hyp=['HE', 'MADE', 'A', 'BOW', 'SO', 'DEEP', 'THAT', 'HIS', 'BACK', 'CAME', 'NEAR', 'BREAKING', 'AND', 'HE', 'WAS', 'DUMBFOUNDED', 'I', 'CAN', 'TELL', 'YOU', 'WHEN', 'HE', 'SAW', 'IT', 'WAS', 'NOBODY', 'BUT', 'ANDERS'] +7021-85628-0002-2062: ref=['HE', 'WAS', 'SUCH', 'A', 'BIG', 'BOY', 'THAT', 'HE', 'WORE', 'HIGH', 'BOOTS', 'AND', 'CARRIED', 'A', 'JACK', 'KNIFE'] +7021-85628-0002-2062: hyp=['HE', 'WAS', 'SUCH', 'A', 'BIG', 'BOY', 'THAT', 'HE', 'WORE', 'HIGH', 'BOOTS', 'AND', 'CARRIED', 'A', 'JACKKNIFE'] +7021-85628-0003-2063: ref=['NOW', 'THIS', 'KNIFE', 'WAS', 'A', 'SPLENDID', 'ONE', 'THOUGH', 'HALF', 'THE', 'BLADE', 'WAS', 'GONE', 'AND', 'THE', 'HANDLE', 'WAS', 'A', 'LITTLE', 'CRACKED', 'AND', 'ANDERS', 'KNEW', 'THAT', 'ONE', 'IS', 'ALMOST', 'A', 'MAN', 'AS', 'SOON', 'AS', 'ONE', 'HAS', 'A', 'JACK', 'KNIFE'] +7021-85628-0003-2063: hyp=['NOW', 'THIS', 'KNIFE', 'WAS', 'A', 'SPLENDID', 'ONE', 'THOUGH', 'HALF', 'THE', 'BLADE', 'WAS', 'GONE', 'AND', 'THE', 'HANDLE', 'WAS', 'A', 'LITTLE', 'CRACKED', 'AND', 'ANDERS', 'KNEW', 'THAT', 'ONE', 'IS', 'ALMOST', 'A', 'MAN', 'AS', 'SOON', 'AS', 'ONE', 'HAS', 'A', 'JACK', 'KNIFE'] +7021-85628-0004-2064: ref=['YES', 'WHY', 'NOT', 'THOUGHT', 'ANDERS'] +7021-85628-0004-2064: hyp=['YES', 'WHY', 'NOT', 'THOUGHT', 'ANDERS'] +7021-85628-0005-2065: ref=['SEEING', 'THAT', 'I', 'AM', 'SO', 'FINE', 'I', 'MAY', 'AS', 'WELL', 'GO', 'AND', 'VISIT', 'THE', 'KING'] +7021-85628-0005-2065: hyp=['SEEING', 'THAT', 'I', 'AM', 'SO', 'FINE', 'I', 'MAY', 'AS', 'WELL', 'GO', 'AND', 'VISIT', 'THE', 'KING'] +7021-85628-0006-2066: ref=['I', 'AM', 'GOING', 'TO', 'THE', 'COURT', 'BALL', 'ANSWERED', 'ANDERS'] +7021-85628-0006-2066: hyp=['I', 'AM', 'GOING', 'TO', 'THE', 'COURT', 'BALL', 'ANSWERED', 'ANDREWS'] +7021-85628-0007-2067: ref=['AND', 'SHE', 'TOOK', 'ANDERS', 'HAND', 'AND', 'WALKED', 'WITH', 'HIM', 'UP', 'THE', 'BROAD', 'MARBLE', 'STAIRS', 'WHERE', 'SOLDIERS', 'WERE', 'POSTED', 'AT', 'EVERY', 'THIRD', 'STEP', 'AND', 'THROUGH', 'THE', 'MAGNIFICENT', 'HALLS', 'WHERE', 'COURTIERS', 'IN', 'SILK', 'AND', 'VELVET', 'STOOD', 'BOWING', 'WHEREVER', 'HE', 'WENT'] +7021-85628-0007-2067: hyp=['AND', 'SHE', 'TOOK', "ANDREW'S", 'HAND', 'AND', 'WALKED', 'WITH', 'HIM', 'UP', 'THE', 'BROAD', 'MARBLE', 'STAIRS', 'WHERE', 'SOLDIERS', 'WERE', 'POSTED', 'AT', 'EVERY', 'THIRD', 'STEP', 'AND', 'THROUGH', 'THE', 'MAGNIFICENT', 'HALLS', 'WHERE', 'COURTIERS', 'IN', 'SILK', 'AND', 'VELVET', 'STOOD', 'BOWING', 'WHEREVER', 'HE', 'WENT'] +7021-85628-0008-2068: ref=['FOR', 'LIKE', 'AS', 'NOT', 'THEY', 'MUST', 'HAVE', 'THOUGHT', 'HIM', 'A', 'PRINCE', 'WHEN', 'THEY', 'SAW', 'HIS', 'FINE', 'CAP'] +7021-85628-0008-2068: hyp=['FOR', 'LIKE', 'AS', 'NOT', 'THEY', 'MUST', 'HAVE', 'THOUGHT', 'HIM', 'A', 'PRINCE', 'WHEN', 'THEY', 'SAW', 'HIS', 'FINE', 'CAP'] +7021-85628-0009-2069: ref=['AT', 'THE', 'FARTHER', 'END', 'OF', 'THE', 'LARGEST', 'HALL', 'A', 'TABLE', 'WAS', 'SET', 'WITH', 'GOLDEN', 'CUPS', 'AND', 'GOLDEN', 'PLATES', 'IN', 'LONG', 'ROWS'] +7021-85628-0009-2069: hyp=['AT', 'THE', 'FARTHER', 'END', 'OF', 'THE', 'LARGEST', 'HALL', 'A', 'TABLE', 'WAS', 'SET', 'WITH', 'GOLDEN', 'CUPS', 'AND', 'GOLDEN', 'PLATES', 'IN', 
'LONG', 'ROWS'] +7021-85628-0010-2070: ref=['ON', 'HUGE', 'SILVER', 'PLATTERS', 'WERE', 'PYRAMIDS', 'OF', 'TARTS', 'AND', 'CAKES', 'AND', 'RED', 'WINE', 'SPARKLED', 'IN', 'GLITTERING', 'DECANTERS'] +7021-85628-0010-2070: hyp=['ON', 'HUGE', 'SILVER', 'PLATTERS', 'WERE', 'PYRAMIDS', 'OF', 'TARTS', 'AND', 'CAKES', 'AND', 'RED', 'WINE', 'SPARKLED', 'IN', 'GLITTERING', 'DECANTERS'] +7021-85628-0011-2071: ref=['THE', 'PRINCESS', 'SAT', 'DOWN', 'UNDER', 'A', 'BLUE', 'CANOPY', 'WITH', 'BOUQUETS', 'OF', 'ROSES', 'AND', 'SHE', 'LET', 'ANDERS', 'SIT', 'IN', 'A', 'GOLDEN', 'CHAIR', 'BY', 'HER', 'SIDE'] +7021-85628-0011-2071: hyp=['THE', 'PRINCESS', 'SAT', 'DOWN', 'UNDER', 'A', 'BLUE', 'CANOPY', 'WITH', 'BOUQUETS', 'OF', 'ROSES', 'AND', 'SHE', 'LET', 'ANDER', 'SIT', 'IN', 'A', 'GOLDEN', 'CHAIR', 'BY', 'HER', 'SIDE'] +7021-85628-0012-2072: ref=['BUT', 'YOU', 'MUST', 'NOT', 'EAT', 'WITH', 'YOUR', 'CAP', 'ON', 'YOUR', 'HEAD', 'SHE', 'SAID', 'AND', 'WAS', 'GOING', 'TO', 'TAKE', 'IT', 'OFF'] +7021-85628-0012-2072: hyp=['BUT', 'YOU', 'MUST', 'NOT', 'EAT', 'WITH', 'YOUR', 'CAP', 'ON', 'YOUR', 'HEAD', 'SHE', 'SAID', 'AND', 'WAS', 'GOING', 'TO', 'TAKE', 'IT', 'OFF'] +7021-85628-0013-2073: ref=['THE', 'PRINCESS', 'CERTAINLY', 'WAS', 'BEAUTIFUL', 'AND', 'HE', 'WOULD', 'HAVE', 'DEARLY', 'LIKED', 'TO', 'BE', 'KISSED', 'BY', 'HER', 'BUT', 'THE', 'CAP', 'WHICH', 'HIS', 'MOTHER', 'HAD', 'MADE', 'HE', 'WOULD', 'NOT', 'GIVE', 'UP', 'ON', 'ANY', 'CONDITION'] +7021-85628-0013-2073: hyp=['THE', 'PRINCESS', 'CERTAINLY', 'WAS', 'BEAUTIFUL', 'AND', 'HE', 'WOULD', 'HAVE', 'DEARLY', 'LIKED', 'TO', 'BE', 'KISSED', 'BY', 'HER', 'BUT', 'THE', 'CAP', 'WHICH', 'HIS', 'MOTHER', 'HAD', 'MADE', 'HE', 'WOULD', 'NOT', 'GIVE', 'UP', 'ON', 'ANY', 'CONDITION'] +7021-85628-0014-2074: ref=['HE', 'ONLY', 'SHOOK', 'HIS', 'HEAD'] +7021-85628-0014-2074: hyp=['HE', 'ONLY', 'SHOOK', 'HIS', 'HEAD'] +7021-85628-0015-2075: ref=['WELL', 'BUT', 'NOW', 'SAID', 'THE', 'PRINCESS', 'AND', 'SHE', 'FILLED', 'HIS', 'POCKETS', 'WITH', 'CAKES', 'AND', 'PUT', 'HER', 'OWN', 'HEAVY', 'GOLD', 'CHAIN', 'AROUND', 'HIS', 'NECK', 'AND', 'BENT', 'DOWN', 'AND', 'KISSED', 'HIM'] +7021-85628-0015-2075: hyp=['WELL', 'BUT', 'NOW', 'SAID', 'THE', 'PRINCESS', 'AND', 'SHE', 'FILLED', 'HIS', 'POCKETS', 'WITH', 'CAKES', 'AND', 'PUT', 'HER', 'OWN', 'HEAVY', 'GOLD', 'CHAIN', 'AROUND', 'HIS', 'NECK', 'AND', 'BENT', 'DOWN', 'AND', 'KISSED', 'HIM'] +7021-85628-0016-2076: ref=['THAT', 'IS', 'A', 'VERY', 'FINE', 'CAP', 'YOU', 'HAVE', 'HE', 'SAID'] +7021-85628-0016-2076: hyp=['THAT', 'IS', 'A', 'VERY', 'FINE', 'CAP', 'YOU', 'HAVE', 'HE', 'SAID'] +7021-85628-0017-2077: ref=['SO', 'IT', 'IS', 'SAID', 'ANDERS'] +7021-85628-0017-2077: hyp=['SO', 'IT', 'IS', 'SAID', 'ANDREWS'] +7021-85628-0018-2078: ref=['AND', 'IT', 'IS', 'MADE', 'OF', "MOTHER'S", 'BEST', 'YARN', 'AND', 'SHE', 'KNITTED', 'IT', 'HERSELF', 'AND', 'EVERYBODY', 'WANTS', 'TO', 'GET', 'IT', 'AWAY', 'FROM', 'ME'] +7021-85628-0018-2078: hyp=['AND', 'IT', 'IS', 'MADE', 'OF', "MOTHER'S", 'BEST', 'YARN', 'AND', 'SHE', 'KNITTED', 'IT', 'HERSELF', 'AND', 'EVERYBODY', 'WANTS', 'TO', 'GET', 'IT', 'AWAY', 'FROM', 'ME'] +7021-85628-0019-2079: ref=['WITH', 'ONE', 'JUMP', 'ANDERS', 'GOT', 'OUT', 'OF', 'HIS', 'CHAIR'] +7021-85628-0019-2079: hyp=['WITH', 'ONE', 'JUMP', 'ANDREWS', 'GOT', 'OUT', 'OF', 'HIS', 'CHAIR'] +7021-85628-0020-2080: ref=['HE', 'DARTED', 'LIKE', 'AN', 'ARROW', 'THROUGH', 'ALL', 'THE', 'HALLS', 'DOWN', 'ALL', 'THE', 'STAIRS', 'AND', 'ACROSS', 'THE', 'YARD'] +7021-85628-0020-2080: hyp=['HE', 'DARTED', 'LIKE', 'AN', 'ARROW', 
'THROUGH', 'ALL', 'THE', 'HALLS', 'DOWN', 'ALL', 'THE', 'STAIRS', 'AND', 'ACROSS', 'THE', 'YARD'] +7021-85628-0021-2081: ref=['HE', 'STILL', 'HELD', 'ON', 'TO', 'IT', 'WITH', 'BOTH', 'HANDS', 'AS', 'HE', 'RUSHED', 'INTO', 'HIS', "MOTHER'S", 'COTTAGE'] +7021-85628-0021-2081: hyp=['HE', 'STILL', 'HELD', 'ON', 'TO', 'IT', 'WITH', 'BOTH', 'HANDS', 'AS', 'HE', 'RUSHED', 'INTO', 'HIS', "MOTHER'S", 'COTTAGE'] +7021-85628-0022-2082: ref=['AND', 'ALL', 'HIS', 'BROTHERS', 'AND', 'SISTERS', 'STOOD', 'ROUND', 'AND', 'LISTENED', 'WITH', 'THEIR', 'MOUTHS', 'OPEN'] +7021-85628-0022-2082: hyp=['AND', 'ALL', 'HIS', 'BROTHERS', 'AND', 'SISTERS', 'STOOD', 'ROUND', 'AND', 'LISTENED', 'WITH', 'THEIR', 'MOUTHS', 'OPEN'] +7021-85628-0023-2083: ref=['BUT', 'WHEN', 'HIS', 'BIG', 'BROTHER', 'HEARD', 'THAT', 'HE', 'HAD', 'REFUSED', 'TO', 'GIVE', 'HIS', 'CAP', 'FOR', 'A', "KING'S", 'GOLDEN', 'CROWN', 'HE', 'SAID', 'THAT', 'ANDERS', 'WAS', 'A', 'STUPID'] +7021-85628-0023-2083: hyp=['BUT', 'WHEN', 'HIS', 'BIG', 'BROTHER', 'HEARD', 'THAT', 'HE', 'HAD', 'REFUSED', 'TO', 'GIVE', 'HIS', 'CAP', 'FOR', 'A', "KING'S", 'GOLDEN', 'CROWN', 'HE', 'SAID', 'THAT', 'ANDERS', 'WAS', 'A', 'STUPID'] +7021-85628-0024-2084: ref=['ANDERS', 'FACE', 'GREW', 'RED'] +7021-85628-0024-2084: hyp=["ANDREW'S", 'FACE', 'GREW', 'RED'] +7021-85628-0025-2085: ref=['BUT', 'HIS', 'MOTHER', 'HUGGED', 'HIM', 'CLOSE'] +7021-85628-0025-2085: hyp=['BUT', 'HIS', 'MOTHER', 'HUGGED', 'HIM', 'CLOSE'] +7021-85628-0026-2086: ref=['NO', 'MY', 'LITTLE', 'SON', 'SHE', 'SAID'] +7021-85628-0026-2086: hyp=['NO', 'MY', 'LITTLE', 'SON', 'SHE', 'SAID'] +7021-85628-0027-2087: ref=['IF', 'YOU', 'DRESSED', 'IN', 'SILK', 'AND', 'GOLD', 'FROM', 'TOP', 'TO', 'TOE', 'YOU', 'COULD', 'NOT', 'LOOK', 'ANY', 'NICER', 'THAN', 'IN', 'YOUR', 'LITTLE', 'RED', 'CAP'] +7021-85628-0027-2087: hyp=['IF', 'YOU', 'DRESSED', 'IN', 'SILK', 'AND', 'GOLD', 'FROM', 'TOP', 'TO', 'TOE', 'YOU', 'COULD', 'NOT', 'LOOK', 'ANY', 'NICER', 'THAN', 'IN', 'YOUR', 'LITTLE', 'RED', 'CAP'] +7127-75946-0000-2088: ref=['AT', 'THE', 'CONCLUSION', 'OF', 'THE', 'BANQUET', 'WHICH', 'WAS', 'SERVED', 'AT', 'FIVE', "O'CLOCK", 'THE', 'KING', 'ENTERED', 'HIS', 'CABINET', 'WHERE', 'HIS', 'TAILORS', 'WERE', 'AWAITING', 'HIM', 'FOR', 'THE', 'PURPOSE', 'OF', 'TRYING', 'ON', 'THE', 'CELEBRATED', 'COSTUME', 'REPRESENTING', 'SPRING', 'WHICH', 'WAS', 'THE', 'RESULT', 'OF', 'SO', 'MUCH', 'IMAGINATION', 'AND', 'HAD', 'COST', 'SO', 'MANY', 'EFFORTS', 'OF', 'THOUGHT', 'TO', 'THE', 'DESIGNERS', 'AND', 'ORNAMENT', 'WORKERS', 'OF', 'THE', 'COURT'] +7127-75946-0000-2088: hyp=['AT', 'THE', 'CONCLUSION', 'OF', 'THE', 'BANQUET', 'WHICH', 'WAS', 'SERVED', 'AT', 'FIVE', "O'CLOCK", 'THE', 'KING', 'ENTERED', 'HIS', 'CABINET', 'WHERE', 'HIS', 'TAILORS', 'WERE', 'AWAITING', 'HIM', 'FOR', 'THE', 'PURPOSE', 'OF', 'TRYING', 'ON', 'THE', 'CELEBRATED', 'COSTUME', 'REPRESENTING', 'SPRING', 'WHICH', 'WAS', 'THE', 'RESULT', 'OF', 'SO', 'MUCH', 'IMAGINATION', 'AND', 'HAD', 'CAUSED', 'SO', 'MANY', 'EFFORTS', 'OF', 'THOUGHT', 'TO', 'THE', 'DESIGNERS', 'AND', 'ORNAMENT', 'WORKERS', 'OF', 'THE', 'COURT'] +7127-75946-0001-2089: ref=['AH', 'VERY', 'WELL'] +7127-75946-0001-2089: hyp=['AH', 'VERY', 'WELL'] +7127-75946-0002-2090: ref=['LET', 'HIM', 'COME', 'IN', 'THEN', 'SAID', 'THE', 'KING', 'AND', 'AS', 'IF', 'COLBERT', 'HAD', 'BEEN', 'LISTENING', 'AT', 'THE', 'DOOR', 'FOR', 'THE', 'PURPOSE', 'OF', 'KEEPING', 'HIMSELF', 'AU', 'COURANT', 'WITH', 'THE', 'CONVERSATION', 'HE', 'ENTERED', 'AS', 'SOON', 'AS', 'THE', 'KING', 'HAD', 'PRONOUNCED', 'HIS', 'NAME', 'TO', 
'THE', 'TWO', 'COURTIERS'] +7127-75946-0002-2090: hyp=['LET', 'HIM', 'COME', 'IN', 'THEN', 'SAID', 'THE', 'KING', 'AND', 'AS', 'IF', 'COLBERT', 'HAD', 'BEEN', 'LISTENING', 'AT', 'THE', 'DOOR', 'FOR', 'THE', 'PURPOSE', 'OF', 'KEEPING', 'HIMSELF', 'ACCORANT', 'WITH', 'THE', 'CONVERSATION', 'HE', 'ENTERED', 'AS', 'SOON', 'AS', 'THE', 'KING', 'HAD', 'PRONOUNCED', 'HIS', 'NAME', 'TO', 'THE', 'TWO', 'COURTIERS'] +7127-75946-0003-2091: ref=['GENTLEMEN', 'TO', 'YOUR', 'POSTS', 'WHEREUPON', 'SAINT', 'AIGNAN', 'AND', 'VILLEROY', 'TOOK', 'THEIR', 'LEAVE'] +7127-75946-0003-2091: hyp=['GENTLEMEN', 'TO', 'YOUR', 'POSTS', 'WHEREUPON', 'SAINT', 'ANNON', 'AND', 'VILLEROI', 'TOOK', 'THEIR', 'LEAVE'] +7127-75946-0004-2092: ref=['CERTAINLY', 'SIRE', 'BUT', 'I', 'MUST', 'HAVE', 'MONEY', 'TO', 'DO', 'THAT', 'WHAT'] +7127-75946-0004-2092: hyp=['CERTAINLY', 'SIRE', 'BUT', 'I', 'MUST', 'HAVE', 'MONEY', 'TO', 'DO', 'THAT', 'WHAT'] +7127-75946-0005-2093: ref=['WHAT', 'DO', 'YOU', 'MEAN', 'INQUIRED', 'LOUIS'] +7127-75946-0005-2093: hyp=['WHAT', 'DO', 'YOU', 'MEAN', 'INQUIRED', 'LOUISE'] +7127-75946-0006-2094: ref=['HE', 'HAS', 'GIVEN', 'THEM', 'WITH', 'TOO', 'MUCH', 'GRACE', 'NOT', 'TO', 'HAVE', 'OTHERS', 'STILL', 'TO', 'GIVE', 'IF', 'THEY', 'ARE', 'REQUIRED', 'WHICH', 'IS', 'THE', 'CASE', 'AT', 'THE', 'PRESENT', 'MOMENT'] +7127-75946-0006-2094: hyp=['HE', 'HAS', 'GIVEN', 'THEM', 'WITH', 'TOO', 'MUCH', 'GRACE', 'NOT', 'TO', 'HAVE', 'OTHERS', 'STILL', 'TO', 'GIVE', 'IF', 'THEY', 'ARE', 'REQUIRED', 'WHICH', 'IS', 'THE', 'CASE', 'AT', 'THE', 'PRESENT', 'MOMENT'] +7127-75946-0007-2095: ref=['IT', 'IS', 'NECESSARY', 'THEREFORE', 'THAT', 'HE', 'SHOULD', 'COMPLY', 'THE', 'KING', 'FROWNED'] +7127-75946-0007-2095: hyp=['IT', 'IS', 'NECESSARY', 'THEREFORE', 'THAT', 'HE', 'SHOULD', 'COMPLY', 'THE', 'KING', 'FROWNED'] +7127-75946-0008-2096: ref=['DOES', 'YOUR', 'MAJESTY', 'THEN', 'NO', 'LONGER', 'BELIEVE', 'THE', 'DISLOYAL', 'ATTEMPT'] +7127-75946-0008-2096: hyp=['DOES', 'YOUR', 'MAJESTY', 'THEN', 'NO', 'LONGER', 'BELIEVE', 'THE', 'DISLOYAL', 'ATTEMPT'] +7127-75946-0009-2097: ref=['NOT', 'AT', 'ALL', 'YOU', 'ARE', 'ON', 'THE', 'CONTRARY', 'MOST', 'AGREEABLE', 'TO', 'ME'] +7127-75946-0009-2097: hyp=['NOT', 'AT', 'ALL', 'YOU', 'ARE', 'ON', 'THE', 'CONTRARY', 'MOST', 'AGREEABLE', 'TO', 'ME'] +7127-75946-0010-2098: ref=['YOUR', "MAJESTY'S", 'PLAN', 'THEN', 'IN', 'THIS', 'AFFAIR', 'IS'] +7127-75946-0010-2098: hyp=['YOUR', "MAJESTY'S", 'PLAN', 'THEN', 'IN', 'THIS', 'AFFAIR', 'IS'] +7127-75946-0011-2099: ref=['YOU', 'WILL', 'TAKE', 'THEM', 'FROM', 'MY', 'PRIVATE', 'TREASURE'] +7127-75946-0011-2099: hyp=['YOU', 'WILL', 'TAKE', 'THEM', 'FROM', 'MY', 'PRIVATE', 'TREASURE'] +7127-75946-0012-2100: ref=['THE', 'NEWS', 'CIRCULATED', 'WITH', 'THE', 'RAPIDITY', 'OF', 'LIGHTNING', 'DURING', 'ITS', 'PROGRESS', 'IT', 'KINDLED', 'EVERY', 'VARIETY', 'OF', 'COQUETRY', 'DESIRE', 'AND', 'WILD', 'AMBITION'] +7127-75946-0012-2100: hyp=['THE', 'NEWS', 'CIRCULATED', 'WITH', 'THE', 'RAPIDITY', 'OF', 'LIGHTNING', 'DURING', 'ITS', 'PROGRESS', 'IT', 'KINDLED', 'EVERY', 'VARIETY', 'OF', 'COQUETRY', 'DESIRE', 'AND', 'WILD', 'AMBITION'] +7127-75946-0013-2101: ref=['THE', 'KING', 'HAD', 'COMPLETED', 'HIS', 'TOILETTE', 'BY', 'NINE', "O'CLOCK", 'HE', 'APPEARED', 'IN', 'AN', 'OPEN', 'CARRIAGE', 'DECORATED', 'WITH', 'BRANCHES', 'OF', 'TREES', 'AND', 'FLOWERS'] +7127-75946-0013-2101: hyp=['THE', 'KING', 'HAD', 'COMPLETED', 'HIS', 'TOILET', 'BY', 'NINE', "O'CLOCK", 'HE', 'APPEARED', 'IN', 'AN', 'OPEN', 'CARRIAGE', 'DECORATED', 'WITH', 'BRANCHES', 'OF', 'TREES', 'AND', 
'FLOWERS'] +7127-75946-0014-2102: ref=['THE', 'QUEENS', 'HAD', 'TAKEN', 'THEIR', 'SEATS', 'UPON', 'A', 'MAGNIFICENT', 'DIAS', 'OR', 'PLATFORM', 'ERECTED', 'UPON', 'THE', 'BORDERS', 'OF', 'THE', 'LAKE', 'IN', 'A', 'THEATER', 'OF', 'WONDERFUL', 'ELEGANCE', 'OF', 'CONSTRUCTION'] +7127-75946-0014-2102: hyp=['THE', 'QUEENS', 'HAD', 'TAKEN', 'THEIR', 'SEATS', 'UPON', 'A', 'MAGNIFICENT', 'DAIS', 'OR', 'PLATFORM', 'ERECTED', 'UPON', 'THE', 'BORDERS', 'OF', 'THE', 'LAKE', 'IN', 'A', 'THEATRE', 'OF', 'WONDERFUL', 'ELEGANCE', 'OF', 'CONSTRUCTION'] +7127-75946-0015-2103: ref=['SUDDENLY', 'FOR', 'THE', 'PURPOSE', 'OF', 'RESTORING', 'PEACE', 'AND', 'ORDER', 'SPRING', 'ACCOMPANIED', 'BY', 'HIS', 'WHOLE', 'COURT', 'MADE', 'HIS', 'APPEARANCE'] +7127-75946-0015-2103: hyp=['SUDDENLY', 'FOR', 'THE', 'PURPOSE', 'OF', 'RESTORING', 'PEACE', 'AND', 'ORDER', 'SPRING', 'ACCOMPANIED', 'BY', 'HIS', 'WHOLE', 'COURT', 'MADE', 'HIS', 'APPEARANCE'] +7127-75946-0016-2104: ref=['THE', 'SEASONS', 'ALLIES', 'OF', 'SPRING', 'FOLLOWED', 'HIM', 'CLOSELY', 'TO', 'FORM', 'A', 'QUADRILLE', 'WHICH', 'AFTER', 'MANY', 'WORDS', 'OF', 'MORE', 'OR', 'LESS', 'FLATTERING', 'IMPORT', 'WAS', 'THE', 'COMMENCEMENT', 'OF', 'THE', 'DANCE'] +7127-75946-0016-2104: hyp=['THE', 'SEASONS', 'ALLIES', 'OF', 'SPRING', 'FOLLOWED', 'HIM', 'CLOSELY', 'TO', 'FORM', 'A', 'QUADRILLE', 'WHICH', 'AFTER', 'MANY', 'WORDS', 'OF', 'MORE', 'OR', 'LESS', 'FLATTERING', 'IMPORT', 'WAS', 'THE', 'COMMENCEMENT', 'OF', 'THE', 'DANCE'] +7127-75946-0017-2105: ref=['HIS', 'LEGS', 'THE', 'BEST', 'SHAPED', 'AT', 'COURT', 'WERE', 'DISPLAYED', 'TO', 'GREAT', 'ADVANTAGE', 'IN', 'FLESH', 'COLORED', 'SILKEN', 'HOSE', 'OF', 'SILK', 'SO', 'FINE', 'AND', 'SO', 'TRANSPARENT', 'THAT', 'IT', 'SEEMED', 'ALMOST', 'LIKE', 'FLESH', 'ITSELF'] +7127-75946-0017-2105: hyp=['HIS', 'LEGS', 'THE', 'BEST', 'SHAPED', 'AT', 'COURT', 'WERE', 'DISPLAYED', 'TO', 'GREAT', 'ADVANTAGE', 'IN', 'FLESH', 'COLOURED', 'SILKEN', 'HOSE', 'OF', 'SILK', 'SO', 'FINE', 'AND', 'SO', 'TRANSPARENT', 'THAT', 'IT', 'SEEMED', 'ALMOST', 'LIKE', 'FLESH', 'ITSELF'] +7127-75946-0018-2106: ref=['THERE', 'WAS', 'SOMETHING', 'IN', 'HIS', 'CARRIAGE', 'WHICH', 'RESEMBLED', 'THE', 'BUOYANT', 'MOVEMENTS', 'OF', 'AN', 'IMMORTAL', 'AND', 'HE', 'DID', 'NOT', 'DANCE', 'SO', 'MUCH', 'AS', 'SEEM', 'TO', 'SOAR', 'ALONG'] +7127-75946-0018-2106: hyp=['THERE', 'WAS', 'SOMETHING', 'IN', 'HIS', 'CARRIAGE', 'WHICH', 'RESEMBLED', 'THE', 'BUOYANT', 'MOVEMENTS', 'OF', 'AN', 'IMMORTAL', 'AND', 'HE', 'DID', 'NOT', 'DANCE', 'SO', 'MUCH', 'AS', 'SEEMED', 'TO', 'SOAR', 'ALONG'] +7127-75946-0019-2107: ref=['YES', 'IT', 'IS', 'SUPPRESSED'] +7127-75946-0019-2107: hyp=['YES', 'IT', 'IS', 'SUPPRESSED'] +7127-75946-0020-2108: ref=['FAR', 'FROM', 'IT', 'SIRE', 'YOUR', 'MAJESTY', 'HAVING', 'GIVEN', 'NO', 'DIRECTIONS', 'ABOUT', 'IT', 'THE', 'MUSICIANS', 'HAVE', 'RETAINED', 'IT'] +7127-75946-0020-2108: hyp=['FAR', 'FROM', 'IT', 'SIRE', 'YOUR', 'MAJESTY', 'HAVING', 'GIVEN', 'NO', 'DIRECTIONS', 'ABOUT', 'IT', 'THE', 'MUSICIANS', 'HAVE', 'RETAINED', 'IT'] +7127-75946-0021-2109: ref=['YES', 'SIRE', 'AND', 'READY', 'DRESSED', 'FOR', 'THE', 'BALLET'] +7127-75946-0021-2109: hyp=['YES', 'SIRE', 'AND', 'READY', 'DRESSED', 'FOR', 'THE', 'BALLET'] +7127-75946-0022-2110: ref=['SIRE', 'HE', 'SAID', 'YOUR', "MAJESTY'S", 'MOST', 'DEVOTED', 'SERVANT', 'APPROACHES', 'TO', 'PERFORM', 'A', 'SERVICE', 'ON', 'THIS', 'OCCASION', 'WITH', 'SIMILAR', 'ZEAL', 'THAT', 'HE', 'HAS', 'ALREADY', 'SHOWN', 'ON', 'THE', 'FIELD', 'OF', 'BATTLE'] +7127-75946-0022-2110: hyp=['SIRE', 'HE', 'SAID', 
'YOUR', "MAJESTY'S", 'MOST', 'DEVOTED', 'SERVANT', 'APPROACHES', 'TO', 'PERFORM', 'A', 'SERVICE', 'ON', 'THIS', 'OCCASION', 'WITH', 'SIMILAR', 'ZEAL', 'THAN', 'HE', 'HAS', 'ALREADY', 'SHOWN', 'ON', 'THE', 'FIELD', 'OF', 'BATTLE'] +7127-75946-0023-2111: ref=['THE', 'KING', 'SEEMED', 'ONLY', 'PLEASED', 'WITH', 'EVERY', 'ONE', 'PRESENT'] +7127-75946-0023-2111: hyp=['THE', 'KING', 'SEEMED', 'ONLY', 'PLEASED', 'WITH', 'EVERYONE', 'PRESENT'] +7127-75946-0024-2112: ref=['MONSIEUR', 'WAS', 'THE', 'ONLY', 'ONE', 'WHO', 'DID', 'NOT', 'UNDERSTAND', 'ANYTHING', 'ABOUT', 'THE', 'MATTER'] +7127-75946-0024-2112: hyp=['MONSIEUR', 'WAS', 'THE', 'ONLY', 'ONE', 'WHO', 'DID', 'NOT', 'UNDERSTAND', 'ANYTHING', 'ABOUT', 'THE', 'MATTER'] +7127-75946-0025-2113: ref=['THE', 'BALLET', 'BEGAN', 'THE', 'EFFECT', 'WAS', 'MORE', 'THAN', 'BEAUTIFUL'] +7127-75946-0025-2113: hyp=['THE', 'BALLET', 'BEGAN', 'THE', 'EFFECT', 'WAS', 'MORE', 'THAN', 'BEAUTIFUL'] +7127-75946-0026-2114: ref=['WHEN', 'THE', 'MUSIC', 'BY', 'ITS', 'BURSTS', 'OF', 'MELODY', 'CARRIED', 'AWAY', 'THESE', 'ILLUSTRIOUS', 'DANCERS', 'WHEN', 'THE', 'SIMPLE', 'UNTUTORED', 'PANTOMIME', 'OF', 'THAT', 'PERIOD', 'ONLY', 'THE', 'MORE', 'NATURAL', 'ON', 'ACCOUNT', 'OF', 'THE', 'VERY', 'INDIFFERENT', 'ACTING', 'OF', 'THE', 'AUGUST', 'ACTORS', 'HAD', 'REACHED', 'ITS', 'CULMINATING', 'POINT', 'OF', 'TRIUMPH', 'THE', 'THEATER', 'SHOOK', 'WITH', 'TUMULTUOUS', 'APPLAUSE'] +7127-75946-0026-2114: hyp=['WHEN', 'THE', 'MUSIC', 'BY', 'ITS', 'BURSTS', 'OF', 'MELODY', 'CARRIED', 'AWAY', 'THESE', 'ILLUSTRIOUS', 'DANCERS', 'WHEN', 'THE', 'SIMPLE', 'UNTUTORED', 'PANTOMIME', 'OF', 'THAT', 'PERIOD', 'ONLY', 'THE', 'MORE', 'NATURAL', 'ON', 'ACCOUNT', 'OF', 'THE', 'VERY', 'INDIFFERENT', 'ACTING', 'OF', 'THE', 'AUGUST', 'ACTORS', 'HAD', 'REACHED', 'ITS', 'CULMINATING', 'POINT', 'OF', 'TRIUMPH', 'THE', 'THEATRE', 'SHOOK', 'WITH', 'TUMULTUOUS', 'APPLAUSE'] +7127-75946-0027-2115: ref=['DISDAINFUL', 'OF', 'A', 'SUCCESS', 'OF', 'WHICH', 'MADAME', 'SHOWED', 'NO', 'ACKNOWLEDGEMENT', 'HE', 'THOUGHT', 'OF', 'NOTHING', 'BUT', 'BOLDLY', 'REGAINING', 'THE', 'MARKED', 'PREFERENCE', 'OF', 'THE', 'PRINCESS'] +7127-75946-0027-2115: hyp=['DISDAINFUL', 'OF', 'A', 'SUCCESS', 'OF', 'WHICH', 'MADAME', 'SHOWED', 'NO', 'ACKNOWLEDGMENT', 'HE', 'THOUGHT', 'OF', 'NOTHING', 'BUT', 'BOLDLY', 'REGAINING', 'THE', 'MARKED', 'PREFERENCE', 'OF', 'THE', 'PRINCESS'] +7127-75946-0028-2116: ref=['BY', 'DEGREES', 'ALL', 'HIS', 'HAPPINESS', 'ALL', 'HIS', 'BRILLIANCY', 'SUBSIDED', 'INTO', 'REGRET', 'AND', 'UNEASINESS', 'SO', 'THAT', 'HIS', 'LIMBS', 'LOST', 'THEIR', 'POWER', 'HIS', 'ARMS', 'HUNG', 'HEAVILY', 'BY', 'HIS', 'SIDES', 'AND', 'HIS', 'HEAD', 'DROOPED', 'AS', 'THOUGH', 'HE', 'WAS', 'STUPEFIED'] +7127-75946-0028-2116: hyp=['BY', 'DEGREES', 'ALL', 'HIS', 'HAPPINESS', 'ALL', 'HIS', 'BRILLIANCY', 'SUBSIDED', 'INTO', 'REGRET', 'AND', 'UNEASINESS', 'SO', 'THAT', 'HIS', 'LIMBS', 'LOST', 'THEIR', 'POWER', 'HIS', 'ARMS', 'HUNG', 'HEAVILY', 'BY', 'HIS', 'SIDES', 'AND', 'HIS', 'HEAD', 'DROOPED', 'AS', 'THOUGH', 'HE', 'WAS', 'STUPEFIED'] +7127-75946-0029-2117: ref=['THE', 'KING', 'WHO', 'HAD', 'FROM', 'THIS', 'MOMENT', 'BECOME', 'IN', 'REALITY', 'THE', 'PRINCIPAL', 'DANCER', 'IN', 'THE', 'QUADRILLE', 'CAST', 'A', 'LOOK', 'UPON', 'HIS', 'VANQUISHED', 'RIVAL'] +7127-75946-0029-2117: hyp=['THE', 'KING', 'WHO', 'HAD', 'FROM', 'THIS', 'MOMENT', 'BECOME', 'IN', 'REALITY', 'THE', 'PRINCIPAL', 'DANCER', 'IN', 'THE', 'QUADRILLE', 'CAST', 'A', 'LOOK', 'UPON', 'HIS', 'VANQUISHED', 'RIVAL'] +7127-75947-0000-2118: ref=['EVERY', 'ONE', 
'COULD', 'OBSERVE', 'HIS', 'AGITATION', 'AND', 'PROSTRATION', 'A', 'PROSTRATION', 'WHICH', 'WAS', 'INDEED', 'THE', 'MORE', 'REMARKABLE', 'SINCE', 'PEOPLE', 'WERE', 'NOT', 'ACCUSTOMED', 'TO', 'SEE', 'HIM', 'WITH', 'HIS', 'ARMS', 'HANGING', 'LISTLESSLY', 'BY', 'HIS', 'SIDE', 'HIS', 'HEAD', 'BEWILDERED', 'AND', 'HIS', 'EYES', 'WITH', 'ALL', 'THEIR', 'BRIGHT', 'INTELLIGENCE', 'BEDIMMED'] +7127-75947-0000-2118: hyp=['EVERY', 'ONE', 'COULD', 'OBSERVE', 'HIS', 'AGITATION', 'AND', 'PROSTRATION', 'A', 'PROSTRATION', 'WHICH', 'WAS', 'INDEED', 'THE', 'MORE', 'REMARKABLE', 'SINCE', 'PEOPLE', 'WERE', 'NOT', 'ACCUSTOMED', 'TO', 'SEE', 'HIM', 'WITH', 'HIS', 'ARMS', 'HANGING', 'LISTLESSLY', 'BY', 'HIS', 'SIDE', 'HIS', 'HEAD', 'BEWILDERED', 'AND', 'HIS', 'EYES', 'WITH', 'ALL', 'THEIR', 'BRIGHT', 'INTELLIGENCE', 'BE', 'DIMMED'] +7127-75947-0001-2119: ref=['UPON', 'THIS', 'MADAME', 'DEIGNED', 'TO', 'TURN', 'HER', 'EYES', 'LANGUISHINGLY', 'TOWARDS', 'THE', 'COMTE', 'OBSERVING'] +7127-75947-0001-2119: hyp=['UPON', 'THIS', 'MADAME', 'DEIGNED', 'TO', 'TURN', 'HER', 'EYES', 'LANGUISHINGLY', 'TOWARDS', 'THE', 'COMTE', 'OBSERVING'] +7127-75947-0002-2120: ref=['DO', 'YOU', 'THINK', 'SO', 'SHE', 'REPLIED', 'WITH', 'INDIFFERENCE'] +7127-75947-0002-2120: hyp=['DO', 'YOU', 'THINK', 'SO', 'SHE', 'REPLIED', 'WITH', 'INDIFFERENCE'] +7127-75947-0003-2121: ref=['YES', 'THE', 'CHARACTER', 'WHICH', 'YOUR', 'ROYAL', 'HIGHNESS', 'ASSUMED', 'IS', 'IN', 'PERFECT', 'HARMONY', 'WITH', 'YOUR', 'OWN'] +7127-75947-0003-2121: hyp=['YES', 'THE', 'CHARACTER', 'WHICH', 'YOUR', 'ROYAL', 'HIGHNESS', 'ASSUMED', 'IS', 'IN', 'PERFECT', 'HARMONY', 'WITH', 'YOUR', 'OWN'] +7127-75947-0004-2122: ref=['EXPLAIN', 'YOURSELF'] +7127-75947-0004-2122: hyp=['EXPLAIN', 'YOURSELF'] +7127-75947-0005-2123: ref=['I', 'ALLUDE', 'TO', 'THE', 'GODDESS'] +7127-75947-0005-2123: hyp=['I', 'ALLUDE', 'TO', 'THE', 'GODDESS'] +7127-75947-0006-2124: ref=['THE', 'PRINCESS', 'INQUIRED', 'NO'] +7127-75947-0006-2124: hyp=['THE', 'PRINCESS', 'INQUIRED', 'NO'] +7127-75947-0007-2125: ref=['SHE', 'THEN', 'ROSE', 'HUMMING', 'THE', 'AIR', 'TO', 'WHICH', 'SHE', 'WAS', 'PRESENTLY', 'GOING', 'TO', 'DANCE'] +7127-75947-0007-2125: hyp=['SHE', 'THEN', 'ROSE', 'HUMMING', 'THE', 'AIR', 'TO', 'WHICH', 'SHE', 'WAS', 'PRESENTLY', 'GOING', 'TO', 'DANCE'] +7127-75947-0008-2126: ref=['THE', 'ARROW', 'PIERCED', 'HIS', 'HEART', 'AND', 'WOUNDED', 'HIM', 'MORTALLY'] +7127-75947-0008-2126: hyp=['THE', 'ARROW', 'PIERCED', 'HIS', 'HEART', 'AND', 'WOUNDED', 'HIM', 'MORTALLY'] +7127-75947-0009-2127: ref=['A', 'QUARTER', 'OF', 'AN', 'HOUR', 'AFTERWARDS', 'HE', 'RETURNED', 'TO', 'THE', 'THEATER', 'BUT', 'IT', 'WILL', 'BE', 'READILY', 'BELIEVED', 'THAT', 'IT', 'WAS', 'ONLY', 'A', 'POWERFUL', 'EFFORT', 'OF', 'REASON', 'OVER', 'HIS', 'GREAT', 'EXCITEMENT', 'THAT', 'ENABLED', 'HIM', 'TO', 'GO', 'BACK', 'OR', 'PERHAPS', 'FOR', 'LOVE', 'IS', 'THUS', 'STRANGELY', 'CONSTITUTED', 'HE', 'FOUND', 'IT', 'IMPOSSIBLE', 'EVEN', 'TO', 'REMAIN', 'MUCH', 'LONGER', 'SEPARATED', 'FROM', 'THE', 'PRESENCE', 'OF', 'ONE', 'WHO', 'HAD', 'BROKEN', 'HIS', 'HEART'] +7127-75947-0009-2127: hyp=['A', 'QUARTER', 'OF', 'AN', 'HOUR', 'AFTERWARDS', 'HE', 'RETURNED', 'TO', 'THE', 'THEATRE', 'BUT', 'IT', 'WILL', 'BE', 'READILY', 'BELIEVED', 'THAT', 'IT', 'WAS', 'ONLY', 'A', 'POWERFUL', 'EFFORT', 'OF', 'REASON', 'OVER', 'HIS', 'GREAT', 'EXCITEMENT', 'THAT', 'ENABLED', 'HIM', 'TO', 'GO', 'BACK', 'OR', 'PERHAPS', 'FOR', 'LOVE', 'IS', 'THUS', 'STRANGELY', 'CONSTITUTED', 'HE', 'FOUND', 'IT', 'IMPOSSIBLE', 'EVEN', 'TO', 'REMAIN', 'MUCH', 'LONGER', 
'SEPARATED', 'FROM', 'THE', 'PRESENCE', 'OF', 'ONE', 'WHO', 'HAD', 'BROKEN', 'HIS', 'HEART'] +7127-75947-0010-2128: ref=['WHEN', 'SHE', 'PERCEIVED', 'THE', 'YOUNG', 'MAN', 'SHE', 'ROSE', 'LIKE', 'A', 'WOMAN', 'SURPRISED', 'IN', 'THE', 'MIDST', 'OF', 'IDEAS', 'SHE', 'WAS', 'DESIROUS', 'OF', 'CONCEALING', 'FROM', 'HERSELF'] +7127-75947-0010-2128: hyp=['WHEN', 'SHE', 'PERCEIVED', 'THE', 'YOUNG', 'MAN', 'SHE', 'ROSE', 'LIKE', 'A', 'WOMAN', 'SURPRISED', 'IN', 'THE', 'MIDST', 'OF', 'IDEAS', 'SHE', 'WAS', 'DESIROUS', 'OF', 'CONCEALING', 'FROM', 'HERSELF'] +7127-75947-0011-2129: ref=['REMAIN', 'I', 'IMPLORE', 'YOU', 'THE', 'EVENING', 'IS', 'MOST', 'LOVELY'] +7127-75947-0011-2129: hyp=['REMAIN', 'I', 'IMPLORE', 'YOU', 'THE', 'EVENING', 'IS', 'MOST', 'LOVELY'] +7127-75947-0012-2130: ref=['INDEED', 'AH'] +7127-75947-0012-2130: hyp=['INDEED', 'AH'] +7127-75947-0013-2131: ref=['I', 'REMEMBER', 'NOW', 'AND', 'I', 'CONGRATULATE', 'MYSELF', 'DO', 'YOU', 'LOVE', 'ANY', 'ONE'] +7127-75947-0013-2131: hyp=['I', 'REMEMBER', 'NOW', 'AND', 'I', 'CONGRATULATE', 'MYSELF', 'DO', 'YOU', 'LOVE', 'ANYONE'] +7127-75947-0014-2132: ref=['FORGIVE', 'ME', 'I', 'HARDLY', 'KNOW', 'WHAT', 'I', 'AM', 'SAYING', 'A', 'THOUSAND', 'TIMES', 'FORGIVE', 'ME', 'MADAME', 'WAS', 'RIGHT', 'QUITE', 'RIGHT', 'THIS', 'BRUTAL', 'EXILE', 'HAS', 'COMPLETELY', 'TURNED', 'MY', 'BRAIN'] +7127-75947-0014-2132: hyp=['FORGIVE', 'ME', 'I', 'HARDLY', 'KNOW', 'WHAT', 'I', 'AM', 'SAYING', 'A', 'THOUSAND', 'TIMES', 'FORGIVE', 'ME', 'MADAME', 'WAS', 'RIGHT', 'QUITE', 'RIGHT', 'THIS', 'BRUTAL', 'EXILE', 'HAS', 'COMPLETELY', 'TURNED', 'MY', 'BRAIN'] +7127-75947-0015-2133: ref=['THERE', 'CANNOT', 'BE', 'A', 'DOUBT', 'HE', 'RECEIVED', 'YOU', 'KINDLY', 'FOR', 'IN', 'FACT', 'YOU', 'RETURNED', 'WITHOUT', 'HIS', 'PERMISSION'] +7127-75947-0015-2133: hyp=['THERE', 'CANNOT', 'BE', 'A', 'DOUBT', 'HE', 'RECEIVED', 'YOU', 'KINDLY', 'FOR', 'IN', 'FACT', 'YOU', 'RETURNED', 'WITHOUT', 'HIS', 'PERMISSION'] +7127-75947-0016-2134: ref=['OH', 'MADEMOISELLE', 'WHY', 'HAVE', 'I', 'NOT', 'A', 'DEVOTED', 'SISTER', 'OR', 'A', 'TRUE', 'FRIEND', 'SUCH', 'AS', 'YOURSELF'] +7127-75947-0016-2134: hyp=['OH', 'MADEMOISELLE', 'WHY', 'HAVE', 'I', 'NOT', 'A', 'DEVOTED', 'SISTER', 'OR', 'A', 'TRUE', 'FRIEND', 'SUCH', 'AS', 'YOURSELF'] +7127-75947-0017-2135: ref=['WHAT', 'ALREADY', 'HERE', 'THEY', 'SAID', 'TO', 'HER'] +7127-75947-0017-2135: hyp=['WHAT', 'ALREADY', 'HERE', 'THEY', 'SAID', 'TO', 'HER'] +7127-75947-0018-2136: ref=['I', 'HAVE', 'BEEN', 'HERE', 'THIS', 'QUARTER', 'OF', 'AN', 'HOUR', 'REPLIED', 'LA', 'VALLIERE'] +7127-75947-0018-2136: hyp=['I', 'HAVE', 'BEEN', 'HERE', 'THIS', 'QUARTER', 'OF', 'AN', 'HOUR', 'REPLIED', 'LA', 'VALLIER'] +7127-75947-0019-2137: ref=['DID', 'NOT', 'THE', 'DANCING', 'AMUSE', 'YOU', 'NO'] +7127-75947-0019-2137: hyp=['DID', 'NOT', 'THE', 'DANCING', 'AMUSE', 'YOU', 'NO'] +7127-75947-0020-2138: ref=['NO', 'MORE', 'THAN', 'THE', 'DANCING'] +7127-75947-0020-2138: hyp=['NO', 'MORE', 'THAN', 'THE', 'DANCING'] +7127-75947-0021-2139: ref=['LA', 'VALLIERE', 'IS', 'QUITE', 'A', 'POETESS', 'SAID', 'TONNAY', 'CHARENTE'] +7127-75947-0021-2139: hyp=['LA', 'VALLIER', 'IS', 'QUITE', 'A', 'POETESS', 'SAID', 'TONNICHERANT'] +7127-75947-0022-2140: ref=['I', 'AM', 'A', 'WOMAN', 'AND', 'THERE', 'ARE', 'FEW', 'LIKE', 'ME', 'WHOEVER', 'LOVES', 'ME', 'FLATTERS', 'ME', 'WHOEVER', 'FLATTERS', 'ME', 'PLEASES', 'ME', 'AND', 'WHOEVER', 'PLEASES', 'WELL', 'SAID', 'MONTALAIS', 'YOU', 'DO', 'NOT', 'FINISH'] +7127-75947-0022-2140: hyp=['I', 'AM', 'A', 'WOMAN', 'AND', 'THERE', 'ARE', 'FEW', 
'LIKE', 'ME', 'WHOEVER', 'LOVES', 'ME', 'FLATTERS', 'ME', 'WHOEVER', 'FLATTERS', 'ME', 'PLEASES', 'ME', 'AND', 'WHOEVER', 'PLEASES', 'WELL', 'SAID', 'MONTALAIS', 'YOU', 'DO', 'NOT', 'FINISH'] +7127-75947-0023-2141: ref=['IT', 'IS', 'TOO', 'DIFFICULT', 'REPLIED', 'MADEMOISELLE', 'DE', 'TONNAY', 'CHARENTE', 'LAUGHING', 'LOUDLY'] +7127-75947-0023-2141: hyp=['IT', 'IS', 'TOO', 'DIFFICULT', 'REPLIED', 'MADEMOISELLE', 'DE', 'TUNNICHAVENT', 'LAUGHING', 'LOUDLY'] +7127-75947-0024-2142: ref=['LOOK', 'YONDER', 'DO', 'YOU', 'NOT', 'SEE', 'THE', 'MOON', 'SLOWLY', 'RISING', 'SILVERING', 'THE', 'TOPMOST', 'BRANCHES', 'OF', 'THE', 'CHESTNUTS', 'AND', 'THE', 'OAKS'] +7127-75947-0024-2142: hyp=['LOOK', 'YONDER', 'DO', 'YOU', 'NOT', 'SEE', 'THE', 'MOON', 'SLOWLY', 'RISING', 'SILVERING', 'THE', 'TOPMOST', 'BRANCHES', 'OF', 'THE', 'CHESTNUTS', 'AND', 'THE', 'OAKS'] +7127-75947-0025-2143: ref=['EXQUISITE', 'SOFT', 'TURF', 'OF', 'THE', 'WOODS', 'THE', 'HAPPINESS', 'WHICH', 'YOUR', 'FRIENDSHIP', 'CONFERS', 'UPON', 'ME'] +7127-75947-0025-2143: hyp=['EXQUISITE', 'SOFT', 'TURF', 'OF', 'THE', 'WOODS', 'THE', 'HAPPINESS', 'WHICH', 'YOUR', 'FRIENDSHIP', 'CONFERS', 'UPON', 'ME'] +7127-75947-0026-2144: ref=['WELL', 'SAID', 'MADEMOISELLE', 'DE', 'TONNAY', 'CHARENTE', 'I', 'ALSO', 'THINK', 'A', 'GOOD', 'DEAL', 'BUT', 'I', 'TAKE', 'CARE'] +7127-75947-0026-2144: hyp=['WELL', 'SAID', 'MADEMOISELLE', 'DE', 'TOURNACHEANT', 'I', 'ALSO', 'THINK', 'A', 'GOOD', 'DEAL', 'BUT', 'I', 'TAKE', 'CARE'] +7127-75947-0027-2145: ref=['TO', 'SAY', 'NOTHING', 'SAID', 'MONTALAIS', 'SO', 'THAT', 'WHEN', 'MADEMOISELLE', 'DE', 'TONNAY', 'CHARENTE', 'THINKS', 'ATHENAIS', 'IS', 'THE', 'ONLY', 'ONE', 'WHO', 'KNOWS', 'IT'] +7127-75947-0027-2145: hyp=['TO', 'SAY', 'NOTHING', 'SAID', 'MONTALAIS', 'SO', 'THAT', 'WHEN', 'MADEMOISELLE', 'DE', 'TONAICHERANT', 'THINKS', 'ETHONAY', 'IS', 'THE', 'ONLY', 'ONE', 'WHO', 'KNOWS', 'IT'] +7127-75947-0028-2146: ref=['QUICK', 'QUICK', 'THEN', 'AMONG', 'THE', 'HIGH', 'REED', 'GRASS', 'SAID', 'MONTALAIS', 'STOOP', 'ATHENAIS', 'YOU', 'ARE', 'SO', 'TALL'] +7127-75947-0028-2146: hyp=['QUICK', 'QUICK', 'THEN', 'AMONG', 'THE', 'HIGH', 'REED', 'GRASS', 'SAID', 'MONTALAIS', 'STOOP', 'ETHINAE', 'YOU', 'ARE', 'SO', 'TALL'] +7127-75947-0029-2147: ref=['THE', 'YOUNG', 'GIRLS', 'HAD', 'INDEED', 'MADE', 'THEMSELVES', 'SMALL', 'INDEED', 'INVISIBLE'] +7127-75947-0029-2147: hyp=['THE', 'YOUNG', 'GIRLS', 'HAD', 'INDEED', 'MADE', 'THEMSELVES', 'SMALL', 'INDEED', 'INVISIBLE'] +7127-75947-0030-2148: ref=['SHE', 'WAS', 'HERE', 'JUST', 'NOW', 'SAID', 'THE', 'COUNT'] +7127-75947-0030-2148: hyp=['SHE', 'WAS', 'HERE', 'JUST', 'NOW', 'SAID', 'THE', 'COUNT'] +7127-75947-0031-2149: ref=['YOU', 'ARE', 'POSITIVE', 'THEN'] +7127-75947-0031-2149: hyp=['YOU', 'ARE', 'POSITIVE', 'THEN'] +7127-75947-0032-2150: ref=['YES', 'BUT', 'PERHAPS', 'I', 'FRIGHTENED', 'HER', 'IN', 'WHAT', 'WAY'] +7127-75947-0032-2150: hyp=['YES', 'BUT', 'PERHAPS', 'I', 'FRIGHTENED', 'HER', 'IN', 'WHAT', 'WAY'] +7127-75947-0033-2151: ref=['HOW', 'IS', 'IT', 'LA', 'VALLIERE', 'SAID', 'MADEMOISELLE', 'DE', 'TONNAY', 'CHARENTE', 'THAT', 'THE', 'VICOMTE', 'DE', 'BRAGELONNE', 'SPOKE', 'OF', 'YOU', 'AS', 'LOUISE'] +7127-75947-0033-2151: hyp=['HOW', 'IS', 'IT', 'LA', 'VALLIER', 'SAID', 'MADEMOISELLE', 'DE', 'TENNICHANT', 'THAT', 'THE', 'VICOMTE', 'DE', 'BRAGELONE', 'SPOKE', 'OF', 'YOU', 'AS', 'LOUISE'] +7127-75947-0034-2152: ref=['IT', 'SEEMS', 'THE', 'KING', 'WILL', 'NOT', 'CONSENT', 'TO', 'IT'] +7127-75947-0034-2152: hyp=['IT', 'SEEMS', 'THE', 'KING', 'WILL', 'NOT', 'CONSENT', 'TO', 
'IT'] +7127-75947-0035-2153: ref=['GOOD', 'GRACIOUS', 'HAS', 'THE', 'KING', 'ANY', 'RIGHT', 'TO', 'INTERFERE', 'IN', 'MATTERS', 'OF', 'THAT', 'KIND'] +7127-75947-0035-2153: hyp=['GOOD', 'GRACIOUS', 'HAS', 'THE', 'KING', 'ANY', 'RIGHT', 'TO', 'INTERFERE', 'IN', 'MATTERS', 'OF', 'THAT', 'KIND'] +7127-75947-0036-2154: ref=['I', 'GIVE', 'MY', 'CONSENT'] +7127-75947-0036-2154: hyp=['I', 'GIVE', 'MY', 'CONSENT'] +7127-75947-0037-2155: ref=['OH', 'I', 'AM', 'SPEAKING', 'SERIOUSLY', 'REPLIED', 'MONTALAIS', 'AND', 'MY', 'OPINION', 'IN', 'THIS', 'CASE', 'IS', 'QUITE', 'AS', 'GOOD', 'AS', 'THE', "KING'S", 'I', 'SUPPOSE', 'IS', 'IT', 'NOT', 'LOUISE'] +7127-75947-0037-2155: hyp=['OH', 'I', 'AM', 'SPEAKING', 'SERIOUSLY', 'REPLIED', 'MONTALAIS', 'AND', 'MY', 'OPINION', 'IN', 'THIS', 'CASE', 'IS', 'QUITE', 'AS', 'GOOD', 'AS', 'THE', "KING'S", 'I', 'SUPPOSE', 'IS', 'IT', 'NOT', 'LOUISE'] +7127-75947-0038-2156: ref=['LET', 'US', 'RUN', 'THEN', 'SAID', 'ALL', 'THREE', 'AND', 'GRACEFULLY', 'LIFTING', 'UP', 'THE', 'LONG', 'SKIRTS', 'OF', 'THEIR', 'SILK', 'DRESSES', 'THEY', 'LIGHTLY', 'RAN', 'ACROSS', 'THE', 'OPEN', 'SPACE', 'BETWEEN', 'THE', 'LAKE', 'AND', 'THE', 'THICKEST', 'COVERT', 'OF', 'THE', 'PARK'] +7127-75947-0038-2156: hyp=['LET', 'US', 'RUN', 'THEN', 'SAID', 'ALL', 'THREE', 'AND', 'GRACEFULLY', 'LIFTING', 'UP', 'THE', 'LONG', 'SKIRTS', 'OF', 'THEIR', 'SILK', 'DRESSES', 'THEY', 'LIGHTLY', 'RAN', 'ACROSS', 'THE', 'OPEN', 'SPACE', 'BETWEEN', 'THE', 'LAKE', 'AND', 'THE', 'THICKEST', 'COVERT', 'OF', 'THE', 'PARK'] +7127-75947-0039-2157: ref=['IN', 'FACT', 'THE', 'SOUND', 'OF', "MADAME'S", 'AND', 'THE', "QUEEN'S", 'CARRIAGES', 'COULD', 'BE', 'HEARD', 'IN', 'THE', 'DISTANCE', 'UPON', 'THE', 'HARD', 'DRY', 'GROUND', 'OF', 'THE', 'ROADS', 'FOLLOWED', 'BY', 'THE', 'MOUNTED', 'CAVALIERS'] +7127-75947-0039-2157: hyp=['IN', 'FACT', 'THE', 'SOUND', 'OF', "MADAME'S", 'AND', 'THE', "QUEEN'S", 'CARRIAGES', 'COULD', 'BE', 'HEARD', 'IN', 'THE', 'DISTANCE', 'UPON', 'THE', 'HARD', 'DRY', 'GROUND', 'OF', 'THE', 'ROADS', 'FOLLOWED', 'BY', 'THE', 'MOUNTAIN', 'CAVALIERS'] +7127-75947-0040-2158: ref=['IN', 'THIS', 'WAY', 'THE', 'FETE', 'OF', 'THE', 'WHOLE', 'COURT', 'WAS', 'A', 'FETE', 'ALSO', 'FOR', 'THE', 'MYSTERIOUS', 'INHABITANTS', 'OF', 'THE', 'FOREST', 'FOR', 'CERTAINLY', 'THE', 'DEER', 'IN', 'THE', 'BRAKE', 'THE', 'PHEASANT', 'ON', 'THE', 'BRANCH', 'THE', 'FOX', 'IN', 'ITS', 'HOLE', 'WERE', 'ALL', 'LISTENING'] +7127-75947-0040-2158: hyp=['IN', 'THIS', 'WAY', 'THE', 'FETE', 'OF', 'THE', 'WHOLE', 'COURT', 'WAS', 'A', 'FETE', 'ALSO', 'FOR', 'THE', 'MYSTERIOUS', 'INHABITANTS', 'OF', 'THE', 'FOREST', 'FOR', 'CERTAINLY', 'THE', 'DEER', 'IN', 'THE', 'BRAKE', 'THE', 'PHEASANT', 'ON', 'THE', 'BRANCH', 'THE', 'FOX', 'IN', 'ITS', 'HOLE', 'WERE', 'ALL', 'LISTENING'] +7176-88083-0000-2159: ref=['ALL', 'ABOUT', 'HIM', 'WAS', 'A', 'TUMULT', 'OF', 'BRIGHT', 'AND', 'BROKEN', 'COLOR', 'SCATTERED', 'IN', 'BROAD', 'SPLASHES'] +7176-88083-0000-2159: hyp=['ALL', 'ABOUT', 'HIM', 'WAS', 'A', 'TUMULT', 'OF', 'BRIGHT', 'AND', 'BROKEN', 'COLOR', 'SCATTERED', 'IN', 'BROAD', 'SPLASHES'] +7176-88083-0001-2160: ref=['THE', 'MERGANSER', 'HAD', 'A', 'CRESTED', 'HEAD', 'OF', 'IRIDESCENT', 'GREEN', 'BLACK', 'A', 'BROAD', 'COLLAR', 'OF', 'LUSTROUS', 'WHITE', 'BLACK', 'BACK', 'BLACK', 'AND', 'WHITE', 'WINGS', 'WHITE', 'BELLY', 'SIDES', 'FINELY', 'PENCILLED', 'IN', 'BLACK', 'AND', 'WHITE', 'AND', 'A', 'BREAST', 'OF', 'RICH', 'CHESTNUT', 'RED', 'STREAKED', 'WITH', 'BLACK'] +7176-88083-0001-2160: hyp=['THE', 'MERGANSER', 'HAD', 'A', 'CRESTED', 'HEAD', 'OF', 
'IRIDESCENT', 'GREEN', 'BLACK', 'A', 'BROAD', 'COLLAR', 'OF', 'LUSTROUS', 'WHITE', 'BLACK', 'BACK', 'BLACK', 'AND', 'WHITE', 'WINGS', 'WHITE', 'BELLY', 'SIDES', 'FINELY', 'PENCILLED', 'IN', 'BLACK', 'AND', 'WHITE', 'AND', 'A', 'BREAST', 'OF', 'RICH', 'CHESTNUT', 'RED', 'STREAKED', 'WITH', 'BLACK'] +7176-88083-0002-2161: ref=['HIS', 'FEET', 'WERE', 'RED', 'HIS', 'LONG', 'NARROW', 'BEAK', 'WITH', 'ITS', 'SAW', 'TOOTHED', 'EDGES', 'AND', 'SHARP', 'HOOKED', 'TIP', 'WAS', 'BRIGHT', 'RED'] +7176-88083-0002-2161: hyp=['HIS', 'FEET', 'WERE', 'RED', 'HIS', 'LONG', 'NARROW', 'BEAK', 'WITH', 'ITS', 'SAW', 'TOOTHED', 'EDGES', 'AND', 'SHARP', 'HOOKED', 'TIP', 'WAS', 'BRIGHT', 'RED'] +7176-88083-0003-2162: ref=['BUT', 'HERE', 'HE', 'WAS', 'AT', 'A', 'TERRIBLE', 'DISADVANTAGE', 'AS', 'COMPARED', 'WITH', 'THE', 'OWLS', 'HAWKS', 'AND', 'EAGLES', 'HE', 'HAD', 'NO', 'RENDING', 'CLAWS'] +7176-88083-0003-2162: hyp=['BUT', 'HERE', 'HE', 'WAS', 'AT', 'A', 'TERRIBLE', 'DISADVANTAGE', 'AS', 'COMPARED', 'WITH', 'THE', 'OWLS', 'HAWKS', 'AND', 'EAGLES', 'HE', 'HAD', 'NO', 'RENDING', 'CLAWS'] +7176-88083-0004-2163: ref=['BUT', 'SUDDENLY', 'STRAIGHT', 'AND', 'SWIFT', 'AS', 'A', 'DIVING', 'CORMORANT', 'HE', 'SHOT', 'DOWN', 'INTO', 'THE', 'TORRENT', 'AND', 'DISAPPEARED', 'BENEATH', 'THE', 'SURFACE'] +7176-88083-0004-2163: hyp=['BUT', 'SUDDENLY', 'STRAIGHT', 'AND', 'SWIFT', 'AS', 'A', 'DIVING', 'CORMERANT', 'HE', 'SHOT', 'DOWN', 'INTO', 'THE', 'TORRENT', 'AND', 'DISAPPEARED', 'BENEATH', 'THE', 'SURFACE'] +7176-88083-0005-2164: ref=['ONCE', 'FAIRLY', 'A', 'WING', 'HOWEVER', 'HE', 'WHEELED', 'AND', 'MADE', 'BACK', 'HURRIEDLY', 'FOR', 'HIS', 'PERCH'] +7176-88083-0005-2164: hyp=['ONCE', 'FAIRLY', 'AWING', 'HOWEVER', 'HE', 'WHEELED', 'AND', 'MADE', 'BACK', 'HURRIEDLY', 'FOR', 'HIS', 'PERCH'] +7176-88083-0006-2165: ref=['IT', 'MIGHT', 'HAVE', 'SEEMED', 'THAT', 'A', 'TROUT', 'OF', 'THIS', 'SIZE', 'WAS', 'A', 'FAIRLY', 'SUBSTANTIAL', 'MEAL'] +7176-88083-0006-2165: hyp=['IT', 'MIGHT', 'HAVE', 'SEEMED', 'THAT', 'A', 'TROUT', 'OF', 'THIS', 'SIZE', 'WAS', 'A', 'FAIRLY', 'SUBSTANTIAL', 'MEAL'] +7176-88083-0007-2166: ref=['BUT', 'SUCH', 'WAS', 'HIS', 'KEENNESS', 'THAT', 'EVEN', 'WHILE', 'THE', 'WIDE', 'FLUKES', 'OF', 'HIS', 'ENGORGED', 'VICTIM', 'WERE', 'STILL', 'STICKING', 'OUT', 'AT', 'THE', 'CORNERS', 'OF', 'HIS', 'BEAK', 'HIS', 'FIERCE', 'RED', 'EYES', 'WERE', 'ONCE', 'MORE', 'PEERING', 'DOWNWARD', 'INTO', 'THE', 'TORRENT', 'IN', 'SEARCH', 'OF', 'FRESH', 'PREY'] +7176-88083-0007-2166: hyp=['BUT', 'SUCH', 'WAS', 'HIS', 'KEENNESS', 'THAT', 'EVEN', 'WHILE', 'THE', 'WIDE', 'FLUKES', 'OF', 'HIS', 'ENGORGED', 'VICTIM', 'WERE', 'STILL', 'STICKING', 'OUT', 'AT', 'THE', 'CORNERS', 'OF', 'HIS', 'BEAK', 'HIS', 'FIERCE', 'RED', 'EYES', 'WERE', 'ONCE', 'MORE', 'PEERING', 'DOWNWARD', 'INTO', 'THE', 'TORRENT', 'IN', 'SEARCH', 'OF', 'FRESH', 'PREY'] +7176-88083-0008-2167: ref=['IN', 'DESPAIR', 'HE', 'HURLED', 'HIMSELF', 'DOWNWARD', 'TOO', 'SOON'] +7176-88083-0008-2167: hyp=['IN', 'DESPAIR', 'HE', 'HURLED', 'HIMSELF', 'DOWNWARD', 'TOO', 'SOON'] +7176-88083-0009-2168: ref=['THE', 'GREAT', 'HAWK', 'FOLLOWED', 'HURRIEDLY', 'TO', 'RETRIEVE', 'HIS', 'PREY', 'FROM', 'THE', 'GROUND'] +7176-88083-0009-2168: hyp=['THE', 'GREAT', 'HAWK', 'FOLLOWED', 'HURRIEDLY', 'TO', 'RETRIEVE', 'HIS', 'PREY', 'FROM', 'THE', 'GROUND'] +7176-88083-0010-2169: ref=['THE', 'CAT', 'GROWLED', 'SOFTLY', 'PICKED', 'UP', 'THE', 'PRIZE', 'IN', 'HER', 'JAWS', 'AND', 'TROTTED', 'INTO', 'THE', 'BUSHES', 'TO', 'DEVOUR', 'IT'] +7176-88083-0010-2169: hyp=['THE', 'CAT', 'GROWLED', 'SOFTLY', 
'PICKED', 'UP', 'THE', 'PRIZE', 'IN', 'HER', 'JAWS', 'AND', 'TROTTED', 'INTO', 'THE', 'BUSHES', 'TO', 'DEVOUR', 'IT'] +7176-88083-0011-2170: ref=['IN', 'FACT', 'HE', 'HAD', 'JUST', 'FINISHED', 'IT', 'THE', 'LAST', 'OF', 'THE', "TROUT'S", 'TAIL', 'HAD', 'JUST', 'VANISHED', 'WITH', 'A', 'SPASM', 'DOWN', 'HIS', 'STRAINED', 'GULLET', 'WHEN', 'THE', 'BAFFLED', 'HAWK', 'CAUGHT', 'SIGHT', 'OF', 'HIM', 'AND', 'SWOOPED'] +7176-88083-0011-2170: hyp=['IN', 'FACT', 'HE', 'HAD', 'JUST', 'FINISHED', 'IT', 'THE', 'LAST', 'OF', 'THE', "TROUT'S", 'TAIL', 'HAD', 'JUST', 'VANISHED', 'WITH', 'A', 'SPASM', 'DOWN', 'HIS', 'STRAINED', 'GULLET', 'WHEN', 'THE', 'BAFFLED', 'HAWK', 'CAUGHT', 'SIGHT', 'OF', 'HIM', 'AND', 'SWOOPED'] +7176-88083-0012-2171: ref=['THE', 'HAWK', 'ALIGHTED', 'ON', 'THE', 'DEAD', 'BRANCH', 'AND', 'SAT', 'UPRIGHT', 'MOTIONLESS', 'AS', 'IF', 'SURPRISED'] +7176-88083-0012-2171: hyp=['THE', 'HAWK', 'ALIGHTED', 'ON', 'THE', 'DEAD', 'BRANCH', 'AND', 'SAT', 'UPRIGHT', 'MOTIONLESS', 'AS', 'IF', 'SURPRISED'] +7176-88083-0013-2172: ref=['LIKE', 'HIS', 'UNFORTUNATE', 'LITTLE', 'COUSIN', 'THE', 'TEAL', 'HE', 'TOO', 'HAD', 'FELT', 'THE', 'FEAR', 'OF', 'DEATH', 'SMITTEN', 'INTO', 'HIS', 'HEART', 'AND', 'WAS', 'HEADING', 'DESPERATELY', 'FOR', 'THE', 'REFUGE', 'OF', 'SOME', 'DARK', 'OVERHANGING', 'BANK', 'DEEP', 'FRINGED', 'WITH', 'WEEDS', 'WHERE', 'THE', 'DREADFUL', 'EYE', 'OF', 'THE', 'HAWK', 'SHOULD', 'NOT', 'DISCERN', 'HIM'] +7176-88083-0013-2172: hyp=['LIKE', 'HIS', 'UNFORTUNATE', 'LITTLE', 'COUSIN', 'THE', 'TEAL', 'HE', 'TOO', 'HAD', 'FELT', 'THE', 'FEAR', 'OF', 'DEATH', 'SMITTEN', 'INTO', 'HIS', 'HEART', 'AND', 'WAS', 'HEADING', 'DESPERATELY', 'FOR', 'THE', 'REFUGE', 'OF', 'SOME', 'DARK', 'OVERHANGING', 'BANK', 'DEEP', 'FRINGED', 'WITH', 'WEEDS', 'WHERE', 'THE', 'DREADFUL', 'EYE', 'OF', 'THE', 'HAWK', 'SHOULD', 'NOT', 'DISCERN', 'HIM'] +7176-88083-0014-2173: ref=['THE', 'HAWK', 'SAT', 'UPON', 'THE', 'BRANCH', 'AND', 'WATCHED', 'HIS', 'QUARRY', 'SWIMMING', 'BENEATH', 'THE', 'SURFACE'] +7176-88083-0014-2173: hyp=['THE', 'HAWK', 'SAT', 'UPON', 'THE', 'BRANCH', 'AND', 'WATCHED', 'HIS', 'QUARRY', 'SWIMMING', 'BENEATH', 'THE', 'SURFACE'] +7176-88083-0015-2174: ref=['ALMOST', 'INSTANTLY', 'HE', 'WAS', 'FORCED', 'TO', 'THE', 'TOP'] +7176-88083-0015-2174: hyp=['ALMOST', 'INSTANTLY', 'HE', 'WAS', 'FORCED', 'TO', 'THE', 'TOP'] +7176-88083-0016-2175: ref=['STRAIGHTWAY', 'THE', 'HAWK', 'GLIDED', 'FROM', 'HIS', 'PERCH', 'AND', 'DARTED', 'AFTER', 'HIM'] +7176-88083-0016-2175: hyp=['STRAIGHTWAY', 'THE', 'HAWK', 'GLIDED', 'FROM', 'HIS', 'PERCH', 'AND', 'DARTED', 'AFTER', 'HIM'] +7176-88083-0017-2176: ref=['BUT', 'AT', 'THIS', 'POINT', 'IN', 'THE', 'RAPIDS', 'IT', 'WAS', 'IMPOSSIBLE', 'FOR', 'HIM', 'TO', 'STAY', 'DOWN'] +7176-88083-0017-2176: hyp=['BUT', 'AT', 'THIS', 'POINT', 'IN', 'THE', 'RAPIDS', 'IT', 'WAS', 'IMPOSSIBLE', 'FOR', 'HIM', 'TO', 'STAY', 'DOWN'] +7176-88083-0018-2177: ref=['BUT', 'THIS', 'FREQUENTER', 'OF', 'THE', 'HEIGHTS', 'OF', 'AIR', 'FOR', 'ALL', 'HIS', 'SAVAGE', 'VALOR', 'WAS', 'TROUBLED', 'AT', 'THE', 'LEAPING', 'WAVES', 'AND', 'THE', 'TOSSING', 'FOAM', 'OF', 'THESE', 'MAD', 'RAPIDS', 'HE', 'DID', 'NOT', 'UNDERSTAND', 'THEM'] +7176-88083-0018-2177: hyp=['BUT', 'THIS', 'FREQUENTER', 'OF', 'THE', 'HEIGHTS', 'OF', 'AIR', 'FOR', 'ALL', 'HIS', 'SAVAGE', 'VALOR', 'WAS', 'TROUBLED', 'AT', 'THE', 'LEAPING', 'WAVES', 'AND', 'THE', 'TOSSING', 'FOAM', 'OF', 'THESE', 'MAD', 'RAPIDS', 'HE', 'DID', 'NOT', 'UNDERSTAND', 'THEM'] +7176-88083-0019-2178: ref=['AS', 'HE', 'FLEW', 'HIS', 'DOWN', 'REACHING', 
'CLUTCHING', 'TALONS', 'WERE', 'NOT', 'HALF', 'A', 'YARD', 'ABOVE', 'THE', "FUGITIVE'S", 'HEAD'] +7176-88083-0019-2178: hyp=['AS', 'HE', 'FLEW', 'HIS', 'DOWN', 'REACHING', 'CLUTCHING', 'TALONS', 'WERE', 'NOT', 'HALF', 'A', 'YARD', 'ABOVE', 'THE', "FUGITIVE'S", 'HEAD'] +7176-88083-0020-2179: ref=['WHERE', 'THE', 'WAVES', 'FOR', 'AN', 'INSTANT', 'SANK', 'THEY', 'CAME', 'CLOSER', 'BUT', 'NOT', 'QUITE', 'WITHIN', 'GRASPING', 'REACH'] +7176-88083-0020-2179: hyp=['WHERE', 'THE', 'WAVES', 'FOR', 'AN', 'INSTANT', 'SANK', 'THEY', 'CAME', 'CLOSER', 'BUT', 'NOT', 'QUITE', 'WITHIN', 'GRASPING', 'REACH'] +7176-88083-0021-2180: ref=['BUT', 'AS', 'BEFORE', 'THE', 'LEAPING', 'WAVES', 'OF', 'THE', 'RAPIDS', 'WERE', 'TOO', 'MUCH', 'FOR', 'HIS', 'PURSUER', 'AND', 'HE', 'WAS', 'ABLE', 'TO', 'FLAP', 'HIS', 'WAY', 'ONWARD', 'IN', 'A', 'CLOUD', 'OF', 'FOAM', 'WHILE', 'DOOM', 'HUNG', 'LOW', 'ABOVE', 'HIS', 'HEAD', 'YET', 'HESITATED', 'TO', 'STRIKE'] +7176-88083-0021-2180: hyp=['BUT', 'AS', 'BEFORE', 'THE', 'LEAPING', 'WAVES', 'OF', 'THE', 'RAPIDS', 'WERE', 'TOO', 'MUCH', 'FOR', 'HIS', 'PURSUER', 'AND', 'HE', 'WAS', 'ABLE', 'TO', 'FLAP', 'HIS', 'WAY', 'ONWARD', 'IN', 'A', 'CLOUD', 'OF', 'FOAM', 'WHILE', 'DOOM', 'HUNG', 'LOW', 'ABOVE', 'HIS', 'HEAD', 'YET', 'HESITATED', 'TO', 'STRIKE'] +7176-88083-0022-2181: ref=['THE', 'HAWK', 'EMBITTERED', 'BY', 'THE', 'LOSS', 'OF', 'HIS', 'FIRST', 'QUARRY', 'HAD', 'BECOME', 'AS', 'DOGGED', 'IN', 'PURSUIT', 'AS', 'A', 'WEASEL', 'NOT', 'TO', 'BE', 'SHAKEN', 'OFF', 'OR', 'EVADED', 'OR', 'DECEIVED'] +7176-88083-0022-2181: hyp=['THE', 'HAWK', 'EMBITTERED', 'BY', 'THE', 'LOSS', 'OF', 'HIS', 'FIRST', 'QUARRY', 'HAD', 'BECOME', 'AS', 'DOGGED', 'IN', 'PURSUIT', 'AS', 'A', 'WEASEL', 'NOT', 'TO', 'BE', 'SHAKEN', 'OFF', 'OR', 'EVADED', 'OR', 'DECEIVED'] +7176-88083-0023-2182: ref=['HE', 'HAD', 'A', 'LOT', 'OF', 'LINE', 'OUT', 'AND', 'THE', 'PLACE', 'WAS', 'NONE', 'TOO', 'FREE', 'FOR', 'A', 'LONG', 'CAST', 'BUT', 'HE', 'WAS', 'IMPATIENT', 'TO', 'DROP', 'HIS', 'FLIES', 'AGAIN', 'ON', 'THE', 'SPOT', 'WHERE', 'THE', 'BIG', 'FISH', 'WAS', 'FEEDING'] +7176-88083-0023-2182: hyp=['HE', 'HAD', 'A', 'LOT', 'OF', 'LINE', 'OUT', 'AND', 'THE', 'PLACE', 'WAS', 'NONE', 'TOO', 'FREE', 'FOR', 'A', 'LONG', 'CAST', 'BUT', 'HE', 'WAS', 'IMPATIENT', 'TO', 'DROP', 'HIS', 'FLIES', 'AGAIN', 'ON', 'THE', 'SPOT', 'WHERE', 'THE', 'BIG', 'FISH', 'WAS', 'FEEDING'] +7176-88083-0024-2183: ref=['THE', 'LAST', 'DROP', 'FLY', 'AS', 'LUCK', 'WOULD', 'HAVE', 'IT', 'CAUGHT', 'JUST', 'IN', 'THE', 'CORNER', 'OF', 'THE', "HAWK'S", 'ANGRILY', 'OPEN', 'BEAK', 'HOOKING', 'ITSELF', 'FIRMLY'] +7176-88083-0024-2183: hyp=['THE', 'LAST', 'DROP', 'FLY', 'AS', 'LUCK', 'WOULD', 'HAVE', 'IT', 'GOT', 'JUST', 'IN', 'THE', 'CORNER', 'OF', 'THE', "HAWK'S", 'ANGRILY', 'OPEN', 'BEAK', 'HOOKING', 'ITSELF', 'FIRMLY'] +7176-88083-0025-2184: ref=['AT', 'THE', 'SUDDEN', 'SHARP', 'STING', 'OF', 'IT', 'THE', 'GREAT', 'BIRD', 'TURNED', 'HIS', 'HEAD', 'AND', 'NOTICED', 'FOR', 'THE', 'FIRST', 'TIME', 'THE', 'FISHERMAN', 'STANDING', 'ON', 'THE', 'BANK'] +7176-88083-0025-2184: hyp=['AT', 'THE', 'SUDDEN', 'SHARP', 'STING', 'OF', 'IT', 'THE', 'GREAT', 'BIRD', 'TURNED', 'HIS', 'HEAD', 'AND', 'NOTICED', 'FOR', 'THE', 'FIRST', 'TIME', 'THE', 'FISHERMAN', 'STANDING', 'ON', 'THE', 'BANK'] +7176-88083-0026-2185: ref=['THE', 'DRAG', 'UPON', 'HIS', 'BEAK', 'AND', 'THE', 'LIGHT', 'CHECK', 'UPON', 'HIS', 'WINGS', 'WERE', 'INEXPLICABLE', 'TO', 'HIM', 'AND', 'APPALLING'] +7176-88083-0026-2185: hyp=['THE', 'DRAG', 'UPON', 'HIS', 'BEAK', 'AND', 'THE', 'LIGHT', 'CHECK', 
'UPON', 'HIS', 'WINGS', 'WERE', 'INEXPLICABLE', 'TO', 'HIM', 'AND', 'APPALLING'] +7176-88083-0027-2186: ref=['THEN', 'THE', 'LEADER', 'PARTED', 'FROM', 'THE', 'LINE'] +7176-88083-0027-2186: hyp=['THEN', 'THE', 'LEADER', 'PARTED', 'FROM', 'THE', 'LINE'] +7176-92135-0000-2187: ref=['HE', 'IS', 'A', 'WELCOME', 'FIGURE', 'AT', 'THE', 'GARDEN', 'PARTIES', 'OF', 'THE', 'ELECT', 'WHO', 'ARE', 'ALWAYS', 'READY', 'TO', 'ENCOURAGE', 'HIM', 'BY', 'ACCEPTING', 'FREE', 'SEATS', 'FOR', 'HIS', 'PLAY', 'ACTOR', 'MANAGERS', 'NOD', 'TO', 'HIM', 'EDITORS', 'ALLOW', 'HIM', 'TO', 'CONTRIBUTE', 'WITHOUT', 'CHARGE', 'TO', 'A', 'SYMPOSIUM', 'ON', 'THE', 'PRICE', 'OF', 'GOLF', 'BALLS'] +7176-92135-0000-2187: hyp=['HE', 'IS', 'A', 'WELCOME', 'FIGURE', 'AT', 'THE', 'GARDEN', 'PARTIES', 'OF', 'THE', 'ELECT', 'WHO', 'ARE', 'ALWAYS', 'READY', 'TO', 'ENCOURAGE', 'HIM', 'BY', 'ACCEPTING', 'FREE', 'SEATS', 'FOR', 'HIS', 'PLAY', 'ACTOR', 'MANAGERS', 'NOD', 'TO', 'HIM', 'EDITORS', 'ALLOW', 'HIM', 'TO', 'CONTRIBUTE', 'WITHOUT', 'CHARGE', 'TO', 'A', 'SYMPOSIUM', 'ON', 'THE', 'PRICE', 'OF', 'GOLF', 'BALLS'] +7176-92135-0001-2188: ref=['IN', 'SHORT', 'HE', 'BECOMES', 'A', 'PROMINENT', 'FIGURE', 'IN', 'LONDON', 'SOCIETY', 'AND', 'IF', 'HE', 'IS', 'NOT', 'CAREFUL', 'SOMEBODY', 'WILL', 'SAY', 'SO'] +7176-92135-0001-2188: hyp=['IN', 'SHORT', 'HE', 'BECOMES', 'A', 'PROMINENT', 'FIGURE', 'IN', 'LONDON', 'SOCIETY', 'AND', 'IF', 'HE', 'IS', 'NOT', 'CAREFUL', 'SOMEBODY', 'WILL', 'SAY', 'SO'] +7176-92135-0002-2189: ref=['BUT', 'EVEN', 'THE', 'UNSUCCESSFUL', 'DRAMATIST', 'HAS', 'HIS', 'MOMENTS'] +7176-92135-0002-2189: hyp=['BUT', 'EVEN', 'THE', 'UNSUCCESSFUL', 'DRAMATIST', 'HAS', 'HIS', 'MOMENTS'] +7176-92135-0003-2190: ref=['YOUR', 'PLAY', 'MUST', 'BE', 'NOT', 'MERELY', 'A', 'GOOD', 'PLAY', 'BUT', 'A', 'SUCCESSFUL', 'ONE'] +7176-92135-0003-2190: hyp=['YOUR', 'PLAY', 'MUST', 'BE', 'NOT', 'MERELY', 'A', 'GOOD', 'PLAY', 'BUT', 'A', 'SUCCESSFUL', 'ONE'] +7176-92135-0004-2191: ref=['FRANKLY', 'I', 'CANNOT', 'ALWAYS', 'SAY'] +7176-92135-0004-2191: hyp=['FRANKLY', 'I', 'CANNOT', 'ALWAYS', 'SAY'] +7176-92135-0005-2192: ref=['BUT', 'SUPPOSE', 'YOU', 'SAID', "I'M", 'FOND', 'OF', 'WRITING', 'MY', 'PEOPLE', 'ALWAYS', 'SAY', 'MY', 'LETTERS', 'HOME', 'ARE', 'GOOD', 'ENOUGH', 'FOR', 'PUNCH'] +7176-92135-0005-2192: hyp=['BUT', 'SUPPOSE', 'YOU', 'SAID', "I'M", 'FOND', 'OF', 'WRITING', 'MY', 'PEOPLE', 'ALWAYS', 'SAY', 'MY', 'LETTERS', 'HOME', 'ARE', 'GOOD', 'ENOUGH', 'FOR', 'PUNCH'] +7176-92135-0006-2193: ref=["I'VE", 'GOT', 'A', 'LITTLE', 'IDEA', 'FOR', 'A', 'PLAY', 'ABOUT', 'A', 'MAN', 'AND', 'A', 'WOMAN', 'AND', 'ANOTHER', 'WOMAN', 'AND', 'BUT', 'PERHAPS', "I'D", 'BETTER', 'KEEP', 'THE', 'PLOT', 'A', 'SECRET', 'FOR', 'THE', 'MOMENT'] +7176-92135-0006-2193: hyp=["I'VE", 'GOT', 'A', 'LITTLE', 'IDEA', 'FOR', 'A', 'PLAY', 'ABOUT', 'A', 'MAN', 'AND', 'A', 'WOMAN', 'AND', 'ANOTHER', 'WOMAN', 'AND', 'BUT', 'PERHAPS', "I'D", 'BETTER', 'KEEP', 'THE', 'PLOT', 'A', 'SECRET', 'FOR', 'THE', 'MOMENT'] +7176-92135-0007-2194: ref=['ANYHOW', "IT'S", 'JOLLY', 'EXCITING', 'AND', 'I', 'CAN', 'DO', 'THE', 'DIALOGUE', 'ALL', 'RIGHT'] +7176-92135-0007-2194: hyp=['ANYHOW', "IT'S", 'JOLLY', 'EXCITING', 'AND', 'I', 'CAN', 'DO', 'THE', 'DIALOGUE', 'ALL', 'RIGHT'] +7176-92135-0008-2195: ref=['LEND', 'ME', 'YOUR', 'EAR', 'FOR', 'TEN', 'MINUTES', 'AND', 'YOU', 'SHALL', 'LEARN', 'JUST', 'WHAT', 'STAGECRAFT', 'IS'] +7176-92135-0008-2195: hyp=['LEND', 'ME', 'YOUR', 'EAR', 'FOR', 'TEN', 'MINUTES', 'AND', 'YOU', 'SHALL', 'LEARN', 'JUST', 'WHAT', 'STAGE', 'CRAFT', 'IS'] 
+7176-92135-0009-2196: ref=['AND', 'I', 'SHOULD', 'BEGIN', 'WITH', 'A', 'SHORT', 'HOMILY', 'ON', 'SOLILOQUY'] +7176-92135-0009-2196: hyp=['AND', 'I', 'SHOULD', 'BEGIN', 'WITH', 'A', 'SHORT', 'HOMILY', 'ON', 'SOLILOQUY'] +7176-92135-0010-2197: ref=['HAM', 'TO', 'BE', 'OR', 'NOT', 'TO', 'BE'] +7176-92135-0010-2197: hyp=['HIM', 'TO', 'BE', 'OR', 'NOT', 'TO', 'BE'] +7176-92135-0011-2198: ref=['NOW', 'THE', 'OBJECT', 'OF', 'THIS', 'SOLILOQUY', 'IS', 'PLAIN'] +7176-92135-0011-2198: hyp=['NOW', 'THE', 'OBJECT', 'OF', 'THIS', 'SOLILOQUY', 'IS', 'PLAIN'] +7176-92135-0012-2199: ref=['INDEED', 'IRRESOLUTION', 'BEING', 'THE', 'KEYNOTE', 'OF', "HAMLET'S", 'SOLILOQUY', 'A', 'CLEVER', 'PLAYER', 'COULD', 'TO', 'SOME', 'EXTENT', 'INDICATE', 'THE', 'WHOLE', 'THIRTY', 'LINES', 'BY', 'A', 'SILENT', 'WORKING', 'OF', 'THE', 'JAW', 'BUT', 'AT', 'THE', 'SAME', 'TIME', 'IT', 'WOULD', 'BE', 'IDLE', 'TO', 'DENY', 'THAT', 'HE', 'WOULD', 'MISS', 'THE', 'FINER', 'SHADES', 'OF', 'THE', "DRAMATIST'S", 'MEANING'] +7176-92135-0012-2199: hyp=['INDEED', 'IRRESOLUTION', 'BEING', 'THE', 'KEYNOTE', 'OF', "HAMLET'S", 'SOLILOQUY', 'A', 'CLEVER', 'PLAYER', 'COULD', 'TO', 'SOME', 'EXTENT', 'INDICATE', 'THE', 'WHOLE', 'THIRTY', 'LINES', 'BY', 'A', 'SILENT', 'WORKING', 'OF', 'THE', 'JAW', 'BUT', 'AT', 'THE', 'SAME', 'TIME', 'IT', 'WOULD', 'BE', 'IDLE', 'TO', 'DENY', 'THAT', 'HE', 'WOULD', 'MISS', 'THE', 'FINER', 'SHADES', 'OF', 'THE', "DRAMATIST'S", 'MEANING'] +7176-92135-0013-2200: ref=['WE', 'MODERNS', 'HOWEVER', 'SEE', 'THE', 'ABSURDITY', 'OF', 'IT'] +7176-92135-0013-2200: hyp=['WE', 'MODERNS', 'HOWEVER', 'SEE', 'THE', 'ABSURDITY', 'OF', 'IT'] +7176-92135-0014-2201: ref=['IF', 'IT', 'BE', 'GRANTED', 'FIRST', 'THAT', 'THE', 'THOUGHTS', 'OF', 'A', 'CERTAIN', 'CHARACTER', 'SHOULD', 'BE', 'KNOWN', 'TO', 'THE', 'AUDIENCE', 'AND', 'SECONDLY', 'THAT', 'SOLILOQUY', 'OR', 'THE', 'HABIT', 'OF', 'THINKING', 'ALOUD', 'IS', 'IN', 'OPPOSITION', 'TO', 'MODERN', 'STAGE', 'TECHNIQUE', 'HOW', 'SHALL', 'A', 'SOLILOQUY', 'BE', 'AVOIDED', 'WITHOUT', 'DAMAGE', 'TO', 'THE', 'PLAY'] +7176-92135-0014-2201: hyp=['IF', 'IT', 'BE', 'GRANTED', 'FIRST', 'THAT', 'THE', 'THOUGHTS', 'OF', 'A', 'CERTAIN', 'CHARACTER', 'SHOULD', 'BE', 'KNOWN', 'TO', 'THE', 'AUDIENCE', 'AND', 'SECONDLY', 'THAT', 'SOLILOQUY', 'OR', 'THE', 'HABIT', 'OF', 'THINKING', 'ALOUD', 'IS', 'IN', 'OPPOSITION', 'TO', 'MODERN', 'STAGE', 'TECHNIQUE', 'HOW', 'SHALL', 'A', 'SOLILOQUY', 'BE', 'AVOIDED', 'WITHOUT', 'DAMAGE', 'TO', 'THE', 'PLAY'] +7176-92135-0015-2202: ref=['AND', 'SO', 'ON', 'TILL', 'YOU', 'GET', 'TO', 'THE', 'END', 'WHEN', 'OPHELIA', 'MIGHT', 'SAY', 'AH', 'YES', 'OR', 'SOMETHING', 'NON', 'COMMITTAL', 'OF', 'THAT', 'SORT'] +7176-92135-0015-2202: hyp=['AND', 'SO', 'ON', 'TILL', 'YOU', 'GET', 'TO', 'THE', 'END', 'WHEN', 'OPHELIA', 'MIGHT', 'SAY', 'AH', 'YES', 'OR', 'SOMETHING', 'NON', 'COMMITTAL', 'OF', 'THAT', 'SORT'] +7176-92135-0016-2203: ref=['THIS', 'WOULD', 'BE', 'AN', 'EASY', 'WAY', 'OF', 'DOING', 'IT', 'BUT', 'IT', 'WOULD', 'NOT', 'BE', 'THE', 'BEST', 'WAY', 'FOR', 'THE', 'REASON', 'THAT', 'IT', 'IS', 'TOO', 'EASY', 'TO', 'CALL', 'ATTENTION', 'TO', 'ITSELF'] +7176-92135-0016-2203: hyp=['THIS', 'WOULD', 'BE', 'AN', 'EASY', 'WAY', 'OF', 'DOING', 'IT', 'BUT', 'IT', 'WOULD', 'NOT', 'BE', 'THE', 'BEST', 'WAY', 'FOR', 'THE', 'REASON', 'THAT', 'IT', 'IS', 'TOO', 'EASY', 'TO', 'CALL', 'ATTENTION', 'TO', 'ITSELF'] +7176-92135-0017-2204: ref=['IN', 'THE', 'OLD', 'BADLY', 'MADE', 'PLAY', 'IT', 'WAS', 'FREQUENTLY', 'NECESSARY', 'FOR', 'ONE', 'OF', 'THE', 'CHARACTERS', 'TO', 'TAKE', 'THE', 
'AUDIENCE', 'INTO', 'HIS', 'CONFIDENCE'] +7176-92135-0017-2204: hyp=['IN', 'THE', 'OLD', 'BADLY', 'MADE', 'PLAY', 'IT', 'WAS', 'FREQUENTLY', 'NECESSARY', 'FOR', 'ONE', 'OF', 'THE', 'CHARACTERS', 'TO', 'TAKE', 'THE', 'AUDIENCE', 'INTO', 'HIS', 'CONFIDENCE'] +7176-92135-0018-2205: ref=['IN', 'THE', 'MODERN', 'WELL', 'CONSTRUCTED', 'PLAY', 'HE', 'SIMPLY', 'RINGS', 'UP', 'AN', 'IMAGINARY', 'CONFEDERATE', 'AND', 'TELLS', 'HIM', 'WHAT', 'HE', 'IS', 'GOING', 'TO', 'DO', 'COULD', 'ANYTHING', 'BE', 'MORE', 'NATURAL'] +7176-92135-0018-2205: hyp=['IN', 'THE', 'MODERN', 'WELL', 'CONSTRUCTED', 'PLAY', 'HE', 'SIMPLY', 'RINGS', 'UP', 'AN', 'IMAGINARY', 'CONFEDERATE', 'AND', 'TELLS', 'HIM', 'WHAT', 'HE', 'IS', 'GOING', 'TO', 'DO', 'COULD', 'ANYTHING', 'BE', 'MORE', 'NATURAL'] +7176-92135-0019-2206: ref=['I', 'WANT', 'DOUBLE', 'NINE', 'HAL', 'LO'] +7176-92135-0019-2206: hyp=['I', 'WANT', 'DOUBLE', 'NINE', 'HALLOA'] +7176-92135-0020-2207: ref=['DOUBLE', 'NINE', 'TWO', 'THREE', 'ELSINORE', 'DOUBLE', 'NINE', 'YES', 'HALLO', 'IS', 'THAT', 'YOU', 'HORATIO', 'HAMLET', 'SPEAKING'] +7176-92135-0020-2207: hyp=['DOUBLE', 'NINE', 'TWO', 'THREE', 'ELZINOR', 'DOUBLE', 'NOT', 'YES', 'HULLO', 'IS', 'THAT', 'YOU', 'HORATIO', 'HAMLET', 'SPEAKING'] +7176-92135-0021-2208: ref=['I', 'SAY', "I'VE", 'BEEN', 'WONDERING', 'ABOUT', 'THIS', 'BUSINESS'] +7176-92135-0021-2208: hyp=['I', 'SAY', "I'VE", 'BEEN', 'WONDERING', 'ABOUT', 'THIS', 'BUSINESS'] +7176-92135-0022-2209: ref=['TO', 'BE', 'OR', 'NOT', 'TO', 'BE', 'THAT', 'IS', 'THE', 'QUESTION', 'WHETHER', 'TIS', 'NOBLER', 'IN', 'THE', 'MIND', 'TO', 'SUFFER', 'THE', 'SLINGS', 'AND', 'ARROWS', 'WHAT', 'NO', 'HAMLET', 'SPEAKING'] +7176-92135-0022-2209: hyp=['TO', 'BE', 'OR', 'NOT', 'TO', 'BE', 'THAT', 'IS', 'THE', 'QUESTION', 'WHETHER', 'TIS', 'NOBLER', 'IN', 'THE', 'MIND', 'TO', 'SUFFER', 'THE', 'SLINGS', 'AND', 'ARROWS', 'WHAT', 'NO', 'HAMLET', 'SPEAKING'] +7176-92135-0023-2210: ref=['YOU', 'GAVE', 'ME', 'DOUBLE', 'FIVE', 'I', 'WANT', 'DOUBLE', 'NINE', 'HALLO', 'IS', 'THAT', 'YOU', 'HORATIO', 'HAMLET', 'SPEAKING'] +7176-92135-0023-2210: hyp=['YOU', 'GAVE', 'ME', 'DOUBLE', 'FIVE', 'I', 'WANT', 'DOUBLE', 'NINE', 'HALLO', 'IS', 'THAT', 'YOU', 'HORATIO', 'HAMLET', 'SPEAKING'] +7176-92135-0024-2211: ref=['TO', 'BE', 'OR', 'NOT', 'TO', 'BE', 'THAT', 'IS', 'THE', 'QUESTION', 'WHETHER', 'TIS', 'NOBLER'] +7176-92135-0024-2211: hyp=['TO', 'BE', 'OR', 'NOT', 'TO', 'BE', 'THAT', 'IS', 'THE', 'QUESTION', 'WHETHER', 'TIS', 'NOBLER'] +7176-92135-0025-2212: ref=['IT', 'IS', 'TO', 'LET', 'HAMLET', 'IF', 'THAT', 'HAPPEN', 'TO', 'BE', 'THE', 'NAME', 'OF', 'YOUR', 'CHARACTER', 'ENTER', 'WITH', 'A', 'SMALL', 'DOG', 'PET', 'FALCON', 'MONGOOSE', 'TAME', 'BEAR', 'OR', 'WHATEVER', 'ANIMAL', 'IS', 'MOST', 'IN', 'KEEPING', 'WITH', 'THE', 'PART', 'AND', 'CONFIDE', 'IN', 'THIS', 'ANIMAL', 'SUCH', 'SORROWS', 'HOPES', 'OR', 'SECRET', 'HISTORY', 'AS', 'THE', 'AUDIENCE', 'HAS', 'GOT', 'TO', 'KNOW'] +7176-92135-0025-2212: hyp=['IT', 'IS', 'TO', 'LET', 'HAMLET', 'IF', 'THAT', 'HAPPEN', 'TO', 'BE', 'THE', 'NAME', 'OF', 'YOUR', 'CHARACTER', 'ENTER', 'WITH', 'A', 'SMALL', 'DOG', 'PET', 'FALCON', 'MONGOOSE', 'TAME', 'BEAR', 'OR', 'WHATEVER', 'ANIMAL', 'IS', 'MOST', 'IN', 'KEEPING', 'WITH', 'THE', 'PART', 'AND', 'CONFIDE', 'IN', 'THIS', 'ANIMAL', 'SUCH', 'SORROWS', 'HOPES', 'OR', 'SECRET', 'HISTORY', 'AS', 'THE', 'AUDIENCE', 'HAS', 'GOT', 'TO', 'KNOW'] +7176-92135-0026-2213: ref=['ENTER', 'HAMLET', 'WITH', 'HIS', 'FAVOURITE', 'BOAR', 'HOUND'] +7176-92135-0026-2213: hyp=['ENTER', 'HAMLET', 'WITH', 'HIS', 'FAVOURITE', 
'BOARHOUND'] +7176-92135-0027-2214: ref=['LADY', 'LARKSPUR', 'STARTS', 'SUDDENLY', 'AND', 'TURNS', 'TOWARDS', 'HIM'] +7176-92135-0027-2214: hyp=['LADY', 'LARCHBUR', 'START', 'SUDDENLY', 'AND', 'TURNED', 'TOWARDS', 'HIM'] +7176-92135-0028-2215: ref=['LARKSPUR', 'BIT', 'ME', 'AGAIN', 'THIS', 'MORNING', 'FOR', 'THE', 'THIRD', 'TIME'] +7176-92135-0028-2215: hyp=['LARKS', 'FOR', 'BIT', 'ME', 'AGAIN', 'THIS', 'MORNING', 'FOR', 'THE', 'THIRD', 'TIME'] +7176-92135-0029-2216: ref=['I', 'WANT', 'TO', 'GET', 'AWAY', 'FROM', 'IT', 'ALL', 'SWOONS'] +7176-92135-0029-2216: hyp=['I', 'WANT', 'TO', 'GET', 'AWAY', 'FROM', 'IT', 'ALL', 'SWOON'] +7176-92135-0030-2217: ref=['ENTER', 'LORD', 'ARTHUR', 'FLUFFINOSE'] +7176-92135-0030-2217: hyp=['ENTERED', 'LORD', 'ARTHUR', "FLOPENNO'S"] +7176-92135-0031-2218: ref=['AND', 'THERE', 'YOU', 'ARE', 'YOU', 'WILL', 'OF', 'COURSE', 'APPRECIATE', 'THAT', 'THE', 'UNFINISHED', 'SENTENCES', 'NOT', 'ONLY', 'SAVE', 'TIME', 'BUT', 'ALSO', 'MAKE', 'THE', 'MANOEUVRING', 'VERY', 'MUCH', 'MORE', 'NATURAL'] +7176-92135-0031-2218: hyp=['AND', 'THERE', 'YOU', 'ARE', 'YOU', 'WILL', 'OF', 'COURSE', 'APPRECIATE', 'THAT', 'THE', 'UNFINISHED', 'SENTENCES', 'NOT', 'ONLY', 'SAVE', 'TIME', 'BUT', 'ALSO', 'MAKE', 'THE', 'MANOEUVRING', 'VERY', 'MUCH', 'MORE', 'NATURAL'] +7176-92135-0032-2219: ref=['HOW', 'YOU', 'MAY', 'BE', 'WONDERING', 'ARE', 'YOU', 'TO', 'BEGIN', 'YOUR', 'MASTERPIECE'] +7176-92135-0032-2219: hyp=['HOW', 'YOU', 'MAY', 'BE', 'WONDERING', 'ARE', 'YOU', 'TO', 'BEGIN', 'YOUR', 'MASTERPIECE'] +7176-92135-0033-2220: ref=['RELAPSES', 'INTO', 'SILENCE', 'FOR', 'THE', 'REST', 'OF', 'THE', 'EVENING'] +7176-92135-0033-2220: hyp=['RELAPSES', 'INTO', 'SILENCE', 'FOR', 'THE', 'REST', 'OF', 'THE', 'EVENING'] +7176-92135-0034-2221: ref=['THE', 'DUCHESS', 'OF', 'SOUTHBRIDGE', 'TO', 'LORD', 'REGGIE', 'OH', 'REGGIE', 'WHAT', 'DID', 'YOU', 'SAY'] +7176-92135-0034-2221: hyp=['THE', 'DUCHESS', 'OF', 'SOUTHBRIDGE', 'TO', 'LORD', 'REGGIE', 'OH', 'REGGIE', 'WHAT', 'DID', 'YOU', 'SAY'] +7176-92135-0035-2222: ref=['THEN', 'LORD', 'TUPPENY', 'WELL', 'WHAT', 'ABOUT', 'AUCTION'] +7176-92135-0035-2222: hyp=['THEN', 'LORD', 'TUPPENNY', 'WELL', 'WHAT', 'ABOUT', 'AUCTION'] +7176-92135-0036-2223: ref=['THE', 'CROWD', 'DRIFTS', 'OFF', 'LEAVING', 'THE', 'HERO', 'AND', 'HEROINE', 'ALONE', 'IN', 'THE', 'MIDDLE', 'OF', 'THE', 'STAGE', 'AND', 'THEN', 'YOU', 'CAN', 'BEGIN'] +7176-92135-0036-2223: hyp=['THE', 'CROWD', 'DRIFTS', 'OFF', 'LEAVING', 'THE', 'HERO', 'AND', 'HEROINE', 'ALONE', 'IN', 'THE', 'MIDDLE', 'OF', 'THE', 'STAGE', 'AND', 'THEN', 'YOU', 'CAN', 'BEGIN'] +7176-92135-0037-2224: ref=['THEN', 'IS', 'THE', 'TIME', 'TO', 'INTRODUCE', 'A', 'MEAL', 'ON', 'THE', 'STAGE'] +7176-92135-0037-2224: hyp=['THEN', 'IS', 'THE', 'TIME', 'TO', 'INTRODUCE', 'A', 'MEAL', 'ON', 'THE', 'STAGE'] +7176-92135-0038-2225: ref=['A', 'STAGE', 'MEAL', 'IS', 'POPULAR', 'BECAUSE', 'IT', 'PROVES', 'TO', 'THE', 'AUDIENCE', 'THAT', 'THE', 'ACTORS', 'EVEN', 'WHEN', 'CALLED', 'CHARLES', 'HAWTREY', 'OR', 'OWEN', 'NARES', 'ARE', 'REAL', 'PEOPLE', 'JUST', 'LIKE', 'YOU', 'AND', 'ME'] +7176-92135-0038-2225: hyp=['A', 'STAGE', 'MEAL', 'IS', 'POPULAR', 'BECAUSE', 'IT', 'PROVES', 'TO', 'THE', 'AUDIENCE', 'THAT', 'THE', 'ACTORS', 'EVEN', 'WHEN', 'CALLED', 'CHARLES', 'HOULTREE', 'OR', 'OWEN', 'NAYERS', 'ARE', 'REAL', 'PEOPLE', 'JUST', 'LIKE', 'YOU', 'AND', 'ME'] +7176-92135-0039-2226: ref=['TEA', 'PLEASE', 'MATTHEWS', 'BUTLER', 'IMPASSIVELY'] +7176-92135-0039-2226: hyp=['TEA', 'PLEASE', 'MATTHEWS', 'BUTLER', 'IMPASSIVELY'] +7176-92135-0040-2227: 
ref=['HOSTESS', 'REPLACES', 'LUMP', 'AND', 'INCLINES', 'EMPTY', 'TEAPOT', 'OVER', 'TRAY', 'FOR', 'A', 'MOMENT', 'THEN', 'HANDS', 'HIM', 'A', 'CUP', 'PAINTED', 'BROWN', 'INSIDE', 'THUS', 'DECEIVING', 'THE', 'GENTLEMAN', 'WITH', 'THE', 'TELESCOPE', 'IN', 'THE', 'UPPER', 'CIRCLE'] +7176-92135-0040-2227: hyp=['HOSTESS', 'REPLACES', 'LUMP', 'AND', 'INCLINES', 'EMPTY', 'TEAPOT', 'OVER', 'TRAY', 'FOR', 'A', 'MOMENT', 'THEN', 'HANDS', 'HIM', 'A', 'CUP', 'PAINTED', 'BROWN', 'INSIDE', 'THUS', 'DECEIVING', 'THE', 'GENTLEMAN', 'WITH', 'THE', 'TELESCOPE', 'IN', 'THE', 'UPPER', 'CIRCLE'] +7176-92135-0041-2228: ref=['RE', 'ENTER', 'BUTLER', 'AND', 'THREE', 'FOOTMEN', 'WHO', 'REMOVE', 'THE', 'TEA', 'THINGS', 'HOSTESS', 'TO', 'GUEST'] +7176-92135-0041-2228: hyp=['REENTRE', 'BUTLER', 'AND', 'THREE', 'FOOTMEN', 'WHO', 'MOVE', 'THE', 'TEA', 'THINGS', 'HOSTESS', 'TWO', 'GUESTS'] +7176-92135-0042-2229: ref=['IN', 'NOVELS', 'THE', 'HERO', 'HAS', 'OFTEN', 'PUSHED', 'HIS', 'MEALS', 'AWAY', 'UNTASTED', 'BUT', 'NO', 'STAGE', 'HERO', 'WOULD', 'DO', 'ANYTHING', 'SO', 'UNNATURAL', 'AS', 'THIS'] +7176-92135-0042-2229: hyp=['IN', 'NOVELS', 'THE', 'HERO', 'HAS', 'OFTEN', 'PUSHED', 'HIS', 'MEALS', 'AWAY', 'UNTASTED', 'BUT', 'NO', 'STEED', 'HERO', 'WOULD', 'DO', 'ANYTHING', 'SO', 'UNNATURAL', 'AS', 'THIS'] +7176-92135-0043-2230: ref=['TWO', 'BITES', 'ARE', 'MADE', 'AND', 'THE', 'BREAD', 'IS', 'CRUMBLED', 'WITH', 'AN', 'AIR', 'OF', 'GREAT', 'EAGERNESS', 'INDEED', 'ONE', 'FEELS', 'THAT', 'IN', 'REAL', 'LIFE', 'THE', 'GUEST', 'WOULD', 'CLUTCH', 'HOLD', 'OF', 'THE', 'FOOTMAN', 'AND', 'SAY', 'HALF', 'A', 'MO', 'OLD', 'CHAP', 'I', "HAVEN'T", 'NEARLY', 'FINISHED', 'BUT', 'THE', 'ACTOR', 'IS', 'BETTER', 'SCHOOLED', 'THAN', 'THIS'] +7176-92135-0043-2230: hyp=['TWO', 'BITES', 'ARE', 'MADE', 'AND', 'THE', 'BREAD', 'IS', 'CRUMBLED', 'WITH', 'AN', 'AIR', 'OF', 'GREAT', 'EAGERNESS', 'INDEED', 'ONE', 'FEELS', 'THAT', 'IN', 'REAL', 'LIFE', 'THE', 'GUEST', 'WOULD', 'CLUTCH', 'HOLD', 'OF', 'THE', 'FOOTMAN', 'AND', 'SAY', 'HALF', 'A', 'MO', 'OLD', 'CHAP', 'I', "HAVEN'T", 'NEARLY', 'FINISHED', 'BUT', 'THE', 'ACTOR', 'IS', 'BETTER', 'SCHOOLED', 'THAN', 'THIS'] +7176-92135-0044-2231: ref=['BUT', 'IT', 'IS', 'THE', 'CIGARETTE', 'WHICH', 'CHIEFLY', 'HAS', 'BROUGHT', 'THE', 'MODERN', 'DRAMA', 'TO', 'ITS', 'PRESENT', 'STATE', 'OF', 'PERFECTION'] +7176-92135-0044-2231: hyp=['BUT', 'IT', 'IS', 'A', 'CIGARETTE', 'WHICH', 'CHIEFLY', 'HAS', 'BROUGHT', 'THE', 'MODERN', 'DRAMA', 'TO', 'ITS', 'PRESENT', 'STATE', 'OF', 'PERFECTION'] +7176-92135-0045-2232: ref=['LORD', 'JOHN', 'TAKING', 'OUT', 'GOLD', 'CIGARETTE', 'CASE', 'FROM', 'HIS', 'LEFT', 'HAND', 'UPPER', 'WAISTCOAT', 'POCKET'] +7176-92135-0045-2232: hyp=['LORD', 'JOHN', 'TAKING', 'A', 'GOLD', 'CIGARETTE', 'CASE', 'FROM', 'HIS', 'LEFT', 'HAND', 'UPPER', 'WAISTCOAT', 'POCKET'] +7729-102255-0000-2233: ref=['THE', 'BOGUS', 'LEGISLATURE', 'NUMBERED', 'THIRTY', 'SIX', 'MEMBERS'] +7729-102255-0000-2233: hyp=['THE', 'BOGUS', 'LEGISLATURE', 'NUMBERED', 'THIRTY', 'SIX', 'MEMBERS'] +7729-102255-0001-2234: ref=['THIS', 'WAS', 'AT', 'THE', 'MARCH', 'ELECTION', 'EIGHTEEN', 'FIFTY', 'FIVE'] +7729-102255-0001-2234: hyp=['THIS', 'WAS', 'AT', 'THE', 'MARCH', 'ELECTION', 'EIGHTEEN', 'FIFTY', 'FIVE'] +7729-102255-0002-2235: ref=['THAT', "SUMMER'S", 'EMIGRATION', 'HOWEVER', 'BEING', 'MAINLY', 'FROM', 'THE', 'FREE', 'STATES', 'GREATLY', 'CHANGED', 'THE', 'RELATIVE', 'STRENGTH', 'OF', 'THE', 'TWO', 'PARTIES'] +7729-102255-0002-2235: hyp=['THAT', "SUMMER'S", 'EMIGRATION', 'HOWEVER', 'BEING', 'MAINLY', 'FROM', 'THE', 'FREE', 
'STATES', 'GREATLY', 'CHANGED', 'THE', 'RELATIVE', 'STRENGTH', 'OF', 'THE', 'TWO', 'PARTIES'] +7729-102255-0003-2236: ref=['FOR', 'GENERAL', 'SERVICE', 'THEREFORE', 'REQUIRING', 'NO', 'SPECIAL', 'EFFORT', 'THE', 'NUMERICAL', 'STRENGTH', 'OF', 'THE', 'FACTIONS', 'WAS', 'ABOUT', 'EQUAL', 'WHILE', 'ON', 'EXTRAORDINARY', 'OCCASIONS', 'THE', 'TWO', 'THOUSAND', 'BORDER', 'RUFFIAN', 'RESERVE', 'LYING', 'A', 'LITTLE', 'FARTHER', 'BACK', 'FROM', 'THE', 'STATE', 'LINE', 'COULD', 'AT', 'ANY', 'TIME', 'EASILY', 'TURN', 'THE', 'SCALE'] +7729-102255-0003-2236: hyp=['FOR', 'GENERAL', 'SERVICE', 'THEREFORE', 'REQUIRING', 'NO', 'SPECIAL', 'EFFORT', 'THE', 'NUMERICAL', 'STRENGTH', 'OF', 'THE', 'FACTIONS', 'WAS', 'ABOUT', 'EQUAL', 'WHILE', 'ON', 'EXTRAORDINARY', 'OCCASIONS', 'THE', 'TWO', 'THOUSAND', 'BORDER', 'RUFFIAN', 'RESERVE', 'LYING', 'A', 'LITTLE', 'FARTHER', 'BACK', 'FROM', 'THE', 'STATE', 'LINE', 'COULD', 'AT', 'ANY', 'TIME', 'EASILY', 'TURN', 'THE', 'SCALE'] +7729-102255-0004-2237: ref=['THE', 'FREE', 'STATE', 'MEN', 'HAD', 'ONLY', 'THEIR', 'CONVICTIONS', 'THEIR', 'INTELLIGENCE', 'THEIR', 'COURAGE', 'AND', 'THE', 'MORAL', 'SUPPORT', 'OF', 'THE', 'NORTH', 'THE', 'CONSPIRACY', 'HAD', 'ITS', 'SECRET', 'COMBINATION', 'THE', 'TERRITORIAL', 'OFFICIALS', 'THE', 'LEGISLATURE', 'THE', 'BOGUS', 'LAWS', 'THE', 'COURTS', 'THE', 'MILITIA', 'OFFICERS', 'THE', 'PRESIDENT', 'AND', 'THE', 'ARMY'] +7729-102255-0004-2237: hyp=['THE', 'FREE', 'STATE', 'MEN', 'HAD', 'ONLY', 'THEIR', 'CONVICTIONS', 'THEIR', 'INTELLIGENCE', 'THEIR', 'COURAGE', 'AND', 'THE', 'MORAL', 'SUPPORT', 'OF', 'THE', 'NORTH', 'THE', 'CONSPIRACY', 'HAD', 'ITS', 'SECRET', 'COMBINATION', 'THE', 'TERRITORIAL', 'OFFICIALS', 'THE', 'LEGISLATURE', 'THE', 'BOGUS', 'LAWS', 'THE', 'COURTS', 'THE', 'MILITIA', 'OFFICERS', 'THE', 'PRESIDENT', 'AND', 'THE', 'ARMY'] +7729-102255-0005-2238: ref=['THIS', 'WAS', 'A', 'FORMIDABLE', 'ARRAY', 'OF', 'ADVANTAGES', 'SLAVERY', 'WAS', 'PLAYING', 'WITH', 'LOADED', 'DICE'] +7729-102255-0005-2238: hyp=['THIS', 'WAS', 'A', 'FORMIDABLE', 'ARRAY', 'OF', 'ADVANTAGES', 'SLAVERY', 'WAS', 'PLAYING', 'WITH', 'LOADED', 'DICE'] +7729-102255-0006-2239: ref=['COMING', 'BY', 'WAY', 'OF', 'THE', 'MISSOURI', 'RIVER', 'TOWNS', 'HE', 'FELL', 'FIRST', 'AMONG', 'BORDER', 'RUFFIAN', 'COMPANIONSHIP', 'AND', 'INFLUENCES', 'AND', 'PERHAPS', 'HAVING', 'HIS', 'INCLINATIONS', 'ALREADY', 'MOLDED', 'BY', 'HIS', 'WASHINGTON', 'INSTRUCTIONS', 'HIS', 'EARLY', 'IMPRESSIONS', 'WERE', 'DECIDEDLY', 'ADVERSE', 'TO', 'THE', 'FREE', 'STATE', 'CAUSE'] +7729-102255-0006-2239: hyp=['COMMON', 'BY', 'WAY', 'OF', 'THE', 'MISSOURI', 'RIVER', 'TOWNS', 'HE', 'FELL', 'FIRST', 'AMONG', 'BORDER', 'RUFFIAN', 'COMPANIONSHIP', 'AND', 'INFLUENCES', 'AND', 'PERHAPS', 'HAVING', 'HIS', 'INCLINATIONS', 'ALREADY', 'MOLDED', 'BY', 'HIS', 'WASHINGTON', 'INSTRUCTIONS', 'HIS', 'EARLY', 'IMPRESSIONS', 'WERE', 'DECIDEDLY', 'ADVERSE', 'TO', 'THE', 'FREE', 'STATE', 'CAUSE'] +7729-102255-0007-2240: ref=['HIS', 'RECEPTION', 'SPEECH', 'AT', 'WESTPORT', 'IN', 'WHICH', 'HE', 'MAINTAINED', 'THE', 'LEGALITY', 'OF', 'THE', 'LEGISLATURE', 'AND', 'HIS', 'DETERMINATION', 'TO', 'ENFORCE', 'THEIR', 'LAWS', 'DELIGHTED', 'HIS', 'PRO', 'SLAVERY', 'AUDITORS'] +7729-102255-0007-2240: hyp=['HIS', 'RECEPTION', 'SPEECH', 'AT', 'WESTPORT', 'IN', 'WHICH', 'HE', 'MAINTAINED', 'THE', 'LEGALITY', 'OF', 'THE', 'LEGISLATURE', 'AND', 'HIS', 'DETERMINATION', 'TO', 'ENFORCE', 'THEIR', 'LAWS', 'DELIGHTED', 'HIS', 'PRO', 'SLAVERY', 'AUDITORS'] +7729-102255-0008-2241: ref=['ALL', 'THE', 'TERRITORIAL', 'DIGNITARIES', 'WERE', 
'PRESENT', 'GOVERNOR', 'SHANNON', 'PRESIDED', 'JOHN', 'CALHOUN', 'THE', 'SURVEYOR', 'GENERAL', 'MADE', 'THE', 'PRINCIPAL', 'SPEECH', 'A', 'DENUNCIATION', 'OF', 'THE', 'ABOLITIONISTS', 'SUPPORTING', 'THE', 'TOPEKA', 'MOVEMENT', 'CHIEF', 'JUSTICE', 'LECOMPTE', 'DIGNIFIED', 'THE', 'OCCASION', 'WITH', 'APPROVING', 'REMARKS'] +7729-102255-0008-2241: hyp=['ALL', 'THE', 'TERRITORIAL', 'DIGNITARIES', 'WERE', 'PRESENT', 'GOVERNOR', 'SHANNON', 'PRESIDED', 'JOHN', 'CALHOUN', 'THE', 'SURVEYOR', 'GENERAL', 'MADE', 'THE', 'PRINCIPAL', 'SPEECH', 'A', 'DENUNCIATION', 'OF', 'THE', 'ABOLITIONIST', 'SUPPORTING', 'THE', 'TOPEKA', 'MOVEMENT', 'CHIEF', 'JUSTICE', 'LEC', 'COMTE', 'DIGNIFIED', 'THE', 'OCCASION', 'WITH', 'APPROVING', 'REMARKS'] +7729-102255-0009-2242: ref=['ALL', 'DISSENT', 'ALL', 'NON', 'COMPLIANCE', 'ALL', 'HESITATION', 'ALL', 'MERE', 'SILENCE', 'EVEN', 'WERE', 'IN', 'THEIR', 'STRONGHOLD', 'TOWNS', 'LIKE', 'LEAVENWORTH', 'BRANDED', 'AS', 'ABOLITIONISM', 'DECLARED', 'TO', 'BE', 'HOSTILITY', 'TO', 'THE', 'PUBLIC', 'WELFARE', 'AND', 'PUNISHED', 'WITH', 'PROSCRIPTION', 'PERSONAL', 'VIOLENCE', 'EXPULSION', 'AND', 'FREQUENTLY', 'DEATH'] +7729-102255-0009-2242: hyp=['ALL', 'DISSENT', 'ALL', 'NON', 'COMPLIANCE', 'ALL', 'HESITATION', 'ALL', 'MERE', 'SILENCE', 'EVEN', 'WERE', 'IN', 'THEIR', 'STRONGHOLD', 'TOWNS', 'LIKE', 'LEAVENWORTH', 'BRANDED', 'AS', 'ABOLITIONISM', 'DECLARED', 'TO', 'BE', 'HOSTILITY', 'TO', 'THE', 'PUBLIC', 'WELFARE', 'AND', 'PUNISHED', 'WITH', 'PROSCRIPTION', 'PERSONAL', 'VIOLENCE', 'EXPULSION', 'AND', 'FREQUENTLY', 'DEATH'] +7729-102255-0010-2243: ref=['OF', 'THE', 'LYNCHINGS', 'THE', 'MOBS', 'AND', 'THE', 'MURDERS', 'IT', 'WOULD', 'BE', 'IMPOSSIBLE', 'EXCEPT', 'IN', 'A', 'VERY', 'EXTENDED', 'WORK', 'TO', 'NOTE', 'THE', 'FREQUENT', 'AND', 'ATROCIOUS', 'DETAILS'] +7729-102255-0010-2243: hyp=['OF', 'THE', 'LYNCHINGS', 'THE', 'MOBS', 'AND', 'THE', 'MURDERS', 'IT', 'WOULD', 'BE', 'IMPOSSIBLE', 'EXCEPT', 'IN', 'A', 'VERY', 'EXTENDED', 'WORK', 'TO', 'NOTE', 'THE', 'FREQUENT', 'AND', 'ATROCIOUS', 'DETAILS'] +7729-102255-0011-2244: ref=['THE', 'PRESENT', 'CHAPTERS', 'CAN', 'ONLY', 'TOUCH', 'UPON', 'THE', 'MORE', 'SALIENT', 'MOVEMENTS', 'OF', 'THE', 'CIVIL', 'WAR', 'IN', 'KANSAS', 'WHICH', 'HAPPILY', 'WERE', 'NOT', 'SANGUINARY', 'IF', 'HOWEVER', 'THE', 'INDIVIDUAL', 'AND', 'MORE', 'ISOLATED', 'CASES', 'OF', 'BLOODSHED', 'COULD', 'BE', 'DESCRIBED', 'THEY', 'WOULD', 'SHOW', 'A', 'STARTLING', 'AGGREGATE', 'OF', 'BARBARITY', 'AND', 'LOSS', 'OF', 'LIFE', 'FOR', "OPINION'S", 'SAKE'] +7729-102255-0011-2244: hyp=['THE', 'PRESENT', 'CHAPTERS', 'CAN', 'ONLY', 'TOUCH', 'UPON', 'THE', 'MORE', 'SALIENT', 'MOVEMENTS', 'OF', 'THE', 'CIVIL', 'WAR', 'IN', 'KANSAS', 'WHICH', 'HAPPILY', 'ARE', 'NOT', 'SANGUINARY', 'IF', 'HOWEVER', 'THE', 'INDIVIDUAL', 'AND', 'MORE', 'ISOLATED', 'CASES', 'OF', 'BLOODSHED', 'COULD', 'BE', 'DESCRIBED', 'THEY', 'WOULD', 'SHOW', 'A', 'STARTLING', 'AGGREGATE', 'OF', 'BARBARITY', 'AND', 'LOSS', 'OF', 'LIFE', 'FOR', "OPINION'S", 'SAKE'] +7729-102255-0012-2245: ref=['SEVERAL', 'HUNDRED', 'FREE', 'STATE', 'MEN', 'PROMPTLY', 'RESPONDED', 'TO', 'THE', 'SUMMONS'] +7729-102255-0012-2245: hyp=['SEVERAL', 'HUNDRED', 'FREE', 'STATE', 'MEN', 'PROMPTLY', 'RESPONDED', 'TO', 'THE', 'SUMMONS'] +7729-102255-0013-2246: ref=['IT', 'WAS', 'IN', 'FACT', 'THE', 'BEST', 'WEAPON', 'OF', 'ITS', 'DAY'] +7729-102255-0013-2246: hyp=['IT', 'WAS', 'IN', 'FACT', 'THE', 'BEST', 'WEAPON', 'OF', 'ITS', 'DAY'] +7729-102255-0014-2247: ref=['THE', 'LEADERS', 'OF', 'THE', 'CONSPIRACY', 'BECAME', 'DISTRUSTFUL', 'OF', 
'THEIR', 'POWER', 'TO', 'CRUSH', 'THE', 'TOWN'] +7729-102255-0014-2247: hyp=['THE', 'LEADERS', 'OF', 'THE', 'CONSPIRACY', 'BECAME', 'DISTRUSTFUL', 'OF', 'THEIR', 'POWER', 'TO', 'CRUSH', 'THE', 'TOWN'] +7729-102255-0015-2248: ref=['ONE', 'OF', 'HIS', 'MILITIA', 'GENERALS', 'SUGGESTED', 'THAT', 'THE', 'GOVERNOR', 'SHOULD', 'REQUIRE', 'THE', 'OUTLAWS', 'AT', 'LAWRENCE', 'AND', 'ELSEWHERE', 'TO', 'SURRENDER', 'THE', 'SHARPS', 'RIFLES', 'ANOTHER', 'WROTE', 'ASKING', 'HIM', 'TO', 'CALL', 'OUT', 'THE', 'GOVERNMENT', 'TROOPS', 'AT', 'FORT', 'LEAVENWORTH'] +7729-102255-0015-2248: hyp=['ONE', 'OF', 'HIS', 'MILITIA', 'GENERALS', 'SUGGESTED', 'THAT', 'THE', 'GOVERNOR', 'SHOULD', 'REQUIRE', 'THE', 'OUTLAWS', 'AT', 'LAWRENCE', 'AND', 'ELSEWHERE', 'TO', 'SURRENDER', 'THE', "SHARP'S", 'RIFLES', 'ANOTHER', 'WROTE', 'ASKING', 'HIM', 'TO', 'CALL', 'OUT', 'THE', 'GOVERNMENT', 'TROOPS', 'AT', 'FORT', 'LEAVENWORTH'] +7729-102255-0016-2249: ref=['THE', 'GOVERNOR', 'ON', 'HIS', 'PART', 'BECOMING', 'DOUBTFUL', 'OF', 'THE', 'LEGALITY', 'OF', 'EMPLOYING', 'MISSOURI', 'MILITIA', 'TO', 'ENFORCE', 'KANSAS', 'LAWS', 'WAS', 'ALSO', 'EAGER', 'TO', 'SECURE', 'THE', 'HELP', 'OF', 'FEDERAL', 'TROOPS'] +7729-102255-0016-2249: hyp=['THE', 'GOVERNOR', 'ON', 'HIS', 'PART', 'BECOMING', 'DOUBTFUL', 'OF', 'THE', 'LEGALITY', 'OF', 'EMPLOYING', 'MISSOURI', 'MILITIA', 'TO', 'ENFORCE', 'KANSAS', 'LAWS', 'WAS', 'ALSO', 'EAGER', 'TO', 'SECURE', 'THE', 'HELP', 'OF', 'FEDERAL', 'TROOPS'] +7729-102255-0017-2250: ref=['SHERIFF', 'JONES', 'HAD', 'HIS', 'POCKETS', 'ALWAYS', 'FULL', 'OF', 'WRITS', 'ISSUED', 'IN', 'THE', 'SPIRIT', 'OF', 'PERSECUTION', 'BUT', 'WAS', 'OFTEN', 'BAFFLED', 'BY', 'THE', 'SHARP', 'WITS', 'AND', 'READY', 'RESOURCES', 'OF', 'THE', 'FREE', 'STATE', 'PEOPLE', 'AND', 'SOMETIMES', 'DEFIED', 'OUTRIGHT'] +7729-102255-0017-2250: hyp=['SHERIFF', 'JONES', 'HAD', 'HIS', 'POCKETS', 'ALWAYS', 'FULL', 'OF', 'WRITS', 'ISSUED', 'IN', 'THE', 'SPIRIT', 'OF', 'PERSECUTION', 'BUT', 'WAS', 'OFTEN', 'BAFFLED', 'BY', 'THE', 'SHARP', 'WITS', 'AND', 'READY', 'RESOURCES', 'OF', 'THE', 'FREE', 'STATE', 'PEOPLE', 'AND', 'SOMETIMES', 'DEFIED', 'OUTRIGHT'] +7729-102255-0018-2251: ref=['LITTLE', 'BY', 'LITTLE', 'HOWEVER', 'THE', 'LATTER', 'BECAME', 'HEMMED', 'AND', 'BOUND', 'IN', 'THE', 'MESHES', 'OF', 'THE', 'VARIOUS', 'DEVICES', 'AND', 'PROCEEDINGS', 'WHICH', 'THE', 'TERRITORIAL', 'OFFICIALS', 'EVOLVED', 'FROM', 'THE', 'BOGUS', 'LAWS'] +7729-102255-0018-2251: hyp=['LITTLE', 'BY', 'LITTLE', 'HOWEVER', 'THE', 'LATTER', 'BECAME', 'HEMMED', 'AND', 'BOUND', 'IN', 'THE', 'MESHES', 'OF', 'THE', 'VARIOUS', 'DEVICES', 'AND', 'PROCEEDINGS', 'WHICH', 'THE', 'TERRITORIAL', 'OFFICIALS', 'EVOLVED', 'FROM', 'THE', 'BOGUS', 'LAWS'] +7729-102255-0019-2252: ref=['TO', 'EMBARRASS', 'THIS', 'DAMAGING', 'EXPOSURE', 'JUDGE', 'LECOMPTE', 'ISSUED', 'A', 'WRIT', 'AGAINST', 'THE', 'EX', 'GOVERNOR', 'ON', 'A', 'FRIVOLOUS', 'CHARGE', 'OF', 'CONTEMPT'] +7729-102255-0019-2252: hyp=['TO', 'EMBARRASS', 'THIS', 'DAMAGING', 'EXPOSURE', 'JUDGE', 'LE', 'COMTE', 'ISSUED', 'A', 'WRIT', 'AGAINST', 'THE', 'EX', 'GOVERNOR', 'ON', 'A', 'FRIVOLOUS', 'CHARGE', 'OF', 'CONTEMPT'] +7729-102255-0020-2253: ref=['THE', 'INCIDENT', 'WAS', 'NOT', 'VIOLENT', 'NOR', 'EVEN', 'DRAMATIC', 'NO', 'POSSE', 'WAS', 'SUMMONED', 'NO', 'FURTHER', 'EFFORT', 'MADE', 'AND', 'REEDER', 'FEARING', 'PERSONAL', 'VIOLENCE', 'SOON', 'FLED', 'IN', 'DISGUISE'] +7729-102255-0020-2253: hyp=['THE', 'INCIDENT', 'WAS', 'NOT', 'VIOLENT', 'NOR', 'EVEN', 'DRAMATIC', 'NO', 'POSSE', 'WAS', 'SUMMONED', 'NO', 'FURTHER', 'EFFORT', 
'MADE', 'AND', 'READER', 'FEARING', 'PERSONAL', 'VIOLENCE', 'SOON', 'FLED', 'IN', 'DISGUISE'] +7729-102255-0021-2254: ref=['BUT', 'THE', 'AFFAIR', 'WAS', 'MAGNIFIED', 'AS', 'A', 'CROWNING', 'PROOF', 'THAT', 'THE', 'FREE', 'STATE', 'MEN', 'WERE', 'INSURRECTIONISTS', 'AND', 'OUTLAWS'] +7729-102255-0021-2254: hyp=['BUT', 'THE', 'AFFAIR', 'WAS', 'MAGNIFIED', 'AS', 'A', 'CROWNING', 'PROOF', 'THAT', 'THE', 'FREE', 'STATE', 'MEN', 'WERE', 'INSURRECTIONISTS', 'AND', 'OUTLAWS'] +7729-102255-0022-2255: ref=['FROM', 'THESE', 'AGAIN', 'SPRANG', 'BARRICADED', 'AND', 'FORTIFIED', 'DWELLINGS', 'CAMPS', 'AND', 'SCOUTING', 'PARTIES', 'FINALLY', 'CULMINATING', 'IN', 'ROVING', 'GUERRILLA', 'BANDS', 'HALF', 'PARTISAN', 'HALF', 'PREDATORY'] +7729-102255-0022-2255: hyp=['FROM', 'THESE', 'AGAIN', 'SPRANG', 'BARRICADED', 'AND', 'FORTIFIED', 'DWELLINGS', 'CAMPS', 'AND', 'SCOUT', 'PARTIES', 'FINALLY', 'CULMINATING', 'IN', 'ROVING', 'GUERRILLA', 'BANDS', 'HALF', 'PARTISAN', 'HALF', 'PREDATORY'] +7729-102255-0023-2256: ref=['THEIR', 'DISTINCTIVE', 'CHARACTERS', 'HOWEVER', 'DISPLAY', 'ONE', 'BROAD', 'AND', 'UNFAILING', 'DIFFERENCE'] +7729-102255-0023-2256: hyp=['THEIR', 'DISTINCTIVE', 'CHARACTERS', 'HOWEVER', 'DISPLAY', 'ONE', 'BROAD', 'AND', 'UNFAILING', 'DIFFERENCE'] +7729-102255-0024-2257: ref=['THE', 'FREE', 'STATE', 'MEN', 'CLUNG', 'TO', 'THEIR', 'PRAIRIE', 'TOWNS', 'AND', 'PRAIRIE', 'RAVINES', 'WITH', 'ALL', 'THE', 'OBSTINACY', 'AND', 'COURAGE', 'OF', 'TRUE', 'DEFENDERS', 'OF', 'THEIR', 'HOMES', 'AND', 'FIRESIDES'] +7729-102255-0024-2257: hyp=['THE', 'FREE', 'STATE', 'MEN', 'CLUNG', 'TO', 'THEIR', 'PRAIRIE', 'TOWNS', 'AND', 'PRAIRIE', 'RAVINES', 'WITH', 'ALL', 'THE', 'OBSTINACY', 'AND', 'COURAGE', 'OF', 'TRUE', 'DEFENDERS', 'OF', 'THEIR', 'HOMES', 'AND', 'FIRESIDES'] +7729-102255-0025-2258: ref=['THEIR', 'ASSUMED', 'CHARACTER', 'CHANGED', 'WITH', 'THEIR', 'CHANGING', 'OPPORTUNITIES', 'OR', 'NECESSITIES'] +7729-102255-0025-2258: hyp=['THEIR', 'ASSUMED', 'CHARACTER', 'CHANGED', 'WITH', 'THEIR', 'CHANGING', 'OPPORTUNITIES', 'OR', 'NECESSITIES'] +7729-102255-0026-2259: ref=['IN', 'THE', 'SHOOTING', 'OF', 'SHERIFF', 'JONES', 'IN', 'LAWRENCE', 'AND', 'IN', 'THE', 'REFUSAL', 'OF', 'EX', 'GOVERNOR', 'BEEDER', 'TO', 'ALLOW', 'THE', 'DEPUTY', 'MARSHAL', 'TO', 'ARREST', 'HIM', 'THEY', 'DISCOVERED', 'GRAVE', 'OFFENSES', 'AGAINST', 'THE', 'TERRITORIAL', 'AND', 'UNITED', 'STATES', 'LAWS'] +7729-102255-0026-2259: hyp=['IN', 'THE', 'SHOOTING', 'OF', 'SHERIFF', 'JONES', 'IN', 'LAWRENCE', 'AND', 'IN', 'THE', 'REFUSAL', 'OF', 'EX', 'GOVERNOR', 'READER', 'TO', 'ALLOW', 'THE', 'DEPUTY', 'MARSHAL', 'TO', 'ARREST', 'HIM', 'THEY', 'DISCOVERED', 'GRAVE', 'OFFENSES', 'AGAINST', 'THE', 'TERRITORIAL', 'AND', 'THE', 'UNITED', 'STATES', 'LAWS'] +7729-102255-0027-2260: ref=['FOOTNOTE', 'SUMNER', 'TO', 'SHANNON', 'MAY', 'TWELFTH', 'EIGHTEEN', 'FIFTY', 'SIX'] +7729-102255-0027-2260: hyp=['FOOTNOTE', 'SUMNER', 'TO', 'SHANNON', 'MAY', 'TWELFTH', 'EIGHTEEN', 'FIFTY', 'SIX'] +7729-102255-0028-2261: ref=['PRIVATE', 'PERSONS', 'WHO', 'HAD', 'LEASED', 'THE', 'FREE', 'STATE', 'HOTEL', 'VAINLY', 'BESOUGHT', 'THE', 'VARIOUS', 'AUTHORITIES', 'TO', 'PREVENT', 'THE', 'DESTRUCTION', 'OF', 'THEIR', 'PROPERTY'] +7729-102255-0028-2261: hyp=['PRIVATE', 'PERSONS', 'WHO', 'HAD', 'LEASED', 'THE', 'FREE', 'STATE', 'HOTEL', 'VAINLY', 'BESOUGHT', 'THE', 'VARIOUS', 'AUTHORITIES', 'TO', 'PRESENT', 'THE', 'DESTRUCTION', 'OF', 'THEIR', 'PROPERTY'] +7729-102255-0029-2262: ref=['TEN', 'DAYS', 'WERE', 'CONSUMED', 'IN', 'THESE', 'NEGOTIATIONS', 'BUT', 'THE', 'SPIRIT', 'OF', 
'VENGEANCE', 'REFUSED', 'TO', 'YIELD'] +7729-102255-0029-2262: hyp=['TEN', 'DAYS', 'WERE', 'CONSUMED', 'IN', 'THESE', 'NEGOTIATIONS', 'BUT', 'THE', 'SPIRIT', 'OF', 'VENGEANCE', 'REFUSED', 'TO', 'YIELD'] +7729-102255-0030-2263: ref=['HE', 'SUMMONED', 'HALF', 'A', 'DOZEN', 'CITIZENS', 'TO', 'JOIN', 'HIS', 'POSSE', 'WHO', 'FOLLOWED', 'OBEYED', 'AND', 'ASSISTED', 'HIM'] +7729-102255-0030-2263: hyp=['HE', 'SUMMONED', 'HALF', 'A', 'DOZEN', 'CITIZENS', 'TO', 'JOIN', 'HIS', 'POSSE', 'WHO', 'FOLLOWED', 'OBEYED', 'AND', 'ASSISTED', 'HIM'] +7729-102255-0031-2264: ref=['HE', 'CONTINUED', 'HIS', 'PRETENDED', 'SEARCH', 'AND', 'TO', 'GIVE', 'COLOR', 'TO', 'HIS', 'ERRAND', 'MADE', 'TWO', 'ARRESTS'] +7729-102255-0031-2264: hyp=['HE', 'CONTINUED', 'HIS', 'PRETENDED', 'SEARCH', 'AND', 'TO', 'GIVE', 'COLOR', 'TO', 'HIS', 'ERRAND', 'MADE', 'TO', 'ARREST'] +7729-102255-0032-2265: ref=['THE', 'FREE', 'STATE', 'HOTEL', 'A', 'STONE', 'BUILDING', 'IN', 'DIMENSIONS', 'FIFTY', 'BY', 'SEVENTY', 'FEET', 'THREE', 'STORIES', 'HIGH', 'AND', 'HANDSOMELY', 'FURNISHED', 'PREVIOUSLY', 'OCCUPIED', 'ONLY', 'FOR', 'LODGING', 'ROOMS', 'ON', 'THAT', 'DAY', 'FOR', 'THE', 'FIRST', 'TIME', 'OPENED', 'ITS', 'TABLE', 'ACCOMMODATIONS', 'TO', 'THE', 'PUBLIC', 'AND', 'PROVIDED', 'A', 'FREE', 'DINNER', 'IN', 'HONOR', 'OF', 'THE', 'OCCASION'] +7729-102255-0032-2265: hyp=['THE', 'FREE', 'STATE', 'HOTEL', 'A', 'STONE', 'BUILDING', 'IN', 'DIMENSIONS', 'FIFTY', 'BY', 'SEVENTY', 'FEET', 'THREE', 'STORIES', 'HIGH', 'AND', 'HANDSOMELY', 'FURNISHED', 'PREVIOUSLY', 'OCCUPIED', 'ONLY', 'FOR', 'LODGING', 'ROOMS', 'ON', 'THAT', 'DAY', 'FOR', 'THE', 'FIRST', 'TIME', 'OPENED', 'ITS', 'TABLE', 'ACCOMMODATIONS', 'TO', 'THE', 'PUBLIC', 'AND', 'PROVIDED', 'A', 'FREE', 'DINNER', 'IN', 'HONOR', 'OF', 'THE', 'OCCASION'] +7729-102255-0033-2266: ref=['AS', 'HE', 'HAD', 'PROMISED', 'TO', 'PROTECT', 'THE', 'HOTEL', 'THE', 'REASSURED', 'CITIZENS', 'BEGAN', 'TO', 'LAUGH', 'AT', 'THEIR', 'OWN', 'FEARS'] +7729-102255-0033-2266: hyp=['AS', 'HE', 'HAD', 'PROMISED', 'TO', 'PROTECT', 'THE', 'HOTEL', 'THE', 'REASSURED', 'CITIZENS', 'BEGAN', 'TO', 'LAUGH', 'AT', 'THEIR', 'OWN', 'FEARS'] +7729-102255-0034-2267: ref=['TO', 'THEIR', 'SORROW', 'THEY', 'WERE', 'SOON', 'UNDECEIVED'] +7729-102255-0034-2267: hyp=['TO', 'THEIR', 'SORROW', 'THEY', 'WERE', 'SOON', 'UNDECEIVED'] +7729-102255-0035-2268: ref=['THE', 'MILITARY', 'FORCE', 'PARTLY', 'RABBLE', 'PARTLY', 'ORGANIZED', 'HAD', 'MEANWHILE', 'MOVED', 'INTO', 'THE', 'TOWN'] +7729-102255-0035-2268: hyp=['THE', 'MILITARY', 'FORCE', 'PARTLY', 'REBEL', 'PARTLY', 'ORGANIZED', 'HAD', 'MEANWHILE', 'MOVED', 'INTO', 'THE', 'TOWN'] +7729-102255-0036-2269: ref=['HE', 'PLANTED', 'A', 'COMPANY', 'BEFORE', 'THE', 'HOTEL', 'AND', 'DEMANDED', 'A', 'SURRENDER', 'OF', 'THE', 'ARMS', 'BELONGING', 'TO', 'THE', 'FREE', 'STATE', 'MILITARY', 'COMPANIES'] +7729-102255-0036-2269: hyp=['HE', 'PLANTED', 'A', 'COMPANY', 'BEFORE', 'THE', 'HOTEL', 'AND', 'DEMANDED', 'A', 'SURRENDER', 'OF', 'THE', 'ARMS', 'BELONGING', 'TO', 'THE', 'FREE', 'STATE', 'MILITARY', 'COMPANIES'] +7729-102255-0037-2270: ref=['HALF', 'AN', 'HOUR', 'LATER', 'TURNING', 'A', 'DEAF', 'EAR', 'TO', 'ALL', 'REMONSTRANCE', 'HE', 'GAVE', 'THE', 'PROPRIETORS', 'UNTIL', 'FIVE', "O'CLOCK", 'TO', 'REMOVE', 'THEIR', 'FAMILIES', 'AND', 'PERSONAL', 'PROPERTY', 'FROM', 'THE', 'FREE', 'STATE', 'HOTEL'] +7729-102255-0037-2270: hyp=['HALF', 'AN', 'HOUR', 'LATER', 'TURNING', 'A', 'DEAF', 'EAR', 'TO', 'ALL', 'REMONSTRANCE', 'HE', 'GAVE', 'THE', 'PROPRIETORS', 'UNTIL', 'FIVE', "O'CLOCK", 'TO', 'REMOVE', 
'THEIR', 'FAMILIES', 'AND', 'PERSONAL', 'PROPERTY', 'FROM', 'THE', 'FREE', 'STATE', 'HOTEL'] +7729-102255-0038-2271: ref=['ATCHISON', 'WHO', 'HAD', 'BEEN', 'HARANGUING', 'THE', 'MOB', 'PLANTED', 'HIS', 'TWO', 'GUNS', 'BEFORE', 'THE', 'BUILDING', 'AND', 'TRAINED', 'THEM', 'UPON', 'IT'] +7729-102255-0038-2271: hyp=['ATCHISON', 'WHO', 'HAD', 'BEEN', 'HARANGUING', 'THE', 'MOB', 'PLANTED', 'HIS', 'TWO', 'GUNS', 'BEFORE', 'THE', 'BUILDING', 'AND', 'TRAINED', 'THEM', 'UPON', 'IT'] +7729-102255-0039-2272: ref=['THE', 'INMATES', 'BEING', 'REMOVED', 'AT', 'THE', 'APPOINTED', 'HOUR', 'A', 'FEW', 'CANNON', 'BALLS', 'WERE', 'FIRED', 'THROUGH', 'THE', 'STONE', 'WALLS'] +7729-102255-0039-2272: hyp=['THE', 'INMATES', 'BEING', 'REMOVED', 'AT', 'THE', 'APPOINTED', 'HOUR', 'A', 'FEW', 'CANNON', 'BALLS', 'WERE', 'FIRED', 'THROUGH', 'THE', 'STONE', 'WALLS'] +7729-102255-0040-2273: ref=['IN', 'THIS', 'INCIDENT', 'CONTRASTING', 'THE', 'CREATIVE', 'AND', 'THE', 'DESTRUCTIVE', 'SPIRIT', 'OF', 'THE', 'FACTIONS', 'THE', 'EMIGRANT', 'AID', 'SOCIETY', 'OF', 'MASSACHUSETTS', 'FINDS', 'ITS', 'MOST', 'HONORABLE', 'AND', 'TRIUMPHANT', 'VINDICATION'] +7729-102255-0040-2273: hyp=['IN', 'THIS', 'INCIDENT', 'CONTRASTING', 'THE', 'CREATIVE', 'AND', 'THE', 'DESTRUCTIVE', 'SPIRIT', 'OF', 'THE', 'FACTIONS', 'THE', 'EMIGRANT', 'AID', 'SOCIETY', 'OF', 'MASSACHUSETTS', 'FINDS', 'ITS', 'MOST', 'HONORABLE', 'AND', 'TRIUMPHANT', 'VINDICATION'] +7729-102255-0041-2274: ref=['THE', 'WHOLE', 'PROCEEDING', 'WAS', 'SO', 'CHILDISH', 'THE', 'MISERABLE', 'PLOT', 'SO', 'TRANSPARENT', 'THE', 'OUTRAGE', 'SO', 'GROSS', 'AS', 'TO', 'BRING', 'DISGUST', 'TO', 'THE', 'BETTER', 'CLASS', 'OF', 'BORDER', 'RUFFIANS', 'WHO', 'WERE', 'WITNESSES', 'AND', 'ACCESSORIES'] +7729-102255-0041-2274: hyp=['THE', 'WHOLE', 'PROCEEDING', 'WAS', 'SO', 'CHILDISH', 'THE', 'MISERABLE', 'PLOT', 'SO', 'TRANSPARENT', 'THE', 'OUTRAGE', 'SO', 'GROSS', 'AS', 'TO', 'BRING', 'DISGUST', 'TO', 'THE', 'BETTER', 'CLASS', 'OF', 'BORDER', 'RUFFIANS', 'WHO', 'WERE', 'WITNESSES', 'AND', 'ACCESSORIES'] +7729-102255-0042-2275: ref=['RELOCATED', 'FOOTNOTE', 'GOVERNOR', 'ROBINSON', 'BEING', 'ON', 'HIS', 'WAY', 'EAST', 'THE', 'STEAMBOAT', 'ON', 'WHICH', 'HE', 'WAS', 'TRAVELING', 'STOPPED', 'AT', 'LEXINGTON', 'MISSOURI'] +7729-102255-0042-2275: hyp=['RELOCATED', 'FOOTNOTE', 'GOVERNOR', 'ROBINSON', 'BEING', 'ON', 'HIS', 'WAY', 'EAST', 'THE', 'STEAMBOAT', 'ON', 'WHICH', 'HE', 'WAS', 'TRAVELING', 'STOPPED', 'AT', 'LEXINGTON', 'MISSOURI'] +7729-102255-0043-2276: ref=['IN', 'A', 'FEW', 'DAYS', 'AN', 'OFFICER', 'CAME', 'WITH', 'A', 'REQUISITION', 'FROM', 'GOVERNOR', 'SHANNON', 'AND', 'TOOK', 'THE', 'PRISONER', 'BY', 'LAND', 'TO', 'WESTPORT', 'AND', 'AFTERWARDS', 'FROM', 'THERE', 'TO', 'KANSAS', 'CITY', 'AND', 'LEAVENWORTH'] +7729-102255-0043-2276: hyp=['IN', 'A', 'FEW', 'DAYS', 'AN', 'OFFICER', 'CAME', 'WITH', 'A', 'REQUISITION', 'FROM', 'GOVERNOR', 'SHANON', 'AND', 'TOOK', 'THE', 'PRISONER', 'BY', 'LAND', 'TO', 'WESTPORT', 'AND', 'AFTERWARDS', 'FROM', 'THERE', 'TO', 'KANSAS', 'CITY', 'AND', 'LEAVENWORTH'] +7729-102255-0044-2277: ref=['HERE', 'HE', 'WAS', 'PLACED', 'IN', 'THE', 'CUSTODY', 'OF', 'CAPTAIN', 'MARTIN', 'OF', 'THE', 'KICKAPOO', 'RANGERS', 'WHO', 'PROVED', 'A', 'KIND', 'JAILER', 'AND', 'MATERIALLY', 'ASSISTED', 'IN', 'PROTECTING', 'HIM', 'FROM', 'THE', 'DANGEROUS', 'INTENTIONS', 'OF', 'THE', 'MOB', 'WHICH', 'AT', 'THAT', 'TIME', 'HELD', 'LEAVENWORTH', 'UNDER', 'A', 'REIGN', 'OF', 'TERROR'] +7729-102255-0044-2277: hyp=['HERE', 'HE', 'WAS', 'PLACED', 'IN', 'THE', 'CUSTODY', 'OF', 'CAPTAIN', 
'MARTIN', 'OF', 'THE', 'KICKAPOO', 'RANGERS', 'WHO', 'PROVED', 'A', 'KIND', 'JAILER', 'AND', 'MATERIALLY', 'ASSISTED', 'IN', 'PROTECTING', 'HIM', 'FROM', 'THE', 'DANGEROUS', 'INTENTIONS', 'OF', 'THE', 'MOB', 'WHICH', 'AT', 'THAT', 'TIME', 'HELD', 'LEAVENWORTH', 'UNDER', 'THE', 'REIGN', 'OF', 'TERROR'] +7729-102255-0045-2278: ref=['CAPTAIN', 'MARTIN', 'SAID', 'I', 'SHALL', 'GIVE', 'YOU', 'A', 'PISTOL', 'TO', 'HELP', 'PROTECT', 'YOURSELF', 'IF', 'WORSE', 'COMES', 'TO', 'WORST'] +7729-102255-0045-2278: hyp=['CAPTAIN', 'MARTIN', 'SAID', 'I', 'SHALL', 'GIVE', 'YOU', 'A', 'PISTOL', 'TO', 'HELP', 'PROTECT', 'YOURSELF', 'IF', 'WORSE', 'COMES', 'TO', 'WORST'] +7729-102255-0046-2279: ref=['IN', 'THE', 'EARLY', 'MORNING', 'OF', 'THE', 'NEXT', 'DAY', 'MAY', 'TWENTY', 'NINTH', 'A', 'COMPANY', 'OF', 'DRAGOONS', 'WITH', 'ONE', 'EMPTY', 'SADDLE', 'CAME', 'DOWN', 'FROM', 'THE', 'FORT', 'AND', 'WHILE', 'THE', 'PRO', 'SLAVERY', 'MEN', 'STILL', 'SLEPT', 'THE', 'PRISONER', 'AND', 'HIS', 'ESCORT', 'WERE', 'ON', 'THEIR', 'WAY', 'ACROSS', 'THE', 'PRAIRIES', 'TO', 'LECOMPTON', 'IN', 'THE', 'CHARGE', 'OF', 'OFFICERS', 'OF', 'THE', 'UNITED', 'STATES', 'ARMY'] +7729-102255-0046-2279: hyp=['IN', 'THE', 'EARLY', 'MORNING', 'OF', 'THE', 'NEXT', 'DAY', 'MAY', 'TWENTY', 'NINTH', 'A', 'COMPANY', 'OF', 'DRAGOONS', 'WITH', 'ONE', 'EMPTY', 'SADDLE', 'CAME', 'DOWN', 'FROM', 'THE', 'FORT', 'AND', 'WHILE', 'THE', 'PRO', 'SLAVERY', 'MEN', 'STILL', 'SLEPT', 'THE', 'PRISONER', 'AND', 'HIS', 'ESCORT', 'WERE', 'ON', 'THEIR', 'WAY', 'ACROSS', 'THE', 'PRAIRIES', 'TO', 'LA', 'COMPTON', 'IN', 'THE', 'CHARGE', 'OF', 'OFFICERS', 'OF', 'THE', 'UNITED', 'STATES', 'ARMY'] +8224-274381-0000-2280: ref=['THOUGH', 'THROWN', 'INTO', 'PRISON', 'FOR', 'THIS', 'ENTERPRISE', 'AND', 'DETAINED', 'SOME', 'TIME', 'HE', 'WAS', 'NOT', 'DISCOURAGED', 'BUT', 'STILL', 'CONTINUED', 'BY', 'HIS', 'COUNTENANCE', 'AND', 'PROTECTION', 'TO', 'INFUSE', 'SPIRIT', 'INTO', 'THE', 'DISTRESSED', 'ROYALISTS'] +8224-274381-0000-2280: hyp=['THOUGH', 'THROWN', 'INTO', 'PRISON', 'FOR', 'THIS', 'ENTERPRISE', 'AND', 'DETAINED', 'SOME', 'TIME', 'HE', 'WAS', 'NOT', 'DISCOURAGED', 'BUT', 'STILL', 'CONTINUED', 'BY', 'HIS', 'COUNTENANCE', 'AND', 'PROTECTION', 'TO', 'INFUSE', 'SPIRIT', 'INTO', 'THE', 'DISTRESSED', 'ROYALISTS'] +8224-274381-0001-2281: ref=['AMONG', 'OTHER', 'PERSONS', 'OF', 'DISTINCTION', 'WHO', 'UNITED', 'THEMSELVES', 'TO', 'HIM', 'WAS', 'LORD', 'NAPIER', 'OF', 'MERCHISTON', 'SON', 'OF', 'THE', 'FAMOUS', 'INVENTOR', 'OF', 'THE', 'LOGARITHMS', 'THE', 'PERSON', 'TO', 'WHOM', 'THE', 'TITLE', 'OF', 'A', 'GREAT', 'MAN', 'IS', 'MORE', 'JUSTLY', 'DUE', 'THAN', 'TO', 'ANY', 'OTHER', 'WHOM', 'HIS', 'COUNTRY', 'EVER', 'PRODUCED'] +8224-274381-0001-2281: hyp=['AMONG', 'OTHER', 'PERSONS', 'OF', 'DISTINCTION', 'WHO', 'UNITED', 'THEMSELVES', 'TO', 'HIM', 'WAS', 'LORD', 'NAPIER', 'OF', 'MURCHESTON', 'SON', 'OF', 'THE', 'FAMOUS', 'INVENTOR', 'OF', 'THE', 'LOGARITHMS', 'THE', 'PERSON', 'TO', 'WHOM', 'THE', 'TITLE', 'OF', 'A', 'GREAT', 'MAN', 'IS', 'MORE', 'JUSTLY', 'DUE', 'THAN', 'TO', 'ANY', 'OTHER', 'WHOM', 'HIS', 'COUNTRY', 'EVER', 'PRODUCED'] +8224-274381-0002-2282: ref=['WHILE', 'THE', 'FORMER', 'FORETOLD', 'THAT', 'THE', 'SCOTTISH', 'COVENANTERS', 'WERE', 'SECRETLY', 'FORMING', 'A', 'UNION', 'WITH', 'THE', 'ENGLISH', 'PARLIAMENT', 'AND', 'INCULCATED', 'THE', 'NECESSITY', 'OF', 'PREVENTING', 'THEM', 'BY', 'SOME', 'VIGOROUS', 'UNDERTAKING', 'THE', 'LATTER', 'STILL', 'INSISTED', 'THAT', 'EVERY', 'SUCH', 'ATTEMPT', 'WOULD', 'PRECIPITATE', 'THEM', 'INTO', 'MEASURES', 'TO', 'WHICH', 
'OTHERWISE', 'THEY', 'WERE', 'NOT', 'PERHAPS', 'INCLINED'] +8224-274381-0002-2282: hyp=['WHILE', 'THE', 'FORMER', 'FORETOLD', 'THAT', 'THE', 'SCOTTISH', 'COVENANTERS', 'WERE', 'SECRETLY', 'FORMING', 'A', 'UNION', 'WITH', 'THE', 'ENGLISH', 'PARLIAMENT', 'AND', 'INCULCATED', 'THE', 'NECESSITY', 'OF', 'PREVENTING', 'THEM', 'BY', 'SOME', 'VIGOROUS', 'UNDERTAKING', 'THE', 'LATTER', 'STILL', 'INSISTED', 'THAT', 'EVERY', 'SUCH', 'ATTEMPT', 'WOULD', 'PRECIPITATE', 'THEM', 'INTO', 'MEASURES', 'TO', 'WHICH', 'OTHERWISE', 'THEY', 'WERE', 'NOT', 'PERHAPS', 'INCLINED'] +8224-274381-0003-2283: ref=['THE', "KING'S", 'EARS', 'WERE', 'NOW', 'OPEN', 'TO', "MONTROSE'S", 'COUNSELS', 'WHO', 'PROPOSED', 'NONE', 'BUT', 'THE', 'BOLDEST', 'AND', 'MOST', 'DARING', 'AGREEABLY', 'TO', 'THE', 'DESPERATE', 'STATE', 'OF', 'THE', 'ROYAL', 'CAUSE', 'IN', 'SCOTLAND'] +8224-274381-0003-2283: hyp=['THE', "KING'S", 'EARS', 'WERE', 'NOW', 'OPEN', 'TO', "MONTROSE'S", 'COUNSELS', 'WHO', 'PROPOSED', 'NONE', 'BUT', 'THE', 'BOLDEST', 'AND', 'MOST', 'DARING', 'AGREEABLY', 'TO', 'THE', 'DESPERATE', 'STATE', 'OF', 'THE', 'ROYAL', 'CAUSE', 'IN', 'SCOTLAND'] +8224-274381-0004-2284: ref=['FIVE', 'HUNDRED', 'MEN', 'MORE', 'WHO', 'HAD', 'BEEN', 'LEVIED', 'BY', 'THE', 'COVENANTERS', 'WERE', 'PERSUADED', 'TO', 'EMBRACE', 'THE', 'ROYAL', 'CAUSE', 'AND', 'WITH', 'THIS', 'COMBINED', 'FORCE', 'HE', 'HASTENED', 'TO', 'ATTACK', 'LORD', 'ELCHO', 'WHO', 'LAY', 'AT', 'PERTH', 'WITH', 'AN', 'ARMY', 'OF', 'SIX', 'THOUSAND', 'MEN', 'ASSEMBLED', 'UPON', 'THE', 'FIRST', 'NEWS', 'OF', 'THE', 'IRISH', 'INVASION'] +8224-274381-0004-2284: hyp=['FIVE', 'HUNDRED', 'MEN', 'MORE', 'WHO', 'HAD', 'BEEN', 'LEVIED', 'BY', 'THE', 'COVENANTERS', 'WERE', 'PERSUADED', 'TO', 'EMBRACE', 'THE', 'ROYAL', 'CAUSE', 'AND', 'WITH', 'THIS', 'COMBINED', 'FORCE', 'HE', 'HASTENED', 'TO', 'ATTACK', 'LORD', 'ELCO', 'WHO', 'LAY', 'AT', 'PERTH', 'WITH', 'AN', 'ARMY', 'OF', 'SIX', 'THOUSAND', 'MEN', 'ASSEMBLED', 'UPON', 'THE', 'FIRST', 'NEWS', 'OF', 'THE', 'IRISH', 'INVASION'] +8224-274381-0005-2285: ref=['DREADING', 'THE', 'SUPERIOR', 'POWER', 'OF', 'ARGYLE', 'WHO', 'HAVING', 'JOINED', 'HIS', 'VASSALS', 'TO', 'A', 'FORCE', 'LEVIED', 'BY', 'THE', 'PUBLIC', 'WAS', 'APPROACHING', 'WITH', 'A', 'CONSIDERABLE', 'ARMY', 'MONTROSE', 'HASTENED', 'NORTHWARDS', 'IN', 'ORDER', 'TO', 'ROUSE', 'AGAIN', 'THE', 'MARQUIS', 'OF', 'HUNTLEY', 'AND', 'THE', 'GORDONS', 'WHO', 'HAVING', 'BEFORE', 'HASTILY', 'TAKEN', 'ARMS', 'HAD', 'BEEN', 'INSTANTLY', 'SUPPRESSED', 'BY', 'THE', 'COVENANTERS'] +8224-274381-0005-2285: hyp=['DREADING', 'THE', 'SUPERIOR', 'POWER', 'OF', 'ARGYLE', 'WHO', 'HAVING', 'JOINED', 'HIS', 'VASSALS', 'TO', 'A', 'FORCE', 'LEVIED', 'BY', 'THE', 'PUBLIC', 'WAS', 'APPROACHING', 'WITH', 'A', 'CONSIDERABLE', 'ARMY', 'MONTROSE', 'HASTENED', 'NORTHWARD', 'IN', 'ORDER', 'TO', 'ROUSE', 'AGAIN', 'THE', 'MARQUIS', 'OF', 'HUNTLEY', 'AND', 'THE', 'GORDONS', 'WHO', 'HAVING', 'BEFORE', 'HASTILY', 'TAKEN', 'ARMS', 'HAD', 'BEEN', 'INSTANTLY', 'SUPPRESSED', 'BY', 'THE', 'COVENANTERS'] +8224-274381-0006-2286: ref=['THIS', "NOBLEMAN'S", 'CHARACTER', 'THOUGH', 'CELEBRATED', 'FOR', 'POLITICAL', 'COURAGE', 'AND', 'CONDUCT', 'WAS', 'VERY', 'LOW', 'FOR', 'MILITARY', 'PROWESS', 'AND', 'AFTER', 'SOME', 'SKIRMISHES', 'IN', 'WHICH', 'HE', 'WAS', 'WORSTED', 'HE', 'HERE', 'ALLOWED', 'MONTROSE', 'TO', 'ESCAPE', 'HIM'] +8224-274381-0006-2286: hyp=['THIS', "NOBLEMAN'S", 'CHARACTER', 'THOUGH', 'CELEBRATED', 'FOR', 'POLITICAL', 'COURAGE', 'AND', 'CONDUCT', 'WAS', 'VERY', 'LOW', 'FOR', 'MILITARY', 'PROWESS', 'AND', 'AFTER', 
'SOME', 'SKIRMISHES', 'IN', 'WHICH', 'HE', 'WAS', 'WORSTED', 'HE', 'HERE', 'ALLOWED', 'MONTROSE', 'TO', 'ESCAPE', 'HIM'] +8224-274381-0007-2287: ref=['BY', 'QUICK', 'MARCHES', 'THROUGH', 'THESE', 'INACCESSIBLE', 'MOUNTAINS', 'THAT', 'GENERAL', 'FREED', 'HIMSELF', 'FROM', 'THE', 'SUPERIOR', 'FORCES', 'OF', 'THE', 'COVENANTERS'] +8224-274381-0007-2287: hyp=['BY', 'QUICK', 'MARCHES', 'THROUGH', 'THESE', 'INACCESSIBLE', 'MOUNTAINS', 'THAT', 'GENERAL', 'FREED', 'HIMSELF', 'FROM', 'THE', 'SUPERIOR', 'FORCES', 'OF', 'THE', 'COVENANTERS'] +8224-274381-0008-2288: ref=['WITH', 'THESE', 'AND', 'SOME', 'REENFORCEMENTS', 'OF', 'THE', 'ATHOLEMEN', 'AND', 'MACDONALDS', 'WHOM', 'HE', 'HAD', 'RECALLED', 'MONTROSE', 'FELL', 'SUDDENLY', 'UPON', "ARGYLE'S", 'COUNTRY', 'AND', 'LET', 'LOOSE', 'UPON', 'IT', 'ALL', 'THE', 'RAGE', 'OF', 'WAR', 'CARRYING', 'OFF', 'THE', 'CATTLE', 'BURNING', 'THE', 'HOUSES', 'AND', 'PUTTING', 'THE', 'INHABITANTS', 'TO', 'THE', 'SWORD'] +8224-274381-0008-2288: hyp=['WITH', 'THESE', 'AND', 'SOME', 'REINFORCEMENTS', 'OF', 'THE', 'ATHOL', 'MEN', 'AND', 'MACDONALDS', 'WHOM', 'HE', 'HAD', 'RECALLED', 'MONTROSE', 'FELL', 'SUDDENLY', 'UPON', "ARGYLE'S", 'COUNTRY', 'AND', 'LET', 'LOOSE', 'UPON', 'IT', 'ALL', 'THE', 'RAGE', 'OF', 'WAR', 'CARRYING', 'OFF', 'THE', 'CATTLE', 'BURNING', 'THE', 'HOUSES', 'AND', 'PUTTING', 'THE', 'INHABITANTS', 'TO', 'THE', 'SWORD'] +8224-274381-0009-2289: ref=['THIS', 'SEVERITY', 'BY', 'WHICH', 'MONTROSE', 'SULLIED', 'HIS', 'VICTORIES', 'WAS', 'THE', 'RESULT', 'OF', 'PRIVATE', 'ANIMOSITY', 'AGAINST', 'THE', 'CHIEFTAIN', 'AS', 'MUCH', 'AS', 'OF', 'ZEAL', 'FOR', 'THE', 'PUBLIC', 'CAUSE', 'ARGYLE', 'COLLECTING', 'THREE', 'THOUSAND', 'MEN', 'MARCHED', 'IN', 'QUEST', 'OF', 'THE', 'ENEMY', 'WHO', 'HAD', 'RETIRED', 'WITH', 'THEIR', 'PLUNDER', 'AND', 'HE', 'LAY', 'AT', 'INNERLOCHY', 'SUPPOSING', 'HIMSELF', 'STILL', 'AT', 'A', 'CONSIDERABLE', 'DISTANCE', 'FROM', 'THEM'] +8224-274381-0009-2289: hyp=['THIS', 'SEVERITY', 'BY', 'WHICH', 'MONTROSE', 'SULLIED', 'HIS', 'VICTORIES', 'WAS', 'THE', 'RESULT', 'OF', 'PRIVATE', 'ANIMOSITY', 'AGAINST', 'THE', 'CHIEFTAIN', 'AS', 'MUCH', 'AS', 'OF', 'ZEAL', 'FOR', 'THE', 'PUBLIC', 'CAUSE', 'ARGYLE', 'COLLECTING', 'THREE', 'THOUSAND', 'MEN', 'MARCHED', 'IN', 'QUEST', 'OF', 'THE', 'ENEMY', 'WHO', 'HAD', 'RETIRED', 'WITH', 'THEIR', 'PLUNDER', 'AND', 'HE', 'LAY', 'AT', 'INNER', 'LOCKI', 'SUPPOSING', 'HIMSELF', 'STILL', 'AT', 'A', 'CONSIDERABLE', 'DISTANCE', 'FROM', 'THEM'] +8224-274381-0010-2290: ref=['BY', 'A', 'QUICK', 'AND', 'UNEXPECTED', 'MARCH', 'MONTROSE', 'HASTENED', 'TO', 'INNERLOCHY', 'AND', 'PRESENTED', 'HIMSELF', 'IN', 'ORDER', 'OF', 'BATTLE', 'BEFORE', 'THE', 'SURPRISED', 'BUT', 'NOT', 'AFFRIGHTENED', 'COVENANTERS'] +8224-274381-0010-2290: hyp=['BY', 'A', 'QUICK', 'AND', 'UNEXPECTED', 'MARCH', 'MONTROSE', 'HASTENED', 'TO', 'INNILOCKI', 'AND', 'PRESENTED', 'HIMSELF', 'IN', 'ORDER', 'OF', 'BATTLE', 'BEFORE', 'THE', 'SURPRISED', 'BUT', 'NOT', 'AFFRIGHTENED', 'COVENANTERS'] +8224-274381-0011-2291: ref=['HIS', 'CONDUCT', 'AND', 'PRESENCE', 'OF', 'MIND', 'IN', 'THIS', 'EMERGENCE', 'APPEARED', 'CONSPICUOUS'] +8224-274381-0011-2291: hyp=['HIS', 'CONDUCT', 'AND', 'PRESENCE', 'OF', 'MIND', 'IN', 'THIS', 'EMERGENCE', 'APPEARED', 'CONSPICUOUS'] +8224-274381-0012-2292: ref=['MONTROSE', 'WEAK', 'IN', 'CAVALRY', 'HERE', 'LINED', 'HIS', 'TROOPS', 'OF', 'HORSE', 'WITH', 'INFANTRY', 'AND', 'AFTER', 'PUTTING', 'THE', "ENEMY'S", 'HORSE', 'TO', 'ROUT', 'FELL', 'WITH', 'UNITED', 'FORCE', 'UPON', 'THEIR', 'FOOT', 'WHO', 'WERE', 'ENTIRELY', 'CUT', 'IN', 
'PIECES', 'THOUGH', 'WITH', 'THE', 'LOSS', 'OF', 'THE', 'GALLANT', 'LORD', 'GORDON', 'ON', 'THE', 'PART', 'OF', 'THE', 'ROYALISTS'] +8224-274381-0012-2292: hyp=['MONTROSE', 'WEAK', 'IN', 'CAVALRY', 'HERE', 'LINED', 'HIS', 'TROOPS', 'OF', 'HORSE', 'WITH', 'INFANTRY', 'AND', 'AFTER', 'PUTTING', 'THE', "ENEMY'S", 'HORSE', 'TO', 'ROUT', 'FELL', 'WITH', 'UNITED', 'FORCE', 'UPON', 'THEIR', 'FOOT', 'WHO', 'WERE', 'ENTIRELY', 'CUT', 'IN', 'PIECES', 'THOUGH', 'WITH', 'THE', 'LOSS', 'OF', 'THE', 'GALLANT', 'LORD', 'GORDON', 'ON', 'THE', 'PART', 'OF', 'THE', 'ROYALISTS'] +8224-274381-0013-2293: ref=['FROM', 'THE', 'SAME', 'MEN', 'NEW', 'REGIMENTS', 'AND', 'NEW', 'COMPANIES', 'WERE', 'FORMED', 'DIFFERENT', 'OFFICERS', 'APPOINTED', 'AND', 'THE', 'WHOLE', 'MILITARY', 'FORCE', 'PUT', 'INTO', 'SUCH', 'HANDS', 'AS', 'THE', 'INDEPENDENTS', 'COULD', 'RELY', 'ON'] +8224-274381-0013-2293: hyp=['FROM', 'THE', 'SAME', 'MEN', 'NEW', 'REGIMENTS', 'AND', 'NEW', 'COMPANIES', 'WERE', 'FORMED', 'DIFFERENT', 'OFFICERS', 'APPOINTED', 'AND', 'THE', 'WHOLE', 'MILITARY', 'FORCE', 'PUT', 'INTO', 'SUCH', 'HANDS', 'AS', 'THE', 'INDEPENDENTS', 'COULD', 'RELY', 'ON'] +8224-274381-0014-2294: ref=['BESIDES', 'MEMBERS', 'OF', 'PARLIAMENT', 'WHO', 'WERE', 'EXCLUDED', 'MANY', 'OFFICERS', 'UNWILLING', 'TO', 'SERVE', 'UNDER', 'THE', 'NEW', 'GENERALS', 'THREW', 'UP', 'THEIR', 'COMMISSIONS', 'AND', 'UNWARILY', 'FACILITATED', 'THE', 'PROJECT', 'OF', 'PUTTING', 'THE', 'ARMY', 'ENTIRELY', 'INTO', 'THE', 'HANDS', 'OF', 'THAT', 'FACTION'] +8224-274381-0014-2294: hyp=['BESIDES', 'MEMBERS', 'OF', 'PARLIAMENT', 'WHO', 'WERE', 'EXCLUDED', 'MANY', 'OFFICERS', 'UNWILLING', 'TO', 'SERVE', 'UNDER', 'THE', 'NEW', 'GENERALS', 'THREW', 'UP', 'THEIR', 'COMMISSIONS', 'AND', 'AND', 'WARILY', 'FACILITATED', 'THE', 'PROJECT', 'OF', 'PUTTING', 'THE', 'ARMY', 'ENTIRELY', 'INTO', 'THE', 'HANDS', 'OF', 'THAT', 'FACTION'] +8224-274381-0015-2295: ref=['THOUGH', 'THE', 'DISCIPLINE', 'OF', 'THE', 'FORMER', 'PARLIAMENTARY', 'ARMY', 'WAS', 'NOT', 'CONTEMPTIBLE', 'A', 'MORE', 'EXACT', 'PLAN', 'WAS', 'INTRODUCED', 'AND', 'RIGOROUSLY', 'EXECUTED', 'BY', 'THESE', 'NEW', 'COMMANDERS'] +8224-274381-0015-2295: hyp=['THOUGH', 'THE', 'DISCIPLINE', 'OF', 'THE', 'FORMER', 'PARLIAMENTARY', 'ARMY', 'WAS', 'NOT', 'CONTEMPTIBLE', 'A', 'MORE', 'EXACT', 'PLAN', 'WAS', 'INTRODUCED', 'AND', 'RIGOROUSLY', 'EXECUTED', 'BY', 'THESE', 'NEW', 'COMMANDERS'] +8224-274381-0016-2296: ref=['VALOR', 'INDEED', 'WAS', 'VERY', 'GENERALLY', 'DIFFUSED', 'OVER', 'THE', 'ONE', 'PARTY', 'AS', 'WELL', 'AS', 'THE', 'OTHER', 'DURING', 'THIS', 'PERIOD', 'DISCIPLINE', 'ALSO', 'WAS', 'ATTAINED', 'BY', 'THE', 'FORCES', 'OF', 'THE', 'PARLIAMENT', 'BUT', 'THE', 'PERFECTION', 'OF', 'THE', 'MILITARY', 'ART', 'IN', 'CONCERTING', 'THE', 'GENERAL', 'PLANS', 'OF', 'ACTION', 'AND', 'THE', 'OPERATIONS', 'OF', 'THE', 'FIELD', 'SEEMS', 'STILL', 'ON', 'BOTH', 'SIDES', 'TO', 'HAVE', 'BEEN', 'IN', 'A', 'GREAT', 'MEASURE', 'WANTING'] +8224-274381-0016-2296: hyp=['VALOR', 'INDEED', 'WAS', 'VERY', 'GENERALLY', 'DIFFUSED', 'OVER', 'THE', 'ONE', 'PARTY', 'AS', 'WELL', 'AS', 'THE', 'OTHER', 'DURING', 'THIS', 'PERIOD', 'DISCIPLINE', 'ALSO', 'WAS', 'ATTAINED', 'BY', 'THE', 'FORCES', 'OF', 'THE', 'PARLIAMENT', 'BUT', 'THE', 'PERFECTION', 'OF', 'THE', 'MILITARY', 'ART', 'IN', 'CONCERTING', 'THE', 'GENERAL', 'PLANS', 'OF', 'ACTION', 'AND', 'THE', 'OPERATIONS', 'OF', 'THE', 'FIELD', 'SEEMS', 'STILL', 'ON', 'BOTH', 'SIDES', 'TO', 'HAVE', 'BEEN', 'IN', 'A', 'GREAT', 'MEASURE', 'WANTING'] +8224-274381-0017-2297: ref=['HISTORIANS', 'AT', 
'LEAST', 'PERHAPS', 'FROM', 'THEIR', 'OWN', 'IGNORANCE', 'AND', 'INEXPERIENCE', 'HAVE', 'NOT', 'REMARKED', 'ANY', 'THING', 'BUT', 'A', 'HEADLONG', 'IMPETUOUS', 'CONDUCT', 'EACH', 'PARTY', 'HURRYING', 'TO', 'A', 'BATTLE', 'WHERE', 'VALOR', 'AND', 'FORTUNE', 'CHIEFLY', 'DETERMINED', 'THE', 'SUCCESS'] +8224-274381-0017-2297: hyp=['HISTORIANS', 'AT', 'LEAST', 'PERHAPS', 'FROM', 'THEIR', 'OWN', 'IGNORANCE', 'AND', 'INEXPERIENCE', 'HAVE', 'NOT', 'REMARKED', 'ANY', 'THING', 'BUT', 'A', 'HEADLONG', 'IMPETUOUS', 'CONDUCT', 'EACH', 'PARTY', 'HURRYING', 'TO', 'A', 'BATTLE', 'WHERE', 'VALOR', 'AND', 'FORTUNE', 'CHIEFLY', 'DETERMINE', 'THE', 'SUCCESS'] +8224-274384-0000-2298: ref=['HE', 'PASSED', 'THROUGH', 'HENLEY', 'SAINT', 'ALBANS', 'AND', 'CAME', 'SO', 'NEAR', 'TO', 'LONDON', 'AS', 'HARROW', 'ON', 'THE', 'HILL'] +8224-274384-0000-2298: hyp=['HE', 'PASSED', 'THROUGH', 'HENLEY', 'SAINT', 'ALBANS', 'AND', 'CAME', 'SO', 'NEAR', 'TO', 'LONDON', 'AS', 'HARROW', 'ON', 'THE', 'HILL'] +8224-274384-0001-2299: ref=['THE', 'SCOTTISH', 'GENERALS', 'AND', 'COMMISSIONERS', 'AFFECTED', 'GREAT', 'SURPRISE', 'ON', 'THE', 'APPEARANCE', 'OF', 'THE', 'KING', 'AND', 'THOUGH', 'THEY', 'PAID', 'HIM', 'ALL', 'THE', 'EXTERIOR', 'RESPECT', 'DUE', 'TO', 'HIS', 'DIGNITY', 'THEY', 'INSTANTLY', 'SET', 'A', 'GUARD', 'UPON', 'HIM', 'UNDER', 'COLOR', 'OF', 'PROTECTION', 'AND', 'MADE', 'HIM', 'IN', 'REALITY', 'A', 'PRISONER'] +8224-274384-0001-2299: hyp=['THE', 'SCOTTISH', 'GENERALS', 'AND', 'COMMISSIONERS', 'AFFECTED', 'GREAT', 'SURPRISE', 'ON', 'THE', 'APPEARANCE', 'OF', 'THE', 'KING', 'AND', 'THOUGH', 'THEY', 'PAID', 'HIM', 'ALL', 'THE', 'EXTERIOR', 'RESPECT', 'DUE', 'TO', 'HIS', 'DIGNITY', 'THEY', 'INSTANTLY', 'SET', 'A', 'GUARD', 'UPON', 'HIM', 'UNDER', 'COLOR', 'OF', 'PROTECTION', 'AND', 'MADE', 'HIM', 'IN', 'REALITY', 'A', 'PRISONER'] +8224-274384-0002-2300: ref=['THEY', 'INFORMED', 'THE', 'ENGLISH', 'PARLIAMENT', 'OF', 'THIS', 'UNEXPECTED', 'INCIDENT', 'AND', 'ASSURED', 'THEM', 'THAT', 'THEY', 'HAD', 'ENTERED', 'INTO', 'NO', 'PRIVATE', 'TREATY', 'WITH', 'THE', 'KING'] +8224-274384-0002-2300: hyp=['THEY', 'INFORMED', 'THE', 'ENGLISH', 'PARLIAMENT', 'OF', 'THIS', 'UNEXPECTED', 'INCIDENT', 'AND', 'ASSURED', 'THEM', 'THAT', 'THEY', 'HAD', 'ENTERED', 'INTO', 'NO', 'PRIVATE', 'TREATY', 'WITH', 'THE', 'KING'] +8224-274384-0003-2301: ref=['OR', 'HATH', 'HE', 'GIVEN', 'US', 'ANY', 'GIFT'] +8224-274384-0003-2301: hyp=['OR', 'HATH', 'HE', 'GIVEN', 'US', 'ANY', 'GIFT'] +8224-274384-0004-2302: ref=['AND', 'THE', 'MEN', 'OF', 'ISRAEL', 'ANSWERED', 'THE', 'MEN', 'OF', 'JUDAH', 'AND', 'SAID', 'WE', 'HAVE', 'TEN', 'PARTS', 'IN', 'THE', 'KING', 'AND', 'WE', 'HAVE', 'ALSO', 'MORE', 'RIGHT', 'IN', 'DAVID', 'THAN', 'YE', 'WHY', 'THEN', 'DID', 'YE', 'DESPISE', 'US', 'THAT', 'OUR', 'ADVICE', 'SHOULD', 'NOT', 'BE', 'FIRST', 'HAD', 'IN', 'BRINGING', 'BACK', 'OUR', 'KING'] +8224-274384-0004-2302: hyp=['AND', 'THE', 'MEN', 'OF', 'ISRAEL', 'ANSWERED', 'THE', 'MEN', 'OF', 'JUDAH', 'AND', 'SAID', 'WE', 'HAVE', 'TEN', 'PARTS', 'IN', 'THE', 'KING', 'AND', 'WE', 'HAVE', 'ALSO', 'MORE', 'RIGHT', 'IN', 'DAVID', 'THAN', 'YE', 'WHY', 'THEN', 'DID', 'YE', 'DESPISE', 'US', 'THAT', 'OUR', 'ADVICE', 'SHOULD', 'NOT', 'BE', 'FIRST', 'HAD', 'IN', 'BRINGING', 'BACK', 'OUR', 'KING'] +8224-274384-0005-2303: ref=['ANOTHER', 'PREACHER', 'AFTER', 'REPROACHING', 'HIM', 'TO', 'HIS', 'FACE', 'WITH', 'HIS', 'MISGOVERNMENT', 'ORDERED', 'THIS', 'PSALM', 'TO', 'BE', 'SUNG'] +8224-274384-0005-2303: hyp=['ANOTHER', 'PREACHER', 'AFTER', 'REPROACHING', 'HIM', 'TO', 'HIS', 'FACE', 
'WITH', 'HIS', 'MISGOVERNMENT', 'ORDERED', 'THIS', 'PSALM', 'TO', 'BE', 'SUNG'] +8224-274384-0006-2304: ref=['THE', 'KING', 'STOOD', 'UP', 'AND', 'CALLED', 'FOR', 'THAT', 'PSALM', 'WHICH', 'BEGINS', 'WITH', 'THESE', 'WORDS'] +8224-274384-0006-2304: hyp=['THE', 'KING', 'STOOD', 'UP', 'AND', 'CALLED', 'FOR', 'THAT', 'PSALM', 'WHICH', 'BEGINS', 'WITH', 'THESE', 'WORDS'] +8224-274384-0007-2305: ref=['HAVE', 'MERCY', 'LORD', 'ON', 'ME', 'I', 'PRAY', 'FOR', 'MEN', 'WOULD', 'ME', 'DEVOUR'] +8224-274384-0007-2305: hyp=['HAVE', 'MERCY', 'LORD', 'ON', 'ME', 'I', 'PRAY', 'FOR', 'MEN', 'WOULD', 'ME', 'DEVOUR'] +8224-274384-0008-2306: ref=['THE', 'GOOD', 'NATURED', 'AUDIENCE', 'IN', 'PITY', 'TO', 'FALLEN', 'MAJESTY', 'SHOWED', 'FOR', 'ONCE', 'GREATER', 'DEFERENCE', 'TO', 'THE', 'KING', 'THAN', 'TO', 'THE', 'MINISTER', 'AND', 'SUNG', 'THE', 'PSALM', 'WHICH', 'THE', 'FORMER', 'HAD', 'CALLED', 'FOR'] +8224-274384-0008-2306: hyp=['THE', 'GOOD', 'NATURED', 'AUDIENCE', 'IN', 'PITY', 'TO', 'FALLEN', 'MAJESTY', 'SHOWED', 'FOR', 'ONCE', 'GREATER', 'DEFERENCE', 'TO', 'THE', 'KING', 'THAN', 'TO', 'THE', 'MINISTER', 'AND', 'SUNG', 'THE', 'PSALM', 'WHICH', 'THE', 'FORMER', 'HAD', 'CALLED', 'FOR'] +8224-274384-0009-2307: ref=['THE', 'PARLIAMENT', 'AND', 'THE', 'SCOTS', 'LAID', 'THEIR', 'PROPOSALS', 'BEFORE', 'THE', 'KING'] +8224-274384-0009-2307: hyp=['THE', 'PARLIAMENT', 'AND', 'THE', 'SCOTS', 'LAID', 'THEIR', 'PROPOSALS', 'BEFORE', 'THE', 'KING'] +8224-274384-0010-2308: ref=['BEFORE', 'THE', 'SETTLEMENT', 'OF', 'TERMS', 'THE', 'ADMINISTRATION', 'MUST', 'BE', 'POSSESSED', 'ENTIRELY', 'BY', 'THE', 'PARLIAMENTS', 'OF', 'BOTH', 'KINGDOMS', 'AND', 'HOW', 'INCOMPATIBLE', 'THAT', 'SCHEME', 'WITH', 'THE', 'LIBERTY', 'OF', 'THE', 'KING', 'IS', 'EASILY', 'IMAGINED'] +8224-274384-0010-2308: hyp=['BEFORE', 'THE', 'SETTLEMENT', 'OF', 'TERMS', 'THE', 'ADMINISTRATION', 'MUST', 'BE', 'POSSESSED', 'ENTIRELY', 'BY', 'THE', 'PARLIAMENTS', 'OF', 'BOTH', 'KINGDOMS', 'AND', 'HOW', 'INCOMPATIBLE', 'THAT', 'SCHEME', 'WITH', 'THE', 'LIBERTY', 'OF', 'THE', 'KING', 'IS', 'EASILY', 'IMAGINED'] +8224-274384-0011-2309: ref=['THE', 'ENGLISH', 'IT', 'IS', 'EVIDENT', 'HAD', 'THEY', 'NOT', 'BEEN', 'PREVIOUSLY', 'ASSURED', 'OF', 'RECEIVING', 'THE', 'KING', 'WOULD', 'NEVER', 'HAVE', 'PARTED', 'WITH', 'SO', 'CONSIDERABLE', 'A', 'SUM', 'AND', 'WHILE', 'THEY', 'WEAKENED', 'THEMSELVES', 'BY', 'THE', 'SAME', 'MEASURE', 'HAVE', 'STRENGTHENED', 'A', 'PEOPLE', 'WITH', 'WHOM', 'THEY', 'MUST', 'AFTERWARDS', 'HAVE', 'SO', 'MATERIAL', 'AN', 'INTEREST', 'TO', 'DISCUSS'] +8224-274384-0011-2309: hyp=['THE', 'ENGLISH', 'IT', 'IS', 'EVIDENT', 'HAD', 'THEY', 'NOT', 'BEEN', 'PREVIOUSLY', 'ASSURED', 'OF', 'RECEIVING', 'THE', 'KING', 'WOULD', 'NEVER', 'HAVE', 'PARTED', 'WITH', 'SO', 'CONSIDERABLE', 'A', 'SUM', 'AND', 'WHILE', 'THEY', 'WEAKENED', 'THEMSELVES', 'BY', 'THE', 'SAME', 'MEASURE', 'HAVE', 'STRENGTHENED', 'A', 'PEOPLE', 'WITH', 'WHOM', 'THEY', 'MUST', 'AFTERWARDS', 'HAVE', 'SO', 'MATERIAL', 'AN', 'INTEREST', 'TO', 'DISCUSS'] +8224-274384-0012-2310: ref=['IF', 'ANY', 'STILL', 'RETAINED', 'RANCOR', 'AGAINST', 'HIM', 'IN', 'HIS', 'PRESENT', 'CONDITION', 'THEY', 'PASSED', 'IN', 'SILENCE', 'WHILE', 'HIS', 'WELL', 'WISHERS', 'MORE', 'GENEROUS', 'THAN', 'PRUDENT', 'ACCOMPANIED', 'HIS', 'MARCH', 'WITH', 'TEARS', 'WITH', 'ACCLAMATIONS', 'AND', 'WITH', 'PRAYERS', 'FOR', 'HIS', 'SAFETY'] +8224-274384-0012-2310: hyp=['IF', 'ANY', 'STILL', 'RETAINED', 'RANCOUR', 'AGAINST', 'HIM', 'IN', 'HIS', 'PRESENT', 'CONDITION', 'THEY', 'PASSED', 'IN', 'SILENCE', 'WHILE', 'HIS', 'WELL', 
'WISHERS', 'MORE', 'GENEROUS', 'THAN', 'PRUDENT', 'ACCOMPANIED', 'HIS', 'MARCH', 'WITH', 'TEARS', 'WITH', 'ACCLAMATIONS', 'AND', 'WITH', 'PRAYERS', 'FOR', 'HIS', 'SAFETY'] +8224-274384-0013-2311: ref=['HIS', 'DEATH', 'IN', 'THIS', 'CONJUNCTURE', 'WAS', 'A', 'PUBLIC', 'MISFORTUNE'] +8224-274384-0013-2311: hyp=['HIS', 'DEATH', 'IN', 'THIS', 'CONJUNCTURE', 'WAS', 'A', 'PUBLIC', 'MISFORTUNE'] +8230-279154-0000-2312: ref=['THE', 'ANALYSIS', 'OF', 'KNOWLEDGE', 'WILL', 'OCCUPY', 'US', 'UNTIL', 'THE', 'END', 'OF', 'THE', 'THIRTEENTH', 'LECTURE', 'AND', 'IS', 'THE', 'MOST', 'DIFFICULT', 'PART', 'OF', 'OUR', 'WHOLE', 'ENTERPRISE'] +8230-279154-0000-2312: hyp=['THE', 'ANALYSIS', 'OF', 'KNOWLEDGE', 'WILL', 'OCCUPY', 'US', 'UNTIL', 'THE', 'END', 'OF', 'THE', 'THIRTEENTH', 'LECTURE', 'AND', 'IS', 'THE', 'MOST', 'DIFFICULT', 'PART', 'OF', 'OUR', 'WHOLE', 'ENTERPRISE'] +8230-279154-0001-2313: ref=['WHAT', 'IS', 'CALLED', 'PERCEPTION', 'DIFFERS', 'FROM', 'SENSATION', 'BY', 'THE', 'FACT', 'THAT', 'THE', 'SENSATIONAL', 'INGREDIENTS', 'BRING', 'UP', 'HABITUAL', 'ASSOCIATES', 'IMAGES', 'AND', 'EXPECTATIONS', 'OF', 'THEIR', 'USUAL', 'CORRELATES', 'ALL', 'OF', 'WHICH', 'ARE', 'SUBJECTIVELY', 'INDISTINGUISHABLE', 'FROM', 'THE', 'SENSATION'] +8230-279154-0001-2313: hyp=['WHAT', 'IS', 'CALLED', 'PERCEPTION', 'DIFFERS', 'FROM', 'SENSATION', 'BY', 'THE', 'FACT', 'THAT', 'THE', 'SENSATIONAL', 'INGREDIENTS', 'BRING', 'UP', 'HABITUAL', 'ASSOCIATES', 'IMAGES', 'AND', 'EXPECTATIONS', 'OF', 'THEIR', 'USUAL', 'CORELETS', 'ALL', 'OF', 'WHICH', 'ARE', 'SUBJECTIVELY', 'INDISTINGUISHABLE', 'FROM', 'THE', 'SENSATION'] +8230-279154-0002-2314: ref=['WHETHER', 'OR', 'NOT', 'THIS', 'PRINCIPLE', 'IS', 'LIABLE', 'TO', 'EXCEPTIONS', 'EVERYONE', 'WOULD', 'AGREE', 'THAT', 'IS', 'HAS', 'A', 'BROAD', 'MEASURE', 'OF', 'TRUTH', 'THOUGH', 'THE', 'WORD', 'EXACTLY', 'MIGHT', 'SEEM', 'AN', 'OVERSTATEMENT', 'AND', 'IT', 'MIGHT', 'SEEM', 'MORE', 'CORRECT', 'TO', 'SAY', 'THAT', 'IDEAS', 'APPROXIMATELY', 'REPRESENT', 'IMPRESSIONS'] +8230-279154-0002-2314: hyp=['WHETHER', 'OR', 'NOT', 'THIS', 'PRINCIPLE', 'IS', 'LIABLE', 'TO', 'EXCEPTIONS', 'EVERY', 'ONE', 'WOULD', 'AGREE', 'THAT', 'IT', 'HAS', 'A', 'BROAD', 'MEASURE', 'OF', 'TRUTH', 'THOUGH', 'THE', 'WORD', 'EXACTLY', 'MIGHT', 'SEEM', 'AN', 'OVERSTATEMENT', 'AND', 'IT', 'MIGHT', 'SEEM', 'MORE', 'CORRECT', 'TO', 'SAY', 'THAT', 'IDEAS', 'APPROXIMATELY', 'REPRESENT', 'IMPRESSIONS'] +8230-279154-0003-2315: ref=['AND', 'WHAT', 'SORT', 'OF', 'EVIDENCE', 'IS', 'LOGICALLY', 'POSSIBLE'] +8230-279154-0003-2315: hyp=['AND', 'WHAT', 'SORT', 'OF', 'EVIDENCE', 'IS', 'LOGICALLY', 'POSSIBLE'] +8230-279154-0004-2316: ref=['THERE', 'IS', 'NO', 'LOGICAL', 'IMPOSSIBILITY', 'IN', 'THE', 'HYPOTHESIS', 'THAT', 'THE', 'WORLD', 'SPRANG', 'INTO', 'BEING', 'FIVE', 'MINUTES', 'AGO', 'EXACTLY', 'AS', 'IT', 'THEN', 'WAS', 'WITH', 'A', 'POPULATION', 'THAT', 'REMEMBERED', 'A', 'WHOLLY', 'UNREAL', 'PAST'] +8230-279154-0004-2316: hyp=['THERE', 'IS', 'NO', 'LOGICAL', 'IMPOSSIBILITY', 'IN', 'THE', 'HYPOTHESIS', 'THAT', 'THE', 'WORLD', 'SPRANG', 'INTO', 'BEING', 'FIVE', 'MINUTES', 'AGO', 'EXACTLY', 'AS', 'IT', 'THEN', 'WAS', 'WITH', 'A', 'POPULATION', 'THAT', 'REMEMBERED', 'A', 'WHOLLY', 'UNREAL', 'PAST'] +8230-279154-0005-2317: ref=['ALL', 'THAT', 'I', 'AM', 'DOING', 'IS', 'TO', 'USE', 'ITS', 'LOGICAL', 'TENABILITY', 'AS', 'A', 'HELP', 'IN', 'THE', 'ANALYSIS', 'OF', 'WHAT', 'OCCURS', 'WHEN', 'WE', 'REMEMBER'] +8230-279154-0005-2317: hyp=['ALL', 'THAT', 'I', 'AM', 'DOING', 'IS', 'TO', 'USE', 'ITS', 'LOGICAL', 'TENABILITY', 'AS', 'A', 
'HELP', 'IN', 'THE', 'ANALYSIS', 'OF', 'WHAT', 'OCCURS', 'WHEN', 'WE', 'REMEMBER'] +8230-279154-0006-2318: ref=['THE', 'BEHAVIOURIST', 'WHO', 'ATTEMPTS', 'TO', 'MAKE', 'PSYCHOLOGY', 'A', 'RECORD', 'OF', 'BEHAVIOUR', 'HAS', 'TO', 'TRUST', 'HIS', 'MEMORY', 'IN', 'MAKING', 'THE', 'RECORD'] +8230-279154-0006-2318: hyp=['THE', 'BEHAVIORIST', 'WHO', 'ATTEMPTS', 'TO', 'MAKE', 'PSYCHOLOGY', 'A', 'RECORD', 'OF', 'BEHAVIOR', 'HAS', 'TO', 'TRUST', 'HIS', 'MEMORY', 'IN', 'MAKING', 'THE', 'RECORD'] +8230-279154-0007-2319: ref=['HABIT', 'IS', 'A', 'CONCEPT', 'INVOLVING', 'THE', 'OCCURRENCE', 'OF', 'SIMILAR', 'EVENTS', 'AT', 'DIFFERENT', 'TIMES', 'IF', 'THE', 'BEHAVIOURIST', 'FEELS', 'CONFIDENT', 'THAT', 'THERE', 'IS', 'SUCH', 'A', 'PHENOMENON', 'AS', 'HABIT', 'THAT', 'CAN', 'ONLY', 'BE', 'BECAUSE', 'HE', 'TRUSTS', 'HIS', 'MEMORY', 'WHEN', 'IT', 'ASSURES', 'HIM', 'THAT', 'THERE', 'HAVE', 'BEEN', 'OTHER', 'TIMES'] +8230-279154-0007-2319: hyp=['HABIT', 'IS', 'A', 'CONCEPT', 'INVOLVING', 'THE', 'OCCURRENCE', 'OF', 'SIMILAR', 'EVENTS', 'AT', 'DIFFERENT', 'TIMES', 'IF', 'THE', 'BEHAVIORIST', 'FEELS', 'CONFIDENT', 'THAT', 'THERE', 'IS', 'SUCH', 'A', 'PHENOMENON', 'AS', 'HABIT', 'THAT', 'CAN', 'ONLY', 'BE', 'BECAUSE', 'HE', 'TRUSTS', 'HIS', 'MEMORY', 'WHEN', 'IT', 'ASSURES', 'HIM', 'THAT', 'THERE', 'HAVE', 'BEEN', 'OTHER', 'TIMES'] +8230-279154-0008-2320: ref=['BUT', 'I', 'DO', 'NOT', 'THINK', 'SUCH', 'AN', 'INFERENCE', 'IS', 'WARRANTED'] +8230-279154-0008-2320: hyp=['BUT', 'I', 'DO', 'NOT', 'THINK', 'SUCH', 'AN', 'INFERENCE', 'IS', 'WARRANTED'] +8230-279154-0009-2321: ref=['OUR', 'CONFIDENCE', 'OR', 'LACK', 'OF', 'CONFIDENCE', 'IN', 'THE', 'ACCURACY', 'OF', 'A', 'MEMORY', 'IMAGE', 'MUST', 'IN', 'FUNDAMENTAL', 'CASES', 'BE', 'BASED', 'UPON', 'A', 'CHARACTERISTIC', 'OF', 'THE', 'IMAGE', 'ITSELF', 'SINCE', 'WE', 'CANNOT', 'EVOKE', 'THE', 'PAST', 'BODILY', 'AND', 'COMPARE', 'IT', 'WITH', 'THE', 'PRESENT', 'IMAGE'] +8230-279154-0009-2321: hyp=['OUR', 'CONFIDENCE', 'OR', 'LACK', 'OF', 'CONFIDENCE', 'IN', 'THE', 'ACCURACY', 'OF', 'A', 'MEMORY', 'IMAGE', 'MUST', 'IN', 'FUNDAMENTAL', 'CASES', 'BE', 'BASED', 'UPON', 'A', 'CHARACTERISTIC', 'OF', 'THE', 'IMAGE', 'ITSELF', 'SINCE', 'WE', 'CANNOT', 'EVOKE', 'THE', 'PAST', 'BODILY', 'AND', 'COMPARE', 'IT', 'WITH', 'THE', 'PRESENT', 'IMAGE'] +8230-279154-0010-2322: ref=['WE', 'SOMETIMES', 'HAVE', 'IMAGES', 'THAT', 'ARE', 'BY', 'NO', 'MEANS', 'PECULIARLY', 'VAGUE', 'WHICH', 'YET', 'WE', 'DO', 'NOT', 'TRUST', 'FOR', 'EXAMPLE', 'UNDER', 'THE', 'INFLUENCE', 'OF', 'FATIGUE', 'WE', 'MAY', 'SEE', 'A', "FRIEND'S", 'FACE', 'VIVIDLY', 'AND', 'CLEARLY', 'BUT', 'HORRIBLY', 'DISTORTED'] +8230-279154-0010-2322: hyp=['WE', 'SOMETIMES', 'HAVE', 'IMAGES', 'THAT', 'ARE', 'BY', 'NO', 'MEANS', 'PECULIARLY', 'VAGUE', 'WHICH', 'YET', 'WE', 'DO', 'NOT', 'TRUST', 'FOR', 'EXAMPLE', 'UNDER', 'THE', 'INFLUENCE', 'OF', 'FATIGUE', 'WE', 'MAY', 'SEE', 'A', "FRIEND'S", 'FACE', 'VIVIDLY', 'AND', 'CLEARLY', 'BUT', 'HORRIBLY', 'DISTORTED'] +8230-279154-0011-2323: ref=['SOME', 'IMAGES', 'LIKE', 'SOME', 'SENSATIONS', 'FEEL', 'VERY', 'FAMILIAR', 'WHILE', 'OTHERS', 'FEEL', 'STRANGE'] +8230-279154-0011-2323: hyp=['SOME', 'IMAGES', 'LIKE', 'SOME', 'SENSATIONS', 'FEEL', 'VERY', 'FAMILIAR', 'WHILE', 'OTHERS', 'FEEL', 'STRANGE'] +8230-279154-0012-2324: ref=['FAMILIARITY', 'IS', 'A', 'FEELING', 'CAPABLE', 'OF', 'DEGREES'] +8230-279154-0012-2324: hyp=['FAMILIARITY', 'IS', 'A', 'FEELING', 'CAPABLE', 'OF', 'DEGREES'] +8230-279154-0013-2325: ref=['IN', 'AN', 'IMAGE', 'OF', 'A', 'WELL', 'KNOWN', 'FACE', 'FOR', 'EXAMPLE', 
'SOME', 'PARTS', 'MAY', 'FEEL', 'MORE', 'FAMILIAR', 'THAN', 'OTHERS', 'WHEN', 'THIS', 'HAPPENS', 'WE', 'HAVE', 'MORE', 'BELIEF', 'IN', 'THE', 'ACCURACY', 'OF', 'THE', 'FAMILIAR', 'PARTS', 'THAN', 'IN', 'THAT', 'OF', 'THE', 'UNFAMILIAR', 'PARTS'] +8230-279154-0013-2325: hyp=['IN', 'AN', 'IMAGE', 'OF', 'A', 'WELL', 'KNOWN', 'FACE', 'FOR', 'EXAMPLE', 'SOME', 'PARTS', 'MAY', 'FEEL', 'MORE', 'FAMILIAR', 'THAN', 'OTHERS', 'WHEN', 'THIS', 'HAPPENS', 'WE', 'HAVE', 'MORE', 'BELIEF', 'IN', 'THE', 'ACCURACY', 'OF', 'THE', 'FAMILIAR', 'PARTS', 'THAN', 'IN', 'THAT', 'OF', 'THE', 'UNFAMILIAR', 'PARTS'] +8230-279154-0014-2326: ref=['I', 'COME', 'NOW', 'TO', 'THE', 'OTHER', 'CHARACTERISTIC', 'WHICH', 'MEMORY', 'IMAGES', 'MUST', 'HAVE', 'IN', 'ORDER', 'TO', 'ACCOUNT', 'FOR', 'OUR', 'KNOWLEDGE', 'OF', 'THE', 'PAST'] +8230-279154-0014-2326: hyp=['I', 'COME', 'NOW', 'TO', 'THE', 'OTHER', 'CHARACTERISTIC', 'WHICH', 'MEMORY', 'IMAGES', 'MUST', 'HAVE', 'IN', 'ORDER', 'TO', 'ACCOUNT', 'FOR', 'OUR', 'KNOWLEDGE', 'OF', 'THE', 'PAST'] +8230-279154-0015-2327: ref=['THEY', 'MUST', 'HAVE', 'SOME', 'CHARACTERISTIC', 'WHICH', 'MAKES', 'US', 'REGARD', 'THEM', 'AS', 'REFERRING', 'TO', 'MORE', 'OR', 'LESS', 'REMOTE', 'PORTIONS', 'OF', 'THE', 'PAST'] +8230-279154-0015-2327: hyp=['THEY', 'MUST', 'HAVE', 'SOME', 'CHARACTERISTIC', 'WHICH', 'MAKES', 'US', 'REGARD', 'THEM', 'AS', 'REFERRING', 'TO', 'MORE', 'OR', 'LESS', 'REMOTE', 'PORTIONS', 'OF', 'THE', 'PAST'] +8230-279154-0016-2328: ref=['IN', 'ACTUAL', 'FACT', 'THERE', 'ARE', 'DOUBTLESS', 'VARIOUS', 'FACTORS', 'THAT', 'CONCUR', 'IN', 'GIVING', 'US', 'THE', 'FEELING', 'OF', 'GREATER', 'OR', 'LESS', 'REMOTENESS', 'IN', 'SOME', 'REMEMBERED', 'EVENT'] +8230-279154-0016-2328: hyp=['IN', 'ACTUAL', 'FACT', 'THERE', 'ARE', 'DOUBTLESS', 'VARIOUS', 'FACTORS', 'THAT', 'CONCUR', 'IN', 'GIVING', 'US', 'THE', 'FEELING', 'OF', 'GREATER', 'OR', 'LESS', 'REMOTENESS', 'IN', 'SOME', 'REMEMBERED', 'EVENT'] +8230-279154-0017-2329: ref=['THERE', 'MAY', 'BE', 'A', 'SPECIFIC', 'FEELING', 'WHICH', 'COULD', 'BE', 'CALLED', 'THE', 'FEELING', 'OF', 'PASTNESS', 'ESPECIALLY', 'WHERE', 'IMMEDIATE', 'MEMORY', 'IS', 'CONCERNED'] +8230-279154-0017-2329: hyp=['THERE', 'MAY', 'BE', 'A', 'SPECIFIC', 'FEELING', 'WHICH', 'COULD', 'BE', 'CALLED', 'THE', 'FEELING', 'OF', 'PASTNESS', 'ESPECIALLY', 'WHERE', 'IMMEDIATE', 'MEMORY', 'IS', 'CONCERNED'] +8230-279154-0018-2330: ref=['THERE', 'IS', 'OF', 'COURSE', 'A', 'DIFFERENCE', 'BETWEEN', 'KNOWING', 'THE', 'TEMPORAL', 'RELATION', 'OF', 'A', 'REMEMBERED', 'EVENT', 'TO', 'THE', 'PRESENT', 'AND', 'KNOWING', 'THE', 'TIME', 'ORDER', 'OF', 'TWO', 'REMEMBERED', 'EVENTS'] +8230-279154-0018-2330: hyp=['THERE', 'IS', 'OF', 'COURSE', 'A', 'DIFFERENCE', 'BETWEEN', 'KNOWING', 'THE', 'TEMPORAL', 'RELATION', 'OF', 'A', 'REMEMBERED', 'EVENT', 'TO', 'THE', 'PRESENT', 'AND', 'KNOWING', 'THE', 'TIME', 'ORDER', 'OF', 'TWO', 'REMEMBERED', 'EVENTS'] +8230-279154-0019-2331: ref=['IT', 'WOULD', 'SEEM', 'THAT', 'ONLY', 'RATHER', 'RECENT', 'EVENTS', 'CAN', 'BE', 'PLACED', 'AT', 'ALL', 'ACCURATELY', 'BY', 'MEANS', 'OF', 'FEELINGS', 'GIVING', 'THEIR', 'TEMPORAL', 'RELATION', 'TO', 'THE', 'PRESENT', 'BUT', 'IT', 'IS', 'CLEAR', 'THAT', 'SUCH', 'FEELINGS', 'MUST', 'PLAY', 'AN', 'ESSENTIAL', 'PART', 'IN', 'THE', 'PROCESS', 'OF', 'DATING', 'REMEMBERED', 'EVENTS'] +8230-279154-0019-2331: hyp=['IT', 'WOULD', 'SEEM', 'THAT', 'ONLY', 'RATHER', 'RECENT', 'EVENTS', 'CAN', 'BE', 'PLACED', 'AT', 'ALL', 'ACCURATELY', 'BY', 'MEANS', 'OF', 'FEELINGS', 'GIVING', 'THEIR', 'TEMPORAL', 'RELATION', 'TO', 'THE', 
'PRESENT', 'BUT', 'IT', 'IS', 'CLEAR', 'THAT', 'SUCH', 'FEELINGS', 'MUST', 'PLAY', 'AN', 'ESSENTIAL', 'PART', 'IN', 'THE', 'PROCESS', 'OF', 'DATING', 'REMEMBERED', 'EVENTS'] +8230-279154-0020-2332: ref=['IF', 'WE', 'HAD', 'RETAINED', 'THE', 'SUBJECT', 'OR', 'ACT', 'IN', 'KNOWLEDGE', 'THE', 'WHOLE', 'PROBLEM', 'OF', 'MEMORY', 'WOULD', 'HAVE', 'BEEN', 'COMPARATIVELY', 'SIMPLE'] +8230-279154-0020-2332: hyp=['IF', 'WE', 'HAD', 'RETAINED', 'THE', 'SUBJECT', 'OR', 'ACT', 'IN', 'KNOWLEDGE', 'THE', 'WHOLE', 'PROBLEM', 'OF', 'MEMORY', 'WOULD', 'HAVE', 'BEEN', 'COMPARATIVELY', 'SIMPLE'] +8230-279154-0021-2333: ref=['REMEMBERING', 'HAS', 'TO', 'BE', 'A', 'PRESENT', 'OCCURRENCE', 'IN', 'SOME', 'WAY', 'RESEMBLING', 'OR', 'RELATED', 'TO', 'WHAT', 'IS', 'REMEMBERED'] +8230-279154-0021-2333: hyp=['REMEMBERING', 'HAS', 'TO', 'BE', 'A', 'PRESENT', 'OCCURRENCE', 'IN', 'SOME', 'WAY', 'RESEMBLING', 'OR', 'RELATED', 'TO', 'WHAT', 'IS', 'REMEMBERED'] +8230-279154-0022-2334: ref=['SOME', 'POINTS', 'MAY', 'BE', 'TAKEN', 'AS', 'FIXED', 'AND', 'SUCH', 'AS', 'ANY', 'THEORY', 'OF', 'MEMORY', 'MUST', 'ARRIVE', 'AT'] +8230-279154-0022-2334: hyp=['SOME', 'POINTS', 'MAY', 'BE', 'TAKEN', 'AS', 'FIXED', 'AND', 'SUCH', 'AS', 'ANY', 'THEORY', 'OF', 'MEMORY', 'MUST', 'ARRIVE', 'AT'] +8230-279154-0023-2335: ref=['IN', 'THIS', 'CASE', 'AS', 'IN', 'MOST', 'OTHERS', 'WHAT', 'MAY', 'BE', 'TAKEN', 'AS', 'CERTAIN', 'IN', 'ADVANCE', 'IS', 'RATHER', 'VAGUE'] +8230-279154-0023-2335: hyp=['IN', 'THIS', 'CASE', 'AS', 'IN', 'MOST', 'OTHERS', 'WHAT', 'MAY', 'BE', 'TAKEN', 'AS', 'CERTAIN', 'IN', 'ADVANCE', 'IS', 'RATHER', 'VAGUE'] +8230-279154-0024-2336: ref=['THE', 'FIRST', 'OF', 'OUR', 'VAGUE', 'BUT', 'INDUBITABLE', 'DATA', 'IS', 'THAT', 'THERE', 'IS', 'KNOWLEDGE', 'OF', 'THE', 'PAST'] +8230-279154-0024-2336: hyp=['THE', 'FIRST', 'OF', 'OUR', 'VAGUE', 'BUT', 'INDUBITABLE', 'DATA', 'IS', 'THAT', 'THERE', 'IS', 'KNOWLEDGE', 'OF', 'THE', 'PAST'] +8230-279154-0025-2337: ref=['WE', 'MIGHT', 'PROVISIONALLY', 'THOUGH', 'PERHAPS', 'NOT', 'QUITE', 'CORRECTLY', 'DEFINE', 'MEMORY', 'AS', 'THAT', 'WAY', 'OF', 'KNOWING', 'ABOUT', 'THE', 'PAST', 'WHICH', 'HAS', 'NO', 'ANALOGUE', 'IN', 'OUR', 'KNOWLEDGE', 'OF', 'THE', 'FUTURE', 'SUCH', 'A', 'DEFINITION', 'WOULD', 'AT', 'LEAST', 'SERVE', 'TO', 'MARK', 'THE', 'PROBLEM', 'WITH', 'WHICH', 'WE', 'ARE', 'CONCERNED', 'THOUGH', 'SOME', 'EXPECTATIONS', 'MAY', 'DESERVE', 'TO', 'RANK', 'WITH', 'MEMORY', 'AS', 'REGARDS', 'IMMEDIACY'] +8230-279154-0025-2337: hyp=['WE', 'MIGHT', 'PROVISIONALLY', 'THOUGH', 'PERHAPS', 'NOT', 'QUITE', 'CORRECTLY', 'DEFINE', 'MEMORY', 'AS', 'THAT', 'WAY', 'OF', 'KNOWING', 'ABOUT', 'THE', 'PAST', 'WHICH', 'HAS', 'NO', 'ANALOGUE', 'IN', 'OUR', 'KNOWLEDGE', 'OF', 'THE', 'FUTURE', 'SUCH', 'A', 'DEFINITION', 'WOULD', 'AT', 'LEAST', 'SERVE', 'TO', 'MARK', 'THE', 'PROBLEM', 'WITH', 'WHICH', 'WE', 'ARE', 'CONCERNED', 'THOUGH', 'SOME', 'EXPECTATIONS', 'MAY', 'DESERVE', 'TO', 'RANK', 'WITH', 'MEMORY', 'AS', 'REGARDS', 'IMMEDIACY'] +8230-279154-0026-2338: ref=['THIS', 'DISTINCTION', 'IS', 'VITAL', 'TO', 'THE', 'UNDERSTANDING', 'OF', 'MEMORY', 'BUT', 'IT', 'IS', 'NOT', 'SO', 'EASY', 'TO', 'CARRY', 'OUT', 'IN', 'PRACTICE', 'AS', 'IT', 'IS', 'TO', 'DRAW', 'IN', 'THEORY'] +8230-279154-0026-2338: hyp=['THIS', 'DISTINCTION', 'IS', 'VITAL', 'TO', 'THE', 'UNDERSTANDING', 'OF', 'MEMORY', 'BUT', 'IT', 'IS', 'NOT', 'SO', 'EASY', 'TO', 'CARRY', 'OUT', 'IN', 'PRACTICE', 'AS', 'IT', 'IS', 'TO', 'DRAW', 'IN', 'THEORY'] +8230-279154-0027-2339: ref=['A', 'GRAMOPHONE', 'BY', 'THE', 'HELP', 'OF', 'SUITABLE', 
'RECORDS', 'MIGHT', 'RELATE', 'TO', 'US', 'THE', 'INCIDENTS', 'OF', 'ITS', 'PAST', 'AND', 'PEOPLE', 'ARE', 'NOT', 'SO', 'DIFFERENT', 'FROM', 'GRAMOPHONES', 'AS', 'THEY', 'LIKE', 'TO', 'BELIEVE'] +8230-279154-0027-2339: hyp=['A', 'GRAMOPHONE', 'BY', 'THE', 'HELP', 'OF', 'SUITABLE', 'RECORDS', 'MIGHT', 'RELATE', 'TO', 'US', 'THE', 'INCIDENTS', 'OF', 'ITS', 'PAST', 'AND', 'PEOPLE', 'ARE', 'NOT', 'SO', 'DIFFERENT', 'FROM', 'GRAMOPHONES', 'AS', 'THEY', 'LIKE', 'TO', 'BELIEVE'] +8230-279154-0028-2340: ref=['I', 'CAN', 'SET', 'TO', 'WORK', 'NOW', 'TO', 'REMEMBER', 'THINGS', 'I', 'NEVER', 'REMEMBERED', 'BEFORE', 'SUCH', 'AS', 'WHAT', 'I', 'HAD', 'TO', 'EAT', 'FOR', 'BREAKFAST', 'THIS', 'MORNING', 'AND', 'IT', 'CAN', 'HARDLY', 'BE', 'WHOLLY', 'HABIT', 'THAT', 'ENABLES', 'ME', 'TO', 'DO', 'THIS'] +8230-279154-0028-2340: hyp=['I', 'CAN', 'SET', 'TO', 'WORK', 'NOW', 'TO', 'REMEMBER', 'THINGS', 'I', 'NEVER', 'REMEMBERED', 'BEFORE', 'SUCH', 'AS', 'WHAT', 'I', 'HAD', 'TO', 'EAT', 'FOR', 'BREAKFAST', 'THIS', 'MORNING', 'AND', 'IT', 'CAN', 'HARDLY', 'BE', 'HOLY', 'HABIT', 'THAT', 'ENABLES', 'ME', 'TO', 'DO', 'THIS'] +8230-279154-0029-2341: ref=['THE', 'FACT', 'THAT', 'A', 'MAN', 'CAN', 'RECITE', 'A', 'POEM', 'DOES', 'NOT', 'SHOW', 'THAT', 'HE', 'REMEMBERS', 'ANY', 'PREVIOUS', 'OCCASION', 'ON', 'WHICH', 'HE', 'HAS', 'RECITED', 'OR', 'READ', 'IT'] +8230-279154-0029-2341: hyp=['THE', 'FACT', 'THAT', 'A', 'MAN', 'CAN', 'RECITE', 'A', 'POEM', 'DOES', 'NOT', 'SHOW', 'THAT', 'HE', 'REMEMBERS', 'ANY', 'PREVIOUS', 'OCCASION', 'ON', 'WHICH', 'HE', 'HAS', 'RECITED', 'OR', 'READ', 'IT'] +8230-279154-0030-2342: ref=["SEMON'S", 'TWO', 'BOOKS', 'MENTIONED', 'IN', 'AN', 'EARLIER', 'LECTURE', 'DO', 'NOT', 'TOUCH', 'KNOWLEDGE', 'MEMORY', 'AT', 'ALL', 'CLOSELY'] +8230-279154-0030-2342: hyp=['SYMONDS', 'TWO', 'BOOKS', 'MENTIONED', 'IN', 'AN', 'EARLIER', 'LECTURE', 'DO', 'NOT', 'TOUCH', 'KNOWLEDGE', 'MEMORY', 'AT', 'ALL', 'CLOSELY'] +8230-279154-0031-2343: ref=['THEY', 'GIVE', 'LAWS', 'ACCORDING', 'TO', 'WHICH', 'IMAGES', 'OF', 'PAST', 'OCCURRENCES', 'COME', 'INTO', 'OUR', 'MINDS', 'BUT', 'DO', 'NOT', 'DISCUSS', 'OUR', 'BELIEF', 'THAT', 'THESE', 'IMAGES', 'REFER', 'TO', 'PAST', 'OCCURRENCES', 'WHICH', 'IS', 'WHAT', 'CONSTITUTES', 'KNOWLEDGE', 'MEMORY'] +8230-279154-0031-2343: hyp=['THEY', 'GIVE', 'LAWS', 'ACCORDING', 'TO', 'WHICH', 'IMAGES', 'OF', 'PAST', 'OCCURRENCES', 'COME', 'INTO', 'OUR', 'MINDS', 'BUT', 'DO', 'NOT', 'DISCUSS', 'OUR', 'BELIEF', 'THAT', 'THESE', 'IMAGES', 'REFER', 'TO', 'PAST', 'OCCURRENCES', 'WHICH', 'IS', 'WHAT', 'CONSTITUTES', 'KNOWLEDGE', 'OF', 'MEMORY'] +8230-279154-0032-2344: ref=['IT', 'IS', 'THIS', 'THAT', 'IS', 'OF', 'INTEREST', 'TO', 'THEORY', 'OF', 'KNOWLEDGE'] +8230-279154-0032-2344: hyp=['IT', 'IS', 'THIS', 'THAT', 'IS', 'OF', 'INTEREST', 'TO', 'THEORY', 'OF', 'KNOWLEDGE'] +8230-279154-0033-2345: ref=['IT', 'IS', 'BY', 'NO', 'MEANS', 'ALWAYS', 'RELIABLE', 'ALMOST', 'EVERYBODY', 'HAS', 'AT', 'SOME', 'TIME', 'EXPERIENCED', 'THE', 'WELL', 'KNOWN', 'ILLUSION', 'THAT', 'ALL', 'THAT', 'IS', 'HAPPENING', 'NOW', 'HAPPENED', 'BEFORE', 'AT', 'SOME', 'TIME'] +8230-279154-0033-2345: hyp=['IT', 'IS', 'BY', 'NO', 'MEANS', 'ALWAYS', 'RELIABLE', 'ALMOST', 'EVERYBODY', 'HAS', 'AT', 'SOME', 'TIME', 'EXPERIENCED', 'THE', 'WELL', 'KNOWN', 'ILLUSION', 'THAT', 'ALL', 'THAT', 'IS', 'HAPPENING', 'NOW', 'HAPPENED', 'BEFORE', 'AT', 'SOME', 'TIME'] +8230-279154-0034-2346: ref=['WHENEVER', 'THE', 'SENSE', 'OF', 'FAMILIARITY', 'OCCURS', 'WITHOUT', 'A', 'DEFINITE', 'OBJECT', 'IT', 'LEADS', 'US', 'TO', 'SEARCH', 'THE', 
'ENVIRONMENT', 'UNTIL', 'WE', 'ARE', 'SATISFIED', 'THAT', 'WE', 'HAVE', 'FOUND', 'THE', 'APPROPRIATE', 'OBJECT', 'WHICH', 'LEADS', 'US', 'TO', 'THE', 'JUDGMENT', 'THIS', 'IS', 'FAMILIAR'] +8230-279154-0034-2346: hyp=['WHENEVER', 'THE', 'SENSE', 'OF', 'FAMILIARITY', 'OCCURS', 'WITHOUT', 'A', 'DEFINITE', 'OBJECT', 'IT', 'LEADS', 'US', 'TO', 'SEARCH', 'THE', 'ENVIRONMENT', 'UNTIL', 'WE', 'ARE', 'SATISFIED', 'THAT', 'WE', 'HAVE', 'FOUND', 'THE', 'APPROPRIATE', 'OBJECT', 'WHICH', 'LEADS', 'US', 'TO', 'THE', 'JUDGMENT', 'THIS', 'IS', 'FAMILIAR'] +8230-279154-0035-2347: ref=['THUS', 'NO', 'KNOWLEDGE', 'AS', 'TO', 'THE', 'PAST', 'IS', 'TO', 'BE', 'DERIVED', 'FROM', 'THE', 'FEELING', 'OF', 'FAMILIARITY', 'ALONE'] +8230-279154-0035-2347: hyp=['THUS', 'NO', 'KNOWLEDGE', 'AS', 'TO', 'THE', 'PAST', 'IS', 'TO', 'BE', 'DERIVED', 'FROM', 'THE', 'FEELING', 'OF', 'FAMILIARITY', 'ALONE'] +8230-279154-0036-2348: ref=['A', 'FURTHER', 'STAGE', 'IS', 'RECOGNITION'] +8230-279154-0036-2348: hyp=['A', 'FURTHER', 'STAGE', 'IS', 'RECOGNITION'] +8230-279154-0037-2349: ref=['RECOGNITION', 'IN', 'THIS', 'SENSE', 'DOES', 'NOT', 'NECESSARILY', 'INVOLVE', 'MORE', 'THAN', 'A', 'HABIT', 'OF', 'ASSOCIATION', 'THE', 'KIND', 'OF', 'OBJECT', 'WE', 'ARE', 'SEEING', 'AT', 'THE', 'MOMENT', 'IS', 'ASSOCIATED', 'WITH', 'THE', 'WORD', 'CAT', 'OR', 'WITH', 'AN', 'AUDITORY', 'IMAGE', 'OF', 'PURRING', 'OR', 'WHATEVER', 'OTHER', 'CHARACTERISTIC', 'WE', 'MAY', 'HAPPEN', 'TO', 'RECOGNIZE', 'IN', 'THE', 'CAT', 'OF', 'THE', 'MOMENT'] +8230-279154-0037-2349: hyp=['RECOGNITION', 'IN', 'THIS', 'SENSE', 'DOES', 'NOT', 'NECESSARILY', 'INVOLVE', 'MORE', 'THAN', 'A', 'HABIT', 'OF', 'ASSOCIATION', 'THE', 'KIND', 'OF', 'OBJECT', 'WE', 'ARE', 'SEEING', 'AT', 'THE', 'MOMENT', 'IS', 'ASSOCIATED', 'WITH', 'THE', 'WORD', 'CAT', 'OR', 'WITH', 'AN', 'AUDITORY', 'IMAGE', 'OF', 'PURRING', 'OR', 'WHATEVER', 'OTHER', 'CHARACTERISTIC', 'WE', 'MAY', 'HAPPEN', 'TO', 'RECOGNIZE', 'IN', 'THE', 'CAT', 'OF', 'THE', 'MOMENT'] +8230-279154-0038-2350: ref=['WE', 'ARE', 'OF', 'COURSE', 'IN', 'FACT', 'ABLE', 'TO', 'JUDGE', 'WHEN', 'WE', 'RECOGNIZE', 'AN', 'OBJECT', 'THAT', 'WE', 'HAVE', 'SEEN', 'IT', 'BEFORE', 'BUT', 'THIS', 'JUDGMENT', 'IS', 'SOMETHING', 'OVER', 'AND', 'ABOVE', 'RECOGNITION', 'IN', 'THIS', 'FIRST', 'SENSE', 'AND', 'MAY', 'VERY', 'PROBABLY', 'BE', 'IMPOSSIBLE', 'TO', 'ANIMALS', 'THAT', 'NEVERTHELESS', 'HAVE', 'THE', 'EXPERIENCE', 'OF', 'RECOGNITION', 'IN', 'THIS', 'FIRST', 'SENSE', 'OF', 'THE', 'WORD'] +8230-279154-0038-2350: hyp=['WE', 'ARE', 'OF', 'COURSE', 'IN', 'FACT', 'ABLE', 'TO', 'JUDGE', 'WHEN', 'WE', 'RECOGNIZE', 'AN', 'OBJECT', 'THAT', 'WE', 'HAVE', 'SEEN', 'IT', 'BEFORE', 'BUT', 'THIS', 'JUDGEMENT', 'IS', 'SOMETHING', 'OVER', 'AND', 'ABOVE', 'RECOGNITION', 'IN', 'THIS', 'FIRST', 'SENSE', 'AND', 'MAY', 'VERY', 'PROBABLY', 'BE', 'IMPOSSIBLE', 'TO', 'ANIMALS', 'THAT', 'NEVERTHELESS', 'HAVE', 'THE', 'EXPERIENCE', 'OF', 'RECOGNITION', 'IN', 'THIS', 'FIRST', 'SENSE', 'OF', 'THE', 'WORD'] +8230-279154-0039-2351: ref=['THIS', 'KNOWLEDGE', 'IS', 'MEMORY', 'IN', 'ONE', 'SENSE', 'THOUGH', 'IN', 'ANOTHER', 'IT', 'IS', 'NOT'] +8230-279154-0039-2351: hyp=['THIS', 'KNOWLEDGE', 'IS', 'MEMORY', 'IN', 'ONE', 'SENSE', 'THOUGH', 'IN', 'ANOTHER', 'IT', 'IS', 'NOT'] +8230-279154-0040-2352: ref=['THERE', 'ARE', 'HOWEVER', 'SEVERAL', 'POINTS', 'IN', 'WHICH', 'SUCH', 'AN', 'ACCOUNT', 'OF', 'RECOGNITION', 'IS', 'INADEQUATE', 'TO', 'BEGIN', 'WITH', 'IT', 'MIGHT', 'SEEM', 'AT', 'FIRST', 'SIGHT', 'MORE', 'CORRECT', 'TO', 'DEFINE', 'RECOGNITION', 'AS', 'I', 'HAVE', 'SEEN', 
'THIS', 'BEFORE', 'THAN', 'AS', 'THIS', 'HAS', 'EXISTED', 'BEFORE'] +8230-279154-0040-2352: hyp=['THERE', 'ARE', 'HOWEVER', 'SEVERAL', 'POINTS', 'IN', 'WHICH', 'SUCH', 'AN', 'ACCOUNT', 'OF', 'RECOGNITION', 'IS', 'INADEQUATE', 'TO', 'BEGIN', 'WITH', 'IT', 'MIGHT', 'SEEM', 'AT', 'FIRST', 'SIGHT', 'MORE', 'CORRECT', 'TO', 'DEFINE', 'RECOGNITION', 'AS', 'I', 'HAVE', 'SEEN', 'THIS', 'BEFORE', 'THAN', 'AS', 'THIS', 'HAS', 'EXISTED', 'BEFORE'] +8230-279154-0041-2353: ref=['THE', 'DEFINITION', 'OF', 'MY', 'EXPERIENCE', 'IS', 'DIFFICULT', 'BROADLY', 'SPEAKING', 'IT', 'IS', 'EVERYTHING', 'THAT', 'IS', 'CONNECTED', 'WITH', 'WHAT', 'I', 'AM', 'EXPERIENCING', 'NOW', 'BY', 'CERTAIN', 'LINKS', 'OF', 'WHICH', 'THE', 'VARIOUS', 'FORMS', 'OF', 'MEMORY', 'ARE', 'AMONG', 'THE', 'MOST', 'IMPORTANT'] +8230-279154-0041-2353: hyp=['THE', 'DEFINITION', 'OF', 'MY', 'EXPERIENCE', 'IS', 'DIFFICULT', 'BROADLY', 'SPEAKING', 'IT', 'IS', 'EVERYTHING', 'THAT', 'IS', 'CONNECTED', 'WITH', 'WHAT', 'I', 'AM', 'EXPERIENCING', 'NOW', 'BY', 'CERTAIN', 'LINKS', 'OF', 'WHICH', 'THE', 'VARIOUS', 'FORMS', 'OF', 'MEMORY', 'ARE', 'AMONG', 'THE', 'MOST', 'IMPORTANT'] +8230-279154-0042-2354: ref=['THUS', 'IF', 'I', 'RECOGNIZE', 'A', 'THING', 'THE', 'OCCASION', 'OF', 'ITS', 'PREVIOUS', 'EXISTENCE', 'IN', 'VIRTUE', 'OF', 'WHICH', 'I', 'RECOGNIZE', 'IT', 'FORMS', 'PART', 'OF', 'MY', 'EXPERIENCE', 'BY', 'DEFINITION', 'RECOGNITION', 'WILL', 'BE', 'ONE', 'OF', 'THE', 'MARKS', 'BY', 'WHICH', 'MY', 'EXPERIENCE', 'IS', 'SINGLED', 'OUT', 'FROM', 'THE', 'REST', 'OF', 'THE', 'WORLD'] +8230-279154-0042-2354: hyp=['THUS', 'IF', 'I', 'RECOGNIZE', 'A', 'THING', 'THE', 'OCCASION', 'OF', 'ITS', 'PREVIOUS', 'EXISTENCE', 'IN', 'VIRTUE', 'OF', 'WHICH', 'I', 'RECOGNIZE', 'IT', 'FORMS', 'PART', 'OF', 'MY', 'EXPERIENCE', 'BY', 'DEFINITION', 'RECOGNITION', 'WILL', 'BE', 'ONE', 'OF', 'THE', 'MARKS', 'BY', 'WHICH', 'MY', 'EXPERIENCE', 'IS', 'SINGLED', 'OUT', 'FROM', 'THE', 'REST', 'OF', 'THE', 'WORLD'] +8230-279154-0043-2355: ref=['OF', 'COURSE', 'THE', 'WORDS', 'THIS', 'HAS', 'EXISTED', 'BEFORE', 'ARE', 'A', 'VERY', 'INADEQUATE', 'TRANSLATION', 'OF', 'WHAT', 'ACTUALLY', 'HAPPENS', 'WHEN', 'WE', 'FORM', 'A', 'JUDGMENT', 'OF', 'RECOGNITION', 'BUT', 'THAT', 'IS', 'UNAVOIDABLE', 'WORDS', 'ARE', 'FRAMED', 'TO', 'EXPRESS', 'A', 'LEVEL', 'OF', 'THOUGHT', 'WHICH', 'IS', 'BY', 'NO', 'MEANS', 'PRIMITIVE', 'AND', 'ARE', 'QUITE', 'INCAPABLE', 'OF', 'EXPRESSING', 'SUCH', 'AN', 'ELEMENTARY', 'OCCURRENCE', 'AS', 'RECOGNITION'] +8230-279154-0043-2355: hyp=['OF', 'COURSE', 'THE', 'WORDS', 'THIS', 'HAS', 'EXISTED', 'BEFORE', 'ARE', 'A', 'VERY', 'INADEQUATE', 'TRANSLATION', 'OF', 'WHAT', 'ACTUALLY', 'HAPPENS', 'WHEN', 'WE', 'FORM', 'A', 'JUDGMENT', 'OF', 'RECOGNITION', 'BUT', 'THAT', 'IS', 'UNAVOIDABLE', 'WORDS', 'ARE', 'FRAMED', 'TO', 'EXPRESS', 'A', 'LEVEL', 'OF', 'THOUGHT', 'WHICH', 'IS', 'BY', 'NO', 'MEANS', 'PRIMITIVE', 'AND', 'ARE', 'QUITE', 'INCAPABLE', 'OF', 'EXPRESSING', 'SUCH', 'AN', 'ELEMENTARY', 'OCCURRENCE', 'AS', 'RECOGNITION'] +8455-210777-0000-2356: ref=['I', 'REMAINED', 'THERE', 'ALONE', 'FOR', 'MANY', 'HOURS', 'BUT', 'I', 'MUST', 'ACKNOWLEDGE', 'THAT', 'BEFORE', 'I', 'LEFT', 'THE', 'CHAMBERS', 'I', 'HAD', 'GRADUALLY', 'BROUGHT', 'MYSELF', 'TO', 'LOOK', 'AT', 'THE', 'MATTER', 'IN', 'ANOTHER', 'LIGHT'] +8455-210777-0000-2356: hyp=['I', 'REMAINED', 'THERE', 'ALONE', 'FOR', 'MANY', 'HOURS', 'BUT', 'I', 'MUST', 'ACKNOWLEDGE', 'THAT', 'BEFORE', 'I', 'LEFT', 'THE', 'CHAMBERS', 'I', 'HAD', 'GRADUALLY', 'BROUGHT', 'MYSELF', 'TO', 'LOOK', 'AT', 'THE', 'MATTER', 'IN', 
'ANOTHER', 'LIGHT'] +8455-210777-0001-2357: ref=['HAD', 'EVA', 'CRASWELLER', 'NOT', 'BEEN', 'GOOD', 'LOOKING', 'HAD', 'JACK', 'BEEN', 'STILL', 'AT', 'COLLEGE', 'HAD', 'SIR', 'KENNINGTON', 'OVAL', 'REMAINED', 'IN', 'ENGLAND', 'HAD', 'MISTER', 'BUNNIT', 'AND', 'THE', 'BAR', 'KEEPER', 'NOT', 'SUCCEEDED', 'IN', 'STOPPING', 'MY', 'CARRIAGE', 'ON', 'THE', 'HILL', 'SHOULD', 'I', 'HAVE', 'SUCCEEDED', 'IN', 'ARRANGING', 'FOR', 'THE', 'FINAL', 'DEPARTURE', 'OF', 'MY', 'OLD', 'FRIEND'] +8455-210777-0001-2357: hyp=['HAD', 'EVA', 'CRASWELLER', 'NOT', 'BEEN', 'GOOD', 'LOOKING', 'HAD', 'JACK', 'BEEN', 'STILL', 'AT', 'COLLEGE', 'HAD', 'SIR', 'KENNINGTON', 'OVAL', 'REMAINED', 'IN', 'ENGLAND', 'HAD', 'MISTER', 'BENNETT', 'AND', 'THE', 'BAR', 'KEEPER', 'NOT', 'SUCCEEDED', 'IN', 'STOPPING', 'MY', 'CARRIAGE', 'ON', 'THE', 'HILL', 'SHOULD', 'I', 'HAVE', 'SUCCEEDED', 'IN', 'ARRANGING', 'FOR', 'THE', 'FINAL', 'DEPARTURE', 'OF', 'MY', 'OLD', 'FRIEND'] +8455-210777-0002-2358: ref=['ON', 'ARRIVING', 'AT', 'HOME', 'AT', 'MY', 'OWN', 'RESIDENCE', 'I', 'FOUND', 'THAT', 'OUR', 'SALON', 'WAS', 'FILLED', 'WITH', 'A', 'BRILLIANT', 'COMPANY'] +8455-210777-0002-2358: hyp=['ON', 'ARRIVING', 'AT', 'HOME', 'AT', 'MY', 'OWN', 'RESIDENCE', 'I', 'FOUND', 'THAT', 'OUR', 'SALON', 'WAS', 'FILLED', 'WITH', 'A', 'BRILLIANT', 'COMPANY'] +8455-210777-0003-2359: ref=['AS', 'I', 'SPOKE', 'I', 'MADE', 'HIM', 'A', 'GRACIOUS', 'BOW', 'AND', 'I', 'THINK', 'I', 'SHOWED', 'HIM', 'BY', 'MY', 'MODE', 'OF', 'ADDRESS', 'THAT', 'I', 'DID', 'NOT', 'BEAR', 'ANY', 'GRUDGE', 'AS', 'TO', 'MY', 'INDIVIDUAL', 'SELF'] +8455-210777-0003-2359: hyp=['AS', 'I', 'SPOKE', 'I', 'MADE', 'HIM', 'A', 'GRACIOUS', 'BOW', 'AND', 'I', 'THINK', 'I', 'SHOWED', 'HIM', 'BY', 'MY', 'MODE', 'OF', 'ADDRESS', 'THAT', 'I', 'DID', 'NOT', 'BEAR', 'ANY', 'GRUDGE', 'AS', 'TO', 'MY', 'INDIVIDUAL', 'SELF'] +8455-210777-0004-2360: ref=['I', 'HAVE', 'COME', 'TO', 'YOUR', 'SHORES', 'MISTER', 'PRESIDENT', 'WITH', 'THE', 'PURPOSE', 'OF', 'SEEING', 'HOW', 'THINGS', 'ARE', 'PROGRESSING', 'IN', 'THIS', 'DISTANT', 'QUARTER', 'OF', 'THE', 'WORLD'] +8455-210777-0004-2360: hyp=['I', 'HAVE', 'COME', 'TO', 'YOUR', 'SHORES', 'MISTER', 'PRESIDENT', 'WITH', 'THE', 'PURPOSE', 'OF', 'SEEING', 'HOW', 'THINGS', 'ARE', 'PROGRESSING', 'IN', 'THIS', 'DISTANT', 'QUARTER', 'OF', 'THE', 'WORLD'] +8455-210777-0005-2361: ref=['WE', 'HAVE', 'OUR', 'LITTLE', 'STRUGGLES', 'HERE', 'AS', 'ELSEWHERE', 'AND', 'ALL', 'THINGS', 'CANNOT', 'BE', 'DONE', 'BY', 'ROSE', 'WATER'] +8455-210777-0005-2361: hyp=['WE', 'HAVE', 'OUR', 'LITTLE', 'STRUGGLES', 'HERE', 'AS', 'ELSEWHERE', 'AND', 'ALL', 'THINGS', 'CANNOT', 'BE', 'DONE', 'BY', 'ROSE', 'WATER'] +8455-210777-0006-2362: ref=['WE', 'ARE', 'QUITE', 'SATISFIED', 'NOW', 'CAPTAIN', 'BATTLEAX', 'SAID', 'MY', 'WIFE'] +8455-210777-0006-2362: hyp=['WE', 'ARE', 'QUITE', 'SATISFIED', 'NOW', 'CAPTAIN', 'BATTLE', 'AXE', 'SAID', 'MY', 'WIFE'] +8455-210777-0007-2363: ref=['QUITE', 'SATISFIED', 'SAID', 'EVA'] +8455-210777-0007-2363: hyp=['QUITE', 'SATISFIED', 'SAID', 'EVA'] +8455-210777-0008-2364: ref=['THE', 'LADIES', 'IN', 'COMPLIANCE', 'WITH', 'THAT', 'SOFTNESS', 'OF', 'HEART', 'WHICH', 'IS', 'THEIR', 'CHARACTERISTIC', 'ARE', 'ON', 'ONE', 'SIDE', 'AND', 'THE', 'MEN', 'BY', 'WHOM', 'THE', 'WORLD', 'HAS', 'TO', 'BE', 'MANAGED', 'ARE', 'ON', 'THE', 'OTHER'] +8455-210777-0008-2364: hyp=['THE', 'LADIES', 'IN', 'COMPLIANCE', 'WITH', 'THAT', 'SOFTNESS', 'OF', 'HEART', 'WHICH', 'IS', 'THEIR', 'CHARACTERISTIC', 'ARE', 'ON', 'ONE', 'SIDE', 'AND', 'THE', 'MEN', 'BY', 'WHOM', 'THE', 'WORLD', 'HAS', 
'TO', 'BE', 'MANAGED', 'ARE', 'ON', 'THE', 'OTHER'] +8455-210777-0009-2365: ref=['NO', 'DOUBT', 'IN', 'PROCESS', 'OF', 'TIME', 'THE', 'LADIES', 'WILL', 'FOLLOW'] +8455-210777-0009-2365: hyp=['NO', 'DOUBT', 'IN', 'PROCESS', 'OF', 'TIME', 'THE', 'LADIES', 'WILL', 'FOLLOW'] +8455-210777-0010-2366: ref=['THEIR', 'MASTERS', 'SAID', 'MISSUS', 'NEVERBEND'] +8455-210777-0010-2366: hyp=['THEIR', 'MASTERS', 'SAID', 'MISSUS', 'NEVERBEND'] +8455-210777-0011-2367: ref=['I', 'DID', 'NOT', 'MEAN', 'SAID', 'CAPTAIN', 'BATTLEAX', 'TO', 'TOUCH', 'UPON', 'PUBLIC', 'SUBJECTS', 'AT', 'SUCH', 'A', 'MOMENT', 'AS', 'THIS'] +8455-210777-0011-2367: hyp=['I', 'DID', 'NOT', 'MEAN', 'SAID', 'CAPTAIN', 'BATTLE', 'AX', 'TO', 'TOUCH', 'UPON', 'PUBLIC', 'SUBJECTS', 'AT', 'SUCH', 'A', 'MOMENT', 'AS', 'THIS'] +8455-210777-0012-2368: ref=['MISSUS', 'NEVERBEND', 'YOU', 'MUST', 'INDEED', 'BE', 'PROUD', 'OF', 'YOUR', 'SON'] +8455-210777-0012-2368: hyp=['MISSUS', 'NEVERBEND', 'YOU', 'MUST', 'INDEED', 'BE', 'PROUD', 'OF', 'YOUR', 'SON'] +8455-210777-0013-2369: ref=['JACK', 'HAD', 'BEEN', 'STANDING', 'IN', 'THE', 'FAR', 'CORNER', 'OF', 'THE', 'ROOM', 'TALKING', 'TO', 'EVA', 'AND', 'WAS', 'NOW', 'REDUCED', 'TO', 'SILENCE', 'BY', 'HIS', 'PRAISES'] +8455-210777-0013-2369: hyp=['JACK', 'HAD', 'BEEN', 'STANDING', 'IN', 'THE', 'FAR', 'CORNER', 'OF', 'THE', 'ROOM', 'TALKING', 'TO', 'EVA', 'AND', 'WAS', 'NOW', 'REDUCED', 'TO', 'SILENCE', 'BY', 'HIS', 'PRAISES'] +8455-210777-0014-2370: ref=['SIR', 'KENNINGTON', 'OVAL', 'IS', 'A', 'VERY', 'FINE', 'PLAYER', 'SAID', 'MY', 'WIFE'] +8455-210777-0014-2370: hyp=['SIR', 'KENNINGTON', 'OVAL', 'IS', 'A', 'VERY', 'FINE', 'PLAYER', 'SAID', 'MY', 'WIFE'] +8455-210777-0015-2371: ref=['I', 'AND', 'MY', 'WIFE', 'AND', 'SON', 'AND', 'THE', 'TWO', 'CRASWELLERS', 'AND', 'THREE', 'OR', 'FOUR', 'OTHERS', 'AGREED', 'TO', 'DINE', 'ON', 'BOARD', 'THE', 'SHIP', 'ON', 'THE', 'NEXT'] +8455-210777-0015-2371: hyp=['I', 'AND', 'MY', 'WIFE', 'AND', 'SON', 'AND', 'THE', 'TWO', 'CRESSWELLERS', 'AND', 'THREE', 'OR', 'FOUR', 'OTHERS', 'AGREED', 'TO', 'DINE', 'ON', 'BOARD', 'THE', 'SHIP', 'ON', 'THE', 'NEXT'] +8455-210777-0016-2372: ref=['THIS', 'I', 'FELT', 'WAS', 'PAID', 'TO', 'ME', 'AS', 'BEING', 'PRESIDENT', 'OF', 'THE', 'REPUBLIC', 'AND', 'I', 'ENDEAVOURED', 'TO', 'BEHAVE', 'MYSELF', 'WITH', 'SUCH', 'MINGLED', 'HUMILITY', 'AND', 'DIGNITY', 'AS', 'MIGHT', 'BEFIT', 'THE', 'OCCASION', 'BUT', 'I', 'COULD', 'NOT', 'BUT', 'FEEL', 'THAT', 'SOMETHING', 'WAS', 'WANTING', 'TO', 'THE', 'SIMPLICITY', 'OF', 'MY', 'ORDINARY', 'LIFE'] +8455-210777-0016-2372: hyp=['THIS', 'I', 'FELT', 'WAS', 'PAID', 'TO', 'ME', 'AS', 'BEING', 'PRESIDENT', 'OF', 'THE', 'REPUBLIC', 'AND', 'I', 'ENDEAVOURED', 'TO', 'BEHAVE', 'MYSELF', 'WITH', 'SUCH', 'MINGLED', 'HUMILITY', 'AND', 'DIGNITY', 'AS', 'MIGHT', 'BEFIT', 'THE', 'OCCASION', 'BUT', 'I', 'COULD', 'NOT', 'BUT', 'FEEL', 'THAT', 'SOMETHING', 'WAS', 'WANTING', 'TO', 'THE', 'SIMPLICITY', 'OF', 'MY', 'ORDINARY', 'LIFE'] +8455-210777-0017-2373: ref=['MY', 'WIFE', 'ON', 'THE', 'SPUR', 'OF', 'THE', 'MOMENT', 'MANAGED', 'TO', 'GIVE', 'THE', 'GENTLEMEN', 'A', 'VERY', 'GOOD', 'DINNER'] +8455-210777-0017-2373: hyp=['MY', 'WIFE', 'ON', 'THE', 'SPUR', 'OF', 'THE', 'MOMENT', 'MANAGED', 'TO', 'GIVE', 'THE', 'GENTLEMAN', 'A', 'VERY', 'GOOD', 'DINNER'] +8455-210777-0018-2374: ref=['THIS', 'SHE', 'SAID', 'WAS', 'TRUE', 'HOSPITALITY', 'AND', 'I', 'AM', 'NOT', 'SURE', 'THAT', 'I', 'DID', 'NOT', 'AGREE', 'WITH', 'HER'] +8455-210777-0018-2374: hyp=['THIS', 'SHE', 'SAID', 'WAS', 'TRUE', 'HOSPITALITY', 'AND', 'I', 'AM', 
'NOT', 'SURE', 'THAT', 'I', 'DID', 'NOT', 'AGREE', 'WITH', 'HER'] +8455-210777-0019-2375: ref=['THEN', 'THERE', 'WERE', 'THREE', 'OR', 'FOUR', 'LEADING', 'MEN', 'OF', 'THE', 'COMMUNITY', 'WITH', 'THEIR', 'WIVES', 'WHO', 'WERE', 'FOR', 'THE', 'MOST', 'PART', 'THE', 'FATHERS', 'AND', 'MOTHERS', 'OF', 'THE', 'YOUNG', 'LADIES'] +8455-210777-0019-2375: hyp=['THEN', 'THERE', 'WERE', 'THREE', 'OR', 'FOUR', 'LEADING', 'MEN', 'OF', 'THE', 'COMMUNITY', 'WITH', 'THEIR', 'WIVES', 'WHO', 'WERE', 'FOR', 'THE', 'MOST', 'PART', 'THE', 'FATHERS', 'AND', 'MOTHERS', 'OF', 'THE', 'YOUNG', 'LADIES'] +8455-210777-0020-2376: ref=['OH', 'YES', 'SAID', 'JACK', 'AND', "I'M", 'NOWHERE'] +8455-210777-0020-2376: hyp=['OH', 'YES', 'SAID', 'JACK', 'THEN', "I'M", 'NOWHERE'] +8455-210777-0021-2377: ref=['BUT', 'I', 'MEAN', 'TO', 'HAVE', 'MY', 'INNINGS', 'BEFORE', 'LONG'] +8455-210777-0021-2377: hyp=['BUT', 'I', 'MEAN', 'TO', 'HAVE', 'MY', 'INNINGS', 'BEFORE', 'LONG'] +8455-210777-0022-2378: ref=['OF', 'WHAT', 'MISSUS', 'NEVERBEND', 'HAD', 'GONE', 'THROUGH', 'IN', 'PROVIDING', 'BIRDS', 'BEASTS', 'AND', 'FISHES', 'NOT', 'TO', 'TALK', 'OF', 'TARTS', 'AND', 'JELLIES', 'FOR', 'THE', 'DINNER', 'OF', 'THAT', 'DAY', 'NO', 'ONE', 'BUT', 'MYSELF', 'CAN', 'HAVE', 'ANY', 'IDEA', 'BUT', 'IT', 'MUST', 'BE', 'ADMITTED', 'THAT', 'SHE', 'ACCOMPLISHED', 'HER', 'TASK', 'WITH', 'THOROUGH', 'SUCCESS'] +8455-210777-0022-2378: hyp=['OF', 'WHAT', 'MISSUS', 'NEVERBEND', 'HAD', 'GONE', 'THROUGH', 'IN', 'PROVIDING', 'BIRDS', 'BEASTS', 'AND', 'FISHES', 'NOT', 'TO', 'TALK', 'OF', 'TARTS', 'AND', 'JELLIES', 'FOR', 'THE', 'DINNER', 'OF', 'THAT', 'DAY', 'NO', 'ONE', 'BUT', 'MYSELF', 'CAN', 'HAVE', 'ANY', 'IDEA', 'BUT', 'IT', 'MUST', 'BE', 'ADMITTED', 'THAT', 'SHE', 'ACCOMPLISHED', 'HER', 'TASK', 'WITH', 'THOROUGH', 'SUCCESS'] +8455-210777-0023-2379: ref=['WE', 'SAT', 'WITH', 'THE', 'OFFICERS', 'SOME', 'LITTLE', 'TIME', 'AFTER', 'DINNER', 'AND', 'THEN', 'WENT', 'ASHORE'] +8455-210777-0023-2379: hyp=['WE', 'SAT', 'WITH', 'THE', 'OFFICERS', 'SOME', 'LITTLE', 'TIME', 'AFTER', 'DINNER', 'AND', 'THEN', 'WENT', 'ASHORE'] +8455-210777-0024-2380: ref=['HOW', 'MUCH', 'OF', 'EVIL', 'OF', 'REAL', 'ACCOMPLISHED', 'EVIL', 'HAD', 'THERE', 'NOT', 'OCCURRED', 'TO', 'ME', 'DURING', 'THE', 'LAST', 'FEW', 'DAYS'] +8455-210777-0024-2380: hyp=['HOW', 'MUCH', 'OF', 'EVIL', 'OF', 'REAL', 'ACCOMPLISHED', 'EVIL', 'HAD', 'THERE', 'NOT', 'OCCURRED', 'TO', 'ME', 'DURING', 'THE', 'LAST', 'FEW', 'DAYS'] +8455-210777-0025-2381: ref=['WHAT', 'COULD', 'I', 'DO', 'NOW', 'BUT', 'JUST', 'LAY', 'MYSELF', 'DOWN', 'AND', 'DIE'] +8455-210777-0025-2381: hyp=['WHAT', 'COULD', 'I', 'DO', 'NOW', 'BUT', 'JUST', 'LAY', 'MYSELF', 'DOWN', 'AND', 'DIE'] +8455-210777-0026-2382: ref=['AND', 'THE', 'DEATH', 'OF', 'WHICH', 'I', 'DREAMT', 'COULD', 'NOT', 'ALAS'] +8455-210777-0026-2382: hyp=['AND', 'THE', 'DEATH', 'OF', 'WHICH', 'I', 'DREAMT', 'COULD', 'NOT', 'ALAS'] +8455-210777-0027-2383: ref=['WHEN', 'THIS', 'CAPTAIN', 'SHOULD', 'HAVE', 'TAKEN', 'HIMSELF', 'AND', 'HIS', 'VESSEL', 'BACK', 'TO', 'ENGLAND', 'I', 'WOULD', 'RETIRE', 'TO', 'A', 'SMALL', 'FARM', 'WHICH', 'I', 'POSSESSED', 'AT', 'THE', 'FARTHEST', 'SIDE', 'OF', 'THE', 'ISLAND', 'AND', 'THERE', 'IN', 'SECLUSION', 'WOULD', 'I', 'END', 'MY', 'DAYS'] +8455-210777-0027-2383: hyp=['WHEN', 'THIS', 'CAPTAIN', 'SHOULD', 'HAVE', 'TAKEN', 'HIMSELF', 'AND', 'HIS', 'VESSEL', 'BACK', 'TO', 'ENGLAND', 'I', 'WOULD', 'RETIRE', 'TO', 'A', 'SMALL', 'FARM', 'WHICH', 'I', 'POSSESSED', 'AT', 'THE', 'FURTHEST', 'SIDE', 'OF', 'THE', 'ISLAND', 'AND', 'THERE', 'IN', 
'SECLUSION', 'WOULD', 'I', 'END', 'MY', 'DAYS'] +8455-210777-0028-2384: ref=['JACK', 'WOULD', 'BECOME', "EVA'S", 'HAPPY', 'HUSBAND', 'AND', 'WOULD', 'REMAIN', 'AMIDST', 'THE', 'HURRIED', 'DUTIES', 'OF', 'THE', 'EAGER', 'WORLD'] +8455-210777-0028-2384: hyp=['JACK', 'WOULD', 'BECOME', "EVA'S", 'HAPPY', 'HUSBAND', 'AND', 'WOULD', 'REMAIN', 'AMIDST', 'THE', 'HURRIED', 'DUTIES', 'OF', 'THE', 'EAGER', 'WORLD'] +8455-210777-0029-2385: ref=['THINKING', 'OF', 'ALL', 'THIS', 'I', 'WENT', 'TO', 'SLEEP'] +8455-210777-0029-2385: hyp=['THINKING', 'OF', 'ALL', 'THIS', 'I', 'WENT', 'TO', 'SLEEP'] +8455-210777-0030-2386: ref=['MISTER', 'NEVERBEND', 'BEGAN', 'THE', 'CAPTAIN', 'AND', 'I', 'OBSERVED', 'THAT', 'UP', 'TO', 'THAT', 'MOMENT', 'HE', 'HAD', 'GENERALLY', 'ADDRESSED', 'ME', 'AS', 'PRESIDENT', 'IT', 'CANNOT', 'BE', 'DENIED', 'THAT', 'WE', 'HAVE', 'COME', 'HERE', 'ON', 'AN', 'UNPLEASANT', 'MISSION'] +8455-210777-0030-2386: hyp=['MISTER', 'NEVERBEND', 'BEGAN', 'THE', 'CAPTAIN', 'AND', 'I', 'OBSERVED', 'THAT', 'UP', 'TO', 'THAT', 'MOMENT', 'HE', 'HAD', 'GENERALLY', 'ADDRESSED', 'ME', 'AS', 'PRESIDENT', 'IT', 'CANNOT', 'BE', 'DENIED', 'THAT', 'WE', 'HAVE', 'COME', 'HERE', 'ON', 'AN', 'UNPLEASANT', 'MISSION'] +8455-210777-0031-2387: ref=['YOU', 'HAVE', 'RECEIVED', 'US', 'WITH', 'ALL', 'THAT', 'COURTESY', 'AND', 'HOSPITALITY', 'FOR', 'WHICH', 'YOUR', 'CHARACTER', 'IN', 'ENGLAND', 'STANDS', 'SO', 'HIGH'] +8455-210777-0031-2387: hyp=['YOU', 'HAVE', 'RECEIVED', 'US', 'WITH', 'ALL', 'THAT', 'COURTESY', 'AND', 'HOSPITALITY', 'FOR', 'WHICH', 'YOUR', 'CHARACTER', 'IN', 'ENGLAND', 'STANDS', 'SO', 'HIGH'] +8455-210777-0032-2388: ref=['IT', 'IS', 'A', 'DUTY', 'SAID', 'I'] +8455-210777-0032-2388: hyp=['IT', 'IS', 'A', 'DUTY', 'SAID', 'I'] +8455-210777-0033-2389: ref=['BUT', 'YOUR', 'POWER', 'IS', 'SO', 'SUPERIOR', 'TO', 'ANY', 'THAT', 'I', 'CAN', 'ADVANCE', 'AS', 'TO', 'MAKE', 'US', 'HERE', 'FEEL', 'THAT', 'THERE', 'IS', 'NO', 'DISGRACE', 'IN', 'YIELDING', 'TO', 'IT'] +8455-210777-0033-2389: hyp=['BUT', 'YOUR', 'POWER', 'IS', 'SO', 'SUPERIOR', 'TO', 'ANY', 'THAT', 'I', 'CAN', 'ADVANCE', 'AS', 'TO', 'MAKE', 'US', 'HERE', 'FEEL', 'THAT', 'THERE', 'IS', 'NO', 'DISGRACE', 'IN', 'YIELDING', 'TO', 'IT'] +8455-210777-0034-2390: ref=['NOT', 'A', 'DOUBT', 'BUT', 'HAD', 'YOUR', 'FORCE', 'BEEN', 'ONLY', 'DOUBLE', 'OR', 'TREBLE', 'OUR', 'OWN', 'I', 'SHOULD', 'HAVE', 'FOUND', 'IT', 'MY', 'DUTY', 'TO', 'STRUGGLE', 'WITH', 'YOU'] +8455-210777-0034-2390: hyp=['NOT', 'A', 'DOUBT', 'BUT', 'HAD', 'YOUR', 'FORCE', 'BEEN', 'ONLY', 'DOUBLE', 'OR', 'TROUBLE', 'OUR', 'OWN', 'I', 'SHOULD', 'HAVE', 'FOUND', 'IT', 'MY', 'DUTY', 'TO', 'STRUGGLE', 'WITH', 'YOU'] +8455-210777-0035-2391: ref=['THAT', 'IS', 'ALL', 'QUITE', 'TRUE', 'MISTER', 'NEVERBEND', 'SAID', 'SIR', 'FERDINANDO', 'BROWN'] +8455-210777-0035-2391: hyp=['THAT', 'IS', 'ALL', 'QUITE', 'TRUE', 'MISTER', 'NEVERBEND', 'SAID', 'SIR', 'FERDINAND', "O'BROWN"] +8455-210777-0036-2392: ref=['I', 'CAN', 'AFFORD', 'TO', 'SMILE', 'BECAUSE', 'I', 'AM', 'ABSOLUTELY', 'POWERLESS', 'BEFORE', 'YOU', 'BUT', 'I', 'DO', 'NOT', 'THE', 'LESS', 'FEEL', 'THAT', 'IN', 'A', 'MATTER', 'IN', 'WHICH', 'THE', 'PROGRESS', 'OF', 'THE', 'WORLD', 'IS', 'CONCERNED', 'I', 'OR', 'RATHER', 'WE', 'HAVE', 'BEEN', 'PUT', 'DOWN', 'BY', 'BRUTE', 'FORCE'] +8455-210777-0036-2392: hyp=['I', 'CAN', 'AFFORD', 'TO', 'SMILE', 'BECAUSE', 'I', 'AM', 'ABSOLUTELY', 'POWERLESS', 'BEFORE', 'YOU', 'BUT', 'I', 'DO', 'NOT', 'THE', 'LESS', 'FEEL', 'THAT', 'IN', 'A', 'MATTER', 'OF', 'WHICH', 'THE', 'PROGRESS', 'OF', 'THE', 'WORLD', 'IS', 
'CONCERNED', 'I', 'OR', 'RATHER', 'WE', 'HAVE', 'BEEN', 'PUT', 'DOWN', 'BY', 'BRUTE', 'FORCE'] +8455-210777-0037-2393: ref=['YOU', 'HAVE', 'COME', 'TO', 'US', 'THREATENING', 'US', 'WITH', 'ABSOLUTE', 'DESTRUCTION'] +8455-210777-0037-2393: hyp=['YOU', 'HAVE', 'COME', 'TO', 'US', 'THREATENING', 'US', 'WITH', 'ABSOLUTE', 'DESTRUCTION'] +8455-210777-0038-2394: ref=['THEREFORE', 'I', 'FEEL', 'MYSELF', 'QUITE', 'ABLE', 'AS', 'PRESIDENT', 'OF', 'THIS', 'REPUBLIC', 'TO', 'RECEIVE', 'YOU', 'WITH', 'A', 'COURTESY', 'DUE', 'TO', 'THE', 'SERVANTS', 'OF', 'A', 'FRIENDLY', 'ALLY'] +8455-210777-0038-2394: hyp=['THEREFORE', 'I', 'FEEL', 'MYSELF', 'QUITE', 'ABLE', 'AS', 'PRESIDENT', 'OF', 'THIS', 'REPUBLIC', 'TO', 'RECEIVE', 'YOU', 'WITH', 'A', 'COURTESY', 'DUE', 'TO', 'THE', 'SERVANTS', 'OF', 'A', 'FRIENDLY', 'ALLY'] +8455-210777-0039-2395: ref=['I', 'CAN', 'ASSURE', 'YOU', 'HE', 'HAS', 'NOT', 'EVEN', 'ALLOWED', 'ME', 'TO', 'SEE', 'THE', 'TRIGGER', 'SINCE', 'I', 'HAVE', 'BEEN', 'ON', 'BOARD'] +8455-210777-0039-2395: hyp=['I', 'CAN', 'ASSURE', 'YOU', 'HE', 'HAS', 'NOT', 'EVEN', 'ALLOWED', 'ME', 'TO', 'SEE', 'THE', 'TRIGGER', 'SINCE', 'I', 'HAVE', 'BEEN', 'ON', 'BOARD'] +8455-210777-0040-2396: ref=['THEN', 'SAID', 'SIR', 'FERDINANDO', 'THERE', 'IS', 'NOTHING', 'FOR', 'IT', 'BUT', 'THAT', 'HE', 'MUST', 'TAKE', 'YOU', 'WITH', 'HIM'] +8455-210777-0040-2396: hyp=['THEN', 'SAID', 'SIR', 'FERDINANDO', 'THERE', 'IS', 'NOTHING', 'FOR', 'IT', 'BUT', 'THAT', 'WE', 'MUST', 'TAKE', 'YOU', 'WITH', 'HIM'] +8455-210777-0041-2397: ref=['THERE', 'CAME', 'UPON', 'ME', 'A', 'SUDDEN', 'SHOCK', 'WHEN', 'I', 'HEARD', 'THESE', 'WORDS', 'WHICH', 'EXCEEDED', 'ANYTHING', 'WHICH', 'I', 'HAD', 'YET', 'FELT'] +8455-210777-0041-2397: hyp=['THERE', 'CAME', 'UPON', 'ME', 'A', 'SUDDEN', 'SHOCK', 'WHEN', 'I', 'HEARD', 'THESE', 'WORDS', 'WHICH', 'EXCEEDED', 'ANYTHING', 'WHICH', 'I', 'HAD', 'YET', 'FELT'] +8455-210777-0042-2398: ref=['YOU', 'HEAR', 'WHAT', 'SIR', 'FERDINANDO', 'BROWN', 'HAS', 'SAID', 'REPLIED', 'CAPTAIN', 'BATTLEAX'] +8455-210777-0042-2398: hyp=['YOU', 'HEAR', 'WHAT', 'SIR', 'FERDINAND', "O'", 'BROWN', 'HAS', 'SAID', 'REPLIED', 'CAPTAIN', 'BATTLE', 'AXE'] +8455-210777-0043-2399: ref=['BUT', 'WHAT', 'IS', 'THE', 'DELICATE', 'MISSION', 'I', 'ASKED'] +8455-210777-0043-2399: hyp=['BUT', 'WHAT', 'IS', 'THE', 'DELICATE', 'MISSION', 'I', 'ASKED'] +8455-210777-0044-2400: ref=['I', 'WAS', 'TO', 'BE', 'TAKEN', 'AWAY', 'AND', 'CARRIED', 'TO', 'ENGLAND', 'OR', 'ELSEWHERE', 'OR', 'DROWNED', 'UPON', 'THE', 'VOYAGE', 'IT', 'MATTERED', 'NOT', 'WHICH'] +8455-210777-0044-2400: hyp=['I', 'WAS', 'TO', 'BE', 'TAKEN', 'AWAY', 'AND', 'CARRIED', 'TO', 'ENGLAND', 'OR', 'ELSEWHERE', 'OR', 'DROWNED', 'UPON', 'THE', 'VOYAGE', 'IT', 'MATTERED', 'NOT', 'WHICH'] +8455-210777-0045-2401: ref=['THEN', 'THE', 'REPUBLIC', 'OF', 'BRITANNULA', 'WAS', 'TO', 'BE', 'DECLARED', 'AS', 'NON', 'EXISTENT', 'AND', 'THE', 'BRITISH', 'FLAG', 'WAS', 'TO', 'BE', 'EXALTED', 'AND', 'A', 'BRITISH', 'GOVERNOR', 'INSTALLED', 'IN', 'THE', 'EXECUTIVE', 'CHAMBERS'] +8455-210777-0045-2401: hyp=['THEN', 'THE', 'REPUBLIC', 'OF', 'BRITANNULA', 'WAS', 'TO', 'BE', 'DECLARED', 'AS', 'NON', 'EXISTENT', 'AND', 'THE', 'BRITISH', 'FLAG', 'WAS', 'TO', 'BE', 'EXALTED', 'AND', 'A', 'BRITISH', 'GOVERNOR', 'INSTALLED', 'IN', 'THE', 'EXECUTIVE', 'CHAMBERS'] +8455-210777-0046-2402: ref=['YOU', 'MAY', 'BE', 'QUITE', 'SURE', "IT'S", 'THERE', 'SAID', 'CAPTAIN', 'BATTLEAX', 'AND', 'THAT', 'I', 'CAN', 'SO', 'USE', 'IT', 'AS', 'TO', 'HALF', 'OBLITERATE', 'YOUR', 'TOWN', 'WITHIN', 'TWO', 'MINUTES', 'OF', 
'MY', 'RETURN', 'ON', 'BOARD'] +8455-210777-0046-2402: hyp=['YOU', 'MAY', 'BE', 'QUITE', 'SURE', "IT'S", 'THERE', 'SAID', 'CAPTAIN', 'BATTLE', 'AX', 'AND', 'THAT', 'I', 'CAN', 'SO', 'USE', 'IT', 'AS', 'TO', 'HALF', 'OBLITERATE', 'YOUR', 'TOWN', 'WITHIN', 'TWO', 'MINUTES', 'OF', 'MY', 'RETURN', 'ON', 'BOARD'] +8455-210777-0047-2403: ref=['YOU', 'PROPOSE', 'TO', 'KIDNAP', 'ME', 'I', 'SAID'] +8455-210777-0047-2403: hyp=['YOU', 'PROPOSE', 'TO', 'KIDNAP', 'ME', 'I', 'SAID'] +8455-210777-0048-2404: ref=['WHAT', 'WOULD', 'BECOME', 'OF', 'YOUR', 'GUN', 'WERE', 'I', 'TO', 'KIDNAP', 'YOU'] +8455-210777-0048-2404: hyp=['WHAT', 'WOULD', 'BECOME', 'OF', 'YOUR', 'GUN', 'WERE', 'I', 'TO', 'KIDNAP', 'YOU'] +8455-210777-0049-2405: ref=['LIEUTENANT', 'CROSSTREES', 'IS', 'A', 'VERY', 'GALLANT', 'OFFICER'] +8455-210777-0049-2405: hyp=['LIEUTENANT', 'CROSSTREES', 'IS', 'A', 'VERY', 'GALLANT', 'OFFICER'] +8455-210777-0050-2406: ref=['ONE', 'OF', 'US', 'ALWAYS', 'REMAINS', 'ON', 'BOARD', 'WHILE', 'THE', 'OTHER', 'IS', 'ON', 'SHORE'] +8455-210777-0050-2406: hyp=['ONE', 'OF', 'US', 'ALWAYS', 'REMAINS', 'ON', 'BOARD', 'WHILE', 'THE', 'OTHER', 'IS', 'ON', 'SHORE'] +8455-210777-0051-2407: ref=['WHAT', 'WORLD', 'WIDE', 'INIQUITY', 'SUCH', 'A', 'SPEECH', 'AS', 'THAT', 'DISCLOSES', 'SAID', 'I', 'STILL', 'TURNING', 'MYSELF', 'TO', 'THE', 'CAPTAIN', 'FOR', 'THOUGH', 'I', 'WOULD', 'HAVE', 'CRUSHED', 'THEM', 'BOTH', 'BY', 'MY', 'WORDS', 'HAD', 'IT', 'BEEN', 'POSSIBLE', 'MY', 'DISLIKE', 'CENTRED', 'ITSELF', 'ON', 'SIR', 'FERDINANDO'] +8455-210777-0051-2407: hyp=['WHAT', 'WORLDWIDE', 'INIQUITY', 'SUCH', 'A', 'SPEECH', 'AS', 'THAT', 'DISCLOSES', 'SAID', 'I', 'STILL', 'TURNING', 'MYSELF', 'TO', 'THE', 'CAPTAIN', 'FOR', 'THOUGH', 'I', 'WOULD', 'HAVE', 'CRUSHED', 'THEM', 'BOTH', 'BY', 'MY', 'WORDS', 'HAD', 'IT', 'BEEN', 'POSSIBLE', 'MY', 'DISLIKE', 'CENTERED', 'ITSELF', 'ON', 'SIR', 'FERDINANDO'] +8455-210777-0052-2408: ref=['YOU', 'WILL', 'ALLOW', 'ME', 'TO', 'SUGGEST', 'SAID', 'HE', 'THAT', 'THAT', 'IS', 'A', 'MATTER', 'OF', 'OPINION'] +8455-210777-0052-2408: hyp=['YOU', 'WILL', 'ALLOW', 'ME', 'TO', 'SUGGEST', 'SAID', 'HE', 'THAT', 'THAT', 'IS', 'A', 'MATTER', 'OF', 'OPINION'] +8455-210777-0053-2409: ref=['WERE', 'I', 'TO', 'COMPLY', 'WITH', 'YOUR', 'ORDERS', 'WITHOUT', 'EXPRESSING', 'MY', 'OWN', 'OPINION', 'I', 'SHOULD', 'SEEM', 'TO', 'HAVE', 'DONE', 'SO', 'WILLINGLY', 'HEREAFTER'] +8455-210777-0053-2409: hyp=['WERE', 'I', 'TO', 'COMPLY', 'WITH', 'YOUR', 'ORDERS', 'WITHOUT', 'EXPRESSING', 'MY', 'OWN', 'OPINION', 'I', 'SHOULD', 'SEEM', 'TO', 'HAVE', 'DONE', 'SO', 'WILLINGLY', 'HEREAFTER'] +8455-210777-0054-2410: ref=['THE', 'LETTER', 'RAN', 'AS', 'FOLLOWS'] +8455-210777-0054-2410: hyp=['THE', 'LETTER', 'RAN', 'AS', 'FOLLOWS'] +8455-210777-0055-2411: ref=['SIR', 'I', 'HAVE', 'IT', 'IN', 'COMMAND', 'TO', 'INFORM', 'YOUR', 'EXCELLENCY', 'THAT', 'YOU', 'HAVE', 'BEEN', 'APPOINTED', 'GOVERNOR', 'OF', 'THE', 'CROWN', 'COLONY', 'WHICH', 'IS', 'CALLED', 'BRITANNULA'] +8455-210777-0055-2411: hyp=['SIR', 'I', 'HAVE', 'IT', 'IN', 'COMMAND', 'TO', 'INFORM', 'YOUR', 'EXCELLENCY', 'THAT', 'YOU', 'HAVE', 'BEEN', 'APPOINTED', 'GOVERNOR', 'OF', 'THE', 'CROWN', 'COLONY', 'WHICH', 'IS', 'CALLED', 'BRITANNULA'] +8455-210777-0056-2412: ref=['THE', 'PECULIAR', 'CIRCUMSTANCES', 'OF', 'THE', 'COLONY', 'ARE', 'WITHIN', 'YOUR', "EXCELLENCY'S", 'KNOWLEDGE'] +8455-210777-0056-2412: hyp=['THE', 'PECULIAR', 'CIRCUMSTANCES', 'OF', 'THE', 'COLONY', 'ARE', 'WITHIN', 'YOUR', "EXCELLENCY'S", 'KNOWLEDGE'] +8455-210777-0057-2413: ref=['BUT', 'IN', 'THEIR', 
'SELECTION', 'OF', 'A', 'CONSTITUTION', 'THE', 'BRITANNULISTS', 'HAVE', 'UNFORTUNATELY', 'ALLOWED', 'THEMSELVES', 'BUT', 'ONE', 'DELIBERATIVE', 'ASSEMBLY', 'AND', 'HENCE', 'HAVE', 'SPRUNG', 'THEIR', 'PRESENT', 'DIFFICULTIES'] +8455-210777-0057-2413: hyp=['BUT', 'IN', 'THEIR', 'SELECTION', 'OF', 'A', 'CONSTITUTION', 'THE', 'BRITON', 'ULISTS', 'HAVE', 'UNFORTUNATELY', 'ALLOWED', 'THEMSELVES', 'BUT', 'ONE', 'DELIBERATE', 'ASSEMBLY', 'AND', 'HENCE', 'HAVE', 'SPRUNG', 'THEIR', 'PRESENT', 'DIFFICULTIES'] +8455-210777-0058-2414: ref=['IT', 'IS', 'FOUNDED', 'ON', 'THE', 'ACKNOWLEDGED', 'WEAKNESS', 'OF', 'THOSE', 'WHO', 'SURVIVE', 'THAT', 'PERIOD', 'OF', 'LIFE', 'AT', 'WHICH', 'MEN', 'CEASE', 'TO', 'WORK'] +8455-210777-0058-2414: hyp=['IT', 'IS', 'FOUNDED', 'ON', 'THE', 'ACKNOWLEDGED', 'WEAKNESS', 'OF', 'THOSE', 'WHO', 'SURVIVE', 'THAT', 'PERIOD', 'OF', 'LIFE', 'AT', 'WHICH', 'MEN', 'CEASE', 'TO', 'WORK'] +8455-210777-0059-2415: ref=['BUT', 'IT', 'IS', 'SURMISED', 'THAT', 'YOU', 'WILL', 'FIND', 'DIFFICULTIES', 'IN', 'THE', 'WAY', 'OF', 'YOUR', 'ENTERING', 'AT', 'ONCE', 'UPON', 'YOUR', 'GOVERNMENT'] +8455-210777-0059-2415: hyp=['BUT', 'IT', 'IS', 'SURMISED', 'THAT', 'YOU', 'WILL', 'FIND', 'DIFFICULTIES', 'IN', 'THE', 'WAY', 'OF', 'YOUR', 'ENTERING', 'AT', 'ONCE', 'UPON', 'YOUR', 'GOVERNOR'] +8455-210777-0060-2416: ref=['THE', 'JOHN', 'BRIGHT', 'IS', 'ARMED', 'WITH', 'A', 'WEAPON', 'OF', 'GREAT', 'POWER', 'AGAINST', 'WHICH', 'IT', 'IS', 'IMPOSSIBLE', 'THAT', 'THE', 'PEOPLE', 'OF', 'BRITANNULA', 'SHOULD', 'PREVAIL'] +8455-210777-0060-2416: hyp=['THE', 'JOHN', 'BRIGHT', 'IS', 'ARMED', 'WITH', 'A', 'WEAPON', 'OF', 'GREAT', 'POWER', 'AGAINST', 'WHICH', 'IT', 'IS', 'IMPOSSIBLE', 'THAT', 'THE', 'PEOPLE', 'OF', 'BRITANULA', 'SHOULD', 'PREVAIL'] +8455-210777-0061-2417: ref=['YOU', 'WILL', 'CARRY', 'OUT', 'WITH', 'YOU', 'ONE', 'HUNDRED', 'MEN', 'OF', 'THE', 'NORTH', 'NORTH', 'WEST', 'BIRMINGHAM', 'REGIMENT', 'WHICH', 'WILL', 'PROBABLY', 'SUFFICE', 'FOR', 'YOUR', 'OWN', 'SECURITY', 'AS', 'IT', 'IS', 'THOUGHT', 'THAT', 'IF', 'MISTER', 'NEVERBEND', 'BE', 'WITHDRAWN', 'THE', 'PEOPLE', 'WILL', 'REVERT', 'EASILY', 'TO', 'THEIR', 'OLD', 'HABITS', 'OF', 'OBEDIENCE'] +8455-210777-0061-2417: hyp=['YOU', 'WILL', 'CARRY', 'OUT', 'WITH', 'YOU', 'ONE', 'HUNDRED', 'MEN', 'OF', 'THE', 'NORTH', 'NORTHWEST', 'BIRMINGHAM', 'REGIMENT', 'WHICH', 'WILL', 'PROBABLY', 'SUFFICE', 'FOR', 'YOUR', 'OWN', 'SECURITY', 'AS', 'IT', 'IS', 'THOUGHT', 'THAT', 'IF', 'MISTER', 'NEVERBEND', 'BE', 'WITHDRAWN', 'THE', 'PEOPLE', 'WILL', 'REVERT', 'EASILY', 'TO', 'THEIR', 'OLD', 'HABITS', 'OF', 'OBEDIENCE'] +8455-210777-0062-2418: ref=['WHEN', 'DO', 'YOU', 'INTEND', 'THAT', 'THE', 'JOHN', 'BRIGHT', 'SHALL', 'START'] +8455-210777-0062-2418: hyp=['WHEN', 'DO', 'YOU', 'INTEND', 'THAT', 'THE', 'JOHN', 'BRIGHT', 'SHALL', 'START'] +8455-210777-0063-2419: ref=['TO', 'DAY', 'I', 'SHOUTED'] +8455-210777-0063-2419: hyp=['TO', 'DAY', 'I', 'SHOUTED'] +8455-210777-0064-2420: ref=['AND', 'I', 'HAVE', 'NO', 'ONE', 'READY', 'TO', 'WHOM', 'I', 'CAN', 'GIVE', 'UP', 'THE', 'ARCHIVES', 'OF', 'THE', 'GOVERNMENT'] +8455-210777-0064-2420: hyp=['AND', 'I', 'HAVE', 'NO', 'ONE', 'READY', 'TO', 'WHOM', 'I', 'CAN', 'GIVE', 'UP', 'THE', 'ARCHIVES', 'OF', 'THE', 'GOVERNMENT'] +8455-210777-0065-2421: ref=['I', 'SHALL', 'BE', 'HAPPY', 'TO', 'TAKE', 'CHARGE', 'OF', 'THEM', 'SAID', 'SIR', 'FERDINANDO'] +8455-210777-0065-2421: hyp=['I', 'SHALL', 'BE', 'HAPPY', 'TO', 'TAKE', 'CHARGE', 'OF', 'THEM', 'SAID', 'SIR', 'FERDINANDO'] +8455-210777-0066-2422: ref=['THEY', 'OF', 'COURSE', 
'MUST', 'ALL', 'BE', 'ALTERED'] +8455-210777-0066-2422: hyp=['THEY', 'OF', 'COURSE', 'MUST', 'ALL', 'BE', 'ALTERED'] +8455-210777-0067-2423: ref=['OR', 'OF', 'THE', 'HABITS', 'OF', 'OUR', 'PEOPLE', 'IT', 'IS', 'QUITE', 'IMPOSSIBLE'] +8455-210777-0067-2423: hyp=['OR', 'OF', 'THE', 'HABITS', 'OF', 'OUR', 'PEOPLE', 'IT', 'IS', 'QUITE', 'IMPOSSIBLE'] +8455-210777-0068-2424: ref=['YOUR', 'POWER', 'IS', 'SUFFICIENT', 'I', 'SAID'] +8455-210777-0068-2424: hyp=['YOUR', 'POWER', 'IS', 'SUFFICIENT', 'I', 'SAID'] +8455-210777-0069-2425: ref=['IF', 'YOU', 'WILL', 'GIVE', 'US', 'YOUR', 'PROMISE', 'TO', 'MEET', 'CAPTAIN', 'BATTLEAX', 'HERE', 'AT', 'THIS', 'TIME', 'TO', 'MORROW', 'WE', 'WILL', 'STRETCH', 'A', 'POINT', 'AND', 'DELAY', 'THE', 'DEPARTURE', 'OF', 'THE', 'JOHN', 'BRIGHT', 'FOR', 'TWENTY', 'FOUR', 'HOURS'] +8455-210777-0069-2425: hyp=['IF', 'YOU', 'WILL', 'GIVE', 'US', 'YOUR', 'PROMISE', 'TO', 'MEET', 'CAPTAIN', 'ATTILAX', 'HERE', 'AT', 'THIS', 'TIME', 'TO', 'MORROW', 'WE', 'WILL', 'STRETCH', 'A', 'POINT', 'AND', 'DELAY', 'THE', 'DEPARTURE', 'OF', 'THE', 'JOHN', 'BRIGHT', 'FOR', 'TWENTY', 'FOUR', 'HOURS'] +8455-210777-0070-2426: ref=['AND', 'THIS', 'PLAN', 'WAS', 'ADOPTED', 'TOO', 'IN', 'ORDER', 'TO', 'EXTRACT', 'FROM', 'ME', 'A', 'PROMISE', 'THAT', 'I', 'WOULD', 'DEPART', 'IN', 'PEACE'] +8455-210777-0070-2426: hyp=['AND', 'THIS', 'PLAN', 'WAS', 'ADOPTED', 'TOO', 'IN', 'ORDER', 'TO', 'EXTRACT', 'FROM', 'ME', 'A', 'PROMISE', 'THAT', 'I', 'WOULD', 'DEPART', 'IN', 'PEACE'] +8463-287645-0000-2427: ref=['THIS', 'WAS', 'WHAT', 'DID', 'THE', 'MISCHIEF', 'SO', 'FAR', 'AS', 'THE', 'RUNNING', 'AWAY', 'WAS', 'CONCERNED'] +8463-287645-0000-2427: hyp=['THIS', 'WAS', 'WHAT', 'DID', 'THE', 'MISCHIEF', 'SO', 'FAR', 'AS', 'THE', 'RUNNING', 'AWAY', 'WAS', 'CONCERNED'] +8463-287645-0001-2428: ref=['IT', 'IS', 'HARDLY', 'NECESSARY', 'TO', 'SAY', 'MORE', 'OF', 'THEM', 'HERE'] +8463-287645-0001-2428: hyp=['IT', 'IS', 'HARDLY', 'NECESSARY', 'TO', 'SAY', 'MORE', 'OF', 'THEM', 'HERE'] +8463-287645-0002-2429: ref=['FROM', 'THE', 'MANNER', 'IN', 'WHICH', 'HE', 'EXPRESSED', 'HIMSELF', 'WITH', 'REGARD', 'TO', 'ROBERT', 'HOLLAN', 'NO', 'MAN', 'IN', 'THE', 'WHOLE', 'RANGE', 'OF', 'HIS', 'RECOLLECTIONS', 'WILL', 'BE', 'LONGER', 'REMEMBERED', 'THAN', 'HE', 'HIS', 'ENTHRALMENT', 'WHILE', 'UNDER', 'HOLLAN', 'WILL', 'HARDLY', 'EVER', 'BE', 'FORGOTTEN'] +8463-287645-0002-2429: hyp=['FROM', 'THE', 'MANNER', 'IN', 'WHICH', 'HE', 'EXPRESSED', 'HIMSELF', 'WITH', 'REGARD', 'TO', 'ROBERT', 'HOLLAND', 'NO', 'MAN', 'IN', 'THE', 'WHOLE', 'RANGE', 'OF', 'HIS', 'RECOLLECTIONS', 'WILL', 'BE', 'LONGER', 'REMEMBERED', 'THAN', 'HE', 'HIS', 'ENTHRALLMENT', 'WHILE', 'UNDER', 'HOLLAND', 'WILL', 'HARDLY', 'EVER', 'BE', 'FORGOTTEN'] +8463-287645-0003-2430: ref=['OF', 'THIS', 'PARTY', 'EDWARD', 'A', 'BOY', 'OF', 'SEVENTEEN', 'CALLED', 'FORTH', 'MUCH', 'SYMPATHY', 'HE', 'TOO', 'WAS', 'CLAIMED', 'BY', 'HOLLAN'] +8463-287645-0003-2430: hyp=['OF', 'THIS', 'PARTY', 'EDWARD', 'A', 'BOY', 'OF', 'SEVENTEEN', 'CALLED', 'FORTH', 'MUCH', 'SYMPATHY', 'HE', 'TOO', 'WAS', 'CLAIMED', 'BY', 'HOLLAND'] +8463-287645-0004-2431: ref=['JOHN', 'WESLEY', 'COMBASH', 'JACOB', 'TAYLOR', 'AND', 'THOMAS', 'EDWARD', 'SKINNER'] +8463-287645-0004-2431: hyp=['JOHN', 'WESLEY', 'COMBASH', 'JACOB', 'TAYLOR', 'AND', 'THOMAS', 'EDWARD', 'SKINNER'] +8463-287645-0005-2432: ref=['A', 'FEW', 'YEARS', 'BACK', 'ONE', 'OF', 'THEIR', 'SLAVES', 'A', 'COACHMAN', 'WAS', 'KEPT', 'ON', 'THE', 'COACH', 'BOX', 'ONE', 'COLD', 'NIGHT', 'WHEN', 'THEY', 'WERE', 'OUT', 'AT', 'A', 'BALL', 'UNTIL', 'HE', 
'BECAME', 'ALMOST', 'FROZEN', 'TO', 'DEATH', 'IN', 'FACT', 'HE', 'DID', 'DIE', 'IN', 'THE', 'INFIRMARY', 'FROM', 'THE', 'EFFECTS', 'OF', 'THE', 'FROST', 'ABOUT', 'ONE', 'WEEK', 'AFTERWARDS'] +8463-287645-0005-2432: hyp=['A', 'FEW', 'YEARS', 'BACK', 'ONE', 'OF', 'THEIR', 'SLAVES', 'A', 'COACHMAN', 'WAS', 'KEPT', 'ON', 'THE', 'COACH', 'BOX', 'ONE', 'COLD', 'NIGHT', 'WHEN', 'THEY', 'WERE', 'OUT', 'AT', 'A', 'BALL', 'UNTIL', 'HE', 'BECAME', 'ALMOST', 'FROZEN', 'TO', 'DEATH', 'IN', 'FACT', 'HE', 'DID', 'DIE', 'IN', 'THE', 'INFIRMARY', 'FROM', 'THE', 'EFFECTS', 'OF', 'THE', 'FROST', 'ABOUT', 'ONE', 'WEEK', 'AFTERWARDS'] +8463-287645-0006-2433: ref=['THE', 'DOCTOR', 'WHO', 'ATTENDED', 'THE', 'INJURED', 'CREATURE', 'IN', 'THIS', 'CASE', 'WAS', 'SIMPLY', 'TOLD', 'THAT', 'SHE', 'SLIPPED', 'AND', 'FELL', 'DOWN', 'STAIRS', 'AS', 'SHE', 'WAS', 'COMING', 'DOWN'] +8463-287645-0006-2433: hyp=['THE', 'DOCTOR', 'WHO', 'ATTENDED', 'THE', 'INJURED', 'CREATURE', 'IN', 'THIS', 'CASE', 'WAS', 'SIMPLY', 'TOLD', 'THAT', 'SHE', 'SLIPPED', 'AND', 'FELL', 'DOWN', 'THE', 'STAIRS', 'AS', 'SHE', 'WAS', 'COMING', 'DOWN'] +8463-287645-0007-2434: ref=['ANOTHER', 'CASE', 'SAID', 'JOHN', 'WESLEY', 'WAS', 'A', 'LITTLE', 'GIRL', 'HALF', 'GROWN', 'WHO', 'WAS', 'WASHING', 'WINDOWS', 'UP', 'STAIRS', 'ONE', 'DAY', 'AND', 'UNLUCKILY', 'FELL', 'ASLEEP', 'IN', 'THE', 'WINDOW', 'AND', 'IN', 'THIS', 'POSITION', 'WAS', 'FOUND', 'BY', 'HER', 'MISTRESS', 'IN', 'A', 'RAGE', 'THE', 'MISTRESS', 'HIT', 'HER', 'A', 'HEAVY', 'SLAP', 'KNOCKED', 'HER', 'OUT', 'OF', 'THE', 'WINDOW', 'AND', 'SHE', 'FELL', 'TO', 'THE', 'PAVEMENT', 'AND', 'DIED', 'IN', 'A', 'FEW', 'HOURS', 'FROM', 'THE', 'EFFECTS', 'THEREOF'] +8463-287645-0007-2434: hyp=['ANOTHER', 'CASE', 'SAID', 'JOHN', 'WESLEY', 'WAS', 'A', 'LITTLE', 'GIRL', 'HALF', 'GROWN', 'WHO', 'WAS', 'WASHING', 'WINDOWS', 'UP', 'STAIRS', 'ONE', 'DAY', 'AND', 'UNLUCKILY', 'FELL', 'ASLEEP', 'IN', 'THE', 'WINDOW', 'AND', 'IN', 'THIS', 'POSITION', 'WAS', 'FOUND', 'BY', 'HER', 'MISTRESS', 'IN', 'A', 'RAGE', 'THE', 'MISTRESS', 'HID', 'HER', 'A', 'HEAVY', 'SLAP', 'KNOCKED', 'HER', 'OUT', 'OF', 'THE', 'WINDOW', 'AND', 'SHE', 'FELL', 'TO', 'THE', 'PAVEMENT', 'AND', 'DIED', 'IN', 'A', 'FEW', 'HOURS', 'FROM', 'THE', 'EFFECTS', 'THEREOF'] +8463-287645-0008-2435: ref=['AS', 'USUAL', 'NOTHING', 'WAS', 'DONE', 'IN', 'THE', 'WAY', 'OF', 'PUNISHMENT'] +8463-287645-0008-2435: hyp=['AS', 'USUAL', 'NOTHING', 'WAS', 'DONE', 'IN', 'THE', 'WAY', 'OF', 'PUNISHMENT'] +8463-287645-0009-2436: ref=['I', 'NEVER', 'KNEW', 'OF', 'BUT', 'ONE', 'MAN', 'WHO', 'COULD', 'EVER', 'PLEASE', 'HIM'] +8463-287645-0009-2436: hyp=['I', 'NEVER', 'KNEW', 'OF', 'BUT', 'ONE', 'MAN', 'WHO', 'COULD', 'EVER', 'PLEASE', 'HIM'] +8463-287645-0010-2437: ref=['HE', 'WORKED', 'ME', 'VERY', 'HARD', 'HE', 'WANTED', 'TO', 'BE', 'BEATING', 'ME', 'ALL', 'THE', 'TIME'] +8463-287645-0010-2437: hyp=['HE', 'WORKED', 'ME', 'VERY', 'HARD', 'HE', 'WANTED', 'TO', 'BE', 'BEATING', 'ME', 'ALL', 'THE', 'TIME'] +8463-287645-0011-2438: ref=['SHE', 'WAS', 'A', 'LARGE', 'HOMELY', 'WOMAN', 'THEY', 'WERE', 'COMMON', 'WHITE', 'PEOPLE', 'WITH', 'NO', 'REPUTATION', 'IN', 'THE', 'COMMUNITY'] +8463-287645-0011-2438: hyp=['SHE', 'WAS', 'A', 'LARGE', 'HOMELY', 'WOMAN', 'THEY', 'WERE', 'COMMON', 'WHITE', 'PEOPLE', 'WITH', 'NO', 'REPUTATION', 'IN', 'THE', 'COMMUNITY'] +8463-287645-0012-2439: ref=['SUBSTANTIALLY', 'THIS', 'WAS', "JACOB'S", 'UNVARNISHED', 'DESCRIPTION', 'OF', 'HIS', 'MASTER', 'AND', 'MISTRESS'] +8463-287645-0012-2439: hyp=['SUBSTANTIALLY', 'THIS', 'WAS', "JACOB'S", 'UNVARNISHED', 
'DESCRIPTION', 'OF', 'HIS', 'MASTER', 'AND', 'MISTRESS'] +8463-287645-0013-2440: ref=['AS', 'TO', 'HIS', 'AGE', 'AND', 'ALSO', 'THE', 'NAME', 'OF', 'HIS', 'MASTER', "JACOB'S", 'STATEMENT', 'VARIED', 'SOMEWHAT', 'FROM', 'THE', 'ADVERTISEMENT'] +8463-287645-0013-2440: hyp=['AS', 'TO', 'HIS', 'AGE', 'AND', 'ALSO', 'THE', 'NAME', 'OF', 'HIS', 'MASTER', "JACOB'S", 'STATEMENT', 'VARIED', 'SOMEWHAT', 'FROM', 'THE', 'ADVERTISEMENT'] +8463-287645-0014-2441: ref=['OF', 'STARTING', 'I', "DIDN'T", 'KNOW', 'THE', 'WAY', 'TO', 'COME'] +8463-287645-0014-2441: hyp=['OF', 'STARTING', 'I', "DIDN'T", 'KNOW', 'THE', 'WAY', 'TO', 'COME'] +8463-294825-0000-2442: ref=["IT'S", 'ALMOST', 'BEYOND', 'CONJECTURE'] +8463-294825-0000-2442: hyp=["IT'S", 'ALMOST', 'BEYOND', 'CONJECTURE'] +8463-294825-0001-2443: ref=['THIS', 'REALITY', 'BEGINS', 'TO', 'EXPLAIN', 'THE', 'DARK', 'POWER', 'AND', 'OTHERWORLDLY', 'FASCINATION', 'OF', 'TWENTY', 'THOUSAND', 'LEAGUES', 'UNDER', 'THE', 'SEAS'] +8463-294825-0001-2443: hyp=['THIS', 'REALITY', 'BEGINS', 'TO', 'EXPLAIN', 'THE', 'DARK', 'POWER', 'AND', 'OTHER', 'WORLDDLY', 'FASCINATION', 'OF', 'TWENTY', 'THOUSAND', 'LEAGUES', 'UNDER', 'THE', 'SEAS'] +8463-294825-0002-2444: ref=['FIRST', 'AS', 'A', 'PARIS', 'STOCKBROKER', 'LATER', 'AS', 'A', 'CELEBRATED', 'AUTHOR', 'AND', 'YACHTSMAN', 'HE', 'WENT', 'ON', 'FREQUENT', 'VOYAGES', 'TO', 'BRITAIN', 'AMERICA', 'THE', 'MEDITERRANEAN'] +8463-294825-0002-2444: hyp=['FIRST', 'AS', 'A', 'PARIS', 'STOCKBROKER', 'LATER', 'AS', 'A', 'CELEBRATED', 'AUTHOR', 'AND', 'YACHTSMAN', 'HE', 'WENT', 'ON', 'FREQUENT', 'VOYAGES', 'TO', 'BRITAIN', 'AMERICA', 'THE', 'MEDITERRANEAN'] +8463-294825-0003-2445: ref=['NEMO', 'BUILDS', 'A', 'FABULOUS', 'FUTURISTIC', 'SUBMARINE', 'THE', 'NAUTILUS', 'THEN', 'CONDUCTS', 'AN', 'UNDERWATER', 'CAMPAIGN', 'OF', 'VENGEANCE', 'AGAINST', 'HIS', 'IMPERIALIST', 'OPPRESSOR'] +8463-294825-0003-2445: hyp=['NEMO', 'BUILDS', 'A', 'FABULOUS', 'FUTURISTIC', 'SUBMARINE', 'THE', 'NAUTILUS', 'THEN', 'CONDUCTS', 'AN', 'UNDERWATER', 'CAMPAIGN', 'OF', 'VENGEANCE', 'AGAINST', 'HIS', 'IMPERIALIST', 'OPPRESSOR'] +8463-294825-0004-2446: ref=['IN', 'ALL', 'THE', 'NOVEL', 'HAD', 'A', 'DIFFICULT', 'GESTATION'] +8463-294825-0004-2446: hyp=['IN', 'ALL', 'THE', 'NOVEL', 'HAD', 'A', 'DIFFICULT', 'GESTATION'] +8463-294825-0005-2447: ref=['OTHER', 'SUBTLETIES', 'OCCUR', 'INSIDE', 'EACH', 'EPISODE', 'THE', 'TEXTURES', 'SPARKLING', 'WITH', 'WIT', 'INFORMATION', 'AND', 'INSIGHT'] +8463-294825-0005-2447: hyp=['OTHER', 'SUBTLETIES', 'OCCUR', 'INSIDE', 'EACH', 'EPISODE', 'THE', 'TEXTURES', 'SPARKLING', 'WITH', 'WIT', 'INFORMATION', 'AND', 'INSIGHT'] +8463-294825-0006-2448: ref=['HIS', 'SPECIFICATIONS', 'FOR', 'AN', 'OPEN', 'SEA', 'SUBMARINE', 'AND', 'A', 'SELF', 'CONTAINED', 'DIVING', 'SUIT', 'WERE', 'DECADES', 'BEFORE', 'THEIR', 'TIME', 'YET', 'MODERN', 'TECHNOLOGY', 'BEARS', 'THEM', 'OUT', 'TRIUMPHANTLY'] +8463-294825-0006-2448: hyp=['HIS', 'SPECIFICATIONS', 'FOR', 'AN', 'OPEN', 'SEA', 'SUBMARINE', 'AND', 'A', 'SELF', 'CONTAINING', 'DIVING', 'SUIT', 'WERE', 'DECADES', 'BEFORE', 'THEIR', 'TIME', 'YET', 'MODERN', 'TECHNOLOGY', 'BEARS', 'THEM', 'OUT', 'TRIUMPHANTLY'] +8463-294825-0007-2449: ref=['EVEN', 'THE', 'SUPPORTING', 'CAST', 'IS', 'SHREWDLY', 'DRAWN', 'PROFESSOR', 'ARONNAX', 'THE', 'CAREER', 'SCIENTIST', 'CAUGHT', 'IN', 'AN', 'ETHICAL', 'CONFLICT', 'CONSEIL', 'THE', 'COMPULSIVE', 'CLASSIFIER', 'WHO', 'SUPPLIES', 'HUMOROUS', 'TAG', 'LINES', 'FOR', "VERNE'S", 'FAST', 'FACTS', 'THE', 'HARPOONER', 'NED', 'LAND', 'A', 'CREATURE', 'OF', 'CONSTANT', 'APPETITES', 
'MAN', 'AS', 'HEROIC', 'ANIMAL'] +8463-294825-0007-2449: hyp=['EVEN', 'THE', 'SUPPORTING', 'CAST', 'IS', 'SHREWDLY', 'DRAWN', 'PROFESSOR', 'ARONNAX', 'THE', 'CAREER', 'SCIENTIST', 'CAUGHT', 'IN', 'AN', 'ETHICAL', 'CONFLICT', 'CONSEIL', 'THE', 'COMPULSIVE', 'CLASSIFIER', 'WHO', 'SUPPLIES', 'HUMOROUS', 'TAG', 'LINES', 'FOR', "VERRNE'S", 'FAST', 'FACTS', 'THE', 'HARPOONER', 'NED', 'LAND', 'A', 'CREATURE', 'OF', 'CONSTANT', 'APPETITES', 'MAN', 'AS', 'HEROIC', 'ANIMAL'] +8463-294825-0008-2450: ref=['BUT', 'MUCH', 'OF', 'THE', "NOVEL'S", 'BROODING', 'POWER', 'COMES', 'FROM', 'CAPTAIN', 'NEMO'] +8463-294825-0008-2450: hyp=['BUT', 'MUCH', 'OF', 'THE', "NOVEL'S", 'BROODING', 'POWER', 'COMES', 'FROM', 'CAPTAIN', 'NEMO'] +8463-294825-0009-2451: ref=['THIS', 'COMPULSION', 'LEADS', 'NEMO', 'INTO', 'UGLY', 'CONTRADICTIONS', "HE'S", 'A', 'FIGHTER', 'FOR', 'FREEDOM', 'YET', 'ALL', 'WHO', 'BOARD', 'HIS', 'SHIP', 'ARE', 'IMPRISONED', 'THERE', 'FOR', 'GOOD', 'HE', 'WORKS', 'TO', 'SAVE', 'LIVES', 'BOTH', 'HUMAN', 'AND', 'ANIMAL', 'YET', 'HE', 'HIMSELF', 'CREATES', 'A', 'HOLOCAUST', 'HE', 'DETESTS', 'IMPERIALISM', 'YET', 'HE', 'LAYS', 'PERSONAL', 'CLAIM', 'TO', 'THE', 'SOUTH', 'POLE'] +8463-294825-0009-2451: hyp=['THIS', 'COMPULSION', 'LEADS', 'NEMO', 'INTO', 'UGLY', 'CONTRADICTIONS', 'HE', 'IS', 'A', 'FIGHTER', 'FOR', 'FREEDOM', 'YET', 'ALL', 'WHO', 'BOARD', 'HIS', 'SHIP', 'ARE', 'IMPRISONED', 'THERE', 'FOR', 'GOOD', 'HE', 'WORKS', 'TO', 'SAVE', 'LIVES', 'BOTH', 'HUMAN', 'AND', 'ANIMAL', 'YET', 'HE', 'HIMSELF', 'CREATES', 'A', 'HOLOCAUST', 'HE', 'DETESTS', 'IMPERIALISM', 'YET', 'HE', 'LAYS', 'PERSONAL', 'CLAIM', 'TO', 'THE', 'SOUTH', 'POLE'] +8463-294825-0010-2452: ref=['AND', 'IN', 'THIS', 'LAST', 'ACTION', 'HE', 'FALLS', 'INTO', 'THE', 'CLASSIC', 'SIN', 'OF', 'PRIDE'] +8463-294825-0010-2452: hyp=['AND', 'IN', 'THIS', 'LAST', 'ACTION', 'HE', 'FALLS', 'INTO', 'THE', 'CLASSIC', 'SIN', 'OF', 'PRIDE'] +8463-294825-0011-2453: ref=["HE'S", 'SWIFTLY', 'PUNISHED'] +8463-294825-0011-2453: hyp=['HE', 'IS', 'SWIFTLY', 'PUNISHED'] +8463-294825-0012-2454: ref=['THE', 'NAUTILUS', 'NEARLY', 'PERISHES', 'IN', 'THE', 'ANTARCTIC', 'AND', 'NEMO', 'SINKS', 'INTO', 'A', 'GROWING', 'DEPRESSION'] +8463-294825-0012-2454: hyp=['THE', 'NAUTILUS', 'NEARLY', 'PERISHES', 'IN', 'THE', 'ANTARCTIC', 'AND', 'NEMO', 'SINKS', 'INTO', 'A', 'GROWING', 'DEPRESSION'] +8463-294825-0013-2455: ref=['FOR', 'MANY', 'THEN', 'THIS', 'BOOK', 'HAS', 'BEEN', 'A', 'SOURCE', 'OF', 'FASCINATION', 'SURELY', 'ONE', 'OF', 'THE', 'MOST', 'INFLUENTIAL', 'NOVELS', 'EVER', 'WRITTEN', 'AN', 'INSPIRATION', 'FOR', 'SUCH', 'SCIENTISTS', 'AND', 'DISCOVERERS', 'AS', 'ENGINEER', 'SIMON', 'LAKE', 'OCEANOGRAPHER', 'WILLIAM', 'BEEBE', 'POLAR', 'TRAVELER', 'SIR', 'ERNEST', 'SHACKLETON'] +8463-294825-0013-2455: hyp=['FOR', 'MANY', 'THEN', 'THIS', 'BOOK', 'HAS', 'BEEN', 'A', 'SOURCE', 'OF', 'FASCINATION', 'SURELY', 'ONE', 'OF', 'THE', 'MOST', 'INFLUENTIAL', 'NOVELS', 'EVER', 'WRITTEN', 'AN', 'INSPIRATION', 'FOR', 'SUCH', 'SCIENTISTS', 'AND', 'DISCOVERERS', 'AS', 'ENGINEER', 'SIMON', 'LAKE', 'OCEANOGRAPHER', 'WILLIAM', 'B', 'B', 'POLAR', 'TRAVELLER', 'SIR', 'ERNEST', 'SHACKLETON'] +8463-294825-0014-2456: ref=['FATHOM', 'SIX', 'FEET'] +8463-294825-0014-2456: hyp=['FATHOM', 'SIX', 'FEET'] +8463-294825-0015-2457: ref=['GRAM', 'ROUGHLY', 'ONE', 'TWENTY', 'EIGHTH', 'OF', 'AN', 'OUNCE'] +8463-294825-0015-2457: hyp=['GRAHAM', 'ROUGHLY', 'WON', 'TWENTY', 'EIGHTH', 'OF', 'AN', 'OUNCE'] +8463-294825-0016-2458: ref=['MILLIGRAM', 'ROUGHLY', 'ONE', 'TWENTY', 'EIGHT', 'THOUSAND', 'OF', 'AN', 
'OUNCE'] +8463-294825-0016-2458: hyp=['MILLIGRAM', 'ROUGHLY', 'ONE', 'TWENTY', 'EIGHT', 'THOUSANDTH', 'OF', 'AN', 'OUNCE'] +8463-294825-0017-2459: ref=['LITER', 'ROUGHLY', 'ONE', 'QUART'] +8463-294825-0017-2459: hyp=['LEADER', 'ROUGHLY', 'WON', 'COURT'] +8463-294825-0018-2460: ref=['METER', 'ROUGHLY', 'ONE', 'YARD', 'THREE', 'INCHES'] +8463-294825-0018-2460: hyp=['METRE', 'ROUGHLY', 'ONE', 'YARD', 'THREE', 'INCHES'] +8463-294825-0019-2461: ref=['MILLIMETER', 'ROUGHLY', 'ONE', 'TWENTY', 'FIFTH', 'OF', 'AN', 'INCH'] +8463-294825-0019-2461: hyp=['MILLIMETER', 'ROUGHLY', 'ONE', 'TWENTY', 'FIFTH', 'OF', 'AN', 'INCH'] +8463-294828-0000-2462: ref=['CHAPTER', 'THREE', 'AS', 'MASTER', 'WISHES'] +8463-294828-0000-2462: hyp=['CHAPTER', 'THREE', 'AS', 'MASTER', 'WISHES'] +8463-294828-0001-2463: ref=['THREE', 'SECONDS', 'BEFORE', 'THE', 'ARRIVAL', 'OF', 'J', 'B', "HOBSON'S", 'LETTER', 'I', 'NO', 'MORE', 'DREAMED', 'OF', 'CHASING', 'THE', 'UNICORN', 'THAN', 'OF', 'TRYING', 'FOR', 'THE', 'NORTHWEST', 'PASSAGE'] +8463-294828-0001-2463: hyp=['THREE', 'SECONDS', 'BEFORE', 'THE', 'ARRIVAL', 'OF', 'J', 'B', "HOBSON'S", 'LETTER', 'I', 'NO', 'MORE', 'DREAMED', 'OF', 'CHASING', 'THE', 'UNICORN', 'THAN', 'OF', 'TRYING', 'FOR', 'THE', 'NORTH', 'WEST', 'PASSAGE'] +8463-294828-0002-2464: ref=['EVEN', 'SO', 'I', 'HAD', 'JUST', 'RETURNED', 'FROM', 'AN', 'ARDUOUS', 'JOURNEY', 'EXHAUSTED', 'AND', 'BADLY', 'NEEDING', 'A', 'REST'] +8463-294828-0002-2464: hyp=['EVEN', 'SO', 'I', 'HAD', 'JUST', 'RETURNED', 'FROM', 'AN', 'ARDUOUS', 'JOURNEY', 'EXHAUSTED', 'AND', 'BADLY', 'NEEDING', 'A', 'REST'] +8463-294828-0003-2465: ref=['I', 'WANTED', 'NOTHING', 'MORE', 'THAN', 'TO', 'SEE', 'MY', 'COUNTRY', 'AGAIN', 'MY', 'FRIENDS', 'MY', 'MODEST', 'QUARTERS', 'BY', 'THE', 'BOTANICAL', 'GARDENS', 'MY', 'DEARLY', 'BELOVED', 'COLLECTIONS'] +8463-294828-0003-2465: hyp=['I', 'WANTED', 'NOTHING', 'MORE', 'THAN', 'TO', 'SEE', 'MY', 'COUNTRY', 'AGAIN', 'MY', 'FRIENDS', 'MY', 'MODEST', 'QUARTERS', 'BY', 'THE', 'BOTANICAL', 'GARDENS', 'MY', 'DEARLY', 'BELOVED', 'COLLECTIONS'] +8463-294828-0004-2466: ref=['BUT', 'NOW', 'NOTHING', 'COULD', 'HOLD', 'ME', 'BACK'] +8463-294828-0004-2466: hyp=['BUT', 'NOW', 'NOTHING', 'COULD', 'HOLD', 'ME', 'BACK'] +8463-294828-0005-2467: ref=['CONSEIL', 'WAS', 'MY', 'MANSERVANT'] +8463-294828-0005-2467: hyp=['CONSEIL', 'WAS', 'MY', 'MANSERVANT'] +8463-294828-0006-2468: ref=['FROM', 'RUBBING', 'SHOULDERS', 'WITH', 'SCIENTISTS', 'IN', 'OUR', 'LITTLE', 'UNIVERSE', 'BY', 'THE', 'BOTANICAL', 'GARDENS', 'THE', 'BOY', 'HAD', 'COME', 'TO', 'KNOW', 'A', 'THING', 'OR', 'TWO'] +8463-294828-0006-2468: hyp=['FROM', 'RUBBING', 'SHOULDERS', 'WITH', 'SCIENTISTS', 'IN', 'OUR', 'LITTLE', 'UNIVERSE', 'BY', 'THE', 'BOTANICAL', 'GARDENS', 'THE', 'BOY', 'HAD', 'COME', 'TO', 'KNOW', 'A', 'THING', 'OR', 'TWO'] +8463-294828-0007-2469: ref=['CLASSIFYING', 'WAS', 'EVERYTHING', 'TO', 'HIM', 'SO', 'HE', 'KNEW', 'NOTHING', 'ELSE', 'WELL', 'VERSED', 'IN', 'THE', 'THEORY', 'OF', 'CLASSIFICATION', 'HE', 'WAS', 'POORLY', 'VERSED', 'IN', 'ITS', 'PRACTICAL', 'APPLICATION', 'AND', 'I', 'DOUBT', 'THAT', 'HE', 'COULD', 'TELL', 'A', 'SPERM', 'WHALE', 'FROM', 'A', 'BALEEN', 'WHALE'] +8463-294828-0007-2469: hyp=['CLASSIFYING', 'WAS', 'EVERYTHING', 'TO', 'HIM', 'SO', 'HE', 'KNEW', 'NOTHING', 'ELSE', 'WELL', 'VERSED', 'IN', 'THE', 'THEORY', 'OF', 'CLASSIFICATION', 'HE', 'WAS', 'POORLY', 'VERSED', 'IN', 'ITS', 'PRACTICAL', 'APPLICATION', 'AND', 'I', 'DOUBT', 'THAT', 'HE', 'COULD', 'TELL', 'A', 'SPERM', 'WHALE', 'FROM', 'A', 'BALEEN', 'WHALE'] 
+8463-294828-0008-2470: ref=['AND', 'YET', 'WHAT', 'A', 'FINE', 'GALLANT', 'LAD'] +8463-294828-0008-2470: hyp=['AND', 'YET', 'WHAT', 'A', 'FINE', 'GALLANT', 'LAD'] +8463-294828-0009-2471: ref=['NOT', 'ONCE', 'DID', 'HE', 'COMMENT', 'ON', 'THE', 'LENGTH', 'OR', 'THE', 'HARDSHIPS', 'OF', 'A', 'JOURNEY'] +8463-294828-0009-2471: hyp=['NOT', 'ONCE', 'DID', 'HE', 'COMMENT', 'ON', 'THE', 'LENGTH', 'OR', 'THE', 'HARDSHIPS', 'OF', 'THE', 'JOURNEY'] +8463-294828-0010-2472: ref=['NEVER', 'DID', 'HE', 'OBJECT', 'TO', 'BUCKLING', 'UP', 'HIS', 'SUITCASE', 'FOR', 'ANY', 'COUNTRY', 'WHATEVER', 'CHINA', 'OR', 'THE', 'CONGO', 'NO', 'MATTER', 'HOW', 'FAR', 'OFF', 'IT', 'WAS'] +8463-294828-0010-2472: hyp=['NEVER', 'DID', 'HE', 'OBJECT', 'TO', 'BUCKLING', 'UP', 'HIS', 'SUITCASE', 'FOR', 'ANY', 'COUNTRY', 'WHATEVER', 'CHINA', 'OR', 'THE', 'CONGO', 'NO', 'MATTER', 'HOW', 'FAR', 'OFF', 'IT', 'WAS'] +8463-294828-0011-2473: ref=['HE', 'WENT', 'HERE', 'THERE', 'AND', 'EVERYWHERE', 'IN', 'PERFECT', 'CONTENTMENT'] +8463-294828-0011-2473: hyp=['HE', 'WENT', 'HERE', 'THERE', 'AND', 'EVERYWHERE', 'IN', 'PERFECT', 'CONTENTMENT'] +8463-294828-0012-2474: ref=['PLEASE', 'FORGIVE', 'ME', 'FOR', 'THIS', 'UNDERHANDED', 'WAY', 'OF', 'ADMITTING', 'I', 'HAD', 'TURNED', 'FORTY'] +8463-294828-0012-2474: hyp=['PLEASE', 'FORGIVE', 'ME', 'FOR', 'THIS', 'UNDERHANDED', 'WAY', 'OF', 'ADMITTING', 'THAT', 'I', 'HAD', 'TURNED', 'FORTY'] +8463-294828-0013-2475: ref=['HE', 'WAS', 'A', 'FANATIC', 'ON', 'FORMALITY', 'AND', 'HE', 'ONLY', 'ADDRESSED', 'ME', 'IN', 'THE', 'THIRD', 'PERSON', 'TO', 'THE', 'POINT', 'WHERE', 'IT', 'GOT', 'TIRESOME'] +8463-294828-0013-2475: hyp=['HE', 'WAS', 'A', 'FANATIC', 'ON', 'FORMALITY', 'AND', 'HE', 'ONLY', 'ADDRESSED', 'ME', 'IN', 'THE', 'THIRD', 'PERSON', 'TO', 'THE', 'POINT', 'WHERE', 'IT', 'GOT', 'TIRESOME'] +8463-294828-0014-2476: ref=['THERE', 'WAS', 'GOOD', 'REASON', 'TO', 'STOP', 'AND', 'THINK', 'EVEN', 'FOR', 'THE', "WORLD'S", 'MOST', 'EMOTIONLESS', 'MAN'] +8463-294828-0014-2476: hyp=['THERE', 'WAS', 'GOOD', 'REASON', 'TO', 'STOP', 'AND', 'THINK', 'EVEN', 'FOR', 'THE', "WORLD'S", 'MOST', 'EMOTIONLESS', 'MAN'] +8463-294828-0015-2477: ref=['CONSEIL', 'I', 'CALLED', 'A', 'THIRD', 'TIME', 'CONSEIL', 'APPEARED'] +8463-294828-0015-2477: hyp=['CONSEIL', 'I', 'CALLED', 'A', 'THIRD', 'TIME', 'CONSEIL', 'APPEARED'] +8463-294828-0016-2478: ref=['DID', 'MASTER', 'SUMMON', 'ME', 'HE', 'SAID', 'ENTERING'] +8463-294828-0016-2478: hyp=['DID', 'MASTER', 'SUMMON', 'ME', 'HE', 'SAID', 'ENTERING'] +8463-294828-0017-2479: ref=['PACK', 'AS', 'MUCH', 'INTO', 'MY', 'TRUNK', 'AS', 'YOU', 'CAN', 'MY', 'TRAVELING', 'KIT', 'MY', 'SUITS', 'SHIRTS', 'AND', 'SOCKS', "DON'T", 'BOTHER', 'COUNTING', 'JUST', 'SQUEEZE', 'IT', 'ALL', 'IN', 'AND', 'HURRY'] +8463-294828-0017-2479: hyp=['PACK', 'AS', 'MUCH', 'INTO', 'MY', 'TRUNK', 'AS', 'YOU', 'CAN', 'MY', 'TRAVELING', 'KIT', 'MY', 'SUITS', 'SHIRTS', 'AND', 'SOCKS', "DON'T", 'BOTHER', 'COUNTING', 'JUST', 'SQUEEZE', 'IT', 'ALL', 'IN', 'AND', 'HURRY'] +8463-294828-0018-2480: ref=["WE'LL", 'DEAL', 'WITH', 'THEM', 'LATER', 'WHAT'] +8463-294828-0018-2480: hyp=["WE'LL", 'DEAL', 'WITH', 'THEM', 'LATER', 'WHAT'] +8463-294828-0019-2481: ref=['ANYHOW', "WE'LL", 'LEAVE', 'INSTRUCTIONS', 'TO', 'SHIP', 'THE', 'WHOLE', 'MENAGERIE', 'TO', 'FRANCE'] +8463-294828-0019-2481: hyp=['ANYHOW', "WE'LL", 'LEAVE', 'INSTRUCTIONS', 'TO', 'SHIP', 'THE', 'WHOLE', 'MENAGERIE', 'TO', 'FRANCE'] +8463-294828-0020-2482: ref=['YES', 'WE', 'ARE', 'CERTAINLY', 'I', 'REPLIED', 'EVASIVELY', 'BUT', 'AFTER', 'WE', 'MAKE', 'A', 
'DETOUR'] +8463-294828-0020-2482: hyp=['YES', 'WE', 'ARE', 'CERTAINLY', 'I', 'REPLIED', 'EVASIVELY', 'BUT', 'AFTER', 'WE', 'MAKE', 'A', 'DETOUR'] +8463-294828-0021-2483: ref=['A', 'ROUTE', 'SLIGHTLY', 'LESS', 'DIRECT', "THAT'S", 'ALL'] +8463-294828-0021-2483: hyp=['A', 'ROUTE', 'SLIGHTLY', 'LESS', 'DIRECT', "THAT'S", 'ALL'] +8463-294828-0022-2484: ref=["WE'RE", 'LEAVING', 'ON', 'THE', 'ABRAHAM', 'LINCOLN'] +8463-294828-0022-2484: hyp=['WERE', 'LEAVING', 'ON', 'THE', 'ABRAHAM', 'LINCOLN'] +8463-294828-0023-2485: ref=['YOU', 'SEE', 'MY', 'FRIEND', "IT'S", 'AN', 'ISSUE', 'OF', 'THE', 'MONSTER', 'THE', 'NOTORIOUS', 'NARWHALE'] +8463-294828-0023-2485: hyp=['YOU', 'SEE', 'MY', 'FRIEND', "IT'S", 'AN', 'ISSUE', 'OF', 'THE', 'MONSTER', 'THE', 'NOTORIOUS', 'NARWHALE'] +8463-294828-0024-2486: ref=['WE', "DON'T", 'KNOW', 'WHERE', 'IT', 'WILL', 'TAKE', 'US'] +8463-294828-0024-2486: hyp=['WE', "DON'T", 'KNOW', 'WHERE', 'IT', 'WILL', 'TAKE', 'US'] +8463-294828-0025-2487: ref=['BUT', "WE'RE", 'GOING', 'JUST', 'THE', 'SAME'] +8463-294828-0025-2487: hyp=['BUT', "WE'RE", 'GOING', 'JUST', 'THE', 'SAME'] +8463-294828-0026-2488: ref=['WE', 'HAVE', 'A', 'COMMANDER', "WHO'S", 'GAME', 'FOR', 'ANYTHING'] +8463-294828-0026-2488: hyp=['WE', 'HAVE', 'A', 'COMMANDER', "WHO'S", 'GAME', 'FOR', 'ANYTHING'] +8463-294828-0027-2489: ref=['I', 'LEFT', 'INSTRUCTIONS', 'FOR', 'SHIPPING', 'MY', 'CONTAINERS', 'OF', 'STUFFED', 'ANIMALS', 'AND', 'DRIED', 'PLANTS', 'TO', 'PARIS', 'FRANCE'] +8463-294828-0027-2489: hyp=['I', 'LEFT', 'INSTRUCTIONS', 'FOR', 'SHIPPING', 'MY', 'CONTAINERS', 'OF', 'STUFFED', 'ANIMALS', 'AND', 'DRIED', 'PLANTS', 'TO', 'PARIS', 'FRANCE'] +8463-294828-0028-2490: ref=['I', 'OPENED', 'A', 'LINE', 'OF', 'CREDIT', 'SUFFICIENT', 'TO', 'COVER', 'THE', 'BABIRUSA', 'AND', 'CONSEIL', 'AT', 'MY', 'HEELS', 'I', 'JUMPED', 'INTO', 'A', 'CARRIAGE'] +8463-294828-0028-2490: hyp=['I', 'OPENED', 'A', 'LINE', 'OF', 'CREDIT', 'SUFFICIENT', 'TO', 'COVER', 'THE', 'BARBAROOSA', 'AND', 'CONSEIL', 'AT', 'MY', 'HEELS', 'I', 'JUMPED', 'INTO', 'A', 'CARRIAGE'] +8463-294828-0029-2491: ref=['OUR', 'BAGGAGE', 'WAS', 'IMMEDIATELY', 'CARRIED', 'TO', 'THE', 'DECK', 'OF', 'THE', 'FRIGATE', 'I', 'RUSHED', 'ABOARD'] +8463-294828-0029-2491: hyp=['OUR', 'BAGGAGE', 'WAS', 'IMMEDIATELY', 'CARRIED', 'TO', 'THE', 'DECK', 'OF', 'THE', 'FRIGATE', 'I', 'RUSHED', 'ABOARD'] +8463-294828-0030-2492: ref=['I', 'ASKED', 'FOR', 'COMMANDER', 'FARRAGUT'] +8463-294828-0030-2492: hyp=['I', 'ASKED', 'FOR', 'COMMANDER', 'FARRAGUT'] +8463-294828-0031-2493: ref=['ONE', 'OF', 'THE', 'SAILORS', 'LED', 'ME', 'TO', 'THE', 'AFTERDECK', 'WHERE', 'I', 'STOOD', 'IN', 'THE', 'PRESENCE', 'OF', 'A', 'SMART', 'LOOKING', 'OFFICER', 'WHO', 'EXTENDED', 'HIS', 'HAND', 'TO', 'ME'] +8463-294828-0031-2493: hyp=['ONE', 'OF', 'THE', 'SAILORS', 'LED', 'ME', 'TO', 'THE', 'AFTERDECK', 'WHERE', 'I', 'STOOD', 'IN', 'THE', 'PRESENCE', 'OF', 'A', 'SMART', 'LOOKING', 'OFFICER', 'WHO', 'EXTENDED', 'HIS', 'HAND', 'TO', 'ME'] +8463-294828-0032-2494: ref=['IN', 'PERSON', 'WELCOME', 'ABOARD', 'PROFESSOR', 'YOUR', 'CABIN', 'IS', 'WAITING', 'FOR', 'YOU'] +8463-294828-0032-2494: hyp=['IN', 'PERSON', 'WELCOME', 'ABOARD', 'PROFESSOR', 'YOUR', 'CABIN', 'IS', 'WAITING', 'FOR', 'YOU'] +8463-294828-0033-2495: ref=['I', 'WAS', 'WELL', 'SATISFIED', 'WITH', 'MY', 'CABIN', 'WHICH', 'WAS', 'LOCATED', 'IN', 'THE', 'STERN', 'AND', 'OPENED', 'INTO', 'THE', 'OFFICERS', 'MESS'] +8463-294828-0033-2495: hyp=['I', 'WAS', 'WELL', 'SATISFIED', 'WITH', 'MY', 'CABIN', 'WHICH', 'WAS', 'LOCATED', 'IN', 'THE', 'STERN', 'AND', 
'OPENED', 'INTO', 'THE', 'OFFICERS', 'MESS'] +8463-294828-0034-2496: ref=["WE'LL", 'BE', 'QUITE', 'COMFORTABLE', 'HERE', 'I', 'TOLD', 'CONSEIL'] +8463-294828-0034-2496: hyp=["WE'LL", 'BE', 'QUITE', 'COMFORTABLE', 'HERE', 'I', 'TOLD', 'CONSEIL'] +8463-294828-0035-2497: ref=['AND', 'SO', 'IF', "I'D", 'BEEN', 'DELAYED', 'BY', 'A', 'QUARTER', 'OF', 'AN', 'HOUR', 'OR', 'EVEN', 'LESS', 'THE', 'FRIGATE', 'WOULD', 'HAVE', 'GONE', 'WITHOUT', 'ME', 'AND', 'I', 'WOULD', 'HAVE', 'MISSED', 'OUT', 'ON', 'THIS', 'UNEARTHLY', 'EXTRAORDINARY', 'AND', 'INCONCEIVABLE', 'EXPEDITION', 'WHOSE', 'TRUE', 'STORY', 'MIGHT', 'WELL', 'MEET', 'WITH', 'SOME', 'SKEPTICISM'] +8463-294828-0035-2497: hyp=['AND', 'SO', 'IF', 'I', 'HAD', 'BEEN', 'DELAYED', 'BY', 'A', 'QUARTER', 'OF', 'AN', 'HOUR', 'OR', 'EVEN', 'LESS', 'THE', 'FRIGATE', 'WOULD', 'HAVE', 'GONE', 'WITHOUT', 'ME', 'AND', 'I', 'WOULD', 'HAVE', 'MISSED', 'OUT', 'ON', 'THIS', 'UNEARTHLY', 'EXTRAORDINARY', 'AND', 'INCONCEIVABLE', 'EXPEDITION', 'WHOSE', 'TRUE', 'STORY', 'MIGHT', 'WELL', 'MEET', 'WITH', 'SOME', 'SKEPTICISM'] +8463-294828-0036-2498: ref=['THE', 'WHARVES', 'OF', 'BROOKLYN', 'AND', 'EVERY', 'PART', 'OF', 'NEW', 'YORK', 'BORDERING', 'THE', 'EAST', 'RIVER', 'WERE', 'CROWDED', 'WITH', 'CURIOSITY', 'SEEKERS'] +8463-294828-0036-2498: hyp=['THE', 'WHARVES', 'OF', 'BROOKLYN', 'AND', 'EVERY', 'PART', 'OF', 'NEW', 'YORK', 'BORDERING', 'THE', 'EAST', 'RIVER', 'WERE', 'CROWDED', 'WITH', 'CURIOSITY', 'SEEKERS'] +8463-294828-0037-2499: ref=['DEPARTING', 'FROM', 'FIVE', 'HUNDRED', 'THOUSAND', 'THROATS', 'THREE', 'CHEERS', 'BURST', 'FORTH', 'IN', 'SUCCESSION'] +8463-294828-0037-2499: hyp=['DEPARTING', 'FROM', 'FIVE', 'HUNDRED', 'THOUSAND', 'THROATS', 'THREE', 'CHEERS', 'BURST', 'FORTH', 'IN', 'SUCCESSION'] +8463-294828-0038-2500: ref=['THOUSANDS', 'OF', 'HANDKERCHIEFS', 'WERE', 'WAVING', 'ABOVE', 'THESE', 'TIGHTLY', 'PACKED', 'MASSES', 'HAILING', 'THE', 'ABRAHAM', 'LINCOLN', 'UNTIL', 'IT', 'REACHED', 'THE', 'WATERS', 'OF', 'THE', 'HUDSON', 'RIVER', 'AT', 'THE', 'TIP', 'OF', 'THE', 'LONG', 'PENINSULA', 'THAT', 'FORMS', 'NEW', 'YORK', 'CITY'] +8463-294828-0038-2500: hyp=['THOUSANDS', 'OF', 'HANDKERCHIEFS', 'WERE', 'WAVING', 'ABOVE', 'THESE', 'TIGHTLY', 'PACKED', 'MASSES', 'HAILING', 'THE', 'ABRAHAM', 'LINCOLN', 'UNTIL', 'IT', 'REACHED', 'THE', 'WATERS', 'OF', 'THE', 'HUDSON', 'RIVER', 'AT', 'THE', 'TIP', 'OF', 'THE', 'LONG', 'PENINSULA', 'THAT', 'FORMS', 'NEW', 'YORK', 'CITY'] +8555-284447-0000-2501: ref=['THEN', 'HE', 'RUSHED', 'DOWN', 'STAIRS', 'INTO', 'THE', 'COURTYARD', 'SHOUTING', 'LOUDLY', 'FOR', 'HIS', 'SOLDIERS', 'AND', 'THREATENING', 'TO', 'PATCH', 'EVERYBODY', 'IN', 'HIS', 'DOMINIONS', 'IF', 'THE', 'SAILORMAN', 'WAS', 'NOT', 'RECAPTURED'] +8555-284447-0000-2501: hyp=['THEN', 'HE', 'RUSHED', 'DOWNSTAIRS', 'INTO', 'THE', 'COURTYARD', 'SHOUTING', 'LOUDLY', 'FOR', 'HIS', 'SOLDIERS', 'AND', 'THREATENING', 'TO', 'PATCH', 'EVERYBODY', 'IN', 'HIS', 'DOMINIONS', 'IF', 'THE', 'SAILORMAN', 'WAS', 'NOT', 'RECAPTURED'] +8555-284447-0001-2502: ref=['HOLD', 'HIM', 'FAST', 'MY', 'MEN', 'AND', 'AS', 'SOON', 'AS', "I'VE", 'HAD', 'MY', 'COFFEE', 'AND', 'OATMEAL', "I'LL", 'TAKE', 'HIM', 'TO', 'THE', 'ROOM', 'OF', 'THE', 'GREAT', 'KNIFE', 'AND', 'PATCH', 'HIM'] +8555-284447-0001-2502: hyp=['HOLD', 'HIM', 'FAST', 'MY', 'MAN', 'AND', 'AS', 'SOON', 'AS', "I'VE", 'HAD', 'MY', 'COFFEE', 'AND', 'OATMEAL', "I'LL", 'TAKE', 'HIM', 'TO', 'THE', 'ROOM', 'OF', 'THE', 'GREAT', 'KNIFE', 'AND', 'PATCH', 'HIM'] +8555-284447-0002-2503: ref=['I', "WOULDN'T", 'MIND', 'A', 'CUP', 'O', 'COFFEE', 
'MYSELF', 'SAID', "CAP'N", 'BILL', "I'VE", 'HAD', "CONSID'BLE", 'EXERCISE', 'THIS', 'MORNIN', 'AND', "I'M", 'ALL', 'READY', 'FOR', 'BREAKFAS'] +8555-284447-0002-2503: hyp=['I', "WOULDN'T", 'MIND', 'A', 'CUP', 'OF', 'COFFEE', 'MYSELF', 'SAID', "CAP'N", 'BILL', "I'VE", 'HAD', 'CONSRABLE', 'EXERCISE', 'THIS', 'MORNIN', 'AND', "I'M", 'ALL', 'READY', 'FOR', 'BREAKFAST'] +8555-284447-0003-2504: ref=['BUT', "CAP'N", 'BILL', 'MADE', 'NO', 'SUCH', 'ATTEMPT', 'KNOWING', 'IT', 'WOULD', 'BE', 'USELESS'] +8555-284447-0003-2504: hyp=['BUT', "CAP'N", 'BILL', 'MADE', 'NO', 'SUCH', 'ATTEMPT', 'KNOWING', 'IT', 'WOULD', 'BE', 'USELESS'] +8555-284447-0004-2505: ref=['AS', 'SOON', 'AS', 'THEY', 'ENTERED', 'THE', 'ROOM', 'OF', 'THE', 'GREAT', 'KNIFE', 'THE', 'BOOLOOROO', 'GAVE', 'A', 'YELL', 'OF', 'DISAPPOINTMENT'] +8555-284447-0004-2505: hyp=['AS', 'SOON', 'AS', 'THEY', 'ENTERED', 'THE', 'ROOM', 'OF', 'THE', 'GREAT', 'KNIFE', 'THE', 'BOOLOOROO', 'GAVE', 'A', 'YELL', 'OF', 'DISAPPOINTMENT'] +8555-284447-0005-2506: ref=['THE', 'ROOM', 'OF', 'THE', 'GREAT', 'KNIFE', 'WAS', 'HIGH', 'AND', 'BIG', 'AND', 'AROUND', 'IT', 'RAN', 'ROWS', 'OF', 'BENCHES', 'FOR', 'THE', 'SPECTATORS', 'TO', 'SIT', 'UPON'] +8555-284447-0005-2506: hyp=['THE', 'ROOM', 'OF', 'THE', 'GREAT', 'KNIFE', 'WAS', 'HIGH', 'AND', 'BIG', 'AND', 'AROUND', 'IT', 'RAN', 'ROWS', 'OF', 'BENCHES', 'FOR', 'THE', 'SPECTATORS', 'TO', 'SIT', 'UPON'] +8555-284447-0006-2507: ref=['IN', 'ONE', 'PLACE', 'AT', 'THE', 'HEAD', 'OF', 'THE', 'ROOM', 'WAS', 'A', 'RAISED', 'PLATFORM', 'FOR', 'THE', 'ROYAL', 'FAMILY', 'WITH', 'ELEGANT', 'THRONE', 'CHAIRS', 'FOR', 'THE', 'KING', 'AND', 'QUEEN', 'AND', 'SIX', 'SMALLER', 'BUT', 'RICHLY', 'UPHOLSTERED', 'CHAIRS', 'FOR', 'THE', 'SNUBNOSED', 'PRINCESSES'] +8555-284447-0006-2507: hyp=['IN', 'ONE', 'PLACE', 'AT', 'THE', 'HEAD', 'OF', 'THE', 'ROOM', 'WAS', 'A', 'RAISED', 'PLATFORM', 'FOR', 'THE', 'ROYAL', 'FAMILY', 'WITH', 'ELEGANT', 'THRONE', 'CHAIRS', 'FOR', 'THE', 'KING', 'AND', 'QUEEN', 'AND', 'SIX', 'SMALLER', 'BUT', 'RICHLY', 'UPHOLSTERED', 'CHAIRS', 'FOR', 'THE', 'SNUBNOSED', 'PRINCESSES'] +8555-284447-0007-2508: ref=['THEREFORE', 'HER', 'MAJESTY', 'PAID', 'NO', 'ATTENTION', 'TO', 'ANYONE', 'AND', 'NO', 'ONE', 'PAID', 'ANY', 'ATTENTION', 'TO', 'HER'] +8555-284447-0007-2508: hyp=['THEREFORE', 'HER', 'MAJESTY', 'PAID', 'NO', 'ATTENTION', 'TO', 'ANYONE', 'AND', 'NO', 'ONE', 'PAID', 'ANY', 'ATTENTION', 'TO', 'HER'] +8555-284447-0008-2509: ref=['RICH', 'JEWELS', 'OF', 'BLUE', 'STONES', 'GLITTERED', 'UPON', 'THEIR', 'PERSONS', 'AND', 'THE', 'ROYAL', 'LADIES', 'WERE', 'FULLY', 'AS', 'GORGEOUS', 'AS', 'THEY', 'WERE', 'HAUGHTY', 'AND', 'OVERBEARING'] +8555-284447-0008-2509: hyp=['RICH', 'JEWELS', 'OF', 'BLUE', 'STONES', 'GLITTERED', 'UPON', 'THEIR', 'PERSONS', 'AND', 'THE', 'ROYAL', 'LADIES', 'WERE', 'FULLY', 'AS', 'GORGEOUS', 'AS', 'THEY', 'WERE', 'HAUGHTY', 'AND', 'OVERBEARING'] +8555-284447-0009-2510: ref=['MORNIN', 'GIRLS', 'HOPE', 'YE', 'FEEL', 'AS', 'WELL', 'AS', 'YE', 'LOOK'] +8555-284447-0009-2510: hyp=['MORNING', 'GIRLS', 'HOPE', 'YOU', 'FEEL', 'AS', 'WELL', 'AS', 'YOU', 'LOOK'] +8555-284447-0010-2511: ref=['CONTROL', 'YOURSELVES', 'MY', 'DEARS', 'REPLIED', 'THE', 'BOOLOOROO', 'THE', 'WORST', 'PUNISHMENT', 'I', 'KNOW', 'HOW', 'TO', 'INFLICT', 'ON', 'ANYONE', 'THIS', 'PRISONER', 'IS', 'ABOUT', 'TO', 'SUFFER', "YOU'LL", 'SEE', 'A', 'VERY', 'PRETTY', 'PATCHING', 'MY', 'ROYAL', 'DAUGHTERS'] +8555-284447-0010-2511: hyp=['CONTROL', 'YOURSELVES', 'MY', 'DEARS', 'REPLIED', 'THE', 'BOOLOOROO', 'THE', 'WORST', 'PUNISHMENT', 'I', 
'KNOW', 'HOW', 'TO', 'INFLICT', 'ON', 'ANY', 'ONE', 'THIS', 'PRISONER', 'IS', 'ABOUT', 'TO', 'SUFFER', 'YOU', 'WILL', 'SEE', 'A', 'VERY', 'PRETTY', 'PATCHING', 'MY', 'ROYAL', 'DAUGHTERS'] +8555-284447-0011-2512: ref=['SUPPOSE', "IT'S", 'A', 'FRIEND'] +8555-284447-0011-2512: hyp=['SUPPOSE', "IT'S", 'A', 'FRIEND'] +8555-284447-0012-2513: ref=['THE', 'CAPTAIN', 'SHOOK', 'HIS', 'HEAD'] +8555-284447-0012-2513: hyp=['THE', 'CAPTAIN', 'SHOOK', 'HIS', 'HEAD'] +8555-284447-0013-2514: ref=['WHY', 'YOU', 'SAID', 'TO', 'FETCH', 'THE', 'FIRST', 'LIVING', 'CREATURE', 'WE', 'MET', 'AND', 'THAT', 'WAS', 'THIS', 'BILLYGOAT', 'REPLIED', 'THE', 'CAPTAIN', 'PANTING', 'HARD', 'AS', 'HE', 'HELD', 'FAST', 'TO', 'ONE', 'OF', 'THE', "GOAT'S", 'HORNS'] +8555-284447-0013-2514: hyp=['WHY', 'YOU', 'SENT', 'TO', 'FETCH', 'THE', 'FIRST', 'LIVING', 'CREATURE', 'WE', 'MET', 'AND', 'THAT', 'WAS', 'THE', 'SPILLY', 'GOAT', 'REPLIED', 'THE', 'CAPTAIN', 'PANTING', 'HARD', 'AS', 'HE', 'HELD', 'FAST', 'TO', 'ONE', 'OF', 'THE', "GOAT'S", 'HORNS'] +8555-284447-0014-2515: ref=['THE', 'IDEA', 'OF', 'PATCHING', "CAP'N", 'BILL', 'TO', 'A', 'GOAT', 'WAS', 'VASTLY', 'AMUSING', 'TO', 'HIM', 'AND', 'THE', 'MORE', 'HE', 'THOUGHT', 'OF', 'IT', 'THE', 'MORE', 'HE', 'ROARED', 'WITH', 'LAUGHTER'] +8555-284447-0014-2515: hyp=['THE', 'IDEA', 'OF', 'PATCHING', "CAP'N", 'BILL', 'TO', 'A', 'GOAT', 'WAS', 'VASTLY', 'AMUSING', 'TO', 'HIM', 'AND', 'THE', 'MORE', 'HE', 'THOUGHT', 'OF', 'IT', 'THE', 'MORE', 'HE', 'ROARED', 'WITH', 'LAUGHTER'] +8555-284447-0015-2516: ref=['THEY', 'LOOK', 'SOMETHING', 'ALIKE', 'YOU', 'KNOW', 'SUGGESTED', 'THE', 'CAPTAIN', 'OF', 'THE', 'GUARDS', 'LOOKING', 'FROM', 'ONE', 'TO', 'THE', 'OTHER', 'DOUBTFULLY', 'AND', "THEY'RE", 'NEARLY', 'THE', 'SAME', 'SIZE', 'IF', 'YOU', 'STAND', 'THE', 'GOAT', 'ON', 'HIS', 'HIND', 'LEGS', "THEY'VE", 'BOTH', 'GOT', 'THE', 'SAME', 'STYLE', 'OF', 'WHISKERS', 'AND', "THEY'RE", 'BOTH', 'OF', 'EM', 'OBSTINATE', 'AND', 'DANGEROUS', 'SO', 'THEY', 'OUGHT', 'TO', 'MAKE', 'A', 'GOOD', 'PATCH', 'SPLENDID'] +8555-284447-0015-2516: hyp=['THEY', 'LOOK', 'SOMETHING', 'ALIKE', 'YOU', 'KNOW', 'SUGGESTED', 'THE', 'CAPTAIN', 'OF', 'THE', 'GUARDS', 'LOOKING', 'FROM', 'ONE', 'TO', 'THE', 'OTHER', 'DOUBTFULLY', 'AND', "THEY'RE", 'NEARLY', 'THE', 'SAME', 'SIZE', 'IF', 'YOU', 'STAND', 'THE', 'GOAT', 'ON', 'HIS', 'HIND', 'LEGS', "THEY'VE", 'BOTH', 'GOT', 'THE', 'SAME', 'STYLE', 'OF', 'WHISKERS', 'AND', "THEY'RE", 'BOTH', 'OF', 'THEM', 'OBSTINATE', 'AND', 'DANGEROUS', 'SO', 'THEY', 'OUGHT', 'TO', 'MAKE', 'A', 'GOOD', 'PATCH', 'SPLENDID'] +8555-284447-0016-2517: ref=['FINE', 'GLORIOUS'] +8555-284447-0016-2517: hyp=['FINE', 'GLORIOUS'] +8555-284447-0017-2518: ref=['WHEN', 'THIS', 'HAD', 'BEEN', 'ACCOMPLISHED', 'THE', 'BOOLOOROO', 'LEANED', 'OVER', 'TO', 'TRY', 'TO', 'DISCOVER', 'WHY', 'THE', 'FRAME', 'ROLLED', 'AWAY', 'SEEMINGLY', 'OF', 'ITS', 'OWN', 'ACCORD', 'AND', 'HE', 'WAS', 'THE', 'MORE', 'PUZZLED', 'BECAUSE', 'IT', 'HAD', 'NEVER', 'DONE', 'SUCH', 'A', 'THING', 'BEFORE'] +8555-284447-0017-2518: hyp=['WHEN', 'THIS', 'HAD', 'BEEN', 'ACCOMPLISHED', 'THE', 'BOOLOOROO', 'LEANED', 'OVER', 'TO', 'TRY', 'TO', 'DISCOVER', 'WHY', 'THE', 'FRAME', 'ROLLED', 'AWAY', 'SEEMINGLY', 'OF', 'ITS', 'OWN', 'ACCORD', 'AND', 'HE', 'WAS', 'THE', 'MORE', 'PUZZLED', 'BECAUSE', 'IT', 'HAD', 'NEVER', 'DONE', 'SUCH', 'A', 'THING', 'BEFORE'] +8555-284447-0018-2519: ref=['AT', 'ONCE', 'THE', 'GOAT', 'GAVE', 'A', 'LEAP', 'ESCAPED', 'FROM', 'THE', 'SOLDIERS', 'AND', 'WITH', 'BOWED', 'HEAD', 'RUSHED', 'UPON', 'THE', 'BOOLOOROO'] 
+8555-284447-0018-2519: hyp=['AT', 'ONCE', 'THE', 'GOAT', 'GAVE', 'A', 'LEAP', 'ESCAPED', 'FROM', 'THE', 'SOLDIERS', 'AND', 'WITH', 'BOWED', 'HEAD', 'RUSHED', 'UPON', 'THE', 'BOOLOOROO'] +8555-284447-0019-2520: ref=['BEFORE', 'ANY', 'COULD', 'STOP', 'HIM', 'HE', 'BUTTED', 'HIS', 'MAJESTY', 'SO', 'FURIOUSLY', 'THAT', 'THE', 'KING', 'SOARED', 'FAR', 'INTO', 'THE', 'AIR', 'AND', 'TUMBLED', 'IN', 'A', 'HEAP', 'AMONG', 'THE', 'BENCHES', 'WHERE', 'HE', 'LAY', 'MOANING', 'AND', 'GROANING'] +8555-284447-0019-2520: hyp=['BEFORE', 'ANY', 'COULD', 'STOP', 'HIM', 'HE', 'BUTTED', 'HIS', 'MAJESTY', 'SO', 'FURIOUSLY', 'THAT', 'THE', 'KING', 'SOARED', 'FAR', 'INTO', 'THE', 'AIR', 'AND', 'TUMBLED', 'IN', 'A', 'HEAP', 'AMONG', 'THE', 'BENCHES', 'WHERE', 'HE', 'LAY', 'MOANING', 'AND', 'GROANING'] +8555-284447-0020-2521: ref=['THE', "GOAT'S", 'WARLIKE', 'SPIRIT', 'WAS', 'ROUSED', 'BY', 'THIS', 'SUCCESSFUL', 'ATTACK'] +8555-284447-0020-2521: hyp=['THE', "GOAT'S", 'WARLIKE', 'SPIRIT', 'WAS', 'ROUSED', 'BY', 'THIS', 'SUCCESSFUL', 'ATTACK'] +8555-284447-0021-2522: ref=['THEN', 'THEY', 'SPED', 'IN', 'GREAT', 'HASTE', 'FOR', 'THE', 'DOOR', 'AND', 'THE', 'GOAT', 'GAVE', 'A', 'FINAL', 'BUTT', 'THAT', 'SENT', 'THE', 'ROW', 'OF', 'ROYAL', 'LADIES', 'ALL', 'DIVING', 'INTO', 'THE', 'CORRIDOR', 'IN', 'ANOTHER', 'TANGLE', 'WHEREUPON', 'THEY', 'SHRIEKED', 'IN', 'A', 'MANNER', 'THAT', 'TERRIFIED', 'EVERYONE', 'WITHIN', 'SOUND', 'OF', 'THEIR', 'VOICES'] +8555-284447-0021-2522: hyp=['THEN', 'THEY', 'SPED', 'IN', 'GREAT', 'HASTE', 'FOR', 'THE', 'DOOR', 'AND', 'THE', 'GOAT', 'GAVE', 'A', 'FINAL', 'BUTT', 'THAT', 'SENT', 'THE', 'ROW', 'OF', 'ROYAL', 'LADIES', 'ALL', 'DIVING', 'INTO', 'THE', 'CORRIDOR', 'IN', 'ANOTHER', 'TANGLE', 'WHEREUPON', 'THEY', 'SHRIEKED', 'IN', 'A', 'MANNER', 'THAT', 'TERRIFIED', 'EVERY', 'ONE', 'WITHIN', 'SOUND', 'OF', 'THEIR', 'VOICES'] +8555-284447-0022-2523: ref=['I', 'HAD', 'A', 'NOTION', 'IT', 'WAS', 'YOU', 'MATE', 'AS', 'SAVED', 'ME', 'FROM', 'THE', 'KNIFE'] +8555-284447-0022-2523: hyp=['I', 'HAD', 'A', 'NOTION', 'IT', 'WAS', 'YOU', 'MADE', 'TO', 'SEE', 'ME', 'FROM', 'THE', 'KNIFE'] +8555-284447-0023-2524: ref=['I', "COULDN'T", 'SHIVER', 'MUCH', 'BEIN', 'BOUND', 'SO', 'TIGHT', 'BUT', 'WHEN', "I'M", 'LOOSE', 'I', 'MEAN', 'TO', 'HAVE', 'JUS', 'ONE', 'GOOD', 'SHIVER', 'TO', 'RELIEVE', 'MY', "FEELIN'S"] +8555-284447-0023-2524: hyp=['I', "COULDN'T", 'SHIVER', 'MUCH', 'BEING', 'BOUND', 'SO', 'TIGHT', 'BUT', 'WHEN', "I'M", 'LOOSE', 'I', 'MEAN', 'TO', 'HAVE', 'JUST', 'ONE', 'GOOD', 'SHIVER', 'TO', 'RELIEVE', 'MY', 'FEELINGS'] +8555-284447-0024-2525: ref=['COME', 'AND', 'GET', 'THE', 'BOOLOOROO', 'SHE', 'SAID', 'GOING', 'TOWARD', 'THE', 'BENCHES'] +8555-284447-0024-2525: hyp=['COME', 'AND', 'GET', 'THE', 'BOOLOOROO', 'SHE', 'SAID', 'GOING', 'TOWARD', 'THE', 'BENCHES'] +8555-284449-0000-2526: ref=['SO', 'THEY', 'WERE', 'QUITE', 'WILLING', 'TO', 'OBEY', 'THE', 'ORDERS', 'OF', 'THEIR', 'GIRL', 'QUEEN', 'AND', 'IN', 'A', 'SHORT', 'TIME', 'THE', 'BLASTS', 'OF', 'TRUMPETS', 'AND', 'ROLL', 'OF', 'DRUMS', 'AND', 'CLASHING', 'OF', 'CYMBALS', 'TOLD', 'TROT', 'AND', "CAP'N", 'BILL', 'THAT', 'THE', 'BLUE', 'BANDS', 'HAD', 'ASSEMBLED', 'BEFORE', 'THE', 'PALACE'] +8555-284449-0000-2526: hyp=['SO', 'THEY', 'WERE', 'QUITE', 'WILLING', 'TO', 'OBEY', 'THE', 'ORDERS', 'OF', 'THEIR', 'GIRL', 'QUEEN', 'AND', 'IN', 'A', 'SHORT', 'TIME', 'THE', 'BLAST', 'OF', 'TRUMPETS', 'AND', 'ROLL', 'OF', 'DRUMS', 'AND', 'CLASHING', 'OF', 'CYMBALS', 'TOLD', 'TROT', 'AND', "CAP'N", 'BILL', 'THAT', 'THE', 'BLUE', 'BANDS', 'HAD', 'ASSEMBLED', 
'BEFORE', 'THE', 'PALACE'] +8555-284449-0001-2527: ref=['THEN', 'THEY', 'ALL', 'MARCHED', 'OUT', 'A', 'LITTLE', 'WAY', 'INTO', 'THE', 'FIELDS', 'AND', 'FOUND', 'THAT', 'THE', 'ARMY', 'OF', 'PINKIES', 'HAD', 'ALREADY', 'FORMED', 'AND', 'WAS', 'ADVANCING', 'STEADILY', 'TOWARD', 'THEM'] +8555-284449-0001-2527: hyp=['THEN', 'THEY', 'ALL', 'MARCHED', 'OUT', 'A', 'LITTLE', 'WAY', 'INTO', 'THE', 'FIELDS', 'AND', 'FOUND', 'THAT', 'THE', 'ARMY', 'OF', 'PINKIES', 'HAD', 'ALREADY', 'FORMED', 'AND', 'WAS', 'ADVANCING', 'STEADILY', 'TOWARD', 'THEM'] +8555-284449-0002-2528: ref=['AT', 'THE', 'HEAD', 'OF', 'THE', 'PINKIES', 'WERE', 'GHIP', 'GHISIZZLE', 'AND', 'BUTTON', 'BRIGHT', 'WHO', 'HAD', 'THE', 'PARROT', 'ON', 'HIS', 'SHOULDER', 'AND', 'THEY', 'WERE', 'SUPPORTED', 'BY', 'CAPTAIN', 'CORALIE', 'AND', 'CAPTAIN', 'TINTINT', 'AND', 'ROSALIE', 'THE', 'WITCH'] +8555-284449-0002-2528: hyp=['AT', 'THE', 'HEAD', 'OF', 'THE', 'PINKIES', 'WERE', 'GHIP', 'GHISIZZLE', 'AND', 'BUTTON', 'BRIGHT', 'WHO', 'HAD', 'THE', 'PARROT', 'ON', 'HIS', 'SHOULDER', 'AND', 'THEY', 'WERE', 'SUPPORTED', 'BY', 'CAPTAIN', 'CORALIE', 'AND', 'CAPTAIN', 'TINTENT', 'AND', 'ROSALIE', 'THE', 'WITCH'] +8555-284449-0003-2529: ref=['WHEN', 'THE', 'BLUESKINS', 'SAW', 'GHIP', 'GHISIZZLE', 'THEY', 'RAISED', 'ANOTHER', 'GREAT', 'SHOUT', 'FOR', 'HE', 'WAS', 'THE', 'FAVORITE', 'OF', 'THE', 'SOLDIERS', 'AND', 'VERY', 'POPULAR', 'WITH', 'ALL', 'THE', 'PEOPLE'] +8555-284449-0003-2529: hyp=['WHEN', 'THE', 'BLUESKINS', 'SAW', 'GHIP', 'GHISIZZLE', 'THEY', 'RAISED', 'ANOTHER', 'GREAT', 'SHOUT', 'FOR', 'HE', 'WAS', 'THE', 'FAVORITE', 'OF', 'THE', 'SOLDIERS', 'AND', 'VERY', 'POPULAR', 'WITH', 'ALL', 'THE', 'PEOPLE'] +8555-284449-0004-2530: ref=['SINCE', 'LAST', 'THURSDAY', 'I', 'GHIP', 'GHISIZZLE', 'HAVE', 'BEEN', 'THE', 'LAWFUL', 'BOOLOOROO', 'OF', 'THE', 'BLUE', 'COUNTRY', 'BUT', 'NOW', 'THAT', 'YOU', 'ARE', 'CONQUERED', 'BY', 'QUEEN', 'TROT', 'I', 'SUPPOSE', 'I', 'AM', 'CONQUERED', 'TOO', 'AND', 'YOU', 'HAVE', 'NO', 'BOOLOOROO', 'AT', 'ALL'] +8555-284449-0004-2530: hyp=['SINCE', 'LAST', 'THURSDAY', 'I', 'GHIP', 'GHISIZZLE', 'HAVE', 'BEEN', 'THE', 'LAWFUL', 'BOOLOOROO', 'OF', 'THE', 'BLUE', 'COUNTRY', 'BUT', 'NOW', 'THAT', 'YOU', 'ARE', 'CONQUERED', 'BY', 'QUEEN', 'TROT', 'I', 'SUPPOSE', 'I', 'AM', 'CONQUERED', 'TOO', 'AND', 'YOU', 'HAVE', 'NO', 'BOOLOOROO', 'AT', 'ALL'] +8555-284449-0005-2531: ref=['WHEN', 'HE', 'FINISHED', 'SHE', 'SAID', 'CHEERFULLY'] +8555-284449-0005-2531: hyp=['WHEN', 'HE', 'FINISHED', 'SHE', 'SAID', 'CHEERFULLY'] +8555-284449-0006-2532: ref=["DON'T", 'WORRY', 'SIZZLE', 'DEAR', "IT'LL", 'ALL', 'COME', 'RIGHT', 'PRETTY', 'SOON'] +8555-284449-0006-2532: hyp=["DON'T", 'WORRY', 'SIZZLE', 'DEAR', "IT'LL", 'ALL', 'COME', 'RIGHT', 'PRETTY', 'SOON'] +8555-284449-0007-2533: ref=['NOW', 'THEN', "LET'S", 'ENTER', 'THE', 'CITY', 'AN', 'ENJOY', 'THE', 'GRAND', 'FEAST', "THAT'S", 'BEING', 'COOKED', "I'M", 'NEARLY', 'STARVED', 'MYSELF', 'FOR', 'THIS', 'CONQUERIN', 'KINGDOMS', 'IS', 'HARD', 'WORK'] +8555-284449-0007-2533: hyp=['NOW', 'THEN', "LET'S", 'ENTER', 'THE', 'CITY', 'AND', 'ENJOY', 'THE', 'GREAT', 'FEAST', "THAT'S", 'BEING', 'COOKED', "I'M", 'NEARLY', 'STARVED', 'MYSELF', 'FOR', 'THIS', 'CONQUERING', "KINGDOM'S", 'IS', 'HARD', 'WORK'] +8555-284449-0008-2534: ref=['THEN', 'SHE', 'GAVE', 'ROSALIE', 'BACK', 'HER', 'MAGIC', 'RING', 'THANKING', 'THE', 'KIND', 'WITCH', 'FOR', 'ALL', 'SHE', 'HAD', 'DONE', 'FOR', 'THEM'] +8555-284449-0008-2534: hyp=['THEN', 'SHE', 'GAVE', 'ROSALIE', 'BACK', 'HER', 'MAGIC', 'RING', 'THANKING', 'THE', 'KIND', 'WITCH', 
'FOR', 'ALL', 'SHE', 'HAD', 'DONE', 'FOR', 'THEM'] +8555-284449-0009-2535: ref=['YOU', 'ARE', 'MATE', 'REPLIED', 'THE', 'SAILOR'] +8555-284449-0009-2535: hyp=['YOU', 'ARE', 'MATE', 'REPLIED', 'THE', 'SAILOR'] +8555-284449-0010-2536: ref=['IT', 'WILL', 'BE', 'SUCH', 'A', 'SATISFACTION'] +8555-284449-0010-2536: hyp=['IT', 'WILL', 'BE', 'SUCH', 'A', 'SATISFACTION'] +8555-284449-0011-2537: ref=['THE', 'GUARDS', 'HAD', 'A', 'TERRIBLE', 'STRUGGLE', 'WITH', 'THE', 'GOAT', 'WHICH', 'WAS', 'LOOSE', 'IN', 'THE', 'ROOM', 'AND', 'STILL', 'WANTED', 'TO', 'FIGHT', 'BUT', 'FINALLY', 'THEY', 'SUBDUED', 'THE', 'ANIMAL', 'AND', 'THEN', 'THEY', 'TOOK', 'THE', 'BOOLOOROO', 'OUT', 'OF', 'THE', 'FRAME', 'HE', 'WAS', 'TIED', 'IN', 'AND', 'BROUGHT', 'BOTH', 'HIM', 'AND', 'THE', 'GOAT', 'BEFORE', 'QUEEN', 'TROT', 'WHO', 'AWAITED', 'THEM', 'IN', 'THE', 'THRONE', 'ROOM', 'OF', 'THE', 'PALACE'] +8555-284449-0011-2537: hyp=['THE', 'GUARDS', 'HAD', 'A', 'TERRIBLE', 'STRUGGLE', 'WITH', 'THE', 'GOAT', 'WHICH', 'WAS', 'LOOSE', 'IN', 'THE', 'ROOM', 'AND', 'STILL', 'WANTED', 'TO', 'FIGHT', 'BUT', 'FINALLY', 'THEY', 'SUBDUED', 'THE', 'ANIMAL', 'AND', 'THEN', 'THEY', 'TOOK', 'THE', 'BOOLOOROO', 'OUT', 'OF', 'THE', 'FRAME', 'HE', 'WAS', 'TIED', 'IN', 'AND', 'BROUGHT', 'BOTH', 'HIM', 'AND', 'THE', 'GOAT', 'BEFORE', 'QUEEN', 'TROT', 'WHO', 'AWAITED', 'THEM', 'IN', 'THE', 'THRONE', 'ROOM', 'OF', 'THE', 'PALACE'] +8555-284449-0012-2538: ref=["I'LL", 'GLADLY', 'DO', 'THAT', 'PROMISED', 'THE', 'NEW', 'BOOLOOROO', 'AND', "I'LL", 'FEED', 'THE', 'HONORABLE', 'GOAT', 'ALL', 'THE', 'SHAVINGS', 'AND', 'LEATHER', 'AND', 'TIN', 'CANS', 'HE', 'CAN', 'EAT', 'BESIDES', 'THE', 'GRASS'] +8555-284449-0012-2538: hyp=["I'LL", 'GLADLY', 'DO', 'THAT', 'PROMISED', 'THE', 'NEW', 'BOOLOOROO', 'AND', "I'LL", 'FEED', 'THE', 'HONORABLE', 'GOAT', 'ALL', 'THE', 'SHAVINGS', 'AND', 'LEATHER', 'AND', 'TIN', 'CANS', 'HE', 'CAN', 'EAT', 'BESIDES', 'THE', 'GRASS'] +8555-284449-0013-2539: ref=['SCUSE', 'ME', 'SAID', 'TROT', 'I', 'NEGLECTED', 'TO', 'TELL', 'YOU', 'THAT', "YOU'RE", 'NOT', 'THE', 'BOOLOOROO', 'ANY', 'MORE'] +8555-284449-0013-2539: hyp=['EXCUSE', 'ME', 'SAID', 'TROT', 'I', 'NEGLECTED', 'TO', 'TELL', 'YOU', 'THAT', "YOU'RE", 'NOT', 'THE', 'BOOLOOROO', 'ANY', 'MORE'] +8555-284449-0014-2540: ref=['THE', 'FORMER', 'BOOLOOROO', 'GROANED'] +8555-284449-0014-2540: hyp=['THE', 'FORMER', 'BOOLOOROO', 'GROANED'] +8555-284449-0015-2541: ref=["I'LL", 'NOT', 'BE', 'WICKED', 'ANY', 'MORE', 'SIGHED', 'THE', 'OLD', 'BOOLOOROO', "I'LL", 'REFORM'] +8555-284449-0015-2541: hyp=["I'LL", 'NOT', 'BE', 'WICKED', 'ANY', 'MORE', 'SIGHED', 'THE', 'OLD', 'BOOLOOROO', "I'LL", 'REFORM'] +8555-284449-0016-2542: ref=['AS', 'A', 'PRIVATE', 'CITIZEN', 'I', 'SHALL', 'BE', 'A', 'MODEL', 'OF', 'DEPORTMENT', 'BECAUSE', 'IT', 'WOULD', 'BE', 'DANGEROUS', 'TO', 'BE', 'OTHERWISE'] +8555-284449-0016-2542: hyp=['AS', 'A', 'PRIVATE', 'CITIZEN', 'I', 'SHALL', 'BE', 'A', 'MODEL', 'OF', 'DEPORTMENT', 'BECAUSE', 'IT', 'WOULD', 'BE', 'DANGEROUS', 'TO', 'BE', 'OTHERWISE'] +8555-284449-0017-2543: ref=['WHEN', 'FIRST', 'THEY', 'ENTERED', 'THE', 'THRONE', 'ROOM', 'THEY', 'TRIED', 'TO', 'BE', 'AS', 'HAUGHTY', 'AND', 'SCORNFUL', 'AS', 'EVER', 'BUT', 'THE', 'BLUES', 'WHO', 'WERE', 'ASSEMBLED', 'THERE', 'ALL', 'LAUGHED', 'AT', 'THEM', 'AND', 'JEERED', 'THEM', 'FOR', 'THERE', 'WAS', 'NOT', 'A', 'SINGLE', 'PERSON', 'IN', 'ALL', 'THE', 'BLUE', 'COUNTRY', 'WHO', 'LOVED', 'THE', 'PRINCESSES', 'THE', 'LEAST', 'LITTLE', 'BIT'] +8555-284449-0017-2543: hyp=['WHEN', 'FIRST', 'THEY', 'ENTERED', 'THE', 'THRONE', 'ROOM', 
'THEY', 'TRIED', 'TO', 'BE', 'AS', 'HAUGHTY', 'AND', 'SCORNFUL', 'AS', 'EVER', 'BUT', 'THE', 'BLUES', 'WHO', 'WERE', 'ASSEMBLED', 'THERE', 'ALL', 'LAUGHED', 'AT', 'THEM', 'AND', 'JEERED', 'THEM', 'FOR', 'THERE', 'WAS', 'NOT', 'A', 'SINGLE', 'PERSON', 'IN', 'ALL', 'THE', 'BLUE', 'COUNTRY', 'WHO', 'LOVED', 'THE', 'PRINCESSES', 'THE', 'LEAST', 'LITTLE', 'BIT'] +8555-284449-0018-2544: ref=['SO', 'GHIP', 'GHISIZZLE', 'ORDERED', 'THE', 'CAPTAIN', 'TO', 'TAKE', 'A', 'FILE', 'OF', 'SOLDIERS', 'AND', 'ESCORT', 'THE', 'RAVING', 'BEAUTIES', 'TO', 'THEIR', 'NEW', 'HOME'] +8555-284449-0018-2544: hyp=['SO', 'GHIP', 'GHISIZZLE', 'ORDERED', 'THE', 'CAPTAIN', 'TO', 'TAKE', 'A', 'FILE', 'OF', 'SOLDIERS', 'AND', 'ESCORT', 'THE', 'RAVING', 'BEAUTIES', 'TO', 'THEIR', 'NEW', 'HOME'] +8555-284449-0019-2545: ref=['THAT', 'EVENING', 'TROT', 'GAVE', 'A', 'GRAND', 'BALL', 'IN', 'THE', 'PALACE', 'TO', 'WHICH', 'THE', 'MOST', 'IMPORTANT', 'OF', 'THE', 'PINKIES', 'AND', 'THE', 'BLUESKINS', 'WERE', 'INVITED'] +8555-284449-0019-2545: hyp=['THAT', 'EVENING', 'TROT', 'GAVE', 'A', 'GRAND', 'BALL', 'IN', 'THE', 'PALACE', 'TO', 'WHICH', 'THE', 'MOST', 'IMPORTANT', 'OF', 'THE', 'PINKIES', 'AND', 'THE', 'BLUESKINS', 'WERE', 'INVITED'] +8555-284449-0020-2546: ref=['THE', 'COMBINED', 'BANDS', 'OF', 'BOTH', 'THE', 'COUNTRIES', 'PLAYED', 'THE', 'MUSIC', 'AND', 'A', 'FINE', 'SUPPER', 'WAS', 'SERVED'] +8555-284449-0020-2546: hyp=['THE', 'COMBINED', 'BANDS', 'OF', 'BOTH', 'THE', 'COUNTRIES', 'PLAYED', 'THE', 'MUSIC', 'AND', 'A', 'FINE', 'SUPPER', 'WAS', 'SERVED'] +8555-292519-0000-2547: ref=['BRIGHTER', 'THAN', 'EARLY', "DAWN'S", 'MOST', 'BRILLIANT', 'DYE', 'ARE', 'BLOWN', 'CLEAR', 'BANDS', 'OF', 'COLOR', 'THROUGH', 'THE', 'SKY', 'THAT', 'SWIRL', 'AND', 'SWEEP', 'AND', 'MEET', 'TO', 'BREAK', 'AND', 'FOAM', 'LIKE', 'RAINBOW', 'VEILS', 'UPON', 'A', "BUBBLE'S", 'DOME'] +8555-292519-0000-2547: hyp=['BRIGHTER', 'THAN', 'EARLY', "DAWN'S", 'MOST', 'BRILLIANT', 'DYE', 'ARE', 'BLOWN', 'CLEAR', 'BANDS', 'OF', 'COLOR', 'THROUGH', 'THE', 'SKY', 'THAT', 'SWIRL', 'AND', 'SWEEP', 'AND', 'MEET', 'TO', 'BREAK', 'AND', 'FOAM', 'LIKE', 'RAINBOW', 'VEILS', 'UPON', 'A', "BUBBLE'S", 'DOME'] +8555-292519-0001-2548: ref=['GUIDED', 'BY', 'YOU', 'HOW', 'WE', 'MIGHT', 'STROLL', 'TOWARDS', 'DEATH', 'OUR', 'ONLY', 'MUSIC', 'ONE', "ANOTHER'S", 'BREATH', 'THROUGH', 'GARDENS', 'INTIMATE', 'WITH', 'HOLLYHOCKS', 'WHERE', 'SILENT', 'POPPIES', 'BURN', 'BETWEEN', 'THE', 'ROCKS', 'BY', 'POOLS', 'WHERE', 'BIRCHES', 'BEND', 'TO', 'CONFIDANTS', 'ABOVE', 'GREEN', 'WATERS', 'SCUMMED', 'WITH', 'LILY', 'PLANTS'] +8555-292519-0001-2548: hyp=['GUIDED', 'BY', 'YOU', 'HOW', 'WE', 'MIGHT', 'STROLL', 'TOWARDS', 'DEATH', 'OUR', 'ONLY', 'MUSIC', 'ONE', "ANOTHER'S", 'BREATH', 'THROUGH', 'GARDENS', 'INTIMATE', 'WITH', 'HOLLYHOCKS', 'WHERE', 'SILENT', 'POPPIES', 'BURN', 'BETWEEN', 'THE', 'ROCKS', 'BY', 'POOLS', 'WHERE', 'BIRCHES', 'BEND', 'TO', 'CONFIDANTS', 'ABOVE', 'GREEN', 'WATERS', 'SKUMMED', 'WITH', 'LILY', 'PLANTS'] +8555-292519-0002-2549: ref=['VENICE'] +8555-292519-0002-2549: hyp=['VENICE'] +8555-292519-0003-2550: ref=['IN', 'A', 'SUNSET', 'GLOWING', 'OF', 'CRIMSON', 'AND', 'GOLD', 'SHE', 'LIES', 'THE', 'GLORY', 'OF', 'THE', 'WORLD', 'A', 'BEACHED', "KING'S", 'GALLEY', 'WHOSE', 'SAILS', 'ARE', 'FURLED', 'WHO', 'IS', 'HUNG', 'WITH', 'TAPESTRIES', 'RICH', 'AND', 'OLD'] +8555-292519-0003-2550: hyp=['IN', 'A', 'SUNSET', 'GLOWING', 'OF', 'CRIMSON', 'AND', 'GOLD', 'SHE', 'LIES', 'THE', 'GLORY', 'OF', 'THE', 'WORLD', 'A', 'BEACHED', "KING'S", 'GALLEY', 'WHOSE', 'SAILS', 'ARE', 'FURLED', 
'WHO', 'IS', 'HUNG', 'WITH', 'TAPESTRIES', 'RICH', 'AND', 'OLD'] +8555-292519-0004-2551: ref=['THE', 'PITY', 'THAT', 'WE', 'MUST', 'COME', 'AND', 'GO'] +8555-292519-0004-2551: hyp=['THE', 'PITY', 'THAT', 'WE', 'MUST', 'COME', 'AND', 'GO'] +8555-292519-0005-2552: ref=['WHILE', 'THE', 'OLD', 'GOLD', 'AND', 'THE', 'MARBLE', 'STAYS', 'FOREVER', 'GLEAMING', 'ITS', 'SOFT', 'STRONG', 'BLAZE', 'CALM', 'IN', 'THE', 'EARLY', 'EVENING', 'GLOW'] +8555-292519-0005-2552: hyp=['WHILE', 'THE', 'OLD', 'GOLD', 'IN', 'THE', 'MARBLE', 'STAYS', 'FOR', 'EVER', 'GLEAMING', 'ITS', 'SOFT', 'STRONG', 'BLAZE', 'CALM', 'IN', 'THE', 'EARLY', 'EVENING', 'GLOW'] +8555-292519-0006-2553: ref=['THE', 'PLEASANT', 'GRAVEYARD', 'OF', 'MY', 'SOUL', 'WITH', 'SENTIMENTAL', 'CYPRESS', 'TREES', 'AND', 'FLOWERS', 'IS', 'FILLED', 'THAT', 'I', 'MAY', 'STROLL', 'IN', 'MEDITATION', 'AT', 'MY', 'EASE'] +8555-292519-0006-2553: hyp=['THE', 'PLEASANT', 'GRAVEYARD', 'OF', 'MY', 'SOUL', 'WITH', 'SENTIMENTAL', 'CYPRESS', 'TREES', 'AND', 'FLOWERS', 'IS', 'FILLED', 'THAT', 'I', 'MAY', 'STROLL', 'IN', 'MEDITATION', 'AT', 'MY', 'EASE'] +8555-292519-0007-2554: ref=['IT', 'IS', 'MY', 'HEART', 'HUNG', 'IN', 'THE', 'SKY', 'AND', 'NO', 'CLOUDS', 'EVER', 'FLOAT', 'BETWEEN', 'THE', 'GRAVE', 'FLOWERS', 'AND', 'MY', 'HEART', 'ON', 'HIGH'] +8555-292519-0007-2554: hyp=['IT', 'IS', 'MY', 'HEART', 'HUNG', 'IN', 'THE', 'SKY', 'AND', 'NO', 'CLOUDS', 'EVER', 'FLOAT', 'BETWEEN', 'THE', 'GRAVE', 'FLOWERS', 'AND', 'MY', 'HEART', 'ON', 'HIGH'] +8555-292519-0008-2555: ref=['OVER', 'THE', 'TRACK', 'LINED', 'CITY', 'STREET', 'THE', 'YOUNG', 'MEN', 'THE', 'GRINNING', 'MEN', 'PASS'] +8555-292519-0008-2555: hyp=['OVER', 'THE', 'TRACK', 'LINED', 'CITY', 'STREET', 'THE', 'YOUNG', 'MEN', 'THE', 'GRINNING', 'MAN', 'PASS'] +8555-292519-0009-2556: ref=['HO', 'YE', 'SAILS', 'THAT', 'SEEM', 'TO', 'WANDER', 'IN', 'DREAM', 'FILLED', 'MEADOWS', 'SAY', 'IS', 'THE', 'SHORE', 'WHERE', 'I', 'STAND', 'THE', 'ONLY', 'FIELD', 'OF', 'STRUGGLE', 'OR', 'ARE', 'YE', 'HIT', 'AND', 'BATTERED', 'OUT', 'THERE', 'BY', 'WAVES', 'AND', 'WIND', 'GUSTS', 'AS', 'YE', 'TACK', 'OVER', 'A', 'CLASHING', 'SEA', 'OF', 'WATERY', 'ECHOES'] +8555-292519-0009-2556: hyp=['HO', 'YE', 'SAILS', 'THAT', 'SEEM', 'TO', 'WANDER', 'IN', 'DREAM', 'FILLED', 'MEADOWS', 'SAY', 'IS', 'THIS', 'SHORE', 'WHERE', 'I', 'STAND', 'THE', 'ONLY', 'FIELD', 'OF', 'STRUGGLE', 'OR', 'ARE', 'YE', 'HIT', 'AND', 'BATTERED', 'OUT', 'THERE', 'BY', 'WAVES', 'AND', 'WIND', 'GUSTS', 'AS', 'YE', 'TACK', 'OVER', 'A', 'CLASHING', 'SEA', 'OF', 'WATERY', 'ECHOES'] +8555-292519-0010-2557: ref=['OLD', 'DANCES', 'ARE', 'SIMPLIFIED', 'OF', 'THEIR', 'YEARNING', 'BLEACHED', 'BY', 'TIME'] +8555-292519-0010-2557: hyp=['OLD', 'DANCES', 'ARE', 'SIMPLIFIED', 'OF', 'THEIR', 'YEARNING', 'BLEACHED', 'BY', 'TIME'] +8555-292519-0011-2558: ref=['HE', 'HAD', 'GOT', 'INTO', 'HER', 'COURTYARD'] +8555-292519-0011-2558: hyp=['HE', 'HAD', 'GOT', 'INTO', 'HER', 'COURTYARD'] +8555-292519-0012-2559: ref=['THROUGH', 'THE', 'BLACK', 'NIGHT', 'RAIN', 'HE', 'SANG', 'TO', 'HER', 'WINDOW', 'BARS'] +8555-292519-0012-2559: hyp=['THROUGH', 'THE', 'BLACK', 'NIGHT', 'RAIN', 'HE', 'SANG', 'TO', 'HER', 'WINDOW', 'BARS'] +8555-292519-0013-2560: ref=['THAT', 'WAS', 'BUT', 'RUSTLING', 'OF', 'DRIPPING', 'PLANTS', 'IN', 'THE', 'DARK'] +8555-292519-0013-2560: hyp=['THAT', 'WAS', 'BUT', 'RUSTLING', 'OF', 'DRIPPING', 'PLANTS', 'IN', 'THE', 'DARK'] +8555-292519-0014-2561: ref=['SHE', 'WAS', 'ALONE', 'THAT', 'NIGHT'] +8555-292519-0014-2561: hyp=['SHE', 'WAS', 'ALONE', 'THAT', 'NIGHT'] 
+8555-292519-0015-2562: ref=['HE', 'HAD', 'BROKEN', 'INTO', 'HER', 'COURTYARD'] +8555-292519-0015-2562: hyp=['HE', 'HAD', 'BROKEN', 'INTO', 'HER', 'COURTYARD'] +908-157963-0000-2563: ref=['TO', 'FADE', 'AWAY', 'LIKE', 'MORNING', 'BEAUTY', 'FROM', 'HER', 'MORTAL', 'DAY', 'DOWN', 'BY', 'THE', 'RIVER', 'OF', 'ADONA', 'HER', 'SOFT', 'VOICE', 'IS', 'HEARD', 'AND', 'THUS', 'HER', 'GENTLE', 'LAMENTATION', 'FALLS', 'LIKE', 'MORNING', 'DEW'] +908-157963-0000-2563: hyp=['TO', 'FADE', 'AWAY', 'LIKE', 'MORNING', 'BEAUTY', 'FROM', 'HER', 'MORTAL', 'DAY', 'DOWN', 'BY', 'THE', 'RIVER', 'OF', 'ADONNA', 'HER', 'SOFT', 'VOICE', 'IS', 'HEARD', 'AND', 'THUS', 'HER', 'GENTLE', 'LAMENTATION', 'FALLS', 'LIKE', 'MORNING', 'DEW'] +908-157963-0001-2564: ref=['O', 'LIFE', 'OF', 'THIS', 'OUR', 'SPRING'] +908-157963-0001-2564: hyp=['O', 'LIFE', 'OF', 'THIS', 'OUR', 'SPRING'] +908-157963-0002-2565: ref=['WHY', 'FADES', 'THE', 'LOTUS', 'OF', 'THE', 'WATER'] +908-157963-0002-2565: hyp=['WHY', 'FADES', 'THE', 'LOTUS', 'OF', 'THE', 'WATER'] +908-157963-0003-2566: ref=['WHY', 'FADE', 'THESE', 'CHILDREN', 'OF', 'THE', 'SPRING'] +908-157963-0003-2566: hyp=['WHY', 'FADE', 'THESE', 'CHILDREN', 'OF', 'THE', 'SPRING'] +908-157963-0004-2567: ref=['THEL', 'IS', 'LIKE', 'A', 'WATRY', 'BOW', 'AND', 'LIKE', 'A', 'PARTING', 'CLOUD', 'LIKE', 'A', 'REFLECTION', 'IN', 'A', 'GLASS', 'LIKE', 'SHADOWS', 'IN', 'THE', 'WATER', 'LIKE', 'DREAMS', 'OF', 'INFANTS', 'LIKE', 'A', 'SMILE', 'UPON', 'AN', 'INFANTS', 'FACE'] +908-157963-0004-2567: hyp=['FELL', 'IS', 'LIKE', 'A', 'WATERY', 'BOW', 'AND', 'LIKE', 'A', 'PARTING', 'CLOUD', 'LIKE', 'A', 'REFLECTION', 'IN', 'A', 'GLASS', 'LIKE', 'SHADOWS', 'IN', 'THE', 'WATER', 'LIKE', 'DREAMS', 'OF', 'INFANTS', 'LIKE', 'A', 'SMILE', 'UPON', 'AN', "INFANT'S", 'FACE'] +908-157963-0005-2568: ref=['LIKE', 'THE', 'DOVES', 'VOICE', 'LIKE', 'TRANSIENT', 'DAY', 'LIKE', 'MUSIC', 'IN', 'THE', 'AIR', 'AH'] +908-157963-0005-2568: hyp=['LIKE', 'THE', "DOVE'S", 'VOICE', 'LIKE', 'TRANSIENT', 'DAY', 'LIKE', 'MUSIC', 'IN', 'THE', 'AIR', 'AH'] +908-157963-0006-2569: ref=['AND', 'GENTLE', 'SLEEP', 'THE', 'SLEEP', 'OF', 'DEATH', 'AND', 'GENTLY', 'HEAR', 'THE', 'VOICE', 'OF', 'HIM', 'THAT', 'WALKETH', 'IN', 'THE', 'GARDEN', 'IN', 'THE', 'EVENING', 'TIME'] +908-157963-0006-2569: hyp=['AND', 'GENTLE', 'SLEEP', 'THE', 'SLEEP', 'OF', 'DEATH', 'AND', 'GENTLY', 'HEAR', 'THE', 'VOICE', 'OF', 'HIM', 'THAT', 'WALKETH', 'IN', 'THE', 'GARDEN', 'IN', 'THE', 'EVENING', 'TIME'] +908-157963-0007-2570: ref=['THE', 'LILLY', 'OF', 'THE', 'VALLEY', 'BREATHING', 'IN', 'THE', 'HUMBLE', 'GRASS', 'ANSWERD', 'THE', 'LOVELY', 'MAID', 'AND', 'SAID', 'I', 'AM', 'A', 'WATRY', 'WEED', 'AND', 'I', 'AM', 'VERY', 'SMALL', 'AND', 'LOVE', 'TO', 'DWELL', 'IN', 'LOWLY', 'VALES', 'SO', 'WEAK', 'THE', 'GILDED', 'BUTTERFLY', 'SCARCE', 'PERCHES', 'ON', 'MY', 'HEAD', 'YET', 'I', 'AM', 'VISITED', 'FROM', 'HEAVEN', 'AND', 'HE', 'THAT', 'SMILES', 'ON', 'ALL', 'WALKS', 'IN', 'THE', 'VALLEY', 'AND', 'EACH', 'MORN', 'OVER', 'ME', 'SPREADS', 'HIS', 'HAND', 'SAYING', 'REJOICE', 'THOU', 'HUMBLE', 'GRASS', 'THOU', 'NEW', 'BORN', 'LILY', 'FLOWER'] +908-157963-0007-2570: hyp=['THE', 'LILY', 'OF', 'THE', 'VALLEY', 'BREATHING', 'IN', 'THE', 'HUMBLE', 'GRASS', 'ANSWERED', 'THE', 'LOVELY', 'MAIDEN', 'SAID', 'I', 'AM', 'A', 'WATERYED', 'AND', 'I', 'AM', 'VERY', 'SMALL', 'AND', 'LOVE', 'TO', 'DWELL', 'IN', 'LOWLY', 'VALES', 'SO', 'WEAK', 'THE', 'GILDED', 'BUTTERFLY', 'SCARCE', 'PERCHES', 'ON', 'MY', 'HEAD', 'YET', 'I', 'AM', 'VISITED', 'FROM', 'HEAVEN', 'AND', 'HE', 'THAT', 'SMILES', 
'ON', 'ALL', 'WALKS', 'IN', 'THE', 'VALLEY', 'AND', 'EACH', 'MORN', 'OVER', 'ME', 'SPREADS', 'HIS', 'HAND', 'SAYING', 'REJOICE', 'THOU', 'HUMBLE', 'GRASS', 'THOU', 'NEW', 'BORN', 'LILY', 'FLOWER'] +908-157963-0008-2571: ref=['THOU', 'GENTLE', 'MAID', 'OF', 'SILENT', 'VALLEYS', 'AND', 'OF', 'MODEST', 'BROOKS', 'FOR', 'THOU', 'SHALL', 'BE', 'CLOTHED', 'IN', 'LIGHT', 'AND', 'FED', 'WITH', 'MORNING', 'MANNA', 'TILL', 'SUMMERS', 'HEAT', 'MELTS', 'THEE', 'BESIDE', 'THE', 'FOUNTAINS', 'AND', 'THE', 'SPRINGS', 'TO', 'FLOURISH', 'IN', 'ETERNAL', 'VALES', 'THEY', 'WHY', 'SHOULD', 'THEL', 'COMPLAIN'] +908-157963-0008-2571: hyp=['THOU', 'GENTLE', 'MAID', 'OF', 'SILENT', 'VALLEYS', 'AND', 'OF', 'MODEST', 'BROOKS', 'FOR', 'THOU', 'SHALT', 'BE', 'CLOTHED', 'IN', 'LIGHT', 'AND', 'FED', 'WITH', 'MORNING', 'MANNA', 'TILL', "SUMMER'S", 'HEAT', 'MELTS', 'THEE', 'BESIDE', 'THE', 'FOUNTAINS', 'AND', 'THE', 'SPRINGS', 'TO', 'FLOURISH', 'IN', 'ETERNAL', 'VALES', 'THEY', 'WHY', 'SHOULDST', 'THOU', 'COMPLAIN'] +908-157963-0009-2572: ref=['WHY', 'SHOULD', 'THE', 'MISTRESS', 'OF', 'THE', 'VALES', 'OF', 'HAR', 'UTTER', 'A', 'SIGH'] +908-157963-0009-2572: hyp=['WHY', 'SHOULD', 'THE', 'MISTRESS', 'OF', 'THE', 'VALES', 'OF', 'HARR', 'UTTER', 'A', 'SIGH'] +908-157963-0010-2573: ref=['SHE', 'CEASD', 'AND', 'SMILD', 'IN', 'TEARS', 'THEN', 'SAT', 'DOWN', 'IN', 'HER', 'SILVER', 'SHRINE'] +908-157963-0010-2573: hyp=['SHE', 'CEASED', 'AND', 'SMILED', 'IN', 'TEARS', 'THEN', 'SAT', 'DOWN', 'IN', 'HER', 'SILVER', 'SHRINE'] +908-157963-0011-2574: ref=['WHICH', 'THOU', 'DOST', 'SCATTER', 'ON', 'EVERY', 'LITTLE', 'BLADE', 'OF', 'GRASS', 'THAT', 'SPRINGS', 'REVIVES', 'THE', 'MILKED', 'COW', 'AND', 'TAMES', 'THE', 'FIRE', 'BREATHING', 'STEED'] +908-157963-0011-2574: hyp=['WHICH', 'THOU', 'DOST', 'SCATTER', 'ON', 'EVERY', 'LITTLE', 'BLADE', 'OF', 'GRASS', 'THAT', 'SPRINGS', 'REVIVES', 'THE', 'MILKED', 'COW', 'AND', 'TAMES', 'THE', 'FIRE', 'BREATHING', 'STEED'] +908-157963-0012-2575: ref=['BUT', 'THEL', 'IS', 'LIKE', 'A', 'FAINT', 'CLOUD', 'KINDLED', 'AT', 'THE', 'RISING', 'SUN', 'I', 'VANISH', 'FROM', 'MY', 'PEARLY', 'THRONE', 'AND', 'WHO', 'SHALL', 'FIND', 'MY', 'PLACE'] +908-157963-0012-2575: hyp=['BUT', 'THOU', 'IS', 'LIKE', 'A', 'FAINT', 'CLOUD', 'KINDLED', 'AT', 'THE', 'RISING', 'SUN', 'I', 'VANISH', 'FROM', 'MY', 'PEARLY', 'THRONE', 'AND', 'WHO', 'SHALL', 'FIND', 'MY', 'PLACE'] +908-157963-0013-2576: ref=['AND', 'WHY', 'IT', 'SCATTERS', 'ITS', 'BRIGHT', 'BEAUTY', 'THRO', 'THE', 'HUMID', 'AIR'] +908-157963-0013-2576: hyp=['AND', 'WHY', 'IT', 'SCATTERS', 'ITS', 'BRIGHT', 'BEAUTY', 'THROUGH', 'THE', 'HUMID', 'AIR'] +908-157963-0014-2577: ref=['DESCEND', 'O', 'LITTLE', 'CLOUD', 'AND', 'HOVER', 'BEFORE', 'THE', 'EYES', 'OF', 'THEL'] +908-157963-0014-2577: hyp=['DESCEND', 'O', 'LITTLE', 'CLOUD', 'AND', 'HOVER', 'BEFORE', 'THE', 'EYES', 'OF', 'THELL'] +908-157963-0015-2578: ref=['O', 'LITTLE', 'CLOUD', 'THE', 'VIRGIN', 'SAID', 'I', 'CHARGE', 'THEE', 'TO', 'TELL', 'ME', 'WHY', 'THOU', 'COMPLAINEST', 'NOW', 'WHEN', 'IN', 'ONE', 'HOUR', 'THOU', 'FADE', 'AWAY', 'THEN', 'WE', 'SHALL', 'SEEK', 'THEE', 'BUT', 'NOT', 'FIND', 'AH', 'THEL', 'IS', 'LIKE', 'TO', 'THEE'] +908-157963-0015-2578: hyp=['O', 'LITTLE', 'CLOUD', 'THE', 'VIRGIN', 'SAID', 'I', 'CHARGE', 'THEE', 'TO', 'TELL', 'ME', 'WHY', 'THOU', 'COMPLAINEST', 'NOW', 'WHEN', 'IN', 'ONE', 'HOUR', 'THOU', 'FADE', 'AWAY', 'THEN', 'WE', 'SHALL', 'SEEK', 'THEE', 'BUT', 'NOT', 'FIND', 'AH', 'FELL', 'IS', 'LIKE', 'TO', 'THEE'] +908-157963-0016-2579: ref=['I', 'PASS', 'AWAY', 'YET', 'I', 'COMPLAIN', 
'AND', 'NO', 'ONE', 'HEARS', 'MY', 'VOICE'] +908-157963-0016-2579: hyp=['I', 'PASS', 'AWAY', 'YET', 'I', 'COMPLAIN', 'AND', 'NO', 'ONE', 'HEARS', 'MY', 'VOICE'] +908-157963-0017-2580: ref=['THE', 'CLOUD', 'THEN', 'SHEWD', 'HIS', 'GOLDEN', 'HEAD', 'AND', 'HIS', 'BRIGHT', 'FORM', "EMERG'D"] +908-157963-0017-2580: hyp=['THE', 'CLOUD', 'THEN', 'SHOWED', 'HIS', 'GOLDEN', 'HEAD', 'AND', 'HIS', 'BRIGHT', 'FORM', "EMERG'D"] +908-157963-0018-2581: ref=['AND', 'FEAREST', 'THOU', 'BECAUSE', 'I', 'VANISH', 'AND', 'AM', 'SEEN', 'NO', 'MORE'] +908-157963-0018-2581: hyp=['AND', 'FEAREST', 'THOU', 'BECAUSE', 'I', 'VANISH', 'AND', 'AM', 'SEEN', 'NO', 'MORE'] +908-157963-0019-2582: ref=['IT', 'IS', 'TO', 'TENFOLD', 'LIFE', 'TO', 'LOVE', 'TO', 'PEACE', 'AND', 'RAPTURES', 'HOLY', 'UNSEEN', 'DESCENDING', 'WEIGH', 'MY', 'LIGHT', 'WINGS', 'UPON', 'BALMY', 'FLOWERS', 'AND', 'COURT', 'THE', 'FAIR', 'EYED', 'DEW', 'TO', 'TAKE', 'ME', 'TO', 'HER', 'SHINING', 'TENT', 'THE', 'WEEPING', 'VIRGIN', 'TREMBLING', 'KNEELS', 'BEFORE', 'THE', 'RISEN', 'SUN'] +908-157963-0019-2582: hyp=['IT', 'IS', 'TO', 'TENFOLD', 'LIFE', 'TO', 'LOVE', 'TO', 'PEACE', 'AND', 'RAPTURES', 'HOLY', 'UNSEEN', 'DESCENDING', 'WEIGH', 'MY', 'LIGHT', 'WINGS', 'UPON', 'BALMY', 'FLOWERS', 'AND', 'COURT', 'THE', 'FAIR', "EY'D", 'DEW', 'TO', 'TAKE', 'ME', 'TO', 'HER', 'SHINING', 'TENT', 'THE', 'WEEPING', 'VIRGIN', 'TREMBLING', 'KNEELS', 'BEFORE', 'THE', 'RISEN', 'SUN'] +908-157963-0020-2583: ref=['TILL', 'WE', 'ARISE', "LINK'D", 'IN', 'A', 'GOLDEN', 'BAND', 'AND', 'NEVER', 'PART', 'BUT', 'WALK', 'UNITED', 'BEARING', 'FOOD', 'TO', 'ALL', 'OUR', 'TENDER', 'FLOWERS'] +908-157963-0020-2583: hyp=['TILL', 'WE', 'ARISE', 'LINKED', 'IN', 'A', 'GOLDEN', 'BAND', 'AND', 'NEVER', 'PART', 'BUT', 'WALK', 'UNITED', 'BEARING', 'FOOD', 'TO', 'ALL', 'OUR', 'TENDER', 'FLOWERS'] +908-157963-0021-2584: ref=['LIVES', 'NOT', 'ALONE', 'NOR', 'OR', 'ITSELF', 'FEAR', 'NOT', 'AND', 'I', 'WILL', 'CALL', 'THE', 'WEAK', 'WORM', 'FROM', 'ITS', 'LOWLY', 'BED', 'AND', 'THOU', 'SHALT', 'HEAR', 'ITS', 'VOICE'] +908-157963-0021-2584: hyp=['LIVES', 'NOT', 'ALONE', 'NOR', 'OF', 'ITSELF', 'FEAR', 'NOT', 'AND', 'I', 'WILL', 'CALL', 'THE', 'WEAK', 'WORM', 'FROM', 'ITS', 'LOWLY', 'BED', 'AND', 'THOU', 'SHALT', 'HEAR', 'ITS', 'VOICE'] +908-157963-0022-2585: ref=['COME', 'FORTH', 'WORM', 'AND', 'THE', 'SILENT', 'VALLEY', 'TO', 'THY', 'PENSIVE', 'QUEEN'] +908-157963-0022-2585: hyp=['COME', 'FORTH', 'WORM', 'IN', 'THE', 'SILENT', 'VALLEY', 'TO', 'THY', 'PENSIVE', 'QUEEN'] +908-157963-0023-2586: ref=['THE', 'HELPLESS', 'WORM', 'AROSE', 'AND', 'SAT', 'UPON', 'THE', 'LILLYS', 'LEAF', 'AND', 'THE', 'BRIGHT', 'CLOUD', 'SAILD', 'ON', 'TO', 'FIND', 'HIS', 'PARTNER', 'IN', 'THE', 'VALE'] +908-157963-0023-2586: hyp=['THE', 'HELPLESS', 'WORM', 'AROSE', 'AND', 'SAT', 'UPON', 'THE', "LILY'S", 'LEAF', 'AND', 'THE', 'BRIGHT', 'CLOUD', 'SAILED', 'ON', 'TO', 'FIND', 'HIS', 'PARTNER', 'IN', 'THE', 'VALE'] +908-157963-0024-2587: ref=['IMAGE', 'OF', 'WEAKNESS', 'ART', 'THOU', 'BUT', 'A', 'WORM'] +908-157963-0024-2587: hyp=['IMAGE', 'OF', 'WEAKNESS', 'ART', 'THOU', 'BUT', 'A', 'WORM'] +908-157963-0025-2588: ref=['I', 'SEE', 'THEY', 'LAY', 'HELPLESS', 'AND', 'NAKED', 'WEEPING', 'AND', 'NONE', 'TO', 'ANSWER', 'NONE', 'TO', 'CHERISH', 'THEE', 'WITH', 'MOTHERS', 'SMILES'] +908-157963-0025-2588: hyp=['I', 'SEE', 'THEY', 'LAY', 'HELPLESS', 'AND', 'NAKED', 'WEEPING', 'AND', 'NONE', 'TO', 'ANSWER', 'NONE', 'TO', 'CHERISH', 'THEE', 'WITH', "MOTHER'S", 'SMILES'] +908-157963-0026-2589: ref=['AND', 'SAYS', 'THOU', 'MOTHER', 'OF', 
'MY', 'CHILDREN', 'I', 'HAVE', 'LOVED', 'THEE', 'AND', 'I', 'HAVE', 'GIVEN', 'THEE', 'A', 'CROWN', 'THAT', 'NONE', 'CAN', 'TAKE', 'AWAY'] +908-157963-0026-2589: hyp=['AND', 'SAYS', 'THOU', 'MOTHER', 'OF', 'MY', 'CHILDREN', 'I', 'HAVE', 'LOVED', 'THEE', 'AND', 'I', 'HAVE', 'GIVEN', 'THEE', 'A', 'CROWN', 'THAT', 'NONE', 'CAN', 'TAKE', 'AWAY'] +908-157963-0027-2590: ref=['AND', 'LAY', 'ME', 'DOWN', 'IN', 'THY', 'COLD', 'BED', 'AND', 'LEAVE', 'MY', 'SHINING', 'LOT'] +908-157963-0027-2590: hyp=['AND', 'LAY', 'ME', 'DOWN', 'IN', 'THY', 'COLD', 'BED', 'AND', 'LEAVE', 'MY', 'SHINING', 'LOT'] +908-157963-0028-2591: ref=['OR', 'AN', 'EYE', 'OF', 'GIFTS', 'AND', 'GRACES', 'SHOWRING', 'FRUITS', 'AND', 'COINED', 'GOLD'] +908-157963-0028-2591: hyp=['OR', 'AN', 'EYE', 'OF', 'GIFTS', 'AND', 'GRACES', 'SHOWERING', 'FRUITS', 'IN', 'COINED', 'GOLD'] +908-157963-0029-2592: ref=['WHY', 'A', 'TONGUE', "IMPRESS'D", 'WITH', 'HONEY', 'FROM', 'EVERY', 'WIND'] +908-157963-0029-2592: hyp=['WHY', 'A', 'TONGUE', 'IMPRESSED', 'WITH', 'HONEY', 'FROM', 'EVERY', 'WIND'] +908-157963-0030-2593: ref=['WHY', 'AN', 'EAR', 'A', 'WHIRLPOOL', 'FIERCE', 'TO', 'DRAW', 'CREATIONS', 'IN'] +908-157963-0030-2593: hyp=['WHY', 'AN', 'EAR', 'A', 'WHIRLPOOL', 'FIERCE', 'TO', 'DRAW', 'CREATIONS', 'IN'] +908-31957-0000-2594: ref=['ALL', 'IS', 'SAID', 'WITHOUT', 'A', 'WORD'] +908-31957-0000-2594: hyp=['ALL', 'IS', 'SAID', 'WITHOUT', 'A', 'WORD'] +908-31957-0001-2595: ref=['I', 'SIT', 'BENEATH', 'THY', 'LOOKS', 'AS', 'CHILDREN', 'DO', 'IN', 'THE', 'NOON', 'SUN', 'WITH', 'SOULS', 'THAT', 'TREMBLE', 'THROUGH', 'THEIR', 'HAPPY', 'EYELIDS', 'FROM', 'AN', 'UNAVERRED', 'YET', 'PRODIGAL', 'INWARD', 'JOY'] +908-31957-0001-2595: hyp=['I', 'SIT', 'BENEATH', 'THY', 'LOOKS', 'AS', 'CHILDREN', 'DO', 'IN', 'THE', 'NOON', 'SUN', 'WITH', 'SOULS', 'THAT', 'TREMBLE', 'THROUGH', 'THEIR', 'HAPPY', 'EYELIDS', 'FROM', 'AN', 'UNAVERRED', 'YET', 'PRODIGAL', 'INWARD', 'JOY'] +908-31957-0002-2596: ref=['I', 'DID', 'NOT', 'WRONG', 'MYSELF', 'SO', 'BUT', 'I', 'PLACED', 'A', 'WRONG', 'ON', 'THEE'] +908-31957-0002-2596: hyp=['I', 'DID', 'NOT', 'WRONG', 'MYSELF', 'SO', 'BUT', 'I', 'PLACED', 'A', 'WRONG', 'ON', 'THEE'] +908-31957-0003-2597: ref=['WHEN', 'CALLED', 'BEFORE', 'I', 'TOLD', 'HOW', 'HASTILY', 'I', 'DROPPED', 'MY', 'FLOWERS', 'OR', 'BRAKE', 'OFF', 'FROM', 'A', 'GAME'] +908-31957-0003-2597: hyp=['WHEN', 'CALLED', 'BEFORE', 'I', 'TOLD', 'HOW', 'HASTILY', 'I', 'DROPPED', 'MY', 'FLOWERS', 'OR', 'BREAK', 'OFF', 'FROM', 'A', 'GAME'] +908-31957-0004-2598: ref=['SHALL', 'I', 'NEVER', 'MISS', 'HOME', 'TALK', 'AND', 'BLESSING', 'AND', 'THE', 'COMMON', 'KISS', 'THAT', 'COMES', 'TO', 'EACH', 'IN', 'TURN', 'NOR', 'COUNT', 'IT', 'STRANGE', 'WHEN', 'I', 'LOOK', 'UP', 'TO', 'DROP', 'ON', 'A', 'NEW', 'RANGE', 'OF', 'WALLS', 'AND', 'FLOORS', 'ANOTHER', 'HOME', 'THAN', 'THIS'] +908-31957-0004-2598: hyp=['SHALL', 'I', 'NEVER', 'MISS', 'HOME', 'TALK', 'AND', 'BLESSING', 'AND', 'THE', 'COMMON', 'KISS', 'THAT', 'COMES', 'TO', 'EACH', 'IN', 'TURN', 'NOR', 'COUNT', 'IT', 'STRANGE', 'WHEN', 'I', 'LOOK', 'UP', 'TO', 'DROP', 'ON', 'A', 'NEW', 'RANGE', 'OF', 'WALLS', 'AND', 'FLOORS', 'ANOTHER', 'HOME', 'THAN', 'THIS'] +908-31957-0005-2599: ref=['ALAS', 'I', 'HAVE', 'GRIEVED', 'SO', 'I', 'AM', 'HARD', 'TO', 'LOVE'] +908-31957-0005-2599: hyp=['ALAS', 'I', 'HAVE', 'GRIEVED', 'SO', 'I', 'AM', 'HARD', 'TO', 'LOVE'] +908-31957-0006-2600: ref=['OPEN', 'THY', 'HEART', 'WIDE', 'AND', 'FOLD', 'WITHIN', 'THE', 'WET', 'WINGS', 'OF', 'THY', 'DOVE'] +908-31957-0006-2600: hyp=['OPEN', 'THY', 'HEART', 'WIDE', 
'AND', 'FOLD', 'WITHIN', 'THE', 'WET', 'WINGS', 'OF', 'THY', 'DOVE'] +908-31957-0007-2601: ref=['COULD', 'IT', 'MEAN', 'TO', 'LAST', 'A', 'LOVE', 'SET', 'PENDULOUS', 'BETWEEN', 'SORROW', 'AND', 'SORROW'] +908-31957-0007-2601: hyp=['COULD', 'IT', 'MEAN', 'TO', 'LAST', 'A', 'LOVE', 'SET', 'PENDULOUS', 'BETWEEN', 'SORROW', 'AND', 'SORROW'] +908-31957-0008-2602: ref=['NAY', 'I', 'RATHER', 'THRILLED', 'DISTRUSTING', 'EVERY', 'LIGHT', 'THAT', 'SEEMED', 'TO', 'GILD', 'THE', 'ONWARD', 'PATH', 'AND', 'FEARED', 'TO', 'OVERLEAN', 'A', 'FINGER', 'EVEN'] +908-31957-0008-2602: hyp=['NAY', 'I', 'RATHER', 'THRILLED', 'DISTRUSTING', 'EVERY', 'LIGHT', 'THAT', 'SEEMED', 'TO', 'GILD', 'THE', 'ONWARD', 'PATH', 'AND', 'FEARED', 'TO', 'OVERLEAN', 'A', 'FINGER', 'EVEN'] +908-31957-0009-2603: ref=['AND', 'THOUGH', 'I', 'HAVE', 'GROWN', 'SERENE', 'AND', 'STRONG', 'SINCE', 'THEN', 'I', 'THINK', 'THAT', 'GOD', 'HAS', 'WILLED', 'A', 'STILL', 'RENEWABLE', 'FEAR'] +908-31957-0009-2603: hyp=['AND', 'THOUGH', 'I', 'HAVE', 'GROWN', 'SERENE', 'AND', 'STRONG', 'SINCE', 'THEN', 'I', 'THINK', 'THAT', 'GOD', 'HAS', 'WILLED', 'A', 'STILL', 'RENEWABLE', 'FEAR'] +908-31957-0010-2604: ref=['O', 'LOVE', 'O', 'TROTH'] +908-31957-0010-2604: hyp=['O', 'LOVE', 'OH', 'TROTH'] +908-31957-0011-2605: ref=['AND', 'LOVE', 'BE', 'FALSE'] +908-31957-0011-2605: hyp=['AND', 'LOVE', 'BE', 'FALSE'] +908-31957-0012-2606: ref=['IF', 'HE', 'TO', 'KEEP', 'ONE', 'OATH', 'MUST', 'LOSE', 'ONE', 'JOY', 'BY', 'HIS', "LIFE'S", 'STAR', 'FORETOLD'] +908-31957-0012-2606: hyp=['IF', 'HE', 'TO', 'KEEP', 'ONE', 'OATH', 'MUST', 'LOSE', 'ONE', 'JOY', 'BY', 'HIS', "LIFE'S", 'STAR', 'FORETOLD'] +908-31957-0013-2607: ref=['SLOW', 'TO', 'WORLD', 'GREETINGS', 'QUICK', 'WITH', 'ITS', 'O', 'LIST', 'WHEN', 'THE', 'ANGELS', 'SPEAK'] +908-31957-0013-2607: hyp=['SLOW', 'TO', 'WORLD', 'GREETINGS', 'QUICK', 'WITH', 'ITS', 'O', 'LIST', 'WHEN', 'THE', 'ANGELS', 'SPEAK'] +908-31957-0014-2608: ref=['A', 'RING', 'OF', 'AMETHYST', 'I', 'COULD', 'NOT', 'WEAR', 'HERE', 'PLAINER', 'TO', 'MY', 'SIGHT', 'THAN', 'THAT', 'FIRST', 'KISS'] +908-31957-0014-2608: hyp=['A', 'RING', 'OF', 'AMETHYST', 'I', 'COULD', 'NOT', 'WEAR', 'HERE', 'PLAINER', 'TO', 'MY', 'SIGHT', 'THAN', 'THAT', 'FIRST', 'KISS'] +908-31957-0015-2609: ref=['THAT', 'WAS', 'THE', 'CHRISM', 'OF', 'LOVE', 'WHICH', "LOVE'S", 'OWN', 'CROWN', 'WITH', 'SANCTIFYING', 'SWEETNESS', 'DID', 'PRECEDE', 'THE', 'THIRD', 'UPON', 'MY', 'LIPS', 'WAS', 'FOLDED', 'DOWN', 'IN', 'PERFECT', 'PURPLE', 'STATE', 'SINCE', 'WHEN', 'INDEED', 'I', 'HAVE', 'BEEN', 'PROUD', 'AND', 'SAID', 'MY', 'LOVE', 'MY', 'OWN'] +908-31957-0015-2609: hyp=['THAT', 'WAS', 'THE', 'CHRISM', 'OF', 'LOVE', 'WHICH', "LOVE'S", 'OWN', 'CROWN', 'WITH', 'SANCTIFYING', 'SWEETNESS', 'DID', 'PROCEED', 'THE', 'THIRD', 'UPON', 'MY', 'LIPS', 'WAS', 'FOLDED', 'DOWN', 'IN', 'PERFECT', 'PURPLE', 'STATE', 'SINCE', 'WHEN', 'INDEED', 'I', 'HAVE', 'BEEN', 'PROUD', 'AND', 'SAID', 'MY', 'LOVE', 'MY', 'OWN'] +908-31957-0016-2610: ref=['DEAREST', 'TEACH', 'ME', 'SO', 'TO', 'POUR', 'OUT', 'GRATITUDE', 'AS', 'THOU', 'DOST', 'GOOD'] +908-31957-0016-2610: hyp=['DEAREST', 'TEACH', 'ME', 'SO', 'TO', 'POUR', 'OUT', 'GRATITUDE', 'AS', 'THOU', 'DOST', 'GOOD'] +908-31957-0017-2611: ref=['MUSSULMANS', 'AND', 'GIAOURS', 'THROW', 'KERCHIEFS', 'AT', 'A', 'SMILE', 'AND', 'HAVE', 'NO', 'RUTH', 'FOR', 'ANY', 'WEEPING'] +908-31957-0017-2611: hyp=['MUSSULMANS', 'AND', 'GUYORES', 'THROW', 'KERCHIEFS', 'AT', 'A', 'SMILE', 'AND', 'HAVE', 'NO', 'RUOTH', 'FOR', 'ANY', 'WEEPING'] +908-31957-0018-2612: ref=['BUT', 'THOU', 
'ART', 'NOT', 'SUCH', 'A', 'LOVER', 'MY', 'BELOVED'] +908-31957-0018-2612: hyp=['BUT', 'THOU', 'ART', 'NOT', 'SUCH', 'A', 'LOVER', 'MY', 'BELOVED'] +908-31957-0019-2613: ref=['THOU', 'CANST', 'WAIT', 'THROUGH', 'SORROW', 'AND', 'SICKNESS', 'TO', 'BRING', 'SOULS', 'TO', 'TOUCH', 'AND', 'THINK', 'IT', 'SOON', 'WHEN', 'OTHERS', 'CRY', 'TOO', 'LATE'] +908-31957-0019-2613: hyp=['THOU', 'CANST', 'WAIT', 'THROUGH', 'SORROW', 'AND', 'SICKNESS', 'TO', 'BRING', 'SOULS', 'TO', 'TOUCH', 'AND', 'THINK', 'IT', 'SOON', 'WHEN', 'OTHERS', 'CRY', 'TOO', 'LATE'] +908-31957-0020-2614: ref=['I', 'THANK', 'ALL', 'WHO', 'HAVE', 'LOVED', 'ME', 'IN', 'THEIR', 'HEARTS', 'WITH', 'THANKS', 'AND', 'LOVE', 'FROM', 'MINE'] +908-31957-0020-2614: hyp=['I', 'THANK', 'ALL', 'WHO', 'HAVE', 'LOVED', 'ME', 'IN', 'THEIR', 'HEARTS', 'WITH', 'THANKS', 'AND', 'LOVE', 'FROM', 'MINE'] +908-31957-0021-2615: ref=['OH', 'TO', 'SHOOT', 'MY', "SOUL'S", 'FULL', 'MEANING', 'INTO', 'FUTURE', 'YEARS', 'THAT', 'THEY', 'SHOULD', 'LEND', 'IT', 'UTTERANCE', 'AND', 'SALUTE', 'LOVE', 'THAT', 'ENDURES', 'FROM', 'LIFE', 'THAT', 'DISAPPEARS'] +908-31957-0021-2615: hyp=['OH', 'TO', 'SHOOT', 'MY', "SOUL'S", 'FULL', 'MEANING', 'INTO', 'FUTURE', 'YEARS', 'THAT', 'THEY', 'SHOULD', 'LEND', 'IT', 'UTTERANCE', 'AND', 'SALUTE', 'LOVE', 'THAT', 'ENDURES', 'FROM', 'LIFE', 'THAT', 'DISAPPEARS'] +908-31957-0022-2616: ref=['THEN', 'I', 'LONG', 'TRIED', 'BY', 'NATURAL', 'ILLS', 'RECEIVED', 'THE', 'COMFORT', 'FAST', 'WHILE', 'BUDDING', 'AT', 'THY', 'SIGHT', 'MY', "PILGRIM'S", 'STAFF', 'GAVE', 'OUT', 'GREEN', 'LEAVES', 'WITH', 'MORNING', 'DEWS', 'IMPEARLED'] +908-31957-0022-2616: hyp=['THEN', 'I', 'LONG', 'TRIED', 'BY', 'NATURAL', 'ILLS', 'RECEIVED', 'THE', 'COMFORT', 'FAST', 'WHILE', 'BUDDING', 'AT', 'THY', 'SIGHT', 'MY', "PILGRIM'S", 'STAFF', 'GAVE', 'OUT', 'GREEN', 'LEAVES', 'WITH', 'MORNING', 'DEWS', 'EMPEARLED'] +908-31957-0023-2617: ref=['I', 'LOVE', 'THEE', 'FREELY', 'AS', 'MEN', 'STRIVE', 'FOR', 'RIGHT', 'I', 'LOVE', 'THEE', 'PURELY', 'AS', 'THEY', 'TURN', 'FROM', 'PRAISE'] +908-31957-0023-2617: hyp=['I', 'LOVE', 'THEE', 'FREELY', 'AS', 'MEN', 'STRIVE', 'FOR', 'RIGHT', 'I', 'LOVE', 'THEE', 'PURELY', 'AS', 'THEY', 'TURN', 'FROM', 'PRAISE'] +908-31957-0024-2618: ref=['I', 'LOVE', 'THEE', 'WITH', 'THE', 'PASSION', 'PUT', 'TO', 'USE', 'IN', 'MY', 'OLD', 'GRIEFS', 'AND', 'WITH', 'MY', "CHILDHOOD'S", 'FAITH'] +908-31957-0024-2618: hyp=['I', 'LOVE', 'THEE', 'WITH', 'THE', 'PASSION', 'PUT', 'TO', 'USE', 'AND', 'MY', 'OLD', 'GRIEFS', 'AND', 'WITH', 'MY', "CHILDHOOD'S", 'FAITH'] +908-31957-0025-2619: ref=['I', 'LOVE', 'THEE', 'WITH', 'A', 'LOVE', 'I', 'SEEMED', 'TO', 'LOSE', 'WITH', 'MY', 'LOST', 'SAINTS', 'I', 'LOVE', 'THEE', 'WITH', 'THE', 'BREATH', 'SMILES', 'TEARS', 'OF', 'ALL', 'MY', 'LIFE', 'AND', 'IF', 'GOD', 'CHOOSE', 'I', 'SHALL', 'BUT', 'LOVE', 'THEE', 'BETTER', 'AFTER', 'DEATH'] +908-31957-0025-2619: hyp=['I', 'LOVE', 'THEE', 'WITH', 'A', 'LOVE', 'I', 'SEEMED', 'TO', 'LOSE', 'WITH', 'MY', 'LOST', 'SAINTS', 'I', 'LOVE', 'THEE', 'WITH', 'THE', 'BREATH', 'SMILES', 'TEARS', 'OF', 'ALL', 'MY', 'LIFE', 'AND', 'IF', 'GOD', 'CHOOSE', 'I', 'SHALL', 'BUT', 'LOVE', 'THEE', 'BETTER', 'AFTER', 'DEATH'] diff --git a/decoding_results/attention-decoder-rescoring-no-ngram/recogs-test-other-epoch-50_avg-20_use-averaged-model.txt b/decoding_results/attention-decoder-rescoring-no-ngram/recogs-test-other-epoch-50_avg-20_use-averaged-model.txt new file mode 100644 index 0000000000000000000000000000000000000000..de53d61d845bb4e471710832549882576d6f9d1e --- /dev/null +++ 
b/decoding_results/attention-decoder-rescoring-no-ngram/recogs-test-other-epoch-50_avg-20_use-averaged-model.txt @@ -0,0 +1,5878 @@ +1688-142285-0000-0: ref=["THERE'S", 'IRON', 'THEY', 'SAY', 'IN', 'ALL', 'OUR', 'BLOOD', 'AND', 'A', 'GRAIN', 'OR', 'TWO', 'PERHAPS', 'IS', 'GOOD', 'BUT', 'HIS', 'HE', 'MAKES', 'ME', 'HARSHLY', 'FEEL', 'HAS', 'GOT', 'A', 'LITTLE', 'TOO', 'MUCH', 'OF', 'STEEL', 'ANON'] +1688-142285-0000-0: hyp=["THERE'S", 'IRON', 'THEY', 'SAY', 'IN', 'ALL', 'OUR', 'BLOOD', 'AND', 'A', 'GRAIN', 'OR', 'TWO', 'PERHAPS', 'IS', 'GOOD', 'BUT', 'HIS', 'HE', 'MAKES', 'ME', 'HARSHLY', 'FEEL', 'HAS', 'GOT', 'A', 'LITTLE', 'TOO', 'MUCH', 'OF', 'STEEL', 'ANON'] +1688-142285-0001-1: ref=['MARGARET', 'SAID', 'MISTER', 'HALE', 'AS', 'HE', 'RETURNED', 'FROM', 'SHOWING', 'HIS', 'GUEST', 'DOWNSTAIRS', 'I', 'COULD', 'NOT', 'HELP', 'WATCHING', 'YOUR', 'FACE', 'WITH', 'SOME', 'ANXIETY', 'WHEN', 'MISTER', 'THORNTON', 'MADE', 'HIS', 'CONFESSION', 'OF', 'HAVING', 'BEEN', 'A', 'SHOP', 'BOY'] +1688-142285-0001-1: hyp=['MARGARET', 'SAID', 'MISTER', 'HALE', 'AS', 'HE', 'RETURNED', 'FROM', 'SHOWING', 'HIS', 'GUEST', 'DOWN', 'STAIRS', 'I', 'COULD', 'NOT', 'HELP', 'WATCHING', 'YOUR', 'FACE', 'WITH', 'SOME', 'ANXIETY', 'WHEN', 'MISTER', 'THORNTON', 'MADE', 'HIS', 'CONFESSION', 'OF', 'HAVING', 'BEEN', 'A', 'SHOP', 'BOY'] +1688-142285-0002-2: ref=['YOU', "DON'T", 'MEAN', 'THAT', 'YOU', 'THOUGHT', 'ME', 'SO', 'SILLY'] +1688-142285-0002-2: hyp=['YOU', "DON'T", 'MEAN', 'THAT', 'YOU', 'THOUGHT', 'ME', 'SO', 'SILLY'] +1688-142285-0003-3: ref=['I', 'REALLY', 'LIKED', 'THAT', 'ACCOUNT', 'OF', 'HIMSELF', 'BETTER', 'THAN', 'ANYTHING', 'ELSE', 'HE', 'SAID'] +1688-142285-0003-3: hyp=['I', 'REALLY', 'LIKED', 'THAT', 'ACCOUNT', 'OF', 'HIMSELF', 'BETTER', 'THAN', 'ANYTHING', 'ELSE', 'HE', 'SAID'] +1688-142285-0004-4: ref=['HIS', 'STATEMENT', 'OF', 'HAVING', 'BEEN', 'A', 'SHOP', 'BOY', 'WAS', 'THE', 'THING', 'I', 'LIKED', 'BEST', 'OF', 'ALL'] +1688-142285-0004-4: hyp=['HIS', 'STATEMENT', 'OF', 'HAVING', 'BEEN', 'A', 'SHOPBOY', 'WAS', 'THE', 'THING', 'I', 'LIKED', 'BEST', 'OF', 'ALL'] +1688-142285-0005-5: ref=['YOU', 'WHO', 'WERE', 'ALWAYS', 'ACCUSING', 'PEOPLE', 'OF', 'BEING', 'SHOPPY', 'AT', 'HELSTONE'] +1688-142285-0005-5: hyp=['YOU', 'WHO', 'WERE', 'ALWAYS', 'ACCUSING', 'PEOPLE', 'OF', 'BEING', 'SHOPPY', 'AT', 'HELSTONE'] +1688-142285-0006-6: ref=['I', "DON'T", 'THINK', 'MISTER', 'HALE', 'YOU', 'HAVE', 'DONE', 'QUITE', 'RIGHT', 'IN', 'INTRODUCING', 'SUCH', 'A', 'PERSON', 'TO', 'US', 'WITHOUT', 'TELLING', 'US', 'WHAT', 'HE', 'HAD', 'BEEN'] +1688-142285-0006-6: hyp=['I', "DON'T", 'THINK', 'MISTER', 'HALE', 'YOU', 'HAVE', 'DONE', 'QUITE', 'RIGHT', 'IN', 'INTRODUCING', 'SUCH', 'A', 'PERSON', 'TO', 'US', 'WITHOUT', 'TELLING', 'US', 'WHAT', 'HE', 'HAD', 'BEEN'] +1688-142285-0007-7: ref=['I', 'REALLY', 'WAS', 'VERY', 'MUCH', 'AFRAID', 'OF', 'SHOWING', 'HIM', 'HOW', 'MUCH', 'SHOCKED', 'I', 'WAS', 'AT', 'SOME', 'PARTS', 'OF', 'WHAT', 'HE', 'SAID'] +1688-142285-0007-7: hyp=['I', 'REALLY', 'WAS', 'VERY', 'MUCH', 'AFRAID', 'OF', 'SHOWING', 'HIM', 'HOW', 'MUCH', 'SHOCKED', 'I', 'WAS', 'AT', 'SOME', 'PART', 'OF', 'WHAT', 'HE', 'SAID'] +1688-142285-0008-8: ref=['HIS', 'FATHER', 'DYING', 'IN', 'MISERABLE', 'CIRCUMSTANCES'] +1688-142285-0008-8: hyp=['HIS', 'FATHER', 'DYING', 'IN', 'MISERABLE', 'CIRCUMSTANCES'] +1688-142285-0009-9: ref=['WHY', 'IT', 'MIGHT', 'HAVE', 'BEEN', 'IN', 'THE', 'WORKHOUSE'] +1688-142285-0009-9: hyp=['WHY', 'IT', 'MIGHT', 'HAVE', 'BEEN', 'IN', 'THE', 'WORKHOUSE'] +1688-142285-0010-10: ref=['HIS', 'FATHER', 
'SPECULATED', 'WILDLY', 'FAILED', 'AND', 'THEN', 'KILLED', 'HIMSELF', 'BECAUSE', 'HE', 'COULD', 'NOT', 'BEAR', 'THE', 'DISGRACE'] +1688-142285-0010-10: hyp=['HIS', 'FATHER', 'SPECULATED', 'WILDLY', 'FAILED', 'AND', 'THEN', 'KILLED', 'HIMSELF', 'BECAUSE', 'HE', 'COULD', 'NOT', 'BEAR', 'THE', 'DISGRACE'] +1688-142285-0011-11: ref=['ALL', 'HIS', 'FORMER', 'FRIENDS', 'SHRUNK', 'FROM', 'THE', 'DISCLOSURES', 'THAT', 'HAD', 'TO', 'BE', 'MADE', 'OF', 'HIS', 'DISHONEST', 'GAMBLING', 'WILD', 'HOPELESS', 'STRUGGLES', 'MADE', 'WITH', 'OTHER', "PEOPLE'S", 'MONEY', 'TO', 'REGAIN', 'HIS', 'OWN', 'MODERATE', 'PORTION', 'OF', 'WEALTH'] +1688-142285-0011-11: hyp=['ALL', 'HIS', 'FORMER', 'FRIENDS', 'SHRUNK', 'FROM', 'THE', 'DISCLOSURES', 'THAT', 'HAD', 'TO', 'BE', 'MADE', 'OF', 'HIS', 'DISHONEST', 'GAMBLING', 'WILD', 'HOPELESS', 'STRUGGLES', 'MADE', 'WITH', 'OTHER', "PEOPLE'S", 'MONEY', 'TO', 'REGAIN', 'HIS', 'OWN', 'MODERATE', 'PORTION', 'OF', 'WEALTH'] +1688-142285-0012-12: ref=['NO', 'ONE', 'CAME', 'FORWARDS', 'TO', 'HELP', 'THE', 'MOTHER', 'AND', 'THIS', 'BOY'] +1688-142285-0012-12: hyp=['NO', 'ONE', 'CAME', 'FORWARDS', 'TO', 'HELP', 'THE', 'MOTHER', 'AND', 'THIS', 'BOY'] +1688-142285-0013-13: ref=['AT', 'LEAST', 'NO', 'FRIEND', 'CAME', 'FORWARDS', 'IMMEDIATELY', 'AND', 'MISSUS', 'THORNTON', 'IS', 'NOT', 'ONE', 'I', 'FANCY', 'TO', 'WAIT', 'TILL', 'TARDY', 'KINDNESS', 'COMES', 'TO', 'FIND', 'HER', 'OUT'] +1688-142285-0013-13: hyp=['AT', 'LEAST', 'NO', 'FRIEND', 'CAME', 'FORWARDS', 'IMMEDIATELY', 'AND', 'MISTER', 'THORNTON', 'IS', 'NOT', 'ONE', 'I', 'FANCY', 'TO', 'WAIT', 'TILL', 'TARDY', 'KINDNESS', 'COMES', 'TO', 'FIND', 'HER', 'OUT'] +1688-142285-0014-14: ref=['SO', 'THEY', 'LEFT', 'MILTON'] +1688-142285-0014-14: hyp=['SO', 'THEY', 'LEFT', 'MILTON'] +1688-142285-0015-15: ref=['HOW', 'TAINTED', 'ASKED', 'HER', 'FATHER'] +1688-142285-0015-15: hyp=['HOW', 'TAINTED', 'ASKED', 'HER', 'FATHER'] +1688-142285-0016-16: ref=['OH', 'PAPA', 'BY', 'THAT', 'TESTING', 'EVERYTHING', 'BY', 'THE', 'STANDARD', 'OF', 'WEALTH'] +1688-142285-0016-16: hyp=['OH', 'PAPA', 'BY', 'THAT', 'TESTING', 'EVERYTHING', 'BY', 'THE', 'STANDARD', 'OF', 'WEALTH'] +1688-142285-0017-17: ref=['WHEN', 'HE', 'SPOKE', 'OF', 'THE', 'MECHANICAL', 'POWERS', 'HE', 'EVIDENTLY', 'LOOKED', 'UPON', 'THEM', 'ONLY', 'AS', 'NEW', 'WAYS', 'OF', 'EXTENDING', 'TRADE', 'AND', 'MAKING', 'MONEY'] +1688-142285-0017-17: hyp=['WHEN', 'HE', 'SPOKE', 'OF', 'THE', 'MECHANICAL', 'POWERS', 'HE', 'EVIDENTLY', 'LOOKED', 'UPON', 'THEM', 'ONLY', 'AS', 'NEW', 'WAYS', 'OF', 'EXTENDING', 'TRADE', 'AND', 'MAKING', 'MONEY'] +1688-142285-0018-18: ref=['AND', 'THE', 'POOR', 'MEN', 'AROUND', 'HIM', 'THEY', 'WERE', 'POOR', 'BECAUSE', 'THEY', 'WERE', 'VICIOUS', 'OUT', 'OF', 'THE', 'PALE', 'OF', 'HIS', 'SYMPATHIES', 'BECAUSE', 'THEY', 'HAD', 'NOT', 'HIS', 'IRON', 'NATURE', 'AND', 'THE', 'CAPABILITIES', 'THAT', 'IT', 'GIVES', 'HIM', 'FOR', 'BEING', 'RICH'] +1688-142285-0018-18: hyp=['AND', 'THE', 'POOR', 'MEN', 'AROUND', 'HIM', 'THEY', 'WERE', 'POOR', 'BECAUSE', 'THEY', 'WERE', 'VICIOUS', 'OUT', 'OF', 'THE', 'PALE', 'OF', 'HIS', 'SYMPATHIES', 'BECAUSE', 'THEY', 'HAD', 'NOT', 'HIS', 'IRON', 'NATURE', 'AND', 'THE', 'CAPABILITIES', 'THAT', 'IT', 'GIVES', 'HIM', 'FOR', 'BEING', 'RICH'] +1688-142285-0019-19: ref=['NOT', 'VICIOUS', 'HE', 'NEVER', 'SAID', 'THAT'] +1688-142285-0019-19: hyp=['NOT', 'VICIOUS', 'HE', 'NEVER', 'SAID', 'THAT'] +1688-142285-0020-20: ref=['IMPROVIDENT', 'AND', 'SELF', 'INDULGENT', 'WERE', 'HIS', 'WORDS'] +1688-142285-0020-20: hyp=['IMPROVIDENT', 'AND', 'SELF', 
'INDULGENT', 'WERE', 'HIS', 'WORDS'] +1688-142285-0021-21: ref=['MARGARET', 'WAS', 'COLLECTING', 'HER', "MOTHER'S", 'WORKING', 'MATERIALS', 'AND', 'PREPARING', 'TO', 'GO', 'TO', 'BED'] +1688-142285-0021-21: hyp=['MARGARET', 'WAS', 'COLLECTING', 'HER', "MOTHER'S", 'WORKING', 'MATERIALS', 'AND', 'PREPARING', 'TO', 'GO', 'TO', 'BED'] +1688-142285-0022-22: ref=['JUST', 'AS', 'SHE', 'WAS', 'LEAVING', 'THE', 'ROOM', 'SHE', 'HESITATED', 'SHE', 'WAS', 'INCLINED', 'TO', 'MAKE', 'AN', 'ACKNOWLEDGMENT', 'WHICH', 'SHE', 'THOUGHT', 'WOULD', 'PLEASE', 'HER', 'FATHER', 'BUT', 'WHICH', 'TO', 'BE', 'FULL', 'AND', 'TRUE', 'MUST', 'INCLUDE', 'A', 'LITTLE', 'ANNOYANCE'] +1688-142285-0022-22: hyp=['JUST', 'AS', 'SHE', 'WAS', 'LEAVING', 'THE', 'ROOM', 'SHE', 'HESITATED', 'SHE', 'WAS', 'INCLINED', 'TO', 'MAKE', 'AN', 'ACKNOWLEDGMENT', 'WHICH', 'SHE', 'THOUGHT', 'WOULD', 'PLEASE', 'HER', 'FATHER', 'BUT', 'WHICH', 'TO', 'BE', 'FULL', 'AND', 'TRUE', 'MUST', 'INCLUDE', 'A', 'LITTLE', 'ANNOYANCE'] +1688-142285-0023-23: ref=['HOWEVER', 'OUT', 'IT', 'CAME'] +1688-142285-0023-23: hyp=['HOWEVER', 'OUT', 'IT', 'CAME'] +1688-142285-0024-24: ref=['PAPA', 'I', 'DO', 'THINK', 'MISTER', 'THORNTON', 'A', 'VERY', 'REMARKABLE', 'MAN', 'BUT', 'PERSONALLY', 'I', "DON'T", 'LIKE', 'HIM', 'AT', 'ALL'] +1688-142285-0024-24: hyp=['PAPA', 'I', 'DO', 'THINK', 'MISTER', 'THORNTON', 'A', 'VERY', 'REMARKABLE', 'MAN', 'BUT', 'PERSONALLY', 'I', "DON'T", 'LIKE', 'HIM', 'AT', 'ALL'] +1688-142285-0025-25: ref=['AND', 'I', 'DO', 'SAID', 'HER', 'FATHER', 'LAUGHING'] +1688-142285-0025-25: hyp=['AND', 'I', 'DO', 'SAID', 'HER', 'FATHER', 'LAUGHING'] +1688-142285-0026-26: ref=['PERSONALLY', 'AS', 'YOU', 'CALL', 'IT', 'AND', 'ALL'] +1688-142285-0026-26: hyp=['PERSONALLY', 'AS', 'YOU', 'CALL', 'IT', 'AND', 'ALL'] +1688-142285-0027-27: ref=['I', "DON'T", 'SET', 'HIM', 'UP', 'FOR', 'A', 'HERO', 'OR', 'ANYTHING', 'OF', 'THAT', 'KIND'] +1688-142285-0027-27: hyp=['I', "DON'T", 'SET', 'HIM', 'UP', 'FOR', 'A', 'HERO', 'OR', 'ANYTHING', 'OF', 'THAT', 'KIND'] +1688-142285-0028-28: ref=['BUT', 'GOOD', 'NIGHT', 'CHILD'] +1688-142285-0028-28: hyp=['BUT', 'GOOD', 'NIGHT', 'CHILD'] +1688-142285-0029-29: ref=['THERE', 'WERE', 'SEVERAL', 'OTHER', 'SIGNS', 'OF', 'SOMETHING', 'WRONG', 'ABOUT', 'MISSUS', 'HALE'] +1688-142285-0029-29: hyp=['THERE', 'WERE', 'SEVERAL', 'OTHER', 'SIGNS', 'OF', 'SOMETHING', 'WRONG', 'ABOUT', 'MISSUS', 'HALE'] +1688-142285-0030-30: ref=['SHE', 'AND', 'DIXON', 'HELD', 'MYSTERIOUS', 'CONSULTATIONS', 'IN', 'HER', 'BEDROOM', 'FROM', 'WHICH', 'DIXON', 'WOULD', 'COME', 'OUT', 'CRYING', 'AND', 'CROSS', 'AS', 'WAS', 'HER', 'CUSTOM', 'WHEN', 'ANY', 'DISTRESS', 'OF', 'HER', 'MISTRESS', 'CALLED', 'UPON', 'HER', 'SYMPATHY'] +1688-142285-0030-30: hyp=['SHE', 'AND', 'DIXON', 'HELD', 'MYSTERIOUS', 'CONSULTATIONS', 'IN', 'HER', 'BEDROOM', 'FROM', 'WHICH', 'DIXON', 'WOULD', 'COME', 'OUT', 'CRYING', 'AND', 'CROSS', 'AS', 'WAS', 'HER', 'CUSTOM', 'WHEN', 'ANY', 'DISTRESS', 'OF', 'HER', 'MISTRESS', 'CALLED', 'UPON', 'HER', 'SYMPATHY'] +1688-142285-0031-31: ref=['ONCE', 'MARGARET', 'HAD', 'GONE', 'INTO', 'THE', 'CHAMBER', 'SOON', 'AFTER', 'DIXON', 'LEFT', 'IT', 'AND', 'FOUND', 'HER', 'MOTHER', 'ON', 'HER', 'KNEES', 'AND', 'AS', 'MARGARET', 'STOLE', 'OUT', 'SHE', 'CAUGHT', 'A', 'FEW', 'WORDS', 'WHICH', 'WERE', 'EVIDENTLY', 'A', 'PRAYER', 'FOR', 'STRENGTH', 'AND', 'PATIENCE', 'TO', 'ENDURE', 'SEVERE', 'BODILY', 'SUFFERING'] +1688-142285-0031-31: hyp=['ONCE', 'MARGARET', 'HAD', 'GONE', 'INTO', 'THE', 'CHAMBER', 'SOON', 'AFTER', 'DIXON', 'LEFT', 'IT', 'AND', 'FOUND', 
'HER', 'MOTHER', 'ON', 'HER', 'KNEES', 'AND', 'AS', 'MARGARET', 'STOLE', 'OUT', 'SHE', 'CAUGHT', 'A', 'FEW', 'WORDS', 'WHICH', 'WERE', 'EVIDENTLY', 'A', 'PRAYER', 'FOR', 'STRENGTH', 'AND', 'PATIENCE', 'TO', 'ENDURE', 'SEVERE', 'BODILY', 'SUFFERING'] +1688-142285-0032-32: ref=['BUT', 'THOUGH', 'SHE', 'RECEIVED', 'CARESSES', 'AND', 'FOND', 'WORDS', 'BACK', 'AGAIN', 'IN', 'SUCH', 'PROFUSION', 'AS', 'WOULD', 'HAVE', 'GLADDENED', 'HER', 'FORMERLY', 'YET', 'SHE', 'FELT', 'THAT', 'THERE', 'WAS', 'A', 'SECRET', 'WITHHELD', 'FROM', 'HER', 'AND', 'SHE', 'BELIEVED', 'IT', 'BORE', 'SERIOUS', 'REFERENCE', 'TO', 'HER', "MOTHER'S", 'HEALTH'] +1688-142285-0032-32: hyp=['BUT', 'THOUGH', 'SHE', 'RECEIVED', 'CARESSES', 'AND', 'FOND', 'WORDS', 'BACK', 'AGAIN', 'IN', 'SUCH', 'PROFUSION', 'AS', 'WOULD', 'HAVE', 'GLADDENED', 'HER', 'FORMERLY', 'YET', 'SHE', 'FELT', 'THAT', 'THERE', 'WAS', 'A', 'SECRET', 'WITHHELD', 'FROM', 'HER', 'AND', 'SHE', 'BELIEVED', 'IT', 'BORE', 'SERIOUS', 'REFERENCE', 'TO', 'HER', "MOTHER'S", 'HEALTH'] +1688-142285-0033-33: ref=['SHE', 'LAY', 'AWAKE', 'VERY', 'LONG', 'THIS', 'NIGHT', 'PLANNING', 'HOW', 'TO', 'LESSEN', 'THE', 'EVIL', 'INFLUENCE', 'OF', 'THEIR', 'MILTON', 'LIFE', 'ON', 'HER', 'MOTHER'] +1688-142285-0033-33: hyp=['SHE', 'LAY', 'AWAKE', 'VERY', 'LONG', 'THIS', 'NIGHT', 'PLANNING', 'HOW', 'TO', 'LESSEN', 'THE', 'EVIL', 'INFLUENCE', 'OF', 'THEIR', 'MILTON', 'LIFE', 'ON', 'HER', 'MOTHER'] +1688-142285-0034-34: ref=['A', 'SERVANT', 'TO', 'GIVE', 'DIXON', 'PERMANENT', 'ASSISTANCE', 'SHOULD', 'BE', 'GOT', 'IF', 'SHE', 'GAVE', 'UP', 'HER', 'WHOLE', 'TIME', 'TO', 'THE', 'SEARCH', 'AND', 'THEN', 'AT', 'ANY', 'RATE', 'HER', 'MOTHER', 'MIGHT', 'HAVE', 'ALL', 'THE', 'PERSONAL', 'ATTENTION', 'SHE', 'REQUIRED', 'AND', 'HAD', 'BEEN', 'ACCUSTOMED', 'TO', 'HER', 'WHOLE', 'LIFE'] +1688-142285-0034-34: hyp=['A', 'SERVANT', 'TO', 'GIVE', 'DIXON', 'PERMANENT', 'ASSISTANCE', 'SHOULD', 'BE', 'GOT', 'IF', 'SHE', 'GAVE', 'UP', 'THE', 'WHOLE', 'TIME', 'TO', 'THE', 'SEARCH', 'AND', 'THEN', 'AT', 'ANY', 'RATE', 'HER', 'MOTHER', 'MIGHT', 'HAVE', 'ALL', 'THE', 'PERSONAL', 'ATTENTIONS', 'SHE', 'REQUIRED', 'AND', 'HAD', 'BEEN', 'ACCUSTOMED', 'TO', 'HER', 'WHOLE', 'LIFE'] +1688-142285-0035-35: ref=['VISITING', 'REGISTER', 'OFFICES', 'SEEING', 'ALL', 'MANNER', 'OF', 'UNLIKELY', 'PEOPLE', 'AND', 'VERY', 'FEW', 'IN', 'THE', 'LEAST', 'LIKELY', 'ABSORBED', "MARGARET'S", 'TIME', 'AND', 'THOUGHTS', 'FOR', 'SEVERAL', 'DAYS'] +1688-142285-0035-35: hyp=['VISITING', 'REGISTER', 'OFFICES', 'SEEING', 'ALL', 'MANNER', 'OF', 'UNLIKELY', 'PEOPLE', 'AND', 'VERY', 'FEW', 'IN', 'THE', 'LEAST', 'LIKELY', 'ABSORBED', "MARGARET'S", 'TIME', 'AND', 'THOUGHTS', 'FOR', 'SEVERAL', 'DAYS'] +1688-142285-0036-36: ref=['ONE', 'AFTERNOON', 'SHE', 'MET', 'BESSY', 'HIGGINS', 'IN', 'THE', 'STREET', 'AND', 'STOPPED', 'TO', 'SPEAK', 'TO', 'HER'] +1688-142285-0036-36: hyp=['ONE', 'AFTERNOON', 'SHE', 'MET', 'BESSY', 'HIGGINS', 'IN', 'THE', 'STREET', 'AND', 'STOPPED', 'TO', 'SPEAK', 'TO', 'HER'] +1688-142285-0037-37: ref=['WELL', 'BESSY', 'HOW', 'ARE', 'YOU'] +1688-142285-0037-37: hyp=['WELL', 'BESSY', 'HOW', 'ARE', 'YOU'] +1688-142285-0038-38: ref=['BETTER', 'AND', 'NOT', 'BETTER', 'IF', 'YO', 'KNOW', 'WHAT', 'THAT', 'MEANS'] +1688-142285-0038-38: hyp=['BETTER', 'AND', 'NOT', 'BETTER', 'IF', 'YOU', 'KNOW', 'WHAT', 'THAT', 'MEANS'] +1688-142285-0039-39: ref=['NOT', 'EXACTLY', 'REPLIED', 'MARGARET', 'SMILING'] +1688-142285-0039-39: hyp=['NOT', 'EXACTLY', 'REPLIED', 'MARGARET', 'SMILING'] +1688-142285-0040-40: ref=["I'M", 'BETTER', 'IN', 'NOT', 
'BEING', 'TORN', 'TO', 'PIECES', 'BY', 'COUGHING', "O'NIGHTS", 'BUT', "I'M", 'WEARY', 'AND', 'TIRED', 'O', 'MILTON', 'AND', 'LONGING', 'TO', 'GET', 'AWAY', 'TO', 'THE', 'LAND', 'O', 'BEULAH', 'AND', 'WHEN', 'I', 'THINK', "I'M", 'FARTHER', 'AND', 'FARTHER', 'OFF', 'MY', 'HEART', 'SINKS', 'AND', "I'M", 'NO', 'BETTER', "I'M", 'WORSE'] +1688-142285-0040-40: hyp=["I'M", 'BETTER', 'IN', 'NOT', 'BEING', 'TORN', 'TO', 'PIECES', 'BY', 'COUGHING', 'O', 'NIGHTS', 'BUT', "I'M", 'WEARY', 'AND', 'TIRED', 'O', 'MILTON', 'AND', 'LONGING', 'TO', 'GET', 'AWAY', 'TO', 'THE', 'LAND', 'OF', 'BOOLA', 'AND', 'WHEN', 'I', 'THINK', "I'M", 'FARTHER', 'AND', 'FARTHER', 'OFF', 'MY', 'HEART', 'SINKS', 'AND', "I'M", 'NO', 'BETTER', "I'M", 'WORSE'] +1688-142285-0041-41: ref=['MARGARET', 'TURNED', 'ROUND', 'TO', 'WALK', 'ALONGSIDE', 'OF', 'THE', 'GIRL', 'IN', 'HER', 'FEEBLE', 'PROGRESS', 'HOMEWARD'] +1688-142285-0041-41: hyp=['MARGARET', 'TURNED', 'ROUND', 'TO', 'WALK', 'ALONGSIDE', 'OF', 'THE', 'GIRL', 'IN', 'HER', 'FEEBLE', 'PROGRESS', 'HOMEWARDS'] +1688-142285-0042-42: ref=['BUT', 'FOR', 'A', 'MINUTE', 'OR', 'TWO', 'SHE', 'DID', 'NOT', 'SPEAK'] +1688-142285-0042-42: hyp=['BUT', 'FOR', 'A', 'MINUTE', 'OR', 'TWO', 'SHE', 'DID', 'NOT', 'SPEAK'] +1688-142285-0043-43: ref=['AT', 'LAST', 'SHE', 'SAID', 'IN', 'A', 'LOW', 'VOICE'] +1688-142285-0043-43: hyp=['AT', 'LAST', 'SHE', 'SAID', 'IN', 'A', 'LOW', 'VOICE'] +1688-142285-0044-44: ref=['BESSY', 'DO', 'YOU', 'WISH', 'TO', 'DIE'] +1688-142285-0044-44: hyp=['BESSY', 'DO', 'YOU', 'WISH', 'TO', 'DIE'] +1688-142285-0045-45: ref=['BESSY', 'WAS', 'SILENT', 'IN', 'HER', 'TURN', 'FOR', 'A', 'MINUTE', 'OR', 'TWO', 'THEN', 'SHE', 'REPLIED'] +1688-142285-0045-45: hyp=['BESSY', 'WAS', 'SILENT', 'IN', 'HER', 'TURN', 'FOR', 'A', 'MINUTE', 'OR', 'TWO', 'THEN', 'SHE', 'REPLIED'] +1688-142285-0046-46: ref=['NOUGHT', 'WORSE', 'THAN', 'MANY', 'OTHERS', 'I', 'RECKON'] +1688-142285-0046-46: hyp=['NOT', 'WORSE', 'THAN', 'MANY', 'OTHERS', 'I', 'RECKON'] +1688-142285-0047-47: ref=['BUT', 'WHAT', 'WAS', 'IT'] +1688-142285-0047-47: hyp=['BUT', 'WHAT', 'WAS', 'IT'] +1688-142285-0048-48: ref=['YOU', 'KNOW', "I'M", 'A', 'STRANGER', 'HERE', 'SO', 'PERHAPS', "I'M", 'NOT', 'SO', 'QUICK', 'AT', 'UNDERSTANDING', 'WHAT', 'YOU', 'MEAN', 'AS', 'IF', "I'D", 'LIVED', 'ALL', 'MY', 'LIFE', 'AT', 'MILTON'] +1688-142285-0048-48: hyp=['YOU', 'KNOW', "I'M", 'A', 'STRANGER', 'HERE', 'SO', 'PERHAPS', "I'M", 'NOT', 'SO', 'QUICK', 'AT', 'UNDERSTANDING', 'WHAT', 'YOU', 'MEAN', 'AS', 'IF', "I'D", 'LIVED', 'ALL', 'MY', 'LIFE', 'IN', 'MILTON'] +1688-142285-0049-49: ref=['I', 'HAD', 'FORGOTTEN', 'WHAT', 'I', 'SAID', 'FOR', 'THE', 'TIME', 'CONTINUED', 'MARGARET', 'QUIETLY'] +1688-142285-0049-49: hyp=['I', 'HAD', 'FORGOTTEN', 'WHAT', 'I', 'SAID', 'FOR', 'THE', 'TIME', 'CONTINUED', 'MARGARET', 'QUIETLY'] +1688-142285-0050-50: ref=['I', 'SHOULD', 'HAVE', 'THOUGHT', 'OF', 'IT', 'AGAIN', 'WHEN', 'I', 'WAS', 'LESS', 'BUSY', 'MAY', 'I', 'GO', 'WITH', 'YOU', 'NOW'] +1688-142285-0050-50: hyp=['I', 'SHOULD', 'HAVE', 'THOUGHT', 'OF', 'IT', 'AGAIN', 'WHEN', 'I', 'WAS', 'LESS', 'BUSY', 'MAY', 'I', 'GO', 'WITH', 'YOU', 'NOW'] +1688-142285-0051-51: ref=['THE', 'SHARPNESS', 'IN', 'HER', 'EYE', 'TURNED', 'TO', 'A', 'WISTFUL', 'LONGING', 'AS', 'SHE', 'MET', "MARGARET'S", 'SOFT', 'AND', 'FRIENDLY', 'GAZE'] +1688-142285-0051-51: hyp=['THE', 'SHARPNESS', 'IN', 'HER', 'EYE', 'TURNED', 'TO', 'A', 'WISTFUL', 'LONGING', 'AS', 'SHE', 'MET', "MARGARET'S", 'SOFT', 'AND', 'FRIENDLY', 'GAZE'] +1688-142285-0052-52: ref=['AS', 'THEY', 'TURNED', 'UP', 'INTO', 
'A', 'SMALL', 'COURT', 'OPENING', 'OUT', 'OF', 'A', 'SQUALID', 'STREET', 'BESSY', 'SAID'] +1688-142285-0052-52: hyp=['AS', 'THEY', 'TURNED', 'UP', 'INTO', 'A', 'SMALL', 'COURT', 'OPENING', 'OUT', 'INTO', 'A', 'SQUALID', 'STREET', 'BESSY', 'SAID'] +1688-142285-0053-53: ref=["YO'LL", 'NOT', 'BE', 'DAUNTED', 'IF', "FATHER'S", 'AT', 'HOME', 'AND', 'SPEAKS', 'A', 'BIT', 'GRUFFISH', 'AT', 'FIRST'] +1688-142285-0053-53: hyp=["YOU'LL", 'NOT', 'BE', 'DAUNTED', 'IF', "FATHER'S", 'AT', 'HOME', 'AND', 'SPEAKS', 'A', 'BIT', 'GRUFFISH', 'AT', 'FIRST'] +1688-142285-0054-54: ref=['BUT', 'NICHOLAS', 'WAS', 'NOT', 'AT', 'HOME', 'WHEN', 'THEY', 'ENTERED'] +1688-142285-0054-54: hyp=['BUT', 'NICHOLAS', 'WAS', 'NOT', 'AT', 'HOME', 'WHEN', 'THEY', 'ENTERED'] +1688-142285-0055-55: ref=['GASPED', 'BESSY', 'AT', 'LAST'] +1688-142285-0055-55: hyp=['GASPED', 'BESSY', 'AT', 'LAST'] +1688-142285-0056-56: ref=['BESSY', 'TOOK', 'A', 'LONG', 'AND', 'FEVERISH', 'DRAUGHT', 'AND', 'THEN', 'FELL', 'BACK', 'AND', 'SHUT', 'HER', 'EYES'] +1688-142285-0056-56: hyp=['BESSY', 'TOOK', 'A', 'LONG', 'AND', 'FEVERISH', 'DRAUGHT', 'AND', 'THEN', 'FELL', 'BACK', 'AND', 'SHUT', 'HER', 'EYES'] +1688-142285-0057-57: ref=['MARGARET', 'BENT', 'OVER', 'AND', 'SAID', 'BESSY', "DON'T", 'BE', 'IMPATIENT', 'WITH', 'YOUR', 'LIFE', 'WHATEVER', 'IT', 'IS', 'OR', 'MAY', 'HAVE', 'BEEN'] +1688-142285-0057-57: hyp=['MARGARET', 'BENT', 'OVER', 'AND', 'SAID', 'BESSY', "DON'T", 'BE', 'IMPATIENT', 'WITH', 'YOUR', 'LIFE', 'WHATEVER', 'IT', 'IS', 'OR', 'MAY', 'HAVE', 'BEEN'] +1688-142285-0058-58: ref=['REMEMBER', 'WHO', 'GAVE', 'IT', 'YOU', 'AND', 'MADE', 'IT', 'WHAT', 'IT', 'IS'] +1688-142285-0058-58: hyp=['REMEMBER', 'WHO', 'GAVE', 'IT', 'TO', 'YOU', 'AND', 'MADE', 'IT', 'WHAT', 'IT', 'IS'] +1688-142285-0059-59: ref=['NOW', "I'LL", 'NOT', 'HAVE', 'MY', 'WENCH', 'PREACHED', 'TO'] +1688-142285-0059-59: hyp=['NOW', "I'LL", 'NOT', 'HAVE', 'MY', 'WENCH', 'PREACHED', 'TO'] +1688-142285-0060-60: ref=['BUT', 'SURELY', 'SAID', 'MARGARET', 'FACING', 'ROUND', 'YOU', 'BELIEVE', 'IN', 'WHAT', 'I', 'SAID', 'THAT', 'GOD', 'GAVE', 'HER', 'LIFE', 'AND', 'ORDERED', 'WHAT', 'KIND', 'OF', 'LIFE', 'IT', 'WAS', 'TO', 'BE'] +1688-142285-0060-60: hyp=['BUT', 'SURELY', 'SAID', 'MARGARET', 'FACING', 'ROUND', 'YOU', 'BELIEVE', 'IN', 'WHAT', 'I', 'SAID', 'THAT', 'GOD', 'GAVE', 'HER', 'LIFE', 'AND', 'ORDERED', 'WHAT', 'KIND', 'OF', 'LIFE', 'IT', 'WAS', 'TO', 'BE'] +1688-142285-0061-61: ref=['I', 'BELIEVE', 'WHAT', 'I', 'SEE', 'AND', 'NO', 'MORE'] +1688-142285-0061-61: hyp=['I', 'BELIEVE', 'WHAT', 'I', 'SEE', 'AND', 'NO', 'MORE'] +1688-142285-0062-62: ref=["THAT'S", 'WHAT', 'I', 'BELIEVE', 'YOUNG', 'WOMAN'] +1688-142285-0062-62: hyp=["THAT'S", 'WHAT', 'I', 'BELIEVE', 'YOUNG', 'WOMAN'] +1688-142285-0063-63: ref=['I', "DON'T", 'BELIEVE', 'ALL', 'I', 'HEAR', 'NO', 'NOT', 'BY', 'A', 'BIG', 'DEAL'] +1688-142285-0063-63: hyp=['I', "DON'T", 'BELIEVE', 'ALL', 'I', 'HEAR', 'NO', 'NOT', 'BY', 'A', 'BIG', 'DEAL'] +1688-142285-0064-64: ref=['BUT', "HOO'S", 'COME', 'AT', 'LAST', 'AND', "HOO'S", 'WELCOME', 'AS', 'LONG', 'AS', "HOO'LL", 'KEEP', 'FROM', 'PREACHING', 'ON', 'WHAT', 'HOO', 'KNOWS', 'NOUGHT', 'ABOUT'] +1688-142285-0064-64: hyp=['BUT', "WHO'S", 'COME', 'AT', 'LAST', 'AND', "WHO'S", 'WELCOME', 'AS', 'LONG', 'AS', "HE'LL", 'KEEP', 'FROM', 'PREACHING', 'ON', 'WHAT', 'HE', 'KNOWS', 'NOUGHT', 'ABOUT'] +1688-142285-0065-65: ref=["IT'S", 'SIMPLE', 'AND', 'NOT', 'FAR', 'TO', 'FETCH', 'NOR', 'HARD', 'TO', 'WORK'] +1688-142285-0065-65: hyp=["IT'S", 'SIMPLE', 'AND', 'NOT', 'FAR', 'TO', 'FETCH', 'NOR', 
'HARD', 'TO', 'WORK'] +1688-142285-0066-66: ref=['BUT', 'THE', 'GIRL', 'ONLY', 'PLEADED', 'THE', 'MORE', 'WITH', 'MARGARET'] +1688-142285-0066-66: hyp=['BUT', 'THE', 'GIRL', 'ONLY', 'PLEADED', 'THE', 'MORE', 'WITH', 'MARGARET'] +1688-142285-0067-67: ref=["DON'T", 'THINK', 'HARDLY', 'ON', 'HIM', "HE'S", 'A', 'GOOD', 'MAN', 'HE', 'IS'] +1688-142285-0067-67: hyp=["DON'T", 'THINK', 'HARDLY', 'ON', 'HIM', "HE'S", 'A', 'GOOD', 'MAN', 'HE', 'IS'] +1688-142285-0068-68: ref=['I', 'SOMETIMES', 'THINK', 'I', 'SHALL', 'BE', 'MOPED', 'WI', 'SORROW', 'EVEN', 'IN', 'THE', 'CITY', 'OF', 'GOD', 'IF', 'FATHER', 'IS', 'NOT', 'THERE'] +1688-142285-0068-68: hyp=['I', 'SOMETIMES', 'THINK', 'I', 'SHALL', 'BE', 'MILKED', 'WITH', 'SORROW', 'EVEN', 'IN', 'THE', 'CITY', 'OF', 'GOD', 'IF', 'FATHER', 'IS', 'NOT', 'THERE'] +1688-142285-0069-69: ref=['THE', 'FEVERISH', 'COLOUR', 'CAME', 'INTO', 'HER', 'CHEEK', 'AND', 'THE', 'FEVERISH', 'FLAME', 'INTO', 'HER', 'EYE'] +1688-142285-0069-69: hyp=['THE', 'FEVERISH', 'COLOUR', 'CAME', 'INTO', 'HER', 'CHEEKS', 'AND', 'THE', 'FEVERISH', 'FLAME', 'INTO', 'HER', 'EYE'] +1688-142285-0070-70: ref=['BUT', 'YOU', 'WILL', 'BE', 'THERE', 'FATHER', 'YOU', 'SHALL', 'OH', 'MY', 'HEART'] +1688-142285-0070-70: hyp=['BUT', "YOU'LL", 'BE', 'THERE', 'FATHER', 'YOU', 'SHALL', 'OH', 'MY', 'HEART'] +1688-142285-0071-71: ref=['SHE', 'PUT', 'HER', 'HAND', 'TO', 'IT', 'AND', 'BECAME', 'GHASTLY', 'PALE'] +1688-142285-0071-71: hyp=['SHE', 'PUT', 'HER', 'HAND', 'TO', 'IT', 'AND', 'BECAME', 'GHASTLY', 'PALE'] +1688-142285-0072-72: ref=['MARGARET', 'HELD', 'HER', 'IN', 'HER', 'ARMS', 'AND', 'PUT', 'THE', 'WEARY', 'HEAD', 'TO', 'REST', 'UPON', 'HER', 'BOSOM'] +1688-142285-0072-72: hyp=['MARGARET', 'HELD', 'HER', 'IN', 'HER', 'ARMS', 'AND', 'PUT', 'THE', 'WEARY', 'HEAD', 'TO', 'REST', 'UPON', 'HER', 'BOSOM'] +1688-142285-0073-73: ref=['PRESENTLY', 'THE', 'SPASM', 'THAT', 'FORESHADOWED', 'DEATH', 'HAD', 'PASSED', 'AWAY', 'AND', 'BESSY', 'ROUSED', 'HERSELF', 'AND', 'SAID'] +1688-142285-0073-73: hyp=['PRESENTLY', 'THE', 'SPASM', 'THAT', 'FORESHADOWED', 'DEATH', 'HAD', 'PASSED', 'AWAY', 'AND', 'BESSY', 'ROUSED', 'HERSELF', 'AND', 'SAID'] +1688-142285-0074-74: ref=["I'LL", 'GO', 'TO', 'BED', "IT'S", 'BEST', 'PLACE', 'BUT', 'CATCHING', 'AT', "MARGARET'S", 'GOWN', "YO'LL", 'COME', 'AGAIN', 'I', 'KNOW', 'YO', 'WILL', 'BUT', 'JUST', 'SAY', 'IT'] +1688-142285-0074-74: hyp=["I'LL", 'GO', 'TO', 'BED', 'ITS', 'BEST', 'PLACE', 'BUT', 'CATCHING', 'AT', "MARGARET'S", 'GOWN', "YOU'LL", 'COME', 'AGAIN', 'I', 'KNOW', 'YOU', 'WILL', 'BUT', 'JUST', 'SAY', 'IT'] +1688-142285-0075-75: ref=['I', 'WILL', 'COME', 'TO', 'MORROW', 'SAID', 'MARGARET'] +1688-142285-0075-75: hyp=['OH', 'COME', 'TO', 'MORROW', 'SAID', 'MARGARET'] +1688-142285-0076-76: ref=['MARGARET', 'WENT', 'AWAY', 'VERY', 'SAD', 'AND', 'THOUGHTFUL'] +1688-142285-0076-76: hyp=['MARGARET', 'WENT', 'AWAY', 'VERY', 'SAD', 'AND', 'THOUGHTFUL'] +1688-142285-0077-77: ref=['SHE', 'WAS', 'LATE', 'FOR', 'TEA', 'AT', 'HOME'] +1688-142285-0077-77: hyp=['SHE', 'WAS', 'LATE', 'FOR', 'TEA', 'AT', 'HOME'] +1688-142285-0078-78: ref=['HAVE', 'YOU', 'MET', 'WITH', 'A', 'SERVANT', 'DEAR'] +1688-142285-0078-78: hyp=['HAVE', 'YOU', 'MET', 'WITH', 'A', 'SERVANT', 'DEAR'] +1688-142285-0079-79: ref=['NO', 'MAMMA', 'THAT', 'ANNE', 'BUCKLEY', 'WOULD', 'NEVER', 'HAVE', 'DONE'] +1688-142285-0079-79: hyp=['NO', 'MAMMA', 'THAT', 'ANNE', 'BUCKLEY', 'WOULD', 'NEVER', 'HAVE', 'DONE'] +1688-142285-0080-80: ref=['SUPPOSE', 'I', 'TRY', 'SAID', 'MISTER', 'HALE'] +1688-142285-0080-80: hyp=["S'POSE", 'I', 
'TRY', 'SAID', 'MISTER', 'HALE'] +1688-142285-0081-81: ref=['EVERYBODY', 'ELSE', 'HAS', 'HAD', 'THEIR', 'TURN', 'AT', 'THIS', 'GREAT', 'DIFFICULTY', 'NOW', 'LET', 'ME', 'TRY'] +1688-142285-0081-81: hyp=['EVERYBODY', 'ELSE', 'HAS', 'HAD', 'THEIR', 'TURN', 'AT', 'THIS', 'GREAT', 'DIFFICULTY', 'NOW', 'LET', 'ME', 'TRY'] +1688-142285-0082-82: ref=['I', 'MAY', 'BE', 'THE', 'CINDERELLA', 'TO', 'PUT', 'ON', 'THE', 'SLIPPER', 'AFTER', 'ALL'] +1688-142285-0082-82: hyp=['I', 'MAY', 'BE', 'THE', 'CINDERELLA', 'TO', 'PUT', 'ON', 'THE', 'SLIPPER', 'AFTER', 'ALL'] +1688-142285-0083-83: ref=['WHAT', 'WOULD', 'YOU', 'DO', 'PAPA', 'HOW', 'WOULD', 'YOU', 'SET', 'ABOUT', 'IT'] +1688-142285-0083-83: hyp=['WHAT', 'WOULD', 'YOU', 'DO', 'PAPA', 'HOW', 'WOULD', 'YOU', 'SET', 'ABOUT', 'IT'] +1688-142285-0084-84: ref=['WHY', 'I', 'WOULD', 'APPLY', 'TO', 'SOME', 'GOOD', 'HOUSE', 'MOTHER', 'TO', 'RECOMMEND', 'ME', 'ONE', 'KNOWN', 'TO', 'HERSELF', 'OR', 'HER', 'SERVANTS'] +1688-142285-0084-84: hyp=['WELL', 'I', 'I', 'WOULD', 'APPLY', 'IT', 'TO', 'SOME', 'GOOD', 'HOUSEMOTHER', 'TO', 'RECOMMEND', 'ME', 'ONE', 'KNOWN', 'TO', 'HERSELF', 'OR', 'HER', 'SERVANTS'] +1688-142285-0085-85: ref=['VERY', 'GOOD', 'BUT', 'WE', 'MUST', 'FIRST', 'CATCH', 'OUR', 'HOUSE', 'MOTHER'] +1688-142285-0085-85: hyp=['VERY', 'GOOD', 'BUT', 'WE', 'MUST', 'FIRST', 'CATCH', 'OUR', 'HOUSE', 'MOTHER'] +1688-142285-0086-86: ref=['THE', 'MOTHER', 'OF', 'WHOM', 'HE', 'SPOKE', 'TO', 'US', 'SAID', 'MARGARET'] +1688-142285-0086-86: hyp=['THE', 'MOTHER', 'OF', 'WHOM', 'YOU', 'SPOKE', 'TO', 'US', 'SAID', 'MARGARET'] +1688-142285-0087-87: ref=['MISSUS', 'THORNTON', 'THE', 'ONLY', 'MOTHER', 'HE', 'HAS', 'I', 'BELIEVE', 'SAID', 'MISTER', 'HALE', 'QUIETLY'] +1688-142285-0087-87: hyp=['MISTER', 'THORNTON', 'THE', 'ONLY', 'MOTHER', 'HE', 'HAS', 'I', 'BELIEVE', 'SAID', 'MISTER', 'HALE', 'QUIETLY'] +1688-142285-0088-88: ref=['I', 'SHALL', 'LIKE', 'TO', 'SEE', 'HER', 'SHE', 'MUST', 'BE', 'AN', 'UNCOMMON', 'PERSON', 'HER', 'MOTHER', 'ADDED'] +1688-142285-0088-88: hyp=['I', 'SHALL', 'LIKE', 'TO', 'SEE', 'HER', 'SHE', 'MUST', 'BE', 'AN', 'UNCOMMON', 'PERSON', 'HER', 'MOTHER', 'ADDED'] +1688-142285-0089-89: ref=['PERHAPS', 'SHE', 'MAY', 'HAVE', 'A', 'RELATION', 'WHO', 'MIGHT', 'SUIT', 'US', 'AND', 'BE', 'GLAD', 'OF', 'OUR', 'PLACE'] +1688-142285-0089-89: hyp=['PERHAPS', 'SHE', 'MAY', 'HAVE', 'A', 'RELATION', 'WHO', 'MIGHT', 'SUIT', 'US', 'AND', 'BE', 'GLAD', 'OF', 'OUR', 'PLACE'] +1688-142285-0090-90: ref=['SHE', 'SOUNDED', 'TO', 'BE', 'SUCH', 'A', 'CAREFUL', 'ECONOMICAL', 'PERSON', 'THAT', 'I', 'SHOULD', 'LIKE', 'ANY', 'ONE', 'OUT', 'OF', 'THE', 'SAME', 'FAMILY'] +1688-142285-0090-90: hyp=['SHE', 'SOUNDED', 'TO', 'BE', 'SUCH', 'A', 'CAREFUL', 'ECONOMICAL', 'PERSON', 'THAT', 'I', 'SHOULD', 'LIKE', 'ANY', 'ONE', 'OUT', 'OF', 'THE', 'SAME', 'FAMILY'] +1688-142285-0091-91: ref=['MY', 'DEAR', 'SAID', 'MISTER', 'HALE', 'ALARMED', 'PRAY', "DON'T", 'GO', 'OFF', 'ON', 'THAT', 'IDEA'] +1688-142285-0091-91: hyp=['MY', 'DEAR', 'SAID', 'MISTER', 'HALE', 'ALARMED', 'PRAY', "DON'T", 'GO', 'OFF', 'ON', 'THAT', 'IDEA'] +1688-142285-0092-92: ref=['I', 'AM', 'SURE', 'AT', 'ANY', 'RATE', 'SHE', 'WOULD', 'NOT', 'LIKE', 'STRANGERS', 'TO', 'KNOW', 'ANYTHING', 'ABOUT', 'IT'] +1688-142285-0092-92: hyp=['I', 'AM', 'SURE', 'AT', 'ANY', 'RATE', 'SHE', 'WOULD', 'NOT', 'LIKE', 'STRANGERS', 'TO', 'KNOW', 'ANYTHING', 'ABOUT', 'IT'] +1688-142285-0093-93: ref=['TAKE', 'NOTICE', 'THAT', 'IS', 'NOT', 'MY', 'KIND', 'OF', 'HAUGHTINESS', 'PAPA', 'IF', 'I', 'HAVE', 'ANY', 'AT', 'ALL', 'WHICH', 'I', "DON'T", 
'AGREE', 'TO', 'THOUGH', "YOU'RE", 'ALWAYS', 'ACCUSING', 'ME', 'OF', 'IT'] +1688-142285-0093-93: hyp=['TAKE', 'NOTICE', 'THAT', 'THIS', 'IS', 'NOT', 'MY', 'KIND', 'OF', 'HAUGHTINESS', 'PAPA', 'IF', 'I', 'HAVE', 'ANY', 'AT', 'ALL', 'WHICH', 'I', "DON'T", 'AGREE', 'TO', 'THOUGH', "YOU'RE", 'ALWAYS', 'ACCUSING', 'ME', 'OF', 'IT'] +1688-142285-0094-94: ref=['I', "DON'T", 'KNOW', 'POSITIVELY', 'THAT', 'IT', 'IS', 'HERS', 'EITHER', 'BUT', 'FROM', 'LITTLE', 'THINGS', 'I', 'HAVE', 'GATHERED', 'FROM', 'HIM', 'I', 'FANCY', 'SO'] +1688-142285-0094-94: hyp=['I', "DON'T", 'KNOW', 'POSITIVELY', 'THAT', 'IT', 'IS', 'HERS', 'EITHER', 'BUT', 'FROM', 'LITTLE', 'THINGS', 'I', 'HAVE', 'GATHERED', 'FROM', 'HIM', 'I', 'FANCY', 'SO'] +1688-142285-0095-95: ref=['THEY', 'CARED', 'TOO', 'LITTLE', 'TO', 'ASK', 'IN', 'WHAT', 'MANNER', 'HER', 'SON', 'HAD', 'SPOKEN', 'ABOUT', 'HER'] +1688-142285-0095-95: hyp=['THEY', 'CARED', 'TOO', 'LITTLE', 'TO', 'ASK', 'IN', 'WHAT', 'MANNER', 'HER', 'SON', 'HAD', 'SPOKEN', 'ABOUT', 'HER'] +1998-15444-0000-96: ref=['IF', 'CALLED', 'TO', 'A', 'CASE', 'SUPPOSED', 'OR', 'SUSPECTED', 'TO', 'BE', 'ONE', 'OF', 'POISONING', 'THE', 'MEDICAL', 'MAN', 'HAS', 'TWO', 'DUTIES', 'TO', 'PERFORM', 'TO', 'SAVE', 'THE', "PATIENT'S", 'LIFE', 'AND', 'TO', 'PLACE', 'HIMSELF', 'IN', 'A', 'POSITION', 'TO', 'GIVE', 'EVIDENCE', 'IF', 'CALLED', 'ON', 'TO', 'DO', 'SO'] +1998-15444-0000-96: hyp=['IF', 'CALLED', 'TO', 'A', 'CASE', 'SUPPOSED', 'OR', 'SUSPECTED', 'TO', 'BE', 'ONE', 'OF', 'POISONING', 'THE', 'MEDICAL', 'MAN', 'HAS', 'TWO', 'DUTIES', 'TO', 'PERFORM', 'TO', 'SAVE', 'THE', "PATIENT'S", 'LIFE', 'AND', 'TO', 'PLACE', 'HIMSELF', 'IN', 'A', 'POSITION', 'TO', 'GIVE', 'EVIDENCE', 'IF', 'CALLED', 'ON', 'TO', 'DO', 'SO'] +1998-15444-0001-97: ref=['HE', 'SHOULD', 'MAKE', 'INQUIRIES', 'AS', 'TO', 'SYMPTOMS', 'AND', 'TIME', 'AT', 'WHICH', 'FOOD', 'OR', 'MEDICINE', 'WAS', 'LAST', 'TAKEN'] +1998-15444-0001-97: hyp=['HE', 'SHOULD', 'MAKE', 'INQUIRIES', 'AS', 'TO', 'SYMPTOMS', 'AND', 'TIME', 'AT', 'WHICH', 'FOOD', 'OR', 'MEDICINE', 'WAS', 'LAST', 'TAKEN'] +1998-15444-0002-98: ref=['HE', 'SHOULD', 'NOTICE', 'THE', 'POSITION', 'AND', 'TEMPERATURE', 'OF', 'THE', 'BODY', 'THE', 'CONDITION', 'OF', 'RIGOR', 'MORTIS', 'MARKS', 'OF', 'VIOLENCE', 'APPEARANCE', 'OF', 'LIPS', 'AND', 'MOUTH'] +1998-15444-0002-98: hyp=['HE', 'SHOULD', 'NOTICE', 'THE', 'POSITION', 'AND', 'TEMPERATURE', 'OF', 'THE', 'BODY', 'THE', 'CONDITION', 'OF', 'RIGOR', 'MORTIS', 'MARKS', 'OF', 'VIOLENCE', 'APPEARANCE', 'OF', 'LIPS', 'AND', 'MOUTH'] +1998-15444-0003-99: ref=['IN', 'MAKING', 'A', 'POST', 'MORTEM', 'EXAMINATION', 'THE', 'ALIMENTARY', 'CANAL', 'SHOULD', 'BE', 'REMOVED', 'AND', 'PRESERVED', 'FOR', 'FURTHER', 'INVESTIGATION'] +1998-15444-0003-99: hyp=['IN', 'MAKING', 'A', 'POST', 'MORTEM', 'EXAMINATION', 'THE', 'ELEMENTARY', 'CANAL', 'SHOULD', 'BE', 'REMOVED', 'AND', 'PRESERVED', 'FOR', 'FURTHER', 'INVESTIGATION'] +1998-15444-0004-100: ref=['THE', 'GUT', 'AND', 'THE', 'GULLET', 'BEING', 'CUT', 'ACROSS', 'BETWEEN', 'THESE', 'LIGATURES', 'THE', 'STOMACH', 'MAY', 'BE', 'REMOVED', 'ENTIRE', 'WITHOUT', 'SPILLING', 'ITS', 'CONTENTS'] +1998-15444-0004-100: hyp=['THE', 'GUT', 'AND', 'THE', 'GULLET', 'BEING', 'CUT', 'ACROSS', 'BETWEEN', 'THESE', 'LIGATURES', 'THE', 'STOMACH', 'MAY', 'BE', 'REMOVED', 'ENTIRE', 'WITHOUT', 'SPINNING', 'ITS', 'CONTENTS'] +1998-15444-0005-101: ref=['IF', 'THE', 'MEDICAL', 'PRACTITIONER', 'IS', 'IN', 'DOUBT', 'ON', 'ANY', 'POINT', 'HE', 'SHOULD', 'OBTAIN', 'TECHNICAL', 'ASSISTANCE', 'FROM', 'SOMEONE', 'WHO', 'HAS', 'PAID', 
'ATTENTION', 'TO', 'THE', 'SUBJECT'] +1998-15444-0005-101: hyp=['IF', 'THE', 'MEDICAL', 'PRACTITIONER', 'IS', 'IN', 'DOUBT', 'ON', 'ANY', 'POINT', 'HE', 'SHOULD', 'OBTAIN', 'TECHNICAL', 'ASSISTANCE', 'FROM', 'SOME', 'ONE', 'WHO', 'HAS', 'PAID', 'ATTENTION', 'TO', 'THE', 'SUBJECT'] +1998-15444-0006-102: ref=['IN', 'A', 'CASE', 'OF', 'ATTEMPTED', 'SUICIDE', 'BY', 'POISONING', 'IS', 'IT', 'THE', 'DUTY', 'OF', 'THE', 'DOCTOR', 'TO', 'INFORM', 'THE', 'POLICE'] +1998-15444-0006-102: hyp=['IN', 'A', 'CASE', 'OF', 'ATTEMPTED', 'SUICIDE', 'BY', 'POISONING', 'IS', 'IT', 'THE', 'DUTY', 'OF', 'THE', 'DOCTOR', 'TO', 'INFORM', 'THE', 'POLICE'] +1998-15444-0007-103: ref=['THE', 'BEST', 'EMETIC', 'IS', 'THAT', 'WHICH', 'IS', 'AT', 'HAND'] +1998-15444-0007-103: hyp=['THE', 'BEST', 'AMATIC', 'IS', 'THAT', 'WHICH', 'IS', 'AT', 'HAND'] +1998-15444-0008-104: ref=['THE', 'DOSE', 'FOR', 'AN', 'ADULT', 'IS', 'TEN', 'MINIMS'] +1998-15444-0008-104: hyp=['THE', 'DOSE', 'FOR', 'AN', 'ADULT', 'IS', 'TEN', 'MINIMS'] +1998-15444-0009-105: ref=['APOMORPHINE', 'IS', 'NOT', 'ALLIED', 'IN', 'PHYSIOLOGICAL', 'ACTION', 'TO', 'MORPHINE', 'AND', 'MAY', 'BE', 'GIVEN', 'IN', 'CASES', 'OF', 'NARCOTIC', 'POISONING'] +1998-15444-0009-105: hyp=['APOMORPHINE', 'IS', 'NOT', 'ALLIED', 'IN', 'PHYSIOLOGICAL', 'ACTION', 'TO', 'MORPHINE', 'AND', 'MAY', 'BE', 'GIVEN', 'IN', 'CASES', 'OF', 'NARCOTIC', 'POISONING'] +1998-15444-0010-106: ref=['TICKLING', 'THE', 'FAUCES', 'WITH', 'A', 'FEATHER', 'MAY', 'EXCITE', 'VOMITING'] +1998-15444-0010-106: hyp=['TICKLING', 'THE', 'PHOCES', 'WITH', 'A', 'FEATHER', 'MAKES', 'OUT', 'VOMITING'] +1998-15444-0011-107: ref=['IN', 'USING', 'THE', 'ELASTIC', 'STOMACH', 'TUBE', 'SOME', 'FLUID', 'SHOULD', 'BE', 'INTRODUCED', 'INTO', 'THE', 'STOMACH', 'BEFORE', 'ATTEMPTING', 'TO', 'EMPTY', 'IT', 'OR', 'A', 'PORTION', 'OF', 'THE', 'MUCOUS', 'MEMBRANE', 'MAY', 'BE', 'SUCKED', 'INTO', 'THE', 'APERTURE'] +1998-15444-0011-107: hyp=['IN', 'USING', 'THE', 'ELASTIC', 'STOMACH', 'TUBE', 'SOME', 'FLUID', 'SHOULD', 'BE', 'INTRODUCED', 'INTO', 'THE', 'STOMACH', 'BEFORE', 'ATTEMPTING', 'TO', 'EMPTY', 'IT', 'OR', 'A', 'PORTION', 'OF', 'THE', 'MUCOUS', 'MEMBRANE', 'MAY', 'BE', 'SUCKED', 'INTO', 'THE', 'APERTURE'] +1998-15444-0012-108: ref=['THE', 'TUBE', 'SHOULD', 'BE', 'EXAMINED', 'TO', 'SEE', 'THAT', 'IT', 'IS', 'NOT', 'BROKEN', 'OR', 'CRACKED', 'AS', 'ACCIDENTS', 'HAVE', 'HAPPENED', 'FROM', 'NEGLECTING', 'THIS', 'PRECAUTION'] +1998-15444-0012-108: hyp=['THE', 'TUBE', 'SHOULD', 'BE', 'EXAMINED', 'TO', 'SEE', 'THAT', 'IT', 'IS', 'NOT', 'BROKEN', 'OR', 'CRACKED', 'AS', 'ACCIDENTS', 'HAVE', 'HAPPENED', 'FROM', 'NEGLECTING', 'THIS', 'PRECAUTION'] +1998-15444-0013-109: ref=['ANTIDOTES', 'ARE', 'USUALLY', 'GIVEN', 'HYPODERMICALLY', 'OR', 'IF', 'BY', 'MOUTH', 'IN', 'THE', 'FORM', 'OF', 'TABLETS'] +1998-15444-0013-109: hyp=['ANTIDOTES', 'ARE', 'USUALLY', 'GIVEN', 'HYPODERMICALLY', 'OR', 'IF', 'BY', 'MOUTH', 'IN', 'THE', 'FORM', 'OF', 'TABLETS'] +1998-15444-0014-110: ref=['IN', 'THE', 'ABSENCE', 'OF', 'A', 'HYPODERMIC', 'SYRINGE', 'THE', 'REMEDY', 'MAY', 'BE', 'GIVEN', 'BY', 'THE', 'RECTUM'] +1998-15444-0014-110: hyp=['IN', 'THE', 'ABSENCE', 'OF', 'A', 'HYPODERMIC', 'SYRINGE', 'THE', 'REMEDY', 'MAY', 'BE', 'GIVEN', 'BY', 'THE', 'RECTUM'] +1998-15444-0015-111: ref=['NOTICE', 'THE', 'SMELL', 'COLOUR', 'AND', 'GENERAL', 'APPEARANCE', 'OF', 'THE', 'MATTER', 'SUBMITTED', 'FOR', 'EXAMINATION'] +1998-15444-0015-111: hyp=['NOTICE', 'THE', 'SMELL', 'COLOR', 'AND', 'GENERAL', 'APPEARANCE', 'OF', 'THE', 'MATTER', 'SUBMITTED', 'FOR', 'EXAMINATION'] 
+1998-15444-0016-112: ref=['FOR', 'THE', 'SEPARATION', 'OF', 'AN', 'ALKALOID', 'THE', 'FOLLOWING', 'IS', 'THE', 'PROCESS', 'OF', 'STAS', 'OTTO'] +1998-15444-0016-112: hyp=['FOR', 'THE', 'SEPARATION', 'OF', 'AN', 'ALKALOID', 'THE', 'FOLLOWING', 'IS', 'THE', 'PROCESS', 'OF', 'STARS', 'OTTO'] +1998-15444-0017-113: ref=['THIS', 'PROCESS', 'IS', 'BASED', 'UPON', 'THE', 'PRINCIPLE', 'THAT', 'THE', 'SALTS', 'OF', 'THE', 'ALKALOIDS', 'ARE', 'SOLUBLE', 'IN', 'ALCOHOL', 'AND', 'WATER', 'AND', 'INSOLUBLE', 'IN', 'ETHER'] +1998-15444-0017-113: hyp=['THIS', 'PROCESS', 'IS', 'BASED', 'UPON', 'THE', 'PRINCIPLE', 'THAT', 'THE', 'SALTS', 'OF', 'THE', 'AKALITES', 'ARE', 'SOLUBLE', 'IN', 'ALKALINE', 'WATER', 'AND', 'INSOLUBLE', 'IN', 'ETHER'] +1998-15444-0018-114: ref=['THE', 'PURE', 'ALKALOIDS', 'WITH', 'THE', 'EXCEPTION', 'OF', 'MORPHINE', 'IN', 'ITS', 'CRYSTALLINE', 'FORM', 'ARE', 'SOLUBLE', 'IN', 'ETHER'] +1998-15444-0018-114: hyp=['THE', 'PURE', 'AKALOIDS', 'WITH', 'THE', 'EXCEPTION', 'OF', 'MORPHINE', 'IN', 'ITS', 'CRYSTALLINE', 'FORM', 'ARE', 'SOLUBLE', 'IN', 'ETHER'] +1998-15444-0019-115: ref=['TWO', 'COOL', 'THE', 'MIXTURE', 'AND', 'FILTER', 'WASH', 'THE', 'RESIDUE', 'WITH', 'STRONG', 'ALCOHOL', 'AND', 'MIX', 'THE', 'FILTRATES'] +1998-15444-0019-115: hyp=['TWO', 'COOL', 'THE', 'MIXTURE', 'AND', 'FILTER', 'WASH', 'THE', 'RESIDUE', 'WITH', 'STRONG', 'ALCOHOL', 'AND', 'MIX', 'THE', 'FULL', 'TRADES'] +1998-15444-0020-116: ref=['THE', 'RESIDUE', 'MAY', 'BE', 'SET', 'ASIDE', 'FOR', 'THE', 'DETECTION', 'OF', 'THE', 'METALLIC', 'POISONS', 'IF', 'SUSPECTED', 'EXPEL', 'THE', 'ALCOHOL', 'BY', 'CAREFUL', 'EVAPORATION'] +1998-15444-0020-116: hyp=['THE', 'RESIDUE', 'MAY', 'BE', 'SET', 'ASIDE', 'FOR', 'THE', 'DETECTION', 'OF', 'THE', 'METALLIC', 'POISONS', 'IF', 'SUSPECTED', 'EXPEL', 'THE', 'ALCOHOL', 'BY', 'CAREFUL', 'EVAPORATION'] +1998-15444-0021-117: ref=['ON', 'THE', 'EVAPORATION', 'OF', 'THE', 'ALCOHOL', 'THE', 'RESINOUS', 'AND', 'FATTY', 'MATTERS', 'SEPARATE'] +1998-15444-0021-117: hyp=['ON', 'THE', 'EVAPORATION', 'OF', 'THE', 'ALCOHOL', 'THE', 'VESINOUS', 'AND', 'FATIMATAS', 'SEPARATE'] +1998-15444-0022-118: ref=['EVAPORATE', 'THE', 'FILTRATE', 'TO', 'A', 'SYRUP', 'AND', 'EXTRACT', 'WITH', 'SUCCESSIVE', 'PORTIONS', 'OF', 'ABSOLUTE', 'ALCOHOL'] +1998-15444-0022-118: hyp=['EVAPORATE', 'THE', 'FILDRATE', 'TO', 'A', 'SYRUP', 'AND', 'EXTRACT', 'WITH', 'SUCCESSIVE', 'PORTIONS', 'OF', 'ABSOLUTE', 'ALCOHOL'] +1998-15444-0023-119: ref=['SEPARATE', 'THE', 'ETHEREAL', 'SOLUTION', 'AND', 'EVAPORATE'] +1998-15444-0023-119: hyp=['SEPARATE', 'THE', 'ETHEREAL', 'SOLUTION', 'AND', 'EVAPORATE'] +1998-15444-0024-120: ref=['FIVE', 'A', 'PART', 'OF', 'THIS', 'ETHEREAL', 'SOLUTION', 'IS', 'POURED', 'INTO', 'A', 'WATCH', 'GLASS', 'AND', 'ALLOWED', 'TO', 'EVAPORATE'] +1998-15444-0024-120: hyp=['FIVE', 'A', 'PART', 'OF', 'THIS', 'ETHEREAL', 'SOLUTION', 'IS', 'POURED', 'INTO', 'A', 'WATCH', 'GLASS', 'AND', 'ALLOWED', 'TO', 'EVAPORATE'] +1998-15444-0025-121: ref=['TO', 'PURIFY', 'IT', 'ADD', 'A', 'SMALL', 'QUANTITY', 'OF', 'DILUTE', 'SULPHURIC', 'ACID', 'AND', 'AFTER', 'EVAPORATING', 'TO', 'THREE', 'QUARTERS', 'OF', 'ITS', 'BULK', 'ADD', 'A', 'SATURATED', 'SOLUTION', 'OF', 'CARBONATE', 'OF', 'POTASH', 'OR', 'SODA'] +1998-15444-0025-121: hyp=['TO', 'PURIFY', 'IT', 'ADD', 'A', 'SMALL', 'QUANTITY', 'OF', 'DILUTE', 'SULPHURIC', 'ACID', 'AND', 'AFTER', 'EVAPORATING', 'TO', 'THREE', 'QUARTERS', 'OF', 'ITS', 'BULK', 'ADD', 'A', 'SATURATED', 'SOLUTION', 'OF', 'CARBONATE', 'OF', 'POTASH', 'OR', 'SODA'] +1998-15444-0026-122: ref=['BOIL', 
'THE', 'FINELY', 'DIVIDED', 'SUBSTANCE', 'WITH', 'ABOUT', 'ONE', 'EIGHTH', 'ITS', 'BULK', 'OF', 'PURE', 'HYDROCHLORIC', 'ACID', 'ADD', 'FROM', 'TIME', 'TO', 'TIME', 'POTASSIC', 'CHLORATE', 'UNTIL', 'THE', 'SOLIDS', 'ARE', 'REDUCED', 'TO', 'A', 'STRAW', 'YELLOW', 'FLUID'] +1998-15444-0026-122: hyp=['BOIL', 'THE', 'FINELY', 'DIVIDED', 'SUBSTANCE', 'WITH', 'ABOUT', 'ONE', 'EIGHTH', 'ITS', 'BULK', 'OF', 'PURE', 'HYDROCHLORIC', 'ACID', 'ADD', 'FROM', 'TIME', 'TO', 'TIME', 'POTASSIC', 'CHLORATE', 'UNTIL', 'THE', 'SOLIDS', 'ARE', 'REDUCED', 'TO', 'A', 'STRAW', 'YELLOW', 'FLUID'] +1998-15444-0027-123: ref=['THE', 'RESIDUE', 'OF', 'THE', 'MATERIAL', 'AFTER', 'DIGESTION', 'WITH', 'HYDROCHLORIC', 'ACID', 'AND', 'POTASSIUM', 'CHLORATE', 'MAY', 'HAVE', 'TO', 'BE', 'EXAMINED', 'FOR', 'SILVER', 'LEAD', 'AND', 'BARIUM'] +1998-15444-0027-123: hyp=['THE', 'RESIDUE', 'OF', 'THE', 'MATERIAL', 'AFTER', 'DIGESTION', 'WITH', 'HYDROCHLORIC', 'ACID', 'AND', 'POTASSIUM', 'CHLORIDE', 'MAY', 'HAVE', 'TO', 'BE', 'EXAMINED', 'FOR', 'SILVER', 'LEAD', 'AND', 'BARIUM'] +1998-29454-0000-124: ref=['A', 'THOUSAND', 'BLESSINGS', 'FROM', 'A', 'GRATEFUL', 'HEART'] +1998-29454-0000-124: hyp=['A', 'THOUSAND', 'BLESSINGS', 'FROM', 'A', 'GRATEFUL', 'HEART'] +1998-29454-0001-125: ref=['PERUSAL', 'SAID', 'THE', 'PAWNBROKER', "THAT'S", 'THE', 'WAY', 'TO', 'PERNOUNCE', 'IT'] +1998-29454-0001-125: hyp=['PERUSAL', 'SAID', 'THE', 'PAWNBROKER', "THAT'S", 'THE', 'WAY', 'TO', 'PRONOUNCE', 'IT'] +1998-29454-0002-126: ref=['HIS', 'BOOKS', 'TOLD', 'HIM', 'THAT', 'TREASURE', 'IS', 'BEST', 'HIDDEN', 'UNDER', 'LOOSE', 'BOARDS', 'UNLESS', 'OF', 'COURSE', 'YOUR', 'HOUSE', 'HAS', 'A', 'SECRET', 'PANEL', 'WHICH', 'HIS', 'HAD', 'NOT'] +1998-29454-0002-126: hyp=['HIS', 'BOOKS', 'TOLD', 'HIM', 'THE', 'TREASURE', 'IS', 'BEST', 'HIDDEN', 'UNDER', 'LOOSE', 'BOARDS', 'AND', 'AS', 'OF', 'COURSE', 'YOUR', 'HOUSE', 'HAD', 'A', 'SECRET', 'PENNEL', 'WHICH', 'HIS', 'HAD', 'NOT'] +1998-29454-0003-127: ref=['HE', 'GOT', 'IT', 'UP', 'AND', 'PUSHED', 'HIS', 'TREASURES', 'AS', 'FAR', 'IN', 'AS', 'HE', 'COULD', 'ALONG', 'THE', 'ROUGH', 'CRUMBLY', 'SURFACE', 'OF', 'THE', 'LATH', 'AND', 'PLASTER'] +1998-29454-0003-127: hyp=['HE', 'GOT', 'IT', 'UP', 'AND', 'PUSHED', 'HIS', 'TREASURES', 'AS', 'FAR', 'IN', 'AS', 'HE', 'COULD', 'ALONG', 'THE', 'ROUGH', 'CRUMBLY', 'SURFACE', 'OF', 'THE', 'GLASS', 'AND', 'PLASTER'] +1998-29454-0004-128: ref=['WHEN', 'DICKIE', 'CAME', 'DOWN', 'HIS', 'AUNT', 'SLIGHTLY', 'SLAPPED', 'HIM', 'AND', 'HE', 'TOOK', 'THE', 'HALFPENNY', 'AND', 'LIMPED', 'OFF', 'OBEDIENTLY'] +1998-29454-0004-128: hyp=['WHEN', 'DICKIE', 'CAME', 'DOWN', 'HIS', 'AUNT', 'SLIGHTLY', 'SLAPPED', 'HIM', 'AND', 'HE', 'TOOK', 'THE', 'HALFPENNY', 'AND', 'LIMPED', 'OFF', 'OBEDIENTLY'] +1998-29454-0005-129: ref=['HE', 'HAD', 'NEVER', 'SEEN', 'ONE', 'BEFORE', 'AND', 'IT', 'INTERESTED', 'HIM', 'EXTREMELY'] +1998-29454-0005-129: hyp=['HE', 'HAD', 'NEVER', 'SEEN', 'ONE', 'BEFORE', 'AND', 'IT', 'INTERESTED', 'HIM', 'EXTREMELY'] +1998-29454-0006-130: ref=['HE', 'LOOKED', 'ABOUT', 'HIM', 'AND', 'KNEW', 'THAT', 'HE', 'DID', 'NOT', 'AT', 'ALL', 'KNOW', 'WHERE', 'HE', 'WAS'] +1998-29454-0006-130: hyp=['HE', 'LOOKED', 'ABOUT', 'HIM', 'AND', 'KNEW', 'THAT', 'HE', 'DID', 'NOT', 'AT', 'ALL', 'KNOW', 'WHERE', 'HE', 'WAS'] +1998-29454-0007-131: ref=["WHAT'S", 'UP', 'MATEY', 'LOST', 'YOUR', 'WAY', 'DICKIE', 'EXPLAINED'] +1998-29454-0007-131: hyp=["WHAT'S", 'UP', 'MAITIE', 'LOST', 'YOUR', 'WAY', 'DICKIE', 'EXPLAINED'] +1998-29454-0008-132: ref=['WHEN', 'HE', 'SAID', 'AVE', 'I', 'BIN', 'ASLEEP'] 
+1998-29454-0008-132: hyp=['WHEN', 'HE', 'SAID', 'HAVE', 'I', 'BEEN', 'ASLEEP'] +1998-29454-0009-133: ref=['HERE', 'WE', 'ARE', 'SAID', 'THE', 'MAN'] +1998-29454-0009-133: hyp=['HERE', 'WE', 'ARE', 'SAID', 'THE', 'MAN'] +1998-29454-0010-134: ref=['NOT', 'EXACKLY', 'SAID', 'THE', 'MAN', 'BUT', "IT'S", 'ALL', 'RIGHT'] +1998-29454-0010-134: hyp=['NOT', 'EXACTLY', 'SAID', 'THE', 'MAN', 'BUT', "IT'S", 'ALL', 'RIGHT'] +1998-29454-0011-135: ref=['WHEN', 'IT', 'WAS', 'OVER', 'THE', 'MAN', 'ASKED', 'DICKIE', 'IF', 'HE', 'COULD', 'WALK', 'A', 'LITTLE', 'WAY', 'AND', 'WHEN', 'DICKIE', 'SAID', 'HE', 'COULD', 'THEY', 'SET', 'OUT', 'IN', 'THE', 'MOST', 'FRIENDLY', 'WAY', 'SIDE', 'BY', 'SIDE'] +1998-29454-0011-135: hyp=['WHEN', 'IT', 'WAS', 'OVER', 'THE', 'MAN', 'ASKED', 'DICKIE', 'IF', 'HE', 'COULD', 'WALK', 'A', 'LITTLE', 'WAY', 'AND', 'WHEN', 'DICKIE', 'SAID', 'HE', 'COULD', 'THEY', 'SET', 'OUT', 'IN', 'THE', 'MOST', 'FRIENDLY', 'WAY', 'SIDE', 'BY', 'SIDE'] +1998-29454-0012-136: ref=['AND', 'THE', 'TEA', 'AND', 'ALL', 'AN', 'THE', 'EGG'] +1998-29454-0012-136: hyp=['AND', 'THE', 'TEENO', 'AND', 'THE', 'EGG'] +1998-29454-0013-137: ref=['AND', 'THIS', 'IS', 'THE', 'PRETTIEST', 'PLACE', 'EVER', 'I', 'SEE'] +1998-29454-0013-137: hyp=['AND', 'THIS', 'IS', 'THE', 'PRETTIEST', 'PLACE', 'EVER', 'I', 'SEE'] +1998-29454-0014-138: ref=['I', 'SHALL', 'CATCH', 'IT', 'A', 'FAIR', 'TREAT', 'AS', 'IT', 'IS'] +1998-29454-0014-138: hyp=['I', 'SHALL', 'CATCH', 'IT', 'A', 'FAIR', 'TREAT', 'AS', 'IT', 'IS'] +1998-29454-0015-139: ref=['SHE', 'WAS', 'WAITIN', 'FOR', 'THE', 'WOOD', 'TO', 'BOIL', 'THE', 'KETTLE', 'WHEN', 'I', 'COME', 'OUT', 'MOTHER'] +1998-29454-0015-139: hyp=['SHE', 'WAS', 'WAITING', 'FOR', 'THE', 'WOOD', 'TO', 'BOIL', 'THE', 'CATTLE', 'WHEN', 'I', 'COME', 'OUT', 'MOTHER'] +1998-29454-0016-140: ref=["AIN'T", 'BAD', 'WHEN', "SHE'S", 'IN', 'A', 'GOOD', 'TEMPER'] +1998-29454-0016-140: hyp=['AND', 'BAD', 'WHEN', "SHE'S", 'IN', 'A', 'GOOD', 'TEMPER'] +1998-29454-0017-141: ref=['THAT', "AIN'T", 'WHAT', "SHE'LL", 'BE', 'IN', 'WHEN', 'YOU', 'GETS', 'BACK'] +1998-29454-0017-141: hyp=['THAT', "AIRN'T", 'BUT', "SHE'LL", 'BE', 'IN', 'WHEN', 'HE', 'GETS', 'BACK'] +1998-29454-0018-142: ref=['I', 'GOT', 'TO', 'STICK', 'IT', 'SAID', 'DICKIE', 'SADLY', "I'D", 'BEST', 'BE', 'GETTING', 'HOME'] +1998-29454-0018-142: hyp=["I'VE", 'GOT', 'TO', 'STICK', 'IT', 'SAID', 'DICKI', 'SADLY', "I'D", 'BEST', 'BE', 'GETTING', 'HOME'] +1998-29454-0019-143: ref=['I', "WOULDN'T", 'GO', 'OME', 'NOT', 'IF', 'I', 'WAS', 'YOU', 'SAID', 'THE', 'MAN'] +1998-29454-0019-143: hyp=['I', "WOULDN'T", 'GO', 'HOME', 'NOT', 'IF', 'I', 'WAS', 'YOU', 'SAID', 'THE', 'MAN'] +1998-29454-0020-144: ref=['NO', 'SAID', 'DICKIE', 'OH', 'NO', 'NO', 'I', 'NEVER'] +1998-29454-0020-144: hyp=['NO', 'SAID', 'DICKIE', 'OH', 'NO', 'NO', 'I', 'NEVER'] +1998-29454-0021-145: ref=['I', "AIN'T", 'IT', 'YER', 'HAVE', 'I', 'LIKE', 'WHAT', 'YER', 'AUNT', 'DO'] +1998-29454-0021-145: hyp=['I', "AIN'T", 'IT', 'HERE', 'HAVE', 'I', 'LIKE', 'WHAT', "YOU'", 'AUNT', 'TOO'] +1998-29454-0022-146: ref=['WELL', "THAT'LL", 'SHOW', 'YOU', 'THE', 'SORT', 'OF', 'MAN', 'I', 'AM'] +1998-29454-0022-146: hyp=['WELL', "THAT'LL", 'SHOW', 'YOU', 'THE', 'SORT', 'OF', 'MAN', 'I', 'AM'] +1998-29454-0023-147: ref=['THE', "MAN'S", 'MANNER', 'WAS', 'SO', 'KIND', 'AND', 'HEARTY', 'THE', 'WHOLE', 'ADVENTURE', 'WAS', 'SO', 'WONDERFUL', 'AND', 'NEW', 'IS', 'IT', 'COUNTRY', 'WHERE', 'YOU', 'GOING'] +1998-29454-0023-147: hyp=['THE', "MAN'S", 'MANNER', 'WAS', 'SO', 'KIND', 'AND', 'HEARTY', 'THE', 'WHOLE', 'ADVENTURE', 
'WAS', 'SO', 'WONDERFUL', 'AND', 'NEW', 'IS', 'IT', 'COUNTRY', 'WHERE', "YOU'RE", 'GOING'] +1998-29454-0024-148: ref=['THE', 'SUN', 'SHOT', 'LONG', 'GOLDEN', 'BEAMS', 'THROUGH', 'THE', 'GAPS', 'IN', 'THE', 'HEDGE'] +1998-29454-0024-148: hyp=['THE', 'SUN', 'SHOT', 'LONG', 'GOLDEN', 'BEAMS', 'THROUGH', 'THE', 'GAPS', 'IN', 'THE', 'HEDGE'] +1998-29454-0025-149: ref=['A', 'BIRD', 'PAUSED', 'IN', 'ITS', 'FLIGHT', 'ON', 'A', 'BRANCH', 'QUITE', 'CLOSE', 'AND', 'CLUNG', 'THERE', 'SWAYING'] +1998-29454-0025-149: hyp=['A', 'BIRD', 'PAUSED', 'IN', 'ITS', 'FLIGHT', 'ON', 'A', 'BRANCH', 'QUITE', 'CLOSE', 'AND', 'CLUNG', 'THERE', 'SWING'] +1998-29454-0026-150: ref=['HE', 'TOOK', 'OUT', 'OF', 'HIS', 'POCKET', 'A', 'NEW', 'ENVELOPE', 'A', 'NEW', 'SHEET', 'OF', 'PAPER', 'AND', 'A', 'NEW', 'PENCIL', 'READY', 'SHARPENED', 'BY', 'MACHINERY'] +1998-29454-0026-150: hyp=['HE', 'TOOK', 'OUT', 'OF', 'HIS', 'POCKET', 'IN', 'YOUR', 'ENVELOPE', 'A', 'NEW', 'SHEET', 'OF', 'PAPER', 'AND', 'A', 'NEW', 'PENCIL', 'READY', 'SHARPENED', 'BY', 'MACHINERY'] +1998-29454-0027-151: ref=['AN', 'I', 'ASKS', 'YOU', 'LET', 'ME', 'COME', 'ALONGER', 'YOU', 'GOT', 'THAT'] +1998-29454-0027-151: hyp=['AND', 'I', 'ASK', 'YOU', 'LET', 'ME', 'COME', 'ALONG', 'O', 'YOU', 'GOT', 'THAT'] +1998-29454-0028-152: ref=['GET', 'IT', 'WROTE', 'DOWN', 'THEN', 'DONE'] +1998-29454-0028-152: hyp=['GET', 'US', 'WELL', 'DOWN', 'THEN', 'DONE'] +1998-29454-0029-153: ref=['THEN', 'HE', 'FOLDED', 'IT', 'AND', 'PUT', 'IT', 'IN', 'HIS', 'POCKET'] +1998-29454-0029-153: hyp=['THEN', 'HE', 'FOLDED', 'IT', 'AND', 'PUT', 'IT', 'IN', 'HIS', 'POCKET'] +1998-29454-0030-154: ref=['NOW', "WE'RE", 'SQUARE', 'HE', 'SAID'] +1998-29454-0030-154: hyp=['NOW', "WE'RE", 'SQUEER', 'HE', 'SAID'] +1998-29454-0031-155: ref=['THEY', 'COULD', 'PUT', 'A', 'MAN', 'AWAY', 'FOR', 'LESS', 'THAN', 'THAT'] +1998-29454-0031-155: hyp=['THEY', 'COULD', 'PUT', 'A', 'MAN', 'AWAY', 'FOR', 'LESS', 'THAN', 'THAT'] +1998-29454-0032-156: ref=['I', 'SEE', 'THAT', 'THERE', 'IN', 'A', 'BOOK', 'SAID', 'DICKIE', 'CHARMED'] +1998-29454-0032-156: hyp=['I', 'SEE', 'THAT', 'THERE', 'IN', 'A', 'BOOK', 'SAID', 'DICKIE', 'CHARMED'] +1998-29454-0033-157: ref=['HE', 'REWARD', 'THE', 'WAKE', 'THE', 'LAST', 'OF', 'THE', 'ENGLISH', 'AND', 'I', 'WUNNERED', 'WHAT', 'IT', 'STOOD', 'FOR'] +1998-29454-0033-157: hyp=['HE', 'REVORED', 'THE', 'WAKE', 'THE', 'LAST', 'OF', 'THE', 'ENGLISH', 'AND', 'I', 'WONDERED', 'WHAT', 'I', 'STOOD', 'FOR'] +1998-29454-0034-158: ref=['WILD', 'ONES', "AIN'T", 'ALF', 'THE', 'SIZE', 'I', 'LAY'] +1998-29454-0034-158: hyp=['WILD', 'ONES', 'AND', 'A', 'HALF', 'THE', 'SIZE', 'I', 'LAY'] +1998-29454-0035-159: ref=['ADVENTURES', 'I', 'SHOULD', 'THINK', 'SO'] +1998-29454-0035-159: hyp=['ADVENTURES', 'I', 'SHOULD', 'THINK', 'SO'] +1998-29454-0036-160: ref=['AH', 'SAID', 'DICKIE', 'AND', 'A', 'FULL', 'SILENCE', 'FELL', 'BETWEEN', 'THEM'] +1998-29454-0036-160: hyp=['AH', 'SAID', 'DICKY', 'AND', 'A', 'SHORT', 'SILENCE', 'FELL', 'BETWEEN', 'THEM'] +1998-29454-0037-161: ref=['THAT', 'WAS', 'CHARMING', 'BUT', 'IT', 'WAS', 'PLEASANT', 'TOO', 'TO', 'WASH', 'THE', 'MUD', 'OFF', 'ON', 'THE', 'WET', 'GRASS'] +1998-29454-0037-161: hyp=['THAT', 'WAS', 'CHARMING', 'BUT', 'IT', 'WAS', 'PLEASANT', 'TOO', 'TO', 'WASH', 'THE', 'MUD', 'OFF', 'ON', 'THE', 'WET', 'GRASS'] +1998-29454-0038-162: ref=['DICKIE', 'ALWAYS', 'REMEMBERED', 'THAT', 'MOMENT'] +1998-29454-0038-162: hyp=['DICKIE', 'ALWAYS', 'REMEMBERED', 'THAT', 'MOMENT'] +1998-29454-0039-163: ref=['SO', 'YOU', 'SHALL', 'SAID', 'MISTER', 'BEALE', 'A', "REG'LER", 'WASH', 
'ALL', 'OVER', 'THIS', 'VERY', 'NIGHT', 'I', 'ALWAYS', 'LIKE', 'A', 'WASH', 'MESELF'] +1998-29454-0039-163: hyp=['SO', 'YOU', 'SHALL', 'SAID', 'MISTER', 'BEALE', 'A', "REG'LAR", 'WASH', 'ALL', 'OVER', 'THIS', 'VERY', 'NIGHT', 'I', 'ALWAYS', 'LIKE', 'A', 'WASH', 'MESELF'] +1998-29454-0040-164: ref=['SOME', 'BLOKES', 'THINK', 'IT', 'PAYS', 'TO', 'BE', 'DIRTY', 'BUT', 'IT', "DON'T"] +1998-29454-0040-164: hyp=['SOME', 'LOLKS', 'THINK', 'IT', 'PAYS', 'TO', 'BE', 'DIRTY', 'BUT', 'IT', "DON'T"] +1998-29454-0041-165: ref=['IF', "YOU'RE", 'CLEAN', 'THEY', 'SAY', 'HONEST', 'POVERTY', 'AN', 'IF', "YOU'RE", 'DIRTY', 'THEY', 'SAY', 'SERVE', 'YOU', 'RIGHT'] +1998-29454-0041-165: hyp=['IF', "YOU'RE", 'CLEAN', 'THEY', 'SAY', 'HONEST', 'POVERTY', 'AND', 'IF', "YOU'RE", 'DIRTY', 'THEY', 'SAY', 'SERVE', 'YOU', 'RIGHT'] +1998-29454-0042-166: ref=['YOU', 'ARE', 'GOOD', 'SAID', 'DICKIE', 'I', 'DO', 'LIKE', 'YOU'] +1998-29454-0042-166: hyp=['YOU', 'ARE', 'GOOD', 'SAID', 'DICKIE', 'I', 'DO', 'LIKE', 'YOU'] +1998-29454-0043-167: ref=['I', 'KNOW', 'YOU', 'WILL', 'SAID', 'DICKIE', 'WITH', 'ENTHUSIASM', 'I', 'KNOW', 'OW', 'GOOD', 'YOU', 'ARE'] +1998-29454-0043-167: hyp=['I', 'KNOW', 'YOU', 'WILL', 'SAID', 'DICKIE', 'WITH', 'ENTHUSIASM', 'I', 'KNOW', 'HOW', 'GOOD', 'YOU', 'ARE'] +1998-29454-0044-168: ref=['BLESS', 'ME', 'SAID', 'MISTER', 'BEALE', 'UNCOMFORTABLY', 'WELL', 'THERE'] +1998-29454-0044-168: hyp=['BLESS', 'ME', 'SAID', 'MISTER', 'BELL', 'UNCOMFORTABLY', 'WELL', 'THERE'] +1998-29454-0045-169: ref=['STEP', 'OUT', 'SONNY', 'OR', "WE'LL", 'NEVER', 'GET', 'THERE', 'THIS', 'SIDE', 'CHRISTMAS'] +1998-29454-0045-169: hyp=['STEP', 'OUT', 'SANNY', 'OR', "WE'LL", 'NEVER', 'GET', 'THERE', 'THIS', 'SIDE', 'OF', 'CHRISTMAS'] +1998-29454-0046-170: ref=['WELL', "YOU'LL", 'KNOW', 'ALL', 'ABOUT', 'IT', 'PRESENTLY'] +1998-29454-0046-170: hyp=['WELL', 'YOU', 'KNOW', 'ALL', 'ABOUT', 'IT', 'PRESENTLY'] +1998-29455-0000-171: ref=['THE', 'SINGING', 'AND', 'LAUGHING', 'WENT', 'ON', 'LONG', 'AFTER', 'HE', 'HAD', 'FALLEN', 'ASLEEP', 'AND', 'IF', 'LATER', 'IN', 'THE', 'EVENING', 'THERE', 'WERE', 'LOUD', 'VOICED', 'ARGUMENTS', 'OR', 'QUARRELS', 'EVEN', 'DICKIE', 'DID', 'NOT', 'HEAR', 'THEM'] +1998-29455-0000-171: hyp=['THE', 'SINGING', 'AND', 'LAUGHING', 'WENT', 'ON', 'LONG', 'AFTER', 'HE', 'HAD', 'FALLEN', 'ASLEEP', 'AND', 'IF', 'LATER', 'IN', 'THE', 'EVENING', 'THEY', 'WERE', 'LOUD', 'VOICED', 'ARGUMENTS', 'OR', 'QUARRELS', 'EVEN', 'DICKIE', 'DID', 'NOT', 'HEAR', 'THEM'] +1998-29455-0001-172: ref=["WHAT'S", 'ALL', 'THAT', 'THERE', 'DICKIE', 'ASKED', 'POINTING', 'TO', 'THE', 'ODD', 'KNOBBLY', 'BUNDLES', 'OF', 'ALL', 'SORTS', 'AND', 'SHAPES', 'TIED', 'ON', 'TO', 'THE', "PERAMBULATOR'S", 'FRONT'] +1998-29455-0001-172: hyp=["WHAT'S", 'ON', 'THAT', 'THERE', 'DICKIE', 'ASKED', 'POINTING', 'TO', 'THE', 'ODD', 'KNOBBY', 'BUNDLES', 'OF', 'ALL', 'SORTS', 'AND', 'SHAPES', 'TIED', 'ON', 'TO', 'THE', "PERAMBULATOR'S", 'FRONT'] +1998-29455-0002-173: ref=['TELL', 'YER', 'WHAT', 'MATE', 'LOOKS', 'TO', 'ME', 'AS', 'IF', "I'D", 'TOOK', 'A', 'FANCY', 'TO', 'YOU'] +1998-29455-0002-173: hyp=['TELL', 'YOU', 'WHAT', 'MATE', 'LOOKS', 'TO', 'ME', 'AS', 'IF', 'I', 'TOOK', 'A', 'FANCY', 'TO', 'YOU'] +1998-29455-0003-174: ref=['SWELP', 'ME', 'HE', 'SAID', 'HELPLESSLY'] +1998-29455-0003-174: hyp=['SWAP', 'ME', 'HE', 'SAID', 'HELPLESSLY'] +1998-29455-0004-175: ref=['OH', 'LOOK', 'SAID', 'DICKIE', 'THE', 'FLOWERS'] +1998-29455-0004-175: hyp=['OH', 'LOOK', 'SAID', 'DICKIE', 'THE', 'FLOWERS'] +1998-29455-0005-176: ref=["THEY'RE", 'ONLY', 'WEEDS', 'SAID', 'BEALE'] 
+1998-29455-0005-176: hyp=["THEY'RE", 'ONLY', 'WEEDS', 'SAID', 'BEAL'] +1998-29455-0006-177: ref=['BUT', 'I', 'SHALL', 'HAVE', 'THEM', 'WHILE', "THEY'RE", 'ALIVE', 'SAID', 'DICKIE', 'AS', 'HE', 'HAD', 'SAID', 'TO', 'THE', 'PAWNBROKER', 'ABOUT', 'THE', 'MOONFLOWERS'] +1998-29455-0006-177: hyp=['BUT', 'I', 'SHALL', 'HAVE', 'THEM', 'WHILE', 'THEY', 'ARE', 'LIVE', 'SAID', 'DICKIE', 'AS', 'HE', 'HAD', 'SAID', 'TO', 'THE', 'PAWNBROKER', 'ABOUT', 'THE', 'MOONFLOWERS'] +1998-29455-0007-178: ref=['HI', 'THERE', 'GOES', 'A', 'RABBIT'] +1998-29455-0007-178: hyp=['HI', 'THERE', 'GOES', 'A', 'RABBIT'] +1998-29455-0008-179: ref=['SEE', 'IM', 'CROST', 'THE', 'ROAD', 'THERE', 'SEE', 'HIM'] +1998-29455-0008-179: hyp=['SEEM', 'CCHOSTAWTE', 'SEEM'] +1998-29455-0009-180: ref=['HOW', 'BEAUTIFUL', 'SAID', 'DICKIE', 'WRIGGLING', 'WITH', 'DELIGHT'] +1998-29455-0009-180: hyp=['HOW', 'BEAUTIFUL', 'SAID', 'DICKIE', 'WRIGGLING', 'WITH', 'DELIGHT'] +1998-29455-0010-181: ref=['THIS', 'LIFE', 'OF', 'THE', 'RABBIT', 'AS', 'DESCRIBED', 'BY', 'MISTER', 'BEALE', 'WAS', 'THE', "CHILD'S", 'FIRST', 'GLIMPSE', 'OF', 'FREEDOM', "I'D", 'LIKE', 'TO', 'BE', 'A', 'RABBIT'] +1998-29455-0010-181: hyp=['THIS', 'LIFE', 'OF', 'THE', 'RABBIT', 'AS', 'DESCRIBED', 'BY', 'MISTER', 'BEALE', 'WAS', 'THE', "CHILD'S", 'FIRST', 'GLIMPSE', 'OF', 'FREEDOM', "I'D", 'LIKE', 'TO', 'BE', 'A', 'RABBIT'] +1998-29455-0011-182: ref=["OW'M", 'I', 'TO', 'WHEEL', 'THE', 'BLOOMIN', 'PRAM', 'IF', 'YOU', 'GOES', 'ON', 'LIKE', 'AS', 'IF', 'YOU', 'WAS', 'A', 'BAG', 'OF', 'EELS'] +1998-29455-0011-182: hyp=['HOW', 'AM', 'I', 'TO', 'WEAR', 'THE', 'BLOOMIN', 'PRAM', 'IF', 'YOU', 'GOES', 'ON', 'LIKE', 'AS', 'IF', 'YOU', 'WAS', 'A', 'PACK', 'OF', 'FIELDS'] +1998-29455-0012-183: ref=['I', 'LIKE', 'YOU', 'NEXTER', 'MY', 'OWN', 'DADDY', 'AND', 'MISTER', 'BAXTER', 'NEXT', 'DOOR'] +1998-29455-0012-183: hyp=['I', 'LIKE', 'YOU', 'NEXT', 'TO', 'MY', 'OWN', 'DADDY', 'AND', 'MISTER', 'BAXTER', 'NEXT', 'DOOR'] +1998-29455-0013-184: ref=["THAT'S", 'ALL', 'RIGHT', 'SAID', 'MISTER', 'BEALE', 'AWKWARDLY'] +1998-29455-0013-184: hyp=["THAT'S", 'ALL', 'RIGHT', 'SAID', 'MISTER', 'BELE', 'AWKWARDLY'] +1998-29455-0014-185: ref=['DICKIE', 'QUICK', 'TO', 'IMITATE', 'TOUCHED', 'HIS'] +1998-29455-0014-185: hyp=['DICKIE', 'QUICK', 'TO', 'IMITATE', 'TOUCHED', 'HIS'] +1998-29455-0015-186: ref=['POOR', 'LITTLE', 'MAN', 'SAID', 'THE', 'LADY', 'YOU', 'MISS', 'YOUR', 'MOTHER', "DON'T", 'YOU'] +1998-29455-0015-186: hyp=['POOR', 'LITTLE', 'MAN', 'SAID', 'THE', 'LADY', 'YOU', 'MISS', 'YOUR', 'MOTHER', "DON'T", 'YOU'] +1998-29455-0016-187: ref=['OH', 'WELL', 'DONE', 'LITTLE', 'UN', 'SAID', 'MISTER', 'BEALE', 'TO', 'HIMSELF'] +1998-29455-0016-187: hyp=['OH', 'WELL', 'DONE', 'LITTLE', 'ONE', 'SAID', 'MISTER', 'BEALE', 'TO', 'HIMSELF'] +1998-29455-0017-188: ref=['THE', 'TWO', 'TRAVELLERS', 'WERE', 'LEFT', 'FACING', 'EACH', 'OTHER', 'THE', 'RICHER', 'BY', 'A', 'PENNY', 'AND', 'OH', 'WONDERFUL', 'GOOD', 'FORTUNE', 'A', 'WHOLE', 'HALF', 'CROWN'] +1998-29455-0017-188: hyp=['THE', 'TWO', 'TRAVELLERS', 'WERE', 'LEFT', 'FACING', 'EACH', 'OTHER', 'THE', 'RICHER', 'BY', 'A', 'PENNY', 'AND', 'OH', 'WONDERFUL', 'GOOD', 'FORTUNE', 'A', 'WHOLE', 'HALF', 'CROWN'] +1998-29455-0018-189: ref=['NO', 'I', 'NEVER', 'SAID', 'DICKIE', "ERE'S", 'THE', 'STEEVER'] +1998-29455-0018-189: hyp=['NO', 'I', 'NEVER', 'SAID', 'DICKIE', 'YES', 'THE', 'STEVER'] +1998-29455-0019-190: ref=['YOU', 'STICK', 'TO', 'THAT', 'SAID', 'BEALE', 'RADIANT', 'WITH', 'DELIGHT', "YOU'RE", 'A', 'FAIR', 'MASTERPIECE', 'YOU', 'ARE', 'YOU', 'EARNED', 'IT', 
'HONEST', 'IF', 'EVER', 'A', 'KID', 'DONE'] +1998-29455-0019-190: hyp=['YOU', 'STICK', 'TO', 'THAT', 'SAID', 'BEALE', 'RADIANT', 'WITH', 'DELIGHT', "YOU'RE", 'A', 'FAIR', 'MASTERPIECE', 'YOU', 'ARE', 'YOU', 'EARNED', 'IT', 'HONEST', 'IF', 'EVER', 'KIT', 'DONE'] +1998-29455-0020-191: ref=['THEY', 'WENT', 'ON', 'UP', 'THE', 'HILL', 'AS', 'HAPPY', 'AS', 'ANY', 'ONE', 'NEED', 'WISH', 'TO', 'BE'] +1998-29455-0020-191: hyp=['THEY', 'WENT', 'ON', 'UP', 'THE', 'HILL', 'AS', 'HAPPY', 'AS', 'ANYONE', 'NEED', 'WISH', 'TO', 'BE'] +1998-29455-0021-192: ref=['PLEASE', 'DO', 'NOT', 'BE', 'TOO', 'SHOCKED'] +1998-29455-0021-192: hyp=['PLEASE', 'DO', 'NOT', 'BE', 'TOO', 'SHOCKED'] +1998-29455-0022-193: ref=['REMEMBER', 'THAT', 'NEITHER', 'OF', 'THEM', 'KNEW', 'ANY', 'BETTER'] +1998-29455-0022-193: hyp=['REMEMBER', 'THAT', 'NEITHER', 'OF', 'THEM', 'KNEW', 'ANY', 'BETTER'] +1998-29455-0023-194: ref=['TO', 'THE', 'ELDER', 'TRAMP', 'LIES', 'AND', 'BEGGING', 'WERE', 'NATURAL', 'MEANS', 'OF', 'LIVELIHOOD'] +1998-29455-0023-194: hyp=['TO', 'THE', 'OTHER', 'TRAMP', 'LIES', 'AND', 'BEGGING', 'WERE', 'NATURAL', 'MEANS', 'OF', 'LIVELIHOOD'] +1998-29455-0024-195: ref=['BUT', 'YOU', 'SAID', 'THE', 'BED', 'WITH', 'THE', 'GREEN', 'CURTAINS', 'URGED', 'DICKIE'] +1998-29455-0024-195: hyp=['BUT', 'YOU', 'SAID', 'THE', 'BED', 'WITH', 'THE', 'GREEN', 'CURTAINS', 'URGED', 'DICKIE'] +1998-29455-0025-196: ref=['WHICH', 'THIS', "AIN'T", 'NOT', 'BY', 'NO', 'MEANS'] +1998-29455-0025-196: hyp=['WHICH', 'THIS', 'END', 'NOT', 'BY', 'NO', 'MEANS'] +1998-29455-0026-197: ref=['THE', 'NIGHT', 'IS', 'FULL', 'OF', 'INTERESTING', 'LITTLE', 'SOUNDS', 'THAT', 'WILL', 'NOT', 'AT', 'FIRST', 'LET', 'YOU', 'SLEEP', 'THE', 'RUSTLE', 'OF', 'LITTLE', 'WILD', 'THINGS', 'IN', 'THE', 'HEDGES', 'THE', 'BARKING', 'OF', 'DOGS', 'IN', 'DISTANT', 'FARMS', 'THE', 'CHIRP', 'OF', 'CRICKETS', 'AND', 'THE', 'CROAKING', 'OF', 'FROGS'] +1998-29455-0026-197: hyp=['THE', 'NIGHT', 'IS', 'FULL', 'OF', 'INTERESTING', 'LITTLE', 'SOUNDS', 'THAT', 'WILL', 'NOT', 'AT', 'FIRST', 'LET', 'YOU', 'SLEEP', 'THE', 'RUSTLE', 'OF', 'LITTLE', 'WHITE', 'THINGS', 'IN', 'THE', 'HEDGES', 'THE', 'BARKING', 'OF', 'DOGS', 'IN', 'DISTANT', 'FARMS', 'THE', 'CHIRRUP', 'OF', 'CRICKETS', 'AND', 'THE', 'CROAKING', 'OF', 'FROGS'] +1998-29455-0027-198: ref=['THE', 'NEW', 'GAME', 'OF', 'BEGGING', 'AND', 'INVENTING', 'STORIES', 'TO', 'INTEREST', 'THE', 'PEOPLE', 'FROM', 'WHOM', 'IT', 'WAS', 'WORTH', 'WHILE', 'TO', 'BEG', 'WENT', 'ON', 'GAILY', 'DAY', 'BY', 'DAY', 'AND', 'WEEK', 'BY', 'WEEK', 'AND', 'DICKIE', 'BY', 'CONSTANT', 'PRACTICE', 'GREW', 'SO', 'CLEVER', 'AT', 'TAKING', 'HIS', 'PART', 'IN', 'THE', 'ACTING', 'THAT', 'MISTER', 'BEALE', 'WAS', 'QUITE', 'DAZED', 'WITH', 'ADMIRATION'] +1998-29455-0027-198: hyp=['THE', 'NEW', 'GAME', 'OF', 'BEGGING', 'AND', 'INVENTING', 'STORIES', 'TO', 'INTEREST', 'THE', 'PEOPLE', 'FROM', 'WHOM', 'IT', 'WAS', 'WORTH', 'WHILE', 'TO', 'BEG', 'WENT', 'ON', 'GAILY', 'DAY', 'BY', 'DAY', 'AND', 'WEEK', 'BY', 'WEEK', 'AND', 'DICKIE', 'BY', 'CONSTANT', 'PRACTICE', 'GREW', 'SO', 'CLEVER', 'AT', 'TAKING', 'HIS', 'PART', 'IN', 'THE', 'ACTING', 'THAT', 'MISTER', 'BEALE', 'WAS', 'QUITE', 'DAZED', 'WITH', 'ADMIRATION'] +1998-29455-0028-199: ref=['BLESSED', 'IF', 'I', 'EVER', 'SEE', 'SUCH', 'A', 'NIPPER', 'HE', 'SAID', 'OVER', 'AND', 'OVER', 'AGAIN'] +1998-29455-0028-199: hyp=['BLEST', 'IF', 'I', 'EVER', 'SEE', 'SUCH', 'A', 'NIBBER', 'HE', 'SAID', 'OVER', 'AND', 'OVER', 'AGAIN'] +1998-29455-0029-200: ref=['CLEVER', 'AS', 'A', 'TRAINDAWG', 'E', 'IS', 'AN', 'ALL', 'OUTER', 'IS', 
'OWN', 'EAD'] +1998-29455-0029-200: hyp=['CLEVER', 'AS', 'A', 'TRAINED', 'DOG', 'IS', 'AND', 'WHILE', 'OUT', 'OF', 'HIS', 'OWN', 'ATT'] +1998-29455-0030-201: ref=['I', "AIN'T", 'SURE', 'AS', 'I', "ADN'T", 'BETTER', 'STICK', 'TO', 'THE', 'ROAD', 'AND', 'KEEP', 'AWAY', 'FROM', 'OLD', 'ANDS', 'LIKE', 'YOU', 'JIM'] +1998-29455-0030-201: hyp=['I', 'AM', 'SURE', 'AS', 'I', "HADN'T", 'BETTER', 'STICK', 'TO', 'THE', 'ROAD', 'AND', 'KEEP', 'AWAY', 'FROM', 'OLD', 'ENDS', 'LIKE', 'EU', 'JIM'] +1998-29455-0031-202: ref=['I', 'OPE', "E'S", 'CLEVER', 'ENOUGH', 'TO', 'DO', 'WOT', "E'S", 'TOLD', 'KEEP', 'IS', 'MUG', 'SHUT', "THAT'S", 'ALL'] +1998-29455-0031-202: hyp=['I', 'HOPE', "HE'S", 'CLEVER', 'ENOUGH', 'TO', 'DO', 'WHAT', "HE'S", 'TOLD', 'KEEP', 'IS', 'MUCH', 'AT', "THAT'S", 'ALL'] +1998-29455-0032-203: ref=['IF', "E'S", 'STRAIGHT', "E'LL", 'DO', 'FOR', 'ME', 'AND', 'IF', 'HE', "AIN'T", "I'LL", 'DO', 'FOR', 'IM', 'SEE'] +1998-29455-0032-203: hyp=['IF', "HE'S", 'STRAIGHT', "HE'LL", 'DO', 'FOR', 'ME', 'AND', 'IF', 'HE', "AIN'T", "I'LL", 'DO', 'FOR', 'HIM', 'SEE'] +1998-29455-0033-204: ref=['SEE', 'THAT', 'BLOKE', 'JUST', 'NOW', 'SAID', 'MISTER', 'BEALE', 'YUSS', 'SAID', 'DICKIE'] +1998-29455-0033-204: hyp=['SEE', 'THAT', 'BLOKE', 'JUST', 'NOW', 'SAID', 'MISTER', 'BELE', 'YES', 'SAID', 'DICKIE'] +1998-29455-0034-205: ref=['WELL', 'YOU', 'NEVER', 'SEE', 'IM'] +1998-29455-0034-205: hyp=['WELL', 'YOU', 'NEVER', 'SEE', 'HIM'] +1998-29455-0035-206: ref=['IF', 'ANY', 'ONE', 'ARSTS', 'YOU', 'IF', 'YOU', 'EVER', 'SEE', 'IM', 'YOU', 'NEVER', 'SET', 'EYES', 'ON', 'IM', 'IN', 'ALL', 'YOUR', 'BORN', 'NOT', 'TO', 'REMEMBER', 'IM'] +1998-29455-0035-206: hyp=['IF', 'ANY', 'ONE', 'ASKS', 'YOU', 'IF', 'YOU', 'EVER', 'SEE', 'HIM', 'YOU', 'NEVER', 'SET', 'EYES', 'ON', 'HIM', 'IN', 'ALL', "YOU'RE", 'BORN', 'NOT', 'TO', 'REMEMBER', 'HIM'] +1998-29455-0036-207: ref=['DICKIE', 'WAS', 'FULL', 'OF', 'QUESTIONS', 'BUT', 'MISTER', 'BEALE', 'HAD', 'NO', 'ANSWERS', 'FOR', 'THEM'] +1998-29455-0036-207: hyp=['DICKIE', 'WAS', 'FULL', 'OF', 'QUESTIONS', 'BUT', 'MISTER', 'BELL', 'HAD', 'NO', 'ANSWERS', 'FOR', 'THEM'] +1998-29455-0037-208: ref=['NOR', 'WAS', 'IT', 'SUNDAY', 'ON', 'WHICH', 'THEY', 'TOOK', 'A', 'REST', 'AND', 'WASHED', 'THEIR', 'SHIRTS', 'ACCORDING', 'TO', 'MISTER', "BEALE'S", 'RULE', 'OF', 'LIFE'] +1998-29455-0037-208: hyp=['NOR', 'WAS', 'IT', 'SUNDAY', 'ON', 'WHICH', 'THEY', 'TOOK', 'A', 'REST', 'AND', 'WASHED', 'THEIR', 'SHIRTS', 'ACCORDING', 'TO', 'MISTER', "BEALE'S", 'RULE', 'OF', 'LIFE'] +1998-29455-0038-209: ref=['THEY', 'DID', 'NOT', 'STAY', 'THERE', 'BUT', 'WALKED', 'OUT', 'ACROSS', 'THE', 'DOWNS', 'WHERE', 'THE', 'SKYLARKS', 'WERE', 'SINGING', 'AND', 'ON', 'A', 'DIP', 'OF', 'THE', 'DOWNS', 'CAME', 'UPON', 'GREAT', 'STONE', 'WALLS', 'AND', 'TOWERS', 'VERY', 'STRONG', 'AND', 'GRAY'] +1998-29455-0038-209: hyp=['THEY', 'DID', 'NOT', 'STAY', 'THERE', 'BUT', 'WALKED', 'OUT', 'ACROSS', 'THE', 'DOWNS', 'WHERE', 'THE', 'SKYLARKS', 'WERE', 'SINGING', 'AND', 'ON', 'A', 'DIP', 'OF', 'THE', 'DOWNS', 'CAME', 'UPON', 'GREAT', 'STONE', 'WALLS', 'AND', 'TOWERS', 'VERY', 'STRONG', 'AND', 'GREY'] +1998-29455-0039-210: ref=["WHAT'S", 'THAT', 'THERE', 'SAID', 'DICKIE'] +1998-29455-0039-210: hyp=["WHAT'S", 'THAT', 'THERE', 'SAID', 'DICKIE'] +2033-164914-0000-211: ref=['REPLIED', 'HE', 'OF', 'A', 'TRUTH', 'I', 'HEARD', 'HIM', 'NOT', 'AND', 'I', 'WOT', 'HIM', 'NOT', 'AND', 'FOLKS', 'ARE', 'ALL', 'SLEEPING'] +2033-164914-0000-211: hyp=['REPLIED', 'HE', 'OF', 'A', 'TRUTH', 'I', 'HEARD', 'HIM', 'NOT', 'AND', 'I', 'WOT', 'HIM', 'NOT', 
'AND', 'FOLKS', 'ARE', 'ALL', 'SLEEPING'] +2033-164914-0001-212: ref=['BUT', 'SHE', 'SAID', 'WHOMSOEVER', 'THOU', 'SEEST', 'AWAKE', 'HE', 'IS', 'THE', 'RECITER'] +2033-164914-0001-212: hyp=['BUT', 'SHE', 'SAID', 'WHOMSOEVER', 'THOU', 'SEEST', 'AWAKE', 'HE', 'IS', 'THE', 'RECITER'] +2033-164914-0002-213: ref=['THEN', 'SAID', 'THE', 'EUNUCH', 'ART', 'THOU', 'HE', 'WHO', 'REPEATED', 'POETRY', 'BUT', 'NOW', 'AND', 'MY', 'LADY', 'HEARD', 'HIM'] +2033-164914-0002-213: hyp=['THEN', 'SAID', 'THE', 'EUNUCH', 'ART', 'THOU', 'HE', 'WHO', 'REPEATED', 'POETRY', 'BUT', 'NOW', 'AND', 'MY', 'LADY', 'HEARD', 'HIM'] +2033-164914-0003-214: ref=['REJOINED', 'THE', 'EUNUCH', 'WHO', 'THEN', 'WAS', 'THE', 'RECITER', 'POINT', 'HIM', 'OUT', 'TO', 'ME'] +2033-164914-0003-214: hyp=['REJOINED', 'THE', 'EUNUCH', 'WHO', 'THEN', 'WAS', 'THE', 'RECITER', 'POINT', 'HIM', 'OUT', 'TO', 'ME'] +2033-164914-0004-215: ref=['BY', 'ALLAH', 'REPLIED', 'THE', 'FIREMAN', 'I', 'TELL', 'THEE', 'THE', 'TRUTH'] +2033-164914-0004-215: hyp=['BY', 'ALLAH', 'REPLIED', 'THE', 'FIREMAN', 'I', 'TELL', 'THEE', 'THE', 'TRUTH'] +2033-164914-0005-216: ref=['TELL', 'ME', 'WHAT', 'HAPPENED', 'QUOTH', 'ZAU', 'AL', 'MAKAN'] +2033-164914-0005-216: hyp=['TELL', 'ME', 'WHAT', 'HAPPENED', 'QUOTH', 'ZUL', 'MAKAN'] +2033-164914-0006-217: ref=['WHAT', 'AILS', 'THEE', 'THEN', 'THAT', 'THOU', 'MUST', 'NEEDS', 'RECITE', 'VERSES', 'SEEING', 'THAT', 'WE', 'ARE', 'TIRED', 'OUT', 'WITH', 'WALKING', 'AND', 'WATCHING', 'AND', 'ALL', 'THE', 'FOLK', 'ARE', 'ASLEEP', 'FOR', 'THEY', 'REQUIRE', 'SLEEP', 'TO', 'REST', 'THEM', 'OF', 'THEIR', 'FATIGUE'] +2033-164914-0006-217: hyp=['WHAT', 'AILS', 'THEE', 'THEN', 'THAT', 'THOU', 'MUST', 'NEEDS', 'RECITE', 'VERSES', 'SEEING', 'THAT', 'WE', 'ARE', 'TIRED', 'OUT', 'WITH', 'WALKING', 'AND', 'WATCHING', 'AND', 'ALL', 'THE', 'FOLK', 'ARE', 'ASLEEP', 'FOR', 'THEY', 'REQUIRE', 'SLEEP', 'TO', 'REST', 'THEM', 'OF', 'THEIR', 'FATIGUE'] +2033-164914-0007-218: ref=['AND', 'HE', 'ALSO', 'IMPROVISED', 'THE', 'TWO', 'FOLLOWING', 'DISTICHS'] +2033-164914-0007-218: hyp=['AND', 'HE', 'ALSO', 'IMPROVISED', 'THE', 'TWO', 'FOLLOWING', 'DISTICHES'] +2033-164914-0008-219: ref=['WHEN', 'NUZHAT', 'AL', 'ZAMAN', 'HEARD', 'THE', 'FIRST', 'IMPROVISATION', 'SHE', 'CALLED', 'TO', 'MIND', 'HER', 'FATHER', 'AND', 'HER', 'MOTHER', 'AND', 'HER', 'BROTHER', 'AND', 'THEIR', 'WHILOME', 'HOME', 'THEN', 'SHE', 'WEPT', 'AND', 'CRIED', 'AT', 'THE', 'EUNUCH', 'AND', 'SAID', 'TO', 'HIM', 'WOE', 'TO', 'THEE'] +2033-164914-0008-219: hyp=['WHEN', 'NUZHAT', 'AL', 'ZAMAN', 'HEARD', 'THE', 'FIRST', 'IMPROCISATION', 'SHE', 'CALLED', 'TO', 'MIND', 'HER', 'FATHER', 'AND', 'HER', 'MOTHER', 'AND', 'HER', 'BROTHER', 'AND', 'THEIR', 'WILLOW', 'HOME', 'THEN', 'SHE', 'WEPT', 'AND', 'CRIED', 'TO', 'THE', 'EUNUCH', 'AND', 'SAID', 'TO', 'HIM', 'WOE', 'TO', 'THEE'] +2033-164914-0009-220: ref=['HE', 'WHO', 'RECITED', 'THE', 'FIRST', 'TIME', 'HATH', 'RECITED', 'A', 'SECOND', 'TIME', 'AND', 'I', 'HEARD', 'HIM', 'HARD', 'BY'] +2033-164914-0009-220: hyp=['HE', 'WHO', 'RECITED', 'THE', 'FIRST', 'TIME', 'HATH', 'RECITED', 'A', 'SECOND', 'TIME', 'AND', 'I', 'HEARD', 'HIM', 'HARD', 'BY'] +2033-164914-0010-221: ref=['BY', 'ALLAH', 'AN', 'THOU', 'FETCH', 'HIM', 'NOT', 'TO', 'ME', 'I', 'WILL', 'ASSUREDLY', 'ROUSE', 'THE', 'CHAMBERLAIN', 'ON', 'THEE', 'AND', 'HE', 'SHALL', 'BEAT', 'THEE', 'AND', 'CAST', 'THEE', 'OUT'] +2033-164914-0010-221: hyp=['BY', 'ALLAH', 'AN', 'THOU', 'FETCH', 'HIM', 'NOT', 'TO', 'ME', 'I', 'WILL', 'ASSUREDLY', 'ROUSE', 'THE', 'CHAMBERLAIN', 'ON', 'THEE', 'AND', 'HE', 'SHALL', 
'BEAT', 'THEE', 'AND', 'CAST', 'THEE', 'OUT'] +2033-164914-0011-222: ref=['BUT', 'TAKE', 'THESE', 'HUNDRED', 'DINERS', 'AND', 'GIVE', 'THEM', 'TO', 'THE', 'SINGER', 'AND', 'BRING', 'HIM', 'TO', 'ME', 'GENTLY', 'AND', 'DO', 'HIM', 'NO', 'HURT'] +2033-164914-0011-222: hyp=['BUT', 'TAKE', 'THESE', 'HUNDRED', 'DINARS', 'AND', 'GIVE', 'THEM', 'TO', 'THE', 'SINGER', 'AND', 'BRING', 'HIM', 'TO', 'ME', 'GENTLY', 'AND', 'DO', 'HIM', 'NO', 'HURT'] +2033-164914-0012-223: ref=['RETURN', 'QUICKLY', 'AND', 'LINGER', 'NOT'] +2033-164914-0012-223: hyp=['RETURN', 'QUICKLY', 'AND', 'LINGER', 'NOT'] +2033-164914-0013-224: ref=['WHEN', 'IT', 'WAS', 'THE', 'SEVENTY', 'THIRD', 'NIGHT'] +2033-164914-0013-224: hyp=['WHEN', 'IT', 'WAS', 'THE', 'SEVENTY', 'THIRD', 'NIGHT'] +2033-164914-0014-225: ref=['BUT', 'THE', 'EUNUCH', 'SAID', 'I', 'WILL', 'NOT', 'LEAVE', 'THEE', 'TILL', 'THOU', 'SHOW', 'ME', 'WHO', 'IT', 'WAS', 'THAT', 'RECITED', 'THE', 'VERSES', 'FOR', 'I', 'DREAD', 'RETURNING', 'TO', 'MY', 'LADY', 'WITHOUT', 'HIM'] +2033-164914-0014-225: hyp=['BUT', 'THE', 'EUNUCH', 'SAID', 'I', 'WILL', 'NOT', 'LEAVE', 'THEE', 'TILL', 'THOU', 'SHOW', 'ME', 'WHO', 'IT', 'WAS', 'THAT', 'RECITED', 'THE', 'VERSES', 'FOR', 'I', 'DREAD', 'RETURNING', 'TO', 'MY', 'LADY', 'WITHOUT', 'HIM'] +2033-164914-0015-226: ref=['NOW', 'WHEN', 'THE', 'FIREMAN', 'HEARD', 'THESE', 'WORDS', 'HE', 'FEARED', 'FOR', 'ZAU', 'AL', 'MAKAN', 'AND', 'WEPT', 'WITH', 'EXCEEDING', 'WEEPING', 'AND', 'SAID', 'TO', 'THE', 'EUNUCH', 'BY', 'ALLAH', 'IT', 'WAS', 'NOT', 'I', 'AND', 'I', 'KNOW', 'HIM', 'NOT'] +2033-164914-0015-226: hyp=['NOW', 'WHEN', 'THE', 'FIREMAN', 'HEARD', 'THESE', 'WORDS', 'HE', 'FEARED', 'FOR', 'ZAU', 'AL', 'MAKAN', 'AND', 'WEPT', 'WITH', 'EXCEEDING', 'WEEPING', 'AND', 'SAID', 'TO', 'THE', 'EUNUCH', 'BY', 'ALLAH', 'IT', 'WAS', 'NOT', 'I', 'AND', 'I', 'KNOW', 'HIM', 'NOT'] +2033-164914-0016-227: ref=['SO', 'GO', 'THOU', 'TO', 'THY', 'STATION', 'AND', 'IF', 'THOU', 'AGAIN', 'MEET', 'ANY', 'ONE', 'AFTER', 'THIS', 'HOUR', 'RECITING', 'AUGHT', 'OF', 'POETRY', 'WHETHER', 'HE', 'BE', 'NEAR', 'OR', 'FAR', 'IT', 'WILL', 'BE', 'I', 'OR', 'SOME', 'ONE', 'I', 'KNOW', 'AND', 'THOU', 'SHALT', 'NOT', 'LEARN', 'OF', 'HIM', 'BUT', 'BY', 'ME'] +2033-164914-0016-227: hyp=['SO', 'GO', 'THOU', 'TO', 'THY', 'STATION', 'AND', 'IF', 'THOU', 'AGAIN', 'MEET', 'ANY', 'ONE', 'AFTER', 'THIS', 'HOUR', 'RECITING', 'AUGHT', 'OF', 'POETRY', 'WHETHER', 'HE', 'BE', 'NEAR', 'OR', 'FAR', 'IT', 'WILL', 'BE', 'I', 'OR', 'SOME', 'ONE', 'I', 'KNOW', 'AND', 'THOU', 'SHALT', 'NOT', 'LEARN', 'OF', 'HIM', 'BUT', 'BY', 'ME'] +2033-164914-0017-228: ref=['THEN', 'HE', 'KISSED', 'THE', "EUNUCH'S", 'HEAD', 'AND', 'SPAKE', 'HIM', 'FAIR', 'TILL', 'HE', 'WENT', 'AWAY', 'BUT', 'THE', 'CASTRATO', 'FETCHED', 'A', 'ROUND', 'AND', 'RETURNING', 'SECRETLY', 'CAME', 'AND', 'STOOD', 'BEHIND', 'THE', 'FIREMAN', 'FEARING', 'TO', 'GO', 'BACK', 'TO', 'HIS', 'MISTRESS', 'WITHOUT', 'TIDINGS'] +2033-164914-0017-228: hyp=['THEN', 'HE', 'KISSED', 'THE', "EUNUCH'S", 'HEAD', 'AND', 'SPAKE', 'HIM', 'FARE', 'TILL', 'HE', 'WENT', 'AWAY', 'BUT', 'THE', 'CASTRATO', 'FETCHED', 'A', 'ROUND', 'AND', 'RETURNING', 'SECRETLY', 'CAME', 'AND', 'STOOD', 'BEHIND', 'THE', 'FIREMAN', 'FEARING', 'TO', 'GO', 'BACK', 'TO', 'HIS', 'MISTRESS', 'WITHOUT', 'TIDINGS'] +2033-164914-0018-229: ref=['I', 'SAY', 'WHAT', 'MADE', 'MY', 'IGNOMY', "WHATE'ER", 'THE', 'BITTER', 'CUP', 'I', 'DRAIN', 'FAR', 'BE', 'FRO', 'ME', 'THAT', 'LAND', 'TO', 'FLEE', 'NOR', 'WILL', 'I', 'BOW', 'TO', 'THOSE', 'WHO', 'BLAME', 'AND', 'FOR', 'SUCH', 'LOVE', 'WOULD', 
'DEAL', 'ME', 'SHAME'] +2033-164914-0018-229: hyp=['I', 'SAY', 'WHAT', 'MADE', 'MY', 'IGNOMY', 'WHATEVER', 'THE', 'BITTER', 'CUP', 'I', 'DRAIN', 'FAR', 'BE', 'FROM', 'ME', 'THY', 'LAND', 'TO', 'FLEE', 'NOR', 'WILL', 'I', 'BOW', 'TO', 'THOSE', 'WHO', 'BLAME', 'AND', 'FOR', 'SUCH', 'LOVE', 'WOULD', 'DEAL', 'ME', 'SHAME'] +2033-164914-0019-230: ref=['THEN', 'SAID', 'THE', 'EUNUCH', 'TO', 'ZAU', 'AL', 'MAKAN', 'PEACE', 'BE', 'WITH', 'THEE', 'O', 'MY', 'LORD'] +2033-164914-0019-230: hyp=['THEN', 'SAID', 'THE', 'EUNUCH', 'TO', 'ZAU', 'AL', 'MAKAN', 'PEACE', 'BE', 'WITH', 'THEE', 'O', 'MY', 'LORD'] +2033-164914-0020-231: ref=['O', 'MY', 'LORD', 'CONTINUED', 'THE', 'EUNUCH', 'AND', 'SHAHRAZAD', 'PERCEIVED', 'THE', 'DAWN', 'OF', 'DAY', 'AND', 'CEASED', 'TO', 'SAY', 'HER', 'PERMITTED', 'SAY'] +2033-164914-0020-231: hyp=['O', 'MY', 'LORD', 'CONTINUED', 'THE', 'EUNUCH', 'AND', 'SHAHRAZAD', 'PERCEIVED', 'THE', 'DAWN', 'OF', 'DAY', 'AND', 'CEASED', 'TO', 'SAY', 'HER', 'PERMITTED', 'SAY'] +2033-164914-0021-232: ref=['WE', 'WILL', 'DO', 'THEE', 'NO', 'UPRIGHT', 'O', 'MY', 'SON', 'NOR', 'WRONG', 'THEE', 'IN', 'AUGHT', 'BUT', 'OUR', 'OBJECT', 'IS', 'THAT', 'THOU', 'BEND', 'THY', 'GRACIOUS', 'STEPS', 'WITH', 'ME', 'TO', 'MY', 'MISTRESS', 'TO', 'RECEIVE', 'HER', 'ANSWER', 'AND', 'RETURN', 'IN', 'WEAL', 'AND', 'SAFETY', 'AND', 'THOU', 'SHALT', 'HAVE', 'A', 'HANDSOME', 'PRESENT', 'AS', 'ONE', 'WHO', 'BRINGETH', 'GOOD', 'NEWS'] +2033-164914-0021-232: hyp=['WE', 'WILL', 'DO', 'THEE', 'NO', 'UPRIGHT', 'O', 'MY', 'SON', 'NOR', 'WRONG', 'THEE', 'IN', 'AUGHT', 'BUT', 'OUR', 'OBJECT', 'IS', 'THAT', 'THOU', 'BEND', 'THY', 'GRACIOUS', 'STEPS', 'WITH', 'ME', 'TO', 'MY', 'MISTRESS', 'TO', 'RECEIVE', 'HER', 'ANSWER', 'AND', 'RETURN', 'IN', 'WEAL', 'AND', 'SAFETY', 'AND', 'THOU', 'SHALT', 'HAVE', 'A', 'HANDSOME', 'PRESENT', 'AS', 'ONE', 'WHO', 'BRINGETH', 'GOOD', 'NEWS'] +2033-164914-0022-233: ref=['THEN', 'THE', 'EUNUCH', 'WENT', 'OUT', 'TO', 'ZAU', 'AL', 'MAKAN', 'AND', 'SAID', 'TO', 'HIM', 'RECITE', 'WHAT', 'VERSES', 'THOU', 'KNOWEST', 'FOR', 'MY', 'LADY', 'IS', 'HERE', 'HARD', 'BY', 'LISTENING', 'TO', 'THEE', 'AND', 'AFTER', 'I', 'WILL', 'ASK', 'THEE', 'OF', 'THY', 'NAME', 'AND', 'THY', 'NATIVE', 'COUNTRY', 'AND', 'THY', 'CONDITION'] +2033-164914-0022-233: hyp=['THEN', 'THE', 'EUNUCH', 'WENT', 'OUT', 'TO', 'ZAU', 'AL', 'MAKAN', 'AND', 'SAID', 'TO', 'HIM', 'RECITE', 'WHAT', 'VERSES', 'THOU', 'KNOWEST', 'FOR', 'MY', 'LADY', 'IS', 'HERE', 'HARD', 'BY', 'LISTENING', 'TO', 'THEE', 'AND', 'AFTER', 'I', 'WILL', 'ASK', 'THEE', 'OF', 'THY', 'NAME', 'AND', 'THY', 'NATIVE', 'COUNTRY', 'AND', 'THY', 'CONDITION'] +2033-164915-0000-234: ref=['AND', 'ALSO', 'THESE'] +2033-164915-0000-234: hyp=['AND', 'ALSO', 'THESE'] +2033-164915-0001-235: ref=['THEN', 'SHE', 'THREW', 'HERSELF', 'UPON', 'HIM', 'AND', 'HE', 'GATHERED', 'HER', 'TO', 'HIS', 'BOSOM', 'AND', 'THE', 'TWAIN', 'FELL', 'DOWN', 'IN', 'A', 'FAINTING', 'FIT'] +2033-164915-0001-235: hyp=['THEN', 'SHE', 'THREW', 'HERSELF', 'UPON', 'HIM', 'AND', 'HE', 'GATHERED', 'HER', 'TO', 'HIS', 'BOSOM', 'AND', 'THE', 'TWAIN', 'FELL', 'DOWN', 'IN', 'A', 'FAINTING', 'FIT'] +2033-164915-0002-236: ref=['WHEN', 'THE', 'EUNUCH', 'SAW', 'THIS', 'CASE', 'HE', 'WONDERED', 'AT', 'THEM', 'AND', 'THROWING', 'OVER', 'THEM', 'SOMEWHAT', 'TO', 'COVER', 'THEM', 'WAITED', 'TILL', 'THEY', 'SHOULD', 'RECOVER'] +2033-164915-0002-236: hyp=['WHEN', 'THE', 'EUNUCH', 'SAW', 'THIS', 'CASE', 'HE', 'WONDERED', 'AT', 'THEM', 'AND', 'THROWING', 'OVER', 'THEM', 'SOMEWHAT', 'TO', 'COVER', 'THEM', 'WAITED', 'TILL', 'THEY', 
'SHOULD', 'RECOVER'] +2033-164915-0003-237: ref=['AFTER', 'A', 'WHILE', 'THEY', 'CAME', 'TO', 'THEMSELVES', 'AND', 'NUZHAT', 'AL', 'ZAMAN', 'REJOICED', 'WITH', 'EXCEEDING', 'JOY', 'OPPRESSION', 'AND', 'DEPRESSION', 'LEFT', 'HER', 'AND', 'GLADNESS', 'TOOK', 'THE', 'MASTERY', 'OF', 'HER', 'AND', 'SHE', 'REPEATED', 'THESE', 'VERSES'] +2033-164915-0003-237: hyp=['AFTER', 'AWHILE', 'THEY', 'CAME', 'TO', 'THEMSELVES', 'AND', 'NUZHAT', 'AL', 'ZAMAN', 'REJOICED', 'WITH', 'EXCEEDING', 'JOY', 'OPPRESSION', 'AND', 'DEPRESSION', 'LEFT', 'HER', 'AND', 'GLADNESS', 'TOOK', 'THE', 'MASTERY', 'OF', 'HER', 'AND', 'SHE', 'REPEATED', 'THESE', 'VERSES'] +2033-164915-0004-238: ref=['ACCORDINGLY', 'SHE', 'TOLD', 'HIM', 'ALL', 'THAT', 'HAD', 'COME', 'TO', 'HER', 'SINCE', 'THEIR', 'SEPARATION', 'AT', 'THE', 'KHAN', 'AND', 'WHAT', 'HAD', 'HAPPENED', 'TO', 'HER', 'WITH', 'THE', 'BADAWI', 'HOW', 'THE', 'MERCHANT', 'HAD', 'BOUGHT', 'HER', 'OF', 'HIM', 'AND', 'HAD', 'TAKEN', 'HER', 'TO', 'HER', 'BROTHER', 'SHARRKAN', 'AND', 'HAD', 'SOLD', 'HER', 'TO', 'HIM', 'HOW', 'HE', 'HAD', 'FREED', 'HER', 'AT', 'THE', 'TIME', 'OF', 'BUYING', 'HOW', 'HE', 'HAD', 'MADE', 'A', 'MARRIAGE', 'CONTRACT', 'WITH', 'HER', 'AND', 'HAD', 'GONE', 'IN', 'TO', 'HER', 'AND', 'HOW', 'THE', 'KING', 'THEIR', 'SIRE', 'HAD', 'SENT', 'AND', 'ASKED', 'FOR', 'HER', 'FROM', 'SHARRKAN'] +2033-164915-0004-238: hyp=['ACCORDINGLY', 'SHE', 'TOLD', 'HIM', 'ALL', 'THAT', 'HAD', 'COME', 'TO', 'HER', 'SINCE', 'THEIR', 'SEPARATION', 'AT', 'THE', 'KHAN', 'AND', 'WHAT', 'HAD', 'HAPPENED', 'TO', 'HER', 'WITH', 'THE', 'BADAWI', 'HOW', 'THE', 'MERCHANT', 'HAD', 'BOUGHT', 'HER', 'OF', 'HIM', 'AND', 'HAD', 'TAKEN', 'HER', 'TO', 'HER', 'BROTHER', 'SHARKAN', 'AND', 'HAD', 'SOLD', 'HER', 'TO', 'HIM', 'HOW', 'HE', 'HAD', 'FREED', 'HER', 'AT', 'THE', 'TIME', 'OF', 'BUYING', 'HOW', 'HE', 'HAD', 'MADE', 'A', 'MARRIAGE', 'CONTRACT', 'WITH', 'HER', 'AND', 'HAD', 'GONE', 'IN', 'TO', 'HER', 'AND', 'HOW', 'THE', 'KING', 'THEIR', 'SIRE', 'HAD', 'SENT', 'AND', 'ASKED', 'FOR', 'HER', 'FROM', 'SHARKAN'] +2033-164915-0005-239: ref=['BUT', 'NOW', 'GO', 'TO', 'THY', 'MASTER', 'AND', 'BRING', 'HIM', 'QUICKLY', 'TO', 'ME'] +2033-164915-0005-239: hyp=['BUT', 'NOW', 'GO', 'TO', 'THY', 'MASTER', 'AND', 'BRING', 'HIM', 'QUICKLY', 'TO', 'ME'] +2033-164915-0006-240: ref=['THE', 'CHAMBERLAIN', 'CALLED', 'THE', 'CASTRATO', 'AND', 'CHARGED', 'HIM', 'TO', 'DO', 'ACCORDINGLY', 'SO', 'HE', 'REPLIED', 'I', 'HEAR', 'AND', 'I', 'OBEY', 'AND', 'HE', 'TOOK', 'HIS', 'PAGES', 'WITH', 'HIM', 'AND', 'WENT', 'OUT', 'IN', 'SEARCH', 'OF', 'THE', 'STOKER', 'TILL', 'HE', 'FOUND', 'HIM', 'IN', 'THE', 'REAR', 'OF', 'THE', 'CARAVAN', 'GIRTHING', 'HIS', 'ASS', 'AND', 'PREPARING', 'FOR', 'FLIGHT'] +2033-164915-0006-240: hyp=['THE', 'CHAMBERLAIN', 'CALLED', 'CASTRATO', 'AND', 'CHARGED', 'HIM', 'TO', 'DO', 'ACCORDINGLY', 'SO', 'HE', 'REPLIED', 'I', 'HEAR', 'AND', 'I', 'OBEY', 'AND', 'HE', 'TOOK', 'HIS', 'PAGES', 'WITH', 'HIM', 'AND', 'WENT', 'OUT', 'IN', 'SEARCH', 'OF', 'THE', 'STALKER', 'TILL', 'HE', 'FOUND', 'HIM', 'IN', 'THE', 'REAR', 'OF', 'THE', 'CARAVAN', 'GIRDING', 'HIS', 'ASS', 'AND', 'PREPARING', 'FOR', 'FLIGHT'] +2033-164915-0007-241: ref=['SHE', 'SAID', 'IT', 'HATH', 'REACHED', 'ME', 'O', 'AUSPICIOUS', 'KING', 'THAT', 'WHEN', 'THE', 'STOKER', 'GIRTHED', 'HIS', 'ASS', 'FOR', 'FLIGHT', 'AND', 'BESPAKE', 'HIMSELF', 'SAYING', 'OH', 'WOULD', 'I', 'KNEW', 'WHAT', 'IS', 'BECOME', 'OF', 'HIM'] +2033-164915-0007-241: hyp=['SHE', 'SAID', 'IT', 'HATH', 'REACHED', 'ME', 'O', 'AUSPICIOUS', 'KING', 'THAT', 'WHEN', 'THE', 
'STALKER', 'GIRDED', 'HIS', 'ASS', 'FOR', 'FLIGHT', 'AND', 'BESPAKE', 'HIMSELF', 'SAYING', 'O', 'WOULD', 'I', 'KNEW', 'WHAT', 'IS', 'BECOME', 'OF', 'HIM'] +2033-164915-0008-242: ref=['I', 'BELIEVE', 'HE', 'HATH', 'DENOUNCED', 'ME', 'TO', 'THE', 'EUNUCH', 'HENCE', 'THESE', 'PAGES', 'ET', 'ABOUT', 'ME', 'AND', 'HE', 'HATH', 'MADE', 'ME', 'AN', 'ACCOMPLICE', 'IN', 'HIS', 'CRIME'] +2033-164915-0008-242: hyp=['I', 'BELIEVE', 'HE', 'HATH', 'DENOUNCED', 'ME', 'TO', 'THE', 'EUNUCH', 'HENCE', 'THESE', 'PAGES', 'AT', 'ABOUT', 'ME', 'AND', 'HE', 'HATH', 'MADE', 'ME', 'AN', 'ACCOMPLICE', 'IN', 'HIS', 'CRIME'] +2033-164915-0009-243: ref=['WHY', 'DIDST', 'THOU', 'SAY', 'I', 'NEVER', 'REPEATED', 'THESE', 'COUPLETS', 'NOR', 'DO', 'I', 'KNOW', 'WHO', 'REPEATED', 'THEM', 'WHEN', 'IT', 'WAS', 'THY', 'COMPANION'] +2033-164915-0009-243: hyp=['WHY', 'DIDST', 'THOU', 'SAY', 'I', 'NEVER', 'REPEATED', 'THESE', 'COUPLETS', 'NOR', 'DO', 'I', 'KNOW', 'WHO', 'REPEATED', 'THEM', 'WHEN', 'IT', 'WAS', 'THY', 'COMPANION'] +2033-164915-0010-244: ref=['BUT', 'NOW', 'I', 'WILL', 'NOT', 'LEAVE', 'THEE', 'BETWEEN', 'THIS', 'PLACE', 'AND', 'BAGHDAD', 'AND', 'WHAT', 'BETIDETH', 'THY', 'COMRADE', 'SHALL', 'BETIDE', 'THEE'] +2033-164915-0010-244: hyp=['BUT', 'NOW', 'I', 'WILL', 'NOT', 'LEAVE', 'THEE', 'BETWEEN', 'THIS', 'PLACE', 'AND', 'BAGHDAD', 'AND', 'WHAT', 'BETIDETH', 'THY', 'COMRADE', 'SHALL', 'BETIDE', 'THEE'] +2033-164915-0011-245: ref=['TWAS', 'AS', 'I', 'FEARED', 'THE', 'COMING', 'ILLS', 'DISCERNING', 'BUT', 'UNTO', 'ALLAH', 'WE', 'ARE', 'ALL', 'RETURNING'] +2033-164915-0011-245: hyp=['TWAS', 'AS', 'I', 'FEARED', 'THE', 'CAMEN', 'EILS', 'DISCERNING', 'BUT', 'UNTO', 'ALLAH', 'WE', 'ARE', 'ALL', 'RETURNING'] +2033-164915-0012-246: ref=['THEN', 'THE', 'EUNUCH', 'CRIED', 'UPON', 'THE', 'PAGES', 'SAYING', 'TAKE', 'HIM', 'OFF', 'THE', 'ASS'] +2033-164915-0012-246: hyp=['THEN', 'THE', 'EUNUCH', 'CRIED', 'UPON', 'THE', 'PAGES', 'SAYING', 'TAKE', 'HIM', 'OFF', 'THE', 'ASS'] +2033-164915-0013-247: ref=['AND', 'HE', 'ANSWERED', 'I', 'AM', 'THE', 'CHAMBERLAIN', 'OF', 'THE', 'EMIR', 'OF', 'DAMASCUS', 'KING', 'SHARRKAN', 'SON', 'OF', 'OMAR', 'BIN', 'AL', "NU'UMAN", 'LORD', 'OF', 'BAGHDAD', 'AND', 'OF', 'THE', 'LAND', 'OF', 'KHORASAN', 'AND', 'I', 'BRING', 'TRIBUTE', 'AND', 'PRESENTS', 'FROM', 'HIM', 'TO', 'HIS', 'FATHER', 'IN', 'BAGHDAD'] +2033-164915-0013-247: hyp=['AND', 'HE', 'ANSWERED', 'I', 'AM', 'THE', 'CHAMBERLAIN', 'OF', 'THE', 'EMIR', 'OF', 'DAMASCUS', 'KING', 'SHARKAN', 'SON', 'OF', 'OMAR', 'BIN', 'AL', 'NUMAN', 'LORD', 'OF', 'BAGHDAD', 'AND', 'OF', 'THE', 'LAND', 'OF', 'KHORASAN', 'AND', 'I', 'BRING', 'TRIBUTE', 'AND', 'PRESENTS', 'FROM', 'HIM', 'TO', 'HIS', 'FATHER', 'IN', 'BAGHDAD'] +2033-164915-0014-248: ref=['SO', 'FARE', 'YE', 'FORWARDS', 'NO', 'HARM', 'SHALL', 'BEFAL', 'YOU', 'TILL', 'YOU', 'JOIN', 'HIS', 'GRAND', 'WAZIR', 'DANDAN'] +2033-164915-0014-248: hyp=['SO', 'FARE', 'YE', 'FORWARDS', 'NO', 'HARM', 'SHALL', 'BEFALL', 'YOU', 'TILL', 'YOU', 'JOIN', 'HIS', 'GRAND', 'WAZIR', 'DANDAN'] +2033-164915-0015-249: ref=['THEN', 'HE', 'BADE', 'HIM', 'BE', 'SEATED', 'AND', 'QUESTIONED', 'HIM', 'AND', 'HE', 'REPLIED', 'THAT', 'HE', 'WAS', 'CHAMBERLAIN', 'TO', 'THE', 'EMIR', 'OF', 'DAMASCUS', 'AND', 'WAS', 'BOUND', 'TO', 'KING', 'OMAR', 'WITH', 'PRESENTS', 'AND', 'THE', 'TRIBUTE', 'OF', 'SYRIA'] +2033-164915-0015-249: hyp=['THEN', 'HE', 'BADE', 'HIM', 'BE', 'SEATED', 'AND', 'QUESTIONED', 'HIM', 'AND', 'HE', 'REPLIED', 'THAT', 'HE', 'WAS', 'CHAMBERLAIN', 'TO', 'THE', 'EMIR', 'OF', 'DAMASCUS', 'AND', 'WAS', 'BOUND', 'TO', 
'KING', 'OMAR', 'WITH', 'PRESENTS', 'AND', 'THE', 'TRIBUTE', 'OF', 'SYRIA'] +2033-164915-0016-250: ref=['SO', 'IT', 'WAS', 'AGREED', 'THAT', 'WE', 'GO', 'TO', 'DAMASCUS', 'AND', 'FETCH', 'THENCE', 'THE', "KING'S", 'SON', 'SHARRKAN', 'AND', 'MAKE', 'HIM', 'SULTAN', 'OVER', 'HIS', "FATHER'S", 'REALM'] +2033-164915-0016-250: hyp=['SO', 'IT', 'WAS', 'AGREED', 'THAT', 'WE', 'GO', 'TO', 'DAMASCUS', 'AND', 'FETCH', 'THENCE', 'THE', "KING'S", 'SON', 'SHARRKAN', 'AND', 'MAKE', 'HIM', 'SULTAN', 'OVER', 'HIS', "FATHER'S", 'REALM'] +2033-164915-0017-251: ref=['AND', 'AMONGST', 'THEM', 'WERE', 'SOME', 'WHO', 'WOULD', 'HAVE', 'CHOSEN', 'THE', 'CADET', 'ZAU', 'AL', 'MAKAN', 'FOR', 'QUOTH', 'THEY', 'HIS', 'NAME', 'BE', 'LIGHT', 'OF', 'THE', 'PLACE', 'AND', 'HE', 'HATH', 'A', 'SISTER', 'NUZHAT', 'AL', 'ZAMAN', 'HIGHS', 'THE', 'DELIGHT', 'OF', 'THE', 'TIME', 'BUT', 'THEY', 'SET', 'OUT', 'FIVE', 'YEARS', 'AGO', 'FOR', 'AL', 'HIJAZ', 'AND', 'NONE', 'WOTTETH', 'WHAT', 'IS', 'BECOME', 'OF', 'THEM'] +2033-164915-0017-251: hyp=['AND', 'AMONGST', 'THEM', 'WERE', 'SOME', 'WHO', 'WOULD', 'HAVE', 'CHOSEN', 'THE', 'CADET', 'ZAU', 'AL', 'MAKAN', 'FOR', 'QUOTH', 'THEY', 'HIS', 'NAME', 'BE', 'LIGHT', 'OF', 'THE', 'PLACE', 'AND', 'HE', 'HATH', 'A', 'SISTER', 'NUZHAT', 'AL', 'ZAMAN', 'HIGHS', 'THE', 'DELIGHT', 'OF', 'THE', 'TIME', 'BUT', 'THEY', 'SET', 'OUT', 'FIVE', 'YEARS', 'AGO', 'FOR', 'ALHIJAZ', 'AND', 'NONE', 'WOTTETH', 'WHAT', 'IS', 'BECOME', 'OF', 'THEM'] +2033-164916-0000-252: ref=['SO', 'HE', 'TURNED', 'TO', 'THE', 'WAZIR', 'DANDAN', 'AND', 'SAID', 'TO', 'HIM', 'VERILY', 'YOUR', 'TALE', 'IS', 'A', 'WONDER', 'OF', 'WONDERS'] +2033-164916-0000-252: hyp=['SO', 'HE', 'TURNED', 'TO', 'THE', 'WAZIR', 'DANDAN', 'AND', 'SAID', 'TO', 'HIM', 'VERILY', 'YOUR', 'TALE', 'IS', 'A', 'WONDER', 'OF', 'WONDERS'] +2033-164916-0001-253: ref=['KNOW', 'O', 'CHIEF', 'WAZIR', 'THAT', 'HERE', 'WHERE', 'YOU', 'HAVE', 'ENCOUNTERED', 'ME', 'ALLAH', 'HATH', 'GIVEN', 'YOU', 'REST', 'FROM', 'FATIGUE', 'AND', 'BRINGETH', 'YOU', 'YOUR', 'DESIRE', 'AFTER', 'THE', 'EASIEST', 'OF', 'FASHIONS', 'FOR', 'THAT', 'HIS', 'ALMIGHTY', 'WILL', 'RESTORETH', 'TO', 'YOU', 'ZAU', 'AL', 'MAKAN', 'AND', 'HIS', 'SISTER', 'NUZHAT', 'AL', 'ZAMAN', 'WHEREBY', 'WE', 'WILL', 'SETTLE', 'THE', 'MATTER', 'AS', 'WE', 'EASILY', 'CAN'] +2033-164916-0001-253: hyp=['KNOW', 'O', 'CHIEF', 'WAZIR', 'THAT', 'HERE', 'WHERE', 'YOU', 'HAVE', 'ENCOUNTERED', 'ME', 'ALLAH', 'HATH', 'GIVEN', 'YOU', 'REST', 'FROM', 'FATIGUE', 'AND', 'BRINGETH', 'YOU', 'YOUR', 'DESIRE', 'AFTER', 'THE', 'EASIEST', 'OF', 'FASHIONS', 'FOR', 'THAT', 'HIS', 'ALMIGHTY', 'WILL', 'RESTORETH', 'TO', 'YOU', 'ZAU', 'MAKAN', 'AND', 'HIS', 'SISTER', 'NUZHAT', 'AL', 'ZAMAN', 'WHEREBY', 'WE', 'WILL', 'SETTLE', 'THE', 'MATTER', 'AS', 'WE', 'EASILY', 'CAN'] +2033-164916-0002-254: ref=['WHEN', 'THE', 'MINISTER', 'HEARD', 'THESE', 'WORDS', 'HE', 'REJOICED', 'WITH', 'GREAT', 'JOY', 'AND', 'SAID', 'O', 'CHAMBERLAIN', 'TELL', 'ME', 'THE', 'TALE', 'OF', 'THE', 'TWAIN', 'AND', 'WHAT', 'BEFEL', 'THEM', 'AND', 'THE', 'CAUSE', 'OF', 'THEIR', 'LONG', 'ABSENCE'] +2033-164916-0002-254: hyp=['WHEN', 'THE', 'MINISTER', 'HEARD', 'THESE', 'WORDS', 'HE', 'REJOICED', 'WITH', 'GREAT', 'JOY', 'AND', 'SAID', 'O', 'CHAMBERLAIN', 'TELL', 'ME', 'THE', 'TALE', 'OF', 'THE', 'TWAIN', 'AND', 'WHAT', 'BEFELL', 'THEM', 'AND', 'THE', 'CAUSE', 'OF', 'THEIR', 'LONG', 'ABSENCE'] +2033-164916-0003-255: ref=['ZAU', 'AL', 'MAKAN', 'BOWED', 'HIS', 'HEAD', 'AWHILE', 'AND', 'THEN', 'SAID', 'I', 'ACCEPT', 'THIS', 'POSITION', 'FOR', 'INDEED', 'THERE', 'WAS', 'NO', 
'REFUSING', 'AND', 'HE', 'WAS', 'CERTIFIED', 'THAT', 'THE', 'CHAMBERLAIN', 'HAD', 'COUNSELLED', 'HIM', 'WELL', 'AND', 'WISELY', 'AND', 'SET', 'HIM', 'ON', 'THE', 'RIGHT', 'WAY'] +2033-164916-0003-255: hyp=['ZAUAM', 'MAKAN', 'BOWED', 'HIS', 'HEAD', 'AWHILE', 'AND', 'THEN', 'SAID', 'I', 'ACCEPT', 'THE', 'POSITION', 'FOR', 'INDEED', 'THERE', 'WAS', 'NO', 'REFUSING', 'AND', 'HE', 'WAS', 'CERTIFIED', 'THAT', 'THE', 'CHAMBERLAIN', 'HAD', 'COUNSELLED', 'HIM', 'WELL', 'AND', 'WISELY', 'AND', 'SET', 'HIM', 'ON', 'THE', 'RIGHT', 'WAY'] +2033-164916-0004-256: ref=['THEN', 'HE', 'ADDED', 'O', 'MY', 'UNCLE', 'HOW', 'SHALL', 'I', 'DO', 'WITH', 'MY', 'BROTHER', 'SHARRKAN'] +2033-164916-0004-256: hyp=['THEN', 'HE', 'ADDED', 'O', 'MY', 'UNCLE', 'HOW', 'SHALL', 'I', 'DO', 'WITH', 'MY', 'BROTHER', 'SHARKAN'] +2033-164916-0005-257: ref=['AFTER', 'AWHILE', 'THE', 'DUST', 'DISPERSED', 'AND', 'THERE', 'APPEARED', 'UNDER', 'IT', 'THE', 'ARMY', 'OF', 'BAGHDAD', 'AND', 'KHORASAN', 'A', 'CONQUERING', 'HOST', 'LIKE', 'THE', 'FULL', 'TIDE', 'SEA', 'AND', 'SHAHRAZAD', 'PERCEIVED', 'THE', 'DAWN', 'OF', 'DAY', 'AND', 'CEASED', 'TO', 'SAY', 'HER', 'PERMITTED', 'SAY'] +2033-164916-0005-257: hyp=['AFTER', 'A', 'WHILE', 'THE', 'DUST', 'DISPERSED', 'AND', 'THERE', 'APPEARED', 'UNDER', 'IT', 'THE', 'ARMY', 'OF', 'BAGHDAD', 'AND', 'KHORASAN', 'A', 'CONQUERING', 'HOST', 'LIKE', 'THE', 'FULL', 'TIDE', 'SEA', 'AND', 'SHAHRAZAD', 'PERCEIVED', 'THE', 'DAWN', 'OF', 'DAY', 'AND', 'CEASED', 'TO', 'SAY', 'HER', 'PERMITTED', 'SAY'] +2033-164916-0006-258: ref=['WHEN', 'IT', 'WAS', 'THE', 'SEVENTY', 'EIGHTH', 'NIGHT'] +2033-164916-0006-258: hyp=['WHEN', 'IT', 'WAS', 'THE', 'SEVENTY', 'EIGHTH', 'NIGHT'] +2033-164916-0007-259: ref=['AND', 'IN', 'IT', 'ALL', 'REJOICED', 'AT', 'THE', 'ACCESSION', 'OF', 'THE', 'LIGHT', 'OF', 'THE', 'PLACE'] +2033-164916-0007-259: hyp=['AND', 'IN', 'IT', 'ALL', 'REJOICED', 'AT', 'THE', 'ACCESSION', 'OF', 'THE', 'LIGHT', 'OF', 'THE', 'PLACE'] +2033-164916-0008-260: ref=['LASTLY', 'THE', 'MINISTER', 'WENT', 'IN', 'AND', 'KISSED', 'THE', 'GROUND', 'BEFORE', 'ZAU', 'AL', 'MAKAN', 'WHO', 'ROSE', 'TO', 'MEET', 'HIM', 'SAYING', 'WELCOME', 'O', 'WAZIR', 'AND', 'SIRE', 'SANS', 'PEER'] +2033-164916-0008-260: hyp=['LASTLY', 'THE', 'MINISTER', 'WENT', 'IN', 'AND', 'KISSED', 'THE', 'GROUND', 'BEFORE', 'ZAU', 'AL', 'MAKAN', 'WHO', 'ROSE', 'TO', 'MEET', 'HIM', 'SAYING', 'WELCOME', 'O', 'WAZIR', 'AND', "SIRE'S", 'SON', 'SPEAR'] +2033-164916-0009-261: ref=['MOREOVER', 'THE', 'SULTAN', 'COMMANDED', 'HIS', 'WAZIR', 'DANDAN', 'CALL', 'A', 'TEN', 'DAYS', 'HALT', 'OF', 'THE', 'ARMY', 'THAT', 'HE', 'MIGHT', 'BE', 'PRIVATE', 'WITH', 'HIM', 'AND', 'LEARN', 'FROM', 'HIM', 'HOW', 'AND', 'WHEREFORE', 'HIS', 'FATHER', 'HAD', 'BEEN', 'SLAIN'] +2033-164916-0009-261: hyp=['MOREOVER', 'THE', 'SULTAN', 'COMMANDED', 'HIS', 'WAZIR', 'DANDAN', 'TO', 'CALL', 'AT', 'TEN', 'DAYS', 'HALT', 'OF', 'THE', 'ARMY', 'THAT', 'HE', 'MIGHT', 'BE', 'PRIVATE', 'WITH', 'HIM', 'AND', 'LEARN', 'FROM', 'HIM', 'HOW', 'AND', 'WHEREFORE', 'HIS', 'FATHER', 'HAD', 'BEEN', 'SLAIN'] +2033-164916-0010-262: ref=['HE', 'THEN', 'REPAIRED', 'TO', 'THE', 'HEART', 'OF', 'THE', 'ENCAMPMENT', 'AND', 'ORDERED', 'THE', 'HOST', 'TO', 'HALT', 'TEN', 'DAYS'] +2033-164916-0010-262: hyp=['HE', 'THEN', 'REPAIRED', 'TO', 'THE', 'HEART', 'OF', 'THE', 'ENCAMPMENT', 'AND', 'ORDERED', 'THE', 'HOST', 'TO', 'HALT', 'TEN', 'DAYS'] +2414-128291-0000-263: ref=['WHAT', 'HATH', 'HAPPENED', 'UNTO', 'ME'] +2414-128291-0000-263: hyp=['WHAT', 'HATH', 'HAPPENED', 'UNTO', 'ME'] +2414-128291-0001-264: 
ref=['HE', 'ASKED', 'HIMSELF', 'SOMETHING', 'WARM', 'AND', 'LIVING', 'QUICKENETH', 'ME', 'IT', 'MUST', 'BE', 'IN', 'THE', 'NEIGHBOURHOOD'] +2414-128291-0001-264: hyp=['HE', 'ASKED', 'HIMSELF', 'SOMETHING', 'WARM', 'AND', 'LIVING', 'QUICKENETH', 'ME', 'IT', 'MUST', 'BE', 'IN', 'THAT', 'NEIGHBORHOOD'] +2414-128291-0002-265: ref=['WHEN', 'HOWEVER', 'ZARATHUSTRA', 'WAS', 'QUITE', 'NIGH', 'UNTO', 'THEM', 'THEN', 'DID', 'HE', 'HEAR', 'PLAINLY', 'THAT', 'A', 'HUMAN', 'VOICE', 'SPAKE', 'IN', 'THE', 'MIDST', 'OF', 'THE', 'KINE', 'AND', 'APPARENTLY', 'ALL', 'OF', 'THEM', 'HAD', 'TURNED', 'THEIR', 'HEADS', 'TOWARDS', 'THE', 'SPEAKER'] +2414-128291-0002-265: hyp=['WHEN', 'HOWEVER', 'ZARATHUSTRA', 'WAS', 'QUITE', 'NIGH', 'UNTO', 'THEM', 'THEN', 'DID', 'HE', 'HEAR', 'PLAINLY', 'HUMAN', 'VOICE', 'SPAKE', 'IN', 'THE', 'MIDST', 'OF', 'THE', 'KINE', 'AND', 'APPARENTLY', 'ALL', 'OF', 'THEM', 'HAD', 'TURNED', 'THEIR', 'HEADS', 'TOWARDS', 'THE', 'SPEAKER'] +2414-128291-0003-266: ref=['WHAT', 'DO', 'I', 'HERE', 'SEEK'] +2414-128291-0003-266: hyp=['OR', 'DO', 'I', 'HERE', 'SEEK'] +2414-128291-0004-267: ref=['ANSWERED', 'HE', 'THE', 'SAME', 'THAT', 'THOU', 'SEEKEST', 'THOU', 'MISCHIEF', 'MAKER', 'THAT', 'IS', 'TO', 'SAY', 'HAPPINESS', 'UPON', 'EARTH'] +2414-128291-0004-267: hyp=['ANSWERED', 'HE', 'THE', 'SAME', 'THAT', 'THOU', 'SEEKEST', 'THOU', 'MISCHIEF', 'MAKER', 'THAT', 'IS', 'TO', 'SAY', 'HAPPINESS', 'UPON', 'EARTH'] +2414-128291-0005-268: ref=['FOR', 'I', 'TELL', 'THEE', 'THAT', 'I', 'HAVE', 'ALREADY', 'TALKED', 'HALF', 'A', 'MORNING', 'UNTO', 'THEM', 'AND', 'JUST', 'NOW', 'WERE', 'THEY', 'ABOUT', 'TO', 'GIVE', 'ME', 'THEIR', 'ANSWER'] +2414-128291-0005-268: hyp=['FOR', 'I', 'TELL', 'THEE', 'THAT', 'I', 'HAVE', 'ALREADY', 'TALKED', 'HALF', 'A', 'MORNING', 'UNTO', 'THEM', 'AND', 'JUST', 'NOW', 'WERE', 'THEY', 'ABOUT', 'TO', 'GIVE', 'ME', 'THEIR', 'ANSWER'] +2414-128291-0006-269: ref=['HE', 'WOULD', 'NOT', 'BE', 'RID', 'OF', 'HIS', 'AFFLICTION'] +2414-128291-0006-269: hyp=['HE', 'WOULD', 'NOT', 'BE', 'RID', 'OF', 'HIS', 'AFFLICTION'] +2414-128291-0007-270: ref=['WHO', 'HATH', 'NOT', 'AT', 'PRESENT', 'HIS', 'HEART', 'HIS', 'MOUTH', 'AND', 'HIS', 'EYES', 'FULL', 'OF', 'DISGUST'] +2414-128291-0007-270: hyp=['WHO', 'HATH', 'NOT', 'AT', 'PRESENT', 'HIS', 'HEART', 'HIS', 'MOUTH', 'AND', 'HIS', 'EYES', 'FULL', 'OF', 'DISGUST'] +2414-128291-0008-271: ref=['THOU', 'ALSO', 'THOU', 'ALSO'] +2414-128291-0008-271: hyp=['THOU', 'ALSO', 'THOU', 'ALSO'] +2414-128291-0009-272: ref=['BUT', 'BEHOLD', 'THESE', 'KINE'] +2414-128291-0009-272: hyp=['BUT', 'BEHOLD', 'THIS', 'KIND'] +2414-128291-0010-273: ref=['THE', 'KINE', 'HOWEVER', 'GAZED', 'AT', 'IT', 'ALL', 'AND', 'WONDERED'] +2414-128291-0010-273: hyp=['THE', 'KIND', 'HOWEVER', 'GAZED', 'AT', 'IT', 'ALL', 'AND', 'WONDERED'] +2414-128291-0011-274: ref=['WANTON', 'AVIDITY', 'BILIOUS', 'ENVY', 'CAREWORN', 'REVENGE', 'POPULACE', 'PRIDE', 'ALL', 'THESE', 'STRUCK', 'MINE', 'EYE'] +2414-128291-0011-274: hyp=['WANTON', 'AID', 'DUTY', 'BILIOUS', 'ENVY', 'CAREWORN', 'REVENGE', 'POPULOUS', 'PRIDE', 'ALL', 'ALWAYS', 'STRUCK', 'MINE', 'EYE'] +2414-128291-0012-275: ref=['IT', 'IS', 'NO', 'LONGER', 'TRUE', 'THAT', 'THE', 'POOR', 'ARE', 'BLESSED'] +2414-128291-0012-275: hyp=['IT', 'IS', 'NO', 'LONGER', 'TRUE', 'THAT', 'THE', 'POOR', 'ARE', 'BLESSED'] +2414-128291-0013-276: ref=['THE', 'KINGDOM', 'OF', 'HEAVEN', 'HOWEVER', 'IS', 'WITH', 'THE', 'KINE', 'AND', 'WHY', 'IS', 'IT', 'NOT', 'WITH', 'THE', 'RICH'] +2414-128291-0013-276: hyp=['THE', 'KINGDOM', 'OF', 'HEAVEN', 'HOWEVER', 'IS', 'WITH', 
'THE', 'KIND', 'AND', 'WHY', 'IS', 'IT', 'NOT', 'WITH', 'THE', 'RICH'] +2414-128291-0014-277: ref=['WHY', 'DOST', 'THOU', 'TEMPT', 'ME'] +2414-128291-0014-277: hyp=['WHY', 'DOST', 'THOU', 'TEMPT', 'ME'] +2414-128291-0015-278: ref=['ANSWERED', 'THE', 'OTHER'] +2414-128291-0015-278: hyp=['ANSWERED', 'THE', 'OTHER'] +2414-128291-0016-279: ref=['THOU', 'KNOWEST', 'IT', 'THYSELF', 'BETTER', 'EVEN', 'THAN', 'I'] +2414-128291-0016-279: hyp=['THOU', 'KNOWEST', 'IT', 'THYSELF', 'BETTER', 'EVEN', 'THAN', 'I'] +2414-128291-0017-280: ref=['THUS', 'SPAKE', 'THE', 'PEACEFUL', 'ONE', 'AND', 'PUFFED', 'HIMSELF', 'AND', 'PERSPIRED', 'WITH', 'HIS', 'WORDS', 'SO', 'THAT', 'THE', 'KINE', 'WONDERED', 'ANEW'] +2414-128291-0017-280: hyp=['THUS', 'SPAKE', 'THE', 'B', 'YOUTHFUL', 'ONE', 'AND', 'PUFFED', 'HIMSELF', 'AND', 'PERSPIRED', 'WITH', 'HIS', 'WORDS', 'SO', 'IN', 'THE', 'KIND', 'WANDERED', 'ANEW'] +2414-128291-0018-281: ref=['THOU', 'DOEST', 'VIOLENCE', 'TO', 'THYSELF', 'THOU', 'PREACHER', 'ON', 'THE', 'MOUNT', 'WHEN', 'THOU', 'USEST', 'SUCH', 'SEVERE', 'WORDS'] +2414-128291-0018-281: hyp=['THOU', 'DOEST', 'VICE', 'TO', 'THYSELF', 'THOU', 'PREACHER', 'OF', 'THE', 'MOUNT', 'AND', 'THOU', 'USEST', 'SUCH', 'SEVERE', 'WORDS'] +2414-128291-0019-282: ref=['THEY', 'ALSO', 'ABSTAIN', 'FROM', 'ALL', 'HEAVY', 'THOUGHTS', 'WHICH', 'INFLATE', 'THE', 'HEART'] +2414-128291-0019-282: hyp=['THEY', 'ALSO', 'ABSTAINED', 'FROM', 'ALL', 'HEAVY', 'THOUGHTS', 'WHICH', 'INFLATE', 'THE', 'HEART'] +2414-128291-0020-283: ref=['WELL'] +2414-128291-0020-283: hyp=['WELL'] +2414-128291-0021-284: ref=['SAID', 'ZARATHUSTRA', 'THOU', 'SHOULDST', 'ALSO', 'SEE', 'MINE', 'ANIMALS', 'MINE', 'EAGLE', 'AND', 'MY', 'SERPENT', 'THEIR', 'LIKE', 'DO', 'NOT', 'AT', 'PRESENT', 'EXIST', 'ON', 'EARTH'] +2414-128291-0021-284: hyp=['SAID', 'ZARATHUSTRA', 'THOU', 'SHOULDST', 'ALSO', 'SEE', 'MINE', 'ANIMALS', 'MINE', 'EAGLE', 'AND', 'MY', 'SERPENT', 'THEY', 'ALIKE', 'DO', 'NOT', 'AT', 'PRESENT', 'EXIST', 'ON', 'EARTH'] +2414-128291-0022-285: ref=['AND', 'TALK', 'TO', 'MINE', 'ANIMALS', 'OF', 'THE', 'HAPPINESS', 'OF', 'ANIMALS'] +2414-128291-0022-285: hyp=['AND', 'TALKED', 'TO', 'MINE', 'ANIMALS', 'OF', 'THE', 'HAPPINESS', 'OF', 'ANIMALS'] +2414-128291-0023-286: ref=['NOW', 'HOWEVER', 'TAKE', 'LEAVE', 'AT', 'ONCE', 'OF', 'THY', 'KINE', 'THOU', 'STRANGE', 'ONE'] +2414-128291-0023-286: hyp=['NOW', 'HOWEVER', 'TAKE', 'LEAVE', 'AT', 'ONCE', 'OF', 'THY', 'KIND', 'THOU', 'STRANGE', 'ONE'] +2414-128291-0024-287: ref=['THOU', 'AMIABLE', 'ONE'] +2414-128291-0024-287: hyp=['THOU', 'AMIABLE', 'ONE'] +2414-128291-0025-288: ref=['FOR', 'THEY', 'ARE', 'THY', 'WARMEST', 'FRIENDS', 'AND', 'PRECEPTORS'] +2414-128291-0025-288: hyp=['FOR', 'THEY', 'ARE', 'THY', 'WARMEST', 'FRIENDS', 'AND', 'PERSEPTORS'] +2414-128291-0026-289: ref=['THOU', 'EVIL', 'FLATTERER'] +2414-128291-0026-289: hyp=['THOU', 'AVIOUS', 'FLATTERER'] +2414-128292-0000-290: ref=['WHITHER', 'HATH', 'MY', 'LONESOMENESS', 'GONE', 'SPAKE', 'HE'] +2414-128292-0000-290: hyp=['WHITHER', 'HATH', 'MY', 'LONESOMENESS', 'GONE', 'SPAKE', 'HE'] +2414-128292-0001-291: ref=['MY', 'SHADOW', 'CALLETH', 'ME'] +2414-128292-0001-291: hyp=['MY', 'SHADOW', 'CALLETH', 'ME'] +2414-128292-0002-292: ref=['WHAT', 'MATTER', 'ABOUT', 'MY', 'SHADOW'] +2414-128292-0002-292: hyp=['WHAT', 'MATTER', 'ABOUT', 'MY', 'SHADOW'] +2414-128292-0003-293: ref=['LET', 'IT', 'RUN', 'AFTER', 'ME', 'I', 'RUN', 'AWAY', 'FROM', 'IT'] +2414-128292-0003-293: hyp=['LET', 'IT', 'RUN', 'AFTER', 'ME', 'I', 'RAN', 'AWAY', 'FROM', 'IT'] +2414-128292-0004-294: 
ref=['THUS', 'SPAKE', 'ZARATHUSTRA', 'TO', 'HIS', 'HEART', 'AND', 'RAN', 'AWAY'] +2414-128292-0004-294: hyp=['THUS', 'SAYING', 'THE', 'TWO', 'STRIKE', 'TO', 'HIS', 'HEART', 'AND', 'RAN', 'AWAY'] +2414-128292-0005-295: ref=['VERILY', 'MY', 'FOLLY', 'HATH', 'GROWN', 'BIG', 'IN', 'THE', 'MOUNTAINS'] +2414-128292-0005-295: hyp=['VERILY', 'MY', 'FOLLY', 'HATH', 'GROWN', 'BIG', 'IN', 'THE', 'MOUNTAINS'] +2414-128292-0006-296: ref=['NOW', 'DO', 'I', 'HEAR', 'SIX', 'OLD', 'FOOLS', 'LEGS', 'RATTLING', 'BEHIND', 'ONE', 'ANOTHER'] +2414-128292-0006-296: hyp=['NOW', 'DO', 'I', 'HEAR', 'SIX', 'OLD', "FOOL'S", 'LEGS', 'RATTLING', 'BEHIND', 'ONE', 'ANOTHER'] +2414-128292-0007-297: ref=['BUT', 'DOTH', 'ZARATHUSTRA', 'NEED', 'TO', 'BE', 'FRIGHTENED', 'BY', 'HIS', 'SHADOW'] +2414-128292-0007-297: hyp=['BUT', 'DO', 'ZARATHUSTRA', 'NEED', 'TO', 'BE', 'FRIGHTENED', 'BY', 'HIS', 'SHADOW'] +2414-128292-0008-298: ref=['ALSO', 'METHINKETH', 'THAT', 'AFTER', 'ALL', 'IT', 'HATH', 'LONGER', 'LEGS', 'THAN', 'MINE'] +2414-128292-0008-298: hyp=['ALSO', 'METHINK', 'IT', 'THAT', 'AFTER', 'ALL', 'IT', 'HATH', 'LONGER', 'LESS', 'THAN', 'MINE'] +2414-128292-0009-299: ref=['FOR', 'WHEN', 'ZARATHUSTRA', 'SCRUTINISED', 'HIM', 'WITH', 'HIS', 'GLANCE', 'HE', 'WAS', 'FRIGHTENED', 'AS', 'BY', 'A', 'SUDDEN', 'APPARITION', 'SO', 'SLENDER', 'SWARTHY', 'HOLLOW', 'AND', 'WORN', 'OUT', 'DID', 'THIS', 'FOLLOWER', 'APPEAR'] +2414-128292-0009-299: hyp=['FOR', 'WHEN', 'THEIR', 'TOESTRAS', 'SCRUTINIZED', 'HIM', 'WITH', 'HIS', 'GLANCE', 'HE', 'WAS', 'FRIGHTENED', 'AS', 'BY', 'A', 'SUDDEN', 'APPARITION', 'SO', 'SLENDER', 'SWARTHY', 'HOLLOW', 'AND', 'WORN', 'OUT', 'DID', 'HIS', 'FOLLOWER', 'APPEAR'] +2414-128292-0010-300: ref=['ASKED', 'ZARATHUSTRA', 'VEHEMENTLY', 'WHAT', 'DOEST', 'THOU', 'HERE'] +2414-128292-0010-300: hyp=['ASKED', 'THEIR', 'T', 'EXTRA', 'VEHEMENTLY', 'WHAT', 'DOEST', 'THOU', 'HERE'] +2414-128292-0011-301: ref=['AND', 'WHY', 'CALLEST', 'THOU', 'THYSELF', 'MY', 'SHADOW'] +2414-128292-0011-301: hyp=['AND', 'WHY', 'CALLEST', 'THOU', 'THYSELF', 'MY', 'SHADOW'] +2414-128292-0012-302: ref=['THOU', 'ART', 'NOT', 'PLEASING', 'UNTO', 'ME'] +2414-128292-0012-302: hyp=['THOU', 'ART', 'NOT', 'PLEASING', 'UNTO', 'ME'] +2414-128292-0013-303: ref=['MUST', 'I', 'EVER', 'BE', 'ON', 'THE', 'WAY'] +2414-128292-0013-303: hyp=['MUST', 'I', 'EVER', 'BE', 'ON', 'THE', 'WAY'] +2414-128292-0014-304: ref=['O', 'EARTH', 'THOU', 'HAST', 'BECOME', 'TOO', 'ROUND', 'FOR', 'ME'] +2414-128292-0014-304: hyp=['O', 'ART', 'THOU', 'HAST', 'BECOME', 'TOO', 'ROUND', 'FOR', 'ME'] +2414-128292-0015-305: ref=['WHEN', 'THE', 'DEVIL', 'CASTETH', 'HIS', 'SKIN', 'DOTH', 'NOT', 'HIS', 'NAME', 'ALSO', 'FALL', 'AWAY', 'IT', 'IS', 'ALSO', 'SKIN'] +2414-128292-0015-305: hyp=['WHEN', 'THE', 'DEVIL', 'CASTETH', 'HIS', 'SKIN', 'DOTH', 'NOT', 'HIS', 'NAME', 'ALSO', 'FALL', 'AWAY', 'IT', 'IS', 'ALSO', 'SKINNED'] +2414-128292-0016-306: ref=['THE', 'DEVIL', 'HIMSELF', 'IS', 'PERHAPS', 'SKIN'] +2414-128292-0016-306: hyp=['THE', 'DEVIL', 'HIMSELF', 'IS', 'PERHAPS', 'KIN'] +2414-128292-0017-307: ref=['SOMETIMES', 'I', 'MEANT', 'TO', 'LIE', 'AND', 'BEHOLD'] +2414-128292-0017-307: hyp=['SOMETIMES', 'I', 'MEANT', 'TO', 'LIE', 'AND', 'BEHOLD'] +2414-128292-0018-308: ref=['THEN', 'ONLY', 'DID', 'I', 'HIT', 'THE', 'TRUTH'] +2414-128292-0018-308: hyp=['THE', 'NO', 'ONLY', 'DID', 'I', 'HATE', 'THAT', 'TRUTH'] +2414-128292-0019-309: ref=['HOW', 'HAVE', 'I', 'STILL', 'INCLINATION'] +2414-128292-0019-309: hyp=['HOW', 'HAVE', 'I', 'STILL', 'INCLINATIONS'] +2414-128292-0020-310: ref=['HAVE', 'I', 
'STILL', 'A', 'GOAL'] +2414-128292-0020-310: hyp=['AM', 'I', 'STILL', 'A', 'GOLD'] +2414-128292-0021-311: ref=['A', 'HAVEN', 'TOWARDS', 'WHICH', 'MY', 'SAIL', 'IS', 'SET'] +2414-128292-0021-311: hyp=['A', 'HAVEN', 'TOWARDS', 'WHICH', 'MY', 'SAILORS', 'SET'] +2414-128292-0022-312: ref=['FOR', 'IT', 'DO', 'I', 'ASK', 'AND', 'SEEK', 'AND', 'HAVE', 'SOUGHT', 'BUT', 'HAVE', 'NOT', 'FOUND', 'IT'] +2414-128292-0022-312: hyp=['FOR', 'IT', 'TOO', 'I', 'ASK', 'AND', 'SEEK', 'AND', 'HAVE', 'SOUGHT', 'BUT', 'HAVE', 'NOT', 'FOUND', 'IT'] +2414-128292-0023-313: ref=['O', 'ETERNAL', 'EVERYWHERE', 'O', 'ETERNAL', 'NOWHERE', 'O', 'ETERNAL', 'IN', 'VAIN'] +2414-128292-0023-313: hyp=['OR', 'ETERNAL', 'EVERYWHERE', 'OR', 'E', 'TURNED', 'NOWHERE', 'OR', 'TURNEDETH', 'IN', 'VAIN'] +2414-128292-0024-314: ref=['THOU', 'ART', 'MY', 'SHADOW'] +2414-128292-0024-314: hyp=['THOU', 'ART', 'MY', 'SHADOW'] +2414-128292-0025-315: ref=['SAID', 'HE', 'AT', 'LAST', 'SADLY'] +2414-128292-0025-315: hyp=['SAID', 'HE', 'AT', 'LAST', 'SADLY'] +2414-128292-0026-316: ref=['THY', 'DANGER', 'IS', 'NOT', 'SMALL', 'THOU', 'FREE', 'SPIRIT', 'AND', 'WANDERER'] +2414-128292-0026-316: hyp=['THY', 'DANGER', 'IS', 'BUT', 'SMALL', 'THOU', 'FREE', 'SPIRIT', 'AND', 'WANDERER'] +2414-128292-0027-317: ref=['THEY', 'SLEEP', 'QUIETLY', 'THEY', 'ENJOY', 'THEIR', 'NEW', 'SECURITY'] +2414-128292-0027-317: hyp=['THEY', 'SLEEP', 'QUIETLY', 'THEY', 'ENJOY', 'THEIR', 'NEW', 'SECURITY'] +2414-128292-0028-318: ref=['BEWARE', 'LEST', 'IN', 'THE', 'END', 'A', 'NARROW', 'FAITH', 'CAPTURE', 'THEE', 'A', 'HARD', 'RIGOROUS', 'DELUSION'] +2414-128292-0028-318: hyp=['BEWARE', 'LEST', 'IN', 'THE', 'END', 'A', 'NARROW', 'FAITH', 'CAPTURE', 'THEE', 'A', 'HARD', 'REGOROUS', 'DELUSION'] +2414-128292-0029-319: ref=['FOR', 'NOW', 'EVERYTHING', 'THAT', 'IS', 'NARROW', 'AND', 'FIXED', 'SEDUCETH', 'AND', 'TEMPTETH', 'THEE'] +2414-128292-0029-319: hyp=['FOR', 'NOW', 'EVERYTHING', 'THAT', 'IS', 'NARROW', 'AND', 'FIXED', 'SEDUCETH', 'AND', 'TEMPTETH', 'THEE'] +2414-128292-0030-320: ref=['THOU', 'HAST', 'LOST', 'THY', 'GOAL'] +2414-128292-0030-320: hyp=['THOU', 'HAST', 'LOST', 'THY', 'GOLD'] +2414-128292-0031-321: ref=['THOU', 'POOR', 'ROVER', 'AND', 'RAMBLER', 'THOU', 'TIRED', 'BUTTERFLY'] +2414-128292-0031-321: hyp=['THOU', 'POOR', 'ROVER', 'AND', 'RAMBLER', 'THOU', 'TIRED', 'BUT', 'TO', 'FLY'] +2414-128292-0032-322: ref=['WILT', 'THOU', 'HAVE', 'A', 'REST', 'AND', 'A', 'HOME', 'THIS', 'EVENING'] +2414-128292-0032-322: hyp=['WILT', 'THOU', 'HAVE', 'A', 'REST', 'AND', 'A', 'HOME', 'THIS', 'EVENING'] +2414-159411-0000-323: ref=['ONCE', 'UPON', 'A', 'TIME', 'A', 'BRAHMAN', 'WHO', 'WAS', 'WALKING', 'ALONG', 'THE', 'ROAD', 'CAME', 'UPON', 'AN', 'IRON', 'CAGE', 'IN', 'WHICH', 'A', 'GREAT', 'TIGER', 'HAD', 'BEEN', 'SHUT', 'UP', 'BY', 'THE', 'VILLAGERS', 'WHO', 'CAUGHT', 'HIM'] +2414-159411-0000-323: hyp=['ONCE', 'UPON', 'A', 'TIME', 'A', 'BRAHMAN', 'WHO', 'WAS', 'WALKING', 'ALONG', 'THE', 'ROAD', 'CAME', 'UPON', 'AN', 'IRON', 'CAGE', 'IN', 'WHICH', 'A', 'GREAT', 'TIGER', 'HAD', 'BEEN', 'SHUT', 'UP', 'BY', 'THE', 'VILLAGERS', 'WHO', 'CAUGHT', 'HIM'] +2414-159411-0001-324: ref=['THE', 'BRAHMAN', 'ANSWERED', 'NO', 'I', 'WILL', 'NOT', 'FOR', 'IF', 'I', 'LET', 'YOU', 'OUT', 'OF', 'THE', 'CAGE', 'YOU', 'WILL', 'EAT', 'ME'] +2414-159411-0001-324: hyp=['THE', 'BRAHMAN', 'ANSWERED', 'NO', 'I', 'WILL', 'NOT', 'FOR', 'IF', 'I', 'LET', 'YOU', 'OUT', 'OF', 'THE', 'CAGE', 'YOU', 'WILL', 'EAT', 'ME'] +2414-159411-0002-325: ref=['OH', 'FATHER', 'OF', 'MERCY', 'ANSWERED', 'THE', 'TIGER', 'IN', 
'TRUTH', 'THAT', 'I', 'WILL', 'NOT'] +2414-159411-0002-325: hyp=['O', 'FATHER', 'OF', 'MERCY', 'ANSWERED', 'THE', 'TIGER', 'IN', 'TRUTH', 'THAT', 'I', 'WILL', 'NOT'] +2414-159411-0003-326: ref=['I', 'WILL', 'NEVER', 'BE', 'SO', 'UNGRATEFUL', 'ONLY', 'LET', 'ME', 'OUT', 'THAT', 'I', 'MAY', 'DRINK', 'SOME', 'WATER', 'AND', 'RETURN'] +2414-159411-0003-326: hyp=['I', 'WILL', 'NEVER', 'BE', 'SO', 'UNGRATEFUL', 'ONLY', 'LET', 'ME', 'OUT', 'THAT', 'I', 'MAY', 'DRINK', 'SOME', 'WATER', 'AND', 'RETURN'] +2414-159411-0004-327: ref=['THEN', 'THE', 'BRAHMAN', 'TOOK', 'PITY', 'ON', 'HIM', 'AND', 'OPENED', 'THE', 'CAGE', 'DOOR', 'BUT', 'NO', 'SOONER', 'HAD', 'HE', 'DONE', 'SO', 'THAN', 'THE', 'TIGER', 'JUMPING', 'OUT', 'SAID', 'NOW', 'I', 'WILL', 'EAT', 'YOU', 'FIRST', 'AND', 'DRINK', 'THE', 'WATER', 'AFTERWARDS'] +2414-159411-0004-327: hyp=['THEN', 'THE', 'BRAHMAN', 'TOOK', 'PITY', 'ON', 'HIM', 'AND', 'OPENED', 'THE', 'CAGE', 'DOOR', 'BUT', 'NO', 'SOONER', 'HAD', 'HE', 'TURNED', 'SO', 'THAN', 'THE', 'TIGER', 'JUMPING', 'OUT', 'SAID', 'NOW', 'I', 'WILL', 'EAT', 'YOU', 'FIRST', 'AND', 'DRINK', 'THE', 'WATER', 'AFTERWARDS'] +2414-159411-0005-328: ref=['SO', 'THE', 'BRAHMAN', 'AND', 'THE', 'TIGER', 'WALKED', 'ON', 'TILL', 'THEY', 'CAME', 'TO', 'A', 'BANYAN', 'TREE', 'AND', 'THE', 'BRAHMAN', 'SAID', 'TO', 'IT', 'BANYAN', 'TREE', 'BANYAN', 'TREE', 'HEAR', 'AND', 'GIVE', 'JUDGMENT'] +2414-159411-0005-328: hyp=['SO', 'THE', 'BRAHMAN', 'AND', 'THE', 'TIGER', 'WALKED', 'ON', 'TILL', 'THEY', 'CAME', 'TO', 'A', 'BANDON', 'TREE', 'AND', 'THE', 'BRAHMAN', 'SAID', 'TO', 'IT', 'BANNON', 'TREE', 'BANNON', 'TREE', 'HEAR', 'AND', 'GIVE', 'JURGMENT'] +2414-159411-0006-329: ref=['ON', 'WHAT', 'MUST', 'I', 'GIVE', 'JUDGMENT', 'ASKED', 'THE', 'BANYAN', 'TREE'] +2414-159411-0006-329: hyp=['ON', 'WHAT', 'MUST', 'I', 'GIVE', 'JUDGMENT', 'ASKED', 'THE', 'BANDED', 'TREE'] +2414-159411-0007-330: ref=['THIS', 'TIGER', 'SAID', 'THE', 'BRAHMAN', 'BEGGED', 'ME', 'TO', 'LET', 'HIM', 'OUT', 'OF', 'HIS', 'CAGE', 'TO', 'DRINK', 'A', 'LITTLE', 'WATER', 'AND', 'HE', 'PROMISED', 'NOT', 'TO', 'HURT', 'ME', 'IF', 'I', 'DID', 'SO', 'BUT', 'NOW', 'THAT', 'I', 'HAVE', 'LET', 'HIM', 'OUT', 'HE', 'WISHES', 'TO', 'EAT', 'ME'] +2414-159411-0007-330: hyp=['THIS', 'TIGER', 'SAID', 'THE', 'BRAHMAN', 'BEGGED', 'ME', 'TO', 'LET', 'HIM', 'OUT', 'OF', 'HIS', 'CAGE', 'TO', 'DRINK', 'A', 'LITTLE', 'WATER', 'AND', 'HE', 'PROMISED', 'NOT', 'TO', 'HURT', 'ME', 'IF', 'I', 'DID', 'SO', 'BUT', 'NOW', 'THAT', 'I', 'HAVE', 'LEFT', 'HIM', 'OUT', 'HE', 'WISHES', 'TO', 'EAT', 'ME'] +2414-159411-0008-331: ref=['IS', 'IT', 'JUST', 'THAT', 'HE', 'SHOULD', 'DO', 'SO', 'OR', 'NO'] +2414-159411-0008-331: hyp=['IT', 'IS', 'JEALOUS', 'THAT', 'HE', 'SHOULD', 'DO', 'SO', 'I', 'KNOW'] +2414-159411-0009-332: ref=['LET', 'THE', 'TIGER', 'EAT', 'THE', 'MAN', 'FOR', 'MEN', 'ARE', 'AN', 'UNGRATEFUL', 'RACE'] +2414-159411-0009-332: hyp=['LET', 'THE', 'TIGER', 'EAT', 'THE', 'MAN', 'FOR', 'MEN', 'ARE', 'AN', 'UNGRATEFUL', 'RACE'] +2414-159411-0010-333: ref=['SIR', 'CAMEL', 'SIR', 'CAMEL', 'CRIED', 'THE', 'BRAHMAN', 'HEAR', 'AND', 'GIVE', 'JUDGMENT'] +2414-159411-0010-333: hyp=['SIR', 'CAMEL', 'SIR', 'CAMEL', 'CRIED', 'THE', 'BRAHMAN', 'HEAR', 'AND', 'GIVE', 'JUDGMENT'] +2414-159411-0011-334: ref=['AT', 'A', 'LITTLE', 'DISTANCE', 'THEY', 'FOUND', 'A', 'BULLOCK', 'LYING', 'BY', 'THE', 'ROADSIDE'] +2414-159411-0011-334: hyp=['AT', 'A', 'LITTLE', 'DISTANCE', 'THEY', 'FOUND', 'A', 'BULLOCK', 'LYING', 'BY', 'THE', 'ROADSIDE'] +2414-159411-0012-335: ref=['IS', 'IT', 'FAIR', 'THAT', 'HE', 
'SHOULD', 'DO', 'SO', 'OR', 'NOT'] +2414-159411-0012-335: hyp=['IS', 'IT', 'FAIR', 'THAT', 'HE', 'SHOULD', 'DO', 'SO', 'OR', 'NOT'] +2414-159411-0013-336: ref=['LET', 'THE', 'TIGER', 'EAT', 'THE', 'MAN', 'FOR', 'MEN', 'HAVE', 'NO', 'PITY'] +2414-159411-0013-336: hyp=['LET', 'THE', 'TIGER', 'EAT', 'THE', 'MAN', 'FOR', 'MEN', 'HAVE', 'NO', 'PITY'] +2414-159411-0014-337: ref=['THREE', 'OUT', 'OF', 'THE', 'SIX', 'HAD', 'GIVEN', 'JUDGMENT', 'AGAINST', 'THE', 'BRAHMAN', 'BUT', 'STILL', 'HE', 'DID', 'NOT', 'LOSE', 'ALL', 'HOPE', 'AND', 'DETERMINED', 'TO', 'ASK', 'THE', 'OTHER', 'THREE'] +2414-159411-0014-337: hyp=['THREE', 'OUT', 'OF', 'THE', 'SIX', 'HAD', 'KEEPN', 'JUDGMENT', 'AGAINST', 'THE', 'BRAHMAN', 'BUT', 'STILL', 'HE', 'DID', 'NOT', 'LOSE', 'ALL', 'HOPE', 'AND', 'DETERMINED', 'TO', 'ASK', 'THE', 'OTHER', 'THREE'] +2414-159411-0015-338: ref=['ON', 'WHAT', 'MUST', 'I', 'GIVE', 'JUDGMENT', 'ASKED', 'THE', 'EAGLE'] +2414-159411-0015-338: hyp=['ON', 'WHAT', 'MUST', 'I', 'GIVE', 'JUDGMENT', 'ASKED', 'THE', 'EAGLE'] +2414-159411-0016-339: ref=['THE', 'BRAHMAN', 'STATED', 'THE', 'CASE', 'AND', 'THE', 'EAGLE', 'ANSWERED', 'WHENEVER', 'MEN', 'SEE', 'ME', 'THEY', 'TRY', 'TO', 'SHOOT', 'ME', 'THEY', 'CLIMB', 'THE', 'ROCKS', 'AND', 'STEAL', 'AWAY', 'MY', 'LITTLE', 'ONES'] +2414-159411-0016-339: hyp=['THE', 'BRAHMIN', 'STATED', 'THE', 'CASE', 'AND', 'THE', 'EAGLE', 'ANSWERED', 'WHENEVER', 'MEN', 'SEE', 'ME', 'THEY', 'TRY', 'TO', 'SHOOT', 'ME', 'THEY', 'CLIMB', 'THE', 'ROCKS', 'AND', 'STEAL', 'AWAY', 'MY', 'LITTLE', 'ONES'] +2414-159411-0017-340: ref=['THEN', 'THE', 'TIGER', 'BEGAN', 'TO', 'ROAR', 'AND', 'SAID', 'THE', 'JUDGMENT', 'OF', 'ALL', 'IS', 'AGAINST', 'YOU', 'O', 'BRAHMAN'] +2414-159411-0017-340: hyp=['THEN', 'THE', 'TIGER', 'BEGAN', 'TO', 'ROAR', 'AND', 'SAID', 'JUDGMENT', 'OF', 'ALL', 'IS', 'AGAINST', 'YOU', 'O', 'BRAHMAN'] +2414-159411-0018-341: ref=['AFTER', 'THIS', 'THEY', 'SAW', 'AN', 'ALLIGATOR', 'AND', 'THE', 'BRAHMAN', 'RELATED', 'THE', 'MATTER', 'TO', 'HIM', 'HOPING', 'FOR', 'A', 'MORE', 'FAVORABLE', 'VERDICT'] +2414-159411-0018-341: hyp=['AFTER', 'THIS', 'THEY', 'SAW', 'AN', 'ALLIGATOR', 'AND', 'THE', 'BRAHMAN', 'RELATED', 'THE', 'MATTER', 'TO', 'HIM', 'HOPING', 'FOR', 'A', 'MORE', 'FAVOURABLE', 'VERDICT'] +2414-159411-0019-342: ref=['BUT', 'THE', 'ALLIGATOR', 'SAID', 'WHENEVER', 'I', 'PUT', 'MY', 'NOSE', 'OUT', 'OF', 'THE', 'WATER', 'MEN', 'TORMENT', 'ME', 'AND', 'TRY', 'TO', 'KILL', 'ME'] +2414-159411-0019-342: hyp=['BUT', 'THE', 'ALLIGATOR', 'SAID', 'WHENEVER', 'I', 'PUT', 'MY', 'NOSE', 'OUT', 'OF', 'THE', 'WATER', 'MEN', 'TORMENT', 'ME', 'AND', 'TRY', 'TO', 'KILL', 'ME'] +2414-159411-0020-343: ref=['THE', 'BRAHMAN', 'GAVE', 'HIMSELF', 'UP', 'AS', 'LOST', 'BUT', 'AGAIN', 'HE', 'PRAYED', 'THE', 'TIGER', 'TO', 'HAVE', 'PATIENCE', 'AND', 'LET', 'HIM', 'ASK', 'THE', 'OPINION', 'OF', 'THE', 'SIXTH', 'JUDGE'] +2414-159411-0020-343: hyp=['THE', 'BRAHMAN', 'GAVE', 'HIMSELF', 'UP', 'AS', 'LOST', 'BUT', 'AGAIN', 'HE', 'PRAYED', 'THE', 'TIGER', 'TO', 'HAVE', 'PATIENCE', 'AND', 'LET', 'HIM', 'ASK', 'THE', 'OPINION', 'OF', 'THE', 'SIXTH', 'JUDGE'] +2414-159411-0021-344: ref=['NOW', 'THE', 'SIXTH', 'WAS', 'A', 'JACKAL'] +2414-159411-0021-344: hyp=['ON', 'THE', 'SIXTH', 'WAS', 'A', 'JACKAL'] +2414-159411-0022-345: ref=['THE', 'BRAHMAN', 'TOLD', 'HIS', 'STORY', 'AND', 'SAID', 'TO', 'HIM', 'UNCLE', 'JACKAL', 'UNCLE', 'JACKAL', 'SAY', 'WHAT', 'IS', 'YOUR', 'JUDGMENT'] +2414-159411-0022-345: hyp=['THE', 'BRAHMAN', 'TOLD', 'HIS', 'STORY', 'AND', 'SAID', 'TO', 'HIM', 'UNCLE', 'JACKAL', 'UNCLE', 
'JACKAL', 'SAY', 'WHAT', 'IS', 'YOUR', 'JUDGMENT'] +2414-159411-0023-346: ref=['SHOW', 'ME', 'THE', 'PLACE'] +2414-159411-0023-346: hyp=['SHOW', 'ME', 'THE', 'PACE'] +2414-159411-0024-347: ref=['WHEN', 'THEY', 'GOT', 'THERE', 'THE', 'JACKAL', 'SAID', 'NOW', 'BRAHMAN', 'SHOW', 'ME', 'EXACTLY', 'WHERE', 'YOU', 'STOOD'] +2414-159411-0024-347: hyp=['WHEN', 'THEY', 'GOT', 'THERE', 'THE', 'JACKAL', 'SAID', 'NOW', 'BROWMAN', 'SHOW', 'ME', 'EXACTLY', 'WHERE', 'YOU', 'STOOD'] +2414-159411-0025-348: ref=['EXACTLY', 'THERE', 'WAS', 'IT', 'ASKED', 'THE', 'JACKAL'] +2414-159411-0025-348: hyp=['EXACTLY', 'THERE', 'WAS', 'IT', 'ASKED', 'THE', 'JACKAL'] +2414-159411-0026-349: ref=['EXACTLY', 'HERE', 'REPLIED', 'THE', 'BRAHMAN'] +2414-159411-0026-349: hyp=['EXACTLY', 'HERE', 'REPLIED', 'THE', 'BRAHMAN'] +2414-159411-0027-350: ref=['WHERE', 'WAS', 'THE', 'TIGER', 'THEN'] +2414-159411-0027-350: hyp=['THERE', 'WAS', 'THE', 'CHILD', 'THEN'] +2414-159411-0028-351: ref=['WHY', 'I', 'STOOD', 'SO', 'SAID', 'THE', 'TIGER', 'JUMPING', 'INTO', 'THE', 'CAGE', 'AND', 'MY', 'HEAD', 'WAS', 'ON', 'THIS', 'SIDE'] +2414-159411-0028-351: hyp=['WHY', 'I', 'STOOD', 'SO', 'SAID', 'THE', 'DRIVER', 'JUMPING', 'INTO', 'THE', 'CAGE', 'AND', 'MY', 'HEAD', 'WAS', 'ON', 'THIS', 'SIDE'] +2414-159411-0029-352: ref=['VERY', 'GOOD', 'SAID', 'THE', 'JACKAL', 'BUT', 'I', 'CANNOT', 'JUDGE', 'WITHOUT', 'UNDERSTANDING', 'THE', 'WHOLE', 'MATTER', 'EXACTLY'] +2414-159411-0029-352: hyp=['VERY', 'GOOD', 'SAID', 'THE', 'JACKAL', 'BUT', 'I', 'CANNOT', 'JUDGE', 'WITHOUT', 'UNDERSTANDING', 'THE', 'WHOLE', 'MATTER', 'EXACTLY'] +2414-159411-0030-353: ref=['SHUT', 'AND', 'BOLTED', 'SAID', 'THE', 'BRAHMAN'] +2414-159411-0030-353: hyp=['SHUT', 'AND', 'BOLTED', 'SAID', 'THE', 'BRAHMAN'] +2414-159411-0031-354: ref=['THEN', 'SHUT', 'AND', 'BOLT', 'IT', 'SAID', 'THE', 'JACKAL'] +2414-159411-0031-354: hyp=['THEN', 'SHUT', 'AND', 'BOLT', 'IT', 'SAID', 'THE', 'JACKAL'] +2414-159411-0032-355: ref=['WHEN', 'THE', 'BRAHMAN', 'HAD', 'DONE', 'THIS', 'THE', 'JACKAL', 'SAID', 'OH', 'YOU', 'WICKED', 'AND', 'UNGRATEFUL', 'TIGER'] +2414-159411-0032-355: hyp=['WHEN', 'THE', 'BRAHMAN', 'HAD', 'DONE', 'THIS', 'THE', 'JACKAL', 'SAID', 'OH', 'YOU', 'WICKED', 'AND', 'UNGRATEFUL', 'TIGER'] +2414-159411-0033-356: ref=['WHEN', 'THE', 'GOOD', 'BRAHMAN', 'OPENED', 'YOUR', 'CAGE', 'DOOR', 'IS', 'TO', 'EAT', 'HIM', 'THE', 'ONLY', 'RETURN', 'YOU', 'WOULD', 'MAKE'] +2414-159411-0033-356: hyp=['WHEN', 'A', 'GOOD', 'BRAHMAN', 'OPENED', 'YOUR', 'CASE', 'DOOR', 'IS', 'TO', 'EAT', 'HIM', 'THE', 'ONLY', 'RETURN', 'YOU', 'WILL', 'MAKE'] +2414-159411-0034-357: ref=['PROCEED', 'ON', 'YOUR', 'JOURNEY', 'FRIEND', 'BRAHMAN'] +2414-159411-0034-357: hyp=['PROCEED', 'ON', 'YOUR', 'JOURNEY', 'FRIEND', 'RAMAN'] +2414-159411-0035-358: ref=['YOUR', 'ROAD', 'LIES', 'THAT', 'WAY', 'AND', 'MINE', 'THIS'] +2414-159411-0035-358: hyp=['YOUR', 'ROAD', 'LIES', 'THAT', 'WAY', 'AND', 'MIND', 'THIS'] +2414-165385-0000-359: ref=['THUS', 'ACCOMPLISHED', 'HE', 'EXCITED', 'THE', 'ADMIRATION', 'OF', 'EVERY', 'SILLY', 'COQUETTE', 'AND', 'THE', 'ENVY', 'OF', 'EVERY', 'FLUTTERING', 'COXCOMB', 'BUT', 'BY', 'ALL', 'YOUNG', 'GENTLEMEN', 'AND', 'LADIES', 'OF', 'UNDERSTANDING', 'HE', 'WAS', 'HEARTILY', 'DESPISED', 'AS', 'A', 'MERE', 'CIVILIZED', 'MONKEY'] +2414-165385-0000-359: hyp=['AND', 'THUS', 'ACCOMPLISHED', 'HE', 'EXCITED', 'THE', 'ADMIRATION', 'OF', 'EVERY', 'SILLY', 'COCKET', 'AND', 'THE', 'ENVY', 'OF', 'EVERY', 'FLUTTERING', 'PROCOMB', 'BUT', 'BY', 'ALL', 'YOUNG', 'GENTLEMEN', 'AND', 'LADIES', 'OF', 'UNDERSTANDING', 
'HE', 'WAS', 'HEARTILY', 'DESPISED', 'AS', 'A', 'MERE', 'CIVILIZED', 'MONKEY'] +2414-165385-0001-360: ref=['THAT', 'HIS', 'SOUL', 'MIGHT', 'AFTERWARDS', 'OCCUPY', 'SUCH', 'A', 'STATION', 'AS', 'WOULD', 'BE', 'MOST', 'SUITABLE', 'TO', 'HIS', 'CHARACTER', 'IT', 'WAS', 'SENTENCED', 'TO', 'INHABIT', 'THE', 'BODY', 'OF', 'THAT', 'FINICAL', 'GRINNING', 'AND', 'MISCHIEVOUS', 'LITTLE', 'MIMICK', 'WITH', 'FOUR', 'LEGS', 'WHICH', 'YOU', 'NOW', 'BEHOLD', 'BEFORE', 'YOU'] +2414-165385-0001-360: hyp=['AND', 'THAT', 'HIS', 'SOUL', 'MIGHT', 'AFTERWARDS', 'OCCUPY', 'SUCH', 'A', 'STATION', 'AS', 'WOULD', 'BE', 'MOST', 'SUITABLE', 'TO', 'HIS', 'CHARACTER', 'IT', 'WAS', 'INTENSE', 'TO', 'INHABIT', 'THE', 'BODY', 'OF', 'THAT', 'FINNICAL', 'GRINNING', 'AND', 'MISCHIEVOUS', 'LITTLE', 'MIMIC', 'WITH', 'FOUR', 'LEGS', 'WHICH', 'YOU', 'NOW', 'BEHOLD', 'BEFORE', 'YOU'] +2609-156975-0000-361: ref=['THEN', 'MOSES', 'WAS', 'AFRAID', 'AND', 'SAID', 'SURELY', 'THE', 'THING', 'IS', 'KNOWN'] +2609-156975-0000-361: hyp=['THE', 'MOSES', 'WAS', 'AFRAID', 'AND', 'SAID', 'SURELY', 'THE', 'THING', 'IS', 'KNOWN'] +2609-156975-0001-362: ref=['HOLD', 'ON', 'HOLD', 'FAST', 'HOLD', 'OUT', 'PATIENCE', 'IS', 'GENIUS'] +2609-156975-0001-362: hyp=['O', 'ON', 'HOFAX', 'HODOBT', 'PATESENCES', 'HIS', 'GENIUS'] +2609-156975-0002-363: ref=['LET', 'US', 'HAVE', 'FAITH', 'THAT', 'RIGHT', 'MAKES', 'MIGHT', 'AND', 'IN', 'THAT', 'FAITH', 'LET', 'US', 'DARE', 'TO', 'DO', 'OUR', 'DUTY', 'AS', 'WE', 'UNDERSTAND', 'IT', 'LINCOLN'] +2609-156975-0002-363: hyp=['LET', 'US', 'HAVE', 'FAITH', 'THAT', 'RIGHT', 'MATRON', 'MIGHT', 'AND', 'IN', 'THAT', 'FAITH', 'LET', 'US', 'DARE', 'TO', 'DO', 'OUR', 'DUTY', 'AS', 'WE', 'UNDERSTAND', 'IT', 'LINTON'] +2609-156975-0003-364: ref=['THE', 'EGYPTIAN', 'BACKGROUND', 'OF', 'THE', 'BONDAGE'] +2609-156975-0003-364: hyp=['THE', 'EGYPTIAN', 'BACKGROUND', 'OF', 'THE', 'BONDAGE'] +2609-156975-0004-365: ref=['EVERY', 'ONE', 'WHO', 'IS', 'TURBULENT', 'HAS', 'BEEN', 'FOUND', 'BY', 'KING', 'MERNEPTAH', 'THE', 'TESTIMONY', 'OF', 'THE', 'OLDEST', 'BIBLICAL', 'NARRATIVES', 'REGARDING', 'THE', 'SOJOURN', 'OF', 'THE', 'HEBREWS', 'IN', 'EGYPT', 'IS', 'ALSO', 'IN', 'PERFECT', 'ACCORD', 'WITH', 'THE', 'PICTURE', 'WHICH', 'THE', 'CONTEMPORARY', 'EGYPTIAN', 'INSCRIPTIONS', 'GIVE', 'OF', 'THE', 'PERIOD'] +2609-156975-0004-365: hyp=['EVERY', 'ONE', 'WHOSE', 'TURBRIMENT', 'HAS', 'BEEN', 'FOUND', 'BY', 'GHEIMURNETH', 'PATH', 'THE', 'TESTIMONY', 'OF', 'THE', 'OLDEST', 'BIBLICAL', 'NARRATIVES', 'REGARDING', 'THE', 'SOJOURN', 'OF', 'THE', 'HEBREWS', 'IN', 'EGYPT', 'IS', 'ALSO', 'IN', 'PERFECT', 'ACCORD', 'WITH', 'THE', 'PICTURE', 'WHICH', 'THE', 'CONTEMPORARY', 'EGYPTIAN', 'INSCRIPTIONS', 'GIVE', 'OF', 'THE', 'PERIOD'] +2609-156975-0005-366: ref=['THE', 'ABSENCE', 'OF', 'DETAILED', 'REFERENCE', 'TO', 'THE', 'HEBREWS', 'IS', 'THEREFORE', 'PERFECTLY', 'NATURAL'] +2609-156975-0005-366: hyp=['THE', 'ABSENCE', 'OF', 'DETAILED', 'REFERENCE', 'TO', 'THE', 'HEBREWS', 'IS', 'THEREFORE', 'PERFECTLY', 'NATURAL'] +2609-156975-0006-367: ref=['IT', 'SEEMS', 'PROBABLE', 'THAT', 'NOT', 'ALL', 'BUT', 'ONLY', 'PART', 'OF', 'THE', 'TRIBES', 'WHICH', 'ULTIMATELY', 'COALESCED', 'INTO', 'THE', 'HEBREW', 'NATION', 'FOUND', 'THEIR', 'WAY', 'TO', 'EGYPT'] +2609-156975-0006-367: hyp=['IT', 'SEEMS', 'PROBABLE', 'THAT', 'NOT', 'ALL', 'BUT', 'ONLY', 'PART', 'OF', 'THE', 'TRIBES', 'WHICH', 'OUGHT', 'TO', 'MAKE', 'COROTS', 'INTO', 'THE', 'HEBREW', 'NATION', 'FOUND', 'THEIR', 'WAY', 'TO', 'EGYPT'] +2609-156975-0007-368: ref=['THE', 'STORIES', 'REGARDING', 'JOSEPH', 'THE', 
'TRADITIONAL', 'FATHER', 'OF', 'EPHRAIM', 'AND', 'MANASSEH', 'IMPLY', 'THAT', 'THESE', 'STRONG', 'CENTRAL', 'TRIBES', 'POSSIBLY', 'TOGETHER', 'WITH', 'THE', 'SOUTHERN', 'TRIBES', 'OF', 'BENJAMIN', 'AND', 'JUDAH', 'WERE', 'THE', 'CHIEF', 'ACTORS', 'IN', 'THIS', 'OPENING', 'SCENE', 'IN', "ISRAEL'S", 'HISTORY'] +2609-156975-0007-368: hyp=['THE', 'STORIES', 'REGARDING', 'JOSEPH', 'THEIR', 'TRADITIONAL', 'FATHER', 'EPH', 'FROM', 'AND', 'MANASSEH', 'IMPLY', 'THAT', 'THESE', 'STRONG', 'CENTAL', 'TRIBES', 'POSSIBLY', 'TOGETHER', 'WITH', 'THE', 'SOUTHERN', 'TRIBES', 'OF', 'BENJAMIN', 'AND', 'JUDAH', 'WERE', 'THE', 'CHIEF', 'FACTORS', 'OF', 'THIS', 'OPENING', 'SCENE', 'IN', "ISRAEL'S", 'HISTORY'] +2609-156975-0008-369: ref=['THE', 'BIBLICAL', 'NARRATIVES', 'APPARENTLY', 'DISAGREE', 'REGARDING', 'THE', 'DURATION', 'OF', 'THE', 'SOJOURN', 'IN', 'EGYPT'] +2609-156975-0008-369: hyp=['THE', 'BIBLICAL', 'NARRATIVES', 'APPARENTLY', 'DISAGREE', 'REGARDING', 'THE', 'DURATION', 'OF', 'THE', 'SOJOURN', 'IN', 'EGYPT'] +2609-156975-0009-370: ref=['THE', 'LATER', 'TRADITIONS', 'TEND', 'TO', 'EXTEND', 'THE', 'PERIOD'] +2609-156975-0009-370: hyp=['THE', 'LATER', 'TRADITIONS', 'TEND', 'TO', 'EXTEND', 'THE', 'PERIOD'] +2609-156975-0010-371: ref=['HERE', 'WERE', 'FOUND', 'SEVERAL', 'INSCRIPTIONS', 'BEARING', 'THE', 'EGYPTIAN', 'NAME', 'OF', 'THE', 'CITY', 'P', 'ATUM', 'HOUSE', 'OF', 'THE', 'GOD', 'ATUM'] +2609-156975-0010-371: hyp=['HERE', 'WERE', 'FOUND', 'SEVERAL', 'INSCRIPTIONS', 'BEARING', 'THE', 'EGYPTIAN', 'NAME', 'OF', 'THE', 'CITY', 'PATUM', 'HOUSE', 'OF', 'THE', 'GOD', 'ATOM'] +2609-156975-0011-372: ref=['A', 'CONTEMPORARY', 'INSCRIPTION', 'ALSO', 'STATES', 'THAT', 'HE', 'FOUNDED', 'NEAR', 'PITHUM', 'THE', 'HOUSE', 'OF', 'RAMSES', 'A', 'CITY', 'WITH', 'A', 'ROYAL', 'RESIDENCE', 'AND', 'TEMPLES'] +2609-156975-0011-372: hyp=['A', 'CONTEMPORARY', 'INSCRIPTS', 'AND', 'ALSO', 'STATES', 'THAT', 'HE', 'FOUNDED', 'NEAR', 'PITTHAM', 'THE', 'HOUSE', 'OF', 'RAMESES', 'A', 'CITY', 'WITH', 'A', 'ROYAL', 'RESIDENCE', 'AND', 'TEMPLES'] +2609-156975-0012-373: ref=['THAT', 'THE', 'HEBREWS', 'WERE', 'RESTIVE', 'UNDER', 'THIS', 'TYRANNY', 'WAS', 'NATURAL', 'INEVITABLE'] +2609-156975-0012-373: hyp=['THAT', 'THE', 'HEBREWS', 'WERE', 'RESTIVE', 'UNDER', 'THIS', 'CEREMONY', 'WAS', 'NATURALLY', 'INEVITABLE'] +2609-156975-0013-374: ref=['WAS', 'ANY', 'OTHER', 'PROCEDURE', 'TO', 'BE', 'EXPECTED', 'FROM', 'A', 'DESPOTIC', 'RULER', 'OF', 'THAT', 'LAND', 'AND', 'DAY'] +2609-156975-0013-374: hyp=['WAS', 'ANY', 'OTHER', 'PROCEDURE', 'TO', 'BE', 'EXPECTED', 'FROM', 'IT', 'THE', 'DESPOTIC', 'ROAR', 'OF', 'THAT', 'LAND', 'AND', 'DAY'] +2609-156975-0014-375: ref=['THE', 'MAKING', 'OF', 'A', 'LOYAL', 'PATRIOT'] +2609-156975-0014-375: hyp=['THE', 'MAKING', 'OF', 'A', 'LOYAL', 'PATRIOT'] +2609-156975-0015-376: ref=['THE', 'STORY', 'OF', 'MOSES', 'BIRTH', 'AND', 'EARLY', 'CHILDHOOD', 'IS', 'ONE', 'OF', 'THE', 'MOST', 'INTERESTING', 'CHAPTERS', 'IN', 'BIBLICAL', 'HISTORY'] +2609-156975-0015-376: hyp=['THE', 'STORY', 'OF', "MOSES'S", 'BIRTH', 'IN', 'EARLY', 'CHILDHOOD', 'IS', 'ONE', 'OF', 'THE', 'MOST', 'INTERESTING', 'CHAPTERS', 'IN', 'BIBLICAL', 'HISTORY'] +2609-156975-0016-377: ref=['WAS', 'MOSES', 'JUSTIFIED', 'IN', 'RESISTING', 'THE', 'EGYPTIAN', 'TASKMASTER'] +2609-156975-0016-377: hyp=['WHICH', 'MOVES', 'IT', 'JUSTFIED', 'AND', 'RESISTING', 'THE', 'EGIPSIAN', 'TAX', 'MASTER'] +2609-156975-0017-378: ref=['IS', 'PEONAGE', 'ALWAYS', 'DISASTROUS', 'NOT', 'ONLY', 'TO', 'ITS', 'VICTIMS', 'BUT', 'ALSO', 'TO', 'THE', 'GOVERNMENT', 'IMPOSING', 'IT'] 
+2609-156975-0017-378: hyp=['IS', 'OPINION', 'IS', 'ALWAYS', 'DISASTROUS', 'NOT', 'ONLY', 'TO', 'ITS', 'VICTIMS', 'BUT', 'ALSO', 'TO', 'THE', 'GOVERNMENT', 'IMPOSING', 'IT'] +2609-156975-0018-379: ref=['NATURALLY', 'HE', 'WENT', 'TO', 'THE', 'LAND', 'OF', 'MIDIAN'] +2609-156975-0018-379: hyp=['NATURALLY', 'HE', 'WENT', 'TO', 'THE', 'LAND', 'OF', 'MEDIAN'] +2609-156975-0019-380: ref=['THE', 'WILDERNESS', 'TO', 'THE', 'EAST', 'OF', 'EGYPT', 'HAD', 'FOR', 'CENTURIES', 'BEEN', 'THE', 'PLACE', 'OF', 'REFUGE', 'FOR', 'EGYPTIAN', 'FUGITIVES'] +2609-156975-0019-380: hyp=['THE', 'WILDERNESS', 'TO', 'THE', 'EAST', 'OF', 'EGYPT', 'HAD', 'FOR', 'CENTURIES', 'BEEN', 'THE', 'PLACE', 'OF', 'REFUGE', 'FOR', 'EGYPTIAN', 'FUGITIVES'] +2609-156975-0020-381: ref=['FROM', 'ABOUT', 'TWO', 'THOUSAND', 'B', 'C'] +2609-156975-0020-381: hyp=['FROM', 'ABOUT', 'TWO', 'THOUSAND', 'B', 'C'] +2609-156975-0021-382: ref=['ON', 'THE', 'BORDERS', 'OF', 'THE', 'WILDERNESS', 'HE', 'FOUND', 'CERTAIN', 'BEDOUIN', 'HERDSMEN', 'WHO', 'RECEIVED', 'HIM', 'HOSPITABLY'] +2609-156975-0021-382: hyp=['ON', 'THE', 'BORDERS', 'OF', 'THE', 'WILDERNESS', 'HE', 'FOUND', 'CERTAIN', 'BEDOUIN', 'HERDSMEN', 'WHO', 'RECEIVED', 'HIM', 'HOSPITABLY'] +2609-156975-0022-383: ref=['THESE', 'SAND', 'WANDERERS', 'SENT', 'HIM', 'ON', 'FROM', 'TRIBE', 'TO', 'TRIBE', 'UNTIL', 'HE', 'REACHED', 'THE', 'LAND', 'OF', 'KEDEM', 'EAST', 'OF', 'THE', 'DEAD', 'SEA', 'WHERE', 'HE', 'REMAINED', 'FOR', 'A', 'YEAR', 'AND', 'A', 'HALF'] +2609-156975-0022-383: hyp=['THESE', 'SAND', 'WANDERERS', 'SENT', 'HIM', 'ON', 'FROM', 'TIME', 'TO', 'TIME', 'UNTIL', 'HE', 'REACHED', 'THE', 'LAND', 'OF', 'KEIDAM', 'EAST', 'OF', 'THE', 'DEAD', 'SEA', 'WHERE', 'HE', 'REMAINED', 'FOR', 'A', 'YEAR', 'AND', 'A', 'HALF'] +2609-156975-0023-384: ref=['LATER', 'HE', 'FOUND', 'HIS', 'WAY', 'TO', 'THE', 'COURT', 'OF', 'ONE', 'OF', 'THE', 'LOCAL', 'KINGS', 'IN', 'CENTRAL', 'PALESTINE', 'WHERE', 'HE', 'MARRIED', 'AND', 'BECAME', 'IN', 'TIME', 'A', 'PROSPEROUS', 'LOCAL', 'PRINCE'] +2609-156975-0023-384: hyp=['LATER', 'HE', 'FOUND', 'HIS', 'WAY', 'TO', 'THE', 'COURT', 'OF', 'ONE', 'OF', 'THE', 'LOCAL', 'KINGS', 'IN', 'CENTRAL', 'PALESTINE', 'WHERE', 'HE', 'MARRIED', 'AND', 'BECAME', 'IN', 'THE', 'TIME', 'A', 'PROSPEROUS', 'LOCAL', 'PRINCE'] +2609-156975-0024-385: ref=['THE', 'SCHOOL', 'OF', 'THE', 'WILDERNESS'] +2609-156975-0024-385: hyp=['THE', 'SCORE', 'OF', 'THE', 'WILDERNESS'] +2609-156975-0025-386: ref=['THE', 'STORY', 'OF', 'MOSES', 'IS', 'IN', 'MANY', 'WAYS', 'CLOSELY', 'PARALLEL', 'TO', 'THAT', 'OF', 'SINUHIT'] +2609-156975-0025-386: hyp=['THE', 'STORY', 'OF', 'MOSES', 'IS', 'IN', 'MANY', 'WAYS', 'CLOSELY', 'PARALLEL', 'TO', 'THAT', 'OF', 'SINEWET'] +2609-156975-0026-387: ref=['THE', 'PRIEST', 'OF', 'THE', 'SUB', 'TRIBE', 'OF', 'THE', 'KENITES', 'RECEIVED', 'HIM', 'INTO', 'HIS', 'HOME', 'AND', 'GAVE', 'HIM', 'HIS', 'DAUGHTER', 'IN', 'MARRIAGE'] +2609-156975-0026-387: hyp=['THE', 'PRIESTS', 'OF', 'THE', 'SUB', 'TRIBE', 'OF', 'THE', 'CANAITES', 'RECEIVED', 'HIM', 'INTO', 'HIS', 'HOME', 'AND', 'GAVE', 'HIM', 'HIS', 'DAUGHTER', 'IN', 'MARRIAGE'] +2609-156975-0027-388: ref=['NOTE', 'THE', 'CHARACTERISTIC', 'ORIENTAL', 'IDEA', 'OF', 'MARRIAGE'] +2609-156975-0027-388: hyp=['NOTE', 'THE', 'CHAAVERALISTIC', 'ORIENT', 'RE', 'OF', 'MARYS'] +2609-156975-0028-389: ref=['HERE', 'MOSES', 'LEARNED', 'THE', 'LESSONS', 'THAT', 'WERE', 'ESSENTIAL', 'FOR', 'HIS', 'TRAINING', 'AS', 'THE', 'LEADER', 'AND', 'DELIVERER', 'OF', 'HIS', 'PEOPLE'] +2609-156975-0028-389: hyp=['HERE', 'MOSES', 'LEARNED', 'THE', 'LESSONS', 
'THAT', 'WERE', 'ESSENTIAL', 'FOR', 'HIS', 'TRAINING', 'AS', 'THE', 'LEADER', 'AND', 'DELIVERER', 'OF', 'HIS', 'PEOPLE'] +2609-156975-0029-390: ref=['AFTER', 'THE', 'CAPTURE', 'OF', 'JERICHO', 'CERTAIN', 'OF', 'THEM', 'WENT', 'UP', 'WITH', 'THE', 'SOUTHERN', 'TRIBES', 'TO', 'CONQUER', 'SOUTHERN', 'PALESTINE'] +2609-156975-0029-390: hyp=['AFTER', 'THE', 'CAPTURE', 'OF', 'JERICHO', 'CERTAIN', 'OF', 'THEM', 'WENT', 'UP', 'WITH', 'THE', 'SOUTHERN', 'TRIBES', 'TO', 'CONQUER', 'SOUTHERN', 'PALESTINE'] +2609-156975-0030-391: ref=['MANY', 'MODERN', 'SCHOLARS', 'DRAW', 'THE', 'CONCLUSION', 'FROM', 'THE', 'BIBLICAL', 'NARRATIVE', 'THAT', 'IT', 'WAS', 'FROM', 'THE', 'KENITES', 'THAT', 'MOSES', 'FIRST', 'LEARNED', 'OF', 'YAHWEH', 'OR', 'AS', 'THE', 'DISTINCTIVE', 'NAME', 'OF', "ISRAEL'S", 'GOD', 'WAS', 'TRANSLATED', 'BY', 'LATER', 'JEWISH', 'SCRIBES', 'JEHOVAH'] +2609-156975-0030-391: hyp=['MANY', 'MODERN', 'SCHOLARS', 'DRAW', 'THE', 'CONCLUSION', 'FROM', 'THE', 'BIBLICAL', 'NARRATIVE', 'THAT', 'IT', 'WAS', 'FROM', 'THE', 'CANAITES', 'THAT', 'MOSES', 'FIRST', 'LEARNED', 'OF', 'YAHWAY', 'ORAS', 'THE', 'DISTINCTIVE', 'NAME', 'OF', "ISRAEL'S", 'GOD', 'WAS', 'TRANSLATED', 'BY', 'LATER', 'JEWISH', 'SCRIBES', 'JEHOVAH'] +2609-156975-0031-392: ref=['DO', 'THE', 'EARLIEST', 'HEBREW', 'TRADITIONS', 'IMPLY', 'THAT', 'THE', 'ANCESTORS', 'OF', 'THE', 'ISRAELITES', 'WERE', 'WORSHIPPERS', 'OF', 'JEHOVAH'] +2609-156975-0031-392: hyp=['DO', 'THE', 'EARLIEST', 'HEBREW', 'TRADITIONS', 'IMPLY', 'THAT', 'THE', 'ANCESTORS', 'OF', 'THE', 'ISRAELITES', 'WERE', 'WORSHIPPERS', 'OF', 'JEHOVAH'] +2609-156975-0032-393: ref=['THE', 'TITLE', 'OF', 'HIS', 'FATHER', 'IN', 'LAW', 'IMPLIES', 'THAT', 'THIS', 'PRIEST', 'MINISTERED', 'AT', 'SOME', 'WILDERNESS', 'SANCTUARY'] +2609-156975-0032-393: hyp=['THE', 'TITLE', 'OF', 'HIS', 'FATHER', 'IN', 'LAW', 'IMPLIES', 'THAT', 'THIS', 'PREACH', 'MINISTERED', 'AT', 'SOME', 'MOTHER', 'SANCTUARY'] +2609-156975-0033-394: ref=['MOSES', 'IN', 'THE', 'HOME', 'OF', 'THE', 'MIDIAN', 'PRIEST', 'WAS', 'BROUGHT', 'INTO', 'DIRECT', 'AND', 'CONSTANT', 'CONTACT', 'WITH', 'THE', 'JEHOVAH', 'WORSHIP'] +2609-156975-0033-394: hyp=['MOSES', 'IN', 'THE', 'HOME', 'OF', 'THE', 'MIDIAN', 'PRIESTS', 'WAS', 'BROUGHT', 'INTO', 'DIRECT', 'AND', 'CONSTANT', 'CONTACT', 'WITH', 'THE', 'JEHOVAH', 'WORSHIP'] +2609-156975-0034-395: ref=['THE', 'CRUEL', 'FATE', 'OF', 'HIS', 'PEOPLE', 'AND', 'THE', 'PAINFUL', 'EXPERIENCE', 'IN', 'EGYPT', 'THAT', 'HAD', 'DRIVEN', 'HIM', 'INTO', 'THE', 'WILDERNESS', 'PREPARED', 'HIS', 'MIND', 'TO', 'RECEIVE', 'THIS', 'TRAINING'] +2609-156975-0034-395: hyp=['THE', 'CRUEL', 'FATE', 'OF', 'HIS', 'PEOPLE', 'AND', 'THE', 'PAINFUL', 'EXPERIENCE', 'IN', 'EGYPT', 'THAT', 'HAD', 'DRIVEN', 'HIM', 'INTO', 'THE', 'WILDERNESS', 'PREPARED', 'HIS', 'MIND', 'TO', 'RECEIVE', 'THIS', 'TRAINING'] +2609-156975-0035-396: ref=['HIS', 'QUEST', 'WAS', 'FOR', 'A', 'JUST', 'AND', 'STRONG', 'GOD', 'ABLE', 'TO', 'DELIVER', 'THE', 'OPPRESSED'] +2609-156975-0035-396: hyp=['HIS', 'PLACE', 'WAS', 'FULL', 'OF', 'JUTS', 'AND', 'STRONG', 'GUNS', 'ABLE', 'TO', 'DELIVER', 'WITH', 'THE', 'OPPRESSED'] +2609-156975-0036-397: ref=['THE', 'WILDERNESS', 'WITH', 'ITS', 'LURKING', 'FOES', 'AND', 'THE', 'EVER', 'PRESENT', 'DREAD', 'OF', 'HUNGER', 'AND', 'THIRST', 'DEEPENED', 'HIS', 'SENSE', 'OF', 'NEED', 'AND', 'OF', 'DEPENDENCE', 'UPON', 'A', 'POWER', 'ABLE', 'TO', 'GUIDE', 'THE', 'DESTINIES', 'OF', 'MEN'] +2609-156975-0036-397: hyp=['THE', 'WILDERNESS', 'WITH', 'ITS', 'LURKING', 'FOES', 'AND', 'THE', 'EVER', 'PRESENT', 'DREAD', 'OF', 
'HUNGER', 'AND', 'THIRST', 'DEEPENS', 'A', 'SENSE', 'OF', 'NEED', 'AND', 'OF', 'DEPENDENCE', 'UPON', 'A', 'POWER', 'ABLE', 'TO', 'GUIDE', 'THE', 'DEST', 'NEEDS', 'OF', 'MEN'] +2609-156975-0037-398: ref=['THE', 'PEASANTS', 'OF', 'THE', 'VAST', 'ANTOLIAN', 'PLAIN', 'IN', 'CENTRAL', 'ASIA', 'MINOR', 'STILL', 'CALL', 'EVERY', 'LIFE', 'GIVING', 'SPRING', 'GOD', 'HATH', 'GIVEN'] +2609-156975-0037-398: hyp=['THE', 'PEASANTS', 'OF', 'THE', 'VATS', 'ANTONIAN', 'PLAIN', 'OF', 'CENTRAL', 'ASIA', 'MINOR', 'STILL', 'WILL', 'CALL', 'EVERY', 'LIFE', 'GIING', 'SPRING', 'GOD', 'HATH', 'GIVEN'] +2609-156975-0038-399: ref=['THE', 'CONSTANT', 'NECESSITY', 'OF', 'MEETING', 'THE', 'DANGERS', 'OF', 'THE', 'WILDERNESS', 'AND', 'OF', 'DEFENDING', 'THE', 'FLOCKS', 'ENTRUSTED', 'TO', 'MOSES', 'CARE', 'DEVELOPED', 'HIS', 'COURAGE', 'AND', 'POWER', 'OF', 'LEADERSHIP', 'AND', 'ACTION'] +2609-156975-0038-399: hyp=['THE', 'CONSTANT', 'NECESSITY', 'OF', 'MEETING', 'THE', 'DANGERS', 'OF', 'THE', 'WILDERNESS', 'AND', 'OF', 'DEFENDING', 'THE', 'FLOCKS', 'INTRUSTED', 'TO', 'MOSES', 'CARE', 'DEVELOPED', 'HIS', 'COURAGE', 'AND', 'POWER', 'OF', 'LEADERSHIP', 'AND', 'ACTION'] +2609-157645-0000-400: ref=['EVIDENTLY', 'THE', 'INTENTION', 'WAS', 'TO', 'MAKE', 'THINGS', 'PLEASANT', 'FOR', 'THE', 'ROYAL', 'FOE', 'OF', 'TOBACCO', 'DURING', 'HIS', 'VISIT'] +2609-157645-0000-400: hyp=['EVIDENTLY', 'THE', 'INTENTION', 'WAS', 'MADE', 'THINGS', 'PRESENT', 'FOR', 'THE', 'ROYAL', 'FOLK', 'OF', 'TOBACCO', 'DURING', 'HIS', 'VISIT'] +2609-157645-0001-401: ref=['THE', 'PROHIBITION', 'IN', 'THE', 'REGULATION', 'QUOTED', 'OF', 'SMOKING', 'IN', 'SAINT', "MARY'S", 'CHURCH', 'REFERRED', 'IT', 'MAY', 'BE', 'NOTED', 'TO', 'THE', 'ACT', 'WHICH', 'WAS', 'HELD', 'THEREIN'] +2609-157645-0001-401: hyp=['THE', 'PROHIBITION', 'IN', 'THE', 'REGULATION', 'QUOTE', 'OF', 'SMOKING', 'IN', 'SAINT', "MARY'S", 'CHURCH', 'REFERRED', 'IT', 'MAY', 'BE', 'NOTED', 'TO', 'THE', 'ACT', 'WHICH', 'WAS', 'HELD', 'THEREIN'] +2609-157645-0002-402: ref=['SOMETIMES', 'TOBACCO', 'WAS', 'USED', 'IN', 'CHURCH', 'FOR', 'DISINFECTING', 'OR', 'DEODORIZING', 'PURPOSES'] +2609-157645-0002-402: hyp=['SOMETIMES', 'TOBACCO', 'IS', 'USED', 'IN', 'CHURCH', 'FOR', 'DISINFECTING', 'OR', 'DEORTERIZING', 'PURPOSES'] +2609-157645-0003-403: ref=['BLACKBURN', 'ARCHBISHOP', 'OF', 'YORK', 'WAS', 'A', 'GREAT', 'SMOKER'] +2609-157645-0003-403: hyp=['BLACKBURN', 'ARCHBISHOP', 'OF', 'YORK', 'WAS', 'A', 'GREAT', 'SMOKER'] +2609-157645-0004-404: ref=['ON', 'ONE', 'OCCASION', 'HE', 'WAS', 'AT', 'SAINT', "MARY'S", 'CHURCH', 'NOTTINGHAM', 'FOR', 'A', 'CONFIRMATION'] +2609-157645-0004-404: hyp=['ON', 'ONE', 'OCCASION', 'HE', 'WAS', 'AT', 'SAINT', "MARY'S", 'CHURCH', 'NOTTINGHAM', 'FOR', 'A', 'CONFIRM', 'MASON'] +2609-157645-0005-405: ref=['ANOTHER', 'EIGHTEENTH', 'CENTURY', 'CLERICAL', 'WORTHY', 'THE', 'FAMOUS', 'DOCTOR', 'PARR', 'AN', 'INVETERATE', 'SMOKER', 'WAS', 'ACCUSTOMED', 'TO', 'DO', 'WHAT', 'MISTER', 'DISNEY', 'PREVENTED', 'ARCHBISHOP', 'BLACKBURN', 'FROM', 'DOING', 'HE', 'SMOKED', 'IN', 'HIS', 'VESTRY', 'AT', 'HATTON'] +2609-157645-0005-405: hyp=['ANOTHER', 'EIGHTEENTH', 'CENTURY', 'CLERICAL', 'WORTHY', 'THE', 'FAMOUS', 'DOCTOR', 'PARR', 'AN', 'INVETERATE', 'SMOKER', 'WAS', 'ACCUSTOMED', 'TO', 'DO', 'WHAT', 'MITZTER', 'DNEY', 'PREVENTED', 'ARCHBISHOP', 'BLACKBURN', 'FROM', 'DOING', 'HE', 'SMOKED', 'IN', 'HIS', 'VETXRY', 'AT', 'HATTON'] +2609-157645-0006-406: ref=['PARR', 'WAS', 'SUCH', 'A', 'CONTINUAL', 'SMOKER', 'THAT', 'ANYONE', 'WHO', 'CAME', 'INTO', 'HIS', 'COMPANY', 'IF', 'HE', 'HAD', 'NEVER', 
'SMOKED', 'BEFORE', 'HAD', 'TO', 'LEARN', 'THE', 'USE', 'OF', 'A', 'PIPE', 'AS', 'A', 'MEANS', 'OF', 'SELF', 'DEFENCE'] +2609-157645-0006-406: hyp=['PAR', 'WAS', 'SUCH', 'A', 'CONTINUOUS', 'SMOKER', 'THAT', 'ANY', 'ONE', 'WHO', 'CAME', 'INTO', 'HIS', 'COMPANY', 'IF', 'HE', 'HAD', 'NEVER', 'SMOKED', 'BEFORE', 'HAD', 'TO', 'LEARN', 'THE', 'USE', 'OF', 'A', 'PIPE', 'AS', 'A', 'MEANS', 'OF', 'SELF', 'DEFENCE'] +2609-157645-0007-407: ref=['ONE', 'SUNDAY', 'SAYS', 'MISTER', 'DITCHFIELD', 'HE', 'HAD', 'AN', 'EXTRA', 'PIPE', 'AND', 'JOSHUA', 'THE', 'CLERK', 'TOLD', 'HIM', 'THAT', 'THE', 'PEOPLE', 'WERE', 'GETTING', 'IMPATIENT'] +2609-157645-0007-407: hyp=['ONE', 'SUNDAY', 'SAYS', 'MISTER', 'DITCHFIELD', 'HE', 'HAD', 'ANPERAL', 'PIPE', 'AND', 'JONCEWA', 'THE', 'CLERK', 'TOLD', 'HIM', 'THAT', 'THE', 'PEOPLE', 'WERE', 'GETTING', 'IMPATIENT'] +2609-157645-0008-408: ref=['LET', 'THEM', 'SING', 'ANOTHER', 'PSALM', 'SAID', 'THE', 'CURATE'] +2609-157645-0008-408: hyp=['THEM', 'THEM', 'SINGING', 'AN', 'ANOTHER', 'OTHER', 'SONG', 'THE', 'CURATE'] +2609-157645-0009-409: ref=['THEY', 'HAVE', 'SIR', 'REPLIED', 'THE', 'CLERK'] +2609-157645-0009-409: hyp=['THEY', 'HAVE', 'SIR', 'REPLIED', 'THE', 'CLERK'] +2609-157645-0010-410: ref=['THEN', 'LET', 'THEM', 'SING', 'THE', 'HUNDRED', 'AND', 'NINETEENTH', 'REPLIED', 'THE', 'CURATE'] +2609-157645-0010-410: hyp=['THEN', 'LET', 'THEM', 'SING', 'THE', 'HUNDRED', 'AND', 'NINETEENTH', 'REPLIED', 'THE', 'CURATE'] +2609-157645-0011-411: ref=['SIX', 'ARMS', 'THE', 'NEAREST', 'WITHIN', 'REACH', 'PRESENTED', 'WITH', 'AN', 'OBEDIENT', 'START', 'AS', 'MANY', 'TOBACCO', 'POUCHES', 'TO', 'THE', 'MAN', 'OF', 'OFFICE'] +2609-157645-0011-411: hyp=['SIX', 'ARMS', 'THE', 'NEAREST', 'WITHIN', 'REACH', 'PRESENTED', 'WITH', 'AN', 'OBEDIENT', 'START', 'AS', 'MANY', 'TOBACCO', 'PIROUCHES', 'TO', 'THE', 'MAN', 'OF', 'OFFICE'] +2609-157645-0012-412: ref=['DAVID', 'DEANS', 'HOWEVER', 'DID', 'NOT', 'AT', 'ALL', 'APPROVE', 'THIS', 'IRREVERENCE'] +2609-157645-0012-412: hyp=['DAVID', 'DENES', 'HOWEVER', 'DID', 'NOT', 'AT', 'ALL', 'PROVE', 'THIS', 'IRREVERENCE'] +2609-157645-0013-413: ref=['GOING', 'TO', 'CHURCH', 'AT', 'HAYES', 'IN', 'THOSE', 'DAYS', 'MUST', 'HAVE', 'BEEN', 'QUITE', 'AN', 'EXCITING', 'EXPERIENCE'] +2609-157645-0013-413: hyp=['GOING', 'TO', 'CHURCH', 'AT', 'HAYES', 'IN', 'THOSE', 'DAYS', 'MUST', 'HAVE', 'BEEN', 'A', 'THE', 'SOUNDTING', 'SPIRITENCE'] +2609-157645-0014-414: ref=['WHEN', 'THESE', 'MEN', 'IN', 'THE', 'COURSE', 'OF', 'MY', 'REMONSTRANCE', 'FOUND', 'THAT', 'I', 'WAS', 'NOT', 'GOING', 'TO', 'CONTINUE', 'THE', 'CUSTOM', 'THEY', 'NO', 'LONGER', 'CARED', 'TO', 'BE', 'COMMUNICANTS'] +2609-157645-0014-414: hyp=['WHEN', 'THESE', 'MEN', 'IN', 'THE', 'COURSE', 'OF', 'MY', 'REMONSTRANCE', 'FOUND', 'OUT', 'THAT', 'I', 'WAS', 'NOT', 'GOING', 'TO', 'CONTINUE', 'THE', 'CUSTOM', 'THEY', 'NO', 'LONGER', 'CARED', 'TO', 'BE', 'COMMUNICANTS'] +2609-169640-0000-415: ref=['PROAS', 'IN', 'THAT', 'QUARTER', 'WERE', 'USUALLY', 'DISTRUSTED', 'BY', 'SHIPS', 'IT', 'IS', 'TRUE', 'BUT', 'THE', 'SEA', 'IS', 'FULL', 'OF', 'THEM', 'AND', 'FAR', 'MORE', 'ARE', 'INNOCENT', 'THAN', 'ARE', 'GUILTY', 'OF', 'ANY', 'ACTS', 'OF', 'VIOLENCE'] +2609-169640-0000-415: hyp=['PEROCKS', 'IN', 'THAT', 'QUARTER', 'WERE', 'USUALLY', 'DESTRUCTED', 'BY', 'THE', 'SHIPS', 'IT', 'IS', 'TRUE', 'BUT', 'THE', 'SEA', 'IS', 'FULL', 'OF', 'THEM', 'FAR', 'MORE', 'ARE', 'INNOCENT', 'THAN', 'ARE', 'GUILTY', 'OF', 'ANY', 'ACTS', 'OF', 'VIOLENCE'] +2609-169640-0001-416: ref=['AN', 'HOUR', 'AFTER', 'THE', 'SUN', 'HAD', 'SET', 'THE', 
'WIND', 'FELL', 'TO', 'A', 'LIGHT', 'AIR', 'THAT', 'JUST', 'KEPT', 'STEERAGE', 'WAY', 'ON', 'THE', 'SHIP'] +2609-169640-0001-416: hyp=['AN', 'HOUR', 'AFTER', 'THE', 'SUN', 'HAD', 'SET', 'THE', 'WIND', 'FELL', 'TO', 'A', 'LIGHT', 'AIR', 'THE', 'HAGITTS', 'KEPT', 'STEERAGE', 'WAY', 'ON', 'THE', 'SHIP'] +2609-169640-0002-417: ref=['FORTUNATELY', 'THE', 'JOHN', 'WAS', 'NOT', 'ONLY', 'FAST', 'BUT', 'SHE', 'MINDED', 'HER', 'HELM', 'AS', 'A', 'LIGHT', 'FOOTED', 'GIRL', 'TURNS', 'IN', 'A', 'LIVELY', 'DANCE'] +2609-169640-0002-417: hyp=['FORTUNATELY', 'THE', 'JOHN', 'WAS', 'NOT', 'ONLY', 'FAT', 'BUT', 'SHE', 'MINDED', 'HER', 'HAM', 'AS', 'A', 'LIGHT', 'FOOTED', 'GIRL', 'TURNS', 'IN', 'A', 'LIVELY', 'DANCE'] +2609-169640-0003-418: ref=['I', 'NEVER', 'WAS', 'IN', 'A', 'BETTER', 'STEERING', 'SHIP', 'MOST', 'ESPECIALLY', 'IN', 'MODERATE', 'WEATHER'] +2609-169640-0003-418: hyp=['I', 'NEVER', 'WAS', 'IN', 'A', 'BETTER', 'STEERING', 'SHIP', 'MOSTLY', 'SPECTREE', 'IN', 'MODERATE', 'WEATHER'] +2609-169640-0004-419: ref=['MISTER', 'MARBLE', 'HE', 'I', 'DO', 'BELIEVE', 'WAS', 'FAIRLY', 'SNOOZING', 'ON', 'THE', 'HEN', 'COOPS', 'BEING', 'LIKE', 'THE', 'SAILS', 'AS', 'ONE', 'MIGHT', 'SAY', 'BARELY', 'ASLEEP'] +2609-169640-0004-419: hyp=['MISTER', 'MARRBLE', 'HE', 'OUGHED', 'TO', 'BELIEVE', 'WAS', 'FAIRLY', 'SNOOZING', 'OF', 'THE', 'HINGOOPS', 'BEING', 'LIKE', 'TO', 'THE', 'SAILS', 'AS', 'ONE', 'MIGHT', 'SAY', 'VERY', 'ASLEEP'] +2609-169640-0005-420: ref=['AT', 'THAT', 'MOMENT', 'I', 'HEARD', 'A', 'NOISE', 'ONE', 'FAMILIAR', 'TO', 'SEAMEN', 'THAT', 'OF', 'AN', 'OAR', 'FALLING', 'IN', 'A', 'BOAT'] +2609-169640-0005-420: hyp=['AT', 'THAT', 'MOMENT', 'I', 'HEARD', 'A', 'NOISE', 'WHEN', 'FAMILIAR', 'TO', 'SEAMEN', 'THAT', 'OF', 'AN', 'OAR', 'FALLING', 'IN', 'THE', 'BOAT'] +2609-169640-0006-421: ref=['I', 'SANG', 'OUT', 'SAIL', 'HO', 'AND', 'CLOSE', 'ABOARD'] +2609-169640-0006-421: hyp=['I', 'SING', 'OUT', 'SAIL', 'HO', 'AND', 'CLOSE', 'ABROAD'] +2609-169640-0007-422: ref=['HE', 'WAS', 'TOO', 'MUCH', 'OF', 'A', 'SEAMAN', 'TO', 'REQUIRE', 'A', 'SECOND', 'LOOK', 'IN', 'ORDER', 'TO', 'ASCERTAIN', 'WHAT', 'WAS', 'TO', 'BE', 'DONE'] +2609-169640-0007-422: hyp=['HE', 'WAS', 'TOO', 'MUCH', 'OF', 'A', 'SEAMAN', 'TO', 'REQUIRE', 'A', 'SECOND', 'LOOK', 'IN', 'ORDER', 'TO', 'ASCERTAIN', 'WHAT', 'WAS', 'TO', 'BE', 'DONE'] +2609-169640-0008-423: ref=['ALTHOUGH', 'THEY', 'WENT', 'THREE', 'FEET', 'TO', 'OUR', 'TWO', 'THIS', 'GAVE', 'US', 'A', 'MOMENT', 'OF', 'BREATHING', 'TIME'] +2609-169640-0008-423: hyp=['ALTHOUGH', 'THEY', 'WENT', 'THREE', 'FEET', 'TO', 'OUR', 'TWO', 'THIS', 'GAVE', 'US', 'A', 'MOMENT', 'OF', 'BREATHING', 'TIME'] +2609-169640-0009-424: ref=['AS', 'OUR', 'SHEETS', 'WERE', 'ALL', 'FLYING', 'FORWARD', 'AND', 'REMAINED', 'SO', 'FOR', 'A', 'FEW', 'MINUTES', 'IT', 'GAVE', 'ME', 'LEISURE', 'TO', 'LOOK', 'ABOUT'] +2609-169640-0009-424: hyp=['AS', 'OUR', 'SEATS', 'WERE', 'ALL', 'FLYING', 'FORWARD', 'AND', 'REMAINED', 'SO', 'FOR', 'A', 'FEW', 'MINUTES', 'IT', 'GAVE', 'ME', 'A', 'LEISURE', 'TO', 'LOOK', 'ABOUT'] +2609-169640-0010-425: ref=['I', 'SOON', 'SAW', 'BOTH', 'PROAS', 'AND', 'GLAD', 'ENOUGH', 'WAS', 'I', 'TO', 'PERCEIVE', 'THAT', 'THEY', 'HAD', 'NOT', 'APPROACHED', 'MATERIALLY', 'NEARER'] +2609-169640-0010-425: hyp=['I', 'SOON', 'SAW', 'BOTH', 'P', 'PROCKS', 'AND', 'GREAT', 'ENOUGH', 'WAS', 'I', 'TO', 'PERCEIVE', 'THAT', 'THEY', 'HAD', 'NOT', 'APPROACHED', 'MATERIALLY', 'NEAR'] +2609-169640-0011-426: ref=['MISTER', 'KITE', 'OBSERVED', 'THIS', 'ALSO', 'AND', 'REMARKED', 'THAT', 'OUR', 'MOVEMENTS', 'HAD', 'BEEN', 
'SO', 'PROMPT', 'AS', 'TO', 'TAKE', 'THE', 'RASCALS', 'ABACK'] +2609-169640-0011-426: hyp=['MISTER', 'KAIGHT', 'OBSERVED', 'THIS', 'ALSO', 'AND', 'REMARKED', 'THAT', 'OUR', 'MOVEMENTS', 'HAD', 'BEEN', 'SO', 'PROMPT', 'AS', 'TO', 'TAKE', 'THE', 'RASCALS', 'ABACK'] +2609-169640-0012-427: ref=['A', 'BREATHLESS', 'STILLNESS', 'SUCCEEDED'] +2609-169640-0012-427: hyp=['A', 'BREATHLESS', 'STILLNESS', 'SUCCEEDED'] +2609-169640-0013-428: ref=['THE', 'PROAS', 'DID', 'NOT', 'ALTER', 'THEIR', 'COURSE', 'BUT', 'NEARED', 'US', 'FAST'] +2609-169640-0013-428: hyp=['THE', 'PROETS', 'DID', 'NOT', 'ALTER', 'THE', 'COURSE', 'BUT', 'NEARED', 'US', 'FAST'] +2609-169640-0014-429: ref=['I', 'HEARD', 'THE', 'RATTLING', 'OF', 'THE', 'BOARDING', 'PIKES', 'TOO', 'AS', 'THEY', 'WERE', 'CUT', 'ADRIFT', 'FROM', 'THE', 'SPANKER', 'BOOM', 'AND', 'FELL', 'UPON', 'THE', 'DECKS'] +2609-169640-0014-429: hyp=['I', 'HEARD', 'THE', 'RATTLING', 'OF', 'THE', 'BOARDING', 'PIPES', 'TOO', 'AS', 'THEY', 'WERE', 'CUT', 'ADRIFT', 'FROM', 'THE', 'SPANKER', 'BOOM', 'AND', 'FELL', 'UPON', 'THE', 'DECKS'] +2609-169640-0015-430: ref=['KITE', 'WENT', 'AFT', 'AND', 'RETURNED', 'WITH', 'THREE', 'OR', 'FOUR', 'MUSKETS', 'AND', 'AS', 'MANY', 'PIKES'] +2609-169640-0015-430: hyp=['KITE', 'WENT', 'AFT', 'AND', 'RETURNED', 'WITH', 'THREE', 'OR', 'FOUR', 'MUSKETS', 'AND', 'AS', 'MANY', 'PIKES'] +2609-169640-0016-431: ref=['THE', 'STILLNESS', 'THAT', 'REIGNED', 'ON', 'BOTH', 'SIDES', 'WAS', 'LIKE', 'THAT', 'OF', 'DEATH'] +2609-169640-0016-431: hyp=['THE', 'STILLNESS', 'THAT', 'REIGNED', 'ON', 'BOTH', 'SIDES', 'WAS', 'LIKE', 'THAT', 'OF', 'DEATH'] +2609-169640-0017-432: ref=['THE', 'JOHN', 'BEHAVED', 'BEAUTIFULLY', 'AND', 'CAME', 'ROUND', 'LIKE', 'A', 'TOP'] +2609-169640-0017-432: hyp=['THE', 'GIAN', 'BEHAVED', 'BEAUTIFULLY', 'HE', 'CAME', 'AROUND', 'LIKE', 'A', 'TART'] +2609-169640-0018-433: ref=['THE', 'QUESTION', 'WAS', 'NOW', 'WHETHER', 'WE', 'COULD', 'PASS', 'THEM', 'OR', 'NOT', 'BEFORE', 'THEY', 'GOT', 'NEAR', 'ENOUGH', 'TO', 'GRAPPLE'] +2609-169640-0018-433: hyp=['THE', 'QUESTION', 'WAS', 'NOW', 'WHETHER', 'WE', 'COULD', 'PASS', 'THEM', 'OR', 'NOT', 'BEFORE', 'THEY', 'GOT', 'NEAR', 'ENOUGH', 'TO', 'GRAPPLE'] +2609-169640-0019-434: ref=['THE', 'CAPTAIN', 'BEHAVED', 'PERFECTLY', 'WELL', 'IN', 'THIS', 'CRITICAL', 'INSTANT', 'COMMANDING', 'A', 'DEAD', 'SILENCE', 'AND', 'THE', 'CLOSEST', 'ATTENTION', 'TO', 'HIS', 'ORDERS'] +2609-169640-0019-434: hyp=['THE', 'CAPTAIN', 'BEHAVED', 'PERFECTLY', 'WELL', 'IN', 'THIS', 'CRITICAL', 'INCIDENT', 'COMMANDING', 'A', 'DEAD', 'SILENCE', 'IN', 'THE', 'CLOSEST', 'INTENTION', 'TO', 'HIS', 'ORDERS'] +2609-169640-0020-435: ref=['NOT', 'A', 'SOUL', 'ON', 'BOARD', 'THE', 'JOHN', 'WAS', 'HURT'] +2609-169640-0020-435: hyp=['NOT', 'SO', 'ON', 'BOARD', 'THE', 'JON', 'WAS', 'HEART'] +2609-169640-0021-436: ref=['ON', 'OUR', 'SIDE', 'WE', 'GAVE', 'THE', 'GENTLEMEN', 'THE', 'FOUR', 'SIXES', 'TWO', 'AT', 'THE', 'NEAREST', 'AND', 'TWO', 'AT', 'THE', 'STERN', 'MOST', 'PROA', 'WHICH', 'WAS', 'STILL', 'NEAR', 'A', "CABLE'S", 'LENGTH', 'DISTANT'] +2609-169640-0021-436: hyp=['ON', 'OUR', 'SIDE', 'WE', 'GAVE', 'THE', 'GENTLEMEN', 'THE', 'FOUR', 'SIXXES', 'TWO', 'OUT', 'THE', 'NEAREST', 'AND', 'TWO', 'AT', 'THE', 'STERNMOST', 'PROW', 'WHICH', 'WAS', 'STILL', 'NEAR', 'A', "CABLE'S", 'LENGTH', 'DISTANT'] +2609-169640-0022-437: ref=['THEY', 'WERE', 'LIKE', 'THE', 'YELLS', 'OF', 'FIENDS', 'IN', 'ANGUISH'] +2609-169640-0022-437: hyp=['THEY', 'WERE', 'LIKE', 'THE', 'YELLS', 'OF', 'FIENDS', 'IN', 'ANGUISH'] +2609-169640-0023-438: ref=['I', 
'DOUBT', 'IF', 'WE', 'TOUCHED', 'A', 'MAN', 'IN', 'THE', 'NEAREST', 'PROA'] +2609-169640-0023-438: hyp=['I', 'DOUBT', 'IF', 'WE', 'TOCH', 'THE', 'MEN', 'IN', 'THE', 'NEAR', 'EXPRARA'] +2609-169640-0024-439: ref=['IN', 'THIS', 'STATE', 'THE', 'SHIP', 'PASSED', 'AHEAD', 'ALL', 'HER', 'CANVAS', 'BEING', 'FULL', 'LEAVING', 'THE', 'PROA', 'MOTIONLESS', 'IN', 'HER', 'WAKE'] +2609-169640-0024-439: hyp=['IN', 'THIS', 'STATE', 'THESHIP', 'POUCHED', 'AHEAD', 'ALL', 'OF', 'HER', 'CANVATES', 'BEEN', 'FOR', 'LEAVING', 'THE', 'PROW', 'MOTION', 'IT', 'IN', 'HER', 'WAKE'] +3005-163389-0000-440: ref=['THEY', 'SWARMED', 'UP', 'IN', 'FRONT', 'OF', "SHERBURN'S", 'PALINGS', 'AS', 'THICK', 'AS', 'THEY', 'COULD', 'JAM', 'TOGETHER', 'AND', 'YOU', "COULDN'T", 'HEAR', 'YOURSELF', 'THINK', 'FOR', 'THE', 'NOISE'] +3005-163389-0000-440: hyp=['THEY', 'SWARMED', 'UP', 'IN', 'FRONT', 'OF', "SHERBURN'S", 'PALINGS', 'AS', 'THICK', 'AS', 'THEY', 'COULD', 'JAM', 'TOGETHER', 'AND', 'YOU', "COULDN'T", 'HEAR', 'YOURSELF', 'THINK', 'FOR', 'THE', 'NOISE'] +3005-163389-0001-441: ref=['SOME', 'SUNG', 'OUT', 'TEAR', 'DOWN', 'THE', 'FENCE', 'TEAR', 'DOWN', 'THE', 'FENCE'] +3005-163389-0001-441: hyp=['SOME', 'SUNG', 'OUT', 'TEAR', 'DOWN', 'THE', 'FENCE', 'TEAR', 'DOWN', 'THE', 'FENCE'] +3005-163389-0002-442: ref=['THE', 'STILLNESS', 'WAS', 'AWFUL', 'CREEPY', 'AND', 'UNCOMFORTABLE'] +3005-163389-0002-442: hyp=['THE', 'STILLNESS', 'WAS', 'AWFUL', 'CREEPY', 'AND', 'UNCOMFORTABLE'] +3005-163389-0003-443: ref=['SHERBURN', 'RUN', 'HIS', 'EYE', 'SLOW', 'ALONG', 'THE', 'CROWD', 'AND', 'WHEREVER', 'IT', 'STRUCK', 'THE', 'PEOPLE', 'TRIED', 'A', 'LITTLE', 'TO', 'OUT', 'GAZE', 'HIM', 'BUT', 'THEY', "COULDN'T", 'THEY', 'DROPPED', 'THEIR', 'EYES', 'AND', 'LOOKED', 'SNEAKY'] +3005-163389-0003-443: hyp=['SHERBIN', 'RUN', 'HIS', 'EYE', 'SLOW', 'ALONG', 'THE', 'CROWD', 'AND', 'WHEREVER', 'IT', 'STRUCK', 'THE', 'PEOPLE', 'TRIED', 'A', 'LITTLE', 'TO', 'OUTGAZE', 'HIM', 'BUT', 'THEY', "COULDN'T", 'THEY', 'DROPPED', 'THEIR', 'EYES', 'AND', 'LOOKED', 'SNEAKY'] +3005-163389-0004-444: ref=['THE', 'AVERAGE', "MAN'S", 'A', 'COWARD'] +3005-163389-0004-444: hyp=['THE', 'AVERAGE', "MAN'S", 'A', 'COWARD'] +3005-163389-0005-445: ref=['BECAUSE', "THEY'RE", 'AFRAID', 'THE', "MAN'S", 'FRIENDS', 'WILL', 'SHOOT', 'THEM', 'IN', 'THE', 'BACK', 'IN', 'THE', 'DARKAND', "IT'S", 'JUST', 'WHAT', 'THEY', 'WOULD', 'DO'] +3005-163389-0005-445: hyp=['BECAUSE', "THEY'RE", 'AFRAID', 'THE', "MAN'S", 'FRIENDS', 'WILL', 'SHOOT', 'THEM', 'IN', 'THE', 'BACK', 'IN', 'THE', 'DARK', 'AND', "IT'S", 'JUST', 'WHAT', 'THEY', 'WOULD', 'DO'] +3005-163389-0006-446: ref=['SO', 'THEY', 'ALWAYS', 'ACQUIT', 'AND', 'THEN', 'A', 'MAN', 'GOES', 'IN', 'THE', 'NIGHT', 'WITH', 'A', 'HUNDRED', 'MASKED', 'COWARDS', 'AT', 'HIS', 'BACK', 'AND', 'LYNCHES', 'THE', 'RASCAL'] +3005-163389-0006-446: hyp=['SO', 'THEY', 'ALWAYS', 'ACQUIT', 'AND', 'THEN', 'A', 'MAN', 'GOES', 'IN', 'THE', 'NIGHT', 'WITH', 'A', 'HUNDRED', 'MASTED', 'COWARDS', 'AT', 'HIS', 'BACK', 'AND', 'LYNCHES', 'THE', 'RASCAL'] +3005-163389-0007-447: ref=['YOU', "DIDN'T", 'WANT', 'TO', 'COME'] +3005-163389-0007-447: hyp=['YOU', "DIDN'T", 'WANT', 'TO', 'COME'] +3005-163389-0008-448: ref=['BUT', 'A', 'MOB', 'WITHOUT', 'ANY', 'MAN', 'AT', 'THE', 'HEAD', 'OF', 'IT', 'IS', 'BENEATH', 'PITIFULNESS'] +3005-163389-0008-448: hyp=['BUT', 'A', 'MOB', 'WITHOUT', 'ANY', 'MAN', 'AT', 'THE', 'HEAD', 'OF', 'IT', 'IS', 'BENEATH', 'PITIFULNESS'] +3005-163389-0009-449: ref=['NOW', 'LEAVE', 'AND', 'TAKE', 'YOUR', 'HALF', 'A', 'MAN', 'WITH', 'YOU', 'TOSSING', 'HIS', 'GUN', 
'UP', 'ACROSS', 'HIS', 'LEFT', 'ARM', 'AND', 'COCKING', 'IT', 'WHEN', 'HE', 'SAYS', 'THIS'] +3005-163389-0009-449: hyp=['NOW', 'LEAVE', 'AND', 'TAKE', 'YOUR', 'HALF', 'A', 'MAN', 'WITH', 'YOU', 'TOSSING', 'HIS', 'GUN', 'UP', 'ACROSS', 'HIS', 'LEFT', 'ARM', 'AND', 'COCKING', 'IT', 'WHEN', 'HE', 'SAYS', 'THIS'] +3005-163389-0010-450: ref=['THE', 'CROWD', 'WASHED', 'BACK', 'SUDDEN', 'AND', 'THEN', 'BROKE', 'ALL', 'APART', 'AND', 'WENT', 'TEARING', 'OFF', 'EVERY', 'WHICH', 'WAY', 'AND', 'BUCK', 'HARKNESS', 'HE', 'HEELED', 'IT', 'AFTER', 'THEM', 'LOOKING', 'TOLERABLE', 'CHEAP'] +3005-163389-0010-450: hyp=['THE', 'CROWD', 'WASHED', 'BACK', 'SUDDEN', 'AND', 'THEN', 'BROKE', 'ALL', 'APART', 'AND', 'WENT', 'TEARING', 'OFF', 'EVERY', 'WHICH', 'WAY', 'AND', 'BUCK', 'HARKNESS', 'HE', 'HEELED', 'IT', 'AFTER', 'THEM', 'LOOKING', 'TOLERABLE', 'CHEAP'] +3005-163389-0011-451: ref=['YOU', "CAN'T", 'BE', 'TOO', 'CAREFUL'] +3005-163389-0011-451: hyp=['YOU', "CAN'T", 'BE', 'TOO', 'CAREFUL'] +3005-163389-0012-452: ref=['THEY', 'ARGUED', 'AND', 'TRIED', 'TO', 'KEEP', 'HIM', 'OUT', 'BUT', 'HE', "WOULDN'T", 'LISTEN', 'AND', 'THE', 'WHOLE', 'SHOW', 'COME', 'TO', 'A', 'STANDSTILL'] +3005-163389-0012-452: hyp=['THEY', 'ARGUED', 'AND', 'TRIED', 'TO', 'KEEP', 'HIM', 'OUT', 'BUT', 'HE', "WOULDN'T", 'LISTEN', 'AND', 'THE', 'WHOLE', 'SHOW', 'COME', 'TO', 'A', 'STANDSTILL'] +3005-163389-0013-453: ref=['AND', 'ONE', 'OR', 'TWO', 'WOMEN', 'BEGUN', 'TO', 'SCREAM'] +3005-163389-0013-453: hyp=['AND', 'ONE', 'OR', 'TWO', 'WOMEN', 'BEGIN', 'TO', 'SCREAM'] +3005-163389-0014-454: ref=['SO', 'THEN', 'THE', 'RINGMASTER', 'HE', 'MADE', 'A', 'LITTLE', 'SPEECH', 'AND', 'SAID', 'HE', 'HOPED', 'THERE', "WOULDN'T", 'BE', 'NO', 'DISTURBANCE', 'AND', 'IF', 'THE', 'MAN', 'WOULD', 'PROMISE', 'HE', "WOULDN'T", 'MAKE', 'NO', 'MORE', 'TROUBLE', 'HE', 'WOULD', 'LET', 'HIM', 'RIDE', 'IF', 'HE', 'THOUGHT', 'HE', 'COULD', 'STAY', 'ON', 'THE', 'HORSE'] +3005-163389-0014-454: hyp=['SO', 'THEN', 'THE', 'RING', 'MASTER', 'HE', 'MADE', 'A', 'LITTLE', 'SPEECH', 'AND', 'SAID', 'HE', 'HOPED', 'THERE', "WOULDN'T", 'BE', 'NO', 'DISTURBANCE', 'AND', 'IF', 'THE', 'MAN', 'WOULD', 'PROMISE', 'HE', "WOULDN'T", 'MAKE', 'NO', 'MORE', 'TROUBLE', 'HE', 'WOULD', 'LET', 'HIM', 'RIDE', 'IF', 'HE', 'THOUGHT', 'HE', 'COULD', 'STAY', 'ON', 'THE', 'HORSE'] +3005-163389-0015-455: ref=['IT', "WARN'T", 'FUNNY', 'TO', 'ME', 'THOUGH', 'I', 'WAS', 'ALL', 'OF', 'A', 'TREMBLE', 'TO', 'SEE', 'HIS', 'DANGER'] +3005-163389-0015-455: hyp=['IT', "WARN'T", 'FUNNY', 'TO', 'ME', 'THOUGH', 'I', 'WAS', 'ALL', 'OF', 'A', 'TREMBLE', 'TO', 'SEE', 'HIS', 'DANGER'] +3005-163389-0016-456: ref=['AND', 'THE', 'HORSE', 'A', 'GOING', 'LIKE', 'A', 'HOUSE', 'AFIRE', 'TOO'] +3005-163389-0016-456: hyp=['AND', 'THE', 'HORSE', 'A', 'GOING', 'LIKE', 'A', 'HOUSE', 'AFIRE', 'TOO'] +3005-163389-0017-457: ref=['HE', 'SHED', 'THEM', 'SO', 'THICK', 'THEY', 'KIND', 'OF', 'CLOGGED', 'UP', 'THE', 'AIR', 'AND', 'ALTOGETHER', 'HE', 'SHED', 'SEVENTEEN', 'SUITS'] +3005-163389-0017-457: hyp=['HE', 'SHED', 'THEM', 'SO', 'THICK', 'THEY', 'KIND', 'OF', 'CLOGGED', 'UP', 'THE', 'AIR', 'AND', 'ALTOGETHER', 'HE', 'SHED', 'SEVENTEEN', 'SUITS'] +3005-163389-0018-458: ref=['WHY', 'IT', 'WAS', 'ONE', 'OF', 'HIS', 'OWN', 'MEN'] +3005-163389-0018-458: hyp=['WHY', 'IT', 'WAS', 'ONE', 'OF', 'HIS', 'OWN', 'MEN'] +3005-163390-0000-459: ref=['ANDBUT', 'NEVER', 'MIND', 'THE', 'REST', 'OF', 'HIS', 'OUTFIT', 'IT', 'WAS', 'JUST', 'WILD', 'BUT', 'IT', 'WAS', 'AWFUL', 'FUNNY'] +3005-163390-0000-459: hyp=['AN', 'BUT', 'NEVER', 'MIND', 'THE', 
'REST', 'OF', 'HIS', 'OUTFIT', 'IT', 'WAS', 'JEST', 'WILD', 'BUT', 'IT', 'WAS', 'AWFUL', 'FUNNY'] +3005-163390-0001-460: ref=['THE', 'PEOPLE', 'MOST', 'KILLED', 'THEMSELVES', 'LAUGHING', 'AND', 'WHEN', 'THE', 'KING', 'GOT', 'DONE', 'CAPERING', 'AND', 'CAPERED', 'OFF', 'BEHIND', 'THE', 'SCENES', 'THEY', 'ROARED', 'AND', 'CLAPPED', 'AND', 'STORMED', 'AND', 'HAW', 'HAWED', 'TILL', 'HE', 'COME', 'BACK', 'AND', 'DONE', 'IT', 'OVER', 'AGAIN', 'AND', 'AFTER', 'THAT', 'THEY', 'MADE', 'HIM', 'DO', 'IT', 'ANOTHER', 'TIME'] +3005-163390-0001-460: hyp=['THE', 'PEOPLE', 'MOST', 'KILLED', 'THEMSELVES', 'LAUGHING', 'AND', 'WHEN', 'THE', 'KING', 'GOT', 'DONE', 'CAPERING', 'AND', 'CAPERED', 'OFF', 'BEHIND', 'THE', 'SCENES', 'THEY', 'ROARED', 'AND', 'CLAPPED', 'AND', 'STORMED', 'AND', 'JAWHAWED', 'TILL', 'HE', 'COME', 'BACK', 'AND', 'DONE', 'IT', 'OVER', 'AGAIN', 'AND', 'AFTER', 'THAT', 'THEY', 'MADE', 'HIM', 'DO', 'IT', 'ANOTHER', 'TIME'] +3005-163390-0002-461: ref=['TWENTY', 'PEOPLE', 'SINGS', 'OUT'] +3005-163390-0002-461: hyp=['TWENTY', 'PEOPLE', 'SINGS', 'OUT'] +3005-163390-0003-462: ref=['THE', 'DUKE', 'SAYS', 'YES'] +3005-163390-0003-462: hyp=['THE', 'DUKE', 'SAYS', 'YES'] +3005-163390-0004-463: ref=['EVERYBODY', 'SINGS', 'OUT', 'SOLD'] +3005-163390-0004-463: hyp=['EVERYBODY', 'SINGS', 'OUT', 'SOLD'] +3005-163390-0005-464: ref=['BUT', 'A', 'BIG', 'FINE', 'LOOKING', 'MAN', 'JUMPS', 'UP', 'ON', 'A', 'BENCH', 'AND', 'SHOUTS', 'HOLD', 'ON'] +3005-163390-0005-464: hyp=['BUT', 'A', 'BIG', 'FINE', 'LOOKING', 'MAN', 'JUMPS', 'UP', 'ON', 'A', 'BENCH', 'AND', 'SHOUTS', 'HOLD', 'ON'] +3005-163390-0006-465: ref=['JUST', 'A', 'WORD', 'GENTLEMEN', 'THEY', 'STOPPED', 'TO', 'LISTEN'] +3005-163390-0006-465: hyp=['JUST', 'A', 'WORD', 'GENTLEMEN', 'THEY', 'STOPPED', 'TO', 'LISTEN'] +3005-163390-0007-466: ref=['WHAT', 'WE', 'WANT', 'IS', 'TO', 'GO', 'OUT', 'OF', 'HERE', 'QUIET', 'AND', 'TALK', 'THIS', 'SHOW', 'UP', 'AND', 'SELL', 'THE', 'REST', 'OF', 'THE', 'TOWN'] +3005-163390-0007-466: hyp=['WHAT', 'WE', 'WANT', 'IS', 'TO', 'GO', 'OUT', 'OF', 'HERE', 'QUIET', 'AND', 'TALK', 'THIS', 'SHOW', 'UP', 'AND', 'SELL', 'THE', 'REST', 'OF', 'THE', 'TOWN'] +3005-163390-0008-467: ref=['YOU', 'BET', 'IT', 'IS', 'THE', 'JEDGE', 'IS', 'RIGHT', 'EVERYBODY', 'SINGS', 'OUT'] +3005-163390-0008-467: hyp=['YOU', 'BET', 'IT', 'IS', 'THE', 'JUDGE', 'IS', 'RIGHT', 'EVERYBODY', 'SINGS', 'OUT'] +3005-163390-0009-468: ref=['WE', 'STRUCK', 'THE', 'RAFT', 'AT', 'THE', 'SAME', 'TIME', 'AND', 'IN', 'LESS', 'THAN', 'TWO', 'SECONDS', 'WE', 'WAS', 'GLIDING', 'DOWN', 'STREAM', 'ALL', 'DARK', 'AND', 'STILL', 'AND', 'EDGING', 'TOWARDS', 'THE', 'MIDDLE', 'OF', 'THE', 'RIVER', 'NOBODY', 'SAYING', 'A', 'WORD'] +3005-163390-0009-468: hyp=['WE', 'STRUCK', 'THE', 'RAFT', 'AT', 'THE', 'SAME', 'TIME', 'AND', 'IN', 'LESS', 'THAN', 'TWO', 'SECONDS', 'WE', 'WAS', 'GLIDING', 'DOWN', 'STREAM', 'ALL', 'DARK', 'AND', 'STILL', 'AND', 'EDGING', 'TOWARDS', 'THE', 'MIDDLE', 'OF', 'THE', 'RIVER', 'NOBODY', 'SAYING', 'A', 'WORD'] +3005-163390-0010-469: ref=['WE', 'NEVER', 'SHOWED', 'A', 'LIGHT', 'TILL', 'WE', 'WAS', 'ABOUT', 'TEN', 'MILE', 'BELOW', 'THE', 'VILLAGE'] +3005-163390-0010-469: hyp=['WE', 'NEVER', 'SHOWED', 'A', 'LIGHT', 'TILL', 'WE', 'WAS', 'ABOUT', 'TEN', 'MILE', 'BELOW', 'THE', 'VILLAGE'] +3005-163390-0011-470: ref=['GREENHORNS', 'FLATHEADS'] +3005-163390-0011-470: hyp=['GREENHORNS', 'FLATHEADS'] +3005-163390-0012-471: ref=['NO', 'I', 'SAYS', 'IT', "DON'T"] +3005-163390-0012-471: hyp=['NO', 'I', 'SAYS', 'IT', "DON'T"] +3005-163390-0013-472: ref=['WELL', 'IT', 
"DON'T", 'BECAUSE', "IT'S", 'IN', 'THE', 'BREED', 'I', 'RECKON', "THEY'RE", 'ALL', 'ALIKE'] +3005-163390-0013-472: hyp=['WELL', 'IT', "DON'T", 'BECAUSE', "IT'S", 'IN', 'THE', 'BREED', 'I', 'RECKON', "THEY'RE", 'ALL', 'ALIKE'] +3005-163390-0014-473: ref=['WELL', "THAT'S", 'WHAT', "I'M", 'A', 'SAYING', 'ALL', 'KINGS', 'IS', 'MOSTLY', 'RAPSCALLIONS', 'AS', 'FUR', 'AS', 'I', 'CAN', 'MAKE', 'OUT', 'IS', 'DAT', 'SO'] +3005-163390-0014-473: hyp=['WELL', "THAT'S", 'WHAT', "I'M", 'A', 'SAYING', 'ALL', 'KINGS', 'IS', 'MOSTLY', 'RAPSCALLIONS', 'AS', 'FUR', 'AS', 'I', 'CAN', 'MAKE', 'OUT', 'IS', 'THAT', 'SO'] +3005-163390-0015-474: ref=['AND', 'LOOK', 'AT', 'CHARLES', 'SECOND', 'AND', 'LOUIS', 'FOURTEEN', 'AND', 'LOUIS', 'FIFTEEN', 'AND', 'JAMES', 'SECOND', 'AND', 'EDWARD', 'SECOND', 'AND', 'RICHARD', 'THIRD', 'AND', 'FORTY', 'MORE', 'BESIDES', 'ALL', 'THEM', 'SAXON', 'HEPTARCHIES', 'THAT', 'USED', 'TO', 'RIP', 'AROUND', 'SO', 'IN', 'OLD', 'TIMES', 'AND', 'RAISE', 'CAIN'] +3005-163390-0015-474: hyp=['AND', 'LOOK', 'AT', 'CHARLES', 'SECOND', 'AND', 'LOUIS', 'FOURTEEN', 'AND', 'LOUIS', 'FIFTEENTH', 'AND', 'JAMES', 'SECOND', 'AND', 'EDWARD', 'SECOND', 'AND', 'RICHARD', 'THIRD', 'AND', 'FORTY', 'MORE', 'BESIDES', 'ALL', 'THEM', 'SAXON', 'HEPTARKEYS', 'THAT', 'USED', 'TO', 'RIP', 'AROUND', 'SO', 'IN', 'OLD', 'TIMES', 'AND', 'RAISE', 'CANE'] +3005-163390-0016-475: ref=['MY', 'YOU', 'OUGHT', 'TO', 'SEEN', 'OLD', 'HENRY', 'THE', 'EIGHT', 'WHEN', 'HE', 'WAS', 'IN', 'BLOOM', 'HE', 'WAS', 'A', 'BLOSSOM'] +3005-163390-0016-475: hyp=['MY', 'YOU', 'OUGHT', 'TO', 'SEEN', 'OLD', 'HENRY', 'THE', 'EIGHTH', 'WHEN', 'HE', 'WAS', 'IN', 'BLOOM', 'HE', 'WAS', 'A', 'BLOSSOM'] +3005-163390-0017-476: ref=['RING', 'UP', 'FAIR', 'ROSAMUN'] +3005-163390-0017-476: hyp=['BRING', 'UP', 'FAIR', 'ROSAMOND'] +3005-163390-0018-477: ref=['WELL', 'HENRY', 'HE', 'TAKES', 'A', 'NOTION', 'HE', 'WANTS', 'TO', 'GET', 'UP', 'SOME', 'TROUBLE', 'WITH', 'THIS', 'COUNTRY'] +3005-163390-0018-477: hyp=['WELL', 'HENRY', 'HE', 'TAKES', 'A', 'NOTION', 'HE', 'WANTS', 'TO', 'GIT', 'UP', 'SOME', 'TROUBLE', 'WITH', 'THIS', 'COUNTRY'] +3005-163390-0019-478: ref=["S'POSE", 'HE', 'OPENED', 'HIS', 'MOUTHWHAT', 'THEN'] +3005-163390-0019-478: hyp=["S'POSE", 'HE', 'OPENED', 'HIS', 'MOUTH', 'WHAT', 'THEN'] +3005-163390-0020-479: ref=['ALL', 'I', 'SAY', 'IS', 'KINGS', 'IS', 'KINGS', 'AND', 'YOU', 'GOT', 'TO', 'MAKE', 'ALLOWANCES'] +3005-163390-0020-479: hyp=['ALL', 'I', 'SAY', 'IS', 'KINGS', 'IS', 'KINGS', 'AND', 'YOU', 'GOT', 'TO', 'MAKE', 'ALLOWANCES'] +3005-163390-0021-480: ref=['TAKE', 'THEM', 'ALL', 'AROUND', "THEY'RE", 'A', 'MIGHTY', 'ORNERY', 'LOT', "IT'S", 'THE', 'WAY', "THEY'RE", 'RAISED'] +3005-163390-0021-480: hyp=['TAKE', 'EM', 'ALL', 'AROUND', "THEY'RE", 'A', 'MIGHTY', 'ORNERY', 'LOT', "IT'S", 'THE', 'WAY', "THEY'RE", 'RAISED'] +3005-163390-0022-481: ref=['WELL', 'THEY', 'ALL', 'DO', 'JIM'] +3005-163390-0022-481: hyp=['WELL', 'THEY', 'ALL', 'DO', 'JIM'] +3005-163390-0023-482: ref=['NOW', 'DE', 'DUKE', "HE'S", 'A', 'TOLERBLE', 'LIKELY', 'MAN', 'IN', 'SOME', 'WAYS'] +3005-163390-0023-482: hyp=['NOW', 'DE', 'DUPE', "HE'S", 'A', 'TOLERABLE', 'LIKE', 'MAN', 'IN', 'SOME', 'WAYS'] +3005-163390-0024-483: ref=['THIS', "ONE'S", 'A', 'MIDDLING', 'HARD', 'LOT', 'FOR', 'A', 'DUKE'] +3005-163390-0024-483: hyp=['THIS', 'WAS', 'A', 'MIDDLIN', 'HARD', 'LOT', 'FOR', 'A', 'DUPE'] +3005-163390-0025-484: ref=['WHEN', 'I', 'WAKED', 'UP', 'JUST', 'AT', 'DAYBREAK', 'HE', 'WAS', 'SITTING', 'THERE', 'WITH', 'HIS', 'HEAD', 'DOWN', 'BETWIXT', 'HIS', 'KNEES', 'MOANING', 'AND', 
'MOURNING', 'TO', 'HIMSELF'] +3005-163390-0025-484: hyp=['WHEN', 'I', 'WAKED', 'UP', 'JUST', 'AT', 'DAYBREAK', 'HE', 'WAS', 'SITTING', 'THERE', 'WITH', 'HIS', 'HEAD', 'DOWN', 'BETWIXT', 'HIS', 'KNEES', 'MOANING', 'AND', 'MOURNING', 'TO', 'HIMSELF'] +3005-163390-0026-485: ref=['IT', "DON'T", 'SEEM', 'NATURAL', 'BUT', 'I', 'RECKON', "IT'S", 'SO'] +3005-163390-0026-485: hyp=['IT', "DON'T", 'SEEM', 'NATURAL', 'BUT', 'I', 'RECKON', "IT'S", 'SO'] +3005-163390-0027-486: ref=['HE', 'WAS', 'OFTEN', 'MOANING', 'AND', 'MOURNING', 'THAT', 'WAY', 'NIGHTS', 'WHEN', 'HE', 'JUDGED', 'I', 'WAS', 'ASLEEP', 'AND', 'SAYING', 'PO', 'LITTLE', 'LIZABETH'] +3005-163390-0027-486: hyp=['HE', 'WAS', 'OFTEN', 'MOANING', 'AND', 'MOURNING', 'THAT', 'WAY', 'NIGHTS', 'WHEN', 'HE', 'JUDGED', 'I', 'WAS', 'ASLEEP', 'AND', 'SAYING', 'POOR', 'LITTLE', "LIZ'BETH"] +3005-163390-0028-487: ref=['DOAN', 'YOU', 'HEAR', 'ME', 'SHET', 'DE', 'DO'] +3005-163390-0028-487: hyp=["DON'T", 'YOU', 'HEAR', 'ME', 'SHUT', 'DE', 'DO'] +3005-163390-0029-488: ref=['I', 'LAY', 'I', 'MAKE', 'YOU', 'MINE'] +3005-163390-0029-488: hyp=['I', 'LAY', 'I', 'MAKE', 'YOU', 'MINE'] +3005-163390-0030-489: ref=['JIS', 'AS', 'LOUD', 'AS', 'I', 'COULD', 'YELL'] +3005-163390-0030-489: hyp=['JUST', 'AS', 'LOUD', 'AS', 'I', 'COULD', 'YELL'] +3005-163391-0000-490: ref=['WHICH', 'WAS', 'SOUND', 'ENOUGH', 'JUDGMENT', 'BUT', 'YOU', 'TAKE', 'THE', 'AVERAGE', 'MAN', 'AND', 'HE', "WOULDN'T", 'WAIT', 'FOR', 'HIM', 'TO', 'HOWL'] +3005-163391-0000-490: hyp=['WHICH', 'WAS', 'SOUND', 'ENOUGH', 'JUDGMENT', 'BUT', 'YOU', 'TAKE', 'THE', 'AVERAGE', 'MAN', 'AND', 'HE', "WOULDN'T", 'WAIT', 'FOR', 'HIM', 'TO', 'HIRE'] +3005-163391-0001-491: ref=['THE', "KING'S", 'DUDS', 'WAS', 'ALL', 'BLACK', 'AND', 'HE', 'DID', 'LOOK', 'REAL', 'SWELL', 'AND', 'STARCHY'] +3005-163391-0001-491: hyp=['THE', "KING'S", 'DUDS', 'WAS', 'ALL', 'BLACK', 'AND', 'HE', 'DID', 'LOOK', 'REAL', 'SWELL', 'AND', 'STARCHY'] +3005-163391-0002-492: ref=['WHY', 'BEFORE', 'HE', 'LOOKED', 'LIKE', 'THE', 'ORNERIEST', 'OLD', 'RIP', 'THAT', 'EVER', 'WAS', 'BUT', 'NOW', 'WHEN', "HE'D", 'TAKE', 'OFF', 'HIS', 'NEW', 'WHITE', 'BEAVER', 'AND', 'MAKE', 'A', 'BOW', 'AND', 'DO', 'A', 'SMILE', 'HE', 'LOOKED', 'THAT', 'GRAND', 'AND', 'GOOD', 'AND', 'PIOUS', 'THAT', "YOU'D", 'SAY', 'HE', 'HAD', 'WALKED', 'RIGHT', 'OUT', 'OF', 'THE', 'ARK', 'AND', 'MAYBE', 'WAS', 'OLD', 'LEVITICUS', 'HIMSELF'] +3005-163391-0002-492: hyp=['WHY', 'BEFORE', 'HE', 'LOOKED', 'LIKE', 'THE', 'ORNERIEST', 'OLD', 'RIP', 'THAT', 'EVER', 'WAS', 'BUT', 'NOW', 'WHEN', "HE'D", 'TAKE', 'OFF', 'HIS', 'NEW', 'WHITE', 'BEAVER', 'AND', 'MAKE', 'A', 'BOW', 'AND', 'DEW', 'A', 'SMILE', 'HE', 'LOOKED', 'THAT', 'GRAND', 'AND', 'GOOD', 'AND', 'PIOUS', 'THAT', "YOU'D", 'SAY', "HE'D", 'WALKED', 'RIGHT', 'OUT', 'OF', 'THE', 'ARK', 'AND', 'MAYBE', 'WAS', 'OLD', 'LEVITICUS', 'HIMSELF'] +3005-163391-0003-493: ref=['JIM', 'CLEANED', 'UP', 'THE', 'CANOE', 'AND', 'I', 'GOT', 'MY', 'PADDLE', 'READY'] +3005-163391-0003-493: hyp=['JIM', 'CLEANED', 'UP', 'THE', 'CANOE', 'AND', 'I', 'GOT', 'MY', 'PADDLE', 'READY'] +3005-163391-0004-494: ref=['WHER', 'YOU', 'BOUND', 'FOR', 'YOUNG', 'MAN'] +3005-163391-0004-494: hyp=['WHERE', 'ARE', 'YOU', 'BOUND', 'FOR', 'YOUNG', 'MAN'] +3005-163391-0005-495: ref=['GIT', 'ABOARD', 'SAYS', 'THE', 'KING'] +3005-163391-0005-495: hyp=['GET', 'ABOARD', 'SAYS', 'THE', 'KING'] +3005-163391-0006-496: ref=['I', 'DONE', 'SO', 'AND', 'THEN', 'WE', 'ALL', 'THREE', 'STARTED', 'ON', 'AGAIN'] +3005-163391-0006-496: hyp=['I', 'DONE', 'SO', 'AND', 'THEN', 'WE', 'ALL', 'THREE', 
'STARTED', 'ON', 'AGAIN'] +3005-163391-0007-497: ref=['THE', 'YOUNG', 'CHAP', 'WAS', 'MIGHTY', 'THANKFUL', 'SAID', 'IT', 'WAS', 'TOUGH', 'WORK', 'TOTING', 'HIS', 'BAGGAGE', 'SUCH', 'WEATHER'] +3005-163391-0007-497: hyp=['THE', 'YOUNG', 'CHAP', 'WAS', 'MIGHTY', 'THANKFUL', 'SAID', 'IT', 'WAS', 'TOUGH', 'WORK', 'TOADING', 'HIS', 'BAGGAGE', 'SUCH', 'WEATHER'] +3005-163391-0008-498: ref=['HE', 'ASKED', 'THE', 'KING', 'WHERE', 'HE', 'WAS', 'GOING', 'AND', 'THE', 'KING', 'TOLD', 'HIM', "HE'D", 'COME', 'DOWN', 'THE', 'RIVER', 'AND', 'LANDED', 'AT', 'THE', 'OTHER', 'VILLAGE', 'THIS', 'MORNING', 'AND', 'NOW', 'HE', 'WAS', 'GOING', 'UP', 'A', 'FEW', 'MILE', 'TO', 'SEE', 'AN', 'OLD', 'FRIEND', 'ON', 'A', 'FARM', 'UP', 'THERE', 'THE', 'YOUNG', 'FELLOW', 'SAYS'] +3005-163391-0008-498: hyp=['HE', 'ASKED', 'THE', 'KING', 'WHERE', 'HE', 'WAS', 'GOING', 'AND', 'THE', 'KING', 'TOLD', 'HIM', "HE'D", 'COME', 'DOWN', 'THE', 'RIVER', 'AND', 'LANDED', 'AT', 'THE', 'OTHER', 'VILLAGE', 'THIS', 'MORNING', 'AND', 'NOW', 'HE', 'WAS', 'GOING', 'UP', 'A', 'FEW', 'MILE', 'TO', 'SEE', 'AN', 'OLD', 'FRIEND', 'ON', 'A', 'FARM', 'UP', 'THERE', 'THE', 'YOUNG', 'FELLOW', 'SAYS'] +3005-163391-0009-499: ref=['BUT', 'THEN', 'I', 'SAYS', 'AGAIN', 'NO', 'I', 'RECKON', 'IT', "AIN'T", 'HIM', 'OR', 'ELSE', 'HE', "WOULDN'T", 'BE', 'PADDLING', 'UP', 'THE', 'RIVER', 'YOU', "AIN'T", 'HIM', 'ARE', 'YOU'] +3005-163391-0009-499: hyp=['BUT', 'THEN', 'I', 'SAYS', 'AGAIN', 'NO', 'I', 'RECKON', 'IT', "AIN'T", 'HIM', 'OR', 'ELSE', 'HE', "WOULDN'T", 'BE', 'PADDLIN', 'UP', 'THE', 'RIVER', 'YOU', "AIN'T", 'HIM', 'ARE', 'YOU'] +3005-163391-0010-500: ref=['NO', 'MY', "NAME'S", 'BLODGETT', 'ELEXANDER', 'BLODGETT', 'REVEREND', 'ELEXANDER', 'BLODGETT', 'I', "S'POSE", 'I', 'MUST', 'SAY', 'AS', "I'M", 'ONE', 'O', 'THE', "LORD'S", 'POOR', 'SERVANTS'] +3005-163391-0010-500: hyp=['NO', 'MY', "NAME'S", 'BLODGET', 'ALEXANDER', 'BLAGET', 'REVEREND', 'ALEXANDER', 'BLDGET', 'I', "S'POSE", 'I', 'MUST', 'SAY', 'AS', "I'M", 'ONE', 'OF', 'THE', 'LARGE', 'POOR', 'SERVANTS'] +3005-163391-0011-501: ref=['YOU', 'SEE', 'HE', 'WAS', 'PRETTY', 'OLD', 'AND', "GEORGE'S", "G'YIRLS", 'WAS', 'TOO', 'YOUNG', 'TO', 'BE', 'MUCH', 'COMPANY', 'FOR', 'HIM', 'EXCEPT', 'MARY', 'JANE', 'THE', 'RED', 'HEADED', 'ONE', 'AND', 'SO', 'HE', 'WAS', 'KINDER', 'LONESOME', 'AFTER', 'GEORGE', 'AND', 'HIS', 'WIFE', 'DIED', 'AND', "DIDN'T", 'SEEM', 'TO', 'CARE', 'MUCH', 'TO', 'LIVE'] +3005-163391-0011-501: hyp=['YOU', 'SEE', 'HE', 'WAS', 'PRETTY', 'OLE', 'AND', "GEORGE'S", 'GO', 'GIRLS', 'WAS', 'TOO', 'YOUNG', 'TO', 'BE', 'MUCH', 'COMPANY', 'FOR', 'HIM', 'EXCEPT', 'MARY', 'JANE', 'THE', 'RED', 'HEADED', 'ONE', 'AND', 'SO', 'HE', 'WAS', 'KINDER', 'LONESOME', 'AFTER', 'GEORGE', 'AND', 'HIS', 'WIFE', 'DIED', 'AND', "DIDN'T", 'SEEM', 'TO', 'CARE', 'MUCH', 'TO', 'LIVE'] +3005-163391-0012-502: ref=['TOO', 'BAD', 'TOO', 'BAD', 'HE', "COULDN'T", 'A', 'LIVED', 'TO', 'SEE', 'HIS', 'BROTHERS', 'POOR', 'SOUL'] +3005-163391-0012-502: hyp=['TOO', 'BAD', 'TOO', 'BAD', 'HE', "COULDN'T", 'HAVE', 'LIVED', 'TO', 'SEE', 'HIS', 'BROTHERS', 'POOR', 'SOUL'] +3005-163391-0013-503: ref=["I'M", 'GOING', 'IN', 'A', 'SHIP', 'NEXT', 'WEDNESDAY', 'FOR', 'RYO', 'JANEERO', 'WHERE', 'MY', 'UNCLE', 'LIVES'] +3005-163391-0013-503: hyp=["I'M", 'GOING', 'IN', 'A', 'SHIP', 'NEXT', 'WEDNESDAY', 'FOR', 'RIO', 'JANEIRO', 'WHERE', 'MY', 'UNCLE', 'LIVES'] +3005-163391-0014-504: ref=['BUT', "IT'LL", 'BE', 'LOVELY', 'WISHT', 'I', 'WAS', 'A', 'GOING'] +3005-163391-0014-504: hyp=['BUT', "IT'LL", 'BE', 'LOVELY', 'WISHED', 'I', 'WAS', 'A', 'GOIN'] 
+3005-163391-0015-505: ref=['MARY', "JANE'S", 'NINETEEN', "SUSAN'S", 'FIFTEEN', 'AND', "JOANNA'S", 'ABOUT', "FOURTEENTHAT'S", 'THE', 'ONE', 'THAT', 'GIVES', 'HERSELF', 'TO', 'GOOD', 'WORKS', 'AND', 'HAS', 'A', 'HARE', 'LIP', 'POOR', 'THINGS'] +3005-163391-0015-505: hyp=['MARY', "JANE'S", 'NINETEEN', "SUSAN'S", 'FIFTEEN', 'AN', "JOANNA'S", 'ABOUT', 'FOURTEEN', "THAT'S", 'THE', 'ONE', 'THAT', 'GIVES', 'HERSELF', 'TO', 'GOOD', 'WORKS', 'AN', 'HAS', 'A', 'HAIR', 'LIP', 'POOR', 'THINGS'] +3005-163391-0016-506: ref=['WELL', 'THEY', 'COULD', 'BE', 'WORSE', 'OFF'] +3005-163391-0016-506: hyp=['WELL', 'THEY', 'COULD', 'BE', 'WORSE', 'OFF'] +3005-163391-0017-507: ref=['OLD', 'PETER', 'HAD', 'FRIENDS', 'AND', 'THEY', "AIN'T", 'GOING', 'TO', 'LET', 'THEM', 'COME', 'TO', 'NO', 'HARM'] +3005-163391-0017-507: hyp=['OLD', 'PETER', 'HAD', 'FRIENDS', 'AND', 'THEY', "AIN'T", 'GOING', 'TO', 'LET', 'THEM', 'COME', 'TO', 'NO', 'HARM'] +3005-163391-0018-508: ref=['BLAMED', 'IF', 'HE', "DIDN'T", 'INQUIRE', 'ABOUT', 'EVERYBODY', 'AND', 'EVERYTHING', 'IN', 'THAT', 'BLESSED', 'TOWN', 'AND', 'ALL', 'ABOUT', 'THE', 'WILKSES', 'AND', 'ABOUT', "PETER'S", 'BUSINESSWHICH', 'WAS', 'A', 'TANNER', 'AND', 'ABOUT', "GEORGE'SWHICH", 'WAS', 'A', 'CARPENTER', 'AND', 'ABOUT', "HARVEY'SWHICH", 'WAS', 'A', 'DISSENTERING', 'MINISTER', 'AND', 'SO', 'ON', 'AND', 'SO', 'ON', 'THEN', 'HE', 'SAYS'] +3005-163391-0018-508: hyp=['BLAMED', 'IF', 'HE', "DIDN'T", 'INQUIRE', 'ABOUT', 'EVERYBODY', 'AND', 'EVERYTHING', 'IN', 'THAT', 'BLESSED', 'TOWN', 'AND', 'ALL', 'ABOUT', 'THE', 'WILKESES', 'AND', 'ABOUT', "PETER'S", 'BUSINESS', 'WHICH', 'WAS', 'A', 'TANNER', 'AND', 'ABOUT', "GEORGE'S", 'WHICH', 'WAS', 'A', 'CARPENTER', 'AND', 'ABOUT', "HARVEY'S", 'WHICH', 'WAS', 'A', 'DISSENTERING', 'MINISTER', 'AND', 'SO', 'ON', 'AND', 'SO', 'ON', 'THEN', 'HE', 'SAYS'] +3005-163391-0019-509: ref=['WHEN', "THEY'RE", 'DEEP', 'THEY', "WON'T", 'STOP', 'FOR', 'A', 'HAIL'] +3005-163391-0019-509: hyp=['WHEN', "THEY'RE", 'DEEP', 'THEY', "WON'T", 'STOP', 'FOR', 'A', 'HAIL'] +3005-163391-0020-510: ref=['WAS', 'PETER', 'WILKS', 'WELL', 'OFF'] +3005-163391-0020-510: hyp=['WAS', 'PETER', 'WILKES', 'WELL', 'OFF'] +3005-163391-0021-511: ref=['WHEN', 'WE', 'STRUCK', 'THE', 'BOAT', 'SHE', 'WAS', 'ABOUT', 'DONE', 'LOADING', 'AND', 'PRETTY', 'SOON', 'SHE', 'GOT', 'OFF'] +3005-163391-0021-511: hyp=['WHEN', 'WE', 'STRUCK', 'THE', 'BOAT', 'SHE', 'WAS', 'ABOUT', 'DONE', 'LOADING', 'AND', 'PRETTY', 'SOON', 'SHE', 'GOT', 'OFF'] +3005-163391-0022-512: ref=['NOW', 'HUSTLE', 'BACK', 'RIGHT', 'OFF', 'AND', 'FETCH', 'THE', 'DUKE', 'UP', 'HERE', 'AND', 'THE', 'NEW', 'CARPET', 'BAGS'] +3005-163391-0022-512: hyp=['NOW', 'HUSTLE', 'BACK', 'RIGHT', 'OFF', 'AND', 'FETCH', 'THE', 'DUKE', 'UP', 'HERE', 'AND', 'THE', 'NEW', 'CARPET', 'BAGS'] +3005-163391-0023-513: ref=['SO', 'THEN', 'THEY', 'WAITED', 'FOR', 'A', 'STEAMBOAT'] +3005-163391-0023-513: hyp=['SO', 'THEN', 'THEY', 'WAITED', 'FOR', 'A', 'STEAMBOAT'] +3005-163391-0024-514: ref=['BUT', 'THE', 'KING', 'WAS', "CA'M", 'HE', 'SAYS'] +3005-163391-0024-514: hyp=['BUT', 'THE', 'KING', 'WAS', 'CALM', 'HE', 'SAYS'] +3005-163391-0025-515: ref=['THEY', 'GIVE', 'A', 'GLANCE', 'AT', 'ONE', 'ANOTHER', 'AND', 'NODDED', 'THEIR', 'HEADS', 'AS', 'MUCH', 'AS', 'TO', 'SAY', 'WHAT', 'D', 'I', 'TELL', 'YOU'] +3005-163391-0025-515: hyp=['THEY', 'GIVE', 'A', 'GLANCE', 'AT', 'ONE', 'ANOTHER', 'AND', 'NODDED', 'THEIR', 'HEADS', 'AS', 'MUCH', 'AS', 'TO', 'SAY', 'WOULD', 'THEY', 'TELL', 'YOU'] +3005-163391-0026-516: ref=['THEN', 'ONE', 'OF', 'THEM', 'SAYS', 'KIND', 
'OF', 'SOFT', 'AND', 'GENTLE'] +3005-163391-0026-516: hyp=['THEN', 'ONE', 'OF', 'THEM', 'SAYS', 'KIND', 'OF', 'SOFT', 'AND', 'GENTLE'] +3005-163399-0000-517: ref=['PHELPS', 'WAS', 'ONE', 'OF', 'THESE', 'LITTLE', 'ONE', 'HORSE', 'COTTON', 'PLANTATIONS', 'AND', 'THEY', 'ALL', 'LOOK', 'ALIKE'] +3005-163399-0000-517: hyp=["PHELPS'S", 'WAS', 'ONE', 'OF', 'THESE', 'LITTLE', 'ONE', 'HORSE', 'COTTON', 'PLANTATIONS', 'AND', 'THEY', 'ALL', 'LOOK', 'ALIKE'] +3005-163399-0001-518: ref=['I', 'WENT', 'AROUND', 'AND', 'CLUMB', 'OVER', 'THE', 'BACK', 'STILE', 'BY', 'THE', 'ASH', 'HOPPER', 'AND', 'STARTED', 'FOR', 'THE', 'KITCHEN'] +3005-163399-0001-518: hyp=['I', 'WENT', 'AROUND', 'AND', 'CLIMBED', 'OVER', 'THE', 'BACK', 'STILE', 'BY', 'THE', 'ASH', 'HOPPER', 'AND', 'STARTED', 'FOR', 'THE', 'KITCHEN'] +3005-163399-0002-519: ref=['I', 'OUT', 'WITH', 'A', "YES'M", 'BEFORE', 'I', 'THOUGHT'] +3005-163399-0002-519: hyp=['I', 'OUT', 'WITH', 'A', 'YASSEM', 'FORE', 'I', 'THOUGHT'] +3005-163399-0003-520: ref=['SO', 'THEN', 'SHE', 'STARTED', 'FOR', 'THE', 'HOUSE', 'LEADING', 'ME', 'BY', 'THE', 'HAND', 'AND', 'THE', 'CHILDREN', 'TAGGING', 'AFTER'] +3005-163399-0003-520: hyp=['SO', 'THEN', 'SHE', 'STARTED', 'FOR', 'THE', 'HOUSE', 'LEADING', 'ME', 'BY', 'THE', 'HAND', 'AND', 'THE', 'CHILDREN', 'TAGGING', 'AFTER'] +3005-163399-0004-521: ref=['WHEN', 'WE', 'GOT', 'THERE', 'SHE', 'SET', 'ME', 'DOWN', 'IN', 'A', 'SPLIT', 'BOTTOMED', 'CHAIR', 'AND', 'SET', 'HERSELF', 'DOWN', 'ON', 'A', 'LITTLE', 'LOW', 'STOOL', 'IN', 'FRONT', 'OF', 'ME', 'HOLDING', 'BOTH', 'OF', 'MY', 'HANDS', 'AND', 'SAYS'] +3005-163399-0004-521: hyp=['WHEN', 'WE', 'GOT', 'THERE', 'SHE', 'SET', 'ME', 'DOWN', 'IN', 'A', 'SPLIT', 'BOTTOM', 'CHAIR', 'AND', 'SET', 'HERSELF', 'DOWN', 'ON', 'A', 'LITTLE', 'LOW', 'STOOL', 'IN', 'FRONT', 'OF', 'ME', 'HOLDING', 'BOTH', 'OF', 'MY', 'HANDS', 'AND', 'SAYS'] +3005-163399-0005-522: ref=['WELL', "IT'S", 'LUCKY', 'BECAUSE', 'SOMETIMES', 'PEOPLE', 'DO', 'GET', 'HURT'] +3005-163399-0005-522: hyp=['WELL', "IT'S", 'LUCKY', 'BECAUSE', 'SOMETIMES', 'PEOPLE', 'DO', 'GET', 'HURT'] +3005-163399-0006-523: ref=['AND', 'I', 'THINK', 'HE', 'DIED', 'AFTERWARDS', 'HE', 'WAS', 'A', 'BAPTIST'] +3005-163399-0006-523: hyp=['AND', 'I', 'THINK', 'HE', 'DIED', 'AFTERWARDS', 'HE', 'WAS', 'A', 'BAPTIST'] +3005-163399-0007-524: ref=['YES', 'IT', 'WAS', 'MORTIFICATIONTHAT', 'WAS', 'IT'] +3005-163399-0007-524: hyp=['YES', 'IT', 'WAS', 'MORTIFICATION', 'THAT', 'WAS', 'IT'] +3005-163399-0008-525: ref=['YOUR', "UNCLE'S", 'BEEN', 'UP', 'TO', 'THE', 'TOWN', 'EVERY', 'DAY', 'TO', 'FETCH', 'YOU'] +3005-163399-0008-525: hyp=['YOUR', "UNCLE'S", 'BEEN', 'UP', 'TO', 'TOWN', 'EVERY', 'DAY', 'TO', 'FETCH', 'YOU'] +3005-163399-0009-526: ref=['YOU', 'MUST', 'A', 'MET', 'HIM', 'ON', 'THE', 'ROAD', "DIDN'T", 'YOU', 'OLDISH', 'MAN', 'WITH', 'A'] +3005-163399-0009-526: hyp=['YOU', 'MUST', 'A', 'MET', 'HIM', 'ON', 'THE', 'ROAD', "DIDN'T", 'YOU', 'OLDISH', 'MAN', 'WITH', 'A'] +3005-163399-0010-527: ref=['WHY', 'CHILD', 'IT', 'LL', 'BE', 'STOLE'] +3005-163399-0010-527: hyp=['WHY', 'CHILD', "IT'LL", 'BE', 'STOLE'] +3005-163399-0011-528: ref=['IT', 'WAS', 'KINDER', 'THIN', 'ICE', 'BUT', 'I', 'SAYS'] +3005-163399-0011-528: hyp=['IT', 'WAS', 'KIND', 'OF', 'THIN', 'ICE', 'BUT', 'I', 'SAYS'] +3005-163399-0012-529: ref=['I', 'HAD', 'MY', 'MIND', 'ON', 'THE', 'CHILDREN', 'ALL', 'THE', 'TIME', 'I', 'WANTED', 'TO', 'GET', 'THEM', 'OUT', 'TO', 'ONE', 'SIDE', 'AND', 'PUMP', 'THEM', 'A', 'LITTLE', 'AND', 'FIND', 'OUT', 'WHO', 'I', 'WAS'] +3005-163399-0012-529: hyp=['I', 'HAD', 
'MY', 'MIND', 'ON', 'THE', 'CHILDREN', 'ALL', 'THE', 'TIME', 'I', 'WANTED', 'TO', 'GET', 'THEM', 'OUT', 'TO', 'ONE', 'SIDE', 'AND', 'PUMP', 'THEM', 'A', 'LITTLE', 'AND', 'FIND', 'OUT', 'WHO', 'I', 'WAS'] +3005-163399-0013-530: ref=['PRETTY', 'SOON', 'SHE', 'MADE', 'THE', 'COLD', 'CHILLS', 'STREAK', 'ALL', 'DOWN', 'MY', 'BACK', 'BECAUSE', 'SHE', 'SAYS'] +3005-163399-0013-530: hyp=['PRETTY', 'SOON', 'SHE', 'MADE', 'THE', 'COLD', 'CHILL', 'STREAK', 'ALL', 'DOWN', 'MY', 'BACK', 'BECAUSE', 'SHE', 'SAYS'] +3005-163399-0014-531: ref=['I', 'SEE', 'IT', "WARN'T", 'A', 'BIT', 'OF', 'USE', 'TO', 'TRY', 'TO', 'GO', 'AHEAD', "I'D", 'GOT', 'TO', 'THROW', 'UP', 'MY', 'HAND'] +3005-163399-0014-531: hyp=['I', 'SEE', 'IT', "WARN'T", 'A', 'BIT', 'OF', 'USE', 'TO', 'TRY', 'TO', 'GO', 'AHEAD', "I'D", 'GOT', 'TO', 'THROW', 'UP', 'MY', 'HAND'] +3005-163399-0015-532: ref=['SO', 'I', 'SAYS', 'TO', 'MYSELF', "HERE'S", 'ANOTHER', 'PLACE', 'WHERE', 'I', 'GOT', 'TO', 'RESK', 'THE', 'TRUTH'] +3005-163399-0015-532: hyp=['SO', 'I', 'SAYS', 'TO', 'MYSELF', 'HERE', 'IS', 'ANOTHER', 'PLACE', 'WHERE', 'I', 'GOT', 'TO', 'REST', 'THE', 'TRUTH'] +3005-163399-0016-533: ref=['I', 'OPENED', 'MY', 'MOUTH', 'TO', 'BEGIN', 'BUT', 'SHE', 'GRABBED', 'ME', 'AND', 'HUSTLED', 'ME', 'IN', 'BEHIND', 'THE', 'BED', 'AND', 'SAYS', 'HERE', 'HE', 'COMES'] +3005-163399-0016-533: hyp=['I', 'OPENED', 'MY', 'MOUTH', 'TO', 'BEGIN', 'BUT', 'SHE', 'GRABBED', 'ME', 'AND', 'HUSTLED', 'ME', 'IN', 'BEHIND', 'THE', 'BED', 'AND', 'SAYS', 'HERE', 'IT', 'COMES'] +3005-163399-0017-534: ref=['CHILDREN', "DON'T", 'YOU', 'SAY', 'A', 'WORD'] +3005-163399-0017-534: hyp=['CHILDREN', "DON'T", 'YOU', 'SAY', 'A', 'WORD'] +3005-163399-0018-535: ref=['I', 'SEE', 'I', 'WAS', 'IN', 'A', 'FIX', 'NOW'] +3005-163399-0018-535: hyp=['I', 'SEE', 'I', 'WAS', 'IN', 'A', 'FIX', 'NOW'] +3005-163399-0019-536: ref=['MISSUS', 'PHELPS', 'SHE', 'JUMPS', 'FOR', 'HIM', 'AND', 'SAYS'] +3005-163399-0019-536: hyp=['MISSUS', 'PHELPS', 'SHE', 'JUMPS', 'FOR', 'HIM', 'AND', 'SAYS'] +3005-163399-0020-537: ref=['HAS', 'HE', 'COME', 'NO', 'SAYS', 'HER', 'HUSBAND'] +3005-163399-0020-537: hyp=['HAS', 'HE', 'COME', 'NO', 'SAYS', 'HER', 'HUSBAND'] +3005-163399-0021-538: ref=['I', "CAN'T", 'IMAGINE', 'SAYS', 'THE', 'OLD', 'GENTLEMAN', 'AND', 'I', 'MUST', 'SAY', 'IT', 'MAKES', 'ME', 'DREADFUL', 'UNEASY'] +3005-163399-0021-538: hyp=['I', "CAN'T", 'IMAGINE', 'SAYS', 'THE', 'OLD', 'GENTLEMAN', 'AND', 'I', 'MUST', 'SAY', 'IT', 'MAKES', 'ME', 'DREADFUL', 'UNEASY'] +3005-163399-0022-539: ref=['UNEASY', 'SHE', 'SAYS', "I'M", 'READY', 'TO', 'GO', 'DISTRACTED'] +3005-163399-0022-539: hyp=['UNEASY', 'SHE', 'SAYS', "I'M", 'READY', 'TO', 'GO', 'DISTRACTED'] +3005-163399-0023-540: ref=['HE', 'MUST', 'A', 'COME', 'AND', "YOU'VE", 'MISSED', 'HIM', 'ALONG', 'THE', 'ROAD'] +3005-163399-0023-540: hyp=['HE', 'MUST', 'A', 'COME', 'AND', "YOU'VE", 'MISSED', 'HIM', 'ALONG', 'THE', 'ROAD'] +3005-163399-0024-541: ref=['OH', "DON'T", 'DISTRESS', 'ME', 'ANY', "MORE'N", "I'M", 'ALREADY', 'DISTRESSED'] +3005-163399-0024-541: hyp=['OH', "DON'T", 'DISTRESS', 'ME', 'ANY', 'MORE', 'AND', "I'M", 'ALREADY', 'DISTRESSED'] +3005-163399-0025-542: ref=['WHY', 'SILAS', 'LOOK', 'YONDER', 'UP', 'THE', 'ROAD', "AIN'T", 'THAT', 'SOMEBODY', 'COMING'] +3005-163399-0025-542: hyp=['WHY', 'SILAS', 'LOOK', 'YONDER', 'UP', 'THE', 'ROAD', "AIN'T", 'THAT', 'SOMEBODY', 'COMING'] +3005-163399-0026-543: ref=['THE', 'OLD', 'GENTLEMAN', 'STARED', 'AND', 'SAYS'] +3005-163399-0026-543: hyp=['THE', 'OLD', 'GENTLEMAN', 'STARED', 'AND', 'SAYS'] +3005-163399-0027-544: 
ref=['I', "HAIN'T", 'NO', 'IDEA', 'WHO', 'IS', 'IT'] +3005-163399-0027-544: hyp=['I', "HAIN'T", 'NO', 'IDEA', 'WHO', 'IS', 'IT'] +3005-163399-0028-545: ref=["IT'S", 'TOM', 'SAWYER'] +3005-163399-0028-545: hyp=['IS', 'TOM', 'SAWYER'] +3005-163399-0029-546: ref=['BEING', 'TOM', 'SAWYER', 'WAS', 'EASY', 'AND', 'COMFORTABLE', 'AND', 'IT', 'STAYED', 'EASY', 'AND', 'COMFORTABLE', 'TILL', 'BY', 'AND', 'BY', 'I', 'HEAR', 'A', 'STEAMBOAT', 'COUGHING', 'ALONG', 'DOWN', 'THE', 'RIVER'] +3005-163399-0029-546: hyp=['BEING', 'TOM', 'SAWYER', 'WAS', 'EASY', 'AND', 'COMFORTABLE', 'AND', 'IT', 'STAYED', 'EASY', 'AND', 'COMFORTABLE', 'TILL', 'BY', 'AND', 'BY', 'I', 'HEAR', 'A', 'STEAMBOAT', 'COUGHING', 'ALONG', 'DOWN', 'THE', 'RIVER'] +3005-163399-0030-547: ref=['THEN', 'I', 'SAYS', 'TO', 'MYSELF', "S'POSE", 'TOM', 'SAWYER', 'COMES', 'DOWN', 'ON', 'THAT', 'BOAT'] +3005-163399-0030-547: hyp=['THEN', 'I', 'SAYS', 'TO', 'MYSELF', "S'POSE", 'TOM', 'SAWYER', 'COMES', 'DOWN', 'ON', 'THAT', 'BOAT'] +3080-5032-0000-548: ref=['BUT', 'I', 'AM', 'HUGELY', 'PLEASED', 'THAT', 'YOU', 'HAVE', 'SEEN', 'MY', 'LADY'] +3080-5032-0000-548: hyp=['BUT', 'I', 'AM', 'HUGELY', 'PLEASED', 'THAT', 'YOU', 'HAVE', 'SEEN', 'MY', 'LADY'] +3080-5032-0001-549: ref=['I', 'KNEW', 'YOU', 'COULD', 'NOT', 'CHOOSE', 'BUT', 'LIKE', 'HER', 'BUT', 'YET', 'LET', 'ME', 'TELL', 'YOU', 'YOU', 'HAVE', 'SEEN', 'BUT', 'THE', 'WORST', 'OF', 'HER'] +3080-5032-0001-549: hyp=['I', 'KNEW', 'YOU', 'COULD', 'NOT', 'CHOOSE', 'BUT', 'LIKE', 'HER', 'BUT', 'YET', 'LET', 'ME', 'TELL', 'YOU', 'YOU', 'HAVE', 'SEEN', 'BUT', 'THE', 'WORST', 'OF', 'HER'] +3080-5032-0002-550: ref=['HER', 'CONVERSATION', 'HAS', 'MORE', 'CHARMS', 'THAN', 'CAN', 'BE', 'IN', 'MERE', 'BEAUTY', 'AND', 'HER', 'HUMOUR', 'AND', 'DISPOSITION', 'WOULD', 'MAKE', 'A', 'DEFORMED', 'PERSON', 'APPEAR', 'LOVELY'] +3080-5032-0002-550: hyp=['HER', 'CONVERSATION', 'HAS', 'MORE', 'CHARMS', 'THAN', 'CAN', 'BE', 'IN', 'MERE', 'BEAUTY', 'AND', 'A', 'HUMOUR', 'AND', 'DISPOSITION', 'WOULD', 'MAKE', 'A', 'DEFORMED', 'PERSON', 'APPEAR', 'LOVELY'] +3080-5032-0003-551: ref=['WHY', 'DID', 'YOU', 'NOT', 'SEND', 'ME', 'THAT', 'NEWS', 'AND', 'A', 'GARLAND'] +3080-5032-0003-551: hyp=['WHY', "DIDN'T", 'YOU', 'NOT', 'SEND', 'ME', 'THAT', 'NEWS', 'AND', 'A', 'GARLAND'] +3080-5032-0004-552: ref=['WELL', 'THE', 'BEST', "ON'T", 'IS', 'I', 'HAVE', 'A', 'SQUIRE', 'NOW', 'THAT', 'IS', 'AS', 'GOOD', 'AS', 'A', 'KNIGHT'] +3080-5032-0004-552: hyp=['WELL', 'THE', 'BEST', 'HONOUR', 'IS', 'THAT', 'I', 'HAVE', 'A', 'SQUIRE', 'NOW', 'THAT', 'IS', 'AS', 'GOOD', 'AS', 'A', 'KNIGHT'] +3080-5032-0005-553: ref=['IN', 'EARNEST', 'WE', 'HAVE', 'HAD', 'SUCH', 'A', 'SKIRMISH', 'AND', 'UPON', 'SO', 'FOOLISH', 'AN', 'OCCASION', 'AS', 'I', 'CANNOT', 'TELL', 'WHICH', 'IS', 'STRANGEST'] +3080-5032-0005-553: hyp=['IN', 'EARNEST', 'WE', 'HAVE', 'HAD', 'SUCH', 'A', 'SKIRMISH', 'AND', 'UPON', 'SO', 'FOOLISH', 'AN', 'OCCASION', 'AS', 'I', 'CANNOT', 'TELL', 'WHICH', 'OF', 'STRANGERS'] +3080-5032-0006-554: ref=['ALL', 'THE', 'PEOPLE', 'THAT', 'I', 'HAD', 'EVER', 'IN', 'MY', 'LIFE', 'REFUSED', 'WERE', 'BROUGHT', 'AGAIN', 'UPON', 'THE', 'STAGE', 'LIKE', 'RICHARD', 'THE', 'THREE', 'S', 'GHOSTS', 'TO', 'REPROACH', 'ME', 'WITHAL', 'AND', 'ALL', 'THE', 'KINDNESS', 'HIS', 'DISCOVERIES', 'COULD', 'MAKE', 'I', 'HAD', 'FOR', 'YOU', 'WAS', 'LAID', 'TO', 'MY', 'CHARGE'] +3080-5032-0006-554: hyp=['ALL', 'THE', 'PEOPLE', 'THAT', 'I', 'HAD', 'EVER', 'IN', 'MY', 'LIFE', 'REFUSED', 'WERE', 'BROUGHT', 'AGAIN', 'UPON', 'THE', 'STAGE', 'LIKE', 'RICHARD', 'THE', "THIRD'S", 
'GHOSTS', 'TO', 'REPROACH', 'ME', 'WITH', 'A', 'AND', 'ALL', 'THE', 'KINDNESS', 'HIS', 'DISCOVERIES', 'COULD', 'MAKE', 'I', 'HAD', 'FOR', 'YOU', 'WAS', 'LAID', 'TO', 'MY', 'CHARGE'] +3080-5032-0007-555: ref=['MY', 'BEST', 'QUALITIES', 'IF', 'I', 'HAVE', 'ANY', 'THAT', 'ARE', 'GOOD', 'SERVED', 'BUT', 'FOR', 'AGGRAVATIONS', 'OF', 'MY', 'FAULT', 'AND', 'I', 'WAS', 'ALLOWED', 'TO', 'HAVE', 'WIT', 'AND', 'UNDERSTANDING', 'AND', 'DISCRETION', 'IN', 'OTHER', 'THINGS', 'THAT', 'IT', 'MIGHT', 'APPEAR', 'I', 'HAD', 'NONE', 'IN', 'THIS'] +3080-5032-0007-555: hyp=['MY', 'BEST', 'QUALITIES', 'IF', 'I', 'HAVE', 'ANY', 'THAT', 'ARE', 'GOOD', 'SERVED', 'BUT', 'FOR', 'AGGRAVATIONS', 'OF', 'MY', 'FAULT', 'AND', 'I', 'WAS', 'ALLOWED', 'TO', 'HAVE', 'WIT', 'AND', 'UNDERSTANDING', 'AND', 'DISCRETION', 'IN', 'OTHER', 'THINGS', 'THAT', 'IT', 'MIGHT', 'APPEAR', 'I', 'HAD', 'NONE', 'IN', 'THIS'] +3080-5032-0008-556: ref=['TIS', 'A', 'STRANGE', 'CHANGE', 'AND', 'I', 'AM', 'VERY', 'SORRY', 'FOR', 'IT', 'BUT', "I'LL", 'SWEAR', 'I', 'KNOW', 'NOT', 'HOW', 'TO', 'HELP', 'IT'] +3080-5032-0008-556: hyp=['TIS', 'A', 'STRANGE', 'CHANGE', 'AND', 'I', 'AM', 'VERY', 'SORRY', 'FOR', 'IT', 'BUT', "I'LL", 'SWEAR', 'I', 'KNOW', 'NOT', 'HOW', 'TO', 'HELP', 'IT'] +3080-5032-0009-557: ref=['MISTER', 'FISH', 'IS', 'THE', 'SQUIRE', 'OF', 'DAMES', 'AND', 'HAS', 'SO', 'MANY', 'MISTRESSES', 'THAT', 'ANYBODY', 'MAY', 'PRETEND', 'A', 'SHARE', 'IN', 'HIM', 'AND', 'BE', 'BELIEVED', 'BUT', 'THOUGH', 'I', 'HAVE', 'THE', 'HONOUR', 'TO', 'BE', 'HIS', 'NEAR', 'NEIGHBOUR', 'TO', 'SPEAK', 'FREELY', 'I', 'CANNOT', 'BRAG', 'MUCH', 'THAT', 'HE', 'MAKES', 'ANY', 'COURT', 'TO', 'ME', 'AND', 'I', 'KNOW', 'NO', 'YOUNG', 'WOMAN', 'IN', 'THE', 'COUNTRY', 'THAT', 'HE', 'DOES', 'NOT', 'VISIT', 'OFTEN'] +3080-5032-0009-557: hyp=['MISTER', 'FISH', 'IS', 'THE', 'SQUIRE', 'OF', 'DAMES', 'AND', 'HAS', 'SO', 'MANY', 'MISTRESSES', 'THAT', 'ANYBODY', 'MAY', 'PRETEND', 'TO', 'SHARE', 'IN', 'HIM', 'AND', 'BE', 'BELIEVED', 'BUT', 'THOUGH', 'I', 'HAVE', 'THE', 'HONOUR', 'TO', 'BE', 'HIS', 'NEAR', 'NEIGHBOUR', 'TO', 'SPEAK', 'FREELY', 'I', 'CANNOT', 'BRAG', 'MUCH', 'THAT', 'HE', 'MAKES', 'ANY', 'COURT', 'TO', 'ME', 'AND', 'I', 'KNOW', 'NO', 'YOUNG', 'WOMAN', 'IN', 'THE', 'COUNTRY', 'THAT', 'HE', 'DOES', 'NOT', 'VISIT', 'OFTEN'] +3080-5032-0010-558: ref=['I', 'THINK', 'MY', 'YOUNGEST', 'BROTHER', 'COMES', 'DOWN', 'WITH', 'HIM'] +3080-5032-0010-558: hyp=['I', 'THINK', 'MY', 'YOUNGEST', 'BROTHER', 'COMES', 'DOWN', 'WITH', 'HIM'] +3080-5032-0011-559: ref=['I', 'CAN', 'NO', 'SOONER', 'GIVE', 'YOU', 'SOME', 'LITTLE', 'HINTS', 'WHEREABOUTS', 'THEY', 'LIVE', 'BUT', 'YOU', 'KNOW', 'THEM', 'PRESENTLY', 'AND', 'I', 'MEANT', 'YOU', 'SHOULD', 'BE', 'BEHOLDING', 'TO', 'ME', 'FOR', 'YOUR', 'ACQUAINTANCE'] +3080-5032-0011-559: hyp=['I', 'CAN', 'NO', 'SOONER', 'GIVE', 'YOU', 'SOME', 'LITTLE', 'HINTS', 'WHEREABOUT', 'THEY', 'LIVE', 'BUT', 'YOU', 'KNOW', 'THEM', 'PRESENTLY', 'AND', 'I', 'MEANT', 'YOU', 'SHOULD', 'BE', 'BEHOLDING', 'TO', 'ME', 'FOR', 'YOUR', 'ACQUAINTANCE'] +3080-5032-0012-560: ref=['BUT', 'IT', 'SEEMS', 'THIS', 'GENTLEMAN', 'IS', 'NOT', 'SO', 'EASY', 'ACCESS', 'BUT', 'YOU', 'MAY', 'ACKNOWLEDGE', 'SOMETHING', 'DUE', 'TO', 'ME', 'IF', 'I', 'INCLINE', 'HIM', 'TO', 'LOOK', 'GRACIOUSLY', 'UPON', 'YOU', 'AND', 'THEREFORE', 'THERE', 'IS', 'NOT', 'MUCH', 'HARM', 'DONE'] +3080-5032-0012-560: hyp=['BUT', 'IT', 'SEEMS', 'THIS', 'GENTLEMAN', 'IS', 'NOT', 'SO', 'EASY', 'ACCESS', 'BUT', 'YOU', 'MAY', 'ACKNOWLEDGE', 'SOMETHING', 'DUE', 'TO', 'ME', 'IF', 'I', 'INCLINE', 'HIM', 'TO', 'LOOK', 
'GRACIOUSLY', 'UPON', 'YOU', 'AND', 'THEREFORE', 'THERE', 'IS', 'NOT', 'MUCH', 'HARM', 'DONE'] +3080-5032-0013-561: ref=['I', 'HAVE', 'MISSED', 'FOUR', 'FITS', 'AND', 'HAD', 'BUT', 'FIVE', 'AND', 'HAVE', 'RECOVERED', 'SO', 'MUCH', 'STRENGTH', 'AS', 'MADE', 'ME', 'VENTURE', 'TO', 'MEET', 'YOUR', 'LETTER', 'ON', 'WEDNESDAY', 'A', 'MILE', 'FROM', 'HOME'] +3080-5032-0013-561: hyp=['I', 'HAVE', 'MISSED', 'FOUR', 'FITS', 'AND', 'HAVE', 'HAD', 'BUT', 'FIVE', 'AND', 'HAVE', 'RECOVERED', 'SO', 'MUCH', 'STRENGTH', 'AS', 'MADE', 'ME', 'VENTURE', 'TO', 'MEET', 'YOUR', 'LETTER', 'ON', 'WEDNESDAY', 'A', 'MILE', 'FROM', 'HOME'] +3080-5032-0014-562: ref=['BUT', 'BESIDES', 'I', 'CAN', 'GIVE', 'YOU', 'OTHERS'] +3080-5032-0014-562: hyp=['BUT', 'BESIDES', 'I', 'CAN', 'GIVE', 'YOU', 'OTHERS'] +3080-5032-0015-563: ref=['I', 'AM', 'HERE', 'MUCH', 'MORE', 'OUT', 'OF', "PEOPLE'S", 'WAY', 'THAN', 'IN', 'TOWN', 'WHERE', 'MY', 'AUNT', 'AND', 'SUCH', 'AS', 'PRETEND', 'AN', 'INTEREST', 'IN', 'ME', 'AND', 'A', 'POWER', 'OVER', 'ME', 'DO', 'SO', 'PERSECUTE', 'ME', 'WITH', 'THEIR', 'GOOD', 'NATURE', 'AND', 'TAKE', 'IT', 'SO', 'ILL', 'THAT', 'THEY', 'ARE', 'NOT', 'ACCEPTED', 'AS', 'I', 'WOULD', 'LIVE', 'IN', 'A', 'HOLLOW', 'TREE', 'TO', 'AVOID', 'THEM'] +3080-5032-0015-563: hyp=['I', 'AM', 'HERE', 'MUCH', 'MORE', 'OUT', 'OF', "PEOPLE'S", 'WAY', 'THAN', 'IN', 'TOWN', 'WHERE', 'MY', 'AUNT', 'AND', 'SUCH', 'AS', 'PRETEND', 'AN', 'INTEREST', 'IN', 'ME', 'AND', 'A', 'POWER', 'OVER', 'ME', 'DO', 'SO', 'PERSECUTE', 'ME', 'WITH', 'DEAR', 'GOOD', 'NATURE', 'AND', 'TAKE', 'IT', 'SO', 'ILL', 'THAT', 'THEY', 'ARE', 'NOT', 'ACCEPTED', 'AS', 'I', 'WOULD', 'LIVE', 'IN', 'A', 'HOLLOW', 'TREE', 'TO', 'AVOID', 'THEM'] +3080-5032-0016-564: ref=['YOU', 'WILL', 'THINK', 'HIM', 'ALTERED', 'AND', 'IF', 'IT', 'BE', 'POSSIBLE', 'MORE', 'MELANCHOLY', 'THAN', 'HE', 'WAS'] +3080-5032-0016-564: hyp=['YOU', 'WILL', 'THINK', 'HIM', 'ALTERED', 'AND', 'IF', 'IT', 'BE', 'POSSIBLE', 'MORE', 'MELANCHOLY', 'THAN', 'HE', 'WAS'] +3080-5032-0017-565: ref=['IF', 'MARRIAGE', 'AGREES', 'NO', 'BETTER', 'WITH', 'OTHER', 'PEOPLE', 'THAN', 'IT', 'DOES', 'WITH', 'HIM', 'I', 'SHALL', 'PRAY', 'THAT', 'ALL', 'MY', 'FRIENDS', 'MAY', 'SCAPE', 'IT'] +3080-5032-0017-565: hyp=['IF', 'MARRIAGE', 'AGREES', 'NO', 'BETTER', 'WITH', 'OTHER', 'PEOPLE', 'THAN', 'IT', 'DOES', 'WITH', 'HIM', 'I', 'SHALL', 'PRAY', 'THAT', 'ALL', 'MY', 'FRIENDS', 'MAY', 'ESCAPE', 'IT'] +3080-5032-0018-566: ref=['WELL', 'IN', 'EARNEST', 'IF', 'I', 'WERE', 'A', 'PRINCE', 'THAT', 'LADY', 'SHOULD', 'BE', 'MY', 'MISTRESS', 'BUT', 'I', 'CAN', 'GIVE', 'NO', 'RULE', 'TO', 'ANY', 'ONE', 'ELSE', 'AND', 'PERHAPS', 'THOSE', 'THAT', 'ARE', 'IN', 'NO', 'DANGER', 'OF', 'LOSING', 'THEIR', 'HEARTS', 'TO', 'HER', 'MAY', 'BE', 'INFINITELY', 'TAKEN', 'WITH', 'ONE', 'I', 'SHOULD', 'NOT', 'VALUE', 'AT', 'ALL', 'FOR', 'SO', 'SAYS', 'THE', 'JUSTINIAN', 'WISE', 'PROVIDENCE', 'HAS', 'ORDAINED', 'IT', 'THAT', 'BY', 'THEIR', 'DIFFERENT', 'HUMOURS', 'EVERYBODY', 'MIGHT', 'FIND', 'SOMETHING', 'TO', 'PLEASE', 'THEMSELVES', 'WITHAL', 'WITHOUT', 'ENVYING', 'THEIR', 'NEIGHBOURS'] +3080-5032-0018-566: hyp=['WELL', 'IN', 'EARNEST', 'IF', 'I', 'WERE', 'A', 'PRINCE', 'THAT', 'LADY', 'SHOULD', 'BE', 'MY', 'MISTRESS', 'BUT', 'I', 'CAN', 'GIVE', 'NO', 'RULE', 'TO', 'ANY', 'ONE', 'ELSE', 'AND', 'PERHAPS', 'THOSE', 'THAT', 'ARE', 'IN', 'NO', 'DANGER', 'OF', 'LOSING', 'THEIR', 'HEARTS', 'TO', 'HER', 'MAY', 'BE', 'INFINITELY', 'TAKEN', 'WITH', 'ONE', 'I', 'SHOULD', 'NOT', 'VALUE', 'AT', 'ALL', 'FOR', 'SO', 'SAYS', 'JUSTINIAN', 'WISE', 'PROVIDENCE', 
'HAS', 'ORDAINED', 'IT', 'THAT', 'BY', 'THEIR', 'DIFFERENT', 'HUMOURS', 'EVERYBODY', 'MIGHT', 'FIND', 'SOMETHING', 'TO', 'PLEASE', 'THEMSELVES', 'WITH', 'OF', 'WITHOUT', 'ENVYING', 'THEIR', 'NEIGHBOURS'] +3080-5032-0019-567: ref=['THE', 'MATTER', 'IS', 'NOT', 'GREAT', 'FOR', 'I', 'CONFESS', 'I', 'DO', 'NATURALLY', 'HATE', 'THE', 'NOISE', 'AND', 'TALK', 'OF', 'THE', 'WORLD', 'AND', 'SHOULD', 'BE', 'BEST', 'PLEASED', 'NEVER', 'TO', 'BE', 'KNOWN', "IN'T", 'UPON', 'ANY', 'OCCASION', 'WHATSOEVER', 'YET', 'SINCE', 'IT', 'CAN', 'NEVER', 'BE', 'WHOLLY', 'AVOIDED', 'ONE', 'MUST', 'SATISFY', 'ONESELF', 'BY', 'DOING', 'NOTHING', 'THAT', 'ONE', 'NEED', 'CARE', 'WHO', 'KNOWS'] +3080-5032-0019-567: hyp=['THE', 'MATTER', 'IS', 'NOT', 'GREAT', 'FOR', 'I', 'CONFESS', 'I', 'DO', 'NATURALLY', 'HATE', 'THE', 'NOISE', 'AND', 'TALK', 'OF', 'THE', 'WORLD', 'AND', 'SHOULD', 'BE', 'BEST', 'PLEASED', 'NEVER', 'TO', 'BE', 'KNOWN', 'IN', 'IT', 'UPON', 'ANY', 'OCCASION', 'WHATSOEVER', 'YET', 'SINCE', 'IT', 'CAN', 'NEVER', 'BE', 'WHOLLY', 'AVOIDED', 'ONE', 'MUST', 'SATISFY', 'ONESELF', 'BY', 'DOING', 'NOTHING', 'THAT', 'ONE', 'NEED', 'CARE', 'KNOWS'] +3080-5032-0020-568: ref=['IF', 'I', 'HAD', 'A', 'PICTURE', 'THAT', 'WERE', 'FIT', 'FOR', 'YOU', 'YOU', 'SHOULD', 'HAVE', 'IT'] +3080-5032-0020-568: hyp=['IF', 'I', 'HAD', 'A', 'PICTURE', 'THAT', 'WERE', 'FIT', 'FOR', 'YOU', 'YOU', 'SHOULD', 'HAVE', 'IT'] +3080-5032-0021-569: ref=['HOW', 'CAN', 'YOU', 'TALK', 'OF', 'DEFYING', 'FORTUNE', 'NOBODY', 'LIVES', 'WITHOUT', 'IT', 'AND', 'THEREFORE', 'WHY', 'SHOULD', 'YOU', 'IMAGINE', 'YOU', 'COULD'] +3080-5032-0021-569: hyp=['HOW', 'CAN', 'YOU', 'TALK', 'OF', 'DEFYING', 'FORTUNE', 'NOBODY', 'LIVES', 'WITHOUT', 'IT', 'AND', 'THEREFORE', 'WHY', 'SHOULD', 'YOU', 'IMAGINE', 'YOU', 'COULD'] +3080-5032-0022-570: ref=['I', 'KNOW', 'NOT', 'HOW', 'MY', 'BROTHER', 'COMES', 'TO', 'BE', 'SO', 'WELL', 'INFORMED', 'AS', 'YOU', 'SAY', 'BUT', 'I', 'AM', 'CERTAIN', 'HE', 'KNOWS', 'THE', 'UTMOST', 'OF', 'THE', 'INJURIES', 'YOU', 'HAVE', 'RECEIVED', 'FROM', 'HER'] +3080-5032-0022-570: hyp=['I', 'KNOW', 'NOT', 'HOW', 'MY', 'BROTHER', 'COMES', 'TO', 'BE', 'SO', 'WELL', 'INFORMED', 'AS', 'YOU', 'SAY', 'BUT', 'I', 'AM', 'CERTAIN', 'HE', 'KNOWS', 'THE', 'UTMOST', 'OF', 'THE', 'INJURIES', 'YOU', 'HAVE', 'RECEIVED', 'FROM', 'HER'] +3080-5032-0023-571: ref=['WE', 'HAVE', 'HAD', 'ANOTHER', 'DEBATE', 'BUT', 'MUCH', 'MORE', 'CALMLY'] +3080-5032-0023-571: hyp=['WE', 'HAVE', 'HAD', 'ANOTHER', 'DEBATE', 'BUT', 'MUCH', 'MORE', 'CALMLY'] +3080-5032-0024-572: ref=['AND', 'BESIDES', 'THERE', 'WAS', 'A', 'TIME', 'WHEN', 'WE', 'OURSELVES', 'WERE', 'INDIFFERENT', 'TO', 'ONE', 'ANOTHER', 'DID', 'I', 'DO', 'SO', 'THEN', 'OR', 'HAVE', 'I', 'LEARNED', 'IT', 'SINCE'] +3080-5032-0024-572: hyp=['AND', 'BESIDES', 'THERE', 'WAS', 'A', 'TIME', 'WHEN', 'WE', 'OURSELVES', 'WERE', 'INDIFFERENT', 'TO', 'ONE', 'ANOTHER', 'DID', 'I', 'DO', 'SO', 'THEN', 'OR', 'HAVE', 'I', 'LEARNED', 'IT', 'SINCE'] +3080-5032-0025-573: ref=['I', 'HAVE', 'BEEN', 'STUDYING', 'HOW', 'TOM', 'CHEEKE', 'MIGHT', 'COME', 'BY', 'HIS', 'INTELLIGENCE', 'AND', 'I', 'VERILY', 'BELIEVE', 'HE', 'HAS', 'IT', 'FROM', 'MY', 'COUSIN', 'PETERS'] +3080-5032-0025-573: hyp=['I', 'HAVE', 'BEEN', 'STUDYING', 'HOW', 'TOM', 'CHEEK', 'MIGHT', 'COME', 'BY', 'HIS', 'INTELLIGENCE', 'AND', 'I', 'VERILY', 'BELIEVE', 'HE', 'HAS', 'IT', 'FROM', 'MY', 'COUSIN', 'PETERS'] +3080-5032-0026-574: ref=['HOW', 'KINDLY', 'DO', 'I', 'TAKE', 'THESE', 'CIVILITIES', 'OF', 'YOUR', "FATHER'S", 'IN', 'EARNEST', 'YOU', 'CANNOT', 'IMAGINE', 'HOW', 'HIS', 
'LETTER', 'PLEASED', 'ME'] +3080-5032-0026-574: hyp=['HOW', 'KINDLY', 'DO', 'I', 'TAKE', 'THE', 'CIVILITIES', 'OF', 'YOUR', 'FATHERS', 'IN', 'EARNEST', 'YOU', 'CANNOT', 'IMAGINE', 'HOW', 'HIS', 'LETTER', 'PLEASED', 'ME'] +3080-5040-0000-575: ref=['WOULD', 'IT', 'WOULD', 'LEAVE', 'ME', 'AND', 'THEN', 'I', 'COULD', 'BELIEVE', 'I', 'SHALL', 'NOT', 'ALWAYS', 'HAVE', 'OCCASION', 'FOR', 'IT'] +3080-5040-0000-575: hyp=['WHAT', 'IT', 'WOULD', 'LEAVE', 'ME', 'AND', 'THEN', 'I', 'COULD', 'BELIEVE', 'I', 'SHALL', 'NOT', 'ALWAYS', 'HAVE', 'OCCASION', 'FOR', 'IT'] +3080-5040-0001-576: ref=['MY', 'POOR', 'LADY', 'VAVASOUR', 'IS', 'CARRIED', 'TO', 'THE', 'TOWER', 'AND', 'HER', 'GREAT', 'BELLY', 'COULD', 'NOT', 'EXCUSE', 'HER', 'BECAUSE', 'SHE', 'WAS', 'ACQUAINTED', 'BY', 'SOMEBODY', 'THAT', 'THERE', 'WAS', 'A', 'PLOT', 'AGAINST', 'THE', 'PROTECTOR', 'AND', 'DID', 'NOT', 'DISCOVER', 'IT'] +3080-5040-0001-576: hyp=['MY', 'POOR', 'LADY', 'VAVASOR', 'IS', 'CARRIED', 'TO', 'THE', 'TOWER', 'AND', 'HER', 'GREAT', 'BELLY', 'COULD', 'NOT', 'EXCUSE', 'HER', 'BECAUSE', 'SHE', 'WAS', 'ACQUAINTED', 'BY', 'SOMEBODY', 'THAT', 'THERE', 'WAS', 'A', 'PLOT', 'AGAINST', 'THE', 'PROTECTOR', 'AND', 'DID', 'NOT', 'DISCOVER', 'IT'] +3080-5040-0002-577: ref=['SHE', 'HAS', 'TOLD', 'NOW', 'ALL', 'THAT', 'WAS', 'TOLD', 'HER', 'BUT', 'VOWS', 'SHE', 'WILL', 'NEVER', 'SAY', 'FROM', 'WHENCE', 'SHE', 'HAD', 'IT', 'WE', 'SHALL', 'SEE', 'WHETHER', 'HER', 'RESOLUTIONS', 'ARE', 'AS', 'UNALTERABLE', 'AS', 'THOSE', 'OF', 'MY', 'LADY', 'TALMASH'] +3080-5040-0002-577: hyp=['SHE', 'HAS', 'TOLD', 'NOW', 'ALL', 'THAT', 'WAS', 'TOLD', 'HER', 'BUT', 'VOWS', 'SHE', 'WILL', 'NEVER', 'SAY', 'FROM', 'WHENCE', 'SHE', 'HAD', 'IT', 'WE', 'SHALL', 'SEE', 'WHETHER', 'HER', 'RESOLUTIONS', 'ARE', 'AS', 'UNALTERABLE', 'AS', 'THOSE', 'OF', 'MY', 'LADY', 'TALMASH'] +3080-5040-0003-578: ref=['I', 'WONDER', 'HOW', 'SHE', 'BEHAVED', 'HERSELF', 'WHEN', 'SHE', 'WAS', 'MARRIED'] +3080-5040-0003-578: hyp=['I', 'WONDER', 'HOW', 'SHE', 'BEHAVED', 'HERSELF', 'WHEN', 'SHE', 'WAS', 'MARRIED'] +3080-5040-0004-579: ref=['I', 'NEVER', 'SAW', 'ANY', 'ONE', 'YET', 'THAT', 'DID', 'NOT', 'LOOK', 'SIMPLY', 'AND', 'OUT', 'OF', 'COUNTENANCE', 'NOR', 'EVER', 'KNEW', 'A', 'WEDDING', 'WELL', 'DESIGNED', 'BUT', 'ONE', 'AND', 'THAT', 'WAS', 'OF', 'TWO', 'PERSONS', 'WHO', 'HAD', 'TIME', 'ENOUGH', 'I', 'CONFESS', 'TO', 'CONTRIVE', 'IT', 'AND', 'NOBODY', 'TO', 'PLEASE', "IN'T", 'BUT', 'THEMSELVES'] +3080-5040-0004-579: hyp=['I', 'NEVER', 'SAW', 'ANY', 'ONE', 'YET', 'THAT', 'DID', 'NOT', 'LOOK', 'SIMPLY', 'AND', 'OUT', 'OF', 'COUNTENANCE', 'NOR', 'EVER', 'KNEW', 'A', 'WEDDING', 'WELL', 'DESIGNED', 'BUT', 'ONE', 'AND', 'THAT', 'WAS', 'OF', 'TWO', 'PERSONS', 'WHO', 'HAD', 'TIME', 'ENOUGH', 'I', 'CONFESS', 'TO', 'CONTRIVE', 'IT', 'AND', 'NOBODY', 'TO', 'PLEASE', 'IN', 'BUT', 'THEMSELVES'] +3080-5040-0005-580: ref=['THE', 'TRUTH', 'IS', 'I', 'COULD', 'NOT', 'ENDURE', 'TO', 'BE', 'MISSUS', 'BRIDE', 'IN', 'A', 'PUBLIC', 'WEDDING', 'TO', 'BE', 'MADE', 'THE', 'HAPPIEST', 'PERSON', 'ON', 'EARTH'] +3080-5040-0005-580: hyp=['THE', 'TRUTH', 'IS', 'I', 'COULD', 'NOT', 'ENDURE', 'TO', 'BE', 'MISSUS', 'BRIDE', 'IN', 'A', 'PUBLIC', 'WEDDING', 'TO', 'BE', 'MADE', 'THE', 'HAPPIEST', 'PERSON', 'ON', 'EARTH'] +3080-5040-0006-581: ref=['DO', 'NOT', 'TAKE', 'IT', 'ILL', 'FOR', 'I', 'WOULD', 'ENDURE', 'IT', 'IF', 'I', 'COULD', 'RATHER', 'THAN', 'FAIL', 'BUT', 'IN', 'EARNEST', 'I', 'DO', 'NOT', 'THINK', 'IT', 'WERE', 'POSSIBLE', 'FOR', 'ME'] +3080-5040-0006-581: hyp=['DO', 'NOT', 'TAKE', 'IT', 'ILL', 'FOR', 'I', 
'WOULD', 'ENDURE', 'IT', 'IF', 'I', 'COULD', 'RATHER', 'THAN', 'FAIL', 'BUT', 'IN', 'EARNEST', 'I', 'DO', 'NOT', 'THINK', 'IT', 'WERE', 'POSSIBLE', 'FOR', 'ME'] +3080-5040-0007-582: ref=['YET', 'IN', 'EARNEST', 'YOUR', 'FATHER', 'WILL', 'NOT', 'FIND', 'MY', 'BROTHER', 'PEYTON', 'WANTING', 'IN', 'CIVILITY', 'THOUGH', 'HE', 'IS', 'NOT', 'A', 'MAN', 'OF', 'MUCH', 'COMPLIMENT', 'UNLESS', 'IT', 'BE', 'IN', 'HIS', 'LETTERS', 'TO', 'ME', 'NOR', 'AN', 'UNREASONABLE', 'PERSON', 'IN', 'ANYTHING', 'SO', 'HE', 'WILL', 'ALLOW', 'HIM', 'OUT', 'OF', 'HIS', 'KINDNESS', 'TO', 'HIS', 'WIFE', 'TO', 'SET', 'A', 'HIGHER', 'VALUE', 'UPON', 'HER', 'SISTER', 'THAN', 'SHE', 'DESERVES'] +3080-5040-0007-582: hyp=['YET', 'IN', 'EARNEST', 'YOUR', 'FATHER', 'WILL', 'NOT', 'FIND', 'MY', 'BROTHER', 'PEYTON', 'WANTING', 'IN', 'CIVILITY', 'THOUGH', 'HE', 'IS', 'NOT', 'A', 'MAN', 'OF', 'MUCH', 'COMPLIMENT', 'UNLESS', 'IT', 'BE', 'IN', 'HIS', 'LETTERS', 'TO', 'ME', 'NOR', 'AN', 'UNREASONABLE', 'PERSON', 'IN', 'ANYTHING', 'SO', 'HE', 'WILL', 'ALLOW', 'HIM', 'OUT', 'OF', 'HIS', 'KINDNESS', 'TO', 'HIS', 'WIFE', 'TO', 'SET', 'A', 'HIGHER', 'VALUE', 'UPON', 'HER', 'SISTER', 'THAN', 'SHE', 'DESERVES'] +3080-5040-0008-583: ref=['MY', 'AUNT', 'TOLD', 'ME', 'NO', 'LONGER', 'AGONE', 'THAN', 'YESTERDAY', 'THAT', 'I', 'WAS', 'THE', 'MOST', 'WILFUL', 'WOMAN', 'THAT', 'EVER', 'SHE', 'KNEW', 'AND', 'HAD', 'AN', 'OBSTINACY', 'OF', 'SPIRIT', 'NOTHING', 'COULD', 'OVERCOME', 'TAKE', 'HEED'] +3080-5040-0008-583: hyp=['MY', 'AUNT', 'TOLD', 'ME', 'NO', 'LONGER', 'AGONE', 'THAN', 'YESTERDAY', 'THAT', 'I', 'WAS', 'THE', 'MOST', 'WILFUL', 'WOMAN', 'THAT', 'EVER', 'SHE', 'KNEW', 'AND', 'HAD', 'AN', 'OBSTINACY', 'OF', 'SPIRIT', 'NOTHING', 'COULD', 'OVERCOME', 'TAKE', 'HEED'] +3080-5040-0009-584: ref=['YOU', 'SEE', 'I', 'GIVE', 'YOU', 'FAIR', 'WARNING'] +3080-5040-0009-584: hyp=['YOU', 'SEE', 'I', 'GIVE', 'YOU', 'FAIR', 'WARNING'] +3080-5040-0010-585: ref=['BY', 'THE', 'NEXT', 'I', 'SHALL', 'BE', 'GONE', 'INTO', 'KENT', 'AND', 'MY', 'OTHER', 'JOURNEY', 'IS', 'LAID', 'ASIDE', 'WHICH', 'I', 'AM', 'NOT', 'DISPLEASED', 'AT', 'BECAUSE', 'IT', 'WOULD', 'HAVE', 'BROKEN', 'OUR', 'INTERCOURSE', 'VERY', 'MUCH'] +3080-5040-0010-585: hyp=['BY', 'THE', 'NEXT', 'I', 'SHALL', 'BE', 'GONE', 'INTO', 'KENT', 'AND', 'MY', 'OTHER', 'JOURNEY', 'IS', 'LAID', 'ASIDE', 'WHICH', 'I', 'AM', 'NOT', 'DISPLEASED', 'AT', 'BECAUSE', 'IT', 'WOULD', 'HAVE', 'BROKEN', 'OUR', 'INTERCOURSE', 'VERY', 'MUCH'] +3080-5040-0011-586: ref=['HERE', 'ARE', 'SOME', 'VERSES', 'OF', "COWLEY'S", 'TELL', 'ME', 'HOW', 'YOU', 'LIKE', 'THEM'] +3080-5040-0011-586: hyp=['HERE', 'ARE', 'SOME', 'VERSES', 'OF', 'COLLEIES', 'TELL', 'ME', 'HOW', 'YOU', 'LIKE', 'THEM'] +3080-5040-0012-587: ref=['I', 'TOLD', 'YOU', 'IN', 'MY', 'LAST', 'THAT', 'MY', 'SUFFOLK', 'JOURNEY', 'WAS', 'LAID', 'ASIDE', 'AND', 'THAT', 'INTO', 'KENT', 'HASTENED'] +3080-5040-0012-587: hyp=['I', 'TOLD', 'YOU', 'IN', 'MY', 'LAST', 'THAT', 'MY', 'SUFFOLED', 'JOURNEY', 'WAS', 'LAID', 'ASIDE', 'AND', 'THAT', 'INTO', 'KENT', 'HASTENED'] +3080-5040-0013-588: ref=['IF', 'I', 'DROWN', 'BY', 'THE', 'WAY', 'THIS', 'WILL', 'BE', 'MY', 'LAST', 'LETTER', 'AND', 'LIKE', 'A', 'WILL', 'I', 'BEQUEATH', 'ALL', 'MY', 'KINDNESS', 'TO', 'YOU', 'IN', 'IT', 'WITH', 'A', 'CHARGE', 'NEVER', 'TO', 'BESTOW', 'IT', 'ALL', 'UPON', 'ANOTHER', 'MISTRESS', 'LEST', 'MY', 'GHOST', 'RISE', 'AGAIN', 'AND', 'HAUNT', 'YOU'] +3080-5040-0013-588: hyp=['IF', 'I', 'DROWN', 'BY', 'THE', 'WAY', 'THIS', 'WILL', 'BE', 'MY', 'LAST', 'LETTER', 'AND', 'LIKE', 'A', 'WILL', 'I', 'BEQUEATH', 
'ALL', 'MY', 'KINDNESS', 'TO', 'YOU', 'IN', 'IT', 'WITH', 'A', 'CHARGE', 'NEVER', 'TO', 'BESTOW', 'IT', 'ALL', 'UPON', 'ANOTHER', 'MISTRESS', 'LEST', 'MY', 'GHOST', 'RISE', 'AGAIN', 'AND', 'HAUNT', 'YOU'] +3080-5040-0014-589: ref=['INDEED', 'I', 'LIKE', 'HIM', 'EXTREMELY', 'AND', 'HE', 'IS', 'COMMENDED', 'TO', 'ME', 'BY', 'PEOPLE', 'THAT', 'KNOW', 'HIM', 'VERY', 'WELL', 'AND', 'ARE', 'ABLE', 'TO', 'JUDGE', 'FOR', 'A', 'MOST', 'EXCELLENT', 'SERVANT', 'AND', 'FAITHFUL', 'AS', 'POSSIBLE'] +3080-5040-0014-589: hyp=['INDEED', 'I', 'LIKE', 'HIM', 'EXTREMELY', 'AND', 'HE', 'IS', 'COMMENDED', 'TO', 'ME', 'BY', 'PEOPLE', 'THAT', 'KNOW', 'HIM', 'VERY', 'WELL', 'AND', 'ARE', 'ABLE', 'TO', 'JUDGE', 'FOR', 'A', 'MOST', 'EXCELLENT', 'SERVANT', 'AND', 'FAITHFUL', 'AS', 'POSSIBLE'] +3080-5040-0015-590: ref=['BECAUSE', 'YOU', 'FIND', 'FAULT', 'WITH', 'MY', 'OTHER', 'LETTERS', 'THIS', 'IS', 'LIKE', 'TO', 'BE', 'SHORTER', 'THAN', 'THEY', 'I', 'DID', 'NOT', 'INTEND', 'IT', 'SO', 'THOUGH', 'I', 'CAN', 'ASSURE', 'YOU'] +3080-5040-0015-590: hyp=['BECAUSE', 'YOU', 'FIND', 'FAULT', 'WITH', 'MY', 'OTHER', 'LETTERS', 'THIS', 'IS', 'LIKE', 'TO', 'BE', 'SHORTER', 'THAN', 'THEY', 'I', 'DID', 'NOT', 'INTEND', 'IT', 'SO', 'THOUGH', 'I', 'CAN', 'ASSURE', 'YOU'] +3080-5040-0016-591: ref=['I', 'DO', 'NOT', 'FIND', 'IT', 'THOUGH', 'I', 'AM', 'TOLD', 'I', 'WAS', 'SO', 'EXTREMELY', 'WHEN', 'I', 'BELIEVED', 'YOU', 'LOVED', 'ME'] +3080-5040-0016-591: hyp=['I', 'DO', 'NOT', 'FIND', 'IT', 'THOUGH', 'I', 'AM', 'TOLD', 'I', 'WAS', 'SO', 'EXTREMELY', 'WHEN', 'I', 'BELIEVED', 'YOU', 'LOVED', 'ME'] +3080-5040-0017-592: ref=['BUT', 'I', 'AM', 'CALLED', 'UPON'] +3080-5040-0017-592: hyp=['BUT', 'I', 'AM', 'CALLED', 'UPON'] +3080-5040-0018-593: ref=['DIRECTED', 'FOR', 'YOUR', 'MASTER'] +3080-5040-0018-593: hyp=['DIRECTED', 'FOR', 'YOUR', 'MASTER'] +3080-5040-0019-594: ref=['I', 'SEE', 'YOU', 'CAN', 'CHIDE', 'WHEN', 'YOU', 'PLEASE', 'AND', 'WITH', 'AUTHORITY', 'BUT', 'I', 'DESERVE', 'IT', 'I', 'CONFESS', 'AND', 'ALL', 'I', 'CAN', 'SAY', 'FOR', 'MYSELF', 'IS', 'THAT', 'MY', 'FAULT', 'PROCEEDED', 'FROM', 'A', 'VERY', 'GOOD', 'PRINCIPLE', 'IN', 'ME'] +3080-5040-0019-594: hyp=['I', 'SEE', 'YOU', 'CAN', 'CHIT', 'WHEN', 'YOU', 'PLEASE', 'AND', 'WITH', 'AUTHORITY', 'BUT', 'I', 'DESERVE', 'IT', 'I', 'CONFESS', 'AND', 'ALL', 'I', 'CAN', 'SAY', 'FOR', 'MYSELF', 'IS', 'THAT', 'MY', 'FAULT', 'PROCEEDED', 'FROM', 'A', 'VERY', 'GOOD', 'PRINCIPLE', 'IN', 'ME'] +3080-5040-0020-595: ref=['WE', 'DARE', 'NOT', 'LET', 'OUR', 'TONGUES', 'LIE', 'MORE', 'ON', 'ONE', 'SIDE', 'OF', 'OUR', 'MOUTHS', 'THAN', "T'OTHER", 'FOR', 'FEAR', 'OF', 'OVERTURNING', 'IT'] +3080-5040-0020-595: hyp=['WE', 'DARE', 'NOT', 'LET', 'OUR', 'TONGUES', 'LIE', 'MORE', 'ON', 'ONE', 'SIDE', 'OF', 'OUR', 'MOUTH', 'THAN', 'THE', 'OTHER', 'FOR', 'FEAR', 'OF', 'OVERTURNING', 'IT'] +3080-5040-0021-596: ref=['YOU', 'ARE', 'SATISFIED', 'I', 'HOPE', 'ERE', 'THIS', 'THAT', 'I', 'SCAPED', 'DROWNING'] +3080-5040-0021-596: hyp=['YOU', 'ARE', 'SATISFIED', 'I', 'HOPE', 'AT', 'THIS', 'THAT', 'I', 'ESCAPED', 'DROWNING'] +3080-5040-0022-597: ref=['BUT', 'I', 'AM', 'TROUBLED', 'MUCH', 'YOU', 'SHOULD', 'MAKE', 'SO', 'ILL', 'A', 'JOURNEY', 'TO', 'SO', 'LITTLE', 'PURPOSE', 'INDEED', 'I', 'WRIT', 'BY', 'THE', 'FIRST', 'POST', 'AFTER', 'MY', 'ARRIVAL', 'HERE', 'AND', 'CANNOT', 'IMAGINE', 'HOW', 'YOU', 'CAME', 'TO', 'MISS', 'OF', 'MY', 'LETTERS'] +3080-5040-0022-597: hyp=['BUT', 'I', 'AM', 'TROUBLED', 'MUCH', 'YOU', 'SHOULD', 'MAKE', 'SO', 'ILL', 'A', 'JOURNEY', 'TO', 'SO', 'LITTLE', 'PURPOSE', 'INDEED', 'I', 
'WROTE', 'BY', 'THE', 'FIRST', 'POST', 'AFTER', 'MY', 'ARRIVAL', 'HERE', 'AND', 'CANNOT', 'IMAGINE', 'HOW', 'YOU', 'CAME', 'TO', 'MISS', 'OF', 'MY', 'LETTERS'] +3080-5040-0023-598: ref=['HOW', 'WELCOME', 'YOU', 'WILL', 'BE', 'BUT', 'ALAS'] +3080-5040-0023-598: hyp=['HOW', 'WELCOME', 'YOU', 'WILL', 'BE', 'BUT', 'ALAS'] +3080-5040-0024-599: ref=['FOR', 'MY', 'LIFE', 'I', 'CANNOT', 'BEAT', 'INTO', 'THEIR', 'HEADS', 'A', 'PASSION', 'THAT', 'MUST', 'BE', 'SUBJECT', 'TO', 'NO', 'DECAY', 'AN', 'EVEN', 'PERFECT', 'KINDNESS', 'THAT', 'MUST', 'LAST', 'PERPETUALLY', 'WITHOUT', 'THE', 'LEAST', 'INTERMISSION'] +3080-5040-0024-599: hyp=['FOR', 'MY', 'LIFE', 'I', 'CANNOT', 'BEAT', 'INTO', 'THEIR', 'HEADS', 'A', 'PASSION', 'THAT', 'MUST', 'BE', 'SUBJECT', 'TO', 'NO', 'DECAY', 'AN', 'EVEN', 'PERFECT', 'KINDNESS', 'THAT', 'MUST', 'LAST', 'PERPETUALLY', 'WITHOUT', 'THE', 'LEAST', 'INTERMISSION'] +3080-5040-0025-600: ref=['THEY', 'LAUGH', 'TO', 'HEAR', 'ME', 'SAY', 'THAT', 'ONE', 'UNKIND', 'WORD', 'WOULD', 'DESTROY', 'ALL', 'THE', 'SATISFACTION', 'OF', 'MY', 'LIFE', 'AND', 'THAT', 'I', 'SHOULD', 'EXPECT', 'OUR', 'KINDNESS', 'SHOULD', 'INCREASE', 'EVERY', 'DAY', 'IF', 'IT', 'WERE', 'POSSIBLE', 'BUT', 'NEVER', 'LESSEN'] +3080-5040-0025-600: hyp=['THEY', 'LAUGH', 'TO', 'HEAR', 'ME', 'SAY', 'THAT', 'ONE', 'UNKIND', 'WORD', 'WOULD', 'DESTROY', 'ALL', 'THE', 'SATISFACTION', 'OF', 'MY', 'LIFE', 'AND', 'THAT', 'I', 'SHOULD', 'EXPECT', 'OUR', 'KINDNESS', 'SHOULD', 'INCREASE', 'EVERY', 'DAY', 'IF', 'IT', 'WERE', 'POSSIBLE', 'BUT', 'NEVER', 'LESSEN'] +3080-5040-0026-601: ref=['WE', 'GO', 'ABROAD', 'ALL', 'DAY', 'AND', 'PLAY', 'ALL', 'NIGHT', 'AND', 'SAY', 'OUR', 'PRAYERS', 'WHEN', 'WE', 'HAVE', 'TIME'] +3080-5040-0026-601: hyp=['WE', 'GO', 'ABROAD', 'ALL', 'DAY', 'AND', 'PLAY', 'ALL', 'NIGHT', 'AND', 'SAY', 'OUR', 'PRAYERS', 'WHEN', 'WE', 'HAVE', 'TIME'] +3080-5040-0027-602: ref=['WELL', 'IN', 'SOBER', 'EARNEST', 'NOW', 'I', 'WOULD', 'NOT', 'LIVE', 'THUS', 'A', 'TWELVEMONTH', 'TO', 'GAIN', 'ALL', 'THAT', 'THE', 'KING', 'HAS', 'LOST', 'UNLESS', 'IT', 'WERE', 'TO', 'GIVE', 'IT', 'HIM', 'AGAIN'] +3080-5040-0027-602: hyp=['WHILE', 'IN', 'SOBER', 'EARNEST', 'NOW', 'I', 'WOULD', 'NOT', 'LIVE', 'THUS', 'A', 'TWELVEMONTH', 'TO', 'GAIN', 'ALL', 'THAT', 'THE', 'KING', 'HAS', 'LOST', 'UNLESS', 'IT', 'WERE', 'TO', 'GIVE', 'IT', 'HIM', 'AGAIN'] +3080-5040-0028-603: ref=['WILL', 'YOU', 'BE', 'SO', 'GOOD', 'NATURED'] +3080-5040-0028-603: hyp=['WILL', 'YOU', 'BE', 'SO', 'GOOD', 'NATURED'] +3080-5040-0029-604: ref=['HE', 'HAS', 'ONE', 'SON', 'AND', 'TIS', 'THE', 'FINEST', 'BOY', 'THAT', "E'ER", 'YOU', 'SAW', 'AND', 'HAS', 'A', 'NOBLE', 'SPIRIT', 'BUT', 'YET', 'STANDS', 'IN', 'THAT', 'AWE', 'OF', 'HIS', 'FATHER', 'THAT', 'ONE', 'WORD', 'FROM', 'HIM', 'IS', 'AS', 'MUCH', 'AS', 'TWENTY', 'WHIPPINGS'] +3080-5040-0029-604: hyp=['HE', 'HAS', 'ONE', 'SON', 'AND', 'TIS', 'THE', 'FINEST', 'BOY', 'THAT', 'EVER', 'YOU', 'SAW', 'AND', 'HAS', 'A', 'NOBLE', 'SPIRIT', 'BUT', 'YET', 'STANDS', 'IN', 'THAT', 'AWE', 'OF', 'HIS', 'FATHER', 'THAT', 'ONE', 'WORD', 'FROM', 'HIM', 'IS', 'AS', 'MUCH', 'AS', 'TWENTY', 'WHIPPINGS'] +3080-5040-0030-605: ref=['YOU', 'MUST', 'GIVE', 'ME', 'LEAVE', 'TO', 'ENTERTAIN', 'YOU', 'THUS', 'WITH', 'DISCOURSES', 'OF', 'THE', 'FAMILY', 'FOR', 'I', 'CAN', 'TELL', 'YOU', 'NOTHING', 'ELSE', 'FROM', 'HENCE'] +3080-5040-0030-605: hyp=['YOU', 'MUST', 'GIVE', 'ME', 'LEAVE', 'TO', 'ENTERTAIN', 'YOU', 'THUS', 'WITH', 'DISCOURSES', 'OF', 'THE', 'FAMILY', 'FOR', 'I', 'CAN', 'TELL', 'YOU', 'NOTHING', 'ELSE', 'FROM', 'HENCE'] 
+3080-5040-0031-606: ref=['NOT', 'TO', 'KNOW', 'WHEN', 'YOU', 'WOULD', 'COME', 'HOME', 'I', 'CAN', 'ASSURE', 'YOU', 'NOR', 'FOR', 'ANY', 'OTHER', 'OCCASION', 'OF', 'MY', 'OWN', 'BUT', 'WITH', 'A', 'COUSIN', 'OF', 'MINE', 'THAT', 'HAD', 'LONG', 'DESIGNED', 'TO', 'MAKE', 'HERSELF', 'SPORT', 'WITH', 'HIM', 'AND', 'DID', 'NOT', 'MISS', 'OF', 'HER', 'AIM'] +3080-5040-0031-606: hyp=['NOT', 'TO', 'KNOW', 'WHEN', 'YOU', 'WOULD', 'COME', 'HOME', 'I', 'CAN', 'ASSURE', 'YOU', 'NOR', 'FOR', 'ANY', 'OTHER', 'OCCASION', 'OF', 'MY', 'OWN', 'BUT', 'WITH', 'A', 'COUSIN', 'OF', 'MINE', 'THAT', 'HAD', 'LONG', 'DESIGNED', 'TO', 'MAKE', 'HERSELF', 'SPORT', 'WITH', 'HIM', 'AND', 'DID', 'NOT', 'MISS', 'OF', 'HER', 'AIM'] +3080-5040-0032-607: ref=['IN', 'MY', 'LIFE', 'I', 'NEVER', 'HEARD', 'SO', 'RIDICULOUS', 'A', 'DISCOURSE', 'AS', 'HE', 'MADE', 'US', 'AND', 'NO', 'OLD', 'WOMAN', 'WHO', 'PASSES', 'FOR', 'A', 'WITCH', 'COULD', 'HAVE', 'BEEN', 'MORE', 'PUZZLED', 'TO', 'SEEK', 'WHAT', 'TO', 'SAY', 'TO', 'REASONABLE', 'PEOPLE', 'THAN', 'HE', 'WAS'] +3080-5040-0032-607: hyp=['IN', 'MY', 'LIFE', 'I', 'NEVER', 'HEARD', 'SO', 'RIDICULOUS', 'A', 'DISCOURSE', 'AS', 'HE', 'MADE', 'US', 'AND', 'NO', 'OLD', 'WOMAN', 'WHO', 'PASSES', 'FOR', 'A', 'WITCH', 'COULD', 'HAVE', 'BEEN', 'MORE', 'PUZZLED', 'TO', 'SEEK', 'WHAT', 'TO', 'SAY', 'TO', 'REASONABLE', 'PEOPLE', 'THAN', 'HE', 'WAS'] +3080-5040-0033-608: ref=['EVER', 'SINCE', 'THIS', 'ADVENTURE', 'I', 'HAVE', 'HAD', 'SO', 'GREAT', 'A', 'BELIEF', 'IN', 'ALL', 'THINGS', 'OF', 'THIS', 'NATURE', 'THAT', 'I', 'COULD', 'NOT', 'FORBEAR', 'LAYING', 'A', 'PEAS', 'COD', 'WITH', 'NINE', 'PEAS', "IN'T", 'UNDER', 'MY', 'DOOR', 'YESTERDAY', 'AND', 'WAS', 'INFORMED', 'BY', 'IT', 'THAT', 'MY', "HUSBAND'S", 'NAME', 'SHOULD', 'BE', 'THOMAS', 'HOW', 'DO', 'YOU', 'LIKE', 'THAT'] +3080-5040-0033-608: hyp=['EVER', 'SINCE', 'THIS', 'ADVENTURE', 'I', 'HAVE', 'HAD', 'SO', 'GREAT', 'A', 'BELIEF', 'IN', 'ALL', 'THINGS', 'OF', 'THIS', 'NATURE', 'THAT', 'I', 'COULD', 'NOT', 'FORBEAR', 'LAYING', 'A', 'PEASE', 'CARD', 'WITH', 'NINE', 'PEAS', 'IN', 'IT', 'UNDER', 'MY', 'DOOR', 'YESTERDAY', 'AND', 'WAS', 'INFORMED', 'BY', 'IT', 'THAT', 'MY', "HUSBAND'S", 'NAME', 'SHOULD', 'BE', 'THOMAS', 'HOW', 'DO', 'YOU', 'LIKE', 'THAT'] +3331-159605-0000-609: ref=['SHE', 'PULLED', 'HER', 'HAIR', 'DOWN', 'TURNED', 'HER', 'SKIRT', 'BACK', 'PUT', 'HER', 'FEET', 'ON', 'THE', 'FENDER', 'AND', 'TOOK', 'PUTTEL', 'INTO', 'HER', 'LAP', 'ALL', 'OF', 'WHICH', 'ARRANGEMENTS', 'SIGNIFIED', 'THAT', 'SOMETHING', 'VERY', 'IMPORTANT', 'HAD', 'GOT', 'TO', 'BE', 'THOUGHT', 'OVER', 'AND', 'SETTLED'] +3331-159605-0000-609: hyp=['SHE', 'PULLED', 'HER', 'HAIR', 'DOWN', 'TURNED', 'HER', 'SKIRT', 'BACK', 'PUT', 'HER', 'FEET', 'ON', 'THE', 'FENDER', 'AND', 'TOOK', 'PATTER', 'INTO', 'HER', 'LAP', 'ALL', 'OF', 'WHICH', 'ARRANGEMENTS', 'SIGNIFIED', 'THAT', 'SOMETHING', 'VERY', 'IMPORTANT', 'HAD', 'GOT', 'TO', 'BE', 'THOUGHT', 'OVER', 'AND', 'SETTLED'] +3331-159605-0001-610: ref=['THE', 'MORE', 'PROPOSALS', 'THE', 'MORE', 'CREDIT'] +3331-159605-0001-610: hyp=['THE', 'MORE', 'PROPOSALS', 'THE', 'MORE', 'CREDIT'] +3331-159605-0002-611: ref=['I', 'VE', 'TRIED', 'IT', 'AND', 'LIKED', 'IT', 'AND', 'MAYBE', 'THIS', 'IS', 'THE', 'CONSEQUENCE', 'OF', 'THAT', "NIGHT'S", 'FUN'] +3331-159605-0002-611: hyp=["I'VE", 'TRIED', 'IT', 'AND', 'LIKED', 'IT', 'AND', 'MAYBE', 'THIS', 'IS', 'THE', 'CONSEQUENCE', 'OF', 'THAT', "NIGHT'S", 'FUN'] +3331-159605-0003-612: ref=['JUST', 'SUPPOSE', 'IT', 'IS', 'TRUE', 'THAT', 'HE', 'DOES', 'ASK', 'ME', 'AND', 'I', 'SAY', 'YES'] 
+3331-159605-0003-612: hyp=['JUST', 'SUPPOSE', 'IT', 'IS', 'TRUE', 'THAT', 'HE', 'DOES', 'ASK', 'ME', 'AND', 'I', 'SAY', 'YES'] +3331-159605-0004-613: ref=['WHAT', 'A', 'SPITEFUL', 'THING', 'I', 'AM'] +3331-159605-0004-613: hyp=['WHAT', 'A', 'SPITEFUL', 'THING', 'I', 'AM'] +3331-159605-0005-614: ref=['I', 'COULD', 'DO', 'SO', 'MUCH', 'FOR', 'ALL', 'AT', 'HOME', 'HOW', 'I', 'SHOULD', 'ENJOY', 'THAT'] +3331-159605-0005-614: hyp=['I', 'COULD', 'DO', 'SO', 'MUCH', 'FOR', 'ALL', 'AT', 'HOME', 'HOW', 'I', 'SHOULD', 'ENJOY', 'THAT'] +3331-159605-0006-615: ref=['LET', 'ME', 'SEE', 'HOW', 'CAN', 'I', 'BEGIN'] +3331-159605-0006-615: hyp=['LET', 'ME', 'SEE', 'HOW', 'CAN', 'I', 'BEGIN'] +3331-159605-0007-616: ref=['HE', 'HAS', 'KNOWN', 'HER', 'ALL', 'HER', 'LIFE', 'AND', 'HAS', 'A', 'GOOD', 'INFLUENCE', 'OVER', 'HER'] +3331-159605-0007-616: hyp=['HE', 'HAS', 'KNOWN', 'HER', 'ALL', 'HER', 'LIFE', 'AND', 'HAS', 'A', 'GOOD', 'INFLUENCE', 'OVER', 'HER'] +3331-159605-0008-617: ref=['NOW', 'AS', 'POLLY', 'WAS', 'BY', 'NO', 'MEANS', 'A', 'PERFECT', 'CREATURE', 'I', 'AM', 'FREE', 'TO', 'CONFESS', 'THAT', 'THE', 'OLD', 'TEMPTATION', 'ASSAILED', 'HER', 'MORE', 'THAN', 'ONCE', 'THAT', 'WEEK', 'FOR', 'WHEN', 'THE', 'FIRST', 'EXCITEMENT', 'OF', 'THE', 'DODGING', 'REFORM', 'HAD', 'SUBSIDED', 'SHE', 'MISSED', 'THE', 'PLEASANT', 'LITTLE', 'INTERVIEWS', 'THAT', 'USED', 'TO', 'PUT', 'A', 'CERTAIN', 'FLAVOR', 'OF', 'ROMANCE', 'INTO', 'HER', 'DULL', 'HARD', 'WORKING', 'DAYS'] +3331-159605-0008-617: hyp=['NOW', 'AS', 'POLLY', 'WAS', 'BY', 'NO', 'MEANS', 'A', 'PERFECT', 'CREATURE', 'I', 'AM', 'FREE', 'TO', 'CONFESS', 'THAT', 'THE', 'OLD', 'TEMPTATION', 'ASSAILED', 'HER', 'MORE', 'THAN', 'ONCE', 'THAT', 'WEEK', 'FOR', 'WHEN', 'THE', 'FIRST', 'EXCITEMENT', 'OF', 'THE', 'DODGING', 'REFORM', 'HAD', 'SUBSIDED', 'SHE', 'MISSED', 'THE', 'PLEASANT', 'LITTLE', 'INTERVIEWS', 'THAT', 'USED', 'TO', 'PUT', 'A', 'CERTAIN', 'FLAVOUR', 'OF', 'ROMANCE', 'INTO', 'HER', 'DULL', 'HARD', 'WORKING', 'DAYS'] +3331-159605-0009-618: ref=['I', "DON'T", 'THINK', 'IT', 'WAS', 'HIS', 'WEALTH', 'ACCOMPLISHMENTS', 'OR', 'POSITION', 'THAT', 'MOST', 'ATTRACTED', 'POLLY', 'THOUGH', 'THESE', 'DOUBTLESS', 'POSSESSED', 'A', 'GREATER', 'INFLUENCE', 'THAN', 'SHE', 'SUSPECTED'] +3331-159605-0009-618: hyp=['I', "DON'T", 'THINK', 'IT', 'WAS', 'HIS', 'WEALTH', 'ACCOMPLISHMENTS', 'OR', 'POSITION', 'THAT', 'MOST', 'ATTRACTED', 'POLLY', 'THOUGH', 'THESE', 'DOUBTLESS', 'POSSESSED', 'A', 'GREATER', 'INFLUENCE', 'THAN', 'SHE', 'SUSPECTED'] +3331-159605-0010-619: ref=['IT', 'WAS', 'THAT', 'INDESCRIBABLE', 'SOMETHING', 'WHICH', 'WOMEN', 'ARE', 'QUICK', 'TO', 'SEE', 'AND', 'FEEL', 'IN', 'MEN', 'WHO', 'HAVE', 'BEEN', 'BLESSED', 'WITH', 'WISE', 'AND', 'GOOD', 'MOTHERS'] +3331-159605-0010-619: hyp=['IT', 'WAS', 'THAT', 'INDESCRIBABLE', 'SOMETHING', 'WHICH', 'WOMEN', 'ARE', 'QUICK', 'TO', 'SEE', 'AND', 'FEEL', 'IN', 'MEN', 'WHO', 'HAVE', 'BEEN', 'BLESSED', 'WITH', 'WISE', 'AND', 'GOOD', 'MOTHERS'] +3331-159605-0011-620: ref=['THIS', 'HAD', 'AN', 'ESPECIAL', 'CHARM', 'TO', 'POLLY', 'FOR', 'SHE', 'SOON', 'FOUND', 'THAT', 'THIS', 'SIDE', 'OF', 'HIS', 'CHARACTER', 'WAS', 'NOT', 'SHOWN', 'TO', 'EVERY', 'ONE'] +3331-159605-0011-620: hyp=['THIS', 'HAD', 'AN', 'ESPECIAL', 'CHARM', 'TO', 'POLLY', 'FOR', 'SHE', 'SOON', 'FOUND', 'THAT', 'THIS', 'SIDE', 'OF', 'HIS', 'CHARACTER', 'WAS', 'NOT', 'SHOWN', 'TO', 'EVERY', 'ONE'] +3331-159605-0012-621: ref=['LATELY', 'THIS', 'HAD', 'CHANGED', 'ESPECIALLY', 'TOWARDS', 'POLLY', 'AND', 'IT', 'FLATTERED', 'HER', 'MORE', 'THAN', 'SHE', 'WOULD', 
'CONFESS', 'EVEN', 'TO', 'HERSELF'] +3331-159605-0012-621: hyp=['LATELY', 'THIS', 'HAD', 'CHANGED', 'ESPECIALLY', 'TOWARDS', 'POLLY', 'AND', 'IT', 'FLATTERED', 'HER', 'MORE', 'THAN', 'SHE', 'WOULD', 'CONFESS', 'EVEN', 'TO', 'HERSELF'] +3331-159605-0013-622: ref=['AT', 'FIRST', 'SHE', 'TRIED', 'TO', 'THINK', 'SHE', 'COULD', 'BUT', 'UNFORTUNATELY', 'HEARTS', 'ARE', 'SO', 'CONTRARY', 'THAT', 'THEY', "WON'T", 'BE', 'OBEDIENT', 'TO', 'REASON', 'WILL', 'OR', 'EVEN', 'GRATITUDE'] +3331-159605-0013-622: hyp=['AT', 'FIRST', 'SHE', 'TRIED', 'TO', 'THINK', 'SHE', 'COULD', 'BUT', 'UNFORTUNATELY', 'HEARTS', 'ARE', 'SO', 'CONTRARY', 'THAT', 'THEY', "WON'T", 'BE', 'OBEDIENT', 'TO', 'REASON', 'WILL', 'OR', 'EVEN', 'GRATITUDE'] +3331-159605-0014-623: ref=['POLLY', 'FELT', 'A', 'VERY', 'CORDIAL', 'FRIENDSHIP', 'FOR', 'MISTER', 'SYDNEY', 'BUT', 'NOT', 'ONE', 'PARTICLE', 'OF', 'THE', 'LOVE', 'WHICH', 'IS', 'THE', 'ONLY', 'COIN', 'IN', 'WHICH', 'LOVE', 'CAN', 'BE', 'TRULY', 'PAID'] +3331-159605-0014-623: hyp=['POLLY', 'FELT', 'A', 'VERY', 'CORDIAL', 'FRIENDSHIP', 'FOR', 'MISTER', 'SYDNEY', 'BUT', 'NOT', 'ONE', 'PARTICLE', 'OF', 'THE', 'LOVE', 'WHICH', 'IS', 'THE', 'ONLY', 'COIN', 'IN', 'WHICH', 'LOVE', 'CAN', 'BE', 'TRULY', 'PAID'] +3331-159605-0015-624: ref=['THIS', 'FINISHED', "POLLY'S", 'INDECISION', 'AND', 'AFTER', 'THAT', 'NIGHT', 'SHE', 'NEVER', 'ALLOWED', 'HERSELF', 'TO', 'DWELL', 'UPON', 'THE', 'PLEASANT', 'TEMPTATION', 'WHICH', 'CAME', 'IN', 'A', 'GUISE', 'PARTICULARLY', 'ATTRACTIVE', 'TO', 'A', 'YOUNG', 'GIRL', 'WITH', 'A', 'SPICE', 'OF', 'THE', 'OLD', 'EVE', 'IN', 'HER', 'COMPOSITION'] +3331-159605-0015-624: hyp=['THIS', 'FINISHED', "POLLY'S", 'INDECISION', 'AND', 'AFTER', 'THAT', 'NIGHT', 'SHE', 'NEVER', 'ALLOWED', 'HERSELF', 'TO', 'DWELL', 'UPON', 'THE', 'PLEASANT', 'TEMPTATION', 'WHICH', 'CAME', 'IN', 'A', 'GUISE', 'PARTICULARLY', 'ATTRACTIVE', 'TO', 'A', 'YOUNG', 'GIRL', 'WITH', 'THE', 'SPIES', 'OF', 'THE', 'OLD', 'EVE', 'IN', 'HER', 'COMPOSITION'] +3331-159605-0016-625: ref=['WHEN', 'SATURDAY', 'CAME', 'POLLY', 'STARTED', 'AS', 'USUAL', 'FOR', 'A', 'VISIT', 'TO', 'BECKY', 'AND', 'BESS', 'BUT', 'COULD', "N'T", 'RESIST', 'STOPPING', 'AT', 'THE', 'SHAWS', 'TO', 'LEAVE', 'A', 'LITTLE', 'PARCEL', 'FOR', 'FAN', 'THOUGH', 'IT', 'WAS', 'CALLING', 'TIME'] +3331-159605-0016-625: hyp=['WHEN', 'SATURDAY', 'CAME', 'POLLY', 'STARTED', 'AS', 'USUAL', 'FOR', 'A', 'VISIT', 'TO', 'BECKY', 'AND', 'BESS', 'BUT', 'COULD', 'NOT', 'RESIST', 'STOPPING', 'AT', 'THE', 'SHORES', 'TO', 'LEAVE', 'A', 'LITTLE', 'PARCEL', 'FOR', 'FAN', 'THOUGH', 'IT', 'WAS', 'CALLING', 'TIME'] +3331-159605-0017-626: ref=['A', 'FOOLISH', 'LITTLE', 'SPEECH', 'TO', 'MAKE', 'TO', 'A', 'DOG', 'BUT', 'YOU', 'SEE', 'POLLY', 'WAS', 'ONLY', 'A', 'TENDER', 'HEARTED', 'GIRL', 'TRYING', 'TO', 'DO', 'HER', 'DUTY'] +3331-159605-0017-626: hyp=['A', 'FOOLISH', 'LITTLE', 'SPEECH', 'TO', 'MAKE', 'TO', 'A', 'DARK', 'BUT', 'YOU', 'SEE', 'POLLY', 'WAS', 'ONLY', 'A', 'TENDER', 'HEARTED', 'GIRL', 'TRYING', 'TO', 'DO', 'HER', 'DUTY'] +3331-159605-0018-627: ref=['TAKE', 'HOLD', 'OF', 'MASTER', "CHARLEY'S", 'HAND', 'MISS', 'MAMIE', 'AND', 'WALK', 'PRETTY', 'LIKE', 'WILLY', 'AND', 'FLOSSY', 'SAID', 'THE', 'MAID'] +3331-159605-0018-627: hyp=['TAKE', 'HOLD', 'OF', 'MASSA', "CHARLIE'S", 'HAND', 'MISS', 'MAMIE', 'AND', 'WALK', 'PRETTY', 'LIKE', 'WILLIE', 'AND', 'FLOSSIE', 'SAID', 'THE', 'MAID'] +3331-159605-0019-628: ref=['AT', 'A', 'STREET', 'CORNER', 'A', 'BLACK', 'EYED', 'SCHOOL', 'BOY', 'WAS', 'PARTING', 'FROM', 'A', 'ROSY', 'FACED', 'SCHOOL', 'GIRL', 'WHOSE', 
'MUSIC', 'ROLL', 'HE', 'WAS', 'RELUCTANTLY', 'SURRENDERING'] +3331-159605-0019-628: hyp=['AT', 'A', 'STREET', 'CORNER', 'A', 'BLACK', 'EYED', 'SCHOOLBOY', 'WAS', 'PARTING', 'FROM', 'A', 'ROSY', 'FACED', 'SCHOOL', 'GIRL', 'WHOSE', 'MUSIC', 'ROLL', 'HE', 'WAS', 'RELUCTANTLY', 'SURRENDERING'] +3331-159605-0020-629: ref=['HOW', 'HE', 'GOT', 'THERE', 'WAS', 'NEVER', 'VERY', 'CLEAR', 'TO', 'POLLY', 'BUT', 'THERE', 'HE', 'WAS', 'FLUSHED', 'AND', 'A', 'LITTLE', 'OUT', 'OF', 'BREATH', 'BUT', 'LOOKING', 'SO', 'GLAD', 'TO', 'SEE', 'HER', 'THAT', 'SHE', 'HAD', "N'T", 'THE', 'HEART', 'TO', 'BE', 'STIFF', 'AND', 'COOL', 'AS', 'SHE', 'HAD', 'FULLY', 'INTENDED', 'TO', 'BE', 'WHEN', 'THEY', 'MET'] +3331-159605-0020-629: hyp=['HOW', 'HE', 'GOT', 'THERE', 'WAS', 'NEVER', 'VERY', 'CLEAR', 'TO', 'POLLY', 'BUT', 'THERE', 'HE', 'WAS', 'FLUSHED', 'AND', 'A', 'LITTLE', 'OUT', 'OF', 'BREATH', 'BUT', 'LOOKING', 'SO', 'GLAD', 'TO', 'SEE', 'HER', 'THAT', 'SHE', 'HAD', 'NOT', 'THE', 'HEART', 'TO', 'BE', 'STIFF', 'AND', 'COOL', 'AS', 'SHE', 'HAD', 'FULLY', 'INTENDED', 'TO', 'BE', 'WHEN', 'THEY', 'MET'] +3331-159605-0021-630: ref=['SHE', 'REALLY', 'COULD', "N'T", 'HELP', 'IT', 'IT', 'WAS', 'SO', 'PLEASANT', 'TO', 'SEE', 'HIM', 'AGAIN', 'JUST', 'WHEN', 'SHE', 'WAS', 'FEELING', 'SO', 'LONELY'] +3331-159605-0021-630: hyp=['SHE', 'REALLY', 'COULD', 'NOT', 'HELP', 'IT', 'IT', 'WAS', 'SO', 'PLEASANT', 'TO', 'SEE', 'HIM', 'AGAIN', 'JUST', 'WHEN', 'SHE', 'WAS', 'FEELING', 'SO', 'LONELY'] +3331-159605-0022-631: ref=['THAT', 'IS', 'THE', 'WAY', 'I', 'GET', 'TO', 'THE', 'ROTHS', 'ANSWERED', 'POLLY'] +3331-159605-0022-631: hyp=['THAT', 'IS', 'THE', 'WAY', 'I', 'GET', 'TO', 'THE', 'WARS', 'ANSWERED', 'POLLY'] +3331-159605-0023-632: ref=['SHE', 'DID', 'NOT', 'MEAN', 'TO', 'TELL', 'BUT', 'HIS', 'FRANKNESS', 'WAS', 'SO', 'AGREEABLE', 'SHE', 'FORGOT', 'HERSELF'] +3331-159605-0023-632: hyp=['SHE', 'DID', 'NOT', 'MEAN', 'TO', 'TELL', 'BUT', 'HIS', 'FRANKNESS', 'WAS', 'SO', 'AGREEABLE', 'SHE', 'FORGOT', 'HERSELF'] +3331-159605-0024-633: ref=['BUT', 'I', 'KNOW', 'HER', 'BETTER', 'AND', 'I', 'ASSURE', 'YOU', 'THAT', 'SHE', 'DOES', 'IMPROVE', 'SHE', 'TRIES', 'TO', 'MEND', 'HER', 'FAULTS', 'THOUGH', 'SHE', "WON'T", 'OWN', 'IT', 'AND', 'WILL', 'SURPRISE', 'YOU', 'SOME', 'DAY', 'BY', 'THE', 'AMOUNT', 'OF', 'HEART', 'AND', 'SENSE', 'AND', 'GOODNESS', 'SHE', 'HAS', 'GOT'] +3331-159605-0024-633: hyp=['BUT', 'I', 'KNOW', 'HER', 'BETTER', 'AND', 'I', 'ASSURE', 'YOU', 'THAT', 'SHE', 'DOES', 'IMPROVE', 'SHE', 'TRIES', 'TO', 'MEND', 'HER', 'FAULTS', 'THOUGH', 'SHE', "WON'T", 'OWN', 'IT', 'AND', 'WILL', 'SURPRISE', 'YOU', 'SOME', 'DAY', 'BY', 'THE', 'AMOUNT', 'OF', 'HEART', 'AND', 'SENSE', 'AND', 'GOODNESS', 'SHE', 'HAS', 'GOT'] +3331-159605-0025-634: ref=['THANK', 'YOU', 'NO'] +3331-159605-0025-634: hyp=['THANK', 'YOU', 'NO'] +3331-159605-0026-635: ref=['HOW', 'LOVELY', 'THE', 'PARK', 'LOOKS', 'SHE', 'SAID', 'IN', 'GREAT', 'CONFUSION'] +3331-159605-0026-635: hyp=['HOW', 'LOVELY', 'THE', 'PARK', 'LOOKS', 'SHE', 'SAID', 'IN', 'GREAT', 'CONFUSION'] +3331-159605-0027-636: ref=['ASKED', 'THE', 'ARTFUL', 'YOUNG', 'MAN', 'LAYING', 'A', 'TRAP', 'INTO', 'WHICH', 'POLLY', 'IMMEDIATELY', 'FELL'] +3331-159605-0027-636: hyp=['ASKED', 'THE', 'ARTFUL', 'YOUNG', 'MAN', 'LAYING', 'A', 'TRAP', 'INTO', 'WHICH', 'POLLY', 'IMMEDIATELY', 'FELL'] +3331-159605-0028-637: ref=['HE', 'WAS', 'QUICKER', 'TO', 'TAKE', 'A', 'HINT', 'THAN', 'SHE', 'HAD', 'EXPECTED', 'AND', 'BEING', 'BOTH', 'PROUD', 'AND', 'GENEROUS', 'RESOLVED', 'TO', 'SETTLE', 'THE', 'MATTER', 'AT', 'ONCE', 'FOR', 
"POLLY'S", 'SAKE', 'AS', 'WELL', 'AS', 'HIS', 'OWN'] +3331-159605-0028-637: hyp=['HE', 'WAS', 'QUICKER', 'TO', 'TAKE', 'A', 'HAND', 'THAN', 'SHE', 'HAD', 'EXPECTED', 'AND', 'BEING', 'BOTH', 'PROUD', 'AND', 'GENEROUS', 'RESOLVED', 'TO', 'SETTLE', 'THE', 'MATTER', 'AT', 'ONCE', 'FOR', "POLLY'S", 'SAKE', 'AS', 'WELL', 'AS', 'HIS', 'OWN'] +3331-159605-0029-638: ref=['SO', 'WHEN', 'SHE', 'MADE', 'HER', 'LAST', 'BRILLIANT', 'REMARK', 'HE', 'SAID', 'QUIETLY', 'WATCHING', 'HER', 'FACE', 'KEENLY', 'ALL', 'THE', 'WHILE', 'I', 'THOUGHT', 'SO', 'WELL', 'I', 'M', 'GOING', 'OUT', 'OF', 'TOWN', 'ON', 'BUSINESS', 'FOR', 'SEVERAL', 'WEEKS', 'SO', 'YOU', 'CAN', 'ENJOY', 'YOUR', 'LITTLE', 'BIT', 'OF', 'COUNTRY', 'WITHOUT', 'BEING', 'ANNOYED', 'BY', 'ME', 'ANNOYED'] +3331-159605-0029-638: hyp=['SO', 'WHEN', 'SHE', 'MADE', 'HER', 'LAST', 'BUOYANT', 'REMARK', 'HE', 'SAID', 'QUIETLY', 'WATCHING', 'HER', 'FACE', 'KEENLY', 'ALL', 'THE', 'WHILE', 'I', 'THOUGHT', 'SO', 'WELL', "I'M", 'GOING', 'OUT', 'OF', 'TOWN', 'ON', 'BUSINESS', 'FOR', 'SEVERAL', 'WEEKS', 'SO', 'YOU', 'CAN', 'ENJOY', 'YOUR', 'LITTLE', 'BIT', 'OF', 'COUNTRY', 'WITHOUT', 'BEING', 'ANNOYED', 'BY', 'ME', 'ANNOYED'] +3331-159605-0030-639: ref=['SHE', 'THOUGHT', 'SHE', 'HAD', 'A', 'GOOD', 'DEAL', 'OF', 'THE', 'COQUETTE', 'IN', 'HER', 'AND', 'I', 'VE', 'NO', 'DOUBT', 'THAT', 'WITH', 'TIME', 'AND', 'TRAINING', 'SHE', 'WOULD', 'HAVE', 'BECOME', 'A', 'VERY', 'DANGEROUS', 'LITTLE', 'PERSON', 'BUT', 'NOW', 'SHE', 'WAS', 'FAR', 'TOO', 'TRANSPARENT', 'AND', 'STRAIGHTFORWARD', 'BY', 'NATURE', 'EVEN', 'TO', 'TELL', 'A', 'WHITE', 'LIE', 'CLEVERLY'] +3331-159605-0030-639: hyp=['SHE', 'THOUGHT', 'SHE', 'HAD', 'A', 'GOOD', 'DEAL', 'OF', 'THE', 'COQUETTE', 'IN', 'HER', 'AND', "I'VE", 'NO', 'DOUBT', 'THAT', 'WITH', 'TIME', 'AND', 'TRAINING', 'SHE', 'WOULD', 'HAVE', 'BECOME', 'A', 'VERY', 'DANGEROUS', 'LITTLE', 'PERSON', 'BUT', 'NOW', 'SHE', 'WAS', 'FAR', 'TOO', 'TRANSPARENT', 'AND', 'STRAIGHTFORWARD', 'BY', 'NATURE', 'EVEN', 'TO', 'TELL', 'A', 'WI', 'LIKE', 'LEVERLY'] +3331-159605-0031-640: ref=['HE', 'WAS', 'GONE', 'BEFORE', 'SHE', 'COULD', 'DO', 'ANYTHING', 'BUT', 'LOOK', 'UP', 'AT', 'HIM', 'WITH', 'A', 'REMORSEFUL', 'FACE', 'AND', 'SHE', 'WALKED', 'ON', 'FEELING', 'THAT', 'THE', 'FIRST', 'AND', 'PERHAPS', 'THE', 'ONLY', 'LOVER', 'SHE', 'WOULD', 'EVER', 'HAVE', 'HAD', 'READ', 'HIS', 'ANSWER', 'AND', 'ACCEPTED', 'IT', 'IN', 'SILENCE'] +3331-159605-0031-640: hyp=['HE', 'WAS', 'GONE', 'BEFORE', 'SHE', 'COULD', 'DO', 'ANYTHING', 'BUT', 'LOOK', 'UP', 'AT', 'HIM', 'WITH', 'A', 'REMORSEFUL', 'FACE', 'AND', 'SHE', 'WALKED', 'ON', 'FEELING', 'THAT', 'THE', 'FIRST', 'AND', 'PERHAPS', 'THE', 'ONLY', 'LOVER', 'SHE', 'WOULD', 'EVER', 'HAVE', 'HAD', 'READ', 'HIS', 'ANSWER', 'AND', 'ACCEPTED', 'IT', 'IN', 'SILENCE'] +3331-159605-0032-641: ref=['POLLY', 'DID', 'NOT', 'RETURN', 'TO', 'HER', 'FAVORITE', 'WALK', 'TILL', 'SHE', 'LEARNED', 'FROM', 'MINNIE', 'THAT', 'UNCLE', 'HAD', 'REALLY', 'LEFT', 'TOWN', 'AND', 'THEN', 'SHE', 'FOUND', 'THAT', 'HIS', 'FRIENDLY', 'COMPANY', 'AND', 'CONVERSATION', 'WAS', 'WHAT', 'HAD', 'MADE', 'THE', 'WAY', 'SO', 'PLEASANT', 'AFTER', 'ALL'] +3331-159605-0032-641: hyp=['POLLY', 'DID', 'NOT', 'RETURN', 'TO', 'HER', 'FAVOURITE', 'WALK', 'TILL', 'SHE', 'LEARNED', 'FROM', 'MINNIE', 'THAT', 'UNCLE', 'HAD', 'REALLY', 'LEFT', 'TOWN', 'AND', 'THEN', 'SHE', 'FOUND', 'THAT', 'HIS', 'FRIENDLY', 'COMPANY', 'AND', 'CONVERSATION', 'WAS', 'WHAT', 'HAD', 'MADE', 'THE', 'WAY', 'SO', 'PLEASANT', 'AFTER', 'ALL'] +3331-159605-0033-642: ref=['WAGGING', 'TO', 'AND', 'FRO', 
'AS', 'USUAL', "WHAT'S", 'THE', 'NEWS', 'WITH', 'YOU'] +3331-159605-0033-642: hyp=['WORKING', 'TO', 'AND', 'FRO', 'AS', 'USUAL', "WHAT'S", 'THE', 'NEWS', 'WITH', 'YOU'] +3331-159605-0034-643: ref=['PERHAPS', 'SHE', 'LL', 'JILT', 'HIM'] +3331-159605-0034-643: hyp=['PERHAPS', 'SHE', 'HAD', 'TOLD', 'HIM'] +3331-159605-0035-644: ref=['UTTERLY', 'DONE', 'WITH', 'AND', 'LAID', 'UPON', 'THE', 'SHELF'] +3331-159605-0035-644: hyp=['UTTERLY', 'DONE', 'WITH', 'AND', 'LAID', 'UPON', 'THE', 'SHELF'] +3331-159605-0036-645: ref=['MINNIE', 'SAID', 'THE', 'OTHER', 'DAY', 'SHE', 'WISHED', 'SHE', 'WAS', 'A', 'PIGEON', 'SO', 'SHE', 'COULD', 'PADDLE', 'IN', 'THE', 'PUDDLES', 'AND', 'NOT', 'FUSS', 'ABOUT', 'RUBBERS'] +3331-159605-0036-645: hyp=['MINNY', 'SAID', 'THE', 'OTHER', 'DAY', 'SHE', 'WISHED', 'SHE', 'WAS', 'A', 'PIGEON', 'SO', 'SHE', 'COULD', 'PADDLE', 'IN', 'THE', 'BOTTLES', 'AND', 'NOT', 'FUSS', 'ABOUT', 'RUBBERS'] +3331-159605-0037-646: ref=['NOW', "DON'T", 'BE', 'AFFECTED', 'POLLY', 'BUT', 'JUST', 'TELL', 'ME', 'LIKE', 'A', 'DEAR', 'HAS', "N'T", 'HE', 'PROPOSED'] +3331-159605-0037-646: hyp=['NOW', "DON'T", 'BE', 'AFFECTED', 'POLLY', 'BUT', 'JUST', 'TELL', 'ME', 'LIKE', 'A', 'DEAR', 'HAS', 'NOT', 'HE', 'PROPOSED'] +3331-159605-0038-647: ref=["DON'T", 'YOU', 'THINK', 'HE', 'MEANS', 'TO'] +3331-159605-0038-647: hyp=["DON'T", 'YOU', 'THINK', 'HE', 'MEANS', 'TO'] +3331-159605-0039-648: ref=['TRULY', 'TRULY', 'FAN'] +3331-159605-0039-648: hyp=['TRULY', 'JULIE', 'FANN'] +3331-159605-0040-649: ref=['I', "DON'T", 'MEAN', 'TO', 'BE', 'PRYING', 'BUT', 'I', 'REALLY', 'THOUGHT', 'HE', 'DID'] +3331-159605-0040-649: hyp=['I', "DON'T", 'MEAN', 'TO', 'BE', 'PRYING', 'BUT', 'I', 'REALLY', 'THOUGHT', 'HE', 'DID'] +3331-159605-0041-650: ref=['WELL', 'I', 'ALWAYS', 'MEANT', 'TO', 'TRY', 'IT', 'IF', 'I', 'GOT', 'A', 'CHANCE', 'AND', 'I', 'HAVE'] +3331-159605-0041-650: hyp=['WELL', 'I', 'ALWAYS', 'MEANT', 'TO', 'TRY', 'IT', 'IF', 'I', 'GOT', 'A', 'CHANCE', 'AND', 'I', 'HAVE'] +3331-159605-0042-651: ref=['I', 'JUST', 'GAVE', 'HIM', 'A', 'HINT', 'AND', 'HE', 'TOOK', 'IT'] +3331-159605-0042-651: hyp=['I', 'JUST', 'GAVE', 'HIM', 'A', 'HINT', 'AND', 'HE', 'TOOK', 'IT'] +3331-159605-0043-652: ref=['HE', 'MEANT', 'TO', 'GO', 'AWAY', 'BEFORE', 'THAT', 'SO', "DON'T", 'THINK', 'HIS', 'HEART', 'IS', 'BROKEN', 'OR', 'MIND', 'WHAT', 'SILLY', 'TATTLERS', 'SAY'] +3331-159605-0043-652: hyp=['HE', 'MEANT', 'TO', 'GO', 'AWAY', 'BEFORE', 'THAT', 'SO', "DON'T", 'THINK', 'HIS', 'HEART', 'IS', 'BROKEN', 'OR', 'MIND', 'WHAT', 'SIDDY', 'TATLERS', 'SAY'] +3331-159605-0044-653: ref=['HE', 'UNDERSTOOD', 'AND', 'BEING', 'A', 'GENTLEMAN', 'MADE', 'NO', 'FUSS'] +3331-159605-0044-653: hyp=['HE', 'UNDERSTOOD', 'AND', 'BEING', 'A', 'GENTLEMAN', 'MADE', 'NO', 'FUSS'] +3331-159605-0045-654: ref=['BUT', 'POLLY', 'IT', 'WOULD', 'HAVE', 'BEEN', 'A', 'GRAND', 'THING', 'FOR', 'YOU'] +3331-159605-0045-654: hyp=['BUT', 'POLLY', 'IT', 'WOULD', 'HAVE', 'BEEN', 'A', 'GRAND', 'THING', 'FOR', 'YOU'] +3331-159605-0046-655: ref=['I', 'M', 'ODD', 'YOU', 'KNOW', 'AND', 'PREFER', 'TO', 'BE', 'AN', 'INDEPENDENT', 'SPINSTER', 'AND', 'TEACH', 'MUSIC', 'ALL', 'MY', 'DAYS'] +3331-159605-0046-655: hyp=["I'M", 'ART', 'YOU', 'KNOW', 'AND', 'PREFER', 'TO', 'BE', 'AN', 'INDEPENDENT', 'SPINSTER', 'AND', 'TEACH', 'MUSIC', 'ALL', 'MY', 'DAYS'] +3331-159609-0000-656: ref=['NEVER', 'MIND', 'WHAT', 'THE', 'BUSINESS', 'WAS', 'IT', 'SUFFICES', 'TO', 'SAY', 'THAT', 'IT', 'WAS', 'A', 'GOOD', 'BEGINNING', 'FOR', 'A', 'YOUNG', 'MAN', 'LIKE', 'TOM', 'WHO', 'HAVING', 'BEEN', 'BORN', 'AND', 
'BRED', 'IN', 'THE', 'MOST', 'CONSERVATIVE', 'CLASS', 'OF', 'THE', 'MOST', 'CONCEITED', 'CITY', 'IN', 'NEW', 'ENGLAND', 'NEEDED', 'JUST', 'THE', 'HEALTHY', 'HEARTY', 'SOCIAL', 'INFLUENCES', 'OF', 'THE', 'WEST', 'TO', 'WIDEN', 'HIS', 'VIEWS', 'AND', 'MAKE', 'A', 'MAN', 'OF', 'HIM'] +3331-159609-0000-656: hyp=['NEVER', 'MIND', 'WHAT', 'THE', 'BUSINESS', 'WAS', 'IT', 'SUFFICES', 'TO', 'SAY', 'THAT', 'IT', 'WAS', 'A', 'GOOD', 'BEGINNING', 'FOR', 'A', 'YOUNG', 'MAN', 'LIKE', 'TOM', 'WHO', 'HAVING', 'BEEN', 'BORN', 'AND', 'BRED', 'IN', 'THE', 'MOST', 'CONSERVATIVE', 'CLASS', 'OF', 'THE', 'MOST', 'CONCEITED', 'CITY', 'IN', 'NEW', 'ENGLAND', 'NEEDED', 'JUST', 'THE', 'HEALTHY', 'HEARTY', 'SOCIAL', 'INFLUENCES', 'OF', 'THE', 'WEST', 'TO', 'WIDEN', 'HIS', 'VIEWS', 'AND', 'MAKE', 'A', 'MAN', 'OF', 'HIM'] +3331-159609-0001-657: ref=['FORTUNATELY', 'EVERY', 'ONE', 'WAS', 'SO', 'BUSY', 'WITH', 'THE', 'NECESSARY', 'PREPARATIONS', 'THAT', 'THERE', 'WAS', 'NO', 'TIME', 'FOR', 'ROMANCE', 'OF', 'ANY', 'SORT', 'AND', 'THE', 'FOUR', 'YOUNG', 'PEOPLE', 'WORKED', 'TOGETHER', 'AS', 'SOBERLY', 'AND', 'SENSIBLY', 'AS', 'IF', 'ALL', 'SORTS', 'OF', 'EMOTIONS', 'WERE', 'NOT', 'BOTTLED', 'UP', 'IN', 'THEIR', 'RESPECTIVE', 'HEARTS'] +3331-159609-0001-657: hyp=['FORTUNATELY', 'EVERY', 'ONE', 'WAS', 'SO', 'BUSY', 'WITH', 'THE', 'NECESSARY', 'PREPARATIONS', 'THAT', 'THERE', 'WAS', 'NO', 'TIME', 'FOR', 'ROMANCE', 'OF', 'ANY', 'SORT', 'AND', 'THE', 'FOUR', 'YOUNG', 'PEOPLE', 'WORKED', 'TOGETHER', 'AS', 'SOBERLY', 'AND', 'SENSIBLY', 'AS', 'IF', 'ALL', 'SORTS', 'OF', 'EMOTIONS', 'WERE', 'NOT', 'BOTTLED', 'UP', 'IN', 'THEIR', 'RESPECTIVE', 'HEARTS'] +3331-159609-0002-658: ref=['PITY', 'THAT', 'THE', 'END', 'SHOULD', 'COME', 'SO', 'SOON', 'BUT', 'THE', 'HOUR', 'DID', 'ITS', 'WORK', 'AND', 'WENT', 'ITS', 'WAY', 'LEAVING', 'A', 'CLEARER', 'ATMOSPHERE', 'BEHIND', 'THOUGH', 'THE', 'YOUNG', 'FOLKS', 'DID', 'NOT', 'SEE', 'IT', 'THEN', 'FOR', 'THEIR', 'EYES', 'WERE', 'DIM', 'BECAUSE', 'OF', 'THE', 'PARTINGS', 'THAT', 'MUST', 'BE'] +3331-159609-0002-658: hyp=['PITY', 'THAT', 'THE', 'END', 'SHOULD', 'COME', 'SO', 'SOON', 'BUT', 'THE', 'HOUR', 'DID', 'ITS', 'WORK', 'AND', 'WENT', 'ITS', 'WAY', 'LEAVING', 'A', 'CLEARER', 'ATMOSPHERE', 'BEHIND', 'THOUGH', 'THE', 'YOUNG', 'FOLKS', 'DID', 'NOT', 'SEE', 'IT', 'THEN', 'FOR', 'THEIR', 'EYES', 'WERE', 'DIM', 'BECAUSE', 'OF', 'THE', 'PARTINGS', 'THAT', 'MUST', 'BE'] +3331-159609-0003-659: ref=['IF', 'IT', 'HAD', 'NOT', 'BEEN', 'FOR', 'TWO', 'THINGS', 'I', 'FEAR', 'SHE', 'NEVER', 'WOULD', 'HAVE', 'STOOD', 'A', 'SUMMER', 'IN', 'TOWN', 'BUT', 'SYDNEY', 'OFTEN', 'CALLED', 'TILL', 'HIS', 'VACATION', 'CAME', 'AND', 'A', 'VOLUMINOUS', 'CORRESPONDENCE', 'WITH', 'POLLY', 'BEGUILED', 'THE', 'LONG', 'DAYS'] +3331-159609-0003-659: hyp=['IF', 'IT', 'HAD', 'NOT', 'BEEN', 'FOR', 'TWO', 'THINGS', 'I', 'FEAR', 'SHE', 'NEVER', 'WOULD', 'HAVE', 'STOOD', 'A', 'SUMMER', 'IN', 'TOWN', 'BUT', 'SYDNEY', 'OFTEN', 'CALLED', 'TILL', 'HIS', 'VACATION', 'CAME', 'AND', 'A', 'VOLUMINOUS', 'CORRESPONDENCE', 'WITH', 'POLLY', 'BEGUILED', 'THE', 'LONG', 'DAYS'] +3331-159609-0004-660: ref=['TOM', 'WROTE', 'ONCE', 'A', 'WEEK', 'TO', 'HIS', 'MOTHER', 'BUT', 'THE', 'LETTERS', 'WERE', 'SHORT', 'AND', 'NOT', 'VERY', 'SATISFACTORY', 'FOR', 'MEN', 'NEVER', 'DO', 'TELL', 'THE', 'INTERESTING', 'LITTLE', 'THINGS', 'THAT', 'WOMEN', 'BEST', 'LIKE', 'TO', 'HEAR'] +3331-159609-0004-660: hyp=['TOM', 'WROTE', 'ONCE', 'A', 'WEEK', 'TO', 'HIS', 'MOTHER', 'BUT', 'THE', 'LETTERS', 'WERE', 'SHORT', 'AND', 'NOT', 'VERY', 'SATISFACTORY', 'FOR', 'MEN', 
'NEVER', 'DO', 'TELL', 'THE', 'INTERESTING', 'LITTLE', 'THINGS', 'THAT', 'WOMEN', 'BEST', 'LIKE', 'TO', 'HEAR'] +3331-159609-0005-661: ref=['NO', 'I', 'M', 'ONLY', 'TIRED', 'HAD', 'A', 'GOOD', 'DEAL', 'TO', 'DO', 'LATELY', 'AND', 'THE', 'DULL', 'WEATHER', 'MAKES', 'ME', 'JUST', 'A', 'TRIFLE', 'BLUE'] +3331-159609-0005-661: hyp=['NOW', 'I', 'AM', 'ONLY', 'TIRED', 'HAD', 'A', 'GOOD', 'DEAL', 'TO', 'DO', 'LATELY', 'AND', 'THE', 'DULL', 'WEATHER', 'MAKES', 'ME', 'JUST', 'A', 'TRAVEL', 'BLUE'] +3331-159609-0006-662: ref=['FORGIVE', 'ME', 'POLLY', 'BUT', 'I', "CAN'T", 'HELP', 'SAYING', 'IT', 'FOR', 'IT', 'IS', 'THERE', 'AND', 'I', 'WANT', 'TO', 'BE', 'AS', 'TRUE', 'TO', 'YOU', 'AS', 'YOU', 'WERE', 'TO', 'ME', 'IF', 'I', 'CAN'] +3331-159609-0006-662: hyp=['FORGIVE', 'ME', 'POLLY', 'BUT', 'I', "CAN'T", 'HELP', 'SAYING', 'IT', 'FOR', 'IT', 'IS', 'THERE', 'AND', 'I', 'WANT', 'TO', 'BE', 'AS', 'TRUE', 'TO', 'YOU', 'AS', 'YOU', 'WERE', 'TO', 'ME', 'IF', 'I', 'CAN'] +3331-159609-0007-663: ref=['I', 'TRY', 'NOT', 'TO', 'DECEIVE', 'MYSELF', 'BUT', 'IT', 'DOES', 'SEEM', 'AS', 'IF', 'THERE', 'WAS', 'A', 'CHANCE', 'OF', 'HAPPINESS', 'FOR', 'ME'] +3331-159609-0007-663: hyp=['I', 'TRY', 'NOT', 'TO', 'DECEIVE', 'MYSELF', 'BUT', 'IT', 'DOES', 'SEEM', 'AS', 'IF', 'THERE', 'WAS', 'A', 'CHANCE', 'OF', 'HAPPINESS', 'FOR', 'ME'] +3331-159609-0008-664: ref=['THANK', 'HEAVEN', 'FOR', 'THAT'] +3331-159609-0008-664: hyp=['THANK', 'HEAVEN', 'FOR', 'THAT'] +3331-159609-0009-665: ref=['CRIED', 'POLLY', 'WITH', 'THE', 'HEARTIEST', 'SATISFACTION', 'IN', 'HER', 'VOICE'] +3331-159609-0009-665: hyp=['CRIED', 'POLLY', 'WITH', 'THE', 'HEARTIEST', 'SATISFACTION', 'IN', 'HER', 'VOICE'] +3331-159609-0010-666: ref=['POOR', 'POLLY', 'WAS', 'SO', 'TAKEN', 'BY', 'SURPRISE', 'THAT', 'SHE', 'HAD', 'NOT', 'A', 'WORD', 'TO', 'SAY'] +3331-159609-0010-666: hyp=['POOR', 'POLLY', 'WAS', 'SO', 'TAKEN', 'BY', 'SURPRISE', 'THAT', 'SHE', 'HAD', 'NOT', 'A', 'WORD', 'TO', 'SAY'] +3331-159609-0011-667: ref=['NONE', 'WERE', 'NEEDED', 'HER', 'TELLTALE', 'FACE', 'ANSWERED', 'FOR', 'HER', 'AS', 'WELL', 'AS', 'THE', 'IMPULSE', 'WHICH', 'MADE', 'HER', 'HIDE', 'HER', 'HEAD', 'IN', 'THE', 'SOFA', 'CUSHION', 'LIKE', 'A', 'FOOLISH', 'OSTRICH', 'WHEN', 'THE', 'HUNTERS', 'ARE', 'AFTER', 'IT'] +3331-159609-0011-667: hyp=['NONE', 'WERE', 'NEEDED', 'HER', 'TELLTALE', 'FACE', 'ANSWERED', 'FOR', 'HER', 'AS', 'WELL', 'AS', 'THE', 'IMPULSE', 'WHICH', 'MADE', 'HER', 'HIDE', 'HER', 'HEAD', 'IN', 'THE', 'SOFA', 'CUSHION', 'LIKE', 'A', 'FOOLISH', 'OSTRICH', 'WHEN', 'THE', 'HANDERS', 'ARE', 'AFTER', 'IT'] +3331-159609-0012-668: ref=['ONCE', 'OR', 'TWICE', 'BUT', 'SORT', 'OF', 'JOKINGLY', 'AND', 'I', 'THOUGHT', 'IT', 'WAS', 'ONLY', 'SOME', 'LITTLE', 'FLIRTATION'] +3331-159609-0012-668: hyp=['ONCE', 'OR', 'TWICE', 'BUT', 'SORT', 'OF', 'CHOKINGLY', 'AND', 'I', 'THOUGHT', 'IT', 'WAS', 'ONLY', 'SOME', 'LITTLE', 'FLIRTATION'] +3331-159609-0013-669: ref=['IT', 'WAS', 'SO', 'STUPID', 'OF', 'ME', 'NOT', 'TO', 'GUESS', 'BEFORE'] +3331-159609-0013-669: hyp=['IT', 'WAS', 'SO', 'STUPID', 'OF', 'ME', 'NOT', 'TO', 'GUESS', 'BEFORE'] +3331-159609-0014-670: ref=['IT', 'WAS', 'SO', 'TENDER', 'EARNEST', 'AND', 'DEFIANT', 'THAT', 'FANNY', 'FORGOT', 'THE', 'DEFENCE', 'OF', 'HER', 'OWN', 'LOVER', 'IN', 'ADMIRATION', 'OF', "POLLY'S", 'LOYALTY', 'TO', 'HERS', 'FOR', 'THIS', 'FAITHFUL', 'ALL', 'ABSORBING', 'LOVE', 'WAS', 'A', 'NEW', 'REVELATION', 'TO', 'FANNY', 'WHO', 'WAS', 'USED', 'TO', 'HEARING', 'HER', 'FRIENDS', 'BOAST', 'OF', 'TWO', 'OR', 'THREE', 'LOVERS', 'A', 'YEAR', 'AND', 'CALCULATE', 
'THEIR', 'RESPECTIVE', 'VALUES', 'WITH', 'ALMOST', 'AS', 'MUCH', 'COOLNESS', 'AS', 'THE', 'YOUNG', 'MEN', 'DISCUSSED', 'THE', 'FORTUNES', 'OF', 'THE', 'GIRLS', 'THEY', 'WISHED', 'FOR', 'BUT', 'COULD', 'NOT', 'AFFORD', 'TO', 'MARRY'] +3331-159609-0014-670: hyp=['IT', 'WAS', 'SO', 'TENDER', 'EARNEST', 'AND', 'DEFIANT', 'THAT', 'FANNY', 'FORGOT', 'THE', 'DEFENCE', 'OF', 'HER', 'OWN', 'LOVER', 'IN', 'ADMIRATION', 'OF', "POLLY'S", 'LOYALTY', 'TO', 'HERS', 'FOR', 'THIS', 'FAITHFUL', 'ALL', 'ABSORBING', 'LOVE', 'WAS', 'A', 'NEW', 'REVELATION', 'TO', 'FANNY', 'WHO', 'WAS', 'USED', 'TO', 'HEARING', 'HER', 'FRIENDS', 'BOAST', 'OF', 'TWO', 'OR', 'THREE', 'LOVERS', 'A', 'YEAR', 'AND', 'CALCULATE', 'THEIR', 'RESPECTIVE', 'VALUES', 'WITH', 'ALMOST', 'AS', 'MUCH', 'COOLNESS', 'AS', 'THE', 'YOUNG', 'MEN', 'DISCUSSED', 'THE', 'FORTUNES', 'OF', 'THE', 'GIRLS', 'THEY', 'WISHED', 'FOR', 'BUT', 'COULD', 'NOT', 'AFFORD', 'TO', 'MARRY'] +3331-159609-0015-671: ref=['I', 'HOPE', 'MARIA', 'BAILEY', 'IS', 'ALL', 'HE', 'THINKS', 'HER', 'SHE', 'ADDED', 'SOFTLY', 'FOR', 'I', 'COULD', "N'T", 'BEAR', 'TO', 'HAVE', 'HIM', 'DISAPPOINTED', 'AGAIN'] +3331-159609-0015-671: hyp=['I', 'HOPE', 'MARIA', 'BAILEY', 'IS', 'ALL', 'HE', 'THINKS', 'HER', 'SHE', 'ADDED', 'SOFTLY', 'FOR', 'I', 'COULD', 'NOT', 'BEAR', 'TO', 'HAVE', 'HIM', 'DISAPPOINTED', 'AGAIN'] +3331-159609-0016-672: ref=['SAID', 'FANNY', 'TURNING', 'HOPEFUL', 'ALL', 'AT', 'ONCE'] +3331-159609-0016-672: hyp=['SAID', 'FANNY', 'TURNING', 'HOPEFUL', 'ALL', 'AT', 'ONCE'] +3331-159609-0017-673: ref=['SUPPOSE', 'I', 'SAY', 'A', 'WORD', 'TO', 'TOM', 'JUST', 'INQUIRE', 'AFTER', 'HIS', 'HEART', 'IN', 'A', 'GENERAL', 'WAY', 'YOU', 'KNOW', 'AND', 'GIVE', 'HIM', 'A', 'CHANCE', 'TO', 'TELL', 'ME', 'IF', 'THERE', 'IS', 'ANYTHING', 'TO', 'TELL'] +3331-159609-0017-673: hyp=['SUPPOSE', 'I', 'SAY', 'A', 'WORD', 'TO', 'TOM', 'JUST', 'INQUIRE', 'AFTER', 'HIS', 'HEART', 'IN', 'A', 'GENERAL', 'WAY', 'YOU', 'KNOW', 'AND', 'GIVE', 'HIM', 'A', 'CHANCE', 'TO', 'TELL', 'ME', 'IF', "THERE'S", 'ANYTHING', 'TO', 'TELL'] +3331-159609-0018-674: ref=['BEAR', 'IT', 'PEOPLE', 'ALWAYS', 'DO', 'BEAR', 'THINGS', 'SOMEHOW', 'ANSWERED', 'POLLY', 'LOOKING', 'AS', 'IF', 'SENTENCE', 'HAD', 'BEEN', 'PASSED', 'UPON', 'HER'] +3331-159609-0018-674: hyp=['BEAR', 'IT', 'PEOPLE', 'ALWAYS', 'DO', 'BARE', 'THINGS', 'SOMEHOW', 'ANSWERED', 'POLLY', 'LOOKING', 'AS', 'IF', 'SENTENCE', 'HAD', 'BEEN', 'PASSED', 'UPON', 'HER'] +3331-159609-0019-675: ref=['IT', 'WAS', 'A', 'VERY', 'DIFFERENT', 'WINTER', 'FROM', 'THE', 'LAST', 'FOR', 'BOTH', 'THE', 'GIRLS'] +3331-159609-0019-675: hyp=['IT', 'WAS', 'A', 'VERY', 'DIFFERENT', 'WINTER', 'FROM', 'THE', 'LAST', 'FOR', 'BOTH', 'THE', 'GIRLS'] +3331-159609-0020-676: ref=['IF', 'FANNY', 'WANTED', 'TO', 'SHOW', 'HIM', 'WHAT', 'SHE', 'COULD', 'DO', 'TOWARD', 'MAKING', 'A', 'PLEASANT', 'HOME', 'SHE', 'CERTAINLY', 'SUCCEEDED', 'BETTER', 'THAN', 'SHE', 'SUSPECTED', 'FOR', 'IN', 'SPITE', 'OF', 'MANY', 'FAILURES', 'AND', 'DISCOURAGEMENTS', 'BEHIND', 'THE', 'SCENES', 'THE', 'LITTLE', 'HOUSE', 'BECAME', 'A', 'MOST', 'ATTRACTIVE', 'PLACE', 'TO', 'MISTER', 'SYDNEY', 'AT', 'LEAST', 'FOR', 'HE', 'WAS', 'MORE', 'THE', 'HOUSE', 'FRIEND', 'THAN', 'EVER', 'AND', 'SEEMED', 'DETERMINED', 'TO', 'PROVE', 'THAT', 'CHANGE', 'OF', 'FORTUNE', 'MADE', 'NO', 'DIFFERENCE', 'TO', 'HIM'] +3331-159609-0020-676: hyp=['IF', 'FANNY', 'WANTED', 'TO', 'SHOW', 'HIM', 'WHAT', 'SHE', 'COULD', 'DO', 'TOWARD', 'MAKING', 'A', 'PLEASANT', 'HOME', 'SHE', 'CERTAINLY', 'SUCCEEDED', 'BETTER', 'THAN', 'SHE', 'SUSPECTED', 'FOR', 'IN', 
'SPITE', 'OF', 'MANY', 'FAILURES', 'AND', 'DISCOURAGEMENTS', 'BEHIND', 'THE', 'SCENES', 'THE', 'LITTLE', 'HOUSE', 'BECAME', 'A', 'MOST', 'ATTRACTIVE', 'PLACE', 'TO', 'MISTER', 'SYDNEY', 'AT', 'LEAST', 'FOR', 'HE', 'WAS', 'MORE', 'THE', 'HOUSE', 'FRIEND', 'THAN', 'EVER', 'AND', 'SEEMED', 'DETERMINED', 'TO', 'PROVE', 'THAT', 'CHANGE', 'OF', 'FORTUNE', 'MADE', 'NO', 'DIFFERENCE', 'TO', 'HIM'] +3331-159609-0021-677: ref=['SHE', 'KEPT', 'MUCH', 'AT', 'HOME', 'WHEN', 'THE', "DAY'S", 'WORK', 'WAS', 'DONE', 'FINDING', 'IT', 'PLEASANTER', 'TO', 'SIT', 'DREAMING', 'OVER', 'BOOK', 'OR', 'SEWING', 'ALONE', 'THAN', 'TO', 'EXERT', 'HERSELF', 'EVEN', 'TO', 'GO', 'TO', 'THE', 'SHAWS'] +3331-159609-0021-677: hyp=['SHE', 'KEPT', 'MUCH', 'AT', 'HOME', 'WHEN', 'THE', "DAY'S", 'WORK', 'WAS', 'DONE', 'FINDING', 'IT', 'PLEASANTER', 'TO', 'SIT', 'DREAMING', 'OF', 'A', 'BOOK', 'OR', 'SEWING', 'ALONE', 'THAN', 'TO', 'EXERT', 'HERSELF', 'EVEN', 'TO', 'GO', 'TO', 'THE', 'SHORES'] +3331-159609-0022-678: ref=['POLLY', 'WAS', 'NOT', 'AT', 'ALL', 'LIKE', 'HERSELF', 'THAT', 'WINTER', 'AND', 'THOSE', 'NEAREST', 'TO', 'HER', 'SAW', 'AND', 'WONDERED', 'AT', 'IT', 'MOST'] +3331-159609-0022-678: hyp=['POLLY', 'WAS', 'NOT', 'AT', 'ALL', 'LIKE', 'HERSELF', 'THAT', 'WINTER', 'AND', 'THOSE', 'NEAREST', 'TO', 'HER', 'SAW', 'AND', 'WONDERED', 'AT', 'IT', 'MOST'] +3331-159609-0023-679: ref=['FOR', 'NED', 'WAS', 'SO', 'ABSORBED', 'IN', 'BUSINESS', 'THAT', 'HE', 'IGNORED', 'THE', 'WHOLE', 'BAILEY', 'QUESTION', 'AND', 'LEFT', 'THEM', 'IN', 'UTTER', 'DARKNESS'] +3331-159609-0023-679: hyp=['FOR', 'NED', 'WAS', 'SO', 'ABSORBED', 'IN', 'BUSINESS', 'THAT', 'HE', 'IGNORED', 'THE', 'WHOLE', 'BAILEY', 'QUESTION', 'AND', 'LEFT', 'THEM', 'IN', 'UTTER', 'DARKNESS'] +3331-159609-0024-680: ref=['FANNY', 'CAME', 'WALKING', 'IN', 'UPON', 'HER', 'ONE', 'DAY', 'LOOKING', 'AS', 'IF', 'SHE', 'BROUGHT', 'TIDINGS', 'OF', 'SUCH', 'GREAT', 'JOY', 'THAT', 'SHE', 'HARDLY', 'KNEW', 'HOW', 'TO', 'TELL', 'THEM'] +3331-159609-0024-680: hyp=['FANNY', 'CAME', 'WALKING', 'IN', 'UPON', 'HER', 'ONE', 'DAY', 'LOOKING', 'AS', 'IF', 'SHE', 'BROUGHT', 'TIDINGS', 'OF', 'SUCH', 'GREAT', 'JOY', 'THAT', 'SHE', 'HARDLY', 'KNEW', 'HOW', 'TO', 'TELL', 'THEM'] +3331-159609-0025-681: ref=['BUT', 'IF', 'WORK', 'BASKETS', 'WERE', 'GIFTED', 'WITH', 'POWERS', 'OF', 'SPEECH', 'THEY', 'COULD', 'TELL', 'STORIES', 'MORE', 'TRUE', 'AND', 'TENDER', 'THAN', 'ANY', 'WE', 'READ'] +3331-159609-0025-681: hyp=['BUT', 'IF', 'WORK', 'BASKETS', 'WERE', 'GIFTED', 'WITH', 'POWERS', 'OF', 'SPEECH', 'THEY', 'COULD', 'TELL', 'STORIES', 'MORE', 'TRUE', 'AND', 'TENDER', 'THAN', 'ANY', 'WE', 'READ'] +3528-168656-0000-682: ref=['SHE', 'HAD', 'EVEN', 'BEEN', 'IN', 'SOCIETY', 'BEFORE', 'THE', 'REVOLUTION'] +3528-168656-0000-682: hyp=['SHE', 'HAD', 'EVEN', 'BEEN', 'IN', 'SOCIETY', 'BEFORE', 'THE', 'REVOLUTION'] +3528-168656-0001-683: ref=['IT', 'WAS', 'HER', 'PLEASURE', 'AND', 'HER', 'VANITY', 'TO', 'DRAG', 'IN', 'THESE', 'NAMES', 'ON', 'EVERY', 'PRETEXT'] +3528-168656-0001-683: hyp=['IT', 'WAS', 'HER', 'PLEASURE', 'AND', 'HER', 'VANITY', 'TO', 'DRAG', 'IN', 'THESE', 'NAMES', 'ON', 'EVERY', 'PRETEXT'] +3528-168656-0002-684: ref=['EVERY', 'YEAR', 'SHE', 'SOLEMNLY', 'RENEWED', 'HER', 'VOWS', 'AND', 'AT', 'THE', 'MOMENT', 'OF', 'TAKING', 'THE', 'OATH', 'SHE', 'SAID', 'TO', 'THE', 'PRIEST', 'MONSEIGNEUR', 'SAINT', 'FRANCOIS', 'GAVE', 'IT', 'TO', 'MONSEIGNEUR', 'SAINT', 'JULIEN', 'MONSEIGNEUR', 'SAINT', 'JULIEN', 'GAVE', 'IT', 'TO', 'MONSEIGNEUR', 'SAINT', 'EUSEBIUS', 'MONSEIGNEUR', 'SAINT', 'EUSEBIUS', 'GAVE', 'IT', 
'TO', 'MONSEIGNEUR', 'SAINT', 'PROCOPIUS', 'ET', 'CETERA', 'ET', 'CETERA'] +3528-168656-0002-684: hyp=['EVERY', 'YEAR', 'SHE', 'SOLEMNLY', 'RENEWED', 'HER', 'VOWS', 'AND', 'AT', 'THE', 'MOMENT', 'OF', 'TAKING', 'THE', 'OATH', 'SHE', 'SAID', 'TO', 'THE', 'PRIEST', 'MONSEIGNEUR', 'SAINT', 'FRANCOIS', 'GAVE', 'IT', 'TO', 'MONSEIGNOR', 'SAINT', 'JULIAN', 'MONSEIGNOR', 'SAINT', 'JULIAN', 'GAVE', 'IT', 'TO', 'MONSEIGNEUR', 'SAINT', 'CUPIUS', 'A', 'SIGNOR', 'SAINT', 'JOSEPIUS', 'GAVE', 'IT', 'MONSEIGNOR', 'SAINT', 'PROCOPIUS', 'ET', 'CETERA', 'ET', 'CETERA'] +3528-168656-0003-685: ref=['AND', 'THE', 'SCHOOL', 'GIRLS', 'WOULD', 'BEGIN', 'TO', 'LAUGH', 'NOT', 'IN', 'THEIR', 'SLEEVES', 'BUT', 'UNDER', 'THEIR', 'VEILS', 'CHARMING', 'LITTLE', 'STIFLED', 'LAUGHS', 'WHICH', 'MADE', 'THE', 'VOCAL', 'MOTHERS', 'FROWN'] +3528-168656-0003-685: hyp=['AND', 'THE', 'SCHOOLGIRLS', 'WOULD', 'BEGIN', 'TO', 'LAUGH', 'NOT', 'IN', 'THEIR', 'SLEEVES', 'BUT', 'UNDER', 'THEIR', 'VEILS', 'CHARMING', 'LITTLE', 'STIFLED', 'LAUGHS', 'WHICH', 'MADE', 'THE', 'VOCAL', 'MOTHERS', 'FROWN'] +3528-168656-0004-686: ref=['IT', 'WAS', 'A', 'CENTURY', 'WHICH', 'SPOKE', 'THROUGH', 'HER', 'BUT', 'IT', 'WAS', 'THE', 'EIGHTEENTH', 'CENTURY'] +3528-168656-0004-686: hyp=['IT', 'WAS', 'A', 'CENTURY', 'WHICH', 'SPOKE', 'THROUGH', 'HER', 'BUT', 'IT', 'WAS', 'THE', 'EIGHTEENTH', 'CENTURY'] +3528-168656-0005-687: ref=['THE', 'RULE', 'OF', 'FONTEVRAULT', 'DID', 'NOT', 'FORBID', 'THIS'] +3528-168656-0005-687: hyp=['THE', 'RULE', 'OF', 'FONTREVALLE', 'DID', 'NOT', 'FORBID', 'THIS'] +3528-168656-0006-688: ref=['SHE', 'WOULD', 'NOT', 'SHOW', 'THIS', 'OBJECT', 'TO', 'ANYONE'] +3528-168656-0006-688: hyp=['SHE', 'WOULD', 'NOT', 'SHOW', 'THE', 'SUBJECT', 'TO', 'ANY', 'ONE'] +3528-168656-0007-689: ref=['THUS', 'IT', 'FURNISHED', 'A', 'SUBJECT', 'OF', 'COMMENT', 'FOR', 'ALL', 'THOSE', 'WHO', 'WERE', 'UNOCCUPIED', 'OR', 'BORED', 'IN', 'THE', 'CONVENT'] +3528-168656-0007-689: hyp=['THUS', 'IT', 'FURNISHED', 'A', 'SUBJECT', 'OF', 'COMMENT', 'FOR', 'ALL', 'THOSE', 'WHO', 'WERE', 'UNOCCUPIED', 'OR', 'BORED', 'IN', 'THE', 'CONVENT'] +3528-168656-0008-690: ref=['SOME', 'UNIQUE', 'CHAPLET', 'SOME', 'AUTHENTIC', 'RELIC'] +3528-168656-0008-690: hyp=['SOME', 'UNIQUE', 'CHAPLET', 'SOME', 'AUTHENTIC', 'RELIC'] +3528-168656-0009-691: ref=['THEY', 'LOST', 'THEMSELVES', 'IN', 'CONJECTURES'] +3528-168656-0009-691: hyp=['THEY', 'LOST', 'THEMSELVES', 'IN', 'CONJECTURES'] +3528-168656-0010-692: ref=['WHEN', 'THE', 'POOR', 'OLD', 'WOMAN', 'DIED', 'THEY', 'RUSHED', 'TO', 'HER', 'CUPBOARD', 'MORE', 'HASTILY', 'THAN', 'WAS', 'FITTING', 'PERHAPS', 'AND', 'OPENED', 'IT'] +3528-168656-0010-692: hyp=['WHEN', 'THE', 'POOR', 'OLD', 'WOMAN', 'DIED', 'THEY', 'RUSHED', 'TO', 'HER', 'CUPBOARD', 'MORE', 'HASTILY', 'THAN', 'WAS', 'FITTING', 'PERHAPS', 'AND', 'OPENED', 'IT'] +3528-168656-0011-693: ref=['HE', 'IS', 'RESISTING', 'FLUTTERING', 'HIS', 'TINY', 'WINGS', 'AND', 'STILL', 'MAKING', 'AN', 'EFFORT', 'TO', 'FLY', 'BUT', 'THE', 'DANCER', 'IS', 'LAUGHING', 'WITH', 'A', 'SATANICAL', 'AIR'] +3528-168656-0011-693: hyp=['HE', 'IS', 'RESISTING', 'FLUTTERING', 'HIS', 'TINY', 'WINGS', 'AND', 'STILL', 'MAKING', 'AN', 'EFFORT', 'TO', 'FLY', 'BUT', 'THE', 'DANCER', 'IS', 'LAUGHING', 'WITH', 'A', 'SATANICAL', 'AIR'] +3528-168656-0012-694: ref=['MORAL', 'LOVE', 'CONQUERED', 'BY', 'THE', 'COLIC'] +3528-168656-0012-694: hyp=['MORAL', 'LOVE', 'CONQUERED', 'BY', 'THE', 'COLIC'] +3528-168669-0000-695: ref=['THE', 'PRIORESS', 'RETURNED', 'AND', 'SEATED', 'HERSELF', 'ONCE', 'MORE', 'ON', 'HER', 'CHAIR'] 
+3528-168669-0000-695: hyp=['THE', 'PRIORESS', 'RETURNED', 'AND', 'SEATED', 'HERSELF', 'ONCE', 'MORE', 'ON', 'HER', 'CHAIR'] +3528-168669-0001-696: ref=['WE', 'WILL', 'PRESENT', 'A', 'STENOGRAPHIC', 'REPORT', 'OF', 'THE', 'DIALOGUE', 'WHICH', 'THEN', 'ENSUED', 'TO', 'THE', 'BEST', 'OF', 'OUR', 'ABILITY'] +3528-168669-0001-696: hyp=['WE', 'WILL', 'PRESENT', 'A', 'STENOGRAPHIC', 'REPORT', 'OF', 'THE', 'DIALOGUE', 'WHICH', 'THEN', 'ENSUED', 'TO', 'THE', 'BEST', 'OF', 'OUR', 'ABILITY'] +3528-168669-0002-697: ref=['FATHER', 'FAUVENT'] +3528-168669-0002-697: hyp=['FATHER', 'FAUVENT'] +3528-168669-0003-698: ref=['REVEREND', 'MOTHER', 'DO', 'YOU', 'KNOW', 'THE', 'CHAPEL'] +3528-168669-0003-698: hyp=['REVEREND', 'MOTHER', 'DO', 'YOU', 'KNOW', 'THE', 'CHAPEL'] +3528-168669-0004-699: ref=['AND', 'YOU', 'HAVE', 'BEEN', 'IN', 'THE', 'CHOIR', 'IN', 'PURSUANCE', 'OF', 'YOUR', 'DUTIES', 'TWO', 'OR', 'THREE', 'TIMES'] +3528-168669-0004-699: hyp=['AND', 'YOU', 'HAVE', 'BEEN', 'IN', 'THE', 'CHOIR', 'IN', 'PURSUANCE', 'OF', 'YOUR', 'DUTIES', 'TWO', 'OR', 'THREE', 'TIMES'] +3528-168669-0005-700: ref=['THERE', 'IS', 'A', 'STONE', 'TO', 'BE', 'RAISED', 'HEAVY'] +3528-168669-0005-700: hyp=['THERE', 'IS', 'A', 'STONE', 'TO', 'BE', 'RAISED', 'HEAVY'] +3528-168669-0006-701: ref=['THE', 'SLAB', 'OF', 'THE', 'PAVEMENT', 'WHICH', 'IS', 'AT', 'THE', 'SIDE', 'OF', 'THE', 'ALTAR'] +3528-168669-0006-701: hyp=['THIS', 'SLAB', 'OF', 'THE', 'PAVEMENT', 'WHICH', 'IS', 'AT', 'THE', 'SIDE', 'OF', 'THE', 'ALTAR'] +3528-168669-0007-702: ref=['THE', 'SLAB', 'WHICH', 'CLOSES', 'THE', 'VAULT', 'YES'] +3528-168669-0007-702: hyp=['THE', 'SLAP', 'WHICH', 'CLOSES', 'THE', 'VAULT', 'YES'] +3528-168669-0008-703: ref=['IT', 'WOULD', 'BE', 'A', 'GOOD', 'THING', 'TO', 'HAVE', 'TWO', 'MEN', 'FOR', 'IT'] +3528-168669-0008-703: hyp=['IT', 'WOULD', 'BE', 'A', 'GOOD', 'THING', 'TO', 'HAVE', 'TWO', 'MEN', 'FOR', 'IT'] +3528-168669-0009-704: ref=['A', 'WOMAN', 'IS', 'NEVER', 'A', 'MAN'] +3528-168669-0009-704: hyp=['A', 'WOMAN', 'IS', 'NEVER', 'A', 'MAN'] +3528-168669-0010-705: ref=['BECAUSE', 'DOM', 'MABILLON', 'GIVES', 'FOUR', 'HUNDRED', 'AND', 'SEVENTEEN', 'EPISTLES', 'OF', 'SAINT', 'BERNARD', 'WHILE', 'MERLONUS', 'HORSTIUS', 'ONLY', 'GIVES', 'THREE', 'HUNDRED', 'AND', 'SIXTY', 'SEVEN', 'I', 'DO', 'NOT', 'DESPISE', 'MERLONUS', 'HORSTIUS', 'NEITHER', 'DO', 'I'] +3528-168669-0010-705: hyp=['BECAUSE', 'DONEBYON', 'GIVES', 'FOUR', 'HUNDRED', 'AND', 'SEVENTEEN', 'EPISTLES', 'OF', 'SAINT', 'BERNARD', 'WHILE', "MERELONA'S", 'HORSES', 'ONLY', 'GIVES', 'THREE', 'HUNDRED', 'SIXTY', 'SEVEN', 'I', 'DO', 'NOT', 'DESPISE', 'MELONUS', 'HORSES', 'NEITHER', 'DO', 'I'] +3528-168669-0011-706: ref=['MERIT', 'CONSISTS', 'IN', 'WORKING', 'ACCORDING', 'TO', "ONE'S", 'STRENGTH', 'A', 'CLOISTER', 'IS', 'NOT', 'A', 'DOCK', 'YARD'] +3528-168669-0011-706: hyp=['MARRIAGE', 'CONSISTS', 'IN', 'WORKING', 'ACCORDING', 'TO', "ONE'S", 'STRENGTH', 'A', 'CLOISTER', 'IS', 'NOT', 'A', 'DOCKYARD'] +3528-168669-0012-707: ref=['AND', 'A', 'WOMAN', 'IS', 'NOT', 'A', 'MAN', 'BUT', 'MY', 'BROTHER', 'IS', 'THE', 'STRONG', 'ONE', 'THOUGH'] +3528-168669-0012-707: hyp=['AND', 'A', 'WOMAN', 'IS', 'NOT', 'A', 'MAN', 'BUT', 'MY', 'BROTHER', 'IS', 'THE', 'STRONG', 'ONE', 'THOUGH'] +3528-168669-0013-708: ref=['AND', 'CAN', 'YOU', 'GET', 'A', 'LEVER'] +3528-168669-0013-708: hyp=['AND', 'CAN', 'YOU', 'GET', 'A', 'LOVER'] +3528-168669-0014-709: ref=['THERE', 'IS', 'A', 'RING', 'IN', 'THE', 'STONE'] +3528-168669-0014-709: hyp=['THERE', 'IS', 'A', 'RING', 'IN', 'THE', 'STONE'] +3528-168669-0015-710: 
ref=['I', 'WILL', 'PUT', 'THE', 'LEVER', 'THROUGH', 'IT'] +3528-168669-0015-710: hyp=['I', 'WILL', 'PUT', 'THE', 'LOVER', 'THROUGH', 'IT'] +3528-168669-0016-711: ref=['THAT', 'IS', 'GOOD', 'REVEREND', 'MOTHER', 'I', 'WILL', 'OPEN', 'THE', 'VAULT'] +3528-168669-0016-711: hyp=['THAT', 'IS', 'GOOD', 'REVEREND', 'MOTHER', 'I', 'WILL', 'OPEN', 'THE', 'VAULT'] +3528-168669-0017-712: ref=['WILL', 'THAT', 'BE', 'ALL', 'NO'] +3528-168669-0017-712: hyp=['WILL', 'THAT', 'BE', 'ALL', 'NO'] +3528-168669-0018-713: ref=['GIVE', 'ME', 'YOUR', 'ORDERS', 'VERY', 'REVEREND', 'MOTHER'] +3528-168669-0018-713: hyp=['GIVE', 'ME', 'YOUR', 'ORDERS', 'VERY', 'REVEREND', 'MOTHER'] +3528-168669-0019-714: ref=['FAUVENT', 'WE', 'HAVE', 'CONFIDENCE', 'IN', 'YOU'] +3528-168669-0019-714: hyp=['FOR', 'THAT', 'WE', 'HAVE', 'CONFIDENCE', 'IN', 'YOU'] +3528-168669-0020-715: ref=['I', 'AM', 'HERE', 'TO', 'DO', 'ANYTHING', 'YOU', 'WISH'] +3528-168669-0020-715: hyp=['I', 'AM', 'HERE', 'TO', 'DO', 'ANYTHING', 'YOU', 'WISH'] +3528-168669-0021-716: ref=['AND', 'TO', 'HOLD', 'YOUR', 'PEACE', 'ABOUT', 'EVERYTHING', 'YES', 'REVEREND', 'MOTHER'] +3528-168669-0021-716: hyp=['AND', 'TO', 'HOLD', 'YOUR', 'PEACE', 'ABOUT', 'EVERYTHING', 'YES', 'REVERED', 'MOTHER'] +3528-168669-0022-717: ref=['WHEN', 'THE', 'VAULT', 'IS', 'OPEN', 'I', 'WILL', 'CLOSE', 'IT', 'AGAIN'] +3528-168669-0022-717: hyp=['WHEN', 'THE', 'VAULT', 'IS', 'OPEN', 'I', 'WILL', 'CLOSE', 'IT', 'AGAIN'] +3528-168669-0023-718: ref=['BUT', 'BEFORE', 'THAT', 'WHAT', 'REVEREND', 'MOTHER'] +3528-168669-0023-718: hyp=['BUT', 'BEFORE', 'THAT', 'WHAT', 'REVEREND', 'MOTHER'] +3528-168669-0024-719: ref=['FATHER', 'FAUVENT', 'REVEREND', 'MOTHER'] +3528-168669-0024-719: hyp=['FATHER', 'FOUVET', 'REVEREND', 'MOTHER'] +3528-168669-0025-720: ref=['YOU', 'KNOW', 'THAT', 'A', 'MOTHER', 'DIED', 'THIS', 'MORNING'] +3528-168669-0025-720: hyp=['YOU', 'KNOW', 'THAT', 'HER', 'MOTHER', 'DIED', 'THIS', 'MORNING'] +3528-168669-0026-721: ref=['NO', 'DID', 'YOU', 'NOT', 'HEAR', 'THE', 'BELL'] +3528-168669-0026-721: hyp=['NO', 'DID', 'YOU', 'NOT', 'HEAR', 'THE', 'BELL'] +3528-168669-0027-722: ref=['NOTHING', 'CAN', 'BE', 'HEARD', 'AT', 'THE', 'BOTTOM', 'OF', 'THE', 'GARDEN', 'REALLY'] +3528-168669-0027-722: hyp=['NOTHING', 'CAN', 'BE', 'HEARD', 'AT', 'THE', 'BOTTOM', 'OF', 'THE', 'GARDEN', 'REALLY'] +3528-168669-0028-723: ref=['AND', 'THEN', 'THE', 'WIND', 'IS', 'NOT', 'BLOWING', 'IN', 'MY', 'DIRECTION', 'THIS', 'MORNING'] +3528-168669-0028-723: hyp=['AND', 'THEN', 'THE', 'WIND', 'IS', 'NOT', 'BLOWING', 'IN', 'MY', 'DIRECTION', 'THIS', 'MORNING'] +3528-168669-0029-724: ref=['IT', 'WAS', 'MOTHER', 'CRUCIFIXION'] +3528-168669-0029-724: hyp=['IT', 'WAS', 'MOTHER', 'CROSS', 'FICTION'] +3528-168669-0030-725: ref=['THREE', 'YEARS', 'AGO', 'MADAME', 'DE', 'BETHUNE', 'A', 'JANSENIST', 'TURNED', 'ORTHODOX', 'MERELY', 'FROM', 'HAVING', 'SEEN', 'MOTHER', 'CRUCIFIXION', 'AT', 'PRAYER', 'AH'] +3528-168669-0030-725: hyp=['THREE', 'YEARS', 'AGO', 'MADAME', 'DE', 'BESSOON', 'A', 'JANSENIST', 'TURNED', 'ORTHODOX', 'MERELY', 'FROM', 'HAVING', 'SEEN', 'MOTHER', 'CRUCIFIXION', 'AT', 'PRAYER', 'AH'] +3528-168669-0031-726: ref=['THE', 'MOTHERS', 'HAVE', 'TAKEN', 'HER', 'TO', 'THE', 'DEAD', 'ROOM', 'WHICH', 'OPENS', 'ON', 'THE', 'CHURCH', 'I', 'KNOW'] +3528-168669-0031-726: hyp=['THE', 'MOTHERS', 'HAVE', 'TAKEN', 'HER', 'TO', 'THE', 'DEAD', 'ROOM', 'WHICH', 'OPENS', 'ON', 'THE', 'CHURCH', 'I', 'KNOW'] +3528-168669-0032-727: ref=['A', 'FINE', 'SIGHT', 'IT', 'WOULD', 'BE', 'TO', 'SEE', 'A', 'MAN', 'ENTER', 'THE', 'DEAD', 'ROOM', 
'MORE', 'OFTEN'] +3528-168669-0032-727: hyp=['A', 'FINE', 'SIGHT', 'IT', 'WOULD', 'BE', 'TO', 'SEE', 'A', 'MAN', 'ENTER', 'THE', 'BED', 'ROOM', 'MORE', 'OFTEN'] +3528-168669-0033-728: ref=['HEY', 'MORE', 'OFTEN'] +3528-168669-0033-728: hyp=['HEY', 'MORE', 'OFTEN'] +3528-168669-0034-729: ref=['WHAT', 'DO', 'YOU', 'SAY'] +3528-168669-0034-729: hyp=['WHAT', 'DO', 'YOU', 'SAY'] +3528-168669-0035-730: ref=['I', 'SAY', 'MORE', 'OFTEN', 'MORE', 'OFTEN', 'THAN', 'WHAT'] +3528-168669-0035-730: hyp=['I', 'SAY', 'MORE', 'OFTEN', 'MORE', 'OFTEN', 'THAN', 'WHAT'] +3528-168669-0036-731: ref=['REVEREND', 'MOTHER', 'I', 'DID', 'NOT', 'SAY', 'MORE', 'OFTEN', 'THAN', 'WHAT', 'I', 'SAID', 'MORE', 'OFTEN'] +3528-168669-0036-731: hyp=['REVEREND', 'MOTHER', 'I', 'DID', 'NOT', 'SAY', 'MORE', 'OFTEN', 'THAN', 'WHAT', 'I', 'SAID', 'MORE', 'OFTEN'] +3528-168669-0037-732: ref=['BUT', 'I', 'DID', 'NOT', 'SAY', 'MORE', 'OFTEN'] +3528-168669-0037-732: hyp=['BUT', 'I', 'DID', 'NOT', 'SAY', 'MORE', 'OFTEN'] +3528-168669-0038-733: ref=['AT', 'THAT', 'MOMENT', 'NINE', "O'CLOCK", 'STRUCK'] +3528-168669-0038-733: hyp=['AT', 'THAT', 'MOMENT', 'NINE', "O'CLOCK", 'STRUCK'] +3528-168669-0039-734: ref=['AT', 'NINE', "O'CLOCK", 'IN', 'THE', 'MORNING', 'AND', 'AT', 'ALL', 'HOURS', 'PRAISED', 'AND', 'ADORED', 'BE', 'THE', 'MOST', 'HOLY', 'SACRAMENT', 'OF', 'THE', 'ALTAR', 'SAID', 'THE', 'PRIORESS'] +3528-168669-0039-734: hyp=['AT', 'NINE', "O'CLOCK", 'IN', 'THE', 'MORNING', 'AND', 'AT', 'ALL', 'HOURS', 'PRAISED', 'AND', 'ADORED', 'BE', 'THE', 'MOST', 'HOLY', 'SACRAMENT', 'OF', 'THE', 'ALTAR', 'SAID', 'THE', 'PRIORESS'] +3528-168669-0040-735: ref=['IT', 'CUT', 'MORE', 'OFTEN', 'SHORT'] +3528-168669-0040-735: hyp=['IT', 'CUT', 'MORE', 'OFTEN', 'SHORT'] +3528-168669-0041-736: ref=['FAUCHELEVENT', 'MOPPED', 'HIS', 'FOREHEAD'] +3528-168669-0041-736: hyp=['FAUCHELEVENT', 'MOPPED', 'HIS', 'FOREHEAD'] +3528-168669-0042-737: ref=['IN', 'HER', 'LIFETIME', 'MOTHER', 'CRUCIFIXION', 'MADE', 'CONVERTS', 'AFTER', 'HER', 'DEATH', 'SHE', 'WILL', 'PERFORM', 'MIRACLES', 'SHE', 'WILL'] +3528-168669-0042-737: hyp=['IN', 'HER', 'LIFETIME', 'MOTHER', 'CRUCIFIXION', 'MADE', 'CONVERTS', 'AFTER', 'HER', 'DEATH', 'SHE', 'WILL', 'PERFORM', 'MIRACLES', 'SHE', 'WILL'] +3528-168669-0043-738: ref=['FATHER', 'FAUVENT', 'THE', 'COMMUNITY', 'HAS', 'BEEN', 'BLESSED', 'IN', 'MOTHER', 'CRUCIFIXION'] +3528-168669-0043-738: hyp=['FATHER', 'FAUVENT', 'THE', 'COMMUNITY', 'HAS', 'BEEN', 'BLESSED', 'IN', 'MOTHER', 'CRUCIFICTION'] +3528-168669-0044-739: ref=['SHE', 'RETAINED', 'HER', 'CONSCIOUSNESS', 'TO', 'THE', 'VERY', 'LAST', 'MOMENT'] +3528-168669-0044-739: hyp=['SHE', 'RETAINED', 'HER', 'CONSCIOUSNESS', 'TO', 'THE', 'VERY', 'LAST', 'MOMENT'] +3528-168669-0045-740: ref=['SHE', 'GAVE', 'US', 'HER', 'LAST', 'COMMANDS'] +3528-168669-0045-740: hyp=['SHE', 'GAVE', 'US', 'HER', 'LAST', 'COMMANDS'] +3528-168669-0046-741: ref=['IF', 'YOU', 'HAD', 'A', 'LITTLE', 'MORE', 'FAITH', 'AND', 'IF', 'YOU', 'COULD', 'HAVE', 'BEEN', 'IN', 'HER', 'CELL', 'SHE', 'WOULD', 'HAVE', 'CURED', 'YOUR', 'LEG', 'MERELY', 'BY', 'TOUCHING', 'IT', 'SHE', 'SMILED'] +3528-168669-0046-741: hyp=['IF', 'YOU', 'HAD', 'A', 'LITTLE', 'MORE', 'FAITH', 'AND', 'IF', 'YOU', 'COULD', 'HAVE', 'BEEN', 'IN', 'HER', 'CELL', 'SHE', 'WOULD', 'HAVE', 'CURED', 'YOUR', 'LEG', 'MERELY', 'BY', 'TOUCHING', 'IT', 'SHE', 'SMILED'] +3528-168669-0047-742: ref=['THERE', 'WAS', 'SOMETHING', 'OF', 'PARADISE', 'IN', 'THAT', 'DEATH'] +3528-168669-0047-742: hyp=['THERE', 'WAS', 'SOMETHING', 'OF', 'PARADISE', 'IN', 'THAT', 'DEATH'] 
+3528-168669-0048-743: ref=['FAUCHELEVENT', 'THOUGHT', 'THAT', 'IT', 'WAS', 'AN', 'ORISON', 'WHICH', 'SHE', 'WAS', 'FINISHING'] +3528-168669-0048-743: hyp=['FAUCHELEVENT', 'THOUGHT', 'THAT', 'IT', 'WAS', 'AN', 'ORISON', 'WHICH', 'SHE', 'WAS', 'FINISHING'] +3528-168669-0049-744: ref=['FAUCHELEVENT', 'HELD', 'HIS', 'PEACE', 'SHE', 'WENT', 'ON'] +3528-168669-0049-744: hyp=['FAUCHELEVENT', 'HELD', 'HIS', 'PEACE', 'SHE', 'WENT', 'ON'] +3528-168669-0050-745: ref=['I', 'HAVE', 'CONSULTED', 'UPON', 'THIS', 'POINT', 'MANY', 'ECCLESIASTICS', 'LABORING', 'IN', 'OUR', 'LORD', 'WHO', 'OCCUPY', 'THEMSELVES', 'IN', 'THE', 'EXERCISES', 'OF', 'THE', 'CLERICAL', 'LIFE', 'AND', 'WHO', 'BEAR', 'WONDERFUL', 'FRUIT'] +3528-168669-0050-745: hyp=['I', 'HAVE', 'CONSULTED', 'UPON', 'THIS', 'POINT', 'MANY', 'ECCLESIASTICS', 'LABOURING', 'IN', 'OUR', 'LORD', 'WHO', 'OCCUPY', 'THEMSELVES', 'IN', 'THE', 'EXERCISES', 'OF', 'THE', 'CLERICAL', 'LIFE', 'AND', 'WHO', 'BEAR', 'WONDERFUL', 'FRUIT'] +3528-168669-0051-746: ref=['FORTUNATELY', 'THE', 'PRIORESS', 'COMPLETELY', 'ABSORBED', 'IN', 'HER', 'OWN', 'THOUGHTS', 'DID', 'NOT', 'HEAR', 'IT'] +3528-168669-0051-746: hyp=['FORTUNATELY', 'THE', 'PRIORESS', 'COMPLETELY', 'ABSORBED', 'IN', 'HER', 'OWN', 'THOUGHTS', 'DID', 'NOT', 'HEAR', 'IT'] +3528-168669-0052-747: ref=['SHE', 'CONTINUED', 'FATHER', 'FAUVENT'] +3528-168669-0052-747: hyp=['SHE', 'CONTINUED', 'FOR', 'THE', 'PROUVENT'] +3528-168669-0053-748: ref=['YES', 'REVEREND', 'MOTHER'] +3528-168669-0053-748: hyp=['YES', 'REVEREND', 'MOTHER'] +3528-168669-0054-749: ref=['SAINT', 'TERENTIUS', 'BISHOP', 'OF', 'PORT', 'WHERE', 'THE', 'MOUTH', 'OF', 'THE', 'TIBER', 'EMPTIES', 'INTO', 'THE', 'SEA', 'REQUESTED', 'THAT', 'ON', 'HIS', 'TOMB', 'MIGHT', 'BE', 'ENGRAVED', 'THE', 'SIGN', 'WHICH', 'WAS', 'PLACED', 'ON', 'THE', 'GRAVES', 'OF', 'PARRICIDES', 'IN', 'THE', 'HOPE', 'THAT', 'PASSERS', 'BY', 'WOULD', 'SPIT', 'ON', 'HIS', 'TOMB', 'THIS', 'WAS', 'DONE'] +3528-168669-0054-749: hyp=['SAINT', 'TERENTIUS', 'BISHOP', 'OF', 'PORT', 'WHERE', 'THE', 'MOUTH', 'OF', 'THE', 'TIBER', 'EMPTIES', 'INTO', 'THE', 'SEA', 'REQUESTED', 'THAT', 'ON', 'HIS', 'TOMB', 'MIGHT', 'BE', 'ENGRAVED', 'THE', 'SIGN', 'WHICH', 'WAS', 'PLACED', 'ON', 'THE', 'GRAVES', 'OF', 'PARRICIDES', 'IN', 'THE', 'HOPE', 'THAT', 'PASSERS', 'BY', 'WOULD', 'SPIT', 'ON', 'HIS', 'TOMB', 'THIS', 'WAS', 'DONE'] +3528-168669-0055-750: ref=['THE', 'DEAD', 'MUST', 'BE', 'OBEYED', 'SO', 'BE', 'IT'] +3528-168669-0055-750: hyp=['THE', 'DEAD', 'MUST', 'BE', 'OBEYED', 'SO', 'BE', 'IT'] +3528-168669-0056-751: ref=['FOR', 'THAT', 'MATTER', 'NO', 'REVEREND', 'MOTHER'] +3528-168669-0056-751: hyp=['FOR', 'THAT', 'MATTER', 'NO', 'REVEREND', 'MOTHER'] +3528-168669-0057-752: ref=['FATHER', 'FAUVENT', 'MOTHER', 'CRUCIFIXION', 'WILL', 'BE', 'INTERRED', 'IN', 'THE', 'COFFIN', 'IN', 'WHICH', 'SHE', 'HAS', 'SLEPT', 'FOR', 'THE', 'LAST', 'TWENTY', 'YEARS', 'THAT', 'IS', 'JUST'] +3528-168669-0057-752: hyp=['FATHER', 'PROUVENT', 'MOTHER', 'CRUCIFIXION', 'WILL', 'BE', 'INTERRED', 'IN', 'THE', 'COFFIN', 'IN', 'WHICH', 'SHE', 'HAS', 'SLEPT', 'FOR', 'THE', 'LAST', 'TWENTY', 'YEARS', 'THAT', 'IS', 'JUST'] +3528-168669-0058-753: ref=['IT', 'IS', 'A', 'CONTINUATION', 'OF', 'HER', 'SLUMBER'] +3528-168669-0058-753: hyp=['IT', 'IS', 'A', 'CONTINUATION', 'OF', 'HER', 'SLUMBER'] +3528-168669-0059-754: ref=['SO', 'I', 'SHALL', 'HAVE', 'TO', 'NAIL', 'UP', 'THAT', 'COFFIN', 'YES'] +3528-168669-0059-754: hyp=['SO', 'I', 'SHALL', 'HAVE', 'TO', 'NAIL', 'UP', 'THAT', 'COFFIN', 'YES'] +3528-168669-0060-755: ref=['I', 'AM', 
'AT', 'THE', 'ORDERS', 'OF', 'THE', 'VERY', 'REVEREND', 'COMMUNITY'] +3528-168669-0060-755: hyp=['I', 'AM', 'AT', 'THE', 'ORDERS', 'OF', 'THE', 'VERY', 'REVEREND', 'KUNITY'] +3528-168669-0061-756: ref=['THE', 'FOUR', 'MOTHER', 'PRECENTORS', 'WILL', 'ASSIST', 'YOU'] +3528-168669-0061-756: hyp=['THE', 'FOUR', 'MOTHER', 'PRESENTERS', 'WILL', 'ASSIST', 'YOU'] +3528-168669-0062-757: ref=['NO', 'IN', 'LOWERING', 'THE', 'COFFIN'] +3528-168669-0062-757: hyp=['NO', 'IN', 'LOWERING', 'THE', 'COFFIN'] +3528-168669-0063-758: ref=['WHERE', 'INTO', 'THE', 'VAULT'] +3528-168669-0063-758: hyp=['WHERE', 'INTO', 'THE', 'VAULT'] +3528-168669-0064-759: ref=['FAUCHELEVENT', 'STARTED', 'THE', 'VAULT', 'UNDER', 'THE', 'ALTAR'] +3528-168669-0064-759: hyp=['FAUCHELEVENT', 'STARTED', 'THE', 'VAULT', 'UNDER', 'THE', 'ALTAR'] +3528-168669-0065-760: ref=['UNDER', 'THE', 'ALTAR', 'BUT'] +3528-168669-0065-760: hyp=['UNDER', 'THE', 'ALTAR', 'BUT'] +3528-168669-0066-761: ref=['YOU', 'WILL', 'HAVE', 'AN', 'IRON', 'BAR', 'YES', 'BUT'] +3528-168669-0066-761: hyp=['YOU', 'WILL', 'HAVE', 'AN', 'IRON', 'BAR', 'YES', 'BUT'] +3528-168669-0067-762: ref=['YOU', 'WILL', 'RAISE', 'THE', 'STONE', 'WITH', 'THE', 'BAR', 'BY', 'MEANS', 'OF', 'THE', 'RING', 'BUT'] +3528-168669-0067-762: hyp=['YOU', 'WILL', 'RAISE', 'THE', 'STONE', 'WITH', 'THE', 'BAR', 'BY', 'MEANS', 'OF', 'THE', 'RING', 'BUT'] +3528-168669-0068-763: ref=['THE', 'DEAD', 'MUST', 'BE', 'OBEYED', 'TO', 'BE', 'BURIED', 'IN', 'THE', 'VAULT', 'UNDER', 'THE', 'ALTAR', 'OF', 'THE', 'CHAPEL', 'NOT', 'TO', 'GO', 'TO', 'PROFANE', 'EARTH', 'TO', 'REMAIN', 'THERE', 'IN', 'DEATH', 'WHERE', 'SHE', 'PRAYED', 'WHILE', 'LIVING', 'SUCH', 'WAS', 'THE', 'LAST', 'WISH', 'OF', 'MOTHER', 'CRUCIFIXION'] +3528-168669-0068-763: hyp=['THE', 'DEAD', 'MUST', 'BE', 'OBEYED', 'TO', 'BE', 'BURIED', 'IN', 'THE', 'VAULT', 'UNDER', 'THE', 'ALTAR', 'OF', 'THE', 'CHAPEL', 'NOT', 'TO', 'GO', 'TO', 'PROFANE', 'EARTH', 'TO', 'REMAIN', 'THERE', 'IN', 'DEATH', 'WHERE', 'SHE', 'PRAYED', 'WHILE', 'LIVING', 'SUCH', 'WAS', 'THE', 'LAST', 'WISH', 'OF', 'MOTHER', 'CRUCIFIXION'] +3528-168669-0069-764: ref=['SHE', 'ASKED', 'IT', 'OF', 'US', 'THAT', 'IS', 'TO', 'SAY', 'COMMANDED', 'US'] +3528-168669-0069-764: hyp=['SHE', 'ASKED', 'IT', 'OF', 'US', 'THAT', 'IS', 'TO', 'SAY', 'COMMANDED', 'US'] +3528-168669-0070-765: ref=['BUT', 'IT', 'IS', 'FORBIDDEN'] +3528-168669-0070-765: hyp=['BUT', 'IT', 'IS', 'FORBIDDEN'] +3528-168669-0071-766: ref=['OH', 'I', 'AM', 'A', 'STONE', 'IN', 'YOUR', 'WALLS'] +3528-168669-0071-766: hyp=['OH', 'I', 'AM', 'A', 'STONE', 'IN', 'YOUR', 'WALLS'] +3528-168669-0072-767: ref=['THINK', 'FATHER', 'FAUVENT', 'IF', 'SHE', 'WERE', 'TO', 'WORK', 'MIRACLES', 'HERE'] +3528-168669-0072-767: hyp=['THINK', 'FATHER', 'FAUVAIN', 'IF', 'SHE', 'WERE', 'TO', 'WORK', 'MIRACLES', 'HERE'] +3528-168669-0073-768: ref=['WHAT', 'A', 'GLORY', 'OF', 'GOD', 'FOR', 'THE', 'COMMUNITY', 'AND', 'MIRACLES', 'ISSUE', 'FROM', 'TOMBS'] +3528-168669-0073-768: hyp=['WHAT', 'A', 'GLORY', 'OF', 'GOD', 'FOR', 'THE', 'COMMUNITY', 'AND', 'MIRACLES', 'ISSUE', 'FROM', 'TOMBS'] +3528-168669-0074-769: ref=['BUT', 'REVEREND', 'MOTHER', 'IF', 'THE', 'AGENT', 'OF', 'THE', 'SANITARY', 'COMMISSION'] +3528-168669-0074-769: hyp=['BUT', 'REVEREND', 'MOTHER', 'IF', 'THE', 'AGENT', 'OF', 'THE', 'SANITARY', 'COMMISSION'] +3528-168669-0075-770: ref=['BUT', 'THE', 'COMMISSARY', 'OF', 'POLICE'] +3528-168669-0075-770: hyp=['BUT', 'THE', 'COMMISSARY', 'OF', 'POLICE'] +3528-168669-0076-771: ref=['CHONODEMAIRE', 'ONE', 'OF', 'THE', 'SEVEN', 'GERMAN', 'KINGS', 
'WHO', 'ENTERED', 'AMONG', 'THE', 'GAULS', 'UNDER', 'THE', 'EMPIRE', 'OF', 'CONSTANTIUS', 'EXPRESSLY', 'RECOGNIZED', 'THE', 'RIGHT', 'OF', 'NUNS', 'TO', 'BE', 'BURIED', 'IN', 'RELIGION', 'THAT', 'IS', 'TO', 'SAY', 'BENEATH', 'THE', 'ALTAR'] +3528-168669-0076-771: hyp=['SHONAU', 'DE', 'MER', 'ONE', 'OF', 'THE', 'SEVEN', 'GERMAN', 'KINGS', 'WHO', 'ENTERED', 'AMONG', 'THE', 'GAULS', 'UNDER', 'THE', 'EMPIRE', 'OF', 'CONSTANTIUS', 'EXPRESSLY', 'RECOGNIZED', 'THE', 'RIGHT', 'OF', 'NUNS', 'TO', 'BE', 'BURIED', 'IN', 'RELIGION', 'THAT', 'IS', 'TO', 'SAY', 'BENEATH', 'THE', 'ALTAR'] +3528-168669-0077-772: ref=['THE', 'WORLD', 'IS', 'NOTHING', 'IN', 'THE', 'PRESENCE', 'OF', 'THE', 'CROSS'] +3528-168669-0077-772: hyp=['THE', 'WORLD', 'IS', 'NOTHING', 'IN', 'THE', 'PRESENCE', 'OF', 'THE', 'CROSS'] +3528-168669-0078-773: ref=['MARTIN', 'THE', 'ELEVENTH', 'GENERAL', 'OF', 'THE', 'CARTHUSIANS', 'GAVE', 'TO', 'HIS', 'ORDER', 'THIS', 'DEVICE', 'STAT', 'CRUX', 'DUM', 'VOLVITUR', 'ORBIS'] +3528-168669-0078-773: hyp=['MARTIN', 'THE', 'ELEVENTH', 'GENERAL', 'OF', 'THE', 'CARTHUSIANS', 'GAVE', 'TO', 'HIS', 'ORDER', 'THIS', 'DEVICE', 'STAT', 'CREW', 'DUME', 'VOLVETER', 'ORBIS'] +3528-168669-0079-774: ref=['THE', 'PRIORESS', 'WHO', 'WAS', 'USUALLY', 'SUBJECTED', 'TO', 'THE', 'BARRIER', 'OF', 'SILENCE', 'AND', 'WHOSE', 'RESERVOIR', 'WAS', 'OVERFULL', 'ROSE', 'AND', 'EXCLAIMED', 'WITH', 'THE', 'LOQUACITY', 'OF', 'A', 'DAM', 'WHICH', 'HAS', 'BROKEN', 'AWAY'] +3528-168669-0079-774: hyp=['THE', 'PRIORESS', 'WHO', 'WAS', 'USUALLY', 'SUBJECTED', 'TO', 'THE', 'BARRIER', 'OF', 'SILENCE', 'AND', 'WHOSE', 'RESERVOIR', 'WAS', 'OVER', 'FULL', 'ROSE', 'AND', 'EXCLAIMED', 'WITH', 'THE', 'LOQUACITY', 'OF', 'A', 'DAM', 'WHICH', 'HAS', 'BROKEN', 'AWAY'] +3528-168669-0080-775: ref=['I', 'HAVE', 'ON', 'MY', 'RIGHT', 'BENOIT', 'AND', 'ON', 'MY', 'LEFT', 'BERNARD', 'WHO', 'WAS', 'BERNARD'] +3528-168669-0080-775: hyp=['I', 'HAVE', 'ON', 'MY', 'RIGHT', 'BENOIS', 'AND', 'ON', 'MY', 'LEFT', 'BERNARD', 'WHO', 'WAS', 'BERNARD'] +3528-168669-0081-776: ref=['THE', 'FIRST', 'ABBOT', 'OF', 'CLAIRVAUX'] +3528-168669-0081-776: hyp=['THE', 'FIRST', 'ABBOT', 'OF', 'CLERVAL'] +3528-168669-0082-777: ref=['HIS', 'ORDER', 'HAS', 'PRODUCED', 'FORTY', 'POPES', 'TWO', 'HUNDRED', 'CARDINALS', 'FIFTY', 'PATRIARCHS', 'SIXTEEN', 'HUNDRED', 'ARCHBISHOPS', 'FOUR', 'THOUSAND', 'SIX', 'HUNDRED', 'BISHOPS', 'FOUR', 'EMPERORS', 'TWELVE', 'EMPRESSES', 'FORTY', 'SIX', 'KINGS', 'FORTY', 'ONE', 'QUEENS', 'THREE', 'THOUSAND', 'SIX', 'HUNDRED', 'CANONIZED', 'SAINTS', 'AND', 'HAS', 'BEEN', 'IN', 'EXISTENCE', 'FOR', 'FOURTEEN', 'HUNDRED', 'YEARS'] +3528-168669-0082-777: hyp=['HIS', 'ORDER', 'HAS', 'PRODUCED', 'FORTY', 'POPES', 'TWO', 'HUNDRED', 'CARDINALS', 'FIFTY', 'PATRIARCHS', 'SIXTEEN', 'HUNDRED', 'ARCHBISHOPS', 'FOUR', 'THOUSAND', 'SIX', 'HUNDRED', 'BISHOPS', 'FOUR', 'EMPERORS', 'TWELVE', 'EMPRESSES', 'FORTY', 'SIX', 'KINGS', 'FORTY', 'ONE', 'QUEENS', 'THREE', 'THOUSAND', 'SIX', 'HUNDRED', 'CANONIZED', 'SAINTS', 'AND', 'HAS', 'BEEN', 'IN', 'EXISTENCE', 'FOR', 'FOURTEEN', 'HUNDRED', 'YEARS'] +3528-168669-0083-778: ref=['ON', 'ONE', 'SIDE', 'SAINT', 'BERNARD', 'ON', 'THE', 'OTHER', 'THE', 'AGENT', 'OF', 'THE', 'SANITARY', 'DEPARTMENT'] +3528-168669-0083-778: hyp=['ON', 'ONE', 'SIDE', 'SAINT', 'BERNARD', 'ON', 'THE', 'OTHER', 'THE', 'AGENT', 'OF', 'THE', 'SANITARY', 'DEPARTMENT'] +3528-168669-0084-779: ref=['GOD', 'SUBORDINATED', 'TO', 'THE', 'COMMISSARY', 'OF', 'POLICE', 'SUCH', 'IS', 'THE', 'AGE', 'SILENCE', 'FAUVENT'] +3528-168669-0084-779: hyp=['GOD', 'SUBORDINATED', 
'TO', 'THE', 'COMMISSARY', 'OF', 'POLICE', 'SUCH', 'IS', 'THE', 'AGE', 'SILENCE', 'FAVANT'] +3528-168669-0085-780: ref=['NO', 'ONE', 'DOUBTS', 'THE', 'RIGHT', 'OF', 'THE', 'MONASTERY', 'TO', 'SEPULTURE'] +3528-168669-0085-780: hyp=['NO', 'ONE', 'DOUBTS', 'THE', 'RIGHT', 'OF', 'THE', 'MONASTERY', 'TO', 'SEPULTURE'] +3528-168669-0086-781: ref=['ONLY', 'FANATICS', 'AND', 'THOSE', 'IN', 'ERROR', 'DENY', 'IT'] +3528-168669-0086-781: hyp=['ONLY', 'FANATICS', 'AND', 'THOSE', 'IN', 'ERROR', 'DENY', 'IT'] +3528-168669-0087-782: ref=['WE', 'LIVE', 'IN', 'TIMES', 'OF', 'TERRIBLE', 'CONFUSION'] +3528-168669-0087-782: hyp=['WE', 'LIVE', 'IN', 'TIMES', 'OF', 'TERRIBLE', 'CONFUSION'] +3528-168669-0088-783: ref=['WE', 'ARE', 'IGNORANT', 'AND', 'IMPIOUS'] +3528-168669-0088-783: hyp=['WE', 'ARE', 'IGNORANT', 'AND', 'IMPIOUS'] +3528-168669-0089-784: ref=['AND', 'THEN', 'RELIGION', 'IS', 'ATTACKED', 'WHY'] +3528-168669-0089-784: hyp=['AND', 'THEN', 'RELIGION', 'IS', 'ATTACKED', 'WHY'] +3528-168669-0090-785: ref=['BECAUSE', 'THERE', 'HAVE', 'BEEN', 'BAD', 'PRIESTS', 'BECAUSE', 'SAGITTAIRE', 'BISHOP', 'OF', 'GAP', 'WAS', 'THE', 'BROTHER', 'OF', 'SALONE', 'BISHOP', 'OF', 'EMBRUN', 'AND', 'BECAUSE', 'BOTH', 'OF', 'THEM', 'FOLLOWED', 'MOMMOL'] +3528-168669-0090-785: hyp=['BECAUSE', 'THERE', 'HAVE', 'BEEN', 'BAD', 'PRIESTS', 'BECAUSE', 'SAGOTARE', 'BISHOP', 'OF', 'GAP', 'WAS', 'THE', 'BROTHER', 'OF', 'SALOON', 'BISHOP', 'OF', 'EMBRO', 'AND', 'BECAUSE', 'BOTH', 'OF', 'THEM', 'FOLLOWED', 'MAMAL'] +3528-168669-0091-786: ref=['THEY', 'PERSECUTE', 'THE', 'SAINTS'] +3528-168669-0091-786: hyp=['THEY', 'PERSECUTE', 'THE', 'SAINTS'] +3528-168669-0092-787: ref=['THEY', 'SHUT', 'THEIR', 'EYES', 'TO', 'THE', 'TRUTH', 'DARKNESS', 'IS', 'THE', 'RULE'] +3528-168669-0092-787: hyp=['THEY', 'SHUT', 'THEIR', 'EYES', 'TO', 'THE', 'TRUTH', 'DARKNESS', 'IS', 'THE', 'RULE'] +3528-168669-0093-788: ref=['THE', 'MOST', 'FEROCIOUS', 'BEASTS', 'ARE', 'BEASTS', 'WHICH', 'ARE', 'BLIND'] +3528-168669-0093-788: hyp=['THE', 'MOST', 'FEROCIOUS', 'BEASTS', 'ARE', 'BEASTS', 'WHICH', 'ARE', 'BLIND'] +3528-168669-0094-789: ref=['OH', 'HOW', 'WICKED', 'PEOPLE', 'ARE'] +3528-168669-0094-789: hyp=['OH', 'HOW', 'WICKED', 'PEOPLE', 'ARE'] +3528-168669-0095-790: ref=['BY', 'ORDER', 'OF', 'THE', 'KING', 'SIGNIFIES', 'TO', 'DAY', 'BY', 'ORDER', 'OF', 'THE', 'REVOLUTION'] +3528-168669-0095-790: hyp=['BY', 'ORDER', 'OF', 'THE', 'KING', 'SIGNIFIES', 'TO', 'DAY', 'BY', 'ORDER', 'OF', 'THE', 'REVOLUTION'] +3528-168669-0096-791: ref=['ONE', 'NO', 'LONGER', 'KNOWS', 'WHAT', 'IS', 'DUE', 'TO', 'THE', 'LIVING', 'OR', 'TO', 'THE', 'DEAD', 'A', 'HOLY', 'DEATH', 'IS', 'PROHIBITED'] +3528-168669-0096-791: hyp=['ONE', 'NO', 'LONGER', 'KNOWS', 'WHAT', 'IS', 'DUE', 'TO', 'THE', 'LIVING', 'OR', 'TO', 'THE', 'DEAD', 'A', 'HOLY', 'DEATH', 'IS', 'PROHIBITED'] +3528-168669-0097-792: ref=['GAUTHIER', 'BISHOP', 'OF', 'CHALONS', 'HELD', 'HIS', 'OWN', 'IN', 'THIS', 'MATTER', 'AGAINST', 'OTHO', 'DUKE', 'OF', 'BURGUNDY'] +3528-168669-0097-792: hyp=['GATHIER', 'BISHOP', 'OF', 'CHELAN', 'HELD', 'HIS', 'OWN', 'IN', 'THIS', 'MATTER', 'AGAINST', 'OTHO', 'DUKE', 'OF', 'BURGUNDY'] +3528-168669-0098-793: ref=['THE', 'PRIORESS', 'TOOK', 'BREATH', 'THEN', 'TURNED', 'TO', 'FAUCHELEVENT'] +3528-168669-0098-793: hyp=['THE', 'PRIORESS', 'TOOK', 'BREATH', 'THEN', 'TURNED', 'TO', 'FAUCHELEVENT'] +3528-168669-0099-794: ref=['YOU', 'WILL', 'CLOSE', 'THE', 'COFFIN', 'THE', 'SISTERS', 'WILL', 'CARRY', 'IT', 'TO', 'THE', 'CHAPEL'] +3528-168669-0099-794: hyp=['YOU', 'WILL', 'CLOSE', 'THE', 'COFFIN', 'THE', 
'SISTERS', 'WILL', 'CARRY', 'IT', 'TO', 'THE', 'CHAPEL'] +3528-168669-0100-795: ref=['THE', 'OFFICE', 'FOR', 'THE', 'DEAD', 'WILL', 'THEN', 'BE', 'SAID'] +3528-168669-0100-795: hyp=['THE', 'OFFICE', 'FOR', 'THE', 'DEAD', 'WILL', 'THEN', 'BE', 'SET'] +3528-168669-0101-796: ref=['BUT', 'SHE', 'WILL', 'HEAR', 'SHE', 'WILL', 'NOT', 'LISTEN'] +3528-168669-0101-796: hyp=['BUT', 'SHE', 'WILL', 'HEAR', 'SHE', 'WILL', 'NOT', 'LISTEN'] +3528-168669-0102-797: ref=['BESIDES', 'WHAT', 'THE', 'CLOISTER', 'KNOWS', 'THE', 'WORLD', 'LEARNS', 'NOT'] +3528-168669-0102-797: hyp=['BESIDES', 'WHAT', 'THE', 'CLOISTER', 'KNOWS', 'THE', 'WORLD', 'LEARNS', 'NOT'] +3528-168669-0103-798: ref=['A', 'PAUSE', 'ENSUED'] +3528-168669-0103-798: hyp=['A', 'PAUSE', 'ENSUED'] +3528-168669-0104-799: ref=['YOU', 'WILL', 'REMOVE', 'YOUR', 'BELL'] +3528-168669-0104-799: hyp=['YOU', 'WILL', 'REMOVE', 'YOUR', 'BELT'] +3528-168669-0105-800: ref=['HAS', 'THE', 'DOCTOR', 'FOR', 'THE', 'DEAD', 'PAID', 'HIS', 'VISIT'] +3528-168669-0105-800: hyp=['HAS', 'THE', 'DOCTOR', 'FOR', 'THE', 'DEAD', 'PAID', 'HIS', 'VISIT'] +3528-168669-0106-801: ref=['HE', 'WILL', 'PAY', 'IT', 'AT', 'FOUR', "O'CLOCK", 'TO', 'DAY'] +3528-168669-0106-801: hyp=['HE', 'WILL', 'PAY', 'IT', 'AT', 'FOUR', "O'CLOCK", 'TO', 'DAY'] +3528-168669-0107-802: ref=['THE', 'PEAL', 'WHICH', 'ORDERS', 'THE', 'DOCTOR', 'FOR', 'THE', 'DEAD', 'TO', 'BE', 'SUMMONED', 'HAS', 'ALREADY', 'BEEN', 'RUNG'] +3528-168669-0107-802: hyp=['THE', 'PEAL', 'WHICH', 'ORDERS', 'THE', 'DOCTOR', 'FOR', 'THE', 'DEAD', 'TO', 'BE', 'SUMMONED', 'HAS', 'ALREADY', 'BEEN', 'RUNG'] +3528-168669-0108-803: ref=['BUT', 'YOU', 'DO', 'NOT', 'UNDERSTAND', 'ANY', 'OF', 'THE', 'PEALS'] +3528-168669-0108-803: hyp=['BUT', 'YOU', 'DO', 'NOT', 'UNDERSTAND', 'ANY', 'OF', 'APPEALS'] +3528-168669-0109-804: ref=['THAT', 'IS', 'WELL', 'FATHER', 'FAUVENT'] +3528-168669-0109-804: hyp=['THAT', 'IS', 'WELL', 'FATHER', 'FERVEN'] +3528-168669-0110-805: ref=['WHERE', 'WILL', 'YOU', 'OBTAIN', 'IT'] +3528-168669-0110-805: hyp=['WHERE', 'WILL', 'YOU', 'OBTAIN', 'IT'] +3528-168669-0111-806: ref=['I', 'HAVE', 'MY', 'HEAP', 'OF', 'OLD', 'IRON', 'AT', 'THE', 'BOTTOM', 'OF', 'THE', 'GARDEN'] +3528-168669-0111-806: hyp=['I', 'HAVE', 'MY', 'HEAP', 'OF', 'OLD', 'IRON', 'AT', 'THE', 'BOTTOM', 'OF', 'THE', 'GARDEN'] +3528-168669-0112-807: ref=['REVEREND', 'MOTHER', 'WHAT'] +3528-168669-0112-807: hyp=['REVEREND', 'MOTHER', 'WHAT'] +3528-168669-0113-808: ref=['IF', 'YOU', 'WERE', 'EVER', 'TO', 'HAVE', 'ANY', 'OTHER', 'JOBS', 'OF', 'THIS', 'SORT', 'MY', 'BROTHER', 'IS', 'THE', 'STRONG', 'MAN', 'FOR', 'YOU', 'A', 'PERFECT', 'TURK'] +3528-168669-0113-808: hyp=['IF', 'YOU', 'WERE', 'EVER', 'TO', 'HAVE', 'ANY', 'OTHER', 'JOBS', 'OF', 'THIS', 'SORT', 'MY', 'BROTHER', 'IS', 'THE', 'STRONG', 'MAN', 'FOR', 'YOU', 'A', 'PERFECT', 'TURK'] +3528-168669-0114-809: ref=['YOU', 'WILL', 'DO', 'IT', 'AS', 'SPEEDILY', 'AS', 'POSSIBLE'] +3528-168669-0114-809: hyp=['YOU', 'WILL', 'DO', 'IT', 'AS', 'SPEEDILY', 'AS', 'POSSIBLE'] +3528-168669-0115-810: ref=['I', 'CANNOT', 'WORK', 'VERY', 'FAST', 'I', 'AM', 'INFIRM', 'THAT', 'IS', 'WHY', 'I', 'REQUIRE', 'AN', 'ASSISTANT', 'I', 'LIMP'] +3528-168669-0115-810: hyp=['I', 'CANNOT', 'WORK', 'VERY', 'FAST', 'I', 'AM', 'INFIRM', 'THAT', 'IS', 'WHY', 'I', 'REQUIRE', 'AN', 'ASSISTANT', 'I', 'LIMP'] +3528-168669-0116-811: ref=['EVERYTHING', 'MUST', 'HAVE', 'BEEN', 'COMPLETED', 'A', 'GOOD', 'QUARTER', 'OF', 'AN', 'HOUR', 'BEFORE', 'THAT'] +3528-168669-0116-811: hyp=['EVERYTHING', 'MUST', 'HAVE', 'BEEN', 'COMPLETED', 'A', 'GOOD', 
'QUARTER', 'OF', 'AN', 'HOUR', 'BEFORE', 'THAT'] +3528-168669-0117-812: ref=['I', 'WILL', 'DO', 'ANYTHING', 'TO', 'PROVE', 'MY', 'ZEAL', 'TOWARDS', 'THE', 'COMMUNITY', 'THESE', 'ARE', 'MY', 'ORDERS', 'I', 'AM', 'TO', 'NAIL', 'UP', 'THE', 'COFFIN'] +3528-168669-0117-812: hyp=['I', 'WILL', 'DO', 'ANYTHING', 'TO', 'PROVE', 'MY', 'ZEAL', 'TOWARDS', 'THE', 'COMMUNITY', 'THESE', 'ARE', 'MY', 'ORDERS', 'I', 'AM', 'TO', 'NAIL', 'UP', 'THE', 'COFFIN'] +3528-168669-0118-813: ref=['AT', 'ELEVEN', "O'CLOCK", 'EXACTLY', 'I', 'AM', 'TO', 'BE', 'IN', 'THE', 'CHAPEL'] +3528-168669-0118-813: hyp=['AT', 'ELEVEN', "O'CLOCK", 'EXACTLY', 'I', 'AM', 'TO', 'BE', 'IN', 'THE', 'CHAPEL'] +3528-168669-0119-814: ref=['MOTHER', 'ASCENSION', 'WILL', 'BE', 'THERE', 'TWO', 'MEN', 'WOULD', 'BE', 'BETTER'] +3528-168669-0119-814: hyp=['MOTHER', 'ASCENSON', 'WILL', 'BE', 'THERE', 'TWO', 'MEN', 'WOULD', 'BE', 'BETTER'] +3528-168669-0120-815: ref=['HOWEVER', 'NEVER', 'MIND', 'I', 'SHALL', 'HAVE', 'MY', 'LEVER'] +3528-168669-0120-815: hyp=['HOWEVER', 'NEVER', 'MIND', 'I', 'SHALL', 'HAVE', 'MY', 'LOVER'] +3528-168669-0121-816: ref=['AFTER', 'WHICH', 'THERE', 'WILL', 'BE', 'NO', 'TRACE', 'OF', 'ANYTHING'] +3528-168669-0121-816: hyp=['AFTER', 'WHICH', 'THERE', 'WILL', 'BE', 'NO', 'TRACE', 'OF', 'ANYTHING'] +3528-168669-0122-817: ref=['THE', 'GOVERNMENT', 'WILL', 'HAVE', 'NO', 'SUSPICION'] +3528-168669-0122-817: hyp=['THE', 'GOVERNMENT', 'WILL', 'HAVE', 'NO', 'SUSPICION'] +3528-168669-0123-818: ref=['THE', 'EMPTY', 'COFFIN', 'REMAINS', 'THIS', 'PRODUCED', 'A', 'PAUSE'] +3528-168669-0123-818: hyp=['THE', 'EMPTY', 'COFFIN', 'REMAINS', 'THIS', 'PRODUCED', 'A', 'PAUSE'] +3528-168669-0124-819: ref=['WHAT', 'IS', 'TO', 'BE', 'DONE', 'WITH', 'THAT', 'COFFIN', 'FATHER', 'FAUVENT'] +3528-168669-0124-819: hyp=['WHAT', 'IS', 'TO', 'BE', 'DONE', 'WITH', 'THAT', 'COFFIN', 'FATHER', 'PREVENT'] +3528-168669-0125-820: ref=['IT', 'WILL', 'BE', 'GIVEN', 'TO', 'THE', 'EARTH', 'EMPTY'] +3528-168669-0125-820: hyp=['IT', 'WILL', 'BE', 'GIVEN', 'TO', 'THE', 'EARTH', 'EMPTY'] +3528-168669-0126-821: ref=['AH', 'THE', 'DE', 'EXCLAIMED', 'FAUCHELEVENT'] +3528-168669-0126-821: hyp=['AH', 'VIDA', 'EXCLAIMED', 'FAUCHELEVENT'] +3528-168669-0127-822: ref=['THE', 'VIL', 'STUCK', 'FAST', 'IN', 'HIS', 'THROAT'] +3528-168669-0127-822: hyp=['THE', 'VEIL', 'STUCK', 'FAST', 'IN', 'HIS', 'THROAT'] +3528-168669-0128-823: ref=['HE', 'MADE', 'HASTE', 'TO', 'IMPROVISE', 'AN', 'EXPEDIENT', 'TO', 'MAKE', 'HER', 'FORGET', 'THE', 'OATH'] +3528-168669-0128-823: hyp=['HE', 'MADE', 'HASTE', 'TO', 'IMPROVISE', 'AN', 'EXPEDIENT', 'TO', 'MAKE', 'HER', 'FORGET', 'THE', 'OATH'] +3528-168669-0129-824: ref=['I', 'WILL', 'PUT', 'EARTH', 'IN', 'THE', 'COFFIN', 'REVEREND', 'MOTHER', 'THAT', 'WILL', 'PRODUCE', 'THE', 'EFFECT', 'OF', 'A', 'CORPSE'] +3528-168669-0129-824: hyp=['I', 'WILL', 'PUT', 'EARTH', 'IN', 'THE', 'COFFIN', 'REVEREND', 'MOTHER', 'THAT', 'WILL', 'PRODUCE', 'THE', 'EFFECT', 'OF', 'A', 'CORPSE'] +3528-168669-0130-825: ref=['I', 'WILL', 'MAKE', 'THAT', 'MY', 'SPECIAL', 'BUSINESS'] +3528-168669-0130-825: hyp=['I', 'WILL', 'MAKE', 'THAT', 'MY', 'SPECIAL', 'BUSINESS'] +3538-142836-0000-826: ref=['GENERAL', 'OBSERVATIONS', 'ON', 'PRESERVES', 'CONFECTIONARY', 'ICES', 'AND', 'DESSERT', 'DISHES'] +3538-142836-0000-826: hyp=['GENERAL', 'OBSERVATIONS', 'ON', 'PRESERVES', 'CONFECTIONERY', 'ICES', 'AND', 'DESSERT', 'DISHES'] +3538-142836-0001-827: ref=['THE', 'EXPENSE', 'OF', 'PRESERVING', 'THEM', 'WITH', 'SUGAR', 'IS', 'A', 'SERIOUS', 'OBJECTION', 'FOR', 'EXCEPT', 'THE', 'SUGAR', 'IS', 
'USED', 'IN', 'CONSIDERABLE', 'QUANTITIES', 'THE', 'SUCCESS', 'IS', 'VERY', 'UNCERTAIN'] +3538-142836-0001-827: hyp=['THE', 'EXPENSE', 'OF', 'PRESERVING', 'THEM', 'WITH', 'SUGAR', 'IS', 'A', 'SERIOUS', 'OBJECTION', 'FOR', 'EXCEPT', 'THE', 'SUGAR', 'IS', 'USED', 'IN', 'CONSIDERABLE', 'QUALITIES', 'THE', 'SUCCESS', 'IS', 'VERY', 'UNCERTAIN'] +3538-142836-0002-828: ref=['FRUIT', 'GATHERED', 'IN', 'WET', 'OR', 'FOGGY', 'WEATHER', 'WILL', 'SOON', 'BE', 'MILDEWED', 'AND', 'BE', 'OF', 'NO', 'SERVICE', 'FOR', 'PRESERVES'] +3538-142836-0002-828: hyp=['FRUIT', 'GATHERED', 'IN', 'WET', 'OR', 'FOGGY', 'WEATHER', 'WILL', 'SOON', 'BE', 'MILDEWED', 'AND', 'BE', 'OF', 'NO', 'SERVICE', 'FOR', 'PRESERVES'] +3538-142836-0003-829: ref=['BUT', 'TO', 'DISTINGUISH', 'THESE', 'PROPERLY', 'REQUIRES', 'VERY', 'GREAT', 'ATTENTION', 'AND', 'CONSIDERABLE', 'EXPERIENCE'] +3538-142836-0003-829: hyp=['BUT', 'TO', 'DISTINGUISH', 'THESE', 'PROPERLY', 'REQUIRES', 'VERY', 'GREAT', 'ATTENTION', 'AND', 'CONSIDERABLE', 'EXPERIENCE'] +3538-142836-0004-830: ref=['IF', 'YOU', 'DIP', 'THE', 'FINGER', 'INTO', 'THE', 'SYRUP', 'AND', 'APPLY', 'IT', 'TO', 'THE', 'THUMB', 'THE', 'TENACITY', 'OF', 'THE', 'SYRUP', 'WILL', 'ON', 'SEPARATING', 'THE', 'FINGER', 'AND', 'THUMB', 'AFFORD', 'A', 'THREAD', 'WHICH', 'SHORTLY', 'BREAKS', 'THIS', 'IS', 'THE', 'LITTLE', 'THREAD'] +3538-142836-0004-830: hyp=['IF', 'YOU', 'DIP', 'THE', 'FINGER', 'INTO', 'THE', 'SYRUP', 'AND', 'APPLY', 'IT', 'TO', 'THE', 'THUMB', 'THE', 'TENACITY', 'OF', 'THE', 'SYRUP', 'WILL', 'ON', 'SEPARATING', 'THE', 'FINGER', 'AND', 'THUMB', 'AFFORD', 'A', 'THREAD', 'WHICH', 'SHORTLY', 'BREAKS', 'THIS', 'IS', 'THE', 'LITTLE', 'THREAD'] +3538-142836-0005-831: ref=['LET', 'IT', 'BOIL', 'UP', 'AGAIN', 'THEN', 'TAKE', 'IT', 'OFF', 'AND', 'REMOVE', 'CAREFULLY', 'THE', 'SCUM', 'THAT', 'HAS', 'RISEN'] +3538-142836-0005-831: hyp=['LET', 'IT', 'BOIL', 'UP', 'AGAIN', 'THEN', 'TAKE', 'IT', 'OFF', 'AND', 'REMOVE', 'CAREFULLY', 'THE', 'SCUM', 'THAT', 'HAS', 'RISEN'] +3538-142836-0006-832: ref=['IT', 'IS', 'CONSIDERED', 'TO', 'BE', 'SUFFICIENTLY', 'BOILED', 'WHEN', 'SOME', 'TAKEN', 'UP', 'IN', 'A', 'SPOON', 'POURS', 'OUT', 'LIKE', 'OIL'] +3538-142836-0006-832: hyp=['IT', 'IS', 'CONSIDERED', 'TO', 'BE', 'SUFFICIENTLY', 'BOILED', 'WHEN', 'SOME', 'TAKEN', 'UP', 'IN', 'A', 'SPOON', 'POURS', 'OUT', 'LIKE', 'OIL'] +3538-142836-0007-833: ref=['BEFORE', 'SUGAR', 'WAS', 'IN', 'USE', 'HONEY', 'WAS', 'EMPLOYED', 'TO', 'PRESERVE', 'MANY', 'VEGETABLE', 'PRODUCTIONS', 'THOUGH', 'THIS', 'SUBSTANCE', 'HAS', 'NOW', 'GIVEN', 'WAY', 'TO', 'THE', 'JUICE', 'OF', 'THE', 'SUGAR', 'CANE'] +3538-142836-0007-833: hyp=['BEFORE', 'SUGAR', 'WAS', 'IN', 'USE', 'HONEY', 'WAS', 'EMPLOYED', 'TO', 'PRESERVE', 'MANY', 'VEGETABLE', 'PRODUCTIONS', 'THOUGH', 'THIS', 'SUBSTANCE', 'HAS', 'NOW', 'GIVEN', 'WAY', 'TO', 'THE', 'JUICE', 'OF', 'THE', 'SUGAR', 'CANE'] +3538-142836-0008-834: ref=['FOURTEEN', 'NINETY', 'NINE'] +3538-142836-0008-834: hyp=['FOURTEEN', 'NINETY', 'NINE'] +3538-142836-0009-835: ref=['BOIL', 'THEM', 'UP', 'THREE', 'DAYS', 'SUCCESSIVELY', 'SKIMMING', 'EACH', 'TIME', 'AND', 'THEY', 'WILL', 'THEN', 'BE', 'FINISHED', 'AND', 'IN', 'A', 'STATE', 'FIT', 'TO', 'BE', 'PUT', 'INTO', 'POTS', 'FOR', 'USE'] +3538-142836-0009-835: hyp=['BOIL', 'THEM', 'UP', 'THREE', 'DAYS', 'SUCCESSIVELY', 'SKIMMING', 'EACH', 'TIME', 'AND', 'THEY', 'WILL', 'THEN', 'BE', 'FINISHED', 'AND', 'IN', 'A', 'STATE', 'FIT', 'TO', 'BE', 'PUT', 'INTO', 'POTS', 'FOR', 'USE'] +3538-142836-0010-836: ref=['THE', 'REASON', 'WHY', 'THE', 'FRUIT', 'IS', 'EMPTIED', 
'OUT', 'OF', 'THE', 'PRESERVING', 'PAN', 'INTO', 'AN', 'EARTHEN', 'PAN', 'IS', 'THAT', 'THE', 'ACID', 'OF', 'THE', 'FRUIT', 'ACTS', 'UPON', 'THE', 'COPPER', 'OF', 'WHICH', 'THE', 'PRESERVING', 'PANS', 'ARE', 'USUALLY', 'MADE'] +3538-142836-0010-836: hyp=['THE', 'REASON', 'WHY', 'THE', 'FRUIT', 'IS', 'EMPTIED', 'OUT', 'OF', 'THE', 'PRESERVING', 'PAN', 'INTO', 'AN', 'EARTHEN', 'PAN', 'IS', 'THAT', 'THE', 'ACID', 'OF', 'THE', 'FRUIT', 'ACTS', 'UPON', 'THE', 'COPPER', 'OF', 'WHICH', 'THE', 'PRESERVING', 'PANS', 'ARE', 'USUALLY', 'MADE'] +3538-142836-0011-837: ref=['FROM', 'THIS', 'EXAMPLE', 'THE', 'PROCESS', 'OF', 'PRESERVING', 'FRUITS', 'BY', 'SYRUP', 'WILL', 'BE', 'EASILY', 'COMPREHENDED'] +3538-142836-0011-837: hyp=['FROM', 'THIS', 'EXAMPLE', 'THE', 'PROCESS', 'OF', 'PRESERVING', 'FRUITS', 'BY', 'SYRUP', 'WOULD', 'BE', 'EASILY', 'COMPREHENDED'] +3538-142836-0012-838: ref=['THEY', 'SHOULD', 'BE', 'DRIED', 'IN', 'THE', 'STOVE', 'OR', 'OVEN', 'ON', 'A', 'SIEVE', 'AND', 'TURNED', 'EVERY', 'SIX', 'OR', 'EIGHT', 'HOURS', 'FRESH', 'POWDERED', 'SUGAR', 'BEING', 'SIFTED', 'OVER', 'THEM', 'EVERY', 'TIME', 'THEY', 'ARE', 'TURNED'] +3538-142836-0012-838: hyp=['THEY', 'SHOULD', 'BE', 'DRIED', 'IN', 'A', 'STOVE', 'OR', 'OVEN', 'ON', 'A', 'SIEVE', 'AND', 'TURNED', 'EVERY', 'SIX', 'OR', 'EIGHT', 'HOURS', 'FRESH', 'POWDERED', 'SUGAR', 'BEING', 'SIFTED', 'OVER', 'THEM', 'EVERY', 'TIME', 'THEY', 'ARE', 'TURNED'] +3538-142836-0013-839: ref=['IN', 'THIS', 'WAY', 'IT', 'IS', 'ALSO', 'THAT', 'ORANGE', 'AND', 'LEMON', 'CHIPS', 'ARE', 'PRESERVED'] +3538-142836-0013-839: hyp=['IN', 'THIS', 'WAY', 'IT', 'IS', 'ALSO', 'THAT', 'ORANGE', 'AND', 'LEMONSHIPS', 'ARE', 'PRESERVED'] +3538-142836-0014-840: ref=['MARMALADES', 'JAMS', 'AND', 'FRUIT', 'PASTES', 'ARE', 'OF', 'THE', 'SAME', 'NATURE', 'AND', 'ARE', 'NOW', 'IN', 'VERY', 'GENERAL', 'REQUEST'] +3538-142836-0014-840: hyp=['MARMALADES', 'JAMS', 'AND', 'FRUIT', 'PASTE', 'ARE', 'OF', 'THE', 'SAME', 'NATURE', 'AND', 'ARE', 'NOW', 'IN', 'VERY', 'GENERAL', 'REQUEST'] +3538-142836-0015-841: ref=['MARMALADES', 'AND', 'JAMS', 'DIFFER', 'LITTLE', 'FROM', 'EACH', 'OTHER', 'THEY', 'ARE', 'PRESERVES', 'OF', 'A', 'HALF', 'LIQUID', 'CONSISTENCY', 'MADE', 'BY', 'BOILING', 'THE', 'PULP', 'OF', 'FRUITS', 'AND', 'SOMETIMES', 'PART', 'OF', 'THE', 'RINDS', 'WITH', 'SUGAR'] +3538-142836-0015-841: hyp=['MARMALADES', 'AND', 'JAMS', 'DIFFER', 'LITTLE', 'FROM', 'EACH', 'OTHER', 'THEY', 'ARE', 'PRESERVES', 'OF', 'HALF', 'LIQUID', 'CONSISTENCY', 'MADE', 'BY', 'BOILING', 'THE', 'PULP', 'OF', 'FRUITS', 'AND', 'SOMETIMES', 'PART', 'OF', 'THE', 'RINDS', 'WITH', 'SUGAR'] +3538-142836-0016-842: ref=['THAT', 'THEY', 'MAY', 'KEEP', 'IT', 'IS', 'NECESSARY', 'NOT', 'TO', 'BE', 'SPARING', 'OF', 'SUGAR', 'FIFTEEN', 'O', 'THREE'] +3538-142836-0016-842: hyp=['THAT', 'THEY', 'MAY', 'KEEP', 'IT', 'IS', 'NECESSARY', 'NOT', 'TO', 'BE', 'SPARING', 'OF', 'SUGAR', 'FIFTEEN', 'O', 'THREE'] +3538-142836-0017-843: ref=['IN', 'ALL', 'THE', 'OPERATIONS', 'FOR', 'PRESERVE', 'MAKING', 'WHEN', 'THE', 'PRESERVING', 'PAN', 'IS', 'USED', 'IT', 'SHOULD', 'NOT', 'BE', 'PLACED', 'ON', 'THE', 'FIRE', 'BUT', 'ON', 'A', 'TRIVET', 'UNLESS', 'THE', 'JAM', 'IS', 'MADE', 'ON', 'A', 'HOT', 'PLATE', 'WHEN', 'THIS', 'IS', 'NOT', 'NECESSARY'] +3538-142836-0017-843: hyp=['IN', 'ALL', 'THE', 'OPERATIONS', 'FOR', 'PRESERVE', 'MAKING', 'WHEN', 'THE', 'PRESERVING', 'PAN', 'IS', 'USED', 'IT', 'SHOULD', 'NOT', 'BE', 'PLACED', 'ON', 'THE', 'FIRE', 'BUT', 'ON', 'A', 'TRIVET', 'UNLESS', 'THE', 'JAM', 'IS', 'MADE', 'ON', 'A', 'HOT', 'PLATE', 'WHEN', 
'THIS', 'IS', 'NOT', 'NECESSARY'] +3538-142836-0018-844: ref=['CONFECTIONARY', 'FIFTEEN', 'O', 'EIGHT'] +3538-142836-0018-844: hyp=['CONFECTIONERY', 'FIFTEEN', 'O', 'EIGHT'] +3538-142836-0019-845: ref=['IN', 'SPEAKING', 'OF', 'CONFECTIONARY', 'IT', 'SHOULD', 'BE', 'REMARKED', 'THAT', 'ALL', 'THE', 'VARIOUS', 'PREPARATIONS', 'ABOVE', 'NAMED', 'COME', 'STRICTLY', 'SPEAKING', 'UNDER', 'THAT', 'HEAD', 'FOR', 'THE', 'VARIOUS', 'FRUITS', 'FLOWERS', 'HERBS', 'ROOTS', 'AND', 'JUICES', 'WHICH', 'WHEN', 'BOILED', 'WITH', 'SUGAR', 'WERE', 'FORMERLY', 'EMPLOYED', 'IN', 'PHARMACY', 'AS', 'WELL', 'AS', 'FOR', 'SWEETMEATS', 'WERE', 'CALLED', 'CONFECTIONS', 'FROM', 'THE', 'LATIN', 'WORD', 'CONFICERE', 'TO', 'MAKE', 'UP', 'BUT', 'THE', 'TERM', 'CONFECTIONARY', 'EMBRACES', 'A', 'VERY', 'LARGE', 'CLASS', 'INDEED', 'OF', 'SWEET', 'FOOD', 'MANY', 'KINDS', 'OF', 'WHICH', 'SHOULD', 'NOT', 'BE', 'ATTEMPTED', 'IN', 'THE', 'ORDINARY', 'CUISINE'] +3538-142836-0019-845: hyp=['IN', 'SPEAKING', 'OF', 'CONFECTIONERY', 'IT', 'SHOULD', 'BE', 'REMARKED', 'THAT', 'ALL', 'THE', 'VARIOUS', 'PREPARATIONS', 'ABOVE', 'NAMED', 'COME', 'STRICTLY', 'SPEAKING', 'UNDER', 'THAT', 'HEAD', 'FOR', 'THE', 'VARIOUS', 'FRUITS', 'FLOWERS', 'HERBS', 'ROOTS', 'AND', 'JUICES', 'WHICH', 'WHEN', 'BOILED', 'WITH', 'SUGAR', 'WERE', 'FORMERLY', 'EMPLOYED', 'IN', 'PHARMACY', 'AS', 'WELL', 'AS', 'FOR', 'SWEETMEATS', 'WERE', 'CALLED', 'CONFECTIONS', 'FROM', 'THE', 'LATIN', 'WORD', 'CONFIRCET', 'TO', 'MAKE', 'UP', 'BUT', 'THE', 'TERM', 'CONFECTIONERY', 'EMBRACES', 'A', 'LARGE', 'CLASS', 'INDEED', 'OF', 'SWEET', 'FOOD', 'MANY', 'KINDS', 'OF', 'WHICH', 'SHOULD', 'NOT', 'BE', 'ATTEMPTED', 'IN', 'THE', 'ORDINARY', 'CUISINE'] +3538-142836-0020-846: ref=['THE', 'THOUSAND', 'AND', 'ONE', 'ORNAMENTAL', 'DISHES', 'THAT', 'ADORN', 'THE', 'TABLES', 'OF', 'THE', 'WEALTHY', 'SHOULD', 'BE', 'PURCHASED', 'FROM', 'THE', 'CONFECTIONER', 'THEY', 'CANNOT', 'PROFITABLY', 'BE', 'MADE', 'AT', 'HOME'] +3538-142836-0020-846: hyp=['THE', 'THOUSAND', 'AND', 'ONE', 'ORNAMENTAL', 'DISHES', 'THAT', 'ADORN', 'THE', 'TABLES', 'OF', 'THE', 'WEALTHY', 'SHOULD', 'BE', 'PURCHASED', 'FROM', 'THE', 'CONFECTIONER', 'THEY', 'CAN', 'NOT', 'PROFITABLY', 'BE', 'MADE', 'AT', 'HOME'] +3538-142836-0021-847: ref=['HOWEVER', 'AS', 'LATE', 'AS', 'THE', 'REIGNS', 'OF', 'OUR', 'TWO', 'LAST', 'GEORGES', 'FABULOUS', 'SUMS', 'WERE', 'OFTEN', 'EXPENDED', 'UPON', 'FANCIFUL', 'DESSERTS'] +3538-142836-0021-847: hyp=['HOWEVER', 'AS', 'LATE', 'AS', 'THE', 'REIGN', 'OF', 'OUR', 'TWO', 'LAST', 'GEORGES', 'FABULOUS', 'SUMS', 'WERE', 'OFTEN', 'EXPENDED', 'UPON', 'FANCIFUL', 'DESERTS'] +3538-142836-0022-848: ref=['THE', 'SHAPE', 'OF', 'THE', 'DISHES', 'VARIES', 'AT', 'DIFFERENT', 'PERIODS', 'THE', 'PREVAILING', 'FASHION', 'AT', 'PRESENT', 'BEING', 'OVAL', 'AND', 'CIRCULAR', 'DISHES', 'ON', 'STEMS'] +3538-142836-0022-848: hyp=['THE', 'SHAPE', 'OF', 'THE', 'DISHES', 'VARIES', 'AT', 'DIFFERENT', 'PERIODS', 'THE', 'PREVAILING', 'FASHION', 'AT', 'PRESENT', 'BEING', 'OVAL', 'AND', 'CIRCULAR', 'DISHES', 'ON', 'STEMS'] +3538-142836-0023-849: ref=['ICES'] +3538-142836-0023-849: hyp=['ISIS'] +3538-142836-0024-850: ref=['AT', 'DESSERTS', 'OR', 'AT', 'SOME', 'EVENING', 'PARTIES', 'ICES', 'ARE', 'SCARCELY', 'TO', 'BE', 'DISPENSED', 'WITH'] +3538-142836-0024-850: hyp=['AT', 'DESSERTS', 'OR', 'AT', 'SOME', 'EVENING', 'PARTIES', 'ICES', 'ARE', 'SCARCELY', 'TO', 'BE', 'DISPENSED', 'WITH'] +3538-142836-0025-851: ref=['THE', 'SPADDLE', 'IS', 'GENERALLY', 'MADE', 'OF', 'COPPER', 'KEPT', 'BRIGHT', 'AND', 'CLEAN'] +3538-142836-0025-851: 
hyp=['THE', 'SPATTLE', 'IS', 'GENERALLY', 'MADE', 'OF', 'COPPER', 'KEPT', 'BRIGHT', 'AND', 'CLEAN'] +3538-142836-0026-852: ref=['THEY', 'SHOULD', 'BE', 'TAKEN', 'IMMEDIATELY', 'AFTER', 'THE', 'REPAST', 'OR', 'SOME', 'HOURS', 'AFTER', 'BECAUSE', 'THE', 'TAKING', 'THESE', 'SUBSTANCES', 'DURING', 'THE', 'PROCESS', 'OF', 'DIGESTION', 'IS', 'APT', 'TO', 'PROVOKE', 'INDISPOSITION'] +3538-142836-0026-852: hyp=['THEY', 'SHOULD', 'BE', 'TAKEN', 'IMMEDIATELY', 'AFTER', 'THE', 'REPAST', 'OR', 'SOME', 'HOURS', 'AFTER', 'BECAUSE', 'THE', 'TAKING', 'OF', 'THESE', 'SUBSTANCES', 'DURING', 'THE', 'PROCESS', 'OF', 'DIGESTION', 'IS', 'APT', 'TO', 'PROVOKE', 'INDISPOSITION'] +3538-163619-0000-853: ref=['THERE', 'WAS', 'ONCE', 'ON', 'A', 'TIME', 'A', 'WIDOWER', 'WHO', 'HAD', 'A', 'SON', 'AND', 'A', 'DAUGHTER', 'BY', 'HIS', 'FIRST', 'WIFE'] +3538-163619-0000-853: hyp=['THERE', 'WAS', 'ONCE', 'ON', 'A', 'TIME', 'A', 'WIDOWER', 'WHO', 'HAD', 'A', 'SON', 'AND', 'A', 'DAUGHTER', 'BY', 'HIS', 'FIRST', 'WIFE'] +3538-163619-0001-854: ref=['FROM', 'THE', 'VERY', 'DAY', 'THAT', 'THE', 'NEW', 'WIFE', 'CAME', 'INTO', 'THE', 'HOUSE', 'THERE', 'WAS', 'NO', 'PEACE', 'FOR', 'THE', "MAN'S", 'CHILDREN', 'AND', 'NOT', 'A', 'CORNER', 'TO', 'BE', 'FOUND', 'WHERE', 'THEY', 'COULD', 'GET', 'ANY', 'REST', 'SO', 'THE', 'BOY', 'THOUGHT', 'THAT', 'THE', 'BEST', 'THING', 'HE', 'COULD', 'DO', 'WAS', 'TO', 'GO', 'OUT', 'INTO', 'THE', 'WORLD', 'AND', 'TRY', 'TO', 'EARN', 'HIS', 'OWN', 'BREAD'] +3538-163619-0001-854: hyp=['FROM', 'THE', 'VERY', 'DAY', 'THAT', 'THE', 'NEW', 'WIFE', 'CAME', 'INTO', 'THE', 'HOUSE', 'THERE', 'WAS', 'NO', 'PEACE', 'FOR', 'THE', "MAN'S", 'CHILDREN', 'AND', 'NOT', 'A', 'CORNER', 'TO', 'BE', 'FOUND', 'WHERE', 'THEY', 'COULD', 'GET', 'ANY', 'REST', 'SO', 'THE', 'BOY', 'THOUGHT', 'THAT', 'THE', 'BEST', 'THING', 'HE', 'COULD', 'DO', 'WAS', 'TO', 'GO', 'OUT', 'INTO', 'THE', 'WORLD', 'AND', 'TRY', 'TO', 'EARN', 'HIS', 'OWN', 'BREAD'] +3538-163619-0002-855: ref=['BUT', 'HIS', 'SISTER', 'WHO', 'WAS', 'STILL', 'AT', 'HOME', 'FARED', 'WORSE', 'AND', 'WORSE'] +3538-163619-0002-855: hyp=['BUT', 'HIS', 'SISTER', 'WHO', 'WAS', 'STILL', 'AT', 'HOME', 'FARED', 'WORSE', 'AND', 'WORSE'] +3538-163619-0003-856: ref=['KISS', 'ME', 'GIRL', 'SAID', 'THE', 'HEAD'] +3538-163619-0003-856: hyp=['KISS', 'ME', 'GIRL', 'SAID', 'THE', 'HEAD'] +3538-163619-0004-857: ref=['WHEN', 'THE', 'KING', 'ENTERED', 'AND', 'SAW', 'IT', 'HE', 'STOOD', 'STILL', 'AS', 'IF', 'HE', 'WERE', 'IN', 'FETTERS', 'AND', 'COULD', 'NOT', 'STIR', 'FROM', 'THE', 'SPOT', 'FOR', 'THE', 'PICTURE', 'SEEMED', 'TO', 'HIM', 'SO', 'BEAUTIFUL'] +3538-163619-0004-857: hyp=['WHEN', 'THE', 'KING', 'ENTERED', 'AND', 'SAW', 'IT', 'HE', 'STOOD', 'STILL', 'AS', 'IF', 'HE', 'WERE', 'IN', 'FETTERS', 'AND', 'COULD', 'NOT', 'STIR', 'FROM', 'THE', 'SPOT', 'FOR', 'THE', 'PICTURE', 'SEEMED', 'TO', 'HIM', 'SO', 'BEAUTIFUL'] +3538-163619-0005-858: ref=['THE', 'YOUTH', 'PROMISED', 'TO', 'MAKE', 'ALL', 'THE', 'HASTE', 'HE', 'COULD', 'AND', 'SET', 'FORTH', 'FROM', 'THE', "KING'S", 'PALACE'] +3538-163619-0005-858: hyp=['THE', 'YOUTH', 'PROMISED', 'TO', 'MAKE', 'ALL', 'THE', 'HASTE', 'HE', 'COULD', 'AND', 'SET', 'FORTH', 'FROM', 'THE', "KING'S", 'PALACE'] +3538-163619-0006-859: ref=['AT', 'LAST', 'THEY', 'CAME', 'IN', 'SIGHT', 'OF', 'LAND'] +3538-163619-0006-859: hyp=['AT', 'LAST', 'THEY', 'CAME', 'IN', 'SIGHT', 'OF', 'LAND'] +3538-163619-0007-860: ref=['WELL', 'IF', 'MY', 'BROTHER', 'SAYS', 'SO', 'I', 'MUST', 'DO', 'IT', 'SAID', 'THE', "MAN'S", 'DAUGHTER', 'AND', 'SHE', 'FLUNG', 'HER', 'CASKET', 
'INTO', 'THE', 'SEA'] +3538-163619-0007-860: hyp=['WELL', 'IF', 'MY', 'BROTHER', 'SAYS', 'SO', 'I', 'MUST', 'DO', 'IT', 'SAID', 'THE', "MAN'S", 'DAUGHTER', 'AND', 'SHE', 'FLUNG', 'HER', 'CASKET', 'INTO', 'THE', 'SEA'] +3538-163619-0008-861: ref=['WHAT', 'IS', 'MY', 'BROTHER', 'SAYING', 'ASKED', 'HIS', 'SISTER', 'AGAIN'] +3538-163619-0008-861: hyp=['WHAT', 'IS', 'MY', 'BROTHER', 'SAYING', 'ASKED', 'HIS', 'SISTER', 'AGAIN'] +3538-163619-0009-862: ref=['ON', 'THE', 'FIRST', 'THURSDAY', 'NIGHT', 'AFTER', 'THIS', 'A', 'BEAUTIFUL', 'MAIDEN', 'CAME', 'INTO', 'THE', 'KITCHEN', 'OF', 'THE', 'PALACE', 'AND', 'BEGGED', 'THE', 'KITCHEN', 'MAID', 'WHO', 'SLEPT', 'THERE', 'TO', 'LEND', 'HER', 'A', 'BRUSH'] +3538-163619-0009-862: hyp=['ON', 'THE', 'FIRST', 'THURSDAY', 'NIGHT', 'AFTER', 'THIS', 'A', 'BEAUTIFUL', 'MAIDEN', 'CAME', 'INTO', 'THE', 'KITCHEN', 'OF', 'THE', 'PALACE', 'AND', 'BEGGED', 'THE', 'KITCHEN', 'MAID', 'WHO', 'SLEPT', 'THERE', 'TO', 'LEND', 'HER', 'A', 'BRUSH'] +3538-163619-0010-863: ref=['SHE', 'BEGGED', 'VERY', 'PRETTILY', 'AND', 'GOT', 'IT', 'AND', 'THEN', 'SHE', 'BRUSHED', 'HER', 'HAIR', 'AND', 'THE', 'GOLD', 'DROPPED', 'FROM', 'IT'] +3538-163619-0010-863: hyp=['SHE', 'BEGGED', 'VERY', 'PRETTILY', 'AND', 'GOT', 'IT', 'AND', 'THEN', 'SHE', 'BRUSHED', 'HER', 'HAIR', 'AND', 'THE', 'GOLD', 'DROPPED', 'FROM', 'IT'] +3538-163619-0011-864: ref=['OUT', 'ON', 'THEE', 'UGLY', 'BUSHY', 'BRIDE', 'SLEEPING', 'SO', 'SOFT', 'BY', 'THE', 'YOUNG', "KING'S", 'SIDE', 'ON', 'SAND', 'AND', 'STONES', 'MY', 'BED', 'I', 'MAKE', 'AND', 'MY', 'BROTHER', 'SLEEPS', 'WITH', 'THE', 'COLD', 'SNAKE', 'UNPITIED', 'AND', 'UNWEPT'] +3538-163619-0011-864: hyp=['OUT', 'ON', 'ME', 'UGLY', 'BUSHY', 'BRIDE', 'SLEEPING', 'SO', 'SOFT', 'BY', 'THE', 'YOUNG', "KING'S", 'SIDE', 'ON', 'SAND', 'AND', 'STONES', 'MY', 'BED', 'I', 'MAKE', 'AND', 'MY', 'BROTHER', 'SLEEPS', 'WITH', 'A', 'COLD', 'SNAKE', 'UNPITIED', 'AND', 'UNWEPT'] +3538-163619-0012-865: ref=['I', 'SHALL', 'COME', 'TWICE', 'MORE', 'AND', 'THEN', 'NEVER', 'AGAIN', 'SAID', 'SHE'] +3538-163619-0012-865: hyp=['I', 'SHALL', 'COME', 'TWICE', 'MORE', 'AND', 'THEN', 'NEVER', 'AGAIN', 'SAID', 'SHE'] +3538-163619-0013-866: ref=['THIS', 'TIME', 'ALSO', 'AS', 'BEFORE', 'SHE', 'BORROWED', 'A', 'BRUSH', 'AND', 'BRUSHED', 'HER', 'HAIR', 'WITH', 'IT', 'AND', 'THE', 'GOLD', 'DROPPED', 'DOWN', 'AS', 'SHE', 'DID', 'IT', 'AND', 'AGAIN', 'SHE', 'SENT', 'THE', 'DOG', 'OUT', 'THREE', 'TIMES', 'AND', 'WHEN', 'DAY', 'DAWNED', 'SHE', 'DEPARTED', 'BUT', 'AS', 'SHE', 'WAS', 'GOING', 'SHE', 'SAID', 'AS', 'SHE', 'HAD', 'SAID', 'BEFORE', 'I', 'SHALL', 'COME', 'ONCE', 'MORE', 'AND', 'THEN', 'NEVER', 'AGAIN'] +3538-163619-0013-866: hyp=['THIS', 'TIME', 'ALSO', 'AS', 'BEFORE', 'SHE', 'BORROWED', 'A', 'BRUSH', 'AND', 'BRUSHED', 'HER', 'HAIR', 'WITH', 'IT', 'AND', 'THE', 'GOLD', 'DROPPED', 'DOWN', 'AS', 'SHE', 'DID', 'IT', 'AND', 'AGAIN', 'SHE', 'SENT', 'THE', 'DOG', 'OUT', 'THREE', 'TIMES', 'AND', 'WHEN', 'DAY', 'DAWNED', 'SHE', 'DEPARTED', 'BUT', 'AS', 'SHE', 'WAS', 'GOING', 'SHE', 'SAID', 'AS', 'SHE', 'HAD', 'SAID', 'BEFORE', 'I', 'SHALL', 'COME', 'ONCE', 'MORE', 'AND', 'THEN', 'NEVER', 'AGAIN'] +3538-163619-0014-867: ref=['NO', 'ONE', 'CAN', 'TELL', 'HOW', 'DELIGHTED', 'THE', 'KING', 'WAS', 'TO', 'GET', 'RID', 'OF', 'THAT', 'HIDEOUS', 'BUSHY', 'BRIDE', 'AND', 'GET', 'A', 'QUEEN', 'WHO', 'WAS', 'BRIGHT', 'AND', 'BEAUTIFUL', 'AS', 'DAY', 'ITSELF'] +3538-163619-0014-867: hyp=['NO', 'ONE', 'CAN', 'TELL', 'HOW', 'DELIGHTED', 'THE', 'KING', 'WAS', 'TO', 'GET', 'RID', 'OF', 'THAT', 'HIDEOUS', 'BUSHY', 
'BRIDE', 'AND', 'GET', 'A', 'QUEEN', 'WHO', 'WAS', 'BRIGHT', 'AND', 'BEAUTIFUL', 'AS', 'DAY', 'ITSELF'] +3538-163622-0000-868: ref=['WILT', 'THOU', 'SERVE', 'ME', 'AND', 'WATCH', 'MY', 'SEVEN', 'FOALS', 'ASKED', 'THE', 'KING'] +3538-163622-0000-868: hyp=['WILT', 'THOU', 'SERVE', 'ME', 'AND', 'WATCH', 'MY', 'SEVEN', 'FOALS', 'ASKED', 'THE', 'KING'] +3538-163622-0001-869: ref=['THE', 'YOUTH', 'THOUGHT', 'THAT', 'IT', 'WAS', 'VERY', 'EASY', 'WORK', 'TO', 'WATCH', 'THE', 'FOALS', 'AND', 'THAT', 'HE', 'COULD', 'DO', 'IT', 'WELL', 'ENOUGH'] +3538-163622-0001-869: hyp=['THE', 'YOUTH', 'THOUGHT', 'THAT', 'IT', 'WAS', 'VERY', 'EASY', 'WORK', 'TO', 'WATCH', 'THE', 'HOLES', 'AND', 'THAT', 'HE', 'COULD', 'DO', 'IT', 'WELL', 'ENOUGH'] +3538-163622-0002-870: ref=['HAST', 'THOU', 'WATCHED', 'FAITHFULLY', 'AND', 'WELL', 'THE', 'WHOLE', 'DAY', 'LONG', 'SAID', 'THE', 'KING', 'WHEN', 'THE', 'LAD', 'CAME', 'INTO', 'HIS', 'PRESENCE', 'IN', 'THE', 'EVENING'] +3538-163622-0002-870: hyp=['HAST', 'THOU', 'WATCHED', 'FAITHFULLY', 'AND', 'WELL', 'THE', 'WHOLE', 'DAY', 'LONG', 'SAID', 'THE', 'KING', 'WHEN', 'THE', 'LAD', 'CAME', 'INTO', 'HIS', 'PRESENCE', 'IN', 'THE', 'EVENING'] +3538-163622-0003-871: ref=['YES', 'THAT', 'I', 'HAVE', 'SAID', 'THE', 'YOUTH'] +3538-163622-0003-871: hyp=['YES', 'THAT', 'I', 'HAVE', 'SAID', 'THE', 'YOUTH'] +3538-163622-0004-872: ref=['HE', 'HAD', 'GONE', 'OUT', 'ONCE', 'TO', 'SEEK', 'A', 'PLACE', 'HE', 'SAID', 'BUT', 'NEVER', 'WOULD', 'HE', 'DO', 'SUCH', 'A', 'THING', 'AGAIN'] +3538-163622-0004-872: hyp=['HE', 'HAD', 'GONE', 'OUT', 'ONCE', 'TO', 'SEEK', 'A', 'PLACE', 'HE', 'SAID', 'BUT', 'NEVER', 'WOULD', 'HE', 'DO', 'SUCH', 'A', 'THING', 'AGAIN'] +3538-163622-0005-873: ref=['THEN', 'THE', 'KING', 'PROMISED', 'HIM', 'THE', 'SAME', 'PUNISHMENT', 'AND', 'THE', 'SAME', 'REWARD', 'THAT', 'HE', 'HAD', 'PROMISED', 'HIS', 'BROTHER'] +3538-163622-0005-873: hyp=['THEN', 'THE', 'KING', 'PROMISED', 'HIM', 'THE', 'SAME', 'PUNISHMENT', 'AND', 'THE', 'SAME', 'REWARD', 'THAT', 'HE', 'HAD', 'PROMISED', 'HIS', 'BROTHER'] +3538-163622-0006-874: ref=['WHEN', 'HE', 'HAD', 'RUN', 'AFTER', 'THE', 'FOALS', 'FOR', 'A', 'LONG', 'LONG', 'TIME', 'AND', 'WAS', 'HOT', 'AND', 'TIRED', 'HE', 'PASSED', 'BY', 'A', 'CLEFT', 'IN', 'THE', 'ROCK', 'WHERE', 'AN', 'OLD', 'WOMAN', 'WAS', 'SITTING', 'SPINNING', 'WITH', 'A', 'DISTAFF', 'AND', 'SHE', 'CALLED', 'TO', 'HIM'] +3538-163622-0006-874: hyp=['WHEN', 'HE', 'HAD', 'RUN', 'AFTER', 'THE', 'FOALS', 'FOR', 'A', 'LONG', 'LONG', 'TIME', 'AND', 'WAS', 'HOT', 'AND', 'TIRED', 'HE', 'PASSED', 'BY', 'A', 'CLEFT', 'IN', 'THE', 'ROCK', 'WHERE', 'AN', 'OLD', 'WOMAN', 'WAS', 'SITTING', 'SPINNING', 'WITH', 'A', 'DISTAFF', 'AND', 'SHE', 'CALLED', 'TO', 'HIM'] +3538-163622-0007-875: ref=['COME', 'HITHER', 'COME', 'HITHER', 'MY', 'HANDSOME', 'SON', 'AND', 'LET', 'ME', 'COMB', 'YOUR', 'HAIR'] +3538-163622-0007-875: hyp=['COME', 'HITHER', 'COME', 'HITHER', 'MY', 'HANDSOME', 'SON', 'AND', 'LET', 'ME', 'COMB', 'YOUR', 'HAIR'] +3538-163622-0008-876: ref=['THE', 'YOUTH', 'LIKED', 'THE', 'THOUGHT', 'OF', 'THIS', 'LET', 'THE', 'FOALS', 'RUN', 'WHERE', 'THEY', 'CHOSE', 'AND', 'SEATED', 'HIMSELF', 'IN', 'THE', 'CLEFT', 'OF', 'THE', 'ROCK', 'BY', 'THE', 'SIDE', 'OF', 'THE', 'OLD', 'HAG'] +3538-163622-0008-876: hyp=['THE', 'YOUTH', 'LIKED', 'THE', 'THOUGHT', 'OF', 'THIS', 'LET', 'THE', 'FOALS', 'RUN', 'WHERE', 'THEY', 'CHOSE', 'AND', 'SEATED', 'HIMSELF', 'IN', 'THE', 'CLEFT', 'OF', 'THE', 'ROCK', 'BY', 'THE', 'SIDE', 'OF', 'THE', 'OLD', 'HAG'] +3538-163622-0009-877: ref=['SO', 'THERE', 'HE', 'SAT', 
'WITH', 'HIS', 'HEAD', 'ON', 'HER', 'LAP', 'TAKING', 'HIS', 'EASE', 'THE', 'LIVELONG', 'DAY'] +3538-163622-0009-877: hyp=['SO', 'THERE', 'HE', 'SAT', 'WITH', 'HIS', 'HEAD', 'ON', 'HER', 'LAP', 'TAKING', 'HIS', 'EASE', 'THE', 'LIVELONG', 'DAY'] +3538-163622-0010-878: ref=['ON', 'THE', 'THIRD', 'DAY', 'CINDERLAD', 'WANTED', 'TO', 'SET', 'OUT'] +3538-163622-0010-878: hyp=['ON', 'THE', 'THIRD', 'DAY', 'SAID', 'THE', 'LAD', 'WANTED', 'TO', 'SET', 'OUT'] +3538-163622-0011-879: ref=['THE', 'TWO', 'BROTHERS', 'LAUGHED', 'AT', 'HIM', 'AND', 'HIS', 'FATHER', 'AND', 'MOTHER', 'BEGGED', 'HIM', 'NOT', 'TO', 'GO', 'BUT', 'ALL', 'TO', 'NO', 'PURPOSE', 'AND', 'CINDERLAD', 'SET', 'OUT', 'ON', 'HIS', 'WAY'] +3538-163622-0011-879: hyp=['THE', 'TWO', 'BROTHERS', 'LAUGHED', 'AT', 'HIM', 'AND', 'HIS', 'FATHER', 'AND', 'MOTHER', 'BEGGED', 'HIM', 'NOT', 'TO', 'GO', 'BUT', 'ALL', 'TO', 'NO', 'PURPOSE', 'AND', 'SINDERLAD', 'SET', 'OUT', 'ON', 'HIS', 'WAY'] +3538-163622-0012-880: ref=['I', 'AM', 'WALKING', 'ABOUT', 'IN', 'SEARCH', 'OF', 'A', 'PLACE', 'SAID', 'CINDERLAD'] +3538-163622-0012-880: hyp=["I'M", 'WALKING', 'ABOUT', 'IN', 'SEARCH', 'OF', 'A', 'PLACE', 'SAID', 'SINDA', 'LAD'] +3538-163622-0013-881: ref=['I', 'WOULD', 'MUCH', 'RATHER', 'HAVE', 'THE', 'PRINCESS', 'SAID', 'CINDERLAD'] +3538-163622-0013-881: hyp=['I', 'WOULD', 'MUCH', 'RATHER', 'HAVE', 'THE', 'PRINCESS', 'SAID', 'CINDERLAD'] +3538-163622-0014-882: ref=['AND', 'THUS', 'THEY', 'JOURNEYED', 'ONWARDS', 'A', 'LONG', 'LONG', 'WAY'] +3538-163622-0014-882: hyp=['AND', 'THUS', 'THEY', 'JOURNEYED', 'ONWARDS', 'A', 'LONG', 'LONG', 'WAY'] +3538-163622-0015-883: ref=['WHEN', 'THEY', 'HAD', 'GONE', 'THUS', 'FOR', 'A', 'LONG', 'LONG', 'WAY', 'THE', 'FOAL', 'AGAIN', 'ASKED', 'DOST', 'THOU', 'SEE', 'ANYTHING', 'NOW'] +3538-163622-0015-883: hyp=['WHEN', 'THEY', 'HAD', 'GONE', 'THUS', 'FOR', 'A', 'LONG', 'LONG', 'WAY', 'THE', 'FOAL', 'AGAIN', 'ASKED', 'DOST', 'THOU', 'SEE', 'ANYTHING', 'NOW'] +3538-163622-0016-884: ref=['YES', 'NOW', 'I', 'SEE', 'SOMETHING', 'THAT', 'IS', 'WHITE', 'SAID', 'CINDERLAD'] +3538-163622-0016-884: hyp=['YES', 'NOW', 'I', 'SEE', 'SOMETHING', 'THAT', 'IS', 'WHITE', 'SAID', 'CINDERLAD'] +3538-163622-0017-885: ref=['IT', 'LOOKS', 'LIKE', 'THE', 'TRUNK', 'OF', 'A', 'GREAT', 'THICK', 'BIRCH', 'TREE'] +3538-163622-0017-885: hyp=['IT', 'LOOKS', 'LIKE', 'THE', 'TRUNK', 'OF', 'A', 'GREAT', 'THICK', 'BIRCH', 'TREE'] +3538-163622-0018-886: ref=['CINDERLAD', 'TRIED', 'BUT', 'COULD', 'NOT', 'DO', 'IT', 'SO', 'HE', 'HAD', 'TO', 'TAKE', 'A', 'DRAUGHT', 'FROM', 'THE', 'PITCHER', 'AND', 'THEN', 'ONE', 'MORE', 'AND', 'AFTER', 'THAT', 'STILL', 'ANOTHER', 'AND', 'THEN', 'HE', 'WAS', 'ABLE', 'TO', 'WIELD', 'THE', 'SWORD', 'WITH', 'PERFECT', 'EASE'] +3538-163622-0018-886: hyp=['SINDERLAD', 'TRIED', 'BUT', 'COULD', 'NOT', 'DO', 'IT', 'SO', 'HE', 'HAD', 'TO', 'TAKE', 'A', 'DROP', 'FROM', 'THE', 'PITCHER', 'AND', 'THEN', 'ONE', 'MORE', 'AND', 'AFTER', 'THAT', 'STILL', 'ANOTHER', 'AND', 'THEN', 'HE', 'WAS', 'ABLE', 'TO', 'WIELD', 'THE', 'SWORD', 'WITH', 'PERFECT', 'EASE'] +3538-163622-0019-887: ref=['FOR', 'WE', 'ARE', 'BROTHERS', 'OF', 'THE', 'PRINCESS', 'WHOM', 'THOU', 'ART', 'TO', 'HAVE', 'WHEN', 'THOU', 'CANST', 'TELL', 'THE', 'KING', 'WHAT', 'WE', 'EAT', 'AND', 'DRINK', 'BUT', 'THERE', 'IS', 'A', 'MIGHTY', 'TROLL', 'WHO', 'HAS', 'CAST', 'A', 'SPELL', 'OVER', 'US'] +3538-163622-0019-887: hyp=['FOR', 'WE', 'ARE', 'BROTHERS', 'OF', 'THE', 'PRINCESS', 'WHOM', 'THOU', 'ART', 'TO', 'HAVE', 'WHEN', 'THOU', 'CANST', 'TELL', 'THE', 'KING', 'WHAT', 'WE', 'EAT', 'AND', 
'DRINK', 'BUT', 'THERE', 'IS', 'A', 'MIGHTY', 'TROLL', 'WHO', 'HAS', 'CAST', 'A', 'SPELL', 'OVER', 'US'] +3538-163622-0020-888: ref=['WHEN', 'THEY', 'HAD', 'TRAVELLED', 'A', 'LONG', 'LONG', 'WAY', 'THE', 'FOAL', 'SAID', 'DOST', 'THOU', 'SEE', 'ANYTHING'] +3538-163622-0020-888: hyp=['WHEN', 'THEY', 'HAD', 'TRAVELED', 'A', 'LONG', 'LONG', 'WAY', 'THE', 'FOAL', 'SAID', 'DOST', 'THOU', 'SEE', 'ANYTHING'] +3538-163622-0021-889: ref=['AND', 'NOW', 'INQUIRED', 'THE', 'FOAL', 'SEEST', 'THOU', 'NOTHING', 'NOW'] +3538-163622-0021-889: hyp=['AND', 'NOW', 'INQUIRED', 'THE', 'POLE', 'SEEST', 'THOU', 'NOTHING', 'NOW'] +3538-163622-0022-890: ref=['NOW', 'THEN', 'SAID', 'THE', 'FOAL', 'DOST', 'THOU', 'NOT', 'SEE', 'ANYTHING', 'NOW'] +3538-163622-0022-890: hyp=['NOW', 'THEN', 'SAID', 'THE', 'FOAL', 'DOST', 'THOU', 'NOT', 'SEE', 'ANYTHING', 'NOW'] +3538-163622-0023-891: ref=['THAT', 'IS', 'A', 'RIVER', 'SAID', 'THE', 'FOAL', 'AND', 'WE', 'HAVE', 'TO', 'CROSS', 'IT'] +3538-163622-0023-891: hyp=['THAT', 'IS', 'A', 'RIVER', 'SAID', 'THE', 'FOAL', 'AND', 'WE', 'HAVE', 'TO', 'CROSS', 'IT'] +3538-163622-0024-892: ref=['I', 'HAVE', 'DONE', 'MY', 'BEST', 'REPLIED', 'CINDERLAD'] +3538-163622-0024-892: hyp=['I', 'HAVE', 'DONE', 'MY', 'BEST', 'REPLIED', 'SIN', 'LAD'] +3538-163624-0000-893: ref=['ONCE', 'UPON', 'A', 'TIME', 'THERE', 'WAS', 'A', 'KING', 'IN', 'THE', 'NORTH', 'WHO', 'HAD', 'WON', 'MANY', 'WARS', 'BUT', 'NOW', 'HE', 'WAS', 'OLD'] +3538-163624-0000-893: hyp=['ONCE', 'UPON', 'A', 'TIME', 'THERE', 'WAS', 'A', 'KING', 'IN', 'THE', 'NORTH', 'WHO', 'HAD', 'WON', 'MANY', 'WARS', 'BUT', 'NOW', 'HE', 'WAS', 'OLD'] +3538-163624-0001-894: ref=['THE', 'OLD', 'KING', 'WENT', 'OUT', 'AND', 'FOUGHT', 'BRAVELY', 'BUT', 'AT', 'LAST', 'HIS', 'SWORD', 'BROKE', 'AND', 'HE', 'WAS', 'WOUNDED', 'AND', 'HIS', 'MEN', 'FLED'] +3538-163624-0001-894: hyp=['THE', 'OLD', 'KING', 'WENT', 'OUT', 'AND', 'FOUGHT', 'BRAVELY', 'BUT', 'AT', 'LAST', 'HIS', 'SWORD', 'BROKE', 'AND', 'HE', 'WAS', 'WOUNDED', 'AND', 'HIS', 'MEN', 'FLED'] +3538-163624-0002-895: ref=['BUT', 'IN', 'THE', 'NIGHT', 'WHEN', 'THE', 'BATTLE', 'WAS', 'OVER', 'HIS', 'YOUNG', 'WIFE', 'CAME', 'OUT', 'AND', 'SEARCHED', 'FOR', 'HIM', 'AMONG', 'THE', 'SLAIN', 'AND', 'AT', 'LAST', 'SHE', 'FOUND', 'HIM', 'AND', 'ASKED', 'WHETHER', 'HE', 'MIGHT', 'BE', 'HEALED'] +3538-163624-0002-895: hyp=['BUT', 'IN', 'THE', 'NIGHT', 'WHEN', 'THE', 'BATTLE', 'WAS', 'OVER', 'HIS', 'YOUNG', 'WIFE', 'CAME', 'OUT', 'AND', 'SEARCHED', 'FOR', 'HIM', 'AMONG', 'THE', 'SLAIN', 'AND', 'AT', 'LAST', 'SHE', 'FOUND', 'HIM', 'AND', 'ASKED', 'WHETHER', 'HE', 'MIGHT', 'BE', 'HEALED'] +3538-163624-0003-896: ref=['SO', 'HE', 'ASKED', 'THE', 'QUEEN', 'HOW', 'DO', 'YOU', 'KNOW', 'IN', 'THE', 'DARK', 'OF', 'NIGHT', 'WHETHER', 'THE', 'HOURS', 'ARE', 'WEARING', 'TO', 'THE', 'MORNING', 'AND', 'SHE', 'SAID'] +3538-163624-0003-896: hyp=['SO', 'HE', 'ASKED', 'THE', 'QUEEN', 'HOW', 'DO', 'YOU', 'KNOW', 'IN', 'THE', 'DARK', 'OF', 'NIGHT', 'WHETHER', 'THE', 'HOURS', 'ARE', 'WEARING', 'TO', 'THE', 'MORNING', 'AND', 'SHE', 'SAID'] +3538-163624-0004-897: ref=['THEN', 'THE', 'OLD', 'MAN', 'SAID', 'DRIVE', 'ALL', 'THE', 'HORSES', 'INTO', 'THE', 'RIVER', 'AND', 'CHOOSE', 'THE', 'ONE', 'THAT', 'SWIMS', 'ACROSS'] +3538-163624-0004-897: hyp=['THEN', 'THE', 'OLD', 'MAN', 'SAID', 'DRIVE', 'ALL', 'THE', 'HORSES', 'INTO', 'THE', 'RIVER', 'AND', 'CHOOSE', 'THE', 'ONE', 'THAT', 'SWIMS', 'ACROSS'] +3538-163624-0005-898: ref=['HE', 'IS', 'NO', 'BIGGER', 'THAN', 'OTHER', 'DRAGONS', 'SAID', 'THE', 'TUTOR', 'AND', 'IF', 'YOU', 'WERE', 'AS', 
'BRAVE', 'AS', 'YOUR', 'FATHER', 'YOU', 'WOULD', 'NOT', 'FEAR', 'HIM'] +3538-163624-0005-898: hyp=['HE', 'IS', 'NO', 'BIGGER', 'THAN', 'OTHER', 'DRAGONS', 'SAID', 'THE', 'TUTOR', 'AND', 'IF', 'YOU', 'WERE', 'AS', 'BRAVE', 'AS', 'YOUR', 'FATHER', 'YOU', 'WOULD', 'NOT', 'FEAR', 'HIM'] +3538-163624-0006-899: ref=['THEN', 'THE', 'PERSON', 'WHO', 'HAD', 'KILLED', 'OTTER', 'WENT', 'DOWN', 'AND', 'CAUGHT', 'THE', 'DWARF', 'WHO', 'OWNED', 'ALL', 'THE', 'TREASURE', 'AND', 'TOOK', 'IT', 'FROM', 'HIM'] +3538-163624-0006-899: hyp=['THEN', 'THE', 'PERSON', 'WHO', 'HAD', 'KILLED', 'OTTER', 'WENT', 'DOWN', 'AND', 'CAUGHT', 'THE', 'DWARF', 'WHO', 'OWNED', 'ALL', 'THE', 'TREASURE', 'AND', 'TOOK', 'IT', 'FROM', 'HIM'] +3538-163624-0007-900: ref=['ONLY', 'ONE', 'RING', 'WAS', 'LEFT', 'WHICH', 'THE', 'DWARF', 'WORE', 'AND', 'EVEN', 'THAT', 'WAS', 'TAKEN', 'FROM', 'HIM'] +3538-163624-0007-900: hyp=['ONLY', 'ONE', 'RING', 'WAS', 'LEFT', 'WHICH', 'THE', 'DWARF', 'WORE', 'AND', 'EVEN', 'THAT', 'WAS', 'TAKEN', 'FROM', 'HIM'] +3538-163624-0008-901: ref=['SO', 'REGIN', 'MADE', 'A', 'SWORD', 'AND', 'SIGURD', 'TRIED', 'IT', 'WITH', 'A', 'BLOW', 'ON', 'A', 'LUMP', 'OF', 'IRON', 'AND', 'THE', 'SWORD', 'BROKE'] +3538-163624-0008-901: hyp=['SO', 'REGAN', 'MADE', 'A', 'SWORD', 'AND', 'SIGURD', 'TRIED', 'IT', 'WITH', 'A', 'BLOW', 'ON', 'A', 'LUMP', 'OF', 'IRON', 'AND', 'THE', 'SWORD', 'BROKE'] +3538-163624-0009-902: ref=['THEN', 'SIGURD', 'WENT', 'TO', 'HIS', 'MOTHER', 'AND', 'ASKED', 'FOR', 'THE', 'BROKEN', 'PIECES', 'OF', 'HIS', "FATHER'S", 'BLADE', 'AND', 'GAVE', 'THEM', 'TO', 'REGIN'] +3538-163624-0009-902: hyp=['THEN', 'SIGURD', 'WENT', 'TO', 'HIS', 'MOTHER', 'AND', 'ASKED', 'FOR', 'THE', 'BROKEN', 'PIECES', 'OF', 'HIS', "FATHER'S", 'BLADE', 'AND', 'GAVE', 'THEM', 'TO', 'REGAN'] +3538-163624-0010-903: ref=['SO', 'SIGURD', 'SAID', 'THAT', 'SWORD', 'WOULD', 'DO'] +3538-163624-0010-903: hyp=['SO', 'SIGURD', 'SAID', 'THAT', 'SWORD', 'WOULD', 'DO'] +3538-163624-0011-904: ref=['THEN', 'HE', 'SAW', 'THE', 'TRACK', 'WHICH', 'THE', 'DRAGON', 'MADE', 'WHEN', 'HE', 'WENT', 'TO', 'A', 'CLIFF', 'TO', 'DRINK', 'AND', 'THE', 'TRACK', 'WAS', 'AS', 'IF', 'A', 'GREAT', 'RIVER', 'HAD', 'ROLLED', 'ALONG', 'AND', 'LEFT', 'A', 'DEEP', 'VALLEY'] +3538-163624-0011-904: hyp=['THEN', 'HE', 'SAW', 'THE', 'TRACK', 'WHICH', 'THE', 'DRAGON', 'HAD', 'MADE', 'WHEN', 'HE', 'WENT', 'TO', 'A', 'CLIFF', 'TO', 'DRINK', 'AND', 'THE', 'TRACK', 'WAS', 'AS', 'IF', 'A', 'GREAT', 'RIVER', 'HAD', 'ROLLED', 'ALONG', 'AND', 'LEFT', 'A', 'DEEP', 'VALLEY'] +3538-163624-0012-905: ref=['BUT', 'SIGURD', 'WAITED', 'TILL', 'HALF', 'OF', 'HIM', 'HAD', 'CRAWLED', 'OVER', 'THE', 'PIT', 'AND', 'THEN', 'HE', 'THRUST', 'THE', 'SWORD', 'GRAM', 'RIGHT', 'INTO', 'HIS', 'VERY', 'HEART'] +3538-163624-0012-905: hyp=['BUT', 'SIGURD', 'WAITED', 'TILL', 'HALF', 'OF', 'HIM', 'HAD', 'CRAWLED', 'OVER', 'THE', 'PIT', 'AND', 'THEN', 'HE', 'THRUST', 'THE', 'SWORD', 'GRAHAM', 'RIGHT', 'INTO', 'HIS', 'VERY', 'HEART'] +3538-163624-0013-906: ref=['SIGURD', 'SAID', 'I', 'WOULD', 'TOUCH', 'NONE', 'OF', 'IT', 'IF', 'BY', 'LOSING', 'IT', 'I', 'SHOULD', 'NEVER', 'DIE'] +3538-163624-0013-906: hyp=['SIGURD', 'SAID', 'I', 'WOULD', 'TOUCH', 'NONE', 'OF', 'IT', 'IF', 'BY', 'LOSING', 'IT', 'I', 'SHOULD', 'NEVER', 'DIE'] +3538-163624-0014-907: ref=['BUT', 'ALL', 'MEN', 'DIE', 'AND', 'NO', 'BRAVE', 'MAN', 'LETS', 'DEATH', 'FRIGHTEN', 'HIM', 'FROM', 'HIS', 'DESIRE'] +3538-163624-0014-907: hyp=['BUT', 'ALL', 'MEN', 'DIE', 'AND', 'NO', 'BRAVE', 'MAN', 'LETS', 'DEATH', 'FRIGHTEN', 'HIM', 'FROM', 'HIS', 'DESIRE'] 
+3538-163624-0015-908: ref=['DIE', 'THOU', 'FAFNIR', 'AND', 'THEN', 'FAFNIR', 'DIED'] +3538-163624-0015-908: hyp=['DIE', 'THOU', 'FAFNER', 'AND', 'THEN', 'FAFNER', 'DIED'] +3538-163624-0016-909: ref=['THEN', 'SIGURD', 'RODE', 'BACK', 'AND', 'MET', 'REGIN', 'AND', 'REGIN', 'ASKED', 'HIM', 'TO', 'ROAST', "FAFNIR'S", 'HEART', 'AND', 'LET', 'HIM', 'TASTE', 'OF', 'IT'] +3538-163624-0016-909: hyp=['THEN', 'SIGURD', 'RODE', 'BACK', 'AND', 'MET', 'REGEN', 'AND', 'REGAN', 'ASKED', 'HIM', 'TO', 'ROAST', "FAFFNER'S", 'HEART', 'AND', 'LET', 'HIM', 'TASTE', 'OF', 'IT'] +3538-163624-0017-910: ref=['SO', 'SIGURD', 'PUT', 'THE', 'HEART', 'OF', 'FAFNIR', 'ON', 'A', 'STAKE', 'AND', 'ROASTED', 'IT'] +3538-163624-0017-910: hyp=['SO', 'SIGURD', 'PUT', 'THE', 'HEART', 'OF', 'FAFNER', 'ON', 'A', 'STEAK', 'AND', 'ROASTED', 'IT'] +3538-163624-0018-911: ref=['THERE', 'IS', 'SIGURD', 'ROASTING', "FAFNIR'S", 'HEART', 'FOR', 'ANOTHER', 'WHEN', 'HE', 'SHOULD', 'TASTE', 'OF', 'IT', 'HIMSELF', 'AND', 'LEARN', 'ALL', 'WISDOM'] +3538-163624-0018-911: hyp=['THERE', 'IS', 'SIGURD', 'ROASTING', "FAFNIR'S", 'HEART', 'FOR', 'ANOTHER', 'WHEN', 'HE', 'SHOULD', 'TASTE', 'OF', 'IT', 'HIMSELF', 'AND', 'LEARN', 'ALL', 'WISDOM'] +3538-163624-0019-912: ref=['THAT', 'LET', 'HIM', 'DO', 'AND', 'THEN', 'RIDE', 'OVER', 'HINDFELL', 'TO', 'THE', 'PLACE', 'WHERE', 'BRYNHILD', 'SLEEPS'] +3538-163624-0019-912: hyp=['THAT', 'LET', 'HIM', 'DO', 'THEN', 'RIDE', 'OVER', 'HENFELD', 'TO', 'THE', 'PLACE', 'WHERE', 'BRUNHILD', 'SLEEPS'] +3538-163624-0020-913: ref=['THERE', 'MUST', 'SHE', 'SLEEP', 'TILL', 'THOU', 'COMEST', 'FOR', 'HER', 'WAKING', 'RISE', 'UP', 'AND', 'RIDE', 'FOR', 'NOW', 'SURE', 'SHE', 'WILL', 'SWEAR', 'THE', 'VOW', 'FEARLESS', 'OF', 'BREAKING'] +3538-163624-0020-913: hyp=['THERE', 'MUST', 'SHE', 'SLEEP', 'TILL', 'THOU', 'COMES', 'FOR', 'HER', 'WAKING', 'RISE', 'UP', 'AND', 'RIDE', 'FOR', 'NOW', 'SURE', 'SHE', 'WILL', 'SWEAR', 'THE', 'VOW', 'FEARLESS', 'OF', 'BREAKING'] +3538-163624-0021-914: ref=['THEN', 'HE', 'TOOK', 'THE', 'HELMET', 'OFF', 'THE', 'HEAD', 'OF', 'THE', 'SLEEPER', 'AND', 'BEHOLD', 'SHE', 'WAS', 'A', 'MOST', 'BEAUTIFUL', 'LADY'] +3538-163624-0021-914: hyp=['THEN', 'HE', 'TOOK', 'THE', 'HELMET', 'OFF', 'THE', 'HEAD', 'OF', 'THE', 'SLEEPER', 'AND', 'BEHOLD', 'SHE', 'WAS', 'A', 'MOST', 'BEAUTIFUL', 'LADY'] +3538-163624-0022-915: ref=['THEN', 'SIGURD', 'RODE', 'AWAY', 'AND', 'HE', 'CAME', 'TO', 'THE', 'HOUSE', 'OF', 'A', 'KING', 'WHO', 'HAD', 'A', 'FAIR', 'DAUGHTER'] +3538-163624-0022-915: hyp=['THEN', 'SIGURD', 'RODE', 'AWAY', 'AND', 'HE', 'CAME', 'TO', 'THE', 'HOUSE', 'OF', 'A', 'KING', 'WHO', 'HAD', 'A', 'FAIR', 'DAUGHTER'] +3538-163624-0023-916: ref=['THEN', "BRYNHILD'S", 'FATHER', 'TOLD', 'GUNNAR', 'THAT', 'SHE', 'WOULD', 'MARRY', 'NONE', 'BUT', 'HIM', 'WHO', 'COULD', 'RIDE', 'THE', 'FLAME', 'IN', 'FRONT', 'OF', 'HER', 'ENCHANTED', 'TOWER', 'AND', 'THITHER', 'THEY', 'RODE', 'AND', 'GUNNAR', 'SET', 'HIS', 'HORSE', 'AT', 'THE', 'FLAME', 'BUT', 'HE', 'WOULD', 'NOT', 'FACE', 'IT'] +3538-163624-0023-916: hyp=['LIM', "BRUNHILD'S", 'FATHER', 'TOLD', 'GUNNER', 'THAT', 'SHE', 'WOULD', 'MARRY', 'NONE', 'BUT', 'HIM', 'WHO', 'COULD', 'RIDE', 'THE', 'FLAME', 'IN', 'FRONT', 'OF', 'HER', 'ENCHANTED', 'TOWER', 'AND', 'THITHER', 'THEY', 'RODE', 'AND', 'GUNNER', 'SET', 'HIS', 'HORSE', 'TO', 'THE', 'FLAME', 'BUT', 'HE', 'WOULD', 'NOT', 'FACE', 'IT'] +3538-163624-0024-917: ref=['FOR', 'ONE', 'DAY', 'WHEN', 'BRYNHILD', 'AND', 'GUDRUN', 'WERE', 'BATHING', 'BRYNHILD', 'WADED', 'FARTHEST', 'OUT', 'INTO', 'THE', 'RIVER', 'AND', 'SAID', 'SHE', 
'DID', 'THAT', 'TO', 'SHOW', 'SHE', 'WAS', "GUIRUN'S", 'SUPERIOR'] +3538-163624-0024-917: hyp=['FOR', 'ONE', 'DAY', 'WHEN', 'BEURNHILD', 'AND', 'GUNDRAN', 'WERE', 'BATHING', 'BURNHILD', 'WADED', 'FARTHEST', 'OUT', 'INTO', 'THE', 'RIVER', 'AND', 'SAID', 'SHE', 'DID', 'THAT', 'TO', 'SHOW', 'SHE', 'WAS', "GUNDERN'S", 'SUPERIOR'] +3538-163624-0025-918: ref=['FOR', 'HER', 'HUSBAND', 'SHE', 'SAID', 'HAD', 'RIDDEN', 'THROUGH', 'THE', 'FLAME', 'WHEN', 'NO', 'OTHER', 'MAN', 'DARED', 'FACE', 'IT'] +3538-163624-0025-918: hyp=['FOR', 'HER', 'HUSBAND', 'SHE', 'SAID', 'HAD', 'RIDDEN', 'THROUGH', 'THE', 'FLAME', 'WHEN', 'NO', 'OTHER', 'MAN', 'DARED', 'FACE', 'IT'] +3538-163624-0026-919: ref=['NOT', 'LONG', 'TO', 'WAIT', 'HE', 'SAID', 'TILL', 'THE', 'BITTER', 'SWORD', 'STANDS', 'FAST', 'IN', 'MY', 'HEART', 'AND', 'THOU', 'WILL', 'NOT', 'LIVE', 'LONG', 'WHEN', 'I', 'AM', 'DEAD'] +3538-163624-0026-919: hyp=['NOT', 'LONG', 'TO', 'WAIT', 'HE', 'SAID', 'TILL', 'THE', 'BITTER', 'SWORD', 'STANDS', 'FAST', 'IN', 'MY', 'HEART', 'AND', 'THOU', 'WILT', 'NOT', 'LIVE', 'LONG', 'WHEN', 'I', 'AM', 'DEAD'] +367-130732-0000-920: ref=['LOBSTERS', 'AND', 'LOBSTERS'] +367-130732-0000-920: hyp=['LOBSTERS', 'AND', 'LOBSTERS'] +367-130732-0001-921: ref=['WHEN', 'IS', 'A', 'LOBSTER', 'NOT', 'A', 'LOBSTER', 'WHEN', 'IT', 'IS', 'A', 'CRAYFISH'] +367-130732-0001-921: hyp=['WHEN', 'IS', 'A', 'LOBSTER', 'NOT', 'A', 'LOBSTER', 'WHEN', 'IT', 'IS', 'A', 'CRAYFISH'] +367-130732-0002-922: ref=['THIS', 'QUESTION', 'AND', 'ANSWER', 'MIGHT', 'WELL', 'GO', 'INTO', 'THE', 'PRIMER', 'OF', 'INFORMATION', 'FOR', 'THOSE', 'WHO', 'COME', 'TO', 'SAN', 'FRANCISCO', 'FROM', 'THE', 'EAST', 'FOR', 'WHAT', 'IS', 'CALLED', 'A', 'LOBSTER', 'IN', 'SAN', 'FRANCISCO', 'IS', 'NOT', 'A', 'LOBSTER', 'AT', 'ALL', 'BUT', 'A', 'CRAYFISH'] +367-130732-0002-922: hyp=['THIS', 'QUESTION', 'IN', 'ANSWER', 'MIGHT', 'WELL', 'GO', 'INTO', 'THE', 'PRIMARY', 'OF', 'INFORMATION', 'FOR', 'THOSE', 'WHO', 'COME', 'TO', 'SAN', 'FRANCISCO', 'FROM', 'THE', 'EAST', 'FOR', 'WHAT', 'IS', 'CALLED', 'A', 'LOBSTER', 'IN', 'SAN', 'FRANCISCO', 'IS', 'NOT', 'A', 'LOBSTER', 'AT', 'ALL', 'BUT', 'A', 'CRAYFISH'] +367-130732-0003-923: ref=['THE', 'PACIFIC', 'CRAYFISH', 'HOWEVER', 'SERVES', 'EVERY', 'PURPOSE', 'AND', 'WHILE', 'MANY', 'CONTEND', 'THAT', 'ITS', 'MEAT', 'IS', 'NOT', 'SO', 'DELICATE', 'IN', 'FLAVOR', 'AS', 'THAT', 'OF', 'ITS', 'EASTERN', 'COUSIN', 'THE', 'CALIFORNIAN', 'WILL', 'AS', 'STRENUOUSLY', 'INSIST', 'THAT', 'IT', 'IS', 'BETTER', 'BUT', 'OF', 'COURSE', 'SOMETHING', 'MUST', 'ALWAYS', 'BE', 'ALLOWED', 'FOR', 'THE', 'PATRIOTISM', 'OF', 'THE', 'CALIFORNIAN'] +367-130732-0003-923: hyp=['THE', 'PACIFIC', 'CRAYFISH', 'HOWEVER', 'SERVES', 'EVERY', 'PURPOSE', 'AND', 'WHILE', 'MANY', 'CONTEND', 'THAT', 'ITS', 'MEAT', 'IS', 'NOT', 'SO', 'DELICATE', 'IN', 'FLAVOR', 'AS', 'THAT', 'OF', 'ITS', 'EASTERN', 'COUSIN', 'THE', 'CALIFORNIAN', 'WALLA', 'STRENUOUSLY', 'INSISTS', 'THAT', 'IT', 'IS', 'BETTER', 'BUT', 'OF', 'COURSE', 'SOMETHING', 'MUST', 'ALWAYS', 'BE', 'ALLOWED', 'FOR', 'THE', 'PATRIOTISM', 'OF', 'THE', 'CALIFORNIAN'] +367-130732-0004-924: ref=['A', 'BOOK', 'COULD', 'BE', 'WRITTEN', 'ABOUT', 'THIS', 'RESTAURANT', 'AND', 'THEN', 'ALL', 'WOULD', 'NOT', 'BE', 'TOLD', 'FOR', 'ALL', 'ITS', 'SECRETS', 'CAN', 'NEVER', 'BE', 'KNOWN'] +367-130732-0004-924: hyp=['A', 'BOOK', 'COULD', 'BE', 'WRITTEN', 'ABOUT', 'THIS', 'RESTAURANT', 'AND', 'THEN', 'ALL', 'WOULD', 'NOT', 'BE', 'TOLD', 'FOR', 'ALL', 'ITS', 'SECRETS', 'CAN', 'NEVER', 'BE', 'KNOWN'] +367-130732-0005-925: ref=['IT', 'WAS', 'HERE', 
'THAT', 'MOST', 'MAGNIFICENT', 'DINNERS', 'WERE', 'ARRANGED', 'IT', 'WAS', 'HERE', 'THAT', 'EXTRAORDINARY', 'DISHES', 'WERE', 'CONCOCTED', 'BY', 'CHEFS', 'OF', 'WORLD', 'WIDE', 'FAME', 'IT', 'WAS', 'HERE', 'THAT', 'LOBSTER', 'A', 'LA', 'NEWBERG', 'REACHED', 'ITS', 'HIGHEST', 'PERFECTION', 'AND', 'THIS', 'IS', 'THE', 'RECIPE', 'THAT', 'WAS', 'FOLLOWED', 'WHEN', 'IT', 'WAS', 'PREPARED', 'IN', 'THE', 'DELMONICO'] +367-130732-0005-925: hyp=['IT', 'WAS', 'HERE', 'THAT', 'MOST', 'MAGNIFICENT', 'DINNERS', 'WERE', 'ARRANGED', 'IT', 'WAS', 'HERE', 'THAT', 'EXTRAORDINARY', 'DISHES', 'WERE', 'CONCOCTED', 'BY', 'CHEFS', 'OF', 'WORLD', 'WIDE', 'FAME', 'IT', 'WAS', 'HERE', 'THAT', 'LOBSTER', 'AEUBERG', 'REACHED', 'ITS', 'HIGHEST', 'PERFECTION', 'AND', 'THIS', 'IS', 'THE', 'RECIPE', 'THAT', 'WAS', 'FOLLOWED', 'WHEN', 'IT', 'WAS', 'PREPARED', 'IN', 'THE', 'DOMONICO'] +367-130732-0006-926: ref=['LOBSTER', 'A', 'LA', 'NEWBERG'] +367-130732-0006-926: hyp=['LOBSTER', 'A', 'NEWBURG'] +367-130732-0007-927: ref=['ONE', 'POUND', 'OF', 'LOBSTER', 'MEAT', 'ONE', 'TEASPOONFUL', 'OF', 'BUTTER', 'ONE', 'HALF', 'PINT', 'OF', 'CREAM', 'YOLKS', 'OF', 'FOUR', 'EGGS', 'ONE', 'WINE', 'GLASS', 'OF', 'SHERRY', 'LOBSTER', 'FAT'] +367-130732-0007-927: hyp=['ONE', 'POUNDS', 'OF', 'LOBSTER', 'MEAT', 'ONE', 'TEASPOONFUL', 'OF', 'BUTTER', 'ONE', 'HALF', 'PINT', 'OF', 'CREAM', 'YOLKS', 'OF', 'FOUR', 'EGGS', 'ONE', 'WINE', 'GLASS', 'OF', 'SHERRY', 'LOBSTER', 'FAT'] +367-130732-0008-928: ref=['PUT', 'THIS', 'IN', 'A', 'DOUBLE', 'BOILER', 'AND', 'LET', 'COOK', 'UNTIL', 'THICK', 'STIRRING', 'CONSTANTLY'] +367-130732-0008-928: hyp=['PUT', 'THIS', 'IN', 'A', 'DOUBLE', 'BOILER', 'AND', 'LET', 'COOK', 'UNTIL', 'THICK', 'STIRRING', 'CONSTANTLY'] +367-130732-0009-929: ref=['SERVE', 'IN', 'A', 'CHAFING', 'DISH', 'WITH', 'THIN', 'SLICES', 'OF', 'DRY', 'TOAST'] +367-130732-0009-929: hyp=['SERVE', 'IN', 'A', 'CHAFING', 'DISH', 'WITH', 'THIN', 'SLICES', 'OF', 'DRY', 'TOAST'] +367-130732-0010-930: ref=['KING', 'OF', 'SHELL', 'FISH'] +367-130732-0010-930: hyp=['KING', 'OF', 'SHELLFISH'] +367-130732-0011-931: ref=['ONE', 'HAS', 'TO', 'COME', 'TO', 'SAN', 'FRANCISCO', 'TO', 'PARTAKE', 'OF', 'THE', 'KING', 'OF', 'SHELL', 'FISH', 'THE', 'MAMMOTH', 'PACIFIC', 'CRAB'] +367-130732-0011-931: hyp=['ONE', 'HAS', 'TO', 'COME', 'TO', 'SAN', 'FRANCISCO', 'TO', 'PARTAKE', 'OF', 'THE', 'KING', 'OF', 'SHELLFISH', 'THE', 'MAMMOTH', 'PACIFIC', 'CRAB'] +367-130732-0012-932: ref=['I', 'SAY', 'COME', 'TO', 'SAN', 'FRANCISCO', 'ADVISEDLY', 'FOR', 'WHILE', 'THE', 'CRAB', 'IS', 'FOUND', 'ALL', 'ALONG', 'THE', 'COAST', 'IT', 'IS', 'PREPARED', 'NOWHERE', 'SO', 'DELICIOUSLY', 'AS', 'IN', 'SAN', 'FRANCISCO'] +367-130732-0012-932: hyp=['I', 'SAY', 'COME', 'TO', 'SAN', 'FRANCISCO', 'ADVISEDLY', 'FOR', 'WHILE', 'THE', 'CRAB', 'IS', 'FOUND', 'ALL', 'ALONG', 'THE', 'COAST', 'IT', 'IS', 'PREPARED', 'NOWHERE', 'SO', 'DELICIOUSLY', 'AS', 'IN', 'SAN', 'FRANCISCO'] +367-130732-0013-933: ref=["GOBEY'S", 'PASSED', 'WITH', 'THE', 'FIRE', 'AND', 'THE', 'LITTLE', 'RESTAURANT', 'BEARING', 'HIS', 'NAME', 'AND', 'IN', 'CHARGE', 'OF', 'HIS', 'WIDOW', 'IN', 'UNION', 'SQUARE', 'AVENUE', 'HAS', 'NOT', 'ATTAINED', 'THE', 'FAME', 'OF', 'THE', 'OLD', 'PLACE'] +367-130732-0013-933: hyp=['GOBIES', 'PASSED', 'WITH', 'THE', 'FIRE', 'AND', 'THE', 'LITTLE', 'RESTAURANT', 'BEARING', 'HIS', 'NAME', 'AND', 'IN', 'CHARGE', 'OF', 'HIS', 'WIDOW', 'IN', 'UNION', 'SQUARE', 'AVENUE', 'HAS', 'NOT', 'ATTAINED', 'THE', 'FAME', 'OF', 'THE', 'OLD', 'PLACE'] +367-130732-0014-934: ref=['IT', 'IS', 'POSSIBLE', 'THAT', 
'SHE', 'KNOWS', 'THE', 'SECRET', 'OF', 'PREPARING', 'CRAB', 'AS', 'IT', 'WAS', 'PREPARED', 'IN', 'THE', "GOBEY'S", 'OF', 'BEFORE', 'THE', 'FIRE', 'BUT', 'HIS', 'PRESTIGE', 'DID', 'NOT', 'DESCEND', 'TO', 'HER'] +367-130732-0014-934: hyp=['IT', 'IS', 'POSSIBLE', 'THAT', 'SHE', 'KNOWS', 'THE', 'SECRET', 'OF', 'PREPARING', 'CRAB', 'AS', 'IT', 'WAS', 'PREPARED', 'IN', 'THE', 'GOBIES', 'OF', 'BEFORE', 'THE', 'FIRE', 'BUT', 'HIS', 'PRESGE', 'DID', 'NOT', 'DESCEND', 'TO', 'HER'] +367-130732-0015-935: ref=["GOBEY'S", 'CRAB', 'STEW'] +367-130732-0015-935: hyp=['GOBIES', 'CRABS', 'DO'] +367-130732-0016-936: ref=['TAKE', 'THE', 'MEAT', 'OF', 'ONE', 'LARGE', 'CRAB', 'SCRAPING', 'OUT', 'ALL', 'OF', 'THE', 'FAT', 'FROM', 'THE', 'SHELL'] +367-130732-0016-936: hyp=['TAKE', 'THE', 'MEAT', 'OF', 'ONE', 'LARGE', 'CRAB', 'SCRAPING', 'OUT', 'ALL', 'THE', 'FAT', 'FROM', 'THE', 'SHELL'] +367-130732-0017-937: ref=['SOAK', 'THE', 'CRAB', 'MEAT', 'IN', 'THE', 'SHERRY', 'TWO', 'HOURS', 'BEFORE', 'COOKING'] +367-130732-0017-937: hyp=['SOAK', 'THE', 'CRAB', 'MEAT', 'IN', 'THE', 'SHERRY', 'TWO', 'HOURS', 'BEFORE', 'COOKING'] +367-130732-0018-938: ref=['CHOP', 'FINE', 'THE', 'ONION', 'SWEET', 'PEPPER', 'AND', 'TOMATO', 'WITH', 'THE', 'ROSEMARY'] +367-130732-0018-938: hyp=['CHOP', 'FINE', 'THE', 'ONION', 'SWEET', 'PEPPER', 'AND', 'TOMATO', 'WITH', 'THE', 'ROSEMARY'] +367-130732-0019-939: ref=['HEAT', 'THIS', 'IN', 'A', 'STEWPAN', 'AND', 'WHEN', 'SIMMERING', 'ADD', 'THE', 'SHERRY', 'AND', 'CRAB', 'MEAT', 'AND', 'LET', 'ALL', 'COOK', 'TOGETHER', 'WITH', 'A', 'SLOW', 'FIRE', 'FOR', 'EIGHT', 'MINUTES'] +367-130732-0019-939: hyp=['HEAT', 'THIS', 'IN', 'A', 'STEWPAN', 'AND', 'WHEN', 'SIMMERING', 'ADD', 'THE', 'SHERRY', 'AND', 'CRAB', 'MEAT', 'AND', 'LET', 'ALL', 'COOK', 'TOGETHER', 'WITH', 'A', 'SLOW', 'FIRE', 'FOR', 'EIGHT', 'MINUTES'] +367-130732-0020-940: ref=['SERVE', 'IN', 'A', 'CHAFING', 'DISH', 'WITH', 'TOASTED', 'CRACKERS', 'OR', 'THIN', 'SLICES', 'OF', 'TOASTED', 'BREAD'] +367-130732-0020-940: hyp=['SERVE', 'IN', 'A', 'CHAFING', 'DISH', 'WITH', 'TOASTED', 'CRACKERS', 'OR', 'THIN', 'SLICES', 'OF', 'TOASTED', 'BREAD'] +367-130732-0021-941: ref=['LOBSTER', 'IN', 'MINIATURE'] +367-130732-0021-941: hyp=['LOBSTER', 'IN', 'MINIATURE'] +367-130732-0022-942: ref=['SO', 'FAR', 'IT', 'HAS', 'BEEN', 'USED', 'MOSTLY', 'FOR', 'GARNISHMENT', 'OF', 'OTHER', 'DISHES', 'AND', 'IT', 'IS', 'ONLY', 'RECENTLY', 'THAT', 'THE', 'HOF', 'BRAU', 'HAS', 'BEEN', 'MAKING', 'A', 'SPECIALTY', 'OF', 'THEM'] +367-130732-0022-942: hyp=['SO', 'FAR', 'IT', 'HAS', 'BEEN', 'USED', 'MOSTLY', 'FOR', 'GARNISHMENT', 'OF', 'OTHER', 'DISHES', 'AND', 'IT', 'IS', 'ONLY', 'RECENTLY', 'THAT', 'THE', 'WHOLE', 'BROW', 'HAS', 'BEEN', 'MAKING', 'A', 'SPECIALTY', 'OF', 'THEM'] +367-130732-0023-943: ref=['ALL', 'OF', 'THE', 'BETTER', 'CLASS', 'RESTAURANTS', 'HOWEVER', 'WILL', 'SERVE', 'THEM', 'IF', 'YOU', 'ORDER', 'THEM'] +367-130732-0023-943: hyp=['ALL', 'THE', 'BETTER', 'CLASS', 'RESTAURANTS', 'HOWEVER', 'WILL', 'SERVE', 'THEM', 'IF', 'YOU', 'ORDER', 'THEM'] +367-130732-0024-944: ref=['THIS', 'IS', 'THE', 'RECIPE', 'FOR', 'EIGHT', 'PEOPLE', 'AND', 'IT', 'IS', 'WELL', 'WORTH', 'TRYING', 'IF', 'YOU', 'ARE', 'GIVING', 'A', 'DINNER', 'OF', 'IMPORTANCE'] +367-130732-0024-944: hyp=['THIS', 'IS', 'THE', 'RECIPE', 'FOR', 'EIGHT', 'PEOPLE', 'AND', 'IT', 'IS', 'WELL', 'AND', 'WORTH', 'TRYING', 'IF', 'YOU', 'ARE', 'GIVING', 'A', 'DINNER', 'OF', 'IMPORTANCE'] +367-130732-0025-945: ref=['BISQUE', 'OF', 'CRAWFISH'] +367-130732-0025-945: hyp=['FISK', 'OF', 'CRAWFISH'] 
+367-130732-0026-946: ref=['TAKE', 'THIRTY', 'CRAWFISH', 'FROM', 'WHICH', 'REMOVE', 'THE', 'GUT', 'CONTAINING', 'THE', 'GALL', 'IN', 'THE', 'FOLLOWING', 'MANNER', 'TAKE', 'FIRM', 'HOLD', 'OF', 'THE', 'CRAWFISH', 'WITH', 'THE', 'LEFT', 'HAND', 'SO', 'AS', 'TO', 'AVOID', 'BEING', 'PINCHED', 'BY', 'ITS', 'CLAWS', 'WITH', 'THE', 'THUMB', 'AND', 'FOREFINGER', 'OF', 'THE', 'RIGHT', 'HAND', 'PINCH', 'THE', 'EXTREME', 'END', 'OF', 'THE', 'CENTRAL', 'FIN', 'OF', 'THE', 'TAIL', 'AND', 'WITH', 'A', 'SUDDEN', 'JERK', 'THE', 'GUT', 'WILL', 'BE', 'WITHDRAWN'] +367-130732-0026-946: hyp=['TAKE', 'THIRTY', 'CRAWFISH', 'FROM', 'WHICH', 'REMOVE', 'THE', 'GUT', 'CONTAINING', 'THE', 'GALL', 'IN', 'THE', 'FOLLOWING', 'MANNER', 'TAKE', 'FIRM', 'HOLD', 'OF', 'THE', 'CRAWFISH', 'WITH', 'THE', 'LEFT', 'HAND', 'SO', 'AS', 'TO', 'AVOID', 'BEING', 'PINCHED', 'BY', 'ITS', 'CLAWS', 'WITH', 'THE', 'THUMB', 'AND', 'FOREFINGER', 'OF', 'THE', 'RIGHT', 'HAND', 'PINCH', 'THE', 'EXTREME', 'END', 'OF', 'THE', 'CENTRAL', 'FIN', 'OF', 'THE', 'TAIL', 'AND', 'WITH', 'A', 'SUDDEN', 'JERK', 'THE', 'GUT', 'WILL', 'BE', 'WITHDRAWN'] +367-130732-0027-947: ref=['MINCE', 'OR', 'CUT', 'INTO', 'SMALL', 'DICE', 'A', 'CARROT', 'AN', 'ONION', 'ONE', 'HEAD', 'OF', 'CELERY', 'AND', 'A', 'FEW', 'PARSLEY', 'ROOTS', 'AND', 'TO', 'THESE', 'ADD', 'A', 'BAY', 'LEAF', 'A', 'SPRIG', 'OF', 'THYME', 'A', 'LITTLE', 'MINIONETTE', 'PEPPER', 'AND', 'TWO', 'OUNCES', 'OF', 'BUTTER'] +367-130732-0027-947: hyp=['MINCE', 'OR', 'CUT', 'INTO', 'SMALL', 'DICE', 'A', 'CARROT', 'AN', 'ONION', 'ONE', 'HEAD', 'OF', 'CELERY', 'AND', 'A', 'FEW', 'PARSLEY', 'ROOTS', 'AND', 'TO', 'THESE', 'ADD', 'A', 'BAY', 'LEAF', 'A', 'SPRIG', 'OF', 'THYME', 'A', 'LITTLE', 'MANONET', 'PEPPER', 'AND', 'TWO', 'OUNCES', 'OF', 'BUTTER'] +367-130732-0028-948: ref=['PUT', 'THESE', 'INGREDIENTS', 'INTO', 'A', 'STEWPAN', 'AND', 'FRY', 'THEM', 'TEN', 'MINUTES', 'THEN', 'THROW', 'IN', 'THE', 'CRAWFISH', 'AND', 'POUR', 'ON', 'THEM', 'HALF', 'A', 'BOTTLE', 'OF', 'FRENCH', 'WHITE', 'WINE'] +367-130732-0028-948: hyp=['PUT', 'THESE', 'INGREDIENTS', 'INTO', 'A', 'STEWPAN', 'AND', 'FRY', 'THEM', 'TEN', 'MINUTES', 'THEN', 'THROW', 'IN', 'THE', 'CRAWFISH', 'AND', 'POUR', 'ON', 'THEM', 'HALF', 'A', 'BOTTLE', 'OF', 'FRENCH', 'WHITE', 'WINE'] +367-130732-0029-949: ref=['ALLOW', 'THIS', 'TO', 'BOIL', 'AND', 'THEN', 'ADD', 'A', 'QUART', 'OF', 'STRONG', 'CONSOMME', 'AND', 'LET', 'ALL', 'CONTINUE', 'BOILING', 'FOR', 'HALF', 'AN', 'HOUR'] +367-130732-0029-949: hyp=['ALLOW', 'THIS', 'TO', 'BOIL', 'AND', 'THEN', 'ADD', 'A', 'QUART', 'OF', 'STRONG', 'CONSUMM', 'AND', 'LET', 'ALL', 'CONTINUE', 'BOILING', 'FOR', 'HALF', 'AN', 'HOUR'] +367-130732-0030-950: ref=['PICK', 'OUT', 'THE', 'CRAWFISH', 'AND', 'STRAIN', 'THE', 'BROTH', 'THROUGH', 'A', 'NAPKIN', 'BY', 'PRESSURE', 'INTO', 'A', 'BASIN', 'IN', 'ORDER', 'TO', 'EXTRACT', 'ALL', 'THE', 'ESSENCE', 'FROM', 'THE', 'VEGETABLES'] +367-130732-0030-950: hyp=['PICK', 'OUT', 'THE', 'CRAWFISH', 'AND', 'STRAIN', 'THE', 'BROTH', 'THROUGH', 'A', 'NAPKIN', 'BY', 'PRESSURE', 'INTO', 'A', 'BASIN', 'IN', 'ORDER', 'TO', 'EXTRACT', 'ALL', 'THE', 'ESSENCE', 'FROM', 'THE', 'VEGETABLES'] +367-130732-0031-951: ref=['PICK', 'THE', 'SHELLS', 'OFF', 'TWENTY', 'FIVE', 'OF', 'THE', 'CRAWFISH', 'TAILS', 'TRIM', 'THEM', 'NEATLY', 'AND', 'SET', 'THEM', 'ASIDE', 'UNTIL', 'WANTED'] +367-130732-0031-951: hyp=['PICK', 'THE', 'SHELLS', 'OFF', 'TWENTY', 'FIVE', 'OF', 'THE', 'CRAWFISH', 'TAILS', 'TRIM', 'THEM', 'NEATLY', 'AND', 'SET', 'THEM', 'ASIDE', 'UNTIL', 'WANTED'] +367-130732-0032-952: ref=['RESERVE', 
'SOME', 'OF', 'THE', 'SPAWN', 'ALSO', 'HALF', 'OF', 'THE', 'BODY', 'SHELLS', 'WITH', 'WHICH', 'TO', 'MAKE', 'THE', 'CRAWFISH', 'BUTTER', 'TO', 'FINISH', 'THE', 'SOUP'] +367-130732-0032-952: hyp=['RESERVE', 'SOME', 'OF', 'THE', 'SPAWN', 'ALSO', 'HALF', 'OF', 'THE', 'BODY', 'SHELLS', 'WITH', 'WHICH', 'TO', 'MAKE', 'THE', 'CRAWFISH', 'BUTTER', 'TO', 'FINISH', 'THE', 'SOUP'] +367-130732-0033-953: ref=['THIS', 'BUTTER', 'IS', 'MADE', 'AS', 'FOLLOWS', 'PLACE', 'THE', 'SHELLS', 'ON', 'A', 'BAKING', 'SHEET', 'IN', 'THE', 'OVEN', 'TO', 'DRY', 'LET', 'THE', 'SHELLS', 'COOL', 'AND', 'THEN', 'POUND', 'THEM', 'IN', 'A', 'MORTAR', 'WITH', 'A', 'LITTLE', 'LOBSTER', 'CORAL', 'AND', 'FOUR', 'OUNCES', 'OF', 'FRESH', 'BUTTER', 'THOROUGHLY', 'BRUISING', 'THE', 'WHOLE', 'TOGETHER', 'SO', 'AS', 'TO', 'MAKE', 'A', 'FINE', 'PASTE'] +367-130732-0033-953: hyp=['THIS', 'BUTTER', 'IS', 'MADE', 'AS', 'FOLLOWS', 'PLACE', 'THE', 'SHELLS', 'IN', 'A', 'BAKING', 'SHEET', 'IN', 'THE', 'OVEN', 'TO', 'DRY', 'LET', 'THE', 'SHELLS', 'COOL', 'AND', 'THEN', 'POUND', 'THEM', 'IN', 'A', 'MORTAR', 'WITH', 'A', 'LITTLE', 'LOBSTER', 'COAL', 'AND', 'FOUR', 'OUNCES', 'OF', 'FRESH', 'BUTTER', 'THOROUGHLY', 'BRUISING', 'THE', 'WHOLE', 'TOGETHER', 'SO', 'AS', 'TO', 'MAKE', 'A', 'FINE', 'PASTE'] +367-293981-0000-954: ref=['I', 'SWEAR', 'IT', 'ANSWERED', 'SANCHO'] +367-293981-0000-954: hyp=['I', 'SWEAR', 'IT', 'ANSWERED', 'SANCHO'] +367-293981-0001-955: ref=['I', 'SAY', 'SO', 'CONTINUED', 'DON', 'QUIXOTE', 'BECAUSE', 'I', 'HATE', 'TAKING', 'AWAY', "ANYONE'S", 'GOOD', 'NAME'] +367-293981-0001-955: hyp=['I', 'SAY', 'SO', 'CONTINUED', 'DON', 'QUIXOTE', 'BECAUSE', 'I', 'HATE', 'TAKING', 'AWAY', 'ANY', "ONE'S", 'GOOD', 'NAME'] +367-293981-0002-956: ref=['I', 'SAY', 'REPLIED', 'SANCHO', 'THAT', 'I', 'SWEAR', 'TO', 'HOLD', 'MY', 'TONGUE', 'ABOUT', 'IT', 'TILL', 'THE', 'END', 'OF', 'YOUR', "WORSHIP'S", 'DAYS', 'AND', 'GOD', 'GRANT', 'I', 'MAY', 'BE', 'ABLE', 'TO', 'LET', 'IT', 'OUT', 'TOMORROW'] +367-293981-0002-956: hyp=['I', 'SAY', 'REPLIED', 'SANCHO', 'THAT', 'I', 'SWEAR', 'TO', 'HOLD', 'MY', 'TONGUE', 'ABOUT', 'IT', 'TILL', 'THE', 'END', 'OF', 'YOUR', 'WORSHIP', 'STAYS', 'AND', 'GON', 'GRAT', 'I', 'MAY', 'BE', 'ABLE', 'TO', 'LET', 'IT', 'OUT', 'TO', 'MORROW'] +367-293981-0003-957: ref=['THOUGH', 'YOUR', 'WORSHIP', 'WAS', 'NOT', 'SO', 'BADLY', 'OFF', 'HAVING', 'IN', 'YOUR', 'ARMS', 'THAT', 'INCOMPARABLE', 'BEAUTY', 'YOU', 'SPOKE', 'OF', 'BUT', 'I', 'WHAT', 'DID', 'I', 'HAVE', 'EXCEPT', 'THE', 'HEAVIEST', 'WHACKS', 'I', 'THINK', 'I', 'HAD', 'IN', 'ALL', 'MY', 'LIFE'] +367-293981-0003-957: hyp=['THOUGH', 'YOUR', 'WORSHIP', 'WAS', 'NOT', 'SO', 'BADLY', 'OFF', 'HAVING', 'IN', 'YOUR', 'ARMS', 'THE', 'INCOMPARABLE', 'BEAUTY', 'YOU', 'SPOKE', 'OF', 'BUT', 'AYE', 'WHAT', 'DID', 'I', 'HAVE', 'EXCEPT', 'THE', 'HEAVIEST', 'WAX', 'THAT', 'I', 'THINK', 'I', 'HAD', 'IN', 'ALL', 'MY', 'LIFE'] +367-293981-0004-958: ref=['UNLUCKY', 'ME', 'AND', 'THE', 'MOTHER', 'THAT', 'BORE', 'ME'] +367-293981-0004-958: hyp=['UNLUCKY', 'MAN', 'TO', 'THE', 'MOTHER', 'THAT', 'BORE', 'ME'] +367-293981-0005-959: ref=["DIDN'T", 'I', 'SAY', 'SO', 'WORSE', 'LUCK', 'TO', 'MY', 'LINE', 'SAID', 'SANCHO'] +367-293981-0005-959: hyp=["DIDN'T", 'I', 'SAY', 'SO', 'WORSE', 'LUCK', 'TO', 'MY', 'LINE', 'SAID', 'SANCHO'] +367-293981-0006-960: ref=['IT', 'CANNOT', 'BE', 'THE', 'MOOR', 'ANSWERED', 'DON', 'QUIXOTE', 'FOR', 'THOSE', 'UNDER', 'ENCHANTMENT', 'DO', 'NOT', 'LET', 'THEMSELVES', 'BE', 'SEEN', 'BY', 'ANYONE'] +367-293981-0006-960: hyp=['IT', 'CANNOT', 'BE', 'THE', 'MORE', 'ANSWERED', 'DON', 
'QUIXOTE', 'FOR', 'THOSE', 'UNDER', 'ENCHANTMENT', 'DO', 'NOT', 'LET', 'THEMSELVES', 'BE', 'SEEN', 'BY', 'ANYONE'] +367-293981-0007-961: ref=['IF', 'THEY', "DON'T", 'LET', 'THEMSELVES', 'BE', 'SEEN', 'THEY', 'LET', 'THEMSELVES', 'BE', 'FELT', 'SAID', 'SANCHO', 'IF', 'NOT', 'LET', 'MY', 'SHOULDERS', 'SPEAK', 'TO', 'THE', 'POINT'] +367-293981-0007-961: hyp=['IF', 'THEY', 'DO', 'NOT', 'LET', 'THEMSELVES', 'BE', 'SEEN', 'THEY', 'LET', 'THEMSELVES', 'BE', 'FELT', 'SAID', 'SANCHO', 'IF', 'NOT', 'LET', 'MY', 'SHOULDERS', 'SPEAK', 'TO', 'THE', 'POINT'] +367-293981-0008-962: ref=['MINE', 'COULD', 'SPEAK', 'TOO', 'SAID', 'DON', 'QUIXOTE', 'BUT', 'THAT', 'IS', 'NOT', 'A', 'SUFFICIENT', 'REASON', 'FOR', 'BELIEVING', 'THAT', 'WHAT', 'WE', 'SEE', 'IS', 'THE', 'ENCHANTED', 'MOOR'] +367-293981-0008-962: hyp=['I', 'COULD', 'SPEAK', 'TOO', 'SAID', 'DON', 'QUIXOTE', 'BUT', 'THAT', 'IS', 'NOT', 'A', 'SUFFICIENT', 'REASON', 'FOR', 'BELIEVING', 'THAT', 'WHAT', 'WE', 'SEE', 'IS', 'THE', 'ENCHANTED', 'MOOR'] +367-293981-0009-963: ref=['THE', 'OFFICER', 'TURNED', 'TO', 'HIM', 'AND', 'SAID', 'WELL', 'HOW', 'GOES', 'IT', 'GOOD', 'MAN'] +367-293981-0009-963: hyp=['THE', 'OFFICERS', 'TURNED', 'TO', 'HIM', 'AND', 'SAID', 'WELL', 'HOW', 'GOES', 'IT', 'GOOD', 'MAN'] +367-293981-0010-964: ref=['SANCHO', 'GOT', 'UP', 'WITH', 'PAIN', 'ENOUGH', 'IN', 'HIS', 'BONES', 'AND', 'WENT', 'AFTER', 'THE', 'INNKEEPER', 'IN', 'THE', 'DARK', 'AND', 'MEETING', 'THE', 'OFFICER', 'WHO', 'WAS', 'LOOKING', 'TO', 'SEE', 'WHAT', 'HAD', 'BECOME', 'OF', 'HIS', 'ENEMY', 'HE', 'SAID', 'TO', 'HIM', 'SENOR', 'WHOEVER', 'YOU', 'ARE', 'DO', 'US', 'THE', 'FAVOUR', 'AND', 'KINDNESS', 'TO', 'GIVE', 'US', 'A', 'LITTLE', 'ROSEMARY', 'OIL', 'SALT', 'AND', 'WINE', 'FOR', 'IT', 'IS', 'WANTED', 'TO', 'CURE', 'ONE', 'OF', 'THE', 'BEST', 'KNIGHTS', 'ERRANT', 'ON', 'EARTH', 'WHO', 'LIES', 'ON', 'YONDER', 'BED', 'WOUNDED', 'BY', 'THE', 'HANDS', 'OF', 'THE', 'ENCHANTED', 'MOOR', 'THAT', 'IS', 'IN', 'THIS', 'INN'] +367-293981-0010-964: hyp=['SANCHO', 'GOT', 'UP', 'WITH', 'PAIN', 'ENOUGH', 'IN', 'HIS', 'BONES', 'AND', 'WENT', 'AFTER', 'THE', 'INNKEEPER', 'IN', 'THE', 'DARK', 'AND', 'MEETING', 'THE', 'OFFICER', 'WHO', 'WAS', 'LOOKING', 'TO', 'SEE', 'WHAT', 'HAD', 'BECOME', 'OF', 'HIS', 'ENEMY', 'HE', 'SAID', 'TO', 'HIM', 'SENOR', 'WHOEVER', 'YOU', 'ARE', 'DO', 'US', 'THE', 'FAVOUR', 'AND', 'KINDNESS', 'TO', 'GIVE', 'US', 'A', 'LITTLE', 'ROSEMARY', 'OIL', 'SALT', 'AND', 'WINE', 'FOR', 'IT', 'IS', 'WANTED', 'TO', 'CURE', 'ONE', 'OF', 'OUR', 'BEST', 'KNIGHTS', 'ERRANT', 'ON', 'EARTH', 'WHO', 'LIES', 'ON', 'YONDER', 'BED', 'WOUNDED', 'BY', 'THE', 'HANDS', 'OF', 'THE', 'ENCHANTED', 'MOOR', 'THAT', 'IS', 'IN', 'THIS', 'INN'] +367-293981-0011-965: ref=['TO', 'BE', 'BRIEF', 'HE', 'TOOK', 'THE', 'MATERIALS', 'OF', 'WHICH', 'HE', 'MADE', 'A', 'COMPOUND', 'MIXING', 'THEM', 'ALL', 'AND', 'BOILING', 'THEM', 'A', 'GOOD', 'WHILE', 'UNTIL', 'IT', 'SEEMED', 'TO', 'HIM', 'THEY', 'HAD', 'COME', 'TO', 'PERFECTION'] +367-293981-0011-965: hyp=['TO', 'BE', 'BRIEF', 'HE', 'TOOK', 'THE', 'MATERIALS', 'OF', 'WHICH', 'HE', 'MADE', 'A', 'COMPOUND', 'MIXING', 'THEM', 'WELL', 'AND', 'BOILING', 'THEM', 'A', 'GOOD', 'WALLET', 'UNTIL', 'IT', 'SEEMED', 'TO', 'HIM', 'THEY', 'HAD', 'COME', 'TO', 'PERFECTION'] +367-293981-0012-966: ref=['SANCHO', 'PANZA', 'WHO', 'ALSO', 'REGARDED', 'THE', 'AMENDMENT', 'OF', 'HIS', 'MASTER', 'AS', 'MIRACULOUS', 'BEGGED', 'HIM', 'TO', 'GIVE', 'HIM', 'WHAT', 'WAS', 'LEFT', 'IN', 'THE', 'PIGSKIN', 'WHICH', 'WAS', 'NO', 'SMALL', 'QUANTITY'] +367-293981-0012-966: 
hyp=['SANCHO', 'PANZA', 'WHO', 'ALSO', 'REGARDED', 'THE', 'AMENDMENT', 'OF', 'HIS', 'MASTER', 'AS', 'MIRACULOUS', 'BEGGED', 'HIM', 'TO', 'GIVE', 'HIM', 'WHAT', 'WAS', 'LEFT', 'IN', 'THE', 'PIG', 'SKIN', 'WHICH', 'WAS', 'NO', 'SMALL', 'QUANTITY'] +367-293981-0013-967: ref=['DON', 'QUIXOTE', 'CONSENTED', 'AND', 'HE', 'TAKING', 'IT', 'WITH', 'BOTH', 'HANDS', 'IN', 'GOOD', 'FAITH', 'AND', 'WITH', 'A', 'BETTER', 'WILL', 'GULPED', 'DOWN', 'AND', 'DRAINED', 'OFF', 'VERY', 'LITTLE', 'LESS', 'THAN', 'HIS', 'MASTER'] +367-293981-0013-967: hyp=['DON', 'QUIXOTE', 'CONSENTED', 'AND', 'HE', 'TAKING', 'IT', 'WITH', 'BOTH', 'HANDS', 'IN', 'GOOD', 'FAITH', 'AND', 'WITH', 'A', 'BETTER', 'WILL', 'GULPED', 'IT', 'DOWN', 'AND', 'DRAINED', 'OUT', 'VERY', 'LITTLE', 'LESS', 'THAN', 'HIS', 'MASTER'] +367-293981-0014-968: ref=['IF', 'YOUR', 'WORSHIP', 'KNEW', 'THAT', 'RETURNED', 'SANCHO', 'WOE', 'BETIDE', 'ME', 'AND', 'ALL', 'MY', 'KINDRED', 'WHY', 'DID', 'YOU', 'LET', 'ME', 'TASTE', 'IT'] +367-293981-0014-968: hyp=['IF', 'YOUR', 'WORSHIP', 'KNEW', 'THAT', 'RETURNED', 'SANCHO', 'WOE', 'BETIDE', 'ME', 'AND', 'ALL', 'MY', 'KINDRED', 'WHY', 'DID', 'YOU', 'LET', 'ME', 'TASTE', 'IT'] +367-293981-0015-969: ref=['SEARCH', 'YOUR', 'MEMORY', 'AND', 'IF', 'YOU', 'FIND', 'ANYTHING', 'OF', 'THIS', 'KIND', 'YOU', 'NEED', 'ONLY', 'TELL', 'ME', 'OF', 'IT', 'AND', 'I', 'PROMISE', 'YOU', 'BY', 'THE', 'ORDER', 'OF', 'KNIGHTHOOD', 'WHICH', 'I', 'HAVE', 'RECEIVED', 'TO', 'PROCURE', 'YOU', 'SATISFACTION', 'AND', 'REPARATION', 'TO', 'THE', 'UTMOST', 'OF', 'YOUR', 'DESIRE'] +367-293981-0015-969: hyp=['SEARCH', 'YOUR', 'MEMORY', 'AND', 'IF', 'YOU', 'FIND', 'ANYTHING', 'OF', 'THIS', 'KIND', 'YOU', 'NEED', 'ONLY', 'TELL', 'ME', 'OF', 'IT', 'AND', 'I', 'PROMISE', 'YOU', 'BY', 'THE', 'ORDER', 'OF', 'KNIGHTHOOD', 'WHICH', 'I', 'HAVE', 'RECEIVED', 'TO', 'PROCURE', 'YOU', 'SATISFACTION', 'AND', 'REPARATION', 'TO', 'THE', 'UTMOST', 'OF', 'YOUR', 'DESIRE'] +367-293981-0016-970: ref=['THEN', 'THIS', 'IS', 'AN', 'INN', 'SAID', 'DON', 'QUIXOTE'] +367-293981-0016-970: hyp=['THEN', 'THIS', 'IS', 'AN', 'INN', 'SAID', 'DON', 'QUIXOTE'] +367-293981-0017-971: ref=['AND', 'A', 'VERY', 'RESPECTABLE', 'ONE', 'SAID', 'THE', 'INNKEEPER'] +367-293981-0017-971: hyp=['AND', 'A', 'VERY', 'RESPECTABLE', 'ONE', 'SAID', 'THE', 'INNKEEPER'] +367-293981-0018-972: ref=['THE', 'CRIES', 'OF', 'THE', 'POOR', 'BLANKETED', 'WRETCH', 'WERE', 'SO', 'LOUD', 'THAT', 'THEY', 'REACHED', 'THE', 'EARS', 'OF', 'HIS', 'MASTER', 'WHO', 'HALTING', 'TO', 'LISTEN', 'ATTENTIVELY', 'WAS', 'PERSUADED', 'THAT', 'SOME', 'NEW', 'ADVENTURE', 'WAS', 'COMING', 'UNTIL', 'HE', 'CLEARLY', 'PERCEIVED', 'THAT', 'IT', 'WAS', 'HIS', 'SQUIRE', 'WHO', 'UTTERED', 'THEM'] +367-293981-0018-972: hyp=['THE', 'CRIES', 'OF', 'THE', 'POOR', 'BLANDED', 'WRETCH', 'WERE', 'SO', 'LOUD', 'THAT', 'THEY', 'REACHED', 'THE', 'EARS', 'OF', 'HIS', 'MASTER', 'WHO', 'HALTING', 'TO', 'LISTEN', 'ATTENTIVELY', 'WAS', 'PERSUADED', 'THAT', 'SOME', 'NEW', 'ADVENTURE', 'WAS', 'COMING', 'UNTIL', 'HE', 'CLEARLY', 'PERCEIVED', 'THAT', 'IT', 'WAS', 'HIS', 'SQUIRE', 'WHO', 'UTTERED', 'THEM'] +367-293981-0019-973: ref=['HE', 'SAW', 'HIM', 'RISING', 'AND', 'FALLING', 'IN', 'THE', 'AIR', 'WITH', 'SUCH', 'GRACE', 'AND', 'NIMBLENESS', 'THAT', 'HAD', 'HIS', 'RAGE', 'ALLOWED', 'HIM', 'IT', 'IS', 'MY', 'BELIEF', 'HE', 'WOULD', 'HAVE', 'LAUGHED'] +367-293981-0019-973: hyp=['HE', 'SAW', 'HIM', 'RISING', 'AND', 'FALLING', 'IN', 'THE', 'AIR', 'WITH', 'SUCH', 'GRACE', 'AND', 'NIMBLENESS', 'THAT', 'HAD', 'HIS', 'RAGE', 'ALLOWED', 'HIM', 'IT', 'IS', 
'MY', 'BELIEF', 'HE', 'WOULD', 'HAVE', 'LAUGHED'] +367-293981-0020-974: ref=['SANCHO', 'TOOK', 'IT', 'AND', 'AS', 'HE', 'WAS', 'RAISING', 'IT', 'TO', 'HIS', 'MOUTH', 'HE', 'WAS', 'STOPPED', 'BY', 'THE', 'CRIES', 'OF', 'HIS', 'MASTER', 'EXCLAIMING', 'SANCHO', 'MY', 'SON', 'DRINK', 'NOT', 'WATER', 'DRINK', 'IT', 'NOT', 'MY', 'SON', 'FOR', 'IT', 'WILL', 'KILL', 'THEE', 'SEE', 'HERE', 'I', 'HAVE', 'THE', 'BLESSED', 'BALSAM', 'AND', 'HE', 'HELD', 'UP', 'THE', 'FLASK', 'OF', 'LIQUOR', 'AND', 'WITH', 'DRINKING', 'TWO', 'DROPS', 'OF', 'IT', 'THOU', 'WILT', 'CERTAINLY', 'BE', 'RESTORED'] +367-293981-0020-974: hyp=['SANCHO', 'TOOK', 'IT', 'AND', 'AS', 'HE', 'WAS', 'RAISING', 'IT', 'TO', 'HIS', 'MOUTH', 'HE', 'WAS', 'STOPPED', 'BY', 'THE', 'CRIES', 'OF', 'HIS', 'MASTER', 'EXCLAIMING', 'SANCHO', 'MY', 'SON', 'DRINK', 'NOT', 'WATER', 'DRINK', 'IT', 'NOT', 'MY', 'SON', 'FOR', 'IT', 'WILL', 'KILL', 'THEE', 'SEE', 'HERE', 'I', 'HAVE', 'THE', 'BLESSED', 'BALSAM', 'AND', 'HE', 'HELD', 'UP', 'THE', 'FLASK', 'OF', 'LIQUOR', 'AND', 'WITH', 'DRINKING', 'TWO', 'DROPS', 'OF', 'IT', 'THOU', 'WILT', 'CERTAINLY', 'BE', 'RESTORED'] +3764-168670-0000-975: ref=['THE', 'STRIDES', 'OF', 'A', 'LAME', 'MAN', 'ARE', 'LIKE', 'THE', 'OGLING', 'GLANCES', 'OF', 'A', 'ONE', 'EYED', 'MAN', 'THEY', 'DO', 'NOT', 'REACH', 'THEIR', 'GOAL', 'VERY', 'PROMPTLY'] +3764-168670-0000-975: hyp=['THE', 'STRIDES', 'OF', 'A', 'LAME', 'MAN', 'ARE', 'LIKE', 'THE', 'OGLING', 'GLANCES', 'OF', 'A', 'ONE', 'EYED', 'MAN', 'THEY', 'DO', 'NOT', 'REACH', 'THEIR', 'GOAL', 'VERY', 'PROMPTLY'] +3764-168670-0001-976: ref=['COSETTE', 'HAD', 'WAKED', 'UP'] +3764-168670-0001-976: hyp=['COSETTE', 'HAD', 'WAKED', 'UP'] +3764-168670-0002-977: ref=['JEAN', 'VALJEAN', 'HAD', 'PLACED', 'HER', 'NEAR', 'THE', 'FIRE'] +3764-168670-0002-977: hyp=['JEAN', 'VALJEAN', 'HAD', 'PLACED', 'HER', 'NEAR', 'THE', 'FIRE'] +3764-168670-0003-978: ref=['YOU', 'WILL', 'WAIT', 'FOR', 'ME', 'AT', 'A', "LADY'S", 'HOUSE', 'I', 'SHALL', 'COME', 'TO', 'FETCH', 'YOU'] +3764-168670-0003-978: hyp=['YOU', 'WILL', 'WAIT', 'FOR', 'ME', 'AT', 'A', "LADY'S", 'HOUSE', 'I', 'SHALL', 'COME', 'TO', 'FETCH', 'YOU'] +3764-168670-0004-979: ref=['EVERYTHING', 'IS', 'ARRANGED', 'AND', 'NOTHING', 'IS', 'SAID', 'FAUCHELEVENT'] +3764-168670-0004-979: hyp=['EVERYTHING', 'IS', 'ARRANGED', 'AND', 'NOTHING', 'IS', 'SAID', 'FAUCHELEVENT'] +3764-168670-0005-980: ref=['I', 'HAVE', 'PERMISSION', 'TO', 'BRING', 'YOU', 'IN', 'BUT', 'BEFORE', 'BRINGING', 'YOU', 'IN', 'YOU', 'MUST', 'BE', 'GOT', 'OUT'] +3764-168670-0005-980: hyp=['I', 'HAVE', 'PERMISSION', 'TO', 'BRING', 'YOU', 'IN', 'BUT', 'BEFORE', 'BRINGING', 'YOU', 'IN', 'YOU', 'MUST', 'BE', 'GOT', 'OUT'] +3764-168670-0006-981: ref=["THAT'S", 'WHERE', 'THE', 'DIFFICULTY', 'LIES'] +3764-168670-0006-981: hyp=["THAT'S", 'WHERE', 'THE', 'DIFFICULTY', 'LIES'] +3764-168670-0007-982: ref=['IT', 'IS', 'EASY', 'ENOUGH', 'WITH', 'THE', 'CHILD', 'YOU', 'WILL', 'CARRY', 'HER', 'OUT'] +3764-168670-0007-982: hyp=['IT', 'IS', 'EASY', 'ENOUGH', 'WITH', 'THE', 'CHILD', 'YOU', 'WILL', 'CARRY', 'HER', 'OUT'] +3764-168670-0008-983: ref=['AND', 'SHE', 'WILL', 'HOLD', 'HER', 'TONGUE', 'I', 'ANSWER', 'FOR', 'THAT'] +3764-168670-0008-983: hyp=['AND', 'SHE', 'WILL', 'HOLD', 'HER', 'TONGUE', 'I', 'ANSWER', 'FOR', 'THAT'] +3764-168670-0009-984: ref=['FAUCHELEVENT', 'GRUMBLED', 'MORE', 'TO', 'HIMSELF', 'THAN', 'TO', 'JEAN', 'VALJEAN'] +3764-168670-0009-984: hyp=['FAUCHELEVENT', 'GRUMBLED', 'MORE', 'TO', 'HIMSELF', 'THAN', 'TO', 'JEAN', 'VALJEAN'] +3764-168670-0010-985: ref=['YOU', 'UNDERSTAND', 
'FATHER', 'MADELEINE', 'THE', 'GOVERNMENT', 'WILL', 'NOTICE', 'IT'] +3764-168670-0010-985: hyp=['YOU', 'UNDERSTAND', 'FATHER', 'MADELEINE', 'THE', 'GOVERNMENT', 'WILL', 'NOTICE', 'IT'] +3764-168670-0011-986: ref=['JEAN', 'VALJEAN', 'STARED', 'HIM', 'STRAIGHT', 'IN', 'THE', 'EYE', 'AND', 'THOUGHT', 'THAT', 'HE', 'WAS', 'RAVING'] +3764-168670-0011-986: hyp=['JEAN', 'VALJEAN', 'STARED', 'HIM', 'STRAIGHT', 'IN', 'THE', 'EYE', 'AND', 'THOUGHT', 'THAT', 'HE', 'WAS', 'RAVING'] +3764-168670-0012-987: ref=['FAUCHELEVENT', 'WENT', 'ON'] +3764-168670-0012-987: hyp=['FAUCHELEVENT', 'WENT', 'ON'] +3764-168670-0013-988: ref=['IT', 'IS', 'TO', 'MORROW', 'THAT', 'I', 'AM', 'TO', 'BRING', 'YOU', 'IN', 'THE', 'PRIORESS', 'EXPECTS', 'YOU'] +3764-168670-0013-988: hyp=['IT', 'IS', 'TO', 'MORROW', 'THAT', 'I', 'AM', 'TO', 'BRING', 'YOU', 'IN', 'THE', 'PRIORESS', 'EXPECTS', 'YOU'] +3764-168670-0014-989: ref=['THEN', 'HE', 'EXPLAINED', 'TO', 'JEAN', 'VALJEAN', 'THAT', 'THIS', 'WAS', 'HIS', 'RECOMPENSE', 'FOR', 'A', 'SERVICE', 'WHICH', 'HE', 'FAUCHELEVENT', 'WAS', 'TO', 'RENDER', 'TO', 'THE', 'COMMUNITY'] +3764-168670-0014-989: hyp=['THEN', 'HE', 'EXPLAINED', 'TO', 'JEAN', 'VALJEAN', 'THAT', 'THIS', 'WAS', 'HIS', 'RECOMPENSE', 'FOR', 'A', 'SERVICE', 'WHICH', 'HE', 'FAUCHELEVENT', 'WAS', 'TO', 'RENDER', 'TO', 'THE', 'COMMUNITY'] +3764-168670-0015-990: ref=['THAT', 'THE', 'NUN', 'WHO', 'HAD', 'DIED', 'THAT', 'MORNING', 'HAD', 'REQUESTED', 'TO', 'BE', 'BURIED', 'IN', 'THE', 'COFFIN', 'WHICH', 'HAD', 'SERVED', 'HER', 'FOR', 'A', 'BED', 'AND', 'INTERRED', 'IN', 'THE', 'VAULT', 'UNDER', 'THE', 'ALTAR', 'OF', 'THE', 'CHAPEL'] +3764-168670-0015-990: hyp=['THAT', 'THE', 'NUN', 'WHO', 'HAD', 'DIED', 'THAT', 'MORNING', 'HAD', 'REQUESTED', 'TO', 'BE', 'BURIED', 'IN', 'THE', 'COFFIN', 'WHICH', 'HAD', 'SERVED', 'HER', 'FOR', 'A', 'BED', 'AND', 'INTERRED', 'IN', 'THE', 'VAULT', 'UNDER', 'THE', 'ALTAR', 'OF', 'THE', 'CHAPEL'] +3764-168670-0016-991: ref=['THAT', 'THE', 'PRIORESS', 'AND', 'THE', 'VOCAL', 'MOTHERS', 'INTENDED', 'TO', 'FULFIL', 'THE', 'WISH', 'OF', 'THE', 'DECEASED'] +3764-168670-0016-991: hyp=['THAT', 'THE', 'PRIORESS', 'AND', 'THE', 'VOCAL', 'MOTHERS', 'INTENDED', 'TO', 'FULFIL', 'THE', 'WISH', 'OF', 'THE', 'DECEASED'] +3764-168670-0017-992: ref=['THAT', 'HE', 'FAUCHELEVENT', 'WAS', 'TO', 'NAIL', 'UP', 'THE', 'COFFIN', 'IN', 'THE', 'CELL', 'RAISE', 'THE', 'STONE', 'IN', 'THE', 'CHAPEL', 'AND', 'LOWER', 'THE', 'CORPSE', 'INTO', 'THE', 'VAULT'] +3764-168670-0017-992: hyp=['THAT', 'HE', 'FAUCHELEVENT', 'WAS', 'TO', 'NAIL', 'UP', 'THE', 'COFFIN', 'IN', 'THE', 'CELL', 'RAISE', 'THE', 'STONE', 'IN', 'THE', 'CHAPEL', 'AND', 'BLOWER', 'THE', 'CORPSE', 'INTO', 'THE', 'VAULT'] +3764-168670-0018-993: ref=['AND', 'THEN', 'THAT', 'THERE', 'WAS', 'ANOTHER', 'THE', 'EMPTY', 'COFFIN'] +3764-168670-0018-993: hyp=['AND', 'THEN', 'THAT', 'THERE', 'WAS', 'ANOTHER', 'THE', 'EMPTY', 'COFFIN'] +3764-168670-0019-994: ref=['WHAT', 'IS', 'THAT', 'EMPTY', 'COFFIN'] +3764-168670-0019-994: hyp=['WHAT', 'IS', 'THAT', 'EMPTY', 'COFFIN'] +3764-168670-0020-995: ref=['ASKED', 'JEAN', 'VALJEAN', 'FAUCHELEVENT', 'REPLIED'] +3764-168670-0020-995: hyp=['ASKED', 'JEAN', 'VALJEAN', 'FAUCHELEVENT', 'REPLIED'] +3764-168670-0021-996: ref=['WHAT', 'COFFIN', 'WHAT', 'ADMINISTRATION'] +3764-168670-0021-996: hyp=['WHAT', 'COFFIN', 'WHAT', 'ADMINISTRATION'] +3764-168670-0022-997: ref=['FAUCHELEVENT', 'WHO', 'WAS', 'SEATED', 'SPRANG', 'UP', 'AS', 'THOUGH', 'A', 'BOMB', 'HAD', 'BURST', 'UNDER', 'HIS', 'CHAIR', 'YOU'] +3764-168670-0022-997: hyp=['FAUCHELEVENT', 
'WHO', 'WAS', 'SEATED', 'SPRANG', 'UP', 'AS', 'THOUGH', 'A', 'BOMB', 'HAD', 'BURST', 'UNDER', 'HIS', 'CHAIR', 'YOU'] +3764-168670-0023-998: ref=['YOU', 'KNOW', 'FAUCHELEVENT', 'WHAT', 'YOU', 'HAVE', 'SAID', 'MOTHER', 'CRUCIFIXION', 'IS', 'DEAD'] +3764-168670-0023-998: hyp=['YOU', 'KNOW', 'FAUCHELEVENT', 'WHAT', 'YOU', 'HAVE', 'SAID', 'MOTHER', 'CRUCIFIXION', 'IS', 'DEAD'] +3764-168670-0024-999: ref=['AND', 'I', 'ADD', 'AND', 'FATHER', 'MADELEINE', 'IS', 'BURIED', 'AH'] +3764-168670-0024-999: hyp=['AND', 'I', 'ADD', 'AND', 'FATHER', 'MADELEINE', 'IS', 'BURIED', 'AH'] +3764-168670-0025-1000: ref=['YOU', 'ARE', 'NOT', 'LIKE', 'OTHER', 'MEN', 'FATHER', 'MADELEINE'] +3764-168670-0025-1000: hyp=['YOU', 'ARE', 'NOT', 'LIKE', 'OTHER', 'MEN', 'FATHER', 'MADELEINE'] +3764-168670-0026-1001: ref=['THIS', 'OFFERS', 'THE', 'MEANS', 'BUT', 'GIVE', 'ME', 'SOME', 'INFORMATION', 'IN', 'THE', 'FIRST', 'PLACE'] +3764-168670-0026-1001: hyp=['THIS', 'OFFERS', 'THE', 'MEANS', 'GIVE', 'ME', 'SOME', 'INFORMATION', 'IN', 'THE', 'FIRST', 'PLACE'] +3764-168670-0027-1002: ref=['HOW', 'LONG', 'IS', 'THE', 'COFFIN', 'SIX', 'FEET'] +3764-168670-0027-1002: hyp=['HOW', 'LONG', 'IS', 'THE', 'COFFIN', 'SIX', 'FEET'] +3764-168670-0028-1003: ref=['IT', 'IS', 'A', 'CHAMBER', 'ON', 'THE', 'GROUND', 'FLOOR', 'WHICH', 'HAS', 'A', 'GRATED', 'WINDOW', 'OPENING', 'ON', 'THE', 'GARDEN', 'WHICH', 'IS', 'CLOSED', 'ON', 'THE', 'OUTSIDE', 'BY', 'A', 'SHUTTER', 'AND', 'TWO', 'DOORS', 'ONE', 'LEADS', 'INTO', 'THE', 'CONVENT', 'THE', 'OTHER', 'INTO', 'THE', 'CHURCH', 'WHAT', 'CHURCH'] +3764-168670-0028-1003: hyp=['IT', 'IS', 'A', 'CHAMBER', 'ON', 'THE', 'GROUND', 'FLOOR', 'WHICH', 'HAS', 'A', 'GRATED', 'WINDOW', 'OPENING', 'ON', 'THE', 'GARDEN', 'WHICH', 'IS', 'CLOSED', 'ON', 'THE', 'OUTSIDE', 'BY', 'A', 'SHUTTER', 'AND', 'TWO', 'DOORS', 'ONE', 'LEADS', 'INTO', 'THE', 'CONVENT', 'THE', 'OTHER', 'INTO', 'THE', 'CHURCH', 'WHAT', 'CHURCH'] +3764-168670-0029-1004: ref=['THE', 'CHURCH', 'IN', 'THE', 'STREET', 'THE', 'CHURCH', 'WHICH', 'ANY', 'ONE', 'CAN', 'ENTER'] +3764-168670-0029-1004: hyp=['THE', 'CHURCH', 'IN', 'THE', 'STREET', 'THE', 'CHURCH', 'WHICH', 'ANY', 'ONE', 'CAN', 'ENTER'] +3764-168670-0030-1005: ref=['HAVE', 'YOU', 'THE', 'KEYS', 'TO', 'THOSE', 'TWO', 'DOORS'] +3764-168670-0030-1005: hyp=['HAVE', 'YOU', 'THE', 'KEYS', 'TO', 'THOSE', 'TWO', 'DOORS'] +3764-168670-0031-1006: ref=['NO', 'I', 'HAVE', 'THE', 'KEY', 'TO', 'THE', 'DOOR', 'WHICH', 'COMMUNICATES', 'WITH', 'THE', 'CONVENT', 'THE', 'PORTER', 'HAS', 'THE', 'KEY', 'TO', 'THE', 'DOOR', 'WHICH', 'COMMUNICATES', 'WITH', 'THE', 'CHURCH'] +3764-168670-0031-1006: hyp=['NO', 'I', 'HAVE', 'THE', 'KEY', 'TO', 'THE', 'DOOR', 'WHICH', 'COMMUNICATES', 'WITH', 'THE', 'CONVENT', 'THE', 'PORTER', 'HAS', 'THE', 'KEY', 'TO', 'THE', 'DOOR', 'WHICH', 'COMMUNICATES', 'WITH', 'THE', 'CHURCH'] +3764-168670-0032-1007: ref=['ONLY', 'TO', 'ALLOW', 'THE', "UNDERTAKER'S", 'MEN', 'TO', 'ENTER', 'WHEN', 'THEY', 'COME', 'TO', 'GET', 'THE', 'COFFIN'] +3764-168670-0032-1007: hyp=['ONLY', 'TO', 'ALLOW', 'THE', "UNDERTAKER'S", 'MEN', 'TO', 'ENTER', 'WHEN', 'THEY', 'COME', 'TO', 'GET', 'THE', 'COFFIN'] +3764-168670-0033-1008: ref=['WHO', 'NAILS', 'UP', 'THE', 'COFFIN', 'I', 'DO'] +3764-168670-0033-1008: hyp=['WHO', 'NAILS', 'UP', 'THE', 'COFFIN', 'I', 'DO'] +3764-168670-0034-1009: ref=['WHO', 'SPREADS', 'THE', 'PALL', 'OVER', 'IT'] +3764-168670-0034-1009: hyp=['WHO', 'SPREADS', 'THE', 'PAW', 'OVER', 'IT'] +3764-168670-0035-1010: ref=['NOT', 'ANOTHER', 'MAN', 'EXCEPT', 'THE', 'POLICE', 'DOCTOR', 'CAN', 'ENTER', 
'THE', 'DEAD', 'ROOM', 'THAT', 'IS', 'EVEN', 'WRITTEN', 'ON', 'THE', 'WALL'] +3764-168670-0035-1010: hyp=['NOT', 'ANOTHER', 'MAN', 'EXCEPT', 'THE', 'POLICE', 'DOCTOR', 'CAN', 'ENTER', 'THE', 'DEDROOM', 'THAT', 'IS', 'EVEN', 'WRITTEN', 'ON', 'THE', 'WALL'] +3764-168670-0036-1011: ref=['COULD', 'YOU', 'HIDE', 'ME', 'IN', 'THAT', 'ROOM', 'TO', 'NIGHT', 'WHEN', 'EVERY', 'ONE', 'IS', 'ASLEEP'] +3764-168670-0036-1011: hyp=['COULD', 'YOU', 'HIDE', 'ME', 'IN', 'THAT', 'ROOM', 'TO', 'NIGHT', 'WHEN', 'EVERYONE', 'IS', 'ASLEEP'] +3764-168670-0037-1012: ref=['ABOUT', 'THREE', "O'CLOCK", 'IN', 'THE', 'AFTERNOON'] +3764-168670-0037-1012: hyp=['ABOUT', 'THREE', "O'CLOCK", 'IN', 'THE', 'AFTERNOON'] +3764-168670-0038-1013: ref=['I', 'SHALL', 'BE', 'HUNGRY', 'I', 'WILL', 'BRING', 'YOU', 'SOMETHING'] +3764-168670-0038-1013: hyp=['I', 'SHALL', 'BE', 'HUNGRY', 'I', 'WILL', 'BRING', 'YOU', 'SOMETHING'] +3764-168670-0039-1014: ref=['YOU', 'CAN', 'COME', 'AND', 'NAIL', 'ME', 'UP', 'IN', 'THE', 'COFFIN', 'AT', 'TWO', "O'CLOCK"] +3764-168670-0039-1014: hyp=['YOU', 'CAN', 'COME', 'AND', 'NAIL', 'ME', 'UP', 'IN', 'THE', 'COFFIN', 'AT', 'TWO', "O'CLOCK"] +3764-168670-0040-1015: ref=['FAUCHELEVENT', 'RECOILED', 'AND', 'CRACKED', 'HIS', 'FINGER', 'JOINTS', 'BUT', 'THAT', 'IS', 'IMPOSSIBLE'] +3764-168670-0040-1015: hyp=['FAUCHELEVENT', 'RECOILED', 'AND', 'CRACKED', 'HIS', 'FINGER', 'JOINTS', 'BUT', 'THAT', 'IS', 'IMPOSSIBLE'] +3764-168670-0041-1016: ref=['BAH', 'IMPOSSIBLE', 'TO', 'TAKE', 'A', 'HAMMER', 'AND', 'DRIVE', 'SOME', 'NAILS', 'IN', 'A', 'PLANK'] +3764-168670-0041-1016: hyp=['BAH', 'IMPOSSIBLE', 'TO', 'TAKE', 'A', 'HAMMER', 'AND', 'DRIVE', 'SOME', 'NAILS', 'IN', 'A', 'PLANK'] +3764-168670-0042-1017: ref=['JEAN', 'VALJEAN', 'HAD', 'BEEN', 'IN', 'WORSE', 'STRAITS', 'THAN', 'THIS'] +3764-168670-0042-1017: hyp=['JEAN', 'VALJEAN', 'HAD', 'BEEN', 'IN', 'WORSE', 'STRAITS', 'THAN', 'THIS'] +3764-168670-0043-1018: ref=['ANY', 'MAN', 'WHO', 'HAS', 'BEEN', 'A', 'PRISONER', 'UNDERSTANDS', 'HOW', 'TO', 'CONTRACT', 'HIMSELF', 'TO', 'FIT', 'THE', 'DIAMETER', 'OF', 'THE', 'ESCAPE'] +3764-168670-0043-1018: hyp=['ANY', 'MAN', 'WHO', 'HAS', 'BEEN', 'A', 'PRISONER', 'UNDERSTANDS', 'HOW', 'TO', 'CONTRACT', 'HIMSELF', 'TO', 'FIT', 'THE', 'DIAMETER', 'OF', 'THE', 'ESCAPE'] +3764-168670-0044-1019: ref=['WHAT', 'DOES', 'NOT', 'A', 'MAN', 'UNDERGO', 'FOR', 'THE', 'SAKE', 'OF', 'A', 'CURE'] +3764-168670-0044-1019: hyp=['WHAT', 'DOES', 'NOT', 'A', 'MAN', 'UNDERGO', 'FOR', 'THE', 'SAKE', 'OF', 'A', 'CURE'] +3764-168670-0045-1020: ref=['TO', 'HAVE', 'HIMSELF', 'NAILED', 'UP', 'IN', 'A', 'CASE', 'AND', 'CARRIED', 'OFF', 'LIKE', 'A', 'BALE', 'OF', 'GOODS', 'TO', 'LIVE', 'FOR', 'A', 'LONG', 'TIME', 'IN', 'A', 'BOX', 'TO', 'FIND', 'AIR', 'WHERE', 'THERE', 'IS', 'NONE', 'TO', 'ECONOMIZE', 'HIS', 'BREATH', 'FOR', 'HOURS', 'TO', 'KNOW', 'HOW', 'TO', 'STIFLE', 'WITHOUT', 'DYING', 'THIS', 'WAS', 'ONE', 'OF', 'JEAN', "VALJEAN'S", 'GLOOMY', 'TALENTS'] +3764-168670-0045-1020: hyp=['TO', 'HAVE', 'HIMSELF', 'NAILED', 'UP', 'IN', 'A', 'CASE', 'AND', 'CARRIED', 'OFF', 'LIKE', 'A', 'BALE', 'OF', 'GOODS', 'TO', 'LIVE', 'FOR', 'A', 'LONG', 'TIME', 'IN', 'A', 'BOX', 'TO', 'FIND', 'AIR', 'WHERE', 'THERE', 'IS', 'NONE', 'TO', 'ECONOMIZE', 'HIS', 'BREATH', 'FOR', 'HOURS', 'TO', 'KNOW', 'HOW', 'TO', 'STIFLE', 'WITHOUT', 'DYING', 'THIS', 'WAS', 'ONE', 'OF', 'JEAN', "VALJEAN'S", 'GLOOMY', 'TALENTS'] +3764-168670-0046-1021: ref=['YOU', 'SURELY', 'MUST', 'HAVE', 'A', 'GIMLET', 'YOU', 'WILL', 'MAKE', 'A', 'FEW', 'HOLES', 'HERE', 'AND', 'THERE', 'AROUND', 'MY', 'MOUTH', 
'AND', 'YOU', 'WILL', 'NAIL', 'THE', 'TOP', 'PLANK', 'ON', 'LOOSELY', 'GOOD', 'AND', 'WHAT', 'IF', 'YOU', 'SHOULD', 'HAPPEN', 'TO', 'COUGH', 'OR', 'TO', 'SNEEZE'] +3764-168670-0046-1021: hyp=['YOU', 'SURELY', 'MUST', 'HAVE', 'A', 'GIMLET', 'YOU', 'WILL', 'MAKE', 'A', 'FEW', 'HOLES', 'HERE', 'AND', 'THERE', 'AROUND', 'MY', 'MOUTH', 'AND', 'YOU', 'WILL', 'NAIL', 'THE', 'TOP', 'PLANK', 'ON', 'LOOSELY', 'GOOD', 'AND', 'WHAT', 'IF', 'YOU', 'SHOULD', 'HAPPEN', 'TO', 'COUGH', 'OR', 'TO', 'SNEEZE'] +3764-168670-0047-1022: ref=['A', 'MAN', 'WHO', 'IS', 'MAKING', 'HIS', 'ESCAPE', 'DOES', 'NOT', 'COUGH', 'OR', 'SNEEZE'] +3764-168670-0047-1022: hyp=['A', 'MAN', 'WHO', 'IS', 'MAKING', 'HIS', 'ESCAPE', 'DOES', 'NOT', 'COUGH', 'OR', 'SNEEZE'] +3764-168670-0048-1023: ref=['WHO', 'IS', 'THERE', 'WHO', 'HAS', 'NOT', 'SAID', 'TO', 'A', 'CAT', 'DO', 'COME', 'IN'] +3764-168670-0048-1023: hyp=['WHO', 'IS', 'THERE', 'WHO', 'HAS', 'NOT', 'SAID', 'TO', 'A', 'CAT', 'DO', 'COME', 'IN'] +3764-168670-0049-1024: ref=['THE', 'OVER', 'PRUDENT', 'CATS', 'AS', 'THEY', 'ARE', 'AND', 'BECAUSE', 'THEY', 'ARE', 'CATS', 'SOMETIMES', 'INCUR', 'MORE', 'DANGER', 'THAN', 'THE', 'AUDACIOUS'] +3764-168670-0049-1024: hyp=['THE', 'OVER', 'PRUDENT', 'CATS', 'AS', 'THEY', 'ARE', 'AND', 'BECAUSE', 'THEY', 'ARE', 'CATS', 'SOMETIMES', 'INCUR', 'MORE', 'DANGER', 'THAN', 'THE', 'AUDACIOUS'] +3764-168670-0050-1025: ref=['BUT', 'JEAN', "VALJEAN'S", 'COOLNESS', 'PREVAILED', 'OVER', 'HIM', 'IN', 'SPITE', 'OF', 'HIMSELF', 'HE', 'GRUMBLED'] +3764-168670-0050-1025: hyp=['BUT', 'JEAN', "VALJEAN'S", 'COOLNESS', 'PREVAILED', 'OVER', 'HIM', 'IN', 'SPITE', 'OF', 'HIMSELF', 'HE', 'GRUMBLED'] +3764-168670-0051-1026: ref=['IF', 'YOU', 'ARE', 'SURE', 'OF', 'COMING', 'OUT', 'OF', 'THE', 'COFFIN', 'ALL', 'RIGHT', 'I', 'AM', 'SURE', 'OF', 'GETTING', 'YOU', 'OUT', 'OF', 'THE', 'GRAVE'] +3764-168670-0051-1026: hyp=['IF', 'YOU', 'ARE', 'SURE', 'OF', 'COMING', 'OUT', 'OF', 'THE', 'COFFIN', 'ALL', 'RIGHT', 'I', 'AM', 'SURE', 'OF', 'GETTING', 'YOU', 'OUT', 'OF', 'THE', 'GRAVE'] +3764-168670-0052-1027: ref=['AN', 'OLD', 'FELLOW', 'OF', 'THE', 'OLD', 'SCHOOL', 'THE', 'GRAVE', 'DIGGER', 'PUTS', 'THE', 'CORPSES', 'IN', 'THE', 'GRAVE', 'AND', 'I', 'PUT', 'THE', 'GRAVE', 'DIGGER', 'IN', 'MY', 'POCKET'] +3764-168670-0052-1027: hyp=['AN', 'OLD', 'FELLOW', 'OF', 'THE', 'OLD', 'SCHOOL', 'THE', 'GRAVEDIGGER', 'PUTS', 'THE', 'CORPSES', 'IN', 'THE', 'GRAVE', 'AND', 'I', 'PUT', 'THE', 'GRAVEDIGGER', 'IN', 'MY', 'POCKET'] +3764-168670-0053-1028: ref=['I', 'SHALL', 'FOLLOW', 'THAT', 'IS', 'MY', 'BUSINESS'] +3764-168670-0053-1028: hyp=['I', 'SHALL', 'FOLLOW', 'THAT', 'IS', 'MY', 'BUSINESS'] +3764-168670-0054-1029: ref=['THE', 'HEARSE', 'HALTS', 'THE', "UNDERTAKER'S", 'MEN', 'KNOT', 'A', 'ROPE', 'AROUND', 'YOUR', 'COFFIN', 'AND', 'LOWER', 'YOU', 'DOWN'] +3764-168670-0054-1029: hyp=['THE', 'HOUSE', 'HALTS', 'THE', "UNDERTAKER'S", 'MEN', 'KNOT', 'A', 'ROPE', 'ROUND', 'YOUR', 'COFFIN', 'AND', 'LOWER', 'YOU', 'DOWN'] +3764-168670-0055-1030: ref=['THE', 'PRIEST', 'SAYS', 'THE', 'PRAYERS', 'MAKES', 'THE', 'SIGN', 'OF', 'THE', 'CROSS', 'SPRINKLES', 'THE', 'HOLY', 'WATER', 'AND', 'TAKES', 'HIS', 'DEPARTURE'] +3764-168670-0055-1030: hyp=['THE', 'PRIEST', 'SAYS', 'THE', 'PRAYERS', 'MAKES', 'THE', 'SIGN', 'OF', 'THE', 'CROSS', 'SPRINKLES', 'THE', 'HOLY', 'WATER', 'AND', 'TAKES', 'HIS', 'DEPARTURE'] +3764-168670-0056-1031: ref=['ONE', 'OF', 'TWO', 'THINGS', 'WILL', 'HAPPEN', 'HE', 'WILL', 'EITHER', 'BE', 'SOBER', 'OR', 'HE', 'WILL', 'NOT', 'BE', 'SOBER'] +3764-168670-0056-1031: hyp=['ONE', 'OF', 
'TWO', 'THINGS', 'WILL', 'HAPPEN', 'HE', 'WILL', 'EITHER', 'BE', 'SOBER', 'OR', 'HE', 'WILL', 'NOT', 'BE', 'SOBER'] +3764-168670-0057-1032: ref=['THAT', 'IS', 'SETTLED', 'FATHER', 'FAUCHELEVENT', 'ALL', 'WILL', 'GO', 'WELL'] +3764-168670-0057-1032: hyp=['THAT', 'IS', 'SETTLED', 'FATHER', 'FAUCHELEVENT', 'ALL', 'WILL', 'GO', 'WELL'] +3764-168671-0000-1033: ref=['ON', 'THE', 'FOLLOWING', 'DAY', 'AS', 'THE', 'SUN', 'WAS', 'DECLINING', 'THE', 'VERY', 'RARE', 'PASSERS', 'BY', 'ON', 'THE', 'BOULEVARD', 'DU', 'MAINE', 'PULLED', 'OFF', 'THEIR', 'HATS', 'TO', 'AN', 'OLD', 'FASHIONED', 'HEARSE', 'ORNAMENTED', 'WITH', 'SKULLS', 'CROSS', 'BONES', 'AND', 'TEARS'] +3764-168671-0000-1033: hyp=['ON', 'THE', 'FOLLOWING', 'DAY', 'AS', 'THE', 'SUN', 'WAS', 'DECLINING', 'THE', 'VERY', 'RARE', 'PASSERS', 'BY', 'ON', 'THE', 'BOULEVARD', 'DES', 'MAIN', 'PULLED', 'OFF', 'THEIR', 'HATS', 'TO', 'AN', 'OLD', 'FASHIONED', 'HEARSE', 'ORNAMENTED', 'WITH', 'SKULLS', 'CROSS', 'BONES', 'AND', 'TEARS'] +3764-168671-0001-1034: ref=['THIS', 'HEARSE', 'CONTAINED', 'A', 'COFFIN', 'COVERED', 'WITH', 'A', 'WHITE', 'CLOTH', 'OVER', 'WHICH', 'SPREAD', 'A', 'LARGE', 'BLACK', 'CROSS', 'LIKE', 'A', 'HUGE', 'CORPSE', 'WITH', 'DROOPING', 'ARMS'] +3764-168671-0001-1034: hyp=['THIS', 'HEARSE', 'CONTAINED', 'A', 'COFFIN', 'COVERED', 'WITH', 'A', 'WHITE', 'CLOTH', 'OVER', 'WHICH', 'SPREAD', 'A', 'LARGE', 'BLACK', 'CROSS', 'LIKE', 'A', 'HUGE', 'CORPSE', 'WITH', 'DROOPING', 'ARMS'] +3764-168671-0002-1035: ref=['A', 'MOURNING', 'COACH', 'IN', 'WHICH', 'COULD', 'BE', 'SEEN', 'A', 'PRIEST', 'IN', 'HIS', 'SURPLICE', 'AND', 'A', 'CHOIR', 'BOY', 'IN', 'HIS', 'RED', 'CAP', 'FOLLOWED'] +3764-168671-0002-1035: hyp=['A', 'MOURNING', 'COACH', 'IN', 'WHICH', 'COULD', 'BE', 'SEEN', 'A', 'PRIEST', 'IN', 'HIS', 'SURPLICE', 'AND', 'A', 'CHOIR', 'BOY', 'IN', 'HIS', 'RED', 'CAP', 'FOLLOWED'] +3764-168671-0003-1036: ref=['BEHIND', 'IT', 'CAME', 'AN', 'OLD', 'MAN', 'IN', 'THE', 'GARMENTS', 'OF', 'A', 'LABORER', 'WHO', 'LIMPED', 'ALONG'] +3764-168671-0003-1036: hyp=['BEHIND', 'IT', 'CAME', 'AN', 'OLD', 'MAN', 'IN', 'THE', 'GARMENTS', 'OF', 'A', 'LABORER', 'WHO', 'LIMPED', 'ALONG'] +3764-168671-0004-1037: ref=['THE', 'GRAVE', 'DIGGERS', 'BEING', 'THUS', 'BOUND', 'TO', 'SERVICE', 'IN', 'THE', 'EVENING', 'IN', 'SUMMER', 'AND', 'AT', 'NIGHT', 'IN', 'WINTER', 'IN', 'THIS', 'CEMETERY', 'THEY', 'WERE', 'SUBJECTED', 'TO', 'A', 'SPECIAL', 'DISCIPLINE'] +3764-168671-0004-1037: hyp=['THE', 'GRAVE', 'DIGGERS', 'BEING', 'THUS', 'BOUND', 'TO', 'SERVICE', 'IN', 'THE', 'EVENING', 'IN', 'SUMMER', 'AND', 'AT', 'NIGHT', 'IN', 'WINTER', 'IN', 'THIS', 'CEMETERY', 'THEY', 'WERE', 'SUBJECTED', 'TO', 'A', 'SPECIAL', 'DISCIPLINE'] +3764-168671-0005-1038: ref=['THESE', 'GATES', 'THEREFORE', 'SWUNG', 'INEXORABLY', 'ON', 'THEIR', 'HINGES', 'AT', 'THE', 'INSTANT', 'WHEN', 'THE', 'SUN', 'DISAPPEARED', 'BEHIND', 'THE', 'DOME', 'OF', 'THE', 'INVALIDES'] +3764-168671-0005-1038: hyp=['THESE', 'GATES', 'THEREFORE', 'SWUNG', 'INEXORABLY', 'ON', 'THEIR', 'HINGES', 'AT', 'THE', 'INSTANT', 'WHEN', 'THE', 'SUN', 'DISAPPEARED', 'BEHIND', 'THE', 'DOME', 'OF', 'THE', 'INVALIDES'] +3764-168671-0006-1039: ref=['DAMPNESS', 'WAS', 'INVADING', 'IT', 'THE', 'FLOWERS', 'WERE', 'DESERTING', 'IT'] +3764-168671-0006-1039: hyp=['DAMPNESS', 'WAS', 'INVADING', 'IT', 'THE', 'FLOWERS', 'WERE', 'DESERTING', 'IT'] +3764-168671-0007-1040: ref=['THE', 'BOURGEOIS', 'DID', 'NOT', 'CARE', 'MUCH', 'ABOUT', 'BEING', 'BURIED', 'IN', 'THE', 'VAUGIRARD', 'IT', 'HINTED', 'AT', 'POVERTY', 'PERE', 'LACHAISE', 'IF', 'YOU', 'PLEASE'] 
+3764-168671-0007-1040: hyp=['THE', 'BOURGEOIS', 'DID', 'NOT', 'CARE', 'MUCH', 'ABOUT', 'BEING', 'BURIED', 'IN', 'THE', 'ROUGE', 'HOISS', 'IT', 'HINTED', 'AT', 'POVERTY', 'PARLAISE', 'IF', 'YOU', 'PLEASE'] +3764-168671-0008-1041: ref=['TO', 'BE', 'BURIED', 'IN', 'PERE', 'LACHAISE', 'IS', 'EQUIVALENT', 'TO', 'HAVING', 'FURNITURE', 'OF', 'MAHOGANY', 'IT', 'IS', 'RECOGNIZED', 'AS', 'ELEGANT'] +3764-168671-0008-1041: hyp=['TO', 'BE', 'BURIED', 'IN', 'PERELACHASE', 'IS', 'EQUIVALENT', 'TO', 'HAVING', 'FURNITURE', 'OF', 'MAHOGANY', 'IT', 'IS', 'RECOGNIZED', 'AS', 'ELEGANT'] +3764-168671-0009-1042: ref=['THE', 'INTERMENT', 'OF', 'MOTHER', 'CRUCIFIXION', 'IN', 'THE', 'VAULT', 'UNDER', 'THE', 'ALTAR', 'THE', 'EXIT', 'OF', 'COSETTE', 'THE', 'INTRODUCTION', 'OF', 'JEAN', 'VALJEAN', 'TO', 'THE', 'DEAD', 'ROOM', 'ALL', 'HAD', 'BEEN', 'EXECUTED', 'WITHOUT', 'DIFFICULTY', 'AND', 'THERE', 'HAD', 'BEEN', 'NO', 'HITCH', 'LET', 'US', 'REMARK', 'IN', 'PASSING', 'THAT', 'THE', 'BURIAL', 'OF', 'MOTHER', 'CRUCIFIXION', 'UNDER', 'THE', 'ALTAR', 'OF', 'THE', 'CONVENT', 'IS', 'A', 'PERFECTLY', 'VENIAL', 'OFFENCE', 'IN', 'OUR', 'SIGHT'] +3764-168671-0009-1042: hyp=['THE', 'INTERMENT', 'OF', 'MOTHER', 'CRUCIFIXION', 'IN', 'THE', 'VAULT', 'UNDER', 'THE', 'ALTAR', 'THE', 'EXIT', 'OF', 'COSETTE', 'THE', 'INTRODUCTION', 'OF', 'JEAN', 'VALJEAN', 'INTO', 'THE', 'DEAD', 'ROOM', 'ALL', 'HAD', 'BEEN', 'EXECUTED', 'WITHOUT', 'DIFFICULTY', 'AND', 'THERE', 'HAD', 'BEEN', 'NO', 'HITCH', 'LET', 'US', 'REMARK', 'IN', 'PASSING', 'THAT', 'THE', 'BURIAL', 'OF', 'MOTHER', 'CRUCIFIXION', 'UNDER', 'THE', 'ALTAR', 'OF', 'THE', 'CONVENT', 'IS', 'A', 'PERFECTLY', 'VENAL', 'OFFENCE', 'IN', 'OUR', 'SIGHT'] +3764-168671-0010-1043: ref=['IT', 'IS', 'ONE', 'OF', 'THE', 'FAULTS', 'WHICH', 'RESEMBLE', 'A', 'DUTY'] +3764-168671-0010-1043: hyp=['IT', 'IS', 'ONE', 'OF', 'THE', 'FAULTS', 'WHICH', 'RESEMBLE', 'A', 'DUTY'] +3764-168671-0011-1044: ref=['THE', 'NUNS', 'HAD', 'COMMITTED', 'IT', 'NOT', 'ONLY', 'WITHOUT', 'DIFFICULTY', 'BUT', 'EVEN', 'WITH', 'THE', 'APPLAUSE', 'OF', 'THEIR', 'OWN', 'CONSCIENCES'] +3764-168671-0011-1044: hyp=['THE', 'NUNS', 'HAD', 'COMMITTED', 'IT', 'NOT', 'ONLY', 'WITHOUT', 'DIFFICULTY', 'BUT', 'EVEN', 'WITH', 'THE', 'APPLAUSE', 'OF', 'THEIR', 'OWN', 'CONSCIENCES'] +3764-168671-0012-1045: ref=['IN', 'THE', 'CLOISTER', 'WHAT', 'IS', 'CALLED', 'THE', 'GOVERNMENT', 'IS', 'ONLY', 'AN', 'INTERMEDDLING', 'WITH', 'AUTHORITY', 'AN', 'INTERFERENCE', 'WHICH', 'IS', 'ALWAYS', 'QUESTIONABLE'] +3764-168671-0012-1045: hyp=['IN', 'THE', 'CLOISTER', 'WHAT', 'IS', 'CALLED', 'THE', 'GOVERNMENT', 'IS', 'ONLY', 'AN', 'INTERMEDDLING', 'WITH', 'AUTHORITY', 'AN', 'INTERFERENCE', 'WHICH', 'IS', 'ALWAYS', 'QUESTIONABLE'] +3764-168671-0013-1046: ref=['MAKE', 'AS', 'MANY', 'LAWS', 'AS', 'YOU', 'PLEASE', 'MEN', 'BUT', 'KEEP', 'THEM', 'FOR', 'YOURSELVES'] +3764-168671-0013-1046: hyp=['MAKE', 'AS', 'MANY', 'NOISE', 'AS', 'YOU', 'PLEASE', 'MEN', 'BUT', 'KEEP', 'THEM', 'FOR', 'YOURSELVES'] +3764-168671-0014-1047: ref=['A', 'PRINCE', 'IS', 'NOTHING', 'IN', 'THE', 'PRESENCE', 'OF', 'A', 'PRINCIPLE'] +3764-168671-0014-1047: hyp=['A', 'PRINCE', 'IS', 'NOTHING', 'IN', 'THE', 'PRESENCE', 'OF', 'A', 'PRINCIPLE'] +3764-168671-0015-1048: ref=['FAUCHELEVENT', 'LIMPED', 'ALONG', 'BEHIND', 'THE', 'HEARSE', 'IN', 'A', 'VERY', 'CONTENTED', 'FRAME', 'OF', 'MIND'] +3764-168671-0015-1048: hyp=['FAUCHELEVENT', 'LIMPED', 'ALONG', 'BEHIND', 'THE', 'HEARSE', 'IN', 'A', 'VERY', 'CONTENTED', 'FRAME', 'OF', 'MIND'] +3764-168671-0016-1049: ref=['JEAN', "VALJEAN'S", 'COMPOSURE', 
'WAS', 'ONE', 'OF', 'THOSE', 'POWERFUL', 'TRANQUILLITIES', 'WHICH', 'ARE', 'CONTAGIOUS'] +3764-168671-0016-1049: hyp=['JEAN', "VALJEAN'S", 'COMPOSURE', 'WAS', 'ONE', 'OF', 'THOSE', 'POWERFUL', 'TRANQUILLITIES', 'WHICH', 'ARE', 'CONTAGIOUS'] +3764-168671-0017-1050: ref=['WHAT', 'REMAINED', 'TO', 'BE', 'DONE', 'WAS', 'A', 'MERE', 'NOTHING'] +3764-168671-0017-1050: hyp=['WHAT', 'REMAINED', 'TO', 'BE', 'DONE', 'WAS', 'A', 'MERE', 'NOTHING'] +3764-168671-0018-1051: ref=['HE', 'PLAYED', 'WITH', 'FATHER', 'MESTIENNE'] +3764-168671-0018-1051: hyp=['HE', 'PLAYED', 'WITH', 'FATHER', 'MISTIAN'] +3764-168671-0019-1052: ref=['HE', 'DID', 'WHAT', 'HE', 'LIKED', 'WITH', 'HIM', 'HE', 'MADE', 'HIM', 'DANCE', 'ACCORDING', 'TO', 'HIS', 'WHIM'] +3764-168671-0019-1052: hyp=['HE', 'DID', 'WHAT', 'HE', 'LIKED', 'WITH', 'HIM', 'HE', 'MADE', 'HIM', 'DANCE', 'ACCORDING', 'TO', 'HIS', 'WHIM'] +3764-168671-0020-1053: ref=['THE', 'PERMISSION', 'FOR', 'INTERMENT', 'MUST', 'BE', 'EXHIBITED'] +3764-168671-0020-1053: hyp=['THE', 'PERMISSION', 'FOR', 'INTERMENT', 'MUST', 'BE', 'EXHIBITED'] +3764-168671-0021-1054: ref=['HE', 'WAS', 'A', 'SORT', 'OF', 'LABORING', 'MAN', 'WHO', 'WORE', 'A', 'WAISTCOAT', 'WITH', 'LARGE', 'POCKETS', 'AND', 'CARRIED', 'A', 'MATTOCK', 'UNDER', 'HIS', 'ARM'] +3764-168671-0021-1054: hyp=['HE', 'WAS', 'A', 'SORT', 'OF', 'LABOURING', 'MAN', 'WHO', 'WORE', 'A', 'WAISTCOAT', 'WITH', 'LARGE', 'POCKETS', 'AND', 'CARRIED', 'A', 'MATTOCK', 'UNDER', 'HIS', 'ARM'] +3764-168671-0022-1055: ref=['THE', 'MAN', 'REPLIED', 'THE', 'GRAVE', 'DIGGER'] +3764-168671-0022-1055: hyp=['THE', 'MAN', 'REPLIED', 'THE', 'GRAVE', 'DIGGER'] +3764-168671-0023-1056: ref=['THE', 'GRAVE', 'DIGGER', 'YES'] +3764-168671-0023-1056: hyp=['THE', 'GRAVE', 'DIGGER', 'YES'] +3764-168671-0024-1057: ref=['YOU', 'I'] +3764-168671-0024-1057: hyp=['YOU', 'I'] +3764-168671-0025-1058: ref=['FATHER', 'MESTIENNE', 'IS', 'THE', 'GRAVE', 'DIGGER', 'HE', 'WAS'] +3764-168671-0025-1058: hyp=['FATHER', 'MUSTHIENNE', 'IS', 'THE', 'GRAVE', 'DIGGER', 'HE', 'WAS'] +3764-168671-0026-1059: ref=['FAUCHELEVENT', 'HAD', 'EXPECTED', 'ANYTHING', 'BUT', 'THIS', 'THAT', 'A', 'GRAVE', 'DIGGER', 'COULD', 'DIE'] +3764-168671-0026-1059: hyp=['FAUCHELEVENT', 'HAD', 'EXPECTED', 'ANYTHING', 'BUT', 'THIS', 'THAT', 'A', 'GRAVE', 'DIGGER', 'COULD', 'DIE'] +3764-168671-0027-1060: ref=['IT', 'IS', 'TRUE', 'NEVERTHELESS', 'THAT', 'GRAVE', 'DIGGERS', 'DO', 'DIE', 'THEMSELVES'] +3764-168671-0027-1060: hyp=['IT', 'IS', 'TRUE', 'NEVERTHELESS', 'THAT', 'GRAVE', 'DIGGERS', 'DO', 'DIE', 'THEMSELVES'] +3764-168671-0028-1061: ref=['HE', 'HAD', 'HARDLY', 'THE', 'STRENGTH', 'TO', 'STAMMER'] +3764-168671-0028-1061: hyp=['HE', 'HAD', 'HARDLY', 'THE', 'STRENGTH', 'TO', 'STAMMER'] +3764-168671-0029-1062: ref=['BUT', 'HE', 'PERSISTED', 'FEEBLY', 'FATHER', 'MESTIENNE', 'IS', 'THE', 'GRAVE', 'DIGGER'] +3764-168671-0029-1062: hyp=['BUT', 'HE', 'PERSISTED', 'FEEBLY', 'FATHER', 'MASTIENNE', 'IS', 'THE', 'GRAVE', 'DIGGER'] +3764-168671-0030-1063: ref=['DO', 'YOU', 'KNOW', 'WHO', 'LITTLE', 'FATHER', 'LENOIR', 'IS', 'HE', 'IS', 'A', 'JUG', 'OF', 'RED', 'WINE'] +3764-168671-0030-1063: hyp=['DO', 'YOU', 'KNOW', 'WHO', 'LITTLE', 'FATHER', 'LE', 'NOIR', 'IS', 'HE', 'IS', 'A', 'JUG', 'OF', 'RED', 'WINE'] +3764-168671-0031-1064: ref=['BUT', 'YOU', 'ARE', 'A', 'JOLLY', 'FELLOW', 'TOO'] +3764-168671-0031-1064: hyp=['BUT', "YOU'RE", 'A', 'JOLLY', 'FELLOW', 'TOO'] +3764-168671-0032-1065: ref=['ARE', 'YOU', 'NOT', 'COMRADE', "WE'LL", 'GO', 'AND', 'HAVE', 'A', 'DRINK', 'TOGETHER', 'PRESENTLY'] 
+3764-168671-0032-1065: hyp=['ARE', 'YOU', 'NOT', 'COMRADE', "WE'LL", 'GO', 'AND', 'HAVE', 'A', 'DRINK', 'TOGETHER', 'PRESENTLY'] +3764-168671-0033-1066: ref=['THE', 'MAN', 'REPLIED'] +3764-168671-0033-1066: hyp=['THE', 'MAN', 'REPLIED'] +3764-168671-0034-1067: ref=['HE', 'LIMPED', 'MORE', 'OUT', 'OF', 'ANXIETY', 'THAN', 'FROM', 'INFIRMITY'] +3764-168671-0034-1067: hyp=['HE', 'LIMPED', 'MORE', 'OUT', 'OF', 'ANXIETY', 'THAN', 'FROM', 'INFIRMITY'] +3764-168671-0035-1068: ref=['THE', 'GRAVE', 'DIGGER', 'WALKED', 'ON', 'IN', 'FRONT', 'OF', 'HIM'] +3764-168671-0035-1068: hyp=['THE', 'GRAVE', 'DIGGER', 'WALKED', 'ON', 'IN', 'FRONT', 'OF', 'HIM'] +3764-168671-0036-1069: ref=['FAUCHELEVENT', 'PASSED', 'THE', 'UNEXPECTED', 'GRIBIER', 'ONCE', 'MORE', 'IN', 'REVIEW'] +3764-168671-0036-1069: hyp=['FAUCHELEVENT', 'PASSED', 'THE', 'UNEXPECTED', 'CRIBIER', 'ONCE', 'MORE', 'IN', 'REVIEW'] +3764-168671-0037-1070: ref=['FAUCHELEVENT', 'WHO', 'WAS', 'ILLITERATE', 'BUT', 'VERY', 'SHARP', 'UNDERSTOOD', 'THAT', 'HE', 'HAD', 'TO', 'DEAL', 'WITH', 'A', 'FORMIDABLE', 'SPECIES', 'OF', 'MAN', 'WITH', 'A', 'FINE', 'TALKER', 'HE', 'MUTTERED'] +3764-168671-0037-1070: hyp=['FAUCHELEVENT', 'WHO', 'WAS', 'ILLITERATE', 'BUT', 'VERY', 'SHARP', 'UNDERSTOOD', 'THAT', 'HE', 'HAD', 'TO', 'DEAL', 'WITH', 'A', 'FORMIDABLE', 'SPECIES', 'OF', 'MAN', 'WITH', 'A', 'FINE', 'TALKER', 'HE', 'MUTTERED'] +3764-168671-0038-1071: ref=['SO', 'FATHER', 'MESTIENNE', 'IS', 'DEAD'] +3764-168671-0038-1071: hyp=['BUT', 'SO', 'FATHER', 'MESTIENNE', 'IS', 'DEAD'] +3764-168671-0039-1072: ref=['THE', 'MAN', 'REPLIED', 'COMPLETELY'] +3764-168671-0039-1072: hyp=['THE', 'MAN', 'REPLIED', 'COMPLETELY'] +3764-168671-0040-1073: ref=['THE', 'GOOD', 'GOD', 'CONSULTED', 'HIS', 'NOTE', 'BOOK', 'WHICH', 'SHOWS', 'WHEN', 'THE', 'TIME', 'IS', 'UP', 'IT', 'WAS', 'FATHER', "MESTIENNE'S", 'TURN', 'FATHER', 'MESTIENNE', 'DIED'] +3764-168671-0040-1073: hyp=['THE', 'GOOD', 'GOD', 'CONSULTED', 'HIS', 'NOTE', 'BOOK', 'WHICH', 'SHOWS', 'WHEN', 'THE', 'TIME', 'IS', 'UP', 'IT', 'WAS', 'FATHER', "MAIAN'S", 'TURN', 'FATHER', 'MESTIENNE', 'DIED'] +3764-168671-0041-1074: ref=['STAMMERED', 'FAUCHELEVENT', 'IT', 'IS', 'MADE'] +3764-168671-0041-1074: hyp=['STAMMERED', 'FAUCHELEVENT', 'IT', 'IS', 'MADE'] +3764-168671-0042-1075: ref=['YOU', 'ARE', 'A', 'PEASANT', 'I', 'AM', 'A', 'PARISIAN'] +3764-168671-0042-1075: hyp=['YOU', 'ARE', 'A', 'PEASANT', 'I', 'AM', 'A', 'PARISIAN'] +3764-168671-0043-1076: ref=['FAUCHELEVENT', 'THOUGHT', 'I', 'AM', 'LOST'] +3764-168671-0043-1076: hyp=['FAUCHELEVENT', 'THOUGHT', 'I', 'AM', 'LOST'] +3764-168671-0044-1077: ref=['THEY', 'WERE', 'ONLY', 'A', 'FEW', 'TURNS', 'OF', 'THE', 'WHEEL', 'DISTANT', 'FROM', 'THE', 'SMALL', 'ALLEY', 'LEADING', 'TO', 'THE', 'NUNS', 'CORNER'] +3764-168671-0044-1077: hyp=['THEY', 'WERE', 'ONLY', 'A', 'FEW', 'TURNS', 'OF', 'THE', 'WHEEL', 'DISTANT', 'FROM', 'THE', 'SMALL', 'ALLEY', 'LEADING', 'TO', 'THE', "NUN'S", 'CORNER'] +3764-168671-0045-1078: ref=['AND', 'HE', 'ADDED', 'WITH', 'THE', 'SATISFACTION', 'OF', 'A', 'SERIOUS', 'MAN', 'WHO', 'IS', 'TURNING', 'A', 'PHRASE', 'WELL'] +3764-168671-0045-1078: hyp=['AND', 'HE', 'ADDED', 'WITH', 'THE', 'SATISFACTION', 'OF', 'A', 'SERIOUS', 'MAN', 'WHO', 'IS', 'TURNING', 'A', 'PHRASE', 'WELL'] +3764-168671-0046-1079: ref=['FORTUNATELY', 'THE', 'SOIL', 'WHICH', 'WAS', 'LIGHT', 'AND', 'WET', 'WITH', 'THE', 'WINTER', 'RAINS', 'CLOGGED', 'THE', 'WHEELS', 'AND', 'RETARDED', 'ITS', 'SPEED'] +3764-168671-0046-1079: hyp=['FORTUNATELY', 'THE', 'SOIL', 'WHICH', 'WAS', 'LIGHT', 'AND', 'WET', 
'WITH', 'THE', 'WINTER', 'RAINS', 'CLOGGED', 'THE', 'WHEELS', 'AND', 'RETARDED', 'ITS', 'SPEED'] +3764-168671-0047-1080: ref=['MY', 'FATHER', 'WAS', 'A', 'PORTER', 'AT', 'THE', 'PRYTANEUM', 'TOWN', 'HALL'] +3764-168671-0047-1080: hyp=['MY', 'FATHER', 'WAS', 'A', 'PORTER', 'AT', 'THE', 'PRETINNIUM', 'TOWN', 'HALL'] +3764-168671-0048-1081: ref=['BUT', 'HE', 'HAD', 'REVERSES', 'HE', 'HAD', 'LOSSES', 'ON', 'CHANGE', 'I', 'WAS', 'OBLIGED', 'TO', 'RENOUNCE', 'THE', 'PROFESSION', 'OF', 'AUTHOR', 'BUT', 'I', 'AM', 'STILL', 'A', 'PUBLIC', 'WRITER'] +3764-168671-0048-1081: hyp=['BUT', 'HE', 'HAD', 'REVERSES', 'HE', 'HAD', 'LOSSES', 'ON', 'CHANGE', 'I', 'WAS', 'OBLIGED', 'TO', 'RENOUNCE', 'THE', 'PROFESSION', 'OF', 'AUTHOR', 'BUT', 'I', 'AM', 'STILL', 'A', 'PUBLIC', 'WRITER'] +3764-168671-0049-1082: ref=['SO', 'YOU', 'ARE', 'NOT', 'A', 'GRAVE', 'DIGGER', 'THEN'] +3764-168671-0049-1082: hyp=['SO', 'YOU', 'ARE', 'NOT', 'A', 'GRAVE', 'DIGGER', 'THEN'] +3764-168671-0050-1083: ref=['RETURNED', 'FAUCHELEVENT', 'CLUTCHING', 'AT', 'THIS', 'BRANCH', 'FEEBLE', 'AS', 'IT', 'WAS'] +3764-168671-0050-1083: hyp=['RETURNED', 'FAUCHELEVENT', 'CLUTCHING', 'AT', 'THIS', 'BRANCH', 'FEEBLE', 'AS', 'IT', 'WAS'] +3764-168671-0051-1084: ref=['HERE', 'A', 'REMARK', 'BECOMES', 'NECESSARY'] +3764-168671-0051-1084: hyp=['HERE', 'A', 'REMARK', 'BECOMES', 'NECESSARY'] +3764-168671-0052-1085: ref=['FAUCHELEVENT', 'WHATEVER', 'HIS', 'ANGUISH', 'OFFERED', 'A', 'DRINK', 'BUT', 'HE', 'DID', 'NOT', 'EXPLAIN', 'HIMSELF', 'ON', 'ONE', 'POINT', 'WHO', 'WAS', 'TO', 'PAY'] +3764-168671-0052-1085: hyp=['FAUCHELEVENT', 'WHATEVER', 'HIS', 'ANGUISH', 'OFFERED', 'A', 'DRINK', 'BUT', 'HE', 'DID', 'NOT', 'EXPLAIN', 'HIMSELF', 'ON', 'ONE', 'POINT', 'WHO', 'WAS', 'TO', 'PAY'] +3764-168671-0053-1086: ref=['THE', 'GRAVE', 'DIGGER', 'WENT', 'ON', 'WITH', 'A', 'SUPERIOR', 'SMILE'] +3764-168671-0053-1086: hyp=['THE', 'GRAVE', 'DIGGER', 'WENT', 'ON', 'WITH', 'A', 'SUPERIOR', 'SMILE'] +3764-168671-0054-1087: ref=['ONE', 'MUST', 'EAT'] +3764-168671-0054-1087: hyp=['ONE', 'MUST', 'EAT'] +3997-180294-0000-1088: ref=['THE', 'DUKE', 'COMES', 'EVERY', 'MORNING', 'THEY', 'WILL', 'TELL', 'HIM', 'WHEN', 'HE', 'COMES', 'THAT', 'I', 'AM', 'ASLEEP', 'AND', 'PERHAPS', 'HE', 'WILL', 'WAIT', 'UNTIL', 'I', 'WAKE'] +3997-180294-0000-1088: hyp=['THE', 'DUKE', 'COMES', 'EVERY', 'MORNING', 'THEY', 'WILL', 'TELL', 'HIM', 'WHEN', 'HE', 'COMES', 'THAT', 'I', 'AM', 'ASLEEP', 'AND', 'PERHAPS', 'HE', 'WILL', 'WAIT', 'UNTIL', 'I', 'AWAKE'] +3997-180294-0001-1089: ref=['YES', 'BUT', 'IF', 'I', 'SHOULD', 'ALREADY', 'ASK', 'FOR', 'SOMETHING', 'WHAT'] +3997-180294-0001-1089: hyp=['YES', 'BUT', 'IF', 'I', 'SHOULD', 'ALREADY', 'ASK', 'FOR', 'SOMETHING', 'WHAT'] +3997-180294-0002-1090: ref=['WELL', 'DO', 'IT', 'FOR', 'ME', 'FOR', 'I', 'SWEAR', 'TO', 'YOU', 'THAT', 'I', "DON'T", 'LOVE', 'YOU', 'AS', 'THE', 'OTHERS', 'HAVE', 'LOVED', 'YOU'] +3997-180294-0002-1090: hyp=['WELL', 'DO', 'IT', 'FOR', 'ME', 'FOR', 'I', 'SWEAR', 'TO', 'YOU', 'THAT', 'I', "DON'T", 'LOVE', 'YOU', 'AS', 'THE', 'OTHERS', 'HAVE', 'LOVED', 'YOU'] +3997-180294-0003-1091: ref=['THERE', 'ARE', 'BOLTS', 'ON', 'THE', 'DOOR', 'WRETCH'] +3997-180294-0003-1091: hyp=['THERE', 'ARE', 'BOLTS', 'IN', 'THE', 'DOOR', 'WRETCH'] +3997-180294-0004-1092: ref=['I', "DON'T", 'KNOW', 'HOW', 'IT', 'IS', 'BUT', 'IT', 'SEEMS', 'TO', 'ME', 'AS', 'IF', 'I', 'DO'] +3997-180294-0004-1092: hyp=['I', "DON'T", 'KNOW', 'HOW', 'IT', 'IS', 'BUT', 'IT', 'SEEMS', 'TO', 'ME', 'AS', 'IF', 'I', 'DO'] +3997-180294-0005-1093: ref=['NOW', 'GO', 'I', "CAN'T", 
'KEEP', 'MY', 'EYES', 'OPEN'] +3997-180294-0005-1093: hyp=['NOW', 'GO', 'I', "CAN'T", 'KEEP', 'MY', 'EYES', 'OPEN'] +3997-180294-0006-1094: ref=['IT', 'SEEMED', 'TO', 'ME', 'AS', 'IF', 'THIS', 'SLEEPING', 'CITY', 'BELONGED', 'TO', 'ME', 'I', 'SEARCHED', 'MY', 'MEMORY', 'FOR', 'THE', 'NAMES', 'OF', 'THOSE', 'WHOSE', 'HAPPINESS', 'I', 'HAD', 'ONCE', 'ENVIED', 'AND', 'I', 'COULD', 'NOT', 'RECALL', 'ONE', 'WITHOUT', 'FINDING', 'MYSELF', 'THE', 'HAPPIER'] +3997-180294-0006-1094: hyp=['IT', 'SEEMED', 'TO', 'ME', 'AS', 'IF', 'THIS', 'SLEEPING', 'CITY', 'BELONGED', 'TO', 'ME', 'I', 'SEARCHED', 'MY', 'MEMORY', 'FOR', 'THE', 'NAMES', 'OF', 'THOSE', 'WHOSE', 'HAPPINESS', 'I', 'HAD', 'ONCE', 'ENVIED', 'AND', 'I', 'COULD', 'NOT', 'RECALL', 'ONE', 'WITHOUT', 'FINDING', 'MYSELF', 'THE', 'HAPPIER'] +3997-180294-0007-1095: ref=['EDUCATION', 'FAMILY', 'FEELING', 'THE', 'SENSE', 'OF', 'DUTY', 'THE', 'FAMILY', 'ARE', 'STRONG', 'SENTINELS', 'BUT', 'THERE', 'ARE', 'NO', 'SENTINELS', 'SO', 'VIGILANT', 'AS', 'NOT', 'TO', 'BE', 'DECEIVED', 'BY', 'A', 'GIRL', 'OF', 'SIXTEEN', 'TO', 'WHOM', 'NATURE', 'BY', 'THE', 'VOICE', 'OF', 'THE', 'MAN', 'SHE', 'LOVES', 'GIVES', 'THE', 'FIRST', 'COUNSELS', 'OF', 'LOVE', 'ALL', 'THE', 'MORE', 'ARDENT', 'BECAUSE', 'THEY', 'SEEM', 'SO', 'PURE'] +3997-180294-0007-1095: hyp=['EDUCATION', 'FAMILY', 'FEELING', 'THE', 'SENSE', 'OF', 'DUTY', 'THE', 'FAMILY', 'ARE', 'STRONG', 'SENTINELS', 'BUT', 'THERE', 'ARE', 'NO', 'SENTINELS', 'SO', 'VIGILANT', 'AS', 'NOT', 'TO', 'BE', 'DECEIVED', 'BY', 'A', 'GIRL', 'OF', 'SIXTEEN', 'TO', 'WHOM', 'NATURE', 'BY', 'THE', 'VOICE', 'OF', 'THE', 'MAN', 'SHE', 'LOVES', 'GIVES', 'THE', 'FIRST', 'COUNSEL', 'OF', 'LOVE', 'ALL', 'THE', 'MORE', 'ARDENT', 'BECAUSE', 'THEY', 'SEEM', 'SO', 'PURE'] +3997-180294-0008-1096: ref=['THE', 'MORE', 'A', 'GIRL', 'BELIEVES', 'IN', 'GOODNESS', 'THE', 'MORE', 'EASILY', 'WILL', 'SHE', 'GIVE', 'WAY', 'IF', 'NOT', 'TO', 'HER', 'LOVER', 'AT', 'LEAST', 'TO', 'LOVE', 'FOR', 'BEING', 'WITHOUT', 'MISTRUST', 'SHE', 'IS', 'WITHOUT', 'FORCE', 'AND', 'TO', 'WIN', 'HER', 'LOVE', 'IS', 'A', 'TRIUMPH', 'THAT', 'CAN', 'BE', 'GAINED', 'BY', 'ANY', 'YOUNG', 'MAN', 'OF', 'FIVE', 'AND', 'TWENTY', 'SEE', 'HOW', 'YOUNG', 'GIRLS', 'ARE', 'WATCHED', 'AND', 'GUARDED'] +3997-180294-0008-1096: hyp=['THE', 'MORE', 'A', 'GIRL', 'BELIEVES', 'IN', 'GOODNESS', 'THE', 'MORE', 'EASILY', 'WILL', 'SHE', 'GIVE', 'WAY', 'IF', 'NOT', 'TO', 'A', 'LOVER', 'AT', 'LEAST', 'TO', 'LOVE', 'FOR', 'BEING', 'WITHOUT', 'MISTRUST', 'SHE', 'IS', 'WITHOUT', 'FORCE', 'AND', 'TO', 'WIN', 'HER', 'LOVE', 'IS', 'A', 'TRIUMPH', 'THAT', 'CAN', 'BE', 'GAINED', 'BY', 'ANY', 'YOUNG', 'MEN', 'OF', 'FIVE', 'AND', 'TWENTY', 'SEE', 'HOW', 'YOUNG', 'GIRLS', 'ARE', 'WATCHED', 'AND', 'GUARDED'] +3997-180294-0009-1097: ref=['THEN', 'HOW', 'SURELY', 'MUST', 'THEY', 'DESIRE', 'THE', 'WORLD', 'WHICH', 'IS', 'HIDDEN', 'FROM', 'THEM', 'HOW', 'SURELY', 'MUST', 'THEY', 'FIND', 'IT', 'TEMPTING', 'HOW', 'SURELY', 'MUST', 'THEY', 'LISTEN', 'TO', 'THE', 'FIRST', 'VOICE', 'WHICH', 'COMES', 'TO', 'TELL', 'ITS', 'SECRETS', 'THROUGH', 'THEIR', 'BARS', 'AND', 'BLESS', 'THE', 'HAND', 'WHICH', 'IS', 'THE', 'FIRST', 'TO', 'RAISE', 'A', 'CORNER', 'OF', 'THE', 'MYSTERIOUS', 'VEIL'] +3997-180294-0009-1097: hyp=['THEN', 'HOW', 'SURELY', 'MUST', 'THEY', 'DESIRE', 'THE', 'WORLD', 'WHICH', 'IS', 'HIDDEN', 'FROM', 'THEM', 'HOW', 'SURELY', 'MUST', 'THEY', 'FIND', 'IT', 'TEMPTING', 'HOW', 'SURELY', 'MUST', 'THEY', 'LISTEN', 'TO', 'THE', 'FIRST', 'VOICE', 'WHICH', 'COMES', 'TO', 'TELL', 'ITS', 'SECRETS', 'THROUGH', 'THEIR', 
'BARS', 'AND', 'BLESS', 'THE', 'HAND', 'WHICH', 'IS', 'THE', 'FIRST', 'TO', 'RAISE', 'A', 'CORNER', 'OF', 'THE', 'MYSTERY', 'VEIL'] +3997-180294-0010-1098: ref=['WITH', 'THEM', 'THE', 'BODY', 'HAS', 'WORN', 'OUT', 'THE', 'SOUL', 'THE', 'SENSES', 'HAVE', 'BURNED', 'UP', 'THE', 'HEART', 'DISSIPATION', 'HAS', 'BLUNTED', 'THE', 'FEELINGS'] +3997-180294-0010-1098: hyp=['WITH', 'THEM', 'THE', 'BODY', 'HAS', 'WORN', 'OUT', 'THE', 'SOUL', 'THE', 'SENSES', 'HAVE', 'BURNED', 'UP', 'THE', 'HEART', 'DISSIPATION', 'HAS', 'BLUNTED', 'THE', 'FEELINGS'] +3997-180294-0011-1099: ref=['THEY', 'LOVE', 'BY', 'PROFESSION', 'AND', 'NOT', 'BY', 'INSTINCT'] +3997-180294-0011-1099: hyp=['THEY', 'LOVE', 'BY', 'PROFESSION', 'AND', 'NOT', 'BY', 'INSTINCT'] +3997-180294-0012-1100: ref=['WHEN', 'A', 'CREATURE', 'WHO', 'HAS', 'ALL', 'HER', 'PAST', 'TO', 'REPROACH', 'HERSELF', 'WITH', 'IS', 'TAKEN', 'ALL', 'AT', 'ONCE', 'BY', 'A', 'PROFOUND', 'SINCERE', 'IRRESISTIBLE', 'LOVE', 'OF', 'WHICH', 'SHE', 'HAD', 'NEVER', 'FELT', 'HERSELF', 'CAPABLE', 'WHEN', 'SHE', 'HAS', 'CONFESSED', 'HER', 'LOVE', 'HOW', 'ABSOLUTELY', 'THE', 'MAN', 'WHOM', 'SHE', 'LOVES', 'DOMINATES', 'HER'] +3997-180294-0012-1100: hyp=['WHEN', 'A', 'CREATURE', 'WHO', 'HAS', 'ALL', 'HER', 'PAST', 'TO', 'REPROACH', 'HERSELF', 'WITH', 'IS', 'TAKEN', 'ALL', 'AT', 'ONCE', 'BY', 'A', 'PROFOUND', 'SINCERE', 'IRRESISTIBLE', 'LOVE', 'OF', 'WHICH', 'SHE', 'HAD', 'NEVER', 'FELT', 'HERSELF', 'CAPABLE', 'WHEN', 'SHE', 'HAS', 'CONFESSED', 'HER', 'LOVE', 'HOW', 'ABSOLUTELY', 'THE', 'MAN', 'WHOM', 'SHE', 'LOVES', 'DOMINATES', 'HER'] +3997-180294-0013-1101: ref=['THEY', 'KNOW', 'NOT', 'WHAT', 'PROOF', 'TO', 'GIVE'] +3997-180294-0013-1101: hyp=['THEY', 'KNOW', 'NOT', 'WHAT', 'PROOF', 'TO', 'GIVE'] +3997-180294-0014-1102: ref=['IN', 'ORDER', 'TO', 'DISTURB', 'THE', 'LABOURERS', 'IN', 'THE', 'FIELD', 'WAS', 'ONE', 'DAY', 'DEVOURED', 'BY', 'A', 'WOLF', 'BECAUSE', 'THOSE', 'WHOM', 'HE', 'HAD', 'SO', 'OFTEN', 'DECEIVED', 'NO', 'LONGER', 'BELIEVED', 'IN', 'HIS', 'CRIES', 'FOR', 'HELP'] +3997-180294-0014-1102: hyp=['IN', 'ORDER', 'TO', 'DISTURB', 'THE', 'LABOURERS', 'IN', 'THE', 'FIELDS', 'WAS', 'ONE', 'DAY', 'DEVOURED', 'BY', 'A', 'WOLF', 'BECAUSE', 'THOSE', 'WHOM', 'HE', 'HAD', 'SO', 'OFTEN', 'DECEIVED', 'NO', 'LONGER', 'BELIEVED', 'IN', 'HIS', 'CRIES', 'FOR', 'HELP'] +3997-180294-0015-1103: ref=['IT', 'IS', 'THE', 'SAME', 'WITH', 'THESE', 'UNHAPPY', 'WOMEN', 'WHEN', 'THEY', 'LOVE', 'SERIOUSLY'] +3997-180294-0015-1103: hyp=['THIS', 'IS', 'THE', 'SAME', 'WITH', 'THESE', 'UNHAPPY', 'WOMEN', 'WHEN', 'THEY', 'LOVE', 'SERIOUSLY'] +3997-180294-0016-1104: ref=['BUT', 'WHEN', 'THE', 'MAN', 'WHO', 'INSPIRES', 'THIS', 'REDEEMING', 'LOVE', 'IS', 'GREAT', 'ENOUGH', 'IN', 'SOUL', 'TO', 'RECEIVE', 'IT', 'WITHOUT', 'REMEMBERING', 'THE', 'PAST', 'WHEN', 'HE', 'GIVES', 'HIMSELF', 'UP', 'TO', 'IT', 'WHEN', 'IN', 'SHORT', 'HE', 'LOVES', 'AS', 'HE', 'IS', 'LOVED', 'THIS', 'MAN', 'DRAINS', 'AT', 'ONE', 'DRAUGHT', 'ALL', 'EARTHLY', 'EMOTIONS', 'AND', 'AFTER', 'SUCH', 'A', 'LOVE', 'HIS', 'HEART', 'WILL', 'BE', 'CLOSED', 'TO', 'EVERY', 'OTHER'] +3997-180294-0016-1104: hyp=['BUT', 'WHEN', 'THE', 'MAN', 'WHO', 'INSPIRES', 'THIS', 'REDEEMING', 'LOVE', 'IS', 'GREAT', 'ENOUGH', 'IN', 'SOUL', 'TO', 'RECEIVE', 'IT', 'WITHOUT', 'REMEMBERING', 'THE', 'PAST', 'WHEN', 'HE', 'GIVES', 'HIMSELF', 'UP', 'TO', 'IT', 'WHEN', 'IN', 'SHORT', 'HE', 'LOVES', 'AS', 'HE', 'IS', 'LOVED', 'THIS', 'MAN', 'DRAINS', 'AT', 'ONE', 'DRAUGHT', 'ALL', 'EARTHLY', 'EMOTIONS', 'AND', 'AFTER', 'SUCH', 'A', 'LOVE', 'HIS', 'HEART', 'WILL', 
'BE', 'CLOSED', 'TO', 'EVERY', 'OTHER'] +3997-180294-0017-1105: ref=['BUT', 'TO', 'RETURN', 'TO', 'THE', 'FIRST', 'DAY', 'OF', 'MY', 'LIAISON'] +3997-180294-0017-1105: hyp=['BUT', 'TO', 'RETURN', 'TO', 'THE', 'FIRST', 'DAY', 'OF', 'MY', 'LIYER', 'SONG'] +3997-180294-0018-1106: ref=['WHEN', 'I', 'REACHED', 'HOME', 'I', 'WAS', 'IN', 'A', 'STATE', 'OF', 'MAD', 'GAIETY'] +3997-180294-0018-1106: hyp=['WHEN', 'I', 'REACHED', 'HOME', 'I', 'WAS', 'IN', 'A', 'STATE', 'OF', 'MAD', 'GAIETY'] +3997-180294-0019-1107: ref=['THE', 'WOMAN', 'BECOMES', 'THE', "MAN'S", 'MISTRESS', 'AND', 'LOVES', 'HIM'] +3997-180294-0019-1107: hyp=['THE', 'WOMAN', 'BECOMES', 'THE', "MAN'S", 'MISTRESS', 'AND', 'LOVES', 'HIM'] +3997-180294-0020-1108: ref=['HOW', 'WHY'] +3997-180294-0020-1108: hyp=['HOW', 'WHY'] +3997-180294-0021-1109: ref=['MY', 'WHOLE', 'BEING', 'WAS', 'EXALTED', 'INTO', 'JOY', 'AT', 'THE', 'MEMORY', 'OF', 'THE', 'WORDS', 'WE', 'HAD', 'EXCHANGED', 'DURING', 'THAT', 'FIRST', 'NIGHT'] +3997-180294-0021-1109: hyp=['MY', 'WHOLE', 'BEING', 'WAS', 'EXALTED', 'INTO', 'JOY', 'AT', 'THE', 'MEMORY', 'OF', 'THE', 'WORDS', 'WE', 'HAD', 'EXCHANGED', 'DURING', 'THAT', 'FIRST', 'NIGHT'] +3997-180294-0022-1110: ref=['HERE', 'ARE', 'MY', 'ORDERS', 'TO', 'NIGHT', 'AT', 'THE', 'VAUDEVILLE'] +3997-180294-0022-1110: hyp=['HERE', 'ARE', 'MY', 'ORDERS', 'TO', 'NIGHT', 'AT', 'A', 'VAUDEVILLE'] +3997-180294-0023-1111: ref=['COME', 'DURING', 'THE', 'THIRD', "ENTR'ACTE"] +3997-180294-0023-1111: hyp=['COME', 'DUN', 'THE', 'THIRD', 'ENTRANCE'] +3997-180294-0024-1112: ref=['THE', 'BOXES', 'FILLED', 'ONE', 'AFTER', 'ANOTHER'] +3997-180294-0024-1112: hyp=['THE', 'BOXES', 'FILLED', 'ONE', 'AFTER', 'ANOTHER'] +3997-180294-0025-1113: ref=['ONLY', 'ONE', 'REMAINED', 'EMPTY', 'THE', 'STAGE', 'BOX'] +3997-180294-0025-1113: hyp=['ONLY', 'ONE', 'REMAINED', 'EMPTY', 'THE', 'STAGE', 'BOX'] +3997-180294-0026-1114: ref=['AT', 'THE', 'BEGINNING', 'OF', 'THE', 'THIRD', 'ACT', 'I', 'HEARD', 'THE', 'DOOR', 'OF', 'THE', 'BOX', 'ON', 'WHICH', 'MY', 'EYES', 'HAD', 'BEEN', 'ALMOST', 'CONSTANTLY', 'FIXED', 'OPEN', 'AND', 'MARGUERITE', 'APPEARED'] +3997-180294-0026-1114: hyp=['AT', 'THE', 'BEGINNING', 'OF', 'THE', 'THIRD', 'ACT', 'I', 'HEARD', 'THE', 'DOOR', 'OF', 'THE', 'BOX', 'ON', 'WHICH', 'MY', 'EYES', 'HAD', 'BEEN', 'ALMOST', 'CONSTANTLY', 'FIXED', 'OPEN', 'AND', 'MARGUERITE', 'APPEARED'] +3997-180294-0027-1115: ref=['DID', 'SHE', 'LOVE', 'ME', 'ENOUGH', 'TO', 'BELIEVE', 'THAT', 'THE', 'MORE', 'BEAUTIFUL', 'SHE', 'LOOKED', 'THE', 'HAPPIER', 'I', 'SHOULD', 'BE'] +3997-180294-0027-1115: hyp=['DID', 'SHE', 'LOVE', 'ME', 'ENOUGH', 'TO', 'BELIEVE', 'THAT', 'THE', 'MORE', 'BEAUTIFUL', 'SHE', 'LOOKED', 'THE', 'HAPPIER', 'I', 'SHOULD', 'BE'] +3997-180294-0028-1116: ref=['WHAT', 'IS', 'THE', 'MATTER', 'WITH', 'YOU', 'TO', 'NIGHT', 'SAID', 'MARGUERITE', 'RISING', 'AND', 'COMING', 'TO', 'THE', 'BACK', 'OF', 'THE', 'BOX', 'AND', 'KISSING', 'ME', 'ON', 'THE', 'FOREHEAD'] +3997-180294-0028-1116: hyp=['WHAT', 'IS', 'THE', 'MATTER', 'WITH', 'YOU', 'TO', 'NIGHT', 'SAID', 'MARGUERITE', 'RISING', 'AND', 'COMING', 'TO', 'THE', 'BACK', 'OF', 'THE', 'BOX', 'AND', 'KISSING', 'ME', 'ON', 'THE', 'FOREHEAD'] +3997-180294-0029-1117: ref=['YOU', 'SHOULD', 'GO', 'TO', 'BED', 'SHE', 'REPLIED', 'WITH', 'THAT', 'IRONICAL', 'AIR', 'WHICH', 'WENT', 'SO', 'WELL', 'WITH', 'HER', 'DELICATE', 'AND', 'WITTY', 'FACE'] +3997-180294-0029-1117: hyp=['HE', 'SHOULD', 'GO', 'TO', 'BED', 'SHE', 'REPLIED', 'WITH', 'THAT', 'IRONIC', 'AIR', 'WHICH', 'WENT', 'SO', 'WELL', 'WITH', 'HER', 'DELICATE', 'AND', 
'WITTY', 'FACE'] +3997-180294-0030-1118: ref=['WHERE', 'AT', 'HOME'] +3997-180294-0030-1118: hyp=['WHERE', 'AT', 'HOME'] +3997-180294-0031-1119: ref=['YOU', 'STILL', 'LOVE', 'ME', 'CAN', 'YOU', 'ASK'] +3997-180294-0031-1119: hyp=['YOU', 'STILL', 'LOVE', 'ME', 'CAN', 'YOU', 'ASK'] +3997-180294-0032-1120: ref=['BECAUSE', 'YOU', "DON'T", 'LIKE', 'SEEING', 'HIM'] +3997-180294-0032-1120: hyp=['BECAUSE', 'YOU', "DON'T", 'LIKE', 'SEEING', 'HIM'] +3997-180294-0033-1121: ref=['NONETHELESS', 'I', 'WAS', 'VERY', 'UNHAPPY', 'ALL', 'THE', 'REST', 'OF', 'THE', 'EVENING', 'AND', 'WENT', 'AWAY', 'VERY', 'SADLY', 'AFTER', 'HAVING', 'SEEN', 'PRUDENCE', 'THE', 'COUNT', 'AND', 'MARGUERITE', 'GET', 'INTO', 'THE', 'CARRIAGE', 'WHICH', 'WAS', 'WAITING', 'FOR', 'THEM', 'AT', 'THE', 'DOOR'] +3997-180294-0033-1121: hyp=['NEVERTHELESS', 'I', 'WAS', 'VERY', 'UNHAPPY', 'ALL', 'THE', 'REST', 'OF', 'THE', 'EVENING', 'AND', 'WENT', 'AWAY', 'VERY', 'SADLY', 'AFTER', 'HAVING', 'SEEN', 'PRUDENCE', 'THE', 'COUNT', 'AND', 'MARGUERITE', 'GET', 'INTO', 'THE', 'CARRIAGE', 'WHICH', 'WAS', 'WAITING', 'FOR', 'THEM', 'AT', 'THE', 'DOOR'] +3997-180297-0000-1122: ref=['I', 'HAVE', 'NOT', 'COME', 'TO', 'HINDER', 'YOU', 'FROM', 'LEAVING', 'PARIS'] +3997-180297-0000-1122: hyp=['I', 'HAVE', 'NOT', 'COME', 'TO', 'HINDER', 'YOU', 'FROM', 'LEAVING', 'PARIS'] +3997-180297-0001-1123: ref=['YOU', 'IN', 'THE', 'WAY', 'MARGUERITE', 'BUT', 'HOW'] +3997-180297-0001-1123: hyp=['YOU', 'IN', 'THE', 'WAY', 'MARGUERITE', 'BUT', 'HOW'] +3997-180297-0002-1124: ref=['WELL', 'YOU', 'MIGHT', 'HAVE', 'HAD', 'A', 'WOMAN', 'HERE', 'SAID', 'PRUDENCE', 'AND', 'IT', 'WOULD', 'HARDLY', 'HAVE', 'BEEN', 'AMUSING', 'FOR', 'HER', 'TO', 'SEE', 'TWO', 'MORE', 'ARRIVE'] +3997-180297-0002-1124: hyp=['WELL', 'YOU', 'MIGHT', 'HAVE', 'HAD', 'A', 'WOMAN', 'HERE', 'SAID', 'PRUDENCE', 'AND', 'IT', 'WOULD', 'HARDLY', 'HAVE', 'BEEN', 'AMUSING', 'FOR', 'HER', 'TO', 'SEE', 'TWO', 'MORE', 'ARRIVE'] +3997-180297-0003-1125: ref=['DURING', 'THIS', 'REMARK', 'MARGUERITE', 'LOOKED', 'AT', 'ME', 'ATTENTIVELY'] +3997-180297-0003-1125: hyp=['DURING', 'THIS', 'REMARK', 'MARGUERITE', 'LOOKED', 'AT', 'ME', 'ATTENTIVELY'] +3997-180297-0004-1126: ref=['MY', 'DEAR', 'PRUDENCE', 'I', 'ANSWERED', 'YOU', 'DO', 'NOT', 'KNOW', 'WHAT', 'YOU', 'ARE', 'SAYING'] +3997-180297-0004-1126: hyp=['MY', 'DEAR', 'PRUDENCE', 'I', 'ANSWERED', 'YOU', 'DO', 'NOT', 'KNOW', 'WHAT', 'YOU', 'ARE', 'SAYING'] +3997-180297-0005-1127: ref=['YES', 'BUT', 'BESIDES', 'NOT', 'WISHING', 'TO', 'PUT', 'YOU', 'OUT', 'I', 'WAS', 'SURE', 'THAT', 'IF', 'YOU', 'CAME', 'AS', 'FAR', 'AS', 'MY', 'DOOR', 'YOU', 'WOULD', 'WANT', 'TO', 'COME', 'UP', 'AND', 'AS', 'I', 'COULD', 'NOT', 'LET', 'YOU', 'I', 'DID', 'NOT', 'WISH', 'TO', 'LET', 'YOU', 'GO', 'AWAY', 'BLAMING', 'ME', 'FOR', 'SAYING', 'NO'] +3997-180297-0005-1127: hyp=['YES', 'BUT', 'BESIDES', 'NOT', 'WISHING', 'TO', 'PUT', 'YOU', 'OUT', 'I', 'WAS', 'SURE', 'THAT', 'IF', 'YOU', 'CAME', 'AS', 'FAR', 'AS', 'MY', 'DOOR', 'YOU', 'WOULD', 'WANT', 'TO', 'COME', 'UP', 'AND', 'AS', 'I', 'COULD', 'NOT', 'LET', 'YOU', 'I', 'DID', 'NOT', 'WISH', 'TO', 'LET', 'YOU', 'GO', 'AWAY', 'BLAMING', 'ME', 'FOR', 'SAYING', 'NO'] +3997-180297-0006-1128: ref=['BECAUSE', 'I', 'AM', 'WATCHED', 'AND', 'THE', 'LEAST', 'SUSPICION', 'MIGHT', 'DO', 'ME', 'THE', 'GREATEST', 'HARM'] +3997-180297-0006-1128: hyp=['BECAUSE', 'I', 'AM', 'WATCHED', 'AND', 'THE', 'LEAST', 'SUSPICION', 'MIGHT', 'DO', 'ME', 'THE', 'GREATEST', 'HARM'] +3997-180297-0007-1129: ref=['IS', 'THAT', 'REALLY', 'THE', 'ONLY', 'REASON'] 
+3997-180297-0007-1129: hyp=['IS', 'THAT', 'REALLY', 'THE', 'ONLY', 'REASON'] +3997-180297-0008-1130: ref=['IF', 'THERE', 'WERE', 'ANY', 'OTHER', 'I', 'WOULD', 'TELL', 'YOU', 'FOR', 'WE', 'ARE', 'NOT', 'TO', 'HAVE', 'ANY', 'SECRETS', 'FROM', 'ONE', 'ANOTHER', 'NOW'] +3997-180297-0008-1130: hyp=['IF', 'THERE', 'WERE', 'ANY', 'OTHER', 'I', 'WOULD', 'TELL', 'YOU', 'FOR', 'WE', 'ARE', 'NOT', 'TO', 'HAVE', 'ANY', 'SECRETS', 'FROM', 'ONE', 'ANOTHER', 'NOW'] +3997-180297-0009-1131: ref=['HONESTLY', 'DO', 'YOU', 'CARE', 'FOR', 'ME', 'A', 'LITTLE', 'A', 'GREAT', 'DEAL'] +3997-180297-0009-1131: hyp=['HONESTLY', 'DO', 'YOU', 'CARE', 'FOR', 'ME', 'A', 'LITTLE', 'A', 'GREAT', 'DEAL'] +3997-180297-0010-1132: ref=['I', 'FANCIED', 'FOR', 'A', 'MOMENT', 'THAT', 'I', 'MIGHT', 'GIVE', 'MYSELF', 'THAT', 'HAPPINESS', 'FOR', 'SIX', 'MONTHS', 'YOU', 'WOULD', 'NOT', 'HAVE', 'IT', 'YOU', 'INSISTED', 'ON', 'KNOWING', 'THE', 'MEANS'] +3997-180297-0010-1132: hyp=['I', 'FANCIED', 'FOR', 'A', 'MOMENT', 'THAT', 'I', 'MIGHT', 'GIVE', 'MYSELF', 'THAT', 'HAPPINESS', 'FOR', 'SIX', 'MONTHS', 'YOU', 'WOULD', 'NOT', 'HAVE', 'IT', 'YOU', 'INSISTED', 'ON', 'KNOWING', 'THE', 'MEANS'] +3997-180297-0011-1133: ref=['WELL', 'GOOD', 'HEAVENS', 'THE', 'MEANS', 'WERE', 'EASY', 'ENOUGH', 'TO', 'GUESS'] +3997-180297-0011-1133: hyp=['WELL', 'GOOD', 'HEAVENS', 'THE', 'MEANS', 'WERE', 'EASY', 'ENOUGH', 'TO', 'GUESS'] +3997-180297-0012-1134: ref=['I', 'LISTENED', 'AND', 'I', 'GAZED', 'AT', 'MARGUERITE', 'WITH', 'ADMIRATION'] +3997-180297-0012-1134: hyp=['I', 'LISTENED', 'AND', 'I', 'GAZED', 'AT', 'MARGUERITE', 'WITH', 'ADMIRATION'] +3997-180297-0013-1135: ref=['WHEN', 'I', 'THOUGHT', 'THAT', 'THIS', 'MARVELLOUS', 'CREATURE', 'WHOSE', 'FEET', 'I', 'HAD', 'ONCE', 'LONGED', 'TO', 'KISS', 'WAS', 'WILLING', 'TO', 'LET', 'ME', 'TAKE', 'MY', 'PLACE', 'IN', 'HER', 'THOUGHTS', 'MY', 'PART', 'IN', 'HER', 'LIFE', 'AND', 'THAT', 'I', 'WAS', 'NOT', 'YET', 'CONTENT', 'WITH', 'WHAT', 'SHE', 'GAVE', 'ME', 'I', 'ASKED', 'IF', "MAN'S", 'DESIRE', 'HAS', 'INDEED', 'LIMITS', 'WHEN', 'SATISFIED', 'AS', 'PROMPTLY', 'AS', 'MINE', 'HAD', 'BEEN', 'IT', 'REACHED', 'AFTER', 'SOMETHING', 'FURTHER'] +3997-180297-0013-1135: hyp=['WHEN', 'I', 'THOUGHT', 'THAT', 'THIS', 'MARVELLOUS', 'CREATURE', 'WHOSE', 'FEET', 'I', 'HAD', 'ONCE', 'LONGED', 'TO', 'KISS', 'WAS', 'WILLING', 'TO', 'LET', 'ME', 'TAKE', 'MY', 'PLACE', 'IN', 'HER', 'THOUGHTS', 'MY', 'PART', 'IN', 'HER', 'LIFE', 'AND', 'THAT', 'I', 'WAS', 'NOT', 'YET', 'CONTENT', 'WITH', 'WHAT', 'SHE', 'GAVE', 'ME', 'I', 'ASKED', 'IF', "MAN'S", 'DESIRE', 'HAD', 'INDEED', 'LIMITS', 'WHEN', 'SATISFIED', 'AS', 'PROMPTLY', 'AS', 'MINE', 'HAD', 'BEEN', 'IT', 'REACHED', 'AFTER', 'SOMETHING', 'FURTHER'] +3997-180297-0014-1136: ref=['TRULY', 'SHE', 'CONTINUED', 'WE', 'POOR', 'CREATURES', 'OF', 'CHANCE', 'HAVE', 'FANTASTIC', 'DESIRES', 'AND', 'INCONCEIVABLE', 'LOVES'] +3997-180297-0014-1136: hyp=['TRULY', 'SHE', 'CONTINUED', 'WE', 'POOR', 'CREATURES', 'OF', 'CHANCE', 'HAVE', 'FANTASTIC', 'DESIRES', 'AND', 'INCONCEIVABLE', 'LOVES'] +3997-180297-0015-1137: ref=['WE', 'ARE', 'NOT', 'ALLOWED', 'TO', 'HAVE', 'HEARTS', 'UNDER', 'PENALTY', 'OF', 'BEING', 'HOOTED', 'DOWN', 'AND', 'OF', 'RUINING', 'OUR', 'CREDIT'] +3997-180297-0015-1137: hyp=['WE', 'ARE', 'NOT', 'ALLOWED', 'TO', 'HAVE', 'HEARTS', 'UNDER', 'PENALTY', 'OF', 'BEING', 'HOOTED', 'DOWN', 'AND', 'OF', 'RUINING', 'OUR', 'CREDIT'] +3997-180297-0016-1138: ref=['WE', 'NO', 'LONGER', 'BELONG', 'TO', 'OURSELVES'] +3997-180297-0016-1138: hyp=['WE', 'NO', 'LONGER', 'BELONG', 'TO', 'OURSELVES'] 
+3997-180297-0017-1139: ref=['WE', 'STAND', 'FIRST', 'IN', 'THEIR', 'SELF', 'ESTEEM', 'LAST', 'IN', 'THEIR', 'ESTEEM'] +3997-180297-0017-1139: hyp=['WE', 'STAND', 'FIRST', 'IN', 'THEIR', 'SELF', 'ESTEEM', 'LAST', 'IN', 'THEIR', 'ESTEEM'] +3997-180297-0018-1140: ref=['NEVER', 'DO', 'THEY', 'GIVE', 'YOU', 'ADVICE', 'WHICH', 'IS', 'NOT', 'LUCRATIVE'] +3997-180297-0018-1140: hyp=['NEVER', 'DID', 'HE', 'GIVE', 'YOU', 'ADVICE', 'WHICH', 'IS', 'NOT', 'LUCRATIVE'] +3997-180297-0019-1141: ref=['IT', 'MEANS', 'LITTLE', 'ENOUGH', 'TO', 'THEM', 'THAT', 'WE', 'SHOULD', 'HAVE', 'TEN', 'LOVERS', 'EXTRA', 'AS', 'LONG', 'AS', 'THEY', 'GET', 'DRESSES', 'OR', 'A', 'BRACELET', 'OUT', 'OF', 'THEM', 'AND', 'THAT', 'THEY', 'CAN', 'DRIVE', 'IN', 'OUR', 'CARRIAGE', 'FROM', 'TIME', 'TO', 'TIME', 'OR', 'COME', 'TO', 'OUR', 'BOX', 'AT', 'THE', 'THEATRE'] +3997-180297-0019-1141: hyp=['IT', 'MEANS', 'LITTLE', 'ENOUGH', 'TO', 'THEM', 'THAT', 'WE', 'SHOULD', 'HAVE', 'TEN', 'LOVERS', 'EXTRA', 'AS', 'LONG', 'AS', 'THEY', 'GET', 'DRESSES', 'OR', 'A', 'BRACELET', 'OUT', 'OF', 'THEM', 'AND', 'THAT', 'THEY', 'CAN', 'DRIVE', 'IN', 'OUR', 'CARRIAGE', 'FROM', 'TIME', 'TO', 'TIME', 'OR', 'COME', 'TO', 'OUR', 'BOX', 'AT', 'THE', 'THEATRE'] +3997-180297-0020-1142: ref=['SUCH', 'A', 'MAN', 'I', 'FOUND', 'IN', 'THE', 'DUKE', 'BUT', 'THE', 'DUKE', 'IS', 'OLD', 'AND', 'OLD', 'AGE', 'NEITHER', 'PROTECTS', 'NOR', 'CONSOLES'] +3997-180297-0020-1142: hyp=['SUCH', 'A', 'MAN', 'I', 'FOUND', 'IN', 'THE', 'DUKE', 'BUT', 'THE', 'DUKE', 'IS', 'OLD', 'AND', 'OLD', 'AGE', 'NEITHER', 'PROTECTS', 'NOR', 'CONSOLES'] +3997-180297-0021-1143: ref=['I', 'THOUGHT', 'I', 'COULD', 'ACCEPT', 'THE', 'LIFE', 'WHICH', 'HE', 'OFFERED', 'ME', 'BUT', 'WHAT', 'WOULD', 'YOU', 'HAVE'] +3997-180297-0021-1143: hyp=['I', 'THOUGHT', 'I', 'COULD', 'ACCEPT', 'THE', 'LIFE', 'WHICH', 'HE', 'OFFERED', 'ME', 'BUT', 'WHAT', 'WOULD', 'YOU', 'HAVE'] +3997-180297-0022-1144: ref=['WHAT', 'I', 'LOVED', 'IN', 'YOU', 'WAS', 'NOT', 'THE', 'MAN', 'WHO', 'WAS', 'BUT', 'THE', 'MAN', 'WHO', 'WAS', 'GOING', 'TO', 'BE'] +3997-180297-0022-1144: hyp=['WHAT', 'I', 'LOVED', 'IN', 'YOU', 'WAS', 'NOT', 'THE', 'MAN', 'WHO', 'WAS', 'BUT', 'THE', 'MAN', 'WHO', 'WAS', 'GOING', 'TO', 'BE'] +3997-180297-0023-1145: ref=['MARGUERITE', 'TIRED', 'OUT', 'WITH', 'THIS', 'LONG', 'CONFESSION', 'THREW', 'HERSELF', 'BACK', 'ON', 'THE', 'SOFA', 'AND', 'TO', 'STIFLE', 'A', 'SLIGHT', 'COUGH', 'PUT', 'UP', 'HER', 'HANDKERCHIEF', 'TO', 'HER', 'LIPS', 'AND', 'FROM', 'THAT', 'TO', 'HER', 'EYES'] +3997-180297-0023-1145: hyp=['MARGUERITE', 'TIRED', 'OUT', 'WITH', 'THIS', 'LONG', 'CONFESSION', 'THREW', 'HERSELF', 'BACK', 'ON', 'THE', 'SOFA', 'AND', 'TO', 'STIFLE', 'A', 'SLIGHT', 'COUGH', 'PUT', 'UP', 'HER', 'HANDKERCHIEF', 'TO', 'HER', 'LIPS', 'AND', 'FROM', 'THAT', 'TO', 'HER', 'EYES'] +3997-180297-0024-1146: ref=['MARGUERITE', 'DO', 'WITH', 'ME', 'AS', 'YOU', 'WILL', 'I', 'AM', 'YOUR', 'SLAVE', 'YOUR', 'DOG', 'BUT', 'IN', 'THE', 'NAME', 'OF', 'HEAVEN', 'TEAR', 'UP', 'THE', 'LETTER', 'WHICH', 'I', 'WROTE', 'TO', 'YOU', 'AND', 'DO', 'NOT', 'MAKE', 'ME', 'LEAVE', 'YOU', 'TO', 'MORROW', 'IT', 'WOULD', 'KILL', 'ME'] +3997-180297-0024-1146: hyp=['MARGUERITE', 'DO', 'WITH', 'ME', 'AS', 'YOU', 'WILL', 'I', 'AM', 'YOUR', 'SLAVE', 'YOUR', 'DOG', 'BUT', 'IN', 'THE', 'NAME', 'OF', 'HEAVEN', 'TEAR', 'UP', 'THE', 'LETTER', 'WHICH', 'I', 'WROTE', 'TO', 'YOU', 'AND', 'DO', 'NOT', 'MAKE', 'ME', 'LEAVE', 'YOU', 'TO', 'MORROW', 'IT', 'WOULD', 'KILL', 'ME'] +3997-180297-0025-1147: ref=['MARGUERITE', 'DREW', 'THE', 'LETTER', 'FROM', 'HER', 
'BOSOM', 'AND', 'HANDING', 'IT', 'TO', 'ME', 'WITH', 'A', 'SMILE', 'OF', 'INFINITE', 'SWEETNESS', 'SAID'] +3997-180297-0025-1147: hyp=['MARGUERITE', 'DREW', 'THE', 'LETTER', 'FROM', 'HER', 'BOSOM', 'AND', 'HANDING', 'IT', 'TO', 'ME', 'WITH', 'A', 'SMILE', 'OF', 'INFINITE', 'SWEETNESS', 'SAID'] +3997-180297-0026-1148: ref=['HERE', 'IT', 'IS', 'I', 'HAVE', 'BROUGHT', 'IT', 'BACK'] +3997-180297-0026-1148: hyp=['HERE', 'IT', 'IS', 'I', 'HAVE', 'BROUGHT', 'IT', 'BACK'] +3997-180297-0027-1149: ref=['I', 'TORE', 'THE', 'LETTER', 'INTO', 'FRAGMENTS', 'AND', 'KISSED', 'WITH', 'TEARS', 'THE', 'HAND', 'THAT', 'GAVE', 'IT', 'TO', 'ME'] +3997-180297-0027-1149: hyp=['I', 'TORE', 'THE', 'LETTER', 'INTO', 'FRAGMENTS', 'AND', 'KISSED', 'WITH', 'TEARS', 'THE', 'HAND', 'THAT', 'GAVE', 'IT', 'TO', 'ME'] +3997-180297-0028-1150: ref=['LOOK', 'HERE', 'PRUDENCE', 'DO', 'YOU', 'KNOW', 'WHAT', 'HE', 'WANTS', 'SAID', 'MARGUERITE'] +3997-180297-0028-1150: hyp=['LOOK', 'HERE', 'PRUDENCE', 'DO', 'YOU', 'KNOW', 'WHAT', 'HE', 'WANTS', 'SAID', 'MARGUERITE'] +3997-180297-0029-1151: ref=['HE', 'WANTS', 'YOU', 'TO', 'FORGIVE', 'HIM'] +3997-180297-0029-1151: hyp=['HE', 'WANTS', 'YOU', 'TO', 'FORGIVE', 'HIM'] +3997-180297-0030-1152: ref=['ONE', 'HAS', 'TO', 'BUT', 'HE', 'WANTS', 'MORE', 'THAN', 'THAT', 'WHAT', 'THEN'] +3997-180297-0030-1152: hyp=['ONE', 'HAS', 'TO', 'BUT', 'HE', 'WANTS', 'MORE', 'THAN', 'THAT', 'WHAT', 'THEN'] +3997-180297-0031-1153: ref=['I', 'EMBRACED', 'MARGUERITE', 'UNTIL', 'SHE', 'WAS', 'ALMOST', 'STIFLED'] +3997-180297-0031-1153: hyp=['I', 'EMBRACED', 'MARGUERITE', 'UNTIL', 'SHE', 'WAS', 'ALMOST', 'STIFLED'] +3997-182399-0000-1154: ref=['OL', 'MISTAH', 'BUZZARD', 'GRINNED'] +3997-182399-0000-1154: hyp=['OLD', 'MISTER', 'BUZZARD', 'GRINNED'] +3997-182399-0001-1155: ref=['THIS', 'SOUNDED', 'LIKE', 'ANOTHER', 'STORY'] +3997-182399-0001-1155: hyp=['THIS', 'SOUNDED', 'LIKE', 'ANOTHER', 'STORY'] +3997-182399-0002-1156: ref=['HE', 'WAS', 'CURIOUS', 'ABOUT', 'THAT', 'BLACK', 'HEADED', 'COUSIN', 'OF', 'OL', 'MISTAH', 'BUZZARD', 'VERY', 'CURIOUS', 'INDEED'] +3997-182399-0002-1156: hyp=['HE', 'WAS', 'CURIOUS', 'ABOUT', 'THAT', 'BLACK', 'HEADED', 'COUSIN', 'OF', 'OLD', 'MISTER', 'BUZZARD', 'VERY', 'CURIOUS', 'INDEED'] +3997-182399-0003-1157: ref=['ANYWAY', 'HE', 'WOULD', 'FIND', 'OUT'] +3997-182399-0003-1157: hyp=['ANYWAY', 'HE', 'WOULD', 'FIND', 'OUT'] +3997-182399-0004-1158: ref=['PLEASE', 'MISTER', 'BUZZARD', 'PLEASE', 'TELL', 'US', 'THE', 'STORY', 'HE', 'BEGGED'] +3997-182399-0004-1158: hyp=['PLEASE', 'MISTER', 'BUZZARD', 'PLEASE', 'TELL', 'US', 'THE', 'STORY', 'HE', 'BEGGED'] +3997-182399-0005-1159: ref=['NOW', 'OL', 'MISTAH', 'BUZZARD', 'IS', 'NATURALLY', 'GOOD', 'NATURED', 'AND', 'ACCOMMODATING', 'AND', 'WHEN', 'PETER', 'BEGGED', 'SO', 'HARD', 'HE', 'JUST', "COULDN'T", 'FIND', 'IT', 'IN', 'HIS', 'HEART', 'TO', 'REFUSE'] +3997-182399-0005-1159: hyp=['NOW', 'OLD', 'MISTER', 'BUZZARD', 'IS', 'NATURALLY', 'GOOD', 'NATURED', 'AND', 'ACCOMMODATING', 'AND', 'WHEN', 'PETER', 'BEGGED', 'SO', 'HARD', 'HE', 'JUST', "COULDN'T", 'FIND', 'IT', 'IN', 'HIS', 'HEART', 'TO', 'REFUSE'] +3997-182399-0006-1160: ref=['WAY', 'BACK', 'IN', 'THE', 'DAYS', 'WHEN', 'GRANDPAP', 'BUZZARD', 'HAD', 'HIS', 'LIL', 'FALLING', 'OUT', 'WITH', 'OL', 'KING', 'EAGLE', 'AND', 'DONE', 'FLY', 'SO', 'HIGH', 'HE', "SCO'TCH", 'THE', 'FEATHERS', 'OFFEN', 'HIS', 'HAID', 'HE', 'HAD', 'A', 'COUSIN', 'DID', 'GRANDPAP', 'BUZZARD', 'AND', 'THIS', 'COUSIN', 'WAS', 'JES', 'NATURALLY', 'LAZY', 'AND', 'NO', 'COUNT'] +3997-182399-0006-1160: hyp=['WAY', 'BACK', 
'IN', 'THE', 'DAYS', 'WHEN', 'GRANDPAP', 'BUZZARD', 'HAD', 'HIS', 'LITTLE', 'FALLING', 'OUT', 'WITH', 'OLD', 'KING', 'EAGLE', 'AND', 'DONE', 'FLIES', 'SO', 'HIGH', 'HE', 'SCORCHED', 'THE', 'FEATHERS', 'OFF', 'HIS', 'HEAD', 'HE', 'HAD', 'COUSIN', 'DID', 'GRANDPAP', 'BUZZARD', 'AND', 'THIS', 'COUSIN', 'WAS', 'JUST', 'NATURALLY', 'LAZY', 'AND', 'NO', 'COUNT'] +3997-182399-0007-1161: ref=['LIKE', 'MOST', 'NO', 'COUNT', 'PEOPLE', 'HE', 'USED', 'TO', 'MAKE', 'A', 'REGULAR', 'NUISANCE', 'OF', 'HISSELF', 'POKING', 'HIS', 'NOSE', 'INTO', "EV'YBODY'S", 'BUSINESS', 'AND', 'NEVER', 'TENDING', 'TO', 'HIS', 'OWN'] +3997-182399-0007-1161: hyp=['LIKE', 'MOST', 'NOCOMN', 'PEOPLE', 'HE', 'USED', 'TO', 'MAKE', 'A', 'REGULAR', 'NUISANCE', 'OF', 'HIMSELF', 'POKING', 'HIS', 'NOSE', 'INTO', "EVERYBODY'S", 'BUSINESS', 'AND', 'NEVER', 'TENDING', 'TO', 'HIS', 'OWN'] +3997-182399-0008-1162: ref=["WASN'T", 'ANYTHING', 'GOING', 'ON', 'THAT', 'THIS', 'TRIFLING', 'MEMBER', 'OF', 'THE', 'BUZZARD', "FAM'LY", "DIDN'T", 'FIND', 'OUT', 'ABOUT', 'AND', 'MEDDLE', 'IN', 'HE', 'COULD', 'ASK', 'MO', 'QUESTIONS', 'THAN', 'PETER', 'RABBIT', 'CAN', 'AN', 'ANYBODY', 'THAT', 'CAN', 'DO', 'THAT', 'HAS', 'GOT', 'TO', 'ASK', 'A', 'LOT'] +3997-182399-0008-1162: hyp=["WASN'T", 'ANYTHING', 'GOING', 'ON', 'THAT', 'THIS', 'TRIFLING', 'MEMBER', 'OF', 'THE', 'BUZZARD', 'FAMILY', "DIDN'T", 'FIND', 'OUT', 'ABOUT', 'AND', 'MEDDLE', 'IN', 'HE', 'COULD', 'ASK', 'MORE', 'QUESTIONS', 'THAN', 'PETER', 'RABBIT', 'CAN', 'AND', 'ANYBODY', 'THAT', 'CAN', 'DO', 'THAT', 'HAS', 'GOT', 'TO', 'ASK', 'A', 'LOT'] +3997-182399-0009-1163: ref=['EVERYBODY', 'LOOKED', 'AT', 'PETER', 'AND', 'LAUGHED'] +3997-182399-0009-1163: hyp=['EVERYBODY', 'LOOKED', 'AT', 'PETER', 'AND', 'LAUGHED'] +3997-182399-0010-1164: ref=['SO', 'WE', 'UNS', 'SIT', 'ON', 'THE', 'CHIMNEY', 'TOPS', 'WHENEVER', 'OL', 'JACK', 'FROST', 'GETS', 'TO', 'STRAYING', 'DOWN', 'WHERE', 'HE', 'HAVE', 'NO', 'BUSINESS'] +3997-182399-0010-1164: hyp=['SO', 'WE', 'UNS', 'SIT', 'ON', 'THE', 'CHIMNEY', 'TOPS', 'WHENEVER', 'OLE', 'JACK', 'FROST', 'GETS', 'TO', 'STRAYING', 'DOWN', 'WHERE', 'HE', 'HAVE', 'NO', 'BUSINESS'] +3997-182399-0011-1165: ref=['ONE', 'DAY', 'THIS', 'NO', 'COUNT', 'TRIFLING', 'COUSIN', 'OF', 'GRANDPAP', 'BUZZARD', 'GET', 'COLD', 'IN', 'HIS', 'FEET'] +3997-182399-0011-1165: hyp=['ONE', 'DAY', 'THIS', 'NOCCOUNT', 'TRIFLING', 'COUSIN', 'OF', 'GRANPAP', 'BUZZARD', 'GET', 'COLD', 'IN', 'HIS', 'FEET'] +3997-182399-0012-1166: ref=['IT', 'WAS', 'ON', 'A', 'LIL', 'OL', 'HOUSE', 'A', 'LIL', 'OL', 'TUMBLE', 'DOWN', 'HOUSE'] +3997-182399-0012-1166: hyp=['IT', 'WAS', 'IN', 'A', 'LITTLE', 'OLD', 'HOUSE', 'A', 'LITTLE', 'OLD', 'TUMBLE', 'DOWN', 'HOUSE'] +3997-182399-0013-1167: ref=['WHY', 'HE', 'JES', 'STRETCH', 'HIS', 'FOOL', 'HAID', 'AS', 'FAR', 'DOWN', 'THAT', 'CHIMNEY', 'AS', 'HE', 'CAN', 'AN', 'LISTEN', 'AN', 'LISTEN'] +3997-182399-0013-1167: hyp=['WHY', 'HE', 'JUST', 'STRETCH', 'HIS', 'FULL', 'HEAD', 'AS', 'FAR', 'DOWN', 'THE', 'CHIMNEY', 'AS', 'HE', 'CAN', 'AND', 'LISTEN', 'AND', 'LISTEN'] +3997-182399-0014-1168: ref=['BUT', 'HE', "DON'T", 'MIND', 'THAT'] +3997-182399-0014-1168: hyp=['BUT', 'YOU', "DON'T", 'MIND', 'THAT'] +3997-182399-0015-1169: ref=['WILL', "YO'", 'ALLS', 'PLEASE', 'SPEAK', 'A', 'LIL', 'LOUDER', 'HE', 'HOLLER', 'DOWN', 'THE', 'CHIMNEY', 'JES', 'LIKE', 'THAT'] +3997-182399-0015-1169: hyp=['WILL', 'YOU', 'ALL', 'PLEASE', 'SPEAK', 'A', 'LITTLE', 'LOUDER', 'HE', 'HOLLERED', 'DOWN', 'THE', 'CHIMNEY', 'JUST', 'LIKE', 'THAT'] +3997-182399-0016-1170: ref=['YES', 'SAH', 'SHE', "SHO'LY", 'WAS', 
'PLUMB', 'SCARED'] +3997-182399-0016-1170: hyp=['YES', 'SAH', 'SHE', 'SURELY', 'WAS', 'PLUM', 'SCARED'] +3997-182399-0017-1171: ref=['THEY', 'LIKE', 'TO', 'CHOKE', 'THAT', 'NO', 'COUNT', 'BUZZARD', 'TO', 'DEATH'] +3997-182399-0017-1171: hyp=['THEY', 'LIKE', 'TO', 'CHOKE', 'THAT', 'NO', 'CON', 'BUZZER', 'TO', 'DEATH'] +3997-182399-0018-1172: ref=['WHEN', 'HE', 'GET', 'HOME', 'HE', 'TRY', 'AN', 'TRY', 'TO', 'BRUSH', 'THAT', 'SOOT', 'OFF', 'BUT', 'IT', 'DONE', 'GET', 'INTO', 'THE', 'SKIN', 'AN', 'IT', 'STAY', 'THERE'] +3997-182399-0018-1172: hyp=['WHEN', 'HE', 'GET', 'HOME', 'HE', 'TRY', 'AND', 'TRY', 'TO', 'BRUSH', 'THE', 'SUIT', 'OFF', 'BUT', 'IT', "DOESN'T", 'GET', 'INTO', 'THE', 'SKIN', 'AND', 'IT', 'STAYED', 'THERE'] +3997-182399-0019-1173: ref=['A', 'LITTLE', 'SIGH', 'OF', 'SATISFACTION', 'WENT', 'AROUND', 'THE', 'CIRCLE', 'OF', 'LISTENERS'] +3997-182399-0019-1173: hyp=['A', 'LITTLE', 'SIGH', 'OF', 'SATISFACTION', 'WENT', 'ROUND', 'THE', 'CIRCLE', 'OF', 'LISTENERS'] +3997-182399-0020-1174: ref=['IT', 'WAS', 'JUST', 'AS', 'GOOD', 'AS', 'ONE', 'OF', 'GRANDFATHER', "FROG'S"] +3997-182399-0020-1174: hyp=['IT', 'WAS', 'JUST', 'AS', 'GOOD', 'AS', 'ONE', 'OF', 'GRANDFATHER', 'FROGS'] +4198-12259-0000-1175: ref=['DRAW', 'REACH', 'FILL', 'MIX', 'GIVE', 'IT', 'ME', 'WITHOUT', 'WATER'] +4198-12259-0000-1175: hyp=['DRAW', 'REACH', 'FILL', 'MIX', 'GIVE', 'IT', 'ME', 'WITHOUT', 'WATER'] +4198-12259-0001-1176: ref=['SO', 'MY', 'FRIEND', 'SO', 'WHIP', 'ME', 'OFF', 'THIS', 'GLASS', 'NEATLY', 'BRING', 'ME', 'HITHER', 'SOME', 'CLARET', 'A', 'FULL', 'WEEPING', 'GLASS', 'TILL', 'IT', 'RUN', 'OVER'] +4198-12259-0001-1176: hyp=['SO', 'MY', 'FRIEND', 'SO', 'WHIP', 'ME', 'OFF', 'THIS', 'GLASS', 'NEATLY', 'BRING', 'ME', 'HITHER', 'SOME', 'CLARET', 'A', 'FULL', 'WEEPING', 'GLASS', 'TILL', 'IT', 'RUN', 'OVER'] +4198-12259-0002-1177: ref=['A', 'CESSATION', 'AND', 'TRUCE', 'WITH', 'THIRST'] +4198-12259-0002-1177: hyp=['A', 'CESSATION', 'AND', 'TRUTH', 'WITH', 'THIRST'] +4198-12259-0003-1178: ref=['YOU', 'HAVE', 'CATCHED', 'A', 'COLD', 'GAMMER', 'YEA', 'FORSOOTH', 'SIR'] +4198-12259-0003-1178: hyp=['YOU', 'HAVE', 'CAST', 'A', 'COLD', 'GAMMER', 'YEA', 'FORSOOTH', 'SIR'] +4198-12259-0004-1179: ref=['BY', 'THE', 'BELLY', 'OF', 'SANCT', 'BUFF', 'LET', 'US', 'TALK', 'OF', 'OUR', 'DRINK', 'I', 'NEVER', 'DRINK', 'BUT', 'AT', 'MY', 'HOURS', 'LIKE', 'THE', "POPE'S", 'MULE'] +4198-12259-0004-1179: hyp=['BY', 'THE', 'BELLY', 'OF', 'SAINT', 'BUFF', 'LET', 'US', 'TALK', 'OF', 'OUR', 'DRINK', 'I', 'NEVER', 'DRINK', 'BUT', 'AT', 'MY', 'HOURS', 'LIKE', 'THE', "POPE'S", 'MULE'] +4198-12259-0005-1180: ref=['WHICH', 'WAS', 'FIRST', 'THIRST', 'OR', 'DRINKING'] +4198-12259-0005-1180: hyp=['WHICH', 'WAS', 'FIRST', 'THIRST', 'A', 'DRINKING'] +4198-12259-0006-1181: ref=['WHAT', 'IT', 'SEEMS', 'I', 'DO', 'NOT', 'DRINK', 'BUT', 'BY', 'AN', 'ATTORNEY'] +4198-12259-0006-1181: hyp=['WHAT', 'IT', 'SEEMS', 'I', 'DO', 'NOT', 'DRINK', 'BUT', 'BUY', 'AN', 'ATTORNEY'] +4198-12259-0007-1182: ref=['DRINK', 'ALWAYS', 'AND', 'YOU', 'SHALL', 'NEVER', 'DIE'] +4198-12259-0007-1182: hyp=['DRINK', 'ALWAYS', 'AND', 'YOU', 'SHALL', 'NEVER', 'DIE'] +4198-12259-0008-1183: ref=['IF', 'I', 'DRINK', 'NOT', 'I', 'AM', 'A', 'GROUND', 'DRY', 'GRAVELLED', 'AND', 'SPENT', 'I', 'AM', 'STARK', 'DEAD', 'WITHOUT', 'DRINK', 'AND', 'MY', 'SOUL', 'READY', 'TO', 'FLY', 'INTO', 'SOME', 'MARSH', 'AMONGST', 'FROGS', 'THE', 'SOUL', 'NEVER', 'DWELLS', 'IN', 'A', 'DRY', 'PLACE', 'DROUTH', 'KILLS', 'IT'] +4198-12259-0008-1183: hyp=['IF', 'I', 'DRINK', 'NOT', 'I', 'AM', 'A', 'GROUND', 
'DRY', 'GRAVELLED', 'AND', 'SPENT', 'I', 'AM', 'STARK', 'DEAD', 'WITHOUT', 'DRINK', 'AND', 'MY', 'SOUL', 'READY', 'TO', 'FLY', 'INTO', 'SOME', 'MARSH', 'AMONGST', 'FROGS', 'THE', 'SOUL', 'NEVER', 'DWELLS', 'IN', 'A', 'DRY', 'PLACE', 'DROUGHTH', 'KILLETH', 'IT'] +4198-12259-0009-1184: ref=['HE', 'DRINKS', 'IN', 'VAIN', 'THAT', 'FEELS', 'NOT', 'THE', 'PLEASURE', 'OF', 'IT'] +4198-12259-0009-1184: hyp=['HE', 'DRINKS', 'IN', 'VAIN', 'THAT', 'FILLS', 'NOT', 'THE', 'PLEASURE', 'OF', 'IT'] +4198-12259-0010-1185: ref=['IT', 'IS', 'ENOUGH', 'TO', 'BREAK', 'BOTH', 'GIRDS', 'AND', 'PETREL'] +4198-12259-0010-1185: hyp=['IT', 'IS', 'ENOUGH', 'TO', 'BREAK', 'BOTH', 'GURGE', 'AND', 'PETROL'] +4198-12259-0011-1186: ref=['WHAT', 'DIFFERENCE', 'IS', 'THERE', 'BETWEEN', 'A', 'BOTTLE', 'AND', 'A', 'FLAGON'] +4198-12259-0011-1186: hyp=['WHAT', 'DIFFERENCE', 'IS', 'THERE', 'BETWEEN', 'A', 'BOTTLE', 'AND', 'A', 'FLAGON'] +4198-12259-0012-1187: ref=['BRAVELY', 'AND', 'WELL', 'PLAYED', 'UPON', 'THE', 'WORDS'] +4198-12259-0012-1187: hyp=['BRAVELY', 'AND', 'WELL', 'PLAYED', 'UPON', 'THE', 'WORDS'] +4198-12259-0013-1188: ref=['OUR', 'FATHERS', 'DRANK', 'LUSTILY', 'AND', 'EMPTIED', 'THEIR', 'CANS'] +4198-12259-0013-1188: hyp=['OUR', 'FATHERS', 'DRANK', 'LUSTILY', 'AND', 'EMPTIED', 'THEIR', 'CANS'] +4198-12259-0014-1189: ref=['WELL', 'CACKED', 'WELL', 'SUNG'] +4198-12259-0014-1189: hyp=['WELL', 'KACKLED', 'WELL', 'SUNG'] +4198-12259-0015-1190: ref=['COME', 'LET', 'US', 'DRINK', 'WILL', 'YOU', 'SEND', 'NOTHING', 'TO', 'THE', 'RIVER'] +4198-12259-0015-1190: hyp=['COME', 'LET', 'US', 'DRINK', 'WILL', 'YOU', 'SEND', 'NOTHING', 'TO', 'THE', 'RIVER'] +4198-12259-0016-1191: ref=['I', 'DRINK', 'NO', 'MORE', 'THAN', 'A', 'SPONGE'] +4198-12259-0016-1191: hyp=['I', 'DRINK', 'NO', 'MORE', 'THAN', 'THE', 'SPONNES'] +4198-12259-0017-1192: ref=['I', 'DRINK', 'LIKE', 'A', 'TEMPLAR', 'KNIGHT'] +4198-12259-0017-1192: hyp=['I', 'DRINK', 'LIKE', 'A', 'TEMPT', 'LAW', 'NIGHT'] +4198-12259-0018-1193: ref=['AND', 'I', 'TANQUAM', 'SPONSUS'] +4198-12259-0018-1193: hyp=['AN', 'I', 'TAM', 'CORRESPONSES'] +4198-12259-0019-1194: ref=['AND', 'I', 'SICUT', 'TERRA', 'SINE', 'AQUA'] +4198-12259-0019-1194: hyp=['AND', 'I', 'SICUT', 'TERIS', 'INAQUA'] +4198-12259-0020-1195: ref=['GIVE', 'ME', 'A', 'SYNONYMON', 'FOR', 'A', 'GAMMON', 'OF', 'BACON'] +4198-12259-0020-1195: hyp=['GIVE', 'ME', 'A', 'SYNONYM', 'FOR', 'A', 'GAMIN', 'OF', 'BACON'] +4198-12259-0021-1196: ref=['IT', 'IS', 'THE', 'COMPULSORY', 'OF', 'DRINKERS', 'IT', 'IS', 'A', 'PULLEY'] +4198-12259-0021-1196: hyp=['IT', 'IS', 'THE', 'COMPULSORY', 'OF', 'DRINKERS', 'IT', 'IS', 'A', 'PULLY'] +4198-12259-0022-1197: ref=['A', 'LITTLE', 'RAIN', 'ALLAYS', 'A', 'GREAT', 'DEAL', 'OF', 'WIND', 'LONG', 'TIPPLING', 'BREAKS', 'THE', 'THUNDER'] +4198-12259-0022-1197: hyp=['A', 'LITTLE', 'RAIN', 'ALLAYS', 'A', 'GREAT', 'DEAL', 'OF', 'WIND', 'LONG', 'TIPPLING', 'BREAKS', 'THE', 'THUNDER'] +4198-12259-0023-1198: ref=['BUT', 'IF', 'THERE', 'CAME', 'SUCH', 'LIQUOR', 'FROM', 'MY', 'BALLOCK', 'WOULD', 'YOU', 'NOT', 'WILLINGLY', 'THEREAFTER', 'SUCK', 'THE', 'UDDER', 'WHENCE', 'IT', 'ISSUED'] +4198-12259-0023-1198: hyp=['BUT', 'IF', 'THERE', 'CAME', 'SUCH', 'LIQUOR', 'FOR', 'MY', 'BALLOT', 'WILL', 'YOU', 'NOT', 'WILLINGLY', 'THEREAFTER', 'SUCK', 'THE', 'UTTER', 'WHENCE', 'IT', 'ISSUED'] +4198-12259-0024-1199: ref=['HERE', 'PAGE', 'FILL'] +4198-12259-0024-1199: hyp=['HERE', 'PAGE', 'FILL'] +4198-12259-0025-1200: ref=['I', 'APPEAL', 'FROM', 'THIRST', 'AND', 'DISCLAIM', 'ITS', 'JURISDICTION'] +4198-12259-0025-1200: 
hyp=['I', 'APPEAL', 'FROM', 'THIRST', 'AND', 'DISCLAIM', 'ITS', 'JURISDICTION'] +4198-12259-0026-1201: ref=['I', 'WAS', 'WONT', 'HERETOFORE', 'TO', 'DRINK', 'OUT', 'ALL', 'BUT', 'NOW', 'I', 'LEAVE', 'NOTHING'] +4198-12259-0026-1201: hyp=['I', 'WAS', 'WONT', 'HERETOFORE', 'TO', 'DRINK', 'OUT', 'ALL', 'BUT', 'NOW', 'I', 'LEAVE', 'NOTHING'] +4198-12259-0027-1202: ref=['HEYDAY', 'HERE', 'ARE', 'TRIPES', 'FIT', 'FOR', 'OUR', 'SPORT', 'AND', 'IN', 'EARNEST', 'EXCELLENT', 'GODEBILLIOS', 'OF', 'THE', 'DUN', 'OX', 'YOU', 'KNOW', 'WITH', 'THE', 'BLACK', 'STREAK'] +4198-12259-0027-1202: hyp=['HEY', 'THEY', 'HERE', 'ARE', 'TRIPES', 'FIT', 'FOR', 'OUR', 'SPORT', 'AND', 'IN', 'EARNEST', 'EXCELLENT', 'GOTA', 'BILLIOS', 'OF', 'THE', 'DUN', 'OX', 'YOU', 'KNOW', 'WITH', 'THE', 'BLACK', 'STREAK'] +4198-12259-0028-1203: ref=['O', 'FOR', "GOD'S", 'SAKE', 'LET', 'US', 'LASH', 'THEM', 'SOUNDLY', 'YET', 'THRIFTILY'] +4198-12259-0028-1203: hyp=['OH', 'FOR', "GOD'S", 'SAKE', 'LET', 'US', 'LAST', 'THEM', 'SOUNDLY', 'YET', 'THRIFTILY'] +4198-12259-0029-1204: ref=['SPARROWS', 'WILL', 'NOT', 'EAT', 'UNLESS', 'YOU', 'BOB', 'THEM', 'ON', 'THE', 'TAIL', 'NOR', 'CAN', 'I', 'DRINK', 'IF', 'I', 'BE', 'NOT', 'FAIRLY', 'SPOKE', 'TO'] +4198-12259-0029-1204: hyp=['SPARROWS', 'WILL', 'NOT', 'EAT', 'UNLESS', 'YOU', 'BOB', 'THEM', 'ON', 'THE', 'TAIL', 'NOR', 'CAN', 'I', 'DRINK', 'IF', 'I', 'BE', 'NOT', 'FAIRLY', 'SPOKE', 'TO'] +4198-12259-0030-1205: ref=['HO', 'THIS', 'WILL', 'BANG', 'IT', 'SOUNDLY'] +4198-12259-0030-1205: hyp=['OH', 'THIS', 'WAS', 'BANG', 'IT', 'SOUNDLY'] +4198-12259-0031-1206: ref=['BUT', 'THIS', 'SHALL', 'BANISH', 'IT', 'UTTERLY'] +4198-12259-0031-1206: hyp=['BUT', 'THEY', 'SHALL', 'BANISH', 'IT', 'UTTERLY'] +4198-12259-0032-1207: ref=['LET', 'US', 'WIND', 'OUR', 'HORNS', 'BY', 'THE', 'SOUND', 'OF', 'FLAGONS', 'AND', 'BOTTLES', 'AND', 'CRY', 'ALOUD', 'THAT', 'WHOEVER', 'HATH', 'LOST', 'HIS', 'THIRST', 'COME', 'NOT', 'HITHER', 'TO', 'SEEK', 'IT'] +4198-12259-0032-1207: hyp=['LET', 'US', 'WIND', 'OUR', 'HORNS', 'BY', 'THE', 'SOUND', 'OF', 'FLAGONS', 'AND', 'BOTTLES', 'AND', 'CRY', 'ALOUD', 'THAT', 'WHOEVER', 'HATH', 'LOST', 'HIS', 'THIRST', 'COME', 'NOT', 'HITHER', 'TO', 'SEEK', 'IT'] +4198-12259-0033-1208: ref=['THE', 'GREAT', 'GOD', 'MADE', 'THE', 'PLANETS', 'AND', 'WE', 'MAKE', 'THE', 'PLATTERS', 'NEAT'] +4198-12259-0033-1208: hyp=['THE', 'GREAT', 'GOD', 'MADE', 'THE', 'PLANETS', 'AND', 'WE', 'MAKE', 'THE', 'PLATTERS', 'NEAT'] +4198-12259-0034-1209: ref=['APPETITE', 'COMES', 'WITH', 'EATING', 'SAYS', 'ANGESTON', 'BUT', 'THE', 'THIRST', 'GOES', 'AWAY', 'WITH', 'DRINKING'] +4198-12259-0034-1209: hyp=['APPETITE', 'COMES', 'WITH', 'EATING', 'SAYS', 'ANGISTON', 'BUT', 'THE', 'THIRST', 'GOES', 'AWAY', 'WITH', 'DRINKING'] +4198-12259-0035-1210: ref=['I', 'HAVE', 'A', 'REMEDY', 'AGAINST', 'THIRST', 'QUITE', 'CONTRARY', 'TO', 'THAT', 'WHICH', 'IS', 'GOOD', 'AGAINST', 'THE', 'BITING', 'OF', 'A', 'MAD', 'DOG'] +4198-12259-0035-1210: hyp=['I', 'HAVE', 'A', 'REMEDY', 'AGAINST', 'THIRST', 'QUITE', 'CONTRARY', 'TO', 'THAT', 'WHICH', 'IS', 'GOOD', 'AGAINST', 'THE', 'BITING', 'OF', 'A', 'MAD', 'DOG'] +4198-12259-0036-1211: ref=['WHITE', 'WINE', 'HERE', 'WINE', 'BOYS'] +4198-12259-0036-1211: hyp=['WHITE', 'WINE', 'HERE', 'WINE', 'BOYS'] +4198-12259-0037-1212: ref=['O', 'LACHRYMA', 'CHRISTI', 'IT', 'IS', 'OF', 'THE', 'BEST', 'GRAPE'] +4198-12259-0037-1212: hyp=['O', 'LACHRYMAL', 'CHRISTI', 'IT', 'IS', 'OF', 'THE', 'BEST', 'GRAPE'] +4198-12259-0038-1213: ref=["I'FAITH", 'PURE', 'GREEK', 'GREEK', 'O', 'THE', 'FINE', 'WHITE', 
'WINE'] +4198-12259-0038-1213: hyp=['I', 'FAITH', 'PURE', 'GREEK', 'GREEK', 'O', 'THE', 'FINE', 'WHITE', 'WINE'] +4198-12259-0039-1214: ref=['THERE', 'IS', 'NO', 'ENCHANTMENT', 'NOR', 'CHARM', 'THERE', 'EVERY', 'ONE', 'OF', 'YOU', 'HATH', 'SEEN', 'IT'] +4198-12259-0039-1214: hyp=['THERE', 'IS', 'NO', 'ENCHANTMENT', 'NOR', 'CHARM', 'THERE', 'EVERY', 'ONE', 'OF', 'YOU', 'HATH', 'SEEN', 'IT'] +4198-12259-0040-1215: ref=['MY', 'PRENTICESHIP', 'IS', 'OUT', 'I', 'AM', 'A', 'FREE', 'MAN', 'AT', 'THIS', 'TRADE'] +4198-12259-0040-1215: hyp=['MY', 'APPRENTICESHIP', 'IS', "I'M", 'A', 'FREE', 'MAN', 'AT', 'THIS', 'TRADE'] +4198-12259-0041-1216: ref=['I', 'SHOULD', 'SAY', 'MASTER', 'PAST'] +4198-12259-0041-1216: hyp=['I', 'SHOULD', 'SAY', 'MASTER', 'PASS'] +4198-12259-0042-1217: ref=['O', 'THE', 'DRINKERS', 'THOSE', 'THAT', 'ARE', 'A', 'DRY', 'O', 'POOR', 'THIRSTY', 'SOULS'] +4198-12259-0042-1217: hyp=['O', 'THE', 'DRINKERS', 'THOSE', 'THAT', 'ARE', 'ADRY', 'O', 'POOR', 'THIRSTY', 'SOULS'] +4198-12259-0043-1218: ref=['CLEAR', 'OFF', 'NEAT', 'SUPERNACULUM'] +4198-12259-0043-1218: hyp=['CLEAR', 'OFF', 'NEAT', 'SUPERNACULUM'] +4198-12281-0000-1219: ref=['ALTHOUGH', 'THE', 'PLAGUE', 'WAS', 'THERE', 'IN', 'THE', 'MOST', 'PART', 'OF', 'ALL', 'THE', 'HOUSES', 'THEY', 'NEVERTHELESS', 'ENTERED', 'EVERYWHERE', 'THEN', 'PLUNDERED', 'AND', 'CARRIED', 'AWAY', 'ALL', 'THAT', 'WAS', 'WITHIN', 'AND', 'YET', 'FOR', 'ALL', 'THIS', 'NOT', 'ONE', 'OF', 'THEM', 'TOOK', 'ANY', 'HURT', 'WHICH', 'IS', 'A', 'MOST', 'WONDERFUL', 'CASE'] +4198-12281-0000-1219: hyp=['ALTHOUGH', 'THE', 'PLAGUE', 'WAS', 'THERE', 'IN', 'THE', 'MOST', 'PART', 'OF', 'ALL', 'THE', 'HOUSES', 'THEY', 'NEVERTHELESS', 'ENTERED', 'EVERYWHERE', 'THEN', 'PLUNDERED', 'AND', 'CARRIED', 'AWAY', 'ALL', 'THAT', 'WAS', 'WITHIN', 'AND', 'YET', 'FOR', 'ALL', 'THIS', 'NOT', 'ONE', 'OF', 'THEM', 'TOOK', 'ANY', 'HURT', 'WHICH', 'IS', 'A', 'MOST', 'WONDERFUL', 'CASE'] +4198-12281-0001-1220: ref=['I', 'BESEECH', 'YOU', 'THINK', 'UPON', 'IT'] +4198-12281-0001-1220: hyp=['I', 'BESEECH', 'YOU', 'THINK', 'UPON', 'IT'] +4198-12281-0002-1221: ref=['NEVERTHELESS', 'AT', 'ALL', 'ADVENTURES', 'THEY', 'RANG', 'THE', 'BELLS', 'AD', 'CAPITULUM', 'CAPITULANTES'] +4198-12281-0002-1221: hyp=['NEVERTHELESS', 'AT', 'ALL', 'VENTURES', 'THEY', 'RANG', 'THE', 'BELLS', 'ED', 'CAPITULUM', 'CAPITULANT', 'DAYS'] +4198-12281-0003-1222: ref=['BY', 'THE', 'VIRTUE', 'OF', 'GOD', 'WHY', 'DO', 'NOT', 'YOU', 'SING', 'PANNIERS', 'FAREWELL', 'VINTAGE', 'IS', 'DONE'] +4198-12281-0003-1222: hyp=['BY', 'THE', 'VIRTUE', 'OF', 'GOD', 'WHY', 'DO', 'NOT', 'YOU', 'SING', 'PANNIERS', 'FAREWELL', 'VINTAGE', 'IS', 'DONE'] +4198-12281-0004-1223: ref=['BY', 'THE', 'BELLY', 'OF', 'SANCT', 'JAMES', 'WHAT', 'SHALL', 'WE', 'POOR', 'DEVILS', 'DRINK', 'THE', 'WHILE'] +4198-12281-0004-1223: hyp=['BY', 'THE', 'BELLY', 'OF', 'SAINT', 'JAMES', 'WHAT', 'SHALL', 'WE', 'POOR', 'DEVILS', 'DRINK', 'THE', 'WHILE'] +4198-12281-0005-1224: ref=['LORD', 'GOD', 'DA', 'MIHI', 'POTUM'] +4198-12281-0005-1224: hyp=['LORD', 'GOD', 'DOMIHIPPOTEM'] +4198-12281-0006-1225: ref=['LET', 'HIM', 'BE', 'CARRIED', 'TO', 'PRISON', 'FOR', 'TROUBLING', 'THE', 'DIVINE', 'SERVICE'] +4198-12281-0006-1225: hyp=['LET', 'HIM', 'BE', 'CARRIED', 'TO', 'PRISON', 'FOR', 'TROUBLING', 'THE', 'DIVINE', 'SERVICE'] +4198-12281-0007-1226: ref=['WHEREFORE', 'IS', 'IT', 'THAT', 'OUR', 'DEVOTIONS', 'WERE', 'INSTITUTED', 'TO', 'BE', 'SHORT', 'IN', 'THE', 'TIME', 'OF', 'HARVEST', 'AND', 'VINTAGE', 'AND', 'LONG', 'IN', 'THE', 'ADVENT', 'AND', 'ALL', 'THE', 'WINTER'] 
+4198-12281-0007-1226: hyp=['WHEREFORE', 'IS', 'IT', 'THAT', 'OUR', 'DEVOTIONS', 'WERE', 'INSTITUTED', 'TO', 'BE', 'SHORT', 'IN', 'THE', 'TIME', 'OF', 'HARVEST', 'AND', 'VINTAGE', 'AND', 'LONG', 'IN', 'THE', 'ADVENT', 'IN', 'ALL', 'THE', 'WINTER'] +4198-12281-0008-1227: ref=['HARK', 'YOU', 'MY', 'MASTERS', 'YOU', 'THAT', 'LOVE', 'THE', 'WINE', "COP'S", 'BODY', 'FOLLOW', 'ME', 'FOR', 'SANCT', 'ANTHONY', 'BURN', 'ME', 'AS', 'FREELY', 'AS', 'A', 'FAGGOT', 'IF', 'THEY', 'GET', 'LEAVE', 'TO', 'TASTE', 'ONE', 'DROP', 'OF', 'THE', 'LIQUOR', 'THAT', 'WILL', 'NOT', 'NOW', 'COME', 'AND', 'FIGHT', 'FOR', 'RELIEF', 'OF', 'THE', 'VINE'] +4198-12281-0008-1227: hyp=['HARK', 'YOU', 'MY', 'MASTERS', 'YOU', 'THAT', 'LOVE', 'THE', 'WINE', "COP'S", 'BODY', 'FOLLOW', 'ME', 'FOR', 'SAINT', 'ANTHONY', 'BURNED', 'ME', 'AS', 'FREELY', 'AS', 'A', 'FAGGOT', 'DID', 'THEY', 'GET', 'LEAVE', 'TO', 'TASTE', 'ONE', 'DROP', 'OF', 'THE', 'LIQUOR', 'THAT', 'WOULD', 'NOT', 'NOW', 'COME', 'AND', 'FIGHT', 'FOR', 'RELIEF', 'OF', 'THE', 'VINE'] +4198-12281-0009-1228: ref=['TO', 'OTHERS', 'AGAIN', 'HE', 'UNJOINTED', 'THE', 'SPONDYLES', 'OR', 'KNUCKLES', 'OF', 'THE', 'NECK', 'DISFIGURED', 'THEIR', 'CHAPS', 'GASHED', 'THEIR', 'FACES', 'MADE', 'THEIR', 'CHEEKS', 'HANG', 'FLAPPING', 'ON', 'THEIR', 'CHIN', 'AND', 'SO', 'SWINGED', 'AND', 'BALAMMED', 'THEM', 'THAT', 'THEY', 'FELL', 'DOWN', 'BEFORE', 'HIM', 'LIKE', 'HAY', 'BEFORE', 'A', 'MOWER'] +4198-12281-0009-1228: hyp=['TO', 'OTHERS', 'AGAIN', 'HE', 'UNJOINTED', 'THE', 'SPINE', 'NEULES', 'OR', 'KNUCKLES', 'OF', 'THE', 'NECK', 'DISFIGURED', 'THEIR', 'CHAPS', 'GASHED', 'THEIR', 'FACES', 'MADE', 'THEIR', 'CHEEKS', 'HANG', 'FLAPPING', 'ON', 'THEIR', 'CHIN', 'AND', 'SO', 'SWINGED', 'AND', 'BELAMMED', 'THEM', 'THAT', 'THEY', 'FELL', 'DOWN', 'BEFORE', 'HIM', 'LIKE', 'HAY', 'BEFORE', 'A', 'MOOR'] +4198-12281-0010-1229: ref=['TO', 'SOME', 'WITH', 'A', 'SMART', 'SOUSE', 'ON', 'THE', 'EPIGASTER', 'HE', 'WOULD', 'MAKE', 'THEIR', 'MIDRIFF', 'SWAG', 'THEN', 'REDOUBLING', 'THE', 'BLOW', 'GAVE', 'THEM', 'SUCH', 'A', 'HOMEPUSH', 'ON', 'THE', 'NAVEL', 'THAT', 'HE', 'MADE', 'THEIR', 'PUDDINGS', 'TO', 'GUSH', 'OUT'] +4198-12281-0010-1229: hyp=['TO', 'SOME', 'WITH', 'A', 'SMART', 'SOULS', 'ON', 'THEIR', 'EBERGASTER', 'HE', 'WOULD', 'MAKE', 'THEIR', 'MIDDRIFTS', 'WAAG', 'THEN', 'REDOUBLING', 'THE', 'BLOW', 'GAVE', 'THEM', 'SUCH', 'A', 'HOME', 'PUSH', 'ON', 'THE', 'NAVEL', 'THAT', 'HE', 'MADE', 'THEIR', 'PUDDINGS', 'TO', 'GUSH', 'OUT'] +4198-12281-0011-1230: ref=['BELIEVE', 'THAT', 'IT', 'WAS', 'THE', 'MOST', 'HORRIBLE', 'SPECTACLE', 'THAT', 'EVER', 'ONE', 'SAW'] +4198-12281-0011-1230: hyp=['BELIEVE', 'THAT', 'IT', 'WAS', 'THE', 'MOST', 'HORRIBLE', 'SPECTACLE', 'THAT', 'EVER', 'ONE', 'SAW'] +4198-12281-0012-1231: ref=['O', 'THE', 'HOLY', 'LADY', 'NYTOUCH', 'SAID', 'ONE', 'THE', 'GOOD', 'SANCTESS', 'O', 'OUR', 'LADY', 'OF', 'SUCCOURS', 'SAID', 'ANOTHER', 'HELP', 'HELP'] +4198-12281-0012-1231: hyp=['O', 'THE', 'HOLY', 'LADY', 'KNIGHTS', 'SAID', 'ONE', 'THE', 'GOOD', 'SANCTUS', 'O', 'OUR', 'LADY', 'OF', 'SUCCOURUS', 'SAID', 'ANOTHER', 'HELP', 'HELP'] +4198-12281-0013-1232: ref=['SOME', 'DIED', 'WITHOUT', 'SPEAKING', 'OTHERS', 'SPOKE', 'WITHOUT', 'DYING', 'SOME', 'DIED', 'IN', 'SPEAKING', 'OTHERS', 'SPOKE', 'IN', 'DYING'] +4198-12281-0013-1232: hyp=['SOME', 'DIED', 'WITHOUT', 'SPEAKING', 'OTHERS', 'SPOKE', 'WITHOUT', 'DYING', 'SOME', 'DIED', 'IN', 'SPEAKING', 'OTHERS', 'SPOKE', 'IN', 'DYING'] +4198-12281-0014-1233: ref=['CAN', 'YOU', 'TELL', 'WITH', 'WHAT', 'INSTRUMENTS', 'THEY', 'DID', 'IT'] 
+4198-12281-0014-1233: hyp=['CAN', 'YOU', 'TELL', 'WITH', 'WHAT', 'INSTRUMENTS', 'THEY', 'DID', 'IT'] +4198-12281-0015-1234: ref=['IN', 'THE', 'MEANTIME', 'FRIAR', 'JOHN', 'WITH', 'HIS', 'FORMIDABLE', 'BATON', 'OF', 'THE', 'CROSS', 'GOT', 'TO', 'THE', 'BREACH', 'WHICH', 'THE', 'ENEMIES', 'HAD', 'MADE', 'AND', 'THERE', 'STOOD', 'TO', 'SNATCH', 'UP', 'THOSE', 'THAT', 'ENDEAVOURED', 'TO', 'ESCAPE'] +4198-12281-0015-1234: hyp=['IN', 'THE', 'MEANTIME', 'FRIAR', 'JOHN', 'WITH', 'HIS', 'FORMIDABLE', 'BATON', 'OF', 'THE', 'CROSS', 'GOT', 'TO', 'THE', 'BREACH', 'WHICH', 'THE', 'ENEMIES', 'HAD', 'MADE', 'AND', 'THERE', 'STOOD', 'TO', 'SNATCH', 'UP', 'THOSE', 'THAT', 'ENDEAVORED', 'TO', 'ESCAPE'] +4198-61336-0000-1235: ref=['IT', 'IS', 'SIGNIFICANT', 'TO', 'NOTE', 'IN', 'THIS', 'CONNECTION', 'THAT', 'THE', 'NEW', 'KING', 'WAS', 'AN', 'UNSWERVING', 'ADHERENT', 'OF', 'THE', 'CULT', 'OF', 'ASHUR', 'BY', 'THE', 'ADHERENTS', 'OF', 'WHICH', 'HE', 'WAS', 'PROBABLY', 'STRONGLY', 'SUPPORTED'] +4198-61336-0000-1235: hyp=['IT', 'IS', 'SIGNIFICANT', 'TO', 'NOTE', 'IN', 'THIS', 'CONNECTION', 'THAT', 'THE', 'NEW', 'KING', 'WAS', 'AN', 'UNSWERVING', 'ADHERENT', 'OF', 'THE', 'CULT', 'OF', 'AESRE', 'BY', 'THE', 'ADHERENCE', 'OF', 'WHICH', 'HE', 'WAS', 'PROBABLY', 'STRONGLY', 'SUPPORTED'] +4198-61336-0001-1236: ref=['AT', 'THE', 'BEGINNING', 'OF', 'HIS', 'REIGN', 'THERE', 'WAS', 'MUCH', 'SOCIAL', 'DISCONTENT', 'AND', 'SUFFERING'] +4198-61336-0001-1236: hyp=['AT', 'THE', 'BEGINNING', 'OF', 'HIS', 'REIGN', 'THERE', 'WAS', 'MUCH', 'SOCIAL', 'DISCONTENT', 'AND', 'SUFFERING'] +4198-61336-0002-1237: ref=['WELL', 'MIGHT', 'SHARDURIS', 'EXCLAIM', 'IN', 'THE', 'WORDS', 'OF', 'THE', 'PROPHET', 'WHERE', 'IS', 'THE', 'KING', 'OF', 'ARPAD'] +4198-61336-0002-1237: hyp=['WELL', 'MIGHT', 'JODURIS', 'EXCLAIM', 'IN', 'THE', 'WORDS', 'OF', 'THE', 'PROPHET', 'WHERE', 'IS', 'THE', 'KING', 'OF', 'ARPAT'] +4198-61336-0003-1238: ref=['TIGLATH', 'PILESER', 'HOWEVER', 'CROSSED', 'THE', 'EUPHRATES', 'AND', 'MOVING', 'NORTHWARD', 'DELIVERED', 'AN', 'UNEXPECTED', 'ATTACK', 'ON', 'THE', 'URARTIAN', 'ARMY', 'IN', 'QUMMUKH'] +4198-61336-0003-1238: hyp=['TIGLATH', 'POLESU', 'HOWEVER', 'CROSSED', 'THE', 'EUPHRATES', 'AND', 'MOVING', 'NORTHWARD', 'DELIVERED', 'AN', 'UNEXPECTED', 'ATTACK', 'ON', 'THE', 'GORACIAN', 'ARMY', 'IN', 'CUMAC'] +4198-61336-0004-1239: ref=['A', 'FIERCE', 'BATTLE', 'ENSUED', 'AND', 'ONE', 'OF', 'ITS', 'DRAMATIC', 'INCIDENTS', 'WAS', 'A', 'SINGLE', 'COMBAT', 'BETWEEN', 'THE', 'RIVAL', 'KINGS'] +4198-61336-0004-1239: hyp=['A', 'FIERCE', 'BATTLE', 'ENSUED', 'AND', 'ONE', 'OF', 'ITS', 'DRAMATIC', 'INCIDENTS', 'WAS', 'A', 'SINGLE', 'COMBAT', 'BETWEEN', 'THE', 'RIVAL', 'KINGS'] +4198-61336-0005-1240: ref=['AN', 'ATTEMPT', 'WAS', 'MADE', 'TO', 'CAPTURE', 'KING', 'SHARDURIS', 'WHO', 'LEAPT', 'FROM', 'HIS', 'CHARIOT', 'AND', 'MADE', 'HASTY', 'ESCAPE', 'ON', 'HORSEBACK', 'HOTLY', 'PURSUED', 'IN', 'THE', 'GATHERING', 'DARKNESS', 'BY', 'AN', 'ASSYRIAN', 'CONTINGENT', 'OF', 'CAVALRY'] +4198-61336-0005-1240: hyp=['AN', 'ATTEMPT', 'WAS', 'MADE', 'TO', 'CAPTURE', 'KING', 'CHADORIS', 'WHO', 'LEAPED', 'FROM', 'HIS', 'CHARIOT', 'AND', 'MADE', 'HASTY', 'ESCAPE', 'ON', 'HORSEBACK', 'HOTLY', 'PURSUED', 'IN', 'THE', 'GATHERING', 'DARKNESS', 'BY', 'AN', 'ASSYRIAN', 'CONTINGENT', 'OF', 'CAVALRY'] +4198-61336-0006-1241: ref=['DESPITE', 'THE', 'BLOW', 'DEALT', 'AGAINST', 'URARTU', 'ASSYRIA', 'DID', 'NOT', 'IMMEDIATELY', 'REGAIN', 'POSSESSION', 'OF', 'NORTH', 'SYRIA'] +4198-61336-0006-1241: hyp=['DESPITE', 'THE', 'BLOW', 'DEALT', 'AGAINST', 'URTU', 
'ASSYRIA', 'DID', 'NOT', 'IMMEDIATELY', 'REGAIN', 'POSSESSION', 'OF', 'NORTH', 'SYRIA'] +4198-61336-0007-1242: ref=['THE', 'SHIFTY', 'MATI', 'ILU', 'EITHER', 'CHERISHED', 'THE', 'HOPE', 'THAT', 'SHARDURIS', 'WOULD', 'RECOVER', 'STRENGTH', 'AND', 'AGAIN', 'INVADE', 'NORTH', 'SYRIA', 'OR', 'THAT', 'HE', 'MIGHT', 'HIMSELF', 'ESTABLISH', 'AN', 'EMPIRE', 'IN', 'THAT', 'REGION'] +4198-61336-0007-1242: hyp=['THE', 'SHIFTY', 'MATIILLIU', 'EITHER', 'CHERISHED', 'THE', 'HOPE', 'THAT', 'CHARDURIS', 'WOULD', 'RECOVER', 'STRENGTH', 'AND', 'AGAIN', 'INVADE', 'NORTH', 'ASSYRIA', 'OR', 'THAT', 'HE', 'MIGHT', 'HIMSELF', 'ESTABLISH', 'AN', 'EMPIRE', 'IN', 'THAT', 'REGION'] +4198-61336-0008-1243: ref=['TIGLATH', 'PILESER', 'HAD', 'THEREFORE', 'TO', 'MARCH', 'WESTWARD', 'AGAIN'] +4198-61336-0008-1243: hyp=['TIGLAS', 'BELEISURE', 'HAD', 'THEREFORE', 'TO', 'MARCH', 'WESTWARD', 'AGAIN'] +4198-61336-0009-1244: ref=['FOR', 'THREE', 'YEARS', 'HE', 'CONDUCTED', 'VIGOROUS', 'CAMPAIGNS', 'IN', 'THE', 'WESTERN', 'LAND', 'WHERE', 'HE', 'MET', 'WITH', 'VIGOROUS', 'RESISTANCE'] +4198-61336-0009-1244: hyp=['FOR', 'THREE', 'YEARS', 'HE', 'CONDUCTED', 'VIGOROUS', 'CAMPAIGNS', 'IN', 'THE', 'WESTERNLAND', 'WHERE', 'HE', 'MET', 'WITH', 'VIGOROUS', 'RESISTANCE'] +4198-61336-0010-1245: ref=['ARPAD', 'WAS', 'CAPTURED', 'AND', 'MATI', 'ILU', 'DEPOSED', 'AND', 'PROBABLY', 'PUT', 'TO', 'DEATH'] +4198-61336-0010-1245: hyp=['ARPAD', 'WAS', 'CAPTURED', 'AND', 'MEANT', 'TO', 'ILU', 'DEPOSED', 'AND', 'PROBABLY', 'PUT', 'TO', 'DEATH'] +4198-61336-0011-1246: ref=['ONCE', 'AGAIN', 'THE', 'HEBREWS', 'CAME', 'INTO', 'CONTACT', 'WITH', 'ASSYRIA'] +4198-61336-0011-1246: hyp=['ONCE', 'AGAIN', 'THE', 'HEBREWS', 'CAME', 'INTO', 'CONTACT', 'WITH', 'ASSYRIA'] +4198-61336-0012-1247: ref=['ITS', 'FALL', 'MAY', 'NOT', 'HAVE', 'BEEN', 'UNCONNECTED', 'WITH', 'THE', 'TREND', 'OF', 'EVENTS', 'IN', 'ASSYRIA', 'DURING', 'THE', 'CLOSING', 'YEARS', 'OF', 'THE', 'MIDDLE', 'EMPIRE'] +4198-61336-0012-1247: hyp=['ITS', 'FALL', 'MAY', 'NOT', 'HAVE', 'BEEN', 'UNCONNECTED', 'WITH', 'THE', 'TREND', 'OF', 'EVENTS', 'IN', 'ASSYRIA', 'DURING', 'THE', 'CLOSING', 'YEARS', 'OF', 'THE', 'MIDDLE', 'EMPIRE'] +4198-61336-0013-1248: ref=['JEHOASH', 'THE', 'GRANDSON', 'OF', 'JEHU', 'HAD', 'ACHIEVED', 'SUCCESSES', 'IN', 'CONFLICT', 'WITH', 'DAMASCUS'] +4198-61336-0013-1248: hyp=['JEHOASH', 'THE', 'GRANDSON', 'OF', 'JEHU', 'HAD', 'ACHIEVED', 'SUCCESSES', 'IN', 'CONFLICT', 'WITH', 'DAMASCUS'] +4198-61336-0014-1249: ref=['SIX', 'MONTHS', 'AFTERWARDS', 'HE', 'WAS', 'ASSASSINATED', 'BY', 'SHALLUM'] +4198-61336-0014-1249: hyp=['SIX', 'MONTHS', 'AFTERWARD', 'HE', 'WAS', 'ASSASSINATED', 'BY', 'SCHILUM'] +4198-61336-0015-1250: ref=['THIS', 'USURPER', 'HELD', 'SWAY', 'AT', 'SAMARIA', 'FOR', 'ONLY', 'A', 'MONTH'] +4198-61336-0015-1250: hyp=['THIS', 'USURPER', 'HELD', 'SWAY', 'AT', 'SUMERIA', 'FOR', 'ONLY', 'A', 'MONTH'] +4198-61336-0016-1251: ref=['NO', 'RESISTANCE', 'WAS', 'POSSIBLE', 'ON', 'THE', 'PART', 'OF', 'MENAHEM', 'THE', 'USURPER', 'WHO', 'WAS', 'PROBABLY', 'READY', 'TO', 'WELCOME', 'THE', 'ASSYRIAN', 'CONQUEROR', 'SO', 'THAT', 'BY', 'ARRANGING', 'AN', 'ALLIANCE', 'HE', 'MIGHT', 'SECURE', 'HIS', 'OWN', 'POSITION'] +4198-61336-0016-1251: hyp=['NO', 'RESISTANCE', 'WAS', 'POSSIBLE', 'ON', 'THE', 'PART', 'OF', 'MANAHEM', 'THE', 'USURPER', 'WHO', 'WAS', 'PROBABLY', 'READY', 'TO', 'WELCOME', 'THE', 'ASSYRIAN', 'CONQUEROR', 'SO', 'THAT', 'BY', 'ARRANGING', 'AN', 'ALLIANCE', 'HE', 'MIGHT', 'SECURE', 'HIS', 'OWN', 'POSITION'] +4198-61336-0017-1252: ref=['TIGLATH', 'PILESER', 'NEXT', 
'OPERATED', 'AGAINST', 'THE', 'MEDIAN', 'AND', 'OTHER', 'HILL', 'TRIBES', 'IN', 'THE', 'NORTH', 'EAST'] +4198-61336-0017-1252: hyp=['TIGLAS', 'POLYSER', 'NEXT', 'OPERATED', 'AGAINST', 'THE', 'MEDEAN', 'AND', 'OTHER', 'HI', 'TRIBES', 'IN', 'THE', 'NORTHEAST'] +4198-61336-0018-1253: ref=['HE', 'OVERTHREW', 'BUILDINGS', 'DESTROYED', 'ORCHARDS', 'AND', 'TRANSPORTED', 'TO', 'NINEVEH', 'THOSE', 'OF', 'THE', 'INHABITANTS', 'HE', 'HAD', 'NOT', 'PUT', 'TO', 'THE', 'SWORD', 'WITH', 'ALL', 'THE', 'LIVE', 'STOCK', 'HE', 'COULD', 'LAY', 'HANDS', 'ON'] +4198-61336-0018-1253: hyp=['HE', 'OVERTHREW', 'BUILDINGS', 'DESTROYED', 'ORCHARDS', 'AND', 'TRANSPORTED', 'TO', 'NINEVEH', 'THOSE', 'OF', 'THE', 'INHABITANTS', 'HE', 'HAD', 'NOT', 'PUT', 'TO', 'THE', 'SWARD', 'WITH', 'ALL', 'THE', 'LIVE', 'STOCK', 'HE', 'COULD', 'LAY', 'HANDS', 'ON'] +4198-61336-0019-1254: ref=['THUS', 'WAS', 'URARTU', 'CRIPPLED', 'AND', 'HUMILIATED', 'IT', 'NEVER', 'REGAINED', 'ITS', 'FORMER', 'PRESTIGE', 'AMONG', 'THE', 'NORTHERN', 'STATES'] +4198-61336-0019-1254: hyp=['THUS', 'WAS', 'URYTU', 'CRIPPLED', 'AND', 'HUMILIATED', 'IT', 'NEVER', 'REGAINED', 'ITS', 'FORMER', 'PRESTIGE', 'AMONG', 'THE', 'NORTHERN', 'STATES'] +4198-61336-0020-1255: ref=['IN', 'THE', 'FOLLOWING', 'YEAR', 'TIGLATH', 'PILESER', 'RETURNED', 'TO', 'SYRIA'] +4198-61336-0020-1255: hyp=['IN', 'THE', 'FOLLOWING', 'YEAR', 'TIGLATH', 'BELEASER', 'RETURNED', 'TO', 'SYRIA'] +4198-61336-0021-1256: ref=['MENAHEM', 'KING', 'OF', 'ISRAEL', 'HAD', 'DIED', 'AND', 'WAS', 'SUCCEEDED', 'BY', 'HIS', 'SON', 'PEKAHIAH'] +4198-61336-0021-1256: hyp=['MANAHIM', 'KING', 'OF', 'ISRAEL', 'HAD', 'DIED', 'AND', 'WAS', 'SUCCEEDED', 'BY', 'HIS', 'SON', 'PEKAHIAH'] +4198-61336-0022-1257: ref=['JUDAH', 'HAD', 'TAKEN', 'ADVANTAGE', 'OF', 'THE', 'DISTURBED', 'CONDITIONS', 'IN', 'ISRAEL', 'TO', 'ASSERT', 'ITS', 'INDEPENDENCE'] +4198-61336-0022-1257: hyp=['JUDAH', 'HAD', 'TAKEN', 'ADVANTAGE', 'OF', 'THE', 'DISTURBED', 'CONDITIONS', 'IN', 'ISRAEL', 'TO', 'ASSERT', 'ITS', 'INDEPENDENCE'] +4198-61336-0023-1258: ref=['HE', 'CONDEMNED', 'ISRAEL', 'FOR', 'ITS', 'IDOLATRIES', 'AND', 'CRIED'] +4198-61336-0023-1258: hyp=['HE', 'CONDEMNED', 'ISRAEL', 'FOR', 'ITS', 'IDOLATRIES', 'AND', 'CRIED'] +4198-61336-0024-1259: ref=['FOR', 'THUS', 'SAITH', 'THE', 'LORD', 'UNTO', 'THE', 'HOUSE', 'OF', 'ISRAEL', 'SEEK', 'YE', 'ME', 'AND', 'YE', 'SHALL', 'LIVE', 'HAVE', 'YE', 'OFFERED', 'UNTO', 'ME', 'SACRIFICES', 'AND', 'OFFERINGS', 'IN', 'THE', 'WILDERNESS', 'FORTY', 'YEARS', 'O', 'HOUSE', 'OF', 'ISRAEL'] +4198-61336-0024-1259: hyp=['FOR', 'THUS', 'SAITH', 'THE', 'LORD', 'UNTO', 'THE', 'HOUSE', 'OF', 'ISRAEL', 'SEEK', 'YE', 'ME', 'AND', 'YE', 'SHALL', 'LIVE', 'HAVE', 'YE', 'OFFERED', 'UNTO', 'ME', 'SACRIFICES', 'AND', 'OFFERINGS', 'IN', 'THE', 'WILDERNESS', 'FORTY', 'YEARS', 'O', 'HOUSE', 'OF', 'ISRAEL'] +4198-61336-0025-1260: ref=['THE', 'REMNANT', 'OF', 'THE', 'PHILISTINES', 'SHALL', 'PERISH'] +4198-61336-0025-1260: hyp=['THE', 'REMNANT', 'OF', 'THE', 'PHILISTINE', 'SHALL', 'PERISH'] +4198-61336-0026-1261: ref=['ISRAEL', 'WAS', 'ALSO', 'DEALT', 'WITH'] +4198-61336-0026-1261: hyp=['ISRAEL', 'WAS', 'ALSO', 'DEALT', 'WITH'] +4198-61336-0027-1262: ref=['HE', 'SWEPT', 'THROUGH', 'ISRAEL', 'LIKE', 'A', 'HURRICANE'] +4198-61336-0027-1262: hyp=['HE', 'SWEPT', 'THROUGH', 'ISRAEL', 'LIKE', 'A', 'HURRICANE'] +4198-61336-0028-1263: ref=['THE', 'PHILISTINES', 'AND', 'THE', 'ARABIANS', 'OF', 'THE', 'DESERT', 'WERE', 'ALSO', 'SUBDUED'] +4198-61336-0028-1263: hyp=['THE', 'PHILISTINES', 'AND', 'THE', 'ARABIANS', 'OF', 'THE', 'DESERT', 
'WERE', 'ALSO', 'SUBDUED'] +4198-61336-0029-1264: ref=['HE', 'INVADED', 'BABYLONIA'] +4198-61336-0029-1264: hyp=['HE', 'INVADED', 'BABYLONIA'] +4198-61336-0030-1265: ref=['UKINZER', 'TOOK', 'REFUGE', 'IN', 'HIS', 'CAPITAL', 'SHAPIA', 'WHICH', 'HELD', 'OUT', 'SUCCESSFULLY', 'ALTHOUGH', 'THE', 'SURROUNDING', 'COUNTRY', 'WAS', 'RAVAGED', 'AND', 'DESPOILED'] +4198-61336-0030-1265: hyp=['AKENJER', 'TOOK', 'REFUGE', 'IN', 'HIS', 'CAPITAL', 'SHAPIA', 'WHICH', 'HELD', 'OUT', 'SUCCESSFULLY', 'ALTHOUGH', 'THE', 'SURROUNDING', 'COUNTRY', 'WAS', 'RAVAGED', 'AND', 'DESPOILED'] +4294-14317-0000-1266: ref=['AS', 'I', 'THOUGHT', 'THAT', 'THIS', 'WAS', 'DUE', 'TO', 'SOME', 'FAULT', 'IN', 'THE', 'EARTH', 'I', 'WANTED', 'TO', 'MAKE', 'THESE', 'FIRST', 'EXPERIMENTS', 'BEFORE', 'I', 'UNDERTOOK', 'MY', 'PERSEUS'] +4294-14317-0000-1266: hyp=['AS', 'I', 'THOUGHT', 'THAT', 'THIS', 'WAS', 'DUE', 'TO', 'SOME', 'FAULT', 'IN', 'THE', 'EARTH', 'I', 'WANTED', 'TO', 'MAKE', 'THESE', 'FIRST', 'EXPERIMENTS', 'BEFORE', 'I', 'UNDERTOOK', 'MY', 'PERSEUS'] +4294-14317-0001-1267: ref=['WHEN', 'I', 'SAW', 'THAT', 'THIS', 'BUST', 'CAME', 'OUT', 'SHARP', 'AND', 'CLEAN', 'I', 'SET', 'AT', 'ONCE', 'TO', 'CONSTRUCT', 'A', 'LITTLE', 'FURNACE', 'IN', 'THE', 'WORKSHOP', 'ERECTED', 'FOR', 'ME', 'BY', 'THE', 'DUKE', 'AFTER', 'MY', 'OWN', 'PLANS', 'AND', 'DESIGN', 'IN', 'THE', 'HOUSE', 'WHICH', 'THE', 'DUKE', 'HAD', 'GIVEN', 'ME'] +4294-14317-0001-1267: hyp=['WHEN', 'I', 'SAW', 'THAT', 'THIS', 'BUST', 'CAME', 'OUT', 'SHARP', 'AND', 'CLEAN', 'I', 'SET', 'AT', 'ONCE', 'TO', 'CONSTRUCT', 'A', 'LITTLE', 'FURNACE', 'IN', 'THE', 'WORKSHOP', 'ERECTED', 'FOR', 'ME', 'BY', 'THE', 'DUKE', 'AFTER', 'MY', 'OWN', 'PLANS', 'AND', 'DESIGN', 'IN', 'THE', 'HOUSE', 'WHICH', 'THE', 'DUKE', 'HAD', 'GIVEN', 'ME'] +4294-14317-0002-1268: ref=['IT', 'WAS', 'AN', 'EXTREMELY', 'DIFFICULT', 'TASK', 'AND', 'I', 'WAS', 'ANXIOUS', 'TO', 'OBSERVE', 'ALL', 'THE', 'NICETIES', 'OF', 'ART', 'WHICH', 'I', 'HAD', 'LEARNED', 'SO', 'AS', 'NOT', 'TO', 'LAPSE', 'INTO', 'SOME', 'ERROR'] +4294-14317-0002-1268: hyp=['IT', 'WAS', 'AN', 'EXTREMELY', 'DIFFICULT', 'TASK', 'AND', 'I', 'WAS', 'ANXIOUS', 'TO', 'OBSERVE', 'ALL', 'THE', 'NICETIES', 'OF', 'ART', 'WHICH', 'I', 'HAD', 'LEARNED', 'SO', 'AS', 'NOT', 'TO', 'LAPSE', 'INTO', 'SOME', 'ERROR'] +4294-14317-0003-1269: ref=['I', 'IN', 'MY', 'TURN', 'FEEL', 'THE', 'SAME', 'DESIRE', 'AND', 'HOPE', 'TO', 'PLAY', 'MY', 'PART', 'LIKE', 'THEM', 'THEREFORE', 'MY', 'LORD', 'GIVE', 'ME', 'THE', 'LEAVE', 'TO', 'GO'] +4294-14317-0003-1269: hyp=['I', 'IN', 'MY', 'TURN', 'FEEL', 'THE', 'SAME', 'DESIRE', 'AND', 'HOPE', 'TO', 'PLAY', 'MY', 'PART', 'LIKE', 'THEM', 'THEREFORE', 'MY', 'LORD', 'GIVE', 'ME', 'THE', 'LEAVE', 'TO', 'GO'] +4294-14317-0004-1270: ref=['BUT', 'BEWARE', 'OF', 'LETTING', 'BANDINELLO', 'QUIT', 'YOU', 'RATHER', 'BESTOW', 'UPON', 'HIM', 'ALWAYS', 'MORE', 'THAN', 'HE', 'DEMANDS', 'FOR', 'IF', 'HE', 'GOES', 'INTO', 'FOREIGN', 'PARTS', 'HIS', 'IGNORANCE', 'IS', 'SO', 'PRESUMPTUOUS', 'THAT', 'HE', 'IS', 'JUST', 'THE', 'MAN', 'TO', 'DISGRACE', 'OUR', 'MOST', 'ILLUSTRIOUS', 'SCHOOL'] +4294-14317-0004-1270: hyp=['BUT', 'BEWARE', 'OF', 'LETTING', 'BEND', 'NELLO', 'QUIT', 'YOU', 'RATHER', 'BESTOW', 'UPON', 'HIM', 'ALWAYS', 'MORE', 'THAN', 'HE', 'DEMANDS', 'FOR', 'IF', 'HE', 'GOES', 'INTO', 'FOREIGN', 'PARTS', 'HIS', 'IGNORANCE', 'IS', 'SO', 'PRESUMPTUOUS', 'THAT', 'HE', 'IS', 'JUST', 'THE', 'MAN', 'TO', 'DISGRACE', 'OUR', 'MOST', 'ILLUSTRIOUS', 'SCHOOL'] +4294-14317-0005-1271: ref=['I', 'ASK', 'NO', 'FURTHER', 'REWARD', 'FOR', 'MY', 'LABOURS', 
'UP', 'TO', 'THIS', 'TIME', 'THAN', 'THE', 'GRACIOUS', 'FAVOUR', 'OF', 'YOUR', 'MOST', 'ILLUSTRIOUS', 'EXCELLENCY'] +4294-14317-0005-1271: hyp=['I', 'ASK', 'NO', 'FURTHER', 'REWARD', 'FOR', 'MY', 'LABOURS', 'UP', 'TO', 'THIS', 'TIME', 'THAN', 'THE', 'GRACIOUS', 'FAVOUR', 'OF', 'YOUR', 'MOST', 'ILLUSTRIOUS', 'EXCELLENCY'] +4294-14317-0006-1272: ref=['THEN', 'I', 'THANKED', 'HIM', 'AND', 'SAID', 'I', 'HAD', 'NO', 'GREATER', 'DESIRE', 'THAN', 'TO', 'SHOW', 'THOSE', 'ENVIOUS', 'FOLK', 'THAT', 'I', 'HAD', 'IT', 'IN', 'ME', 'TO', 'EXECUTE', 'THE', 'PROMISED', 'WORK'] +4294-14317-0006-1272: hyp=['THEN', 'I', 'THANKED', 'HIM', 'AND', 'SAID', 'I', 'HAD', 'NO', 'GREATER', 'DESIRE', 'THAN', 'TO', 'SHOW', 'THOSE', 'ENVIOUS', 'FOLK', 'THAT', 'I', 'HAD', 'IT', 'IN', 'NEED', 'TO', 'EXECUTE', 'THE', 'PROMISED', 'WORK'] +4294-14317-0007-1273: ref=['I', 'HAD', 'BETTER', 'LOOK', 'TO', 'MY', 'CONDUCT', 'FOR', 'IT', 'HAD', 'COME', 'TO', 'HIS', 'EARS', 'THAT', 'I', 'RELIED', 'UPON', 'HIS', 'FAVOUR', 'TO', 'TAKE', 'IN', 'FIRST', 'ONE', 'MAN', 'AND', 'THEN', 'ANOTHER'] +4294-14317-0007-1273: hyp=['I', 'HAD', 'BETTER', 'LOOK', 'TO', 'MY', 'CONDUCT', 'FOR', 'IT', 'HAD', 'COME', 'TO', 'HIS', 'EARS', 'THAT', 'I', 'RELIED', 'UPON', 'HIS', 'FAVOUR', 'TO', 'TAKE', 'IN', 'FIRST', 'ONE', 'MAN', 'AND', 'THEN', 'ANOTHER'] +4294-14317-0008-1274: ref=['I', 'BEGGED', 'HIS', 'MOST', 'ILLUSTRIOUS', 'EXCELLENCY', 'TO', 'NAME', 'A', 'SINGLE', 'PERSON', 'WHOM', 'I', 'HAD', 'EVER', 'TAKEN', 'IN'] +4294-14317-0008-1274: hyp=['I', 'BEGGED', 'HIS', 'MOST', 'ILLUSTRIOUS', 'EXCELLENCY', 'TO', 'NAME', 'A', 'SINGLE', 'PERSON', 'WHOM', 'I', 'HAD', 'EVER', 'TAKEN', 'IN'] +4294-14317-0009-1275: ref=['I', 'SAID', 'MY', 'LORD', 'I', 'THANK', 'YOU', 'AND', 'BEG', 'YOU', 'TO', 'CONDESCEND', 'SO', 'FAR', 'AS', 'TO', 'LISTEN', 'TO', 'FOUR', 'WORDS', 'IT', 'IS', 'TRUE', 'THAT', 'HE', 'LENT', 'ME', 'A', 'PAIR', 'OF', 'OLD', 'SCALES', 'TWO', 'ANVILS', 'AND', 'THREE', 'LITTLE', 'HAMMERS', 'WHICH', 'ARTICLES', 'I', 'BEGGED', 'HIS', 'WORKMAN', 'GIORGIO', 'DA', 'CORTONA', 'FIFTEEN', 'DAYS', 'AGO', 'TO', 'FETCH', 'BACK'] +4294-14317-0009-1275: hyp=['I', 'SAID', 'MY', 'LORD', 'I', 'THANK', 'YOU', 'AND', 'BEG', 'YOU', 'TO', 'CONDESCEND', 'SO', 'FAR', 'AS', 'TO', 'LISTEN', 'TO', 'FOUR', 'WORDS', 'IT', 'IS', 'TRUE', 'THAT', 'HE', 'LENT', 'ME', 'A', 'PAIR', 'OF', 'OLD', 'SCALES', 'TWO', 'ANVILS', 'AND', 'THREE', 'LITTLE', 'HAMMERS', 'WHICH', 'ARTICLES', 'I', 'BEGGED', 'HIS', 'WORKMAN', 'GIORGIO', 'DA', 'CORTONA', 'FIFTEEN', 'DAYS', 'AGO', 'TO', 'FETCH', 'BACK'] +4294-14317-0010-1276: ref=['GIORGIO', 'CAME', 'FOR', 'THEM', 'HIMSELF'] +4294-14317-0010-1276: hyp=['GEORGE', 'O', 'CAME', 'FOR', 'THEM', 'HIMSELF'] +4294-14317-0011-1277: ref=['I', 'HOPE', 'TO', 'PROVE', 'ON', 'WHAT', 'ACCOUNT', 'THAT', 'SCOUNDREL', 'TRIES', 'TO', 'BRING', 'ME', 'INTO', 'DISGRACE'] +4294-14317-0011-1277: hyp=['I', 'HOPE', 'TO', 'PROVE', 'ON', 'WHAT', 'ACCOUNT', 'THAT', 'SCOUNDREL', 'TRIES', 'TO', 'BRING', 'ME', 'INTO', 'DISGRACE'] +4294-14317-0012-1278: ref=['WHEN', 'HE', 'HAD', 'HEARD', 'THIS', 'SPEECH', 'THE', 'DUKE', 'ROSE', 'UP', 'IN', 'ANGER', 'AND', 'SENT', 'FOR', 'BERNARDONE', 'WHO', 'WAS', 'FORCED', 'TO', 'TAKE', 'FLIGHT', 'AS', 'FAR', 'AS', 'VENICE', 'HE', 'AND', 'ANTONIO', 'LANDI', 'WITH', 'HIM'] +4294-14317-0012-1278: hyp=['WHEN', 'HE', 'HAD', 'HEARD', 'THIS', 'SPEECH', 'THE', 'DUKE', 'ROSE', 'UP', 'IN', 'ANGER', 'AND', 'SENT', 'FOR', 'BERNARDONE', 'WHO', 'WAS', 'FORCED', 'TO', 'TAKE', 'FLIGHT', 'AS', 'FAR', 'AS', 'VENICE', 'HE', 'AND', 'ANTONIO', 'LANDI', 'WITH', 'HIM'] 
+4294-14317-0013-1279: ref=['YOU', 'HAD', 'BETTER', 'PUT', 'THIS', 'TO', 'THE', 'PROOF', 'AND', 'I', 'WILL', 'GO', 'AT', 'ONCE', 'TO', 'THE', 'BARGELLO'] +4294-14317-0013-1279: hyp=['YOU', 'HAD', 'BETTER', 'PUT', 'THIS', 'TO', 'THE', 'PROOF', 'AND', 'I', 'WILL', 'GO', 'AT', 'ONCE', 'TO', 'THE', 'BARGELO'] +4294-14317-0014-1280: ref=['I', 'AM', 'WILLING', 'TO', 'ENTER', 'INTO', 'COMPETITION', 'WITH', 'THE', 'ANCIENTS', 'AND', 'FEEL', 'ABLE', 'TO', 'SURPASS', 'THEM', 'FOR', 'SINCE', 'THOSE', 'EARLY', 'DAYS', 'IN', 'WHICH', 'I', 'MADE', 'THE', 'MEDALS', 'OF', 'POPE', 'CLEMENT', 'I', 'HAVE', 'LEARNED', 'SO', 'MUCH', 'THAT', 'I', 'CAN', 'NOW', 'PRODUCE', 'FAR', 'BETTER', 'PIECES', 'OF', 'THE', 'KIND', 'I', 'THINK', 'I', 'CAN', 'ALSO', 'OUTDO', 'THE', 'COINS', 'I', 'STRUCK', 'FOR', 'DUKE', 'ALESSANDRO', 'WHICH', 'ARE', 'STILL', 'HELD', 'IN', 'HIGH', 'ESTEEM', 'IN', 'LIKE', 'MANNER', 'I', 'COULD', 'MAKE', 'FOR', 'YOU', 'LARGE', 'PIECES', 'OF', 'GOLD', 'AND', 'SILVER', 'PLATE', 'AS', 'I', 'DID', 'SO', 'OFTEN', 'FOR', 'THAT', 'NOBLE', 'MONARCH', 'KING', 'FRANCIS', 'OF', 'FRANCE', 'THANKS', 'TO', 'THE', 'GREAT', 'CONVENIENCES', 'HE', 'ALLOWED', 'ME', 'WITHOUT', 'EVER', 'LOSING', 'TIME', 'FOR', 'THE', 'EXECUTION', 'OF', 'COLOSSAL', 'STATUES', 'OR', 'OTHER', 'WORKS', 'OF', 'THE', 'SCULPTORS', 'CRAFT'] +4294-14317-0014-1280: hyp=['I', 'AM', 'WILLING', 'TO', 'ENTER', 'INTO', 'COMPETITION', 'WITH', 'THE', 'ANCIENTS', 'AND', 'FEEL', 'ABLE', 'TO', 'SURPASS', 'THEM', 'FOR', 'SINCE', 'THOSE', 'EARLY', 'DAYS', 'IN', 'WHICH', 'I', 'MADE', 'THE', 'MEDALS', 'OF', 'POPE', 'CLEMENT', 'I', 'HAVE', 'LEARNED', 'SO', 'MUCH', 'THAT', 'I', 'CAN', 'NOW', 'PRODUCE', 'FAR', 'BETTER', 'PIECES', 'OF', 'THE', 'KIND', 'I', 'THINK', 'I', 'CAN', 'ALSO', 'OUTDO', 'THE', 'COINS', 'I', 'STRUCK', 'FOR', 'DUKE', 'ALISANDRO', 'WHICH', 'ARE', 'STILL', 'HELD', 'IN', 'HIGH', 'ESTEEM', 'IN', 'LIKE', 'MANNER', 'I', 'COULD', 'MAKE', 'FOR', 'YOU', 'LARGE', 'PIECES', 'OF', 'GOLD', 'AND', 'SILVER', 'PLATE', 'AS', 'I', 'DID', 'SO', 'OFTEN', 'FOR', 'THAT', 'NOBLE', 'MONARCH', 'KING', 'FRANCIS', 'OF', 'FRANCE', 'THANKS', 'TO', 'THE', 'GREAT', 'CONVENIENCES', 'HE', 'ALLOWED', 'ME', 'WITHOUT', 'EVER', 'LOSING', 'TIME', 'FOR', 'THE', 'EXECUTION', 'OF', 'COLOSSAL', 'STATUES', 'OR', 'OTHER', 'WORKS', 'OF', 'THE', "SCULPTOR'S", 'CRAFT'] +4294-14317-0015-1281: ref=['AFTER', 'SEVERAL', 'MONTHS', 'WERE', 'WASTED', 'AND', 'PIERO', 'WOULD', 'NEITHER', 'WORK', 'NOR', 'PUT', 'MEN', 'TO', 'WORK', 'UPON', 'THE', 'PIECE', 'I', 'MADE', 'HIM', 'GIVE', 'IT', 'BACK'] +4294-14317-0015-1281: hyp=['AFTER', 'SEVERAL', 'MONTHS', 'WERE', 'WASTED', 'AND', 'PIERO', 'WOULD', 'NEITHER', 'WORK', 'NOR', 'PUT', 'MEN', 'TO', 'WORK', 'UPON', 'THE', 'PIECE', 'I', 'MADE', 'HIM', 'GIVE', 'IT', 'BACK'] +4294-14317-0016-1282: ref=['AMONG', 'ARTISTS', 'CERTAIN', 'ENRAGED', 'SCULPTORS', 'LAUGHED', 'AT', 'ME', 'AND', 'CALLED', 'ME', 'THE', 'NEW', 'SCULPTOR'] +4294-14317-0016-1282: hyp=['AMONG', 'ARTISTS', 'CERTAIN', 'ENRAGED', 'SCULPTORS', 'LAUGHED', 'AT', 'ME', 'AND', 'CALLED', 'ME', 'THE', 'NEW', 'SCULPTOR'] +4294-14317-0017-1283: ref=['NOW', 'I', 'HOPE', 'TO', 'SHOW', 'THEM', 'THAT', 'I', 'AM', 'AN', 'OLD', 'SCULPTOR', 'IF', 'GOD', 'SHALL', 'GRANT', 'ME', 'THE', 'BOON', 'OF', 'FINISHING', 'MY', 'PERSEUS', 'FOR', 'THAT', 'NOBLE', 'PIAZZA', 'OF', 'HIS', 'MOST', 'ILLUSTRIOUS', 'EXCELLENCY'] +4294-14317-0017-1283: hyp=['NOW', 'I', 'HOPE', 'TO', 'SHOW', 'THEM', 'THAT', 'I', 'AM', 'AN', 'OLD', 'SCULPTOR', 'IF', 'GOD', 'SHALL', 'GRANT', 'ME', 'THE', 'BOON', 'OF', 'FINISHING', 'MY', 'PERSEUS', 
'FOR', 'THAT', 'NOBLE', 'PIAZZA', 'OF', 'HIS', 'MOST', 'ILLUSTRIOUS', 'EXCELLENCY'] +4294-14317-0018-1284: ref=['HAVING', 'THIS', 'EXCELLENT', 'RESOLVE', 'IN', 'HEART', 'I', 'REACHED', 'MY', 'HOME'] +4294-14317-0018-1284: hyp=['HAVING', 'THIS', 'EXCELLENT', 'RESOLVE', 'IN', 'HEART', 'I', 'REACHED', 'MY', 'HOME'] +4294-32859-0000-1285: ref=['WYLDER', 'WAS', 'RATHER', 'SURLY', 'AFTER', 'THE', 'LADIES', 'HAD', 'FLOATED', 'AWAY', 'FROM', 'THE', 'SCENE', 'AND', 'HE', 'DRANK', 'HIS', 'LIQUOR', 'DOGGEDLY'] +4294-32859-0000-1285: hyp=['WYLDER', 'WAS', 'RATHER', 'SURLY', 'AFTER', 'THE', 'LADIES', 'HAD', 'FLOATED', 'AWAY', 'FROM', 'THE', 'SCENE', 'AND', 'HE', 'DRANK', 'HIS', 'LIQUOR', 'DOGGEDLY'] +4294-32859-0001-1286: ref=['IT', 'WAS', 'HIS', 'FANCY', 'I', 'SUPPOSE', 'TO', 'REVIVE', 'CERTAIN', 'SENTIMENTAL', 'RELATIONS', 'WHICH', 'HAD', 'IT', 'MAY', 'BE', 'ONCE', 'EXISTED', 'BETWEEN', 'HIM', 'AND', 'MISS', 'LAKE', 'AND', 'HE', 'WAS', 'A', 'PERSON', 'OF', 'THAT', 'COMBATIVE', 'TEMPERAMENT', 'THAT', 'MAGNIFIES', 'AN', 'OBJECT', 'IN', 'PROPORTION', 'AS', 'ITS', 'PURSUIT', 'IS', 'THWARTED'] +4294-32859-0001-1286: hyp=['IT', 'WAS', 'HIS', 'FANCY', 'I', 'SUPPOSE', 'TO', 'REVIVE', 'CERTAIN', 'SENTIMENTAL', 'RELATIONS', 'WHICH', 'HAD', 'IT', 'MAY', 'BE', 'ONCE', 'EXISTED', 'BETWEEN', 'HIM', 'AND', 'MISS', 'LAKE', 'AND', 'HE', 'WAS', 'A', 'PERSON', 'OF', 'THAT', 'COMBATIVE', 'TEMPERAMENT', 'THAT', 'MAGNIFIES', 'AN', 'OBJECT', 'IN', 'PROPORTION', 'AS', 'ITS', 'PURSUIT', 'IS', 'THWARTED'] +4294-32859-0002-1287: ref=['THE', 'STORY', 'OF', 'FRIDOLIN', 'AND', "RETZCH'S", 'PRETTY', 'OUTLINES'] +4294-32859-0002-1287: hyp=['THE', 'STORY', 'OF', 'FRIEDOLIN', 'AND', "WRETCH'S", 'PRETTY', 'OUTLINES'] +4294-32859-0003-1288: ref=['SIT', 'DOWN', 'BESIDE', 'ME', 'AND', "I'LL", 'TELL', 'YOU', 'THE', 'STORY'] +4294-32859-0003-1288: hyp=['SIT', 'DOWN', 'BESIDE', 'ME', 'AND', "I'LL", 'TELL', 'YOU', 'THE', 'STORY'] +4294-32859-0004-1289: ref=['HE', 'ASSISTED', 'AT', 'IT', 'BUT', 'TOOK', 'NO', 'PART', 'AND', 'IN', 'FACT', 'WAS', 'LISTENING', 'TO', 'THAT', 'OTHER', 'CONVERSATION', 'WHICH', 'SOUNDED', 'WITH', 'ITS', 'PLEASANT', 'GABBLE', 'AND', 'LAUGHTER', 'LIKE', 'A', 'LITTLE', 'MUSICAL', 'TINKLE', 'OF', 'BELLS', 'IN', 'THE', 'DISTANCE'] +4294-32859-0004-1289: hyp=['HE', 'ASSISTED', 'AT', 'IT', 'BUT', 'TOOK', 'NO', 'PART', 'AND', 'IN', 'FACT', 'WAS', 'LISTENING', 'TO', 'THAT', 'OTHER', 'CONVERSATION', 'WHICH', 'SOUNDED', 'WITH', 'ITS', 'PLEASANT', 'GABBLE', 'AND', 'LAUGHTER', 'LIKE', 'A', 'LITTLE', 'MUSICAL', 'TINKLE', 'OF', 'BELLS', 'IN', 'THE', 'DISTANCE'] +4294-32859-0005-1290: ref=['BUT', 'HONEST', 'MARK', 'FORGOT', 'THAT', 'YOUNG', 'LADIES', 'DO', 'NOT', 'ALWAYS', 'COME', 'OUT', 'QUITE', 'ALONE', 'AND', 'JUMP', 'UNASSISTED', 'INTO', 'THEIR', 'VEHICLES'] +4294-32859-0005-1290: hyp=['BUT', 'HONEST', 'MARK', 'FORGOT', 'THAT', 'YOUNG', 'LADIES', 'DO', 'NOT', 'ALWAYS', 'COME', 'OUT', 'QUITE', 'ALONE', 'AND', 'JUMP', 'UNASSISTED', 'INTO', 'THEIR', 'VEHICLES'] +4294-35475-0000-1291: ref=['BUT', 'THE', 'MIDDLE', 'SON', 'WAS', 'LITTLE', 'AND', 'LORN', 'HE', 'WAS', 'NEITHER', 'DARK', 'NOR', 'FAIR', 'HE', 'WAS', 'NEITHER', 'HANDSOME', 'NOR', 'STRONG'] +4294-35475-0000-1291: hyp=['BUT', 'THE', 'MIDDLE', 'SON', 'WAS', 'LITTLE', 'AND', 'LORN', 'HE', 'WAS', 'NEITHER', 'DARK', 'NOR', 'FAIR', 'HE', 'WAS', 'NEITHER', 'HANDSOME', 'NOR', 'STRONG'] +4294-35475-0001-1292: ref=['THROWING', 'HIMSELF', 'ON', 'HIS', 'KNEES', 'BEFORE', 'THE', 'KING', 'HE', 'CRIED', 'OH', 'ROYAL', 'SIRE', 'BESTOW', 'UPON', 'ME', 'ALSO', 'A', 'SWORD', 'AND', 'A', 
'STEED', 'THAT', 'I', 'MAY', 'UP', 'AND', 'AWAY', 'TO', 'FOLLOW', 'MY', 'BRETHREN'] +4294-35475-0001-1292: hyp=['THROWING', 'HIMSELF', 'ON', 'HIS', 'KNEES', 'BEFORE', 'THE', 'KING', 'HE', 'CRIED', 'O', 'ROYAL', 'SIRE', 'BESTOW', 'UPON', 'ME', 'ALSO', 'A', 'SWORD', 'AND', 'A', 'STEED', 'THAT', 'I', 'MAY', 'UP', 'AND', 'AWAY', 'TO', 'FOLLOW', 'MY', 'BRETHREN'] +4294-35475-0002-1293: ref=['BUT', 'THE', 'KING', 'LAUGHED', 'HIM', 'TO', 'SCORN', 'THOU', 'A', 'SWORD', 'HE', 'QUOTH'] +4294-35475-0002-1293: hyp=['BUT', 'THE', 'KING', "LAUGH'D", 'HIM', 'TO', 'SCORN', 'THOU', 'A', 'SWORD', 'HE', 'QUOTH'] +4294-35475-0003-1294: ref=['IN', 'SOOTH', 'THOU', 'SHALT', 'HAVE', 'ONE', 'BUT', 'IT', 'SHALL', 'BE', 'ONE', 'BEFITTING', 'THY', 'MAIDEN', 'SIZE', 'AND', 'COURAGE', 'IF', 'SO', 'SMALL', 'A', 'WEAPON', 'CAN', 'BE', 'FOUND', 'IN', 'ALL', 'MY', 'KINGDOM'] +4294-35475-0003-1294: hyp=['IN', 'SOOTH', 'THOU', 'SHALT', 'HAVE', 'ONE', 'BUT', 'IT', 'SHALL', 'BE', 'ONE', 'BEFITTING', 'THY', 'MAIDEN', 'SIZE', 'AND', 'COURAGE', 'IF', 'SO', 'SMALL', 'A', 'WEAPON', 'CAN', 'BE', 'FOUND', 'IN', 'ALL', 'MY', 'KINGDOM'] +4294-35475-0004-1295: ref=['FORTHWITH', 'THE', 'GRINNING', 'JESTER', 'BEGAN', 'SHRIEKING', 'WITH', 'LAUGHTER', 'SO', 'THAT', 'THE', 'BELLS', 'UPON', 'HIS', 'MOTLEY', 'CAP', 'WERE', 'ALL', 'SET', 'A', 'JANGLING'] +4294-35475-0004-1295: hyp=['FORTHWITH', 'THE', 'GRINNING', 'JESTER', 'BEGAN', 'SHRIEKING', 'WITH', 'LAUGHTER', 'SO', 'THAT', 'THE', 'BELLS', 'UPON', 'HIS', 'MOTLEY', 'CAP', 'WERE', 'ALL', 'SET', 'A', 'JANGLING'] +4294-35475-0005-1296: ref=['I', 'DID', 'BUT', 'LAUGH', 'TO', 'THINK', 'THE', 'SWORD', 'OF', 'ETHELRIED', 'HAD', 'BEEN', 'SO', 'QUICKLY', 'FOUND', 'RESPONDED', 'THE', 'JESTER', 'AND', 'HE', 'POINTED', 'TO', 'THE', 'SCISSORS', 'HANGING', 'FROM', 'THE', "TAILOR'S", 'GIRDLE'] +4294-35475-0005-1296: hyp=['I', 'DID', 'BUT', 'LAUGH', 'TO', 'THINK', 'THE', 'SWORD', 'OF', 'ETHELRIED', 'HAD', 'BEEN', 'SO', 'QUICKLY', 'FOUND', 'RESPONDED', 'THE', 'JESTER', 'AND', 'HE', 'POINTED', 'TO', 'THE', 'SCISSORS', 'HANGING', 'FROM', 'THE', "TAILOR'S", 'GIRDLE'] +4294-35475-0006-1297: ref=['ONE', 'NIGHT', 'AS', 'HE', 'LAY', 'IN', 'A', 'DEEP', 'FOREST', 'TOO', 'UNHAPPY', 'TO', 'SLEEP', 'HE', 'HEARD', 'A', 'NOISE', 'NEAR', 'AT', 'HAND', 'IN', 'THE', 'BUSHES'] +4294-35475-0006-1297: hyp=['ONE', 'NIGHT', 'AS', 'HE', 'LAY', 'IN', 'A', 'DEEP', 'FOREST', 'TOO', 'UNHAPPY', 'TO', 'SLEEP', 'HE', 'HEARD', 'A', 'NOISE', 'NEAR', 'AT', 'HAND', 'IN', 'THE', 'BUSHES'] +4294-35475-0007-1298: ref=['THOU', 'SHALT', 'HAVE', 'THY', 'LIBERTY', 'HE', 'CRIED', 'EVEN', 'THOUGH', 'THOU', 'SHOULDST', 'REND', 'ME', 'IN', 'PIECES', 'THE', 'MOMENT', 'THOU', 'ART', 'FREE'] +4294-35475-0007-1298: hyp=['THOU', 'SHALT', 'HAVE', 'THY', 'LIBERTY', 'HE', 'CRIED', 'EVEN', 'THOUGH', 'THOU', 'SHOULDST', 'RUN', 'ME', 'IN', 'PIECES', 'THE', 'MOMENT', 'THOU', 'ART', 'FREE'] +4294-35475-0008-1299: ref=['IT', 'HAD', 'SUDDENLY', 'DISAPPEARED', 'AND', 'IN', 'ITS', 'PLACE', 'STOOD', 'A', 'BEAUTIFUL', 'FAIRY', 'WITH', 'FILMY', 'WINGS', 'WHICH', 'SHONE', 'LIKE', 'RAINBOWS', 'IN', 'THE', 'MOONLIGHT'] +4294-35475-0008-1299: hyp=['IT', 'HAD', 'SUDDENLY', 'DISAPPEARED', 'AND', 'IN', 'ITS', 'PLACE', 'STOOD', 'A', 'BEAUTIFUL', 'FAIRY', 'WITH', 'FILMY', 'WINGS', 'WHICH', 'SHONE', 'LIKE', 'RAINBOWS', 'IN', 'THE', 'MOONLIGHT'] +4294-35475-0009-1300: ref=['AT', 'THIS', 'MOMENT', 'THERE', 'WAS', 'A', 'DISTANT', 'RUMBLING', 'AS', 'OF', 'THUNDER', 'TIS', 'THE', 'OGRE', 'CRIED', 'THE', 'FAIRY', 'WE', 'MUST', 'HASTEN'] +4294-35475-0009-1300: hyp=['AT', 'THIS', 
'MOMENT', 'THERE', 'WAS', 'A', 'DISTANT', 'RUMBLING', 'AS', 'OF', 'THUNDER', 'TIS', 'THE', 'OGRE', 'CRIED', 'THE', 'FAIRY', 'WE', 'MUST', 'HASTEN'] +4294-35475-0010-1301: ref=['SCISSORS', 'GROW', 'A', "GIANT'S", 'HEIGHT', 'AND', 'SAVE', 'US', 'FROM', 'THE', "OGRE'S", 'MIGHT'] +4294-35475-0010-1301: hyp=['SCISSORS', 'GROW', 'A', "GIANT'S", 'HEIGHT', 'AND', 'SAVE', 'US', 'FROM', 'THE', "OGRE'S", 'MIGHT'] +4294-35475-0011-1302: ref=['HE', 'COULD', 'SEE', 'THE', 'OGRE', 'STANDING', 'POWERLESS', 'TO', 'HURT', 'HIM', 'ON', 'THE', 'OTHER', 'SIDE', 'OF', 'THE', 'CHASM', 'AND', 'GNASHING', 'HIS', 'TEETH', 'EACH', 'ONE', 'OF', 'WHICH', 'WAS', 'AS', 'BIG', 'AS', 'A', 'MILLSTON'] +4294-35475-0011-1302: hyp=['HE', 'COULD', 'SEE', 'THE', 'OGRE', 'STANDING', 'POWERLESS', 'TO', 'HURT', 'HIM', 'ON', 'THE', 'OTHER', 'SIDE', 'OF', 'THE', 'CHASM', 'AND', 'GNASHING', 'HIS', 'TEETH', 'EACH', 'ONE', 'OF', 'WHICH', 'WAS', 'AS', 'BIG', 'AS', 'A', 'MILLSTONE'] +4294-35475-0012-1303: ref=['THE', 'SIGHT', 'WAS', 'SO', 'TERRIBLE', 'THAT', 'HE', 'TURNED', 'ON', 'HIS', 'HEEL', 'AND', 'FLED', 'AWAY', 'AS', 'FAST', 'AS', 'HIS', 'FEET', 'COULD', 'CARRY', 'HIM'] +4294-35475-0012-1303: hyp=['THE', 'SIGHT', 'WAS', 'SO', 'TERRIBLE', 'THAT', 'HE', 'TURNED', 'ON', 'HIS', 'HEEL', 'AND', 'FLED', 'AWAY', 'AS', 'FAST', 'AS', 'HIS', 'FEET', 'COULD', 'CARRY', 'HIM'] +4294-35475-0013-1304: ref=['THOU', 'SHALT', 'NOT', 'BE', 'LEFT', 'A', 'PRISONER', 'IN', 'THIS', 'DISMAL', 'SPOT', 'WHILE', 'I', 'HAVE', 'THE', 'POWER', 'TO', 'HELP', 'THEE'] +4294-35475-0013-1304: hyp=['THOU', 'SHALT', 'NOT', 'BE', 'LEFT', 'A', 'PRISONER', 'IN', 'THIS', 'DISMAL', 'SPOT', 'WHILE', 'I', 'HAVE', 'THE', 'POWER', 'TO', 'HELP', 'THEE'] +4294-35475-0014-1305: ref=['HE', 'LIFTED', 'THE', 'SCISSORS', 'AND', 'WITH', 'ONE', 'STROKE', 'DESTROYED', 'THE', 'WEB', 'AND', 'GAVE', 'THE', 'FLY', 'ITS', 'FREEDOM'] +4294-35475-0014-1305: hyp=['HE', 'LIFTED', 'THE', 'SCISSORS', 'AND', 'WITH', 'ONE', 'STROKE', 'DESTROYED', 'THE', 'WEB', 'AND', 'GAVE', 'THE', 'FLY', 'ITS', 'FREEDOM'] +4294-35475-0015-1306: ref=['A', 'FAINT', 'GLIMMER', 'OF', 'LIGHT', 'ON', 'THE', 'OPPOSITE', 'WALL', 'SHOWS', 'ME', 'THE', 'KEYHOLE'] +4294-35475-0015-1306: hyp=['A', 'FAINT', 'GLIMMER', 'OF', 'LIGHT', 'ON', 'THE', 'OPPOSITE', 'WALL', 'SHOWS', 'ME', 'THE', 'KEYHOLE'] +4294-35475-0016-1307: ref=['THE', 'PRINCE', 'SPENT', 'ALL', 'THE', 'FOLLOWING', 'TIME', 'UNTIL', 'MIDNIGHT', 'TRYING', 'TO', 'THINK', 'OF', 'A', 'SUITABLE', 'VERSE', 'TO', 'SAY', 'TO', 'THE', 'SCISSORS'] +4294-35475-0016-1307: hyp=['THE', 'PRINCE', 'SPENT', 'ALL', 'THE', 'FOLLOWING', 'TIME', 'UNTIL', 'MIDNIGHT', 'TRYING', 'TO', 'THINK', 'OF', 'A', 'SUITABLE', 'VERSE', 'TO', 'SAY', 'TO', 'THE', 'SCISSORS'] +4294-35475-0017-1308: ref=['AS', 'HE', 'UTTERED', 'THE', 'WORDS', 'THE', 'SCISSORS', 'LEAPED', 'OUT', 'OF', 'HIS', 'HAND', 'AND', 'BEGAN', 'TO', 'CUT', 'THROUGH', 'THE', 'WOODEN', 'SHUTTERS', 'AS', 'EASILY', 'AS', 'THROUGH', 'A', 'CHEESE'] +4294-35475-0017-1308: hyp=['AS', 'HE', 'UTTERED', 'THE', 'WORDS', 'THE', 'SCISSORS', 'LEAPED', 'OUT', 'OF', 'HIS', 'HAND', 'AND', 'BEGAN', 'TO', 'CUT', 'THROUGH', 'THE', 'WOODEN', 'SHUTTERS', 'AS', 'EASILY', 'AS', 'THROUGH', 'A', 'CHEESE'] +4294-35475-0018-1309: ref=['IN', 'A', 'VERY', 'SHORT', 'TIME', 'THE', 'PRINCE', 'HAD', 'CRAWLED', 'THROUGH', 'THE', 'OPENING'] +4294-35475-0018-1309: hyp=['IN', 'A', 'VERY', 'SHORT', 'TIME', 'THE', 'PRINCE', 'HAD', 'CRAWLED', 'THROUGH', 'THE', 'OPENING'] +4294-35475-0019-1310: ref=['WHILE', 'HE', 'STOOD', 'LOOKING', 'AROUND', 'HIM', 'IN', 'BEWILDERMENT', 'A', 
'FIREFLY', 'ALIGHTED', 'ON', 'HIS', 'ARM', 'FLASHING', 'ITS', 'LITTLE', 'LANTERN', 'IN', 'THE', "PRINCE'S", 'FACE', 'IT', 'CRIED', 'THIS', 'WAY', 'MY', 'FRIEND', 'THE', 'FLY', 'SENT', 'ME', 'TO', 'GUIDE', 'YOU', 'TO', 'A', 'PLACE', 'OF', 'SAFETY'] +4294-35475-0019-1310: hyp=['WHILE', 'HE', 'STOOD', 'LOOKING', 'AROUND', 'HIM', 'IN', 'BEWILDERMENT', 'A', 'FIREFLY', 'ALIGHTED', 'ON', 'HIS', 'ARM', 'FLASHING', 'ITS', 'LITTLE', 'LANTERN', 'IN', 'THE', "PRINCE'S", 'FACE', 'IT', 'CRIED', 'THIS', 'WAY', 'MY', 'FRIEND', 'THE', 'FLY', 'SENT', 'ME', 'TO', 'GUIDE', 'YOU', 'TO', 'A', 'PLACE', 'OF', 'SAFETY'] +4294-35475-0020-1311: ref=['WHAT', 'IS', 'TO', 'BECOME', 'OF', 'ME', 'CRIED', 'THE', 'POOR', 'PEASANT'] +4294-35475-0020-1311: hyp=['WHAT', 'IS', 'TO', 'BECOME', 'OF', 'ME', 'CRIED', 'THE', 'POOR', 'PEASANT'] +4294-35475-0021-1312: ref=['MY', 'GRAIN', 'MUST', 'FALL', 'AND', 'ROT', 'IN', 'THE', 'FIELD', 'FROM', 'OVERRIPENESS', 'BECAUSE', 'I', 'HAVE', 'NOT', 'THE', 'STRENGTH', 'TO', 'RISE', 'AND', 'HARVEST', 'IT', 'THEN', 'INDEED', 'MUST', 'WE', 'ALL', 'STARVE'] +4294-35475-0021-1312: hyp=['MY', 'GRAIN', 'MUST', 'FALL', 'AND', 'ROT', 'IN', 'THE', 'FIELD', 'FROM', 'OVER', 'RIPENESS', 'BECAUSE', 'I', 'HAVE', 'NOT', 'THE', 'STRENGTH', 'TO', 'RISE', 'AND', 'HARVEST', 'IT', 'THEN', 'INDEED', 'MUST', 'WE', 'ALL', 'STARVE'] +4294-35475-0022-1313: ref=['THE', 'GRANDAME', 'WHOM', 'HE', 'SUPPLIED', 'WITH', 'FAGOTS', 'THE', 'MERCHANT', 'WHOM', 'HE', 'RESCUED', 'FROM', 'ROBBERS', 'THE', "KING'S", 'COUNCILLOR', 'TO', 'WHOM', 'HE', 'GAVE', 'AID', 'ALL', 'BECAME', 'HIS', 'FRIENDS', 'UP', 'AND', 'DOWN', 'THE', 'LAND', 'TO', 'BEGGAR', 'OR', 'LORD', 'HOMELESS', 'WANDERER', 'OR', 'HIGH', 'BORN', 'DAME', 'HE', 'GLADLY', 'GAVE', 'UNSELFISH', 'SERVICE', 'ALL', 'UNSOUGHT', 'AND', 'SUCH', 'AS', 'HE', 'HELPED', 'STRAIGHTWAY', 'BECAME', 'HIS', 'FRIENDS'] +4294-35475-0022-1313: hyp=['THE', 'GRAND', 'DAME', 'WHOM', 'HE', 'SUPPLIED', 'WITH', 'FAGGOTS', 'THE', 'MERCHANT', 'WHOM', 'HE', 'RESCUED', 'FROM', 'ROBBERS', 'THE', "KING'S", 'COUNSELLOR', 'TO', 'WHOM', 'HE', 'GAVE', 'AID', 'ALL', 'BECAME', 'HIS', 'FRIENDS', 'UP', 'AND', 'DOWN', 'THE', 'LAND', 'BEGGAR', 'OR', 'LORD', 'HOMELESS', 'WANDERER', 'OR', 'HIGH', 'BORN', 'DAME', 'HE', 'GLADLY', 'GAVE', 'UNSELFISH', 'SERVICE', 'ALL', 'UNSOUGHT', 'AND', 'SUCH', 'AS', 'HE', 'HELPED', 'STRAIGHTWAY', 'BECAME', 'HIS', 'FRIENDS'] +4294-35475-0023-1314: ref=['TO', 'HIM', 'WHO', 'COULD', 'BRING', 'HER', 'BACK', 'TO', 'HER', "FATHER'S", 'CASTLE', 'SHOULD', 'BE', 'GIVEN', 'THE', 'THRONE', 'AND', 'KINGDOM', 'AS', 'WELL', 'AS', 'THE', 'PRINCESS', 'HERSELF', 'SO', 'FROM', 'FAR', 'AND', 'NEAR', 'INDEED', 'FROM', 'ALMOST', 'EVERY', 'COUNTRY', 'UNDER', 'THE', 'SUN', 'CAME', 'KNIGHTS', 'AND', 'PRINCES', 'TO', 'FIGHT', 'THE', 'OGRE'] +4294-35475-0023-1314: hyp=['TO', 'HIM', 'WHO', 'COULD', 'BRING', 'HER', 'BACK', 'TO', 'HER', "FATHER'S", 'CASTLE', 'SHOULD', 'BE', 'GIVEN', 'THE', 'THRONE', 'AND', 'KINGDOM', 'AS', 'WELL', 'AS', 'THE', 'PRINCESS', 'HERSELF', 'SO', 'FROM', 'FAR', 'AND', 'NEAR', 'INDEED', 'FROM', 'ALMOST', 'EVERY', 'COUNTRY', 'UNDER', 'THE', 'SUN', 'CAME', 'KNIGHTS', 'AND', 'PRINCES', 'TO', 'FIGHT', 'THE', 'OGRE'] +4294-35475-0024-1315: ref=['AMONG', 'THOSE', 'WHO', 'DREW', 'BACK', 'WERE', "ETHELRIED'S", 'BROTHERS', 'THE', 'THREE', 'THAT', 'WERE', 'DARK', 'AND', 'THE', 'THREE', 'THAT', 'WERE', 'FAIR'] +4294-35475-0024-1315: hyp=['AMONG', 'THOSE', 'WHO', 'DREW', 'BACK', 'WERE', "ETHELRIED'S", 'BROTHERS', 'THE', 'THREE', 'THAT', 'WERE', 'DARK', 'AND', 'THE', 'THREE', 'THAT', 'WERE', 'FAIR'] 
+4294-35475-0025-1316: ref=['BUT', 'ETHELRIED', 'HEEDED', 'NOT', 'THEIR', 'TAUNTS'] +4294-35475-0025-1316: hyp=['BUT', 'ETHEL', 'REED', 'HEEDED', 'NOT', 'THEIR', 'TAUNTS'] +4294-35475-0026-1317: ref=['SO', 'THEY', 'ALL', 'CRIED', 'OUT', 'LONG', 'AND', 'LOUD', 'LONG', 'LIVE', 'THE', 'PRINCE', 'PRINCE', 'CISEAUX'] +4294-35475-0026-1317: hyp=['SO', 'THEY', 'ALL', 'CRIED', 'OUT', 'LONG', 'AND', 'LOUD', 'LONG', 'LOVE', 'THE', 'PRINCE', 'PRINCE', 'IS', 'ALL'] +4294-9934-0000-1318: ref=['HE', 'FELT', 'WHAT', 'THE', 'EARTH', 'MAY', 'POSSIBLY', 'FEEL', 'AT', 'THE', 'MOMENT', 'WHEN', 'IT', 'IS', 'TORN', 'OPEN', 'WITH', 'THE', 'IRON', 'IN', 'ORDER', 'THAT', 'GRAIN', 'MAY', 'BE', 'DEPOSITED', 'WITHIN', 'IT', 'IT', 'FEELS', 'ONLY', 'THE', 'WOUND', 'THE', 'QUIVER', 'OF', 'THE', 'GERM', 'AND', 'THE', 'JOY', 'OF', 'THE', 'FRUIT', 'ONLY', 'ARRIVE', 'LATER'] +4294-9934-0000-1318: hyp=['HE', 'FELT', 'WHAT', 'THE', 'EARTH', 'MAY', 'POSSIBLY', 'FEEL', 'AT', 'THE', 'MOMENT', 'WHEN', 'IT', 'IS', 'TORN', 'OPEN', 'WITH', 'THE', 'IRON', 'IN', 'ORDER', 'THAT', 'GRAIN', 'MAY', 'BE', 'DEPOSITED', 'WITHIN', 'IT', 'IT', 'FEELS', 'ONLY', 'THE', 'WOUND', 'THE', 'QUIVER', 'OF', 'THE', 'GERM', 'THE', 'JOY', 'OF', 'THE', 'FRUIT', 'ONLY', 'ARRIVE', 'LATER'] +4294-9934-0001-1319: ref=['HE', 'HAD', 'BUT', 'JUST', 'ACQUIRED', 'A', 'FAITH', 'MUST', 'HE', 'THEN', 'REJECT', 'IT', 'ALREADY'] +4294-9934-0001-1319: hyp=['HE', 'HAD', 'BUT', 'JUST', 'ACQUIRED', 'A', 'FAITH', 'MUST', 'HE', 'THEN', 'REJECT', 'IT', 'ALREADY'] +4294-9934-0002-1320: ref=['HE', 'AFFIRMED', 'TO', 'HIMSELF', 'THAT', 'HE', 'WOULD', 'NOT', 'HE', 'DECLARED', 'TO', 'HIMSELF', 'THAT', 'HE', 'WOULD', 'NOT', 'DOUBT', 'AND', 'HE', 'BEGAN', 'TO', 'DOUBT', 'IN', 'SPITE', 'OF', 'HIMSELF'] +4294-9934-0002-1320: hyp=['HE', 'AFFIRMED', 'TO', 'HIMSELF', 'THAT', 'HE', 'WOULD', 'NOT', 'HE', 'DECLARED', 'TO', 'HIMSELF', 'THAT', 'HE', 'WOULD', 'NOT', 'DOUBT', 'AND', 'HE', 'BEGAN', 'TO', 'DOUBT', 'IN', 'SPITE', 'OF', 'HIMSELF'] +4294-9934-0003-1321: ref=['TO', 'STAND', 'BETWEEN', 'TWO', 'RELIGIONS', 'FROM', 'ONE', 'OF', 'WHICH', 'YOU', 'HAVE', 'NOT', 'AS', 'YET', 'EMERGED', 'AND', 'ANOTHER', 'INTO', 'WHICH', 'YOU', 'HAVE', 'NOT', 'YET', 'ENTERED', 'IS', 'INTOLERABLE', 'AND', 'TWILIGHT', 'IS', 'PLEASING', 'ONLY', 'TO', 'BAT', 'LIKE', 'SOULS'] +4294-9934-0003-1321: hyp=['TO', 'STAND', 'BETWEEN', 'TWO', 'RELIGIONS', 'FROM', 'ONE', 'OF', 'WHICH', 'YOU', 'HAVE', 'NOT', 'AS', 'YET', 'EMERGED', 'AND', 'ANOTHER', 'INTO', 'WHICH', 'YOU', 'HAVE', 'NOT', 'YET', 'ENTERED', 'IS', 'INTOLERABLE', 'AND', 'TWILIGHT', 'IS', 'PLEASING', 'ONLY', 'TO', 'BAT', 'LIKE', 'SOULS'] +4294-9934-0004-1322: ref=['MARIUS', 'WAS', 'CLEAR', 'EYED', 'AND', 'HE', 'REQUIRED', 'THE', 'TRUE', 'LIGHT'] +4294-9934-0004-1322: hyp=['MARIUS', 'WAS', 'CLEAR', 'EYED', 'AND', 'HE', 'REQUIRED', 'THE', 'TRUE', 'LIGHT'] +4294-9934-0005-1323: ref=['THE', 'HALF', 'LIGHTS', 'OF', 'DOUBT', 'PAINED', 'HIM'] +4294-9934-0005-1323: hyp=['THE', 'HALF', 'LIGHTS', 'OF', 'DOUBT', 'PAINED', 'HIM'] +4294-9934-0006-1324: ref=['WHATEVER', 'MAY', 'HAVE', 'BEEN', 'HIS', 'DESIRE', 'TO', 'REMAIN', 'WHERE', 'HE', 'WAS', 'HE', 'COULD', 'NOT', 'HALT', 'THERE', 'HE', 'WAS', 'IRRESISTIBLY', 'CONSTRAINED', 'TO', 'CONTINUE', 'TO', 'ADVANCE', 'TO', 'EXAMINE', 'TO', 'THINK', 'TO', 'MARCH', 'FURTHER'] +4294-9934-0006-1324: hyp=['WHATEVER', 'MAY', 'HAVE', 'BEEN', 'HIS', 'DESIRE', 'TO', 'REMAIN', 'WHERE', 'HE', 'WAS', 'HE', 'COULD', 'NOT', 'HALT', 'THERE', 'HE', 'WAS', 'IRRESISTIBLY', 'CONSTRAINED', 'TO', 'CONTINUE', 'TO', 'ADVANCE', 'TO', 'EXAMINE', 'TO', 
'THINK', 'TO', 'MARCH', 'FURTHER'] +4294-9934-0007-1325: ref=['HE', 'FEARED', 'AFTER', 'HAVING', 'TAKEN', 'SO', 'MANY', 'STEPS', 'WHICH', 'HAD', 'BROUGHT', 'HIM', 'NEARER', 'TO', 'HIS', 'FATHER', 'TO', 'NOW', 'TAKE', 'A', 'STEP', 'WHICH', 'SHOULD', 'ESTRANGE', 'HIM', 'FROM', 'THAT', 'FATHER'] +4294-9934-0007-1325: hyp=['HE', 'FEARED', 'AFTER', 'HAVING', 'TAKEN', 'SO', 'MANY', 'STEPS', 'WHICH', 'HAD', 'BROUGHT', 'HIM', 'NEARER', 'TO', 'HIS', 'FATHER', 'TO', 'NOW', 'TAKE', 'A', 'STEP', 'WHICH', 'SHOULD', 'ESTRANGE', 'HIM', 'FROM', 'THAT', 'FATHER'] +4294-9934-0008-1326: ref=['HIS', 'DISCOMFORT', 'WAS', 'AUGMENTED', 'BY', 'ALL', 'THE', 'REFLECTIONS', 'WHICH', 'OCCURRED', 'TO', 'HIM'] +4294-9934-0008-1326: hyp=['HIS', 'DISCOMFORT', 'WAS', 'AUGMENTED', 'BY', 'ALL', 'THE', 'REFLECTIONS', 'WHICH', 'OCCURRED', 'TO', 'HIM'] +4294-9934-0009-1327: ref=['IN', 'THE', 'TROUBLED', 'STATE', 'OF', 'HIS', 'CONSCIENCE', 'HE', 'NO', 'LONGER', 'THOUGHT', 'OF', 'CERTAIN', 'SERIOUS', 'SIDES', 'OF', 'EXISTENCE'] +4294-9934-0009-1327: hyp=['IN', 'THE', 'TROUBLED', 'STATE', 'OF', 'HIS', 'CONSCIENCE', 'HE', 'NO', 'LONGER', 'THOUGHT', 'OF', 'CERTAIN', 'SERIOUS', 'SIDES', 'OF', 'EXISTENCE'] +4294-9934-0010-1328: ref=['THEY', 'SOON', 'ELBOWED', 'HIM', 'ABRUPTLY'] +4294-9934-0010-1328: hyp=['THEY', 'SOON', 'ELBOWED', 'HIM', 'ABRUPTLY'] +4294-9934-0011-1329: ref=['REQUEST', 'COURFEYRAC', 'TO', 'COME', 'AND', 'TALK', 'WITH', 'ME', 'SAID', 'MARIUS'] +4294-9934-0011-1329: hyp=['REQUEST', 'COURFEYRAC', 'TO', 'COME', 'AND', 'TALK', 'WITH', 'ME', 'SAID', 'MARIUS'] +4294-9934-0012-1330: ref=['WHAT', 'IS', 'TO', 'BECOME', 'OF', 'YOU', 'SAID', 'COURFEYRAC'] +4294-9934-0012-1330: hyp=['WHAT', 'IS', 'TO', 'BECOME', 'OF', 'YOU', 'SAID', 'COURFEYRAC'] +4294-9934-0013-1331: ref=['WHAT', 'ARE', 'YOU', 'GOING', 'TO', 'DO', 'I', 'DO', 'NOT', 'KNOW'] +4294-9934-0013-1331: hyp=['WHAT', 'ARE', 'YOU', 'GOING', 'TO', 'DO', 'I', 'DO', 'NOT', 'KNOW'] +4294-9934-0014-1332: ref=['SILVER', 'GOLD', 'HERE', 'IT', 'IS'] +4294-9934-0014-1332: hyp=['SILVER', 'GOLD', 'HERE', 'IT', 'IS'] +4294-9934-0015-1333: ref=['YOU', 'WILL', 'THEN', 'HAVE', 'ONLY', 'A', 'PAIR', 'OF', 'TROUSERS', 'A', 'WAISTCOAT', 'A', 'HAT', 'AND', 'A', 'COAT', 'AND', 'MY', 'BOOTS'] +4294-9934-0015-1333: hyp=['YOU', 'WILL', 'THEN', 'HAVE', 'ONLY', 'A', 'PAIR', 'OF', 'TROUSERS', 'A', 'WAISTCOAT', 'A', 'HAT', 'AND', 'A', 'COAT', 'AND', 'MY', 'BOOTS'] +4294-9934-0016-1334: ref=['THAT', 'WILL', 'BE', 'ENOUGH'] +4294-9934-0016-1334: hyp=['THAT', 'WILL', 'BE', 'ENOUGH'] +4294-9934-0017-1335: ref=['NO', 'IT', 'IS', 'NOT', 'GOOD', 'WHAT', 'WILL', 'YOU', 'DO', 'AFTER', 'THAT'] +4294-9934-0017-1335: hyp=['NO', 'IT', 'IS', 'NOT', 'GOOD', 'WHAT', 'WILL', 'YOU', 'DO', 'AFTER', 'THAT'] +4294-9934-0018-1336: ref=['DO', 'YOU', 'KNOW', 'GERMAN', 'NO'] +4294-9934-0018-1336: hyp=['DO', 'YOU', 'KNOW', 'GERMAN', 'NO'] +4294-9934-0019-1337: ref=['IT', 'IS', 'BADLY', 'PAID', 'WORK', 'BUT', 'ONE', 'CAN', 'LIVE', 'BY', 'IT'] +4294-9934-0019-1337: hyp=['IT', 'IS', 'BADLY', 'PAID', 'WORK', 'BUT', 'ONE', 'CAN', 'LIVE', 'BY', 'IT'] +4294-9934-0020-1338: ref=['THE', 'CLOTHES', 'DEALER', 'WAS', 'SENT', 'FOR'] +4294-9934-0020-1338: hyp=['THE', 'CLOTHES', 'DEALER', 'WAS', 'SENT', 'FOR'] +4294-9934-0021-1339: ref=['HE', 'PAID', 'TWENTY', 'FRANCS', 'FOR', 'THE', 'CAST', 'OFF', 'GARMENTS', 'THEY', 'WENT', 'TO', 'THE', "WATCHMAKER'S"] +4294-9934-0021-1339: hyp=['HE', 'PAID', 'TWENTY', 'FRANCS', 'FOR', 'THE', 'CAST', 'OFF', 'GARMENTS', 'THEY', 'WENT', 'TO', 'THE', "WATCHMAKER'S"] +4294-9934-0022-1340: ref=['HE', 
'BOUGHT', 'THE', 'WATCH', 'FOR', 'FORTY', 'FIVE', 'FRANCS'] +4294-9934-0022-1340: hyp=['HE', 'BOUGHT', 'THE', 'WATCH', 'FOR', 'FORTY', 'FIVE', 'FRANCS'] +4294-9934-0023-1341: ref=['HELLO', 'I', 'HAD', 'FORGOTTEN', 'THAT', 'SAID', 'MARIUS'] +4294-9934-0023-1341: hyp=['HALLO', 'I', 'HAD', 'FORGOTTEN', 'THAT', 'SAID', 'MARIUS'] +4294-9934-0024-1342: ref=['THE', 'LANDLORD', 'PRESENTED', 'HIS', 'BILL', 'WHICH', 'HAD', 'TO', 'BE', 'PAID', 'ON', 'THE', 'SPOT'] +4294-9934-0024-1342: hyp=['THE', 'LANDLORD', 'PRESENTED', 'HIS', 'BILL', 'WHICH', 'HAD', 'TO', 'BE', 'PAID', 'ON', 'THE', 'SPOT'] +4294-9934-0025-1343: ref=['I', 'HAVE', 'TEN', 'FRANCS', 'LEFT', 'SAID', 'MARIUS'] +4294-9934-0025-1343: hyp=['I', 'HAVE', 'TEN', 'FRANCS', 'LEFT', 'SAID', 'MARIUS'] +4294-9934-0026-1344: ref=['THAT', 'WILL', 'BE', 'SWALLOWING', 'A', 'TONGUE', 'VERY', 'FAST', 'OR', 'A', 'HUNDRED', 'SOUS', 'VERY', 'SLOWLY'] +4294-9934-0026-1344: hyp=['THAT', 'WILL', 'BE', 'SWALLOWING', 'A', 'TONGUE', 'VERY', 'FAST', 'OR', 'A', 'HUNDRED', 'SOUS', 'VERY', 'SLOWLY'] +4294-9934-0027-1345: ref=['ONE', 'MORNING', 'ON', 'HIS', 'RETURN', 'FROM', 'THE', 'LAW', 'SCHOOL', 'MARIUS', 'FOUND', 'A', 'LETTER', 'FROM', 'HIS', 'AUNT', 'AND', 'THE', 'SIXTY', 'PISTOLES', 'THAT', 'IS', 'TO', 'SAY', 'SIX', 'HUNDRED', 'FRANCS', 'IN', 'GOLD', 'IN', 'A', 'SEALED', 'BOX'] +4294-9934-0027-1345: hyp=['ONE', 'MORNING', 'ON', 'HIS', 'RETURN', 'FROM', 'THE', 'LAW', 'SCHOOL', 'MARIUS', 'FOUND', 'A', 'LETTER', 'FROM', 'HIS', 'AUNT', 'AND', 'THE', 'SIXTY', 'PISTOLES', 'THAT', 'IS', 'TO', 'SAY', 'SIX', 'HUNDRED', 'FRANCS', 'IN', 'GOLD', 'IN', 'A', 'SEALED', 'BOX'] +4294-9934-0028-1346: ref=['MARIUS', 'SENT', 'BACK', 'THE', 'THIRTY', 'LOUIS', 'TO', 'HIS', 'AUNT', 'WITH', 'A', 'RESPECTFUL', 'LETTER', 'IN', 'WHICH', 'HE', 'STATED', 'THAT', 'HE', 'HAD', 'SUFFICIENT', 'MEANS', 'OF', 'SUBSISTENCE', 'AND', 'THAT', 'HE', 'SHOULD', 'BE', 'ABLE', 'THENCEFORTH', 'TO', 'SUPPLY', 'ALL', 'HIS', 'NEEDS'] +4294-9934-0028-1346: hyp=['MARIUS', 'SENT', 'BACK', 'THE', 'THIRTY', 'LOUIS', 'TO', 'HIS', 'AUNT', 'WITH', 'A', 'RESPECTFUL', 'LETTER', 'IN', 'WHICH', 'HE', 'STATED', 'THAT', 'HE', 'HAD', 'SUFFICIENT', 'MEANS', 'OF', 'SUBSISTENCE', 'AND', 'THAT', 'HE', 'SHOULD', 'BE', 'ABLE', 'THENCEFORTH', 'TO', 'SUPPLY', 'ALL', 'HIS', 'NEEDS'] +4294-9934-0029-1347: ref=['AT', 'THAT', 'MOMENT', 'HE', 'HAD', 'THREE', 'FRANCS', 'LEFT'] +4294-9934-0029-1347: hyp=['AT', 'THAT', 'MOMENT', 'HE', 'HAD', 'THREE', 'FRANCS', 'LEFT'] +4350-10919-0000-1348: ref=['HE', 'PERCEIVED', 'THAT', 'IT', 'WAS', 'NO', 'GOOD', 'TALKING', 'TO', 'THE', 'OLD', 'MAN', 'AND', 'THAT', 'THE', 'PRINCIPAL', 'PERSON', 'IN', 'THE', 'HOUSE', 'WAS', 'THE', 'MOTHER'] +4350-10919-0000-1348: hyp=['HE', 'PERCEIVED', 'THAT', 'IT', 'WAS', 'NO', 'GOOD', 'TALKING', 'TO', 'THE', 'OLD', 'MAN', 'AND', 'THAT', 'THE', 'PRINCIPAL', 'PERSON', 'IN', 'THE', 'HOUSE', 'WAS', 'THE', 'MOTHER'] +4350-10919-0001-1349: ref=['BEFORE', 'HER', 'HE', 'DECIDED', 'TO', 'SCATTER', 'HIS', 'PEARLS'] +4350-10919-0001-1349: hyp=['BEFORE', 'HER', 'HE', 'DECIDED', 'TO', 'SCATTER', 'HIS', 'PEARLS'] +4350-10919-0002-1350: ref=['THE', 'PRINCESS', 'WAS', 'DISTRACTED', 'AND', 'DID', 'NOT', 'KNOW', 'WHAT', 'TO', 'DO', 'SHE', 'FELT', 'SHE', 'HAD', 'SINNED', 'AGAINST', 'KITTY'] +4350-10919-0002-1350: hyp=['THE', 'PRINCESS', 'WAS', 'DISTRACTED', 'AND', 'DID', 'NOT', 'KNOW', 'WHAT', 'TO', 'DO', 'SHE', 'FELT', 'SHE', 'HAD', 'SINNED', 'AGAINST', 'KITTY'] +4350-10919-0003-1351: ref=['WELL', 'DOCTOR', 'DECIDE', 'OUR', 'FATE', 'SAID', 'THE', 'PRINCESS', 'TELL', 'ME', 'EVERYTHING'] 
+4350-10919-0003-1351: hyp=['WELL', 'DOCTOR', 'DECIDE', 'OUR', 'FATE', 'SAID', 'THE', 'PRINCESS', 'TELL', 'ME', 'EVERYTHING'] +4350-10919-0004-1352: ref=['IS', 'THERE', 'HOPE', 'SHE', 'MEANT', 'TO', 'SAY', 'BUT', 'HER', 'LIPS', 'QUIVERED', 'AND', 'SHE', 'COULD', 'NOT', 'UTTER', 'THE', 'QUESTION', 'WELL', 'DOCTOR'] +4350-10919-0004-1352: hyp=['IS', 'THERE', 'HOPE', 'SHE', 'MEANT', 'TO', 'SAY', 'BUT', 'HER', 'LIPS', 'QUIVERED', 'AND', 'SHE', 'COULD', 'NOT', 'UTTER', 'THE', 'QUESTION', 'WELL', 'DOCTOR'] +4350-10919-0005-1353: ref=['AS', 'YOU', 'PLEASE', 'THE', 'PRINCESS', 'WENT', 'OUT', 'WITH', 'A', 'SIGH'] +4350-10919-0005-1353: hyp=['AS', 'YOU', 'PLEASE', 'THE', 'PRINCESS', 'WENT', 'OUT', 'WITH', 'A', 'SIGH'] +4350-10919-0006-1354: ref=['THE', 'FAMILY', 'DOCTOR', 'RESPECTFULLY', 'CEASED', 'IN', 'THE', 'MIDDLE', 'OF', 'HIS', 'OBSERVATIONS'] +4350-10919-0006-1354: hyp=['THE', 'FAMILY', 'DOCTOR', 'RESPECTFULLY', 'CEASED', 'IN', 'THE', 'MIDDLE', 'OF', 'HIS', 'OBSERVATIONS'] +4350-10919-0007-1355: ref=['AND', 'THERE', 'ARE', 'INDICATIONS', 'MALNUTRITION', 'NERVOUS', 'EXCITABILITY', 'AND', 'SO', 'ON'] +4350-10919-0007-1355: hyp=['AND', 'THERE', 'ARE', 'INDICATIONS', 'MAL', 'NUTRITION', 'NERVOUS', 'EXCITABILITY', 'AND', 'SO', 'ON'] +4350-10919-0008-1356: ref=['THE', 'QUESTION', 'STANDS', 'THUS', 'IN', 'PRESENCE', 'OF', 'INDICATIONS', 'OF', 'TUBERCULOUS', 'PROCESS', 'WHAT', 'IS', 'TO', 'BE', 'DONE', 'TO', 'MAINTAIN', 'NUTRITION'] +4350-10919-0008-1356: hyp=['THE', 'QUESTION', 'STANDS', 'THUS', 'IN', 'PRESENCE', 'OF', 'INDICATIONS', 'OF', 'TUBERCULOUS', 'PROCESS', 'WHAT', 'IS', 'TO', 'BE', 'DONE', 'TO', 'MAINTAIN', 'NUTRITION'] +4350-10919-0009-1357: ref=['YES', "THAT'S", 'AN', 'UNDERSTOOD', 'THING', 'RESPONDED', 'THE', 'CELEBRATED', 'PHYSICIAN', 'AGAIN', 'GLANCING', 'AT', 'HIS', 'WATCH'] +4350-10919-0009-1357: hyp=['YES', "THAT'S", 'AN', 'UNDERSTOOD', 'THING', 'RESPONDED', 'THE', 'CELEBRATED', 'PHYSICIAN', 'AGAIN', 'GLANCING', 'AT', 'HIS', 'WATCH'] +4350-10919-0010-1358: ref=['BEG', 'PARDON', 'IS', 'THE', 'YAUSKY', 'BRIDGE', 'DONE', 'YET', 'OR', 'SHALL', 'I', 'HAVE', 'TO', 'DRIVE', 'AROUND'] +4350-10919-0010-1358: hyp=['BEG', 'PARDON', 'IT', 'IS', 'THE', 'YOZKI', 'BRIDGE', 'DONE', 'YET', 'OR', 'SHALL', 'I', 'HAVE', 'TO', 'DRIVE', 'ROUND'] +4350-10919-0011-1359: ref=['HE', 'ASKED', 'AH', 'IT', 'IS'] +4350-10919-0011-1359: hyp=['HE', 'ASKED', 'AH', 'IT', 'IS'] +4350-10919-0012-1360: ref=['OH', 'WELL', 'THEN', 'I', 'CAN', 'DO', 'IT', 'IN', 'TWENTY', 'MINUTES'] +4350-10919-0012-1360: hyp=['OH', 'WELL', 'THEN', 'I', 'CAN', 'DO', 'IT', 'IN', 'TWENTY', 'MINUTES'] +4350-10919-0013-1361: ref=['AND', 'HOW', 'ABOUT', 'A', 'TOUR', 'ABROAD', 'ASKED', 'THE', 'FAMILY', 'DOCTOR'] +4350-10919-0013-1361: hyp=['AND', 'HOW', 'ABOUT', 'A', 'TOUR', 'ABROAD', 'ASKED', 'THE', 'FAMILY', 'DOCTOR'] +4350-10919-0014-1362: ref=['WHAT', 'IS', 'WANTED', 'IS', 'MEANS', 'OF', 'IMPROVING', 'NUTRITION', 'AND', 'NOT', 'FOR', 'LOWERING', 'IT'] +4350-10919-0014-1362: hyp=['WHAT', 'IS', 'WANTED', 'IS', 'THE', 'MEANS', 'OF', 'IMPROVING', 'NUTRITION', 'AND', 'NOT', 'FOR', 'LOWERING', 'IT'] +4350-10919-0015-1363: ref=['THE', 'FAMILY', 'DOCTOR', 'LISTENED', 'ATTENTIVELY', 'AND', 'RESPECTFULLY'] +4350-10919-0015-1363: hyp=['THE', 'FAMILY', 'DOCTOR', 'LISTENED', 'ATTENTIVELY', 'AND', 'RESPECTFULLY'] +4350-10919-0016-1364: ref=['BUT', 'IN', 'FAVOR', 'OF', 'FOREIGN', 'TRAVEL', 'I', 'WOULD', 'URGE', 'THE', 'CHANGE', 'OF', 'HABITS', 'THE', 'REMOVAL', 'FROM', 'CONDITIONS', 'CALLING', 'UP', 'REMINISCENCES'] +4350-10919-0016-1364: hyp=['BUT', 'IN', 
'FAVOUR', 'OF', 'FOREIGN', 'TRAVEL', 'I', 'WOULD', 'URGE', 'THE', 'CHANGE', 'OF', 'HABITS', 'THE', 'REMOVAL', 'FROM', 'CONDITIONS', 'CALLING', 'UP', 'REMINISCENCES'] +4350-10919-0017-1365: ref=['AND', 'THEN', 'THE', 'MOTHER', 'WISHES', 'IT', 'HE', 'ADDED'] +4350-10919-0017-1365: hyp=['AND', 'THEN', 'THE', 'MOTHER', 'WISHES', 'IT', 'HE', 'ADDED'] +4350-10919-0018-1366: ref=['AH', 'WELL', 'IN', 'THAT', 'CASE', 'TO', 'BE', 'SURE', 'LET', 'THEM', 'GO', 'ONLY', 'THOSE', 'GERMAN', 'QUACKS', 'ARE', 'MISCHIEVOUS'] +4350-10919-0018-1366: hyp=['AH', 'WELL', 'IN', 'THAT', 'CASE', 'TO', 'BE', 'SURE', 'LET', 'THEM', 'GO', 'ONLY', 'THOSE', 'GERMAN', 'QUACKS', 'ARE', 'MISCHIEVOUS'] +4350-10919-0019-1367: ref=['OH', "TIME'S", 'UP', 'ALREADY', 'AND', 'HE', 'WENT', 'TO', 'THE', 'DOOR'] +4350-10919-0019-1367: hyp=['OH', "TIME'S", 'UP', 'ALREADY', 'AND', 'HE', 'WENT', 'TO', 'THE', 'DOOR'] +4350-10919-0020-1368: ref=['THE', 'CELEBRATED', 'DOCTOR', 'ANNOUNCED', 'TO', 'THE', 'PRINCESS', 'A', 'FEELING', 'OF', 'WHAT', 'WAS', 'DUE', 'FROM', 'HIM', 'DICTATED', 'HIS', 'DOING', 'SO', 'THAT', 'HE', 'OUGHT', 'TO', 'SEE', 'THE', 'PATIENT', 'ONCE', 'MORE'] +4350-10919-0020-1368: hyp=['THE', 'CELEBRATED', 'DOCTOR', 'ANNOUNCED', 'TO', 'THE', 'PRINCESS', 'A', 'FEELING', 'OF', 'WHAT', 'WAS', 'DUE', 'FROM', 'HIM', 'DICTATED', 'HIS', 'DOING', 'SO', 'THAT', 'HE', 'OUGHT', 'TO', 'SEE', 'THE', 'PATIENT', 'ONCE', 'MORE'] +4350-10919-0021-1369: ref=['OH', 'NO', 'ONLY', 'A', 'FEW', 'DETAILS', 'PRINCESS', 'COME', 'THIS', 'WAY'] +4350-10919-0021-1369: hyp=['OH', 'NO', 'ONLY', 'A', 'FEW', 'DETAILS', 'PRINCESS', 'COME', 'THIS', 'WAY'] +4350-10919-0022-1370: ref=['AND', 'THE', 'MOTHER', 'ACCOMPANIED', 'BY', 'THE', 'DOCTOR', 'WENT', 'INTO', 'THE', 'DRAWING', 'ROOM', 'TO', 'KITTY'] +4350-10919-0022-1370: hyp=['AND', 'THE', 'MOTHER', 'ACCOMPANIED', 'BY', 'THE', 'DOCTOR', 'WENT', 'INTO', 'THE', 'DRAWING', 'ROOM', 'TO', 'KITTY'] +4350-10919-0023-1371: ref=['WHEN', 'THE', 'DOCTOR', 'CAME', 'IN', 'SHE', 'FLUSHED', 'CRIMSON', 'AND', 'HER', 'EYES', 'FILLED', 'WITH', 'TEARS'] +4350-10919-0023-1371: hyp=['WHEN', 'THE', 'DOCTOR', 'CAME', 'IN', 'SHE', 'FLUSHED', 'CRIMSON', 'AND', 'HER', 'EYES', 'FILLED', 'WITH', 'TEARS'] +4350-10919-0024-1372: ref=['SHE', 'ANSWERED', 'HIM', 'AND', 'ALL', 'AT', 'ONCE', 'GOT', 'UP', 'FURIOUS'] +4350-10919-0024-1372: hyp=['SHE', 'ANSWERED', 'HIM', 'AND', 'ALL', 'AT', 'ONCE', 'GOT', 'UP', 'FURIOUS'] +4350-10919-0025-1373: ref=['EXCUSE', 'ME', 'DOCTOR', 'BUT', 'THERE', 'IS', 'REALLY', 'NO', 'OBJECT', 'IN', 'THIS'] +4350-10919-0025-1373: hyp=['EXCUSE', 'ME', 'DOCTOR', 'BUT', 'THERE', 'IS', 'REALLY', 'NO', 'OBJECT', 'IN', 'THIS'] +4350-10919-0026-1374: ref=['THIS', 'IS', 'THE', 'THIRD', 'TIME', "YOU'VE", 'ASKED', 'ME', 'THE', 'SAME', 'THING'] +4350-10919-0026-1374: hyp=['THIS', 'IS', 'THE', 'THIRD', 'TIME', 'YOU', 'HAVE', 'ASKED', 'ME', 'THE', 'SAME', 'THING'] +4350-10919-0027-1375: ref=['THE', 'CELEBRATED', 'DOCTOR', 'DID', 'NOT', 'TAKE', 'OFFENSE'] +4350-10919-0027-1375: hyp=['THE', 'CELEBRATED', 'DOCTOR', 'DID', 'NOT', 'TAKE', 'OFFENCE'] +4350-10919-0028-1376: ref=['NERVOUS', 'IRRITABILITY', 'HE', 'SAID', 'TO', 'THE', 'PRINCESS', 'WHEN', 'KITTY', 'HAD', 'LEFT', 'THE', 'ROOM', 'HOWEVER', 'I', 'HAD', 'FINISHED'] +4350-10919-0028-1376: hyp=['NERVOUS', 'IRRITABILITY', 'HE', 'SAID', 'TO', 'THE', 'PRINCESS', 'WHEN', 'KITTY', 'HAD', 'LEFT', 'THE', 'ROOM', 'HOWEVER', 'I', 'HAD', 'FINISHED'] +4350-10919-0029-1377: ref=['AND', 'THE', 'DOCTOR', 'BEGAN', 'SCIENTIFICALLY', 'EXPLAINING', 'TO', 'THE', 'PRINCESS', 'AS', 'AN', 
'EXCEPTIONALLY', 'INTELLIGENT', 'WOMAN', 'THE', 'CONDITION', 'OF', 'THE', 'YOUNG', 'PRINCESS', 'AND', 'CONCLUDED', 'BY', 'INSISTING', 'ON', 'THE', 'DRINKING', 'OF', 'THE', 'WATERS', 'WHICH', 'WERE', 'CERTAINLY', 'HARMLESS'] +4350-10919-0029-1377: hyp=['AND', 'THE', 'DOCTOR', 'BEGAN', 'SCIENTIFICALLY', 'EXPLAINING', 'TO', 'THE', 'PRINCESS', 'AS', 'AN', 'EXCEPTIONALLY', 'INTELLIGENT', 'WOMAN', 'THE', 'CONDITION', 'OF', 'THE', 'YOUNG', 'PRINCESS', 'AND', 'CONCLUDED', 'BY', 'INSISTING', 'ON', 'THE', 'DRINKING', 'OF', 'THE', 'WATERS', 'WHICH', 'WAS', 'CERTAINLY', 'HARMLESS'] +4350-10919-0030-1378: ref=['AT', 'THE', 'QUESTION', 'SHOULD', 'THEY', 'GO', 'ABROAD', 'THE', 'DOCTOR', 'PLUNGED', 'INTO', 'DEEP', 'MEDITATION', 'AS', 'THOUGH', 'RESOLVING', 'A', 'WEIGHTY', 'PROBLEM'] +4350-10919-0030-1378: hyp=['BUT', 'THE', 'QUESTION', 'SHOULD', 'THEY', 'GO', 'ABROAD', 'THE', 'DOCTOR', 'PLUNGED', 'INTO', 'DEEP', 'MEDITATION', 'AS', 'THOUGH', 'RESOLVING', 'A', 'WEIGHTY', 'PROBLEM'] +4350-10919-0031-1379: ref=['FINALLY', 'HIS', 'DECISION', 'WAS', 'PRONOUNCED', 'THEY', 'WERE', 'TO', 'GO', 'ABROAD', 'BUT', 'TO', 'PUT', 'NO', 'FAITH', 'IN', 'FOREIGN', 'QUACKS', 'AND', 'TO', 'APPLY', 'TO', 'HIM', 'IN', 'ANY', 'NEED'] +4350-10919-0031-1379: hyp=['FINALLY', 'HIS', 'DECISION', 'WAS', 'PRONOUNCED', 'THEY', 'WERE', 'TO', 'GO', 'ABROAD', 'BUT', 'TO', 'PUT', 'NO', 'FAITH', 'IN', 'FOREIGN', 'QUACKS', 'AND', 'TO', 'APPLY', 'TO', 'HIM', 'IN', 'ANY', 'NEED'] +4350-10919-0032-1380: ref=['IT', 'SEEMED', 'AS', 'THOUGH', 'SOME', 'PIECE', 'OF', 'GOOD', 'FORTUNE', 'HAD', 'COME', 'TO', 'PASS', 'AFTER', 'THE', 'DOCTOR', 'HAD', 'GONE'] +4350-10919-0032-1380: hyp=['IT', 'SEEMED', 'AS', 'THOUGH', 'SOME', 'PIECE', 'OF', 'GOOD', 'FORTUNE', 'HAD', 'COME', 'TO', 'PASS', 'AFTER', 'THE', 'DOCTOR', 'HAD', 'GONE'] +4350-10919-0033-1381: ref=['THE', 'MOTHER', 'WAS', 'MUCH', 'MORE', 'CHEERFUL', 'WHEN', 'SHE', 'WENT', 'BACK', 'TO', 'HER', 'DAUGHTER', 'AND', 'KITTY', 'PRETENDED', 'TO', 'BE', 'MORE', 'CHEERFUL'] +4350-10919-0033-1381: hyp=['THE', 'MOTHER', 'WAS', 'MUCH', 'MORE', 'CHEERFUL', 'WHEN', 'SHE', 'WENT', 'BACK', 'TO', 'HER', 'DAUGHTER', 'AND', 'KITTY', 'PRETENDED', 'TO', 'BE', 'MORE', 'CHEERFUL'] +4350-9170-0000-1382: ref=['EDUCATED', 'PEOPLE', 'OF', 'THE', 'UPPER', 'CLASSES', 'ARE', 'TRYING', 'TO', 'STIFLE', 'THE', 'EVER', 'GROWING', 'SENSE', 'OF', 'THE', 'NECESSITY', 'OF', 'TRANSFORMING', 'THE', 'EXISTING', 'SOCIAL', 'ORDER'] +4350-9170-0000-1382: hyp=['EDUCATED', 'PEOPLE', 'OF', 'THE', 'UPPER', 'CLASSES', 'ARE', 'TRYING', 'TO', 'STIFLE', 'THE', 'EVER', 'GROWING', 'SENSE', 'OF', 'THE', 'NECESSITY', 'OF', 'TRANSFORMING', 'THE', 'EXISTING', 'SOCIAL', 'ORDER'] +4350-9170-0001-1383: ref=['THIS', 'IS', 'ABSOLUTELY', 'INCORRECT'] +4350-9170-0001-1383: hyp=['THIS', 'IS', 'ABSOLUTELY', 'INCORRECT'] +4350-9170-0002-1384: ref=['IN', 'THE', 'SOCIAL', 'CONCEPTION', 'OF', 'LIFE', 'IT', 'IS', 'SUPPOSED', 'THAT', 'SINCE', 'THE', 'AIM', 'OF', 'LIFE', 'IS', 'FOUND', 'IN', 'GROUPS', 'OF', 'INDIVIDUALS', 'INDIVIDUALS', 'WILL', 'VOLUNTARILY', 'SACRIFICE', 'THEIR', 'OWN', 'INTERESTS', 'FOR', 'THE', 'INTERESTS', 'OF', 'THE', 'GROUP'] +4350-9170-0002-1384: hyp=['IN', 'THE', 'SOCIAL', 'CONCEPTION', 'OF', 'LIFE', 'IT', 'IS', 'SUPPOSED', 'THAT', 'SINCE', 'THE', 'AIM', 'OF', 'LIFE', 'IS', 'FOUND', 'IN', 'GROUPS', 'OF', 'INDIVIDUALS', 'INDIVIDUALS', 'WILL', 'VOLUNTARILY', 'SACRIFICE', 'THEIR', 'OWN', 'INTERESTS', 'FOR', 'THE', 'INTEREST', 'OF', 'THE', 'GROUP'] +4350-9170-0003-1385: ref=['THE', 'CHAMPIONS', 'OF', 'THE', 'SOCIAL', 'CONCEPTION', 'OF', 'LIFE', 
'USUALLY', 'TRY', 'TO', 'CONNECT', 'THE', 'IDEA', 'OF', 'AUTHORITY', 'THAT', 'IS', 'OF', 'VIOLENCE', 'WITH', 'THE', 'IDEA', 'OF', 'MORAL', 'INFLUENCE', 'BUT', 'THIS', 'CONNECTION', 'IS', 'QUITE', 'IMPOSSIBLE'] +4350-9170-0003-1385: hyp=['THE', 'CHAMPIONS', 'OF', 'THE', 'SOCIAL', 'CONCEPTION', 'OF', 'LIFE', 'USUALLY', 'TRY', 'TO', 'CONNECT', 'THE', 'IDEA', 'OF', 'AUTHORITY', 'THAT', 'IS', 'OF', 'VIOLENCE', 'WITH', 'THE', 'IDEA', 'OF', 'MORAL', 'INFLUENCE', 'BUT', 'THIS', 'CONNECTION', 'IS', 'QUITE', 'IMPOSSIBLE'] +4350-9170-0004-1386: ref=['THE', 'MAN', 'WHO', 'IS', 'CONTROLLED', 'BY', 'MORAL', 'INFLUENCE', 'ACTS', 'IN', 'ACCORDANCE', 'WITH', 'HIS', 'OWN', 'DESIRES'] +4350-9170-0004-1386: hyp=['THE', 'MAN', 'WHO', 'IS', 'CONTROLLED', 'BY', 'MORAL', 'INFLUENCE', 'ACTS', 'IN', 'ACCORDANCE', 'WITH', 'HIS', 'OWN', 'DESIRES'] +4350-9170-0005-1387: ref=['THE', 'BASIS', 'OF', 'AUTHORITY', 'IS', 'BODILY', 'VIOLENCE'] +4350-9170-0005-1387: hyp=['THE', 'BASIS', 'OF', 'AUTHORITY', 'IS', 'BODILY', 'VIOLENCE'] +4350-9170-0006-1388: ref=['THE', 'POSSIBILITY', 'OF', 'APPLYING', 'BODILY', 'VIOLENCE', 'TO', 'PEOPLE', 'IS', 'PROVIDED', 'ABOVE', 'ALL', 'BY', 'AN', 'ORGANIZATION', 'OF', 'ARMED', 'MEN', 'TRAINED', 'TO', 'ACT', 'IN', 'UNISON', 'IN', 'SUBMISSION', 'TO', 'ONE', 'WILL'] +4350-9170-0006-1388: hyp=['THE', 'POSSIBILITY', 'OF', 'APPLYING', 'BODILY', 'VIOLENCE', 'TO', 'PEOPLE', 'IS', 'PROVIDED', 'ABOVE', 'ALL', 'BY', 'AN', 'ORGANIZATION', 'OF', 'ARMED', 'MEN', 'TRAINED', 'TO', 'ACT', 'IN', 'UNISON', 'AND', 'SUBMISSION', 'TO', 'ONE', 'WILL'] +4350-9170-0007-1389: ref=['THESE', 'BANDS', 'OF', 'ARMED', 'MEN', 'SUBMISSIVE', 'TO', 'A', 'SINGLE', 'WILL', 'ARE', 'WHAT', 'CONSTITUTE', 'THE', 'ARMY'] +4350-9170-0007-1389: hyp=['THESE', 'BANDS', 'OF', 'ARMED', 'MEN', 'SUBMISSIVE', 'TO', 'A', 'SINGLE', 'WILL', 'ARE', 'WHAT', 'CONSTITUTE', 'THE', 'ARMY'] +4350-9170-0008-1390: ref=['THE', 'ARMY', 'HAS', 'ALWAYS', 'BEEN', 'AND', 'STILL', 'IS', 'THE', 'BASIS', 'OF', 'POWER'] +4350-9170-0008-1390: hyp=['THE', 'ARMY', 'HAS', 'ALWAYS', 'BEEN', 'AND', 'STILL', 'IS', 'THE', 'BASIS', 'OF', 'POWER'] +4350-9170-0009-1391: ref=['POWER', 'IS', 'ALWAYS', 'IN', 'THE', 'HANDS', 'OF', 'THOSE', 'WHO', 'CONTROL', 'THE', 'ARMY', 'AND', 'ALL', 'MEN', 'IN', 'POWER', 'FROM', 'THE', 'ROMAN', 'CAESARS', 'TO', 'THE', 'RUSSIAN', 'AND', 'GERMAN', 'EMPERORS', 'TAKE', 'MORE', 'INTEREST', 'IN', 'THEIR', 'ARMY', 'THAN', 'IN', 'ANYTHING', 'AND', 'COURT', 'POPULARITY', 'IN', 'THE', 'ARMY', 'KNOWING', 'THAT', 'IF', 'THAT', 'IS', 'ON', 'THEIR', 'SIDE', 'THEIR', 'POWER', 'IS', 'SECURE'] +4350-9170-0009-1391: hyp=['POWER', 'IS', 'ALWAYS', 'IN', 'THE', 'HANDS', 'OF', 'THOSE', 'WHO', 'CONTROL', 'THE', 'ARMY', 'AND', 'ALL', 'MEN', 'IN', 'POWER', 'FROM', 'THE', 'ROMAN', 'CAESARS', 'TO', 'THE', 'RUSSIAN', 'AND', 'GERMAN', 'EMPERORS', 'TAKE', 'MORE', 'INTEREST', 'IN', 'THEIR', 'ARMY', 'THAN', 'IN', 'ANYTHING', 'AND', 'COURT', 'POPULARITY', 'IN', 'THE', 'ARMY', 'KNOWING', 'THAT', 'IF', 'THAT', 'IS', 'ON', 'THEIR', 'SIDE', 'THEIR', 'POWER', 'IS', 'SECURE'] +4350-9170-0010-1392: ref=['INDEED', 'IT', 'COULD', 'NOT', 'BE', 'OTHERWISE'] +4350-9170-0010-1392: hyp=['INDEED', 'IT', 'COULD', 'NOT', 'BE', 'OTHERWISE'] +4350-9170-0011-1393: ref=['ONLY', 'UNDER', 'THOSE', 'CONDITIONS', 'COULD', 'THE', 'SOCIAL', 'ORGANIZATION', 'BE', 'JUSTIFIED'] +4350-9170-0011-1393: hyp=['ONLY', 'UNDER', 'THOSE', 'CONDITIONS', 'COULD', 'THE', 'SOCIAL', 'ORGANIZATION', 'BE', 'JUSTIFIED'] +4350-9170-0012-1394: ref=['BUT', 'SINCE', 'THIS', 'IS', 'NOT', 'THE', 'CASE', 'AND', 'ON', 'THE', 
'CONTRARY', 'MEN', 'IN', 'POWER', 'ARE', 'ALWAYS', 'FAR', 'FROM', 'BEING', 'SAINTS', 'THROUGH', 'THE', 'VERY', 'FACT', 'OF', 'THEIR', 'POSSESSION', 'OF', 'POWER', 'THE', 'SOCIAL', 'ORGANIZATION', 'BASED', 'ON', 'POWER', 'HAS', 'NO', 'JUSTIFICATION'] +4350-9170-0012-1394: hyp=['BUT', 'SINCE', 'THIS', 'IS', 'NOT', 'THE', 'CASE', 'AND', 'ON', 'THE', 'CONTRARY', 'MEN', 'IN', 'POWER', 'ARE', 'ALWAYS', 'FAR', 'FROM', 'BEING', 'SAINTS', 'THROUGH', 'THE', 'VERY', 'FACT', 'OF', 'THEIR', 'POSSESSION', 'OF', 'POWER', 'THE', 'SOCIAL', 'ORGANIZATION', 'BASED', 'ON', 'POWER', 'HAS', 'NO', 'JUSTIFICATION'] +4350-9170-0013-1395: ref=['EVEN', 'IF', 'THERE', 'WAS', 'ONCE', 'A', 'TIME', 'WHEN', 'OWING', 'TO', 'THE', 'LOW', 'STANDARD', 'OF', 'MORALS', 'AND', 'THE', 'DISPOSITION', 'OF', 'MEN', 'TO', 'VIOLENCE', 'THE', 'EXISTENCE', 'OF', 'AN', 'AUTHORITY', 'TO', 'RESTRAIN', 'SUCH', 'VIOLENCE', 'WAS', 'AN', 'ADVANTAGE', 'BECAUSE', 'THE', 'VIOLENCE', 'OF', 'GOVERNMENT', 'WAS', 'LESS', 'THAN', 'THE', 'VIOLENCE', 'OF', 'INDIVIDUALS', 'ONE', 'CANNOT', 'BUT', 'SEE', 'THAT', 'THIS', 'ADVANTAGE', 'COULD', 'NOT', 'BE', 'LASTING'] +4350-9170-0013-1395: hyp=['EVEN', 'IF', 'THERE', 'WAS', 'ONCE', 'A', 'TIME', 'WHEN', 'OWING', 'TO', 'THE', 'LOW', 'STANDARDS', 'OF', 'MORALS', 'AND', 'THE', 'DISPOSITION', 'OF', 'MEN', 'TO', 'VIOLENCE', 'THE', 'EXISTENCE', 'OF', 'AN', 'AUTHORITY', 'TO', 'RESTRAIN', 'SUCH', 'VIOLENCE', 'WAS', 'AN', 'ADVANTAGE', 'BECAUSE', 'THE', 'VIOLENCE', 'OF', 'THE', 'GOVERNMENT', 'WAS', 'LESS', 'THAN', 'THE', 'VIOLENCE', 'OF', 'INDIVIDUALS', 'ONE', 'CANNOT', 'BUT', 'SEE', 'THAT', 'THIS', 'ADVANTAGE', 'COULD', 'NOT', 'BE', 'LASTING'] +4350-9170-0014-1396: ref=['BETWEEN', 'THE', 'MEMBERS', 'OF', 'ONE', 'STATE', 'SUBJECT', 'TO', 'A', 'SINGLE', 'AUTHORITY', 'THE', 'STRIFE', 'BETWEEN', 'INDIVIDUALS', 'SEEMS', 'STILL', 'LESS', 'AND', 'THE', 'LIFE', 'OF', 'THE', 'STATE', 'SEEMS', 'EVEN', 'MORE', 'SECURE'] +4350-9170-0014-1396: hyp=['BETWEEN', 'THE', 'MEMBERS', 'OF', 'ONE', 'STATE', 'SUBJECT', 'TO', 'A', 'SINGLE', 'AUTHORITY', 'THE', 'STRIFE', 'BETWEEN', 'THE', 'INDIVIDUALS', 'SEEMS', 'STILL', 'LESS', 'AND', 'THE', 'LIFE', 'OF', 'THE', 'STATE', 'SEEMS', 'EVEN', 'MORE', 'SECURE'] +4350-9170-0015-1397: ref=['IT', 'WAS', 'PRODUCED', 'ON', 'ONE', 'HAND', 'BY', 'THE', 'NATURAL', 'GROWTH', 'OF', 'POPULATION', 'AND', 'ON', 'THE', 'OTHER', 'BY', 'STRUGGLE', 'AND', 'CONQUEST'] +4350-9170-0015-1397: hyp=['IT', 'WAS', 'PRODUCED', 'ON', 'ONE', 'HAND', 'BY', 'THE', 'NATURAL', 'GROWTH', 'OF', 'POPULATION', 'AND', 'ON', 'THE', 'OTHER', 'BY', 'STRUGGLE', 'AND', 'CONQUEST'] +4350-9170-0016-1398: ref=['AFTER', 'CONQUEST', 'THE', 'POWER', 'OF', 'THE', 'EMPEROR', 'PUTS', 'AN', 'END', 'TO', 'INTERNAL', 'DISSENSIONS', 'AND', 'SO', 'THE', 'STATE', 'CONCEPTION', 'OF', 'LIFE', 'JUSTIFIES', 'ITSELF'] +4350-9170-0016-1398: hyp=['AFTER', 'CONQUEST', 'THE', 'POWER', 'OF', 'THE', 'EMPEROR', 'PUTS', 'AN', 'END', 'TO', 'INTERNAL', 'DISSENSIONS', 'AND', 'SO', 'THE', 'STATE', 'CONCEPTION', 'OF', 'LIFE', 'JUSTIFIES', 'ITSELF'] +4350-9170-0017-1399: ref=['BUT', 'THIS', 'JUSTIFICATION', 'IS', 'NEVER', 'MORE', 'THAN', 'TEMPORARY'] +4350-9170-0017-1399: hyp=['BUT', 'THIS', 'JUSTIFICATION', 'IS', 'NEVER', 'MORE', 'THAN', 'TEMPORARY'] +4350-9170-0018-1400: ref=['INTERNAL', 'DISSENSIONS', 'DISAPPEAR', 'ONLY', 'IN', 'PROPORTION', 'TO', 'THE', 'DEGREE', 'OF', 'OPPRESSION', 'EXERTED', 'BY', 'THE', 'AUTHORITY', 'OVER', 'THE', 'DISSENTIENT', 'INDIVIDUALS'] +4350-9170-0018-1400: hyp=['INTERNAL', 'DISSENSIONS', 'DISAPPEAR', 'ONLY', 'IN', 'PROPORTION', 'TO', 
'THE', 'DEGREE', 'OF', 'OPPRESSION', 'EXERTED', 'BY', 'THE', 'AUTHORITY', 'OVER', 'THE', 'DISSENTIENT', 'INDIVIDUALS'] +4350-9170-0019-1401: ref=['GOVERNMENT', 'AUTHORITY', 'EVEN', 'IF', 'IT', 'DOES', 'SUPPRESS', 'PRIVATE', 'VIOLENCE', 'ALWAYS', 'INTRODUCES', 'INTO', 'THE', 'LIFE', 'OF', 'MEN', 'FRESH', 'FORMS', 'OF', 'VIOLENCE', 'WHICH', 'TEND', 'TO', 'BECOME', 'GREATER', 'AND', 'GREATER', 'IN', 'PROPORTION', 'TO', 'THE', 'DURATION', 'AND', 'STRENGTH', 'OF', 'THE', 'GOVERNMENT'] +4350-9170-0019-1401: hyp=['GOVERNMENT', 'AUTHORITY', 'EVEN', 'IF', 'IT', 'DOES', 'SUPPRESS', 'PRIVATE', 'VIOLENCE', 'ALWAYS', 'INTRODUCES', 'INTO', 'THE', 'LIFE', 'OF', 'MEN', 'FRESH', 'FORMS', 'OF', 'VIOLENCE', 'WHICH', 'TEND', 'TO', 'BECOME', 'GREATER', 'AND', 'GREATER', 'IN', 'PROPORTION', 'TO', 'THE', 'DURATION', 'AND', 'STRENGTH', 'OF', 'THE', 'GOVERNMENT'] +4350-9170-0020-1402: ref=['AND', 'THEREFORE', 'THE', 'OPPRESSION', 'OF', 'THE', 'OPPRESSED', 'ALWAYS', 'GOES', 'ON', 'GROWING', 'UP', 'TO', 'THE', 'FURTHEST', 'LIMIT', 'BEYOND', 'WHICH', 'IT', 'CANNOT', 'GO', 'WITHOUT', 'KILLING', 'THE', 'GOOSE', 'WITH', 'THE', 'GOLDEN', 'EGGS'] +4350-9170-0020-1402: hyp=['AND', 'THEREFORE', 'THE', 'OPPRESSION', 'OF', 'THE', 'OPPRESSED', 'ALWAYS', 'GOES', 'ON', 'GROWING', 'UP', 'TO', 'THE', 'FURTHEST', 'LIMIT', 'BEYOND', 'WHICH', 'IT', 'CANNOT', 'GO', 'WITHOUT', 'KILLING', 'THE', 'GOOSE', 'WITH', 'THE', 'GOLDEN', 'AXE'] +4350-9170-0021-1403: ref=['THE', 'MOST', 'CONVINCING', 'EXAMPLE', 'OF', 'THIS', 'IS', 'TO', 'BE', 'FOUND', 'IN', 'THE', 'CONDITION', 'OF', 'THE', 'WORKING', 'CLASSES', 'OF', 'OUR', 'EPOCH', 'WHO', 'ARE', 'IN', 'REALITY', 'NO', 'BETTER', 'THAN', 'THE', 'SLAVES', 'OF', 'ANCIENT', 'TIMES', 'SUBDUED', 'BY', 'CONQUEST'] +4350-9170-0021-1403: hyp=['THE', 'MOST', 'CONVINCING', 'EXAMPLE', 'OF', 'THIS', 'IS', 'TO', 'BE', 'FOUND', 'IN', 'THE', 'CONDITION', 'OF', 'THE', 'WORKING', 'CLASSES', 'OF', 'OUR', 'EPOCH', 'WHO', 'ARE', 'IN', 'REALITY', 'NO', 'BETTER', 'THAN', 'THE', 'SLAVES', 'OF', 'ANCIENT', 'TIMES', 'SUBDUED', 'BY', 'CONQUEST'] +4350-9170-0022-1404: ref=['SO', 'IT', 'HAS', 'ALWAYS', 'BEEN'] +4350-9170-0022-1404: hyp=['SO', 'IT', 'HAS', 'ALWAYS', 'BEEN'] +4350-9170-0023-1405: ref=['FOOTNOTE', 'THE', 'FACT', 'THAT', 'IN', 'AMERICA', 'THE', 'ABUSES', 'OF', 'AUTHORITY', 'EXIST', 'IN', 'SPITE', 'OF', 'THE', 'SMALL', 'NUMBER', 'OF', 'THEIR', 'TROOPS', 'NOT', 'ONLY', 'FAILS', 'TO', 'DISPROVE', 'THIS', 'POSITION', 'BUT', 'POSITIVELY', 'CONFIRMS', 'IT'] +4350-9170-0023-1405: hyp=['FOOTNOTE', 'THE', 'FACT', 'THAT', 'IN', 'AMERICA', 'THE', 'ABUSES', 'OF', 'AUTHORITY', 'EXIST', 'IN', 'SPITE', 'OF', 'THE', 'SMALL', 'NUMBER', 'OF', 'THEIR', 'TROOPS', 'NOT', 'ONLY', 'FAILS', 'TO', 'DISPROVE', 'THIS', 'POSITION', 'BUT', 'POSITIVELY', 'CONFIRMS', 'IT'] +4350-9170-0024-1406: ref=['THE', 'UPPER', 'CLASSES', 'KNOW', 'THAT', 'AN', 'ARMY', 'OF', 'FIFTY', 'THOUSAND', 'WILL', 'SOON', 'BE', 'INSUFFICIENT', 'AND', 'NO', 'LONGER', 'RELYING', 'ON', "PINKERTON'S", 'MEN', 'THEY', 'FEEL', 'THAT', 'THE', 'SECURITY', 'OF', 'THEIR', 'POSITION', 'DEPENDS', 'ON', 'THE', 'INCREASED', 'STRENGTH', 'OF', 'THE', 'ARMY'] +4350-9170-0024-1406: hyp=['THE', 'UPPER', 'CLASSES', 'KNOW', 'THAT', 'AN', 'ARMY', 'OF', 'FIFTY', 'THOUSAND', 'WILL', 'SOON', 'BE', 'INSUFFICIENT', 'AND', 'NO', 'LONGER', 'RELYING', 'ON', "PINKERTON'S", 'MEN', 'THEY', 'FEEL', 'THAT', 'THE', 'SECURITY', 'OF', 'THEIR', 'POSITION', 'DEPENDS', 'ON', 'THE', 'INCREASED', 'STRENGTH', 'OF', 'THE', 'ARMY'] +4350-9170-0025-1407: ref=['THE', 'REASON', 'TO', 'WHICH', 'HE', 'GAVE', 
'EXPRESSION', 'IS', 'ESSENTIALLY', 'THE', 'SAME', 'AS', 'THAT', 'WHICH', 'MADE', 'THE', 'FRENCH', 'KINGS', 'AND', 'THE', 'POPES', 'ENGAGE', 'SWISS', 'AND', 'SCOTCH', 'GUARDS', 'AND', 'MAKES', 'THE', 'RUSSIAN', 'AUTHORITIES', 'OF', 'TO', 'DAY', 'SO', 'CAREFULLY', 'DISTRIBUTE', 'THE', 'RECRUITS', 'SO', 'THAT', 'THE', 'REGIMENTS', 'FROM', 'THE', 'FRONTIERS', 'ARE', 'STATIONED', 'IN', 'CENTRAL', 'DISTRICTS', 'AND', 'THE', 'REGIMENTS', 'FROM', 'THE', 'CENTER', 'ARE', 'STATIONED', 'ON', 'THE', 'FRONTIERS'] +4350-9170-0025-1407: hyp=['THE', 'REASON', 'TO', 'WHICH', 'HE', 'GAVE', 'EXPRESSION', 'IS', 'ESSENTIALLY', 'THE', 'SAME', 'AS', 'THAT', 'WHICH', 'MADE', 'THE', 'FRENCH', 'KINGS', 'AND', 'THE', 'POPES', 'ENGAGE', 'SWISS', 'AND', 'SCOTCH', 'GUARDS', 'AND', 'MAKES', 'THE', 'RUSSIAN', 'AUTHORITIES', 'OF', 'TO', 'DAY', 'SO', 'CAREFULLY', 'DISTRIBUTE', 'THE', 'RECRUITS', 'SO', 'THAT', 'THE', 'REGIMENTS', 'FROM', 'THE', 'FRONTIERS', 'ARE', 'STATIONED', 'IN', 'CENTRAL', 'DISTRICTS', 'AND', 'THE', 'REGIMENTS', 'FROM', 'THE', 'CENTER', 'ARE', 'STATIONED', 'ON', 'THE', 'FRONTIERS'] +4350-9170-0026-1408: ref=['THE', 'MEANING', 'OF', "CAPRIVI'S", 'SPEECH', 'PUT', 'INTO', 'PLAIN', 'LANGUAGE', 'IS', 'THAT', 'FUNDS', 'ARE', 'NEEDED', 'NOT', 'TO', 'RESIST', 'FOREIGN', 'FOES', 'BUT', 'TO', 'BUY', 'UNDER', 'OFFICERS', 'TO', 'BE', 'READY', 'TO', 'ACT', 'AGAINST', 'THE', 'ENSLAVED', 'TOILING', 'MASSES'] +4350-9170-0026-1408: hyp=['THE', 'MEANING', 'OF', 'CAPRIVI', 'SPEECH', 'PUT', 'INTO', 'PLAIN', 'LANGUAGE', 'IS', 'THAT', 'FUNDS', 'ARE', 'NEEDED', 'NOT', 'TO', 'RESIST', 'FOREIGN', 'FOES', 'BUT', 'TO', 'BUY', 'UNDER', 'OFFICERS', 'TO', 'BE', 'READY', 'TO', 'ACT', 'AGAINST', 'THE', 'ENSLAVE', 'TOILING', 'MASSES'] +4350-9170-0027-1409: ref=['AND', 'THIS', 'ABNORMAL', 'ORDER', 'OF', 'THINGS', 'IS', 'MAINTAINED', 'BY', 'THE', 'ARMY'] +4350-9170-0027-1409: hyp=['AND', 'THIS', 'ABNORMAL', 'ORDER', 'OF', 'THINGS', 'IS', 'MAINTAINED', 'BY', 'THE', 'ARMY'] +4350-9170-0028-1410: ref=['BUT', 'THERE', 'IS', 'NOT', 'ONLY', 'ONE', 'GOVERNMENT', 'THERE', 'ARE', 'OTHER', 'GOVERNMENTS', 'EXPLOITING', 'THEIR', 'SUBJECTS', 'BY', 'VIOLENCE', 'IN', 'THE', 'SAME', 'WAY', 'AND', 'ALWAYS', 'READY', 'TO', 'POUNCE', 'DOWN', 'ON', 'ANY', 'OTHER', 'GOVERNMENT', 'AND', 'CARRY', 'OFF', 'THE', 'FRUITS', 'OF', 'THE', 'TOIL', 'OF', 'ITS', 'ENSLAVED', 'SUBJECTS'] +4350-9170-0028-1410: hyp=['BUT', 'THERE', 'IS', 'NOT', 'ONLY', 'ONE', 'GOVERNMENT', 'THERE', 'ARE', 'OTHER', 'GOVERNMENTS', 'EXPLOITING', 'THEIR', 'SUBJECTS', 'BY', 'VIOLENCE', 'IN', 'THE', 'SAME', 'WAY', 'AND', 'ARE', 'ALWAYS', 'READY', 'TO', 'POUNCE', 'DOWN', 'ON', 'ANY', 'OTHER', 'GOVERNMENT', 'AND', 'CARRY', 'OFF', 'THE', 'FRUITS', 'OF', 'THE', 'TOIL', 'OF', 'ITS', 'ENSLAVED', 'SUBJECTS'] +4350-9170-0029-1411: ref=['AND', 'SO', 'EVERY', 'GOVERNMENT', 'NEEDS', 'AN', 'ARMY', 'ALSO', 'TO', 'PROTECT', 'ITS', 'BOOTY', 'FROM', 'ITS', 'NEIGHBOR', 'BRIGANDS'] +4350-9170-0029-1411: hyp=['AND', 'SO', 'EVERY', 'GOVERNMENT', 'NEEDS', 'AN', 'ARMY', 'ALSO', 'TO', 'PROTECT', 'ITS', 'BOOTY', 'FROM', 'ITS', 'NEIGHBOUR', 'BRIGANDS'] +4350-9170-0030-1412: ref=['THIS', 'INCREASE', 'IS', 'CONTAGIOUS', 'AS', 'MONTESQUIEU', 'POINTED', 'OUT', 'ONE', 'HUNDRED', 'FIFTY', 'YEARS', 'AGO'] +4350-9170-0030-1412: hyp=['THIS', 'INCREASE', 'IS', 'CONTAGIOUS', 'AS', 'MONTESQUIEU', 'POINTED', 'OUT', 'A', 'HUNDRED', 'FIFTY', 'YEARS', 'AGO'] +4350-9170-0031-1413: ref=['EVERY', 'INCREASE', 'IN', 'THE', 'ARMY', 'OF', 'ONE', 'STATE', 'WITH', 'THE', 'AIM', 'OF', 'SELF', 'DEFENSE', 'AGAINST', 'ITS', 'SUBJECTS', 'BECOMES', 
'A', 'SOURCE', 'OF', 'DANGER', 'FOR', 'NEIGHBORING', 'STATES', 'AND', 'CALLS', 'FOR', 'A', 'SIMILAR', 'INCREASE', 'IN', 'THEIR', 'ARMIES'] +4350-9170-0031-1413: hyp=['EVERY', 'INCREASE', 'IN', 'THE', 'ARMY', 'OF', 'ONE', 'STATE', 'WITH', 'THE', 'AIM', 'OF', 'SELF', 'DEFENSE', 'AGAINST', 'ITS', 'SUBJECTS', 'BECOMES', 'A', 'SOURCE', 'OF', 'DANGER', 'FOR', 'NEIGHBORING', 'STATES', 'AND', 'CALLS', 'FOR', 'A', 'SIMILAR', 'INCREASE', 'IN', 'THEIR', 'ARMIES'] +4350-9170-0032-1414: ref=['THE', 'DESPOTISM', 'OF', 'A', 'GOVERNMENT', 'ALWAYS', 'INCREASES', 'WITH', 'THE', 'STRENGTH', 'OF', 'THE', 'ARMY', 'AND', 'ITS', 'EXTERNAL', 'SUCCESSES', 'AND', 'THE', 'AGGRESSIVENESS', 'OF', 'A', 'GOVERNMENT', 'INCREASES', 'WITH', 'ITS', 'INTERNAL', 'DESPOTISM'] +4350-9170-0032-1414: hyp=['THE', 'DESPOTISM', 'OF', 'THE', 'GOVERNMENT', 'ALWAYS', 'INCREASES', 'WITH', 'THE', 'STRENGTH', 'OF', 'THE', 'ARMY', 'AND', 'ITS', 'EXTERNAL', 'SUCCESSES', 'AND', 'THE', 'AGGRESSIVENESS', 'OF', 'THE', 'GOVERNMENT', 'INCREASES', 'WITH', 'ITS', 'INTERNAL', 'DESPOTISM'] +4350-9170-0033-1415: ref=['THE', 'RIVALRY', 'OF', 'THE', 'EUROPEAN', 'STATES', 'IN', 'CONSTANTLY', 'INCREASING', 'THEIR', 'FORCES', 'HAS', 'REDUCED', 'THEM', 'TO', 'THE', 'NECESSITY', 'OF', 'HAVING', 'RECOURSE', 'TO', 'UNIVERSAL', 'MILITARY', 'SERVICE', 'SINCE', 'BY', 'THAT', 'MEANS', 'THE', 'GREATEST', 'POSSIBLE', 'NUMBER', 'OF', 'SOLDIERS', 'IS', 'OBTAINED', 'AT', 'THE', 'LEAST', 'POSSIBLE', 'EXPENSE'] +4350-9170-0033-1415: hyp=['THE', 'RIVALRY', 'OF', 'THE', 'EUROPEAN', 'STATES', 'IN', 'CONSTANTLY', 'INCREASING', 'THEIR', 'FORCES', 'HAS', 'REDUCED', 'THEM', 'TO', 'THE', 'NECESSITY', 'OF', 'HAVING', 'RECOURSE', 'TO', 'UNIVERSAL', 'MILITARY', 'SERVICE', 'SINCE', 'BY', 'THAT', 'MEANS', 'THE', 'GREATEST', 'POSSIBLE', 'NUMBER', 'OF', 'SOLDIERS', 'IS', 'OBTAINED', 'AT', 'THE', 'LEAST', 'POSSIBLE', 'EXPENSE'] +4350-9170-0034-1416: ref=['AND', 'BY', 'THIS', 'MEANS', 'ALL', 'CITIZENS', 'ARE', 'UNDER', 'ARMS', 'TO', 'SUPPORT', 'THE', 'INIQUITIES', 'PRACTICED', 'UPON', 'THEM', 'ALL', 'CITIZENS', 'HAVE', 'BECOME', 'THEIR', 'OWN', 'OPPRESSORS'] +4350-9170-0034-1416: hyp=['AND', 'BY', 'THIS', 'MEANS', 'ALL', 'CITIZENS', 'ARE', 'UNDER', 'ARMS', 'TO', 'SUPPORT', 'THE', 'INIQUITIES', 'PRACTISED', 'UPON', 'THEM', 'ALL', 'CITIZENS', 'HAVE', 'BECOME', 'THEIR', 'OWN', 'OPPRESSORS'] +4350-9170-0035-1417: ref=['THIS', 'INCONSISTENCY', 'HAS', 'BECOME', 'OBVIOUS', 'IN', 'UNIVERSAL', 'MILITARY', 'SERVICE'] +4350-9170-0035-1417: hyp=['THIS', 'INCONSISTENCY', 'HAS', 'BECOME', 'OBVIOUS', 'IN', 'UNIVERSAL', 'MILITARY', 'SERVICE'] +4350-9170-0036-1418: ref=['IN', 'FACT', 'THE', 'WHOLE', 'SIGNIFICANCE', 'OF', 'THE', 'SOCIAL', 'CONCEPTION', 'OF', 'LIFE', 'CONSISTS', 'IN', "MAN'S", 'RECOGNITION', 'OF', 'THE', 'BARBARITY', 'OF', 'STRIFE', 'BETWEEN', 'INDIVIDUALS', 'AND', 'THE', 'TRANSITORINESS', 'OF', 'PERSONAL', 'LIFE', 'ITSELF', 'AND', 'THE', 'TRANSFERENCE', 'OF', 'THE', 'AIM', 'OF', 'LIFE', 'TO', 'GROUPS', 'OF', 'PERSONS'] +4350-9170-0036-1418: hyp=['IN', 'FACT', 'THE', 'WHOLE', 'SIGNIFICANCE', 'OF', 'THE', 'SOCIAL', 'CONCEPTION', 'OF', 'LIFE', 'CONSISTS', 'IN', "MAN'S", 'RECOGNITION', 'OF', 'THE', 'BARBARITY', 'OF', 'STRIFE', 'BETWEEN', 'INDIVIDUALS', 'AND', 'THE', 'TRANSITORINESS', 'OF', 'PERSONAL', 'LIFE', 'ITSELF', 'AND', 'THE', 'TRANSFERENCE', 'OF', 'THE', 'AIM', 'OF', 'LIFE', 'TO', 'GROUPS', 'OF', 'PERSONS'] +4350-9170-0037-1419: ref=['BUT', 'WITH', 'UNIVERSAL', 'MILITARY', 'SERVICE', 'IT', 'COMES', 'TO', 'PASS', 'THAT', 'MEN', 'AFTER', 'MAKING', 'EVERY', 'SACRIFICE', 'TO', 'GET', 
'RID', 'OF', 'THE', 'CRUELTY', 'OF', 'STRIFE', 'AND', 'THE', 'INSECURITY', 'OF', 'EXISTENCE', 'ARE', 'CALLED', 'UPON', 'TO', 'FACE', 'ALL', 'THE', 'PERILS', 'THEY', 'HAD', 'MEANT', 'TO', 'AVOID'] +4350-9170-0037-1419: hyp=['BUT', 'WITH', 'UNIVERSAL', 'MILITARY', 'SERVICE', 'IT', 'COMES', 'TO', 'PASS', 'THAT', 'MEN', 'AFTER', 'MAKING', 'EVERY', 'SACRIFICE', 'TO', 'GET', 'RID', 'OF', 'THE', 'CRUELTY', 'OF', 'STRIFE', 'AND', 'THE', 'INSECURITY', 'OF', 'EXISTENCE', 'ARE', 'CALLED', 'UPON', 'TO', 'FACE', 'ALL', 'THE', 'PERILS', 'THEY', 'HAD', 'MEANT', 'TO', 'AVOID'] +4350-9170-0038-1420: ref=['BUT', 'INSTEAD', 'OF', 'DOING', 'THAT', 'THEY', 'EXPOSE', 'THE', 'INDIVIDUALS', 'TO', 'THE', 'SAME', 'NECESSITY', 'OF', 'STRIFE', 'SUBSTITUTING', 'STRIFE', 'WITH', 'INDIVIDUALS', 'OF', 'OTHER', 'STATES', 'FOR', 'STRIFE', 'WITH', 'NEIGHBORS'] +4350-9170-0038-1420: hyp=['BUT', 'INSTEAD', 'OF', 'DOING', 'THAT', 'THEY', 'EXPOSE', 'THE', 'INDIVIDUALS', 'TO', 'THE', 'SAME', 'NECESSITY', 'OF', 'STRIFE', 'SUBSTITUTING', 'STRIFE', 'WITH', 'INDIVIDUALS', 'OF', 'OTHER', 'STATES', 'FOR', 'STRIFE', 'WITH', 'NEIGHBORS'] +4350-9170-0039-1421: ref=['THE', 'TAXES', 'RAISED', 'FROM', 'THE', 'PEOPLE', 'FOR', 'WAR', 'PREPARATIONS', 'ABSORB', 'THE', 'GREATER', 'PART', 'OF', 'THE', 'PRODUCE', 'OF', 'LABOR', 'WHICH', 'THE', 'ARMY', 'OUGHT', 'TO', 'DEFEND'] +4350-9170-0039-1421: hyp=['THE', 'TAXES', 'RAISED', 'FROM', 'THE', 'PEOPLE', 'FOR', 'WAR', 'PREPARATIONS', 'ABSORB', 'THE', 'GREATER', 'PART', 'OF', 'THE', 'PRODUCE', 'OF', 'LABOR', 'WHICH', 'THE', 'ARMY', 'OUGHT', 'TO', 'DEFEND'] +4350-9170-0040-1422: ref=['THE', 'DANGER', 'OF', 'WAR', 'EVER', 'READY', 'TO', 'BREAK', 'OUT', 'RENDERS', 'ALL', 'REFORMS', 'OF', 'LIFE', 'SOCIAL', 'LIFE', 'VAIN', 'AND', 'FRUITLESS'] +4350-9170-0040-1422: hyp=['THE', 'DANGER', 'OF', 'WAR', 'EVER', 'READY', 'TO', 'BREAK', 'OUT', 'RENDERS', 'ALL', 'REFORMS', 'OF', "LIFE'S", 'SOCIAL', 'LIFE', 'VAIN', 'AND', 'FRUITLESS'] +4350-9170-0041-1423: ref=['BUT', 'THE', 'FATAL', 'SIGNIFICANCE', 'OF', 'UNIVERSAL', 'MILITARY', 'SERVICE', 'AS', 'THE', 'MANIFESTATION', 'OF', 'THE', 'CONTRADICTION', 'INHERENT', 'IN', 'THE', 'SOCIAL', 'CONCEPTION', 'OF', 'LIFE', 'IS', 'NOT', 'ONLY', 'APPARENT', 'IN', 'THAT'] +4350-9170-0041-1423: hyp=['BUT', 'THE', 'FATAL', 'SIGNIFICANCE', 'OF', 'UNIVERSAL', 'MILITARY', 'SERVICE', 'AS', 'THE', 'MANIFESTATION', 'OF', 'THE', 'CONTRADICTION', 'INHERENT', 'IN', 'THE', 'SOCIAL', 'CONCEPTION', 'OF', 'LIFE', 'IS', 'NOT', 'ONLY', 'APPARENT', 'IN', 'THAT'] +4350-9170-0042-1424: ref=['GOVERNMENTS', 'ASSERT', 'THAT', 'ARMIES', 'ARE', 'NEEDED', 'ABOVE', 'ALL', 'FOR', 'EXTERNAL', 'DEFENSE', 'BUT', 'THAT', 'IS', 'NOT', 'TRUE'] +4350-9170-0042-1424: hyp=['GOVERNMENTS', 'ASSERT', 'THAT', 'ARMIES', 'ARE', 'NEEDED', 'ABOVE', 'ALL', 'FOR', 'EXTERNAL', 'DEFENSE', 'BUT', 'THAT', 'IS', 'NOT', 'TRUE'] +4350-9170-0043-1425: ref=['THEY', 'ARE', 'NEEDED', 'PRINCIPALLY', 'AGAINST', 'THEIR', 'SUBJECTS', 'AND', 'EVERY', 'MAN', 'UNDER', 'UNIVERSAL', 'MILITARY', 'SERVICE', 'BECOMES', 'AN', 'ACCOMPLICE', 'IN', 'ALL', 'THE', 'ACTS', 'OF', 'VIOLENCE', 'OF', 'THE', 'GOVERNMENT', 'AGAINST', 'THE', 'CITIZENS', 'WITHOUT', 'ANY', 'CHOICE', 'OF', 'HIS', 'OWN'] +4350-9170-0043-1425: hyp=['THEY', 'ARE', 'NEEDED', 'PRINCIPALLY', 'AGAINST', 'THEIR', 'SUBJECTS', 'AND', 'EVERY', 'MAN', 'UNDER', 'UNIVERSAL', 'MILITARY', 'SERVICE', 'BECOMES', 'AN', 'ACCOMPLICE', 'IN', 'ALL', 'THE', 'ACTS', 'OF', 'VIOLENCE', 'OF', 'THE', 'GOVERNMENT', 'AGAINST', 'THE', 'CITIZENS', 'WITHOUT', 'ANY', 'CHOICE', 'OF', 'HIS', 'OWN'] 
+4350-9170-0044-1426: ref=['AND', 'FOR', 'THE', 'SAKE', 'OF', 'WHAT', 'AM', 'I', 'MAKING', 'THEM'] +4350-9170-0044-1426: hyp=['AND', 'FOR', 'THE', 'SAKE', 'OF', 'WHAT', 'AM', 'I', 'MAKING', 'THEM'] +4350-9170-0045-1427: ref=['I', 'AM', 'EXPECTED', 'FOR', 'THE', 'SAKE', 'OF', 'THE', 'STATE', 'TO', 'MAKE', 'THESE', 'SACRIFICES', 'TO', 'RENOUNCE', 'EVERYTHING', 'THAT', 'CAN', 'BE', 'PRECIOUS', 'TO', 'MAN', 'PEACE', 'FAMILY', 'SECURITY', 'AND', 'HUMAN', 'DIGNITY'] +4350-9170-0045-1427: hyp=['I', 'AM', 'EXPECTED', 'FOR', 'THE', 'SAKE', 'OF', 'THE', 'STATE', 'TO', 'MAKE', 'THESE', 'SACRIFICES', 'TO', 'RENOUNCE', 'EVERYTHING', 'THAT', 'CAN', 'BE', 'PRECIOUS', 'TO', 'MAN', 'PEACE', 'FAMILY', 'SECURITY', 'AND', 'HUMAN', 'DIGNITY'] +4350-9170-0046-1428: ref=['EXCEPT', 'FOR', 'THE', 'STATE', 'THEY', 'SAY', 'WE', 'SHOULD', 'BE', 'EXPOSED', 'TO', 'THE', 'ATTACKS', 'OF', 'EVIL', 'DISPOSED', 'PERSONS', 'IN', 'OUR', 'OWN', 'COUNTRY'] +4350-9170-0046-1428: hyp=['EXCEPT', 'FOR', 'THE', 'STATE', 'THEY', 'SAY', 'WE', 'SHOULD', 'BE', 'EXPOSED', 'TO', 'THE', 'ATTACKS', 'OF', 'EVIL', 'DISPOSED', 'PERSONS', 'IN', 'OUR', 'OWN', 'COUNTRY'] +4350-9170-0047-1429: ref=['WE', 'KNOW', 'NOW', 'THAT', 'THREATS', 'AND', 'PUNISHMENTS', 'CANNOT', 'DIMINISH', 'THEIR', 'NUMBER', 'THAT', 'THAT', 'CAN', 'ONLY', 'BE', 'DONE', 'BY', 'CHANGE', 'OF', 'ENVIRONMENT', 'AND', 'MORAL', 'INFLUENCE'] +4350-9170-0047-1429: hyp=['WE', 'NOW', 'KNOW', 'THAT', 'THREATS', 'AND', 'PUNISHMENTS', 'CANNOT', 'DIMINISH', 'THEIR', 'NUMBER', 'THAT', 'THAT', 'CAN', 'ONLY', 'BE', 'DONE', 'BY', 'CHANGE', 'OF', 'ENVIRONMENT', 'AND', 'MORAL', 'INFLUENCE'] +4350-9170-0048-1430: ref=['SO', 'THAT', 'THE', 'JUSTIFICATION', 'OF', 'STATE', 'VIOLENCE', 'ON', 'THE', 'GROUND', 'OF', 'THE', 'PROTECTION', 'IT', 'GIVES', 'US', 'FROM', 'EVIL', 'DISPOSED', 'PERSONS', 'EVEN', 'IF', 'IT', 'HAD', 'SOME', 'FOUNDATION', 'THREE', 'OR', 'FOUR', 'CENTURIES', 'AGO', 'HAS', 'NONE', 'WHATEVER', 'NOW'] +4350-9170-0048-1430: hyp=['SO', 'THAT', 'THE', 'JUSTIFICATION', 'OF', 'STATE', 'VIOLENCE', 'ON', 'THE', 'GROUND', 'OF', 'THE', 'PROTECTION', 'IT', 'GIVES', 'US', 'FROM', 'EVIL', 'DISPOSED', 'PERSONS', 'EVEN', 'IF', 'IT', 'HAD', 'SOME', 'FOUNDATION', 'THREE', 'OR', 'FOUR', 'CENTURIES', 'AGO', 'HAS', 'NONE', 'WHATEVER', 'KNOWN'] +4350-9170-0049-1431: ref=['EXCEPT', 'FOR', 'THE', 'STATE', 'THEY', 'TELL', 'US', 'WE', 'SHOULD', 'NOT', 'HAVE', 'ANY', 'RELIGION', 'EDUCATION', 'CULTURE', 'MEANS', 'OF', 'COMMUNICATION', 'AND', 'SO', 'ON'] +4350-9170-0049-1431: hyp=['EXCEPT', 'FOR', 'THE', 'STATE', 'THEY', 'TELL', 'US', 'WE', 'SHOULD', 'NOT', 'HAVE', 'ANY', 'RELIGION', 'EDUCATION', 'CULTURE', 'MEANS', 'OF', 'COMMUNICATION', 'AND', 'SO', 'ON'] +4350-9170-0050-1432: ref=['WITHOUT', 'THE', 'STATE', 'MEN', 'WOULD', 'NOT', 'HAVE', 'BEEN', 'ABLE', 'TO', 'FORM', 'THE', 'SOCIAL', 'INSTITUTIONS', 'NEEDED', 'FOR', 'DOING', 'ANY', 'THING'] +4350-9170-0050-1432: hyp=['WITHOUT', 'THE', 'STATE', 'MEN', 'WOULD', 'NOT', 'HAVE', 'BEEN', 'ABLE', 'TO', 'FORM', 'THE', 'SOCIAL', 'INSTITUTIONS', 'NEEDED', 'FOR', 'DOING', 'ANYTHING'] +4350-9170-0051-1433: ref=['THIS', 'ARGUMENT', 'TOO', 'WAS', 'WELL', 'FOUNDED', 'ONLY', 'SOME', 'CENTURIES', 'AGO'] +4350-9170-0051-1433: hyp=['THIS', 'ARGUMENT', 'TOO', 'WAS', 'WELL', 'FOUNDED', 'ONLY', 'SOME', 'CENTURIES', 'AGO'] +4350-9170-0052-1434: ref=['THE', 'GREAT', 'EXTENSION', 'OF', 'MEANS', 'OF', 'COMMUNICATION', 'AND', 'INTERCHANGE', 'OF', 'IDEAS', 'HAS', 'MADE', 'MEN', 'COMPLETELY', 'ABLE', 'TO', 'DISPENSE', 'WITH', 'STATE', 'AID', 'IN', 'FORMING', 'SOCIETIES', 
'ASSOCIATIONS', 'CORPORATIONS', 'AND', 'CONGRESSES', 'FOR', 'SCIENTIFIC', 'ECONOMIC', 'AND', 'POLITICAL', 'OBJECTS'] +4350-9170-0052-1434: hyp=['THE', 'GREAT', 'EXTENSION', 'OF', 'MEANS', 'OF', 'COMMUNICATION', 'AND', 'INTERCHANGE', 'OF', 'IDEAS', 'HAS', 'MADE', 'MEN', 'COMPLETELY', 'ABLE', 'TO', 'DISPENSE', 'WITH', 'STATE', 'AID', 'IN', 'FORMING', 'SOCIETIES', 'ASSOCIATIONS', 'CORPORATIONS', 'AND', 'CONGRESSES', 'FOR', 'SCIENTIFIC', 'ECONOMIC', 'AND', 'POLITICAL', 'OBJECTS'] +4350-9170-0053-1435: ref=['WITHOUT', 'GOVERNMENTS', 'NATIONS', 'WOULD', 'BE', 'ENSLAVED', 'BY', 'THEIR', 'NEIGHBORS'] +4350-9170-0053-1435: hyp=['WITHOUT', 'GOVERNMENTS', 'NATIONS', 'WOULD', 'BE', 'ENSLAVED', 'BY', 'THEIR', 'NEIGHBORS'] +4350-9170-0054-1436: ref=['THE', 'GOVERNMENT', 'THEY', 'TELL', 'US', 'WITH', 'ITS', 'ARMY', 'IS', 'NECESSARY', 'TO', 'DEFEND', 'US', 'FROM', 'NEIGHBORING', 'STATES', 'WHO', 'MIGHT', 'ENSLAVE', 'US'] +4350-9170-0054-1436: hyp=['THE', 'GOVERNMENT', 'THEY', 'TELL', 'US', 'WITH', 'ITS', 'ARMY', 'IS', 'NECESSARY', 'TO', 'DEFEND', 'US', 'FROM', 'NEIGHBORING', 'STATES', 'WHO', 'MIGHT', 'ENSLAVE', 'US'] +4350-9170-0055-1437: ref=['AND', 'IF', 'DEFENSE', 'AGAINST', 'BARBAROUS', 'NATIONS', 'IS', 'MEANT', 'ONE', 'THOUSANDTH', 'PART', 'OF', 'THE', 'TROOPS', 'NOW', 'UNDER', 'ARMS', 'WOULD', 'BE', 'AMPLY', 'SUFFICIENT', 'FOR', 'THAT', 'PURPOSE'] +4350-9170-0055-1437: hyp=['AND', 'IF', 'DEFENCE', 'AGAINST', 'BARBAROUS', 'NATIONS', 'IS', 'MEANT', 'ONE', 'THOUSANDTH', 'PART', 'OF', 'THE', 'TROOPS', 'NOW', 'UNDER', 'ARMS', 'WOULD', 'BE', 'AMPLY', 'SUFFICIENT', 'FOR', 'THAT', 'PURPOSE'] +4350-9170-0056-1438: ref=['THE', 'POWER', 'OF', 'THE', 'STATE', 'FAR', 'FROM', 'BEING', 'A', 'SECURITY', 'AGAINST', 'THE', 'ATTACKS', 'OF', 'OUR', 'NEIGHBORS', 'EXPOSES', 'US', 'ON', 'THE', 'CONTRARY', 'TO', 'MUCH', 'GREATER', 'DANGER', 'OF', 'SUCH', 'ATTACKS'] +4350-9170-0056-1438: hyp=['THE', 'POWER', 'OF', 'THE', 'STATE', 'FAR', 'FROM', 'BEING', 'A', 'SECURITY', 'AGAINST', 'THE', 'ATTACKS', 'OF', 'OUR', 'NEIGHBORS', 'EXPOSES', 'US', 'ON', 'THE', 'CONTRARY', 'TO', 'MUCH', 'GREATER', 'DANGER', 'OF', 'SUCH', 'ATTACKS'] +4350-9170-0057-1439: ref=['EVEN', 'LOOKING', 'AT', 'IT', 'PRACTICALLY', 'WEIGHING', 'THAT', 'IS', 'TO', 'SAY', 'ALL', 'THE', 'BURDENS', 'LAID', 'ON', 'HIM', 'BY', 'THE', 'STATE', 'NO', 'MAN', 'CAN', 'FAIL', 'TO', 'SEE', 'THAT', 'FOR', 'HIM', 'PERSONALLY', 'TO', 'COMPLY', 'WITH', 'STATE', 'DEMANDS', 'AND', 'SERVE', 'IN', 'THE', 'ARMY', 'WOULD', 'IN', 'THE', 'MAJORITY', 'OF', 'CASES', 'BE', 'MORE', 'DISADVANTAGEOUS', 'THAN', 'TO', 'REFUSE', 'TO', 'DO', 'SO'] +4350-9170-0057-1439: hyp=['EVEN', 'LOOKING', 'AT', 'IT', 'PRACTICALLY', 'WEIGHING', 'THAT', 'IS', 'TO', 'SAY', 'ALL', 'THE', 'BURDENS', 'LAID', 'ON', 'HIM', 'BY', 'THE', 'STATES', 'NO', 'MAN', 'CAN', 'FAIL', 'TO', 'SEE', 'THAT', 'FOR', 'HIM', 'PERSONALLY', 'TO', 'COMPLY', 'WITH', 'THE', 'STATE', 'DEMANDS', 'AND', 'SERVE', 'IN', 'THE', 'ARMY', 'WOULD', 'IN', 'THE', 'MAJORITY', 'OF', 'CASES', 'BE', 'MORE', 'DISADVANTAGEOUS', 'THAN', 'TO', 'REFUSE', 'TO', 'DO', 'SO'] +4350-9170-0058-1440: ref=['TO', 'RESIST', 'WOULD', 'NEED', 'INDEPENDENT', 'THOUGHT', 'AND', 'EFFORT', 'OF', 'WHICH', 'EVERY', 'MAN', 'IS', 'NOT', 'CAPABLE'] +4350-9170-0058-1440: hyp=['TO', 'RESIST', 'WOULD', 'NEED', 'INDEPENDENT', 'THOUGHT', 'AND', 'EFFORT', 'OF', 'WHICH', 'EVERY', 'MAN', 'IS', 'NOT', 'CAPABLE'] +4350-9170-0059-1441: ref=['SO', 'MUCH', 'FOR', 'THE', 'ADVANTAGES', 'AND', 'DISADVANTAGES', 'OF', 'BOTH', 'LINES', 'OF', 'CONDUCT', 'FOR', 'A', 'MAN', 'OF', 'THE', 'WEALTHY', 
'CLASSES', 'AN', 'OPPRESSOR'] +4350-9170-0059-1441: hyp=['SO', 'MUCH', 'FOR', 'THE', 'ADVANTAGES', 'AND', 'DISADVANTAGES', 'OF', 'BOTH', 'LINES', 'OF', 'CONDUCT', 'FOR', 'A', 'MAN', 'OF', 'THE', 'WEALTHY', 'CLASS', 'AND', 'OPPRESSOR'] +4350-9170-0060-1442: ref=['FOR', 'A', 'MAN', 'OF', 'THE', 'POOR', 'WORKING', 'CLASS', 'THE', 'ADVANTAGES', 'AND', 'DISADVANTAGES', 'WILL', 'BE', 'THE', 'SAME', 'BUT', 'WITH', 'A', 'GREAT', 'INCREASE', 'OF', 'DISADVANTAGES'] +4350-9170-0060-1442: hyp=['FOR', 'A', 'MAN', 'OF', 'THE', 'POORER', 'WORKING', 'CLASS', 'THE', 'ADVANTAGES', 'AND', 'DISADVANTAGES', 'WILL', 'BE', 'THE', 'SAME', 'BUT', 'WITH', 'A', 'GREAT', 'INCREASE', 'OF', 'DISADVANTAGES'] +4852-28311-0000-1443: ref=['SAY', 'YOU', 'KNOW', 'SUMTHIN'] +4852-28311-0000-1443: hyp=['SAY', 'YOU', 'KNOW', 'SUPPER'] +4852-28311-0001-1444: ref=['CHRIS', 'LOOKED', 'FROM', 'A', 'NICKEL', 'PLATED', 'FLASHLIGHT', 'TO', 'A', 'CAR', 'JACK', 'AND', 'SPARK', 'PLUG'] +4852-28311-0001-1444: hyp=['CHRIS', 'LOOKED', 'FROM', 'A', 'NICKEL', 'PLATED', 'FLASHLIGHT', 'TO', 'A', 'CAR', 'JACK', 'AND', 'SPARK', 'PLUG'] +4852-28311-0002-1445: ref=['KNOW', 'WHO', 'NEEDS', 'A', 'JOB', 'BAD', "THAT'S", 'JAKEY', 'HARRIS'] +4852-28311-0002-1445: hyp=['KNOW', 'WHO', 'NEEDS', 'A', 'JOB', 'BAN', "THAT'S", 'JIKI', 'HARRIS'] +4852-28311-0003-1446: ref=['O', 'K', 'HE', 'SAID'] +4852-28311-0003-1446: hyp=['O', 'K', 'HE', 'SAID'] +4852-28311-0004-1447: ref=['ONLY', 'WHY', "DIDN'T", 'YOU', 'ASK', 'HIM', 'YOURSELF'] +4852-28311-0004-1447: hyp=['ONLY', 'WHY', "DIDN'T", 'YOU', 'ASK', 'HIM', 'YOURSELF'] +4852-28311-0005-1448: ref=['MIKE', 'BECAME', 'UNEASY', 'AND', 'FISHED', 'AN', 'ELASTIC', 'BAND', 'OUT', 'OF', 'HIS', 'POCKET', 'MADE', 'A', 'FLICK', 'OF', 'PAPER', 'AND', 'SENT', 'IT', 'SOARING', 'OUT', 'INTO', 'M', 'STREET'] +4852-28311-0005-1448: hyp=['MIKE', 'BECAME', 'UNEASY', 'AND', 'FISHED', 'AN', 'ELASTIC', 'BAND', 'OUT', 'OF', 'HIS', 'POCKET', 'MADE', 'A', 'FLICK', 'OF', 'PAPER', 'AND', 'SENT', 'IT', 'SOARING', 'OUT', 'INTO', 'EM', 'STREET'] +4852-28311-0006-1449: ref=['WELL', 'HE', 'ADMITTED', 'I', 'DID'] +4852-28311-0006-1449: hyp=['WELL', 'HE', 'ADMITTED', 'I', 'DID'] +4852-28311-0007-1450: ref=['CHRIS', 'ASKED', 'AND', 'FOR', 'THE', 'FIRST', 'TIME', 'THAT', 'DAY', 'THE', 'HEAVY', 'WEIGHT', 'HE', 'CARRIED', 'WITHIN', 'HIM', 'LIFTED', 'AND', 'LIGHTENED', 'A', 'LITTLE'] +4852-28311-0007-1450: hyp=['CHRIS', 'ASKED', 'AND', 'FOR', 'THE', 'FIRST', 'TIME', 'THAT', 'DAY', 'THE', 'HEAVY', 'WEIGHT', 'HE', 'CARRIED', 'WITHIN', 'HIM', 'LIFTED', 'AND', 'LIGHTENED', 'A', 'LITTLE'] +4852-28311-0008-1451: ref=['THINK', 'HE', 'REALLY', 'NEEDS', 'IT', 'HE', 'PURSUED'] +4852-28311-0008-1451: hyp=['THINK', 'HE', 'REALLY', 'NEEDS', 'IT', 'HE', 'PURSUED'] +4852-28311-0009-1452: ref=['HE', 'WOULD', 'HAVE', 'LIKED', 'TO', 'GET', 'THE', 'JOB', 'FOR', 'JAKEY', 'WHO', 'NEEDED', 'IT', 'BUT', 'SOMEHOW', 'THE', 'TASK', 'OF', 'FACING', 'MISTER', 'WICKER', 'ESPECIALLY', 'NOW', 'THAT', 'THE', 'LIGHT', 'WAS', 'GOING', 'AND', 'DUSK', 'EDGING', 'INTO', 'THE', 'STREETS', 'WAS', 'NOT', 'WHAT', 'CHRIS', 'HAD', 'INTENDED', 'FOR', 'ENDING', 'THE', 'AFTERNOON'] +4852-28311-0009-1452: hyp=['HE', 'WOULD', 'HAVE', 'LIKED', 'TO', 'GET', 'THE', 'JOB', 'FOR', 'JAKIE', 'WHO', 'NEEDED', 'IT', 'BUT', 'SOMEHOW', 'THE', 'TASK', 'OF', 'FACING', 'MISTER', 'WICKER', 'ESPECIALLY', 'NOW', 'THAT', 'THE', 'LIGHT', 'WAS', 'GOING', 'AND', 'DUSK', 'EDGED', 'INTO', 'THE', 'STREETS', 'WAS', 'NOT', 'WHAT', 'CHRIS', 'HAD', 'INTENDED', 'FOR', 'ENDING', 'THE', 'AFTERNOON'] +4852-28311-0010-1453: 
ref=["MIKE'S", 'EXPRESSION', 'CHANGED', 'AT', 'ONCE', 'TO', 'ONE', 'OF', 'TRIUMPH', 'BUT', 'CHRIS', 'WAS', 'ONLY', 'PARTLY', 'ENCOURAGED'] +4852-28311-0010-1453: hyp=["MIKE'S", 'EXPRESSION', 'CHANGED', 'AT', 'ONCE', 'TO', 'ONE', 'OF', 'TRIUMPH', 'BUT', 'CHRIS', 'WAS', 'ONLY', 'PARTIALLY', 'ENCOURAGED'] +4852-28311-0011-1454: ref=['BETCHA', "AREN'T", 'GOIN', 'AFTER', 'ALL', 'CHRIS', 'TURNED', 'ON', 'HIM'] +4852-28311-0011-1454: hyp=['BITCHER', "AREN'T", 'GOING', 'AFTER', 'ALL', 'CHRIS', 'TURNED', 'ON', 'HIM'] +4852-28311-0012-1455: ref=['MIKE', 'WAS', 'STANDING', 'ON', 'THE', 'CORNER'] +4852-28311-0012-1455: hyp=['MIKE', 'WAS', 'STANDING', 'ON', 'THE', 'CORNER'] +4852-28311-0013-1456: ref=['AW', 'SHUCKS'] +4852-28311-0013-1456: hyp=['OH', 'SHUCKS'] +4852-28311-0014-1457: ref=['CHRIS', 'STARTED', 'OFF', 'ONCE', 'MORE', 'PASSING', 'THE', 'BLEAK', 'LITTLE', 'VICTORIAN', 'CHURCH', 'PERCHED', 'ON', 'THE', 'HILL', 'ABOVE', 'MISTER', "WICKER'S", 'HOUSE'] +4852-28311-0014-1457: hyp=['CHRIS', 'STARTED', 'OFF', 'ONCE', 'MORE', 'PASSING', 'THE', 'BLEAK', 'LITTLE', 'VICTORIAN', 'CHURCH', 'PERCHED', 'ON', 'THE', 'HILL', 'ABOVE', 'MISTER', "WICKER'S", 'HOUSE'] +4852-28311-0015-1458: ref=['AN', 'EMPTY', 'LOT', 'CUT', 'INTO', 'BY', 'CHURCH', 'LANE', 'GAVE', 'A', 'LOOK', 'OF', 'ISOLATION', 'TO', 'THE', 'L', 'SHAPED', 'BRICK', 'BUILDING', 'THAT', 'SERVED', 'MISTER', 'WICKER', 'AS', 'BOTH', 'HOUSE', 'AND', 'PLACE', 'OF', 'BUSINESS'] +4852-28311-0015-1458: hyp=['AN', 'EMPTY', 'LOT', 'CUT', 'IN', 'INTO', 'BY', 'CHURCH', 'LANE', 'GAVE', 'A', 'LOOK', 'OF', 'ISOLATION', 'TO', 'THE', 'L', 'SHAPED', 'BRICK', 'BUILDING', 'THAT', 'SERVED', 'MISTER', 'WICKER', 'AS', 'BOTH', 'HOUSE', 'AND', 'PLACE', 'OF', 'BUSINESS'] +4852-28311-0016-1459: ref=['THE', 'LONGER', 'WING', 'TOWARD', 'THE', 'BACK', 'HAD', 'A', 'BACK', 'DOOR', 'THAT', 'OPENED', 'ONTO', 'WATER', 'STREET', 'THE', 'SPACE', 'BETWEEN', 'THE', 'HOUSE', 'AND', 'WISCONSIN', 'AVENUE', 'HAD', 'BEEN', 'MADE', 'INTO', 'A', 'NEAT', 'OBLONG', 'FLOWER', 'GARDEN', 'FENCED', 'OFF', 'FROM', 'THE', 'SIDEWALK', 'BY', 'BOX', 'SHRUBS', 'AND', 'A', 'WHITE', 'PICKET', 'FENCE'] +4852-28311-0016-1459: hyp=['THE', 'LONGER', 'WING', 'TOWARDS', 'THE', 'BACK', 'GOT', 'A', 'BACK', 'DOOR', 'THAT', 'OPENED', 'ON', 'A', 'WATER', 'STREET', 'THE', 'SPACE', 'BETWEEN', 'THE', 'HOUSE', 'AND', 'WISCONSIN', 'AVENUE', 'HAD', 'BEEN', 'MADE', 'INTO', 'A', 'NEAT', 'OBLONG', 'FLOWER', 'GARDEN', 'FENCED', 'OFF', 'FROM', 'THE', 'SIDEWALK', 'BY', 'BOX', 'SHRUGS', 'AND', 'A', 'WHITE', 'PICKET', 'FENCE'] +4852-28311-0017-1460: ref=['A', 'LIVID', 'YELLOW', 'STAINED', 'THE', 'HORIZON', 'BEYOND', 'THE', 'FACTORIES', 'AND', 'GRAY', 'CLOUDS', 'LOWERED', 'AND', 'TUMBLED', 'ABOVE'] +4852-28311-0017-1460: hyp=['A', 'LIVID', 'YELLOW', 'STAINED', 'THE', 'HORIZON', 'BEYOND', 'THE', 'FACTORIES', 'AND', 'GRAY', 'CLOUDS', 'LOWERED', 'AND', 'TUMBLED', 'ABOVE'] +4852-28311-0018-1461: ref=['THE', 'AIR', 'WAS', 'GROWING', 'CHILL', 'AND', 'CHRIS', 'DECIDED', 'TO', 'FINISH', 'HIS', 'JOB'] +4852-28311-0018-1461: hyp=['THE', 'AIR', 'WAS', 'GROWING', 'CHILL', 'WHEN', 'CHRIS', 'DECIDED', 'TO', 'FINISH', 'THE', 'JOB'] +4852-28311-0019-1462: ref=['ALL', 'AT', 'ONCE', 'HE', 'WONDERED', 'HOW', 'HIS', 'MOTHER', 'WAS', 'AND', 'EVERYTHING', 'IN', 'HIM', 'PINCHED', 'AND', 'TIGHTENED', 'ITSELF'] +4852-28311-0019-1462: hyp=['ALL', 'AT', 'ONCE', 'HE', 'WONDERED', 'HOW', 'HIS', 'MOTHER', 'WAS', 'AND', 'EVERYTHING', 'IN', 'HIM', 'PINCHED', 'AND', 'TIGHTENED', 'ITSELF'] +4852-28311-0020-1463: ref=['AT', 'THE', 'FOOT', 'OF', 'THE', 'HILL', 'HE', 
'REACHED', 'THE', 'HOUSE'] +4852-28311-0020-1463: hyp=['AT', 'THE', 'FOOT', 'OF', 'THE', 'HILL', 'HE', 'REACHED', 'THE', 'HOUSE'] +4852-28311-0021-1464: ref=['THERE', 'WERE', 'THREE', 'THINGS', 'THAT', 'ALWAYS', 'CAUGHT', 'HIS', 'EYE', 'AMID', 'THE', 'LITTER', 'OF', 'DUSTY', 'PIECES'] +4852-28311-0021-1464: hyp=['THERE', 'WERE', 'THREE', 'THINGS', 'THAT', 'ALWAYS', 'CAUGHT', 'HIS', 'EYE', 'AMID', 'THE', 'LITTER', 'OF', 'DUSTY', 'PIECES'] +4852-28311-0022-1465: ref=['ON', 'THE', 'LEFT', 'THE', 'COIL', 'OF', 'ROPE', 'IN', 'THE', 'CENTER', 'THE', 'MODEL', 'OF', 'A', 'SAILING', 'SHIP', 'IN', 'A', 'GREEN', 'GLASS', 'BOTTLE', 'AND', 'ON', 'THE', 'RIGHT', 'THE', 'WOODEN', 'STATUE', 'OF', 'A', 'NEGRO', 'BOY', 'IN', 'BAGGY', 'TROUSERS', 'TURKISH', 'JACKET', 'AND', 'WHITE', 'TURBAN'] +4852-28311-0022-1465: hyp=['ON', 'THE', 'LEFT', 'THE', 'COIL', 'OF', 'ROPE', 'IN', 'THE', 'CENTER', 'THE', 'MODEL', 'OF', 'A', 'SAILING', 'SHIP', 'IN', 'A', 'GRAY', 'GLASS', 'BOTTLE', 'AND', 'ON', 'THE', 'RIGHT', 'THE', 'WOODEN', 'STATUE', 'OF', 'A', 'NEGRO', 'BOY', 'IN', 'BAGGY', 'TROUSERS', 'TURKISH', 'JACKET', 'AND', 'WHITE', 'TURBAN'] +4852-28311-0023-1466: ref=['BUT', 'THE', 'NAME', 'STILL', 'SHOWED', 'AT', 'THE', 'PROW', 'AND', 'MANY', 'A', 'TIME', 'CHRIS', 'SAFE', 'AT', 'HOME', 'IN', 'BED', 'HAD', 'SAILED', 'IMAGINARY', 'VOYAGES', 'IN', 'THE', 'MIRABELLE'] +4852-28311-0023-1466: hyp=['BUT', 'THE', 'NAME', 'STILL', 'SHOWED', 'AT', 'THE', 'PROW', 'AND', 'MANY', 'A', 'TIME', 'CHRIS', 'SAFE', 'AT', 'HOME', 'AND', 'BED', 'HAD', 'SAILED', 'IMAGINARY', 'VOYAGES', 'IN', 'THE', 'MIRABELLE'] +4852-28311-0024-1467: ref=['HE', 'HAD', 'NEVER', 'SEEN', 'ANYONE', 'GO', 'INTO', 'MISTER', "WICKER'S", 'SHOP', 'NOW', 'HE', 'THOUGHT', 'OF', 'IT'] +4852-28311-0024-1467: hyp=['HE', 'HAD', 'NEVER', 'SEEN', 'ANY', 'ONE', 'GO', 'INTO', 'MISTER', "WICKER'S", 'SHOP', 'NOW', 'HE', 'THOUGHT', 'OF', 'THAT'] +4852-28311-0025-1468: ref=['HOW', 'THEN', 'DID', 'HE', 'LIVE', 'AND', 'WHAT', 'DID', 'HE', 'EVER', 'SELL'] +4852-28311-0025-1468: hyp=['HOW', 'THEN', 'DID', 'HE', 'LIVE', 'AND', 'WHAT', 'DID', 'HE', 'EVER', 'SELL'] +4852-28311-0026-1469: ref=['A', 'SUDDEN', 'CAR', 'HORN', 'WOKE', 'HIM', 'FROM', 'HIS', 'DREAM'] +4852-28311-0026-1469: hyp=['A', 'SUDDEN', 'CARHORN', 'WELCOMED', 'FROM', 'HIS', 'DREAM'] +4852-28312-0000-1470: ref=['OF', 'THE', 'MANY', 'TIMES', 'HE', 'HAD', 'EXAMINED', 'MISTER', "WICKER'S", 'WINDOW', 'AND', 'PORED', 'OVER', 'THE', 'ROPE', 'THE', 'SHIP', 'AND', 'THE', 'NUBIAN', 'BOY', 'HE', 'HAD', 'NEVER', 'GONE', 'INTO', 'MISTER', "WICKER'S", 'SHOP'] +4852-28312-0000-1470: hyp=['OF', 'THE', 'MANY', 'TIMES', 'HE', 'HAD', 'EXAMINED', 'MISTER', "WICKER'S", 'WINDOW', 'AND', 'PORED', 'OVER', 'THE', 'ROPE', 'TO', 'SHIP', 'AND', 'THE', 'NUBIAN', 'BOY', 'HE', 'HAD', 'NEVER', 'GONE', 'INTO', 'MISTER', "WICKER'S", 'SHOP'] +4852-28312-0001-1471: ref=['SO', 'NOW', 'ALONE', 'UNTIL', 'SOMEONE', 'SHOULD', 'ANSWER', 'THE', 'BELL', 'HE', 'LOOKED', 'EAGERLY', 'IF', 'UNEASILY', 'AROUND', 'HIM'] +4852-28312-0001-1471: hyp=['SO', 'NOW', 'ALONE', 'UNTIL', 'SOMEONE', 'SHOULD', 'ANSWER', 'THE', 'BELL', 'HE', 'LOOKED', 'EAGERLY', 'IF', 'UNEASILY', 'AROUND', 'HIM'] +4852-28312-0002-1472: ref=['WHAT', 'WITH', 'THE', 'ONE', 'WINDOW', 'AND', 'THE', 'LOWERING', 'DAY', 'OUTSIDE', 'THE', 'LONG', 'NARROW', 'SHOP', 'WAS', 'SOMBER'] +4852-28312-0002-1472: hyp=['WHAT', 'WITH', 'ONE', 'WINDOW', 'AND', 'THE', 'LOWERING', 'DAY', 'OUTSIDE', 'THE', 'LONG', 'NARROW', 'SHOP', 'WAS', 'SOMBER'] +4852-28312-0003-1473: ref=['HEAVY', 'HAND', 'HEWN', 'BEAMS', 'CROSSED', 
'IT', 'FROM', 'ONE', 'SIDE', 'TO', 'THE', 'OTHER'] +4852-28312-0003-1473: hyp=['HEAVY', 'HAND', 'HEWN', 'BEAMS', 'CROSSED', 'IT', 'FROM', 'ONE', 'SIDE', 'TO', 'THE', 'OTHER'] +4852-28312-0004-1474: ref=['MISTER', "WICKER'S", 'BACK', 'BEING', 'TOWARD', 'THE', 'SOURCE', 'OF', 'LIGHT', 'CHRIS', 'COULD', 'NOT', 'SEE', 'HIS', 'FACE'] +4852-28312-0004-1474: hyp=['MISTER', "WICKER'S", 'BACK', 'BEING', 'TOWARDS', 'THE', 'SOURCE', 'OF', 'LIGHT', 'CHRIS', 'COULD', 'NOT', 'SEE', 'HIS', 'FACE'] +4852-28312-0005-1475: ref=['THE', 'DOUBLE', 'FANS', 'OF', 'MINUTE', 'WRINKLES', 'BREAKING', 'FROM', 'EYE', 'CORNER', 'TO', 'TEMPLE', 'AND', 'JOINING', 'WITH', 'THOSE', 'OVER', 'THE', 'CHEEKBONES', 'WERE', 'DRAWN', 'INTO', 'THE', 'HORIZONTAL', 'LINES', 'ACROSS', 'THE', 'DOMED', 'FOREHEAD'] +4852-28312-0005-1475: hyp=['THE', 'DOUBLE', 'FANS', 'OF', 'MINUTE', 'WRINKLES', 'BREAKING', 'FROM', 'EYE', 'CORNER', 'TO', 'TEMPLE', 'AND', 'JOINING', 'WITH', 'THOSE', 'OVER', 'THE', 'CHEEK', 'BONES', 'WERE', 'DRAWN', 'INTO', 'THE', 'HORIZONTAL', 'LINES', 'ACROSS', 'THE', 'DOMED', 'FOREHEAD'] +4852-28312-0006-1476: ref=['LITTLE', 'TUFTS', 'OF', 'WHITE', 'FUZZ', 'ABOVE', 'THE', 'EARS', 'WERE', 'ALL', 'THAT', 'REMAINED', 'OF', 'THE', "ANTIQUARIAN'S", 'HAIR', 'BUT', 'WHAT', 'DREW', 'AND', 'HELD', "CHRIS'S", 'GAZE', 'WERE', 'THE', 'OLD', "MAN'S", 'EYES'] +4852-28312-0006-1476: hyp=['LITTLE', 'TUFTS', 'OF', 'WHITE', 'FUZZ', 'ABOVE', 'THE', 'EARS', 'WERE', 'ALL', 'THAT', 'REMAINED', 'OF', 'THE', "ANTIQUARIAN'S", 'HAIR', 'BUT', 'WHAT', 'DREW', 'AND', 'HELD', "CHRIS'S", 'GAZE', 'WERE', 'THE', 'OLD', "MAN'S", 'EYES'] +4852-28312-0007-1477: ref=['CHRIS', 'BLINKED', 'AND', 'LOOKED', 'AGAIN', 'YES', 'THEY', 'WERE', 'STILL', 'THERE'] +4852-28312-0007-1477: hyp=['CHRIS', 'BLINKED', 'AND', 'LOOKED', 'AGAIN', 'YES', 'THEY', 'WERE', 'STILL', 'THERE'] +4852-28312-0008-1478: ref=['CHRIS', 'SWALLOWED', 'AND', 'HIS', 'VOICE', 'CAME', 'BACK', 'TO', 'HIM'] +4852-28312-0008-1478: hyp=['CHRIS', 'SWALLOWED', 'THEN', 'HIS', 'VOICE', 'CAME', 'BACK', 'TO', 'HIM'] +4852-28312-0009-1479: ref=['YES', 'SIR', 'HE', 'SAID'] +4852-28312-0009-1479: hyp=['YES', 'SIR', 'HE', 'SAID'] +4852-28312-0010-1480: ref=['I', 'SAW', 'YOUR', 'SIGN', 'AND', 'I', 'KNOW', 'A', 'BOY', 'WHO', 'NEEDS', 'THE', 'JOB'] +4852-28312-0010-1480: hyp=['I', 'SAW', 'YOUR', 'SIGN', 'AND', 'I', 'KNOW', 'A', 'BOY', 'WHO', 'NEEDS', 'THE', 'JOB'] +4852-28312-0011-1481: ref=["HE'S", 'A', 'SCHOOLMATE', 'OF', 'MINE'] +4852-28312-0011-1481: hyp=["HE'S", 'A', 'SCHOOLMATE', 'OF', 'MINE'] +4852-28312-0012-1482: ref=['JAKEY', 'HARRIS', 'HIS', 'NAME', 'IS', 'AND', 'HE', 'REALLY', 'NEEDS', 'THE', 'JOB'] +4852-28312-0012-1482: hyp=['JAKIE', "HARRIS'S", 'NAME', "ISN'T", 'HE', 'REALLY', 'NEEDS', 'THE', 'JOB'] +4852-28312-0013-1483: ref=['I', 'I', 'JUST', 'WONDERED', 'IF', 'THE', 'PLACE', 'WAS', 'STILL', 'OPEN'] +4852-28312-0013-1483: hyp=['I', 'I', 'JUST', 'WONDERED', 'IF', 'THE', 'PLACE', 'WAS', 'STILL', 'OPEN'] +4852-28312-0014-1484: ref=['WHAT', 'HE', 'SAW', 'WAS', 'A', 'FRESH', 'CHEEKED', 'LAD', 'TALL', 'FOR', 'THIRTEEN', 'STURDY', 'WITH', 'SINCERITY', 'AND', 'GOOD', 'HUMOR', 'IN', 'HIS', 'FACE', 'AND', 'SOMETHING', 'SENSITIVE', 'AND', 'APPEALING', 'ABOUT', 'HIS', 'EYES'] +4852-28312-0014-1484: hyp=['WHAT', 'HE', 'SAW', 'WAS', 'A', 'FRESH', 'CHEEKED', 'LAD', 'TALL', 'FOR', 'THIRTEEN', 'STURDY', 'WITH', 'SINCERITY', 'AND', 'GOOD', 'HUMOUR', 'IN', 'HIS', 'FACE', 'AND', 'SOMETHING', 'SENSITIVE', 'AND', 'APPEALING', 'ABOUT', 'HIS', 'EYES'] +4852-28312-0015-1485: ref=['HE', 'GUESSED', 'THERE', 'MUST', 'BE', 
'A', 'LIVELY', 'FIRE', 'IN', 'THAT', 'ROOM', 'BEYOND'] +4852-28312-0015-1485: hyp=['HE', 'GUESSED', 'THERE', 'MUST', 'BE', 'A', 'LIVELY', 'FIRE', 'IN', 'THAT', 'ROOM', 'BEYOND'] +4852-28312-0016-1486: ref=['WOULD', 'THAT', 'INTERFERE', 'WITH', "JAKEY'S", 'GETTING', 'THE', 'JOB', 'SIR'] +4852-28312-0016-1486: hyp=['WOULD', 'THAT', 'INTERFERE', 'WITH', 'JAKIE', "GINK'S", 'GETTING', 'THE', 'JOB', 'SIR'] +4852-28312-0017-1487: ref=['BUT', 'EVEN', 'AS', 'HE', 'SLOWLY', 'TURNED', 'THE', 'THOUGHT', 'PIERCED', 'HIS', 'MIND', 'WHY', 'HAD', 'HE', 'NOT', 'SEEN', 'THE', 'REFLECTION', 'OF', 'THE', 'HEADLIGHTS', 'OF', 'THE', 'CARS', 'MOVING', 'UP', 'AROUND', 'THE', 'CORNER', 'OF', 'WATER', 'STREET', 'AND', 'UP', 'THE', 'HILL', 'TOWARD', 'THE', 'TRAFFIC', 'SIGNALS'] +4852-28312-0017-1487: hyp=['BUT', 'EVEN', 'AS', 'HE', 'SLOWLY', 'TURNED', 'THE', 'THOUGHT', 'PIERCED', 'HIS', 'MIND', 'WHY', 'HAD', 'HE', 'NOT', 'SEEN', 'THE', 'REFLECTION', 'OF', 'THE', 'HEADLIGHTS', 'OF', 'THE', 'CARS', 'MOVING', 'UP', 'A', 'ROUND', 'THE', 'CORNER', 'OF', 'WALUTTER', 'STREET', 'AND', 'UP', 'THE', 'HILL', 'TOWARD', 'THE', 'TRAPHIC', 'SIGNALS'] +4852-28312-0018-1488: ref=['THE', 'ROOM', 'SEEMED', 'OVERLY', 'STILL'] +4852-28312-0018-1488: hyp=['THE', 'ROOM', 'SEEMED', 'OVERLY', 'STILL'] +4852-28312-0019-1489: ref=['THEN', 'IN', 'THAT', 'SECOND', 'HE', 'TURNED', 'AND', 'FACED', 'ABOUT'] +4852-28312-0019-1489: hyp=['THEN', 'IN', 'THAT', 'SECOND', 'HE', 'TURNED', 'AND', 'FACED', 'ABOUT'] +4852-28312-0020-1490: ref=['THE', 'WIDE', 'BOW', 'WINDOW', 'WAS', 'THERE', 'BEFORE', 'HIM', 'THE', 'THREE', 'OBJECTS', 'HE', 'LIKED', 'BEST', 'SHOWING', 'FROSTY', 'IN', 'THE', 'MOONLIGHT', 'THAT', 'POURED', 'IN', 'FROM', 'ACROSS', 'THE', 'WATER'] +4852-28312-0020-1490: hyp=['THE', 'WIDE', 'BOW', 'WINDOW', 'WAS', 'THERE', 'BEFORE', 'HIM', 'THE', 'THREE', 'OBJECTS', 'HE', 'LIKED', 'BEST', 'SHOWING', 'FROSTY', 'IN', 'THE', 'MOONLIGHT', 'THAT', 'POURED', 'IN', 'FROM', 'ACROSS', 'THE', 'WATER'] +4852-28312-0021-1491: ref=['ACROSS', 'THE', 'WATER', 'WHERE', 'WAS', 'THE', 'FREEWAY'] +4852-28312-0021-1491: hyp=['ACROSS', 'THE', 'WATER', 'WHERE', 'WAS', 'THE', 'FREEWAY'] +4852-28312-0022-1492: ref=['IT', 'WAS', 'NO', 'LONGER', 'THERE', 'NOR', 'WERE', 'THE', 'HIGH', 'WALLS', 'AND', 'SMOKESTACKS', 'OF', 'FACTORIES', 'TO', 'BE', 'SEEN'] +4852-28312-0022-1492: hyp=['IT', 'WAS', 'NO', 'LONGER', 'THERE', 'NOR', 'WERE', 'THE', 'HIGH', 'WALLS', 'AND', 'SMOKESTACKS', 'OF', 'FACTORIES', 'TO', 'BE', 'SEEN'] +4852-28312-0023-1493: ref=['THE', 'WAREHOUSES', 'WERE', 'STILL', 'THERE'] +4852-28312-0023-1493: hyp=['THE', 'WAREHOUSES', 'WERE', 'STILL', 'THERE'] +4852-28312-0024-1494: ref=['FLABBERGASTED', 'AND', 'BREATHLESS', 'CHRIS', 'WAS', 'UNAWARE', 'THAT', 'HE', 'HAD', 'MOVED', 'CLOSER', 'TO', 'PEER', 'OUT', 'THE', 'WINDOW', 'IN', 'EVERY', 'DIRECTION'] +4852-28312-0024-1494: hyp=['FLABBERGASTED', 'AND', 'BREATHLESS', 'CHRIS', 'WAS', 'UNAWARE', 'THAT', 'HE', 'HAD', 'MOVED', 'CLOSER', 'TO', 'PEER', 'OUT', 'THE', 'WINDOW', 'IN', 'EVERY', 'DIRECTION'] +4852-28312-0025-1495: ref=['NO', 'ELECTRIC', 'SIGNS', 'NO', 'LAMPLIT', 'STREETS'] +4852-28312-0025-1495: hyp=['NO', 'ELECTRIC', 'SIGNS', 'NO', 'LAMPLIT', 'STREETS'] +4852-28312-0026-1496: ref=['WHERE', 'THE', "PEOPLE'S", 'DRUGSTORE', 'HAD', 'STOOD', 'BUT', 'A', 'HALF', 'HOUR', 'BEFORE', 'ROSE', 'THE', 'ROOFS', 'OF', 'WHAT', 'WAS', 'EVIDENTLY', 'AN', 'INN'] +4852-28312-0026-1496: hyp=['WHERE', 'THE', "PEOPLE'S", 'DRUG', 'STORE', 'HAD', 'STOOD', 'BUT', 'HALF', 'AN', 'HOUR', 'BEFORE', 'ROSE', 'THE', 'ROOFS', 'OF', 'WHAT', 
'WAS', 'EVIDENTLY', 'AN', 'INN'] +4852-28312-0027-1497: ref=['A', 'COURTYARD', 'WAS', 'SPARSELY', 'LIT', 'BY', 'A', 'FLARING', 'TORCH', 'OR', 'TWO', 'SHOWING', 'A', 'SWINGING', 'SIGN', 'HUNG', 'ON', 'A', 'POST'] +4852-28312-0027-1497: hyp=['A', 'COURTYARD', 'WAS', 'FIRSTLY', 'LIT', 'BY', 'A', 'FLARING', 'TORCH', 'OR', 'TWO', 'SHOWING', 'A', 'SWINGING', 'SIGN', 'HUNG', 'ON', 'A', 'POST'] +4852-28312-0028-1498: ref=['THE', 'POST', 'WAS', 'PLANTED', 'AT', 'THE', 'EDGE', 'OF', 'WHAT', 'WAS', 'NOW', 'A', 'BROAD', 'AND', 'MUDDY', 'ROAD'] +4852-28312-0028-1498: hyp=['THE', 'POST', 'WAS', 'PLANTED', 'AT', 'THE', 'EDGE', 'OF', 'WHAT', 'WAS', 'NOW', 'A', 'BROAD', 'AND', 'MUDDY', 'ROAD'] +4852-28312-0029-1499: ref=['A', 'COACH', 'WITH', 'ITS', 'TOP', 'PILED', 'HIGH', 'WITH', 'LUGGAGE', 'STAMPED', 'TO', 'A', 'HALT', 'BESIDE', 'THE', 'FLAGGED', 'COURTYARD'] +4852-28312-0029-1499: hyp=['A', 'COACH', 'WITH', 'ITS', 'TOP', 'PILED', 'HIGH', 'WITH', 'LUGGAGE', 'STAMPED', 'TO', 'A', 'HALT', 'BESIDE', 'THE', 'FLAGGED', 'COURTYARD'] +4852-28312-0030-1500: ref=['THEY', 'MOVED', 'INTO', 'THE', 'INN', 'THE', 'COACH', 'RATTLED', 'OFF', 'TO', 'THE', 'STABLE'] +4852-28312-0030-1500: hyp=['THEY', 'MOVED', 'INTO', 'THE', 'INN', 'THE', 'COACH', 'RATTLED', 'OFF', 'TO', 'THE', 'STABLE'] +4852-28312-0031-1501: ref=['MY', 'WINDOW', 'HAS', 'A', 'POWER', 'FOR', 'THOSE', 'FEW', 'WHO', 'ARE', 'TO', 'SEE'] +4852-28312-0031-1501: hyp=['MY', 'WINDOW', 'HAS', 'A', 'POWER', 'FOR', 'THOSE', 'FEW', 'WHO', 'ARE', 'TO', 'SEE'] +4852-28319-0000-1502: ref=['THE', 'LEARNING', 'OF', 'MAGIC', 'WAS', 'BY', 'NO', 'MEANS', 'EASY'] +4852-28319-0000-1502: hyp=['THE', 'LEARNING', 'OF', 'MAGIC', 'WAS', 'BY', 'NO', 'MEANS', 'EASY'] +4852-28319-0001-1503: ref=['HE', 'HAD', 'TOLD', 'HIS', 'MASTER', 'AT', 'ONCE', 'ABOUT', 'SIMON', 'GOSLER', 'HIS', 'HORDE', 'OF', 'MONEY', 'AND', 'HIS', 'HIDING', 'PLACES', 'FOR', 'IT'] +4852-28319-0001-1503: hyp=['HE', 'HAD', 'TOLD', 'HIS', 'MASTER', 'AT', 'ONCE', 'ABOUT', 'SIMON', 'GOSTLER', 'HIS', 'HOARD', 'OF', 'MONEY', 'AND', 'HIS', 'HIDING', 'PLACES', 'FOR', 'IT'] +4852-28319-0002-1504: ref=['CHRIS', 'THEREFORE', 'THREW', 'HIMSELF', 'INTO', 'ALL', 'THE', 'PRELIMINARIES', 'OF', 'HIS', 'TASK'] +4852-28319-0002-1504: hyp=['CHRIS', 'THEREFORE', 'THREW', 'HIMSELF', 'INTO', 'ALL', 'THE', 'PRELIMINARIES', 'OF', 'HIS', 'TASK'] +4852-28319-0003-1505: ref=['ONE', 'AFTERNOON', 'WHEN', 'HE', 'RETURNED', 'AFTER', 'A', 'REST', 'TO', 'MISTER', "WICKER'S", 'STUDY', 'HE', 'SAW', 'THAT', 'THERE', 'WAS', 'SOMETHING', 'NEW', 'IN', 'THE', 'ROOM', 'A', 'BOWL', 'WITH', 'A', 'GOLDFISH', 'IN', 'IT', 'STOOD', 'ON', 'THE', 'TABLE', 'BUT', 'MISTER', 'WICKER', 'WAS', 'NOT', 'TO', 'BE', 'SEEN'] +4852-28319-0003-1505: hyp=['ONE', 'AFTERNOON', 'WHEN', 'HE', 'HAD', 'RETURNED', 'AFTER', 'A', 'REST', 'TO', 'MISTER', "WICKER'S", 'STUDY', 'HE', 'SAW', 'THAT', 'THERE', 'WAS', 'SOMETHING', 'NEW', 'IN', 'THE', 'ROOM', 'A', 'BOWL', 'WITH', 'A', 'GOLDFISH', 'IN', 'IT', 'STOOD', 'ON', 'THE', 'TABLE', 'BUT', 'MISTER', 'WICKER', 'WAS', 'NOT', 'TO', 'BE', 'SEEN'] +4852-28319-0004-1506: ref=['WHAT', 'SHALL', 'I', 'DO', 'FIRST'] +4852-28319-0004-1506: hyp=['WHAT', 'SHOULD', 'ALL', 'I', 'DO', 'FIRST'] +4852-28319-0005-1507: ref=['HOW', 'YOU', 'HAVE', 'IMPROVED', 'MY', 'BOY', 'HE', 'EXCLAIMED', 'IT', 'IS', 'NOW', 'TIME', 'FOR', 'YOU', 'TO', 'TRY', 'AND', 'THIS', 'IS', 'AS', 'GOOD', 'A', 'CHANGE', 'AS', 'ANY'] +4852-28319-0005-1507: hyp=['HOW', 'YOU', 'HAVE', 'IMPROVED', 'MY', 'BOY', 'HE', 'EXCLAIMED', 'IT', 'IS', 'NOW', 'TIME', 'FOR', 'YOU', 'TO', 'TRY', 'THIS', 
'IS', 'AS', 'GOOD', 'A', 'CHANGE', 'AS', 'ANY'] +4852-28319-0006-1508: ref=['SUPPOSE', 'I', 'CHANGE', 'AND', "CAN'T", 'CHANGE', 'BACK'] +4852-28319-0006-1508: hyp=['SUPPOSE', 'I', 'CHANGE', 'AND', "CAN'T", 'CHANGE', 'BACK'] +4852-28319-0007-1509: ref=['MISTER', 'WICKER', 'WAITED', 'PATIENTLY', 'BESIDE', 'HIM', 'FOR', 'A', 'FEW', 'MOMENTS', 'FOR', 'CHRIS', 'TO', 'GET', 'UP', 'HIS', 'COURAGE'] +4852-28319-0007-1509: hyp=['MISTER', 'WICKER', 'WAITED', 'PATIENTLY', 'BESIDE', 'HIM', 'FOR', 'A', 'FEW', 'MOMENTS', 'FOR', 'CHRIS', 'TO', 'GET', 'UP', 'HIS', 'COURAGE'] +4852-28319-0008-1510: ref=['THEN', 'AS', 'NOTHING', 'HAPPENED', 'WITH', 'A', 'VOICE', 'LIKE', 'A', 'WHIP', 'MISTER', 'WICKER', 'SAID', 'START', 'AT', 'ONCE'] +4852-28319-0008-1510: hyp=['THEN', 'AS', 'NOTHING', 'HAPPENED', 'WITH', 'A', 'VOICE', 'LIKE', 'A', 'WHIP', 'MISTER', 'WICKER', 'SAID', 'START', 'AT', 'ONCE'] +4852-28319-0009-1511: ref=['THE', 'SENSATION', 'SPREAD', 'FASTER', 'AND', 'FASTER'] +4852-28319-0009-1511: hyp=['THE', 'SENSATION', 'SPREAD', 'FASTER', 'AND', 'FASTER'] +4852-28319-0010-1512: ref=['HIS', 'HEAD', 'SWAM', 'AND', 'HE', 'FELT', 'FAINT', 'AND', 'A', 'LITTLE', 'SICK', 'BUT', 'HE', 'PERSISTED', 'THROUGH', 'THE', 'FINAL', 'WORDS'] +4852-28319-0010-1512: hyp=['HIS', 'HEAD', 'SWAM', 'AND', 'HE', 'FELT', 'FAINT', 'AND', 'A', 'LITTLE', 'SICK', 'BUT', 'HE', 'PERSISTED', 'THROUGH', 'THE', 'FINAL', 'WORDS'] +4852-28319-0011-1513: ref=['HE', 'THOUGHT', 'NOT', 'WITHOUT', 'A', 'FEELING', 'OF', 'PRIDE', 'AND', 'COMMENCED', 'EXPERIMENTING', 'WITH', 'HIS', 'TAIL', 'AND', 'FINS', 'WITH', 'SUCH', 'ENTHUSIASM', 'AND', 'DELIGHT', 'THAT', 'SOME', 'LITTLE', 'TIME', 'ELAPSED', 'BEFORE', 'MISTER', "WICKER'S", 'VOICE', 'BOOMED', 'CLOSE', 'BY'] +4852-28319-0011-1513: hyp=['HE', 'THOUGHT', 'NOT', 'WITHOUT', 'A', 'FEELING', 'OF', 'PRIDE', 'AND', 'COMMENCED', 'EXPERIMENTING', 'WITH', 'HIS', 'TAIL', 'AND', 'FINS', 'WITH', 'SUCH', 'ENTHUSIASM', 'AND', 'DELIGHT', 'THAT', 'SOME', 'LITTLE', 'TIME', 'ELAPSED', 'BEFORE', 'MISTER', "WICKER'S", 'VOICE', 'BOOMED', 'CLOSE', 'BY'] +4852-28319-0012-1514: ref=['SEVENTY', 'FOUR', 'BOOK', 'ONE', 'THE', 'RETURN'] +4852-28319-0012-1514: hyp=['SEVENTY', 'FOUR', 'BOOK', 'ONE', 'THE', 'RETURN'] +4852-28319-0013-1515: ref=['THE', "FIGURE'S", 'SHOES', 'CARVED', 'IN', 'SOME', 'EASTERN', 'STYLE', 'HAD', 'CURVED', 'UP', 'POINTING', 'TOES'] +4852-28319-0013-1515: hyp=['THE', "FIGURE'S", 'SHOES', 'CARVED', 'IN', 'SOME', 'EASTERN', 'STYLE', 'HAD', 'CURVED', 'UP', 'POINTING', 'TOES'] +4852-28319-0014-1516: ref=['THEN', 'ALL', 'AT', 'ONCE', 'THE', 'IDEA', 'CAME', 'TO', 'CHRIS'] +4852-28319-0014-1516: hyp=['THEN', 'ALL', 'AT', 'ONCE', 'THE', 'IDEA', 'CAME', 'TO', 'CHRIS'] +4852-28319-0015-1517: ref=['IF', 'HE', 'WAS', 'TO', 'BE', 'A', 'MAGICIAN', 'COULD', 'HE', 'MAKE', 'THIS', 'BOY', 'COME', 'TO', 'LIFE'] +4852-28319-0015-1517: hyp=['IF', 'HE', 'WAS', 'TO', 'BE', 'A', 'MAGICIAN', 'COULD', 'HE', 'MAKE', 'THIS', 'BOY', 'COME', 'TO', 'LIFE'] +4852-28319-0016-1518: ref=['HE', 'SQUATTED', 'ON', 'HIS', 'HAUNCHES', 'EXAMINING', 'THE', 'CARVED', 'WOODEN', 'FIGURE', 'ATTENTIVELY', 'AND', 'FELT', 'CONVINCED', 'THAT', 'ONCE', 'ALIVE', 'THE', 'BOY', 'WOULD', 'BE', 'AN', 'IDEAL', 'AND', 'HAPPY', 'COMPANION'] +4852-28319-0016-1518: hyp=['HE', 'SQUATTED', 'ON', 'HIS', 'HAUNCHES', 'EXAMINED', 'THE', 'CARVED', 'WOODEN', 'FIGURE', 'ATTENTIVELY', 'AND', 'FELT', 'CONVINCED', 'THAT', 'ONCE', 'ALIVE', 'THE', 'BOY', 'WOULD', 'BE', 'AN', 'IDEAL', 'AND', 'HAPPY', 'COMPANION'] +4852-28319-0017-1519: ref=['BUT', 'HOW', 'DID', 'ONE', 'CHANGE', 
'INANIMATE', 'TO', 'ANIMATE'] +4852-28319-0017-1519: hyp=['BUT', 'HOW', 'DID', 'ONE', 'CHANGE', 'INANIMATE', 'TO', 'ANIMATE'] +4852-28319-0018-1520: ref=['CHRIS', 'GOT', 'UP', 'AND', 'STOLE', 'BACK', 'TO', 'MISTER', "WICKER'S", 'DOOR'] +4852-28319-0018-1520: hyp=['CHRIS', 'GOT', 'UP', 'AND', 'STOLE', 'BACK', 'TO', 'MISTER', "WICKER'S", 'DOOR'] +4852-28319-0019-1521: ref=['HE', 'HEARD', 'THE', 'MAGICIAN', 'GOING', 'UP', 'THE', 'SPIRAL', 'STAIRCASE', 'TO', 'HIS', 'ROOM', 'ABOVE', 'AND', 'AFTER', 'CHANGING', 'HIMSELF', 'TO', 'A', 'MOUSE', 'TO', 'SLIP', 'UNDER', 'THE', 'DOOR', 'AND', 'SEE', 'THAT', 'THE', 'ROOM', 'WAS', 'REALLY', 'EMPTY', 'CHRIS', 'RESUMED', 'HIS', 'PROPER', 'SHAPE', 'AND', 'OPENED', 'THE', 'DOORS', 'OF', 'THE', 'CUPBOARD', 'AT', 'THE', 'FAR', 'END', 'OF', 'THE', 'ROOM'] +4852-28319-0019-1521: hyp=['HE', 'HEARD', 'THAT', 'MAGICIAN', 'GOING', 'UP', 'THE', 'SPIRAL', 'STAIRCASE', 'TO', 'HIS', 'ROOM', 'ABOVE', 'AND', 'AFTER', 'CHANGING', 'HIMSELF', 'TO', 'A', 'MOUSE', 'TO', 'SLIP', 'UNDER', 'THE', 'DOOR', 'AND', 'SEE', 'THAT', 'THE', 'ROOM', 'WAS', 'REALLY', 'EMPTY', 'IT', 'RESUMED', 'ITS', 'PROPER', 'SHAPE', 'AND', 'OPENED', 'THE', 'DOORS', 'OF', 'THE', 'CUPBOARD', 'AT', 'THE', 'FAR', 'END', 'OF', 'THE', 'ROOM'] +4852-28319-0020-1522: ref=['THE', 'AFTERNOON', 'RAINY', 'BEFORE', 'INCREASED', 'IN', 'STORM'] +4852-28319-0020-1522: hyp=['THE', 'AFTERNOON', 'RAINY', 'BEFORE', 'INCREASED', 'IN', 'STORM'] +4852-28319-0021-1523: ref=['DUSK', 'CAME', 'TWO', 'HOURS', 'BEFORE', 'ITS', 'TIME', 'THUNDER', 'SNARLED', 'IN', 'THE', 'SKY'] +4852-28319-0021-1523: hyp=['DUSK', 'CAME', 'TWO', 'HOURS', 'BEFORE', 'ITS', 'TIME', 'THUNDER', 'SNARLS', 'IN', 'THE', 'SKY'] +4852-28319-0022-1524: ref=['CERTAIN', 'ELEMENTS', 'WERE', 'TO', 'BE', 'MIXED', 'AND', 'POURED', 'AT', 'THE', 'PROPER', 'TIME'] +4852-28319-0022-1524: hyp=['CERTAIN', 'ELEMENTS', 'WERE', 'TO', 'BE', 'MIXED', 'AND', 'POURED', 'AT', 'THE', 'PROPER', 'TIME'] +4852-28319-0023-1525: ref=['MISTER', 'WICKER', 'BEGAN', 'MOVING', 'ABOUT', 'UPSTAIRS', 'THE', 'FLOORBOARDS', 'CREAKED', 'AND', 'STILL', 'CHRIS', 'COULD', 'NOT', 'LEAVE', 'UNTIL', 'THE', 'POTION', 'FUMED', 'AND', 'GLOWED'] +4852-28319-0023-1525: hyp=['MISTER', 'WICKER', 'BEGAN', 'MOVING', 'ABOUT', 'UPSTAIRS', 'THE', 'FLOOR', 'BOARDS', 'CREAKED', 'CREAKERY', 'AND', 'STILL', 'CHRIS', 'COULD', 'NOT', 'LEAVE', 'UNTIL', 'THE', 'FOCCETION', 'FUMED', 'AND', 'GLOWED'] +4852-28319-0024-1526: ref=['WITH', 'INFINITE', 'CAUTION', 'CHRIS', 'CLOSED', 'THE', 'DOOR', 'SILENTLY', 'BEHIND', 'HIM', 'AND', 'RUNNING', 'LIGHTLY', 'FORWARD', 'REACHED', 'THE', 'FIGURE', 'OF', 'THE', 'NEGRO', 'BOY'] +4852-28319-0024-1526: hyp=['WITH', 'INFINITE', 'CAUTION', 'CHRIS', 'CLOSED', 'THE', 'DOOR', 'SILENTLY', 'BEHIND', 'HIM', 'AND', 'RUNNING', 'LIGHTLY', 'FORWARD', 'REACHED', 'THE', 'FIGURE', 'OF', 'THE', 'NEGRO', 'BOY'] +4852-28319-0025-1527: ref=['IT', 'WAS', 'AS', 'IF', 'THE', 'STIFFNESS', 'MELTED'] +4852-28319-0025-1527: hyp=['IT', 'WAS', 'AS', 'IF', 'THE', 'STIFFNESS', 'MELTED'] +4852-28319-0026-1528: ref=['UNDER', 'HIS', 'EYES', 'THE', 'WOODEN', 'FOLDS', 'OF', 'CLOTH', 'BECAME', 'RICH', 'SILK', 'EMBROIDERY', 'GLEAMED', 'IN', 'ITS', 'REALITY', 'UPON', 'THE', 'COAT', 'AND', 'OH', 'THE', 'FACE'] +4852-28319-0026-1528: hyp=['UNDER', 'HIS', 'EYES', 'WOODEN', 'FOLDS', 'OF', 'CLOTH', 'BECAME', 'RICH', 'SILK', 'EMBROIDERY', 'GLEAMED', 'IN', 'ITS', 'REALITY', 'UPON', 'THE', 'COAT', 'AND', 'OH', 'THE', 'FACE'] +4852-28319-0027-1529: ref=['THE', 'WOODEN', 'GRIN', 'LOOSENED', 'THE', 'LARGE', 'EYES', 'TURNED', 'THE', 'HAND', 
'HOLDING', 'THE', 'HARD', 'BOUQUET', 'OF', 'CARVED', 'FLOWERS', 'MOVED', 'AND', 'LET', 'THE', 'BOUQUET', 'FALL'] +4852-28319-0027-1529: hyp=['THE', 'WOODEN', 'GRIN', 'LOOSENED', 'THE', 'LARGE', 'EYES', 'TURNED', 'THE', 'HAND', 'HOLDING', 'THE', 'HARD', 'BOUQUET', 'OF', 'CARVED', 'FLOWERS', 'MOVED', 'LET', 'THE', 'BOUQUET', 'FALL'] +4852-28330-0000-1530: ref=['THEY', 'WENT', 'DOWN', 'TO', 'THEIR', 'QUARTERS', 'FIRST'] +4852-28330-0000-1530: hyp=['THEY', 'WENT', 'DOWN', 'TO', 'THEIR', 'QUARTERS', 'FIRST'] +4852-28330-0001-1531: ref=['GUESS', 'MISTER', 'FINNEY', 'WENT', 'TO', 'HIS', 'QUARTERS', 'I', "DON'T", 'REMEMBER', 'SEEING', 'HIM', 'CROSS', 'THE', 'DECK', 'OR', 'COME', 'OVER', 'THAT', 'WAY', 'AT', 'ALL'] +4852-28330-0001-1531: hyp=['GUESS', 'MISTER', 'FINNEY', 'WENT', 'TO', 'HIS', 'QUARTERS', 'I', "DON'T", 'REMEMBER', 'SEEING', 'HIM', 'CROSS', 'THE', 'DECK', 'OR', 'COME', 'OVER', 'THAT', 'WAY', 'AT', 'ALL'] +4852-28330-0002-1532: ref=['NEXT', 'NED', 'CILLEY', 'WAS', 'RELIEVED', 'AT', 'THE', 'HELM', 'BY', 'ELBERT', 'JONES', 'WHO', 'TOOK', 'OVER', 'NED', 'WENT', 'ON', 'DOWN'] +4852-28330-0002-1532: hyp=['NEXT', 'NED', 'CILLEY', 'WAS', 'RELIEVED', 'AT', 'THE', 'HELM', 'BY', 'ELBERT', 'JONES', 'WHO', 'TOOK', 'OVER', 'NED', 'WENT', 'ON', 'DOWN'] +4852-28330-0003-1533: ref=['IT', 'LOOKS', 'TO', 'ME', 'AS', 'IF', 'IT', 'COULD', 'HAVE', 'BEEN', 'ONE', 'OF', 'SEVERAL', 'PEOPLE', 'AND', "I'LL", 'BE', 'SWITCHED', 'IF', 'I', 'KNOW', 'WHO', "I'LL", 'KEEP', 'MY', 'EYES', 'OPEN'] +4852-28330-0003-1533: hyp=['IT', 'LOOKS', 'TO', 'ME', 'AS', 'IF', 'IT', 'HAD', 'BEEN', 'ONE', 'OF', 'SEVERAL', 'PEOPLE', 'AND', "I'LL", 'BE', 'SWITCHED', 'IF', 'I', 'KNOW', 'WHO', "I'LL", 'KEEP', 'MY', 'EYES', 'OPEN'] +4852-28330-0004-1534: ref=['THE', 'MIRABELLE', 'WAS', 'NEARING', 'TAHITI'] +4852-28330-0004-1534: hyp=['THE', 'MIRABELLE', 'WAS', 'NEARING', 'TITTI'] +4852-28330-0005-1535: ref=["WE'VE", 'WATER', 'AND', 'FRESH', 'STORES', 'TO', 'TAKE', 'ON', 'THERE'] +4852-28330-0005-1535: hyp=["WE'VE", 'WATER', 'AND', 'FRESH', 'STORES', 'TO', 'TAKE', 'ON', 'THERE'] +4852-28330-0006-1536: ref=['CHRIS', 'LOST', 'NO', 'TIME', 'AS', 'SOON', 'AS', 'HE', 'COULD', 'DO', 'IT', 'WITHOUT', 'BEING', 'NOTICED', 'IN', 'HURRYING', 'DOWN', 'TO', 'HIS', 'CABIN'] +4852-28330-0006-1536: hyp=['CHRIS', 'LOST', 'NO', 'TIME', 'AS', 'SOON', 'AS', 'HE', 'COULD', 'DO', 'IT', 'WITHOUT', 'BEING', 'NOTICED', 'IN', 'HURRYING', 'DOWN', 'TO', 'HIS', 'CABIN'] +4852-28330-0007-1537: ref=['CERTAINLY', 'MY', 'BOY', 'BOOMED', 'OUT', 'THE', 'CAPTAIN', 'HIS', 'BLUE', 'EYES', 'ABRUPTLY', 'KEEN', 'AND', 'PENETRATING'] +4852-28330-0007-1537: hyp=['CERTAINLY', 'MY', 'BOY', 'BOOMED', 'OUT', 'THE', 'CAPTAIN', 'HIS', 'BLUE', 'EYES', 'ABRUPTLY', 'KEEN', 'AND', 'PENETRATING'] +4852-28330-0008-1538: ref=['MISTER', 'FINNEY', 'WILL', 'BE', 'SOME', 'TIME', 'ON', 'DECK', 'WE', 'CANNOT', 'BE', 'OVERHEARD', 'IN', 'HERE'] +4852-28330-0008-1538: hyp=['MISTER', 'FINNEY', 'WILL', 'BE', 'SOME', 'TIME', 'ON', 'DECK', 'WE', 'CANNOT', 'BE', 'OVERHEARD', 'IN', 'HERE'] +4852-28330-0009-1539: ref=['HIS', 'FACE', 'FROZE', 'WITH', 'NERVOUSNESS', 'THAT', 'THIS', 'MIGHT', 'NOT', 'DO', 'AS', 'AN', 'ANSWER', 'AND', 'HE', 'STOOD', 'STIFF', 'AND', 'STILL', 'BEFORE', 'CAPTAIN', 'BLIZZARD'] +4852-28330-0009-1539: hyp=['HIS', 'FACE', 'ROSE', 'WITH', 'NERVOUSNESS', 'THAT', 'THIS', 'MIGHT', 'DO', 'OUTDO', 'AS', 'AN', 'ANSWER', 'AND', 'HE', 'STOOD', 'STIFF', 'AND', 'STILL', 'BEFORE', 'CAPTAIN', 'BLIZZARD'] +4852-28330-0010-1540: ref=['THE', 'CAPTAIN', 'SAT', 'FORWARD', 'IN', 'HIS', 'CHAIR', 'LOOKING', 
'AT', 'HIM', 'FOR', 'A', 'LONG', 'MOMENT', 'CONSIDERING'] +4852-28330-0010-1540: hyp=['THE', 'CAPTAIN', 'SAT', 'FORWARD', 'IN', 'HIS', 'CHAIR', 'LOOKING', 'AT', 'HIM', 'FOR', 'A', 'LONG', 'MOMENT', 'CONSIDERING'] +4852-28330-0011-1541: ref=['THEN', 'HE', 'SAID', 'WELL', 'I', 'DO', 'NOT', 'CARE', 'FOR', 'IT', 'I', 'CANNOT', 'SAY', 'I', 'DO'] +4852-28330-0011-1541: hyp=['THEN', 'HE', 'SAID', 'WELL', 'I', 'DO', 'NOT', 'CARE', 'FOR', 'IT', 'I', 'CANNOT', 'SAY', 'THAT', 'I', 'DO'] +4852-28330-0012-1542: ref=['THIS', 'SHIP', 'IS', 'MORE', 'TO', 'ME', 'THAN', 'WIFE', 'OR', 'MOTHER', 'OR', 'FAMILY'] +4852-28330-0012-1542: hyp=['THIS', 'SHIP', 'IS', 'MORE', 'TO', 'ME', 'THAN', 'WIFE', 'OR', 'MOTHER', 'OR', 'FAMILY'] +4852-28330-0013-1543: ref=['HE', 'PAUSED', 'FINGERING', 'HIS', 'LOWER', 'LIP', 'AND', 'LOOKING', 'SIDEWAYS', 'IN', 'A', 'REFLECTIVE', 'FASHION', 'AT', 'CHRIS', 'STANDING', 'BEFORE', 'HIM'] +4852-28330-0013-1543: hyp=['HE', 'PAUSED', 'FINGERING', 'HIS', 'LOWER', 'LIP', 'AND', 'LOOKING', 'SIDEWAYS', 'IN', 'A', 'REFLECTIVE', 'FASHION', 'AT', 'CHRIS', 'STANDING', 'BEFORE', 'HIM'] +4852-28330-0014-1544: ref=['WE', 'SHALL', 'SAY', 'NO', 'MORE', 'BUT', 'I', 'TRUST', 'YOU', 'UNDERSTAND', 'THE', 'RESPONSIBILITY', 'YOU', 'HAVE'] +4852-28330-0014-1544: hyp=['WE', 'SHALL', 'SAY', 'NO', 'MORE', 'BUT', 'I', 'TRUST', 'YOU', 'UNDERSTAND', 'THE', 'RESPONSIBILITY', 'YOU', 'HAVE'] +4852-28330-0015-1545: ref=['THIS', 'SHIP', 'ITS', 'CARGO', 'AND', 'ITS', 'MEN', 'WILL', 'BE', 'IN', 'YOUR', 'HANDS'] +4852-28330-0015-1545: hyp=['THIS', 'SHIP', 'ITS', 'CARGO', 'AND', 'ITS', 'MEN', 'WILL', 'BE', 'IN', 'YOUR', 'HANDS'] +4852-28330-0016-1546: ref=['YES', 'SIR', 'I', 'THINK', 'I', 'CAN', 'DO', 'IT', 'SAFELY', 'OR', 'I', 'SHOULD', 'NOT', 'TRY', 'SIR'] +4852-28330-0016-1546: hyp=['YES', 'SIR', 'I', 'THINK', 'I', 'CAN', 'DO', 'IT', 'SAFELY', 'OR', 'I', 'SHOULD', 'NOT', 'TRY', 'SIR'] +4852-28330-0017-1547: ref=['CAPTAIN', "BLIZZARD'S", 'ROUND', 'PINK', 'FACE', 'CREASED', 'IN', 'HIS', 'WINNING', 'SMILE'] +4852-28330-0017-1547: hyp=['CAPTAIN', "BLIZZARD'S", 'ROUND', 'PINK', 'FACE', 'CREASED', 'IN', 'ITS', 'WINNING', 'SMILE'] +4852-28330-0018-1548: ref=['HE', 'THEN', 'WENT', 'ON', 'TO', 'DESCRIBE', 'WHAT', 'ELSE', 'WAS', 'TO', 'FOLLOW', 'THE', 'COVERING', 'OF', 'THE', 'SHIP', 'WITH', 'LEAVES', 'TO', 'MAKE', 'IT', 'BLEND', 'WITH', 'ITS', 'SURROUNDINGS'] +4852-28330-0018-1548: hyp=['HE', 'THEN', 'WENT', 'ON', 'TO', 'DESCRIBE', 'WHAT', 'ELSE', 'WAS', 'TO', 'FOLLOW', 'THE', 'COVERING', 'OF', 'THE', 'SHIP', 'WITH', 'LEAVES', 'TO', 'MAKE', 'IT', 'BLEND', 'WITH', 'ITS', 'SURROUNDINGS'] +4852-28330-0019-1549: ref=['CAMOUFLAGE', 'WAS', 'NOT', 'A', 'WORD', 'THE', 'CAPTAIN', 'OR', 'ANYONE', 'ELSE', 'OF', 'HIS', 'TIME', 'YET', 'UNDERSTOOD'] +4852-28330-0019-1549: hyp=['THE', 'CAMOUFLAGE', 'WAS', 'NOT', 'A', 'WORD', 'THE', 'CAPTAIN', 'OR', 'ANYONE', 'ELSE', 'OF', 'HIS', 'TIME', 'YET', 'UNDERSTOOD'] +4852-28330-0020-1550: ref=['WHAT', 'CAN', 'BE', 'SAID', 'DURING', 'THAT', 'TIME', 'SIR', 'CHRIS', 'THOUGHT', 'TO', 'ASK'] +4852-28330-0020-1550: hyp=['WHAT', 'CAN', 'BE', 'SAID', 'DURING', 'THAT', 'TIME', 'SIR', 'CHRIS', 'THOUGHT', 'TO', 'ASK'] +4852-28330-0021-1551: ref=['I', 'AM', 'SOMEWHAT', 'SKILLED', 'IN', 'MEDICAMENTS', 'I', 'HAVE', 'TO', 'BE', 'AS', 'CAPTAIN', 'OF', 'A', 'SHIP', 'AND', 'THE', 'CREW', 'KNOW', 'IT'] +4852-28330-0021-1551: hyp=['I', 'AM', 'SOMEWHAT', 'SKILLED', 'IN', 'MEDICAMENTS', 'I', 'HAVE', 'TO', 'BE', 'AS', 'THE', 'CAPTAIN', 'OF', 'A', 'SHIP', 'AND', 'CREW', 'KNOW', 'IT'] +4852-28330-0022-1552: ref=['I', 
'SHALL', 'SAY', 'THAT', 'YOU', 'ARE', 'IN', 'MY', 'OWN', 'CABIN', 'SO', 'THAT', 'I', 'CAN', 'CARE', 'FOR', 'YOU'] +4852-28330-0022-1552: hyp=['I', 'SHALL', 'SAY', 'THAT', 'YOU', 'ARE', 'IN', 'MY', 'OWN', 'CABIN', 'SO', 'THAT', 'I', 'CAN', 'CARE', 'FOR', 'YOU'] +4852-28330-0023-1553: ref=['NOT', 'SINCE', 'HE', 'HAD', 'LEFT', 'MISTER', 'WICKER', 'HAD', 'CHRIS', 'FELT', 'SUCH', 'CONFIDENCE', 'AS', 'HE', 'DID', 'IN', 'THE', 'WORDS', 'AND', 'ACTIONS', 'OF', 'CAPTAIN', 'BLIZZARD'] +4852-28330-0023-1553: hyp=['NOT', 'SINCE', 'HE', 'HAD', 'LEFT', 'MISTER', 'WICKER', 'AND', 'CHRIS', 'FELT', 'SUCH', 'CONFIDENCE', 'AS', 'HE', 'DID', 'IN', 'THE', 'WORDS', 'AND', 'ACTIONS', 'OF', 'CAPTAIN', 'BLIZZARD'] +4852-28330-0024-1554: ref=['HE', 'KNEW', 'NOW', 'THAT', 'HIS', 'ABSENCE', 'FOR', 'AS', 'LONG', 'AS', 'HE', 'HAD', 'TO', 'BE', 'AWAY', 'WOULD', 'BE', 'COVERED', 'UP', 'AND', 'SATISFACTORILY', 'ACCOUNTED', 'FOR'] +4852-28330-0024-1554: hyp=['HE', 'KNEW', 'NOW', 'THAT', 'HIS', 'ABSENCE', 'FOR', 'AS', 'LONG', 'AS', 'HE', 'HAD', 'TO', 'BE', 'AWAY', 'WOULD', 'BE', 'COVERED', 'UP', 'AND', 'SATISFACTIONILY', 'ACCOUNTED', 'FOR'] +4852-28330-0025-1555: ref=['THEIR', 'CONVERSATION', 'HAD', 'TAKEN', 'SOME', 'LITTLE', 'WHILE'] +4852-28330-0025-1555: hyp=['THEIR', 'CONVERSATION', 'HAD', 'TAKEN', 'SOME', 'LITTLE', 'WHILE'] +533-1066-0000-1556: ref=['WHEN', 'CHURCHYARDS', 'YAWN'] +533-1066-0000-1556: hyp=['WHEN', 'CHURCHYARDS', 'YAWN'] +533-1066-0001-1557: ref=['I', 'KNEW', 'WELL', 'ENOUGH', 'THAT', 'HE', 'MIGHT', 'BE', 'CARRIED', 'THOUSANDS', 'OF', 'MILES', 'IN', 'THE', 'BOX', 'CAR', 'LOCKED', 'IN', 'PERHAPS', 'WITHOUT', 'WATER', 'OR', 'FOOD'] +533-1066-0001-1557: hyp=['I', 'KNEW', 'WELL', 'ENOUGH', 'THAT', 'HE', 'MIGHT', 'BE', 'CARRIED', 'THOUSANDS', 'OF', 'MILES', 'IN', 'THE', 'BOX', 'CAR', 'LOCKED', 'IN', 'PERHAPS', 'WITHOUT', 'WATER', 'OR', 'FOOD'] +533-1066-0002-1558: ref=['I', 'AM', 'SURE', 'I', 'KISSED', 'LIDDY', 'AND', 'I', 'HAVE', 'HAD', 'TERRIBLE', 'MOMENTS', 'SINCE', 'WHEN', 'I', 'SEEM', 'TO', 'REMEMBER', 'KISSING', 'MISTER', 'JAMIESON', 'TOO', 'IN', 'THE', 'EXCITEMENT'] +533-1066-0002-1558: hyp=["I'M", 'SURE', 'I', 'KISSED', 'LIDDY', 'AND', "I'VE", 'HAD', 'TERRIBLE', 'MOMENTS', 'SINCE', 'WHEN', 'I', 'SEEMED', 'TO', 'REMEMBER', 'KISSING', 'MISTER', 'JAMIESON', 'TOO', 'IN', 'THE', 'EXCITEMENT'] +533-1066-0003-1559: ref=['FORTUNATELY', 'WARNER', 'AND', 'THE', 'DETECTIVES', 'WERE', 'KEEPING', 'BACHELOR', 'HALL', 'IN', 'THE', 'LODGE'] +533-1066-0003-1559: hyp=['FORTUNATELY', 'WARNER', 'AND', 'THE', 'DETECTIVE', 'WERE', 'KEEPING', 'BACHELOR', 'HALLAND', 'LODGE'] +533-1066-0004-1560: ref=['OUT', 'OF', 'DEFERENCE', 'TO', 'LIDDY', 'THEY', 'WASHED', 'THEIR', 'DISHES', 'ONCE', 'A', 'DAY', 'AND', 'THEY', 'CONCOCTED', 'QUEER', 'MESSES', 'ACCORDING', 'TO', 'THEIR', 'SEVERAL', 'ABILITIES'] +533-1066-0004-1560: hyp=['OUT', 'OF', 'DEFERENCE', 'TO', 'LIDDY', 'THEY', 'WASHED', 'HER', 'DISHES', 'ONCE', 'A', 'DAY', 'AND', 'THEY', 'CONCLUDED', 'QUEER', 'MESSES', 'ACCORDING', 'TO', 'THEIR', 'SEVERAL', 'ABILITIES'] +533-1066-0005-1561: ref=['MISS', 'INNES', 'HE', 'SAID', 'STOPPING', 'ME', 'AS', 'I', 'WAS', 'ABOUT', 'TO', 'GO', 'TO', 'MY', 'ROOM', 'UP', 'STAIRS', 'HOW', 'ARE', 'YOUR', 'NERVES', 'TONIGHT'] +533-1066-0005-1561: hyp=['MISS', 'EANES', 'HE', 'SAID', 'STOPPING', 'ME', 'AS', 'I', 'WAS', 'ABOUT', 'TO', 'GO', 'TO', 'MY', 'ROOM', 'UPSTAIRS', 'HOW', 'ARE', 'YOUR', 'NERVES', 'TO', 'NIGHT'] +533-1066-0006-1562: ref=['I', 'HAVE', 'NONE', 'I', 'SAID', 'HAPPILY'] +533-1066-0006-1562: hyp=['I', 'HAVE', 'NONE', 'I', 'SAID', 
'HAPPILY'] +533-1066-0007-1563: ref=['I', 'MEAN', 'HE', 'PERSISTED', 'DO', 'YOU', 'FEEL', 'AS', 'THOUGH', 'YOU', 'COULD', 'GO', 'THROUGH', 'WITH', 'SOMETHING', 'RATHER', 'UNUSUAL'] +533-1066-0007-1563: hyp=['I', 'MEAN', 'HE', 'PERSISTED', 'DO', 'YOU', 'FEEL', 'AS', 'THOUGH', 'YOU', 'COULD', 'GO', 'THROUGH', 'WITH', 'SOMETHING', 'RATHER', 'UNUSUAL'] +533-1066-0008-1564: ref=['THE', 'MOST', 'UNUSUAL', 'THING', 'I', 'CAN', 'THINK', 'OF', 'WOULD', 'BE', 'A', 'PEACEFUL', 'NIGHT'] +533-1066-0008-1564: hyp=['THE', 'MOST', 'UNUSUAL', 'THING', 'I', 'CAN', 'THINK', 'OF', 'WOULD', 'BE', 'A', 'PEACEFUL', 'NIGHT'] +533-1066-0009-1565: ref=['SOMETHING', 'IS', 'GOING', 'TO', 'OCCUR', 'HE', 'SAID'] +533-1066-0009-1565: hyp=['SOMETHING', 'IS', 'GOING', 'TO', 'OCCUR', 'HE', 'SAID'] +533-1066-0010-1566: ref=['PUT', 'ON', 'HEAVY', 'SHOES', 'AND', 'SOME', 'OLD', 'DARK', 'CLOTHES', 'AND', 'MAKE', 'UP', 'YOUR', 'MIND', 'NOT', 'TO', 'BE', 'SURPRISED', 'AT', 'ANYTHING'] +533-1066-0010-1566: hyp=['PUT', 'ON', 'HEAVY', 'SHOES', 'AND', 'SOME', 'OLD', 'DARK', 'CLOTHES', 'AND', 'MAKE', 'UP', 'YOUR', 'MIND', 'NOT', 'TO', 'BE', 'SURPRISED', 'AT', 'ANYTHING'] +533-1066-0011-1567: ref=['LIDDY', 'WAS', 'SLEEPING', 'THE', 'SLEEP', 'OF', 'THE', 'JUST', 'WHEN', 'I', 'WENT', 'UP', 'STAIRS', 'AND', 'I', 'HUNTED', 'OUT', 'MY', 'THINGS', 'CAUTIOUSLY'] +533-1066-0011-1567: hyp=['LIDDY', 'WAS', 'SLEEPING', 'THE', 'SLEEP', 'OF', 'THE', 'JUST', 'WHEN', 'I', 'WENT', 'UPSTAIRS', 'AND', 'I', 'HUNTED', 'OUT', 'MY', 'THINGS', 'CAUTIOUSLY'] +533-1066-0012-1568: ref=['THEY', 'WERE', 'TALKING', 'CONFIDENTIALLY', 'TOGETHER', 'BUT', 'WHEN', 'I', 'CAME', 'DOWN', 'THEY', 'CEASED'] +533-1066-0012-1568: hyp=['THEY', 'WERE', 'TALKING', 'CONFIDENTLY', 'TOGETHER', 'BUT', 'WHEN', 'I', 'CAME', 'DOWN', 'THEY', 'CEASED'] +533-1066-0013-1569: ref=['THERE', 'WERE', 'A', 'FEW', 'PREPARATIONS', 'TO', 'BE', 'MADE', 'THE', 'LOCKS', 'TO', 'BE', 'GONE', 'OVER', 'WINTERS', 'TO', 'BE', 'INSTRUCTED', 'AS', 'TO', 'RENEWED', 'VIGILANCE', 'AND', 'THEN', 'AFTER', 'EXTINGUISHING', 'THE', 'HALL', 'LIGHT', 'WE', 'CREPT', 'IN', 'THE', 'DARKNESS', 'THROUGH', 'THE', 'FRONT', 'DOOR', 'AND', 'INTO', 'THE', 'NIGHT'] +533-1066-0013-1569: hyp=['THERE', 'WERE', 'A', 'FEW', 'PREPARATIONS', 'TO', 'BE', 'MADE', 'LOGS', 'TO', 'BE', 'GONE', 'OVER', 'WINTERS', 'TO', 'BE', 'INSTRUCTED', 'AS', 'TO', 'RENEWED', 'VIGILANCE', 'AND', 'THEN', 'AFTER', 'EXTINGUISHING', 'THE', 'WHOLE', 'LIGHT', 'WE', 'CREPT', 'IN', 'THE', 'DARKNESS', 'THROUGH', 'THE', 'FRONT', 'DOOR', 'AND', 'INTO', 'THE', 'NIGHT'] +533-1066-0014-1570: ref=['I', 'ASKED', 'NO', 'QUESTIONS'] +533-1066-0014-1570: hyp=['I', 'ASKED', 'NO', 'QUESTIONS'] +533-1066-0015-1571: ref=['ONCE', 'ONLY', 'SOMEBODY', 'SPOKE', 'AND', 'THEN', 'IT', 'WAS', 'AN', 'EMPHATIC', 'BIT', 'OF', 'PROFANITY', 'FROM', 'DOCTOR', 'STEWART', 'WHEN', 'HE', 'RAN', 'INTO', 'A', 'WIRE', 'FENCE'] +533-1066-0015-1571: hyp=['ONCE', 'ONLY', 'SOMEBODY', 'SPOKE', 'AND', 'THEN', 'IT', 'WAS', 'AN', 'EMPHATIC', 'BIT', 'OF', 'PROFANITY', 'FROM', 'DOCTOR', 'STEWART', 'WHEN', 'HE', 'RAN', 'INTO', 'A', 'WIRE', 'FENCE'] +533-1066-0016-1572: ref=['I', 'HARDLY', 'KNOW', 'WHAT', 'I', 'EXPECTED'] +533-1066-0016-1572: hyp=['I', 'HARDLY', 'KNOW', 'WHAT', 'I', 'EXPECTED'] +533-1066-0017-1573: ref=['THE', 'DOCTOR', 'WAS', 'PUFFING', 'SOMEWHAT', 'WHEN', 'WE', 'FINALLY', 'CAME', 'TO', 'A', 'HALT'] +533-1066-0017-1573: hyp=['THE', 'DOCTOR', 'WAS', 'PUFFING', 'SOMEWHAT', 'WHEN', 'WE', 'FINALLY', 'CAME', 'TO', 'A', 'HALT'] +533-1066-0018-1574: ref=['I', 'CONFESS', 'THAT', 'JUST', 'AT', 'THAT', 
'MINUTE', 'EVEN', 'SUNNYSIDE', 'SEEMED', 'A', 'CHEERFUL', 'SPOT'] +533-1066-0018-1574: hyp=['I', 'CONFESS', 'THAT', 'JUST', 'AT', 'THAT', 'MINUTE', 'EVEN', 'SUNNYSIDE', 'SEEMED', 'A', 'CHEERFUL', 'SPOT'] +533-1066-0019-1575: ref=['IN', 'SPITE', 'OF', 'MYSELF', 'I', 'DREW', 'MY', 'BREATH', 'IN', 'SHARPLY'] +533-1066-0019-1575: hyp=['IN', 'SPITE', 'OF', 'MYSELF', 'I', 'DREW', 'MY', 'BREATH', 'IN', 'SHARPLY'] +533-1066-0020-1576: ref=['IT', 'WAS', 'ALEX', 'ARMED', 'WITH', 'TWO', 'LONG', 'HANDLED', 'SPADES'] +533-1066-0020-1576: hyp=['IT', 'WAS', 'ALEX', 'ARMED', 'WITH', 'TWO', 'LONG', 'HANDLED', 'SPADES'] +533-1066-0021-1577: ref=['THE', 'DOCTOR', 'KEPT', 'A', 'KEEN', 'LOOKOUT', 'BUT', 'NO', 'ONE', 'APPEARED'] +533-1066-0021-1577: hyp=['THE', 'DOCTOR', 'KEPT', 'A', 'KEEN', 'LOOK', 'OUT', 'BUT', 'NO', 'ONE', 'APPEARED'] +533-1066-0022-1578: ref=["THERE'S", 'ONE', 'THING', 'SURE', "I'LL", 'NOT', 'BE', 'SUSPECTED', 'OF', 'COMPLICITY'] +533-1066-0022-1578: hyp=["THERE'S", 'ONE', 'THING', 'SURE', "I'LL", 'NOT', 'BE', 'SUSPECTED', 'OF', 'COMPLICITY'] +533-1066-0023-1579: ref=['A', 'DOCTOR', 'IS', 'GENERALLY', 'SUPPOSED', 'TO', 'BE', 'HANDIER', 'AT', 'BURYING', 'FOLKS', 'THAN', 'AT', 'DIGGING', 'THEM', 'UP'] +533-1066-0023-1579: hyp=['A', 'DOCTOR', 'IS', 'GENERALLY', 'SUPPOSED', 'TO', 'BE', 'A', 'HANDIER', 'AT', 'BURYING', 'FOLKS', 'THAN', 'AT', 'DIGGING', 'THEM', 'UP'] +533-1066-0024-1580: ref=['I', 'HELD', 'ON', 'TO', 'HIM', 'FRANTICALLY', 'AND', 'SOMEHOW', 'I', 'GOT', 'THERE', 'AND', 'LOOKED', 'DOWN'] +533-1066-0024-1580: hyp=['I', 'HELD', 'ON', 'TO', 'HIM', 'FRANTICALLY', 'AND', 'SOMEHOW', 'I', 'GOT', 'THERE', 'AND', 'LOOKED', 'DOWN'] +533-131556-0000-1581: ref=['BUT', 'HOW', 'AM', 'I', 'TO', 'GET', 'OVER', 'THE', 'TEN', 'OR', 'TWELVE', 'DAYS', 'THAT', 'MUST', 'YET', 'ELAPSE', 'BEFORE', 'THEY', 'GO'] +533-131556-0000-1581: hyp=['BUT', 'HOW', 'AM', 'I', 'TO', 'GET', 'OVER', 'THE', 'TEN', 'OR', 'TWELVE', 'DAYS', 'THAT', 'MUST', 'YET', 'ELAPSE', 'BEFORE', 'THEY', 'GO'] +533-131556-0001-1582: ref=['FOR', 'NONE', 'COULD', 'INJURE', 'ME', 'AS', 'HE', 'HAS', 'DONE', 'OH'] +533-131556-0001-1582: hyp=['FOR', 'NONE', 'COULD', 'ENDURE', 'ME', 'AS', 'HE', 'HAS', 'DONE', 'OH'] +533-131556-0002-1583: ref=['THE', 'WORD', 'STARES', 'ME', 'IN', 'THE', 'FACE', 'LIKE', 'A', 'GUILTY', 'CONFESSION', 'BUT', 'IT', 'IS', 'TRUE', 'I', 'HATE', 'HIM', 'I', 'HATE', 'HIM'] +533-131556-0002-1583: hyp=['THE', 'WORD', 'STARES', 'ME', 'IN', 'THE', 'FACE', 'LIKE', 'A', 'GUILTY', 'CONFESSION', 'BUT', 'IT', 'IS', 'TRUE', 'I', 'HATE', 'HIM', 'I', 'HATE', 'HIM'] +533-131556-0003-1584: ref=['I', 'SOMETIMES', 'THINK', 'I', 'OUGHT', 'TO', 'GIVE', 'HIM', 'CREDIT', 'FOR', 'THE', 'GOOD', 'FEELING', 'HE', 'SIMULATES', 'SO', 'WELL', 'AND', 'THEN', 'AGAIN', 'I', 'THINK', 'IT', 'IS', 'MY', 'DUTY', 'TO', 'SUSPECT', 'HIM', 'UNDER', 'THE', 'PECULIAR', 'CIRCUMSTANCES', 'IN', 'WHICH', 'I', 'AM', 'PLACED'] +533-131556-0003-1584: hyp=['I', 'SOMETIMES', 'THINK', 'I', 'OUGHT', 'TO', 'GIVE', 'HIM', 'CREDIT', 'FOR', 'THE', 'GOOD', 'FEELING', 'HE', 'SIMULATES', 'SO', 'WELL', 'AND', 'THEN', 'AGAIN', 'I', 'THINK', 'IT', 'IS', 'MY', 'DUTY', 'TO', 'SUSPECT', 'HIM', 'UNDER', 'THE', 'PECULIAR', 'CIRCUMSTANCES', 'IN', 'WHICH', 'I', 'AM', 'PLACED'] +533-131556-0004-1585: ref=['I', 'HAVE', 'DONE', 'WELL', 'TO', 'RECORD', 'THEM', 'SO', 'MINUTELY'] +533-131556-0004-1585: hyp=['I', 'HAVE', 'DONE', 'WELL', 'TO', 'RECORD', 'THEM', 'SO', 'MINUTELY'] +533-131556-0005-1586: ref=['THEY', 'HAD', 'BETAKEN', 'THEMSELVES', 'TO', 'THEIR', 'WORK', 'I', 'LESS', 'TO', 'DIVERT', 
'MY', 'MIND', 'THAN', 'TO', 'DEPRECATE', 'CONVERSATION', 'HAD', 'PROVIDED', 'MYSELF', 'WITH', 'A', 'BOOK'] +533-131556-0005-1586: hyp=['THEY', 'HAD', 'BETAKEN', 'THEMSELVES', 'TO', 'THEIR', 'WORK', 'I', 'LESS', 'TO', 'DIVERT', 'MY', 'MIND', 'THAN', 'TO', 'DEPRECATE', 'CONVERSATION', 'I', 'PROVIDED', 'MYSELF', 'WITH', 'A', 'BOOK'] +533-131556-0006-1587: ref=['I', 'AM', 'TOO', 'WELL', 'ACQUAINTED', 'WITH', 'YOUR', 'CHARACTER', 'AND', 'CONDUCT', 'TO', 'FEEL', 'ANY', 'REAL', 'FRIENDSHIP', 'FOR', 'YOU', 'AND', 'AS', 'I', 'AM', 'WITHOUT', 'YOUR', 'TALENT', 'FOR', 'DISSIMULATION', 'I', 'CANNOT', 'ASSUME', 'THE', 'APPEARANCE', 'OF', 'IT'] +533-131556-0006-1587: hyp=['I', 'AM', 'TOO', 'WELL', 'ACQUAINTED', 'WITH', 'YOUR', 'CHARACTER', 'AND', 'CONDUCT', 'TO', 'FEEL', 'ANY', 'REAL', 'FRIENDSHIP', 'FOR', 'YOU', 'AND', 'AS', 'I', 'AM', 'WITHOUT', 'YOUR', 'TALENT', 'FOR', 'DISSIMULATION', 'I', 'CANNOT', 'ASSUME', 'THE', 'APPEARANCE', 'OF', 'IT'] +533-131556-0007-1588: ref=['UPON', 'PERUSING', 'THIS', 'SHE', 'TURNED', 'SCARLET', 'AND', 'BIT', 'HER', 'LIP'] +533-131556-0007-1588: hyp=['UP', 'IN', 'PERUSING', 'THIS', 'SHE', 'TURNED', 'SCARLET', 'AND', 'BIT', 'HER', 'LIP'] +533-131556-0008-1589: ref=['YOU', 'MAY', 'GO', 'MILICENT', 'AND', "SHE'LL", 'FOLLOW', 'IN', 'A', 'WHILE', 'MILICENT', 'WENT'] +533-131556-0008-1589: hyp=['YOU', 'MAY', 'GO', 'MILLICENT', 'AND', "YOU'LL", 'FOLLOW', 'IN', 'A', 'WHILE', 'MELLICENT', 'WENT'] +533-131556-0009-1590: ref=['WILL', 'YOU', 'OBLIGE', 'ME', 'HELEN', 'CONTINUED', 'SHE'] +533-131556-0009-1590: hyp=['WILL', 'YOU', 'OBLIGE', 'ME', 'ELLEN', 'CONTINUED', 'SHE'] +533-131556-0010-1591: ref=['AH', 'YOU', 'ARE', 'SUSPICIOUS'] +533-131556-0010-1591: hyp=['AH', 'YOU', 'ARE', 'SUSPICIOUS'] +533-131556-0011-1592: ref=['IF', 'I', 'WERE', 'SUSPICIOUS', 'I', 'REPLIED', 'I', 'SHOULD', 'HAVE', 'DISCOVERED', 'YOUR', 'INFAMY', 'LONG', 'BEFORE'] +533-131556-0011-1592: hyp=['IF', 'I', 'WERE', 'SUSPICIOUS', 'I', 'REPLIED', 'I', 'SHOULD', 'HAVE', 'DISCOVERED', 'YOUR', 'INFAMY', 'LONG', 'BEFORE'] +533-131556-0012-1593: ref=['I', 'ENJOY', 'A', 'MOONLIGHT', 'RAMBLE', 'AS', 'WELL', 'AS', 'YOU', 'I', 'ANSWERED', 'STEADILY', 'FIXING', 'MY', 'EYES', 'UPON', 'HER', 'AND', 'THE', 'SHRUBBERY', 'HAPPENS', 'TO', 'BE', 'ONE', 'OF', 'MY', 'FAVOURITE', 'RESORTS'] +533-131556-0012-1593: hyp=['I', 'ENJOY', 'YOUR', 'MOONLIGHT', 'RAMBLE', 'AS', 'WELL', 'AS', 'YOU', 'I', 'ANSWERED', 'STEADILY', 'FIXING', 'MY', 'EYES', 'UPON', 'HER', 'AND', 'THE', 'SHRUBBERY', 'HAPPENS', 'TO', 'BE', 'ONE', 'OF', 'MY', 'FAVOURITE', 'RESORTS'] +533-131556-0013-1594: ref=['SHE', 'COLOURED', 'AGAIN', 'EXCESSIVELY', 'AND', 'REMAINED', 'SILENT', 'PRESSING', 'HER', 'FINGER', 'AGAINST', 'HER', 'TEETH', 'AND', 'GAZING', 'INTO', 'THE', 'FIRE'] +533-131556-0013-1594: hyp=['SHE', 'COLOURED', 'AGAIN', 'EXCESSIVELY', 'AND', 'REMAINED', 'SILENT', 'PRESSING', 'HER', 'FINGER', 'AGAINST', 'HER', 'CHEEK', 'AND', 'GAZING', 'INTO', 'THE', 'FIRE'] +533-131556-0014-1595: ref=['I', 'WATCHED', 'HER', 'A', 'FEW', 'MOMENTS', 'WITH', 'A', 'FEELING', 'OF', 'MALEVOLENT', 'GRATIFICATION', 'THEN', 'MOVING', 'TOWARDS', 'THE', 'DOOR', 'I', 'CALMLY', 'ASKED', 'IF', 'SHE', 'HAD', 'ANYTHING', 'MORE', 'TO', 'SAY'] +533-131556-0014-1595: hyp=['I', 'WATCHED', 'HER', 'A', 'FEW', 'MOMENTS', 'WITH', 'A', 'FEELING', 'OF', 'MALEVOLENT', 'GRATIFICATION', 'THEN', 'MOVING', 'TOWARDS', 'THE', 'DOOR', 'I', 'CALMLY', 'ASKED', 'IF', 'SHE', 'HAD', 'ANYTHING', 'MORE', 'TO', 'SAY'] +533-131556-0015-1596: ref=['YES', 'YES'] +533-131556-0015-1596: hyp=['YES', 'YES'] 
+533-131556-0016-1597: ref=['SUPPOSE', 'I', 'DO'] +533-131556-0016-1597: hyp=['SUPPOSE', 'I', 'DO'] +533-131556-0017-1598: ref=['SHE', 'PAUSED', 'IN', 'EVIDENT', 'DISCONCERTION', 'AND', 'PERPLEXITY', 'MINGLED', 'WITH', 'ANGER', 'SHE', 'DARED', 'NOT', 'SHOW'] +533-131556-0017-1598: hyp=['SHE', 'PAUSED', 'IN', 'EVIDENT', 'DISCONCERTION', 'AND', 'PERPLEXITY', 'MINGLED', 'WITH', 'ANGER', 'SHE', 'DARED', 'NOT', 'SHOW'] +533-131556-0018-1599: ref=['I', 'CANNOT', 'RENOUNCE', 'WHAT', 'IS', 'DEARER', 'THAN', 'LIFE', 'SHE', 'MUTTERED', 'IN', 'A', 'LOW', 'HURRIED', 'TONE'] +533-131556-0018-1599: hyp=['I', 'CANNOT', 'RENOUNCE', 'WHAT', 'IS', 'DEARER', 'THAN', 'LIFE', 'SHE', 'MUTTERED', 'IN', 'A', 'LOW', 'HURRIED', 'TONE'] +533-131556-0019-1600: ref=['IF', 'YOU', 'ARE', 'GENEROUS', 'HERE', 'IS', 'A', 'FITTING', 'OPPORTUNITY', 'FOR', 'THE', 'EXERCISE', 'OF', 'YOUR', 'MAGNANIMITY', 'IF', 'YOU', 'ARE', 'PROUD', 'HERE', 'AM', 'I', 'YOUR', 'RIVAL', 'READY', 'TO', 'ACKNOWLEDGE', 'MYSELF', 'YOUR', 'DEBTOR', 'FOR', 'AN', 'ACT', 'OF', 'THE', 'MOST', 'NOBLE', 'FORBEARANCE'] +533-131556-0019-1600: hyp=['IF', 'YOU', 'ARE', 'GENEROUS', 'HERE', 'IS', 'A', 'FITTING', 'OPPORTUNITY', 'FOR', 'THE', 'EXERCISE', 'OF', 'YOUR', 'MAGNANIMITY', 'IF', 'YOU', 'ARE', 'PROUD', 'HERE', 'AM', 'I', 'YOUR', 'RIVAL', 'RAY', 'TO', 'PRONOUNCE', 'MYSELF', 'YOUR', 'ADEPTOR', 'FOR', 'AN', 'ACT', 'OF', 'THE', 'MOST', 'NOBLE', 'FORBEARANCE'] +533-131556-0020-1601: ref=['I', 'SHALL', 'NOT', 'TELL', 'HIM'] +533-131556-0020-1601: hyp=['I', 'SHALL', 'NOT', 'TELL', 'HIM'] +533-131556-0021-1602: ref=['GIVE', 'ME', 'NO', 'THANKS', 'IT', 'IS', 'NOT', 'FOR', 'YOUR', 'SAKE', 'THAT', 'I', 'REFRAIN'] +533-131556-0021-1602: hyp=['GIVE', 'ME', 'NO', 'THANKS', 'IT', 'IS', 'NOT', 'FOR', 'YOUR', 'SAKE', 'THAT', 'I', 'REFRAIN'] +533-131556-0022-1603: ref=['AND', 'MILICENT', 'WILL', 'YOU', 'TELL', 'HER'] +533-131556-0022-1603: hyp=['AND', 'ME', 'LISTEN', 'WILL', 'YOU', 'TELL', 'HER'] +533-131556-0023-1604: ref=['I', 'WOULD', 'NOT', 'FOR', 'MUCH', 'THAT', 'SHE', 'SHOULD', 'KNOW', 'THE', 'INFAMY', 'AND', 'DISGRACE', 'OF', 'HER', 'RELATION'] +533-131556-0023-1604: hyp=['I', 'WOULD', 'NOT', 'FOR', 'MUCH', 'THAT', 'SHE', 'SHOULD', 'KNOW', 'THE', 'INFAMY', 'AND', 'DISGRACE', 'OF', 'HER', 'RELATION'] +533-131556-0024-1605: ref=['YOU', 'USE', 'HARD', 'WORDS', 'MISSUS', 'HUNTINGDON', 'BUT', 'I', 'CAN', 'PARDON', 'YOU'] +533-131556-0024-1605: hyp=['YOU', 'USE', 'HARD', 'WORDS', 'MISSUS', 'HUNTINGDON', 'BUT', 'I', 'CAN', 'PARDON', 'YOU'] +533-131556-0025-1606: ref=['HOW', 'DARE', 'YOU', 'MENTION', 'HIS', 'NAME', 'TO', 'ME'] +533-131556-0025-1606: hyp=['HOW', 'DARE', 'YOU', 'MENTION', 'HIS', 'NAME', 'TO', 'ME'] +533-131562-0000-1607: ref=['IT', 'SEEMS', 'VERY', 'INTERESTING', 'LOVE', 'SAID', 'HE', 'LIFTING', 'HIS', 'HEAD', 'AND', 'TURNING', 'TO', 'WHERE', 'I', 'STOOD', 'WRINGING', 'MY', 'HANDS', 'IN', 'SILENT', 'RAGE', 'AND', 'ANGUISH', 'BUT', "IT'S", 'RATHER', 'LONG', "I'LL", 'LOOK', 'AT', 'IT', 'SOME', 'OTHER', 'TIME', 'AND', 'MEANWHILE', "I'LL", 'TROUBLE', 'YOU', 'FOR', 'YOUR', 'KEYS', 'MY', 'DEAR', 'WHAT', 'KEYS'] +533-131562-0000-1607: hyp=['IT', 'SEEMS', 'VERY', 'INTERESTING', 'LOVE', 'SAID', 'HE', 'LIFTING', 'HIS', 'HEAD', 'AND', 'TURNING', 'TO', 'WHERE', 'I', 'STOOD', 'WRINGING', 'MY', 'HAND', 'IN', 'SILENT', 'RAGE', 'AND', 'ANGUISH', 'BUT', "IT'S", 'RATHER', 'LONG', "I'LL", 'LOOK', 'AT', 'IT', 'SOME', 'OTHER', 'TIME', 'AND', 'MEANWHILE', "I'LL", 'TROUBLE', 'YOU', 'FOR', 'YOUR', 'KEYS', 'MY', 'DEAR', 'WHAT', 'KEYS'] +533-131562-0001-1608: ref=['THE', 'KEYS', 'OF', 
'YOUR', 'CABINET', 'DESK', 'DRAWERS', 'AND', 'WHATEVER', 'ELSE', 'YOU', 'POSSESS', 'SAID', 'HE', 'RISING', 'AND', 'HOLDING', 'OUT', 'HIS', 'HAND'] +533-131562-0001-1608: hyp=['THE', 'KEYS', 'OF', 'YOUR', 'CABINET', 'DESK', 'DRAWER', 'AND', 'WHATEVER', 'ELSE', 'YOU', 'POSSESS', 'SAID', 'HE', 'RISING', 'AND', 'HOLDING', 'OUT', 'HIS', 'HAND'] +533-131562-0002-1609: ref=['THE', 'KEY', 'OF', 'MY', 'DESK', 'IN', 'FACT', 'WAS', 'AT', 'THAT', 'MOMENT', 'IN', 'THE', 'LOCK', 'AND', 'THE', 'OTHERS', 'WERE', 'ATTACHED', 'TO', 'IT'] +533-131562-0002-1609: hyp=['THE', 'KEY', 'OF', 'MY', 'DESK', 'IN', 'FACT', 'WAS', 'AT', 'THAT', 'MOMENT', 'IN', 'LOCK', 'AND', 'THE', 'OTHERS', 'WERE', 'ATTACHED', 'TO', 'IT'] +533-131562-0003-1610: ref=['NOW', 'THEN', 'SNEERED', 'HE', 'WE', 'MUST', 'HAVE', 'A', 'CONFISCATION', 'OF', 'PROPERTY'] +533-131562-0003-1610: hyp=['NOW', 'THEN', 'SNEERED', 'HE', 'WE', 'MUST', 'HAVE', 'A', 'CONFISCATION', 'OF', 'PROPERTY'] +533-131562-0004-1611: ref=['AND', 'PUTTING', 'THE', 'KEYS', 'INTO', 'HIS', 'POCKET', 'HE', 'WALKED', 'INTO', 'THE', 'LIBRARY'] +533-131562-0004-1611: hyp=['AND', 'PUTTING', 'THE', 'KEYS', 'INTO', 'HIS', 'POCKET', 'HE', 'WALKED', 'INTO', 'THE', 'LIBRARY'] +533-131562-0005-1612: ref=['THAT', 'AND', 'ALL', 'REPLIED', 'THE', 'MASTER', 'AND', 'THE', 'THINGS', 'WERE', 'CLEARED', 'AWAY'] +533-131562-0005-1612: hyp=['THAT', 'AND', 'ALL', 'REPLIED', 'THE', 'MASTER', 'AND', 'THE', 'THINGS', 'WERE', 'CLEARED', 'AWAY'] +533-131562-0006-1613: ref=['MISTER', 'HUNTINGDON', 'THEN', 'WENT', 'UP', 'STAIRS'] +533-131562-0006-1613: hyp=['MISTER', 'HUNTINGDON', 'THEN', 'WENT', 'UPSTAIRS'] +533-131562-0007-1614: ref=['MUTTERED', 'HE', 'STARTING', 'BACK', "SHE'S", 'THE', 'VERY', 'DEVIL', 'FOR', 'SPITE'] +533-131562-0007-1614: hyp=['MUTTERED', 'HE', 'STARTING', 'BACK', "SHE'S", 'THE', 'VERY', 'DEVIL', 'FOR', 'SPITE'] +533-131562-0008-1615: ref=['I', "DIDN'T", 'SAY', "I'D", 'BROKEN', 'IT', 'DID', 'I', 'RETURNED', 'HE'] +533-131562-0008-1615: hyp=['I', "DIDN'T", 'SAY', 'I', 'BROKEN', 'IT', 'DID', 'I', 'RETURNED', 'HE'] +533-131562-0009-1616: ref=['I', 'SHALL', 'PUT', 'YOU', 'UPON', 'A', 'SMALL', 'MONTHLY', 'ALLOWANCE', 'IN', 'FUTURE', 'FOR', 'YOUR', 'OWN', 'PRIVATE', 'EXPENSES', 'AND', 'YOU', "NEEDN'T", 'TROUBLE', 'YOURSELF', 'ANY', 'MORE', 'ABOUT', 'MY', 'CONCERNS', 'I', 'SHALL', 'LOOK', 'OUT', 'FOR', 'A', 'STEWARD', 'MY', 'DEAR', 'I', "WON'T", 'EXPOSE', 'YOU', 'TO', 'THE', 'TEMPTATION'] +533-131562-0009-1616: hyp=['I', 'SHALL', 'PUT', 'YOU', 'UPON', 'A', 'SMALL', 'MONTHLY', 'ALLOWANCE', 'IN', 'FUTURE', 'FOR', 'YOUR', 'OWN', 'PRIVATE', 'EXPENSES', 'AND', 'YOU', "NEEDN'T", 'TROUBLE', 'YOURSELF', 'ANY', 'MORE', 'ABOUT', 'MY', 'CONCERNS', 'I', 'SHALL', 'LOOK', 'OUT', 'FOR', 'A', 'STEWARD', 'MY', 'DEAR', 'I', "WON'T", 'EXPOSE', 'YOU', 'TO', 'TEMPTATION'] +533-131562-0010-1617: ref=['AND', 'AS', 'FOR', 'THE', 'HOUSEHOLD', 'MATTERS', 'MISSUS', 'GREAVES', 'MUST', 'BE', 'VERY', 'PARTICULAR', 'IN', 'KEEPING', 'HER', 'ACCOUNTS', 'WE', 'MUST', 'GO', 'UPON', 'AN', 'ENTIRELY', 'NEW', 'PLAN'] +533-131562-0010-1617: hyp=['AND', 'AS', 'FOR', 'THE', 'HOUSEHOLD', 'MATTERS', 'MISSUS', 'GRIEBS', 'MUST', 'BE', 'VERY', 'PARTICULAR', 'IN', 'KEEPING', 'HER', 'ACCOUNTS', 'WE', 'MUST', 'GO', 'UP', 'ON', 'AN', 'ENTIRELY', 'NEW', 'PLAN'] +533-131562-0011-1618: ref=['WHAT', 'GREAT', 'DISCOVERY', 'HAVE', 'YOU', 'MADE', 'NOW', 'MISTER', 'HUNTINGDON'] +533-131562-0011-1618: hyp=['WHAT', 'GREAT', 'DISCOVERY', 'HAVE', 'YOU', 'MADE', 'NOW', 'MISTER', 'HINTING', 'AN'] +533-131562-0012-1619: ref=['HAVE', 'I', 'ATTEMPTED', 
'TO', 'DEFRAUD', 'YOU'] +533-131562-0012-1619: hyp=['IF', 'I', 'ATTEMPTED', 'TO', 'DEFRAUD', 'YOU'] +533-131562-0013-1620: ref=['NOT', 'IN', 'MONEY', 'MATTERS', 'EXACTLY', 'IT', 'SEEMS', 'BUT', "IT'S", 'BEST', 'TO', 'KEEP', 'OUT', 'OF', 'THE', 'WAY', 'OF', 'TEMPTATION'] +533-131562-0013-1620: hyp=['NOT', 'IN', 'MONEY', 'MATTERS', 'EXACTLY', 'IT', 'SEEMS', 'BUT', "IT'S", 'BEST', 'TO', 'KEEP', 'OUT', 'OF', 'THE', 'WAY', 'OF', 'TEMPTATION'] +533-131562-0014-1621: ref=['HERE', 'BENSON', 'ENTERED', 'WITH', 'THE', 'CANDLES', 'AND', 'THERE', 'FOLLOWED', 'A', 'BRIEF', 'INTERVAL', 'OF', 'SILENCE', 'I', 'SITTING', 'STILL', 'IN', 'MY', 'CHAIR', 'AND', 'HE', 'STANDING', 'WITH', 'HIS', 'BACK', 'TO', 'THE', 'FIRE', 'SILENTLY', 'TRIUMPHING', 'IN', 'MY', 'DESPAIR'] +533-131562-0014-1621: hyp=['HERE', 'BASSOM', 'ENTERED', 'THE', 'CANDLES', 'AND', 'THERE', 'FOLLOWED', 'A', 'BRIEF', 'INTERVAL', 'OF', 'SILENCE', 'I', 'SITTING', 'STILL', 'IN', 'MY', 'CHAIR', 'AND', 'HE', 'STANDING', 'WITH', 'HIS', 'BACK', 'TO', 'THE', 'FIRE', 'SILENTLY', 'TRIUMPHING', 'IN', 'MY', 'DESPAIR'] +533-131562-0015-1622: ref=['I', 'KNOW', 'THAT', 'DAY', 'AFTER', 'DAY', 'SUCH', 'FEELINGS', 'WILL', 'RETURN', 'UPON', 'ME'] +533-131562-0015-1622: hyp=['I', 'KNOW', 'THAT', 'DAY', 'AFTER', 'DAY', 'SUCH', 'FEELINGS', 'WILL', 'RETURN', 'UPON', 'ME'] +533-131562-0016-1623: ref=['I', 'TRY', 'TO', 'LOOK', 'TO', 'HIM', 'AND', 'RAISE', 'MY', 'HEART', 'TO', 'HEAVEN', 'BUT', 'IT', 'WILL', 'CLEAVE', 'TO', 'THE', 'DUST'] +533-131562-0016-1623: hyp=['I', 'TRY', 'TO', 'LOOK', 'TO', 'HIM', 'AND', 'RAISE', 'MY', 'HEART', 'TO', 'HEAVEN', 'BUT', 'IT', 'WILL', 'CLEAVE', 'TO', 'THE', 'DUST'] +533-131564-0000-1624: ref=['VAIN', 'HOPE', 'I', 'FEAR'] +533-131564-0000-1624: hyp=['VAIN', 'HOPE', 'I', 'FEAR'] +533-131564-0001-1625: ref=['MISTER', 'AND', 'MISSUS', 'HATTERSLEY', 'HAVE', 'BEEN', 'STAYING', 'AT', 'THE', 'GROVE', 'A', 'FORTNIGHT', 'AND', 'AS', 'MISTER', 'HARGRAVE', 'IS', 'STILL', 'ABSENT', 'AND', 'THE', 'WEATHER', 'WAS', 'REMARKABLY', 'FINE', 'I', 'NEVER', 'PASSED', 'A', 'DAY', 'WITHOUT', 'SEEING', 'MY', 'TWO', 'FRIENDS', 'MILICENT', 'AND', 'ESTHER', 'EITHER', 'THERE', 'OR', 'HERE'] +533-131564-0001-1625: hyp=['MISTER', 'AND', 'MISSUS', 'HAUTTERSLEY', 'HAVE', 'BEEN', 'SEEING', 'AT', 'THE', 'GROVE', 'A', 'FORTNIGHT', 'AND', 'AS', 'MISSUS', 'HARGRAVE', 'IS', 'STILL', 'ABSENT', 'AND', 'THE', 'WEATHER', 'WAS', 'REMARKABLY', 'FINE', 'I', 'NEVER', 'PASS', 'THE', 'DAY', 'WITHOUT', 'SEEING', 'MY', 'TWO', 'FRIENDS', 'MILICON', 'AND', 'ESTHER', 'EITHER', 'THERE', 'OR', 'HERE'] +533-131564-0002-1626: ref=['NO', 'UNLESS', 'YOU', 'CAN', 'TELL', 'ME', 'WHEN', 'TO', 'EXPECT', 'HIM', 'HOME'] +533-131564-0002-1626: hyp=['NO', 'UNLESS', 'YOU', 'CAN', 'TELL', 'ME', 'WHEN', 'TO', 'EXPECT', 'HIM', 'HOME'] +533-131564-0003-1627: ref=['I', "CAN'T", 'YOU', "DON'T", 'WANT', 'HIM', 'DO', 'YOU'] +533-131564-0003-1627: hyp=['I', "CAN'T", 'YOU', "DON'T", 'WANT', 'HIM', 'DO', 'YOU'] +533-131564-0004-1628: ref=['IT', 'IS', 'A', 'RESOLUTION', 'YOU', 'OUGHT', 'TO', 'HAVE', 'FORMED', 'LONG', 'AGO'] +533-131564-0004-1628: hyp=['IT', 'IS', 'A', 'RESOLUTION', 'YOU', 'HAVE', 'REFORMED', 'LONG', 'AGO'] +533-131564-0005-1629: ref=['WE', 'ALL', 'HAVE', 'A', 'BIT', 'OF', 'A', 'LIKING', 'FOR', 'HIM', 'AT', 'THE', 'BOTTOM', 'OF', 'OUR', 'HEARTS', 'THOUGH', 'WE', "CAN'T", 'RESPECT', 'HIM'] +533-131564-0005-1629: hyp=['WE', 'ALL', 'HAVE', 'A', 'BIT', 'OF', 'A', 'LIKING', 'FOR', 'HIM', 'AT', 'THE', 'BOTTOM', 'OF', 'OUR', 'HEARTS', 'THOUGH', 'WE', "CAN'T", 'RESPECT', 'HIM'] +533-131564-0006-1630: 
ref=['NO', "I'D", 'RATHER', 'BE', 'LIKE', 'MYSELF', 'BAD', 'AS', 'I', 'AM'] +533-131564-0006-1630: hyp=['NO', "I'D", 'RATHER', 'BE', 'LIKE', 'MYSELF', 'THAN', 'AS', 'I', 'AM'] +533-131564-0007-1631: ref=['NEVER', 'MIND', 'MY', 'PLAIN', 'SPEAKING', 'SAID', 'I', 'IT', 'IS', 'FROM', 'THE', 'BEST', 'OF', 'MOTIVES'] +533-131564-0007-1631: hyp=['NEVER', 'MIND', 'MY', 'PLAIN', 'SPEAKING', 'SAID', 'I', 'IT', 'IS', 'FROM', 'THE', 'BEST', 'OF', 'MOTIVES'] +533-131564-0008-1632: ref=['BUT', 'TELL', 'ME', 'SHOULD', 'YOU', 'WISH', 'YOUR', 'SONS', 'TO', 'BE', 'LIKE', 'MISTER', 'HUNTINGDON', 'OR', 'EVEN', 'LIKE', 'YOURSELF'] +533-131564-0008-1632: hyp=['BUT', 'TELL', 'ME', 'SHOULD', 'YOU', 'WISH', 'YOUR', 'SONS', 'TO', 'BE', 'LIKE', 'MISTER', 'HUNTINGDON', 'OR', 'EVEN', 'LIKE', 'YOURSELF'] +533-131564-0009-1633: ref=['OH', 'NO', 'I', "COULDN'T", 'STAND', 'THAT'] +533-131564-0009-1633: hyp=['OH', 'NO', 'I', "COULDN'T", 'STAND', 'THAT'] +533-131564-0010-1634: ref=['FIRE', 'AND', 'FURY'] +533-131564-0010-1634: hyp=['FIRE', 'AND', 'FURY'] +533-131564-0011-1635: ref=['NOW', "DON'T", 'BURST', 'INTO', 'A', 'TEMPEST', 'AT', 'THAT'] +533-131564-0011-1635: hyp=['NOW', "DON'T", 'BURST', 'INTO', 'A', 'TEMPEST', 'AT', 'THAT'] +533-131564-0012-1636: ref=['BUT', 'HANG', 'IT', "THAT'S", 'NOT', 'MY', 'FAULT'] +533-131564-0012-1636: hyp=['BUT', 'HANG', 'IT', "THAT'S", 'NOT', 'MY', 'FAULT'] +533-131564-0013-1637: ref=['NOT', 'YEARS', 'FOR', "SHE'S", 'ONLY', 'FIVE', 'AND', 'TWENTY'] +533-131564-0013-1637: hyp=['NOT', 'YEARS', 'FOR', 'SHE', 'IS', 'ONLY', 'FIVE', 'AND', 'TWENTY'] +533-131564-0014-1638: ref=['WHAT', 'WOULD', 'YOU', 'MAKE', 'OF', 'ME', 'AND', 'THE', 'CHILDREN', 'TO', 'BE', 'SURE', 'THAT', 'WORRY', 'HER', 'TO', 'DEATH', 'BETWEEN', 'THEM'] +533-131564-0014-1638: hyp=['WHAT', 'DID', 'YOU', 'MAKE', 'OF', 'ME', 'AND', 'THE', 'CHILDREN', 'TO', 'BE', 'SURE', 'THAT', 'WERE', 'A', 'HURT', 'DEATH', 'BETWEEN', 'THEM'] +533-131564-0015-1639: ref=['I', 'KNOW', 'THEY', 'ARE', 'BLESS', 'THEM'] +533-131564-0015-1639: hyp=['I', 'KNOW', 'THEY', 'ARE', 'BLESS', 'THEM'] +533-131564-0016-1640: ref=['HE', 'FOLLOWED', 'ME', 'INTO', 'THE', 'LIBRARY'] +533-131564-0016-1640: hyp=['HE', 'FOLLOWED', 'ME', 'INTO', 'THE', 'LIBRARY'] +533-131564-0017-1641: ref=['I', 'SOUGHT', 'OUT', 'AND', 'PUT', 'INTO', 'HIS', 'HANDS', 'TWO', 'OF', "MILICENT'S", 'LETTERS', 'ONE', 'DATED', 'FROM', 'LONDON', 'AND', 'WRITTEN', 'DURING', 'ONE', 'OF', 'HIS', 'WILDEST', 'SEASONS', 'OF', 'RECKLESS', 'DISSIPATION', 'THE', 'OTHER', 'IN', 'THE', 'COUNTRY', 'DURING', 'A', 'LUCID', 'INTERVAL'] +533-131564-0017-1641: hyp=['I', 'SOUGHT', 'HOUGHTON', 'PUT', 'INTO', 'HIS', 'HANDS', 'TWO', 'OF', "MILICON'S", 'LETTERS', 'ONE', 'THEY', 'DID', 'FROM', 'LONDON', 'AND', 'WRITTEN', 'DURING', 'ONE', 'OF', 'HIS', 'WILDEST', 'SEASONS', 'OF', 'RECKLESS', 'DISSIPATION', 'THE', 'OTHER', 'IN', 'THE', 'COUNTRY', 'DURING', 'A', 'LUCID', 'INTERVAL'] +533-131564-0018-1642: ref=['THE', 'FORMER', 'WAS', 'FULL', 'OF', 'TROUBLE', 'AND', 'ANGUISH', 'NOT', 'ACCUSING', 'HIM', 'BUT', 'DEEPLY', 'REGRETTING', 'HIS', 'CONNECTION', 'WITH', 'HIS', 'PROFLIGATE', 'COMPANIONS', 'ABUSING', 'MISTER', 'GRIMSBY', 'AND', 'OTHERS', 'INSINUATING', 'BITTER', 'THINGS', 'AGAINST', 'MISTER', 'HUNTINGDON', 'AND', 'MOST', 'INGENIOUSLY', 'THROWING', 'THE', 'BLAME', 'OF', 'HER', "HUSBAND'S", 'MISCONDUCT', 'ON', 'TO', 'OTHER', "MEN'S", 'SHOULDERS'] +533-131564-0018-1642: hyp=['THE', 'FORMER', 'WAS', 'FULL', 'OF', 'TROUBLE', 'AND', 'ANGUISH', 'NOT', 'ACCUSING', 'HIM', 'BUT', 'DEEPLY', 'REGRETTING', 'HIS', 'CONNECTION', 
'WITH', 'HIS', 'PROFLIGATE', 'COMPANIONS', 'ABUSING', 'MISTER', 'GRIMSBY', 'AND', 'OTHERS', 'INSINUATING', 'BITTER', 'THINGS', 'AGAINST', 'MISTER', 'HUNTINGDON', 'AND', 'MOST', 'INGENUOUSLY', 'THROWING', 'THE', 'BLAME', 'OF', 'HER', "HUSBAND'S", 'MISCONDUCT', 'ON', 'THE', 'OTHER', "MAN'S", 'SHOULDERS'] +533-131564-0019-1643: ref=["I'VE", 'BEEN', 'A', 'CURSED', 'RASCAL', 'GOD', 'KNOWS', 'SAID', 'HE', 'AS', 'HE', 'GAVE', 'IT', 'A', 'HEARTY', 'SQUEEZE', 'BUT', 'YOU', 'SEE', 'IF', 'I', "DON'T", 'MAKE', 'AMENDS', 'FOR', 'IT', 'D', 'N', 'ME', 'IF', 'I', "DON'T"] +533-131564-0019-1643: hyp=["I'VE", 'BEEN', 'A', 'CURSED', 'RASCAL', 'GOD', 'KNOWS', 'SAID', 'HE', 'AS', 'HE', 'GAVE', 'IT', 'A', 'EARTHY', 'SQUEEZE', 'BUT', 'YOU', 'SEE', 'IF', 'I', "DON'T", 'MAKE', 'AMENDS', 'FOR', 'IT', 'DAMN', 'ME', 'IF', 'I', "DON'T"] +533-131564-0020-1644: ref=['IF', 'YOU', 'INTEND', 'TO', 'REFORM', 'INVOKE', "GOD'S", 'BLESSING', 'HIS', 'MERCY', 'AND', 'HIS', 'AID', 'NOT', 'HIS', 'CURSE'] +533-131564-0020-1644: hyp=['IF', 'YOU', 'INTEND', 'TO', 'REFORM', 'INVOKE', "GOD'S", 'BLESSING', 'IS', 'MERCY', 'IN', 'THIS', 'APE', 'NOT', 'DISCURSE'] +533-131564-0021-1645: ref=['GOD', 'HELP', 'ME', 'THEN', 'FOR', "I'M", 'SURE', 'I', 'NEED', 'IT'] +533-131564-0021-1645: hyp=['GOD', 'HELP', 'ME', 'THEN', 'FOR', "I'M", 'SURE', 'I', 'NEED', 'IT'] +533-131564-0022-1646: ref=["WHERE'S", 'MILICENT'] +533-131564-0022-1646: hyp=["WHERE'S", 'MILICENT'] +533-131564-0023-1647: ref=['NAY', 'NOT', 'I', 'SAID', 'HE', 'TURNING', 'HER', 'ROUND', 'AND', 'PUSHING', 'HER', 'TOWARDS', 'ME'] +533-131564-0023-1647: hyp=['NAY', 'NOT', 'I', 'SAID', 'HE', 'TURNING', 'ROUND', 'AND', 'PUSHING', 'IT', 'TOWARDS', 'ME'] +533-131564-0024-1648: ref=['MILICENT', 'FLEW', 'TO', 'THANK', 'ME', 'OVERFLOWING', 'WITH', 'GRATITUDE'] +533-131564-0024-1648: hyp=['MILICENT', 'FLEW', 'TO', 'THANK', 'ME', 'OVERWHELMING', 'HIS', 'GRATITUDE'] +533-131564-0025-1649: ref=['CRIED', 'SHE', 'I', "COULDN'T", 'HAVE', 'INFLUENCED', 'HIM', "I'M", 'SURE', 'BY', 'ANYTHING', 'THAT', 'I', 'COULD', 'HAVE', 'SAID'] +533-131564-0025-1649: hyp=['CRIED', 'SHE', 'I', "COULDN'T", 'HAVE', 'INFLUENCED', 'HIM', 'I', 'AM', 'SURE', 'BY', 'ANYTHING', 'THAT', 'I', 'COULD', 'HAVE', 'SAID'] +533-131564-0026-1650: ref=['YOU', 'NEVER', 'TRIED', 'ME', 'MILLY', 'SAID', 'HE'] +533-131564-0026-1650: hyp=['YOU', 'NEVER', 'TRIVE', 'ME', 'MERELY', 'SAID', 'HE'] +533-131564-0027-1651: ref=['AFTER', 'THAT', 'THEY', 'WILL', 'REPAIR', 'TO', 'THEIR', 'COUNTRY', 'HOME'] +533-131564-0027-1651: hyp=['AFTER', 'THAT', 'THEY', 'WILL', 'REPAIR', 'TO', 'THEIR', 'COUNTRY', 'HOME'] +5442-32873-0000-1652: ref=['CAPTAIN', 'LAKE', 'DID', 'NOT', 'LOOK', 'AT', 'ALL', 'LIKE', 'A', 'LONDON', 'DANDY', 'NOW'] +5442-32873-0000-1652: hyp=['CAPTAIN', 'LAKE', 'DID', 'NOT', 'LOOK', 'AT', 'ALL', 'LIKE', 'A', 'LONDON', 'DANDY', 'NOW'] +5442-32873-0001-1653: ref=['THERE', 'WAS', 'A', 'VERY', 'NATURAL', 'SAVAGERY', 'AND', 'DEJECTION', 'THERE', 'AND', 'A', 'WILD', 'LEER', 'IN', 'HIS', 'YELLOW', 'EYES', 'RACHEL', 'SAT', 'DOWN'] +5442-32873-0001-1653: hyp=['THERE', 'WAS', 'A', 'VERY', 'NATURAL', 'SAVAGERY', 'AND', 'DEJECTION', 'THERE', 'AND', 'A', 'WILD', 'LEER', 'IN', 'HIS', 'YELLOW', 'EYES', 'RACHEL', 'SAT', 'DOWN'] +5442-32873-0002-1654: ref=['A', 'SLAVE', 'ONLY', 'THINK', 'A', 'SLAVE'] +5442-32873-0002-1654: hyp=['A', 'SLAVE', 'ONLY', 'THINK', 'A', 'SLAVE'] +5442-32873-0003-1655: ref=['OH', 'FRIGHTFUL', 'FRIGHTFUL', 'IS', 'IT', 'A', 'DREAM'] +5442-32873-0003-1655: hyp=['OH', 'FRIGHTFUL', 'FRIGHTFUL', 'IS', 'IT', 'A', 'DREAM'] 
+5442-32873-0004-1656: ref=['OH', 'FRIGHTFUL', 'FRIGHTFUL'] +5442-32873-0004-1656: hyp=['OH', 'FRIGHTFUL', 'FRIGHTFUL'] +5442-32873-0005-1657: ref=['STANLEY', 'STANLEY', 'IT', 'WOULD', 'BE', 'MERCY', 'TO', 'KILL', 'ME', 'SHE', 'BROKE', 'OUT', 'AGAIN'] +5442-32873-0005-1657: hyp=['STANLEY', 'STANLEY', 'IT', 'WOULD', 'BE', 'MERCY', 'TO', 'KILL', 'ME', 'SHE', 'BROKE', 'OUT', 'AGAIN'] +5442-32873-0006-1658: ref=['BRIGHT', 'AND', 'NATTY', 'WERE', 'THE', 'CHINTZ', 'CURTAINS', 'AND', 'THE', 'LITTLE', 'TOILET', 'SET', 'OUT', 'NOT', 'INELEGANTLY', 'AND', 'HER', 'PET', 'PIPING', 'GOLDFINCH', 'ASLEEP', 'ON', 'HIS', 'PERCH', 'WITH', 'HIS', 'BIT', 'OF', 'SUGAR', 'BETWEEN', 'THE', 'WIRES', 'OF', 'HIS', 'CAGE', 'HER', 'PILLOW', 'SO', 'WHITE', 'AND', 'UNPRESSED', 'WITH', 'ITS', 'LITTLE', 'EDGING', 'OF', 'LACE'] +5442-32873-0006-1658: hyp=['BRIGHT', 'AND', 'NATTY', 'WERE', 'THE', 'CHINTZ', 'CURTAINS', 'AND', 'THE', 'LITTLE', 'TOILET', 'SET', 'OUT', 'NOT', 'INELEGANTLY', 'AND', 'HER', 'PET', 'PIPING', 'GOLDFINCH', 'ASLEEP', 'ON', 'HIS', 'PERCH', 'WITH', 'HIS', 'BIT', 'OF', 'SUGAR', 'BETWEEN', 'THE', 'WINDS', 'OF', 'HIS', 'CAGE', 'HER', 'PILLOW', 'SO', 'WHITE', 'AND', 'UNPRESSED', 'WITH', 'ITS', 'LITTLE', 'EDGING', 'OF', 'LACE'] +5442-32873-0007-1659: ref=['WHEN', 'HE', 'CAME', 'BACK', 'TO', 'THE', 'DRAWING', 'ROOM', 'A', 'TOILET', 'BOTTLE', 'OF', 'EAU', 'DE', 'COLOGNE', 'IN', 'HIS', 'HAND', 'WITH', 'HER', 'LACE', 'HANDKERCHIEF', 'HE', 'BATHED', 'HER', 'TEMPLES', 'AND', 'FOREHEAD'] +5442-32873-0007-1659: hyp=['WHEN', 'HE', 'CAME', 'BACK', 'TO', 'THE', 'DRAWING', 'ROOM', 'A', 'TOURID', 'BOTTLE', 'OFAU', 'THE', 'COLOGNE', 'IN', 'HIS', 'HAND', 'WITH', 'HER', 'LACE', 'HANDKERCHIEF', 'HE', 'BATHED', 'HER', 'TEMPLES', 'AND', 'FOREHEAD'] +5442-32873-0008-1660: ref=['THERE', 'WAS', 'NOTHING', 'VERY', 'BROTHERLY', 'IN', 'HIS', 'LOOK', 'AS', 'HE', 'PEERED', 'INTO', 'HER', 'PALE', 'SHARP', 'FEATURES', 'DURING', 'THE', 'PROCESS'] +5442-32873-0008-1660: hyp=['THERE', 'WAS', 'NOTHING', 'VERY', 'BROTHERLY', 'IN', 'HIS', 'LOOK', 'AS', 'HE', 'PEERED', 'INTO', 'HER', 'PALE', 'SHARP', 'FEATURES', 'DURING', 'THE', 'PROCESS'] +5442-32873-0009-1661: ref=['THERE', "DON'T", 'MIND', 'ME', 'SHE', 'SAID', 'SHARPLY', 'AND', 'GETTING', 'UP', 'SHE', 'LOOKED', 'DOWN', 'AT', 'HER', 'DRESS', 'AND', 'THIN', 'SHOES', 'AND', 'SEEMING', 'TO', 'RECOLLECT', 'HERSELF', 'SHE', 'TOOK', 'THE', 'CANDLE', 'HE', 'HAD', 'JUST', 'SET', 'DOWN', 'AND', 'WENT', 'SWIFTLY', 'TO', 'HER', 'ROOM'] +5442-32873-0009-1661: hyp=['THERE', "DON'T", 'MIND', 'ME', 'SHE', 'SAID', 'SHARPLY', 'AND', 'GETTING', 'UP', 'SHE', 'LOOKED', 'DOWN', 'AT', 'HER', 'DRESS', 'AND', 'THIN', 'SHOES', 'AND', 'SEEMING', 'TO', 'RECOLLECT', 'HERSELF', 'SHE', 'TOOK', 'THE', 'CANDLE', 'HE', 'HAD', 'JUST', 'SET', 'DOWN', 'AND', 'WENT', 'SWIFTLY', 'TO', 'HER', 'ROOM'] +5442-32873-0010-1662: ref=['AND', 'SHE', 'THREW', 'BACK', 'HER', 'VEIL', 'AND', 'GOING', 'HURRIEDLY', 'TO', 'THE', 'TOILET', 'MECHANICALLY', 'SURVEYED', 'HERSELF', 'IN', 'THE', 'GLASS'] +5442-32873-0010-1662: hyp=['AND', 'SHE', 'THREW', 'BACK', 'HER', 'VEIL', 'AND', 'GOING', 'HURRIEDLY', 'TO', 'THE', 'TOILET', 'MECHANICALLY', 'SURVEYED', 'HERSELF', 'IN', 'THE', 'GLASS'] +5442-32873-0011-1663: ref=['RACHEL', 'LAKE', 'RACHEL', 'LAKE', 'WHAT', 'ARE', 'YOU', 'NOW'] +5442-32873-0011-1663: hyp=['RACHEL', 'MEEK', 'LACH', 'TO', 'LEEK', 'WHAT', 'ARE', 'YOU', 'NOW'] +5442-32873-0012-1664: ref=["I'LL", 'STAY', 'HERE', 'THAT', 'IS', 'IN', 'THE', 'DRAWING', 'ROOM', 'SHE', 'ANSWERED', 'AND', 'THE', 'FACE', 'WAS', 'WITHDRAWN'] 
+5442-32873-0012-1664: hyp=["I'LL", 'STAY', 'HERE', 'THAT', 'IS', 'IN', 'THE', 'DRAWING', 'ROOM', 'SHE', 'ANSWERED', 'AND', 'THE', 'FACE', 'WAS', 'WITHDRAWN'] +5442-32873-0013-1665: ref=['HE', 'SLACKENED', 'HIS', 'PACE', 'AND', 'TAPPED', 'SHARPLY', 'AT', 'THE', 'LITTLE', 'WINDOW', 'OF', 'THAT', 'MODEST', 'POST', 'OFFICE', 'AT', 'WHICH', 'THE', 'YOUNG', 'LADIES', 'IN', 'THE', 'PONY', 'CARRIAGE', 'HAD', 'PULLED', 'UP', 'THE', 'DAY', 'BEFORE', 'AND', 'WITHIN', 'WHICH', 'LUKE', 'WAGGOT', 'WAS', 'WONT', 'TO', 'SLEEP', 'IN', 'A', 'SORT', 'OF', 'WOODEN', 'BOX', 'THAT', 'FOLDED', 'UP', 'AND', 'APPEARED', 'TO', 'BE', 'A', 'CHEST', 'OF', 'DRAWERS', 'ALL', 'DAY'] +5442-32873-0013-1665: hyp=['HE', 'SLACKENED', 'HIS', 'PACE', 'AND', 'TAPPED', 'SHARPLY', 'AT', 'THE', 'LITTLE', 'WINDOW', 'OF', 'THAT', 'MODEST', 'POST', 'OFFICE', 'AT', 'WHICH', 'THE', 'YOUNG', 'LADIES', 'IN', 'THE', 'PONY', 'CARRIAGE', 'HAD', 'PULLED', 'UP', 'THE', 'DAY', 'BEFORE', 'AND', 'WITHIN', 'WHICH', 'LUKE', 'WAGGETT', 'WAS', 'WONT', 'TO', 'SLEEP', 'IN', 'A', 'SORT', 'OF', 'WOODEN', 'BOX', 'THAT', 'FOLDED', 'UP', 'WHAT', 'APPEARED', 'TO', 'BE', 'A', 'CHEST', 'OF', 'DRAWERS', 'ALL', 'DAY'] +5442-32873-0014-1666: ref=['LUKE', 'TOOK', 'CARE', 'OF', 'MISTER', "LARKIN'S", 'DOGS', 'AND', 'GROOMED', 'MISTER', "WYLDER'S", 'HORSE', 'AND', 'CLEANED', 'UP', 'HIS', 'DOG', 'CART', 'FOR', 'MARK', 'BEING', 'CLOSE', 'ABOUT', 'MONEY', 'AND', 'FINDING', 'THAT', 'THE', 'THING', 'WAS', 'TO', 'BE', 'DONE', 'MORE', 'CHEAPLY', 'THAT', 'WAY', 'PUT', 'UP', 'HIS', 'HORSE', 'AND', 'DOG', 'CART', 'IN', 'THE', 'POST', 'OFFICE', 'PREMISES', 'AND', 'SO', 'EVADED', 'THE', 'LIVERY', 'CHARGES', 'OF', 'THE', 'BRANDON', 'ARMS'] +5442-32873-0014-1666: hyp=['LUKE', 'TOOK', 'CARE', 'OF', 'MISTER', "LARKIN'S", 'DOGS', 'AND', 'GROOMED', 'MISTER', "WYLDER'S", 'HORSE', 'AND', 'CLEANED', 'UP', 'HIS', 'DOG', 'CART', 'FOR', 'MARK', 'BEING', 'CLOSE', 'ABOUT', 'MONEY', 'AND', 'FINDING', 'THAT', 'THE', 'THING', 'WAS', 'TO', 'BE', 'DONE', 'MORE', 'CHEAPLY', 'THAT', 'WAY', 'PUT', 'UP', 'HIS', 'HORSE', 'AND', 'DOG', 'CART', 'IN', 'THE', 'POST', 'OFFICE', 'PREMISES', 'AND', 'SO', 'EVADED', 'THE', 'LIVERY', 'CHARGES', 'OF', 'THE', 'BRANDON', 'ARMS'] +5442-32873-0015-1667: ref=['BUT', 'LUKE', 'WAS', 'NOT', 'THERE', 'AND', 'CAPTAIN', 'LAKE', 'RECOLLECTING', 'HIS', 'HABITS', 'AND', 'HIS', 'HAUNT', 'HURRIED', 'ON', 'TO', 'THE', 'SILVER', 'LION', 'WHICH', 'HAS', 'ITS', 'GABLE', 'TOWARDS', 'THE', 'COMMON', 'ONLY', 'ABOUT', 'A', 'HUNDRED', 'STEPS', 'AWAY', 'FOR', 'DISTANCES', 'ARE', 'NOT', 'GREAT', 'IN', 'GYLINGDEN'] +5442-32873-0015-1667: hyp=['BUT', 'LUKE', 'WAS', 'NOT', 'THERE', 'AND', 'CAPTAIN', 'LAKE', 'RECOLLECTING', 'HIS', 'HABITS', 'AND', 'HIS', 'HAUNT', 'HURRIED', 'ON', 'TO', 'THE', 'SILVER', 'LION', 'WHICH', 'HAS', 'ITS', 'CABLE', 'TOWARDS', 'THE', 'COMMON', 'ONLY', 'ABOUT', 'A', 'HUNDRED', 'STEPS', 'AWAY', 'FOR', 'DISTANCES', 'ARE', 'NOT', 'GREAT', 'IN', 'GYLINGDEN'] +5442-32873-0016-1668: ref=['HERE', 'WERE', 'THE', 'FLOW', 'OF', 'SOUL', 'AND', 'OF', 'STOUT', 'LONG', 'PIPES', 'LONG', 'YARNS', 'AND', 'TOLERABLY', 'LONG', 'CREDITS', 'AND', 'THE', 'HUMBLE', 'SCAPEGRACES', 'OF', 'THE', 'TOWN', 'RESORTED', 'THITHER', 'FOR', 'THE', 'PLEASURES', 'OF', 'A', 'CLUB', 'LIFE', 'AND', 'OFTEN', 'REVELLED', 'DEEP', 'INTO', 'THE', 'SMALL', 'HOURS', 'OF', 'THE', 'MORNING'] +5442-32873-0016-1668: hyp=['HERE', 'WERE', 'THE', 'FLOW', 'OF', 'SOLE', 'AND', 'OF', 'STOUT', 'LONG', 'PIPES', 'LONG', 'YARNS', 'AND', 'TOLERABLY', 'LONG', 'CREDITS', 'AND', 'THE', 'HUMBLE', 'SCAPEGRACES', 'OF', 'THE', 'TOWN', 
'RESORTED', 'THITHER', 'FOR', 'THE', 'PLEASURES', 'OF', 'A', 'CLUB', 'LIFE', 'AND', 'OFTEN', 'REVELLED', 'DEEP', 'INTO', 'THE', 'SMALL', 'HOURS', 'OF', 'THE', 'MORNING'] +5442-32873-0017-1669: ref=['LOSE', 'NO', 'TIME', 'AND', "I'LL", 'GIVE', 'YOU', 'HALF', 'A', 'CROWN'] +5442-32873-0017-1669: hyp=['LOSE', 'NO', 'TIME', 'AND', "I'LL", 'GIVE', 'YOU', 'HALF', 'A', 'CROWN'] +5442-32873-0018-1670: ref=['LUKE', 'STUCK', 'ON', 'HIS', 'GREASY', 'WIDEAWAKE', 'AND', 'IN', 'A', 'FEW', 'MINUTES', 'MORE', 'THE', 'DOG', 'CART', 'WAS', 'TRUNDLED', 'OUT', 'INTO', 'THE', 'LANE', 'AND', 'THE', 'HORSE', 'HARNESSED', 'WENT', 'BETWEEN', 'THE', 'SHAFTS', 'WITH', 'THAT', 'WONDERFUL', 'CHEERFULNESS', 'WITH', 'WHICH', 'THEY', 'BEAR', 'TO', 'BE', 'CALLED', 'UP', 'UNDER', 'STARTLING', 'CIRCUMSTANCES', 'AT', 'UNSEASONABLE', 'HOURS'] +5442-32873-0018-1670: hyp=['LUKE', 'STUCK', 'HOME', 'HIS', 'GREASY', 'WIDE', 'AWAKE', 'AND', 'IN', 'A', 'FEW', 'MINUTES', 'MORE', 'THE', 'DOG', 'CART', 'WAS', 'TRUNDLED', 'OUT', 'INTO', 'THE', 'LANE', 'AND', 'THE', 'HORSE', 'HARNESSED', 'WENT', 'BETWEEN', 'THE', 'SHAFTS', 'WITH', 'THAT', 'WONDERFUL', 'CHEERFULNESS', 'WITH', 'WHICH', 'THEY', 'BEAR', 'TO', 'BE', 'CALLED', 'UP', 'UNDER', 'STARTLING', 'CIRCUMSTANCES', 'AT', 'UNSEASONABLE', 'HOURS'] +5442-32873-0019-1671: ref=['IF', 'I', 'THOUGHT', "YOU'D", 'FAIL', 'ME', 'NOW', 'TAMAR', 'I', 'SHOULD', 'NEVER', 'COME', 'BACK', 'GOOD', 'NIGHT', 'TAMAR'] +5442-32873-0019-1671: hyp=['IF', 'I', 'THOUGHT', "YOU'D", 'FAIL', 'ME', 'NOW', 'TO', 'MORROW', 'I', 'SHOULD', 'NEVER', 'COME', 'BACK', 'GOOD', 'NIGHT', 'TAMAR'] +5442-41168-0000-1672: ref=['THE', 'ACT', 'SAID', 'THAT', 'IN', 'CASE', 'OF', 'DIFFERENCE', 'OF', 'OPINION', 'THERE', 'MUST', 'BE', 'A', 'BALLOT'] +5442-41168-0000-1672: hyp=['THE', 'ACT', 'SAID', 'THAT', 'IN', 'CASE', 'OF', 'DIFFERENCE', 'OF', 'OPINION', 'THERE', 'MUST', 'BE', 'A', 'BALLOT'] +5442-41168-0001-1673: ref=['HE', 'WENT', 'UP', 'TO', 'THE', 'TABLE', 'AND', 'STRIKING', 'IT', 'WITH', 'HIS', 'FINGER', 'RING', 'HE', 'SHOUTED', 'LOUDLY', 'A', 'BALLOT'] +5442-41168-0001-1673: hyp=['HE', 'WENT', 'UP', 'TO', 'THE', 'TABLE', 'AND', 'STRIKING', 'IT', 'WITH', 'HIS', 'FINGER', 'RING', 'HE', 'SHOUTED', 'LOUDLY', 'A', 'BALLOT'] +5442-41168-0002-1674: ref=['HE', 'WAS', 'SHOUTING', 'FOR', 'THE', 'VERY', 'COURSE', 'SERGEY', 'IVANOVITCH', 'HAD', 'PROPOSED', 'BUT', 'IT', 'WAS', 'EVIDENT', 'THAT', 'HE', 'HATED', 'HIM', 'AND', 'ALL', 'HIS', 'PARTY', 'AND', 'THIS', 'FEELING', 'OF', 'HATRED', 'SPREAD', 'THROUGH', 'THE', 'WHOLE', 'PARTY', 'AND', 'ROUSED', 'IN', 'OPPOSITION', 'TO', 'IT', 'THE', 'SAME', 'VINDICTIVENESS', 'THOUGH', 'IN', 'A', 'MORE', 'SEEMLY', 'FORM', 'ON', 'THE', 'OTHER', 'SIDE'] +5442-41168-0002-1674: hyp=['HE', 'WAS', 'SHOUTING', 'FOR', 'THE', 'VERY', 'COURSE', 'SERGEY', 'IVANOVITCH', 'HAD', 'PROPOSED', 'BUT', 'IT', 'WAS', 'EVIDENT', 'THAT', 'HE', 'HATED', 'HIM', 'AND', 'ALL', 'HIS', 'PARTY', 'AND', 'THIS', 'FEELING', 'OF', 'HATRED', 'SPREAD', 'THROUGH', 'THE', 'WHOLE', 'PARTY', 'AND', 'ROUSED', 'IN', 'OPPOSITION', 'TO', 'IT', 'THE', 'SAME', 'VINDICTIVENESS', 'THOUGH', 'IN', 'A', 'MORE', 'SEEMLY', 'FORM', 'ON', 'THE', 'OTHER', 'SIDE'] +5442-41168-0003-1675: ref=['SHOUTS', 'WERE', 'RAISED', 'AND', 'FOR', 'A', 'MOMENT', 'ALL', 'WAS', 'CONFUSION', 'SO', 'THAT', 'THE', 'MARSHAL', 'OF', 'THE', 'PROVINCE', 'HAD', 'TO', 'CALL', 'FOR', 'ORDER', 'A', 'BALLOT'] +5442-41168-0003-1675: hyp=['SHOUTS', 'WERE', 'RAISED', 'AND', 'FOR', 'A', 'MOMENT', 'ALL', 'WAS', 'CONFUSION', 'SO', 'THAT', 'THE', 'MARSHAL', 'OF', 'THE', 'PROVINCE', 'HAD', 'TO', 
'CALL', 'FOR', 'ORDER', 'A', 'BALLOT'] +5442-41168-0004-1676: ref=['WE', 'SHED', 'OUR', 'BLOOD', 'FOR', 'OUR', 'COUNTRY'] +5442-41168-0004-1676: hyp=['WE', 'SHED', 'OUR', 'BLOOD', 'FOR', 'OUR', 'COUNTRY'] +5442-41168-0005-1677: ref=['THE', 'CONFIDENCE', 'OF', 'THE', 'MONARCH', 'NO', 'CHECKING', 'THE', 'ACCOUNTS', 'OF', 'THE', 'MARSHAL', "HE'S", 'NOT', 'A', 'CASHIER', 'BUT', "THAT'S", 'NOT', 'THE', 'POINT'] +5442-41168-0005-1677: hyp=['THE', 'CONFIDENCE', 'OF', 'THE', 'MONARCH', 'NO', 'CHECKING', 'THE', 'ACCOUNTS', 'OF', 'THE', 'MARTIAN', 'HE', 'IS', 'NOT', 'A', 'CASHIER', 'BUT', "THAT'S", 'NOT', 'THE', 'POINT'] +5442-41168-0006-1678: ref=['VOTES', 'PLEASE', 'BEASTLY'] +5442-41168-0006-1678: hyp=['VOTES', 'PLEASE', 'PEASLEY'] +5442-41168-0007-1679: ref=['THEY', 'EXPRESSED', 'THE', 'MOST', 'IMPLACABLE', 'HATRED'] +5442-41168-0007-1679: hyp=['THEY', 'EXPRESSED', 'THE', 'MOST', 'IMPLACABLE', 'HATRED'] +5442-41168-0008-1680: ref=['LEVIN', 'DID', 'NOT', 'IN', 'THE', 'LEAST', 'UNDERSTAND', 'WHAT', 'WAS', 'THE', 'MATTER', 'AND', 'HE', 'MARVELED', 'AT', 'THE', 'PASSION', 'WITH', 'WHICH', 'IT', 'WAS', 'DISPUTED', 'WHETHER', 'OR', 'NOT', 'THE', 'DECISION', 'ABOUT', 'FLEROV', 'SHOULD', 'BE', 'PUT', 'TO', 'THE', 'VOTE'] +5442-41168-0008-1680: hyp=['LEVIN', 'DID', 'NOT', 'IN', 'THE', 'LEAST', 'UNDERSTAND', 'WHAT', 'WAS', 'THE', 'MATTER', 'AND', 'HE', 'MARVELLED', 'AT', 'THE', 'PASSION', 'WITH', 'WHICH', 'IT', 'WAS', 'DISPUTED', 'WHETHER', 'OR', 'NOT', 'THE', 'DECISION', 'ABOUT', 'FLAIROFF', 'SHOULD', 'BE', 'PUT', 'TO', 'THE', 'VOTE'] +5442-41168-0009-1681: ref=['HE', 'FORGOT', 'AS', 'SERGEY', 'IVANOVITCH', 'EXPLAINED', 'TO', 'HIM', 'AFTERWARDS', 'THIS', 'SYLLOGISM', 'THAT', 'IT', 'WAS', 'NECESSARY', 'FOR', 'THE', 'PUBLIC', 'GOOD', 'TO', 'GET', 'RID', 'OF', 'THE', 'MARSHAL', 'OF', 'THE', 'PROVINCE', 'THAT', 'TO', 'GET', 'RID', 'OF', 'THE', 'MARSHAL', 'IT', 'WAS', 'NECESSARY', 'TO', 'HAVE', 'A', 'MAJORITY', 'OF', 'VOTES', 'THAT', 'TO', 'GET', 'A', 'MAJORITY', 'OF', 'VOTES', 'IT', 'WAS', 'NECESSARY', 'TO', 'SECURE', "FLEROV'S", 'RIGHT', 'TO', 'VOTE', 'THAT', 'TO', 'SECURE', 'THE', 'RECOGNITION', 'OF', "FLEROV'S", 'RIGHT', 'TO', 'VOTE', 'THEY', 'MUST', 'DECIDE', 'ON', 'THE', 'INTERPRETATION', 'TO', 'BE', 'PUT', 'ON', 'THE', 'ACT'] +5442-41168-0009-1681: hyp=['HE', 'FORGOT', 'AS', 'SERGEY', 'IVANOVITCH', 'EXPLAINED', 'TO', 'HIM', 'AFTERWARDS', 'THIS', 'SYLLOGISM', 'THAT', 'IT', 'WAS', 'NECESSARY', 'FOR', 'THE', 'PUBLIC', 'GOOD', 'TO', 'GET', 'RID', 'OF', 'THE', 'MARSHAL', 'OF', 'THE', 'PROVINCE', 'THAT', 'TO', 'GET', 'IT', 'TO', 'THE', 'MARSHAL', 'IT', 'WAS', 'NECESSARY', 'TO', 'HAVE', 'A', 'MAJORITY', 'OF', 'VOTES', 'THAT', 'TO', 'GET', 'A', 'MAJORITY', 'OF', 'VOTES', 'IT', 'WAS', 'NECESSARY', 'TO', 'SECURE', "FLIROV'S", 'RIGHT', 'TO', 'VOTE', 'THAT', 'TO', 'SECURE', 'THE', 'RECOGNITION', 'OF', "FLAEROFF'S", 'RIGHT', 'TO', 'VOTE', 'THEY', 'MUST', 'DECIDE', 'ON', 'THE', 'INTERPRETATION', 'TO', 'BE', 'PUT', 'ON', 'THE', 'ACT'] +5442-41168-0010-1682: ref=['BUT', 'LEVIN', 'FORGOT', 'ALL', 'THAT', 'AND', 'IT', 'WAS', 'PAINFUL', 'TO', 'HIM', 'TO', 'SEE', 'ALL', 'THESE', 'EXCELLENT', 'PERSONS', 'FOR', 'WHOM', 'HE', 'HAD', 'A', 'RESPECT', 'IN', 'SUCH', 'AN', 'UNPLEASANT', 'AND', 'VICIOUS', 'STATE', 'OF', 'EXCITEMENT'] +5442-41168-0010-1682: hyp=['BUT', 'LEVIN', 'FORGOT', 'ALL', 'THAT', 'AND', 'IT', 'WAS', 'PAINFUL', 'TO', 'HIM', 'TO', 'SEE', 'ALL', 'THESE', 'EXCELLENT', 'PERSONS', 'FOR', 'WHOM', 'HE', 'HAD', 'A', 'RESPECT', 'IN', 'SUCH', 'AN', 'UNPLEASANT', 'AND', 'VICIOUS', 'STATE', 'OF', 'EXCITEMENT'] 
+5442-41168-0011-1683: ref=['TO', 'ESCAPE', 'FROM', 'THIS', 'PAINFUL', 'FEELING', 'HE', 'WENT', 'AWAY', 'INTO', 'THE', 'OTHER', 'ROOM', 'WHERE', 'THERE', 'WAS', 'NOBODY', 'EXCEPT', 'THE', 'WAITERS', 'AT', 'THE', 'REFRESHMENT', 'BAR'] +5442-41168-0011-1683: hyp=['TO', 'ESCAPE', 'FROM', 'THIS', 'PAINFUL', 'FEELING', 'HE', 'WENT', 'AWAY', 'INTO', 'THE', 'OTHER', 'ROOM', 'WHERE', 'THERE', 'WAS', 'NOBODY', 'EXCEPT', 'THE', 'WAITERS', 'AT', 'THE', 'REFRESHMENT', 'BAR'] +5442-41168-0012-1684: ref=['HE', 'PARTICULARLY', 'LIKED', 'THE', 'WAY', 'ONE', 'GRAY', 'WHISKERED', 'WAITER', 'WHO', 'SHOWED', 'HIS', 'SCORN', 'FOR', 'THE', 'OTHER', 'YOUNGER', 'ONES', 'AND', 'WAS', 'JEERED', 'AT', 'BY', 'THEM', 'WAS', 'TEACHING', 'THEM', 'HOW', 'TO', 'FOLD', 'UP', 'NAPKINS', 'PROPERLY'] +5442-41168-0012-1684: hyp=['HE', 'PARTICULARLY', 'LIKED', 'THE', 'WAY', 'ONE', 'GRAY', 'WHISKERED', 'WAITER', 'WHO', 'SHOWED', 'A', 'SCORN', 'FOR', 'THE', 'OTHER', 'YOUNGER', 'ONES', 'AND', 'WAS', 'JEERED', 'AT', 'BY', 'THEM', 'WAS', 'TEACHING', 'THEM', 'HOW', 'TO', 'FOLD', 'UP', 'NAPKINS', 'PROPERLY'] +5442-41168-0013-1685: ref=['LEVIN', 'ADVANCED', 'BUT', 'UTTERLY', 'FORGETTING', 'WHAT', 'HE', 'WAS', 'TO', 'DO', 'AND', 'MUCH', 'EMBARRASSED', 'HE', 'TURNED', 'TO', 'SERGEY', 'IVANOVITCH', 'WITH', 'THE', 'QUESTION', 'WHERE', 'AM', 'I', 'TO', 'PUT', 'IT'] +5442-41168-0013-1685: hyp=['LEVIN', 'ADVANCED', 'BUT', 'UTTERLY', 'FORGETTING', 'WHAT', 'HE', 'WAS', 'TO', 'DO', 'AND', 'MUCH', 'EMBARRASSED', 'HE', 'TURNED', 'TO', 'SERGEY', 'IVANOVITCH', 'WITH', 'THE', 'QUESTION', 'WHERE', 'AM', 'I', 'TO', 'PUT', 'IT'] +5442-41168-0014-1686: ref=['SERGEY', 'IVANOVITCH', 'FROWNED'] +5442-41168-0014-1686: hyp=['SERGEY', 'IVANOVITCH', 'FROWNED'] +5442-41168-0015-1687: ref=['THAT', 'IS', 'A', 'MATTER', 'FOR', 'EACH', "MAN'S", 'OWN', 'DECISION', 'HE', 'SAID', 'SEVERELY'] +5442-41168-0015-1687: hyp=['THAT', 'IS', 'A', 'MATTER', 'FOR', 'EACH', "MAN'S", 'OWN', 'DECISION', 'HE', 'SAID', 'SEVERELY'] +5442-41168-0016-1688: ref=['HAVING', 'PUT', 'IT', 'IN', 'HE', 'RECOLLECTED', 'THAT', 'HE', 'OUGHT', 'TO', 'HAVE', 'THRUST', 'HIS', 'LEFT', 'HAND', 'TOO', 'AND', 'SO', 'HE', 'THRUST', 'IT', 'IN', 'THOUGH', 'TOO', 'LATE', 'AND', 'STILL', 'MORE', 'OVERCOME', 'WITH', 'CONFUSION', 'HE', 'BEAT', 'A', 'HASTY', 'RETREAT', 'INTO', 'THE', 'BACKGROUND'] +5442-41168-0016-1688: hyp=['HAVING', 'PUT', 'IT', 'IN', 'HE', 'RECOLLECTED', 'THAT', 'HE', 'OUGHT', 'TO', 'HAVE', 'THRUST', 'HIS', 'LEFT', 'HAND', 'TOO', 'AND', 'SO', 'HE', 'THRUST', 'IT', 'IN', 'THOUGH', 'TOO', 'LATE', 'AND', 'STILL', 'MORE', 'OVERCOME', 'WITH', 'CONFUSION', 'HE', 'BEAT', 'A', 'HASTY', 'RETREAT', 'INTO', 'THE', 'BACKGROUND'] +5442-41168-0017-1689: ref=['A', 'HUNDRED', 'AND', 'TWENTY', 'SIX', 'FOR', 'ADMISSION', 'NINETY', 'EIGHT', 'AGAINST'] +5442-41168-0017-1689: hyp=['A', 'HUNDRED', 'AND', 'TWENTY', 'SIX', 'FOR', 'ADMISSION', 'NINETY', 'EIGHT', 'AGAINST'] +5442-41168-0018-1690: ref=['SANG', 'OUT', 'THE', 'VOICE', 'OF', 'THE', 'SECRETARY', 'WHO', 'COULD', 'NOT', 'PRONOUNCE', 'THE', 'LETTER', 'R'] +5442-41168-0018-1690: hyp=['SANG', 'THE', 'VOICE', 'OF', 'THE', 'SECRETARY', 'WHO', 'COULD', 'NOT', 'PRONOUNCE', 'THE', 'LETTER', 'R'] +5442-41168-0019-1691: ref=['THEN', 'THERE', 'WAS', 'A', 'LAUGH', 'A', 'BUTTON', 'AND', 'TWO', 'NUTS', 'WERE', 'FOUND', 'IN', 'THE', 'BOX'] +5442-41168-0019-1691: hyp=['THEN', 'THERE', 'WAS', 'A', 'LAUGH', 'A', 'BOTTOM', 'AND', 'TWO', 'KNOTS', 'WERE', 'FOUND', 'ON', 'THE', 'BOX'] +5442-41168-0020-1692: ref=['BUT', 'THE', 'OLD', 'PARTY', 'DID', 'NOT', 'CONSIDER', 
'THEMSELVES', 'CONQUERED'] +5442-41168-0020-1692: hyp=['BUT', 'THE', 'OLD', 'PARTY', 'DID', 'NOT', 'CONSIDER', 'THEMSELVES', 'CONQUERED'] +5442-41168-0021-1693: ref=['IN', 'REPLY', 'SNETKOV', 'SPOKE', 'OF', 'THE', 'TRUST', 'THE', 'NOBLEMEN', 'OF', 'THE', 'PROVINCE', 'HAD', 'PLACED', 'IN', 'HIM', 'THE', 'AFFECTION', 'THEY', 'HAD', 'SHOWN', 'HIM', 'WHICH', 'HE', 'DID', 'NOT', 'DESERVE', 'AS', 'HIS', 'ONLY', 'MERIT', 'HAD', 'BEEN', 'HIS', 'ATTACHMENT', 'TO', 'THE', 'NOBILITY', 'TO', 'WHOM', 'HE', 'HAD', 'DEVOTED', 'TWELVE', 'YEARS', 'OF', 'SERVICE'] +5442-41168-0021-1693: hyp=['IN', 'REPLY', 'SNETKOV', 'SPOKE', 'OF', 'THE', 'TRUST', 'THE', 'NOBLEMEN', 'OF', 'THE', 'PROVINCE', 'HAD', 'PLACED', 'IN', 'HIM', 'THE', 'AFFECTION', 'THEY', 'HAD', 'SHOWN', 'HIM', 'WHICH', 'HE', 'DID', 'NOT', 'DESERVE', 'AS', 'HIS', 'ONLY', 'MERIT', 'HAD', 'BEEN', 'HIS', 'ATTACHMENT', 'TO', 'THE', 'NOBILITY', 'TO', 'WHOM', 'HE', 'HAD', 'DEVOTED', 'TWELVE', 'YEARS', 'OF', 'SERVICE'] +5442-41168-0022-1694: ref=['THIS', 'EXPRESSION', 'IN', 'THE', "MARSHAL'S", 'FACE', 'WAS', 'PARTICULARLY', 'TOUCHING', 'TO', 'LEVIN', 'BECAUSE', 'ONLY', 'THE', 'DAY', 'BEFORE', 'HE', 'HAD', 'BEEN', 'AT', 'HIS', 'HOUSE', 'ABOUT', 'HIS', 'TRUSTEE', 'BUSINESS', 'AND', 'HAD', 'SEEN', 'HIM', 'IN', 'ALL', 'HIS', 'GRANDEUR', 'A', 'KIND', 'HEARTED', 'FATHERLY', 'MAN'] +5442-41168-0022-1694: hyp=['THIS', 'EXPRESSION', 'IN', 'THE', "MARSHAL'S", 'FACE', 'WAS', 'PARTICULARLY', 'TOUCHING', 'TO', 'LEVIN', 'BECAUSE', 'ONLY', 'THE', 'DAY', 'BEFORE', 'HE', 'HAD', 'BEEN', 'AT', 'HIS', 'HOUSE', 'ABOUT', 'HIS', 'TRUSTY', 'BUSINESS', 'AND', 'HAD', 'SEEN', 'HIM', 'IN', 'ALL', 'HIS', 'GRANDEUR', 'A', 'KIND', 'HEARTED', 'FATHERLY', 'MAN'] +5442-41168-0023-1695: ref=['IF', 'THERE', 'ARE', 'MEN', 'YOUNGER', 'AND', 'MORE', 'DESERVING', 'THAN', 'I', 'LET', 'THEM', 'SERVE'] +5442-41168-0023-1695: hyp=['IF', 'THERE', 'ARE', 'MEN', 'YOUNGER', 'AND', 'MORE', 'DESERVING', 'THAN', 'I', 'LET', 'THEM', 'SERVE'] +5442-41168-0024-1696: ref=['AND', 'THE', 'MARSHAL', 'DISAPPEARED', 'THROUGH', 'A', 'SIDE', 'DOOR'] +5442-41168-0024-1696: hyp=['AND', 'THE', 'MARSHAL', 'DISAPPEARED', 'THROUGH', 'A', 'SIDE', 'DOOR'] +5442-41168-0025-1697: ref=['THEY', 'WERE', 'TO', 'PROCEED', 'IMMEDIATELY', 'TO', 'THE', 'ELECTION'] +5442-41168-0025-1697: hyp=['THEY', 'WERE', 'TO', 'PROCEED', 'IMMEDIATELY', 'TO', 'THE', 'ELECTION'] +5442-41168-0026-1698: ref=['TWO', 'NOBLE', 'GENTLEMEN', 'WHO', 'HAD', 'A', 'WEAKNESS', 'FOR', 'STRONG', 'DRINK', 'HAD', 'BEEN', 'MADE', 'DRUNK', 'BY', 'THE', 'PARTISANS', 'OF', 'SNETKOV', 'AND', 'A', 'THIRD', 'HAD', 'BEEN', 'ROBBED', 'OF', 'HIS', 'UNIFORM'] +5442-41168-0026-1698: hyp=['TWO', 'NOBLE', 'GENTLEMEN', 'WHO', 'HAD', 'A', 'WEAKNESS', 'FOR', 'STRONG', 'DRINK', 'HAD', 'BEEN', 'MADE', 'DRUNK', 'BY', 'THE', 'PARTISANS', 'OF', 'SNETKOV', 'AND', 'A', 'THIRD', 'HAD', 'BEEN', 'ROBBED', 'OF', 'HIS', 'UNIFORM'] +5442-41168-0027-1699: ref=['ON', 'LEARNING', 'THIS', 'THE', 'NEW', 'PARTY', 'HAD', 'MADE', 'HASTE', 'DURING', 'THE', 'DISPUTE', 'ABOUT', 'FLEROV', 'TO', 'SEND', 'SOME', 'OF', 'THEIR', 'MEN', 'IN', 'A', 'SLEDGE', 'TO', 'CLOTHE', 'THE', 'STRIPPED', 'GENTLEMAN', 'AND', 'TO', 'BRING', 'ALONG', 'ONE', 'OF', 'THE', 'INTOXICATED', 'TO', 'THE', 'MEETING'] +5442-41168-0027-1699: hyp=['ON', 'LEARNING', 'THIS', 'THE', 'NEW', 'PARTY', 'HAD', 'MADE', 'HASTE', 'DURING', 'THE', 'DISPUTE', 'ABOUT', 'FLEROFF', 'TO', 'SEND', 'SOME', 'OF', 'THEIR', 'MEN', 'IN', 'A', 'SLEDGE', 'TO', 'CLOTHE', 'THE', 'STRIPPED', 'GENTLEMEN', 'AND', 'TO', 'BRING', 'ALONG', 'ONE', 'OF', 'THE', 
'INTOXICATED', 'TO', 'THE', 'MEETING'] +5442-41169-0000-1700: ref=['LEVIN', 'DID', 'NOT', 'CARE', 'TO', 'EAT', 'AND', 'HE', 'WAS', 'NOT', 'SMOKING', 'HE', 'DID', 'NOT', 'WANT', 'TO', 'JOIN', 'HIS', 'OWN', 'FRIENDS', 'THAT', 'IS', 'SERGEY', 'IVANOVITCH', 'STEPAN', 'ARKADYEVITCH', 'SVIAZHSKY', 'AND', 'THE', 'REST', 'BECAUSE', 'VRONSKY', 'IN', 'HIS', "EQUERRY'S", 'UNIFORM', 'WAS', 'STANDING', 'WITH', 'THEM', 'IN', 'EAGER', 'CONVERSATION'] +5442-41169-0000-1700: hyp=['LEVIN', 'DID', 'NOT', 'CARE', 'TO', 'EAT', 'AND', 'HE', 'WAS', 'NOT', 'SMOKING', 'HE', 'DID', 'NOT', 'WANT', 'TO', 'JOIN', 'HIS', 'OWN', 'FRIENDS', 'THAT', 'IS', 'SERGEY', 'IVANOVITCH', 'STEPAN', 'ARKADYEVITCH', 'SVIAZHSKY', 'AND', 'THE', 'REST', 'BECAUSE', 'VRONSKY', 'IN', 'HIS', "EQUERRY'S", 'UNIFORM', 'WAS', 'STANDING', 'WITH', 'THEM', 'IN', 'EAGER', 'CONVERSATION'] +5442-41169-0001-1701: ref=['HE', 'WENT', 'TO', 'THE', 'WINDOW', 'AND', 'SAT', 'DOWN', 'SCANNING', 'THE', 'GROUPS', 'AND', 'LISTENING', 'TO', 'WHAT', 'WAS', 'BEING', 'SAID', 'AROUND', 'HIM'] +5442-41169-0001-1701: hyp=['HE', 'WENT', 'TO', 'THE', 'WINDOW', 'AND', 'SAT', 'DOWN', 'SCANNING', 'THE', 'GROUPS', 'AND', 'LISTENING', 'TO', 'WHAT', 'WAS', 'BEING', 'SAID', 'AROUND', 'HIM'] +5442-41169-0002-1702: ref=["HE'S", 'SUCH', 'A', 'BLACKGUARD'] +5442-41169-0002-1702: hyp=["HE'S", 'SUCH', 'A', 'BLACKGUARD'] +5442-41169-0003-1703: ref=['I', 'HAVE', 'TOLD', 'HIM', 'SO', 'BUT', 'IT', 'MAKES', 'NO', 'DIFFERENCE', 'ONLY', 'THINK', 'OF', 'IT'] +5442-41169-0003-1703: hyp=['I', 'HAVE', 'TOLD', 'HIM', 'SO', 'BUT', 'IT', 'MAKES', 'NO', 'DIFFERENCE', 'ONLY', 'THINK', 'OF', 'IT'] +5442-41169-0004-1704: ref=['THESE', 'PERSONS', 'WERE', 'UNMISTAKABLY', 'SEEKING', 'A', 'PLACE', 'WHERE', 'THEY', 'COULD', 'TALK', 'WITHOUT', 'BEING', 'OVERHEARD'] +5442-41169-0004-1704: hyp=['THESE', 'PERSONS', 'WERE', 'UNMISTAKABLY', 'SEEKING', 'A', 'PLACE', 'WHERE', 'THEY', 'COULD', 'TALK', 'WITHOUT', 'BEING', 'OVERHEARD'] +5442-41169-0005-1705: ref=['SHALL', 'WE', 'GO', 'ON', 'YOUR', 'EXCELLENCY', 'FINE', 'CHAMPAGNE'] +5442-41169-0005-1705: hyp=['SHALL', 'WE', 'GO', 'ON', 'YOUR', 'EXCELLENCY', 'FINE', 'CHAMPAGNE'] +5442-41169-0006-1706: ref=['LAST', 'YEAR', 'AT', 'OUR', 'DISTRICT', 'MARSHAL', 'NIKOLAY', "IVANOVITCH'S"] +5442-41169-0006-1706: hyp=['LAST', 'YEAR', 'AT', 'OUR', 'DISTRICT', 'MARSHAL', 'NIKOLAY', "IVANOVITCH'S"] +5442-41169-0007-1707: ref=['OH', 'STILL', 'JUST', 'THE', 'SAME', 'ALWAYS', 'AT', 'A', 'LOSS', 'THE', 'LANDOWNER', 'ANSWERED', 'WITH', 'A', 'RESIGNED', 'SMILE', 'BUT', 'WITH', 'AN', 'EXPRESSION', 'OF', 'SERENITY', 'AND', 'CONVICTION', 'THAT', 'SO', 'IT', 'MUST', 'BE'] +5442-41169-0007-1707: hyp=['OH', 'STILL', 'JUST', 'THE', 'SAME', 'ALWAYS', 'AT', 'A', 'LOSS', 'THE', 'LANDOWNER', 'ANSWERED', 'WITH', 'A', 'RESIGNED', 'SMILE', 'BUT', 'WITH', 'AN', 'EXPRESSION', 'OF', 'SERENITY', 'AND', 'CONVICTION', 'THAT', 'SO', 'IT', 'MUST', 'BE'] +5442-41169-0008-1708: ref=['WHY', 'WHAT', 'IS', 'THERE', 'TO', 'UNDERSTAND'] +5442-41169-0008-1708: hyp=['WHY', 'WHAT', 'IS', 'THERE', 'TO', 'UNDERSTAND'] +5442-41169-0009-1709: ref=["THERE'S", 'NO', 'MEANING', 'IN', 'IT', 'AT', 'ALL'] +5442-41169-0009-1709: hyp=['THERE', 'IS', 'NO', 'MEANING', 'IN', 'IT', 'AT', 'ALL'] +5442-41169-0010-1710: ref=['THEN', 'TOO', 'ONE', 'MUST', 'KEEP', 'UP', 'CONNECTIONS'] +5442-41169-0010-1710: hyp=['THEN', 'TOO', 'ONE', 'MUST', 'KEEP', 'UP', 'CONNECTIONS'] +5442-41169-0011-1711: ref=["IT'S", 'A', 'MORAL', 'OBLIGATION', 'OF', 'A', 'SORT'] +5442-41169-0011-1711: hyp=["IT'S", 'A', 'MORAL', 'OBLIGATION', 'OF', 'A', 'SORT'] 
+5442-41169-0012-1712: ref=['AND', 'THEN', 'TO', 'TELL', 'THE', 'TRUTH', "THERE'S", "ONE'S", 'OWN', 'INTERESTS'] +5442-41169-0012-1712: hyp=['AND', 'THEN', 'TO', 'TELL', 'THE', 'TRUTH', "THERE'S", "ONE'S", 'OWN', 'INTEREST'] +5442-41169-0013-1713: ref=["THEY'RE", 'PROPRIETORS', 'OF', 'A', 'SORT', 'BUT', "WE'RE", 'THE', 'LANDOWNERS'] +5442-41169-0013-1713: hyp=["THEY'RE", 'PROPRIETORS', 'OF', 'A', 'SORT', 'BUT', 'WE', 'ARE', 'THE', 'LANDOWNERS'] +5442-41169-0014-1714: ref=['THAT', 'IT', 'MAY', 'BE', 'BUT', 'STILL', 'IT', 'OUGHT', 'TO', 'BE', 'TREATED', 'A', 'LITTLE', 'MORE', 'RESPECTFULLY'] +5442-41169-0014-1714: hyp=['THAT', 'IT', 'MAY', 'BE', 'BUT', 'STILL', 'IT', 'OUGHT', 'TO', 'BE', 'TREATED', 'A', 'LITTLE', 'MORE', 'RESPECTFULLY'] +5442-41169-0015-1715: ref=['IF', "WE'RE", 'LAYING', 'OUT', 'A', 'GARDEN', 'PLANNING', 'ONE', 'BEFORE', 'THE', 'HOUSE', 'YOU', 'KNOW', 'AND', 'THERE', "YOU'VE", 'A', 'TREE', "THAT'S", 'STOOD', 'FOR', 'CENTURIES', 'IN', 'THE', 'VERY', 'SPOT', 'OLD', 'AND', 'GNARLED', 'IT', 'MAY', 'BE', 'AND', 'YET', 'YOU', "DON'T", 'CUT', 'DOWN', 'THE', 'OLD', 'FELLOW', 'TO', 'MAKE', 'ROOM', 'FOR', 'THE', 'FLOWERBEDS', 'BUT', 'LAY', 'OUT', 'YOUR', 'BEDS', 'SO', 'AS', 'TO', 'TAKE', 'ADVANTAGE', 'OF', 'THE', 'TREE'] +5442-41169-0015-1715: hyp=['IF', 'WE', 'ARE', 'LAYING', 'OUT', 'A', 'GARDEN', 'PLANTING', 'ONE', 'BEFORE', 'THE', 'HOUSE', 'YOU', 'KNOW', 'AND', 'THERE', 'YOU', 'HAVE', 'A', 'TREE', 'THAT', 'STOOD', 'FOR', 'CENTURIES', 'IN', 'THE', 'VERY', 'SPOT', 'OLD', 'AND', 'GNARLED', 'IT', 'MAY', 'BE', 'AND', 'YET', 'YOU', "DON'T", 'CUT', 'DOWN', 'THE', 'OLD', 'FELLOW', 'TO', 'MAKE', 'ROOM', 'FOR', 'THE', 'FLOWER', 'BEDS', 'BUT', 'LAY', 'OUT', 'YOUR', 'BEDS', 'SO', 'AS', 'TO', 'TAKE', 'ADVANTAGE', 'OF', 'THE', 'TREE'] +5442-41169-0016-1716: ref=['WELL', 'AND', 'HOW', 'IS', 'YOUR', 'LAND', 'DOING'] +5442-41169-0016-1716: hyp=['WELL', 'AND', "HOW'S", 'YOUR', 'LAND', 'DOING'] +5442-41169-0017-1717: ref=['BUT', "ONE'S", 'WORK', 'IS', 'THROWN', 'IN', 'FOR', 'NOTHING'] +5442-41169-0017-1717: hyp=['BUT', "ONE'S", 'WORK', 'IS', 'THROWN', 'IN', 'FOR', 'NOTHING'] +5442-41169-0018-1718: ref=['OH', 'WELL', 'ONE', 'DOES', 'IT', 'WHAT', 'WOULD', 'YOU', 'HAVE'] +5442-41169-0018-1718: hyp=['OH', 'WELL', 'ONE', 'DOES', 'IT', 'WHAT', 'WOULD', 'YOU', 'HAVE'] +5442-41169-0019-1719: ref=['AND', "WHAT'S", 'MORE', 'THE', 'LANDOWNER', 'WENT', 'ON', 'LEANING', 'HIS', 'ELBOWS', 'ON', 'THE', 'WINDOW', 'AND', 'CHATTING', 'ON', 'MY', 'SON', 'I', 'MUST', 'TELL', 'YOU', 'HAS', 'NO', 'TASTE', 'FOR', 'IT'] +5442-41169-0019-1719: hyp=['AND', "WHAT'S", 'MORE', 'THE', 'LANDOWNER', 'WENT', 'ON', 'LEANING', 'HIS', 'ELBOWS', 'ON', 'THE', 'WINDOW', 'AND', 'CHATTING', 'ON', 'MY', 'SON', 'I', 'MUST', 'TELL', 'YOU', 'HAS', 'NO', 'TASTE', 'FOR', 'IT'] +5442-41169-0020-1720: ref=['SO', "THERE'LL", 'BE', 'NO', 'ONE', 'TO', 'KEEP', 'IT', 'UP', 'AND', 'YET', 'ONE', 'DOES', 'IT'] +5442-41169-0020-1720: hyp=['SO', "THERE'LL", 'BE', 'NO', 'ONE', 'TO', 'KEEP', 'IT', 'UP', 'AND', 'YET', 'ONE', 'DOES', 'IT'] +5442-41169-0021-1721: ref=['WE', 'WALKED', 'ABOUT', 'THE', 'FIELDS', 'AND', 'THE', 'GARDEN', 'NO', 'SAID', 'HE', 'STEPAN', 'VASSILIEVITCH', "EVERYTHING'S", 'WELL', 'LOOKED', 'AFTER', 'BUT', 'YOUR', "GARDEN'S", 'NEGLECTED'] +5442-41169-0021-1721: hyp=['WE', 'WALKED', 'ABOUT', 'THE', 'FIELDS', 'AND', 'THE', 'GARDEN', 'NO', 'SAID', 'HE', 'STEPAN', 'MASLOVITCH', "EVERYTHING'S", 'WELL', 'LOOKED', 'AFTER', 'BUT', 'YOUR', "GARDEN'S", 'NEGLECTED'] +5442-41169-0022-1722: ref=['TO', 'MY', 'THINKING', "I'D", 'CUT', 'DOWN', 'THAT', 
'LIME', 'TREE'] +5442-41169-0022-1722: hyp=['TO', 'MY', 'THINKING', "I'D", 'CUT', 'DOWN', 'THE', 'LINERY'] +5442-41169-0023-1723: ref=['HERE', "YOU'VE", 'THOUSANDS', 'OF', 'LIMES', 'AND', 'EACH', 'WOULD', 'MAKE', 'TWO', 'GOOD', 'BUNDLES', 'OF', 'BARK'] +5442-41169-0023-1723: hyp=['HERE', 'YOU', 'HAVE', 'THOUSANDS', 'OF', 'LIMES', 'AND', 'EACH', 'WOULD', 'MAKE', 'TWO', 'GOOD', 'BUNDLES', 'OF', 'BARK'] +5442-41169-0024-1724: ref=["YOU'RE", 'MARRIED', "I'VE", 'HEARD', 'SAID', 'THE', 'LANDOWNER'] +5442-41169-0024-1724: hyp=["YOU'RE", 'MARRIED', 'I', 'HEARD', 'SAID', 'THE', 'LANDOWNER'] +5442-41169-0025-1725: ref=['YES', "IT'S", 'RATHER', 'STRANGE', 'HE', 'WENT', 'ON'] +5442-41169-0025-1725: hyp=['YES', "IT'S", 'RATHER', 'STRANGE', 'HE', 'WENT', 'ON'] +5442-41169-0026-1726: ref=['THE', 'LANDOWNER', 'CHUCKLED', 'UNDER', 'HIS', 'WHITE', 'MUSTACHES'] +5442-41169-0026-1726: hyp=['THE', 'LANDOWNER', 'CHUCKLED', 'UNDER', 'HIS', 'WHITE', 'MOUSTACHES'] +5442-41169-0027-1727: ref=['WHY', "DON'T", 'WE', 'CUT', 'DOWN', 'OUR', 'PARKS', 'FOR', 'TIMBER'] +5442-41169-0027-1727: hyp=['WHY', "DON'T", 'WE', 'CUT', 'DOWN', 'OUR', 'BOGS', 'FOR', 'TIMBER'] +5442-41169-0028-1728: ref=['SAID', 'LEVIN', 'RETURNING', 'TO', 'A', 'THOUGHT', 'THAT', 'HAD', 'STRUCK', 'HIM'] +5442-41169-0028-1728: hyp=['SAID', 'LEVIN', 'RETURNING', 'TO', 'A', 'THOUGHT', 'THAT', 'HAD', 'STRUCK', 'HIM'] +5442-41169-0029-1729: ref=["THERE'S", 'A', 'CLASS', 'INSTINCT', 'TOO', 'OF', 'WHAT', 'ONE', 'OUGHT', 'AND', "OUGHTN'T", 'TO', 'DO'] +5442-41169-0029-1729: hyp=['THERE', 'IS', 'A', 'CLASS', 'INSTINCT', 'TOO', 'OF', 'WHAT', 'ONE', 'OUGHT', 'AND', 'OUGHT', 'NOT', 'TO', 'DO'] +5442-41169-0030-1730: ref=["THERE'S", 'THE', 'PEASANTS', 'TOO', 'I', 'WONDER', 'AT', 'THEM', 'SOMETIMES', 'ANY', 'GOOD', 'PEASANT', 'TRIES', 'TO', 'TAKE', 'ALL', 'THE', 'LAND', 'HE', 'CAN'] +5442-41169-0030-1730: hyp=["THERE'S", 'THE', 'PEASANTS', 'TOO', 'I', 'WONDER', 'AT', 'THEM', 'SOMETIMES', 'ANY', 'GOOD', 'PEASANT', 'TRIES', 'TO', 'TAKE', 'ALL', 'THE', 'LAND', 'HE', 'CAN'] +5442-41169-0031-1731: ref=['WITHOUT', 'A', 'RETURN', 'TOO', 'AT', 'A', 'SIMPLE', 'LOSS'] +5442-41169-0031-1731: hyp=['WITHOUT', 'A', 'RETURN', 'TOO', 'AT', 'A', 'SIMPLE', 'LOSS'] +5484-24317-0000-1732: ref=['WHEN', 'HE', 'CAME', 'FROM', 'THE', 'BATH', 'PROCLUS', 'VISITED', 'HIM', 'AGAIN'] +5484-24317-0000-1732: hyp=['WHEN', 'HE', 'CAME', 'FROM', 'THE', 'BATH', 'PROCLAS', 'VISITED', 'HIM', 'AGAIN'] +5484-24317-0001-1733: ref=['BUT', 'HERMON', 'WAS', 'NOT', 'IN', 'THE', 'MOOD', 'TO', 'SHARE', 'A', 'JOYOUS', 'REVEL', 'AND', 'HE', 'FRANKLY', 'SAID', 'SO', 'ALTHOUGH', 'IMMEDIATELY', 'AFTER', 'HIS', 'RETURN', 'HE', 'HAD', 'ACCEPTED', 'THE', 'INVITATION', 'TO', 'THE', 'FESTIVAL', 'WHICH', 'THE', 'WHOLE', 'FELLOWSHIP', 'OF', 'ARTISTS', 'WOULD', 'GIVE', 'THE', 'FOLLOWING', 'DAY', 'IN', 'HONOUR', 'OF', 'THE', 'SEVENTIETH', 'BIRTHDAY', 'OF', 'THE', 'OLD', 'SCULPTOR', 'EUPHRANOR'] +5484-24317-0001-1733: hyp=['BUT', 'HARE', 'WAS', 'NOT', 'IN', 'THE', 'MOOD', 'TO', 'SHARE', 'A', 'JOYOUS', 'REVEL', 'AND', 'HE', 'FRANKLY', 'SAID', 'SO', 'ALTHOUGH', 'IMMEDIATELY', 'AFTER', 'HIS', 'RETURN', 'HE', 'HAD', 'ACCEPTED', 'THE', 'INVITATION', 'TO', 'THE', 'FESTIVAL', 'WHICH', 'THE', 'WHOLE', 'FELLOWSHIP', 'OF', 'ARTISTS', 'WOULD', 'GIVE', 'THE', 'FOLLOWING', 'DAY', 'IN', 'HONOR', 'OF', 'THE', 'SEVENTIETH', 'BIRTHDAY', 'OF', 'THE', 'OLD', 'SCULPTOR', 'EUPHRANER'] +5484-24317-0002-1734: ref=['SHE', 'WOULD', 'APPEAR', 'HERSELF', 'AT', 'DESSERT', 'AND', 'THE', 'BANQUET', 'MUST', 'THEREFORE', 'BEGIN', 'AT', 'AN', 'UNUSUALLY', 
'EARLY', 'HOUR'] +5484-24317-0002-1734: hyp=['SHE', 'WOULD', 'APPEAR', 'HERSELF', 'AT', 'DESSERT', 'AND', 'THE', 'BANQUET', 'MUST', 'THEREFORE', 'BEGIN', 'AT', 'AN', 'UNUSUALLY', 'EARLY', 'HOUR'] +5484-24317-0003-1735: ref=['SO', 'THE', 'ARTIST', 'FOUND', 'HIMSELF', 'OBLIGED', 'TO', 'RELINQUISH', 'HIS', 'OPPOSITION'] +5484-24317-0003-1735: hyp=['SO', 'THE', 'ARTIST', 'FOUND', 'HIMSELF', 'OBLIGED', 'TO', 'RELINQUISH', 'HIS', 'OPPOSITION'] +5484-24317-0004-1736: ref=['THE', 'BANQUET', 'WAS', 'TO', 'BEGIN', 'IN', 'A', 'FEW', 'HOURS', 'YET', 'HE', 'COULD', 'NOT', 'LET', 'THE', 'DAY', 'PASS', 'WITHOUT', 'SEEING', 'DAPHNE', 'AND', 'TELLING', 'HER', 'THE', 'WORDS', 'OF', 'THE', 'ORACLE'] +5484-24317-0004-1736: hyp=['THE', 'BANQUET', 'WAS', 'TO', 'BEGIN', 'IN', 'A', 'FEW', 'HOURS', 'YET', 'HE', 'COULD', 'NOT', 'LET', 'THE', 'DAY', 'PASS', 'WITHOUT', 'SEEING', 'DAPHNE', 'AND', 'TELLING', 'HER', 'THE', 'WORDS', 'OF', 'THE', 'ORACLE'] +5484-24317-0005-1737: ref=['HE', 'LONGED', 'WITH', 'ARDENT', 'YEARNING', 'FOR', 'THE', 'SOUND', 'OF', 'HER', 'VOICE', 'AND', 'STILL', 'MORE', 'TO', 'UNBURDEN', 'HIS', 'SORELY', 'TROUBLED', 'SOUL', 'TO', 'HER'] +5484-24317-0005-1737: hyp=['HE', 'LONGED', 'WITH', 'ARDENT', 'YEARNING', 'FOR', 'THE', 'SOUND', 'OF', 'HER', 'VOICE', 'AND', 'STILL', 'MORE', 'TO', 'UNBURDEN', 'HIS', 'SORELY', 'TROUBLED', 'SOUL', 'TO', 'HER'] +5484-24317-0006-1738: ref=['SINCE', 'HIS', 'RETURN', 'FROM', 'THE', 'ORACLE', 'THE', 'FEAR', 'THAT', 'THE', 'RESCUED', 'DEMETER', 'MIGHT', 'YET', 'BE', 'THE', 'WORK', 'OF', 'MYRTILUS', 'HAD', 'AGAIN', 'MASTERED', 'HIM'] +5484-24317-0006-1738: hyp=['SINCE', 'HIS', 'RETURN', 'FROM', 'THE', 'ORACLE', 'THE', 'FEAR', 'THAT', 'THE', 'RESCUE', 'DEMETER', 'MIGHT', 'YET', 'BE', 'THE', 'WORK', 'OF', 'MERTILLUS', 'HAD', 'AGAIN', 'MASTERED', 'HIM'] +5484-24317-0007-1739: ref=['THE', 'APPROVAL', 'AS', 'WELL', 'AS', 'THE', 'DOUBTS', 'WHICH', 'IT', 'AROUSED', 'IN', 'OTHERS', 'STRENGTHENED', 'HIS', 'OPINION', 'ALTHOUGH', 'EVEN', 'NOW', 'HE', 'COULD', 'NOT', 'SUCCEED', 'IN', 'BRINGING', 'IT', 'INTO', 'HARMONY', 'WITH', 'THE', 'FACTS'] +5484-24317-0007-1739: hyp=['THE', 'APPROVAL', 'AS', 'WELL', 'AS', 'THE', 'DOUBTS', 'WHICH', 'HAD', 'AROUSED', 'IN', 'OTHERS', 'STRENGTHENED', 'HIS', 'OPINION', 'ALTHOUGH', 'EVEN', 'NOW', 'HE', 'COULD', 'NOT', 'SUCCEED', 'IN', 'BRINGING', 'IT', 'INTO', 'HARMONY', 'WITH', 'THE', 'FACTS'] +5484-24317-0008-1740: ref=['THEN', 'HE', 'WENT', 'DIRECTLY', 'TO', 'THE', 'NEIGHBOURING', 'PALACE', 'THE', 'QUEEN', 'MIGHT', 'HAVE', 'APPEARED', 'ALREADY', 'AND', 'IT', 'WOULD', 'NOT', 'DO', 'TO', 'KEEP', 'HER', 'WAITING'] +5484-24317-0008-1740: hyp=['THEN', 'HE', 'WENT', 'DIRECTLY', 'TO', 'THE', 'NEIGHBOURING', 'PALACE', 'THE', 'QUEEN', 'MIGHT', 'HAVE', 'APPEARED', 'ALREADY', 'AND', 'IT', 'WOULD', 'NOT', 'DO', 'TO', 'KEEP', 'HER', 'WAITING'] +5484-24317-0009-1741: ref=['HITHERTO', 'THE', 'MERCHANT', 'HAD', 'BEEN', 'INDUCED', 'IT', 'IS', 'TRUE', 'TO', 'ADVANCE', 'LARGE', 'SUMS', 'OF', 'MONEY', 'TO', 'THE', 'QUEEN', 'BUT', 'THE', 'LOYAL', 'DEVOTION', 'WHICH', 'HE', 'SHOWED', 'TO', 'HER', 'ROYAL', 'HUSBAND', 'HAD', 'RENDERED', 'IT', 'IMPOSSIBLE', 'TO', 'GIVE', 'HIM', 'EVEN', 'A', 'HINT', 'OF', 'THE', 'CONSPIRACY'] +5484-24317-0009-1741: hyp=['HITHERTO', 'THE', 'MERCHANT', 'HAD', 'BEEN', 'INDUCED', 'IT', 'IS', 'TRUE', 'TO', 'ADVANCE', 'LARGE', 'SUMS', 'OF', 'MONEY', 'TO', 'THE', 'QUEEN', 'BUT', 'THE', 'LOYAL', 'DEVOTION', 'WHICH', 'HE', 'SHOWED', 'TO', 'HER', 'ROYAL', 'HUSBAND', 'HAD', 'RENDERED', 'IT', 'IMPOSSIBLE', 'TO', 'GIVE', 'HIM', 'EVEN', 'A', 'HINT', 
'OF', 'THE', 'CONSPIRACY'] +5484-24317-0010-1742: ref=['WHEN', 'HERMON', 'ENTERED', 'THE', 'RESIDENCE', 'OF', 'THE', 'GRAMMATEUS', 'IN', 'THE', 'PALACE', 'THE', 'GUESTS', 'HAD', 'ALREADY', 'ASSEMBLED'] +5484-24317-0010-1742: hyp=['WHEN', 'HERMANN', 'ENTERED', 'THE', 'RESIDENCE', 'OF', 'THE', 'GRAMMATIUS', 'IN', 'THE', 'PALACE', 'THE', 'GUESTS', 'HAD', 'ALREADY', 'ASSEMBLED'] +5484-24317-0011-1743: ref=['THE', 'PLACE', 'BY', "HERMON'S", 'SIDE', 'WHICH', 'ALTHEA', 'HAD', 'CHOSEN', 'FOR', 'HERSELF', 'WOULD', 'THEN', 'BE', 'GIVEN', 'UP', 'TO', 'ARSINOE'] +5484-24317-0011-1743: hyp=['THE', 'PLACE', 'BY', "HERMONT'S", 'SIDE', 'WHICH', 'ALPHIE', 'HAD', 'CHOSEN', 'FOR', 'HERSELF', 'WOULD', 'THEN', 'BE', 'GIVEN', 'UP', 'TO', 'ARSENO'] +5484-24317-0012-1744: ref=['TRUE', 'AN', 'INTERESTING', 'CONVERSATION', 'STILL', 'HAD', 'POWER', 'TO', 'CHARM', 'HIM', 'BUT', 'OFTEN', 'DURING', 'ITS', 'CONTINUANCE', 'THE', 'FULL', 'CONSCIOUSNESS', 'OF', 'HIS', 'MISFORTUNE', 'FORCED', 'ITSELF', 'UPON', 'HIS', 'MIND', 'FOR', 'THE', 'MAJORITY', 'OF', 'THE', 'SUBJECTS', 'DISCUSSED', 'BY', 'THE', 'ARTISTS', 'CAME', 'TO', 'THEM', 'THROUGH', 'THE', 'MEDIUM', 'OF', 'SIGHT', 'AND', 'REFERRED', 'TO', 'NEW', 'CREATIONS', 'OF', 'ARCHITECTURE', 'SCULPTURE', 'AND', 'PAINTING', 'FROM', 'WHOSE', 'ENJOYMENT', 'HIS', 'BLINDNESS', 'DEBARRED', 'HIM'] +5484-24317-0012-1744: hyp=['TRUE', 'AN', 'INTERESTING', 'CONVERSATION', 'STILL', 'HAD', 'POWER', 'TO', 'CHARM', 'HIM', 'BUT', 'OFTEN', 'DURING', 'ITS', 'CONTINUANCE', 'THE', 'FULL', 'CONSCIOUSNESS', 'OF', 'HIS', 'MISFORTUNE', 'FORCED', 'ITSELF', 'UPON', 'HIS', 'MIND', 'FOR', 'THE', 'MAJORITY', 'OF', 'THE', 'SUBJECTS', 'DISCUSSED', 'BY', 'THE', 'ARTISTS', 'CAME', 'TO', 'THEM', 'THROUGH', 'THE', 'MEDIUM', 'OF', 'SIGHT', 'AND', 'REFERRED', 'TO', 'NEW', 'CREATIONS', 'OF', 'ARCHITECTURE', 'SCULPTURE', 'AND', 'PAINTING', 'FROM', 'WHOSE', 'ENJOYMENT', 'HIS', 'BLINDNESS', 'DEBARRED', 'HIM'] +5484-24317-0013-1745: ref=['A', 'STRANGER', 'OUT', 'OF', 'HIS', 'OWN', 'SPHERE', 'HE', 'FELT', 'CHILLED', 'AMONG', 'THESE', 'CLOSELY', 'UNITED', 'MEN', 'AND', 'WOMEN', 'TO', 'WHOM', 'NO', 'TIE', 'BOUND', 'HIM', 'SAVE', 'THE', 'PRESENCE', 'OF', 'THE', 'SAME', 'HOST'] +5484-24317-0013-1745: hyp=['A', 'STRANGER', 'OUT', 'OF', 'HIS', 'OWN', 'SPHERE', 'HE', 'FELL', 'CHILLED', 'AMONG', 'THESE', 'CLOSELY', 'UNITED', 'MEN', 'AND', 'WOMEN', 'TO', 'WHOM', 'NO', 'TIE', 'BOUND', 'HIM', 'SAVE', 'THE', 'PRESENCE', 'OF', 'THE', 'SAME', 'HOST'] +5484-24317-0014-1746: ref=['CRATES', 'HAD', 'REALLY', 'BEEN', 'INVITED', 'IN', 'ORDER', 'TO', 'WIN', 'HIM', 'OVER', 'TO', 'THE', "QUEEN'S", 'CAUSE', 'BUT', 'CHARMING', 'FAIR', 'HAIRED', 'NICO', 'HAD', 'BEEN', 'COMMISSIONED', 'BY', 'THE', 'CONSPIRATORS', 'TO', 'PERSUADE', 'HIM', 'TO', 'SING', "ARSINOE'S", 'PRAISES', 'AMONG', 'HIS', 'PROFESSIONAL', 'ASSOCIATES'] +5484-24317-0014-1746: hyp=['CRATES', 'HAD', 'REALLY', 'BEEN', 'INVITED', 'IN', 'ORDER', 'TO', 'WIN', 'HIM', 'OVER', 'TO', 'THE', "QUEEN'S", 'CAUSE', 'BUT', 'CHARMING', 'FAIR', 'HAIRED', 'NICHO', 'HAD', 'BEEN', 'COMMISSIONED', 'BY', 'THE', 'CONSPIRATORS', 'TO', 'PERSUADE', 'HIM', 'TO', 'SING', "ARSENAL'S", 'PRAISES', 'AMONG', 'HIS', 'PROFESSIONAL', 'ASSOCIATES'] +5484-24317-0015-1747: ref=['HIS', 'SON', 'HAD', 'BEEN', 'THIS', 'ROYAL', "DAME'S", 'FIRST', 'HUSBAND', 'AND', 'SHE', 'HAD', 'DESERTED', 'HIM', 'TO', 'MARRY', 'LYSIMACHUS', 'THE', 'AGED', 'KING', 'OF', 'THRACE'] +5484-24317-0015-1747: hyp=['HIS', 'SON', 'HAD', 'BEEN', 'THE', 'ROYAL', "DAME'S", 'FIRST', 'HUSBAND', 'AND', 'SHE', 'HAD', 'DESERTED', 'HIM', 'TO', 'MARRY', 
'LISUMACHUS', 'THE', 'AGED', 'KING', 'OF', 'THRACE'] +5484-24317-0016-1748: ref=['THE', "KING'S", 'SISTER', 'THE', 'OBJECT', 'OF', 'HIS', 'LOVE', 'CRIED', 'HERMON', 'INCREDULOUSLY'] +5484-24317-0016-1748: hyp=['THE', "KING'S", 'SISTER', 'THE', 'OBJECT', 'OF', 'HIS', 'LOVE', 'CRIED', 'HARMONT', 'INCREDULOUSLY'] +5484-24317-0017-1749: ref=['WE', 'WOMEN', 'ARE', 'ONLY', 'AS', 'OLD', 'AS', 'WE', 'LOOK', 'AND', 'THE', 'LEECHES', 'AND', 'TIRING', 'WOMEN', 'OF', 'THIS', 'BEAUTY', 'OF', 'FORTY', 'PRACTISE', 'ARTS', 'WHICH', 'GIVE', 'HER', 'THE', 'APPEARANCE', 'OF', 'TWENTY', 'FIVE', 'YET', 'PERHAPS', 'THE', 'KING', 'VALUES', 'HER', 'INTELLECT', 'MORE', 'THAN', 'HER', 'PERSON', 'AND', 'THE', 'WISDOM', 'OF', 'A', 'HUNDRED', 'SERPENTS', 'IS', 'CERTAINLY', 'UNITED', 'IN', 'THIS', "WOMAN'S", 'HEAD'] +5484-24317-0017-1749: hyp=['WE', 'WOMEN', 'ARE', 'ONLY', 'AS', 'OLD', 'AS', 'WE', 'LOOK', 'AND', 'THE', 'LEECHES', 'AND', 'TIRING', 'WOMEN', 'OF', 'THIS', 'BEAUTY', 'OF', 'FORTY', 'PRACTISE', 'ARTS', 'WHICH', 'GIVE', 'HER', 'THE', 'APPEARANCE', 'OF', 'TWENTY', 'FIVE', 'YET', 'PERHAPS', 'THE', 'KING', 'VALUES', 'HER', 'INTELLECT', 'MORE', 'THAN', 'HER', 'PERSON', 'AND', 'THE', 'WISDOM', 'OF', 'A', 'HUNDRED', 'SERPENTS', 'IS', 'CERTAINLY', 'UNITED', 'IN', 'THIS', "WOMAN'S", 'HEAD'] +5484-24317-0018-1750: ref=['THE', 'THREE', 'MOST', 'TRUSTWORTHY', 'ONES', 'ARE', 'HERE', 'AMYNTAS', 'THE', 'LEECH', 'CHRYSIPPUS', 'AND', 'THE', 'ADMIRABLE', 'PROCLUS'] +5484-24317-0018-1750: hyp=['THE', 'THREE', 'MOST', 'TRUSTWORTHY', 'ONES', 'ARE', 'HERE', 'AMUNTUS', 'THE', 'LIEGE', 'CHRYSIPPUS', 'IN', 'THE', 'ADMIRABLE', 'PROCLYS'] +5484-24317-0019-1751: ref=['LET', 'US', 'HOPE', 'THAT', 'YOU', 'WILL', 'MAKE', 'THIS', 'THREE', 'LEAVED', 'CLOVER', 'THE', 'LUCK', 'PROMISING', 'FOUR', 'LEAVED', 'ONE'] +5484-24317-0019-1751: hyp=['LET', 'US', 'HOPE', 'THAT', 'YOU', 'WILL', 'MAKE', 'THIS', 'THREE', 'LEAVED', 'CLOVER', 'THE', 'LUCK', 'PROMISING', 'FOUR', 'LEAVED', 'ONE'] +5484-24317-0020-1752: ref=['YOUR', 'UNCLE', 'TOO', 'HAS', 'OFTEN', 'WITH', 'PRAISEWORTHY', 'GENEROSITY', 'HELPED', 'ARSINOE', 'IN', 'MANY', 'AN', 'EMBARRASSMENT'] +5484-24317-0020-1752: hyp=['YOUR', 'UNCLE', 'TOO', 'HAS', 'OFTEN', 'WITH', 'PRAISEWORTHY', 'GENEROSITY', 'HELPED', 'ARSENAL', 'IN', 'MANY', 'AN', 'EMBARRASSMENT'] +5484-24317-0021-1753: ref=['HOW', 'LONG', 'HE', 'KEPT', 'YOU', 'WAITING', 'FOR', 'THE', 'FIRST', 'WORD', 'CONCERNING', 'A', 'WORK', 'WHICH', 'JUSTLY', 'TRANSPORTED', 'THE', 'WHOLE', 'CITY', 'WITH', 'DELIGHT'] +5484-24317-0021-1753: hyp=['HOW', 'LONG', 'HE', 'KEPT', 'YOU', 'WAITING', 'FROM', 'THE', 'FIRST', 'WORD', 'CONCERNING', 'A', 'WORK', 'WHICH', 'JUSTLY', 'TRANSPORTED', 'THE', 'WHOLE', 'CITY', 'WITH', 'DELIGHT'] +5484-24317-0022-1754: ref=['WHEN', 'HE', 'DID', 'FINALLY', 'SUMMON', 'YOU', 'HE', 'SAID', 'THINGS', 'WHICH', 'MUST', 'HAVE', 'WOUNDED', 'YOU'] +5484-24317-0022-1754: hyp=['WHEN', 'HE', 'DID', 'FINALLY', 'SUMMON', 'YOU', 'HE', 'SAID', 'THINGS', 'WHICH', 'MUST', 'HAVE', 'WOUNDED', 'YOU'] +5484-24317-0023-1755: ref=['THAT', 'IS', 'GOING', 'TOO', 'FAR', 'REPLIED', 'HERMON'] +5484-24317-0023-1755: hyp=['THAT', 'IS', 'GOING', 'TOO', 'FAR', 'REPLIED', 'HARMON'] +5484-24317-0024-1756: ref=['HE', 'WINKED', 'AT', 'HER', 'AND', 'MADE', 'A', 'SIGNIFICANT', 'GESTURE', 'AS', 'HE', 'SPOKE', 'AND', 'THEN', 'INFORMED', 'THE', 'BLIND', 'ARTIST', 'HOW', 'GRACIOUSLY', 'ARSINOE', 'HAD', 'REMEMBERED', 'HIM', 'WHEN', 'SHE', 'HEARD', 'OF', 'THE', 'REMEDY', 'BY', 'WHOSE', 'AID', 'MANY', 'A', 'WONDERFUL', 'CURE', 'OF', 'BLIND', 'EYES', 'HAD', 'BEEN', 
'MADE', 'IN', 'RHODES'] +5484-24317-0024-1756: hyp=['HE', 'WINKED', 'AT', 'HER', 'AND', 'MADE', 'A', 'SIGNIFICANT', 'GESTURE', 'AS', 'HE', 'SPOKE', 'AND', 'THEN', 'INFORMED', 'THE', 'BLIND', 'ARTIST', 'HOW', 'GRACIOUSLY', 'ARSENAL', 'HAD', 'REMEMBERED', 'HIM', 'WHEN', 'SHE', 'HEARD', 'OF', 'THE', 'REMEDY', 'BY', 'WHOSE', 'AID', 'MANY', 'A', 'WONDERFUL', 'CURE', 'OF', 'BLIND', 'EYE', 'HAD', 'BEEN', 'MADE', 'IN', 'RHODES'] +5484-24317-0025-1757: ref=['THE', 'ROYAL', 'LADY', 'HAD', 'INQUIRED', 'ABOUT', 'HIM', 'AND', 'HIS', 'SUFFERINGS', 'WITH', 'ALMOST', 'SISTERLY', 'INTEREST', 'AND', 'ALTHEA', 'EAGERLY', 'CONFIRMED', 'THE', 'STATEMENT'] +5484-24317-0025-1757: hyp=['THE', 'ROYAL', 'LADY', 'HAD', 'INQUIRED', 'ABOUT', 'HIM', 'AND', 'HIS', 'SUFFERINGS', 'WITH', 'ALMOST', 'SISTERLY', 'INTEREST', 'AND', 'ALTHEA', 'EAGERLY', 'CONFIRMED', 'THE', 'STATEMENT'] +5484-24317-0026-1758: ref=['HERMON', 'LISTENED', 'TO', 'THE', 'PAIR', 'IN', 'SILENCE'] +5484-24317-0026-1758: hyp=['HERMANN', 'LISTENED', 'TO', 'THE', 'PAIR', 'IN', 'SILENCE'] +5484-24317-0027-1759: ref=['THE', 'RHODIAN', 'WAS', 'JUST', 'BEGINNING', 'TO', 'PRAISE', 'ARSINOE', 'ALSO', 'AS', 'A', 'SPECIAL', 'FRIEND', 'AND', 'CONNOISSEUR', 'OF', 'THE', "SCULPTOR'S", 'ART', 'WHEN', 'CRATES', "HERMON'S", 'FELLOW', 'STUDENT', 'ASKED', 'THE', 'BLIND', 'ARTIST', 'IN', 'BEHALF', 'OF', 'HIS', 'BEAUTIFUL', 'COMPANION', 'WHY', 'HIS', 'DEMETER', 'WAS', 'PLACED', 'UPON', 'A', 'PEDESTAL', 'WHICH', 'TO', 'OTHERS', 'AS', 'WELL', 'AS', 'HIMSELF', 'SEEMED', 'TOO', 'HIGH', 'FOR', 'THE', 'SIZE', 'OF', 'THE', 'STATUE'] +5484-24317-0027-1759: hyp=['THE', 'ROUDIAN', 'WAS', 'JUST', 'BEGINNING', 'TO', 'PRAISE', 'ARSENAL', 'ALSO', 'AS', 'A', 'SPECIAL', 'FRIEND', 'AND', 'CONNOISSEUR', 'OF', 'THE', "SCULPT'S", 'ART', 'WHEN', 'CRATES', "HERMANN'S", 'FELLOW', 'STUDENT', 'ASKED', 'THE', 'BLIND', 'ARTIST', 'IN', 'BEHALF', 'OF', 'HIS', 'BEAUTIFUL', 'COMPANION', 'WHY', 'DEMEANOUR', 'WAS', 'PLACED', 'UPON', 'A', 'PEDESTAL', 'WHICH', 'TO', 'OTHERS', 'AS', 'WELL', 'AS', 'HIMSELF', 'SEEMED', 'TOO', 'HIGH', 'FOR', 'THE', 'SIZE', 'OF', 'THE', 'STATUE'] +5484-24317-0028-1760: ref=['YET', 'WHAT', 'MATTERED', 'IT', 'EVEN', 'IF', 'THESE', 'MISERABLE', 'PEOPLE', 'CONSIDERED', 'THEMSELVES', 'DECEIVED', 'AND', 'POINTED', 'THE', 'FINGER', 'OF', 'SCORN', 'AT', 'HIM'] +5484-24317-0028-1760: hyp=['YET', 'WHAT', 'MATTERED', 'IT', 'EVEN', 'IF', 'THESE', 'MISERABLE', 'PEOPLE', 'CONSIDERED', 'THEMSELVES', 'DECEIVED', 'AND', 'POINTED', 'THE', 'FINGER', 'OF', 'SCORN', 'AT', 'HIM'] +5484-24317-0029-1761: ref=['A', 'WOMAN', 'WHO', 'YEARNS', 'FOR', 'THE', 'REGARD', 'OF', 'ALL', 'MEN', 'AND', 'MAKES', 'LOVE', 'A', 'TOY', 'EASILY', 'LESSENS', 'THE', 'DEMANDS', 'SHE', 'IMPOSES', 'UPON', 'INDIVIDUALS'] +5484-24317-0029-1761: hyp=['A', 'WOMAN', 'WHO', 'YEARNS', 'FOR', 'THE', 'REGARD', 'OF', 'ALL', 'MEN', 'AND', 'MAKES', 'LOVE', 'A', 'TOY', 'EASILY', 'LESSENS', 'THE', 'DEMANDS', 'SHE', 'IMPOSES', 'UPON', 'INDIVIDUALS'] +5484-24317-0030-1762: ref=['ONLY', 'EVEN', 'THOUGH', 'LOVE', 'HAS', 'WHOLLY', 'DISAPPEARED', 'SHE', 'STILL', 'CLAIMS', 'CONSIDERATION', 'AND', 'ALTHEA', 'DID', 'NOT', 'WISH', 'TO', 'LOSE', "HERMON'S", 'REGARD'] +5484-24317-0030-1762: hyp=['ONLY', 'EVEN', 'THOUGH', 'LOVE', 'HAS', 'WHOLLY', 'DISAPPEARED', 'SHE', 'STILL', 'CLAIMS', 'CONSIDERATION', 'AND', 'ALTHEA', 'DID', 'NOT', 'WISH', 'TO', 'LOSE', "HARMONT'S", 'REGARD'] +5484-24317-0031-1763: ref=['HOW', 'INDIFFERENT', 'YOU', 'LOOK', 'BUT', 'I', 'TELL', 'YOU', 'HER', 'DEEP', 'BLUE', 'EYES', 'FLASHED', 'AS', 'SHE', 'SPOKE', 'THAT', 'SO', 'LONG', 
'AS', 'YOU', 'WERE', 'STILL', 'A', 'GENUINE', 'CREATING', 'ARTIST', 'THE', 'CASE', 'WAS', 'DIFFERENT'] +5484-24317-0031-1763: hyp=['HOW', 'INDIFFERENT', 'YOU', 'LOOK', 'BUT', 'I', 'TELL', 'YOU', 'HER', 'DEEP', 'BLUE', 'EYES', 'FLASHED', 'AS', 'SHE', 'SPOKE', 'THAT', 'SO', 'LONG', 'AS', 'YOU', 'WERE', 'STILL', 'A', 'GENUINE', 'CREATING', 'ARTIST', 'THE', 'CASE', 'WAS', 'DIFFERENT'] +5484-24317-0032-1764: ref=['THOUGH', 'SO', 'LOUD', 'A', 'DENIAL', 'IS', 'WRITTEN', 'ON', 'YOUR', 'FACE', 'I', 'PERSIST', 'IN', 'MY', 'CONVICTION', 'AND', 'THAT', 'NO', 'IDLE', 'DELUSION', 'ENSNARES', 'ME', 'I', 'CAN', 'PROVE'] +5484-24317-0032-1764: hyp=['THOUGH', 'SO', 'LOUD', 'A', 'DENIAL', 'IS', 'WRITTEN', 'ON', 'YOUR', 'FACE', 'I', 'PERSIST', 'IN', 'MY', 'CONVICTION', 'AND', 'THAT', 'NO', 'IDLE', 'DELUSION', 'ENSNAS', 'ME', 'I', 'CAN', 'PROVE'] +5484-24317-0033-1765: ref=['IT', 'WAS', 'NAY', 'IT', 'COULD', 'HAVE', 'BEEN', 'NOTHING', 'ELSE', 'THAT', 'VERY', 'SPIDER'] +5484-24317-0033-1765: hyp=['IT', 'WAS', 'NAY', 'IT', 'COULD', 'HAVE', 'BEEN', 'NOTHING', 'ELSE', 'THAT', 'VERY', 'SPIDER'] +5484-24318-0000-1766: ref=['NOT', 'A', 'SOUND', 'IF', 'YOU', 'VALUE', 'YOUR', 'LIVES'] +5484-24318-0000-1766: hyp=['NOT', 'A', 'SOUND', 'IF', 'YOU', 'VALUE', 'YOUR', 'LIVES'] +5484-24318-0001-1767: ref=['TO', 'OFFER', 'RESISTANCE', 'WOULD', 'HAVE', 'BEEN', 'MADNESS', 'FOR', 'EVEN', 'HERMON', 'PERCEIVED', 'BY', 'THE', 'LOUD', 'CLANKING', 'OF', 'WEAPONS', 'AROUND', 'THEM', 'THE', 'GREATLY', 'SUPERIOR', 'POWER', 'OF', 'THE', 'ENEMY', 'AND', 'THEY', 'WERE', 'ACTING', 'BY', 'THE', 'ORDERS', 'OF', 'THE', 'KING', 'TO', 'THE', 'PRISON', 'NEAR', 'THE', 'PLACE', 'OF', 'EXECUTION'] +5484-24318-0001-1767: hyp=['TO', 'OFFER', 'RESISTANCE', 'WOULD', 'HAVE', 'BEEN', 'MADNESS', 'FOR', 'EVEN', 'HERMONN', 'PERCEIVED', 'BY', 'THE', 'LOUD', 'CLANKING', 'OF', 'WEAPONS', 'AROUND', 'THEM', 'THE', 'GREATLY', 'SUPERIOR', 'POWER', 'OF', 'THE', 'ENEMY', 'AND', 'THEY', 'WERE', 'ACTING', 'BY', 'THE', 'ORDERS', 'OF', 'THE', 'KING', 'TO', 'THE', 'PRISON', 'NEAR', 'THE', 'PLACE', 'OF', 'EXECUTION'] +5484-24318-0002-1768: ref=['WAS', 'HE', 'TO', 'BE', 'LED', 'TO', 'THE', "EXECUTIONER'S", 'BLOCK'] +5484-24318-0002-1768: hyp=['WAS', 'HE', 'TO', 'BE', 'LED', 'TO', 'THE', "EXECUTIONER'S", 'BLOCK'] +5484-24318-0003-1769: ref=['WHAT', 'PLEASURE', 'HAD', 'LIFE', 'TO', 'OFFER', 'HIM', 'THE', 'BLIND', 'MAN', 'WHO', 'WAS', 'ALREADY', 'DEAD', 'TO', 'HIS', 'ART'] +5484-24318-0003-1769: hyp=['WHAT', 'PLEASURE', 'HAD', 'LIFE', 'TO', 'OFFER', 'HIM', 'THE', 'BLIND', 'MAN', 'WHO', 'WAS', 'ALREADY', 'DEAD', 'TO', 'HIS', 'ART'] +5484-24318-0004-1770: ref=['OUGHT', 'HE', 'NOT', 'TO', 'GREET', 'THIS', 'SUDDEN', 'END', 'AS', 'A', 'BOON', 'FROM', 'THE', 'IMMORTALS'] +5484-24318-0004-1770: hyp=['OUGHT', 'HE', 'NOT', 'TO', 'GREET', 'HIS', 'SUDDEN', 'END', 'AS', 'A', 'BOON', 'FROM', 'THE', 'IMMORTALS'] +5484-24318-0005-1771: ref=['DID', 'IT', 'NOT', 'SPARE', 'HIM', 'A', 'HUMILIATION', 'AS', 'GREAT', 'AND', 'PAINFUL', 'AS', 'COULD', 'BE', 'IMAGINED'] +5484-24318-0005-1771: hyp=['DID', 'IT', 'NOT', 'SPARE', 'HIM', 'A', 'HUMILIATION', 'AS', 'GREAT', 'AND', 'PAINFUL', 'AS', 'COULD', 'BE', 'IMAGINED'] +5484-24318-0006-1772: ref=['WHATEVER', 'MIGHT', 'AWAIT', 'HIM', 'HE', 'DESIRED', 'NO', 'BETTER', 'FATE'] +5484-24318-0006-1772: hyp=['WHATEVER', 'MIGHT', 'AWAIT', 'HIM', 'HE', 'DESIRED', 'NO', 'BETTER', 'FATE'] +5484-24318-0007-1773: ref=['IF', 'HE', 'HAD', 'PASSED', 'INTO', 'ANNIHILATION', 'HE', 'HERMON', 'WISHED', 'TO', 'FOLLOW', 'HIM', 'THITHER', 'AND', 'ANNIHILATION', 'CERTAINLY', 
'MEANT', 'REDEMPTION', 'FROM', 'PAIN', 'AND', 'MISERY'] +5484-24318-0007-1773: hyp=['IF', 'HE', 'HAD', 'PASSED', 'INTO', 'ANNIHILATION', 'HE', 'HERMOD', 'WISHED', 'TO', 'FOLLOW', 'HIM', 'THITHER', 'AND', 'ANNIHILATION', 'CERTAINLY', 'MEANT', 'REDEMPTION', 'FROM', 'PAIN', 'AND', 'MISERY'] +5484-24318-0008-1774: ref=['BUT', 'IF', 'HE', 'WERE', 'DESTINED', 'TO', 'MEET', 'HIS', 'MYRTILUS', 'AND', 'HIS', 'MOTHER', 'IN', 'THE', 'WORLD', 'BEYOND', 'THE', 'GRAVE', 'WHAT', 'HAD', 'HE', 'NOT', 'TO', 'TELL', 'THEM', 'HOW', 'SURE', 'HE', 'WAS', 'OF', 'FINDING', 'A', 'JOYFUL', 'RECEPTION', 'THERE', 'FROM', 'BOTH'] +5484-24318-0008-1774: hyp=['BUT', 'IF', 'HE', 'WERE', 'DESTINED', 'TO', 'MEET', 'HIS', 'MERTALUS', 'AND', 'HIS', 'MOTHER', 'IN', 'THE', 'WORLD', 'BEYOND', 'THE', 'GRAVE', 'WHAT', 'HAD', 'HE', 'NOT', 'TO', 'TELL', 'THEM', 'HOW', 'SURE', 'HE', 'WAS', 'OF', 'FINDING', 'A', 'JOYFUL', 'RECEPTION', 'THERE', 'FROM', 'BOTH'] +5484-24318-0009-1775: ref=['THE', 'POWER', 'WHICH', 'DELIVERED', 'HIM', 'OVER', 'TO', 'DEATH', 'JUST', 'AT', 'THAT', 'MOMENT', 'WAS', 'NOT', 'NEMESIS', 'NO', 'IT', 'WAS', 'A', 'KINDLY', 'DEITY'] +5484-24318-0009-1775: hyp=['THE', 'POWER', 'WHICH', 'DELIVERED', 'HIM', 'OVER', 'TO', 'DEATH', 'JUST', 'AT', 'THAT', 'MOMENT', 'WAS', 'NOT', 'NEMESIS', 'NO', 'IT', 'WAS', 'A', 'KINDLY', 'DEITY'] +5484-24318-0010-1776: ref=['YET', 'IT', 'WAS', 'NO', 'ILLUSION', 'THAT', 'DECEIVED', 'HIM'] +5484-24318-0010-1776: hyp=['YET', 'IT', 'WAS', 'NO', 'ILLUSION', 'THAT', 'DECEIVED', 'HIM'] +5484-24318-0011-1777: ref=['AGAIN', 'HE', 'HEARD', 'THE', 'BELOVED', 'VOICE', 'AND', 'THIS', 'TIME', 'IT', 'ADDRESSED', 'NOT', 'ONLY', 'HIM', 'BUT', 'WITH', 'THE', 'UTMOST', 'HASTE', 'THE', 'COMMANDER', 'OF', 'THE', 'SOLDIERS'] +5484-24318-0011-1777: hyp=['AGAIN', 'HE', 'HEARD', 'THE', 'BELOVED', 'VOICE', 'AND', 'THIS', 'TIME', 'IT', 'ADDRESSED', 'NOT', 'ONLY', 'HIM', 'BUT', 'WITH', 'THE', 'UTMOST', 'HASTE', 'THE', 'COMMANDER', 'OF', 'THE', 'SOLDIERS'] +5484-24318-0012-1778: ref=['SOMETIMES', 'WITH', 'TOUCHING', 'ENTREATY', 'SOMETIMES', 'WITH', 'IMPERIOUS', 'COMMAND', 'SHE', 'PROTESTED', 'AFTER', 'GIVING', 'HIM', 'HER', 'NAME', 'THAT', 'THIS', 'MATTER', 'COULD', 'BE', 'NOTHING', 'BUT', 'AN', 'UNFORTUNATE', 'MISTAKE'] +5484-24318-0012-1778: hyp=['SOMETIMES', 'WITH', 'TOUCHING', 'ENTREATY', 'SOMETIMES', 'WITH', 'IMPERIOUS', 'COMMAND', 'SHE', 'PROTESTED', 'AFTER', 'GIVING', 'HIM', 'HER', 'NAME', 'THAT', 'THIS', 'MATTER', 'COULD', 'BE', 'NOTHING', 'BUT', 'AN', 'UNFORTUNATE', 'MISTAKE'] +5484-24318-0013-1779: ref=['LASTLY', 'WITH', 'EARNEST', 'WARMTH', 'SHE', 'BESOUGHT', 'HIM', 'BEFORE', 'TAKING', 'THE', 'PRISONERS', 'AWAY', 'TO', 'PERMIT', 'HER', 'TO', 'SPEAK', 'TO', 'THE', 'COMMANDING', 'GENERAL', 'PHILIPPUS', 'HER', "FATHER'S", 'GUEST', 'WHO', 'SHE', 'WAS', 'CERTAIN', 'WAS', 'IN', 'THE', 'PALACE'] +5484-24318-0013-1779: hyp=['LASTLY', 'WITH', 'EARNEST', 'WARMTH', 'SHE', 'BESOUGHT', 'HIM', 'BEFORE', 'TAKING', 'THE', 'PRISONERS', 'AWAY', 'TO', 'PERMIT', 'HER', 'TO', 'SPEAK', 'TO', 'THE', 'COMMANDING', 'GENERAL', 'PHILIPPUS', 'HER', "FATHER'S", 'GUEST', 'WHO', 'SHE', 'WAS', 'CERTAIN', 'WAS', 'IN', 'THE', 'PALACE'] +5484-24318-0014-1780: ref=['CRIED', 'HERMON', 'IN', 'GRATEFUL', 'AGITATION', 'BUT', 'SHE', 'WOULD', 'NOT', 'LISTEN', 'TO', 'HIM', 'AND', 'FOLLOWED', 'THE', 'SOLDIER', 'WHOM', 'THE', 'CAPTAIN', 'DETAILED', 'TO', 'GUIDE', 'HER', 'INTO', 'THE', 'PALACE'] +5484-24318-0014-1780: hyp=['CRIED', 'HAREMAN', 'IN', 'GRATEFUL', 'AGITATION', 'BUT', 'SHE', 'WOULD', 'NOT', 'LISTEN', 'TO', 'HIM', 'AND', 'FOLLOWED', 
'THE', 'SOLDIER', 'WHOM', 'THE', 'CAPTAIN', 'DETAILED', 'TO', 'GUIDE', 'HER', 'INTO', 'THE', 'PALACE'] +5484-24318-0015-1781: ref=['TO', 'MORROW', 'YOU', 'SHALL', 'CONFESS', 'TO', 'ME', 'WHO', 'TREACHEROUSLY', 'DIRECTED', 'YOU', 'TO', 'THIS', 'DANGEROUS', 'PATH'] +5484-24318-0015-1781: hyp=['TO', 'MORROW', 'YOU', 'SHALL', 'CONFESS', 'TO', 'ME', 'WHO', 'TREACHEROUSLY', 'DIRECTED', 'YOU', 'TO', 'THIS', 'DANGEROUS', 'PATH'] +5484-24318-0016-1782: ref=['DAPHNE', 'AGAIN', 'PLEADED', 'FOR', 'THE', 'LIBERATION', 'OF', 'THE', 'PRISONERS', 'BUT', 'PHILIPPUS', 'SILENCED', 'HER', 'WITH', 'THE', 'GRAVE', 'EXCLAMATION', 'THE', 'ORDER', 'OF', 'THE', 'KING'] +5484-24318-0016-1782: hyp=['DAPHNE', 'AGAIN', 'PLEADED', 'FOR', 'THE', 'LIBERATION', 'OF', 'THE', 'PRISONERS', 'BUT', 'PHILIPPA', 'SILENCED', 'HER', 'WITH', 'THE', 'GRAVE', 'EXCLAMATION', 'THE', 'ORDER', 'OF', 'THE', 'KING'] +5484-24318-0017-1783: ref=['AS', 'SOON', 'AS', 'THE', 'CAPTIVE', 'ARTIST', 'WAS', 'ALONE', 'WITH', 'THE', 'WOMAN', 'HE', 'LOVED', 'HE', 'CLASPED', 'HER', 'HAND', 'POURING', 'FORTH', 'INCOHERENT', 'WORDS', 'OF', 'THE', 'MOST', 'ARDENT', 'GRATITUDE', 'AND', 'WHEN', 'HE', 'FELT', 'HER', 'WARMLY', 'RETURN', 'THE', 'PRESSURE', 'HE', 'COULD', 'NOT', 'RESTRAIN', 'THE', 'DESIRE', 'TO', 'CLASP', 'HER', 'TO', 'HIS', 'HEART'] +5484-24318-0017-1783: hyp=['AS', 'SOON', 'AS', 'THE', 'CAPTIVE', 'ARTIST', 'WAS', 'ALONE', 'WITH', 'THE', 'WOMAN', 'HE', 'LOVED', 'HE', 'CLASPED', 'HER', 'HAND', 'POURING', 'FORTH', 'INCOHERENT', 'WORDS', 'OF', 'THE', 'MOST', 'ARDENT', 'GRATITUDE', 'AND', 'WHEN', 'HE', 'FELT', 'HER', 'WARMLY', 'RETURN', 'THE', 'PRESSURE', 'HE', 'COULD', 'NOT', 'RESTRAIN', 'THE', 'DESIRE', 'TO', 'CLASP', 'HER', 'TO', 'HIS', 'HEART'] +5484-24318-0018-1784: ref=['IN', 'SPITE', 'OF', 'HIS', 'DEEP', 'MENTAL', 'DISTRESS', 'HE', 'COULD', 'HAVE', 'SHOUTED', 'ALOUD', 'IN', 'HIS', 'DELIGHT', 'AND', 'GRATITUDE'] +5484-24318-0018-1784: hyp=['IN', 'SPITE', 'OF', 'HIS', 'DEEP', 'MENTAL', 'DISTRESS', 'HE', 'COULD', 'HAVE', 'SHOUTED', 'ALOUD', 'IN', 'HIS', 'DELIGHT', 'AND', 'GRATITUDE'] +5484-24318-0019-1785: ref=['HE', 'MIGHT', 'NOW', 'HAVE', 'BEEN', 'PERMITTED', 'TO', 'BIND', 'FOREVER', 'TO', 'HIS', 'LIFE', 'THE', 'WOMAN', 'WHO', 'HAD', 'JUST', 'RESCUED', 'HIM', 'FROM', 'THE', 'GREATEST', 'DANGER', 'BUT', 'THE', 'CONFESSION', 'HE', 'MUST', 'MAKE', 'TO', 'HIS', 'FELLOW', 'ARTISTS', 'IN', 'THE', 'PALAESTRA', 'THE', 'FOLLOWING', 'MORNING', 'STILL', 'SEALED', 'HIS', 'LIPS', 'YET', 'IN', 'THIS', 'HOUR', 'HE', 'FELT', 'THAT', 'HE', 'WAS', 'UNITED', 'TO', 'HER', 'AND', 'OUGHT', 'NOT', 'TO', 'CONCEAL', 'WHAT', 'AWAITED', 'HIM', 'SO', 'OBEYING', 'A', 'STRONG', 'IMPULSE', 'HE', 'EXCLAIMED', 'YOU', 'KNOW', 'THAT', 'I', 'LOVE', 'YOU'] +5484-24318-0019-1785: hyp=['HE', 'MIGHT', 'NOW', 'HAVE', 'BEEN', 'PERMITTED', 'TO', 'BIND', 'FOREVER', 'TO', 'HIS', 'LIFE', 'THE', 'WOMAN', 'WHO', 'HAD', 'JUST', 'RESCUED', 'HIM', 'FROM', 'THE', 'GREATEST', 'DANGER', 'BUT', 'THE', 'CONFESSION', 'HE', 'MUST', 'MAKE', 'TO', 'HIS', 'FELLOW', 'ARTISTS', 'IN', 'THE', 'PILASTER', 'OF', 'THE', 'FOLLOWING', 'MORNING', 'STILL', 'SEALED', 'HIS', 'LIPS', 'YET', 'IN', 'THIS', 'HOUR', 'HE', 'FELT', 'THAT', 'HE', 'WAS', 'UNITED', 'TO', 'HER', 'AND', 'OUGHT', 'NOT', 'TO', 'CONCEAL', 'WHAT', 'AWAITED', 'HIM', 'SO', 'OBEYING', 'A', 'STRONG', 'IMPULSE', 'HE', 'EXCLAIMED', 'YOU', 'KNOW', 'THAT', 'I', 'LOVE', 'YOU'] +5484-24318-0020-1786: ref=['I', 'LOVE', 'YOU', 'AND', 'HAVE', 'LOVED', 'YOU', 'ALWAYS'] +5484-24318-0020-1786: hyp=['I', 'LOVE', 'YOU', 'AND', 'HAVE', 'LOVED', 'YOU', 'ALWAYS'] 
+5484-24318-0021-1787: ref=['DAPHNE', 'EXCLAIMED', 'TENDERLY', 'WHAT', 'MORE', 'IS', 'NEEDED'] +5484-24318-0021-1787: hyp=['DAPHNE', 'EXCLAIMED', 'TENDERLY', 'WHAT', 'MORE', 'IS', 'NEEDED'] +5484-24318-0022-1788: ref=['BUT', 'HERMON', 'WITH', 'DROOPING', 'HEAD', 'MURMURED', 'TO', 'MORROW', 'I', 'SHALL', 'NO', 'LONGER', 'BE', 'WHAT', 'I', 'AM', 'NOW'] +5484-24318-0022-1788: hyp=['BUT', 'HAREMON', 'WITH', 'DROOPING', 'HEAD', 'MURMURED', 'TO', 'MORROW', 'I', 'SHALL', 'NO', 'LONGER', 'BE', 'WHAT', 'I', 'AM', 'NOW'] +5484-24318-0023-1789: ref=['THEN', 'DAPHNE', 'RAISED', 'HER', 'FACE', 'TO', 'HIS', 'ASKING', 'SO', 'THE', 'DEMETER', 'IS', 'THE', 'WORK', 'OF', 'MYRTILUS'] +5484-24318-0023-1789: hyp=['THEN', 'DAPHANE', 'RAISED', 'HER', 'FACE', 'TO', 'HIS', 'ASKING', 'SO', 'THAT', 'DEMETER', 'IS', 'THE', 'WORK', 'OF', 'MYRTLES'] +5484-24318-0024-1790: ref=['WHAT', 'A', 'TERRIBLE', 'ORDEAL', 'AGAIN', 'AWAITS', 'YOU'] +5484-24318-0024-1790: hyp=['WHAT', 'A', 'TERRIBLE', 'ORDEAL', 'AGAIN', 'AWAITS', 'YOU'] +5484-24318-0025-1791: ref=['AND', 'I', 'FOOL', 'BLINDED', 'ALSO', 'IN', 'MIND', 'COULD', 'BE', 'VEXED', 'WITH', 'YOU', 'FOR', 'IT'] +5484-24318-0025-1791: hyp=['AND', 'I', 'FOOL', 'BLINDED', 'ALSO', 'IN', 'MIND', 'COULD', 'BE', 'VEXED', 'WITH', 'YOU', 'FOR', 'IT'] +5484-24318-0026-1792: ref=['BRING', 'THIS', 'BEFORE', 'YOUR', 'MIND', 'AND', 'EVERYTHING', 'ELSE', 'THAT', 'YOU', 'MUST', 'ACCEPT', 'WITH', 'IT', 'IF', 'YOU', 'CONSENT', 'WHEN', 'THE', 'TIME', 'ARRIVES', 'TO', 'BECOME', 'MINE', 'CONCEAL', 'AND', 'PALLIATE', 'NOTHING'] +5484-24318-0026-1792: hyp=['BRING', 'THIS', 'BEFORE', 'YOUR', 'MIND', 'AND', 'EVERYTHING', 'ELSE', 'THAT', 'YOU', 'MUST', 'ACCEPT', 'WITH', 'IT', 'IF', 'YOU', 'CONSENT', 'WHEN', 'THE', 'TIME', 'ARRIVES', 'TO', 'BECOME', 'MINE', 'CONCEAL', 'AND', 'PALLIATE', 'NOTHING'] +5484-24318-0027-1793: ref=['SO', 'ARCHIAS', 'INTENDED', 'TO', 'LEAVE', 'THE', 'CITY', 'ON', 'ONE', 'OF', 'HIS', 'OWN', 'SHIPS', 'THAT', 'VERY', 'DAY'] +5484-24318-0027-1793: hyp=['SORCHAUS', 'INTENDED', 'TO', 'LEAVE', 'THE', 'CITY', 'ON', 'ONE', 'OF', 'HIS', 'OWN', 'SHIPS', 'THAT', 'VERY', 'DAY'] +5484-24318-0028-1794: ref=['HE', 'HIMSELF', 'ON', 'THE', 'WAY', 'TO', 'EXPOSE', 'HIMSELF', 'TO', 'THE', 'MALICE', 'AND', 'MOCKERY', 'OF', 'THE', 'WHOLE', 'CITY'] +5484-24318-0028-1794: hyp=['HE', 'HIMSELF', 'ON', 'THE', 'WAY', 'TO', 'EXPOSE', 'HIMSELF', 'TO', 'THE', 'MALICE', 'AND', 'MOCKERY', 'OF', 'THE', 'WHOLE', 'CITY'] +5484-24318-0029-1795: ref=['HIS', 'HEART', 'CONTRACTED', 'PAINFULLY', 'AND', 'HIS', 'SOLICITUDE', 'ABOUT', 'HIS', "UNCLE'S", 'FATE', 'INCREASED', 'WHEN', 'PHILIPPUS', 'INFORMED', 'HIM', 'THAT', 'THE', 'CONSPIRATORS', 'HAD', 'BEEN', 'ARRESTED', 'AT', 'THE', 'BANQUET', 'AND', 'HEADED', 'BY', 'AMYNTAS', 'THE', 'RHODIAN', 'CHRYSIPPUS', 'AND', 'PROCLUS', 'HAD', 'PERISHED', 'BY', 'THE', "EXECUTIONER'S", 'SWORD', 'AT', 'SUNRISE'] +5484-24318-0029-1795: hyp=['HIS', 'HEART', 'CONTRACTED', 'PAINFULLY', 'AND', 'HIS', 'SOLICITUDE', 'ABOUT', 'HIS', "UNCLE'S", 'FATE', 'INCREASED', 'WHEN', 'PHILIPPUS', 'INFORMED', 'HIM', 'THAT', 'THE', 'CONSPIRATORS', 'HAD', 'BEEN', 'ARRESTED', 'AT', 'THE', 'BANQUET', 'AND', 'HEADED', 'BY', 'AMENTUS', 'HERRODIAN', 'CHRYSIPPUS', 'AND', 'PROCLIS', 'HAD', 'PERISHED', 'BY', 'THE', "EXECUTIONER'S", 'SWORD', 'AT', 'SUNRISE'] +5484-24318-0030-1796: ref=['BESIDES', 'HE', 'KNEW', 'THAT', 'THE', 'OBJECT', 'OF', 'HIS', 'LOVE', 'WOULD', 'NOT', 'PART', 'FROM', 'HIM', 'WITHOUT', 'GRANTING', 'HIM', 'ONE', 'LAST', 'WORD'] +5484-24318-0030-1796: hyp=['BESIDES', 'HE', 'KNEW', 'THAT', 'THE', 
'OBJECT', 'OF', 'HIS', 'LOVE', 'WOULD', 'NOT', 'PART', 'FROM', 'HIM', 'WITHOUT', 'GRANTING', 'HIM', 'ONE', 'LAST', 'WORD'] +5484-24318-0031-1797: ref=['ON', 'THE', 'WAY', 'HIS', 'HEART', 'THROBBED', 'ALMOST', 'TO', 'BURSTING'] +5484-24318-0031-1797: hyp=['ON', 'THE', 'WAY', 'HIS', 'HEART', 'THROBBED', 'ALMOST', 'TO', 'BURSTING'] +5484-24318-0032-1798: ref=['EVEN', "DAPHNE'S", 'IMAGE', 'AND', 'WHAT', 'THREATENED', 'HER', 'FATHER', 'AND', 'HER', 'WITH', 'HIM', 'RECEDED', 'FAR', 'INTO', 'THE', 'BACKGROUND'] +5484-24318-0032-1798: hyp=['EVEN', "DAPHNE'S", 'IMAGE', 'AND', 'WHAT', 'THREATENED', 'HER', 'FATHER', 'AND', 'HER', 'WITH', 'HIM', 'RECEDED', 'FAR', 'INTO', 'THE', 'BACKGROUND'] +5484-24318-0033-1799: ref=['HE', 'WAS', 'APPEARING', 'BEFORE', 'HIS', 'COMPANIONS', 'ONLY', 'TO', 'GIVE', 'TRUTH', 'ITS', 'JUST', 'DUE'] +5484-24318-0033-1799: hyp=['HE', 'WAS', 'APPEARING', 'BEFORE', 'HIS', 'COMPANIONS', 'ONLY', 'TO', 'GIVE', 'TRUTH', 'ITS', 'JUST', 'DUE'] +5484-24318-0034-1800: ref=['THE', 'EGYPTIAN', 'OBEYED', 'AND', 'HIS', 'MASTER', 'CROSSED', 'THE', 'WIDE', 'SPACE', 'STREWN', 'WITH', 'SAND', 'AND', 'APPROACHED', 'THE', 'STAGE', 'WHICH', 'HAD', 'BEEN', 'ERECTED', 'FOR', 'THE', 'FESTAL', 'PERFORMANCES', 'EVEN', 'HAD', 'HIS', 'EYES', 'RETAINED', 'THE', 'POWER', 'OF', 'SIGHT', 'HIS', 'BLOOD', 'WAS', 'COURSING', 'SO', 'WILDLY', 'THROUGH', 'HIS', 'VEINS', 'THAT', 'HE', 'MIGHT', 'PERHAPS', 'HAVE', 'BEEN', 'UNABLE', 'TO', 'DISTINGUISH', 'THE', 'STATUES', 'AROUND', 'HIM', 'AND', 'THE', 'THOUSANDS', 'OF', 'SPECTATORS', 'WHO', 'CROWDED', 'CLOSELY', 'TOGETHER', 'RICHLY', 'GARLANDED', 'THEIR', 'CHEEKS', 'GLOWING', 'WITH', 'ENTHUSIASM', 'SURROUNDED', 'THE', 'ARENA', 'HERMON'] +5484-24318-0034-1800: hyp=['THE', 'EGYPTIAN', 'OBEYED', 'AND', 'HIS', 'MASTER', 'CROSSED', 'THE', 'WIDE', 'SPACE', 'STREWN', 'WITH', 'SAND', 'AND', 'APPROACHED', 'THE', 'STAGE', 'WHICH', 'HAD', 'BEEN', 'ERECTED', 'FOR', 'THE', 'FESTAL', 'PERFORMANCES', 'EVEN', 'HAD', 'HIS', 'EYES', 'RETAINED', 'THE', 'POWER', 'OF', 'SIGHT', 'HIS', 'BLOOD', 'WAS', 'COURSING', 'SO', 'WILDLY', 'THROUGH', 'HIS', 'VEINS', 'THAT', 'HE', 'MIGHT', 'PERHAPS', 'HAVE', 'BEEN', 'UNABLE', 'TO', 'DISTINGUISH', 'THE', 'STATUES', 'AROUND', 'HIM', 'AND', 'THE', 'THOUSANDS', 'OF', 'SPECTATORS', 'WHO', 'CROWDED', 'CLOSELY', 'TOGETHER', 'RICHLY', 'GARLANDED', 'THEIR', 'CHEEKS', 'GLOWING', 'WITH', 'ENTHUSIASM', 'SURROUNDED', 'THE', 'ARENA', 'HERMON'] +5484-24318-0035-1801: ref=['SHOUTED', 'HIS', 'FRIEND', 'SOTELES', 'IN', 'JOYFUL', 'SURPRISE', 'IN', 'THE', 'MIDST', 'OF', 'THIS', 'PAINFUL', 'WALK', 'HERMON'] +5484-24318-0035-1801: hyp=['SHOUTED', 'HIS', 'FRIEND', 'SOCULUS', 'IN', 'JOYFUL', 'SURPRISE', 'IN', 'THE', 'MIDST', 'OF', 'HIS', 'PAINFUL', 'WALK', 'HAREMON'] +5484-24318-0036-1802: ref=['EVEN', 'WHILE', 'HE', 'BELIEVED', 'HIMSELF', 'TO', 'BE', 'THE', 'CREATOR', 'OF', 'THE', 'DEMETER', 'HE', 'HAD', 'BEEN', 'SERIOUSLY', 'TROUBLED', 'BY', 'THE', 'PRAISE', 'OF', 'SO', 'MANY', 'CRITICS', 'BECAUSE', 'IT', 'HAD', 'EXPOSED', 'HIM', 'TO', 'THE', 'SUSPICION', 'OF', 'HAVING', 'BECOME', 'FAITHLESS', 'TO', 'HIS', 'ART', 'AND', 'HIS', 'NATURE'] +5484-24318-0036-1802: hyp=['EVEN', 'WHILE', 'HE', 'BELIEVED', 'HIMSELF', 'TO', 'BE', 'THE', 'CREATOR', 'OF', 'THE', 'DEMEANOR', 'HE', 'HAD', 'BEEN', 'SERIOUSLY', 'TROUBLED', 'BY', 'THE', 'PRAISE', 'OF', 'SO', 'MANY', 'CRITICS', 'BECAUSE', 'IT', 'HAD', 'EXPOSED', 'HIM', 'TO', 'THE', 'SUSPICION', 'OF', 'HAVING', 'BECOME', 'FAITHLESS', 'TO', 'HIS', 'ART', 'AND', 'HIS', 'NATURE'] +5484-24318-0037-1803: ref=['HONOUR', 'TO', 'MYRTILUS', 'AND', 
'HIS', 'ART', 'BUT', 'HE', 'TRUSTED', 'THIS', 'NOBLE', 'FESTAL', 'ASSEMBLAGE', 'WOULD', 'PARDON', 'THE', 'UNINTENTIONAL', 'DECEPTION', 'AND', 'AID', 'HIS', 'PRAYER', 'FOR', 'RECOVERY'] +5484-24318-0037-1803: hyp=['HONOR', 'TO', 'MERTILLUS', 'AND', 'HIS', 'ART', 'BUT', 'HE', 'TRUSTED', 'THIS', 'NOBLE', 'FESTALE', 'ASSEMBLAGE', 'WOULD', 'PARDON', 'THE', 'UNINTENTIONAL', 'DECEPTION', 'AND', 'AID', 'HIS', 'PRAYER', 'FOR', 'RECOVERY'] +5764-299665-0000-1804: ref=['AFTERWARD', 'IT', 'WAS', 'SUPPOSED', 'THAT', 'HE', 'WAS', 'SATISFIED', 'WITH', 'THE', 'BLOOD', 'OF', 'OXEN', 'LAMBS', 'AND', 'DOVES', 'AND', 'THAT', 'IN', 'EXCHANGE', 'FOR', 'OR', 'ON', 'ACCOUNT', 'OF', 'THESE', 'SACRIFICES', 'THIS', 'GOD', 'GAVE', 'RAIN', 'SUNSHINE', 'AND', 'HARVEST'] +5764-299665-0000-1804: hyp=['AFTERWARDS', 'IT', 'WAS', 'SUPPOSED', 'THAT', 'HE', 'WAS', 'SATISFIED', 'WITH', 'THE', 'BLOOD', 'OF', 'OXEN', 'LAMBS', 'AND', 'DOVES', 'AND', 'THAT', 'IN', 'EXCHANGE', 'FOR', 'OR', 'IN', 'ACCOUNT', 'OF', 'THESE', 'SACRIFICES', 'THIS', 'GOD', 'GAVE', 'RAIN', 'SUNSHINE', 'AND', 'HARVEST'] +5764-299665-0001-1805: ref=['WHETHER', 'HE', 'WAS', 'THE', 'CREATOR', 'OF', 'YOURSELF', 'AND', 'MYSELF'] +5764-299665-0001-1805: hyp=['WHETHER', 'HE', 'WAS', 'THE', 'CREATOR', 'OF', 'YOURSELF', 'AND', 'MYSELF'] +5764-299665-0002-1806: ref=['WHETHER', 'ANY', 'PRAYER', 'WAS', 'EVER', 'ANSWERED'] +5764-299665-0002-1806: hyp=['WHETHER', 'ANY', 'PRAYER', 'WAS', 'EVER', 'ANSWERED'] +5764-299665-0003-1807: ref=['WHY', 'DID', 'HE', 'CREATE', 'THE', 'INTELLECTUALLY', 'INFERIOR'] +5764-299665-0003-1807: hyp=['WHY', 'DID', 'HE', 'CREATE', 'THE', 'INTELLECTUAL', 'INFERIOR'] +5764-299665-0004-1808: ref=['WHY', 'DID', 'HE', 'CREATE', 'THE', 'DEFORMED', 'AND', 'HELPLESS', 'WHY', 'DID', 'HE', 'CREATE', 'THE', 'CRIMINAL', 'THE', 'IDIOTIC', 'THE', 'INSANE'] +5764-299665-0004-1808: hyp=['WHY', 'DID', 'HE', 'CREATE', 'THE', 'DEFORMED', 'AND', 'HELPLESS', 'WHY', 'DID', 'HE', 'CREATE', 'THE', 'CRIMINAL', 'THE', 'IDIOTIC', 'THE', 'INSANE'] +5764-299665-0005-1809: ref=['ARE', 'THE', 'FAILURES', 'UNDER', 'OBLIGATION', 'TO', 'THEIR', 'CREATOR'] +5764-299665-0005-1809: hyp=['ARE', 'THE', 'FAILURES', 'UNDER', 'OBLIGATION', 'TO', 'THEIR', 'CREATOR'] +5764-299665-0006-1810: ref=['IS', 'HE', 'RESPONSIBLE', 'FOR', 'ALL', 'THE', 'WARS', 'THAT', 'HAVE', 'BEEN', 'WAGED', 'FOR', 'ALL', 'THE', 'INNOCENT', 'BLOOD', 'THAT', 'HAS', 'BEEN', 'SHED'] +5764-299665-0006-1810: hyp=['IS', 'HE', 'RESPONSIBLE', 'FOR', 'ALL', 'THE', 'WARS', 'THAT', 'HAVE', 'BEEN', 'WAGED', 'FOR', 'ALL', 'THE', 'INNOCENT', 'BLOOD', 'THAT', 'HAS', 'BEEN', 'SHED'] +5764-299665-0007-1811: ref=['IS', 'HE', 'RESPONSIBLE', 'FOR', 'THE', 'CENTURIES', 'OF', 'SLAVERY', 'FOR', 'THE', 'BACKS', 'THAT', 'HAVE', 'BEEN', 'SCARRED', 'WITH', 'THE', 'LASH', 'FOR', 'THE', 'BABES', 'THAT', 'HAVE', 'BEEN', 'SOLD', 'FROM', 'THE', 'BREASTS', 'OF', 'MOTHERS', 'FOR', 'THE', 'FAMILIES', 'THAT', 'HAVE', 'BEEN', 'SEPARATED', 'AND', 'DESTROYED'] +5764-299665-0007-1811: hyp=['IS', 'HE', 'RESPONSIBLE', 'FOR', 'THE', 'CENTURIES', 'OF', 'SLAVERY', 'FOR', 'THE', 'BACKS', 'THAT', 'HAVE', 'BEEN', 'SCARRED', 'WITH', 'A', 'LASH', 'FOR', 'THE', 'BABES', 'THAT', 'HAVE', 'BEEN', 'SOLD', 'FROM', 'THE', 'BREASTS', 'OF', 'MOTHERS', 'FOR', 'THE', 'FAMILIES', 'THAT', 'HAVE', 'BEEN', 'SEPARATED', 'AND', 'DESTROYED'] +5764-299665-0008-1812: ref=['IS', 'THIS', 'GOD', 'RESPONSIBLE', 'FOR', 'RELIGIOUS', 'PERSECUTION', 'FOR', 'THE', 'INQUISITION', 'FOR', 'THE', 'THUMB', 'SCREW', 'AND', 'RACK', 'AND', 'FOR', 'ALL', 'THE', 'INSTRUMENTS', 'OF', 'TORTURE'] 
+5764-299665-0008-1812: hyp=['IS', 'THIS', 'GOD', 'RESPONSIBLE', 'FOR', 'RELIGIOUS', 'PERSECUTION', 'FOR', 'THE', 'INQUISITION', 'FOR', 'THE', 'THUMB', 'SCREW', 'AND', 'RAG', 'AND', 'FOR', 'ALL', 'THE', 'INSTRUMENTS', 'OF', 'TORTURE'] +5764-299665-0009-1813: ref=['DID', 'THIS', 'GOD', 'ALLOW', 'THE', 'CRUEL', 'AND', 'VILE', 'TO', 'DESTROY', 'THE', 'BRAVE', 'AND', 'VIRTUOUS'] +5764-299665-0009-1813: hyp=['DID', 'THIS', 'GOD', 'ALOAD', 'THE', 'CRUEL', 'AND', 'VILE', 'TO', 'DESTROY', 'THE', 'BRAVE', 'AND', 'VIRTUOUS'] +5764-299665-0010-1814: ref=['DID', 'HE', 'ALLOW', 'TYRANTS', 'TO', 'SHED', 'THE', 'BLOOD', 'OF', 'PATRIOTS'] +5764-299665-0010-1814: hyp=['DID', 'HE', 'ALLOW', 'TYRANTS', 'TO', 'SHED', 'THE', 'BLOOD', 'OF', 'PATRIOTS'] +5764-299665-0011-1815: ref=['CAN', 'WE', 'CONCEIVE', 'OF', 'A', 'DEVIL', 'BASE', 'ENOUGH', 'TO', 'PREFER', 'HIS', 'ENEMIES', 'TO', 'HIS', 'FRIENDS'] +5764-299665-0011-1815: hyp=['CAN', 'WE', 'CONCEIVE', 'OF', 'A', 'DEVIL', 'BASE', 'ENOUGH', 'TO', 'PREFER', 'HIS', 'ENEMIES', 'TO', 'HIS', 'FRIENDS'] +5764-299665-0012-1816: ref=['HOW', 'CAN', 'WE', 'ACCOUNT', 'FOR', 'THE', 'WILD', 'BEASTS', 'THAT', 'DEVOUR', 'HUMAN', 'BEINGS', 'FOR', 'THE', 'FANGED', 'SERPENTS', 'WHOSE', 'BITE', 'IS', 'DEATH'] +5764-299665-0012-1816: hyp=['HOW', 'CAN', 'WE', 'ACCOUNT', 'FOR', 'THE', 'WILD', 'BEASTS', 'THAT', 'DEVOUR', 'HUMAN', 'BEINGS', 'FOR', 'THE', 'FANGED', 'SERPENTS', 'WHOSE', 'BITE', 'IS', 'DEATH'] +5764-299665-0013-1817: ref=['HOW', 'CAN', 'WE', 'ACCOUNT', 'FOR', 'A', 'WORLD', 'WHERE', 'LIFE', 'FEEDS', 'ON', 'LIFE'] +5764-299665-0013-1817: hyp=['HOW', 'CAN', 'WE', 'ACCOUNT', 'FOR', 'A', 'WORLD', 'WHERE', 'LIFE', 'FEEDS', 'ON', 'LIFE'] +5764-299665-0014-1818: ref=['DID', 'INFINITE', 'WISDOM', 'INTENTIONALLY', 'PRODUCE', 'THE', 'MICROSCOPIC', 'BEASTS', 'THAT', 'FEED', 'UPON', 'THE', 'OPTIC', 'NERVE', 'THINK', 'OF', 'BLINDING', 'A', 'MAN', 'TO', 'SATISFY', 'THE', 'APPETITE', 'OF', 'A', 'MICROBE'] +5764-299665-0014-1818: hyp=['DID', 'INFINITE', 'WISDOM', 'INTENTIALLY', 'PRODUCE', 'THE', 'MICROSCOPIC', 'BEASTS', 'THAT', 'FEED', 'UPON', 'THE', 'OPTIC', 'NURSE', 'THINK', 'OF', 'BLINDING', 'A', 'MAN', 'TO', 'SATISFY', 'THE', 'APPETITE', 'OF', 'A', 'MICROBE'] +5764-299665-0015-1819: ref=['FEAR', 'BUILDS', 'THE', 'ALTAR', 'AND', 'OFFERS', 'THE', 'SACRIFICE'] +5764-299665-0015-1819: hyp=['FEAR', 'BUILDS', 'THE', 'ALTAR', 'AND', 'OFFERS', 'THE', 'SACRIFICE'] +5764-299665-0016-1820: ref=['FEAR', 'ERECTS', 'THE', 'CATHEDRAL', 'AND', 'BOWS', 'THE', 'HEAD', 'OF', 'MAN', 'IN', 'WORSHIP'] +5764-299665-0016-1820: hyp=['FEAR', 'ERECTS', 'THE', 'CATHEDRAL', 'AND', 'BOWS', 'THE', 'HEAD', 'OF', 'MAN', 'IN', 'WORSHIP'] +5764-299665-0017-1821: ref=['LIPS', 'RELIGIOUS', 'AND', 'FEARFUL', 'TREMBLINGLY', 'REPEAT', 'THIS', 'PASSAGE', 'THOUGH', 'HE', 'SLAY', 'ME', 'YET', 'WILL', 'I', 'TRUST', 'HIM'] +5764-299665-0017-1821: hyp=['LIPS', 'RELIGIOUS', 'AND', 'FEARFUL', 'TREMBLINGLY', 'REPEAT', 'THIS', 'PASSAGE', 'THOUGH', 'HE', 'SLAY', 'ME', 'YET', 'WILL', 'I', 'TRUST', 'HIM'] +5764-299665-0018-1822: ref=['CAN', 'WE', 'SAY', 'THAT', 'HE', 'CARED', 'FOR', 'THE', 'CHILDREN', 'OF', 'MEN'] +5764-299665-0018-1822: hyp=['CAN', 'WE', 'SAY', 'THAT', 'HE', 'CARED', 'FOR', 'THE', 'CHILDREN', 'OF', 'MEN'] +5764-299665-0019-1823: ref=['CAN', 'WE', 'SAY', 'THAT', 'HIS', 'MERCY', 'ENDURETH', 'FOREVER'] +5764-299665-0019-1823: hyp=['CAN', 'WE', 'SAY', 'THAT', 'HIS', 'MERCY', 'ENDURED', 'FOREVER'] +5764-299665-0020-1824: ref=['DO', 'WE', 'PROVE', 'HIS', 'GOODNESS', 'BY', 'SHOWING', 'THAT', 'HE', 'HAS', 'OPENED', 'THE', 
'EARTH', 'AND', 'SWALLOWED', 'THOUSANDS', 'OF', 'HIS', 'HELPLESS', 'CHILDREN', 'OR', 'THAT', 'WITH', 'THE', 'VOLCANOES', 'HE', 'HAS', 'OVERWHELMED', 'THEM', 'WITH', 'RIVERS', 'OF', 'FIRE'] +5764-299665-0020-1824: hyp=['DO', 'WE', 'PROVE', 'HIS', 'GOODNESS', 'BY', 'SHOWING', 'THAT', 'HE', 'HAS', 'OPENED', 'THE', 'EARTH', 'AND', 'SWALLOWED', 'THOUSANDS', 'OF', 'HIS', 'HELPLESS', 'CHILDREN', 'OR', 'THAT', 'WITH', 'THE', 'VOLCANOES', 'HE', 'HAS', 'OVERWHELMED', 'THEM', 'WITH', 'RIVERS', 'OF', 'FIRE'] +5764-299665-0021-1825: ref=['WAS', 'THERE', 'GOODNESS', 'WAS', 'THERE', 'WISDOM', 'IN', 'THIS'] +5764-299665-0021-1825: hyp=['WAS', 'THERE', 'GOODNESS', 'WAS', 'THERE', 'WISDOM', 'IN', 'THIS'] +5764-299665-0022-1826: ref=['OUGHT', 'THE', 'SUPERIOR', 'RACES', 'TO', 'THANK', 'GOD', 'THAT', 'THEY', 'ARE', 'NOT', 'THE', 'INFERIOR'] +5764-299665-0022-1826: hyp=['OUGHT', 'THE', 'SUPERIOR', 'RACE', 'TO', 'THANK', 'GOD', 'THAT', 'THEY', 'ARE', 'NOT', 'THE', 'INFERIOR'] +5764-299665-0023-1827: ref=['MOST', 'PEOPLE', 'CLING', 'TO', 'THE', 'SUPERNATURAL'] +5764-299665-0023-1827: hyp=['MOST', 'PEOPLE', 'CLING', 'TO', 'THE', 'SUPERNATURAL'] +5764-299665-0024-1828: ref=['IF', 'THEY', 'GIVE', 'UP', 'ONE', 'GOD', 'THEY', 'IMAGINE', 'ANOTHER'] +5764-299665-0024-1828: hyp=['IF', 'THEY', 'GIVE', 'UP', 'ONE', 'GOD', 'THEY', 'IMAGINE', 'ANOTHER'] +5764-299665-0025-1829: ref=['WHAT', 'IS', 'THIS', 'POWER'] +5764-299665-0025-1829: hyp=['WHAT', 'IS', 'THIS', 'POWER'] +5764-299665-0026-1830: ref=['MAN', 'ADVANCES', 'AND', 'NECESSARILY', 'ADVANCES', 'THROUGH', 'EXPERIENCE'] +5764-299665-0026-1830: hyp=['MAN', 'ADVANCES', 'AND', 'NECESSARILY', 'ADVANCES', 'THROUGH', 'EXPERIENCE'] +5764-299665-0027-1831: ref=['A', 'MAN', 'WISHING', 'TO', 'GO', 'TO', 'A', 'CERTAIN', 'PLACE', 'COMES', 'TO', 'WHERE', 'THE', 'ROAD', 'DIVIDES'] +5764-299665-0027-1831: hyp=['A', 'MAN', 'WISHING', 'TO', 'GO', 'TO', 'A', 'CERTAIN', 'PLACE', 'COME', 'TO', 'WHERE', 'THE', 'ROAD', 'DIVIDES'] +5764-299665-0028-1832: ref=['HE', 'HAS', 'TRIED', 'THAT', 'ROAD', 'AND', 'KNOWS', 'THAT', 'IT', 'IS', 'THE', 'WRONG', 'ROAD'] +5764-299665-0028-1832: hyp=['HE', 'HAS', 'TRIED', 'THAT', 'ROAD', 'AND', 'KNOWS', 'THAT', 'IT', 'IS', 'THE', 'WRONG', 'ROAD'] +5764-299665-0029-1833: ref=['A', 'CHILD', 'CHARMED', 'BY', 'THE', 'BEAUTY', 'OF', 'THE', 'FLAME', 'GRASPS', 'IT', 'WITH', 'ITS', 'DIMPLED', 'HAND'] +5764-299665-0029-1833: hyp=['A', 'CHILD', 'CHARMED', 'BY', 'THE', 'BEAUTY', 'OF', 'THE', 'FLAME', 'GRASPED', 'WITH', 'HIS', 'DIMPLED', 'HAND'] +5764-299665-0030-1834: ref=['THE', 'POWER', 'THAT', 'WORKS', 'FOR', 'RIGHTEOUSNESS', 'HAS', 'TAUGHT', 'THE', 'CHILD', 'A', 'LESSON'] +5764-299665-0030-1834: hyp=['THE', 'POWER', 'THAT', 'WORKS', 'FOR', 'RIGHTEOUSNESS', 'HAD', 'TAUGHT', 'THE', 'CHILD', 'A', 'LESSON'] +5764-299665-0031-1835: ref=['IT', 'IS', 'A', 'RESULT'] +5764-299665-0031-1835: hyp=['IT', 'IS', 'A', 'RESULT'] +5764-299665-0032-1836: ref=['IT', 'IS', 'INSISTED', 'BY', 'THESE', 'THEOLOGIANS', 'AND', 'BY', 'MANY', 'OF', 'THE', 'SO', 'CALLED', 'PHILOSOPHERS', 'THAT', 'THIS', 'MORAL', 'SENSE', 'THIS', 'SENSE', 'OF', 'DUTY', 'OF', 'OBLIGATION', 'WAS', 'IMPORTED', 'AND', 'THAT', 'CONSCIENCE', 'IS', 'AN', 'EXOTIC'] +5764-299665-0032-1836: hyp=['IT', 'IS', 'INSISTED', 'BY', 'THESE', 'THEOLOGIANS', 'AND', 'BY', 'MANY', 'OF', 'THE', 'SO', 'CALLED', 'PHILOSOPHERS', 'THAT', 'THIS', 'MORAL', 'SENSE', 'THIS', 'SENSE', 'OF', 'DUTY', 'OF', 'OBLIGATION', 'WAS', 'IMPORTED', 'AND', 'THAT', 'CONSCIENCE', 'IS', 'AN', 'EXOTIC'] +5764-299665-0033-1837: ref=['WE', 'LIVE', 'TOGETHER', 
'IN', 'FAMILIES', 'TRIBES', 'AND', 'NATIONS'] +5764-299665-0033-1837: hyp=['WE', 'LIVE', 'TOGETHER', 'IN', 'FAMILIES', 'TRIBES', 'AND', 'NATIONS'] +5764-299665-0034-1838: ref=['THEY', 'ARE', 'PRAISED', 'ADMIRED', 'AND', 'RESPECTED'] +5764-299665-0034-1838: hyp=['THEY', 'ARE', 'PRAISED', 'ADMIRED', 'AND', 'RESPECTED'] +5764-299665-0035-1839: ref=['THEY', 'ARE', 'REGARDED', 'AS', 'GOOD', 'THAT', 'IS', 'TO', 'SAY', 'AS', 'MORAL'] +5764-299665-0035-1839: hyp=['THEY', 'ARE', 'REGARDED', 'AS', 'GOOD', 'THAT', 'IS', 'TO', 'SAY', 'AS', 'MORAL'] +5764-299665-0036-1840: ref=['THE', 'MEMBERS', 'WHO', 'ADD', 'TO', 'THE', 'MISERY', 'OF', 'THE', 'FAMILY', 'THE', 'TRIBE', 'OR', 'THE', 'NATION', 'ARE', 'CONSIDERED', 'BAD', 'MEMBERS'] +5764-299665-0036-1840: hyp=['THE', 'MEMBERS', 'WHO', 'ADD', 'TO', 'THE', 'MISERY', 'OF', 'THE', 'FAMILY', 'THE', 'TRIBE', 'OR', 'THE', 'NATION', 'ARE', 'CONSIDERED', 'BAD', 'MEMBERS'] +5764-299665-0037-1841: ref=['THE', 'GREATEST', 'OF', 'HUMAN', 'BEINGS', 'HAS', 'SAID', 'CONSCIENCE', 'IS', 'BORN', 'OF', 'LOVE'] +5764-299665-0037-1841: hyp=['THE', 'GREATEST', 'OF', 'HUMAN', 'BEINGS', 'HAD', 'SAID', 'CONSCIENCE', 'IS', 'BORN', 'OF', 'LOVE'] +5764-299665-0038-1842: ref=['AS', 'PEOPLE', 'ADVANCE', 'THE', 'REMOTE', 'CONSEQUENCES', 'ARE', 'PERCEIVED'] +5764-299665-0038-1842: hyp=['AS', 'PEOPLE', 'ADVANCE', 'THE', 'REMOTE', 'CONSEQUENCES', 'ARE', 'PERCEIVED'] +5764-299665-0039-1843: ref=['THE', 'IMAGINATION', 'IS', 'CULTIVATED'] +5764-299665-0039-1843: hyp=['THE', 'IMAGINATION', 'IS', 'CULTIVATED'] +5764-299665-0040-1844: ref=['A', 'MAN', 'PUTS', 'HIMSELF', 'IN', 'THE', 'PLACE', 'OF', 'ANOTHER'] +5764-299665-0040-1844: hyp=['A', 'MAN', 'PUTS', 'HIMSELF', 'IN', 'THE', 'PLACE', 'OF', 'ANOTHER'] +5764-299665-0041-1845: ref=['THE', 'SENSE', 'OF', 'DUTY', 'BECOMES', 'STRONGER', 'MORE', 'IMPERATIVE'] +5764-299665-0041-1845: hyp=['THE', 'SENSE', 'OF', 'DUTY', 'BECAME', 'STRONGER', 'MORE', 'IMPERATIVE'] +5764-299665-0042-1846: ref=['MAN', 'JUDGES', 'HIMSELF'] +5764-299665-0042-1846: hyp=['MAN', 'JUDGETH', 'HIMSELF'] +5764-299665-0043-1847: ref=['IN', 'ALL', 'THIS', 'THERE', 'IS', 'NOTHING', 'SUPERNATURAL'] +5764-299665-0043-1847: hyp=['IN', 'ALL', 'THIS', 'THERE', 'IS', 'NOTHING', 'SUPERNATURAL'] +5764-299665-0044-1848: ref=['MAN', 'HAS', 'DECEIVED', 'HIMSELF'] +5764-299665-0044-1848: hyp=['MAN', 'HAS', 'DECEIVED', 'HIMSELF'] +5764-299665-0045-1849: ref=['HAS', 'CHRISTIANITY', 'DONE', 'GOOD'] +5764-299665-0045-1849: hyp=['HAS', 'CHRISTIANITY', 'DONE', 'GOOD'] +5764-299665-0046-1850: ref=['WHEN', 'THE', 'CHURCH', 'HAD', 'CONTROL', 'WERE', 'MEN', 'MADE', 'BETTER', 'AND', 'HAPPIER'] +5764-299665-0046-1850: hyp=['WHEN', 'THE', 'CHURCH', 'HAD', 'CONTROL', 'WERE', 'MEN', 'MADE', 'BETTER', 'AND', 'HAPPIER'] +5764-299665-0047-1851: ref=['WHAT', 'HAS', 'RELIGION', 'DONE', 'FOR', 'HUNGARY', 'OR', 'AUSTRIA'] +5764-299665-0047-1851: hyp=['WHAT', 'HAS', 'RELIGION', 'DONE', 'FOR', 'HUNGARY', 'OR', 'AUSTRIA'] +5764-299665-0048-1852: ref=['COULD', 'THESE', 'COUNTRIES', 'HAVE', 'BEEN', 'WORSE', 'WITHOUT', 'RELIGION'] +5764-299665-0048-1852: hyp=['COULD', 'THESE', 'COUNTRIES', 'HAVE', 'BEEN', 'WORSE', 'WITHOUT', 'RELIGION'] +5764-299665-0049-1853: ref=['COULD', 'THEY', 'HAVE', 'BEEN', 'WORSE', 'HAD', 'THEY', 'HAD', 'ANY', 'OTHER', 'RELIGION', 'THAN', 'CHRISTIANITY'] +5764-299665-0049-1853: hyp=['COULD', 'THEY', 'HAVE', 'BEEN', 'WORSE', 'HAD', 'THEY', 'HAD', 'ANY', 'OTHER', 'RELIGION', 'THAN', 'CHRISTIANITY'] +5764-299665-0050-1854: ref=['WHAT', 'DID', 'CHRISTIANITY', 'DO', 'FOR', 'THEM'] 
+5764-299665-0050-1854: hyp=['WHAT', 'DID', 'CHRISTIANITY', 'DO', 'FOR', 'THEM'] +5764-299665-0051-1855: ref=['THEY', 'HATED', 'PLEASURE'] +5764-299665-0051-1855: hyp=['THEY', 'HATED', 'PLEASURE'] +5764-299665-0052-1856: ref=['THEY', 'MUFFLED', 'ALL', 'THE', 'BELLS', 'OF', 'GLADNESS'] +5764-299665-0052-1856: hyp=['THEY', 'MUFFLED', 'ALL', 'THE', 'BELLS', 'OF', 'GLADNESS'] +5764-299665-0053-1857: ref=['THE', 'RELIGION', 'OF', 'THE', 'PURITAN', 'WAS', 'AN', 'UNADULTERATED', 'CURSE'] +5764-299665-0053-1857: hyp=['THE', 'RELIGION', 'OF', 'THE', 'PURITAN', 'WAS', 'AN', 'UNADULTERATED', 'CURSE'] +5764-299665-0054-1858: ref=['THE', 'PURITAN', 'BELIEVED', 'THE', 'BIBLE', 'TO', 'BE', 'THE', 'WORD', 'OF', 'GOD', 'AND', 'THIS', 'BELIEF', 'HAS', 'ALWAYS', 'MADE', 'THOSE', 'WHO', 'HELD', 'IT', 'CRUEL', 'AND', 'WRETCHED'] +5764-299665-0054-1858: hyp=['THE', 'PURITAN', 'BELIEVED', 'THE', 'BIBLE', 'TO', 'BE', 'THE', 'WORD', 'OF', 'GOD', 'AND', 'THIS', 'BELIEF', 'HAS', 'ALWAYS', 'MADE', 'THOSE', 'WHO', 'HELD', 'IT', 'CRUEL', 'AND', 'WRETCHED'] +5764-299665-0055-1859: ref=['LET', 'ME', 'REFER', 'TO', 'JUST', 'ONE', 'FACT', 'SHOWING', 'THE', 'INFLUENCE', 'OF', 'A', 'BELIEF', 'IN', 'THE', 'BIBLE', 'ON', 'HUMAN', 'BEINGS'] +5764-299665-0055-1859: hyp=['LET', 'ME', 'REFER', 'TO', 'JUST', 'ONE', 'FACT', 'SHOWING', 'THE', 'INFLUENCE', 'OF', 'A', 'BELIEF', 'IN', 'THE', 'BIBLE', 'ON', 'HUMAN', 'BEINGS'] +5764-299665-0056-1860: ref=['THE', 'QUEEN', 'RECEIVED', 'THE', 'BIBLE', 'KISSED', 'IT', 'AND', 'PLEDGED', 'HERSELF', 'TO', 'DILIGENTLY', 'READ', 'THEREIN'] +5764-299665-0056-1860: hyp=['THE', 'QUEEN', 'RECEIVED', 'THE', 'BIBLE', 'KISSED', 'IT', 'AND', 'PLEDGED', 'HERSELF', 'TO', 'DILIGENTLY', 'READ', 'THEREIN'] +5764-299665-0057-1861: ref=['IN', 'OTHER', 'WORDS', 'IT', 'WAS', 'JUST', 'AS', 'FIENDISH', 'JUST', 'AS', 'INFAMOUS', 'AS', 'THE', 'CATHOLIC', 'SPIRIT'] +5764-299665-0057-1861: hyp=['IN', 'OTHER', 'WORDS', 'IT', 'WAS', 'JUST', 'AS', 'FIENDISH', 'JUST', 'AS', 'INFAMOUS', 'AS', 'THE', 'CATHOLIC', 'SPIRIT'] +5764-299665-0058-1862: ref=['HAS', 'THE', 'BIBLE', 'MADE', 'THE', 'PEOPLE', 'OF', 'GEORGIA', 'KIND', 'AND', 'MERCIFUL'] +5764-299665-0058-1862: hyp=['HAS', 'THE', 'BIBLE', 'MADE', 'THE', 'PEOPLE', 'OF', 'GEORGIA', 'KIND', 'AND', 'MERCIFUL'] +5764-299665-0059-1863: ref=['RELIGION', 'HAS', 'BEEN', 'TRIED', 'AND', 'IN', 'ALL', 'COUNTRIES', 'IN', 'ALL', 'TIMES', 'HAS', 'FAILED'] +5764-299665-0059-1863: hyp=['RELIGION', 'HAS', 'BEEN', 'TRIED', 'AND', 'IN', 'ALL', 'COUNTRIES', 'IN', 'ALL', 'TIMES', 'HAS', 'FAILED'] +5764-299665-0060-1864: ref=['RELIGION', 'HAS', 'ALWAYS', 'BEEN', 'THE', 'ENEMY', 'OF', 'SCIENCE', 'OF', 'INVESTIGATION', 'AND', 'THOUGHT'] +5764-299665-0060-1864: hyp=['RELIGION', 'HAS', 'ALWAYS', 'BEEN', 'THE', 'ENEMY', 'OF', 'SCIENCE', 'OF', 'INVESTIGATION', 'AND', 'THOUGHT'] +5764-299665-0061-1865: ref=['RELIGION', 'HAS', 'NEVER', 'MADE', 'MAN', 'FREE'] +5764-299665-0061-1865: hyp=['RELIGION', 'HAS', 'NEVER', 'MADE', 'MEN', 'FREE'] +5764-299665-0062-1866: ref=['IT', 'HAS', 'NEVER', 'MADE', 'MAN', 'MORAL', 'TEMPERATE', 'INDUSTRIOUS', 'AND', 'HONEST'] +5764-299665-0062-1866: hyp=['IT', 'HAS', 'NEVER', 'MADE', 'MAN', 'MORAL', 'TEMPERATE', 'INDUSTRIOUS', 'AND', 'HONEST'] +5764-299665-0063-1867: ref=['ARE', 'CHRISTIANS', 'MORE', 'TEMPERATE', 'NEARER', 'VIRTUOUS', 'NEARER', 'HONEST', 'THAN', 'SAVAGES'] +5764-299665-0063-1867: hyp=['ARE', 'CHRISTIANS', 'MORE', 'TEMPERATE', 'NEARER', 'VIRTUOUS', 'NEARER', 'HONEST', 'THAN', 'SAVAGES'] +5764-299665-0064-1868: ref=['CAN', 'WE', 'CURE', 'DISEASE', 'BY', 
'SUPPLICATION'] +5764-299665-0064-1868: hyp=['CAN', 'WE', 'CURE', 'DISEASE', 'BY', 'SUPPLICATION'] +5764-299665-0065-1869: ref=['CAN', 'WE', 'RECEIVE', 'VIRTUE', 'OR', 'HONOR', 'AS', 'ALMS'] +5764-299665-0065-1869: hyp=['CAN', 'WE', 'RECEIVE', 'VIRTUE', 'OR', 'HONOR', 'AS', 'ALMS'] +5764-299665-0066-1870: ref=['RELIGION', 'RESTS', 'ON', 'THE', 'IDEA', 'THAT', 'NATURE', 'HAS', 'A', 'MASTER', 'AND', 'THAT', 'THIS', 'MASTER', 'WILL', 'LISTEN', 'TO', 'PRAYER', 'THAT', 'THIS', 'MASTER', 'PUNISHES', 'AND', 'REWARDS', 'THAT', 'HE', 'LOVES', 'PRAISE', 'AND', 'FLATTERY', 'AND', 'HATES', 'THE', 'BRAVE', 'AND', 'FREE'] +5764-299665-0066-1870: hyp=['RELIGION', 'RESTS', 'ON', 'THE', 'IDEA', 'THAT', 'NATURE', 'HAS', 'A', 'MASTER', 'AND', 'THAT', 'THIS', 'MASTER', 'WILL', 'LISTEN', 'TO', 'PRAYER', 'THAT', 'HIS', 'MASTER', 'PUNISHES', 'AND', 'REWARDS', 'THAT', 'HE', 'LOVES', 'PRAISE', 'AND', 'FLATTERY', 'AND', 'HATES', 'THE', 'BRAVE', 'AND', 'FREE'] +5764-299665-0067-1871: ref=['WE', 'MUST', 'HAVE', 'CORNER', 'STONES'] +5764-299665-0067-1871: hyp=['WE', 'MUST', 'HAVE', 'CORN', 'STONES'] +5764-299665-0068-1872: ref=['THE', 'STRUCTURE', 'MUST', 'HAVE', 'A', 'BASEMENT'] +5764-299665-0068-1872: hyp=['THE', 'STRUCTURE', 'MUST', 'HAVE', 'A', 'BASEMENT'] +5764-299665-0069-1873: ref=['IF', 'WE', 'BUILD', 'WE', 'MUST', 'BEGIN', 'AT', 'THE', 'BOTTOM'] +5764-299665-0069-1873: hyp=['IF', 'WE', 'BUILD', 'WE', 'MUST', 'BEGIN', 'AT', 'THE', 'BOTTOM'] +5764-299665-0070-1874: ref=['I', 'HAVE', 'A', 'THEORY', 'AND', 'I', 'HAVE', 'FOUR', 'CORNER', 'STONES'] +5764-299665-0070-1874: hyp=['I', 'HAVE', 'A', 'THEORY', 'AND', 'I', 'HAVE', 'FOUR', 'CORNER', 'STONES'] +5764-299665-0071-1875: ref=['THE', 'FIRST', 'STONE', 'IS', 'THAT', 'MATTER', 'SUBSTANCE', 'CANNOT', 'BE', 'DESTROYED', 'CANNOT', 'BE', 'ANNIHILATED'] +5764-299665-0071-1875: hyp=['THE', 'FIRST', 'STONE', 'IS', 'THAT', 'MATTER', 'SUBSTANCE', 'CANNOT', 'BE', 'DESTROYED', 'CANNOT', 'BE', 'ANNIHILATED'] +5764-299665-0072-1876: ref=['IF', 'THESE', 'CORNER', 'STONES', 'ARE', 'FACTS', 'IT', 'FOLLOWS', 'AS', 'A', 'NECESSITY', 'THAT', 'MATTER', 'AND', 'FORCE', 'ARE', 'FROM', 'AND', 'TO', 'ETERNITY', 'THAT', 'THEY', 'CAN', 'NEITHER', 'BE', 'INCREASED', 'NOR', 'DIMINISHED'] +5764-299665-0072-1876: hyp=['IF', 'THESE', 'CORNERSTONES', 'ARE', 'FACTS', 'IT', 'FOLLOWS', 'AS', 'A', 'NECESSITY', 'THAT', 'MATTER', 'AND', 'FORCE', 'ARE', 'FROM', 'END', 'TO', 'ETERNITY', 'THAT', 'THEY', 'CAN', 'NEITHER', 'BE', 'INCREASED', 'NOR', 'DIMINISHED'] +5764-299665-0073-1877: ref=['IT', 'FOLLOWS', 'THAT', 'NOTHING', 'HAS', 'BEEN', 'OR', 'CAN', 'BE', 'CREATED', 'THAT', 'THERE', 'NEVER', 'HAS', 'BEEN', 'OR', 'CAN', 'BE', 'A', 'CREATOR'] +5764-299665-0073-1877: hyp=['IT', 'FOLLOWS', 'THAT', 'NOTHING', 'HAS', 'BEEN', 'OR', 'CAN', 'BE', 'CREATED', 'THAT', 'THERE', 'NEVER', 'HAS', 'BEEN', 'OR', 'CAN', 'BE', 'A', 'CREATOR'] +5764-299665-0074-1878: ref=['IT', 'FOLLOWS', 'THAT', 'THERE', 'COULD', 'NOT', 'HAVE', 'BEEN', 'ANY', 'INTELLIGENCE', 'ANY', 'DESIGN', 'BACK', 'OF', 'MATTER', 'AND', 'FORCE'] +5764-299665-0074-1878: hyp=['IT', 'FOLLOWED', 'THAT', 'THERE', 'COULD', 'NOT', 'HAVE', 'BEEN', 'ANY', 'INTELLIGENCE', 'ANY', 'DESIGN', 'BACK', 'OF', 'MATTER', 'AND', 'FORCE'] +5764-299665-0075-1879: ref=['I', 'SAY', 'WHAT', 'I', 'THINK'] +5764-299665-0075-1879: hyp=['I', 'SAY', 'WHAT', 'I', 'THINK'] +5764-299665-0076-1880: ref=['EVERY', 'EVENT', 'HAS', 'PARENTS'] +5764-299665-0076-1880: hyp=['EVERY', 'EVENT', 'HAS', 'PARENTS'] +5764-299665-0077-1881: ref=['THAT', 'WHICH', 'HAS', 'NOT', 'HAPPENED', 'COULD', 
'NOT'] +5764-299665-0077-1881: hyp=['THAT', 'WHICH', 'HAS', 'NOT', 'HAPPENED', 'COULD', 'NOT'] +5764-299665-0078-1882: ref=['IN', 'THE', 'INFINITE', 'CHAIN', 'THERE', 'IS', 'AND', 'THERE', 'CAN', 'BE', 'NO', 'BROKEN', 'NO', 'MISSING', 'LINK'] +5764-299665-0078-1882: hyp=['IN', 'THE', 'INFINITE', 'CHAIN', 'THERE', 'IS', 'AND', 'THERE', 'CAN', 'BE', 'NO', 'BROKEN', 'NO', 'MISSING', 'LINK'] +5764-299665-0079-1883: ref=['WE', 'NOW', 'KNOW', 'THAT', 'OUR', 'FIRST', 'PARENTS', 'WERE', 'NOT', 'FOREIGNERS'] +5764-299665-0079-1883: hyp=['WE', 'NOW', 'KNOW', 'THAT', 'OUR', 'FIRST', 'PARENTS', 'WERE', 'NOT', 'FOREIGNERS'] +5764-299665-0080-1884: ref=['WE', 'NOW', 'KNOW', 'IF', 'WE', 'KNOW', 'ANYTHING', 'THAT', 'THE', 'UNIVERSE', 'IS', 'NATURAL', 'AND', 'THAT', 'MEN', 'AND', 'WOMEN', 'HAVE', 'BEEN', 'NATURALLY', 'PRODUCED'] +5764-299665-0080-1884: hyp=['WE', 'NOW', 'KNOW', 'IF', 'WE', 'KNOW', 'ANYTHING', 'THAT', 'THE', 'UNIVERSE', 'IS', 'NATURAL', 'AND', 'THAT', 'MEN', 'AND', 'WOMEN', 'HAVE', 'BEEN', 'NATURALLY', 'PRODUCED'] +5764-299665-0081-1885: ref=['WE', 'KNOW', 'THE', 'PATHS', 'THAT', 'LIFE', 'HAS', 'TRAVELED'] +5764-299665-0081-1885: hyp=['WE', 'KNOW', 'THE', 'PATHS', 'THAT', 'LIFE', 'HAS', 'TRAVELLED'] +5764-299665-0082-1886: ref=['WE', 'KNOW', 'THE', 'FOOTSTEPS', 'OF', 'ADVANCE', 'THEY', 'HAVE', 'BEEN', 'TRACED'] +5764-299665-0082-1886: hyp=['WE', 'KNOW', 'THE', 'FOOTSTEPS', 'OF', 'ADVANCE', 'THEY', 'HAVE', 'BEEN', 'TRACED'] +5764-299665-0083-1887: ref=['FOR', 'THOUSANDS', 'OF', 'YEARS', 'MEN', 'AND', 'WOMEN', 'HAVE', 'BEEN', 'TRYING', 'TO', 'REFORM', 'THE', 'WORLD'] +5764-299665-0083-1887: hyp=['FOR', 'THOUSANDS', 'OF', 'YEARS', 'MEN', 'AND', 'WOMEN', 'HAVE', 'BEEN', 'TRYING', 'TO', 'REFORM', 'THE', 'WORLD'] +5764-299665-0084-1888: ref=['WHY', 'HAVE', 'THE', 'REFORMERS', 'FAILED'] +5764-299665-0084-1888: hyp=['WHY', 'HAVE', 'THE', 'REFORMERS', 'SA'] +5764-299665-0085-1889: ref=['THEY', 'DEPEND', 'ON', 'THE', 'LORD', 'ON', 'LUCK', 'AND', 'CHARITY'] +5764-299665-0085-1889: hyp=['THEY', 'DEPEND', 'ON', 'THE', 'LOT', 'ON', 'LUCK', 'AND', 'CHARITY'] +5764-299665-0086-1890: ref=['THEY', 'LIVE', 'BY', 'FRAUD', 'AND', 'VIOLENCE', 'AND', 'BEQUEATH', 'THEIR', 'VICES', 'TO', 'THEIR', 'CHILDREN'] +5764-299665-0086-1890: hyp=['THEY', 'LIVE', 'BY', 'FRAUD', 'AND', 'VIOLENCE', 'AND', 'BEQUEATH', 'THEIR', 'VICES', 'TO', 'THEIR', 'CHILDREN'] +5764-299665-0087-1891: ref=['FAILURE', 'SEEMS', 'TO', 'BE', 'THE', 'TRADEMARK', 'OF', 'NATURE', 'WHY'] +5764-299665-0087-1891: hyp=['FAILURE', 'SEEMS', 'TO', 'BE', 'THE', 'TRADEMARK', 'OF', 'NATURE', 'WHY'] +5764-299665-0088-1892: ref=['NATURE', 'PRODUCES', 'WITHOUT', 'PURPOSE', 'SUSTAINS', 'WITHOUT', 'INTENTION', 'AND', 'DESTROYS', 'WITHOUT', 'THOUGHT'] +5764-299665-0088-1892: hyp=['NATURE', 'PRODUCES', 'IT', 'WITHOUT', 'PURPOSE', 'SUSTAINS', 'WITHOUT', 'INTENTION', 'AND', 'DESTROYS', 'WITHOUT', 'THOUGHT'] +5764-299665-0089-1893: ref=['MUST', 'THE', 'WORLD', 'FOREVER', 'REMAIN', 'THE', 'VICTIM', 'OF', 'IGNORANT', 'PASSION'] +5764-299665-0089-1893: hyp=['MUST', 'THE', 'WORLD', 'FOREVER', 'REMAIN', 'THE', 'VICTIM', 'OF', 'IGNORANT', 'PASSION'] +5764-299665-0090-1894: ref=['WHY', 'SHOULD', 'MEN', 'AND', 'WOMEN', 'HAVE', 'CHILDREN', 'THAT', 'THEY', 'CANNOT', 'TAKE', 'CARE', 'OF', 'CHILDREN', 'THAT', 'ARE', 'BURDENS', 'AND', 'CURSES', 'WHY'] +5764-299665-0090-1894: hyp=['WHY', 'SHOULD', 'MEN', 'AND', 'WOMEN', 'HAVE', 'CHILDREN', 'THAT', 'THEY', 'CANNOT', 'TAKE', 'CARE', 'OF', 'CHILDREN', 'THAT', 'ARE', 'A', 'BURDEN', 'AND', 'CURSES', 'WHY'] +5764-299665-0091-1895: 
ref=['PASSION', 'IS', 'AND', 'ALWAYS', 'HAS', 'BEEN', 'DEAF'] +5764-299665-0091-1895: hyp=['PASSION', 'IS', 'AND', 'ALWAYS', 'HAS', 'BEEN', 'DEATH'] +5764-299665-0092-1896: ref=['LAW', 'CAN', 'PUNISH', 'BUT', 'IT', 'CAN', 'NEITHER', 'REFORM', 'CRIMINALS', 'NOR', 'PREVENT', 'CRIME'] +5764-299665-0092-1896: hyp=['LAW', 'CAN', 'PUNISH', 'BUT', 'IT', 'CAN', 'NEITHER', 'REFORM', 'CRIMINALS', 'NOR', 'PREVENT', 'CRIME'] +5764-299665-0093-1897: ref=['THIS', 'CANNOT', 'BE', 'DONE', 'BY', 'TALK', 'OR', 'EXAMPLE'] +5764-299665-0093-1897: hyp=['THIS', 'CANNOT', 'BE', 'DONE', 'BY', 'TALK', 'OR', 'EXAMPLE'] +5764-299665-0094-1898: ref=['THIS', 'IS', 'THE', 'SOLUTION', 'OF', 'THE', 'WHOLE', 'QUESTION'] +5764-299665-0094-1898: hyp=['THIS', 'IS', 'THE', 'SOLUTION', 'OF', 'THE', 'WHOLE', 'QUESTION'] +5764-299665-0095-1899: ref=['THIS', 'FREES', 'WOMAN'] +5764-299665-0095-1899: hyp=['THESE', 'FREESWOMEN'] +5764-299665-0096-1900: ref=['POVERTY', 'AND', 'CRIME', 'WILL', 'BE', 'CHILDLESS'] +5764-299665-0096-1900: hyp=['POVERTY', 'AND', 'CRIME', 'WILL', 'BE', 'CHILDLESS'] +5764-299665-0097-1901: ref=['IT', 'IS', 'FAR', 'BETTER', 'TO', 'BE', 'FREE', 'TO', 'LEAVE', 'THE', 'FORTS', 'AND', 'BARRICADES', 'OF', 'FEAR', 'TO', 'STAND', 'ERECT', 'AND', 'FACE', 'THE', 'FUTURE', 'WITH', 'A', 'SMILE'] +5764-299665-0097-1901: hyp=['IT', 'IS', 'FAR', 'BETTER', 'TO', 'BE', 'FREE', 'TO', 'LEAVE', 'THE', 'FAULTS', 'AND', 'BARRICADES', 'OF', 'FEAR', 'TO', 'STAND', 'ERECT', 'AND', 'FACE', 'THE', 'FUTURE', 'WITH', 'A', 'SMILE'] +6070-63485-0000-1902: ref=["THEY'RE", 'DONE', 'FOR', 'SAID', 'THE', 'SCHOOLMASTER', 'IN', 'A', 'LOW', 'KEY', 'TO', 'THE', 'CHOUETTE', 'OUT', 'WITH', 'YOUR', 'VITRIOL', 'AND', 'MIND', 'YOUR', 'EYE'] +6070-63485-0000-1902: hyp=['THERE', 'DONE', 'FAR', 'SAID', 'THE', 'SCHOOLMASTER', 'IN', 'A', 'LOW', 'KEY', 'TO', 'THE', 'SHUETTE', 'OUT', 'WITH', 'YOUR', 'VITRIOL', 'AND', 'MIND', 'YOUR', 'EYE'] +6070-63485-0001-1903: ref=['THE', 'TWO', 'MONSTERS', 'TOOK', 'OFF', 'THEIR', 'SHOES', 'AND', 'MOVED', 'STEALTHILY', 'ALONG', 'KEEPING', 'IN', 'THE', 'SHADOWS', 'OF', 'THE', 'HOUSES'] +6070-63485-0001-1903: hyp=['THE', 'TWO', 'MONSTERS', 'TOOK', 'OFF', 'THEIR', 'SHOES', 'AND', 'MOVED', 'STEALTHILY', 'ALONG', 'KEEPING', 'IN', 'THE', 'SHADOWS', 'OF', 'THE', 'HOUSES'] +6070-63485-0002-1904: ref=['BY', 'MEANS', 'OF', 'THIS', 'STRATAGEM', 'THEY', 'FOLLOWED', 'SO', 'CLOSELY', 'THAT', 'ALTHOUGH', 'WITHIN', 'A', 'FEW', 'STEPS', 'OF', 'SARAH', 'AND', 'TOM', 'THEY', 'DID', 'NOT', 'HEAR', 'THEM'] +6070-63485-0002-1904: hyp=['BY', 'MEANS', 'OF', 'THIS', 'STRATAGEM', 'THEY', 'FOLLOWED', 'SO', 'CLOSELY', 'THAT', 'ALTHOUGH', 'WITHIN', 'A', 'FEW', 'STEPS', 'OF', 'SARAH', 'AND', 'TOM', 'THEY', 'DID', 'NOT', 'HEAR', 'THEM'] +6070-63485-0003-1905: ref=['SARAH', 'AND', 'HER', 'BROTHER', 'HAVING', 'AGAIN', 'PASSED', 'BY', 'THE', 'TAPIS', 'FRANC', 'ARRIVED', 'CLOSE', 'TO', 'THE', 'DILAPIDATED', 'HOUSE', 'WHICH', 'WAS', 'PARTLY', 'IN', 'RUINS', 'AND', 'ITS', 'OPENED', 'CELLARS', 'FORMED', 'A', 'KIND', 'OF', 'GULF', 'ALONG', 'WHICH', 'THE', 'STREET', 'RAN', 'IN', 'THAT', 'DIRECTION'] +6070-63485-0003-1905: hyp=['SARAH', 'AND', 'HER', 'BROTHER', 'HAVING', 'AGAIN', 'PASSED', 'BY', 'THE', 'TAPI', 'FRANCS', 'ARRIVED', 'CLOSE', 'TO', 'THE', 'DILAPIDATED', 'HOUSE', 'WHICH', 'WAS', 'PARTLY', 'IN', 'RUINS', 'AND', 'ITS', 'OPEN', 'CELLARS', 'FORMED', 'A', 'KIND', 'OF', 'GULF', 'ALONG', 'WHICH', 'THE', 'STREET', 'RAN', 'IN', 'THAT', 'DIRECTION'] +6070-63485-0004-1906: ref=['IN', 'AN', 'INSTANT', 'THE', 'SCHOOLMASTER', 'WITH', 'A', 'LEAP', 'RESEMBLING', 
'IN', 'STRENGTH', 'AND', 'AGILITY', 'THE', 'SPRING', 'OF', 'A', 'TIGER', 'SEIZED', 'SEYTON', 'WITH', 'ONE', 'HAND', 'BY', 'THE', 'THROAT', 'AND', 'EXCLAIMED', 'YOUR', 'MONEY', 'OR', 'I', 'WILL', 'FLING', 'YOU', 'INTO', 'THIS', 'HOLE'] +6070-63485-0004-1906: hyp=['IN', 'AN', 'INSTANT', 'THE', 'SCHOOLMASTER', 'WITH', 'A', 'LEAP', 'RESEMBLING', 'IN', 'STRENGTH', 'AND', 'AGILITY', 'THE', 'SPRING', 'OF', 'A', 'TIGER', 'SEIZED', 'SETAN', 'WITH', 'ONE', 'HAND', 'BY', 'THE', 'THROAT', 'AND', 'EXCLAIMED', 'YOUR', 'MONEY', 'OR', 'I', 'WILL', 'FLING', 'YOU', 'INTO', 'THIS', 'HOLE'] +6070-63485-0005-1907: ref=['NO', 'SAID', 'THE', 'OLD', 'BRUTE', 'GRUMBLINGLY', 'NO', 'NOT', 'ONE', 'RING', 'WHAT', 'A', 'SHAME'] +6070-63485-0005-1907: hyp=['NO', 'SAID', 'THE', 'OLD', 'BRUTE', 'GRUMBLINGLY', 'NO', 'NOT', 'ONE', 'RING', 'WHAT', 'A', 'SHAME'] +6070-63485-0006-1908: ref=['TOM', 'SEYTON', 'DID', 'NOT', 'LOSE', 'HIS', 'PRESENCE', 'OF', 'MIND', 'DURING', 'THIS', 'SCENE', 'RAPIDLY', 'AND', 'UNEXPECTEDLY', 'AS', 'IT', 'HAD', 'OCCURRED'] +6070-63485-0006-1908: hyp=['TOM', 'SEYTON', 'DID', 'NOT', 'LOSE', 'HIS', 'PRESENCE', 'OF', 'MIND', 'DURING', 'THIS', 'SCENE', 'RAPIDLY', 'AND', 'UNEXPECTEDLY', 'AS', 'IT', 'HAD', 'OCCURRED'] +6070-63485-0007-1909: ref=['OH', 'AH', 'TO', 'LAY', 'A', 'TRAP', 'TO', 'CATCH', 'US', 'REPLIED', 'THE', 'THIEF'] +6070-63485-0007-1909: hyp=['OH', 'AH', 'TO', 'LAY', 'A', 'TRAP', 'TO', 'CATCH', 'US', 'REPLIED', 'THE', 'THIEF'] +6070-63485-0008-1910: ref=['THEN', 'ADDRESSING', 'THOMAS', 'SEYTON', 'YOU', 'KNOW', 'THE', 'PLAIN', 'OF', 'SAINT', 'DENIS'] +6070-63485-0008-1910: hyp=['THEN', 'ADDRESSING', 'THOMAS', 'SEYTON', 'YOU', 'KNOW', 'THE', 'PLAIN', 'OF', 'SAINT', 'DENIS'] +6070-63485-0009-1911: ref=['DID', 'YOU', 'SEE', 'IN', 'THE', 'CABARET', 'WE', 'HAVE', 'JUST', 'LEFT', 'FOR', 'I', 'KNOW', 'YOU', 'AGAIN', 'THE', 'MAN', 'WHOM', 'THE', 'CHARCOAL', 'MAN', 'CAME', 'TO', 'SEEK'] +6070-63485-0009-1911: hyp=['DID', 'YOU', 'SEE', 'IN', 'THE', 'CABARET', 'WE', 'HAVE', 'JUST', 'LEFT', 'FOR', 'I', 'KNOW', 'YOU', 'AGAIN', 'THE', 'MAN', 'WHOM', 'THE', 'CHARCOAL', 'MAN', 'CAME', 'TO', 'SEEK'] +6070-63485-0010-1912: ref=['CRIED', 'THE', 'SCHOOLMASTER', 'A', 'THOUSAND', 'FRANCS', 'AND', "I'LL", 'KILL', 'HIM'] +6070-63485-0010-1912: hyp=['CRIED', 'THE', 'SCHOOLMASTER', 'A', 'THOUSAND', 'FRANCS', 'AND', "I'LL", 'KILL', 'HIM'] +6070-63485-0011-1913: ref=['WRETCH', 'I', 'DO', 'NOT', 'SEEK', 'HIS', 'LIFE', 'REPLIED', 'SARAH', 'TO', 'THE', 'SCHOOLMASTER'] +6070-63485-0011-1913: hyp=['VETCH', 'I', 'DO', 'NOT', 'SEE', 'HIS', 'LIFE', 'REPLIED', 'SARAH', 'TO', 'THE', 'SCHOOLMASTER'] +6070-63485-0012-1914: ref=["LET'S", 'GO', 'AND', 'MEET', 'HIM'] +6070-63485-0012-1914: hyp=["LET'S", 'GO', 'AND', 'MEET', 'HIM'] +6070-63485-0013-1915: ref=['OLD', 'BOY', 'IT', 'WILL', 'PAY', 'FOR', 'LOOKING', 'AFTER'] +6070-63485-0013-1915: hyp=['ALL', 'BY', 'IT', 'WOT', 'PAY', 'FOR', 'LOOKING', 'AFTER'] +6070-63485-0014-1916: ref=['WELL', 'MY', 'WIFE', 'SHALL', 'BE', 'THERE', 'SAID', 'THE', 'SCHOOLMASTER', 'YOU', 'WILL', 'TELL', 'HER', 'WHAT', 'YOU', 'WANT', 'AND', 'I', 'SHALL', 'SEE'] +6070-63485-0014-1916: hyp=['WELL', 'MY', 'WIFE', 'SHALL', 'BE', 'THERE', 'SAID', 'THE', 'SCHOOLMASTER', 'YOU', 'WILL', 'TELL', 'HER', 'WHAT', 'YOU', 'WANT', 'AND', 'I', 'SHALL', 'SEE'] +6070-63485-0015-1917: ref=['IN', 'THE', 'PLAIN', 'OF', 'SAINT', 'DENIS'] +6070-63485-0015-1917: hyp=['IN', 'THE', 'PLAIN', 'OF', 'SAINT', 'DENIS'] +6070-63485-0016-1918: ref=['BETWEEN', 'SAINT', 'OUEN', 'AND', 'THE', 'ROAD', 'OF', 'LA', 'REVOLTE', 'AT', 'THE', 'END', 
'OF', 'THE', 'ROAD', 'AGREED'] +6070-63485-0016-1918: hyp=['BETWEEN', 'SAINT', 'JUIN', 'AND', 'THE', 'ROAD', 'OF', 'LA', 'REVOLT', 'AT', 'THE', 'END', 'OF', 'THE', 'ROAD', 'AGREED'] +6070-63485-0017-1919: ref=['HE', 'HAD', 'FORGOTTEN', 'THE', 'ADDRESS', 'OF', 'THE', 'SELF', 'STYLED', 'FAN', 'PAINTER'] +6070-63485-0017-1919: hyp=['HE', 'HAD', 'FORGOTTEN', 'THE', 'ADDRESS', 'OF', 'THE', 'SELF', 'STYLED', 'PEN', 'PAINTER'] +6070-63485-0018-1920: ref=['THE', 'FIACRE', 'STARTED'] +6070-63485-0018-1920: hyp=['THE', 'THEACCUS', 'STARTED'] +6070-86744-0000-1921: ref=['FRANZ', 'WHO', 'SEEMED', 'ATTRACTED', 'BY', 'SOME', 'INVISIBLE', 'INFLUENCE', 'TOWARDS', 'THE', 'COUNT', 'IN', 'WHICH', 'TERROR', 'WAS', 'STRANGELY', 'MINGLED', 'FELT', 'AN', 'EXTREME', 'RELUCTANCE', 'TO', 'PERMIT', 'HIS', 'FRIEND', 'TO', 'BE', 'EXPOSED', 'ALONE', 'TO', 'THE', 'SINGULAR', 'FASCINATION', 'THAT', 'THIS', 'MYSTERIOUS', 'PERSONAGE', 'SEEMED', 'TO', 'EXERCISE', 'OVER', 'HIM', 'AND', 'THEREFORE', 'MADE', 'NO', 'OBJECTION', 'TO', "ALBERT'S", 'REQUEST', 'BUT', 'AT', 'ONCE', 'ACCOMPANIED', 'HIM', 'TO', 'THE', 'DESIRED', 'SPOT', 'AND', 'AFTER', 'A', 'SHORT', 'DELAY', 'THE', 'COUNT', 'JOINED', 'THEM', 'IN', 'THE', 'SALON'] +6070-86744-0000-1921: hyp=['FRANZ', 'WHO', 'SEEMED', 'ATTRACTED', 'BY', 'SOME', 'INVISIBLE', 'INFLUENCE', 'TOWARDS', 'THE', 'COUNT', 'IN', 'WHICH', 'TERROR', 'WAS', 'STRANGELY', 'MINGLED', 'FELT', 'AN', 'EXTREME', 'RELUCTANCE', 'TO', 'PERMIT', 'HIS', 'FRIEND', 'TO', 'BE', 'EXPOSED', 'ALONE', 'TO', 'THE', 'SINGULAR', 'FASCINATION', 'THAT', 'THIS', 'MYSTERIOUS', 'PERSONAGE', 'SEEMED', 'TO', 'EXERCISE', 'OVER', 'HIM', 'AND', 'THEREFORE', 'MADE', 'NO', 'OBJECTION', 'TO', "ALBERT'S", 'REQUEST', 'BUT', 'AT', 'ONCE', 'ACCOMPANIED', 'HIM', 'TO', 'THE', 'DESIRED', 'SPOT', 'AND', 'AFTER', 'A', 'SHORT', 'DELAY', 'THE', 'COUNT', 'JOINED', 'THEM', 'IN', 'THE', 'SALON'] +6070-86744-0001-1922: ref=['MY', 'VERY', 'GOOD', 'FRIEND', 'AND', 'EXCELLENT', 'NEIGHBOR', 'REPLIED', 'THE', 'COUNT', 'WITH', 'A', 'SMILE', 'YOU', 'REALLY', 'EXAGGERATE', 'MY', 'TRIFLING', 'EXERTIONS'] +6070-86744-0001-1922: hyp=['MY', 'VERY', 'GOOD', 'FRIEND', 'AND', 'EXCELLENT', 'NEIGHBOR', 'REPLIED', 'THE', 'COUNT', 'WITH', 'A', 'SMILE', 'YOU', 'REALLY', 'EXAGGERATE', 'MY', 'TRIFLING', 'EXERTIONS'] +6070-86744-0002-1923: ref=['MY', 'FATHER', 'THE', 'COMTE', 'DE', 'MORCERF', 'ALTHOUGH', 'OF', 'SPANISH', 'ORIGIN', 'POSSESSES', 'CONSIDERABLE', 'INFLUENCE', 'BOTH', 'AT', 'THE', 'COURT', 'OF', 'FRANCE', 'AND', 'MADRID', 'AND', 'I', 'UNHESITATINGLY', 'PLACE', 'THE', 'BEST', 'SERVICES', 'OF', 'MYSELF', 'AND', 'ALL', 'TO', 'WHOM', 'MY', 'LIFE', 'IS', 'DEAR', 'AT', 'YOUR', 'DISPOSAL'] +6070-86744-0002-1923: hyp=['MY', 'FATHER', 'THE', 'COMTE', 'DE', 'MORCERF', 'ALTHOUGH', 'OF', 'SPANISH', 'ORIGIN', 'POSSESSES', 'CONSIDERABLE', 'INFLUENCE', 'BOTH', 'AT', 'THE', 'COURT', 'OF', 'FRANCE', 'AND', 'MADRID', 'AND', 'I', 'UNHESITATINGLY', 'PLACE', 'THE', 'BEST', 'SERVICES', 'OF', 'MYSELF', 'AND', 'ALL', 'TO', 'WHOM', 'MY', 'LIFE', 'IS', 'DEAR', 'AT', 'YOUR', 'DISPOSAL'] +6070-86744-0003-1924: ref=['I', 'CAN', 'SCARCELY', 'CREDIT', 'IT'] +6070-86744-0003-1924: hyp=['I', 'CAN', 'SCARCELY', 'CREDIT', 'IT'] +6070-86744-0004-1925: ref=['THEN', 'IT', 'IS', 'SETTLED', 'SAID', 'THE', 'COUNT', 'AND', 'I', 'GIVE', 'YOU', 'MY', 'SOLEMN', 'ASSURANCE', 'THAT', 'I', 'ONLY', 'WAITED', 'AN', 'OPPORTUNITY', 'LIKE', 'THE', 'PRESENT', 'TO', 'REALIZE', 'PLANS', 'THAT', 'I', 'HAVE', 'LONG', 'MEDITATED'] +6070-86744-0004-1925: hyp=['THEN', 'IT', 'IS', 'SETTLED', 'SAID', 'THE', 'COUNT', 
'AND', 'I', 'GIVE', 'YOU', 'MY', 'SOLEMN', 'ASSURANCE', 'THAT', 'I', 'ONLY', 'WAITED', 'AN', 'OPPORTUNITY', 'LIKE', 'THE', 'PRESENT', 'TO', 'REALISE', 'PLANS', 'THAT', 'I', 'HAVE', 'LONG', 'MEDITATED'] +6070-86744-0005-1926: ref=['SHALL', 'WE', 'MAKE', 'A', 'POSITIVE', 'APPOINTMENT', 'FOR', 'A', 'PARTICULAR', 'DAY', 'AND', 'HOUR', 'INQUIRED', 'THE', 'COUNT', 'ONLY', 'LET', 'ME', 'WARN', 'YOU', 'THAT', 'I', 'AM', 'PROVERBIAL', 'FOR', 'MY', 'PUNCTILIOUS', 'EXACTITUDE', 'IN', 'KEEPING', 'MY', 'ENGAGEMENTS', 'DAY', 'FOR', 'DAY', 'HOUR', 'FOR', 'HOUR', 'SAID', 'ALBERT', 'THAT', 'WILL', 'SUIT', 'ME', 'TO', 'A', 'DOT'] +6070-86744-0005-1926: hyp=['SHALL', 'WE', 'MAKE', 'A', 'POSITIVE', 'APPOINTMENT', 'FOR', 'A', 'PARTICULAR', 'DAY', 'AND', 'HOUR', 'INQUIRED', 'THE', 'COUNT', 'ONLY', 'LET', 'ME', 'WARN', 'YOU', 'THAT', 'I', 'AM', 'PROVERBIAL', 'FOR', 'MY', 'PUNCTILIOUS', 'EXACTITUDE', 'IN', 'KEEPING', 'MY', 'ENGAGEMENTS', 'DAY', 'FOR', 'DAY', 'HOUR', 'FOR', 'HOUR', 'SAID', 'ALBERT', 'THAT', 'WILL', 'SUIT', 'ME', 'TO', 'A', 'DOT'] +6070-86744-0006-1927: ref=['SO', 'BE', 'IT', 'THEN', 'REPLIED', 'THE', 'COUNT', 'AND', 'EXTENDING', 'HIS', 'HAND', 'TOWARDS', 'A', 'CALENDAR', 'SUSPENDED', 'NEAR', 'THE', 'CHIMNEY', 'PIECE', 'HE', 'SAID', 'TO', 'DAY', 'IS', 'THE', 'TWENTY', 'FIRST', 'OF', 'FEBRUARY', 'AND', 'DRAWING', 'OUT', 'HIS', 'WATCH', 'ADDED', 'IT', 'IS', 'EXACTLY', 'HALF', 'PAST', 'TEN', "O'CLOCK", 'NOW', 'PROMISE', 'ME', 'TO', 'REMEMBER', 'THIS', 'AND', 'EXPECT', 'ME', 'THE', 'TWENTY', 'FIRST', 'OF', 'MAY', 'AT', 'THE', 'SAME', 'HOUR', 'IN', 'THE', 'FORENOON'] +6070-86744-0006-1927: hyp=['SO', 'BE', 'IT', 'THEN', 'REPLIED', 'THE', 'COUNT', 'AND', 'EXTENDING', 'HIS', 'HAND', 'TOWARDS', 'THE', 'CALENDAR', 'SUSPENDED', 'NEAR', 'THE', 'CHIMNEY', 'PIECE', 'HE', 'SAID', 'TO', 'DAY', 'IS', 'THE', 'TWENTY', 'FIRST', 'OF', 'FEBRUARY', 'AND', 'DRAWING', 'OUT', 'HIS', 'WATCH', 'ADDED', 'IT', 'IS', 'EXACTLY', 'HALF', 'PAST', 'TEN', "O'CLOCK", 'NOW', 'PROMISE', 'ME', 'TO', 'REMEMBER', 'THIS', 'AND', 'EXPECT', 'ME', 'THE', 'TWENTY', 'FIRST', 'OF', 'MAY', 'AT', 'THE', 'SAME', 'HOUR', 'IN', 'THE', 'FORENOON'] +6070-86744-0007-1928: ref=['I', 'RESIDE', 'IN', 'MY', "FATHER'S", 'HOUSE', 'BUT', 'OCCUPY', 'A', 'PAVILION', 'AT', 'THE', 'FARTHER', 'SIDE', 'OF', 'THE', 'COURT', 'YARD', 'ENTIRELY', 'SEPARATED', 'FROM', 'THE', 'MAIN', 'BUILDING'] +6070-86744-0007-1928: hyp=['I', 'RESIDE', 'IN', 'MY', "FATHER'S", 'HOUSE', 'BUT', 'OCCUPY', 'A', 'PAVILION', 'AT', 'THE', 'FARTHER', 'SIDE', 'OF', 'THE', 'COURTYARD', 'ENTIRELY', 'SEPARATED', 'FROM', 'THE', 'MAIN', 'BUILDING'] +6070-86744-0008-1929: ref=['NOW', 'THEN', 'SAID', 'THE', 'COUNT', 'RETURNING', 'HIS', 'TABLETS', 'TO', 'HIS', 'POCKET', 'MAKE', 'YOURSELF', 'PERFECTLY', 'EASY', 'THE', 'HAND', 'OF', 'YOUR', 'TIME', 'PIECE', 'WILL', 'NOT', 'BE', 'MORE', 'ACCURATE', 'IN', 'MARKING', 'THE', 'TIME', 'THAN', 'MYSELF'] +6070-86744-0008-1929: hyp=['NOW', 'THEN', 'SAID', 'THE', 'COUNT', 'RETURNING', 'HIS', 'TABLETS', 'TO', 'HIS', 'POCKET', 'MAKE', 'YOURSELF', 'PERFECTLY', 'EASY', 'THE', 'HAND', 'OF', 'YOUR', 'TIME', 'PIECE', 'WILL', 'NOT', 'BE', 'MORE', 'ACCURATE', 'IN', 'MARKING', 'THE', 'TIME', 'THAN', 'MYSELF'] +6070-86744-0009-1930: ref=['THAT', 'DEPENDS', 'WHEN', 'DO', 'YOU', 'LEAVE'] +6070-86744-0009-1930: hyp=['THAT', 'DEPENDS', 'WHEN', 'DO', 'YOU', 'LEAVE'] +6070-86744-0010-1931: ref=['FOR', 'FRANCE', 'NO', 'FOR', 'VENICE', 'I', 'SHALL', 'REMAIN', 'IN', 'ITALY', 'FOR', 'ANOTHER', 'YEAR', 'OR', 'TWO'] +6070-86744-0010-1931: hyp=['FOR', 'FRANCE', 'NO', 'FOR', 'VENICE', 'I', 
'SHALL', 'REMAIN', 'IN', 'ITALY', 'FOR', 'ANOTHER', 'YEAR', 'OR', 'TWO'] +6070-86744-0011-1932: ref=['THEN', 'WE', 'SHALL', 'NOT', 'MEET', 'IN', 'PARIS'] +6070-86744-0011-1932: hyp=['THEN', 'WE', 'SHALL', 'NOT', 'MEET', 'IN', 'PARIS'] +6070-86744-0012-1933: ref=['I', 'FEAR', 'I', 'SHALL', 'NOT', 'HAVE', 'THAT', 'HONOR'] +6070-86744-0012-1933: hyp=['I', 'FEAR', 'I', 'SHALL', 'NOT', 'HAVE', 'THAT', 'HONOR'] +6070-86744-0013-1934: ref=['WELL', 'SINCE', 'WE', 'MUST', 'PART', 'SAID', 'THE', 'COUNT', 'HOLDING', 'OUT', 'A', 'HAND', 'TO', 'EACH', 'OF', 'THE', 'YOUNG', 'MEN', 'ALLOW', 'ME', 'TO', 'WISH', 'YOU', 'BOTH', 'A', 'SAFE', 'AND', 'PLEASANT', 'JOURNEY'] +6070-86744-0013-1934: hyp=['WELL', 'SINCE', 'WE', 'MUST', 'PART', 'SAID', 'THE', 'COUNT', 'HOLDING', 'OUT', 'A', 'HAND', 'TO', 'EACH', 'OF', 'THE', 'YOUNG', 'MEN', 'ALLOW', 'ME', 'TO', 'WISH', 'YOU', 'BOTH', 'A', 'SAFE', 'AND', 'PLEASANT', 'JOURNEY'] +6070-86744-0014-1935: ref=['WHAT', 'IS', 'THE', 'MATTER', 'ASKED', 'ALBERT', 'OF', 'FRANZ', 'WHEN', 'THEY', 'HAD', 'RETURNED', 'TO', 'THEIR', 'OWN', 'APARTMENTS', 'YOU', 'SEEM', 'MORE', 'THAN', 'COMMONLY', 'THOUGHTFUL'] +6070-86744-0014-1935: hyp=['WHAT', 'IS', 'THE', 'MATTER', 'ASKED', 'ALBERT', 'OF', 'FRANZ', 'WHEN', 'THEY', 'HAD', 'RETURNED', 'TO', 'THEIR', 'OWN', 'APARTMENTS', 'YOU', 'SEEM', 'MORE', 'THAN', 'COMMONLY', 'THOUGHTFUL'] +6070-86744-0015-1936: ref=['I', 'WILL', 'CONFESS', 'TO', 'YOU', 'ALBERT', 'REPLIED', 'FRANZ', 'THE', 'COUNT', 'IS', 'A', 'VERY', 'SINGULAR', 'PERSON', 'AND', 'THE', 'APPOINTMENT', 'YOU', 'HAVE', 'MADE', 'TO', 'MEET', 'HIM', 'IN', 'PARIS', 'FILLS', 'ME', 'WITH', 'A', 'THOUSAND', 'APPREHENSIONS'] +6070-86744-0015-1936: hyp=['I', 'WILL', 'CONFESS', 'TO', 'YOU', 'ALBERT', 'REPLIED', 'FRANZ', 'THE', 'COUNT', 'IS', 'A', 'VERY', 'SINGULAR', 'PERSON', 'AND', 'THE', 'APPOINTMENT', 'YOU', 'HAVE', 'MADE', 'TO', 'MEET', 'HIM', 'IN', 'PARIS', 'FILLS', 'ME', 'WITH', 'A', 'THOUSAND', 'APPREHENSIONS'] +6070-86744-0016-1937: ref=['DID', 'YOU', 'EVER', 'MEET', 'HIM', 'PREVIOUSLY', 'TO', 'COMING', 'HITHER'] +6070-86744-0016-1937: hyp=['DID', 'YOU', 'EVER', 'MEET', 'HIM', 'PREVIOUSLY', 'TO', 'COMING', 'HITHER'] +6070-86744-0017-1938: ref=['UPON', 'MY', 'HONOR', 'THEN', 'LISTEN', 'TO', 'ME'] +6070-86744-0017-1938: hyp=['UPON', 'MY', 'HONOR', 'THEN', 'LISTEN', 'TO', 'ME'] +6070-86744-0018-1939: ref=['HE', 'DWELT', 'WITH', 'CONSIDERABLE', 'FORCE', 'AND', 'ENERGY', 'ON', 'THE', 'ALMOST', 'MAGICAL', 'HOSPITALITY', 'HE', 'HAD', 'RECEIVED', 'FROM', 'THE', 'COUNT', 'AND', 'THE', 'MAGNIFICENCE', 'OF', 'HIS', 'ENTERTAINMENT', 'IN', 'THE', 'GROTTO', 'OF', 'THE', 'THOUSAND', 'AND', 'ONE', 'NIGHTS', 'HE', 'RECOUNTED', 'WITH', 'CIRCUMSTANTIAL', 'EXACTITUDE', 'ALL', 'THE', 'PARTICULARS', 'OF', 'THE', 'SUPPER', 'THE', 'HASHISH', 'THE', 'STATUES', 'THE', 'DREAM', 'AND', 'HOW', 'AT', 'HIS', 'AWAKENING', 'THERE', 'REMAINED', 'NO', 'PROOF', 'OR', 'TRACE', 'OF', 'ALL', 'THESE', 'EVENTS', 'SAVE', 'THE', 'SMALL', 'YACHT', 'SEEN', 'IN', 'THE', 'DISTANT', 'HORIZON', 'DRIVING', 'UNDER', 'FULL', 'SAIL', 'TOWARD', 'PORTO', 'VECCHIO'] +6070-86744-0018-1939: hyp=['HE', 'DWELT', 'WITH', 'CONSIDERABLE', 'FORCE', 'AND', 'ENERGY', 'ON', 'THE', 'ALMOST', 'MAGICAL', 'HOSPITALITY', 'HE', 'HAD', 'RECEIVED', 'FROM', 'THE', 'COUNT', 'AND', 'THE', 'MAGNIFICENCE', 'OF', 'HIS', 'ENTERTAINMENT', 'IN', 'THE', 'GROTTO', 'OF', 'THE', 'THOUSAND', 'AND', 'ONE', 'NIGHTS', 'HE', 'RECOUNTED', 'WITH', 'CIRCUMSTANTIAL', 'EXACTITUDE', 'ALL', 'THE', 'PARTICULARS', 'OF', 'THE', 'SUPPER', 'THE', 'HASHISH', 'THE', 'STATUES', 'THE', 
'DREAM', 'AND', 'HOW', 'AT', 'HIS', 'AWAKENING', 'THERE', 'REMAINED', 'NO', 'PROOF', 'OR', 'TRACE', 'OF', 'ALL', 'THESE', 'EVENTS', 'SAVE', 'THE', 'SMALL', 'YACHT', 'SEEN', 'IN', 'THE', 'DISTANT', 'HORIZON', 'DRIVING', 'UNDER', 'FULL', 'SAIL', 'TOWARD', 'PORTO', 'VECCHIO'] +6070-86744-0019-1940: ref=['THEN', 'HE', 'DETAILED', 'THE', 'CONVERSATION', 'OVERHEARD', 'BY', 'HIM', 'AT', 'THE', 'COLOSSEUM', 'BETWEEN', 'THE', 'COUNT', 'AND', 'VAMPA', 'IN', 'WHICH', 'THE', 'COUNT', 'HAD', 'PROMISED', 'TO', 'OBTAIN', 'THE', 'RELEASE', 'OF', 'THE', 'BANDIT', 'PEPPINO', 'AN', 'ENGAGEMENT', 'WHICH', 'AS', 'OUR', 'READERS', 'ARE', 'AWARE', 'HE', 'MOST', 'FAITHFULLY', 'FULFILLED'] +6070-86744-0019-1940: hyp=['THEN', 'HE', 'DETAILED', 'THE', 'CONVERSATION', 'OVERHEARD', 'BY', 'HIM', 'AT', 'THE', 'COLISEUM', 'BETWEEN', 'THE', 'COUNT', 'AND', 'VAMPA', 'IN', 'WHICH', 'THE', 'COUNT', 'HAD', 'PROMISED', 'TO', 'OBTAIN', 'THE', 'RELEASE', 'OF', 'THE', 'BANDIT', 'PEPPINO', 'AN', 'ENGAGEMENT', 'WHICH', 'AS', 'OUR', 'READERS', 'ARE', 'AWARE', 'HE', 'MOST', 'FAITHFULLY', 'FULFILLED'] +6070-86744-0020-1941: ref=['BUT', 'SAID', 'FRANZ', 'THE', 'CORSICAN', 'BANDITS', 'THAT', 'WERE', 'AMONG', 'THE', 'CREW', 'OF', 'HIS', 'VESSEL'] +6070-86744-0020-1941: hyp=['BUT', 'SAID', 'FRANZ', 'THE', 'CORSICAN', 'BANDITS', 'THAT', 'WERE', 'AMONG', 'THE', 'CREW', 'OF', 'HIS', 'VESSEL'] +6070-86744-0021-1942: ref=['WHY', 'REALLY', 'THE', 'THING', 'SEEMS', 'TO', 'ME', 'SIMPLE', 'ENOUGH'] +6070-86744-0021-1942: hyp=['WHY', 'REALLY', 'THE', 'THING', 'SEEMS', 'TO', 'ME', 'SIMPLE', 'ENOUGH'] +6070-86744-0022-1943: ref=['TALKING', 'OF', 'COUNTRIES', 'REPLIED', 'FRANZ', 'OF', 'WHAT', 'COUNTRY', 'IS', 'THE', 'COUNT', 'WHAT', 'IS', 'HIS', 'NATIVE', 'TONGUE', 'WHENCE', 'DOES', 'HE', 'DERIVE', 'HIS', 'IMMENSE', 'FORTUNE', 'AND', 'WHAT', 'WERE', 'THOSE', 'EVENTS', 'OF', 'HIS', 'EARLY', 'LIFE', 'A', 'LIFE', 'AS', 'MARVELLOUS', 'AS', 'UNKNOWN', 'THAT', 'HAVE', 'TINCTURED', 'HIS', 'SUCCEEDING', 'YEARS', 'WITH', 'SO', 'DARK', 'AND', 'GLOOMY', 'A', 'MISANTHROPY'] +6070-86744-0022-1943: hyp=['TALKING', 'OF', 'COUNTRIES', 'REPLIED', 'FRANZ', 'OF', 'WHAT', 'COUNTRY', 'IS', 'THE', 'COUNT', 'WHAT', 'IS', 'HIS', 'NATIVE', 'TONGUE', 'WHENCE', 'DOES', 'HE', 'DERIVE', 'HIS', 'IMMENSE', 'FORTUNE', 'AND', 'WHAT', 'WERE', 'THOSE', 'EVENTS', 'OF', 'HIS', 'EARLY', 'LIFE', 'A', 'LIFE', 'AS', 'MARVELLOUS', 'AS', 'UNKNOWN', 'THAT', 'HAVE', 'TINCTURED', 'HIS', 'SUCCEEDING', 'YEARS', 'WITH', 'SO', 'DARK', 'AND', 'GLOOMY', 'A', 'MISANTHROPY'] +6070-86744-0023-1944: ref=['CERTAINLY', 'THESE', 'ARE', 'QUESTIONS', 'THAT', 'IN', 'YOUR', 'PLACE', 'I', 'SHOULD', 'LIKE', 'TO', 'HAVE', 'ANSWERED'] +6070-86744-0023-1944: hyp=['CERTAINLY', 'THESE', 'ARE', 'QUESTIONS', 'THAT', 'IN', 'YOUR', 'PLACE', 'I', 'SHOULD', 'LIKE', 'TO', 'HAVE', 'ANSWERED'] +6070-86744-0024-1945: ref=['MY', 'DEAR', 'FRANZ', 'REPLIED', 'ALBERT', 'WHEN', 'UPON', 'RECEIPT', 'OF', 'MY', 'LETTER', 'YOU', 'FOUND', 'THE', 'NECESSITY', 'OF', 'ASKING', 'THE', "COUNT'S", 'ASSISTANCE', 'YOU', 'PROMPTLY', 'WENT', 'TO', 'HIM', 'SAYING', 'MY', 'FRIEND', 'ALBERT', 'DE', 'MORCERF', 'IS', 'IN', 'DANGER', 'HELP', 'ME', 'TO', 'DELIVER', 'HIM'] +6070-86744-0024-1945: hyp=['MY', 'DEAR', 'FRANZ', 'REPLIED', 'ALBERT', 'WHEN', 'UPON', 'RECEIPT', 'OF', 'MY', 'LETTER', 'YOU', 'FOUND', 'THE', 'NECESSITY', 'OF', 'ASKING', 'THE', "COUNT'S", 'ASSISTANCE', 'YOU', 'PROMPTLY', 'WENT', 'TO', 'HIM', 'SAYING', 'MY', 'FRIEND', 'ALBERT', 'DE', 'MORCERF', 'IS', 'IN', 'DANGER', 'HELP', 'ME', 'TO', 'DELIVER', 'HIM'] +6070-86744-0025-1946: 
ref=['WHAT', 'ARE', 'HIS', 'MEANS', 'OF', 'EXISTENCE', 'WHAT', 'IS', 'HIS', 'BIRTHPLACE', 'OF', 'WHAT', 'COUNTRY', 'IS', 'HE', 'A', 'NATIVE'] +6070-86744-0025-1946: hyp=['WHAT', 'ARE', 'HIS', 'MEANS', 'OF', 'EXISTENCE', 'WHAT', 'IS', 'HIS', 'BIRTHPLACE', 'OF', 'WHAT', 'COUNTRY', 'IS', 'HE', 'A', 'NATIVE'] +6070-86744-0026-1947: ref=['I', 'CONFESS', 'HE', 'ASKED', 'ME', 'NONE', 'NO', 'HE', 'MERELY', 'CAME', 'AND', 'FREED', 'ME', 'FROM', 'THE', 'HANDS', 'OF', 'SIGNOR', 'VAMPA', 'WHERE', 'I', 'CAN', 'ASSURE', 'YOU', 'IN', 'SPITE', 'OF', 'ALL', 'MY', 'OUTWARD', 'APPEARANCE', 'OF', 'EASE', 'AND', 'UNCONCERN', 'I', 'DID', 'NOT', 'VERY', 'PARTICULARLY', 'CARE', 'TO', 'REMAIN'] +6070-86744-0026-1947: hyp=['I', 'CONFESS', 'HE', 'ASKED', 'ME', 'NONE', 'NO', 'HE', 'MERELY', 'CAME', 'AND', 'FREED', 'ME', 'FROM', 'THE', 'HANDS', 'OF', 'SIGNOR', 'VAMPA', 'WHERE', 'I', 'CAN', 'ASSURE', 'YOU', 'IN', 'SPITE', 'OF', 'ALL', 'MY', 'OUTWARD', 'APPEARANCE', 'OF', 'EASE', 'AND', 'UNCONCERN', 'I', 'DID', 'NOT', 'VERY', 'PARTICULARLY', 'CARE', 'TO', 'REMAIN'] +6070-86744-0027-1948: ref=['AND', 'THIS', 'TIME', 'IT', 'MUST', 'BE', 'CONFESSED', 'THAT', 'CONTRARY', 'TO', 'THE', 'USUAL', 'STATE', 'OF', 'AFFAIRS', 'IN', 'DISCUSSIONS', 'BETWEEN', 'THE', 'YOUNG', 'MEN', 'THE', 'EFFECTIVE', 'ARGUMENTS', 'WERE', 'ALL', 'ON', "ALBERT'S", 'SIDE'] +6070-86744-0027-1948: hyp=['AND', 'THIS', 'TIME', 'IT', 'MUST', 'BE', 'CONFESSED', 'THAT', 'CONTRARY', 'TO', 'THE', 'USUAL', 'STATE', 'OF', 'AFFAIRS', 'AND', 'DISCUSSIONS', 'BETWEEN', 'THE', 'YOUNG', 'MEN', 'THE', 'EFFECTIVE', 'ARGUMENTS', 'WERE', 'ALL', 'ON', "ALBERT'S", 'SIDE'] +6070-86744-0028-1949: ref=['WELL', 'SAID', 'FRANZ', 'WITH', 'A', 'SIGH', 'DO', 'AS', 'YOU', 'PLEASE', 'MY', 'DEAR', 'VISCOUNT', 'FOR', 'YOUR', 'ARGUMENTS', 'ARE', 'BEYOND', 'MY', 'POWERS', 'OF', 'REFUTATION'] +6070-86744-0028-1949: hyp=['WELL', 'SAID', 'FRANZ', 'WITH', 'A', 'SIGH', 'DO', 'AS', 'YOU', 'PLEASE', 'MY', 'DEAR', 'VISCOUNT', 'FOR', 'YOUR', 'ARGUMENTS', 'ARE', 'BEYOND', 'MY', 'POWERS', 'OF', 'REFUTATION'] +6070-86744-0029-1950: ref=['AND', 'NOW', 'MY', 'DEAR', 'FRANZ', 'LET', 'US', 'TALK', 'OF', 'SOMETHING', 'ELSE'] +6070-86744-0029-1950: hyp=['AND', 'NOW', 'MY', 'DEAR', 'FRANZ', 'LET', 'US', 'TALK', 'OF', 'SOMETHING', 'ELSE'] +6070-86745-0000-1951: ref=['THEN', 'SHOULD', 'ANYTHING', 'APPEAR', 'TO', 'MERIT', 'A', 'MORE', 'MINUTE', 'EXAMINATION', 'ALBERT', 'DE', 'MORCERF', 'COULD', 'FOLLOW', 'UP', 'HIS', 'RESEARCHES', 'BY', 'MEANS', 'OF', 'A', 'SMALL', 'GATE', 'SIMILAR', 'TO', 'THAT', 'CLOSE', 'TO', 'THE', "CONCIERGE'S", 'DOOR', 'AND', 'WHICH', 'MERITS', 'A', 'PARTICULAR', 'DESCRIPTION'] +6070-86745-0000-1951: hyp=['THEN', 'SHOULD', 'ANYTHING', 'APPEAR', 'TO', 'MERIT', 'A', 'MORE', 'MINUTE', 'EXAMINATION', 'ALBERT', 'DE', 'MORCERF', 'COULD', 'FOLLOW', 'UP', 'HIS', 'RESEARCHES', 'BY', 'MEANS', 'OF', 'A', 'SMALL', 'GATE', 'SIMILAR', 'TO', 'THAT', 'CLOSE', 'TO', 'THE', "CONCIERGE'S", 'DOOR', 'AND', 'WHICH', 'MERITS', 'A', 'PARTICULAR', 'DESCRIPTION'] +6070-86745-0001-1952: ref=['SHRUBS', 'AND', 'CREEPING', 'PLANTS', 'COVERED', 'THE', 'WINDOWS', 'AND', 'HID', 'FROM', 'THE', 'GARDEN', 'AND', 'COURT', 'THESE', 'TWO', 'APARTMENTS', 'THE', 'ONLY', 'ROOMS', 'INTO', 'WHICH', 'AS', 'THEY', 'WERE', 'ON', 'THE', 'GROUND', 'FLOOR', 'THE', 'PRYING', 'EYES', 'OF', 'THE', 'CURIOUS', 'COULD', 'PENETRATE'] +6070-86745-0001-1952: hyp=['SHRUBS', 'AND', 'CREEPING', 'PLANTS', 'COVERED', 'THE', 'WINDOWS', 'AND', 'HID', 'FROM', 'THE', 'GARDEN', 'AND', 'COURT', 'THESE', 'TWO', 'APARTMENTS', 'THE', 'ONLY', 'ROOMS', 'INTO', 
'WHICH', 'AS', 'THEY', 'WERE', 'ON', 'THE', 'GROUND', 'FLOOR', 'THE', 'PRYING', 'EYES', 'OF', 'THE', 'CURIOUS', 'COULD', 'PENETRATE'] +6070-86745-0002-1953: ref=['AT', 'A', 'QUARTER', 'TO', 'TEN', 'A', 'VALET', 'ENTERED', 'HE', 'COMPOSED', 'WITH', 'A', 'LITTLE', 'GROOM', 'NAMED', 'JOHN', 'AND', 'WHO', 'ONLY', 'SPOKE', 'ENGLISH', 'ALL', "ALBERT'S", 'ESTABLISHMENT', 'ALTHOUGH', 'THE', 'COOK', 'OF', 'THE', 'HOTEL', 'WAS', 'ALWAYS', 'AT', 'HIS', 'SERVICE', 'AND', 'ON', 'GREAT', 'OCCASIONS', 'THE', "COUNT'S", 'CHASSEUR', 'ALSO'] +6070-86745-0002-1953: hyp=['AT', 'A', 'QUARTER', 'TO', 'TEN', 'A', 'VALET', 'ENTERED', 'HE', 'COMPOSED', 'WITH', 'A', 'LITTLE', 'GROOM', 'NAMED', 'JOHN', 'AND', 'WHO', 'ONLY', 'SPOKE', 'ENGLISH', 'ALL', "ALBERT'S", 'ESTABLISHMENT', 'ALTHOUGH', 'THE', 'COOK', 'OF', 'THE', 'HOTEL', 'WAS', 'ALWAYS', 'AT', 'HIS', 'SERVICE', 'AND', 'ON', 'GREAT', 'OCCASIONS', 'THE', "COUNT'S", 'CHASSEUR', 'ALSO'] +6070-86745-0003-1954: ref=['WAIT', 'THEN', 'DURING', 'THE', 'DAY', 'TELL', 'ROSA', 'THAT', 'WHEN', 'I', 'LEAVE', 'THE', 'OPERA', 'I', 'WILL', 'SUP', 'WITH', 'HER', 'AS', 'SHE', 'WISHES'] +6070-86745-0003-1954: hyp=['WAIT', 'THEN', 'DURING', 'THE', 'DAY', 'TELL', 'ROSA', 'THAT', 'WHEN', 'I', 'LEAVE', 'THE', 'OPERA', 'I', 'WILL', 'SUP', 'WITH', 'HER', 'AS', 'SHE', 'WISHES'] +6070-86745-0004-1955: ref=['VERY', 'WELL', 'AT', 'HALF', 'PAST', 'TEN'] +6070-86745-0004-1955: hyp=['VERY', 'WELL', 'AT', 'HALF', 'PAST', 'TEN'] +6070-86745-0005-1956: ref=['IS', 'THE', 'COUNTESS', 'UP', 'YET'] +6070-86745-0005-1956: hyp=['IS', 'THE', 'COUNTESS', 'UP', 'YET'] +6070-86745-0006-1957: ref=['THE', 'VALET', 'LEFT', 'THE', 'ROOM'] +6070-86745-0006-1957: hyp=['THE', 'VALET', 'LEFT', 'THE', 'ROOM'] +6070-86745-0007-1958: ref=['GOOD', 'MORNING', 'LUCIEN', 'GOOD', 'MORNING', 'SAID', 'ALBERT', 'YOUR', 'PUNCTUALITY', 'REALLY', 'ALARMS', 'ME'] +6070-86745-0007-1958: hyp=['GOOD', 'MORNING', 'LUCIEN', 'GOOD', 'MORNING', 'SAID', 'ALBERT', 'YOUR', 'PUNCTUALITY', 'REALLY', 'ALARMS', 'ME'] +6070-86745-0008-1959: ref=['YOU', 'WHOM', 'I', 'EXPECTED', 'LAST', 'YOU', 'ARRIVE', 'AT', 'FIVE', 'MINUTES', 'TO', 'TEN', 'WHEN', 'THE', 'TIME', 'FIXED', 'WAS', 'HALF', 'PAST'] +6070-86745-0008-1959: hyp=['YOU', 'WHOM', 'I', 'EXPECTED', 'LAST', 'YOU', 'ARRIVE', 'AT', 'FIVE', 'MINUTES', 'TO', 'TEN', 'WHEN', 'THE', 'TIME', 'FIXED', 'WAS', 'HALF', 'PAST'] +6070-86745-0009-1960: ref=['NO', 'NO', 'MY', 'DEAR', 'FELLOW', 'DO', 'NOT', 'CONFOUND', 'OUR', 'PLANS'] +6070-86745-0009-1960: hyp=['NO', 'NO', 'MY', 'DEAR', 'FELLOW', 'DO', 'NOT', 'CONFOUND', 'OUR', 'PLANS'] +6070-86745-0010-1961: ref=['YES', 'HE', 'HAS', 'NOT', 'MUCH', 'TO', 'COMPLAIN', 'OF', 'BOURGES', 'IS', 'THE', 'CAPITAL', 'OF', 'CHARLES', 'SEVEN'] +6070-86745-0010-1961: hyp=['YES', 'HE', 'HAS', 'NOT', 'MUCH', 'TO', 'COMPLAIN', 'OF', 'BOURGES', 'IS', 'THE', 'CAPITAL', 'OF', 'CHARLES', 'THE', 'SEVENTH'] +6070-86745-0011-1962: ref=['IT', 'IS', 'FOR', 'THAT', 'REASON', 'YOU', 'SEE', 'ME', 'SO', 'EARLY'] +6070-86745-0011-1962: hyp=['IT', 'IS', 'FOR', 'THAT', 'REASON', 'YOU', 'SEE', 'ME', 'SO', 'EARLY'] +6070-86745-0012-1963: ref=['I', 'RETURNED', 'HOME', 'AT', 'DAYBREAK', 'AND', 'STROVE', 'TO', 'SLEEP', 'BUT', 'MY', 'HEAD', 'ACHED', 'AND', 'I', 'GOT', 'UP', 'TO', 'HAVE', 'A', 'RIDE', 'FOR', 'AN', 'HOUR'] +6070-86745-0012-1963: hyp=['I', 'RETURNED', 'HOME', 'AT', 'DAYBREAK', 'AND', 'STROVE', 'TO', 'SLEEP', 'BUT', 'MY', 'HEAD', 'ACHED', 'AND', 'I', 'GOT', 'UP', 'TO', 'HAVE', 'A', 'RIDE', 'FOR', 'AN', 'HOUR'] +6070-86745-0013-1964: ref=['PESTE', 'I', 'WILL', 'DO', 'NOTHING', 
'OF', 'THE', 'KIND', 'THE', 'MOMENT', 'THEY', 'COME', 'FROM', 'GOVERNMENT', 'YOU', 'WOULD', 'FIND', 'THEM', 'EXECRABLE'] +6070-86745-0013-1964: hyp=['PESTES', 'I', 'WILL', 'DO', 'NOTHING', 'OF', 'THE', 'KIND', 'THE', 'MOMENT', 'THEY', 'COME', 'FROM', 'GOVERNMENT', 'YOU', 'WOULD', 'FIND', 'THEM', 'EXECRABLE'] +6070-86745-0014-1965: ref=['BESIDES', 'THAT', 'DOES', 'NOT', 'CONCERN', 'THE', 'HOME', 'BUT', 'THE', 'FINANCIAL', 'DEPARTMENT'] +6070-86745-0014-1965: hyp=['BESIDES', 'THAT', 'DOES', 'NOT', 'CONCERN', 'THE', 'HOME', 'BUT', 'THE', 'FINANCIAL', 'DEPARTMENT'] +6070-86745-0015-1966: ref=['ABOUT', 'WHAT', 'ABOUT', 'THE', 'PAPERS'] +6070-86745-0015-1966: hyp=['ABOUT', 'WHAT', 'ABOUT', 'THE', 'PAPERS'] +6070-86745-0016-1967: ref=['IN', 'THE', 'ENTIRE', 'POLITICAL', 'WORLD', 'OF', 'WHICH', 'YOU', 'ARE', 'ONE', 'OF', 'THE', 'LEADERS'] +6070-86745-0016-1967: hyp=['IN', 'THE', 'ENTIRE', 'POLITICAL', 'WORLD', 'OF', 'WHICH', 'YOU', 'ARE', 'ONE', 'OF', 'THE', 'LEADERS'] +6070-86745-0017-1968: ref=['THEY', 'SAY', 'THAT', 'IT', 'IS', 'QUITE', 'FAIR', 'AND', 'THAT', 'SOWING', 'SO', 'MUCH', 'RED', 'YOU', 'OUGHT', 'TO', 'REAP', 'A', 'LITTLE', 'BLUE'] +6070-86745-0017-1968: hyp=['THEY', 'SAY', 'THAT', 'IT', 'IS', 'QUITE', 'FAIR', 'AND', 'THAT', 'SEWING', 'SO', 'MUCH', 'RED', 'YOU', 'OUGHT', 'TO', 'REAP', 'A', 'LITTLE', 'BLUE'] +6070-86745-0018-1969: ref=['COME', 'COME', 'THAT', 'IS', 'NOT', 'BAD', 'SAID', 'LUCIEN'] +6070-86745-0018-1969: hyp=['COME', 'COME', 'THAT', 'IS', 'NOT', 'BAD', 'SAID', 'LUCIEN'] +6070-86745-0019-1970: ref=['WITH', 'YOUR', 'TALENTS', 'YOU', 'WOULD', 'MAKE', 'YOUR', 'FORTUNE', 'IN', 'THREE', 'OR', 'FOUR', 'YEARS'] +6070-86745-0019-1970: hyp=['WITH', 'YOUR', 'TALENTS', 'YOU', 'WOULD', 'MAKE', 'YOUR', 'FORTUNE', 'IN', 'THREE', 'OR', 'FOUR', 'YEARS'] +6128-63240-0000-1971: ref=['THE', 'GENTLEMAN', 'HAD', 'NOT', 'EVEN', 'NEEDED', 'TO', 'SIT', 'DOWN', 'TO', 'BECOME', 'INTERESTED', 'APPARENTLY', 'HE', 'HAD', 'TAKEN', 'UP', 'THE', 'VOLUME', 'FROM', 'A', 'TABLE', 'AS', 'SOON', 'AS', 'HE', 'CAME', 'IN', 'AND', 'STANDING', 'THERE', 'AFTER', 'A', 'SINGLE', 'GLANCE', 'ROUND', 'THE', 'APARTMENT', 'HAD', 'LOST', 'HIMSELF', 'IN', 'ITS', 'PAGES'] +6128-63240-0000-1971: hyp=['THE', 'GENTLEMAN', 'HAD', 'NOT', 'EVEN', 'NEEDED', 'TO', 'SIT', 'DOWN', 'TO', 'BECOME', 'INTERESTED', 'APPARENTLY', 'HE', 'HAD', 'TAKEN', 'UP', 'THE', 'VOLUME', 'FROM', 'THE', 'TABLE', 'AS', 'SOON', 'AS', 'HE', 'CAME', 'IN', 'AND', 'STANDING', 'THERE', 'AFTER', 'A', 'SINGLE', 'GLANCE', 'ROUND', 'THE', 'APARTMENT', 'HAD', 'LOST', 'HIMSELF', 'IN', 'ITS', 'PAGES'] +6128-63240-0001-1972: ref=['THAT', 'HAS', 'AN', 'UNFLATTERING', 'SOUND', 'FOR', 'ME', 'SAID', 'THE', 'YOUNG', 'MAN'] +6128-63240-0001-1972: hyp=['THAT', 'HAS', 'AN', 'UNFLATTERING', 'SOUND', 'FOR', 'ME', 'SAID', 'THE', 'YOUNG', 'MAN'] +6128-63240-0002-1973: ref=['SHE', 'IS', 'WILLING', 'TO', 'RISK', 'THAT'] +6128-63240-0002-1973: hyp=['SHE', 'IS', 'WILLING', 'TO', 'RISK', 'THAT'] +6128-63240-0003-1974: ref=['JUST', 'AS', 'I', 'AM', 'THE', 'VISITOR', 'INQUIRED', 'PRESENTING', 'HIMSELF', 'WITH', 'RATHER', 'A', 'WORK', 'A', 'DAY', 'ASPECT'] +6128-63240-0003-1974: hyp=['JUST', 'AS', 'I', 'AM', 'THE', 'VISITOR', 'INQUIRED', 'PRESENTING', 'HIMSELF', 'WITH', 'RATHER', 'A', 'WORKADAY', 'ASPECT'] +6128-63240-0004-1975: ref=['HE', 'WAS', 'TALL', 'AND', 'LEAN', 'AND', 'DRESSED', 'THROUGHOUT', 'IN', 'BLACK', 'HIS', 'SHIRT', 'COLLAR', 'WAS', 'LOW', 'AND', 'WIDE', 'AND', 'THE', 'TRIANGLE', 'OF', 'LINEN', 'A', 'LITTLE', 'CRUMPLED', 'EXHIBITED', 'BY', 'THE', 'OPENING', 'OF', 'HIS', 
'WAISTCOAT', 'WAS', 'ADORNED', 'BY', 'A', 'PIN', 'CONTAINING', 'A', 'SMALL', 'RED', 'STONE'] +6128-63240-0004-1975: hyp=['HE', 'WAS', 'TALL', 'AND', 'LEAN', 'AND', 'DRESSED', 'THROUGHOUT', 'IN', 'BLACK', 'HIS', 'SHIRT', 'COLLAR', 'WAS', 'LOW', 'AND', 'WIDE', 'AND', 'THE', 'TRIANGLE', 'OF', 'LINEN', 'A', 'LITTLE', 'CRUMPLED', 'EXHIBITED', 'BY', 'THE', 'OPENING', 'OF', 'HIS', 'WAISTCOAT', 'WAS', 'ADORNED', 'BY', 'A', 'PIN', 'CONTAINING', 'A', 'SMALL', 'RED', 'STONE'] +6128-63240-0005-1976: ref=['IN', 'SPITE', 'OF', 'THIS', 'DECORATION', 'THE', 'YOUNG', 'MAN', 'LOOKED', 'POOR', 'AS', 'POOR', 'AS', 'A', 'YOUNG', 'MAN', 'COULD', 'LOOK', 'WHO', 'HAD', 'SUCH', 'A', 'FINE', 'HEAD', 'AND', 'SUCH', 'MAGNIFICENT', 'EYES'] +6128-63240-0005-1976: hyp=['IN', 'SPITE', 'OF', 'THIS', 'DECORATION', 'THE', 'YOUNG', 'MAN', 'LOOKED', 'POOR', 'AS', 'POOR', 'AS', 'A', 'YOUNG', 'MAN', 'COULD', 'LOOK', 'WHO', 'HAD', 'SUCH', 'A', 'FINE', 'HEAD', 'AND', 'SUCH', 'MAGNIFICENT', 'EYES'] +6128-63240-0006-1977: ref=['THOSE', 'OF', 'BASIL', 'RANSOM', 'WERE', 'DARK', 'DEEP', 'AND', 'GLOWING', 'HIS', 'HEAD', 'HAD', 'A', 'CHARACTER', 'OF', 'ELEVATION', 'WHICH', 'FAIRLY', 'ADDED', 'TO', 'HIS', 'STATURE', 'IT', 'WAS', 'A', 'HEAD', 'TO', 'BE', 'SEEN', 'ABOVE', 'THE', 'LEVEL', 'OF', 'A', 'CROWD', 'ON', 'SOME', 'JUDICIAL', 'BENCH', 'OR', 'POLITICAL', 'PLATFORM', 'OR', 'EVEN', 'ON', 'A', 'BRONZE', 'MEDAL'] +6128-63240-0006-1977: hyp=['THOSE', 'OF', 'BASIL', 'RANSOM', 'WERE', 'DARK', 'DEEP', 'AND', 'GLOWING', 'HIS', 'HEAD', 'HAD', 'A', 'CHARACTER', 'OF', 'ELEVATION', 'WHICH', 'FAIRLY', 'ADDED', 'TO', 'HIS', 'STATURE', 'IT', 'WAS', 'A', 'HEAD', 'TO', 'BE', 'SEEN', 'ABOVE', 'THE', 'LEVEL', 'OF', 'THE', 'CROWD', 'ON', 'SOME', 'JUDICIAL', 'BENCH', 'OR', 'POLITICAL', 'PLATFORM', 'OR', 'EVEN', 'ON', 'A', 'BRONZE', 'MEDAL'] +6128-63240-0007-1978: ref=['THESE', 'THINGS', 'THE', 'EYES', 'ESPECIALLY', 'WITH', 'THEIR', 'SMOULDERING', 'FIRE', 'MIGHT', 'HAVE', 'INDICATED', 'THAT', 'HE', 'WAS', 'TO', 'BE', 'A', 'GREAT', 'AMERICAN', 'STATESMAN', 'OR', 'ON', 'THE', 'OTHER', 'HAND', 'THEY', 'MIGHT', 'SIMPLY', 'HAVE', 'PROVED', 'THAT', 'HE', 'CAME', 'FROM', 'CAROLINA', 'OR', 'ALABAMA'] +6128-63240-0007-1978: hyp=['THESE', 'THINGS', 'THE', 'EYES', 'ESPECIALLY', 'WITH', 'THEIR', 'SMOULDERING', 'FIRE', 'MIGHT', 'HAVE', 'INDICATED', 'THAT', 'HE', 'WAS', 'TO', 'BE', 'A', 'GREAT', 'AMERICAN', 'STATESMAN', 'OR', 'ON', 'THE', 'OTHER', 'HAND', 'THEY', 'MIGHT', 'SIMPLY', 'HAVE', 'PROVED', 'THAT', 'HE', 'CAME', 'FROM', 'CAROLINA', 'OR', 'ALABAMA'] +6128-63240-0008-1979: ref=['AND', 'YET', 'THE', 'READER', 'WHO', 'LIKES', 'A', 'COMPLETE', 'IMAGE', 'WHO', 'DESIRES', 'TO', 'READ', 'WITH', 'THE', 'SENSES', 'AS', 'WELL', 'AS', 'WITH', 'THE', 'REASON', 'IS', 'ENTREATED', 'NOT', 'TO', 'FORGET', 'THAT', 'HE', 'PROLONGED', 'HIS', 'CONSONANTS', 'AND', 'SWALLOWED', 'HIS', 'VOWELS', 'THAT', 'HE', 'WAS', 'GUILTY', 'OF', 'ELISIONS', 'AND', 'INTERPOLATIONS', 'WHICH', 'WERE', 'EQUALLY', 'UNEXPECTED', 'AND', 'THAT', 'HIS', 'DISCOURSE', 'WAS', 'PERVADED', 'BY', 'SOMETHING', 'SULTRY', 'AND', 'VAST', 'SOMETHING', 'ALMOST', 'AFRICAN', 'IN', 'ITS', 'RICH', 'BASKING', 'TONE', 'SOMETHING', 'THAT', 'SUGGESTED', 'THE', 'TEEMING', 'EXPANSE', 'OF', 'THE', 'COTTON', 'FIELD'] +6128-63240-0008-1979: hyp=['AND', 'YET', 'THE', 'READER', 'WHO', 'LIKES', 'A', 'COMPLETE', 'IMAGE', 'WHO', 'DESIRES', 'TO', 'READ', 'WITH', 'THE', 'SENSES', 'AS', 'WELL', 'AS', 'WITH', 'THE', 'REASON', 'IS', 'ENTREATED', 'NOT', 'TO', 'FORGET', 'THAT', 'HE', 'PROLONGED', 'HIS', 'CONSONANCE', 'AND', 'SWALLOWED', 'HIS', 
'VOWALS', 'THAT', 'HE', 'WAS', 'GUILTY', 'OF', 'ALLEGIANCE', 'AND', 'INTERPOLATIONS', 'WHICH', 'WERE', 'EQUALLY', 'UNEXPECTED', 'AND', 'THAT', 'HIS', 'DISCOURSE', 'WAS', 'PERVADED', 'BY', 'SOMETHING', 'SULTRY', 'AND', 'VAST', 'SOMETHING', 'ALMOST', 'AFRICAN', 'IN', 'ITS', 'RICH', 'BASKING', 'TONE', 'SOMETHING', 'THAT', 'SUGGESTED', 'THE', 'TEEMING', 'EXPANSE', 'OF', 'THE', 'COTTONFIELD'] +6128-63240-0009-1980: ref=['AND', 'HE', 'TOOK', 'UP', 'HIS', 'HAT', 'VAGUELY', 'A', 'SOFT', 'BLACK', 'HAT', 'WITH', 'A', 'LOW', 'CROWN', 'AND', 'AN', 'IMMENSE', 'STRAIGHT', 'BRIM'] +6128-63240-0009-1980: hyp=['AND', 'HE', 'TOOK', 'UP', 'HIS', 'HAT', 'VAGUELY', 'A', 'SOFT', 'BLACK', 'HAT', 'WITH', 'A', 'LOW', 'CROWN', 'AND', 'AN', 'IMMENSE', 'STRAIGHT', 'BRIM'] +6128-63240-0010-1981: ref=['WELL', 'SO', 'IT', 'IS', 'THEY', 'ARE', 'ALL', 'WITCHES', 'AND', 'WIZARDS', 'MEDIUMS', 'AND', 'SPIRIT', 'RAPPERS', 'AND', 'ROARING', 'RADICALS'] +6128-63240-0010-1981: hyp=['WELL', 'SO', 'IT', 'IS', 'THEY', 'ARE', 'ALL', 'WITCHES', 'AND', 'WIZARDS', 'MEDIUMS', 'AND', 'SPIRIT', 'RAPPERS', 'AND', 'ROARING', 'RADICALS'] +6128-63240-0011-1982: ref=['IF', 'YOU', 'ARE', 'GOING', 'TO', 'DINE', 'WITH', 'HER', 'YOU', 'HAD', 'BETTER', 'KNOW', 'IT', 'OH', 'MURDER'] +6128-63240-0011-1982: hyp=['IF', 'YOU', 'ARE', 'GOING', 'TO', 'DINE', 'WITH', 'HER', 'YOU', 'HAD', 'BETTER', 'KNOW', 'IT', 'OH', 'MURDER'] +6128-63240-0012-1983: ref=['HE', 'LOOKED', 'AT', 'MISSUS', 'LUNA', 'WITH', 'INTELLIGENT', 'INCREDULITY'] +6128-63240-0012-1983: hyp=['HE', 'LOOKED', 'AT', 'MISSUS', 'LUNA', 'WITH', 'INTELLIGENT', 'INCREDULITY'] +6128-63240-0013-1984: ref=['SHE', 'WAS', 'ATTRACTIVE', 'AND', 'IMPERTINENT', 'ESPECIALLY', 'THE', 'LATTER'] +6128-63240-0013-1984: hyp=['SHE', 'WAS', 'ATTRACTIVE', 'AND', 'IMPERTINENT', 'ESPECIALLY', 'THE', 'LATTER'] +6128-63240-0014-1985: ref=['HAVE', 'YOU', 'BEEN', 'IN', 'EUROPE'] +6128-63240-0014-1985: hyp=['HAVE', 'YOU', 'BEEN', 'IN', 'EUROPE'] +6128-63240-0015-1986: ref=['NO', 'I', "HAVEN'T", 'BEEN', 'ANYWHERE'] +6128-63240-0015-1986: hyp=['NO', "HAVEN'T", 'BEEN', 'ANYWHERE'] +6128-63240-0016-1987: ref=['SHE', 'HATES', 'IT', 'SHE', 'WOULD', 'LIKE', 'TO', 'ABOLISH', 'IT'] +6128-63240-0016-1987: hyp=['SHE', 'HATES', 'IT', 'SHE', 'WOULD', 'LIKE', 'TO', 'ABOLISH', 'IT'] +6128-63240-0017-1988: ref=['THIS', 'LAST', 'REMARK', 'HE', 'MADE', 'AT', 'A', 'VENTURE', 'FOR', 'HE', 'HAD', 'NATURALLY', 'NOT', 'DEVOTED', 'ANY', 'SUPPOSITION', 'WHATEVER', 'TO', 'MISSUS', 'LUNA'] +6128-63240-0017-1988: hyp=['THIS', 'LAST', 'REMARK', 'HE', 'MADE', 'AT', 'HER', 'VENTURE', 'FOR', 'HE', 'HAD', 'NATURALLY', 'NOT', 'DEVOTED', 'ANY', 'SUPPOSITION', 'WHATEVER', 'TO', 'MISSUS', 'LOINER'] +6128-63240-0018-1989: ref=['ARE', 'YOU', 'VERY', 'AMBITIOUS', 'YOU', 'LOOK', 'AS', 'IF', 'YOU', 'WERE'] +6128-63240-0018-1989: hyp=['ARE', 'YOU', 'VERY', 'AMBITIOUS', 'YOU', 'LOOK', 'AS', 'IF', 'YOU', 'WERE'] +6128-63240-0019-1990: ref=['AND', 'MISSUS', 'LUNA', 'ADDED', 'THAT', 'NOW', 'SHE', 'WAS', 'BACK', 'SHE', "DIDN'T", 'KNOW', 'WHAT', 'SHE', 'SHOULD', 'DO'] +6128-63240-0019-1990: hyp=['AND', 'MISSUS', 'LENA', 'ADDED', 'THAT', 'NOW', 'SHE', 'WAS', 'BACK', 'SHE', "DIDN'T", 'KNOW', 'WHAT', 'SHE', 'SHOULD', 'DO'] +6128-63240-0020-1991: ref=['ONE', "DIDN'T", 'EVEN', 'KNOW', 'WHAT', 'ONE', 'HAD', 'COME', 'BACK', 'FOR'] +6128-63240-0020-1991: hyp=['ONE', "DIDN'T", 'EVEN', 'KNOW', 'WHAT', 'ONE', 'HAD', 'COME', 'BACK', 'FOR'] +6128-63240-0021-1992: ref=['BESIDES', 'OLIVE', "DIDN'T", 'WANT', 'HER', 'IN', 'BOSTON', 'AND', "DIDN'T", 'GO', 'THROUGH', 'THE', 'FORM', 'OF', 
'SAYING', 'SO'] +6128-63240-0021-1992: hyp=['BESIDES', 'OLIVE', "DIDN'T", 'WANT', 'HER', 'IN', 'BOSTON', 'AND', "DIDN'T", 'GO', 'THROUGH', 'THE', 'FORM', 'OF', 'SAYING', 'SO'] +6128-63240-0022-1993: ref=['THAT', 'WAS', 'ONE', 'COMFORT', 'WITH', 'OLIVE', 'SHE', 'NEVER', 'WENT', 'THROUGH', 'ANY', 'FORMS'] +6128-63240-0022-1993: hyp=['THAT', 'WAS', 'ONE', 'COMFORT', 'WITH', 'ALIVE', 'SHE', 'NEVER', 'WENT', 'THROUGH', 'ANY', 'FORMS'] +6128-63240-0023-1994: ref=['SHE', 'STOOD', 'THERE', 'LOOKING', 'CONSCIOUSLY', 'AND', 'RATHER', 'SERIOUSLY', 'AT', 'MISTER', 'RANSOM', 'A', 'SMILE', 'OF', 'EXCEEDING', 'FAINTNESS', 'PLAYED', 'ABOUT', 'HER', 'LIPS', 'IT', 'WAS', 'JUST', 'PERCEPTIBLE', 'ENOUGH', 'TO', 'LIGHT', 'UP', 'THE', 'NATIVE', 'GRAVITY', 'OF', 'HER', 'FACE'] +6128-63240-0023-1994: hyp=['SHE', 'STOOD', 'THERE', 'LOOKING', 'CONSCIOUSLY', 'AND', 'RATHER', 'SERIOUSLY', 'AT', 'MISTER', 'RAMSON', 'A', 'SMILE', 'OF', 'EXCEEDING', 'FAINTNESS', 'PLAYED', 'ABOUT', 'HER', 'LIPS', 'IT', 'WAS', 'JUST', 'PERCEPTIBLE', 'ENOUGH', 'TO', 'LIGHT', 'UP', 'THE', 'NATIVE', 'GRAVITY', 'OF', 'HER', 'FACE'] +6128-63240-0024-1995: ref=['HER', 'VOICE', 'WAS', 'LOW', 'AND', 'AGREEABLE', 'A', 'CULTIVATED', 'VOICE', 'AND', 'SHE', 'EXTENDED', 'A', 'SLENDER', 'WHITE', 'HAND', 'TO', 'HER', 'VISITOR', 'WHO', 'REMARKED', 'WITH', 'SOME', 'SOLEMNITY', 'HE', 'FELT', 'A', 'CERTAIN', 'GUILT', 'OF', 'PARTICIPATION', 'IN', 'MISSUS', "LUNA'S", 'INDISCRETION', 'THAT', 'HE', 'WAS', 'INTENSELY', 'HAPPY', 'TO', 'MAKE', 'HER', 'ACQUAINTANCE'] +6128-63240-0024-1995: hyp=['HER', 'VOICE', 'WAS', 'LOW', 'AN', 'AGREEABLE', 'A', 'CULTIVATED', 'VOICE', 'AND', 'SHE', 'EXTENDED', 'A', 'SLENDER', 'WHITE', 'HAND', 'TO', 'HER', 'VISITOR', 'WHO', 'REMARKED', 'WITH', 'SOME', 'SOLEMNITY', 'HE', 'FELT', 'A', 'CERTAIN', 'GUILT', 'OF', 'PARTICIPATION', 'IN', 'MISSUS', "LUNER'S", 'INDISCRETION', 'THAT', 'HE', 'WAS', 'INTENSELY', 'HAPPY', 'TO', 'MAKE', 'HER', 'ACQUAINTANCE'] +6128-63240-0025-1996: ref=['HE', 'OBSERVED', 'THAT', 'MISS', "CHANCELLOR'S", 'HAND', 'WAS', 'AT', 'ONCE', 'COLD', 'AND', 'LIMP', 'SHE', 'MERELY', 'PLACED', 'IT', 'IN', 'HIS', 'WITHOUT', 'EXERTING', 'THE', 'SMALLEST', 'PRESSURE'] +6128-63240-0025-1996: hyp=['HE', 'OBSERVED', 'THAT', 'MISS', "CHANCELLOR'S", 'HAND', 'WAS', 'AT', 'ONCE', 'COLD', 'AND', 'LIMP', 'SHE', 'MERELY', 'PLACED', 'IT', 'IN', 'HIS', 'WITHOUT', 'EXERTING', 'THE', 'SMALLEST', 'PRESSURE'] +6128-63240-0026-1997: ref=['I', 'SHALL', 'BE', 'BACK', 'VERY', 'LATE', 'WE', 'ARE', 'GOING', 'TO', 'A', 'THEATRE', 'PARTY', "THAT'S", 'WHY', 'WE', 'DINE', 'SO', 'EARLY'] +6128-63240-0026-1997: hyp=['I', 'SHALL', 'BE', 'BACK', 'VERY', 'LATE', "WE'RE", 'GOING', 'TO', 'A', 'THEATRE', 'PARTY', "THAT'S", 'WHY', 'WE', 'DINE', 'SO', 'EARLY'] +6128-63240-0027-1998: ref=['MISSUS', "LUNA'S", 'FAMILIARITY', 'EXTENDED', 'EVEN', 'TO', 'HER', 'SISTER', 'SHE', 'REMARKED', 'TO', 'MISS', 'CHANCELLOR', 'THAT', 'SHE', 'LOOKED', 'AS', 'IF', 'SHE', 'WERE', 'GOT', 'UP', 'FOR', 'A', 'SEA', 'VOYAGE'] +6128-63240-0027-1998: hyp=['MISSUS', "LUNNY'S", 'FAMILIARITY', 'EXTENDED', 'EVEN', 'TO', 'HER', 'SISTER', 'SHE', 'REMARKED', 'TO', 'MISS', 'CHANCELLOR', 'THAT', 'SHE', 'LOOKED', 'AS', 'IF', 'SHE', 'WERE', 'GOT', 'UP', 'FOR', 'A', 'SEA', 'VOYAGE'] +6128-63241-0000-1999: ref=['POOR', 'RANSOM', 'ANNOUNCED', 'THIS', 'FACT', 'TO', 'HIMSELF', 'AS', 'IF', 'HE', 'HAD', 'MADE', 'A', 'GREAT', 'DISCOVERY', 'BUT', 'IN', 'REALITY', 'HE', 'HAD', 'NEVER', 'BEEN', 'SO', 'BOEOTIAN', 'AS', 'AT', 'THAT', 'MOMENT'] +6128-63241-0000-1999: hyp=['POOR', 'RANSOM', 'ANNOUNCED', 'THIS', 
'FACT', 'TO', 'HIMSELF', 'AS', 'IF', 'HE', 'HAD', 'MADE', 'A', 'GREAT', 'DISCOVERY', 'BUT', 'IN', 'REALITY', 'HE', 'HAD', 'NEVER', 'BEEN', 'SO', 'BEE', 'OTIAN', 'AS', 'AT', 'THAT', 'MOMENT'] +6128-63241-0001-2000: ref=['THE', 'WOMEN', 'HE', 'HAD', 'HITHERTO', 'KNOWN', 'HAD', 'BEEN', 'MAINLY', 'OF', 'HIS', 'OWN', 'SOFT', 'CLIME', 'AND', 'IT', 'WAS', 'NOT', 'OFTEN', 'THEY', 'EXHIBITED', 'THE', 'TENDENCY', 'HE', 'DETECTED', 'AND', 'CURSORILY', 'DEPLORED', 'IN', 'MISSUS', "LUNA'S", 'SISTER'] +6128-63241-0001-2000: hyp=['THE', 'WOMEN', 'HE', 'HAD', 'HITHERTO', 'KNOWN', 'HAD', 'BEEN', 'MAINLY', 'OF', 'HIS', 'OWN', 'SOFT', 'CLIME', 'AND', 'IT', 'WAS', 'NOT', 'OFTEN', 'THEY', 'EXHIBITED', 'THE', 'TENDENCY', 'HE', 'DETECTED', 'AND', 'CURSORILY', 'DEPLORED', 'IN', 'MISSUS', "LUNNY'S", 'SISTER'] +6128-63241-0002-2001: ref=['RANSOM', 'WAS', 'PLEASED', 'WITH', 'THE', 'VISION', 'OF', 'THAT', 'REMEDY', 'IT', 'MUST', 'BE', 'REPEATED', 'THAT', 'HE', 'WAS', 'VERY', 'PROVINCIAL'] +6128-63241-0002-2001: hyp=['RUNSEN', 'WAS', 'PLEASED', 'WITH', 'THE', 'VISION', 'OF', 'THAT', 'REMEDY', 'IT', 'MUST', 'BE', 'REPEATED', 'THAT', 'HE', 'WAS', 'VERY', 'PROVINCIAL'] +6128-63241-0003-2002: ref=['HE', 'WAS', 'SORRY', 'FOR', 'HER', 'BUT', 'HE', 'SAW', 'IN', 'A', 'FLASH', 'THAT', 'NO', 'ONE', 'COULD', 'HELP', 'HER', 'THAT', 'WAS', 'WHAT', 'MADE', 'HER', 'TRAGIC'] +6128-63241-0003-2002: hyp=['HE', 'WAS', 'SORRY', 'FOR', 'HER', 'BUT', 'HE', 'SAW', 'IN', 'A', 'FLASH', 'THAT', 'NO', 'ONE', 'COULD', 'HELP', 'HER', 'THAT', 'WAS', 'WHAT', 'MADE', 'HER', 'TRAGIC'] +6128-63241-0004-2003: ref=['SHE', 'COULD', 'NOT', 'DEFEND', 'HERSELF', 'AGAINST', 'A', 'RICH', 'ADMIRATION', 'A', 'KIND', 'OF', 'TENDERNESS', 'OF', 'ENVY', 'OF', 'ANY', 'ONE', 'WHO', 'HAD', 'BEEN', 'SO', 'HAPPY', 'AS', 'TO', 'HAVE', 'THAT', 'OPPORTUNITY'] +6128-63241-0004-2003: hyp=['SHE', 'COULD', 'NOT', 'DEFEND', 'HERSELF', 'AGAINST', 'THE', 'RICH', 'ADMIRATION', 'A', 'KIND', 'OF', 'TENDERNESS', 'OF', 'ENVY', 'OF', 'ANY', 'ONE', 'WHO', 'HAD', 'BEEN', 'SO', 'HAPPY', 'AS', 'TO', 'HAVE', 'THAT', 'OPPORTUNITY'] +6128-63241-0005-2004: ref=['HIS', 'FAMILY', 'WAS', 'RUINED', 'THEY', 'HAD', 'LOST', 'THEIR', 'SLAVES', 'THEIR', 'PROPERTY', 'THEIR', 'FRIENDS', 'AND', 'RELATIONS', 'THEIR', 'HOME', 'HAD', 'TASTED', 'OF', 'ALL', 'THE', 'CRUELTY', 'OF', 'DEFEAT'] +6128-63241-0005-2004: hyp=['HIS', 'FAMILY', 'WAS', 'RUINED', 'THEY', 'HAD', 'LOST', 'THEIR', 'SLAVES', 'THEIR', 'PROPERTY', 'THEIR', 'FRIENDS', 'AND', 'RELATIONS', 'THEIR', 'HOME', 'HAD', 'TASTED', 'OF', 'ALL', 'THE', 'CRUELTY', 'OF', 'DEFEAT'] +6128-63241-0006-2005: ref=['THE', 'STATE', 'OF', 'MISSISSIPPI', 'SEEMED', 'TO', 'HIM', 'THE', 'STATE', 'OF', 'DESPAIR', 'SO', 'HE', 'SURRENDERED', 'THE', 'REMNANTS', 'OF', 'HIS', 'PATRIMONY', 'TO', 'HIS', 'MOTHER', 'AND', 'SISTERS', 'AND', 'AT', 'NEARLY', 'THIRTY', 'YEARS', 'OF', 'AGE', 'ALIGHTED', 'FOR', 'THE', 'FIRST', 'TIME', 'IN', 'NEW', 'YORK', 'IN', 'THE', 'COSTUME', 'OF', 'HIS', 'PROVINCE', 'WITH', 'FIFTY', 'DOLLARS', 'IN', 'HIS', 'POCKET', 'AND', 'A', 'GNAWING', 'HUNGER', 'IN', 'HIS', 'HEART'] +6128-63241-0006-2005: hyp=['THE', 'STATE', 'OF', 'MISSISSIPPI', 'SEEMED', 'TO', 'HIM', 'THE', 'STATE', 'OF', 'DESPAIR', 'SO', 'HE', 'SURRENDERED', 'THE', 'REMNANTS', 'OF', 'HIS', 'PATRIMONY', 'TO', 'HIS', 'MOTHER', 'AND', 'SISTERS', 'AND', 'AT', 'NEARLY', 'THIRTY', 'YEARS', 'OF', 'AGE', 'ALIGHTED', 'FOR', 'THE', 'FIRST', 'TIME', 'IN', 'NEW', 'YORK', 'IN', 'THE', 'COSTUME', 'OF', 'HIS', 'PROVINCE', 'WITH', 'FIFTY', 'DOLLARS', 'IN', 'HIS', 'POCKET', 'AND', 'A', 'GNAWING', 'HUNGER', 
'IN', 'HIS', 'HEART'] +6128-63241-0007-2006: ref=['IT', 'WAS', 'IN', 'THE', 'FEMALE', 'LINE', 'AS', 'BASIL', 'RANSOM', 'HAD', 'WRITTEN', 'IN', 'ANSWERING', 'HER', 'LETTER', 'WITH', 'A', 'GOOD', 'DEAL', 'OF', 'FORM', 'AND', 'FLOURISH', 'HE', 'SPOKE', 'AS', 'IF', 'THEY', 'HAD', 'BEEN', 'ROYAL', 'HOUSES'] +6128-63241-0007-2006: hyp=['IT', 'WAS', 'IN', 'THE', 'FEMALE', 'LINE', 'AS', 'BEILS', 'AND', 'RANSOM', 'HAD', 'WRITTEN', 'IN', 'ANSWERING', 'HER', 'LETTER', 'WITH', 'A', 'GOOD', 'DEAL', 'OF', 'FORM', 'AND', 'FLOURISH', 'HE', 'SPOKE', 'AS', 'IF', 'THEY', 'HAD', 'BEEN', 'ROYAL', 'HOUSES'] +6128-63241-0008-2007: ref=['IF', 'IT', 'HAD', 'BEEN', 'POSSIBLE', 'TO', 'SEND', 'MISSUS', 'RANSOM', 'MONEY', 'OR', 'EVEN', 'CLOTHES', 'SHE', 'WOULD', 'HAVE', 'LIKED', 'THAT', 'BUT', 'SHE', 'HAD', 'NO', 'MEANS', 'OF', 'ASCERTAINING', 'HOW', 'SUCH', 'AN', 'OFFERING', 'WOULD', 'BE', 'TAKEN'] +6128-63241-0008-2007: hyp=['IF', 'IT', 'HAD', 'BEEN', 'POSSIBLE', 'TO', 'SEND', 'MISSUS', 'RANSOM', 'MONEY', 'OR', 'EVEN', 'CLOTHES', 'SHE', 'WOULD', 'HAVE', 'LIKED', 'THAT', 'BUT', 'SHE', 'HAD', 'NO', 'MEANS', 'OF', 'ASCERTAINING', 'HOW', 'SUCH', 'AN', 'OFFERING', 'WOULD', 'BE', 'TAKEN'] +6128-63241-0009-2008: ref=['OLIVE', 'HAD', 'A', 'FEAR', 'OF', 'EVERYTHING', 'BUT', 'HER', 'GREATEST', 'FEAR', 'WAS', 'OF', 'BEING', 'AFRAID'] +6128-63241-0009-2008: hyp=['OLIVE', 'HAD', 'A', 'FEAR', 'OF', 'EVERYTHING', 'BUT', 'HER', 'GREATEST', 'FEAR', 'WAS', 'OF', 'BEING', 'AFRAID'] +6128-63241-0010-2009: ref=['SHE', 'HAD', 'ERECTED', 'IT', 'INTO', 'A', 'SORT', 'OF', 'RULE', 'OF', 'CONDUCT', 'THAT', 'WHENEVER', 'SHE', 'SAW', 'A', 'RISK', 'SHE', 'WAS', 'TO', 'TAKE', 'IT', 'AND', 'SHE', 'HAD', 'FREQUENT', 'HUMILIATIONS', 'AT', 'FINDING', 'HERSELF', 'SAFE', 'AFTER', 'ALL'] +6128-63241-0010-2009: hyp=['SHE', 'HAD', 'ERECTED', 'IT', 'INTO', 'A', 'SORT', 'OF', 'RULE', 'OF', 'CONDUCT', 'THAT', 'WHENEVER', 'SHE', 'SAW', 'A', 'RISK', 'SHE', 'WAS', 'TO', 'TAKE', 'IT', 'AND', 'SHE', 'HAD', 'FREQUENT', 'HUMILIATIONS', 'AT', 'FINDING', 'HERSELF', 'SAVED', 'AFTER', 'ALL'] +6128-63241-0011-2010: ref=['SHE', 'WAS', 'PERFECTLY', 'SAFE', 'AFTER', 'WRITING', 'TO', 'BASIL', 'RANSOM', 'AND', 'INDEED', 'IT', 'WAS', 'DIFFICULT', 'TO', 'SEE', 'WHAT', 'HE', 'COULD', 'HAVE', 'DONE', 'TO', 'HER', 'EXCEPT', 'THANK', 'HER', 'HE', 'WAS', 'ONLY', 'EXCEPTIONALLY', 'SUPERLATIVE', 'FOR', 'HER', 'LETTER', 'AND', 'ASSURE', 'HER', 'THAT', 'HE', 'WOULD', 'COME', 'AND', 'SEE', 'HER', 'THE', 'FIRST', 'TIME', 'HIS', 'BUSINESS', 'HE', 'WAS', 'BEGINNING', 'TO', 'GET', 'A', 'LITTLE', 'SHOULD', 'TAKE', 'HIM', 'TO', 'BOSTON'] +6128-63241-0011-2010: hyp=['SHE', 'WAS', 'PERFECTLY', 'SAFE', 'AFTER', 'WRITING', 'TO', 'BAISIL', 'RANSOM', 'AND', 'INDEED', 'IT', 'WAS', 'DIFFICULT', 'TO', 'SEE', 'WHAT', 'HE', 'COULD', 'HAVE', 'DONE', 'TO', 'HER', 'EXCEPT', 'THANK', 'HER', 'HE', 'WAS', 'ONLY', 'EXCEPTIONALLY', 'SUPERLATIVE', 'FOR', 'HER', 'LETTER', 'AND', 'ASSURE', 'HER', 'THAT', 'HE', 'WOULD', 'COME', 'AND', 'SEE', 'HER', 'THE', 'FIRST', 'TIME', 'HIS', 'BUSINESS', 'HE', 'WAS', 'BEGINNING', 'TO', 'GET', 'A', 'LITTLE', 'SHOULD', 'TAKE', 'HIM', 'TO', 'BOSTON'] +6128-63241-0012-2011: ref=['HE', 'WAS', 'TOO', 'SIMPLE', 'TOO', 'MISSISSIPPIAN', 'FOR', 'THAT', 'SHE', 'WAS', 'ALMOST', 'DISAPPOINTED'] +6128-63241-0012-2011: hyp=['HE', 'WAS', 'TOO', 'SIMPLE', 'TOO', 'MISSISSIPPIENT', 'FOR', 'THAT', 'SHE', 'WAS', 'ALMOST', 'DISAPPOINTED'] +6128-63241-0013-2012: ref=['OF', 'ALL', 'THINGS', 'IN', 'THE', 'WORLD', 'CONTENTION', 'WAS', 'MOST', 'SWEET', 'TO', 'HER', 'THOUGH', 'WHY', 'IT', 'IS', 'HARD', 
'TO', 'IMAGINE', 'FOR', 'IT', 'ALWAYS', 'COST', 'HER', 'TEARS', 'HEADACHES', 'A', 'DAY', 'OR', 'TWO', 'IN', 'BED', 'ACUTE', 'EMOTION', 'AND', 'IT', 'WAS', 'VERY', 'POSSIBLE', 'BASIL', 'RANSOM', 'WOULD', 'NOT', 'CARE', 'TO', 'CONTEND'] +6128-63241-0013-2012: hyp=['OF', 'ALL', 'THINGS', 'IN', 'THE', 'WORLD', 'CONTENTION', 'WAS', 'MOST', 'SWEET', 'TO', 'HER', 'THOUGH', 'WHY', 'IT', 'IS', 'HARD', 'TO', 'IMAGINE', 'FOR', 'IT', 'ALWAYS', 'COST', 'HER', 'TEARS', 'HEADACHES', 'A', 'DAY', 'OR', 'TWO', 'IN', 'BED', 'ACUTE', 'EMOTION', 'AND', 'IT', 'WAS', 'VERY', 'POSSIBLE', 'BASIL', 'RANSOM', 'WOULD', 'NOT', 'CARE', 'TO', 'CONTEND'] +6128-63244-0000-2013: ref=['MISS', 'CHANCELLOR', 'HERSELF', 'HAD', 'THOUGHT', 'SO', 'MUCH', 'ON', 'THE', 'VITAL', 'SUBJECT', 'WOULD', 'NOT', 'SHE', 'MAKE', 'A', 'FEW', 'REMARKS', 'AND', 'GIVE', 'THEM', 'SOME', 'OF', 'HER', 'EXPERIENCES'] +6128-63244-0000-2013: hyp=['MISS', 'CHANCELLOR', 'HERSELF', 'HAD', 'THOUGHT', 'SO', 'MUCH', 'ON', 'THE', 'VITAL', 'SUBJECT', 'WOULD', 'NOT', 'SHE', 'MAKE', 'A', 'FEW', 'REMARKS', 'AND', 'GIVE', 'THEM', 'SOME', 'OF', 'HER', 'EXPERIENCES'] +6128-63244-0001-2014: ref=['HOW', 'DID', 'THE', 'LADIES', 'ON', 'BEACON', 'STREET', 'FEEL', 'ABOUT', 'THE', 'BALLOT'] +6128-63244-0001-2014: hyp=['HOW', 'DID', 'THE', 'LADIES', 'ON', 'BEACON', 'STREET', 'FEEL', 'ABOUT', 'THE', 'BANNET'] +6128-63244-0002-2015: ref=['PERHAPS', 'SHE', 'COULD', 'SPEAK', 'FOR', 'THEM', 'MORE', 'THAN', 'FOR', 'SOME', 'OTHERS'] +6128-63244-0002-2015: hyp=['PERHAPS', 'SHE', 'COULD', 'SPEAK', 'FOR', 'THEM', 'MORE', 'THAN', 'FOR', 'SOME', 'OTHERS'] +6128-63244-0003-2016: ref=['WITH', 'HER', 'IMMENSE', 'SYMPATHY', 'FOR', 'REFORM', 'SHE', 'FOUND', 'HERSELF', 'SO', 'OFTEN', 'WISHING', 'THAT', 'REFORMERS', 'WERE', 'A', 'LITTLE', 'DIFFERENT'] +6128-63244-0003-2016: hyp=['WITH', 'HER', 'IMMENSE', 'SYMPATHY', 'FOR', 'REFORM', 'SHE', 'FOUND', 'HERSELF', 'SO', 'OFTEN', 'WISHING', 'THAT', 'REFORMERS', 'WERE', 'A', 'LITTLE', 'DIFFERENT'] +6128-63244-0004-2017: ref=['OLIVE', 'HATED', 'TO', 'HEAR', 'THAT', 'FINE', 'AVENUE', 'TALKED', 'ABOUT', 'AS', 'IF', 'IT', 'WERE', 'SUCH', 'A', 'REMARKABLE', 'PLACE', 'AND', 'TO', 'LIVE', 'THERE', 'WERE', 'A', 'PROOF', 'OF', 'WORLDLY', 'GLORY'] +6128-63244-0004-2017: hyp=['OLIVE', 'HATED', 'TO', 'HEAR', 'THAT', 'FINE', 'AVENUE', 'TALKED', 'ABOUT', 'AS', 'IF', 'IT', 'WERE', 'SUCH', 'A', 'REMARKABLE', 'PLACE', 'AND', 'TO', 'LIVE', 'THERE', 'WERE', 'A', 'PROOF', 'OF', 'WORLDLY', 'GLORY'] +6128-63244-0005-2018: ref=['ALL', 'SORTS', 'OF', 'INFERIOR', 'PEOPLE', 'LIVED', 'THERE', 'AND', 'SO', 'BRILLIANT', 'A', 'WOMAN', 'AS', 'MISSUS', 'FARRINDER', 'WHO', 'LIVED', 'AT', 'ROXBURY', 'OUGHT', 'NOT', 'TO', 'MIX', 'THINGS', 'UP'] +6128-63244-0005-2018: hyp=['ALL', 'SORTS', 'OF', 'INFERIOR', 'PEOPLE', 'LIVED', 'THERE', 'AND', 'SO', 'BRILLIANT', 'A', 'WOMAN', 'AS', 'MISSUS', 'FARINNDER', 'WHO', 'LIVED', 'AT', 'BRAXBURY', 'OUGHT', 'NOT', 'TO', 'MIX', 'THINGS', 'UP'] +6128-63244-0006-2019: ref=['SHE', 'KNEW', 'HER', 'PLACE', 'IN', 'THE', 'BOSTON', 'HIERARCHY', 'AND', 'IT', 'WAS', 'NOT', 'WHAT', 'MISSUS', 'FARRINDER', 'SUPPOSED', 'SO', 'THAT', 'THERE', 'WAS', 'A', 'WANT', 'OF', 'PERSPECTIVE', 'IN', 'TALKING', 'TO', 'HER', 'AS', 'IF', 'SHE', 'HAD', 'BEEN', 'A', 'REPRESENTATIVE', 'OF', 'THE', 'ARISTOCRACY'] +6128-63244-0006-2019: hyp=['SHE', 'KNEW', 'HER', 'PLACE', 'IN', 'THE', 'BOSTON', 'HIERARCHY', 'AND', 'IT', 'WAS', 'NOT', 'WHAT', 'MISSUS', 'FARRINGERS', 'SUPPOSED', 'SO', 'THAT', 'THERE', 'WAS', 'A', 'WANT', 'OF', 'PERSPECTIVE', 'IN', 'TALKING', 'TO', 'HER', 'AS', 'IF', 
'SHE', 'HAD', 'BEEN', 'A', 'REPRESENTATIVE', 'OF', 'THE', 'ARISTOCRACY'] +6128-63244-0007-2020: ref=['SHE', 'WISHED', 'TO', 'WORK', 'IN', 'ANOTHER', 'FIELD', 'SHE', 'HAD', 'LONG', 'BEEN', 'PREOCCUPIED', 'WITH', 'THE', 'ROMANCE', 'OF', 'THE', 'PEOPLE'] +6128-63244-0007-2020: hyp=['SHE', 'WISHED', 'TO', 'WORK', 'IN', 'ANOTHER', 'FIELD', 'SHE', 'HAD', 'LONG', 'BEEN', 'PREOCCUPIED', 'WITH', 'THE', 'ROMANCE', 'OF', 'THE', 'PEOPLE'] +6128-63244-0008-2021: ref=['THIS', 'MIGHT', 'SEEM', 'ONE', 'OF', 'THE', 'MOST', 'ACCESSIBLE', 'OF', 'PLEASURES', 'BUT', 'IN', 'POINT', 'OF', 'FACT', 'SHE', 'HAD', 'NOT', 'FOUND', 'IT', 'SO'] +6128-63244-0008-2021: hyp=['THIS', 'MIGHT', 'SEEM', 'ONE', 'OF', 'THE', 'MOST', 'ACCESSIBLE', 'OF', 'PLEASURES', 'BUT', 'IN', 'POINT', 'OF', 'FACT', 'SHE', 'HAD', 'NOT', 'FOUND', 'IT', 'SO'] +6128-63244-0009-2022: ref=['CHARLIE', 'WAS', 'A', 'YOUNG', 'MAN', 'IN', 'A', 'WHITE', 'OVERCOAT', 'AND', 'A', 'PAPER', 'COLLAR', 'IT', 'WAS', 'FOR', 'HIM', 'IN', 'THE', 'LAST', 'ANALYSIS', 'THAT', 'THEY', 'CARED', 'MUCH', 'THE', 'MOST'] +6128-63244-0009-2022: hyp=['CHARLEY', 'WAS', 'A', 'YOUNG', 'MAN', 'IN', 'A', 'WHITE', 'OVERCOAT', 'AND', 'A', 'PAPER', 'COLLAR', 'IT', 'WAS', 'FOR', 'HIM', 'IN', 'THE', 'LAST', 'ANALYSIS', 'THAT', 'THEY', 'CARED', 'MUCH', 'THE', 'MOST'] +6128-63244-0010-2023: ref=['OLIVE', 'CHANCELLOR', 'WONDERED', 'HOW', 'MISSUS', 'FARRINDER', 'WOULD', 'TREAT', 'THAT', 'BRANCH', 'OF', 'THE', 'QUESTION'] +6128-63244-0010-2023: hyp=['OLIVE', 'CHANCELLOR', 'WONDERED', 'HOW', 'MISSUS', 'VERNDER', 'WOULD', 'TREAT', 'THAT', 'BRANCH', 'OF', 'THE', 'QUESTION'] +6128-63244-0011-2024: ref=['IF', 'IT', 'BE', 'NECESSARY', 'WE', 'ARE', 'PREPARED', 'TO', 'TAKE', 'CERTAIN', 'STEPS', 'TO', 'CONCILIATE', 'THE', 'SHRINKING'] +6128-63244-0011-2024: hyp=['IF', 'IT', 'BE', 'NECESSARY', 'WE', 'ARE', 'PREPARED', 'TO', 'TAKE', 'CERTAIN', 'STEPS', 'TO', 'CONCILIATE', 'THE', 'SHRINKING'] +6128-63244-0012-2025: ref=['OUR', 'MOVEMENT', 'IS', 'FOR', 'ALL', 'IT', 'APPEALS', 'TO', 'THE', 'MOST', 'DELICATE', 'LADIES'] +6128-63244-0012-2025: hyp=['OUR', 'MOVEMENT', 'IS', 'FOR', 'ALL', 'IT', 'APPEALS', 'TO', 'THE', 'MOST', 'DELICATE', 'LADIES'] +6128-63244-0013-2026: ref=['RAISE', 'THE', 'STANDARD', 'AMONG', 'THEM', 'AND', 'BRING', 'ME', 'A', 'THOUSAND', 'NAMES'] +6128-63244-0013-2026: hyp=['RAISE', 'THE', 'STANDARD', 'AMONG', 'THEM', 'AND', 'BRING', 'ME', 'A', 'THOUSAND', 'NAMES'] +6128-63244-0014-2027: ref=['I', 'LOOK', 'AFTER', 'THE', 'DETAILS', 'AS', 'WELL', 'AS', 'THE', 'BIG', 'CURRENTS', 'MISSUS', 'FARRINDER', 'ADDED', 'IN', 'A', 'TONE', 'AS', 'EXPLANATORY', 'AS', 'COULD', 'BE', 'EXPECTED', 'OF', 'SUCH', 'A', 'WOMAN', 'AND', 'WITH', 'A', 'SMILE', 'OF', 'WHICH', 'THE', 'SWEETNESS', 'WAS', 'THRILLING', 'TO', 'HER', 'LISTENER'] +6128-63244-0014-2027: hyp=['AND', 'LOOK', 'AFTER', 'THE', 'DETAILS', 'AS', 'WELL', 'AS', 'THE', 'BOOK', 'CURRANTS', 'MISSUS', 'FARLANDER', 'ADDED', 'IN', 'A', 'TONE', 'AS', 'EXPLANATORY', 'AS', 'COULD', 'BE', 'EXPECTED', 'OF', 'SUCH', 'A', 'WOMAN', 'AND', 'WITH', 'A', 'SMILE', 'OF', 'WHICH', 'THE', 'SWEETNESS', 'WAS', 'THRILLING', 'TO', 'HER', 'LISTENER'] +6128-63244-0015-2028: ref=['SAID', 'OLIVE', 'CHANCELLOR', 'WITH', 'A', 'FACE', 'WHICH', 'SEEMED', 'TO', 'PLEAD', 'FOR', 'A', 'REMISSION', 'OF', 'RESPONSIBILITY'] +6128-63244-0015-2028: hyp=['SAID', 'OLIVE', 'CHANCELLOR', 'WITH', 'A', 'FACE', 'WHICH', 'SEEMED', 'TO', 'PLEAD', 'FOR', 'A', "REMISSIONER'S", 'RESPONSIBILITY'] +6128-63244-0016-2029: ref=['I', 'WANT', 'TO', 'BE', 'NEAR', 'TO', 'THEM', 'TO', 'HELP', 'THEM'] 
+6128-63244-0016-2029: hyp=['I', 'WANT', 'TO', 'BE', 'NEAR', 'TO', 'THEM', 'TO', 'HELP', 'THEM'] +6128-63244-0017-2030: ref=['IT', 'WAS', 'ONE', 'THING', 'TO', 'CHOOSE', 'FOR', 'HERSELF', 'BUT', 'NOW', 'THE', 'GREAT', 'REPRESENTATIVE', 'OF', 'THE', 'ENFRANCHISEMENT', 'OF', 'THEIR', 'SEX', 'FROM', 'EVERY', 'FORM', 'OF', 'BONDAGE', 'HAD', 'CHOSEN', 'FOR', 'HER'] +6128-63244-0017-2030: hyp=['IT', 'WAS', 'ONE', 'THING', 'TO', 'CHOOSE', 'FOR', 'HERSELF', 'BUT', 'NOW', 'THE', 'GREAT', 'REPRESENTATIVE', 'OF', 'THE', 'ENFRANCHISEMENT', 'OF', 'THEIR', 'SEX', 'FROM', 'EVERY', 'FORM', 'OF', 'BONDAGE', 'HAD', 'CHOSEN', 'FOR', 'HER'] +6128-63244-0018-2031: ref=['THE', 'UNHAPPINESS', 'OF', 'WOMEN'] +6128-63244-0018-2031: hyp=['THE', 'UNHAPPINESS', 'OF', 'WOMEN'] +6128-63244-0019-2032: ref=['THEY', 'WERE', 'HER', 'SISTERS', 'THEY', 'WERE', 'HER', 'OWN', 'AND', 'THE', 'DAY', 'OF', 'THEIR', 'DELIVERY', 'HAD', 'DAWNED'] +6128-63244-0019-2032: hyp=['THEY', 'WERE', 'HER', 'SISTERS', 'THEY', 'WERE', 'HER', 'OWN', 'AND', 'THE', 'DAY', 'OF', 'THEIR', 'DELIVERY', 'HAD', 'DAWNED'] +6128-63244-0020-2033: ref=['THIS', 'WAS', 'THE', 'ONLY', 'SACRED', 'CAUSE', 'THIS', 'WAS', 'THE', 'GREAT', 'THE', 'JUST', 'REVOLUTION', 'IT', 'MUST', 'TRIUMPH', 'IT', 'MUST', 'SWEEP', 'EVERYTHING', 'BEFORE', 'IT', 'IT', 'MUST', 'EXACT', 'FROM', 'THE', 'OTHER', 'THE', 'BRUTAL', 'BLOOD', 'STAINED', 'RAVENING', 'RACE', 'THE', 'LAST', 'PARTICLE', 'OF', 'EXPIATION'] +6128-63244-0020-2033: hyp=['THIS', 'WAS', 'THE', 'ONLY', 'SACRED', 'CAUSE', 'THIS', 'WAS', 'THE', 'GREAT', 'THE', 'JUST', 'REVOLUTION', 'IT', 'MUST', 'TRIUMPH', 'IT', 'MUST', 'SWEEP', 'EVERYTHING', 'BEFORE', 'IT', 'IT', 'MUST', 'EXACT', 'FROM', 'THE', 'OTHER', 'THE', 'BRUTAL', 'BLOODSTAINED', 'RAVENING', 'RACE', 'THE', 'LAST', 'PARTICLE', 'OF', 'EXPIATION'] +6128-63244-0021-2034: ref=['THEY', 'WOULD', 'BE', 'NAMES', 'OF', 'WOMEN', 'WEAK', 'INSULTED', 'PERSECUTED', 'BUT', 'DEVOTED', 'IN', 'EVERY', 'PULSE', 'OF', 'THEIR', 'BEING', 'TO', 'THE', 'CAUSE', 'AND', 'ASKING', 'NO', 'BETTER', 'FATE', 'THAN', 'TO', 'DIE', 'FOR', 'IT'] +6128-63244-0021-2034: hyp=['THERE', 'WOULD', 'BE', 'NAMES', 'OF', 'WOMEN', 'WEAK', 'INSULTED', 'PERSECUTED', 'BUT', 'DEVOTED', 'IN', 'EVERY', 'PULSE', 'OF', 'THEIR', 'BEING', 'TO', 'THE', 'CAUSE', 'AND', 'ASKING', 'NO', 'BETTER', 'FATE', 'THAN', 'TO', 'DIE', 'FOR', 'IT'] +6128-63244-0022-2035: ref=['IT', 'WAS', 'NOT', 'CLEAR', 'TO', 'THIS', 'INTERESTING', 'GIRL', 'IN', 'WHAT', 'MANNER', 'SUCH', 'A', 'SACRIFICE', 'AS', 'THIS', 'LAST', 'WOULD', 'BE', 'REQUIRED', 'OF', 'HER', 'BUT', 'SHE', 'SAW', 'THE', 'MATTER', 'THROUGH', 'A', 'KIND', 'OF', 'SUNRISE', 'MIST', 'OF', 'EMOTION', 'WHICH', 'MADE', 'DANGER', 'AS', 'ROSY', 'AS', 'SUCCESS'] +6128-63244-0022-2035: hyp=['IT', 'WAS', 'NOT', 'CLEAR', 'TO', 'THIS', 'INTERESTING', 'GIRL', 'IN', 'WHAT', 'MANNER', 'SUCH', 'A', 'SACRIFICE', 'AS', 'THIS', 'LAST', 'WOULD', 'BE', 'REQUIRED', 'OF', 'HER', 'BUT', 'SHE', 'SAW', 'THE', 'MATTER', 'THROUGH', 'A', 'KIND', 'OF', 'SUNRISE', 'MIST', 'OF', 'EMOTION', 'WHICH', 'MADE', 'DANGER', 'AS', 'ROSY', 'IS', 'SUCCESS'] +6128-63244-0023-2036: ref=['WHEN', 'MISS', 'BIRDSEYE', 'APPROACHED', 'IT', 'TRANSFIGURED', 'HER', 'FAMILIAR', 'HER', 'COMICAL', 'SHAPE', 'AND', 'MADE', 'THE', 'POOR', 'LITTLE', 'HUMANITARY', 'HACK', 'SEEM', 'ALREADY', 'A', 'MARTYR'] +6128-63244-0023-2036: hyp=['WHEN', 'MISS', "BIRD'S", 'EYE', 'APPROACHED', 'IT', 'TRANSFIGURED', 'HER', 'FAMILIAR', 'HER', 'COMICAL', 'SHAPE', 'AND', 'MADE', 'THE', 'POOR', 'LITTLE', 'HUMANITARY', 'HACK', 'SEEM', 'ALREADY', 'A', 'MARTYR'] 
+6128-63244-0024-2037: ref=['OLIVE', 'CHANCELLOR', 'LOOKED', 'AT', 'HER', 'WITH', 'LOVE', 'REMEMBERED', 'THAT', 'SHE', 'HAD', 'NEVER', 'IN', 'HER', 'LONG', 'UNREWARDED', 'WEARY', 'LIFE', 'HAD', 'A', 'THOUGHT', 'OR', 'AN', 'IMPULSE', 'FOR', 'HERSELF'] +6128-63244-0024-2037: hyp=['OLIVE', 'CHANCELLOR', 'LOOKED', 'AT', 'HER', 'WITH', 'LOVE', 'REMEMBERED', 'THAT', 'SHE', 'HAD', 'NEVER', 'IN', 'HER', 'LONG', 'UNREWARDED', 'WEARY', 'LIFE', 'HAD', 'A', 'THOUGHT', 'OR', 'AN', 'IMPULSE', 'FOR', 'HERSELF'] +6128-63244-0025-2038: ref=['SHE', 'HAD', 'BEEN', 'CONSUMED', 'BY', 'THE', 'PASSION', 'OF', 'SYMPATHY', 'IT', 'HAD', 'CRUMPLED', 'HER', 'INTO', 'AS', 'MANY', 'CREASES', 'AS', 'AN', 'OLD', 'GLAZED', 'DISTENDED', 'GLOVE'] +6128-63244-0025-2038: hyp=['SHE', 'HAD', 'BEEN', 'CONSUMED', 'BY', 'THE', 'PASSION', 'OF', 'SYMPATHY', 'IT', 'HAD', 'CRUMPLED', 'HER', 'INTO', 'AS', 'MANY', 'CREASES', 'AS', 'AN', 'OLD', 'GLAZED', 'DISTENDED', 'GLOVE'] +6432-63722-0000-2039: ref=['BUT', 'SCUSE', 'ME', "DIDN'T", 'YO', 'FIGGER', 'ON', 'DOIN', 'SOME', 'DETECTIN', 'AN', 'GIVE', 'UP', 'FISHIN'] +6432-63722-0000-2039: hyp=['BUT', 'EXCUSE', 'ME', "DIDN'T", 'YOU', 'FIG', 'ON', 'DOIN', 'SOME', 'DETECTIN', 'AN', 'GIVIN', 'UP', 'FISHIN'] +6432-63722-0001-2040: ref=['AND', 'SHAG', 'WITH', 'THE', 'FREEDOM', 'OF', 'AN', 'OLD', 'SERVANT', 'STOOD', 'LOOKING', 'AT', 'HIS', 'MASTER', 'AS', 'IF', 'NOT', 'QUITE', 'UNDERSTANDING', 'THE', 'NEW', 'TWIST', 'THE', 'AFFAIRS', 'HAD', 'TAKEN'] +6432-63722-0001-2040: hyp=['AND', 'SHAGG', 'WITH', 'THE', 'FREEDOM', 'OF', 'AN', 'OLD', 'SERVANT', 'STOOD', 'LOOKING', 'AT', 'HIS', 'MASTER', 'AS', 'IF', 'NOT', 'QUITE', 'UNDERSTANDING', 'THE', 'NEW', 'TWIST', 'THE', 'AFFAIRS', 'HAD', 'TAKEN'] +6432-63722-0002-2041: ref=["I'M", 'GOING', 'OFF', 'FISHING', 'I', 'MAY', 'NOT', 'CATCH', 'ANYTHING', 'I', 'MAY', 'NOT', 'WANT', 'TO', 'AFTER', 'I', 'GET', 'THERE'] +6432-63722-0002-2041: hyp=["I'M", 'GOING', 'OFF', 'FISHING', 'I', 'MAY', 'NOT', 'CATCH', 'ANYTHING', 'AND', 'MAY', 'NOT', 'WANT', 'TO', 'AFTER', 'I', 'GET', 'THERE'] +6432-63722-0003-2042: ref=['GET', 'READY', 'SHAG', 'YES', 'SAH', 'COLONEL'] +6432-63722-0003-2042: hyp=['GET', 'READY', 'SHAD', 'YES', 'A', 'COLONEL'] +6432-63722-0004-2043: ref=['AND', 'HAVING', 'PUT', 'HIMSELF', 'IN', 'A', 'FAIR', 'WAY', 'AS', 'HE', 'HOPED', 'TO', 'SOLVE', 'SOME', 'OF', 'THE', 'PROBLEMS', 'CONNECTED', 'WITH', 'THE', 'DARCY', 'CASE', 'COLONEL', 'ASHLEY', 'WENT', 'DOWN', 'TO', 'POLICE', 'HEADQUARTERS', 'TO', 'LEARN', 'MORE', 'FACTS', 'IN', 'CONNECTION', 'WITH', 'THE', 'MURDER', 'OF', 'THE', 'EAST', 'INDIAN'] +6432-63722-0004-2043: hyp=['AND', 'HAVING', 'PUT', 'HIMSELF', 'IN', 'A', 'FAIR', 'WAY', 'AS', 'HE', 'HOPED', 'TO', 'SOLVE', 'SOME', 'OF', 'THE', 'PROBLEMS', 'CONNECTED', 'WITH', 'THE', 'DARCY', 'CASE', 'COLONEL', 'ASHLEY', 'WENT', 'DOWN', 'TO', 'POLICE', 'HEADQUARTERS', 'TO', 'LEARN', 'MORE', 'FACTS', 'IN', 'THE', 'CONNECTION', 'WITH', 'THE', 'MURDER', 'OF', 'THE', 'EAST', 'INDIAN'] +6432-63722-0005-2044: ref=['PINKUS', 'AND', 'DONOVAN', "HAVEN'T", 'THEY', 'CARROLL', 'YEP'] +6432-63722-0005-2044: hyp=['PINKUS', 'AND', 'DONOVAN', "HAVEN'T", 'THEY', 'CARROLL', 'YEP'] +6432-63722-0006-2045: ref=['CARROLL', 'WAS', 'TOO', 'MUCH', 'ENGAGED', 'IN', 'WATCHING', 'THE', 'BLUE', 'SMOKE', 'CURL', 'LAZILY', 'UPWARD', 'FROM', 'HIS', 'CIGAR', 'JUST', 'THEN', 'TO', 'SAY', 'MORE'] +6432-63722-0006-2045: hyp=['CARL', 'WAS', 'TOO', 'MUCH', 'ENGAGED', 'IN', 'WATCHING', 'THE', 'BLUE', 'SMOKE', 'CURL', 'LAZILY', 'UPWARD', 'FROM', 'HIS', 'CIGAR', 'JUST', 'THEN', 'TO', 'SAY', 'MORE'] 
+6432-63722-0007-2046: ref=['ARE', 'YOU', 'GOING', 'TO', 'WORK', 'ON', 'THAT', 'CASE', 'COLONEL'] +6432-63722-0007-2046: hyp=['ARE', 'YOU', 'GOING', 'TO', 'WORK', 'ON', 'THAT', 'CASE', 'COLONEL'] +6432-63722-0008-2047: ref=['BUT', 'HE', "HADN'T", 'ANY', 'MORE', 'TO', 'DO', 'WITH', 'IT', 'COLONEL', 'THAN', 'THAT', 'CAT'] +6432-63722-0008-2047: hyp=['BUT', 'HE', "HADN'T", 'ANY', 'MORE', 'TO', 'DO', 'WITH', 'IT', 'COLONEL', 'THAN', 'THAT', 'CAT'] +6432-63722-0009-2048: ref=['PERHAPS', 'NOT', 'ADMITTED', 'COLONEL', 'ASHLEY'] +6432-63722-0009-2048: hyp=['PERHAPS', 'NOT', 'ADMITTED', 'COLONEL', 'ASHLEY'] +6432-63722-0010-2049: ref=["WE'VE", 'GOT', 'OUR', 'MAN', 'AND', "THAT'S", 'ALL', 'WE', 'WANT'] +6432-63722-0010-2049: hyp=["WE'VE", 'GOT', 'OUR', 'MAN', 'AND', "THAT'S", 'ALL', 'WE', 'WANT'] +6432-63722-0011-2050: ref=["YOU'RE", 'ON', 'THE', 'DARCY', 'CASE', 'THEY', 'TELL', 'ME', 'IN', 'A', 'WAY', 'YES'] +6432-63722-0011-2050: hyp=["YOU'RE", 'ON', 'THE', 'DARCY', 'CASE', 'THEY', 'TELL', 'ME', 'IN', 'A', 'WAY', 'YES'] +6432-63722-0012-2051: ref=["I'M", 'WORKING', 'IN', 'THE', 'INTERESTS', 'OF', 'THE', 'YOUNG', 'MAN'] +6432-63722-0012-2051: hyp=["I'M", 'WORKING', 'IN', 'THE', 'INTEREST', 'OF', 'THE', 'YOUNG', 'MAN'] +6432-63722-0013-2052: ref=["IT'S", 'JUST', 'ONE', 'OF', 'THEM', 'COINCIDENCES', 'LIKE'] +6432-63722-0013-2052: hyp=["IT'S", 'JUST', 'ONE', 'OF', 'THEM', 'COINCIDENCES', 'LIKE'] +6432-63722-0014-2053: ref=['BUSTED', 'HIS', 'HEAD', 'IN', 'WITH', 'A', 'HEAVY', 'CANDLESTICK', 'ONE', 'OF', 'A', 'PAIR'] +6432-63722-0014-2053: hyp=['BUSTED', 'HIS', 'HEAD', 'IN', 'WITH', 'A', 'HEAVY', 'CANDLESTICK', 'ONE', 'OF', 'A', 'PAIR'] +6432-63722-0015-2054: ref=['GAD', 'EXCLAIMED', 'THE', 'COLONEL'] +6432-63722-0015-2054: hyp=['GAD', 'EXPLAINED', 'THE', 'COLONEL'] +6432-63722-0016-2055: ref=['THE', 'VERY', 'PAIR', 'I', 'WAS', 'GOING', 'TO', 'BUY'] +6432-63722-0016-2055: hyp=['THE', 'VERY', 'PAIR', 'I', 'WAS', 'GOING', 'TO', 'BUY'] +6432-63722-0017-2056: ref=['LOOK', 'HERE', 'COLONEL', 'DO', 'YOU', 'KNOW', 'ANYTHING', 'ABOUT', 'THIS'] +6432-63722-0017-2056: hyp=['LOOK', 'HERE', 'COLONEL', 'DO', 'YOU', 'KNOW', 'ANYTHING', 'ABOUT', 'THIS'] +6432-63722-0018-2057: ref=['AND', 'THE', "DETECTIVE'S", 'PROFESSIONAL', 'INSTINCTS', 'GOT', 'THE', 'UPPER', 'HAND', 'OF', 'HIS', 'FRIENDLINESS', 'NOT', 'THE', 'LEAST', 'IN', 'THE', 'WORLD', 'NOT', 'AS', 'MUCH', 'AS', 'YOU', 'DO', 'WAS', 'THE', 'COOL', 'ANSWER'] +6432-63722-0018-2057: hyp=['AND', 'THE', "DETECTIVE'S", 'PROFESSIONAL', 'INSTINCTS', 'GOT', 'THE', 'UPPER', 'HAND', 'OF', 'HIS', 'FRIENDLINESS', 'NOT', 'THE', 'LEAST', 'IN', 'THE', 'WORLD', 'NOT', 'AS', 'MUCH', 'AS', 'YOU', 'DO', 'WAS', 'THE', 'COOL', 'ANSWER'] +6432-63722-0019-2058: ref=['I', 'HAPPENED', 'TO', 'SEE', 'THOSE', 'CANDLESTICKS', 'IN', 'THE', 'WINDOW', 'OF', 'SINGA', "PHUT'S", 'SHOP', 'THE', 'OTHER', 'DAY', 'AND', 'I', 'MADE', 'UP', 'MY', 'MIND', 'TO', 'BUY', 'THEM', 'WHEN', 'I', 'HAD', 'A', 'CHANCE'] +6432-63722-0019-2058: hyp=['I', 'HAPPENED', 'TO', 'SEE', 'THOSE', 'CANDLESTICKS', 'IN', 'THE', 'WINDOW', 'OF', 'SINGA', "PHUT'S", 'SHOP', 'THE', 'OTHER', 'DAY', 'AND', 'I', 'MADE', 'UP', 'MY', 'MIND', 'TO', 'BUY', 'THEM', 'WHEN', 'I', 'HAD', 'A', 'CHANCE'] +6432-63722-0020-2059: ref=['NOW', "I'M", 'AFRAID', 'I', "WON'T", 'BUT', 'HOW', 'DID', 'IT', 'HAPPEN'] +6432-63722-0020-2059: hyp=['NOW', "I'M", 'AFRAID', 'I', "WON'T", 'BUT', 'HOW', 'DID', 'IT', 'HAPPEN'] +6432-63722-0021-2060: ref=['PHUT', 'I', "DON'T", 'KNOW', 'WHETHER', "THAT'S", 'HIS', 'FIRST', 'OR', 'HIS', 'LAST', 'NAME', 'ANYHOW', 'HE', 
'HAD', 'A', 'PARTNER', 'NAMED', 'SHERE', 'ALI'] +6432-63722-0021-2060: hyp=['BUT', 'I', "DON'T", 'KNOW', 'WHETHER', "THAT'S", 'HIS', 'FIRST', 'OR', 'HIS', 'LAST', 'NAME', 'ANYHOW', 'HE', 'HAD', 'A', 'PARTNER', 'NAMED', 'SHERLLY'] +6432-63722-0022-2061: ref=['ANYHOW', 'HE', 'AND', 'PHUT', "DIDN'T", 'GET', 'ALONG', 'VERY', 'WELL', 'IT', 'SEEMS'] +6432-63722-0022-2061: hyp=['ANYHOW', 'HE', 'AND', 'FLUT', "DIDN'T", 'GET', 'ALONG', 'VERY', 'WELL', 'IT', 'SEEMS'] +6432-63722-0023-2062: ref=['NEIGHBORS', 'OFTEN', 'HEARD', 'EM', 'SCRAPPIN', 'A', 'LOT', 'AND', 'THIS', 'AFTERNOON', 'THEY', 'WENT', 'AT', 'IT', 'AGAIN', 'HOT', 'AND', 'HEAVY'] +6432-63722-0023-2062: hyp=['NEIGHBORS', 'OFTEN', 'HEARD', 'EM', 'SCRAPPING', 'A', 'LOT', 'AND', 'THIS', 'AFTERNOON', 'THEY', 'WENT', 'AT', 'IT', 'AGAIN', 'HOT', 'AND', 'HEAVY'] +6432-63722-0024-2063: ref=['TOWARD', 'DARK', 'A', 'MAN', 'WENT', 'IN', 'TO', 'BUY', 'A', 'LAMP'] +6432-63722-0024-2063: hyp=['TOWARD', 'DARK', 'A', 'MAN', 'WENT', 'IN', 'TO', 'BUY', 'A', 'LAMP'] +6432-63722-0025-2064: ref=['HE', 'FOUND', 'THE', 'PLACE', 'WITHOUT', 'A', 'LIGHT', 'IN', 'IT', 'STUMBLED', 'OVER', 'SOMETHING', 'ON', 'THE', 'FLOOR', 'AND', 'THERE', 'WAS', "ALI'S", 'BODY', 'WITH', 'THE', 'HEAD', 'BUSTED', 'IN', 'AND', 'THIS', 'HEAVY', 'CANDLESTICK', 'NEAR', 'IT'] +6432-63722-0025-2064: hyp=['HE', 'FOUND', 'THE', 'PLACE', 'WITHOUT', 'A', 'LIGHT', 'IN', 'IT', 'STUMBLED', 'OVER', 'SOMETHING', 'ON', 'THE', 'FLOOR', 'AND', 'THERE', 'WAS', "ALI'S", 'BODY', 'WITH', 'THE', 'HEAD', 'BUSTED', 'IN', 'AND', 'THIS', 'HEAVY', 'CANDLESTICK', 'NEAR', 'IT'] +6432-63722-0026-2065: ref=['SURE', 'HELD', 'SO', 'TIGHT', 'WE', 'COULD', 'HARDLY', 'GET', 'IT', 'OUT'] +6432-63722-0026-2065: hyp=['SHORE', 'HELD', 'SO', 'TIGHT', 'WE', 'COULD', 'HARDLY', 'GET', 'IT', 'OUT'] +6432-63722-0027-2066: ref=['MAYBE', 'THE', 'FIGHT', 'WAS', 'ABOUT', 'WHO', 'OWNED', 'THE', 'WATCH', 'FOR', 'THE', 'DAGOS', 'TALKED', 'IN', 'THEIR', 'FOREIGN', 'LINGO', 'AND', 'NONE', 'OF', 'THE', 'NEIGHBORS', 'COULD', 'TELL', 'WHAT', 'THEY', 'WERE', 'SAYIN', 'I', 'SEE'] +6432-63722-0027-2066: hyp=['MAYBE', 'THE', 'FIGHT', 'WAS', 'ABOUT', 'WHO', 'OWNED', 'THE', 'WATCH', 'FOR', 'THE', 'DAGOES', 'TALKED', 'IN', 'THEIR', 'FOREIGN', 'LINGO', 'AND', 'NONE', 'OF', 'THE', 'NEIGHBORS', 'COULD', 'TELL', 'WHAT', 'THEY', 'WERE', 'SAYING', 'I', 'SEE'] +6432-63722-0028-2067: ref=['AND', 'THE', 'WATCH', 'HAVE', 'YOU', 'IT', 'YES', "IT'S", 'HERE'] +6432-63722-0028-2067: hyp=['AND', 'THE', 'WATCH', 'HAVE', 'YOU', 'IT', 'YES', "IT'S", 'HERE'] +6432-63722-0029-2068: ref=["THAT'S", 'THE', 'WATCH', 'ANNOUNCED', 'THE', 'HEADQUARTERS', 'DETECTIVE', 'REACHING', 'IN', 'FOR', 'IT', 'GOING', 'YET', 'SEE'] +6432-63722-0029-2068: hyp=["THAT'S", 'THE', 'WATCH', 'ANNOUNCED', 'THE', 'HEADQUARTERS', 'DETECTIVE', 'REACHING', 'IN', 'FOR', 'IT', 'GOING', 'IT', 'SEE'] +6432-63722-0030-2069: ref=["YOU'RE", 'NOT', 'AS', 'SQUEAMISH', 'AS', 'ALL', 'THAT', 'ARE', 'YOU', 'JUST', 'BECAUSE', 'IT', 'WAS', 'IN', 'A', 'DEAD', "MAN'S", 'HAND', 'AND', 'IN', 'A', "WOMAN'S"] +6432-63722-0030-2069: hyp=["YOU'RE", 'NOT', 'AS', 'SQUEAMISH', 'AS', 'ALL', 'THAT', 'ARE', 'YOU', 'JUST', 'BECAUSE', 'IT', 'WAS', 'IN', 'A', 'DEAD', "MAN'S", 'HANDS', 'AND', 'A', "WOMAN'S"] +6432-63722-0031-2070: ref=['AND', "DONOVAN'S", 'VOICE', 'WAS', 'PLAINLY', 'SKEPTICAL'] +6432-63722-0031-2070: hyp=['AND', "DONOVAN'S", 'VOICE', 'WAS', 'PLAINLY', 'SKEPTICAL'] +6432-63722-0032-2071: ref=['YES', 'IT', 'MAY', 'HAVE', 'SOME', 'ROUGH', 'EDGES', 'ON', 'IT'] +6432-63722-0032-2071: hyp=['YES', 'IT', 'MAY', 'HAVE', 
'SOME', 'ROUGH', 'EDGES', 'ON', 'IT'] +6432-63722-0033-2072: ref=['AND', "I'VE", 'READ', 'ENOUGH', 'ABOUT', 'GERMS', 'TO', 'KNOW', 'THE', 'DANGER', "I'D", 'ADVISE', 'YOU', 'TO', 'BE', 'CAREFUL'] +6432-63722-0033-2072: hyp=['AND', "I'VE", 'READ', 'ENOUGH', 'ABOUT', 'TERMS', 'TO', 'KNOW', 'THE', 'DANGER', "I'D", 'ADVISE', 'YOU', 'TO', 'BE', 'CAREFUL'] +6432-63722-0034-2073: ref=['IF', 'YOU', "DON'T", 'MIND', 'I', 'SHOULD', 'LIKE', 'TO', 'EXAMINE', 'THIS', 'A', 'BIT'] +6432-63722-0034-2073: hyp=['IF', 'YOU', "DON'T", 'MIND', 'I', 'SHOULD', 'LIKE', 'TO', 'EXAMINE', 'THIS', 'A', 'BIT'] +6432-63722-0035-2074: ref=['BEFORE', 'THE', 'BIG', 'WIND', 'IN', 'IRELAND', 'SUGGESTED', 'THONG', 'WITH', 'A', 'NOD', 'AT', 'HIS', 'IRISH', 'COMPATRIOT', 'SLIGHTLY', 'LAUGHED', 'THE', 'COLONEL'] +6432-63722-0035-2074: hyp=['BEFORE', 'THE', 'BIG', 'WIND', 'IN', 'IRELAND', 'SUGGESTED', 'THONG', 'WITH', 'A', 'NOD', 'AT', 'HIS', 'IRISH', 'COMPATRIOT', 'SLIGHTLY', 'LAUGHED', 'THE', 'COLONEL'] +6432-63722-0036-2075: ref=["THAT'S", 'RIGHT', 'AGREED', 'THE', 'COLONEL', 'AS', 'HE', 'CONTINUED', 'TO', 'MOVE', 'HIS', 'MAGNIFYING', 'GLASS', 'OVER', 'THE', 'SURFACE', 'OF', 'THE', 'STILL', 'TICKING', 'WATCH'] +6432-63722-0036-2075: hyp=["THAT'S", 'RIGHT', 'AGREED', 'THE', 'COLONEL', 'AS', 'HE', 'CONTINUED', 'TO', 'MOVE', 'HIS', 'MAGNIFYING', 'GLASS', 'OVER', 'THE', 'SURFACE', 'OF', 'THE', 'STILL', 'TICKING', 'WATCH'] +6432-63722-0037-2076: ref=['AND', 'A', 'CLOSE', 'OBSERVER', 'MIGHT', 'HAVE', 'OBSERVED', 'THAT', 'HE', 'DID', 'NOT', 'TOUCH', 'HIS', 'BARE', 'FINGERS', 'TO', 'THE', 'TIMEPIECE', 'BUT', 'POKED', 'IT', 'ABOUT', 'AND', 'TOUCHED', 'IT', 'HERE', 'AND', 'THERE', 'WITH', 'THE', 'END', 'OF', 'A', 'LEADPENCIL'] +6432-63722-0037-2076: hyp=['AND', 'A', 'CLOSE', 'OBSERVER', 'MIGHT', 'HAVE', 'OBSERVED', 'THAT', 'HE', 'DID', 'NOT', 'TOUCH', 'HIS', 'BARE', 'FINGERS', 'TO', 'THE', 'TIMEPIECE', 'BUT', 'POKED', 'IT', 'ABOUT', 'AND', 'TOUCHED', 'IT', 'HERE', 'AND', 'THERE', 'WITH', 'THE', 'END', 'OF', 'A', 'LEAD', 'PENCIL'] +6432-63722-0038-2077: ref=['AND', 'DONOVAN', 'TAKE', 'A', "FRIEND'S", 'ADVICE', 'AND', "DON'T", 'BE', 'TOO', 'FREE', 'WITH', 'THAT', 'WATCH', 'TOO', 'FREE', 'WITH', 'IT'] +6432-63722-0038-2077: hyp=['AND', 'DONOMAN', 'TAKE', 'A', "FRIEND'S", 'ADVICE', 'AND', "DON'T", 'BE', 'TOO', 'FREE', 'WITH', 'THAT', 'WATCH', 'TOO', 'FREE', 'WITH', 'IT'] +6432-63722-0039-2078: ref=['ASKED', 'THE', 'SURPRISED', 'DETECTIVE', 'YES'] +6432-63722-0039-2078: hyp=['ASKED', 'THE', 'SURPRISED', 'DETECTIVE', 'YES'] +6432-63722-0040-2079: ref=["DON'T", 'SCRATCH', 'YOURSELF', 'ON', 'IT', 'WHATEVER', 'YOU', 'DO', 'WHY', 'NOT'] +6432-63722-0040-2079: hyp=["DON'T", 'SCRATCH', 'YOURSELF', 'ON', 'IT', 'WHATEVER', 'YOU', 'DO', 'WHY', 'NOT'] +6432-63722-0041-2080: ref=['SIMPLY', 'BECAUSE', 'THIS', 'WATCH'] +6432-63722-0041-2080: hyp=['SIMPLY', 'BECAUSE', 'THIS', 'WATCH'] +6432-63722-0042-2081: ref=['SOME', 'ONE', 'OUT', 'HERE', 'TO', 'SEE', 'YOU'] +6432-63722-0042-2081: hyp=['SOME', 'ONE', 'OUT', 'HERE', 'TO', 'SEE', 'YOU'] +6432-63722-0043-2082: ref=['ALL', 'RIGHT', 'BE', 'THERE', 'IN', 'A', 'SECOND'] +6432-63722-0043-2082: hyp=['ALL', 'RIGHT', 'BE', 'THERE', 'IN', 'A', 'SECOND'] +6432-63722-0044-2083: ref=['SINGA', 'PHUT', 'WAS', 'THE', 'PANTING', 'ANSWER'] +6432-63722-0044-2083: hyp=['SINGA', 'PHUT', 'WAS', 'THE', 'PANTING', 'ANSWER'] +6432-63722-0045-2084: ref=['I', 'WANT', 'TO', 'TALK', 'OVER', "DARCY'S", 'CASE', 'WITH', 'YOU', 'THE', 'COLONEL', 'HAD', 'SAID', 'AND', 'THE', 'TWO', 'HAD', 'TALKED', 'HAD', 'THOUGHT', 'HAD', 'TALKED', 
'AGAIN', 'AND', 'NOW', 'WERE', 'SILENT', 'FOR', 'A', 'TIME'] +6432-63722-0045-2084: hyp=['I', 'WANT', 'TO', 'TALK', 'OVER', "DARCY'S", 'CASE', 'WITH', 'YOU', 'THE', 'COLONEL', 'HAD', 'SAID', 'AND', 'THE', 'TWO', 'HAD', 'TALKED', 'HAD', 'THOUGHT', 'HAD', 'TALKED', 'AGAIN', 'AND', 'NOW', 'WERE', 'SILENT', 'FOR', 'A', 'TIME'] +6432-63722-0046-2085: ref=['WHAT', 'ARE', 'THE', 'CHANCES', 'OF', 'GETTING', 'HIM', 'OFF', 'LEGALLY', 'IF', 'WE', 'GO', 'AT', 'IT', 'FROM', 'A', 'NEGATIVE', 'STANDPOINT', 'ASKED', 'THE', 'COLONEL'] +6432-63722-0046-2085: hyp=['WHAT', 'ARE', 'THE', 'CHANCES', 'OF', 'GETTING', 'HIM', 'OFF', 'LEGALLY', 'IF', 'WE', 'GO', 'AT', 'IT', 'FROM', 'A', 'NEGATIVE', 'STANDPOINT', 'ASKED', 'THE', 'COLONEL'] +6432-63722-0047-2086: ref=['RATHER', 'A', 'HYPOTHETICAL', 'QUESTION', 'COLONEL', 'BUT', 'I', 'SHOULD', 'SAY', 'IT', 'MIGHT', 'BE', 'A', 'FIFTY', 'FIFTY', 'PROPOSITION'] +6432-63722-0047-2086: hyp=['RATHER', 'A', 'HYPOTHETICAL', 'QUESTION', 'COLONEL', 'BUT', 'I', 'SHOULD', 'SAY', 'IT', 'MIGHT', 'BE', 'A', 'FIFTY', 'FIFTY', 'PROPOSITION'] +6432-63722-0048-2087: ref=['AT', 'BEST', 'HE', 'WOULD', 'GET', 'OFF', 'WITH', 'A', 'SCOTCH', 'VERDICT', 'OF', 'NOT', 'PROVEN', 'BUT', 'HE', "DOESN'T", 'WANT', 'THAT', 'NOR', 'DO', 'I'] +6432-63722-0048-2087: hyp=['AT', 'BEST', 'HE', 'WOULD', 'GET', 'OFF', 'WITH', 'A', 'SCOTCH', 'VERDICT', 'OF', 'NOT', 'PROVING', 'BUT', 'HE', "DOESN'T", 'WANT', 'THAT', 'NOR', 'DO', 'I'] +6432-63722-0049-2088: ref=['AND', 'YOU', 'I', "DON'T", 'WANT', 'IT', 'EITHER'] +6432-63722-0049-2088: hyp=['AND', 'YOU', 'I', "DON'T", 'WANT', 'IT', 'EITHER'] +6432-63722-0050-2089: ref=['BUT', 'I', 'WANT', 'TO', 'KNOW', 'JUST', 'WHERE', 'WE', 'STAND', 'NOW', 'I', 'KNOW'] +6432-63722-0050-2089: hyp=['BUT', 'I', 'WANT', 'TO', 'KNOW', 'JUST', 'WHERE', 'WE', 'STAND', 'NOW', 'I', 'KNOW'] +6432-63722-0051-2090: ref=['BUT', 'I', 'NEED', 'TO', 'DO', 'A', 'LITTLE', 'MORE', 'SMOKING', 'OUT', 'FIRST', 'NOW', 'I', 'WANT', 'TO', 'THINK'] +6432-63722-0051-2090: hyp=['BUT', 'I', 'NEED', 'TO', 'DO', 'A', 'LITTLE', 'MORE', 'SMOKING', 'OUT', 'FIRST', 'NOW', 'I', 'WANT', 'TO', 'THINK'] +6432-63722-0052-2091: ref=['IF', "YOU'LL", 'EXCUSE', 'ME', "I'LL", 'PRETEND', "I'M", 'FISHING', 'AND', 'I', 'MAY', 'CATCH', 'SOMETHING'] +6432-63722-0052-2091: hyp=['IF', "YOU'LL", 'EXCUSE', 'ME', "I'LL", 'PRETEND', "I'M", 'FISHING', 'AND', 'I', 'MAY', 'CATCH', 'SOMETHING'] +6432-63722-0053-2092: ref=['IN', 'FACT', 'I', 'HAVE', 'A', 'FEELING', 'THAT', "I'LL", 'LAND', 'MY', 'FISH'] +6432-63722-0053-2092: hyp=['IN', 'FACT', 'I', 'HAVE', 'A', 'FEELING', 'THAT', 'I', 'WILL', 'LAND', 'MY', 'FISH'] +6432-63722-0054-2093: ref=["I'D", 'RECOMMEND', 'HIM', 'TO', 'YOU', 'INSTEAD', 'OF', 'BLACKSTONE', 'THANKS', 'LAUGHED', 'KENNETH'] +6432-63722-0054-2093: hyp=["I'D", 'RECOMMEND', 'HIM', 'TO', 'YOU', 'INSTEAD', 'OF', 'BLACKSTONE', 'THANKS', 'LAUGHED', 'KENNETH'] +6432-63722-0055-2094: ref=['WHAT', 'IS', 'IT', 'PERHAPS', 'I', 'CAN', 'HELP', 'YOU'] +6432-63722-0055-2094: hyp=['WHAT', 'IS', 'IT', 'PERHAPS', 'I', 'CAN', 'HELP', 'YOU'] +6432-63722-0056-2095: ref=['THE', 'OLD', 'ADAGE', 'OF', 'TWO', 'HEADS', 'YOU', 'KNOW'] +6432-63722-0056-2095: hyp=['THE', 'OLD', 'ADAGE', 'OF', 'TWO', 'HEADS', 'YOU', 'KNOW'] +6432-63722-0057-2096: ref=['YES', 'IT', 'STILL', 'HOLDS', 'GOOD'] +6432-63722-0057-2096: hyp=['YES', 'IT', 'STILL', 'HOLDS', 'GOOD'] +6432-63722-0058-2097: ref=['NO', 'ALIMONY', 'REPEATED', 'THE', 'COLONEL', 'PUZZLED', 'YES', 'JUST', 'THAT'] +6432-63722-0058-2097: hyp=['NO', 'ALIMONY', 'REPLIED', 'THE', 'COLONEL', 'PUZZLED', 
'YES', 'JUST', 'THAT'] +6432-63722-0059-2098: ref=['AND', "THERE'S", 'NO', 'REASON', 'YOU', "SHOULDN'T", 'KNOW'] +6432-63722-0059-2098: hyp=['AND', "THERE'S", 'NO', 'REASON', 'YOU', "SHOULDN'T", 'KNOW'] +6432-63723-0000-2099: ref=['CHUCKLED', 'THE', 'COLONEL', 'AS', 'HE', 'SKILFULLY', 'PLAYED', 'THE', 'LUCKLESS', 'TROUT', 'NOW', 'STRUGGLING', 'TO', 'GET', 'LOOSE', 'FROM', 'THE', 'HOOK'] +6432-63723-0000-2099: hyp=['CHUCKLED', 'THE', 'COLONEL', 'AS', 'HE', 'SKILFULLY', 'PLAYED', 'THE', 'LUCKLESS', 'TROUT', 'NOW', 'STRUGGLING', 'TO', 'GET', 'LOOSE', 'FROM', 'THE', 'HOOK'] +6432-63723-0001-2100: ref=['AND', 'WHEN', 'THE', 'FISH', 'WAS', 'LANDED', 'PANTING', 'ON', 'THE', 'GRASS', 'AND', 'SHAG', 'HAD', 'BEEN', 'ROUSED', 'FROM', 'HIS', 'SLUMBER', 'TO', 'SLIP', 'THE', 'NOW', 'LIMP', 'FISH', 'INTO', 'THE', 'CREEL', 'COLONEL', 'ASHLEY', 'GAVE', 'A', 'SIGH', 'OF', 'RELIEF', 'AND', 'REMARKED', 'I', 'THINK', 'I', 'SEE', 'IT', 'NOW'] +6432-63723-0001-2100: hyp=['AND', 'WHEN', 'THE', 'FISH', 'WAS', 'LANDED', 'PANTING', 'ON', 'THE', 'GRASS', 'AND', 'SHAGG', 'HAD', 'BEEN', 'ROUSED', 'FROM', 'HIS', 'SLUMBER', 'TO', 'SLIP', 'THE', 'NOW', 'LIMP', 'FISH', 'INTO', 'THE', 'CREEL', 'COLONEL', 'ASHLEY', 'GAVE', 'A', 'SIGH', 'OF', 'RELIEF', 'AND', 'REMARKED', 'I', 'THINK', 'I', 'SEE', 'IT', 'NOW'] +6432-63723-0002-2101: ref=['THE', 'REASON', 'SHE', 'ASKED', 'NO', 'ALIMONY', 'INQUIRED', 'KENNETH'] +6432-63723-0002-2101: hyp=['THE', 'REASON', 'SHE', 'ASKED', 'NO', 'ALIMONY', 'INQUIRED', 'KENNETH'] +6432-63723-0003-2102: ref=['NO', 'I', "WASN'T", 'THINKING', 'OF', 'THAT'] +6432-63723-0003-2102: hyp=['NO', 'I', "WASN'T", 'THINKING', 'OF', 'THAT'] +6432-63723-0004-2103: ref=['HOWEVER', "DON'T", 'THINK', "I'M", 'NOT', 'INTERESTED', 'IN', 'YOUR', 'CASE', "I'VE", 'FISHED', 'ENOUGH', 'FOR', 'TO', 'DAY'] +6432-63723-0004-2103: hyp=['HOWEVER', "DON'T", 'THINK', "I'M", 'NOT', 'INTERESTED', 'IN', 'YOUR', 'CASE', 'I', 'HAVE', 'FINISHED', 'ENOUGH', 'FOR', 'TO', 'DAY'] +6432-63723-0005-2104: ref=['WELL', 'I', "DON'T", 'KNOW', 'THAT', 'YOU', 'CAN'] +6432-63723-0005-2104: hyp=['WELL', 'I', "DON'T", 'KNOW', 'THAT', 'YOU', 'CAN'] +6432-63723-0006-2105: ref=['IT', "ISN'T", 'GENERALLY', 'KNOWN', 'WENT', 'ON', 'THE', 'LAWYER', 'THAT', 'THE', 'HOTEL', "KEEPER'S", 'WIFE', 'HAS', 'LEFT', 'HIM'] +6432-63723-0006-2105: hyp=['IT', "ISN'T", 'GENERALLY', 'KNOWN', 'WENT', 'ON', 'THE', 'LAWYER', 'THAT', 'THE', 'HOTEL', "KEEPER'S", 'WIFE', 'HAS', 'LEFT', 'HIM'] +6432-63723-0007-2106: ref=['IT', 'WAS', 'ONE', 'OF', 'WHAT', 'AT', 'FIRST', 'MIGHT', 'BE', 'CALLED', 'REFINED', 'CRUELTY', 'ON', 'HER', "HUSBAND'S", 'PART', 'DEGENERATING', 'GRADUALLY', 'INTO', 'THAT', 'OF', 'THE', 'BASER', 'SORT'] +6432-63723-0007-2106: hyp=['IT', 'WAS', 'ONE', 'OF', 'WHAT', 'AT', 'FIRST', 'MIGHT', 'BE', 'CALLED', 'REFINED', 'CRUELTY', 'ON', 'HER', "HUSBAND'S", 'PART', 'DEGENERATING', 'GRADUALLY', 'INTO', 'THAT', 'OF', 'A', 'BASER', 'SORT'] +6432-63723-0008-2107: ref=['YOU', "DON'T", 'MEAN', 'THAT', 'LARCH', 'STRUCK', 'HER', 'THAT', 'THERE', 'WAS', 'PHYSICAL', 'ABUSE', 'DO', 'YOU', 'ASKED', 'THE', 'COLONEL', "THAT'S", 'WHAT', 'HE', 'DID'] +6432-63723-0008-2107: hyp=['YOU', "DON'T", 'MEAN', 'THAT', 'LARCH', 'STRUCK', 'HER', 'THAT', 'THERE', 'WAS', 'PHYSICAL', 'ABUSE', 'DO', 'YOU', 'ASKED', 'THE', 'COLONEL', "THAT'S", 'WHAT', 'HE', 'DID'] +6432-63723-0009-2108: ref=['THE', 'COLONEL', 'DID', 'NOT', 'DISCLOSE', 'THE', 'FACT', 'THAT', 'IT', 'WAS', 'NO', 'NEWS', 'TO', 'HIM'] +6432-63723-0009-2108: hyp=['THE', 'COLONEL', 'DID', 'NOT', 'DISCLOSE', 'THE', 'FACT', 'THAT', 'IT', 
'WAS', 'NO', 'NEWS', 'TO', 'HIM'] +6432-63723-0010-2109: ref=['AARON', "GRAFTON'S", 'STATEMENT', 'WAS', 'BEING', 'UNEXPECTEDLY', 'CONFIRMED'] +6432-63723-0010-2109: hyp=['AARON', "GRAFTON'S", 'STATEMENT', 'WAS', 'BEING', 'UNEXPECTEDLY', 'CONFIRMED'] +6432-63723-0011-2110: ref=['HE', 'REMEMBERED', 'THAT', 'CYNTHIA', 'AND', 'GRAFTON', 'HAD', 'ONCE', 'BEEN', 'IN', 'LOVE', 'WITH', 'EACH', 'OTHER'] +6432-63723-0011-2110: hyp=['HE', 'REMEMBERED', 'THAT', 'CYNTHIA', 'AND', 'GRAFTON', 'HAD', 'ONCE', 'BEEN', 'IN', 'LOVE', 'WITH', 'EACH', 'OTHER'] +6432-63723-0012-2111: ref=['SHE', 'SAID', 'HE', 'HAD', 'STRUCK', 'HER', 'MORE', 'THAN', 'ONCE', 'AND', 'SHE', 'COULD', 'STAND', 'IT', 'NO', 'LONGER'] +6432-63723-0012-2111: hyp=['SHE', 'SAID', 'HE', 'HAD', 'STRUCK', 'HER', 'MORE', 'THAN', 'ONCE', 'AND', 'SHE', 'COULD', 'STAND', 'IT', 'NO', 'LONGER'] +6432-63723-0013-2112: ref=['BECAUSE', 'LARCH', 'MADE', 'NO', 'DEFENSE'] +6432-63723-0013-2112: hyp=['BECAUSE', 'LARCH', 'MADE', 'NO', 'DEFENCE'] +6432-63723-0014-2113: ref=['LARCH', 'BY', 'REFUSING', 'TO', 'APPEAR', 'PRACTICALLY', 'ADMITTED', 'THE', 'CHARGES', 'AGAINST', 'HIM', 'AND', 'DID', 'NOT', 'OPPOSE', 'THE', 'SEPARATION'] +6432-63723-0014-2113: hyp=['LARCH', 'BY', 'REFUSING', 'TO', 'APPEAR', 'PRACTICALLY', 'ADMITTED', 'THE', 'CHARGES', 'AGAINST', 'HIM', 'AND', 'DID', 'NOT', 'OPPOSE', 'THE', 'SEPARATION'] +6432-63723-0015-2114: ref=['SO', 'I', 'HAD', 'TO', 'LET', 'HER', 'HAVE', 'HER', 'WAY', 'AND', 'WE', 'DID', 'NOT', 'ASK', 'THE', 'COURT', 'FOR', 'MONEY', 'THOUGH', 'I', 'HAD', 'NO', 'SUCH', 'SQUEAMISH', 'FEELINGS', 'WHEN', 'IT', 'CAME', 'TO', 'MY', 'COUNSEL', 'FEE'] +6432-63723-0015-2114: hyp=['SO', 'I', 'HAD', 'TO', 'LET', 'HER', 'HAVE', 'HER', 'WAY', 'AND', 'WE', 'DID', 'NOT', 'ASK', 'THE', 'COURT', 'FOR', 'MONEY', 'THOUGH', 'I', 'HAD', 'NO', 'SUCH', 'SQUEAMISH', 'FEELINGS', 'WHEN', 'IT', 'CAME', 'TO', 'MY', 'COUNSEL', 'FEE'] +6432-63723-0016-2115: ref=['NO', 'BUT', 'HE', 'WILL', 'OR', "I'LL", 'SUE', 'HIM', 'AND', 'GET', 'JUDGMENT', 'OH', "HE'LL", 'PAY', 'ALL', 'RIGHT'] +6432-63723-0016-2115: hyp=['NO', 'BUT', 'HE', 'WILL', 'OR', "I'LL", 'SUE', 'HIM', 'AND', 'GET', 'JUDGMENT', 'OH', "HE'LL", 'PAY', 'ALL', 'RIGHT'] +6432-63723-0017-2116: ref=['AND', 'IT', 'TAKES', 'ALL', 'SORTS', 'OF', 'PERSONS', 'TO', 'MAKE', 'IT', 'UP'] +6432-63723-0017-2116: hyp=['AND', 'IT', 'TAKES', 'ALL', 'SORTS', 'OF', 'PERSONS', 'TO', 'MAKE', 'IT', 'UP'] +6432-63723-0018-2117: ref=['STILL', 'I', 'WOULD', 'LIKE', 'TO', 'KNOW'] +6432-63723-0018-2117: hyp=['STILL', 'I', 'WOULD', 'LIKE', 'TO', 'KNOW'] +6432-63723-0019-2118: ref=['THE', 'MURDER', 'OF', 'MISSUS', 'DARCY', 'HAD', 'SOME', 'TIME', 'AGO', 'BEEN', 'SHIFTED', 'OFF', 'THE', 'FRONT', 'PAGE', 'THOUGH', 'IT', 'WOULD', 'GET', 'BACK', 'THERE', 'WHEN', 'THE', 'YOUNG', 'JEWELER', 'WAS', 'TRIED'] +6432-63723-0019-2118: hyp=['THE', 'MURDER', 'OF', 'MISSUS', 'DARCY', 'HAD', 'SOME', 'TIME', 'AGO', 'BEEN', 'SHIFTED', 'OFF', 'THE', 'FRONT', 'PAGE', 'THOUGH', 'IT', 'WOULD', 'GET', 'BACK', 'THERE', 'WHEN', 'THE', 'YOUNG', 'JEWELER', 'WAS', 'TRIED'] +6432-63723-0020-2119: ref=['IT', 'HAD', 'A', 'DOUBLE', 'REPUTATION', 'SO', 'TO', 'SPEAK'] +6432-63723-0020-2119: hyp=['IT', 'HAD', 'A', 'DOUBLE', 'REPUTATION', 'SO', 'TO', 'SPEAK'] +6432-63723-0021-2120: ref=['GRAVE', 'AND', 'EVEN', 'REVEREND', 'CONVENTIONS', 'ASSEMBLED', 'IN', 'ITS', 'BALLROOM', 'AND', 'POLITICIANS', 'OF', 'THE', 'UPPER', 'IF', 'NOT', 'BETTER', 'CLASS', 'WERE', 'FREQUENTLY', 'SEEN', 'IN', 'ITS', 'DINING', 'ROOM', 'OR', 'CAFE'] +6432-63723-0021-2120: hyp=['GRAVE', 'AND', 
'EVEN', 'REVEREND', 'THE', 'CONVENTIONS', 'ASSEMBLED', 'IN', 'ITS', 'BALL', 'ROOM', 'AND', 'POLITICIANS', 'OF', 'THE', 'UPPER', 'IF', 'NOT', 'BETTER', 'CLASS', 'WERE', 'FREQUENTLY', 'SEEN', 'IN', 'ITS', 'DINING', 'ROOM', 'OR', 'CAFE'] +6432-63723-0022-2121: ref=['LARCH', 'HIMSELF', 'WAS', 'A', 'PECULIAR', 'CHARACTER'] +6432-63723-0022-2121: hyp=['LARCH', 'HIMSELF', 'WAS', 'A', 'PECULIAR', 'CHARACTER'] +6432-63723-0023-2122: ref=['IN', 'A', 'SMALLER', 'PLACE', 'HE', 'WOULD', 'HAVE', 'BEEN', 'CALLED', 'A', 'SALOON', 'KEEPER'] +6432-63723-0023-2122: hyp=['IN', 'A', 'SMALLER', 'PLACE', 'HE', 'WOULD', 'HAVE', 'BEEN', 'CALLED', 'A', 'SALOON', 'KEEPER'] +6432-63723-0024-2123: ref=['AND', 'IT', 'WAS', 'THIS', 'MAN', 'RICH', 'IT', 'WAS', 'SAID', 'HANDSOME', 'CERTAINLY', 'THAT', 'CYNTHIA', 'RATCHFORD', 'HAD', 'MARRIED'] +6432-63723-0024-2123: hyp=['AND', 'IT', 'WAS', 'THIS', 'MAN', 'RICH', 'IT', 'WAS', 'SAID', 'HANDSOME', 'CERTAINLY', 'THAT', 'CYNTHIA', 'RACHFORD', 'HAD', 'MARRIED'] +6432-63723-0025-2124: ref=['TO', 'THIS', 'WAS', 'THE', 'ANSWER', 'WHISPERED', 'MONEY'] +6432-63723-0025-2124: hyp=['TO', 'THIS', 'WAS', 'THE', 'ANSWER', 'WHISPERED', 'MONEY'] +6432-63723-0026-2125: ref=['AND', 'IN', 'A', 'WAY', 'IT', 'WAS', 'TRUE'] +6432-63723-0026-2125: hyp=['AND', 'IN', 'A', 'WAY', 'IT', 'WAS', 'TRUE'] +6432-63723-0027-2126: ref=['SHE', 'ALSO', 'SAW', 'AN', 'OPPORTUNITY', 'OF', 'PAYING', 'OLD', 'DEBTS', 'AND', 'REAPING', 'SOME', 'REVENGES'] +6432-63723-0027-2126: hyp=['SHE', 'ALSO', 'SAW', 'AN', 'OPPORTUNITY', 'OF', 'PAYING', 'OLD', 'DEBTS', 'AND', 'REAPING', 'SOME', 'REVENGES'] +6432-63723-0028-2127: ref=['AFTER', 'THE', 'MARRIAGE', 'WHICH', 'WAS', 'A', 'BRILLIANT', 'AND', 'GAY', 'ONE', 'IF', 'NOT', 'HAPPY', 'THE', 'LARCH', 'HOTEL', 'IT', 'COULD', 'HARDLY', 'BE', 'CALLED', 'A', 'HOME', 'BECAME', 'THE', 'SCENE', 'OF', 'MANY', 'FESTIVE', 'OCCASIONS'] +6432-63723-0028-2127: hyp=['AFTER', 'THE', 'MARRIAGE', 'WHICH', 'WAS', 'A', 'BRILLIANT', 'AND', 'GAY', 'ONE', 'IF', 'NOT', 'HAPPY', 'THE', 'LARCH', 'HOTEL', 'IT', 'COULD', 'HARDLY', 'BE', 'CALLED', 'A', 'HOME', 'BECAME', 'THE', 'SCENE', 'OF', 'MANY', 'FESTIVE', 'OCCASIONS'] +6432-63723-0029-2128: ref=['THEN', 'IT', 'WAS', 'SAID', 'OF', 'LARCH', 'THAT', 'SOON', 'AFTER', 'THE', 'ECHOES', 'OF', 'THE', 'WEDDING', 'CHIMES', 'HAD', 'DIED', 'AWAY', 'HE', 'HAD', 'BEGUN', 'TO', 'TREAT', 'HIS', 'WIFE', 'WITH', 'REFINED', 'CRUELTY', 'THAT', 'HIDDEN', 'AWAY', 'FROM', 'THE', 'PUBLIC', 'UNDERNEATH', 'HIS', 'HABITUAL', 'MANNER', 'THERE', 'WAS', 'THE', 'RAWNESS', 'OF', 'THE', 'BRUTE'] +6432-63723-0029-2128: hyp=['THEN', 'IT', 'WAS', 'SAID', 'OF', 'LARGE', 'THAT', 'SOON', 'AFTER', 'THE', 'ECHOES', 'OF', 'THE', 'WEDDING', 'CHIMES', 'HAD', 'DIED', 'AWAY', 'HE', 'HAD', 'BEGUN', 'TO', 'TREAT', 'HIS', 'WIFE', 'WITH', 'A', 'REFINED', 'CRUELTY', 'THAT', 'HIDDEN', 'AWAY', 'FROM', 'THE', 'PUBLIC', 'UNDERNEATH', 'HIS', 'HABITUAL', 'MANNER', 'THERE', 'WAS', 'THE', 'RAWNESS', 'OF', 'THE', 'BRUTE'] +6432-63723-0030-2129: ref=['BUT', 'IT', 'WAS', 'NOTICED', 'THAT', 'THE', 'OLDER', 'AND', 'MORE', 'CONSERVATIVE', 'FAMILIES', 'WERE', 'LESS', 'OFTEN', 'REPRESENTED', 'AND', 'WHEN', 'THEY', 'WERE', 'IT', 'WAS', 'BY', 'SOME', 'OF', 'THE', 'YOUNGER', 'MEMBERS', 'WHOSE', 'REPUTATIONS', 'WERE', 'ALREADY', 'SMIRCHED', 'OR', 'WHO', 'HAD', 'NOT', 'YET', 'ACQUIRED', 'ANY', 'AND', 'WERE', 'WILLING', 'TO', 'TAKE', 'A', 'CHANCE'] +6432-63723-0030-2129: hyp=['BUT', 'IT', 'WAS', 'NOTICED', 'THAT', 'THE', 'OLDER', 'AND', 'MORE', 'CONSERVATIVE', 'FAMILIES', 'WERE', 'LESS', 'OFTEN', 'REPRESENTED', 'AND', 
'WHEN', 'THEY', 'WERE', 'IT', 'WAS', 'BY', 'SOME', 'OF', 'THE', 'YOUNGER', 'MEMBERS', 'WHOSE', 'REPUTATIONS', 'WERE', 'ALREADY', 'SMIRCHED', 'OR', 'WHO', 'HAD', 'NOT', 'YET', 'ACQUIRED', 'ANY', 'AND', 'WERE', 'WILLING', 'TO', 'TAKE', 'A', 'CHANCE'] +6432-63723-0031-2130: ref=['IT', "WOULDN'T", 'DO', 'YOU', 'KNOW', 'AFTER', 'THAT', 'STORY', 'CAME', 'OUT', 'FOR', 'ME', 'AND', 'THE', 'VICE', 'CHANCELLOR', 'WHO', 'SAT', 'IN', 'THE', 'CASE', 'AS', 'WELL', 'AS', 'OTHER', 'JUDGES', 'AND', 'MEMBERS', 'OF', 'THE', 'BAR', 'TO', 'BE', 'SEEN', 'THERE', 'KENNETH', 'EXPLAINED', 'TO', 'THE', 'COLONEL'] +6432-63723-0031-2130: hyp=['IT', "WOULDN'T", 'DO', 'YOU', 'KNOW', 'AFTER', 'THAT', 'STORY', 'CAME', 'OUT', 'FOR', 'ME', 'AND', 'THE', 'VICE', 'CHANCELLOR', 'WHO', 'SAT', 'IN', 'THE', 'CASE', 'AS', 'WELL', 'AS', 'OTHER', 'JUDGES', 'AND', 'MEMBERS', 'OF', 'THE', 'BAR', 'TO', 'BE', 'SEEN', 'THERE', 'KENNETH', 'EXPLAINED', 'TO', 'THE', 'COLONEL'] +6432-63723-0032-2131: ref=['MEANWHILE', 'COLONEL', 'ASHLEY', 'WAS', 'A', 'VERY', 'BUSY', 'MAN', 'AND', 'TO', 'NO', 'ONE', 'DID', 'HE', 'TELL', 'VERY', 'MUCH', 'ABOUT', 'HIS', 'ACTIVITIES', 'HE', 'SAW', 'DARCY', 'FREQUENTLY', 'AT', 'THE', 'JAIL', 'AND', 'TO', 'THAT', 'YOUNG', "MAN'S", 'PLEADINGS', 'THAT', 'SOMETHING', 'BE', 'DONE', 'ALWAYS', 'RETURNED', 'THE', 'ANSWER'] +6432-63723-0032-2131: hyp=['MEANWHILE', 'COLONEL', 'ASHLEY', 'WAS', 'A', 'VERY', 'BUSY', 'MAN', 'AND', 'TO', 'NO', 'ONE', 'DID', 'HE', 'TELL', 'VERY', 'MUCH', 'ABOUT', 'HIS', 'ACTIVITIES', 'HE', 'SAW', 'DARCY', 'FREQUENTLY', 'AT', 'THE', 'JAIL', 'AND', 'TO', 'THAT', 'YOUNG', "MAN'S", 'PLEADINGS', 'THAT', 'SOMETHING', 'TO', 'BE', 'DONE', 'ALWAYS', 'RETURNED', 'THE', 'ANSWER'] +6432-63723-0033-2132: ref=["DON'T", 'WORRY', 'IT', 'WILL', 'COME', 'OUT', 'ALL', 'RIGHT'] +6432-63723-0033-2132: hyp=["DON'T", 'WORRY', 'IT', 'WILL', 'COME', 'OUT', 'ALL', 'RIGHT'] +6432-63723-0034-2133: ref=["I'M", 'GOING', 'TO', 'RECTIFY', 'THEM', 'BUT', 'IT', 'WILL', 'TAKE', 'TIME'] +6432-63723-0034-2133: hyp=["I'M", 'GOING', 'TO', 'RECTIFY', 'THEM', 'BUT', 'IT', 'WILL', 'TAKE', 'TIME'] +6432-63723-0035-2134: ref=["IT'S", 'HARD', 'FOR', 'MISS', 'MASON', 'TOO', 'ALTHOUGH', "SHE'S", 'BEARING', 'UP', 'LIKE', 'A', 'MAJOR'] +6432-63723-0035-2134: hyp=["IT'S", 'HARD', 'FOR', 'MISS', 'MASON', 'TOO', 'ALTHOUGH', "SHE'S", 'BEARING', 'UP', 'LIKE', 'A', 'MAJOR'] +6432-63723-0036-2135: ref=['SO', 'KING', 'GOT', 'BAIL', 'WHO', 'PUT', 'IT', 'UP'] +6432-63723-0036-2135: hyp=['SO', 'KING', 'GOT', 'BAIL', 'WHO', 'PUT', 'IT', 'UP'] +6432-63723-0037-2136: ref=['IT', 'WAS', 'HIGH', 'LARCH'] +6432-63723-0037-2136: hyp=['IT', 'WAS', 'TIME', 'LARCH'] +6432-63723-0038-2137: ref=['THEY', 'TOOK', 'HARRY', 'AWAY', 'A', 'WHILE', 'AGO'] +6432-63723-0038-2137: hyp=['THEY', 'TOOK', 'HARRY', 'AWAY', 'A', 'WHILE', 'AGO'] +6432-63723-0039-2138: ref=['BUT', 'HIS', 'ARE', 'PRETTY', 'UNCERTAIN', 'SHOES', 'TO', 'BE', 'IN', 'JUST', 'THE', 'SAME'] +6432-63723-0039-2138: hyp=['BUT', 'HE', 'HAS', 'A', 'PRETTY', 'UNCERTAIN', 'SHOES', 'TO', 'BE', 'IN', 'JUST', 'THE', 'SAME'] +6432-63723-0040-2139: ref=['ONLY', 'THAT', 'I', 'DARCY', 'HESITATED', 'AND', 'GREW', 'RED'] +6432-63723-0040-2139: hyp=['ONLY', 'THAT', 'I', 'DARCY', 'HESITATED', 'AND', 'GREW', 'RED'] +6432-63723-0041-2140: ref=['GOOD', 'EVENING', 'COLONEL', 'HE', 'CALLED', 'GENIALLY', 'WILL', 'YOU', 'JOIN', 'ME', 'IN', 'A', 'WELSH', 'RABBIT'] +6432-63723-0041-2140: hyp=['GOOD', 'EVENING', 'COLONEL', 'HE', 'CALLED', 'GENIALLY', 'WILL', 'YOU', 'JOIN', 'ME', 'IN', 'A', 'WELSH', 'RABBIT'] +6432-63723-0042-2141: 
ref=['THANK', 'YOU', 'NO'] +6432-63723-0042-2141: hyp=['THANK', 'YOU', 'NO'] +6432-63723-0043-2142: ref=["I'M", 'AFRAID', 'MY', 'DIGESTION', "ISN'T", 'QUITE', 'UP', 'TO', 'THAT', 'AS', "I'VE", 'HAD', 'TO', 'CUT', 'OUT', 'MY', 'FISHING', 'OF', 'LATE'] +6432-63723-0043-2142: hyp=["I'M", 'AFRAID', 'MY', 'DIGESTION', "ISN'T", 'QUITE', 'UP', 'TO', 'THAT', 'AS', "I'VE", 'HAD', 'TO', 'CUT', 'OUT', 'MY', 'FISHING', 'OF', 'LATE'] +6432-63723-0044-2143: ref=['NOW', 'AS', 'TO', 'CERTAIN', 'MATTERS', 'IN', 'THE', 'STORE', 'ON', 'THE', 'MORNING', 'OF', 'THE', 'MURDER'] +6432-63723-0044-2143: hyp=['NOW', 'AS', 'TO', 'CERTAIN', 'MATTERS', 'IN', 'THE', 'STORE', 'ON', 'THE', 'MORNING', 'OF', 'THE', 'MURDER'] +6432-63723-0045-2144: ref=['THE', 'STOPPED', 'CLOCKS', 'FOR', 'INSTANCE', 'HAVE', 'YOU', 'ANY', 'THEORY'] +6432-63723-0045-2144: hyp=['THE', 'STOPPED', 'CLOCKS', 'FOR', 'INSTANCE', 'HAVE', 'YOU', 'ANY', 'THEORY'] +6432-63723-0046-2145: ref=['THERE', 'WERE', 'THREE', 'OF', 'THEM', 'THE', 'CENTER', 'FIGURE', 'BEING', 'THAT', 'OF', 'HARRY', 'KING', 'AND', 'HE', 'WAS', 'VERY', 'MUCH', 'INTOXICATED'] +6432-63723-0046-2145: hyp=['THERE', 'WERE', 'THREE', 'OF', 'THEM', 'THE', 'CENTER', 'FIGURE', 'BEING', 'THAT', 'OF', 'HARRY', 'KING', 'AND', 'HE', 'WAS', 'VERY', 'MUCH', 'INTOXICATED'] +6432-63723-0047-2146: ref=['THAT', 'IS', 'NOT', 'ALWAYS', 'BUT', 'SOMETIMES', 'IT', 'HAPPENED', 'TO', 'BE', 'SO', 'NOW'] +6432-63723-0047-2146: hyp=['THAT', 'IS', 'NOT', 'ALWAYS', 'BUT', 'SOMETIMES', 'IT', 'HAPPENED', 'TO', 'BE', 'SO', 'NOW'] +6432-63723-0048-2147: ref=['I', 'BEG', 'YOUR', 'PARDON', 'HE', 'SAID', 'IN', 'THE', 'CULTURED', 'TONES', 'HE', 'KNEW', 'SO', 'WELL', 'HOW', 'TO', 'USE', 'YET', 'OF', 'WHICH', 'HE', 'MADE', 'SO', 'LITTLE', 'USE', 'OF', 'LATE'] +6432-63723-0048-2147: hyp=['I', 'BEG', 'YOUR', 'PARDON', 'HE', 'SAID', 'IN', 'THE', 'CULTURED', 'TONES', 'HE', 'KNEW', 'SO', 'WELL', 'HOW', 'TO', 'USE', 'YET', 'OF', 'WHICH', 'HE', 'MADE', 'SO', 'LITTLE', 'USE', 'OF', 'LATE'] +6432-63723-0049-2148: ref=['I', 'SAID', 'WHERE', 'HAVE', 'YOU', 'BEEN', 'REMARKED', 'THE', 'OTHER', "WE'VE", 'MISSED', 'YOU'] +6432-63723-0049-2148: hyp=['I', 'SAID', 'WHERE', 'HAVE', 'YOU', 'BEEN', 'REMARKED', 'THE', 'OTHER', "WE'VE", 'MISSED', 'YOU'] +6432-63723-0050-2149: ref=['I', 'SAID', 'I', 'WAS', 'GOLFING', 'HE', 'WENT', 'ON', 'EXCEEDINGLY', 'DISTINCTLY', 'THOUGH', 'WITH', 'AN', 'EFFORT'] +6432-63723-0050-2149: hyp=['I', 'SAID', 'I', 'WAS', 'GOLFING', 'HE', 'WENT', 'ON', 'EXCEEDINGLY', 'DISTINCTLY', 'THOUGH', 'WITH', 'AN', 'EFFORT'] +6432-63723-0051-2150: ref=['WHY', 'POLONIUS', 'SOME', 'ONE', 'ASKED'] +6432-63723-0051-2150: hyp=['WHY', 'POLONIUS', 'SOME', 'ONE', 'ASKS'] +6432-63723-0052-2151: ref=['BECAUSE', 'DEAR', 'FRIEND', 'REPLIED', 'KING', 'SOFTLY', 'HE', 'SOMEWHAT', 'RESEMBLES', 'A', 'CERTAIN', 'PERSON', 'HERE', 'WHO', 'TALKS', 'TOO', 'MUCH', 'BUT', 'WHO', 'IS', 'NOT', 'SO', 'WISE', 'AS', 'HE', 'THINKS'] +6432-63723-0052-2151: hyp=['BECAUSE', 'DEAR', 'FRIEND', 'REPLIED', 'KING', 'SOFTLY', 'HE', 'SOMEWHAT', 'RESEMBLES', 'A', 'CERTAIN', 'PERSON', 'HERE', 'WHO', 'TALKS', 'TOO', 'MUCH', 'BUT', 'WHO', 'IS', 'NOT', 'SO', 'WISE', 'AS', 'HE', 'THINKS'] +6432-63723-0053-2152: ref=['THERE', 'WAS', 'A', 'RATTLE', 'OF', 'COINS', 'ON', 'THE', 'MAHOGANY', 'BAR', 'AS', 'KING', 'SOUGHT', 'TO', 'DISENTANGLE', 'A', 'SINGLE', 'BILL', 'FROM', 'THE', 'WADDED', 'UP', 'CURRENCY', 'IN', 'HIS', 'POCKET'] +6432-63723-0053-2152: hyp=['THERE', 'WAS', 'A', 'RATTLE', 'OF', 'COINS', 'ON', 'THE', 'MAHOGANY', 'BAR', 'AS', 'KING', 'SOUGHT', 'TO', 'DISENTANGLE', 
'A', 'SINGLE', 'BILL', 'FROM', 'THE', 'WADDED', 'UP', 'CURRENCY', 'IN', 'HIS', 'POCKET'] +6432-63723-0054-2153: ref=["IT'S", "IT'S", 'AN', 'ODD', 'COIN', 'AN', 'OLD', 'ROMAN', 'ONE', 'THAT', 'MISSUS', 'DARCY', 'HAD', 'IN', 'HER', 'PRIVATE', 'COLLECTION', 'KEPT', 'IN', 'THE', 'JEWELRY', 'STORE', 'SAFE', 'WAS', 'THE', 'WHISPERED', 'ANSWER'] +6432-63723-0054-2153: hyp=["IT'S", "IT'S", 'AN', 'ODD', 'COIN', 'AN', 'OLD', 'ROMAN', 'ONE', 'THAT', 'MISSUS', 'DARCY', 'HAD', 'IN', 'HER', 'PRIVATE', 'COLLECTION', 'KEPT', 'IN', 'THE', 'JEWELRY', 'STORE', 'SAFE', 'WAS', 'THE', 'WHISPERED', 'ANSWER'] +6432-63723-0055-2154: ref=['I', 'WENT', 'OVER', 'THEM', 'THE', 'OTHER', 'DAY', 'AND', 'NOTICED', 'SOME', 'WERE', 'MISSING', 'THOUGH', 'I', 'SAW', 'THEM', 'ALL', 'WHEN', 'I', 'PAID', 'A', 'VISIT', 'TO', 'HER', 'JUST', 'A', 'SHORT', 'TIME', 'BEFORE', 'SHE', 'WAS', 'KILLED'] +6432-63723-0055-2154: hyp=['I', 'WENT', 'OVER', 'THEM', 'THE', 'OTHER', 'DAY', 'AND', 'NOTICED', 'SOME', 'WERE', 'MISSING', 'THOUGH', 'I', 'SAW', 'THEM', 'ALL', 'WHEN', 'I', 'PAID', 'A', 'VISIT', 'TO', 'HER', 'JUST', 'A', 'SHORT', 'TIME', 'BEFORE', 'SHE', 'WAS', 'KILLED'] +6432-63723-0056-2155: ref=['THAT', 'WAS', 'HERS', 'WENT', 'ON', 'THE', 'JEWELER'] +6432-63723-0056-2155: hyp=['THAT', 'WAS', 'HERS', 'WENT', 'ON', 'THE', 'JAWER'] +6432-63723-0057-2156: ref=['NOW', 'HARRY', 'KING', 'HAS', 'IT', 'EXCLAIMED', 'COLONEL', 'ASHLEY'] +6432-63723-0057-2156: hyp=['NOW', 'HARRY', 'KING', 'HAS', 'IT', 'EXCLAIMED', 'COLONEL', 'ASHLEY'] +6938-70848-0000-2157: ref=['EVEN', 'THE', 'SUN', 'CAME', 'OUT', 'PALE', 'AND', 'WATERY', 'AT', 'NOON'] +6938-70848-0000-2157: hyp=['EVEN', 'THE', 'SUN', 'CAME', 'OUT', 'PALE', 'AND', 'WATERY', 'AT', 'NOON'] +6938-70848-0001-2158: ref=['THE', 'COLDS', 'AND', 'RHEUMATISM', 'OF', 'THE', 'RAINY', 'MONTHS', 'VANISHED'] +6938-70848-0001-2158: hyp=['THE', 'COLDS', 'AND', 'RHEUMATISM', 'OF', 'THE', 'RAINY', 'MONTHS', 'VANISHED'] +6938-70848-0002-2159: ref=['ASKED', 'A', 'WORKER', 'LAST', 'SUNDAY', 'YOU', 'DID', 'IT', 'WHEN', 'THE', 'YUNKERS'] +6938-70848-0002-2159: hyp=['AS', 'TO', 'WORKER', 'LAST', 'SUNDAY', 'YOU', 'DID', 'IT', 'WHEN', 'THE', 'YUNKERS'] +6938-70848-0003-2160: ref=['WELL', "DIDN'T", 'THEY', 'SHOOT', 'US', 'ONE', 'MAN', 'EXHIBITED', 'HIS', 'ARM', 'IN', 'A', 'SLING'] +6938-70848-0003-2160: hyp=['WELL', "DIDN'T", 'THEY', 'SHOOT', 'US', 'ONE', 'MAN', 'EXHIBITED', 'HIS', 'ARM', 'IN', 'A', 'SLING'] +6938-70848-0004-2161: ref=["HAVEN'T", 'I', 'GOT', 'SOMETHING', 'TO', 'REMEMBER', 'THEM', 'BY', 'THE', 'DEVILS'] +6938-70848-0004-2161: hyp=["HAVEN'T", 'I', 'GOT', 'SOMETHING', 'TO', 'REMEMBER', 'THEM', 'BY', 'THE', 'DEVILS'] +6938-70848-0005-2162: ref=['WHO', 'ARE', 'YOU', 'TO', 'DESTROY', 'THE', 'LEGAL', 'GOVERNMENT', 'WHO', 'IS', 'LENIN', 'A', 'GERMAN'] +6938-70848-0005-2162: hyp=['WHO', 'ARE', 'YOU', 'TO', 'DESTROY', 'THE', 'LEGAL', 'GOVERNMENT', 'WHO', 'IS', 'LENDING', 'A', 'GERMAN'] +6938-70848-0006-2163: ref=['WHO', 'ARE', 'YOU', 'A', 'COUNTER', 'REVOLUTIONIST', 'A', 'PROVOCATOR', 'THEY', 'BELLOWED', 'AT', 'HIM'] +6938-70848-0006-2163: hyp=['WHO', 'ARE', 'YOU', 'A', 'COUNTER', 'REVOLUTIONIST', 'A', 'PROVOCATOR', 'THEY', 'BELOWED', 'AT', 'HIM'] +6938-70848-0007-2164: ref=['YOU', 'CALL', 'YOURSELVES', 'THE', 'PEOPLE', 'OF', 'RUSSIA', 'BUT', "YOU'RE", 'NOT', 'THE', 'PEOPLE', 'OF', 'RUSSIA'] +6938-70848-0007-2164: hyp=['YOU', 'CALL', 'YOURSELVES', 'THE', 'PEOPLE', 'OF', 'RUSSIA', 'BUT', 'YOU', 'ARE', 'NOT', 'THE', 'PEOPLE', 'OF', 'RUSSIA'] +6938-70848-0008-2165: ref=['THE', 'PEASANTS', 'ARE', 'THE', 'PEOPLE', 'OF', 
'RUSSIA', 'WAIT', 'UNTIL', 'THE', 'PEASANTS'] +6938-70848-0008-2165: hyp=['THE', 'PEACE', 'AND', 'OTHER', 'PEOPLE', 'OF', 'RUSSIA', 'WAIT', 'UNTIL', 'THE', 'PEASANTS'] +6938-70848-0009-2166: ref=['WE', 'KNOW', 'WHAT', 'THE', 'PEASANTS', 'WILL', 'SAY', "AREN'T", 'THEY', 'WORKINGMEN', 'LIKE', 'OURSELVES'] +6938-70848-0009-2166: hyp=['WE', 'KNOW', 'WHAT', 'THE', 'PEASANTS', 'WILL', 'SAY', "AREN'T", 'THEY', 'WORKING', 'MEN', 'LIKE', 'OURSELVES'] +6938-70848-0010-2167: ref=['THESE', 'MEN', 'ESPECIALLY', 'WELCOMED', 'THE', 'CALL', 'TO', 'A', 'CONGRESS', 'OF', 'PEASANTS'] +6938-70848-0010-2167: hyp=['THIS', 'MAN', 'HAD', 'SPECIALLY', 'WELCOMED', 'THE', 'CALL', 'TO', 'A', 'CONGRESS', 'OF', 'PEASANTS'] +6938-70848-0011-2168: ref=['THESE', 'LAST', 'WERE', 'THE', 'YOUNG', 'GENERATION', 'WHO', 'HAD', 'BEEN', 'SERVING', 'IN', 'THE', 'ARMY'] +6938-70848-0011-2168: hyp=['THESE', 'LAST', 'WERE', 'THE', 'YOUNG', 'GENERATION', 'WHO', 'HAD', 'BEEN', 'SERVING', 'IN', 'THE', 'ARMY'] +6938-70848-0012-2169: ref=['WHEREUPON', 'THE', 'OLD', 'EXECUTIVE', 'COMMITTEE', 'LEFT', 'THE', 'HALL'] +6938-70848-0012-2169: hyp=['WHEREUPON', 'THE', 'OLD', 'EXECUTIVE', 'COMMITTEE', 'LEFT', 'THE', 'HALL'] +6938-70848-0013-2170: ref=['DOWN', 'WITH', 'HIM', 'THEY', 'SHRIEKED'] +6938-70848-0013-2170: hyp=['DOWN', 'WITH', 'HIM', 'THEY', 'SHRIEKED'] +6938-70848-0014-2171: ref=['FEARFUL', 'TUMULT', 'CRIES', 'DOWN', 'WITH', 'THE', 'BOLSHEVIKI'] +6938-70848-0014-2171: hyp=['FEARFUL', 'TUMULT', 'CHRIST', 'DOWN', 'WITH', 'THE', 'BOLSHEVIKI'] +6938-70848-0015-2172: ref=['UPON', 'MY', 'RETURN', 'I', 'VISITED', 'SMOLNY', 'NO', 'SUCH', 'ACCUSATION', 'WAS', 'MADE', 'AGAINST', 'ME', 'THERE', 'AFTER', 'A', 'BRIEF', 'CONVERSATION', 'I', 'LEFT', 'AND', "THAT'S", 'ALL', 'LET', 'ANY', 'ONE', 'PRESENT', 'MAKE', 'SUCH', 'AN', 'ACCUSATION'] +6938-70848-0015-2172: hyp=['UPON', 'MY', 'RETURN', 'I', 'VISITED', 'SMOLNEY', 'NO', 'SUCH', 'ACCUSATION', 'WAS', 'MADE', 'AGAINST', 'ME', 'THERE', 'AFTER', 'A', 'BRIEF', 'CONVERSATION', 'I', 'LEFT', 'AND', "THAT'S", 'ALL', 'LET', 'ANY', 'ONE', 'PRESENT', 'MAKE', 'SUCH', 'AN', 'ACCUSATION'] +6938-70848-0016-2173: ref=['MEANWHILE', 'THE', 'QUESTION', 'OF', 'THE', 'STATUS', 'OF', 'THE', 'EXECUTIVE', 'COMMITTEE', 'WAS', 'AGITATING', 'ALL', 'MINDS'] +6938-70848-0016-2173: hyp=['MEANWHILE', 'THE', 'QUESTION', 'OF', 'THE', 'STATUS', 'OF', 'THE', 'EXECUTIVE', 'COMMITTEE', 'WAS', 'AGITATING', 'ALL', 'MINDS'] +6938-70848-0017-2174: ref=['BY', 'DECLARING', 'THE', 'ASSEMBLY', 'EXTRAORDINARY', 'CONFERENCE', 'IT', 'HAD', 'BEEN', 'PLANNED', 'TO', 'BLOCK', 'THE', 'REELECTION', 'OF', 'THE', 'EXECUTIVE', 'COMMITTEE'] +6938-70848-0017-2174: hyp=['BY', 'DECLARING', 'THE', 'ASSEMBLY', 'EXTRAORDINARY', 'CONFERENCE', 'IT', 'HAD', 'BEEN', 'PLANNED', 'TO', 'BLOCK', 'THE', 'REELECTION', 'OF', 'THE', 'EXECUTIVE', 'COMMITTEE'] +6938-70848-0018-2175: ref=['BUT', 'THIS', 'WORKED', 'BOTH', 'WAYS', 'THE', 'LEFT', 'SOCIALIST', 'REVOLUTIONISTS', 'DECIDED', 'THAT', 'IF', 'THE', 'CONGRESS', 'HAD', 'NO', 'POWER', 'OVER', 'THE', 'EXECUTIVE', 'COMMITTEE', 'THEN', 'THE', 'EXECUTIVE', 'COMMITTEE', 'HAD', 'NO', 'POWER', 'OVER', 'THE', 'CONGRESS'] +6938-70848-0018-2175: hyp=['BUT', 'THIS', 'WORKED', 'BOTH', 'WAYS', 'THE', 'LAST', 'SOCIALIST', 'REVOLUTIONIST', 'DECIDED', 'THAT', 'IF', 'THE', 'CONGRESS', 'HAD', 'NO', 'POWER', 'OVER', 'THE', 'EXECUTIVE', 'COMMITTEE', 'THEN', 'THE', 'EXECUTIVE', 'COMMITTEE', 'HAD', 'NO', 'POWER', 'OVER', 'THE', 'CONGRESS'] +6938-70848-0019-2176: ref=['ON', 'THE', 'TWENTY', 'SEVENTH', 'OCCURRED', 'THE', 'DEBATE', 'ON', 'THE', 
'LAND', 'QUESTION', 'WHICH', 'REVEALED', 'THE', 'DIFFERENCES', 'BETWEEN', 'THE', 'AGRARIAN', 'PROGRAMME', 'OF', 'THE', 'BOLSHEVIKI', 'AND', 'THE', 'LEFT', 'SOCIALIST', 'REVOLUTIONARIES'] +6938-70848-0019-2176: hyp=['ON', 'THE', 'TWENTY', 'SEVENTH', 'OCCURRED', 'THE', 'DEBATE', 'ON', 'THE', 'LAND', 'QUESTION', 'WHICH', 'REVEALED', 'THE', 'DIFFERENCES', 'BETWEEN', 'THE', 'AGRIEAN', 'PROGRAMME', 'OF', 'THE', 'BOLSHEVIKI', 'AND', 'THE', 'LEFT', 'SOCIALIST', 'REVOLUTIONARIES'] +6938-70848-0020-2177: ref=['THE', 'CONSTITUENT', 'ASSEMBLY', 'WILL', 'NOT', 'DARE', 'TO', 'BREAK', 'WITH', 'THE', 'WILL', 'OF', 'THE', 'PEOPLE'] +6938-70848-0020-2177: hyp=['THE', 'CONSTITUENT', 'ASSEMBLY', 'WILL', 'NOT', 'DARE', 'TO', 'BREAK', 'WITH', 'THE', 'WILL', 'OF', 'THE', 'PEOPLE'] +6938-70848-0021-2178: ref=['FOLLOWED', 'HIM', 'LENIN', 'LISTENED', 'TO', 'NOW', 'WITH', 'ABSORBING', 'INTENSITY'] +6938-70848-0021-2178: hyp=['FOLLOWED', 'HIM', 'LENIN', 'LISTENED', 'TO', 'NOW', 'WITH', 'ABSORBING', 'INTENSITY'] +6938-70848-0022-2179: ref=['THE', 'FIRST', 'STAGE', 'WAS', 'THE', 'CRUSHING', 'OF', 'AUTOCRACY', 'AND', 'THE', 'CRUSHING', 'OF', 'THE', 'POWER', 'OF', 'THE', 'INDUSTRIAL', 'CAPITALISTS', 'AND', 'LAND', 'OWNERS', 'WHOSE', 'INTERESTS', 'ARE', 'CLOSELY', 'RELATED'] +6938-70848-0022-2179: hyp=['THE', 'FIRST', 'STAGE', 'WAS', 'THE', 'CRUSHING', 'OF', 'AUTOCRACY', 'AND', 'THE', 'CRUSHING', 'OF', 'THE', 'POWER', 'OF', 'THE', 'INDUSTRIAL', 'CAPITALIST', 'AND', 'THE', 'LANDOWNERS', 'WHOSE', 'INTERESTS', 'ARE', 'CLOSELY', 'RELATED'] +6938-70848-0023-2180: ref=['THE', 'DUMAS', 'AND', 'ZEMSTVOS', 'WERE', 'DROPPED'] +6938-70848-0023-2180: hyp=['THE', 'TUMICE', 'AND', 'THEM', 'STOVES', 'WERE', 'DROPPED'] +6938-70848-0024-2181: ref=['HE', 'KNEW', 'THAT', 'AN', 'AGREEMENT', 'WITH', 'THE', 'BOLSHEVIKI', 'WAS', 'BEING', 'DISCUSSED', 'BUT', 'HE', 'DID', 'NOT', 'KNOW', 'THAT', 'IT', 'HAD', 'BEEN', 'CONCLUDED'] +6938-70848-0024-2181: hyp=['HE', 'KNEW', 'THAT', 'AN', 'AGREEMENT', 'WITH', 'THE', 'BOLSHEVIKI', 'WAS', 'BEING', 'DISCUSSED', 'BUT', 'HE', 'DID', 'NOT', 'KNOW', 'THAT', 'IT', 'HAD', 'BEEN', 'CONCLUDED'] +6938-70848-0025-2182: ref=['HE', 'SPOKE', 'TO', 'THE', 'RUMP', 'CONVENTION'] +6938-70848-0025-2182: hyp=['HE', 'SPOKE', 'TO', 'THE', 'RUM', 'CONVENTION'] +6938-70848-0026-2183: ref=['THE', 'VILLAGES', 'WILL', 'SAVE', 'US', 'IN', 'THE', 'END'] +6938-70848-0026-2183: hyp=['THE', 'VILLAGES', 'WILL', 'SAVE', 'US', 'IN', 'THE', 'END'] +6938-70848-0027-2184: ref=['BUT', 'THE', 'PRESENT', 'MOVEMENT', 'IS', 'INTERNATIONAL', 'AND', 'THAT', 'IS', 'WHY', 'IT', 'IS', 'INVINCIBLE'] +6938-70848-0027-2184: hyp=['BUT', 'THE', 'PRESENT', 'MOMENT', 'IS', 'INTERNATIONAL', 'AND', 'THAT', 'IS', 'WHY', 'IT', 'IS', 'INVINCIBLE'] +6938-70848-0028-2185: ref=['THE', 'WILL', 'OF', 'MILLIONS', 'OF', 'WORKERS', 'IS', 'NOW', 'CONCENTRATED', 'IN', 'THIS', 'HALL'] +6938-70848-0028-2185: hyp=['THE', 'WILL', 'OF', 'MILLIONS', 'OF', 'WORKERS', 'IS', 'NOW', 'CONCENTRATED', 'IN', 'THE', 'HALL'] +6938-70848-0029-2186: ref=['A', 'NEW', 'HUMANITY', 'WILL', 'BE', 'BORN', 'OF', 'THIS', 'WAR'] +6938-70848-0029-2186: hyp=['A', 'NEW', 'HUMANITY', 'WILL', 'BE', 'BORN', 'OF', 'THIS', 'WAR'] +6938-70848-0030-2187: ref=['I', 'GREET', 'YOU', 'WITH', 'THE', 'CHRISTENING', 'OF', 'A', 'NEW', 'RUSSIAN', 'LIFE', 'AND', 'FREEDOM'] +6938-70848-0030-2187: hyp=['I', 'GREET', 'YOU', 'WITH', 'THE', 'CHRISTENING', 'OF', 'A', 'NEW', 'RUSSIAN', 'LIFE', 'AND', 'FREEDOM'] +7018-75788-0000-2188: ref=['THEN', 'I', 'TOOK', 'UP', 'A', 'GREAT', 'STONE', 'FROM', 'AMONG', 'THE', 'TREES', 
'AND', 'COMING', 'UP', 'TO', 'HIM', 'SMOTE', 'HIM', 'THEREWITH', 'ON', 'THE', 'HEAD', 'WITH', 'ALL', 'MY', 'MIGHT', 'AND', 'CRUSHED', 'IN', 'HIS', 'SKULL', 'AS', 'HE', 'LAY', 'DEAD', 'DRUNK'] +7018-75788-0000-2188: hyp=['THEN', 'I', 'TOOK', 'UP', 'A', 'GREAT', 'STONE', 'FROM', 'AMONG', 'THE', 'TREES', 'AND', 'COMING', 'UP', 'TO', 'HIM', 'SMOTE', 'HIM', 'THEREWITH', 'ON', 'THE', 'HEAD', 'WITH', 'ALL', 'MY', 'MIGHT', 'AND', 'CRUSHED', 'IN', 'HIS', 'SKULL', 'AS', 'HE', 'LAY', 'DEAD', 'DRUNK'] +7018-75788-0001-2189: ref=['BEHOLD', 'A', 'SHIP', 'WAS', 'MAKING', 'FOR', 'THE', 'ISLAND', 'THROUGH', 'THE', 'DASHING', 'SEA', 'AND', 'CLASHING', 'WAVES'] +7018-75788-0001-2189: hyp=['BEHOLD', 'A', 'SHIP', 'WAS', 'MAKING', 'FOR', 'THE', 'ISLAND', 'THROUGH', 'THE', 'DASHING', 'SEA', 'AND', 'CLASHING', 'WAVES'] +7018-75788-0002-2190: ref=['HEARING', 'THIS', 'I', 'WAS', 'SORE', 'TROUBLED', 'REMEMBERING', 'WHAT', 'I', 'HAD', 'BEFORE', 'SUFFERED', 'FROM', 'THE', 'APE', 'KIND'] +7018-75788-0002-2190: hyp=['HEARING', 'THIS', 'I', 'WAS', 'SORE', 'TROUBLED', 'REMEMBERING', 'WHAT', 'I', 'HAD', 'BEFORE', 'SUFFERED', 'FROM', 'THE', 'APE', 'KIND'] +7018-75788-0003-2191: ref=['UPON', 'THIS', 'HE', 'BROUGHT', 'ME', 'A', 'COTTON', 'BAG', 'AND', 'GIVING', 'IT', 'TO', 'ME', 'SAID', 'TAKE', 'THIS', 'BAG', 'AND', 'FILL', 'IT', 'WITH', 'PEBBLES', 'FROM', 'THE', 'BEACH', 'AND', 'GO', 'FORTH', 'WITH', 'A', 'COMPANY', 'OF', 'THE', 'TOWNSFOLK', 'TO', 'WHOM', 'I', 'WILL', 'GIVE', 'A', 'CHARGE', 'RESPECTING', 'THEE'] +7018-75788-0003-2191: hyp=['UPON', 'THIS', 'HE', 'BROUGHT', 'ME', 'A', 'COTTON', 'BAG', 'AND', 'GIVING', 'IT', 'TO', 'ME', 'SAID', 'TAKE', 'THIS', 'BAG', 'AND', 'FILL', 'IT', 'WITH', 'PEBBLES', 'FROM', 'THE', 'BEACH', 'AND', 'GO', 'FORTH', 'WITH', 'A', 'COMPANY', 'OF', 'THE', 'TOWNSFOLK', 'TO', 'WHOM', 'I', 'WILL', 'GIVE', 'A', 'CHARGE', 'RESPECTING', 'THEE'] +7018-75788-0004-2192: ref=['DO', 'AS', 'THEY', 'DO', 'AND', 'BELIKE', 'THOU', 'SHALT', 'GAIN', 'WHAT', 'MAY', 'FURTHER', 'THY', 'RETURN', 'VOYAGE', 'TO', 'THY', 'NATIVE', 'LAND'] +7018-75788-0004-2192: hyp=['DO', 'AS', 'THEY', 'DO', 'AND', 'BELIKE', 'THOU', 'SHALT', 'GAIN', 'WHAT', 'MAY', 'FURTHER', 'THY', 'RETURN', 'VOYAGE', 'TO', 'THY', 'NATIVE', 'LAND'] +7018-75788-0005-2193: ref=['THEN', 'HE', 'CARRIED', 'ME', 'TO', 'THE', 'BEACH', 'WHERE', 'I', 'FILLED', 'MY', 'BAG', 'WITH', 'PEBBLES', 'LARGE', 'AND', 'SMALL', 'AND', 'PRESENTLY', 'WE', 'SAW', 'A', 'COMPANY', 'OF', 'FOLK', 'ISSUE', 'FROM', 'THE', 'TOWN', 'EACH', 'BEARING', 'A', 'BAG', 'LIKE', 'MINE', 'FILLED', 'WITH', 'PEBBLES'] +7018-75788-0005-2193: hyp=['THEN', 'HE', 'CARRIED', 'ME', 'TO', 'THE', 'BEACH', 'WHERE', 'I', 'FILLED', 'MY', 'BAG', 'WITH', 'PEBBLES', 'LARGE', 'AND', 'SMALL', 'AND', 'PRESENTLY', 'WE', 'SAW', 'A', 'COMPANY', 'OF', 'FOLK', 'ISSUE', 'FROM', 'THE', 'TOWN', 'EACH', 'BEARING', 'A', 'BAG', 'LIKE', 'MINE', 'FILLED', 'WITH', 'PEBBLES'] +7018-75788-0006-2194: ref=['TO', 'THESE', 'HE', 'COMMITTED', 'ME', 'COMMENDING', 'ME', 'TO', 'THEIR', 'CARE', 'AND', 'SAYING', 'THIS', 'MAN', 'IS', 'A', 'STRANGER', 'SO', 'TAKE', 'HIM', 'WITH', 'YOU', 'AND', 'TEACH', 'HIM', 'HOW', 'TO', 'GATHER', 'THAT', 'HE', 'MAY', 'GET', 'HIS', 'DAILY', 'BREAD', 'AND', 'YOU', 'WILL', 'EARN', 'YOUR', 'REWARD', 'AND', 'RECOMPENSE', 'IN', 'HEAVEN'] +7018-75788-0006-2194: hyp=['TO', 'THESE', 'HE', 'COMMITTED', 'ME', 'COMMENDING', 'ME', 'TO', 'THEIR', 'CARE', 'AND', 'SAYING', 'THIS', 'MAN', 'IS', 'A', 'STRANGER', 'SO', 'TAKE', 'HIM', 'WITH', 'YOU', 'AND', 'TEACH', 'HIM', 'HOW', 'TO', 'GATHER', 'THAT', 'HE', 'MAY', 'GET', 
'HIS', 'DAILY', 'BREAD', 'AND', 'YOU', 'WILL', 'EARN', 'YOUR', 'REWARD', 'AND', 'RECOMPENSE', 'IN', 'HEAVEN'] +7018-75788-0007-2195: ref=['NOW', 'SLEEPING', 'UNDER', 'THESE', 'TREES', 'WERE', 'MANY', 'APES', 'WHICH', 'WHEN', 'THEY', 'SAW', 'US', 'ROSE', 'AND', 'FLED', 'FROM', 'US', 'AND', 'SWARMED', 'UP', 'AMONG', 'THE', 'BRANCHES', 'WHEREUPON', 'MY', 'COMPANIONS', 'BEGAN', 'TO', 'PELT', 'THEM', 'WITH', 'WHAT', 'THEY', 'HAD', 'IN', 'THEIR', 'BAGS', 'AND', 'THE', 'APES', 'FELL', 'TO', 'PLUCKING', 'OF', 'THE', 'FRUIT', 'OF', 'THE', 'TREES', 'AND', 'CASTING', 'THEM', 'AT', 'THE', 'FOLK'] +7018-75788-0007-2195: hyp=['NOW', 'SLEEPING', 'UNDER', 'THESE', 'TREES', 'WERE', 'MANY', 'APES', 'WHICH', 'WHEN', 'THEY', 'SAW', 'US', 'ROSE', 'AND', 'FLED', 'FROM', 'US', 'AND', 'SWARMED', 'UP', 'AMONG', 'THE', 'BRANCHES', 'WHEREUPON', 'MY', 'COMPANIONS', 'BEGAN', 'TO', 'PELT', 'THEM', 'WITH', 'WHAT', 'THEY', 'HAD', 'IN', 'THEIR', 'BAGS', 'AND', 'THE', 'APES', 'FELL', 'TO', 'PLUCKING', 'OF', 'THE', 'FRUIT', 'OF', 'THE', 'TREES', 'AND', 'CASTING', 'THEM', 'AT', 'THE', 'FOLK'] +7018-75788-0008-2196: ref=['WE', 'WEIGHED', 'ANCHOR', 'AND', 'SHAHRAZAD', 'PERCEIVED', 'THE', 'DAWN', 'OF', 'DAY', 'AND', 'CEASED', 'SAYING', 'HER', 'PERMITTED', 'SAY'] +7018-75788-0008-2196: hyp=['WE', 'WEIGHED', 'ANCHOR', 'AND', 'SHAHRAZAD', 'PERCEIVED', 'THE', 'DAWN', 'OF', 'DAY', 'AND', 'CEASED', 'SAYING', 'HER', 'PERMITTED', 'SAY'] +7018-75788-0009-2197: ref=['WHEN', 'IT', 'WAS', 'THE', 'FIVE', 'HUNDRED', 'AND', 'FIFTY', 'NINTH', 'NIGHT'] +7018-75788-0009-2197: hyp=['WHEN', 'IT', 'WAS', 'THE', 'FIVE', 'HUNDRED', 'AND', 'FIFTY', 'NINTH', 'NIGHT'] +7018-75788-0010-2198: ref=['AND', 'CEASED', 'NOT', 'SAILING', 'TILL', 'WE', 'ARRIVED', 'SAFELY', 'AT', 'BASSORAH'] +7018-75788-0010-2198: hyp=['AND', 'CEASED', 'NOT', 'SAILING', 'TILL', 'WE', 'ARRIVED', 'SAFELY', 'AT', 'PASORAH'] +7018-75788-0011-2199: ref=['THERE', 'I', 'ABODE', 'A', 'LITTLE', 'AND', 'THEN', 'WENT', 'ON', 'TO', 'BAGHDAD', 'WHERE', 'I', 'ENTERED', 'MY', 'QUARTER', 'AND', 'FOUND', 'MY', 'HOUSE', 'AND', 'FOREGATHERED', 'WITH', 'MY', 'FAMILY', 'AND', 'SALUTED', 'MY', 'FRIENDS', 'WHO', 'GAVE', 'ME', 'JOY', 'OF', 'MY', 'SAFE', 'RETURN', 'AND', 'I', 'LAID', 'UP', 'ALL', 'MY', 'GOODS', 'AND', 'VALUABLES', 'IN', 'MY', 'STOREHOUSES'] +7018-75788-0011-2199: hyp=['THERE', 'I', 'ABODE', 'A', 'LITTLE', 'AND', 'THEN', 'WENT', 'ON', 'TO', 'BAGHDAD', 'WHERE', 'I', 'ENTERED', 'MY', 'QUARTER', 'AND', 'FOUND', 'MY', 'HOUSE', 'AND', 'FOREGATHERED', 'WITH', 'MY', 'FAMILY', 'AND', 'SALUTED', 'MY', 'FRIENDS', 'WHO', 'GAVE', 'ME', 'JOY', 'OF', 'MY', 'SAFE', 'RETURN', 'AND', 'I', 'LAID', 'UP', 'ALL', 'MY', 'GOODS', 'AND', 'VALUABLES', 'IN', 'MY', 'STOREHOUSES'] +7018-75788-0012-2200: ref=['AFTER', 'WHICH', 'I', 'RETURNED', 'TO', 'MY', 'OLD', 'MERRY', 'WAY', 'OF', 'LIFE', 'AND', 'FORGOT', 'ALL', 'I', 'HAD', 'SUFFERED', 'IN', 'THE', 'GREAT', 'PROFIT', 'AND', 'GAIN', 'I', 'HAD', 'MADE'] +7018-75788-0012-2200: hyp=['AFTER', 'WHICH', 'I', 'RETURNED', 'TO', 'MY', 'OLD', 'MERRY', 'WAY', 'OF', 'LIFE', 'AND', 'FORGOT', 'ALL', 'I', 'HAD', 'SUFFERED', 'IN', 'THE', 'GREAT', 'PROFIT', 'AND', 'GAIN', 'I', 'HAD', 'MADE'] +7018-75788-0013-2201: ref=['NEXT', 'MORNING', 'AS', 'SOON', 'AS', 'IT', 'WAS', 'LIGHT', 'HE', 'PRAYED', 'THE', 'DAWN', 'PRAYER', 'AND', 'AFTER', 'BLESSING', 'MOHAMMED', 'THE', 'CREAM', 'OF', 'ALL', 'CREATURES', 'BETOOK', 'HIMSELF', 'TO', 'THE', 'HOUSE', 'OF', 'SINDBAD', 'THE', 'SEAMAN', 'AND', 'WISHED', 'HIM', 'A', 'GOOD', 'DAY'] +7018-75788-0013-2201: hyp=['NEXT', 'MORNING', 'AS', 'SOON', 
'AS', 'IT', 'WAS', 'LIGHT', 'HE', 'PRAYED', 'THE', 'DAWN', 'PRAYER', 'AND', 'AFTER', 'BLESSING', 'MOHAMMED', 'THE', 'CREAM', 'OF', 'ALL', 'CREATURES', 'BETOOK', 'HIMSELF', 'TO', 'THE', 'HOUSE', 'OF', 'SINDBAD', 'THE', 'SEAMAN', 'AND', 'WISHED', 'HIM', 'A', 'GOOD', 'DAY'] +7018-75788-0014-2202: ref=['HERE', 'I', 'FOUND', 'A', 'GREAT', 'SHIP', 'READY', 'FOR', 'SEA', 'AND', 'FULL', 'OF', 'MERCHANTS', 'AND', 'NOTABLES', 'WHO', 'HAD', 'WITH', 'THEM', 'GOODS', 'OF', 'PRICE', 'SO', 'I', 'EMBARKED', 'MY', 'BALES', 'THEREIN'] +7018-75788-0014-2202: hyp=['HERE', 'I', 'FOUND', 'A', 'GREAT', 'SHIP', 'READY', 'FOR', 'SEA', 'AND', 'FULL', 'OF', 'MERCHANTS', 'AND', 'NOTABLES', 'WHO', 'HAD', 'WITH', 'THEM', 'GOODS', 'OF', 'PRICE', 'SO', 'I', 'EMBARKED', 'MY', 'BALES', 'THEREIN'] +7018-75788-0015-2203: ref=['HAPLY', 'AMONGST', 'YOU', 'IS', 'ONE', 'RIGHTEOUS', 'WHOSE', 'PRAYERS', 'THE', 'LORD', 'WILL', 'ACCEPT'] +7018-75788-0015-2203: hyp=['HAPPILY', 'AMONGST', 'YOU', 'IS', 'ONE', 'RIGHTEOUS', 'WHOSE', 'PRAYERS', 'THE', 'LORD', 'WILL', 'ACCEPT'] +7018-75788-0016-2204: ref=['PRESENTLY', 'THE', 'SHIP', 'STRUCK', 'THE', 'MOUNTAIN', 'AND', 'BROKE', 'UP', 'AND', 'ALL', 'AND', 'EVERYTHING', 'ON', 'BOARD', 'OF', 'HER', 'WERE', 'PLUNGED', 'INTO', 'THE', 'SEA'] +7018-75788-0016-2204: hyp=['PRESENTLY', 'THE', 'SHIP', 'STRUCK', 'THE', 'MOUNTAIN', 'AND', 'BROKE', 'UP', 'AND', 'ALL', 'AND', 'EVERYTHING', 'ON', 'BOARD', 'OF', 'HER', 'WERE', 'PLUNGED', 'INTO', 'THE', 'SEA'] +7018-75788-0017-2205: ref=['BUT', 'IT', 'BURNETH', 'IN', 'THEIR', 'BELLIES', 'SO', 'THEY', 'CAST', 'IT', 'UP', 'AGAIN', 'AND', 'IT', 'CONGEALETH', 'ON', 'THE', 'SURFACE', 'OF', 'THE', 'WATER', 'WHEREBY', 'ITS', 'COLOR', 'AND', 'QUANTITIES', 'ARE', 'CHANGED', 'AND', 'AT', 'LAST', 'THE', 'WAVES', 'CAST', 'IT', 'ASHORE', 'AND', 'THE', 'TRAVELLERS', 'AND', 'MERCHANTS', 'WHO', 'KNOW', 'IT', 'COLLECT', 'IT', 'AND', 'SELL', 'IT'] +7018-75788-0017-2205: hyp=['BUT', 'IT', 'BURNETH', 'IN', 'THEIR', 'BELLIES', 'SO', 'THEY', 'CAST', 'IT', 'UP', 'AGAIN', 'AND', 'IT', 'CONGEALETH', 'ON', 'THE', 'SURFACE', 'OF', 'THE', 'WATER', 'WHEREBY', 'ITS', 'COLOUR', 'AND', 'QUANTITIES', 'ARE', 'CHANGED', 'AND', 'AT', 'LAST', 'THE', 'WAVES', 'CAST', 'IT', 'ASHORE', 'AND', 'THE', 'TRAVELLERS', 'AND', 'MERCHANTS', 'WHO', 'KNOW', 'IT', 'COLLECT', 'IT', 'AND', 'SELL', 'IT'] +7018-75788-0018-2206: ref=['EACH', 'THAT', 'DIED', 'WE', 'WASHED', 'AND', 'SHROUDED', 'IN', 'SOME', 'OF', 'THE', 'CLOTHES', 'AND', 'LINEN', 'CAST', 'ASHORE', 'BY', 'THE', 'TIDES', 'AND', 'AFTER', 'A', 'LITTLE', 'THE', 'REST', 'OF', 'MY', 'FELLOWS', 'PERISHED', 'ONE', 'BY', 'ONE', 'TILL', 'I', 'HAD', 'BURIED', 'THE', 'LAST', 'OF', 'THE', 'PARTY', 'AND', 'ABODE', 'ALONE', 'ON', 'THE', 'ISLAND', 'WITH', 'BUT', 'A', 'LITTLE', 'PROVISION', 'LEFT', 'I', 'WHO', 'WAS', 'WONT', 'TO', 'HAVE', 'SO', 'MUCH'] +7018-75788-0018-2206: hyp=['EACH', 'THAT', 'DIED', 'WE', 'WASHED', 'AND', 'SHROUDED', 'IN', 'SOME', 'OF', 'THE', 'CLOTHES', 'AND', 'LINEN', 'CAST', 'ASHORE', 'BY', 'THE', 'TIDES', 'AND', 'AFTER', 'A', 'LITTLE', 'THE', 'REST', 'OF', 'MY', 'FELLOWS', 'PERISHED', 'ONE', 'BY', 'ONE', 'TILL', 'I', 'HAD', 'BURIED', 'THE', 'LAST', 'OF', 'THE', 'PARTY', 'AND', 'ABODE', 'ALONE', 'ON', 'THE', 'ISLAND', 'WITH', 'BUT', 'A', 'LITTLE', 'PROVISION', 'LEFT', 'I', 'WHO', 'WAS', 'WONT', 'TO', 'HAVE', 'SO', 'MUCH'] +7018-75788-0019-2207: ref=['BUT', 'THERE', 'IS', 'MAJESTY', 'AND', 'THERE', 'IS', 'NO', 'MIGHT', 'SAVE', 'IN', 'ALLAH', 'THE', 'GLORIOUS', 'THE', 'GREAT'] +7018-75788-0019-2207: hyp=['BUT', 'THERE', 'IS', 'MAJESTY', 'AND', 
'THERE', 'IS', 'NO', 'MIGHT', 'SAVE', 'IN', 'ALLAH', 'THE', 'GLORIOUS', 'THE', 'GREAT'] +7018-75789-0000-2208: ref=['WHEN', 'IT', 'WAS', 'THE', 'FIVE', 'HUNDRED', 'AND', 'SIXTY', 'FIRST', 'NIGHT'] +7018-75789-0000-2208: hyp=['WHEN', 'IT', 'WAS', 'THE', 'FIVE', 'HUNDRED', 'AND', 'SIXTY', 'FIRST', 'NIGHT'] +7018-75789-0001-2209: ref=['THEN', 'SIGHING', 'FOR', 'MYSELF', 'I', 'SET', 'TO', 'WORK', 'COLLECTING', 'A', 'NUMBER', 'OF', 'PIECES', 'OF', 'CHINESE', 'AND', 'COMORIN', 'ALOES', 'WOOD', 'AND', 'I', 'BOUND', 'THEM', 'TOGETHER', 'WITH', 'ROPES', 'FROM', 'THE', 'WRECKAGE', 'THEN', 'I', 'CHOSE', 'OUT', 'FROM', 'THE', 'BROKEN', 'UP', 'SHIPS', 'STRAIGHT', 'PLANKS', 'OF', 'EVEN', 'SIZE', 'AND', 'FIXED', 'THEM', 'FIRMLY', 'UPON', 'THE', 'ALOES', 'WOOD', 'MAKING', 'ME', 'A', 'BOAT', 'RAFT', 'A', 'LITTLE', 'NARROWER', 'THAN', 'THE', 'CHANNEL', 'OF', 'THE', 'STREAM', 'AND', 'I', 'TIED', 'IT', 'TIGHTLY', 'AND', 'FIRMLY', 'AS', 'THOUGH', 'IT', 'WERE', 'NAILED'] +7018-75789-0001-2209: hyp=['THEN', 'SEWING', 'FOR', 'MYSELF', 'I', 'SET', 'TO', 'WORK', 'COLLECTING', 'A', 'NUMBER', 'OF', 'PIECES', 'OF', 'CHINESE', 'AND', 'CORMERAN', 'ALOES', 'WOOD', 'AND', 'I', 'BOUND', 'THEM', 'TOGETHER', 'WITH', 'ROPES', 'FROM', 'THE', 'WRECKAGE', 'THEN', 'I', 'CHOSE', 'OUT', 'FROM', 'THE', 'BROKEN', 'UP', 'SHIP', 'STRAIGHT', 'PLANKS', 'OF', 'EVEN', 'SIZE', 'AND', 'FIXED', 'THEM', 'FIRMLY', 'UPON', 'THE', 'ALOES', 'WOOD', 'MAKING', 'ME', 'A', 'BOAT', 'RAFT', 'A', 'LITTLE', 'NARROWER', 'THAN', 'THE', 'CHANNEL', 'OF', 'THE', 'STREAM', 'AND', 'I', 'TIED', 'IT', 'TIGHTLY', 'AND', 'FIRMLY', 'AS', 'THOUGH', 'IT', 'WERE', 'NAILED'] +7018-75789-0002-2210: ref=['LAND', 'AFTER', 'LAND', 'SHALT', 'THOU', 'SEEK', 'AND', 'FIND', 'BUT', 'NO', 'OTHER', 'LIFE', 'ON', 'THY', 'WISH', 'SHALL', 'WAIT', 'FRET', 'NOT', 'THY', 'SOUL', 'IN', 'THY', 'THOUGHTS', 'O', 'NIGHT', 'ALL', 'WOES', 'SHALL', 'END', 'OR', 'SOONER', 'OR', 'LATE'] +7018-75789-0002-2210: hyp=['LAND', 'AFTER', 'LAND', 'SHALT', 'THOU', 'SEE', 'CONFINED', 'BUT', 'NO', 'OTHER', 'LIFE', 'ON', 'THY', 'WISH', 'SHALL', 'WAIT', 'FRET', 'NOT', 'THY', 'SOUL', 'AND', 'THY', 'THOUGHTS', 'O', 'NIGHT', 'ALL', 'WOES', 'SHALL', 'END', 'OR', 'SOONER', 'OR', 'LATE'] +7018-75789-0003-2211: ref=['I', 'ROWED', 'MY', 'CONVEYANCE', 'INTO', 'THE', 'PLACE', 'WHICH', 'WAS', 'INTENSELY', 'DARK', 'AND', 'THE', 'CURRENT', 'CARRIED', 'THE', 'RAFT', 'WITH', 'IT', 'DOWN', 'THE', 'UNDERGROUND', 'CHANNEL'] +7018-75789-0003-2211: hyp=['I', 'ROWED', 'MY', 'CONVEYANCE', 'INTO', 'THE', 'PLACE', 'WHICH', 'WAS', 'INTENSELY', 'DARK', 'AND', 'THE', 'CURRENT', 'CARRIED', 'THE', 'RAFT', 'WITH', 'IT', 'DOWN', 'THE', 'UNDERGROUND', 'CHANNEL'] +7018-75789-0004-2212: ref=['AND', 'I', 'THREW', 'MYSELF', 'DOWN', 'UPON', 'MY', 'FACE', 'ON', 'THE', 'RAFT', 'BY', 'REASON', 'OF', 'THE', 'NARROWNESS', 'OF', 'THE', 'CHANNEL', 'WHILST', 'THE', 'STREAM', 'CEASED', 'NOT', 'TO', 'CARRY', 'ME', 'ALONG', 'KNOWING', 'NOT', 'NIGHT', 'FROM', 'DAY', 'FOR', 'THE', 'EXCESS', 'OF', 'THE', 'GLOOM', 'WHICH', 'ENCOMPASSED', 'ME', 'ABOUT', 'AND', 'MY', 'TERROR', 'AND', 'CONCERN', 'FOR', 'MYSELF', 'LEST', 'I', 'SHOULD', 'PERISH'] +7018-75789-0004-2212: hyp=['AND', 'I', 'THREW', 'MYSELF', 'DOWN', 'UPON', 'MY', 'FACE', 'ON', 'THE', 'RAFT', 'BY', 'REASON', 'OF', 'THE', 'NARROWNESS', 'OF', 'THE', 'CHANNEL', 'WHILST', 'THE', 'STREAM', 'CEASED', 'NOT', 'TO', 'CARRY', 'ME', 'ALONG', 'KNOWING', 'NOT', 'NIGHT', 'FROM', 'DAY', 'FOR', 'THE', 'EXCESS', 'OF', 'THE', 'GLOOM', 'WHICH', 'ENCOMPASSED', 'ME', 'ABOUT', 'IN', 'MY', 'TERROR', 'AND', 'CONCERN', 'FOR', 
'MYSELF', 'LEST', 'I', 'SHOULD', 'PERISH'] +7018-75789-0005-2213: ref=['WHEN', 'I', 'AWOKE', 'AT', 'LAST', 'I', 'FOUND', 'MYSELF', 'IN', 'THE', 'LIGHT', 'OF', 'HEAVEN', 'AND', 'OPENING', 'MY', 'EYES', 'I', 'SAW', 'MYSELF', 'IN', 'A', 'BROAD', 'STREAM', 'AND', 'THE', 'RAFT', 'MOORED', 'TO', 'AN', 'ISLAND', 'IN', 'THE', 'MIDST', 'OF', 'A', 'NUMBER', 'OF', 'INDIANS', 'AND', 'ABYSSINIANS'] +7018-75789-0005-2213: hyp=['WHEN', 'I', 'AWOKE', 'AT', 'LAST', 'I', 'FOUND', 'MYSELF', 'IN', 'THE', 'LIGHT', 'OF', 'HEAVEN', 'AND', 'OPENING', 'MY', 'EYES', 'I', 'SAW', 'MYSELF', 'IN', 'A', 'BROAD', 'STREAM', 'AND', 'THE', 'RAFT', 'MOORED', 'TO', 'AN', 'ISLAND', 'IN', 'THE', 'MIDST', 'OF', 'A', 'NUMBER', 'OF', 'INDIANS', 'AND', 'ABYSSINIANS'] +7018-75789-0006-2214: ref=['BUT', 'I', 'WAS', 'DELIGHTED', 'AT', 'MY', 'ESCAPE', 'FROM', 'THE', 'RIVER'] +7018-75789-0006-2214: hyp=['BUT', 'I', 'WAS', 'DELIGHTED', 'AT', 'MY', 'ESCAPE', 'FROM', 'THE', 'RIVER'] +7018-75789-0007-2215: ref=['WHEN', 'THEY', 'SAW', 'I', 'UNDERSTOOD', 'THEM', 'NOT', 'AND', 'MADE', 'THEM', 'NO', 'ANSWER', 'ONE', 'OF', 'THEM', 'CAME', 'FORWARD', 'AND', 'SAID', 'TO', 'ME', 'IN', 'ARABIC', 'PEACE', 'BE', 'WITH', 'THEE', 'O', 'MY', 'BROTHER'] +7018-75789-0007-2215: hyp=['WHEN', 'THEY', 'SAW', 'I', 'UNDERSTOOD', 'THEM', 'NOT', 'AND', 'MADE', 'THEM', 'NO', 'ANSWER', 'ONE', 'OF', 'THEM', 'CAME', 'FORWARD', 'AND', 'SAID', 'TO', 'ME', 'IN', 'ARABIC', 'PEACE', 'BE', 'WITH', 'THEE', 'O', 'MY', 'BROTHER'] +7018-75789-0008-2216: ref=['O', 'MY', 'BROTHER', 'ANSWERED', 'HE', 'WE', 'ARE', 'HUSBANDMEN', 'AND', 'TILLERS', 'OF', 'THE', 'SOIL', 'WHO', 'CAME', 'OUT', 'TO', 'WATER', 'OUR', 'FIELDS', 'AND', 'PLANTATIONS', 'AND', 'FINDING', 'THEE', 'ASLEEP', 'ON', 'THIS', 'RAFT', 'LAID', 'HOLD', 'OF', 'IT', 'AND', 'MADE', 'IT', 'FAST', 'BY', 'US', 'AGAINST', 'THOU', 'SHOULDST', 'AWAKE', 'AT', 'THY', 'LEISURE'] +7018-75789-0008-2216: hyp=['O', 'MY', 'BROTHER', 'ANSWERED', 'HE', 'WE', 'ARE', 'HUSBANDMEN', 'AND', 'TILLERS', 'OF', 'THE', 'SOIL', 'WHO', 'CAME', 'OUT', 'TO', 'WATER', 'OUR', 'FIELDS', 'AND', 'PLANTATIONS', 'AND', 'FINDING', 'THEE', 'ASLEEP', 'ON', 'THIS', 'RAFT', 'LAID', 'HOLD', 'OF', 'IT', 'AND', 'MADE', 'IT', 'FAST', 'BY', 'US', 'AGAINST', 'THOU', 'SHOULDST', 'AWAKE', 'AT', 'THY', 'LEISURE'] +7018-75789-0009-2217: ref=['I', 'ANSWERED', 'FOR', "ALLAH'S", 'SAKE', 'O', 'MY', 'LORD', 'ERE', 'I', 'SPEAK', 'GIVE', 'ME', 'SOMEWHAT', 'TO', 'EAT', 'FOR', 'I', 'AM', 'STARVING', 'AND', 'AFTER', 'ASK', 'ME', 'WHAT', 'THOU', 'WILT'] +7018-75789-0009-2217: hyp=['I', 'ANSWERED', 'FOR', "ALLAH'S", 'SAKE', 'O', 'MY', 'LORD', 'ERE', 'I', 'SPEAK', 'GIVE', 'ME', 'SOMEWHAT', 'TO', 'EAT', 'FOR', 'I', 'AM', 'STARVING', 'AND', 'AFTER', 'ASK', 'ME', 'WHAT', 'THOU', 'WILT'] +7018-75789-0010-2218: ref=['WHEN', 'IT', 'WAS', 'THE', 'FIVE', 'HUNDRED', 'AND', 'SIXTY', 'SECOND', 'NIGHT'] +7018-75789-0010-2218: hyp=['WHEN', 'IT', 'WAS', 'THE', 'FIVE', 'HUNDRED', 'AND', 'SIXTY', 'SECOND', 'NIGHT'] +7018-75789-0011-2219: ref=['SHE', 'SAID', 'IT', 'HATH', 'REACHED', 'ME', 'O', 'AUSPICIOUS', 'KING', 'THAT', 'SINDBAD', 'THE', 'SEAMAN', 'CONTINUED', 'WHEN', 'I', 'LANDED', 'AND', 'FOUND', 'MYSELF', 'AMONGST', 'THE', 'INDIANS', 'AND', 'ABYSSINIANS', 'AND', 'HAD', 'TAKEN', 'SOME', 'REST', 'THEY', 'CONSULTED', 'AMONG', 'THEMSELVES', 'AND', 'SAID', 'TO', 'ONE', 'ANOTHER', 'THERE', 'IS', 'NO', 'HELP', 'FOR', 'IT', 'BUT', 'WE', 'CARRY', 'HIM', 'WITH', 'US', 'AND', 'PRESENT', 'HIM', 'TO', 'OUR', 'KING', 'THAT', 'HE', 'MAY', 'ACQUAINT', 'HIM', 'WITH', 'HIS', 'ADVENTURES'] +7018-75789-0011-2219: 
hyp=['SHE', 'SAID', 'IT', 'HATH', 'REACHED', 'ME', 'O', 'AUSPICIOUS', 'KING', 'THAT', 'SINDBAD', 'THE', 'SEAMAN', 'CONTINUED', 'WHEN', 'I', 'LANDED', 'AND', 'FOUND', 'MYSELF', 'AMONGST', 'THE', 'INDIANS', 'AND', 'ABYSSINIANS', 'AND', 'HAD', 'TAKEN', 'SOME', 'REST', 'THEY', 'CONSULTED', 'AMONG', 'THEMSELVES', 'AND', 'SAID', 'TO', 'ONE', 'ANOTHER', 'THERE', 'IS', 'NO', 'HELP', 'FOR', 'IT', 'BUT', 'WE', 'CARRY', 'HIM', 'WITH', 'US', 'AND', 'PRESENT', 'HIM', 'TO', 'OUR', 'KING', 'THAT', 'HE', 'MAY', 'ACQUAINT', 'HIM', 'WITH', 'HIS', 'ADVENTURES'] +7018-75789-0012-2220: ref=['SO', 'I', 'CONSORTED', 'WITH', 'THE', 'CHIEF', 'OF', 'THE', 'ISLANDERS', 'AND', 'THEY', 'PAID', 'ME', 'THE', 'UTMOST', 'RESPECT'] +7018-75789-0012-2220: hyp=['SO', 'I', 'CONSORTED', 'WITH', 'THE', 'CHIEF', 'OF', 'THE', 'ISLANDERS', 'AND', 'THEY', 'PAID', 'ME', 'THE', 'UTMOST', 'RESPECT'] +7018-75789-0013-2221: ref=['SO', 'I', 'ROSE', 'WITHOUT', 'STAY', 'OR', 'DELAY', 'AND', 'KISSED', 'THE', "KING'S", 'HAND', 'AND', 'ACQUAINTED', 'HIM', 'WITH', 'MY', 'LONGING', 'TO', 'SET', 'OUT', 'WITH', 'THE', 'MERCHANTS', 'FOR', 'THAT', 'I', 'PINED', 'AFTER', 'MY', 'PEOPLE', 'AND', 'MINE', 'OWN', 'LAND'] +7018-75789-0013-2221: hyp=['SO', 'I', 'ROSE', 'WITHOUT', 'STAY', 'OR', 'DELAY', 'AND', 'KISSED', 'THE', "KING'S", 'HAND', 'AND', 'ACQUAINTED', 'HIM', 'WITH', 'MY', 'LONGING', 'TO', 'SET', 'OUT', 'WITH', 'THE', 'MERCHANTS', 'FOR', 'THAT', 'I', 'PINED', 'AFTER', 'MY', 'PEOPLE', 'AND', 'MINE', 'OWN', 'LAND'] +7018-75789-0014-2222: ref=['QUOTH', 'HE', 'THOU', 'ART', 'THINE', 'OWN', 'MASTER', 'YET', 'IF', 'IT', 'BE', 'THY', 'WILL', 'TO', 'ABIDE', 'WITH', 'US', 'ON', 'OUR', 'HEAD', 'AND', 'EYES', 'BE', 'IT', 'FOR', 'THOU', 'GLADDENEST', 'US', 'WITH', 'THY', 'COMPANY'] +7018-75789-0014-2222: hyp=['QUOTH', 'HE', 'THOU', 'ART', 'THINE', 'OWN', 'MASTER', 'YET', 'IF', 'IT', 'BE', 'THY', 'WILL', 'TO', 'ABIDE', 'WITH', 'US', 'ON', 'OUR', 'HEAD', 'AND', 'EYES', 'BE', 'IT', 'FOR', 'THOU', 'GLADDENEST', 'US', 'WITH', 'THY', 'COMPANY'] +7018-75789-0015-2223: ref=['BY', 'ALLAH', 'O', 'MY', 'LORD', 'ANSWERED', 'I', 'THOU', 'HAST', 'INDEED', 'OVERWHELMED', 'ME', 'WITH', 'THY', 'FAVOURS', 'AND', 'WELL', 'DOINGS', 'BUT', 'I', 'WEARY', 'FOR', 'A', 'SIGHT', 'OF', 'MY', 'FRIENDS', 'AND', 'FAMILY', 'AND', 'NATIVE', 'COUNTRY'] +7018-75789-0015-2223: hyp=['BY', 'ALLAH', 'O', 'MY', 'LORD', 'ANSWERED', 'I', 'THOU', 'HAST', 'INDEED', 'OVERWHELMED', 'ME', 'WITH', 'THY', 'FAVOURS', 'AND', 'WELL', 'DOINGS', 'BUT', 'I', 'WEARY', 'FOR', 'A', 'SIGHT', 'OF', 'MY', 'FRIENDS', 'AND', 'FAMILY', 'AND', 'NATIVE', 'COUNTRY'] +7018-75789-0016-2224: ref=['THEN', 'I', 'TOOK', 'LEAVE', 'OF', 'HIM', 'AND', 'OF', 'ALL', 'MY', 'INTIMATES', 'AND', 'ACQUAINTANCES', 'IN', 'THE', 'ISLAND', 'AND', 'EMBARKED', 'WITH', 'THE', 'MERCHANTS', 'AFORESAID'] +7018-75789-0016-2224: hyp=['THEN', 'I', 'TOOK', 'LEAVE', 'OF', 'HIM', 'AND', 'OF', 'ALL', 'MY', 'INTIMATES', 'AND', 'ACQUAINTANCES', 'IN', 'THE', 'ISLAND', 'AND', 'EMBARKED', 'WITH', 'THE', 'MERCHANTS', 'AFORESAID'] +7018-75789-0017-2225: ref=['HE', 'ASKED', 'ME', 'WHENCE', 'THEY', 'CAME', 'AND', 'I', 'SAID', 'TO', 'HIM', 'BY', 'ALLAH', 'O', 'COMMANDER', 'OF', 'THE', 'FAITHFUL', 'I', 'KNOW', 'NOT', 'THE', 'NAME', 'OF', 'THE', 'CITY', 'NOR', 'THE', 'WAY', 'THITHER'] +7018-75789-0017-2225: hyp=['HE', 'ASKED', 'ME', 'WHENCE', 'THEY', 'CAME', 'AND', 'I', 'SAID', 'TO', 'HIM', 'BY', 'ALLAH', 'O', 'COMMANDER', 'OF', 'THE', 'FAITHFUL', 'I', 'KNOW', 'NOT', 'THE', 'NAME', 'OF', 'THE', 'CITY', 'NOR', 'THE', 'WAY', 'THITHER'] +7018-75789-0018-2226: 
ref=['FOR', 'STATE', 'PROCESSIONS', 'A', 'THRONE', 'IS', 'SET', 'FOR', 'HIM', 'UPON', 'A', 'HUGE', 'ELEPHANT', 'ELEVEN', 'CUBITS', 'HIGH', 'AND', 'UPON', 'THIS', 'HE', 'SITTETH', 'HAVING', 'HIS', 'GREAT', 'LORDS', 'AND', 'OFFICERS', 'AND', 'GUESTS', 'STANDING', 'IN', 'TWO', 'RANKS', 'ON', 'HIS', 'RIGHT', 'HAND', 'AND', 'ON', 'HIS', 'LEFT'] +7018-75789-0018-2226: hyp=['FOR', 'STATE', 'PROCESSIONS', 'A', 'THRONE', 'IS', 'SET', 'FOR', 'HIM', 'UPON', 'A', 'HUGE', 'ELEPHANT', 'ELEVEN', 'CUBITS', 'HIGH', 'AND', 'UPON', 'THIS', 'HE', 'SITTETH', 'HAVING', 'HIS', 'GREAT', 'LORDS', 'AND', 'OFFICERS', 'AND', 'GUESTS', 'STANDING', 'IN', 'TWO', 'RANKS', 'ON', 'HIS', 'RIGHT', 'HAND', 'AND', 'ON', 'HIS', 'LEFT'] +7018-75789-0019-2227: ref=['HIS', 'LETTER', 'HATH', 'SHOWN', 'ME', 'THIS', 'AND', 'AS', 'FOR', 'THE', 'MIGHTINESS', 'OF', 'HIS', 'DOMINION', 'THOU', 'HAST', 'TOLD', 'US', 'WHAT', 'THOU', 'HAST', 'EYE', 'WITNESSED'] +7018-75789-0019-2227: hyp=['HIS', 'LETTER', 'HATH', 'SHOWN', 'ME', 'THIS', 'AND', 'AS', 'FOR', 'THE', 'MIGHTINESS', 'OF', 'HIS', 'DOMINION', 'THOU', 'HAST', 'TOLD', 'US', 'WHAT', 'THOU', 'HAST', 'EYE', 'WITNESSED'] +7018-75789-0020-2228: ref=['PRESENTLY', 'MY', 'FRIENDS', 'CAME', 'TO', 'ME', 'AND', 'I', 'DISTRIBUTED', 'PRESENTS', 'AMONG', 'MY', 'FAMILY', 'AND', 'GAVE', 'ALMS', 'AND', 'LARGESSE', 'AFTER', 'WHICH', 'I', 'YIELDED', 'MYSELF', 'TO', 'JOYANCE', 'AND', 'ENJOYMENT', 'MIRTH', 'AND', 'MERRY', 'MAKING', 'AND', 'FORGOT', 'ALL', 'THAT', 'I', 'HAD', 'SUFFERED'] +7018-75789-0020-2228: hyp=['PRESENTLY', 'MY', 'FRIENDS', 'CAME', 'TO', 'ME', 'AND', 'I', 'DISTRIBUTED', 'PRESENTS', 'AMONG', 'MY', 'FAMILY', 'AND', 'GAVE', 'ALMS', 'AND', 'LARGESSE', 'AFTER', 'WHICH', 'I', 'YIELDED', 'MYSELF', 'TO', 'JOYANCE', 'AND', 'ENJOYMENT', 'MIRTH', 'AND', 'MERRYMAKING', 'AND', 'FORGOT', 'ALL', 'THAT', 'I', 'HAD', 'SUFFERED'] +7018-75789-0021-2229: ref=['SUCH', 'THEN', 'O', 'MY', 'BROTHERS', 'IS', 'THE', 'HISTORY', 'OF', 'WHAT', 'BEFEL', 'ME', 'IN', 'MY', 'SIXTH', 'VOYAGE', 'AND', 'TO', 'MORROW', 'INSHALLAH'] +7018-75789-0021-2229: hyp=['SUCH', 'THEN', 'O', 'MY', 'BROTHERS', 'IS', 'THE', 'HISTORY', 'OF', 'WHAT', 'BEFELL', 'ME', 'IN', 'MY', 'SIXTH', 'VOYAGE', 'AND', 'TO', 'MORROW', 'INSHALLAH'] +7018-75789-0022-2230: ref=['I', 'WILL', 'TELL', 'YOU', 'THE', 'STORY', 'OF', 'MY', 'SEVENTH', 'AND', 'LAST', 'VOYAGE', 'WHICH', 'IS', 'STILL', 'MORE', 'WONDROUS', 'AND', 'MARVELLOUS', 'THAN', 'THAT', 'OF', 'THE', 'FIRST', 'SIX'] +7018-75789-0022-2230: hyp=['I', 'WILL', 'TELL', 'YOU', 'THE', 'STORY', 'OF', 'MY', 'SEVENTH', 'AND', 'LAST', 'VOYAGE', 'WHICH', 'IS', 'STILL', 'MORE', 'WONDROUS', 'AND', 'MARVELOUS', 'THAN', 'THAT', 'OF', 'THE', 'FIRST', 'SIX'] +7018-75789-0023-2231: ref=['WHEN', 'IT', 'WAS', 'THE', 'FIVE', 'HUNDRED', 'AND', 'SIXTY', 'THIRD', 'NIGHT'] +7018-75789-0023-2231: hyp=['WHEN', 'IT', 'WAS', 'THE', 'FIVE', 'HUNDRED', 'AND', 'SIXTY', 'THIRD', 'NIGHT'] +7018-75789-0024-2232: ref=['SHE', 'SAID', 'IT', 'HATH', 'REACHED', 'ME', 'O', 'AUSPICIOUS', 'KING', 'THAT', 'WHEN', 'SINDBAD', 'THE', 'SEAMAN', 'HAD', 'RELATED', 'THE', 'HISTORY', 'OF', 'WHAT', 'BEFEL', 'HIM', 'IN', 'HIS', 'SIXTH', 'VOYAGE', 'AND', 'ALL', 'THE', 'COMPANY', 'HAD', 'DISPERSED', 'SINDBAD', 'THE', 'LANDSMAN', 'WENT', 'HOME', 'AND', 'SLEPT', 'AS', 'OF', 'WONT'] +7018-75789-0024-2232: hyp=['SHE', 'SAID', 'IT', 'HATH', 'REACHED', 'ME', 'O', 'AUSPICIOUS', 'KING', 'THAT', 'WHEN', 'SINDBAD', 'THE', 'SEAMAN', 'HAD', 'RELATED', 'THE', 'HISTORY', 'OF', 'WHAT', 'BEFELL', 'HIM', 'IN', 'HIS', 'SIXTH', 'VOYAGE', 'AND', 'ALL', 'THE', 'COMPANY', 
'HAD', 'DISPERSED', 'SINDBAD', 'THE', 'LANDSMAN', 'WENT', 'HOME', 'AND', 'SLEPT', 'AS', 'OF', 'WONT'] +7018-75789-0025-2233: ref=['THE', 'SEVENTH', 'VOYAGE', 'OF', 'SINDBAD', 'THE', 'SEAMAN'] +7018-75789-0025-2233: hyp=['THE', 'SEVENTH', 'VOYAGE', 'OF', 'SINBAD', 'THE', 'SEAMAN'] +7018-75789-0026-2234: ref=['KNOW', 'O', 'COMPANY', 'THAT', 'AFTER', 'MY', 'RETURN', 'FROM', 'MY', 'SIXTH', 'VOYAGE', 'WHICH', 'BROUGHT', 'ME', 'ABUNDANT', 'PROFIT', 'I', 'RESUMED', 'MY', 'FORMER', 'LIFE', 'IN', 'ALL', 'POSSIBLE', 'JOYANCE', 'AND', 'ENJOYMENT', 'AND', 'MIRTH', 'AND', 'MAKING', 'MERRY', 'DAY', 'AND', 'NIGHT', 'AND', 'I', 'TARRIED', 'SOME', 'TIME', 'IN', 'THIS', 'SOLACE', 'AND', 'SATISFACTION', 'TILL', 'MY', 'SOUL', 'BEGAN', 'ONCE', 'MORE', 'TO', 'LONG', 'TO', 'SAIL', 'THE', 'SEAS', 'AND', 'SEE', 'FOREIGN', 'COUNTRIES', 'AND', 'COMPANY', 'WITH', 'MERCHANTS', 'AND', 'HEAR', 'NEW', 'THINGS'] +7018-75789-0026-2234: hyp=['KNOW', 'O', 'COMPANY', 'THAT', 'AFTER', 'MY', 'RETURN', 'FROM', 'MY', 'SIXTH', 'VOYAGE', 'WHICH', 'BROUGHT', 'ME', 'ABUNDANT', 'PROFIT', 'I', 'RESUMED', 'MY', 'FORMER', 'LIFE', 'IN', 'ALL', 'POSSIBLE', 'JOYANCE', 'AND', 'ENJOYMENT', 'AND', 'MIRTH', 'AND', 'MAKING', 'MERRY', 'DAY', 'AND', 'NIGHT', 'AND', 'I', 'TARRIED', 'SOME', 'TIME', 'IN', 'THIS', 'SOLACE', 'AND', 'SATISFACTION', 'TILL', 'MY', 'SOUL', 'BEGAN', 'ONCE', 'MORE', 'TO', 'LONG', 'TO', 'SAIL', 'THE', 'SEAS', 'AND', 'SEE', 'FOREIGN', 'COUNTRIES', 'AND', 'COMPANY', 'WITH', 'MERCHANTS', 'AND', 'HEAR', 'NEW', 'THINGS'] +7018-75789-0027-2235: ref=['SO', 'HAVING', 'MADE', 'UP', 'MY', 'MIND', 'I', 'PACKED', 'UP', 'IN', 'BALES', 'A', 'QUANTITY', 'OF', 'PRECIOUS', 'STUFFS', 'SUITED', 'FOR', 'SEA', 'TRADE', 'AND', 'REPAIRED', 'WITH', 'THEM', 'FROM', 'BAGHDAD', 'CITY', 'TO', 'BASSORAH', 'TOWN', 'WHERE', 'I', 'FOUND', 'A', 'SHIP', 'READY', 'FOR', 'SEA', 'AND', 'IN', 'HER', 'A', 'COMPANY', 'OF', 'CONSIDERABLE', 'MERCHANTS'] +7018-75789-0027-2235: hyp=['SO', 'HAVING', 'MADE', 'UP', 'MY', 'MIND', 'I', 'PACKED', 'UP', 'IN', 'BALES', 'A', 'QUANTITY', 'OF', 'PRECIOUS', 'STUFFS', 'SUITED', 'FOR', 'SEA', 'TRADE', 'AND', 'REPAIRED', 'WITH', 'THEM', 'FROM', 'BAGHDAD', 'CITY', 'TO', 'BASSORAH', 'TOWN', 'WHERE', 'I', 'FOUND', 'A', 'SHIP', 'READY', 'FOR', 'SEA', 'AND', 'IN', 'HER', 'A', 'COMPANY', 'OF', 'CONSIDERABLE', 'MERCHANTS'] +7018-75789-0028-2236: ref=['BUT', 'THE', 'CAPTAIN', 'AROSE', 'AND', 'TIGHTENING', 'HIS', 'GIRDLE', 'TUCKED', 'UP', 'HIS', 'SKIRTS', 'AND', 'AFTER', 'TAKING', 'REFUGE', 'WITH', 'ALLAH', 'FROM', 'SATAN', 'THE', 'STONED', 'CLOMB', 'TO', 'THE', 'MAST', 'HEAD', 'WHENCE', 'HE', 'LOOKED', 'OUT', 'RIGHT', 'AND', 'LEFT', 'AND', 'GAZING', 'AT', 'THE', 'PASSENGERS', 'AND', 'CREW', 'FELL', 'TO', 'BUFFETING', 'HIS', 'FACE', 'AND', 'PLUCKING', 'OUT', 'HIS', 'BEARD'] +7018-75789-0028-2236: hyp=['BUT', 'THE', 'CAPTAIN', 'AROSE', 'AND', 'TIGHTENING', 'HIS', 'GIRDLE', 'TUCKED', 'UP', 'HIS', 'SKIRTS', 'AND', 'AFTER', 'TAKING', 'REFUGE', 'WITH', 'ALLAH', 'FROM', 'SATAN', 'THE', 'STONED', 'CLIMBED', 'TO', 'THE', 'MAST', 'HEAD', 'WHENCE', 'HE', 'LOOKED', 'OUT', 'RIGHT', 'AND', 'LEFT', 'AND', 'GAZING', 'AT', 'THE', 'PASSENGERS', 'AND', 'CREW', 'FELL', 'TO', 'BUFFETING', 'HIS', 'FACE', 'AND', 'PLUCKING', 'OUT', 'HIS', 'BEARD'] +7018-75789-0029-2237: ref=['THIS', 'HE', 'SET', 'IN', 'A', 'SAUCER', 'WETTED', 'WITH', 'A', 'LITTLE', 'WATER', 'AND', 'AFTER', 'WAITING', 'A', 'SHORT', 'TIME', 'SMELT', 'AND', 'TASTED', 'IT', 'AND', 'THEN', 'HE', 'TOOK', 'OUT', 'OF', 'THE', 'CHEST', 'A', 'BOOKLET', 'WHEREIN', 'HE', 'READ', 'AWHILE', 'AND', 'SAID', 
'WEEPING', 'KNOW', 'O', 'YE', 'PASSENGERS', 'THAT', 'IN', 'THIS', 'BOOK', 'IS', 'A', 'MARVELLOUS', 'MATTER', 'DENOTING', 'THAT', 'WHOSO', 'COMETH', 'HITHER', 'SHALL', 'SURELY', 'DIE', 'WITHOUT', 'HOPE', 'OF', 'ESCAPE', 'FOR', 'THAT', 'THIS', 'OCEAN', 'IS', 'CALLED', 'THE', 'SEA', 'OF', 'THE', 'CLIME', 'OF', 'THE', 'KING', 'WHEREIN', 'IS', 'THE', 'SEPULCHRE', 'OF', 'OUR', 'LORD', 'SOLOMON', 'SON', 'OF', 'DAVID', 'ON', 'BOTH', 'BE', 'PEACE'] +7018-75789-0029-2237: hyp=['THIS', 'HE', 'SET', 'IN', 'A', 'SAUCER', 'WETTED', 'WITH', 'A', 'LITTLE', 'WATER', 'AND', 'AFTER', 'WAITING', 'A', 'SHORT', 'TIME', 'SMELT', 'AND', 'TASTED', 'IT', 'AND', 'THEN', 'HE', 'TOOK', 'OUT', 'OF', 'THE', 'CHEST', 'A', 'BOOKLET', 'WHEREIN', 'HE', 'READ', 'A', 'WHILE', 'AND', 'SAID', 'WEEPING', 'KNOW', 'O', 'YE', 'PASSENGERS', 'THAT', 'IN', 'THIS', 'BOOK', 'IS', 'A', 'MARVELLOUS', 'MATTER', 'DENOTING', 'THAT', 'WHOSO', 'COMETH', 'HITHER', 'SHALL', 'SURELY', 'DIE', 'WITHOUT', 'HOPE', 'OF', 'ESCAPE', 'FOR', 'THAT', 'THIS', 'OCEAN', 'IS', 'CALLED', 'THE', 'SEA', 'OF', 'THE', 'CLIME', 'OF', 'THE', 'KING', 'WHEREIN', 'IS', 'A', 'SEPULCHRE', 'OF', 'OUR', 'LORD', 'SOLOMON', 'SON', 'OF', 'DAVID', 'ON', 'BOTH', 'BE', 'PEACE'] +7018-75789-0030-2238: ref=['A', 'SECOND', 'FISH', 'MADE', 'ITS', 'APPEARANCE', 'THAN', 'WHICH', 'WE', 'HAD', 'SEEN', 'NAUGHT', 'MORE', 'MONSTROUS'] +7018-75789-0030-2238: hyp=['A', 'SECOND', 'FISH', 'MADE', 'ITS', 'APPEARANCE', 'AND', 'WHICH', 'WE', 'HAD', 'SEEN', 'NOUGHT', 'MORE', 'MONSTROUS'] +7018-75789-0031-2239: ref=['WHEN', 'SUDDENLY', 'A', 'VIOLENT', 'SQUALL', 'OF', 'WIND', 'AROSE', 'AND', 'SMOTE', 'THE', 'SHIP', 'WHICH', 'ROSE', 'OUT', 'OF', 'THE', 'WATER', 'AND', 'SETTLED', 'UPON', 'A', 'GREAT', 'REEF', 'THE', 'HAUNT', 'OF', 'SEA', 'MONSTERS', 'WHERE', 'IT', 'BROKE', 'UP', 'AND', 'FELL', 'ASUNDER', 'INTO', 'PLANKS', 'AND', 'ALL', 'AND', 'EVERYTHING', 'ON', 'BOARD', 'WERE', 'PLUNGED', 'INTO', 'THE', 'SEA'] +7018-75789-0031-2239: hyp=['WHEN', 'SUDDENLY', 'A', 'VIOLENT', 'SQUALL', 'OF', 'WIND', 'AROSE', 'AND', 'SMOTE', 'THE', 'SHIP', 'WHICH', 'ROSE', 'OUT', 'OF', 'THE', 'WATER', 'AND', 'SETTLED', 'UPON', 'A', 'GREAT', 'REEF', 'THE', 'HAUNT', 'OF', 'SEA', 'MONSTERS', 'WHERE', 'IT', 'BROKE', 'UP', 'AND', 'FELL', 'ASUNDER', 'INTO', 'PLANKS', 'AND', 'ALL', 'AND', 'EVERYTHING', 'ON', 'BOARD', 'WERE', 'PLUNGED', 'INTO', 'THE', 'SEA'] +7105-2330-0000-2240: ref=['UNFORTUNATELY', 'THERE', 'COULD', 'BE', 'NO', 'DOUBT', 'OR', 'MISCONCEPTION', 'AS', 'TO', "PLATTERBAFF'S", 'GUILT'] +7105-2330-0000-2240: hyp=['UNFORTUNATELY', 'THERE', 'COULD', 'BE', 'NO', 'DOUBT', 'OR', 'MISCONCEPTION', 'AS', 'TO', "PLATTERBAFF'S", 'GUILT'] +7105-2330-0001-2241: ref=['HE', 'HAD', 'NOT', 'ONLY', 'PLEADED', 'GUILTY', 'BUT', 'HAD', 'EXPRESSED', 'HIS', 'INTENTION', 'OF', 'REPEATING', 'HIS', 'ESCAPADE', 'IN', 'OTHER', 'DIRECTIONS', 'AS', 'SOON', 'AS', 'CIRCUMSTANCES', 'PERMITTED', 'THROUGHOUT', 'THE', 'TRIAL', 'HE', 'WAS', 'BUSY', 'EXAMINING', 'A', 'SMALL', 'MODEL', 'OF', 'THE', 'FREE', 'TRADE', 'HALL', 'IN', 'MANCHESTER'] +7105-2330-0001-2241: hyp=['HE', 'HAD', 'NOT', 'ONLY', 'PLEADED', 'GUILTY', 'BUT', 'HAD', 'EXPRESSED', 'HIS', 'INTENTION', 'OF', 'REPEATING', 'HIS', 'ESCAPADE', 'IN', 'OTHER', 'DIRECTIONS', 'AS', 'SOON', 'AS', 'CIRCUMSTANCES', 'PERMITTED', 'THROUGHOUT', 'THE', 'TRIAL', 'HE', 'WAS', 'BUSY', 'EXAMINING', 'A', 'SMALL', 'MODEL', 'OF', 'THE', 'FREE', 'TRADE', 'HALL', 'IN', 'MANCHESTER'] +7105-2330-0002-2242: ref=['THE', 'JURY', 'COULD', 'NOT', 'POSSIBLY', 'FIND', 'THAT', 'THE', 'PRISONER', 'HAD', 'NOT', 'DELIBERATELY', 
'AND', 'INTENTIONALLY', 'BLOWN', 'UP', 'THE', 'ALBERT', 'HALL', 'THE', 'QUESTION', 'WAS', 'COULD', 'THEY', 'FIND', 'ANY', 'EXTENUATING', 'CIRCUMSTANCES', 'WHICH', 'WOULD', 'PERMIT', 'OF', 'AN', 'ACQUITTAL'] +7105-2330-0002-2242: hyp=['THE', 'JURY', 'COULD', 'NOT', 'POSSIBLY', 'FIND', 'THAT', 'THE', 'PRISONER', 'HAD', 'NOT', 'DELIBERATELY', 'AND', 'INTENTIONALLY', 'BLOWN', 'UP', 'THE', 'ALBERT', 'HALL', 'THE', 'QUESTION', 'WAS', 'COULD', 'THEY', 'FIND', 'ANY', 'EXTENUATING', 'CIRCUMSTANCES', 'WHICH', 'WOULD', 'PERMIT', 'OF', 'AN', 'ACQUITTAL'] +7105-2330-0003-2243: ref=['OF', 'COURSE', 'ANY', 'SENTENCE', 'WHICH', 'THE', 'LAW', 'MIGHT', 'FEEL', 'COMPELLED', 'TO', 'INFLICT', 'WOULD', 'BE', 'FOLLOWED', 'BY', 'AN', 'IMMEDIATE', 'PARDON', 'BUT', 'IT', 'WAS', 'HIGHLY', 'DESIRABLE', 'FROM', 'THE', "GOVERNMENT'S", 'POINT', 'OF', 'VIEW', 'THAT', 'THE', 'NECESSITY', 'FOR', 'SUCH', 'AN', 'EXERCISE', 'OF', 'CLEMENCY', 'SHOULD', 'NOT', 'ARISE'] +7105-2330-0003-2243: hyp=['OF', 'COURSE', 'ANY', 'SENTENCE', 'WHICH', 'THE', 'LAW', 'MIGHT', 'FEEL', 'COMPELLED', 'TO', 'INFLICT', 'WOULD', 'BE', 'FOLLOWED', 'BY', 'AN', 'IMMEDIATE', 'PARDON', 'BUT', 'IT', 'WAS', 'HIGHLY', 'DESIRABLE', 'FROM', 'THE', "GOVERNMENT'S", 'POINT', 'OF', 'VIEW', 'THAT', 'THE', 'NECESSITY', 'FOR', 'SUCH', 'AN', 'EXERCISE', 'OF', 'CLEMENCY', 'SHOULD', 'NOT', 'ARISE'] +7105-2330-0004-2244: ref=['A', 'HEADLONG', 'PARDON', 'ON', 'THE', 'EVE', 'OF', 'A', 'BYE', 'ELECTION', 'WITH', 'THREATS', 'OF', 'A', 'HEAVY', 'VOTING', 'DEFECTION', 'IF', 'IT', 'WERE', 'WITHHELD', 'OR', 'EVEN', 'DELAYED', 'WOULD', 'NOT', 'NECESSARILY', 'BE', 'A', 'SURRENDER', 'BUT', 'IT', 'WOULD', 'LOOK', 'LIKE', 'ONE'] +7105-2330-0004-2244: hyp=['A', 'HEADLONG', 'PARDON', 'ON', 'THE', 'EVE', 'OF', 'A', 'BY', 'ELECTION', 'WITH', 'THREATS', 'OF', 'A', 'HEAVY', 'VOTING', 'DEFECTION', 'IF', 'IT', 'WERE', 'WITHHELD', 'OR', 'EVEN', 'DELAYED', 'WOULD', 'NOT', 'NECESSARILY', 'BE', 'A', 'SURRENDER', 'BUT', 'IT', 'WOULD', 'LOOK', 'LIKE', 'ONE'] +7105-2330-0005-2245: ref=['HENCE', 'THE', 'ANXIETY', 'IN', 'THE', 'CROWDED', 'COURT', 'AND', 'IN', 'THE', 'LITTLE', 'GROUPS', 'GATHERED', 'ROUND', 'THE', 'TAPE', 'MACHINES', 'IN', 'WHITEHALL', 'AND', 'DOWNING', 'STREET', 'AND', 'OTHER', 'AFFECTED', 'CENTRES'] +7105-2330-0005-2245: hyp=['HENCE', 'THE', 'ANXIETY', 'IN', 'THE', 'CROWDED', 'COURT', 'AND', 'IN', 'THE', 'LITTLE', 'GROUPS', 'GATHERED', 'ROUND', 'THE', 'TAPE', 'MACHINES', 'IN', 'WHITEHALL', 'AND', 'DAWNING', 'STREET', 'AND', 'OTHER', 'AFFECTED', 'CENTRES'] +7105-2330-0006-2246: ref=['THE', 'JURY', 'RETURNED', 'FROM', 'CONSIDERING', 'THEIR', 'VERDICT', 'THERE', 'WAS', 'A', 'FLUTTER', 'AN', 'EXCITED', 'MURMUR', 'A', 'DEATHLIKE', 'HUSH'] +7105-2330-0006-2246: hyp=['THE', 'JURY', 'RETURNED', 'FROM', 'CONSIDERING', 'THEIR', 'VERDICT', 'THERE', 'WAS', 'A', 'FLUTTER', 'AN', 'EXCITED', 'MURMUR', 'A', 'DEATH', 'LIKE', 'HUSH'] +7105-2330-0007-2247: ref=['THE', 'FOREMAN', 'DELIVERED', 'HIS', 'MESSAGE'] +7105-2330-0007-2247: hyp=['THE', 'FOREMAN', 'DELIVERED', 'HIS', 'MESSAGE'] +7105-2330-0008-2248: ref=['THE', 'JURY', 'FIND', 'THE', 'PRISONER', 'GUILTY', 'OF', 'BLOWING', 'UP', 'THE', 'ALBERT', 'HALL'] +7105-2330-0008-2248: hyp=['THE', 'JURY', 'FIND', 'THE', 'PRISONER', 'GUILTY', 'OF', 'BLOWING', 'UP', 'THE', 'ALBERT', 'HALL'] +7105-2330-0009-2249: ref=['THE', 'JURY', 'WISH', 'TO', 'ADD', 'A', 'RIDER', 'DRAWING', 'ATTENTION', 'TO', 'THE', 'FACT', 'THAT', 'A', 'BY', 'ELECTION', 'IS', 'PENDING', 'IN', 'THE', 'PARLIAMENTARY', 'DIVISION', 'OF', 'NEMESIS', 'ON', 'HAND'] +7105-2330-0009-2249: 
hyp=['THE', 'JURY', 'WISHED', 'TO', 'ADD', 'A', 'WRITER', 'DRAWING', 'ATTENTION', 'TO', 'THE', 'FACT', 'THAT', 'A', 'BIE', 'ELECTION', 'IS', 'SPENDING', 'IN', 'THE', 'PARLIAMENTARY', 'DIVISION', 'OF', 'NEMESIS', 'ON', 'HAND'] +7105-2330-0010-2250: ref=['AND', 'MAY', 'THE', 'LORD', 'HAVE', 'MERCY', 'ON', 'THE', 'POLL', 'A', 'JUNIOR', 'COUNSEL', 'EXCLAIMED', 'IRREVERENTLY'] +7105-2330-0010-2250: hyp=['AND', 'MAY', 'THE', 'LORD', 'HAVE', 'MERCY', 'ON', 'THE', 'POLE', 'A', 'JUNIOR', 'CONSUL', 'EXCLAIMED', 'IRREVERENTLY'] +7105-2330-0011-2251: ref=['FIFTEEN', 'HUNDRED', 'SAID', 'THE', 'PRIME', 'MINISTER', 'WITH', 'A', 'SHUDDER', "IT'S", 'TOO', 'HORRIBLE', 'TO', 'THINK', 'OF'] +7105-2330-0011-2251: hyp=['FIFTEEN', 'HUNDRED', 'SAID', 'THE', 'PRIME', 'MINISTER', 'WITH', 'A', 'SHUDDER', "IT'S", 'TOO', 'HORRIBLE', 'TO', 'THINK', 'OF'] +7105-2330-0012-2252: ref=['OUR', 'MAJORITY', 'LAST', 'TIME', 'WAS', 'ONLY', 'A', 'THOUSAND', 'AND', 'SEVEN'] +7105-2330-0012-2252: hyp=['OUR', 'MAJORITY', 'LAST', 'TIME', 'WAS', 'ONLY', 'A', 'THOUSAND', 'AND', 'SEVEN'] +7105-2330-0013-2253: ref=['SEVEN', 'THIRTY', 'AMENDED', 'THE', 'PRIME', 'MINISTER', 'WE', 'MUST', 'AVOID', 'ANY', 'APPEARANCE', 'OF', 'PRECIPITANCY'] +7105-2330-0013-2253: hyp=['SEVEN', 'THIRTY', 'AMENDED', 'THE', 'PRIME', 'MINISTER', 'WE', 'MUST', 'AVOID', 'ANY', 'APPEARANCE', 'OF', 'PRECIPITANCY'] +7105-2330-0014-2254: ref=['NOT', 'LATER', 'THAN', 'SEVEN', 'THIRTY', 'THEN', 'SAID', 'THE', 'CHIEF', 'ORGANISER', 'I', 'HAVE', 'PROMISED', 'THE', 'AGENT', 'DOWN', 'THERE', 'THAT', 'HE', 'SHALL', 'BE', 'ABLE', 'TO', 'DISPLAY', 'POSTERS', 'ANNOUNCING', 'PLATTERBAFF', 'IS', 'OUT', 'BEFORE', 'THE', 'POLL', 'OPENS'] +7105-2330-0014-2254: hyp=['NOT', 'LATER', 'THAN', 'SEVEN', 'THIRTY', 'THEN', 'SAID', 'THE', 'CHIEF', 'ORGANISER', 'I', 'HAVE', 'PROMISED', 'THE', 'AGENT', 'DOWN', 'THERE', 'THAT', 'HE', 'SHALL', 'BE', 'ABLE', 'TO', 'DISPLAY', 'POSTERS', 'ANNOUNCING', 'PLATTER', 'BAFF', 'IS', 'OUT', 'BEFORE', 'THE', 'POLL', 'OPENS'] +7105-2330-0015-2255: ref=['HE', 'SAID', 'IT', 'WAS', 'OUR', 'ONLY', 'CHANCE', 'OF', 'GETTING', 'A', 'TELEGRAM', 'RADPROP', 'IS', 'IN', 'TO', 'NIGHT'] +7105-2330-0015-2255: hyp=['HE', 'SAID', 'IT', 'WAS', 'OUR', 'ONLY', 'CHANCE', 'OF', 'GETTING', 'A', 'TELEGRAM', 'RHYDROPPIST', 'IN', 'TO', 'NIGHT'] +7105-2330-0016-2256: ref=['DESPITE', 'THE', 'EARLINESS', 'OF', 'THE', 'HOUR', 'A', 'SMALL', 'CROWD', 'HAD', 'GATHERED', 'IN', 'THE', 'STREET', 'OUTSIDE', 'AND', 'THE', 'HORRIBLE', 'MENACING', 'TRELAWNEY', 'REFRAIN', 'OF', 'THE', 'FIFTEEN', 'HUNDRED', 'VOTING', 'MEN', 'CAME', 'IN', 'A', 'STEADY', 'MONOTONOUS', 'CHANT'] +7105-2330-0016-2256: hyp=['DESPITE', 'THE', 'EARLINESS', 'OF', 'THE', 'HOUR', 'A', 'SMALL', 'CROWD', 'HAD', 'GATHERED', 'IN', 'THE', 'STREET', 'OUTSIDE', 'AND', 'THE', 'HORRIBLE', 'MENACING', 'TRE', 'LONE', 'REFRAIN', 'OF', 'THE', 'FIFTEEN', 'HUNDRED', 'VOTING', 'MEN', 'CAME', 'IN', 'A', 'STEADY', 'MONOTONOUS', 'CHANT'] +7105-2330-0017-2257: ref=['HE', 'EXCLAIMED', "WON'T", 'GO'] +7105-2330-0017-2257: hyp=['HE', 'EXCLAIMED', "WON'T", 'GO'] +7105-2330-0018-2258: ref=['HE', 'SAYS', 'HE', 'NEVER', 'HAS', 'LEFT', 'PRISON', 'WITHOUT', 'A', 'BRASS', 'BAND', 'TO', 'PLAY', 'HIM', 'OUT', 'AND', "HE'S", 'NOT', 'GOING', 'TO', 'GO', 'WITHOUT', 'ONE', 'NOW'] +7105-2330-0018-2258: hyp=['HE', 'SAYS', 'HE', 'NEVER', 'HAS', 'LEFT', 'PRISON', 'WITHOUT', 'A', 'BREAST', 'BAND', 'TO', 'PLAY', 'HIM', 'OUT', 'AND', "HE'S", 'NOT', 'GOING', 'TO', 'GO', 'WITHOUT', 'ONE', 'NOW'] +7105-2330-0019-2259: ref=['SAID', 'THE', 'PRIME', 'MINISTER', 'WE', 'CAN', 
'HARDLY', 'BE', 'SUPPOSED', 'TO', 'SUPPLY', 'A', 'RELEASED', 'PRISONER', 'WITH', 'A', 'BRASS', 'BAND', 'HOW', 'ON', 'EARTH', 'COULD', 'WE', 'DEFEND', 'IT', 'ON', 'THE', 'ESTIMATES'] +7105-2330-0019-2259: hyp=['SAID', 'THE', 'PRIME', 'MINISTER', 'WE', 'CAN', 'HARDLY', 'BE', 'SUPPOSED', 'TO', 'SUPPLY', 'A', 'RELEASED', 'PRISONER', 'WITH', 'A', 'BRASS', 'BAND', 'HOW', 'ON', 'EARTH', 'COULD', 'WE', 'DEFEND', 'IT', 'ON', 'THE', 'ESTIMATES'] +7105-2330-0020-2260: ref=['ANYWAY', 'HE', "WON'T", 'GO', 'UNLESS', 'HE', 'HAS', 'A', 'BAND'] +7105-2330-0020-2260: hyp=['ANYWAY', 'HE', "WON'T", 'GO', 'UNLESS', 'HE', 'HAS', 'A', 'BAND'] +7105-2330-0021-2261: ref=['POLL', 'OPENS', 'IN', 'FIVE', 'MINUTES'] +7105-2330-0021-2261: hyp=['ALL', 'OPENS', 'IN', 'FIVE', 'MINUTES'] +7105-2330-0022-2262: ref=['IS', 'PLATTERBAFF', 'OUT', 'YET'] +7105-2330-0022-2262: hyp=['IS', 'FLATTERBUFF', 'OUT', 'YET'] +7105-2330-0023-2263: ref=['IN', "HEAVEN'S", 'NAME', 'WHY'] +7105-2330-0023-2263: hyp=['IN', "HEAVEN'S", 'NAME', 'WHY'] +7105-2330-0024-2264: ref=['THE', 'CHIEF', 'ORGANISER', 'RANG', 'OFF'] +7105-2330-0024-2264: hyp=['THE', 'CHIEF', 'ORGANISER', 'RANG', 'OFF'] +7105-2330-0025-2265: ref=['THIS', 'IS', 'NOT', 'A', 'MOMENT', 'FOR', 'STANDING', 'ON', 'DIGNITY', 'HE', 'OBSERVED', 'BLUNTLY', 'MUSICIANS', 'MUST', 'BE', 'SUPPLIED', 'AT', 'ONCE'] +7105-2330-0025-2265: hyp=['THIS', 'IS', 'NOT', 'A', 'MOMENT', 'FOR', 'STANDING', 'ON', 'DIGNITY', 'HE', 'OBSERVED', 'BLUNTLY', 'MUSICIANS', 'MUST', 'BE', 'SUPPLIED', 'AT', 'ONCE'] +7105-2330-0026-2266: ref=["CAN'T", 'YOU', 'GET', 'A', 'STRIKE', 'PERMIT', 'ASKED', 'THE', 'ORGANISER'] +7105-2330-0026-2266: hyp=["CAN'T", 'YOU', 'GET', 'A', 'STRIKE', 'PERMIT', 'ASKED', 'THE', 'ORGANISR'] +7105-2330-0027-2267: ref=["I'LL", 'TRY', 'SAID', 'THE', 'HOME', 'SECRETARY', 'AND', 'WENT', 'TO', 'THE', 'TELEPHONE'] +7105-2330-0027-2267: hyp=["I'LL", 'TRY', 'SAID', 'THE', 'HOME', 'SECRETARY', 'AND', 'WENT', 'TO', 'THE', 'TELEPHONE'] +7105-2330-0028-2268: ref=['EIGHT', "O'CLOCK", 'STRUCK', 'THE', 'CROWD', 'OUTSIDE', 'CHANTED', 'WITH', 'AN', 'INCREASING', 'VOLUME', 'OF', 'SOUND', 'WILL', 'VOTE', 'THE', 'OTHER', 'WAY'] +7105-2330-0028-2268: hyp=['EIGHT', "O'CLOCK", 'STRUCK', 'THE', 'CROWD', 'OUTSIDE', 'CHANTED', 'WITH', 'AN', 'INCREASING', 'VOLUME', 'OF', 'SOUND', "WE'LL", 'VOTE', 'THE', 'OTHER', 'WAY'] +7105-2330-0029-2269: ref=['A', 'TELEGRAM', 'WAS', 'BROUGHT', 'IN'] +7105-2330-0029-2269: hyp=['A', 'TELEGRAM', 'WAS', 'BROUGHT', 'IN'] +7105-2330-0030-2270: ref=['IT', 'WAS', 'FROM', 'THE', 'CENTRAL', 'COMMITTEE', 'ROOMS', 'AT', 'NEMESIS'] +7105-2330-0030-2270: hyp=['IT', 'WAS', 'FROM', 'THE', 'CENTRAL', 'COMMITTEE', 'ROOMS', 'AT', 'NEMESIS'] +7105-2330-0031-2271: ref=['WITHOUT', 'A', 'BAND', 'HE', 'WOULD', 'NOT', 'GO', 'AND', 'THEY', 'HAD', 'NO', 'BAND'] +7105-2330-0031-2271: hyp=['WITHOUT', 'A', 'BAND', 'HE', 'WOULD', 'NOT', 'GO', 'AND', 'THEY', 'HAD', 'NO', 'BAND'] +7105-2330-0032-2272: ref=['A', 'QUARTER', 'PAST', 'TEN', 'HALF', 'PAST'] +7105-2330-0032-2272: hyp=['A', 'QUARTER', 'PAST', 'TEN', 'HALF', 'PAST'] +7105-2330-0033-2273: ref=['HAVE', 'YOU', 'ANY', 'BAND', 'INSTRUMENTS', 'OF', 'AN', 'EASY', 'NATURE', 'TO', 'PLAY'] +7105-2330-0033-2273: hyp=['HAVE', 'YOU', 'ANY', 'BAND', 'INSTRUMENTS', 'OF', 'AN', 'EASY', 'NATURE', 'TO', 'PLAY'] +7105-2330-0034-2274: ref=['DEMANDED', 'THE', 'CHIEF', 'ORGANISER', 'OF', 'THE', 'PRISON', 'GOVERNOR', 'DRUMS', 'CYMBALS', 'THOSE', 'SORT', 'OF', 'THINGS'] +7105-2330-0034-2274: hyp=['DEMANDED', 'THE', 'CHIEF', 'ORGANISER', 'OF', 'THE', 'PRISON', 'GOVERNOR', 
'DRUMS', 'CYMBALS', 'THOSE', 'SORT', 'OF', 'THINGS'] +7105-2330-0035-2275: ref=['THE', 'WARDERS', 'HAVE', 'A', 'PRIVATE', 'BAND', 'OF', 'THEIR', 'OWN', 'SAID', 'THE', 'GOVERNOR', 'BUT', 'OF', 'COURSE', 'I', "COULDN'T", 'ALLOW', 'THE', 'MEN', 'THEMSELVES'] +7105-2330-0035-2275: hyp=['THE', 'WARDERS', 'HAVE', 'A', 'PRIVATE', 'BAND', 'OF', 'THEIR', 'OWN', 'SAID', 'THE', 'GOVERNOR', 'BUT', 'OF', 'COURSE', 'I', "COULDN'T", 'ALLOW', 'THE', 'MEN', 'THEMSELVES'] +7105-2330-0036-2276: ref=['LEND', 'US', 'THE', 'INSTRUMENTS', 'SAID', 'THE', 'CHIEF', 'ORGANISER'] +7105-2330-0036-2276: hyp=['LEND', 'US', 'THE', 'INSTRUMENTS', 'SAID', 'THE', 'CHIEF', 'ORGANIZER'] +7105-2330-0037-2277: ref=['THE', 'POPULAR', 'SONG', 'OF', 'THE', 'MOMENT', 'REPLIED', 'THE', 'AGITATOR', 'AFTER', 'A', "MOMENT'S", 'REFLECTION'] +7105-2330-0037-2277: hyp=['THE', 'POPULAR', 'SONG', 'OF', 'THE', 'MOMENT', 'REPLIED', 'THE', 'AGITATOR', 'AFTER', 'A', "MOMENT'S", 'REFLECTION'] +7105-2330-0038-2278: ref=['IT', 'WAS', 'A', 'TUNE', 'THEY', 'HAD', 'ALL', 'HEARD', 'HUNDREDS', 'OF', 'TIMES', 'SO', 'THERE', 'WAS', 'NO', 'DIFFICULTY', 'IN', 'TURNING', 'OUT', 'A', 'PASSABLE', 'IMITATION', 'OF', 'IT', 'TO', 'THE', 'IMPROVISED', 'STRAINS', 'OF', 'I', "DIDN'T", 'WANT', 'TO', 'DO', 'IT', 'THE', 'PRISONER', 'STRODE', 'FORTH', 'TO', 'FREEDOM'] +7105-2330-0038-2278: hyp=['IT', 'WAS', 'A', 'TUNE', 'THEY', 'HAD', 'ALL', 'HEARD', 'HUNDREDS', 'OF', 'TIMES', 'SO', 'THERE', 'WAS', 'NO', 'DIFFICULTY', 'IN', 'TURNING', 'OUT', 'A', 'PASSABLE', 'IMITATION', 'OF', 'IT', 'TO', 'THE', 'IMPROVISED', 'TRAINS', 'OF', 'I', "DON'T", 'WANT', 'TO', 'DO', 'IT', 'THE', 'PRISONERS', 'STROLLED', 'FORTH', 'TO', 'FREEDOM'] +7105-2330-0039-2279: ref=['THE', 'WORD', 'OF', 'THE', 'SONG', 'HAD', 'REFERENCE', 'IT', 'WAS', 'UNDERSTOOD', 'TO', 'THE', 'INCARCERATING', 'GOVERNMENT', 'AND', 'NOT', 'TO', 'THE', 'DESTROYER', 'OF', 'THE', 'ALBERT', 'HALL'] +7105-2330-0039-2279: hyp=['THE', 'WORD', 'OF', 'THE', 'SONG', 'HAD', 'REFERENCE', 'IT', 'WAS', 'UNDERSTOOD', 'TO', 'THE', 'INCARCERATING', 'GOVERNMENT', 'AND', 'NOT', 'TO', 'THE', 'DESTROYER', 'OF', 'THE', 'ALBERT', 'HALL'] +7105-2330-0040-2280: ref=['THE', 'SEAT', 'WAS', 'LOST', 'AFTER', 'ALL', 'BY', 'A', 'NARROW', 'MAJORITY'] +7105-2330-0040-2280: hyp=['THE', 'SEAT', 'WAS', 'LOST', 'AFTER', 'ALL', 'BY', 'A', 'NARROW', 'MATURITY'] +7105-2330-0041-2281: ref=['THE', 'LOCAL', 'TRADE', 'UNIONISTS', 'TOOK', 'OFFENCE', 'AT', 'THE', 'FACT', 'OF', 'CABINET', 'MINISTERS', 'HAVING', 'PERSONALLY', 'ACTED', 'AS', 'STRIKE', 'BREAKERS', 'AND', 'EVEN', 'THE', 'RELEASE', 'OF', 'PLATTERBAFF', 'FAILED', 'TO', 'PACIFY', 'THEM'] +7105-2330-0041-2281: hyp=['THE', 'LOCAL', 'TRADE', 'UNIONISTS', 'TOOK', 'OFFENCE', 'AT', 'THE', 'FACT', 'OF', 'CABINET', 'MINISTERS', 'HAVING', 'PERSONALLY', 'ACTED', 'AS', 'STRIKE', 'BREAKERS', 'AND', 'EVEN', 'THE', 'RELEASE', 'OF', 'PLATTERBAFF', 'FAILED', 'TO', 'PACIFY', 'THEM'] +7105-2340-0000-2282: ref=['WITH', 'THAT', 'NOTORIOUS', 'FAILING', 'OF', 'HIS', 'HE', 'WAS', 'NOT', 'THE', 'SORT', 'OF', 'PERSON', 'ONE', 'WANTED', 'IN', "ONE'S", 'HOUSE'] +7105-2340-0000-2282: hyp=['WITH', 'THAT', 'NOTORIOUS', 'FAILING', 'OF', 'HIS', 'HE', 'WAS', 'NOT', 'THE', 'SORT', 'OF', 'PERSON', 'ONE', 'WANTED', 'IN', "ONE'S", 'HOUSE'] +7105-2340-0001-2283: ref=['WELL', 'THE', 'FAILING', 'STILL', 'EXISTS', "DOESN'T", 'IT', 'SAID', 'HER', 'HUSBAND', 'OR', 'DO', 'YOU', 'SUPPOSE', 'A', 'REFORM', 'OF', 'CHARACTER', 'IS', 'ENTAILED', 'ALONG', 'WITH', 'THE', 'ESTATE'] +7105-2340-0001-2283: hyp=['WELL', 'THE', 'FAILING', 'STILL', 'EXISTS', 
"DOESN'T", 'IT', 'SAID', 'THE', 'HUSBAND', 'OR', 'DO', 'YOU', 'SUPPOSE', 'A', 'REFORM', 'OF', 'CHARACTER', 'IS', 'ENTAILED', 'ALONG', 'WITH', 'THE', 'ESTATE'] +7105-2340-0002-2284: ref=['BESIDES', 'CYNICISM', 'APART', 'HIS', 'BEING', 'RICH', 'WILL', 'MAKE', 'A', 'DIFFERENCE', 'IN', 'THE', 'WAY', 'PEOPLE', 'WILL', 'LOOK', 'AT', 'HIS', 'FAILING'] +7105-2340-0002-2284: hyp=['BESIDES', 'CYNICISM', 'APART', 'HIS', 'BEING', 'RICH', 'WILL', 'MAKE', 'A', 'DIFFERENCE', 'IN', 'THE', 'WAY', 'PEOPLE', 'WILL', 'LOOK', 'AT', 'HIS', 'FAILING'] +7105-2340-0003-2285: ref=['WHEN', 'A', 'MAN', 'IS', 'ABSOLUTELY', 'WEALTHY', 'NOT', 'MERELY', 'WELL', 'TO', 'DO', 'ALL', 'SUSPICION', 'OF', 'SORDID', 'MOTIVE', 'NATURALLY', 'DISAPPEARS', 'THE', 'THING', 'BECOMES', 'MERELY', 'A', 'TIRESOME', 'MALADY'] +7105-2340-0003-2285: hyp=['WHEN', 'A', 'MAN', 'IS', 'ABSOLUTELY', 'WEALTHY', 'NOT', 'MERELY', 'WELL', 'TO', 'DO', 'ALL', 'SUSPICION', 'OF', 'SORDID', 'MOTIVE', 'NATURALLY', 'DISAPPEARS', 'THE', 'THING', 'BECOMES', 'MERELY', 'A', 'TIRESOME', 'MALADY'] +7105-2340-0004-2286: ref=['WILFRID', 'PIGEONCOTE', 'HAD', 'SUDDENLY', 'BECOME', 'HEIR', 'TO', 'HIS', 'UNCLE', 'SIR', 'WILFRID', 'PIGEONCOTE', 'ON', 'THE', 'DEATH', 'OF', 'HIS', 'COUSIN', 'MAJOR', 'WILFRID', 'PIGEONCOTE', 'WHO', 'HAD', 'SUCCUMBED', 'TO', 'THE', 'AFTER', 'EFFECTS', 'OF', 'A', 'POLO', 'ACCIDENT'] +7105-2340-0004-2286: hyp=['WILFRID', 'PIGEONCOTE', 'HAD', 'SUDDENLY', 'BECOME', 'HEIR', 'TO', 'HIS', 'UNCLE', 'SIR', 'WILFRID', 'PIGEON', 'COTE', 'ON', 'THE', 'DEATH', 'OF', 'HIS', 'COUSIN', 'MAJOR', 'WILFRID', 'PIGEONCOTE', 'WHO', 'HAD', 'SUCCUMBED', 'TO', 'THE', 'AFTER', 'EFFECTS', 'OF', 'A', 'POLO', 'ACCIDENT'] +7105-2340-0005-2287: ref=['A', 'WILFRID', 'PIGEONCOTE', 'HAD', 'COVERED', 'HIMSELF', 'WITH', 'HONOURS', 'IN', 'THE', 'COURSE', 'OF', "MARLBOROUGH'S", 'CAMPAIGNS', 'AND', 'THE', 'NAME', 'WILFRID', 'HAD', 'BEEN', 'A', 'BAPTISMAL', 'WEAKNESS', 'IN', 'THE', 'FAMILY', 'EVER', 'SINCE', 'THE', 'NEW', 'HEIR', 'TO', 'THE', 'FAMILY', 'DIGNITY', 'AND', 'ESTATES', 'WAS', 'A', 'YOUNG', 'MAN', 'OF', 'ABOUT', 'FIVE', 'AND', 'TWENTY', 'WHO', 'WAS', 'KNOWN', 'MORE', 'BY', 'REPUTATION', 'THAN', 'BY', 'PERSON', 'TO', 'A', 'WIDE', 'CIRCLE', 'OF', 'COUSINS', 'AND', 'KINSFOLK'] +7105-2340-0005-2287: hyp=['A', 'WILFRED', 'PICHKOTE', 'HAD', 'COVERED', 'HIMSELF', 'WITH', 'HONOURS', 'IN', 'THE', 'COURSE', 'OF', "MARLBOROUGH'S", 'CAMPAIGNS', 'AND', 'THE', 'NAME', 'WILFRID', 'HAD', 'BEEN', 'A', 'BAPTISMAL', 'WEAKNESS', 'IN', 'THE', 'FAMILY', 'EVER', 'SINCE', 'THE', 'NEW', 'HEIR', 'TO', 'THE', 'FAMILY', 'DIGNITY', 'AND', 'ESTATES', 'WAS', 'A', 'YOUNG', 'MAN', 'OF', 'ABOUT', 'FIVE', 'AND', 'TWENTY', 'WHO', 'WAS', 'KNOWN', 'MORE', 'BY', 'REPUTATION', 'THAN', 'BY', 'PERSON', 'TO', 'A', 'WIDE', 'CIRCLE', 'OF', 'COUSINS', 'AND', 'KINSFOLK'] +7105-2340-0006-2288: ref=['AND', 'THE', 'REPUTATION', 'WAS', 'AN', 'UNPLEASANT', 'ONE'] +7105-2340-0006-2288: hyp=['AND', 'THE', 'REPUTATION', 'WAS', 'AN', 'UNPLEASANT', 'ONE'] +7105-2340-0007-2289: ref=['FROM', 'HIS', 'LATE', 'SCHOOLDAYS', 'ONWARD', 'HE', 'HAD', 'BEEN', 'POSSESSED', 'BY', 'AN', 'ACUTE', 'AND', 'OBSTINATE', 'FORM', 'OF', 'KLEPTOMANIA', 'HE', 'HAD', 'THE', 'ACQUISITIVE', 'INSTINCT', 'OF', 'THE', 'COLLECTOR', 'WITHOUT', 'ANY', 'OF', 'THE', "COLLECTOR'S", 'DISCRIMINATION'] +7105-2340-0007-2289: hyp=['FROM', 'HIS', 'LATE', 'SCHOOL', 'DAYS', 'ONWARD', 'HE', 'HAD', 'BEEN', 'POSSESSED', 'BY', 'AN', 'ACUTE', 'AND', 'OBSTINATE', 'FORM', 'OF', 'CLAPTOMANIA', 'HE', 'HAD', 'THE', 'ACQUISITIVE', 'INSTINCT', 'OF', 'THE', 'COLLECTOR', 'WITHOUT', 
'ANY', 'OF', 'THE', "COLLECTOR'S", 'DISCRIMINATION'] +7105-2340-0008-2290: ref=['THE', 'SEARCH', 'USUALLY', 'PRODUCED', 'A', 'LARGE', 'AND', 'VARIED', 'YIELD', 'THIS', 'IS', 'FUNNY', 'SAID', 'PETER', 'PIGEONCOTE', 'TO', 'HIS', 'WIFE', 'SOME', 'HALF', 'HOUR', 'AFTER', 'THEIR', 'CONVERSATION', "HERE'S", 'A', 'TELEGRAM', 'FROM', 'WILFRID', 'SAYING', "HE'S", 'PASSING', 'THROUGH', 'HERE', 'IN', 'HIS', 'MOTOR', 'AND', 'WOULD', 'LIKE', 'TO', 'STOP', 'AND', 'PAY', 'US', 'HIS', 'RESPECTS'] +7105-2340-0008-2290: hyp=['THE', 'SEARCH', 'USUALLY', 'PRODUCED', 'A', 'LARGE', 'AND', 'VARIED', 'YIELD', 'THIS', 'IS', 'FUNNY', 'SAID', 'PETER', 'PIGEONBUL', 'TO', 'HIS', 'WIFE', 'SOME', 'HALF', 'HOUR', 'AFTER', 'THEIR', 'CONVERSATION', "HERE'S", 'A', 'TELEGRAM', 'FROM', 'WILFRED', 'SAYING', 'HE', 'IS', 'PASSING', 'THROUGH', 'HERE', 'IN', 'HIS', 'MOTOR', 'AND', 'WOULD', 'LIKE', 'TO', 'STOP', 'AND', 'PAY', 'US', 'HIS', 'RESPECTS'] +7105-2340-0009-2291: ref=['SIGNED', 'WILFRID', 'PIGEONCOTE'] +7105-2340-0009-2291: hyp=['SIGHING', 'WILFRED', 'PIGEONCOTE'] +7105-2340-0010-2292: ref=['I', 'SUPPOSE', "HE'S", 'BRINGING', 'US', 'A', 'PRESENT', 'FOR', 'THE', 'SILVER', 'WEDDING', 'GOOD', 'GRACIOUS'] +7105-2340-0010-2292: hyp=['I', 'SUPPOSE', "HE'S", 'BRINGING', 'US', 'A', 'PRESENT', 'FOR', 'THE', 'SILVER', 'WEDDING', 'GOOD', 'GRACIOUS'] +7105-2340-0011-2293: ref=['THE', 'TALK', 'FLITTED', 'NERVOUSLY', 'AND', 'HURRIEDLY', 'FROM', 'ONE', 'IMPERSONAL', 'TOPIC', 'TO', 'ANOTHER'] +7105-2340-0011-2293: hyp=['THE', 'TALK', 'FLITTED', 'NERVOUSLY', 'AND', 'HURRIEDLY', 'FROM', 'ONE', 'IMPERSONAL', 'TOPIC', 'TO', 'ANOTHER'] +7105-2340-0012-2294: ref=['IN', 'THE', 'DRAWING', 'ROOM', 'AFTER', 'DINNER', 'THEIR', 'NERVOUSNESS', 'AND', 'AWKWARDNESS', 'INCREASED'] +7105-2340-0012-2294: hyp=['IN', 'THE', 'DRAWING', 'ROOM', 'AFTER', 'DINNER', 'THEIR', 'NERVOUSNESS', 'AND', 'AWKWARDNESS', 'INCREASED'] +7105-2340-0013-2295: ref=['OH', 'WE', "HAVEN'T", 'SHOWN', 'YOU', 'THE', 'SILVER', 'WEDDING', 'PRESENTS', 'SAID', 'MISSUS', 'PETER', 'SUDDENLY', 'AS', 'THOUGH', 'STRUCK', 'BY', 'A', 'BRILLIANT', 'IDEA', 'FOR', 'ENTERTAINING', 'THE', 'GUEST', 'HERE', 'THEY', 'ALL', 'ARE'] +7105-2340-0013-2295: hyp=['OH', 'WE', "HAVEN'T", 'SHOWN', 'YOU', 'THE', 'SILVER', 'WEDDING', 'PRESENTS', 'SAID', 'MISSUS', 'PETER', 'SUDDENLY', 'AS', 'THOUGH', 'STRUCK', 'BY', 'A', 'BRILLIANT', 'IDEA', 'FOR', 'ENTERTAINING', 'THE', 'GUEST', 'HERE', 'THEY', 'ALL', 'ARE'] +7105-2340-0014-2296: ref=['SUCH', 'NICE', 'USEFUL', 'GIFTS', 'A', 'FEW', 'DUPLICATES', 'OF', 'COURSE'] +7105-2340-0014-2296: hyp=['SUCH', 'NICE', 'USEFUL', 'GIFTS', 'A', 'FEW', 'DUPLICATES', 'OF', 'COURSE'] +7105-2340-0015-2297: ref=['SEVEN', 'CREAM', 'JUGS', 'PUT', 'IN', 'PETER'] +7105-2340-0015-2297: hyp=['SEVEN', 'CREAM', 'JUGS', 'PUT', 'IN', 'PETER'] +7105-2340-0016-2298: ref=['WE', 'FEEL', 'THAT', 'WE', 'MUST', 'LIVE', 'ON', 'CREAM', 'FOR', 'THE', 'REST', 'OF', 'OUR', 'LIVES'] +7105-2340-0016-2298: hyp=['WE', 'FEEL', 'THAT', 'WE', 'MUST', 'LIVE', 'UNCREAM', 'FOR', 'THE', 'REST', 'OF', 'OUR', 'LIVES'] +7105-2340-0017-2299: ref=['OF', 'COURSE', 'SOME', 'OF', 'THEM', 'CAN', 'BE', 'CHANGED'] +7105-2340-0017-2299: hyp=['OF', 'COURSE', 'SOME', 'OF', 'THEM', 'CAN', 'BE', 'CHANGED'] +7105-2340-0018-2300: ref=['I', 'PUT', 'IT', 'DOWN', 'BY', 'THE', 'CLARET', 'JUG', 'SAID', 'WILFRID', 'BUSY', 'WITH', 'ANOTHER', 'OBJECT'] +7105-2340-0018-2300: hyp=['I', 'PUT', 'IT', 'DOWN', 'BY', 'THE', 'CLARY', 'JUG', 'SAID', 'WILFRID', 'BUSY', 'WITH', 'ANOTHER', 'OBJECT'] +7105-2340-0019-2301: ref=['VIGILANCE', 'WAS', 'NOT', 
'COMPLETELY', 'CROWNED', 'WITH', 'A', 'SENSE', 'OF', 'VICTORY'] +7105-2340-0019-2301: hyp=['VIGILANCE', 'WAS', 'NOT', 'COMPLETELY', 'CROWNED', 'WITH', 'A', 'SENSE', 'OF', 'VICTORY'] +7105-2340-0020-2302: ref=['AFTER', 'THEY', 'HAD', 'SAID', 'GOOD', 'NIGHT', 'TO', 'THEIR', 'VISITOR', 'MISSUS', 'PETER', 'EXPRESSED', 'HER', 'CONVICTION', 'THAT', 'HE', 'HAD', 'TAKEN', 'SOMETHING'] +7105-2340-0020-2302: hyp=['AFTER', 'THEY', 'HAD', 'SAID', 'GOOD', 'NIGHT', 'TO', 'THEIR', 'VISITOR', 'MISSUS', 'PETER', 'EXPRESSED', 'HER', 'CONVICTION', 'THAT', 'HE', 'HAD', 'TAKEN', 'SOMETHING'] +7105-2340-0021-2303: ref=['HOW', 'ON', 'EARTH', 'ARE', 'WE', 'TO', 'KNOW', 'SAID', 'PETER', 'THE', 'MEAN', 'PIG', "HASN'T", 'BROUGHT', 'US', 'A', 'PRESENT', 'AND', "I'M", 'HANGED', 'IF', 'HE', 'SHALL', 'CARRY', 'ONE', 'OFF'] +7105-2340-0021-2303: hyp=['HOW', 'ON', 'EARTH', 'ARE', 'WE', 'TO', 'KNOW', 'SAID', 'PETER', 'THE', 'MEAN', 'PIG', "HASN'T", 'BROUGHT', 'US', 'A', 'PRESENT', 'AND', "I'M", 'HANGED', 'IF', 'HE', 'SHALL', 'CARRY', 'ONE', 'OFF'] +7105-2340-0022-2304: ref=["IT'S", 'THE', 'ONLY', 'THING', 'TO', 'DO'] +7105-2340-0022-2304: hyp=['IS', 'THE', 'ONLY', 'THING', 'TO', 'DO'] +7105-2340-0023-2305: ref=['WILFRID', 'WAS', 'LATE', 'IN', 'COMING', 'DOWN', 'TO', 'BREAKFAST', 'AND', 'HIS', 'MANNER', 'SHOWED', 'PLAINLY', 'THAT', 'SOMETHING', 'WAS', 'AMISS'] +7105-2340-0023-2305: hyp=['WILFRID', 'WAS', 'LATE', 'IN', 'COMING', 'DOWN', 'TO', 'BREAKFAST', 'AND', 'HIS', 'MANNER', 'SHOWED', 'PLAINLY', 'THAT', 'SOMETHING', 'WAS', 'AMISS'] +7105-2340-0024-2306: ref=["IT'S", 'AN', 'UNPLEASANT', 'THING', 'TO', 'HAVE', 'TO', 'SAY', 'HE', 'BLURTED', 'OUT', 'PRESENTLY', 'BUT', "I'M", 'AFRAID', 'YOU', 'MUST', 'HAVE', 'A', 'THIEF', 'AMONG', 'YOUR', 'SERVANTS', "SOMETHING'S", 'BEEN', 'TAKEN', 'OUT', 'OF', 'MY', 'PORTMANTEAU'] +7105-2340-0024-2306: hyp=["IT'S", 'AN', 'UNPLEASANT', 'THING', 'TO', 'HAVE', 'TO', 'SAY', 'HE', 'BLURTED', 'OUT', 'PRESENTLY', 'BUT', "I'M", 'AFRAID', 'YOU', 'MUST', 'HAVE', 'A', 'THIEF', 'AMONG', 'YOUR', 'SERVANTS', "SOMETHING'S", 'BEEN', 'TAKEN', 'OUT', 'OF', 'MY', 'PORTMANTEAU'] +7105-2340-0025-2307: ref=['IT', 'WAS', 'A', 'LITTLE', 'PRESENT', 'FROM', 'MY', 'MOTHER', 'AND', 'MYSELF', 'FOR', 'YOUR', 'SILVER', 'WEDDING'] +7105-2340-0025-2307: hyp=['IT', 'WAS', 'A', 'LITTLE', 'PRESENT', 'FROM', 'MY', 'MOTHER', 'AND', 'MYSELF', 'FOR', 'YOUR', 'SILVER', 'WEDDING'] +7105-2340-0026-2308: ref=['I', 'SHOULD', 'HAVE', 'GIVEN', 'IT', 'TO', 'YOU', 'LAST', 'NIGHT', 'AFTER', 'DINNER', 'ONLY', 'IT', 'HAPPENED', 'TO', 'BE', 'A', 'CREAM', 'JUG', 'AND', 'YOU', 'SEEMED', 'ANNOYED', 'AT', 'HAVING', 'SO', 'MANY', 'DUPLICATES', 'SO', 'I', 'FELT', 'RATHER', 'AWKWARD', 'ABOUT', 'GIVING', 'YOU', 'ANOTHER'] +7105-2340-0026-2308: hyp=['I', 'SHOULD', 'HAVE', 'GIVEN', 'IT', 'TO', 'YOU', 'LAST', 'NIGHT', 'AFTER', 'DINNER', 'ONLY', 'IT', 'HAPPENED', 'TO', 'BE', 'A', 'CREAM', 'JUG', 'AND', 'YOU', 'SEEMED', 'ANNOYED', 'AT', 'HAVING', 'SO', 'MANY', 'DUPLICATES', 'SO', 'I', 'FELT', 'RATHER', 'AWKWARD', 'ABOUT', 'GIVING', 'YOU', 'ANOTHER'] +7105-2340-0027-2309: ref=['THE', 'SNATCHER', 'HAD', 'BEEN', 'AN', 'ORPHAN', 'THESE', 'MANY', 'YEARS'] +7105-2340-0027-2309: hyp=['THE', 'SNATCHER', 'HAD', 'BEEN', 'AN', 'ORPHAN', 'THESE', 'MANY', 'YEARS'] +7105-2340-0028-2310: ref=['LADY', 'ERNESTINE', 'PIGEONCOTE', 'HIS', 'MOTHER', 'MOVED', 'IN', 'CIRCLES', 'WHICH', 'WERE', 'ENTIRELY', 'BEYOND', 'THEIR', 'COMPASS', 'OR', 'AMBITIONS', 'AND', 'THE', 'SON', 'WOULD', 'PROBABLY', 'ONE', 'DAY', 'BE', 'AN', 'AMBASSADOR'] +7105-2340-0028-2310: hyp=['LADY', 
'ERNESTINE', 'PIECOTE', 'HIS', 'MOTHER', 'MOVED', 'IN', 'CIRCLES', 'WHICH', 'WERE', 'ENTIRELY', 'BEYOND', 'THEIR', 'COMPASS', 'OR', 'AMBITIONS', 'AND', 'THE', 'SON', 'WOULD', 'PROBABLY', 'ONE', 'DAY', 'BE', 'AN', 'AMBASSADOR'] +7105-2340-0029-2311: ref=['HUSBAND', 'AND', 'WIFE', 'LOOKED', 'BLANKLY', 'AND', 'DESPERATELY', 'AT', 'ONE', 'ANOTHER'] +7105-2340-0029-2311: hyp=['HUSBAND', 'AND', 'WIFE', 'LOOKED', 'BLANKLY', 'AND', 'DESPERATELY', 'AT', 'ONE', 'ANOTHER'] +7105-2340-0030-2312: ref=['IT', 'WAS', 'MISSUS', 'PETER', 'WHO', 'ARRIVED', 'FIRST', 'AT', 'AN', 'INSPIRATION', 'HOW', 'DREADFUL', 'TO', 'THINK', 'THERE', 'ARE', 'THIEVES', 'IN', 'THE', 'HOUSE', 'WE', 'KEEP', 'THE', 'DRAWING', 'ROOM', 'LOCKED', 'UP', 'AT', 'NIGHT', 'OF', 'COURSE', 'BUT', 'ANYTHING', 'MIGHT', 'BE', 'CARRIED', 'OFF', 'WHILE', 'WE', 'ARE', 'AT', 'BREAKFAST'] +7105-2340-0030-2312: hyp=['IT', 'WAS', 'MISSUS', 'PETER', 'WHO', 'ARRIVED', 'FIRST', 'AT', 'AN', 'INSPIRATION', 'HOW', 'DREADFUL', 'TO', 'THINK', 'THERE', 'ARE', 'THIEVES', 'IN', 'THE', 'HOUSE', 'WE', 'KEEP', 'THE', 'DRAWING', 'ROOM', 'LOCKED', 'UP', 'AT', 'NIGHT', 'OF', 'COURSE', 'BUT', 'ANYTHING', 'MIGHT', 'BE', 'CARRIED', 'OFF', 'WHILE', 'WE', 'ARE', 'AT', 'BREAKFAST'] +7105-2340-0031-2313: ref=['SHE', 'ROSE', 'AND', 'WENT', 'OUT', 'HURRIEDLY', 'AS', 'THOUGH', 'TO', 'ASSURE', 'HERSELF', 'THAT', 'THE', 'DRAWING', 'ROOM', 'WAS', 'NOT', 'BEING', 'STRIPPED', 'OF', 'ITS', 'SILVERWARE', 'AND', 'RETURNED', 'A', 'MOMENT', 'LATER', 'BEARING', 'A', 'CREAM', 'JUG', 'IN', 'HER', 'HANDS'] +7105-2340-0031-2313: hyp=['SHE', 'ROSE', 'AND', 'WENT', 'OUT', 'HURRIEDLY', 'AS', 'THOUGH', 'TO', 'ASSURE', 'HERSELF', 'THAT', 'THE', 'DRAWING', 'ROOM', 'WAS', 'NOT', 'BEING', 'STRIPPED', 'OF', 'ITS', 'SILVERWARE', 'AND', 'RETURNED', 'A', 'MOMENT', 'LATER', 'BEARING', 'A', 'CREAM', 'JUG', 'IN', 'HER', 'HANDS'] +7105-2340-0032-2314: ref=['THE', 'PIGEONCOTES', 'HAD', 'TURNED', 'PALER', 'THAN', 'EVER', 'MISSUS', 'PETER', 'HAD', 'A', 'FINAL', 'INSPIRATION'] +7105-2340-0032-2314: hyp=['THE', 'PIGEON', 'COATS', 'HAD', 'TURNED', 'PALER', 'THAN', 'EVER', 'MISSUS', 'PETER', 'HAD', 'A', 'FINAL', 'INSPIRATION'] +7105-2340-0033-2315: ref=['PETER', 'DASHED', 'OUT', 'OF', 'THE', 'ROOM', 'WITH', 'GLAD', 'RELIEF', 'HE', 'HAD', 'LIVED', 'SO', 'LONG', 'DURING', 'THE', 'LAST', 'FEW', 'MINUTES', 'THAT', 'A', 'GOLDEN', 'WEDDING', 'SEEMED', 'WITHIN', 'MEASURABLE', 'DISTANCE'] +7105-2340-0033-2315: hyp=['PETER', 'DASHED', 'OUT', 'OF', 'THE', 'ROOM', 'WITH', 'GLAD', 'RELIEF', 'HE', 'HAD', 'LIVED', 'SO', 'LONG', 'DURING', 'THE', 'LAST', 'FEW', 'MINUTES', 'THAT', 'A', 'GOLDEN', 'WEDDING', 'SEEMED', 'WITHIN', 'MEASURABLE', 'DISTANCE'] +7105-2340-0034-2316: ref=['MISSUS', 'PETER', 'TURNED', 'TO', 'HER', 'GUEST', 'WITH', 'CONFIDENTIAL', 'COYNESS'] +7105-2340-0034-2316: hyp=['MISSUS', 'PETER', 'TURNED', 'TO', 'HER', 'GUESTS', 'WITH', 'CONFIDENTIAL', 'KINDNESS'] +7105-2340-0035-2317: ref=["PETER'S", 'LITTLE', 'WEAKNESS', 'IT', 'RUNS', 'IN', 'THE', 'FAMILY', 'GOOD', 'LORD'] +7105-2340-0035-2317: hyp=["PETER'S", 'LITTLE', 'WEAKNESS', 'IT', 'RUNS', 'IN', 'THE', 'FAMILY', 'GOOD', 'LORD'] +7105-2340-0036-2318: ref=['DO', 'YOU', 'MEAN', 'TO', 'SAY', "HE'S", 'A', 'KLEPTOMANIAC', 'LIKE', 'COUSIN', 'SNATCHER'] +7105-2340-0036-2318: hyp=['DO', 'YOU', 'MEAN', 'TO', 'SAY', "HE'S", 'ACLEPTOMANIA', 'LIKE', 'COUSIN', 'SNATCHER'] +7105-2340-0037-2319: ref=['BRAVE', 'LITTLE', 'WOMAN', 'SAID', 'PETER', 'WITH', 'A', 'GASP', 'OF', 'RELIEF', 'I', 'COULD', 'NEVER', 'HAVE', 'DONE', 'IT'] +7105-2340-0037-2319: hyp=['BRAVE', 'LITTLE', 
'WOMAN', 'SAID', 'PETER', 'WITH', 'A', 'GASP', 'OF', 'RELIEF', 'I', 'COULD', 'NEVER', 'HAVE', 'DONE', 'IT'] +7902-96591-0000-2320: ref=['I', 'AM', 'FROM', 'THE', 'CUTTER', 'LYING', 'OFF', 'THE', 'COAST'] +7902-96591-0000-2320: hyp=['I', 'AM', 'FROM', 'THE', 'CUTTER', 'LYING', 'OFF', 'THE', 'COAST'] +7902-96591-0001-2321: ref=["DON'T", 'CRY', 'HE', 'SAID', 'I', 'WAS', 'OBLIGED', 'TO', 'COME'] +7902-96591-0001-2321: hyp=["DON'T", 'CRY', 'HE', 'SAID', 'I', 'WAS', 'OBLIGED', 'TO', 'COME'] +7902-96591-0002-2322: ref=['AND', 'AND', 'YOU', 'HAVE', 'NOT', 'FOUND', 'OUT', 'ANYTHING', 'CAME', 'IN', 'QUICK', 'FRIGHTENED', 'TONES'] +7902-96591-0002-2322: hyp=['AND', 'AND', 'YOU', 'HAVE', 'NOT', 'FOUND', 'OUT', 'ANYTHING', 'CAME', 'IN', 'QUICK', 'FRIGHTENED', 'TONES'] +7902-96591-0003-2323: ref=['I', 'WISH', 'YOU', 'WOULD', 'BELIEVE', 'ME', 'THAT', 'I', 'AM', 'IN', 'AS', 'GREAT', 'TROUBLE', 'ABOUT', 'IT', 'AS', 'YOU', 'ARE'] +7902-96591-0003-2323: hyp=['I', 'WISH', 'YOU', 'WOULD', 'BELIEVE', 'ME', 'THAT', 'I', 'AM', 'IN', 'AS', 'GREAT', 'TROUBLE', 'ABOUT', 'IT', 'AS', 'YOU', 'ARE'] +7902-96591-0004-2324: ref=['THAT', 'MY', 'FATHER', 'SIR', 'RISDON', 'GRAEME', 'HAS', 'SMUGGLED', 'GOODS', 'HERE'] +7902-96591-0004-2324: hyp=['THAT', 'MY', 'FATHER', 'SIR', 'RISDON', 'GRAHAM', 'HAS', 'SMUGGLED', 'GOODS', 'HERE'] +7902-96591-0005-2325: ref=['HE', 'COULD', 'NOT', 'HELP', 'IT', 'HE', 'HATES', 'THE', 'SMUGGLERS', 'YOU', 'SHALL', 'NOT', 'TELL'] +7902-96591-0005-2325: hyp=['HE', 'COULD', 'NOT', 'HELP', 'IT', 'HE', 'HATES', 'THE', 'SMUGGLERS', 'YOU', 'SHALL', 'NOT', 'TELL'] +7902-96591-0006-2326: ref=['PRAY', 'PRAY', 'SAY', 'YOU', 'WILL', 'NOT', 'ARCHY', 'WAS', 'SILENT'] +7902-96591-0006-2326: hyp=['PRAY', 'PRAY', 'SAY', 'YOU', 'WILL', 'NOT', 'ARCHIE', 'WAS', 'SILENT'] +7902-96591-0007-2327: ref=['THEN', 'AS', 'ARCHY', 'STOOD', 'IN', 'THE', 'DARK', 'LITERALLY', 'AGHAST', 'WITH', 'ASTONISHMENT', 'HE', 'HEARD', 'THE', 'FAINT', 'RUSTLING', 'ONCE', 'MORE', 'AND', 'AGAIN', 'ALL', 'WAS', 'SILENT'] +7902-96591-0007-2327: hyp=['THEN', 'AS', 'ARCHY', 'STOOD', 'IN', 'THE', 'DARK', 'LITERALLY', 'AGHAST', 'WITH', 'ASTONISHMENT', 'HE', 'HEARD', 'THE', 'FAINT', 'RUSTLING', 'ONCE', 'MORE', 'AND', 'AGAIN', 'ALL', 'WAS', 'SILENT'] +7902-96591-0008-2328: ref=['HE', 'LAUGHED', 'BUT', 'IT', 'WAS', 'A', 'CURIOUS', 'KIND', 'OF', 'LAUGH', 'FULL', 'OF', 'VEXATION', 'INJURED', 'AMOUR', 'PROPRE', 'AS', 'THE', 'FRENCH', 'CALL', 'OUR', 'LOVE', 'OF', 'OUR', 'OWN', 'DIGNITY', 'OF', 'WHICH', 'ARCHIBALD', 'RAYSTOKE', 'IN', 'THE', 'FULL', 'FLUSH', 'OF', 'HIS', 'YOUNG', 'BELIEF', 'IN', 'HIS', 'IMPORTANCE', 'AS', 'A', 'BRITISH', 'OFFICER', 'HAD', 'A', 'PRETTY', 'GOOD', 'STOCK'] +7902-96591-0008-2328: hyp=['HE', 'LAUGHED', 'BUT', 'IT', 'WAS', 'A', 'CURIOUS', 'KIND', 'OF', 'LAUGH', 'FULL', 'OF', 'VEXATION', 'INJURED', 'AMOUR', 'PROPERA', 'AS', 'THE', 'FRENCH', 'CALL', 'OUR', 'LOVE', 'OF', 'OUR', 'OWN', 'DIGNITY', 'OF', 'WHICH', 'ARCHIBALD', 'REYSTROKE', 'IN', 'THE', 'FULL', 'FLUSH', 'OF', 'HIS', 'YOUNG', 'BELIEF', 'IN', 'HIS', 'IMPORTANCE', 'AS', 'A', 'BRITISH', 'OFFICER', 'HAD', 'A', 'PRETTY', 'GOOD', 'STOCK'] +7902-96591-0009-2329: ref=['IT', 'ALL', 'COMES', 'OF', 'DRESSING', 'UP', 'IN', 'THIS', 'STUPID', 'WAY', 'LIKE', 'A', 'ROUGH', 'FISHER', 'LAD'] +7902-96591-0009-2329: hyp=['AND', 'ALL', 'COMES', 'OF', 'DRESSING', 'UP', 'IN', 'THIS', 'STUPID', 'WAY', 'LIKE', 'A', 'ROUGH', 'FISHER', 'LAD'] +7902-96591-0010-2330: ref=['COLD', 'WATER', 'CAME', 'ON', 'THIS', 'IDEA', 'DIRECTLY', 'AS', 'HE', 'RECALLED', 'THE', 'FACT', 'THAT', 'THE', 'DARKNESS', 
'WAS', 'INTENSE', 'AND', 'CELIA', 'COULD', 'NOT', 'HAVE', 'SEEN', 'HIM'] +7902-96591-0010-2330: hyp=['COLD', 'WATER', 'CAME', 'ON', 'THIS', 'IDEA', 'DIRECTLY', 'AS', 'HE', 'RECALLED', 'THE', 'FACT', 'THAT', 'THE', 'DARKNESS', 'WAS', 'INTENSE', 'AND', 'CELIA', 'COULD', 'NOT', 'HAVE', 'SEEN', 'HIM'] +7902-96591-0011-2331: ref=["I'LL", 'SOON', 'SHOW', 'THEM', 'THAT', 'I', 'AM', 'NOT', 'GOING', 'TO', 'BE', 'PLAYED', 'WITH'] +7902-96591-0011-2331: hyp=["I'LL", 'SOON', 'SHOW', 'THEM', 'THAT', 'I', 'AM', 'NOT', 'GOING', 'TO', 'BE', 'PLAYED', 'WITH'] +7902-96591-0012-2332: ref=['FOR', 'IT', 'SUDDENLY', 'OCCURRED', 'TO', 'HIM', 'THAT', 'HE', 'WAS', 'NOT', 'ONLY', 'A', 'PRISONER', 'BUT', 'A', 'PRISONER', 'IN', 'THE', 'POWER', 'OF', 'A', 'VERY', 'RECKLESS', 'SET', 'OF', 'PEOPLE', 'WHO', 'WOULD', 'STOP', 'AT', 'NOTHING'] +7902-96591-0012-2332: hyp=['FOR', 'IT', 'SUDDENLY', 'OCCURRED', 'TO', 'HIM', 'THAT', 'HE', 'WAS', 'NOT', 'ONLY', 'A', 'PRISONER', 'BUT', 'A', 'PRISONER', 'IN', 'THE', 'POWER', 'OF', 'A', 'VERY', 'RECKLESS', 'SET', 'OF', 'PEOPLE', 'WHO', 'WOULD', 'STOP', 'AT', 'NOTHING'] +7902-96591-0013-2333: ref=['NO', 'HE', 'THOUGHT', 'TO', 'HIMSELF', 'I', "DON'T", 'BELIEVE', 'THEY', 'WOULD', 'KILL', 'ME', 'BUT', 'THEY', 'WOULD', 'KNOCK', 'ME', 'ABOUT'] +7902-96591-0013-2333: hyp=['NO', 'HE', 'THOUGHT', 'TO', 'HIMSELF', 'I', "DON'T", 'BELIEVE', 'THEY', 'WOULD', 'KILL', 'ME', 'BUT', 'THEY', 'WOULD', 'KNOCK', 'ME', 'ABOUT'] +7902-96591-0014-2334: ref=['THE', 'KICK', 'HE', 'HAD', 'RECEIVED', 'WAS', 'A', 'FORETASTE', 'OF', 'WHAT', 'HE', 'MIGHT', 'EXPECT', 'AND', 'AFTER', 'A', 'LITTLE', 'CONSIDERATION', 'HE', 'CAME', 'TO', 'THE', 'CONCLUSION', 'THAT', 'HIS', 'DUTY', 'WAS', 'TO', 'ESCAPE', 'AND', 'GET', 'BACK', 'TO', 'THE', 'CUTTER', 'AS', 'QUICKLY', 'AS', 'HE', 'COULD'] +7902-96591-0014-2334: hyp=['THE', 'KICK', 'HE', 'HAD', 'RECEIVED', 'WAS', 'A', 'FORETASTE', 'OF', 'WHAT', 'HE', 'MIGHT', 'EXPECT', 'AND', 'AFTER', 'A', 'LITTLE', 'CONSIDERATION', 'HE', 'CAME', 'TO', 'THE', 'CONCLUSION', 'THAT', 'HIS', 'DUTY', 'WAS', 'TO', 'ESCAPE', 'AND', 'GET', 'BACK', 'TO', 'THE', 'CUTTER', 'AS', 'QUICKLY', 'AS', 'HE', 'COULD'] +7902-96591-0015-2335: ref=['TO', 'DO', 'THIS', 'HE', 'MUST', 'SCHEME', 'LIE', 'HID', 'TILL', 'MORNING', 'THEN', 'MAKE', 'FOR', 'THE', 'NEAREST', 'POINT', 'AND', 'SIGNAL', 'FOR', 'HELP', 'UNLESS', 'A', "BOAT'S", 'CREW', 'WERE', 'ALREADY', 'SEARCHING', 'FOR', 'HIM', 'HOW', 'TO', 'ESCAPE'] +7902-96591-0015-2335: hyp=['TO', 'DO', 'THIS', 'HE', 'MUST', 'SCHEME', 'LIE', 'HID', 'TILL', 'MORNING', 'THEN', 'MAKE', 'FOR', 'THE', 'NEAREST', 'POINT', 'AND', 'SIGNAL', 'FOR', 'HELP', 'UNLESS', 'THE', "BOAT'S", 'CREW', 'WERE', 'ALREADY', 'SEARCHING', 'FOR', 'HIM', 'HOW', 'TO', 'ESCAPE'] +7902-96591-0016-2336: ref=['THE', 'WINDOW', 'WAS', 'BARRED', 'BUT', 'HE', 'WENT', 'TO', 'IT', 'AND', 'TRIED', 'THE', 'BARS', 'ONE', 'BY', 'ONE', 'TO', 'FIND', 'THEM', 'ALL', 'SOLIDLY', 'FITTED', 'INTO', 'THE', 'STONE', 'SILL'] +7902-96591-0016-2336: hyp=['THE', 'WINDOW', 'WAS', 'BARRED', 'BUT', 'HE', 'WENT', 'TO', 'IT', 'AND', 'TRIED', 'THE', 'BARS', 'ONE', 'BY', 'ONE', 'TO', 'FIND', 'THEM', 'ALL', 'SOLIDLY', 'FITTED', 'INTO', 'THE', 'STONE', 'SILL'] +7902-96591-0017-2337: ref=['NEXT', 'MOMENT', 'AS', 'HE', 'FELT', 'HIS', 'WAY', 'ABOUT', 'HIS', 'HAND', 'TOUCHED', 'AN', 'OLD', 'FASHIONED', 'MARBLE', 'MANTELPIECE', 'FIREPLACE', 'CHIMNEY'] +7902-96591-0017-2337: hyp=['NEXT', 'MOMENT', 'AS', 'HE', 'FELT', 'HIS', 'WAY', 'ABOUT', 'HIS', 'HAND', 'TOUCHED', 'AN', 'OLD', 'FASHIONED', 'MARBLE', 'MANTELPIECE', 'FIREPLACE', 'CHIMNEY'] 
+7902-96591-0018-2338: ref=['YES', 'IF', 'OTHER', 'WAYS', 'FAILED', 'HE', 'COULD', 'ESCAPE', 'UP', 'THE', 'CHIMNEY'] +7902-96591-0018-2338: hyp=['YES', 'IF', 'OTHER', 'WAYS', 'FAILED', 'HE', 'COULD', 'ESCAPE', 'UP', 'THE', 'CHIMNEY'] +7902-96591-0019-2339: ref=['NO', 'THAT', 'WAS', 'TOO', 'BAD', 'HE', 'COULD', 'NOT', 'DO', 'THAT'] +7902-96591-0019-2339: hyp=['NO', 'THAT', 'WAS', 'TOO', 'BAD', 'HE', 'COULD', 'NOT', 'DO', 'THAT'] +7902-96591-0020-2340: ref=['SYMPATHY', 'AND', 'PITY', 'FOR', 'THE', 'DWELLERS', 'IN', 'THE', 'HOZE', 'WERE', 'COMPLETELY', 'GONE', 'NOW', 'AND', 'HE', 'SET', 'HIS', 'TEETH', 'FAST', 'AND', 'MENTALLY', 'CALLED', 'HIMSELF', 'A', 'WEAK', 'IDIOT', 'FOR', 'EVER', 'THINKING', 'ABOUT', 'SUCH', 'PEOPLE'] +7902-96591-0020-2340: hyp=['SYMPATHY', 'AND', 'PITY', 'FOR', 'THE', 'DWELLERS', 'IN', 'THE', 'HOES', 'WERE', 'COMPLETELY', 'GONE', 'NOW', 'AND', 'HE', 'SET', 'HIS', 'TEETH', 'FAST', 'AND', 'MENTALLY', 'CALLED', 'HIMSELF', 'A', 'WEAK', 'IDIOT', 'FOR', 'EVER', 'THINKING', 'ABOUT', 'SUCH', 'PEOPLE'] +7902-96591-0021-2341: ref=['A', 'NARROW', 'TABLE', 'AGAINST', 'THE', 'WALL', 'IN', 'TWO', 'PLACES'] +7902-96591-0021-2341: hyp=['A', 'NARROW', 'TABLE', 'AGAINST', 'THE', 'WALL', 'IN', 'TWO', 'PLACES'] +7902-96591-0022-2342: ref=['HE', 'WENT', 'AND', 'TRIED', 'TO', 'FORCE', 'HIS', 'HEAD', 'THROUGH', 'RECALLING', 'AS', 'HE', 'DID', 'THAT', 'WHERE', 'A', "PERSON'S", 'HEAD', 'WOULD', 'GO', 'THE', 'REST', 'OF', 'THE', 'BODY', 'WOULD', 'PASS'] +7902-96591-0022-2342: hyp=['HE', 'WENT', 'AND', 'TRIED', 'TO', 'FORCE', 'HIS', 'HEAD', 'THROUGH', 'RECALLING', 'AS', 'HE', 'DID', 'THAT', 'WHERE', 'A', "PERSON'S", 'HEAD', 'WOULD', 'GO', 'THE', 'REST', 'OF', 'THE', 'BODY', 'WOULD', 'PASS'] +7902-96591-0023-2343: ref=['BUT', 'THERE', 'WAS', 'NO', 'CHANCE', 'FOR', 'HIS', 'BODY', 'THERE', 'THE', 'HEAD', 'WOULD', 'NOT', 'GO', 'FIRST'] +7902-96591-0023-2343: hyp=['BUT', 'THERE', 'WAS', 'NO', 'CHANCE', 'FOR', 'HIS', 'BODY', 'THERE', 'AND', 'THE', 'HEAD', 'WOULD', 'NOT', 'GO', 'FIRST'] +7902-96591-0024-2344: ref=['A', 'FELLOW', 'WHO', 'WAS', 'SHUT', 'UP', 'IN', 'PRISON', 'FOR', 'LIFE', 'MIGHT', 'DO', 'IT', 'HE', 'SAID', 'BUT', 'NOT', 'IN', 'A', 'CASE', 'LIKE', 'THIS'] +7902-96591-0024-2344: hyp=['A', 'FELLOW', 'WHO', 'WAS', 'SHUT', 'UP', 'IN', 'PRISON', 'FOR', 'LIFE', 'MIGHT', 'DO', 'IT', 'HE', 'SAID', 'BUT', 'NOT', 'IN', 'A', 'CASE', 'LIKE', 'THIS'] +7902-96592-0000-2345: ref=['SURE', "YOU'VE", 'LOOKED', 'ROUND', 'EVERYWHERE', 'BOY', 'YES', 'FATHER', 'QUITE'] +7902-96592-0000-2345: hyp=['SURE', "YOU'VE", 'LOOKED', 'ROUND', 'EVERYWHERE', 'BOY', 'YES', 'FATHER', 'QUITE'] +7902-96592-0001-2346: ref=["I'M", 'GOING', 'HOME', 'TO', 'BREAKFAST'] +7902-96592-0001-2346: hyp=["I'M", 'GOING', 'HOME', 'TO', 'BREAKFAST'] +7902-96592-0002-2347: ref=['SHALL', 'I', 'COME', 'TOO', 'FATHER', 'NO'] +7902-96592-0002-2347: hyp=['SHALL', 'I', 'COME', 'TOO', 'FATHER', 'NO'] +7902-96592-0003-2348: ref=['STOP', 'HERE', 'TILL', 'SIR', 'RISDON', 'COMES', 'DOWN', 'AND', 'TELL', 'HIM', "I'M", 'VERY', 'SORRY', 'THAT', 'WE', 'SHOULD', 'HAVE', 'CLEARED', 'OUT', 'LAST', 'NIGHT', 'ONLY', 'A', 'BORN', 'FOOL', 'SAW', 'JERRY', "NANDY'S", 'LOBSTER', 'BOAT', 'COMING', 'INTO', 'THE', 'COVE', 'AND', 'CAME', 'RUNNING', 'TO', 'SAY', 'IT', 'WAS', 'A', 'PARTY', 'FROM', 'THE', 'CUTTER', 'YES', 'FATHER'] +7902-96592-0003-2348: hyp=['STOP', 'HERE', 'TILL', 'SIR', 'RISDON', 'COMES', 'DOWN', 'AND', 'TELL', 'HIM', "I'M", 'VERY', 'SORRY', 'THAT', 'WE', 'SHOULD', 'HAVE', 'CLEARED', 'OUT', 'LAST', 'NIGHT', 'ONLY', 'A', 'BORN', 'FOOL', 'SAW', 'JERRY', 
'AND', "ANDY'S", 'LOBSTER', 'BOAT', 'COMING', 'INTO', 'THE', 'COVE', 'AND', 'CAME', 'RUNNING', 'TO', 'SAY', 'IT', 'WAS', 'A', 'PARTY', 'FROM', 'THE', 'CUTTER', 'YES', 'FATHER'] +7902-96592-0004-2349: ref=['TELL', 'HIM', 'NOT', 'TO', 'BE', 'UNEASY', 'TIS', 'ALL', 'RIGHT', 'AND', "I'LL", 'HAVE', 'EVERYTHING', 'CLEAR', 'AWAY', 'TO', 'NIGHT'] +7902-96592-0004-2349: hyp=['TELL', 'HIM', 'NOT', 'TO', 'BE', 'UNEASY', 'TIS', 'ALL', 'RIGHT', 'AND', "I'LL", 'HAVE', 'EVERYTHING', 'CLEAR', 'AWAY', 'TO', 'NIGHT'] +7902-96592-0005-2350: ref=['THE', 'DULL', 'SOUND', 'OF', 'DEPARTING', 'STEPS', 'AND', 'A', 'LOW', 'WHISTLING', 'SOUND', 'COMING', 'DOWN', 'THROUGH', 'THE', 'SKYLIGHT', 'WINDOW', 'INTO', 'THE', 'CABIN', 'WHERE', 'ARCHY', 'RAYSTOKE', 'LAY', 'WITH', 'HIS', 'HEAVY', 'EYELIDS', 'PRESSED', 'DOWN', 'BY', 'SLEEP'] +7902-96592-0005-2350: hyp=['THE', 'DULL', 'SOUND', 'OF', 'DEPARTING', 'STEPS', 'AND', 'A', 'LOW', 'WHISTLING', 'SOUND', 'COMING', 'DOWN', 'THROUGH', 'THE', 'SKYLIGHT', 'WINDOW', 'INTO', 'THE', 'CABIN', 'WHERE', 'ARCHY', 'RAYSTROKE', 'LAY', 'WITH', 'HIS', 'HEAVY', 'EYELIDS', 'PRESSED', 'DOWN', 'BY', 'SLEEP'] +7902-96592-0006-2351: ref=['WHAT', 'A', 'QUEER', 'DREAM', 'HE', 'THOUGHT', 'TO', 'HIMSELF'] +7902-96592-0006-2351: hyp=['WHAT', 'A', 'QUEER', 'DREAM', 'HE', 'THOUGHT', 'TO', 'HIMSELF'] +7902-96592-0007-2352: ref=['BUT', 'HOW', 'QUEER', 'FOR', 'MISTER', 'GURR', 'TO', 'BE', 'TALKING', 'LIKE', 'THAT', 'TO', 'ANDREW', 'TEAL', 'THE', 'BOY', 'WHO', 'HELPED', 'THE', 'COOK'] +7902-96592-0007-2352: hyp=['BUT', 'HOW', 'QUEER', 'FOR', 'MISTER', 'GOURR', 'TO', 'BE', 'TALKING', 'LIKE', 'THAT', 'TO', 'ANDREW', 'TEALE', 'THE', 'BOY', 'WHO', 'HELPED', 'THE', 'COOK'] +7902-96592-0008-2353: ref=['AND', 'WHY', 'DID', 'ANDY', 'CALL', 'MISTER', 'GURR', 'FATHER'] +7902-96592-0008-2353: hyp=['AND', 'WHY', 'DID', 'ANDY', 'CALL', 'MISTER', 'GURRFATHER'] +7902-96592-0009-2354: ref=['THERE', 'WAS', 'AN', 'INTERVAL', 'OF', 'THINKING', 'OVER', 'THIS', 'KNOTTY', 'QUESTION', 'DURING', 'WHICH', 'THE', 'LOW', 'WHISTLING', 'WENT', 'ON'] +7902-96592-0009-2354: hyp=['THERE', 'WAS', 'AN', 'INTERVAL', 'OF', 'THINKING', 'OVER', 'THIS', 'KNOTTY', 'QUESTION', 'DURING', 'WHICH', 'THE', 'LOW', 'WHISTLING', 'WENT', 'ON'] +7902-96592-0010-2355: ref=['AND', "I'M", 'HUNGRY', 'TOO', 'TIME', 'I', 'WAS', 'UP', 'I', 'SUPPOSE'] +7902-96592-0010-2355: hyp=['AND', "I'M", 'HUNGRY', 'TOO', 'TIME', 'I', 'WAS', 'UP', 'I', 'SUPPOSE'] +7902-96592-0011-2356: ref=['NO', 'HE', 'WAS', 'NOT', 'DREAMING', 'FOR', 'HE', 'WAS', 'LOOKING', 'OUT', 'ON', 'THE', 'SEA', 'OVER', 'WHICH', 'A', 'FAINT', 'MIST', 'HUNG', 'LIKE', 'WREATHS', 'OF', 'SMOKE'] +7902-96592-0011-2356: hyp=['NO', 'HE', 'WAS', 'NOT', 'DREAMING', 'FOR', 'HE', 'WAS', 'LOOKING', 'OUT', 'ON', 'THE', 'SEA', 'OVER', 'WHICH', 'A', 'FAINT', 'MIST', 'HUNG', 'LIKE', 'WREATHS', 'OF', 'SMOKE'] +7902-96592-0012-2357: ref=['WHAT', 'DID', 'THEY', 'SAY', 'FALSE', 'ALARM', 'TELL', 'SIR', 'RISDON', 'THEY', 'WOULD', 'CLEAR', 'ALL', 'AWAY', 'TO', 'NIGHT', 'SEE', 'IF', 'ANYTHING', 'HAD', 'BEEN', 'LEFT', 'ABOUT', 'LOBSTER', 'BOAT'] +7902-96592-0012-2357: hyp=['WHAT', 'DID', 'THEY', 'SAY', 'FALSE', 'ALARM', 'TELL', 'SERVICE', 'AND', 'THEY', 'WOULD', 'CLEAR', 'ALL', 'AWAY', 'TO', 'NIGHT', 'SEE', 'IF', 'ANYTHING', 'HAD', 'BEEN', 'LEFT', 'ABOUT', 'LOBSTER', 'BOAT'] +7902-96592-0013-2358: ref=['ONCE', 'OUT', 'OF', 'THAT', 'ROOM', 'HE', 'COULD', 'RAN', 'AND', 'BY', 'DAYLIGHT', 'THE', 'SMUGGLERS', 'DARE', 'NOT', 'HUNT', 'HIM', 'DOWN'] +7902-96592-0013-2358: hyp=['ONCE', 'OUT', 'OF', 'THAT', 'ROOM', 'HE', 'COULD', 
'RAN', 'AND', 'BY', 'DAYLIGHT', 'THE', 'SMUGGLERS', 'DARE', 'NOT', 'HUNT', 'HIM', 'DOWN'] +7902-96592-0014-2359: ref=['OH', 'THOSE', 'BARS', 'HE', 'MENTALLY', 'EXCLAIMED', 'AND', 'HE', 'WAS', 'ADVANCING', 'TOWARD', 'THEM', 'WHEN', 'JUST', 'AS', 'HE', 'DREW', 'NEAR', 'THERE', 'WAS', 'A', 'RUSTLING', 'NOISE', 'UNDER', 'THE', 'WINDOW', 'A', 'COUPLE', 'OF', 'HANDS', 'SEIZED', 'THE', 'BARS', 'THERE', 'WAS', 'A', 'SCRATCHING', 'OF', 'BOOT', 'TOES', 'AGAINST', 'STONE', 'WORK', 'AND', "RAM'S", 'FACE', 'APPEARED', 'TO', 'GAZE', 'INTO', 'THE', 'ROOM', 'BY', 'INTENTION', 'BUT', 'INTO', 'THE', 'ASTONISHED', 'COUNTENANCE', 'OF', 'THE', 'YOUNG', 'MIDSHIPMAN', 'INSTEAD'] +7902-96592-0014-2359: hyp=['OH', 'THOSE', 'BARS', 'HE', 'MENTALLY', 'EXCLAIMED', 'AND', 'HE', 'WAS', 'ADVANCING', 'TOWARDS', 'THEM', 'BUT', 'JUST', 'AS', 'HE', 'DREW', 'NEAR', 'THERE', 'WAS', 'A', 'RUSTLING', 'NOISE', 'UNDER', 'THE', 'WINDOWS', 'A', 'COUPLE', 'OF', 'HANDS', 'SEIZED', 'THE', 'BARS', 'THERE', 'WAS', 'A', 'SCRATCHING', 'OF', 'BOOT', 'TOES', 'AGAINST', 'STONE', 'WORK', 'AND', "RAHAM'S", 'FACE', 'APPEARED', 'TO', 'GAZE', 'INTO', 'THE', 'ROOM', 'BY', 'INTENTION', 'BUT', 'INTO', 'THE', 'ASTONISHED', 'COUNTENANCE', 'OF', 'THE', 'YOUNG', 'MIDSHIPMAN', 'INSTEAD'] +7902-96592-0015-2360: ref=['RAM', 'WAS', 'THE', 'FIRST', 'TO', 'RECOVER', 'FROM', 'HIS', 'SURPRISE'] +7902-96592-0015-2360: hyp=['GRIM', 'WAS', 'THE', 'FIRST', 'TO', 'RECOVER', 'FROM', 'HIS', 'SURPRISE'] +7902-96592-0016-2361: ref=['HULLO', 'HE', 'SAID', 'WHO', 'ARE', 'YOU'] +7902-96592-0016-2361: hyp=['HULLO', 'HE', 'SAID', 'WHO', 'ARE', 'YOU'] +7902-96592-0017-2362: ref=['GO', 'ROUND', 'AND', 'OPEN', 'THE', 'DOOR', 'I', 'WAS', 'SHUT', 'IN', 'LAST', 'NIGHT', 'BY', 'MISTAKE'] +7902-96592-0017-2362: hyp=['GO', 'ROUND', 'AND', 'OPEN', 'THE', 'DOOR', 'I', 'WAS', 'SHUT', 'IN', 'LAST', 'NIGHT', 'BY', 'MISTAKE'] +7902-96592-0018-2363: ref=['I', 'SAW', 'YOU', 'LAST', 'NIGHT', 'AND', 'WONDERED', 'WHOSE', 'BOY', 'YOU', 'WAS'] +7902-96592-0018-2363: hyp=['I', 'SAW', 'YOU', 'LAST', 'NIGHT', 'AND', 'WONDERED', 'WHOSE', 'BOY', 'HE', 'WAS'] +7902-96592-0019-2364: ref=['IT', 'WAS', 'YOU', 'FATHER', 'KICKED', 'FOR', 'SHIRKING', 'AND', 'MY', 'WELL', 'I', 'HARDLY', 'KNOWED', 'YOU'] +7902-96592-0019-2364: hyp=['IT', 'WAS', 'YOUR', 'FATHER', 'KICKED', 'FOR', 'SHIRKING', 'AND', 'MY', 'WELL', 'I', 'HARDLY', 'KNOWED', 'YOU'] +7902-96592-0020-2365: ref=['NONSENSE'] +7902-96592-0020-2365: hyp=['NONSENSE'] +7902-96592-0021-2366: ref=["WON'T", 'DO', 'SAID', 'RAM', 'GRINNING'] +7902-96592-0021-2366: hyp=["WON'T", 'DO', 'SAID', 'RAM', 'GRINNING'] +7902-96592-0022-2367: ref=['THINK', 'I', "DON'T", 'KNOW', 'YOU', 'MISTER', 'ORFICER'] +7902-96592-0022-2367: hyp=['THINK', 'I', "DON'T", 'KNOW', 'YOU', 'MISTER', 'ORFICER'] +7902-96592-0023-2368: ref=["WON'T", 'DO', 'SAID', 'RAM', 'QUICKLY', 'I', 'KNOW', 'YOU'] +7902-96592-0023-2368: hyp=["WON'T", 'DO', 'SAID', 'GRAHAM', 'QUICKLY', 'I', 'KNOW', 'YOU'] +7902-96592-0024-2369: ref=['BEEN', 'PLAYING', 'THE', 'SPY', "THAT'S", 'WHAT', "YOU'VE", 'BEEN', 'DOING', 'WHO', 'LOCKED', 'YOU', 'IN'] +7902-96592-0024-2369: hyp=['BEEN', 'PLAYING', 'THE', 'SPY', "THAT'S", 'WHAT', "YOU'VE", 'BEEN', 'DOING', 'WHO', 'LOCKED', 'YOU', 'IN'] +7902-96592-0025-2370: ref=['ARCHY', 'STEPPED', 'BACK', 'TO', 'THE', 'DOOR', 'LISTENING', 'BUT', 'THERE', 'WAS', 'NOT', 'A', 'SOUND'] +7902-96592-0025-2370: hyp=['ARCHIE', 'STEPPED', 'BACK', 'TO', 'THE', 'DOOR', 'LISTENING', 'BUT', 'THERE', 'WAS', 'NOT', 'A', 'SOUND'] +7902-96592-0026-2371: ref=['HE', 'HAS', 'GONE', 'TO', 'GIVE', 'THE', 
'ALARM', 'THOUGHT', 'THE', 'PRISONER', 'AND', 'HE', 'LOOKED', 'EXCITEDLY', 'ROUND', 'FOR', 'A', 'WAY', 'OF', 'ESCAPE'] +7902-96592-0026-2371: hyp=['HE', 'HAS', 'GONE', 'TO', 'GIVE', 'THE', 'ALARM', 'THOUGHT', 'THE', 'PRISONER', 'AND', 'HE', 'LOOKED', 'EXCITEDLY', 'ROUND', 'FOR', 'A', 'WAY', 'OF', 'ESCAPE'] +7902-96592-0027-2372: ref=['NOTHING', 'BUT', 'THE', 'CHIMNEY', 'PRESENTED', 'ITSELF'] +7902-96592-0027-2372: hyp=['NOTHING', 'BUT', 'THE', 'CHIMNEY', 'PRESENTED', 'ITSELF'] +7902-96592-0028-2373: ref=['A', 'HAPPY', 'INSPIRATION', 'HAD', 'COME', 'AND', 'PLACING', 'ONE', 'HAND', 'UPON', 'HIS', 'BREAST', 'HE', 'THRUST', 'IN', 'THE', 'OTHER', 'GAVE', 'A', 'TUG', 'AND', 'DREW', 'OUT', 'HIS', 'LITTLE', 'CURVED', 'DIRK', 'GLANCED', 'AT', 'THE', 'EDGE', 'RAN', 'TO', 'THE', 'WINDOW', 'AND', 'BEGAN', 'TO', 'CUT', 'AT', 'ONE', 'OF', 'THE', 'BARS', 'LABOUR', 'IN', 'VAIN'] +7902-96592-0028-2373: hyp=['A', 'HAPPY', 'INSPIRATION', 'HAD', 'COME', 'AND', 'PLACING', 'ONE', 'HAND', 'UPON', 'HIS', 'CHEST', 'HE', 'THRUST', 'IN', 'THE', 'OTHER', 'GAVE', 'A', 'TUG', 'AND', 'DREW', 'OUT', 'HIS', 'LITTLE', 'CURVED', 'DIRK', 'GLANCED', 'AT', 'THE', 'EDGE', 'RAN', 'TO', 'THE', 'WINDOW', 'AND', 'BEGAN', 'TO', 'CUT', 'AT', 'ONE', 'OF', 'THE', 'BARS', 'LABOR', 'IN', 'VAIN'] +7902-96592-0029-2374: ref=['HE', 'DIVIDED', 'THE', 'PAINT', 'AND', 'PRODUCED', 'A', 'FEW', 'SQUEAKS', 'AND', 'GRATING', 'SOUNDS', 'AS', 'HE', 'REALISED', 'THAT', 'THE', 'ATTEMPT', 'WAS', 'MADNESS'] +7902-96592-0029-2374: hyp=['HE', 'DIVIDED', 'THE', 'PAINT', 'AND', 'PRODUCED', 'A', 'FEW', 'SQUEAKS', 'AND', 'GRATING', 'SOUNDS', 'AS', 'HE', 'REALIZED', 'THAT', 'THE', 'ATTEMPT', 'WAS', 'MADNESS'] +7902-96592-0030-2375: ref=['THE', 'RESULT', 'WAS', 'NOT', 'VERY', 'SATISFACTORY', 'BUT', 'SUFFICIENTLY', 'SO', 'TO', 'MAKE', 'HIM', 'ESSAY', 'THE', 'BAR', 'OF', 'THE', 'WINDOW', 'ONCE', 'MORE', 'PRODUCING', 'A', 'GRATING', 'EAR', 'ASSAILING', 'SOUND', 'AS', 'HE', 'FOUND', 'THAT', 'NOW', 'HE', 'DID', 'MAKE', 'A', 'LITTLE', 'IMPRESSION', 'SO', 'LITTLE', 'THOUGH', 'THAT', 'THE', 'PROBABILITY', 'WAS', 'IF', 'HE', 'KEPT', 'ON', 'WORKING', 'WELL', 'FOR', 'TWENTY', 'FOUR', 'HOURS', 'HE', 'WOULD', 'NOT', 'GET', 'THROUGH'] +7902-96592-0030-2375: hyp=['THE', 'RESULT', 'WAS', 'NOT', 'VERY', 'SATISFACTORY', 'BUT', 'SUFFICIENTLY', 'SO', 'TO', 'MAKE', 'HIM', 'ESSAY', 'THE', 'BAR', 'OF', 'THE', 'WINDOW', 'ONCE', 'MORE', 'PRODUCING', 'A', 'GRATING', 'IRASCELLING', 'SOUND', 'AS', 'HE', 'FOUND', 'THAT', 'NOW', 'HE', 'DID', 'MAKE', 'A', 'LITTLE', 'IMPRESSION', 'SO', 'LITTLE', 'THOUGH', 'THAT', 'THE', 'PROBABILITY', 'WAS', 'IF', 'HE', 'KEPT', 'ON', 'WORKING', 'WELL', 'FOR', 'TWENTY', 'FOUR', 'HOURS', 'HE', 'WOULD', 'NOT', 'GET', 'THROUGH'] +7902-96592-0031-2376: ref=['BUT', 'AT', 'THE', 'END', 'OF', 'FIVE', 'MINUTES', 'HE', 'STOPPED', 'AND', 'THRUST', 'BACK', 'THE', 'DIRK', 'INTO', 'ITS', 'SHEATH'] +7902-96592-0031-2376: hyp=['BUT', 'AT', 'THE', 'END', 'OF', 'FIVE', 'MINUTES', 'HE', 'STOPPED', 'AND', 'THRUST', 'BACK', 'THE', 'DIRK', 'INTO', 'ITS', 'SHEATH'] +7902-96592-0032-2377: ref=['NO', 'I', "CAN'T", 'PART', 'WITH', 'THAT', 'HA', 'HA', 'HA', 'LAUGHED', 'THE', 'BOY', 'JEERINGLY'] +7902-96592-0032-2377: hyp=['NO', 'I', "CAN'T", 'PART', 'WITH', 'THAT', 'HA', 'HA', 'HA', 'LAUGHED', 'THE', 'BOY', 'JEERINGLY'] +7902-96592-0033-2378: ref=['BUT', "I'LL", 'YES', "I'LL", 'GIVE', 'YOU', 'A', 'GUINEA', 'IF', 'YOU', 'WILL', 'LET', 'ME', 'OUT'] +7902-96592-0033-2378: hyp=['BUT', "I'LL", 'YES', "I'LL", 'GIVE', 'YOU', 'A', 'GUINEA', 'IF', 'YOU', 'WILL', 'LET', 'ME', 'OUT'] 
+7902-96592-0034-2379: ref=['GUINEA', 'SAID', 'THE', 'BOY', 'THINK', "I'D", 'DO', 'IT', 'FOR', 'A', 'GUINEA', 'WELL', 'THEN', 'TWO'] +7902-96592-0034-2379: hyp=['GUINEA', 'SAID', 'THE', 'BOY', 'THINK', "I'LL", 'DO', 'IT', 'FOR', 'A', 'GUINEA', 'WELL', 'THEN', 'TOO'] +7902-96592-0035-2380: ref=['BE', 'QUICK', "THERE'S", 'A', 'GOOD', 'FELLOW', 'I', 'WANT', 'TO', 'GET', 'AWAY', 'AT', 'ONCE'] +7902-96592-0035-2380: hyp=['BE', 'QUICK', "THERE'S", 'A', 'GOOD', 'FELLOW', 'I', 'WANT', 'TO', 'GET', 'AWAY', 'AT', 'ONCE'] +7902-96592-0036-2381: ref=['NOT', 'YOU', 'ONLY', 'A', 'SHAM'] +7902-96592-0036-2381: hyp=['NOT', 'YOU', 'ONLY', 'A', 'SHAM'] +7902-96592-0037-2382: ref=['WHY', 'YOUR', 'CLOTHES', "DON'T", 'FIT', 'YOU', 'AND', 'YOUR', "CAP'S", 'PUT', 'ON', 'ALL', 'SKEW', 'REW'] +7902-96592-0037-2382: hyp=['WHY', 'YOUR', 'CLOTHES', "DON'T", 'FIT', 'YOU', 'AND', 'YOUR', 'CAPS', 'PUT', 'ON', 'ALL', 'SKEWER'] +7902-96592-0038-2383: ref=['NEVER', 'MIND', 'ABOUT', 'THAT', 'LET', 'ME', 'OUT', 'OF', 'THIS', 'PLACE'] +7902-96592-0038-2383: hyp=['NEVER', 'MIND', 'ABOUT', 'THAT', 'LET', 'ME', 'OUT', 'OF', 'THIS', 'PLACE'] +7902-96592-0039-2384: ref=['I', 'TOLD', 'YOU', 'A', 'FISHER', 'BOY', 'CRIED', 'ARCHY', 'IMPATIENTLY', 'BUT', 'TRYING', 'NOT', 'TO', 'OFFEND', 'HIS', 'VISITOR', 'WHO', 'POSSESSED', 'THE', 'POWER', 'OF', 'CONFERRING', 'FREEDOM', 'BY', 'SPEAKING', 'SHARPLY'] +7902-96592-0039-2384: hyp=['I', 'TOLD', 'YOU', 'A', 'FISHER', 'BOY', 'CRIED', 'ARCHY', 'IMPATIENTLY', 'BUT', 'TRYING', 'NOT', 'TO', 'OFFEND', 'HIS', 'VISITOR', 'WHO', 'POSSESSED', 'THE', 'POWER', 'OF', 'CONFERRING', 'FREEDOM', 'BY', 'SPEAKING', 'SHARPLY'] +7902-96592-0040-2385: ref=['NOT', 'YOU', 'LOOK', 'LIKE', 'A', 'WILD', 'BEAST', 'IN', 'A', 'CAGE', 'LIKE', 'A', 'MONKEY', 'YOU', 'INSOLENT'] +7902-96592-0040-2385: hyp=['NOT', 'YOU', 'LOOK', 'LIKE', 'A', 'WILD', 'BEAST', 'IN', 'A', 'CAGE', 'LIKE', 'A', 'MONKEY', 'YOU', 'INSOLENT'] +7902-96592-0041-2386: ref=['ARCHY', 'CHECKED', 'HIMSELF', 'AND', 'THE', 'BOY', 'LAUGHED'] +7902-96592-0041-2386: hyp=['ARCHY', 'CHECKED', 'HIMSELF', 'AND', 'THE', 'BOY', 'LAUGHED'] +7902-96592-0042-2387: ref=['IT', 'WAS', 'YOUR', 'TURN', 'YESTERDAY', "IT'S", 'MINE', 'TO', 'DAY', 'WHAT', 'A', 'GAME'] +7902-96592-0042-2387: hyp=['IT', 'WAS', 'YOUR', 'TURN', 'YESTERDAY', "IT'S", 'MINE', 'TO', 'DAY', 'WHAT', 'A', 'GAME'] +7902-96592-0043-2388: ref=['YOU', 'LAUGHED', 'AND', 'FLEERED', 'AT', 'ME', 'WHEN', 'I', 'WAS', 'ON', 'THE', "CUTTER'S", 'DECK'] +7902-96592-0043-2388: hyp=['YOU', 'LAUGHED', 'AND', 'FLEERED', 'AT', 'ME', 'WHEN', 'I', 'WAS', 'ON', 'THE', "CUTTER'S", 'DECK'] +7902-96592-0044-2389: ref=['I', 'SAY', 'YOU', 'DO', 'LOOK', 'A', 'RUM', 'UN', 'JUST', 'LIKE', 'A', 'BIG', 'MONKEY', 'IN', 'A', 'SHOW'] +7902-96592-0044-2389: hyp=['I', 'SAY', 'YOU', 'DO', 'LOOK', 'LIKE', 'A', 'ROMAN', 'JUST', 'LIKE', 'A', 'BIG', 'MONKEY', 'IN', 'A', 'SHOW'] +7902-96592-0045-2390: ref=['RAM', 'SHOWED', 'HIS', 'WHITE', 'TEETH', 'AS', 'HE', 'BURST', 'OUT', 'WITH', 'A', 'LONG', 'LOW', 'FIT', 'OF', 'LAUGHTER'] +7902-96592-0045-2390: hyp=['GRAHAM', 'SHOWED', 'HIS', 'WHITE', 'TEETH', 'AS', 'HE', 'BURST', 'OUT', 'WITH', 'A', 'LONG', 'LOW', 'FIT', 'OF', 'LAUGHTER'] +7902-96592-0046-2391: ref=['YOU', "ROPE'S", 'END', 'ME', 'HE', 'SAID'] +7902-96592-0046-2391: hyp=['YOUR', 'ROPES', 'END', 'ME', 'HE', 'SAID'] +7902-96592-0047-2392: ref=['WHY', 'I', 'COULD', 'TIE', 'YOU', 'UP', 'IN', 'A', 'KNOT', 'AND', 'HEAVE', 'YOU', 'OFF', 'THE', 'CLIFF', 'ANY', 'DAY', 'WHAT', 'A', 'GAME'] +7902-96592-0047-2392: hyp=['WHY', 'I', 'COULD', 'TIE', 'YOU', 
'UP', 'IN', 'A', 'KNOT', 'AND', 'HEAVE', 'YOU', 'OFF', 'THE', 'CLIFF', 'ANY', 'DAY', 'WHAT', 'A', 'GAME'] +7902-96592-0048-2393: ref=['BIT', 'OF', 'A', 'MIDDY', 'FED', 'ON', 'SALT', 'TACK', 'AND', 'WEEVILLY', 'BISCUIT', 'TALK', 'OF', 'GIVING', 'ME', "ROPE'S", 'END'] +7902-96592-0048-2393: hyp=['BIT', 'OF', 'A', 'MITTEE', 'FED', 'ON', 'A', 'SALT', 'TACK', 'IN', 'WEEVILY', 'BISCUIT', 'TALK', 'OF', 'GIVING', 'ME', 'ROPES', 'END'] +7902-96592-0049-2394: ref=['ONCE', 'MORE', 'WILL', 'YOU', 'COME', 'AND', 'LET', 'ME', 'OUT', 'NO'] +7902-96592-0049-2394: hyp=['ONCE', 'MORE', 'WILL', 'YOU', 'COME', 'AND', 'LET', 'ME', 'OUT', 'NO'] +7902-96592-0050-2395: ref=['TO', 'HIS', 'ASTONISHMENT', 'THE', 'BOY', 'DID', 'NOT', 'FLINCH', 'BUT', 'THRUST', 'HIS', 'OWN', 'ARMS', 'THROUGH', 'PLACING', 'THEM', 'ABOUT', 'THE', "MIDDY'S", 'WAIST', 'CLENCHING', 'HIS', 'HANDS', 'BEHIND', 'AND', 'UTTERING', 'A', 'SHARP', 'WHISTLE'] +7902-96592-0050-2395: hyp=['TO', 'HIS', 'ASTONISHMENT', 'THE', 'BOY', 'DID', 'NOT', 'FLINCH', 'BUT', 'THRUST', 'HIS', 'OWN', 'ARMS', 'THROUGH', 'PLACING', 'THEM', 'ABOUT', 'THE', "MIDDY'S", 'WAIST', 'CLENCHING', 'HIS', 'HAND', 'BEHIND', 'AND', 'UTTERING', 'A', 'SHARP', 'WHISTLE'] +7902-96594-0000-2396: ref=['SEEMED', 'IN', 'GOOD', 'SPIRITS', 'LAST', 'NIGHT', 'MISTER', 'GURR', 'EH'] +7902-96594-0000-2396: hyp=['SEEMED', 'IN', 'GOOD', 'SPIRITS', 'LAST', 'NIGHT', 'MISTER', 'GURR', 'EH'] +7902-96594-0001-2397: ref=['YES', 'SIR', 'BUT', 'HE', 'MAY', 'TURN', 'UP', 'ON', 'THE', 'CLIFF', 'AT', 'ANY', 'MOMENT'] +7902-96594-0001-2397: hyp=['YES', 'SIR', 'BUT', 'HE', 'MAY', 'TURN', 'UP', 'ON', 'THE', 'CLIFF', 'AT', 'ANY', 'MOMENT'] +7902-96594-0002-2398: ref=['YES', 'MEN', 'QUITE', 'READY', 'YES', 'SIR'] +7902-96594-0002-2398: hyp=['YES', 'MEN', 'QUITE', 'READY', 'YES', 'SIR'] +7902-96594-0003-2399: ref=["THAT'S", 'RIGHT', 'OF', 'COURSE', 'WELL', 'ARMED'] +7902-96594-0003-2399: hyp=["THAT'S", 'RIGHT', 'OF', 'COURSE', 'WELL', 'ARMED'] +7902-96594-0004-2400: ref=['SOON', 'AS', 'THE', 'SIGNAL', 'COMES', 'WE', 'SHALL', 'PUSH', 'OFF'] +7902-96594-0004-2400: hyp=['SOON', 'AS', 'THE', 'SIGNAL', 'COMES', 'WE', 'SHALL', 'PUSH', 'OFF'] +7902-96594-0005-2401: ref=['AWKWARD', 'BIT', 'O', 'COUNTRY', 'SIR', 'SIX', 'MILES', 'ROW', 'BEFORE', 'YOU', 'CAN', 'FIND', 'A', 'PLACE', 'TO', 'LAND'] +7902-96594-0005-2401: hyp=['AWKWARD', 'BIT', 'OF', 'COUNTRY', 'SIR', 'SIX', 'MILES', 'ROW', 'BEFORE', 'YOU', 'CAN', 'FIND', 'A', 'PLACE', 'TO', 'LAND'] +7902-96594-0006-2402: ref=['SO', 'SHALL', 'WE', 'YET', 'SIR'] +7902-96594-0006-2402: hyp=['SO', 'SHALL', 'WE', 'YET', 'SIR'] +7902-96594-0007-2403: ref=['YOU', "DON'T", 'THINK', 'MISTER', 'GURR', 'THAT', 'THEY', 'WOULD', 'DARE', 'TO', 'INJURE', 'HIM', 'IF', 'HE', 'WAS', 'SO', 'UNLUCKY', 'AS', 'TO', 'BE', 'CAUGHT'] +7902-96594-0007-2403: hyp=['YOU', "DON'T", 'THINK', 'MISTER', 'GURR', 'THAT', 'THEY', 'WOULD', 'DARE', 'TO', 'INJURE', 'HIM', 'IF', 'HE', 'WAS', 'SO', 'UNLUCKY', 'AS', 'TO', 'BE', 'CAUGHT'] +7902-96594-0008-2404: ref=['WELL', 'SIR', 'SAID', 'THE', 'MASTER', 'HESITATING', 'SMUGGLERS', 'ARE', 'SMUGGLERS'] +7902-96594-0008-2404: hyp=['WELL', 'SIR', 'SAID', 'THE', 'MASTER', 'HESITATING', 'SMUGGLERS', 'ARE', 'SMUGGLERS'] +7902-96594-0009-2405: ref=['CERTAINLY', 'SIR', 'SMUGGLERS', 'ARE', 'SMUGGLERS', 'INDEED'] +7902-96594-0009-2405: hyp=['CERTAINLY', 'SIR', 'SMUGGLERS', 'ARE', 'SMUGGLERS', 'INDEED'] +7902-96594-0010-2406: ref=['BEG', 'PARDON', 'SIR', "DIDN'T", 'MEAN', 'ANY', 'HARM'] +7902-96594-0010-2406: hyp=['BEG', 'PARDON', 'SIR', "DIDN'T", 'MEAN', 'ANY', 'HARM'] 
+7902-96594-0011-2407: ref=["I'M", 'GETTING', 'VERY', 'ANXIOUS', 'ABOUT', 'MISTER', 'RAYSTOKE', 'START', 'AT', 'ONCE', 'SIR'] +7902-96594-0011-2407: hyp=["I'M", 'GETTING', 'VERY', 'ANXIOUS', 'ABOUT', 'MISTER', 'RAYSTROKE', 'START', 'AT', 'ONCE', 'SIR'] +7902-96594-0012-2408: ref=['NO', 'WAIT', 'ANOTHER', 'HALF', 'HOUR'] +7902-96594-0012-2408: hyp=['NO', 'WAIT', 'ANOTHER', 'HALF', 'HOUR'] +7902-96594-0013-2409: ref=['VERY', 'ILL', 'ADVISED', 'THING', 'TO', 'DO'] +7902-96594-0013-2409: hyp=['VERY', 'ILL', 'ADVISED', 'THING', 'TO', 'DO'] +7902-96594-0014-2410: ref=['THEN', 'I', 'MUST', 'REQUEST', 'THAT', 'YOU', 'WILL', 'NOT', 'MAKE', 'IT', 'AGAIN', 'VERY', 'TRUE'] +7902-96594-0014-2410: hyp=['THEN', 'I', 'MUST', 'REQUEST', 'THAT', 'YOU', 'WILL', 'NOT', 'MAKE', 'IT', 'AGAIN', 'VERY', 'TRUE'] +7902-96594-0015-2411: ref=['AWK', 'WARD', 'MISTER', 'GURR', 'AWKWARD'] +7902-96594-0015-2411: hyp=['AWKWARD', 'MISTER', 'GURR', 'AWKWARD'] +7902-96594-0016-2412: ref=['YES', 'SIR', 'OF', 'COURSE'] +7902-96594-0016-2412: hyp=['YES', 'SIR', 'OF', 'COURSE'] +7902-96594-0017-2413: ref=['SAY', 'AWK', 'WARD', 'IN', 'FUTURE', 'NOT', "AWK'ARD"] +7902-96594-0017-2413: hyp=['SAY', 'AWKWARD', 'IN', 'FUTURE', 'NOT', 'AWKWARD'] +7902-96594-0018-2414: ref=['I', 'MEAN', 'ALL', 'ALONE', 'BY', 'MYSELF', 'SIR'] +7902-96594-0018-2414: hyp=['I', 'MEAN', 'ALL', 'ALONE', 'BY', 'MYSELF', 'SIR'] +7902-96594-0019-2415: ref=['WHAT', 'FOR', 'THERE', "AREN'T", 'A', 'PUBLIC', 'HOUSE', 'FOR', 'TEN', 'MILES', "DIDN'T", 'MEAN', 'THAT'] +7902-96594-0019-2415: hyp=['WHAT', 'FOR', 'THERE', "AREN'T", 'A', 'PUBLIC', 'HOUSE', 'FOR', 'TEN', 'MILES', "DIDN'T", 'MEAN', 'THAT'] +7902-96594-0020-2416: ref=['THEN', 'WHAT', 'DID', 'YOU', 'MEAN', 'SPEAK', 'OUT', 'AND', "DON'T", 'DO', 'THE', 'DOUBLE', 'SHUFFLE', 'ALL', 'OVER', 'MY', 'CLEAN', 'DECK', 'NO', 'SIR'] +7902-96594-0020-2416: hyp=['THEN', 'WHAT', 'DID', 'YOU', 'MEAN', 'SPEAK', 'OUT', 'AND', "DON'T", 'DO', 'THE', 'DOUBLE', 'SHUFFLE', 'ALL', 'OVER', 'MY', 'CLEAN', 'DECK', 'NO', 'SIR'] +7902-96594-0021-2417: ref=['HOPPING', 'ABOUT', 'LIKE', 'A', 'CAT', 'ON', 'HOT', 'BRICKS'] +7902-96594-0021-2417: hyp=['HOPPING', 'ABOUT', 'LIKE', 'A', 'CAT', 'ON', 'HOT', 'BRICKS'] +7902-96594-0022-2418: ref=['NOW', 'THEN', 'WHY', 'DO', 'YOU', 'WANT', 'TO', 'GO', 'ASHORE'] +7902-96594-0022-2418: hyp=['NOW', 'THEN', 'WHY', 'DO', 'YOU', 'WANT', 'TO', 'GO', 'ASHORE'] +7902-96594-0023-2419: ref=['BEG', 'PARDON', "DIDN'T", 'MEAN', 'NOWT', 'SIR', 'SAID', 'THE', 'SAILOR', 'TOUCHING', 'HIS', 'FORELOCK'] +7902-96594-0023-2419: hyp=['BEG', 'PARDON', "DIDN'T", 'MEAN', 'IT', 'OUT', 'SIR', 'SAID', 'THE', 'SAILOR', 'TOUCHING', 'HIS', 'FORELOCK'] +7902-96594-0024-2420: ref=['YES', 'SIR', 'SAID', 'THE', 'MAN', 'HUMBLY', 'SHALL', 'I', 'GO', 'AT', 'ONCE', 'SIR'] +7902-96594-0024-2420: hyp=['YES', 'SIR', 'SAID', 'THE', 'MAN', 'HUMBLY', 'SHALL', 'I', 'GO', 'AT', 'ONCE', 'SIR'] +7902-96594-0025-2421: ref=['NO', 'WAIT'] +7902-96594-0025-2421: hyp=['NO', 'WAIT'] +7902-96594-0026-2422: ref=['KEEP', 'A', 'SHARP', 'LOOK', 'OUT', 'ON', 'THE', 'CLIFF', 'TO', 'SEE', 'IF', 'MISTER', 'RAYSTOKE', 'IS', 'MAKING', 'SIGNALS', 'FOR', 'A', 'BOAT'] +7902-96594-0026-2422: hyp=['KEEP', 'A', 'SHARP', 'LOOKOUT', 'ON', 'THE', 'CLIFF', 'TO', 'SEE', 'IF', 'MISTER', 'RAYSTROKE', 'IS', 'MAKING', 'SIGNALS', 'FOR', 'A', 'BOAT'] +7902-96594-0027-2423: ref=['HE', 'SWUNG', 'ROUND', 'WALKED', 'AFT', 'AND', 'BEGAN', 'SWEEPING', 'THE', 'SHORE', 'AGAIN', 'WITH', 'HIS', 'GLASS', 'WHILE', 'THE', 'MASTER', 'AND', 'DICK', 'EXCHANGED', 'GLANCES', 'WHICH', 'MEANT', 'A', 
'GREAT', 'DEAL'] +7902-96594-0027-2423: hyp=['HE', 'SWUNG', 'ROUND', 'WALKED', 'AFT', 'AND', 'BEGAN', 'SWEEPING', 'ASHORE', 'AGAIN', 'WITH', 'HIS', 'GLASS', 'WHILE', 'THE', 'MASTER', 'AND', 'DICK', 'EXCHANGED', 'GLANCES', 'WHICH', 'MEANT', 'A', 'GREAT', 'DEAL'] +7902-96594-0028-2424: ref=['AT', 'LAST', 'THE', 'LITTLE', 'LIEUTENANT', 'COULD', 'BEAR', 'THE', 'ANXIETY', 'NO', 'LONGER'] +7902-96594-0028-2424: hyp=['AT', 'LAST', 'THE', 'LITTLE', 'LIEUTENANT', 'COULD', 'BEAR', 'THE', 'ANXIETY', 'NO', 'LONGER'] +7902-96594-0029-2425: ref=['PIPE', 'AWAY', 'THE', 'MEN', 'TO', 'THAT', 'BOAT', 'THERE', 'HE', 'SAID', 'AND', 'AS', 'THE', 'CREW', 'SPRANG', 'IN'] +7902-96594-0029-2425: hyp=['PIPE', 'AWAY', 'THEM', 'INTO', 'THAT', 'BOAT', 'THERE', 'HE', 'SAID', 'AS', 'THE', 'CREW', 'SPRANG', 'IN'] +7902-96594-0030-2426: ref=['NOW', 'MISTER', 'GURR', 'HE', 'SAID', "I'M", 'ONLY', 'GOING', 'TO', 'SAY', 'ONE', 'THING', 'TO', 'YOU', 'IN', 'THE', 'WAY', 'OF', 'INSTRUCTIONS', 'YES', 'SIR'] +7902-96594-0030-2426: hyp=['NOW', 'MISTER', 'GURR', 'HE', 'SAID', "I'M", 'ONLY', 'GOING', 'TO', 'SAY', 'ONE', 'THING', 'TO', 'YOU', 'IN', 'THE', 'WAY', 'OF', 'INSTRUCTIONS', 'YES', 'SIR'] +7902-96594-0031-2427: ref=['BEG', 'PARDON', 'SIR', 'SAID', 'THE', 'MASTER', 'DEPRECATINGLY'] +7902-96594-0031-2427: hyp=['BEG', 'PARDON', 'SIR', 'SAID', 'THE', 'MASTER', 'DEPRECATINGLY'] +7902-96594-0032-2428: ref=['STEADY', 'MY', 'LADS', 'STEADY', 'CRIED', 'THE', 'MASTER', 'KEEP', 'STROKE', 'AND', 'THEN', 'HE', 'BEGAN', 'TO', 'MAKE', 'PLANS', 'AS', 'TO', 'HIS', 'FIRST', 'PROCEEDINGS', 'ON', 'GETTING', 'ASHORE'] +7902-96594-0032-2428: hyp=['STEADY', 'MY', 'LAD', 'STEADY', 'CRIED', 'THE', 'MASTER', 'KEEP', 'STROKE', 'AND', 'THEN', 'HE', 'BEGAN', 'TO', 'MAKE', 'PLANS', 'AS', 'TO', 'HIS', 'FIRST', 'PROCEEDINGS', 'ON', 'GETTING', 'ASHORE'] +7902-96595-0000-2429: ref=['SAY', 'MESTER', 'GURR', 'SAID', 'DICK', 'AFTER', 'ONE', 'OF', 'THESE', 'SEARCHES', 'HE', "WOULDN'T", 'RUN', 'AWAY', 'WHAT'] +7902-96595-0000-2429: hyp=['SAY', 'MISTER', 'GIRK', 'SAID', 'DICK', 'AFTER', 'ONE', 'OF', 'THESE', 'SEARCHES', 'HE', "WOULDN'T", 'RUN', 'AWAY', 'WHAT'] +7902-96595-0001-2430: ref=['MISTER', 'RAYSTOKE', 'SIR', "DON'T", 'BE', 'A', 'FOOL'] +7902-96595-0001-2430: hyp=['MISTER', 'GREYSTOKE', 'SIR', "DON'T", 'BE', 'A', 'FOOL'] +7902-96595-0002-2431: ref=['WHAT', 'CHUCKED', 'HIM', 'OFF', 'YONDER'] +7902-96595-0002-2431: hyp=['WHAT', 'CHUCKED', 'HIM', 'OFF', 'YONDER'] +7902-96595-0003-2432: ref=['GURR', 'GLANCED', 'ROUND', 'TO', 'SEE', 'IF', 'THE', 'MEN', 'WERE', 'LOOKING', 'AND', 'THEN', 'SAID', 'RATHER', 'HUSKILY', 'BUT', 'KINDLY'] +7902-96595-0003-2432: hyp=['GURR', 'GLANCED', 'ROUND', 'TO', 'SEE', 'IF', 'THE', 'MEN', 'WERE', 'LOOKING', 'AND', 'THEN', 'SAID', 'RATHER', 'HUSKILY', 'BUT', 'KINDLY'] +7902-96595-0004-2433: ref=['AH', 'EJACULATED', 'DICK', 'SADLY'] +7902-96595-0004-2433: hyp=['AH', 'EJACULATED', 'DICK', 'SADLY'] +7902-96595-0005-2434: ref=['SAY', 'MESTER', 'GURR', 'SIR', 'WHICH', 'THANKFUL', 'I', 'AM', 'TO', 'YOU', 'FOR', 'SPEAKING', 'SO', 'BUT', 'YOU', "DON'T", 'REALLY', 'THINK', 'AS', 'HE', 'HAS', 'COME', 'TO', 'HARM'] +7902-96595-0005-2434: hyp=['SAY', 'MISTER', 'GORE', 'SIR', 'WHICH', 'THANKFUL', 'I', 'AM', 'FOR', 'YOU', 'FOR', 'SPEAKING', 'SO', 'BUT', 'YOU', "DON'T", 'REALLY', 'THINK', 'AS', 'HE', 'HAS', 'COME', 'TO', 'HARM'] +7902-96595-0006-2435: ref=['I', 'HOPE', 'NOT', 'DICK', 'I', 'HOPE', 'NOT', 'BUT', 'SMUGGLERS', "DON'T", 'STAND', 'AT', 'ANYTHING', 'SOMETIMES'] +7902-96595-0006-2435: hyp=['I', 'HOPE', 'NOT', 'DICK', 'I', 'HOPE', 'NOT', 
'BUT', 'SMUGGLERS', "DON'T", 'STAND', 'AT', 'ANYTHING', 'SOMETIMES'] +7902-96595-0007-2436: ref=['I', 'DO', 'ASSURE', 'YOU', "THERE'S", 'NOTHING', 'HERE', 'BUT', 'WHAT', 'YOU', 'MAY', 'SEE'] +7902-96595-0007-2436: hyp=['I', 'DO', 'ASSURE', 'YOU', "THERE'S", 'NOTHING', 'HERE', 'BUT', 'WHAT', 'YOU', 'MAY', 'SEE'] +7902-96595-0008-2437: ref=['IF', "YOU'D", 'LET', 'ME', 'FINISH', "YOU'D", 'KNOW', 'SAID', 'GURR', 'GRUFFLY', 'ONE', 'OF', 'OUR', 'BOYS', 'IS', 'MISSING', 'SEEN', 'HIM', 'UP', 'HERE'] +7902-96595-0008-2437: hyp=['IF', 'YOU', 'LET', 'ME', 'FINISH', "YOU'D", 'KNOW', 'SAID', 'GURR', 'GRUFFLY', 'ONE', 'OF', 'OUR', 'BOYS', 'IS', 'MISSING', 'SEEN', 'HIM', 'UP', 'HERE'] +7902-96595-0009-2438: ref=['BOY', 'BOUT', 'SEVENTEEN', 'WITH', 'A', 'RED', 'CAP', 'NO', 'SIR', 'INDEED', "I'VE", 'NOT'] +7902-96595-0009-2438: hyp=['BOY', 'ABOUT', 'SEVENTEEN', 'WITH', 'A', 'RED', 'CAP', 'NO', 'SIR', 'INDEED', 'HAVE', 'NOT'] +7902-96595-0010-2439: ref=["DON'T", 'KNOW', 'AS', 'HE', 'HAS', 'BEEN', 'SEEN', 'ABOUT', 'HERE', 'DO', 'YOU', 'SAID', 'GURR', 'LOOKING', 'AT', 'HER', 'SEARCHINGLY', 'NO', 'SIR'] +7902-96595-0010-2439: hyp=["DON'T", 'KNOW', 'AS', 'HE', 'HAS', 'BEEN', 'SEEN', 'ABOUT', 'HERE', 'DO', 'YOU', 'SAID', 'GIRL', 'LOOKING', 'AT', 'HER', 'SEARCHINGLY', 'NO', 'SIR'] +7902-96595-0011-2440: ref=['IF', 'SHE', 'KNEW', 'EVIL', 'HAD', 'COME', 'TO', 'THE', 'POOR', 'LAD', 'HER', 'FACE', 'WOULD', 'TELL', 'TALES', 'LIKE', 'PRINT'] +7902-96595-0011-2440: hyp=['IF', 'SHE', 'KNEW', 'EVIL', 'HAD', 'COME', 'TO', 'THE', 'POOR', 'LAD', 'HER', 'FACE', 'WOULD', 'TELL', 'TALES', 'LIKE', 'PRINT'] +7902-96595-0012-2441: ref=['I', 'SAID', 'A', 'LAD', 'BOUT', 'SEVENTEEN', 'IN', 'A', 'RED', 'CAP', 'LIKE', 'YOURS', 'SAID', 'GURR', 'VERY', 'SHORTLY'] +7902-96595-0012-2441: hyp=['I', 'SAID', 'A', 'LAD', 'ABOUT', 'SEVENTEEN', 'IN', 'A', 'RED', 'CAP', 'LIKE', 'YOURS', 'SAID', 'GURR', 'VERY', 'SHORTLY'] +7902-96595-0013-2442: ref=['THE', 'MAN', 'SHOOK', 'HIS', 'HEAD', 'AND', 'STARED', 'AS', 'IF', 'HE', "DIDN'T", 'HALF', 'UNDERSTAND', 'THE', 'DRIFT', 'OF', 'WHAT', 'WAS', 'SAID'] +7902-96595-0013-2442: hyp=['THE', 'MAN', 'SHOOK', 'HIS', 'HEAD', 'AND', 'STARED', 'AS', 'IF', 'HE', "DIDN'T", 'HALF', 'UNDERSTAND', 'THE', 'DRIFT', 'OF', 'WHAT', 'WAS', 'SAID'] +7902-96595-0014-2443: ref=['HERE', 'MY', 'LAD', "WHERE'S", 'YOUR', 'MASTER'] +7902-96595-0014-2443: hyp=['HERE', 'MY', 'LAD', "WHERE'S", 'YOUR', 'MASTER'] +7902-96595-0015-2444: ref=['EH', 'I', 'SAY', "WHERE'S", 'YOUR', 'MASTER'] +7902-96595-0015-2444: hyp=['EH', 'I', 'SAY', "WHERE'S", 'YOUR', 'MASTER'] +7902-96595-0016-2445: ref=['GURR', 'TURNED', 'AWAY', 'IMPATIENTLY', 'AGAIN', 'AND', 'SIGNING', 'TO', 'HIS', 'MEN', 'TO', 'FOLLOW', 'THEY', 'ALL', 'BEGAN', 'TO', 'TRAMP', 'UP', 'THE', 'STEEP', 'TRACK', 'LEADING', 'TOWARD', 'THE', 'HOZE', 'WITH', 'THE', 'RABBITS', 'SCUTTLING', 'AWAY', 'AMONG', 'THE', 'FURZE', 'AND', 'SHOWING', 'THEIR', 'WHITE', 'COTTONY', 'TAILS', 'FOR', 'A', 'MOMENT', 'AS', 'THEY', 'DARTED', 'DOWN', 'INTO', 'THEIR', 'HOLES'] +7902-96595-0016-2445: hyp=['GURR', 'TURNED', 'AWAY', 'IMPATIENTLY', 'AGAIN', 'AND', 'SIGNING', 'TO', 'HIS', 'MEN', 'TO', 'FOLLOW', 'THEY', 'ALL', 'BEGAN', 'TO', 'TRAMP', 'UP', 'THE', 'STEEP', 'TRACK', 'LEADING', 'TOWARD', 'THE', 'HOSE', 'WITH', 'THE', 'RABBITS', 'SCUTTLING', 'AWAY', 'AMONG', 'THE', 'FIRS', 'AND', 'SHOWING', 'THEIR', 'WHITE', 'COTTONY', 'TAILS', 'FOR', 'A', 'MOMENT', 'AS', 'THEY', 'DARTED', 'DOWN', 'INTO', 'THEIR', 'HOLES'] +7902-96595-0017-2446: ref=['I', 'DUNNO', 'MUTTERED', 'DICK', 'AND', 'A', 'MAN', "CAN'T", 'BE', 'SURE'] 
+7902-96595-0017-2446: hyp=['I', 'DUNNO', 'MUTTERED', 'DICK', 'AND', 'A', 'MEN', "CAN'T", 'BE', 'SURE'] +7902-96595-0018-2447: ref=['GURR', 'SALUTED', 'AND', 'STATED', 'HIS', 'BUSINESS', 'WHILE', 'THE', 'BARONET', 'WHO', 'HAD', 'TURNED', 'SALLOWER', 'AND', 'MORE', 'CAREWORN', 'THAN', 'HIS', 'LOT', 'DREW', 'A', 'BREATH', 'FULL', 'OF', 'RELIEF', 'ONE', 'OF', 'YOUR', 'SHIP', 'BOYS', 'HE', 'SAID'] +7902-96595-0018-2447: hyp=['GURR', 'SALUTED', 'AND', 'STATED', 'HIS', 'BUSINESS', 'WHILE', 'THE', 'BARONET', 'WHO', 'HAD', 'TURNED', 'SALLOWER', 'AND', 'MORE', 'CAREWORN', 'THAN', 'HIS', 'LOT', 'DREW', 'A', 'BREATH', 'OF', 'FULL', 'OF', 'RELIEF', 'ONE', 'OF', 'YOUR', 'SHIP', 'BOYS', 'HE', 'SAID'] +7902-96595-0019-2448: ref=['A', 'LAD', 'LOOKING', 'LIKE', 'A', 'COMMON', 'SAILOR', 'AND', 'WEARING', 'A', 'RED', 'CAP', 'NO', 'SAID', 'SIR', 'RISDON'] +7902-96595-0019-2448: hyp=['A', 'LAD', 'LOOKING', 'LIKE', 'A', 'COMMON', 'SAILOR', 'AND', 'WEARING', 'A', 'RED', 'CAP', 'NO', 'SAID', 'SIR', 'RISDON'] +7902-96595-0020-2449: ref=['I', 'HAVE', 'SEEN', 'NO', 'ONE', 'ANSWERING', 'TO', 'THE', 'DESCRIPTION', 'HERE'] +7902-96595-0020-2449: hyp=['I', 'HAVE', 'SEEN', 'NO', 'ONE', 'ANSWERING', 'TO', 'THE', 'DESCRIPTION', 'HERE'] +7902-96595-0021-2450: ref=['BEG', 'PARDON', 'SIR', 'BUT', 'CAN', 'YOU', 'AS', 'A', 'GENTLEMAN', 'ASSURE', 'ME', 'THAT', 'HE', 'IS', 'NOT', 'HERE', 'CERTAINLY', 'SAID', 'SIR', 'RISDON'] +7902-96595-0021-2450: hyp=['BEG', 'PARDON', 'SIR', 'BUT', 'CAN', 'YOU', 'AS', 'A', 'GENTLEMAN', 'ASSURE', 'ME', 'THAT', 'HE', 'IS', 'NOT', 'HERE', 'CERTAINLY', 'SAID', 'SIR', 'RISDON'] +7902-96595-0022-2451: ref=['SURELY', 'CRIED', 'SIR', 'RISDON', 'EXCITEDLY'] +7902-96595-0022-2451: hyp=['SURELY', 'CRIED', 'SIR', 'RISDON', 'EXCITEDLY'] +7902-96595-0023-2452: ref=['SIR', 'RISDON', 'WAS', 'SILENT'] +7902-96595-0023-2452: hyp=['SIR', 'RICHMOND', 'WAS', 'SILENT'] +7902-96595-0024-2453: ref=['LADY', 'GRAEME', 'LOOKED', 'GHASTLY'] +7902-96595-0024-2453: hyp=['LADY', 'GRAHAM', 'LOOKED', 'GHASTLY'] +7902-96595-0025-2454: ref=['YOU', 'DO', 'NOT', 'KNOW', 'NO'] +7902-96595-0025-2454: hyp=['YOU', 'DO', 'NOT', 'KNOW', 'NO'] +7975-280057-0000-2455: ref=['THESE', 'HATREDS', 'WERE', 'SOON', 'TO', 'MAKE', 'TROUBLE', 'FOR', 'ME', 'OF', 'WHICH', 'I', 'HAD', 'NEVER', 'DREAMED'] +7975-280057-0000-2455: hyp=['THESE', 'HATREDS', 'WERE', 'SOON', 'TO', 'MAKE', 'TROUBLE', 'FOR', 'ME', 'OF', 'WHICH', 'I', 'HAD', 'NEVER', 'DREAMED'] +7975-280057-0001-2456: ref=['HENRY', 'WASHINGTON', 'YOUNGER', 'MY', 'FATHER', 'REPRESENTED', 'JACKSON', 'COUNTY', 'THREE', 'TIMES', 'IN', 'THE', 'LEGISLATURE', 'AND', 'WAS', 'ALSO', 'JUDGE', 'OF', 'THE', 'COUNTY', 'COURT'] +7975-280057-0001-2456: hyp=['HENRY', 'WASHINGTON', 'YOUNGER', 'MY', 'FATHER', 'REPRESENTED', 'JACKSON', 'COUNTY', 'THREE', 'TIMES', 'IN', 'THE', 'LEGISLATURE', 'AND', 'WAS', 'ALSO', 'A', 'JUDGE', 'OF', 'THE', 'COUNTY', 'COURT'] +7975-280057-0002-2457: ref=['MY', 'MOTHER', 'WHO', 'WAS', 'BURSHEBA', 'FRISTOE', 'OF', 'INDEPENDENCE', 'WAS', 'THE', 'DAUGHTER', 'OF', 'RICHARD', 'FRISTOE', 'WHO', 'FOUGHT', 'UNDER', 'GENERAL', 'ANDREW', 'JACKSON', 'AT', 'NEW', 'ORLEANS', 'JACKSON', 'COUNTY', 'HAVING', 'BEEN', 'SO', 'NAMED', 'AT', 'MY', 'GRANDFATHER', "FRISTOE'S", 'INSISTENCE'] +7975-280057-0002-2457: hyp=['MY', 'MOTHER', 'WHO', 'WASURCEBAH', 'FOR', 'STOW', 'OF', 'INDEPENDENCE', 'WAS', 'A', 'DAUGHTER', 'OF', 'RICHARD', 'FRISTOW', 'WHO', 'FOUGHT', 'UNDER', 'GENERAL', 'ANDREW', 'JACKSON', 'AT', 'NEW', 'ORLEANS', 'JACKSON', 'COUNTY', 'HAVING', 'BEEN', 'SO', 'NAMED', 'AND', 'MY', 'GRANDFATHER', 
"FORSTOW'S", 'INSISTENCE'] +7975-280057-0003-2458: ref=['I', 'CANNOT', 'REMEMBER', 'WHEN', 'I', 'DID', 'NOT', 'KNOW', 'HOW', 'TO', 'SHOOT'] +7975-280057-0003-2458: hyp=['I', 'CANNOT', 'REMEMBER', 'WHEN', 'I', 'DID', 'NOT', 'KNOW', 'HOW', 'TO', 'SHOOT'] +7975-280057-0004-2459: ref=['MY', 'BROTHER', 'JAMES', 'WAS', 'BORN', 'JANUARY', 'FIFTEENTH', 'EIGHTEEN', 'FORTY', 'EIGHT', 'JOHN', 'IN', 'EIGHTEEN', 'FIFTY', 'ONE', 'AND', 'ROBERT', 'IN', 'DECEMBER', 'EIGHTEEN', 'FIFTY', 'THREE'] +7975-280057-0004-2459: hyp=['MY', 'BROTHER', 'JAMES', 'WAS', 'BORN', 'JANUARY', 'FIFTEENTH', 'EIGHTEEN', 'FORTY', 'EIGHT', 'JOHN', 'IN', 'EIGHTEEN', 'FIFTY', 'ONE', 'AND', 'ROBERT', 'IN', 'DECEMBER', 'EIGHTEEN', 'FIFTY', 'THREE'] +7975-280057-0005-2460: ref=['MY', 'ELDEST', 'BROTHER', 'RICHARD', 'DIED', 'IN', 'EIGHTEEN', 'SIXTY'] +7975-280057-0005-2460: hyp=['MY', 'ELDEST', 'BROTHER', 'RICHARD', 'DIED', 'IN', 'EIGHTEEN', 'SIXTY'] +7975-280057-0006-2461: ref=['MY', 'FATHER', 'WAS', 'IN', 'THE', 'EMPLOY', 'OF', 'THE', 'UNITED', 'STATES', 'GOVERNMENT', 'AND', 'HAD', 'THE', 'MAIL', 'CONTRACT', 'FOR', 'FIVE', 'HUNDRED', 'MILES'] +7975-280057-0006-2461: hyp=['MY', 'FATHER', 'WAS', 'IN', 'THE', 'EMPLOY', 'OF', 'THE', 'UNITED', 'STATES', 'GOVERNMENT', 'AND', 'HAD', 'THE', 'MAIL', 'CONTRACT', 'FOR', 'FIVE', 'HUNDRED', 'MILES'] +7975-280057-0007-2462: ref=['HE', 'HAD', 'STARTED', 'BACK', 'TO', 'HARRISONVILLE', 'IN', 'A', 'BUGGY', 'BUT', 'WAS', 'WAYLAID', 'ONE', 'MILE', 'SOUTH', 'OF', 'WESTPORT', 'A', 'SUBURB', 'OF', 'KANSAS', 'CITY', 'AND', 'BRUTALLY', 'MURDERED', 'FALLING', 'OUT', 'OF', 'HIS', 'BUGGY', 'INTO', 'THE', 'ROAD', 'WITH', 'THREE', 'MORTAL', 'BULLET', 'WOUNDS'] +7975-280057-0007-2462: hyp=['HE', 'HAD', 'STARTED', 'BACK', 'TO', 'HARRISONVILLE', 'IN', 'A', 'BUGGY', 'BUT', 'WAS', 'WAYLAID', 'ONE', 'MILE', 'SOUTH', 'OF', 'WESTPORT', 'A', 'SUBURB', 'OF', 'KANSAS', 'CITY', 'AND', 'BRUTALLY', 'MURDERED', 'FALLING', 'OUT', 'OF', 'HIS', 'BUGGY', 'INTO', 'THE', 'ROAD', 'WITH', 'THREE', 'MORTAL', 'BULLET', 'WOUNDS'] +7975-280057-0008-2463: ref=['MISSUS', 'WASHINGTON', 'WELLS', 'AND', 'HER', 'SON', 'SAMUEL', 'ON', 'THE', 'ROAD', 'HOME', 'FROM', 'KANSAS', 'CITY', 'TO', "LEE'S", 'SUMMIT', 'RECOGNIZED', 'THE', 'BODY', 'AS', 'THAT', 'OF', 'MY', 'FATHER'] +7975-280057-0008-2463: hyp=['MISSUS', 'WASHINGTON', 'WELLS', 'AND', 'HER', 'SON', 'SAMUEL', 'ON', 'THE', 'ROAD', 'HOME', 'FROM', 'KANSAS', 'CITY', 'TO', 'LEE', 'SUMMIT', 'RECOGNIZED', 'THE', 'BODY', 'AS', 'THAT', 'OF', 'MY', 'FATHER'] +7975-280057-0009-2464: ref=['MISSUS', 'WELLS', 'STAYED', 'TO', 'GUARD', 'THE', 'REMAINS', 'WHILE', 'HER', 'SON', 'CARRIED', 'THE', 'NEWS', 'OF', 'THE', 'MURDER', 'TO', 'COLONEL', 'PEABODY', 'OF', 'THE', 'FEDERAL', 'COMMAND', 'WHO', 'WAS', 'THEN', 'IN', 'CAMP', 'AT', 'KANSAS', 'CITY'] +7975-280057-0009-2464: hyp=['MISS', 'WELLS', 'STAYED', 'TO', 'GUARD', 'THE', 'REMAINS', 'WHILE', 'HER', 'SON', 'CARRIED', 'THE', 'NEWS', 'OF', 'THE', 'MURDER', 'TO', 'COLONEL', 'PEABODY', 'OF', 'THE', 'FEDERAL', 'COMMAND', 'WHO', 'WAS', 'THEN', 'IN', 'CAMP', 'AT', 'KANSAS', 'CITY'] +7975-280057-0010-2465: ref=['MISSUS', 'MC', 'CORKLE', 'JUMPED', 'FROM', 'THE', 'WINDOW', 'OF', 'THE', 'HOUSE', 'AND', 'ESCAPED'] +7975-280057-0010-2465: hyp=['MISS', 'MC', 'CORAL', 'JUMPED', 'FROM', 'THE', 'WINDOW', 'OF', 'THE', 'HOUSE', 'AND', 'ESCAPED'] +7975-280057-0011-2466: ref=['AS', 'THE', 'RAIDERS', 'LEFT', 'ONE', 'OF', 'THEM', 'SHOUTED'] +7975-280057-0011-2466: hyp=['AS', 'THE', 'RAIDERS', 'LEFT', 'ONE', 'OF', 'THEM', 'SHOUTED'] +7975-280057-0012-2467: ref=['NOW', 'OLD', 'LADY', 
'CALL', 'ON', 'YOUR', 'PROTECTORS', 'WHY', "DON'T", 'YOU', 'CALL', 'ON', 'COLE', 'YOUNGER', 'NOW'] +7975-280057-0012-2467: hyp=['NOW', 'OLD', 'LADY', 'CALL', 'ON', 'YOUR', 'PROTECTORS', 'WHY', "DON'T", 'YOU', 'CALL', 'ON', 'CO', 'YOUNGER', 'NOW'] +7975-280057-0013-2468: ref=['EVERY', 'KNOT', 'REPRESENTED', 'A', 'HUMAN', 'LIFE'] +7975-280057-0013-2468: hyp=['EVERY', 'KNOT', 'REPRESENTED', 'A', 'HUMAN', 'LIFE'] +7975-280057-0014-2469: ref=['BUT', 'SHE', 'FAILED', 'TO', 'FIND', 'THE', 'COMFORT', 'SHE', 'SOUGHT', 'FOR', 'ANNOYANCES', 'CONTINUED', 'IN', 'A', 'MORE', 'AGGRAVATED', 'FORM'] +7975-280057-0014-2469: hyp=['BUT', 'SHE', 'FAILED', 'TO', 'FIND', 'THE', 'COMFORT', 'SHE', 'SOUGHT', 'FOR', 'ANNOYANCE', 'WAS', 'CONTINUED', 'IN', 'A', 'MORE', 'AGGRAVATED', 'FORM'] +7975-280057-0015-2470: ref=['TWO', 'MONTHS', 'AFTER', 'THIS', 'INCIDENT', 'THE', 'SAME', 'PERSECUTORS', 'AGAIN', 'ENTERED', 'OUR', 'HOME', 'IN', 'THE', 'DEAD', 'OF', 'THE', 'NIGHT', 'AND', 'AT', 'THE', 'POINT', 'OF', 'A', 'PISTOL', 'TRIED', 'TO', 'FORCE', 'MY', 'MOTHER', 'TO', 'SET', 'FIRE', 'TO', 'HER', 'OWN', 'HOME'] +7975-280057-0015-2470: hyp=['TWO', 'MONTHS', 'AFTER', 'THIS', 'INCIDENT', 'THE', 'SAME', 'PERSECUTORS', 'AGAIN', 'ENTERED', 'OUR', 'HOME', 'IN', 'THE', 'DEAD', 'OF', 'THE', 'NIGHT', 'AND', 'AT', 'THE', 'POINT', 'OF', 'A', 'PISTOL', 'TRIED', 'TO', 'FORCE', 'MY', 'MOTHER', 'TO', 'SET', 'FIRE', 'TO', 'HER', 'OWN', 'HOME'] +7975-280057-0016-2471: ref=['I', 'HAVE', 'ALWAYS', 'FELT', 'THAT', 'THE', 'EXPOSURE', 'TO', 'WHICH', 'SHE', 'WAS', 'SUBJECTED', 'ON', 'THIS', 'CRUEL', 'JOURNEY', 'TOO', 'HARD', 'EVEN', 'FOR', 'A', 'MAN', 'TO', 'TAKE', 'WAS', 'THE', 'DIRECT', 'CAUSE', 'OF', 'HER', 'DEATH'] +7975-280057-0016-2471: hyp=['I', 'HAVE', 'ALWAYS', 'FELT', 'THAT', 'THE', 'EXPOSURE', 'TO', 'WHICH', 'SHE', 'WAS', 'SUBJECTED', 'ON', 'THIS', 'CRUEL', 'JOURNEY', 'TOO', 'HARD', 'EVEN', 'FOR', 'A', 'MAN', 'TO', 'TAKE', 'WAS', 'THE', 'DIRECT', 'CAUSE', 'OF', 'HER', 'DEATH'] +7975-280057-0017-2472: ref=['FROM', 'HARRISONVILLE', 'SHE', 'WENT', 'TO', 'WAVERLY', 'WHERE', 'SHE', 'WAS', 'HOUNDED', 'CONTINUALLY'] +7975-280057-0017-2472: hyp=['FROM', 'HARRISONVILLE', 'SHE', 'WENT', 'TO', 'WAVERLEY', 'WHERE', 'SHE', 'WAS', 'HOUNDY', 'CONTINUALLY'] +7975-280057-0018-2473: ref=['ONE', 'OF', 'THE', 'CONDITIONS', 'UPON', 'WHICH', 'HER', 'LIFE', 'WAS', 'SPARED', 'WAS', 'THAT', 'SHE', 'WOULD', 'REPORT', 'AT', 'LEXINGTON', 'WEEKLY'] +7975-280057-0018-2473: hyp=['ONE', 'OF', 'THE', 'CONDITIONS', 'UPON', 'WHICH', 'HER', 'LIFE', 'WAS', 'SPARED', 'WAS', 'THAT', 'SHE', 'WOULD', 'REPORT', 'AT', 'LESSINGTON', 'WEEKLY'] +7975-280057-0019-2474: ref=['ONE', 'OF', 'MY', 'OLD', 'SCHOOL', 'TEACHERS', 'WHOM', 'I', 'HAVE', 'NEVER', 'SEEN', 'SINCE', 'THE', 'SPRING', 'OR', 'SUMMER', 'OF', 'EIGHTEEN', 'SIXTY', 'TWO', 'IS', 'STEPHEN', 'B', 'ELKINS', 'SENATOR', 'FROM', 'WEST', 'VIRGINIA'] +7975-280057-0019-2474: hyp=['ONE', 'OF', 'MY', 'OLD', 'SCHOOL', 'TEACHERS', 'WHOM', 'I', 'HAVE', 'NEVER', 'SEEN', 'SINCE', 'THE', 'SPRING', 'SUMMER', 'OF', 'EIGHTEEN', 'SIXTY', 'TWO', 'IS', 'STEPHEN', 'B', 'ELKINS', 'SENATOR', 'FROM', 'WEST', 'VIRGINIA'] +7975-280057-0020-2475: ref=['WHEN', 'I', 'WAS', 'TAKEN', 'PRISONER', 'I', 'EXPECTED', 'TO', 'BE', 'SHOT', 'WITHOUT', 'CEREMONY'] +7975-280057-0020-2475: hyp=['WHEN', 'I', 'WAS', 'TAKEN', 'PRISONER', 'I', 'EXPECTED', 'TO', 'BE', 'SHOT', 'WITHOUT', 'CEREMONY'] +7975-280063-0000-2476: ref=['WE', 'TOOK', 'THE', 'OATH', 'PERHAPS', 'THREE', 'HUNDRED', 'OF', 'US', 'DOWN', 'ON', 'LUTHER', "MASON'S", 'FARM', 'A', 'FEW', 'MILES', 'FROM', 
'WHERE', 'I', 'NOW', 'WRITE', 'WHERE', 'COLONEL', 'HAYS', 'HAD', 'ENCAMPED', 'AFTER', 'INDEPENDENCE'] +7975-280063-0000-2476: hyp=['WE', 'TOOK', 'THE', 'OATH', 'PERHAPS', 'THREE', 'HUNDRED', 'OF', 'US', 'DOWN', 'ON', 'LUTHER', "MASON'S", 'FARM', 'A', 'FEW', 'MILES', 'FROM', 'WHERE', 'I', 'NOW', 'WRITE', 'WHERE', 'COLONEL', 'HAYES', 'HAD', 'ENCAMPED', 'AFTER', 'INDEPENDENCE'] +7975-280063-0001-2477: ref=['BOONE', 'MUIR', 'AND', 'MYSELF', 'MET', 'COFFEE', 'AND', 'THE', 'REST', 'BELOW', 'ROSE', 'HILL', 'ON', 'GRAND', 'RIVER'] +7975-280063-0001-2477: hyp=['BOOMEUER', 'AND', 'MYSELF', 'MAKE', 'COFFEE', 'IN', 'THE', 'REST', 'BELOW', 'ROSE', 'HILL', 'ON', 'GRAND', 'RIVER'] +7975-280063-0002-2478: ref=['ACCORDINGLY', 'I', 'WAS', 'SHORTLY', 'AWAKENED', 'TO', 'ACCOMPANY', 'HIM', 'TO', 'LONE', 'JACK', 'WHERE', 'HE', 'WOULD', 'PERSONALLY', 'MAKE', 'KNOWN', 'THE', 'SITUATION', 'TO', 'THE', 'OTHER', 'COLONELS'] +7975-280063-0002-2478: hyp=['ACCORDINGLY', 'I', 'WAS', 'SHORTLY', 'AWAKENED', 'TO', 'ACCOMPANY', 'HIM', 'TO', 'LONG', 'JACK', 'WHERE', 'HE', 'WOULD', 'PERSONALLY', 'MAKE', 'KNOWN', 'THE', 'SITUATION', 'TO', 'THE', 'OTHER', 'COLONELS'] +7975-280063-0003-2479: ref=['FOSTER', 'HAD', 'NEARLY', 'ONE', 'THOUSAND', 'CAVALRYMEN', 'AND', 'TWO', 'PIECES', 'OF', "RABB'S", 'INDIANA', 'BATTERY', 'THAT', 'HAD', 'ALREADY', 'MADE', 'FOR', 'ITSELF', 'A', 'NAME', 'FOR', 'HARD', 'FIGHTING'] +7975-280063-0003-2479: hyp=['FOSTER', 'HAD', 'NEARLY', 'ONE', 'THOUSAND', 'CAVERNMEN', 'AND', 'TWO', 'PIECES', 'OF', "RAB'S", 'INDIANA', 'BATTERY', 'THAT', 'HAD', 'ALREADY', 'MADE', 'FOR', 'ITSELF', 'A', 'NAME', 'FOR', 'HARD', 'FIGHTING'] +7975-280063-0004-2480: ref=['COME', 'IN', 'COLONEL', 'HAYS', 'EXCLAIMED', 'COLONEL', 'COCKRELL'] +7975-280063-0004-2480: hyp=['COME', 'IN', 'COLONEL', 'HAYES', 'EXCLAIMED', 'COLONEL', 'CONCRELL'] +7975-280063-0005-2481: ref=['I', 'THINK', "HE'LL", 'BE', 'RATHER', 'TOUGH', 'MEAT', 'FOR', 'BREAKFAST', 'I', 'REPLIED', 'HE', 'MIGHT', 'BE', 'ALL', 'RIGHT', 'FOR', 'DINNER'] +7975-280063-0005-2481: hyp=['I', 'THINK', "HE'LL", 'BE', 'RATHER', 'TOUGH', 'MEAT', 'FOR', 'BREAKFAST', 'I', 'REPLIED', 'HE', 'MIGHT', 'BE', 'ALL', 'RIGHT', 'FOR', 'DINNER'] +7975-280063-0006-2482: ref=['JACKMAN', 'WITH', 'A', 'PARTY', 'OF', 'THIRTY', 'SEASONED', 'MEN', 'CHARGED', 'THE', 'INDIANA', 'GUNS', 'AND', 'CAPTURED', 'THEM', 'BUT', 'MAJOR', 'FOSTER', 'LED', 'A', 'GALLANT', 'CHARGE', 'AGAINST', 'THE', 'INVADERS', 'AND', 'RECAPTURED', 'THE', 'PIECES'] +7975-280063-0006-2482: hyp=['JACKMAN', 'WITH', 'A', 'PARTY', 'OF', 'THIRTY', 'SEASONED', 'MEN', 'CHARGED', 'THE', 'INDIANA', 'GUNS', 'AND', 'CAPTURED', 'THEM', 'BUT', 'MAJOR', 'FOSTER', 'LED', 'A', 'GALLANT', 'CHARGE', 'AGAINST', 'THE', 'INVADERS', 'AND', 'RECAPTURED', 'THE', 'PIECES'] +7975-280063-0007-2483: ref=['WE', 'WERE', 'OUT', 'OF', 'AMMUNITION', 'AND', 'WERE', 'HELPLESS', 'HAD', 'THE', 'FIGHT', 'BEEN', 'PRESSED'] +7975-280063-0007-2483: hyp=['WE', 'WERE', 'OUT', 'OF', 'AMMUNITION', 'AND', 'WERE', 'HELPLESS', 'HAD', 'THE', 'FIGHT', 'BEEN', 'PRESSED'] +7975-280063-0008-2484: ref=['THEY', 'DID', 'MARK', 'MY', 'CLOTHES', 'IN', 'ONE', 'OR', 'TWO', 'PLACES', 'HOWEVER'] +7975-280063-0008-2484: hyp=['THEY', 'DID', 'MARK', 'MY', 'CLOTHES', 'IN', 'ONE', 'OR', 'TWO', 'PLACES', 'HOWEVER'] +7975-280063-0009-2485: ref=['MAJOR', 'FOSTER', 'IN', 'A', 'LETTER', 'TO', 'JUDGE', 'GEORGE', 'M', 'BENNETT', 'OF', 'MINNEAPOLIS', 'SAID'] +7975-280063-0009-2485: hyp=['MAJOR', 'FOSTER', 'IN', 'A', 'LETTER', 'TO', 'JUDGE', 'GEORGE', 'M', 'BENARD', 'OF', 'MINNEAPOLIS', 'SAID'] 
+7975-280063-0010-2486: ref=['I', 'WAS', 'TOLD', 'BY', 'SOME', 'OF', 'OUR', 'MEN', 'FROM', 'THE', 'WESTERN', 'BORDER', 'OF', 'THE', 'STATE', 'THAT', 'THEY', 'RECOGNIZED', 'THE', 'DARING', 'YOUNG', 'RIDER', 'AS', 'COLE', 'YOUNGER'] +7975-280063-0010-2486: hyp=['I', 'WAS', 'TOLD', 'BY', 'SOME', 'OF', 'OUR', 'MEN', 'FROM', 'THE', 'WESTERN', 'BORDER', 'OF', 'THE', 'STATE', 'THAT', 'THEY', 'RECOGNIZED', 'THE', 'DARING', 'YOUNG', 'RITER', 'AS', 'CO', 'YOUNGER'] +7975-280063-0011-2487: ref=['ABOUT', 'NINE', 'THIRTY', 'A', 'M', 'I', 'WAS', 'SHOT', 'DOWN'] +7975-280063-0011-2487: hyp=['ABOUT', 'NINE', 'THIRTY', 'A', 'M', 'I', 'WAS', 'SHOT', 'DOWN'] +7975-280063-0012-2488: ref=['THE', 'WOUNDED', 'OF', 'BOTH', 'FORCES', 'WERE', 'GATHERED', 'UP', 'AND', 'WERE', 'PLACED', 'IN', 'HOUSES'] +7975-280063-0012-2488: hyp=['THE', 'WOUNDED', 'OF', 'BOTH', 'FORCES', 'WERE', 'GATHERED', 'UP', 'AND', 'WERE', 'PLACED', 'IN', 'HOUSES'] +7975-280076-0000-2489: ref=['ALTHOUGH', 'EVERY', 'BOOK', 'PURPORTING', 'TO', 'NARRATE', 'THE', 'LIVES', 'OF', 'THE', 'YOUNGER', 'BROTHERS', 'HAS', 'TOLD', 'OF', 'THE', 'LIBERTY', 'ROBBERY', 'AND', 'IMPLIED', 'THAT', 'WE', 'HAD', 'A', 'PART', 'IN', 'IT', 'THE', 'YOUNGERS', 'WERE', 'NOT', 'SUSPECTED', 'AT', 'THAT', 'TIME', 'NOR', 'FOR', 'A', 'LONG', 'TIME', 'AFTERWARD'] +7975-280076-0000-2489: hyp=['ALTHOUGH', 'EVERY', 'BOOK', 'PURPORTING', 'TO', 'NARRATE', 'THE', 'LIVES', 'OF', 'THE', 'YOUNGER', 'BROTHERS', 'HAS', 'TOLD', 'OF', 'THE', 'LIBERTY', 'ROBBERY', 'AND', 'IMPLIED', 'THAT', 'WE', 'HAD', 'A', 'PART', 'IN', 'IT', 'THE', 'YOUNGERS', 'WERE', 'NOT', 'SUSPECTED', 'AT', 'THAT', 'TIME', 'NOR', 'FOR', 'A', 'LONG', 'TIME', 'AFTERWARD'] +7975-280076-0001-2490: ref=['IT', 'WAS', 'CLAIMED', 'BY', 'PEOPLE', 'OF', 'LIBERTY', 'THAT', 'THEY', 'POSITIVELY', 'RECOGNIZED', 'AMONG', 'THE', 'ROBBERS', 'OLL', 'SHEPHERD', 'RED', 'MONKERS', 'AND', 'BUD', 'PENCE', 'WHO', 'HAD', 'SEEN', 'SERVICE', 'WITH', 'QUANTRELL'] +7975-280076-0001-2490: hyp=['IT', 'WAS', 'CLAIMED', 'BY', 'PEOPLE', 'OF', 'LIBERTY', 'THAT', 'THEY', 'POSITIVELY', 'RECOGNIZED', 'AMONG', 'THE', 'ROBBERS', 'ALL', 'SHEPHERD', 'RED', 'MOCKERS', 'AND', 'BUD', 'PANTS', 'WHO', 'HAD', 'SEEN', 'SERVICE', 'WITH', 'QUANTRAILLE'] +7975-280076-0002-2491: ref=['THIS', 'RAID', 'WAS', 'ACCOMPANIED', 'BY', 'BLOODSHED', 'JUDGE', 'MC', 'LAIN', 'THE', 'BANKER', 'BEING', 'SHOT', 'THOUGH', 'NOT', 'FATALLY'] +7975-280076-0002-2491: hyp=['THIS', 'RAID', 'WAS', 'ACCOMPANIED', 'BY', 'BLOODSHED', 'JUDGE', 'MC', 'LANE', 'THE', 'BANKER', 'BEING', 'SHOT', 'THOUGH', 'NOT', 'FATALLY'] +7975-280076-0003-2492: ref=['NO', 'WARRANT', 'WAS', 'ISSUED', 'FOR', 'THE', 'YOUNGERS', 'BUT', 'SUBSEQUENT', 'HISTORIANS', 'HAVE', 'INFERENTIALLY', 'AT', 'LEAST', 'ACCUSED', 'US', 'OF', 'TAKING', 'PART', 'BUT', 'AS', 'I', 'SAID', 'BEFORE', 'THERE', 'IS', 'NO', 'TRUTH', 'IN', 'THE', 'ACCUSATION'] +7975-280076-0003-2492: hyp=['NO', 'WARRANT', 'WAS', 'ISSUED', 'FOR', 'THE', 'YOUNGERS', 'BUT', 'SUBSEQUENT', 'HISTORIANS', 'HAVE', 'INFERENTIALLY', 'AT', 'LEAST', 'ACCUSED', 'US', 'OF', 'TAKING', 'PART', 'BUT', 'AS', 'I', 'SAID', 'BEFORE', 'THERE', 'IS', 'NO', 'TRUTH', 'IN', 'THE', 'ACCUSATION'] +7975-280076-0004-2493: ref=['JUNE', 'THIRD', 'EIGHTEEN', 'SEVENTY', 'ONE', 'OBOCOCK', 'BROTHERS', 'BANK', 'AT', 'CORYDON', 'IOWA', 'WAS', 'ROBBED', 'OF', 'FORTY', 'THOUSAND', 'DOLLARS', 'BY', 'SEVEN', 'MEN', 'IN', 'BROAD', 'DAYLIGHT'] +7975-280076-0004-2493: hyp=['JUNE', 'THIRD', 'EIGHTEEN', 'SEVENTY', 'ONE', 'OBACOCK', "BROTHER'S", 'BANK', 'AT', 'CROYDON', 'IOWA', 'WAS', 'ROBBED', 'OF', 'FORTY', 
'THOUSAND', 'DOLLARS', 'BY', 'SEVEN', 'MEN', 'IN', 'BROAD', 'DAYLIGHT'] +7975-280076-0005-2494: ref=['IT', 'WAS', 'CHARGED', 'THAT', 'ARTHUR', 'MC', 'COY', 'OR', 'A', 'C', 'MC', 'COY', 'AND', 'MYSELF', 'HAD', 'BEEN', 'PARTICIPANTS', 'IN', 'THE', "GAD'S", 'HILL', 'AFFAIR', 'AND', 'THE', 'TWO', 'STAGE', 'ROBBERIES'] +7975-280076-0005-2494: hyp=['IT', 'WAS', 'CHARGED', 'THAT', 'ARTHUR', 'MC', 'COY', 'OR', 'A', 'C', 'MAC', 'COY', 'AND', 'MYSELF', 'HAD', 'BEEN', 'PARTICIPANTS', 'IN', 'THE', "GAD'S", 'HILL', 'AFFAIR', 'AND', 'THE', 'TWO', 'STAGE', 'ROBBERIES'] +7975-280076-0006-2495: ref=['THE', 'PARTS', 'OF', 'THIS', 'LETTER', 'NOW', 'RELEVANT', 'ARE', 'AS', 'FOLLOWS'] +7975-280076-0006-2495: hyp=['THE', 'PARTS', 'OF', 'THIS', 'LETTER', 'NOW', 'RELEVANT', 'ARE', 'AS', 'FOLLOWS'] +7975-280076-0007-2496: ref=['YOU', 'MAY', 'USE', 'THIS', 'LETTER', 'IN', 'YOUR', 'OWN', 'WAY'] +7975-280076-0007-2496: hyp=['YOU', 'MAY', 'USE', 'THIS', 'LETTER', 'IN', 'YOUR', 'OWN', 'WAY'] +7975-280076-0008-2497: ref=['I', 'WILL', 'GIVE', 'YOU', 'THIS', 'OUTLINE', 'AND', 'SKETCH', 'OF', 'MY', 'WHEREABOUTS', 'AND', 'ACTIONS', 'AT', 'THE', 'TIME', 'OF', 'CERTAIN', 'ROBBERIES', 'WITH', 'WHICH', 'I', 'AM', 'CHARGED'] +7975-280076-0008-2497: hyp=['I', 'WILL', 'GIVE', 'YOU', 'THIS', 'OUTLINE', 'AND', 'SKETCH', 'OF', 'MY', 'WHEREABOUTS', 'AND', 'ACTIONS', 'AT', 'THE', 'TIME', 'OF', 'CERTAIN', 'ROBBERIES', 'WITH', 'WHICH', 'I', 'AM', 'CHARGED'] +7975-280076-0009-2498: ref=['AT', 'THE', 'TIME', 'OF', 'THE', 'GALLATIN', 'BANK', 'ROBBERY', 'I', 'WAS', 'GATHERING', 'CATTLE', 'IN', 'ELLIS', 'COUNTY', 'TEXAS', 'CATTLE', 'THAT', 'I', 'BOUGHT', 'FROM', 'PLEAS', 'TAYLOR', 'AND', 'RECTOR'] +7975-280076-0009-2498: hyp=['AT', 'THE', 'TIME', 'OF', 'THE', 'GLLOTON', 'BANK', 'ROBBERY', 'I', 'WAS', 'GATHERING', 'CATTLE', 'IN', 'ELLIS', 'COUNTY', 'TEXAS', 'CATTLE', 'HAD', 'I', 'BOUGHT', 'FROM', 'PLEDS', 'TAYLOR', 'AND', 'RECTOR'] +7975-280076-0010-2499: ref=['THIS', 'CAN', 'BE', 'PROVED', 'BY', 'BOTH', 'OF', 'THEM', 'ALSO', 'BY', 'SHERIFF', 'BARKLEY', 'AND', 'FIFTY', 'OTHER', 'RESPECTABLE', 'MEN', 'OF', 'THAT', 'COUNTY'] +7975-280076-0010-2499: hyp=['THIS', 'CAN', 'BE', 'PROVED', 'BY', 'BOTH', 'OF', 'THEM', 'ALSO', 'BY', 'SHERIFF', 'BARKELEY', 'AND', 'FIFTY', 'OTHER', 'RESPECTABLE', 'MEN', 'OF', 'THAT', 'COUNTY'] +7975-280076-0011-2500: ref=['I', 'BROUGHT', 'THE', 'CATTLE', 'TO', 'KANSAS', 'THAT', 'FALL', 'AND', 'REMAINED', 'IN', 'SAINT', 'CLAIR', 'COUNTY', 'UNTIL', 'FEBRUARY'] +7975-280076-0011-2500: hyp=['I', 'BROUGHT', 'THE', 'CATTLE', 'TO', 'KANSAS', 'THAT', 'FALL', 'AND', 'REMAINED', 'IN', 'SAINT', 'CLAIR', 'COUNTY', 'UNTIL', 'FEBRUARY'] +7975-280076-0012-2501: ref=['I', 'THEN', 'WENT', 'TO', 'ARKANSAS', 'AND', 'RETURNED', 'TO', 'SAINT', 'CLAIR', 'COUNTY', 'ABOUT', 'THE', 'FIRST', 'OF', 'MAY'] +7975-280076-0012-2501: hyp=['I', 'THEN', 'WENT', 'TO', 'ARKANSAS', 'AND', 'RETURNED', 'TO', 'SAINT', 'CLAIR', 'COUNTY', 'ABOUT', 'THE', 'FIRST', 'OF', 'MAY'] +7975-280076-0013-2502: ref=['I', 'WENT', 'TO', 'KANSAS', 'WHERE', 'OUR', 'CATTLE', 'WERE', 'IN', 'WOODSON', 'COUNTY', 'AT', 'COLONEL', "RIDGE'S"] +7975-280076-0013-2502: hyp=['AND', 'WENT', 'TO', 'KANSAS', 'WHERE', 'OUR', 'CATTLE', 'WERE', 'IN', 'WOODSON', 'COUNTY', 'AT', 'COLONEL', 'RIDGES'] +7975-280076-0014-2503: ref=['DURING', 'THE', 'SUMMER', 'I', 'WAS', 'EITHER', 'IN', 'SAINT', 'CLAIR', 'JACKSON', 'OR', 'KANSAS', 'BUT', 'AS', 'THERE', 'WAS', 'NO', 'ROBBERY', 'COMMITTED', 'THAT', 'SUMMER', 'IT', 'MAKES', 'NO', 'DIFFERENCE', 'WHERE', 'I', 'WAS'] +7975-280076-0014-2503: hyp=['DURING', 
'THE', 'SUMMER', 'I', 'WAS', 'EITHER', 'IN', 'SAINT', 'CLAIR', 'JACKSON', 'OR', 'KANSAS', 'BUT', 'AS', 'THERE', 'WAS', 'NO', 'ROBBERY', 'COMMITTED', 'THAT', 'SUMMER', 'IT', 'MAKES', 'NO', 'DIFFERENCE', 'WHERE', 'I', 'WAS'] +7975-280076-0015-2504: ref=['I', 'WENT', 'THROUGH', 'INDEPENDENCE', 'AND', 'FROM', 'THERE', 'TO', 'ACE', "WEBB'S"] +7975-280076-0015-2504: hyp=['I', 'WENT', 'THROUGH', 'INDEPENDENCE', 'AND', 'FROM', 'THERE', 'TO', 'ACE', 'WEBBS'] +7975-280076-0016-2505: ref=['THERE', 'I', 'TOOK', 'DINNER', 'AND', 'THEN', 'WENT', 'TO', 'DOCTOR', 'L', 'W', "TWYMAN'S"] +7975-280076-0016-2505: hyp=['THERE', 'I', 'TOOK', 'DINNER', 'AND', 'THEN', 'WENT', 'TO', 'DOCTOR', 'L', 'W', 'TWIMMANS'] +7975-280076-0017-2506: ref=['OUR', 'BUSINESS', 'THERE', 'WAS', 'TO', 'SEE', 'E', 'P', 'WEST', 'HE', 'WAS', 'NOT', 'AT', 'HOME', 'BUT', 'THE', 'FAMILY', 'WILL', 'REMEMBER', 'THAT', 'WE', 'WERE', 'THERE'] +7975-280076-0017-2506: hyp=['OUR', 'BUSINESS', 'THERE', 'WAS', 'TO', 'SEE', 'E', 'P', 'WEST', 'HE', 'WAS', 'NOT', 'AT', 'HOME', 'BUT', 'THE', 'FAMILY', 'WILL', 'REMEMBER', 'THAT', 'WE', 'WERE', 'THERE'] +7975-280076-0018-2507: ref=['WE', 'CROSSED', 'ON', 'THE', 'BRIDGE', 'STAYED', 'IN', 'THE', 'CITY', 'ALL', 'NIGHT', 'AND', 'THE', 'NEXT', 'MORNING', 'WE', 'RODE', 'UP', 'THROUGH', 'THE', 'CITY'] +7975-280076-0018-2507: hyp=['WE', 'CROSSED', 'ON', 'THE', 'BRIDGE', 'STAYED', 'IN', 'THE', 'CITY', 'ALL', 'NIGHT', 'AND', 'THE', 'NEXT', 'MORNING', 'WE', 'RODE', 'UP', 'TO', 'THE', 'CITY'] +7975-280076-0019-2508: ref=['I', 'MET', 'SEVERAL', 'OF', 'MY', 'FRIENDS', 'AMONG', 'THEM', 'WAS', 'BOB', 'HUDSPETH'] +7975-280076-0019-2508: hyp=['I', 'MET', 'SEVERAL', 'OF', 'MY', 'FRIENDS', 'AMONG', 'THEM', 'WAS', 'BOB', 'HUDSPETH'] +7975-280076-0020-2509: ref=['WE', 'WERE', 'NOT', 'ON', 'GOOD', 'TERMS', 'AT', 'THE', 'TIME', 'NOR', 'HAVE', 'WE', 'BEEN', 'FOR', 'SEVERAL', 'YEARS'] +7975-280076-0020-2509: hyp=['WE', 'WERE', 'NOT', 'ON', 'GOOD', 'TERMS', 'AT', 'THE', 'TIME', 'NOR', 'HAVE', 'WE', 'BEEN', 'FOR', 'SEVERAL', 'YEARS'] +7975-280076-0021-2510: ref=['POOR', 'JOHN', 'HE', 'HAS', 'BEEN', 'HUNTED', 'DOWN', 'AND', 'SHOT', 'LIKE', 'A', 'WILD', 'BEAST', 'AND', 'NEVER', 'WAS', 'A', 'BOY', 'MORE', 'INNOCENT'] +7975-280076-0021-2510: hyp=['POOR', 'JOHN', 'HE', 'HAS', 'BEEN', 'HUNTED', 'DOWN', 'AND', 'SHOT', 'LIKE', 'A', 'WILD', 'BEAST', 'AND', 'NEVER', 'WAS', 'A', 'BOY', 'MORE', 'INNOCENT'] +7975-280076-0022-2511: ref=['DOCTOR', 'L', 'LEWIS', 'WAS', 'HIS', 'PHYSICIAN'] +7975-280076-0022-2511: hyp=['DOCTOR', 'L', 'LEWIS', 'WAS', 'HIS', 'PHYSICIAN'] +7975-280076-0023-2512: ref=['THERE', 'WERE', 'FIFTY', 'OR', 'A', 'HUNDRED', 'PERSONS', 'THERE', 'WHO', 'WILL', 'TESTIFY', 'IN', 'ANY', 'COURT', 'THAT', 'JOHN', 'AND', 'I', 'WERE', 'THERE'] +7975-280076-0023-2512: hyp=['THERE', 'WERE', 'FIFTY', 'OR', 'A', 'HUNDRED', 'PERSONS', 'THERE', 'WHO', 'WILL', 'TESTIFY', 'IN', 'ANY', 'COURT', 'THAT', 'JOHN', 'AND', 'I', 'WERE', 'THERE'] +7975-280076-0024-2513: ref=['HELVIN', 'FICKLE', 'AND', 'WIFE', 'OF', 'GREENTON', 'VALLEY', 'WERE', 'ATTENDING', 'THE', 'SPRINGS', 'AT', 'THAT', 'TIME', 'AND', 'EITHER', 'OF', 'THEM', 'WILL', 'TESTIFY', 'TO', 'THE', 'ABOVE', 'FOR', 'JOHN', 'AND', 'I', 'SAT', 'IN', 'FRONT', 'OF', 'MISTER', 'SMITH', 'WHILE', 'HE', 'WAS', 'PREACHING', 'AND', 'WAS', 'IN', 'HIS', 'COMPANY', 'FOR', 'A', 'FEW', 'MOMENTS', 'TOGETHER', 'WITH', 'HIS', 'WIFE', 'AND', 'MISTER', 'AND', 'MISSUS', 'FICKLE', 'AFTER', 'SERVICE'] +7975-280076-0024-2513: hyp=['HELVAN', 'FICKLE', 'AND', 'WIFE', 'OF', 'GREENTON', 'VALLEY', 'WERE', 'ATTENDING', 
'THE', 'SPRINGS', 'AT', 'THAT', 'TIME', 'AND', 'EITHER', 'OF', 'THEM', 'WILL', 'TESTIFY', 'TO', 'THE', 'ABOVE', 'FOR', 'JOHN', 'AND', 'I', 'SAT', 'IN', 'FRONT', 'OF', 'MISTER', 'SMITH', 'WHILE', 'HE', 'WAS', 'PREACHING', 'AND', 'WAS', 'IN', 'HIS', 'COMPANY', 'FOR', 'A', 'FEW', 'MOMENTS', 'TOGETHER', 'WITH', 'HIS', 'WIFE', 'AND', 'MISTER', 'MISS', 'FICKLE', 'AFTER', 'THE', 'SERVICE'] +7975-280076-0025-2514: ref=['ABOUT', 'THE', 'LAST', 'OF', 'DECEMBER', 'EIGHTEEN', 'SEVENTY', 'THREE', 'I', 'ARRIVED', 'IN', 'CARROLL', 'PARISH', 'LOUISIANA'] +7975-280076-0025-2514: hyp=['ABOUT', 'THE', 'LAST', 'OF', 'DECEMBER', 'EIGHTEEN', 'SEVENTY', 'THREE', 'I', 'ARRIVED', 'IN', 'CARROLL', 'PARISH', 'LOUISIANA'] +7975-280076-0026-2515: ref=['I', 'STAYED', 'THERE', 'UNTIL', 'THE', 'EIGHTH', 'OF', 'FEBRUARY', 'EIGHTEEN', 'SEVENTY', 'FOUR'] +7975-280076-0026-2515: hyp=['I', 'STAYED', 'THERE', 'UNTIL', 'THE', 'EIGHTH', 'OF', 'FEBRUARY', 'EIGHTEEN', 'SEVENTY', 'FOUR'] +7975-280076-0027-2516: ref=['I', 'HAD', 'NOT', 'HEARD', 'OF', 'THAT', 'WHEN', 'I', 'WROTE', 'THE', 'LETTER', 'OF', 'EIGHTEEN', 'SEVENTY', 'FOUR', 'AND', 'TO', 'CORRECT', 'ANY', 'MISAPPREHENSION', 'THAT', 'MIGHT', 'BE', 'CREATED', 'BY', 'OMITTING', 'IT', 'I', 'WILL', 'SAY', 'THAT', 'AT', 'THAT', 'TIME', 'I', 'WAS', 'AT', 'NEOSHO', 'KANSAS', 'WITH', 'A', 'DROVE', 'OF', 'CATTLE', 'WHICH', 'I', 'SOLD', 'TO', 'MAJOR', 'RAY'] +7975-280076-0027-2516: hyp=['I', 'HAD', 'NOT', 'HEARD', 'OF', 'THAT', 'WHEN', 'I', 'WROTE', 'THE', 'LETTER', 'OF', 'EIGHTEEN', 'SEVENTY', 'FOUR', 'AND', 'TO', 'CORRECT', 'ANY', 'MISAPPREHENSION', 'THAT', 'MIGHT', 'BE', 'CREATED', 'BY', 'OMITTING', 'IT', 'I', 'WILL', 'SAY', 'THAT', 'AT', 'THE', 'TIME', 'I', 'WAS', 'AT', 'NEOTIO', 'KANSAS', 'WITH', 'A', 'DROVE', 'OF', 'CATTLE', 'WHICH', 'I', 'SOLD', 'TO', 'MAJOR', 'RAY'] +7975-280076-0028-2517: ref=['IT', 'WAS', 'IMMEDIATELY', 'FOLLOWING', 'THE', 'ROCK', 'ISLAND', 'ROBBERY', 'AT', 'ADAIR', 'IOWA', 'THAT', 'THERE', 'FIRST', 'APPEARED', 'A', 'DELIBERATE', 'ENLISTMENT', 'OF', 'SOME', 'LOCAL', 'PAPERS', 'IN', 'MISSOURI', 'TO', 'CONNECT', 'US', 'WITH', 'THIS', 'ROBBERY'] +7975-280076-0028-2517: hyp=['IT', 'WAS', 'IMMEDIATELY', 'FOLLOWING', 'THE', 'ROCK', 'ISLAND', 'ROBBERY', 'AT', 'EIGHT', 'AIR', 'IOWA', 'THAT', 'THERE', 'FIRST', 'APPEARED', 'A', 'DELIBERATE', 'ENLISTMENT', 'OF', 'SOME', 'LOCAL', 'PAPERS', 'IN', 'MISSOURI', 'TO', 'CONNECT', 'US', 'WITH', 'THIS', 'ROBBERY'] +7975-280084-0000-2518: ref=['I', 'URGED', 'ON', 'THE', 'BOYS', 'THAT', 'WHATEVER', 'HAPPENED', 'WE', 'SHOULD', 'NOT', 'SHOOT', 'ANY', 'ONE'] +7975-280084-0000-2518: hyp=['I', 'URGED', 'ON', 'THE', 'BOYS', 'THAT', 'WHATEVER', 'HAPPENED', 'WE', 'SHOULD', 'NOT', 'SHOOT', 'ANY', 'ONE'] +7975-280084-0001-2519: ref=['WHEN', 'MILLER', 'AND', 'I', 'CROSSED', 'THE', 'BRIDGE', 'THE', 'THREE', 'WERE', 'ON', 'SOME', 'DRY', 'GOODS', 'BOXES', 'AT', 'THE', 'CORNER', 'NEAR', 'THE', 'BANK', 'AND', 'AS', 'SOON', 'AS', 'THEY', 'SAW', 'US', 'WENT', 'RIGHT', 'INTO', 'THE', 'BANK', 'INSTEAD', 'OF', 'WAITING', 'FOR', 'US', 'TO', 'GET', 'THERE'] +7975-280084-0001-2519: hyp=['WHEN', 'MILLER', 'AND', 'I', 'CROSSED', 'THE', 'BRIDGE', 'THE', 'THREE', 'WERE', 'ON', 'SOME', 'DRY', 'GOOD', 'BOXES', 'AT', 'THE', 'CORNER', 'NEAR', 'THE', 'BANK', 'AND', 'AS', 'SOON', 'AS', 'THEY', 'SAW', 'US', 'WENT', 'RIGHT', 'INTO', 'THE', 'BANK', 'INSTEAD', 'OF', 'WAITING', 'FOR', 'US', 'TO', 'GET', 'THERE'] +7975-280084-0002-2520: ref=['WHEN', 'WE', 'CAME', 'UP', 'I', 'TOLD', 'MILLER', 'TO', 'SHUT', 'THE', 'BANK', 'DOOR', 'WHICH', 'THEY', 'HAD', 'LEFT', 'OPEN', 
'IN', 'THEIR', 'HURRY'] +7975-280084-0002-2520: hyp=['WHEN', 'WE', 'CAME', 'UP', 'I', 'TOLD', 'MILLER', 'TO', 'SHUT', 'THE', 'BANK', 'DOOR', 'WHICH', 'THEY', 'HAD', 'LEFT', 'OPEN', 'IN', 'THEIR', 'HURRY'] +7975-280084-0003-2521: ref=['J', 'S', 'ALLEN', 'WHOSE', 'HARDWARE', 'STORE', 'WAS', 'NEAR', 'TRIED', 'TO', 'GO', 'INTO', 'THE', 'BANK', 'BUT', 'MILLER', 'ORDERED', 'HIM', 'AWAY', 'AND', 'HE', 'RAN', 'AROUND', 'THE', 'CORNER', 'SHOUTING'] +7975-280084-0003-2521: hyp=['J', 'S', 'ALLEN', 'WHOSE', 'HARDWARE', 'STORE', 'WAS', 'NEAR', 'TRIED', 'TO', 'GO', 'INTO', 'THE', 'BANK', 'BUT', 'MILLER', 'ORDERED', 'HIM', 'AWAY', 'AND', 'HE', 'RAN', 'AROUND', 'THE', 'CORNER', 'SHOUTING'] +7975-280084-0004-2522: ref=['GET', 'YOUR', 'GUNS', 'BOYS', "THEY'RE", 'ROBBING', 'THE', 'BANK'] +7975-280084-0004-2522: hyp=['GET', 'YOUR', 'GUNS', 'BOYS', "THEY'RE", 'ROBBING', 'THE', 'BANK'] +7975-280084-0005-2523: ref=['AND', 'I', 'CALLED', 'TO', 'HIM', 'TO', 'GET', 'INSIDE', 'AT', 'THE', 'SAME', 'TIME', 'FIRING', 'A', 'PISTOL', 'SHOT', 'IN', 'THE', 'AIR', 'AS', 'A', 'SIGNAL', 'TO', 'THE', 'THREE', 'BOYS', 'AT', 'THE', 'BRIDGE', 'THAT', 'WE', 'HAD', 'BEEN', 'DISCOVERED'] +7975-280084-0005-2523: hyp=['AND', 'I', 'CALLED', 'TO', 'HIM', 'TO', 'GET', 'INSIDE', 'AT', 'THE', 'SAME', 'TIME', 'FIRING', 'A', 'PISTOL', 'SHOT', 'IN', 'THE', 'AIR', 'AS', 'A', 'SIGNAL', 'TO', 'THE', 'THREE', 'BOYS', 'AT', 'THE', 'BRIDGE', 'THAT', 'WE', 'HAD', 'BEEN', 'DISCOVERED'] +7975-280084-0006-2524: ref=['ALMOST', 'AT', 'THIS', 'INSTANT', 'I', 'HEARD', 'A', 'PISTOL', 'SHOT', 'IN', 'THE', 'BANK'] +7975-280084-0006-2524: hyp=['ALMOST', 'AT', 'THIS', 'INSTANT', 'I', 'HEARD', 'A', 'PISTOL', 'SHOT', 'IN', 'THE', 'BANK'] +7975-280084-0007-2525: ref=['CHADWELL', 'WOODS', 'AND', 'JIM', 'RODE', 'UP', 'AND', 'JOINED', 'US', 'SHOUTING', 'TO', 'PEOPLE', 'IN', 'THE', 'STREET', 'TO', 'GET', 'INSIDE', 'AND', 'FIRING', 'THEIR', 'PISTOLS', 'TO', 'EMPHASIZE', 'THEIR', 'COMMANDS'] +7975-280084-0007-2525: hyp=['CHADWELL', 'WOODS', 'AND', 'JIM', 'RODE', 'UP', 'AND', 'JOINED', 'US', 'SHOUTING', 'TO', 'THE', 'PEOPLE', 'IN', 'THE', 'STREET', 'TO', 'GET', 'INSIDE', 'AND', 'FIRING', 'THEIR', 'PISTOLS', 'TO', 'EMPHASIZE', 'THEIR', 'COMMANDS'] +7975-280084-0008-2526: ref=['IF', 'ANY', 'OF', 'OUR', 'PARTY', 'SHOT', 'HIM', 'IT', 'MUST', 'HAVE', 'BEEN', 'WOODS'] +7975-280084-0008-2526: hyp=['IF', 'ANY', 'OF', 'OUR', 'PARTY', 'SHOT', 'HIM', 'IT', 'MUST', 'HAVE', 'BEEN', 'WOODS'] +7975-280084-0009-2527: ref=['MEANTIME', 'THE', 'STREET', 'WAS', 'GETTING', 'UNCOMFORTABLY', 'HOT'] +7975-280084-0009-2527: hyp=['MEANTIME', 'THE', 'STREET', 'WAS', 'GETTING', 'UNCOMFORTABLY', 'HOT'] +7975-280084-0010-2528: ref=['EVERY', 'TIME', 'I', 'SAW', 'ANY', 'ONE', 'WITH', 'A', 'BEAD', 'ON', 'ME', 'I', 'WOULD', 'DROP', 'OFF', 'MY', 'HORSE', 'AND', 'TRY', 'TO', 'DRIVE', 'THE', 'SHOOTER', 'INSIDE', 'BUT', 'I', 'COULD', 'NOT', 'SEE', 'IN', 'EVERY', 'DIRECTION'] +7975-280084-0010-2528: hyp=['EVERY', 'TIME', 'I', 'SAW', 'ANYONE', 'WITH', 'A', 'BEAD', 'ON', 'ME', 'I', 'WOULD', 'DROP', 'OFF', 'MY', 'HORSE', 'AND', 'TRY', 'TO', 'DRIVE', 'THE', 'SHOOTER', 'INSIDE', 'BUT', 'I', 'COULD', 'NOT', 'SEE', 'IN', 'EVERY', 'DIRECTION'] +7975-280084-0011-2529: ref=['DOCTOR', 'WHEELER', 'WHO', 'HAD', 'GONE', 'UPSTAIRS', 'IN', 'THE', 'HOTEL', 'SHOT', 'MILLER', 'AND', 'HE', 'LAY', 'DYING', 'IN', 'THE', 'STREET'] +7975-280084-0011-2529: hyp=['DOCTOR', 'WHEELER', 'WHO', 'HAD', 'GONE', 'UPSTAIRS', 'IN', 'THE', 'HOTEL', 'SHOT', 'MILLER', 'AND', 'HE', 'LAY', 'DYING', 'IN', 'THE', 'STREET'] +7975-280084-0012-2530: 
ref=['CHANGING', 'HIS', 'PISTOL', 'TO', 'HIS', 'LEFT', 'HAND', 'BOB', 'RAN', 'OUT', 'AND', 'MOUNTED', "MILLER'S", 'MARE'] +7975-280084-0012-2530: hyp=['CHANGING', 'HIS', 'PISTOL', 'TO', 'HIS', 'LEFT', 'HAND', 'BOB', 'RAN', 'OUT', 'AND', 'MOUNTED', "MILLER'S", 'MARE'] +7975-280084-0013-2531: ref=['WHAT', 'KEPT', 'YOU', 'SO', 'LONG', 'I', 'ASKED', 'PITTS'] +7975-280084-0013-2531: hyp=['WHAT', 'KEPT', 'YOU', 'SO', 'LONG', 'ASKED', 'PITT'] +7975-280084-0014-2532: ref=['AS', 'TO', 'THE', 'REST', 'OF', 'THE', 'AFFAIR', 'INSIDE', 'THE', 'BANK', 'I', 'TAKE', 'THE', 'ACCOUNT', 'OF', 'A', 'NORTHFIELD', 'NARRATOR'] +7975-280084-0014-2532: hyp=['AS', 'TO', 'THE', 'REST', 'OF', 'THE', 'AFFAIR', 'INSIDE', 'THE', 'BANK', 'I', 'TAKE', 'THE', 'ACCOUNT', 'OF', 'A', 'NORTHFIELD', 'NARRATOR'] +7975-280084-0015-2533: ref=["WHERE'S", 'THE', 'MONEY', 'OUTSIDE', 'THE', 'SAFE', 'BOB', 'ASKED'] +7975-280084-0015-2533: hyp=["WHERE'S", 'THE', 'MONEY', 'OUTSIDE', 'THE', 'SAFE', 'BOB', 'ASKED'] +7975-280084-0016-2534: ref=['THE', 'SHUTTERS', 'WERE', 'CLOSED', 'AND', 'THIS', 'CAUSED', 'BUNKER', 'AN', "INSTANT'S", 'DELAY', 'THAT', 'WAS', 'ALMOST', 'FATAL', 'PITTS', 'CHASED', 'HIM', 'WITH', 'A', 'BULLET'] +7975-280084-0016-2534: hyp=['THE', 'SHOULDERS', 'WERE', 'CLOSED', 'AND', 'THIS', 'CAUSED', 'BUNKER', 'AN', "INSTANT'S", 'DELAY', 'THAT', 'WAS', 'ALMOST', 'FATAL', 'FITZ', 'CHASED', 'HIM', 'WITH', 'A', 'BULLET'] +7975-280084-0017-2535: ref=['THE', 'FIRST', 'ONE', 'MISSED', 'HIM', 'BUT', 'THE', 'SECOND', 'WENT', 'THROUGH', 'HIS', 'RIGHT', 'SHOULDER'] +7975-280084-0017-2535: hyp=['THE', 'FIRST', 'ONE', 'MISSED', 'HIM', 'BUT', 'THE', 'SECOND', 'WENT', 'THROUGH', 'HIS', 'RIGHT', 'SHOULDER'] +7975-280085-0000-2536: ref=['THAT', 'NIGHT', 'IT', 'STARTED', 'TO', 'RAIN', 'AND', 'WE', 'WORE', 'OUT', 'OUR', 'HORSES'] +7975-280085-0000-2536: hyp=['THAT', 'NIGHT', 'IT', 'STARTED', 'TO', 'RAIN', 'AND', 'WE', 'WORE', 'OUT', 'OUR', 'HORSES'] +7975-280085-0001-2537: ref=['FRIDAY', 'WE', 'MOVED', 'TOWARD', 'WATERVILLE', 'AND', 'FRIDAY', 'NIGHT', 'WE', 'CAMPED', 'BETWEEN', 'ELYSIAN', 'AND', 'GERMAN', 'LAKE'] +7975-280085-0001-2537: hyp=['FRIDAY', 'WE', 'MOVED', 'TOWARD', 'WATERVILLE', 'AND', 'FRIDAY', 'NIGHT', 'WE', 'CAMPED', 'BETWEEN', 'ELYSIAN', 'AND', 'GERMAN', 'LAKE'] +7975-280085-0002-2538: ref=["BOB'S", 'SHATTERED', 'ELBOW', 'WAS', 'REQUIRING', 'FREQUENT', 'ATTENTION', 'AND', 'THAT', 'NIGHT', 'WE', 'MADE', 'ONLY', 'NINE', 'MILES', 'AND', 'MONDAY', 'MONDAY', 'NIGHT', 'AND', 'TUESDAY', 'WE', 'SPENT', 'IN', 'A', 'DESERTED', 'FARM', 'HOUSE', 'CLOSE', 'TO', 'MANKATO'] +7975-280085-0002-2538: hyp=["BOB'S", 'SHATTERED', 'ELBOW', 'WAS', 'REQUIRING', 'FREQUENT', 'ATTENTION', 'AND', 'THAT', 'NIGHT', 'WE', 'MADE', 'ONLY', 'NINE', 'MILES', 'ON', 'MONDAY', 'MONDAY', 'NIGHT', 'AND', 'TUESDAY', 'WE', 'SPENT', 'IN', 'A', 'DESERTED', 'FARMHOUSE', 'CLOSE', 'TO', 'MAN', 'CATO'] +7975-280085-0003-2539: ref=['THAT', 'DAY', 'A', 'MAN', 'NAMED', 'DUNNING', 'DISCOVERED', 'US', 'AND', 'WE', 'TOOK', 'HIM', 'PRISONER'] +7975-280085-0003-2539: hyp=['THAT', 'DAY', 'A', 'MAN', 'NAMED', 'DUNNING', 'DISCOVERED', 'US', 'AND', 'WE', 'TOOK', 'HIM', 'PRISONER'] +7975-280085-0004-2540: ref=['FINALLY', 'WE', 'ADMINISTERED', 'TO', 'HIM', 'AN', 'OATH', 'NOT', 'TO', 'BETRAY', 'OUR', 'WHEREABOUTS', 'UNTIL', 'WE', 'HAD', 'TIME', 'TO', 'MAKE', 'OUR', 'ESCAPE', 'AND', 'HE', 'AGREED', 'NOT', 'TO'] +7975-280085-0004-2540: hyp=['FINALLY', 'WE', 'ADMINISTERED', 'TO', 'HIM', 'AN', 'OATH', 'NOT', 'TO', 'BETRAY', 'OUR', 'WHEREABOUTS', 'UNTIL', 'WE', 'HAD', 'TIME', 'TO', 'MAKE', 'OUR', 
'ESCAPE', 'AND', 'HE', 'AGREED', 'NOT', 'TO'] +7975-280085-0005-2541: ref=['NO', 'SOONER', 'HOWEVER', 'WAS', 'HE', 'RELEASED', 'THAN', 'HE', 'MADE', 'POSTHASTE', 'INTO', 'MANKATO', 'TO', 'ANNOUNCE', 'OUR', 'PRESENCE', 'AND', 'IN', 'A', 'FEW', 'MINUTES', 'ANOTHER', 'POSSE', 'WAS', 'LOOKING', 'FOR', 'US'] +7975-280085-0005-2541: hyp=['NO', 'SOONER', 'HOWEVER', 'WAS', 'HE', 'RELEASED', 'THAN', 'HE', 'MADE', 'POST', 'HASTE', 'INTO', 'MAN', 'CATO', 'TO', 'ANNOUNCE', 'OUR', 'PRESENCE', 'AND', 'IN', 'A', 'FEW', 'MINUTES', 'ANOTHER', 'POSSE', 'WAS', 'LOOKING', 'FOR', 'US'] +7975-280085-0006-2542: ref=['THE', 'WHISTLE', 'ON', 'THE', 'OIL', 'MILL', 'BLEW', 'AND', 'WE', 'FEARED', 'THAT', 'IT', 'WAS', 'A', 'SIGNAL', 'THAT', 'HAD', 'BEEN', 'AGREED', 'UPON', 'TO', 'ALARM', 'THE', 'TOWN', 'IN', 'CASE', 'WE', 'WERE', 'OBSERVED', 'BUT', 'WE', 'WERE', 'NOT', 'MOLESTED'] +7975-280085-0006-2542: hyp=['THE', 'WHISTLE', 'ON', 'THE', 'OIL', 'MILL', 'BLEW', 'AND', 'WE', 'FEARED', 'THAT', 'IT', 'WAS', 'A', 'SIGNAL', 'THAT', 'HAD', 'BEEN', 'AGREED', 'UPON', 'TO', 'ALARM', 'THE', 'TOWN', 'IN', 'CASE', 'WE', 'WERE', 'OBSERVED', 'BUT', 'WE', 'WERE', 'NOT', 'MOLESTED'] +7975-280085-0007-2543: ref=['HE', 'HAD', 'TO', 'SLEEP', 'WITH', 'IT', 'PILLOWED', 'ON', 'MY', 'BREAST', 'JIM', 'BEING', 'ALSO', 'CRIPPLED', 'WITH', 'A', 'WOUND', 'IN', 'HIS', 'SHOULDER', 'AND', 'WE', 'COULD', 'NOT', 'GET', 'MUCH', 'SLEEP'] +7975-280085-0007-2543: hyp=['HE', 'HAD', 'TO', 'SLEEP', 'WITH', 'THAT', 'PILLOW', 'ON', 'MY', 'BREAST', 'JIM', 'BEING', 'ALSO', 'A', 'CRIPPLE', 'WITH', 'A', 'WOUND', 'IN', 'HIS', 'SHOULDER', 'AND', 'WE', 'COULD', 'NOT', 'GET', 'MUCH', 'SLEEP'] +7975-280085-0008-2544: ref=['BUT', 'THEY', 'SOON', 'AFTER', 'GOT', 'CLOSE', 'ENOUGH', 'SO', 'THAT', 'ONE', 'OF', 'THEM', 'BROKE', 'MY', 'WALKING', 'STICK', 'WITH', 'A', 'SHOT'] +7975-280085-0008-2544: hyp=['BUT', 'THEY', 'SOON', 'AFTER', 'GOT', 'CLOSE', 'ENOUGH', 'SO', 'THAT', 'ONE', 'OF', 'THEM', 'BROKE', 'MY', 'WALKING', 'STICK', 'WITH', 'A', 'SHOT'] +7975-280085-0009-2545: ref=['WE', 'WERE', 'IN', 'SIGHT', 'OF', 'OUR', 'LONG', 'SOUGHT', 'HORSES', 'WHEN', 'THEY', 'CUT', 'US', 'OFF', 'FROM', 'THE', 'ANIMALS', 'AND', 'OUR', 'LAST', 'HOPE', 'WAS', 'GONE'] +7975-280085-0009-2545: hyp=['WE', 'WERE', 'IN', 'SIGHT', 'OF', 'OUR', 'LONG', 'SAWED', 'HORSES', 'WHEN', 'THEY', 'CUT', 'US', 'OFF', 'FROM', 'THE', 'ANIMALS', 'AND', 'OUR', 'LAST', 'HOPE', 'WAS', 'GONE'] +7975-280085-0010-2546: ref=['SIX', 'STEPPED', 'TO', 'THE', 'FRONT', 'SHERIFF', 'GLISPIN', 'COLONEL', 'T', 'L', 'VOUGHT', 'B', 'M', 'RICE', 'G', 'A', 'BRADFORD', 'C', 'A', 'POMEROY', 'AND', 'S', 'J', 'SEVERSON'] +7975-280085-0010-2546: hyp=['SIX', 'STEPS', 'TO', 'THE', 'FRONT', 'SHERIFF', 'LISPON', 'COLONEL', 'T', 'O', 'VAULT', 'B', 'M', 'RICE', 'G', 'A', 'BRADFORD', 'C', 'A', 'POLMROY', 'AND', 'S', 'J', 'CEVERSON'] +7975-280085-0011-2547: ref=['FORMING', 'IN', 'LINE', 'FOUR', 'PACES', 'APART', 'HE', 'ORDERED', 'THEM', 'TO', 'ADVANCE', 'RAPIDLY', 'AND', 'CONCENTRATE', 'THE', 'FIRE', 'OF', 'THE', 'WHOLE', 'LINE', 'THE', 'INSTANT', 'THE', 'ROBBERS', 'WERE', 'DISCOVERED'] +7975-280085-0011-2547: hyp=['FORMING', 'IN', 'LINE', 'FOUR', 'PACES', 'APART', 'HE', 'ORDERED', 'THEM', 'TO', 'ADVANCE', 'RAPIDLY', 'AND', 'CONCENTRATE', 'THE', 'FIRE', 'OF', 'THE', 'WHOLE', 'LINE', 'THE', 'INSTANT', 'THE', 'ROBBERS', 'WERE', 'DISCOVERED'] +7975-280085-0012-2548: ref=['MAKE', 'FOR', 'THE', 'HORSES', 'I', 'SAID', 'EVERY', 'MAN', 'FOR', 'HIMSELF'] +7975-280085-0012-2548: hyp=['MAKE', 'FOR', 'THE', 'HORSES', 'I', 'SAID', 'EVERY', 'MAN', 'FOR', 
'HIMSELF'] +7975-280085-0013-2549: ref=['THERE', 'IS', 'NO', 'USE', 'STOPPING', 'TO', 'PICK', 'UP', 'A', 'COMRADE', 'HERE', 'FOR', 'WE', "CAN'T", 'GET', 'HIM', 'THROUGH', 'THE', 'LINE', 'JUST', 'CHARGE', 'THEM', 'AND', 'MAKE', 'IT', 'IF', 'WE', 'CAN'] +7975-280085-0013-2549: hyp=["THERE'S", 'NO', 'USE', 'STOPPING', 'TO', 'PICK', 'UP', 'A', 'COMRADE', 'HERE', 'FOR', 'WE', "CAN'T", 'GET', 'HIM', 'THROUGH', 'THE', 'LINE', 'JUST', 'CHARGE', 'HIM', 'AND', 'MAKE', 'IT', 'IF', 'WE', 'CAN'] +7975-280085-0014-2550: ref=['I', 'GOT', 'UP', 'AS', 'THE', 'SIGNAL', 'FOR', 'THE', 'CHARGE', 'AND', 'WE', 'FIRED', 'ONE', 'VOLLEY'] +7975-280085-0014-2550: hyp=['I', 'GOT', 'UP', 'AS', 'A', 'SIGNAL', 'FOR', 'THE', 'CHARGE', 'AND', 'WE', 'FIRED', 'ONE', 'VOLLEY'] +7975-280085-0015-2551: ref=['ONE', 'OF', 'THE', 'FELLOWS', 'IN', 'THE', 'OUTER', 'LINE', 'NOT', 'BRAVE', 'ENOUGH', 'HIMSELF', 'TO', 'JOIN', 'THE', 'VOLUNTEERS', 'WHO', 'HAD', 'COME', 'IN', 'TO', 'BEAT', 'US', 'OUT', 'WAS', 'NOT', 'DISPOSED', 'TO', 'BELIEVE', 'IN', 'THE', 'SURRENDER', 'AND', 'HAD', 'HIS', 'GUN', 'LEVELLED', 'ON', 'BOB', 'IN', 'SPITE', 'OF', 'THE', 'HANDKERCHIEF', 'WHICH', 'WAS', 'WAVING', 'AS', 'A', 'FLAG', 'OF', 'TRUCE'] +7975-280085-0015-2551: hyp=['ONE', 'OF', 'THE', 'FELLOWS', 'IN', 'THE', 'OUTER', 'LINE', 'NOT', 'BRAVE', 'ENOUGH', 'HIMSELF', 'TO', 'JOIN', 'THE', 'VOLUNTEERS', 'WHO', 'HAD', 'COME', 'IN', 'TO', 'BEAT', 'US', 'OUT', 'WAS', 'NOT', 'DISPOSED', 'TO', 'BELIEVE', 'IN', 'THE', 'SURRENDER', 'AND', 'HAD', 'HIS', 'GUN', 'LEVELLED', 'ON', 'BOB', 'IN', 'SPITE', 'OF', 'THE', 'HANDKERCHIEF', 'WHICH', 'WAS', 'WAVING', 'AS', 'A', 'FLAG', 'OF', 'TRUCE'] +7975-280085-0016-2552: ref=['SHERIFF', 'GLISPIN', 'OF', 'WATONWAN', 'COUNTY', 'WHO', 'WAS', 'TAKING', "BOB'S", 'PISTOL', 'FROM', 'HIM', 'WAS', 'ALSO', 'SHOUTING', 'TO', 'THE', 'FELLOW'] +7975-280085-0016-2552: hyp=['SHERIFF', 'GLISPIN', 'OF', 'WATERWAN', 'COUNTY', 'WHO', 'WAS', 'TAKING', "BOB'S", 'PISTOL', 'FROM', 'HIM', 'WAS', 'ALSO', 'SHOUTING', 'TO', 'THE', 'FELLOW'] +7975-280085-0017-2553: ref=['INCLUDING', 'THOSE', 'RECEIVED', 'IN', 'AND', 'ON', 'THE', 'WAY', 'FROM', 'NORTHFIELD', 'I', 'HAD', 'ELEVEN', 'WOUNDS'] +7975-280085-0017-2553: hyp=['INCLUDING', 'THOSE', 'RECEIVED', 'IN', 'AND', 'ON', 'THE', 'WAY', 'FROM', 'NORTHFIELD', 'I', 'HAD', 'ELEVEN', 'WOUNDS'] +7975-280085-0018-2554: ref=['AND', 'SHERIFF', "GLISPIN'S", 'ORDER', 'NOT', 'TO', 'SHOOT', 'WAS', 'THE', 'BEGINNING', 'OF', 'THE', 'PROTECTORATE', 'THAT', 'MINNESOTA', 'PEOPLE', 'ESTABLISHED', 'OVER', 'US'] +7975-280085-0018-2554: hyp=['AND', 'SHERIFF', "LISPIN'S", 'ORDER', 'NOT', 'TO', 'SHOOT', 'WAS', 'THE', 'BEGINNING', 'OF', 'THE', 'PROTECTORY', 'THE', 'MINNESOTA', 'PEOPLE', 'ESTABLISHED', 'OVER', 'US'] +8131-117016-0000-2555: ref=['CAPTAIN', 'MURDOCH'] +8131-117016-0000-2555: hyp=['CAPTAIN', 'MURDOCK'] +8131-117016-0001-2556: ref=['BUT', 'MARSPORT', 'HAD', 'FLOURISHED', 'ENOUGH', 'TO', 'KILL', 'IT', 'OFF'] +8131-117016-0001-2556: hyp=['BUT', 'MARSPORT', 'HAD', 'FLOURISHED', 'ENOUGH', 'TO', 'KILL', 'IT', 'OFF'] +8131-117016-0002-2557: ref=['SOME', 'OF', 'MARS', 'LAWS', 'DATED', 'FROM', 'THE', 'TIME', 'WHEN', 'LAW', 'ENFORCEMENT', 'HAD', 'BEEN', 'HAMPERED', 'BY', 'LACK', 'OF', 'MEN', 'RATHER', 'THAN', 'BY', 'THE', 'TYPE', 'OF', 'MEN'] +8131-117016-0002-2557: hyp=['SOME', 'OF', 'MARS', 'LAWS', 'DATED', 'FROM', 'THE', 'TIME', 'WHEN', 'LAW', 'ENFORCEMENT', 'HAD', 'BEEN', 'HAMPERED', 'BY', 'LACK', 'OF', 'MEN', 'RATHER', 'THAN', 'BY', 'THE', 'TYPE', 'OF', 'MEN'] +8131-117016-0003-2558: ref=['THE', 'STONEWALL', 'GANG', 
'NUMBERED', 'PERHAPS', 'FIVE', 'HUNDRED'] +8131-117016-0003-2558: hyp=['THE', 'STONEWALL', 'GANG', 'NUMBERED', 'PERHAPS', 'FIVE', 'HUNDRED'] +8131-117016-0004-2559: ref=['EVEN', 'DERELICTS', 'AND', 'FAILURES', 'HAD', 'TO', 'EAT', 'THERE', 'WERE', 'STORES', 'AND', 'SHOPS', 'THROUGHOUT', 'THE', 'DISTRICT', 'WHICH', 'EKED', 'OUT', 'SOME', 'KIND', 'OF', 'A', 'MARGINAL', 'LIVING'] +8131-117016-0004-2559: hyp=['EVEN', 'DERELICTS', 'AND', 'FAILURES', 'HAD', 'TO', 'EAT', 'THERE', 'WERE', 'STORIES', 'AND', 'SHOPS', 'THROUGHOUT', 'THE', 'DISTRICT', 'WHICH', 'EKED', 'OUT', 'SOME', 'KIND', 'OF', 'A', 'MARGINAL', 'LIVING'] +8131-117016-0005-2560: ref=['THEY', 'WERE', 'SAFE', 'FROM', 'PROTECTION', 'RACKETEERS', 'THERE', 'NONE', 'BOTHERED', 'TO', 'COME', 'SO', 'FAR', 'OUT'] +8131-117016-0005-2560: hyp=['THEY', 'WERE', 'SAFE', 'FROM', 'PROTECTION', 'RACKETEERS', 'THERE', 'NONE', 'BOTHERED', 'TO', 'COME', 'SO', 'FAR', 'OUT'] +8131-117016-0006-2561: ref=['THE', 'SHOPKEEPERS', 'AND', 'SOME', 'OF', 'THE', 'LESS', 'UNFORTUNATE', 'PEOPLE', 'THERE', 'HAD', 'PROTESTED', 'LOUD', 'ENOUGH', 'TO', 'REACH', 'CLEAR', 'BACK', 'TO', 'EARTH'] +8131-117016-0006-2561: hyp=['THE', 'SHOPKEEPERS', 'AND', 'SOME', 'OF', 'THE', 'LESS', 'UNFORTUNATE', 'PEOPLE', 'THERE', 'HAD', 'PROTESTED', 'LOUD', 'ENOUGH', 'TO', 'REACH', 'CLEAR', 'BACK', 'TO', 'EARTH'] +8131-117016-0007-2562: ref=['CAPTAIN', 'MURDOCH', 'WAS', 'AN', 'UNKNOWN', 'FACTOR', 'AND', 'NOW', 'WAS', 'ASKING', 'FOR', 'MORE', 'MEN'] +8131-117016-0007-2562: hyp=['CAPTAIN', 'MURDOCK', 'WAS', 'AN', 'UNKNOWN', 'FACTOR', 'AND', 'NOW', 'WAS', 'ASKING', 'FOR', 'MORE', 'MEN'] +8131-117016-0008-2563: ref=['THE', 'PRESSURE', 'WAS', 'ENOUGH', 'TO', 'GET', 'THEM', 'FOR', 'HIM'] +8131-117016-0008-2563: hyp=['THE', 'PRESSURE', 'WAS', 'ENOUGH', 'TO', 'GET', 'THEM', 'FOR', 'HIM'] +8131-117016-0009-2564: ref=['GORDON', 'REPORTED', 'FOR', 'WORK', 'WITH', 'A', 'SENSE', 'OF', 'THE', 'BOTTOM', 'FALLING', 'OUT', 'MIXED', 'WITH', 'A', 'VAGUE', 'RELIEF'] +8131-117016-0009-2564: hyp=['GORDON', 'REPORTED', 'FOR', 'WORK', 'WITH', 'A', 'SENSE', 'OF', 'THE', 'BOTTOM', 'FALLING', 'OUT', 'MIXED', 'WITH', 'A', 'VAGUE', 'RELIEF'] +8131-117016-0010-2565: ref=["I'VE", 'GOT', 'A', 'FREE', 'HAND', 'AND', "WE'RE", 'GOING', 'TO', 'RUN', 'THIS', 'THE', 'WAY', 'WE', 'WOULD', 'ON', 'EARTH'] +8131-117016-0010-2565: hyp=["I'VE", 'GOT', 'A', 'FREE', 'HAND', 'AND', "WE'RE", 'GOING', 'TO', 'RUN', 'THIS', 'THE', 'WAY', 'WE', 'WOULD', 'ON', 'EARTH'] +8131-117016-0011-2566: ref=['YOUR', 'JOB', 'IS', 'TO', 'PROTECT', 'THE', 'CITIZENS', 'HERE', 'AND', 'THAT', 'MEANS', 'EVERYONE', 'NOT', 'BREAKING', 'THE', 'LAWS', 'WHETHER', 'YOU', 'FEEL', 'LIKE', 'IT', 'OR', 'NOT', 'NO', 'GRAFT'] +8131-117016-0011-2566: hyp=['YOUR', 'JOB', 'IS', 'TO', 'PROTECT', 'THE', 'CITIZENS', 'HERE', 'AND', 'THAT', 'MEANS', 'EVERYONE', 'NOT', 'BREAKING', 'THE', 'LAWS', 'WHETHER', 'YOU', 'FEEL', 'LIKE', 'IT', 'OR', 'NOT', 'NO', 'GRAFT'] +8131-117016-0012-2567: ref=['THE', 'FIRST', 'MAN', 'MAKING', 'A', 'SHAKEDOWN', 'WILL', 'GET', 'THE', 'SAME', 'TREATMENT', "WE'RE", 'GOING', 'TO', 'USE', 'ON', 'THE', 'STONEWALL', 'BOYS', "YOU'LL", 'GET', 'DOUBLE', 'PAY', 'HERE', 'AND', 'YOU', 'CAN', 'LIVE', 'ON', 'IT'] +8131-117016-0012-2567: hyp=['THE', 'FIRST', 'MAN', 'MAKING', 'A', 'SHAKEDOWN', 'WILL', 'GET', 'THE', 'SAME', 'TREATMENT', "WE'RE", 'GOING', 'TO', 'USE', 'ON', 'THE', 'STONEWALL', 'BOYS', "YOU'LL", 'GET', 'DOUBLE', 'PAY', 'HERE', 'AND', 'YOU', 'CAN', 'LIVE', 'ON', 'IT'] +8131-117016-0013-2568: ref=['HE', 'PICKED', 'OUT', 'FIVE', 'OF', 'THE', 'MEN', 
'INCLUDING', 'GORDON', 'YOU', 'FIVE', 'WILL', 'COME', 'WITH', 'ME'] +8131-117016-0013-2568: hyp=['HE', 'PICKED', 'OUT', 'FIVE', 'OF', 'THE', 'MEN', 'INCLUDING', 'GORDON', 'YOU', 'FIVE', 'WILL', 'COME', 'WITH', 'ME'] +8131-117016-0014-2569: ref=['THE', 'REST', 'OF', 'YOU', 'CAN', 'TEAM', 'UP', 'ANY', 'WAY', 'YOU', 'WANT', 'TONIGHT', 'PICK', 'ANY', 'ROUTE', "THAT'S", 'OPEN', 'OKAY', 'MEN', "LET'S", 'GO'] +8131-117016-0014-2569: hyp=['THE', 'REST', 'OF', 'YOU', 'CAN', 'TEAM', 'UP', 'ANY', 'WAY', 'YOU', 'WANT', 'TO', 'NIGHT', 'PICK', 'ANY', 'ROUTE', "THAT'S", 'OPEN', 'O', 'CAMEN', "LET'S", 'GO'] +8131-117016-0015-2570: ref=['BRUCE', 'GORDON', 'GRINNED', 'SLOWLY', 'AS', 'HE', 'SWUNG', 'THE', 'STICK', 'AND', "MURDOCH'S", 'EYES', 'FELL', 'ON', 'HIM', 'EARTH', 'COP'] +8131-117016-0015-2570: hyp=['BRUCE', 'GORDON', 'GRINNED', 'SLOWLY', 'AS', 'HE', 'SWUNG', 'THE', 'STICK', 'AND', "MURDOCK'S", 'EYES', 'FELL', 'ON', 'HIM', 'EARTH', 'COP'] +8131-117016-0016-2571: ref=['TWO', 'YEARS', 'GORDON', 'ADMITTED'] +8131-117016-0016-2571: hyp=['TWO', 'YEARS', 'GORDON', 'ADMITTED'] +8131-117016-0017-2572: ref=['FOR', 'A', 'SECOND', 'GORDON', 'CURSED', 'HIMSELF'] +8131-117016-0017-2572: hyp=['FOR', 'A', 'SECOND', 'GORDON', 'CURSED', 'HIMSELF'] +8131-117016-0018-2573: ref=['HE', 'BEGAN', 'WONDERING', 'ABOUT', 'SECURITY', 'THEN'] +8131-117016-0018-2573: hyp=['HE', 'BEGAN', 'WONDERING', 'ABOUT', 'SECURITY', 'THEN'] +8131-117016-0019-2574: ref=['NOBODY', 'HAD', 'TRIED', 'TO', 'GET', 'IN', 'TOUCH', 'WITH', 'HIM'] +8131-117016-0019-2574: hyp=['NOBODY', 'HAD', 'TRIED', 'TO', 'GET', 'IN', 'TOUCH', 'WITH', 'HIM'] +8131-117016-0020-2575: ref=['THERE', 'WAS', 'A', 'CRUDE', 'LIGHTING', 'SYSTEM', 'HERE', 'PUT', 'UP', 'BY', 'THE', 'CITIZENS', 'AT', 'THE', 'FRONT', 'OF', 'EACH', 'BUILDING', 'A', 'DIM', 'PHOSPHOR', 'BULB', 'GLOWED', 'WHEN', 'DARKNESS', 'FELL', 'THEY', 'WOULD', 'HAVE', 'NOTHING', 'ELSE', 'TO', 'SEE', 'BY'] +8131-117016-0020-2575: hyp=['THERE', 'WAS', 'A', 'CRUDE', 'LIGHTING', 'SYSTEM', 'HERE', 'PUT', 'UP', 'BY', 'THE', 'CITIZENS', 'AT', 'THE', 'FRONT', 'OF', 'EACH', 'BUILDING', 'A', 'DIM', 'PHOSPHORIBULB', 'GLOWED', 'WHEN', 'DARKNESS', 'FELL', 'THEY', 'WOULD', 'HAVE', 'NOTHING', 'ELSE', 'TO', 'SEE', 'BY'] +8131-117016-0021-2576: ref=['MOVING', 'IN', 'TWO', 'GROUPS', 'OF', 'THREES', 'AT', 'OPPOSITE', 'SIDES', 'OF', 'THE', 'STREET', 'THEY', 'BEGAN', 'THEIR', 'BEAT'] +8131-117016-0021-2576: hyp=['MOVING', 'IN', 'TWO', 'GROUPS', 'OF', 'THREES', 'AT', 'OPPOSITE', 'SIDES', 'OF', 'THE', 'STREET', 'THEY', 'BEGAN', 'THEIR', 'BEAT'] +8131-117016-0022-2577: ref=['THERE', 'WAS', 'NO', 'CHANCE', 'TO', 'SAVE', 'THE', 'CITIZEN', 'WHO', 'WAS', 'DYING', 'FROM', 'LACK', 'OF', 'AIR'] +8131-117016-0022-2577: hyp=['THERE', 'WAS', 'NO', 'CHANCE', 'TO', 'SAVE', 'THE', 'CITIZEN', 'WHO', 'WAS', 'DYING', 'FROM', 'LACK', 'OF', 'AIR'] +8131-117016-0023-2578: ref=['GORDON', 'FELT', 'THE', 'SOLID', 'PLEASURE', 'OF', 'THE', 'FINELY', 'TURNED', 'CLUB', 'IN', 'HIS', 'HANDS'] +8131-117016-0023-2578: hyp=['GORDON', 'FELT', 'THE', 'SOLID', 'PLEASURE', 'OF', 'THE', 'FINELY', 'TURNED', 'CLUB', 'IN', 'HIS', 'HANDS'] +8131-117016-0024-2579: ref=["GORDON'S", 'EYES', 'POPPED', 'AT', 'THAT'] +8131-117016-0024-2579: hyp=["GORDON'S", 'EYES', 'POPPED', 'AT', 'THAT'] +8131-117016-0025-2580: ref=['HE', 'SWALLOWED', 'THE', 'SENTIMENT', 'HIS', 'OWN', 'CLUB', 'WAS', 'MOVING', 'NOW'] +8131-117016-0025-2580: hyp=['HE', 'SWALLOWED', 'THE', 'SENTIMENT', 'HIS', 'OWN', 'CLUB', 'WAS', 'MOVING', 'NOW'] +8131-117016-0026-2581: ref=['THE', 'OTHER', 'FOUR', 'COPS', 'HAD', 
'COME', 'IN', 'RELUCTANTLY'] +8131-117016-0026-2581: hyp=['THE', 'OTHER', 'FOUR', 'COPS', 'HAD', 'COME', 'IN', 'RELUCTANTLY'] +8131-117016-0027-2582: ref=['HE', 'BROUGHT', 'HIM', 'TO', 'THE', 'GROUND', 'WITH', 'A', 'SINGLE', 'BLOW', 'ACROSS', 'THE', 'KIDNEYS'] +8131-117016-0027-2582: hyp=['HE', 'BROUGHT', 'HIM', 'TO', 'THE', 'GROUND', 'WITH', 'A', 'SINGLE', 'BLOW', 'ACROSS', 'THE', 'KIDNEYS'] +8131-117016-0028-2583: ref=['THEY', 'ROUNDED', 'UP', 'THE', 'MEN', 'OF', 'THE', 'GANG', 'AND', 'ONE', 'OF', 'THE', 'COPS', 'STARTED', 'OFF'] +8131-117016-0028-2583: hyp=['THEY', 'ROUNDED', 'UP', 'THE', 'MEN', 'OF', 'THE', 'GANG', 'AND', 'ONE', 'OF', 'THE', 'COUPS', 'STARTED', 'OFF'] +8131-117016-0029-2584: ref=['TO', 'FIND', 'A', 'PHONE', 'AND', 'CALL', 'THE', 'WAGON'] +8131-117016-0029-2584: hyp=['TO', 'FIND', 'A', 'PHONE', 'AND', 'CALL', 'THE', 'WAGON'] +8131-117016-0030-2585: ref=["WE'RE", 'NOT', 'USING', 'WAGONS', 'MURDOCH', 'TOLD', 'HIM', 'LINE', 'THEM', 'UP'] +8131-117016-0030-2585: hyp=["WE'RE", 'NOT', 'USING', 'WAGONS', 'MURDOCK', 'TOLD', 'HIM', 'LINE', 'THEM', 'UP'] +8131-117016-0031-2586: ref=['IF', 'THEY', 'TRIED', 'TO', 'RUN', 'THEY', 'WERE', 'HIT', 'FROM', 'BEHIND', 'IF', 'THEY', 'STOOD', 'STILL', 'THEY', 'WERE', 'CLUBBED', 'CAREFULLY'] +8131-117016-0031-2586: hyp=['IF', 'THEY', 'TRIED', 'TO', 'RUN', 'THEY', 'WERE', 'HIT', 'FROM', 'BEHIND', 'IF', 'THEY', 'STOOD', 'STILL', 'THEY', 'WERE', 'CLUBBED', 'CAREFULLY'] +8131-117016-0032-2587: ref=['MURDOCH', 'INDICATED', 'ONE', 'WHO', 'STOOD', 'WITH', 'HIS', 'SHOULDERS', 'SHAKING', 'AND', 'TEARS', 'RUNNING', 'DOWN', 'HIS', 'CHEEKS'] +8131-117016-0032-2587: hyp=['MURDOCK', 'INDICATED', 'ONE', 'WHO', 'STOOD', 'WITH', 'HIS', 'SHOULDERS', 'SHAKING', 'AND', 'TEARS', 'RUNNING', 'DOWN', 'HIS', 'CHEEKS'] +8131-117016-0033-2588: ref=['THE', "CAPTAIN'S", 'FACE', 'WAS', 'AS', 'SICK', 'AS', 'GORDON', 'FELT'] +8131-117016-0033-2588: hyp=['THE', "CAPTAIN'S", 'FACE', 'WAS', 'AS', 'SICK', 'AS', "GORDON'S", 'FELT'] +8131-117016-0034-2589: ref=['I', 'WANT', 'THE', 'NAME', 'OF', 'EVERY', 'MAN', 'IN', 'THE', 'GANG', 'YOU', 'CAN', 'REMEMBER', 'HE', 'TOLD', 'THE', 'MAN'] +8131-117016-0034-2589: hyp=['I', 'WANT', 'THE', 'NAME', 'OF', 'EVERY', 'MAN', 'IN', 'THE', 'GANG', 'YOU', 'CAN', 'REMEMBER', 'HE', 'TOLD', 'THE', 'MAN'] +8131-117016-0035-2590: ref=['COLONEL', "THEY'D", 'KILL', 'ME', 'I', "DON'T", 'KNOW'] +8131-117016-0035-2590: hyp=['COLONEL', "THEY'D", 'KILL', 'ME', 'I', "DON'T", 'KNOW'] +8131-117016-0036-2591: ref=['MURDOCH', 'TOOK', 'HIS', 'NOD', 'AS', 'EVIDENCE', 'ENOUGH', 'AND', 'TURNED', 'TO', 'THE', 'WRETCHED', 'TOUGHS'] +8131-117016-0036-2591: hyp=['MURDOCK', 'TOOK', 'HIS', 'NOD', 'AS', 'EVIDENCE', 'ENOUGH', 'AND', 'TURNED', 'TO', 'THE', 'WRETCHED', 'TOUGHS'] +8131-117016-0037-2592: ref=['IF', 'HE', 'SHOULD', 'TURN', 'UP', 'DEAD', "I'LL", 'KNOW', 'YOU', 'BOYS', 'ARE', 'RESPONSIBLE', 'AND', "I'LL", 'FIND', 'YOU'] +8131-117016-0037-2592: hyp=['IF', 'HE', 'SHOULD', 'TURN', 'UP', 'DEAD', "I'LL", 'KNOW', 'YOU', 'BOYS', 'ARE', 'RESPONSIBLE', 'AND', "I'LL", 'FIND', 'YOU'] +8131-117016-0038-2593: ref=['TROUBLE', 'BEGAN', 'BREWING', 'SHORTLY', 'AFTER', 'THOUGH'] +8131-117016-0038-2593: hyp=['TROUBLE', 'BEGAN', 'BREWING', 'SHORTLY', 'AFTER', 'THOUGH'] +8131-117016-0039-2594: ref=['MURDOCH', 'SENT', 'ONE', 'OF', 'THE', 'MEN', 'TO', 'PICK', 'UP', 'A', 'SECOND', 'SQUAD', 'OF', 'SIX', 'AND', 'THEN', 'A', 'THIRD'] +8131-117016-0039-2594: hyp=['MURDOCK', 'SENT', 'ONE', 'OF', 'THE', 'MEN', 'TO', 'PICK', 'UP', 'A', 'SECOND', 'SQUAD', 'OF', 'SIX', 'AND', 'THEN', 'A', 'THIRD'] 
+8131-117016-0040-2595: ref=['IN', 'THE', 'THIRD', 'ONE', 'BRUCE', 'GORDON', 'SPOTTED', 'ONE', 'OF', 'THE', 'MEN', "WHO'D", 'BEEN', 'BEATEN', 'BEFORE'] +8131-117016-0040-2595: hyp=['IN', 'THE', 'THIRD', 'ONE', 'BRUCE', 'GORDON', 'SPOTTED', 'ONE', 'OF', 'THE', 'MEN', 'WHO', 'HAD', 'BEEN', 'BEATEN', 'BEFORE'] +8131-117016-0041-2596: ref=['GET', 'A', 'STRETCHER', 'AND', 'TAKE', 'HIM', 'WHEREVER', 'HE', 'BELONGS', 'HE', 'ORDERED'] +8131-117016-0041-2596: hyp=['GET', 'A', 'STRETCHER', 'AND', 'TAKE', 'HIM', 'WHEREVER', 'HE', 'BELONGS', 'HE', 'ORDERED'] +8131-117016-0042-2597: ref=['BUT', 'THE', 'CAPTAIN', 'STIRRED', 'FINALLY', 'SIGHING'] +8131-117016-0042-2597: hyp=['BUT', 'THE', 'CAPTAIN', 'STIRRED', 'FINALLY', 'SIGHING'] +8131-117016-0043-2598: ref=['NO', 'THE', 'COPS', "THEY'RE", 'GIVING', 'ME', "WE'RE", 'COVERED', 'GORDON'] +8131-117016-0043-2598: hyp=['NO', 'THE', 'COPSE', "THEY'RE", 'GIVING', 'ME', "WE'RE", 'COVERED', 'GORDON'] +8131-117016-0044-2599: ref=['BUT', 'THE', 'STONEWALL', 'GANG', 'IS', 'BACKING', 'WAYNE'] +8131-117016-0044-2599: hyp=['BUT', 'THE', 'STERNWALL', 'GANG', 'IS', 'BACKING', 'WAIN'] +8131-117016-0045-2600: ref=['BUT', "IT'S", 'GOING', 'TO', 'BE', 'TOUGH', 'ON', 'THEM'] +8131-117016-0045-2600: hyp=['BUT', "IT'S", 'GOING', 'TO', 'BE', 'TOUGH', 'ON', 'THEM'] +8131-117016-0046-2601: ref=['BRUCE', 'GORDON', 'GRIMACED', "I'VE", 'GOT', 'A', 'YELLOW', 'TICKET', 'FROM', 'SECURITY'] +8131-117016-0046-2601: hyp=['BRUCE', 'GORDON', 'GRIMACED', "I'VE", 'GOT', 'A', 'YELLOW', 'TICKET', 'FROM', 'SECURITY'] +8131-117016-0047-2602: ref=['MURDOCH', 'BLINKED', 'HE', 'DROPPED', 'HIS', 'EYES', 'SLOWLY'] +8131-117016-0047-2602: hyp=['MURDOCK', 'BLINKED', 'HE', 'DROPPED', 'HIS', 'EYES', 'SLOWLY'] +8131-117016-0048-2603: ref=['WHAT', 'MAKES', 'YOU', 'THINK', 'WAYNE', 'WILL', 'BE', 'RE', 'ELECTED'] +8131-117016-0048-2603: hyp=['WHAT', 'MAKES', 'YOU', 'THINK', 'WAIN', 'WILL', 'BE', 'RE', 'ELECTED'] +8131-117016-0049-2604: ref=['NOBODY', 'WANTS', 'HIM', 'EXCEPT', 'A', 'GANG', 'OF', 'CROOKS', 'AND', 'THOSE', 'IN', 'POWER'] +8131-117016-0049-2604: hyp=['NOBODY', 'WANTS', 'HIM', 'EXCEPT', 'A', 'GANG', 'OF', 'CROOKS', 'AND', 'THOSE', 'IN', 'POWER'] +8131-117016-0050-2605: ref=['EVER', 'SEE', 'A', 'MARTIAN', 'ELECTION'] +8131-117016-0050-2605: hyp=['EVER', 'SEE', 'A', 'MARTIAN', 'ELECTION'] +8131-117016-0051-2606: ref=['NO', "YOU'RE", 'A', 'FIRSTER', 'HE', "CAN'T", 'LOSE'] +8131-117016-0051-2606: hyp=['NO', 'YOU', 'ARE', 'A', 'FIRSTER', 'HE', "CAN'T", 'LOSE'] +8131-117016-0052-2607: ref=['AND', 'THEN', 'HELL', 'IS', 'GOING', 'TO', 'POP', 'AND', 'THIS', 'WHOLE', 'PLANET', 'MAY', 'BE', 'BLOWN', 'WIDE', 'OPEN'] +8131-117016-0052-2607: hyp=['AND', 'THEN', 'HELL', 'IS', 'GOING', 'TO', 'POP', 'AND', 'THIS', 'WHOLE', 'PLANET', 'MAY', 'BE', 'BLOWN', 'WIDE', 'OPEN'] +8131-117016-0053-2608: ref=['IT', 'FITTED', 'WITH', 'THE', 'DIRE', 'PREDICTIONS', 'OF', 'SECURITY', 'AND', 'WITH', 'THE', 'SPYING', 'GORDON', 'WAS', 'GOING', 'TO', 'DO', 'ACCORDING', 'TO', 'THEM'] +8131-117016-0053-2608: hyp=['IT', 'FITTED', 'WITH', 'THE', 'DIRE', 'PREDICTIONS', 'OF', 'SECURITY', 'AND', 'WHAT', 'THE', 'SPYING', 'GORDON', 'WAS', 'GOING', 'TO', 'DO', 'ACCORDING', 'TO', 'THEM'] +8131-117016-0054-2609: ref=['HE', 'WAS', 'GETTING', 'EVEN', 'FATTER', 'NOW', 'THAT', 'HE', 'WAS', 'EATING', 'BETTER', 'FOOD', 'FROM', 'THE', 'FAIR', 'RESTAURANT', 'AROUND', 'THE', 'CORNER'] +8131-117016-0054-2609: hyp=['HE', 'WAS', 'GETTING', 'EVEN', 'FATTER', 'NOW', 'THAT', 'HE', 'WAS', 'EATING', 'BETTER', 'FOOD', 'FROM', 'THE', 'FAIR', 'RESTAURANT', 
'AROUND', 'THE', 'CORNER'] +8131-117016-0055-2610: ref=['COST', 'EM', 'MORE', 'BUT', "THEY'D", 'BE', 'RESPECTABLE'] +8131-117016-0055-2610: hyp=['COST', 'EM', 'MORE', 'BUT', "THEY'D", 'BE', 'RESPECTABLE'] +8131-117016-0056-2611: ref=['BECAUSE', 'IZZY', 'IS', 'ALWAYS', 'HONEST', 'ACCORDING', 'TO', 'HOW', 'HE', 'SEES', 'IT'] +8131-117016-0056-2611: hyp=['BECAUSE', 'ISSY', 'IS', 'ALWAYS', 'HONEST', 'ACCORDING', 'TO', 'HOW', 'HE', 'SEES', 'IT'] +8131-117016-0057-2612: ref=['BUT', 'YOU', 'GOT', 'EARTH', 'IDEAS', 'OF', 'THE', 'STUFF', 'LIKE', 'I', 'HAD', 'ONCE'] +8131-117016-0057-2612: hyp=['BUT', 'YOU', 'GOT', 'EARTH', 'IDEAS', 'OF', 'THE', 'STUFF', 'LIKE', 'I', 'HAD', 'ONCE'] +8131-117016-0058-2613: ref=['THE', 'GROUPS', 'GREW', 'MORE', 'EXPERIENCED', 'AND', 'MURDOCH', 'WAS', 'TRAINING', 'A', 'NEW', 'SQUAD', 'EVERY', 'NIGHT'] +8131-117016-0058-2613: hyp=['THE', 'GROUPS', 'GREW', 'MORE', 'EXPERIENCED', 'AND', 'MURDOCK', 'WAS', 'TRAINING', 'A', 'NEW', 'SQUAD', 'EVERY', 'NIGHT'] +8131-117016-0059-2614: ref=['IT', "WASN'T", 'EXACTLY', 'LEGAL', 'BUT', 'NOTHING', 'WAS', 'HERE'] +8131-117016-0059-2614: hyp=['IT', "WASN'T", 'EXACTLY', 'LEGAL', 'BUT', 'NOTHING', 'WAS', 'HERE'] +8131-117016-0060-2615: ref=['THIS', 'COULD', 'LEAD', 'TO', 'ABUSES', 'AS', "HE'D", 'SEEN', 'ON', 'EARTH'] +8131-117016-0060-2615: hyp=['THIS', 'COULD', 'LEAD', 'TO', 'ABUSES', 'AS', "HE'D", 'SEEN', 'ON', 'EARTH'] +8131-117016-0061-2616: ref=['BUT', 'THERE', 'PROBABLY', "WOULDN'T", 'BE', 'TIME', 'FOR', 'IT', 'IF', 'MAYOR', 'WAYNE', 'WAS', 'RE', 'ELECTED'] +8131-117016-0061-2616: hyp=['BUT', 'THERE', 'PROBABLY', "WOULDN'T", 'BE', 'TIME', 'FOR', 'IT', 'IF', 'MAYOR', 'WAIN', 'WAS', 'RE', 'ELECTED'] +8131-117017-0000-2617: ref=['IT', 'WAS', 'NIGHT', 'OUTSIDE', 'AND', 'THE', 'PHOSPHOR', 'BULBS', 'AT', 'THE', 'CORNERS', 'GLOWED', 'DIMLY', 'GIVING', 'HIM', 'BARELY', 'ENOUGH', 'LIGHT', 'BY', 'WHICH', 'TO', 'LOCATE', 'THE', 'WAY', 'TO', 'THE', 'EXTEMPORIZED', 'PRECINCT', 'HOUSE'] +8131-117017-0000-2617: hyp=['IT', 'WAS', 'NIGHT', 'OUTSIDE', 'AND', 'THE', 'PHOSPHORE', 'BOBS', 'AT', 'THE', 'CORNERS', 'GLOWED', 'DIMLY', 'GIVING', 'HIM', 'BARELY', 'ENOUGH', 'LIGHT', 'BY', 'WHICH', 'TO', 'LOCATE', 'THE', 'WAY', 'TO', 'THE', 'EXTEMPORISED', 'PRECINCT', 'HOUSE'] +8131-117017-0001-2618: ref=['IT', 'HAD', 'PROBABLY', 'BEEN', 'YEARS', 'SINCE', 'ANY', 'HAD', 'DARED', 'RISK', 'IT', 'AFTER', 'THE', 'SUN', 'WENT', 'DOWN'] +8131-117017-0001-2618: hyp=['IT', 'HAD', 'PROBABLY', 'BEEN', 'YEARS', 'SINCE', 'ANY', 'HAD', 'DARED', 'RISK', 'IT', 'AFTER', 'THE', 'SUN', 'WENT', 'DOWN'] +8131-117017-0002-2619: ref=['AND', 'THE', 'SLOW', 'DOUBTFUL', 'RESPECT', 'ON', 'THE', 'FACES', 'OF', 'THE', 'CITIZENS', 'AS', 'THEY', 'NODDED', 'TO', 'HIM', 'WAS', 'EVEN', 'MORE', 'PROOF', 'THAT', "HALEY'S", 'SYSTEM', 'WAS', 'WORKING'] +8131-117017-0002-2619: hyp=['AND', 'THE', 'SLOW', 'DOUBTFUL', 'RESPECT', 'ON', 'THE', 'FACES', 'OF', 'THE', 'CITIZENS', 'AS', 'THEY', 'NODDED', 'TO', 'HIM', 'WAS', 'EVEN', 'MORE', 'PROOF', 'THAT', "HALEY'S", 'SYSTEM', 'WAS', 'WORKING'] +8131-117017-0003-2620: ref=['GORDON', 'HIT', 'THE', 'SIGNAL', 'SWITCH', 'AND', 'THE', 'MARSPEAKER', 'LET', 'OUT', 'A', 'SHRILL', 'WHISTLE'] +8131-117017-0003-2620: hyp=['GORDON', 'HIT', 'THE', 'SIGNAL', 'SWITCH', 'AND', 'THE', 'MAR', 'SPEAKER', 'LET', 'OUT', 'A', 'SHRILL', 'WHISTLE'] +8131-117017-0004-2621: ref=['GUNS', 'SUDDENLY', 'SEEMED', 'TO', 'BE', 'FLOURISHING', 'EVERYWHERE'] +8131-117017-0004-2621: hyp=['GUNS', 'SUDDENLY', 'SEEMED', 'TO', 'BE', 'FLOURISHING', 'EVERYWHERE'] +8131-117017-0005-2622: ref=['YOU', 
"CAN'T", 'DO', 'IT', 'TO', 'ME'] +8131-117017-0005-2622: hyp=['YOU', "CAN'T", 'DO', 'IT', 'TO', 'ME'] +8131-117017-0006-2623: ref=["I'M", 'REFORMED', "I'M", 'GOING', 'STRAIGHT'] +8131-117017-0006-2623: hyp=["I'M", 'REFORMED', "I'M", 'GOING', 'STRAIGHT'] +8131-117017-0007-2624: ref=['YOU', 'DAMNED', 'COPS', "CAN'T", "O'NEILL", 'WAS', 'BLUBBERING'] +8131-117017-0007-2624: hyp=['YOU', 'DAMNED', 'COPS', "CAN'T", "O'NEIL", 'WAS', 'BLUBBERING'] +8131-117017-0008-2625: ref=['ONE', 'LOOK', 'WAS', 'ENOUGH', 'THE', 'WORK', 'PAPERS', 'HAD', 'THE', 'TELLTALE', 'OVER', 'THICKENING', 'OF', 'THE', 'SIGNATURE', 'THAT', 'HAD', 'SHOWED', 'UP', 'ON', 'OTHER', 'PAPERS', 'OBVIOUSLY', 'FORGERIES'] +8131-117017-0008-2625: hyp=['ONE', 'LOOK', 'WAS', 'ENOUGH', 'THE', 'WORK', 'PAPERS', 'HAD', 'THE', 'TELL', 'TALE', 'OVER', 'THICKENING', 'OF', 'THE', 'SIGNATURE', 'THAT', 'HAD', 'SHOWED', 'UP', 'ON', 'OTHER', 'PAPERS', 'OBVIOUSLY', 'FORGERIES'] +8131-117017-0009-2626: ref=['SOME', 'TURNED', 'AWAY', 'AS', 'GORDON', 'AND', 'THE', 'OTHER', 'COP', 'WENT', 'TO', 'WORK', 'BUT', 'MOST', 'OF', 'THEM', "WEREN'T", 'SQUEAMISH'] +8131-117017-0009-2626: hyp=['SOME', 'TURNED', 'AWAY', 'AS', 'GORDON', 'AND', 'THE', 'OTHER', 'COP', 'WENT', 'TO', 'WORK', 'BUT', 'MOST', 'OF', 'THEM', "WEREN'T", 'SQUEAMISH'] +8131-117017-0010-2627: ref=['WHEN', 'IT', 'WAS', 'OVER', 'THE', 'TWO', 'PICKED', 'UP', 'THEIR', 'WHIMPERING', 'CAPTIVE'] +8131-117017-0010-2627: hyp=['WHEN', 'IT', 'WAS', 'OVER', 'THE', 'TWO', 'PICKED', 'UP', 'THEIR', 'WHIMPERING', 'CAPTIVE'] +8131-117017-0011-2628: ref=['JENKINS', 'THE', 'OTHER', 'COP', 'HAD', 'BEEN', 'HOLDING', 'THE', 'WALLET'] +8131-117017-0011-2628: hyp=['JENKINS', 'THE', 'OTHER', 'COP', 'HAD', 'BEEN', 'HOLDING', 'THE', 'WALLET'] +8131-117017-0012-2629: ref=['MUST', 'OF', 'BEEN', 'MAKING', 'A', 'BIG', 'CONTACT', 'IN', 'SOMETHING', 'FIFTY', 'FIFTY'] +8131-117017-0012-2629: hyp=['MUST', 'HAVE', 'BEEN', 'MAKING', 'A', 'BIG', 'CONTACT', 'IN', 'SOMETHING', 'FIFTY', 'FIFTY'] +8131-117017-0013-2630: ref=['THERE', 'MUST', 'HAVE', 'BEEN', 'OVER', 'TWO', 'THOUSAND', 'CREDITS', 'IN', 'THE', 'WALLET'] +8131-117017-0013-2630: hyp=['THERE', 'MUST', 'HAVE', 'BEEN', 'OVER', 'TWO', 'THOUSAND', 'CREDITS', 'IN', 'THE', 'WALLET'] +8131-117017-0014-2631: ref=['WHEN', 'GORDON', 'AND', 'JENKINS', 'CAME', 'BACK', 'MURDOCH', 'TOSSED', 'THE', 'MONEY', 'TO', 'THEM', 'SPLIT', 'IT'] +8131-117017-0014-2631: hyp=['WHEN', 'GORDON', 'AND', 'JENKINS', 'CAME', 'BACK', 'MURDOCK', 'TOSSED', 'THE', 'MONEY', 'TO', 'THEM', 'SPLIT', 'IT'] +8131-117017-0015-2632: ref=['WHATEVER', 'COMES', 'TO', 'HAND', "GOV'NOR"] +8131-117017-0015-2632: hyp=['WHATEVER', 'COMES', 'TO', 'HAND', "GUV'NOR"] +8131-117017-0016-2633: ref=['LIKE', 'THIS', 'SOCIAL', 'CALL', 'GORDON', 'ASKED', 'HIM'] +8131-117017-0016-2633: hyp=['LIKE', 'THIS', 'SOCIAL', 'CALL', 'GORDON', 'ASKED', 'HIM'] +8131-117017-0017-2634: ref=['THE', 'LITTLE', 'MAN', 'SHOOK', 'HIS', 'HEAD', 'HIS', 'ANCIENT', 'EIGHTEEN', 'YEAR', 'OLD', 'FACE', 'TURNING', 'SOBER', 'NOPE'] +8131-117017-0017-2634: hyp=['THE', 'LITTLE', 'MAN', 'SHOOK', 'HIS', 'HEAD', 'HIS', 'ANCIENT', 'EIGHTEEN', 'YEAR', 'OLD', 'FACE', 'TURNING', 'SOBER', 'NOPE'] +8131-117017-0018-2635: ref=['YOU', 'OWE', 'ME', 'SOME', 'BILLS', "GOV'NOR"] +8131-117017-0018-2635: hyp=['YOU', 'OWE', 'ME', 'SOME', 'BILLS', "GUV'NOR"] +8131-117017-0019-2636: ref=['ELEVEN', 'HUNDRED', 'FIFTY', 'CREDITS'] +8131-117017-0019-2636: hyp=['ELEVEN', 'HUNDRED', 'FIFTY', 'CREDITS'] +8131-117017-0020-2637: ref=['YOU', "DIDN'T", 'PAY', 'UP', 'YOUR', 'PLEDGE', 'TO', 'THE', 
'CAMPAIGN', 'FUND', 'SO', 'I', 'HADDA', 'FILL', 'IN'] +8131-117017-0020-2637: hyp=['YOU', "DIDN'T", 'PAY', 'UP', 'YOUR', 'PLEDGE', 'TO', 'THE', 'CAPTAIN', 'FUND', 'SO', 'I', 'HAD', 'A', 'FILL', 'IN'] +8131-117017-0021-2638: ref=['A', 'THOUSAND', 'INTEREST', 'AT', 'TEN', 'PER', 'CENT', 'A', 'WEEK', 'STANDARD', 'RIGHT'] +8131-117017-0021-2638: hyp=['A', 'THOUSAND', 'INTEREST', 'AT', 'TEN', 'PER', 'CENT', 'A', 'WEEK', 'STANDARD', 'RIGHT'] +8131-117017-0022-2639: ref=['GORDON', 'HAD', 'HEARD', 'OF', 'THE', 'FRIENDLY', 'INTEREST', 'CHARGED', 'ON', 'THE', 'SIDE', 'HERE', 'BUT', 'HE', 'SHOOK', 'HIS', 'HEAD', 'WRONG', 'IZZY'] +8131-117017-0022-2639: hyp=['GORDON', 'HAD', 'HEARD', 'OF', 'THE', 'FRIENDLY', 'INTEREST', 'CHARGED', 'ON', 'THE', 'SIDE', 'HERE', 'BUT', 'HE', 'SHOOK', 'HIS', 'HEAD', 'WRONG', 'IS', 'HE'] +8131-117017-0023-2640: ref=['HUH', 'IZZY', 'TURNED', 'IT', 'OVER', 'AND', 'SHOOK', 'HIS', 'HEAD'] +8131-117017-0023-2640: hyp=['HA', 'AS', 'HE', 'TURNED', 'IT', 'OVER', 'AND', 'SHOOK', 'HIS', 'HEAD'] +8131-117017-0024-2641: ref=['NOW', 'SHOW', 'ME', 'WHERE', 'I', 'SIGNED', 'ANY', 'AGREEMENT', 'SAYING', "I'D", 'PAY', 'YOU', 'BACK'] +8131-117017-0024-2641: hyp=['NOW', 'SHOW', 'ME', 'WHERE', 'I', 'SIGNED', 'ANY', 'AGREEMENT', 'SAYING', "I'D", 'PAY', 'YOU', 'BACK'] +8131-117017-0025-2642: ref=['FOR', 'A', 'SECOND', "IZZY'S", 'FACE', 'WENT', 'BLANK', 'THEN', 'HE', 'CHUCKLED'] +8131-117017-0025-2642: hyp=['FOR', 'A', 'SECOND', "ISEY'S", 'FACE', 'WENT', 'BLANK', 'THEN', 'HE', 'CHUCKLED'] +8131-117017-0026-2643: ref=['HE', 'PULLED', 'OUT', 'THE', 'BILLS', 'AND', 'HANDED', 'THEM', 'OVER'] +8131-117017-0026-2643: hyp=['HE', 'PULLED', 'OUT', 'THE', 'BILLS', 'AND', 'HANDED', 'THEM', 'OVER'] +8131-117017-0027-2644: ref=['THANKS', 'IZZY', 'THANKS', 'YOURSELF'] +8131-117017-0027-2644: hyp=['THANKS', 'IZZY', 'THANKS', 'YOURSELF'] +8131-117017-0028-2645: ref=['THE', 'KID', 'POCKETED', 'THE', 'MONEY', 'CHEERFULLY', 'NODDING'] +8131-117017-0028-2645: hyp=['THE', 'KID', 'POCKETED', 'THE', 'MONEY', 'CHEERFULLY', 'NODDING'] +8131-117017-0029-2646: ref=['THE', 'LITTLE', 'GUY', 'KNEW', 'MARS', 'AS', 'FEW', 'OTHERS', 'DID', 'APPARENTLY', 'FROM', 'ALL', 'SIDES'] +8131-117017-0029-2646: hyp=['THE', 'LITTLE', 'GUY', 'KNEW', 'MARS', 'AS', 'FEW', 'OTHERS', 'DID', 'APPARENTLY', 'FROM', 'ALL', 'SIDES'] +8131-117017-0030-2647: ref=['AND', 'IF', 'ANY', 'OF', 'THE', 'OTHER', 'COPS', 'HAD', 'PRIVATE', 'RACKETS', 'OF', 'THEIR', 'OWN', 'IZZY', 'WAS', 'UNDOUBTEDLY', 'THE', 'MAN', 'TO', 'FIND', 'IT', 'OUT', 'AND', 'USE', 'THE', 'INFORMATION', 'WITH', 'A', 'BEAT', 'SUCH', 'AS', 'THAT', 'EVEN', 'GOING', 'HALVES', 'AND', 'WITH', 'ALL', 'THE', 'GRAFT', 'TO', 'THE', 'UPPER', 'BRACKETS', "HE'D", 'STILL', 'BE', 'ABLE', 'TO', 'MAKE', 'HIS', 'PILE', 'IN', 'A', 'MATTER', 'OF', 'MONTHS'] +8131-117017-0030-2647: hyp=['AND', 'IF', 'ANY', 'OF', 'THE', 'OTHER', 'COPS', 'HAD', 'PRIVATE', 'RACKETS', 'OF', 'THEIR', 'OWN', 'ISSEY', 'WAS', 'UNDOUBTEDLY', 'THE', 'MAN', 'TO', 'FIND', 'IT', 'OUT', 'AND', 'USE', 'THE', 'INFORMATION', 'WITH', 'A', 'BEAT', 'SUCH', 'AS', 'THAT', 'EVEN', 'GOING', 'HALVES', 'AND', 'WITH', 'ALL', 'THE', 'GRAFT', 'TO', 'THE', 'UPPER', 'BRACKETS', "HE'D", 'STILL', 'BE', 'ABLE', 'TO', 'MAKE', 'HIS', 'PILE', 'IN', 'A', 'MATTER', 'OF', 'MONTHS'] +8131-117017-0031-2648: ref=['THE', 'CAPTAIN', 'LOOKED', 'COMPLETELY', 'BEATEN', 'AS', 'HE', 'CAME', 'INTO', 'THE', 'ROOM', 'AND', 'DROPPED', 'ONTO', 'THE', 'BENCH'] +8131-117017-0031-2648: hyp=['THE', 'CAPTAIN', 'LOOKED', 'COMPLETELY', 'BEATEN', 'AS', 'HE', 'CAME', 'INTO', 'THE', 
'ROOM', 'AND', 'DROPPED', 'ON', 'THE', 'BENCH'] +8131-117017-0032-2649: ref=['GO', 'ON', 'ACCEPT', 'DAMN', 'IT'] +8131-117017-0032-2649: hyp=['GO', 'ON', 'EXCEPT', 'DAMN', 'IT'] +8131-117029-0000-2650: ref=['THERE', 'WAS', 'A', 'MAN', 'COMING', 'FROM', 'EARTH', 'ON', 'A', 'SECOND', 'SHIP', 'WHO', 'WOULD', 'SEE', 'HIM'] +8131-117029-0000-2650: hyp=['THERE', 'WAS', 'A', 'MAN', 'COMING', 'FROM', 'EARTH', 'ON', 'A', 'SECOND', 'SHIP', 'WHO', 'WOULD', 'SEE', 'HIM'] +8131-117029-0001-2651: ref=['THE', 'LITTLE', 'PUBLISHER', 'WAS', 'BACK', 'AT', 'THE', 'CRUSADER', 'AGAIN'] +8131-117029-0001-2651: hyp=['THE', 'LITTLE', 'PUBLISHER', 'WAS', 'BACK', 'AT', 'THE', 'CRUSADER', 'AGAIN'] +8131-117029-0002-2652: ref=['ONLY', 'GORDON', 'AND', 'SHEILA', 'WERE', 'LEFT'] +8131-117029-0002-2652: hyp=['ONLY', 'GORDON', 'AND', 'SHEILA', 'WERE', 'LEFT'] +8131-117029-0003-2653: ref=['CREDIT', 'HAD', 'BEEN', 'ESTABLISHED', 'AGAIN', 'AND', 'THE', 'BUSINESSES', 'WERE', 'OPEN'] +8131-117029-0003-2653: hyp=['CREDIT', 'HAD', 'BEEN', 'ESTABLISHED', 'AGAIN', 'AND', 'THE', 'BUSINESSES', 'WERE', 'OPEN'] +8131-117029-0004-2654: ref=['GORDON', 'CAME', 'TO', 'A', 'ROW', 'OF', 'TEMPORARY', 'BUBBLES', 'INDIVIDUAL', 'DWELLINGS', 'BUILT', 'LIKE', 'THE', 'DOME', 'BUT', 'OPAQUE', 'FOR', 'PRIVACY'] +8131-117029-0004-2654: hyp=['GORDON', 'CAME', 'TO', 'A', 'ROW', 'OF', 'TEMPORARY', 'BUBBLES', 'INDIVIDUAL', 'DWELLINGS', 'BUILT', 'LIKE', 'THE', 'DOME', 'BUT', 'OPAQUE', 'FOR', 'PRIVACY'] +8131-117029-0005-2655: ref=['THEY', 'HAD', 'BEEN', 'LUCKY'] +8131-117029-0005-2655: hyp=['THEY', 'HAD', 'BEEN', 'LUCKY'] +8131-117029-0006-2656: ref=["SCHULBERG'S", 'VOLUNTEERS', 'WERE', 'OFFICIAL', 'NOW'] +8131-117029-0006-2656: hyp=["SCHOLBURG'S", 'VOLUNTEERS', 'WERE', 'OFFICIAL', 'NOW'] +8131-117029-0007-2657: ref=['FATS', 'PLACE', 'WAS', 'STILL', 'OPEN', 'THOUGH', 'THE', 'CROOKED', 'TABLES', 'HAD', 'BEEN', 'REMOVED', 'GORDON', 'DROPPED', 'TO', 'A', 'STOOL', 'SLIPPING', 'OFF', 'HIS', 'HELMET'] +8131-117029-0007-2657: hyp=["FATT'S", 'PLACE', 'WAS', 'STILL', 'OPEN', 'THOUGH', 'THE', 'CROOKED', 'TABLES', 'HAD', 'BEEN', 'REMOVED', 'GORDON', 'DROPPED', 'TO', 'A', 'STOOL', 'SLIPPING', 'OFF', 'HIS', 'HELMET'] +8131-117029-0008-2658: ref=['HE', 'REACHED', 'AUTOMATICALLY', 'FOR', 'THE', 'GLASS', 'OF', 'ETHER', 'NEEDLED', 'BEER'] +8131-117029-0008-2658: hyp=['HE', 'REACHED', 'AUTOMATICALLY', 'FOR', 'THE', 'GLASS', 'OF', 'ETHER', 'NEEDLE', 'BEER'] +8131-117029-0009-2659: ref=['THOUGHT', "YOU'D", 'BE', 'IN', 'THE', 'CHIPS'] +8131-117029-0009-2659: hyp=['THOUGHT', "YOU'D", 'BE', 'IN', 'THE', 'CHIPS'] +8131-117029-0010-2660: ref=["THAT'S", 'MARS', 'GORDON', 'ECHOED', 'THE', "OTHER'S", 'COMMENT', 'WHY', "DON'T", 'YOU', 'PULL', 'OFF', 'THE', 'PLANET', 'FATS', 'YOU', 'COULD', 'GO', 'BACK', 'TO', 'EARTH', "I'D", 'GUESS', 'THE', 'OTHER', 'NODDED'] +8131-117029-0010-2660: hyp=["THAT'S", 'MARS', 'GORDON', 'ECHOED', 'THE', "OTHER'S", 'COMMENT', 'WHY', "DON'T", 'YOU', 'PULL', 'OFF', 'THE', 'PLANET', 'THATS', 'YOU', 'COULD', 'GO', 'BACK', 'TO', 'EARTH', "I'D", 'GUESS', 'THE', 'OTHER', 'NODDED'] +8131-117029-0011-2661: ref=['GUESS', 'A', 'MAN', 'GETS', 'USED', 'TO', 'ANYTHING', 'HELL', 'MAYBE', 'I', 'CAN', 'HIRE', 'SOME', 'BUMS', 'TO', 'SIT', 'AROUND', 'AND', 'WHOOP', 'IT', 'UP', 'WHEN', 'THE', 'SHIPS', 'COME', 'IN', 'AND', 'BILL', 'THIS', 'AS', 'A', 'REAL', 'OLD', 'MARTIAN', 'DEN', 'OF', 'SIN'] +8131-117029-0011-2661: hyp=['GUESS', 'A', 'MAN', 'GETS', 'USED', 'TO', 'ANYTHING', 'HELL', 'MAYBE', 'I', 'CAN', 'HIRE', 'SOME', 'BUMS', 'TO', 'SIT', 'AROUND', 'AND', 'WHOOP', 'IT', 
'UP', 'WHEN', 'THE', 'SHIPS', 'COME', 'IN', 'AND', 'BUILD', 'THIS', 'AS', 'A', 'REAL', 'OLD', 'MARTIAN', 'DEN', 'OF', 'SIN'] +8131-117029-0012-2662: ref=['THERE', 'WAS', 'A', 'GRIN', 'ON', 'THE', "OTHER'S", 'FACE'] +8131-117029-0012-2662: hyp=['THERE', 'WAS', 'A', 'GRIN', 'ON', 'THE', "OTHER'S", 'FACE'] +8131-117029-0013-2663: ref=['FINALLY', 'GOT', 'OUR', 'ORDERS', 'FOR', 'YOU', "IT'S", 'MERCURY'] +8131-117029-0013-2663: hyp=['FINALLY', 'GOT', 'OUR', 'ORDERS', 'FOR', 'YOU', "IT'S", 'MERCURY'] +8131-117029-0014-2664: ref=['WE', 'SENT', 'TWENTY', 'OTHERS', 'THE', 'SAME', 'WAY', 'AND', 'THEY', 'FAILED'] +8131-117029-0014-2664: hyp=['WE', 'SENT', 'TWENTY', 'OTHERS', 'THE', 'SAME', 'WAY', 'AND', 'THEY', 'FAILED'] +8131-117029-0015-2665: ref=["LET'S", 'SAY', "YOU'VE", 'SHIFTED', 'SOME', 'OF', 'THE', 'MISERY', 'AROUND', 'A', 'BIT', 'AND', 'GIVEN', 'THEM', 'A', 'CHANCE', 'TO', 'DO', 'BETTER'] +8131-117029-0015-2665: hyp=["LET'S", 'SAY', "YOU'VE", 'SHIFTED', 'SOME', 'OF', 'THE', 'MISERY', 'AROUND', 'A', 'BIT', 'AND', 'GIVEN', 'THEM', 'A', 'CHANCE', 'TO', 'DO', 'BETTER'] +8131-117029-0016-2666: ref=['YOU', "CAN'T", 'STAY', 'HERE'] +8131-117029-0016-2666: hyp=['YOU', "CAN'T", 'STAY', 'HERE'] +8131-117029-0017-2667: ref=["THERE'S", 'A', 'ROCKET', 'WAITING', 'TO', 'TRANSSHIP', 'YOU', 'TO', 'THE', 'MOON', 'ON', 'THE', 'WAY', 'TO', 'MERCURY', 'RIGHT', 'NOW', 'GORDON', 'SIGHED'] +8131-117029-0017-2667: hyp=["THERE'S", 'A', 'ROCKET', 'WAITING', 'TO', 'TRANSHIP', 'YOU', 'TO', 'THE', 'MOON', 'ON', 'THE', 'WAY', 'TO', 'MERCURY', 'RIGHT', 'NOW', 'GORDON', 'SIGHED'] +8131-117029-0018-2668: ref=['AND', "I'VE", 'PAID', 'HER', 'THE', 'PAY', 'WE', 'OWE', 'YOU', 'FROM', 'THE', 'TIME', 'YOU', 'BEGAN', 'USING', 'YOUR', 'BADGE', "SHE'S", 'OUT', 'SHOPPING'] +8131-117029-0018-2668: hyp=['AND', 'I', 'PAID', 'HER', 'THE', 'PAY', 'WE', 'OWE', 'YOU', 'FROM', 'THE', 'TIME', 'YOU', 'BEGAN', 'USING', 'YOUR', 'BADGE', "SHE'S", 'OUT', 'SHOPPING'] +8131-117029-0019-2669: ref=['BUT', 'HIS', 'OLD', 'EYES', 'WERE', 'GLINTING'] +8131-117029-0019-2669: hyp=['BUT', 'HIS', 'OLD', 'EYES', 'WERE', 'GLINTING'] +8131-117029-0020-2670: ref=['DID', 'YOU', 'THINK', "WE'D", 'LET', 'YOU', 'GO', 'WITHOUT', 'SEEING', 'YOU', 'OFF', 'COBBER', 'HE', 'ASKED'] +8131-117029-0020-2670: hyp=['DID', 'YOU', 'THINK', "WE'D", 'LET', 'YOU', 'GO', 'WITHOUT', 'SEEING', 'YOU', 'OFF', 'COWBER', 'HE', 'ASKED'] +8131-117029-0021-2671: ref=['I', 'I', 'OH', 'DRAT', 'IT', "I'M", 'GETTING', 'OLD', 'IZZY', 'YOU', 'TELL', 'HIM'] +8131-117029-0021-2671: hyp=['I', 'I', 'OH', 'DRAT', 'IT', "I'M", 'GETTING', 'OLD', 'IASY', 'YOU', 'TELL', 'HIM'] +8131-117029-0022-2672: ref=['HE', 'GRABBED', "GORDON'S", 'HAND', 'AND', 'WADDLED', 'DOWN', 'THE', 'LANDING', 'PLANK', 'IZZY', 'SHOOK', 'HIS', 'HEAD'] +8131-117029-0022-2672: hyp=['HE', 'GRABBED', "GORDON'S", 'HAND', 'AND', 'WADDLED', 'DOWN', 'THE', 'LANDING', 'PLANK', 'ISEY', 'SHOOK', 'HIS', 'HEAD'] +8188-269288-0000-2673: ref=['ANNIE', 'COLCHESTER', 'HAD', 'BEGUN', 'TO', 'MAKE', 'FRIENDS', 'WITH', 'LESLIE'] +8188-269288-0000-2673: hyp=['ANNIE', 'COLCHESTER', 'HAD', 'BEGUN', 'TO', 'MAKE', 'FRIENDS', 'WITH', 'LESLIE'] +8188-269288-0001-2674: ref=['LESLIE', 'DETERMINED', 'TO', 'TRY', 'FOR', 'HONORS', 'IN', 'ENGLISH', 'LANGUAGE', 'AND', 'LITERATURE'] +8188-269288-0001-2674: hyp=['LESLIE', 'DETERMINED', 'TO', 'TRIFLE', 'HONORS', 'IN', 'ENGLISH', 'LANGUAGE', 'AND', 'LITERATURE'] +8188-269288-0002-2675: ref=['HER', 'TASTES', 'ALL', 'LAY', 'IN', 'THIS', 'DIRECTION', 'HER', 'IDEA', 'BEING', 'BY', 'AND', 'BY', 'TO', 'FOLLOW', 'HER', 
"MOTHER'S", 'PROFESSION', 'OF', 'JOURNALISM', 'FOR', 'WHICH', 'SHE', 'ALREADY', 'SHOWED', 'CONSIDERABLE', 'APTITUDE'] +8188-269288-0002-2675: hyp=['HER', 'TASTES', 'ALL', 'LAY', 'IN', 'THIS', 'DIRECTION', 'HER', 'IDEA', 'BEING', 'BY', 'AND', 'BY', 'TO', 'FOLLOW', 'HER', "MOTHER'S", 'PROFESSION', 'OF', 'JOURNALISM', 'FOR', 'WHICH', 'SHE', 'ALREADY', 'SHOWED', 'CONSIDERABLE', 'APTITUDE'] +8188-269288-0003-2676: ref=['SHE', 'HAD', 'NO', 'IDEA', 'OF', 'ALLOWING', 'HERSELF', 'TO', 'BREAK', 'DOWN'] +8188-269288-0003-2676: hyp=['SHE', 'HAD', 'NO', 'IDEA', 'OF', 'ALLOWING', 'HERSELF', 'TO', 'BREAK', 'DOWN'] +8188-269288-0004-2677: ref=['WHAT', 'DO', 'YOU', 'MEAN', 'REPLIED', 'LESLIE'] +8188-269288-0004-2677: hyp=['WHAT', 'DO', 'YOU', 'MEAN', 'REPLIED', 'LESLIE'] +8188-269288-0005-2678: ref=['WHY', 'YOU', 'WILL', 'BE', 'PARTING', 'FROM', 'ME', 'YOU', 'KNOW'] +8188-269288-0005-2678: hyp=['WHY', 'YOU', 'WILL', 'BE', 'PARTING', 'FROM', 'ME', 'YOU', 'KNOW'] +8188-269288-0006-2679: ref=['I', "WON'T", 'BE', 'THE', 'CONSTANT', 'WORRY', 'AND', 'PLAGUE', 'OF', 'YOUR', 'LIFE'] +8188-269288-0006-2679: hyp=['I', 'WOULD', 'BE', 'THE', 'CONSTANT', 'WORRY', 'AND', 'PLAGUE', 'OF', 'YOUR', 'LIFE'] +8188-269288-0007-2680: ref=['IT', 'IS', 'THIS', 'IF', 'BY', 'ANY', 'CHANCE', 'YOU', "DON'T", 'LEAVE', 'SAINT', "WODE'S", 'ANNIE', 'I', 'HOPE', 'YOU', 'WILL', 'ALLOW', 'ME', 'TO', 'BE', 'YOUR', 'ROOMFELLOW', 'AGAIN', 'NEXT', 'TERM'] +8188-269288-0007-2680: hyp=['IT', 'IS', 'THIS', 'IF', 'BY', 'ANY', 'CHANCE', 'YOU', "DON'T", 'LEAVE', 'SAINT', "WODE'S", 'ANNIE', 'I', 'HOPE', 'YOU', 'WILL', 'ALLOW', 'ME', 'TO', 'BE', 'YOUR', 'ROOMFELLOW', 'AGAIN', 'NEXT', 'TERM'] +8188-269288-0008-2681: ref=['SAID', 'ANNIE', 'A', 'FLASH', 'OF', 'LIGHT', 'COMING', 'INTO', 'HER', 'EYES', 'AND', 'THEN', 'LEAVING', 'THEM'] +8188-269288-0008-2681: hyp=['SAID', 'ANNIE', 'A', 'FLASH', 'OF', 'LIGHT', 'COMING', 'INTO', 'HER', 'EYES', 'AND', 'THEN', 'LEAVING', 'THEM'] +8188-269288-0009-2682: ref=['BUT', 'SHE', 'ADDED', 'ABRUPTLY', 'YOU', 'SPEAK', 'OF', 'SOMETHING', 'WHICH', 'MUST', 'NOT', 'TAKE', 'PLACE'] +8188-269288-0009-2682: hyp=['BUT', 'SHE', 'ADDED', 'ABRUPTLY', 'YOU', 'SPEAK', 'OF', 'SOMETHING', 'WHICH', 'MUST', 'NOT', 'TAKE', 'PLACE'] +8188-269288-0010-2683: ref=['I', 'MUST', 'PASS', 'IN', 'HONORS', 'IF', 'I', "DON'T", 'I', 'SHALL', 'DIE'] +8188-269288-0010-2683: hyp=['I', 'MUST', 'PASS', 'IN', 'HONORS', 'IF', 'I', "DON'T", 'I', 'SHALL', 'DIE'] +8188-269288-0011-2684: ref=['A', 'FEW', 'MOMENTS', 'LATER', 'THERE', 'CAME', 'A', 'TAP', 'AT', 'THE', 'DOOR'] +8188-269288-0011-2684: hyp=['A', 'FEW', 'MOMENTS', 'LATER', 'THERE', 'CAME', 'A', 'TAP', 'AT', 'THE', 'DOOR'] +8188-269288-0012-2685: ref=['LESLIE', 'OPENED', 'THE', 'DOOR'] +8188-269288-0012-2685: hyp=['LESLIE', 'OPENED', 'THE', 'DOOR'] +8188-269288-0013-2686: ref=['JANE', 'HERIOT', 'STOOD', 'WITHOUT'] +8188-269288-0013-2686: hyp=['JANE', 'HERIOT', 'STOOD', 'WITHOUT'] +8188-269288-0014-2687: ref=['THESE', 'LETTERS', 'HAVE', 'JUST', 'COME', 'FOR', 'YOU', 'AND', 'ANNIE', 'COLCHESTER', 'SHE', 'SAID', 'AND', 'AS', 'I', 'WAS', 'COMING', 'UPSTAIRS', 'I', 'THOUGHT', 'I', 'WOULD', 'LEAVE', 'THEM', 'WITH', 'YOU'] +8188-269288-0014-2687: hyp=['THESE', 'LETTERS', 'HAVE', 'JUST', 'COME', 'FOR', 'YOU', 'AND', 'ANNIE', 'COLCHESTER', 'SHE', 'SAID', 'AND', 'AS', 'I', 'WAS', 'COMING', 'UP', 'STAIRS', 'I', 'THOUGHT', 'I', 'WOULD', 'LEAVE', 'THEM', 'WITH', 'YOU'] +8188-269288-0015-2688: ref=['LESLIE', 'THANKED', 'HER', 'AND', 'EAGERLY', 'GRASPED', 'THE', 'LITTLE', 'PARCEL'] +8188-269288-0015-2688: 
hyp=['LESLIE', 'THANKED', 'HER', 'AND', 'EAGERLY', 'GRASPED', 'THE', 'LITTLE', 'PARCEL'] +8188-269288-0016-2689: ref=['HER', 'EYES', 'SHONE', 'WITH', 'PLEASURE', 'AT', 'THE', 'ANTICIPATION', 'OF', 'THE', 'DELIGHTFUL', 'TIME', 'SHE', 'WOULD', 'HAVE', 'REVELING', 'IN', 'THE', 'HOME', 'NEWS', 'THE', 'OTHER', 'LETTER', 'WAS', 'DIRECTED', 'TO', 'ANNIE', 'COLCHESTER'] +8188-269288-0016-2689: hyp=['HER', 'EYES', 'SHONE', 'WITH', 'PLEASURE', 'AT', 'THE', 'ANTICIPATION', 'OF', 'THE', 'DELIGHTFUL', 'TIME', 'SHE', 'WOULD', 'HAVE', 'REVELLING', 'IN', 'THE', 'HOME', 'NEWS', 'THE', 'OTHER', 'LETTER', 'WAS', 'DIRECTED', 'TO', 'ANNIE', 'COLCHESTER'] +8188-269288-0017-2690: ref=['HERE', 'IS', 'A', 'LETTER', 'FOR', 'YOU', 'ANNIE', 'CRIED', 'LESLIE'] +8188-269288-0017-2690: hyp=['HERE', 'IS', 'A', 'LETTER', 'FOR', 'YOU', 'ANNIE', 'CRIED', 'LESLIE'] +8188-269288-0018-2691: ref=['HER', 'FACE', 'GREW', 'SUDDENLY', 'WHITE', 'AS', 'DEATH', 'WHAT', 'IS', 'IT', 'DEAR'] +8188-269288-0018-2691: hyp=['HER', 'FACE', 'GREW', 'SUDDENLY', 'WHITE', 'AS', 'DEATH', 'WHAT', 'IS', 'IT', 'DEAR'] +8188-269288-0019-2692: ref=['I', 'HAVE', 'BEEN', 'STARVING', 'OR', 'RATHER', 'I', 'HAVE', 'BEEN', 'THIRSTING'] +8188-269288-0019-2692: hyp=['I', 'HAVE', 'BEEN', 'STARVING', 'OR', 'RATHER', 'I', 'HAVE', 'BEEN', 'THIRSTING'] +8188-269288-0020-2693: ref=['WELL', 'READ', 'IT', 'IN', 'PEACE', 'SAID', 'LESLIE', 'I', "WON'T", 'DISTURB', 'YOU'] +8188-269288-0020-2693: hyp=['WELL', 'READ', 'IT', 'IN', 'PEACE', 'SAID', 'LESLEY', 'I', "WON'T", 'DISTURB', 'YOU'] +8188-269288-0021-2694: ref=['I', 'AM', 'TRULY', 'GLAD', 'IT', 'HAS', 'COME'] +8188-269288-0021-2694: hyp=['I', 'AM', 'TRULY', 'GLAD', 'IT', 'HAS', 'COME'] +8188-269288-0022-2695: ref=['LESLIE', 'SEATED', 'HERSELF', 'WITH', 'HER', 'BACK', 'TO', 'HER', 'COMPANION', 'AND', 'OPENED', 'HER', 'OWN', 'LETTERS'] +8188-269288-0022-2695: hyp=['LESLIE', 'SEATED', 'HERSELF', 'WITH', 'HER', 'BACK', 'TO', 'HER', 'COMPANION', 'AND', 'OPENED', 'HER', 'OWN', 'LETTERS'] +8188-269288-0023-2696: ref=["DON'T", 'NOTICE', 'ME', 'REPLIED', 'ANNIE'] +8188-269288-0023-2696: hyp=["DON'T", 'NOTICE', 'ME', 'REPLIED', 'ANNIE'] +8188-269288-0024-2697: ref=['I', 'MUST', 'GO', 'INTO', 'THE', 'GROUNDS', 'THE', 'AIR', 'IS', 'STIFLING'] +8188-269288-0024-2697: hyp=['I', 'MUST', 'GO', 'INTO', 'THE', 'GROUNDS', 'THE', 'AIR', 'IS', 'STIFLING'] +8188-269288-0025-2698: ref=['BUT', 'THEY', 'ARE', 'JUST', 'SHUTTING', 'UP'] +8188-269288-0025-2698: hyp=['BUT', 'THEY', 'ARE', 'JUST', 'SHUTTING', 'UP'] +8188-269288-0026-2699: ref=['I', 'SHALL', 'GO', 'I', 'KNOW', 'A', 'WAY'] +8188-269288-0026-2699: hyp=['I', 'SHALL', 'GO', 'I', 'KNOW', 'A', 'WAY'] +8188-269288-0027-2700: ref=['JUST', 'AFTER', 'MIDNIGHT', 'SHE', 'ROSE', 'WITH', 'A', 'SIGH', 'TO', 'PREPARE', 'FOR', 'BED'] +8188-269288-0027-2700: hyp=['JUST', 'AFTER', 'MIDNIGHT', 'SHE', 'ROSE', 'WITH', 'A', 'SIGH', 'TO', 'PREPARE', 'FOR', 'BED'] +8188-269288-0028-2701: ref=['SHE', 'LOOKED', 'ROUND', 'THE', 'ROOM'] +8188-269288-0028-2701: hyp=['SHE', 'LOOKED', 'ROUND', 'THE', 'ROOM'] +8188-269288-0029-2702: ref=['NOW', 'I', 'REMEMBER', 'SHE', 'GOT', 'A', 'LETTER', 'WHICH', 'UPSET', 'HER', 'VERY', 'MUCH', 'AND', 'WENT', 'OUT'] +8188-269288-0029-2702: hyp=['NOW', 'I', 'REMEMBER', 'SHE', 'GOT', 'A', 'LETTER', 'WHICH', 'UPSET', 'HER', 'VERY', 'MUCH', 'AND', 'WENT', 'OUT'] +8188-269288-0030-2703: ref=['LESLIE', 'WENT', 'TO', 'THE', 'WINDOW', 'AND', 'FLUNG', 'IT', 'OPEN', 'SHE', 'PUT', 'HER', 'HEAD', 'OUT', 'AND', 'TRIED', 'TO', 'PEER', 'INTO', 'THE', 'DARKNESS', 'BUT', 'THE', 'MOON', 'HAD', 
'ALREADY', 'SET', 'AND', 'SHE', 'COULD', 'NOT', 'SEE', 'MORE', 'THAN', 'A', 'COUPLE', 'OF', 'YARDS', 'IN', 'FRONT', 'OF', 'HER'] +8188-269288-0030-2703: hyp=['LESLIE', 'WENT', 'TO', 'THE', 'WINDOW', 'AND', 'FLUNG', 'IT', 'OPEN', 'SHE', 'PUT', 'HER', 'HEAD', 'OUT', 'AND', 'TRIED', 'TO', 'PEER', 'INTO', 'THE', 'DARKNESS', 'BUT', 'THE', 'MOON', 'HAD', 'ALREADY', 'SET', 'AND', 'SHE', 'COULD', 'NOT', 'SEE', 'MORE', 'THAN', 'A', 'COUPLE', 'OF', 'YARDS', 'IN', 'FRONT', 'OF', 'HER'] +8188-269288-0031-2704: ref=['SHE', 'IS', 'A', 'VERY', 'QUEER', 'ERRATIC', 'CREATURE', 'AND', 'THAT', 'LETTER', 'THERE', 'WAS', 'BAD', 'NEWS', 'IN', 'THAT', 'LETTER'] +8188-269288-0031-2704: hyp=['SHE', 'IS', 'A', 'VERY', 'QUEER', 'ERRATIC', 'CREATURE', 'AND', 'THAT', 'LETTER', 'THERE', 'IS', 'BAD', 'NEWS', 'IN', 'THAT', 'LETTER'] +8188-269288-0032-2705: ref=['WHAT', 'CAN', 'SHE', 'BE', 'DOING', 'OUT', 'BY', 'HERSELF'] +8188-269288-0032-2705: hyp=['WHAT', 'CAN', 'SHE', 'BE', 'DOING', 'OUT', 'BY', 'HERSELF'] +8188-269288-0033-2706: ref=['LESLIE', 'LEFT', 'THE', 'ROOM', 'BUT', 'SHE', 'HAD', 'SCARCELY', 'GONE', 'A', 'DOZEN', 'PACES', 'DOWN', 'THE', 'CORRIDOR', 'BEFORE', 'SHE', 'MET', 'ANNIE', 'RETURNING'] +8188-269288-0033-2706: hyp=['LESLIE', 'LEFT', 'THE', 'ROOM', 'BUT', 'SHE', 'HAD', 'SCARCELY', 'GONE', 'A', 'DOZEN', 'PACES', 'DOWN', 'THE', 'CORRIDOR', 'BEFORE', 'SHE', 'MET', 'ANNIE', 'RETURNING'] +8188-269288-0034-2707: ref=["ANNIE'S", 'EYES', 'WERE', 'VERY', 'BRIGHT', 'HER', 'CHEEKS', 'WERE', 'NO', 'LONGER', 'PALE', 'AND', 'THERE', 'WAS', 'A', 'BRILLIANT', 'COLOR', 'IN', 'THEM'] +8188-269288-0034-2707: hyp=["ANNIE'S", 'EYES', 'WERE', 'VERY', 'BRIGHT', 'HER', 'CHEEKS', 'WERE', 'NO', 'LONGER', 'PALE', 'AND', 'THERE', 'WAS', 'A', 'BRILLIANT', 'COLOUR', 'IN', 'THEM'] +8188-269288-0035-2708: ref=['SHE', 'DID', 'NOT', 'TAKE', 'THE', 'LEAST', 'NOTICE', 'OF', 'LESLIE', 'BUT', 'GOING', 'INTO', 'THE', 'ROOM', 'SHUT', 'THE', 'DOOR'] +8188-269288-0035-2708: hyp=['SHE', 'DID', 'NOT', 'TAKE', 'THE', 'LEAST', 'NOTICE', 'OF', 'LESLIE', 'BUT', 'GOING', 'INTO', 'THE', 'ROOM', 'SHUT', 'THE', 'DOOR'] +8188-269288-0036-2709: ref=["DON'T", 'BEGIN', 'SAID', 'ANNIE'] +8188-269288-0036-2709: hyp=["DON'T", 'BEGIN', 'SAID', 'ANNIE'] +8188-269288-0037-2710: ref=["DON'T", 'BEGIN', 'WHAT', 'DO', 'YOU', 'MEAN'] +8188-269288-0037-2710: hyp=["DON'T", 'BEGIN', 'WHAT', 'DO', 'YOU', 'MEAN'] +8188-269288-0038-2711: ref=['I', 'MEAN', 'THAT', 'I', "DON'T", 'WANT', 'YOU', 'TO', 'BEGIN', 'TO', 'ASK', 'QUESTIONS'] +8188-269288-0038-2711: hyp=['I', 'MEAN', 'THAT', 'I', "DON'T", 'WANT', 'YOU', 'TO', 'BEGIN', 'TO', 'ASK', 'QUESTIONS'] +8188-269288-0039-2712: ref=['I', 'WALKED', 'UP', 'AND', 'DOWN', 'AS', 'FAST', 'AS', 'EVER', 'I', 'COULD', 'OUTSIDE', 'IN', 'ORDER', 'TO', 'MAKE', 'MYSELF', 'SLEEPY'] +8188-269288-0039-2712: hyp=['I', 'WALKED', 'UP', 'AND', 'DOWN', 'AS', 'FAST', 'AS', 'EVER', 'I', 'COULD', 'OUTSIDE', 'IN', 'ORDER', 'TO', 'MAKE', 'MYSELF', 'SLEEPY'] +8188-269288-0040-2713: ref=["DON'T", 'TALK', 'TO', 'ME', 'LESLIE', "DON'T", 'SAY', 'A', 'SINGLE', 'WORD'] +8188-269288-0040-2713: hyp=["DON'T", 'TALK', 'TO', 'ME', 'LESLIE', "DON'T", 'SAY', 'A', 'SINGLE', 'WORD'] +8188-269288-0041-2714: ref=['I', 'SHALL', 'GO', 'OFF', 'TO', 'SLEEP', 'THAT', 'IS', 'ALL', 'I', 'CARE', 'FOR'] +8188-269288-0041-2714: hyp=['I', 'SHALL', 'GO', 'OFF', 'TO', 'SLEEP', 'THAT', 'IS', 'ALL', 'I', 'CARE', 'FOR'] +8188-269288-0042-2715: ref=["DON'T", 'SAID', 'ANNIE'] +8188-269288-0042-2715: hyp=["DON'T", 'SAID', 'ANNIE'] +8188-269288-0043-2716: ref=['NOW', 'DRINK', 'THIS', 'AT', 
'ONCE', 'SHE', 'SAID', 'IN', 'A', 'VOICE', 'OF', 'AUTHORITY', 'IF', 'YOU', 'REALLY', 'WISH', 'TO', 'SLEEP'] +8188-269288-0043-2716: hyp=['NOW', 'DRINK', 'THIS', 'AT', 'ONCE', 'SHE', 'SAID', 'IN', 'A', 'VOICE', 'OF', 'AUTHORITY', 'IF', 'YOU', 'REALLY', 'WISH', 'TO', 'SLEEP'] +8188-269288-0044-2717: ref=['ANNIE', 'STARED', 'VACANTLY', 'AT', 'THE', 'COCOA', 'THEN', 'SHE', 'UTTERED', 'A', 'LAUGH'] +8188-269288-0044-2717: hyp=['ANNIE', 'STARED', 'VACANTLY', 'AT', 'THE', 'COCOA', 'THEN', 'SHE', 'UTTERED', 'A', 'LAUGH'] +8188-269288-0045-2718: ref=['DRINK', 'THAT', 'SHE', 'SAID'] +8188-269288-0045-2718: hyp=['DRINK', 'THAT', 'SHE', 'SAID'] +8188-269288-0046-2719: ref=['DO', 'YOU', 'WANT', 'TO', 'KILL', 'ME', "DON'T", 'TALK', 'ANY', 'MORE'] +8188-269288-0046-2719: hyp=['DO', 'YOU', 'WANT', 'TO', 'KILL', 'ME', "DON'T", 'TALK', 'ANY', 'MORE'] +8188-269288-0047-2720: ref=['I', 'AM', 'SLEEPY', 'I', 'SHALL', 'SLEEP'] +8188-269288-0047-2720: hyp=['I', 'AM', 'SLEEPY', 'I', 'SHALL', 'SLEEP'] +8188-269288-0048-2721: ref=['SHE', 'GOT', 'INTO', 'BED', 'AS', 'SHE', 'SPOKE', 'AND', 'WRAPPED', 'THE', 'CLOTHES', 'TIGHTLY', 'ROUND', 'HER'] +8188-269288-0048-2721: hyp=['SHE', 'GOT', 'INTO', 'BED', 'AS', 'SHE', 'SPOKE', 'AND', 'WRAPPED', 'THE', 'CLOTHES', 'TIGHTLY', 'ROUND', 'HER'] +8188-269288-0049-2722: ref=["CAN'T", 'YOU', 'MANAGE', 'WITH', 'A', 'CANDLE', 'JUST', 'FOR', 'ONCE'] +8188-269288-0049-2722: hyp=["CAN'T", 'YOU', 'MANAGE', 'WITH', 'A', 'CANDLE', 'JUST', 'FOR', 'ONCE'] +8188-269288-0050-2723: ref=['CERTAINLY', 'SAID', 'LESLIE'] +8188-269288-0050-2723: hyp=['CERTAINLY', 'SAID', 'E'] +8188-269288-0051-2724: ref=['SHE', 'TURNED', 'OFF', 'THE', 'LIGHT', 'AND', 'LIT', 'A', 'CANDLE', 'WHICH', 'SHE', 'PUT', 'BEHIND', 'HER', 'SCREEN', 'THEN', 'PREPARED', 'TO', 'GET', 'INTO', 'BED'] +8188-269288-0051-2724: hyp=['SHE', 'TURNED', 'OFF', 'THE', 'LIGHT', 'AND', 'LIT', 'A', 'CANDLE', 'WHICH', 'SHE', 'PUT', 'BEHIND', 'HER', 'SCREEN', 'THEN', 'PREPARED', 'TO', 'GET', 'INTO', 'BED'] +8188-269288-0052-2725: ref=["ANNIE'S", 'MANNER', 'WAS', 'VERY', 'MYSTERIOUS'] +8188-269288-0052-2725: hyp=["ANNIE'S", 'MANNER', 'WAS', 'VERY', 'MYSTERIOUS'] +8188-269288-0053-2726: ref=['ANNIE', 'DID', 'NOT', 'MEAN', 'TO', 'CONFIDE', 'IN', 'ANYONE', 'THAT', 'NIGHT', 'AND', 'THE', 'KINDEST', 'THING', 'WAS', 'TO', 'LEAVE', 'HER', 'ALONE'] +8188-269288-0053-2726: hyp=['ANNIE', 'DID', 'NOT', 'MEAN', 'TO', 'CONFINE', 'IN', 'ANY', 'ONE', 'THAT', 'NIGHT', 'AND', 'THE', 'KINDEST', 'THING', 'WAS', 'TO', 'LEAVE', 'HER', 'ALONE'] +8188-269288-0054-2727: ref=['TIRED', 'OUT', 'LESLIE', 'HERSELF', 'DROPPED', 'ASLEEP'] +8188-269288-0054-2727: hyp=['TIRED', 'OUT', 'LESLIE', 'HERSELF', 'DROPPED', 'ASLEEP'] +8188-269288-0055-2728: ref=['ANNIE', 'IS', 'THAT', 'YOU', 'SHE', 'CALLED', 'OUT'] +8188-269288-0055-2728: hyp=['ANNIE', 'IS', 'THAT', 'YOU', 'SHE', 'CALLED', 'OUT'] +8188-269288-0056-2729: ref=['THERE', 'WAS', 'NO', 'REPLY', 'BUT', 'THE', 'SOUND', 'OF', 'HURRYING', 'STEPS', 'CAME', 'QUICKER', 'AND', 'QUICKER', 'NOW', 'AND', 'THEN', 'THEY', 'WERE', 'INTERRUPTED', 'BY', 'A', 'GROAN'] +8188-269288-0056-2729: hyp=['THERE', 'WAS', 'NO', 'REPLY', 'BUT', 'THE', 'SOUND', 'OF', 'HURRYING', 'STEPS', 'CAME', 'QUICKER', 'AND', 'QUICKER', 'NOW', 'AND', 'THEN', 'THEY', 'WERE', 'INTERRUPTED', 'BY', 'A', 'GROAN'] +8188-269288-0057-2730: ref=['OH', 'THIS', 'WILL', 'KILL', 'ME', 'MY', 'HEART', 'WILL', 'BREAK', 'THIS', 'WILL', 'KILL', 'ME'] +8188-269288-0057-2730: hyp=['OH', 'THIS', 'WILL', 'KILL', 'ME', 'MY', 'HEART', 'WILL', 'BREAK', 'THIS', 'WILL', 'KILL', 'ME'] 
+8188-269290-0000-2731: ref=['THE', 'GUILD', 'OF', 'SAINT', 'ELIZABETH'] +8188-269290-0000-2731: hyp=['THE', 'GIRL', 'OF', 'SAINT', 'ELIZABETH'] +8188-269290-0001-2732: ref=['IMMEDIATELY', 'AFTER', 'DINNER', 'THAT', 'EVENING', 'LESLIE', 'RAN', 'UP', 'TO', 'HER', 'ROOM', 'TO', 'MAKE', 'PREPARATIONS', 'FOR', 'HER', 'VISIT', 'TO', 'EAST', 'HALL'] +8188-269290-0001-2732: hyp=['IMMEDIATELY', 'AFTER', 'DINNER', 'THAT', 'EVENING', 'LESLIE', 'RAN', 'UP', 'TO', 'HER', 'ROOM', 'TO', 'MAKE', 'PREPARATIONS', 'FOR', 'HER', 'VISIT', 'TO', 'EAST', 'HALL'] +8188-269290-0002-2733: ref=["I'M", 'NOT', 'COMING', 'SAID', 'ANNIE'] +8188-269290-0002-2733: hyp=["I'M", 'NOT', 'COMING', 'SAID', 'ANNIE'] +8188-269290-0003-2734: ref=['EVERY', 'STUDENT', 'IS', 'TO', 'BE', 'IN', 'EAST', 'HALL', 'AT', 'HALF', 'PAST', 'EIGHT'] +8188-269290-0003-2734: hyp=['EVERY', 'STUDENT', 'IS', 'TO', 'BE', 'IN', 'EAST', 'HALL', 'AT', 'HALF', 'PAST', 'EIGHT'] +8188-269290-0004-2735: ref=['IT', "DOESN'T", 'MATTER', 'REPLIED', 'ANNIE', 'WHETHER', 'IT', 'IS', 'AN', 'ORDER', 'OR', 'NOT', "I'M", 'NOT', 'COMING', 'SAY', 'NOTHING', 'ABOUT', 'ME', 'PLEASE'] +8188-269290-0004-2735: hyp=['IT', "DOESN'T", 'MATTER', 'REPLIED', 'ENNY', 'WHETHER', 'IT', 'IS', 'AN', 'ORDER', 'OR', 'NOT', "I'M", 'NOT', 'COMING', 'SAY', 'NOTHING', 'ABOUT', 'ME', 'PLEASE'] +8188-269290-0005-2736: ref=['IT', 'BURNED', 'AS', 'IF', 'WITH', 'FEVER'] +8188-269290-0005-2736: hyp=['IT', 'BURNED', 'AS', 'IF', 'WITH', 'FEVER'] +8188-269290-0006-2737: ref=['YOU', "DON'T", 'KNOW', 'WHAT', 'A', 'TRIAL', 'IT', 'IS', 'FOR', 'ME', 'TO', 'HAVE', 'YOU', 'HERE'] +8188-269290-0006-2737: hyp=['YOU', "DON'T", 'KNOW', 'WHAT', 'A', 'TRIAL', 'IT', 'IS', 'FOR', 'ME', 'TO', 'HAVE', 'YOU', 'HERE'] +8188-269290-0007-2738: ref=['I', 'WANT', 'TO', 'BE', 'ALONE', 'GO'] +8188-269290-0007-2738: hyp=['I', 'WANT', 'TO', 'BE', 'ALONE', 'GO'] +8188-269290-0008-2739: ref=['I', 'KNOW', 'YOU', "DON'T", 'QUITE', 'MEAN', 'WHAT', 'YOU', 'SAY', 'SAID', 'LESLIE', 'BUT', 'OF', 'COURSE', 'IF', 'YOU', 'REALLY', 'WISH', 'ME'] +8188-269290-0008-2739: hyp=['I', 'KNOW', 'YOU', "DON'T", 'QUITE', 'MEAN', 'WHAT', 'YOU', 'SAY', 'SAID', 'LESLIE', 'BUT', 'OF', 'COURSE', 'IF', 'YOU', 'REALLY', 'WISH', 'ME'] +8188-269290-0009-2740: ref=['YOU', 'FRET', 'ME', 'BEYOND', 'ENDURANCE'] +8188-269290-0009-2740: hyp=['YOU', 'FRET', 'ME', 'BEYOND', 'ENDURANCE'] +8188-269290-0010-2741: ref=['WRAPPING', 'A', 'PRETTY', 'BLUE', 'SHAWL', 'ROUND', 'HER', 'HEAD', 'AND', 'SHOULDERS', 'SHE', 'TURNED', 'TO', 'ANNIE'] +8188-269290-0010-2741: hyp=['WRAPPING', 'A', 'PRETTY', 'BLUE', 'SHAWL', 'AROUND', 'HER', 'HEAD', 'AND', 'SHOULDERS', 'SHE', 'TURNED', 'TO', 'ANNIE'] +8188-269290-0011-2742: ref=['LESLIE', 'WAS', 'JUST', 'CLOSING', 'THE', 'DOOR', 'BEHIND', 'HER', 'WHEN', 'ANNIE', 'CALLED', 'AFTER', 'HER'] +8188-269290-0011-2742: hyp=['LESLIE', 'WAS', 'JUST', 'CLOSING', 'THE', 'DOOR', 'BEHIND', 'HER', 'WHEN', 'ANNIE', 'CALLED', 'AFTER', 'HER'] +8188-269290-0012-2743: ref=['I', 'TOOK', 'IT', 'OUT', 'SAID', 'LESLIE', 'TOOK', 'IT', 'OUT'] +8188-269290-0012-2743: hyp=['I', 'TOOK', 'IT', 'OUT', 'SAID', 'LESLIE', 'TOOK', 'IT', 'OUT'] +8188-269290-0013-2744: ref=['HAVE', 'THE', 'GOODNESS', 'TO', 'FIND', 'IT', 'AND', 'PUT', 'IT', 'BACK'] +8188-269290-0013-2744: hyp=['HAVE', 'THE', 'GOODNESS', 'TO', 'FIND', 'IT', 'AND', 'PUT', 'IT', 'BACK'] +8188-269290-0014-2745: ref=['BUT', "DON'T", 'LOCK', 'ME', 'OUT', 'PLEASE', 'ANNIE'] +8188-269290-0014-2745: hyp=['BUT', "DON'T", 'LOOK', 'ME', 'OUT', 'PLEASE', 'ANNIE'] +8188-269290-0015-2746: ref=['OH', 'I', "WON'T", 
'LOCK', 'YOU', 'OUT', 'SHE', 'SAID', 'BUT', 'I', 'MUST', 'HAVE', 'THE', 'KEY'] +8188-269290-0015-2746: hyp=['OH', 'I', "WON'T", 'LOCK', 'YOU', 'OUT', 'SHE', 'SAID', 'BUT', 'I', 'MUST', 'HAVE', 'THE', 'KEY'] +8188-269290-0016-2747: ref=['JANE', "HERIOT'S", 'VOICE', 'WAS', 'HEARD', 'IN', 'THE', 'PASSAGE'] +8188-269290-0016-2747: hyp=['JANE', "HERRIOT'S", 'VOICE', 'WAS', 'HEARD', 'IN', 'THE', 'PASSAGE'] +8188-269290-0017-2748: ref=['AS', 'SHE', 'WALKED', 'DOWN', 'THE', 'CORRIDOR', 'SHE', 'HEARD', 'IT', 'BEING', 'TURNED', 'IN', 'THE', 'LOCK'] +8188-269290-0017-2748: hyp=['AS', 'SHE', 'WALKED', 'DOWN', 'THE', 'CORRIDOR', 'SHE', 'HEARD', 'IT', 'BEING', 'TURNED', 'TO', 'THE', 'LOCK'] +8188-269290-0018-2749: ref=['WHAT', 'CAN', 'THIS', 'MEAN', 'SHE', 'SAID', 'TO', 'HERSELF'] +8188-269290-0018-2749: hyp=['WHAT', 'CAN', 'THIS', 'MEAN', 'SHE', 'SAID', 'TO', 'HERSELF'] +8188-269290-0019-2750: ref=['OH', 'I', "WON'T", 'PRESS', 'YOU', 'REPLIED', 'JANE'] +8188-269290-0019-2750: hyp=['OH', 'I', "WON'T", 'PRESS', 'YOU', 'REPLIED', 'JANE'] +8188-269290-0020-2751: ref=['OH', 'I', 'SHALL', 'NEVER', 'DO', 'THAT', 'REPLIED', 'LESLIE'] +8188-269290-0020-2751: hyp=['OH', 'I', 'SHALL', 'NEVER', 'DO', 'THAT', 'REPLIED', 'LESLIE'] +8188-269290-0021-2752: ref=['YOU', 'SEE', 'ALL', 'THE', 'GIRLS', 'EXCEPT', 'EILEEN', 'AND', 'MARJORIE', 'LAUGH', 'AT', 'HER', 'AND', 'THAT', 'SEEMS', 'TO', 'ME', 'TO', 'MAKE', 'HER', 'WORSE'] +8188-269290-0021-2752: hyp=['YOU', 'SEE', 'ALL', 'THE', 'GIRLS', 'EXCEPT', 'EILEEN', 'AND', 'MARJORIE', 'LAUGH', 'AT', 'HER', 'AND', 'THAT', 'SEEMS', 'TO', 'ME', 'TO', 'MAKE', 'HER', 'WORSE'] +8188-269290-0022-2753: ref=['SOME', 'DAY', 'JANE', 'YOU', 'MUST', 'SEE', 'HER'] +8188-269290-0022-2753: hyp=['SOME', 'DAY', 'JANE', 'YOU', 'MUST', 'SEE', 'HER'] +8188-269290-0023-2754: ref=['IF', 'YOU', 'ARE', 'IN', 'LONDON', 'DURING', 'THE', 'SUMMER', 'YOU', 'MUST', 'COME', 'AND', 'PAY', 'US', 'A', 'VISIT', 'WILL', 'YOU'] +8188-269290-0023-2754: hyp=['IF', 'YOU', 'ARE', 'IN', 'LONDON', 'DURING', 'THE', 'SUMMER', 'YOU', 'MUST', 'COME', 'AND', 'PAY', 'US', 'A', 'VISIT', 'WILL', 'YOU'] +8188-269290-0024-2755: ref=['THAT', 'IS', 'IF', 'YOU', 'CARE', 'TO', 'CONFIDE', 'IN', 'ME'] +8188-269290-0024-2755: hyp=['THAT', 'IS', 'IF', 'YOU', 'CARE', 'TO', 'CONFIDE', 'IN', 'ME'] +8188-269290-0025-2756: ref=['I', 'BELIEVE', 'POOR', 'ANNIE', 'IS', 'DREADFULLY', 'UNHAPPY'] +8188-269290-0025-2756: hyp=['I', 'BELIEVE', 'POOR', 'ANNIE', 'IS', 'DREADFULLY', 'UNHAPPY'] +8188-269290-0026-2757: ref=["THAT'S", 'JUST', 'IT', 'JANE', 'THAT', 'IS', 'WHAT', 'FRIGHTENS', 'ME', 'SHE', 'REFUSES', 'TO', 'COME'] +8188-269290-0026-2757: hyp=["THAT'S", 'JUST', 'IT', 'JANE', 'THAT', 'IS', 'WHAT', 'FRIGHTENS', 'ME', 'SHE', 'REFUSES', 'TO', 'COME'] +8188-269290-0027-2758: ref=['REFUSES', 'TO', 'COME', 'SHE', 'CRIED'] +8188-269290-0027-2758: hyp=['REFUSES', 'TO', 'COME', 'SHE', 'CRIED'] +8188-269290-0028-2759: ref=['SHE', 'WILL', 'GET', 'INTO', 'AN', 'AWFUL', 'SCRAPE'] +8188-269290-0028-2759: hyp=["SHE'LL", 'GET', 'IN', 'AN', 'AWFUL', 'SCRAPE'] +8188-269290-0029-2760: ref=['I', 'AM', 'SURE', 'SHE', 'IS', 'ILL', 'SHE', 'WORKS', 'TOO', 'HARD', 'AND', 'SHE', 'BUT', 'THERE', 'I', "DON'T", 'KNOW', 'THAT', 'I', 'OUGHT', 'TO', 'SAY', 'ANY', 'MORE'] +8188-269290-0029-2760: hyp=['I', 'AM', 'SURE', 'SHE', 'IS', 'ILL', 'SHE', 'WORKS', 'TOO', 'HARD', 'AND', 'SHE', 'BUT', 'THERE', 'I', "DON'T", 'KNOW', 'THAT', 'I', 'OUGHT', 'TO', 'SAY', 'ANY', 'MORE'] +8188-269290-0030-2761: ref=["I'LL", 'WAIT', 'FOR', 'YOU', 'HERE', 'SAID', 'LESLIE'] +8188-269290-0030-2761: 
hyp=["I'LL", 'WAIT', 'FOR', 'YOU', 'HERE', 'SAID', 'LESLIE'] +8188-269290-0031-2762: ref=['DO', 'COME', 'ANNIE', 'DO'] +8188-269290-0031-2762: hyp=['DO', 'COME', 'ANNIE', 'DO'] +8188-269290-0032-2763: ref=['SCARCELY', 'LIKELY', 'REPLIED', 'LESLIE', 'SHE', 'TOLD', 'ME', 'SHE', 'WAS', 'DETERMINED', 'NOT', 'TO', 'COME', 'TO', 'THE', 'MEETING'] +8188-269290-0032-2763: hyp=['SCARCELY', 'LIKELY', 'REPLIED', 'LESLIE', 'SHE', 'TOLD', 'ME', 'SHE', 'WAS', 'DETERMINED', 'NOT', 'TO', 'COME', 'TO', 'THE', 'MEETING'] +8188-269290-0033-2764: ref=['BUT', 'MARJORIE', 'AND', 'EILEEN', 'HAD', 'ALREADY', 'DEPARTED', 'AND', 'LESLIE', 'AND', 'JANE', 'FOUND', 'THEMSELVES', 'AMONG', 'THE', 'LAST', 'STUDENTS', 'TO', 'ARRIVE', 'AT', 'THE', 'GREAT', 'EAST', 'HALL'] +8188-269290-0033-2764: hyp=['BUT', 'MARJORIE', 'AND', 'AILEEN', 'HAD', 'ALREADY', 'DEPARTED', 'AND', 'LESLIE', 'AND', 'JANE', 'FOUND', 'THEMSELVES', 'AMONG', 'THE', 'LAST', 'STUDENTS', 'TO', 'ARRIVE', 'AT', 'THE', 'GREAT', 'EAST', 'HALL'] +8188-269290-0034-2765: ref=['MISS', 'LAUDERDALE', 'WAS', 'STANDING', 'WITH', 'THE', 'OTHER', 'TUTORS', 'AND', 'PRINCIPALS', 'OF', 'THE', 'DIFFERENT', 'HALLS', 'ON', 'A', 'RAISED', 'PLATFORM'] +8188-269290-0034-2765: hyp=['MISSUS', 'LORDADALE', 'WAS', 'STANDING', 'WITH', 'THE', 'OTHER', 'TUTORS', 'AND', 'PRINCIPALS', 'OF', 'THE', 'DIFFERENT', 'HALLS', 'ON', 'A', 'RAISED', 'PLATFORM'] +8188-269290-0035-2766: ref=['THEN', 'A', 'ROLL', 'CALL', 'WAS', 'GONE', 'THROUGH', 'BY', 'ONE', 'OF', 'THE', 'TUTORS', 'THE', 'ONLY', 'ABSENTEE', 'WAS', 'ANNIE', 'COLCHESTER'] +8188-269290-0035-2766: hyp=['THEN', 'A', 'RAW', 'CALL', 'WAS', 'GONE', 'THROUGH', 'BY', 'ONE', 'OF', 'THE', 'TUTORS', 'THE', 'ONLY', 'ABSENTEE', 'WAS', 'ANNIE', 'COLCHESTER'] +8188-269290-0036-2767: ref=['THE', 'PHYSICAL', 'PART', 'OF', 'YOUR', 'TRAINING', 'AND', 'ALSO', 'THE', 'MENTAL', 'PART', 'ARE', 'ABUNDANTLY', 'SUPPLIED', 'IN', 'THIS', 'GREAT', 'HOUSE', 'OF', 'LEARNING', 'SHE', 'CONTINUED', 'BUT', 'THE', 'SPIRITUAL', 'PART', 'IT', 'SEEMS', 'TO', 'ME', 'OUGHT', 'NOW', 'TO', 'BE', 'STRENGTHENED'] +8188-269290-0036-2767: hyp=['THE', 'PHYSICAL', 'PART', 'OF', 'YOUR', 'TRAINING', 'AND', 'ALSO', 'THE', 'MENTAL', 'PART', 'ARE', 'ABUNDANTLY', 'SUPPLIED', 'IN', 'THIS', 'GREAT', 'HOUSE', 'OF', 'LEARNING', 'SHE', 'CONTINUED', 'BUT', 'THE', 'SPIRITUAL', 'PART', 'IT', 'SEEMS', 'TO', 'ME', 'OUGHT', 'NOW', 'TO', 'BE', 'STRENGTHENED'] +8188-269290-0037-2768: ref=['HEAR', 'HEAR', 'AND', 'ONCE', 'AGAIN', 'HEAR'] +8188-269290-0037-2768: hyp=['HARE', 'HERE', 'AND', 'ONCE', 'AGAIN', 'HARE'] +8188-269290-0038-2769: ref=['SHE', 'UTTERED', 'HER', 'STRANGE', 'REMARK', 'STANDING', 'UP'] +8188-269290-0038-2769: hyp=['SHE', 'UTTERED', 'HER', 'STREAM', 'REMARK', 'STANDING', 'UP'] +8188-269290-0039-2770: ref=['MARJORIE', 'AND', 'EILEEN', 'WERE', 'CLOSE', 'TO', 'HER'] +8188-269290-0039-2770: hyp=['MARJORIE', 'AND', 'AILEEN', 'WERE', 'CLOSE', 'TO', 'HER'] +8188-269290-0040-2771: ref=['I', 'WILL', 'TALK', 'WITH', 'YOU', 'BELLE', 'ACHESON', 'PRESENTLY', 'SHE', 'SAID'] +8188-269290-0040-2771: hyp=['I', 'WILL', 'TALK', 'WITH', 'YOU', 'BELL', 'ARCHISON', 'PRESENTLY', 'SHE', 'SAID'] +8188-269290-0041-2772: ref=['THE', 'NAMES', 'OF', 'PROPOSED', 'MEMBERS', 'ARE', 'TO', 'BE', 'SUBMITTED', 'TO', 'ME', 'BEFORE', 'THIS', 'DAY', 'WEEK'] +8188-269290-0041-2772: hyp=['THE', 'NAMES', 'OF', 'THE', 'PROPOSED', 'MEMBERS', 'ARE', 'TO', 'BE', 'SUBMITTED', 'TO', 'ME', 'BEFORE', 'THIS', 'DAY', 'WEEK'] +8188-269290-0042-2773: ref=['AM', 'I', 'MY', "BROTHER'S", 'KEEPER'] +8188-269290-0042-2773: hyp=['AM', 'I', 
'MY', "BROTHER'S", 'KEEPER'] +8188-269290-0043-2774: ref=['YOU', 'ASK', 'SHE', 'CONTINUED'] +8188-269290-0043-2774: hyp=['YOU', 'ASK', 'SHE', 'CONTINUED'] +8188-269290-0044-2775: ref=['GOD', 'ANSWERS', 'TO', 'EACH', 'OF', 'YOU', 'YOU', 'ARE'] +8188-269290-0044-2775: hyp=['GOD', 'ANSWERS', 'EACH', 'OF', 'YOU', 'YOU', 'ARE'] +8188-269290-0045-2776: ref=['THE', 'WORLD', 'SAYS', 'NO', 'I', 'AM', 'NOT', 'BUT', 'GOD', 'SAYS', 'YES', 'YOU', 'ARE'] +8188-269290-0045-2776: hyp=['THE', 'WORLD', 'SAYS', 'NO', 'I', 'AM', 'NOT', 'BUT', 'GOD', 'SAYS', 'YES', 'YOU', 'ARE'] +8188-269290-0046-2777: ref=['ALL', 'MEN', 'ARE', 'YOUR', 'BROTHERS'] +8188-269290-0046-2777: hyp=['ALL', 'MEN', 'ARE', 'YOUR', 'BROTHERS'] +8188-269290-0047-2778: ref=['FOR', 'ALL', 'WHO', 'SIN', 'ALL', 'WHO', 'SUFFER', 'YOU', 'ARE', 'TO', 'A', 'CERTAIN', 'EXTENT', 'RESPONSIBLE'] +8188-269290-0047-2778: hyp=['FOR', 'ALL', 'WHO', 'SIN', 'ALL', 'WHO', 'SUFFER', 'YOU', 'ARE', 'TO', 'A', 'CERTAIN', 'EXTENT', 'RESPONSIBLE'] +8188-269290-0048-2779: ref=['AFTER', 'THE', 'ADDRESS', 'THE', 'GIRLS', 'THEMSELVES', 'WERE', 'ENCOURAGED', 'TO', 'SPEAK', 'AND', 'A', 'VERY', 'ANIMATED', 'DISCUSSION', 'FOLLOWED'] +8188-269290-0048-2779: hyp=['AFTER', 'THE', 'ADDRESS', 'THE', 'GIRLS', 'THEMSELVES', 'WERE', 'ENCOURAGED', 'TO', 'SPEAK', 'AND', 'A', 'VERY', 'ANIMATED', 'DISCUSSION', 'FOLLOWED'] +8188-269290-0049-2780: ref=['IT', 'WAS', 'PAST', 'TEN', "O'CLOCK", 'WHEN', 'SHE', 'LEFT', 'THE', 'HALL'] +8188-269290-0049-2780: hyp=['IT', 'WAS', 'PAST', 'TEN', "O'CLOCK", 'WHEN', 'SHE', 'LEFT', 'THE', 'HALL'] +8188-269290-0050-2781: ref=['JUST', 'AS', 'SHE', 'WAS', 'DOING', 'SO', 'MISS', 'FRERE', 'CAME', 'UP'] +8188-269290-0050-2781: hyp=['JUST', 'AS', 'SHE', 'WAS', 'DOING', 'SO', 'MISS', 'FRERE', 'CAME', 'UP'] +8188-269290-0051-2782: ref=['ANNIE', 'COLCHESTER', 'IS', 'YOUR', 'ROOMFELLOW', 'IS', 'SHE', 'NOT', 'SHE', 'SAID'] +8188-269290-0051-2782: hyp=['ENNIE', 'COLCHESTER', 'IS', 'YOUR', 'ROOMFELLOW', 'IS', 'SHE', 'NOT', 'SHE', 'SAID'] +8188-269290-0052-2783: ref=['I', 'SEE', 'BY', 'YOUR', 'FACE', 'MISS', 'GILROY', 'THAT', 'YOU', 'ARE', 'DISTRESSED', 'ABOUT', 'SOMETHING', 'ARE', 'YOU', 'KEEPING', 'ANYTHING', 'BACK'] +8188-269290-0052-2783: hyp=['I', 'SEE', 'BY', 'YOUR', 'FACE', 'MISSUS', 'GILROY', 'THAT', 'YOU', 'ARE', 'DISTRESSED', 'ABOUT', 'SOMETHING', 'ARE', 'YOU', 'KEEPING', 'ANYTHING', 'BACK'] +8188-269290-0053-2784: ref=['I', 'AM', 'AFRAID', 'I', 'AM', 'REPLIED', 'LESLIE', 'DISTRESS', 'NOW', 'IN', 'HER', 'TONE'] +8188-269290-0053-2784: hyp=["I'M", 'AFRAID', 'I', 'AM', 'REPLIED', 'LIZZIE', 'DISTRESSED', 'NOW', 'IN', 'HER', 'TONE'] +8188-269290-0054-2785: ref=['I', 'MUST', 'SEE', 'HER', 'MYSELF', 'EARLY', 'IN', 'THE', 'MORNING', 'AND', 'I', 'AM', 'QUITE', 'SURE', 'THAT', 'NOTHING', 'WILL', 'SATISFY', 'MISS', 'LAUDERDALE', 'EXCEPT', 'A', 'VERY', 'AMPLE', 'APOLOGY', 'AND', 'A', 'FULL', 'EXPLANATION', 'OF', 'THE', 'REASON', 'WHY', 'SHE', 'ABSENTED', 'HERSELF'] +8188-269290-0054-2785: hyp=['I', 'MUST', 'SEE', 'HER', 'MYSELF', 'EARLY', 'IN', 'THE', 'MORNING', 'AND', 'I', 'AM', 'QUITE', 'SURE', 'THAT', 'NOTHING', 'WILL', 'SATISFY', 'MISSUS', 'LARDADAE', 'EXCEPT', 'A', 'VERY', 'AMPLE', 'APOLOGY', 'AND', 'A', 'FULL', 'EXPLANATION', 'OF', 'THE', 'REASON', 'WHY', 'SHE', 'ABSENTED', 'HERSELF'] +8188-269290-0055-2786: ref=['EXCUSES', 'MAKE', 'NO', 'DIFFERENCE'] +8188-269290-0055-2786: hyp=['EXCUSES', 'MAKE', 'NO', 'DIFFERENCE'] +8188-269290-0056-2787: ref=['THE', 'GIRL', 'WHO', 'BREAKS', 'THE', 'RULES', 'HAS', 'TO', 'BE', 'PUNISHED'] +8188-269290-0056-2787: hyp=['THE', 
'GIRL', 'WHO', 'BREAKS', 'THE', 'RULES', 'HAS', 'TO', 'BE', 'PUNISHED'] +8188-269290-0057-2788: ref=['I', 'WILL', 'TELL', 'HER'] +8188-269290-0057-2788: hyp=['I', 'WILL', 'TELL', 'HER'] +8188-274364-0000-2789: ref=['THE', 'COMMONS', 'ALSO', 'VOTED', 'THAT', 'THE', 'NEW', 'CREATED', 'PEERS', 'OUGHT', 'TO', 'HAVE', 'NO', 'VOICE', 'IN', 'THIS', 'TRIAL', 'BECAUSE', 'THE', 'ACCUSATION', 'BEING', 'AGREED', 'TO', 'WHILE', 'THEY', 'WERE', 'COMMONERS', 'THEIR', 'CONSENT', 'TO', 'IT', 'WAS', 'IMPLIED', 'WITH', 'THAT', 'OF', 'ALL', 'THE', 'COMMONS', 'OF', 'ENGLAND'] +8188-274364-0000-2789: hyp=['THE', 'COMMONS', 'ALSO', 'VOTED', 'THAT', 'THE', 'NEW', 'CREATED', 'PEERS', 'OUGHT', 'TO', 'HAVE', 'NO', 'VOICE', 'IN', 'THIS', 'TRIAL', 'BECAUSE', 'THE', 'ACCUSATION', 'BEING', 'AGREED', 'TO', 'WHILE', 'THEY', 'WERE', 'COMMONERS', 'THEIR', 'CONSENT', 'TO', 'IT', 'WAS', 'IMPLIED', 'WITH', 'THAT', 'OF', 'ALL', 'THE', 'COMMONS', 'OF', 'ENGLAND'] +8188-274364-0001-2790: ref=['IN', 'THE', 'GOVERNMENT', 'OF', 'IRELAND', 'HIS', 'ADMINISTRATION', 'HAD', 'BEEN', 'EQUALLY', 'PROMOTIVE', 'OF', 'HIS', "MASTER'S", 'INTEREST', 'AND', 'THAT', 'OF', 'THE', 'SUBJECTS', 'COMMITTED', 'TO', 'HIS', 'CARE'] +8188-274364-0001-2790: hyp=['IN', 'THE', 'GOVERNMENT', 'OF', 'IRELAND', 'HIS', 'ADMINISTRATION', 'HAD', 'BEEN', 'EQUALLY', 'PROMOTIVE', 'OF', 'HIS', "MASTER'S", 'INTEREST', 'AND', 'THAT', 'OF', 'THE', 'SUBJECTS', 'COMMITTED', 'TO', 'HIS', 'CARE'] +8188-274364-0002-2791: ref=['THE', 'CASE', 'OF', 'LORD', 'MOUNTNORRIS', 'OF', 'ALL', 'THOSE', 'WHICH', 'WERE', 'COLLECTED', 'WITH', 'SO', 'MUCH', 'INDUSTRY', 'IS', 'THE', 'MOST', 'FLAGRANT', 'AND', 'THE', 'LEAST', 'EXCUSABLE'] +8188-274364-0002-2791: hyp=['THE', 'CASE', 'OF', 'LORD', 'MONTNORRIS', 'OF', 'ALL', 'THOSE', 'WHICH', 'WERE', 'CONNECTED', 'WITH', 'SO', 'MUCH', 'INDUSTRY', 'IS', 'THE', 'MOST', 'FLAGRANT', 'AND', 'THE', 'LEAST', 'EXCUSABLE'] +8188-274364-0003-2792: ref=['THE', 'COURT', 'WHICH', 'CONSISTED', 'OF', 'THE', 'CHIEF', 'OFFICERS', 'OF', 'THE', 'ARMY', 'FOUND', 'THE', 'CRIME', 'TO', 'BE', 'CAPITAL', 'AND', 'CONDEMNED', 'THAT', 'NOBLEMAN', 'TO', 'LOSE', 'HIS', 'HEAD'] +8188-274364-0003-2792: hyp=['THE', 'COURT', 'WHICH', 'CONSISTED', 'OF', 'THE', 'CHIEF', 'OFFICIALS', 'OF', 'THE', 'ARMY', 'FOUND', 'THE', 'CRIME', 'TO', 'BE', 'CAPITAL', 'AND', 'CONDEMNED', 'THAT', 'NOBLEMAN', 'TO', 'LOSE', 'HIS', 'HEAD'] +8188-274364-0004-2793: ref=['WHERE', 'THE', 'TOKEN', 'BY', 'WHICH', 'I', 'SHOULD', 'DISCOVER', 'IT'] +8188-274364-0004-2793: hyp=['WERE', 'THE', 'TOKEN', 'BY', 'WHICH', 'I', 'SHALL', 'DISCOVER', 'IT'] +8188-274364-0005-2794: ref=['IT', 'IS', 'NOW', 'FULL', 'TWO', 'HUNDRED', 'AND', 'FORTY', 'YEARS', 'SINCE', 'TREASONS', 'WERE', 'DEFINED', 'AND', 'SO', 'LONG', 'HAS', 'IT', 'BEEN', 'SINCE', 'ANY', 'MAN', 'WAS', 'TOUCHED', 'TO', 'THIS', 'EXTENT', 'UPON', 'THIS', 'CRIME', 'BEFORE', 'MYSELF'] +8188-274364-0005-2794: hyp=['IT', 'IS', 'NOW', 'FULL', 'TWO', 'HUNDRED', 'AND', 'FORTY', 'YEARS', 'SINCE', 'TREASONS', 'WERE', 'DEFINED', 'AND', 'SO', 'LONG', 'HAS', 'IT', 'BEEN', 'SINCE', 'ANY', 'MAN', 'WAS', 'TOUCHED', 'TO', 'THIS', 'EXTENT', 'UPON', 'THIS', 'CRIME', 'BEFORE', 'MYSELF'] +8188-274364-0006-2795: ref=['LET', 'US', 'NOT', 'TO', 'OUR', 'OWN', 'DESTRUCTION', 'AWAKE', 'THOSE', 'SLEEPING', 'LIONS', 'BY', 'RATTLING', 'UP', 'A', 'COMPANY', 'OF', 'OLD', 'RECORDS', 'WHICH', 'HAVE', 'LAIN', 'FOR', 'SO', 'MANY', 'AGES', 'BY', 'THE', 'WALL', 'FORGOTTEN', 'AND', 'NEGLECTED'] +8188-274364-0006-2795: hyp=['LET', 'US', 'NOT', 'TO', 'OUR', 'OWN', 'DESTRUCTION', 'AWAKE', 'THOSE', 
'SLEEPING', 'LIONS', 'BY', 'RATTLING', 'UP', 'A', 'COMPANY', 'OF', 'OLD', 'RECORDS', 'WHICH', 'HAVE', 'LAIN', 'FOR', 'SO', 'MANY', 'AGES', 'BY', 'THE', 'WALL', 'FORGOTTEN', 'AND', 'NEGLECTED'] +8188-274364-0007-2796: ref=['HOWEVER', 'THESE', 'GENTLEMEN', 'AT', 'THE', 'BAR', 'SAY', 'THEY', 'SPEAK', 'FOR', 'THE', 'COMMONWEALTH', 'AND', 'THEY', 'BELIEVE', 'SO', 'YET', 'UNDER', 'FAVOR', 'IT', 'IS', 'I', 'WHO', 'IN', 'THIS', 'PARTICULAR', 'SPEAK', 'FOR', 'THE', 'COMMONWEALTH'] +8188-274364-0007-2796: hyp=['HOWEVER', 'THESE', 'GENTLEMEN', 'AT', 'THE', 'BAR', 'SAY', 'THEY', 'SPEAK', 'FOR', 'THE', 'COMMONWEALTH', 'AND', 'THEY', 'BELIEVE', 'SO', 'YET', 'UNDER', 'FAVOUR', 'IT', 'IS', 'I', 'WHO', 'IN', 'THIS', 'PARTICULAR', 'SPEAK', 'FOR', 'THE', 'COMMONWEALTH'] +8188-274364-0008-2797: ref=['MY', 'LORDS', 'I', 'HAVE', 'NOW', 'TROUBLED', 'YOUR', 'LORDSHIPS', 'A', 'GREAT', 'DEAL', 'LONGER', 'THAN', 'I', 'SHOULD', 'HAVE', 'DONE'] +8188-274364-0008-2797: hyp=['MY', 'LORDS', 'I', 'HAVE', 'NOW', 'TROUBLED', 'YOUR', 'LORDSHIP', 'A', 'GREAT', 'DEAL', 'LONGER', 'THAN', 'I', 'SHOULD', 'HAVE', 'DONE'] +8188-274364-0009-2798: ref=['YOUNG', 'VANE', 'FALLING', 'UPON', 'THIS', 'PAPER', 'OF', 'NOTES', 'DEEMED', 'THE', 'MATTER', 'OF', 'THE', 'UTMOST', 'IMPORTANCE', 'AND', 'IMMEDIATELY', 'COMMUNICATED', 'IT', 'TO', 'PYM', 'WHO', 'NOW', 'PRODUCED', 'THE', 'PAPER', 'BEFORE', 'THE', 'HOUSE', 'OF', 'COMMONS'] +8188-274364-0009-2798: hyp=['YOUNG', 'VAIN', 'FALLING', 'UPON', 'THIS', 'PAPER', 'OF', 'NOTES', 'DEEMED', 'THE', 'MATTER', 'OF', 'THE', 'UTMOST', 'IMPORTANCE', 'AND', 'IMMEDIATELY', 'COMMUNICATED', 'IT', 'TO', 'PYM', 'WHO', 'NOW', 'PRODUCED', 'THE', 'PAPER', 'BEFORE', 'THE', 'HOUSE', 'OF', 'COMMONS'] +8188-274364-0010-2799: ref=['THE', 'KING', 'PROPOSES', 'THIS', 'DIFFICULTY', 'BUT', 'HOW', 'CAN', 'I', 'UNDERTAKE', 'OFFENSIVE', 'WAR', 'IF', 'I', 'HAVE', 'NO', 'MORE', 'MONEY'] +8188-274364-0010-2799: hyp=['THE', 'KING', 'PROPOSES', 'THIS', 'DIFFICULTY', 'BUT', 'HOW', 'CAN', 'I', 'UNDERTAKE', 'OFFENSIVE', 'WAR', 'IF', 'I', 'HAVE', 'NO', 'MORE', 'MONEY'] +8188-274364-0011-2800: ref=['YOUR', 'MAJESTY', 'HAVING', 'TRIED', 'THE', 'AFFECTIONS', 'OF', 'YOUR', 'PEOPLE', 'YOU', 'ARE', 'ABSOLVED', 'AND', 'LOOSE', 'FROM', 'ALL', 'RULES', 'OF', 'GOVERNMENT', 'AND', 'MAY', 'DO', 'WHAT', 'POWER', 'WILL', 'ADMIT'] +8188-274364-0011-2800: hyp=['YOUR', 'MAJESTY', 'HAVING', 'TRIED', 'THE', 'AFFECTIONS', 'OF', 'YOUR', 'PEOPLE', 'YOU', 'ARE', 'ABSOLVED', 'AND', 'LOOSE', 'FROM', 'ALL', 'RULES', 'OF', 'GOVERNMENT', 'AND', 'MAY', 'DO', 'WHAT', 'POWER', 'WILL', 'ADMIT'] +8280-266249-0000-2801: ref=['OLD', 'MISTER', 'DINSMORE', 'HAD', 'ACCEPTED', 'A', 'PRESSING', 'INVITATION', 'FROM', 'HIS', 'GRANDDAUGHTER', 'AND', 'HER', 'HUSBAND', 'TO', 'JOIN', 'THE', 'PARTY', 'AND', 'WITH', 'THE', 'ADDITION', 'OF', 'SERVANTS', 'IT', 'WAS', 'A', 'LARGE', 'ONE'] +8280-266249-0000-2801: hyp=['OLD', 'MISTER', 'DINSMORE', 'HAD', 'ACCEPTED', 'A', 'PRESSING', 'INVITATION', 'FROM', 'HIS', 'GRANDDAUGHTER', 'AND', 'HER', 'HUSBAND', 'TO', 'JOIN', 'THE', 'PARTY', 'AND', 'WITH', 'THE', 'ADDITION', 'OF', 'SERVANTS', 'IT', 'WAS', 'A', 'LARGE', 'ONE'] +8280-266249-0001-2802: ref=['AS', 'THEY', 'WERE', 'IN', 'NO', 'HASTE', 'AND', 'THE', 'CONFINEMENT', 'OF', 'A', 'RAILROAD', 'CAR', 'WOULD', 'BE', 'VERY', 'IRKSOME', 'TO', 'THE', 'YOUNGER', 'CHILDREN', 'IT', 'HAD', 'BEEN', 'DECIDED', 'TO', 'MAKE', 'THE', 'JOURNEY', 'BY', 'WATER'] +8280-266249-0001-2802: hyp=['AS', 'THEY', 'WERE', 'IN', 'NO', 'HASTE', 'AND', 'THE', 'CONFINEMENT', 'OF', 'A', 'RAILROAD', 'CAR', 'WOULD', 'BE', 
'VERY', 'IRKSOME', 'TO', 'THE', 'YOUNGER', 'CHILDREN', 'IT', 'HAD', 'BEEN', 'DECIDED', 'TO', 'MAKE', 'THE', 'JOURNEY', 'BY', 'WATER'] +8280-266249-0002-2803: ref=['THERE', 'WERE', 'NO', 'SAD', 'LEAVE', 'TAKINGS', 'TO', 'MAR', 'THEIR', 'PLEASURE', 'THE', 'CHILDREN', 'WERE', 'IN', 'WILD', 'SPIRITS', 'AND', 'ALL', 'SEEMED', 'CHEERFUL', 'AND', 'HAPPY', 'AS', 'THEY', 'SAT', 'OR', 'STOOD', 'UPON', 'THE', 'DECK', 'WATCHING', 'THE', 'RECEDING', 'SHORE', 'AS', 'THE', 'VESSEL', 'STEAMED', 'OUT', 'OF', 'THE', 'HARBOR'] +8280-266249-0002-2803: hyp=['THERE', 'WERE', 'NO', 'SAD', 'LEAVE', 'TAKINGS', 'TO', 'MAR', 'THEIR', 'PLEASURE', 'THE', 'CHILDREN', 'WERE', 'IN', 'WILD', 'SPIRITS', 'AND', 'ALL', 'SEEMED', 'CHEERFUL', 'AND', 'HAPPY', 'AS', 'THEY', 'SAT', 'OR', 'STOOD', 'UPON', 'THE', 'DECK', 'WATCHING', 'THE', 'RECEDING', 'SHORE', 'AS', 'THE', 'VESSEL', 'STEAMED', 'OUT', 'OF', 'THE', 'HARBOR'] +8280-266249-0003-2804: ref=['AT', 'LENGTH', 'THE', 'LAND', 'HAD', 'QUITE', 'DISAPPEARED', 'NOTHING', 'COULD', 'BE', 'SEEN', 'BUT', 'THE', 'SKY', 'OVERHEAD', 'AND', 'A', 'VAST', 'EXPANSE', 'OF', 'WATER', 'ALL', 'AROUND', 'AND', 'THE', 'PASSENGERS', 'FOUND', 'LEISURE', 'TO', 'TURN', 'THEIR', 'ATTENTION', 'UPON', 'EACH', 'OTHER'] +8280-266249-0003-2804: hyp=['AT', 'LENGTH', 'THE', 'LAND', 'HAD', 'QUITE', 'DISAPPEARED', 'NOTHING', 'COULD', 'BE', 'SEEN', 'BUT', 'THE', 'SKY', 'OVERHEAD', 'AND', 'A', 'VAST', 'EXPANSE', 'OF', 'WATER', 'ALL', 'AROUND', 'AND', 'THE', 'PASSENGERS', 'FOUND', 'LEISURE', 'TO', 'TURN', 'THEIR', 'ATTENTION', 'UPON', 'EACH', 'OTHER'] +8280-266249-0004-2805: ref=['THERE', 'ARE', 'SOME', 'NICE', 'LOOKING', 'PEOPLE', 'ON', 'BOARD', 'REMARKED', 'MISTER', 'TRAVILLA', 'IN', 'AN', 'UNDERTONE', 'TO', 'HIS', 'WIFE'] +8280-266249-0004-2805: hyp=['THERE', 'ARE', 'SOME', 'NICE', 'LOOKING', 'PEOPLE', 'ON', 'BOARD', 'REMARKED', 'MISTER', 'TRAVILLA', 'IN', 'AN', 'UNDERTONE', 'TO', 'HIS', 'WIFE'] +8280-266249-0005-2806: ref=['BESIDE', 'OURSELVES', 'ADDED', 'COUSIN', 'RONALD', 'LAUGHING'] +8280-266249-0005-2806: hyp=['BESIDES', 'OURSELVES', 'ADDED', 'COUSIN', 'RONALD', 'LAUGHING'] +8280-266249-0006-2807: ref=['YES', 'SHE', 'ANSWERED', 'THAT', 'LITTLE', 'GROUP', 'YONDER', 'A', 'YOUNG', 'MINISTER', 'AND', 'HIS', 'WIFE', 'AND', 'CHILD', 'I', 'SUPPOSE'] +8280-266249-0006-2807: hyp=['YES', 'SHE', 'ANSWERED', 'THAT', 'LITTLE', 'GROUP', 'YONDER', 'A', 'YOUNG', 'MINISTER', 'AND', 'HIS', 'WIFE', 'AND', 'CHILD', 'I', 'SUPPOSE'] +8280-266249-0007-2808: ref=['AND', 'WHAT', 'A', 'DEAR', 'LITTLE', 'FELLOW', 'HE', 'IS', 'JUST', 'ABOUT', 'THE', 'AGE', 'OF', 'OUR', 'HAROLD', 'I', 'SHOULD', 'JUDGE'] +8280-266249-0007-2808: hyp=['AND', 'WHAT', 'A', 'DEAR', 'LITTLE', 'FELLOW', 'HE', 'IS', 'JUST', 'ABOUT', 'THE', 'AGE', 'OF', 'OUR', 'HAROLD', 'I', 'SHOULD', 'JUDGE'] +8280-266249-0008-2809: ref=['DO', 'YOU', 'SON', 'WAS', 'THE', 'SMILING', 'REJOINDER'] +8280-266249-0008-2809: hyp=['DO', 'YOU', 'SON', 'WAS', 'THE', 'SMILING', 'REJOINDER'] +8280-266249-0009-2810: ref=['HE', 'CERTAINLY', 'LOOKS', 'LIKE', 'A', 'VERY', 'NICE', 'LITTLE', 'BOY'] +8280-266249-0009-2810: hyp=['HE', 'CERTAINLY', 'LOOKS', 'LIKE', 'A', 'VERY', 'NICE', 'LITTLE', 'BOY'] +8280-266249-0010-2811: ref=['SUPPOSE', 'YOU', 'AND', 'HE', 'SHAKE', 'HANDS', 'FRANK'] +8280-266249-0010-2811: hyp=['SUPPOSE', 'YOU', 'AND', 'HE', 'SHAKE', 'HANDS', 'FRANK'] +8280-266249-0011-2812: ref=['I', 'DO', 'INDEED', 'THOUGH', 'PROBABLY', 'COMPARATIVELY', 'FEW', 'ARE', 'AWARE', 'THAT', 'TOBACCO', 'IS', 'THE', 'CAUSE', 'OF', 'THEIR', 'AILMENTS'] +8280-266249-0011-2812: hyp=['I', 'DO', 
'INDEED', 'THOUGH', 'PROBABLY', 'COMPARATIVELY', 'FEW', 'ARE', 'AWARE', 'THAT', 'TOBACCO', 'IS', 'THE', 'CAUSE', 'OF', 'THEIR', 'AILMENTS'] +8280-266249-0012-2813: ref=['DOUBTLESS', 'THAT', 'IS', 'THE', 'CASE', 'REMARKED', 'MISTER', 'DINSMORE'] +8280-266249-0012-2813: hyp=['DOUBTLESS', 'THAT', 'IS', 'THE', 'CASE', 'REMARKED', 'MISTER', 'DINSMORE'] +8280-266249-0013-2814: ref=['WITH', 'ALL', 'MY', 'HEART', 'IF', 'YOU', 'WILL', 'STEP', 'INTO', 'THE', "GENTLEMEN'S", 'CABIN', 'WHERE', "THERE'S", 'A', 'LIGHT'] +8280-266249-0013-2814: hyp=['WITH', 'ALL', 'MY', 'HEART', 'IF', 'YOU', 'WILL', 'STEP', 'INTO', 'THE', "GENTLEMAN'S", 'CABIN', 'WHERE', "THERE'S", 'A', 'LIGHT'] +8280-266249-0014-2815: ref=['HE', 'LED', 'THE', 'WAY', 'THE', 'OTHERS', 'ALL', 'FOLLOWING', 'AND', 'TAKING', 'OUT', 'A', 'SLIP', 'OF', 'PAPER', 'READ', 'FROM', 'IT', 'IN', 'A', 'DISTINCT', 'TONE', 'LOUD', 'ENOUGH', 'TO', 'BE', 'HEARD', 'BY', 'THOSE', 'ABOUT', 'HIM', 'WITHOUT', 'DISTURBING', 'THE', 'OTHER', 'PASSENGERS'] +8280-266249-0014-2815: hyp=['HE', 'LED', 'THE', 'WAY', 'THE', 'OTHERS', 'ALL', 'FOLLOWING', 'AND', 'TAKING', 'OUT', 'A', 'SLIP', 'OF', 'PAPER', 'READ', 'FROM', 'IT', 'IN', 'A', 'DISTINCT', 'TONE', 'LOUD', 'ENOUGH', 'TO', 'BE', 'HEARD', 'BY', 'THOSE', 'ALL', 'ABOUT', 'HIM', 'WITHOUT', 'DISTURBING', 'THE', 'OTHER', 'PASSENGERS'] +8280-266249-0015-2816: ref=['ONE', 'DROP', 'OF', 'NICOTINE', 'EXTRACT', 'OF', 'TOBACCO', 'PLACED', 'ON', 'THE', 'TONGUE', 'OF', 'A', 'DOG', 'WILL', 'KILL', 'HIM', 'IN', 'A', 'MINUTE', 'THE', 'HUNDREDTH', 'PART', 'OF', 'A', 'GRAIN', 'PICKED', 'UNDER', 'THE', 'SKIN', 'OF', 'A', "MAN'S", 'ARM', 'WILL', 'PRODUCE', 'NAUSEA', 'AND', 'FAINTING'] +8280-266249-0015-2816: hyp=['ONE', 'DROP', 'OF', 'NICOTINE', 'EXTRACTED', 'TOBACCO', 'PLACED', 'ON', 'THE', 'TONGUE', 'OF', 'A', 'DOG', 'WILL', 'KILL', 'HIM', 'IN', 'A', 'MINUTE', 'THE', 'HUNDREDTH', 'PART', 'OF', 'A', 'GRAIN', 'PRICKED', 'UNDER', 'THE', 'SKIN', 'OF', 'A', "MAN'S", 'ARM', 'WILL', 'PRODUCE', 'NAUSEA', 'AND', 'FAINTING'] +8280-266249-0016-2817: ref=['THE', 'HALF', 'DOZEN', 'CIGARS', 'WHICH', 'MOST', 'SMOKERS', 'USE', 'A', 'DAY', 'CONTAIN', 'SIX', 'OR', 'SEVEN', 'GRAINS', 'ENOUGH', 'IF', 'CONCENTRATED', 'AND', 'ABSORBED', 'TO', 'KILL', 'THREE', 'MEN', 'AND', 'A', 'POUND', 'OF', 'TOBACCO', 'ACCORDING', 'TO', 'ITS', 'QUALITY', 'CONTAINS', 'FROM', 'ONE', 'QUARTER', 'TO', 'ONE', 'AND', 'A', 'QUARTER', 'OUNCES'] +8280-266249-0016-2817: hyp=['THE', 'HALF', 'DOZEN', 'CIGARS', 'WHICH', 'MOST', 'SMOKERS', 'USE', 'A', 'DAY', 'CONTAIN', 'SIX', 'OR', 'SEVEN', 'GRAINS', 'ENOUGH', 'IF', 'CONCENTRATED', 'AND', 'ABSORBED', 'TO', 'KILL', 'THREE', 'MEN', 'AND', 'A', 'POUND', 'OF', 'TOBACCO', 'ACCORDING', 'TO', 'ITS', 'QUALITY', 'CONTAINS', 'FROM', 'ONE', 'QUARTER', 'TO', 'ONE', 'AND', 'A', 'QUARTER', 'OUNCES'] +8280-266249-0017-2818: ref=['IS', 'IT', 'STRANGE', 'THEN', 'THAT', 'SMOKERS', 'AND', 'CHEWERS', 'HAVE', 'A', 'THOUSAND', 'AILMENTS'] +8280-266249-0017-2818: hyp=['IS', 'IT', 'STRANGE', 'THEN', 'THAT', 'SMOKERS', 'AND', 'CHEWERS', 'HAVE', 'A', 'THOUSAND', 'AILMENTS'] +8280-266249-0018-2819: ref=['THAT', 'THE', 'FRENCH', 'POLYTECHNIC', 'INSTITUTE', 'HAD', 'TO', 'PROHIBIT', 'ITS', 'USE', 'ON', 'ACCOUNT', 'OF', 'ITS', 'EFFECTS', 'ON', 'THE', 'MIND'] +8280-266249-0018-2819: hyp=['THAT', 'THE', 'FRENCH', 'POLYTECHNIC', 'INSTITUTE', 'HAD', 'TO', 'PROHIBIT', 'ITS', 'ILL', 'USE', 'ON', 'ACCOUNT', 'OF', 'ITS', 'EFFECTS', 'UPON', 'THE', 'MINE'] +8280-266249-0019-2820: ref=['NOTICE', 'THE', 'MULTITUDE', 'OF', 'SUDDEN', 'DEATHS', 'AND', 'SEE', 'HOW', 'MANY', 
'ARE', 'SMOKERS', 'AND', 'CHEWERS'] +8280-266249-0019-2820: hyp=['NOTICE', 'THE', 'MULTITUDE', 'OF', 'SUDDEN', 'DEATHS', 'AND', 'SEE', 'HOW', 'MANY', 'ARE', 'SMOKERS', 'AND', 'CHEWERS'] +8280-266249-0020-2821: ref=['IN', 'A', 'SMALL', 'COUNTRY', 'TOWN', 'SEVEN', 'OF', 'THESE', 'MYSTERIOUS', 'PROVIDENCES', 'OCCURRED', 'WITHIN', 'THE', 'CIRCUIT', 'OF', 'A', 'MILE', 'ALL', 'DIRECTLY', 'TRACEABLE', 'TO', 'TOBACCO', 'AND', 'ANY', 'PHYSICIAN', 'ON', 'A', 'FEW', 'MOMENTS', 'REFLECTION', 'CAN', 'MATCH', 'THIS', 'FACT', 'BY', 'HIS', 'OWN', 'OBSERVATION'] +8280-266249-0020-2821: hyp=['IN', 'A', 'SMALL', 'COUNTRY', 'TOWN', 'SEVEN', 'OF', 'THESE', 'MYSTERIOUS', 'PROVIDENCES', 'OCCURRED', 'WITHIN', 'THE', 'CIRCUIT', 'OF', 'A', 'MILE', 'ALL', 'DIRECTLY', 'TRACEABLE', 'TO', 'TOBACCO', 'AND', 'ANY', 'PHYSICIAN', 'ON', 'A', 'FEW', 'MOMENTS', 'REFLECTION', 'CAN', 'MATCH', 'THIS', 'FACT', 'BY', 'HIS', 'OWN', 'OBSERVATION'] +8280-266249-0021-2822: ref=['AND', 'THEN', 'SUCH', 'POWERFUL', 'ACIDS', 'PRODUCE', 'INTENSE', 'IRRITATION', 'AND', 'THIRST', 'THIRST', 'WHICH', 'WATER', 'DOES', 'NOT', 'QUENCH'] +8280-266249-0021-2822: hyp=['AND', 'THEN', 'SUCH', 'POWERFUL', 'ACIDS', 'PRODUCE', 'INTENSE', 'IRRITATION', 'AND', 'THIRST', 'THIRST', 'WHICH', 'WATER', 'DOES', 'NOT', 'QUENCH'] +8280-266249-0022-2823: ref=['HENCE', 'A', 'RESORT', 'TO', 'CIDER', 'AND', 'BEER'] +8280-266249-0022-2823: hyp=['HENCE', 'A', 'RESORT', 'TO', 'CIDER', 'AND', 'BEER'] +8280-266249-0023-2824: ref=['NO', 'SIR', 'WHAT', 'KNOW', 'YE', 'NOT', 'THAT', 'YOUR', 'BODY', 'IS', 'THE', 'TEMPLE', 'OF', 'THE', 'HOLY', 'GHOST', 'WHICH', 'IS', 'IN', 'YOU', 'WHICH', 'YE', 'HAVE', 'OF', 'GOD', 'AND', 'YE', 'ARE', 'NOT', 'YOUR', 'OWN'] +8280-266249-0023-2824: hyp=['NO', 'SIR', 'WHAT', 'KNOW', 'YE', 'NOT', 'THAT', 'YOUR', 'BODY', 'IS', 'THE', 'TEMPLE', 'OF', 'THE', 'HOLY', 'GHOST', 'WHICH', 'IS', 'IN', 'YOU', 'WHICH', 'YE', 'HAVE', 'OF', 'GOD', 'AND', 'YE', 'ARE', 'NOT', 'YOUR', 'OWN'] +8280-266249-0024-2825: ref=['FOR', 'YE', 'ARE', 'BOUGHT', 'WITH', 'A', 'PRICE', 'THEREFORE', 'GLORIFY', 'GOD', 'IN', 'YOUR', 'BODY', 'AND', 'IN', 'YOUR', 'SPIRIT', 'WHICH', 'ARE', "GOD'S"] +8280-266249-0024-2825: hyp=['FOR', 'YOU', 'ARE', 'BOUGHT', 'WITH', 'A', 'PRICE', 'THEREFORE', 'GLORIFY', 'GOD', 'IN', 'YOUR', 'BODY', 'AND', 'IN', 'YOUR', 'SPIRIT', 'WHICH', 'ARE', 'GODS'] +8280-266249-0025-2826: ref=['WE', 'CERTAINLY', 'HAVE', 'NO', 'RIGHT', 'TO', 'INJURE', 'OUR', 'BODIES', 'EITHER', 'BY', 'NEGLECT', 'OR', 'SELF', 'INDULGENCE'] +8280-266249-0025-2826: hyp=['WE', 'CERTAINLY', 'HAVE', 'NO', 'RIGHT', 'TO', 'INJURE', 'OUR', 'BODIES', 'EITHER', 'BY', 'NEGLECT', 'OR', 'SELF', 'INDULGENCE'] +8280-266249-0026-2827: ref=['AND', 'AGAIN', 'I', 'BESEECH', 'YOU', 'THEREFORE', 'BRETHREN', 'BY', 'THE', 'MERCIES', 'OF', 'GOD', 'THAT', 'YE', 'PRESENT', 'YOUR', 'BODIES', 'A', 'LIVING', 'SACRIFICE', 'HOLY', 'ACCEPTABLE', 'UNTO', 'GOD', 'WHICH', 'IS', 'YOUR', 'REASONABLE', 'SERVICE'] +8280-266249-0026-2827: hyp=['AND', 'AGAIN', 'I', 'BESEECH', 'YOU', 'THEREFORE', 'BRETHREN', 'BY', 'THE', 'MERCIES', 'OF', 'GOD', 'THAT', 'YE', 'PRESENT', 'YOUR', 'BODIES', 'A', 'LIVING', 'SACRIFICE', 'WHOLLY', 'ACCEPTABLE', 'UNTO', 'GOD', 'WHICH', 'IS', 'YOUR', 'REASONABLE', 'SERVICE'] +8280-266249-0027-2828: ref=['IT', 'MUST', 'REQUIRE', 'A', 'GOOD', 'DEAL', 'OF', 'RESOLUTION', 'FOR', 'ONE', 'WHO', 'HAS', 'BECOME', 'FOND', 'OF', 'THE', 'INDULGENCE', 'TO', 'GIVE', 'IT', 'UP', 'REMARKED', 'MISTER', 'DALY'] +8280-266249-0027-2828: hyp=['IT', 'MUST', 'REQUIRE', 'A', 'GOOD', 'DEAL', 'OF', 'RESOLUTION', 'FOR', 'ONE', 
'WHO', 'HAS', 'BECOME', 'FOND', 'OF', 'THE', 'INDULGENCE', 'TO', 'GIVE', 'IT', 'UP', 'REMARKED', 'MISTER', 'DALEY'] +8280-266249-0028-2829: ref=['NO', 'DOUBT', 'NO', 'DOUBT', 'RETURNED', 'MISTER', 'LILBURN', 'BUT', 'IF', 'THY', 'RIGHT', 'EYE', 'OFFEND', 'THEE', 'PLUCK', 'IT', 'OUT', 'AND', 'CAST', 'IT', 'FROM', 'THEE', 'FOR', 'IT', 'IS', 'PROFITABLE', 'FOR', 'THEE', 'THAT', 'ONE', 'OF', 'THY', 'MEMBERS', 'SHOULD', 'PERISH', 'AND', 'NOT', 'THAT', 'THY', 'WHOLE', 'BODY', 'SHOULD', 'BE', 'CAST', 'INTO', 'HELL'] +8280-266249-0028-2829: hyp=['NO', 'DOUBT', 'NO', 'DOUBT', 'RETURNED', 'MISTER', 'LOWBURN', 'BUT', 'IF', 'THY', 'RIGHT', 'EYE', 'OFFEND', 'THEE', 'PLUCK', 'IT', 'OUT', 'AND', 'CAST', 'IT', 'FROM', 'THEE', 'FOR', 'IT', 'IS', 'PROFITABLE', 'FOR', 'THEE', 'THAT', 'ONE', 'OF', 'THY', 'MEMBERS', 'SHOULD', 'PERISH', 'AND', 'NOT', 'THAT', 'THY', 'WHOLE', 'BODY', 'SHOULD', 'BE', 'CAST', 'INTO', 'HELL'] +8280-266249-0029-2830: ref=['THERE', 'WAS', 'A', 'PAUSE', 'BROKEN', 'BY', 'YOUNG', 'HORACE', 'WHO', 'HAD', 'BEEN', 'WATCHING', 'A', 'GROUP', 'OF', 'MEN', 'GATHERED', 'ABOUT', 'A', 'TABLE', 'AT', 'THE', 'FURTHER', 'END', 'OF', 'THE', 'ROOM'] +8280-266249-0029-2830: hyp=['THERE', 'WAS', 'A', 'PAUSE', 'BROKEN', 'BY', 'YOUNG', 'HORACE', 'WHO', 'HAD', 'BEEN', 'WATCHING', 'A', 'GROUP', 'OF', 'MEN', 'GATHERED', 'ABOUT', 'A', 'TABLE', 'AT', 'THE', 'FURTHER', 'END', 'OF', 'THE', 'ROOM'] +8280-266249-0030-2831: ref=['THEY', 'ARE', 'GAMBLING', 'YONDER', 'AND', "I'M", 'AFRAID', 'THAT', 'YOUNG', 'FELLOW', 'IS', 'BEING', 'BADLY', 'FLEECED', 'BY', 'THAT', 'MIDDLE', 'AGED', 'MAN', 'OPPOSITE'] +8280-266249-0030-2831: hyp=['THEY', 'ARE', 'GAMBLING', 'YONDER', 'AND', "I'M", 'AFRAID', 'THAT', 'YOUNG', 'FELLOW', 'IS', 'BEING', 'BADLY', 'FLEECED', 'BY', 'THE', 'MIDDLE', 'AGED', 'MAN', 'OPPOSITE'] +8280-266249-0031-2832: ref=['THE', 'EYES', 'OF', 'THE', 'WHOLE', 'PARTY', 'WERE', 'AT', 'ONCE', 'TURNED', 'IN', 'THAT', 'DIRECTION'] +8280-266249-0031-2832: hyp=['THE', 'EYES', 'OF', 'THE', 'WHOLE', 'PARTY', 'WERE', 'AT', 'ONCE', 'TURNED', 'IN', 'THAT', 'DIRECTION'] +8280-266249-0032-2833: ref=['NO', 'SIR', 'HE', 'IS', 'NOT', 'HERE'] +8280-266249-0032-2833: hyp=['NO', 'SIR', 'HE', 'IS', 'NOT', 'HERE'] +8280-266249-0033-2834: ref=['AND', 'THE', 'DOOR', 'WAS', 'SLAMMED', 'VIOLENTLY', 'TO'] +8280-266249-0033-2834: hyp=['AND', 'THE', 'DOOR', 'WAS', 'SLAMMED', 'VIOLENTLY', 'TO'] +8280-266249-0034-2835: ref=['NOW', 'THE', 'VOICE', 'CAME', 'FROM', 'THE', 'SKYLIGHT', 'OVERHEAD', 'APPARENTLY', 'AND', 'WITH', 'A', 'FIERCE', 'IMPRECATION', 'THE', 'IRATE', 'GAMESTER', 'RUSHED', 'UPON', 'DECK', 'AND', 'RAN', 'HITHER', 'AND', 'THITHER', 'IN', 'SEARCH', 'OF', 'HIS', 'TORMENTOR'] +8280-266249-0034-2835: hyp=['NOW', 'THE', 'VOICE', 'CAME', 'FROM', 'THE', 'SKYLIGHT', 'OVERHEAD', 'APPARENTLY', 'AND', 'WITH', 'A', 'FIERCE', 'IMPRECATION', 'THE', 'IRATE', 'GAMESTER', 'RUSHED', 'UPON', 'DECK', 'AND', 'RAN', 'HITHER', 'AND', 'THITHER', 'IN', 'SEARCH', 'OF', 'HIS', 'TORMENTOR'] +8280-266249-0035-2836: ref=['HIS', 'VICTIM', 'WHO', 'HAD', 'BEEN', 'LOOKING', 'ON', 'DURING', 'THE', 'LITTLE', 'SCENE', 'AND', 'LISTENING', 'TO', 'THE', 'MYSTERIOUS', 'VOICE', 'IN', 'SILENT', 'WIDE', 'EYED', 'WONDER', 'AND', 'FEAR', 'NOW', 'ROSE', 'HASTILY', 'HIS', 'FACE', 'DEATHLY', 'PALE', 'WITH', 'TREMBLING', 'HANDS', 'GATHERED', 'UP', 'THE', 'MONEY', 'HE', 'HAD', 'STAKED', 'AND', 'HURRYING', 'INTO', 'HIS', 'STATE', 'ROOM', 'LOCKED', 'HIMSELF', 'IN'] +8280-266249-0035-2836: hyp=['HIS', 'VICTIM', 'WHO', 'HAD', 'BEEN', 'LOOKING', 'ON', 'DURING', 'THE', 'LITTLE', 'SCENE', 
'AND', 'LISTENING', 'TO', 'THE', 'MYSTERIOUS', 'VOICE', 'IN', 'SILENT', 'WIDE', 'EYED', 'WONDER', 'AND', 'FEAR', 'NOW', 'ROSE', 'HASTILY', 'HIS', 'FACE', 'DEATHLY', 'PALE', 'WITH', 'TREMBLING', 'HANDS', 'GATHERED', 'UP', 'THE', 'MONEY', 'HE', 'HAD', 'STAKED', 'AND', 'HURRYING', 'TO', 'HIS', 'STATEROOM', 'LOCKED', 'HIMSELF', 'IN'] +8280-266249-0036-2837: ref=['WHAT', 'DOES', 'IT', 'MEAN', 'CRIED', 'ONE'] +8280-266249-0036-2837: hyp=['WHAT', 'DOES', 'IT', 'MEAN', 'CRIED', 'ONE'] +8280-266249-0037-2838: ref=['A', 'VENTRILOQUIST', 'ABOARD', 'OF', 'COURSE', 'RETURNED', 'ANOTHER', "LET'S", 'FOLLOW', 'AND', 'SEE', 'THE', 'FUN'] +8280-266249-0037-2838: hyp=['A', 'VENTRILOQUEST', 'OF', 'BOARD', 'OF', 'COURSE', 'RETURNED', 'ANOTHER', "LET'S", 'FOLLOW', 'AND', 'SEE', 'THE', 'FUN'] +8280-266249-0038-2839: ref=['I', 'WONDER', 'WHICH', 'OF', 'US', 'IT', 'IS', 'REMARKED', 'THE', 'FIRST', 'LOOKING', 'HARD', 'AT', 'OUR', 'PARTY', 'I', "DON'T", 'KNOW', 'BUT', 'COME', 'ON'] +8280-266249-0038-2839: hyp=['I', 'WONDER', 'WHICH', 'OF', 'US', 'IT', 'IS', 'REMARKED', 'THE', 'FIRST', 'LOOKING', 'HARD', 'AT', 'OUR', 'PARTY', 'I', "DON'T", 'KNOW', 'BUT', 'COME', 'ON'] +8280-266249-0039-2840: ref=['THAT', 'FELLOW', 'NICK', 'WARD', 'IS', 'A', 'NOTED', 'BLACKLEG', 'AND', 'RUFFIAN', 'HAD', 'HIS', 'NOSE', 'BROKEN', 'IN', 'A', 'FIGHT', 'AND', 'IS', 'SENSITIVE', 'ON', 'THE', 'SUBJECT', 'WAS', 'CHEATING', 'OF', 'COURSE'] +8280-266249-0039-2840: hyp=['THAT', 'FELLOW', 'NICK', 'WARD', 'IS', 'A', 'NOTED', 'BLACK', 'LEG', 'AND', 'RUFFIAN', 'HAD', 'HIS', 'NOSE', 'BROKEN', 'IN', 'A', 'FIGHT', 'AND', 'IS', 'SENSITIVE', 'ON', 'THE', 'SUBJECT', 'WAS', 'CHEATING', 'OF', 'COURSE'] +8280-266249-0040-2841: ref=['WHO', 'ASKED', 'THE', 'MATE', "I'VE", 'SEEN', 'NONE', 'UP', 'HERE', 'THOUGH', 'THERE', 'ARE', 'SOME', 'IN', 'THE', 'STEERAGE'] +8280-266249-0040-2841: hyp=['WHO', 'ASKED', 'THE', 'MATE', "I'VE", 'SEEN', 'NO', 'ONE', 'UP', 'HERE', 'THOUGH', 'THERE', 'ARE', 'SOME', 'IN', 'THE', 'STEERAGE'] +8280-266249-0041-2842: ref=['THEY', 'HEARD', 'HIM', 'IN', 'SILENCE', 'WITH', 'A', 'COOL', 'PHLEGMATIC', 'INDIFFERENCE', 'MOST', 'EXASPERATING', 'TO', 'ONE', 'IN', 'HIS', 'PRESENT', 'MOOD'] +8280-266249-0041-2842: hyp=['THEY', 'HEARD', 'HIM', 'IN', 'SILENCE', 'WITH', 'A', 'COOL', 'PHLEGMATIC', 'INDIFFERENCE', 'MOST', 'EXASPERATING', 'TO', 'ONE', 'IN', 'HIS', 'PRESENT', 'MOOD'] +8280-266249-0042-2843: ref=['A', 'MAN', 'OF', 'GIANT', 'SIZE', 'AND', 'HERCULEAN', 'STRENGTH', 'HAD', 'LAID', 'ASIDE', 'HIS', 'PIPE', 'AND', 'SLOWLY', 'RISING', 'TO', 'HIS', 'FEET', 'SEIZED', 'THE', 'SCOUNDREL', 'IN', 'HIS', 'POWERFUL', 'GRASP'] +8280-266249-0042-2843: hyp=['A', 'MAN', 'OF', 'GIANT', 'SIZE', 'AND', 'HERCULEAN', 'STRENGTH', 'HAD', 'LAID', 'ASIDE', 'HIS', 'PIPE', 'AND', 'SLOWLY', 'RISING', 'TO', 'HIS', 'FEET', 'SEIZED', 'THE', 'SCOUNDREL', 'IN', 'HIS', 'POWERFUL', 'GRASP'] +8280-266249-0043-2844: ref=['LET', 'ME', 'GO', 'YELLED', 'WARD', 'MAKING', 'A', 'DESPERATE', 'EFFORT', 'TO', 'FREE', 'HIS', 'ARMS'] +8280-266249-0043-2844: hyp=['LET', 'ME', 'GO', 'YELLED', 'WARD', 'MAKING', 'A', 'DESPERATE', 'EFFORT', 'TO', 'FREE', 'HIS', 'ARMS'] +8280-266249-0044-2845: ref=['I', 'DINKS', 'NO', 'I', 'DINKS', 'I', 'DEACH', 'YOU', 'VON', 'LESSON', 'RETURNED', 'HIS', 'CAPTOR', 'NOT', 'RELAXING', 'HIS', 'GRASP', 'IN', 'THE', 'LEAST'] +8280-266249-0044-2845: hyp=['I', 'DINKS', 'NO', 'DINKS', 'I', 'DID', 'YOU', 'VUN', 'LESSON', 'RETURNED', 'HIS', 'CAPTOR', 'NOT', 'RELAXING', 'HIS', 'GRASP', 'IN', 'THE', 'LEAST'] +8280-266249-0045-2846: ref=['THE', 'GERMAN', 'RELEASED', 'HIS', 
'PRISONER', 'AND', 'THE', 'LATTER', 'SLUNK', 'AWAY', 'WITH', 'MUTTERED', 'THREATS', 'AND', 'IMPRECATIONS', 'UPON', 'THE', 'HEAD', 'OF', 'HIS', 'TORMENTOR'] +8280-266249-0045-2846: hyp=['THE', 'GERMAN', 'RELEASED', 'HIS', 'PRISONER', 'AND', 'THE', 'LATTER', 'SLUNK', 'AWAY', 'WITH', 'MUTTERED', 'THREATS', 'AND', 'IMPRECATIONS', 'UPON', 'THE', 'HEAD', 'OF', 'HIS', 'TORMENTOR'] +8280-266249-0046-2847: ref=['MISTER', 'LILBURN', 'AND', 'MISTER', 'DALY', 'EACH', 'AT', 'A', 'DIFFERENT', 'TIME', 'SOUGHT', 'OUT', 'THE', 'YOUNG', 'MAN', "WARD'S", 'INTENDED', 'VICTIM', 'AND', 'TRIED', 'TO', 'INFLUENCE', 'HIM', 'FOR', 'GOOD'] +8280-266249-0046-2847: hyp=['MISTER', 'LILBURN', 'AND', 'MISTER', 'DALY', 'EACH', 'AT', 'A', 'DIFFERENT', 'TIME', 'SOUGHT', 'OUT', 'THE', 'YOUNG', 'MAN', "WORD'S", 'INTENDED', 'VICTIM', 'AND', 'TRIED', 'TO', 'INFLUENCE', 'HIM', 'FOR', 'GOOD'] +8280-266249-0047-2848: ref=['YET', 'THERE', 'WAS', 'GAMBLING', 'AGAIN', 'THE', 'SECOND', 'NIGHT', 'BETWEEN', 'WARD', 'AND', 'SEVERAL', 'OTHERS', 'OF', 'HIS', 'PROFESSION'] +8280-266249-0047-2848: hyp=['YET', 'THERE', 'WAS', 'GAMBLING', 'AGAIN', 'THE', 'SECOND', 'NIGHT', 'BETWEEN', 'WARD', 'AND', 'SEVERAL', 'OTHERS', 'OF', 'HIS', 'PROFESSION'] +8280-266249-0048-2849: ref=['THEY', 'KEPT', 'IT', 'UP', 'TILL', 'AFTER', 'MIDNIGHT'] +8280-266249-0048-2849: hyp=['THEY', 'KEPT', 'IT', 'UP', 'TILL', 'AFTER', 'MIDNIGHT'] +8280-266249-0049-2850: ref=['THEN', 'MISTER', 'LILBURN', 'WAKING', 'FROM', 'HIS', 'FIRST', 'SLEEP', 'IN', 'A', 'STATEROOM', 'NEAR', 'BY', 'THOUGHT', 'HE', 'WOULD', 'BREAK', 'IT', 'UP', 'ONCE', 'MORE'] +8280-266249-0049-2850: hyp=['THEN', 'MISTER', 'LOWBORN', 'WAKING', 'FROM', 'HIS', 'FIRST', 'SLEEP', 'IN', 'A', 'STATEROOM', 'NEAR', 'BY', 'THOUGHT', 'HE', 'WOULD', 'BREAK', 'IT', 'UP', 'ONCE', 'MORE'] +8280-266249-0050-2851: ref=['AN', 'INTENSE', 'VOICELESS', 'EXCITEMENT', 'POSSESSED', 'THE', 'PLAYERS', 'FOR', 'THE', 'GAME', 'WAS', 'A', 'CLOSE', 'ONE', 'AND', 'THE', 'STAKES', 'WERE', 'VERY', 'HEAVY'] +8280-266249-0050-2851: hyp=['AN', 'INTENSE', 'VOICELESS', 'EXCITEMENT', 'POSSESSED', 'THE', 'PLAYERS', 'FOR', 'THE', 'GAME', 'WAS', 'A', 'CLOSE', 'ONE', 'AND', 'THE', 'STAKES', 'WERE', 'VERY', 'HEAVY'] +8280-266249-0051-2852: ref=['THEY', 'BENT', 'EAGERLY', 'OVER', 'THE', 'BOARD', 'EACH', 'WATCHING', 'WITH', 'FEVERISH', 'ANXIETY', 'HIS', "COMPANION'S", 'MOVEMENTS', 'EACH', 'CASTING', 'NOW', 'AND', 'AGAIN', 'A', 'GLOATING', 'EYE', 'UPON', 'THE', 'HEAP', 'OF', 'GOLD', 'AND', 'GREENBACKS', 'THAT', 'LAY', 'BETWEEN', 'THEM', 'AND', 'AT', 'TIMES', 'HALF', 'STRETCHING', 'OUT', 'HIS', 'HAND', 'TO', 'CLUTCH', 'IT'] +8280-266249-0051-2852: hyp=['THEY', 'BENT', 'EAGERLY', 'OVER', 'THE', 'BOARD', 'EACH', 'WATCHING', 'WITH', 'FEVERISH', 'ANXIETY', 'HIS', "COMPANION'S", 'MOVEMENTS', 'EACH', 'CASTING', 'NOW', 'AND', 'AGAIN', 'A', 'GLOATING', 'EYE', 'UPON', 'THE', 'HEAP', 'OF', 'GOLD', 'AND', 'GREEN', 'BACKS', 'THAT', 'LAY', 'BETWEEN', 'THEM', 'AND', 'AT', 'TIMES', 'HALF', 'STRETCHING', 'OUT', 'HIS', 'HAND', 'TO', 'CLUTCH', 'IT'] +8280-266249-0052-2853: ref=['A', 'DEEP', 'GROAN', 'STARTLED', 'THEM', 'AND', 'THEY', 'SPRANG', 'TO', 'THEIR', 'FEET', 'PALE', 'AND', 'TREMBLING', 'WITH', 'SUDDEN', 'TERROR', 'EACH', 'HOLDING', 'HIS', 'BREATH', 'AND', 'STRAINING', 'HIS', 'EAR', 'TO', 'CATCH', 'A', 'REPETITION', 'OF', 'THE', 'DREAD', 'SOUND'] +8280-266249-0052-2853: hyp=['A', 'DEEP', 'GROAN', 'STARTLED', 'THEM', 'AND', 'THEY', 'SPRANG', 'TO', 'THEIR', 'FEET', 'PALE', 'AND', 'TREMBLING', 'WITH', 'SUDDEN', 'TERROR', 'EACH', 'HOLDING', 'HIS', 'BREATH', 'AND', 
'STRAINING', 'HIS', 'EAR', 'TO', 'CATCH', 'A', 'REPETITION', 'OF', 'THE', 'DREAD', 'SOUND'] +8280-266249-0053-2854: ref=['BUT', 'ALL', 'WAS', 'SILENT', 'AND', 'AFTER', 'A', 'MOMENT', 'OF', 'ANXIOUS', 'WAITING', 'THEY', 'SAT', 'DOWN', 'TO', 'THEIR', 'GAME', 'AGAIN', 'TRYING', 'TO', 'CONCEAL', 'AND', 'SHAKE', 'OFF', 'THEIR', 'FEARS', 'WITH', 'A', 'FORCED', 'UNNATURAL', 'LAUGH'] +8280-266249-0053-2854: hyp=['BUT', 'ALL', 'WAS', 'SILENT', 'AND', 'AFTER', 'A', 'MOMENT', 'OF', 'ANXIOUS', 'WAITING', 'THEY', 'SAT', 'DOWN', 'TO', 'THEIR', 'GAME', 'AGAIN', 'TRYING', 'TO', 'CONCEAL', 'AND', 'SHAKE', 'OFF', 'THEIR', 'FEARS', 'WITH', 'A', 'FORCED', 'UNNATURAL', 'LAUGH'] +8280-266249-0054-2855: ref=['IT', 'CAME', 'FROM', 'UNDER', 'THE', 'TABLE', 'GASPED', 'WARD', 'LOOK', "WHAT'S", 'THERE', 'LOOK', 'YOURSELF'] +8280-266249-0054-2855: hyp=['IT', 'CAME', 'FROM', 'UNDER', 'THE', 'TABLE', 'GASPED', 'HOWARD', 'LOOK', "WHAT'S", 'THERE', 'LOOKED', 'YOURSELF'] +8280-266249-0055-2856: ref=['WHAT', 'CAN', 'IT', 'HAVE', 'BEEN', 'THEY', 'ASKED', 'EACH', 'OTHER'] +8280-266249-0055-2856: hyp=['WHAT', 'CAN', 'IT', 'HAVE', 'BEEN', 'THEY', 'ASKED', 'EACH', 'OTHER'] +8280-266249-0056-2857: ref=['OH', 'NONSENSE', 'WHAT', 'FOOLS', 'WE', 'ARE'] +8280-266249-0056-2857: hyp=['OH', 'NONSENSE', 'WHAT', 'FOOLS', 'WE', 'ARE'] +8280-266249-0057-2858: ref=['IT', 'WAS', 'THE', 'LAST', 'GAME', 'OF', 'CARDS', 'FOR', 'THAT', 'TRIP'] +8280-266249-0057-2858: hyp=['IT', 'WAS', 'THE', 'LAST', 'GAME', 'OF', 'CARDS', 'FOR', 'THAT', 'TRIP'] +8280-266249-0058-2859: ref=['THE', 'CAPTAIN', 'COMING', 'IN', 'SHORTLY', 'AFTER', 'THE', 'SUDDEN', 'FLIGHT', 'OF', 'THE', 'GAMBLERS', 'TOOK', 'CHARGE', 'OF', 'THE', 'MONEY', 'AND', 'THE', 'NEXT', 'DAY', 'RESTORED', 'IT', 'TO', 'THE', 'OWNERS'] +8280-266249-0058-2859: hyp=['THE', 'CAPTAIN', 'COMING', 'IN', 'SHORTLY', 'AFTER', 'THE', 'SUDDEN', 'FLIGHT', 'OF', 'THE', 'GAMBLERS', 'TOOK', 'CHARGE', 'OF', 'THE', 'MONEY', 'AND', 'THE', 'NEXT', 'DAY', 'RESTORED', 'IT', 'TO', 'THE', 'OWNERS'] +8280-266249-0059-2860: ref=['TO', "ELSIE'S", 'OBSERVANT', 'EYES', 'IT', 'PRESENTLY', 'BECAME', 'EVIDENT', 'THAT', 'THE', 'DALYS', 'WERE', 'IN', 'VERY', 'STRAITENED', 'CIRCUMSTANCES'] +8280-266249-0059-2860: hyp=['TO', "ELSIE'S", 'OBSERVANT', 'EYES', 'IT', 'PRESENTLY', 'BECAME', 'EVIDENT', 'THAT', 'THE', 'DAILIES', 'WERE', 'IN', 'VERY', 'STRAITENED', 'CIRCUMSTANCES'] +8280-266249-0060-2861: ref=['OH', 'HOW', 'KIND', 'HOW', 'VERY', 'KIND', 'MISSUS', 'DALY', 'SAID', 'WITH', 'TEARS', 'OF', 'JOY', 'AND', 'GRATITUDE', 'WE', 'HAVE', 'HARDLY', 'KNOWN', 'HOW', 'WE', 'SHOULD', 'MEET', 'THE', 'MOST', 'NECESSARY', 'EXPENSES', 'OF', 'THIS', 'TRIP', 'BUT', 'HAVE', 'BEEN', 'TRYING', 'TO', 'CAST', 'OUR', 'CARE', 'UPON', 'THE', 'LORD', 'ASKING', 'HIM', 'TO', 'PROVIDE'] +8280-266249-0060-2861: hyp=['OH', 'HOW', 'KIND', 'HOW', 'VERY', 'KIND', 'MISSUS', 'DALY', 'SAID', 'WITH', 'TEARS', 'OF', 'JOY', 'AND', 'GRATITUDE', 'WE', 'HAVE', 'HARDLY', 'KNOWN', 'HOW', 'WE', 'SHOULD', 'MEET', 'THE', 'MOST', 'NECESSARY', 'EXPENSES', 'OF', 'THIS', 'TRIP', 'BUT', 'HAVE', 'BEEN', 'TRYING', 'TO', 'CAST', 'OUR', 'CARE', 'UPON', 'THE', 'LORD', 'ASKING', 'HIM', 'TO', 'PROVIDE'] +8280-266249-0061-2862: ref=['AND', 'HOW', 'WONDERFULLY', 'HE', 'HAS', 'ANSWERED', 'OUR', 'PETITIONS'] +8280-266249-0061-2862: hyp=['AND', 'HOW', 'WONDERFULLY', 'HE', 'HAS', 'ANSWERED', 'OUR', 'PETITIONS'] +8280-266249-0062-2863: ref=['ELSIE', 'ANSWERED', 'PRESSING', 'HER', 'HAND', 'AFFECTIONATELY', 'ART', 'WE', 'NOT', 'SISTERS', 'IN', 'CHRIST'] +8280-266249-0062-2863: hyp=['ELSIE', 
'ANSWERED', 'PRESSING', 'HER', 'HAND', 'AFFECTIONATELY', 'ARE', 'WE', 'NOT', 'SISTERS', 'IN', 'CHRIST'] +8280-266249-0063-2864: ref=['YE', 'ARE', 'ALL', 'THE', 'CHILDREN', 'OF', 'GOD', 'BY', 'FAITH', 'IN', 'CHRIST', 'JESUS'] +8280-266249-0063-2864: hyp=['YE', 'ARE', 'ALL', 'THE', 'CHILDREN', 'OF', 'GOD', 'BY', 'FAITH', 'IN', 'CHRIST', 'JESUS'] +8280-266249-0064-2865: ref=['YE', 'ARE', 'ALL', 'ONE', 'IN', 'CHRIST', 'JESUS'] +8280-266249-0064-2865: hyp=['YE', 'ARE', 'ALL', 'ONE', 'IN', 'CHRIST', 'JESUS'] +8280-266249-0065-2866: ref=['WE', 'FEEL', 'MY', 'HUSBAND', 'AND', 'I', 'THAT', 'WE', 'ARE', 'ONLY', 'THE', 'STEWARDS', 'OF', 'HIS', 'BOUNTY', 'AND', 'THAT', 'BECAUSE', 'HE', 'HAS', 'SAID', 'INASMUCH', 'AS', 'YE', 'HAVE', 'DONE', 'IT', 'UNTO', 'ONE', 'OF', 'THE', 'LEAST', 'OF', 'THESE', 'MY', 'BRETHREN', 'YE', 'HAVE', 'DONE', 'IT', 'UNTO', 'ME', 'IT', 'IS', 'THE', 'GREATEST', 'PRIVILEGE', 'AND', 'DELIGHT', 'TO', 'DO', 'ANYTHING', 'FOR', 'HIS', 'PEOPLE'] +8280-266249-0065-2866: hyp=['WE', 'FEEL', 'MY', 'HUSBAND', 'AND', 'I', 'THAT', 'WE', 'ARE', 'ONLY', 'THE', 'STEWARDS', 'OF', 'HIS', 'BOUNTY', 'AND', 'BECAUSE', 'HE', 'HAS', 'SAID', 'INASMUCH', 'AS', 'YE', 'HAVE', 'DONE', 'IT', 'UNTO', 'ONE', 'OF', 'THE', 'LEAST', 'OF', 'THESE', 'MY', 'BRETHREN', 'YE', 'HAVE', 'DONE', 'IT', 'UNTO', 'ME', 'IT', 'IS', 'THE', 'GREATEST', 'PRIVILEGE', 'AND', 'DELIGHT', 'TO', 'DO', 'ANYTHING', 'FOR', 'HIS', 'PEOPLE'] +8461-258277-0000-2867: ref=['WHEN', 'IT', 'WAS', 'THE', 'SEVEN', 'HUNDRED', 'AND', 'EIGHTEENTH', 'NIGHT'] +8461-258277-0000-2867: hyp=['WHEN', 'IT', 'WAS', 'THE', 'SEVEN', 'HUNDRED', 'AND', 'EIGHTEENTH', 'NIGHT'] +8461-258277-0001-2868: ref=['BUT', 'HE', 'ANSWERED', 'NEEDS', 'MUST', 'I', 'HAVE', 'ZAYNAB', 'ALSO', 'NOW', 'SUDDENLY', 'THERE', 'CAME', 'A', 'RAP', 'AT', 'THE', 'DOOR', 'AND', 'THE', 'MAID', 'SAID', 'WHO', 'IS', 'AT', 'THE', 'DOOR'] +8461-258277-0001-2868: hyp=['BUT', 'HE', 'ANSWERED', 'NEEDS', 'MUST', 'THY', 'HAVE', 'THY', 'NAB', 'ALSO', 'NOW', 'SUDDENLY', 'THERE', 'CAME', 'A', 'RAP', 'AT', 'THE', 'DOOR', 'AND', 'THE', 'MAID', 'SAID', 'WHO', 'IS', 'AT', 'THE', 'DOOR'] +8461-258277-0002-2869: ref=['THE', 'KNOCKER', 'REPLIED', 'KAMAR', 'DAUGHTER', 'OF', 'AZARIAH', 'THE', 'JEW', 'SAY', 'ME', 'IS', 'ALI', 'OF', 'CAIRO', 'WITH', 'YOU'] +8461-258277-0002-2869: hyp=['THE', 'KNOCKER', 'REPLIED', 'KAMA', 'DAUGHTER', 'OF', 'AZARAIAH', 'THE', 'JEW', 'SAY', 'ME', 'IS', 'ALI', 'OF', 'CAIRO', 'WITH', 'YOU'] +8461-258277-0003-2870: ref=['REPLIED', 'THE', "BROKER'S", 'DAUGHTER', 'O', 'THOU', 'DAUGHTER', 'OF', 'A', 'DOG'] +8461-258277-0003-2870: hyp=['REPLIED', 'THE', "BROKER'S", 'DAUGHTER', 'O', 'THOU', 'DAUGHTER', 'OF', 'A', 'DOG'] +8461-258277-0004-2871: ref=['AND', 'HAVING', 'THUS', 'ISLAMISED', 'SHE', 'ASKED', 'HIM', 'DO', 'MEN', 'IN', 'THE', 'FAITH', 'OF', 'AL', 'ISLAM', 'GIVE', 'MARRIAGE', 'PORTIONS', 'TO', 'WOMEN', 'OR', 'DO', 'WOMEN', 'DOWER', 'MEN'] +8461-258277-0004-2871: hyp=['AND', 'HAVING', 'THUS', 'ISLAMISED', 'SHE', 'ASKED', 'HIM', 'TWO', 'MEN', 'IN', 'THE', 'FAITH', 'OF', 'AL', 'ISLAM', 'GIVE', 'MARRIAGE', 'PORTIONS', 'TO', 'WOMEN', 'OR', 'DO', 'WOMEN', 'TO', 'OUR', 'MEN'] +8461-258277-0005-2872: ref=['AND', 'SHE', 'THREW', 'DOWN', 'THE', "JEW'S", 'HEAD', 'BEFORE', 'HIM'] +8461-258277-0005-2872: hyp=['AND', 'SHE', 'THREW', 'DOWN', 'THE', "JEW'S", 'HEAD', 'BEFORE', 'HIM'] +8461-258277-0006-2873: ref=['NOW', 'THE', 'CAUSE', 'OF', 'HER', 'SLAYING', 'HER', 'SIRE', 'WAS', 'AS', 'FOLLOWS'] +8461-258277-0006-2873: hyp=['NOW', 'THE', 'CAUSE', 'OF', 'HER', 'SLAYING', 'HER', 'SIRE', 'WAS', 'AS', 
'FOLLOWS'] +8461-258277-0007-2874: ref=['THEN', 'HE', 'SET', 'OUT', 'REJOICING', 'TO', 'RETURN', 'TO', 'THE', 'BARRACK', 'OF', 'THE', 'FORTY'] +8461-258277-0007-2874: hyp=['THEN', 'HE', 'SET', 'OUT', 'REJOICING', 'TO', 'RETURN', 'TO', 'THE', 'BARRA', 'OF', 'THE', 'FORTY'] +8461-258277-0008-2875: ref=['SO', 'HE', 'ATE', 'AND', 'FELL', 'DOWN', 'SENSELESS', 'FOR', 'THE', 'SWEETMEATS', 'WERE', 'DRUGGED', 'WITH', 'BHANG', 'WHEREUPON', 'THE', 'KAZI', 'BUNDLED', 'HIM', 'INTO', 'THE', 'SACK', 'AND', 'MADE', 'OFF', 'WITH', 'HIM', 'CHARGER', 'AND', 'CHEST', 'AND', 'ALL', 'TO', 'THE', 'BARRACK', 'OF', 'THE', 'FORTY'] +8461-258277-0008-2875: hyp=['SO', 'HE', 'ATE', 'AND', 'FELL', 'DOWN', 'SENSELESS', 'FOR', 'THE', 'SWEETMEATS', 'WERE', 'DRUGGED', 'WITH', 'BANG', 'WHEREUPON', 'THE', 'KAZI', 'BUNDLED', 'HIM', 'INTO', 'THE', 'SACK', 'AND', 'MADE', 'OFF', 'WITH', 'HIM', 'CHARGER', 'AND', 'CHEST', 'AND', 'ALL', 'TO', 'THE', 'BARRACK', 'OF', 'THE', 'FORTY'] +8461-258277-0009-2876: ref=['PRESENTLY', 'HASAN', 'SHUMAN', 'CAME', 'OUT', 'OF', 'A', 'CLOSET', 'AND', 'SAID', 'TO', 'HIM', 'HAST', 'THOU', 'GOTTEN', 'THE', 'GEAR', 'O', 'ALI'] +8461-258277-0009-2876: hyp=['PRESENTLY', 'HASSAN', 'SHUMAN', 'CAME', 'OUT', 'OF', 'A', 'CLOSET', 'AND', 'SAID', 'TO', 'HIM', 'HAST', 'THOU', 'GOTTEN', 'THE', 'GEAR', 'O', 'ALI'] +8461-258277-0010-2877: ref=['SO', 'HE', 'TOLD', 'HIM', 'WHAT', 'HAD', 'BEFALLEN', 'HIM', 'AND', 'ADDED', 'IF', 'I', 'KNOW', 'WHITHER', 'THE', 'RASCAL', 'IS', 'GONE', 'AND', 'WHERE', 'TO', 'FIND', 'THE', 'KNAVE', 'I', 'WOULD', 'PAY', 'HIM', 'OUT'] +8461-258277-0010-2877: hyp=['SO', 'HE', 'TOLD', 'HIM', 'WHAT', 'HAD', 'BEFALLEN', 'HIM', 'AND', 'ADDED', 'IF', 'I', 'KNOW', 'WHITHER', 'THE', 'RASCAL', 'IS', 'GONE', 'AND', 'WHERE', 'TO', 'FIND', 'THE', 'KNAVE', 'I', 'WILL', 'PAY', 'HIM', 'OUT'] +8461-258277-0011-2878: ref=['KNOWEST', 'THOU', 'WHITHER', 'HE', 'WENT'] +8461-258277-0011-2878: hyp=['KNOWEST', 'THOU', 'WHITHER', 'HE', 'WENT'] +8461-258277-0012-2879: ref=['ANSWERED', 'HASAN', 'I', 'KNOW', 'WHERE', 'HE', 'IS', 'AND', 'OPENING', 'THE', 'DOOR', 'OF', 'THE', 'CLOSET', 'SHOWED', 'HIM', 'THE', 'SWEETMEAT', 'SELLER', 'WITHIN', 'DRUGGED', 'AND', 'SENSELESS'] +8461-258277-0012-2879: hyp=['ANSWERED', 'HASAN', 'I', 'KNOW', 'WHERE', 'HE', 'IS', 'AND', 'OPENING', 'THE', 'DOOR', 'OF', 'THE', 'CLOSET', 'SHOWED', 'HIM', 'THE', 'SWEETMEAT', 'CELLAR', 'WITHIN', 'DRUGGED', 'AND', 'SENSELESS'] +8461-258277-0013-2880: ref=['SO', 'I', 'WENT', 'ROUND', 'ABOUT', 'THE', 'HIGHWAYS', 'OF', 'THE', 'CITY', 'TILL', 'I', 'MET', 'A', 'SWEETMEAT', 'SELLER', 'AND', 'BUYING', 'HIS', 'CLOTHES', 'AND', 'STOCK', 'IN', 'TRADE', 'AND', 'GEAR', 'FOR', 'TEN', 'DINARS', 'DID', 'WHAT', 'WAS', 'DONE'] +8461-258277-0013-2880: hyp=['SO', 'I', 'WENT', 'ROUND', 'ABOUT', 'THE', 'HIGHWAYS', 'OF', 'THE', 'CITY', 'TILL', 'I', 'MET', 'A', 'SWEETMEAT', 'SELLER', 'AND', 'BUYING', 'HIS', 'CLOTHES', 'AND', 'STOCK', 'IN', 'TRADE', 'AND', 'GEAR', 'FOR', 'TEN', 'DINARS', 'DID', 'WHAT', 'WAS', 'DONE'] +8461-258277-0014-2881: ref=['QUOTH', 'AL', 'RASHID', 'WHOSE', 'HEAD', 'IS', 'THIS'] +8461-258277-0014-2881: hyp=['QUOTH', 'AL', 'RASHID', 'WHOSE', 'HEAD', 'IS', 'THIS'] +8461-258277-0015-2882: ref=['SO', 'ALI', 'RELATED', 'TO', 'HIM', 'ALL', 'THAT', 'HAD', 'PASSED', 'FROM', 'FIRST', 'TO', 'LAST', 'AND', 'THE', 'CALIPH', 'SAID', 'I', 'HAD', 'NOT', 'THOUGHT', 'THOU', 'WOULDST', 'KILL', 'HIM', 'FOR', 'THAT', 'HE', 'WAS', 'A', 'SORCERER'] +8461-258277-0015-2882: hyp=['SO', 'ALI', 'RELATED', 'TO', 'HIM', 'ALL', 'THAT', 'HAD', 'PASSED', 'FROM', 'FIRST', 'TO', 'LAST', 
'AND', 'THE', 'CALIPH', 'SAID', 'I', 'HAD', 'NOT', 'THOUGHT', 'THOU', 'WOULDST', 'KILL', 'HIM', 'FOR', 'THAT', 'HE', 'WAS', 'A', 'SORCERER'] +8461-258277-0016-2883: ref=['HE', 'REPLIED', 'I', 'HAVE', 'FORTY', 'LADS', 'BUT', 'THEY', 'ARE', 'IN', 'CAIRO'] +8461-258277-0016-2883: hyp=['HE', 'REPLIED', 'I', 'HAVE', 'FORTY', 'LADS', 'BUT', 'THEY', 'ARE', 'IN', 'CAIRO'] +8461-278226-0000-2884: ref=['AND', 'LAURA', 'HAD', 'HER', 'OWN', 'PET', 'PLANS'] +8461-278226-0000-2884: hyp=['AND', 'LAURA', 'HAD', 'HER', 'OWN', 'PET', 'PLANS'] +8461-278226-0001-2885: ref=['SHE', 'MEANT', 'TO', 'BE', 'SCRUPULOUSLY', 'CONSCIENTIOUS', 'IN', 'THE', 'ADMINISTRATION', 'OF', 'HER', 'TALENTS', 'AND', 'SOMETIMES', 'AT', 'CHURCH', 'ON', 'A', 'SUNDAY', 'WHEN', 'THE', 'SERMON', 'WAS', 'PARTICULARLY', 'AWAKENING', 'SHE', 'MENTALLY', 'DEBATED', 'THE', 'SERIOUS', 'QUESTION', 'AS', 'TO', 'WHETHER', 'NEW', 'BONNETS', 'AND', 'A', 'PAIR', 'OF', "JOUVIN'S", 'GLOVES', 'DAILY', 'WERE', 'NOT', 'SINFUL', 'BUT', 'I', 'THINK', 'SHE', 'DECIDED', 'THAT', 'THE', 'NEW', 'BONNETS', 'AND', 'GLOVES', 'WERE', 'ON', 'THE', 'WHOLE', 'A', 'PARDONABLE', 'WEAKNESS', 'AS', 'BEING', 'GOOD', 'FOR', 'TRADE'] +8461-278226-0001-2885: hyp=['SHE', 'MEANT', 'TO', 'BE', 'SCRUPULOUSLY', 'CONSCIENTIOUS', 'IN', 'THE', 'ADMINISTRATION', 'OF', 'HER', 'TALENTS', 'AND', 'SOMETIMES', 'AT', 'CHURCH', 'ON', 'A', 'SUNDAY', 'WHEN', 'THE', 'SERMON', 'WAS', 'PARTICULARLY', 'AWAKENING', 'SHE', 'MENTALLY', 'DEBATED', 'A', 'SERIOUS', 'QUESTION', 'AS', 'TO', 'WHETHER', 'NEW', 'BONNETS', 'AND', 'A', 'PAIR', 'OF', 'ROUVNENES', 'GLOVES', 'DAILY', 'WERE', 'NOT', 'SINFUL', 'BUT', 'I', 'THINK', 'SHE', 'DECIDED', 'THAT', 'THE', 'NEW', 'BONNETS', 'AND', 'GLOVES', 'WERE', 'ON', 'THE', 'WHOLE', 'A', 'PARDONABLE', 'WEAKNESS', 'AS', 'BEING', 'GOOD', 'FOR', 'TRADE'] +8461-278226-0002-2886: ref=['ONE', 'MORNING', 'LAURA', 'TOLD', 'HER', 'HUSBAND', 'WITH', 'A', 'GAY', 'LAUGH', 'THAT', 'SHE', 'WAS', 'GOING', 'TO', 'VICTIMIZE', 'HIM', 'BUT', 'HE', 'WAS', 'TO', 'PROMISE', 'TO', 'BE', 'PATIENT', 'AND', 'BEAR', 'WITH', 'HER', 'FOR', 'ONCE', 'IN', 'A', 'WAY'] +8461-278226-0002-2886: hyp=['ONE', 'MORNING', 'LAURA', 'TOLD', 'HER', 'HUSBAND', 'WITH', 'A', 'GAY', 'LAUGH', 'THAT', 'SHE', 'WAS', 'GOING', 'TO', 'VICTIMIZE', 'HIM', 'BUT', 'HE', 'WAS', 'TO', 'PROMISE', 'TO', 'BE', 'PATIENT', 'AND', 'BEAR', 'WITH', 'HER', 'FOR', 'ONCE', 'IN', 'A', 'WAY'] +8461-278226-0003-2887: ref=['I', 'WANT', 'TO', 'SEE', 'ALL', 'THE', 'PICTURES', 'THE', 'MODERN', 'PICTURES', 'ESPECIALLY'] +8461-278226-0003-2887: hyp=['I', 'WANT', 'TO', 'SEE', 'ALL', 'THE', 'PICTURES', 'THE', 'MODERN', 'PICTURES', 'ESPECIALLY'] +8461-278226-0004-2888: ref=['I', 'REMEMBER', 'ALL', 'THE', 'RUBENSES', 'AT', 'THE', 'LOUVRE', 'FOR', 'I', 'SAW', 'THEM', 'THREE', 'YEARS', 'AGO', 'WHEN', 'I', 'WAS', 'STAYING', 'IN', 'PARIS', 'WITH', 'GRANDPAPA'] +8461-278226-0004-2888: hyp=['I', 'REMEMBER', 'ALL', 'THE', 'RUBENSES', 'AT', 'THE', 'LOUVRE', 'FOR', 'I', 'SAW', 'THEM', 'THREE', 'YEARS', 'AGO', 'WHEN', 'I', 'WAS', 'STAYING', 'IN', 'PARIS', 'WITH', 'GRANDPAPA'] +8461-278226-0005-2889: ref=['SHE', 'RETURNED', 'IN', 'A', 'LITTLE', 'MORE', 'THAN', 'TEN', 'MINUTES', 'IN', 'THE', 'FRESHEST', 'TOILETTE', 'ALL', 'PALE', 'SHIMMERING', 'BLUE', 'LIKE', 'THE', 'SPRING', 'SKY', 'WITH', 'PEARL', 'GREY', 'GLOVES', 'AND', 'BOOTS', 'AND', 'PARASOL', 'AND', 'A', 'BONNET', 'THAT', 'SEEMED', 'MADE', 'OF', 'AZURE', 'BUTTERFLIES'] +8461-278226-0005-2889: hyp=['SHE', 'RETURNED', 'IN', 'A', 'LITTLE', 'MORE', 'THAN', 'TEN', 'MINUTES', 'IN', 'THE', 'FRESHEST', 'TOILETTE', 
'ALL', 'PALE', 'SHIMMERING', 'BLUE', 'LIKE', 'THE', 'SPRING', 'SKY', 'WITH', 'PURLE', 'GRAY', 'GLOVES', 'AND', 'BOOTS', 'AND', 'PARASOL', 'AND', 'A', 'BONNET', 'THAT', 'SEEMED', 'MADE', 'OF', 'AZURE', 'BUTTERFLIES'] +8461-278226-0006-2890: ref=['IT', 'WAS', 'DRAWING', 'TOWARDS', 'THE', 'CLOSE', 'OF', 'THIS', 'DELIGHTFUL', 'HONEYMOON', 'TOUR', 'AND', 'IT', 'WAS', 'A', 'BRIGHT', 'SUNSHINY', 'MORNING', 'EARLY', 'IN', 'FEBRUARY', 'BUT', 'FEBRUARY', 'IN', 'PARIS', 'IS', 'SOMETIMES', 'BETTER', 'THAN', 'APRIL', 'IN', 'LONDON'] +8461-278226-0006-2890: hyp=['HE', 'WAS', 'DRAWING', 'TOWARDS', 'THE', 'CLOSE', 'OF', 'THIS', 'DELIGHTFUL', 'HONEYMOON', 'TOUR', 'AND', 'IT', 'WAS', 'A', 'BRIGHT', 'SUNSHINY', 'MORNING', 'EARLY', 'IN', 'FEBRUARY', 'BUT', 'FEBRUARY', 'IN', 'PARIS', 'IS', 'SOMETIMES', 'BETTER', 'THAN', 'APRIL', 'IN', 'LONDON'] +8461-278226-0007-2891: ref=['BUT', 'SHE', 'FIXED', 'UPON', 'A', 'PICTURE', 'WHICH', 'SHE', 'SAID', 'SHE', 'PREFERRED', 'TO', 'ANYTHING', 'SHE', 'HAD', 'SEEN', 'IN', 'THE', 'GALLERY'] +8461-278226-0007-2891: hyp=['BUT', 'SHE', 'FIXED', 'UPON', 'A', 'PICTURE', 'WHICH', 'SHE', 'SAID', 'SHE', 'PREFERRED', 'TO', 'ANYTHING', 'SHE', 'HAD', 'SEEN', 'IN', 'THE', 'GALLERY'] +8461-278226-0008-2892: ref=['PHILIP', 'JOCELYN', 'WAS', 'EXAMINING', 'SOME', 'PICTURES', 'ON', 'THE', 'OTHER', 'SIDE', 'OF', 'THE', 'ROOM', 'WHEN', 'HIS', 'WIFE', 'MADE', 'THIS', 'DISCOVERY'] +8461-278226-0008-2892: hyp=['PHILIP', 'JOCELYN', 'WAS', 'EXAMINING', 'SOME', 'PICTURES', 'ON', 'THE', 'OTHER', 'SIDE', 'OF', 'THE', 'ROOM', 'WHEN', 'HIS', 'WIFE', 'MADE', 'THE', 'DISCOVERY'] +8461-278226-0009-2893: ref=['HOW', 'I', 'WISH', 'YOU', 'COULD', 'GET', 'ME', 'A', 'COPY', 'OF', 'THAT', 'PICTURE', 'PHILIP', 'LAURA', 'SAID', 'ENTREATINGLY'] +8461-278226-0009-2893: hyp=['HOW', 'I', 'WISH', 'YOU', 'COULD', 'GET', 'ME', 'A', 'COPY', 'OF', 'THAT', 'PICTURE', 'PHILIP', 'LORA', 'SAID', 'ENTREATINGLY'] +8461-278226-0010-2894: ref=['I', 'SHOULD', 'SO', 'LIKE', 'ONE', 'TO', 'HANG', 'IN', 'MY', 'MORNING', 'ROOM', 'AT', "JOCELYN'S", 'ROCK'] +8461-278226-0010-2894: hyp=['I', 'SHOULD', 'SO', 'LIKE', 'ONE', 'TO', 'HANG', 'IN', 'MY', 'MORNING', 'ROOM', 'AT', 'JOSTLINGS', 'FROCK'] +8461-278226-0011-2895: ref=['SHE', 'TURNED', 'TO', 'THE', 'FRENCH', 'ARTIST', 'PRESENTLY', 'AND', 'ASKED', 'HIM', 'WHERE', 'THE', 'ELDER', 'MISTER', 'KERSTALL', 'LIVED', 'AND', 'IF', 'THERE', 'WAS', 'ANY', 'POSSIBILITY', 'OF', 'SEEING', 'HIM'] +8461-278226-0011-2895: hyp=['SHE', 'TURNED', 'TO', 'THE', 'FRENCH', 'ARTIST', 'PRESENTLY', 'AND', 'ASKED', 'HIM', 'WHERE', 'THE', 'ELDER', 'MISTER', 'CRISTEL', 'LIVED', 'AND', 'IF', 'THERE', 'WAS', 'ANY', 'POSSIBILITY', 'OF', 'SEEING', 'HIM'] +8461-278226-0012-2896: ref=['THEY', 'HAVE', 'SAID', 'THAT', 'HE', 'IS', 'EVEN', 'A', 'LITTLE', 'IMBECILE', 'THAT', 'HE', 'DOES', 'NOT', 'REMEMBER', 'HIMSELF', 'OF', 'THE', 'MOST', 'COMMON', 'EVENTS', 'OF', 'HIS', 'LIFE'] +8461-278226-0012-2896: hyp=['THEY', 'HAVE', 'SAID', 'THAT', 'HE', 'IS', 'EVEN', 'A', 'LITTLE', 'IMBECILE', 'THAT', 'HE', 'DOES', 'NOT', 'REMEMBER', 'HIMSELF', 'OF', 'THE', 'MOST', 'COMMON', 'EVENTS', 'OF', 'HIS', 'LIFE'] +8461-278226-0013-2897: ref=['BUT', 'THERE', 'ARE', 'SOME', 'OTHERS', 'WHO', 'SAY', 'THAT', 'HIS', 'MEMORY', 'HAS', 'NOT', 'ALTOGETHER', 'FAILED', 'AND', 'THAT', 'HE', 'IS', 'STILL', 'ENOUGH', 'HARSHLY', 'CRITICAL', 'TOWARDS', 'THE', 'WORKS', 'OF', 'OTHERS'] +8461-278226-0013-2897: hyp=['BUT', 'THERE', 'ARE', 'SOME', 'OTHERS', 'WHO', 'SAY', 'THAT', 'HIS', 'MEMORY', 'HAS', 'NOT', 'ALTOGETHER', 'FAILED', 'AND', 'THAT', 'HE', 'IS', 
'STILL', 'ENOUGH', 'HARSHLY', 'CRITICAL', 'TOWARDS', 'THE', 'WORKS', 'OF', 'OTHERS'] +8461-278226-0014-2898: ref=['I', "DON'T", 'THINK', 'YOU', 'WILL', 'HAVE', 'ANY', 'DIFFICULTY', 'IN', 'FINDING', 'THE', 'HOUSE'] +8461-278226-0014-2898: hyp=['I', "DON'T", 'THINK', 'YOU', 'WILL', 'HAVE', 'ANY', 'DIFFICULTY', 'IN', 'FINDING', 'THE', 'HOUSE'] +8461-278226-0015-2899: ref=['YOU', 'WILL', 'BE', 'DOING', 'ME', 'SUCH', 'A', 'FAVOUR', 'PHILIP', 'IF', "YOU'LL", 'SAY', 'YES'] +8461-278226-0015-2899: hyp=['YOU', 'WILL', 'BE', 'DOING', 'ME', 'SUCH', 'A', 'FAVOR', 'PHILIP', 'IF', "YOU'LL", 'SAY', 'YES'] +8461-281231-0000-2900: ref=['HIS', 'FOLLOWERS', 'RUSHED', 'FORWARD', 'TO', 'WHERE', 'HE', 'LAY', 'AND', 'THEIR', 'UNITED', 'FORCE', 'COMPELLING', 'THE', 'BLACK', 'KNIGHT', 'TO', 'PAUSE', 'THEY', 'DRAGGED', 'THEIR', 'WOUNDED', 'LEADER', 'WITHIN', 'THE', 'WALLS'] +8461-281231-0000-2900: hyp=['HIS', 'FOLLOWERS', 'RUSHED', 'FORWARD', 'TO', 'WHERE', 'HE', 'LAY', 'AND', 'THEIR', 'UNITED', 'FORCE', 'COMPELLING', 'THE', 'BLACK', 'KNIGHT', 'TO', 'PAUSE', 'THEY', 'DRAGGED', 'THE', 'WOUNDED', 'LEADER', 'WITHIN', 'THE', 'WALLS'] +8461-281231-0001-2901: ref=['IT', 'WAS', 'ON', 'THEIR', 'JOURNEY', 'TO', 'THAT', 'TOWN', 'THAT', 'THEY', 'WERE', 'OVERTAKEN', 'ON', 'THE', 'ROAD', 'BY', 'CEDRIC', 'AND', 'HIS', 'PARTY', 'IN', 'WHOSE', 'COMPANY', 'THEY', 'WERE', 'AFTERWARDS', 'CARRIED', 'CAPTIVE', 'TO', 'THE', 'CASTLE', 'OF', 'TORQUILSTONE'] +8461-281231-0001-2901: hyp=['IT', 'WAS', 'ON', 'THEIR', 'JOURNEY', 'TO', 'THAT', 'TOWN', 'THAT', 'THEY', 'WERE', 'OVERTAKEN', 'ON', 'THE', 'ROAD', 'BY', 'CEDRIC', 'AND', 'HIS', 'PARTY', 'IN', 'WHOSE', 'COMPANY', 'THEY', 'WERE', 'AFTERWARDS', 'CARRIED', 'CAPTIVE', 'TO', 'THE', 'CASTLE', 'OF', 'TORKELSTONE'] +8461-281231-0002-2902: ref=['AS', 'HE', 'LAY', 'UPON', 'HIS', 'BED', 'RACKED', 'WITH', 'PAIN', 'AND', 'MENTAL', 'AGONY', 'AND', 'FILLED', 'WITH', 'THE', 'FEAR', 'OF', 'RAPIDLY', 'APPROACHING', 'DEATH', 'HE', 'HEARD', 'A', 'VOICE', 'ADDRESS', 'HIM'] +8461-281231-0002-2902: hyp=['AS', 'HE', 'LAY', 'UPON', 'HIS', 'BED', 'WRAPPED', 'WITH', 'PAIN', 'AND', 'MENTAL', 'AGONY', 'AND', 'FILLED', 'WITH', 'THE', 'FEAR', 'OF', 'RAPIDLY', 'APPROACHING', 'DEATH', 'HE', 'HEARD', 'A', 'VOICE', 'ADDRESS', 'HIM'] +8461-281231-0003-2903: ref=['WHAT', 'ART', 'THOU', 'HE', 'EXCLAIMED', 'IN', 'TERROR'] +8461-281231-0003-2903: hyp=['WHAT', 'ART', 'THOU', 'HE', 'EXCLAIMED', 'IN', 'TERROR'] +8461-281231-0004-2904: ref=['LEAVE', 'ME', 'AND', 'SEEK', 'THE', 'SAXON', 'WITCH', 'ULRICA', 'WHO', 'WAS', 'MY', 'TEMPTRESS', 'LET', 'HER', 'AS', 'WELL', 'AS', 'I', 'TASTE', 'THE', 'TORTURES', 'WHICH', 'ANTICIPATE', 'HELL'] +8461-281231-0004-2904: hyp=['LEAVE', 'ME', 'AND', 'SEEK', 'THE', 'SAXON', 'WITCH', 'EUREKA', 'WHO', 'WAS', 'MY', 'TEMPTRESS', 'LET', 'HER', 'AS', 'WELL', 'AS', 'I', 'TASTE', 'THE', 'TORTURES', 'WHICH', 'ANTICIPATE', 'HELL'] +8461-281231-0005-2905: ref=['EXCLAIMED', 'THE', 'NORMAN', 'HO'] +8461-281231-0005-2905: hyp=['EXCLAIMED', 'THE', 'NORMAN', 'OH'] +8461-281231-0006-2906: ref=['REMEMBEREST', 'THOU', 'THE', 'MAGAZINE', 'OF', 'FUEL', 'THAT', 'IS', 'STORED', 'BENEATH', 'THESE', 'APARTMENTS', 'WOMAN'] +8461-281231-0006-2906: hyp=['REMEMBEREST', 'THOU', 'THE', 'MAGAZINE', 'OF', 'FUEL', 'THAT', 'IS', 'STORED', 'BENEATH', 'THESE', 'APARTMENTS', 'WOMAN'] +8461-281231-0007-2907: ref=['THEY', 'ARE', 'FAST', 'RISING', 'AT', 'LEAST', 'SAID', 'ULRICA', 'AND', 'A', 'SIGNAL', 'SHALL', 'SOON', 'WAVE', 'TO', 'WARN', 'THE', 'BESIEGERS', 'TO', 'PRESS', 'HARD', 'UPON', 'THOSE', 'WHO', 'WOULD', 'EXTINGUISH', 'THEM'] 
+8461-281231-0007-2907: hyp=['THEY', 'ARE', 'FAST', 'RISING', 'AT', 'LEAST', 'SAID', 'EUREKA', 'AND', 'THE', 'SIGNAL', 'SHALL', 'SOON', 'WAVE', 'TO', 'WARN', 'THE', 'BESIEGERS', 'TO', 'PRESS', 'HARD', 'UPON', 'THOSE', 'WHO', 'WOULD', 'EXTINGUISH', 'THEM'] +8461-281231-0008-2908: ref=['MEANWHILE', 'THE', 'BLACK', 'KNIGHT', 'HAD', 'LED', 'HIS', 'FORCES', 'AGAIN', 'TO', 'THE', 'ATTACK', 'AND', 'SO', 'VIGOROUS', 'WAS', 'THEIR', 'ASSAULT', 'THAT', 'BEFORE', 'LONG', 'THE', 'GATE', 'OF', 'THE', 'CASTLE', 'ALONE', 'SEPARATED', 'THEM', 'FROM', 'THOSE', 'WITHIN'] +8461-281231-0008-2908: hyp=['MEANWHILE', 'THE', 'BLACK', 'KNIGHT', 'HAD', 'LED', 'HIS', 'FORCES', 'AGAIN', 'TO', 'THE', 'ATTACK', 'AND', 'SO', 'VIGOROUS', 'WAS', 'THEIR', 'ASSAULT', 'THAT', 'BEFORE', 'LONG', 'THE', 'GATE', 'OF', 'THE', 'CASTLE', 'ALONE', 'SEPARATED', 'THEM', 'FROM', 'THOSE', 'WITHIN'] +8461-281231-0009-2909: ref=['THE', 'DEFENDERS', 'FINDING', 'THE', 'CASTLE', 'TO', 'BE', 'ON', 'FIRE', 'NOW', 'DETERMINED', 'TO', 'SELL', 'THEIR', 'LIVES', 'AS', 'DEARLY', 'AS', 'THEY', 'COULD', 'AND', 'HEADED', 'BY', 'DE', 'BRACY', 'THEY', 'THREW', 'OPEN', 'THE', 'GATE', 'AND', 'WERE', 'AT', 'ONCE', 'INVOLVED', 'IN', 'A', 'TERRIFIC', 'CONFLICT', 'WITH', 'THOSE', 'OUTSIDE'] +8461-281231-0009-2909: hyp=['THE', 'DEFENDERS', 'FINDING', 'THE', 'CASTLE', 'TO', 'BE', 'ON', 'FIRE', 'NOW', 'DETERMINED', 'TO', 'SELL', 'THEIR', 'LIVES', 'AS', 'DEARLY', 'AS', 'THEY', 'COULD', 'AND', 'HEADED', 'BY', 'THE', 'BRACY', 'THEY', 'THREW', 'OPEN', 'THE', 'GATE', 'AND', 'WERE', 'AT', 'ONCE', 'INVOLVED', 'IN', 'A', 'TERRIFIC', 'CONFLICT', 'WITH', 'THOSE', 'OUTSIDE'] +8461-281231-0010-2910: ref=['THE', 'BLACK', 'KNIGHT', 'WITH', 'PORTENTOUS', 'STRENGTH', 'FORCED', 'HIS', 'WAY', 'INWARD', 'IN', 'DESPITE', 'OF', 'DE', 'BRACY', 'AND', 'HIS', 'FOLLOWERS'] +8461-281231-0010-2910: hyp=['THE', 'BLACK', 'KNIGHT', 'WITH', 'PORTENTOUS', 'STRENGTH', 'FORCED', 'HIS', 'WAY', 'INWARD', 'IN', 'DESPITE', 'OF', 'DE', 'BRACY', 'AND', 'HIS', 'FOLLOWERS'] +8461-281231-0011-2911: ref=['TWO', 'OF', 'THE', 'FOREMOST', 'INSTANTLY', 'FELL', 'AND', 'THE', 'REST', 'GAVE', 'WAY', 'NOTWITHSTANDING', 'ALL', 'THEIR', 'LEADERS', 'EFFORTS', 'TO', 'STOP', 'THEM'] +8461-281231-0011-2911: hyp=['TWO', 'OF', 'THE', 'FOREMOST', 'INSTANTLY', 'FELL', 'AND', 'THE', 'REST', 'GAVE', 'WAY', 'NOTWITHSTANDING', 'ALL', 'THE', "LEADER'S", 'EFFORTS', 'TO', 'STOP', 'THEM'] +8461-281231-0012-2912: ref=['THE', 'BLACK', 'KNIGHT', 'WAS', 'SOON', 'ENGAGED', 'IN', 'DESPERATE', 'COMBAT', 'WITH', 'THE', 'NORMAN', 'CHIEF', 'AND', 'THE', 'VAULTED', 'ROOF', 'OF', 'THE', 'HALL', 'RUNG', 'WITH', 'THEIR', 'FURIOUS', 'BLOWS'] +8461-281231-0012-2912: hyp=['THE', 'BLACK', 'KNIGHT', 'WAS', 'SOON', 'ENGAGED', 'IN', 'DESPERATE', 'COMBAT', 'WITH', 'THE', 'NORMAN', 'CHIEF', 'AND', 'THE', 'VAULTED', 'ROOF', 'OF', 'THE', 'HALL', 'RUNG', 'WITH', 'FURIOUS', 'BLOWS'] +8461-281231-0013-2913: ref=['AT', 'LENGTH', 'DE', 'BRACY', 'FELL'] +8461-281231-0013-2913: hyp=['AT', 'LENGTH', 'THE', 'BRACELEY', 'FELL'] +8461-281231-0014-2914: ref=['TELL', 'ME', 'THY', 'NAME', 'OR', 'WORK', 'THY', 'PLEASURE', 'ON', 'ME'] +8461-281231-0014-2914: hyp=['TELL', 'ME', 'THY', 'NAME', 'OR', 'WORK', 'THY', 'PLEASURE', 'ON', 'ME'] +8461-281231-0015-2915: ref=['YET', 'FIRST', 'LET', 'ME', 'SAY', 'SAID', 'DE', 'BRACY', 'WHAT', 'IT', 'IMPORTS', 'THEE', 'TO', 'KNOW'] +8461-281231-0015-2915: hyp=['YET', 'FIRST', 'LET', 'ME', 'SAY', 'SAID', 'DE', 'BRACY', 'WHAT', 'IT', 'IMPORTS', 'THEE', 'TO', 'KNOW'] +8461-281231-0016-2916: ref=['EXCLAIMED', 'THE', 'BLACK', 'KNIGHT', 
'PRISONER', 'AND', 'PERISH'] +8461-281231-0016-2916: hyp=['EXCLAIMED', 'THE', 'BLACK', 'KNIGHT', 'PRISONER', 'AND', 'PERISH'] +8461-281231-0017-2917: ref=['THE', 'LIFE', 'OF', 'EVERY', 'MAN', 'IN', 'THE', 'CASTLE', 'SHALL', 'ANSWER', 'IT', 'IF', 'A', 'HAIR', 'OF', 'HIS', 'HEAD', 'BE', 'SINGED', 'SHOW', 'ME', 'HIS', 'CHAMBER'] +8461-281231-0017-2917: hyp=['THE', 'LIFE', 'OF', 'EVERY', 'MAN', 'IN', 'THE', 'CASTLE', 'SHALL', 'ANSWER', 'IT', 'IF', 'A', 'HAIR', 'OF', 'HIS', 'HEAD', 'BE', 'SINGED', 'SHOW', 'ME', 'HIS', 'CHAMBER'] +8461-281231-0018-2918: ref=['RAISING', 'THE', 'WOUNDED', 'MAN', 'WITH', 'EASE', 'THE', 'BLACK', 'KNIGHT', 'RUSHED', 'WITH', 'HIM', 'TO', 'THE', 'POSTERN', 'GATE', 'AND', 'HAVING', 'THERE', 'DELIVERED', 'HIS', 'BURDEN', 'TO', 'THE', 'CARE', 'OF', 'TWO', 'YEOMEN', 'HE', 'AGAIN', 'ENTERED', 'THE', 'CASTLE', 'TO', 'ASSIST', 'IN', 'THE', 'RESCUE', 'OF', 'THE', 'OTHER', 'PRISONERS'] +8461-281231-0018-2918: hyp=['RAISING', 'THE', 'WOUNDED', 'MAN', 'WITH', 'EASE', 'THE', 'BLACK', 'KNIGHT', 'RUSHED', 'WITH', 'HIM', 'TO', 'THE', 'POSTERN', 'GATE', 'AND', 'HAVING', 'THERE', 'DELIVERED', 'HIS', 'BURDEN', 'TO', 'THE', 'CARE', 'OF', 'TWO', 'YEOMEN', 'HE', 'AGAIN', 'ENTERED', 'THE', 'CASTLE', 'TO', 'ASSIST', 'IN', 'THE', 'RESCUE', 'OF', 'THE', 'OTHER', 'PRISONERS'] +8461-281231-0019-2919: ref=['BUT', 'IN', 'OTHER', 'PARTS', 'THE', 'BESIEGERS', 'PURSUED', 'THE', 'DEFENDERS', 'OF', 'THE', 'CASTLE', 'FROM', 'CHAMBER', 'TO', 'CHAMBER', 'AND', 'SATIATED', 'IN', 'THEIR', 'BLOOD', 'THE', 'VENGEANCE', 'WHICH', 'HAD', 'LONG', 'ANIMATED', 'THEM', 'AGAINST', 'THE', 'SOLDIERS', 'OF', 'THE', 'TYRANT', 'FRONT', 'DE', 'BOEUF'] +8461-281231-0019-2919: hyp=['BUT', 'IN', 'OTHER', 'PARTS', 'THE', 'BESIEGERS', 'PURSUED', 'THE', 'DEFENDERS', 'OF', 'THE', 'CASTLE', 'FROM', 'CHAMBER', 'TO', 'CHAMBER', 'AND', 'SATIATED', 'IN', 'THEIR', 'BLOOD', 'THE', 'VENGEANCE', 'WHICH', 'HAD', 'LONG', 'ANIMATED', 'THEM', 'AGAINST', 'THE', 'SOLDIERS', 'OF', 'THE', 'TYRANT', 'FRONT', 'DE', 'BOEUF'] +8461-281231-0020-2920: ref=['AS', 'THE', 'FIRE', 'COMMENCED', 'TO', 'SPREAD', 'RAPIDLY', 'THROUGH', 'ALL', 'PARTS', 'OF', 'THE', 'CASTLE', 'ULRICA', 'APPEARED', 'ON', 'ONE', 'OF', 'THE', 'TURRETS'] +8461-281231-0020-2920: hyp=['AS', 'THE', 'FIRE', 'COMMENCED', 'TO', 'SPREAD', 'RAPIDLY', 'THROUGH', 'ALL', 'PARTS', 'OF', 'THE', 'CASTLE', 'EUREKA', 'APPEARED', 'ON', 'ONE', 'OF', 'THE', 'TURRETS'] +8461-281231-0021-2921: ref=['BEFORE', 'LONG', 'THE', 'TOWERING', 'FLAMES', 'HAD', 'SURMOUNTED', 'EVERY', 'OBSTRUCTION', 'AND', 'ROSE', 'TO', 'THE', 'EVENING', 'SKIES', 'ONE', 'HUGE', 'AND', 'BURNING', 'BEACON', 'SEEN', 'FAR', 'AND', 'WIDE', 'THROUGH', 'THE', 'ADJACENT', 'COUNTRY', 'TOWER', 'AFTER', 'TOWER', 'CRASHED', 'DOWN', 'WITH', 'BLAZING', 'ROOF', 'AND', 'RAFTER'] +8461-281231-0021-2921: hyp=['BEFORE', 'LONG', 'THE', 'TOWERING', 'FLAMES', 'HAD', 'SURMOUNTED', 'EVERY', 'OBSTRUCTION', 'AND', 'ROSE', 'TO', 'THE', 'EVENING', 'SKIES', 'ONE', 'HUGE', 'AND', 'BURNING', 'BEACON', 'SEEN', 'FAR', 'AND', 'WIDE', 'THROUGH', 'THE', 'ADJACENT', 'COUNTRY', 'TOWER', 'AFTER', 'TOWER', 'CRASHED', 'DOWN', 'WITH', 'BLAZING', 'ROOF', 'AND', 'RAFTER'] +8461-281231-0022-2922: ref=['AT', 'LENGTH', 'WITH', 'A', 'TERRIFIC', 'CRASH', 'THE', 'WHOLE', 'TURRET', 'GAVE', 'WAY', 'AND', 'SHE', 'PERISHED', 'IN', 'THE', 'FLAMES', 'WHICH', 'HAD', 'CONSUMED', 'HER', 'TYRANT'] +8461-281231-0022-2922: hyp=['AT', 'LENGTH', 'WITH', 'A', 'TERRIFIC', 'CRASH', 'THE', 'WHOLE', 'TURRET', 'GAVE', 'WAY', 'AND', 'SHE', 'PERISHED', 'IN', 'THE', 'FLAMES', 'WHICH', 'HAD', 'CONSUMED', 
'HER', 'TYRANT'] +8461-281231-0023-2923: ref=['WHEN', 'THE', 'OUTLAWS', 'HAD', 'DIVIDED', 'THE', 'SPOILS', 'WHICH', 'THEY', 'HAD', 'TAKEN', 'FROM', 'THE', 'CASTLE', 'OF', 'TORQUILSTONE', 'CEDRIC', 'PREPARED', 'TO', 'TAKE', 'HIS', 'DEPARTURE'] +8461-281231-0023-2923: hyp=['WHEN', 'THE', 'OUTLAWS', 'HAD', 'DIVIDED', 'THE', 'SPOILS', 'WHICH', 'THEY', 'HAD', 'TAKEN', 'FROM', 'THE', 'CASTLE', 'OF', 'TORKELSTONE', 'CEDRIC', 'PREPARED', 'TO', 'TAKE', 'HIS', 'DEPARTURE'] +8461-281231-0024-2924: ref=['HE', 'LEFT', 'THE', 'GALLANT', 'BAND', 'OF', 'FORESTERS', 'SORROWING', 'DEEPLY', 'FOR', 'HIS', 'LOST', 'FRIEND', 'THE', 'LORD', 'OF', 'CONINGSBURGH', 'AND', 'HE', 'AND', 'HIS', 'FOLLOWERS', 'HAD', 'SCARCE', 'DEPARTED', 'WHEN', 'A', 'PROCESSION', 'MOVED', 'SLOWLY', 'FROM', 'UNDER', 'THE', 'GREENWOOD', 'BRANCHES', 'IN', 'THE', 'DIRECTION', 'WHICH', 'HE', 'HAD', 'TAKEN', 'IN', 'THE', 'CENTRE', 'OF', 'WHICH', 'WAS', 'THE', 'CAR', 'IN', 'WHICH', 'THE', 'BODY', 'OF', 'ATHELSTANE', 'WAS', 'LAID'] +8461-281231-0024-2924: hyp=['HE', 'LEFT', 'THE', 'GALLANT', 'BAND', 'OF', 'FORESTERS', 'SORROWING', 'DEEPLY', 'FOR', 'HIS', 'LOST', 'FRIEND', 'THE', 'LORD', 'OF', 'KENNINGSBURG', 'AND', 'HE', 'AND', 'HIS', 'FOLLOWERS', 'HAD', 'SCARCE', 'DEPARTED', 'WHEN', 'A', 'PROCESSION', 'MOVED', 'SLOWLY', 'FROM', 'UNDER', 'THE', 'GREENOOD', 'BRANCHES', 'IN', 'THE', 'DIRECTION', 'WHICH', 'HE', 'HAD', 'TAKEN', 'IN', 'THE', 'CENTRE', 'OF', 'WHICH', 'WAS', 'THE', 'CAR', 'IN', 'WHICH', 'THE', 'BODY', 'OF', 'ADELSTEIN', 'WAS', 'LAID'] +8461-281231-0025-2925: ref=['DE', 'BRACY', 'BOWED', 'LOW', 'AND', 'IN', 'SILENCE', 'THREW', 'HIMSELF', 'UPON', 'A', 'HORSE', 'AND', 'GALLOPED', 'OFF', 'THROUGH', 'THE', 'WOOD'] +8461-281231-0025-2925: hyp=['DE', 'BRACY', 'BOWED', 'LOW', 'AND', 'IN', 'SILENCE', 'THREW', 'HIMSELF', 'UPON', 'A', 'HORSE', 'AND', 'GALLOPED', 'OFF', 'THROUGH', 'THE', 'WOOD'] +8461-281231-0026-2926: ref=['HERE', 'IS', 'A', 'BUGLE', 'WHICH', 'AN', 'ENGLISH', 'YEOMAN', 'HAS', 'ONCE', 'WORN', 'I', 'PRAY', 'YOU', 'TO', 'KEEP', 'IT', 'AS', 'A', 'MEMORIAL', 'OF', 'YOUR', 'GALLANT', 'BEARING'] +8461-281231-0026-2926: hyp=['HERE', 'IS', 'A', 'BUGLE', 'WHICH', 'AN', 'ENGLISH', 'YEOMAN', 'HAS', 'ONCE', 'WORN', 'I', 'PRAY', 'YOU', 'TO', 'KEEP', 'IT', 'AS', 'A', 'MEMORIAL', 'OF', 'YOUR', 'GALLANT', 'BEARING'] +8461-281231-0027-2927: ref=['SO', 'SAYING', 'HE', 'MOUNTED', 'HIS', 'STRONG', 'WAR', 'HORSE', 'AND', 'RODE', 'OFF', 'THROUGH', 'THE', 'FOREST'] +8461-281231-0027-2927: hyp=['SO', 'SAYING', 'HE', 'MOUNTED', 'HIS', 'STRONG', 'WAR', 'HORSE', 'AND', 'RODE', 'OFF', 'THROUGH', 'THE', 'FOREST'] +8461-281231-0028-2928: ref=['DURING', 'ALL', 'THIS', 'TIME', 'ISAAC', 'OF', 'YORK', 'SAT', 'MOURNFULLY', 'APART', 'GRIEVING', 'FOR', 'THE', 'LOSS', 'OF', 'HIS', 'DEARLY', 'LOVED', 'DAUGHTER', 'REBECCA'] +8461-281231-0028-2928: hyp=['DURING', 'ALL', 'THIS', 'TIME', 'ISAAC', 'OF', 'YORKE', 'SAT', 'MOURNFULLY', 'APART', 'GRIEVING', 'FOR', 'THE', 'LOSS', 'OF', 'HIS', 'DEARLY', 'LOVED', 'DAUGHTER', 'REBECCA'] +8461-281231-0029-2929: ref=['AND', 'WITH', 'THIS', 'EPISTLE', 'THE', 'UNHAPPY', 'OLD', 'MAN', 'SET', 'OUT', 'TO', 'PROCURE', 'HIS', "DAUGHTER'S", 'LIBERATION'] +8461-281231-0029-2929: hyp=['AND', 'WITH', 'THIS', 'EPISTLE', 'THE', 'UNHAPPY', 'OLD', 'MAN', 'SET', 'OUT', 'TO', 'PROCURE', 'HIS', "DAUGHTER'S", 'LIBERATION'] +8461-281231-0030-2930: ref=['THE', 'TEMPLAR', 'IS', 'FLED', 'SAID', 'DE', 'BRACY', 'IN', 'ANSWER', 'TO', 'THE', "PRINCE'S", 'EAGER', 'QUESTIONS', 'FRONT', 'DE', 'BOEUF', 'YOU', 'WILL', 'NEVER', 'SEE', 'MORE', 'AND', 'HE', 'ADDED', 
'IN', 'A', 'LOW', 'AND', 'EMPHATIC', 'TONE', 'RICHARD', 'IS', 'IN', 'ENGLAND', 'I', 'HAVE', 'SEEN', 'HIM', 'AND', 'SPOKEN', 'WITH', 'HIM'] +8461-281231-0030-2930: hyp=['THE', 'TEMPLAR', 'IS', 'FLED', 'SAID', 'DE', 'BRACY', 'IN', 'ANSWER', 'TO', 'THE', "PRINCE'S", 'EAGER', 'QUESTIONS', 'FRONT', 'DE', 'BOEUF', 'YOU', 'WILL', 'NEVER', 'SEE', 'MORE', 'AND', 'HE', 'ADDED', 'IN', 'A', 'LOW', 'AND', 'EMPHATIC', 'TONE', 'RICHARD', 'IS', 'IN', 'ENGLAND', 'I', 'HAVE', 'SEEN', 'HIM', 'AND', 'SPOKEN', 'WITH', 'HIM'] +8461-281231-0031-2931: ref=['HE', 'APPEALED', 'TO', 'DE', 'BRACY', 'TO', 'ASSIST', 'HIM', 'IN', 'THIS', 'PROJECT', 'AND', 'BECAME', 'AT', 'ONCE', 'DEEPLY', 'SUSPICIOUS', 'OF', 'THE', "KNIGHT'S", 'LOYALTY', 'TOWARDS', 'HIM', 'WHEN', 'HE', 'DECLINED', 'TO', 'LIFT', 'HAND', 'AGAINST', 'THE', 'MAN', 'WHO', 'HAD', 'SPARED', 'HIS', 'OWN', 'LIFE'] +8461-281231-0031-2931: hyp=['HE', 'APPEALED', 'TO', 'THE', 'BRAVELEY', 'TO', 'ASSIST', 'HIM', 'IN', 'THIS', 'PROJECT', 'AND', 'BECAME', 'AT', 'ONCE', 'DEEPLY', 'SUSPICIOUS', 'OF', 'THE', "KNIGHT'S", 'LOYALTY', 'TOWARDS', 'HIM', 'WHEN', 'HE', 'DECLINED', 'TO', 'LIFT', 'HAND', 'AGAINST', 'THE', 'MAN', 'WHO', 'HAD', 'SPARED', 'HIS', 'OWN', 'LIFE'] +8461-281231-0032-2932: ref=['BEFORE', 'REACHING', 'HIS', 'DESTINATION', 'HE', 'WAS', 'TOLD', 'THAT', 'LUCAS', 'DE', 'BEAUMANOIR', 'THE', 'GRAND', 'MASTER', 'OF', 'THE', 'ORDER', 'OF', 'THE', 'TEMPLARS', 'WAS', 'THEN', 'ON', 'VISIT', 'TO', 'THE', 'PRECEPTORY'] +8461-281231-0032-2932: hyp=['BEFORE', 'REACHING', 'HIS', 'DESTINATION', 'HE', 'WAS', 'TOLD', 'THAT', 'LYCAS', 'DE', 'BEAUMANOIR', 'THE', 'GRAND', 'MASTER', 'OF', 'THE', 'ORDER', 'OF', 'THE', 'TEMPLARS', 'WAS', 'THEN', 'ON', 'VISIT', 'TO', 'THE', 'PRECEPTARY'] +8461-281231-0033-2933: ref=['HE', 'HAD', 'NOT', 'UNTIL', 'THEN', 'BEEN', 'INFORMED', 'OF', 'THE', 'PRESENCE', 'OF', 'THE', 'JEWISH', 'MAIDEN', 'IN', 'THE', 'ABODE', 'OF', 'THE', 'TEMPLARS', 'AND', 'GREAT', 'WAS', 'HIS', 'FURY', 'AND', 'INDIGNATION', 'ON', 'LEARNING', 'THAT', 'SHE', 'WAS', 'AMONGST', 'THEM'] +8461-281231-0033-2933: hyp=['HE', 'HAD', 'NOT', 'UNTIL', 'THEN', 'BEEN', 'INFORMED', 'OF', 'THE', 'PRESENCE', 'OF', 'THE', 'JEWISH', 'MAIDEN', 'IN', 'THE', 'ABODE', 'OF', 'THE', 'TEMPLARS', 'AND', 'GREAT', 'WAS', 'HIS', 'FURY', 'AND', 'INDIGNATION', 'ON', 'LEARNING', 'THAT', 'SHE', 'WAS', 'AMONGST', 'THEM'] +8461-281231-0034-2934: ref=['POOR', 'ISAAC', 'WAS', 'HURRIED', 'OFF', 'ACCORDINGLY', 'AND', 'EXPELLED', 'FROM', 'THE', 'PRECEPTORY', 'ALL', 'HIS', 'ENTREATIES', 'AND', 'EVEN', 'HIS', 'OFFERS', 'UNHEARD', 'AND', 'DISREGARDED'] +8461-281231-0034-2934: hyp=['POOR', 'ISAAC', 'WAS', 'HURRIED', 'OFF', 'ACCORDINGLY', 'AND', 'EXPELLED', 'FROM', 'THE', 'PRECEPTORY', 'ALL', 'HIS', 'ENTREATIES', 'AND', 'EVEN', 'HIS', 'OFFERS', 'UNHEARD', 'AND', 'DISREGARDED'] +8461-281231-0035-2935: ref=['THE', 'ASSURANCE', 'THAT', 'SHE', 'POSSESSED', 'SOME', 'FRIEND', 'IN', 'THIS', 'AWFUL', 'ASSEMBLY', 'GAVE', 'HER', 'COURAGE', 'TO', 'LOOK', 'AROUND', 'AND', 'TO', 'MARK', 'INTO', 'WHOSE', 'PRESENCE', 'SHE', 'HAD', 'BEEN', 'CONDUCTED'] +8461-281231-0035-2935: hyp=['THE', 'ASSURANCE', 'THAT', 'SHE', 'POSSESSED', 'SOME', 'FRIEND', 'IN', 'THIS', 'AWFUL', 'ASSEMBLY', 'GAVE', 'HER', 'COURAGE', 'TO', 'LOOK', 'ROUND', 'AND', 'TO', 'MARK', 'INTO', 'WHOSE', 'PRESENCE', 'SHE', 'HAD', 'BEEN', 'CONDUCTED'] +8461-281231-0036-2936: ref=['SHE', 'GAZED', 'ACCORDINGLY', 'UPON', 'A', 'SCENE', 'WHICH', 'MIGHT', 'WELL', 'HAVE', 'STRUCK', 'TERROR', 'INTO', 'A', 'BOLDER', 'HEART', 'THAN', 'HERS'] +8461-281231-0036-2936: hyp=['SHE', 
'GAZED', 'ACCORDINGLY', 'UPON', 'A', 'SCENE', 'WHICH', 'MIGHT', 'WELL', 'HAVE', 'STRUCK', 'TERROR', 'INTO', 'A', 'BOLDER', 'HEART', 'THAN', 'HERS']
+8461-281231-0037-2937: ref=['AT', 'HIS', 'FEET', 'WAS', 'PLACED', 'A', 'TABLE', 'OCCUPIED', 'BY', 'TWO', 'SCRIBES', 'WHOSE', 'DUTY', 'IT', 'WAS', 'TO', 'RECORD', 'THE', 'PROCEEDINGS', 'OF', 'THE', 'DAY']
+8461-281231-0037-2937: hyp=['AT', 'HIS', 'FEET', 'WAS', 'PLACED', 'A', 'TABLE', 'OCCUPIED', 'BY', 'TWO', 'SCRIBES', 'WHOSE', 'DUTY', 'IT', 'WAS', 'TO', 'RECORD', 'THE', 'PROCEEDINGS', 'OF', 'THE', 'DAY']
+8461-281231-0038-2938: ref=['THE', 'PRECEPTORS', 'OF', 'WHOM', 'THERE', 'WERE', 'FOUR', 'PRESENT', 'OCCUPIED', 'SEATS', 'BEHIND', 'THEIR', 'SUPERIORS', 'AND', 'BEHIND', 'THEM', 'STOOD', 'THE', 'ESQUIRES', 'OF', 'THE', 'ORDER', 'ROBED', 'IN', 'WHITE']
+8461-281231-0038-2938: hyp=['THE', 'PRECEPTORS', 'OF', 'WHOM', 'THERE', 'WERE', 'FOUR', 'PRESENT', 'OCCUPIED', 'SEATS', 'BEHIND', 'THE', 'SUPERIORS', 'AND', 'BEHIND', 'THEM', 'STOOD', 'THE', 'ESQUIRES', 'OF', 'THE', 'ORDER', 'ROBED', 'IN', 'WHITE']
diff --git a/decoding_results/attention-decoder-rescoring-no-ngram/wer-summary-test-clean-epoch-50_avg-20_use-averaged-model.txt b/decoding_results/attention-decoder-rescoring-no-ngram/wer-summary-test-clean-epoch-50_avg-20_use-averaged-model.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f9bd97aa05bdd308707c79c2b4a6baec5a6e5e80
--- /dev/null
+++ b/decoding_results/attention-decoder-rescoring-no-ngram/wer-summary-test-clean-epoch-50_avg-20_use-averaged-model.txt
@@ -0,0 +1,29 @@
+settings WER
+attention_scale_3.0 1.96
+attention_scale_5.0 1.96
+attention_scale_6.0 1.96
+attention_scale_7.0 1.96
+attention_scale_8.0 1.96
+attention_scale_9.0 1.96
+attention_scale_2.1 1.97
+attention_scale_2.2 1.97
+attention_scale_2.3 1.97
+attention_scale_2.5 1.97
+attention_scale_4.0 1.97
+attention_scale_1.7 1.98
+attention_scale_1.9 1.98
+attention_scale_2.0 1.98
+attention_scale_1.5 2.0
+attention_scale_1.3 2.01
+attention_scale_1.2 2.02
+attention_scale_1.0 2.04
+attention_scale_1.1 2.04
+attention_scale_0.9 2.05
+attention_scale_0.7 2.08
+attention_scale_0.6 2.13
+attention_scale_0.5 2.17
+attention_scale_0.3 2.28
+attention_scale_0.1 2.39
+attention_scale_0.08 2.43
+attention_scale_0.05 2.48
+attention_scale_0.01 2.53
diff --git a/decoding_results/attention-decoder-rescoring-no-ngram/wer-summary-test-other-epoch-50_avg-20_use-averaged-model.txt b/decoding_results/attention-decoder-rescoring-no-ngram/wer-summary-test-other-epoch-50_avg-20_use-averaged-model.txt
new file mode 100644
index 0000000000000000000000000000000000000000..882d39bd7f2b206b7a16a8db890360de317b35b8
--- /dev/null
+++ b/decoding_results/attention-decoder-rescoring-no-ngram/wer-summary-test-other-epoch-50_avg-20_use-averaged-model.txt
@@ -0,0 +1,29 @@
+settings WER
+attention_scale_8.0 4.08
+attention_scale_9.0 4.08
+attention_scale_7.0 4.09
+attention_scale_4.0 4.1
+attention_scale_6.0 4.1
+attention_scale_3.0 4.11
+attention_scale_5.0 4.11
+attention_scale_2.3 4.12
+attention_scale_2.5 4.12
+attention_scale_1.9 4.13
+attention_scale_2.0 4.13
+attention_scale_2.1 4.13
+attention_scale_2.2 4.13
+attention_scale_1.7 4.14
+attention_scale_1.5 4.15
+attention_scale_1.2 4.17
+attention_scale_1.3 4.17
+attention_scale_1.1 4.18
+attention_scale_1.0 4.2
+attention_scale_0.9 4.21
+attention_scale_0.7 4.26
+attention_scale_0.6 4.31
+attention_scale_0.5 4.33
+attention_scale_0.3 4.5
+attention_scale_0.1 4.74
+attention_scale_0.08 4.77
+attention_scale_0.05 4.83
+attention_scale_0.01 4.93
diff --git a/exp/decode.sh b/exp/decode.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0edebef0040181b0b09aaea6682ed356a74df1dd
--- /dev/null
+++ b/exp/decode.sh
@@ -0,0 +1,15 @@
+export CUDA_VISIBLE_DEVICES=0
+./zipformer/ctc_decode.py \
+ --epoch 50 \
+ --avg 20 \
+ --exp-dir zipformer/exp-large-cr-ctc-aed/ \
+ --use-cr-ctc 1 \
+ --use-ctc 1 \
+ --use-transducer 0 \
+ --use-attention-decoder 1 \
+ --num-encoder-layers 2,2,4,5,4,2 \
+ --feedforward-dim 512,768,1536,2048,1536,768 \
+ --encoder-dim 192,256,512,768,512,256 \
+ --encoder-unmasked-dim 192,192,256,320,256,192 \
+ --max-duration 200 \
+ --decoding-method attention-decoder-rescoring-no-ngram
diff --git a/exp/epoch-50.pt b/exp/epoch-50.pt
new file mode 100644
index 0000000000000000000000000000000000000000..76eb1d7e8c4d30f0c22447872469c7199302c6bf
--- /dev/null
+++ b/exp/epoch-50.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8119c491ec65c20b0439eb6d1a06bd85c6789ac17d345aa5cd11aa9fd01cc2c5
+size 2790384782
diff --git a/exp/export.sh b/exp/export.sh
new file mode 100644
index 0000000000000000000000000000000000000000..64c9afaef827c6032ffdaa8e75d5addecad7797a
--- /dev/null
+++ b/exp/export.sh
@@ -0,0 +1,13 @@
+./zipformer/export.py \
+ --exp-dir zipformer/exp-large-cr-ctc-aed \
+ --use-cr-ctc 1 \
+ --use-ctc 1 \
+ --use-transducer 0 \
+ --use-attention-decoder 1 \
+ --num-encoder-layers 2,2,4,5,4,2 \
+ --feedforward-dim 512,768,1536,2048,1536,768 \
+ --encoder-dim 192,256,512,768,512,256 \
+ --encoder-unmasked-dim 192,192,256,320,256,192 \
+ --tokens data/lang_bpe_500/tokens.txt \
+ --epoch 50 \
+ --avg 20
diff --git a/exp/log/log-train-2024-09-16-12-25-21-0 b/exp/log/log-train-2024-09-16-12-25-21-0
new file mode 100644
index 0000000000000000000000000000000000000000..425ce998caaa00ba06b58a493731b7ba334ef9cd
--- /dev/null
+++ b/exp/log/log-train-2024-09-16-12-25-21-0
@@ -0,0 +1,35407 @@
+2024-09-16 12:25:21,779 INFO [train.py:1266] (0/2) Training started
+2024-09-16 12:25:21,782 INFO [train.py:1276] (0/2) Device: cuda:0
+2024-09-16 12:25:21,784 INFO [train.py:1307] (0/2) Using dtype=torch.float16
+2024-09-16 12:25:21,784 INFO [train.py:1308] (0/2) Use AMP=True
+2024-09-16 12:25:21,784 INFO [train.py:1310] (0/2) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'ignore_id': -1, 'label_smoothing': 0.1, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '9f6206b565b833d71e19b4411493d04d99f0a308', 'k2-git-date': 'Thu Mar 28 09:46:54 2024', 'lhotse-version': '1.27.0', 'torch-version': '2.2.2+cu118', 'torch-cuda-available': True, 'torch-cuda-version': '11.8', 'python-version': '3.10', 'icefall-git-branch': 'cr-ctc', 'icefall-git-sha1': '07d6b123-dirty', 'icefall-git-date': 'Wed Sep 4 19:33:41 2024', 'icefall-path': '/zw/mnt/yaozengwei/workspace/icefall_cr_ctc', 'k2-path': '/root/anaconda3/envs/python3.10/lib/python3.10/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/envs/python3.10/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'NGK_zengwei'}, 'world_size': 2, 'master_port': 12341, 'tensorboard': True, 'num_epochs': 50, 'start_epoch': 1, 'start_batch': 0, 'exp_dir': PosixPath('zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'context_size': 2, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'ctc_loss_scale': 0.1, 'cr_loss_scale': 0.02, 'time_mask_ratio': 2.5, 'cr_loss_masked_scale': 1.0, 'attention_decoder_loss_scale': 0.9, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_bf16': False, 'num_encoder_layers': '2,2,4,5,4,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1536,2048,1536,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,512,768,512,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,320,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'attention_decoder_dim': 512, 'attention_decoder_num_layers': 6, 'attention_decoder_attention_dim': 512, 'attention_decoder_num_heads': 8, 'attention_decoder_feedforward_dim': 2048, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'use_transducer': False, 'use_ctc': True, 'use_attention_decoder': True, 'use_cr_ctc': True, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': False, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'blank_id': 0, 'sos_id': 1, 'eos_id': 1, 'vocab_size': 500, 'dtype': torch.float16, 'use_autocast': True}
+2024-09-16 12:25:21,784 INFO [train.py:1312] (0/2) About to create model
+2024-09-16 12:25:22,609 INFO [train.py:1316] (0/2) Number of model parameters: 174319650
+2024-09-16 12:25:22,609 INFO [train.py:752] (0/2) num_frame_masks: 25.0, max_frames_mask_fraction: 0.375
+2024-09-16 12:25:24,846 INFO [train.py:1338] (0/2) Using DDP
+2024-09-16 12:25:26,129 INFO [asr_datamodule.py:436] (0/2) About to get the shuffled train-clean-100, train-clean-360 and train-other-500 cuts
+2024-09-16 12:25:26,130 INFO [asr_datamodule.py:232] (0/2) Enable MUSAN
+2024-09-16 12:25:26,130 INFO [asr_datamodule.py:233] (0/2) About to get Musan cuts
+2024-09-16 12:25:27,754 INFO [asr_datamodule.py:279] (0/2) Disable SpecAugment
+2024-09-16 12:25:27,754 INFO [asr_datamodule.py:281] (0/2) About to create train dataset
+2024-09-16 12:25:27,754 INFO [asr_datamodule.py:308] (0/2) Using DynamicBucketingSampler.
+2024-09-16 12:25:28,567 INFO [asr_datamodule.py:325] (0/2) About to create train dataloader
+2024-09-16 12:25:28,568 INFO [asr_datamodule.py:453] (0/2) About to get dev-clean cuts
+2024-09-16 12:25:28,569 INFO [asr_datamodule.py:460] (0/2) About to get dev-other cuts
+2024-09-16 12:25:28,570 INFO [asr_datamodule.py:356] (0/2) About to create dev dataset
+2024-09-16 12:25:28,729 INFO [asr_datamodule.py:373] (0/2) About to create dev dataloader
+2024-09-16 12:25:28,729 INFO [train.py:1545] (0/2) Sanity check -- see if any of the batches in epoch 1 would cause OOM.
+2024-09-16 12:28:13,904 INFO [train.py:1576] (0/2) Maximum memory allocated so far is 46330MB +2024-09-16 12:28:15,779 INFO [train.py:1576] (0/2) Maximum memory allocated so far is 46406MB +2024-09-16 12:28:17,886 INFO [train.py:1576] (0/2) Maximum memory allocated so far is 46728MB +2024-09-16 12:28:19,115 INFO [scaling.py:1024] (0/2) Whitening: name=None, num_groups=1, num_channels=512, metric=119.30 vs. limit=7.5 +2024-09-16 12:28:20,062 INFO [train.py:1576] (0/2) Maximum memory allocated so far is 47362MB +2024-09-16 12:28:22,390 INFO [train.py:1576] (0/2) Maximum memory allocated so far is 47362MB +2024-09-16 12:28:23,765 INFO [scaling.py:1024] (0/2) Whitening: name=None, num_groups=1, num_channels=512, metric=197.33 vs. limit=7.5 +2024-09-16 12:28:24,625 INFO [train.py:1576] (0/2) Maximum memory allocated so far is 47362MB +2024-09-16 12:28:54,373 INFO [train.py:1198] (0/2) Epoch 1, batch 0, loss[loss=8.249, ctc_loss=4.732, cr_loss=0.5653, attn_decoder_loss=8.627, over 29639.00 frames. ], tot_loss[loss=8.249, ctc_loss=4.732, cr_loss=0.5653, attn_decoder_loss=8.627, over 29639.00 frames. ], batch size: 73, lr: 2.25e-02, grad_scale: 2.0 +2024-09-16 12:28:54,374 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 12:29:13,593 INFO [train.py:1230] (0/2) Epoch 1, validation: loss=8.234, ctc_loss=4.87, cr_loss=1.182e-15, attn_decoder_loss=8.607, over 944034.00 frames. +2024-09-16 12:29:13,594 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 47369MB +2024-09-16 12:29:23,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.29 vs. limit=5.0 +2024-09-16 12:29:24,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=0.0, ans=7.5 +2024-09-16 12:29:31,004 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 2.329e+03 2.556e+03 3.012e+03 3.068e+03 4.530e+03, threshold=1.205e+04, percent-clipped=0.0 +2024-09-16 12:29:40,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=40.0, ans=0.498125 +2024-09-16 12:29:43,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=76.71 vs. limit=7.515 +2024-09-16 12:29:46,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=15.50 vs. limit=7.515 +2024-09-16 12:29:51,255 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.743e+03 2.098e+03 2.580e+03 3.037e+03 5.426e+03, threshold=1.032e+04, percent-clipped=0.0 +2024-09-16 12:29:57,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=21.92 vs. limit=7.53 +2024-09-16 12:30:02,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=80.0, ans=5.05 +2024-09-16 12:30:10,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=20.74 vs. limit=7.545 +2024-09-16 12:30:12,640 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.11 vs. 
limit=7.59 +2024-09-16 12:30:13,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=120.0, ans=0.8958 +2024-09-16 12:30:23,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=120.0, ans=0.09730000000000001 +2024-09-16 12:30:23,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=15.28 vs. limit=7.545 +2024-09-16 12:30:24,099 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.27 vs. limit=7.59 +2024-09-16 12:30:27,831 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=15.85 vs. limit=7.56 +2024-09-16 12:30:28,472 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.091e+02 1.328e+03 1.895e+03 2.580e+03 5.426e+03, threshold=7.580e+03, percent-clipped=0.0 +2024-09-16 12:30:28,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_ff2.min_abs, batch_count=160.0, ans=0.004 +2024-09-16 12:30:33,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=26.16 vs. limit=7.62 +2024-09-16 12:30:38,916 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=21.88 vs. limit=4.064 +2024-09-16 12:30:42,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=160.0, ans=0.8944 +2024-09-16 12:30:43,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=160.0, ans=0.4925 +2024-09-16 12:30:47,303 INFO [train.py:1198] (0/2) Epoch 1, batch 50, loss[loss=1.73, ctc_loss=1.098, cr_loss=0.1849, attn_decoder_loss=1.796, over 29435.00 frames. ], tot_loss[loss=3.654, ctc_loss=1.997, cr_loss=0.2521, attn_decoder_loss=3.832, over 1268774.28 frames. ], batch size: 70, lr: 2.48e-02, grad_scale: 2.0 +2024-09-16 12:30:48,573 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=79.26 vs. limit=5.1 +2024-09-16 12:30:54,798 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=24.49 vs. limit=5.1 +2024-09-16 12:31:01,663 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=15.25 vs. limit=4.08 +2024-09-16 12:31:04,259 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=50.89 vs. limit=7.575 +2024-09-16 12:31:08,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=240.0, ans=0.0946 +2024-09-16 12:31:13,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=28.12 vs. limit=7.68 +2024-09-16 12:31:17,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=63.66 vs. 
limit=5.12 +2024-09-16 12:31:29,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=280.0, ans=5.175 +2024-09-16 12:31:41,246 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=8.76 vs. limit=4.112 +2024-09-16 12:31:43,442 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=17.16 vs. limit=7.62 +2024-09-16 12:31:59,076 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=34.80 vs. limit=5.08 +2024-09-16 12:32:13,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=360.0, ans=0.1865 +2024-09-16 12:32:22,339 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 4.399e+02 6.271e+02 8.906e+02 1.633e+03 5.426e+03, threshold=1.781e+03, percent-clipped=0.0 +2024-09-16 12:32:22,362 INFO [train.py:1198] (0/2) Epoch 1, batch 100, loss[loss=1.178, ctc_loss=1.143, cr_loss=0.1249, attn_decoder_loss=1.179, over 29550.00 frames. ], tot_loss[loss=2.444, ctc_loss=1.554, cr_loss=0.1861, attn_decoder_loss=2.539, over 2251495.81 frames. ], batch size: 76, lr: 2.70e-02, grad_scale: 4.0 +2024-09-16 12:32:25,256 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=17.42 vs. limit=4.16 +2024-09-16 12:32:25,613 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=10.86 vs. limit=5.1 +2024-09-16 12:32:50,087 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=49.22 vs. limit=5.11 +2024-09-16 12:32:59,429 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=27.98 vs. limit=7.83 +2024-09-16 12:32:59,687 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=96.15 vs. limit=7.665 +2024-09-16 12:33:01,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=20.03 vs. limit=4.192 +2024-09-16 12:33:16,186 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=126.39 vs. limit=7.68 +2024-09-16 12:33:17,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=480.0, ans=0.0892 +2024-09-16 12:33:31,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.72 vs. limit=7.695 +2024-09-16 12:33:31,753 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=17.48 vs. limit=7.695 +2024-09-16 12:33:35,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=55.17 vs. 
limit=7.695 +2024-09-16 12:33:56,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=560.0, ans=5.14 +2024-09-16 12:33:58,497 INFO [train.py:1198] (0/2) Epoch 1, batch 150, loss[loss=0.9565, ctc_loss=1.064, cr_loss=0.113, attn_decoder_loss=0.942, over 29390.00 frames. ], tot_loss[loss=1.876, ctc_loss=1.394, cr_loss=0.1605, attn_decoder_loss=1.926, over 3046228.56 frames. ], batch size: 70, lr: 2.93e-02, grad_scale: 4.0 +2024-09-16 12:33:59,266 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=39.38 vs. limit=5.3 +2024-09-16 12:34:01,248 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.27 vs. limit=5.15 +2024-09-16 12:34:01,299 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.12 vs. limit=5.15 +2024-09-16 12:34:09,911 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=39.35 vs. limit=7.725 +2024-09-16 12:34:10,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=21.43 vs. limit=7.725 +2024-09-16 12:34:30,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=125.14 vs. limit=7.74 +2024-09-16 12:34:31,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=18.60 vs. limit=7.74 +2024-09-16 12:34:40,275 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=73.56 vs. limit=7.755 +2024-09-16 12:34:45,758 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=212.97 vs. limit=5.34 +2024-09-16 12:34:50,194 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=19.59 vs. limit=7.755 +2024-09-16 12:34:55,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=680.0, ans=7.755 +2024-09-16 12:34:56,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=720.0, ans=0.2928 +2024-09-16 12:34:58,823 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=176.79 vs. limit=8.04 +2024-09-16 12:35:06,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.73 vs. 
limit=5.18 +2024-09-16 12:35:07,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=720.0, ans=0.2928 +2024-09-16 12:35:11,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=720.0, ans=0.2928 +2024-09-16 12:35:12,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.whiten.whitening_limit, batch_count=720.0, ans=4.288 +2024-09-16 12:35:15,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=760.0, ans=0.20725 +2024-09-16 12:35:16,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=11.53 vs. limit=5.38 +2024-09-16 12:35:35,119 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=1.064e-01 +2024-09-16 12:35:36,604 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.861e+02 2.379e+02 2.686e+02 3.220e+02 5.129e+02, threshold=5.373e+02, percent-clipped=0.0 +2024-09-16 12:35:36,627 INFO [train.py:1198] (0/2) Epoch 1, batch 200, loss[loss=1.018, ctc_loss=1.183, cr_loss=0.1261, attn_decoder_loss=0.9971, over 27229.00 frames. ], tot_loss[loss=1.576, ctc_loss=1.315, cr_loss=0.147, attn_decoder_loss=1.602, over 3657771.64 frames. ], batch size: 124, lr: 3.15e-02, grad_scale: 8.0 +2024-09-16 12:35:37,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=800.0, ans=0.5 +2024-09-16 12:35:38,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=800.0, ans=0.292 +2024-09-16 12:35:43,264 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.90 vs. limit=5.4 +2024-09-16 12:35:57,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=840.0, ans=0.460625 +2024-09-16 12:36:06,111 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=8.13 vs. limit=4.336 +2024-09-16 12:36:09,548 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=26.34 vs. limit=7.815 +2024-09-16 12:36:16,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=880.0, ans=0.45875 +2024-09-16 12:36:30,895 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=17.61 vs. limit=7.83 +2024-09-16 12:36:33,093 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.91 vs. limit=8.19 +2024-09-16 12:36:44,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.05 vs. limit=5.23 +2024-09-16 12:36:49,582 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=148.19 vs. 
limit=7.845 +2024-09-16 12:36:51,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.87 vs. limit=8.22 +2024-09-16 12:36:53,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=960.0, ans=0.455 +2024-09-16 12:37:00,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=960.0, ans=0.2904 +2024-09-16 12:37:07,034 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=105.40 vs. limit=7.86 +2024-09-16 12:37:11,732 INFO [train.py:1198] (0/2) Epoch 1, batch 250, loss[loss=1.039, ctc_loss=1.224, cr_loss=0.1252, attn_decoder_loss=1.016, over 29327.00 frames. ], tot_loss[loss=1.398, ctc_loss=1.273, cr_loss=0.1413, attn_decoder_loss=1.408, over 4140463.03 frames. ], batch size: 100, lr: 3.38e-02, grad_scale: 8.0 +2024-09-16 12:37:12,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=1000.0, ans=0.453125 +2024-09-16 12:37:29,765 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.12 vs. limit=8.28 +2024-09-16 12:37:35,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.46 vs. limit=8.28 +2024-09-16 12:37:44,561 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.19 vs. limit=8.28 +2024-09-16 12:37:48,253 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.59 vs. limit=4.416 +2024-09-16 12:38:08,857 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=9.25 vs. limit=5.28 +2024-09-16 12:38:18,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=1120.0, ans=0.4475 +2024-09-16 12:38:40,246 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=14.45 vs. limit=7.935 +2024-09-16 12:38:43,523 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=62.83 vs. limit=5.58 +2024-09-16 12:38:48,404 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.367e+02 1.660e+02 1.791e+02 1.982e+02 5.267e+02, threshold=3.582e+02, percent-clipped=0.0 +2024-09-16 12:38:48,431 INFO [train.py:1198] (0/2) Epoch 1, batch 300, loss[loss=0.9728, ctc_loss=1.181, cr_loss=0.1256, attn_decoder_loss=0.9469, over 29541.00 frames. ], tot_loss[loss=1.274, ctc_loss=1.242, cr_loss=0.14, attn_decoder_loss=1.275, over 4509793.95 frames. ], batch size: 92, lr: 3.60e-02, grad_scale: 8.0 +2024-09-16 12:38:48,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=1200.0, ans=0.44375 +2024-09-16 12:38:58,915 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=19.97 vs. 
limit=7.95 +2024-09-16 12:38:59,215 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=11.94 vs. limit=7.95 +2024-09-16 12:39:08,937 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.23 vs. limit=8.43 +2024-09-16 12:39:09,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=1240.0, ans=0.8566 +2024-09-16 12:39:12,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=19.65 vs. limit=7.965 +2024-09-16 12:39:14,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=8.23 vs. limit=8.43 +2024-09-16 12:39:19,401 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=48.04 vs. limit=5.62 +2024-09-16 12:39:23,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=6.58 vs. limit=4.496 +2024-09-16 12:39:28,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=1280.0, ans=0.178 +2024-09-16 12:39:29,017 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=188.46 vs. limit=7.98 +2024-09-16 12:39:32,431 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=109.53 vs. limit=7.98 +2024-09-16 12:39:41,356 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=3.256e+00 +2024-09-16 12:39:46,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=1320.0, ans=0.438125 +2024-09-16 12:39:58,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=149.75 vs. limit=7.995 +2024-09-16 12:40:02,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=1320.0, ans=0.0703 +2024-09-16 12:40:07,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=1360.0, ans=0.43625 +2024-09-16 12:40:15,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=1360.0, ans=0.5 +2024-09-16 12:40:21,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=1360.0, ans=8.52 +2024-09-16 12:40:24,358 INFO [train.py:1198] (0/2) Epoch 1, batch 350, loss[loss=0.8687, ctc_loss=1.051, cr_loss=0.1826, attn_decoder_loss=0.8444, over 29335.00 frames. ], tot_loss[loss=1.187, ctc_loss=1.219, cr_loss=0.1457, attn_decoder_loss=1.18, over 4794808.84 frames. ], batch size: 71, lr: 3.83e-02, grad_scale: 8.0 +2024-09-16 12:40:25,625 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.85 vs. 
limit=5.35 +2024-09-16 12:40:36,219 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=65.95 vs. limit=8.025 +2024-09-16 12:40:38,279 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.32 vs. limit=8.55 +2024-09-16 12:41:00,898 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=131.36 vs. limit=8.055 +2024-09-16 12:41:06,356 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.82 vs. limit=8.61 +2024-09-16 12:41:11,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=101.87 vs. limit=8.055 +2024-09-16 12:41:25,073 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.72 vs. limit=8.64 +2024-09-16 12:41:28,133 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=4.905e+00 +2024-09-16 12:41:36,542 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=16.41 vs. limit=8.64 +2024-09-16 12:41:45,008 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.18 vs. limit=8.085 +2024-09-16 12:41:45,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=10.85 vs. limit=8.085 +2024-09-16 12:41:47,447 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.92 vs. limit=8.085 +2024-09-16 12:41:57,764 INFO [train.py:1198] (0/2) Epoch 1, batch 400, loss[loss=0.9182, ctc_loss=1.131, cr_loss=0.2054, attn_decoder_loss=0.8899, over 29724.00 frames. ], tot_loss[loss=1.119, ctc_loss=1.197, cr_loss=0.1579, attn_decoder_loss=1.106, over 5024820.18 frames. ], batch size: 82, lr: 4.05e-02, grad_scale: 8.0 +2024-09-16 12:41:59,563 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.379e+02 1.617e+02 1.838e+02 2.123e+02 1.289e+03, threshold=3.677e+02, percent-clipped=4.0 +2024-09-16 12:42:04,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=20.08 vs. limit=8.1 +2024-09-16 12:42:13,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=6.26 vs. limit=4.64 +2024-09-16 12:42:20,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=1640.0, ans=0.8426 +2024-09-16 12:42:21,131 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.24 vs. limit=8.73 +2024-09-16 12:42:28,890 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=12.54 vs. 
limit=5.82 +2024-09-16 12:42:29,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=1640.0, ans=6.025 +2024-09-16 12:42:32,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=13.95 vs. limit=8.73 +2024-09-16 12:42:35,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=1680.0, ans=0.1555 +2024-09-16 12:43:02,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=1720.0, ans=0.419375 +2024-09-16 12:43:03,855 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=5.47 vs. limit=5.0 +2024-09-16 12:43:07,012 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.99 vs. limit=5.86 +2024-09-16 12:43:33,845 INFO [train.py:1198] (0/2) Epoch 1, batch 450, loss[loss=0.952, ctc_loss=1.193, cr_loss=0.3137, attn_decoder_loss=0.9183, over 29659.00 frames. ], tot_loss[loss=1.067, ctc_loss=1.177, cr_loss=0.1732, attn_decoder_loss=1.051, over 5186771.85 frames. ], batch size: 83, lr: 4.28e-02, grad_scale: 8.0 +2024-09-16 12:43:34,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=1800.0, ans=0.0595 +2024-09-16 12:43:36,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=14.70 vs. limit=8.85 +2024-09-16 12:44:00,282 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.06 vs. limit=8.879999999999999 +2024-09-16 12:44:22,127 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=45.06 vs. limit=8.205 +2024-09-16 12:44:35,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.86 vs. limit=8.22 +2024-09-16 12:44:40,490 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=30.37 vs. limit=8.22 +2024-09-16 12:44:42,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.12 vs. limit=8.22 +2024-09-16 12:44:44,156 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=42.86 vs. limit=8.22 +2024-09-16 12:44:47,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=1960.0, ans=0.408125 +2024-09-16 12:44:51,517 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=1.88 vs. limit=3.294 +2024-09-16 12:44:56,606 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=27.61 vs. 
limit=8.235 +2024-09-16 12:44:56,648 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=14.98 vs. limit=8.97 +2024-09-16 12:45:02,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.07 vs. limit=8.97 +2024-09-16 12:45:04,718 INFO [train.py:1198] (0/2) Epoch 1, batch 500, loss[loss=0.9217, ctc_loss=1.132, cr_loss=0.2463, attn_decoder_loss=0.8929, over 29467.00 frames. ], tot_loss[loss=1.022, ctc_loss=1.155, cr_loss=0.1909, attn_decoder_loss=1.003, over 5330622.24 frames. ], batch size: 94, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:45:06,534 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.102e+02 1.448e+02 1.612e+02 2.007e+02 3.487e+02, threshold=3.225e+02, percent-clipped=0.0 +2024-09-16 12:45:12,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=2000.0, ans=0.8300000000000001 +2024-09-16 12:45:18,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=2000.0, ans=0.125 +2024-09-16 12:45:24,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=2.27 vs. limit=4.816 +2024-09-16 12:45:27,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=2040.0, ans=0.404375 +2024-09-16 12:45:27,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=2040.0, ans=0.1235 +2024-09-16 12:45:44,133 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.70 vs. limit=6.04 +2024-09-16 12:45:46,175 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.71 vs. limit=8.28 +2024-09-16 12:46:05,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=2120.0, ans=0.400625 +2024-09-16 12:46:05,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=2120.0, ans=6.325 +2024-09-16 12:46:07,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=2120.0, ans=0.235 +2024-09-16 12:46:22,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=35.78 vs. limit=8.31 +2024-09-16 12:46:25,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=14.79 vs. limit=6.08 +2024-09-16 12:46:31,275 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=23.54 vs. limit=8.31 +2024-09-16 12:46:35,770 INFO [train.py:1198] (0/2) Epoch 1, batch 550, loss[loss=0.9017, ctc_loss=1.088, cr_loss=0.335, attn_decoder_loss=0.8736, over 28862.00 frames. ], tot_loss[loss=0.9882, ctc_loss=1.131, cr_loss=0.2114, attn_decoder_loss=0.9676, over 5424749.00 frames. 
], batch size: 104, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:46:42,353 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.09 vs. limit=6.1 +2024-09-16 12:47:37,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=2320.0, ans=0.113 +2024-09-16 12:47:41,796 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=13.46 vs. limit=9.24 +2024-09-16 12:47:50,643 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.18 vs. limit=4.944 +2024-09-16 12:47:52,593 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.49 vs. limit=5.59 +2024-09-16 12:48:01,170 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.92 vs. limit=9.27 +2024-09-16 12:48:12,273 INFO [train.py:1198] (0/2) Epoch 1, batch 600, loss[loss=0.9046, ctc_loss=1.043, cr_loss=0.341, attn_decoder_loss=0.8816, over 29228.00 frames. ], tot_loss[loss=0.9594, ctc_loss=1.105, cr_loss=0.2344, attn_decoder_loss=0.938, over 5510636.73 frames. ], batch size: 100, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:48:13,538 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=14.21 vs. limit=8.4 +2024-09-16 12:48:14,061 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.060e+02 1.236e+02 1.461e+02 1.874e+02 1.065e+03, threshold=2.921e+02, percent-clipped=6.0 +2024-09-16 12:48:14,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=2400.0, ans=0.3875 +2024-09-16 12:48:16,648 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.01 vs. limit=6.2 +2024-09-16 12:48:18,680 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.61 vs. limit=9.3 +2024-09-16 12:48:22,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=33.11 vs. limit=8.4 +2024-09-16 12:48:23,876 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=26.52 vs. limit=8.4 +2024-09-16 12:48:28,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=2440.0, ans=0.8146 +2024-09-16 12:48:39,964 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=43.27 vs. limit=8.415 +2024-09-16 12:48:43,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.95 vs. limit=8.415 +2024-09-16 12:48:48,557 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.90 vs. 
limit=6.24 +2024-09-16 12:48:56,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=2480.0, ans=0.2752 +2024-09-16 12:49:02,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=2480.0, ans=0.2752 +2024-09-16 12:49:08,697 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.19 vs. limit=6.26 +2024-09-16 12:49:23,987 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=5.768e-01 +2024-09-16 12:49:29,663 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.75 vs. limit=8.46 +2024-09-16 12:49:41,294 INFO [train.py:1198] (0/2) Epoch 1, batch 650, loss[loss=0.8576, ctc_loss=0.9698, cr_loss=0.3533, attn_decoder_loss=0.8373, over 29749.00 frames. ], tot_loss[loss=0.9314, ctc_loss=1.071, cr_loss=0.258, attn_decoder_loss=0.9101, over 5588098.12 frames. ], batch size: 81, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:49:47,648 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.62 vs. limit=9.45 +2024-09-16 12:49:47,694 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.69 vs. limit=5.65 +2024-09-16 12:49:51,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.65 vs. limit=8.475 +2024-09-16 12:49:54,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=2600.0, ans=0.1025 +2024-09-16 12:50:00,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.85 vs. limit=8.49 +2024-09-16 12:50:04,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=2640.0, ans=0.8076000000000001 +2024-09-16 12:50:13,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=2640.0, ans=0.0406 +2024-09-16 12:50:17,793 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.28 vs. limit=5.072 +2024-09-16 12:50:22,960 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.49 vs. limit=8.504999999999999 +2024-09-16 12:50:26,842 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.78 vs. limit=5.072 +2024-09-16 12:50:30,394 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.15 vs. 
limit=8.504999999999999 +2024-09-16 12:50:36,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=2720.0, ans=0.3725 +2024-09-16 12:50:46,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=12.80 vs. limit=8.52 +2024-09-16 12:50:49,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=2720.0, ans=0.3725 +2024-09-16 12:50:51,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=13.81 vs. limit=9.57 +2024-09-16 12:51:09,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=2760.0, ans=0.2224 +2024-09-16 12:51:09,855 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.89 vs. limit=9.57 +2024-09-16 12:51:12,598 INFO [train.py:1198] (0/2) Epoch 1, batch 700, loss[loss=0.7817, ctc_loss=0.8814, cr_loss=0.337, attn_decoder_loss=0.7631, over 29510.00 frames. ], tot_loss[loss=0.9086, ctc_loss=1.043, cr_loss=0.2797, attn_decoder_loss=0.8875, over 5639107.19 frames. ], batch size: 76, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:51:14,379 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.893e+01 1.304e+02 1.539e+02 2.330e+02 9.417e+02, threshold=3.077e+02, percent-clipped=6.0 +2024-09-16 12:51:17,000 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.22 vs. limit=9.6 +2024-09-16 12:51:17,260 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.82 vs. limit=8.55 +2024-09-16 12:51:24,174 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=6.54 vs. limit=5.12 +2024-09-16 12:51:29,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.whiten.whitening_limit, batch_count=2840.0, ans=5.136 +2024-09-16 12:51:43,477 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.50 vs. limit=8.565 +2024-09-16 12:51:56,464 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=8.39 vs. limit=8.58 +2024-09-16 12:52:07,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=2920.0, ans=0.36312500000000003 +2024-09-16 12:52:15,036 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.93 vs. limit=8.595 +2024-09-16 12:52:16,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.47 vs. limit=8.595 +2024-09-16 12:52:20,769 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=13.95 vs. 
limit=9.69 +2024-09-16 12:52:22,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.59 vs. limit=9.69 +2024-09-16 12:52:32,230 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=10.05 vs. limit=8.61 +2024-09-16 12:52:33,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=2960.0, ans=0.36124999999999996 +2024-09-16 12:52:34,118 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=11.60 vs. limit=9.72 +2024-09-16 12:52:38,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=2960.0, ans=0.5 +2024-09-16 12:52:43,665 INFO [train.py:1198] (0/2) Epoch 1, batch 750, loss[loss=0.7794, ctc_loss=0.9116, cr_loss=0.3059, attn_decoder_loss=0.7579, over 29731.00 frames. ], tot_loss[loss=0.8792, ctc_loss=1.009, cr_loss=0.2929, attn_decoder_loss=0.8582, over 5677686.06 frames. ], batch size: 82, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:52:50,004 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.82 vs. limit=8.625 +2024-09-16 12:53:08,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=3040.0, ans=0.09899494936611666 +2024-09-16 12:53:09,313 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.40 vs. limit=8.64 +2024-09-16 12:53:10,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=3040.0, ans=0.2696 +2024-09-16 12:53:11,119 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.49 vs. limit=8.64 +2024-09-16 12:53:16,801 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=13.91 vs. limit=9.78 +2024-09-16 12:53:23,679 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.78 vs. limit=9.81 +2024-09-16 12:53:24,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=3080.0, ans=0.2692 +2024-09-16 12:53:27,013 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.64 vs. limit=5.232 +2024-09-16 12:53:31,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=3080.0, ans=0.07675000000000001 +2024-09-16 12:53:32,568 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.92 vs. limit=8.655 +2024-09-16 12:53:44,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.57 vs. 
limit=8.67 +2024-09-16 12:53:48,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.38 vs. limit=5.78 +2024-09-16 12:53:51,987 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=3.14 vs. limit=5.248 +2024-09-16 12:54:04,180 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.40 vs. limit=9.870000000000001 +2024-09-16 12:54:07,851 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=8.71 vs. limit=8.685 +2024-09-16 12:54:14,212 INFO [train.py:1198] (0/2) Epoch 1, batch 800, loss[loss=0.6445, ctc_loss=0.7679, cr_loss=0.3144, attn_decoder_loss=0.6239, over 29588.00 frames. ], tot_loss[loss=0.8448, ctc_loss=0.9749, cr_loss=0.3011, attn_decoder_loss=0.8237, over 5706838.24 frames. ], batch size: 73, lr: 4.49e-02, grad_scale: 16.0 +2024-09-16 12:54:15,974 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.656e+02 2.537e+02 3.189e+02 4.432e+02 8.958e+02, threshold=6.378e+02, percent-clipped=52.0 +2024-09-16 12:54:39,569 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=64.09 vs. limit=9.93 +2024-09-16 12:54:50,401 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.95 vs. limit=3.492 +2024-09-16 12:55:06,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=3320.0, ans=0.344375 +2024-09-16 12:55:13,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.56 vs. limit=6.66 +2024-09-16 12:55:14,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=43.42 vs. limit=8.745000000000001 +2024-09-16 12:55:43,350 INFO [train.py:1198] (0/2) Epoch 1, batch 850, loss[loss=0.7151, ctc_loss=0.8631, cr_loss=0.3715, attn_decoder_loss=0.6904, over 29691.00 frames. ], tot_loss[loss=0.8064, ctc_loss=0.9392, cr_loss=0.3067, attn_decoder_loss=0.7848, over 5736874.89 frames. ], batch size: 89, lr: 4.49e-02, grad_scale: 16.0 +2024-09-16 12:55:47,504 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.81 vs. limit=8.775 +2024-09-16 12:56:04,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=3440.0, ans=0.7796000000000001 +2024-09-16 12:56:07,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=3440.0, ans=0.07099999999999998 +2024-09-16 12:56:25,367 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.21 vs. 
limit=10.11 +2024-09-16 12:56:35,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten.whitening_limit, batch_count=3520.0, ans=8.82 +2024-09-16 12:56:56,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=3560.0, ans=0.333125 +2024-09-16 12:57:04,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=3560.0, ans=0.26439999999999997 +2024-09-16 12:57:11,501 INFO [train.py:1198] (0/2) Epoch 1, batch 900, loss[loss=0.6111, ctc_loss=0.7578, cr_loss=0.355, attn_decoder_loss=0.5869, over 29601.00 frames. ], tot_loss[loss=0.7675, ctc_loss=0.9037, cr_loss=0.3125, attn_decoder_loss=0.7454, over 5741832.46 frames. ], batch size: 73, lr: 4.48e-02, grad_scale: 16.0 +2024-09-16 12:57:13,149 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.606e+02 2.694e+02 3.422e+02 4.565e+02 1.517e+03, threshold=6.845e+02, percent-clipped=7.0 +2024-09-16 12:57:18,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=3600.0, ans=0.33125 +2024-09-16 12:57:23,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=3600.0, ans=0.254 +2024-09-16 12:57:44,966 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.39 vs. limit=10.26 +2024-09-16 12:57:45,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=3680.0, ans=0.06199999999999997 +2024-09-16 12:57:54,990 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=19.71 vs. limit=8.879999999999999 +2024-09-16 12:58:06,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=3720.0, ans=0.2558 +2024-09-16 12:58:15,293 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.19 vs. limit=8.895 +2024-09-16 12:58:31,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=3760.0, ans=0.26239999999999997 +2024-09-16 12:58:36,751 INFO [train.py:1198] (0/2) Epoch 1, batch 950, loss[loss=0.5455, ctc_loss=0.686, cr_loss=0.3595, attn_decoder_loss=0.5219, over 29515.00 frames. ], tot_loss[loss=0.7297, ctc_loss=0.8691, cr_loss=0.3182, attn_decoder_loss=0.7071, over 5742077.41 frames. 
], batch size: 74, lr: 4.48e-02, grad_scale: 16.0 +2024-09-16 12:58:45,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=3800.0, ans=0.038125000000000006 +2024-09-16 12:58:47,313 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=4.316e+01 +2024-09-16 12:58:49,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=3800.0, ans=0.767 +2024-09-16 12:58:52,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=3840.0, ans=0.055999999999999994 +2024-09-16 12:59:00,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=3840.0, ans=0.32 +2024-09-16 12:59:10,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.44 vs. limit=8.955 +2024-09-16 12:59:13,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=3880.0, ans=10.41 +2024-09-16 12:59:25,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=8.89 vs. limit=8.955 +2024-09-16 12:59:52,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=3960.0, ans=0.31437499999999996 +2024-09-16 12:59:53,184 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=6.16 vs. limit=5.584 +2024-09-16 12:59:54,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=3960.0, ans=0.2104 +2024-09-16 13:00:04,839 INFO [train.py:1198] (0/2) Epoch 1, batch 1000, loss[loss=0.521, ctc_loss=0.6487, cr_loss=0.3394, attn_decoder_loss=0.4993, over 29498.00 frames. ], tot_loss[loss=0.6954, ctc_loss=0.8361, cr_loss=0.3278, attn_decoder_loss=0.6725, over 5736726.55 frames. ], batch size: 77, lr: 4.48e-02, grad_scale: 8.0 +2024-09-16 13:00:08,134 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.514e+02 2.266e+02 2.878e+02 3.816e+02 1.272e+03, threshold=5.756e+02, percent-clipped=5.0 +2024-09-16 13:00:27,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=11.96 vs. limit=10.53 +2024-09-16 13:00:29,974 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=11.84 vs. limit=10.53 +2024-09-16 13:00:51,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=4080.0, ans=0.07 +2024-09-16 13:00:52,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=4080.0, ans=0.0 +2024-09-16 13:00:56,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=4120.0, ans=0.306875 +2024-09-16 13:01:01,758 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.64 vs. 
limit=3.618 +2024-09-16 13:01:31,759 INFO [train.py:1198] (0/2) Epoch 1, batch 1050, loss[loss=0.5391, ctc_loss=0.6715, cr_loss=0.3807, attn_decoder_loss=0.5159, over 29683.00 frames. ], tot_loss[loss=0.659, ctc_loss=0.7987, cr_loss=0.3373, attn_decoder_loss=0.636, over 5745240.66 frames. ], batch size: 85, lr: 4.48e-02, grad_scale: 8.0 +2024-09-16 13:01:47,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=4240.0, ans=0.049 +2024-09-16 13:02:13,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=4280.0, ans=0.04883333333333333 +2024-09-16 13:02:26,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=4320.0, ans=0.2568 +2024-09-16 13:02:33,557 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.52 vs. limit=9.120000000000001 +2024-09-16 13:02:36,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=4320.0, ans=0.2568 +2024-09-16 13:02:55,794 INFO [train.py:1198] (0/2) Epoch 1, batch 1100, loss[loss=0.5469, ctc_loss=0.6524, cr_loss=0.3124, attn_decoder_loss=0.5282, over 29443.00 frames. ], tot_loss[loss=0.627, ctc_loss=0.7634, cr_loss=0.3467, attn_decoder_loss=0.6042, over 5757793.12 frames. ], batch size: 78, lr: 4.48e-02, grad_scale: 8.0 +2024-09-16 13:02:59,006 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.372e+02 1.990e+02 2.415e+02 3.242e+02 8.137e+02, threshold=4.830e+02, percent-clipped=5.0 +2024-09-16 13:03:09,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=4400.0, ans=0.266 +2024-09-16 13:03:38,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=4480.0, ans=0.29000000000000004 +2024-09-16 13:04:08,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=4560.0, ans=0.04949747468305833 +2024-09-16 13:04:09,678 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=11.84 vs. limit=10.92 +2024-09-16 13:04:12,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=4560.0, ans=0.04766666666666667 +2024-09-16 13:04:17,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=4560.0, ans=0.28625 +2024-09-16 13:04:20,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=4600.0, ans=0.009869565217391305 +2024-09-16 13:04:21,898 INFO [train.py:1198] (0/2) Epoch 1, batch 1150, loss[loss=0.4932, ctc_loss=0.6142, cr_loss=0.3888, attn_decoder_loss=0.4712, over 29480.00 frames. ], tot_loss[loss=0.601, ctc_loss=0.7334, cr_loss=0.3553, attn_decoder_loss=0.5784, over 5755990.57 frames. 
], batch size: 78, lr: 4.47e-02, grad_scale: 8.0 +2024-09-16 13:04:37,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=4640.0, ans=0.2825 +2024-09-16 13:04:59,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=11.58 vs. limit=11.01 +2024-09-16 13:05:07,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=4680.0, ans=0.280625 +2024-09-16 13:05:15,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=4720.0, ans=0.27875 +2024-09-16 13:05:24,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=6.41 vs. limit=5.888 +2024-09-16 13:05:47,983 INFO [train.py:1198] (0/2) Epoch 1, batch 1200, loss[loss=0.498, ctc_loss=0.5944, cr_loss=0.3762, attn_decoder_loss=0.479, over 29686.00 frames. ], tot_loss[loss=0.5781, ctc_loss=0.7058, cr_loss=0.3638, attn_decoder_loss=0.5558, over 5748996.29 frames. ], batch size: 85, lr: 4.47e-02, grad_scale: 16.0 +2024-09-16 13:05:51,295 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.465e+02 1.904e+02 2.227e+02 2.860e+02 9.470e+02, threshold=4.454e+02, percent-clipped=3.0 +2024-09-16 13:06:02,175 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=11.98 vs. limit=11.1 +2024-09-16 13:06:05,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=4840.0, ans=0.2516 +2024-09-16 13:06:23,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=4880.0, ans=0.0 +2024-09-16 13:06:24,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=4880.0, ans=0.27125 +2024-09-16 13:06:26,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=4880.0, ans=0.04633333333333334 +2024-09-16 13:06:29,275 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.20 vs. limit=5.0 +2024-09-16 13:06:54,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=4960.0, ans=0.046 +2024-09-16 13:07:10,530 INFO [train.py:1198] (0/2) Epoch 1, batch 1250, loss[loss=0.4894, ctc_loss=0.5835, cr_loss=0.4242, attn_decoder_loss=0.4695, over 29527.00 frames. ], tot_loss[loss=0.5566, ctc_loss=0.6783, cr_loss=0.3726, attn_decoder_loss=0.5348, over 5776031.90 frames. ], batch size: 92, lr: 4.47e-02, grad_scale: 16.0 +2024-09-16 13:07:23,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten.whitening_limit, batch_count=5000.0, ans=11.25 +2024-09-16 13:07:35,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=5040.0, ans=0.26375000000000004 +2024-09-16 13:07:44,444 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.96 vs. 
limit=11.31 +2024-09-16 13:07:45,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=5080.0, ans=0.045500000000000006 +2024-09-16 13:08:07,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=5120.0, ans=0.04533333333333334 +2024-09-16 13:08:30,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=5160.0, ans=0.009747826086956521 +2024-09-16 13:08:34,875 INFO [train.py:1198] (0/2) Epoch 1, batch 1300, loss[loss=0.5028, ctc_loss=0.6052, cr_loss=0.4383, attn_decoder_loss=0.4817, over 28242.00 frames. ], tot_loss[loss=0.5369, ctc_loss=0.6519, cr_loss=0.3785, attn_decoder_loss=0.5157, over 5780082.60 frames. ], batch size: 111, lr: 4.47e-02, grad_scale: 16.0 +2024-09-16 13:08:38,069 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.446e+02 1.796e+02 2.066e+02 2.551e+02 7.251e+02, threshold=4.131e+02, percent-clipped=4.0 +2024-09-16 13:08:49,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.48 vs. limit=6.3 +2024-09-16 13:08:51,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=5240.0, ans=0.254375 +2024-09-16 13:08:59,115 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.50 vs. limit=6.3100000000000005 +2024-09-16 13:09:48,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=11.96 vs. limit=11.52 +2024-09-16 13:09:49,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=5360.0, ans=0.0 +2024-09-16 13:09:53,177 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.73 vs. limit=6.34 +2024-09-16 13:09:56,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=5360.0, ans=0.24875000000000003 +2024-09-16 13:09:59,471 INFO [train.py:1198] (0/2) Epoch 1, batch 1350, loss[loss=0.4506, ctc_loss=0.5237, cr_loss=0.4385, attn_decoder_loss=0.4327, over 29769.00 frames. ], tot_loss[loss=0.5194, ctc_loss=0.6272, cr_loss=0.3845, attn_decoder_loss=0.4989, over 5796444.63 frames. ], batch size: 81, lr: 4.46e-02, grad_scale: 16.0 +2024-09-16 13:10:23,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=5440.0, ans=0.009686956521739131 +2024-09-16 13:10:45,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.16 vs. 
limit=11.61 +2024-09-16 13:11:04,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=5560.0, ans=0.2834 +2024-09-16 13:11:06,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=5560.0, ans=0.239375 +2024-09-16 13:11:07,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=5560.0, ans=0.025 +2024-09-16 13:11:21,354 INFO [train.py:1198] (0/2) Epoch 1, batch 1400, loss[loss=0.4132, ctc_loss=0.4784, cr_loss=0.3803, attn_decoder_loss=0.3975, over 29581.00 frames. ], tot_loss[loss=0.5043, ctc_loss=0.6047, cr_loss=0.3896, attn_decoder_loss=0.4845, over 5807429.20 frames. ], batch size: 69, lr: 4.46e-02, grad_scale: 16.0 +2024-09-16 13:11:24,541 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.351e+02 1.700e+02 1.984e+02 2.487e+02 6.195e+02, threshold=3.968e+02, percent-clipped=5.0 +2024-09-16 13:11:24,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=5600.0, ans=0.025 +2024-09-16 13:11:35,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.11 vs. limit=11.7 +2024-09-16 13:11:38,524 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.28 vs. limit=11.73 +2024-09-16 13:11:47,901 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.82 vs. limit=9.615 +2024-09-16 13:12:08,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=5680.0, ans=0.23375 +2024-09-16 13:12:13,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=5720.0, ans=0.6998 +2024-09-16 13:12:17,191 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:12:23,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=11.87 vs. limit=11.79 +2024-09-16 13:12:31,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=5760.0, ans=0.0 +2024-09-16 13:12:38,547 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.09 vs. limit=11.82 +2024-09-16 13:12:44,056 INFO [train.py:1198] (0/2) Epoch 1, batch 1450, loss[loss=0.4921, ctc_loss=0.5856, cr_loss=0.4361, attn_decoder_loss=0.472, over 29419.00 frames. ], tot_loss[loss=0.4929, ctc_loss=0.5873, cr_loss=0.3953, attn_decoder_loss=0.4737, over 5805179.68 frames. 
], batch size: 94, lr: 4.46e-02, grad_scale: 16.0 +2024-09-16 13:12:52,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer_ff3.min_abs, batch_count=5800.0, ans=0.2 +2024-09-16 13:12:54,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=5800.0, ans=0.22812500000000002 +2024-09-16 13:12:57,564 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.77 vs. limit=6.45 +2024-09-16 13:13:00,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=5840.0, ans=0.0635 +2024-09-16 13:13:29,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=5880.0, ans=0.025 +2024-09-16 13:13:47,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=5960.0, ans=0.22062500000000002 +2024-09-16 13:13:53,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=5960.0, ans=0.6914 +2024-09-16 13:13:56,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=5960.0, ans=0.00957391304347826 +2024-09-16 13:14:06,645 INFO [train.py:1198] (0/2) Epoch 1, batch 1500, loss[loss=0.4626, ctc_loss=0.5422, cr_loss=0.4061, attn_decoder_loss=0.4448, over 29638.00 frames. ], tot_loss[loss=0.4816, ctc_loss=0.5695, cr_loss=0.4004, attn_decoder_loss=0.4629, over 5804353.82 frames. ], batch size: 86, lr: 4.46e-02, grad_scale: 16.0 +2024-09-16 13:14:07,766 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.78 vs. limit=12.0 +2024-09-16 13:14:09,809 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.205e+02 1.657e+02 1.840e+02 2.318e+02 6.248e+02, threshold=3.680e+02, percent-clipped=4.0 +2024-09-16 13:14:24,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=6040.0, ans=0.04949747468305833 +2024-09-16 13:14:36,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.21 vs. limit=9.765 +2024-09-16 13:14:37,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=6080.0, ans=0.21500000000000002 +2024-09-16 13:14:41,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=6080.0, ans=0.21500000000000002 +2024-09-16 13:14:54,912 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=11.98 vs. limit=12.09 +2024-09-16 13:15:01,072 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.83 vs. limit=3.918 +2024-09-16 13:15:04,743 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.98 vs. 
limit=12.09 +2024-09-16 13:15:12,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=6160.0, ans=0.2924 +2024-09-16 13:15:27,847 INFO [train.py:1198] (0/2) Epoch 1, batch 1550, loss[loss=0.4641, ctc_loss=0.5356, cr_loss=0.4579, attn_decoder_loss=0.446, over 29531.00 frames. ], tot_loss[loss=0.4727, ctc_loss=0.5548, cr_loss=0.4039, attn_decoder_loss=0.4546, over 5779811.56 frames. ], batch size: 90, lr: 4.45e-02, grad_scale: 16.0 +2024-09-16 13:15:29,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=6200.0, ans=0.20937499999999998 +2024-09-16 13:15:33,706 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.71 vs. limit=9.825 +2024-09-16 13:15:52,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=6240.0, ans=0.04066666666666667 +2024-09-16 13:16:03,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=6280.0, ans=0.6802 +2024-09-16 13:16:05,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=6280.0, ans=0.0405 +2024-09-16 13:16:08,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=6280.0, ans=0.0405 +2024-09-16 13:16:24,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=6320.0, ans=0.20375 +2024-09-16 13:16:26,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=6320.0, ans=0.0 +2024-09-16 13:16:27,049 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.04 vs. limit=3.948 +2024-09-16 13:16:28,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.30 vs. limit=9.870000000000001 +2024-09-16 13:16:38,294 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.33 vs. limit=9.885 +2024-09-16 13:16:42,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=6360.0, ans=0.20187500000000003 +2024-09-16 13:16:50,864 INFO [train.py:1198] (0/2) Epoch 1, batch 1600, loss[loss=0.4429, ctc_loss=0.4985, cr_loss=0.4625, attn_decoder_loss=0.4264, over 29661.00 frames. ], tot_loss[loss=0.4643, ctc_loss=0.5407, cr_loss=0.4076, attn_decoder_loss=0.4468, over 5760982.73 frames. 
], batch size: 85, lr: 4.45e-02, grad_scale: 32.0 +2024-09-16 13:16:53,976 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.353e+02 1.789e+02 2.003e+02 2.671e+02 7.111e+02, threshold=4.005e+02, percent-clipped=7.0 +2024-09-16 13:16:55,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=6400.0, ans=0.2 +2024-09-16 13:16:57,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=6400.0, ans=0.2 +2024-09-16 13:16:58,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=6400.0, ans=0.236 +2024-09-16 13:17:03,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=6400.0, ans=0.2 +2024-09-16 13:17:20,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=6440.0, ans=0.009469565217391304 +2024-09-16 13:17:23,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=6480.0, ans=0.025 +2024-09-16 13:17:34,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=6480.0, ans=0.19624999999999998 +2024-09-16 13:18:08,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=6560.0, ans=0.2344 +2024-09-16 13:18:13,087 INFO [train.py:1198] (0/2) Epoch 1, batch 1650, loss[loss=0.4475, ctc_loss=0.5003, cr_loss=0.4588, attn_decoder_loss=0.4315, over 29704.00 frames. ], tot_loss[loss=0.4564, ctc_loss=0.527, cr_loss=0.4103, attn_decoder_loss=0.4395, over 5755977.87 frames. ], batch size: 89, lr: 4.45e-02, grad_scale: 32.0 +2024-09-16 13:18:16,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=6600.0, ans=0.03916666666666667 +2024-09-16 13:18:26,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=6600.0, ans=0.669 +2024-09-16 13:18:38,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=6640.0, ans=0.025 +2024-09-16 13:19:17,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=6760.0, ans=0.038500000000000006 +2024-09-16 13:19:20,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=6760.0, ans=0.6634 +2024-09-16 13:19:25,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=6760.0, ans=8.379999999999999 +2024-09-16 13:19:26,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=6760.0, ans=0.025 +2024-09-16 13:19:31,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=6800.0, ans=0.23199999999999998 +2024-09-16 13:19:32,657 INFO [train.py:1198] (0/2) Epoch 1, batch 1700, loss[loss=0.3626, ctc_loss=0.3917, cr_loss=0.3866, attn_decoder_loss=0.3508, over 29555.00 frames. ], tot_loss[loss=0.4476, ctc_loss=0.5114, cr_loss=0.4135, attn_decoder_loss=0.4313, over 5778848.91 frames. 
], batch size: 69, lr: 4.44e-02, grad_scale: 16.0 +2024-09-16 13:19:37,455 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.203e+02 1.510e+02 1.749e+02 2.059e+02 5.300e+02, threshold=3.498e+02, percent-clipped=2.0 +2024-09-16 13:19:52,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=6840.0, ans=0.009382608695652174 +2024-09-16 13:19:53,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=6840.0, ans=0.179375 +2024-09-16 13:20:04,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=6840.0, ans=0.04949747468305833 +2024-09-16 13:20:10,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=6880.0, ans=0.23120000000000002 +2024-09-16 13:20:36,386 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=13.16 vs. limit=12.690000000000001 +2024-09-16 13:20:44,151 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=9.35 vs. limit=8.48 +2024-09-16 13:20:51,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=6960.0, ans=0.2304 +2024-09-16 13:20:54,500 INFO [train.py:1198] (0/2) Epoch 1, batch 1750, loss[loss=0.3629, ctc_loss=0.4045, cr_loss=0.3702, attn_decoder_loss=0.3501, over 29332.00 frames. ], tot_loss[loss=0.4402, ctc_loss=0.4982, cr_loss=0.4154, attn_decoder_loss=0.4245, over 5787860.01 frames. ], batch size: 67, lr: 4.44e-02, grad_scale: 16.0 +2024-09-16 13:20:58,450 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.80 vs. limit=10.125 +2024-09-16 13:21:53,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=7120.0, ans=0.009321739130434784 +2024-09-16 13:21:59,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=7160.0, ans=0.2284 +2024-09-16 13:22:16,321 INFO [train.py:1198] (0/2) Epoch 1, batch 1800, loss[loss=0.4156, ctc_loss=0.4391, cr_loss=0.4392, attn_decoder_loss=0.4033, over 29697.00 frames. ], tot_loss[loss=0.4347, ctc_loss=0.4877, cr_loss=0.4187, attn_decoder_loss=0.4195, over 5789992.94 frames. ], batch size: 83, lr: 4.44e-02, grad_scale: 16.0 +2024-09-16 13:22:17,190 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.76 vs. limit=12.9 +2024-09-16 13:22:21,106 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.247e+02 1.571e+02 1.759e+02 2.049e+02 3.849e+02, threshold=3.518e+02, percent-clipped=1.0 +2024-09-16 13:22:25,104 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=13.23 vs. 
limit=12.9 +2024-09-16 13:22:29,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=7200.0, ans=0.03666666666666667 +2024-09-16 13:22:29,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.75 vs. limit=12.9 +2024-09-16 13:22:56,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=7280.0, ans=0.2272 +2024-09-16 13:23:02,982 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.86 vs. limit=12.99 +2024-09-16 13:23:04,817 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=8.09 vs. limit=10.245000000000001 +2024-09-16 13:23:19,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=7360.0, ans=0.15500000000000003 +2024-09-16 13:23:29,618 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.08 vs. limit=13.02 +2024-09-16 13:23:35,125 INFO [train.py:1198] (0/2) Epoch 1, batch 1850, loss[loss=0.4266, ctc_loss=0.4562, cr_loss=0.4452, attn_decoder_loss=0.4134, over 29654.00 frames. ], tot_loss[loss=0.4286, ctc_loss=0.4763, cr_loss=0.4205, attn_decoder_loss=0.414, over 5796768.11 frames. ], batch size: 86, lr: 4.43e-02, grad_scale: 16.0 +2024-09-16 13:23:38,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=7400.0, ans=0.009260869565217392 +2024-09-16 13:23:40,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=7400.0, ans=0.025 +2024-09-16 13:23:46,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=7400.0, ans=0.153125 +2024-09-16 13:23:52,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=7440.0, ans=0.15125 +2024-09-16 13:23:54,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=7440.0, ans=0.09899494936611666 +2024-09-16 13:24:13,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=7480.0, ans=0.14937499999999998 +2024-09-16 13:24:17,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=7480.0, ans=0.14937499999999998 +2024-09-16 13:24:19,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=7480.0, ans=0.04949747468305833 +2024-09-16 13:24:39,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=7560.0, ans=0.025 +2024-09-16 13:24:53,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=7600.0, ans=0.025 +2024-09-16 13:24:54,517 INFO [train.py:1198] (0/2) Epoch 1, batch 1900, loss[loss=0.4147, ctc_loss=0.439, cr_loss=0.4387, attn_decoder_loss=0.4022, over 29721.00 frames. 
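Almost every INFO line from scaling.py:214 prints the current value (the ans= field) of a ScheduledFloat: a scalar hyperparameter that is piecewise-linearly interpolated against the global batch_count. A sketch of that behaviour is below; the (0, 0.3) -> (20000, 0.1) breakpoints are a reading inferred from the logged dropout values (0.3 - 0.2 * 6400/20000 = 0.236, matching the feed_forward1.out_proj.dropout_p ... ans=0.236 entry at batch_count=6400 above), not values copied from the recipe source.

class ScheduledFloatSketch:
    """A float that follows batch_count by piecewise-linear
    interpolation between (batch_count, value) breakpoints."""

    def __init__(self, *points):
        self.points = sorted(points)
        self.batch_count = 0.0

    def value(self):
        pts = self.points
        if self.batch_count <= pts[0][0]:
            return pts[0][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if self.batch_count <= x1:
                t = (self.batch_count - x0) / (x1 - x0)
                return y0 + t * (y1 - y0)
        return pts[-1][1]

dropout_p = ScheduledFloatSketch((0.0, 0.3), (20000.0, 0.1))
dropout_p.batch_count = 6400.0
print(dropout_p.value())   # -> 0.236, cf. ans=0.236 logged above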
], tot_loss[loss=0.4249, ctc_loss=0.4683, cr_loss=0.4239, attn_decoder_loss=0.4107, over 5804683.10 frames. ], batch size: 89, lr: 4.43e-02, grad_scale: 16.0 +2024-09-16 13:24:59,252 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.162e+02 1.597e+02 1.785e+02 2.217e+02 4.479e+02, threshold=3.571e+02, percent-clipped=3.0 +2024-09-16 13:25:07,763 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.62 vs. limit=13.2 +2024-09-16 13:25:13,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=7640.0, ans=0.14187499999999997 +2024-09-16 13:26:05,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=7760.0, ans=0.13624999999999998 +2024-09-16 13:26:05,887 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.75 vs. limit=13.32 +2024-09-16 13:26:14,696 INFO [train.py:1198] (0/2) Epoch 1, batch 1950, loss[loss=0.4166, ctc_loss=0.4456, cr_loss=0.4555, attn_decoder_loss=0.4033, over 29482.00 frames. ], tot_loss[loss=0.422, ctc_loss=0.4608, cr_loss=0.4279, attn_decoder_loss=0.4081, over 5819109.47 frames. ], batch size: 78, lr: 4.43e-02, grad_scale: 16.0 +2024-09-16 13:26:19,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=7800.0, ans=0.627 +2024-09-16 13:26:21,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=7800.0, ans=0.009173913043478261 +2024-09-16 13:26:31,171 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.92 vs. limit=10.44 +2024-09-16 13:26:54,639 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.34 vs. limit=13.41 +2024-09-16 13:27:02,242 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.11 vs. limit=10.47 +2024-09-16 13:27:22,494 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.00 vs. limit=6.99 +2024-09-16 13:27:26,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=7960.0, ans=0.22039999999999998 +2024-09-16 13:27:28,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=7960.0, ans=9.975 +2024-09-16 13:27:33,411 INFO [train.py:1198] (0/2) Epoch 1, batch 2000, loss[loss=0.3476, ctc_loss=0.3658, cr_loss=0.3484, attn_decoder_loss=0.3378, over 29328.00 frames. ], tot_loss[loss=0.4192, ctc_loss=0.4547, cr_loss=0.4296, attn_decoder_loss=0.4057, over 5797471.64 frames. 
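Each loss[...] field above reports the current batch and each tot_loss[...] a frame-weighted running average; the headline loss is a weighted sum of the three criteria. The logged numbers are consistent with weights of 0.1 (CTC), 0.9 (attention decoder) and 0.02 (consistency regularization): for the batch 2000 entry immediately above, 0.1 * 0.3658 + 0.9 * 0.3378 + 0.02 * 0.3484 = 0.3476, exactly the logged loss. A sketch with those inferred weights:

def combined_loss(ctc_loss, attn_decoder_loss, cr_loss,
                  ctc_weight=0.1, attn_weight=0.9, cr_weight=0.02):
    # Weights inferred from the logged values; e.g. batch 2000 above:
    # 0.1*0.3658 + 0.9*0.3378 + 0.02*0.3484 = 0.3476 == loss
    return (ctc_weight * ctc_loss
            + attn_weight * attn_decoder_loss
            + cr_weight * cr_loss)

class FrameWeightedAverage:
    """Running average weighted by frame count, in the spirit of the
    tot_loss[... over N frames.] fields (the real tracker may decay
    old batches, which is why the logged frame totals fluctuate)."""

    def __init__(self):
        self.loss_sum = 0.0
        self.frames = 0.0

    def update(self, loss, num_frames):
        self.loss_sum += loss * num_frames
        self.frames += num_frames

    def value(self):
        return self.loss_sum / max(self.frames, 1.0)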
], batch size: 67, lr: 4.42e-02, grad_scale: 32.0 +2024-09-16 13:27:38,129 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.167e+02 1.451e+02 1.684e+02 2.248e+02 3.741e+02, threshold=3.368e+02, percent-clipped=1.0 +2024-09-16 13:27:43,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=8000.0, ans=0.125 +2024-09-16 13:28:01,628 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.28 vs. limit=10.515 +2024-09-16 13:28:08,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=8080.0, ans=0.125 +2024-09-16 13:28:12,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=8080.0, ans=0.6172 +2024-09-16 13:28:12,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=8080.0, ans=0.125 +2024-09-16 13:28:19,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=8080.0, ans=0.125 +2024-09-16 13:28:24,840 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.64 vs. limit=9.059999999999999 +2024-09-16 13:28:30,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=8120.0, ans=0.125 +2024-09-16 13:28:41,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=8160.0, ans=0.125 +2024-09-16 13:28:52,986 INFO [train.py:1198] (0/2) Epoch 1, batch 2050, loss[loss=0.368, ctc_loss=0.3769, cr_loss=0.4089, attn_decoder_loss=0.358, over 29435.00 frames. ], tot_loss[loss=0.415, ctc_loss=0.4467, cr_loss=0.4293, attn_decoder_loss=0.402, over 5789589.48 frames. ], batch size: 70, lr: 4.42e-02, grad_scale: 16.0 +2024-09-16 13:28:59,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=8200.0, ans=0.125 +2024-09-16 13:29:08,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=8240.0, ans=0.125 +2024-09-16 13:29:08,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=8240.0, ans=0.125 +2024-09-16 13:29:11,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=8240.0, ans=0.125 +2024-09-16 13:29:41,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=8320.0, ans=0.125 +2024-09-16 13:29:45,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=8320.0, ans=0.025 +2024-09-16 13:29:55,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.32 vs. limit=9.18 +2024-09-16 13:30:01,904 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.51 vs. 
limit=9.18 +2024-09-16 13:30:10,109 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.89 vs. limit=13.77 +2024-09-16 13:30:10,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=8400.0, ans=0.125 +2024-09-16 13:30:12,267 INFO [train.py:1198] (0/2) Epoch 1, batch 2100, loss[loss=0.3875, ctc_loss=0.3978, cr_loss=0.4165, attn_decoder_loss=0.3771, over 29795.00 frames. ], tot_loss[loss=0.4109, ctc_loss=0.4391, cr_loss=0.4306, attn_decoder_loss=0.3982, over 5801770.81 frames. ], batch size: 81, lr: 4.42e-02, grad_scale: 16.0 +2024-09-16 13:30:16,202 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.61 vs. limit=13.8 +2024-09-16 13:30:18,321 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.222e+02 1.523e+02 1.725e+02 2.064e+02 6.365e+02, threshold=3.449e+02, percent-clipped=2.0 +2024-09-16 13:30:35,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=8440.0, ans=0.125 +2024-09-16 13:30:38,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=8440.0, ans=0.009034782608695653 +2024-09-16 13:30:45,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.53 vs. limit=10.68 +2024-09-16 13:31:16,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.14 vs. limit=10.71 +2024-09-16 13:31:17,483 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:31:19,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=8560.0, ans=0.125 +2024-09-16 13:31:29,553 INFO [train.py:1198] (0/2) Epoch 1, batch 2150, loss[loss=0.394, ctc_loss=0.4058, cr_loss=0.4266, attn_decoder_loss=0.3832, over 29461.00 frames. ], tot_loss[loss=0.4068, ctc_loss=0.4317, cr_loss=0.4312, attn_decoder_loss=0.3944, over 5816398.39 frames. 
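The scaling.py:1024 Whitening lines compare a per-module statistic of the activations (metric=) against a scheduled ceiling (limit=); when the metric exceeds the limit, the module nudges the activations toward a whiter, more isotropic covariance via an auxiliary gradient. One way to compute such a metric, sketched under the assumption that it is the ratio mean(eig^2) / mean(eig)^2 of the per-group channel covariance, which equals 1.0 for perfectly white features and grows as the covariance becomes anisotropic:

import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> torch.Tensor:
    """x: (..., num_channels). Computes g * trace(C @ C) / trace(C)**2
    per channel group, i.e. mean(eig^2) / mean(eig)^2 of the covariance
    C, without an eigendecomposition."""
    x = x.reshape(-1, x.shape[-1])
    n, c = x.shape
    g = c // num_groups
    x = x.reshape(n, num_groups, g).transpose(0, 1)      # (groups, n, g)
    x = x - x.mean(dim=1, keepdim=True)
    cov = x.transpose(1, 2) @ x / n                      # (groups, g, g)
    trace = cov.diagonal(dim1=-2, dim2=-1).sum(-1)
    trace_sq = (cov * cov).sum((-2, -1))    # trace(C @ C); C is symmetric
    return (g * trace_sq / trace ** 2).mean()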
], batch size: 78, lr: 4.41e-02, grad_scale: 16.0 +2024-09-16 13:31:29,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=8600.0, ans=0.009000000000000001 +2024-09-16 13:31:34,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=8600.0, ans=0.0 +2024-09-16 13:31:42,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=8600.0, ans=0.030833333333333338 +2024-09-16 13:31:56,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=8640.0, ans=0.125 +2024-09-16 13:32:06,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=8680.0, ans=0.008982608695652174 +2024-09-16 13:32:09,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=8680.0, ans=0.125 +2024-09-16 13:32:26,807 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.08 vs. limit=10.77 +2024-09-16 13:32:38,699 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:32:43,885 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.06 vs. limit=10.785 +2024-09-16 13:32:49,790 INFO [train.py:1198] (0/2) Epoch 1, batch 2200, loss[loss=0.4029, ctc_loss=0.4048, cr_loss=0.4228, attn_decoder_loss=0.3933, over 29615.00 frames. ], tot_loss[loss=0.4045, ctc_loss=0.4266, cr_loss=0.4323, attn_decoder_loss=0.3924, over 5813094.68 frames. ], batch size: 86, lr: 4.41e-02, grad_scale: 16.0 +2024-09-16 13:32:55,854 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.205e+02 1.455e+02 1.695e+02 2.050e+02 4.766e+02, threshold=3.390e+02, percent-clipped=3.0 +2024-09-16 13:33:00,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=8800.0, ans=0.008956521739130436 +2024-09-16 13:33:16,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=8840.0, ans=0.125 +2024-09-16 13:33:17,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=8840.0, ans=0.2116 +2024-09-16 13:33:55,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=8960.0, ans=0.2104 +2024-09-16 13:33:56,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=8960.0, ans=0.5864 +2024-09-16 13:33:59,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=8960.0, ans=0.125 +2024-09-16 13:34:01,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=8960.0, ans=0.5864 +2024-09-16 13:34:09,150 INFO [train.py:1198] (0/2) Epoch 1, batch 2250, loss[loss=0.4089, ctc_loss=0.4177, cr_loss=0.4708, attn_decoder_loss=0.3974, over 29707.00 frames. 
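The grad_scale field in the batch lines moves between 32.0 and 16.0 across the entries above: standard dynamic loss scaling for mixed-precision training, where the scale is halved when a step produces inf/nan gradients and grown back after a run of clean steps. icefall ships its own scaler, but PyTorch's stock GradScaler shows the same mechanics (the hyperparameters below are illustrative):

import torch

scaler = torch.cuda.amp.GradScaler(
    init_scale=32.0,      # matches the grad_scale values seen above
    growth_factor=2.0,    # double after a run of finite-gradient steps
    backoff_factor=0.5,   # halve on inf/nan gradients (32.0 -> 16.0)
    growth_interval=2000)

def train_step(model, batch, optimizer, criterion):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = criterion(model(batch["inputs"]), batch["targets"])
    scaler.scale(loss).backward()
    scaler.step(optimizer)   # skips the update if gradients overflowed
    scaler.update()          # adjusts the scale for the next batch
    return loss.detach(), scaler.get_scale()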
], tot_loss[loss=0.402, ctc_loss=0.4211, cr_loss=0.4331, attn_decoder_loss=0.3903, over 5811389.82 frames. ], batch size: 82, lr: 4.40e-02, grad_scale: 16.0 +2024-09-16 13:34:13,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=9000.0, ans=0.02916666666666667 +2024-09-16 13:34:34,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=9040.0, ans=0.5836 +2024-09-16 13:34:36,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=9040.0, ans=0.029 +2024-09-16 13:34:56,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.84 vs. limit=9.559999999999999 +2024-09-16 13:35:23,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=9160.0, ans=0.008878260869565217 +2024-09-16 13:35:26,223 INFO [train.py:1198] (0/2) Epoch 1, batch 2300, loss[loss=0.3703, ctc_loss=0.3731, cr_loss=0.4347, attn_decoder_loss=0.3604, over 29298.00 frames. ], tot_loss[loss=0.398, ctc_loss=0.4143, cr_loss=0.4329, attn_decoder_loss=0.3866, over 5798473.97 frames. ], batch size: 71, lr: 4.40e-02, grad_scale: 16.0 +2024-09-16 13:35:26,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=9200.0, ans=0.20800000000000002 +2024-09-16 13:35:31,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=9200.0, ans=0.125 +2024-09-16 13:35:32,248 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.236e+02 1.494e+02 1.712e+02 1.992e+02 4.170e+02, threshold=3.424e+02, percent-clipped=4.0 +2024-09-16 13:35:53,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.92 vs. limit=10.965 +2024-09-16 13:36:15,520 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.88 vs. limit=9.66 +2024-09-16 13:36:17,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=9320.0, ans=0.1568 +2024-09-16 13:36:23,000 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=24.39 vs. limit=14.49 +2024-09-16 13:36:27,615 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.92 vs. limit=14.49 +2024-09-16 13:36:45,226 INFO [train.py:1198] (0/2) Epoch 1, batch 2350, loss[loss=0.4071, ctc_loss=0.4221, cr_loss=0.4346, attn_decoder_loss=0.3958, over 29701.00 frames. ], tot_loss[loss=0.3958, ctc_loss=0.4096, cr_loss=0.4339, attn_decoder_loss=0.3847, over 5803444.36 frames. 
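The many *_skip_rate entries (attention_skip_rate, conv_skip_rate, ff2_skip_rate, pos_emb_skip_rate, ...) are scheduled probabilities of stochastically dropping a residual sub-module for a whole batch during training, a layer-dropout-style regularizer whose schedules anneal it toward zero. A minimal sketch of the mechanism (the class and rate values are placeholders, not the zipformer code):

import torch
import torch.nn as nn

class SkippableBranch(nn.Module):
    """Residual branch that is skipped with probability `skip_rate`
    while training and always applied at inference (sketch)."""

    def __init__(self, branch: nn.Module):
        super().__init__()
        self.branch = branch

    def forward(self, x, skip_rate: float = 0.0):
        if self.training and torch.rand(()) < skip_rate:
            return x              # branch dropped for this batch
        return x + self.branch(x)

# skip_rate would itself be a scheduled value, e.g. starting near 0.2
# and annealed toward 0.0 over the first few thousand batches.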
], batch size: 83, lr: 4.40e-02, grad_scale: 16.0 +2024-09-16 13:36:51,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=9400.0, ans=0.027500000000000004 +2024-09-16 13:36:53,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=9400.0, ans=0.09899494936611666 +2024-09-16 13:37:04,143 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.25 vs. limit=14.58 +2024-09-16 13:37:06,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=9440.0, ans=0.2056 +2024-09-16 13:37:25,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=9480.0, ans=0.125 +2024-09-16 13:37:52,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.50 vs. limit=14.67 +2024-09-16 13:38:02,860 INFO [train.py:1198] (0/2) Epoch 1, batch 2400, loss[loss=0.3511, ctc_loss=0.3475, cr_loss=0.4182, attn_decoder_loss=0.3422, over 29532.00 frames. ], tot_loss[loss=0.3944, ctc_loss=0.406, cr_loss=0.4353, attn_decoder_loss=0.3834, over 5807653.97 frames. ], batch size: 76, lr: 4.39e-02, grad_scale: 32.0 +2024-09-16 13:38:08,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=9600.0, ans=0.125 +2024-09-16 13:38:08,789 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.34 vs. limit=9.8 +2024-09-16 13:38:10,878 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.192e+02 1.445e+02 1.624e+02 1.930e+02 3.418e+02, threshold=3.248e+02, percent-clipped=0.0 +2024-09-16 13:38:14,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=9600.0, ans=0.20400000000000001 +2024-09-16 13:38:17,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=9600.0, ans=0.125 +2024-09-16 13:38:26,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=9640.0, ans=0.026500000000000003 +2024-09-16 13:38:28,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=9640.0, ans=10.0 +2024-09-16 13:38:35,184 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.66 vs. 
limit=11.129999999999999 +2024-09-16 13:38:39,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=9680.0, ans=0.025 +2024-09-16 13:38:39,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=9680.0, ans=0.125 +2024-09-16 13:38:48,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=9680.0, ans=0.5612 +2024-09-16 13:38:56,389 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:39:04,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=9720.0, ans=11.145 +2024-09-16 13:39:11,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=9760.0, ans=0.026000000000000002 +2024-09-16 13:39:22,236 INFO [train.py:1198] (0/2) Epoch 1, batch 2450, loss[loss=0.4058, ctc_loss=0.4125, cr_loss=0.4606, attn_decoder_loss=0.3948, over 29700.00 frames. ], tot_loss[loss=0.3951, ctc_loss=0.4055, cr_loss=0.4372, attn_decoder_loss=0.3842, over 5783295.83 frames. ], batch size: 82, lr: 4.39e-02, grad_scale: 16.0 +2024-09-16 13:40:04,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=9880.0, ans=0.125 +2024-09-16 13:40:06,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.42 vs. limit=11.205 +2024-09-16 13:40:06,692 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.82 vs. limit=7.952 +2024-09-16 13:40:18,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=9920.0, ans=11.22 +2024-09-16 13:40:19,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=9920.0, ans=0.125 +2024-09-16 13:40:34,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.71 vs. limit=4.494 +2024-09-16 13:40:41,274 INFO [train.py:1198] (0/2) Epoch 1, batch 2500, loss[loss=0.3965, ctc_loss=0.3885, cr_loss=0.4856, attn_decoder_loss=0.3866, over 29609.00 frames. ], tot_loss[loss=0.3923, ctc_loss=0.4002, cr_loss=0.4385, attn_decoder_loss=0.3817, over 5793886.11 frames. ], batch size: 86, lr: 4.38e-02, grad_scale: 16.0 +2024-09-16 13:40:43,680 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.17 vs. limit=4.5 +2024-09-16 13:40:46,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=9.37 vs. 
limit=11.25 +2024-09-16 13:40:47,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=10000.0, ans=0.125 +2024-09-16 13:40:48,949 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.151e+02 1.428e+02 1.613e+02 1.938e+02 4.379e+02, threshold=3.227e+02, percent-clipped=3.0 +2024-09-16 13:41:08,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=10040.0, ans=0.1996 +2024-09-16 13:41:15,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_na.min_abs, batch_count=10080.0, ans=0.02 +2024-09-16 13:41:28,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=10120.0, ans=0.008669565217391305 +2024-09-16 13:41:34,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=10120.0, ans=0.5458000000000001 +2024-09-16 13:41:36,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=10120.0, ans=0.008669565217391305 +2024-09-16 13:41:37,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=10120.0, ans=0.008669565217391305 +2024-09-16 13:42:01,547 INFO [train.py:1198] (0/2) Epoch 1, batch 2550, loss[loss=0.3372, ctc_loss=0.326, cr_loss=0.4025, attn_decoder_loss=0.3295, over 29361.00 frames. ], tot_loss[loss=0.3898, ctc_loss=0.3954, cr_loss=0.4382, attn_decoder_loss=0.3795, over 5796611.75 frames. ], batch size: 67, lr: 4.38e-02, grad_scale: 16.0 +2024-09-16 13:42:17,318 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:42:52,262 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=4.06 vs. limit=11.370000000000001 +2024-09-16 13:42:53,707 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.55 vs. limit=15.24 +2024-09-16 13:43:05,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=10360.0, ans=0.125 +2024-09-16 13:43:07,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=10360.0, ans=0.125 +2024-09-16 13:43:08,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=10360.0, ans=0.023500000000000004 +2024-09-16 13:43:19,971 INFO [train.py:1198] (0/2) Epoch 1, batch 2600, loss[loss=0.365, ctc_loss=0.3506, cr_loss=0.4333, attn_decoder_loss=0.3569, over 29426.00 frames. ], tot_loss[loss=0.3883, ctc_loss=0.3915, cr_loss=0.4387, attn_decoder_loss=0.3782, over 5794113.80 frames. 
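The bypass.scale_min / out_combiner.scale_min entries constrain a learned per-channel interpolation between a block's input and its output; the logged floor decays as training progresses, so early batches are forced to rely mostly on the transformed path while later ones may learn to bypass it. The values fit a linear decay from 0.9 at batch 0 to 0.2 at batch 20000 (0.9 - 0.7 * 10120/20000 = 0.5458, matching the out_combiner.scale_min entry above). A sketch of the interpolation, where the y = x + s * (f(x) - x) form is an assumption about the bypass design:

import torch
import torch.nn as nn

class BypassSketch(nn.Module):
    """Learned interpolation y = x + s * (fx - x), with s clamped to
    [scale_min, scale_max]; scale_min follows a schedule like the
    logged 0.669 -> 0.4268 values (0.9 at batch 0 -> 0.2 at 20000)."""

    def __init__(self, num_channels: int, scale_max: float = 1.0):
        super().__init__()
        self.scale = nn.Parameter(torch.full((num_channels,), 0.5))
        self.scale_max = scale_max

    def forward(self, x, fx, scale_min: float):
        s = self.scale.clamp(min=scale_min, max=self.scale_max)
        return x + s * (fx - x)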
], batch size: 78, lr: 4.37e-02, grad_scale: 16.0 +2024-09-16 13:43:21,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=10400.0, ans=0.023333333333333334 +2024-09-16 13:43:29,539 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.186e+02 1.430e+02 1.543e+02 1.954e+02 3.702e+02, threshold=3.087e+02, percent-clipped=5.0 +2024-09-16 13:43:29,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=10400.0, ans=0.125 +2024-09-16 13:43:30,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=7.50 vs. limit=11.4 +2024-09-16 13:43:35,047 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.54 vs. limit=10.2 +2024-09-16 13:43:46,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=10440.0, ans=0.125 +2024-09-16 13:43:49,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=10440.0, ans=0.125 +2024-09-16 13:44:04,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=10480.0, ans=0.125 +2024-09-16 13:44:32,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=10560.0, ans=0.02266666666666667 +2024-09-16 13:44:32,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=10560.0, ans=0.02266666666666667 +2024-09-16 13:44:38,232 INFO [train.py:1198] (0/2) Epoch 1, batch 2650, loss[loss=0.3933, ctc_loss=0.3927, cr_loss=0.4503, attn_decoder_loss=0.3833, over 29336.00 frames. ], tot_loss[loss=0.387, ctc_loss=0.3885, cr_loss=0.4392, attn_decoder_loss=0.3771, over 5800734.95 frames. ], batch size: 100, lr: 4.37e-02, grad_scale: 16.0 +2024-09-16 13:44:58,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=10640.0, ans=0.5276000000000001 +2024-09-16 13:45:07,279 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.57 vs. limit=11.49 +2024-09-16 13:45:28,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=10720.0, ans=0.125 +2024-09-16 13:45:39,577 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.85 vs. limit=15.57 +2024-09-16 13:45:48,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=10760.0, ans=0.021833333333333337 +2024-09-16 13:45:49,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=10760.0, ans=0.125 +2024-09-16 13:45:57,785 INFO [train.py:1198] (0/2) Epoch 1, batch 2700, loss[loss=0.4015, ctc_loss=0.3935, cr_loss=0.4598, attn_decoder_loss=0.3922, over 29511.00 frames. ], tot_loss[loss=0.3864, ctc_loss=0.3863, cr_loss=0.4408, attn_decoder_loss=0.3766, over 5796894.82 frames. 
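The scaling.py:1120 WithLoss lines report auxiliary penalties attached to particular tensors (here attention weights); loss-sum=0.000e+00 means the constraint went unviolated over the logging window, while entries like loss-sum=1.144e-01 further below show it biting. A hedged sketch of one way to wire such a penalty in; the pass-through design and the example penalty function are assumptions, not the scaling.py implementation:

import torch
import torch.nn as nn

class WithAuxLoss(nn.Module):
    """Return `x` unchanged while computing a penalty on it; the caller
    adds the penalty into the training loss, and `loss_sum` mirrors the
    WithLoss: ... loss-sum= log field (a sketch)."""

    def __init__(self, name: str, penalty_fn):
        super().__init__()
        self.name = name
        self.penalty_fn = penalty_fn   # tensor -> scalar tensor
        self.loss_sum = 0.0

    def forward(self, x):
        aux = self.penalty_fn(x) if self.training else x.new_zeros(())
        self.loss_sum += float(aux.detach())
        return x, aux

# e.g. penalize attention weights that saturate near 1.0 (hypothetical):
attn_guard = WithAuxLoss(
    "encoder.layers.0.self_attn_weights",
    lambda w: torch.relu(w - 0.95).sum())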
], batch size: 87, lr: 4.36e-02, grad_scale: 16.0 +2024-09-16 13:46:01,781 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.74 vs. limit=15.6 +2024-09-16 13:46:05,442 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.205e+02 1.417e+02 1.675e+02 2.035e+02 4.386e+02, threshold=3.351e+02, percent-clipped=4.0 +2024-09-16 13:46:13,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=10840.0, ans=0.021500000000000002 +2024-09-16 13:46:26,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.55 vs. limit=15.63 +2024-09-16 13:46:28,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=10880.0, ans=0.125 +2024-09-16 13:46:34,226 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.27 vs. limit=11.58 +2024-09-16 13:46:41,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=10880.0, ans=0.125 +2024-09-16 13:47:17,504 INFO [train.py:1198] (0/2) Epoch 1, batch 2750, loss[loss=0.3506, ctc_loss=0.3514, cr_loss=0.4193, attn_decoder_loss=0.3412, over 29501.00 frames. ], tot_loss[loss=0.3831, ctc_loss=0.3813, cr_loss=0.4401, attn_decoder_loss=0.3735, over 5796533.77 frames. ], batch size: 75, lr: 4.36e-02, grad_scale: 16.0 +2024-09-16 13:47:20,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=11000.0, ans=0.125 +2024-09-16 13:47:52,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=11080.0, ans=0.125 +2024-09-16 13:47:54,955 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.42 vs. limit=15.81 +2024-09-16 13:47:55,043 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.98 vs. limit=11.655000000000001 +2024-09-16 13:47:57,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=11080.0, ans=0.125 +2024-09-16 13:48:13,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=11120.0, ans=0.1888 +2024-09-16 13:48:13,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=11120.0, ans=0.1888 +2024-09-16 13:48:14,698 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:48:18,831 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.44 vs. 
limit=11.684999999999999 +2024-09-16 13:48:30,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=11160.0, ans=0.18839999999999998 +2024-09-16 13:48:35,663 INFO [train.py:1198] (0/2) Epoch 1, batch 2800, loss[loss=0.4267, ctc_loss=0.445, cr_loss=0.462, attn_decoder_loss=0.4144, over 20050.00 frames. ], tot_loss[loss=0.3825, ctc_loss=0.3798, cr_loss=0.4405, attn_decoder_loss=0.3731, over 5776526.34 frames. ], batch size: 209, lr: 4.36e-02, grad_scale: 32.0 +2024-09-16 13:48:39,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten.whitening_limit, batch_count=11200.0, ans=15.9 +2024-09-16 13:48:43,096 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.102e+02 1.389e+02 1.617e+02 2.129e+02 5.220e+02, threshold=3.235e+02, percent-clipped=5.0 +2024-09-16 13:49:09,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=11280.0, ans=0.008417391304347826 +2024-09-16 13:49:14,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=11280.0, ans=0.125 +2024-09-16 13:49:18,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=11280.0, ans=0.0 +2024-09-16 13:49:31,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=11320.0, ans=0.008408695652173913 +2024-09-16 13:49:54,514 INFO [train.py:1198] (0/2) Epoch 1, batch 2850, loss[loss=0.3485, ctc_loss=0.3314, cr_loss=0.4234, attn_decoder_loss=0.341, over 29514.00 frames. ], tot_loss[loss=0.3819, ctc_loss=0.3781, cr_loss=0.4413, attn_decoder_loss=0.3725, over 5763794.66 frames. ], batch size: 77, lr: 4.35e-02, grad_scale: 32.0 +2024-09-16 13:50:13,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=11440.0, ans=0.49960000000000004 +2024-09-16 13:50:16,551 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:50:27,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=11480.0, ans=0.18519999999999998 +2024-09-16 13:50:27,861 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.29 vs. limit=11.805 +2024-09-16 13:50:28,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=11480.0, ans=0.008373913043478261 +2024-09-16 13:50:31,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=11480.0, ans=0.18519999999999998 +2024-09-16 13:50:36,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=11480.0, ans=0.125 +2024-09-16 13:50:47,829 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.12 vs. limit=7.88 +2024-09-16 13:50:49,500 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.79 vs. 
limit=11.82 +2024-09-16 13:51:13,785 INFO [train.py:1198] (0/2) Epoch 1, batch 2900, loss[loss=0.3713, ctc_loss=0.3634, cr_loss=0.4603, attn_decoder_loss=0.3619, over 29441.00 frames. ], tot_loss[loss=0.3817, ctc_loss=0.3763, cr_loss=0.4428, attn_decoder_loss=0.3725, over 5788267.97 frames. ], batch size: 79, lr: 4.35e-02, grad_scale: 16.0 +2024-09-16 13:51:22,927 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.082e+02 1.352e+02 1.492e+02 1.728e+02 4.022e+02, threshold=2.985e+02, percent-clipped=1.0 +2024-09-16 13:51:28,398 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.55 vs. limit=4.746 +2024-09-16 13:51:31,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=11640.0, ans=0.4926000000000001 +2024-09-16 13:51:44,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=11680.0, ans=0.125 +2024-09-16 13:51:58,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=11720.0, ans=0.125 +2024-09-16 13:52:08,650 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.66 vs. limit=16.29 +2024-09-16 13:52:11,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=11720.0, ans=0.05 +2024-09-16 13:52:17,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=11760.0, ans=0.125 +2024-09-16 13:52:17,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=11760.0, ans=0.1824 +2024-09-16 13:52:30,410 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.01 vs. limit=8.719999999999999 +2024-09-16 13:52:30,705 INFO [train.py:1198] (0/2) Epoch 1, batch 2950, loss[loss=0.3615, ctc_loss=0.3459, cr_loss=0.4431, attn_decoder_loss=0.3534, over 29510.00 frames. ], tot_loss[loss=0.3784, ctc_loss=0.3716, cr_loss=0.4404, attn_decoder_loss=0.3694, over 5782927.42 frames. ], batch size: 75, lr: 4.34e-02, grad_scale: 16.0 +2024-09-16 13:52:40,898 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.97 vs. limit=16.35 +2024-09-16 13:52:55,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=11840.0, ans=0.01733333333333334 +2024-09-16 13:53:04,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.06 vs. limit=4.782 +2024-09-16 13:53:04,256 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=9.31 vs. 
limit=11.955 +2024-09-16 13:53:15,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=11920.0, ans=0.4828 +2024-09-16 13:53:18,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=11920.0, ans=0.017 +2024-09-16 13:53:34,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=11960.0, ans=0.0 +2024-09-16 13:53:50,812 INFO [train.py:1198] (0/2) Epoch 1, batch 3000, loss[loss=0.3696, ctc_loss=0.3511, cr_loss=0.4192, attn_decoder_loss=0.3623, over 29753.00 frames. ], tot_loss[loss=0.3774, ctc_loss=0.3694, cr_loss=0.4412, attn_decoder_loss=0.3685, over 5784371.32 frames. ], batch size: 81, lr: 4.34e-02, grad_scale: 16.0 +2024-09-16 13:53:50,813 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 13:54:09,120 INFO [train.py:1230] (0/2) Epoch 1, validation: loss=0.2655, ctc_loss=0.1548, cr_loss=4.113e-15, attn_decoder_loss=0.2778, over 944034.00 frames. +2024-09-16 13:54:09,121 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 13:54:09,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=12000.0, ans=0.48000000000000004 +2024-09-16 13:54:11,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=12000.0, ans=0.00826086956521739 +2024-09-16 13:54:15,825 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:54:18,437 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.153e+02 1.470e+02 1.654e+02 2.017e+02 3.240e+02, threshold=3.308e+02, percent-clipped=3.0 +2024-09-16 13:54:31,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=12040.0, ans=0.125 +2024-09-16 13:54:38,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=12080.0, ans=0.008243478260869566 +2024-09-16 13:54:47,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=12080.0, ans=0.125 +2024-09-16 13:54:54,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=12080.0, ans=0.008243478260869566 +2024-09-16 13:55:17,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=12160.0, ans=0.125 +2024-09-16 13:55:21,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.08 vs. limit=12.059999999999999 +2024-09-16 13:55:28,459 INFO [train.py:1198] (0/2) Epoch 1, batch 3050, loss[loss=0.3693, ctc_loss=0.3563, cr_loss=0.4395, attn_decoder_loss=0.361, over 29534.00 frames. ], tot_loss[loss=0.3768, ctc_loss=0.3677, cr_loss=0.4417, attn_decoder_loss=0.368, over 5779890.45 frames. 
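At batch 3000 above, the loop pauses to compute validation loss over the whole dev set (944034 frames) and then reports peak GPU memory. A sketch of that step, with placeholder loader and criterion names; the near-zero cr_loss=4.113e-15 at validation is plausible if consistency regularization compares two differently augmented forward passes, since validation runs a single un-augmented pass:

import torch

def compute_validation_loss(model, valid_loader, criterion, device):
    model.eval()
    loss_sum, frames = 0.0, 0.0
    with torch.no_grad():
        for batch in valid_loader:
            loss, num_frames = criterion(model, batch, device)
            loss_sum += float(loss) * num_frames
            frames += num_frames
    model.train()
    max_mb = torch.cuda.max_memory_allocated(device) // (1024 ** 2)
    print(f"validation: loss={loss_sum / frames:.4f}, "
          f"over {frames:.2f} frames.")
    print(f"Maximum memory allocated so far is {max_mb}MB")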
], batch size: 76, lr: 4.33e-02, grad_scale: 16.0 +2024-09-16 13:55:36,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=12200.0, ans=0.008217391304347826 +2024-09-16 13:55:45,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=12240.0, ans=0.008208695652173914 +2024-09-16 13:56:29,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=12360.0, ans=0.07 +2024-09-16 13:56:45,424 INFO [train.py:1198] (0/2) Epoch 1, batch 3100, loss[loss=0.3977, ctc_loss=0.3863, cr_loss=0.4856, attn_decoder_loss=0.3882, over 29297.00 frames. ], tot_loss[loss=0.3753, ctc_loss=0.3649, cr_loss=0.4411, attn_decoder_loss=0.3667, over 5779631.18 frames. ], batch size: 100, lr: 4.33e-02, grad_scale: 16.0 +2024-09-16 13:56:54,618 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.036e+02 1.315e+02 1.501e+02 1.811e+02 4.491e+02, threshold=3.002e+02, percent-clipped=4.0 +2024-09-16 13:57:09,202 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.94 vs. limit=11.219999999999999 +2024-09-16 13:57:18,579 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.15 vs. limit=16.86 +2024-09-16 13:57:24,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=12480.0, ans=0.125 +2024-09-16 13:57:43,989 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.65 vs. limit=16.89 +2024-09-16 13:57:45,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=12520.0, ans=0.125 +2024-09-16 13:57:51,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=12560.0, ans=0.0 +2024-09-16 13:57:54,633 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.57 vs. limit=4.884 +2024-09-16 13:58:02,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=12560.0, ans=0.125 +2024-09-16 13:58:04,869 INFO [train.py:1198] (0/2) Epoch 1, batch 3150, loss[loss=0.3875, ctc_loss=0.379, cr_loss=0.4516, attn_decoder_loss=0.3784, over 28928.00 frames. ], tot_loss[loss=0.3742, ctc_loss=0.3625, cr_loss=0.4417, attn_decoder_loss=0.3657, over 5784319.00 frames. ], batch size: 104, lr: 4.32e-02, grad_scale: 16.0 +2024-09-16 13:58:21,076 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.08 vs. limit=16.98 +2024-09-16 13:58:27,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.72 vs. 
limit=12.24 +2024-09-16 13:58:44,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=12680.0, ans=0.125 +2024-09-16 13:58:46,460 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.22 vs. limit=17.009999999999998 +2024-09-16 13:59:04,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=12720.0, ans=0.013666666666666667 +2024-09-16 13:59:19,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=12760.0, ans=0.05 +2024-09-16 13:59:24,639 INFO [train.py:1198] (0/2) Epoch 1, batch 3200, loss[loss=0.3648, ctc_loss=0.3402, cr_loss=0.4376, attn_decoder_loss=0.3578, over 29413.00 frames. ], tot_loss[loss=0.3725, ctc_loss=0.3596, cr_loss=0.4416, attn_decoder_loss=0.3641, over 5794778.73 frames. ], batch size: 79, lr: 4.32e-02, grad_scale: 32.0 +2024-09-16 13:59:27,016 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.39 vs. limit=17.1 +2024-09-16 13:59:33,918 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.115e+02 1.352e+02 1.572e+02 1.941e+02 4.814e+02, threshold=3.143e+02, percent-clipped=7.0 +2024-09-16 13:59:48,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.69 vs. limit=17.130000000000003 +2024-09-16 14:00:05,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=12880.0, ans=0.125 +2024-09-16 14:00:11,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=12920.0, ans=0.125 +2024-09-16 14:00:14,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=12920.0, ans=0.012833333333333335 +2024-09-16 14:00:42,053 INFO [train.py:1198] (0/2) Epoch 1, batch 3250, loss[loss=0.3595, ctc_loss=0.3315, cr_loss=0.436, attn_decoder_loss=0.353, over 29695.00 frames. ], tot_loss[loss=0.3721, ctc_loss=0.3579, cr_loss=0.4427, attn_decoder_loss=0.3638, over 5801284.11 frames. ], batch size: 84, lr: 4.31e-02, grad_scale: 32.0 +2024-09-16 14:00:49,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.61 vs. limit=17.25 +2024-09-16 14:00:51,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=13000.0, ans=0.125 +2024-09-16 14:00:57,037 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.13 vs. limit=17.28 +2024-09-16 14:00:57,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=13040.0, ans=0.1696 +2024-09-16 14:01:04,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=17.09 vs. limit=12.39 +2024-09-16 14:01:48,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.71 vs. 
limit=17.369999999999997 +2024-09-16 14:01:54,455 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.44 vs. limit=12.434999999999999 +2024-09-16 14:02:01,650 INFO [train.py:1198] (0/2) Epoch 1, batch 3300, loss[loss=0.3864, ctc_loss=0.3744, cr_loss=0.4468, attn_decoder_loss=0.3779, over 28598.00 frames. ], tot_loss[loss=0.37, ctc_loss=0.3554, cr_loss=0.4409, attn_decoder_loss=0.3618, over 5798317.42 frames. ], batch size: 112, lr: 4.31e-02, grad_scale: 16.0 +2024-09-16 14:02:12,360 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.074e+02 1.388e+02 1.553e+02 1.864e+02 4.414e+02, threshold=3.106e+02, percent-clipped=4.0 +2024-09-16 14:02:36,339 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:02:45,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=13280.0, ans=0.125 +2024-09-16 14:02:45,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=13280.0, ans=0.43520000000000003 +2024-09-16 14:03:20,270 INFO [train.py:1198] (0/2) Epoch 1, batch 3350, loss[loss=0.3901, ctc_loss=0.3705, cr_loss=0.4856, attn_decoder_loss=0.3815, over 28840.00 frames. ], tot_loss[loss=0.37, ctc_loss=0.3547, cr_loss=0.4409, attn_decoder_loss=0.3619, over 5772863.26 frames. ], batch size: 104, lr: 4.30e-02, grad_scale: 16.0 +2024-09-16 14:03:25,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=13400.0, ans=0.125 +2024-09-16 14:03:36,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_ff3.min_abs, batch_count=13440.0, ans=0.2 +2024-09-16 14:04:07,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=13520.0, ans=0.4268 +2024-09-16 14:04:19,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=13520.0, ans=0.025 +2024-09-16 14:04:19,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=10.36 vs. limit=11.76 +2024-09-16 14:04:30,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.39 vs. limit=12.585 +2024-09-16 14:04:35,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.50 vs. limit=12.585 +2024-09-16 14:04:38,390 INFO [train.py:1198] (0/2) Epoch 1, batch 3400, loss[loss=0.3278, ctc_loss=0.306, cr_loss=0.4386, attn_decoder_loss=0.3205, over 29312.00 frames. ], tot_loss[loss=0.3685, ctc_loss=0.3522, cr_loss=0.4407, attn_decoder_loss=0.3605, over 5765995.42 frames. ], batch size: 67, lr: 4.29e-02, grad_scale: 16.0 +2024-09-16 14:04:49,180 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.135e+02 1.397e+02 1.601e+02 1.904e+02 5.092e+02, threshold=3.203e+02, percent-clipped=2.0 +2024-09-16 14:04:49,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=49.09 vs. 
limit=17.7 +2024-09-16 14:05:12,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.50 vs. limit=12.629999999999999 +2024-09-16 14:05:17,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=13680.0, ans=0.007895652173913043 +2024-09-16 14:05:24,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.04 vs. limit=5.058 +2024-09-16 14:05:28,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=13720.0, ans=0.009500000000000001 +2024-09-16 14:05:29,725 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=5.52 vs. limit=12.645 +2024-09-16 14:05:34,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=13720.0, ans=0.125 +2024-09-16 14:05:36,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=13720.0, ans=0.1628 +2024-09-16 14:05:42,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=13760.0, ans=0.125 +2024-09-16 14:05:51,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=13760.0, ans=0.16240000000000002 +2024-09-16 14:05:53,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=13760.0, ans=0.125 +2024-09-16 14:05:55,384 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.65 vs. limit=17.82 +2024-09-16 14:05:57,557 INFO [train.py:1198] (0/2) Epoch 1, batch 3450, loss[loss=0.3907, ctc_loss=0.3687, cr_loss=0.4695, attn_decoder_loss=0.3827, over 28315.00 frames. ], tot_loss[loss=0.3681, ctc_loss=0.3504, cr_loss=0.4413, attn_decoder_loss=0.3603, over 5773686.90 frames. ], batch size: 111, lr: 4.29e-02, grad_scale: 16.0 +2024-09-16 14:06:02,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=13800.0, ans=0.025 +2024-09-16 14:06:11,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=13840.0, ans=0.04949747468305833 +2024-09-16 14:06:36,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.23 vs. limit=12.705 +2024-09-16 14:06:42,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.38 vs. limit=5.082 +2024-09-16 14:06:46,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=13920.0, ans=0.125 +2024-09-16 14:07:16,931 INFO [train.py:1198] (0/2) Epoch 1, batch 3500, loss[loss=0.3396, ctc_loss=0.3184, cr_loss=0.4222, attn_decoder_loss=0.3326, over 29322.00 frames. ], tot_loss[loss=0.3667, ctc_loss=0.3482, cr_loss=0.4407, attn_decoder_loss=0.3589, over 5775191.89 frames. 
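The lr field decays smoothly from 4.45e-02 near batch 6400 to 4.28e-02 by batch ~14000, consistent with a schedule that is a slowly varying inverse power of the batch count, such as icefall's Eden scheduler. A sketch of that functional form; the lr_batches/lr_epochs constants shown are illustrative defaults and the recipe's own settings will differ:

def eden_lr(base_lr: float, batch: int, epoch: float,
            lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
    """Eden-style learning rate: smooth inverse-quartic-root decay in
    both batch count and (fractional) epoch."""
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor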
], batch size: 71, lr: 4.28e-02, grad_scale: 16.0 +2024-09-16 14:07:23,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=14000.0, ans=0.16 +2024-09-16 14:07:27,726 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.089e+02 1.349e+02 1.530e+02 1.819e+02 5.462e+02, threshold=3.060e+02, percent-clipped=1.0 +2024-09-16 14:07:55,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=14080.0, ans=0.4072 +2024-09-16 14:08:03,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=14120.0, ans=0.025 +2024-09-16 14:08:06,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=14120.0, ans=0.05 +2024-09-16 14:08:18,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=14160.0, ans=0.007666666666666669 +2024-09-16 14:08:32,831 INFO [train.py:1198] (0/2) Epoch 1, batch 3550, loss[loss=0.3711, ctc_loss=0.3418, cr_loss=0.4395, attn_decoder_loss=0.3646, over 29726.00 frames. ], tot_loss[loss=0.3657, ctc_loss=0.3462, cr_loss=0.4406, attn_decoder_loss=0.3581, over 5783007.99 frames. ], batch size: 89, lr: 4.28e-02, grad_scale: 16.0 +2024-09-16 14:08:39,533 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=18.90 vs. limit=12.825 +2024-09-16 14:08:51,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=14240.0, ans=0.125 +2024-09-16 14:09:01,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=14280.0, ans=0.125 +2024-09-16 14:09:10,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=14280.0, ans=0.125 +2024-09-16 14:09:45,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=14360.0, ans=0.007747826086956521 +2024-09-16 14:09:49,096 INFO [train.py:1198] (0/2) Epoch 1, batch 3600, loss[loss=0.3578, ctc_loss=0.3388, cr_loss=0.4385, attn_decoder_loss=0.3501, over 29507.00 frames. ], tot_loss[loss=0.3649, ctc_loss=0.3449, cr_loss=0.4413, attn_decoder_loss=0.3573, over 5792492.72 frames. ], batch size: 77, lr: 4.27e-02, grad_scale: 32.0 +2024-09-16 14:09:51,377 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.78 vs. limit=12.9 +2024-09-16 14:09:59,804 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.101e+02 1.344e+02 1.491e+02 1.790e+02 3.419e+02, threshold=2.982e+02, percent-clipped=2.0 +2024-09-16 14:10:09,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=14440.0, ans=0.09899494936611666 +2024-09-16 14:10:17,335 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.79 vs. 
limit=12.219999999999999 +2024-09-16 14:10:25,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=14480.0, ans=0.4172 +2024-09-16 14:10:31,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=14480.0, ans=0.006333333333333337 +2024-09-16 14:11:00,255 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.13 vs. limit=12.96 +2024-09-16 14:11:05,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=14600.0, ans=0.005833333333333336 +2024-09-16 14:11:06,519 INFO [train.py:1198] (0/2) Epoch 1, batch 3650, loss[loss=0.4012, ctc_loss=0.3826, cr_loss=0.5069, attn_decoder_loss=0.392, over 29485.00 frames. ], tot_loss[loss=0.363, ctc_loss=0.342, cr_loss=0.4397, attn_decoder_loss=0.3555, over 5794393.98 frames. ], batch size: 90, lr: 4.27e-02, grad_scale: 32.0 +2024-09-16 14:11:17,861 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.53 vs. limit=12.975 +2024-09-16 14:11:40,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=14680.0, ans=0.125 +2024-09-16 14:12:03,187 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=1.144e-01 +2024-09-16 14:12:14,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.33 vs. limit=18.57 +2024-09-16 14:12:15,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=14760.0, ans=0.125 +2024-09-16 14:12:20,084 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:12:24,267 INFO [train.py:1198] (0/2) Epoch 1, batch 3700, loss[loss=0.3749, ctc_loss=0.3458, cr_loss=0.4779, attn_decoder_loss=0.3675, over 29714.00 frames. ], tot_loss[loss=0.3622, ctc_loss=0.3402, cr_loss=0.4402, attn_decoder_loss=0.3548, over 5804349.53 frames. ], batch size: 84, lr: 4.26e-02, grad_scale: 32.0 +2024-09-16 14:12:34,992 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.086e+02 1.317e+02 1.543e+02 1.858e+02 5.259e+02, threshold=3.086e+02, percent-clipped=2.0 +2024-09-16 14:12:38,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=14840.0, ans=0.125 +2024-09-16 14:12:51,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=14840.0, ans=0.125 +2024-09-16 14:12:56,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=14880.0, ans=0.1512 +2024-09-16 14:12:57,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=14880.0, ans=0.004666666666666666 +2024-09-16 14:12:58,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.79 vs. 
limit=9.952 +2024-09-16 14:13:04,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=14880.0, ans=0.3792 +2024-09-16 14:13:18,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=14920.0, ans=0.007626086956521739 +2024-09-16 14:13:25,682 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.79 vs. limit=13.11 +2024-09-16 14:13:32,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=14960.0, ans=0.125 +2024-09-16 14:13:40,160 INFO [train.py:1198] (0/2) Epoch 1, batch 3750, loss[loss=0.3178, ctc_loss=0.2825, cr_loss=0.3824, attn_decoder_loss=0.3132, over 29319.00 frames. ], tot_loss[loss=0.361, ctc_loss=0.3383, cr_loss=0.4399, attn_decoder_loss=0.3538, over 5806888.05 frames. ], batch size: 67, lr: 4.26e-02, grad_scale: 32.0 +2024-09-16 14:13:43,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=15000.0, ans=0.125 +2024-09-16 14:13:57,855 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.44 vs. limit=13.14 +2024-09-16 14:14:03,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=15040.0, ans=0.1496 +2024-09-16 14:14:06,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=15040.0, ans=0.37360000000000004 +2024-09-16 14:14:18,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=15080.0, ans=0.0075913043478260875 +2024-09-16 14:14:27,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=15120.0, ans=0.14880000000000002 +2024-09-16 14:14:54,004 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.77 vs. limit=18.869999999999997 +2024-09-16 14:14:56,695 INFO [train.py:1198] (0/2) Epoch 1, batch 3800, loss[loss=0.3845, ctc_loss=0.3588, cr_loss=0.4869, attn_decoder_loss=0.3766, over 29641.00 frames. ], tot_loss[loss=0.3601, ctc_loss=0.3368, cr_loss=0.4394, attn_decoder_loss=0.353, over 5796913.66 frames. ], batch size: 86, lr: 4.25e-02, grad_scale: 32.0 +2024-09-16 14:15:07,357 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.116e+02 1.372e+02 1.609e+02 1.860e+02 5.053e+02, threshold=3.218e+02, percent-clipped=1.0 +2024-09-16 14:15:08,332 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.85 vs. limit=13.2 +2024-09-16 14:15:10,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=15240.0, ans=0.07 +2024-09-16 14:15:24,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=15240.0, ans=0.003166666666666672 +2024-09-16 14:15:26,474 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.39 vs. 
limit=13.23 +2024-09-16 14:15:26,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=13.31 vs. limit=12.64 +2024-09-16 14:15:35,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=15280.0, ans=0.125 +2024-09-16 14:15:36,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=15280.0, ans=0.36519999999999997 +2024-09-16 14:15:45,100 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.04 vs. limit=13.245000000000001 +2024-09-16 14:15:47,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=15320.0, ans=0.007539130434782609 +2024-09-16 14:16:02,605 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.18 vs. limit=19.02 +2024-09-16 14:16:12,322 INFO [train.py:1198] (0/2) Epoch 1, batch 3850, loss[loss=0.3808, ctc_loss=0.3531, cr_loss=0.4369, attn_decoder_loss=0.3742, over 29285.00 frames. ], tot_loss[loss=0.3592, ctc_loss=0.3346, cr_loss=0.4395, attn_decoder_loss=0.3521, over 5807244.80 frames. ], batch size: 100, lr: 4.24e-02, grad_scale: 32.0 +2024-09-16 14:16:51,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=15480.0, ans=0.04949747468305833 +2024-09-16 14:16:58,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=15520.0, ans=0.125 +2024-09-16 14:17:22,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=15560.0, ans=10.0 +2024-09-16 14:17:22,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.32 vs. limit=13.335 +2024-09-16 14:17:31,254 INFO [train.py:1198] (0/2) Epoch 1, batch 3900, loss[loss=0.3489, ctc_loss=0.3162, cr_loss=0.4551, attn_decoder_loss=0.3425, over 29652.00 frames. ], tot_loss[loss=0.3592, ctc_loss=0.334, cr_loss=0.4405, attn_decoder_loss=0.3522, over 5812476.41 frames. ], batch size: 86, lr: 4.24e-02, grad_scale: 32.0 +2024-09-16 14:17:34,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=15600.0, ans=0.0016666666666666705 +2024-09-16 14:17:41,709 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.095e+02 1.343e+02 1.512e+02 1.794e+02 6.576e+02, threshold=3.024e+02, percent-clipped=3.0 +2024-09-16 14:17:42,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=15600.0, ans=0.0 +2024-09-16 14:17:49,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=15640.0, ans=0.3526 +2024-09-16 14:17:53,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=15640.0, ans=0.125 +2024-09-16 14:18:04,928 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.87 vs. 
limit=12.84 +2024-09-16 14:18:05,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=15680.0, ans=0.125 +2024-09-16 14:18:40,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=15760.0, ans=0.34840000000000004 +2024-09-16 14:18:46,500 INFO [train.py:1198] (0/2) Epoch 1, batch 3950, loss[loss=0.3601, ctc_loss=0.3232, cr_loss=0.4529, attn_decoder_loss=0.3541, over 29505.00 frames. ], tot_loss[loss=0.3593, ctc_loss=0.3336, cr_loss=0.4403, attn_decoder_loss=0.3523, over 5832640.08 frames. ], batch size: 97, lr: 4.23e-02, grad_scale: 32.0 +2024-09-16 14:18:46,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=15800.0, ans=0.347 +2024-09-16 14:18:55,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=15800.0, ans=0.125 +2024-09-16 14:18:58,113 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.93 vs. limit=12.9 +2024-09-16 14:19:20,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.47 vs. limit=19.41 +2024-09-16 14:20:00,779 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-4000.pt +2024-09-16 14:20:09,346 INFO [train.py:1198] (0/2) Epoch 1, batch 4000, loss[loss=0.341, ctc_loss=0.3168, cr_loss=0.4174, attn_decoder_loss=0.3344, over 29530.00 frames. ], tot_loss[loss=0.3589, ctc_loss=0.333, cr_loss=0.4405, attn_decoder_loss=0.352, over 5810777.84 frames. ], batch size: 74, lr: 4.23e-02, grad_scale: 32.0 +2024-09-16 14:20:17,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=16000.0, ans=0.0 +2024-09-16 14:20:19,715 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.138e+02 1.414e+02 1.598e+02 1.942e+02 7.205e+02, threshold=3.195e+02, percent-clipped=1.0 +2024-09-16 14:20:20,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.39 vs. limit=10.4 +2024-09-16 14:20:29,442 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.68 vs. limit=19.53 +2024-09-16 14:20:44,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.65 vs. limit=10.432 +2024-09-16 14:20:53,568 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.92 vs. limit=13.06 +2024-09-16 14:21:03,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=16120.0, ans=0.125 +2024-09-16 14:21:24,252 INFO [train.py:1198] (0/2) Epoch 1, batch 4050, loss[loss=0.4206, ctc_loss=0.4309, cr_loss=0.4674, attn_decoder_loss=0.4091, over 19824.00 frames. ], tot_loss[loss=0.359, ctc_loss=0.3326, cr_loss=0.4404, attn_decoder_loss=0.3521, over 5793777.63 frames. 
], batch size: 210, lr: 4.22e-02, grad_scale: 32.0 +2024-09-16 14:22:08,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=16280.0, ans=0.0 +2024-09-16 14:22:11,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=16320.0, ans=0.125 +2024-09-16 14:22:38,766 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.68 vs. limit=13.635 +2024-09-16 14:22:40,684 INFO [train.py:1198] (0/2) Epoch 1, batch 4100, loss[loss=0.3669, ctc_loss=0.3211, cr_loss=0.4425, attn_decoder_loss=0.3621, over 29491.00 frames. ], tot_loss[loss=0.3588, ctc_loss=0.3321, cr_loss=0.4407, attn_decoder_loss=0.352, over 5789591.17 frames. ], batch size: 90, lr: 4.22e-02, grad_scale: 32.0 +2024-09-16 14:22:51,003 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.063e+02 1.366e+02 1.525e+02 1.800e+02 4.946e+02, threshold=3.051e+02, percent-clipped=3.0 +2024-09-16 14:23:03,731 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.46 vs. limit=13.665 +2024-09-16 14:23:15,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=16480.0, ans=0.32320000000000004 +2024-09-16 14:23:21,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=16480.0, ans=0.007286956521739131 +2024-09-16 14:23:54,727 INFO [train.py:1198] (0/2) Epoch 1, batch 4150, loss[loss=0.351, ctc_loss=0.3155, cr_loss=0.43, attn_decoder_loss=0.3454, over 29502.00 frames. ], tot_loss[loss=0.3578, ctc_loss=0.3302, cr_loss=0.4402, attn_decoder_loss=0.3511, over 5796288.80 frames. ], batch size: 77, lr: 4.21e-02, grad_scale: 32.0 +2024-09-16 14:24:03,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten.whitening_limit, batch_count=16600.0, ans=13.725 +2024-09-16 14:24:16,319 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.90 vs. limit=13.74 +2024-09-16 14:24:31,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=16680.0, ans=0.125 +2024-09-16 14:24:40,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=16720.0, ans=0.125 +2024-09-16 14:24:42,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=16720.0, ans=0.1328 +2024-09-16 14:25:09,099 INFO [train.py:1198] (0/2) Epoch 1, batch 4200, loss[loss=0.3854, ctc_loss=0.3617, cr_loss=0.4513, attn_decoder_loss=0.378, over 29503.00 frames. ], tot_loss[loss=0.358, ctc_loss=0.3298, cr_loss=0.4411, attn_decoder_loss=0.3513, over 5798280.64 frames. 
], batch size: 90, lr: 4.20e-02, grad_scale: 32.0 +2024-09-16 14:25:13,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=16800.0, ans=0.31200000000000006 +2024-09-16 14:25:19,663 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.040e+02 1.356e+02 1.563e+02 1.936e+02 3.144e+02, threshold=3.127e+02, percent-clipped=1.0 +2024-09-16 14:25:50,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=16880.0, ans=0.125 +2024-09-16 14:26:16,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=16960.0, ans=0.125 +2024-09-16 14:26:19,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=16960.0, ans=0.0 +2024-09-16 14:26:22,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=16960.0, ans=0.125 +2024-09-16 14:26:22,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=16960.0, ans=0.0 +2024-09-16 14:26:25,325 INFO [train.py:1198] (0/2) Epoch 1, batch 4250, loss[loss=0.3337, ctc_loss=0.2957, cr_loss=0.4459, attn_decoder_loss=0.328, over 29519.00 frames. ], tot_loss[loss=0.3571, ctc_loss=0.3279, cr_loss=0.4408, attn_decoder_loss=0.3505, over 5803651.38 frames. ], batch size: 74, lr: 4.20e-02, grad_scale: 32.0 +2024-09-16 14:26:30,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=8.13 vs. limit=10.8 +2024-09-16 14:26:34,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=17000.0, ans=0.0 +2024-09-16 14:26:37,883 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.71 vs. limit=10.8 +2024-09-16 14:27:10,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=17120.0, ans=0.0 +2024-09-16 14:27:23,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=17160.0, ans=0.025 +2024-09-16 14:27:39,364 INFO [train.py:1198] (0/2) Epoch 1, batch 4300, loss[loss=0.3627, ctc_loss=0.3337, cr_loss=0.4299, attn_decoder_loss=0.3564, over 29512.00 frames. ], tot_loss[loss=0.357, ctc_loss=0.3277, cr_loss=0.4417, attn_decoder_loss=0.3505, over 5791717.70 frames. ], batch size: 87, lr: 4.19e-02, grad_scale: 32.0 +2024-09-16 14:27:49,862 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.095e+02 1.364e+02 1.537e+02 1.919e+02 5.209e+02, threshold=3.074e+02, percent-clipped=5.0 +2024-09-16 14:28:37,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=17360.0, ans=0.2924 +2024-09-16 14:28:39,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=17360.0, ans=0.125 +2024-09-16 14:28:53,573 INFO [train.py:1198] (0/2) Epoch 1, batch 4350, loss[loss=0.3797, ctc_loss=0.3505, cr_loss=0.4916, attn_decoder_loss=0.372, over 29506.00 frames. ], tot_loss[loss=0.3608, ctc_loss=0.3311, cr_loss=0.4457, attn_decoder_loss=0.3542, over 5796068.22 frames. 
], batch size: 97, lr: 4.19e-02, grad_scale: 32.0 +2024-09-16 14:29:01,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=17400.0, ans=0.07 +2024-09-16 14:29:10,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=17440.0, ans=0.125 +2024-09-16 14:29:15,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=17440.0, ans=0.125 +2024-09-16 14:29:35,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=17480.0, ans=0.09899494936611666 +2024-09-16 14:29:44,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=3.32 vs. limit=14.07 +2024-09-16 14:29:50,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=17520.0, ans=0.28680000000000005 +2024-09-16 14:30:05,647 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.19 vs. limit=13.78 +2024-09-16 14:30:09,476 INFO [train.py:1198] (0/2) Epoch 1, batch 4400, loss[loss=0.3567, ctc_loss=0.3341, cr_loss=0.4373, attn_decoder_loss=0.3495, over 27614.00 frames. ], tot_loss[loss=0.3635, ctc_loss=0.3343, cr_loss=0.4481, attn_decoder_loss=0.3568, over 5768013.25 frames. ], batch size: 124, lr: 4.18e-02, grad_scale: 32.0 +2024-09-16 14:30:11,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=17600.0, ans=0.0 +2024-09-16 14:30:19,703 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.121e+02 1.315e+02 1.467e+02 1.766e+02 6.671e+02, threshold=2.933e+02, percent-clipped=1.0 +2024-09-16 14:30:37,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=17680.0, ans=0.125 +2024-09-16 14:31:02,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=17720.0, ans=0.27980000000000005 +2024-09-16 14:31:06,017 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.83 vs. limit=20.79 +2024-09-16 14:31:06,118 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.03 vs. limit=14.145 +2024-09-16 14:31:10,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.58 vs. limit=20.82 +2024-09-16 14:31:13,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=17760.0, ans=0.125 +2024-09-16 14:31:16,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=17760.0, ans=0.125 +2024-09-16 14:31:24,221 INFO [train.py:1198] (0/2) Epoch 1, batch 4450, loss[loss=0.3941, ctc_loss=0.393, cr_loss=0.4677, attn_decoder_loss=0.3838, over 20904.00 frames. ], tot_loss[loss=0.3675, ctc_loss=0.3411, cr_loss=0.4489, attn_decoder_loss=0.3605, over 5581140.11 frames. 
], batch size: 209, lr: 4.17e-02, grad_scale: 32.0 +2024-09-16 14:31:29,964 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=13.44 vs. limit=20.85 +2024-09-16 14:31:35,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=17800.0, ans=0.12200000000000003 +2024-09-16 14:32:34,762 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.81 vs. limit=14.235 +2024-09-16 14:32:40,074 INFO [train.py:1198] (0/2) Epoch 1, batch 4500, loss[loss=0.3911, ctc_loss=0.3933, cr_loss=0.4535, attn_decoder_loss=0.3808, over 20691.00 frames. ], tot_loss[loss=0.3723, ctc_loss=0.351, cr_loss=0.4476, attn_decoder_loss=0.3647, over 5237228.89 frames. ], batch size: 209, lr: 4.17e-02, grad_scale: 32.0 +2024-09-16 14:32:40,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=18000.0, ans=0.12000000000000002 +2024-09-16 14:32:50,391 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.043e+02 1.290e+02 1.458e+02 1.671e+02 6.229e+02, threshold=2.915e+02, percent-clipped=1.0 +2024-09-16 14:33:16,834 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-1.pt +2024-09-16 14:34:13,792 INFO [train.py:1198] (0/2) Epoch 2, batch 0, loss[loss=0.4849, ctc_loss=0.3034, cr_loss=0.4392, attn_decoder_loss=0.4953, over 29614.00 frames. ], tot_loss[loss=0.4849, ctc_loss=0.3034, cr_loss=0.4392, attn_decoder_loss=0.4953, over 29614.00 frames. ], batch size: 73, lr: 4.08e-02, grad_scale: 32.0 +2024-09-16 14:34:13,793 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 14:34:17,241 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.3.encoder.layers.4.self_attn_weights, attn_weights_entropy = tensor([3.1274, 2.0603, 2.5104, 2.5775, 2.4812, 1.3811, 1.9125, 2.1626], + device='cuda:0') +2024-09-16 14:34:32,032 INFO [train.py:1230] (0/2) Epoch 2, validation: loss=0.3071, ctc_loss=0.1367, cr_loss=4.721e-15, attn_decoder_loss=0.326, over 944034.00 frames. +2024-09-16 14:34:32,033 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 14:35:01,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=18180.0, ans=0.0 +2024-09-16 14:35:04,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=18180.0, ans=0.26370000000000005 +2024-09-16 14:35:14,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=18180.0, ans=0.125 +2024-09-16 14:35:35,278 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.19 vs. limit=21.195 +2024-09-16 14:35:48,032 INFO [train.py:1198] (0/2) Epoch 2, batch 50, loss[loss=0.322, ctc_loss=0.2946, cr_loss=0.3803, attn_decoder_loss=0.3166, over 29399.00 frames. ], tot_loss[loss=0.3769, ctc_loss=0.341, cr_loss=0.4442, attn_decoder_loss=0.3711, over 1266739.56 frames. 
], batch size: 70, lr: 4.08e-02, grad_scale: 16.0 +2024-09-16 14:35:48,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=18300.0, ans=0.0068913043478260865 +2024-09-16 14:36:18,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=18380.0, ans=0.125 +2024-09-16 14:36:20,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=18380.0, ans=0.006873913043478261 +2024-09-16 14:36:25,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=18380.0, ans=0.025 +2024-09-16 14:36:42,198 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.088e+02 1.396e+02 1.768e+02 2.293e+02 2.873e+03, threshold=3.536e+02, percent-clipped=13.0 +2024-09-16 14:36:42,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=18420.0, ans=0.0 +2024-09-16 14:37:06,518 INFO [train.py:1198] (0/2) Epoch 2, batch 100, loss[loss=0.3565, ctc_loss=0.3233, cr_loss=0.4547, attn_decoder_loss=0.3501, over 29558.00 frames. ], tot_loss[loss=0.3684, ctc_loss=0.3355, cr_loss=0.4447, attn_decoder_loss=0.3622, over 2250999.15 frames. ], batch size: 76, lr: 4.07e-02, grad_scale: 16.0 +2024-09-16 14:37:09,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=18500.0, ans=0.0 +2024-09-16 14:37:44,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.54 vs. limit=14.467500000000001 +2024-09-16 14:37:57,007 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.34 vs. limit=21.465 +2024-09-16 14:38:24,316 INFO [train.py:1198] (0/2) Epoch 2, batch 150, loss[loss=0.3277, ctc_loss=0.3017, cr_loss=0.4266, attn_decoder_loss=0.3211, over 29411.00 frames. ], tot_loss[loss=0.3599, ctc_loss=0.3269, cr_loss=0.4412, attn_decoder_loss=0.3537, over 3044961.00 frames. ], batch size: 70, lr: 4.06e-02, grad_scale: 16.0 +2024-09-16 14:38:25,164 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.79 vs. limit=14.5125 +2024-09-16 14:39:11,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=18820.0, ans=0.125 +2024-09-16 14:39:15,767 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.006e+02 1.312e+02 1.456e+02 1.615e+02 4.569e+02, threshold=2.911e+02, percent-clipped=2.0 +2024-09-16 14:39:28,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=18860.0, ans=0.125 +2024-09-16 14:39:40,021 INFO [train.py:1198] (0/2) Epoch 2, batch 200, loss[loss=0.3698, ctc_loss=0.3424, cr_loss=0.4446, attn_decoder_loss=0.363, over 27174.00 frames. ], tot_loss[loss=0.3552, ctc_loss=0.3212, cr_loss=0.4392, attn_decoder_loss=0.3492, over 3656558.12 frames. 
], batch size: 124, lr: 4.06e-02, grad_scale: 16.0 +2024-09-16 14:40:04,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=18940.0, ans=0.125 +2024-09-16 14:40:32,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=19020.0, ans=0.025 +2024-09-16 14:40:40,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=19020.0, ans=0.125 +2024-09-16 14:40:58,052 INFO [train.py:1198] (0/2) Epoch 2, batch 250, loss[loss=0.3889, ctc_loss=0.3663, cr_loss=0.4662, attn_decoder_loss=0.3811, over 29203.00 frames. ], tot_loss[loss=0.3531, ctc_loss=0.3186, cr_loss=0.4396, attn_decoder_loss=0.3471, over 4138741.90 frames. ], batch size: 100, lr: 4.05e-02, grad_scale: 16.0 +2024-09-16 14:41:14,414 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.85 vs. limit=21.855 +2024-09-16 14:41:21,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=19140.0, ans=0.125 +2024-09-16 14:41:22,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=19140.0, ans=0.125 +2024-09-16 14:41:23,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=19140.0, ans=0.07 +2024-09-16 14:41:36,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=19180.0, ans=0.125 +2024-09-16 14:41:42,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=19220.0, ans=0.125 +2024-09-16 14:41:48,974 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:41:50,137 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.071e+02 1.356e+02 1.504e+02 1.757e+02 3.092e+02, threshold=3.008e+02, percent-clipped=1.0 +2024-09-16 14:41:50,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=19220.0, ans=0.125 +2024-09-16 14:42:07,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=19260.0, ans=0.09899494936611666 +2024-09-16 14:42:16,768 INFO [train.py:1198] (0/2) Epoch 2, batch 300, loss[loss=0.3764, ctc_loss=0.331, cr_loss=0.4424, attn_decoder_loss=0.3716, over 29564.00 frames. ], tot_loss[loss=0.3515, ctc_loss=0.3164, cr_loss=0.4393, attn_decoder_loss=0.3457, over 4509217.47 frames. 
], batch size: 92, lr: 4.05e-02, grad_scale: 16.0 +2024-09-16 14:42:18,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=19300.0, ans=0.05 +2024-09-16 14:42:20,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=19300.0, ans=0.10700000000000001 +2024-09-16 14:42:27,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=19300.0, ans=0.0 +2024-09-16 14:42:35,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=19340.0, ans=0.09899494936611666 +2024-09-16 14:42:43,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=19340.0, ans=0.125 +2024-09-16 14:42:53,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=19380.0, ans=0.0 +2024-09-16 14:42:54,361 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.43 vs. limit=11.751999999999999 +2024-09-16 14:43:10,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.69 vs. limit=11.768 +2024-09-16 14:43:28,016 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.92 vs. limit=9.865 +2024-09-16 14:43:33,266 INFO [train.py:1198] (0/2) Epoch 2, batch 350, loss[loss=0.327, ctc_loss=0.2912, cr_loss=0.4193, attn_decoder_loss=0.3217, over 29734.00 frames. ], tot_loss[loss=0.3514, ctc_loss=0.3158, cr_loss=0.4405, attn_decoder_loss=0.3456, over 4795080.78 frames. ], batch size: 72, lr: 4.04e-02, grad_scale: 16.0 +2024-09-16 14:43:42,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=19500.0, ans=0.21750000000000003 +2024-09-16 14:43:49,428 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.69 vs. limit=14.8275 +2024-09-16 14:43:51,776 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:44:22,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=19620.0, ans=0.125 +2024-09-16 14:44:26,668 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.088e+02 1.410e+02 1.578e+02 1.828e+02 5.190e+02, threshold=3.157e+02, percent-clipped=4.0 +2024-09-16 14:44:31,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=19620.0, ans=0.1038 +2024-09-16 14:44:31,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=19620.0, ans=0.0 +2024-09-16 14:44:51,067 INFO [train.py:1198] (0/2) Epoch 2, batch 400, loss[loss=0.3505, ctc_loss=0.3163, cr_loss=0.4393, attn_decoder_loss=0.3445, over 29714.00 frames. ], tot_loss[loss=0.35, ctc_loss=0.314, cr_loss=0.4396, attn_decoder_loss=0.3442, over 5025132.01 frames. 
], batch size: 82, lr: 4.03e-02, grad_scale: 32.0 +2024-09-16 14:44:54,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=19700.0, ans=0.04949747468305833 +2024-09-16 14:45:03,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=19700.0, ans=0.025 +2024-09-16 14:45:14,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=19740.0, ans=0.125 +2024-09-16 14:45:28,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=19780.0, ans=0.0 +2024-09-16 14:45:38,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=19820.0, ans=0.125 +2024-09-16 14:45:43,471 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:45:44,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=19820.0, ans=0.0 +2024-09-16 14:46:10,054 INFO [train.py:1198] (0/2) Epoch 2, batch 450, loss[loss=0.3532, ctc_loss=0.3186, cr_loss=0.4573, attn_decoder_loss=0.3469, over 29681.00 frames. ], tot_loss[loss=0.3497, ctc_loss=0.3141, cr_loss=0.4405, attn_decoder_loss=0.3439, over 5185003.38 frames. ], batch size: 83, lr: 4.03e-02, grad_scale: 32.0 +2024-09-16 14:46:12,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=19900.0, ans=9.975 +2024-09-16 14:46:14,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=19900.0, ans=0.101 +2024-09-16 14:46:16,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=7.03 vs. limit=11.96 +2024-09-16 14:46:30,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=19940.0, ans=0.125 +2024-09-16 14:46:43,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=19980.0, ans=0.125 +2024-09-16 14:47:02,432 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.051e+02 1.299e+02 1.486e+02 1.745e+02 5.446e+02, threshold=2.972e+02, percent-clipped=3.0 +2024-09-16 14:47:07,756 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.58 vs. limit=15.0 +2024-09-16 14:47:11,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=20060.0, ans=0.2 +2024-09-16 14:47:20,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=20060.0, ans=0.1 +2024-09-16 14:47:26,732 INFO [train.py:1198] (0/2) Epoch 2, batch 500, loss[loss=0.3547, ctc_loss=0.3169, cr_loss=0.4446, attn_decoder_loss=0.349, over 29442.00 frames. ], tot_loss[loss=0.3479, ctc_loss=0.312, cr_loss=0.4401, attn_decoder_loss=0.3421, over 5328411.28 frames. 
], batch size: 94, lr: 4.02e-02, grad_scale: 32.0 +2024-09-16 14:48:04,577 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=8.84 vs. limit=15.0 +2024-09-16 14:48:10,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=10.11 vs. limit=10.0 +2024-09-16 14:48:16,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=20220.0, ans=0.1 +2024-09-16 14:48:19,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer_ff3.min_abs, batch_count=20220.0, ans=0.2 +2024-09-16 14:48:21,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=20220.0, ans=0.125 +2024-09-16 14:48:25,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=20220.0, ans=0.0 +2024-09-16 14:48:28,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=20260.0, ans=0.125 +2024-09-16 14:48:44,958 INFO [train.py:1198] (0/2) Epoch 2, batch 550, loss[loss=0.3529, ctc_loss=0.3154, cr_loss=0.4583, attn_decoder_loss=0.3469, over 28785.00 frames. ], tot_loss[loss=0.3478, ctc_loss=0.3119, cr_loss=0.4407, attn_decoder_loss=0.342, over 5421229.98 frames. ], batch size: 104, lr: 4.02e-02, grad_scale: 16.0 +2024-09-16 14:49:03,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=20340.0, ans=0.2 +2024-09-16 14:49:06,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=20340.0, ans=0.125 +2024-09-16 14:49:14,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=20380.0, ans=0.0 +2024-09-16 14:49:15,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=20380.0, ans=0.2 +2024-09-16 14:49:22,606 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.86 vs. limit=22.5 +2024-09-16 14:49:26,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=20380.0, ans=0.0 +2024-09-16 14:49:38,284 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.068e+02 1.358e+02 1.600e+02 1.893e+02 5.686e+02, threshold=3.199e+02, percent-clipped=4.0 +2024-09-16 14:49:41,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=20420.0, ans=0.025 +2024-09-16 14:50:00,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=20460.0, ans=0.125 +2024-09-16 14:50:03,475 INFO [train.py:1198] (0/2) Epoch 2, batch 600, loss[loss=0.3606, ctc_loss=0.3191, cr_loss=0.4449, attn_decoder_loss=0.3554, over 29305.00 frames. ], tot_loss[loss=0.348, ctc_loss=0.3116, cr_loss=0.4408, attn_decoder_loss=0.3422, over 5508901.29 frames. 
], batch size: 100, lr: 4.01e-02, grad_scale: 16.0 +2024-09-16 14:50:24,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=20540.0, ans=0.125 +2024-09-16 14:50:36,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=20580.0, ans=0.125 +2024-09-16 14:50:43,781 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.14 vs. limit=15.0 +2024-09-16 14:51:19,278 INFO [train.py:1198] (0/2) Epoch 2, batch 650, loss[loss=0.3291, ctc_loss=0.2826, cr_loss=0.4598, attn_decoder_loss=0.324, over 29780.00 frames. ], tot_loss[loss=0.3461, ctc_loss=0.3094, cr_loss=0.4395, attn_decoder_loss=0.3404, over 5585981.94 frames. ], batch size: 81, lr: 4.00e-02, grad_scale: 16.0 +2024-09-16 14:51:19,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=20700.0, ans=0.04949747468305833 +2024-09-16 14:51:39,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=20740.0, ans=0.2 +2024-09-16 14:51:59,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=20780.0, ans=0.125 +2024-09-16 14:52:04,390 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:52:07,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=20820.0, ans=0.125 +2024-09-16 14:52:11,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=7.10 vs. limit=15.0 +2024-09-16 14:52:15,201 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.068e+02 1.306e+02 1.501e+02 1.738e+02 3.373e+02, threshold=3.002e+02, percent-clipped=2.0 +2024-09-16 14:52:21,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=20860.0, ans=0.0 +2024-09-16 14:52:38,095 INFO [train.py:1198] (0/2) Epoch 2, batch 700, loss[loss=0.3355, ctc_loss=0.295, cr_loss=0.4435, attn_decoder_loss=0.3301, over 29532.00 frames. ], tot_loss[loss=0.3463, ctc_loss=0.3093, cr_loss=0.4402, attn_decoder_loss=0.3407, over 5637971.83 frames. ], batch size: 76, lr: 4.00e-02, grad_scale: 16.0 +2024-09-16 14:52:40,586 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.60 vs. limit=15.0 +2024-09-16 14:52:43,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=20900.0, ans=0.125 +2024-09-16 14:52:51,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=20940.0, ans=0.2 +2024-09-16 14:53:05,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=15.02 vs. 
limit=15.0 +2024-09-16 14:53:07,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=20980.0, ans=0.2 +2024-09-16 14:53:08,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=20980.0, ans=0.2 +2024-09-16 14:53:26,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=21020.0, ans=0.0 +2024-09-16 14:53:26,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=21020.0, ans=0.125 +2024-09-16 14:53:56,476 INFO [train.py:1198] (0/2) Epoch 2, batch 750, loss[loss=0.3454, ctc_loss=0.2978, cr_loss=0.4379, attn_decoder_loss=0.3409, over 29706.00 frames. ], tot_loss[loss=0.3458, ctc_loss=0.3088, cr_loss=0.4401, attn_decoder_loss=0.3401, over 5676946.78 frames. ], batch size: 82, lr: 3.99e-02, grad_scale: 16.0 +2024-09-16 14:54:23,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=21140.0, ans=0.1 +2024-09-16 14:54:49,446 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.006e+02 1.356e+02 1.549e+02 1.774e+02 3.247e+02, threshold=3.098e+02, percent-clipped=2.0 +2024-09-16 14:55:08,616 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.70 vs. limit=6.0 +2024-09-16 14:55:12,220 INFO [train.py:1198] (0/2) Epoch 2, batch 800, loss[loss=0.2906, ctc_loss=0.2387, cr_loss=0.3861, attn_decoder_loss=0.2878, over 29611.00 frames. ], tot_loss[loss=0.3453, ctc_loss=0.3079, cr_loss=0.4395, attn_decoder_loss=0.3397, over 5707519.61 frames. ], batch size: 73, lr: 3.98e-02, grad_scale: 32.0 +2024-09-16 14:55:27,467 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.79 vs. limit=8.0 +2024-09-16 14:55:31,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=21340.0, ans=0.006230434782608696 +2024-09-16 14:56:21,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.79 vs. limit=15.0 +2024-09-16 14:56:28,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=21500.0, ans=0.2 +2024-09-16 14:56:30,085 INFO [train.py:1198] (0/2) Epoch 2, batch 850, loss[loss=0.3515, ctc_loss=0.314, cr_loss=0.4679, attn_decoder_loss=0.3453, over 29721.00 frames. ], tot_loss[loss=0.3443, ctc_loss=0.3062, cr_loss=0.4394, attn_decoder_loss=0.3387, over 5736994.44 frames. 
], batch size: 89, lr: 3.98e-02, grad_scale: 16.0 +2024-09-16 14:56:53,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=21540.0, ans=0.1 +2024-09-16 14:57:13,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=21580.0, ans=0.125 +2024-09-16 14:57:25,356 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.083e+02 1.316e+02 1.489e+02 1.639e+02 3.105e+02, threshold=2.978e+02, percent-clipped=1.0 +2024-09-16 14:57:46,625 INFO [train.py:1198] (0/2) Epoch 2, batch 900, loss[loss=0.3113, ctc_loss=0.2664, cr_loss=0.3956, attn_decoder_loss=0.3075, over 29571.00 frames. ], tot_loss[loss=0.3443, ctc_loss=0.3062, cr_loss=0.4401, attn_decoder_loss=0.3388, over 5741541.30 frames. ], batch size: 73, lr: 3.97e-02, grad_scale: 16.0 +2024-09-16 14:57:49,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=21700.0, ans=0.0 +2024-09-16 14:58:00,297 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.96 vs. limit=22.5 +2024-09-16 14:58:01,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=21700.0, ans=0.006152173913043478 +2024-09-16 14:58:26,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.90 vs. limit=12.0 +2024-09-16 14:58:35,397 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.70 vs. limit=15.0 +2024-09-16 14:58:37,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=21820.0, ans=0.1 +2024-09-16 14:58:43,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=21820.0, ans=0.04949747468305833 +2024-09-16 14:58:46,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=21820.0, ans=0.2 +2024-09-16 14:58:57,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=21860.0, ans=0.1 +2024-09-16 14:59:01,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=21860.0, ans=0.006117391304347826 +2024-09-16 14:59:04,589 INFO [train.py:1198] (0/2) Epoch 2, batch 950, loss[loss=0.3196, ctc_loss=0.272, cr_loss=0.4017, attn_decoder_loss=0.316, over 29493.00 frames. ], tot_loss[loss=0.3443, ctc_loss=0.3062, cr_loss=0.4402, attn_decoder_loss=0.3387, over 5744361.87 frames. ], batch size: 74, lr: 3.97e-02, grad_scale: 16.0 +2024-09-16 14:59:06,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=21900.0, ans=0.2 +2024-09-16 14:59:11,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.03 vs. 
limit=15.0 +2024-09-16 14:59:29,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=21940.0, ans=0.125 +2024-09-16 14:59:32,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=21940.0, ans=0.0060999999999999995 +2024-09-16 14:59:43,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=21980.0, ans=0.1 +2024-09-16 14:59:52,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=22020.0, ans=0.2 +2024-09-16 14:59:52,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=22020.0, ans=0.07 +2024-09-16 15:00:01,565 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.124e+02 1.375e+02 1.582e+02 1.931e+02 4.850e+02, threshold=3.164e+02, percent-clipped=3.0 +2024-09-16 15:00:11,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=22060.0, ans=0.05 +2024-09-16 15:00:19,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=22060.0, ans=0.125 +2024-09-16 15:00:19,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=22060.0, ans=0.2 +2024-09-16 15:00:21,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=22100.0, ans=0.125 +2024-09-16 15:00:22,483 INFO [train.py:1198] (0/2) Epoch 2, batch 1000, loss[loss=0.3296, ctc_loss=0.2825, cr_loss=0.4437, attn_decoder_loss=0.325, over 29500.00 frames. ], tot_loss[loss=0.3456, ctc_loss=0.3077, cr_loss=0.4416, attn_decoder_loss=0.34, over 5736735.60 frames. 
], batch size: 77, lr: 3.96e-02, grad_scale: 16.0 +2024-09-16 15:00:33,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=22100.0, ans=0.0 +2024-09-16 15:00:35,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=22100.0, ans=0.125 +2024-09-16 15:00:41,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=22140.0, ans=0.125 +2024-09-16 15:00:48,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=22140.0, ans=0.125 +2024-09-16 15:01:03,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=22180.0, ans=0.006047826086956522 +2024-09-16 15:01:05,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=22180.0, ans=0.125 +2024-09-16 15:01:06,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=22220.0, ans=0.07 +2024-09-16 15:01:09,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=22220.0, ans=0.125 +2024-09-16 15:01:11,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=22220.0, ans=0.1 +2024-09-16 15:01:15,164 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.41 vs. limit=10.0 +2024-09-16 15:01:24,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=22260.0, ans=0.125 +2024-09-16 15:01:26,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=22260.0, ans=0.125 +2024-09-16 15:01:28,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=22260.0, ans=0.07 +2024-09-16 15:01:34,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=22260.0, ans=0.125 +2024-09-16 15:01:34,780 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.05 vs. limit=15.0 +2024-09-16 15:01:38,480 INFO [train.py:1198] (0/2) Epoch 2, batch 1050, loss[loss=0.3376, ctc_loss=0.2988, cr_loss=0.4426, attn_decoder_loss=0.332, over 29686.00 frames. ], tot_loss[loss=0.3438, ctc_loss=0.306, cr_loss=0.4399, attn_decoder_loss=0.3383, over 5744760.89 frames. 
], batch size: 85, lr: 3.95e-02, grad_scale: 16.0 +2024-09-16 15:02:10,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=22380.0, ans=0.1 +2024-09-16 15:02:14,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=22380.0, ans=0.125 +2024-09-16 15:02:35,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=22420.0, ans=0.125 +2024-09-16 15:02:36,118 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.038e+02 1.353e+02 1.564e+02 1.813e+02 2.890e+02, threshold=3.129e+02, percent-clipped=0.0 +2024-09-16 15:02:36,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=22420.0, ans=0.025 +2024-09-16 15:02:36,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=22420.0, ans=0.125 +2024-09-16 15:02:57,505 INFO [train.py:1198] (0/2) Epoch 2, batch 1100, loss[loss=0.3258, ctc_loss=0.2786, cr_loss=0.4294, attn_decoder_loss=0.3215, over 29426.00 frames. ], tot_loss[loss=0.343, ctc_loss=0.3046, cr_loss=0.4404, attn_decoder_loss=0.3375, over 5756796.82 frames. ], batch size: 78, lr: 3.95e-02, grad_scale: 16.0 +2024-09-16 15:02:59,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=22500.0, ans=0.1 +2024-09-16 15:02:59,891 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.89 vs. limit=6.0 +2024-09-16 15:03:00,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=22500.0, ans=0.125 +2024-09-16 15:03:12,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.98 vs. limit=6.0 +2024-09-16 15:03:22,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=22540.0, ans=0.005969565217391304 +2024-09-16 15:04:06,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=22660.0, ans=0.1 +2024-09-16 15:04:10,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.75 vs. limit=10.0 +2024-09-16 15:04:15,691 INFO [train.py:1198] (0/2) Epoch 2, batch 1150, loss[loss=0.3363, ctc_loss=0.2976, cr_loss=0.416, attn_decoder_loss=0.3313, over 29460.00 frames. ], tot_loss[loss=0.343, ctc_loss=0.3045, cr_loss=0.4401, attn_decoder_loss=0.3375, over 5755685.04 frames. ], batch size: 78, lr: 3.94e-02, grad_scale: 16.0 +2024-09-16 15:04:33,368 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.84 vs. limit=22.5 +2024-09-16 15:04:42,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=8.08 vs. 
limit=15.0 +2024-09-16 15:04:44,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=22780.0, ans=0.1 +2024-09-16 15:04:51,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=22780.0, ans=0.125 +2024-09-16 15:04:54,994 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=8.98 vs. limit=15.0 +2024-09-16 15:05:08,383 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.27 vs. limit=15.0 +2024-09-16 15:05:10,469 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.047e+02 1.307e+02 1.503e+02 1.816e+02 4.036e+02, threshold=3.005e+02, percent-clipped=3.0 +2024-09-16 15:05:19,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=22860.0, ans=0.125 +2024-09-16 15:05:21,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=23.97 vs. limit=22.5 +2024-09-16 15:05:31,663 INFO [train.py:1198] (0/2) Epoch 2, batch 1200, loss[loss=0.3554, ctc_loss=0.313, cr_loss=0.4413, attn_decoder_loss=0.3503, over 29671.00 frames. ], tot_loss[loss=0.3442, ctc_loss=0.3058, cr_loss=0.4419, attn_decoder_loss=0.3387, over 5748685.00 frames. ], batch size: 85, lr: 3.93e-02, grad_scale: 32.0 +2024-09-16 15:05:34,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=22900.0, ans=0.125 +2024-09-16 15:06:01,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_na.min_abs, batch_count=22940.0, ans=0.02 +2024-09-16 15:06:09,902 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=20.45 vs. limit=22.5 +2024-09-16 15:06:29,871 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.86 vs. limit=15.0 +2024-09-16 15:06:50,272 INFO [train.py:1198] (0/2) Epoch 2, batch 1250, loss[loss=0.3632, ctc_loss=0.3214, cr_loss=0.4753, attn_decoder_loss=0.3573, over 29530.00 frames. ], tot_loss[loss=0.3442, ctc_loss=0.3054, cr_loss=0.4429, attn_decoder_loss=0.3386, over 5776466.00 frames. ], batch size: 92, lr: 3.93e-02, grad_scale: 16.0 +2024-09-16 15:07:49,030 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.075e+02 1.374e+02 1.508e+02 1.823e+02 4.800e+02, threshold=3.017e+02, percent-clipped=3.0 +2024-09-16 15:07:56,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.06 vs. limit=15.0 +2024-09-16 15:08:08,227 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=25.18 vs. limit=22.5 +2024-09-16 15:08:08,747 INFO [train.py:1198] (0/2) Epoch 2, batch 1300, loss[loss=0.3494, ctc_loss=0.3051, cr_loss=0.4641, attn_decoder_loss=0.344, over 28631.00 frames. ], tot_loss[loss=0.3426, ctc_loss=0.3036, cr_loss=0.4415, attn_decoder_loss=0.3371, over 5781427.55 frames. 
], batch size: 112, lr: 3.92e-02, grad_scale: 16.0 +2024-09-16 15:08:15,902 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.33 vs. limit=15.0 +2024-09-16 15:08:18,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=23300.0, ans=0.125 +2024-09-16 15:08:21,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=23300.0, ans=0.125 +2024-09-16 15:08:41,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=23380.0, ans=0.05 +2024-09-16 15:08:42,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=23380.0, ans=0.125 +2024-09-16 15:08:44,803 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.83 vs. limit=12.0 +2024-09-16 15:08:48,722 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=6.965e-02 +2024-09-16 15:09:05,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=23420.0, ans=0.025 +2024-09-16 15:09:07,720 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.14 vs. limit=15.0 +2024-09-16 15:09:14,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=23460.0, ans=0.0057695652173913045 +2024-09-16 15:09:25,051 INFO [train.py:1198] (0/2) Epoch 2, batch 1350, loss[loss=0.3504, ctc_loss=0.3221, cr_loss=0.4508, attn_decoder_loss=0.3436, over 29786.00 frames. ], tot_loss[loss=0.3414, ctc_loss=0.3018, cr_loss=0.4405, attn_decoder_loss=0.3361, over 5797558.38 frames. ], batch size: 81, lr: 3.91e-02, grad_scale: 16.0 +2024-09-16 15:10:05,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.97 vs. limit=10.0 +2024-09-16 15:10:06,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=23580.0, ans=0.125 +2024-09-16 15:10:11,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.43 vs. limit=15.0 +2024-09-16 15:10:23,027 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.062e+02 1.283e+02 1.428e+02 1.705e+02 2.892e+02, threshold=2.856e+02, percent-clipped=0.0 +2024-09-16 15:10:39,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=23660.0, ans=0.125 +2024-09-16 15:10:42,652 INFO [train.py:1198] (0/2) Epoch 2, batch 1400, loss[loss=0.2837, ctc_loss=0.2332, cr_loss=0.3523, attn_decoder_loss=0.2815, over 29577.00 frames. ], tot_loss[loss=0.3411, ctc_loss=0.3013, cr_loss=0.4398, attn_decoder_loss=0.3358, over 5807985.58 frames. 
], batch size: 69, lr: 3.91e-02, grad_scale: 16.0 +2024-09-16 15:10:44,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=23700.0, ans=0.125 +2024-09-16 15:10:52,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=23700.0, ans=0.2 +2024-09-16 15:11:32,593 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=24.43 vs. limit=22.5 +2024-09-16 15:11:48,858 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.23 vs. limit=22.5 +2024-09-16 15:11:51,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=23860.0, ans=0.125 +2024-09-16 15:11:54,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=23860.0, ans=0.2 +2024-09-16 15:11:55,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.64 vs. limit=22.5 +2024-09-16 15:11:59,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=23900.0, ans=0.04949747468305833 +2024-09-16 15:12:00,325 INFO [train.py:1198] (0/2) Epoch 2, batch 1450, loss[loss=0.3675, ctc_loss=0.3388, cr_loss=0.4611, attn_decoder_loss=0.3605, over 29451.00 frames. ], tot_loss[loss=0.3423, ctc_loss=0.3024, cr_loss=0.4405, attn_decoder_loss=0.3369, over 5805522.61 frames. ], batch size: 94, lr: 3.90e-02, grad_scale: 16.0 +2024-09-16 15:12:11,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=23900.0, ans=0.125 +2024-09-16 15:12:15,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=23940.0, ans=0.2 +2024-09-16 15:12:16,385 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=6.44 vs. limit=6.0 +2024-09-16 15:12:17,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=23940.0, ans=0.125 +2024-09-16 15:12:23,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.40 vs. limit=10.0 +2024-09-16 15:12:37,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=23980.0, ans=0.2 +2024-09-16 15:12:44,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=24020.0, ans=0.125 +2024-09-16 15:12:56,334 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.115e+02 1.348e+02 1.492e+02 1.698e+02 3.722e+02, threshold=2.983e+02, percent-clipped=2.0 +2024-09-16 15:12:58,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=24020.0, ans=0.0 +2024-09-16 15:13:16,027 INFO [train.py:1198] (0/2) Epoch 2, batch 1500, loss[loss=0.343, ctc_loss=0.3014, cr_loss=0.4264, attn_decoder_loss=0.3381, over 29618.00 frames. 
], tot_loss[loss=0.342, ctc_loss=0.3018, cr_loss=0.441, attn_decoder_loss=0.3367, over 5806731.72 frames. ], batch size: 86, lr: 3.90e-02, grad_scale: 16.0 +2024-09-16 15:13:22,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=24100.0, ans=0.2 +2024-09-16 15:13:25,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=24100.0, ans=0.07 +2024-09-16 15:13:31,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=24140.0, ans=0.025 +2024-09-16 15:13:37,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=24140.0, ans=0.0 +2024-09-16 15:13:38,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=24140.0, ans=0.125 +2024-09-16 15:13:54,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1.whitening_limit, batch_count=24180.0, ans=10.0 +2024-09-16 15:14:02,392 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.62 vs. limit=15.0 +2024-09-16 15:14:05,309 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.22 vs. limit=22.5 +2024-09-16 15:14:34,957 INFO [train.py:1198] (0/2) Epoch 2, batch 1550, loss[loss=0.3563, ctc_loss=0.3137, cr_loss=0.4844, attn_decoder_loss=0.3502, over 29534.00 frames. ], tot_loss[loss=0.3422, ctc_loss=0.3023, cr_loss=0.442, attn_decoder_loss=0.3368, over 5782564.18 frames. ], batch size: 90, lr: 3.89e-02, grad_scale: 8.0 +2024-09-16 15:14:42,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=24300.0, ans=0.2 +2024-09-16 15:14:44,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=24300.0, ans=0.0 +2024-09-16 15:14:44,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=24300.0, ans=0.125 +2024-09-16 15:14:54,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=24340.0, ans=0.05 +2024-09-16 15:14:54,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=24340.0, ans=0.125 +2024-09-16 15:14:55,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=24340.0, ans=0.125 +2024-09-16 15:15:05,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=24380.0, ans=0.125 +2024-09-16 15:15:34,549 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.088e+02 1.379e+02 1.577e+02 1.948e+02 4.764e+02, threshold=3.154e+02, percent-clipped=9.0 +2024-09-16 15:15:37,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=24460.0, ans=0.1 +2024-09-16 15:15:51,910 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.48 vs. 
limit=22.5 +2024-09-16 15:15:52,707 INFO [train.py:1198] (0/2) Epoch 2, batch 1600, loss[loss=0.3432, ctc_loss=0.2902, cr_loss=0.4677, attn_decoder_loss=0.3387, over 29673.00 frames. ], tot_loss[loss=0.3418, ctc_loss=0.3022, cr_loss=0.4422, attn_decoder_loss=0.3364, over 5763640.70 frames. ], batch size: 85, lr: 3.88e-02, grad_scale: 16.0 +2024-09-16 15:15:53,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=24500.0, ans=0.125 +2024-09-16 15:15:56,806 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.47 vs. limit=15.0 +2024-09-16 15:16:14,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=24540.0, ans=0.1 +2024-09-16 15:16:23,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=24580.0, ans=0.1 +2024-09-16 15:16:39,045 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.10 vs. limit=15.0 +2024-09-16 15:16:43,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=24620.0, ans=0.0 +2024-09-16 15:16:44,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=24620.0, ans=0.2 +2024-09-16 15:16:48,081 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.51 vs. limit=15.0 +2024-09-16 15:16:58,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=24660.0, ans=0.125 +2024-09-16 15:16:59,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=24660.0, ans=0.125 +2024-09-16 15:17:08,438 INFO [train.py:1198] (0/2) Epoch 2, batch 1650, loss[loss=0.3654, ctc_loss=0.3257, cr_loss=0.4712, attn_decoder_loss=0.3594, over 29694.00 frames. ], tot_loss[loss=0.3415, ctc_loss=0.3018, cr_loss=0.442, attn_decoder_loss=0.3361, over 5759161.09 frames. ], batch size: 89, lr: 3.88e-02, grad_scale: 16.0 +2024-09-16 15:17:24,484 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:17:25,101 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.25 vs. 
limit=15.0 +2024-09-16 15:17:42,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=24780.0, ans=0.125 +2024-09-16 15:17:53,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=24780.0, ans=0.2 +2024-09-16 15:17:53,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=24780.0, ans=0.0 +2024-09-16 15:17:56,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=24820.0, ans=0.0 +2024-09-16 15:18:08,692 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.020e+02 1.312e+02 1.453e+02 1.722e+02 6.388e+02, threshold=2.905e+02, percent-clipped=6.0 +2024-09-16 15:18:19,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=24860.0, ans=0.2 +2024-09-16 15:18:26,756 INFO [train.py:1198] (0/2) Epoch 2, batch 1700, loss[loss=0.301, ctc_loss=0.2543, cr_loss=0.4282, attn_decoder_loss=0.2967, over 29579.00 frames. ], tot_loss[loss=0.3408, ctc_loss=0.3007, cr_loss=0.4422, attn_decoder_loss=0.3355, over 5780811.13 frames. ], batch size: 69, lr: 3.87e-02, grad_scale: 16.0 +2024-09-16 15:18:33,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=24900.0, ans=10.0 +2024-09-16 15:19:05,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=24980.0, ans=0.125 +2024-09-16 15:19:16,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=25020.0, ans=0.0 +2024-09-16 15:19:19,059 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:19:28,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=25060.0, ans=0.125 +2024-09-16 15:19:40,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=25060.0, ans=0.025 +2024-09-16 15:19:44,699 INFO [train.py:1198] (0/2) Epoch 2, batch 1750, loss[loss=0.3122, ctc_loss=0.2731, cr_loss=0.4232, attn_decoder_loss=0.3071, over 29366.00 frames. ], tot_loss[loss=0.3402, ctc_loss=0.2997, cr_loss=0.4418, attn_decoder_loss=0.3348, over 5788857.78 frames. ], batch size: 67, lr: 3.86e-02, grad_scale: 16.0 +2024-09-16 15:19:50,476 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.36 vs. limit=8.0 +2024-09-16 15:20:10,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=25140.0, ans=0.0 +2024-09-16 15:20:23,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=25180.0, ans=0.2 +2024-09-16 15:20:41,422 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.48 vs. 
limit=15.0 +2024-09-16 15:20:42,197 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.124e+02 1.354e+02 1.539e+02 1.820e+02 3.547e+02, threshold=3.078e+02, percent-clipped=3.0 +2024-09-16 15:20:43,161 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.93 vs. limit=6.0 +2024-09-16 15:20:53,731 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.81 vs. limit=10.0 +2024-09-16 15:20:54,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=25260.0, ans=0.125 +2024-09-16 15:21:00,332 INFO [train.py:1198] (0/2) Epoch 2, batch 1800, loss[loss=0.3593, ctc_loss=0.3199, cr_loss=0.4433, attn_decoder_loss=0.3538, over 29700.00 frames. ], tot_loss[loss=0.3403, ctc_loss=0.2999, cr_loss=0.4413, attn_decoder_loss=0.335, over 5791115.41 frames. ], batch size: 83, lr: 3.86e-02, grad_scale: 16.0 +2024-09-16 15:21:12,219 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=14.54 vs. limit=15.0 +2024-09-16 15:21:17,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=25340.0, ans=0.07 +2024-09-16 15:21:32,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=25380.0, ans=0.125 +2024-09-16 15:21:33,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=25380.0, ans=0.1 +2024-09-16 15:21:44,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=25380.0, ans=0.2 +2024-09-16 15:21:50,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=25420.0, ans=0.1 +2024-09-16 15:21:50,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=25420.0, ans=0.0 +2024-09-16 15:21:53,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=25420.0, ans=0.0 +2024-09-16 15:21:54,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=25420.0, ans=0.0 +2024-09-16 15:22:00,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=25420.0, ans=0.125 +2024-09-16 15:22:08,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=25460.0, ans=0.125 +2024-09-16 15:22:18,555 INFO [train.py:1198] (0/2) Epoch 2, batch 1850, loss[loss=0.3562, ctc_loss=0.3009, cr_loss=0.4605, attn_decoder_loss=0.3521, over 29614.00 frames. ], tot_loss[loss=0.3398, ctc_loss=0.2991, cr_loss=0.4416, attn_decoder_loss=0.3345, over 5797722.75 frames. 
], batch size: 86, lr: 3.85e-02, grad_scale: 16.0 +2024-09-16 15:22:30,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=25500.0, ans=0.125 +2024-09-16 15:22:56,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.38 vs. limit=15.0 +2024-09-16 15:23:19,024 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.115e+02 1.341e+02 1.488e+02 1.704e+02 7.229e+02, threshold=2.976e+02, percent-clipped=2.0 +2024-09-16 15:23:37,144 INFO [train.py:1198] (0/2) Epoch 2, batch 1900, loss[loss=0.3371, ctc_loss=0.2896, cr_loss=0.4359, attn_decoder_loss=0.3327, over 29712.00 frames. ], tot_loss[loss=0.3405, ctc_loss=0.2993, cr_loss=0.4435, attn_decoder_loss=0.3353, over 5805749.38 frames. ], batch size: 89, lr: 3.85e-02, grad_scale: 16.0 +2024-09-16 15:23:53,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=25740.0, ans=0.005273913043478261 +2024-09-16 15:23:58,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=25740.0, ans=0.125 +2024-09-16 15:24:00,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=25740.0, ans=0.0 +2024-09-16 15:24:53,305 INFO [train.py:1198] (0/2) Epoch 2, batch 1950, loss[loss=0.3456, ctc_loss=0.3076, cr_loss=0.4719, attn_decoder_loss=0.3393, over 29451.00 frames. ], tot_loss[loss=0.3415, ctc_loss=0.2998, cr_loss=0.445, attn_decoder_loss=0.3362, over 5819842.44 frames. ], batch size: 78, lr: 3.84e-02, grad_scale: 16.0 +2024-09-16 15:25:02,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=25900.0, ans=0.125 +2024-09-16 15:25:11,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=25940.0, ans=0.1 +2024-09-16 15:25:25,300 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.00 vs. limit=15.0 +2024-09-16 15:25:27,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=25980.0, ans=0.04949747468305833 +2024-09-16 15:25:35,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=25980.0, ans=0.125 +2024-09-16 15:25:41,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=3.67 vs. 
limit=12.0 +2024-09-16 15:25:50,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=26020.0, ans=0.1 +2024-09-16 15:25:52,896 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.113e+02 1.307e+02 1.485e+02 1.949e+02 3.051e+02, threshold=2.970e+02, percent-clipped=1.0 +2024-09-16 15:25:57,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=26060.0, ans=0.125 +2024-09-16 15:26:10,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=26100.0, ans=0.125 +2024-09-16 15:26:11,402 INFO [train.py:1198] (0/2) Epoch 2, batch 2000, loss[loss=0.2864, ctc_loss=0.2374, cr_loss=0.3883, attn_decoder_loss=0.2832, over 29351.00 frames. ], tot_loss[loss=0.3416, ctc_loss=0.3002, cr_loss=0.4441, attn_decoder_loss=0.3364, over 5797484.79 frames. ], batch size: 67, lr: 3.83e-02, grad_scale: 32.0 +2024-09-16 15:26:40,203 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.86 vs. limit=15.0 +2024-09-16 15:26:48,267 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.16 vs. limit=10.0 +2024-09-16 15:26:52,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=26180.0, ans=0.025 +2024-09-16 15:26:54,052 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.60 vs. limit=10.0 +2024-09-16 15:27:11,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=26220.0, ans=0.0 +2024-09-16 15:27:30,018 INFO [train.py:1198] (0/2) Epoch 2, batch 2050, loss[loss=0.3034, ctc_loss=0.2525, cr_loss=0.4133, attn_decoder_loss=0.2999, over 29400.00 frames. ], tot_loss[loss=0.3403, ctc_loss=0.299, cr_loss=0.4437, attn_decoder_loss=0.3351, over 5790124.50 frames. ], batch size: 70, lr: 3.83e-02, grad_scale: 16.0 +2024-09-16 15:27:33,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=26300.0, ans=0.125 +2024-09-16 15:27:38,426 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.42 vs. 
limit=15.0 +2024-09-16 15:27:58,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=26380.0, ans=0.125 +2024-09-16 15:28:03,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=26380.0, ans=0.5 +2024-09-16 15:28:28,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=26420.0, ans=0.1 +2024-09-16 15:28:29,486 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.089e+02 1.317e+02 1.483e+02 1.822e+02 5.194e+02, threshold=2.965e+02, percent-clipped=3.0 +2024-09-16 15:28:29,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=26460.0, ans=0.005117391304347826 +2024-09-16 15:28:41,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=26460.0, ans=0.95 +2024-09-16 15:28:46,403 INFO [train.py:1198] (0/2) Epoch 2, batch 2100, loss[loss=0.3264, ctc_loss=0.2695, cr_loss=0.417, attn_decoder_loss=0.3234, over 29758.00 frames. ], tot_loss[loss=0.3387, ctc_loss=0.2966, cr_loss=0.4428, attn_decoder_loss=0.3335, over 5801392.47 frames. ], batch size: 81, lr: 3.82e-02, grad_scale: 16.0 +2024-09-16 15:28:51,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=26500.0, ans=0.1 +2024-09-16 15:29:10,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=26540.0, ans=0.125 +2024-09-16 15:29:33,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=26620.0, ans=0.125 +2024-09-16 15:29:33,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.46 vs. limit=15.0 +2024-09-16 15:29:42,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=26620.0, ans=0.025 +2024-09-16 15:29:43,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=26620.0, ans=0.0 +2024-09-16 15:30:05,024 INFO [train.py:1198] (0/2) Epoch 2, batch 2150, loss[loss=0.317, ctc_loss=0.2622, cr_loss=0.4439, attn_decoder_loss=0.3133, over 29440.00 frames. ], tot_loss[loss=0.3373, ctc_loss=0.2947, cr_loss=0.4421, attn_decoder_loss=0.3322, over 5816277.93 frames. 
], batch size: 78, lr: 3.81e-02, grad_scale: 16.0 +2024-09-16 15:30:16,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=26700.0, ans=0.125 +2024-09-16 15:30:22,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=26740.0, ans=0.1 +2024-09-16 15:30:45,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=26780.0, ans=0.1 +2024-09-16 15:31:06,845 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.157e+02 1.386e+02 1.602e+02 1.803e+02 8.431e+02, threshold=3.204e+02, percent-clipped=4.0 +2024-09-16 15:31:23,606 INFO [train.py:1198] (0/2) Epoch 2, batch 2200, loss[loss=0.3431, ctc_loss=0.2882, cr_loss=0.4551, attn_decoder_loss=0.339, over 29631.00 frames. ], tot_loss[loss=0.3375, ctc_loss=0.2946, cr_loss=0.4413, attn_decoder_loss=0.3324, over 5813750.49 frames. ], batch size: 86, lr: 3.81e-02, grad_scale: 16.0 +2024-09-16 15:31:45,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=26940.0, ans=0.125 +2024-09-16 15:31:56,757 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=15.0 +2024-09-16 15:31:57,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=26980.0, ans=0.0 +2024-09-16 15:32:05,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=26980.0, ans=0.125 +2024-09-16 15:32:05,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=26980.0, ans=10.0 +2024-09-16 15:32:40,168 INFO [train.py:1198] (0/2) Epoch 2, batch 2250, loss[loss=0.3271, ctc_loss=0.279, cr_loss=0.4323, attn_decoder_loss=0.3228, over 29710.00 frames. ], tot_loss[loss=0.3372, ctc_loss=0.2941, cr_loss=0.4414, attn_decoder_loss=0.3321, over 5812786.53 frames. ], batch size: 82, lr: 3.80e-02, grad_scale: 16.0 +2024-09-16 15:32:45,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.46 vs. limit=10.0 +2024-09-16 15:32:49,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=27100.0, ans=0.0 +2024-09-16 15:32:49,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=27100.0, ans=0.125 +2024-09-16 15:33:03,583 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.82 vs. limit=22.5 +2024-09-16 15:33:11,052 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.82 vs. limit=15.0 +2024-09-16 15:33:25,845 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=23.11 vs. 
limit=22.5 +2024-09-16 15:33:31,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=27220.0, ans=0.125 +2024-09-16 15:33:41,482 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.069e+02 1.301e+02 1.512e+02 1.808e+02 4.415e+02, threshold=3.025e+02, percent-clipped=2.0 +2024-09-16 15:33:58,448 INFO [train.py:1198] (0/2) Epoch 2, batch 2300, loss[loss=0.2851, ctc_loss=0.2408, cr_loss=0.3688, attn_decoder_loss=0.2818, over 29318.00 frames. ], tot_loss[loss=0.3359, ctc_loss=0.2933, cr_loss=0.4403, attn_decoder_loss=0.3309, over 5799903.35 frames. ], batch size: 71, lr: 3.79e-02, grad_scale: 16.0 +2024-09-16 15:34:04,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=27300.0, ans=0.95 +2024-09-16 15:34:04,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=27300.0, ans=0.2 +2024-09-16 15:34:09,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=27300.0, ans=0.125 +2024-09-16 15:34:13,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=27340.0, ans=0.2 +2024-09-16 15:34:16,048 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.01 vs. limit=6.0 +2024-09-16 15:34:34,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=27380.0, ans=0.125 +2024-09-16 15:34:40,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=27380.0, ans=0.004917391304347826 +2024-09-16 15:34:53,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=27420.0, ans=0.2 +2024-09-16 15:35:16,568 INFO [train.py:1198] (0/2) Epoch 2, batch 2350, loss[loss=0.3655, ctc_loss=0.3238, cr_loss=0.4927, attn_decoder_loss=0.3592, over 29692.00 frames. ], tot_loss[loss=0.3364, ctc_loss=0.2936, cr_loss=0.4415, attn_decoder_loss=0.3313, over 5805027.13 frames. ], batch size: 83, lr: 3.79e-02, grad_scale: 16.0 +2024-09-16 15:35:24,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=27500.0, ans=0.07 +2024-09-16 15:35:50,937 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.58 vs. 
limit=22.5 +2024-09-16 15:35:54,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=27580.0, ans=0.125 +2024-09-16 15:35:56,375 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=5.259e-03 +2024-09-16 15:36:00,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=27620.0, ans=0.0 +2024-09-16 15:36:11,543 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:36:15,850 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.153e+02 1.428e+02 1.608e+02 2.014e+02 4.831e+02, threshold=3.217e+02, percent-clipped=8.0 +2024-09-16 15:36:32,932 INFO [train.py:1198] (0/2) Epoch 2, batch 2400, loss[loss=0.3103, ctc_loss=0.2626, cr_loss=0.4015, attn_decoder_loss=0.3067, over 29540.00 frames. ], tot_loss[loss=0.3367, ctc_loss=0.2941, cr_loss=0.4423, attn_decoder_loss=0.3316, over 5808782.19 frames. ], batch size: 76, lr: 3.78e-02, grad_scale: 32.0 +2024-09-16 15:36:48,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer_ff2.min_abs, batch_count=27740.0, ans=0.1 +2024-09-16 15:36:52,131 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=11.47 vs. limit=12.0 +2024-09-16 15:36:59,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.39 vs. limit=10.0 +2024-09-16 15:37:31,005 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.79 vs. limit=15.0 +2024-09-16 15:37:48,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=27860.0, ans=0.1 +2024-09-16 15:37:51,267 INFO [train.py:1198] (0/2) Epoch 2, batch 2450, loss[loss=0.3279, ctc_loss=0.2829, cr_loss=0.4432, attn_decoder_loss=0.3231, over 29718.00 frames. ], tot_loss[loss=0.3379, ctc_loss=0.2952, cr_loss=0.4441, attn_decoder_loss=0.3327, over 5785726.79 frames. 
], batch size: 82, lr: 3.78e-02, grad_scale: 16.0 +2024-09-16 15:37:57,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=27900.0, ans=0.2 +2024-09-16 15:38:03,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=27900.0, ans=0.004804347826086957 +2024-09-16 15:38:08,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=27940.0, ans=0.1 +2024-09-16 15:38:08,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=27940.0, ans=0.1 +2024-09-16 15:38:19,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=27940.0, ans=0.125 +2024-09-16 15:38:39,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=28020.0, ans=0.004778260869565217 +2024-09-16 15:38:54,382 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.110e+02 1.358e+02 1.541e+02 1.889e+02 3.653e+02, threshold=3.082e+02, percent-clipped=2.0 +2024-09-16 15:39:06,056 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.80 vs. limit=15.0 +2024-09-16 15:39:08,575 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:39:09,707 INFO [train.py:1198] (0/2) Epoch 2, batch 2500, loss[loss=0.3528, ctc_loss=0.2964, cr_loss=0.4517, attn_decoder_loss=0.349, over 29653.00 frames. ], tot_loss[loss=0.3376, ctc_loss=0.2947, cr_loss=0.4439, attn_decoder_loss=0.3326, over 5795965.45 frames. ], batch size: 86, lr: 3.77e-02, grad_scale: 16.0 +2024-09-16 15:39:26,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=28140.0, ans=0.004752173913043479 +2024-09-16 15:39:38,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=28180.0, ans=0.125 +2024-09-16 15:39:41,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=28180.0, ans=0.1 +2024-09-16 15:39:44,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=28180.0, ans=0.035 +2024-09-16 15:40:04,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=28220.0, ans=0.125 +2024-09-16 15:40:25,989 INFO [train.py:1198] (0/2) Epoch 2, batch 2550, loss[loss=0.2871, ctc_loss=0.2372, cr_loss=0.3774, attn_decoder_loss=0.2843, over 29340.00 frames. ], tot_loss[loss=0.3378, ctc_loss=0.2951, cr_loss=0.4448, attn_decoder_loss=0.3327, over 5798714.25 frames. 
], batch size: 67, lr: 3.76e-02, grad_scale: 16.0 +2024-09-16 15:40:39,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=28340.0, ans=0.2 +2024-09-16 15:40:42,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=28340.0, ans=0.2 +2024-09-16 15:40:45,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=28340.0, ans=0.125 +2024-09-16 15:41:01,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=28380.0, ans=0.125 +2024-09-16 15:41:02,946 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.11 vs. limit=6.0 +2024-09-16 15:41:23,204 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.80 vs. limit=15.0 +2024-09-16 15:41:24,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=28420.0, ans=0.004691304347826087 +2024-09-16 15:41:28,634 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.048e+02 1.393e+02 1.535e+02 1.794e+02 3.607e+02, threshold=3.070e+02, percent-clipped=1.0 +2024-09-16 15:41:30,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=28460.0, ans=0.125 +2024-09-16 15:41:35,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=28460.0, ans=0.05 +2024-09-16 15:41:38,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=28460.0, ans=0.0 +2024-09-16 15:41:44,116 INFO [train.py:1198] (0/2) Epoch 2, batch 2600, loss[loss=0.3421, ctc_loss=0.2968, cr_loss=0.4605, attn_decoder_loss=0.3369, over 29432.00 frames. ], tot_loss[loss=0.338, ctc_loss=0.295, cr_loss=0.4446, attn_decoder_loss=0.3329, over 5794738.27 frames. ], batch size: 78, lr: 3.76e-02, grad_scale: 16.0 +2024-09-16 15:41:53,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=28500.0, ans=0.1 +2024-09-16 15:42:10,931 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.46 vs. limit=15.0 +2024-09-16 15:42:11,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=28540.0, ans=0.0 +2024-09-16 15:42:25,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=28580.0, ans=0.025 +2024-09-16 15:42:41,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=28620.0, ans=0.0 +2024-09-16 15:42:41,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.63 vs. 
limit=15.0 +2024-09-16 15:42:53,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=28660.0, ans=0.125 +2024-09-16 15:43:00,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=28700.0, ans=0.004630434782608696 +2024-09-16 15:43:01,956 INFO [train.py:1198] (0/2) Epoch 2, batch 2650, loss[loss=0.36, ctc_loss=0.3239, cr_loss=0.493, attn_decoder_loss=0.353, over 29228.00 frames. ], tot_loss[loss=0.3377, ctc_loss=0.2944, cr_loss=0.4447, attn_decoder_loss=0.3326, over 5801157.00 frames. ], batch size: 100, lr: 3.75e-02, grad_scale: 16.0 +2024-09-16 15:43:02,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=28700.0, ans=0.125 +2024-09-16 15:43:10,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.whiten.whitening_limit, batch_count=28700.0, ans=12.0 +2024-09-16 15:43:11,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=28700.0, ans=0.125 +2024-09-16 15:43:19,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=28740.0, ans=0.125 +2024-09-16 15:43:20,073 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.45 vs. limit=15.0 +2024-09-16 15:43:26,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=28740.0, ans=0.125 +2024-09-16 15:44:03,360 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.130e+02 1.330e+02 1.517e+02 1.799e+02 4.153e+02, threshold=3.035e+02, percent-clipped=1.0 +2024-09-16 15:44:18,681 INFO [train.py:1198] (0/2) Epoch 2, batch 2700, loss[loss=0.3408, ctc_loss=0.2838, cr_loss=0.4782, attn_decoder_loss=0.3365, over 29540.00 frames. ], tot_loss[loss=0.3382, ctc_loss=0.2949, cr_loss=0.4462, attn_decoder_loss=0.3331, over 5796578.94 frames. ], batch size: 87, lr: 3.74e-02, grad_scale: 16.0 +2024-09-16 15:44:22,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=28900.0, ans=0.2 +2024-09-16 15:44:34,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=28940.0, ans=0.004578260869565217 +2024-09-16 15:44:37,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=28940.0, ans=0.07 +2024-09-16 15:44:37,673 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.19 vs. 
limit=15.0 +2024-09-16 15:44:43,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=28940.0, ans=0.2 +2024-09-16 15:45:00,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=28980.0, ans=0.0 +2024-09-16 15:45:01,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=28980.0, ans=0.125 +2024-09-16 15:45:27,819 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:45:36,948 INFO [train.py:1198] (0/2) Epoch 2, batch 2750, loss[loss=0.3227, ctc_loss=0.2824, cr_loss=0.4449, attn_decoder_loss=0.3172, over 29516.00 frames. ], tot_loss[loss=0.3367, ctc_loss=0.2933, cr_loss=0.4446, attn_decoder_loss=0.3317, over 5793992.87 frames. ], batch size: 75, lr: 3.74e-02, grad_scale: 8.0 +2024-09-16 15:45:43,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=29100.0, ans=0.2 +2024-09-16 15:45:47,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=29100.0, ans=0.125 +2024-09-16 15:45:54,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=29140.0, ans=0.125 +2024-09-16 15:46:08,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.60 vs. limit=12.0 +2024-09-16 15:46:09,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=29180.0, ans=0.025 +2024-09-16 15:46:11,554 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.78 vs. limit=15.0 +2024-09-16 15:46:18,617 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:46:23,806 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.18 vs. limit=22.5 +2024-09-16 15:46:30,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=29220.0, ans=0.1 +2024-09-16 15:46:40,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=29260.0, ans=0.125 +2024-09-16 15:46:41,282 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.163e+02 1.322e+02 1.540e+02 1.938e+02 5.454e+02, threshold=3.080e+02, percent-clipped=6.0 +2024-09-16 15:46:55,039 INFO [train.py:1198] (0/2) Epoch 2, batch 2800, loss[loss=0.4122, ctc_loss=0.4138, cr_loss=0.4561, attn_decoder_loss=0.4019, over 20155.00 frames. ], tot_loss[loss=0.3368, ctc_loss=0.2934, cr_loss=0.4439, attn_decoder_loss=0.3317, over 5774490.12 frames. ], batch size: 211, lr: 3.73e-02, grad_scale: 16.0 +2024-09-16 15:47:15,092 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:47:36,540 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.40 vs. 
limit=15.0 +2024-09-16 15:47:43,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=29420.0, ans=0.125 +2024-09-16 15:47:43,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=29420.0, ans=0.0 +2024-09-16 15:48:00,836 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=3.04 vs. limit=15.0 +2024-09-16 15:48:10,149 INFO [train.py:1198] (0/2) Epoch 2, batch 2850, loss[loss=0.3235, ctc_loss=0.2818, cr_loss=0.4218, attn_decoder_loss=0.3188, over 29521.00 frames. ], tot_loss[loss=0.3375, ctc_loss=0.2943, cr_loss=0.445, attn_decoder_loss=0.3325, over 5760796.79 frames. ], batch size: 77, lr: 3.73e-02, grad_scale: 16.0 +2024-09-16 15:48:13,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=29500.0, ans=0.125 +2024-09-16 15:48:36,728 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=14.69 vs. limit=15.0 +2024-09-16 15:48:42,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=29580.0, ans=0.125 +2024-09-16 15:48:42,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=29580.0, ans=0.004439130434782609 +2024-09-16 15:48:45,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=29580.0, ans=0.125 +2024-09-16 15:49:15,202 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.123e+02 1.408e+02 1.587e+02 1.885e+02 4.187e+02, threshold=3.175e+02, percent-clipped=5.0 +2024-09-16 15:49:20,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=29660.0, ans=0.004421739130434782 +2024-09-16 15:49:20,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=29660.0, ans=0.1 +2024-09-16 15:49:21,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=29660.0, ans=0.035 +2024-09-16 15:49:22,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=29660.0, ans=0.125 +2024-09-16 15:49:28,801 INFO [train.py:1198] (0/2) Epoch 2, batch 2900, loss[loss=0.3339, ctc_loss=0.2826, cr_loss=0.4583, attn_decoder_loss=0.3294, over 29398.00 frames. ], tot_loss[loss=0.3384, ctc_loss=0.2945, cr_loss=0.4468, attn_decoder_loss=0.3333, over 5786684.12 frames. ], batch size: 79, lr: 3.72e-02, grad_scale: 16.0 +2024-09-16 15:49:31,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.34 vs. limit=15.0 +2024-09-16 15:49:46,862 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.86 vs. 
limit=10.0 +2024-09-16 15:50:02,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=29780.0, ans=0.125 +2024-09-16 15:50:05,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=29780.0, ans=0.2 +2024-09-16 15:50:06,485 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.61 vs. limit=15.0 +2024-09-16 15:50:13,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=29780.0, ans=0.0 +2024-09-16 15:50:15,306 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.58 vs. limit=22.5 +2024-09-16 15:50:23,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=29820.0, ans=0.1 +2024-09-16 15:50:44,093 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.42 vs. limit=15.0 +2024-09-16 15:50:46,443 INFO [train.py:1198] (0/2) Epoch 2, batch 2950, loss[loss=0.3103, ctc_loss=0.2623, cr_loss=0.4438, attn_decoder_loss=0.3058, over 29529.00 frames. ], tot_loss[loss=0.3356, ctc_loss=0.2917, cr_loss=0.4442, attn_decoder_loss=0.3306, over 5782798.37 frames. ], batch size: 75, lr: 3.71e-02, grad_scale: 16.0 +2024-09-16 15:51:03,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=29940.0, ans=0.2 +2024-09-16 15:51:18,614 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:51:19,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=29980.0, ans=0.125 +2024-09-16 15:51:23,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=29980.0, ans=0.07 +2024-09-16 15:51:25,972 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:51:31,044 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.61 vs. limit=10.0 +2024-09-16 15:51:35,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=30020.0, ans=0.05 +2024-09-16 15:51:48,468 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.139e+02 1.374e+02 1.533e+02 1.890e+02 8.560e+02, threshold=3.066e+02, percent-clipped=4.0 +2024-09-16 15:52:02,437 INFO [train.py:1198] (0/2) Epoch 2, batch 3000, loss[loss=0.3302, ctc_loss=0.274, cr_loss=0.4408, attn_decoder_loss=0.3266, over 29750.00 frames. ], tot_loss[loss=0.3353, ctc_loss=0.2917, cr_loss=0.4437, attn_decoder_loss=0.3303, over 5784118.58 frames. ], batch size: 81, lr: 3.71e-02, grad_scale: 16.0 +2024-09-16 15:52:02,438 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 15:52:20,640 INFO [train.py:1230] (0/2) Epoch 2, validation: loss=0.2432, ctc_loss=0.1092, cr_loss=4.796e-15, attn_decoder_loss=0.2581, over 944034.00 frames. 
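
Editor's note: the `tot_loss` figures in the train.py lines above are consistent with a weighted sum of the three logged criteria, using the scales encoded in the experiment directory name recorded later in this log (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02). A minimal sketch follows; it is not the repository's train.py, and the function and parameter names are illustrative assumptions.

```python
def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float,
                  ctc_scale: float = 0.1, aed_scale: float = 0.9,
                  cr_scale: float = 0.02) -> float:
    """Weighted sum of the CTC, attention-decoder, and consistency-regularization
    losses, with the scales taken from this experiment's directory name."""
    return ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss

# Sanity check against a tot_loss entry from this log
# (Epoch 2, batch 3000: loss=0.3353, ctc_loss=0.2917, cr_loss=0.4437,
#  attn_decoder_loss=0.3303):
assert abs(combined_loss(0.2917, 0.3303, 0.4437) - 0.3353) < 5e-4
```

The same weighting reproduces the other `tot_loss` lines in this log to within rounding, which is why only the attention-decoder term tracks the headline loss closely.
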
+2024-09-16 15:52:20,640 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 15:52:25,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=30100.0, ans=0.0 +2024-09-16 15:52:27,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=30100.0, ans=0.09899494936611666 +2024-09-16 15:52:34,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=30140.0, ans=0.0 +2024-09-16 15:52:36,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=30140.0, ans=0.2 +2024-09-16 15:52:39,895 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.79 vs. limit=15.0 +2024-09-16 15:52:57,548 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=3.044e-02 +2024-09-16 15:53:14,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=30220.0, ans=0.2 +2024-09-16 15:53:17,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=30220.0, ans=0.2 +2024-09-16 15:53:17,524 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:53:26,249 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.86 vs. limit=15.0 +2024-09-16 15:53:33,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=30260.0, ans=0.125 +2024-09-16 15:53:42,110 INFO [train.py:1198] (0/2) Epoch 2, batch 3050, loss[loss=0.3015, ctc_loss=0.2461, cr_loss=0.3951, attn_decoder_loss=0.2989, over 29525.00 frames. ], tot_loss[loss=0.3364, ctc_loss=0.2928, cr_loss=0.4451, attn_decoder_loss=0.3313, over 5778212.43 frames. ], batch size: 76, lr: 3.70e-02, grad_scale: 16.0 +2024-09-16 15:53:43,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=30300.0, ans=0.125 +2024-09-16 15:53:53,829 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.50 vs. limit=15.0 +2024-09-16 15:54:14,680 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.73 vs. limit=22.5 +2024-09-16 15:54:16,134 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.03 vs. limit=15.0 +2024-09-16 15:54:16,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.15 vs. limit=15.0 +2024-09-16 15:54:28,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=30420.0, ans=0.125 +2024-09-16 15:54:34,610 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.19 vs. 
limit=15.0 +2024-09-16 15:54:41,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=30460.0, ans=0.2 +2024-09-16 15:54:44,413 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.133e+02 1.365e+02 1.556e+02 1.852e+02 9.980e+02, threshold=3.113e+02, percent-clipped=5.0 +2024-09-16 15:54:47,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=30460.0, ans=0.0042478260869565215 +2024-09-16 15:54:47,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=30460.0, ans=0.0042478260869565215 +2024-09-16 15:54:54,488 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.75 vs. limit=22.5 +2024-09-16 15:54:56,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=30500.0, ans=0.004239130434782609 +2024-09-16 15:54:57,987 INFO [train.py:1198] (0/2) Epoch 2, batch 3100, loss[loss=0.3783, ctc_loss=0.3545, cr_loss=0.4712, attn_decoder_loss=0.3704, over 29258.00 frames. ], tot_loss[loss=0.3357, ctc_loss=0.292, cr_loss=0.4445, attn_decoder_loss=0.3307, over 5777605.01 frames. ], batch size: 100, lr: 3.69e-02, grad_scale: 16.0 +2024-09-16 15:55:08,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=30500.0, ans=0.125 +2024-09-16 15:55:10,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=30500.0, ans=0.125 +2024-09-16 15:55:25,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=30540.0, ans=0.04949747468305833 +2024-09-16 15:55:30,488 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=24.03 vs. limit=22.5 +2024-09-16 15:55:36,660 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.95 vs. limit=15.0 +2024-09-16 15:55:43,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=30620.0, ans=0.125 +2024-09-16 15:55:53,789 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.03 vs. limit=15.0 +2024-09-16 15:56:02,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=30660.0, ans=0.125 +2024-09-16 15:56:05,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=30660.0, ans=0.1 +2024-09-16 15:56:10,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=30660.0, ans=0.1 +2024-09-16 15:56:14,194 INFO [train.py:1198] (0/2) Epoch 2, batch 3150, loss[loss=0.365, ctc_loss=0.3211, cr_loss=0.4689, attn_decoder_loss=0.3594, over 28974.00 frames. ], tot_loss[loss=0.3353, ctc_loss=0.2917, cr_loss=0.4445, attn_decoder_loss=0.3303, over 5783729.60 frames. 
], batch size: 104, lr: 3.69e-02, grad_scale: 16.0 +2024-09-16 15:56:41,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=30740.0, ans=0.125 +2024-09-16 15:57:18,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.35 vs. limit=15.0 +2024-09-16 15:57:20,644 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.062e+02 1.339e+02 1.514e+02 1.730e+02 4.890e+02, threshold=3.027e+02, percent-clipped=3.0 +2024-09-16 15:57:28,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=30860.0, ans=0.025 +2024-09-16 15:57:34,169 INFO [train.py:1198] (0/2) Epoch 2, batch 3200, loss[loss=0.3477, ctc_loss=0.3105, cr_loss=0.5123, attn_decoder_loss=0.3405, over 29413.00 frames. ], tot_loss[loss=0.3344, ctc_loss=0.2901, cr_loss=0.4447, attn_decoder_loss=0.3294, over 5793171.15 frames. ], batch size: 79, lr: 3.68e-02, grad_scale: 32.0 +2024-09-16 15:57:43,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=30900.0, ans=0.1 +2024-09-16 15:57:48,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=30940.0, ans=0.125 +2024-09-16 15:57:54,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=30940.0, ans=0.5 +2024-09-16 15:57:55,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=30940.0, ans=0.004143478260869566 +2024-09-16 15:58:17,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=30980.0, ans=0.125 +2024-09-16 15:58:19,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=31020.0, ans=0.0 +2024-09-16 15:58:29,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=31020.0, ans=0.125 +2024-09-16 15:58:36,007 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.63 vs. limit=22.5 +2024-09-16 15:58:38,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=31060.0, ans=0.025 +2024-09-16 15:58:50,040 INFO [train.py:1198] (0/2) Epoch 2, batch 3250, loss[loss=0.3339, ctc_loss=0.2805, cr_loss=0.4203, attn_decoder_loss=0.3305, over 29701.00 frames. ], tot_loss[loss=0.3345, ctc_loss=0.2898, cr_loss=0.4448, attn_decoder_loss=0.3296, over 5799976.37 frames. ], batch size: 84, lr: 3.68e-02, grad_scale: 16.0 +2024-09-16 15:59:02,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=31100.0, ans=0.125 +2024-09-16 15:59:24,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=31180.0, ans=0.125 +2024-09-16 15:59:47,168 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.49 vs. 
limit=22.5 +2024-09-16 15:59:53,724 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.050e+02 1.355e+02 1.599e+02 1.863e+02 1.090e+03, threshold=3.197e+02, percent-clipped=6.0 +2024-09-16 15:59:58,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=31260.0, ans=0.0 +2024-09-16 16:00:03,735 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.39 vs. limit=10.0 +2024-09-16 16:00:04,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=31300.0, ans=0.09899494936611666 +2024-09-16 16:00:06,053 INFO [train.py:1198] (0/2) Epoch 2, batch 3300, loss[loss=0.3465, ctc_loss=0.3002, cr_loss=0.4704, attn_decoder_loss=0.3412, over 28204.00 frames. ], tot_loss[loss=0.3332, ctc_loss=0.2888, cr_loss=0.4435, attn_decoder_loss=0.3282, over 5797286.09 frames. ], batch size: 111, lr: 3.67e-02, grad_scale: 16.0 +2024-09-16 16:00:15,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=31300.0, ans=0.0 +2024-09-16 16:00:22,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten.whitening_limit, batch_count=31340.0, ans=15.0 +2024-09-16 16:00:41,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=31380.0, ans=0.125 +2024-09-16 16:00:42,591 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.93 vs. limit=6.0 +2024-09-16 16:00:56,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=31420.0, ans=0.1 +2024-09-16 16:01:01,982 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=2.068e-02 +2024-09-16 16:01:11,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=31460.0, ans=0.0 +2024-09-16 16:01:25,569 INFO [train.py:1198] (0/2) Epoch 2, batch 3350, loss[loss=0.3568, ctc_loss=0.3173, cr_loss=0.4685, attn_decoder_loss=0.3508, over 28806.00 frames. ], tot_loss[loss=0.3347, ctc_loss=0.2907, cr_loss=0.4443, attn_decoder_loss=0.3297, over 5774403.99 frames. ], batch size: 104, lr: 3.66e-02, grad_scale: 16.0 +2024-09-16 16:02:03,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=31580.0, ans=0.125 +2024-09-16 16:02:05,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.85 vs. 
limit=6.0 +2024-09-16 16:02:18,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=31620.0, ans=0.1 +2024-09-16 16:02:29,255 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.156e+02 1.443e+02 1.604e+02 1.937e+02 5.792e+02, threshold=3.209e+02, percent-clipped=1.0 +2024-09-16 16:02:29,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=31660.0, ans=0.00398695652173913 +2024-09-16 16:02:41,560 INFO [train.py:1198] (0/2) Epoch 2, batch 3400, loss[loss=0.298, ctc_loss=0.2441, cr_loss=0.4402, attn_decoder_loss=0.2942, over 29356.00 frames. ], tot_loss[loss=0.3342, ctc_loss=0.2903, cr_loss=0.4445, attn_decoder_loss=0.3292, over 5766377.80 frames. ], batch size: 67, lr: 3.66e-02, grad_scale: 16.0 +2024-09-16 16:02:47,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=31700.0, ans=0.125 +2024-09-16 16:02:58,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=31740.0, ans=0.125 +2024-09-16 16:03:06,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=31740.0, ans=0.1 +2024-09-16 16:03:39,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=31820.0, ans=0.0 +2024-09-16 16:03:54,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=31860.0, ans=0.2 +2024-09-16 16:03:57,317 INFO [train.py:1198] (0/2) Epoch 2, batch 3450, loss[loss=0.3335, ctc_loss=0.2848, cr_loss=0.4398, attn_decoder_loss=0.3291, over 28234.00 frames. ], tot_loss[loss=0.3344, ctc_loss=0.2898, cr_loss=0.4449, attn_decoder_loss=0.3295, over 5774552.36 frames. ], batch size: 111, lr: 3.65e-02, grad_scale: 16.0 +2024-09-16 16:04:08,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=31900.0, ans=0.125 +2024-09-16 16:04:08,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=31900.0, ans=0.0 +2024-09-16 16:04:14,142 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:04:36,149 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-8000.pt +2024-09-16 16:04:55,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=32020.0, ans=0.125 +2024-09-16 16:05:09,964 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.17 vs. 
limit=22.5 +2024-09-16 16:05:12,126 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.023e+02 1.335e+02 1.514e+02 1.734e+02 4.417e+02, threshold=3.028e+02, percent-clipped=1.0 +2024-09-16 16:05:14,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=32060.0, ans=0.125 +2024-09-16 16:05:24,257 INFO [train.py:1198] (0/2) Epoch 2, batch 3500, loss[loss=0.3122, ctc_loss=0.2701, cr_loss=0.42, attn_decoder_loss=0.3075, over 29314.00 frames. ], tot_loss[loss=0.3338, ctc_loss=0.2896, cr_loss=0.4445, attn_decoder_loss=0.3289, over 5775977.11 frames. ], batch size: 71, lr: 3.65e-02, grad_scale: 16.0 +2024-09-16 16:05:42,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=32140.0, ans=0.0 +2024-09-16 16:05:54,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=32180.0, ans=0.09899494936611666 +2024-09-16 16:06:01,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=32180.0, ans=0.125 +2024-09-16 16:06:06,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=32180.0, ans=0.125 +2024-09-16 16:06:09,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=32220.0, ans=0.125 +2024-09-16 16:06:15,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=32220.0, ans=0.003865217391304348 +2024-09-16 16:06:20,331 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.93 vs. limit=15.0 +2024-09-16 16:06:25,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=32260.0, ans=0.05 +2024-09-16 16:06:38,935 INFO [train.py:1198] (0/2) Epoch 2, batch 3550, loss[loss=0.321, ctc_loss=0.261, cr_loss=0.4234, attn_decoder_loss=0.3183, over 29707.00 frames. ], tot_loss[loss=0.3331, ctc_loss=0.2883, cr_loss=0.444, attn_decoder_loss=0.3283, over 5782431.52 frames. ], batch size: 89, lr: 3.64e-02, grad_scale: 16.0 +2024-09-16 16:07:01,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=32340.0, ans=0.125 +2024-09-16 16:07:03,316 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.85 vs. 
limit=15.0 +2024-09-16 16:07:07,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=32380.0, ans=0.0 +2024-09-16 16:07:26,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=32420.0, ans=0.09899494936611666 +2024-09-16 16:07:26,932 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:07:41,333 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.102e+02 1.383e+02 1.528e+02 1.788e+02 3.393e+02, threshold=3.056e+02, percent-clipped=1.0 +2024-09-16 16:07:44,957 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=14.37 vs. limit=15.0 +2024-09-16 16:07:53,261 INFO [train.py:1198] (0/2) Epoch 2, batch 3600, loss[loss=0.3095, ctc_loss=0.2552, cr_loss=0.4371, attn_decoder_loss=0.3058, over 29530.00 frames. ], tot_loss[loss=0.3329, ctc_loss=0.2877, cr_loss=0.4446, attn_decoder_loss=0.3281, over 5791223.75 frames. ], batch size: 77, lr: 3.63e-02, grad_scale: 32.0 +2024-09-16 16:07:54,492 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.60 vs. limit=15.0 +2024-09-16 16:08:06,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=32540.0, ans=0.125 +2024-09-16 16:08:09,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=32540.0, ans=0.125 +2024-09-16 16:08:19,524 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.07 vs. limit=15.0 +2024-09-16 16:08:31,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.98 vs. limit=15.0 +2024-09-16 16:08:48,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=32620.0, ans=0.003778260869565218 +2024-09-16 16:09:07,512 INFO [train.py:1198] (0/2) Epoch 2, batch 3650, loss[loss=0.3481, ctc_loss=0.2968, cr_loss=0.4608, attn_decoder_loss=0.3435, over 29488.00 frames. ], tot_loss[loss=0.332, ctc_loss=0.2865, cr_loss=0.4446, attn_decoder_loss=0.3272, over 5792980.17 frames. 
], batch size: 90, lr: 3.63e-02, grad_scale: 16.0 +2024-09-16 16:09:21,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=32740.0, ans=0.125 +2024-09-16 16:09:56,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=32820.0, ans=0.0 +2024-09-16 16:10:02,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=32820.0, ans=0.1 +2024-09-16 16:10:12,078 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.100e+02 1.348e+02 1.495e+02 1.801e+02 3.465e+02, threshold=2.990e+02, percent-clipped=2.0 +2024-09-16 16:10:16,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=32860.0, ans=0.2 +2024-09-16 16:10:23,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=32900.0, ans=0.2 +2024-09-16 16:10:24,619 INFO [train.py:1198] (0/2) Epoch 2, batch 3700, loss[loss=0.3598, ctc_loss=0.3209, cr_loss=0.49, attn_decoder_loss=0.3532, over 29702.00 frames. ], tot_loss[loss=0.3322, ctc_loss=0.2868, cr_loss=0.4449, attn_decoder_loss=0.3273, over 5803487.21 frames. ], batch size: 84, lr: 3.62e-02, grad_scale: 16.0 +2024-09-16 16:10:40,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=32940.0, ans=0.125 +2024-09-16 16:10:43,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=32940.0, ans=0.125 +2024-09-16 16:10:43,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=32940.0, ans=0.125 +2024-09-16 16:11:11,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=33020.0, ans=10.0 +2024-09-16 16:11:26,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.76 vs. limit=15.0 +2024-09-16 16:11:32,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=33060.0, ans=0.1 +2024-09-16 16:11:32,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=33060.0, ans=0.2 +2024-09-16 16:11:41,225 INFO [train.py:1198] (0/2) Epoch 2, batch 3750, loss[loss=0.2848, ctc_loss=0.2478, cr_loss=0.3819, attn_decoder_loss=0.2804, over 29349.00 frames. ], tot_loss[loss=0.3316, ctc_loss=0.286, cr_loss=0.4442, attn_decoder_loss=0.3267, over 5808139.59 frames. 
], batch size: 67, lr: 3.62e-02, grad_scale: 16.0 +2024-09-16 16:11:44,680 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=2.602e-03 +2024-09-16 16:11:52,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=33100.0, ans=0.0 +2024-09-16 16:11:52,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=33100.0, ans=0.2 +2024-09-16 16:11:59,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=33140.0, ans=0.125 +2024-09-16 16:11:59,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=33140.0, ans=0.125 +2024-09-16 16:12:18,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=33180.0, ans=0.003656521739130435 +2024-09-16 16:12:24,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=33220.0, ans=0.0036478260869565226 +2024-09-16 16:12:33,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=33220.0, ans=0.125 +2024-09-16 16:12:42,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=33260.0, ans=0.125 +2024-09-16 16:12:44,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=33260.0, ans=0.125 +2024-09-16 16:12:45,648 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.138e+02 1.433e+02 1.658e+02 2.025e+02 1.075e+03, threshold=3.317e+02, percent-clipped=10.0 +2024-09-16 16:12:55,952 INFO [train.py:1198] (0/2) Epoch 2, batch 3800, loss[loss=0.33, ctc_loss=0.2706, cr_loss=0.4507, attn_decoder_loss=0.3266, over 29626.00 frames. ], tot_loss[loss=0.3315, ctc_loss=0.286, cr_loss=0.4443, attn_decoder_loss=0.3267, over 5799408.14 frames. ], batch size: 86, lr: 3.61e-02, grad_scale: 16.0 +2024-09-16 16:13:00,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=33300.0, ans=0.125 +2024-09-16 16:13:08,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=33300.0, ans=0.07 +2024-09-16 16:13:08,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=33300.0, ans=0.125 +2024-09-16 16:13:14,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=33340.0, ans=0.0 +2024-09-16 16:13:20,919 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.00 vs. limit=15.0 +2024-09-16 16:13:25,565 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=24.96 vs. 
limit=22.5 +2024-09-16 16:13:27,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=33380.0, ans=0.00361304347826087 +2024-09-16 16:13:48,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=33420.0, ans=0.0 +2024-09-16 16:14:10,551 INFO [train.py:1198] (0/2) Epoch 2, batch 3850, loss[loss=0.3489, ctc_loss=0.2984, cr_loss=0.4427, attn_decoder_loss=0.3447, over 29203.00 frames. ], tot_loss[loss=0.3316, ctc_loss=0.286, cr_loss=0.4444, attn_decoder_loss=0.3268, over 5813559.35 frames. ], batch size: 100, lr: 3.60e-02, grad_scale: 16.0 +2024-09-16 16:14:18,307 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:14:21,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=33500.0, ans=0.1 +2024-09-16 16:14:22,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=33500.0, ans=0.0 +2024-09-16 16:14:24,576 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.42 vs. limit=15.0 +2024-09-16 16:14:35,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=33540.0, ans=0.0 +2024-09-16 16:14:37,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=33540.0, ans=0.125 +2024-09-16 16:15:04,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=33620.0, ans=0.95 +2024-09-16 16:15:05,176 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.27 vs. limit=10.0 +2024-09-16 16:15:14,727 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.118e+02 1.336e+02 1.556e+02 1.830e+02 4.264e+02, threshold=3.112e+02, percent-clipped=3.0 +2024-09-16 16:15:25,317 INFO [train.py:1198] (0/2) Epoch 2, batch 3900, loss[loss=0.3764, ctc_loss=0.3482, cr_loss=0.4995, attn_decoder_loss=0.3685, over 29637.00 frames. ], tot_loss[loss=0.3321, ctc_loss=0.2865, cr_loss=0.4464, attn_decoder_loss=0.3273, over 5817847.41 frames. ], batch size: 86, lr: 3.60e-02, grad_scale: 16.0 +2024-09-16 16:15:57,450 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.37 vs. limit=10.0 +2024-09-16 16:16:20,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=18.31 vs. limit=22.5 +2024-09-16 16:16:42,720 INFO [train.py:1198] (0/2) Epoch 2, batch 3950, loss[loss=0.3441, ctc_loss=0.2985, cr_loss=0.442, attn_decoder_loss=0.3394, over 29451.00 frames. ], tot_loss[loss=0.3317, ctc_loss=0.2858, cr_loss=0.4465, attn_decoder_loss=0.3269, over 5837192.31 frames. ], batch size: 97, lr: 3.59e-02, grad_scale: 16.0 +2024-09-16 16:16:52,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.03 vs. 
limit=22.5 +2024-09-16 16:16:54,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=33900.0, ans=0.125 +2024-09-16 16:17:05,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=33940.0, ans=0.025 +2024-09-16 16:17:11,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=33980.0, ans=0.125 +2024-09-16 16:17:28,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=34020.0, ans=0.1 +2024-09-16 16:17:38,250 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=3.52 vs. limit=12.0 +2024-09-16 16:17:40,203 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.71 vs. limit=15.0 +2024-09-16 16:17:44,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=23.99 vs. limit=22.5 +2024-09-16 16:17:46,404 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.131e+02 1.360e+02 1.544e+02 1.951e+02 4.705e+02, threshold=3.088e+02, percent-clipped=4.0 +2024-09-16 16:17:57,006 INFO [train.py:1198] (0/2) Epoch 2, batch 4000, loss[loss=0.3295, ctc_loss=0.2892, cr_loss=0.4097, attn_decoder_loss=0.3249, over 29493.00 frames. ], tot_loss[loss=0.3317, ctc_loss=0.286, cr_loss=0.4456, attn_decoder_loss=0.3269, over 5814567.55 frames. ], batch size: 74, lr: 3.59e-02, grad_scale: 32.0 +2024-09-16 16:18:08,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=34100.0, ans=0.1 +2024-09-16 16:18:19,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=34140.0, ans=0.125 +2024-09-16 16:18:28,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=34180.0, ans=0.2 +2024-09-16 16:18:28,851 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.85 vs. limit=6.0 +2024-09-16 16:18:38,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=34180.0, ans=0.1 +2024-09-16 16:18:39,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=34180.0, ans=0.125 +2024-09-16 16:18:44,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_ff3.min_abs, batch_count=34220.0, ans=0.2 +2024-09-16 16:18:58,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=34260.0, ans=0.125 +2024-09-16 16:19:11,332 INFO [train.py:1198] (0/2) Epoch 2, batch 4050, loss[loss=0.3931, ctc_loss=0.3838, cr_loss=0.4493, attn_decoder_loss=0.3842, over 20658.00 frames. ], tot_loss[loss=0.3321, ctc_loss=0.2865, cr_loss=0.4456, attn_decoder_loss=0.3273, over 5799121.65 frames. 
], batch size: 210, lr: 3.58e-02, grad_scale: 16.0 +2024-09-16 16:19:37,329 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.71 vs. limit=15.0 +2024-09-16 16:19:43,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=34380.0, ans=0.05 +2024-09-16 16:19:51,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=34380.0, ans=0.125 +2024-09-16 16:19:56,414 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.92 vs. limit=15.0 +2024-09-16 16:19:58,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=34420.0, ans=0.2 +2024-09-16 16:20:04,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=34420.0, ans=0.125 +2024-09-16 16:20:06,953 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=6.47 vs. limit=12.0 +2024-09-16 16:20:16,503 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.136e+02 1.473e+02 1.673e+02 1.934e+02 5.199e+02, threshold=3.345e+02, percent-clipped=3.0 +2024-09-16 16:20:19,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=34460.0, ans=0.125 +2024-09-16 16:20:26,777 INFO [train.py:1198] (0/2) Epoch 2, batch 4100, loss[loss=0.339, ctc_loss=0.2921, cr_loss=0.451, attn_decoder_loss=0.3342, over 29510.00 frames. ], tot_loss[loss=0.3319, ctc_loss=0.2859, cr_loss=0.4463, attn_decoder_loss=0.3271, over 5792730.46 frames. ], batch size: 90, lr: 3.57e-02, grad_scale: 16.0 +2024-09-16 16:20:28,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=34500.0, ans=0.125 +2024-09-16 16:20:51,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=34540.0, ans=0.025 +2024-09-16 16:21:21,085 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.28 vs. limit=15.0 +2024-09-16 16:21:42,416 INFO [train.py:1198] (0/2) Epoch 2, batch 4150, loss[loss=0.3178, ctc_loss=0.2734, cr_loss=0.4773, attn_decoder_loss=0.3121, over 29507.00 frames. ], tot_loss[loss=0.3312, ctc_loss=0.2851, cr_loss=0.4465, attn_decoder_loss=0.3264, over 5797992.66 frames. ], batch size: 77, lr: 3.57e-02, grad_scale: 8.0 +2024-09-16 16:21:44,752 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.44 vs. 
limit=22.5 +2024-09-16 16:22:07,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=34740.0, ans=0.003317391304347826 +2024-09-16 16:22:22,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=34780.0, ans=0.0033086956521739133 +2024-09-16 16:22:24,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=34780.0, ans=0.125 +2024-09-16 16:22:31,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=34820.0, ans=0.025 +2024-09-16 16:22:48,752 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.150e+02 1.357e+02 1.525e+02 1.720e+02 3.077e+02, threshold=3.049e+02, percent-clipped=0.0 +2024-09-16 16:22:50,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.36 vs. limit=22.5 +2024-09-16 16:22:56,137 INFO [train.py:1198] (0/2) Epoch 2, batch 4200, loss[loss=0.3626, ctc_loss=0.3153, cr_loss=0.4578, attn_decoder_loss=0.3577, over 29539.00 frames. ], tot_loss[loss=0.3314, ctc_loss=0.2849, cr_loss=0.446, attn_decoder_loss=0.3266, over 5800199.16 frames. ], batch size: 90, lr: 3.56e-02, grad_scale: 8.0 +2024-09-16 16:23:18,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=34940.0, ans=0.125 +2024-09-16 16:23:44,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.56 vs. limit=12.0 +2024-09-16 16:23:45,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=35020.0, ans=0.2 +2024-09-16 16:23:49,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=35020.0, ans=0.1 +2024-09-16 16:23:55,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=35060.0, ans=0.2 +2024-09-16 16:23:56,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=35060.0, ans=0.1 +2024-09-16 16:24:01,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=35060.0, ans=0.2 +2024-09-16 16:24:09,857 INFO [train.py:1198] (0/2) Epoch 2, batch 4250, loss[loss=0.2973, ctc_loss=0.2433, cr_loss=0.4014, attn_decoder_loss=0.2943, over 29496.00 frames. ], tot_loss[loss=0.3312, ctc_loss=0.2843, cr_loss=0.4453, attn_decoder_loss=0.3265, over 5805294.78 frames. ], batch size: 74, lr: 3.56e-02, grad_scale: 8.0 +2024-09-16 16:24:31,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=35140.0, ans=0.2 +2024-09-16 16:24:37,565 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.09 vs. 
limit=6.0 +2024-09-16 16:24:40,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=35180.0, ans=0.1 +2024-09-16 16:24:50,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=35180.0, ans=0.04949747468305833 +2024-09-16 16:25:18,427 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.126e+02 1.431e+02 1.619e+02 1.873e+02 2.888e+02, threshold=3.237e+02, percent-clipped=0.0 +2024-09-16 16:25:22,271 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.08 vs. limit=15.0 +2024-09-16 16:25:25,684 INFO [train.py:1198] (0/2) Epoch 2, batch 4300, loss[loss=0.3552, ctc_loss=0.3011, cr_loss=0.4552, attn_decoder_loss=0.3511, over 29517.00 frames. ], tot_loss[loss=0.3313, ctc_loss=0.2843, cr_loss=0.4456, attn_decoder_loss=0.3266, over 5794834.34 frames. ], batch size: 87, lr: 3.55e-02, grad_scale: 8.0 +2024-09-16 16:26:03,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=35380.0, ans=0.0 +2024-09-16 16:26:21,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=35420.0, ans=0.125 +2024-09-16 16:26:40,300 INFO [train.py:1198] (0/2) Epoch 2, batch 4350, loss[loss=0.33, ctc_loss=0.2854, cr_loss=0.4345, attn_decoder_loss=0.3253, over 29488.00 frames. ], tot_loss[loss=0.335, ctc_loss=0.2879, cr_loss=0.4504, attn_decoder_loss=0.3302, over 5796773.34 frames. ], batch size: 97, lr: 3.54e-02, grad_scale: 8.0 +2024-09-16 16:27:00,545 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=8.30 vs. limit=15.0 +2024-09-16 16:27:11,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=35580.0, ans=0.125 +2024-09-16 16:27:16,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=35580.0, ans=0.125 +2024-09-16 16:27:48,086 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.185e+02 1.425e+02 1.627e+02 1.817e+02 2.716e+02, threshold=3.254e+02, percent-clipped=0.0 +2024-09-16 16:27:48,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=35660.0, ans=0.0 +2024-09-16 16:27:55,406 INFO [train.py:1198] (0/2) Epoch 2, batch 4400, loss[loss=0.3466, ctc_loss=0.2974, cr_loss=0.478, attn_decoder_loss=0.3414, over 27412.00 frames. ], tot_loss[loss=0.338, ctc_loss=0.2915, cr_loss=0.4528, attn_decoder_loss=0.3331, over 5766800.93 frames. 
], batch size: 124, lr: 3.54e-02, grad_scale: 16.0 +2024-09-16 16:28:00,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=35700.0, ans=0.0 +2024-09-16 16:28:07,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=35700.0, ans=0.1 +2024-09-16 16:28:08,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=35740.0, ans=0.1 +2024-09-16 16:28:19,799 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.46 vs. limit=22.5 +2024-09-16 16:28:36,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=35780.0, ans=0.125 +2024-09-16 16:29:09,905 INFO [train.py:1198] (0/2) Epoch 2, batch 4450, loss[loss=0.3684, ctc_loss=0.3553, cr_loss=0.4578, attn_decoder_loss=0.3597, over 20974.00 frames. ], tot_loss[loss=0.3419, ctc_loss=0.2985, cr_loss=0.4547, attn_decoder_loss=0.3366, over 5574518.81 frames. ], batch size: 209, lr: 3.53e-02, grad_scale: 16.0 +2024-09-16 16:29:23,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=35940.0, ans=0.0 +2024-09-16 16:29:27,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=35940.0, ans=0.0 +2024-09-16 16:29:34,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=35940.0, ans=0.125 +2024-09-16 16:29:35,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=35940.0, ans=0.125 +2024-09-16 16:29:47,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=35980.0, ans=0.2 +2024-09-16 16:29:51,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=35980.0, ans=0.125 +2024-09-16 16:29:58,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=36020.0, ans=0.125 +2024-09-16 16:29:59,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=36020.0, ans=0.125 +2024-09-16 16:30:00,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=36020.0, ans=0.125 +2024-09-16 16:30:13,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=36060.0, ans=0.1 +2024-09-16 16:30:15,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=36060.0, ans=0.125 +2024-09-16 16:30:16,603 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=16.41 vs. 
limit=15.0 +2024-09-16 16:30:18,620 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.027e+02 1.305e+02 1.463e+02 1.734e+02 4.707e+02, threshold=2.926e+02, percent-clipped=2.0 +2024-09-16 16:30:26,375 INFO [train.py:1198] (0/2) Epoch 2, batch 4500, loss[loss=0.3663, ctc_loss=0.3515, cr_loss=0.4654, attn_decoder_loss=0.3576, over 20339.00 frames. ], tot_loss[loss=0.3467, ctc_loss=0.3083, cr_loss=0.4544, attn_decoder_loss=0.3408, over 5232175.73 frames. ], batch size: 210, lr: 3.53e-02, grad_scale: 16.0 +2024-09-16 16:30:42,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=36140.0, ans=0.0030130434782608692 +2024-09-16 16:30:43,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=36140.0, ans=0.125 +2024-09-16 16:31:03,468 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-2.pt +2024-09-16 16:31:56,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=36200.0, ans=0.125 +2024-09-16 16:31:58,151 INFO [train.py:1198] (0/2) Epoch 3, batch 0, loss[loss=0.415, ctc_loss=0.2741, cr_loss=0.4216, attn_decoder_loss=0.4213, over 29623.00 frames. ], tot_loss[loss=0.415, ctc_loss=0.2741, cr_loss=0.4216, attn_decoder_loss=0.4213, over 29623.00 frames. ], batch size: 73, lr: 3.35e-02, grad_scale: 8.0 +2024-09-16 16:31:58,152 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 16:32:16,468 INFO [train.py:1230] (0/2) Epoch 3, validation: loss=0.2699, ctc_loss=0.1122, cr_loss=5.059e-15, attn_decoder_loss=0.2874, over 944034.00 frames. 
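
Editor's note: the `WARNING [optim.py]` diagnostics in this log are consistent with a median-based clipping rule: the reported threshold always equals `clipping_scale` times the median of the recent grad-norm quartiles (e.g. 2.0 * 1.745e+02 = 3.490e+02 in the quartile line below), and "Scaling gradients by s" matches `s = model_norm_threshold / grad_norm` for a step whose norm exceeded the threshold. The schematic below is an assumption-labelled reconstruction of that behavior, not icefall's actual optim.py; the class name, buffer size, and method names are hypothetical.

```python
from collections import deque
import statistics

class GradClipper:
    """Sketch of the clipping diagnostic suggested by this log's optim.py warnings."""

    def __init__(self, clipping_scale: float = 2.0, history: int = 128):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=history)  # recent per-step gradient norms

    def scale_for(self, grad_norm: float) -> float:
        """Return the factor by which to multiply this step's gradients."""
        self.norms.append(grad_norm)
        median = statistics.median(self.norms)
        # The log's "grad-norm quartiles" line reports min/25%/50%/75%/max of
        # these recent norms; the threshold is clipping_scale * median.
        threshold = self.clipping_scale * median
        return min(1.0, threshold / grad_norm)

# Usage sketch: multiply gradients by clipper.scale_for(norm) each step; steps
# returning a factor < 1.0 are the ones counted in "percent-clipped".
```

Under this reading, the three "Scaling gradients by ~0.08" warnings above correspond to early epoch-3 steps whose gradient norms spiked to roughly 3.4e+03, dominated by the attention-decoder self-attention norm weights named in each warning.
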
+2024-09-16 16:32:16,469 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 16:32:17,921 WARNING [optim.py:503] (0/2) Scaling gradients by 0.08523014932870865, model_norm_threshold=292.6158752441406 +2024-09-16 16:32:18,132 WARNING [optim.py:575] (0/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.1.norm_self_attn.weight with proportion 0.29, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=3.447e+06, grad_sumsq=2.900e+09, orig_rms_sq=1.188e-03 +2024-09-16 16:32:25,520 WARNING [optim.py:503] (0/2) Scaling gradients by 0.08528286218643188, model_norm_threshold=292.6158752441406 +2024-09-16 16:32:25,720 WARNING [optim.py:575] (0/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.0.norm_self_attn.weight with proportion 0.56, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=6.615e+06, grad_sumsq=1.664e+09, orig_rms_sq=3.977e-03 +2024-09-16 16:32:27,307 WARNING [optim.py:503] (0/2) Scaling gradients by 0.07857576757669449, model_norm_threshold=292.6158752441406 +2024-09-16 16:32:27,524 WARNING [optim.py:575] (0/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.0.norm_self_attn.weight with proportion 0.54, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=7.424e+06, grad_sumsq=1.867e+09, orig_rms_sq=3.977e-03 +2024-09-16 16:32:32,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=36240.0, ans=0.0 +2024-09-16 16:32:35,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=36240.0, ans=0.125 +2024-09-16 16:32:38,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=36240.0, ans=0.2 +2024-09-16 16:32:54,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=36280.0, ans=0.125 +2024-09-16 16:33:05,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten.whitening_limit, batch_count=36320.0, ans=22.5 +2024-09-16 16:33:21,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=36360.0, ans=0.1 +2024-09-16 16:33:23,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=36360.0, ans=0.1 +2024-09-16 16:33:24,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=36360.0, ans=0.0029652173913043475 +2024-09-16 16:33:35,006 INFO [train.py:1198] (0/2) Epoch 3, batch 50, loss[loss=0.2906, ctc_loss=0.2464, cr_loss=0.3934, attn_decoder_loss=0.2867, over 29432.00 frames. ], tot_loss[loss=0.3464, ctc_loss=0.2964, cr_loss=0.4507, attn_decoder_loss=0.3419, over 1266798.90 frames. 
], batch size: 70, lr: 3.34e-02, grad_scale: 8.0 +2024-09-16 16:34:08,225 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.045e+02 1.388e+02 1.745e+02 2.275e+02 3.724e+03, threshold=3.490e+02, percent-clipped=16.0 +2024-09-16 16:34:11,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=36480.0, ans=0.1 +2024-09-16 16:34:17,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=36480.0, ans=0.125 +2024-09-16 16:34:19,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=36520.0, ans=0.125 +2024-09-16 16:34:50,241 INFO [train.py:1198] (0/2) Epoch 3, batch 100, loss[loss=0.3199, ctc_loss=0.2682, cr_loss=0.4433, attn_decoder_loss=0.3157, over 29570.00 frames. ], tot_loss[loss=0.3406, ctc_loss=0.2925, cr_loss=0.4509, attn_decoder_loss=0.336, over 2251483.92 frames. ], batch size: 76, lr: 3.34e-02, grad_scale: 8.0 +2024-09-16 16:35:00,262 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=22.98 vs. limit=22.5 +2024-09-16 16:35:04,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=36600.0, ans=0.00291304347826087 +2024-09-16 16:35:22,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=36680.0, ans=0.025 +2024-09-16 16:35:31,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=36680.0, ans=0.125 +2024-09-16 16:35:40,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=36720.0, ans=0.125 +2024-09-16 16:36:06,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=36800.0, ans=0.125 +2024-09-16 16:36:07,252 INFO [train.py:1198] (0/2) Epoch 3, batch 150, loss[loss=0.2997, ctc_loss=0.2507, cr_loss=0.4149, attn_decoder_loss=0.2959, over 29450.00 frames. ], tot_loss[loss=0.3336, ctc_loss=0.2852, cr_loss=0.4457, attn_decoder_loss=0.3291, over 3047565.08 frames. ], batch size: 70, lr: 3.33e-02, grad_scale: 8.0 +2024-09-16 16:36:07,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=36800.0, ans=0.125 +2024-09-16 16:36:24,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=36840.0, ans=0.1 +2024-09-16 16:36:29,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=36840.0, ans=0.125 +2024-09-16 16:36:38,864 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=14.93 vs. 
limit=15.0 +2024-09-16 16:36:42,366 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.061e+02 1.372e+02 1.536e+02 1.787e+02 3.735e+02, threshold=3.071e+02, percent-clipped=1.0 +2024-09-16 16:36:51,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=36880.0, ans=0.125 +2024-09-16 16:37:05,762 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.74 vs. limit=22.5 +2024-09-16 16:37:06,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=36920.0, ans=0.025 +2024-09-16 16:37:10,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=36960.0, ans=0.015 +2024-09-16 16:37:12,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=36960.0, ans=0.125 +2024-09-16 16:37:14,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=36960.0, ans=0.125 +2024-09-16 16:37:24,140 INFO [train.py:1198] (0/2) Epoch 3, batch 200, loss[loss=0.3383, ctc_loss=0.2915, cr_loss=0.4829, attn_decoder_loss=0.3327, over 27561.00 frames. ], tot_loss[loss=0.3304, ctc_loss=0.2822, cr_loss=0.445, attn_decoder_loss=0.3259, over 3658302.33 frames. ], batch size: 125, lr: 3.33e-02, grad_scale: 8.0 +2024-09-16 16:37:36,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=37000.0, ans=0.2 +2024-09-16 16:37:37,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.60 vs. limit=15.0 +2024-09-16 16:37:44,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=37040.0, ans=0.0 +2024-09-16 16:38:18,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=37120.0, ans=0.2 +2024-09-16 16:38:39,573 INFO [train.py:1198] (0/2) Epoch 3, batch 250, loss[loss=0.3455, ctc_loss=0.2922, cr_loss=0.4846, attn_decoder_loss=0.3406, over 29292.00 frames. ], tot_loss[loss=0.3288, ctc_loss=0.2797, cr_loss=0.4449, attn_decoder_loss=0.3244, over 4141526.98 frames. ], batch size: 100, lr: 3.32e-02, grad_scale: 8.0 +2024-09-16 16:39:03,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=37240.0, ans=0.125 +2024-09-16 16:39:15,193 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.095e+02 1.348e+02 1.507e+02 1.717e+02 3.533e+02, threshold=3.014e+02, percent-clipped=1.0 +2024-09-16 16:39:35,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=37320.0, ans=0.125 +2024-09-16 16:39:35,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=37320.0, ans=0.0 +2024-09-16 16:39:46,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.18 vs. 
limit=22.5 +2024-09-16 16:39:51,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=37360.0, ans=0.125 +2024-09-16 16:39:57,440 INFO [train.py:1198] (0/2) Epoch 3, batch 300, loss[loss=0.3551, ctc_loss=0.3099, cr_loss=0.4436, attn_decoder_loss=0.3503, over 29505.00 frames. ], tot_loss[loss=0.3276, ctc_loss=0.2783, cr_loss=0.4439, attn_decoder_loss=0.3233, over 4509543.62 frames. ], batch size: 92, lr: 3.32e-02, grad_scale: 8.0 +2024-09-16 16:40:08,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=37400.0, ans=0.05 +2024-09-16 16:40:15,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=37440.0, ans=0.125 +2024-09-16 16:40:49,163 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.13 vs. limit=6.0 +2024-09-16 16:40:53,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=37520.0, ans=0.125 +2024-09-16 16:41:16,159 INFO [train.py:1198] (0/2) Epoch 3, batch 350, loss[loss=0.2853, ctc_loss=0.2372, cr_loss=0.4172, attn_decoder_loss=0.2814, over 29322.00 frames. ], tot_loss[loss=0.3276, ctc_loss=0.2783, cr_loss=0.4447, attn_decoder_loss=0.3232, over 4794516.51 frames. ], batch size: 71, lr: 3.31e-02, grad_scale: 8.0 +2024-09-16 16:41:49,293 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.089e+02 1.316e+02 1.494e+02 1.817e+02 5.633e+02, threshold=2.988e+02, percent-clipped=5.0 +2024-09-16 16:41:57,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=37680.0, ans=0.1 +2024-09-16 16:42:08,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=37720.0, ans=0.0 +2024-09-16 16:42:11,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=37720.0, ans=0.0 +2024-09-16 16:42:26,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=37760.0, ans=0.125 +2024-09-16 16:42:32,048 INFO [train.py:1198] (0/2) Epoch 3, batch 400, loss[loss=0.3178, ctc_loss=0.2661, cr_loss=0.4033, attn_decoder_loss=0.3146, over 29712.00 frames. ], tot_loss[loss=0.3265, ctc_loss=0.277, cr_loss=0.4441, attn_decoder_loss=0.3222, over 5024392.84 frames. ], batch size: 82, lr: 3.31e-02, grad_scale: 16.0 +2024-09-16 16:42:35,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=37800.0, ans=0.1 +2024-09-16 16:42:35,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=37800.0, ans=0.07 +2024-09-16 16:42:46,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=37800.0, ans=0.125 +2024-09-16 16:43:10,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.39 vs. 
limit=5.0 +2024-09-16 16:43:12,831 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:43:21,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=37920.0, ans=0.1 +2024-09-16 16:43:40,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=37960.0, ans=0.125 +2024-09-16 16:43:42,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=37960.0, ans=15.0 +2024-09-16 16:43:50,666 INFO [train.py:1198] (0/2) Epoch 3, batch 450, loss[loss=0.3348, ctc_loss=0.2757, cr_loss=0.4439, attn_decoder_loss=0.3315, over 29692.00 frames. ], tot_loss[loss=0.3262, ctc_loss=0.2763, cr_loss=0.4435, attn_decoder_loss=0.3219, over 5187422.14 frames. ], batch size: 83, lr: 3.30e-02, grad_scale: 8.0 +2024-09-16 16:43:50,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=38000.0, ans=0.1 +2024-09-16 16:44:25,550 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.091e+02 1.316e+02 1.463e+02 1.797e+02 4.950e+02, threshold=2.926e+02, percent-clipped=3.0 +2024-09-16 16:44:39,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=38120.0, ans=0.1 +2024-09-16 16:44:43,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=38120.0, ans=0.125 +2024-09-16 16:44:43,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=38120.0, ans=0.125 +2024-09-16 16:44:54,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=38160.0, ans=0.125 +2024-09-16 16:45:09,110 INFO [train.py:1198] (0/2) Epoch 3, batch 500, loss[loss=0.3355, ctc_loss=0.276, cr_loss=0.4812, attn_decoder_loss=0.3314, over 29438.00 frames. ], tot_loss[loss=0.3247, ctc_loss=0.2745, cr_loss=0.4426, attn_decoder_loss=0.3204, over 5331144.63 frames. ], batch size: 94, lr: 3.30e-02, grad_scale: 8.0 +2024-09-16 16:45:09,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=38200.0, ans=0.1 +2024-09-16 16:45:12,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=38200.0, ans=0.0 +2024-09-16 16:45:12,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=38200.0, ans=0.1 +2024-09-16 16:45:20,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.80 vs. 
limit=15.0 +2024-09-16 16:45:39,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=38280.0, ans=0.0025478260869565214 +2024-09-16 16:45:47,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=38280.0, ans=0.0 +2024-09-16 16:46:20,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=38360.0, ans=0.1 +2024-09-16 16:46:25,200 INFO [train.py:1198] (0/2) Epoch 3, batch 550, loss[loss=0.36, ctc_loss=0.3186, cr_loss=0.4946, attn_decoder_loss=0.3536, over 28831.00 frames. ], tot_loss[loss=0.3253, ctc_loss=0.2753, cr_loss=0.4438, attn_decoder_loss=0.321, over 5424297.70 frames. ], batch size: 104, lr: 3.29e-02, grad_scale: 8.0 +2024-09-16 16:46:32,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.59 vs. limit=15.0 +2024-09-16 16:47:02,146 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.886e+01 1.383e+02 1.615e+02 1.876e+02 3.927e+02, threshold=3.230e+02, percent-clipped=4.0 +2024-09-16 16:47:11,189 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.39 vs. limit=5.0 +2024-09-16 16:47:35,466 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.51 vs. limit=15.0 +2024-09-16 16:47:43,876 INFO [train.py:1198] (0/2) Epoch 3, batch 600, loss[loss=0.348, ctc_loss=0.2966, cr_loss=0.474, attn_decoder_loss=0.3432, over 29216.00 frames. ], tot_loss[loss=0.325, ctc_loss=0.2745, cr_loss=0.444, attn_decoder_loss=0.3208, over 5510927.49 frames. ], batch size: 100, lr: 3.28e-02, grad_scale: 8.0 +2024-09-16 16:47:44,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=38600.0, ans=0.0 +2024-09-16 16:47:45,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=38600.0, ans=0.1 +2024-09-16 16:47:47,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=38600.0, ans=0.125 +2024-09-16 16:48:06,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=38640.0, ans=0.125 +2024-09-16 16:48:11,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=38640.0, ans=0.125 +2024-09-16 16:48:22,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=38680.0, ans=0.125 +2024-09-16 16:48:47,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=38760.0, ans=0.0024434782608695653 +2024-09-16 16:48:49,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=38760.0, ans=0.0 +2024-09-16 16:49:02,384 INFO [train.py:1198] (0/2) Epoch 3, batch 650, loss[loss=0.2999, ctc_loss=0.2398, cr_loss=0.4348, attn_decoder_loss=0.2969, over 29769.00 frames. ], tot_loss[loss=0.3233, ctc_loss=0.2725, cr_loss=0.4424, attn_decoder_loss=0.3191, over 5587769.55 frames. 
], batch size: 81, lr: 3.28e-02, grad_scale: 8.0 +2024-09-16 16:49:07,941 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.77 vs. limit=6.0 +2024-09-16 16:49:22,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=38840.0, ans=0.125 +2024-09-16 16:49:28,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=38840.0, ans=0.07 +2024-09-16 16:49:34,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=38880.0, ans=0.0 +2024-09-16 16:49:37,337 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.937e+01 1.311e+02 1.474e+02 1.676e+02 3.343e+02, threshold=2.947e+02, percent-clipped=2.0 +2024-09-16 16:49:48,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=38920.0, ans=0.1 +2024-09-16 16:50:09,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=38960.0, ans=0.125 +2024-09-16 16:50:18,170 INFO [train.py:1198] (0/2) Epoch 3, batch 700, loss[loss=0.3072, ctc_loss=0.2469, cr_loss=0.4296, attn_decoder_loss=0.3044, over 29554.00 frames. ], tot_loss[loss=0.3239, ctc_loss=0.2731, cr_loss=0.4433, attn_decoder_loss=0.3197, over 5638374.08 frames. ], batch size: 76, lr: 3.27e-02, grad_scale: 8.0 +2024-09-16 16:50:23,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=39000.0, ans=0.125 +2024-09-16 16:50:57,692 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.85 vs. limit=22.5 +2024-09-16 16:51:13,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=39120.0, ans=0.0 +2024-09-16 16:51:16,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=39120.0, ans=0.04949747468305833 +2024-09-16 16:51:25,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=39160.0, ans=0.0023565217391304343 +2024-09-16 16:51:36,717 INFO [train.py:1198] (0/2) Epoch 3, batch 750, loss[loss=0.3255, ctc_loss=0.2701, cr_loss=0.4233, attn_decoder_loss=0.3222, over 29693.00 frames. ], tot_loss[loss=0.3235, ctc_loss=0.2725, cr_loss=0.4432, attn_decoder_loss=0.3193, over 5676685.11 frames. ], batch size: 82, lr: 3.27e-02, grad_scale: 8.0 +2024-09-16 16:51:58,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=39240.0, ans=0.0 +2024-09-16 16:52:02,066 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.10 vs. 
limit=15.0 +2024-09-16 16:52:11,489 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.529e+01 1.527e+02 1.781e+02 2.064e+02 4.131e+02, threshold=3.563e+02, percent-clipped=5.0 +2024-09-16 16:52:14,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=39280.0, ans=0.125 +2024-09-16 16:52:25,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=39320.0, ans=0.0 +2024-09-16 16:52:39,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=39360.0, ans=0.1 +2024-09-16 16:52:47,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=39360.0, ans=0.0 +2024-09-16 16:52:52,679 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.04 vs. limit=15.0 +2024-09-16 16:52:53,969 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=23.20 vs. limit=22.5 +2024-09-16 16:52:54,789 INFO [train.py:1198] (0/2) Epoch 3, batch 800, loss[loss=0.302, ctc_loss=0.2622, cr_loss=0.3992, attn_decoder_loss=0.2975, over 29604.00 frames. ], tot_loss[loss=0.3234, ctc_loss=0.2726, cr_loss=0.4433, attn_decoder_loss=0.3192, over 5706178.31 frames. ], batch size: 73, lr: 3.26e-02, grad_scale: 16.0 +2024-09-16 16:52:55,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=39400.0, ans=0.125 +2024-09-16 16:53:12,381 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.41 vs. limit=22.5 +2024-09-16 16:53:13,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=39440.0, ans=0.125 +2024-09-16 16:53:26,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=39480.0, ans=0.1 +2024-09-16 16:53:54,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=39560.0, ans=0.125 +2024-09-16 16:53:58,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=39560.0, ans=0.0 +2024-09-16 16:54:06,827 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.29 vs. limit=15.0 +2024-09-16 16:54:10,346 INFO [train.py:1198] (0/2) Epoch 3, batch 850, loss[loss=0.3422, ctc_loss=0.2871, cr_loss=0.4518, attn_decoder_loss=0.3383, over 29717.00 frames. ], tot_loss[loss=0.3229, ctc_loss=0.2716, cr_loss=0.4425, attn_decoder_loss=0.3187, over 5736278.91 frames. 
], batch size: 89, lr: 3.26e-02, grad_scale: 8.0 +2024-09-16 16:54:30,817 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=1.579e-02 +2024-09-16 16:54:46,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=39680.0, ans=0.125 +2024-09-16 16:54:48,812 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.091e+02 1.307e+02 1.480e+02 1.661e+02 7.090e+02, threshold=2.960e+02, percent-clipped=1.0 +2024-09-16 16:55:04,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=39720.0, ans=0.2 +2024-09-16 16:55:16,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=39760.0, ans=0.125 +2024-09-16 16:55:28,604 INFO [train.py:1198] (0/2) Epoch 3, batch 900, loss[loss=0.2956, ctc_loss=0.2409, cr_loss=0.3858, attn_decoder_loss=0.2931, over 29581.00 frames. ], tot_loss[loss=0.3236, ctc_loss=0.2725, cr_loss=0.4427, attn_decoder_loss=0.3195, over 5741950.50 frames. ], batch size: 73, lr: 3.25e-02, grad_scale: 8.0 +2024-09-16 16:55:34,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.50 vs. limit=15.0 +2024-09-16 16:55:36,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=39800.0, ans=0.125 +2024-09-16 16:55:44,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=39840.0, ans=0.002208695652173912 +2024-09-16 16:55:59,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=39880.0, ans=10.0 +2024-09-16 16:56:06,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=39880.0, ans=0.1 +2024-09-16 16:56:08,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=39880.0, ans=0.125 +2024-09-16 16:56:08,835 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=17.44 vs. limit=15.0 +2024-09-16 16:56:39,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=39960.0, ans=0.09899494936611666 +2024-09-16 16:56:42,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=39960.0, ans=0.2 +2024-09-16 16:56:44,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.67 vs. limit=5.0 +2024-09-16 16:56:46,825 INFO [train.py:1198] (0/2) Epoch 3, batch 950, loss[loss=0.3012, ctc_loss=0.241, cr_loss=0.4434, attn_decoder_loss=0.298, over 29516.00 frames. ], tot_loss[loss=0.3242, ctc_loss=0.2734, cr_loss=0.4444, attn_decoder_loss=0.32, over 5743325.33 frames. 
], batch size: 74, lr: 3.25e-02, grad_scale: 8.0 +2024-09-16 16:57:14,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=40040.0, ans=0.125 +2024-09-16 16:57:22,765 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.114e+02 1.433e+02 1.639e+02 1.993e+02 1.138e+03, threshold=3.278e+02, percent-clipped=4.0 +2024-09-16 16:57:24,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=40080.0, ans=0.0 +2024-09-16 16:57:29,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=40080.0, ans=0.125 +2024-09-16 16:57:53,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=40160.0, ans=0.0021391304347826087 +2024-09-16 16:58:01,740 INFO [train.py:1198] (0/2) Epoch 3, batch 1000, loss[loss=0.3102, ctc_loss=0.2548, cr_loss=0.4326, attn_decoder_loss=0.3067, over 29484.00 frames. ], tot_loss[loss=0.3244, ctc_loss=0.2734, cr_loss=0.4439, attn_decoder_loss=0.3202, over 5736393.60 frames. ], batch size: 77, lr: 3.24e-02, grad_scale: 8.0 +2024-09-16 16:58:57,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=40320.0, ans=0.035 +2024-09-16 16:59:00,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer_ff2.min_abs, batch_count=40320.0, ans=0.1 +2024-09-16 16:59:03,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=40360.0, ans=0.2 +2024-09-16 16:59:06,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=40360.0, ans=0.0 +2024-09-16 16:59:07,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=40360.0, ans=0.125 +2024-09-16 16:59:16,298 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.44 vs. limit=12.0 +2024-09-16 16:59:19,706 INFO [train.py:1198] (0/2) Epoch 3, batch 1050, loss[loss=0.3449, ctc_loss=0.2885, cr_loss=0.4748, attn_decoder_loss=0.3406, over 29675.00 frames. ], tot_loss[loss=0.3234, ctc_loss=0.2719, cr_loss=0.4432, attn_decoder_loss=0.3192, over 5745429.03 frames. ], batch size: 85, lr: 3.24e-02, grad_scale: 8.0 +2024-09-16 16:59:32,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=40400.0, ans=0.125 +2024-09-16 16:59:32,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=40400.0, ans=0.2 +2024-09-16 16:59:36,080 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=13.84 vs. 
limit=22.5 +2024-09-16 16:59:43,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=40440.0, ans=0.002078260869565217 +2024-09-16 16:59:55,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=40480.0, ans=0.0020695652173913035 +2024-09-16 16:59:56,491 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.083e+02 1.303e+02 1.463e+02 1.706e+02 2.902e+02, threshold=2.927e+02, percent-clipped=0.0 +2024-09-16 17:00:26,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=40560.0, ans=0.0 +2024-09-16 17:00:32,686 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.05 vs. limit=15.0 +2024-09-16 17:00:37,928 INFO [train.py:1198] (0/2) Epoch 3, batch 1100, loss[loss=0.3061, ctc_loss=0.2579, cr_loss=0.4131, attn_decoder_loss=0.3023, over 29480.00 frames. ], tot_loss[loss=0.3226, ctc_loss=0.2708, cr_loss=0.4425, attn_decoder_loss=0.3185, over 5757614.45 frames. ], batch size: 78, lr: 3.23e-02, grad_scale: 8.0 +2024-09-16 17:00:48,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=40600.0, ans=0.125 +2024-09-16 17:00:57,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=40640.0, ans=0.125 +2024-09-16 17:01:26,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=40720.0, ans=0.125 +2024-09-16 17:01:27,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=40720.0, ans=0.1 +2024-09-16 17:01:35,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=40720.0, ans=0.2 +2024-09-16 17:01:47,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=40760.0, ans=0.125 +2024-09-16 17:01:52,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=40800.0, ans=0.1 +2024-09-16 17:01:53,973 INFO [train.py:1198] (0/2) Epoch 3, batch 1150, loss[loss=0.3057, ctc_loss=0.251, cr_loss=0.419, attn_decoder_loss=0.3024, over 29443.00 frames. ], tot_loss[loss=0.3226, ctc_loss=0.2709, cr_loss=0.443, attn_decoder_loss=0.3185, over 5757516.27 frames. ], batch size: 78, lr: 3.23e-02, grad_scale: 8.0 +2024-09-16 17:02:32,381 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.096e+02 1.405e+02 1.623e+02 1.892e+02 4.412e+02, threshold=3.246e+02, percent-clipped=6.0 +2024-09-16 17:02:35,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=40880.0, ans=0.0 +2024-09-16 17:03:06,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=40960.0, ans=0.0 +2024-09-16 17:03:10,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=41000.0, ans=0.2 +2024-09-16 17:03:11,767 INFO [train.py:1198] (0/2) Epoch 3, batch 1200, loss[loss=0.3291, ctc_loss=0.2683, cr_loss=0.4853, attn_decoder_loss=0.3251, over 29683.00 frames. 
], tot_loss[loss=0.3238, ctc_loss=0.2719, cr_loss=0.4442, attn_decoder_loss=0.3196, over 5749116.76 frames. ], batch size: 85, lr: 3.22e-02, grad_scale: 16.0 +2024-09-16 17:03:12,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=41000.0, ans=0.125 +2024-09-16 17:03:15,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=41000.0, ans=0.001956521739130435 +2024-09-16 17:03:17,143 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=14.38 vs. limit=15.0 +2024-09-16 17:03:38,193 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=6.01 vs. limit=6.0 +2024-09-16 17:03:42,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=41080.0, ans=0.125 +2024-09-16 17:03:47,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=41080.0, ans=0.0019391304347826082 +2024-09-16 17:03:51,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=41080.0, ans=0.0019391304347826082 +2024-09-16 17:03:57,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=41120.0, ans=0.0 +2024-09-16 17:04:12,002 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:04:15,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.34 vs. limit=15.0 +2024-09-16 17:04:29,438 INFO [train.py:1198] (0/2) Epoch 3, batch 1250, loss[loss=0.3536, ctc_loss=0.3, cr_loss=0.467, attn_decoder_loss=0.3492, over 29502.00 frames. ], tot_loss[loss=0.3241, ctc_loss=0.272, cr_loss=0.4456, attn_decoder_loss=0.32, over 5776007.63 frames. ], batch size: 92, lr: 3.22e-02, grad_scale: 8.0 +2024-09-16 17:04:37,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=41200.0, ans=0.125 +2024-09-16 17:05:00,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=41280.0, ans=0.125 +2024-09-16 17:05:07,513 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.013e+02 1.378e+02 1.544e+02 1.840e+02 6.927e+02, threshold=3.087e+02, percent-clipped=1.0 +2024-09-16 17:05:41,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=41360.0, ans=0.125 +2024-09-16 17:05:43,255 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.48 vs. limit=15.0 +2024-09-16 17:05:47,461 INFO [train.py:1198] (0/2) Epoch 3, batch 1300, loss[loss=0.3339, ctc_loss=0.2771, cr_loss=0.4796, attn_decoder_loss=0.3295, over 28357.00 frames. ], tot_loss[loss=0.323, ctc_loss=0.2707, cr_loss=0.4448, attn_decoder_loss=0.3189, over 5782019.99 frames. 
], batch size: 111, lr: 3.21e-02, grad_scale: 8.0 +2024-09-16 17:05:55,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=41400.0, ans=0.125 +2024-09-16 17:06:07,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=41440.0, ans=0.0018608695652173914 +2024-09-16 17:06:07,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=41440.0, ans=0.0018608695652173914 +2024-09-16 17:06:19,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=41480.0, ans=0.125 +2024-09-16 17:06:43,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=41520.0, ans=0.1 +2024-09-16 17:06:45,615 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:06:59,210 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:07:03,980 INFO [train.py:1198] (0/2) Epoch 3, batch 1350, loss[loss=0.3322, ctc_loss=0.2736, cr_loss=0.4601, attn_decoder_loss=0.3284, over 29764.00 frames. ], tot_loss[loss=0.3224, ctc_loss=0.2695, cr_loss=0.4447, attn_decoder_loss=0.3185, over 5796934.07 frames. ], batch size: 81, lr: 3.21e-02, grad_scale: 8.0 +2024-09-16 17:07:28,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=41640.0, ans=0.0 +2024-09-16 17:07:38,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=41680.0, ans=0.035 +2024-09-16 17:07:41,129 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.097e+02 1.317e+02 1.447e+02 1.601e+02 2.528e+02, threshold=2.895e+02, percent-clipped=1.0 +2024-09-16 17:07:42,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=41680.0, ans=0.1 +2024-09-16 17:07:52,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=41720.0, ans=0.125 +2024-09-16 17:07:53,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.33 vs. limit=15.0 +2024-09-16 17:08:07,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=41760.0, ans=0.125 +2024-09-16 17:08:09,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.82 vs. limit=15.0 +2024-09-16 17:08:21,363 INFO [train.py:1198] (0/2) Epoch 3, batch 1400, loss[loss=0.299, ctc_loss=0.2544, cr_loss=0.4225, attn_decoder_loss=0.2945, over 29598.00 frames. ], tot_loss[loss=0.3221, ctc_loss=0.2691, cr_loss=0.4448, attn_decoder_loss=0.3181, over 5807979.63 frames. ], batch size: 69, lr: 3.20e-02, grad_scale: 8.0 +2024-09-16 17:08:25,058 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.11 vs. 
limit=15.0 +2024-09-16 17:08:35,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=41840.0, ans=0.0017739130434782611 +2024-09-16 17:08:40,315 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.77 vs. limit=22.5 +2024-09-16 17:08:58,956 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.10 vs. limit=5.0 +2024-09-16 17:09:07,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=41920.0, ans=0.125 +2024-09-16 17:09:12,027 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=11.96 vs. limit=15.0 +2024-09-16 17:09:16,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=41920.0, ans=0.5 +2024-09-16 17:09:17,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=41920.0, ans=0.125 +2024-09-16 17:09:34,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=41960.0, ans=0.125 +2024-09-16 17:09:37,198 INFO [train.py:1198] (0/2) Epoch 3, batch 1450, loss[loss=0.3504, ctc_loss=0.2988, cr_loss=0.4749, attn_decoder_loss=0.3456, over 29416.00 frames. ], tot_loss[loss=0.3223, ctc_loss=0.2693, cr_loss=0.4452, attn_decoder_loss=0.3183, over 5803599.93 frames. ], batch size: 94, lr: 3.20e-02, grad_scale: 8.0 +2024-09-16 17:09:44,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=42000.0, ans=0.025 +2024-09-16 17:10:17,156 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.984e+01 1.371e+02 1.551e+02 1.946e+02 4.633e+02, threshold=3.101e+02, percent-clipped=3.0 +2024-09-16 17:10:17,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=42080.0, ans=0.125 +2024-09-16 17:10:29,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=42120.0, ans=0.125 +2024-09-16 17:10:42,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=42160.0, ans=0.07 +2024-09-16 17:10:55,033 INFO [train.py:1198] (0/2) Epoch 3, batch 1500, loss[loss=0.3321, ctc_loss=0.2658, cr_loss=0.444, attn_decoder_loss=0.3296, over 29635.00 frames. ], tot_loss[loss=0.3226, ctc_loss=0.2693, cr_loss=0.445, attn_decoder_loss=0.3186, over 5803000.33 frames. ], batch size: 86, lr: 3.19e-02, grad_scale: 8.0 +2024-09-16 17:10:58,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=42200.0, ans=0.125 +2024-09-16 17:11:12,660 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.57 vs. 
limit=6.0 +2024-09-16 17:11:15,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=42240.0, ans=0.125 +2024-09-16 17:11:33,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=42280.0, ans=0.125 +2024-09-16 17:11:36,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=42280.0, ans=0.1 +2024-09-16 17:11:38,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=42280.0, ans=0.0 +2024-09-16 17:12:04,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=42360.0, ans=0.125 +2024-09-16 17:12:06,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=42360.0, ans=0.125 +2024-09-16 17:12:13,828 INFO [train.py:1198] (0/2) Epoch 3, batch 1550, loss[loss=0.3408, ctc_loss=0.2845, cr_loss=0.4885, attn_decoder_loss=0.3362, over 29499.00 frames. ], tot_loss[loss=0.3224, ctc_loss=0.2695, cr_loss=0.4446, attn_decoder_loss=0.3184, over 5780344.23 frames. ], batch size: 90, lr: 3.19e-02, grad_scale: 8.0 +2024-09-16 17:12:17,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=42400.0, ans=0.0016521739130434792 +2024-09-16 17:12:29,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.42 vs. limit=15.0 +2024-09-16 17:12:51,103 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.108e+02 1.385e+02 1.541e+02 1.743e+02 3.737e+02, threshold=3.082e+02, percent-clipped=1.0 +2024-09-16 17:12:51,574 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:13:11,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=42520.0, ans=0.0 +2024-09-16 17:13:26,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=42560.0, ans=0.025 +2024-09-16 17:13:30,979 INFO [train.py:1198] (0/2) Epoch 3, batch 1600, loss[loss=0.3247, ctc_loss=0.2604, cr_loss=0.4697, attn_decoder_loss=0.3215, over 29653.00 frames. ], tot_loss[loss=0.3226, ctc_loss=0.2701, cr_loss=0.4448, attn_decoder_loss=0.3186, over 5763622.68 frames. ], batch size: 85, lr: 3.18e-02, grad_scale: 16.0 +2024-09-16 17:13:32,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=42600.0, ans=0.1 +2024-09-16 17:13:43,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=42600.0, ans=0.125 +2024-09-16 17:13:44,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=42640.0, ans=0.1 +2024-09-16 17:13:57,679 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.86 vs. 
limit=15.0 +2024-09-16 17:14:09,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=42680.0, ans=0.0 +2024-09-16 17:14:24,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=42720.0, ans=0.0 +2024-09-16 17:14:46,581 INFO [train.py:1198] (0/2) Epoch 3, batch 1650, loss[loss=0.3324, ctc_loss=0.2801, cr_loss=0.4633, attn_decoder_loss=0.3279, over 29701.00 frames. ], tot_loss[loss=0.3228, ctc_loss=0.2702, cr_loss=0.4453, attn_decoder_loss=0.3187, over 5757930.31 frames. ], batch size: 89, lr: 3.18e-02, grad_scale: 8.0 +2024-09-16 17:15:05,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=42840.0, ans=0.1 +2024-09-16 17:15:19,822 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.91 vs. limit=12.0 +2024-09-16 17:15:26,273 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.078e+02 1.391e+02 1.585e+02 1.858e+02 6.012e+02, threshold=3.169e+02, percent-clipped=6.0 +2024-09-16 17:15:37,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=42920.0, ans=0.0 +2024-09-16 17:16:01,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=42960.0, ans=0.125 +2024-09-16 17:16:03,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=43000.0, ans=0.2 +2024-09-16 17:16:04,587 INFO [train.py:1198] (0/2) Epoch 3, batch 1700, loss[loss=0.268, ctc_loss=0.2135, cr_loss=0.4073, attn_decoder_loss=0.265, over 29629.00 frames. ], tot_loss[loss=0.3218, ctc_loss=0.2689, cr_loss=0.4441, attn_decoder_loss=0.3178, over 5779016.01 frames. ], batch size: 69, lr: 3.17e-02, grad_scale: 8.0 +2024-09-16 17:16:06,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=43000.0, ans=0.2 +2024-09-16 17:16:13,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=43000.0, ans=0.125 +2024-09-16 17:16:39,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=43080.0, ans=0.1 +2024-09-16 17:16:51,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=43120.0, ans=0.0 +2024-09-16 17:16:52,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=43120.0, ans=0.09899494936611666 +2024-09-16 17:17:16,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=43160.0, ans=0.1 +2024-09-16 17:17:22,495 INFO [train.py:1198] (0/2) Epoch 3, batch 1750, loss[loss=0.2839, ctc_loss=0.2304, cr_loss=0.405, attn_decoder_loss=0.2808, over 29376.00 frames. ], tot_loss[loss=0.3214, ctc_loss=0.2686, cr_loss=0.4444, attn_decoder_loss=0.3174, over 5787389.07 frames. 
], batch size: 67, lr: 3.17e-02, grad_scale: 8.0 +2024-09-16 17:17:30,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=43200.0, ans=0.1 +2024-09-16 17:17:32,658 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.52 vs. limit=15.0 +2024-09-16 17:17:40,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=43240.0, ans=0.125 +2024-09-16 17:18:01,942 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.065e+02 1.347e+02 1.520e+02 1.785e+02 2.603e+02, threshold=3.040e+02, percent-clipped=0.0 +2024-09-16 17:18:38,185 INFO [train.py:1198] (0/2) Epoch 3, batch 1800, loss[loss=0.3449, ctc_loss=0.2904, cr_loss=0.4578, attn_decoder_loss=0.3407, over 29693.00 frames. ], tot_loss[loss=0.3215, ctc_loss=0.2687, cr_loss=0.4445, attn_decoder_loss=0.3175, over 5790928.27 frames. ], batch size: 83, lr: 3.16e-02, grad_scale: 8.0 +2024-09-16 17:18:59,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=43440.0, ans=0.2 +2024-09-16 17:19:10,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=43480.0, ans=0.1 +2024-09-16 17:19:24,671 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.73 vs. limit=12.0 +2024-09-16 17:19:51,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=43560.0, ans=0.1 +2024-09-16 17:19:54,249 INFO [train.py:1198] (0/2) Epoch 3, batch 1850, loss[loss=0.3404, ctc_loss=0.29, cr_loss=0.4613, attn_decoder_loss=0.3357, over 29625.00 frames. ], tot_loss[loss=0.3209, ctc_loss=0.2674, cr_loss=0.4445, attn_decoder_loss=0.3169, over 5796546.44 frames. ], batch size: 86, lr: 3.16e-02, grad_scale: 8.0 +2024-09-16 17:20:03,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=43600.0, ans=0.125 +2024-09-16 17:20:13,066 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:20:23,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=43640.0, ans=0.125 +2024-09-16 17:20:32,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=43680.0, ans=0.0 +2024-09-16 17:20:35,360 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.038e+02 1.308e+02 1.428e+02 1.692e+02 5.194e+02, threshold=2.856e+02, percent-clipped=3.0 +2024-09-16 17:20:45,358 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.32 vs. limit=15.0 +2024-09-16 17:21:13,416 INFO [train.py:1198] (0/2) Epoch 3, batch 1900, loss[loss=0.3273, ctc_loss=0.2761, cr_loss=0.4456, attn_decoder_loss=0.3231, over 29694.00 frames. ], tot_loss[loss=0.3217, ctc_loss=0.2683, cr_loss=0.4452, attn_decoder_loss=0.3177, over 5805286.20 frames. 
], batch size: 89, lr: 3.15e-02, grad_scale: 8.0 +2024-09-16 17:21:16,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=43800.0, ans=0.1 +2024-09-16 17:21:22,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=43800.0, ans=0.125 +2024-09-16 17:21:41,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=43840.0, ans=0.2 +2024-09-16 17:21:45,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=43880.0, ans=0.1 +2024-09-16 17:21:58,659 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.14 vs. limit=22.5 +2024-09-16 17:21:59,942 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.58 vs. limit=6.0 +2024-09-16 17:22:08,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=43920.0, ans=0.125 +2024-09-16 17:22:08,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=43920.0, ans=0.125 +2024-09-16 17:22:17,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=43960.0, ans=0.2 +2024-09-16 17:22:19,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=43960.0, ans=0.00131304347826087 +2024-09-16 17:22:19,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=43960.0, ans=0.0 +2024-09-16 17:22:22,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=43960.0, ans=0.0 +2024-09-16 17:22:29,847 INFO [train.py:1198] (0/2) Epoch 3, batch 1950, loss[loss=0.3175, ctc_loss=0.2679, cr_loss=0.4755, attn_decoder_loss=0.3124, over 29441.00 frames. ], tot_loss[loss=0.3227, ctc_loss=0.2686, cr_loss=0.4474, attn_decoder_loss=0.3187, over 5820468.18 frames. ], batch size: 78, lr: 3.15e-02, grad_scale: 8.0 +2024-09-16 17:23:02,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=44080.0, ans=0.2 +2024-09-16 17:23:09,312 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.072e+02 1.320e+02 1.491e+02 1.683e+02 2.702e+02, threshold=2.982e+02, percent-clipped=0.0 +2024-09-16 17:23:30,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=44160.0, ans=0.0 +2024-09-16 17:23:44,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=44200.0, ans=0.1 +2024-09-16 17:23:45,537 INFO [train.py:1198] (0/2) Epoch 3, batch 2000, loss[loss=0.2844, ctc_loss=0.2384, cr_loss=0.395, attn_decoder_loss=0.2807, over 29322.00 frames. ], tot_loss[loss=0.3234, ctc_loss=0.2694, cr_loss=0.4483, attn_decoder_loss=0.3195, over 5798124.92 frames. 
], batch size: 67, lr: 3.14e-02, grad_scale: 16.0 +2024-09-16 17:24:07,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=44240.0, ans=0.125 +2024-09-16 17:24:23,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=44280.0, ans=0.0 +2024-09-16 17:24:30,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=44280.0, ans=0.125 +2024-09-16 17:24:31,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.72 vs. limit=22.5 +2024-09-16 17:25:05,794 INFO [train.py:1198] (0/2) Epoch 3, batch 2050, loss[loss=0.2958, ctc_loss=0.2461, cr_loss=0.4008, attn_decoder_loss=0.2925, over 29459.00 frames. ], tot_loss[loss=0.3223, ctc_loss=0.2686, cr_loss=0.4461, attn_decoder_loss=0.3183, over 5791300.63 frames. ], batch size: 70, lr: 3.14e-02, grad_scale: 8.0 +2024-09-16 17:25:30,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=44440.0, ans=0.1 +2024-09-16 17:25:45,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=44480.0, ans=0.0 +2024-09-16 17:25:46,771 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.138e+02 1.399e+02 1.535e+02 1.932e+02 1.271e+03, threshold=3.069e+02, percent-clipped=4.0 +2024-09-16 17:26:01,356 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.18 vs. limit=10.0 +2024-09-16 17:26:11,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=44560.0, ans=0.125 +2024-09-16 17:26:21,863 INFO [train.py:1198] (0/2) Epoch 3, batch 2100, loss[loss=0.3147, ctc_loss=0.2481, cr_loss=0.4452, attn_decoder_loss=0.3122, over 29779.00 frames. ], tot_loss[loss=0.3205, ctc_loss=0.2664, cr_loss=0.4444, attn_decoder_loss=0.3166, over 5802433.30 frames. ], batch size: 81, lr: 3.13e-02, grad_scale: 8.0 +2024-09-16 17:26:26,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=44600.0, ans=0.125 +2024-09-16 17:26:28,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.28 vs. 
limit=22.5 +2024-09-16 17:26:32,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=44600.0, ans=0.125 +2024-09-16 17:26:53,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=44680.0, ans=0.0 +2024-09-16 17:26:55,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=44680.0, ans=0.0 +2024-09-16 17:27:10,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=44720.0, ans=0.125 +2024-09-16 17:27:13,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=44720.0, ans=0.125 +2024-09-16 17:27:13,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=44720.0, ans=0.001147826086956523 +2024-09-16 17:27:15,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=44720.0, ans=0.09899494936611666 +2024-09-16 17:27:15,653 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.03 vs. limit=15.0 +2024-09-16 17:27:16,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=44720.0, ans=0.125 +2024-09-16 17:27:16,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=44720.0, ans=0.0 +2024-09-16 17:27:22,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=44760.0, ans=0.1 +2024-09-16 17:27:25,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=44760.0, ans=0.2 +2024-09-16 17:27:38,156 INFO [train.py:1198] (0/2) Epoch 3, batch 2150, loss[loss=0.3168, ctc_loss=0.2567, cr_loss=0.4232, attn_decoder_loss=0.3141, over 29450.00 frames. ], tot_loss[loss=0.3197, ctc_loss=0.2653, cr_loss=0.444, attn_decoder_loss=0.3159, over 5816586.83 frames. ], batch size: 78, lr: 3.13e-02, grad_scale: 8.0 +2024-09-16 17:27:41,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=44800.0, ans=0.125 +2024-09-16 17:27:48,058 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.01 vs. 
limit=15.0 +2024-09-16 17:27:58,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=44840.0, ans=0.1 +2024-09-16 17:28:15,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=44880.0, ans=0.07 +2024-09-16 17:28:17,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=44880.0, ans=0.1 +2024-09-16 17:28:20,981 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.031e+02 1.285e+02 1.430e+02 1.712e+02 4.702e+02, threshold=2.859e+02, percent-clipped=3.0 +2024-09-16 17:28:25,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=44920.0, ans=0.125 +2024-09-16 17:28:46,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=44960.0, ans=0.125 +2024-09-16 17:28:56,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=45000.0, ans=0.125 +2024-09-16 17:28:57,954 INFO [train.py:1198] (0/2) Epoch 3, batch 2200, loss[loss=0.3176, ctc_loss=0.261, cr_loss=0.4705, attn_decoder_loss=0.3134, over 29626.00 frames. ], tot_loss[loss=0.3196, ctc_loss=0.2653, cr_loss=0.4446, attn_decoder_loss=0.3158, over 5812326.93 frames. ], batch size: 86, lr: 3.12e-02, grad_scale: 8.0 +2024-09-16 17:28:58,243 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:29:10,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=45000.0, ans=0.09899494936611666 +2024-09-16 17:29:12,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=45040.0, ans=0.2 +2024-09-16 17:29:31,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=45080.0, ans=0.125 +2024-09-16 17:30:00,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=45160.0, ans=0.07 +2024-09-16 17:30:08,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=45160.0, ans=0.025 +2024-09-16 17:30:13,751 INFO [train.py:1198] (0/2) Epoch 3, batch 2250, loss[loss=0.3329, ctc_loss=0.2825, cr_loss=0.4666, attn_decoder_loss=0.3282, over 29683.00 frames. ], tot_loss[loss=0.3195, ctc_loss=0.2649, cr_loss=0.4445, attn_decoder_loss=0.3157, over 5812690.09 frames. 
], batch size: 82, lr: 3.12e-02, grad_scale: 8.0 +2024-09-16 17:30:18,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=45200.0, ans=0.1 +2024-09-16 17:30:38,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=45240.0, ans=0.125 +2024-09-16 17:30:54,405 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.156e+02 1.411e+02 1.554e+02 1.919e+02 3.789e+02, threshold=3.108e+02, percent-clipped=3.0 +2024-09-16 17:31:06,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=45320.0, ans=0.0 +2024-09-16 17:31:08,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=45320.0, ans=0.125 +2024-09-16 17:31:11,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=45320.0, ans=0.2 +2024-09-16 17:31:27,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=45400.0, ans=0.2 +2024-09-16 17:31:29,225 INFO [train.py:1198] (0/2) Epoch 3, batch 2300, loss[loss=0.2872, ctc_loss=0.229, cr_loss=0.4102, attn_decoder_loss=0.2846, over 29760.00 frames. ], tot_loss[loss=0.3181, ctc_loss=0.2638, cr_loss=0.4426, attn_decoder_loss=0.3143, over 5802029.12 frames. ], batch size: 72, lr: 3.11e-02, grad_scale: 8.0 +2024-09-16 17:31:37,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=45400.0, ans=0.125 +2024-09-16 17:31:40,864 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.11 vs. limit=6.0 +2024-09-16 17:31:45,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=45440.0, ans=0.125 +2024-09-16 17:32:00,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=45480.0, ans=0.0009826086956521742 +2024-09-16 17:32:12,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=45480.0, ans=0.0 +2024-09-16 17:32:21,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=45520.0, ans=0.0009739130434782608 +2024-09-16 17:32:25,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.88 vs. limit=15.0 +2024-09-16 17:32:49,801 INFO [train.py:1198] (0/2) Epoch 3, batch 2350, loss[loss=0.3212, ctc_loss=0.2628, cr_loss=0.4403, attn_decoder_loss=0.3179, over 29681.00 frames. ], tot_loss[loss=0.3181, ctc_loss=0.2636, cr_loss=0.4427, attn_decoder_loss=0.3143, over 5806892.70 frames. ], batch size: 83, lr: 3.11e-02, grad_scale: 8.0 +2024-09-16 17:32:55,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.78 vs. limit=22.5 +2024-09-16 17:33:01,363 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.58 vs. 
limit=10.0 +2024-09-16 17:33:05,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=45640.0, ans=0.025 +2024-09-16 17:33:14,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=45640.0, ans=0.0 +2024-09-16 17:33:23,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=45680.0, ans=0.125 +2024-09-16 17:33:30,847 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.361e+02 1.552e+02 1.880e+02 4.928e+02, threshold=3.104e+02, percent-clipped=4.0 +2024-09-16 17:33:32,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=45680.0, ans=0.0 +2024-09-16 17:33:32,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=45680.0, ans=0.000939130434782609 +2024-09-16 17:34:06,304 INFO [train.py:1198] (0/2) Epoch 3, batch 2400, loss[loss=0.3066, ctc_loss=0.2475, cr_loss=0.4204, attn_decoder_loss=0.3038, over 29526.00 frames. ], tot_loss[loss=0.3187, ctc_loss=0.264, cr_loss=0.443, attn_decoder_loss=0.3149, over 5810364.30 frames. ], batch size: 76, lr: 3.10e-02, grad_scale: 16.0 +2024-09-16 17:34:09,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=45800.0, ans=0.2 +2024-09-16 17:34:12,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=45800.0, ans=0.125 +2024-09-16 17:34:21,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=45840.0, ans=0.125 +2024-09-16 17:34:52,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=45920.0, ans=0.0 +2024-09-16 17:34:56,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=45920.0, ans=0.2 +2024-09-16 17:34:58,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=45920.0, ans=0.0 +2024-09-16 17:35:10,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=45960.0, ans=0.2 +2024-09-16 17:35:22,220 INFO [train.py:1198] (0/2) Epoch 3, batch 2450, loss[loss=0.3338, ctc_loss=0.2822, cr_loss=0.4493, attn_decoder_loss=0.3295, over 29714.00 frames. ], tot_loss[loss=0.3201, ctc_loss=0.2659, cr_loss=0.4441, attn_decoder_loss=0.3163, over 5786454.51 frames. 
], batch size: 82, lr: 3.10e-02, grad_scale: 8.0 +2024-09-16 17:35:22,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=46000.0, ans=0.2 +2024-09-16 17:35:26,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=46000.0, ans=0.125 +2024-09-16 17:35:35,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=46040.0, ans=0.125 +2024-09-16 17:36:05,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=46080.0, ans=0.2 +2024-09-16 17:36:06,480 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.121e+02 1.425e+02 1.645e+02 1.863e+02 7.632e+02, threshold=3.291e+02, percent-clipped=3.0 +2024-09-16 17:36:08,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=46120.0, ans=0.5 +2024-09-16 17:36:41,942 INFO [train.py:1198] (0/2) Epoch 3, batch 2500, loss[loss=0.3302, ctc_loss=0.2672, cr_loss=0.4715, attn_decoder_loss=0.3267, over 29641.00 frames. ], tot_loss[loss=0.3197, ctc_loss=0.2651, cr_loss=0.4441, attn_decoder_loss=0.3159, over 5796356.77 frames. ], batch size: 86, lr: 3.09e-02, grad_scale: 8.0 +2024-09-16 17:36:55,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=46240.0, ans=0.1 +2024-09-16 17:37:09,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=46240.0, ans=0.125 +2024-09-16 17:37:47,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=46360.0, ans=0.125 +2024-09-16 17:37:54,426 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.32 vs. limit=10.0 +2024-09-16 17:37:58,400 INFO [train.py:1198] (0/2) Epoch 3, batch 2550, loss[loss=0.2905, ctc_loss=0.2329, cr_loss=0.4199, attn_decoder_loss=0.2875, over 29316.00 frames. ], tot_loss[loss=0.3191, ctc_loss=0.2639, cr_loss=0.4441, attn_decoder_loss=0.3154, over 5799956.47 frames. ], batch size: 67, lr: 3.09e-02, grad_scale: 8.0 +2024-09-16 17:37:58,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=46400.0, ans=0.0 +2024-09-16 17:38:40,772 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.116e+02 1.310e+02 1.464e+02 1.728e+02 3.657e+02, threshold=2.928e+02, percent-clipped=2.0 +2024-09-16 17:38:48,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=46520.0, ans=0.1 +2024-09-16 17:39:14,460 INFO [train.py:1198] (0/2) Epoch 3, batch 2600, loss[loss=0.3002, ctc_loss=0.2346, cr_loss=0.4261, attn_decoder_loss=0.298, over 29457.00 frames. ], tot_loss[loss=0.3192, ctc_loss=0.2638, cr_loss=0.4443, attn_decoder_loss=0.3155, over 5796988.92 frames. 
], batch size: 78, lr: 3.08e-02, grad_scale: 8.0 +2024-09-16 17:39:16,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=46600.0, ans=0.2 +2024-09-16 17:39:26,926 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.27 vs. limit=15.0 +2024-09-16 17:39:48,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=46680.0, ans=0.125 +2024-09-16 17:39:49,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=46680.0, ans=0.2 +2024-09-16 17:39:50,241 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.35 vs. limit=10.0 +2024-09-16 17:40:17,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=46760.0, ans=0.0007043478260869568 +2024-09-16 17:40:20,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=46760.0, ans=0.125 +2024-09-16 17:40:20,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=46760.0, ans=0.0007043478260869568 +2024-09-16 17:40:23,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=46760.0, ans=0.0 +2024-09-16 17:40:30,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=46760.0, ans=0.1 +2024-09-16 17:40:32,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=46800.0, ans=0.125 +2024-09-16 17:40:33,422 INFO [train.py:1198] (0/2) Epoch 3, batch 2650, loss[loss=0.3341, ctc_loss=0.2799, cr_loss=0.4581, attn_decoder_loss=0.33, over 29284.00 frames. ], tot_loss[loss=0.3198, ctc_loss=0.2641, cr_loss=0.445, attn_decoder_loss=0.316, over 5801625.70 frames. ], batch size: 100, lr: 3.08e-02, grad_scale: 8.0 +2024-09-16 17:40:37,389 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.10 vs. 
limit=15.0 +2024-09-16 17:41:12,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=46880.0, ans=0.125 +2024-09-16 17:41:14,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=46880.0, ans=0.125 +2024-09-16 17:41:15,674 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.110e+02 1.367e+02 1.536e+02 1.778e+02 3.177e+02, threshold=3.072e+02, percent-clipped=2.0 +2024-09-16 17:41:17,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=46920.0, ans=0.000669565217391305 +2024-09-16 17:41:31,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=46920.0, ans=0.125 +2024-09-16 17:41:38,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=46960.0, ans=0.125 +2024-09-16 17:41:49,015 INFO [train.py:1198] (0/2) Epoch 3, batch 2700, loss[loss=0.3261, ctc_loss=0.2675, cr_loss=0.4583, attn_decoder_loss=0.3224, over 29507.00 frames. ], tot_loss[loss=0.3201, ctc_loss=0.2647, cr_loss=0.4451, attn_decoder_loss=0.3164, over 5797086.19 frames. ], batch size: 87, lr: 3.08e-02, grad_scale: 8.0 +2024-09-16 17:42:31,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=47080.0, ans=0.1 +2024-09-16 17:42:40,072 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.03 vs. limit=15.0 +2024-09-16 17:42:54,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=47160.0, ans=0.0 +2024-09-16 17:43:00,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=47160.0, ans=0.025 +2024-09-16 17:43:05,710 INFO [train.py:1198] (0/2) Epoch 3, batch 2750, loss[loss=0.3011, ctc_loss=0.2478, cr_loss=0.4171, attn_decoder_loss=0.2977, over 29503.00 frames. ], tot_loss[loss=0.3193, ctc_loss=0.2641, cr_loss=0.4442, attn_decoder_loss=0.3156, over 5796053.74 frames. ], batch size: 75, lr: 3.07e-02, grad_scale: 8.0 +2024-09-16 17:43:16,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=47200.0, ans=0.1 +2024-09-16 17:43:50,014 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.345e+02 1.531e+02 1.898e+02 4.354e+02, threshold=3.062e+02, percent-clipped=3.0 +2024-09-16 17:43:56,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=47320.0, ans=0.125 +2024-09-16 17:43:59,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=47320.0, ans=0.1 +2024-09-16 17:44:22,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=47360.0, ans=0.1 +2024-09-16 17:44:25,561 INFO [train.py:1198] (0/2) Epoch 3, batch 2800, loss[loss=0.3731, ctc_loss=0.3543, cr_loss=0.443, attn_decoder_loss=0.3653, over 20214.00 frames. ], tot_loss[loss=0.3189, ctc_loss=0.2637, cr_loss=0.4439, attn_decoder_loss=0.3152, over 5776016.68 frames. 
], batch size: 211, lr: 3.07e-02, grad_scale: 16.0 +2024-09-16 17:44:33,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=47400.0, ans=0.0005652173913043481 +2024-09-16 17:44:37,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=47400.0, ans=0.1 +2024-09-16 17:44:38,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=47400.0, ans=0.05 +2024-09-16 17:44:51,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=47440.0, ans=0.0005565217391304347 +2024-09-16 17:44:51,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.71 vs. limit=15.0 +2024-09-16 17:45:05,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=47480.0, ans=0.0005478260869565214 +2024-09-16 17:45:25,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=47560.0, ans=0.1 +2024-09-16 17:45:37,332 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.25 vs. limit=15.0 +2024-09-16 17:45:40,750 INFO [train.py:1198] (0/2) Epoch 3, batch 2850, loss[loss=0.3128, ctc_loss=0.2555, cr_loss=0.4513, attn_decoder_loss=0.3091, over 29522.00 frames. ], tot_loss[loss=0.3203, ctc_loss=0.2654, cr_loss=0.4455, attn_decoder_loss=0.3165, over 5762291.85 frames. ], batch size: 77, lr: 3.06e-02, grad_scale: 8.0 +2024-09-16 17:46:05,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=47640.0, ans=0.125 +2024-09-16 17:46:06,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.97 vs. limit=15.0 +2024-09-16 17:46:25,030 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.134e+02 1.383e+02 1.687e+02 2.154e+02 5.154e+02, threshold=3.374e+02, percent-clipped=7.0 +2024-09-16 17:46:25,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=47720.0, ans=0.1 +2024-09-16 17:46:53,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=47760.0, ans=0.0004869565217391295 +2024-09-16 17:46:56,743 INFO [train.py:1198] (0/2) Epoch 3, batch 2900, loss[loss=0.3049, ctc_loss=0.2399, cr_loss=0.4128, attn_decoder_loss=0.3029, over 29408.00 frames. ], tot_loss[loss=0.3211, ctc_loss=0.2653, cr_loss=0.4463, attn_decoder_loss=0.3173, over 5788316.25 frames. 
], batch size: 79, lr: 3.06e-02, grad_scale: 8.0 +2024-09-16 17:47:03,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=47800.0, ans=0.125 +2024-09-16 17:47:03,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=47800.0, ans=0.125 +2024-09-16 17:47:29,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=47880.0, ans=0.125 +2024-09-16 17:47:31,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=47880.0, ans=0.2 +2024-09-16 17:47:34,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=47880.0, ans=0.125 +2024-09-16 17:47:40,893 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.31 vs. limit=15.0 +2024-09-16 17:48:15,954 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-12000.pt +2024-09-16 17:48:24,250 INFO [train.py:1198] (0/2) Epoch 3, batch 2950, loss[loss=0.2902, ctc_loss=0.2205, cr_loss=0.3956, attn_decoder_loss=0.2891, over 29509.00 frames. ], tot_loss[loss=0.3189, ctc_loss=0.2632, cr_loss=0.4441, attn_decoder_loss=0.3153, over 5782537.12 frames. ], batch size: 75, lr: 3.05e-02, grad_scale: 8.0 +2024-09-16 17:48:44,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=48040.0, ans=0.035 +2024-09-16 17:48:50,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.54 vs. limit=6.0 +2024-09-16 17:48:53,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=48080.0, ans=0.125 +2024-09-16 17:49:08,105 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.130e+02 1.336e+02 1.504e+02 1.810e+02 3.679e+02, threshold=3.009e+02, percent-clipped=1.0 +2024-09-16 17:49:23,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=48160.0, ans=0.1 +2024-09-16 17:49:40,405 INFO [train.py:1198] (0/2) Epoch 3, batch 3000, loss[loss=0.3272, ctc_loss=0.275, cr_loss=0.4518, attn_decoder_loss=0.323, over 29748.00 frames. ], tot_loss[loss=0.3184, ctc_loss=0.2629, cr_loss=0.4443, attn_decoder_loss=0.3147, over 5782926.65 frames. ], batch size: 81, lr: 3.05e-02, grad_scale: 8.0 +2024-09-16 17:49:40,406 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 17:49:58,748 INFO [train.py:1230] (0/2) Epoch 3, validation: loss=0.2335, ctc_loss=0.0936, cr_loss=4.436e-15, attn_decoder_loss=0.2491, over 944034.00 frames. 
+2024-09-16 17:49:58,749 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 17:50:15,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=48240.0, ans=0.0003826086956521726 +2024-09-16 17:50:46,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=48320.0, ans=0.1 +2024-09-16 17:50:52,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=48320.0, ans=0.0 +2024-09-16 17:50:56,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=48320.0, ans=0.125 +2024-09-16 17:51:05,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.85 vs. limit=6.0 +2024-09-16 17:51:08,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=48360.0, ans=0.0 +2024-09-16 17:51:16,894 INFO [train.py:1198] (0/2) Epoch 3, batch 3050, loss[loss=0.3111, ctc_loss=0.2508, cr_loss=0.4676, attn_decoder_loss=0.3074, over 29536.00 frames. ], tot_loss[loss=0.3198, ctc_loss=0.2643, cr_loss=0.4461, attn_decoder_loss=0.316, over 5776114.62 frames. ], batch size: 76, lr: 3.04e-02, grad_scale: 4.0 +2024-09-16 17:51:24,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=48400.0, ans=0.0003478260869565226 +2024-09-16 17:51:33,421 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.72 vs. limit=12.0 +2024-09-16 17:51:35,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=48440.0, ans=0.0003391304347826075 +2024-09-16 17:51:39,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=48440.0, ans=0.125 +2024-09-16 17:51:47,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=48480.0, ans=0.125 +2024-09-16 17:51:53,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=48480.0, ans=0.125 +2024-09-16 17:51:56,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=48480.0, ans=0.125 +2024-09-16 17:52:04,208 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.201e+02 1.405e+02 1.578e+02 1.940e+02 5.924e+02, threshold=3.157e+02, percent-clipped=5.0 +2024-09-16 17:52:33,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=48600.0, ans=0.025 +2024-09-16 17:52:34,211 INFO [train.py:1198] (0/2) Epoch 3, batch 3100, loss[loss=0.3238, ctc_loss=0.2694, cr_loss=0.4509, attn_decoder_loss=0.3198, over 29232.00 frames. ], tot_loss[loss=0.3189, ctc_loss=0.2634, cr_loss=0.4447, attn_decoder_loss=0.3152, over 5775200.84 frames. ], batch size: 100, lr: 3.04e-02, grad_scale: 8.0 +2024-09-16 17:52:39,778 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.64 vs. 
limit=12.0 +2024-09-16 17:52:45,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=48600.0, ans=0.05 +2024-09-16 17:52:59,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=48640.0, ans=0.125 +2024-09-16 17:53:13,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=48680.0, ans=0.125 +2024-09-16 17:53:30,959 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.81 vs. limit=6.0 +2024-09-16 17:53:36,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=48760.0, ans=0.125 +2024-09-16 17:53:49,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=48800.0, ans=0.00026086956521739237 +2024-09-16 17:53:50,244 INFO [train.py:1198] (0/2) Epoch 3, batch 3150, loss[loss=0.3363, ctc_loss=0.278, cr_loss=0.4799, attn_decoder_loss=0.3321, over 28903.00 frames. ], tot_loss[loss=0.3188, ctc_loss=0.2631, cr_loss=0.4451, attn_decoder_loss=0.3151, over 5782384.22 frames. ], batch size: 104, lr: 3.03e-02, grad_scale: 8.0 +2024-09-16 17:54:35,637 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.107e+02 1.334e+02 1.533e+02 1.776e+02 7.773e+02, threshold=3.065e+02, percent-clipped=4.0 +2024-09-16 17:55:00,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=48960.0, ans=0.025 +2024-09-16 17:55:07,983 INFO [train.py:1198] (0/2) Epoch 3, batch 3200, loss[loss=0.313, ctc_loss=0.2542, cr_loss=0.451, attn_decoder_loss=0.3095, over 29406.00 frames. ], tot_loss[loss=0.3174, ctc_loss=0.2612, cr_loss=0.4436, attn_decoder_loss=0.3138, over 5792687.84 frames. ], batch size: 79, lr: 3.03e-02, grad_scale: 16.0 +2024-09-16 17:55:12,120 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.80 vs. limit=22.5 +2024-09-16 17:55:20,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=49000.0, ans=0.125 +2024-09-16 17:55:35,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.84 vs. limit=15.0 +2024-09-16 17:55:37,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=49040.0, ans=0.00020869565217391216 +2024-09-16 17:55:39,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=49080.0, ans=0.125 +2024-09-16 17:55:57,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=49120.0, ans=0.125 +2024-09-16 17:56:13,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.77 vs. limit=15.0 +2024-09-16 17:56:19,253 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.21 vs. 
limit=12.0 +2024-09-16 17:56:26,022 INFO [train.py:1198] (0/2) Epoch 3, batch 3250, loss[loss=0.331, ctc_loss=0.275, cr_loss=0.4423, attn_decoder_loss=0.3274, over 29703.00 frames. ], tot_loss[loss=0.3182, ctc_loss=0.2619, cr_loss=0.4453, attn_decoder_loss=0.3146, over 5798822.30 frames. ], batch size: 84, lr: 3.03e-02, grad_scale: 8.0 +2024-09-16 17:56:53,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=49240.0, ans=0.125 +2024-09-16 17:57:03,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=49280.0, ans=0.1 +2024-09-16 17:57:06,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=49280.0, ans=0.125 +2024-09-16 17:57:12,081 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.12 vs. limit=12.0 +2024-09-16 17:57:12,497 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.316e+02 1.449e+02 1.854e+02 6.916e+02, threshold=2.898e+02, percent-clipped=2.0 +2024-09-16 17:57:20,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=49320.0, ans=0.125 +2024-09-16 17:57:38,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=49360.0, ans=0.125 +2024-09-16 17:57:41,277 INFO [train.py:1198] (0/2) Epoch 3, batch 3300, loss[loss=0.3229, ctc_loss=0.2596, cr_loss=0.4433, attn_decoder_loss=0.3201, over 28218.00 frames. ], tot_loss[loss=0.3164, ctc_loss=0.26, cr_loss=0.4433, attn_decoder_loss=0.3128, over 5796063.55 frames. ], batch size: 111, lr: 3.02e-02, grad_scale: 8.0 +2024-09-16 17:57:57,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=49440.0, ans=0.125 +2024-09-16 17:58:01,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=49440.0, ans=0.1 +2024-09-16 17:58:11,274 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.22 vs. limit=22.5 +2024-09-16 17:58:16,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=49480.0, ans=0.0 +2024-09-16 17:58:33,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=49520.0, ans=0.00010434782608695695 +2024-09-16 17:58:34,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=49520.0, ans=0.0 +2024-09-16 17:58:40,897 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:58:42,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=49560.0, ans=0.125 +2024-09-16 17:58:59,515 INFO [train.py:1198] (0/2) Epoch 3, batch 3350, loss[loss=0.332, ctc_loss=0.2781, cr_loss=0.4762, attn_decoder_loss=0.3275, over 28848.00 frames. ], tot_loss[loss=0.3172, ctc_loss=0.2611, cr_loss=0.4443, attn_decoder_loss=0.3135, over 5771656.67 frames. 
], batch size: 104, lr: 3.02e-02, grad_scale: 8.0 +2024-09-16 17:58:59,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=49600.0, ans=0.09899494936611666 +2024-09-16 17:59:02,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=49600.0, ans=0.125 +2024-09-16 17:59:09,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=49600.0, ans=0.125 +2024-09-16 17:59:17,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=49640.0, ans=10.0 +2024-09-16 17:59:21,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=49640.0, ans=0.025 +2024-09-16 17:59:48,722 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.019e+02 1.330e+02 1.460e+02 1.779e+02 4.186e+02, threshold=2.920e+02, percent-clipped=7.0 +2024-09-16 18:00:11,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=49760.0, ans=0.125 +2024-09-16 18:00:17,599 INFO [train.py:1198] (0/2) Epoch 3, batch 3400, loss[loss=0.2795, ctc_loss=0.2308, cr_loss=0.3804, attn_decoder_loss=0.2765, over 29294.00 frames. ], tot_loss[loss=0.3171, ctc_loss=0.2613, cr_loss=0.4446, attn_decoder_loss=0.3134, over 5763242.57 frames. ], batch size: 67, lr: 3.01e-02, grad_scale: 8.0 +2024-09-16 18:00:26,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=49800.0, ans=0.5 +2024-09-16 18:00:28,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=49800.0, ans=0.1 +2024-09-16 18:00:36,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.94 vs. limit=15.0 +2024-09-16 18:00:40,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=49840.0, ans=0.125 +2024-09-16 18:00:48,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=49880.0, ans=0.125 +2024-09-16 18:01:00,883 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.99 vs. limit=6.0 +2024-09-16 18:01:06,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=49920.0, ans=1.7391304347826736e-05 +2024-09-16 18:01:10,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=49920.0, ans=0.1 +2024-09-16 18:01:33,057 INFO [train.py:1198] (0/2) Epoch 3, batch 3450, loss[loss=0.3349, ctc_loss=0.2822, cr_loss=0.4925, attn_decoder_loss=0.3298, over 28149.00 frames. ], tot_loss[loss=0.3173, ctc_loss=0.2613, cr_loss=0.4447, attn_decoder_loss=0.3136, over 5772163.54 frames. 
], batch size: 111, lr: 3.01e-02, grad_scale: 8.0 +2024-09-16 18:01:48,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=50040.0, ans=0.125 +2024-09-16 18:02:19,802 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.075e+02 1.389e+02 1.591e+02 1.812e+02 6.127e+02, threshold=3.183e+02, percent-clipped=1.0 +2024-09-16 18:02:23,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.66 vs. limit=12.0 +2024-09-16 18:02:30,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=50120.0, ans=0.125 +2024-09-16 18:02:40,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=6.02 vs. limit=12.0 +2024-09-16 18:02:50,661 INFO [train.py:1198] (0/2) Epoch 3, batch 3500, loss[loss=0.2893, ctc_loss=0.2389, cr_loss=0.4108, attn_decoder_loss=0.2858, over 29338.00 frames. ], tot_loss[loss=0.3165, ctc_loss=0.2607, cr_loss=0.4441, attn_decoder_loss=0.3128, over 5773820.44 frames. ], batch size: 71, lr: 3.00e-02, grad_scale: 8.0 +2024-09-16 18:02:59,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=50200.0, ans=0.1 +2024-09-16 18:03:09,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=50240.0, ans=0.125 +2024-09-16 18:03:14,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=50240.0, ans=0.1 +2024-09-16 18:03:24,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=50280.0, ans=0.125 +2024-09-16 18:03:43,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=50320.0, ans=0.0 +2024-09-16 18:03:45,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=50320.0, ans=0.125 +2024-09-16 18:04:07,558 INFO [train.py:1198] (0/2) Epoch 3, batch 3550, loss[loss=0.3245, ctc_loss=0.2646, cr_loss=0.4458, attn_decoder_loss=0.3213, over 29722.00 frames. ], tot_loss[loss=0.3163, ctc_loss=0.2602, cr_loss=0.4449, attn_decoder_loss=0.3126, over 5781062.76 frames. ], batch size: 89, lr: 3.00e-02, grad_scale: 4.0 +2024-09-16 18:04:26,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=50440.0, ans=0.2 +2024-09-16 18:04:43,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=50480.0, ans=0.125 +2024-09-16 18:04:55,143 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.129e+02 1.391e+02 1.610e+02 2.091e+02 4.528e+02, threshold=3.220e+02, percent-clipped=5.0 +2024-09-16 18:05:19,395 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.61 vs. limit=15.0 +2024-09-16 18:05:20,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.01 vs. 
limit=10.0 +2024-09-16 18:05:21,622 INFO [train.py:1198] (0/2) Epoch 3, batch 3600, loss[loss=0.3018, ctc_loss=0.2434, cr_loss=0.4045, attn_decoder_loss=0.2993, over 29483.00 frames. ], tot_loss[loss=0.3164, ctc_loss=0.26, cr_loss=0.4448, attn_decoder_loss=0.3128, over 5791060.05 frames. ], batch size: 77, lr: 2.99e-02, grad_scale: 8.0 +2024-09-16 18:05:44,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=50640.0, ans=0.0 +2024-09-16 18:05:54,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=50680.0, ans=0.1 +2024-09-16 18:06:17,284 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.77 vs. limit=22.5 +2024-09-16 18:06:20,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.93 vs. limit=15.0 +2024-09-16 18:06:27,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=50760.0, ans=0.0 +2024-09-16 18:06:35,940 INFO [train.py:1198] (0/2) Epoch 3, batch 3650, loss[loss=0.3398, ctc_loss=0.2864, cr_loss=0.4632, attn_decoder_loss=0.3354, over 29514.00 frames. ], tot_loss[loss=0.3156, ctc_loss=0.2591, cr_loss=0.4437, attn_decoder_loss=0.312, over 5793138.36 frames. ], batch size: 90, lr: 2.99e-02, grad_scale: 4.0 +2024-09-16 18:06:54,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=50840.0, ans=0.125 +2024-09-16 18:07:01,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=50840.0, ans=0.125 +2024-09-16 18:07:03,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=50840.0, ans=0.09899494936611666 +2024-09-16 18:07:07,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=50880.0, ans=0.125 +2024-09-16 18:07:25,480 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.018e+02 1.262e+02 1.447e+02 1.690e+02 1.332e+03, threshold=2.894e+02, percent-clipped=3.0 +2024-09-16 18:07:31,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=50920.0, ans=0.125 +2024-09-16 18:07:36,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=50960.0, ans=0.125 +2024-09-16 18:07:39,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=50960.0, ans=0.2 +2024-09-16 18:07:45,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=50960.0, ans=0.0 +2024-09-16 18:07:50,880 INFO [train.py:1198] (0/2) Epoch 3, batch 3700, loss[loss=0.3445, ctc_loss=0.2953, cr_loss=0.4958, attn_decoder_loss=0.3389, over 29705.00 frames. ], tot_loss[loss=0.3161, ctc_loss=0.2592, cr_loss=0.445, attn_decoder_loss=0.3125, over 5803511.77 frames. 
], batch size: 84, lr: 2.99e-02, grad_scale: 8.0 +2024-09-16 18:08:03,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=51000.0, ans=0.125 +2024-09-16 18:08:12,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=51040.0, ans=0.2 +2024-09-16 18:08:22,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=51080.0, ans=0.125 +2024-09-16 18:08:37,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=51120.0, ans=0.125 +2024-09-16 18:08:46,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=51120.0, ans=0.04949747468305833 +2024-09-16 18:08:52,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=51160.0, ans=0.125 +2024-09-16 18:09:09,219 INFO [train.py:1198] (0/2) Epoch 3, batch 3750, loss[loss=0.274, ctc_loss=0.2208, cr_loss=0.3941, attn_decoder_loss=0.2712, over 29336.00 frames. ], tot_loss[loss=0.3155, ctc_loss=0.2585, cr_loss=0.4441, attn_decoder_loss=0.3119, over 5807393.49 frames. ], batch size: 67, lr: 2.98e-02, grad_scale: 8.0 +2024-09-16 18:09:11,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=51200.0, ans=0.0 +2024-09-16 18:09:15,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=51200.0, ans=0.125 +2024-09-16 18:09:26,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=51240.0, ans=0.0 +2024-09-16 18:09:40,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=51280.0, ans=0.125 +2024-09-16 18:09:40,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=51280.0, ans=0.0 +2024-09-16 18:09:50,144 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.88 vs. limit=6.0 +2024-09-16 18:09:52,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=51320.0, ans=0.025 +2024-09-16 18:09:58,526 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.044e+02 1.287e+02 1.522e+02 1.821e+02 1.090e+03, threshold=3.043e+02, percent-clipped=9.0 +2024-09-16 18:09:58,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=51320.0, ans=0.0 +2024-09-16 18:10:16,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=51360.0, ans=0.0 +2024-09-16 18:10:21,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=51360.0, ans=0.125 +2024-09-16 18:10:21,673 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.70 vs. 
limit=15.0 +2024-09-16 18:10:23,884 INFO [train.py:1198] (0/2) Epoch 3, batch 3800, loss[loss=0.3125, ctc_loss=0.2468, cr_loss=0.4355, attn_decoder_loss=0.3101, over 29632.00 frames. ], tot_loss[loss=0.3152, ctc_loss=0.2583, cr_loss=0.4438, attn_decoder_loss=0.3117, over 5798364.09 frames. ], batch size: 86, lr: 2.98e-02, grad_scale: 8.0 +2024-09-16 18:10:27,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=51400.0, ans=0.0 +2024-09-16 18:10:27,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=51400.0, ans=0.125 +2024-09-16 18:10:28,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=51400.0, ans=0.0 +2024-09-16 18:10:33,805 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=26.04 vs. limit=22.5 +2024-09-16 18:10:41,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=51440.0, ans=0.125 +2024-09-16 18:11:06,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.52 vs. limit=15.0 +2024-09-16 18:11:18,217 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.84 vs. limit=22.5 +2024-09-16 18:11:38,187 INFO [train.py:1198] (0/2) Epoch 3, batch 3850, loss[loss=0.3356, ctc_loss=0.278, cr_loss=0.4499, attn_decoder_loss=0.332, over 29205.00 frames. ], tot_loss[loss=0.315, ctc_loss=0.2573, cr_loss=0.4433, attn_decoder_loss=0.3116, over 5811381.97 frames. ], batch size: 100, lr: 2.97e-02, grad_scale: 8.0 +2024-09-16 18:11:38,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=51600.0, ans=0.1 +2024-09-16 18:11:40,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=51600.0, ans=0.125 +2024-09-16 18:11:46,610 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.14 vs. 
limit=22.5 +2024-09-16 18:11:48,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=51600.0, ans=0.2 +2024-09-16 18:11:53,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=51640.0, ans=0.125 +2024-09-16 18:12:03,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=51640.0, ans=0.125 +2024-09-16 18:12:18,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=51680.0, ans=0.025 +2024-09-16 18:12:20,128 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:12:27,164 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.157e+02 1.321e+02 1.509e+02 1.752e+02 3.872e+02, threshold=3.018e+02, percent-clipped=1.0 +2024-09-16 18:12:36,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_na.min_abs, batch_count=51760.0, ans=0.02 +2024-09-16 18:12:46,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=51760.0, ans=0.0 +2024-09-16 18:12:48,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=51760.0, ans=0.2 +2024-09-16 18:12:48,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=6.03 vs. limit=12.0 +2024-09-16 18:12:52,660 INFO [train.py:1198] (0/2) Epoch 3, batch 3900, loss[loss=0.3295, ctc_loss=0.2584, cr_loss=0.4622, attn_decoder_loss=0.3271, over 29630.00 frames. ], tot_loss[loss=0.3157, ctc_loss=0.2578, cr_loss=0.4438, attn_decoder_loss=0.3123, over 5815890.39 frames. ], batch size: 86, lr: 2.97e-02, grad_scale: 8.0 +2024-09-16 18:12:55,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=51800.0, ans=0.125 +2024-09-16 18:13:00,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=51800.0, ans=0.0 +2024-09-16 18:13:06,702 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.75 vs. limit=22.5 +2024-09-16 18:13:07,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=51840.0, ans=0.07 +2024-09-16 18:13:17,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=51840.0, ans=0.5 +2024-09-16 18:13:24,549 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=13.74 vs. 
limit=15.0 +2024-09-16 18:13:35,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=51920.0, ans=0.125 +2024-09-16 18:13:41,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=51920.0, ans=0.125 +2024-09-16 18:13:54,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=51960.0, ans=10.0 +2024-09-16 18:13:54,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=51960.0, ans=0.125 +2024-09-16 18:14:06,784 INFO [train.py:1198] (0/2) Epoch 3, batch 3950, loss[loss=0.3363, ctc_loss=0.2787, cr_loss=0.4589, attn_decoder_loss=0.3326, over 29505.00 frames. ], tot_loss[loss=0.3149, ctc_loss=0.2565, cr_loss=0.4432, attn_decoder_loss=0.3116, over 5834997.99 frames. ], batch size: 97, lr: 2.96e-02, grad_scale: 8.0 +2024-09-16 18:14:17,703 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.86 vs. limit=6.0 +2024-09-16 18:14:32,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=52040.0, ans=0.125 +2024-09-16 18:14:45,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=52080.0, ans=0.0 +2024-09-16 18:14:51,635 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.22 vs. limit=22.5 +2024-09-16 18:14:53,979 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:14:58,187 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.019e+02 1.359e+02 1.491e+02 1.794e+02 3.719e+02, threshold=2.982e+02, percent-clipped=2.0 +2024-09-16 18:15:22,983 INFO [train.py:1198] (0/2) Epoch 3, batch 4000, loss[loss=0.3007, ctc_loss=0.247, cr_loss=0.4411, attn_decoder_loss=0.2969, over 29511.00 frames. ], tot_loss[loss=0.3153, ctc_loss=0.2575, cr_loss=0.4444, attn_decoder_loss=0.3118, over 5811136.35 frames. ], batch size: 74, lr: 2.96e-02, grad_scale: 16.0 +2024-09-16 18:15:48,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=52240.0, ans=0.1 +2024-09-16 18:16:06,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=52320.0, ans=0.125 +2024-09-16 18:16:19,039 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.46 vs. limit=10.0 +2024-09-16 18:16:36,968 INFO [train.py:1198] (0/2) Epoch 3, batch 4050, loss[loss=0.3511, ctc_loss=0.3282, cr_loss=0.4782, attn_decoder_loss=0.343, over 20420.00 frames. ], tot_loss[loss=0.3148, ctc_loss=0.2569, cr_loss=0.444, attn_decoder_loss=0.3114, over 5795250.29 frames. ], batch size: 209, lr: 2.96e-02, grad_scale: 4.0 +2024-09-16 18:16:37,809 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.99 vs. 
limit=15.0 +2024-09-16 18:16:47,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.57 vs. limit=15.0 +2024-09-16 18:16:57,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=52440.0, ans=0.0 +2024-09-16 18:16:58,247 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.42 vs. limit=15.0 +2024-09-16 18:17:09,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff3.min_abs, batch_count=52480.0, ans=0.2 +2024-09-16 18:17:17,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=52480.0, ans=0.125 +2024-09-16 18:17:22,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=52520.0, ans=0.0 +2024-09-16 18:17:28,049 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.077e+02 1.348e+02 1.567e+02 1.841e+02 9.373e+02, threshold=3.134e+02, percent-clipped=5.0 +2024-09-16 18:17:31,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=52520.0, ans=0.0 +2024-09-16 18:17:34,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=52560.0, ans=0.1 +2024-09-16 18:17:45,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=52560.0, ans=0.0 +2024-09-16 18:17:50,222 INFO [train.py:1198] (0/2) Epoch 3, batch 4100, loss[loss=0.3338, ctc_loss=0.274, cr_loss=0.4589, attn_decoder_loss=0.3303, over 29510.00 frames. ], tot_loss[loss=0.3149, ctc_loss=0.2572, cr_loss=0.4437, attn_decoder_loss=0.3114, over 5791158.72 frames. ], batch size: 90, lr: 2.95e-02, grad_scale: 8.0 +2024-09-16 18:17:59,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=52600.0, ans=0.125 +2024-09-16 18:17:59,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=52600.0, ans=0.125 +2024-09-16 18:18:05,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=52640.0, ans=0.125 +2024-09-16 18:18:10,442 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=13.27 vs. 
limit=22.5 +2024-09-16 18:18:18,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=52680.0, ans=0.125 +2024-09-16 18:18:22,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=52680.0, ans=0.125 +2024-09-16 18:18:27,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=52680.0, ans=0.1 +2024-09-16 18:18:37,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=52720.0, ans=0.125 +2024-09-16 18:19:06,556 INFO [train.py:1198] (0/2) Epoch 3, batch 4150, loss[loss=0.304, ctc_loss=0.2463, cr_loss=0.4581, attn_decoder_loss=0.3002, over 29508.00 frames. ], tot_loss[loss=0.3143, ctc_loss=0.2567, cr_loss=0.4432, attn_decoder_loss=0.3109, over 5796541.51 frames. ], batch size: 77, lr: 2.95e-02, grad_scale: 4.0 +2024-09-16 18:19:09,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=52800.0, ans=0.125 +2024-09-16 18:19:19,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.56 vs. limit=15.0 +2024-09-16 18:19:24,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=52840.0, ans=0.125 +2024-09-16 18:19:42,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=52880.0, ans=0.125 +2024-09-16 18:19:55,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=52920.0, ans=0.125 +2024-09-16 18:19:59,721 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.110e+02 1.278e+02 1.455e+02 1.672e+02 3.435e+02, threshold=2.910e+02, percent-clipped=1.0 +2024-09-16 18:20:13,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=52960.0, ans=0.125 +2024-09-16 18:20:16,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=52960.0, ans=0.125 +2024-09-16 18:20:20,386 INFO [train.py:1198] (0/2) Epoch 3, batch 4200, loss[loss=0.3248, ctc_loss=0.265, cr_loss=0.4751, attn_decoder_loss=0.3209, over 29480.00 frames. ], tot_loss[loss=0.3148, ctc_loss=0.257, cr_loss=0.4442, attn_decoder_loss=0.3113, over 5798866.17 frames. ], batch size: 90, lr: 2.94e-02, grad_scale: 8.0 +2024-09-16 18:20:23,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=53000.0, ans=0.025 +2024-09-16 18:20:27,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.14 vs. limit=10.0 +2024-09-16 18:20:35,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=53040.0, ans=0.0 +2024-09-16 18:20:37,543 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=25.82 vs. 
limit=22.5 +2024-09-16 18:20:39,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=53040.0, ans=0.2 +2024-09-16 18:20:50,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=53080.0, ans=0.125 +2024-09-16 18:20:50,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=53080.0, ans=0.025 +2024-09-16 18:20:51,746 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=2.554e-03 +2024-09-16 18:21:07,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=53120.0, ans=0.1 +2024-09-16 18:21:08,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.98 vs. limit=15.0 +2024-09-16 18:21:08,410 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.86 vs. limit=6.0 +2024-09-16 18:21:13,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.60 vs. limit=15.0 +2024-09-16 18:21:20,319 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.16 vs. limit=22.5 +2024-09-16 18:21:21,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=53160.0, ans=0.0 +2024-09-16 18:21:25,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=53160.0, ans=0.025 +2024-09-16 18:21:34,071 INFO [train.py:1198] (0/2) Epoch 3, batch 4250, loss[loss=0.2944, ctc_loss=0.2298, cr_loss=0.4204, attn_decoder_loss=0.2923, over 29521.00 frames. ], tot_loss[loss=0.3149, ctc_loss=0.2569, cr_loss=0.4442, attn_decoder_loss=0.3115, over 5804715.29 frames. ], batch size: 74, lr: 2.94e-02, grad_scale: 4.0 +2024-09-16 18:21:35,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=53200.0, ans=0.0 +2024-09-16 18:21:35,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=53200.0, ans=0.2 +2024-09-16 18:21:57,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=53240.0, ans=0.09899494936611666 +2024-09-16 18:22:29,124 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.928e+01 1.354e+02 1.567e+02 1.958e+02 1.183e+03, threshold=3.135e+02, percent-clipped=4.0 +2024-09-16 18:22:32,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=53360.0, ans=0.07 +2024-09-16 18:22:38,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=53360.0, ans=0.125 +2024-09-16 18:22:49,063 INFO [train.py:1198] (0/2) Epoch 3, batch 4300, loss[loss=0.326, ctc_loss=0.265, cr_loss=0.4558, attn_decoder_loss=0.3226, over 29535.00 frames. ], tot_loss[loss=0.3151, ctc_loss=0.257, cr_loss=0.4432, attn_decoder_loss=0.3117, over 5794553.15 frames. 
], batch size: 87, lr: 2.93e-02, grad_scale: 8.0 +2024-09-16 18:22:49,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=53400.0, ans=0.2 +2024-09-16 18:22:57,702 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.51 vs. limit=15.0 +2024-09-16 18:23:15,672 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.42 vs. limit=15.0 +2024-09-16 18:24:03,903 INFO [train.py:1198] (0/2) Epoch 3, batch 4350, loss[loss=0.3419, ctc_loss=0.2782, cr_loss=0.4758, attn_decoder_loss=0.3384, over 29489.00 frames. ], tot_loss[loss=0.3189, ctc_loss=0.2605, cr_loss=0.4481, attn_decoder_loss=0.3155, over 5797962.73 frames. ], batch size: 97, lr: 2.93e-02, grad_scale: 4.0 +2024-09-16 18:24:13,630 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.26 vs. limit=6.0 +2024-09-16 18:24:14,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=53600.0, ans=0.125 +2024-09-16 18:24:33,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=53680.0, ans=0.0 +2024-09-16 18:24:36,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=53680.0, ans=0.05 +2024-09-16 18:24:58,908 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.62 vs. limit=12.0 +2024-09-16 18:24:59,250 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.065e+02 1.313e+02 1.497e+02 1.843e+02 5.151e+02, threshold=2.995e+02, percent-clipped=3.0 +2024-09-16 18:25:07,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=53760.0, ans=0.2 +2024-09-16 18:25:13,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=53760.0, ans=0.125 +2024-09-16 18:25:13,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=53760.0, ans=10.0 +2024-09-16 18:25:14,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=53760.0, ans=0.125 +2024-09-16 18:25:16,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=53800.0, ans=0.0 +2024-09-16 18:25:17,595 INFO [train.py:1198] (0/2) Epoch 3, batch 4400, loss[loss=0.3237, ctc_loss=0.2698, cr_loss=0.443, attn_decoder_loss=0.3198, over 27290.00 frames. ], tot_loss[loss=0.3221, ctc_loss=0.2642, cr_loss=0.4515, attn_decoder_loss=0.3185, over 5769184.62 frames. 
], batch size: 124, lr: 2.93e-02, grad_scale: 8.0 +2024-09-16 18:25:17,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=53800.0, ans=0.0 +2024-09-16 18:25:20,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=53800.0, ans=0.125 +2024-09-16 18:25:20,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=53800.0, ans=0.0 +2024-09-16 18:25:46,805 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.54 vs. limit=15.0 +2024-09-16 18:26:07,253 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.44 vs. limit=15.0 +2024-09-16 18:26:08,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=53920.0, ans=0.125 +2024-09-16 18:26:20,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=53960.0, ans=0.1 +2024-09-16 18:26:31,927 INFO [train.py:1198] (0/2) Epoch 3, batch 4450, loss[loss=0.3666, ctc_loss=0.3487, cr_loss=0.4888, attn_decoder_loss=0.3577, over 19400.00 frames. ], tot_loss[loss=0.326, ctc_loss=0.2711, cr_loss=0.4537, attn_decoder_loss=0.322, over 5580234.61 frames. ], batch size: 210, lr: 2.92e-02, grad_scale: 8.0 +2024-09-16 18:26:36,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=54000.0, ans=0.125 +2024-09-16 18:26:53,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=54040.0, ans=0.125 +2024-09-16 18:27:29,246 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.938e+01 1.292e+02 1.431e+02 1.663e+02 2.911e+02, threshold=2.863e+02, percent-clipped=0.0 +2024-09-16 18:27:29,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=54120.0, ans=0.2 +2024-09-16 18:27:35,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=54160.0, ans=0.0 +2024-09-16 18:27:47,157 INFO [train.py:1198] (0/2) Epoch 3, batch 4500, loss[loss=0.3341, ctc_loss=0.2913, cr_loss=0.4512, attn_decoder_loss=0.3288, over 20270.00 frames. ], tot_loss[loss=0.331, ctc_loss=0.2811, cr_loss=0.4549, attn_decoder_loss=0.3265, over 5235745.44 frames. ], batch size: 209, lr: 2.92e-02, grad_scale: 8.0 +2024-09-16 18:27:54,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=54200.0, ans=0.0 +2024-09-16 18:28:20,886 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.77 vs. limit=22.5 +2024-09-16 18:28:24,120 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-3.pt +2024-09-16 18:29:13,306 INFO [train.py:1198] (0/2) Epoch 4, batch 0, loss[loss=0.405, ctc_loss=0.257, cr_loss=0.4284, attn_decoder_loss=0.4119, over 29605.00 frames. 
], tot_loss[loss=0.405, ctc_loss=0.257, cr_loss=0.4284, attn_decoder_loss=0.4119, over 29605.00 frames. ], batch size: 73, lr: 2.73e-02, grad_scale: 4.0 +2024-09-16 18:29:13,307 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 18:29:31,681 INFO [train.py:1230] (0/2) Epoch 4, validation: loss=0.259, ctc_loss=0.0933, cr_loss=4.939e-15, attn_decoder_loss=0.2774, over 944034.00 frames. +2024-09-16 18:29:31,681 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 18:29:44,452 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:29:47,067 WARNING [optim.py:503] (0/2) Scaling gradients by 0.06680610030889511, model_norm_threshold=286.2942810058594 +2024-09-16 18:29:47,270 WARNING [optim.py:575] (0/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.1.self_attn.linear_k.weight with proportion 0.28, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=5.084e+06, grad_sumsq=4.710e+06, orig_rms_sq=1.079e+00 +2024-09-16 18:29:49,953 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.17 vs. limit=22.5 +2024-09-16 18:29:52,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=54340.0, ans=0.07 +2024-09-16 18:29:55,776 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=25.44 vs. limit=22.5 +2024-09-16 18:30:06,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=54380.0, ans=0.5 +2024-09-16 18:30:07,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=54380.0, ans=0.125 +2024-09-16 18:30:18,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=54420.0, ans=0.125 +2024-09-16 18:30:39,889 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=29.39 vs. limit=22.5 +2024-09-16 18:30:46,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=54460.0, ans=0.0 +2024-09-16 18:30:48,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=54500.0, ans=0.125 +2024-09-16 18:30:51,642 INFO [train.py:1198] (0/2) Epoch 4, batch 50, loss[loss=0.2711, ctc_loss=0.2144, cr_loss=0.3672, attn_decoder_loss=0.2692, over 29437.00 frames. ], tot_loss[loss=0.3241, ctc_loss=0.264, cr_loss=0.4419, attn_decoder_loss=0.321, over 1268623.09 frames. ], batch size: 70, lr: 2.72e-02, grad_scale: 2.0 +2024-09-16 18:30:54,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=54500.0, ans=0.0 +2024-09-16 18:31:00,268 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.84 vs. 
limit=22.5 +2024-09-16 18:31:03,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=54500.0, ans=0.125 +2024-09-16 18:31:15,935 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.005e+02 1.251e+02 1.386e+02 1.651e+02 4.285e+03, threshold=2.772e+02, percent-clipped=8.0 +2024-09-16 18:31:32,087 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.11 vs. limit=15.0 +2024-09-16 18:31:35,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=54620.0, ans=0.025 +2024-09-16 18:31:39,493 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.61 vs. limit=15.0 +2024-09-16 18:32:03,518 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.65 vs. limit=22.5 +2024-09-16 18:32:07,176 INFO [train.py:1198] (0/2) Epoch 4, batch 100, loss[loss=0.2959, ctc_loss=0.2323, cr_loss=0.4533, attn_decoder_loss=0.2929, over 29529.00 frames. ], tot_loss[loss=0.3212, ctc_loss=0.2621, cr_loss=0.4462, attn_decoder_loss=0.3179, over 2252251.77 frames. ], batch size: 76, lr: 2.72e-02, grad_scale: 4.0 +2024-09-16 18:32:42,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=54780.0, ans=0.1 +2024-09-16 18:32:51,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=54820.0, ans=0.125 +2024-09-16 18:32:54,913 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.05 vs. limit=22.5 +2024-09-16 18:32:55,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=54820.0, ans=0.0 +2024-09-16 18:33:07,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=54860.0, ans=0.0 +2024-09-16 18:33:16,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn2.whiten.whitening_limit, batch_count=54860.0, ans=22.5 +2024-09-16 18:33:18,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=54860.0, ans=0.125 +2024-09-16 18:33:23,764 INFO [train.py:1198] (0/2) Epoch 4, batch 150, loss[loss=0.2715, ctc_loss=0.2114, cr_loss=0.4041, attn_decoder_loss=0.2692, over 29394.00 frames. ], tot_loss[loss=0.3163, ctc_loss=0.2573, cr_loss=0.4447, attn_decoder_loss=0.313, over 3046641.74 frames. 
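The checkpoint path saved at the end of epoch 3 above (`zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-...`) encodes the weights with which the leading `loss` field in these `train.py` lines appears to be assembled from its three components, and the logged numbers bear this out. Below is a minimal arithmetic check of that assumed weighted sum; the `combined_loss` helper is invented for the example and is not icefall code.

```python
import math

# Loss-scale weights read from the experiment directory name above:
# ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02.
CTC_SCALE, AED_SCALE, CR_SCALE = 0.1, 0.9, 0.02

def combined_loss(ctc_loss, attn_decoder_loss, cr_loss):
    # Assumed composition of the leading "loss" field in the train.py lines.
    return CTC_SCALE * ctc_loss + AED_SCALE * attn_decoder_loss + CR_SCALE * cr_loss

# "Epoch 3, batch 4250" tot_loss above: loss=0.3149 from its components.
assert math.isclose(combined_loss(0.2569, 0.3115, 0.4442), 0.3149, abs_tol=5e-5)
# "Epoch 4, batch 0" above: loss=0.405.
assert math.isclose(combined_loss(0.2570, 0.4119, 0.4284), 0.4050, abs_tol=5e-5)
```

The same weights also reproduce the epoch-4 validation entry above: 0.1 × 0.0933 + 0.9 × 0.2774 ≈ 0.259, with the cr_loss term negligible there (it is logged as 4.939e-15 at validation).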
], batch size: 70, lr: 2.72e-02, grad_scale: 4.0 +2024-09-16 18:33:48,188 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.067e+02 1.258e+02 1.425e+02 1.595e+02 3.260e+02, threshold=2.849e+02, percent-clipped=3.0 +2024-09-16 18:33:57,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=54980.0, ans=0.0 +2024-09-16 18:33:57,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=54980.0, ans=0.125 +2024-09-16 18:34:38,957 INFO [train.py:1198] (0/2) Epoch 4, batch 200, loss[loss=0.3171, ctc_loss=0.2476, cr_loss=0.4176, attn_decoder_loss=0.3155, over 27558.00 frames. ], tot_loss[loss=0.3143, ctc_loss=0.2552, cr_loss=0.4444, attn_decoder_loss=0.311, over 3658414.73 frames. ], batch size: 125, lr: 2.71e-02, grad_scale: 8.0 +2024-09-16 18:35:21,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=55180.0, ans=0.1 +2024-09-16 18:35:22,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=55180.0, ans=0.2 +2024-09-16 18:35:30,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=55220.0, ans=0.2 +2024-09-16 18:35:56,995 INFO [train.py:1198] (0/2) Epoch 4, batch 250, loss[loss=0.332, ctc_loss=0.2707, cr_loss=0.4745, attn_decoder_loss=0.3282, over 29208.00 frames. ], tot_loss[loss=0.313, ctc_loss=0.2533, cr_loss=0.4442, attn_decoder_loss=0.3098, over 4140526.30 frames. ], batch size: 100, lr: 2.71e-02, grad_scale: 4.0 +2024-09-16 18:36:12,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.79 vs. limit=22.5 +2024-09-16 18:36:14,465 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.21 vs. limit=15.0 +2024-09-16 18:36:22,544 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.428e+01 1.364e+02 1.529e+02 1.729e+02 3.264e+02, threshold=3.057e+02, percent-clipped=1.0 +2024-09-16 18:36:32,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=55380.0, ans=0.125 +2024-09-16 18:36:32,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=55380.0, ans=0.125 +2024-09-16 18:36:32,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.36 vs. limit=22.5 +2024-09-16 18:36:56,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=55460.0, ans=0.0 +2024-09-16 18:37:14,468 INFO [train.py:1198] (0/2) Epoch 4, batch 300, loss[loss=0.3172, ctc_loss=0.2495, cr_loss=0.423, attn_decoder_loss=0.3153, over 29539.00 frames. ], tot_loss[loss=0.312, ctc_loss=0.2522, cr_loss=0.4427, attn_decoder_loss=0.3088, over 4508804.27 frames. 
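The `optim.py` WARNINGs report a five-number summary (min, quartiles, max) of recently observed gradient norms, and in each entry the logged `threshold` is 2.0 × the median, matching `Clipping_scale=2.0` (e.g. 2 × 1.431e+02 ≈ 2.863e+02 above, apparently the same threshold that appears unrounded as `model_norm_threshold=286.294...` in the 18:29:47 warning). That warning is in turn consistent with scaling the gradient by `threshold / grad_norm`: 286.294 / 0.0668061 ≈ 4.285e+03, which then shows up as the maximum in the next quartile summary. Here is a minimal sketch of that policy, with details approximated rather than copied from icefall's `optim.py` (`ScaledAdam`):

```python
# Illustrative sketch only; the real implementation lives in icefall's
# optim.py and may differ in how the recent-norm statistics are kept.
import torch

def clip_gradients_(params, recent_norms, clipping_scale=2.0):
    """Scale gradients down when their norm exceeds
    clipping_scale * median of recently observed gradient norms
    (the quantity logged as 'threshold' above)."""
    threshold = clipping_scale * torch.tensor(recent_norms).median()
    grad_norm = torch.sqrt(sum((p.grad ** 2).sum() for p in params))
    if grad_norm > threshold:
        scale = (threshold / grad_norm).item()  # e.g. 286.294 / 4285.4 ≈ 0.0668
        for p in params:
            p.grad.mul_(scale)
    return grad_norm  # would also be appended to recent_norms by the caller
```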
], batch size: 92, lr: 2.70e-02, grad_scale: 8.0 +2024-09-16 18:37:43,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=55580.0, ans=0.1 +2024-09-16 18:37:44,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=55580.0, ans=0.0 +2024-09-16 18:38:29,981 INFO [train.py:1198] (0/2) Epoch 4, batch 350, loss[loss=0.2697, ctc_loss=0.2005, cr_loss=0.3718, attn_decoder_loss=0.2692, over 29301.00 frames. ], tot_loss[loss=0.312, ctc_loss=0.2515, cr_loss=0.4423, attn_decoder_loss=0.3089, over 4794406.45 frames. ], batch size: 71, lr: 2.70e-02, grad_scale: 8.0 +2024-09-16 18:38:34,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=55700.0, ans=0.025 +2024-09-16 18:38:59,298 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.063e+02 1.338e+02 1.528e+02 1.849e+02 4.816e+02, threshold=3.056e+02, percent-clipped=1.0 +2024-09-16 18:39:04,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=55780.0, ans=0.125 +2024-09-16 18:39:07,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=55780.0, ans=0.0 +2024-09-16 18:39:14,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=55780.0, ans=0.125 +2024-09-16 18:39:15,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.11 vs. limit=15.0 +2024-09-16 18:39:20,342 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.63 vs. limit=6.0 +2024-09-16 18:39:32,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=55860.0, ans=0.0 +2024-09-16 18:39:46,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=55900.0, ans=0.125 +2024-09-16 18:39:47,900 INFO [train.py:1198] (0/2) Epoch 4, batch 400, loss[loss=0.3261, ctc_loss=0.2746, cr_loss=0.4762, attn_decoder_loss=0.3212, over 29699.00 frames. ], tot_loss[loss=0.3113, ctc_loss=0.2505, cr_loss=0.4421, attn_decoder_loss=0.3082, over 5023211.26 frames. ], batch size: 82, lr: 2.70e-02, grad_scale: 8.0 +2024-09-16 18:39:55,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=55900.0, ans=0.125 +2024-09-16 18:40:08,596 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.84 vs. limit=6.0 +2024-09-16 18:40:41,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=56020.0, ans=0.125 +2024-09-16 18:40:50,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=56060.0, ans=0.1 +2024-09-16 18:40:51,097 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=24.99 vs. 
limit=22.5 +2024-09-16 18:40:52,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=16.29 vs. limit=15.0 +2024-09-16 18:41:05,934 INFO [train.py:1198] (0/2) Epoch 4, batch 450, loss[loss=0.3207, ctc_loss=0.2448, cr_loss=0.455, attn_decoder_loss=0.319, over 29686.00 frames. ], tot_loss[loss=0.3114, ctc_loss=0.2505, cr_loss=0.4424, attn_decoder_loss=0.3083, over 5187296.63 frames. ], batch size: 83, lr: 2.69e-02, grad_scale: 8.0 +2024-09-16 18:41:27,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=56140.0, ans=0.125 +2024-09-16 18:41:34,591 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.042e+02 1.288e+02 1.422e+02 1.644e+02 6.882e+02, threshold=2.845e+02, percent-clipped=3.0 +2024-09-16 18:41:42,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=56180.0, ans=0.125 +2024-09-16 18:41:58,546 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.26 vs. limit=15.0 +2024-09-16 18:42:12,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=56260.0, ans=0.0 +2024-09-16 18:42:14,845 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.92 vs. limit=22.5 +2024-09-16 18:42:18,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=56260.0, ans=0.2 +2024-09-16 18:42:21,534 INFO [train.py:1198] (0/2) Epoch 4, batch 500, loss[loss=0.3274, ctc_loss=0.2744, cr_loss=0.4524, attn_decoder_loss=0.3232, over 29414.00 frames. ], tot_loss[loss=0.3099, ctc_loss=0.2485, cr_loss=0.442, attn_decoder_loss=0.3069, over 5330441.85 frames. ], batch size: 94, lr: 2.69e-02, grad_scale: 8.0 +2024-09-16 18:42:57,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=56380.0, ans=0.125 +2024-09-16 18:43:14,027 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.15 vs. limit=12.0 +2024-09-16 18:43:24,933 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=15.17 vs. limit=15.0 +2024-09-16 18:43:30,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=56460.0, ans=0.125 +2024-09-16 18:43:38,981 INFO [train.py:1198] (0/2) Epoch 4, batch 550, loss[loss=0.3282, ctc_loss=0.2704, cr_loss=0.4623, attn_decoder_loss=0.3243, over 28828.00 frames. ], tot_loss[loss=0.3102, ctc_loss=0.2491, cr_loss=0.4413, attn_decoder_loss=0.3072, over 5422115.34 frames. 
], batch size: 104, lr: 2.69e-02, grad_scale: 8.0 +2024-09-16 18:43:39,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=56500.0, ans=0.125 +2024-09-16 18:43:48,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=56500.0, ans=0.1 +2024-09-16 18:44:00,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=56540.0, ans=0.0 +2024-09-16 18:44:02,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=56540.0, ans=0.125 +2024-09-16 18:44:09,214 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.307e+02 1.429e+02 1.661e+02 4.927e+02, threshold=2.859e+02, percent-clipped=1.0 +2024-09-16 18:44:18,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=56580.0, ans=0.0 +2024-09-16 18:44:20,551 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.10 vs. limit=22.5 +2024-09-16 18:44:36,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=56620.0, ans=0.2 +2024-09-16 18:44:56,988 INFO [train.py:1198] (0/2) Epoch 4, batch 600, loss[loss=0.3214, ctc_loss=0.2617, cr_loss=0.4465, attn_decoder_loss=0.3182, over 29248.00 frames. ], tot_loss[loss=0.3097, ctc_loss=0.2482, cr_loss=0.4411, attn_decoder_loss=0.3068, over 5509426.60 frames. ], batch size: 100, lr: 2.68e-02, grad_scale: 8.0 +2024-09-16 18:45:13,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=56740.0, ans=0.125 +2024-09-16 18:45:22,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=56740.0, ans=0.0 +2024-09-16 18:45:29,042 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:45:30,998 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.17 vs. limit=22.5 +2024-09-16 18:45:47,906 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.27 vs. limit=15.0 +2024-09-16 18:46:07,728 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=15.37 vs. limit=22.5 +2024-09-16 18:46:12,467 INFO [train.py:1198] (0/2) Epoch 4, batch 650, loss[loss=0.3139, ctc_loss=0.2466, cr_loss=0.4264, attn_decoder_loss=0.3119, over 29766.00 frames. ], tot_loss[loss=0.3087, ctc_loss=0.247, cr_loss=0.44, attn_decoder_loss=0.3058, over 5586235.69 frames. 
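The `ScheduledFloat` lines throughout this log record hyperparameters (skip rates, dropout probabilities, balancer limits, ...) whose current value `ans` is a function of `batch_count`. A minimal re-implementation of the idea follows, assuming the piecewise-linear schedule used by icefall's zipformer `scaling.py`; the breakpoints `(0.0, 0.9)` and `(20000.0, 0.2)` are illustrative assumptions, since the actual schedules are defined in the model code, not in the log.

```python
# Sketch of a batch-count-dependent hyperparameter, in the spirit of
# icefall's ScheduledFloat (scaling.py); not the actual implementation.
class ScheduledFloat:
    def __init__(self, *points):
        # points: (batch_count, value) pairs defining a piecewise-linear curve
        self.points = sorted(points)

    def value(self, batch_count: float) -> float:
        (x0, y0), *rest = self.points
        if batch_count <= x0:
            return y0  # clamp before the first breakpoint
        for x1, y1 in rest:
            if batch_count <= x1:
                # linear interpolation between surrounding breakpoints
                return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)
            x0, y0 = x1, y1
        return y0  # clamp past the last breakpoint

# e.g. a bypass.scale_min ramping from 0.9 down to a final 0.2:
sched = ScheduledFloat((0.0, 0.9), (20000.0, 0.2))
assert sched.value(51600.0) == 0.2  # matches "ans=0.2" logged at batch_count=51600
```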
], batch size: 81, lr: 2.68e-02, grad_scale: 4.0 +2024-09-16 18:46:19,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten.whitening_limit, batch_count=56900.0, ans=22.5 +2024-09-16 18:46:21,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=56900.0, ans=0.125 +2024-09-16 18:46:27,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=56940.0, ans=0.95 +2024-09-16 18:46:39,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=56940.0, ans=0.125 +2024-09-16 18:46:46,221 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.050e+02 1.273e+02 1.380e+02 1.624e+02 3.709e+02, threshold=2.760e+02, percent-clipped=3.0 +2024-09-16 18:46:59,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.97 vs. limit=15.0 +2024-09-16 18:47:30,026 INFO [train.py:1198] (0/2) Epoch 4, batch 700, loss[loss=0.3116, ctc_loss=0.2546, cr_loss=0.4484, attn_decoder_loss=0.308, over 29533.00 frames. ], tot_loss[loss=0.3091, ctc_loss=0.2471, cr_loss=0.4402, attn_decoder_loss=0.3062, over 5636603.97 frames. ], batch size: 76, lr: 2.67e-02, grad_scale: 8.0 +2024-09-16 18:47:33,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=57100.0, ans=0.0 +2024-09-16 18:47:43,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=57140.0, ans=0.125 +2024-09-16 18:47:48,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=57140.0, ans=0.125 +2024-09-16 18:47:54,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=57140.0, ans=0.125 +2024-09-16 18:48:07,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.09 vs. limit=15.0 +2024-09-16 18:48:12,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=57180.0, ans=0.0 +2024-09-16 18:48:37,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=57260.0, ans=0.1 +2024-09-16 18:48:37,995 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.71 vs. limit=22.5 +2024-09-16 18:48:41,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=57260.0, ans=0.035 +2024-09-16 18:48:44,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=57300.0, ans=0.125 +2024-09-16 18:48:46,079 INFO [train.py:1198] (0/2) Epoch 4, batch 750, loss[loss=0.3207, ctc_loss=0.2563, cr_loss=0.4624, attn_decoder_loss=0.3176, over 29720.00 frames. ], tot_loss[loss=0.3087, ctc_loss=0.2467, cr_loss=0.4404, attn_decoder_loss=0.3058, over 5675170.34 frames. 
], batch size: 82, lr: 2.67e-02, grad_scale: 4.0 +2024-09-16 18:48:47,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=57300.0, ans=0.1 +2024-09-16 18:49:03,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=57340.0, ans=0.2 +2024-09-16 18:49:21,187 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.034e+02 1.371e+02 1.558e+02 1.817e+02 5.424e+02, threshold=3.116e+02, percent-clipped=2.0 +2024-09-16 18:49:26,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=57380.0, ans=0.1 +2024-09-16 18:49:26,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=57380.0, ans=0.125 +2024-09-16 18:49:26,690 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=16.96 vs. limit=15.0 +2024-09-16 18:49:40,519 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.70 vs. limit=6.0 +2024-09-16 18:50:03,606 INFO [train.py:1198] (0/2) Epoch 4, batch 800, loss[loss=0.2852, ctc_loss=0.2201, cr_loss=0.3984, attn_decoder_loss=0.2836, over 29617.00 frames. ], tot_loss[loss=0.3085, ctc_loss=0.2465, cr_loss=0.4405, attn_decoder_loss=0.3056, over 5706877.11 frames. ], batch size: 73, lr: 2.67e-02, grad_scale: 8.0 +2024-09-16 18:50:17,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=57540.0, ans=0.0 +2024-09-16 18:50:28,423 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.89 vs. limit=15.0 +2024-09-16 18:50:38,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=57580.0, ans=0.0 +2024-09-16 18:50:52,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=57620.0, ans=0.125 +2024-09-16 18:50:52,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=57620.0, ans=0.125 +2024-09-16 18:50:52,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=57620.0, ans=0.125 +2024-09-16 18:50:54,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=57620.0, ans=0.0 +2024-09-16 18:51:17,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.85 vs. limit=15.0 +2024-09-16 18:51:20,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.41 vs. limit=15.0 +2024-09-16 18:51:20,850 INFO [train.py:1198] (0/2) Epoch 4, batch 850, loss[loss=0.3162, ctc_loss=0.2455, cr_loss=0.4736, attn_decoder_loss=0.3135, over 29716.00 frames. ], tot_loss[loss=0.3081, ctc_loss=0.246, cr_loss=0.4403, attn_decoder_loss=0.3052, over 5735570.27 frames. 
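The `Whitening` lines track how far each module's feature covariance is from a multiple of the identity; the constraint only acts when the measured `metric` exceeds the logged `limit` (compare `metric=12.97 vs. limit=15.0`, inactive, with `metric=16.96 vs. limit=15.0` just above, where a corrective gradient would be applied). A rough standalone sketch of such a metric, with the formula approximated from icefall's `scaling.py` rather than copied:

```python
import torch

def whitening_metric(x: torch.Tensor, num_groups: int) -> torch.Tensor:
    """~1.0 when each channel group's covariance is a multiple of the
    identity; grows as channels become correlated or unequal in scale."""
    n, c = x.shape
    cpg = c // num_groups
    xg = x.reshape(n, num_groups, cpg).transpose(0, 1)  # (groups, n, cpg)
    covar = xg.transpose(1, 2) @ xg                     # per-group covariance (unnormalized)
    mean_diag = covar.diagonal(dim1=1, dim2=2).mean()
    mean_sq = (covar ** 2).sum() / (num_groups * cpg)
    return mean_sq / (mean_diag ** 2 + 1e-20)

x = torch.randn(10000, 768)               # white features
print(whitening_metric(x, num_groups=1))  # ≈ 1.0; a penalty fires only above `limit`
```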
], batch size: 89, lr: 2.66e-02, grad_scale: 4.0 +2024-09-16 18:51:24,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=57700.0, ans=0.125 +2024-09-16 18:51:33,034 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:51:34,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=57740.0, ans=0.125 +2024-09-16 18:51:34,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=57740.0, ans=0.125 +2024-09-16 18:51:45,603 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.17 vs. limit=15.0 +2024-09-16 18:51:55,368 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.098e+02 1.339e+02 1.546e+02 1.753e+02 3.025e+02, threshold=3.091e+02, percent-clipped=0.0 +2024-09-16 18:52:06,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=57820.0, ans=0.0 +2024-09-16 18:52:16,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=57820.0, ans=0.0 +2024-09-16 18:52:17,320 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.07 vs. limit=15.0 +2024-09-16 18:52:36,426 INFO [train.py:1198] (0/2) Epoch 4, batch 900, loss[loss=0.2862, ctc_loss=0.2267, cr_loss=0.4371, attn_decoder_loss=0.2831, over 29592.00 frames. ], tot_loss[loss=0.3084, ctc_loss=0.2465, cr_loss=0.4405, attn_decoder_loss=0.3055, over 5740903.19 frames. ], batch size: 73, lr: 2.66e-02, grad_scale: 8.0 +2024-09-16 18:52:46,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=57900.0, ans=0.09899494936611666 +2024-09-16 18:52:47,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=57900.0, ans=0.0 +2024-09-16 18:52:53,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_ff3.min_abs, batch_count=57940.0, ans=0.2 +2024-09-16 18:53:05,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=57940.0, ans=0.035 +2024-09-16 18:53:05,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=57940.0, ans=0.125 +2024-09-16 18:53:19,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=57980.0, ans=0.125 +2024-09-16 18:53:25,280 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:53:37,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=58060.0, ans=0.07 +2024-09-16 18:53:40,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=58060.0, ans=0.1 +2024-09-16 18:53:53,365 INFO [train.py:1198] (0/2) Epoch 4, batch 950, loss[loss=0.289, ctc_loss=0.2254, cr_loss=0.4179, attn_decoder_loss=0.2867, over 29514.00 frames. 
], tot_loss[loss=0.3086, ctc_loss=0.2464, cr_loss=0.441, attn_decoder_loss=0.3057, over 5741121.46 frames. ], batch size: 74, lr: 2.66e-02, grad_scale: 4.0 +2024-09-16 18:54:29,624 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.037e+02 1.318e+02 1.459e+02 1.683e+02 8.183e+02, threshold=2.918e+02, percent-clipped=3.0 +2024-09-16 18:54:30,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=58180.0, ans=0.0 +2024-09-16 18:54:32,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.51 vs. limit=22.5 +2024-09-16 18:54:42,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=58220.0, ans=0.125 +2024-09-16 18:54:44,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=58220.0, ans=0.125 +2024-09-16 18:55:02,191 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:55:07,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=58260.0, ans=0.0 +2024-09-16 18:55:10,844 INFO [train.py:1198] (0/2) Epoch 4, batch 1000, loss[loss=0.2891, ctc_loss=0.2206, cr_loss=0.4, attn_decoder_loss=0.2878, over 29502.00 frames. ], tot_loss[loss=0.3091, ctc_loss=0.2471, cr_loss=0.441, attn_decoder_loss=0.3061, over 5734894.02 frames. ], batch size: 77, lr: 2.65e-02, grad_scale: 8.0 +2024-09-16 18:55:25,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.05 vs. limit=15.0 +2024-09-16 18:55:42,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=58380.0, ans=0.2 +2024-09-16 18:55:47,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=58380.0, ans=0.035 +2024-09-16 18:55:50,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=58380.0, ans=0.125 +2024-09-16 18:56:15,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=58460.0, ans=0.2 +2024-09-16 18:56:24,110 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.71 vs. limit=22.5 +2024-09-16 18:56:28,235 INFO [train.py:1198] (0/2) Epoch 4, batch 1050, loss[loss=0.3211, ctc_loss=0.2513, cr_loss=0.4449, attn_decoder_loss=0.3189, over 29673.00 frames. ], tot_loss[loss=0.3081, ctc_loss=0.246, cr_loss=0.4403, attn_decoder_loss=0.3052, over 5743924.67 frames. ], batch size: 85, lr: 2.65e-02, grad_scale: 4.0 +2024-09-16 18:56:31,968 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.47 vs. 
limit=22.5 +2024-09-16 18:56:57,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=58580.0, ans=0.1 +2024-09-16 18:57:03,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=58580.0, ans=0.125 +2024-09-16 18:57:06,150 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.039e+02 1.263e+02 1.458e+02 1.745e+02 4.654e+02, threshold=2.917e+02, percent-clipped=3.0 +2024-09-16 18:57:07,075 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.15 vs. limit=15.0 +2024-09-16 18:57:21,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=58620.0, ans=0.025 +2024-09-16 18:57:34,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=58660.0, ans=6.0 +2024-09-16 18:57:35,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.02 vs. limit=15.0 +2024-09-16 18:57:43,659 INFO [train.py:1198] (0/2) Epoch 4, batch 1100, loss[loss=0.3121, ctc_loss=0.248, cr_loss=0.4885, attn_decoder_loss=0.3084, over 29451.00 frames. ], tot_loss[loss=0.308, ctc_loss=0.2455, cr_loss=0.4399, attn_decoder_loss=0.3051, over 5756257.48 frames. ], batch size: 78, lr: 2.65e-02, grad_scale: 8.0 +2024-09-16 18:58:11,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=58740.0, ans=0.125 +2024-09-16 18:58:43,970 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=26.50 vs. limit=22.5 +2024-09-16 18:59:01,087 INFO [train.py:1198] (0/2) Epoch 4, batch 1150, loss[loss=0.3106, ctc_loss=0.2493, cr_loss=0.4515, attn_decoder_loss=0.3074, over 29464.00 frames. ], tot_loss[loss=0.3078, ctc_loss=0.2453, cr_loss=0.4391, attn_decoder_loss=0.305, over 5754475.91 frames. ], batch size: 78, lr: 2.64e-02, grad_scale: 4.0 +2024-09-16 18:59:11,515 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=6.32 vs. limit=12.0 +2024-09-16 18:59:15,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=58940.0, ans=0.125 +2024-09-16 18:59:40,711 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.556e+01 1.271e+02 1.479e+02 1.697e+02 4.647e+02, threshold=2.959e+02, percent-clipped=3.0 +2024-09-16 18:59:47,791 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.29 vs. limit=15.0 +2024-09-16 18:59:59,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=59020.0, ans=0.0 +2024-09-16 19:00:13,769 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=21.12 vs. limit=22.5 +2024-09-16 19:00:18,996 INFO [train.py:1198] (0/2) Epoch 4, batch 1200, loss[loss=0.3072, ctc_loss=0.2447, cr_loss=0.4319, attn_decoder_loss=0.3046, over 29674.00 frames. 
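The `grad_scale` field in the `train.py` lines fluctuates (2.0 up to 16.0 across this log), consistent with dynamic mixed-precision loss scaling, where the scale halves after an overflow and grows back during stable stretches. The sketch below uses PyTorch's standard `GradScaler`, which icefall's recipes rely on for fp16 training; the `init_scale`, model, and batch names are illustrative assumptions.

```python
# Sketch of dynamic loss scaling consistent with the logged grad_scale values;
# the exact settings in the actual training run are not recorded in this log.
import torch
from torch.cuda.amp import GradScaler, autocast

scaler = GradScaler(init_scale=8.0, growth_interval=2000)

def train_step(model, optimizer, batch, criterion):
    optimizer.zero_grad()
    with autocast():                  # half-precision forward pass
        loss = criterion(model(batch["inputs"]), batch["targets"])
    scaler.scale(loss).backward()     # backward on the scaled loss
    scaler.step(optimizer)            # unscales grads; skips the step on inf/nan
    scaler.update()                   # halves the scale on overflow, else grows it
    return loss.detach()
```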
], tot_loss[loss=0.3087, ctc_loss=0.2467, cr_loss=0.4408, attn_decoder_loss=0.3058, over 5748355.51 frames. ], batch size: 85, lr: 2.64e-02, grad_scale: 8.0 +2024-09-16 19:00:35,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=59140.0, ans=0.025 +2024-09-16 19:01:11,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=59220.0, ans=0.125 +2024-09-16 19:01:14,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=59220.0, ans=0.05 +2024-09-16 19:01:14,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=15.74 vs. limit=15.0 +2024-09-16 19:01:17,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=59220.0, ans=0.125 +2024-09-16 19:01:29,726 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=12.72 vs. limit=15.0 +2024-09-16 19:01:34,925 INFO [train.py:1198] (0/2) Epoch 4, batch 1250, loss[loss=0.3246, ctc_loss=0.2698, cr_loss=0.4631, attn_decoder_loss=0.3203, over 29529.00 frames. ], tot_loss[loss=0.3094, ctc_loss=0.2473, cr_loss=0.4422, attn_decoder_loss=0.3065, over 5774882.58 frames. ], batch size: 92, lr: 2.63e-02, grad_scale: 4.0 +2024-09-16 19:01:43,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.72 vs. limit=15.0 +2024-09-16 19:01:51,346 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.94 vs. limit=6.0 +2024-09-16 19:02:02,890 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=21.45 vs. limit=22.5 +2024-09-16 19:02:05,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=59380.0, ans=0.025 +2024-09-16 19:02:07,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=59380.0, ans=0.1 +2024-09-16 19:02:08,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=59380.0, ans=0.125 +2024-09-16 19:02:08,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=59380.0, ans=0.0 +2024-09-16 19:02:15,813 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.032e+02 1.296e+02 1.466e+02 1.683e+02 4.153e+02, threshold=2.932e+02, percent-clipped=2.0 +2024-09-16 19:02:34,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=59420.0, ans=0.125 +2024-09-16 19:02:41,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=59460.0, ans=0.1 +2024-09-16 19:02:52,644 INFO [train.py:1198] (0/2) Epoch 4, batch 1300, loss[loss=0.3324, ctc_loss=0.2689, cr_loss=0.4503, attn_decoder_loss=0.3295, over 28299.00 frames. 
], tot_loss[loss=0.3085, ctc_loss=0.2462, cr_loss=0.4415, attn_decoder_loss=0.3056, over 5778853.67 frames. ], batch size: 111, lr: 2.63e-02, grad_scale: 8.0 +2024-09-16 19:03:11,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.03 vs. limit=15.0 +2024-09-16 19:03:16,405 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.65 vs. limit=15.0 +2024-09-16 19:03:27,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=59580.0, ans=0.125 +2024-09-16 19:03:36,127 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=7.07 vs. limit=12.0 +2024-09-16 19:03:43,538 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.45 vs. limit=15.0 +2024-09-16 19:03:50,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=59620.0, ans=0.125 +2024-09-16 19:03:52,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=59660.0, ans=0.025 +2024-09-16 19:04:08,496 INFO [train.py:1198] (0/2) Epoch 4, batch 1350, loss[loss=0.3083, ctc_loss=0.2494, cr_loss=0.4523, attn_decoder_loss=0.3048, over 29763.00 frames. ], tot_loss[loss=0.3078, ctc_loss=0.245, cr_loss=0.4408, attn_decoder_loss=0.305, over 5797288.38 frames. ], batch size: 81, lr: 2.63e-02, grad_scale: 4.0 +2024-09-16 19:04:11,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=59700.0, ans=0.125 +2024-09-16 19:04:18,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=59700.0, ans=0.2 +2024-09-16 19:04:52,260 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.037e+02 1.260e+02 1.419e+02 1.691e+02 3.213e+02, threshold=2.838e+02, percent-clipped=1.0 +2024-09-16 19:05:25,810 INFO [train.py:1198] (0/2) Epoch 4, batch 1400, loss[loss=0.2593, ctc_loss=0.192, cr_loss=0.3979, attn_decoder_loss=0.258, over 29561.00 frames. ], tot_loss[loss=0.3076, ctc_loss=0.2446, cr_loss=0.4406, attn_decoder_loss=0.3048, over 5808062.96 frames. ], batch size: 69, lr: 2.62e-02, grad_scale: 8.0 +2024-09-16 19:05:27,934 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.39 vs. limit=15.0 +2024-09-16 19:05:35,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=59900.0, ans=0.0 +2024-09-16 19:05:38,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=13.98 vs. limit=22.5 +2024-09-16 19:05:50,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=59940.0, ans=0.125 +2024-09-16 19:06:15,138 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.67 vs. 
limit=6.0 +2024-09-16 19:06:19,629 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.35 vs. limit=15.0 +2024-09-16 19:06:28,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=60060.0, ans=0.125 +2024-09-16 19:06:43,769 INFO [train.py:1198] (0/2) Epoch 4, batch 1450, loss[loss=0.344, ctc_loss=0.2783, cr_loss=0.482, attn_decoder_loss=0.3406, over 29442.00 frames. ], tot_loss[loss=0.308, ctc_loss=0.2449, cr_loss=0.4404, attn_decoder_loss=0.3052, over 5805055.22 frames. ], batch size: 94, lr: 2.62e-02, grad_scale: 4.0 +2024-09-16 19:06:45,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=60100.0, ans=0.125 +2024-09-16 19:07:02,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=60140.0, ans=0.125 +2024-09-16 19:07:06,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=60140.0, ans=0.1 +2024-09-16 19:07:27,552 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.059e+02 1.279e+02 1.464e+02 1.663e+02 3.366e+02, threshold=2.927e+02, percent-clipped=3.0 +2024-09-16 19:07:27,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=60220.0, ans=0.0 +2024-09-16 19:07:38,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=60220.0, ans=0.125 +2024-09-16 19:07:59,067 INFO [train.py:1198] (0/2) Epoch 4, batch 1500, loss[loss=0.32, ctc_loss=0.2592, cr_loss=0.4873, attn_decoder_loss=0.3159, over 29643.00 frames. ], tot_loss[loss=0.3084, ctc_loss=0.2453, cr_loss=0.4412, attn_decoder_loss=0.3056, over 5806845.02 frames. ], batch size: 86, lr: 2.62e-02, grad_scale: 8.0 +2024-09-16 19:07:59,972 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.89 vs. limit=22.5 +2024-09-16 19:09:02,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=60460.0, ans=0.0 +2024-09-16 19:09:14,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=60460.0, ans=0.025 +2024-09-16 19:09:16,973 INFO [train.py:1198] (0/2) Epoch 4, batch 1550, loss[loss=0.317, ctc_loss=0.2494, cr_loss=0.4572, attn_decoder_loss=0.3143, over 29520.00 frames. ], tot_loss[loss=0.3087, ctc_loss=0.2456, cr_loss=0.4411, attn_decoder_loss=0.3059, over 5781230.96 frames. ], batch size: 90, lr: 2.61e-02, grad_scale: 4.0 +2024-09-16 19:09:27,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=60500.0, ans=0.0 +2024-09-16 19:09:30,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=60540.0, ans=0.09899494936611666 +2024-09-16 19:09:40,604 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.63 vs. 
limit=15.0 +2024-09-16 19:09:53,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=60580.0, ans=0.0 +2024-09-16 19:10:01,805 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.038e+02 1.301e+02 1.510e+02 1.822e+02 6.597e+02, threshold=3.020e+02, percent-clipped=6.0 +2024-09-16 19:10:09,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=60620.0, ans=15.0 +2024-09-16 19:10:09,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=60620.0, ans=0.0 +2024-09-16 19:10:11,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=60620.0, ans=0.125 +2024-09-16 19:10:25,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=60660.0, ans=0.125 +2024-09-16 19:10:31,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=60660.0, ans=0.0 +2024-09-16 19:10:34,147 INFO [train.py:1198] (0/2) Epoch 4, batch 1600, loss[loss=0.3333, ctc_loss=0.2657, cr_loss=0.4773, attn_decoder_loss=0.3302, over 29679.00 frames. ], tot_loss[loss=0.3086, ctc_loss=0.2459, cr_loss=0.4411, attn_decoder_loss=0.3058, over 5762112.39 frames. ], batch size: 85, lr: 2.61e-02, grad_scale: 8.0 +2024-09-16 19:10:38,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=60700.0, ans=0.125 +2024-09-16 19:10:51,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=60740.0, ans=0.125 +2024-09-16 19:10:57,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=60740.0, ans=0.1 +2024-09-16 19:11:18,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=60820.0, ans=0.125 +2024-09-16 19:11:39,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=60860.0, ans=0.07 +2024-09-16 19:11:47,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=60860.0, ans=0.0 +2024-09-16 19:11:50,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=60900.0, ans=0.125 +2024-09-16 19:11:52,009 INFO [train.py:1198] (0/2) Epoch 4, batch 1650, loss[loss=0.3295, ctc_loss=0.2696, cr_loss=0.4885, attn_decoder_loss=0.3253, over 29715.00 frames. ], tot_loss[loss=0.3084, ctc_loss=0.2458, cr_loss=0.4414, attn_decoder_loss=0.3056, over 5757864.34 frames. 
], batch size: 89, lr: 2.61e-02, grad_scale: 4.0 +2024-09-16 19:11:58,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=60900.0, ans=0.09899494936611666 +2024-09-16 19:12:04,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=60900.0, ans=0.125 +2024-09-16 19:12:38,885 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.008e+02 1.275e+02 1.417e+02 1.655e+02 4.421e+02, threshold=2.835e+02, percent-clipped=2.0 +2024-09-16 19:12:40,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=61020.0, ans=0.125 +2024-09-16 19:12:42,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=61020.0, ans=0.2 +2024-09-16 19:12:43,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=61020.0, ans=0.125 +2024-09-16 19:12:47,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.02 vs. limit=10.0 +2024-09-16 19:13:03,619 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.17 vs. limit=12.0 +2024-09-16 19:13:07,426 INFO [train.py:1198] (0/2) Epoch 4, batch 1700, loss[loss=0.2719, ctc_loss=0.2149, cr_loss=0.3984, attn_decoder_loss=0.2693, over 29570.00 frames. ], tot_loss[loss=0.3076, ctc_loss=0.2446, cr_loss=0.4409, attn_decoder_loss=0.3048, over 5779875.67 frames. ], batch size: 69, lr: 2.60e-02, grad_scale: 8.0 +2024-09-16 19:13:54,837 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.50 vs. limit=6.0 +2024-09-16 19:14:06,686 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:14:22,896 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.23 vs. limit=15.0 +2024-09-16 19:14:25,000 INFO [train.py:1198] (0/2) Epoch 4, batch 1750, loss[loss=0.2648, ctc_loss=0.1964, cr_loss=0.3785, attn_decoder_loss=0.2639, over 29384.00 frames. ], tot_loss[loss=0.3064, ctc_loss=0.2429, cr_loss=0.4403, attn_decoder_loss=0.3037, over 5787906.87 frames. ], batch size: 67, lr: 2.60e-02, grad_scale: 8.0 +2024-09-16 19:14:28,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=61300.0, ans=0.2 +2024-09-16 19:14:31,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=61300.0, ans=0.125 +2024-09-16 19:14:54,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=3.57 vs. 
limit=12.0 +2024-09-16 19:15:09,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=61420.0, ans=0.125 +2024-09-16 19:15:11,792 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.273e+01 1.237e+02 1.382e+02 1.538e+02 2.452e+02, threshold=2.764e+02, percent-clipped=0.0 +2024-09-16 19:15:12,728 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.84 vs. limit=22.5 +2024-09-16 19:15:18,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.40 vs. limit=15.0 +2024-09-16 19:15:42,011 INFO [train.py:1198] (0/2) Epoch 4, batch 1800, loss[loss=0.3017, ctc_loss=0.2314, cr_loss=0.4596, attn_decoder_loss=0.2993, over 29695.00 frames. ], tot_loss[loss=0.3066, ctc_loss=0.2429, cr_loss=0.4406, attn_decoder_loss=0.3038, over 5790381.29 frames. ], batch size: 83, lr: 2.60e-02, grad_scale: 8.0 +2024-09-16 19:15:52,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=61500.0, ans=0.2 +2024-09-16 19:16:35,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=61620.0, ans=0.0 +2024-09-16 19:16:57,519 INFO [train.py:1198] (0/2) Epoch 4, batch 1850, loss[loss=0.3188, ctc_loss=0.2434, cr_loss=0.4509, attn_decoder_loss=0.3172, over 29640.00 frames. ], tot_loss[loss=0.3061, ctc_loss=0.2417, cr_loss=0.4401, attn_decoder_loss=0.3034, over 5795634.92 frames. ], batch size: 86, lr: 2.59e-02, grad_scale: 4.0 +2024-09-16 19:17:08,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=61700.0, ans=0.125 +2024-09-16 19:17:46,910 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.035e+02 1.284e+02 1.452e+02 1.621e+02 3.527e+02, threshold=2.905e+02, percent-clipped=2.0 +2024-09-16 19:17:48,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=61820.0, ans=0.2 +2024-09-16 19:18:08,543 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.48 vs. limit=6.0 +2024-09-16 19:18:12,181 INFO [train.py:1198] (0/2) Epoch 4, batch 1900, loss[loss=0.3061, ctc_loss=0.2287, cr_loss=0.4407, attn_decoder_loss=0.3049, over 29724.00 frames. ], tot_loss[loss=0.3066, ctc_loss=0.2423, cr_loss=0.4405, attn_decoder_loss=0.304, over 5803900.21 frames. 
], batch size: 89, lr: 2.59e-02, grad_scale: 8.0 +2024-09-16 19:18:35,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=61940.0, ans=0.125 +2024-09-16 19:18:41,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=61940.0, ans=0.025 +2024-09-16 19:18:53,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=61980.0, ans=0.125 +2024-09-16 19:18:56,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=61980.0, ans=0.07 +2024-09-16 19:19:22,587 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=6.54 vs. limit=12.0 +2024-09-16 19:19:31,263 INFO [train.py:1198] (0/2) Epoch 4, batch 1950, loss[loss=0.2946, ctc_loss=0.2265, cr_loss=0.4166, attn_decoder_loss=0.2929, over 29486.00 frames. ], tot_loss[loss=0.3071, ctc_loss=0.2423, cr_loss=0.4412, attn_decoder_loss=0.3045, over 5818577.06 frames. ], batch size: 78, lr: 2.59e-02, grad_scale: 4.0 +2024-09-16 19:19:46,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=62140.0, ans=0.125 +2024-09-16 19:19:57,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=62140.0, ans=0.125 +2024-09-16 19:20:15,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=62220.0, ans=0.125 +2024-09-16 19:20:22,231 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.025e+02 1.204e+02 1.396e+02 1.540e+02 6.321e+02, threshold=2.792e+02, percent-clipped=2.0 +2024-09-16 19:20:32,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=62260.0, ans=0.025 +2024-09-16 19:20:42,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=62260.0, ans=0.0 +2024-09-16 19:20:46,383 INFO [train.py:1198] (0/2) Epoch 4, batch 2000, loss[loss=0.2779, ctc_loss=0.2141, cr_loss=0.412, attn_decoder_loss=0.2758, over 29354.00 frames. ], tot_loss[loss=0.3081, ctc_loss=0.2431, cr_loss=0.4422, attn_decoder_loss=0.3055, over 5796278.74 frames. ], batch size: 67, lr: 2.58e-02, grad_scale: 8.0 +2024-09-16 19:21:07,346 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.86 vs. limit=6.0 +2024-09-16 19:22:02,156 INFO [train.py:1198] (0/2) Epoch 4, batch 2050, loss[loss=0.2888, ctc_loss=0.233, cr_loss=0.4361, attn_decoder_loss=0.2853, over 29446.00 frames. ], tot_loss[loss=0.3072, ctc_loss=0.243, cr_loss=0.4416, attn_decoder_loss=0.3045, over 5788948.16 frames. 
], batch size: 70, lr: 2.58e-02, grad_scale: 4.0 +2024-09-16 19:22:28,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=62540.0, ans=0.125 +2024-09-16 19:22:45,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=62580.0, ans=0.0 +2024-09-16 19:22:49,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=62620.0, ans=0.125 +2024-09-16 19:22:54,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=62620.0, ans=0.05 +2024-09-16 19:22:57,031 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.877e+01 1.306e+02 1.501e+02 1.885e+02 4.145e+02, threshold=3.002e+02, percent-clipped=3.0 +2024-09-16 19:23:00,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=62620.0, ans=0.2 +2024-09-16 19:23:06,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=62660.0, ans=0.125 +2024-09-16 19:23:20,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=62700.0, ans=0.125 +2024-09-16 19:23:21,653 INFO [train.py:1198] (0/2) Epoch 4, batch 2100, loss[loss=0.2946, ctc_loss=0.2202, cr_loss=0.4119, attn_decoder_loss=0.2937, over 29758.00 frames. ], tot_loss[loss=0.3061, ctc_loss=0.2416, cr_loss=0.4407, attn_decoder_loss=0.3034, over 5801376.85 frames. ], batch size: 81, lr: 2.58e-02, grad_scale: 8.0 +2024-09-16 19:23:21,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=62700.0, ans=0.0 +2024-09-16 19:23:26,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=62700.0, ans=0.0 +2024-09-16 19:23:47,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=62740.0, ans=0.125 +2024-09-16 19:24:03,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=62780.0, ans=0.0 +2024-09-16 19:24:35,565 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:24:36,635 INFO [train.py:1198] (0/2) Epoch 4, batch 2150, loss[loss=0.3018, ctc_loss=0.2328, cr_loss=0.4189, attn_decoder_loss=0.3002, over 29436.00 frames. ], tot_loss[loss=0.3052, ctc_loss=0.2402, cr_loss=0.4392, attn_decoder_loss=0.3026, over 5816321.68 frames. ], batch size: 78, lr: 2.57e-02, grad_scale: 4.0 +2024-09-16 19:24:49,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=62900.0, ans=0.0 +2024-09-16 19:24:55,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=62940.0, ans=0.0 +2024-09-16 19:25:21,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.38 vs. 
limit=12.0 +2024-09-16 19:25:22,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=63020.0, ans=0.125 +2024-09-16 19:25:31,032 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.004e+02 1.239e+02 1.413e+02 1.658e+02 2.671e+02, threshold=2.826e+02, percent-clipped=0.0 +2024-09-16 19:25:41,122 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.32 vs. limit=15.0 +2024-09-16 19:25:41,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=63060.0, ans=0.0 +2024-09-16 19:25:52,108 INFO [train.py:1198] (0/2) Epoch 4, batch 2200, loss[loss=0.305, ctc_loss=0.2352, cr_loss=0.408, attn_decoder_loss=0.3037, over 29632.00 frames. ], tot_loss[loss=0.3055, ctc_loss=0.2406, cr_loss=0.4404, attn_decoder_loss=0.3029, over 5812180.55 frames. ], batch size: 86, lr: 2.57e-02, grad_scale: 8.0 +2024-09-16 19:26:01,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=63100.0, ans=0.0 +2024-09-16 19:26:12,868 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=21.70 vs. limit=22.5 +2024-09-16 19:26:18,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=63140.0, ans=0.0 +2024-09-16 19:27:07,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=63260.0, ans=0.125 +2024-09-16 19:27:09,948 INFO [train.py:1198] (0/2) Epoch 4, batch 2250, loss[loss=0.3105, ctc_loss=0.2427, cr_loss=0.4446, attn_decoder_loss=0.3082, over 29718.00 frames. ], tot_loss[loss=0.305, ctc_loss=0.2399, cr_loss=0.4401, attn_decoder_loss=0.3025, over 5810640.56 frames. 
], batch size: 82, lr: 2.57e-02, grad_scale: 4.0 +2024-09-16 19:27:26,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=63340.0, ans=0.09899494936611666 +2024-09-16 19:27:35,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=63340.0, ans=0.1 +2024-09-16 19:27:42,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=63380.0, ans=0.0 +2024-09-16 19:27:49,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=63380.0, ans=0.125 +2024-09-16 19:27:55,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=63420.0, ans=0.0 +2024-09-16 19:27:57,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=63420.0, ans=0.1 +2024-09-16 19:27:57,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=63420.0, ans=0.0 +2024-09-16 19:28:07,579 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.060e+02 1.265e+02 1.418e+02 1.691e+02 4.004e+02, threshold=2.836e+02, percent-clipped=3.0 +2024-09-16 19:28:09,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=63420.0, ans=0.0 +2024-09-16 19:28:22,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=63460.0, ans=0.025 +2024-09-16 19:28:27,223 INFO [train.py:1198] (0/2) Epoch 4, batch 2300, loss[loss=0.2824, ctc_loss=0.2207, cr_loss=0.4317, attn_decoder_loss=0.2797, over 29298.00 frames. ], tot_loss[loss=0.3044, ctc_loss=0.2401, cr_loss=0.4391, attn_decoder_loss=0.3018, over 5798704.26 frames. ], batch size: 71, lr: 2.56e-02, grad_scale: 8.0 +2024-09-16 19:28:30,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=63500.0, ans=0.0 +2024-09-16 19:28:45,339 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:28:57,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=63580.0, ans=0.1 +2024-09-16 19:29:05,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=63580.0, ans=0.0 +2024-09-16 19:29:29,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=63660.0, ans=0.5 +2024-09-16 19:29:32,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=63660.0, ans=15.0 +2024-09-16 19:29:42,589 INFO [train.py:1198] (0/2) Epoch 4, batch 2350, loss[loss=0.312, ctc_loss=0.2468, cr_loss=0.4707, attn_decoder_loss=0.3088, over 29695.00 frames. ], tot_loss[loss=0.3044, ctc_loss=0.2398, cr_loss=0.4397, attn_decoder_loss=0.3018, over 5803691.53 frames. ], batch size: 83, lr: 2.56e-02, grad_scale: 4.0 +2024-09-16 19:30:02,756 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.08 vs. 
limit=15.0 +2024-09-16 19:30:03,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=63740.0, ans=0.125 +2024-09-16 19:30:18,524 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.51 vs. limit=15.0 +2024-09-16 19:30:41,175 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.09 vs. limit=15.0 +2024-09-16 19:30:41,598 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.090e+02 1.383e+02 1.538e+02 1.780e+02 4.486e+02, threshold=3.076e+02, percent-clipped=4.0 +2024-09-16 19:30:59,759 INFO [train.py:1198] (0/2) Epoch 4, batch 2400, loss[loss=0.3021, ctc_loss=0.24, cr_loss=0.4599, attn_decoder_loss=0.2988, over 29556.00 frames. ], tot_loss[loss=0.3058, ctc_loss=0.2414, cr_loss=0.4418, attn_decoder_loss=0.3032, over 5808114.00 frames. ], batch size: 76, lr: 2.56e-02, grad_scale: 8.0 +2024-09-16 19:31:35,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=63980.0, ans=0.1 +2024-09-16 19:31:38,510 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-16000.pt +2024-09-16 19:31:53,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=64020.0, ans=0.2 +2024-09-16 19:31:54,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=64020.0, ans=0.1 +2024-09-16 19:32:11,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=64060.0, ans=0.125 +2024-09-16 19:32:22,175 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:32:24,847 INFO [train.py:1198] (0/2) Epoch 4, batch 2450, loss[loss=0.307, ctc_loss=0.2469, cr_loss=0.4735, attn_decoder_loss=0.3032, over 29697.00 frames. ], tot_loss[loss=0.3071, ctc_loss=0.2432, cr_loss=0.4434, attn_decoder_loss=0.3043, over 5784456.98 frames. ], batch size: 82, lr: 2.55e-02, grad_scale: 4.0 +2024-09-16 19:32:28,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=64100.0, ans=0.1 +2024-09-16 19:32:31,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.81 vs. limit=15.0 +2024-09-16 19:32:47,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=64140.0, ans=0.0 +2024-09-16 19:32:52,729 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.22 vs. limit=15.0 +2024-09-16 19:32:58,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=64180.0, ans=0.2 +2024-09-16 19:33:03,313 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.01 vs. 
limit=15.0 +2024-09-16 19:33:16,181 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:33:20,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=64220.0, ans=0.0 +2024-09-16 19:33:23,219 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.817e+01 1.239e+02 1.387e+02 1.580e+02 7.191e+02, threshold=2.774e+02, percent-clipped=3.0 +2024-09-16 19:33:32,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=64260.0, ans=0.0 +2024-09-16 19:33:39,962 INFO [train.py:1198] (0/2) Epoch 4, batch 2500, loss[loss=0.3196, ctc_loss=0.2548, cr_loss=0.4545, attn_decoder_loss=0.3167, over 29640.00 frames. ], tot_loss[loss=0.3068, ctc_loss=0.2428, cr_loss=0.4426, attn_decoder_loss=0.3041, over 5795630.81 frames. ], batch size: 86, lr: 2.55e-02, grad_scale: 8.0 +2024-09-16 19:34:21,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=64380.0, ans=0.0 +2024-09-16 19:34:41,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=64460.0, ans=0.5 +2024-09-16 19:34:59,443 INFO [train.py:1198] (0/2) Epoch 4, batch 2550, loss[loss=0.2785, ctc_loss=0.2146, cr_loss=0.4309, attn_decoder_loss=0.2761, over 29351.00 frames. ], tot_loss[loss=0.3065, ctc_loss=0.2419, cr_loss=0.4419, attn_decoder_loss=0.3038, over 5799193.85 frames. ], batch size: 67, lr: 2.55e-02, grad_scale: 4.0 +2024-09-16 19:35:20,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=64540.0, ans=0.125 +2024-09-16 19:35:39,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.39 vs. limit=15.0 +2024-09-16 19:35:49,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=64620.0, ans=0.125 +2024-09-16 19:35:51,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=64620.0, ans=15.0 +2024-09-16 19:35:52,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=64620.0, ans=0.025 +2024-09-16 19:36:00,136 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.618e+01 1.258e+02 1.410e+02 1.550e+02 4.677e+02, threshold=2.819e+02, percent-clipped=4.0 +2024-09-16 19:36:04,129 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.65 vs. limit=15.0 +2024-09-16 19:36:11,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=64660.0, ans=0.0 +2024-09-16 19:36:15,328 INFO [train.py:1198] (0/2) Epoch 4, batch 2600, loss[loss=0.2949, ctc_loss=0.2242, cr_loss=0.4356, attn_decoder_loss=0.2931, over 29462.00 frames. ], tot_loss[loss=0.3071, ctc_loss=0.2424, cr_loss=0.442, attn_decoder_loss=0.3044, over 5795731.83 frames. 
], batch size: 78, lr: 2.54e-02, grad_scale: 8.0 +2024-09-16 19:36:17,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=64700.0, ans=0.125 +2024-09-16 19:36:24,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=64700.0, ans=0.125 +2024-09-16 19:36:36,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=64740.0, ans=0.125 +2024-09-16 19:37:01,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=64820.0, ans=0.2 +2024-09-16 19:37:07,418 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.29 vs. limit=22.5 +2024-09-16 19:37:09,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=64820.0, ans=0.035 +2024-09-16 19:37:21,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=64860.0, ans=0.04949747468305833 +2024-09-16 19:37:23,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=64860.0, ans=0.125 +2024-09-16 19:37:29,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=64900.0, ans=0.125 +2024-09-16 19:37:30,536 INFO [train.py:1198] (0/2) Epoch 4, batch 2650, loss[loss=0.3277, ctc_loss=0.2652, cr_loss=0.4558, attn_decoder_loss=0.3245, over 29264.00 frames. ], tot_loss[loss=0.3074, ctc_loss=0.2428, cr_loss=0.4426, attn_decoder_loss=0.3047, over 5801405.70 frames. ], batch size: 100, lr: 2.54e-02, grad_scale: 4.0 +2024-09-16 19:37:48,028 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.72 vs. limit=15.0 +2024-09-16 19:37:48,233 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.24 vs. limit=22.5 +2024-09-16 19:37:51,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=64940.0, ans=0.035 +2024-09-16 19:38:23,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=65020.0, ans=0.0 +2024-09-16 19:38:28,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=65020.0, ans=0.125 +2024-09-16 19:38:34,070 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.066e+02 1.250e+02 1.369e+02 1.564e+02 3.210e+02, threshold=2.738e+02, percent-clipped=1.0 +2024-09-16 19:38:49,706 INFO [train.py:1198] (0/2) Epoch 4, batch 2700, loss[loss=0.3223, ctc_loss=0.2522, cr_loss=0.4699, attn_decoder_loss=0.3196, over 29501.00 frames. ], tot_loss[loss=0.3069, ctc_loss=0.242, cr_loss=0.442, attn_decoder_loss=0.3042, over 5795658.74 frames. 
], batch size: 87, lr: 2.54e-02, grad_scale: 8.0 +2024-09-16 19:39:17,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=65140.0, ans=0.125 +2024-09-16 19:39:40,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.32 vs. limit=15.0 +2024-09-16 19:39:45,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=65220.0, ans=0.07 +2024-09-16 19:39:49,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=65260.0, ans=0.2 +2024-09-16 19:40:05,380 INFO [train.py:1198] (0/2) Epoch 4, batch 2750, loss[loss=0.2874, ctc_loss=0.2274, cr_loss=0.4296, attn_decoder_loss=0.2845, over 29508.00 frames. ], tot_loss[loss=0.3051, ctc_loss=0.2404, cr_loss=0.4401, attn_decoder_loss=0.3025, over 5795231.62 frames. ], batch size: 75, lr: 2.53e-02, grad_scale: 4.0 +2024-09-16 19:40:16,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=65300.0, ans=0.0 +2024-09-16 19:40:29,579 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:40:47,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=65380.0, ans=0.125 +2024-09-16 19:40:54,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=65420.0, ans=0.125 +2024-09-16 19:41:08,342 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.842e+01 1.245e+02 1.440e+02 1.752e+02 4.612e+02, threshold=2.880e+02, percent-clipped=7.0 +2024-09-16 19:41:18,321 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.59 vs. limit=15.0 +2024-09-16 19:41:20,408 INFO [train.py:1198] (0/2) Epoch 4, batch 2800, loss[loss=0.3609, ctc_loss=0.3362, cr_loss=0.5159, attn_decoder_loss=0.3522, over 19835.00 frames. ], tot_loss[loss=0.3054, ctc_loss=0.2408, cr_loss=0.4408, attn_decoder_loss=0.3028, over 5777039.89 frames. ], batch size: 210, lr: 2.53e-02, grad_scale: 8.0 +2024-09-16 19:41:28,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=65500.0, ans=0.2 +2024-09-16 19:41:31,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=65500.0, ans=0.025 +2024-09-16 19:41:41,174 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.16 vs. limit=6.0 +2024-09-16 19:42:08,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=65620.0, ans=0.125 +2024-09-16 19:42:16,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=65620.0, ans=0.2 +2024-09-16 19:42:24,231 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.25 vs. 
limit=15.0 +2024-09-16 19:42:40,271 INFO [train.py:1198] (0/2) Epoch 4, batch 2850, loss[loss=0.3007, ctc_loss=0.2321, cr_loss=0.412, attn_decoder_loss=0.2992, over 29499.00 frames. ], tot_loss[loss=0.3062, ctc_loss=0.2416, cr_loss=0.4408, attn_decoder_loss=0.3036, over 5762769.83 frames. ], batch size: 77, lr: 2.53e-02, grad_scale: 4.0 +2024-09-16 19:42:45,552 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.37 vs. limit=15.0 +2024-09-16 19:42:51,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=65700.0, ans=0.2 +2024-09-16 19:42:57,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=65740.0, ans=0.025 +2024-09-16 19:43:00,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=65740.0, ans=0.125 +2024-09-16 19:43:09,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=65780.0, ans=0.1 +2024-09-16 19:43:32,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.24 vs. limit=15.0 +2024-09-16 19:43:41,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=65860.0, ans=0.125 +2024-09-16 19:43:44,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=65860.0, ans=0.2 +2024-09-16 19:43:45,309 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.117e+02 1.371e+02 1.544e+02 1.863e+02 5.214e+02, threshold=3.089e+02, percent-clipped=4.0 +2024-09-16 19:43:55,810 INFO [train.py:1198] (0/2) Epoch 4, batch 2900, loss[loss=0.2962, ctc_loss=0.2267, cr_loss=0.4387, attn_decoder_loss=0.2942, over 29419.00 frames. ], tot_loss[loss=0.3067, ctc_loss=0.2412, cr_loss=0.4415, attn_decoder_loss=0.3042, over 5787646.73 frames. ], batch size: 79, lr: 2.52e-02, grad_scale: 8.0 +2024-09-16 19:43:59,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.56 vs. limit=10.0 +2024-09-16 19:44:00,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=65900.0, ans=0.125 +2024-09-16 19:44:28,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=65980.0, ans=0.1 +2024-09-16 19:44:35,917 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.77 vs. limit=15.0 +2024-09-16 19:44:46,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten.whitening_limit, batch_count=66020.0, ans=15.0 +2024-09-16 19:44:58,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.19 vs. 
limit=15.0 +2024-09-16 19:45:00,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=66060.0, ans=0.025 +2024-09-16 19:45:10,942 INFO [train.py:1198] (0/2) Epoch 4, batch 2950, loss[loss=0.292, ctc_loss=0.2388, cr_loss=0.4408, attn_decoder_loss=0.2882, over 29511.00 frames. ], tot_loss[loss=0.3052, ctc_loss=0.24, cr_loss=0.4407, attn_decoder_loss=0.3027, over 5782396.69 frames. ], batch size: 75, lr: 2.52e-02, grad_scale: 4.0 +2024-09-16 19:45:31,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=66140.0, ans=0.125 +2024-09-16 19:46:05,050 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:46:11,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=66220.0, ans=0.125 +2024-09-16 19:46:12,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=66260.0, ans=0.125 +2024-09-16 19:46:18,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=66260.0, ans=0.09899494936611666 +2024-09-16 19:46:19,748 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.755e+01 1.228e+02 1.356e+02 1.566e+02 3.773e+02, threshold=2.713e+02, percent-clipped=2.0 +2024-09-16 19:46:30,794 INFO [train.py:1198] (0/2) Epoch 4, batch 3000, loss[loss=0.3149, ctc_loss=0.2539, cr_loss=0.4443, attn_decoder_loss=0.3117, over 29761.00 frames. ], tot_loss[loss=0.305, ctc_loss=0.2397, cr_loss=0.4403, attn_decoder_loss=0.3025, over 5782947.82 frames. ], batch size: 81, lr: 2.52e-02, grad_scale: 8.0 +2024-09-16 19:46:30,795 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 19:46:49,052 INFO [train.py:1230] (0/2) Epoch 4, validation: loss=0.2264, ctc_loss=0.07857, cr_loss=4.376e-15, attn_decoder_loss=0.2428, over 944034.00 frames. +2024-09-16 19:46:49,053 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 19:46:51,836 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.17 vs. limit=15.0 +2024-09-16 19:46:59,186 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.55 vs. limit=15.0 +2024-09-16 19:47:00,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=66300.0, ans=0.025 +2024-09-16 19:47:00,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=6.39 vs. limit=6.0 +2024-09-16 19:47:27,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=66380.0, ans=0.125 +2024-09-16 19:47:29,501 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:47:31,621 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.93 vs. 
limit=6.0 +2024-09-16 19:47:32,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=66380.0, ans=0.1 +2024-09-16 19:47:52,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=66460.0, ans=0.1 +2024-09-16 19:47:58,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=66460.0, ans=0.125 +2024-09-16 19:48:05,606 INFO [train.py:1198] (0/2) Epoch 4, batch 3050, loss[loss=0.2892, ctc_loss=0.2233, cr_loss=0.432, attn_decoder_loss=0.287, over 29552.00 frames. ], tot_loss[loss=0.3056, ctc_loss=0.24, cr_loss=0.4405, attn_decoder_loss=0.3031, over 5776345.78 frames. ], batch size: 76, lr: 2.51e-02, grad_scale: 4.0 +2024-09-16 19:48:12,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=66500.0, ans=0.125 +2024-09-16 19:48:21,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=66540.0, ans=0.125 +2024-09-16 19:48:48,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=66580.0, ans=0.125 +2024-09-16 19:49:06,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=66660.0, ans=0.125 +2024-09-16 19:49:13,371 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.917e+01 1.239e+02 1.360e+02 1.654e+02 2.744e+02, threshold=2.720e+02, percent-clipped=1.0 +2024-09-16 19:49:15,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=66660.0, ans=0.0 +2024-09-16 19:49:20,805 INFO [train.py:1198] (0/2) Epoch 4, batch 3100, loss[loss=0.3314, ctc_loss=0.266, cr_loss=0.4964, attn_decoder_loss=0.3276, over 29312.00 frames. ], tot_loss[loss=0.3053, ctc_loss=0.2397, cr_loss=0.4412, attn_decoder_loss=0.3027, over 5776757.75 frames. ], batch size: 100, lr: 2.51e-02, grad_scale: 8.0 +2024-09-16 19:49:44,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=66740.0, ans=0.1 +2024-09-16 19:49:48,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=66740.0, ans=0.0 +2024-09-16 19:49:53,373 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.41 vs. limit=12.0 +2024-09-16 19:50:02,789 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.35 vs. 
limit=10.0 +2024-09-16 19:50:19,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=66820.0, ans=15.0 +2024-09-16 19:50:32,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=66860.0, ans=0.025 +2024-09-16 19:50:37,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=66860.0, ans=0.125 +2024-09-16 19:50:40,199 INFO [train.py:1198] (0/2) Epoch 4, batch 3150, loss[loss=0.3301, ctc_loss=0.2623, cr_loss=0.4783, attn_decoder_loss=0.3271, over 28786.00 frames. ], tot_loss[loss=0.3052, ctc_loss=0.2395, cr_loss=0.4411, attn_decoder_loss=0.3027, over 5784244.59 frames. ], batch size: 104, lr: 2.51e-02, grad_scale: 4.0 +2024-09-16 19:50:43,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=66900.0, ans=0.1 +2024-09-16 19:50:57,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=66940.0, ans=0.0 +2024-09-16 19:51:01,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=66940.0, ans=0.025 +2024-09-16 19:51:01,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=66940.0, ans=0.125 +2024-09-16 19:51:13,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=66980.0, ans=0.0 +2024-09-16 19:51:25,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=67020.0, ans=0.07 +2024-09-16 19:51:49,435 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.339e+01 1.205e+02 1.438e+02 1.646e+02 4.024e+02, threshold=2.876e+02, percent-clipped=3.0 +2024-09-16 19:51:50,307 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.57 vs. limit=15.0 +2024-09-16 19:51:55,008 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.83 vs. limit=15.0 +2024-09-16 19:51:55,485 INFO [train.py:1198] (0/2) Epoch 4, batch 3200, loss[loss=0.3059, ctc_loss=0.2364, cr_loss=0.4557, attn_decoder_loss=0.3035, over 29411.00 frames. ], tot_loss[loss=0.3043, ctc_loss=0.2385, cr_loss=0.4406, attn_decoder_loss=0.3018, over 5793365.39 frames. 
], batch size: 79, lr: 2.51e-02, grad_scale: 8.0 +2024-09-16 19:52:01,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=67100.0, ans=0.125 +2024-09-16 19:52:01,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=67100.0, ans=0.05 +2024-09-16 19:52:23,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=67140.0, ans=0.04949747468305833 +2024-09-16 19:52:24,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=67180.0, ans=0.125 +2024-09-16 19:52:49,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_abs, batch_count=67220.0, ans=0.5 +2024-09-16 19:53:11,544 INFO [train.py:1198] (0/2) Epoch 4, batch 3250, loss[loss=0.3028, ctc_loss=0.2379, cr_loss=0.4428, attn_decoder_loss=0.3002, over 29699.00 frames. ], tot_loss[loss=0.3042, ctc_loss=0.2382, cr_loss=0.44, attn_decoder_loss=0.3018, over 5800455.14 frames. ], batch size: 84, lr: 2.50e-02, grad_scale: 4.0 +2024-09-16 19:53:14,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=67300.0, ans=0.0 +2024-09-16 19:53:36,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=67340.0, ans=0.125 +2024-09-16 19:54:15,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=67460.0, ans=0.0 +2024-09-16 19:54:22,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.16 vs. limit=15.0 +2024-09-16 19:54:26,052 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.095e+02 1.284e+02 1.425e+02 1.663e+02 2.668e+02, threshold=2.850e+02, percent-clipped=0.0 +2024-09-16 19:54:29,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=67500.0, ans=0.125 +2024-09-16 19:54:30,779 INFO [train.py:1198] (0/2) Epoch 4, batch 3300, loss[loss=0.3368, ctc_loss=0.2708, cr_loss=0.4808, attn_decoder_loss=0.3334, over 28153.00 frames. ], tot_loss[loss=0.3028, ctc_loss=0.237, cr_loss=0.4383, attn_decoder_loss=0.3004, over 5796478.89 frames. ], batch size: 111, lr: 2.50e-02, grad_scale: 8.0 +2024-09-16 19:54:40,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=67500.0, ans=10.0 +2024-09-16 19:55:01,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=67580.0, ans=0.1 +2024-09-16 19:55:11,081 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.94 vs. limit=15.0 +2024-09-16 19:55:27,405 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.29 vs. limit=15.0 +2024-09-16 19:55:46,073 INFO [train.py:1198] (0/2) Epoch 4, batch 3350, loss[loss=0.3149, ctc_loss=0.2442, cr_loss=0.421, attn_decoder_loss=0.3134, over 28928.00 frames. 
], tot_loss[loss=0.3038, ctc_loss=0.238, cr_loss=0.4389, attn_decoder_loss=0.3013, over 5772632.21 frames. ], batch size: 104, lr: 2.50e-02, grad_scale: 4.0 +2024-09-16 19:55:49,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=67700.0, ans=0.2 +2024-09-16 19:55:59,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.74 vs. limit=15.0 +2024-09-16 19:56:10,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=67740.0, ans=0.1 +2024-09-16 19:56:10,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=67740.0, ans=0.0 +2024-09-16 19:56:13,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=67740.0, ans=0.125 +2024-09-16 19:56:16,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=67780.0, ans=0.0 +2024-09-16 19:56:17,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=67780.0, ans=10.0 +2024-09-16 19:56:22,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=67780.0, ans=0.0 +2024-09-16 19:56:42,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=67820.0, ans=0.0 +2024-09-16 19:56:45,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=67860.0, ans=0.125 +2024-09-16 19:56:54,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=67860.0, ans=0.125 +2024-09-16 19:56:55,146 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.73 vs. limit=15.0 +2024-09-16 19:56:58,641 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.005e+02 1.186e+02 1.341e+02 1.622e+02 4.699e+02, threshold=2.682e+02, percent-clipped=3.0 +2024-09-16 19:57:01,673 INFO [train.py:1198] (0/2) Epoch 4, batch 3400, loss[loss=0.2758, ctc_loss=0.2176, cr_loss=0.3942, attn_decoder_loss=0.2735, over 29325.00 frames. ], tot_loss[loss=0.3037, ctc_loss=0.2383, cr_loss=0.4391, attn_decoder_loss=0.3012, over 5765698.17 frames. ], batch size: 67, lr: 2.49e-02, grad_scale: 8.0 +2024-09-16 19:57:20,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=5.30 vs. limit=12.0 +2024-09-16 19:58:08,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=68060.0, ans=0.0 +2024-09-16 19:58:21,365 INFO [train.py:1198] (0/2) Epoch 4, batch 3450, loss[loss=0.3142, ctc_loss=0.2439, cr_loss=0.4488, attn_decoder_loss=0.312, over 28285.00 frames. ], tot_loss[loss=0.3036, ctc_loss=0.2378, cr_loss=0.4396, attn_decoder_loss=0.3011, over 5774405.61 frames. 
], batch size: 111, lr: 2.49e-02, grad_scale: 4.0 +2024-09-16 19:58:32,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=68100.0, ans=0.07 +2024-09-16 19:58:35,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.64 vs. limit=22.5 +2024-09-16 19:58:45,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=68140.0, ans=0.05 +2024-09-16 19:59:32,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=68260.0, ans=0.125 +2024-09-16 19:59:34,953 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.730e+01 1.161e+02 1.261e+02 1.469e+02 4.535e+02, threshold=2.521e+02, percent-clipped=3.0 +2024-09-16 19:59:36,488 INFO [train.py:1198] (0/2) Epoch 4, batch 3500, loss[loss=0.264, ctc_loss=0.1955, cr_loss=0.3531, attn_decoder_loss=0.2638, over 29730.00 frames. ], tot_loss[loss=0.3024, ctc_loss=0.2366, cr_loss=0.4379, attn_decoder_loss=0.2999, over 5777282.41 frames. ], batch size: 72, lr: 2.49e-02, grad_scale: 8.0 +2024-09-16 20:00:19,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=68420.0, ans=0.025 +2024-09-16 20:00:24,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=68420.0, ans=0.0 +2024-09-16 20:00:33,941 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.85 vs. limit=15.0 +2024-09-16 20:00:44,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.whiten.whitening_limit, batch_count=68460.0, ans=12.0 +2024-09-16 20:00:50,568 INFO [train.py:1198] (0/2) Epoch 4, batch 3550, loss[loss=0.3144, ctc_loss=0.2413, cr_loss=0.4716, attn_decoder_loss=0.3121, over 29711.00 frames. ], tot_loss[loss=0.3023, ctc_loss=0.2364, cr_loss=0.4378, attn_decoder_loss=0.2999, over 5782695.47 frames. ], batch size: 89, lr: 2.48e-02, grad_scale: 4.0 +2024-09-16 20:01:05,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=68540.0, ans=0.1 +2024-09-16 20:01:06,245 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.26 vs. limit=15.0 +2024-09-16 20:01:13,162 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.77 vs. limit=15.0 +2024-09-16 20:01:34,534 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.42 vs. limit=22.5 +2024-09-16 20:01:34,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=11.63 vs. 
limit=15.0 +2024-09-16 20:01:42,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=68620.0, ans=0.125 +2024-09-16 20:01:51,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=68660.0, ans=0.2 +2024-09-16 20:01:52,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=68660.0, ans=0.0 +2024-09-16 20:02:00,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=68660.0, ans=0.1 +2024-09-16 20:02:01,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=68660.0, ans=0.07 +2024-09-16 20:02:01,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=68660.0, ans=0.2 +2024-09-16 20:02:03,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_na.min_abs, batch_count=68700.0, ans=0.02 +2024-09-16 20:02:04,418 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.002e+02 1.275e+02 1.372e+02 1.558e+02 6.376e+02, threshold=2.743e+02, percent-clipped=5.0 +2024-09-16 20:02:04,445 INFO [train.py:1198] (0/2) Epoch 4, batch 3600, loss[loss=0.2921, ctc_loss=0.2195, cr_loss=0.4281, attn_decoder_loss=0.2906, over 29511.00 frames. ], tot_loss[loss=0.3026, ctc_loss=0.2364, cr_loss=0.4389, attn_decoder_loss=0.3002, over 5792285.99 frames. ], batch size: 77, lr: 2.48e-02, grad_scale: 8.0 +2024-09-16 20:02:12,835 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.27 vs. limit=15.0 +2024-09-16 20:02:43,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=68780.0, ans=0.125 +2024-09-16 20:02:56,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=68820.0, ans=0.125 +2024-09-16 20:02:56,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=68820.0, ans=0.025 +2024-09-16 20:02:59,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.60 vs. limit=22.5 +2024-09-16 20:03:04,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=68820.0, ans=0.125 +2024-09-16 20:03:17,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=68860.0, ans=0.125 +2024-09-16 20:03:22,576 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.43 vs. limit=15.0 +2024-09-16 20:03:23,378 INFO [train.py:1198] (0/2) Epoch 4, batch 3650, loss[loss=0.3255, ctc_loss=0.2536, cr_loss=0.4594, attn_decoder_loss=0.3233, over 29512.00 frames. ], tot_loss[loss=0.3019, ctc_loss=0.2355, cr_loss=0.438, attn_decoder_loss=0.2996, over 5794387.69 frames. 
], batch size: 90, lr: 2.48e-02, grad_scale: 4.0 +2024-09-16 20:03:26,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=68900.0, ans=0.2 +2024-09-16 20:04:08,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=69020.0, ans=0.125 +2024-09-16 20:04:14,751 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.00 vs. limit=15.0 +2024-09-16 20:04:24,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=69060.0, ans=0.2 +2024-09-16 20:04:25,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=69060.0, ans=0.125 +2024-09-16 20:04:36,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=69100.0, ans=0.2 +2024-09-16 20:04:37,636 INFO [train.py:1198] (0/2) Epoch 4, batch 3700, loss[loss=0.3252, ctc_loss=0.2521, cr_loss=0.4647, attn_decoder_loss=0.3229, over 29701.00 frames. ], tot_loss[loss=0.3022, ctc_loss=0.2361, cr_loss=0.4387, attn_decoder_loss=0.2998, over 5804578.68 frames. ], batch size: 84, lr: 2.47e-02, grad_scale: 8.0 +2024-09-16 20:04:37,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=69100.0, ans=0.125 +2024-09-16 20:04:39,094 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.031e+02 1.266e+02 1.378e+02 1.578e+02 2.388e+02, threshold=2.756e+02, percent-clipped=0.0 +2024-09-16 20:04:51,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=69140.0, ans=0.125 +2024-09-16 20:05:05,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=69180.0, ans=0.0 +2024-09-16 20:05:28,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=69220.0, ans=0.125 +2024-09-16 20:05:35,788 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.26 vs. limit=12.0 +2024-09-16 20:05:42,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=69260.0, ans=0.0 +2024-09-16 20:05:44,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=69260.0, ans=0.125 +2024-09-16 20:05:47,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=69260.0, ans=0.1 +2024-09-16 20:05:48,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=69260.0, ans=0.1 +2024-09-16 20:05:50,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=69300.0, ans=0.07 +2024-09-16 20:05:51,403 INFO [train.py:1198] (0/2) Epoch 4, batch 3750, loss[loss=0.2733, ctc_loss=0.2143, cr_loss=0.4082, attn_decoder_loss=0.2708, over 29351.00 frames. ], tot_loss[loss=0.3019, ctc_loss=0.2356, cr_loss=0.4382, attn_decoder_loss=0.2995, over 5808907.57 frames. 
], batch size: 67, lr: 2.47e-02, grad_scale: 4.0 +2024-09-16 20:05:59,995 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.51 vs. limit=22.5 +2024-09-16 20:06:02,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=69300.0, ans=0.95 +2024-09-16 20:06:15,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=69340.0, ans=0.125 +2024-09-16 20:06:16,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=69340.0, ans=0.125 +2024-09-16 20:06:20,799 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.05 vs. limit=22.5 +2024-09-16 20:06:30,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=69380.0, ans=0.2 +2024-09-16 20:06:36,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=69420.0, ans=0.0 +2024-09-16 20:06:49,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=69460.0, ans=0.0 +2024-09-16 20:06:54,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=69460.0, ans=0.125 +2024-09-16 20:06:55,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=69460.0, ans=0.125 +2024-09-16 20:07:04,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=69500.0, ans=0.0 +2024-09-16 20:07:05,694 INFO [train.py:1198] (0/2) Epoch 4, batch 3800, loss[loss=0.3235, ctc_loss=0.2534, cr_loss=0.4673, attn_decoder_loss=0.3209, over 29631.00 frames. ], tot_loss[loss=0.3017, ctc_loss=0.2357, cr_loss=0.4378, attn_decoder_loss=0.2993, over 5799624.18 frames. ], batch size: 86, lr: 2.47e-02, grad_scale: 8.0 +2024-09-16 20:07:08,684 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.061e+02 1.301e+02 1.423e+02 1.744e+02 6.965e+02, threshold=2.846e+02, percent-clipped=5.0 +2024-09-16 20:07:10,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=69500.0, ans=0.025 +2024-09-16 20:07:14,007 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.37 vs. limit=12.0 +2024-09-16 20:07:22,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=69540.0, ans=0.0 +2024-09-16 20:07:23,250 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.93 vs. limit=12.0 +2024-09-16 20:08:14,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=69660.0, ans=0.2 +2024-09-16 20:08:20,414 INFO [train.py:1198] (0/2) Epoch 4, batch 3850, loss[loss=0.31, ctc_loss=0.2409, cr_loss=0.4424, attn_decoder_loss=0.3078, over 29249.00 frames. ], tot_loss[loss=0.3015, ctc_loss=0.2352, cr_loss=0.4377, attn_decoder_loss=0.2991, over 5813150.67 frames. 
], batch size: 100, lr: 2.47e-02, grad_scale: 4.0 +2024-09-16 20:08:41,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=69740.0, ans=0.1 +2024-09-16 20:08:50,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=69740.0, ans=0.125 +2024-09-16 20:09:13,540 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.17 vs. limit=15.0 +2024-09-16 20:09:27,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=69860.0, ans=0.125 +2024-09-16 20:09:32,564 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.92 vs. limit=15.0 +2024-09-16 20:09:37,512 INFO [train.py:1198] (0/2) Epoch 4, batch 3900, loss[loss=0.3073, ctc_loss=0.2323, cr_loss=0.4315, attn_decoder_loss=0.3061, over 29626.00 frames. ], tot_loss[loss=0.3019, ctc_loss=0.2352, cr_loss=0.439, attn_decoder_loss=0.2995, over 5817436.61 frames. ], batch size: 86, lr: 2.46e-02, grad_scale: 8.0 +2024-09-16 20:09:41,920 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.044e+02 1.221e+02 1.343e+02 1.520e+02 2.719e+02, threshold=2.686e+02, percent-clipped=0.0 +2024-09-16 20:09:52,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=69940.0, ans=0.1 +2024-09-16 20:09:55,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=69940.0, ans=0.125 +2024-09-16 20:10:06,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.86 vs. limit=22.5 +2024-09-16 20:10:07,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=69980.0, ans=0.125 +2024-09-16 20:10:13,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=69980.0, ans=0.125 +2024-09-16 20:10:18,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=69980.0, ans=0.125 +2024-09-16 20:10:23,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=70020.0, ans=0.1 +2024-09-16 20:10:51,473 INFO [train.py:1198] (0/2) Epoch 4, batch 3950, loss[loss=0.3095, ctc_loss=0.2454, cr_loss=0.4451, attn_decoder_loss=0.3068, over 29495.00 frames. ], tot_loss[loss=0.3021, ctc_loss=0.2349, cr_loss=0.439, attn_decoder_loss=0.2998, over 5836725.79 frames. ], batch size: 97, lr: 2.46e-02, grad_scale: 4.0 +2024-09-16 20:11:15,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=70140.0, ans=0.0 +2024-09-16 20:12:05,013 INFO [train.py:1198] (0/2) Epoch 4, batch 4000, loss[loss=0.2775, ctc_loss=0.2083, cr_loss=0.391, attn_decoder_loss=0.2765, over 29519.00 frames. ], tot_loss[loss=0.3024, ctc_loss=0.2356, cr_loss=0.4396, attn_decoder_loss=0.3, over 5813578.41 frames. 
], batch size: 74, lr: 2.46e-02, grad_scale: 8.0 +2024-09-16 20:12:12,304 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.084e+02 1.309e+02 1.435e+02 1.653e+02 3.484e+02, threshold=2.870e+02, percent-clipped=1.0 +2024-09-16 20:12:24,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=70340.0, ans=0.125 +2024-09-16 20:12:38,800 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.17 vs. limit=22.5 +2024-09-16 20:12:39,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=70380.0, ans=0.2 +2024-09-16 20:12:51,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=70420.0, ans=0.125 +2024-09-16 20:13:20,981 INFO [train.py:1198] (0/2) Epoch 4, batch 4050, loss[loss=0.3387, ctc_loss=0.2978, cr_loss=0.4322, attn_decoder_loss=0.3336, over 20236.00 frames. ], tot_loss[loss=0.3024, ctc_loss=0.2358, cr_loss=0.4395, attn_decoder_loss=0.3, over 5797307.39 frames. ], batch size: 210, lr: 2.45e-02, grad_scale: 4.0 +2024-09-16 20:13:22,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=70500.0, ans=0.025 +2024-09-16 20:13:35,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=70540.0, ans=0.125 +2024-09-16 20:13:47,829 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.83 vs. limit=15.0 +2024-09-16 20:13:57,085 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.18 vs. limit=15.0 +2024-09-16 20:13:59,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=70580.0, ans=0.125 +2024-09-16 20:14:02,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=70580.0, ans=0.125 +2024-09-16 20:14:16,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=70620.0, ans=0.125 +2024-09-16 20:14:35,708 INFO [train.py:1198] (0/2) Epoch 4, batch 4100, loss[loss=0.3259, ctc_loss=0.2577, cr_loss=0.4745, attn_decoder_loss=0.3229, over 29523.00 frames. ], tot_loss[loss=0.3021, ctc_loss=0.2355, cr_loss=0.4387, attn_decoder_loss=0.2997, over 5793031.28 frames. 
], batch size: 90, lr: 2.45e-02, grad_scale: 8.0 +2024-09-16 20:14:40,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=70700.0, ans=0.125 +2024-09-16 20:14:42,854 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.753e+01 1.273e+02 1.617e+02 1.999e+02 3.514e+02, threshold=3.235e+02, percent-clipped=2.0 +2024-09-16 20:14:45,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=70700.0, ans=0.125 +2024-09-16 20:14:53,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=70740.0, ans=0.125 +2024-09-16 20:14:56,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=70740.0, ans=0.1 +2024-09-16 20:14:56,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=70740.0, ans=0.0 +2024-09-16 20:14:59,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=70740.0, ans=0.2 +2024-09-16 20:14:59,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=70740.0, ans=0.125 +2024-09-16 20:15:18,591 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.09 vs. limit=15.0 +2024-09-16 20:15:24,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=70820.0, ans=0.2 +2024-09-16 20:15:32,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=70860.0, ans=0.125 +2024-09-16 20:15:46,726 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.29 vs. limit=22.5 +2024-09-16 20:15:47,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=70900.0, ans=0.0 +2024-09-16 20:15:48,991 INFO [train.py:1198] (0/2) Epoch 4, batch 4150, loss[loss=0.2917, ctc_loss=0.2223, cr_loss=0.4434, attn_decoder_loss=0.2896, over 29498.00 frames. ], tot_loss[loss=0.3015, ctc_loss=0.2349, cr_loss=0.4387, attn_decoder_loss=0.2991, over 5798794.48 frames. ], batch size: 77, lr: 2.45e-02, grad_scale: 4.0 +2024-09-16 20:16:17,023 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:16:22,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=70980.0, ans=0.0 +2024-09-16 20:16:23,208 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.85 vs. limit=15.0 +2024-09-16 20:17:02,282 INFO [train.py:1198] (0/2) Epoch 4, batch 4200, loss[loss=0.3331, ctc_loss=0.2659, cr_loss=0.5061, attn_decoder_loss=0.3293, over 29475.00 frames. ], tot_loss[loss=0.3022, ctc_loss=0.2356, cr_loss=0.4397, attn_decoder_loss=0.2999, over 5800202.84 frames. 
], batch size: 90, lr: 2.44e-02, grad_scale: 8.0 +2024-09-16 20:17:12,670 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.067e+02 1.228e+02 1.369e+02 1.579e+02 3.524e+02, threshold=2.737e+02, percent-clipped=1.0 +2024-09-16 20:17:14,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=71100.0, ans=0.125 +2024-09-16 20:17:37,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=71180.0, ans=0.125 +2024-09-16 20:17:37,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=71180.0, ans=0.025 +2024-09-16 20:17:44,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=71180.0, ans=0.1 +2024-09-16 20:17:52,957 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.47 vs. limit=10.0 +2024-09-16 20:18:05,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=71260.0, ans=0.125 +2024-09-16 20:18:14,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=71260.0, ans=0.1 +2024-09-16 20:18:17,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=71300.0, ans=0.125 +2024-09-16 20:18:18,324 INFO [train.py:1198] (0/2) Epoch 4, batch 4250, loss[loss=0.2736, ctc_loss=0.2027, cr_loss=0.4045, attn_decoder_loss=0.2724, over 29476.00 frames. ], tot_loss[loss=0.3024, ctc_loss=0.2355, cr_loss=0.4403, attn_decoder_loss=0.3001, over 5806161.72 frames. ], batch size: 74, lr: 2.44e-02, grad_scale: 4.0 +2024-09-16 20:18:34,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=71340.0, ans=0.125 +2024-09-16 20:18:39,577 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=5.17 vs. limit=12.0 +2024-09-16 20:18:40,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=71340.0, ans=0.125 +2024-09-16 20:18:44,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=71340.0, ans=0.0 +2024-09-16 20:18:52,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=71380.0, ans=0.125 +2024-09-16 20:19:01,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=71420.0, ans=0.0 +2024-09-16 20:19:06,276 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.88 vs. 
limit=15.0 +2024-09-16 20:19:15,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=71460.0, ans=0.125 +2024-09-16 20:19:17,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=71460.0, ans=0.125 +2024-09-16 20:19:31,897 INFO [train.py:1198] (0/2) Epoch 4, batch 4300, loss[loss=0.3072, ctc_loss=0.2285, cr_loss=0.4363, attn_decoder_loss=0.3062, over 29522.00 frames. ], tot_loss[loss=0.3025, ctc_loss=0.2353, cr_loss=0.4402, attn_decoder_loss=0.3002, over 5795291.55 frames. ], batch size: 87, lr: 2.44e-02, grad_scale: 8.0 +2024-09-16 20:19:43,711 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.834e+01 1.271e+02 1.418e+02 1.620e+02 3.004e+02, threshold=2.836e+02, percent-clipped=2.0 +2024-09-16 20:20:10,768 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:20:15,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=71620.0, ans=0.04949747468305833 +2024-09-16 20:20:47,050 INFO [train.py:1198] (0/2) Epoch 4, batch 4350, loss[loss=0.319, ctc_loss=0.2552, cr_loss=0.4449, attn_decoder_loss=0.3162, over 29504.00 frames. ], tot_loss[loss=0.3061, ctc_loss=0.2386, cr_loss=0.4449, attn_decoder_loss=0.3037, over 5796999.32 frames. ], batch size: 97, lr: 2.44e-02, grad_scale: 4.0 +2024-09-16 20:21:02,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=71740.0, ans=0.125 +2024-09-16 20:21:10,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=71740.0, ans=0.125 +2024-09-16 20:21:18,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=71780.0, ans=0.1 +2024-09-16 20:21:43,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=71820.0, ans=0.1 +2024-09-16 20:21:49,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=71860.0, ans=0.07 +2024-09-16 20:22:00,933 INFO [train.py:1198] (0/2) Epoch 4, batch 4400, loss[loss=0.3134, ctc_loss=0.2468, cr_loss=0.4459, attn_decoder_loss=0.3108, over 27052.00 frames. ], tot_loss[loss=0.3094, ctc_loss=0.2423, cr_loss=0.4487, attn_decoder_loss=0.3069, over 5768442.04 frames. 
], batch size: 124, lr: 2.43e-02, grad_scale: 8.0 +2024-09-16 20:22:04,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=71900.0, ans=0.0 +2024-09-16 20:22:14,013 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.841e+01 1.227e+02 1.349e+02 1.608e+02 3.095e+02, threshold=2.698e+02, percent-clipped=2.0 +2024-09-16 20:22:24,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=71940.0, ans=0.125 +2024-09-16 20:22:24,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=71940.0, ans=0.125 +2024-09-16 20:22:32,574 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=18.21 vs. limit=15.0 +2024-09-16 20:22:51,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=72020.0, ans=0.2 +2024-09-16 20:22:53,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=72020.0, ans=0.0 +2024-09-16 20:22:59,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=72060.0, ans=0.125 +2024-09-16 20:23:03,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=72060.0, ans=0.5 +2024-09-16 20:23:16,018 INFO [train.py:1198] (0/2) Epoch 4, batch 4450, loss[loss=0.3401, ctc_loss=0.2984, cr_loss=0.4585, attn_decoder_loss=0.3346, over 20209.00 frames. ], tot_loss[loss=0.3137, ctc_loss=0.2495, cr_loss=0.4516, attn_decoder_loss=0.3108, over 5582465.30 frames. ], batch size: 211, lr: 2.43e-02, grad_scale: 4.0 +2024-09-16 20:23:21,504 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.38 vs. limit=15.0 +2024-09-16 20:23:30,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=72140.0, ans=0.2 +2024-09-16 20:23:37,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=72140.0, ans=0.125 +2024-09-16 20:24:11,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=72220.0, ans=0.2 +2024-09-16 20:24:23,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=72260.0, ans=0.125 +2024-09-16 20:24:23,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=72260.0, ans=0.125 +2024-09-16 20:24:31,033 INFO [train.py:1198] (0/2) Epoch 4, batch 4500, loss[loss=0.3459, ctc_loss=0.311, cr_loss=0.4643, attn_decoder_loss=0.3395, over 19924.00 frames. ], tot_loss[loss=0.3181, ctc_loss=0.2586, cr_loss=0.4523, attn_decoder_loss=0.3147, over 5240234.20 frames. ], batch size: 209, lr: 2.43e-02, grad_scale: 8.0 +2024-09-16 20:24:31,912 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.60 vs. 
limit=10.0 +2024-09-16 20:24:46,110 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.089e+02 1.245e+02 1.357e+02 1.541e+02 2.817e+02, threshold=2.714e+02, percent-clipped=1.0 +2024-09-16 20:25:07,878 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-4.pt +2024-09-16 20:26:05,472 INFO [train.py:1198] (0/2) Epoch 5, batch 0, loss[loss=0.36, ctc_loss=0.2251, cr_loss=0.4235, attn_decoder_loss=0.3656, over 29603.00 frames. ], tot_loss[loss=0.36, ctc_loss=0.2251, cr_loss=0.4235, attn_decoder_loss=0.3656, over 29603.00 frames. ], batch size: 73, lr: 2.26e-02, grad_scale: 4.0 +2024-09-16 20:26:05,473 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 20:26:24,507 INFO [train.py:1230] (0/2) Epoch 5, validation: loss=0.2407, ctc_loss=0.07934, cr_loss=4.486e-15, attn_decoder_loss=0.2587, over 944034.00 frames. +2024-09-16 20:26:24,507 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 20:26:25,963 WARNING [optim.py:503] (0/2) Scaling gradients by 0.06828752905130386, model_norm_threshold=271.39923095703125 +2024-09-16 20:26:26,172 WARNING [optim.py:575] (0/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.embed.weight with proportion 0.28, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=4.372e+06, grad_sumsq=1.717e+06, orig_rms_sq=2.546e+00 +2024-09-16 20:26:30,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=72400.0, ans=0.0 +2024-09-16 20:26:33,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.58 vs. limit=12.0 +2024-09-16 20:26:38,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=72440.0, ans=0.125 +2024-09-16 20:26:50,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=72440.0, ans=0.05 +2024-09-16 20:26:51,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=72440.0, ans=0.2 +2024-09-16 20:27:33,793 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.47 vs. limit=15.0 +2024-09-16 20:27:40,505 INFO [train.py:1198] (0/2) Epoch 5, batch 50, loss[loss=0.2756, ctc_loss=0.2171, cr_loss=0.3776, attn_decoder_loss=0.2737, over 29389.00 frames. ], tot_loss[loss=0.3114, ctc_loss=0.2444, cr_loss=0.4431, attn_decoder_loss=0.309, over 1267501.49 frames. ], batch size: 70, lr: 2.26e-02, grad_scale: 4.0 +2024-09-16 20:27:44,455 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.33 vs. limit=15.0 +2024-09-16 20:27:46,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=72600.0, ans=0.125 +2024-09-16 20:28:00,176 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.83 vs. 
limit=6.0 +2024-09-16 20:28:14,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=72680.0, ans=0.125 +2024-09-16 20:28:19,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=72680.0, ans=0.125 +2024-09-16 20:28:26,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=72720.0, ans=0.125 +2024-09-16 20:28:37,067 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.048e+02 1.241e+02 1.473e+02 1.722e+02 3.974e+03, threshold=2.946e+02, percent-clipped=9.0 +2024-09-16 20:28:38,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=72720.0, ans=0.125 +2024-09-16 20:28:58,586 INFO [train.py:1198] (0/2) Epoch 5, batch 100, loss[loss=0.2852, ctc_loss=0.2172, cr_loss=0.4136, attn_decoder_loss=0.2836, over 29548.00 frames. ], tot_loss[loss=0.3094, ctc_loss=0.242, cr_loss=0.445, attn_decoder_loss=0.307, over 2251938.46 frames. ], batch size: 76, lr: 2.25e-02, grad_scale: 8.0 +2024-09-16 20:29:07,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=72800.0, ans=0.125 +2024-09-16 20:29:09,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=72800.0, ans=0.0 +2024-09-16 20:29:22,984 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.83 vs. limit=15.0 +2024-09-16 20:29:27,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.45 vs. limit=12.0 +2024-09-16 20:29:28,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=72880.0, ans=0.0 +2024-09-16 20:29:35,013 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.73 vs. limit=22.5 +2024-09-16 20:29:49,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=72920.0, ans=0.2 +2024-09-16 20:30:01,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=72960.0, ans=0.1 +2024-09-16 20:30:14,763 INFO [train.py:1198] (0/2) Epoch 5, batch 150, loss[loss=0.2597, ctc_loss=0.1866, cr_loss=0.39, attn_decoder_loss=0.2592, over 29440.00 frames. ], tot_loss[loss=0.3033, ctc_loss=0.2352, cr_loss=0.439, attn_decoder_loss=0.3011, over 3046597.02 frames. ], batch size: 70, lr: 2.25e-02, grad_scale: 4.0 +2024-09-16 20:30:15,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=73000.0, ans=0.2 +2024-09-16 20:30:25,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=73000.0, ans=0.2 +2024-09-16 20:30:50,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.97 vs. 
limit=6.0 +2024-09-16 20:31:01,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=73120.0, ans=0.2 +2024-09-16 20:31:09,879 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.201e+01 1.170e+02 1.302e+02 1.516e+02 3.725e+02, threshold=2.604e+02, percent-clipped=3.0 +2024-09-16 20:31:13,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=73160.0, ans=0.125 +2024-09-16 20:31:26,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=73160.0, ans=0.125 +2024-09-16 20:31:29,486 INFO [train.py:1198] (0/2) Epoch 5, batch 200, loss[loss=0.3297, ctc_loss=0.2631, cr_loss=0.4635, attn_decoder_loss=0.3268, over 27661.00 frames. ], tot_loss[loss=0.3017, ctc_loss=0.2337, cr_loss=0.4383, attn_decoder_loss=0.2995, over 3658303.73 frames. ], batch size: 125, lr: 2.25e-02, grad_scale: 8.0 +2024-09-16 20:32:20,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=73320.0, ans=0.025 +2024-09-16 20:32:24,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=73320.0, ans=0.125 +2024-09-16 20:32:29,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=73320.0, ans=0.1 +2024-09-16 20:32:38,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=73360.0, ans=0.125 +2024-09-16 20:32:45,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=73400.0, ans=0.0 +2024-09-16 20:32:46,884 INFO [train.py:1198] (0/2) Epoch 5, batch 250, loss[loss=0.3133, ctc_loss=0.2491, cr_loss=0.4666, attn_decoder_loss=0.3101, over 29182.00 frames. ], tot_loss[loss=0.3009, ctc_loss=0.2326, cr_loss=0.4386, attn_decoder_loss=0.2987, over 4141098.65 frames. ], batch size: 100, lr: 2.25e-02, grad_scale: 4.0 +2024-09-16 20:33:16,519 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.05 vs. limit=10.0 +2024-09-16 20:33:18,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=73480.0, ans=0.125 +2024-09-16 20:33:44,206 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.445e+01 1.169e+02 1.336e+02 1.491e+02 2.357e+02, threshold=2.672e+02, percent-clipped=0.0 +2024-09-16 20:34:04,727 INFO [train.py:1198] (0/2) Epoch 5, batch 300, loss[loss=0.3048, ctc_loss=0.225, cr_loss=0.4492, attn_decoder_loss=0.3037, over 29511.00 frames. ], tot_loss[loss=0.2994, ctc_loss=0.2305, cr_loss=0.4359, attn_decoder_loss=0.2973, over 4509059.38 frames. 
], batch size: 92, lr: 2.24e-02, grad_scale: 8.0 +2024-09-16 20:34:24,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=73640.0, ans=0.125 +2024-09-16 20:34:26,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=73640.0, ans=0.0 +2024-09-16 20:34:29,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=73640.0, ans=0.0 +2024-09-16 20:34:35,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=73680.0, ans=0.125 +2024-09-16 20:34:46,032 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.25 vs. limit=22.5 +2024-09-16 20:34:56,396 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.25 vs. limit=15.0 +2024-09-16 20:35:19,695 INFO [train.py:1198] (0/2) Epoch 5, batch 350, loss[loss=0.274, ctc_loss=0.2039, cr_loss=0.3937, attn_decoder_loss=0.273, over 29303.00 frames. ], tot_loss[loss=0.2993, ctc_loss=0.23, cr_loss=0.4368, attn_decoder_loss=0.2973, over 4793841.99 frames. ], batch size: 71, lr: 2.24e-02, grad_scale: 4.0 +2024-09-16 20:35:20,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=73800.0, ans=0.2 +2024-09-16 20:35:27,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=73800.0, ans=0.125 +2024-09-16 20:35:43,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=73840.0, ans=0.1 +2024-09-16 20:35:52,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=73880.0, ans=0.1 +2024-09-16 20:35:59,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=73880.0, ans=0.1 +2024-09-16 20:36:01,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=73880.0, ans=0.0 +2024-09-16 20:36:20,664 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.525e+01 1.174e+02 1.354e+02 1.521e+02 2.144e+02, threshold=2.708e+02, percent-clipped=0.0 +2024-09-16 20:36:30,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=73960.0, ans=0.1 +2024-09-16 20:36:37,087 INFO [train.py:1198] (0/2) Epoch 5, batch 400, loss[loss=0.3037, ctc_loss=0.2353, cr_loss=0.4362, attn_decoder_loss=0.3016, over 29725.00 frames. ], tot_loss[loss=0.2987, ctc_loss=0.2294, cr_loss=0.4355, attn_decoder_loss=0.2967, over 5024252.52 frames. 
], batch size: 82, lr: 2.24e-02, grad_scale: 8.0 +2024-09-16 20:37:15,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=74080.0, ans=0.125 +2024-09-16 20:37:22,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=74120.0, ans=0.125 +2024-09-16 20:37:23,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=74120.0, ans=0.025 +2024-09-16 20:37:40,673 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=17.36 vs. limit=15.0 +2024-09-16 20:37:55,367 INFO [train.py:1198] (0/2) Epoch 5, batch 450, loss[loss=0.2979, ctc_loss=0.2177, cr_loss=0.439, attn_decoder_loss=0.297, over 29701.00 frames. ], tot_loss[loss=0.2984, ctc_loss=0.2291, cr_loss=0.4351, attn_decoder_loss=0.2964, over 5188388.82 frames. ], batch size: 83, lr: 2.24e-02, grad_scale: 4.0 +2024-09-16 20:37:57,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=74200.0, ans=0.0 +2024-09-16 20:38:09,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=74240.0, ans=0.125 +2024-09-16 20:38:36,070 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.67 vs. limit=15.0 +2024-09-16 20:38:39,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.86 vs. limit=15.0 +2024-09-16 20:38:56,441 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.507e+01 1.148e+02 1.317e+02 1.480e+02 2.097e+02, threshold=2.634e+02, percent-clipped=0.0 +2024-09-16 20:38:56,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=74360.0, ans=0.1 +2024-09-16 20:39:04,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=74360.0, ans=0.125 +2024-09-16 20:39:11,822 INFO [train.py:1198] (0/2) Epoch 5, batch 500, loss[loss=0.3175, ctc_loss=0.2443, cr_loss=0.4736, attn_decoder_loss=0.3151, over 29436.00 frames. ], tot_loss[loss=0.2975, ctc_loss=0.2281, cr_loss=0.4349, attn_decoder_loss=0.2955, over 5330729.26 frames. ], batch size: 94, lr: 2.23e-02, grad_scale: 8.0 +2024-09-16 20:39:15,737 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.96 vs. limit=22.5 +2024-09-16 20:39:18,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=74400.0, ans=0.125 +2024-09-16 20:39:59,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=74520.0, ans=0.0 +2024-09-16 20:39:59,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=74520.0, ans=0.125 +2024-09-16 20:40:11,874 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=16.34 vs. 
limit=15.0 +2024-09-16 20:40:29,206 INFO [train.py:1198] (0/2) Epoch 5, batch 550, loss[loss=0.3232, ctc_loss=0.2606, cr_loss=0.4924, attn_decoder_loss=0.3193, over 28860.00 frames. ], tot_loss[loss=0.298, ctc_loss=0.2286, cr_loss=0.4357, attn_decoder_loss=0.296, over 5423826.30 frames. ], batch size: 104, lr: 2.23e-02, grad_scale: 2.0 +2024-09-16 20:40:31,961 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.99 vs. limit=22.5 +2024-09-16 20:40:52,942 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.66 vs. limit=6.0 +2024-09-16 20:40:56,878 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:40:58,991 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=7.13 vs. limit=15.0 +2024-09-16 20:41:10,389 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:41:20,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=74720.0, ans=0.125 +2024-09-16 20:41:32,710 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.970e+01 1.190e+02 1.363e+02 1.590e+02 5.102e+02, threshold=2.726e+02, percent-clipped=4.0 +2024-09-16 20:41:44,819 INFO [train.py:1198] (0/2) Epoch 5, batch 600, loss[loss=0.3291, ctc_loss=0.2634, cr_loss=0.475, attn_decoder_loss=0.3258, over 29319.00 frames. ], tot_loss[loss=0.2981, ctc_loss=0.2286, cr_loss=0.436, attn_decoder_loss=0.2961, over 5510557.17 frames. ], batch size: 100, lr: 2.23e-02, grad_scale: 4.0 +2024-09-16 20:41:50,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=74800.0, ans=0.125 +2024-09-16 20:41:54,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=74800.0, ans=0.125 +2024-09-16 20:42:02,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=74840.0, ans=0.0 +2024-09-16 20:42:06,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=74840.0, ans=0.125 +2024-09-16 20:42:11,901 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.54 vs. limit=15.0 +2024-09-16 20:42:27,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=74880.0, ans=0.0 +2024-09-16 20:42:57,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=74960.0, ans=0.025 +2024-09-16 20:43:00,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.84 vs. limit=6.0 +2024-09-16 20:43:02,185 INFO [train.py:1198] (0/2) Epoch 5, batch 650, loss[loss=0.3022, ctc_loss=0.2348, cr_loss=0.433, attn_decoder_loss=0.3001, over 29753.00 frames. ], tot_loss[loss=0.2969, ctc_loss=0.2269, cr_loss=0.435, attn_decoder_loss=0.295, over 5587604.57 frames. 
], batch size: 81, lr: 2.23e-02, grad_scale: 4.0 +2024-09-16 20:43:02,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=75000.0, ans=0.1 +2024-09-16 20:43:05,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=75000.0, ans=0.2 +2024-09-16 20:43:07,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=75000.0, ans=0.025 +2024-09-16 20:43:15,335 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.74 vs. limit=15.0 +2024-09-16 20:43:19,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=75040.0, ans=0.125 +2024-09-16 20:43:26,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.63 vs. limit=6.0 +2024-09-16 20:43:31,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=75040.0, ans=0.125 +2024-09-16 20:43:33,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=75080.0, ans=0.2 +2024-09-16 20:43:44,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=75080.0, ans=0.125 +2024-09-16 20:43:47,881 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=8.12 vs. limit=15.0 +2024-09-16 20:43:56,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.58 vs. limit=15.0 +2024-09-16 20:44:07,671 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.399e+01 1.145e+02 1.260e+02 1.468e+02 2.396e+02, threshold=2.520e+02, percent-clipped=0.0 +2024-09-16 20:44:20,243 INFO [train.py:1198] (0/2) Epoch 5, batch 700, loss[loss=0.2613, ctc_loss=0.1816, cr_loss=0.3775, attn_decoder_loss=0.2618, over 29514.00 frames. ], tot_loss[loss=0.2972, ctc_loss=0.2268, cr_loss=0.4352, attn_decoder_loss=0.2954, over 5639490.16 frames. ], batch size: 76, lr: 2.22e-02, grad_scale: 8.0 +2024-09-16 20:44:21,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.91 vs. limit=15.0 +2024-09-16 20:44:25,687 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.79 vs. limit=6.0 +2024-09-16 20:44:40,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=75240.0, ans=0.125 +2024-09-16 20:44:47,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=75240.0, ans=0.0 +2024-09-16 20:45:22,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=75360.0, ans=0.125 +2024-09-16 20:45:35,699 INFO [train.py:1198] (0/2) Epoch 5, batch 750, loss[loss=0.3036, ctc_loss=0.226, cr_loss=0.4137, attn_decoder_loss=0.3031, over 29707.00 frames. 
], tot_loss[loss=0.2967, ctc_loss=0.2264, cr_loss=0.4352, attn_decoder_loss=0.2949, over 5677474.16 frames. ], batch size: 82, lr: 2.22e-02, grad_scale: 4.0 +2024-09-16 20:45:45,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=75400.0, ans=0.5 +2024-09-16 20:46:01,043 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.30 vs. limit=6.0 +2024-09-16 20:46:12,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=75480.0, ans=0.0 +2024-09-16 20:46:19,787 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.93 vs. limit=8.0 +2024-09-16 20:46:37,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.03 vs. limit=15.0 +2024-09-16 20:46:42,564 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.394e+01 1.181e+02 1.291e+02 1.489e+02 2.242e+02, threshold=2.582e+02, percent-clipped=0.0 +2024-09-16 20:46:42,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=75560.0, ans=0.125 +2024-09-16 20:46:49,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.17 vs. limit=10.0 +2024-09-16 20:46:49,671 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.26 vs. limit=10.0 +2024-09-16 20:46:53,301 INFO [train.py:1198] (0/2) Epoch 5, batch 800, loss[loss=0.268, ctc_loss=0.1995, cr_loss=0.3974, attn_decoder_loss=0.2668, over 29558.00 frames. ], tot_loss[loss=0.2968, ctc_loss=0.2263, cr_loss=0.4348, attn_decoder_loss=0.295, over 5707171.99 frames. ], batch size: 73, lr: 2.22e-02, grad_scale: 8.0 +2024-09-16 20:46:58,825 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=17.50 vs. limit=15.0 +2024-09-16 20:47:24,367 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.72 vs. limit=15.0 +2024-09-16 20:47:39,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=75720.0, ans=0.1 +2024-09-16 20:48:10,235 INFO [train.py:1198] (0/2) Epoch 5, batch 850, loss[loss=0.3071, ctc_loss=0.2251, cr_loss=0.463, attn_decoder_loss=0.3059, over 29724.00 frames. ], tot_loss[loss=0.2962, ctc_loss=0.2254, cr_loss=0.4337, attn_decoder_loss=0.2944, over 5734826.92 frames. 
], batch size: 89, lr: 2.22e-02, grad_scale: 4.0 +2024-09-16 20:48:13,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=75800.0, ans=0.0 +2024-09-16 20:48:14,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=75800.0, ans=0.125 +2024-09-16 20:48:29,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=75840.0, ans=0.04949747468305833 +2024-09-16 20:48:32,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=75840.0, ans=0.2 +2024-09-16 20:49:01,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=75920.0, ans=0.0 +2024-09-16 20:49:06,955 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.56 vs. limit=6.0 +2024-09-16 20:49:16,710 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.020e+02 1.208e+02 1.339e+02 1.559e+02 5.118e+02, threshold=2.679e+02, percent-clipped=4.0 +2024-09-16 20:49:26,156 INFO [train.py:1198] (0/2) Epoch 5, batch 900, loss[loss=0.2647, ctc_loss=0.1894, cr_loss=0.3993, attn_decoder_loss=0.2642, over 29604.00 frames. ], tot_loss[loss=0.2968, ctc_loss=0.2262, cr_loss=0.4336, attn_decoder_loss=0.2951, over 5737745.23 frames. ], batch size: 73, lr: 2.21e-02, grad_scale: 8.0 +2024-09-16 20:50:02,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=76080.0, ans=0.125 +2024-09-16 20:50:15,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=76120.0, ans=0.125 +2024-09-16 20:50:16,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=76120.0, ans=0.125 +2024-09-16 20:50:32,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=76160.0, ans=0.015 +2024-09-16 20:50:39,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=76160.0, ans=0.0 +2024-09-16 20:50:43,189 INFO [train.py:1198] (0/2) Epoch 5, batch 950, loss[loss=0.2705, ctc_loss=0.1946, cr_loss=0.4079, attn_decoder_loss=0.2699, over 29513.00 frames. ], tot_loss[loss=0.2973, ctc_loss=0.2266, cr_loss=0.4345, attn_decoder_loss=0.2955, over 5740328.23 frames. ], batch size: 74, lr: 2.21e-02, grad_scale: 4.0 +2024-09-16 20:51:01,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=76240.0, ans=0.0 +2024-09-16 20:51:02,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=76240.0, ans=0.125 +2024-09-16 20:51:22,211 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.52 vs. 
limit=15.0 +2024-09-16 20:51:47,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=76360.0, ans=0.125 +2024-09-16 20:51:48,758 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:51:52,299 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.45 vs. limit=15.0 +2024-09-16 20:51:52,956 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.976e+01 1.191e+02 1.361e+02 1.638e+02 5.772e+02, threshold=2.722e+02, percent-clipped=5.0 +2024-09-16 20:52:00,423 INFO [train.py:1198] (0/2) Epoch 5, batch 1000, loss[loss=0.2927, ctc_loss=0.2326, cr_loss=0.4673, attn_decoder_loss=0.289, over 29506.00 frames. ], tot_loss[loss=0.298, ctc_loss=0.2278, cr_loss=0.4355, attn_decoder_loss=0.2961, over 5735086.50 frames. ], batch size: 77, lr: 2.21e-02, grad_scale: 8.0 +2024-09-16 20:52:18,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=76440.0, ans=0.0 +2024-09-16 20:52:50,577 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:52:56,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=76520.0, ans=0.125 +2024-09-16 20:53:15,774 INFO [train.py:1198] (0/2) Epoch 5, batch 1050, loss[loss=0.3105, ctc_loss=0.2346, cr_loss=0.4399, attn_decoder_loss=0.3091, over 29691.00 frames. ], tot_loss[loss=0.2972, ctc_loss=0.2268, cr_loss=0.4352, attn_decoder_loss=0.2954, over 5742993.06 frames. ], batch size: 85, lr: 2.21e-02, grad_scale: 4.0 +2024-09-16 20:53:25,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=76600.0, ans=0.1 +2024-09-16 20:53:42,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=76640.0, ans=0.125 +2024-09-16 20:53:51,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=76680.0, ans=0.2 +2024-09-16 20:53:59,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=76680.0, ans=0.125 +2024-09-16 20:54:22,338 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.07 vs. limit=15.0 +2024-09-16 20:54:23,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=76760.0, ans=0.125 +2024-09-16 20:54:27,134 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.620e+01 1.158e+02 1.276e+02 1.580e+02 2.597e+02, threshold=2.552e+02, percent-clipped=0.0 +2024-09-16 20:54:33,692 INFO [train.py:1198] (0/2) Epoch 5, batch 1100, loss[loss=0.3017, ctc_loss=0.2303, cr_loss=0.4439, attn_decoder_loss=0.2997, over 29463.00 frames. ], tot_loss[loss=0.2972, ctc_loss=0.2265, cr_loss=0.4347, attn_decoder_loss=0.2954, over 5755162.66 frames. 
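The ScheduledFloat entries above log a hyperparameter's current value (`ans=...`) as a function of `batch_count`. A minimal sketch of one plausible mechanism is below: a value interpolated piecewise-linearly over the number of batches seen so far. The class name and schedule points are invented for illustration and are not icefall's actual `scaling.py` implementation.

```python
# Hypothetical sketch of a batch-count-keyed schedule; the real icefall
# ScheduledFloat may differ. Points are (batch_count, value) pairs.
class ScheduledFloatSketch:
    def __init__(self, *points):
        self.points = sorted(points)  # e.g. (0.0, 0.3), (20000.0, 0.1)

    def value(self, batch_count: float) -> float:
        pts = self.points
        if batch_count <= pts[0][0]:
            return pts[0][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if batch_count <= x1:
                # linear interpolation between neighbouring schedule points
                return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)
        return pts[-1][1]

sched = ScheduledFloatSketch((0.0, 0.3), (20000.0, 0.1))  # made-up schedule
print(sched.value(76000.0))  # 0.1: past the last point, the value is constant
```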
], batch size: 78, lr: 2.20e-02, grad_scale: 8.0 +2024-09-16 20:54:50,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=76840.0, ans=0.5 +2024-09-16 20:55:02,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.51 vs. limit=12.0 +2024-09-16 20:55:04,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=76880.0, ans=0.125 +2024-09-16 20:55:07,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_ff2.min_abs, batch_count=76880.0, ans=0.1 +2024-09-16 20:55:11,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=76880.0, ans=0.125 +2024-09-16 20:55:13,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.83 vs. limit=15.0 +2024-09-16 20:55:27,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=76920.0, ans=0.0 +2024-09-16 20:55:36,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=76960.0, ans=0.09899494936611666 +2024-09-16 20:55:41,616 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=10.70 vs. limit=15.0 +2024-09-16 20:55:42,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=76960.0, ans=0.125 +2024-09-16 20:55:51,261 INFO [train.py:1198] (0/2) Epoch 5, batch 1150, loss[loss=0.2733, ctc_loss=0.1975, cr_loss=0.4112, attn_decoder_loss=0.2726, over 29435.00 frames. ], tot_loss[loss=0.2969, ctc_loss=0.2261, cr_loss=0.4351, attn_decoder_loss=0.2951, over 5754822.06 frames. ], batch size: 78, lr: 2.20e-02, grad_scale: 4.0 +2024-09-16 20:56:14,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=77040.0, ans=0.125 +2024-09-16 20:56:22,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=77080.0, ans=10.0 +2024-09-16 20:56:22,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=77080.0, ans=0.125 +2024-09-16 20:56:23,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=77080.0, ans=0.125 +2024-09-16 20:56:23,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=77080.0, ans=0.05 +2024-09-16 20:56:26,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=77080.0, ans=0.125 +2024-09-16 20:57:02,552 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.643e+01 1.177e+02 1.305e+02 1.494e+02 2.713e+02, threshold=2.610e+02, percent-clipped=1.0 +2024-09-16 20:57:04,987 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=27.37 vs. 
limit=22.5 +2024-09-16 20:57:06,992 INFO [train.py:1198] (0/2) Epoch 5, batch 1200, loss[loss=0.2983, ctc_loss=0.2311, cr_loss=0.4384, attn_decoder_loss=0.296, over 29676.00 frames. ], tot_loss[loss=0.298, ctc_loss=0.2275, cr_loss=0.4367, attn_decoder_loss=0.2961, over 5747308.57 frames. ], batch size: 85, lr: 2.20e-02, grad_scale: 8.0 +2024-09-16 20:57:13,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=77200.0, ans=0.0 +2024-09-16 20:57:16,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=9.73 vs. limit=15.0 +2024-09-16 20:57:36,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=77240.0, ans=0.0 +2024-09-16 20:57:47,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=77280.0, ans=0.2 +2024-09-16 20:57:52,406 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.30 vs. limit=15.0 +2024-09-16 20:58:00,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=77320.0, ans=0.125 +2024-09-16 20:58:24,359 INFO [train.py:1198] (0/2) Epoch 5, batch 1250, loss[loss=0.3145, ctc_loss=0.2437, cr_loss=0.4705, attn_decoder_loss=0.3119, over 29542.00 frames. ], tot_loss[loss=0.2978, ctc_loss=0.2268, cr_loss=0.4366, attn_decoder_loss=0.296, over 5774005.75 frames. ], batch size: 92, lr: 2.20e-02, grad_scale: 4.0 +2024-09-16 20:58:27,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=77400.0, ans=0.125 +2024-09-16 20:59:01,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=77480.0, ans=0.125 +2024-09-16 20:59:16,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=77520.0, ans=0.125 +2024-09-16 20:59:21,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=77520.0, ans=0.0 +2024-09-16 20:59:28,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=77560.0, ans=0.2 +2024-09-16 20:59:31,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=77560.0, ans=0.125 +2024-09-16 20:59:34,677 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:59:38,726 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.029e+02 1.175e+02 1.290e+02 1.503e+02 2.372e+02, threshold=2.579e+02, percent-clipped=0.0 +2024-09-16 20:59:42,133 INFO [train.py:1198] (0/2) Epoch 5, batch 1300, loss[loss=0.3205, ctc_loss=0.2454, cr_loss=0.4524, attn_decoder_loss=0.3188, over 28363.00 frames. ], tot_loss[loss=0.2972, ctc_loss=0.2265, cr_loss=0.4366, attn_decoder_loss=0.2954, over 5780005.18 frames. 
], batch size: 111, lr: 2.19e-02, grad_scale: 8.0 +2024-09-16 20:59:47,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=77600.0, ans=0.025 +2024-09-16 21:00:26,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=77720.0, ans=22.5 +2024-09-16 21:00:30,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.64 vs. limit=12.0 +2024-09-16 21:00:35,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.01 vs. limit=15.0 +2024-09-16 21:00:37,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=77720.0, ans=0.09899494936611666 +2024-09-16 21:00:57,181 INFO [train.py:1198] (0/2) Epoch 5, batch 1350, loss[loss=0.3037, ctc_loss=0.2265, cr_loss=0.4312, attn_decoder_loss=0.3027, over 29759.00 frames. ], tot_loss[loss=0.2967, ctc_loss=0.2255, cr_loss=0.4357, attn_decoder_loss=0.2949, over 5797560.95 frames. ], batch size: 81, lr: 2.19e-02, grad_scale: 4.0 +2024-09-16 21:00:57,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=77800.0, ans=0.125 +2024-09-16 21:01:04,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=77800.0, ans=0.125 +2024-09-16 21:01:08,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.08 vs. limit=22.5 +2024-09-16 21:01:47,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=77920.0, ans=0.0 +2024-09-16 21:02:07,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=77960.0, ans=0.09899494936611666 +2024-09-16 21:02:12,850 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.348e+01 1.157e+02 1.281e+02 1.429e+02 2.166e+02, threshold=2.563e+02, percent-clipped=0.0 +2024-09-16 21:02:14,437 INFO [train.py:1198] (0/2) Epoch 5, batch 1400, loss[loss=0.2609, ctc_loss=0.1969, cr_loss=0.3994, attn_decoder_loss=0.2591, over 29580.00 frames. ], tot_loss[loss=0.296, ctc_loss=0.2245, cr_loss=0.4344, attn_decoder_loss=0.2943, over 5808507.55 frames. 
], batch size: 69, lr: 2.19e-02, grad_scale: 8.0 +2024-09-16 21:02:25,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=78000.0, ans=0.125 +2024-09-16 21:02:41,651 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:02:57,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=78080.0, ans=0.125 +2024-09-16 21:03:00,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=78120.0, ans=0.0 +2024-09-16 21:03:23,137 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:03:30,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=78200.0, ans=0.0 +2024-09-16 21:03:31,880 INFO [train.py:1198] (0/2) Epoch 5, batch 1450, loss[loss=0.3127, ctc_loss=0.233, cr_loss=0.4682, attn_decoder_loss=0.3111, over 29417.00 frames. ], tot_loss[loss=0.2972, ctc_loss=0.226, cr_loss=0.4366, attn_decoder_loss=0.2954, over 5803977.94 frames. ], batch size: 94, lr: 2.19e-02, grad_scale: 4.0 +2024-09-16 21:03:32,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.19 vs. limit=15.0 +2024-09-16 21:03:38,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=78200.0, ans=0.125 +2024-09-16 21:03:48,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=78240.0, ans=0.125 +2024-09-16 21:03:55,151 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.42 vs. limit=15.0 +2024-09-16 21:04:39,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=78360.0, ans=0.125 +2024-09-16 21:04:39,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=78360.0, ans=0.125 +2024-09-16 21:04:47,756 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.066e+02 1.240e+02 1.382e+02 1.635e+02 6.361e+02, threshold=2.763e+02, percent-clipped=6.0 +2024-09-16 21:04:47,778 INFO [train.py:1198] (0/2) Epoch 5, batch 1500, loss[loss=0.327, ctc_loss=0.2575, cr_loss=0.4849, attn_decoder_loss=0.3239, over 29623.00 frames. ], tot_loss[loss=0.2975, ctc_loss=0.2258, cr_loss=0.436, attn_decoder_loss=0.2958, over 5803510.91 frames. ], batch size: 86, lr: 2.18e-02, grad_scale: 8.0 +2024-09-16 21:04:49,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=78400.0, ans=0.0 +2024-09-16 21:05:03,827 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.87 vs. 
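The `Whitening: ... metric=X vs. limit=Y` entries compare a per-module statistic against a cap. One plausible reading, sketched below under that assumption, is an eigenvalue-spread measure of the channel covariance: it is 1.0 for perfectly whitened (isotropic) activations and grows as the spectrum spreads. This is illustrative only, not necessarily the statistic icefall's `Whiten` module computes.

```python
import torch

def whitening_metric(x: torch.Tensor) -> float:
    """Assumed metric: mean of squared covariance eigenvalues divided by the
    squared mean eigenvalue; equals 1.0 iff all eigenvalues are equal."""
    x = x - x.mean(dim=0)                 # x: (num_frames, num_channels)
    cov = (x.T @ x) / x.shape[0]          # channel covariance
    eigs = torch.linalg.eigvalsh(cov)     # real eigenvalues, ascending
    return ((eigs ** 2).mean() / eigs.mean() ** 2).item()

x = torch.randn(1000, 192)                # roughly white input
print(whitening_metric(x))                # close to 1.0; larger = less white
```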
limit=15.0 +2024-09-16 21:05:21,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=78480.0, ans=0.07 +2024-09-16 21:05:22,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=78480.0, ans=0.125 +2024-09-16 21:05:34,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=78520.0, ans=0.2 +2024-09-16 21:05:46,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=78520.0, ans=0.125 +2024-09-16 21:05:47,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=78520.0, ans=0.0 +2024-09-16 21:06:05,492 INFO [train.py:1198] (0/2) Epoch 5, batch 1550, loss[loss=0.3236, ctc_loss=0.2546, cr_loss=0.4816, attn_decoder_loss=0.3205, over 29500.00 frames. ], tot_loss[loss=0.2973, ctc_loss=0.2257, cr_loss=0.4354, attn_decoder_loss=0.2955, over 5779019.23 frames. ], batch size: 90, lr: 2.18e-02, grad_scale: 4.0 +2024-09-16 21:06:08,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=78600.0, ans=0.07 +2024-09-16 21:06:13,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=78600.0, ans=0.04949747468305833 +2024-09-16 21:06:38,276 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.23 vs. limit=15.0 +2024-09-16 21:06:46,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=78680.0, ans=0.1 +2024-09-16 21:06:47,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=78680.0, ans=0.125 +2024-09-16 21:06:48,263 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=10.40 vs. limit=15.0 +2024-09-16 21:06:58,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=78720.0, ans=0.125 +2024-09-16 21:07:15,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=78760.0, ans=0.125 +2024-09-16 21:07:22,403 INFO [train.py:1198] (0/2) Epoch 5, batch 1600, loss[loss=0.2908, ctc_loss=0.2159, cr_loss=0.4255, attn_decoder_loss=0.2897, over 29666.00 frames. ], tot_loss[loss=0.297, ctc_loss=0.2257, cr_loss=0.4345, attn_decoder_loss=0.2952, over 5761145.43 frames. 
], batch size: 85, lr: 2.18e-02, grad_scale: 8.0 +2024-09-16 21:07:23,871 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.848e+01 1.266e+02 1.474e+02 1.762e+02 4.006e+02, threshold=2.948e+02, percent-clipped=2.0 +2024-09-16 21:07:28,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=78800.0, ans=0.125 +2024-09-16 21:07:28,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=78800.0, ans=0.125 +2024-09-16 21:07:42,711 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.74 vs. limit=15.0 +2024-09-16 21:07:50,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=78840.0, ans=0.125 +2024-09-16 21:08:37,894 INFO [train.py:1198] (0/2) Epoch 5, batch 1650, loss[loss=0.3068, ctc_loss=0.2261, cr_loss=0.4277, attn_decoder_loss=0.3062, over 29721.00 frames. ], tot_loss[loss=0.2967, ctc_loss=0.2257, cr_loss=0.4342, attn_decoder_loss=0.295, over 5755215.23 frames. ], batch size: 89, lr: 2.18e-02, grad_scale: 4.0 +2024-09-16 21:08:50,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=79000.0, ans=0.125 +2024-09-16 21:08:56,342 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:09:30,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=79120.0, ans=15.0 +2024-09-16 21:09:55,749 INFO [train.py:1198] (0/2) Epoch 5, batch 1700, loss[loss=0.2725, ctc_loss=0.2092, cr_loss=0.396, attn_decoder_loss=0.2708, over 29581.00 frames. ], tot_loss[loss=0.296, ctc_loss=0.2247, cr_loss=0.4332, attn_decoder_loss=0.2943, over 5778410.91 frames. ], batch size: 69, lr: 2.17e-02, grad_scale: 8.0 +2024-09-16 21:10:00,289 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.635e+01 1.159e+02 1.263e+02 1.450e+02 2.662e+02, threshold=2.527e+02, percent-clipped=0.0 +2024-09-16 21:10:02,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=79200.0, ans=0.0 +2024-09-16 21:10:10,414 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.94 vs. limit=15.0 +2024-09-16 21:10:11,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=79240.0, ans=0.125 +2024-09-16 21:10:15,906 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:11:07,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=79360.0, ans=0.025 +2024-09-16 21:11:12,998 INFO [train.py:1198] (0/2) Epoch 5, batch 1750, loss[loss=0.2599, ctc_loss=0.1877, cr_loss=0.4078, attn_decoder_loss=0.2588, over 29317.00 frames. ], tot_loss[loss=0.2952, ctc_loss=0.2236, cr_loss=0.4328, attn_decoder_loss=0.2935, over 5786438.33 frames. 
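In the `Clipping_scale=2.0` warnings, the five grad-norm numbers read naturally as min/25%/median/75%/max (or similar percentiles) of recently observed gradient norms, and the logged threshold equals `Clipping_scale` times the median: in the entry above, 2.0 * 1.474e+02 = 2.948e+02. A sketch under that reading (not the icefall optimizer code) is below.

```python
import torch

def grad_norm_stats(grad_norms: list[float], clipping_scale: float = 2.0):
    """Reproduce the logged quartiles/threshold/percent-clipped fields,
    assuming the threshold is clipping_scale times the median norm."""
    t = torch.tensor(grad_norms)
    quartiles = torch.quantile(t, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * quartiles[2].item()
    percent_clipped = 100.0 * (t > threshold).float().mean().item()
    return quartiles.tolist(), threshold, percent_clipped
```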
], batch size: 67, lr: 2.17e-02, grad_scale: 4.0 +2024-09-16 21:11:21,705 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=3.99 vs. limit=15.0 +2024-09-16 21:11:25,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=79400.0, ans=0.0 +2024-09-16 21:11:40,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=79440.0, ans=0.125 +2024-09-16 21:11:45,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=79480.0, ans=0.125 +2024-09-16 21:11:54,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=79480.0, ans=0.0 +2024-09-16 21:12:28,474 INFO [train.py:1198] (0/2) Epoch 5, batch 1800, loss[loss=0.2906, ctc_loss=0.2098, cr_loss=0.4493, attn_decoder_loss=0.2896, over 29690.00 frames. ], tot_loss[loss=0.2953, ctc_loss=0.2236, cr_loss=0.4335, attn_decoder_loss=0.2936, over 5790513.24 frames. ], batch size: 83, lr: 2.17e-02, grad_scale: 8.0 +2024-09-16 21:12:33,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=79600.0, ans=0.125 +2024-09-16 21:12:34,620 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.637e+01 1.099e+02 1.224e+02 1.443e+02 2.616e+02, threshold=2.449e+02, percent-clipped=2.0 +2024-09-16 21:12:45,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=79640.0, ans=0.0 +2024-09-16 21:12:49,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=79640.0, ans=0.125 +2024-09-16 21:12:50,720 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=8.36 vs. limit=12.0 +2024-09-16 21:13:05,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=79680.0, ans=0.2 +2024-09-16 21:13:08,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=79680.0, ans=0.125 +2024-09-16 21:13:21,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=79720.0, ans=0.125 +2024-09-16 21:13:30,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.72 vs. limit=8.0 +2024-09-16 21:13:39,200 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.60 vs. limit=6.0 +2024-09-16 21:13:44,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=79800.0, ans=0.1 +2024-09-16 21:13:46,155 INFO [train.py:1198] (0/2) Epoch 5, batch 1850, loss[loss=0.3008, ctc_loss=0.2244, cr_loss=0.4033, attn_decoder_loss=0.3004, over 29617.00 frames. ], tot_loss[loss=0.2948, ctc_loss=0.2228, cr_loss=0.4327, attn_decoder_loss=0.2932, over 5797810.63 frames. 
], batch size: 86, lr: 2.17e-02, grad_scale: 4.0 +2024-09-16 21:13:55,935 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.97 vs. limit=6.0 +2024-09-16 21:14:04,648 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.97 vs. limit=22.5 +2024-09-16 21:14:05,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=79840.0, ans=0.04949747468305833 +2024-09-16 21:14:13,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=79840.0, ans=0.125 +2024-09-16 21:14:25,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=79880.0, ans=0.125 +2024-09-16 21:14:44,459 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.04 vs. limit=15.0 +2024-09-16 21:14:52,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=79960.0, ans=0.125 +2024-09-16 21:14:54,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=79960.0, ans=0.0 +2024-09-16 21:15:01,855 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-20000.pt +2024-09-16 21:15:10,196 INFO [train.py:1198] (0/2) Epoch 5, batch 1900, loss[loss=0.3072, ctc_loss=0.2247, cr_loss=0.4381, attn_decoder_loss=0.3066, over 29673.00 frames. ], tot_loss[loss=0.2961, ctc_loss=0.2242, cr_loss=0.435, attn_decoder_loss=0.2944, over 5805376.54 frames. ], batch size: 89, lr: 2.16e-02, grad_scale: 8.0 +2024-09-16 21:15:17,676 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.933e+01 1.145e+02 1.241e+02 1.387e+02 2.102e+02, threshold=2.481e+02, percent-clipped=0.0 +2024-09-16 21:15:18,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=80000.0, ans=0.125 +2024-09-16 21:15:19,929 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.35 vs. 
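The experiment directory in the checkpoint path above encodes `ctc-loss-scale-0.1`, `aed-loss-scale-0.9` and `cr-loss-scale-0.02`, and those weights reproduce the logged totals: for batch 1900, 0.1 * 0.2242 + 0.9 * 0.2944 + 0.02 * 0.435 = 0.2961 to four decimals. A sketch of that combination (the helper name is ours):

```python
# Loss weights taken from the experiment directory name; the combination
# below matches the tot_loss entries in this log to rounding precision.
CTC_SCALE, AED_SCALE, CR_SCALE = 0.1, 0.9, 0.02

def combined_loss(ctc_loss, attn_decoder_loss, cr_loss):
    return CTC_SCALE * ctc_loss + AED_SCALE * attn_decoder_loss + CR_SCALE * cr_loss

# Check against the batch-1900 entry above.
assert abs(combined_loss(0.2242, 0.2944, 0.435) - 0.2961) < 5e-4
```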
limit=6.0 +2024-09-16 21:15:28,894 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:15:36,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=80040.0, ans=0.125 +2024-09-16 21:15:36,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=80040.0, ans=0.025 +2024-09-16 21:15:39,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=80080.0, ans=0.2 +2024-09-16 21:15:51,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=80080.0, ans=0.1 +2024-09-16 21:15:59,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=80120.0, ans=0.125 +2024-09-16 21:16:03,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=80120.0, ans=0.0 +2024-09-16 21:16:07,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.39 vs. limit=15.0 +2024-09-16 21:16:26,183 INFO [train.py:1198] (0/2) Epoch 5, batch 1950, loss[loss=0.2947, ctc_loss=0.2309, cr_loss=0.4213, attn_decoder_loss=0.2924, over 29454.00 frames. ], tot_loss[loss=0.2974, ctc_loss=0.2253, cr_loss=0.4373, attn_decoder_loss=0.2957, over 5820839.45 frames. ], batch size: 78, lr: 2.16e-02, grad_scale: 4.0 +2024-09-16 21:16:31,757 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.01 vs. limit=10.0 +2024-09-16 21:16:32,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=80200.0, ans=0.95 +2024-09-16 21:16:40,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=80240.0, ans=0.125 +2024-09-16 21:16:58,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=80280.0, ans=0.125 +2024-09-16 21:17:19,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=80320.0, ans=10.0 +2024-09-16 21:17:22,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=80320.0, ans=0.125 +2024-09-16 21:17:28,866 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:17:30,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=80360.0, ans=0.0 +2024-09-16 21:17:39,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=80360.0, ans=0.125 +2024-09-16 21:17:43,609 INFO [train.py:1198] (0/2) Epoch 5, batch 2000, loss[loss=0.2614, ctc_loss=0.1883, cr_loss=0.4055, attn_decoder_loss=0.2605, over 29360.00 frames. ], tot_loss[loss=0.2978, ctc_loss=0.226, cr_loss=0.4376, attn_decoder_loss=0.2961, over 5800116.05 frames. 
], batch size: 67, lr: 2.16e-02, grad_scale: 8.0 +2024-09-16 21:17:52,706 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.796e+01 1.236e+02 1.402e+02 1.608e+02 2.421e+02, threshold=2.804e+02, percent-clipped=0.0 +2024-09-16 21:18:19,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.53 vs. limit=22.5 +2024-09-16 21:18:26,960 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.93 vs. limit=15.0 +2024-09-16 21:18:38,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=80520.0, ans=0.0 +2024-09-16 21:18:50,170 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.29 vs. limit=12.0 +2024-09-16 21:18:56,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=80560.0, ans=0.125 +2024-09-16 21:19:01,084 INFO [train.py:1198] (0/2) Epoch 5, batch 2050, loss[loss=0.2642, ctc_loss=0.1991, cr_loss=0.3997, attn_decoder_loss=0.2625, over 29427.00 frames. ], tot_loss[loss=0.2964, ctc_loss=0.225, cr_loss=0.4361, attn_decoder_loss=0.2947, over 5791050.79 frames. ], batch size: 70, lr: 2.16e-02, grad_scale: 4.0 +2024-09-16 21:19:01,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=80600.0, ans=0.125 +2024-09-16 21:19:31,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=80680.0, ans=0.0 +2024-09-16 21:19:38,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.20 vs. limit=22.5 +2024-09-16 21:19:47,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.53 vs. limit=22.5 +2024-09-16 21:20:17,211 INFO [train.py:1198] (0/2) Epoch 5, batch 2100, loss[loss=0.3001, ctc_loss=0.2248, cr_loss=0.4456, attn_decoder_loss=0.2985, over 29760.00 frames. ], tot_loss[loss=0.296, ctc_loss=0.2242, cr_loss=0.4353, attn_decoder_loss=0.2944, over 5802766.02 frames. ], batch size: 81, lr: 2.15e-02, grad_scale: 8.0 +2024-09-16 21:20:18,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=80800.0, ans=0.125 +2024-09-16 21:20:20,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=80800.0, ans=0.1 +2024-09-16 21:20:22,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=12.55 vs. 
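The `grad_scale` field alternating between 4.0 and 8.0 looks like dynamic loss scaling for mixed-precision training. A generic PyTorch sketch is below; `init_scale` and the `loss_fn` helper are assumptions, and icefall's training loop may manage the scale differently.

```python
import torch

scaler = torch.cuda.amp.GradScaler(init_scale=4.0)  # starting scale is a guess

def train_step(model, batch, optimizer, loss_fn):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = loss_fn(model, batch)   # hypothetical loss helper
    scaler.scale(loss).backward()      # backprop the scaled loss
    scaler.step(optimizer)             # unscales, skips the step on inf/nan
    scaler.update()                    # grows or shrinks the scale over time
```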
limit=15.0 +2024-09-16 21:20:24,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=80800.0, ans=0.125 +2024-09-16 21:20:26,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=80800.0, ans=0.0 +2024-09-16 21:20:27,415 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.761e+01 1.220e+02 1.373e+02 1.548e+02 8.609e+02, threshold=2.746e+02, percent-clipped=3.0 +2024-09-16 21:21:08,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=80920.0, ans=0.125 +2024-09-16 21:21:15,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=80960.0, ans=0.125 +2024-09-16 21:21:19,321 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.79 vs. limit=12.0 +2024-09-16 21:21:31,961 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.41 vs. limit=22.5 +2024-09-16 21:21:34,125 INFO [train.py:1198] (0/2) Epoch 5, batch 2150, loss[loss=0.2946, ctc_loss=0.2335, cr_loss=0.4647, attn_decoder_loss=0.2911, over 29448.00 frames. ], tot_loss[loss=0.295, ctc_loss=0.2227, cr_loss=0.4345, attn_decoder_loss=0.2933, over 5816769.33 frames. ], batch size: 78, lr: 2.15e-02, grad_scale: 4.0 +2024-09-16 21:21:34,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=81000.0, ans=0.125 +2024-09-16 21:21:41,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=81000.0, ans=0.025 +2024-09-16 21:21:57,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=81040.0, ans=0.125 +2024-09-16 21:21:59,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.37 vs. limit=15.0 +2024-09-16 21:22:02,543 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.19 vs. limit=6.0 +2024-09-16 21:22:03,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=81040.0, ans=0.1 +2024-09-16 21:22:21,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=81120.0, ans=0.1 +2024-09-16 21:22:51,825 INFO [train.py:1198] (0/2) Epoch 5, batch 2200, loss[loss=0.2988, ctc_loss=0.2244, cr_loss=0.428, attn_decoder_loss=0.2976, over 29633.00 frames. ], tot_loss[loss=0.2948, ctc_loss=0.2227, cr_loss=0.4344, attn_decoder_loss=0.2931, over 5813003.44 frames. 
], batch size: 86, lr: 2.15e-02, grad_scale: 8.0 +2024-09-16 21:23:02,278 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.674e+01 1.183e+02 1.300e+02 1.517e+02 2.352e+02, threshold=2.600e+02, percent-clipped=0.0 +2024-09-16 21:23:12,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=81240.0, ans=0.125 +2024-09-16 21:23:44,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=81320.0, ans=0.0 +2024-09-16 21:23:44,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=81320.0, ans=0.0 +2024-09-16 21:23:52,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=81360.0, ans=0.2 +2024-09-16 21:24:02,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.02 vs. limit=22.5 +2024-09-16 21:24:07,126 INFO [train.py:1198] (0/2) Epoch 5, batch 2250, loss[loss=0.3039, ctc_loss=0.2287, cr_loss=0.4323, attn_decoder_loss=0.3026, over 29738.00 frames. ], tot_loss[loss=0.2947, ctc_loss=0.2225, cr_loss=0.4344, attn_decoder_loss=0.293, over 5811258.53 frames. ], batch size: 82, lr: 2.15e-02, grad_scale: 4.0 +2024-09-16 21:24:27,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.97 vs. limit=22.5 +2024-09-16 21:24:36,227 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.60 vs. limit=15.0 +2024-09-16 21:24:38,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=81480.0, ans=0.0 +2024-09-16 21:24:43,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=81480.0, ans=0.0 +2024-09-16 21:25:23,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=81600.0, ans=0.125 +2024-09-16 21:25:23,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=81600.0, ans=0.07 +2024-09-16 21:25:25,073 INFO [train.py:1198] (0/2) Epoch 5, batch 2300, loss[loss=0.2613, ctc_loss=0.1896, cr_loss=0.3921, attn_decoder_loss=0.2606, over 29712.00 frames. ], tot_loss[loss=0.2939, ctc_loss=0.2214, cr_loss=0.4326, attn_decoder_loss=0.2923, over 5799344.04 frames. 
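To turn lines like the ones above into training curves, a small parser is sketched below; it assumes the exact `tot_loss[...]`, `lr: ...` layout seen in this log and returns `None` for lines in any other format.

```python
import re

# Matches e.g. "Epoch 5, batch 2350, loss[...], tot_loss[loss=0.2939,
# ctc_loss=0.2214, cr_loss=0.4324, attn_decoder_loss=0.2923, ...], lr: 2.14e-02"
PAT = re.compile(
    r"Epoch (\d+), batch (\d+), .*?tot_loss\[loss=([\d.]+), ctc_loss=([\d.]+), "
    r"cr_loss=([\d.]+), attn_decoder_loss=([\d.]+).*?lr: ([\d.e+-]+)"
)

def parse_line(line: str):
    m = PAT.search(line)
    if m is None:
        return None
    epoch, batch = int(m.group(1)), int(m.group(2))
    loss, ctc, cr, attn = (float(m.group(i)) for i in range(3, 7))
    return epoch, batch, loss, ctc, cr, attn, float(m.group(7))
```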
], batch size: 72, lr: 2.15e-02, grad_scale: 8.0 +2024-09-16 21:25:38,312 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.849e+01 1.191e+02 1.337e+02 1.602e+02 2.823e+02, threshold=2.675e+02, percent-clipped=3.0 +2024-09-16 21:26:15,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=81720.0, ans=0.125 +2024-09-16 21:26:25,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=81760.0, ans=0.07 +2024-09-16 21:26:33,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=81760.0, ans=0.125 +2024-09-16 21:26:38,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=81760.0, ans=0.2 +2024-09-16 21:26:42,305 INFO [train.py:1198] (0/2) Epoch 5, batch 2350, loss[loss=0.3043, ctc_loss=0.2254, cr_loss=0.4429, attn_decoder_loss=0.3032, over 29702.00 frames. ], tot_loss[loss=0.2939, ctc_loss=0.2214, cr_loss=0.4324, attn_decoder_loss=0.2923, over 5804360.97 frames. ], batch size: 83, lr: 2.14e-02, grad_scale: 4.0 +2024-09-16 21:26:51,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=81800.0, ans=0.0 +2024-09-16 21:27:11,153 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:27:31,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.03 vs. limit=15.0 +2024-09-16 21:27:38,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=81920.0, ans=0.125 +2024-09-16 21:27:44,636 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:27:58,047 INFO [train.py:1198] (0/2) Epoch 5, batch 2400, loss[loss=0.2922, ctc_loss=0.2211, cr_loss=0.4334, attn_decoder_loss=0.2905, over 29522.00 frames. ], tot_loss[loss=0.2946, ctc_loss=0.2221, cr_loss=0.4337, attn_decoder_loss=0.293, over 5806924.88 frames. ], batch size: 76, lr: 2.14e-02, grad_scale: 8.0 +2024-09-16 21:28:12,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=82040.0, ans=0.1 +2024-09-16 21:28:13,106 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.326e+01 1.225e+02 1.360e+02 1.581e+02 2.424e+02, threshold=2.721e+02, percent-clipped=0.0 +2024-09-16 21:28:27,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=82080.0, ans=0.125 +2024-09-16 21:28:47,760 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.25 vs. limit=15.0 +2024-09-16 21:28:52,532 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.40 vs. limit=8.0 +2024-09-16 21:29:14,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=82200.0, ans=10.0 +2024-09-16 21:29:15,780 INFO [train.py:1198] (0/2) Epoch 5, batch 2450, loss[loss=0.3028, ctc_loss=0.2221, cr_loss=0.4488, attn_decoder_loss=0.3018, over 29699.00 frames. 
], tot_loss[loss=0.2958, ctc_loss=0.2235, cr_loss=0.4346, attn_decoder_loss=0.2942, over 5783629.11 frames. ], batch size: 82, lr: 2.14e-02, grad_scale: 4.0 +2024-09-16 21:30:01,080 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.78 vs. limit=6.0 +2024-09-16 21:30:24,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.76 vs. limit=15.0 +2024-09-16 21:30:33,951 INFO [train.py:1198] (0/2) Epoch 5, batch 2500, loss[loss=0.3028, ctc_loss=0.2177, cr_loss=0.4498, attn_decoder_loss=0.3023, over 29625.00 frames. ], tot_loss[loss=0.2957, ctc_loss=0.2231, cr_loss=0.4348, attn_decoder_loss=0.2941, over 5793403.88 frames. ], batch size: 86, lr: 2.14e-02, grad_scale: 8.0 +2024-09-16 21:30:50,576 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.347e+01 1.183e+02 1.324e+02 1.493e+02 3.213e+02, threshold=2.647e+02, percent-clipped=2.0 +2024-09-16 21:31:03,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.79 vs. limit=12.0 +2024-09-16 21:31:15,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=82480.0, ans=0.125 +2024-09-16 21:31:19,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.51 vs. limit=15.0 +2024-09-16 21:31:26,276 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.53 vs. limit=15.0 +2024-09-16 21:31:31,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=82520.0, ans=0.1 +2024-09-16 21:31:49,243 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=6.17 vs. limit=12.0 +2024-09-16 21:31:49,667 INFO [train.py:1198] (0/2) Epoch 5, batch 2550, loss[loss=0.2563, ctc_loss=0.1842, cr_loss=0.3872, attn_decoder_loss=0.2557, over 29335.00 frames. ], tot_loss[loss=0.2954, ctc_loss=0.2226, cr_loss=0.4339, attn_decoder_loss=0.2938, over 5797732.99 frames. 
], batch size: 67, lr: 2.13e-02, grad_scale: 4.0 +2024-09-16 21:31:51,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=82600.0, ans=0.125 +2024-09-16 21:32:04,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=82640.0, ans=0.0 +2024-09-16 21:32:06,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=82640.0, ans=0.125 +2024-09-16 21:32:32,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=82680.0, ans=15.0 +2024-09-16 21:32:34,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=82720.0, ans=0.1 +2024-09-16 21:32:45,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=82720.0, ans=0.1 +2024-09-16 21:32:48,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=82760.0, ans=0.0 +2024-09-16 21:33:02,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=82760.0, ans=0.2 +2024-09-16 21:33:04,972 INFO [train.py:1198] (0/2) Epoch 5, batch 2600, loss[loss=0.2778, ctc_loss=0.2062, cr_loss=0.4515, attn_decoder_loss=0.2757, over 29437.00 frames. ], tot_loss[loss=0.2957, ctc_loss=0.2229, cr_loss=0.4352, attn_decoder_loss=0.2941, over 5793953.49 frames. ], batch size: 78, lr: 2.13e-02, grad_scale: 8.0 +2024-09-16 21:33:25,236 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.991e+01 1.177e+02 1.349e+02 1.549e+02 3.059e+02, threshold=2.698e+02, percent-clipped=1.0 +2024-09-16 21:33:33,907 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.50 vs. limit=15.0 +2024-09-16 21:33:37,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=82880.0, ans=0.015 +2024-09-16 21:33:45,904 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=22.22 vs. limit=22.5 +2024-09-16 21:33:56,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=82920.0, ans=0.05 +2024-09-16 21:34:02,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=82920.0, ans=0.5 +2024-09-16 21:34:03,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=82920.0, ans=0.2 +2024-09-16 21:34:22,384 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.75 vs. limit=10.0 +2024-09-16 21:34:24,551 INFO [train.py:1198] (0/2) Epoch 5, batch 2650, loss[loss=0.3197, ctc_loss=0.2515, cr_loss=0.4735, attn_decoder_loss=0.3168, over 29224.00 frames. ], tot_loss[loss=0.2962, ctc_loss=0.2234, cr_loss=0.4363, attn_decoder_loss=0.2946, over 5799687.67 frames. 
], batch size: 100, lr: 2.13e-02, grad_scale: 4.0 +2024-09-16 21:34:25,468 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.98 vs. limit=22.5 +2024-09-16 21:34:56,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=83080.0, ans=0.07 +2024-09-16 21:35:09,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=83120.0, ans=0.05 +2024-09-16 21:35:22,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.63 vs. limit=6.0 +2024-09-16 21:35:24,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.44 vs. limit=22.5 +2024-09-16 21:35:28,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=83160.0, ans=0.1 +2024-09-16 21:35:31,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.55 vs. limit=15.0 +2024-09-16 21:35:39,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=83200.0, ans=0.0 +2024-09-16 21:35:40,224 INFO [train.py:1198] (0/2) Epoch 5, batch 2700, loss[loss=0.3106, ctc_loss=0.2316, cr_loss=0.4461, attn_decoder_loss=0.3094, over 29495.00 frames. ], tot_loss[loss=0.2961, ctc_loss=0.2232, cr_loss=0.4364, attn_decoder_loss=0.2945, over 5795819.39 frames. ], batch size: 87, lr: 2.13e-02, grad_scale: 8.0 +2024-09-16 21:35:55,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=83240.0, ans=0.0 +2024-09-16 21:35:58,611 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:35:59,713 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.832e+01 1.218e+02 1.347e+02 1.527e+02 8.149e+02, threshold=2.695e+02, percent-clipped=3.0 +2024-09-16 21:36:33,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=83320.0, ans=0.0 +2024-09-16 21:36:36,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=83320.0, ans=0.125 +2024-09-16 21:36:41,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=83360.0, ans=0.125 +2024-09-16 21:36:41,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.14 vs. limit=10.0 +2024-09-16 21:36:56,081 INFO [train.py:1198] (0/2) Epoch 5, batch 2750, loss[loss=0.2842, ctc_loss=0.2145, cr_loss=0.4354, attn_decoder_loss=0.2823, over 29523.00 frames. ], tot_loss[loss=0.2948, ctc_loss=0.2222, cr_loss=0.4356, attn_decoder_loss=0.2932, over 5793569.30 frames. ], batch size: 75, lr: 2.12e-02, grad_scale: 4.0 +2024-09-16 21:37:46,046 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.91 vs. 
limit=15.0 +2024-09-16 21:38:15,626 INFO [train.py:1198] (0/2) Epoch 5, batch 2800, loss[loss=0.347, ctc_loss=0.3086, cr_loss=0.456, attn_decoder_loss=0.3411, over 20284.00 frames. ], tot_loss[loss=0.2951, ctc_loss=0.2226, cr_loss=0.4352, attn_decoder_loss=0.2935, over 5775675.79 frames. ], batch size: 210, lr: 2.12e-02, grad_scale: 8.0 +2024-09-16 21:38:24,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=83600.0, ans=0.125 +2024-09-16 21:38:25,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.56 vs. limit=15.0 +2024-09-16 21:38:29,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=83640.0, ans=0.125 +2024-09-16 21:38:36,672 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.006e+02 1.137e+02 1.290e+02 1.487e+02 2.968e+02, threshold=2.580e+02, percent-clipped=1.0 +2024-09-16 21:38:37,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.02 vs. limit=12.0 +2024-09-16 21:38:43,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=83640.0, ans=0.1 +2024-09-16 21:38:44,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=83680.0, ans=0.125 +2024-09-16 21:38:49,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=83680.0, ans=0.0 +2024-09-16 21:38:52,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=83680.0, ans=0.0 +2024-09-16 21:39:03,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.59 vs. limit=22.5 +2024-09-16 21:39:16,696 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.76 vs. limit=22.5 +2024-09-16 21:39:23,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=83760.0, ans=0.125 +2024-09-16 21:39:30,816 INFO [train.py:1198] (0/2) Epoch 5, batch 2850, loss[loss=0.3021, ctc_loss=0.2321, cr_loss=0.4524, attn_decoder_loss=0.2998, over 29502.00 frames. ], tot_loss[loss=0.2958, ctc_loss=0.2232, cr_loss=0.4361, attn_decoder_loss=0.2941, over 5760697.28 frames. ], batch size: 77, lr: 2.12e-02, grad_scale: 4.0 +2024-09-16 21:39:41,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=83800.0, ans=0.1 +2024-09-16 21:39:41,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=83800.0, ans=0.0 +2024-09-16 21:39:44,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=13.71 vs. 
limit=15.0 +2024-09-16 21:39:50,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=83840.0, ans=0.0 +2024-09-16 21:39:57,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.whiten.whitening_limit, batch_count=83840.0, ans=12.0 +2024-09-16 21:40:12,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=83880.0, ans=0.0 +2024-09-16 21:40:25,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=83920.0, ans=0.125 +2024-09-16 21:40:25,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=83920.0, ans=10.0 +2024-09-16 21:40:47,090 INFO [train.py:1198] (0/2) Epoch 5, batch 2900, loss[loss=0.2966, ctc_loss=0.2265, cr_loss=0.4517, attn_decoder_loss=0.2944, over 29423.00 frames. ], tot_loss[loss=0.2968, ctc_loss=0.2238, cr_loss=0.438, attn_decoder_loss=0.2952, over 5785972.92 frames. ], batch size: 79, lr: 2.12e-02, grad_scale: 8.0 +2024-09-16 21:40:58,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.87 vs. limit=10.0 +2024-09-16 21:41:04,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=84040.0, ans=0.125 +2024-09-16 21:41:13,838 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.341e+01 1.106e+02 1.208e+02 1.366e+02 2.377e+02, threshold=2.415e+02, percent-clipped=0.0 +2024-09-16 21:41:14,809 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.95 vs. limit=15.0 +2024-09-16 21:41:18,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=84040.0, ans=0.0 +2024-09-16 21:41:47,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=84120.0, ans=0.1 +2024-09-16 21:41:48,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=84120.0, ans=0.2 +2024-09-16 21:41:57,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=84160.0, ans=0.05 +2024-09-16 21:41:57,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=84160.0, ans=0.95 +2024-09-16 21:42:02,435 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:42:06,522 INFO [train.py:1198] (0/2) Epoch 5, batch 2950, loss[loss=0.2741, ctc_loss=0.2, cr_loss=0.4005, attn_decoder_loss=0.2734, over 29525.00 frames. ], tot_loss[loss=0.295, ctc_loss=0.2217, cr_loss=0.4356, attn_decoder_loss=0.2934, over 5783578.13 frames. 
], batch size: 75, lr: 2.12e-02, grad_scale: 4.0 +2024-09-16 21:42:09,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=84200.0, ans=0.05 +2024-09-16 21:42:17,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.79 vs. limit=22.5 +2024-09-16 21:43:15,486 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=10.56 vs. limit=15.0 +2024-09-16 21:43:22,234 INFO [train.py:1198] (0/2) Epoch 5, batch 3000, loss[loss=0.2913, ctc_loss=0.2118, cr_loss=0.4394, attn_decoder_loss=0.2904, over 29738.00 frames. ], tot_loss[loss=0.2954, ctc_loss=0.2222, cr_loss=0.4359, attn_decoder_loss=0.2939, over 5785380.54 frames. ], batch size: 81, lr: 2.11e-02, grad_scale: 8.0 +2024-09-16 21:43:22,235 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 21:43:40,546 INFO [train.py:1230] (0/2) Epoch 5, validation: loss=0.2221, ctc_loss=0.06863, cr_loss=4.342e-15, attn_decoder_loss=0.2392, over 944034.00 frames. +2024-09-16 21:43:40,547 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 21:43:43,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=84400.0, ans=0.125 +2024-09-16 21:43:46,158 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.72 vs. limit=22.5 +2024-09-16 21:43:54,424 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:44:04,652 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.812e+01 1.181e+02 1.340e+02 1.602e+02 4.120e+02, threshold=2.680e+02, percent-clipped=4.0 +2024-09-16 21:44:06,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=84440.0, ans=0.125 +2024-09-16 21:44:08,510 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.40 vs. limit=15.0 +2024-09-16 21:44:09,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=84480.0, ans=0.05 +2024-09-16 21:44:13,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=84480.0, ans=0.1 +2024-09-16 21:44:27,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=84520.0, ans=0.0 +2024-09-16 21:44:41,508 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.18 vs. limit=15.0 +2024-09-16 21:44:46,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=84560.0, ans=0.125 +2024-09-16 21:44:59,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=84600.0, ans=0.125 +2024-09-16 21:45:00,285 INFO [train.py:1198] (0/2) Epoch 5, batch 3050, loss[loss=0.2795, ctc_loss=0.2143, cr_loss=0.4489, attn_decoder_loss=0.2768, over 29533.00 frames. 
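limit=15.0

The four loss fields in the train/validation entries are related: the logged `loss` is consistent with a fixed weighted sum of the CTC, consistency-regularization (`cr`) and attention-decoder terms, with the weights matching the scales named in the experiment directory that appears further down in this log (`ctc-loss-scale-0.1`, `aed-loss-scale-0.9`, `cr-loss-scale-0.02`). A quick sanity check, assuming that weighting:

```python
# Hedged consistency check: the logged total appears to be
#   loss = 0.1 * ctc_loss + 0.02 * cr_loss + 0.9 * attn_decoder_loss
# with scales inferred from the checkpoint directory name, not from train.py.
def total_loss(ctc, cr, attn, ctc_scale=0.1, cr_scale=0.02, aed_scale=0.9):
    return ctc_scale * ctc + cr_scale * cr + aed_scale * attn

# Validation entry above: loss=0.2221, ctc=0.06863, cr~=0, attn=0.2392
print(total_loss(0.06863, 4.342e-15, 0.2392))  # -> 0.22214..., logged 0.2221
# Training entry at batch 2800: loss=0.2951, ctc=0.2226, cr=0.4352, attn=0.2935
print(total_loss(0.2226, 0.4352, 0.2935))      # -> 0.29511..., logged 0.2951
```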
], tot_loss[loss=0.2961, ctc_loss=0.2233, cr_loss=0.4371, attn_decoder_loss=0.2945, over 5779246.80 frames. ], batch size: 76, lr: 2.11e-02, grad_scale: 4.0 +2024-09-16 21:45:08,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=84600.0, ans=0.025 +2024-09-16 21:45:30,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=84680.0, ans=0.0 +2024-09-16 21:45:39,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=84680.0, ans=0.025 +2024-09-16 21:45:50,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=84720.0, ans=0.05 +2024-09-16 21:46:15,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=84800.0, ans=0.95 +2024-09-16 21:46:16,184 INFO [train.py:1198] (0/2) Epoch 5, batch 3100, loss[loss=0.3234, ctc_loss=0.2576, cr_loss=0.46, attn_decoder_loss=0.3205, over 29271.00 frames. ], tot_loss[loss=0.2957, ctc_loss=0.2232, cr_loss=0.4364, attn_decoder_loss=0.294, over 5778065.90 frames. ], batch size: 100, lr: 2.11e-02, grad_scale: 8.0 +2024-09-16 21:46:41,654 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.874e+01 1.199e+02 1.306e+02 1.594e+02 3.534e+02, threshold=2.612e+02, percent-clipped=1.0 +2024-09-16 21:46:43,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=84840.0, ans=0.2 +2024-09-16 21:46:49,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=84880.0, ans=0.1 +2024-09-16 21:47:12,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=84920.0, ans=0.125 +2024-09-16 21:47:22,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=84960.0, ans=0.125 +2024-09-16 21:47:25,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=84960.0, ans=0.1 +2024-09-16 21:47:27,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=84960.0, ans=0.2 +2024-09-16 21:47:31,537 INFO [train.py:1198] (0/2) Epoch 5, batch 3150, loss[loss=0.315, ctc_loss=0.2365, cr_loss=0.4687, attn_decoder_loss=0.3133, over 29024.00 frames. ], tot_loss[loss=0.2952, ctc_loss=0.2224, cr_loss=0.4361, attn_decoder_loss=0.2936, over 5783872.33 frames. ], batch size: 105, lr: 2.11e-02, grad_scale: 4.0 +2024-09-16 21:47:54,438 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:48:18,858 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.33 vs. limit=15.0 +2024-09-16 21:48:21,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=85120.0, ans=0.0 +2024-09-16 21:48:32,536 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.51 vs. 
limit=15.0 +2024-09-16 21:48:50,911 INFO [train.py:1198] (0/2) Epoch 5, batch 3200, loss[loss=0.2928, ctc_loss=0.2184, cr_loss=0.4496, attn_decoder_loss=0.2911, over 29423.00 frames. ], tot_loss[loss=0.2948, ctc_loss=0.222, cr_loss=0.4358, attn_decoder_loss=0.2932, over 5793936.77 frames. ], batch size: 79, lr: 2.10e-02, grad_scale: 8.0 +2024-09-16 21:49:04,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=85240.0, ans=0.125 +2024-09-16 21:49:12,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=85240.0, ans=0.1 +2024-09-16 21:49:18,340 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.158e+01 1.087e+02 1.227e+02 1.343e+02 2.511e+02, threshold=2.453e+02, percent-clipped=0.0 +2024-09-16 21:49:23,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=85280.0, ans=0.2 +2024-09-16 21:49:25,602 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.37 vs. limit=10.0 +2024-09-16 21:49:28,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=85280.0, ans=0.0 +2024-09-16 21:49:44,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=85320.0, ans=0.0 +2024-09-16 21:49:47,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=85320.0, ans=0.125 +2024-09-16 21:50:07,120 INFO [train.py:1198] (0/2) Epoch 5, batch 3250, loss[loss=0.3096, ctc_loss=0.2386, cr_loss=0.4452, attn_decoder_loss=0.3076, over 29729.00 frames. ], tot_loss[loss=0.2948, ctc_loss=0.2219, cr_loss=0.4357, attn_decoder_loss=0.2932, over 5800809.10 frames. ], batch size: 84, lr: 2.10e-02, grad_scale: 4.0 +2024-09-16 21:50:25,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=85440.0, ans=0.1 +2024-09-16 21:50:28,471 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:51:22,833 INFO [train.py:1198] (0/2) Epoch 5, batch 3300, loss[loss=0.3157, ctc_loss=0.2445, cr_loss=0.4474, attn_decoder_loss=0.3136, over 28228.00 frames. ], tot_loss[loss=0.2935, ctc_loss=0.2204, cr_loss=0.4341, attn_decoder_loss=0.292, over 5796769.19 frames. ], batch size: 111, lr: 2.10e-02, grad_scale: 8.0 +2024-09-16 21:51:30,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=85600.0, ans=0.0 +2024-09-16 21:51:40,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=85640.0, ans=0.125 +2024-09-16 21:51:51,607 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.589e+01 1.170e+02 1.337e+02 1.496e+02 4.068e+02, threshold=2.673e+02, percent-clipped=4.0 +2024-09-16 21:52:31,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=85760.0, ans=0.125 +2024-09-16 21:52:42,504 INFO [train.py:1198] (0/2) Epoch 5, batch 3350, loss[loss=0.3052, ctc_loss=0.2301, cr_loss=0.4511, attn_decoder_loss=0.3035, over 28907.00 frames. 
], tot_loss[loss=0.2943, ctc_loss=0.2215, cr_loss=0.4343, attn_decoder_loss=0.2927, over 5773077.88 frames. ], batch size: 104, lr: 2.10e-02, grad_scale: 4.0 +2024-09-16 21:52:49,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.78 vs. limit=15.0 +2024-09-16 21:52:59,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=85840.0, ans=0.1 +2024-09-16 21:53:01,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=85840.0, ans=0.1 +2024-09-16 21:53:07,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=85840.0, ans=0.125 +2024-09-16 21:53:08,976 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.66 vs. limit=22.5 +2024-09-16 21:53:18,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.58 vs. limit=15.0 +2024-09-16 21:53:31,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=85920.0, ans=0.1 +2024-09-16 21:53:54,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.40 vs. limit=15.0 +2024-09-16 21:53:58,050 INFO [train.py:1198] (0/2) Epoch 5, batch 3400, loss[loss=0.2583, ctc_loss=0.1876, cr_loss=0.3957, attn_decoder_loss=0.2574, over 29342.00 frames. ], tot_loss[loss=0.2942, ctc_loss=0.2215, cr_loss=0.4345, attn_decoder_loss=0.2927, over 5767258.06 frames. ], batch size: 67, lr: 2.10e-02, grad_scale: 4.0 +2024-09-16 21:54:07,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=86000.0, ans=0.025 +2024-09-16 21:54:14,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=86040.0, ans=0.025 +2024-09-16 21:54:16,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=86040.0, ans=0.125 +2024-09-16 21:54:17,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=86040.0, ans=0.0 +2024-09-16 21:54:18,464 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.07 vs. limit=15.0 +2024-09-16 21:54:28,087 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.899e+01 1.163e+02 1.316e+02 1.513e+02 4.040e+02, threshold=2.631e+02, percent-clipped=2.0 +2024-09-16 21:54:29,046 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.77 vs. 
limit=10.0 +2024-09-16 21:54:34,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=86080.0, ans=0.125 +2024-09-16 21:54:43,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=86120.0, ans=0.0 +2024-09-16 21:54:52,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=86120.0, ans=0.0 +2024-09-16 21:55:04,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=86160.0, ans=0.2 +2024-09-16 21:55:07,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=86160.0, ans=0.125 +2024-09-16 21:55:13,323 INFO [train.py:1198] (0/2) Epoch 5, batch 3450, loss[loss=0.3109, ctc_loss=0.2328, cr_loss=0.4524, attn_decoder_loss=0.3095, over 28217.00 frames. ], tot_loss[loss=0.294, ctc_loss=0.221, cr_loss=0.4344, attn_decoder_loss=0.2925, over 5774742.92 frames. ], batch size: 111, lr: 2.09e-02, grad_scale: 4.0 +2024-09-16 21:55:14,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=86200.0, ans=0.125 +2024-09-16 21:56:17,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=86360.0, ans=0.1 +2024-09-16 21:56:19,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.45 vs. limit=10.0 +2024-09-16 21:56:33,266 INFO [train.py:1198] (0/2) Epoch 5, batch 3500, loss[loss=0.2873, ctc_loss=0.2209, cr_loss=0.4289, attn_decoder_loss=0.2852, over 29327.00 frames. ], tot_loss[loss=0.2934, ctc_loss=0.2207, cr_loss=0.4341, attn_decoder_loss=0.2918, over 5775966.29 frames. ], batch size: 71, lr: 2.09e-02, grad_scale: 8.0 +2024-09-16 21:56:44,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=86400.0, ans=0.1 +2024-09-16 21:56:51,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=86440.0, ans=0.0 +2024-09-16 21:56:53,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=86440.0, ans=0.0 +2024-09-16 21:57:01,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=86480.0, ans=0.0 +2024-09-16 21:57:02,280 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.95 vs. limit=15.0 +2024-09-16 21:57:04,584 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.011e+02 1.247e+02 1.357e+02 1.561e+02 2.944e+02, threshold=2.714e+02, percent-clipped=1.0 +2024-09-16 21:57:14,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.46 vs. 
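limit=22.5

The recurring `Clipping_scale=2.0, grad-norm quartiles ...` warnings are internally consistent: the five statistics read like (min, 25%, median, 75%, max) of recently observed gradient norms, and the logged `threshold` matches `Clipping_scale` times the middle value up to rounding. This interpretation is inferred from the numbers in this log, not taken from `optim.py` itself:

```python
# Hedged reading of the clipping warnings: threshold ~= clipping_scale * median
# of recent gradient norms (an inference from the logged values).
import statistics

def clip_threshold(recent_grad_norms, clipping_scale=2.0):
    return clipping_scale * statistics.median(recent_grad_norms)

# Warning just above: quartiles 1.011e+02 1.247e+02 1.357e+02 1.561e+02
# 2.944e+02 with threshold=2.714e+02:
print(2.0 * 1.357e2)  # 271.4, i.e. the logged 2.714e+02
```

Under this reading, `percent-clipped` would be the share of recent batches whose gradient norm exceeded that adaptive threshold, which fits the 0.0–9.0 values seen throughout the log.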
+2024-09-16 21:57:19,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=86520.0, ans=0.125 +2024-09-16 21:57:28,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=86520.0, ans=0.125 +2024-09-16 21:57:46,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=86600.0, ans=0.0 +2024-09-16 21:57:48,009 INFO [train.py:1198] (0/2) Epoch 5, batch 3550, loss[loss=0.3126, ctc_loss=0.2443, cr_loss=0.4362, attn_decoder_loss=0.3105, over 29695.00 frames. ], tot_loss[loss=0.2934, ctc_loss=0.2205, cr_loss=0.4343, attn_decoder_loss=0.2918, over 5781857.09 frames. ], batch size: 89, lr: 2.09e-02, grad_scale: 4.0 +2024-09-16 21:58:10,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=86640.0, ans=0.0 +2024-09-16 21:58:15,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=86640.0, ans=0.025 +2024-09-16 21:58:21,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=86680.0, ans=0.125 +2024-09-16 21:58:57,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=86760.0, ans=0.1 +2024-09-16 21:59:00,297 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=11.76 vs. limit=15.0 +2024-09-16 21:59:02,142 INFO [train.py:1198] (0/2) Epoch 5, batch 3600, loss[loss=0.2711, ctc_loss=0.1974, cr_loss=0.4118, attn_decoder_loss=0.2701, over 29489.00 frames. ], tot_loss[loss=0.2934, ctc_loss=0.2201, cr_loss=0.4339, attn_decoder_loss=0.2919, over 5791421.83 frames. ], batch size: 77, lr: 2.09e-02, grad_scale: 8.0 +2024-09-16 21:59:11,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=86800.0, ans=0.125 +2024-09-16 21:59:21,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=86840.0, ans=0.05 +2024-09-16 21:59:23,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=86840.0, ans=0.0 +2024-09-16 21:59:24,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=86840.0, ans=0.125 +2024-09-16 21:59:24,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=86840.0, ans=0.09899494936611666 +2024-09-16 21:59:34,636 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.246e+01 1.105e+02 1.213e+02 1.386e+02 4.333e+02, threshold=2.426e+02, percent-clipped=4.0 +2024-09-16 21:59:36,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=86880.0, ans=0.125 +2024-09-16 22:00:16,129 INFO [train.py:1198] (0/2) Epoch 5, batch 3650, loss[loss=0.317, ctc_loss=0.2411, cr_loss=0.4715, attn_decoder_loss=0.3149, over 29514.00 frames. ], tot_loss[loss=0.2928, ctc_loss=0.2196, cr_loss=0.4331, attn_decoder_loss=0.2913, over 5793465.93 frames.
], batch size: 90, lr: 2.08e-02, grad_scale: 4.0 +2024-09-16 22:00:17,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=87000.0, ans=0.5 +2024-09-16 22:00:19,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=87000.0, ans=0.0 +2024-09-16 22:00:37,656 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.60 vs. limit=10.0 +2024-09-16 22:00:38,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=87040.0, ans=0.125 +2024-09-16 22:00:53,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=87080.0, ans=0.125 +2024-09-16 22:00:58,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.23 vs. limit=15.0 +2024-09-16 22:01:19,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=87160.0, ans=0.0 +2024-09-16 22:01:31,068 INFO [train.py:1198] (0/2) Epoch 5, batch 3700, loss[loss=0.3121, ctc_loss=0.2418, cr_loss=0.4866, attn_decoder_loss=0.3091, over 29699.00 frames. ], tot_loss[loss=0.2929, ctc_loss=0.2191, cr_loss=0.4328, attn_decoder_loss=0.2915, over 5804433.85 frames. ], batch size: 84, lr: 2.08e-02, grad_scale: 8.0 +2024-09-16 22:01:40,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=87200.0, ans=0.07 +2024-09-16 22:01:43,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=87200.0, ans=0.2 +2024-09-16 22:01:43,862 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.79 vs. limit=22.5 +2024-09-16 22:01:44,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=87240.0, ans=0.07 +2024-09-16 22:01:59,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=87280.0, ans=0.05 +2024-09-16 22:02:01,586 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.24 vs. 
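limit=15.0

The `Whitening` entries compare a per-module `metric` against a `limit`. A metric of this general kind — an illustrative formula, not claimed to be the exact one in `scaling.py` — is `d * tr(C^2) / tr(C)^2` for the channel covariance `C`, which equals 1 exactly when the features are white (`C` proportional to the identity) and grows as the eigenvalue spectrum becomes unbalanced:

```python
# Illustrative whitening metric (hypothetical; by Cauchy-Schwarz it is >= 1,
# with equality iff all covariance eigenvalues are equal).
import torch

def whitening_metric(x: torch.Tensor) -> float:
    # x: (num_frames, num_channels)
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.t() @ x) / x.shape[0]           # channel covariance C
    d = cov.shape[0]
    return (d * (cov * cov).sum() / cov.trace() ** 2).item()  # d*tr(C^2)/tr(C)^2

white = torch.randn(10000, 512)
print(whitening_metric(white))                                 # close to 1
print(whitening_metric(white * torch.linspace(0.1, 3, 512)))   # > 1: unbalanced
```

That would explain why values well below the limit (e.g. `metric=2.24 vs. limit=15.0` just above) are routine, while occasional excursions past the limit (e.g. the `22.76 vs. limit=22.5` entries) are what the whitening penalty pushes back on.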
+2024-09-16 22:02:05,212 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.891e+01 1.136e+02 1.234e+02 1.353e+02 4.194e+02, threshold=2.467e+02, percent-clipped=4.0 +2024-09-16 22:02:05,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=87280.0, ans=0.04949747468305833 +2024-09-16 22:02:14,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=87320.0, ans=0.125 +2024-09-16 22:02:24,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=87320.0, ans=0.125 +2024-09-16 22:02:26,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=87320.0, ans=0.125 +2024-09-16 22:02:47,096 INFO [train.py:1198] (0/2) Epoch 5, batch 3750, loss[loss=0.2524, ctc_loss=0.1835, cr_loss=0.3876, attn_decoder_loss=0.2515, over 29348.00 frames. ], tot_loss[loss=0.2923, ctc_loss=0.2186, cr_loss=0.4328, attn_decoder_loss=0.2909, over 5807766.19 frames. ], batch size: 67, lr: 2.08e-02, grad_scale: 4.0 +2024-09-16 22:02:53,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=87400.0, ans=0.125 +2024-09-16 22:02:56,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=87400.0, ans=0.125 +2024-09-16 22:02:59,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=87400.0, ans=0.0 +2024-09-16 22:03:06,021 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.35 vs. limit=15.0 +2024-09-16 22:03:06,358 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=9.59 vs. limit=15.0 +2024-09-16 22:03:49,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=87560.0, ans=0.0 +2024-09-16 22:03:57,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=87560.0, ans=0.025 +2024-09-16 22:04:02,947 INFO [train.py:1198] (0/2) Epoch 5, batch 3800, loss[loss=0.3029, ctc_loss=0.2253, cr_loss=0.4539, attn_decoder_loss=0.3015, over 29630.00 frames. ], tot_loss[loss=0.2923, ctc_loss=0.219, cr_loss=0.4329, attn_decoder_loss=0.2908, over 5798270.30 frames.
], batch size: 86, lr: 2.08e-02, grad_scale: 4.0 +2024-09-16 22:04:07,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=87600.0, ans=0.1 +2024-09-16 22:04:38,689 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.009e+02 1.217e+02 1.354e+02 1.572e+02 4.220e+02, threshold=2.708e+02, percent-clipped=3.0 +2024-09-16 22:05:02,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=87760.0, ans=0.125 +2024-09-16 22:05:06,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=87760.0, ans=0.1 +2024-09-16 22:05:17,042 INFO [train.py:1198] (0/2) Epoch 5, batch 3850, loss[loss=0.3217, ctc_loss=0.2476, cr_loss=0.4838, attn_decoder_loss=0.3192, over 29273.00 frames. ], tot_loss[loss=0.292, ctc_loss=0.2187, cr_loss=0.4329, attn_decoder_loss=0.2906, over 5813039.80 frames. ], batch size: 100, lr: 2.08e-02, grad_scale: 4.0 +2024-09-16 22:05:20,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=87800.0, ans=0.125 +2024-09-16 22:05:26,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=14.23 vs. limit=15.0 +2024-09-16 22:05:50,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=87880.0, ans=0.0 +2024-09-16 22:06:03,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=87920.0, ans=0.0 +2024-09-16 22:06:19,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=87960.0, ans=0.1 +2024-09-16 22:06:21,727 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.02 vs. limit=22.5 +2024-09-16 22:06:28,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=87960.0, ans=0.125 +2024-09-16 22:06:31,877 INFO [train.py:1198] (0/2) Epoch 5, batch 3900, loss[loss=0.2965, ctc_loss=0.2251, cr_loss=0.4382, attn_decoder_loss=0.2947, over 29638.00 frames. ], tot_loss[loss=0.2928, ctc_loss=0.2192, cr_loss=0.4339, attn_decoder_loss=0.2913, over 5817462.64 frames. ], batch size: 86, lr: 2.07e-02, grad_scale: 8.0 +2024-09-16 22:06:34,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.77 vs. limit=15.0 +2024-09-16 22:06:57,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.73 vs. 
limit=22.5 +2024-09-16 22:06:58,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=88040.0, ans=0.125 +2024-09-16 22:07:08,815 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.846e+01 1.141e+02 1.281e+02 1.435e+02 2.843e+02, threshold=2.562e+02, percent-clipped=1.0 +2024-09-16 22:07:17,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=88120.0, ans=0.125 +2024-09-16 22:07:47,171 INFO [train.py:1198] (0/2) Epoch 5, batch 3950, loss[loss=0.3155, ctc_loss=0.2384, cr_loss=0.4818, attn_decoder_loss=0.3133, over 29475.00 frames. ], tot_loss[loss=0.2926, ctc_loss=0.2184, cr_loss=0.433, attn_decoder_loss=0.2913, over 5836656.11 frames. ], batch size: 97, lr: 2.07e-02, grad_scale: 4.0 +2024-09-16 22:08:12,805 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.82 vs. limit=15.0 +2024-09-16 22:08:21,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=88280.0, ans=0.0 +2024-09-16 22:08:24,327 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:08:31,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=88320.0, ans=0.0 +2024-09-16 22:08:43,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=88320.0, ans=0.2 +2024-09-16 22:08:51,296 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.35 vs. limit=22.5 +2024-09-16 22:08:52,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=88360.0, ans=0.125 +2024-09-16 22:09:01,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=88400.0, ans=0.2 +2024-09-16 22:09:02,282 INFO [train.py:1198] (0/2) Epoch 5, batch 4000, loss[loss=0.2603, ctc_loss=0.1848, cr_loss=0.3943, attn_decoder_loss=0.2599, over 29515.00 frames. ], tot_loss[loss=0.293, ctc_loss=0.2192, cr_loss=0.4338, attn_decoder_loss=0.2916, over 5812944.76 frames. ], batch size: 74, lr: 2.07e-02, grad_scale: 8.0 +2024-09-16 22:09:17,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.whiten.whitening_limit, batch_count=88440.0, ans=12.0 +2024-09-16 22:09:40,467 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.385e+01 1.172e+02 1.271e+02 1.397e+02 4.120e+02, threshold=2.542e+02, percent-clipped=3.0 +2024-09-16 22:09:51,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=88520.0, ans=0.1 +2024-09-16 22:10:09,777 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.69 vs. limit=15.0 +2024-09-16 22:10:16,078 INFO [train.py:1198] (0/2) Epoch 5, batch 4050, loss[loss=0.3295, ctc_loss=0.287, cr_loss=0.4395, attn_decoder_loss=0.3245, over 20929.00 frames. ], tot_loss[loss=0.2929, ctc_loss=0.2193, cr_loss=0.4333, attn_decoder_loss=0.2914, over 5797448.39 frames. 
], batch size: 210, lr: 2.07e-02, grad_scale: 4.0 +2024-09-16 22:10:23,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=88600.0, ans=0.125 +2024-09-16 22:10:31,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=88640.0, ans=0.125 +2024-09-16 22:10:45,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=88680.0, ans=0.125 +2024-09-16 22:10:48,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=88680.0, ans=0.125 +2024-09-16 22:11:16,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=88760.0, ans=0.125 +2024-09-16 22:11:16,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=88760.0, ans=0.125 +2024-09-16 22:11:25,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=88760.0, ans=0.09899494936611666 +2024-09-16 22:11:30,111 INFO [train.py:1198] (0/2) Epoch 5, batch 4100, loss[loss=0.3119, ctc_loss=0.2347, cr_loss=0.4778, attn_decoder_loss=0.3098, over 29492.00 frames. ], tot_loss[loss=0.2931, ctc_loss=0.2199, cr_loss=0.4341, attn_decoder_loss=0.2915, over 5792626.77 frames. ], batch size: 90, lr: 2.07e-02, grad_scale: 8.0 +2024-09-16 22:11:33,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=88800.0, ans=0.125 +2024-09-16 22:11:37,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=88800.0, ans=0.1 +2024-09-16 22:11:39,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=88800.0, ans=0.0 +2024-09-16 22:11:48,604 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.84 vs. 
limit=15.0 +2024-09-16 22:11:52,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=88840.0, ans=0.2 +2024-09-16 22:11:57,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=88840.0, ans=0.0 +2024-09-16 22:12:01,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=88880.0, ans=0.025 +2024-09-16 22:12:11,096 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.506e+01 1.179e+02 1.301e+02 1.533e+02 3.400e+02, threshold=2.603e+02, percent-clipped=2.0 +2024-09-16 22:12:15,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=88920.0, ans=0.025 +2024-09-16 22:12:32,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=88960.0, ans=0.1 +2024-09-16 22:12:42,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=88960.0, ans=0.0 +2024-09-16 22:12:45,306 INFO [train.py:1198] (0/2) Epoch 5, batch 4150, loss[loss=0.2938, ctc_loss=0.2149, cr_loss=0.4184, attn_decoder_loss=0.2933, over 29508.00 frames. ], tot_loss[loss=0.2921, ctc_loss=0.2188, cr_loss=0.4327, attn_decoder_loss=0.2906, over 5798571.41 frames. ], batch size: 77, lr: 2.06e-02, grad_scale: 4.0 +2024-09-16 22:12:59,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=89040.0, ans=0.0 +2024-09-16 22:13:08,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=89040.0, ans=0.0 +2024-09-16 22:13:09,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.21 vs. limit=15.0 +2024-09-16 22:13:15,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=89080.0, ans=0.125 +2024-09-16 22:13:17,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=89080.0, ans=0.0 +2024-09-16 22:13:26,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=89080.0, ans=0.035 +2024-09-16 22:13:59,806 INFO [train.py:1198] (0/2) Epoch 5, batch 4200, loss[loss=0.3275, ctc_loss=0.2623, cr_loss=0.4982, attn_decoder_loss=0.3237, over 29518.00 frames. ], tot_loss[loss=0.2925, ctc_loss=0.2189, cr_loss=0.4328, attn_decoder_loss=0.291, over 5800459.03 frames. 
], batch size: 90, lr: 2.06e-02, grad_scale: 8.0 +2024-09-16 22:14:00,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=89200.0, ans=0.05 +2024-09-16 22:14:01,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=89200.0, ans=0.125 +2024-09-16 22:14:02,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=89200.0, ans=0.125 +2024-09-16 22:14:13,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=89240.0, ans=0.2 +2024-09-16 22:14:14,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=89240.0, ans=0.05 +2024-09-16 22:14:41,006 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.772e+01 1.118e+02 1.246e+02 1.404e+02 2.463e+02, threshold=2.492e+02, percent-clipped=0.0 +2024-09-16 22:15:08,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=89360.0, ans=0.1 +2024-09-16 22:15:12,895 INFO [train.py:1198] (0/2) Epoch 5, batch 4250, loss[loss=0.2561, ctc_loss=0.1859, cr_loss=0.3748, attn_decoder_loss=0.2555, over 29523.00 frames. ], tot_loss[loss=0.2923, ctc_loss=0.2182, cr_loss=0.4327, attn_decoder_loss=0.2909, over 5806937.85 frames. ], batch size: 74, lr: 2.06e-02, grad_scale: 4.0 +2024-09-16 22:15:19,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=89400.0, ans=0.2 +2024-09-16 22:15:21,055 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.61 vs. limit=15.0 +2024-09-16 22:15:45,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=89480.0, ans=0.125 +2024-09-16 22:15:45,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=89480.0, ans=0.2 +2024-09-16 22:15:53,823 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.00 vs. limit=15.0 +2024-09-16 22:15:56,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=89520.0, ans=0.125 +2024-09-16 22:16:16,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=89560.0, ans=0.125 +2024-09-16 22:16:27,391 INFO [train.py:1198] (0/2) Epoch 5, batch 4300, loss[loss=0.29, ctc_loss=0.2098, cr_loss=0.3975, attn_decoder_loss=0.2901, over 29523.00 frames. ], tot_loss[loss=0.2927, ctc_loss=0.2186, cr_loss=0.4331, attn_decoder_loss=0.2913, over 5796351.44 frames. 
], batch size: 87, lr: 2.06e-02, grad_scale: 8.0 +2024-09-16 22:16:27,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=89600.0, ans=0.125 +2024-09-16 22:16:32,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=89600.0, ans=0.2 +2024-09-16 22:16:38,381 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.05 vs. limit=10.0 +2024-09-16 22:16:52,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=89640.0, ans=0.125 +2024-09-16 22:17:01,752 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.56 vs. limit=15.0 +2024-09-16 22:17:08,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=89680.0, ans=0.125 +2024-09-16 22:17:11,160 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.609e+01 1.163e+02 1.276e+02 1.524e+02 3.260e+02, threshold=2.552e+02, percent-clipped=3.0 +2024-09-16 22:17:14,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=89720.0, ans=0.1 +2024-09-16 22:17:30,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=89760.0, ans=0.125 +2024-09-16 22:17:42,370 INFO [train.py:1198] (0/2) Epoch 5, batch 4350, loss[loss=0.3087, ctc_loss=0.236, cr_loss=0.4377, attn_decoder_loss=0.3071, over 29467.00 frames. ], tot_loss[loss=0.2965, ctc_loss=0.222, cr_loss=0.4384, attn_decoder_loss=0.295, over 5798072.32 frames. ], batch size: 97, lr: 2.06e-02, grad_scale: 4.0 +2024-09-16 22:17:46,444 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.05 vs. limit=22.5 +2024-09-16 22:18:02,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=89840.0, ans=0.125 +2024-09-16 22:18:15,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=89880.0, ans=0.125 +2024-09-16 22:18:37,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=89920.0, ans=0.025 +2024-09-16 22:18:37,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=89920.0, ans=0.2 +2024-09-16 22:18:56,342 INFO [train.py:1198] (0/2) Epoch 5, batch 4400, loss[loss=0.3149, ctc_loss=0.2453, cr_loss=0.4474, attn_decoder_loss=0.3127, over 27220.00 frames. ], tot_loss[loss=0.2988, ctc_loss=0.2242, cr_loss=0.4402, attn_decoder_loss=0.2973, over 5768003.78 frames. 
], batch size: 124, lr: 2.05e-02, grad_scale: 8.0 +2024-09-16 22:18:56,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=90000.0, ans=10.0 +2024-09-16 22:19:29,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=90080.0, ans=0.0 +2024-09-16 22:19:40,703 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.790e+01 1.097e+02 1.213e+02 1.417e+02 2.444e+02, threshold=2.426e+02, percent-clipped=0.0 +2024-09-16 22:19:56,809 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=10.85 vs. limit=10.0 +2024-09-16 22:20:10,762 INFO [train.py:1198] (0/2) Epoch 5, batch 4450, loss[loss=0.3262, ctc_loss=0.2807, cr_loss=0.4255, attn_decoder_loss=0.3218, over 19398.00 frames. ], tot_loss[loss=0.3027, ctc_loss=0.2308, cr_loss=0.4434, attn_decoder_loss=0.3008, over 5576475.78 frames. ], batch size: 209, lr: 2.05e-02, grad_scale: 4.0 +2024-09-16 22:20:34,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=90240.0, ans=0.125 +2024-09-16 22:20:35,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=90240.0, ans=0.0 +2024-09-16 22:20:43,692 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.35 vs. limit=22.5 +2024-09-16 22:21:10,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=90360.0, ans=0.125 +2024-09-16 22:21:26,520 INFO [train.py:1198] (0/2) Epoch 5, batch 4500, loss[loss=0.3242, ctc_loss=0.2748, cr_loss=0.4516, attn_decoder_loss=0.3196, over 19957.00 frames. ], tot_loss[loss=0.3071, ctc_loss=0.2398, cr_loss=0.4451, attn_decoder_loss=0.3047, over 5236382.48 frames. ], batch size: 210, lr: 2.05e-02, grad_scale: 8.0 +2024-09-16 22:21:28,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=90400.0, ans=0.125 +2024-09-16 22:21:31,837 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.82 vs. limit=15.0 +2024-09-16 22:21:33,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.65 vs. limit=22.5 +2024-09-16 22:22:03,467 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-5.pt +2024-09-16 22:22:53,646 INFO [train.py:1198] (0/2) Epoch 6, batch 0, loss[loss=0.3298, ctc_loss=0.2105, cr_loss=0.4328, attn_decoder_loss=0.3334, over 29623.00 frames. ], tot_loss[loss=0.3298, ctc_loss=0.2105, cr_loss=0.4328, attn_decoder_loss=0.3334, over 29623.00 frames. 
], batch size: 73, lr: 1.91e-02, grad_scale: 4.0 +2024-09-16 22:22:53,647 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 22:22:59,667 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.4.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.5120, 4.9777, 4.6828, 4.9199], device='cuda:0') +2024-09-16 22:23:11,939 INFO [train.py:1230] (0/2) Epoch 6, validation: loss=0.2379, ctc_loss=0.06988, cr_loss=4.72e-15, attn_decoder_loss=0.2566, over 944034.00 frames. +2024-09-16 22:23:11,939 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 22:23:13,423 WARNING [optim.py:503] (0/2) Scaling gradients by 0.0589279979467392, model_norm_threshold=242.58145141601562 +2024-09-16 22:23:13,643 WARNING [optim.py:575] (0/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.1.self_attn.linear_k.weight with proportion 0.26, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=4.469e+06, grad_sumsq=5.019e+05, orig_rms_sq=8.904e+00 +2024-09-16 22:23:21,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=90500.0, ans=0.125 +2024-09-16 22:23:22,772 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.027e+02 1.192e+02 1.351e+02 1.731e+02 4.117e+03, threshold=2.703e+02, percent-clipped=9.0 +2024-09-16 22:23:23,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=90500.0, ans=0.125 +2024-09-16 22:23:29,674 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.83 vs. limit=12.0 +2024-09-16 22:23:35,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=90540.0, ans=0.1 +2024-09-16 22:23:35,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=90540.0, ans=0.125 +2024-09-16 22:23:40,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=90540.0, ans=0.0 +2024-09-16 22:23:52,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=90580.0, ans=0.125 +2024-09-16 22:23:59,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=90620.0, ans=0.2 +2024-09-16 22:24:03,602 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.83 vs. limit=15.0 +2024-09-16 22:24:17,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=90660.0, ans=0.1 +2024-09-16 22:24:17,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=90660.0, ans=0.125 +2024-09-16 22:24:28,161 INFO [train.py:1198] (0/2) Epoch 6, batch 50, loss[loss=0.2572, ctc_loss=0.1852, cr_loss=0.3854, attn_decoder_loss=0.2567, over 29449.00 frames. ], tot_loss[loss=0.2984, ctc_loss=0.2244, cr_loss=0.4364, attn_decoder_loss=0.297, over 1267546.53 frames. 
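], batch size: 70, lr: 1.91e-02, grad_scale: 4.0

The pair of optimizer warnings at the start of epoch 6 above also decode cleanly, assuming (an inference from the logged numbers, not documented `optim.py` behaviour) that gradients are rescaled by `model_norm_threshold / grad_norm` when the norm overshoots, and that `dominant_sumsq` is exactly the stated product:

```python
# Hedged decoding of the two optimizer warnings above.
# 1) "Scaling gradients by s, model_norm_threshold=t" is consistent with
#    s = t / grad_norm, i.e. that batch's gradient norm was ~4.1e3:
s, t = 0.0589279979467392, 242.58145141601562
print(t / s)  # ~4116.6 -- matching the 4.117e+03 max in the clipping log above
# 2) The "Parameter dominating tot_sumsq" diagnostic is the stated product:
print(5.019e5 * 8.904)  # ~4.469e+06 == dominant_sumsq = grad_sumsq * orig_rms_sq
```

The dominating-parameter line is useful for debugging such spikes: here roughly a quarter of the squared gradient norm came from a single attention-decoder projection (`decoder.layers.1.self_attn.linear_k.weight`).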
+2024-09-16 22:24:29,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=90700.0, ans=0.125 +2024-09-16 22:24:30,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=90700.0, ans=0.1 +2024-09-16 22:24:36,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=90700.0, ans=0.2 +2024-09-16 22:24:40,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=90700.0, ans=0.025 +2024-09-16 22:24:43,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=90740.0, ans=0.125 +2024-09-16 22:24:50,570 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=14.27 vs. limit=15.0 +2024-09-16 22:24:51,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=90740.0, ans=0.2 +2024-09-16 22:25:23,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=90820.0, ans=0.125 +2024-09-16 22:25:45,648 INFO [train.py:1198] (0/2) Epoch 6, batch 100, loss[loss=0.2812, ctc_loss=0.2166, cr_loss=0.4336, attn_decoder_loss=0.2788, over 29517.00 frames. ], tot_loss[loss=0.2979, ctc_loss=0.223, cr_loss=0.4385, attn_decoder_loss=0.2965, over 2251293.29 frames. ], batch size: 76, lr: 1.91e-02, grad_scale: 8.0 +2024-09-16 22:25:57,472 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.583e+01 1.187e+02 1.367e+02 1.634e+02 6.216e+02, threshold=2.735e+02, percent-clipped=2.0 +2024-09-16 22:26:00,704 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:26:24,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=90980.0, ans=0.0 +2024-09-16 22:26:26,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=90980.0, ans=0.0 +2024-09-16 22:26:51,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=91060.0, ans=0.125 +2024-09-16 22:27:01,989 INFO [train.py:1198] (0/2) Epoch 6, batch 150, loss[loss=0.2533, ctc_loss=0.1777, cr_loss=0.3954, attn_decoder_loss=0.253, over 29461.00 frames. ], tot_loss[loss=0.2946, ctc_loss=0.2201, cr_loss=0.436, attn_decoder_loss=0.2932, over 3046386.01 frames.
], batch size: 70, lr: 1.91e-02, grad_scale: 4.0 +2024-09-16 22:27:20,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=91140.0, ans=0.1 +2024-09-16 22:27:22,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff2.min_abs, batch_count=91140.0, ans=0.1 +2024-09-16 22:27:25,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=91140.0, ans=0.2 +2024-09-16 22:27:28,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=91140.0, ans=0.1 +2024-09-16 22:27:38,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=91180.0, ans=0.07 +2024-09-16 22:27:50,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=91220.0, ans=0.5 +2024-09-16 22:27:55,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=91220.0, ans=0.0 +2024-09-16 22:27:55,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=91220.0, ans=0.1 +2024-09-16 22:28:00,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.72 vs. limit=15.0 +2024-09-16 22:28:16,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=91300.0, ans=0.0 +2024-09-16 22:28:17,517 INFO [train.py:1198] (0/2) Epoch 6, batch 200, loss[loss=0.3061, ctc_loss=0.2364, cr_loss=0.4242, attn_decoder_loss=0.3044, over 27098.00 frames. ], tot_loss[loss=0.2925, ctc_loss=0.2176, cr_loss=0.4332, attn_decoder_loss=0.2913, over 3659059.59 frames. ], batch size: 124, lr: 1.90e-02, grad_scale: 8.0 +2024-09-16 22:28:21,286 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.52 vs. limit=6.0 +2024-09-16 22:28:29,580 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.192e+01 1.064e+02 1.171e+02 1.354e+02 3.116e+02, threshold=2.342e+02, percent-clipped=1.0 +2024-09-16 22:28:57,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=91380.0, ans=0.125 +2024-09-16 22:29:09,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=91420.0, ans=0.07 +2024-09-16 22:29:27,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=91460.0, ans=0.125 +2024-09-16 22:29:35,028 INFO [train.py:1198] (0/2) Epoch 6, batch 250, loss[loss=0.3082, ctc_loss=0.2392, cr_loss=0.4386, attn_decoder_loss=0.3061, over 29244.00 frames. ], tot_loss[loss=0.2918, ctc_loss=0.2162, cr_loss=0.4324, attn_decoder_loss=0.2906, over 4141090.76 frames. 
], batch size: 100, lr: 1.90e-02, grad_scale: 4.0 +2024-09-16 22:29:41,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=91500.0, ans=0.1 +2024-09-16 22:30:39,355 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:30:49,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=91660.0, ans=0.1 +2024-09-16 22:30:52,394 INFO [train.py:1198] (0/2) Epoch 6, batch 300, loss[loss=0.3067, ctc_loss=0.2385, cr_loss=0.4631, attn_decoder_loss=0.304, over 29540.00 frames. ], tot_loss[loss=0.2907, ctc_loss=0.215, cr_loss=0.4321, attn_decoder_loss=0.2895, over 4509657.22 frames. ], batch size: 92, lr: 1.90e-02, grad_scale: 8.0 +2024-09-16 22:31:07,398 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.038e+01 1.116e+02 1.244e+02 1.492e+02 2.099e+02, threshold=2.488e+02, percent-clipped=0.0 +2024-09-16 22:31:09,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=91740.0, ans=0.125 +2024-09-16 22:31:13,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=91740.0, ans=0.125 +2024-09-16 22:31:27,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.32 vs. limit=10.0 +2024-09-16 22:31:33,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=91780.0, ans=0.0 +2024-09-16 22:31:46,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=91820.0, ans=0.0 +2024-09-16 22:32:07,878 INFO [train.py:1198] (0/2) Epoch 6, batch 350, loss[loss=0.2523, ctc_loss=0.1761, cr_loss=0.3735, attn_decoder_loss=0.2525, over 29309.00 frames. ], tot_loss[loss=0.291, ctc_loss=0.215, cr_loss=0.4327, attn_decoder_loss=0.2898, over 4794283.29 frames. ], batch size: 71, lr: 1.90e-02, grad_scale: 4.0 +2024-09-16 22:32:32,818 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.23 vs. limit=15.0 +2024-09-16 22:32:35,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=91940.0, ans=0.125 +2024-09-16 22:32:37,300 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=11.32 vs. 
limit=15.0 +2024-09-16 22:32:52,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=91980.0, ans=0.125 +2024-09-16 22:33:17,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=92060.0, ans=0.0 +2024-09-16 22:33:19,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=92060.0, ans=0.0 +2024-09-16 22:33:23,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=92060.0, ans=0.1 +2024-09-16 22:33:26,405 INFO [train.py:1198] (0/2) Epoch 6, batch 400, loss[loss=0.2894, ctc_loss=0.2176, cr_loss=0.4517, attn_decoder_loss=0.2873, over 29707.00 frames. ], tot_loss[loss=0.2901, ctc_loss=0.2138, cr_loss=0.4316, attn_decoder_loss=0.289, over 5024448.54 frames. ], batch size: 82, lr: 1.90e-02, grad_scale: 8.0 +2024-09-16 22:33:38,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=92100.0, ans=0.0 +2024-09-16 22:33:43,123 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.703e+01 1.115e+02 1.264e+02 1.415e+02 3.594e+02, threshold=2.527e+02, percent-clipped=2.0 +2024-09-16 22:33:46,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=92140.0, ans=10.0 +2024-09-16 22:34:01,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=92180.0, ans=0.2 +2024-09-16 22:34:30,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=92260.0, ans=0.125 +2024-09-16 22:34:45,093 INFO [train.py:1198] (0/2) Epoch 6, batch 450, loss[loss=0.301, ctc_loss=0.2198, cr_loss=0.4621, attn_decoder_loss=0.2998, over 29693.00 frames. ], tot_loss[loss=0.29, ctc_loss=0.214, cr_loss=0.432, attn_decoder_loss=0.2889, over 5184053.35 frames. ], batch size: 83, lr: 1.89e-02, grad_scale: 4.0 +2024-09-16 22:34:49,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=92300.0, ans=0.125 +2024-09-16 22:35:11,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.70 vs. 
limit=15.0 +2024-09-16 22:35:12,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=92340.0, ans=0.0 +2024-09-16 22:35:12,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=92340.0, ans=0.05 +2024-09-16 22:35:40,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=92420.0, ans=0.125 +2024-09-16 22:35:40,534 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:35:41,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=92420.0, ans=0.1 +2024-09-16 22:35:50,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=92460.0, ans=0.125 +2024-09-16 22:35:56,280 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.40 vs. limit=22.5 +2024-09-16 22:36:01,408 INFO [train.py:1198] (0/2) Epoch 6, batch 500, loss[loss=0.3241, ctc_loss=0.2505, cr_loss=0.4861, attn_decoder_loss=0.3215, over 29445.00 frames. ], tot_loss[loss=0.2891, ctc_loss=0.213, cr_loss=0.4307, attn_decoder_loss=0.288, over 5327380.28 frames. ], batch size: 94, lr: 1.89e-02, grad_scale: 8.0 +2024-09-16 22:36:18,344 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.047e+01 1.094e+02 1.193e+02 1.318e+02 2.724e+02, threshold=2.387e+02, percent-clipped=2.0 +2024-09-16 22:36:40,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=92580.0, ans=0.125 +2024-09-16 22:36:50,586 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.63 vs. limit=15.0 +2024-09-16 22:36:55,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.70 vs. limit=15.0 +2024-09-16 22:36:59,626 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.90 vs. limit=10.0 +2024-09-16 22:37:02,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=92620.0, ans=0.125 +2024-09-16 22:37:07,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.47 vs. limit=15.0 +2024-09-16 22:37:20,293 INFO [train.py:1198] (0/2) Epoch 6, batch 550, loss[loss=0.2957, ctc_loss=0.2234, cr_loss=0.4255, attn_decoder_loss=0.2943, over 28881.00 frames. ], tot_loss[loss=0.2894, ctc_loss=0.2133, cr_loss=0.4314, attn_decoder_loss=0.2883, over 5421017.69 frames. ], batch size: 104, lr: 1.89e-02, grad_scale: 4.0 +2024-09-16 22:38:03,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=92780.0, ans=0.125 +2024-09-16 22:38:39,400 INFO [train.py:1198] (0/2) Epoch 6, batch 600, loss[loss=0.2999, ctc_loss=0.2273, cr_loss=0.4301, attn_decoder_loss=0.2984, over 29249.00 frames. 
], tot_loss[loss=0.2896, ctc_loss=0.2133, cr_loss=0.4314, attn_decoder_loss=0.2885, over 5508726.32 frames. ], batch size: 100, lr: 1.89e-02, grad_scale: 8.0 +2024-09-16 22:38:44,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=92900.0, ans=0.125 +2024-09-16 22:38:48,031 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.28 vs. limit=15.0 +2024-09-16 22:38:49,534 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.52 vs. limit=6.0 +2024-09-16 22:38:53,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=92940.0, ans=0.1 +2024-09-16 22:38:59,011 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.669e+01 1.124e+02 1.276e+02 1.446e+02 7.170e+02, threshold=2.552e+02, percent-clipped=2.0 +2024-09-16 22:39:06,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=92940.0, ans=0.025 +2024-09-16 22:39:13,417 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=11.64 vs. limit=15.0 +2024-09-16 22:39:22,162 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:39:25,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=93020.0, ans=0.07 +2024-09-16 22:39:46,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=93060.0, ans=0.125 +2024-09-16 22:39:48,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=93060.0, ans=0.1 +2024-09-16 22:39:48,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=93060.0, ans=0.1 +2024-09-16 22:39:55,421 INFO [train.py:1198] (0/2) Epoch 6, batch 650, loss[loss=0.2855, ctc_loss=0.2123, cr_loss=0.4081, attn_decoder_loss=0.2845, over 29770.00 frames. ], tot_loss[loss=0.2884, ctc_loss=0.2119, cr_loss=0.4301, attn_decoder_loss=0.2873, over 5586050.77 frames. ], batch size: 81, lr: 1.89e-02, grad_scale: 4.0 +2024-09-16 22:39:57,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=93100.0, ans=0.0 +2024-09-16 22:40:06,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=93100.0, ans=0.125 +2024-09-16 22:40:14,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=93140.0, ans=0.125 +2024-09-16 22:40:46,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.34 vs. 
limit=22.5 +2024-09-16 22:40:48,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=93220.0, ans=0.125 +2024-09-16 22:40:54,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=93220.0, ans=0.0 +2024-09-16 22:40:55,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=93220.0, ans=0.125 +2024-09-16 22:41:05,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=93260.0, ans=0.0 +2024-09-16 22:41:13,970 INFO [train.py:1198] (0/2) Epoch 6, batch 700, loss[loss=0.2842, ctc_loss=0.2146, cr_loss=0.4334, attn_decoder_loss=0.2823, over 29533.00 frames. ], tot_loss[loss=0.2897, ctc_loss=0.2131, cr_loss=0.4316, attn_decoder_loss=0.2886, over 5636807.70 frames. ], batch size: 76, lr: 1.89e-02, grad_scale: 8.0 +2024-09-16 22:41:25,174 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.35 vs. limit=12.0 +2024-09-16 22:41:27,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=93340.0, ans=0.0 +2024-09-16 22:41:35,157 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.774e+01 1.081e+02 1.183e+02 1.296e+02 3.770e+02, threshold=2.365e+02, percent-clipped=2.0 +2024-09-16 22:41:46,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=93380.0, ans=0.0 +2024-09-16 22:41:53,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=93380.0, ans=0.125 +2024-09-16 22:42:10,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=93420.0, ans=0.0 +2024-09-16 22:42:22,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=93460.0, ans=0.0 +2024-09-16 22:42:32,786 INFO [train.py:1198] (0/2) Epoch 6, batch 750, loss[loss=0.2889, ctc_loss=0.2072, cr_loss=0.433, attn_decoder_loss=0.2883, over 29709.00 frames. ], tot_loss[loss=0.2891, ctc_loss=0.2127, cr_loss=0.4319, attn_decoder_loss=0.2879, over 5676630.50 frames. ], batch size: 82, lr: 1.88e-02, grad_scale: 4.0 +2024-09-16 22:42:37,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=93500.0, ans=0.2 +2024-09-16 22:42:52,111 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.16 vs. limit=15.0 +2024-09-16 22:43:17,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.47 vs. limit=15.0 +2024-09-16 22:43:49,718 INFO [train.py:1198] (0/2) Epoch 6, batch 800, loss[loss=0.271, ctc_loss=0.2022, cr_loss=0.4283, attn_decoder_loss=0.2691, over 29584.00 frames. ], tot_loss[loss=0.289, ctc_loss=0.2124, cr_loss=0.432, attn_decoder_loss=0.2879, over 5707342.02 frames. ], batch size: 73, lr: 1.88e-02, grad_scale: 8.0 +2024-09-16 22:43:58,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.50 vs. 
limit=12.0 +2024-09-16 22:44:12,705 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.997e+01 1.068e+02 1.156e+02 1.307e+02 3.410e+02, threshold=2.312e+02, percent-clipped=1.0 +2024-09-16 22:44:21,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.89 vs. limit=15.0 +2024-09-16 22:44:30,156 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.83 vs. limit=15.0 +2024-09-16 22:44:35,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=93820.0, ans=0.0 +2024-09-16 22:44:52,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=93860.0, ans=0.0 +2024-09-16 22:45:04,311 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.26 vs. limit=15.0 +2024-09-16 22:45:08,096 INFO [train.py:1198] (0/2) Epoch 6, batch 850, loss[loss=0.3093, ctc_loss=0.2226, cr_loss=0.4236, attn_decoder_loss=0.3095, over 29681.00 frames. ], tot_loss[loss=0.2882, ctc_loss=0.2114, cr_loss=0.4305, attn_decoder_loss=0.2872, over 5736315.16 frames. ], batch size: 89, lr: 1.88e-02, grad_scale: 4.0 +2024-09-16 22:45:17,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=93900.0, ans=0.1 +2024-09-16 22:45:20,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.07 vs. limit=15.0 +2024-09-16 22:45:21,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=93940.0, ans=0.07 +2024-09-16 22:45:37,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=93980.0, ans=0.125 +2024-09-16 22:45:44,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=93980.0, ans=0.1 +2024-09-16 22:45:47,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=93980.0, ans=0.0 +2024-09-16 22:46:03,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=94020.0, ans=0.0 +2024-09-16 22:46:04,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=94020.0, ans=0.1 +2024-09-16 22:46:06,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=94020.0, ans=0.025 +2024-09-16 22:46:22,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=94060.0, ans=0.2 +2024-09-16 22:46:24,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=94060.0, ans=0.2 +2024-09-16 22:46:27,094 INFO [train.py:1198] (0/2) Epoch 6, batch 900, loss[loss=0.2642, ctc_loss=0.1927, cr_loss=0.3979, attn_decoder_loss=0.2633, over 29558.00 frames. ], tot_loss[loss=0.2885, ctc_loss=0.2118, cr_loss=0.4311, attn_decoder_loss=0.2875, over 5740180.13 frames. 
], batch size: 73, lr: 1.88e-02, grad_scale: 8.0 +2024-09-16 22:46:27,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=94100.0, ans=0.125 +2024-09-16 22:46:34,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=94100.0, ans=0.1 +2024-09-16 22:46:37,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=94100.0, ans=0.025 +2024-09-16 22:46:39,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=94100.0, ans=0.125 +2024-09-16 22:46:49,681 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.705e+01 1.096e+02 1.207e+02 1.371e+02 3.827e+02, threshold=2.414e+02, percent-clipped=1.0 +2024-09-16 22:46:59,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=94180.0, ans=0.125 +2024-09-16 22:47:10,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=94180.0, ans=15.0 +2024-09-16 22:47:17,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=94220.0, ans=0.125 +2024-09-16 22:47:29,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=94260.0, ans=0.2 +2024-09-16 22:47:29,794 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:47:35,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=94260.0, ans=0.0 +2024-09-16 22:47:40,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=94260.0, ans=0.07 +2024-09-16 22:47:42,987 INFO [train.py:1198] (0/2) Epoch 6, batch 950, loss[loss=0.2816, ctc_loss=0.2128, cr_loss=0.4482, attn_decoder_loss=0.2793, over 29480.00 frames. ], tot_loss[loss=0.2888, ctc_loss=0.2122, cr_loss=0.4307, attn_decoder_loss=0.2877, over 5742990.31 frames. ], batch size: 74, lr: 1.88e-02, grad_scale: 4.0 +2024-09-16 22:47:52,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.58 vs. limit=15.0 +2024-09-16 22:48:03,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.38 vs. limit=15.0 +2024-09-16 22:48:12,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=94380.0, ans=0.05 +2024-09-16 22:48:28,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=94420.0, ans=0.2 +2024-09-16 22:48:32,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=94420.0, ans=0.1 +2024-09-16 22:49:01,838 INFO [train.py:1198] (0/2) Epoch 6, batch 1000, loss[loss=0.2725, ctc_loss=0.1865, cr_loss=0.3981, attn_decoder_loss=0.2732, over 29494.00 frames. ], tot_loss[loss=0.2896, ctc_loss=0.213, cr_loss=0.4315, attn_decoder_loss=0.2885, over 5736723.68 frames. 
], batch size: 77, lr: 1.87e-02, grad_scale: 8.0 +2024-09-16 22:49:09,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=94500.0, ans=0.125 +2024-09-16 22:49:12,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=94500.0, ans=0.09899494936611666 +2024-09-16 22:49:26,357 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.001e+01 1.144e+02 1.278e+02 1.441e+02 2.268e+02, threshold=2.556e+02, percent-clipped=0.0 +2024-09-16 22:49:44,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=5.47 vs. limit=12.0 +2024-09-16 22:50:08,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=94660.0, ans=0.2 +2024-09-16 22:50:14,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=94660.0, ans=0.1 +2024-09-16 22:50:14,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=94660.0, ans=0.125 +2024-09-16 22:50:17,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=94660.0, ans=0.0 +2024-09-16 22:50:19,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=94700.0, ans=0.0 +2024-09-16 22:50:19,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=94700.0, ans=0.125 +2024-09-16 22:50:20,455 INFO [train.py:1198] (0/2) Epoch 6, batch 1050, loss[loss=0.2963, ctc_loss=0.2165, cr_loss=0.4432, attn_decoder_loss=0.2954, over 29687.00 frames. ], tot_loss[loss=0.2884, ctc_loss=0.2117, cr_loss=0.4307, attn_decoder_loss=0.2873, over 5745743.46 frames. ], batch size: 85, lr: 1.87e-02, grad_scale: 4.0 +2024-09-16 22:50:22,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=94700.0, ans=0.0 +2024-09-16 22:50:57,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=94780.0, ans=0.2 +2024-09-16 22:51:27,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=94860.0, ans=0.125 +2024-09-16 22:51:36,711 INFO [train.py:1198] (0/2) Epoch 6, batch 1100, loss[loss=0.2786, ctc_loss=0.1972, cr_loss=0.4147, attn_decoder_loss=0.2784, over 29423.00 frames. ], tot_loss[loss=0.288, ctc_loss=0.2114, cr_loss=0.4298, attn_decoder_loss=0.287, over 5757555.52 frames. ], batch size: 78, lr: 1.87e-02, grad_scale: 8.0 +2024-09-16 22:51:55,920 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.66 vs. 
limit=6.0 +2024-09-16 22:52:02,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=94940.0, ans=0.0 +2024-09-16 22:52:03,769 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.796e+01 1.080e+02 1.185e+02 1.359e+02 3.091e+02, threshold=2.369e+02, percent-clipped=1.0 +2024-09-16 22:52:29,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=95020.0, ans=0.1 +2024-09-16 22:52:45,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=95060.0, ans=0.125 +2024-09-16 22:52:51,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=95100.0, ans=0.2 +2024-09-16 22:52:54,938 INFO [train.py:1198] (0/2) Epoch 6, batch 1150, loss[loss=0.2924, ctc_loss=0.2074, cr_loss=0.4412, attn_decoder_loss=0.292, over 29485.00 frames. ], tot_loss[loss=0.2878, ctc_loss=0.211, cr_loss=0.4289, attn_decoder_loss=0.2869, over 5757667.42 frames. ], batch size: 78, lr: 1.87e-02, grad_scale: 4.0 +2024-09-16 22:53:11,313 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.08 vs. limit=15.0 +2024-09-16 22:53:16,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=95140.0, ans=0.125 +2024-09-16 22:53:37,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.98 vs. limit=15.0 +2024-09-16 22:53:40,609 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.57 vs. limit=12.0 +2024-09-16 22:53:48,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=95220.0, ans=0.125 +2024-09-16 22:53:55,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=23.64 vs. limit=22.5 +2024-09-16 22:54:14,541 INFO [train.py:1198] (0/2) Epoch 6, batch 1200, loss[loss=0.3048, ctc_loss=0.2245, cr_loss=0.4642, attn_decoder_loss=0.3034, over 29665.00 frames. ], tot_loss[loss=0.2886, ctc_loss=0.2118, cr_loss=0.4295, attn_decoder_loss=0.2876, over 5749432.52 frames. 
], batch size: 85, lr: 1.87e-02, grad_scale: 8.0 +2024-09-16 22:54:14,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=95300.0, ans=0.5 +2024-09-16 22:54:39,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=95340.0, ans=0.1 +2024-09-16 22:54:43,654 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.331e+01 1.110e+02 1.224e+02 1.490e+02 4.215e+02, threshold=2.447e+02, percent-clipped=3.0 +2024-09-16 22:55:05,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=95420.0, ans=0.125 +2024-09-16 22:55:30,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=95500.0, ans=0.125 +2024-09-16 22:55:31,311 INFO [train.py:1198] (0/2) Epoch 6, batch 1250, loss[loss=0.2996, ctc_loss=0.2228, cr_loss=0.4511, attn_decoder_loss=0.2981, over 29520.00 frames. ], tot_loss[loss=0.2893, ctc_loss=0.2122, cr_loss=0.4313, attn_decoder_loss=0.2883, over 5777315.75 frames. ], batch size: 92, lr: 1.87e-02, grad_scale: 4.0 +2024-09-16 22:55:38,591 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.14 vs. limit=15.0 +2024-09-16 22:56:22,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=95620.0, ans=0.125 +2024-09-16 22:56:26,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=95620.0, ans=0.0 +2024-09-16 22:56:28,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=95620.0, ans=0.0 +2024-09-16 22:56:33,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=95660.0, ans=0.025 +2024-09-16 22:56:47,926 INFO [train.py:1198] (0/2) Epoch 6, batch 1300, loss[loss=0.3005, ctc_loss=0.2184, cr_loss=0.4297, attn_decoder_loss=0.3001, over 28204.00 frames. ], tot_loss[loss=0.2884, ctc_loss=0.2115, cr_loss=0.4305, attn_decoder_loss=0.2874, over 5782046.21 frames. ], batch size: 111, lr: 1.86e-02, grad_scale: 8.0 +2024-09-16 22:56:56,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=95700.0, ans=0.95 +2024-09-16 22:57:09,351 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.32 vs. limit=15.0 +2024-09-16 22:57:13,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=95740.0, ans=0.0 +2024-09-16 22:57:20,351 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.424e+01 1.067e+02 1.141e+02 1.259e+02 1.965e+02, threshold=2.283e+02, percent-clipped=0.0 +2024-09-16 22:57:21,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.27 vs. limit=10.0 +2024-09-16 22:57:40,087 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.23 vs. 
limit=12.0 +2024-09-16 22:58:01,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=95860.0, ans=0.0 +2024-09-16 22:58:09,241 INFO [train.py:1198] (0/2) Epoch 6, batch 1350, loss[loss=0.274, ctc_loss=0.1891, cr_loss=0.4178, attn_decoder_loss=0.2741, over 29751.00 frames. ], tot_loss[loss=0.2878, ctc_loss=0.2104, cr_loss=0.4296, attn_decoder_loss=0.2868, over 5797928.28 frames. ], batch size: 81, lr: 1.86e-02, grad_scale: 4.0 +2024-09-16 22:58:41,974 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=10.97 vs. limit=15.0 +2024-09-16 22:58:46,133 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-24000.pt +2024-09-16 22:59:20,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=96060.0, ans=0.125 +2024-09-16 22:59:24,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=96060.0, ans=0.0 +2024-09-16 22:59:25,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=96060.0, ans=0.1 +2024-09-16 22:59:33,027 INFO [train.py:1198] (0/2) Epoch 6, batch 1400, loss[loss=0.2663, ctc_loss=0.1939, cr_loss=0.3981, attn_decoder_loss=0.2655, over 29572.00 frames. ], tot_loss[loss=0.2874, ctc_loss=0.2099, cr_loss=0.4288, attn_decoder_loss=0.2865, over 5809564.32 frames. ], batch size: 69, lr: 1.86e-02, grad_scale: 8.0 +2024-09-16 22:59:40,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=96100.0, ans=0.07 +2024-09-16 22:59:54,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=96140.0, ans=0.125 +2024-09-16 23:00:05,175 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.329e+01 1.115e+02 1.239e+02 1.357e+02 3.096e+02, threshold=2.478e+02, percent-clipped=1.0 +2024-09-16 23:00:08,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=96180.0, ans=0.025 +2024-09-16 23:00:21,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=96220.0, ans=0.025 +2024-09-16 23:00:40,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=96260.0, ans=0.125 +2024-09-16 23:00:43,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=96260.0, ans=0.2 +2024-09-16 23:00:48,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=96300.0, ans=0.2 +2024-09-16 23:00:49,733 INFO [train.py:1198] (0/2) Epoch 6, batch 1450, loss[loss=0.3074, ctc_loss=0.2303, cr_loss=0.4587, attn_decoder_loss=0.3058, over 29430.00 frames. ], tot_loss[loss=0.2879, ctc_loss=0.2103, cr_loss=0.4291, attn_decoder_loss=0.287, over 5804583.60 frames. 
], batch size: 94, lr: 1.86e-02, grad_scale: 4.0 +2024-09-16 23:00:54,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=96300.0, ans=0.125 +2024-09-16 23:00:55,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.78 vs. limit=22.5 +2024-09-16 23:01:04,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.18 vs. limit=15.0 +2024-09-16 23:01:23,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=96380.0, ans=0.125 +2024-09-16 23:01:28,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=96380.0, ans=0.125 +2024-09-16 23:01:51,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=96460.0, ans=0.0 +2024-09-16 23:02:07,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=96460.0, ans=0.125 +2024-09-16 23:02:10,000 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.64 vs. limit=5.0 +2024-09-16 23:02:10,328 INFO [train.py:1198] (0/2) Epoch 6, batch 1500, loss[loss=0.2947, ctc_loss=0.2121, cr_loss=0.4467, attn_decoder_loss=0.294, over 29640.00 frames. ], tot_loss[loss=0.2883, ctc_loss=0.2105, cr_loss=0.4299, attn_decoder_loss=0.2874, over 5805757.32 frames. ], batch size: 86, lr: 1.86e-02, grad_scale: 8.0 +2024-09-16 23:02:44,675 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.157e+01 1.117e+02 1.199e+02 1.410e+02 2.285e+02, threshold=2.399e+02, percent-clipped=0.0 +2024-09-16 23:03:02,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=96620.0, ans=0.125 +2024-09-16 23:03:27,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=96700.0, ans=0.125 +2024-09-16 23:03:28,275 INFO [train.py:1198] (0/2) Epoch 6, batch 1550, loss[loss=0.3103, ctc_loss=0.2401, cr_loss=0.4475, attn_decoder_loss=0.3081, over 29509.00 frames. ], tot_loss[loss=0.2881, ctc_loss=0.2106, cr_loss=0.4292, attn_decoder_loss=0.2872, over 5781479.98 frames. ], batch size: 90, lr: 1.85e-02, grad_scale: 4.0 +2024-09-16 23:03:35,191 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.78 vs. limit=15.0 +2024-09-16 23:03:36,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=96700.0, ans=0.125 +2024-09-16 23:03:37,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=96700.0, ans=0.1 +2024-09-16 23:03:45,777 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.64 vs. limit=10.0 +2024-09-16 23:03:55,149 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.83 vs. 
limit=22.5 +2024-09-16 23:04:00,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=96780.0, ans=0.2 +2024-09-16 23:04:03,020 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.41 vs. limit=15.0 +2024-09-16 23:04:13,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=96820.0, ans=0.025 +2024-09-16 23:04:17,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=96820.0, ans=0.125 +2024-09-16 23:04:45,350 INFO [train.py:1198] (0/2) Epoch 6, batch 1600, loss[loss=0.2907, ctc_loss=0.2021, cr_loss=0.4403, attn_decoder_loss=0.2908, over 29663.00 frames. ], tot_loss[loss=0.288, ctc_loss=0.2108, cr_loss=0.4299, attn_decoder_loss=0.287, over 5763810.48 frames. ], batch size: 85, lr: 1.85e-02, grad_scale: 8.0 +2024-09-16 23:04:49,426 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.29 vs. limit=15.0 +2024-09-16 23:04:51,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=96900.0, ans=0.125 +2024-09-16 23:04:51,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=96900.0, ans=0.125 +2024-09-16 23:05:11,513 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=9.23 vs. limit=15.0 +2024-09-16 23:05:22,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.34 vs. limit=10.0 +2024-09-16 23:05:22,804 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.461e+01 1.097e+02 1.251e+02 1.445e+02 2.140e+02, threshold=2.501e+02, percent-clipped=0.0 +2024-09-16 23:05:36,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=97020.0, ans=0.125 +2024-09-16 23:05:54,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=97060.0, ans=0.125 +2024-09-16 23:05:59,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=97060.0, ans=15.0 +2024-09-16 23:06:04,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=97060.0, ans=0.0 +2024-09-16 23:06:06,650 INFO [train.py:1198] (0/2) Epoch 6, batch 1650, loss[loss=0.2911, ctc_loss=0.2054, cr_loss=0.4353, attn_decoder_loss=0.2909, over 29735.00 frames. ], tot_loss[loss=0.288, ctc_loss=0.2109, cr_loss=0.43, attn_decoder_loss=0.287, over 5758371.47 frames. 
], batch size: 89, lr: 1.85e-02, grad_scale: 4.0 +2024-09-16 23:06:31,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=97140.0, ans=0.025 +2024-09-16 23:06:37,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=97180.0, ans=0.125 +2024-09-16 23:06:47,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=97180.0, ans=0.125 +2024-09-16 23:06:52,382 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.61 vs. limit=6.0 +2024-09-16 23:07:19,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=97260.0, ans=0.2 +2024-09-16 23:07:23,290 INFO [train.py:1198] (0/2) Epoch 6, batch 1700, loss[loss=0.2613, ctc_loss=0.1912, cr_loss=0.4063, attn_decoder_loss=0.2601, over 29575.00 frames. ], tot_loss[loss=0.2875, ctc_loss=0.2101, cr_loss=0.4296, attn_decoder_loss=0.2866, over 5779274.26 frames. ], batch size: 69, lr: 1.85e-02, grad_scale: 8.0 +2024-09-16 23:07:30,383 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.53 vs. limit=15.0 +2024-09-16 23:07:49,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=97340.0, ans=0.125 +2024-09-16 23:07:51,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=97340.0, ans=0.125 +2024-09-16 23:08:00,041 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.175e+01 1.040e+02 1.164e+02 1.267e+02 1.903e+02, threshold=2.329e+02, percent-clipped=0.0 +2024-09-16 23:08:27,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=97460.0, ans=0.125 +2024-09-16 23:08:29,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=97460.0, ans=0.125 +2024-09-16 23:08:39,834 INFO [train.py:1198] (0/2) Epoch 6, batch 1750, loss[loss=0.2434, ctc_loss=0.1699, cr_loss=0.371, attn_decoder_loss=0.2433, over 29335.00 frames. ], tot_loss[loss=0.2873, ctc_loss=0.2098, cr_loss=0.4297, attn_decoder_loss=0.2864, over 5788221.78 frames. ], batch size: 67, lr: 1.85e-02, grad_scale: 4.0 +2024-09-16 23:08:51,269 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.48 vs. limit=15.0 +2024-09-16 23:08:52,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=97500.0, ans=0.125 +2024-09-16 23:08:52,739 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.07 vs. limit=6.0 +2024-09-16 23:08:59,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=97540.0, ans=0.2 +2024-09-16 23:09:02,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.65 vs. 
limit=15.0 +2024-09-16 23:09:32,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.49 vs. limit=6.0 +2024-09-16 23:09:47,197 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=6.47 vs. limit=12.0 +2024-09-16 23:09:57,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=97660.0, ans=0.0 +2024-09-16 23:10:01,593 INFO [train.py:1198] (0/2) Epoch 6, batch 1800, loss[loss=0.3136, ctc_loss=0.2344, cr_loss=0.472, attn_decoder_loss=0.312, over 29681.00 frames. ], tot_loss[loss=0.2876, ctc_loss=0.2101, cr_loss=0.4299, attn_decoder_loss=0.2867, over 5791212.52 frames. ], batch size: 83, lr: 1.85e-02, grad_scale: 8.0 +2024-09-16 23:10:17,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=97740.0, ans=0.025 +2024-09-16 23:10:39,658 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.002e+01 1.085e+02 1.174e+02 1.306e+02 4.568e+02, threshold=2.348e+02, percent-clipped=1.0 +2024-09-16 23:10:46,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=97820.0, ans=0.125 +2024-09-16 23:10:46,765 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.10 vs. limit=15.0 +2024-09-16 23:10:47,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=97820.0, ans=0.125 +2024-09-16 23:11:04,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.36 vs. limit=12.0 +2024-09-16 23:11:12,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=97860.0, ans=0.1 +2024-09-16 23:11:18,205 INFO [train.py:1198] (0/2) Epoch 6, batch 1850, loss[loss=0.299, ctc_loss=0.22, cr_loss=0.4457, attn_decoder_loss=0.2978, over 29651.00 frames. ], tot_loss[loss=0.2871, ctc_loss=0.2094, cr_loss=0.4287, attn_decoder_loss=0.2862, over 5796678.80 frames. ], batch size: 86, lr: 1.84e-02, grad_scale: 4.0 +2024-09-16 23:11:23,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=97900.0, ans=0.0 +2024-09-16 23:11:24,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=97900.0, ans=0.0 +2024-09-16 23:11:26,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.23 vs. 
limit=6.0 +2024-09-16 23:11:32,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=97940.0, ans=0.1 +2024-09-16 23:11:50,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=97980.0, ans=0.125 +2024-09-16 23:12:10,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=98020.0, ans=0.125 +2024-09-16 23:12:22,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=98060.0, ans=0.0 +2024-09-16 23:12:33,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=98100.0, ans=0.2 +2024-09-16 23:12:34,394 INFO [train.py:1198] (0/2) Epoch 6, batch 1900, loss[loss=0.2933, ctc_loss=0.2112, cr_loss=0.4306, attn_decoder_loss=0.2929, over 29704.00 frames. ], tot_loss[loss=0.2879, ctc_loss=0.2099, cr_loss=0.4297, attn_decoder_loss=0.287, over 5804275.63 frames. ], batch size: 89, lr: 1.84e-02, grad_scale: 8.0 +2024-09-16 23:12:49,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=98140.0, ans=0.2 +2024-09-16 23:13:00,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=98140.0, ans=0.0 +2024-09-16 23:13:08,161 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.83 vs. limit=10.0 +2024-09-16 23:13:10,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=98180.0, ans=0.125 +2024-09-16 23:13:16,230 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.164e+01 1.098e+02 1.206e+02 1.393e+02 1.994e+02, threshold=2.412e+02, percent-clipped=0.0 +2024-09-16 23:13:17,272 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.93 vs. limit=6.0 +2024-09-16 23:13:24,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=98220.0, ans=0.0 +2024-09-16 23:13:49,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=98260.0, ans=0.0 +2024-09-16 23:13:55,787 INFO [train.py:1198] (0/2) Epoch 6, batch 1950, loss[loss=0.2948, ctc_loss=0.2181, cr_loss=0.4641, attn_decoder_loss=0.293, over 29465.00 frames. ], tot_loss[loss=0.2895, ctc_loss=0.2113, cr_loss=0.4323, attn_decoder_loss=0.2886, over 5819075.19 frames. 
], batch size: 78, lr: 1.84e-02, grad_scale: 4.0 +2024-09-16 23:14:02,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=98300.0, ans=0.0 +2024-09-16 23:14:13,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=98340.0, ans=0.2 +2024-09-16 23:14:22,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=98340.0, ans=0.0 +2024-09-16 23:14:50,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=98420.0, ans=0.125 +2024-09-16 23:14:52,684 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.00 vs. limit=22.5 +2024-09-16 23:14:52,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=98420.0, ans=15.0 +2024-09-16 23:15:13,562 INFO [train.py:1198] (0/2) Epoch 6, batch 2000, loss[loss=0.2615, ctc_loss=0.19, cr_loss=0.4243, attn_decoder_loss=0.2601, over 29327.00 frames. ], tot_loss[loss=0.2898, ctc_loss=0.2115, cr_loss=0.432, attn_decoder_loss=0.2888, over 5797138.94 frames. ], batch size: 67, lr: 1.84e-02, grad_scale: 8.0 +2024-09-16 23:15:17,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.49 vs. limit=10.0 +2024-09-16 23:15:20,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=98500.0, ans=0.1 +2024-09-16 23:15:55,053 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.337e+01 1.180e+02 1.301e+02 1.522e+02 2.715e+02, threshold=2.602e+02, percent-clipped=3.0 +2024-09-16 23:15:58,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=98620.0, ans=0.0 +2024-09-16 23:16:03,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=98620.0, ans=0.1 +2024-09-16 23:16:13,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=98660.0, ans=0.125 +2024-09-16 23:16:29,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=98700.0, ans=0.125 +2024-09-16 23:16:30,526 INFO [train.py:1198] (0/2) Epoch 6, batch 2050, loss[loss=0.2679, ctc_loss=0.1925, cr_loss=0.4249, attn_decoder_loss=0.2668, over 29417.00 frames. ], tot_loss[loss=0.2885, ctc_loss=0.2106, cr_loss=0.4306, attn_decoder_loss=0.2876, over 5789478.20 frames. 
], batch size: 70, lr: 1.84e-02, grad_scale: 4.0 +2024-09-16 23:16:46,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=98740.0, ans=0.125 +2024-09-16 23:17:04,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn1.whiten.whitening_limit, batch_count=98780.0, ans=22.5 +2024-09-16 23:17:15,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=98780.0, ans=0.2 +2024-09-16 23:17:36,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=98860.0, ans=0.2 +2024-09-16 23:17:38,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=98860.0, ans=10.0 +2024-09-16 23:17:43,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=98860.0, ans=0.2 +2024-09-16 23:17:43,871 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.82 vs. limit=22.5 +2024-09-16 23:17:52,105 INFO [train.py:1198] (0/2) Epoch 6, batch 2100, loss[loss=0.2955, ctc_loss=0.2177, cr_loss=0.4486, attn_decoder_loss=0.2942, over 29745.00 frames. ], tot_loss[loss=0.2872, ctc_loss=0.2091, cr_loss=0.4294, attn_decoder_loss=0.2864, over 5801284.40 frames. ], batch size: 81, lr: 1.84e-02, grad_scale: 8.0 +2024-09-16 23:18:01,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=98900.0, ans=0.125 +2024-09-16 23:18:02,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=98900.0, ans=0.125 +2024-09-16 23:18:06,739 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.63 vs. limit=15.0 +2024-09-16 23:18:09,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=98940.0, ans=0.1 +2024-09-16 23:18:19,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=98940.0, ans=0.025 +2024-09-16 23:18:28,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=98980.0, ans=0.0 +2024-09-16 23:18:31,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.03 vs. limit=22.5 +2024-09-16 23:18:34,551 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.717e+01 1.049e+02 1.121e+02 1.246e+02 2.037e+02, threshold=2.242e+02, percent-clipped=0.0 +2024-09-16 23:18:39,909 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.38 vs. limit=15.0 +2024-09-16 23:19:08,355 INFO [train.py:1198] (0/2) Epoch 6, batch 2150, loss[loss=0.2854, ctc_loss=0.2104, cr_loss=0.4161, attn_decoder_loss=0.2844, over 29453.00 frames. ], tot_loss[loss=0.2862, ctc_loss=0.2076, cr_loss=0.4275, attn_decoder_loss=0.2854, over 5815283.94 frames. 
], batch size: 78, lr: 1.83e-02, grad_scale: 4.0 +2024-09-16 23:19:14,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=99100.0, ans=0.125 +2024-09-16 23:20:08,413 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.95 vs. limit=10.0 +2024-09-16 23:20:09,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=99260.0, ans=0.0 +2024-09-16 23:20:12,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=99260.0, ans=0.125 +2024-09-16 23:20:25,666 INFO [train.py:1198] (0/2) Epoch 6, batch 2200, loss[loss=0.3036, ctc_loss=0.2273, cr_loss=0.4463, attn_decoder_loss=0.3022, over 29619.00 frames. ], tot_loss[loss=0.2865, ctc_loss=0.208, cr_loss=0.4276, attn_decoder_loss=0.2857, over 5811673.09 frames. ], batch size: 86, lr: 1.83e-02, grad_scale: 8.0 +2024-09-16 23:20:40,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.16 vs. limit=22.5 +2024-09-16 23:20:52,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=99340.0, ans=0.07 +2024-09-16 23:21:12,201 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.295e+01 1.080e+02 1.191e+02 1.298e+02 2.659e+02, threshold=2.382e+02, percent-clipped=1.0 +2024-09-16 23:21:14,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=99420.0, ans=0.2 +2024-09-16 23:21:14,123 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:21:14,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=99420.0, ans=0.1 +2024-09-16 23:21:45,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=99500.0, ans=0.0 +2024-09-16 23:21:46,865 INFO [train.py:1198] (0/2) Epoch 6, batch 2250, loss[loss=0.303, ctc_loss=0.2261, cr_loss=0.4892, attn_decoder_loss=0.3006, over 29690.00 frames. ], tot_loss[loss=0.2864, ctc_loss=0.2077, cr_loss=0.4276, attn_decoder_loss=0.2856, over 5811280.92 frames. 
], batch size: 82, lr: 1.83e-02, grad_scale: 4.0 +2024-09-16 23:21:56,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=99500.0, ans=0.2 +2024-09-16 23:21:59,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=99500.0, ans=0.125 +2024-09-16 23:22:10,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=99540.0, ans=10.0 +2024-09-16 23:22:11,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=99540.0, ans=0.025 +2024-09-16 23:22:17,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=99580.0, ans=0.125 +2024-09-16 23:22:32,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=99620.0, ans=0.1 +2024-09-16 23:22:34,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=99620.0, ans=0.0 +2024-09-16 23:22:35,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=99620.0, ans=0.1 +2024-09-16 23:22:38,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=99620.0, ans=0.2 +2024-09-16 23:23:02,874 INFO [train.py:1198] (0/2) Epoch 6, batch 2300, loss[loss=0.2578, ctc_loss=0.1833, cr_loss=0.4123, attn_decoder_loss=0.2569, over 29319.00 frames. ], tot_loss[loss=0.2861, ctc_loss=0.2081, cr_loss=0.4273, attn_decoder_loss=0.2853, over 5799364.24 frames. ], batch size: 71, lr: 1.83e-02, grad_scale: 8.0 +2024-09-16 23:23:04,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=99700.0, ans=0.1 +2024-09-16 23:23:30,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=99740.0, ans=0.125 +2024-09-16 23:23:30,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=99740.0, ans=0.1 +2024-09-16 23:23:49,098 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.439e+01 1.127e+02 1.220e+02 1.323e+02 2.863e+02, threshold=2.441e+02, percent-clipped=2.0 +2024-09-16 23:23:52,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=99820.0, ans=0.125 +2024-09-16 23:24:19,889 INFO [train.py:1198] (0/2) Epoch 6, batch 2350, loss[loss=0.2907, ctc_loss=0.2148, cr_loss=0.4519, attn_decoder_loss=0.2891, over 29686.00 frames. ], tot_loss[loss=0.2857, ctc_loss=0.2075, cr_loss=0.4272, attn_decoder_loss=0.2849, over 5806094.23 frames. ], batch size: 83, lr: 1.83e-02, grad_scale: 4.0 +2024-09-16 23:24:27,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=99900.0, ans=0.125 +2024-09-16 23:24:45,393 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.98 vs. 
limit=15.0 +2024-09-16 23:24:55,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=99980.0, ans=0.0 +2024-09-16 23:25:04,227 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.39 vs. limit=15.0 +2024-09-16 23:25:41,910 INFO [train.py:1198] (0/2) Epoch 6, batch 2400, loss[loss=0.2697, ctc_loss=0.1859, cr_loss=0.3685, attn_decoder_loss=0.2708, over 29533.00 frames. ], tot_loss[loss=0.2863, ctc_loss=0.208, cr_loss=0.4276, attn_decoder_loss=0.2855, over 5808869.06 frames. ], batch size: 76, lr: 1.83e-02, grad_scale: 8.0 +2024-09-16 23:25:43,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=100100.0, ans=0.125 +2024-09-16 23:25:53,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=100100.0, ans=0.2 +2024-09-16 23:26:29,668 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.345e+01 1.104e+02 1.208e+02 1.363e+02 5.197e+02, threshold=2.416e+02, percent-clipped=3.0 +2024-09-16 23:26:30,838 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.03 vs. limit=15.0 +2024-09-16 23:26:31,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=100220.0, ans=0.125 +2024-09-16 23:26:31,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=100220.0, ans=0.1 +2024-09-16 23:26:58,937 INFO [train.py:1198] (0/2) Epoch 6, batch 2450, loss[loss=0.283, ctc_loss=0.2008, cr_loss=0.4125, attn_decoder_loss=0.2829, over 29716.00 frames. ], tot_loss[loss=0.2874, ctc_loss=0.2091, cr_loss=0.429, attn_decoder_loss=0.2866, over 5784807.03 frames. ], batch size: 82, lr: 1.82e-02, grad_scale: 4.0 +2024-09-16 23:27:09,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=100300.0, ans=0.125 +2024-09-16 23:27:18,261 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.38 vs. limit=22.5 +2024-09-16 23:27:31,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=100380.0, ans=0.0 +2024-09-16 23:28:02,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=100460.0, ans=0.1 +2024-09-16 23:28:11,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=100460.0, ans=0.025 +2024-09-16 23:28:15,744 INFO [train.py:1198] (0/2) Epoch 6, batch 2500, loss[loss=0.2846, ctc_loss=0.1976, cr_loss=0.4109, attn_decoder_loss=0.2851, over 29629.00 frames. ], tot_loss[loss=0.287, ctc_loss=0.2085, cr_loss=0.4282, attn_decoder_loss=0.2862, over 5795519.57 frames. ], batch size: 86, lr: 1.82e-02, grad_scale: 8.0 +2024-09-16 23:28:20,113 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.57 vs. 
limit=15.0 +2024-09-16 23:28:20,707 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:28:29,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=100540.0, ans=0.0 +2024-09-16 23:28:36,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=100540.0, ans=0.125 +2024-09-16 23:28:43,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=100540.0, ans=0.125 +2024-09-16 23:28:49,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=100580.0, ans=0.125 +2024-09-16 23:29:04,964 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.690e+01 1.098e+02 1.228e+02 1.415e+02 3.536e+02, threshold=2.457e+02, percent-clipped=1.0 +2024-09-16 23:29:10,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=100620.0, ans=0.125 +2024-09-16 23:29:36,830 INFO [train.py:1198] (0/2) Epoch 6, batch 2550, loss[loss=0.2524, ctc_loss=0.177, cr_loss=0.3775, attn_decoder_loss=0.2523, over 29330.00 frames. ], tot_loss[loss=0.2871, ctc_loss=0.2087, cr_loss=0.4292, attn_decoder_loss=0.2863, over 5796892.36 frames. ], batch size: 67, lr: 1.82e-02, grad_scale: 4.0 +2024-09-16 23:29:37,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=100700.0, ans=0.1 +2024-09-16 23:29:41,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=100700.0, ans=0.1 +2024-09-16 23:29:44,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=100700.0, ans=0.2 +2024-09-16 23:29:47,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=100700.0, ans=0.0 +2024-09-16 23:30:15,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=100780.0, ans=0.0 +2024-09-16 23:30:43,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=100860.0, ans=0.2 +2024-09-16 23:30:50,061 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:30:54,115 INFO [train.py:1198] (0/2) Epoch 6, batch 2600, loss[loss=0.2802, ctc_loss=0.2029, cr_loss=0.432, attn_decoder_loss=0.2792, over 29453.00 frames. ], tot_loss[loss=0.2875, ctc_loss=0.2092, cr_loss=0.4292, attn_decoder_loss=0.2866, over 5792815.59 frames. 
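The `optim.py` clipping warnings report a five-number summary (min, quartiles, max) of recently observed gradient norms together with the threshold actually used. The logged values are consistent with the threshold being `Clipping_scale` times the median norm, e.g. 2.0 × 1.228e+02 ≈ 2.457e+02 in the warning just above. A simplified sketch under that assumption follows; the window size and the optimizer integration are hypothetical, not icefall's actual ScaledAdam code.

```python
import torch

def clip_by_median_norm(params, norm_history, clipping_scale=2.0):
    """Clip gradients to clipping_scale * median of recently seen grad norms."""
    grads = [p.grad for p in params if p.grad is not None]
    if not grads:
        return 0.0, 0.0, False
    norm = torch.sqrt(sum((g ** 2).sum() for g in grads))
    norm_history.append(float(norm))
    window = sorted(norm_history[-128:])   # sliding window; size is hypothetical
    threshold = clipping_scale * window[len(window) // 2]
    clipped = float(norm) > threshold      # contributes to "percent-clipped"
    if clipped:
        for g in grads:
            g.mul_(threshold / norm)
    return float(norm), threshold, clipped
```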
], batch size: 78, lr: 1.82e-02, grad_scale: 8.0 +2024-09-16 23:31:00,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=100900.0, ans=10.0 +2024-09-16 23:31:15,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=100940.0, ans=0.04949747468305833 +2024-09-16 23:31:17,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=100940.0, ans=0.0 +2024-09-16 23:31:44,360 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.431e+01 1.047e+02 1.098e+02 1.263e+02 2.416e+02, threshold=2.197e+02, percent-clipped=0.0 +2024-09-16 23:31:45,446 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=16.66 vs. limit=15.0 +2024-09-16 23:31:49,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=101020.0, ans=0.07 +2024-09-16 23:31:52,799 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.72 vs. limit=10.0 +2024-09-16 23:32:01,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=101060.0, ans=0.125 +2024-09-16 23:32:08,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=101100.0, ans=0.1 +2024-09-16 23:32:10,133 INFO [train.py:1198] (0/2) Epoch 6, batch 2650, loss[loss=0.3016, ctc_loss=0.2213, cr_loss=0.4523, attn_decoder_loss=0.3005, over 29316.00 frames. ], tot_loss[loss=0.2873, ctc_loss=0.2088, cr_loss=0.4294, attn_decoder_loss=0.2865, over 5799544.86 frames. ], batch size: 100, lr: 1.82e-02, grad_scale: 4.0 +2024-09-16 23:32:10,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=101100.0, ans=0.1 +2024-09-16 23:32:19,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=101100.0, ans=0.1 +2024-09-16 23:32:19,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=101100.0, ans=0.125 +2024-09-16 23:33:04,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=101220.0, ans=0.125 +2024-09-16 23:33:31,357 INFO [train.py:1198] (0/2) Epoch 6, batch 2700, loss[loss=0.2947, ctc_loss=0.2115, cr_loss=0.4618, attn_decoder_loss=0.2937, over 29499.00 frames. ], tot_loss[loss=0.2881, ctc_loss=0.2094, cr_loss=0.4305, attn_decoder_loss=0.2872, over 5796551.04 frames. 
], batch size: 87, lr: 1.82e-02, grad_scale: 8.0 +2024-09-16 23:33:48,544 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:33:56,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=101340.0, ans=0.125 +2024-09-16 23:33:56,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=101340.0, ans=0.125 +2024-09-16 23:34:04,463 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=17.54 vs. limit=15.0 +2024-09-16 23:34:23,728 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.471e+01 1.102e+02 1.222e+02 1.380e+02 2.898e+02, threshold=2.443e+02, percent-clipped=1.0 +2024-09-16 23:34:33,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=101460.0, ans=0.125 +2024-09-16 23:34:48,556 INFO [train.py:1198] (0/2) Epoch 6, batch 2750, loss[loss=0.2821, ctc_loss=0.2065, cr_loss=0.4325, attn_decoder_loss=0.2809, over 29524.00 frames. ], tot_loss[loss=0.2864, ctc_loss=0.2079, cr_loss=0.4282, attn_decoder_loss=0.2856, over 5795026.51 frames. ], batch size: 75, lr: 1.81e-02, grad_scale: 4.0 +2024-09-16 23:35:46,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=101620.0, ans=0.125 +2024-09-16 23:35:57,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=101660.0, ans=0.0 +2024-09-16 23:35:57,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=101660.0, ans=15.0 +2024-09-16 23:36:06,176 INFO [train.py:1198] (0/2) Epoch 6, batch 2800, loss[loss=0.3237, ctc_loss=0.2719, cr_loss=0.4323, attn_decoder_loss=0.3199, over 20423.00 frames. ], tot_loss[loss=0.2867, ctc_loss=0.2081, cr_loss=0.4274, attn_decoder_loss=0.2859, over 5775867.51 frames. ], batch size: 210, lr: 1.81e-02, grad_scale: 8.0 +2024-09-16 23:36:14,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=101700.0, ans=0.125 +2024-09-16 23:36:15,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=101700.0, ans=0.0 +2024-09-16 23:36:46,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=101780.0, ans=0.125 +2024-09-16 23:36:48,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=101780.0, ans=0.125 +2024-09-16 23:37:04,392 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.339e+01 1.139e+02 1.318e+02 1.529e+02 2.693e+02, threshold=2.635e+02, percent-clipped=4.0 +2024-09-16 23:37:13,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=101860.0, ans=0.125 +2024-09-16 23:37:14,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.19 vs. 
limit=22.5 +2024-09-16 23:37:27,426 INFO [train.py:1198] (0/2) Epoch 6, batch 2850, loss[loss=0.2858, ctc_loss=0.2152, cr_loss=0.4785, attn_decoder_loss=0.283, over 29460.00 frames. ], tot_loss[loss=0.2879, ctc_loss=0.2098, cr_loss=0.4297, attn_decoder_loss=0.287, over 5762817.06 frames. ], batch size: 77, lr: 1.81e-02, grad_scale: 4.0 +2024-09-16 23:37:33,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=101900.0, ans=0.0 +2024-09-16 23:37:36,172 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.49 vs. limit=15.0 +2024-09-16 23:37:39,063 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.11 vs. limit=6.0 +2024-09-16 23:38:02,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=101980.0, ans=0.125 +2024-09-16 23:38:09,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=101980.0, ans=0.2 +2024-09-16 23:38:43,937 INFO [train.py:1198] (0/2) Epoch 6, batch 2900, loss[loss=0.2901, ctc_loss=0.2105, cr_loss=0.4487, attn_decoder_loss=0.289, over 29400.00 frames. ], tot_loss[loss=0.2886, ctc_loss=0.2097, cr_loss=0.4311, attn_decoder_loss=0.2878, over 5787880.05 frames. ], batch size: 79, lr: 1.81e-02, grad_scale: 8.0 +2024-09-16 23:39:24,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=102180.0, ans=0.125 +2024-09-16 23:39:30,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=102220.0, ans=0.2 +2024-09-16 23:39:35,897 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.15 vs. limit=15.0 +2024-09-16 23:39:39,378 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.808e+01 1.155e+02 1.262e+02 1.445e+02 2.631e+02, threshold=2.524e+02, percent-clipped=0.0 +2024-09-16 23:39:54,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=102260.0, ans=0.125 +2024-09-16 23:39:56,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=102260.0, ans=0.1 +2024-09-16 23:40:00,662 INFO [train.py:1198] (0/2) Epoch 6, batch 2950, loss[loss=0.278, ctc_loss=0.2008, cr_loss=0.4208, attn_decoder_loss=0.2772, over 29511.00 frames. ], tot_loss[loss=0.2871, ctc_loss=0.2084, cr_loss=0.4298, attn_decoder_loss=0.2863, over 5783037.81 frames. 
], batch size: 75, lr: 1.81e-02, grad_scale: 4.0 +2024-09-16 23:40:08,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=102300.0, ans=0.1 +2024-09-16 23:40:08,899 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:40:11,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=102300.0, ans=0.125 +2024-09-16 23:40:18,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=102340.0, ans=0.0 +2024-09-16 23:40:24,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=102340.0, ans=0.125 +2024-09-16 23:40:24,777 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.37 vs. limit=22.5 +2024-09-16 23:40:40,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=102380.0, ans=0.0 +2024-09-16 23:40:44,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=102380.0, ans=0.125 +2024-09-16 23:41:13,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=102460.0, ans=0.0 +2024-09-16 23:41:19,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=102460.0, ans=0.125 +2024-09-16 23:41:23,646 INFO [train.py:1198] (0/2) Epoch 6, batch 3000, loss[loss=0.291, ctc_loss=0.2034, cr_loss=0.4196, attn_decoder_loss=0.2915, over 29763.00 frames. ], tot_loss[loss=0.287, ctc_loss=0.2082, cr_loss=0.4292, attn_decoder_loss=0.2862, over 5783716.85 frames. ], batch size: 81, lr: 1.81e-02, grad_scale: 8.0 +2024-09-16 23:41:23,647 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-16 23:41:42,098 INFO [train.py:1230] (0/2) Epoch 6, validation: loss=0.2192, ctc_loss=0.0625, cr_loss=4.383e-15, attn_decoder_loss=0.2366, over 944034.00 frames. +2024-09-16 23:41:42,098 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-16 23:42:03,140 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.74 vs. limit=15.0 +2024-09-16 23:42:06,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.78 vs. limit=15.0 +2024-09-16 23:42:19,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.11 vs. 
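Each record reports `loss` alongside `ctc_loss`, `cr_loss` (consistency regularization), and `attn_decoder_loss`. The experiment directory recorded further down in this log (`...ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02...`) suggests the printed `loss` is their weighted sum, and the logged numbers agree: for the batch 3000 record above, 0.1 · 0.2082 + 0.9 · 0.2862 + 0.02 · 0.4292 ≈ 0.287, and the validation row (where `cr_loss` is ~0) gives 0.1 · 0.0625 + 0.9 · 0.2366 ≈ 0.2192. A sketch of that combination, with the weights read off the directory name rather than the training script itself:

```python
def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float,
                  ctc_scale: float = 0.1, aed_scale: float = 0.9,
                  cr_scale: float = 0.02) -> float:
    """Weighted sum matching the 'loss' field in these log records."""
    return ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss

# Checks against the logged Epoch 6 numbers:
assert abs(combined_loss(0.2082, 0.2862, 0.4292) - 0.287) < 1e-3   # batch 3000
assert abs(combined_loss(0.0625, 0.2366, 0.0) - 0.2192) < 1e-3     # validation
```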
limit=15.0 +2024-09-16 23:42:25,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=102580.0, ans=0.125 +2024-09-16 23:42:28,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=102620.0, ans=0.125 +2024-09-16 23:42:38,833 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.071e+01 1.057e+02 1.164e+02 1.320e+02 2.426e+02, threshold=2.327e+02, percent-clipped=0.0 +2024-09-16 23:42:49,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=102660.0, ans=0.125 +2024-09-16 23:42:58,905 INFO [train.py:1198] (0/2) Epoch 6, batch 3050, loss[loss=0.2855, ctc_loss=0.2163, cr_loss=0.4426, attn_decoder_loss=0.2834, over 29528.00 frames. ], tot_loss[loss=0.2877, ctc_loss=0.2089, cr_loss=0.4305, attn_decoder_loss=0.2868, over 5777961.75 frames. ], batch size: 76, lr: 1.80e-02, grad_scale: 4.0 +2024-09-16 23:43:13,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=102740.0, ans=0.0 +2024-09-16 23:43:26,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=102740.0, ans=0.0 +2024-09-16 23:43:31,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=102780.0, ans=0.125 +2024-09-16 23:43:42,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=102780.0, ans=0.2 +2024-09-16 23:43:46,957 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:43:49,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=102820.0, ans=0.125 +2024-09-16 23:43:54,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten.whitening_limit, batch_count=102820.0, ans=15.0 +2024-09-16 23:43:57,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=102820.0, ans=0.125 +2024-09-16 23:44:07,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=102860.0, ans=0.0 +2024-09-16 23:44:10,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=102860.0, ans=0.0 +2024-09-16 23:44:15,200 INFO [train.py:1198] (0/2) Epoch 6, batch 3100, loss[loss=0.2988, ctc_loss=0.2205, cr_loss=0.4452, attn_decoder_loss=0.2976, over 29219.00 frames. ], tot_loss[loss=0.2872, ctc_loss=0.2087, cr_loss=0.4295, attn_decoder_loss=0.2864, over 5778153.59 frames. ], batch size: 100, lr: 1.80e-02, grad_scale: 8.0 +2024-09-16 23:44:24,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=102900.0, ans=0.0 +2024-09-16 23:44:28,517 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.44 vs. 
limit=6.0 +2024-09-16 23:44:29,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=102940.0, ans=0.1 +2024-09-16 23:44:36,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=102940.0, ans=0.0 +2024-09-16 23:44:45,398 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.26 vs. limit=15.0 +2024-09-16 23:44:52,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=102980.0, ans=0.1 +2024-09-16 23:45:10,984 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.08 vs. limit=8.0 +2024-09-16 23:45:13,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=103020.0, ans=0.0 +2024-09-16 23:45:17,446 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.176e+01 1.082e+02 1.229e+02 1.361e+02 4.744e+02, threshold=2.458e+02, percent-clipped=3.0 +2024-09-16 23:45:19,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=103060.0, ans=0.125 +2024-09-16 23:45:20,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=103060.0, ans=0.1 +2024-09-16 23:45:28,310 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:45:35,733 INFO [train.py:1198] (0/2) Epoch 6, batch 3150, loss[loss=0.3256, ctc_loss=0.2467, cr_loss=0.499, attn_decoder_loss=0.3233, over 28850.00 frames. ], tot_loss[loss=0.2872, ctc_loss=0.2086, cr_loss=0.4295, attn_decoder_loss=0.2864, over 5783817.36 frames. ], batch size: 104, lr: 1.80e-02, grad_scale: 4.0 +2024-09-16 23:45:39,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=103100.0, ans=0.125 +2024-09-16 23:46:00,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=103140.0, ans=0.125 +2024-09-16 23:46:52,917 INFO [train.py:1198] (0/2) Epoch 6, batch 3200, loss[loss=0.2821, ctc_loss=0.2029, cr_loss=0.4413, attn_decoder_loss=0.2811, over 29404.00 frames. ], tot_loss[loss=0.2863, ctc_loss=0.2075, cr_loss=0.4286, attn_decoder_loss=0.2856, over 5793494.78 frames. ], batch size: 79, lr: 1.80e-02, grad_scale: 8.0 +2024-09-16 23:47:07,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=103340.0, ans=0.0 +2024-09-16 23:47:16,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=103340.0, ans=0.1 +2024-09-16 23:47:16,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=103340.0, ans=0.0 +2024-09-16 23:47:38,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=11.44 vs. 
limit=15.0 +2024-09-16 23:47:49,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=103420.0, ans=0.125 +2024-09-16 23:47:52,864 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.859e+01 1.056e+02 1.155e+02 1.311e+02 1.883e+02, threshold=2.309e+02, percent-clipped=0.0 +2024-09-16 23:48:02,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=103460.0, ans=0.1 +2024-09-16 23:48:08,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=103500.0, ans=0.125 +2024-09-16 23:48:09,762 INFO [train.py:1198] (0/2) Epoch 6, batch 3250, loss[loss=0.298, ctc_loss=0.2154, cr_loss=0.4321, attn_decoder_loss=0.2975, over 29715.00 frames. ], tot_loss[loss=0.2864, ctc_loss=0.2073, cr_loss=0.4285, attn_decoder_loss=0.2857, over 5801390.62 frames. ], batch size: 84, lr: 1.80e-02, grad_scale: 4.0 +2024-09-16 23:48:11,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=103500.0, ans=0.125 +2024-09-16 23:48:19,935 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.14 vs. limit=6.0 +2024-09-16 23:48:25,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=103540.0, ans=0.025 +2024-09-16 23:48:41,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=20.56 vs. limit=22.5 +2024-09-16 23:48:46,707 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=11.63 vs. limit=15.0 +2024-09-16 23:48:56,319 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.68 vs. limit=22.5 +2024-09-16 23:49:04,014 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.17 vs. limit=12.0 +2024-09-16 23:49:30,806 INFO [train.py:1198] (0/2) Epoch 6, batch 3300, loss[loss=0.3009, ctc_loss=0.2204, cr_loss=0.458, attn_decoder_loss=0.2996, over 28199.00 frames. ], tot_loss[loss=0.2853, ctc_loss=0.2068, cr_loss=0.4271, attn_decoder_loss=0.2846, over 5799277.53 frames. ], batch size: 111, lr: 1.80e-02, grad_scale: 8.0 +2024-09-16 23:49:32,046 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=13.63 vs. limit=22.5 +2024-09-16 23:49:32,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.53 vs. limit=15.0 +2024-09-16 23:49:39,796 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.12 vs. limit=12.0 +2024-09-16 23:49:46,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=103740.0, ans=0.07 +2024-09-16 23:49:50,685 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.53 vs. 
limit=6.0 +2024-09-16 23:50:04,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=103780.0, ans=0.0 +2024-09-16 23:50:10,191 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.49 vs. limit=10.0 +2024-09-16 23:50:20,535 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.10 vs. limit=15.0 +2024-09-16 23:50:32,019 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.684e+01 1.121e+02 1.244e+02 1.460e+02 3.755e+02, threshold=2.488e+02, percent-clipped=2.0 +2024-09-16 23:50:33,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=103860.0, ans=0.1 +2024-09-16 23:50:41,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=103860.0, ans=0.125 +2024-09-16 23:50:46,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=103900.0, ans=0.2 +2024-09-16 23:50:47,242 INFO [train.py:1198] (0/2) Epoch 6, batch 3350, loss[loss=0.311, ctc_loss=0.2343, cr_loss=0.4343, attn_decoder_loss=0.3098, over 28923.00 frames. ], tot_loss[loss=0.2866, ctc_loss=0.2083, cr_loss=0.4282, attn_decoder_loss=0.2857, over 5775231.94 frames. ], batch size: 104, lr: 1.79e-02, grad_scale: 4.0 +2024-09-16 23:51:26,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=103980.0, ans=0.0 +2024-09-16 23:51:26,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=103980.0, ans=0.1 +2024-09-16 23:51:44,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=104020.0, ans=0.2 +2024-09-16 23:52:00,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=104060.0, ans=0.0 +2024-09-16 23:52:04,357 INFO [train.py:1198] (0/2) Epoch 6, batch 3400, loss[loss=0.2442, ctc_loss=0.1754, cr_loss=0.3692, attn_decoder_loss=0.2437, over 29357.00 frames. ], tot_loss[loss=0.2862, ctc_loss=0.208, cr_loss=0.4274, attn_decoder_loss=0.2854, over 5767427.46 frames. 
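The `Whitening` lines compare a per-module whiteness metric of the activations against a limit; a penalty applies only when the metric exceeds the limit, hence the `metric=X vs. limit=Y` form. One plausible form of the metric, sketched below, is the eigenvalue ratio E[λ²] / E[λ]² of the feature covariance: exactly 1.0 for perfectly white (isotropic) features and larger the more a few directions dominate. This matches the spirit of the `scaling.py` diagnostic but may differ in detail from the actual implementation.

```python
import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    """Whiteness of activations x with shape (num_frames, num_channels)."""
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.t() @ x) / x.shape[0]                   # (C, C) feature covariance
    mean_eig = torch.diagonal(cov).mean()            # E[lambda] = trace / C
    mean_eig_sq = (cov * cov).sum() / cov.shape[0]   # E[lambda^2] = ||C||_F^2 / C
    return mean_eig_sq / (mean_eig ** 2 + 1e-20)

x = torch.randn(1000, 192)          # roughly white input: metric close to 1.0
print(float(whitening_metric(x)))   # a penalty would apply only above the limit
```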
], batch size: 67, lr: 1.79e-02, grad_scale: 8.0 +2024-09-16 23:52:04,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=104100.0, ans=0.125 +2024-09-16 23:52:15,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=104100.0, ans=0.1 +2024-09-16 23:52:54,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=104220.0, ans=0.09899494936611666 +2024-09-16 23:52:54,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=104220.0, ans=0.1 +2024-09-16 23:53:03,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=104220.0, ans=0.0 +2024-09-16 23:53:06,910 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:53:11,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=104260.0, ans=0.025 +2024-09-16 23:53:12,533 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.970e+01 1.077e+02 1.207e+02 1.405e+02 5.237e+02, threshold=2.415e+02, percent-clipped=2.0 +2024-09-16 23:53:21,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=104260.0, ans=0.125 +2024-09-16 23:53:24,250 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.76 vs. limit=15.0 +2024-09-16 23:53:26,203 INFO [train.py:1198] (0/2) Epoch 6, batch 3450, loss[loss=0.2767, ctc_loss=0.1863, cr_loss=0.3676, attn_decoder_loss=0.2785, over 28267.00 frames. ], tot_loss[loss=0.2864, ctc_loss=0.2078, cr_loss=0.4282, attn_decoder_loss=0.2856, over 5773715.88 frames. ], batch size: 111, lr: 1.79e-02, grad_scale: 4.0 +2024-09-16 23:53:28,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=104300.0, ans=0.0 +2024-09-16 23:53:45,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=104340.0, ans=0.1 +2024-09-16 23:53:57,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=104380.0, ans=0.0 +2024-09-16 23:54:06,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=104380.0, ans=0.125 +2024-09-16 23:54:28,718 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.87 vs. limit=22.5 +2024-09-16 23:54:31,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=104460.0, ans=0.0 +2024-09-16 23:54:35,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=104460.0, ans=0.125 +2024-09-16 23:54:41,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=104500.0, ans=0.0 +2024-09-16 23:54:43,085 INFO [train.py:1198] (0/2) Epoch 6, batch 3500, loss[loss=0.2549, ctc_loss=0.1729, cr_loss=0.3595, attn_decoder_loss=0.256, over 29334.00 frames. 
], tot_loss[loss=0.2854, ctc_loss=0.2067, cr_loss=0.4272, attn_decoder_loss=0.2846, over 5775929.94 frames. ], batch size: 71, lr: 1.79e-02, grad_scale: 8.0 +2024-09-16 23:55:00,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=104540.0, ans=0.025 +2024-09-16 23:55:01,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=104540.0, ans=0.125 +2024-09-16 23:55:18,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=104580.0, ans=0.1 +2024-09-16 23:55:19,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=104580.0, ans=0.125 +2024-09-16 23:55:22,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=104580.0, ans=0.0 +2024-09-16 23:55:27,932 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.19 vs. limit=6.0 +2024-09-16 23:55:33,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=104620.0, ans=0.0 +2024-09-16 23:55:41,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=104620.0, ans=0.2 +2024-09-16 23:55:46,678 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.951e+01 1.039e+02 1.144e+02 1.274e+02 4.432e+02, threshold=2.289e+02, percent-clipped=1.0 +2024-09-16 23:55:47,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=104660.0, ans=0.125 +2024-09-16 23:55:58,730 INFO [train.py:1198] (0/2) Epoch 6, batch 3550, loss[loss=0.2854, ctc_loss=0.1878, cr_loss=0.4313, attn_decoder_loss=0.2866, over 29683.00 frames. ], tot_loss[loss=0.2853, ctc_loss=0.2063, cr_loss=0.4273, attn_decoder_loss=0.2846, over 5782620.16 frames. ], batch size: 89, lr: 1.79e-02, grad_scale: 4.0 +2024-09-16 23:55:59,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=104700.0, ans=0.0 +2024-09-16 23:56:54,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=104820.0, ans=0.125 +2024-09-16 23:57:14,765 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.69 vs. limit=15.0 +2024-09-16 23:57:16,979 INFO [train.py:1198] (0/2) Epoch 6, batch 3600, loss[loss=0.2771, ctc_loss=0.197, cr_loss=0.4183, attn_decoder_loss=0.2767, over 29494.00 frames. ], tot_loss[loss=0.2858, ctc_loss=0.2068, cr_loss=0.4287, attn_decoder_loss=0.2851, over 5792158.60 frames. ], batch size: 77, lr: 1.79e-02, grad_scale: 8.0 +2024-09-16 23:57:39,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=104940.0, ans=0.125 +2024-09-16 23:57:43,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.02 vs. 
limit=22.5 +2024-09-16 23:57:47,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=104980.0, ans=0.2 +2024-09-16 23:58:05,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=105020.0, ans=0.125 +2024-09-16 23:58:23,463 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.018e+01 1.117e+02 1.191e+02 1.328e+02 4.381e+02, threshold=2.382e+02, percent-clipped=2.0 +2024-09-16 23:58:26,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=105060.0, ans=0.125 +2024-09-16 23:58:26,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=105060.0, ans=0.125 +2024-09-16 23:58:32,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=105100.0, ans=0.0 +2024-09-16 23:58:34,004 INFO [train.py:1198] (0/2) Epoch 6, batch 3650, loss[loss=0.3052, ctc_loss=0.2165, cr_loss=0.4671, attn_decoder_loss=0.3047, over 29495.00 frames. ], tot_loss[loss=0.2851, ctc_loss=0.2062, cr_loss=0.428, attn_decoder_loss=0.2844, over 5792965.58 frames. ], batch size: 90, lr: 1.79e-02, grad_scale: 4.0 +2024-09-16 23:58:35,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=105100.0, ans=0.0 +2024-09-16 23:58:49,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=7.20 vs. limit=12.0 +2024-09-16 23:58:50,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=105140.0, ans=0.2 +2024-09-16 23:59:13,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=105180.0, ans=0.125 +2024-09-16 23:59:21,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=105220.0, ans=0.125 +2024-09-16 23:59:41,184 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.56 vs. limit=10.0 +2024-09-16 23:59:44,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.02 vs. limit=15.0 +2024-09-16 23:59:49,423 INFO [train.py:1198] (0/2) Epoch 6, batch 3700, loss[loss=0.2965, ctc_loss=0.2131, cr_loss=0.4317, attn_decoder_loss=0.2962, over 29712.00 frames. ], tot_loss[loss=0.2849, ctc_loss=0.2055, cr_loss=0.4278, attn_decoder_loss=0.2842, over 5803201.80 frames. 
], batch size: 84, lr: 1.78e-02, grad_scale: 8.0 +2024-09-16 23:59:57,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=105300.0, ans=0.0 +2024-09-17 00:00:00,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=105300.0, ans=0.1 +2024-09-17 00:00:12,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=105340.0, ans=0.125 +2024-09-17 00:00:12,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=105340.0, ans=0.1 +2024-09-17 00:00:17,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=105340.0, ans=0.0 +2024-09-17 00:00:20,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=105380.0, ans=0.025 +2024-09-17 00:00:27,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=105380.0, ans=0.0 +2024-09-17 00:00:31,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=105380.0, ans=0.1 +2024-09-17 00:00:44,520 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.38 vs. limit=22.5 +2024-09-17 00:00:51,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=105460.0, ans=0.125 +2024-09-17 00:00:55,740 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.176e+01 1.057e+02 1.159e+02 1.295e+02 2.172e+02, threshold=2.318e+02, percent-clipped=0.0 +2024-09-17 00:00:56,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=105460.0, ans=0.125 +2024-09-17 00:01:04,780 INFO [train.py:1198] (0/2) Epoch 6, batch 3750, loss[loss=0.256, ctc_loss=0.1806, cr_loss=0.3977, attn_decoder_loss=0.2555, over 29351.00 frames. ], tot_loss[loss=0.2845, ctc_loss=0.2052, cr_loss=0.4273, attn_decoder_loss=0.2838, over 5807084.24 frames. ], batch size: 67, lr: 1.78e-02, grad_scale: 4.0 +2024-09-17 00:01:05,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=105500.0, ans=0.04949747468305833 +2024-09-17 00:01:33,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=105580.0, ans=0.0 +2024-09-17 00:02:08,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=105660.0, ans=0.0 +2024-09-17 00:02:14,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=105660.0, ans=0.125 +2024-09-17 00:02:20,335 INFO [train.py:1198] (0/2) Epoch 6, batch 3800, loss[loss=0.29, ctc_loss=0.2084, cr_loss=0.4468, attn_decoder_loss=0.2892, over 29624.00 frames. ], tot_loss[loss=0.2841, ctc_loss=0.2048, cr_loss=0.4268, attn_decoder_loss=0.2834, over 5797951.66 frames. 
], batch size: 86, lr: 1.78e-02, grad_scale: 8.0 +2024-09-17 00:02:24,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=105700.0, ans=0.125 +2024-09-17 00:02:43,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=105740.0, ans=0.125 +2024-09-17 00:03:01,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=105780.0, ans=0.0 +2024-09-17 00:03:04,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=105820.0, ans=0.125 +2024-09-17 00:03:19,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=105820.0, ans=0.0 +2024-09-17 00:03:27,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=105860.0, ans=0.0 +2024-09-17 00:03:29,921 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.809e+01 1.097e+02 1.194e+02 1.336e+02 2.111e+02, threshold=2.388e+02, percent-clipped=0.0 +2024-09-17 00:03:33,760 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.91 vs. limit=6.0 +2024-09-17 00:03:37,376 INFO [train.py:1198] (0/2) Epoch 6, batch 3850, loss[loss=0.3076, ctc_loss=0.2325, cr_loss=0.4549, attn_decoder_loss=0.3059, over 29242.00 frames. ], tot_loss[loss=0.2842, ctc_loss=0.2048, cr_loss=0.4273, attn_decoder_loss=0.2836, over 5810079.93 frames. ], batch size: 100, lr: 1.78e-02, grad_scale: 4.0 +2024-09-17 00:03:51,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=105940.0, ans=0.125 +2024-09-17 00:04:20,472 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.89 vs. limit=15.0 +2024-09-17 00:04:20,807 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.27 vs. limit=12.0 +2024-09-17 00:04:36,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=106020.0, ans=0.125 +2024-09-17 00:04:39,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=106060.0, ans=0.0 +2024-09-17 00:04:48,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=106060.0, ans=0.05 +2024-09-17 00:04:54,331 INFO [train.py:1198] (0/2) Epoch 6, batch 3900, loss[loss=0.2856, ctc_loss=0.1985, cr_loss=0.4158, attn_decoder_loss=0.286, over 29649.00 frames. ], tot_loss[loss=0.2842, ctc_loss=0.2042, cr_loss=0.427, attn_decoder_loss=0.2836, over 5814743.86 frames. ], batch size: 86, lr: 1.78e-02, grad_scale: 8.0 +2024-09-17 00:05:05,897 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=4.95 vs. 
limit=15.0 +2024-09-17 00:05:16,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=106140.0, ans=0.0 +2024-09-17 00:05:35,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=106180.0, ans=0.5 +2024-09-17 00:05:36,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=106180.0, ans=0.125 +2024-09-17 00:05:41,925 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.44 vs. limit=15.0 +2024-09-17 00:05:52,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=106260.0, ans=0.125 +2024-09-17 00:06:03,099 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.054e+01 1.064e+02 1.152e+02 1.217e+02 1.852e+02, threshold=2.304e+02, percent-clipped=0.0 +2024-09-17 00:06:03,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=106260.0, ans=0.125 +2024-09-17 00:06:09,212 INFO [train.py:1198] (0/2) Epoch 6, batch 3950, loss[loss=0.2983, ctc_loss=0.2211, cr_loss=0.4533, attn_decoder_loss=0.2968, over 29481.00 frames. ], tot_loss[loss=0.2841, ctc_loss=0.2037, cr_loss=0.4271, attn_decoder_loss=0.2835, over 5834774.02 frames. ], batch size: 97, lr: 1.78e-02, grad_scale: 4.0 +2024-09-17 00:06:17,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=106300.0, ans=0.2 +2024-09-17 00:07:24,790 INFO [train.py:1198] (0/2) Epoch 6, batch 4000, loss[loss=0.2708, ctc_loss=0.1907, cr_loss=0.4163, attn_decoder_loss=0.2704, over 29526.00 frames. ], tot_loss[loss=0.2844, ctc_loss=0.2044, cr_loss=0.4272, attn_decoder_loss=0.2838, over 5813306.35 frames. ], batch size: 74, lr: 1.77e-02, grad_scale: 8.0 +2024-09-17 00:07:39,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=106540.0, ans=0.125 +2024-09-17 00:07:44,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=106540.0, ans=0.125 +2024-09-17 00:08:31,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=106660.0, ans=0.0 +2024-09-17 00:08:36,817 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.741e+01 1.103e+02 1.193e+02 1.340e+02 9.903e+02, threshold=2.386e+02, percent-clipped=2.0 +2024-09-17 00:08:41,362 INFO [train.py:1198] (0/2) Epoch 6, batch 4050, loss[loss=0.332, ctc_loss=0.2825, cr_loss=0.4598, attn_decoder_loss=0.3273, over 19771.00 frames. ], tot_loss[loss=0.2843, ctc_loss=0.2046, cr_loss=0.4267, attn_decoder_loss=0.2836, over 5795907.38 frames. 
], batch size: 210, lr: 1.77e-02, grad_scale: 4.0 +2024-09-17 00:08:44,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=106700.0, ans=0.1 +2024-09-17 00:08:44,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=106700.0, ans=0.125 +2024-09-17 00:09:36,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=106820.0, ans=0.025 +2024-09-17 00:09:44,051 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.91 vs. limit=22.5 +2024-09-17 00:09:56,862 INFO [train.py:1198] (0/2) Epoch 6, batch 4100, loss[loss=0.2999, ctc_loss=0.223, cr_loss=0.4533, attn_decoder_loss=0.2984, over 29507.00 frames. ], tot_loss[loss=0.2847, ctc_loss=0.2051, cr_loss=0.4272, attn_decoder_loss=0.2841, over 5791723.14 frames. ], batch size: 90, lr: 1.77e-02, grad_scale: 8.0 +2024-09-17 00:10:19,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=106940.0, ans=0.05 +2024-09-17 00:10:26,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=106980.0, ans=0.125 +2024-09-17 00:10:51,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=107020.0, ans=0.125 +2024-09-17 00:10:53,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=107020.0, ans=0.5 +2024-09-17 00:10:54,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=107060.0, ans=0.125 +2024-09-17 00:10:58,260 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.88 vs. limit=6.0 +2024-09-17 00:11:07,981 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.979e+01 1.121e+02 1.241e+02 1.471e+02 3.510e+02, threshold=2.481e+02, percent-clipped=3.0 +2024-09-17 00:11:10,806 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.49 vs. limit=8.0 +2024-09-17 00:11:11,062 INFO [train.py:1198] (0/2) Epoch 6, batch 4150, loss[loss=0.26, ctc_loss=0.1756, cr_loss=0.3865, attn_decoder_loss=0.2608, over 29515.00 frames. ], tot_loss[loss=0.2845, ctc_loss=0.2049, cr_loss=0.4271, attn_decoder_loss=0.2839, over 5797480.19 frames. ], batch size: 77, lr: 1.77e-02, grad_scale: 4.0 +2024-09-17 00:11:15,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=107100.0, ans=0.0 +2024-09-17 00:11:23,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=107100.0, ans=0.025 +2024-09-17 00:11:52,635 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.34 vs. 
limit=15.0 +2024-09-17 00:11:55,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=107220.0, ans=0.07 +2024-09-17 00:11:57,305 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.53 vs. limit=12.0 +2024-09-17 00:11:59,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=107220.0, ans=0.125 +2024-09-17 00:12:05,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=107220.0, ans=0.2 +2024-09-17 00:12:16,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=107260.0, ans=0.125 +2024-09-17 00:12:24,078 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.97 vs. limit=12.0 +2024-09-17 00:12:27,411 INFO [train.py:1198] (0/2) Epoch 6, batch 4200, loss[loss=0.3074, ctc_loss=0.2257, cr_loss=0.4743, attn_decoder_loss=0.3059, over 29498.00 frames. ], tot_loss[loss=0.2852, ctc_loss=0.2056, cr_loss=0.4281, attn_decoder_loss=0.2845, over 5800685.23 frames. ], batch size: 90, lr: 1.77e-02, grad_scale: 8.0 +2024-09-17 00:12:27,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=107300.0, ans=0.125 +2024-09-17 00:12:48,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=107340.0, ans=0.125 +2024-09-17 00:12:53,670 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.67 vs. limit=10.0 +2024-09-17 00:13:02,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=107380.0, ans=0.125 +2024-09-17 00:13:06,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=107380.0, ans=0.1 +2024-09-17 00:13:41,773 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.922e+01 1.074e+02 1.154e+02 1.259e+02 3.870e+02, threshold=2.307e+02, percent-clipped=1.0 +2024-09-17 00:13:43,277 INFO [train.py:1198] (0/2) Epoch 6, batch 4250, loss[loss=0.2607, ctc_loss=0.1784, cr_loss=0.3774, attn_decoder_loss=0.2615, over 29520.00 frames. ], tot_loss[loss=0.2852, ctc_loss=0.2053, cr_loss=0.4279, attn_decoder_loss=0.2846, over 5807253.24 frames. ], batch size: 74, lr: 1.77e-02, grad_scale: 4.0 +2024-09-17 00:13:45,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=107500.0, ans=0.125 +2024-09-17 00:14:10,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.33 vs. 
limit=15.0 +2024-09-17 00:14:17,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=107580.0, ans=0.2 +2024-09-17 00:14:26,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=107620.0, ans=0.0 +2024-09-17 00:14:28,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=107620.0, ans=0.125 +2024-09-17 00:14:32,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=107620.0, ans=0.125 +2024-09-17 00:14:42,998 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:14:53,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=107660.0, ans=0.125 +2024-09-17 00:14:53,580 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.24 vs. limit=15.0 +2024-09-17 00:14:57,398 INFO [train.py:1198] (0/2) Epoch 6, batch 4300, loss[loss=0.2875, ctc_loss=0.1993, cr_loss=0.4196, attn_decoder_loss=0.2879, over 29523.00 frames. ], tot_loss[loss=0.2853, ctc_loss=0.205, cr_loss=0.428, attn_decoder_loss=0.2848, over 5796515.43 frames. ], batch size: 87, lr: 1.77e-02, grad_scale: 8.0 +2024-09-17 00:15:02,636 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.74 vs. limit=22.5 +2024-09-17 00:15:28,182 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.83 vs. limit=12.0 +2024-09-17 00:15:35,942 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=8.60 vs. limit=15.0 +2024-09-17 00:15:47,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.78 vs. limit=15.0 +2024-09-17 00:15:56,444 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.23 vs. limit=15.0 +2024-09-17 00:16:13,577 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.817e+01 1.068e+02 1.179e+02 1.314e+02 6.167e+02, threshold=2.359e+02, percent-clipped=2.0 +2024-09-17 00:16:13,599 INFO [train.py:1198] (0/2) Epoch 6, batch 4350, loss[loss=0.2886, ctc_loss=0.2074, cr_loss=0.4218, attn_decoder_loss=0.2882, over 29480.00 frames. ], tot_loss[loss=0.289, ctc_loss=0.2086, cr_loss=0.4335, attn_decoder_loss=0.2883, over 5798126.64 frames. ], batch size: 97, lr: 1.76e-02, grad_scale: 4.0 +2024-09-17 00:16:41,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=107940.0, ans=0.125 +2024-09-17 00:16:56,862 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.32 vs. limit=10.0 +2024-09-17 00:17:08,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.84 vs. 
limit=6.0 +2024-09-17 00:17:13,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=108060.0, ans=0.1 +2024-09-17 00:17:15,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=108060.0, ans=0.0 +2024-09-17 00:17:19,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.whiten.whitening_limit, batch_count=108060.0, ans=12.0 +2024-09-17 00:17:19,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=108060.0, ans=0.125 +2024-09-17 00:17:22,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.09 vs. limit=22.5 +2024-09-17 00:17:25,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.72 vs. limit=6.0 +2024-09-17 00:17:28,660 INFO [train.py:1198] (0/2) Epoch 6, batch 4400, loss[loss=0.2933, ctc_loss=0.2243, cr_loss=0.4556, attn_decoder_loss=0.2908, over 27084.00 frames. ], tot_loss[loss=0.2916, ctc_loss=0.2111, cr_loss=0.4368, attn_decoder_loss=0.2908, over 5767529.78 frames. ], batch size: 124, lr: 1.76e-02, grad_scale: 8.0 +2024-09-17 00:17:44,307 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.99 vs. limit=15.0 +2024-09-17 00:18:01,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=108180.0, ans=0.025 +2024-09-17 00:18:20,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=108220.0, ans=10.0 +2024-09-17 00:18:25,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=108220.0, ans=0.0 +2024-09-17 00:18:38,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=108260.0, ans=0.1 +2024-09-17 00:18:43,509 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:18:44,520 INFO [train.py:1198] (0/2) Epoch 6, batch 4450, loss[loss=0.3186, ctc_loss=0.2646, cr_loss=0.4353, attn_decoder_loss=0.3149, over 20364.00 frames. ], tot_loss[loss=0.2956, ctc_loss=0.2177, cr_loss=0.4399, attn_decoder_loss=0.2945, over 5581725.90 frames. ], batch size: 209, lr: 1.76e-02, grad_scale: 4.0 +2024-09-17 00:18:46,020 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.821e+01 1.110e+02 1.171e+02 1.331e+02 5.376e+02, threshold=2.342e+02, percent-clipped=1.0 +2024-09-17 00:18:47,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=108300.0, ans=0.2 +2024-09-17 00:18:53,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=108300.0, ans=0.0 +2024-09-17 00:19:00,233 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.70 vs. 
limit=15.0 +2024-09-17 00:19:13,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=108380.0, ans=0.0 +2024-09-17 00:19:18,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=108380.0, ans=0.125 +2024-09-17 00:19:27,635 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=9.19 vs. limit=12.0 +2024-09-17 00:19:30,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=108420.0, ans=0.2 +2024-09-17 00:20:00,686 INFO [train.py:1198] (0/2) Epoch 6, batch 4500, loss[loss=0.3152, ctc_loss=0.2611, cr_loss=0.4315, attn_decoder_loss=0.3116, over 20853.00 frames. ], tot_loss[loss=0.3003, ctc_loss=0.2264, cr_loss=0.4419, attn_decoder_loss=0.2987, over 5243340.79 frames. ], batch size: 209, lr: 1.76e-02, grad_scale: 8.0 +2024-09-17 00:20:13,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=108500.0, ans=0.07 +2024-09-17 00:20:19,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=108540.0, ans=0.125 +2024-09-17 00:20:38,670 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-6.pt +2024-09-17 00:21:33,137 WARNING [optim.py:503] (0/2) Scaling gradients by 0.06814046949148178, model_norm_threshold=234.16368103027344 +2024-09-17 00:21:33,345 WARNING [optim.py:575] (0/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.0.norm_self_attn.weight with proportion 0.27, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=3.188e+06, grad_sumsq=4.711e+10, orig_rms_sq=6.766e-05 +2024-09-17 00:21:33,373 INFO [train.py:1198] (0/2) Epoch 7, batch 0, loss[loss=0.3002, ctc_loss=0.1949, cr_loss=0.4463, attn_decoder_loss=0.3019, over 29600.00 frames. ], tot_loss[loss=0.3002, ctc_loss=0.1949, cr_loss=0.4463, attn_decoder_loss=0.3019, over 29600.00 frames. ], batch size: 73, lr: 1.65e-02, grad_scale: 8.0 +2024-09-17 00:21:33,374 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 00:21:51,800 INFO [train.py:1230] (0/2) Epoch 7, validation: loss=0.2253, ctc_loss=0.06341, cr_loss=4.598e-15, attn_decoder_loss=0.2433, over 944034.00 frames. +2024-09-17 00:21:51,800 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 00:21:53,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=108600.0, ans=0.125 +2024-09-17 00:22:19,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=108640.0, ans=0.125 +2024-09-17 00:22:20,262 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.55 vs. limit=15.0 +2024-09-17 00:22:25,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=108680.0, ans=0.2 +2024-09-17 00:22:26,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.79 vs. 
limit=15.0 +2024-09-17 00:22:36,025 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.794e+01 1.158e+02 1.328e+02 1.536e+02 3.436e+03, threshold=2.655e+02, percent-clipped=8.0 +2024-09-17 00:23:11,666 INFO [train.py:1198] (0/2) Epoch 7, batch 50, loss[loss=0.2562, ctc_loss=0.1762, cr_loss=0.3789, attn_decoder_loss=0.2567, over 29457.00 frames. ], tot_loss[loss=0.2905, ctc_loss=0.2127, cr_loss=0.4354, attn_decoder_loss=0.2895, over 1268121.79 frames. ], batch size: 70, lr: 1.65e-02, grad_scale: 4.0 +2024-09-17 00:23:49,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=108880.0, ans=0.0 +2024-09-17 00:23:51,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=108880.0, ans=0.125 +2024-09-17 00:23:53,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.41 vs. limit=15.0 +2024-09-17 00:24:21,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=108960.0, ans=0.0 +2024-09-17 00:24:25,171 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.50 vs. limit=15.0 +2024-09-17 00:24:27,109 INFO [train.py:1198] (0/2) Epoch 7, batch 100, loss[loss=0.2768, ctc_loss=0.1989, cr_loss=0.4219, attn_decoder_loss=0.2761, over 29539.00 frames. ], tot_loss[loss=0.2904, ctc_loss=0.2107, cr_loss=0.4346, attn_decoder_loss=0.2896, over 2251137.67 frames. ], batch size: 76, lr: 1.65e-02, grad_scale: 8.0 +2024-09-17 00:24:28,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=109000.0, ans=0.125 +2024-09-17 00:24:40,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=109040.0, ans=0.0 +2024-09-17 00:24:54,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=109040.0, ans=0.2 +2024-09-17 00:25:08,107 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.54 vs. limit=15.0 +2024-09-17 00:25:08,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.64 vs. limit=15.0 +2024-09-17 00:25:10,522 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.921e+01 1.059e+02 1.169e+02 1.320e+02 2.276e+02, threshold=2.339e+02, percent-clipped=0.0 +2024-09-17 00:25:15,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=109120.0, ans=0.125 +2024-09-17 00:25:19,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=109120.0, ans=0.1 +2024-09-17 00:25:19,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=109120.0, ans=0.025 +2024-09-17 00:25:43,878 INFO [train.py:1198] (0/2) Epoch 7, batch 150, loss[loss=0.2547, ctc_loss=0.1783, cr_loss=0.373, attn_decoder_loss=0.2549, over 29430.00 frames. ], tot_loss[loss=0.2869, ctc_loss=0.2069, cr_loss=0.4318, attn_decoder_loss=0.2862, over 3046228.76 frames. 
], batch size: 70, lr: 1.64e-02, grad_scale: 4.0 +2024-09-17 00:25:57,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=109240.0, ans=0.125 +2024-09-17 00:26:31,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=109320.0, ans=0.125 +2024-09-17 00:26:40,397 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.07 vs. limit=15.0 +2024-09-17 00:26:44,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=109360.0, ans=0.0 +2024-09-17 00:26:46,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.30 vs. limit=10.0 +2024-09-17 00:26:47,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=109360.0, ans=0.0 +2024-09-17 00:26:48,011 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.05 vs. limit=6.0 +2024-09-17 00:27:00,800 INFO [train.py:1198] (0/2) Epoch 7, batch 200, loss[loss=0.3047, ctc_loss=0.2284, cr_loss=0.4459, attn_decoder_loss=0.3032, over 27326.00 frames. ], tot_loss[loss=0.2851, ctc_loss=0.2047, cr_loss=0.4289, attn_decoder_loss=0.2845, over 3658227.32 frames. ], batch size: 124, lr: 1.64e-02, grad_scale: 8.0 +2024-09-17 00:27:13,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=109400.0, ans=0.2 +2024-09-17 00:27:35,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=109480.0, ans=0.0 +2024-09-17 00:27:46,007 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.796e+01 1.025e+02 1.125e+02 1.234e+02 4.171e+02, threshold=2.251e+02, percent-clipped=1.0 +2024-09-17 00:27:47,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=109520.0, ans=0.0 +2024-09-17 00:28:17,035 INFO [train.py:1198] (0/2) Epoch 7, batch 250, loss[loss=0.3025, ctc_loss=0.2156, cr_loss=0.4548, attn_decoder_loss=0.302, over 29225.00 frames. ], tot_loss[loss=0.2847, ctc_loss=0.2038, cr_loss=0.4287, attn_decoder_loss=0.2841, over 4140978.43 frames. 
], batch size: 100, lr: 1.64e-02, grad_scale: 4.0 +2024-09-17 00:28:41,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=109640.0, ans=0.1 +2024-09-17 00:28:52,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=109680.0, ans=0.125 +2024-09-17 00:29:03,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=109720.0, ans=0.125 +2024-09-17 00:29:04,541 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:29:09,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=109720.0, ans=0.125 +2024-09-17 00:29:10,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=109720.0, ans=0.0 +2024-09-17 00:29:24,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=109760.0, ans=0.0 +2024-09-17 00:29:33,125 INFO [train.py:1198] (0/2) Epoch 7, batch 300, loss[loss=0.3021, ctc_loss=0.2258, cr_loss=0.4469, attn_decoder_loss=0.3006, over 29550.00 frames. ], tot_loss[loss=0.2834, ctc_loss=0.2018, cr_loss=0.4265, attn_decoder_loss=0.2829, over 4508648.90 frames. ], batch size: 92, lr: 1.64e-02, grad_scale: 8.0 +2024-09-17 00:29:39,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=109800.0, ans=0.1 +2024-09-17 00:29:42,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=109800.0, ans=0.07 +2024-09-17 00:29:47,056 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.89 vs. limit=6.0 +2024-09-17 00:29:51,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=109840.0, ans=0.125 +2024-09-17 00:30:00,464 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.65 vs. 
limit=22.5 +2024-09-17 00:30:03,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=109840.0, ans=0.2 +2024-09-17 00:30:04,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=109880.0, ans=0.125 +2024-09-17 00:30:16,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=109880.0, ans=0.125 +2024-09-17 00:30:18,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=109880.0, ans=0.125 +2024-09-17 00:30:25,689 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.782e+01 1.034e+02 1.140e+02 1.272e+02 2.553e+02, threshold=2.279e+02, percent-clipped=1.0 +2024-09-17 00:30:30,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=109920.0, ans=0.125 +2024-09-17 00:30:33,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=109920.0, ans=0.125 +2024-09-17 00:30:53,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=110000.0, ans=0.1 +2024-09-17 00:30:54,838 INFO [train.py:1198] (0/2) Epoch 7, batch 350, loss[loss=0.262, ctc_loss=0.1816, cr_loss=0.4106, attn_decoder_loss=0.2619, over 29282.00 frames. ], tot_loss[loss=0.284, ctc_loss=0.2026, cr_loss=0.4275, attn_decoder_loss=0.2836, over 4793032.63 frames. ], batch size: 71, lr: 1.64e-02, grad_scale: 4.0 +2024-09-17 00:31:18,173 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.70 vs. limit=22.5 +2024-09-17 00:31:52,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=110120.0, ans=0.125 +2024-09-17 00:32:03,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=110160.0, ans=0.0 +2024-09-17 00:32:08,211 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.89 vs. limit=15.0 +2024-09-17 00:32:10,451 INFO [train.py:1198] (0/2) Epoch 7, batch 400, loss[loss=0.2874, ctc_loss=0.2024, cr_loss=0.4255, attn_decoder_loss=0.2873, over 29699.00 frames. ], tot_loss[loss=0.2839, ctc_loss=0.2025, cr_loss=0.427, attn_decoder_loss=0.2834, over 5023004.19 frames. ], batch size: 82, lr: 1.64e-02, grad_scale: 8.0 +2024-09-17 00:32:13,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=110200.0, ans=0.025 +2024-09-17 00:32:13,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=110200.0, ans=0.0 +2024-09-17 00:32:18,825 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.94 vs. 
limit=10.0 +2024-09-17 00:32:19,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=110200.0, ans=0.07 +2024-09-17 00:32:24,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=110240.0, ans=0.1 +2024-09-17 00:32:35,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=110240.0, ans=0.1 +2024-09-17 00:32:58,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=110320.0, ans=0.0 +2024-09-17 00:32:59,546 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.594e+01 1.062e+02 1.160e+02 1.275e+02 1.904e+02, threshold=2.320e+02, percent-clipped=0.0 +2024-09-17 00:33:19,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=110360.0, ans=0.07 +2024-09-17 00:33:24,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=110360.0, ans=0.125 +2024-09-17 00:33:27,321 INFO [train.py:1198] (0/2) Epoch 7, batch 450, loss[loss=0.2988, ctc_loss=0.2247, cr_loss=0.4417, attn_decoder_loss=0.2973, over 29690.00 frames. ], tot_loss[loss=0.2841, ctc_loss=0.2028, cr_loss=0.4276, attn_decoder_loss=0.2836, over 5187596.60 frames. ], batch size: 83, lr: 1.64e-02, grad_scale: 4.0 +2024-09-17 00:33:50,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.69 vs. limit=15.0 +2024-09-17 00:34:29,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=110520.0, ans=0.1 +2024-09-17 00:34:32,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=110560.0, ans=0.0 +2024-09-17 00:34:48,899 INFO [train.py:1198] (0/2) Epoch 7, batch 500, loss[loss=0.2922, ctc_loss=0.2059, cr_loss=0.438, attn_decoder_loss=0.2921, over 29448.00 frames. ], tot_loss[loss=0.2827, ctc_loss=0.2014, cr_loss=0.4263, attn_decoder_loss=0.2823, over 5329787.43 frames. ], batch size: 94, lr: 1.63e-02, grad_scale: 8.0 +2024-09-17 00:34:52,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=110600.0, ans=22.5 +2024-09-17 00:35:04,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=110640.0, ans=0.09899494936611666 +2024-09-17 00:35:23,555 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.99 vs. limit=15.0 +2024-09-17 00:35:39,102 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.537e+01 1.048e+02 1.174e+02 1.330e+02 3.263e+02, threshold=2.347e+02, percent-clipped=4.0 +2024-09-17 00:35:40,351 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.37 vs. limit=15.0 +2024-09-17 00:35:41,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.80 vs. 
limit=15.0 +2024-09-17 00:35:47,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=110720.0, ans=0.125 +2024-09-17 00:35:56,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=110760.0, ans=0.0 +2024-09-17 00:35:59,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=110760.0, ans=0.0 +2024-09-17 00:36:03,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=110800.0, ans=0.125 +2024-09-17 00:36:04,947 INFO [train.py:1198] (0/2) Epoch 7, batch 550, loss[loss=0.2937, ctc_loss=0.2095, cr_loss=0.4412, attn_decoder_loss=0.2932, over 28974.00 frames. ], tot_loss[loss=0.2822, ctc_loss=0.201, cr_loss=0.4247, attn_decoder_loss=0.2818, over 5423528.31 frames. ], batch size: 104, lr: 1.63e-02, grad_scale: 4.0 +2024-09-17 00:36:06,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=110800.0, ans=0.0 +2024-09-17 00:36:13,209 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.02 vs. limit=15.0 +2024-09-17 00:36:23,666 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:36:26,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=110840.0, ans=0.0 +2024-09-17 00:36:59,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=110920.0, ans=0.0 +2024-09-17 00:37:05,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=110960.0, ans=0.125 +2024-09-17 00:37:12,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=110960.0, ans=0.025 +2024-09-17 00:37:15,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=110960.0, ans=0.1 +2024-09-17 00:37:21,446 INFO [train.py:1198] (0/2) Epoch 7, batch 600, loss[loss=0.3014, ctc_loss=0.2204, cr_loss=0.4715, attn_decoder_loss=0.2999, over 29229.00 frames. ], tot_loss[loss=0.2821, ctc_loss=0.2004, cr_loss=0.4246, attn_decoder_loss=0.2818, over 5509576.41 frames. 
], batch size: 100, lr: 1.63e-02, grad_scale: 8.0 +2024-09-17 00:37:21,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=111000.0, ans=0.125 +2024-09-17 00:37:30,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=111000.0, ans=0.05 +2024-09-17 00:37:33,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=111000.0, ans=6.0 +2024-09-17 00:37:41,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=111040.0, ans=0.2 +2024-09-17 00:37:55,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=111080.0, ans=0.1 +2024-09-17 00:37:56,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.46 vs. limit=15.0 +2024-09-17 00:38:02,028 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.75 vs. limit=15.0 +2024-09-17 00:38:09,574 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=8.60 vs. limit=12.0 +2024-09-17 00:38:14,465 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.423e+01 1.086e+02 1.156e+02 1.256e+02 2.672e+02, threshold=2.312e+02, percent-clipped=2.0 +2024-09-17 00:38:19,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=111120.0, ans=0.025 +2024-09-17 00:38:29,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=111160.0, ans=0.0 +2024-09-17 00:38:41,954 INFO [train.py:1198] (0/2) Epoch 7, batch 650, loss[loss=0.2815, ctc_loss=0.1963, cr_loss=0.4084, attn_decoder_loss=0.2818, over 29748.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1988, cr_loss=0.4234, attn_decoder_loss=0.2805, over 5586722.30 frames. ], batch size: 81, lr: 1.63e-02, grad_scale: 4.0 +2024-09-17 00:38:45,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=111200.0, ans=0.125 +2024-09-17 00:38:59,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.80 vs. limit=15.0 +2024-09-17 00:39:27,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.05 vs. limit=22.5 +2024-09-17 00:39:35,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=111320.0, ans=0.2 +2024-09-17 00:39:46,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=111360.0, ans=0.125 +2024-09-17 00:39:58,143 INFO [train.py:1198] (0/2) Epoch 7, batch 700, loss[loss=0.2737, ctc_loss=0.1955, cr_loss=0.4414, attn_decoder_loss=0.2726, over 29541.00 frames. ], tot_loss[loss=0.2815, ctc_loss=0.1997, cr_loss=0.4252, attn_decoder_loss=0.2812, over 5637640.88 frames. 
], batch size: 76, lr: 1.63e-02, grad_scale: 8.0 +2024-09-17 00:39:58,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=111400.0, ans=0.125 +2024-09-17 00:40:00,497 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.14 vs. limit=15.0 +2024-09-17 00:40:24,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=111440.0, ans=0.125 +2024-09-17 00:40:32,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=111480.0, ans=0.0 +2024-09-17 00:40:51,486 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.298e+01 1.046e+02 1.126e+02 1.229e+02 1.906e+02, threshold=2.253e+02, percent-clipped=0.0 +2024-09-17 00:40:51,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=111520.0, ans=0.025 +2024-09-17 00:40:55,455 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.53 vs. limit=15.0 +2024-09-17 00:41:08,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=111560.0, ans=0.125 +2024-09-17 00:41:14,392 INFO [train.py:1198] (0/2) Epoch 7, batch 750, loss[loss=0.289, ctc_loss=0.2029, cr_loss=0.4487, attn_decoder_loss=0.2886, over 29720.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1989, cr_loss=0.4243, attn_decoder_loss=0.2804, over 5677105.83 frames. ], batch size: 82, lr: 1.63e-02, grad_scale: 4.0 +2024-09-17 00:41:23,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=111600.0, ans=0.0 +2024-09-17 00:41:30,098 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.82 vs. limit=15.0 +2024-09-17 00:41:32,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=111640.0, ans=0.125 +2024-09-17 00:41:35,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=111640.0, ans=0.125 +2024-09-17 00:42:29,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=111760.0, ans=0.1 +2024-09-17 00:42:29,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=111760.0, ans=0.09899494936611666 +2024-09-17 00:42:35,173 INFO [train.py:1198] (0/2) Epoch 7, batch 800, loss[loss=0.2507, ctc_loss=0.1639, cr_loss=0.3699, attn_decoder_loss=0.2522, over 29599.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1989, cr_loss=0.4242, attn_decoder_loss=0.2803, over 5707851.40 frames. 
], batch size: 73, lr: 1.63e-02, grad_scale: 8.0 +2024-09-17 00:42:36,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=111800.0, ans=15.0 +2024-09-17 00:42:56,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=111840.0, ans=0.125 +2024-09-17 00:43:02,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=111840.0, ans=0.125 +2024-09-17 00:43:05,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=111880.0, ans=0.0 +2024-09-17 00:43:13,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=111880.0, ans=0.2 +2024-09-17 00:43:16,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=111880.0, ans=0.125 +2024-09-17 00:43:24,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=111920.0, ans=0.025 +2024-09-17 00:43:29,885 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.814e+01 1.067e+02 1.173e+02 1.326e+02 3.037e+02, threshold=2.345e+02, percent-clipped=2.0 +2024-09-17 00:43:31,800 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:43:37,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=111960.0, ans=0.125 +2024-09-17 00:43:50,001 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-28000.pt +2024-09-17 00:43:58,057 INFO [train.py:1198] (0/2) Epoch 7, batch 850, loss[loss=0.3004, ctc_loss=0.2204, cr_loss=0.4198, attn_decoder_loss=0.3, over 29683.00 frames. ], tot_loss[loss=0.2801, ctc_loss=0.1983, cr_loss=0.4222, attn_decoder_loss=0.2798, over 5737440.71 frames. ], batch size: 89, lr: 1.62e-02, grad_scale: 4.0 +2024-09-17 00:43:58,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=112000.0, ans=0.95 +2024-09-17 00:44:11,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=112040.0, ans=0.1 +2024-09-17 00:44:47,734 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.79 vs. limit=6.0 +2024-09-17 00:45:08,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=112160.0, ans=0.025 +2024-09-17 00:45:14,255 INFO [train.py:1198] (0/2) Epoch 7, batch 900, loss[loss=0.2677, ctc_loss=0.1861, cr_loss=0.3989, attn_decoder_loss=0.2679, over 29619.00 frames. ], tot_loss[loss=0.2809, ctc_loss=0.1992, cr_loss=0.4229, attn_decoder_loss=0.2805, over 5741716.61 frames. ], batch size: 73, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:46:01,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=12.09 vs. 
limit=15.0 +2024-09-17 00:46:02,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=112320.0, ans=0.0 +2024-09-17 00:46:09,011 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.42 vs. limit=15.0 +2024-09-17 00:46:13,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=112320.0, ans=0.2 +2024-09-17 00:46:14,981 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.034e+01 1.106e+02 1.225e+02 1.378e+02 5.810e+02, threshold=2.450e+02, percent-clipped=7.0 +2024-09-17 00:46:27,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=112360.0, ans=0.125 +2024-09-17 00:46:31,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=112360.0, ans=0.0 +2024-09-17 00:46:34,342 INFO [train.py:1198] (0/2) Epoch 7, batch 950, loss[loss=0.2576, ctc_loss=0.1698, cr_loss=0.3765, attn_decoder_loss=0.259, over 29514.00 frames. ], tot_loss[loss=0.2813, ctc_loss=0.1996, cr_loss=0.4232, attn_decoder_loss=0.2809, over 5743763.00 frames. ], batch size: 74, lr: 1.62e-02, grad_scale: 4.0 +2024-09-17 00:46:39,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=112400.0, ans=0.125 +2024-09-17 00:46:43,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=112400.0, ans=0.125 +2024-09-17 00:46:45,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=112400.0, ans=0.025 +2024-09-17 00:47:01,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=112440.0, ans=0.125 +2024-09-17 00:47:10,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.10 vs. limit=10.0 +2024-09-17 00:47:14,727 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.50 vs. limit=15.0 +2024-09-17 00:47:30,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=112520.0, ans=0.125 +2024-09-17 00:47:48,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.23 vs. limit=10.0 +2024-09-17 00:47:50,432 INFO [train.py:1198] (0/2) Epoch 7, batch 1000, loss[loss=0.2716, ctc_loss=0.191, cr_loss=0.4203, attn_decoder_loss=0.2712, over 29504.00 frames. ], tot_loss[loss=0.2822, ctc_loss=0.2005, cr_loss=0.4246, attn_decoder_loss=0.2819, over 5738099.41 frames. ], batch size: 77, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:47:55,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=112600.0, ans=0.1 +2024-09-17 00:48:00,434 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.39 vs. 
limit=12.0 +2024-09-17 00:48:36,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=112720.0, ans=0.125 +2024-09-17 00:48:46,755 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.415e+01 1.043e+02 1.127e+02 1.327e+02 3.931e+02, threshold=2.254e+02, percent-clipped=2.0 +2024-09-17 00:48:59,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=112760.0, ans=0.0 +2024-09-17 00:49:06,945 INFO [train.py:1198] (0/2) Epoch 7, batch 1050, loss[loss=0.2815, ctc_loss=0.2017, cr_loss=0.4309, attn_decoder_loss=0.2808, over 29684.00 frames. ], tot_loss[loss=0.2811, ctc_loss=0.1991, cr_loss=0.4229, attn_decoder_loss=0.2808, over 5746101.25 frames. ], batch size: 85, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:49:21,455 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.99 vs. limit=15.0 +2024-09-17 00:49:31,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=112840.0, ans=10.0 +2024-09-17 00:49:33,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=112840.0, ans=0.1 +2024-09-17 00:49:40,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=112880.0, ans=0.025 +2024-09-17 00:49:50,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=112880.0, ans=0.0 +2024-09-17 00:49:53,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.62 vs. limit=22.5 +2024-09-17 00:49:56,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=112920.0, ans=0.125 +2024-09-17 00:50:21,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=112960.0, ans=0.125 +2024-09-17 00:50:26,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=11.42 vs. limit=15.0 +2024-09-17 00:50:28,828 INFO [train.py:1198] (0/2) Epoch 7, batch 1100, loss[loss=0.2692, ctc_loss=0.1817, cr_loss=0.3963, attn_decoder_loss=0.2701, over 29459.00 frames. ], tot_loss[loss=0.2806, ctc_loss=0.1982, cr_loss=0.4221, attn_decoder_loss=0.2804, over 5758094.20 frames. ], batch size: 78, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:50:39,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=113000.0, ans=0.025 +2024-09-17 00:50:46,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=113040.0, ans=0.0 +2024-09-17 00:50:46,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.32 vs. 
limit=22.5 +2024-09-17 00:50:55,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=113040.0, ans=0.125 +2024-09-17 00:51:03,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=7.49 vs. limit=12.0 +2024-09-17 00:51:16,181 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.31 vs. limit=15.0 +2024-09-17 00:51:21,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=113120.0, ans=0.0 +2024-09-17 00:51:26,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=113120.0, ans=0.0 +2024-09-17 00:51:28,961 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.665e+01 1.036e+02 1.119e+02 1.238e+02 1.913e+02, threshold=2.238e+02, percent-clipped=0.0 +2024-09-17 00:51:30,267 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.09 vs. limit=15.0 +2024-09-17 00:51:36,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=113160.0, ans=0.125 +2024-09-17 00:51:37,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=113160.0, ans=0.1 +2024-09-17 00:51:45,873 INFO [train.py:1198] (0/2) Epoch 7, batch 1150, loss[loss=0.2734, ctc_loss=0.1908, cr_loss=0.4116, attn_decoder_loss=0.2734, over 29455.00 frames. ], tot_loss[loss=0.2805, ctc_loss=0.1982, cr_loss=0.4218, attn_decoder_loss=0.2803, over 5756505.84 frames. ], batch size: 78, lr: 1.62e-02, grad_scale: 4.0 +2024-09-17 00:52:06,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=113240.0, ans=0.125 +2024-09-17 00:52:17,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=113280.0, ans=0.0 +2024-09-17 00:52:21,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=113280.0, ans=0.2 +2024-09-17 00:52:24,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=113280.0, ans=0.125 +2024-09-17 00:52:29,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=113280.0, ans=0.125 +2024-09-17 00:52:37,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=113320.0, ans=0.2 +2024-09-17 00:52:43,636 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.86 vs. limit=15.0 +2024-09-17 00:52:52,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=113360.0, ans=0.2 +2024-09-17 00:53:00,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=113360.0, ans=0.04949747468305833 +2024-09-17 00:53:02,763 INFO [train.py:1198] (0/2) Epoch 7, batch 1200, loss[loss=0.2884, ctc_loss=0.2007, cr_loss=0.4373, attn_decoder_loss=0.2884, over 29675.00 frames. 
], tot_loss[loss=0.2817, ctc_loss=0.1993, cr_loss=0.4237, attn_decoder_loss=0.2814, over 5749159.39 frames. ], batch size: 85, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:53:13,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=113400.0, ans=0.0 +2024-09-17 00:53:13,876 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:54:08,721 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.382e+01 1.039e+02 1.128e+02 1.242e+02 2.195e+02, threshold=2.256e+02, percent-clipped=0.0 +2024-09-17 00:54:10,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=113560.0, ans=0.1 +2024-09-17 00:54:12,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=113560.0, ans=0.2 +2024-09-17 00:54:13,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_abs, batch_count=113560.0, ans=0.5 +2024-09-17 00:54:15,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=113560.0, ans=0.125 +2024-09-17 00:54:16,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=113560.0, ans=0.025 +2024-09-17 00:54:24,445 INFO [train.py:1198] (0/2) Epoch 7, batch 1250, loss[loss=0.2892, ctc_loss=0.1942, cr_loss=0.4155, attn_decoder_loss=0.2905, over 29530.00 frames. ], tot_loss[loss=0.282, ctc_loss=0.1995, cr_loss=0.4242, attn_decoder_loss=0.2817, over 5776920.20 frames. ], batch size: 92, lr: 1.61e-02, grad_scale: 4.0 +2024-09-17 00:54:29,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=113600.0, ans=0.2 +2024-09-17 00:54:30,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=113600.0, ans=0.2 +2024-09-17 00:54:56,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=113680.0, ans=0.125 +2024-09-17 00:54:58,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=113680.0, ans=0.125 +2024-09-17 00:55:03,059 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:55:24,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=113760.0, ans=0.0 +2024-09-17 00:55:27,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=113760.0, ans=0.0 +2024-09-17 00:55:28,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=113760.0, ans=0.125 +2024-09-17 00:55:34,156 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.75 vs. limit=6.0 +2024-09-17 00:55:34,392 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.81 vs. 
limit=15.0 +2024-09-17 00:55:40,885 INFO [train.py:1198] (0/2) Epoch 7, batch 1300, loss[loss=0.2962, ctc_loss=0.2073, cr_loss=0.4232, attn_decoder_loss=0.2966, over 28325.00 frames. ], tot_loss[loss=0.2816, ctc_loss=0.1992, cr_loss=0.4239, attn_decoder_loss=0.2813, over 5780499.23 frames. ], batch size: 111, lr: 1.61e-02, grad_scale: 8.0 +2024-09-17 00:55:53,446 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:55:56,962 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.99 vs. limit=15.0 +2024-09-17 00:55:59,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=113840.0, ans=0.0 +2024-09-17 00:56:12,163 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.78 vs. limit=15.0 +2024-09-17 00:56:40,162 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.20 vs. limit=8.0 +2024-09-17 00:56:43,502 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.471e+01 1.026e+02 1.124e+02 1.254e+02 2.028e+02, threshold=2.249e+02, percent-clipped=0.0 +2024-09-17 00:56:57,147 INFO [train.py:1198] (0/2) Epoch 7, batch 1350, loss[loss=0.2909, ctc_loss=0.2142, cr_loss=0.441, attn_decoder_loss=0.2896, over 29760.00 frames. ], tot_loss[loss=0.2811, ctc_loss=0.1985, cr_loss=0.4235, attn_decoder_loss=0.2809, over 5798373.07 frames. ], batch size: 81, lr: 1.61e-02, grad_scale: 4.0 +2024-09-17 00:57:01,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=114000.0, ans=0.0 +2024-09-17 00:57:24,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=114040.0, ans=0.125 +2024-09-17 00:57:33,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=114080.0, ans=0.0 +2024-09-17 00:57:34,122 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.34 vs. limit=10.0 +2024-09-17 00:57:41,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=114120.0, ans=0.2 +2024-09-17 00:57:44,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=114120.0, ans=0.0 +2024-09-17 00:58:18,159 INFO [train.py:1198] (0/2) Epoch 7, batch 1400, loss[loss=0.25, ctc_loss=0.1761, cr_loss=0.3876, attn_decoder_loss=0.2496, over 29566.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1983, cr_loss=0.4236, attn_decoder_loss=0.2806, over 5808805.25 frames. 
], batch size: 69, lr: 1.61e-02, grad_scale: 8.0 +2024-09-17 00:58:20,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=114200.0, ans=0.125 +2024-09-17 00:58:21,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=114200.0, ans=0.0 +2024-09-17 00:58:33,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=114240.0, ans=0.125 +2024-09-17 00:58:35,435 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.20 vs. limit=15.0 +2024-09-17 00:58:47,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=114280.0, ans=0.0 +2024-09-17 00:58:57,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=114280.0, ans=0.125 +2024-09-17 00:58:59,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=114280.0, ans=0.125 +2024-09-17 00:59:01,494 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.92 vs. limit=15.0 +2024-09-17 00:59:15,220 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.52 vs. limit=22.5 +2024-09-17 00:59:21,791 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.456e+01 9.951e+01 1.071e+02 1.173e+02 2.370e+02, threshold=2.143e+02, percent-clipped=1.0 +2024-09-17 00:59:22,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=114360.0, ans=0.125 +2024-09-17 00:59:23,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=114360.0, ans=0.0 +2024-09-17 00:59:28,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=114360.0, ans=0.0 +2024-09-17 00:59:34,522 INFO [train.py:1198] (0/2) Epoch 7, batch 1450, loss[loss=0.2967, ctc_loss=0.2084, cr_loss=0.4681, attn_decoder_loss=0.2961, over 29420.00 frames. ], tot_loss[loss=0.2812, ctc_loss=0.1984, cr_loss=0.4238, attn_decoder_loss=0.281, over 5804287.07 frames. 
], batch size: 94, lr: 1.61e-02, grad_scale: 4.0 +2024-09-17 00:59:34,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=114400.0, ans=0.0 +2024-09-17 00:59:36,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=114400.0, ans=0.025 +2024-09-17 00:59:37,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=114400.0, ans=0.0 +2024-09-17 01:00:17,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=114480.0, ans=0.125 +2024-09-17 01:00:21,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=114520.0, ans=0.2 +2024-09-17 01:00:25,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=114520.0, ans=0.2 +2024-09-17 01:00:35,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=114560.0, ans=0.1 +2024-09-17 01:00:50,573 INFO [train.py:1198] (0/2) Epoch 7, batch 1500, loss[loss=0.286, ctc_loss=0.1926, cr_loss=0.4132, attn_decoder_loss=0.2872, over 29642.00 frames. ], tot_loss[loss=0.2814, ctc_loss=0.1981, cr_loss=0.4235, attn_decoder_loss=0.2813, over 5805154.06 frames. ], batch size: 86, lr: 1.61e-02, grad_scale: 8.0 +2024-09-17 01:00:56,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=114600.0, ans=0.1 +2024-09-17 01:01:09,258 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:01:35,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=114720.0, ans=0.125 +2024-09-17 01:01:58,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.51 vs. limit=15.0 +2024-09-17 01:01:58,844 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.036e+01 1.083e+02 1.173e+02 1.293e+02 2.517e+02, threshold=2.346e+02, percent-clipped=1.0 +2024-09-17 01:02:02,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=114760.0, ans=0.0 +2024-09-17 01:02:11,566 INFO [train.py:1198] (0/2) Epoch 7, batch 1550, loss[loss=0.2991, ctc_loss=0.204, cr_loss=0.4337, attn_decoder_loss=0.3, over 29475.00 frames. ], tot_loss[loss=0.2816, ctc_loss=0.1988, cr_loss=0.4239, attn_decoder_loss=0.2814, over 5782047.09 frames. ], batch size: 90, lr: 1.61e-02, grad_scale: 4.0 +2024-09-17 01:02:28,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=114840.0, ans=0.125 +2024-09-17 01:02:31,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=114840.0, ans=0.1 +2024-09-17 01:02:45,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.42 vs. 
limit=15.0 +2024-09-17 01:02:58,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=114920.0, ans=0.05 +2024-09-17 01:03:00,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=114920.0, ans=0.125 +2024-09-17 01:03:05,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=114920.0, ans=0.0 +2024-09-17 01:03:10,187 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.26 vs. limit=10.0 +2024-09-17 01:03:26,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=115000.0, ans=0.125 +2024-09-17 01:03:27,526 INFO [train.py:1198] (0/2) Epoch 7, batch 1600, loss[loss=0.2878, ctc_loss=0.2003, cr_loss=0.4401, attn_decoder_loss=0.2878, over 29666.00 frames. ], tot_loss[loss=0.2814, ctc_loss=0.1989, cr_loss=0.4228, attn_decoder_loss=0.2811, over 5764087.36 frames. ], batch size: 85, lr: 1.60e-02, grad_scale: 8.0 +2024-09-17 01:03:29,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=115000.0, ans=0.125 +2024-09-17 01:03:58,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=115080.0, ans=0.125 +2024-09-17 01:04:31,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=115160.0, ans=0.125 +2024-09-17 01:04:33,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=115160.0, ans=0.125 +2024-09-17 01:04:34,563 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.309e+01 1.047e+02 1.116e+02 1.262e+02 4.085e+02, threshold=2.232e+02, percent-clipped=3.0 +2024-09-17 01:04:43,957 INFO [train.py:1198] (0/2) Epoch 7, batch 1650, loss[loss=0.3057, ctc_loss=0.2193, cr_loss=0.4288, attn_decoder_loss=0.3058, over 29712.00 frames. ], tot_loss[loss=0.2812, ctc_loss=0.1987, cr_loss=0.4228, attn_decoder_loss=0.281, over 5757037.64 frames. 
], batch size: 89, lr: 1.60e-02, grad_scale: 4.0 +2024-09-17 01:05:05,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=115240.0, ans=0.04949747468305833 +2024-09-17 01:05:08,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=115240.0, ans=0.0 +2024-09-17 01:05:08,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=115240.0, ans=0.1 +2024-09-17 01:05:53,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=115360.0, ans=0.125 +2024-09-17 01:05:57,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=115360.0, ans=0.125 +2024-09-17 01:05:57,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=115360.0, ans=0.0 +2024-09-17 01:06:04,530 INFO [train.py:1198] (0/2) Epoch 7, batch 1700, loss[loss=0.2522, ctc_loss=0.1741, cr_loss=0.3712, attn_decoder_loss=0.2526, over 29610.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1976, cr_loss=0.4221, attn_decoder_loss=0.2806, over 5779471.46 frames. ], batch size: 69, lr: 1.60e-02, grad_scale: 8.0 +2024-09-17 01:06:12,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=115400.0, ans=0.1 +2024-09-17 01:06:33,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.54 vs. limit=6.0 +2024-09-17 01:06:55,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=115520.0, ans=0.2 +2024-09-17 01:06:57,905 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0 +2024-09-17 01:07:13,847 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.613e+01 9.967e+01 1.096e+02 1.177e+02 1.822e+02, threshold=2.192e+02, percent-clipped=0.0 +2024-09-17 01:07:14,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=115560.0, ans=0.125 +2024-09-17 01:07:17,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=115560.0, ans=0.1 +2024-09-17 01:07:21,576 INFO [train.py:1198] (0/2) Epoch 7, batch 1750, loss[loss=0.2431, ctc_loss=0.1699, cr_loss=0.3876, attn_decoder_loss=0.2427, over 29372.00 frames. ], tot_loss[loss=0.2806, ctc_loss=0.1977, cr_loss=0.4225, attn_decoder_loss=0.2804, over 5788374.68 frames. 
], batch size: 67, lr: 1.60e-02, grad_scale: 4.0 +2024-09-17 01:07:32,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=115600.0, ans=0.0 +2024-09-17 01:07:50,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=115640.0, ans=0.1 +2024-09-17 01:07:50,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=115640.0, ans=0.0 +2024-09-17 01:07:51,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=115680.0, ans=0.125 +2024-09-17 01:07:54,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=115680.0, ans=0.04949747468305833 +2024-09-17 01:07:55,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.25 vs. limit=22.5 +2024-09-17 01:08:14,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=115720.0, ans=0.2 +2024-09-17 01:08:14,711 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.14 vs. limit=15.0 +2024-09-17 01:08:21,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=115760.0, ans=0.04949747468305833 +2024-09-17 01:08:32,396 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:08:38,043 INFO [train.py:1198] (0/2) Epoch 7, batch 1800, loss[loss=0.2958, ctc_loss=0.2143, cr_loss=0.4562, attn_decoder_loss=0.2948, over 29684.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1978, cr_loss=0.4234, attn_decoder_loss=0.2805, over 5791502.68 frames. ], batch size: 83, lr: 1.60e-02, grad_scale: 8.0 +2024-09-17 01:08:42,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.90 vs. limit=15.0 +2024-09-17 01:08:49,852 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.62 vs. limit=22.5 +2024-09-17 01:09:01,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=115840.0, ans=0.0 +2024-09-17 01:09:04,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=115840.0, ans=0.125 +2024-09-17 01:09:22,202 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.86 vs. limit=10.0 +2024-09-17 01:09:50,653 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.679e+01 1.033e+02 1.126e+02 1.316e+02 3.290e+02, threshold=2.253e+02, percent-clipped=1.0 +2024-09-17 01:09:57,303 INFO [train.py:1198] (0/2) Epoch 7, batch 1850, loss[loss=0.3015, ctc_loss=0.2104, cr_loss=0.4517, attn_decoder_loss=0.3016, over 29651.00 frames. ], tot_loss[loss=0.2804, ctc_loss=0.1972, cr_loss=0.4231, attn_decoder_loss=0.2803, over 5797441.12 frames. 
], batch size: 86, lr: 1.60e-02, grad_scale: 4.0 +2024-09-17 01:10:30,740 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.18 vs. limit=22.5 +2024-09-17 01:11:15,080 INFO [train.py:1198] (0/2) Epoch 7, batch 1900, loss[loss=0.279, ctc_loss=0.1902, cr_loss=0.3955, attn_decoder_loss=0.2801, over 29705.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1973, cr_loss=0.4234, attn_decoder_loss=0.2806, over 5804703.35 frames. ], batch size: 89, lr: 1.60e-02, grad_scale: 8.0 +2024-09-17 01:11:21,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=116200.0, ans=0.0 +2024-09-17 01:11:24,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=116200.0, ans=0.125 +2024-09-17 01:11:24,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=116200.0, ans=0.025 +2024-09-17 01:11:54,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.96 vs. limit=12.0 +2024-09-17 01:12:13,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=116320.0, ans=0.125 +2024-09-17 01:12:16,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=116360.0, ans=0.0 +2024-09-17 01:12:26,914 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.068e+01 1.016e+02 1.078e+02 1.162e+02 1.899e+02, threshold=2.156e+02, percent-clipped=0.0 +2024-09-17 01:12:28,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=116360.0, ans=0.0 +2024-09-17 01:12:31,512 INFO [train.py:1198] (0/2) Epoch 7, batch 1950, loss[loss=0.2803, ctc_loss=0.1942, cr_loss=0.4475, attn_decoder_loss=0.2799, over 29468.00 frames. ], tot_loss[loss=0.2817, ctc_loss=0.1975, cr_loss=0.4248, attn_decoder_loss=0.2816, over 5819381.58 frames. ], batch size: 78, lr: 1.60e-02, grad_scale: 4.0 +2024-09-17 01:12:37,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=116400.0, ans=0.0 +2024-09-17 01:12:38,102 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:13:07,832 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.23 vs. limit=15.0 +2024-09-17 01:13:24,358 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.48 vs. limit=12.0 +2024-09-17 01:13:36,136 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.67 vs. limit=15.0 +2024-09-17 01:13:46,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=116560.0, ans=0.0 +2024-09-17 01:13:50,415 INFO [train.py:1198] (0/2) Epoch 7, batch 2000, loss[loss=0.2605, ctc_loss=0.1898, cr_loss=0.394, attn_decoder_loss=0.2596, over 29380.00 frames. ], tot_loss[loss=0.2825, ctc_loss=0.1986, cr_loss=0.4254, attn_decoder_loss=0.2823, over 5796750.44 frames. 
], batch size: 67, lr: 1.59e-02, grad_scale: 8.0 +2024-09-17 01:13:50,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=116600.0, ans=0.0 +2024-09-17 01:14:10,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=116640.0, ans=0.0 +2024-09-17 01:14:12,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=116640.0, ans=15.0 +2024-09-17 01:14:13,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=116640.0, ans=0.125 +2024-09-17 01:14:15,393 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.66 vs. limit=15.0 +2024-09-17 01:14:16,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=116640.0, ans=0.125 +2024-09-17 01:14:18,487 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.41 vs. limit=15.0 +2024-09-17 01:14:37,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=116720.0, ans=0.0 +2024-09-17 01:14:45,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=116720.0, ans=0.125 +2024-09-17 01:15:06,313 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.446e+01 1.080e+02 1.203e+02 1.389e+02 2.597e+02, threshold=2.406e+02, percent-clipped=3.0 +2024-09-17 01:15:09,745 INFO [train.py:1198] (0/2) Epoch 7, batch 2050, loss[loss=0.2407, ctc_loss=0.1633, cr_loss=0.3679, attn_decoder_loss=0.2411, over 29444.00 frames. ], tot_loss[loss=0.2814, ctc_loss=0.1981, cr_loss=0.4243, attn_decoder_loss=0.2812, over 5789329.29 frames. ], batch size: 70, lr: 1.59e-02, grad_scale: 4.0 +2024-09-17 01:15:17,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=116800.0, ans=0.125 +2024-09-17 01:15:26,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=116840.0, ans=0.0 +2024-09-17 01:15:29,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.11 vs. 
limit=6.0 +2024-09-17 01:15:33,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=116840.0, ans=0.1 +2024-09-17 01:15:37,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=116840.0, ans=0.2 +2024-09-17 01:15:46,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=116880.0, ans=0.125 +2024-09-17 01:15:51,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=116880.0, ans=0.125 +2024-09-17 01:15:52,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=116880.0, ans=0.125 +2024-09-17 01:16:25,958 INFO [train.py:1198] (0/2) Epoch 7, batch 2100, loss[loss=0.262, ctc_loss=0.1776, cr_loss=0.3922, attn_decoder_loss=0.2626, over 29753.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1975, cr_loss=0.4236, attn_decoder_loss=0.2806, over 5800802.09 frames. ], batch size: 81, lr: 1.59e-02, grad_scale: 8.0 +2024-09-17 01:17:05,884 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.75 vs. limit=22.5 +2024-09-17 01:17:16,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=117120.0, ans=10.0 +2024-09-17 01:17:21,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=117120.0, ans=0.125 +2024-09-17 01:17:36,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.25 vs. limit=10.0 +2024-09-17 01:17:38,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=117160.0, ans=0.0 +2024-09-17 01:17:42,586 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.423e+01 1.035e+02 1.132e+02 1.239e+02 1.917e+02, threshold=2.264e+02, percent-clipped=0.0 +2024-09-17 01:17:44,157 INFO [train.py:1198] (0/2) Epoch 7, batch 2150, loss[loss=0.2903, ctc_loss=0.2099, cr_loss=0.4395, attn_decoder_loss=0.2894, over 29473.00 frames. ], tot_loss[loss=0.2798, ctc_loss=0.1963, cr_loss=0.4221, attn_decoder_loss=0.2797, over 5816145.34 frames. 
], batch size: 78, lr: 1.59e-02, grad_scale: 4.0 +2024-09-17 01:17:44,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=117200.0, ans=0.125 +2024-09-17 01:17:44,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=117200.0, ans=0.0 +2024-09-17 01:17:58,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=117200.0, ans=0.2 +2024-09-17 01:18:09,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=117240.0, ans=0.125 +2024-09-17 01:18:26,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=117280.0, ans=0.125 +2024-09-17 01:18:32,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=117320.0, ans=0.125 +2024-09-17 01:18:39,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=117320.0, ans=0.1 +2024-09-17 01:18:50,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=117360.0, ans=0.125 +2024-09-17 01:18:58,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=117360.0, ans=0.125 +2024-09-17 01:19:02,629 INFO [train.py:1198] (0/2) Epoch 7, batch 2200, loss[loss=0.2938, ctc_loss=0.2011, cr_loss=0.4095, attn_decoder_loss=0.295, over 29620.00 frames. ], tot_loss[loss=0.2803, ctc_loss=0.1967, cr_loss=0.4234, attn_decoder_loss=0.2802, over 5812435.94 frames. ], batch size: 86, lr: 1.59e-02, grad_scale: 8.0 +2024-09-17 01:19:21,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=117440.0, ans=0.125 +2024-09-17 01:19:25,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=117440.0, ans=0.125 +2024-09-17 01:19:55,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=117520.0, ans=0.125 +2024-09-17 01:20:04,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=117560.0, ans=0.07 +2024-09-17 01:20:19,563 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.574e+01 1.056e+02 1.108e+02 1.251e+02 3.146e+02, threshold=2.216e+02, percent-clipped=2.0 +2024-09-17 01:20:19,585 INFO [train.py:1198] (0/2) Epoch 7, batch 2250, loss[loss=0.2731, ctc_loss=0.1887, cr_loss=0.422, attn_decoder_loss=0.2731, over 29684.00 frames. ], tot_loss[loss=0.2805, ctc_loss=0.1972, cr_loss=0.4234, attn_decoder_loss=0.2803, over 5812790.30 frames. ], batch size: 82, lr: 1.59e-02, grad_scale: 4.0 +2024-09-17 01:20:31,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.37 vs. 
limit=15.0 +2024-09-17 01:21:00,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=117680.0, ans=0.125 +2024-09-17 01:21:02,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=117680.0, ans=0.2 +2024-09-17 01:21:18,519 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:21:37,949 INFO [train.py:1198] (0/2) Epoch 7, batch 2300, loss[loss=0.2558, ctc_loss=0.1763, cr_loss=0.4009, attn_decoder_loss=0.2557, over 29302.00 frames. ], tot_loss[loss=0.2792, ctc_loss=0.1962, cr_loss=0.4216, attn_decoder_loss=0.279, over 5798291.71 frames. ], batch size: 71, lr: 1.59e-02, grad_scale: 8.0 +2024-09-17 01:21:51,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=117840.0, ans=0.125 +2024-09-17 01:22:01,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=117840.0, ans=0.95 +2024-09-17 01:22:08,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=117880.0, ans=0.09899494936611666 +2024-09-17 01:22:56,189 INFO [train.py:1198] (0/2) Epoch 7, batch 2350, loss[loss=0.2986, ctc_loss=0.2109, cr_loss=0.4571, attn_decoder_loss=0.2982, over 29663.00 frames. ], tot_loss[loss=0.279, ctc_loss=0.1956, cr_loss=0.4215, attn_decoder_loss=0.2789, over 5802718.07 frames. ], batch size: 83, lr: 1.59e-02, grad_scale: 4.0 +2024-09-17 01:22:57,667 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.765e+01 1.037e+02 1.131e+02 1.224e+02 2.356e+02, threshold=2.262e+02, percent-clipped=1.0 +2024-09-17 01:22:58,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=118000.0, ans=0.0 +2024-09-17 01:23:08,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=118000.0, ans=0.0 +2024-09-17 01:23:14,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=118040.0, ans=0.125 +2024-09-17 01:23:55,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=118160.0, ans=0.125 +2024-09-17 01:24:01,008 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.45 vs. limit=15.0 +2024-09-17 01:24:06,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=118160.0, ans=0.1 +2024-09-17 01:24:09,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=118160.0, ans=0.2 +2024-09-17 01:24:12,081 INFO [train.py:1198] (0/2) Epoch 7, batch 2400, loss[loss=0.2724, ctc_loss=0.1912, cr_loss=0.3997, attn_decoder_loss=0.2725, over 29540.00 frames. ], tot_loss[loss=0.2796, ctc_loss=0.1962, cr_loss=0.4217, attn_decoder_loss=0.2794, over 5806036.74 frames. 
], batch size: 76, lr: 1.58e-02, grad_scale: 8.0 +2024-09-17 01:24:44,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_abs, batch_count=118280.0, ans=0.5 +2024-09-17 01:24:47,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=118280.0, ans=0.0 +2024-09-17 01:24:52,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=118280.0, ans=0.07 +2024-09-17 01:25:09,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=118320.0, ans=0.1 +2024-09-17 01:25:32,283 INFO [train.py:1198] (0/2) Epoch 7, batch 2450, loss[loss=0.2882, ctc_loss=0.1979, cr_loss=0.4435, attn_decoder_loss=0.2884, over 29684.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1971, cr_loss=0.4231, attn_decoder_loss=0.2806, over 5783888.64 frames. ], batch size: 82, lr: 1.58e-02, grad_scale: 4.0 +2024-09-17 01:25:32,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=118400.0, ans=0.0 +2024-09-17 01:25:35,213 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.912e+01 1.061e+02 1.128e+02 1.247e+02 1.833e+02, threshold=2.256e+02, percent-clipped=0.0 +2024-09-17 01:25:46,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=118440.0, ans=0.125 +2024-09-17 01:26:03,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=118480.0, ans=0.025 +2024-09-17 01:26:34,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=118560.0, ans=0.0 +2024-09-17 01:26:45,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.00 vs. limit=10.0 +2024-09-17 01:26:50,582 INFO [train.py:1198] (0/2) Epoch 7, batch 2500, loss[loss=0.2944, ctc_loss=0.2075, cr_loss=0.4253, attn_decoder_loss=0.2946, over 29627.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1971, cr_loss=0.4234, attn_decoder_loss=0.2805, over 5794289.33 frames. ], batch size: 86, lr: 1.58e-02, grad_scale: 8.0 +2024-09-17 01:26:50,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=118600.0, ans=0.125 +2024-09-17 01:27:03,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=118600.0, ans=0.0 +2024-09-17 01:27:07,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=118640.0, ans=0.125 +2024-09-17 01:27:15,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=118640.0, ans=0.0 +2024-09-17 01:27:16,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=118640.0, ans=0.1 +2024-09-17 01:27:40,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=118720.0, ans=15.0 +2024-09-17 01:28:07,516 INFO [train.py:1198] (0/2) Epoch 7, batch 2550, loss[loss=0.2437, ctc_loss=0.1668, cr_loss=0.3977, attn_decoder_loss=0.2434, over 29331.00 frames. 
], tot_loss[loss=0.2803, ctc_loss=0.1963, cr_loss=0.4231, attn_decoder_loss=0.2802, over 5798434.84 frames. ], batch size: 67, lr: 1.58e-02, grad_scale: 4.0 +2024-09-17 01:28:11,980 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.793e+01 1.007e+02 1.102e+02 1.293e+02 3.039e+02, threshold=2.204e+02, percent-clipped=2.0 +2024-09-17 01:28:14,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.80 vs. limit=15.0 +2024-09-17 01:28:33,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.39 vs. limit=22.5 +2024-09-17 01:29:12,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=118960.0, ans=0.2 +2024-09-17 01:29:25,799 INFO [train.py:1198] (0/2) Epoch 7, batch 2600, loss[loss=0.2548, ctc_loss=0.1663, cr_loss=0.3816, attn_decoder_loss=0.2561, over 29448.00 frames. ], tot_loss[loss=0.2805, ctc_loss=0.1964, cr_loss=0.4231, attn_decoder_loss=0.2804, over 5794817.31 frames. ], batch size: 78, lr: 1.58e-02, grad_scale: 8.0 +2024-09-17 01:29:29,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=119000.0, ans=0.125 +2024-09-17 01:30:08,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=119080.0, ans=0.125 +2024-09-17 01:30:11,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=119120.0, ans=0.1 +2024-09-17 01:30:19,678 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.60 vs. limit=15.0 +2024-09-17 01:30:22,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=119120.0, ans=0.125 +2024-09-17 01:30:36,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.25 vs. limit=22.5 +2024-09-17 01:30:43,410 INFO [train.py:1198] (0/2) Epoch 7, batch 2650, loss[loss=0.3062, ctc_loss=0.2187, cr_loss=0.4495, attn_decoder_loss=0.3059, over 29213.00 frames. ], tot_loss[loss=0.2809, ctc_loss=0.1969, cr_loss=0.4231, attn_decoder_loss=0.2808, over 5801658.86 frames. ], batch size: 100, lr: 1.58e-02, grad_scale: 4.0 +2024-09-17 01:30:45,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=119200.0, ans=0.0 +2024-09-17 01:30:49,509 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.862e+01 1.038e+02 1.128e+02 1.278e+02 2.890e+02, threshold=2.256e+02, percent-clipped=2.0 +2024-09-17 01:31:00,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=119240.0, ans=0.1 +2024-09-17 01:31:12,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=119280.0, ans=0.1 +2024-09-17 01:31:16,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.52 vs. 
limit=22.5 +2024-09-17 01:31:17,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=119280.0, ans=0.125 +2024-09-17 01:31:23,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=119280.0, ans=0.125 +2024-09-17 01:31:29,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.91 vs. limit=15.0 +2024-09-17 01:31:59,342 INFO [train.py:1198] (0/2) Epoch 7, batch 2700, loss[loss=0.2789, ctc_loss=0.1894, cr_loss=0.4025, attn_decoder_loss=0.2799, over 29505.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1969, cr_loss=0.4228, attn_decoder_loss=0.2808, over 5797319.90 frames. ], batch size: 87, lr: 1.58e-02, grad_scale: 8.0 +2024-09-17 01:32:25,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=119440.0, ans=0.0 +2024-09-17 01:32:45,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=119520.0, ans=0.025 +2024-09-17 01:32:55,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=119520.0, ans=0.1 +2024-09-17 01:33:01,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=119560.0, ans=0.125 +2024-09-17 01:33:02,802 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.24 vs. limit=15.0 +2024-09-17 01:33:14,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=119560.0, ans=0.125 +2024-09-17 01:33:18,438 INFO [train.py:1198] (0/2) Epoch 7, batch 2750, loss[loss=0.2656, ctc_loss=0.1771, cr_loss=0.4278, attn_decoder_loss=0.266, over 29550.00 frames. ], tot_loss[loss=0.2795, ctc_loss=0.1957, cr_loss=0.421, attn_decoder_loss=0.2795, over 5794627.61 frames. ], batch size: 75, lr: 1.58e-02, grad_scale: 4.0 +2024-09-17 01:33:20,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=119600.0, ans=0.125 +2024-09-17 01:33:26,048 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.208e+01 9.926e+01 1.072e+02 1.182e+02 2.176e+02, threshold=2.145e+02, percent-clipped=0.0 +2024-09-17 01:33:31,648 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.75 vs. 
limit=6.0 +2024-09-17 01:33:50,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=119680.0, ans=0.0 +2024-09-17 01:33:51,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=119680.0, ans=0.125 +2024-09-17 01:33:58,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=119680.0, ans=0.125 +2024-09-17 01:34:28,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=119760.0, ans=0.0 +2024-09-17 01:34:37,079 INFO [train.py:1198] (0/2) Epoch 7, batch 2800, loss[loss=0.3224, ctc_loss=0.2657, cr_loss=0.4801, attn_decoder_loss=0.318, over 20296.00 frames. ], tot_loss[loss=0.2802, ctc_loss=0.1967, cr_loss=0.4224, attn_decoder_loss=0.2801, over 5775822.53 frames. ], batch size: 210, lr: 1.57e-02, grad_scale: 8.0 +2024-09-17 01:34:43,902 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=11.27 vs. limit=15.0 +2024-09-17 01:34:50,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=119800.0, ans=15.0 +2024-09-17 01:35:29,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=119920.0, ans=0.0 +2024-09-17 01:35:30,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=119920.0, ans=0.2 +2024-09-17 01:35:32,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=119920.0, ans=0.2 +2024-09-17 01:35:33,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=119920.0, ans=0.0 +2024-09-17 01:35:37,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=119960.0, ans=0.0 +2024-09-17 01:35:53,909 INFO [train.py:1198] (0/2) Epoch 7, batch 2850, loss[loss=0.274, ctc_loss=0.1904, cr_loss=0.4272, attn_decoder_loss=0.2738, over 29495.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1973, cr_loss=0.4228, attn_decoder_loss=0.2806, over 5759917.53 frames. 
], batch size: 77, lr: 1.57e-02, grad_scale: 4.0 +2024-09-17 01:36:03,110 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.909e+01 1.092e+02 1.177e+02 1.435e+02 2.490e+02, threshold=2.355e+02, percent-clipped=3.0 +2024-09-17 01:36:05,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=120000.0, ans=0.125 +2024-09-17 01:36:20,305 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:36:21,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=120040.0, ans=0.125 +2024-09-17 01:36:23,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=120080.0, ans=0.2 +2024-09-17 01:36:23,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=120080.0, ans=0.0 +2024-09-17 01:36:45,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=120120.0, ans=0.125 +2024-09-17 01:36:53,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=120120.0, ans=0.125 +2024-09-17 01:37:00,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=120160.0, ans=0.125 +2024-09-17 01:37:08,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=120160.0, ans=0.0 +2024-09-17 01:37:11,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=120200.0, ans=0.0 +2024-09-17 01:37:13,102 INFO [train.py:1198] (0/2) Epoch 7, batch 2900, loss[loss=0.2726, ctc_loss=0.1922, cr_loss=0.4375, attn_decoder_loss=0.2718, over 29432.00 frames. ], tot_loss[loss=0.2815, ctc_loss=0.1978, cr_loss=0.4244, attn_decoder_loss=0.2814, over 5785476.73 frames. ], batch size: 79, lr: 1.57e-02, grad_scale: 8.0 +2024-09-17 01:37:19,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=120200.0, ans=0.0 +2024-09-17 01:37:19,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=120200.0, ans=0.1 +2024-09-17 01:37:42,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=120240.0, ans=0.0 +2024-09-17 01:37:43,539 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.37 vs. 
limit=15.0 +2024-09-17 01:37:49,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=120280.0, ans=0.1 +2024-09-17 01:37:52,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=120280.0, ans=0.125 +2024-09-17 01:37:58,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=120280.0, ans=0.95 +2024-09-17 01:37:59,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=120320.0, ans=0.025 +2024-09-17 01:38:19,916 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.69 vs. limit=15.0 +2024-09-17 01:38:31,304 INFO [train.py:1198] (0/2) Epoch 7, batch 2950, loss[loss=0.2545, ctc_loss=0.1708, cr_loss=0.3869, attn_decoder_loss=0.2552, over 29506.00 frames. ], tot_loss[loss=0.2802, ctc_loss=0.1968, cr_loss=0.4232, attn_decoder_loss=0.2801, over 5781915.48 frames. ], batch size: 75, lr: 1.57e-02, grad_scale: 4.0 +2024-09-17 01:38:37,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=120400.0, ans=0.07 +2024-09-17 01:38:41,939 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.289e+01 1.032e+02 1.125e+02 1.263e+02 2.681e+02, threshold=2.250e+02, percent-clipped=2.0 +2024-09-17 01:38:42,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=120400.0, ans=0.2 +2024-09-17 01:38:53,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=120440.0, ans=0.125 +2024-09-17 01:39:02,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=120480.0, ans=0.125 +2024-09-17 01:39:10,360 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.00 vs. limit=15.0 +2024-09-17 01:39:34,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=120560.0, ans=0.125 +2024-09-17 01:39:47,816 INFO [train.py:1198] (0/2) Epoch 7, batch 3000, loss[loss=0.2657, ctc_loss=0.1773, cr_loss=0.3842, attn_decoder_loss=0.2669, over 29770.00 frames. ], tot_loss[loss=0.28, ctc_loss=0.1967, cr_loss=0.4232, attn_decoder_loss=0.2798, over 5782997.53 frames. ], batch size: 81, lr: 1.57e-02, grad_scale: 8.0 +2024-09-17 01:39:47,817 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 01:40:06,235 INFO [train.py:1230] (0/2) Epoch 7, validation: loss=0.2168, ctc_loss=0.05873, cr_loss=4.524e-15, attn_decoder_loss=0.2344, over 944034.00 frames. 
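Note on the fields in the entries above: each per-batch "loss[...]" line reports a composite objective with a CTC term (ctc_loss), a consistency-regularization term (cr_loss), and an attention-decoder cross-entropy term (attn_decoder_loss). The WARNING lines from optim.py report quartiles of recent gradient norms, and the clipping threshold appears to track clipping_scale times the median norm (for example, 2.0 x 1.071e+02 gives roughly the 2.143e+02 threshold in the first warning of this excerpt). Below is a minimal sketch of how such a weighted combination could be formed; the function name and weights are illustrative assumptions rather than the recipe's actual code, although weights of roughly 0.9, 0.1, and 0.02 do reproduce the logged totals (0.9 * 0.2813 + 0.1 * 0.1981 + 0.02 * 0.4235 is about 0.2814, matching the batch 1500 entry).

    import torch

    def combine_losses(ctc_loss: torch.Tensor,
                       cr_loss: torch.Tensor,
                       attn_decoder_loss: torch.Tensor,
                       ctc_weight: float = 0.1,   # assumed weight, inferred from the logged totals
                       cr_weight: float = 0.02,   # assumed weight, inferred from the logged totals
                       attn_weight: float = 0.9,  # assumed weight, inferred from the logged totals
                       ) -> torch.Tensor:
        # Weighted sum of the three criteria; the logged "loss" field is a
        # scalar of this general form, averaged over the frames in the batch.
        return (ctc_weight * ctc_loss
                + cr_weight * cr_loss
                + attn_weight * attn_decoder_loss)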
+2024-09-17 01:40:06,236 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 01:40:11,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=120600.0, ans=0.125 +2024-09-17 01:40:20,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=120640.0, ans=0.125 +2024-09-17 01:40:32,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.48 vs. limit=22.5 +2024-09-17 01:40:33,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=120640.0, ans=0.0 +2024-09-17 01:40:59,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=120720.0, ans=0.0 +2024-09-17 01:41:25,955 INFO [train.py:1198] (0/2) Epoch 7, batch 3050, loss[loss=0.276, ctc_loss=0.1973, cr_loss=0.4495, attn_decoder_loss=0.2747, over 29540.00 frames. ], tot_loss[loss=0.2813, ctc_loss=0.1979, cr_loss=0.4253, attn_decoder_loss=0.2811, over 5777187.77 frames. ], batch size: 76, lr: 1.57e-02, grad_scale: 4.0 +2024-09-17 01:41:40,283 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.612e+01 1.070e+02 1.194e+02 1.343e+02 6.918e+02, threshold=2.387e+02, percent-clipped=4.0 +2024-09-17 01:41:41,549 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=11.22 vs. limit=15.0 +2024-09-17 01:41:51,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=120840.0, ans=0.125 +2024-09-17 01:41:57,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=120880.0, ans=0.125 +2024-09-17 01:42:15,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=120920.0, ans=0.125 +2024-09-17 01:42:44,149 INFO [train.py:1198] (0/2) Epoch 7, batch 3100, loss[loss=0.3005, ctc_loss=0.2132, cr_loss=0.4504, attn_decoder_loss=0.3001, over 29147.00 frames. ], tot_loss[loss=0.2812, ctc_loss=0.1979, cr_loss=0.4246, attn_decoder_loss=0.281, over 5776684.40 frames. ], batch size: 100, lr: 1.57e-02, grad_scale: 8.0 +2024-09-17 01:42:51,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=121000.0, ans=0.125 +2024-09-17 01:42:56,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=121000.0, ans=0.125 +2024-09-17 01:43:45,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=121160.0, ans=0.125 +2024-09-17 01:43:47,078 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:43:48,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=121160.0, ans=0.0 +2024-09-17 01:44:00,605 INFO [train.py:1198] (0/2) Epoch 7, batch 3150, loss[loss=0.2994, ctc_loss=0.214, cr_loss=0.4526, attn_decoder_loss=0.2989, over 28912.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1976, cr_loss=0.4242, attn_decoder_loss=0.2806, over 5783086.86 frames. 
], batch size: 104, lr: 1.57e-02, grad_scale: 4.0 +2024-09-17 01:44:14,324 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.541e+01 1.004e+02 1.097e+02 1.266e+02 2.300e+02, threshold=2.194e+02, percent-clipped=0.0 +2024-09-17 01:44:29,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=121240.0, ans=0.125 +2024-09-17 01:44:38,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=121280.0, ans=0.0 +2024-09-17 01:44:41,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=121280.0, ans=0.125 +2024-09-17 01:44:43,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=121280.0, ans=0.0 +2024-09-17 01:44:44,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=121280.0, ans=0.05 +2024-09-17 01:44:52,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=121320.0, ans=0.125 +2024-09-17 01:45:01,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=121320.0, ans=0.125 +2024-09-17 01:45:10,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=121360.0, ans=0.0 +2024-09-17 01:45:15,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=121360.0, ans=0.125 +2024-09-17 01:45:19,449 INFO [train.py:1198] (0/2) Epoch 7, batch 3200, loss[loss=0.2731, ctc_loss=0.1936, cr_loss=0.426, attn_decoder_loss=0.2725, over 29395.00 frames. ], tot_loss[loss=0.2799, ctc_loss=0.1964, cr_loss=0.4236, attn_decoder_loss=0.2798, over 5792658.06 frames. ], batch size: 79, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:45:43,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=121440.0, ans=0.125 +2024-09-17 01:45:52,913 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.46 vs. limit=15.0 +2024-09-17 01:45:57,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=121480.0, ans=0.0 +2024-09-17 01:46:05,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.09 vs. limit=15.0 +2024-09-17 01:46:12,950 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.40 vs. limit=6.0 +2024-09-17 01:46:25,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=121560.0, ans=15.0 +2024-09-17 01:46:27,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=121560.0, ans=0.09899494936611666 +2024-09-17 01:46:38,563 INFO [train.py:1198] (0/2) Epoch 7, batch 3250, loss[loss=0.2819, ctc_loss=0.1924, cr_loss=0.4183, attn_decoder_loss=0.2826, over 29689.00 frames. 
], tot_loss[loss=0.2799, ctc_loss=0.1961, cr_loss=0.4238, attn_decoder_loss=0.2798, over 5799438.73 frames. ], batch size: 84, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:46:46,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=121600.0, ans=0.125 +2024-09-17 01:46:47,167 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.58 vs. limit=22.5 +2024-09-17 01:46:53,779 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.273e+01 1.020e+02 1.119e+02 1.210e+02 1.676e+02, threshold=2.238e+02, percent-clipped=0.0 +2024-09-17 01:47:19,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=121680.0, ans=0.2 +2024-09-17 01:47:21,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=121680.0, ans=0.025 +2024-09-17 01:47:21,908 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.27 vs. limit=22.5 +2024-09-17 01:47:23,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=121720.0, ans=0.125 +2024-09-17 01:47:30,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=121720.0, ans=0.0 +2024-09-17 01:47:32,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=121720.0, ans=0.125 +2024-09-17 01:47:35,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=121720.0, ans=0.0 +2024-09-17 01:47:46,216 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.15 vs. limit=10.0 +2024-09-17 01:47:54,941 INFO [train.py:1198] (0/2) Epoch 7, batch 3300, loss[loss=0.2882, ctc_loss=0.204, cr_loss=0.4115, attn_decoder_loss=0.2884, over 28245.00 frames. ], tot_loss[loss=0.2786, ctc_loss=0.1951, cr_loss=0.4219, attn_decoder_loss=0.2785, over 5795903.75 frames. ], batch size: 111, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:48:28,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=121880.0, ans=0.1 +2024-09-17 01:48:30,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=121880.0, ans=0.0 +2024-09-17 01:48:34,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=121880.0, ans=0.125 +2024-09-17 01:49:13,988 INFO [train.py:1198] (0/2) Epoch 7, batch 3350, loss[loss=0.2785, ctc_loss=0.1842, cr_loss=0.3851, attn_decoder_loss=0.2805, over 28928.00 frames. ], tot_loss[loss=0.2802, ctc_loss=0.197, cr_loss=0.4241, attn_decoder_loss=0.28, over 5774451.82 frames. ], batch size: 104, lr: 1.56e-02, grad_scale: 4.0 +2024-09-17 01:49:19,626 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.33 vs. 
limit=15.0 +2024-09-17 01:49:20,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=122000.0, ans=0.0 +2024-09-17 01:49:20,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=122000.0, ans=0.125 +2024-09-17 01:49:32,807 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.051e+01 1.075e+02 1.159e+02 1.381e+02 2.720e+02, threshold=2.319e+02, percent-clipped=3.0 +2024-09-17 01:49:33,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.89 vs. limit=15.0 +2024-09-17 01:50:03,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=122120.0, ans=0.1 +2024-09-17 01:50:10,709 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.40 vs. limit=10.0 +2024-09-17 01:50:14,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=122120.0, ans=0.125 +2024-09-17 01:50:28,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=122160.0, ans=0.2 +2024-09-17 01:50:32,406 INFO [train.py:1198] (0/2) Epoch 7, batch 3400, loss[loss=0.2445, ctc_loss=0.1584, cr_loss=0.3683, attn_decoder_loss=0.2459, over 29375.00 frames. ], tot_loss[loss=0.28, ctc_loss=0.1967, cr_loss=0.4235, attn_decoder_loss=0.2798, over 5767530.04 frames. ], batch size: 67, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:50:38,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=122200.0, ans=0.125 +2024-09-17 01:51:33,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=122360.0, ans=0.2 +2024-09-17 01:51:35,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=122360.0, ans=0.125 +2024-09-17 01:51:45,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=122360.0, ans=0.09899494936611666 +2024-09-17 01:51:48,684 INFO [train.py:1198] (0/2) Epoch 7, batch 3450, loss[loss=0.2896, ctc_loss=0.2005, cr_loss=0.419, attn_decoder_loss=0.2902, over 28475.00 frames. ], tot_loss[loss=0.2805, ctc_loss=0.197, cr_loss=0.4245, attn_decoder_loss=0.2804, over 5775822.42 frames. ], batch size: 112, lr: 1.56e-02, grad_scale: 4.0 +2024-09-17 01:51:54,327 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.20 vs. 
limit=15.0 +2024-09-17 01:52:09,033 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.695e+01 1.040e+02 1.098e+02 1.235e+02 2.393e+02, threshold=2.195e+02, percent-clipped=1.0 +2024-09-17 01:52:29,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=122480.0, ans=0.025 +2024-09-17 01:52:35,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=122520.0, ans=0.0 +2024-09-17 01:52:38,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=122520.0, ans=0.125 +2024-09-17 01:52:47,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=122520.0, ans=0.125 +2024-09-17 01:52:56,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=122560.0, ans=0.125 +2024-09-17 01:53:07,003 INFO [train.py:1198] (0/2) Epoch 7, batch 3500, loss[loss=0.2562, ctc_loss=0.1727, cr_loss=0.3782, attn_decoder_loss=0.2571, over 29304.00 frames. ], tot_loss[loss=0.2795, ctc_loss=0.1958, cr_loss=0.4228, attn_decoder_loss=0.2795, over 5777549.52 frames. ], batch size: 71, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:53:13,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=122600.0, ans=0.07 +2024-09-17 01:53:19,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=122600.0, ans=0.0 +2024-09-17 01:53:53,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=122720.0, ans=0.025 +2024-09-17 01:53:56,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=122720.0, ans=0.0 +2024-09-17 01:53:57,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=122720.0, ans=0.1 +2024-09-17 01:54:23,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=122800.0, ans=0.125 +2024-09-17 01:54:24,518 INFO [train.py:1198] (0/2) Epoch 7, batch 3550, loss[loss=0.288, ctc_loss=0.2002, cr_loss=0.4234, attn_decoder_loss=0.2884, over 29694.00 frames. ], tot_loss[loss=0.2793, ctc_loss=0.1951, cr_loss=0.4225, attn_decoder_loss=0.2792, over 5783676.84 frames. 
], batch size: 89, lr: 1.56e-02, grad_scale: 4.0 +2024-09-17 01:54:36,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=122800.0, ans=0.2 +2024-09-17 01:54:43,845 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.552e+01 9.824e+01 1.101e+02 1.214e+02 1.774e+02, threshold=2.203e+02, percent-clipped=0.0 +2024-09-17 01:54:53,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=122880.0, ans=0.125 +2024-09-17 01:55:09,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=122920.0, ans=0.125 +2024-09-17 01:55:11,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=122920.0, ans=0.1 +2024-09-17 01:55:20,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=122920.0, ans=0.5 +2024-09-17 01:55:39,586 INFO [train.py:1198] (0/2) Epoch 7, batch 3600, loss[loss=0.2726, ctc_loss=0.1845, cr_loss=0.4073, attn_decoder_loss=0.2733, over 29486.00 frames. ], tot_loss[loss=0.2791, ctc_loss=0.1948, cr_loss=0.4219, attn_decoder_loss=0.2791, over 5792408.76 frames. ], batch size: 77, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 01:55:51,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=123000.0, ans=0.2 +2024-09-17 01:56:05,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=123040.0, ans=0.1 +2024-09-17 01:56:13,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=123080.0, ans=0.0 +2024-09-17 01:56:31,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=123120.0, ans=0.0 +2024-09-17 01:56:43,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=123160.0, ans=0.1 +2024-09-17 01:56:55,559 INFO [train.py:1198] (0/2) Epoch 7, batch 3650, loss[loss=0.3016, ctc_loss=0.2149, cr_loss=0.4619, attn_decoder_loss=0.301, over 29491.00 frames. ], tot_loss[loss=0.2784, ctc_loss=0.194, cr_loss=0.4211, attn_decoder_loss=0.2784, over 5793010.91 frames. ], batch size: 90, lr: 1.55e-02, grad_scale: 4.0 +2024-09-17 01:56:58,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=123200.0, ans=0.125 +2024-09-17 01:57:12,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=123240.0, ans=0.1 +2024-09-17 01:57:14,886 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.55 vs. limit=6.0 +2024-09-17 01:57:16,628 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.280e+01 1.041e+02 1.137e+02 1.251e+02 2.329e+02, threshold=2.273e+02, percent-clipped=0.0 +2024-09-17 01:57:33,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=123280.0, ans=0.0 +2024-09-17 01:58:13,141 INFO [train.py:1198] (0/2) Epoch 7, batch 3700, loss[loss=0.2866, ctc_loss=0.1983, cr_loss=0.4287, attn_decoder_loss=0.2868, over 29711.00 frames. 
], tot_loss[loss=0.2788, ctc_loss=0.1942, cr_loss=0.422, attn_decoder_loss=0.2789, over 5803762.14 frames. ], batch size: 84, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 01:58:32,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=123440.0, ans=0.025 +2024-09-17 01:58:36,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.69 vs. limit=12.0 +2024-09-17 01:58:49,983 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=6.00 vs. limit=12.0 +2024-09-17 01:58:53,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=123480.0, ans=0.2 +2024-09-17 01:59:13,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=123560.0, ans=0.025 +2024-09-17 01:59:28,054 INFO [train.py:1198] (0/2) Epoch 7, batch 3750, loss[loss=0.2488, ctc_loss=0.1625, cr_loss=0.3664, attn_decoder_loss=0.2503, over 29355.00 frames. ], tot_loss[loss=0.2785, ctc_loss=0.194, cr_loss=0.4222, attn_decoder_loss=0.2785, over 5808121.86 frames. ], batch size: 67, lr: 1.55e-02, grad_scale: 4.0 +2024-09-17 01:59:45,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=123640.0, ans=0.1 +2024-09-17 01:59:50,731 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.690e+01 1.049e+02 1.152e+02 1.342e+02 3.942e+02, threshold=2.304e+02, percent-clipped=2.0 +2024-09-17 01:59:55,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.73 vs. limit=15.0 +2024-09-17 01:59:56,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=123680.0, ans=0.0 +2024-09-17 01:59:57,340 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.33 vs. limit=15.0 +2024-09-17 02:00:16,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=123720.0, ans=0.125 +2024-09-17 02:00:42,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=123760.0, ans=0.1 +2024-09-17 02:00:45,074 INFO [train.py:1198] (0/2) Epoch 7, batch 3800, loss[loss=0.2854, ctc_loss=0.1891, cr_loss=0.4356, attn_decoder_loss=0.2864, over 29629.00 frames. ], tot_loss[loss=0.2782, ctc_loss=0.1941, cr_loss=0.4218, attn_decoder_loss=0.2781, over 5798328.29 frames. ], batch size: 86, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 02:00:46,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=123800.0, ans=0.125 +2024-09-17 02:00:57,357 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:01:06,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=123840.0, ans=0.125 +2024-09-17 02:01:11,569 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.76 vs. 
limit=6.0 +2024-09-17 02:01:12,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=123840.0, ans=0.0 +2024-09-17 02:01:32,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=123920.0, ans=0.0 +2024-09-17 02:01:40,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=123920.0, ans=0.0 +2024-09-17 02:01:49,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=123960.0, ans=0.125 +2024-09-17 02:02:00,444 INFO [train.py:1198] (0/2) Epoch 7, batch 3850, loss[loss=0.3026, ctc_loss=0.2168, cr_loss=0.4539, attn_decoder_loss=0.3021, over 29214.00 frames. ], tot_loss[loss=0.2782, ctc_loss=0.194, cr_loss=0.4216, attn_decoder_loss=0.2782, over 5811737.44 frames. ], batch size: 100, lr: 1.55e-02, grad_scale: 4.0 +2024-09-17 02:02:24,324 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.352e+01 1.024e+02 1.121e+02 1.176e+02 2.647e+02, threshold=2.243e+02, percent-clipped=2.0 +2024-09-17 02:02:25,388 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.10 vs. limit=12.0 +2024-09-17 02:02:33,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=124080.0, ans=0.035 +2024-09-17 02:02:39,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=124080.0, ans=0.125 +2024-09-17 02:02:41,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=124080.0, ans=0.125 +2024-09-17 02:03:15,233 INFO [train.py:1198] (0/2) Epoch 7, batch 3900, loss[loss=0.3031, ctc_loss=0.2199, cr_loss=0.4761, attn_decoder_loss=0.3018, over 29631.00 frames. ], tot_loss[loss=0.2786, ctc_loss=0.1941, cr_loss=0.4216, attn_decoder_loss=0.2786, over 5816266.71 frames. ], batch size: 86, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 02:04:04,386 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=15.66 vs. limit=15.0 +2024-09-17 02:04:12,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=124320.0, ans=0.125 +2024-09-17 02:04:12,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=124320.0, ans=0.125 +2024-09-17 02:04:18,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=124360.0, ans=0.05 +2024-09-17 02:04:20,934 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.44 vs. limit=22.5 +2024-09-17 02:04:31,847 INFO [train.py:1198] (0/2) Epoch 7, batch 3950, loss[loss=0.289, ctc_loss=0.1947, cr_loss=0.411, attn_decoder_loss=0.2904, over 29489.00 frames. ], tot_loss[loss=0.2785, ctc_loss=0.1936, cr_loss=0.4216, attn_decoder_loss=0.2786, over 5835806.47 frames. ], batch size: 97, lr: 1.55e-02, grad_scale: 4.0 +2024-09-17 02:04:32,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.94 vs. 
limit=22.5 +2024-09-17 02:04:57,233 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.925e+01 1.017e+02 1.080e+02 1.236e+02 3.410e+02, threshold=2.160e+02, percent-clipped=1.0 +2024-09-17 02:05:02,744 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.95 vs. limit=15.0 +2024-09-17 02:05:15,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=124520.0, ans=0.125 +2024-09-17 02:05:46,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=124600.0, ans=0.125 +2024-09-17 02:05:47,556 INFO [train.py:1198] (0/2) Epoch 7, batch 4000, loss[loss=0.2624, ctc_loss=0.1782, cr_loss=0.4027, attn_decoder_loss=0.2628, over 29500.00 frames. ], tot_loss[loss=0.2787, ctc_loss=0.1942, cr_loss=0.4213, attn_decoder_loss=0.2788, over 5812064.47 frames. ], batch size: 74, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 02:05:56,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=124600.0, ans=0.125 +2024-09-17 02:06:06,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=124640.0, ans=0.125 +2024-09-17 02:06:16,835 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.16 vs. limit=10.0 +2024-09-17 02:06:44,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.59 vs. limit=22.5 +2024-09-17 02:06:45,176 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:06:47,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=124760.0, ans=0.1 +2024-09-17 02:06:50,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=124760.0, ans=0.125 +2024-09-17 02:06:59,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=124760.0, ans=0.1 +2024-09-17 02:07:02,364 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.53 vs. limit=22.5 +2024-09-17 02:07:02,864 INFO [train.py:1198] (0/2) Epoch 7, batch 4050, loss[loss=0.3218, ctc_loss=0.2749, cr_loss=0.4309, attn_decoder_loss=0.3174, over 20009.00 frames. ], tot_loss[loss=0.2785, ctc_loss=0.1942, cr_loss=0.4211, attn_decoder_loss=0.2786, over 5796146.67 frames. ], batch size: 209, lr: 1.54e-02, grad_scale: 4.0 +2024-09-17 02:07:05,389 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.98 vs. 
limit=15.0 +2024-09-17 02:07:29,478 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.830e+01 1.037e+02 1.133e+02 1.279e+02 3.685e+02, threshold=2.266e+02, percent-clipped=2.0 +2024-09-17 02:07:35,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=124880.0, ans=0.09899494936611666 +2024-09-17 02:07:38,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=124880.0, ans=0.125 +2024-09-17 02:07:53,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=124920.0, ans=0.0 +2024-09-17 02:08:05,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=124960.0, ans=0.125 +2024-09-17 02:08:05,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=124960.0, ans=0.125 +2024-09-17 02:08:18,296 INFO [train.py:1198] (0/2) Epoch 7, batch 4100, loss[loss=0.2996, ctc_loss=0.2221, cr_loss=0.4893, attn_decoder_loss=0.2973, over 29507.00 frames. ], tot_loss[loss=0.2791, ctc_loss=0.1948, cr_loss=0.4223, attn_decoder_loss=0.2791, over 5791299.91 frames. ], batch size: 90, lr: 1.54e-02, grad_scale: 8.0 +2024-09-17 02:08:21,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=125000.0, ans=10.0 +2024-09-17 02:08:28,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=125000.0, ans=0.1 +2024-09-17 02:09:01,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=125120.0, ans=0.0 +2024-09-17 02:09:17,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.89 vs. limit=6.0 +2024-09-17 02:09:21,428 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.34 vs. limit=22.5 +2024-09-17 02:09:26,968 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.97 vs. limit=15.0 +2024-09-17 02:09:32,348 INFO [train.py:1198] (0/2) Epoch 7, batch 4150, loss[loss=0.2726, ctc_loss=0.1932, cr_loss=0.4336, attn_decoder_loss=0.2718, over 29496.00 frames. ], tot_loss[loss=0.2787, ctc_loss=0.1946, cr_loss=0.4219, attn_decoder_loss=0.2787, over 5797734.40 frames. ], batch size: 77, lr: 1.54e-02, grad_scale: 4.0 +2024-09-17 02:09:39,436 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.38 vs. 
limit=15.0 +2024-09-17 02:09:46,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=125240.0, ans=0.125 +2024-09-17 02:09:49,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=125240.0, ans=0.025 +2024-09-17 02:09:52,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=125240.0, ans=0.07 +2024-09-17 02:10:01,820 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.306e+01 1.018e+02 1.090e+02 1.211e+02 2.746e+02, threshold=2.181e+02, percent-clipped=3.0 +2024-09-17 02:10:04,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=125280.0, ans=0.2 +2024-09-17 02:10:06,991 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.06 vs. limit=10.0 +2024-09-17 02:10:47,445 INFO [train.py:1198] (0/2) Epoch 7, batch 4200, loss[loss=0.2911, ctc_loss=0.2045, cr_loss=0.4386, attn_decoder_loss=0.291, over 29510.00 frames. ], tot_loss[loss=0.279, ctc_loss=0.1945, cr_loss=0.4222, attn_decoder_loss=0.279, over 5800281.02 frames. ], batch size: 90, lr: 1.54e-02, grad_scale: 8.0 +2024-09-17 02:10:49,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=125400.0, ans=0.0 +2024-09-17 02:11:11,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=125440.0, ans=0.07 +2024-09-17 02:11:27,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=125480.0, ans=0.125 +2024-09-17 02:11:38,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=125520.0, ans=0.2 +2024-09-17 02:11:41,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=125520.0, ans=0.125 +2024-09-17 02:11:43,962 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:11:43,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=125520.0, ans=0.125 +2024-09-17 02:12:02,720 INFO [train.py:1198] (0/2) Epoch 7, batch 4250, loss[loss=0.2433, ctc_loss=0.1456, cr_loss=0.3557, attn_decoder_loss=0.2463, over 29504.00 frames. ], tot_loss[loss=0.2791, ctc_loss=0.1942, cr_loss=0.4227, attn_decoder_loss=0.2792, over 5806515.20 frames. 
], batch size: 74, lr: 1.54e-02, grad_scale: 4.0 +2024-09-17 02:12:16,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=125640.0, ans=0.125 +2024-09-17 02:12:22,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=125640.0, ans=0.125 +2024-09-17 02:12:30,969 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:12:31,955 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.603e+01 1.048e+02 1.150e+02 1.288e+02 2.522e+02, threshold=2.299e+02, percent-clipped=2.0 +2024-09-17 02:12:47,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=125720.0, ans=0.125 +2024-09-17 02:12:59,281 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:13:16,849 INFO [train.py:1198] (0/2) Epoch 7, batch 4300, loss[loss=0.2865, ctc_loss=0.2049, cr_loss=0.4451, attn_decoder_loss=0.2857, over 29541.00 frames. ], tot_loss[loss=0.2793, ctc_loss=0.1944, cr_loss=0.4224, attn_decoder_loss=0.2793, over 5795686.56 frames. ], batch size: 87, lr: 1.54e-02, grad_scale: 8.0 +2024-09-17 02:13:17,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=125800.0, ans=0.05 +2024-09-17 02:13:29,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=125800.0, ans=0.1 +2024-09-17 02:13:29,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=125800.0, ans=0.07 +2024-09-17 02:13:52,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=125880.0, ans=0.025 +2024-09-17 02:13:55,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=125880.0, ans=0.0 +2024-09-17 02:13:58,372 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:14:04,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=125920.0, ans=0.1 +2024-09-17 02:14:13,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=125920.0, ans=0.125 +2024-09-17 02:14:20,831 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:14:31,812 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.39 vs. limit=10.0 +2024-09-17 02:14:32,532 INFO [train.py:1198] (0/2) Epoch 7, batch 4350, loss[loss=0.2934, ctc_loss=0.2077, cr_loss=0.4622, attn_decoder_loss=0.2926, over 29464.00 frames. ], tot_loss[loss=0.2826, ctc_loss=0.1974, cr_loss=0.4269, attn_decoder_loss=0.2826, over 5798633.64 frames. ], batch size: 97, lr: 1.54e-02, grad_scale: 4.0 +2024-09-17 02:14:39,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.34 vs. 
limit=15.0 +2024-09-17 02:14:42,211 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.69 vs. limit=6.0 +2024-09-17 02:14:55,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=126040.0, ans=0.125 +2024-09-17 02:15:04,446 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.068e+01 1.042e+02 1.125e+02 1.257e+02 6.277e+02, threshold=2.251e+02, percent-clipped=2.0 +2024-09-17 02:15:10,966 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.44 vs. limit=15.0 +2024-09-17 02:15:21,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.66 vs. limit=22.5 +2024-09-17 02:15:32,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=126160.0, ans=0.125 +2024-09-17 02:15:34,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=126160.0, ans=0.125 +2024-09-17 02:15:39,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.35 vs. limit=6.0 +2024-09-17 02:15:44,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_na.min_abs, batch_count=126160.0, ans=0.02 +2024-09-17 02:15:47,176 INFO [train.py:1198] (0/2) Epoch 7, batch 4400, loss[loss=0.3023, ctc_loss=0.2295, cr_loss=0.4685, attn_decoder_loss=0.3, over 27388.00 frames. ], tot_loss[loss=0.2854, ctc_loss=0.2003, cr_loss=0.43, attn_decoder_loss=0.2853, over 5769376.10 frames. ], batch size: 124, lr: 1.54e-02, grad_scale: 8.0 +2024-09-17 02:15:47,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=126200.0, ans=0.125 +2024-09-17 02:15:54,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_abs, batch_count=126200.0, ans=0.5 +2024-09-17 02:15:56,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=126200.0, ans=0.125 +2024-09-17 02:15:57,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=126200.0, ans=0.0 +2024-09-17 02:16:22,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=14.02 vs. 
limit=15.0 +2024-09-17 02:16:24,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=126280.0, ans=0.1 +2024-09-17 02:16:33,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=126320.0, ans=0.2 +2024-09-17 02:16:38,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=126320.0, ans=0.125 +2024-09-17 02:16:40,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=126320.0, ans=0.2 +2024-09-17 02:16:56,823 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.17 vs. limit=22.5 +2024-09-17 02:17:03,047 INFO [train.py:1198] (0/2) Epoch 7, batch 4450, loss[loss=0.3192, ctc_loss=0.2661, cr_loss=0.4728, attn_decoder_loss=0.3146, over 19930.00 frames. ], tot_loss[loss=0.2886, ctc_loss=0.2058, cr_loss=0.4328, attn_decoder_loss=0.2882, over 5581612.91 frames. ], batch size: 210, lr: 1.53e-02, grad_scale: 4.0 +2024-09-17 02:17:29,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=126440.0, ans=0.1 +2024-09-17 02:17:36,225 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.504e+01 1.059e+02 1.182e+02 1.268e+02 2.368e+02, threshold=2.364e+02, percent-clipped=1.0 +2024-09-17 02:17:47,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=126520.0, ans=0.1 +2024-09-17 02:18:05,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=126560.0, ans=0.125 +2024-09-17 02:18:19,606 INFO [train.py:1198] (0/2) Epoch 7, batch 4500, loss[loss=0.3093, ctc_loss=0.2505, cr_loss=0.4358, attn_decoder_loss=0.3061, over 19347.00 frames. ], tot_loss[loss=0.2928, ctc_loss=0.2138, cr_loss=0.4345, attn_decoder_loss=0.2919, over 5238966.95 frames. ], batch size: 209, lr: 1.53e-02, grad_scale: 8.0 +2024-09-17 02:18:20,615 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.13 vs. limit=10.0 +2024-09-17 02:18:22,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.08 vs. limit=10.0 +2024-09-17 02:18:23,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=126600.0, ans=0.125 +2024-09-17 02:18:41,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=126640.0, ans=15.0 +2024-09-17 02:18:53,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.37 vs. limit=22.5 +2024-09-17 02:18:56,935 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-7.pt +2024-09-17 02:19:46,523 INFO [train.py:1198] (0/2) Epoch 8, batch 0, loss[loss=0.2652, ctc_loss=0.1734, cr_loss=0.3856, attn_decoder_loss=0.2668, over 29588.00 frames. 
], tot_loss[loss=0.2652, ctc_loss=0.1734, cr_loss=0.3856, attn_decoder_loss=0.2668, over 29588.00 frames. ], batch size: 73, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:19:46,524 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 02:20:02,854 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([6.1746, 6.0801, 5.6649, 5.9205], device='cuda:0') +2024-09-17 02:20:03,699 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.1.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.9685, 3.5046, 3.7976, 3.8010], device='cuda:0') +2024-09-17 02:20:04,922 INFO [train.py:1230] (0/2) Epoch 8, validation: loss=0.2208, ctc_loss=0.05894, cr_loss=4.762e-15, attn_decoder_loss=0.2387, over 944034.00 frames. +2024-09-17 02:20:04,923 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 02:20:44,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=126780.0, ans=0.025 +2024-09-17 02:21:19,342 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.753e+01 1.155e+02 1.254e+02 1.387e+02 1.225e+03, threshold=2.508e+02, percent-clipped=2.0 +2024-09-17 02:21:20,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.19 vs. limit=22.5 +2024-09-17 02:21:20,943 INFO [train.py:1198] (0/2) Epoch 8, batch 50, loss[loss=0.2492, ctc_loss=0.1705, cr_loss=0.4143, attn_decoder_loss=0.2487, over 29449.00 frames. ], tot_loss[loss=0.2813, ctc_loss=0.1992, cr_loss=0.4258, attn_decoder_loss=0.281, over 1268320.88 frames. ], batch size: 70, lr: 1.44e-02, grad_scale: 4.0 +2024-09-17 02:21:24,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=126900.0, ans=0.125 +2024-09-17 02:22:21,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.56 vs. limit=15.0 +2024-09-17 02:22:30,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=127060.0, ans=0.0 +2024-09-17 02:22:41,947 INFO [train.py:1198] (0/2) Epoch 8, batch 100, loss[loss=0.2669, ctc_loss=0.1831, cr_loss=0.4115, attn_decoder_loss=0.267, over 29541.00 frames. ], tot_loss[loss=0.2827, ctc_loss=0.1983, cr_loss=0.4267, attn_decoder_loss=0.2826, over 2251626.63 frames. ], batch size: 76, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:22:42,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=127100.0, ans=0.1 +2024-09-17 02:23:14,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.35 vs. limit=12.0 +2024-09-17 02:23:18,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=127180.0, ans=0.2 +2024-09-17 02:23:18,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.67 vs. 
limit=15.0 +2024-09-17 02:23:34,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=127220.0, ans=0.125 +2024-09-17 02:23:37,368 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.80 vs. limit=15.0 +2024-09-17 02:23:39,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=127220.0, ans=0.0 +2024-09-17 02:23:39,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=127220.0, ans=0.125 +2024-09-17 02:23:48,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=127260.0, ans=0.125 +2024-09-17 02:23:55,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=127300.0, ans=0.0 +2024-09-17 02:23:56,928 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.517e+01 1.081e+02 1.202e+02 1.454e+02 2.807e+02, threshold=2.403e+02, percent-clipped=1.0 +2024-09-17 02:23:56,954 INFO [train.py:1198] (0/2) Epoch 8, batch 150, loss[loss=0.2498, ctc_loss=0.1653, cr_loss=0.3858, attn_decoder_loss=0.2506, over 29449.00 frames. ], tot_loss[loss=0.2796, ctc_loss=0.195, cr_loss=0.4235, attn_decoder_loss=0.2796, over 3047043.81 frames. ], batch size: 70, lr: 1.44e-02, grad_scale: 4.0 +2024-09-17 02:24:35,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=127380.0, ans=0.125 +2024-09-17 02:24:45,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=127420.0, ans=0.95 +2024-09-17 02:24:54,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=127420.0, ans=0.025 +2024-09-17 02:24:57,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=127460.0, ans=0.1 +2024-09-17 02:25:12,582 INFO [train.py:1198] (0/2) Epoch 8, batch 200, loss[loss=0.2874, ctc_loss=0.2039, cr_loss=0.4189, attn_decoder_loss=0.2874, over 27500.00 frames. ], tot_loss[loss=0.2777, ctc_loss=0.1927, cr_loss=0.4212, attn_decoder_loss=0.2778, over 3660159.46 frames. ], batch size: 124, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:25:23,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=127500.0, ans=0.0 +2024-09-17 02:25:31,490 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.85 vs. 
limit=10.0 +2024-09-17 02:25:34,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=127540.0, ans=0.125 +2024-09-17 02:25:57,143 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:26:19,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=127660.0, ans=0.95 +2024-09-17 02:26:25,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=127660.0, ans=0.09899494936611666 +2024-09-17 02:26:27,210 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.15 vs. limit=15.0 +2024-09-17 02:26:33,476 INFO [train.py:1198] (0/2) Epoch 8, batch 250, loss[loss=0.29, ctc_loss=0.1985, cr_loss=0.4372, attn_decoder_loss=0.2905, over 29198.00 frames. ], tot_loss[loss=0.277, ctc_loss=0.1915, cr_loss=0.4198, attn_decoder_loss=0.2771, over 4142718.39 frames. ], batch size: 100, lr: 1.44e-02, grad_scale: 4.0 +2024-09-17 02:26:33,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=127700.0, ans=0.2 +2024-09-17 02:26:34,935 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.173e+01 9.771e+01 1.014e+02 1.103e+02 1.585e+02, threshold=2.028e+02, percent-clipped=0.0 +2024-09-17 02:26:47,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=127740.0, ans=0.0 +2024-09-17 02:27:00,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=127740.0, ans=0.125 +2024-09-17 02:27:03,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=127780.0, ans=0.125 +2024-09-17 02:27:03,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=127780.0, ans=0.1 +2024-09-17 02:27:12,082 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.34 vs. limit=6.0 +2024-09-17 02:27:17,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=127820.0, ans=0.1 +2024-09-17 02:27:48,966 INFO [train.py:1198] (0/2) Epoch 8, batch 300, loss[loss=0.2965, ctc_loss=0.2105, cr_loss=0.4632, attn_decoder_loss=0.2958, over 29534.00 frames. ], tot_loss[loss=0.277, ctc_loss=0.1915, cr_loss=0.4207, attn_decoder_loss=0.2771, over 4511923.77 frames. 
], batch size: 92, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:27:50,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=127900.0, ans=0.1 +2024-09-17 02:27:58,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=127900.0, ans=0.0 +2024-09-17 02:28:14,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=127940.0, ans=0.125 +2024-09-17 02:28:22,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=127980.0, ans=0.125 +2024-09-17 02:28:22,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=127980.0, ans=0.125 +2024-09-17 02:28:25,487 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-32000.pt +2024-09-17 02:29:12,042 INFO [train.py:1198] (0/2) Epoch 8, batch 350, loss[loss=0.2351, ctc_loss=0.1481, cr_loss=0.3507, attn_decoder_loss=0.237, over 29325.00 frames. ], tot_loss[loss=0.2768, ctc_loss=0.1907, cr_loss=0.4201, attn_decoder_loss=0.277, over 4797536.39 frames. ], batch size: 71, lr: 1.44e-02, grad_scale: 4.0 +2024-09-17 02:29:13,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=128100.0, ans=0.2 +2024-09-17 02:29:14,916 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.409e+01 1.011e+02 1.095e+02 1.201e+02 2.476e+02, threshold=2.189e+02, percent-clipped=3.0 +2024-09-17 02:29:19,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=128100.0, ans=0.0 +2024-09-17 02:29:38,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=128140.0, ans=0.09899494936611666 +2024-09-17 02:29:38,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=128140.0, ans=10.0 +2024-09-17 02:29:59,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=128220.0, ans=0.125 +2024-09-17 02:30:27,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=128260.0, ans=0.125 +2024-09-17 02:30:29,954 INFO [train.py:1198] (0/2) Epoch 8, batch 400, loss[loss=0.275, ctc_loss=0.1879, cr_loss=0.4191, attn_decoder_loss=0.2753, over 29712.00 frames. ], tot_loss[loss=0.2761, ctc_loss=0.1901, cr_loss=0.4199, attn_decoder_loss=0.2764, over 5026705.29 frames. ], batch size: 82, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:30:54,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=128340.0, ans=0.0 +2024-09-17 02:31:07,390 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.39 vs. 
limit=10.0 +2024-09-17 02:31:15,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=128380.0, ans=0.125 +2024-09-17 02:31:27,813 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:31:39,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=128460.0, ans=0.0 +2024-09-17 02:31:44,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=128460.0, ans=0.125 +2024-09-17 02:31:48,774 INFO [train.py:1198] (0/2) Epoch 8, batch 450, loss[loss=0.2764, ctc_loss=0.1863, cr_loss=0.4255, attn_decoder_loss=0.277, over 29685.00 frames. ], tot_loss[loss=0.276, ctc_loss=0.1898, cr_loss=0.419, attn_decoder_loss=0.2763, over 5188338.28 frames. ], batch size: 83, lr: 1.43e-02, grad_scale: 4.0 +2024-09-17 02:31:53,257 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.548e+01 1.003e+02 1.077e+02 1.187e+02 3.906e+02, threshold=2.154e+02, percent-clipped=1.0 +2024-09-17 02:32:11,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=128540.0, ans=0.5 +2024-09-17 02:32:11,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=128540.0, ans=0.2 +2024-09-17 02:33:05,016 INFO [train.py:1198] (0/2) Epoch 8, batch 500, loss[loss=0.2981, ctc_loss=0.2159, cr_loss=0.4642, attn_decoder_loss=0.297, over 29382.00 frames. ], tot_loss[loss=0.2753, ctc_loss=0.1892, cr_loss=0.4179, attn_decoder_loss=0.2756, over 5330994.94 frames. ], batch size: 94, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:33:14,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=128700.0, ans=0.0 +2024-09-17 02:33:19,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=128740.0, ans=0.1 +2024-09-17 02:33:26,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=128740.0, ans=0.025 +2024-09-17 02:33:39,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.16 vs. limit=10.0 +2024-09-17 02:33:56,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=128820.0, ans=0.125 +2024-09-17 02:34:06,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=128860.0, ans=0.125 +2024-09-17 02:34:23,770 INFO [train.py:1198] (0/2) Epoch 8, batch 550, loss[loss=0.3028, ctc_loss=0.2194, cr_loss=0.4613, attn_decoder_loss=0.3018, over 28775.00 frames. ], tot_loss[loss=0.2757, ctc_loss=0.1898, cr_loss=0.4189, attn_decoder_loss=0.2759, over 5423105.24 frames. 
], batch size: 104, lr: 1.43e-02, grad_scale: 4.0 +2024-09-17 02:34:32,980 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.612e+01 1.023e+02 1.117e+02 1.226e+02 1.997e+02, threshold=2.234e+02, percent-clipped=0.0 +2024-09-17 02:34:47,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=128940.0, ans=0.125 +2024-09-17 02:34:51,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=128940.0, ans=0.125 +2024-09-17 02:35:02,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=128980.0, ans=0.2 +2024-09-17 02:35:11,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=129020.0, ans=0.07 +2024-09-17 02:35:19,376 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:35:20,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=129020.0, ans=0.125 +2024-09-17 02:35:41,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.62 vs. limit=15.0 +2024-09-17 02:35:43,599 INFO [train.py:1198] (0/2) Epoch 8, batch 600, loss[loss=0.3038, ctc_loss=0.2186, cr_loss=0.4544, attn_decoder_loss=0.3032, over 29259.00 frames. ], tot_loss[loss=0.2761, ctc_loss=0.1901, cr_loss=0.4201, attn_decoder_loss=0.2763, over 5510160.71 frames. ], batch size: 100, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:35:43,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=129100.0, ans=0.2 +2024-09-17 02:36:02,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=129140.0, ans=0.1 +2024-09-17 02:36:37,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.68 vs. limit=15.0 +2024-09-17 02:36:38,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=129220.0, ans=0.1 +2024-09-17 02:36:53,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=129260.0, ans=0.07 +2024-09-17 02:36:55,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=129260.0, ans=0.125 +2024-09-17 02:36:59,411 INFO [train.py:1198] (0/2) Epoch 8, batch 650, loss[loss=0.282, ctc_loss=0.1989, cr_loss=0.4537, attn_decoder_loss=0.2812, over 29771.00 frames. ], tot_loss[loss=0.2754, ctc_loss=0.1891, cr_loss=0.4189, attn_decoder_loss=0.2757, over 5587171.48 frames. 
], batch size: 81, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:37:04,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=129300.0, ans=0.0 +2024-09-17 02:37:05,489 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.505e+01 9.950e+01 1.082e+02 1.181e+02 2.497e+02, threshold=2.164e+02, percent-clipped=2.0 +2024-09-17 02:37:23,386 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.44 vs. limit=6.0 +2024-09-17 02:37:38,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=129380.0, ans=0.1 +2024-09-17 02:37:50,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=129420.0, ans=0.015 +2024-09-17 02:38:09,561 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.18 vs. limit=15.0 +2024-09-17 02:38:15,949 INFO [train.py:1198] (0/2) Epoch 8, batch 700, loss[loss=0.2735, ctc_loss=0.191, cr_loss=0.4037, attn_decoder_loss=0.2737, over 29523.00 frames. ], tot_loss[loss=0.276, ctc_loss=0.1893, cr_loss=0.4187, attn_decoder_loss=0.2763, over 5637076.73 frames. ], batch size: 76, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:38:16,252 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:38:24,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=129500.0, ans=0.09899494936611666 +2024-09-17 02:38:48,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=129540.0, ans=0.2 +2024-09-17 02:38:51,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=129580.0, ans=0.025 +2024-09-17 02:38:59,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=129580.0, ans=0.125 +2024-09-17 02:39:06,151 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.00 vs. limit=12.0 +2024-09-17 02:39:07,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=129620.0, ans=0.0 +2024-09-17 02:39:18,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=129620.0, ans=0.1 +2024-09-17 02:39:21,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.34 vs. limit=22.5 +2024-09-17 02:39:37,333 INFO [train.py:1198] (0/2) Epoch 8, batch 750, loss[loss=0.2868, ctc_loss=0.1985, cr_loss=0.4734, attn_decoder_loss=0.2861, over 29702.00 frames. ], tot_loss[loss=0.2752, ctc_loss=0.1887, cr_loss=0.418, attn_decoder_loss=0.2755, over 5674976.64 frames. 
], batch size: 82, lr: 1.43e-02, grad_scale: 4.0 +2024-09-17 02:39:42,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=129700.0, ans=0.125 +2024-09-17 02:39:46,299 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.613e+01 1.021e+02 1.093e+02 1.208e+02 3.929e+02, threshold=2.185e+02, percent-clipped=1.0 +2024-09-17 02:39:46,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=129700.0, ans=0.2 +2024-09-17 02:40:22,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=129820.0, ans=15.0 +2024-09-17 02:40:40,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.00 vs. limit=6.0 +2024-09-17 02:40:41,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=129860.0, ans=0.0 +2024-09-17 02:40:53,428 INFO [train.py:1198] (0/2) Epoch 8, batch 800, loss[loss=0.2555, ctc_loss=0.1737, cr_loss=0.3997, attn_decoder_loss=0.2557, over 29623.00 frames. ], tot_loss[loss=0.2753, ctc_loss=0.1889, cr_loss=0.4184, attn_decoder_loss=0.2756, over 5704886.57 frames. ], batch size: 73, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:40:59,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=129900.0, ans=0.09899494936611666 +2024-09-17 02:41:30,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=129980.0, ans=0.1 +2024-09-17 02:41:36,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=129980.0, ans=0.125 +2024-09-17 02:41:44,982 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.32 vs. limit=15.0 +2024-09-17 02:41:45,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=130020.0, ans=0.125 +2024-09-17 02:41:51,062 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=14.57 vs. limit=15.0 +2024-09-17 02:41:53,986 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.52 vs. limit=15.0 +2024-09-17 02:42:09,538 INFO [train.py:1198] (0/2) Epoch 8, batch 850, loss[loss=0.288, ctc_loss=0.1991, cr_loss=0.435, attn_decoder_loss=0.2882, over 29714.00 frames. ], tot_loss[loss=0.275, ctc_loss=0.1888, cr_loss=0.4186, attn_decoder_loss=0.2753, over 5735151.27 frames. 
], batch size: 89, lr: 1.43e-02, grad_scale: 4.0 +2024-09-17 02:42:20,112 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.352e+01 1.021e+02 1.113e+02 1.293e+02 2.449e+02, threshold=2.226e+02, percent-clipped=1.0 +2024-09-17 02:42:36,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=130140.0, ans=0.0 +2024-09-17 02:42:56,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=130180.0, ans=0.125 +2024-09-17 02:42:58,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=130180.0, ans=15.0 +2024-09-17 02:43:17,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=130260.0, ans=0.07 +2024-09-17 02:43:21,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=130260.0, ans=0.1 +2024-09-17 02:43:23,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=130260.0, ans=0.125 +2024-09-17 02:43:31,942 INFO [train.py:1198] (0/2) Epoch 8, batch 900, loss[loss=0.2557, ctc_loss=0.1636, cr_loss=0.3702, attn_decoder_loss=0.2577, over 29599.00 frames. ], tot_loss[loss=0.2755, ctc_loss=0.1893, cr_loss=0.4186, attn_decoder_loss=0.2758, over 5741411.95 frames. ], batch size: 73, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:43:37,241 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.59 vs. limit=10.0 +2024-09-17 02:43:38,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=130300.0, ans=0.125 +2024-09-17 02:43:44,956 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.12 vs. limit=22.5 +2024-09-17 02:43:49,448 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.29 vs. limit=12.0 +2024-09-17 02:44:02,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=130380.0, ans=0.125 +2024-09-17 02:44:23,700 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.49 vs. limit=22.5 +2024-09-17 02:44:29,832 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.63 vs. limit=15.0 +2024-09-17 02:44:42,109 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.15 vs. limit=15.0 +2024-09-17 02:44:45,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=130460.0, ans=0.125 +2024-09-17 02:44:48,517 INFO [train.py:1198] (0/2) Epoch 8, batch 950, loss[loss=0.2507, ctc_loss=0.1557, cr_loss=0.3787, attn_decoder_loss=0.2529, over 29504.00 frames. ], tot_loss[loss=0.2759, ctc_loss=0.1896, cr_loss=0.4187, attn_decoder_loss=0.2762, over 5742862.70 frames. 
], batch size: 74, lr: 1.42e-02, grad_scale: 4.0 +2024-09-17 02:44:48,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=130500.0, ans=0.1 +2024-09-17 02:44:50,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=130500.0, ans=0.1 +2024-09-17 02:44:57,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=130500.0, ans=0.2 +2024-09-17 02:45:00,447 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.707e+01 1.021e+02 1.105e+02 1.238e+02 2.320e+02, threshold=2.209e+02, percent-clipped=1.0 +2024-09-17 02:45:10,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=130540.0, ans=0.5 +2024-09-17 02:45:27,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=130580.0, ans=0.1 +2024-09-17 02:45:40,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=130620.0, ans=0.1 +2024-09-17 02:45:56,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=130660.0, ans=0.025 +2024-09-17 02:46:00,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=130660.0, ans=0.04949747468305833 +2024-09-17 02:46:04,900 INFO [train.py:1198] (0/2) Epoch 8, batch 1000, loss[loss=0.266, ctc_loss=0.1707, cr_loss=0.3987, attn_decoder_loss=0.2677, over 29494.00 frames. ], tot_loss[loss=0.2771, ctc_loss=0.1906, cr_loss=0.4202, attn_decoder_loss=0.2773, over 5738993.11 frames. ], batch size: 77, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:46:36,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.71 vs. limit=22.5 +2024-09-17 02:46:57,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=130820.0, ans=0.125 +2024-09-17 02:47:03,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=130820.0, ans=0.0 +2024-09-17 02:47:11,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.55 vs. limit=6.0 +2024-09-17 02:47:18,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=130860.0, ans=0.125 +2024-09-17 02:47:26,001 INFO [train.py:1198] (0/2) Epoch 8, batch 1050, loss[loss=0.277, ctc_loss=0.1852, cr_loss=0.4304, attn_decoder_loss=0.2777, over 29683.00 frames. ], tot_loss[loss=0.276, ctc_loss=0.1894, cr_loss=0.4191, attn_decoder_loss=0.2763, over 5746874.67 frames. ], batch size: 85, lr: 1.42e-02, grad_scale: 4.0 +2024-09-17 02:47:29,962 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.10 vs. 
limit=15.0 +2024-09-17 02:47:32,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=130900.0, ans=0.1 +2024-09-17 02:47:39,738 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.558e+01 1.020e+02 1.112e+02 1.252e+02 2.111e+02, threshold=2.224e+02, percent-clipped=0.0 +2024-09-17 02:48:00,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=130980.0, ans=0.1 +2024-09-17 02:48:05,601 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.59 vs. limit=15.0 +2024-09-17 02:48:06,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=130980.0, ans=0.0 +2024-09-17 02:48:12,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=131020.0, ans=0.125 +2024-09-17 02:48:17,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=131020.0, ans=0.125 +2024-09-17 02:48:32,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=131060.0, ans=0.2 +2024-09-17 02:48:36,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=131060.0, ans=0.125 +2024-09-17 02:48:42,658 INFO [train.py:1198] (0/2) Epoch 8, batch 1100, loss[loss=0.2631, ctc_loss=0.1727, cr_loss=0.4076, attn_decoder_loss=0.2641, over 29454.00 frames. ], tot_loss[loss=0.2754, ctc_loss=0.1887, cr_loss=0.4184, attn_decoder_loss=0.2758, over 5757778.83 frames. ], batch size: 78, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:49:02,751 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:49:16,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=131180.0, ans=0.125 +2024-09-17 02:49:33,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=131220.0, ans=0.0 +2024-09-17 02:49:35,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=131220.0, ans=0.1 +2024-09-17 02:49:40,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.87 vs. limit=15.0 +2024-09-17 02:49:59,485 INFO [train.py:1198] (0/2) Epoch 8, batch 1150, loss[loss=0.2722, ctc_loss=0.1933, cr_loss=0.3954, attn_decoder_loss=0.2722, over 29432.00 frames. ], tot_loss[loss=0.2755, ctc_loss=0.1891, cr_loss=0.4187, attn_decoder_loss=0.2758, over 5755642.94 frames. 
], batch size: 78, lr: 1.42e-02, grad_scale: 4.0 +2024-09-17 02:50:16,924 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.392e+01 9.941e+01 1.085e+02 1.238e+02 2.659e+02, threshold=2.171e+02, percent-clipped=2.0 +2024-09-17 02:50:36,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=131380.0, ans=0.125 +2024-09-17 02:50:42,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=131380.0, ans=0.125 +2024-09-17 02:50:56,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.21 vs. limit=15.0 +2024-09-17 02:51:00,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=131420.0, ans=0.125 +2024-09-17 02:51:15,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=131460.0, ans=0.95 +2024-09-17 02:51:19,981 INFO [train.py:1198] (0/2) Epoch 8, batch 1200, loss[loss=0.2933, ctc_loss=0.2062, cr_loss=0.4807, attn_decoder_loss=0.2923, over 29671.00 frames. ], tot_loss[loss=0.2765, ctc_loss=0.1897, cr_loss=0.4191, attn_decoder_loss=0.2768, over 5746749.16 frames. ], batch size: 85, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:51:29,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=131500.0, ans=0.125 +2024-09-17 02:51:32,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=131500.0, ans=0.125 +2024-09-17 02:51:32,585 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:51:36,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=131540.0, ans=0.0 +2024-09-17 02:51:43,348 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:51:52,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=131580.0, ans=0.2 +2024-09-17 02:52:16,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=131620.0, ans=0.0 +2024-09-17 02:52:19,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=131660.0, ans=0.1 +2024-09-17 02:52:36,147 INFO [train.py:1198] (0/2) Epoch 8, batch 1250, loss[loss=0.3051, ctc_loss=0.2138, cr_loss=0.4786, attn_decoder_loss=0.3046, over 29509.00 frames. ], tot_loss[loss=0.2772, ctc_loss=0.1903, cr_loss=0.4202, attn_decoder_loss=0.2775, over 5774357.42 frames. 
], batch size: 92, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:52:52,813 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.535e+01 1.024e+02 1.090e+02 1.251e+02 7.392e+02, threshold=2.180e+02, percent-clipped=1.0 +2024-09-17 02:52:59,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=131740.0, ans=0.125 +2024-09-17 02:53:03,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=131740.0, ans=0.125 +2024-09-17 02:53:03,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=131740.0, ans=0.5 +2024-09-17 02:53:06,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=131780.0, ans=0.0 +2024-09-17 02:53:29,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=131820.0, ans=0.09899494936611666 +2024-09-17 02:53:40,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=131860.0, ans=0.125 +2024-09-17 02:53:44,271 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.96 vs. limit=15.0 +2024-09-17 02:53:52,342 INFO [train.py:1198] (0/2) Epoch 8, batch 1300, loss[loss=0.2872, ctc_loss=0.2091, cr_loss=0.4312, attn_decoder_loss=0.2862, over 28238.00 frames. ], tot_loss[loss=0.276, ctc_loss=0.1892, cr_loss=0.4186, attn_decoder_loss=0.2764, over 5778884.41 frames. ], batch size: 111, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:54:02,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=131900.0, ans=0.0 +2024-09-17 02:54:04,727 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.05 vs. limit=15.0 +2024-09-17 02:54:05,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=131900.0, ans=0.125 +2024-09-17 02:54:10,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=131940.0, ans=0.125 +2024-09-17 02:54:23,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=131940.0, ans=0.1 +2024-09-17 02:55:13,328 INFO [train.py:1198] (0/2) Epoch 8, batch 1350, loss[loss=0.2673, ctc_loss=0.18, cr_loss=0.3892, attn_decoder_loss=0.2684, over 29755.00 frames. ], tot_loss[loss=0.2751, ctc_loss=0.1877, cr_loss=0.4171, attn_decoder_loss=0.2755, over 5795601.86 frames. 
], batch size: 81, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:55:29,761 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.512e+01 9.976e+01 1.075e+02 1.151e+02 1.437e+02, threshold=2.149e+02, percent-clipped=0.0 +2024-09-17 02:55:58,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=132220.0, ans=0.0 +2024-09-17 02:56:00,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=132220.0, ans=0.025 +2024-09-17 02:56:03,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=132220.0, ans=0.07 +2024-09-17 02:56:03,789 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.55 vs. limit=15.0 +2024-09-17 02:56:06,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=132220.0, ans=0.125 +2024-09-17 02:56:11,386 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.46 vs. limit=15.0 +2024-09-17 02:56:11,596 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.74 vs. limit=22.5 +2024-09-17 02:56:15,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=132260.0, ans=0.125 +2024-09-17 02:56:20,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=132260.0, ans=0.125 +2024-09-17 02:56:25,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=132260.0, ans=0.015 +2024-09-17 02:56:28,857 INFO [train.py:1198] (0/2) Epoch 8, batch 1400, loss[loss=0.2375, ctc_loss=0.1557, cr_loss=0.3574, attn_decoder_loss=0.2387, over 29558.00 frames. ], tot_loss[loss=0.2747, ctc_loss=0.1873, cr_loss=0.4176, attn_decoder_loss=0.2751, over 5806028.42 frames. ], batch size: 69, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:56:44,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=132340.0, ans=0.125 +2024-09-17 02:56:57,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=132380.0, ans=0.0 +2024-09-17 02:57:24,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=132420.0, ans=0.2 +2024-09-17 02:57:24,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=132420.0, ans=0.1 +2024-09-17 02:57:44,593 INFO [train.py:1198] (0/2) Epoch 8, batch 1450, loss[loss=0.2955, ctc_loss=0.198, cr_loss=0.4433, attn_decoder_loss=0.2965, over 29463.00 frames. ], tot_loss[loss=0.2754, ctc_loss=0.1881, cr_loss=0.4177, attn_decoder_loss=0.2758, over 5802626.51 frames. 
], batch size: 94, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 02:58:06,575 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.324e+01 1.032e+02 1.089e+02 1.206e+02 2.427e+02, threshold=2.178e+02, percent-clipped=3.0 +2024-09-17 02:58:15,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=132580.0, ans=0.125 +2024-09-17 02:58:28,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=132580.0, ans=0.0 +2024-09-17 02:58:35,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=132620.0, ans=0.125 +2024-09-17 02:58:40,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=132620.0, ans=0.0 +2024-09-17 02:58:53,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=132660.0, ans=0.125 +2024-09-17 02:58:58,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=132660.0, ans=0.04949747468305833 +2024-09-17 02:59:04,180 INFO [train.py:1198] (0/2) Epoch 8, batch 1500, loss[loss=0.2909, ctc_loss=0.2045, cr_loss=0.4456, attn_decoder_loss=0.2906, over 29632.00 frames. ], tot_loss[loss=0.2756, ctc_loss=0.1881, cr_loss=0.4184, attn_decoder_loss=0.276, over 5803327.74 frames. ], batch size: 86, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 02:59:10,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=132700.0, ans=0.5 +2024-09-17 02:59:16,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=132700.0, ans=0.025 +2024-09-17 02:59:27,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=132740.0, ans=0.0 +2024-09-17 02:59:44,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=132780.0, ans=0.125 +2024-09-17 02:59:56,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=132820.0, ans=0.025 +2024-09-17 02:59:57,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.06 vs. limit=15.0 +2024-09-17 02:59:59,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=132820.0, ans=0.125 +2024-09-17 03:00:15,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.20 vs. limit=15.0 +2024-09-17 03:00:20,635 INFO [train.py:1198] (0/2) Epoch 8, batch 1550, loss[loss=0.298, ctc_loss=0.2082, cr_loss=0.449, attn_decoder_loss=0.298, over 29518.00 frames. ], tot_loss[loss=0.2762, ctc_loss=0.1894, cr_loss=0.4195, attn_decoder_loss=0.2765, over 5779402.60 frames. 
], batch size: 90, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 03:00:41,776 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.026e+01 9.829e+01 1.097e+02 1.218e+02 3.935e+02, threshold=2.194e+02, percent-clipped=3.0 +2024-09-17 03:00:47,200 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.05 vs. limit=15.0 +2024-09-17 03:01:12,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=133020.0, ans=0.125 +2024-09-17 03:01:28,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=133060.0, ans=0.2 +2024-09-17 03:01:35,980 INFO [train.py:1198] (0/2) Epoch 8, batch 1600, loss[loss=0.2957, ctc_loss=0.2054, cr_loss=0.4699, attn_decoder_loss=0.2952, over 29673.00 frames. ], tot_loss[loss=0.2758, ctc_loss=0.1891, cr_loss=0.4193, attn_decoder_loss=0.2761, over 5762091.99 frames. ], batch size: 85, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 03:01:41,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=133100.0, ans=0.0 +2024-09-17 03:02:45,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=133260.0, ans=0.0 +2024-09-17 03:02:50,799 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.31 vs. limit=10.0 +2024-09-17 03:02:55,932 INFO [train.py:1198] (0/2) Epoch 8, batch 1650, loss[loss=0.2982, ctc_loss=0.2157, cr_loss=0.4517, attn_decoder_loss=0.2974, over 29713.00 frames. ], tot_loss[loss=0.2758, ctc_loss=0.1891, cr_loss=0.4191, attn_decoder_loss=0.2761, over 5756605.96 frames. ], batch size: 89, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 03:03:05,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=133300.0, ans=0.1 +2024-09-17 03:03:11,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=133340.0, ans=0.125 +2024-09-17 03:03:12,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=133340.0, ans=0.1 +2024-09-17 03:03:18,410 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.840e+01 1.022e+02 1.128e+02 1.304e+02 4.033e+02, threshold=2.256e+02, percent-clipped=2.0 +2024-09-17 03:03:30,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=133380.0, ans=0.125 +2024-09-17 03:03:35,922 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.58 vs. limit=22.5 +2024-09-17 03:03:49,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=133420.0, ans=0.2 +2024-09-17 03:04:00,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.82 vs. limit=15.0 +2024-09-17 03:04:11,213 INFO [train.py:1198] (0/2) Epoch 8, batch 1700, loss[loss=0.2401, ctc_loss=0.1741, cr_loss=0.3908, attn_decoder_loss=0.2387, over 29565.00 frames. 
], tot_loss[loss=0.2754, ctc_loss=0.1883, cr_loss=0.4182, attn_decoder_loss=0.2757, over 5779091.30 frames. ], batch size: 69, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 03:04:37,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=133540.0, ans=0.1 +2024-09-17 03:04:38,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=133540.0, ans=0.125 +2024-09-17 03:04:48,639 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.44 vs. limit=15.0 +2024-09-17 03:05:20,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=133660.0, ans=0.2 +2024-09-17 03:05:23,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=133660.0, ans=0.0 +2024-09-17 03:05:23,737 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.39 vs. limit=15.0 +2024-09-17 03:05:24,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=133660.0, ans=0.2 +2024-09-17 03:05:27,787 INFO [train.py:1198] (0/2) Epoch 8, batch 1750, loss[loss=0.2489, ctc_loss=0.1679, cr_loss=0.3929, attn_decoder_loss=0.2492, over 29365.00 frames. ], tot_loss[loss=0.2749, ctc_loss=0.1879, cr_loss=0.4173, attn_decoder_loss=0.2753, over 5788189.72 frames. ], batch size: 67, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 03:05:48,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=133740.0, ans=0.025 +2024-09-17 03:05:55,424 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.904e+01 9.818e+01 1.049e+02 1.183e+02 2.492e+02, threshold=2.098e+02, percent-clipped=1.0 +2024-09-17 03:06:00,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=133780.0, ans=0.2 +2024-09-17 03:06:00,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.39 vs. limit=15.0 +2024-09-17 03:06:04,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=133780.0, ans=0.0 +2024-09-17 03:06:31,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=133820.0, ans=0.125 +2024-09-17 03:06:44,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=133860.0, ans=0.0 +2024-09-17 03:06:48,889 INFO [train.py:1198] (0/2) Epoch 8, batch 1800, loss[loss=0.2854, ctc_loss=0.1957, cr_loss=0.4039, attn_decoder_loss=0.2864, over 29676.00 frames. ], tot_loss[loss=0.2753, ctc_loss=0.1886, cr_loss=0.4178, attn_decoder_loss=0.2757, over 5790435.38 frames. ], batch size: 83, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 03:06:58,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=133900.0, ans=0.2 +2024-09-17 03:07:11,203 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.36 vs. 
limit=6.0 +2024-09-17 03:07:15,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=133940.0, ans=0.0 +2024-09-17 03:07:15,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=133940.0, ans=0.125 +2024-09-17 03:07:21,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=133980.0, ans=0.0 +2024-09-17 03:07:27,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=133980.0, ans=0.125 +2024-09-17 03:08:05,025 INFO [train.py:1198] (0/2) Epoch 8, batch 1850, loss[loss=0.2797, ctc_loss=0.18, cr_loss=0.4053, attn_decoder_loss=0.2818, over 29648.00 frames. ], tot_loss[loss=0.275, ctc_loss=0.1882, cr_loss=0.4181, attn_decoder_loss=0.2754, over 5796224.17 frames. ], batch size: 86, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 03:08:06,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=134100.0, ans=0.0 +2024-09-17 03:08:06,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_abs, batch_count=134100.0, ans=0.5 +2024-09-17 03:08:08,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=134100.0, ans=0.2 +2024-09-17 03:08:12,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=134100.0, ans=0.125 +2024-09-17 03:08:30,753 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.292e+01 1.011e+02 1.086e+02 1.212e+02 2.686e+02, threshold=2.172e+02, percent-clipped=1.0 +2024-09-17 03:08:40,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=134180.0, ans=0.125 +2024-09-17 03:08:53,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=134220.0, ans=0.125 +2024-09-17 03:08:57,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=7.65 vs. limit=15.0 +2024-09-17 03:09:03,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=134220.0, ans=0.2 +2024-09-17 03:09:05,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=134260.0, ans=0.0 +2024-09-17 03:09:20,861 INFO [train.py:1198] (0/2) Epoch 8, batch 1900, loss[loss=0.2887, ctc_loss=0.2017, cr_loss=0.4578, attn_decoder_loss=0.2882, over 29712.00 frames. ], tot_loss[loss=0.2756, ctc_loss=0.1885, cr_loss=0.419, attn_decoder_loss=0.276, over 5803448.23 frames. 
], batch size: 89, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 03:09:37,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=134340.0, ans=0.125 +2024-09-17 03:09:46,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=134340.0, ans=0.0 +2024-09-17 03:09:49,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=134340.0, ans=0.125 +2024-09-17 03:10:05,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=134380.0, ans=0.2 +2024-09-17 03:10:28,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=134460.0, ans=0.125 +2024-09-17 03:10:41,890 INFO [train.py:1198] (0/2) Epoch 8, batch 1950, loss[loss=0.2663, ctc_loss=0.1798, cr_loss=0.3935, attn_decoder_loss=0.2671, over 29438.00 frames. ], tot_loss[loss=0.2765, ctc_loss=0.1891, cr_loss=0.4205, attn_decoder_loss=0.2768, over 5818462.75 frames. ], batch size: 78, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:10:45,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=134500.0, ans=0.125 +2024-09-17 03:11:07,572 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.49 vs. limit=10.0 +2024-09-17 03:11:09,497 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.585e+01 1.007e+02 1.092e+02 1.214e+02 3.508e+02, threshold=2.184e+02, percent-clipped=3.0 +2024-09-17 03:11:17,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=134580.0, ans=0.025 +2024-09-17 03:11:22,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=134580.0, ans=0.125 +2024-09-17 03:11:32,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=134620.0, ans=0.2 +2024-09-17 03:11:49,239 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:11:57,948 INFO [train.py:1198] (0/2) Epoch 8, batch 2000, loss[loss=0.2372, ctc_loss=0.1498, cr_loss=0.3641, attn_decoder_loss=0.2388, over 29316.00 frames. ], tot_loss[loss=0.277, ctc_loss=0.1897, cr_loss=0.4208, attn_decoder_loss=0.2773, over 5794632.01 frames. ], batch size: 67, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:12:04,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=134700.0, ans=0.125 +2024-09-17 03:12:10,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=134700.0, ans=0.025 +2024-09-17 03:12:19,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=134740.0, ans=0.025 +2024-09-17 03:12:25,104 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.69 vs. 
limit=15.0 +2024-09-17 03:12:31,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=134780.0, ans=0.125 +2024-09-17 03:12:35,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=134780.0, ans=0.125 +2024-09-17 03:12:41,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=134780.0, ans=0.125 +2024-09-17 03:12:48,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=134820.0, ans=0.0 +2024-09-17 03:12:52,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=134820.0, ans=0.0 +2024-09-17 03:13:11,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=134860.0, ans=0.0 +2024-09-17 03:13:14,528 INFO [train.py:1198] (0/2) Epoch 8, batch 2050, loss[loss=0.2473, ctc_loss=0.1633, cr_loss=0.3971, attn_decoder_loss=0.2478, over 29439.00 frames. ], tot_loss[loss=0.2759, ctc_loss=0.1888, cr_loss=0.4196, attn_decoder_loss=0.2763, over 5787964.89 frames. ], batch size: 70, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:13:23,592 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:13:25,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=134900.0, ans=0.0 +2024-09-17 03:13:37,740 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.90 vs. limit=12.0 +2024-09-17 03:13:45,831 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.747e+01 9.821e+01 1.060e+02 1.158e+02 2.378e+02, threshold=2.119e+02, percent-clipped=1.0 +2024-09-17 03:13:53,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=134980.0, ans=0.95 +2024-09-17 03:14:17,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=135020.0, ans=0.125 +2024-09-17 03:14:21,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=135060.0, ans=0.125 +2024-09-17 03:14:31,516 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.80 vs. limit=22.5 +2024-09-17 03:14:35,271 INFO [train.py:1198] (0/2) Epoch 8, batch 2100, loss[loss=0.268, ctc_loss=0.1784, cr_loss=0.4428, attn_decoder_loss=0.2682, over 29759.00 frames. ], tot_loss[loss=0.2751, ctc_loss=0.1877, cr_loss=0.4182, attn_decoder_loss=0.2755, over 5799100.71 frames. ], batch size: 81, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:14:52,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.87 vs. limit=15.0 +2024-09-17 03:15:02,999 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.24 vs. 
limit=22.5 +2024-09-17 03:15:31,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=135220.0, ans=0.125 +2024-09-17 03:15:50,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=135300.0, ans=0.1 +2024-09-17 03:15:51,537 INFO [train.py:1198] (0/2) Epoch 8, batch 2150, loss[loss=0.2569, ctc_loss=0.1708, cr_loss=0.4024, attn_decoder_loss=0.2575, over 29415.00 frames. ], tot_loss[loss=0.2741, ctc_loss=0.1864, cr_loss=0.4169, attn_decoder_loss=0.2746, over 5813709.10 frames. ], batch size: 78, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:16:10,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=135340.0, ans=0.125 +2024-09-17 03:16:21,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=135380.0, ans=0.125 +2024-09-17 03:16:22,341 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.928e+01 9.784e+01 1.043e+02 1.111e+02 1.443e+02, threshold=2.086e+02, percent-clipped=0.0 +2024-09-17 03:16:30,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=135380.0, ans=0.1 +2024-09-17 03:16:31,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=135380.0, ans=0.025 +2024-09-17 03:16:48,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=135420.0, ans=0.125 +2024-09-17 03:16:54,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=135460.0, ans=0.125 +2024-09-17 03:17:02,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=135460.0, ans=0.125 +2024-09-17 03:17:06,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=135500.0, ans=0.1 +2024-09-17 03:17:07,882 INFO [train.py:1198] (0/2) Epoch 8, batch 2200, loss[loss=0.2876, ctc_loss=0.193, cr_loss=0.4384, attn_decoder_loss=0.2883, over 29622.00 frames. ], tot_loss[loss=0.2745, ctc_loss=0.1871, cr_loss=0.4179, attn_decoder_loss=0.2749, over 5810736.58 frames. 
], batch size: 86, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:17:15,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=135500.0, ans=0.125 +2024-09-17 03:17:17,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=135500.0, ans=0.0 +2024-09-17 03:17:35,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=135540.0, ans=0.125 +2024-09-17 03:17:42,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=135580.0, ans=0.125 +2024-09-17 03:17:55,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=135620.0, ans=0.125 +2024-09-17 03:18:15,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=135660.0, ans=0.125 +2024-09-17 03:18:22,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=135660.0, ans=0.1 +2024-09-17 03:18:28,897 INFO [train.py:1198] (0/2) Epoch 8, batch 2250, loss[loss=0.2732, ctc_loss=0.1786, cr_loss=0.39, attn_decoder_loss=0.2751, over 29699.00 frames. ], tot_loss[loss=0.2741, ctc_loss=0.1865, cr_loss=0.4164, attn_decoder_loss=0.2746, over 5810439.11 frames. ], batch size: 82, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:18:44,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=135740.0, ans=0.0 +2024-09-17 03:18:47,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=135740.0, ans=0.025 +2024-09-17 03:18:53,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=135740.0, ans=0.0 +2024-09-17 03:18:54,111 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.93 vs. limit=15.0 +2024-09-17 03:19:00,614 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.680e+01 9.920e+01 1.107e+02 1.209e+02 3.496e+02, threshold=2.214e+02, percent-clipped=1.0 +2024-09-17 03:19:04,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=135780.0, ans=0.125 +2024-09-17 03:19:09,269 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.30 vs. limit=15.0 +2024-09-17 03:19:10,933 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.15 vs. limit=15.0 +2024-09-17 03:19:21,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.08 vs. 
limit=15.0 +2024-09-17 03:19:22,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=135820.0, ans=0.0 +2024-09-17 03:19:29,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=135860.0, ans=0.125 +2024-09-17 03:19:42,485 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.85 vs. limit=15.0 +2024-09-17 03:19:44,834 INFO [train.py:1198] (0/2) Epoch 8, batch 2300, loss[loss=0.2535, ctc_loss=0.1703, cr_loss=0.4259, attn_decoder_loss=0.2533, over 29330.00 frames. ], tot_loss[loss=0.2729, ctc_loss=0.1851, cr_loss=0.4149, attn_decoder_loss=0.2734, over 5798938.31 frames. ], batch size: 71, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:20:49,986 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.75 vs. limit=15.0 +2024-09-17 03:21:01,311 INFO [train.py:1198] (0/2) Epoch 8, batch 2350, loss[loss=0.2918, ctc_loss=0.1987, cr_loss=0.4678, attn_decoder_loss=0.2917, over 29677.00 frames. ], tot_loss[loss=0.2731, ctc_loss=0.1853, cr_loss=0.4154, attn_decoder_loss=0.2736, over 5805266.74 frames. ], batch size: 83, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:21:03,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=136100.0, ans=0.1 +2024-09-17 03:21:19,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=136140.0, ans=0.125 +2024-09-17 03:21:29,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=136140.0, ans=0.125 +2024-09-17 03:21:37,145 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.392e+01 1.038e+02 1.165e+02 1.369e+02 2.325e+02, threshold=2.330e+02, percent-clipped=1.0 +2024-09-17 03:21:51,863 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.78 vs. limit=12.0 +2024-09-17 03:22:15,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=136260.0, ans=0.125 +2024-09-17 03:22:19,683 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.50 vs. limit=6.0 +2024-09-17 03:22:21,878 INFO [train.py:1198] (0/2) Epoch 8, batch 2400, loss[loss=0.2598, ctc_loss=0.1781, cr_loss=0.3989, attn_decoder_loss=0.26, over 29559.00 frames. ], tot_loss[loss=0.2735, ctc_loss=0.1857, cr_loss=0.4156, attn_decoder_loss=0.274, over 5808502.42 frames. 
], batch size: 76, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:22:28,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=136300.0, ans=0.0 +2024-09-17 03:23:04,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=136380.0, ans=0.0 +2024-09-17 03:23:13,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=136420.0, ans=0.0 +2024-09-17 03:23:22,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=136460.0, ans=0.2 +2024-09-17 03:23:34,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=136460.0, ans=0.125 +2024-09-17 03:23:37,608 INFO [train.py:1198] (0/2) Epoch 8, batch 2450, loss[loss=0.2763, ctc_loss=0.1833, cr_loss=0.4311, attn_decoder_loss=0.2771, over 29723.00 frames. ], tot_loss[loss=0.2747, ctc_loss=0.1869, cr_loss=0.4169, attn_decoder_loss=0.2752, over 5784961.70 frames. ], batch size: 82, lr: 1.39e-02, grad_scale: 4.0 +2024-09-17 03:23:39,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=136500.0, ans=0.0 +2024-09-17 03:24:11,927 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.397e+01 1.019e+02 1.082e+02 1.263e+02 3.288e+02, threshold=2.163e+02, percent-clipped=1.0 +2024-09-17 03:24:24,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=136620.0, ans=0.125 +2024-09-17 03:24:32,659 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.48 vs. limit=15.0 +2024-09-17 03:24:36,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=136660.0, ans=0.0 +2024-09-17 03:24:39,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=136660.0, ans=0.125 +2024-09-17 03:24:45,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=136660.0, ans=0.125 +2024-09-17 03:24:52,939 INFO [train.py:1198] (0/2) Epoch 8, batch 2500, loss[loss=0.2729, ctc_loss=0.1823, cr_loss=0.4075, attn_decoder_loss=0.274, over 29643.00 frames. ], tot_loss[loss=0.2749, ctc_loss=0.1872, cr_loss=0.4176, attn_decoder_loss=0.2753, over 5794684.02 frames. ], batch size: 86, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:25:06,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.71 vs. 
limit=15.0 +2024-09-17 03:25:28,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=136780.0, ans=0.025 +2024-09-17 03:25:32,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=136780.0, ans=0.2 +2024-09-17 03:25:50,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=136820.0, ans=0.1 +2024-09-17 03:25:59,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=136860.0, ans=0.0 +2024-09-17 03:26:02,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.86 vs. limit=15.0 +2024-09-17 03:26:13,258 INFO [train.py:1198] (0/2) Epoch 8, batch 2550, loss[loss=0.2406, ctc_loss=0.1607, cr_loss=0.3813, attn_decoder_loss=0.241, over 29295.00 frames. ], tot_loss[loss=0.2747, ctc_loss=0.187, cr_loss=0.4173, attn_decoder_loss=0.2751, over 5797661.63 frames. ], batch size: 67, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:26:49,205 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.639e+01 1.024e+02 1.084e+02 1.212e+02 4.526e+02, threshold=2.168e+02, percent-clipped=2.0 +2024-09-17 03:27:19,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.63 vs. limit=15.0 +2024-09-17 03:27:28,808 INFO [train.py:1198] (0/2) Epoch 8, batch 2600, loss[loss=0.2625, ctc_loss=0.1712, cr_loss=0.399, attn_decoder_loss=0.2638, over 29457.00 frames. ], tot_loss[loss=0.2753, ctc_loss=0.1875, cr_loss=0.4187, attn_decoder_loss=0.2757, over 5793752.23 frames. ], batch size: 78, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:27:45,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=137140.0, ans=0.2 +2024-09-17 03:27:57,475 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:28:01,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=137180.0, ans=0.0 +2024-09-17 03:28:20,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=137220.0, ans=0.0 +2024-09-17 03:28:32,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=137260.0, ans=0.1 +2024-09-17 03:28:35,693 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.51 vs. limit=15.0 +2024-09-17 03:28:43,790 INFO [train.py:1198] (0/2) Epoch 8, batch 2650, loss[loss=0.2955, ctc_loss=0.208, cr_loss=0.4395, attn_decoder_loss=0.2954, over 29343.00 frames. ], tot_loss[loss=0.2758, ctc_loss=0.1877, cr_loss=0.4197, attn_decoder_loss=0.2762, over 5800782.28 frames. ], batch size: 100, lr: 1.39e-02, grad_scale: 4.0 +2024-09-17 03:28:47,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=137300.0, ans=0.0 +2024-09-17 03:28:50,910 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.23 vs. 
limit=22.5 +2024-09-17 03:28:55,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=137300.0, ans=0.025 +2024-09-17 03:28:56,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=137300.0, ans=0.125 +2024-09-17 03:28:58,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=137300.0, ans=0.0 +2024-09-17 03:29:23,304 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.237e+01 1.027e+02 1.110e+02 1.218e+02 2.254e+02, threshold=2.220e+02, percent-clipped=2.0 +2024-09-17 03:29:25,786 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.26 vs. limit=22.5 +2024-09-17 03:29:32,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=137420.0, ans=0.1 +2024-09-17 03:29:38,809 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=20.06 vs. limit=22.5 +2024-09-17 03:30:02,785 INFO [train.py:1198] (0/2) Epoch 8, batch 2700, loss[loss=0.2887, ctc_loss=0.1908, cr_loss=0.4502, attn_decoder_loss=0.2896, over 29538.00 frames. ], tot_loss[loss=0.2764, ctc_loss=0.1884, cr_loss=0.4206, attn_decoder_loss=0.2768, over 5797346.09 frames. ], batch size: 87, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:30:06,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=137500.0, ans=0.025 +2024-09-17 03:30:09,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=137500.0, ans=0.1 +2024-09-17 03:30:13,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=137500.0, ans=0.1 +2024-09-17 03:30:52,043 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:31:02,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=137660.0, ans=0.0 +2024-09-17 03:31:13,661 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.05 vs. limit=15.0 +2024-09-17 03:31:18,721 INFO [train.py:1198] (0/2) Epoch 8, batch 2750, loss[loss=0.2619, ctc_loss=0.1782, cr_loss=0.4189, attn_decoder_loss=0.2619, over 29524.00 frames. ], tot_loss[loss=0.2747, ctc_loss=0.1871, cr_loss=0.4181, attn_decoder_loss=0.2752, over 5795411.61 frames. ], batch size: 75, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:31:31,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=137700.0, ans=0.0 +2024-09-17 03:31:37,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=137740.0, ans=0.025 +2024-09-17 03:31:53,030 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=16.22 vs. 
limit=22.5 +2024-09-17 03:31:56,143 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.690e+01 1.009e+02 1.091e+02 1.195e+02 3.553e+02, threshold=2.183e+02, percent-clipped=1.0 +2024-09-17 03:32:05,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=137820.0, ans=0.125 +2024-09-17 03:32:34,231 INFO [train.py:1198] (0/2) Epoch 8, batch 2800, loss[loss=0.321, ctc_loss=0.2713, cr_loss=0.4452, attn_decoder_loss=0.3166, over 20472.00 frames. ], tot_loss[loss=0.2754, ctc_loss=0.1882, cr_loss=0.4187, attn_decoder_loss=0.2758, over 5777584.66 frames. ], batch size: 209, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:32:45,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=137900.0, ans=0.0 +2024-09-17 03:33:00,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=137940.0, ans=0.1 +2024-09-17 03:33:11,661 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.90 vs. limit=15.0 +2024-09-17 03:33:17,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.63 vs. limit=15.0 +2024-09-17 03:33:34,652 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.79 vs. limit=15.0 +2024-09-17 03:33:53,380 INFO [train.py:1198] (0/2) Epoch 8, batch 2850, loss[loss=0.2684, ctc_loss=0.1809, cr_loss=0.4146, attn_decoder_loss=0.2689, over 29516.00 frames. ], tot_loss[loss=0.2764, ctc_loss=0.1893, cr_loss=0.4203, attn_decoder_loss=0.2767, over 5763441.95 frames. ], batch size: 77, lr: 1.39e-02, grad_scale: 4.0 +2024-09-17 03:34:16,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=138140.0, ans=0.0 +2024-09-17 03:34:34,371 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.049e+01 1.050e+02 1.191e+02 1.407e+02 3.981e+02, threshold=2.382e+02, percent-clipped=5.0 +2024-09-17 03:34:51,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys.whitening_limit, batch_count=138220.0, ans=6.0 +2024-09-17 03:34:55,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=138260.0, ans=0.5 +2024-09-17 03:34:57,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.18 vs. limit=10.0 +2024-09-17 03:35:06,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=138260.0, ans=0.0 +2024-09-17 03:35:09,045 INFO [train.py:1198] (0/2) Epoch 8, batch 2900, loss[loss=0.2657, ctc_loss=0.1709, cr_loss=0.3983, attn_decoder_loss=0.2674, over 29437.00 frames. ], tot_loss[loss=0.2773, ctc_loss=0.1896, cr_loss=0.422, attn_decoder_loss=0.2777, over 5788952.23 frames. ], batch size: 79, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:36:13,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.28 vs. 
limit=22.5 +2024-09-17 03:36:18,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=138460.0, ans=0.125 +2024-09-17 03:36:23,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=138500.0, ans=0.125 +2024-09-17 03:36:24,465 INFO [train.py:1198] (0/2) Epoch 8, batch 2950, loss[loss=0.2626, ctc_loss=0.1669, cr_loss=0.385, attn_decoder_loss=0.2647, over 29533.00 frames. ], tot_loss[loss=0.2757, ctc_loss=0.1883, cr_loss=0.4196, attn_decoder_loss=0.2761, over 5781903.50 frames. ], batch size: 75, lr: 1.38e-02, grad_scale: 4.0 +2024-09-17 03:37:00,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=138580.0, ans=0.0 +2024-09-17 03:37:08,774 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.343e+01 1.007e+02 1.102e+02 1.224e+02 2.215e+02, threshold=2.205e+02, percent-clipped=0.0 +2024-09-17 03:37:26,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.58 vs. limit=22.5 +2024-09-17 03:37:42,131 INFO [train.py:1198] (0/2) Epoch 8, batch 3000, loss[loss=0.2782, ctc_loss=0.194, cr_loss=0.435, attn_decoder_loss=0.2779, over 29746.00 frames. ], tot_loss[loss=0.2756, ctc_loss=0.1878, cr_loss=0.4192, attn_decoder_loss=0.276, over 5783130.90 frames. ], batch size: 81, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:37:42,132 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 03:38:01,065 INFO [train.py:1230] (0/2) Epoch 8, validation: loss=0.2156, ctc_loss=0.0545, cr_loss=4.305e-15, attn_decoder_loss=0.2335, over 944034.00 frames. +2024-09-17 03:38:01,066 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 03:38:02,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=138700.0, ans=0.025 +2024-09-17 03:38:04,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=138700.0, ans=0.04949747468305833 +2024-09-17 03:38:06,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=138700.0, ans=0.125 +2024-09-17 03:38:10,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=138700.0, ans=0.125 +2024-09-17 03:38:13,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=138700.0, ans=0.0 +2024-09-17 03:38:23,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=138740.0, ans=6.0 +2024-09-17 03:38:24,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=138740.0, ans=0.1 +2024-09-17 03:39:01,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=138860.0, ans=0.125 +2024-09-17 03:39:13,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=138860.0, ans=0.07 +2024-09-17 03:39:16,488 INFO [train.py:1198] (0/2) Epoch 8, batch 3050, loss[loss=0.2582, ctc_loss=0.1715, cr_loss=0.4009, attn_decoder_loss=0.259, over 29551.00 
frames. ], tot_loss[loss=0.276, ctc_loss=0.1882, cr_loss=0.4193, attn_decoder_loss=0.2764, over 5776967.42 frames. ], batch size: 76, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:39:36,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=138940.0, ans=0.125 +2024-09-17 03:39:40,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=138940.0, ans=0.125 +2024-09-17 03:39:58,627 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.688e+01 1.026e+02 1.087e+02 1.186e+02 2.791e+02, threshold=2.173e+02, percent-clipped=1.0 +2024-09-17 03:40:03,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=139020.0, ans=0.125 +2024-09-17 03:40:15,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=139020.0, ans=0.125 +2024-09-17 03:40:24,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=139060.0, ans=0.0 +2024-09-17 03:40:29,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=139060.0, ans=0.125 +2024-09-17 03:40:30,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=139060.0, ans=0.04949747468305833 +2024-09-17 03:40:32,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=139100.0, ans=0.09899494936611666 +2024-09-17 03:40:33,616 INFO [train.py:1198] (0/2) Epoch 8, batch 3100, loss[loss=0.3015, ctc_loss=0.2094, cr_loss=0.4319, attn_decoder_loss=0.3022, over 29287.00 frames. ], tot_loss[loss=0.2752, ctc_loss=0.1876, cr_loss=0.4181, attn_decoder_loss=0.2756, over 5776640.30 frames. ], batch size: 100, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:40:59,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=139140.0, ans=0.09899494936611666 +2024-09-17 03:41:08,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=139180.0, ans=0.1 +2024-09-17 03:41:09,211 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=24.19 vs. limit=22.5 +2024-09-17 03:41:33,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=139220.0, ans=0.95 +2024-09-17 03:41:45,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=139260.0, ans=0.1 +2024-09-17 03:41:51,268 INFO [train.py:1198] (0/2) Epoch 8, batch 3150, loss[loss=0.292, ctc_loss=0.202, cr_loss=0.4581, attn_decoder_loss=0.2918, over 28894.00 frames. ], tot_loss[loss=0.275, ctc_loss=0.1874, cr_loss=0.418, attn_decoder_loss=0.2754, over 5783598.22 frames. 
], batch size: 104, lr: 1.38e-02, grad_scale: 4.0 +2024-09-17 03:41:54,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=139300.0, ans=0.1 +2024-09-17 03:42:17,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=139340.0, ans=0.2 +2024-09-17 03:42:29,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=139380.0, ans=0.125 +2024-09-17 03:42:29,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.62 vs. limit=22.5 +2024-09-17 03:42:30,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=139380.0, ans=0.2 +2024-09-17 03:42:36,631 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.236e+01 1.018e+02 1.127e+02 1.309e+02 2.778e+02, threshold=2.254e+02, percent-clipped=1.0 +2024-09-17 03:42:45,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=139420.0, ans=0.0 +2024-09-17 03:42:47,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=139420.0, ans=0.125 +2024-09-17 03:42:59,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=139460.0, ans=0.125 +2024-09-17 03:43:06,665 INFO [train.py:1198] (0/2) Epoch 8, batch 3200, loss[loss=0.2731, ctc_loss=0.1815, cr_loss=0.4183, attn_decoder_loss=0.274, over 29414.00 frames. ], tot_loss[loss=0.2743, ctc_loss=0.1866, cr_loss=0.4168, attn_decoder_loss=0.2748, over 5794481.96 frames. ], batch size: 79, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:43:32,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=139540.0, ans=0.0 +2024-09-17 03:43:57,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=139620.0, ans=0.125 +2024-09-17 03:44:02,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=139620.0, ans=0.07 +2024-09-17 03:44:06,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=139620.0, ans=0.0 +2024-09-17 03:44:14,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=139660.0, ans=0.2 +2024-09-17 03:44:18,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=139660.0, ans=0.2 +2024-09-17 03:44:24,504 INFO [train.py:1198] (0/2) Epoch 8, batch 3250, loss[loss=0.284, ctc_loss=0.195, cr_loss=0.4385, attn_decoder_loss=0.2841, over 29712.00 frames. ], tot_loss[loss=0.275, ctc_loss=0.1871, cr_loss=0.4182, attn_decoder_loss=0.2755, over 5800033.22 frames. 
], batch size: 84, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:44:48,818 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:44:52,294 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.61 vs. limit=15.0 +2024-09-17 03:44:59,516 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.60 vs. limit=10.0 +2024-09-17 03:45:08,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=139820.0, ans=0.0 +2024-09-17 03:45:09,633 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.457e+01 9.664e+01 1.027e+02 1.100e+02 2.131e+02, threshold=2.054e+02, percent-clipped=0.0 +2024-09-17 03:45:11,643 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:45:32,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=139860.0, ans=0.125 +2024-09-17 03:45:41,708 INFO [train.py:1198] (0/2) Epoch 8, batch 3300, loss[loss=0.301, ctc_loss=0.2091, cr_loss=0.4643, attn_decoder_loss=0.3009, over 28325.00 frames. ], tot_loss[loss=0.2737, ctc_loss=0.1862, cr_loss=0.4166, attn_decoder_loss=0.2742, over 5797312.70 frames. ], batch size: 111, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:45:47,182 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.53 vs. limit=6.0 +2024-09-17 03:45:55,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=139940.0, ans=0.0 +2024-09-17 03:46:04,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=139940.0, ans=0.125 +2024-09-17 03:46:15,291 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:46:39,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=140020.0, ans=0.125 +2024-09-17 03:46:51,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=140060.0, ans=0.0 +2024-09-17 03:46:57,288 INFO [train.py:1198] (0/2) Epoch 8, batch 3350, loss[loss=0.2967, ctc_loss=0.2077, cr_loss=0.4401, attn_decoder_loss=0.2968, over 29018.00 frames. ], tot_loss[loss=0.2748, ctc_loss=0.1875, cr_loss=0.4176, attn_decoder_loss=0.2753, over 5776061.67 frames. 
], batch size: 104, lr: 1.38e-02, grad_scale: 4.0 +2024-09-17 03:47:04,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=140100.0, ans=0.07 +2024-09-17 03:47:21,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=140140.0, ans=0.2 +2024-09-17 03:47:22,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.whiten.whitening_limit, batch_count=140140.0, ans=12.0 +2024-09-17 03:47:32,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=140180.0, ans=0.5 +2024-09-17 03:47:43,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=140220.0, ans=0.125 +2024-09-17 03:47:43,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=140220.0, ans=0.125 +2024-09-17 03:47:46,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=140220.0, ans=0.04949747468305833 +2024-09-17 03:47:47,553 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.416e+01 1.028e+02 1.095e+02 1.236e+02 5.561e+02, threshold=2.191e+02, percent-clipped=3.0 +2024-09-17 03:47:54,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=140220.0, ans=0.1 +2024-09-17 03:47:56,329 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.95 vs. limit=15.0 +2024-09-17 03:48:04,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=140260.0, ans=0.1 +2024-09-17 03:48:14,970 INFO [train.py:1198] (0/2) Epoch 8, batch 3400, loss[loss=0.2377, ctc_loss=0.1569, cr_loss=0.3521, attn_decoder_loss=0.2388, over 29346.00 frames. ], tot_loss[loss=0.2745, ctc_loss=0.1871, cr_loss=0.4175, attn_decoder_loss=0.275, over 5768943.02 frames. ], batch size: 67, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:48:19,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=140300.0, ans=0.0 +2024-09-17 03:48:35,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten.whitening_limit, batch_count=140340.0, ans=15.0 +2024-09-17 03:49:07,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=140420.0, ans=0.2 +2024-09-17 03:49:31,833 INFO [train.py:1198] (0/2) Epoch 8, batch 3450, loss[loss=0.2843, ctc_loss=0.1975, cr_loss=0.413, attn_decoder_loss=0.2847, over 28511.00 frames. ], tot_loss[loss=0.2752, ctc_loss=0.1879, cr_loss=0.4188, attn_decoder_loss=0.2756, over 5777423.82 frames. 
], batch size: 112, lr: 1.38e-02, grad_scale: 4.0 +2024-09-17 03:49:51,776 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:49:57,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff3.min_abs, batch_count=140540.0, ans=0.2 +2024-09-17 03:50:00,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=140580.0, ans=0.125 +2024-09-17 03:50:21,397 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.539e+01 1.005e+02 1.084e+02 1.145e+02 2.009e+02, threshold=2.168e+02, percent-clipped=0.0 +2024-09-17 03:50:38,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=140660.0, ans=0.2 +2024-09-17 03:50:47,169 INFO [train.py:1198] (0/2) Epoch 8, batch 3500, loss[loss=0.257, ctc_loss=0.1736, cr_loss=0.4132, attn_decoder_loss=0.2571, over 29329.00 frames. ], tot_loss[loss=0.2747, ctc_loss=0.1875, cr_loss=0.4184, attn_decoder_loss=0.2751, over 5779003.89 frames. ], batch size: 71, lr: 1.37e-02, grad_scale: 8.0 +2024-09-17 03:50:50,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=140700.0, ans=0.125 +2024-09-17 03:50:52,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=140700.0, ans=0.0 +2024-09-17 03:51:18,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=140780.0, ans=0.125 +2024-09-17 03:51:41,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=140820.0, ans=0.1 +2024-09-17 03:51:42,064 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.85 vs. limit=12.0 +2024-09-17 03:51:50,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=140860.0, ans=0.1 +2024-09-17 03:51:53,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=140860.0, ans=0.125 +2024-09-17 03:51:58,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=140860.0, ans=0.125 +2024-09-17 03:52:01,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=140860.0, ans=0.125 +2024-09-17 03:52:03,852 INFO [train.py:1198] (0/2) Epoch 8, batch 3550, loss[loss=0.2885, ctc_loss=0.1972, cr_loss=0.4359, attn_decoder_loss=0.289, over 29712.00 frames. ], tot_loss[loss=0.2742, ctc_loss=0.1869, cr_loss=0.4177, attn_decoder_loss=0.2747, over 5784977.58 frames. ], batch size: 89, lr: 1.37e-02, grad_scale: 4.0 +2024-09-17 03:52:12,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=140900.0, ans=0.1 +2024-09-17 03:52:22,242 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=10.18 vs. 
limit=15.0 +2024-09-17 03:52:23,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=140940.0, ans=0.0 +2024-09-17 03:52:23,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=140940.0, ans=0.125 +2024-09-17 03:52:23,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=140940.0, ans=0.2 +2024-09-17 03:52:23,742 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=9.72 vs. limit=15.0 +2024-09-17 03:52:40,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=140980.0, ans=0.07 +2024-09-17 03:52:43,181 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.75 vs. limit=15.0 +2024-09-17 03:52:53,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.75 vs. limit=15.0 +2024-09-17 03:52:53,894 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.324e+01 1.017e+02 1.100e+02 1.203e+02 4.569e+02, threshold=2.200e+02, percent-clipped=1.0 +2024-09-17 03:53:01,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=141060.0, ans=0.125 +2024-09-17 03:53:02,128 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.94 vs. limit=15.0 +2024-09-17 03:53:06,788 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.63 vs. limit=22.5 +2024-09-17 03:53:17,658 INFO [train.py:1198] (0/2) Epoch 8, batch 3600, loss[loss=0.2705, ctc_loss=0.1836, cr_loss=0.4252, attn_decoder_loss=0.2707, over 29471.00 frames. ], tot_loss[loss=0.2739, ctc_loss=0.1862, cr_loss=0.417, attn_decoder_loss=0.2744, over 5793799.72 frames. ], batch size: 77, lr: 1.37e-02, grad_scale: 8.0 +2024-09-17 03:53:19,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=141100.0, ans=0.0 +2024-09-17 03:53:46,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=141180.0, ans=0.1 +2024-09-17 03:54:02,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=141220.0, ans=0.125 +2024-09-17 03:54:09,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=141220.0, ans=0.1 +2024-09-17 03:54:11,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=141220.0, ans=0.07 +2024-09-17 03:54:18,133 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.69 vs. 
limit=15.0 +2024-09-17 03:54:21,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=141260.0, ans=0.0 +2024-09-17 03:54:32,134 INFO [train.py:1198] (0/2) Epoch 8, batch 3650, loss[loss=0.3029, ctc_loss=0.2144, cr_loss=0.4606, attn_decoder_loss=0.3025, over 29531.00 frames. ], tot_loss[loss=0.2733, ctc_loss=0.1853, cr_loss=0.4161, attn_decoder_loss=0.2738, over 5795743.78 frames. ], batch size: 90, lr: 1.37e-02, grad_scale: 4.0 +2024-09-17 03:54:54,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=141340.0, ans=0.0 +2024-09-17 03:54:56,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=141340.0, ans=0.125 +2024-09-17 03:54:59,385 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:55:07,512 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.63 vs. limit=6.0 +2024-09-17 03:55:22,473 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.21 vs. limit=10.0 +2024-09-17 03:55:26,037 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.265e+01 1.005e+02 1.060e+02 1.174e+02 2.245e+02, threshold=2.119e+02, percent-clipped=1.0 +2024-09-17 03:55:33,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=141460.0, ans=0.1 +2024-09-17 03:55:47,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=141500.0, ans=0.125 +2024-09-17 03:55:48,544 INFO [train.py:1198] (0/2) Epoch 8, batch 3700, loss[loss=0.2778, ctc_loss=0.186, cr_loss=0.4217, attn_decoder_loss=0.2786, over 29720.00 frames. ], tot_loss[loss=0.2731, ctc_loss=0.185, cr_loss=0.4158, attn_decoder_loss=0.2737, over 5805537.72 frames. ], batch size: 84, lr: 1.37e-02, grad_scale: 8.0 +2024-09-17 03:55:51,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=141500.0, ans=0.125 +2024-09-17 03:56:03,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=141540.0, ans=0.1 +2024-09-17 03:56:18,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=141580.0, ans=0.125 +2024-09-17 03:56:21,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=141580.0, ans=0.1 +2024-09-17 03:56:22,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=141580.0, ans=0.07 +2024-09-17 03:56:40,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=141620.0, ans=0.2 +2024-09-17 03:56:58,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=141660.0, ans=0.0 +2024-09-17 03:57:02,878 INFO [train.py:1198] (0/2) Epoch 8, batch 3750, loss[loss=0.2463, ctc_loss=0.1655, cr_loss=0.4192, attn_decoder_loss=0.246, over 29315.00 frames. 
], tot_loss[loss=0.2729, ctc_loss=0.1847, cr_loss=0.4163, attn_decoder_loss=0.2735, over 5808361.26 frames. ], batch size: 67, lr: 1.37e-02, grad_scale: 4.0 +2024-09-17 03:57:15,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.10 vs. limit=15.0 +2024-09-17 03:57:30,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=141740.0, ans=0.125 +2024-09-17 03:57:36,311 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.05 vs. limit=15.0 +2024-09-17 03:57:55,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=141820.0, ans=0.0 +2024-09-17 03:57:56,036 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.35 vs. limit=15.0 +2024-09-17 03:57:56,595 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.547e+01 9.777e+01 1.089e+02 1.271e+02 6.127e+02, threshold=2.178e+02, percent-clipped=4.0 +2024-09-17 03:58:04,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=141860.0, ans=0.2 +2024-09-17 03:58:09,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=141860.0, ans=0.025 +2024-09-17 03:58:10,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=141860.0, ans=0.025 +2024-09-17 03:58:18,896 INFO [train.py:1198] (0/2) Epoch 8, batch 3800, loss[loss=0.2914, ctc_loss=0.1992, cr_loss=0.4432, attn_decoder_loss=0.2918, over 29645.00 frames. ], tot_loss[loss=0.2731, ctc_loss=0.1848, cr_loss=0.4156, attn_decoder_loss=0.2737, over 5798818.12 frames. ], batch size: 86, lr: 1.37e-02, grad_scale: 8.0 +2024-09-17 03:58:28,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=141900.0, ans=0.09899494936611666 +2024-09-17 03:58:33,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=141940.0, ans=0.0 +2024-09-17 03:58:36,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=141940.0, ans=0.05 +2024-09-17 03:58:38,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=141940.0, ans=0.0 +2024-09-17 03:58:38,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=141940.0, ans=0.2 +2024-09-17 03:59:16,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=142060.0, ans=0.0 +2024-09-17 03:59:24,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=142060.0, ans=0.2 +2024-09-17 03:59:33,130 INFO [train.py:1198] (0/2) Epoch 8, batch 3850, loss[loss=0.2955, ctc_loss=0.1986, cr_loss=0.4216, attn_decoder_loss=0.2969, over 29309.00 frames. ], tot_loss[loss=0.2728, ctc_loss=0.1843, cr_loss=0.4153, attn_decoder_loss=0.2735, over 5812186.26 frames. 
], batch size: 100, lr: 1.37e-02, grad_scale: 8.0 +2024-09-17 03:59:33,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=142100.0, ans=0.1 +2024-09-17 03:59:46,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=142140.0, ans=0.125 +2024-09-17 04:00:02,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=142180.0, ans=0.0 +2024-09-17 04:00:15,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.20 vs. limit=22.5 +2024-09-17 04:00:19,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=142220.0, ans=0.1 +2024-09-17 04:00:26,681 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.741e+01 9.765e+01 1.055e+02 1.135e+02 1.958e+02, threshold=2.110e+02, percent-clipped=1.0 +2024-09-17 04:00:40,871 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.53 vs. limit=15.0 +2024-09-17 04:00:47,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=142300.0, ans=0.1 +2024-09-17 04:00:48,846 INFO [train.py:1198] (0/2) Epoch 8, batch 3900, loss[loss=0.2865, ctc_loss=0.1824, cr_loss=0.4207, attn_decoder_loss=0.2888, over 29630.00 frames. ], tot_loss[loss=0.2734, ctc_loss=0.1846, cr_loss=0.4161, attn_decoder_loss=0.274, over 5816542.36 frames. ], batch size: 86, lr: 1.37e-02, grad_scale: 8.0 +2024-09-17 04:00:50,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=142300.0, ans=0.125 +2024-09-17 04:00:52,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=142300.0, ans=0.125 +2024-09-17 04:00:55,191 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:01:04,065 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:01:11,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=142340.0, ans=0.2 +2024-09-17 04:01:32,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=142420.0, ans=0.0 +2024-09-17 04:01:45,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=142420.0, ans=0.2 +2024-09-17 04:01:46,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=142460.0, ans=0.125 +2024-09-17 04:01:49,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=142460.0, ans=0.0 +2024-09-17 04:02:02,699 INFO [train.py:1198] (0/2) Epoch 8, batch 3950, loss[loss=0.2866, ctc_loss=0.1851, cr_loss=0.4135, attn_decoder_loss=0.2887, over 29474.00 frames. ], tot_loss[loss=0.2734, ctc_loss=0.1844, cr_loss=0.4166, attn_decoder_loss=0.274, over 5836000.69 frames. 
], batch size: 97, lr: 1.37e-02, grad_scale: 4.0 +2024-09-17 04:02:06,829 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.78 vs. limit=15.0 +2024-09-17 04:02:12,726 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.57 vs. limit=22.5 +2024-09-17 04:02:19,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=142540.0, ans=0.125 +2024-09-17 04:02:22,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=142540.0, ans=0.2 +2024-09-17 04:02:32,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=142580.0, ans=0.0 +2024-09-17 04:02:58,361 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.061e+01 9.742e+01 1.045e+02 1.185e+02 2.599e+02, threshold=2.090e+02, percent-clipped=1.0 +2024-09-17 04:03:13,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.31 vs. limit=15.0 +2024-09-17 04:03:17,017 INFO [train.py:1198] (0/2) Epoch 8, batch 4000, loss[loss=0.2573, ctc_loss=0.1676, cr_loss=0.4027, attn_decoder_loss=0.2583, over 29505.00 frames. ], tot_loss[loss=0.2734, ctc_loss=0.1846, cr_loss=0.4168, attn_decoder_loss=0.274, over 5813588.09 frames. ], batch size: 74, lr: 1.36e-02, grad_scale: 8.0 +2024-09-17 04:03:26,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=142700.0, ans=0.1 +2024-09-17 04:03:36,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=142740.0, ans=0.1 +2024-09-17 04:04:04,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=142820.0, ans=0.125 +2024-09-17 04:04:09,519 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.52 vs. limit=10.0 +2024-09-17 04:04:30,885 INFO [train.py:1198] (0/2) Epoch 8, batch 4050, loss[loss=0.3076, ctc_loss=0.2408, cr_loss=0.4279, attn_decoder_loss=0.3055, over 20128.00 frames. ], tot_loss[loss=0.2735, ctc_loss=0.1848, cr_loss=0.4165, attn_decoder_loss=0.2741, over 5796498.80 frames. ], batch size: 209, lr: 1.36e-02, grad_scale: 4.0 +2024-09-17 04:04:45,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=142940.0, ans=0.1 +2024-09-17 04:05:20,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=143020.0, ans=0.125 +2024-09-17 04:05:23,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=143020.0, ans=0.125 +2024-09-17 04:05:29,425 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.245e+01 1.069e+02 1.234e+02 1.438e+02 3.012e+02, threshold=2.468e+02, percent-clipped=5.0 +2024-09-17 04:05:37,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.18 vs. 
limit=15.0 +2024-09-17 04:05:45,646 INFO [train.py:1198] (0/2) Epoch 8, batch 4100, loss[loss=0.2742, ctc_loss=0.1813, cr_loss=0.423, attn_decoder_loss=0.2751, over 29501.00 frames. ], tot_loss[loss=0.2733, ctc_loss=0.1845, cr_loss=0.4161, attn_decoder_loss=0.2739, over 5791291.76 frames. ], batch size: 90, lr: 1.36e-02, grad_scale: 8.0 +2024-09-17 04:05:45,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=143100.0, ans=0.0 +2024-09-17 04:05:47,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=143100.0, ans=0.1 +2024-09-17 04:06:21,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=143180.0, ans=0.05 +2024-09-17 04:06:42,513 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.73 vs. limit=22.5 +2024-09-17 04:06:48,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=6.68 vs. limit=12.0 +2024-09-17 04:06:59,231 INFO [train.py:1198] (0/2) Epoch 8, batch 4150, loss[loss=0.2736, ctc_loss=0.1861, cr_loss=0.4512, attn_decoder_loss=0.2733, over 29506.00 frames. ], tot_loss[loss=0.2732, ctc_loss=0.1844, cr_loss=0.416, attn_decoder_loss=0.2738, over 5796746.70 frames. ], batch size: 77, lr: 1.36e-02, grad_scale: 4.0 +2024-09-17 04:07:15,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=143340.0, ans=0.125 +2024-09-17 04:07:38,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=143380.0, ans=0.125 +2024-09-17 04:07:39,031 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.55 vs. limit=15.0 +2024-09-17 04:07:39,208 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.32 vs. limit=6.0 +2024-09-17 04:07:56,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=143420.0, ans=0.1 +2024-09-17 04:07:57,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=143460.0, ans=10.0 +2024-09-17 04:07:58,960 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.371e+01 9.814e+01 1.059e+02 1.146e+02 1.859e+02, threshold=2.118e+02, percent-clipped=0.0 +2024-09-17 04:08:06,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=143460.0, ans=0.05 +2024-09-17 04:08:09,599 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:08:13,622 INFO [train.py:1198] (0/2) Epoch 8, batch 4200, loss[loss=0.2944, ctc_loss=0.2036, cr_loss=0.4266, attn_decoder_loss=0.295, over 29488.00 frames. ], tot_loss[loss=0.2739, ctc_loss=0.185, cr_loss=0.4172, attn_decoder_loss=0.2745, over 5799359.61 frames. 
], batch size: 90, lr: 1.36e-02, grad_scale: 8.0 +2024-09-17 04:08:33,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.85 vs. limit=15.0 +2024-09-17 04:08:47,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=143580.0, ans=0.025 +2024-09-17 04:09:06,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=143620.0, ans=0.1 +2024-09-17 04:09:27,761 INFO [train.py:1198] (0/2) Epoch 8, batch 4250, loss[loss=0.2429, ctc_loss=0.1583, cr_loss=0.3585, attn_decoder_loss=0.2444, over 29534.00 frames. ], tot_loss[loss=0.2739, ctc_loss=0.1847, cr_loss=0.4162, attn_decoder_loss=0.2745, over 5805565.48 frames. ], batch size: 74, lr: 1.36e-02, grad_scale: 4.0 +2024-09-17 04:09:38,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=143700.0, ans=0.1 +2024-09-17 04:09:39,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=143700.0, ans=0.1 +2024-09-17 04:09:48,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=143740.0, ans=0.5 +2024-09-17 04:09:56,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.59 vs. limit=10.0 +2024-09-17 04:10:25,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=143860.0, ans=0.035 +2024-09-17 04:10:27,815 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.334e+01 1.014e+02 1.108e+02 1.214e+02 2.997e+02, threshold=2.217e+02, percent-clipped=4.0 +2024-09-17 04:10:31,917 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.21 vs. limit=15.0 +2024-09-17 04:10:32,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=143860.0, ans=0.0 +2024-09-17 04:10:36,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=143860.0, ans=0.1 +2024-09-17 04:10:40,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.25 vs. limit=22.5 +2024-09-17 04:10:41,039 INFO [train.py:1198] (0/2) Epoch 8, batch 4300, loss[loss=0.2809, ctc_loss=0.1857, cr_loss=0.4374, attn_decoder_loss=0.2818, over 29505.00 frames. ], tot_loss[loss=0.2737, ctc_loss=0.1845, cr_loss=0.4156, attn_decoder_loss=0.2744, over 5794779.35 frames. ], batch size: 87, lr: 1.36e-02, grad_scale: 8.0 +2024-09-17 04:10:43,276 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.08 vs. 
limit=22.5 +2024-09-17 04:10:44,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=143900.0, ans=0.125 +2024-09-17 04:10:47,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=143900.0, ans=0.0 +2024-09-17 04:11:02,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=143940.0, ans=0.1 +2024-09-17 04:11:17,807 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-36000.pt +2024-09-17 04:11:36,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=144020.0, ans=0.0 +2024-09-17 04:11:48,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=144060.0, ans=0.0 +2024-09-17 04:11:49,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=144060.0, ans=0.0 +2024-09-17 04:11:49,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=144060.0, ans=0.125 +2024-09-17 04:11:52,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=144060.0, ans=0.04949747468305833 +2024-09-17 04:11:54,633 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.09 vs. limit=15.0 +2024-09-17 04:12:02,548 INFO [train.py:1198] (0/2) Epoch 8, batch 4350, loss[loss=0.2984, ctc_loss=0.2102, cr_loss=0.4612, attn_decoder_loss=0.2979, over 29478.00 frames. ], tot_loss[loss=0.2774, ctc_loss=0.1876, cr_loss=0.4211, attn_decoder_loss=0.2781, over 5796484.78 frames. ], batch size: 97, lr: 1.36e-02, grad_scale: 8.0 +2024-09-17 04:12:26,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=144140.0, ans=0.1 +2024-09-17 04:12:46,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=144220.0, ans=0.2 +2024-09-17 04:12:50,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=144220.0, ans=0.125 +2024-09-17 04:13:03,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff3.min_abs, batch_count=144260.0, ans=0.2 +2024-09-17 04:13:04,677 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.658e+01 1.032e+02 1.110e+02 1.170e+02 3.272e+02, threshold=2.221e+02, percent-clipped=1.0 +2024-09-17 04:13:09,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=144260.0, ans=0.0 +2024-09-17 04:13:16,186 INFO [train.py:1198] (0/2) Epoch 8, batch 4400, loss[loss=0.29, ctc_loss=0.2089, cr_loss=0.4593, attn_decoder_loss=0.2888, over 27229.00 frames. ], tot_loss[loss=0.28, ctc_loss=0.1902, cr_loss=0.424, attn_decoder_loss=0.2806, over 5767387.90 frames. 
], batch size: 124, lr: 1.36e-02, grad_scale: 8.0 +2024-09-17 04:13:17,113 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.68 vs. limit=22.5 +2024-09-17 04:13:38,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=144340.0, ans=0.035 +2024-09-17 04:13:47,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=144380.0, ans=0.0 +2024-09-17 04:14:00,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=144420.0, ans=0.125 +2024-09-17 04:14:18,933 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.25 vs. limit=10.0 +2024-09-17 04:14:21,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=144460.0, ans=0.1 +2024-09-17 04:14:29,767 INFO [train.py:1198] (0/2) Epoch 8, batch 4450, loss[loss=0.3193, ctc_loss=0.2617, cr_loss=0.4495, attn_decoder_loss=0.3157, over 19898.00 frames. ], tot_loss[loss=0.2836, ctc_loss=0.1963, cr_loss=0.4276, attn_decoder_loss=0.2838, over 5580029.42 frames. ], batch size: 209, lr: 1.36e-02, grad_scale: 4.0 +2024-09-17 04:14:30,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=144500.0, ans=0.0 +2024-09-17 04:14:48,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=144540.0, ans=0.0 +2024-09-17 04:14:58,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=144580.0, ans=0.125 +2024-09-17 04:15:11,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=144580.0, ans=0.0 +2024-09-17 04:15:21,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=144620.0, ans=0.125 +2024-09-17 04:15:32,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=144660.0, ans=0.125 +2024-09-17 04:15:34,821 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.417e+01 1.090e+02 1.182e+02 1.322e+02 3.138e+02, threshold=2.364e+02, percent-clipped=1.0 +2024-09-17 04:15:45,077 INFO [train.py:1198] (0/2) Epoch 8, batch 4500, loss[loss=0.3112, ctc_loss=0.2385, cr_loss=0.4635, attn_decoder_loss=0.3089, over 20228.00 frames. ], tot_loss[loss=0.2873, ctc_loss=0.2036, cr_loss=0.4288, attn_decoder_loss=0.2871, over 5235560.03 frames. 
], batch size: 209, lr: 1.36e-02, grad_scale: 8.0 +2024-09-17 04:15:53,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten.whitening_limit, batch_count=144700.0, ans=22.5 +2024-09-17 04:16:01,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=144740.0, ans=0.2 +2024-09-17 04:16:10,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=144740.0, ans=0.125 +2024-09-17 04:16:22,247 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-8.pt +2024-09-17 04:17:10,883 INFO [train.py:1198] (0/2) Epoch 9, batch 0, loss[loss=0.2639, ctc_loss=0.1725, cr_loss=0.383, attn_decoder_loss=0.2655, over 29618.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1725, cr_loss=0.383, attn_decoder_loss=0.2655, over 29618.00 frames. ], batch size: 73, lr: 1.28e-02, grad_scale: 8.0 +2024-09-17 04:17:10,884 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 04:17:29,062 INFO [train.py:1230] (0/2) Epoch 9, validation: loss=0.2184, ctc_loss=0.05457, cr_loss=4.594e-15, attn_decoder_loss=0.2366, over 944034.00 frames. +2024-09-17 04:17:29,062 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 04:17:58,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=144840.0, ans=0.125 +2024-09-17 04:18:05,515 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.27 vs. limit=15.0 +2024-09-17 04:18:09,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=144880.0, ans=0.2 +2024-09-17 04:18:16,020 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=23.64 vs. limit=22.5 +2024-09-17 04:18:26,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.31 vs. limit=15.0 +2024-09-17 04:18:27,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=144920.0, ans=0.025 +2024-09-17 04:18:44,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=144960.0, ans=0.125 +2024-09-17 04:18:48,438 INFO [train.py:1198] (0/2) Epoch 9, batch 50, loss[loss=0.2455, ctc_loss=0.1629, cr_loss=0.3747, attn_decoder_loss=0.2463, over 29400.00 frames. ], tot_loss[loss=0.2757, ctc_loss=0.1899, cr_loss=0.421, attn_decoder_loss=0.2759, over 1267956.52 frames. 
], batch size: 70, lr: 1.28e-02, grad_scale: 4.0 +2024-09-17 04:18:59,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=145000.0, ans=0.125 +2024-09-17 04:19:03,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=145040.0, ans=0.0 +2024-09-17 04:19:16,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=145040.0, ans=0.025 +2024-09-17 04:19:18,666 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.315e+01 1.028e+02 1.122e+02 1.290e+02 1.269e+03, threshold=2.245e+02, percent-clipped=1.0 +2024-09-17 04:19:27,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=145080.0, ans=15.0 +2024-09-17 04:19:52,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=145160.0, ans=0.025 +2024-09-17 04:20:04,125 INFO [train.py:1198] (0/2) Epoch 9, batch 100, loss[loss=0.2629, ctc_loss=0.1817, cr_loss=0.4226, attn_decoder_loss=0.2625, over 29549.00 frames. ], tot_loss[loss=0.2768, ctc_loss=0.1893, cr_loss=0.4208, attn_decoder_loss=0.2772, over 2250943.85 frames. ], batch size: 76, lr: 1.28e-02, grad_scale: 8.0 +2024-09-17 04:20:07,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=145200.0, ans=0.5 +2024-09-17 04:20:18,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=145240.0, ans=0.125 +2024-09-17 04:20:43,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=145280.0, ans=0.2 +2024-09-17 04:20:50,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.88 vs. limit=15.0 +2024-09-17 04:20:52,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=145320.0, ans=0.125 +2024-09-17 04:21:10,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=145360.0, ans=0.1 +2024-09-17 04:21:10,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=145360.0, ans=0.125 +2024-09-17 04:21:12,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=145360.0, ans=0.09899494936611666 +2024-09-17 04:21:18,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=145400.0, ans=0.125 +2024-09-17 04:21:19,399 INFO [train.py:1198] (0/2) Epoch 9, batch 150, loss[loss=0.2448, ctc_loss=0.1609, cr_loss=0.4009, attn_decoder_loss=0.2452, over 29442.00 frames. ], tot_loss[loss=0.2742, ctc_loss=0.186, cr_loss=0.4163, attn_decoder_loss=0.2747, over 3045710.84 frames. 
], batch size: 70, lr: 1.28e-02, grad_scale: 4.0 +2024-09-17 04:21:19,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=145400.0, ans=0.0 +2024-09-17 04:21:26,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=145400.0, ans=0.05 +2024-09-17 04:21:52,284 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.62 vs. limit=15.0 +2024-09-17 04:21:54,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=145480.0, ans=0.0 +2024-09-17 04:21:55,712 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.472e+01 1.015e+02 1.087e+02 1.260e+02 1.994e+02, threshold=2.174e+02, percent-clipped=0.0 +2024-09-17 04:22:18,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=145520.0, ans=0.025 +2024-09-17 04:22:29,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=145560.0, ans=0.0 +2024-09-17 04:22:30,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=145560.0, ans=0.125 +2024-09-17 04:22:38,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=145600.0, ans=0.125 +2024-09-17 04:22:39,823 INFO [train.py:1198] (0/2) Epoch 9, batch 200, loss[loss=0.2852, ctc_loss=0.2, cr_loss=0.4401, attn_decoder_loss=0.2849, over 27244.00 frames. ], tot_loss[loss=0.2729, ctc_loss=0.1842, cr_loss=0.4155, attn_decoder_loss=0.2735, over 3659186.65 frames. ], batch size: 124, lr: 1.28e-02, grad_scale: 8.0 +2024-09-17 04:22:49,660 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.55 vs. limit=22.5 +2024-09-17 04:22:55,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=145640.0, ans=0.95 +2024-09-17 04:22:56,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=145640.0, ans=0.0 +2024-09-17 04:22:59,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=145640.0, ans=0.2 +2024-09-17 04:23:11,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=145680.0, ans=0.025 +2024-09-17 04:23:13,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=145680.0, ans=0.0 +2024-09-17 04:23:13,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=145680.0, ans=0.025 +2024-09-17 04:23:13,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=145680.0, ans=0.1 +2024-09-17 04:23:46,016 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.06 vs. 
limit=10.0 +2024-09-17 04:23:55,899 INFO [train.py:1198] (0/2) Epoch 9, batch 250, loss[loss=0.2678, ctc_loss=0.171, cr_loss=0.3872, attn_decoder_loss=0.2699, over 29257.00 frames. ], tot_loss[loss=0.2723, ctc_loss=0.1829, cr_loss=0.4144, attn_decoder_loss=0.273, over 4141703.28 frames. ], batch size: 100, lr: 1.28e-02, grad_scale: 4.0 +2024-09-17 04:24:25,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.56 vs. limit=15.0 +2024-09-17 04:24:28,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.48 vs. limit=15.0 +2024-09-17 04:24:29,185 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.100e+01 9.608e+01 1.032e+02 1.129e+02 1.433e+02, threshold=2.064e+02, percent-clipped=0.0 +2024-09-17 04:24:49,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=145920.0, ans=0.1 +2024-09-17 04:24:55,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=145960.0, ans=0.125 +2024-09-17 04:25:01,823 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.34 vs. limit=15.0 +2024-09-17 04:25:02,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=145960.0, ans=0.1 +2024-09-17 04:25:11,661 INFO [train.py:1198] (0/2) Epoch 9, batch 300, loss[loss=0.2735, ctc_loss=0.1784, cr_loss=0.4157, attn_decoder_loss=0.2748, over 29528.00 frames. ], tot_loss[loss=0.272, ctc_loss=0.1823, cr_loss=0.4146, attn_decoder_loss=0.2727, over 4509035.52 frames. ], batch size: 92, lr: 1.28e-02, grad_scale: 8.0 +2024-09-17 04:25:33,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=146040.0, ans=0.025 +2024-09-17 04:25:56,722 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.35 vs. limit=12.0 +2024-09-17 04:26:00,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=146120.0, ans=0.125 +2024-09-17 04:26:02,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=146120.0, ans=0.125 +2024-09-17 04:26:06,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=146120.0, ans=0.0 +2024-09-17 04:26:18,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=146160.0, ans=0.125 +2024-09-17 04:26:24,178 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=5.08 vs. limit=12.0 +2024-09-17 04:26:32,383 INFO [train.py:1198] (0/2) Epoch 9, batch 350, loss[loss=0.243, ctc_loss=0.1578, cr_loss=0.3792, attn_decoder_loss=0.244, over 29311.00 frames. ], tot_loss[loss=0.2727, ctc_loss=0.183, cr_loss=0.4152, attn_decoder_loss=0.2734, over 4793451.15 frames. 
], batch size: 71, lr: 1.28e-02, grad_scale: 4.0 +2024-09-17 04:26:41,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=146200.0, ans=0.0 +2024-09-17 04:26:43,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=146200.0, ans=0.2 +2024-09-17 04:27:06,992 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.424e+01 9.475e+01 1.008e+02 1.084e+02 2.956e+02, threshold=2.017e+02, percent-clipped=2.0 +2024-09-17 04:27:09,000 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:27:10,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=146280.0, ans=0.0 +2024-09-17 04:27:24,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=146320.0, ans=0.125 +2024-09-17 04:27:39,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=146360.0, ans=0.1 +2024-09-17 04:27:47,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=146400.0, ans=0.0 +2024-09-17 04:27:48,634 INFO [train.py:1198] (0/2) Epoch 9, batch 400, loss[loss=0.2786, ctc_loss=0.1824, cr_loss=0.4261, attn_decoder_loss=0.2798, over 29687.00 frames. ], tot_loss[loss=0.2724, ctc_loss=0.1825, cr_loss=0.4146, attn_decoder_loss=0.2731, over 5023530.68 frames. ], batch size: 82, lr: 1.28e-02, grad_scale: 8.0 +2024-09-17 04:27:57,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.56 vs. limit=12.0 +2024-09-17 04:27:59,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=146400.0, ans=0.0 +2024-09-17 04:28:19,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=146480.0, ans=0.125 +2024-09-17 04:28:27,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=146480.0, ans=0.0 +2024-09-17 04:28:39,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=146520.0, ans=0.025 +2024-09-17 04:28:59,216 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:29:04,922 INFO [train.py:1198] (0/2) Epoch 9, batch 450, loss[loss=0.2869, ctc_loss=0.1949, cr_loss=0.4252, attn_decoder_loss=0.2877, over 29687.00 frames. ], tot_loss[loss=0.2727, ctc_loss=0.1829, cr_loss=0.4154, attn_decoder_loss=0.2735, over 5184933.77 frames. ], batch size: 83, lr: 1.28e-02, grad_scale: 4.0 +2024-09-17 04:29:16,652 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.58 vs. 
limit=15.0 +2024-09-17 04:29:46,241 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.248e+01 9.637e+01 1.024e+02 1.129e+02 3.219e+02, threshold=2.049e+02, percent-clipped=1.0 +2024-09-17 04:30:21,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=146760.0, ans=0.125 +2024-09-17 04:30:22,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.18 vs. limit=15.0 +2024-09-17 04:30:25,861 INFO [train.py:1198] (0/2) Epoch 9, batch 500, loss[loss=0.3045, ctc_loss=0.2079, cr_loss=0.4593, attn_decoder_loss=0.305, over 29441.00 frames. ], tot_loss[loss=0.2713, ctc_loss=0.1815, cr_loss=0.4135, attn_decoder_loss=0.2721, over 5328409.84 frames. ], batch size: 94, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:30:29,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=146800.0, ans=0.025 +2024-09-17 04:30:30,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=146800.0, ans=0.125 +2024-09-17 04:31:10,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=146920.0, ans=0.125 +2024-09-17 04:31:41,731 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.46 vs. limit=6.0 +2024-09-17 04:31:42,468 INFO [train.py:1198] (0/2) Epoch 9, batch 550, loss[loss=0.2875, ctc_loss=0.1986, cr_loss=0.4442, attn_decoder_loss=0.2876, over 28888.00 frames. ], tot_loss[loss=0.2714, ctc_loss=0.1817, cr_loss=0.4139, attn_decoder_loss=0.2721, over 5421415.62 frames. ], batch size: 104, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:32:01,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=147040.0, ans=0.125 +2024-09-17 04:32:19,195 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.353e+01 9.413e+01 1.021e+02 1.124e+02 5.702e+02, threshold=2.041e+02, percent-clipped=1.0 +2024-09-17 04:32:22,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=147080.0, ans=0.125 +2024-09-17 04:32:59,451 INFO [train.py:1198] (0/2) Epoch 9, batch 600, loss[loss=0.2716, ctc_loss=0.1782, cr_loss=0.4017, attn_decoder_loss=0.273, over 29220.00 frames. ], tot_loss[loss=0.2715, ctc_loss=0.1818, cr_loss=0.4146, attn_decoder_loss=0.2722, over 5507517.01 frames. ], batch size: 100, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:33:13,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=147240.0, ans=0.2 +2024-09-17 04:33:17,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.44 vs. limit=10.0 +2024-09-17 04:33:50,221 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.87 vs. 
limit=22.5 +2024-09-17 04:33:57,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=147320.0, ans=0.1 +2024-09-17 04:33:59,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.41 vs. limit=22.5 +2024-09-17 04:34:00,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=147320.0, ans=0.2 +2024-09-17 04:34:00,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=147320.0, ans=0.125 +2024-09-17 04:34:20,290 INFO [train.py:1198] (0/2) Epoch 9, batch 650, loss[loss=0.2656, ctc_loss=0.1788, cr_loss=0.4047, attn_decoder_loss=0.2663, over 29792.00 frames. ], tot_loss[loss=0.2703, ctc_loss=0.1803, cr_loss=0.4122, attn_decoder_loss=0.2711, over 5585769.73 frames. ], batch size: 81, lr: 1.27e-02, grad_scale: 4.0 +2024-09-17 04:34:29,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=147400.0, ans=0.125 +2024-09-17 04:34:45,576 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.83 vs. limit=15.0 +2024-09-17 04:34:51,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=147480.0, ans=0.125 +2024-09-17 04:34:58,647 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.437e+01 9.683e+01 1.026e+02 1.151e+02 1.521e+02, threshold=2.052e+02, percent-clipped=0.0 +2024-09-17 04:35:09,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=147520.0, ans=0.125 +2024-09-17 04:35:23,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=147560.0, ans=0.0 +2024-09-17 04:35:36,634 INFO [train.py:1198] (0/2) Epoch 9, batch 700, loss[loss=0.258, ctc_loss=0.1706, cr_loss=0.3956, attn_decoder_loss=0.2589, over 29546.00 frames. ], tot_loss[loss=0.2712, ctc_loss=0.1809, cr_loss=0.4133, attn_decoder_loss=0.2721, over 5635767.17 frames. ], batch size: 76, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:35:50,553 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:35:54,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=147640.0, ans=0.2 +2024-09-17 04:35:58,021 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:35:58,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=10.97 vs. limit=15.0 +2024-09-17 04:36:12,532 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.66 vs. limit=15.0 +2024-09-17 04:36:13,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=147680.0, ans=0.125 +2024-09-17 04:36:52,810 INFO [train.py:1198] (0/2) Epoch 9, batch 750, loss[loss=0.2825, ctc_loss=0.1841, cr_loss=0.4199, attn_decoder_loss=0.2842, over 29704.00 frames. 
], tot_loss[loss=0.2707, ctc_loss=0.1805, cr_loss=0.4126, attn_decoder_loss=0.2715, over 5676897.43 frames. ], batch size: 82, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:36:57,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=147800.0, ans=0.125 +2024-09-17 04:37:37,020 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.147e+01 9.690e+01 1.045e+02 1.120e+02 4.390e+02, threshold=2.090e+02, percent-clipped=1.0 +2024-09-17 04:37:57,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=147960.0, ans=0.2 +2024-09-17 04:38:10,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=147960.0, ans=0.125 +2024-09-17 04:38:13,961 INFO [train.py:1198] (0/2) Epoch 9, batch 800, loss[loss=0.2383, ctc_loss=0.1494, cr_loss=0.3591, attn_decoder_loss=0.2402, over 29594.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1804, cr_loss=0.4122, attn_decoder_loss=0.2715, over 5706087.84 frames. ], batch size: 73, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:38:18,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=148000.0, ans=0.125 +2024-09-17 04:38:24,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=148000.0, ans=0.0 +2024-09-17 04:38:24,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=148000.0, ans=0.025 +2024-09-17 04:38:29,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=148040.0, ans=0.0 +2024-09-17 04:39:29,746 INFO [train.py:1198] (0/2) Epoch 9, batch 850, loss[loss=0.2799, ctc_loss=0.1843, cr_loss=0.4181, attn_decoder_loss=0.2812, over 29717.00 frames. ], tot_loss[loss=0.2701, ctc_loss=0.1798, cr_loss=0.4121, attn_decoder_loss=0.271, over 5736119.88 frames. ], batch size: 89, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:39:43,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=148240.0, ans=0.0 +2024-09-17 04:39:55,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=148240.0, ans=0.125 +2024-09-17 04:40:01,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=148280.0, ans=0.2 +2024-09-17 04:40:06,716 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.62 vs. limit=15.0 +2024-09-17 04:40:10,338 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.251e+01 9.624e+01 1.050e+02 1.134e+02 2.702e+02, threshold=2.101e+02, percent-clipped=1.0 +2024-09-17 04:40:15,615 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.44 vs. 
limit=15.0 +2024-09-17 04:40:26,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=148320.0, ans=0.09899494936611666 +2024-09-17 04:40:42,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=148360.0, ans=0.125 +2024-09-17 04:40:45,743 INFO [train.py:1198] (0/2) Epoch 9, batch 900, loss[loss=0.2459, ctc_loss=0.1626, cr_loss=0.3693, attn_decoder_loss=0.247, over 29614.00 frames. ], tot_loss[loss=0.2704, ctc_loss=0.1801, cr_loss=0.4124, attn_decoder_loss=0.2712, over 5740208.50 frames. ], batch size: 73, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:40:54,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=148400.0, ans=0.2 +2024-09-17 04:40:55,630 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.98 vs. limit=22.5 +2024-09-17 04:41:01,842 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.02 vs. limit=10.0 +2024-09-17 04:41:04,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=148440.0, ans=0.125 +2024-09-17 04:41:05,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=148440.0, ans=0.1 +2024-09-17 04:41:16,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=148480.0, ans=0.1 +2024-09-17 04:41:18,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=148480.0, ans=0.0 +2024-09-17 04:41:21,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=148480.0, ans=0.025 +2024-09-17 04:41:31,863 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.25 vs. limit=10.0 +2024-09-17 04:41:38,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff3.min_abs, batch_count=148520.0, ans=0.2 +2024-09-17 04:41:39,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=148520.0, ans=0.125 +2024-09-17 04:41:40,147 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.40 vs. limit=6.0 +2024-09-17 04:42:06,663 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.66 vs. limit=15.0 +2024-09-17 04:42:06,884 INFO [train.py:1198] (0/2) Epoch 9, batch 950, loss[loss=0.2417, ctc_loss=0.153, cr_loss=0.3782, attn_decoder_loss=0.2432, over 29523.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1803, cr_loss=0.4123, attn_decoder_loss=0.2715, over 5741593.40 frames. 
], batch size: 74, lr: 1.27e-02, grad_scale: 4.0 +2024-09-17 04:42:40,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=148680.0, ans=0.125 +2024-09-17 04:42:41,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=148680.0, ans=0.125 +2024-09-17 04:42:49,654 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.631e+01 1.018e+02 1.126e+02 1.313e+02 4.383e+02, threshold=2.253e+02, percent-clipped=5.0 +2024-09-17 04:42:53,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.74 vs. limit=22.5 +2024-09-17 04:43:08,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=148760.0, ans=0.125 +2024-09-17 04:43:11,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.81 vs. limit=6.0 +2024-09-17 04:43:16,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=148760.0, ans=0.0 +2024-09-17 04:43:23,704 INFO [train.py:1198] (0/2) Epoch 9, batch 1000, loss[loss=0.2564, ctc_loss=0.1691, cr_loss=0.4221, attn_decoder_loss=0.2567, over 29528.00 frames. ], tot_loss[loss=0.2714, ctc_loss=0.1814, cr_loss=0.4139, attn_decoder_loss=0.2722, over 5736856.12 frames. ], batch size: 77, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:43:31,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=148800.0, ans=0.2 +2024-09-17 04:43:47,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=148840.0, ans=0.0 +2024-09-17 04:44:01,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=148880.0, ans=0.07 +2024-09-17 04:44:14,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=148920.0, ans=0.125 +2024-09-17 04:44:30,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=148960.0, ans=0.125 +2024-09-17 04:44:31,824 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.98 vs. limit=15.0 +2024-09-17 04:44:32,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=148960.0, ans=0.1 +2024-09-17 04:44:32,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=148960.0, ans=0.025 +2024-09-17 04:44:32,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=148960.0, ans=0.125 +2024-09-17 04:44:38,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=149000.0, ans=0.2 +2024-09-17 04:44:39,605 INFO [train.py:1198] (0/2) Epoch 9, batch 1050, loss[loss=0.2709, ctc_loss=0.1822, cr_loss=0.4033, attn_decoder_loss=0.2718, over 29691.00 frames. ], tot_loss[loss=0.2705, ctc_loss=0.1806, cr_loss=0.4132, attn_decoder_loss=0.2713, over 5744111.30 frames. 
], batch size: 85, lr: 1.27e-02, grad_scale: 4.0 +2024-09-17 04:44:53,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=149040.0, ans=0.125 +2024-09-17 04:45:04,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=149040.0, ans=0.125 +2024-09-17 04:45:06,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=149040.0, ans=0.125 +2024-09-17 04:45:26,426 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.757e+01 9.706e+01 1.051e+02 1.142e+02 2.250e+02, threshold=2.101e+02, percent-clipped=0.0 +2024-09-17 04:45:27,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.08 vs. limit=10.0 +2024-09-17 04:46:00,313 INFO [train.py:1198] (0/2) Epoch 9, batch 1100, loss[loss=0.2686, ctc_loss=0.173, cr_loss=0.4182, attn_decoder_loss=0.2699, over 29449.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1806, cr_loss=0.4132, attn_decoder_loss=0.2714, over 5755979.91 frames. ], batch size: 78, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:46:32,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=149280.0, ans=0.125 +2024-09-17 04:46:41,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=149280.0, ans=0.125 +2024-09-17 04:46:46,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=149320.0, ans=0.0 +2024-09-17 04:46:54,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.28 vs. limit=10.0 +2024-09-17 04:46:54,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=149320.0, ans=0.125 +2024-09-17 04:47:08,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=149360.0, ans=0.0 +2024-09-17 04:47:08,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=149360.0, ans=0.025 +2024-09-17 04:47:14,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=149400.0, ans=0.0 +2024-09-17 04:47:16,059 INFO [train.py:1198] (0/2) Epoch 9, batch 1150, loss[loss=0.2712, ctc_loss=0.1801, cr_loss=0.4413, attn_decoder_loss=0.2715, over 29474.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1805, cr_loss=0.4132, attn_decoder_loss=0.2714, over 5753695.31 frames. ], batch size: 78, lr: 1.26e-02, grad_scale: 4.0 +2024-09-17 04:47:23,107 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.32 vs. limit=22.5 +2024-09-17 04:47:33,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.28 vs. 
limit=22.5 +2024-09-17 04:47:36,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=149440.0, ans=0.07 +2024-09-17 04:47:44,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=149440.0, ans=0.125 +2024-09-17 04:48:01,923 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.308e+01 9.807e+01 1.085e+02 1.342e+02 2.441e+02, threshold=2.171e+02, percent-clipped=4.0 +2024-09-17 04:48:06,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=149520.0, ans=0.2 +2024-09-17 04:48:07,573 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.34 vs. limit=22.5 +2024-09-17 04:48:33,333 INFO [train.py:1198] (0/2) Epoch 9, batch 1200, loss[loss=0.273, ctc_loss=0.1788, cr_loss=0.3918, attn_decoder_loss=0.2748, over 29679.00 frames. ], tot_loss[loss=0.2717, ctc_loss=0.1815, cr_loss=0.4143, attn_decoder_loss=0.2725, over 5747368.80 frames. ], batch size: 85, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:49:11,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=149680.0, ans=0.125 +2024-09-17 04:49:11,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=149680.0, ans=0.125 +2024-09-17 04:49:14,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=149680.0, ans=0.125 +2024-09-17 04:49:17,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=149680.0, ans=0.04949747468305833 +2024-09-17 04:49:19,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=149720.0, ans=0.0 +2024-09-17 04:49:35,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.05 vs. limit=12.0 +2024-09-17 04:49:38,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=149760.0, ans=0.5 +2024-09-17 04:49:43,046 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.27 vs. limit=15.0 +2024-09-17 04:49:50,701 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.88 vs. limit=22.5 +2024-09-17 04:49:50,868 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.72 vs. limit=15.0 +2024-09-17 04:49:52,898 INFO [train.py:1198] (0/2) Epoch 9, batch 1250, loss[loss=0.2973, ctc_loss=0.2076, cr_loss=0.4638, attn_decoder_loss=0.297, over 29531.00 frames. ], tot_loss[loss=0.2723, ctc_loss=0.1819, cr_loss=0.4154, attn_decoder_loss=0.2731, over 5774468.10 frames. 
], batch size: 92, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:49:56,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=149800.0, ans=0.125 +2024-09-17 04:49:57,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=149800.0, ans=0.125 +2024-09-17 04:50:13,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.13 vs. limit=15.0 +2024-09-17 04:50:31,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=149880.0, ans=0.125 +2024-09-17 04:50:34,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=149880.0, ans=0.125 +2024-09-17 04:50:38,399 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.042e+01 9.591e+01 1.046e+02 1.160e+02 1.832e+02, threshold=2.092e+02, percent-clipped=0.0 +2024-09-17 04:51:08,756 INFO [train.py:1198] (0/2) Epoch 9, batch 1300, loss[loss=0.2859, ctc_loss=0.1925, cr_loss=0.4262, attn_decoder_loss=0.2868, over 28170.00 frames. ], tot_loss[loss=0.2718, ctc_loss=0.1814, cr_loss=0.4147, attn_decoder_loss=0.2726, over 5778849.66 frames. ], batch size: 111, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:51:19,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=150000.0, ans=0.125 +2024-09-17 04:51:25,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=150040.0, ans=0.125 +2024-09-17 04:51:37,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=150080.0, ans=0.1 +2024-09-17 04:51:50,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=150080.0, ans=0.125 +2024-09-17 04:52:06,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_na.min_abs, batch_count=150120.0, ans=0.02 +2024-09-17 04:52:20,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=150160.0, ans=0.05 +2024-09-17 04:52:21,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=150160.0, ans=0.2 +2024-09-17 04:52:24,373 INFO [train.py:1198] (0/2) Epoch 9, batch 1350, loss[loss=0.2718, ctc_loss=0.1777, cr_loss=0.418, attn_decoder_loss=0.273, over 29757.00 frames. ], tot_loss[loss=0.2712, ctc_loss=0.1806, cr_loss=0.414, attn_decoder_loss=0.2721, over 5795996.35 frames. ], batch size: 81, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:52:27,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=150200.0, ans=0.0 +2024-09-17 04:52:34,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.38 vs. 
limit=22.5 +2024-09-17 04:52:41,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=150240.0, ans=0.0 +2024-09-17 04:52:50,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=150240.0, ans=0.125 +2024-09-17 04:52:55,149 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.39 vs. limit=15.0 +2024-09-17 04:53:01,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=150280.0, ans=0.0 +2024-09-17 04:53:02,567 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.50 vs. limit=10.0 +2024-09-17 04:53:11,520 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.083e+01 9.658e+01 1.049e+02 1.137e+02 1.500e+02, threshold=2.097e+02, percent-clipped=0.0 +2024-09-17 04:53:32,760 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.60 vs. limit=15.0 +2024-09-17 04:53:44,222 INFO [train.py:1198] (0/2) Epoch 9, batch 1400, loss[loss=0.2357, ctc_loss=0.1528, cr_loss=0.3783, attn_decoder_loss=0.2365, over 29605.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1802, cr_loss=0.4135, attn_decoder_loss=0.2719, over 5807450.23 frames. ], batch size: 69, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:54:11,844 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.16 vs. limit=15.0 +2024-09-17 04:54:30,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=150520.0, ans=0.125 +2024-09-17 04:54:36,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=150520.0, ans=0.025 +2024-09-17 04:54:41,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=150520.0, ans=0.0 +2024-09-17 04:54:43,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=150560.0, ans=10.0 +2024-09-17 04:54:53,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=150560.0, ans=0.125 +2024-09-17 04:54:55,746 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.36 vs. limit=15.0 +2024-09-17 04:54:59,196 INFO [train.py:1198] (0/2) Epoch 9, batch 1450, loss[loss=0.2809, ctc_loss=0.1865, cr_loss=0.4229, attn_decoder_loss=0.282, over 29410.00 frames. ], tot_loss[loss=0.2714, ctc_loss=0.1804, cr_loss=0.4138, attn_decoder_loss=0.2724, over 5803808.47 frames. 
], batch size: 94, lr: 1.26e-02, grad_scale: 4.0 +2024-09-17 04:55:16,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=150640.0, ans=0.125 +2024-09-17 04:55:16,125 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:55:23,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=150640.0, ans=0.2 +2024-09-17 04:55:30,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=150680.0, ans=0.125 +2024-09-17 04:55:45,592 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.347e+01 1.006e+02 1.117e+02 1.243e+02 2.760e+02, threshold=2.234e+02, percent-clipped=2.0 +2024-09-17 04:55:56,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=150720.0, ans=0.1 +2024-09-17 04:56:13,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=150800.0, ans=0.125 +2024-09-17 04:56:14,397 INFO [train.py:1198] (0/2) Epoch 9, batch 1500, loss[loss=0.2788, ctc_loss=0.1794, cr_loss=0.4253, attn_decoder_loss=0.2804, over 29638.00 frames. ], tot_loss[loss=0.2718, ctc_loss=0.1806, cr_loss=0.4145, attn_decoder_loss=0.2727, over 5805081.65 frames. ], batch size: 86, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:56:31,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=150840.0, ans=0.125 +2024-09-17 04:57:23,111 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.59 vs. limit=15.0 +2024-09-17 04:57:31,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=150960.0, ans=0.1 +2024-09-17 04:57:33,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=151000.0, ans=0.125 +2024-09-17 04:57:34,238 INFO [train.py:1198] (0/2) Epoch 9, batch 1550, loss[loss=0.2818, ctc_loss=0.1924, cr_loss=0.4437, attn_decoder_loss=0.2818, over 29505.00 frames. ], tot_loss[loss=0.2718, ctc_loss=0.181, cr_loss=0.4142, attn_decoder_loss=0.2727, over 5780926.64 frames. 
], batch size: 90, lr: 1.26e-02, grad_scale: 4.0 +2024-09-17 04:57:54,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=151040.0, ans=0.125 +2024-09-17 04:58:04,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=151080.0, ans=0.125 +2024-09-17 04:58:22,185 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.049e+01 9.832e+01 1.106e+02 1.253e+02 2.763e+02, threshold=2.212e+02, percent-clipped=1.0 +2024-09-17 04:58:24,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=151120.0, ans=0.0 +2024-09-17 04:58:30,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=151120.0, ans=0.125 +2024-09-17 04:58:48,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=151200.0, ans=0.1 +2024-09-17 04:58:49,783 INFO [train.py:1198] (0/2) Epoch 9, batch 1600, loss[loss=0.2879, ctc_loss=0.1989, cr_loss=0.4532, attn_decoder_loss=0.2877, over 29660.00 frames. ], tot_loss[loss=0.2715, ctc_loss=0.1807, cr_loss=0.4139, attn_decoder_loss=0.2724, over 5765228.09 frames. ], batch size: 85, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:59:02,652 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.28 vs. limit=15.0 +2024-09-17 04:59:42,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=151320.0, ans=0.04949747468305833 +2024-09-17 04:59:55,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=151360.0, ans=0.125 +2024-09-17 04:59:58,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=151360.0, ans=0.1 +2024-09-17 05:00:05,099 INFO [train.py:1198] (0/2) Epoch 9, batch 1650, loss[loss=0.2828, ctc_loss=0.1893, cr_loss=0.4185, attn_decoder_loss=0.2839, over 29715.00 frames. ], tot_loss[loss=0.2712, ctc_loss=0.1804, cr_loss=0.413, attn_decoder_loss=0.2721, over 5758831.92 frames. ], batch size: 89, lr: 1.26e-02, grad_scale: 4.0 +2024-09-17 05:00:23,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=151440.0, ans=0.0 +2024-09-17 05:00:55,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer_na.min_abs, batch_count=151520.0, ans=0.02 +2024-09-17 05:00:57,021 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.805e+01 9.589e+01 1.020e+02 1.089e+02 1.544e+02, threshold=2.040e+02, percent-clipped=0.0 +2024-09-17 05:00:58,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=151520.0, ans=0.035 +2024-09-17 05:01:03,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=151520.0, ans=0.0 +2024-09-17 05:01:24,480 INFO [train.py:1198] (0/2) Epoch 9, batch 1700, loss[loss=0.247, ctc_loss=0.1692, cr_loss=0.4065, attn_decoder_loss=0.2466, over 29561.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1802, cr_loss=0.4133, attn_decoder_loss=0.2719, over 5781189.10 frames. 
], batch size: 69, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 05:01:45,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=151640.0, ans=0.0 +2024-09-17 05:01:47,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=151640.0, ans=0.125 +2024-09-17 05:01:58,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.39 vs. limit=5.0 +2024-09-17 05:02:12,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=151720.0, ans=0.0 +2024-09-17 05:02:39,565 INFO [train.py:1198] (0/2) Epoch 9, batch 1750, loss[loss=0.2435, ctc_loss=0.1584, cr_loss=0.392, attn_decoder_loss=0.2443, over 29323.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1798, cr_loss=0.4129, attn_decoder_loss=0.2715, over 5790068.74 frames. ], batch size: 67, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:02:51,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=151800.0, ans=0.0 +2024-09-17 05:02:59,469 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:03:16,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=151880.0, ans=0.125 +2024-09-17 05:03:29,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=151920.0, ans=0.0 +2024-09-17 05:03:29,804 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.07 vs. limit=22.5 +2024-09-17 05:03:30,572 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.963e+01 9.433e+01 1.015e+02 1.120e+02 2.449e+02, threshold=2.030e+02, percent-clipped=1.0 +2024-09-17 05:03:54,783 INFO [train.py:1198] (0/2) Epoch 9, batch 1800, loss[loss=0.2719, ctc_loss=0.1751, cr_loss=0.3896, attn_decoder_loss=0.274, over 29683.00 frames. ], tot_loss[loss=0.2709, ctc_loss=0.1801, cr_loss=0.4134, attn_decoder_loss=0.2718, over 5792250.98 frames. ], batch size: 83, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:04:01,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=152000.0, ans=0.1 +2024-09-17 05:04:28,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=152080.0, ans=0.125 +2024-09-17 05:04:29,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=152080.0, ans=0.125 +2024-09-17 05:04:40,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=152120.0, ans=0.0 +2024-09-17 05:05:02,474 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.31 vs. limit=15.0 +2024-09-17 05:05:12,107 INFO [train.py:1198] (0/2) Epoch 9, batch 1850, loss[loss=0.2803, ctc_loss=0.1851, cr_loss=0.4138, attn_decoder_loss=0.2817, over 29627.00 frames. ], tot_loss[loss=0.2708, ctc_loss=0.1802, cr_loss=0.4144, attn_decoder_loss=0.2717, over 5798348.38 frames. 
], batch size: 86, lr: 1.25e-02, grad_scale: 4.0 +2024-09-17 05:05:23,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=152200.0, ans=0.1 +2024-09-17 05:05:27,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=152240.0, ans=0.0 +2024-09-17 05:05:35,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=152240.0, ans=0.2 +2024-09-17 05:05:51,244 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.98 vs. limit=15.0 +2024-09-17 05:06:06,686 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.561e+01 1.000e+02 1.112e+02 1.269e+02 1.875e+02, threshold=2.225e+02, percent-clipped=0.0 +2024-09-17 05:06:13,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=152360.0, ans=0.125 +2024-09-17 05:06:22,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=152360.0, ans=0.125 +2024-09-17 05:06:29,019 INFO [train.py:1198] (0/2) Epoch 9, batch 1900, loss[loss=0.2833, ctc_loss=0.1929, cr_loss=0.4342, attn_decoder_loss=0.2837, over 29707.00 frames. ], tot_loss[loss=0.2718, ctc_loss=0.1809, cr_loss=0.415, attn_decoder_loss=0.2726, over 5805115.50 frames. ], batch size: 89, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:06:37,619 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=4.97 vs. limit=15.0 +2024-09-17 05:06:42,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=152440.0, ans=0.125 +2024-09-17 05:06:45,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.42 vs. limit=15.0 +2024-09-17 05:06:50,932 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.89 vs. limit=22.5 +2024-09-17 05:07:25,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=152520.0, ans=0.125 +2024-09-17 05:07:35,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=152560.0, ans=0.1 +2024-09-17 05:07:38,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=152560.0, ans=0.025 +2024-09-17 05:07:42,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=152600.0, ans=0.0 +2024-09-17 05:07:44,261 INFO [train.py:1198] (0/2) Epoch 9, batch 1950, loss[loss=0.2693, ctc_loss=0.1804, cr_loss=0.4056, attn_decoder_loss=0.2701, over 29458.00 frames. ], tot_loss[loss=0.2728, ctc_loss=0.1817, cr_loss=0.417, attn_decoder_loss=0.2737, over 5819884.90 frames. 
], batch size: 78, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:08:16,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=152680.0, ans=0.05 +2024-09-17 05:08:40,100 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.369e+01 9.742e+01 1.027e+02 1.111e+02 1.388e+02, threshold=2.054e+02, percent-clipped=0.0 +2024-09-17 05:08:46,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=152760.0, ans=0.125 +2024-09-17 05:08:46,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=152760.0, ans=0.125 +2024-09-17 05:08:46,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=152760.0, ans=0.2 +2024-09-17 05:09:00,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=152800.0, ans=0.1 +2024-09-17 05:09:01,811 INFO [train.py:1198] (0/2) Epoch 9, batch 2000, loss[loss=0.2386, ctc_loss=0.1569, cr_loss=0.381, attn_decoder_loss=0.2392, over 29302.00 frames. ], tot_loss[loss=0.2731, ctc_loss=0.1821, cr_loss=0.4168, attn_decoder_loss=0.2739, over 5796815.62 frames. ], batch size: 67, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:09:02,867 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.43 vs. limit=15.0 +2024-09-17 05:09:14,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=152800.0, ans=0.125 +2024-09-17 05:09:58,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=152920.0, ans=0.125 +2024-09-17 05:10:02,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=152960.0, ans=0.2 +2024-09-17 05:10:04,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=152960.0, ans=0.2 +2024-09-17 05:10:07,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=152960.0, ans=0.125 +2024-09-17 05:10:08,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=152960.0, ans=0.125 +2024-09-17 05:10:19,051 INFO [train.py:1198] (0/2) Epoch 9, batch 2050, loss[loss=0.2366, ctc_loss=0.1443, cr_loss=0.3671, attn_decoder_loss=0.2387, over 29442.00 frames. ], tot_loss[loss=0.2717, ctc_loss=0.1812, cr_loss=0.4151, attn_decoder_loss=0.2726, over 5789217.53 frames. 
], batch size: 70, lr: 1.25e-02, grad_scale: 4.0 +2024-09-17 05:10:40,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=153040.0, ans=0.0 +2024-09-17 05:10:45,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=153040.0, ans=0.0 +2024-09-17 05:11:01,917 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:11:07,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=153120.0, ans=0.95 +2024-09-17 05:11:12,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=153120.0, ans=0.125 +2024-09-17 05:11:15,051 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.385e+01 9.413e+01 1.004e+02 1.102e+02 4.512e+02, threshold=2.009e+02, percent-clipped=3.0 +2024-09-17 05:11:34,703 INFO [train.py:1198] (0/2) Epoch 9, batch 2100, loss[loss=0.2664, ctc_loss=0.1692, cr_loss=0.4139, attn_decoder_loss=0.2681, over 29769.00 frames. ], tot_loss[loss=0.2707, ctc_loss=0.18, cr_loss=0.4137, attn_decoder_loss=0.2716, over 5800147.05 frames. ], batch size: 81, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:11:35,574 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.57 vs. limit=15.0 +2024-09-17 05:12:01,123 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.77 vs. limit=15.0 +2024-09-17 05:12:01,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=153240.0, ans=0.0 +2024-09-17 05:12:05,524 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.75 vs. limit=15.0 +2024-09-17 05:12:17,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=153280.0, ans=0.125 +2024-09-17 05:12:17,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=153280.0, ans=0.0 +2024-09-17 05:12:17,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=153280.0, ans=0.125 +2024-09-17 05:12:21,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=153320.0, ans=0.0 +2024-09-17 05:12:27,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=153320.0, ans=0.125 +2024-09-17 05:12:51,528 INFO [train.py:1198] (0/2) Epoch 9, batch 2150, loss[loss=0.2516, ctc_loss=0.1547, cr_loss=0.3741, attn_decoder_loss=0.254, over 29440.00 frames. ], tot_loss[loss=0.27, ctc_loss=0.1788, cr_loss=0.4122, attn_decoder_loss=0.2709, over 5814935.89 frames. ], batch size: 78, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:13:27,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.07 vs. 
limit=15.0 +2024-09-17 05:13:51,016 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.919e+01 9.836e+01 1.055e+02 1.144e+02 2.218e+02, threshold=2.111e+02, percent-clipped=2.0 +2024-09-17 05:13:57,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=153560.0, ans=0.0 +2024-09-17 05:14:08,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=153600.0, ans=0.125 +2024-09-17 05:14:09,673 INFO [train.py:1198] (0/2) Epoch 9, batch 2200, loss[loss=0.2911, ctc_loss=0.1983, cr_loss=0.4394, attn_decoder_loss=0.2917, over 29626.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1796, cr_loss=0.4133, attn_decoder_loss=0.2715, over 5811617.05 frames. ], batch size: 86, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:14:19,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=153600.0, ans=0.125 +2024-09-17 05:14:29,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=153640.0, ans=0.0 +2024-09-17 05:14:39,259 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.09 vs. limit=22.5 +2024-09-17 05:14:55,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=153720.0, ans=10.0 +2024-09-17 05:14:56,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=153720.0, ans=0.0 +2024-09-17 05:15:02,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=153720.0, ans=0.1 +2024-09-17 05:15:04,830 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.72 vs. limit=15.0 +2024-09-17 05:15:09,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=153760.0, ans=0.1 +2024-09-17 05:15:19,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=153760.0, ans=0.125 +2024-09-17 05:15:20,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=153760.0, ans=0.125 +2024-09-17 05:15:25,276 INFO [train.py:1198] (0/2) Epoch 9, batch 2250, loss[loss=0.262, ctc_loss=0.1656, cr_loss=0.3847, attn_decoder_loss=0.2641, over 29748.00 frames. ], tot_loss[loss=0.2704, ctc_loss=0.1793, cr_loss=0.4129, attn_decoder_loss=0.2714, over 5810736.13 frames. ], batch size: 82, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:15:33,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=153800.0, ans=0.2 +2024-09-17 05:15:37,179 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.40 vs. 
limit=22.5 +2024-09-17 05:15:45,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=153840.0, ans=10.0 +2024-09-17 05:15:52,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=153840.0, ans=0.125 +2024-09-17 05:16:18,058 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.07 vs. limit=15.0 +2024-09-17 05:16:24,098 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.18 vs. limit=15.0 +2024-09-17 05:16:24,413 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.430e+01 9.555e+01 1.015e+02 1.096e+02 3.730e+02, threshold=2.031e+02, percent-clipped=3.0 +2024-09-17 05:16:29,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=153960.0, ans=0.2 +2024-09-17 05:16:32,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=153960.0, ans=0.125 +2024-09-17 05:16:42,434 INFO [train.py:1198] (0/2) Epoch 9, batch 2300, loss[loss=0.2516, ctc_loss=0.1636, cr_loss=0.3839, attn_decoder_loss=0.2528, over 29321.00 frames. ], tot_loss[loss=0.2689, ctc_loss=0.1778, cr_loss=0.4104, attn_decoder_loss=0.2699, over 5797824.72 frames. ], batch size: 71, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:17:30,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=154120.0, ans=0.125 +2024-09-17 05:17:42,690 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:17:50,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=154160.0, ans=10.0 +2024-09-17 05:17:51,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=154160.0, ans=0.1 +2024-09-17 05:17:51,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=154160.0, ans=0.125 +2024-09-17 05:17:53,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=154160.0, ans=0.125 +2024-09-17 05:18:02,041 INFO [train.py:1198] (0/2) Epoch 9, batch 2350, loss[loss=0.2765, ctc_loss=0.181, cr_loss=0.4283, attn_decoder_loss=0.2776, over 29686.00 frames. ], tot_loss[loss=0.2691, ctc_loss=0.1779, cr_loss=0.4105, attn_decoder_loss=0.2701, over 5804098.15 frames. 
], batch size: 83, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:18:05,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=154200.0, ans=0.0 +2024-09-17 05:18:09,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=154200.0, ans=0.125 +2024-09-17 05:18:26,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=154240.0, ans=0.2 +2024-09-17 05:18:51,577 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.34 vs. limit=15.0 +2024-09-17 05:18:53,091 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.67 vs. limit=6.0 +2024-09-17 05:18:58,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=154320.0, ans=0.125 +2024-09-17 05:18:59,586 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.337e+01 9.484e+01 1.020e+02 1.101e+02 1.845e+02, threshold=2.040e+02, percent-clipped=0.0 +2024-09-17 05:19:17,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=154400.0, ans=0.125 +2024-09-17 05:19:18,465 INFO [train.py:1198] (0/2) Epoch 9, batch 2400, loss[loss=0.2495, ctc_loss=0.1634, cr_loss=0.384, attn_decoder_loss=0.2506, over 29524.00 frames. ], tot_loss[loss=0.2699, ctc_loss=0.1788, cr_loss=0.4119, attn_decoder_loss=0.2708, over 5807424.17 frames. ], batch size: 76, lr: 1.24e-02, grad_scale: 16.0 +2024-09-17 05:19:18,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=154400.0, ans=0.125 +2024-09-17 05:19:20,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_na.min_abs, batch_count=154400.0, ans=0.02 +2024-09-17 05:20:03,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=154480.0, ans=0.125 +2024-09-17 05:20:04,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=154520.0, ans=0.125 +2024-09-17 05:20:13,934 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:20:21,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=154560.0, ans=0.125 +2024-09-17 05:20:27,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=154560.0, ans=0.125 +2024-09-17 05:20:36,503 INFO [train.py:1198] (0/2) Epoch 9, batch 2450, loss[loss=0.2726, ctc_loss=0.1728, cr_loss=0.394, attn_decoder_loss=0.2749, over 29717.00 frames. ], tot_loss[loss=0.2709, ctc_loss=0.1798, cr_loss=0.4135, attn_decoder_loss=0.2718, over 5784768.76 frames. 
], batch size: 82, lr: 1.24e-02, grad_scale: 4.0 +2024-09-17 05:20:39,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=154600.0, ans=0.125 +2024-09-17 05:20:42,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=154600.0, ans=0.125 +2024-09-17 05:21:02,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.55 vs. limit=15.0 +2024-09-17 05:21:10,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=154680.0, ans=0.125 +2024-09-17 05:21:38,515 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.373e+01 9.786e+01 1.038e+02 1.229e+02 2.658e+02, threshold=2.076e+02, percent-clipped=2.0 +2024-09-17 05:21:43,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=154760.0, ans=0.125 +2024-09-17 05:21:49,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=154760.0, ans=0.125 +2024-09-17 05:21:52,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=154800.0, ans=0.0 +2024-09-17 05:21:53,788 INFO [train.py:1198] (0/2) Epoch 9, batch 2500, loss[loss=0.2831, ctc_loss=0.1886, cr_loss=0.4342, attn_decoder_loss=0.284, over 29615.00 frames. ], tot_loss[loss=0.2702, ctc_loss=0.1788, cr_loss=0.4123, attn_decoder_loss=0.2712, over 5795048.54 frames. ], batch size: 86, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:22:09,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=154840.0, ans=0.0 +2024-09-17 05:22:21,884 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.80 vs. limit=15.0 +2024-09-17 05:22:22,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=154880.0, ans=0.025 +2024-09-17 05:22:32,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=154880.0, ans=0.2 +2024-09-17 05:22:48,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=154920.0, ans=0.0 +2024-09-17 05:23:03,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=154960.0, ans=0.125 +2024-09-17 05:23:06,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=154960.0, ans=0.125 +2024-09-17 05:23:09,607 INFO [train.py:1198] (0/2) Epoch 9, batch 2550, loss[loss=0.2341, ctc_loss=0.1476, cr_loss=0.3869, attn_decoder_loss=0.2351, over 29351.00 frames. ], tot_loss[loss=0.2701, ctc_loss=0.1784, cr_loss=0.4122, attn_decoder_loss=0.2712, over 5800061.40 frames. ], batch size: 67, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:23:13,450 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.63 vs. 
limit=15.0 +2024-09-17 05:23:29,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=155040.0, ans=0.0 +2024-09-17 05:23:31,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.51 vs. limit=15.0 +2024-09-17 05:23:37,640 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.18 vs. limit=15.0 +2024-09-17 05:23:39,292 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.50 vs. limit=15.0 +2024-09-17 05:23:49,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=155080.0, ans=0.125 +2024-09-17 05:23:52,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=155080.0, ans=0.2 +2024-09-17 05:24:06,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=155120.0, ans=0.0 +2024-09-17 05:24:06,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=155120.0, ans=0.125 +2024-09-17 05:24:12,478 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.204e+01 9.986e+01 1.053e+02 1.251e+02 2.083e+02, threshold=2.107e+02, percent-clipped=1.0 +2024-09-17 05:24:22,536 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.02 vs. limit=15.0 +2024-09-17 05:24:24,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=155160.0, ans=0.0 +2024-09-17 05:24:28,054 INFO [train.py:1198] (0/2) Epoch 9, batch 2600, loss[loss=0.2588, ctc_loss=0.1667, cr_loss=0.4163, attn_decoder_loss=0.2598, over 29450.00 frames. ], tot_loss[loss=0.2712, ctc_loss=0.1795, cr_loss=0.4144, attn_decoder_loss=0.2721, over 5796228.05 frames. ], batch size: 78, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:24:37,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=155200.0, ans=0.125 +2024-09-17 05:24:53,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=155240.0, ans=0.125 +2024-09-17 05:25:06,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=155280.0, ans=0.2 +2024-09-17 05:25:12,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=155280.0, ans=0.125 +2024-09-17 05:25:14,728 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.87 vs. limit=6.0 +2024-09-17 05:25:31,371 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.46 vs. 
limit=6.0 +2024-09-17 05:25:33,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=155360.0, ans=0.0 +2024-09-17 05:25:45,390 INFO [train.py:1198] (0/2) Epoch 9, batch 2650, loss[loss=0.2907, ctc_loss=0.1969, cr_loss=0.4555, attn_decoder_loss=0.291, over 29212.00 frames. ], tot_loss[loss=0.2705, ctc_loss=0.1786, cr_loss=0.4137, attn_decoder_loss=0.2715, over 5802667.30 frames. ], batch size: 100, lr: 1.24e-02, grad_scale: 4.0 +2024-09-17 05:25:50,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=155400.0, ans=0.0 +2024-09-17 05:25:51,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=155400.0, ans=0.125 +2024-09-17 05:26:42,574 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.78 vs. limit=15.0 +2024-09-17 05:26:47,528 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.070e+01 9.715e+01 1.022e+02 1.111e+02 3.079e+02, threshold=2.044e+02, percent-clipped=1.0 +2024-09-17 05:27:01,170 INFO [train.py:1198] (0/2) Epoch 9, batch 2700, loss[loss=0.2853, ctc_loss=0.1903, cr_loss=0.4197, attn_decoder_loss=0.2866, over 29512.00 frames. ], tot_loss[loss=0.2714, ctc_loss=0.1797, cr_loss=0.4153, attn_decoder_loss=0.2723, over 5798415.18 frames. ], batch size: 87, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:27:12,706 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.02 vs. limit=15.0 +2024-09-17 05:27:27,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=155640.0, ans=0.125 +2024-09-17 05:27:31,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=155680.0, ans=0.0 +2024-09-17 05:27:43,773 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.98 vs. limit=15.0 +2024-09-17 05:28:09,465 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.64 vs. limit=22.5 +2024-09-17 05:28:10,878 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.82 vs. limit=15.0 +2024-09-17 05:28:19,255 INFO [train.py:1198] (0/2) Epoch 9, batch 2750, loss[loss=0.2644, ctc_loss=0.1775, cr_loss=0.4157, attn_decoder_loss=0.2649, over 29521.00 frames. ], tot_loss[loss=0.27, ctc_loss=0.1786, cr_loss=0.413, attn_decoder_loss=0.271, over 5797837.16 frames. 
], batch size: 75, lr: 1.24e-02, grad_scale: 4.0 +2024-09-17 05:28:22,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=155800.0, ans=0.5 +2024-09-17 05:28:22,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=155800.0, ans=0.125 +2024-09-17 05:28:23,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1.whitening_limit, batch_count=155800.0, ans=10.0 +2024-09-17 05:28:40,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=155840.0, ans=0.07 +2024-09-17 05:28:59,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=155880.0, ans=0.95 +2024-09-17 05:29:03,089 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.54 vs. limit=15.0 +2024-09-17 05:29:13,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=155920.0, ans=0.125 +2024-09-17 05:29:15,187 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.69 vs. limit=22.5 +2024-09-17 05:29:23,084 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.85 vs. limit=15.0 +2024-09-17 05:29:25,258 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.872e+01 9.518e+01 1.047e+02 1.158e+02 3.298e+02, threshold=2.093e+02, percent-clipped=1.0 +2024-09-17 05:29:36,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=156000.0, ans=0.125 +2024-09-17 05:29:37,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=156000.0, ans=0.125 +2024-09-17 05:29:38,023 INFO [train.py:1198] (0/2) Epoch 9, batch 2800, loss[loss=0.3038, ctc_loss=0.2406, cr_loss=0.4369, attn_decoder_loss=0.3011, over 20605.00 frames. ], tot_loss[loss=0.2702, ctc_loss=0.1789, cr_loss=0.4129, attn_decoder_loss=0.2712, over 5778106.06 frames. ], batch size: 210, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:29:54,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=156040.0, ans=0.025 +2024-09-17 05:29:56,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=156040.0, ans=0.0 +2024-09-17 05:29:56,879 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.55 vs. limit=15.0 +2024-09-17 05:29:57,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=156040.0, ans=0.125 +2024-09-17 05:30:14,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=156080.0, ans=0.0 +2024-09-17 05:30:14,980 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.80 vs. 
limit=15.0 +2024-09-17 05:30:16,450 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.43 vs. limit=22.5 +2024-09-17 05:30:24,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=156120.0, ans=0.2 +2024-09-17 05:30:29,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=156120.0, ans=0.07 +2024-09-17 05:30:30,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=156120.0, ans=0.125 +2024-09-17 05:30:34,561 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.83 vs. limit=15.0 +2024-09-17 05:30:52,945 INFO [train.py:1198] (0/2) Epoch 9, batch 2850, loss[loss=0.2738, ctc_loss=0.1873, cr_loss=0.4138, attn_decoder_loss=0.2742, over 29490.00 frames. ], tot_loss[loss=0.2709, ctc_loss=0.1798, cr_loss=0.4134, attn_decoder_loss=0.2719, over 5763668.05 frames. ], batch size: 77, lr: 1.24e-02, grad_scale: 4.0 +2024-09-17 05:30:57,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=156200.0, ans=0.125 +2024-09-17 05:30:59,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=156200.0, ans=0.125 +2024-09-17 05:31:03,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=156200.0, ans=0.1 +2024-09-17 05:31:30,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=156280.0, ans=0.125 +2024-09-17 05:31:38,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.96 vs. limit=10.0 +2024-09-17 05:32:00,105 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.550e+01 9.773e+01 1.033e+02 1.202e+02 1.627e+02, threshold=2.066e+02, percent-clipped=0.0 +2024-09-17 05:32:10,736 INFO [train.py:1198] (0/2) Epoch 9, batch 2900, loss[loss=0.274, ctc_loss=0.1818, cr_loss=0.4443, attn_decoder_loss=0.2744, over 29805.00 frames. ], tot_loss[loss=0.2716, ctc_loss=0.1797, cr_loss=0.4148, attn_decoder_loss=0.2726, over 5789444.03 frames. 
], batch size: 80, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:32:21,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=156400.0, ans=0.1 +2024-09-17 05:32:27,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=156440.0, ans=0.125 +2024-09-17 05:32:30,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=156440.0, ans=0.0 +2024-09-17 05:32:33,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=156440.0, ans=0.125 +2024-09-17 05:32:48,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=156480.0, ans=0.125 +2024-09-17 05:32:49,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=156480.0, ans=0.125 +2024-09-17 05:32:56,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=156520.0, ans=0.025 +2024-09-17 05:32:59,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=156520.0, ans=0.2 +2024-09-17 05:33:28,635 INFO [train.py:1198] (0/2) Epoch 9, batch 2950, loss[loss=0.2625, ctc_loss=0.181, cr_loss=0.4046, attn_decoder_loss=0.2626, over 29532.00 frames. ], tot_loss[loss=0.2701, ctc_loss=0.1787, cr_loss=0.4131, attn_decoder_loss=0.2711, over 5785377.58 frames. ], batch size: 75, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:34:09,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=156680.0, ans=0.2 +2024-09-17 05:34:11,832 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.38 vs. limit=22.5 +2024-09-17 05:34:24,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=156720.0, ans=0.0 +2024-09-17 05:34:33,744 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.085e+01 9.535e+01 1.020e+02 1.127e+02 2.521e+02, threshold=2.039e+02, percent-clipped=1.0 +2024-09-17 05:34:38,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=156760.0, ans=0.125 +2024-09-17 05:34:44,910 INFO [train.py:1198] (0/2) Epoch 9, batch 3000, loss[loss=0.2679, ctc_loss=0.1737, cr_loss=0.4156, attn_decoder_loss=0.2691, over 29738.00 frames. ], tot_loss[loss=0.27, ctc_loss=0.1787, cr_loss=0.4127, attn_decoder_loss=0.2709, over 5786830.55 frames. ], batch size: 81, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:34:44,911 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 05:35:03,249 INFO [train.py:1230] (0/2) Epoch 9, validation: loss=0.2139, ctc_loss=0.05057, cr_loss=4.328e-15, attn_decoder_loss=0.232, over 944034.00 frames. 
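The loss[...] and tot_loss[...] entries above report four numbers per batch: a combined loss plus its three components (ctc_loss, cr_loss, attn_decoder_loss). The experiment directory logged at the checkpoint save further down (exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-...) suggests the combined value is a linear weighting of the three, and the logged numbers are consistent with that reading. A minimal sketch of the assumed weighting follows (an illustration checked against the entries above, not the exact icefall train.py code; the function name is hypothetical):

def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float,
                  ctc_scale: float = 0.1, aed_scale: float = 0.9,
                  cr_scale: float = 0.02) -> float:
    # Weighted sum of the CTC, attention-decoder (AED) and consistency-
    # regularization (CR) criteria; the default scales mirror the
    # experiment-directory name seen at the checkpoint save below.
    return (ctc_scale * ctc_loss
            + aed_scale * attn_decoder_loss
            + cr_scale * cr_loss)

# Spot checks against entries logged in this file:
# batch 2100: 0.1*0.1692 + 0.9*0.2681 + 0.02*0.4139 = 0.2665 vs. loss=0.2664
assert abs(combined_loss(0.1692, 0.2681, 0.4139) - 0.2664) < 1e-3
# validation above: cr_loss is ~0, so 0.1*0.05057 + 0.9*0.232 = 0.2139 = loss
assert abs(combined_loss(0.05057, 0.232, 4.328e-15) - 0.2139) < 1e-3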
+2024-09-17 05:35:03,249 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 05:35:09,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=156800.0, ans=0.1 +2024-09-17 05:35:16,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=156800.0, ans=0.125 +2024-09-17 05:35:25,003 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.97 vs. limit=22.5 +2024-09-17 05:35:25,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=156840.0, ans=0.0 +2024-09-17 05:36:05,663 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.68 vs. limit=15.0 +2024-09-17 05:36:11,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=156960.0, ans=0.125 +2024-09-17 05:36:12,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=156960.0, ans=0.2 +2024-09-17 05:36:15,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=156960.0, ans=0.0 +2024-09-17 05:36:15,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=156960.0, ans=0.0 +2024-09-17 05:36:21,031 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.76 vs. limit=15.0 +2024-09-17 05:36:21,550 INFO [train.py:1198] (0/2) Epoch 9, batch 3050, loss[loss=0.2493, ctc_loss=0.1597, cr_loss=0.394, attn_decoder_loss=0.2505, over 29535.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1796, cr_loss=0.4138, attn_decoder_loss=0.2719, over 5779958.83 frames. ], batch size: 76, lr: 1.23e-02, grad_scale: 4.0 +2024-09-17 05:36:29,554 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:36:29,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=157000.0, ans=0.0 +2024-09-17 05:36:30,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=157000.0, ans=0.5 +2024-09-17 05:36:40,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=157040.0, ans=0.2 +2024-09-17 05:36:45,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.14 vs. limit=15.0 +2024-09-17 05:36:54,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=157080.0, ans=0.125 +2024-09-17 05:37:00,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=157080.0, ans=0.0 +2024-09-17 05:37:00,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.62 vs. 
limit=22.5 +2024-09-17 05:37:12,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=157120.0, ans=0.09899494936611666 +2024-09-17 05:37:13,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_ff3.min_abs, batch_count=157120.0, ans=0.2 +2024-09-17 05:37:29,764 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.561e+01 1.002e+02 1.065e+02 1.234e+02 3.157e+02, threshold=2.130e+02, percent-clipped=3.0 +2024-09-17 05:37:37,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=157200.0, ans=0.125 +2024-09-17 05:37:38,812 INFO [train.py:1198] (0/2) Epoch 9, batch 3100, loss[loss=0.2828, ctc_loss=0.1862, cr_loss=0.4262, attn_decoder_loss=0.2841, over 29321.00 frames. ], tot_loss[loss=0.2705, ctc_loss=0.1793, cr_loss=0.4132, attn_decoder_loss=0.2714, over 5778907.11 frames. ], batch size: 100, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:37:43,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=157200.0, ans=0.025 +2024-09-17 05:37:46,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=157200.0, ans=0.0 +2024-09-17 05:37:52,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=157240.0, ans=0.125 +2024-09-17 05:37:54,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=157240.0, ans=0.0 +2024-09-17 05:38:00,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=157240.0, ans=0.0 +2024-09-17 05:38:02,668 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.24 vs. limit=15.0 +2024-09-17 05:38:18,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=157280.0, ans=10.0 +2024-09-17 05:38:31,327 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.36 vs. limit=15.0 +2024-09-17 05:38:43,279 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.69 vs. limit=22.5 +2024-09-17 05:38:54,760 INFO [train.py:1198] (0/2) Epoch 9, batch 3150, loss[loss=0.2759, ctc_loss=0.1765, cr_loss=0.4223, attn_decoder_loss=0.2776, over 28944.00 frames. ], tot_loss[loss=0.2699, ctc_loss=0.1787, cr_loss=0.4123, attn_decoder_loss=0.2709, over 5784661.06 frames. ], batch size: 104, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:38:55,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=157400.0, ans=0.025 +2024-09-17 05:39:24,588 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:39:47,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.65 vs. 
limit=15.0 +2024-09-17 05:40:04,837 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.427e+01 1.013e+02 1.077e+02 1.205e+02 2.021e+02, threshold=2.154e+02, percent-clipped=0.0 +2024-09-17 05:40:08,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=157560.0, ans=0.1 +2024-09-17 05:40:09,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=157560.0, ans=0.125 +2024-09-17 05:40:12,971 INFO [train.py:1198] (0/2) Epoch 9, batch 3200, loss[loss=0.2627, ctc_loss=0.1706, cr_loss=0.414, attn_decoder_loss=0.2637, over 29423.00 frames. ], tot_loss[loss=0.2695, ctc_loss=0.1781, cr_loss=0.4121, attn_decoder_loss=0.2705, over 5793940.27 frames. ], batch size: 79, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:40:21,685 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.19 vs. limit=10.0 +2024-09-17 05:40:30,716 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.04 vs. limit=15.0 +2024-09-17 05:40:32,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.79 vs. limit=15.0 +2024-09-17 05:40:33,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=157640.0, ans=0.0 +2024-09-17 05:40:41,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=157640.0, ans=0.0 +2024-09-17 05:41:07,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=157720.0, ans=0.125 +2024-09-17 05:41:11,172 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.72 vs. limit=15.0 +2024-09-17 05:41:16,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=157760.0, ans=0.125 +2024-09-17 05:41:25,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=157760.0, ans=0.015 +2024-09-17 05:41:31,266 INFO [train.py:1198] (0/2) Epoch 9, batch 3250, loss[loss=0.2799, ctc_loss=0.1821, cr_loss=0.4206, attn_decoder_loss=0.2814, over 29705.00 frames. ], tot_loss[loss=0.2701, ctc_loss=0.1785, cr_loss=0.4125, attn_decoder_loss=0.2711, over 5801379.23 frames. 
], batch size: 84, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:41:36,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=157800.0, ans=0.125 +2024-09-17 05:41:54,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=157840.0, ans=0.1 +2024-09-17 05:41:54,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=157840.0, ans=0.125 +2024-09-17 05:42:00,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=157880.0, ans=0.05 +2024-09-17 05:42:01,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=157880.0, ans=0.2 +2024-09-17 05:42:14,119 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:42:36,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=157960.0, ans=0.0 +2024-09-17 05:42:39,128 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.206e+01 9.558e+01 1.067e+02 1.153e+02 2.320e+02, threshold=2.135e+02, percent-clipped=2.0 +2024-09-17 05:42:46,865 INFO [train.py:1198] (0/2) Epoch 9, batch 3300, loss[loss=0.2788, ctc_loss=0.1773, cr_loss=0.3715, attn_decoder_loss=0.2818, over 28533.00 frames. ], tot_loss[loss=0.2687, ctc_loss=0.1771, cr_loss=0.4107, attn_decoder_loss=0.2697, over 5798699.25 frames. ], batch size: 112, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:42:47,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=158000.0, ans=0.125 +2024-09-17 05:42:59,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.05 vs. limit=15.0 +2024-09-17 05:43:07,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=158040.0, ans=0.125 +2024-09-17 05:43:31,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=158080.0, ans=0.0 +2024-09-17 05:43:55,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=158160.0, ans=0.025 +2024-09-17 05:44:01,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=158160.0, ans=0.07 +2024-09-17 05:44:02,221 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.83 vs. limit=15.0 +2024-09-17 05:44:04,444 INFO [train.py:1198] (0/2) Epoch 9, batch 3350, loss[loss=0.2896, ctc_loss=0.1984, cr_loss=0.4365, attn_decoder_loss=0.29, over 28911.00 frames. ], tot_loss[loss=0.2698, ctc_loss=0.1785, cr_loss=0.4128, attn_decoder_loss=0.2707, over 5774734.46 frames. 
], batch size: 104, lr: 1.23e-02, grad_scale: 4.0 +2024-09-17 05:44:04,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=158200.0, ans=0.125 +2024-09-17 05:44:13,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=158200.0, ans=0.125 +2024-09-17 05:44:13,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=158200.0, ans=0.0 +2024-09-17 05:44:17,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.82 vs. limit=22.5 +2024-09-17 05:44:18,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=158240.0, ans=0.0 +2024-09-17 05:44:34,450 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:44:41,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=158280.0, ans=0.125 +2024-09-17 05:44:44,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=158280.0, ans=0.125 +2024-09-17 05:44:44,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=158280.0, ans=0.125 +2024-09-17 05:44:53,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=158320.0, ans=0.125 +2024-09-17 05:44:55,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=158320.0, ans=0.0 +2024-09-17 05:45:09,908 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.44 vs. limit=15.0 +2024-09-17 05:45:15,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=158360.0, ans=0.2 +2024-09-17 05:45:16,179 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.103e+01 9.844e+01 1.079e+02 1.203e+02 3.746e+02, threshold=2.158e+02, percent-clipped=3.0 +2024-09-17 05:45:18,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=158360.0, ans=0.1 +2024-09-17 05:45:19,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=158360.0, ans=0.09899494936611666 +2024-09-17 05:45:22,611 INFO [train.py:1198] (0/2) Epoch 9, batch 3400, loss[loss=0.2386, ctc_loss=0.1514, cr_loss=0.3554, attn_decoder_loss=0.2404, over 29363.00 frames. ], tot_loss[loss=0.2698, ctc_loss=0.1789, cr_loss=0.4123, attn_decoder_loss=0.2707, over 5767787.35 frames. ], batch size: 67, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:45:38,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=158440.0, ans=0.125 +2024-09-17 05:45:43,038 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=9.10 vs. 
limit=15.0 +2024-09-17 05:45:53,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=158480.0, ans=0.125 +2024-09-17 05:46:21,440 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.28 vs. limit=12.0 +2024-09-17 05:46:38,414 INFO [train.py:1198] (0/2) Epoch 9, batch 3450, loss[loss=0.2925, ctc_loss=0.2019, cr_loss=0.4366, attn_decoder_loss=0.2929, over 28149.00 frames. ], tot_loss[loss=0.2705, ctc_loss=0.1793, cr_loss=0.4135, attn_decoder_loss=0.2714, over 5775926.38 frames. ], batch size: 111, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:46:48,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=158600.0, ans=0.2 +2024-09-17 05:46:51,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=158600.0, ans=0.1 +2024-09-17 05:47:00,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=158640.0, ans=0.1 +2024-09-17 05:47:21,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=158680.0, ans=0.125 +2024-09-17 05:47:21,522 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:47:32,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=158720.0, ans=0.125 +2024-09-17 05:47:49,418 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.56 vs. limit=22.5 +2024-09-17 05:47:50,906 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.76 vs. limit=15.0 +2024-09-17 05:47:51,398 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.572e+01 9.332e+01 9.969e+01 1.060e+02 1.614e+02, threshold=1.994e+02, percent-clipped=0.0 +2024-09-17 05:47:55,996 INFO [train.py:1198] (0/2) Epoch 9, batch 3500, loss[loss=0.2588, ctc_loss=0.1778, cr_loss=0.4192, attn_decoder_loss=0.2585, over 29311.00 frames. ], tot_loss[loss=0.2697, ctc_loss=0.1784, cr_loss=0.4128, attn_decoder_loss=0.2707, over 5777687.84 frames. ], batch size: 71, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:48:03,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=158800.0, ans=0.025 +2024-09-17 05:48:05,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=158800.0, ans=0.1 +2024-09-17 05:48:07,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=158800.0, ans=0.0 +2024-09-17 05:48:14,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=158840.0, ans=0.125 +2024-09-17 05:48:17,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=158840.0, ans=0.1 +2024-09-17 05:48:19,949 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.80 vs. 
limit=10.0 +2024-09-17 05:48:37,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=158880.0, ans=0.1 +2024-09-17 05:48:42,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=158920.0, ans=0.125 +2024-09-17 05:48:52,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=158920.0, ans=0.125 +2024-09-17 05:49:07,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=158960.0, ans=0.125 +2024-09-17 05:49:12,739 INFO [train.py:1198] (0/2) Epoch 9, batch 3550, loss[loss=0.2887, ctc_loss=0.1889, cr_loss=0.4221, attn_decoder_loss=0.2904, over 29714.00 frames. ], tot_loss[loss=0.2695, ctc_loss=0.178, cr_loss=0.4123, attn_decoder_loss=0.2705, over 5784731.85 frames. ], batch size: 89, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:49:12,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=159000.0, ans=0.0 +2024-09-17 05:49:23,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=159000.0, ans=0.125 +2024-09-17 05:49:31,500 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.40 vs. limit=15.0 +2024-09-17 05:49:35,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=159040.0, ans=0.125 +2024-09-17 05:49:45,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=159080.0, ans=0.125 +2024-09-17 05:50:06,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=159120.0, ans=0.025 +2024-09-17 05:50:06,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=159120.0, ans=0.0 +2024-09-17 05:50:07,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=159120.0, ans=0.125 +2024-09-17 05:50:23,964 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.321e+01 9.598e+01 1.057e+02 1.166e+02 3.699e+02, threshold=2.113e+02, percent-clipped=1.0 +2024-09-17 05:50:27,357 INFO [train.py:1198] (0/2) Epoch 9, batch 3600, loss[loss=0.2558, ctc_loss=0.1639, cr_loss=0.4103, attn_decoder_loss=0.2569, over 29506.00 frames. ], tot_loss[loss=0.2699, ctc_loss=0.178, cr_loss=0.4125, attn_decoder_loss=0.271, over 5793281.63 frames. ], batch size: 77, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:50:37,168 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.03 vs. limit=10.0 +2024-09-17 05:50:38,751 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.21 vs. 
limit=15.0 +2024-09-17 05:51:00,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=159280.0, ans=0.2 +2024-09-17 05:51:09,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=159280.0, ans=0.0 +2024-09-17 05:51:27,392 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.83 vs. limit=22.5 +2024-09-17 05:51:41,635 INFO [train.py:1198] (0/2) Epoch 9, batch 3650, loss[loss=0.2903, ctc_loss=0.1906, cr_loss=0.4737, attn_decoder_loss=0.2908, over 29472.00 frames. ], tot_loss[loss=0.269, ctc_loss=0.1772, cr_loss=0.4112, attn_decoder_loss=0.2701, over 5795130.13 frames. ], batch size: 90, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:51:59,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=159440.0, ans=0.1 +2024-09-17 05:52:27,621 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.87 vs. limit=15.0 +2024-09-17 05:52:28,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=159520.0, ans=0.125 +2024-09-17 05:52:40,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=159520.0, ans=0.025 +2024-09-17 05:52:43,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=159560.0, ans=0.2 +2024-09-17 05:52:54,980 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.190e+01 9.468e+01 1.034e+02 1.121e+02 1.943e+02, threshold=2.068e+02, percent-clipped=0.0 +2024-09-17 05:52:57,919 INFO [train.py:1198] (0/2) Epoch 9, batch 3700, loss[loss=0.276, ctc_loss=0.1766, cr_loss=0.3883, attn_decoder_loss=0.2784, over 29718.00 frames. ], tot_loss[loss=0.2689, ctc_loss=0.1771, cr_loss=0.4108, attn_decoder_loss=0.27, over 5805702.79 frames. ], batch size: 84, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:53:07,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=159600.0, ans=0.0 +2024-09-17 05:53:36,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=159680.0, ans=0.05 +2024-09-17 05:53:48,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=159720.0, ans=0.125 +2024-09-17 05:53:53,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_na.min_abs, batch_count=159720.0, ans=0.02 +2024-09-17 05:54:12,622 INFO [train.py:1198] (0/2) Epoch 9, batch 3750, loss[loss=0.2468, ctc_loss=0.1535, cr_loss=0.3696, attn_decoder_loss=0.2489, over 29347.00 frames. ], tot_loss[loss=0.269, ctc_loss=0.1776, cr_loss=0.4111, attn_decoder_loss=0.27, over 5808920.46 frames. ], batch size: 67, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:54:29,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=159840.0, ans=0.125 +2024-09-17 05:54:44,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.52 vs. 
limit=15.0 +2024-09-17 05:54:48,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=159880.0, ans=0.125 +2024-09-17 05:54:53,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=159880.0, ans=0.125 +2024-09-17 05:55:08,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=159920.0, ans=0.125 +2024-09-17 05:55:13,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.67 vs. limit=15.0 +2024-09-17 05:55:18,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=159960.0, ans=10.0 +2024-09-17 05:55:25,527 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.200e+01 9.758e+01 1.061e+02 1.222e+02 3.852e+02, threshold=2.121e+02, percent-clipped=3.0 +2024-09-17 05:55:27,372 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-40000.pt +2024-09-17 05:55:35,963 INFO [train.py:1198] (0/2) Epoch 9, batch 3800, loss[loss=0.2775, ctc_loss=0.1773, cr_loss=0.4209, attn_decoder_loss=0.2793, over 29609.00 frames. ], tot_loss[loss=0.2689, ctc_loss=0.1775, cr_loss=0.4109, attn_decoder_loss=0.2699, over 5799932.54 frames. ], batch size: 86, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:55:46,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=160000.0, ans=0.0 +2024-09-17 05:55:57,479 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.01 vs. limit=15.0 +2024-09-17 05:56:11,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=160080.0, ans=0.125 +2024-09-17 05:56:16,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=22.44 vs. limit=22.5 +2024-09-17 05:56:27,039 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=17.44 vs. limit=15.0 +2024-09-17 05:56:44,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=160160.0, ans=0.125 +2024-09-17 05:56:50,300 INFO [train.py:1198] (0/2) Epoch 9, batch 3850, loss[loss=0.2945, ctc_loss=0.2069, cr_loss=0.4685, attn_decoder_loss=0.2939, over 29283.00 frames. ], tot_loss[loss=0.2688, ctc_loss=0.1773, cr_loss=0.411, attn_decoder_loss=0.2698, over 5813097.96 frames. ], batch size: 100, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:57:06,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=160240.0, ans=0.2 +2024-09-17 05:57:11,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=160240.0, ans=0.1 +2024-09-17 05:57:16,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.87 vs. 
limit=15.0 +2024-09-17 05:57:36,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=160320.0, ans=0.1 +2024-09-17 05:57:39,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=160320.0, ans=0.025 +2024-09-17 05:58:03,426 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.356e+01 9.561e+01 1.016e+02 1.083e+02 1.844e+02, threshold=2.033e+02, percent-clipped=0.0 +2024-09-17 05:58:06,432 INFO [train.py:1198] (0/2) Epoch 9, batch 3900, loss[loss=0.2851, ctc_loss=0.1837, cr_loss=0.4017, attn_decoder_loss=0.2875, over 29614.00 frames. ], tot_loss[loss=0.2693, ctc_loss=0.1777, cr_loss=0.4118, attn_decoder_loss=0.2704, over 5817288.54 frames. ], batch size: 86, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:58:12,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=160400.0, ans=0.0 +2024-09-17 05:58:20,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=160440.0, ans=0.0 +2024-09-17 05:58:33,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=160440.0, ans=0.1 +2024-09-17 05:58:41,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=160480.0, ans=0.125 +2024-09-17 05:59:09,199 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:59:20,923 INFO [train.py:1198] (0/2) Epoch 9, batch 3950, loss[loss=0.2987, ctc_loss=0.2067, cr_loss=0.471, attn_decoder_loss=0.2985, over 29460.00 frames. ], tot_loss[loss=0.2694, ctc_loss=0.1775, cr_loss=0.4127, attn_decoder_loss=0.2705, over 5836286.26 frames. ], batch size: 97, lr: 1.22e-02, grad_scale: 4.0 +2024-09-17 05:59:28,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=160600.0, ans=0.0 +2024-09-17 05:59:34,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_na.min_abs, batch_count=160640.0, ans=0.02 +2024-09-17 06:00:16,454 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.59 vs. limit=6.0 +2024-09-17 06:00:34,492 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.022e+01 9.640e+01 1.057e+02 1.201e+02 4.208e+02, threshold=2.114e+02, percent-clipped=4.0 +2024-09-17 06:00:36,383 INFO [train.py:1198] (0/2) Epoch 9, batch 4000, loss[loss=0.237, ctc_loss=0.1452, cr_loss=0.3646, attn_decoder_loss=0.2391, over 29507.00 frames. ], tot_loss[loss=0.2697, ctc_loss=0.1779, cr_loss=0.4127, attn_decoder_loss=0.2708, over 5813149.69 frames. ], batch size: 74, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 06:01:05,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=160880.0, ans=0.0 +2024-09-17 06:01:50,828 INFO [train.py:1198] (0/2) Epoch 9, batch 4050, loss[loss=0.3068, ctc_loss=0.2345, cr_loss=0.4361, attn_decoder_loss=0.3052, over 20249.00 frames. ], tot_loss[loss=0.2696, ctc_loss=0.1776, cr_loss=0.4115, attn_decoder_loss=0.2707, over 5796803.30 frames. 
], batch size: 210, lr: 1.22e-02, grad_scale: 4.0 +2024-09-17 06:01:53,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.92 vs. limit=15.0 +2024-09-17 06:02:03,251 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.15 vs. limit=15.0 +2024-09-17 06:02:03,535 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.45 vs. limit=15.0 +2024-09-17 06:02:05,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=161040.0, ans=0.025 +2024-09-17 06:02:10,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=161040.0, ans=0.2 +2024-09-17 06:02:13,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=161040.0, ans=0.1 +2024-09-17 06:02:20,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=161080.0, ans=0.2 +2024-09-17 06:02:21,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.min_positive, batch_count=161080.0, ans=0.025 +2024-09-17 06:02:38,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=161120.0, ans=0.125 +2024-09-17 06:03:05,382 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.114e+01 9.617e+01 1.028e+02 1.240e+02 2.479e+02, threshold=2.055e+02, percent-clipped=2.0 +2024-09-17 06:03:05,412 INFO [train.py:1198] (0/2) Epoch 9, batch 4100, loss[loss=0.2943, ctc_loss=0.1999, cr_loss=0.4541, attn_decoder_loss=0.2947, over 29515.00 frames. ], tot_loss[loss=0.2698, ctc_loss=0.1777, cr_loss=0.4117, attn_decoder_loss=0.2709, over 5791828.07 frames. ], batch size: 90, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 06:03:11,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=161200.0, ans=0.1 +2024-09-17 06:03:21,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=161240.0, ans=0.0 +2024-09-17 06:04:19,361 INFO [train.py:1198] (0/2) Epoch 9, batch 4150, loss[loss=0.2568, ctc_loss=0.1614, cr_loss=0.4009, attn_decoder_loss=0.2585, over 29519.00 frames. ], tot_loss[loss=0.2694, ctc_loss=0.1773, cr_loss=0.4115, attn_decoder_loss=0.2705, over 5797248.76 frames. 
], batch size: 77, lr: 1.22e-02, grad_scale: 4.0 +2024-09-17 06:04:26,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=161400.0, ans=0.0 +2024-09-17 06:04:40,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=161440.0, ans=0.125 +2024-09-17 06:04:48,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=161480.0, ans=0.0 +2024-09-17 06:04:55,906 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:05:04,167 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.60 vs. limit=6.0 +2024-09-17 06:05:21,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=161560.0, ans=0.0 +2024-09-17 06:05:26,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=161560.0, ans=0.125 +2024-09-17 06:05:28,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=161560.0, ans=0.125 +2024-09-17 06:05:33,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=161600.0, ans=0.125 +2024-09-17 06:05:34,469 INFO [train.py:1198] (0/2) Epoch 9, batch 4200, loss[loss=0.2877, ctc_loss=0.1948, cr_loss=0.4464, attn_decoder_loss=0.2881, over 29555.00 frames. ], tot_loss[loss=0.2697, ctc_loss=0.1776, cr_loss=0.412, attn_decoder_loss=0.2707, over 5799820.75 frames. ], batch size: 90, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 06:05:35,869 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.845e+01 9.556e+01 1.027e+02 1.111e+02 2.120e+02, threshold=2.054e+02, percent-clipped=2.0 +2024-09-17 06:05:38,197 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.94 vs. limit=15.0 +2024-09-17 06:06:11,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=161680.0, ans=0.07 +2024-09-17 06:06:28,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=161720.0, ans=0.1 +2024-09-17 06:06:30,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=161720.0, ans=0.125 +2024-09-17 06:06:37,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=161760.0, ans=0.125 +2024-09-17 06:06:48,814 INFO [train.py:1198] (0/2) Epoch 9, batch 4250, loss[loss=0.2509, ctc_loss=0.1589, cr_loss=0.3832, attn_decoder_loss=0.2526, over 29505.00 frames. ], tot_loss[loss=0.2695, ctc_loss=0.1773, cr_loss=0.4117, attn_decoder_loss=0.2707, over 5805153.07 frames. ], batch size: 74, lr: 1.22e-02, grad_scale: 4.0 +2024-09-17 06:07:05,907 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.73 vs. 
limit=15.0 +2024-09-17 06:07:20,204 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:07:25,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.72 vs. limit=12.0 +2024-09-17 06:07:45,758 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.21 vs. limit=22.5 +2024-09-17 06:08:02,495 INFO [train.py:1198] (0/2) Epoch 9, batch 4300, loss[loss=0.282, ctc_loss=0.1845, cr_loss=0.4514, attn_decoder_loss=0.2828, over 29556.00 frames. ], tot_loss[loss=0.2699, ctc_loss=0.1775, cr_loss=0.4123, attn_decoder_loss=0.271, over 5793667.29 frames. ], batch size: 87, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 06:08:05,467 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.660e+01 9.959e+01 1.074e+02 1.170e+02 2.141e+02, threshold=2.147e+02, percent-clipped=1.0 +2024-09-17 06:08:36,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=162080.0, ans=0.0 +2024-09-17 06:08:42,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.55 vs. limit=15.0 +2024-09-17 06:08:43,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=162080.0, ans=0.2 +2024-09-17 06:09:17,152 INFO [train.py:1198] (0/2) Epoch 9, batch 4350, loss[loss=0.2844, ctc_loss=0.1926, cr_loss=0.433, attn_decoder_loss=0.285, over 29487.00 frames. ], tot_loss[loss=0.2731, ctc_loss=0.1804, cr_loss=0.4169, attn_decoder_loss=0.2742, over 5795674.96 frames. ], batch size: 97, lr: 1.21e-02, grad_scale: 8.0 +2024-09-17 06:09:40,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=162240.0, ans=0.2 +2024-09-17 06:09:47,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=162280.0, ans=0.125 +2024-09-17 06:09:50,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=162280.0, ans=0.125 +2024-09-17 06:10:30,458 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.40 vs. limit=15.0 +2024-09-17 06:10:31,075 INFO [train.py:1198] (0/2) Epoch 9, batch 4400, loss[loss=0.2801, ctc_loss=0.1958, cr_loss=0.4088, attn_decoder_loss=0.2803, over 27133.00 frames. ], tot_loss[loss=0.2756, ctc_loss=0.1828, cr_loss=0.4197, attn_decoder_loss=0.2766, over 5765689.79 frames. 
], batch size: 124, lr: 1.21e-02, grad_scale: 8.0 +2024-09-17 06:10:34,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=162400.0, ans=0.1 +2024-09-17 06:10:35,517 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.675e+01 9.860e+01 1.034e+02 1.169e+02 1.757e+02, threshold=2.069e+02, percent-clipped=0.0 +2024-09-17 06:11:11,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=162480.0, ans=0.2 +2024-09-17 06:11:37,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=162560.0, ans=0.1 +2024-09-17 06:11:46,095 INFO [train.py:1198] (0/2) Epoch 9, batch 4450, loss[loss=0.3029, ctc_loss=0.2203, cr_loss=0.4344, attn_decoder_loss=0.3024, over 20222.00 frames. ], tot_loss[loss=0.2795, ctc_loss=0.1889, cr_loss=0.4244, attn_decoder_loss=0.2801, over 5574389.15 frames. ], batch size: 210, lr: 1.21e-02, grad_scale: 4.0 +2024-09-17 06:11:55,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=162600.0, ans=0.015 +2024-09-17 06:11:55,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=162600.0, ans=0.0 +2024-09-17 06:11:55,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=162600.0, ans=0.0 +2024-09-17 06:12:16,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=162680.0, ans=0.125 +2024-09-17 06:12:19,950 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.65 vs. limit=15.0 +2024-09-17 06:12:45,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.31 vs. limit=15.0 +2024-09-17 06:12:56,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=162760.0, ans=0.1 +2024-09-17 06:13:01,695 INFO [train.py:1198] (0/2) Epoch 9, batch 4500, loss[loss=0.2997, ctc_loss=0.2322, cr_loss=0.4458, attn_decoder_loss=0.2973, over 20024.00 frames. ], tot_loss[loss=0.2831, ctc_loss=0.1961, cr_loss=0.4264, attn_decoder_loss=0.2833, over 5235550.11 frames. ], batch size: 209, lr: 1.21e-02, grad_scale: 8.0 +2024-09-17 06:13:07,510 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.470e+01 1.060e+02 1.171e+02 1.308e+02 2.646e+02, threshold=2.342e+02, percent-clipped=3.0 +2024-09-17 06:13:09,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=162800.0, ans=0.1 +2024-09-17 06:13:15,777 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=11.10 vs. 
limit=10.0 +2024-09-17 06:13:25,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=162840.0, ans=0.0 +2024-09-17 06:13:31,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=162880.0, ans=0.0 +2024-09-17 06:13:38,199 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-9.pt +2024-09-17 06:14:29,144 INFO [train.py:1198] (0/2) Epoch 10, batch 0, loss[loss=0.2468, ctc_loss=0.1506, cr_loss=0.3715, attn_decoder_loss=0.2493, over 29599.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.1506, cr_loss=0.3715, attn_decoder_loss=0.2493, over 29599.00 frames. ], batch size: 73, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:14:29,145 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 06:14:47,512 INFO [train.py:1230] (0/2) Epoch 10, validation: loss=0.2171, ctc_loss=0.05118, cr_loss=4.759e-15, attn_decoder_loss=0.2355, over 944034.00 frames. +2024-09-17 06:14:47,512 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 06:14:50,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=162900.0, ans=0.125 +2024-09-17 06:15:02,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=162940.0, ans=0.2 +2024-09-17 06:15:13,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=162940.0, ans=0.125 +2024-09-17 06:15:25,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=162980.0, ans=0.2 +2024-09-17 06:15:32,490 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.14 vs. limit=15.0 +2024-09-17 06:15:49,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=163060.0, ans=0.125 +2024-09-17 06:15:49,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=163060.0, ans=0.125 +2024-09-17 06:15:49,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=163060.0, ans=0.1 +2024-09-17 06:15:55,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=163060.0, ans=0.125 +2024-09-17 06:16:02,849 INFO [train.py:1198] (0/2) Epoch 10, batch 50, loss[loss=0.246, ctc_loss=0.1603, cr_loss=0.3829, attn_decoder_loss=0.247, over 29441.00 frames. ], tot_loss[loss=0.2723, ctc_loss=0.1814, cr_loss=0.4167, attn_decoder_loss=0.2732, over 1268198.61 frames. 
], batch size: 70, lr: 1.15e-02, grad_scale: 4.0 +2024-09-17 06:16:07,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=163100.0, ans=0.125 +2024-09-17 06:16:10,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=163100.0, ans=0.025 +2024-09-17 06:16:33,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=163180.0, ans=0.95 +2024-09-17 06:16:36,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=163180.0, ans=0.0 +2024-09-17 06:16:52,270 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.150e+01 9.660e+01 1.078e+02 1.244e+02 7.750e+02, threshold=2.155e+02, percent-clipped=3.0 +2024-09-17 06:17:22,987 INFO [train.py:1198] (0/2) Epoch 10, batch 100, loss[loss=0.2598, ctc_loss=0.1726, cr_loss=0.4062, attn_decoder_loss=0.2605, over 29532.00 frames. ], tot_loss[loss=0.2736, ctc_loss=0.1811, cr_loss=0.4181, attn_decoder_loss=0.2746, over 2253636.68 frames. ], batch size: 76, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:17:29,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=163300.0, ans=0.0 +2024-09-17 06:17:29,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=163300.0, ans=0.125 +2024-09-17 06:18:09,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=163420.0, ans=0.2 +2024-09-17 06:18:09,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=163420.0, ans=0.5 +2024-09-17 06:18:18,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=163420.0, ans=0.125 +2024-09-17 06:18:22,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=163460.0, ans=0.1 +2024-09-17 06:18:22,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=163460.0, ans=0.0 +2024-09-17 06:18:30,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=163460.0, ans=0.2 +2024-09-17 06:18:35,506 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.99 vs. limit=15.0 +2024-09-17 06:18:37,444 INFO [train.py:1198] (0/2) Epoch 10, batch 150, loss[loss=0.2355, ctc_loss=0.1499, cr_loss=0.3779, attn_decoder_loss=0.2366, over 29432.00 frames. ], tot_loss[loss=0.2701, ctc_loss=0.1777, cr_loss=0.4144, attn_decoder_loss=0.2712, over 3048279.39 frames. 
], batch size: 70, lr: 1.15e-02, grad_scale: 4.0 +2024-09-17 06:18:40,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=163500.0, ans=0.125 +2024-09-17 06:19:03,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=163540.0, ans=0.125 +2024-09-17 06:19:04,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=163540.0, ans=0.0 +2024-09-17 06:19:25,266 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 9.231e+01 9.712e+01 1.046e+02 1.496e+02, threshold=1.942e+02, percent-clipped=0.0 +2024-09-17 06:19:30,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=163620.0, ans=0.1 +2024-09-17 06:19:39,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=163660.0, ans=0.025 +2024-09-17 06:19:52,318 INFO [train.py:1198] (0/2) Epoch 10, batch 200, loss[loss=0.2928, ctc_loss=0.2045, cr_loss=0.461, attn_decoder_loss=0.2924, over 27488.00 frames. ], tot_loss[loss=0.2687, ctc_loss=0.1762, cr_loss=0.4134, attn_decoder_loss=0.2698, over 3660246.32 frames. ], batch size: 124, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:20:00,447 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.30 vs. limit=22.5 +2024-09-17 06:20:04,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=163700.0, ans=0.0 +2024-09-17 06:20:05,149 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.97 vs. limit=22.5 +2024-09-17 06:20:20,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=163780.0, ans=0.125 +2024-09-17 06:20:26,648 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.68 vs. limit=22.5 +2024-09-17 06:20:27,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=163780.0, ans=0.125 +2024-09-17 06:20:33,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=163780.0, ans=0.0 +2024-09-17 06:20:41,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=163820.0, ans=0.0 +2024-09-17 06:20:54,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=163820.0, ans=0.1 +2024-09-17 06:20:57,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=163860.0, ans=0.125 +2024-09-17 06:21:11,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=163900.0, ans=0.125 +2024-09-17 06:21:12,289 INFO [train.py:1198] (0/2) Epoch 10, batch 250, loss[loss=0.2986, ctc_loss=0.208, cr_loss=0.4642, attn_decoder_loss=0.2983, over 29258.00 frames. ], tot_loss[loss=0.269, ctc_loss=0.1764, cr_loss=0.4129, attn_decoder_loss=0.2701, over 4142685.48 frames. 
], batch size: 100, lr: 1.15e-02, grad_scale: 4.0 +2024-09-17 06:21:26,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=163940.0, ans=0.07 +2024-09-17 06:21:44,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=163980.0, ans=0.125 +2024-09-17 06:21:44,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=163980.0, ans=0.0 +2024-09-17 06:21:49,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=163980.0, ans=0.04949747468305833 +2024-09-17 06:21:50,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=163980.0, ans=0.0 +2024-09-17 06:22:02,587 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.245e+01 9.493e+01 1.020e+02 1.129e+02 1.613e+02, threshold=2.040e+02, percent-clipped=0.0 +2024-09-17 06:22:10,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=164020.0, ans=0.025 +2024-09-17 06:22:28,437 INFO [train.py:1198] (0/2) Epoch 10, batch 300, loss[loss=0.2778, ctc_loss=0.183, cr_loss=0.4143, attn_decoder_loss=0.2791, over 29545.00 frames. ], tot_loss[loss=0.2683, ctc_loss=0.1752, cr_loss=0.411, attn_decoder_loss=0.2695, over 4510690.87 frames. ], batch size: 92, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:22:30,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=164100.0, ans=0.125 +2024-09-17 06:22:41,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.34 vs. limit=10.0 +2024-09-17 06:22:53,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=164140.0, ans=0.125 +2024-09-17 06:23:29,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=164260.0, ans=0.0 +2024-09-17 06:23:35,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=164260.0, ans=0.125 +2024-09-17 06:23:35,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=164260.0, ans=0.2 +2024-09-17 06:23:44,702 INFO [train.py:1198] (0/2) Epoch 10, batch 350, loss[loss=0.2364, ctc_loss=0.143, cr_loss=0.3499, attn_decoder_loss=0.2391, over 29332.00 frames. ], tot_loss[loss=0.2683, ctc_loss=0.1751, cr_loss=0.4108, attn_decoder_loss=0.2695, over 4796276.05 frames. 
], batch size: 71, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:23:58,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=164340.0, ans=0.125 +2024-09-17 06:24:06,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=164340.0, ans=0.5 +2024-09-17 06:24:24,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=164380.0, ans=0.1 +2024-09-17 06:24:25,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=164380.0, ans=0.2 +2024-09-17 06:24:28,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=164420.0, ans=0.125 +2024-09-17 06:24:34,757 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.056e+01 9.667e+01 1.045e+02 1.260e+02 3.351e+02, threshold=2.090e+02, percent-clipped=2.0 +2024-09-17 06:25:05,711 INFO [train.py:1198] (0/2) Epoch 10, batch 400, loss[loss=0.2789, ctc_loss=0.184, cr_loss=0.4218, attn_decoder_loss=0.28, over 29707.00 frames. ], tot_loss[loss=0.2679, ctc_loss=0.1747, cr_loss=0.4097, attn_decoder_loss=0.2691, over 5026691.05 frames. ], batch size: 82, lr: 1.15e-02, grad_scale: 16.0 +2024-09-17 06:25:30,690 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.33 vs. limit=22.5 +2024-09-17 06:25:35,354 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.03 vs. limit=15.0 +2024-09-17 06:25:58,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=164620.0, ans=0.125 +2024-09-17 06:26:20,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=164700.0, ans=0.2 +2024-09-17 06:26:21,196 INFO [train.py:1198] (0/2) Epoch 10, batch 450, loss[loss=0.2663, ctc_loss=0.1682, cr_loss=0.4074, attn_decoder_loss=0.2682, over 29693.00 frames. ], tot_loss[loss=0.2681, ctc_loss=0.175, cr_loss=0.4094, attn_decoder_loss=0.2694, over 5187681.68 frames. ], batch size: 83, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:26:52,360 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.33 vs. limit=15.0 +2024-09-17 06:27:13,194 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.450e+01 9.295e+01 1.006e+02 1.063e+02 1.826e+02, threshold=2.013e+02, percent-clipped=0.0 +2024-09-17 06:27:31,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=164860.0, ans=0.125 +2024-09-17 06:27:31,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=164860.0, ans=0.125 +2024-09-17 06:27:36,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.45 vs. limit=15.0 +2024-09-17 06:27:37,052 INFO [train.py:1198] (0/2) Epoch 10, batch 500, loss[loss=0.276, ctc_loss=0.1696, cr_loss=0.4105, attn_decoder_loss=0.2787, over 29439.00 frames. 
], tot_loss[loss=0.2675, ctc_loss=0.1744, cr_loss=0.4088, attn_decoder_loss=0.2687, over 5330404.34 frames. ], batch size: 94, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:27:39,482 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.07 vs. limit=15.0 +2024-09-17 06:28:00,818 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.21 vs. limit=22.5 +2024-09-17 06:28:01,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=164940.0, ans=0.04949747468305833 +2024-09-17 06:28:03,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=164940.0, ans=0.1 +2024-09-17 06:28:15,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=164980.0, ans=0.1 +2024-09-17 06:28:57,306 INFO [train.py:1198] (0/2) Epoch 10, batch 550, loss[loss=0.2833, ctc_loss=0.1884, cr_loss=0.4299, attn_decoder_loss=0.2843, over 28800.00 frames. ], tot_loss[loss=0.2679, ctc_loss=0.1751, cr_loss=0.4105, attn_decoder_loss=0.2691, over 5424215.36 frames. ], batch size: 104, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:29:09,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=165100.0, ans=0.0 +2024-09-17 06:29:20,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=165140.0, ans=0.125 +2024-09-17 06:29:43,457 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.15 vs. limit=15.0 +2024-09-17 06:29:50,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=165220.0, ans=0.1 +2024-09-17 06:29:51,813 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.131e+01 9.579e+01 1.029e+02 1.127e+02 2.367e+02, threshold=2.058e+02, percent-clipped=2.0 +2024-09-17 06:29:57,272 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.57 vs. limit=15.0 +2024-09-17 06:29:59,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=165260.0, ans=0.125 +2024-09-17 06:30:00,440 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.41 vs. limit=15.0 +2024-09-17 06:30:13,110 INFO [train.py:1198] (0/2) Epoch 10, batch 600, loss[loss=0.2803, ctc_loss=0.1828, cr_loss=0.4353, attn_decoder_loss=0.2814, over 29246.00 frames. ], tot_loss[loss=0.2685, ctc_loss=0.1754, cr_loss=0.4114, attn_decoder_loss=0.2697, over 5510033.08 frames. 
], batch size: 100, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:30:13,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=165300.0, ans=0.0 +2024-09-17 06:30:29,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=165340.0, ans=0.125 +2024-09-17 06:30:38,308 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.54 vs. limit=6.0 +2024-09-17 06:30:39,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=165340.0, ans=0.2 +2024-09-17 06:31:03,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=1.99 vs. limit=15.0 +2024-09-17 06:31:06,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.51 vs. limit=15.0 +2024-09-17 06:31:09,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=165420.0, ans=0.125 +2024-09-17 06:31:19,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=165460.0, ans=0.0 +2024-09-17 06:31:23,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=165460.0, ans=0.0 +2024-09-17 06:31:27,698 INFO [train.py:1198] (0/2) Epoch 10, batch 650, loss[loss=0.2651, ctc_loss=0.17, cr_loss=0.4241, attn_decoder_loss=0.2662, over 29751.00 frames. ], tot_loss[loss=0.2673, ctc_loss=0.174, cr_loss=0.4102, attn_decoder_loss=0.2686, over 5586982.39 frames. ], batch size: 81, lr: 1.14e-02, grad_scale: 4.0 +2024-09-17 06:31:28,457 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.32 vs. limit=15.0 +2024-09-17 06:31:56,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=165580.0, ans=0.09899494936611666 +2024-09-17 06:31:58,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=165580.0, ans=0.125 +2024-09-17 06:32:12,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=165620.0, ans=0.1 +2024-09-17 06:32:12,894 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.08 vs. limit=15.0 +2024-09-17 06:32:23,906 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.175e+01 9.258e+01 9.852e+01 1.047e+02 1.585e+02, threshold=1.970e+02, percent-clipped=0.0 +2024-09-17 06:32:35,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=165660.0, ans=0.125 +2024-09-17 06:32:45,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=165660.0, ans=0.125 +2024-09-17 06:32:47,996 INFO [train.py:1198] (0/2) Epoch 10, batch 700, loss[loss=0.2625, ctc_loss=0.1695, cr_loss=0.402, attn_decoder_loss=0.264, over 29539.00 frames. 
], tot_loss[loss=0.2675, ctc_loss=0.1741, cr_loss=0.4108, attn_decoder_loss=0.2688, over 5638100.27 frames. ], batch size: 76, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:33:02,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.08 vs. limit=15.0 +2024-09-17 06:33:16,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=165780.0, ans=0.125 +2024-09-17 06:33:18,348 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=2.502e-03 +2024-09-17 06:33:19,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=165780.0, ans=0.2 +2024-09-17 06:33:34,818 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:33:41,557 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.53 vs. limit=15.0 +2024-09-17 06:33:47,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=165860.0, ans=0.125 +2024-09-17 06:33:47,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.31 vs. limit=15.0 +2024-09-17 06:33:53,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=165860.0, ans=0.125 +2024-09-17 06:34:03,341 INFO [train.py:1198] (0/2) Epoch 10, batch 750, loss[loss=0.2693, ctc_loss=0.1734, cr_loss=0.4356, attn_decoder_loss=0.2703, over 29712.00 frames. ], tot_loss[loss=0.2667, ctc_loss=0.1733, cr_loss=0.4089, attn_decoder_loss=0.268, over 5676643.21 frames. ], batch size: 82, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:34:15,961 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.20 vs. limit=15.0 +2024-09-17 06:34:23,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=165940.0, ans=0.0 +2024-09-17 06:34:34,559 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.30 vs. limit=6.0 +2024-09-17 06:34:39,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=165980.0, ans=0.125 +2024-09-17 06:34:58,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=166020.0, ans=0.125 +2024-09-17 06:35:00,790 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 9.819e+01 1.062e+02 1.153e+02 3.541e+02, threshold=2.124e+02, percent-clipped=2.0 +2024-09-17 06:35:05,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=166060.0, ans=0.0 +2024-09-17 06:35:11,660 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:35:18,940 INFO [train.py:1198] (0/2) Epoch 10, batch 800, loss[loss=0.2377, ctc_loss=0.1374, cr_loss=0.3501, attn_decoder_loss=0.241, over 29611.00 frames. 
], tot_loss[loss=0.2668, ctc_loss=0.1734, cr_loss=0.4093, attn_decoder_loss=0.2681, over 5708001.17 frames. ], batch size: 73, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:35:36,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.61 vs. limit=15.0 +2024-09-17 06:35:37,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=166140.0, ans=0.0 +2024-09-17 06:36:01,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=166180.0, ans=0.025 +2024-09-17 06:36:11,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=166220.0, ans=0.125 +2024-09-17 06:36:36,139 INFO [train.py:1198] (0/2) Epoch 10, batch 850, loss[loss=0.2763, ctc_loss=0.1737, cr_loss=0.3967, attn_decoder_loss=0.2789, over 29710.00 frames. ], tot_loss[loss=0.2666, ctc_loss=0.1734, cr_loss=0.4089, attn_decoder_loss=0.2679, over 5735597.04 frames. ], batch size: 89, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:36:59,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=166340.0, ans=0.2 +2024-09-17 06:37:02,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=166340.0, ans=0.125 +2024-09-17 06:37:25,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=166420.0, ans=0.95 +2024-09-17 06:37:36,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=166420.0, ans=0.125 +2024-09-17 06:37:37,424 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.461e+01 9.790e+01 1.072e+02 1.196e+02 1.464e+02, threshold=2.145e+02, percent-clipped=0.0 +2024-09-17 06:37:48,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=166460.0, ans=0.0 +2024-09-17 06:37:54,165 INFO [train.py:1198] (0/2) Epoch 10, batch 900, loss[loss=0.2393, ctc_loss=0.1511, cr_loss=0.3744, attn_decoder_loss=0.2408, over 29616.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1739, cr_loss=0.4093, attn_decoder_loss=0.2682, over 5741004.98 frames. ], batch size: 73, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:38:26,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=166580.0, ans=0.0 +2024-09-17 06:38:32,995 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=15.0 +2024-09-17 06:38:41,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=166620.0, ans=0.125 +2024-09-17 06:38:52,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=166620.0, ans=22.5 +2024-09-17 06:38:58,156 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.46 vs. limit=15.0 +2024-09-17 06:39:09,487 INFO [train.py:1198] (0/2) Epoch 10, batch 950, loss[loss=0.249, ctc_loss=0.1544, cr_loss=0.389, attn_decoder_loss=0.2509, over 29514.00 frames. 
], tot_loss[loss=0.2669, ctc_loss=0.1741, cr_loss=0.409, attn_decoder_loss=0.2681, over 5744250.11 frames. ], batch size: 74, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:39:18,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=166700.0, ans=0.125 +2024-09-17 06:39:29,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=166740.0, ans=0.2 +2024-09-17 06:39:47,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=166780.0, ans=0.125 +2024-09-17 06:39:50,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=166780.0, ans=0.125 +2024-09-17 06:39:58,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=166820.0, ans=0.0 +2024-09-17 06:40:00,037 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.08 vs. limit=15.0 +2024-09-17 06:40:05,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=166820.0, ans=0.125 +2024-09-17 06:40:09,834 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.298e+01 9.770e+01 1.085e+02 1.240e+02 2.634e+02, threshold=2.170e+02, percent-clipped=2.0 +2024-09-17 06:40:16,039 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.54 vs. limit=15.0 +2024-09-17 06:40:26,957 INFO [train.py:1198] (0/2) Epoch 10, batch 1000, loss[loss=0.2684, ctc_loss=0.1733, cr_loss=0.3976, attn_decoder_loss=0.2702, over 29530.00 frames. ], tot_loss[loss=0.2681, ctc_loss=0.1758, cr_loss=0.4109, attn_decoder_loss=0.2693, over 5737324.30 frames. ], batch size: 77, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:40:28,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=166900.0, ans=0.04949747468305833 +2024-09-17 06:40:35,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=166900.0, ans=0.2 +2024-09-17 06:40:38,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=166900.0, ans=0.125 +2024-09-17 06:41:01,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=166980.0, ans=0.125 +2024-09-17 06:41:13,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=167020.0, ans=0.0 +2024-09-17 06:41:33,579 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.37 vs. limit=15.0 +2024-09-17 06:41:39,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=167060.0, ans=0.04949747468305833 +2024-09-17 06:41:44,652 INFO [train.py:1198] (0/2) Epoch 10, batch 1050, loss[loss=0.2791, ctc_loss=0.1852, cr_loss=0.4329, attn_decoder_loss=0.2799, over 29683.00 frames. ], tot_loss[loss=0.2671, ctc_loss=0.1746, cr_loss=0.4095, attn_decoder_loss=0.2683, over 5746408.54 frames. 
], batch size: 85, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:42:03,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=167140.0, ans=0.125 +2024-09-17 06:42:05,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=167140.0, ans=0.015 +2024-09-17 06:42:26,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=167180.0, ans=0.1 +2024-09-17 06:42:45,691 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.318e+01 9.401e+01 9.855e+01 1.069e+02 2.033e+02, threshold=1.971e+02, percent-clipped=0.0 +2024-09-17 06:42:46,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.90 vs. limit=15.0 +2024-09-17 06:43:00,925 INFO [train.py:1198] (0/2) Epoch 10, batch 1100, loss[loss=0.2583, ctc_loss=0.1669, cr_loss=0.4136, attn_decoder_loss=0.2593, over 29433.00 frames. ], tot_loss[loss=0.2667, ctc_loss=0.1743, cr_loss=0.409, attn_decoder_loss=0.2679, over 5757530.38 frames. ], batch size: 78, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:43:04,852 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.99 vs. limit=15.0 +2024-09-17 06:43:06,123 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.10 vs. limit=22.5 +2024-09-17 06:43:10,239 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:43:10,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=167300.0, ans=0.1 +2024-09-17 06:43:11,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=167300.0, ans=0.2 +2024-09-17 06:43:25,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=167340.0, ans=0.1 +2024-09-17 06:43:37,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=167380.0, ans=0.125 +2024-09-17 06:43:43,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=167380.0, ans=0.0 +2024-09-17 06:43:47,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=167420.0, ans=0.125 +2024-09-17 06:44:05,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=167460.0, ans=0.0 +2024-09-17 06:44:06,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=167460.0, ans=0.025 +2024-09-17 06:44:18,500 INFO [train.py:1198] (0/2) Epoch 10, batch 1150, loss[loss=0.2651, ctc_loss=0.1718, cr_loss=0.3948, attn_decoder_loss=0.2667, over 29455.00 frames. ], tot_loss[loss=0.2668, ctc_loss=0.1744, cr_loss=0.4096, attn_decoder_loss=0.268, over 5755220.74 frames. 
], batch size: 78, lr: 1.14e-02, grad_scale: 4.0 +2024-09-17 06:44:19,687 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=14.04 vs. limit=15.0 +2024-09-17 06:44:34,848 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:44:37,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=167540.0, ans=0.0 +2024-09-17 06:44:50,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=167580.0, ans=0.1 +2024-09-17 06:45:22,896 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.006e+01 9.612e+01 1.039e+02 1.179e+02 2.688e+02, threshold=2.078e+02, percent-clipped=2.0 +2024-09-17 06:45:26,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=167660.0, ans=0.125 +2024-09-17 06:45:36,415 INFO [train.py:1198] (0/2) Epoch 10, batch 1200, loss[loss=0.2656, ctc_loss=0.1596, cr_loss=0.4013, attn_decoder_loss=0.2684, over 29671.00 frames. ], tot_loss[loss=0.2679, ctc_loss=0.1752, cr_loss=0.4107, attn_decoder_loss=0.269, over 5749426.87 frames. ], batch size: 85, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:45:42,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=167700.0, ans=0.2 +2024-09-17 06:45:43,620 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.51 vs. limit=6.0 +2024-09-17 06:46:51,948 INFO [train.py:1198] (0/2) Epoch 10, batch 1250, loss[loss=0.284, ctc_loss=0.1814, cr_loss=0.4177, attn_decoder_loss=0.2861, over 29540.00 frames. ], tot_loss[loss=0.2681, ctc_loss=0.1751, cr_loss=0.411, attn_decoder_loss=0.2693, over 5776377.48 frames. ], batch size: 92, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:46:57,374 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.89 vs. 
limit=15.0 +2024-09-17 06:46:59,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=167900.0, ans=0.125 +2024-09-17 06:47:07,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=167940.0, ans=0.0 +2024-09-17 06:47:13,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=167940.0, ans=0.1 +2024-09-17 06:47:19,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=167940.0, ans=0.025 +2024-09-17 06:47:26,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=167980.0, ans=0.125 +2024-09-17 06:47:28,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=167980.0, ans=0.125 +2024-09-17 06:47:49,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=168020.0, ans=0.1 +2024-09-17 06:47:51,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=168060.0, ans=0.95 +2024-09-17 06:47:56,413 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.293e+01 9.363e+01 1.028e+02 1.124e+02 2.251e+02, threshold=2.057e+02, percent-clipped=1.0 +2024-09-17 06:47:59,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=168060.0, ans=0.125 +2024-09-17 06:48:02,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=168060.0, ans=0.125 +2024-09-17 06:48:06,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.25 vs. limit=15.0 +2024-09-17 06:48:08,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=168100.0, ans=0.125 +2024-09-17 06:48:09,973 INFO [train.py:1198] (0/2) Epoch 10, batch 1300, loss[loss=0.279, ctc_loss=0.182, cr_loss=0.4311, attn_decoder_loss=0.2802, over 28201.00 frames. ], tot_loss[loss=0.2674, ctc_loss=0.1742, cr_loss=0.4102, attn_decoder_loss=0.2686, over 5779979.19 frames. ], batch size: 111, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:48:22,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=168100.0, ans=0.125 +2024-09-17 06:48:30,369 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.05 vs. limit=22.5 +2024-09-17 06:48:57,127 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.92 vs. limit=12.0 +2024-09-17 06:49:11,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=168260.0, ans=0.015 +2024-09-17 06:49:20,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=168260.0, ans=0.125 +2024-09-17 06:49:27,980 INFO [train.py:1198] (0/2) Epoch 10, batch 1350, loss[loss=0.2631, ctc_loss=0.1664, cr_loss=0.3978, attn_decoder_loss=0.265, over 29751.00 frames. 
], tot_loss[loss=0.267, ctc_loss=0.1738, cr_loss=0.4097, attn_decoder_loss=0.2683, over 5796748.01 frames. ], batch size: 81, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:49:44,710 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:50:21,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=168420.0, ans=0.1 +2024-09-17 06:50:29,347 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.801e+01 9.605e+01 1.036e+02 1.132e+02 1.597e+02, threshold=2.072e+02, percent-clipped=0.0 +2024-09-17 06:50:38,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=168460.0, ans=0.125 +2024-09-17 06:50:42,763 INFO [train.py:1198] (0/2) Epoch 10, batch 1400, loss[loss=0.2296, ctc_loss=0.147, cr_loss=0.3693, attn_decoder_loss=0.2306, over 29541.00 frames. ], tot_loss[loss=0.2664, ctc_loss=0.1732, cr_loss=0.4091, attn_decoder_loss=0.2677, over 5807534.98 frames. ], batch size: 69, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:50:52,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=168500.0, ans=0.125 +2024-09-17 06:51:20,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=168580.0, ans=0.125 +2024-09-17 06:51:34,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=168620.0, ans=0.0 +2024-09-17 06:51:40,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=168620.0, ans=0.125 +2024-09-17 06:51:51,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=168660.0, ans=0.125 +2024-09-17 06:52:00,286 INFO [train.py:1198] (0/2) Epoch 10, batch 1450, loss[loss=0.2806, ctc_loss=0.1788, cr_loss=0.4122, attn_decoder_loss=0.2828, over 29427.00 frames. ], tot_loss[loss=0.2667, ctc_loss=0.1732, cr_loss=0.4087, attn_decoder_loss=0.268, over 5803468.49 frames. ], batch size: 94, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:52:00,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=168700.0, ans=0.2 +2024-09-17 06:52:02,838 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.62 vs. limit=22.5 +2024-09-17 06:52:18,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=168740.0, ans=0.125 +2024-09-17 06:52:32,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=168780.0, ans=0.125 +2024-09-17 06:52:48,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=168820.0, ans=0.125 +2024-09-17 06:52:51,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.88 vs. 
limit=10.0 +2024-09-17 06:52:55,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=168820.0, ans=0.2 +2024-09-17 06:52:58,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=168820.0, ans=0.0 +2024-09-17 06:53:00,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=168820.0, ans=0.125 +2024-09-17 06:53:06,096 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.081e+01 9.586e+01 1.053e+02 1.129e+02 3.740e+02, threshold=2.106e+02, percent-clipped=3.0 +2024-09-17 06:53:16,078 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.89 vs. limit=22.5 +2024-09-17 06:53:18,307 INFO [train.py:1198] (0/2) Epoch 10, batch 1500, loss[loss=0.2793, ctc_loss=0.18, cr_loss=0.4193, attn_decoder_loss=0.281, over 29642.00 frames. ], tot_loss[loss=0.2674, ctc_loss=0.1736, cr_loss=0.4098, attn_decoder_loss=0.2687, over 5804705.95 frames. ], batch size: 86, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:53:33,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=168940.0, ans=0.1 +2024-09-17 06:53:39,002 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.66 vs. limit=6.0 +2024-09-17 06:53:43,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=6.07 vs. limit=6.0 +2024-09-17 06:53:45,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=168940.0, ans=0.125 +2024-09-17 06:53:50,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=168980.0, ans=0.125 +2024-09-17 06:53:56,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=168980.0, ans=0.1 +2024-09-17 06:53:58,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=168980.0, ans=0.2 +2024-09-17 06:54:34,392 INFO [train.py:1198] (0/2) Epoch 10, batch 1550, loss[loss=0.2826, ctc_loss=0.1902, cr_loss=0.4478, attn_decoder_loss=0.2829, over 29522.00 frames. ], tot_loss[loss=0.268, ctc_loss=0.1749, cr_loss=0.411, attn_decoder_loss=0.2692, over 5780615.61 frames. ], batch size: 90, lr: 1.13e-02, grad_scale: 4.0 +2024-09-17 06:54:45,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=169100.0, ans=0.1 +2024-09-17 06:54:56,186 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.24 vs. 
limit=22.5 +2024-09-17 06:55:19,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=169220.0, ans=0.035 +2024-09-17 06:55:22,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=169220.0, ans=0.09899494936611666 +2024-09-17 06:55:39,267 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=6.97 vs. limit=12.0 +2024-09-17 06:55:41,101 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.001e+01 9.536e+01 1.067e+02 1.173e+02 2.612e+02, threshold=2.133e+02, percent-clipped=1.0 +2024-09-17 06:55:44,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=169260.0, ans=0.2 +2024-09-17 06:55:51,706 INFO [train.py:1198] (0/2) Epoch 10, batch 1600, loss[loss=0.2759, ctc_loss=0.1814, cr_loss=0.4237, attn_decoder_loss=0.2769, over 29680.00 frames. ], tot_loss[loss=0.268, ctc_loss=0.1752, cr_loss=0.4109, attn_decoder_loss=0.2691, over 5763623.98 frames. ], batch size: 85, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:55:55,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=169300.0, ans=0.125 +2024-09-17 06:56:03,348 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.02 vs. limit=22.5 +2024-09-17 06:56:04,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=169300.0, ans=0.0 +2024-09-17 06:56:05,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=169340.0, ans=0.025 +2024-09-17 06:56:13,890 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.25 vs. limit=10.0 +2024-09-17 06:56:16,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=169340.0, ans=0.04949747468305833 +2024-09-17 06:57:09,454 INFO [train.py:1198] (0/2) Epoch 10, batch 1650, loss[loss=0.2772, ctc_loss=0.1736, cr_loss=0.4211, attn_decoder_loss=0.2793, over 29695.00 frames. ], tot_loss[loss=0.2675, ctc_loss=0.1745, cr_loss=0.4109, attn_decoder_loss=0.2687, over 5758589.64 frames. 
], batch size: 89, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:57:14,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=169500.0, ans=0.025 +2024-09-17 06:57:36,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=169540.0, ans=0.1 +2024-09-17 06:57:44,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=169580.0, ans=0.04949747468305833 +2024-09-17 06:57:49,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=169580.0, ans=0.125 +2024-09-17 06:57:52,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=169580.0, ans=0.0 +2024-09-17 06:58:14,708 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.189e+01 9.368e+01 9.821e+01 1.048e+02 1.434e+02, threshold=1.964e+02, percent-clipped=0.0 +2024-09-17 06:58:21,456 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.52 vs. limit=15.0 +2024-09-17 06:58:25,120 INFO [train.py:1198] (0/2) Epoch 10, batch 1700, loss[loss=0.23, ctc_loss=0.143, cr_loss=0.3621, attn_decoder_loss=0.2316, over 29551.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1738, cr_loss=0.4105, attn_decoder_loss=0.2682, over 5779936.06 frames. ], batch size: 69, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:58:48,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=169740.0, ans=0.125 +2024-09-17 06:58:49,112 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.21 vs. limit=6.0 +2024-09-17 06:58:50,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.10 vs. limit=10.0 +2024-09-17 06:59:05,369 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.01 vs. limit=15.0 +2024-09-17 06:59:15,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=169820.0, ans=0.0 +2024-09-17 06:59:18,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=169820.0, ans=0.125 +2024-09-17 06:59:24,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=169860.0, ans=0.1 +2024-09-17 06:59:31,479 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.09 vs. limit=22.5 +2024-09-17 06:59:42,516 INFO [train.py:1198] (0/2) Epoch 10, batch 1750, loss[loss=0.2365, ctc_loss=0.143, cr_loss=0.3506, attn_decoder_loss=0.2391, over 29353.00 frames. ], tot_loss[loss=0.2665, ctc_loss=0.1732, cr_loss=0.4091, attn_decoder_loss=0.2678, over 5788056.87 frames. 
], batch size: 67, lr: 1.13e-02, grad_scale: 4.0 +2024-09-17 06:59:56,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=169940.0, ans=0.125 +2024-09-17 07:00:03,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=169940.0, ans=0.0 +2024-09-17 07:00:10,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=169940.0, ans=0.125 +2024-09-17 07:00:40,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=170020.0, ans=0.125 +2024-09-17 07:00:43,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=170060.0, ans=0.0 +2024-09-17 07:00:51,161 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.948e+01 9.325e+01 9.999e+01 1.093e+02 1.950e+02, threshold=2.000e+02, percent-clipped=0.0 +2024-09-17 07:00:59,701 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.17 vs. limit=15.0 +2024-09-17 07:01:00,145 INFO [train.py:1198] (0/2) Epoch 10, batch 1800, loss[loss=0.285, ctc_loss=0.189, cr_loss=0.4399, attn_decoder_loss=0.2859, over 29694.00 frames. ], tot_loss[loss=0.2665, ctc_loss=0.173, cr_loss=0.4091, attn_decoder_loss=0.2678, over 5790303.28 frames. ], batch size: 83, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:01:14,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=170140.0, ans=0.0 +2024-09-17 07:01:21,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=170140.0, ans=0.2 +2024-09-17 07:01:51,061 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.75 vs. limit=22.5 +2024-09-17 07:02:15,924 INFO [train.py:1198] (0/2) Epoch 10, batch 1850, loss[loss=0.2777, ctc_loss=0.184, cr_loss=0.4301, attn_decoder_loss=0.2786, over 29647.00 frames. ], tot_loss[loss=0.2664, ctc_loss=0.173, cr_loss=0.4095, attn_decoder_loss=0.2677, over 5797165.17 frames. 
], batch size: 86, lr: 1.13e-02, grad_scale: 4.0 +2024-09-17 07:02:19,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=170300.0, ans=0.125 +2024-09-17 07:02:34,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=170340.0, ans=0.1 +2024-09-17 07:02:55,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=170380.0, ans=0.125 +2024-09-17 07:03:03,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=170420.0, ans=0.0 +2024-09-17 07:03:03,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=170420.0, ans=0.0 +2024-09-17 07:03:10,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=170420.0, ans=0.125 +2024-09-17 07:03:11,551 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.20 vs. limit=15.0 +2024-09-17 07:03:18,791 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.29 vs. limit=15.0 +2024-09-17 07:03:26,159 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.871e+01 9.382e+01 1.051e+02 1.159e+02 3.606e+02, threshold=2.101e+02, percent-clipped=3.0 +2024-09-17 07:03:33,540 INFO [train.py:1198] (0/2) Epoch 10, batch 1900, loss[loss=0.2805, ctc_loss=0.174, cr_loss=0.4282, attn_decoder_loss=0.2828, over 29684.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.1734, cr_loss=0.4097, attn_decoder_loss=0.2685, over 5805251.83 frames. ], batch size: 89, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:03:41,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=170500.0, ans=0.125 +2024-09-17 07:03:45,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=170500.0, ans=0.125 +2024-09-17 07:04:15,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=170580.0, ans=0.0 +2024-09-17 07:04:15,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=170580.0, ans=0.0 +2024-09-17 07:04:15,447 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.15 vs. limit=6.0 +2024-09-17 07:04:38,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=170660.0, ans=0.125 +2024-09-17 07:04:41,386 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 07:04:42,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=170660.0, ans=0.125 +2024-09-17 07:04:51,547 INFO [train.py:1198] (0/2) Epoch 10, batch 1950, loss[loss=0.273, ctc_loss=0.1781, cr_loss=0.4351, attn_decoder_loss=0.2739, over 29456.00 frames. ], tot_loss[loss=0.2683, ctc_loss=0.174, cr_loss=0.4115, attn_decoder_loss=0.2696, over 5819929.42 frames. 
], batch size: 78, lr: 1.13e-02, grad_scale: 4.0 +2024-09-17 07:04:59,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=170700.0, ans=0.125 +2024-09-17 07:05:09,351 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.91 vs. limit=12.0 +2024-09-17 07:05:34,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=170780.0, ans=0.1 +2024-09-17 07:05:38,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=170820.0, ans=0.025 +2024-09-17 07:05:44,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=170820.0, ans=0.95 +2024-09-17 07:06:00,806 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.428e+01 1.003e+02 1.077e+02 1.161e+02 3.833e+02, threshold=2.155e+02, percent-clipped=2.0 +2024-09-17 07:06:04,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=170860.0, ans=0.2 +2024-09-17 07:06:06,876 INFO [train.py:1198] (0/2) Epoch 10, batch 2000, loss[loss=0.2585, ctc_loss=0.1794, cr_loss=0.4195, attn_decoder_loss=0.2579, over 29377.00 frames. ], tot_loss[loss=0.2689, ctc_loss=0.1751, cr_loss=0.4127, attn_decoder_loss=0.2702, over 5797252.74 frames. ], batch size: 67, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:06:17,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=170900.0, ans=0.0 +2024-09-17 07:06:28,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=170940.0, ans=0.0 +2024-09-17 07:06:30,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=170940.0, ans=22.5 +2024-09-17 07:06:31,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=170940.0, ans=0.125 +2024-09-17 07:06:32,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=170940.0, ans=0.125 +2024-09-17 07:07:14,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=171060.0, ans=0.1 +2024-09-17 07:07:24,546 INFO [train.py:1198] (0/2) Epoch 10, batch 2050, loss[loss=0.2514, ctc_loss=0.1674, cr_loss=0.4111, attn_decoder_loss=0.2516, over 29458.00 frames. ], tot_loss[loss=0.2681, ctc_loss=0.1746, cr_loss=0.4116, attn_decoder_loss=0.2694, over 5788791.56 frames. 
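], batch size: 70, lr: 1.13e-02, grad_scale: 8.0

The train.py lines report a CTC loss, a consistency-regularization (cr) loss, an attention-decoder loss and one combined loss. The combination is consistent with the scales embedded in the experiment directory name that appears later in this log (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02). A worked check against the batch 2000 entry above; the scale values are read off the directory name rather than from the training command, so treat them as an assumption:

```python
# Recombine the logged per-loss values for Epoch 10, batch 2000 with the
# scales taken from the experiment directory name (assumed, not confirmed
# by the training command itself).
ctc_scale, aed_scale, cr_scale = 0.1, 0.9, 0.02

ctc_loss = 0.1794
cr_loss = 0.4195
attn_decoder_loss = 0.2579

loss = ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss
print(f"{loss:.4f}")  # 0.2584, matching the logged loss=0.2585 up to rounding
```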
+2024-09-17 07:07:30,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=171100.0, ans=10.0 +2024-09-17 07:07:39,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=171140.0, ans=0.125 +2024-09-17 07:07:39,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=171140.0, ans=0.2 +2024-09-17 07:07:46,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.23 vs. limit=15.0 +2024-09-17 07:07:49,602 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.18 vs. limit=12.0 +2024-09-17 07:07:56,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=171180.0, ans=0.125 +2024-09-17 07:08:04,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=171180.0, ans=0.2 +2024-09-17 07:08:05,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=171180.0, ans=0.0 +2024-09-17 07:08:24,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=171220.0, ans=0.125 +2024-09-17 07:08:37,667 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.251e+01 9.516e+01 1.015e+02 1.092e+02 1.956e+02, threshold=2.031e+02, percent-clipped=0.0 +2024-09-17 07:08:42,365 INFO [train.py:1198] (0/2) Epoch 10, batch 2100, loss[loss=0.2579, ctc_loss=0.1584, cr_loss=0.3974, attn_decoder_loss=0.2601, over 29760.00 frames. ], tot_loss[loss=0.2674, ctc_loss=0.1736, cr_loss=0.4109, attn_decoder_loss=0.2687, over 5801160.98 frames. ], batch size: 81, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:09:03,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=171340.0, ans=0.5 +2024-09-17 07:09:08,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=171340.0, ans=0.125 +2024-09-17 07:09:09,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=171340.0, ans=0.2 +2024-09-17 07:09:19,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=171380.0, ans=0.0 +2024-09-17 07:09:29,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=171420.0, ans=0.0 +2024-09-17 07:09:31,271 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.71 vs. limit=15.0 +2024-09-17 07:09:32,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=171420.0, ans=0.0 +2024-09-17 07:09:57,457 INFO [train.py:1198] (0/2) Epoch 10, batch 2150, loss[loss=0.2641, ctc_loss=0.1767, cr_loss=0.4131, attn_decoder_loss=0.2646, over 29454.00 frames. ], tot_loss[loss=0.2668, ctc_loss=0.1731, cr_loss=0.4098, attn_decoder_loss=0.2681, over 5815222.25 frames.
], batch size: 78, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:10:02,732 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.78 vs. limit=15.0 +2024-09-17 07:10:05,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=171500.0, ans=0.125 +2024-09-17 07:10:05,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=171500.0, ans=0.0 +2024-09-17 07:10:06,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=171500.0, ans=0.0 +2024-09-17 07:10:08,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=171500.0, ans=0.125 +2024-09-17 07:10:13,163 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.59 vs. limit=12.0 +2024-09-17 07:10:50,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=171620.0, ans=0.2 +2024-09-17 07:10:52,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=171620.0, ans=0.125 +2024-09-17 07:11:11,933 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.141e+01 9.514e+01 1.011e+02 1.070e+02 3.193e+02, threshold=2.022e+02, percent-clipped=1.0 +2024-09-17 07:11:15,096 INFO [train.py:1198] (0/2) Epoch 10, batch 2200, loss[loss=0.2639, ctc_loss=0.1622, cr_loss=0.3928, attn_decoder_loss=0.2665, over 29626.00 frames. ], tot_loss[loss=0.2669, ctc_loss=0.1732, cr_loss=0.409, attn_decoder_loss=0.2682, over 5811576.17 frames. ], batch size: 86, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:11:39,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=171740.0, ans=0.05 +2024-09-17 07:11:43,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=171780.0, ans=0.125 +2024-09-17 07:11:51,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=171780.0, ans=0.07 +2024-09-17 07:11:55,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.46 vs. limit=22.5 +2024-09-17 07:11:59,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.85 vs. limit=15.0 +2024-09-17 07:12:00,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=171820.0, ans=0.0 +2024-09-17 07:12:19,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=171860.0, ans=0.125 +2024-09-17 07:12:32,669 INFO [train.py:1198] (0/2) Epoch 10, batch 2250, loss[loss=0.2703, ctc_loss=0.1673, cr_loss=0.4118, attn_decoder_loss=0.2726, over 29703.00 frames. ], tot_loss[loss=0.2669, ctc_loss=0.1728, cr_loss=0.4086, attn_decoder_loss=0.2683, over 5810293.80 frames. 
], batch size: 82, lr: 1.12e-02, grad_scale: 4.0 +2024-09-17 07:12:45,733 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.46 vs. limit=10.0 +2024-09-17 07:13:00,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.74 vs. limit=6.0 +2024-09-17 07:13:04,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=171980.0, ans=0.0 +2024-09-17 07:13:06,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=171980.0, ans=0.125 +2024-09-17 07:13:12,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=171980.0, ans=0.09899494936611666 +2024-09-17 07:13:26,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=172020.0, ans=10.0 +2024-09-17 07:13:38,973 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.95 vs. limit=15.0 +2024-09-17 07:13:42,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=172060.0, ans=0.0 +2024-09-17 07:13:46,983 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.071e+01 9.786e+01 1.069e+02 1.181e+02 2.871e+02, threshold=2.139e+02, percent-clipped=1.0 +2024-09-17 07:13:48,478 INFO [train.py:1198] (0/2) Epoch 10, batch 2300, loss[loss=0.2368, ctc_loss=0.1357, cr_loss=0.3582, attn_decoder_loss=0.2401, over 29305.00 frames. ], tot_loss[loss=0.2661, ctc_loss=0.1722, cr_loss=0.4075, attn_decoder_loss=0.2674, over 5797021.43 frames. ], batch size: 71, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:13:55,131 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.99 vs. limit=12.0 +2024-09-17 07:14:29,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=172180.0, ans=0.125 +2024-09-17 07:14:44,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=172220.0, ans=0.0 +2024-09-17 07:15:05,736 INFO [train.py:1198] (0/2) Epoch 10, batch 2350, loss[loss=0.2705, ctc_loss=0.1757, cr_loss=0.4174, attn_decoder_loss=0.2718, over 29692.00 frames. ], tot_loss[loss=0.2661, ctc_loss=0.172, cr_loss=0.4078, attn_decoder_loss=0.2674, over 5803581.47 frames. ], batch size: 83, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:15:14,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=172300.0, ans=0.2 +2024-09-17 07:15:36,727 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.59 vs. limit=22.5 +2024-09-17 07:16:00,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=172420.0, ans=0.125 +2024-09-17 07:16:06,993 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.76 vs. 
limit=15.0 +2024-09-17 07:16:10,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=172460.0, ans=0.125 +2024-09-17 07:16:21,797 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.951e+01 9.247e+01 1.012e+02 1.078e+02 1.616e+02, threshold=2.023e+02, percent-clipped=0.0 +2024-09-17 07:16:23,396 INFO [train.py:1198] (0/2) Epoch 10, batch 2400, loss[loss=0.2544, ctc_loss=0.1613, cr_loss=0.4175, attn_decoder_loss=0.2555, over 29543.00 frames. ], tot_loss[loss=0.2664, ctc_loss=0.1722, cr_loss=0.4084, attn_decoder_loss=0.2678, over 5808037.13 frames. ], batch size: 76, lr: 1.12e-02, grad_scale: 16.0 +2024-09-17 07:16:37,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=172540.0, ans=0.125 +2024-09-17 07:16:54,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=172580.0, ans=0.2 +2024-09-17 07:17:10,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=172620.0, ans=0.05 +2024-09-17 07:17:13,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=172620.0, ans=0.025 +2024-09-17 07:17:21,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=172620.0, ans=0.025 +2024-09-17 07:17:36,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=172660.0, ans=0.125 +2024-09-17 07:17:38,579 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.66 vs. limit=15.0 +2024-09-17 07:17:39,188 INFO [train.py:1198] (0/2) Epoch 10, batch 2450, loss[loss=0.2709, ctc_loss=0.1764, cr_loss=0.4125, attn_decoder_loss=0.2722, over 29685.00 frames. ], tot_loss[loss=0.2676, ctc_loss=0.1735, cr_loss=0.4101, attn_decoder_loss=0.269, over 5783643.26 frames. ], batch size: 82, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:17:50,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=172700.0, ans=0.125 +2024-09-17 07:18:11,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=172780.0, ans=0.125 +2024-09-17 07:18:17,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=172780.0, ans=0.2 +2024-09-17 07:18:39,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=172860.0, ans=0.2 +2024-09-17 07:18:53,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=172860.0, ans=0.125 +2024-09-17 07:18:57,667 INFO [train.py:1198] (0/2) Epoch 10, batch 2500, loss[loss=0.2782, ctc_loss=0.18, cr_loss=0.4443, attn_decoder_loss=0.2793, over 29613.00 frames. ], tot_loss[loss=0.2676, ctc_loss=0.1735, cr_loss=0.4111, attn_decoder_loss=0.2689, over 5794131.21 frames. 
], batch size: 86, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:18:59,187 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.164e+01 9.501e+01 9.966e+01 1.113e+02 2.388e+02, threshold=1.993e+02, percent-clipped=1.0 +2024-09-17 07:19:03,027 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.84 vs. limit=15.0 +2024-09-17 07:19:04,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=172900.0, ans=0.125 +2024-09-17 07:19:25,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=172940.0, ans=0.125 +2024-09-17 07:19:36,231 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.03 vs. limit=15.0 +2024-09-17 07:19:52,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=173020.0, ans=0.1 +2024-09-17 07:20:15,466 INFO [train.py:1198] (0/2) Epoch 10, batch 2550, loss[loss=0.2319, ctc_loss=0.1475, cr_loss=0.3615, attn_decoder_loss=0.2332, over 29388.00 frames. ], tot_loss[loss=0.2676, ctc_loss=0.1734, cr_loss=0.4106, attn_decoder_loss=0.2689, over 5799143.15 frames. ], batch size: 67, lr: 1.12e-02, grad_scale: 4.0 +2024-09-17 07:20:17,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=173100.0, ans=0.125 +2024-09-17 07:20:41,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=173140.0, ans=0.025 +2024-09-17 07:20:47,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=173180.0, ans=0.125 +2024-09-17 07:21:14,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=173260.0, ans=0.125 +2024-09-17 07:21:19,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=173260.0, ans=0.125 +2024-09-17 07:21:30,618 INFO [train.py:1198] (0/2) Epoch 10, batch 2600, loss[loss=0.256, ctc_loss=0.1642, cr_loss=0.4094, attn_decoder_loss=0.2571, over 29431.00 frames. ], tot_loss[loss=0.2677, ctc_loss=0.1733, cr_loss=0.4101, attn_decoder_loss=0.2691, over 5794678.32 frames. ], batch size: 78, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:21:33,522 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.476e+01 9.594e+01 1.032e+02 1.139e+02 3.672e+02, threshold=2.065e+02, percent-clipped=4.0 +2024-09-17 07:21:36,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.33 vs. 
limit=15.0 +2024-09-17 07:21:39,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=173300.0, ans=0.0 +2024-09-17 07:21:48,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=173340.0, ans=0.0 +2024-09-17 07:21:50,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=173340.0, ans=0.5 +2024-09-17 07:22:18,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=173420.0, ans=0.125 +2024-09-17 07:22:33,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=173460.0, ans=0.2 +2024-09-17 07:22:47,840 INFO [train.py:1198] (0/2) Epoch 10, batch 2650, loss[loss=0.2768, ctc_loss=0.1885, cr_loss=0.4342, attn_decoder_loss=0.277, over 29312.00 frames. ], tot_loss[loss=0.2678, ctc_loss=0.1737, cr_loss=0.4109, attn_decoder_loss=0.2692, over 5800667.47 frames. ], batch size: 100, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:22:58,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=173500.0, ans=0.125 +2024-09-17 07:23:03,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=173540.0, ans=0.0 +2024-09-17 07:23:40,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=173620.0, ans=0.0 +2024-09-17 07:24:03,239 INFO [train.py:1198] (0/2) Epoch 10, batch 2700, loss[loss=0.2817, ctc_loss=0.184, cr_loss=0.4259, attn_decoder_loss=0.2831, over 29513.00 frames. ], tot_loss[loss=0.2679, ctc_loss=0.1734, cr_loss=0.4108, attn_decoder_loss=0.2692, over 5796376.80 frames. ], batch size: 87, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:24:08,381 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.951e+01 9.630e+01 1.023e+02 1.091e+02 1.557e+02, threshold=2.045e+02, percent-clipped=0.0 +2024-09-17 07:24:31,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=173740.0, ans=0.1 +2024-09-17 07:24:34,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=173780.0, ans=0.0 +2024-09-17 07:25:15,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=173860.0, ans=0.04949747468305833 +2024-09-17 07:25:17,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=173860.0, ans=0.1 +2024-09-17 07:25:21,533 INFO [train.py:1198] (0/2) Epoch 10, batch 2750, loss[loss=0.2622, ctc_loss=0.1689, cr_loss=0.4056, attn_decoder_loss=0.2635, over 29548.00 frames. ], tot_loss[loss=0.2665, ctc_loss=0.172, cr_loss=0.409, attn_decoder_loss=0.2679, over 5794471.40 frames. 
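], batch size: 75, lr: 1.12e-02, grad_scale: 8.0

The grad_scale value at the end of each train.py line (4.0, 8.0 and 16.0 at different points in this log) is the dynamic loss scale used for mixed-precision training: it grows while steps succeed and is cut back when the scaled gradients overflow. A sketch of the standard PyTorch mechanism, assuming the run uses torch.amp's GradScaler or an equivalent; the growth/backoff settings below are illustrative defaults, not values read from this log:

```python
# Dynamic loss scaling as in torch.cuda.amp; illustrative settings only.
import torch

scaler = torch.cuda.amp.GradScaler(
    init_scale=8.0,       # comparable to the grad_scale values logged here
    growth_factor=2.0,    # scale doubles after growth_interval clean steps
    backoff_factor=0.5,   # and halves immediately on inf/nan gradients
    growth_interval=2000,
)

model = torch.nn.Linear(80, 500).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1.12e-02)

features = torch.randn(8, 80, device="cuda")
with torch.cuda.amp.autocast():
    loss = model(features).pow(2).mean()
scaler.scale(loss).backward()  # backward through the scaled loss
scaler.step(optimizer)         # unscales grads, skips the step on overflow
scaler.update()                # adjusts grad_scale for the next batch
print(scaler.get_scale())
```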
+2024-09-17 07:25:32,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=173900.0, ans=0.0 +2024-09-17 07:25:35,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff2.min_abs, batch_count=173940.0, ans=0.1 +2024-09-17 07:25:41,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=173940.0, ans=6.0 +2024-09-17 07:25:54,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=173980.0, ans=0.125 +2024-09-17 07:25:56,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=173980.0, ans=0.025 +2024-09-17 07:26:05,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=174020.0, ans=0.0 +2024-09-17 07:26:27,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=174060.0, ans=0.125 +2024-09-17 07:26:36,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=174060.0, ans=0.0 +2024-09-17 07:26:39,170 INFO [train.py:1198] (0/2) Epoch 10, batch 2800, loss[loss=0.3036, ctc_loss=0.2314, cr_loss=0.4154, attn_decoder_loss=0.3024, over 20239.00 frames. ], tot_loss[loss=0.2669, ctc_loss=0.1724, cr_loss=0.4098, attn_decoder_loss=0.2683, over 5775947.76 frames. ], batch size: 209, lr: 1.12e-02, grad_scale: 16.0 +2024-09-17 07:26:43,478 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.595e+01 1.029e+02 1.148e+02 1.291e+02 2.335e+02, threshold=2.295e+02, percent-clipped=2.0 +2024-09-17 07:26:57,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.39 vs. limit=12.0 +2024-09-17 07:27:13,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.71 vs. limit=6.0 +2024-09-17 07:27:31,500 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.71 vs. limit=15.0 +2024-09-17 07:27:38,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=174260.0, ans=0.125 +2024-09-17 07:27:42,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=174260.0, ans=0.025 +2024-09-17 07:27:54,278 INFO [train.py:1198] (0/2) Epoch 10, batch 2850, loss[loss=0.2686, ctc_loss=0.1743, cr_loss=0.3776, attn_decoder_loss=0.2707, over 29485.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.1732, cr_loss=0.4103, attn_decoder_loss=0.2685, over 5761410.03 frames. ], batch size: 77, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:28:19,836 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.35 vs.
limit=15.0 +2024-09-17 07:28:23,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=174340.0, ans=0.125 +2024-09-17 07:28:51,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=174420.0, ans=0.2 +2024-09-17 07:29:11,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=174500.0, ans=0.025 +2024-09-17 07:29:12,372 INFO [train.py:1198] (0/2) Epoch 10, batch 2900, loss[loss=0.2494, ctc_loss=0.1502, cr_loss=0.3791, attn_decoder_loss=0.252, over 29439.00 frames. ], tot_loss[loss=0.2684, ctc_loss=0.174, cr_loss=0.4122, attn_decoder_loss=0.2698, over 5786168.27 frames. ], batch size: 79, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:29:18,301 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.890e+01 9.394e+01 1.011e+02 1.079e+02 3.902e+02, threshold=2.022e+02, percent-clipped=2.0 +2024-09-17 07:29:27,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.08 vs. limit=22.5 +2024-09-17 07:29:50,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=174580.0, ans=0.125 +2024-09-17 07:29:56,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=174620.0, ans=0.025 +2024-09-17 07:30:01,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=174620.0, ans=0.0 +2024-09-17 07:30:18,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=174660.0, ans=0.95 +2024-09-17 07:30:27,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=174660.0, ans=0.0 +2024-09-17 07:30:30,071 INFO [train.py:1198] (0/2) Epoch 10, batch 2950, loss[loss=0.253, ctc_loss=0.1604, cr_loss=0.388, attn_decoder_loss=0.2547, over 29530.00 frames. ], tot_loss[loss=0.2669, ctc_loss=0.1724, cr_loss=0.4089, attn_decoder_loss=0.2683, over 5781632.00 frames. ], batch size: 75, lr: 1.11e-02, grad_scale: 4.0 +2024-09-17 07:30:30,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=174700.0, ans=0.5 +2024-09-17 07:30:59,698 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.33 vs. limit=15.0 +2024-09-17 07:31:15,043 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.73 vs. limit=15.0 +2024-09-17 07:31:24,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.26 vs. limit=15.0 +2024-09-17 07:31:28,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=174820.0, ans=0.125 +2024-09-17 07:31:46,173 INFO [train.py:1198] (0/2) Epoch 10, batch 3000, loss[loss=0.2705, ctc_loss=0.1691, cr_loss=0.4437, attn_decoder_loss=0.2719, over 29761.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1727, cr_loss=0.4091, attn_decoder_loss=0.2684, over 5782139.92 frames. 
], batch size: 81, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:31:46,173 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 07:32:05,336 INFO [train.py:1230] (0/2) Epoch 10, validation: loss=0.2137, ctc_loss=0.04855, cr_loss=4.713e-15, attn_decoder_loss=0.232, over 944034.00 frames. +2024-09-17 07:32:05,337 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 07:32:06,537 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.89 vs. limit=15.0 +2024-09-17 07:32:14,596 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.331e+01 9.561e+01 1.037e+02 1.121e+02 2.530e+02, threshold=2.075e+02, percent-clipped=2.0 +2024-09-17 07:32:52,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=175020.0, ans=0.125 +2024-09-17 07:33:00,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=175020.0, ans=0.07 +2024-09-17 07:33:01,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=175020.0, ans=0.025 +2024-09-17 07:33:04,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=175060.0, ans=0.125 +2024-09-17 07:33:07,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=175060.0, ans=0.0 +2024-09-17 07:33:07,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=175060.0, ans=0.0 +2024-09-17 07:33:16,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=175060.0, ans=0.95 +2024-09-17 07:33:20,910 INFO [train.py:1198] (0/2) Epoch 10, batch 3050, loss[loss=0.2519, ctc_loss=0.1623, cr_loss=0.4096, attn_decoder_loss=0.2527, over 29535.00 frames. ], tot_loss[loss=0.2676, ctc_loss=0.1735, cr_loss=0.4107, attn_decoder_loss=0.269, over 5775788.27 frames. ], batch size: 76, lr: 1.11e-02, grad_scale: 4.0 +2024-09-17 07:33:30,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=175100.0, ans=0.0 +2024-09-17 07:33:55,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=175180.0, ans=0.1 +2024-09-17 07:34:12,444 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 07:34:19,001 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.01 vs. limit=15.0 +2024-09-17 07:34:36,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=175260.0, ans=0.2 +2024-09-17 07:34:39,150 INFO [train.py:1198] (0/2) Epoch 10, batch 3100, loss[loss=0.2929, ctc_loss=0.1933, cr_loss=0.449, attn_decoder_loss=0.294, over 29285.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.173, cr_loss=0.4099, attn_decoder_loss=0.2686, over 5776765.80 frames. 
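], batch size: 100, lr: 1.11e-02, grad_scale: 8.0

The recurring WARNING [optim.py:487] lines come from gradient clipping inside the optimizer: the five grad-norm quartiles appear to be the min / 25% / median / 75% / max of recent gradient norms, the threshold tracks Clipping_scale times the logged median (for the warning above, 2.0 * 1.037e+02 is approximately the reported threshold=2.075e+02), and percent-clipped is the share of recent batches whose gradient norm exceeded the threshold. A rough sketch of that bookkeeping; the window size and the exact statistics kept by icefall's optim.py are assumptions here:

```python
# Sketch of median-based gradient clipping matching the quartile warnings.
import torch

class GradNormClipper:
    def __init__(self, clipping_scale: float = 2.0, window: int = 128):
        self.clipping_scale = clipping_scale
        self.window = window
        self.norms: list[float] = []

    def clip_(self, params) -> float:
        grads = [p.grad for p in params if p.grad is not None]
        norm = torch.linalg.vector_norm(
            torch.stack([g.norm() for g in grads])).item()
        self.norms = (self.norms + [norm])[-self.window:]
        q = torch.quantile(
            torch.tensor(self.norms),
            torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))  # the five quartiles
        threshold = self.clipping_scale * q[2].item()   # scale * median
        if norm > threshold:  # rescale all gradients down to the threshold
            for g in grads:
                g.mul_(threshold / norm)
        return threshold
```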
+2024-09-17 07:34:39,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=175300.0, ans=0.2 +2024-09-17 07:34:48,269 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.394e+01 9.542e+01 1.021e+02 1.174e+02 1.946e+02, threshold=2.041e+02, percent-clipped=0.0 +2024-09-17 07:34:48,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=175300.0, ans=0.2 +2024-09-17 07:34:50,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=175300.0, ans=0.125 +2024-09-17 07:34:57,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=175340.0, ans=0.07 +2024-09-17 07:35:03,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=175340.0, ans=0.025 +2024-09-17 07:35:21,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=175380.0, ans=0.1 +2024-09-17 07:35:39,311 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.33 vs. limit=22.5 +2024-09-17 07:35:57,191 INFO [train.py:1198] (0/2) Epoch 10, batch 3150, loss[loss=0.274, ctc_loss=0.175, cr_loss=0.4064, attn_decoder_loss=0.2759, over 28817.00 frames. ], tot_loss[loss=0.2669, ctc_loss=0.1725, cr_loss=0.4093, attn_decoder_loss=0.2683, over 5783188.77 frames. ], batch size: 104, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:36:00,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=175500.0, ans=0.1 +2024-09-17 07:36:21,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=175540.0, ans=10.0 +2024-09-17 07:36:30,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=175580.0, ans=0.0 +2024-09-17 07:36:32,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff2.min_abs, batch_count=175580.0, ans=0.1 +2024-09-17 07:36:51,063 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.82 vs. limit=15.0 +2024-09-17 07:37:12,407 INFO [train.py:1198] (0/2) Epoch 10, batch 3200, loss[loss=0.2567, ctc_loss=0.1622, cr_loss=0.3981, attn_decoder_loss=0.2583, over 29402.00 frames. ], tot_loss[loss=0.2667, ctc_loss=0.1723, cr_loss=0.4094, attn_decoder_loss=0.2681, over 5794011.20 frames.
], batch size: 79, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:37:24,405 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.118e+01 9.421e+01 9.970e+01 1.120e+02 1.872e+02, threshold=1.994e+02, percent-clipped=0.0 +2024-09-17 07:37:29,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=175740.0, ans=0.1 +2024-09-17 07:37:45,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=175780.0, ans=0.2 +2024-09-17 07:37:49,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=175780.0, ans=0.0 +2024-09-17 07:38:10,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=175820.0, ans=0.0 +2024-09-17 07:38:30,173 INFO [train.py:1198] (0/2) Epoch 10, batch 3250, loss[loss=0.2747, ctc_loss=0.1745, cr_loss=0.4224, attn_decoder_loss=0.2764, over 29712.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.1728, cr_loss=0.4103, attn_decoder_loss=0.2686, over 5800208.71 frames. ], batch size: 84, lr: 1.11e-02, grad_scale: 4.0 +2024-09-17 07:38:30,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=175900.0, ans=0.09899494936611666 +2024-09-17 07:38:46,151 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.57 vs. limit=10.0 +2024-09-17 07:39:06,443 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-44000.pt +2024-09-17 07:39:19,569 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 07:39:37,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=176060.0, ans=0.0 +2024-09-17 07:39:45,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=176060.0, ans=0.07 +2024-09-17 07:39:54,677 INFO [train.py:1198] (0/2) Epoch 10, batch 3300, loss[loss=0.2864, ctc_loss=0.1917, cr_loss=0.4319, attn_decoder_loss=0.2873, over 28312.00 frames. ], tot_loss[loss=0.2663, ctc_loss=0.1721, cr_loss=0.4093, attn_decoder_loss=0.2677, over 5796883.31 frames. 
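], batch size: 111, lr: 1.11e-02, grad_scale: 8.0

The Whitening entries ([scaling.py:1024]) report a per-module whitening metric against a limit, e.g. metric=5.57 vs. limit=10.0 above: the metric is 1.0 when the module's activation covariance is proportional to the identity and grows as the covariance becomes more anisotropic, and the module intervenes when the metric exceeds its limit. A hedged sketch of one natural way to compute such a metric; the exact formula in icefall's Whiten module in scaling.py may differ:

```python
# Whitening metric sketch: ratio of the mean squared eigenvalue of the
# activation covariance to the squared mean eigenvalue (1.0 when "white").
import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    # x: (num_frames, num_channels) activations of one module.
    x = x - x.mean(dim=0)
    cov = (x.T @ x) / x.shape[0]          # (C, C) covariance estimate
    eigs = torch.linalg.eigvalsh(cov)     # real, non-negative eigenvalues
    return (eigs ** 2).mean() / (eigs.mean() ** 2 + 1e-20)

x = torch.randn(1000, 512) @ torch.randn(512, 512)  # correlated features
print(whitening_metric(x))  # large vs. a limit such as 15.0; ~1.0 if white
```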
+2024-09-17 07:40:01,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=176100.0, ans=0.125 +2024-09-17 07:40:06,777 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.780e+01 9.364e+01 1.005e+02 1.120e+02 3.139e+02, threshold=2.009e+02, percent-clipped=4.0 +2024-09-17 07:40:20,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=176140.0, ans=0.1 +2024-09-17 07:40:32,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=176180.0, ans=0.2 +2024-09-17 07:40:35,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=176180.0, ans=0.025 +2024-09-17 07:40:38,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=176220.0, ans=0.1 +2024-09-17 07:40:56,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=176260.0, ans=0.2 +2024-09-17 07:40:58,477 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.01 vs. limit=15.0 +2024-09-17 07:41:09,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.66 vs. limit=22.5 +2024-09-17 07:41:09,871 INFO [train.py:1198] (0/2) Epoch 10, batch 3350, loss[loss=0.2794, ctc_loss=0.1827, cr_loss=0.4324, attn_decoder_loss=0.2805, over 28870.00 frames. ], tot_loss[loss=0.2674, ctc_loss=0.1733, cr_loss=0.4102, attn_decoder_loss=0.2687, over 5773905.06 frames. ], batch size: 104, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:41:17,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=176300.0, ans=0.0 +2024-09-17 07:41:32,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=176340.0, ans=0.1 +2024-09-17 07:41:33,610 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.02 vs. limit=12.0 +2024-09-17 07:41:34,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=176340.0, ans=0.125 +2024-09-17 07:41:49,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=176380.0, ans=0.0 +2024-09-17 07:41:55,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=176420.0, ans=0.2 +2024-09-17 07:42:01,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=176420.0, ans=0.125 +2024-09-17 07:42:02,355 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.66 vs. limit=15.0 +2024-09-17 07:42:27,214 INFO [train.py:1198] (0/2) Epoch 10, batch 3400, loss[loss=0.2413, ctc_loss=0.1557, cr_loss=0.3896, attn_decoder_loss=0.2422, over 29330.00 frames. ], tot_loss[loss=0.2674, ctc_loss=0.1736, cr_loss=0.4107, attn_decoder_loss=0.2687, over 5766880.42 frames.
], batch size: 67, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:42:36,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=176500.0, ans=0.95 +2024-09-17 07:42:39,315 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.353e+01 9.300e+01 1.006e+02 1.112e+02 2.316e+02, threshold=2.013e+02, percent-clipped=1.0 +2024-09-17 07:42:41,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=176540.0, ans=0.1 +2024-09-17 07:42:56,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=176580.0, ans=0.025 +2024-09-17 07:43:05,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=176580.0, ans=0.125 +2024-09-17 07:43:05,965 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.29 vs. limit=15.0 +2024-09-17 07:43:06,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=176580.0, ans=0.07 +2024-09-17 07:43:14,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=176620.0, ans=0.2 +2024-09-17 07:43:21,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=176620.0, ans=0.0 +2024-09-17 07:43:31,697 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.82 vs. limit=12.0 +2024-09-17 07:43:44,824 INFO [train.py:1198] (0/2) Epoch 10, batch 3450, loss[loss=0.2648, ctc_loss=0.1618, cr_loss=0.3791, attn_decoder_loss=0.2678, over 28216.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.1731, cr_loss=0.41, attn_decoder_loss=0.2686, over 5775709.15 frames. ], batch size: 111, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:43:58,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=176740.0, ans=0.1 +2024-09-17 07:43:58,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=176740.0, ans=0.0 +2024-09-17 07:44:12,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.29 vs. limit=22.5 +2024-09-17 07:44:27,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=176780.0, ans=0.09899494936611666 +2024-09-17 07:44:29,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=176820.0, ans=0.125 +2024-09-17 07:44:48,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=176860.0, ans=0.125 +2024-09-17 07:44:57,189 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.45 vs. 
limit=22.5 +2024-09-17 07:44:57,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=176860.0, ans=0.125 +2024-09-17 07:45:00,783 INFO [train.py:1198] (0/2) Epoch 10, batch 3500, loss[loss=0.2404, ctc_loss=0.1551, cr_loss=0.3864, attn_decoder_loss=0.2412, over 29315.00 frames. ], tot_loss[loss=0.2668, ctc_loss=0.1728, cr_loss=0.4097, attn_decoder_loss=0.2681, over 5777256.16 frames. ], batch size: 71, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:45:12,847 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.066e+01 9.560e+01 1.051e+02 1.170e+02 3.242e+02, threshold=2.102e+02, percent-clipped=4.0 +2024-09-17 07:45:22,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=176940.0, ans=0.1 +2024-09-17 07:45:30,071 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.93 vs. limit=15.0 +2024-09-17 07:45:31,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=176980.0, ans=0.1 +2024-09-17 07:45:32,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=176980.0, ans=0.0 +2024-09-17 07:45:37,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=176980.0, ans=0.125 +2024-09-17 07:45:39,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=176980.0, ans=0.025 +2024-09-17 07:45:45,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=176980.0, ans=0.125 +2024-09-17 07:46:16,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=177100.0, ans=0.1 +2024-09-17 07:46:17,482 INFO [train.py:1198] (0/2) Epoch 10, batch 3550, loss[loss=0.28, ctc_loss=0.1875, cr_loss=0.4393, attn_decoder_loss=0.2805, over 29708.00 frames. ], tot_loss[loss=0.2666, ctc_loss=0.1724, cr_loss=0.4091, attn_decoder_loss=0.268, over 5784224.02 frames. 
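
Most of the `ScheduledFloat: name=..., batch_count=..., ans=...` traffic reports hyperparameters that are deterministic functions of the global batch count: dropout probabilities, balancer settings, skip rates and scale floors are all annealed as training progresses. A minimal sketch of such a piecewise-linear schedule follows; the `(0, 0.3) -> (20000, 0.1)` breakpoints are the dropout schedule commonly used in zipformer recipes (an assumption, though it is consistent with `dropout_p ... ans=0.1` at batch_count 176940 above):

```python
class ScheduledFloat:
    """Piecewise-linear function of the global batch count (a sketch of
    the class of the same name in icefall's zipformer/scaling.py)."""

    def __init__(self, *points):
        self.points = sorted(points)   # (batch_count, value) pairs

    def value(self, batch_count: float) -> float:
        pts = self.points
        if batch_count <= pts[0][0]:
            return pts[0][1]
        if batch_count >= pts[-1][0]:
            return pts[-1][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if x0 <= batch_count <= x1:
                t = (batch_count - x0) / (x1 - x0)
                return y0 + t * (y1 - y0)

dropout_p = ScheduledFloat((0.0, 0.3), (20000.0, 0.1))
assert dropout_p.value(176940.0) == 0.1   # matches ans=0.1 in the log
```
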
], batch size: 89, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:46:33,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=177140.0, ans=0.125 +2024-09-17 07:46:38,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=177140.0, ans=0.125 +2024-09-17 07:46:47,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=177180.0, ans=0.125 +2024-09-17 07:46:53,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=177180.0, ans=0.125 +2024-09-17 07:47:06,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=177220.0, ans=0.125 +2024-09-17 07:47:20,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=177260.0, ans=0.2 +2024-09-17 07:47:21,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=177260.0, ans=0.125 +2024-09-17 07:47:31,489 INFO [train.py:1198] (0/2) Epoch 10, batch 3600, loss[loss=0.2664, ctc_loss=0.1691, cr_loss=0.4141, attn_decoder_loss=0.268, over 29495.00 frames. ], tot_loss[loss=0.2663, ctc_loss=0.1718, cr_loss=0.4082, attn_decoder_loss=0.2677, over 5792891.47 frames. ], batch size: 77, lr: 1.11e-02, grad_scale: 16.0 +2024-09-17 07:47:39,443 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 07:47:42,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=177300.0, ans=0.025 +2024-09-17 07:47:44,996 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 9.296e+01 9.828e+01 1.086e+02 1.804e+02, threshold=1.966e+02, percent-clipped=0.0 +2024-09-17 07:48:04,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=177380.0, ans=0.125 +2024-09-17 07:48:11,347 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=6.06 vs. limit=6.0 +2024-09-17 07:48:40,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=177460.0, ans=0.125 +2024-09-17 07:48:44,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.60 vs. limit=22.5 +2024-09-17 07:48:45,917 INFO [train.py:1198] (0/2) Epoch 10, batch 3650, loss[loss=0.2791, ctc_loss=0.1767, cr_loss=0.4274, attn_decoder_loss=0.281, over 29523.00 frames. ], tot_loss[loss=0.2652, ctc_loss=0.1705, cr_loss=0.406, attn_decoder_loss=0.2667, over 5792929.07 frames. 
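
Each `loss[...]` entry decomposes the objective into a CTC term, a consistency-regularization (`cr`) term and an attention-decoder (`attn_decoder`) term. The scales encoded in the experiment directory name that appears further down this log (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02) reproduce the logged totals, e.g. for batch 3600 above: 0.1*0.1691 + 0.9*0.268 + 0.02*0.4141 = 0.2664.

```python
# Recombining the logged loss components; scales taken from the experiment
# directory name (ctc 0.1, aed 0.9, cr 0.02), not stated in the log lines.
def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float,
                  ctc_scale: float = 0.1, aed_scale: float = 0.9,
                  cr_scale: float = 0.02) -> float:
    return ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss

# batch 3600: loss=0.2664, ctc_loss=0.1691, cr_loss=0.4141, attn_decoder_loss=0.268
assert abs(combined_loss(0.1691, 0.268, 0.4141) - 0.2664) < 5e-4
```
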
], batch size: 90, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:48:46,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=177500.0, ans=0.125 +2024-09-17 07:48:50,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=177500.0, ans=0.025 +2024-09-17 07:48:53,982 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.48 vs. limit=15.0 +2024-09-17 07:48:58,297 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.77 vs. limit=22.5 +2024-09-17 07:48:59,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=177540.0, ans=0.0 +2024-09-17 07:49:06,134 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.60 vs. limit=22.5 +2024-09-17 07:49:08,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=177540.0, ans=0.125 +2024-09-17 07:49:25,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=177580.0, ans=0.1 +2024-09-17 07:49:25,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=177580.0, ans=0.0 +2024-09-17 07:49:37,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=177620.0, ans=0.025 +2024-09-17 07:50:03,432 INFO [train.py:1198] (0/2) Epoch 10, batch 3700, loss[loss=0.2732, ctc_loss=0.1709, cr_loss=0.4155, attn_decoder_loss=0.2754, over 29711.00 frames. ], tot_loss[loss=0.2652, ctc_loss=0.1703, cr_loss=0.4059, attn_decoder_loss=0.2667, over 5803799.23 frames. ], batch size: 84, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:50:15,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=177700.0, ans=0.125 +2024-09-17 07:50:16,866 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.785e+01 9.275e+01 9.841e+01 1.076e+02 3.002e+02, threshold=1.968e+02, percent-clipped=1.0 +2024-09-17 07:50:21,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=177740.0, ans=0.2 +2024-09-17 07:50:55,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=177820.0, ans=0.2 +2024-09-17 07:51:07,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=177860.0, ans=0.95 +2024-09-17 07:51:17,406 INFO [train.py:1198] (0/2) Epoch 10, batch 3750, loss[loss=0.2418, ctc_loss=0.1517, cr_loss=0.3831, attn_decoder_loss=0.2433, over 29308.00 frames. ], tot_loss[loss=0.2655, ctc_loss=0.1709, cr_loss=0.4069, attn_decoder_loss=0.267, over 5806864.01 frames. 
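
The `cr_loss` component itself is a consistency-regularization term (the `cr-loss-scale-0.02` fragment of the experiment name below points the same way). One common formulation, sketched here, is a symmetric KL divergence between the frame-level CTC posteriors of two differently augmented views of the same utterance, with a stop-gradient on the "teacher" side of each direction; the exact augmentation and loss used in this run are not visible in the log, so the details below are assumptions:

```python
import torch.nn.functional as F

def cr_loss(ctc_logits_a, ctc_logits_b):
    """Symmetric KL between the CTC posteriors of two augmented views
    of the same batch (a sketch of a CR-CTC-style consistency loss)."""
    log_p_a = F.log_softmax(ctc_logits_a, dim=-1)
    log_p_b = F.log_softmax(ctc_logits_b, dim=-1)
    kl_ab = F.kl_div(log_p_a, log_p_b.detach().exp(), reduction="batchmean")
    kl_ba = F.kl_div(log_p_b, log_p_a.detach().exp(), reduction="batchmean")
    return 0.5 * (kl_ab + kl_ba)
```
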
], batch size: 67, lr: 1.10e-02, grad_scale: 4.0 +2024-09-17 07:51:52,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=177980.0, ans=0.0 +2024-09-17 07:52:10,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=178020.0, ans=0.0 +2024-09-17 07:52:16,419 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.57 vs. limit=10.0 +2024-09-17 07:52:17,944 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.15 vs. limit=15.0 +2024-09-17 07:52:21,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=178060.0, ans=0.125 +2024-09-17 07:52:28,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=178060.0, ans=0.125 +2024-09-17 07:52:32,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=178100.0, ans=0.0 +2024-09-17 07:52:33,648 INFO [train.py:1198] (0/2) Epoch 10, batch 3800, loss[loss=0.2709, ctc_loss=0.1694, cr_loss=0.4216, attn_decoder_loss=0.2728, over 29627.00 frames. ], tot_loss[loss=0.2648, ctc_loss=0.1699, cr_loss=0.4052, attn_decoder_loss=0.2663, over 5797175.08 frames. ], batch size: 86, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 07:52:48,525 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.369e+01 9.594e+01 1.015e+02 1.096e+02 4.461e+02, threshold=2.030e+02, percent-clipped=1.0 +2024-09-17 07:52:54,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.14 vs. limit=12.0 +2024-09-17 07:53:05,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=178180.0, ans=0.125 +2024-09-17 07:53:26,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=178220.0, ans=0.125 +2024-09-17 07:53:48,156 INFO [train.py:1198] (0/2) Epoch 10, batch 3850, loss[loss=0.2761, ctc_loss=0.1767, cr_loss=0.4189, attn_decoder_loss=0.2778, over 29230.00 frames. ], tot_loss[loss=0.2646, ctc_loss=0.1697, cr_loss=0.4055, attn_decoder_loss=0.2662, over 5811755.85 frames. 
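
`grad_scale` in the per-batch summaries (16.0, 8.0 and 4.0 at different points in this excerpt) is the dynamic loss scale of mixed-precision training: it is halved when scaled gradients overflow and grown back after a stretch of stable steps, which is why it drifts between powers of two. A sketch of the standard PyTorch mechanism, assuming this run uses `torch.cuda.amp` (the scaler settings shown are library defaults, not this run's values):

```python
import torch

scaler = torch.cuda.amp.GradScaler(init_scale=65536.0, growth_factor=2.0,
                                   backoff_factor=0.5, growth_interval=2000)

def train_step(model, optimizer, inputs, targets, criterion):
    optimizer.zero_grad(set_to_none=True)
    with torch.cuda.amp.autocast():
        loss = criterion(model(inputs), targets)
    scaler.scale(loss).backward()  # backward through the scaled loss
    scaler.step(optimizer)         # unscales grads; skips the step on inf/nan
    scaler.update()                # halves the scale on overflow, grows it when stable
    return loss.detach(), scaler.get_scale()  # get_scale() -> the logged grad_scale
```
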
], batch size: 100, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 07:54:16,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=178380.0, ans=0.1 +2024-09-17 07:54:19,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=178380.0, ans=0.0 +2024-09-17 07:54:24,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=178380.0, ans=0.0 +2024-09-17 07:54:45,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=178420.0, ans=0.0 +2024-09-17 07:55:01,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=178500.0, ans=0.0 +2024-09-17 07:55:04,010 INFO [train.py:1198] (0/2) Epoch 10, batch 3900, loss[loss=0.284, ctc_loss=0.1926, cr_loss=0.4425, attn_decoder_loss=0.2843, over 29638.00 frames. ], tot_loss[loss=0.265, ctc_loss=0.1701, cr_loss=0.4063, attn_decoder_loss=0.2665, over 5816508.48 frames. ], batch size: 86, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 07:55:11,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=178500.0, ans=0.0 +2024-09-17 07:55:20,365 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 9.621e+01 1.032e+02 1.104e+02 1.342e+02, threshold=2.064e+02, percent-clipped=0.0 +2024-09-17 07:55:22,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=178540.0, ans=0.0 +2024-09-17 07:55:22,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.75 vs. limit=15.0 +2024-09-17 07:55:23,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=178540.0, ans=0.1 +2024-09-17 07:55:26,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=178540.0, ans=0.125 +2024-09-17 07:56:15,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=178660.0, ans=0.2 +2024-09-17 07:56:18,296 INFO [train.py:1198] (0/2) Epoch 10, batch 3950, loss[loss=0.2834, ctc_loss=0.1807, cr_loss=0.4176, attn_decoder_loss=0.2855, over 29490.00 frames. ], tot_loss[loss=0.2652, ctc_loss=0.17, cr_loss=0.4069, attn_decoder_loss=0.2668, over 5836067.45 frames. ], batch size: 97, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 07:57:01,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=178820.0, ans=0.0 +2024-09-17 07:57:01,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.90 vs. 
limit=15.0 +2024-09-17 07:57:02,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=178820.0, ans=0.0 +2024-09-17 07:57:09,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=178820.0, ans=0.125 +2024-09-17 07:57:10,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=178820.0, ans=0.125 +2024-09-17 07:57:27,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=178860.0, ans=0.2 +2024-09-17 07:57:33,108 INFO [train.py:1198] (0/2) Epoch 10, batch 4000, loss[loss=0.2473, ctc_loss=0.158, cr_loss=0.3984, attn_decoder_loss=0.2484, over 29506.00 frames. ], tot_loss[loss=0.2656, ctc_loss=0.1707, cr_loss=0.4078, attn_decoder_loss=0.2671, over 5812341.99 frames. ], batch size: 74, lr: 1.10e-02, grad_scale: 16.0 +2024-09-17 07:57:40,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=178900.0, ans=0.0 +2024-09-17 07:57:50,421 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 9.319e+01 1.030e+02 1.152e+02 2.635e+02, threshold=2.059e+02, percent-clipped=1.0 +2024-09-17 07:58:47,041 INFO [train.py:1198] (0/2) Epoch 10, batch 4050, loss[loss=0.3077, ctc_loss=0.2443, cr_loss=0.4507, attn_decoder_loss=0.3047, over 20444.00 frames. ], tot_loss[loss=0.2658, ctc_loss=0.171, cr_loss=0.4077, attn_decoder_loss=0.2672, over 5796888.87 frames. ], batch size: 210, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 07:59:01,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=179140.0, ans=0.1 +2024-09-17 07:59:01,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=179140.0, ans=0.125 +2024-09-17 07:59:16,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=179180.0, ans=10.0 +2024-09-17 07:59:37,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=9.68 vs. limit=15.0 +2024-09-17 07:59:38,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=179220.0, ans=0.2 +2024-09-17 07:59:56,363 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.81 vs. limit=15.0 +2024-09-17 08:00:00,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=179300.0, ans=0.0 +2024-09-17 08:00:01,773 INFO [train.py:1198] (0/2) Epoch 10, batch 4100, loss[loss=0.284, ctc_loss=0.1859, cr_loss=0.4158, attn_decoder_loss=0.2857, over 29493.00 frames. ], tot_loss[loss=0.2656, ctc_loss=0.1707, cr_loss=0.4071, attn_decoder_loss=0.2671, over 5791699.78 frames. 
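
Entries like `bypass_mid.scale_min ... ans=0.2` belong to the encoder's bypass connections: each layer output is a learned per-channel interpolation between the layer's input and output, and the scheduled `scale_min` floor controls how far the interpolation may collapse toward the input. A simplified sketch, assuming the interpolation form used in zipformer (the clamping details are assumptions):

```python
import torch
import torch.nn as nn

class Bypass(nn.Module):
    """Learned interpolation between a layer's input and output
    (a simplified sketch of zipformer's bypass module)."""

    def __init__(self, num_channels: int, scale_min: float = 0.2):
        super().__init__()
        self.scale = nn.Parameter(torch.ones(num_channels))
        self.scale_min = scale_min  # scheduled; 0.2 at this point in training

    def forward(self, x_orig: torch.Tensor, x_layer: torch.Tensor) -> torch.Tensor:
        s = self.scale.clamp(min=self.scale_min, max=1.0)
        return x_orig + s * (x_layer - x_orig)
```
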
], batch size: 90, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 08:00:03,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=179300.0, ans=0.125 +2024-09-17 08:00:12,705 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.04 vs. limit=22.5 +2024-09-17 08:00:20,760 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.192e+01 9.136e+01 9.895e+01 1.094e+02 2.839e+02, threshold=1.979e+02, percent-clipped=1.0 +2024-09-17 08:00:46,416 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:01:00,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=179460.0, ans=0.0 +2024-09-17 08:01:05,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=179460.0, ans=0.0 +2024-09-17 08:01:05,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=179460.0, ans=0.125 +2024-09-17 08:01:15,649 INFO [train.py:1198] (0/2) Epoch 10, batch 4150, loss[loss=0.2479, ctc_loss=0.1598, cr_loss=0.3947, attn_decoder_loss=0.2489, over 29488.00 frames. ], tot_loss[loss=0.2651, ctc_loss=0.1704, cr_loss=0.4064, attn_decoder_loss=0.2666, over 5797811.55 frames. ], batch size: 77, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 08:01:33,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=179540.0, ans=0.1 +2024-09-17 08:01:36,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=179540.0, ans=0.2 +2024-09-17 08:01:39,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=179540.0, ans=0.0 +2024-09-17 08:01:46,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=179580.0, ans=0.2 +2024-09-17 08:01:47,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=179580.0, ans=0.125 +2024-09-17 08:01:50,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=179580.0, ans=0.1 +2024-09-17 08:02:02,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=179620.0, ans=0.0 +2024-09-17 08:02:16,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=179660.0, ans=0.0 +2024-09-17 08:02:30,720 INFO [train.py:1198] (0/2) Epoch 10, batch 4200, loss[loss=0.2814, ctc_loss=0.182, cr_loss=0.423, attn_decoder_loss=0.283, over 29512.00 frames. ], tot_loss[loss=0.2658, ctc_loss=0.171, cr_loss=0.4071, attn_decoder_loss=0.2673, over 5800154.09 frames. 
], batch size: 90, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 08:02:31,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=179700.0, ans=0.95 +2024-09-17 08:02:46,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=179740.0, ans=0.125 +2024-09-17 08:02:48,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=179740.0, ans=0.125 +2024-09-17 08:02:49,387 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.63 vs. limit=12.0 +2024-09-17 08:02:50,150 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.369e+01 9.480e+01 1.011e+02 1.105e+02 3.367e+02, threshold=2.021e+02, percent-clipped=4.0 +2024-09-17 08:02:59,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=179780.0, ans=0.125 +2024-09-17 08:03:14,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.45 vs. limit=15.0 +2024-09-17 08:03:44,285 INFO [train.py:1198] (0/2) Epoch 10, batch 4250, loss[loss=0.2552, ctc_loss=0.1658, cr_loss=0.4041, attn_decoder_loss=0.2562, over 29501.00 frames. ], tot_loss[loss=0.2659, ctc_loss=0.1707, cr_loss=0.4068, attn_decoder_loss=0.2675, over 5805306.04 frames. ], batch size: 74, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 08:03:45,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=179900.0, ans=0.025 +2024-09-17 08:03:52,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=179900.0, ans=0.0 +2024-09-17 08:03:54,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=179900.0, ans=0.07 +2024-09-17 08:04:13,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=179980.0, ans=0.125 +2024-09-17 08:04:25,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=179980.0, ans=0.0 +2024-09-17 08:04:33,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=180020.0, ans=0.125 +2024-09-17 08:04:49,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=180060.0, ans=0.125 +2024-09-17 08:04:59,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=180100.0, ans=0.125 +2024-09-17 08:05:00,424 INFO [train.py:1198] (0/2) Epoch 10, batch 4300, loss[loss=0.2636, ctc_loss=0.1574, cr_loss=0.3865, attn_decoder_loss=0.2668, over 29536.00 frames. ], tot_loss[loss=0.2662, ctc_loss=0.1707, cr_loss=0.4067, attn_decoder_loss=0.2678, over 5794726.40 frames. 
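
The many `balancer*` entries (`prob ... ans=0.125` and `max_positive ... ans=0.95` above, `min_positive` values such as 0.025 elsewhere in the log) come from activation balancers: modules that, with probability `prob`, nudge gradients so that the per-channel fraction of positive activations stays inside [min_positive, max_positive]. The snippet below only computes the statistic being constrained; the real module acts on gradients in the backward pass:

```python
import torch

def positive_fraction(x: torch.Tensor) -> torch.Tensor:
    """Per-channel fraction of positive activations for x of shape
    (..., num_channels) -- the quantity an activation balancer keeps
    inside [min_positive, max_positive]."""
    flat = x.reshape(-1, x.shape[-1])
    return (flat > 0).float().mean(dim=0)

x = torch.randn(1000, 512)
frac = positive_fraction(x)
out_of_range = ((frac < 0.025) | (frac > 0.95)).sum().item()  # ideally 0
```
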
], batch size: 87, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 08:05:19,786 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.912e+01 9.707e+01 1.038e+02 1.136e+02 2.980e+02, threshold=2.076e+02, percent-clipped=1.0 +2024-09-17 08:05:34,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=180180.0, ans=0.1 +2024-09-17 08:05:37,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=180180.0, ans=0.0 +2024-09-17 08:05:49,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=180220.0, ans=0.125 +2024-09-17 08:05:49,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=180220.0, ans=0.0 +2024-09-17 08:05:55,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten.whitening_limit, batch_count=180220.0, ans=15.0 +2024-09-17 08:06:10,586 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.77 vs. limit=10.0 +2024-09-17 08:06:14,238 INFO [train.py:1198] (0/2) Epoch 10, batch 4350, loss[loss=0.2812, ctc_loss=0.1753, cr_loss=0.4214, attn_decoder_loss=0.2836, over 29435.00 frames. ], tot_loss[loss=0.2697, ctc_loss=0.1738, cr_loss=0.4118, attn_decoder_loss=0.2712, over 5796891.06 frames. ], batch size: 97, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 08:07:12,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=180460.0, ans=0.2 +2024-09-17 08:07:17,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=180460.0, ans=0.2 +2024-09-17 08:07:27,831 INFO [train.py:1198] (0/2) Epoch 10, batch 4400, loss[loss=0.2868, ctc_loss=0.2008, cr_loss=0.4554, attn_decoder_loss=0.2863, over 27168.00 frames. ], tot_loss[loss=0.2722, ctc_loss=0.1762, cr_loss=0.4154, attn_decoder_loss=0.2737, over 5767897.93 frames. ], batch size: 124, lr: 1.10e-02, grad_scale: 16.0 +2024-09-17 08:07:38,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=180500.0, ans=0.0 +2024-09-17 08:07:46,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=180540.0, ans=0.95 +2024-09-17 08:07:48,750 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.847e+01 9.762e+01 1.026e+02 1.096e+02 2.982e+02, threshold=2.053e+02, percent-clipped=1.0 +2024-09-17 08:07:53,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=180540.0, ans=0.125 +2024-09-17 08:08:24,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=180620.0, ans=0.0 +2024-09-17 08:08:26,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=180660.0, ans=0.125 +2024-09-17 08:08:41,975 INFO [train.py:1198] (0/2) Epoch 10, batch 4450, loss[loss=0.3005, ctc_loss=0.2256, cr_loss=0.4356, attn_decoder_loss=0.2992, over 19687.00 frames. 
], tot_loss[loss=0.2756, ctc_loss=0.1817, cr_loss=0.4196, attn_decoder_loss=0.2767, over 5582632.39 frames. ], batch size: 209, lr: 1.10e-02, grad_scale: 4.0 +2024-09-17 08:08:48,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=180700.0, ans=0.125 +2024-09-17 08:08:57,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.83 vs. limit=22.5 +2024-09-17 08:09:01,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.62 vs. limit=15.0 +2024-09-17 08:09:05,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=180740.0, ans=0.125 +2024-09-17 08:09:09,264 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=9.00 vs. limit=15.0 +2024-09-17 08:09:18,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.57 vs. limit=15.0 +2024-09-17 08:09:23,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=180780.0, ans=0.125 +2024-09-17 08:09:29,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=180820.0, ans=0.125 +2024-09-17 08:09:33,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=180820.0, ans=0.125 +2024-09-17 08:09:35,846 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.38 vs. limit=15.0 +2024-09-17 08:09:58,575 INFO [train.py:1198] (0/2) Epoch 10, batch 4500, loss[loss=0.2983, ctc_loss=0.2231, cr_loss=0.4442, attn_decoder_loss=0.2968, over 19704.00 frames. ], tot_loss[loss=0.2797, ctc_loss=0.1894, cr_loss=0.4223, attn_decoder_loss=0.2804, over 5239847.92 frames. ], batch size: 209, lr: 1.10e-02, grad_scale: 8.0 +2024-09-17 08:10:07,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=180900.0, ans=0.1 +2024-09-17 08:10:21,315 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.766e+01 1.077e+02 1.142e+02 1.231e+02 1.732e+02, threshold=2.283e+02, percent-clipped=0.0 +2024-09-17 08:10:24,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=180940.0, ans=0.0 +2024-09-17 08:10:35,968 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-10.pt +2024-09-17 08:11:32,465 INFO [train.py:1198] (0/2) Epoch 11, batch 0, loss[loss=0.2543, ctc_loss=0.1563, cr_loss=0.3716, attn_decoder_loss=0.2569, over 29639.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1563, cr_loss=0.3716, attn_decoder_loss=0.2569, over 29639.00 frames. 
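
The `Saving checkpoint to .../epoch-10.pt` line marks the end of epoch 10; one checkpoint per epoch accumulates in the experiment directory. Decoding in icefall is typically run from an average of several such checkpoints rather than a single one; a plain parameter average, sketched below, approximates the idea (the library also maintains a running average during training, which differs from this post-hoc version, and the helper here is hypothetical):

```python
import torch

def average_checkpoints(paths):
    """Plain average of model parameters across saved checkpoints
    (a hypothetical helper, not the library function)."""
    avg = None
    for path in paths:
        state = torch.load(path, map_location="cpu")["model"]
        if avg is None:
            avg = {k: v.clone().float() for k, v in state.items()}
        else:
            for k in avg:
                avg[k] += state[k].float()
    return {k: v / len(paths) for k, v in avg.items()}

# e.g. average the epochs saved so far:
# model.load_state_dict(average_checkpoints(
#     [f"epoch-{i}.pt" for i in (8, 9, 10)]))
```
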
], batch size: 73, lr: 1.05e-02, grad_scale: 16.0 +2024-09-17 08:11:32,465 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 08:11:50,861 INFO [train.py:1230] (0/2) Epoch 11, validation: loss=0.2172, ctc_loss=0.0495, cr_loss=4.7e-15, attn_decoder_loss=0.2358, over 944034.00 frames. +2024-09-17 08:11:50,862 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 08:11:54,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=181000.0, ans=0.125 +2024-09-17 08:12:12,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=181040.0, ans=0.125 +2024-09-17 08:12:18,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=181040.0, ans=0.125 +2024-09-17 08:12:20,838 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.92 vs. limit=15.0 +2024-09-17 08:12:28,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=181080.0, ans=0.125 +2024-09-17 08:12:30,452 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.01 vs. limit=15.0 +2024-09-17 08:12:32,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=181080.0, ans=0.0 +2024-09-17 08:12:58,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=181160.0, ans=0.1 +2024-09-17 08:13:01,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=181160.0, ans=0.125 +2024-09-17 08:13:02,220 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.64 vs. limit=15.0 +2024-09-17 08:13:10,329 INFO [train.py:1198] (0/2) Epoch 11, batch 50, loss[loss=0.2368, ctc_loss=0.1469, cr_loss=0.3612, attn_decoder_loss=0.2387, over 29422.00 frames. ], tot_loss[loss=0.2673, ctc_loss=0.1735, cr_loss=0.4106, attn_decoder_loss=0.2687, over 1268309.62 frames. ], batch size: 70, lr: 1.05e-02, grad_scale: 8.0 +2024-09-17 08:13:10,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=181200.0, ans=0.0 +2024-09-17 08:13:35,369 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.22 vs. limit=22.5 +2024-09-17 08:13:47,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=181280.0, ans=22.5 +2024-09-17 08:14:13,813 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.260e+01 9.758e+01 1.123e+02 1.302e+02 1.602e+03, threshold=2.247e+02, percent-clipped=5.0 +2024-09-17 08:14:25,859 INFO [train.py:1198] (0/2) Epoch 11, batch 100, loss[loss=0.2614, ctc_loss=0.1727, cr_loss=0.4104, attn_decoder_loss=0.2622, over 29537.00 frames. ], tot_loss[loss=0.2692, ctc_loss=0.1742, cr_loss=0.4131, attn_decoder_loss=0.2706, over 2252457.89 frames. 
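
The epoch-11 validation pass above reports `loss=0.2172, ctc_loss=0.0495, cr_loss=4.7e-15, attn_decoder_loss=0.2358`. Two things are worth noting: the same 0.1/0.9/0.02 weighting reproduces the total (0.1*0.0495 + 0.9*0.2358 = 0.2172 to logging precision), and the consistency term is numerically zero, which is what one would expect if the two views being compared coincide once augmentation and dropout are disabled for evaluation (an inference from the numbers; the log does not state this).
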
], batch size: 76, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:14:34,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=181400.0, ans=0.125 +2024-09-17 08:14:44,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.35 vs. limit=15.0 +2024-09-17 08:14:45,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=181440.0, ans=0.0 +2024-09-17 08:14:50,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=181440.0, ans=0.125 +2024-09-17 08:14:50,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=181440.0, ans=0.125 +2024-09-17 08:14:53,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=181440.0, ans=0.2 +2024-09-17 08:14:56,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=181480.0, ans=0.125 +2024-09-17 08:15:05,565 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.14 vs. limit=10.0 +2024-09-17 08:15:06,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=181480.0, ans=0.1 +2024-09-17 08:15:13,531 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.72 vs. limit=15.0 +2024-09-17 08:15:33,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=181560.0, ans=0.1 +2024-09-17 08:15:35,937 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.67 vs. limit=10.0 +2024-09-17 08:15:39,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=181600.0, ans=0.1 +2024-09-17 08:15:41,049 INFO [train.py:1198] (0/2) Epoch 11, batch 150, loss[loss=0.2391, ctc_loss=0.1446, cr_loss=0.3746, attn_decoder_loss=0.2412, over 29400.00 frames. ], tot_loss[loss=0.2667, ctc_loss=0.1714, cr_loss=0.4085, attn_decoder_loss=0.2682, over 3047301.48 frames. 
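
The learning rate decays gently with batch count inside an epoch (1.11e-02 -> 1.10e-02 across epoch 10) and steps down harder at the epoch boundary (1.05e-02 at the start of epoch 11, 1.04e-02 by batch 100), consistent with a schedule that decays in both batch and epoch, such as icefall's Eden scheduler. A sketch of that shape, with the library's usual default constants (this run's base_lr and constants are not visible in the excerpt):

```python
def eden_lr(base_lr: float, batch: int, epoch: float,
            lr_batches: float = 5000.0, lr_epochs: float = 3.5) -> float:
    """Eden-style schedule: inverse fourth-root decay in both the batch
    index and the (possibly fractional) epoch."""
    batch_factor = ((batch / lr_batches) ** 2 + 1.0) ** -0.25
    epoch_factor = ((epoch / lr_epochs) ** 2 + 1.0) ** -0.25
    return base_lr * batch_factor * epoch_factor
```
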
], batch size: 70, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:15:49,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=181600.0, ans=0.125 +2024-09-17 08:15:51,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=181600.0, ans=0.1 +2024-09-17 08:16:05,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=181640.0, ans=0.2 +2024-09-17 08:16:09,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=181640.0, ans=0.0 +2024-09-17 08:16:22,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=181680.0, ans=0.0 +2024-09-17 08:16:25,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=181680.0, ans=0.0 +2024-09-17 08:16:49,222 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.120e+01 9.120e+01 9.727e+01 1.024e+02 1.360e+02, threshold=1.945e+02, percent-clipped=0.0 +2024-09-17 08:16:56,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=181760.0, ans=0.04949747468305833 +2024-09-17 08:17:01,151 INFO [train.py:1198] (0/2) Epoch 11, batch 200, loss[loss=0.2901, ctc_loss=0.1911, cr_loss=0.435, attn_decoder_loss=0.2915, over 27562.00 frames. ], tot_loss[loss=0.2655, ctc_loss=0.1702, cr_loss=0.4074, attn_decoder_loss=0.267, over 3659115.41 frames. ], batch size: 125, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:17:15,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=181840.0, ans=0.125 +2024-09-17 08:17:22,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=181840.0, ans=0.0 +2024-09-17 08:17:57,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=181920.0, ans=0.0 +2024-09-17 08:18:04,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=181960.0, ans=0.125 +2024-09-17 08:18:16,509 INFO [train.py:1198] (0/2) Epoch 11, batch 250, loss[loss=0.2846, ctc_loss=0.1821, cr_loss=0.438, attn_decoder_loss=0.2863, over 29243.00 frames. ], tot_loss[loss=0.2646, ctc_loss=0.1687, cr_loss=0.4047, attn_decoder_loss=0.2663, over 4141709.34 frames. ], batch size: 100, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:18:16,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=182000.0, ans=0.0 +2024-09-17 08:18:26,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.47 vs. limit=15.0 +2024-09-17 08:18:32,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=182040.0, ans=0.0 +2024-09-17 08:18:34,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.59 vs. 
limit=22.5 +2024-09-17 08:18:36,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=182040.0, ans=0.125 +2024-09-17 08:18:44,737 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.22 vs. limit=15.0 +2024-09-17 08:18:47,818 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.33 vs. limit=22.5 +2024-09-17 08:19:11,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=182120.0, ans=0.2 +2024-09-17 08:19:16,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=182160.0, ans=0.025 +2024-09-17 08:19:20,161 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 9.171e+01 1.004e+02 1.090e+02 1.755e+02, threshold=2.009e+02, percent-clipped=0.0 +2024-09-17 08:19:32,196 INFO [train.py:1198] (0/2) Epoch 11, batch 300, loss[loss=0.2835, ctc_loss=0.1764, cr_loss=0.4261, attn_decoder_loss=0.286, over 29560.00 frames. ], tot_loss[loss=0.2643, ctc_loss=0.1684, cr_loss=0.4047, attn_decoder_loss=0.266, over 4510296.05 frames. ], batch size: 92, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:19:57,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=182240.0, ans=0.0 +2024-09-17 08:20:05,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.48 vs. limit=12.0 +2024-09-17 08:20:15,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=182280.0, ans=0.09899494936611666 +2024-09-17 08:20:31,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=182320.0, ans=0.0 +2024-09-17 08:20:37,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=182360.0, ans=0.0 +2024-09-17 08:20:45,017 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:20:45,686 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.99 vs. limit=15.0 +2024-09-17 08:20:52,577 INFO [train.py:1198] (0/2) Epoch 11, batch 350, loss[loss=0.2546, ctc_loss=0.1649, cr_loss=0.4017, attn_decoder_loss=0.2557, over 29342.00 frames. ], tot_loss[loss=0.265, ctc_loss=0.1689, cr_loss=0.4059, attn_decoder_loss=0.2667, over 4796724.14 frames. 
], batch size: 71, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:21:06,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=182440.0, ans=0.025 +2024-09-17 08:21:27,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=182480.0, ans=0.5 +2024-09-17 08:21:36,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=182520.0, ans=0.125 +2024-09-17 08:21:42,597 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:21:45,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=182520.0, ans=0.0 +2024-09-17 08:21:55,726 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.030e+01 9.345e+01 9.958e+01 1.088e+02 1.726e+02, threshold=1.992e+02, percent-clipped=0.0 +2024-09-17 08:22:07,832 INFO [train.py:1198] (0/2) Epoch 11, batch 400, loss[loss=0.2807, ctc_loss=0.1801, cr_loss=0.4426, attn_decoder_loss=0.282, over 29686.00 frames. ], tot_loss[loss=0.2644, ctc_loss=0.1686, cr_loss=0.4057, attn_decoder_loss=0.2661, over 5025969.95 frames. ], batch size: 82, lr: 1.04e-02, grad_scale: 16.0 +2024-09-17 08:22:28,623 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.15 vs. limit=15.0 +2024-09-17 08:22:49,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.66 vs. limit=15.0 +2024-09-17 08:23:03,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=182720.0, ans=0.2 +2024-09-17 08:23:23,314 INFO [train.py:1198] (0/2) Epoch 11, batch 450, loss[loss=0.2692, ctc_loss=0.167, cr_loss=0.4207, attn_decoder_loss=0.2712, over 29699.00 frames. ], tot_loss[loss=0.2648, ctc_loss=0.169, cr_loss=0.4068, attn_decoder_loss=0.2663, over 5187708.50 frames. ], batch size: 83, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:23:23,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=182800.0, ans=0.125 +2024-09-17 08:23:26,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=182800.0, ans=0.0 +2024-09-17 08:23:56,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=182880.0, ans=0.0 +2024-09-17 08:24:17,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=182920.0, ans=0.0 +2024-09-17 08:24:32,889 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.364e+01 9.150e+01 9.879e+01 1.056e+02 3.994e+02, threshold=1.976e+02, percent-clipped=1.0 +2024-09-17 08:24:33,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=182960.0, ans=0.1 +2024-09-17 08:24:43,360 INFO [train.py:1198] (0/2) Epoch 11, batch 500, loss[loss=0.2933, ctc_loss=0.1957, cr_loss=0.4628, attn_decoder_loss=0.2938, over 29440.00 frames. ], tot_loss[loss=0.2637, ctc_loss=0.1678, cr_loss=0.4048, attn_decoder_loss=0.2653, over 5329822.86 frames. 
], batch size: 94, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:24:57,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=183040.0, ans=0.125 +2024-09-17 08:25:06,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=183040.0, ans=0.2 +2024-09-17 08:25:09,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=183040.0, ans=0.125 +2024-09-17 08:25:23,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=183080.0, ans=0.125 +2024-09-17 08:25:32,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=183120.0, ans=0.0 +2024-09-17 08:25:32,510 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.91 vs. limit=10.0 +2024-09-17 08:25:44,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=183160.0, ans=0.125 +2024-09-17 08:25:59,355 INFO [train.py:1198] (0/2) Epoch 11, batch 550, loss[loss=0.2651, ctc_loss=0.1637, cr_loss=0.3914, attn_decoder_loss=0.2677, over 28872.00 frames. ], tot_loss[loss=0.2637, ctc_loss=0.1681, cr_loss=0.4046, attn_decoder_loss=0.2654, over 5422839.44 frames. ], batch size: 104, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:25:59,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=183200.0, ans=0.125 +2024-09-17 08:26:37,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=183280.0, ans=0.07 +2024-09-17 08:26:44,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.83 vs. limit=15.0 +2024-09-17 08:26:50,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=183320.0, ans=0.025 +2024-09-17 08:26:56,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=183320.0, ans=0.5 +2024-09-17 08:27:02,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=183360.0, ans=0.0 +2024-09-17 08:27:05,004 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.807e+01 9.204e+01 9.712e+01 1.043e+02 1.936e+02, threshold=1.942e+02, percent-clipped=0.0 +2024-09-17 08:27:14,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=183400.0, ans=0.1 +2024-09-17 08:27:15,705 INFO [train.py:1198] (0/2) Epoch 11, batch 600, loss[loss=0.2807, ctc_loss=0.1754, cr_loss=0.4224, attn_decoder_loss=0.283, over 29207.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.168, cr_loss=0.405, attn_decoder_loss=0.2656, over 5509685.71 frames. 
], batch size: 100, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:27:16,107 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:27:43,096 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.45 vs. limit=22.5 +2024-09-17 08:27:43,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=183440.0, ans=0.125 +2024-09-17 08:28:20,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=183560.0, ans=0.0 +2024-09-17 08:28:35,708 INFO [train.py:1198] (0/2) Epoch 11, batch 650, loss[loss=0.2686, ctc_loss=0.1709, cr_loss=0.4206, attn_decoder_loss=0.2701, over 29740.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1671, cr_loss=0.4039, attn_decoder_loss=0.2649, over 5587091.60 frames. ], batch size: 81, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:28:36,649 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.73 vs. limit=6.0 +2024-09-17 08:28:56,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=20.13 vs. limit=22.5 +2024-09-17 08:28:57,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=183640.0, ans=0.025 +2024-09-17 08:29:21,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=183720.0, ans=0.0 +2024-09-17 08:29:36,644 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:29:42,309 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 9.127e+01 9.643e+01 1.047e+02 1.455e+02, threshold=1.929e+02, percent-clipped=0.0 +2024-09-17 08:29:51,506 INFO [train.py:1198] (0/2) Epoch 11, batch 700, loss[loss=0.2618, ctc_loss=0.1655, cr_loss=0.4076, attn_decoder_loss=0.2635, over 29516.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1673, cr_loss=0.4049, attn_decoder_loss=0.2653, over 5636975.61 frames. ], batch size: 76, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:29:53,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=183800.0, ans=0.125 +2024-09-17 08:29:54,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=183800.0, ans=0.125 +2024-09-17 08:30:05,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=183840.0, ans=0.125 +2024-09-17 08:30:26,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=183880.0, ans=0.1 +2024-09-17 08:30:35,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=183920.0, ans=0.125 +2024-09-17 08:30:39,537 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.93 vs. 
limit=15.0 +2024-09-17 08:30:39,692 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.79 vs. limit=15.0 +2024-09-17 08:30:45,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=183920.0, ans=0.1 +2024-09-17 08:30:48,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=183920.0, ans=0.125 +2024-09-17 08:31:00,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=183960.0, ans=0.0 +2024-09-17 08:31:00,569 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:31:08,141 INFO [train.py:1198] (0/2) Epoch 11, batch 750, loss[loss=0.2606, ctc_loss=0.164, cr_loss=0.4162, attn_decoder_loss=0.2621, over 29719.00 frames. ], tot_loss[loss=0.2631, ctc_loss=0.1675, cr_loss=0.4049, attn_decoder_loss=0.2647, over 5676967.82 frames. ], batch size: 82, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:31:14,929 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.40 vs. limit=15.0 +2024-09-17 08:31:15,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_na.min_abs, batch_count=184000.0, ans=0.02 +2024-09-17 08:31:18,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=184000.0, ans=0.125 +2024-09-17 08:31:18,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=184000.0, ans=0.125 +2024-09-17 08:31:20,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=184000.0, ans=0.0 +2024-09-17 08:31:24,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=184040.0, ans=0.1 +2024-09-17 08:31:51,740 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.60 vs. limit=15.0 +2024-09-17 08:32:16,691 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.265e+01 9.471e+01 1.047e+02 1.151e+02 2.834e+02, threshold=2.094e+02, percent-clipped=4.0 +2024-09-17 08:32:27,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.35 vs. limit=15.0 +2024-09-17 08:32:28,008 INFO [train.py:1198] (0/2) Epoch 11, batch 800, loss[loss=0.2381, ctc_loss=0.1432, cr_loss=0.3732, attn_decoder_loss=0.2404, over 29590.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1679, cr_loss=0.4056, attn_decoder_loss=0.2652, over 5708108.86 frames. 
], batch size: 73, lr: 1.04e-02, grad_scale: 16.0 +2024-09-17 08:32:45,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=184240.0, ans=0.125 +2024-09-17 08:32:46,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=184240.0, ans=0.2 +2024-09-17 08:32:51,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=184240.0, ans=0.1 +2024-09-17 08:33:35,269 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.03 vs. limit=15.0 +2024-09-17 08:33:35,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=184360.0, ans=0.0 +2024-09-17 08:33:42,956 INFO [train.py:1198] (0/2) Epoch 11, batch 850, loss[loss=0.2691, ctc_loss=0.1623, cr_loss=0.3659, attn_decoder_loss=0.2728, over 29726.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1675, cr_loss=0.4045, attn_decoder_loss=0.2649, over 5736575.20 frames. ], batch size: 89, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:33:45,329 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.17 vs. limit=15.0 +2024-09-17 08:33:58,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=184440.0, ans=0.125 +2024-09-17 08:34:08,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=184440.0, ans=0.0 +2024-09-17 08:34:16,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.53 vs. limit=22.5 +2024-09-17 08:34:36,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=184520.0, ans=0.0 +2024-09-17 08:34:37,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=184520.0, ans=0.0 +2024-09-17 08:34:40,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=184520.0, ans=0.0 +2024-09-17 08:34:40,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=184520.0, ans=0.125 +2024-09-17 08:34:40,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=184520.0, ans=0.125 +2024-09-17 08:34:50,949 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.310e+01 9.399e+01 9.999e+01 1.067e+02 1.963e+02, threshold=2.000e+02, percent-clipped=0.0 +2024-09-17 08:34:58,536 INFO [train.py:1198] (0/2) Epoch 11, batch 900, loss[loss=0.2416, ctc_loss=0.1501, cr_loss=0.3887, attn_decoder_loss=0.2431, over 29604.00 frames. ], tot_loss[loss=0.2633, ctc_loss=0.1678, cr_loss=0.405, attn_decoder_loss=0.265, over 5740937.85 frames. 
], batch size: 73, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:35:03,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=184600.0, ans=0.125 +2024-09-17 08:35:03,999 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.33 vs. limit=6.0 +2024-09-17 08:35:06,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=184600.0, ans=0.1 +2024-09-17 08:35:49,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=184720.0, ans=0.125 +2024-09-17 08:35:49,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=184720.0, ans=0.05 +2024-09-17 08:35:52,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=184720.0, ans=0.0 +2024-09-17 08:35:55,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=184720.0, ans=0.125 +2024-09-17 08:36:16,596 INFO [train.py:1198] (0/2) Epoch 11, batch 950, loss[loss=0.2418, ctc_loss=0.1453, cr_loss=0.3668, attn_decoder_loss=0.2443, over 29502.00 frames. ], tot_loss[loss=0.2635, ctc_loss=0.1677, cr_loss=0.4041, attn_decoder_loss=0.2652, over 5742525.53 frames. ], batch size: 74, lr: 1.04e-02, grad_scale: 8.0 +2024-09-17 08:36:28,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=184800.0, ans=0.0 +2024-09-17 08:36:52,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=184880.0, ans=0.125 +2024-09-17 08:37:02,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=184920.0, ans=0.5 +2024-09-17 08:37:26,871 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.438e+01 9.626e+01 1.055e+02 1.179e+02 5.157e+02, threshold=2.111e+02, percent-clipped=4.0 +2024-09-17 08:37:34,438 INFO [train.py:1198] (0/2) Epoch 11, batch 1000, loss[loss=0.2501, ctc_loss=0.1566, cr_loss=0.397, attn_decoder_loss=0.2517, over 29517.00 frames. ], tot_loss[loss=0.2643, ctc_loss=0.1685, cr_loss=0.4055, attn_decoder_loss=0.2659, over 5734684.07 frames. 
], batch size: 77, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:37:39,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=185000.0, ans=0.0 +2024-09-17 08:37:42,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=185000.0, ans=0.1 +2024-09-17 08:38:02,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=185040.0, ans=0.1 +2024-09-17 08:38:02,196 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:38:02,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=185040.0, ans=0.0 +2024-09-17 08:38:03,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=185080.0, ans=0.025 +2024-09-17 08:38:09,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=185080.0, ans=0.125 +2024-09-17 08:38:29,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=185120.0, ans=0.125 +2024-09-17 08:38:35,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=185160.0, ans=0.04949747468305833 +2024-09-17 08:38:50,018 INFO [train.py:1198] (0/2) Epoch 11, batch 1050, loss[loss=0.2785, ctc_loss=0.1798, cr_loss=0.4399, attn_decoder_loss=0.2797, over 29671.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1683, cr_loss=0.4054, attn_decoder_loss=0.2655, over 5744922.05 frames. ], batch size: 85, lr: 1.03e-02, grad_scale: 4.0 +2024-09-17 08:40:01,447 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 9.188e+01 9.776e+01 1.031e+02 2.876e+02, threshold=1.955e+02, percent-clipped=0.0 +2024-09-17 08:40:07,595 INFO [train.py:1198] (0/2) Epoch 11, batch 1100, loss[loss=0.2512, ctc_loss=0.1593, cr_loss=0.3903, attn_decoder_loss=0.2527, over 29446.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1679, cr_loss=0.4048, attn_decoder_loss=0.2652, over 5757288.87 frames. ], batch size: 78, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:40:15,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=185400.0, ans=0.125 +2024-09-17 08:40:31,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=185440.0, ans=0.125 +2024-09-17 08:41:06,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=185520.0, ans=0.0 +2024-09-17 08:41:10,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=185560.0, ans=0.125 +2024-09-17 08:41:19,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=185560.0, ans=0.0 +2024-09-17 08:41:25,282 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.66 vs. 
limit=22.5 +2024-09-17 08:41:25,830 INFO [train.py:1198] (0/2) Epoch 11, batch 1150, loss[loss=0.2546, ctc_loss=0.1672, cr_loss=0.4009, attn_decoder_loss=0.2554, over 29435.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1683, cr_loss=0.405, attn_decoder_loss=0.2652, over 5755582.45 frames. ], batch size: 78, lr: 1.03e-02, grad_scale: 4.0 +2024-09-17 08:41:39,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.14 vs. limit=15.0 +2024-09-17 08:41:41,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=185640.0, ans=0.125 +2024-09-17 08:41:42,033 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.30 vs. limit=22.5 +2024-09-17 08:41:59,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=185680.0, ans=0.125 +2024-09-17 08:42:09,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=185680.0, ans=0.1 +2024-09-17 08:42:19,116 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.66 vs. limit=6.0 +2024-09-17 08:42:25,886 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:42:33,865 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=151.70 vs. limit=15.0 +2024-09-17 08:42:37,684 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.117e+01 9.543e+01 1.008e+02 1.096e+02 1.940e+02, threshold=2.016e+02, percent-clipped=1.0 +2024-09-17 08:42:39,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=185760.0, ans=0.125 +2024-09-17 08:42:42,184 INFO [train.py:1198] (0/2) Epoch 11, batch 1200, loss[loss=0.2666, ctc_loss=0.1568, cr_loss=0.3987, attn_decoder_loss=0.27, over 29689.00 frames. ], tot_loss[loss=0.2646, ctc_loss=0.1689, cr_loss=0.4057, attn_decoder_loss=0.2662, over 5747895.58 frames. ], batch size: 85, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:43:05,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=185840.0, ans=0.125 +2024-09-17 08:43:21,716 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.69 vs. limit=22.5 +2024-09-17 08:43:31,995 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.78 vs. limit=10.0 +2024-09-17 08:43:39,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.76 vs. limit=15.0 +2024-09-17 08:43:51,966 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.84 vs. limit=15.0 +2024-09-17 08:44:00,288 INFO [train.py:1198] (0/2) Epoch 11, batch 1250, loss[loss=0.2782, ctc_loss=0.182, cr_loss=0.4319, attn_decoder_loss=0.2793, over 29520.00 frames. 
], tot_loss[loss=0.2648, ctc_loss=0.1688, cr_loss=0.4062, attn_decoder_loss=0.2664, over 5775017.11 frames. ], batch size: 92, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:44:05,552 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.72 vs. limit=15.0 +2024-09-17 08:44:09,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=186000.0, ans=0.0 +2024-09-17 08:44:20,636 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=20.91 vs. limit=22.5 +2024-09-17 08:44:39,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=186080.0, ans=0.125 +2024-09-17 08:44:47,583 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.52 vs. limit=10.0 +2024-09-17 08:44:58,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=186120.0, ans=0.125 +2024-09-17 08:45:05,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=186160.0, ans=0.0 +2024-09-17 08:45:12,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=186160.0, ans=0.2 +2024-09-17 08:45:13,656 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.030e+01 9.338e+01 9.972e+01 1.044e+02 2.073e+02, threshold=1.994e+02, percent-clipped=1.0 +2024-09-17 08:45:18,172 INFO [train.py:1198] (0/2) Epoch 11, batch 1300, loss[loss=0.2786, ctc_loss=0.1809, cr_loss=0.4148, attn_decoder_loss=0.2802, over 28134.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1678, cr_loss=0.4046, attn_decoder_loss=0.2655, over 5779399.43 frames. ], batch size: 111, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:45:36,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=186240.0, ans=0.2 +2024-09-17 08:45:40,424 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.34 vs. limit=15.0 +2024-09-17 08:46:00,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=186280.0, ans=0.0 +2024-09-17 08:46:08,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=186320.0, ans=0.125 +2024-09-17 08:46:15,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=186320.0, ans=0.0 +2024-09-17 08:46:23,508 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 08:46:34,061 INFO [train.py:1198] (0/2) Epoch 11, batch 1350, loss[loss=0.2747, ctc_loss=0.1757, cr_loss=0.4324, attn_decoder_loss=0.2761, over 29746.00 frames. ], tot_loss[loss=0.2631, ctc_loss=0.1668, cr_loss=0.4037, attn_decoder_loss=0.2649, over 5796966.46 frames. 
], batch size: 81, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:46:37,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=186400.0, ans=0.0 +2024-09-17 08:46:38,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.06 vs. limit=22.5 +2024-09-17 08:46:47,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=186440.0, ans=0.1 +2024-09-17 08:47:01,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=186440.0, ans=0.0 +2024-09-17 08:47:07,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=186480.0, ans=0.125 +2024-09-17 08:47:14,479 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.12 vs. limit=15.0 +2024-09-17 08:47:46,678 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.867e+01 9.722e+01 1.047e+02 1.106e+02 2.453e+02, threshold=2.093e+02, percent-clipped=1.0 +2024-09-17 08:47:51,365 INFO [train.py:1198] (0/2) Epoch 11, batch 1400, loss[loss=0.2309, ctc_loss=0.1391, cr_loss=0.356, attn_decoder_loss=0.2332, over 29576.00 frames. ], tot_loss[loss=0.2633, ctc_loss=0.1669, cr_loss=0.404, attn_decoder_loss=0.265, over 5807559.50 frames. ], batch size: 69, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:47:54,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=186600.0, ans=0.0 +2024-09-17 08:48:14,944 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.70 vs. limit=15.0 +2024-09-17 08:48:22,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=186680.0, ans=0.125 +2024-09-17 08:48:24,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=186680.0, ans=0.0 +2024-09-17 08:48:49,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=186720.0, ans=0.125 +2024-09-17 08:49:09,108 INFO [train.py:1198] (0/2) Epoch 11, batch 1450, loss[loss=0.2819, ctc_loss=0.1874, cr_loss=0.4428, attn_decoder_loss=0.2826, over 29458.00 frames. ], tot_loss[loss=0.2638, ctc_loss=0.1673, cr_loss=0.4046, attn_decoder_loss=0.2655, over 5804797.93 frames. ], batch size: 94, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:49:51,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=186880.0, ans=0.125 +2024-09-17 08:50:11,684 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.98 vs. 
limit=15.0 +2024-09-17 08:50:12,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=186960.0, ans=0.1 +2024-09-17 08:50:19,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=186960.0, ans=0.025 +2024-09-17 08:50:21,159 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.530e+01 9.456e+01 1.033e+02 1.133e+02 2.904e+02, threshold=2.066e+02, percent-clipped=2.0 +2024-09-17 08:50:24,328 INFO [train.py:1198] (0/2) Epoch 11, batch 1500, loss[loss=0.2863, ctc_loss=0.1849, cr_loss=0.4481, attn_decoder_loss=0.2876, over 29629.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.1672, cr_loss=0.4045, attn_decoder_loss=0.2659, over 5805836.85 frames. ], batch size: 86, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:51:01,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=187080.0, ans=0.125 +2024-09-17 08:51:14,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=187120.0, ans=0.125 +2024-09-17 08:51:14,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=187120.0, ans=0.2 +2024-09-17 08:51:20,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=187120.0, ans=0.125 +2024-09-17 08:51:43,029 INFO [train.py:1198] (0/2) Epoch 11, batch 1550, loss[loss=0.2814, ctc_loss=0.1761, cr_loss=0.4269, attn_decoder_loss=0.2836, over 29535.00 frames. ], tot_loss[loss=0.2647, ctc_loss=0.1683, cr_loss=0.4057, attn_decoder_loss=0.2664, over 5781826.35 frames. ], batch size: 90, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:51:46,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=187200.0, ans=0.125 +2024-09-17 08:51:52,815 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=12.0 +2024-09-17 08:52:09,372 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=20.09 vs. limit=22.5 +2024-09-17 08:52:46,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=187360.0, ans=0.0 +2024-09-17 08:52:57,784 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.021e+01 9.413e+01 1.008e+02 1.137e+02 5.479e+02, threshold=2.016e+02, percent-clipped=2.0 +2024-09-17 08:53:00,778 INFO [train.py:1198] (0/2) Epoch 11, batch 1600, loss[loss=0.2738, ctc_loss=0.1719, cr_loss=0.4048, attn_decoder_loss=0.2761, over 29682.00 frames. ], tot_loss[loss=0.2645, ctc_loss=0.1681, cr_loss=0.4053, attn_decoder_loss=0.2662, over 5764983.74 frames. 
], batch size: 85, lr: 1.03e-02, grad_scale: 16.0 +2024-09-17 08:53:14,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=187440.0, ans=0.0 +2024-09-17 08:53:23,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=187440.0, ans=0.1 +2024-09-17 08:53:35,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=187480.0, ans=0.0 +2024-09-17 08:53:46,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=187520.0, ans=0.1 +2024-09-17 08:53:48,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=187520.0, ans=0.125 +2024-09-17 08:53:49,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=187520.0, ans=0.025 +2024-09-17 08:53:49,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=187520.0, ans=0.125 +2024-09-17 08:54:16,158 INFO [train.py:1198] (0/2) Epoch 11, batch 1650, loss[loss=0.2871, ctc_loss=0.1844, cr_loss=0.4474, attn_decoder_loss=0.2886, over 29678.00 frames. ], tot_loss[loss=0.2642, ctc_loss=0.1676, cr_loss=0.4044, attn_decoder_loss=0.2659, over 5760764.09 frames. ], batch size: 89, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:54:24,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=187600.0, ans=0.05 +2024-09-17 08:54:25,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=187600.0, ans=0.0 +2024-09-17 08:55:00,289 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.74 vs. limit=15.0 +2024-09-17 08:55:08,909 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.46 vs. limit=22.5 +2024-09-17 08:55:11,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=187720.0, ans=0.1 +2024-09-17 08:55:30,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=187760.0, ans=0.125 +2024-09-17 08:55:32,180 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.528e+01 9.133e+01 1.002e+02 1.058e+02 1.581e+02, threshold=2.003e+02, percent-clipped=0.0 +2024-09-17 08:55:33,701 INFO [train.py:1198] (0/2) Epoch 11, batch 1700, loss[loss=0.2449, ctc_loss=0.1579, cr_loss=0.3898, attn_decoder_loss=0.2459, over 29592.00 frames. ], tot_loss[loss=0.264, ctc_loss=0.1675, cr_loss=0.4045, attn_decoder_loss=0.2657, over 5783071.64 frames. ], batch size: 69, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:55:52,606 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.25 vs. limit=15.0 +2024-09-17 08:56:47,572 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.99 vs. 
limit=22.5 +2024-09-17 08:56:51,928 INFO [train.py:1198] (0/2) Epoch 11, batch 1750, loss[loss=0.2377, ctc_loss=0.1619, cr_loss=0.379, attn_decoder_loss=0.2377, over 29333.00 frames. ], tot_loss[loss=0.2638, ctc_loss=0.1671, cr_loss=0.4037, attn_decoder_loss=0.2655, over 5790035.53 frames. ], batch size: 67, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:57:06,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.49 vs. limit=15.0 +2024-09-17 08:57:30,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=188080.0, ans=0.125 +2024-09-17 08:58:04,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=188160.0, ans=0.0 +2024-09-17 08:58:05,833 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.041e+01 9.226e+01 9.775e+01 1.040e+02 1.595e+02, threshold=1.955e+02, percent-clipped=0.0 +2024-09-17 08:58:07,329 INFO [train.py:1198] (0/2) Epoch 11, batch 1800, loss[loss=0.2738, ctc_loss=0.176, cr_loss=0.436, attn_decoder_loss=0.2749, over 29670.00 frames. ], tot_loss[loss=0.2638, ctc_loss=0.1672, cr_loss=0.4036, attn_decoder_loss=0.2656, over 5791899.89 frames. ], batch size: 83, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:58:38,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=188280.0, ans=0.1 +2024-09-17 08:58:43,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.49 vs. limit=6.0 +2024-09-17 08:58:52,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=188280.0, ans=0.0 +2024-09-17 08:58:55,725 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.11 vs. limit=15.0 +2024-09-17 08:58:57,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=188320.0, ans=0.0 +2024-09-17 08:59:06,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.50 vs. limit=10.0 +2024-09-17 08:59:12,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.80 vs. limit=15.0 +2024-09-17 08:59:24,941 INFO [train.py:1198] (0/2) Epoch 11, batch 1850, loss[loss=0.2704, ctc_loss=0.1753, cr_loss=0.4003, attn_decoder_loss=0.272, over 29641.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1672, cr_loss=0.4041, attn_decoder_loss=0.2653, over 5797563.27 frames. ], batch size: 86, lr: 1.03e-02, grad_scale: 4.0 +2024-09-17 08:59:34,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=188400.0, ans=0.2 +2024-09-17 08:59:38,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=188440.0, ans=0.0 +2024-09-17 08:59:45,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.04 vs. 
limit=15.0 +2024-09-17 08:59:47,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=188440.0, ans=0.125 +2024-09-17 08:59:59,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=188480.0, ans=0.125 +2024-09-17 08:59:59,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=188480.0, ans=0.125 +2024-09-17 09:00:13,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten.whitening_limit, batch_count=188520.0, ans=15.0 +2024-09-17 09:00:13,091 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.89 vs. limit=15.0 +2024-09-17 09:00:18,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=188520.0, ans=0.125 +2024-09-17 09:00:30,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten.whitening_limit, batch_count=188560.0, ans=22.5 +2024-09-17 09:00:41,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=188600.0, ans=0.0 +2024-09-17 09:00:42,135 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.959e+01 9.168e+01 9.699e+01 1.053e+02 1.276e+02, threshold=1.940e+02, percent-clipped=0.0 +2024-09-17 09:00:42,161 INFO [train.py:1198] (0/2) Epoch 11, batch 1900, loss[loss=0.2856, ctc_loss=0.1845, cr_loss=0.4469, attn_decoder_loss=0.2869, over 29710.00 frames. ], tot_loss[loss=0.2642, ctc_loss=0.1675, cr_loss=0.4054, attn_decoder_loss=0.2659, over 5805276.02 frames. ], batch size: 89, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 09:01:00,538 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:01:11,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=188680.0, ans=0.125 +2024-09-17 09:01:17,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=188680.0, ans=0.0 +2024-09-17 09:01:50,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=188760.0, ans=0.125 +2024-09-17 09:01:57,962 INFO [train.py:1198] (0/2) Epoch 11, batch 1950, loss[loss=0.2541, ctc_loss=0.1609, cr_loss=0.401, attn_decoder_loss=0.2556, over 29468.00 frames. ], tot_loss[loss=0.2651, ctc_loss=0.1681, cr_loss=0.4071, attn_decoder_loss=0.2669, over 5819719.28 frames. ], batch size: 78, lr: 1.02e-02, grad_scale: 4.0 +2024-09-17 09:01:58,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.11 vs. limit=15.0 +2024-09-17 09:02:12,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.19 vs. 
limit=15.0 +2024-09-17 09:02:22,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=188840.0, ans=0.1 +2024-09-17 09:02:22,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=188840.0, ans=10.0 +2024-09-17 09:02:27,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=188840.0, ans=0.1 +2024-09-17 09:02:27,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=188840.0, ans=0.125 +2024-09-17 09:02:30,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.79 vs. limit=6.0 +2024-09-17 09:02:33,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=188880.0, ans=0.0 +2024-09-17 09:02:37,127 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.97 vs. limit=10.0 +2024-09-17 09:02:39,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=188880.0, ans=0.125 +2024-09-17 09:02:48,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=188920.0, ans=0.125 +2024-09-17 09:02:57,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=188920.0, ans=0.125 +2024-09-17 09:02:58,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=188960.0, ans=0.2 +2024-09-17 09:02:58,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=188960.0, ans=0.025 +2024-09-17 09:03:15,373 INFO [train.py:1198] (0/2) Epoch 11, batch 2000, loss[loss=0.2433, ctc_loss=0.1568, cr_loss=0.3898, attn_decoder_loss=0.2442, over 29347.00 frames. ], tot_loss[loss=0.2657, ctc_loss=0.1688, cr_loss=0.4071, attn_decoder_loss=0.2675, over 5796372.65 frames. ], batch size: 67, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:03:16,928 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.350e+01 9.444e+01 9.987e+01 1.091e+02 4.605e+02, threshold=1.997e+02, percent-clipped=2.0 +2024-09-17 09:03:29,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=189040.0, ans=0.0 +2024-09-17 09:03:44,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=189080.0, ans=0.2 +2024-09-17 09:04:01,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=189120.0, ans=0.125 +2024-09-17 09:04:33,454 INFO [train.py:1198] (0/2) Epoch 11, batch 2050, loss[loss=0.2364, ctc_loss=0.1434, cr_loss=0.3716, attn_decoder_loss=0.2384, over 29430.00 frames. ], tot_loss[loss=0.2646, ctc_loss=0.168, cr_loss=0.4055, attn_decoder_loss=0.2664, over 5790097.64 frames. 
], batch size: 70, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:04:36,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=189200.0, ans=0.2 +2024-09-17 09:04:36,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=189200.0, ans=0.125 +2024-09-17 09:04:53,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=189240.0, ans=0.0 +2024-09-17 09:04:56,827 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.23 vs. limit=22.5 +2024-09-17 09:05:13,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=189280.0, ans=0.125 +2024-09-17 09:05:24,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=189320.0, ans=0.025 +2024-09-17 09:05:33,531 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.10 vs. limit=10.0 +2024-09-17 09:05:40,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=189360.0, ans=0.125 +2024-09-17 09:05:47,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=189400.0, ans=0.0 +2024-09-17 09:05:49,139 INFO [train.py:1198] (0/2) Epoch 11, batch 2100, loss[loss=0.2631, ctc_loss=0.1603, cr_loss=0.3767, attn_decoder_loss=0.2662, over 29785.00 frames. ], tot_loss[loss=0.2635, ctc_loss=0.1667, cr_loss=0.4044, attn_decoder_loss=0.2653, over 5801026.20 frames. ], batch size: 81, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:05:50,610 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.724e+01 8.970e+01 9.676e+01 1.062e+02 4.848e+02, threshold=1.935e+02, percent-clipped=1.0 +2024-09-17 09:05:55,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=189400.0, ans=0.125 +2024-09-17 09:05:56,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=189400.0, ans=0.125 +2024-09-17 09:06:15,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=189440.0, ans=0.125 +2024-09-17 09:06:30,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=189480.0, ans=0.125 +2024-09-17 09:06:53,097 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:06:53,690 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.14 vs. 
limit=15.0 +2024-09-17 09:06:57,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=189560.0, ans=0.09899494936611666 +2024-09-17 09:06:57,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=189560.0, ans=0.125 +2024-09-17 09:06:58,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=189560.0, ans=0.0 +2024-09-17 09:06:59,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer_ff2.min_abs, batch_count=189560.0, ans=0.1 +2024-09-17 09:07:06,749 INFO [train.py:1198] (0/2) Epoch 11, batch 2150, loss[loss=0.255, ctc_loss=0.152, cr_loss=0.3915, attn_decoder_loss=0.2577, over 29432.00 frames. ], tot_loss[loss=0.2627, ctc_loss=0.1659, cr_loss=0.4037, attn_decoder_loss=0.2645, over 5815267.35 frames. ], batch size: 78, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:07:08,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=189600.0, ans=0.0 +2024-09-17 09:07:08,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=189600.0, ans=0.2 +2024-09-17 09:07:11,995 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.15 vs. limit=22.5 +2024-09-17 09:07:48,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=189680.0, ans=0.025 +2024-09-17 09:07:58,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=189720.0, ans=0.05 +2024-09-17 09:08:08,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=189760.0, ans=0.0 +2024-09-17 09:08:24,836 INFO [train.py:1198] (0/2) Epoch 11, batch 2200, loss[loss=0.2764, ctc_loss=0.1762, cr_loss=0.4225, attn_decoder_loss=0.2781, over 29632.00 frames. ], tot_loss[loss=0.263, ctc_loss=0.1664, cr_loss=0.4038, attn_decoder_loss=0.2648, over 5812008.77 frames. ], batch size: 86, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:08:26,327 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.251e+01 9.350e+01 9.957e+01 1.083e+02 2.059e+02, threshold=1.991e+02, percent-clipped=1.0 +2024-09-17 09:08:43,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=189840.0, ans=0.125 +2024-09-17 09:08:54,529 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.26 vs. limit=15.0 +2024-09-17 09:09:02,797 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:09:06,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten.whitening_limit, batch_count=189880.0, ans=15.0 +2024-09-17 09:09:40,357 INFO [train.py:1198] (0/2) Epoch 11, batch 2250, loss[loss=0.2544, ctc_loss=0.1547, cr_loss=0.3873, attn_decoder_loss=0.2568, over 29705.00 frames. ], tot_loss[loss=0.2629, ctc_loss=0.1663, cr_loss=0.404, attn_decoder_loss=0.2646, over 5811565.07 frames. 
], batch size: 82, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:09:40,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=190000.0, ans=0.035 +2024-09-17 09:09:54,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.62 vs. limit=15.0 +2024-09-17 09:10:06,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=190040.0, ans=0.2 +2024-09-17 09:10:24,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=190080.0, ans=0.0 +2024-09-17 09:10:32,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=190120.0, ans=0.5 +2024-09-17 09:10:45,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.59 vs. limit=15.0 +2024-09-17 09:10:57,794 INFO [train.py:1198] (0/2) Epoch 11, batch 2300, loss[loss=0.2366, ctc_loss=0.1397, cr_loss=0.3946, attn_decoder_loss=0.2386, over 29335.00 frames. ], tot_loss[loss=0.2617, ctc_loss=0.1655, cr_loss=0.4025, attn_decoder_loss=0.2635, over 5798975.57 frames. ], batch size: 71, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:10:59,281 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.762e+01 9.241e+01 9.973e+01 1.088e+02 2.493e+02, threshold=1.995e+02, percent-clipped=2.0 +2024-09-17 09:11:11,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=190240.0, ans=0.0 +2024-09-17 09:11:14,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=190240.0, ans=0.2 +2024-09-17 09:11:17,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=190240.0, ans=0.0 +2024-09-17 09:11:23,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=190240.0, ans=0.0 +2024-09-17 09:11:25,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=190240.0, ans=0.0 +2024-09-17 09:11:42,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=190320.0, ans=10.0 +2024-09-17 09:11:49,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=190320.0, ans=0.1 +2024-09-17 09:11:54,496 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.62 vs. limit=22.5 +2024-09-17 09:12:09,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=190360.0, ans=0.0 +2024-09-17 09:12:11,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=190360.0, ans=0.2 +2024-09-17 09:12:15,946 INFO [train.py:1198] (0/2) Epoch 11, batch 2350, loss[loss=0.276, ctc_loss=0.1777, cr_loss=0.4294, attn_decoder_loss=0.2774, over 29686.00 frames. ], tot_loss[loss=0.2625, ctc_loss=0.1661, cr_loss=0.4035, attn_decoder_loss=0.2642, over 5804386.65 frames. 
], batch size: 83, lr: 1.02e-02, grad_scale: 4.0 +2024-09-17 09:12:19,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=190400.0, ans=0.125 +2024-09-17 09:12:26,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=190400.0, ans=0.2 +2024-09-17 09:12:38,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=190440.0, ans=0.07 +2024-09-17 09:12:39,120 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:13:14,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.65 vs. limit=15.0 +2024-09-17 09:13:14,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.13 vs. limit=15.0 +2024-09-17 09:13:16,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=190560.0, ans=0.125 +2024-09-17 09:13:18,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=190560.0, ans=10.0 +2024-09-17 09:13:31,664 INFO [train.py:1198] (0/2) Epoch 11, batch 2400, loss[loss=0.254, ctc_loss=0.1557, cr_loss=0.3979, attn_decoder_loss=0.256, over 29531.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1665, cr_loss=0.4042, attn_decoder_loss=0.265, over 5807297.56 frames. ], batch size: 76, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:13:34,606 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.758e+01 9.144e+01 9.902e+01 1.071e+02 1.818e+02, threshold=1.980e+02, percent-clipped=0.0 +2024-09-17 09:14:01,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=190640.0, ans=0.2 +2024-09-17 09:14:11,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=190680.0, ans=0.125 +2024-09-17 09:14:21,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=190720.0, ans=0.125 +2024-09-17 09:14:38,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=5.47 vs. limit=12.0 +2024-09-17 09:14:50,026 INFO [train.py:1198] (0/2) Epoch 11, batch 2450, loss[loss=0.2739, ctc_loss=0.1741, cr_loss=0.4132, attn_decoder_loss=0.2758, over 29717.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.1674, cr_loss=0.4055, attn_decoder_loss=0.2658, over 5783302.04 frames. ], batch size: 82, lr: 1.02e-02, grad_scale: 4.0 +2024-09-17 09:14:57,104 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=13.50 vs. 
limit=15.0 +2024-09-17 09:15:03,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=190840.0, ans=0.0 +2024-09-17 09:15:33,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=190920.0, ans=0.1 +2024-09-17 09:15:35,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=190920.0, ans=0.0 +2024-09-17 09:16:00,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=190960.0, ans=0.025 +2024-09-17 09:16:00,724 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.31 vs. limit=15.0 +2024-09-17 09:16:06,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=191000.0, ans=0.0 +2024-09-17 09:16:06,660 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.61 vs. limit=22.5 +2024-09-17 09:16:07,524 INFO [train.py:1198] (0/2) Epoch 11, batch 2500, loss[loss=0.2827, ctc_loss=0.1759, cr_loss=0.3962, attn_decoder_loss=0.2857, over 29611.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.1673, cr_loss=0.4058, attn_decoder_loss=0.2658, over 5794113.25 frames. ], batch size: 86, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:16:07,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=191000.0, ans=0.0 +2024-09-17 09:16:12,137 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.899e+01 9.413e+01 9.956e+01 1.120e+02 1.816e+02, threshold=1.991e+02, percent-clipped=0.0 +2024-09-17 09:16:12,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=191000.0, ans=0.0 +2024-09-17 09:16:56,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_ff3.min_abs, batch_count=191120.0, ans=0.2 +2024-09-17 09:16:59,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=191120.0, ans=0.0 +2024-09-17 09:17:05,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=191120.0, ans=0.2 +2024-09-17 09:17:23,755 INFO [train.py:1198] (0/2) Epoch 11, batch 2550, loss[loss=0.239, ctc_loss=0.1487, cr_loss=0.3674, attn_decoder_loss=0.2408, over 29356.00 frames. ], tot_loss[loss=0.264, ctc_loss=0.1674, cr_loss=0.4054, attn_decoder_loss=0.2657, over 5798354.35 frames. 
], batch size: 67, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:17:37,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=191240.0, ans=0.0 +2024-09-17 09:17:45,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=191240.0, ans=0.0 +2024-09-17 09:17:57,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=191280.0, ans=0.0 +2024-09-17 09:18:19,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=191320.0, ans=0.125 +2024-09-17 09:18:19,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=191320.0, ans=0.125 +2024-09-17 09:18:39,911 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.96 vs. limit=22.5 +2024-09-17 09:18:41,963 INFO [train.py:1198] (0/2) Epoch 11, batch 2600, loss[loss=0.2658, ctc_loss=0.1698, cr_loss=0.3943, attn_decoder_loss=0.2677, over 29465.00 frames. ], tot_loss[loss=0.2644, ctc_loss=0.1674, cr_loss=0.4056, attn_decoder_loss=0.2661, over 5794505.11 frames. ], batch size: 78, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:18:45,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=191400.0, ans=0.2 +2024-09-17 09:18:46,523 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.056e+01 9.455e+01 1.019e+02 1.112e+02 3.211e+02, threshold=2.037e+02, percent-clipped=2.0 +2024-09-17 09:19:20,327 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.57 vs. limit=15.0 +2024-09-17 09:19:34,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=191520.0, ans=0.125 +2024-09-17 09:19:47,365 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.48 vs. limit=10.0 +2024-09-17 09:19:59,103 INFO [train.py:1198] (0/2) Epoch 11, batch 2650, loss[loss=0.2808, ctc_loss=0.1799, cr_loss=0.4219, attn_decoder_loss=0.2827, over 29222.00 frames. ], tot_loss[loss=0.2644, ctc_loss=0.1672, cr_loss=0.4055, attn_decoder_loss=0.2662, over 5801307.17 frames. 
], batch size: 100, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:19:59,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=191600.0, ans=0.125 +2024-09-17 09:20:06,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=191600.0, ans=0.0 +2024-09-17 09:20:22,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=191640.0, ans=0.1 +2024-09-17 09:20:22,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=191640.0, ans=0.125 +2024-09-17 09:20:26,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=191640.0, ans=0.025 +2024-09-17 09:20:26,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=191640.0, ans=0.125 +2024-09-17 09:20:40,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=191680.0, ans=0.125 +2024-09-17 09:20:43,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=191720.0, ans=0.125 +2024-09-17 09:20:56,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=191720.0, ans=0.125 +2024-09-17 09:21:14,609 INFO [train.py:1198] (0/2) Epoch 11, batch 2700, loss[loss=0.2816, ctc_loss=0.1671, cr_loss=0.4044, attn_decoder_loss=0.2853, over 29540.00 frames. ], tot_loss[loss=0.2649, ctc_loss=0.1676, cr_loss=0.4062, attn_decoder_loss=0.2667, over 5796694.53 frames. ], batch size: 87, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:21:20,545 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.625e+01 9.206e+01 9.835e+01 1.075e+02 2.605e+02, threshold=1.967e+02, percent-clipped=2.0 +2024-09-17 09:21:20,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=191800.0, ans=0.025 +2024-09-17 09:21:20,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=191800.0, ans=0.125 +2024-09-17 09:21:21,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.69 vs. limit=22.5 +2024-09-17 09:21:48,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=191880.0, ans=0.0 +2024-09-17 09:21:56,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=191880.0, ans=0.0 +2024-09-17 09:22:05,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=191920.0, ans=0.0 +2024-09-17 09:22:12,173 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.48 vs. 
limit=22.5 +2024-09-17 09:22:26,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=191960.0, ans=0.125 +2024-09-17 09:22:31,571 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-48000.pt +2024-09-17 09:22:39,940 INFO [train.py:1198] (0/2) Epoch 11, batch 2750, loss[loss=0.2535, ctc_loss=0.1587, cr_loss=0.4204, attn_decoder_loss=0.2546, over 29511.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1667, cr_loss=0.4043, attn_decoder_loss=0.2654, over 5794957.27 frames. ], batch size: 75, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:22:46,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=192000.0, ans=0.0 +2024-09-17 09:22:50,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=192000.0, ans=0.125 +2024-09-17 09:22:57,236 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.78 vs. limit=12.0 +2024-09-17 09:23:17,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=192080.0, ans=0.125 +2024-09-17 09:23:32,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=192120.0, ans=0.125 +2024-09-17 09:23:40,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=192160.0, ans=0.0 +2024-09-17 09:23:42,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=192160.0, ans=0.2 +2024-09-17 09:23:44,234 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.99 vs. limit=15.0 +2024-09-17 09:23:48,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=192160.0, ans=0.1 +2024-09-17 09:23:57,820 INFO [train.py:1198] (0/2) Epoch 11, batch 2800, loss[loss=0.3007, ctc_loss=0.2352, cr_loss=0.4176, attn_decoder_loss=0.2987, over 20215.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1674, cr_loss=0.4053, attn_decoder_loss=0.2656, over 5775822.31 frames. ], batch size: 211, lr: 1.02e-02, grad_scale: 16.0 +2024-09-17 09:23:59,890 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=14.41 vs. limit=15.0 +2024-09-17 09:24:04,596 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.74 vs. limit=22.5 +2024-09-17 09:24:05,067 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.696e+01 8.863e+01 9.648e+01 1.109e+02 4.510e+02, threshold=1.930e+02, percent-clipped=4.0 +2024-09-17 09:24:05,960 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.73 vs. 
limit=15.0 +2024-09-17 09:24:07,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=192200.0, ans=0.0 +2024-09-17 09:24:08,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=192200.0, ans=0.0 +2024-09-17 09:24:19,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=192240.0, ans=0.2 +2024-09-17 09:24:22,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=192240.0, ans=0.0 +2024-09-17 09:24:49,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=192320.0, ans=0.125 +2024-09-17 09:25:13,021 INFO [train.py:1198] (0/2) Epoch 11, batch 2850, loss[loss=0.2512, ctc_loss=0.1473, cr_loss=0.3796, attn_decoder_loss=0.2543, over 29459.00 frames. ], tot_loss[loss=0.2646, ctc_loss=0.1682, cr_loss=0.4055, attn_decoder_loss=0.2663, over 5760529.73 frames. ], batch size: 77, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:25:15,209 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.11 vs. limit=10.0 +2024-09-17 09:25:19,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=192400.0, ans=0.125 +2024-09-17 09:25:25,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=192400.0, ans=0.125 +2024-09-17 09:25:54,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=192480.0, ans=0.0 +2024-09-17 09:25:57,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=192480.0, ans=0.0 +2024-09-17 09:26:25,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=192560.0, ans=0.125 +2024-09-17 09:26:30,890 INFO [train.py:1198] (0/2) Epoch 11, batch 2900, loss[loss=0.2475, ctc_loss=0.1531, cr_loss=0.3854, attn_decoder_loss=0.2494, over 29423.00 frames. ], tot_loss[loss=0.2651, ctc_loss=0.1682, cr_loss=0.4065, attn_decoder_loss=0.2669, over 5786612.56 frames. ], batch size: 79, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:26:32,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=192600.0, ans=0.0 +2024-09-17 09:26:38,267 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.930e+01 9.694e+01 1.018e+02 1.122e+02 2.522e+02, threshold=2.035e+02, percent-clipped=2.0 +2024-09-17 09:26:48,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.11 vs. 
limit=12.0 +2024-09-17 09:26:50,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=192640.0, ans=0.025 +2024-09-17 09:27:13,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=192680.0, ans=0.125 +2024-09-17 09:27:21,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=192720.0, ans=0.125 +2024-09-17 09:27:49,119 INFO [train.py:1198] (0/2) Epoch 11, batch 2950, loss[loss=0.2607, ctc_loss=0.1669, cr_loss=0.4278, attn_decoder_loss=0.2616, over 29511.00 frames. ], tot_loss[loss=0.2637, ctc_loss=0.1667, cr_loss=0.404, attn_decoder_loss=0.2655, over 5782239.33 frames. ], batch size: 75, lr: 1.01e-02, grad_scale: 4.0 +2024-09-17 09:27:58,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=192800.0, ans=0.025 +2024-09-17 09:28:18,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=192880.0, ans=0.125 +2024-09-17 09:28:41,536 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.09 vs. limit=12.0 +2024-09-17 09:29:04,914 INFO [train.py:1198] (0/2) Epoch 11, batch 3000, loss[loss=0.2658, ctc_loss=0.1638, cr_loss=0.3938, attn_decoder_loss=0.2684, over 29756.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1666, cr_loss=0.4038, attn_decoder_loss=0.2654, over 5782372.17 frames. ], batch size: 81, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:29:04,915 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 09:29:17,446 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([2.9760, 4.1393, 4.3561, 4.3107], device='cuda:0') +2024-09-17 09:29:24,076 INFO [train.py:1230] (0/2) Epoch 11, validation: loss=0.2124, ctc_loss=0.04636, cr_loss=4.851e-15, attn_decoder_loss=0.2308, over 944034.00 frames. +2024-09-17 09:29:24,076 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 09:29:33,324 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.860e+01 9.274e+01 9.995e+01 1.117e+02 3.922e+02, threshold=1.999e+02, percent-clipped=3.0 +2024-09-17 09:30:16,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=193120.0, ans=0.2 +2024-09-17 09:30:17,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=193120.0, ans=0.2 +2024-09-17 09:30:22,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=193120.0, ans=0.1 +2024-09-17 09:30:34,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=193160.0, ans=0.1 +2024-09-17 09:30:39,846 INFO [train.py:1198] (0/2) Epoch 11, batch 3050, loss[loss=0.2577, ctc_loss=0.1717, cr_loss=0.3986, attn_decoder_loss=0.2584, over 29543.00 frames. ], tot_loss[loss=0.2646, ctc_loss=0.1676, cr_loss=0.4055, attn_decoder_loss=0.2664, over 5777200.52 frames. 
], batch size: 76, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:30:52,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=193200.0, ans=0.125 +2024-09-17 09:31:17,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=193280.0, ans=0.0 +2024-09-17 09:31:48,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=193360.0, ans=0.125 +2024-09-17 09:31:51,986 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.88 vs. limit=15.0 +2024-09-17 09:31:57,177 INFO [train.py:1198] (0/2) Epoch 11, batch 3100, loss[loss=0.2875, ctc_loss=0.1834, cr_loss=0.4555, attn_decoder_loss=0.2889, over 29270.00 frames. ], tot_loss[loss=0.2643, ctc_loss=0.1675, cr_loss=0.4053, attn_decoder_loss=0.266, over 5776739.36 frames. ], batch size: 100, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:32:07,733 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.928e+01 9.888e+01 1.137e+02 1.275e+02 2.184e+02, threshold=2.273e+02, percent-clipped=1.0 +2024-09-17 09:32:17,785 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.92 vs. limit=22.5 +2024-09-17 09:32:36,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=193480.0, ans=0.1 +2024-09-17 09:32:42,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=193520.0, ans=0.1 +2024-09-17 09:32:47,888 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.44 vs. limit=15.0 +2024-09-17 09:32:50,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=193520.0, ans=0.125 +2024-09-17 09:33:07,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=193560.0, ans=0.0 +2024-09-17 09:33:09,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=193560.0, ans=0.0 +2024-09-17 09:33:15,296 INFO [train.py:1198] (0/2) Epoch 11, batch 3150, loss[loss=0.2783, ctc_loss=0.1748, cr_loss=0.4258, attn_decoder_loss=0.2803, over 28956.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.1672, cr_loss=0.4052, attn_decoder_loss=0.2659, over 5782795.76 frames. ], batch size: 104, lr: 1.01e-02, grad_scale: 4.0 +2024-09-17 09:33:37,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=193640.0, ans=0.125 +2024-09-17 09:33:38,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=193640.0, ans=0.1 +2024-09-17 09:33:45,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=193680.0, ans=0.125 +2024-09-17 09:33:47,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=21.97 vs. 
limit=22.5 +2024-09-17 09:33:57,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=193680.0, ans=0.0 +2024-09-17 09:34:10,677 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.24 vs. limit=10.0 +2024-09-17 09:34:15,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=193760.0, ans=0.125 +2024-09-17 09:34:21,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=193760.0, ans=0.0 +2024-09-17 09:34:30,662 INFO [train.py:1198] (0/2) Epoch 11, batch 3200, loss[loss=0.2712, ctc_loss=0.1708, cr_loss=0.422, attn_decoder_loss=0.273, over 29442.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1668, cr_loss=0.4051, attn_decoder_loss=0.2657, over 5794093.28 frames. ], batch size: 79, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:34:38,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=193800.0, ans=0.025 +2024-09-17 09:34:42,601 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.896e+01 9.244e+01 9.694e+01 1.030e+02 2.478e+02, threshold=1.939e+02, percent-clipped=1.0 +2024-09-17 09:34:45,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.01 vs. limit=15.0 +2024-09-17 09:35:01,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=193880.0, ans=0.125 +2024-09-17 09:35:10,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=193880.0, ans=0.0 +2024-09-17 09:35:18,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=193920.0, ans=0.1 +2024-09-17 09:35:21,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=193920.0, ans=0.125 +2024-09-17 09:35:24,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=193920.0, ans=0.5 +2024-09-17 09:35:49,419 INFO [train.py:1198] (0/2) Epoch 11, batch 3250, loss[loss=0.2694, ctc_loss=0.1746, cr_loss=0.4148, attn_decoder_loss=0.2707, over 29708.00 frames. ], tot_loss[loss=0.2647, ctc_loss=0.1675, cr_loss=0.4065, attn_decoder_loss=0.2664, over 5801643.74 frames. ], batch size: 84, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:35:59,267 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.50 vs. 
limit=22.5 +2024-09-17 09:36:33,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=194120.0, ans=0.025 +2024-09-17 09:36:48,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=194160.0, ans=0.125 +2024-09-17 09:36:59,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=194160.0, ans=0.0 +2024-09-17 09:37:07,223 INFO [train.py:1198] (0/2) Epoch 11, batch 3300, loss[loss=0.2731, ctc_loss=0.1694, cr_loss=0.3897, attn_decoder_loss=0.276, over 28629.00 frames. ], tot_loss[loss=0.2629, ctc_loss=0.166, cr_loss=0.4042, attn_decoder_loss=0.2647, over 5797994.07 frames. ], batch size: 112, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:37:19,493 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.100e+01 9.489e+01 1.035e+02 1.154e+02 2.549e+02, threshold=2.070e+02, percent-clipped=1.0 +2024-09-17 09:37:19,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=194200.0, ans=0.025 +2024-09-17 09:37:25,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=194240.0, ans=0.1 +2024-09-17 09:37:51,997 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.63 vs. limit=15.0 +2024-09-17 09:38:22,981 INFO [train.py:1198] (0/2) Epoch 11, batch 3350, loss[loss=0.282, ctc_loss=0.1754, cr_loss=0.404, attn_decoder_loss=0.2849, over 28774.00 frames. ], tot_loss[loss=0.2635, ctc_loss=0.1665, cr_loss=0.4046, attn_decoder_loss=0.2652, over 5774748.66 frames. ], batch size: 104, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:38:38,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=194440.0, ans=0.0 +2024-09-17 09:38:41,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=194440.0, ans=0.07 +2024-09-17 09:38:49,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=194440.0, ans=0.1 +2024-09-17 09:38:58,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=194480.0, ans=0.025 +2024-09-17 09:39:05,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=194480.0, ans=0.125 +2024-09-17 09:39:12,466 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.73 vs. limit=8.0 +2024-09-17 09:39:27,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=194560.0, ans=0.125 +2024-09-17 09:39:28,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=194560.0, ans=0.025 +2024-09-17 09:39:40,726 INFO [train.py:1198] (0/2) Epoch 11, batch 3400, loss[loss=0.2408, ctc_loss=0.1485, cr_loss=0.3977, attn_decoder_loss=0.2422, over 29323.00 frames. ], tot_loss[loss=0.2638, ctc_loss=0.1671, cr_loss=0.4045, attn_decoder_loss=0.2655, over 5768936.10 frames. 
], batch size: 67, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:39:51,025 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.76 vs. limit=22.5 +2024-09-17 09:39:52,867 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.042e+01 9.232e+01 1.004e+02 1.095e+02 3.484e+02, threshold=2.008e+02, percent-clipped=1.0 +2024-09-17 09:40:14,953 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.46 vs. limit=15.0 +2024-09-17 09:40:32,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=194720.0, ans=0.125 +2024-09-17 09:40:58,438 INFO [train.py:1198] (0/2) Epoch 11, batch 3450, loss[loss=0.2754, ctc_loss=0.1807, cr_loss=0.426, attn_decoder_loss=0.2765, over 28404.00 frames. ], tot_loss[loss=0.2638, ctc_loss=0.1668, cr_loss=0.4054, attn_decoder_loss=0.2656, over 5775925.10 frames. ], batch size: 111, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:41:03,436 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:41:32,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn2.whiten.whitening_limit, batch_count=194880.0, ans=22.5 +2024-09-17 09:41:34,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.79 vs. limit=22.5 +2024-09-17 09:41:48,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_ff3.min_abs, batch_count=194920.0, ans=0.2 +2024-09-17 09:41:59,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=194960.0, ans=0.0 +2024-09-17 09:42:11,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=194960.0, ans=0.125 +2024-09-17 09:42:12,055 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.26 vs. limit=22.5 +2024-09-17 09:42:13,890 INFO [train.py:1198] (0/2) Epoch 11, batch 3500, loss[loss=0.2243, ctc_loss=0.13, cr_loss=0.3367, attn_decoder_loss=0.2273, over 29325.00 frames. ], tot_loss[loss=0.2631, ctc_loss=0.1665, cr_loss=0.4047, attn_decoder_loss=0.2649, over 5777987.30 frames. ], batch size: 71, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:42:14,998 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.70 vs. limit=10.0 +2024-09-17 09:42:19,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.28 vs. 
limit=15.0 +2024-09-17 09:42:26,191 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.959e+01 9.210e+01 9.867e+01 1.123e+02 1.745e+02, threshold=1.973e+02, percent-clipped=0.0 +2024-09-17 09:42:26,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=195000.0, ans=0.125 +2024-09-17 09:42:29,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=195040.0, ans=0.2 +2024-09-17 09:42:31,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.78 vs. limit=12.0 +2024-09-17 09:43:14,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=195160.0, ans=0.2 +2024-09-17 09:43:29,167 INFO [train.py:1198] (0/2) Epoch 11, batch 3550, loss[loss=0.2607, ctc_loss=0.1563, cr_loss=0.3877, attn_decoder_loss=0.2636, over 29702.00 frames. ], tot_loss[loss=0.2628, ctc_loss=0.166, cr_loss=0.4035, attn_decoder_loss=0.2646, over 5783763.48 frames. ], batch size: 89, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:43:57,082 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.66 vs. limit=6.0 +2024-09-17 09:43:58,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=195240.0, ans=0.1 +2024-09-17 09:44:08,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=195280.0, ans=0.025 +2024-09-17 09:44:24,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=195320.0, ans=0.1 +2024-09-17 09:44:32,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=195360.0, ans=0.125 +2024-09-17 09:44:37,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.87 vs. limit=10.0 +2024-09-17 09:44:41,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=195360.0, ans=0.1 +2024-09-17 09:44:42,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=195360.0, ans=0.0 +2024-09-17 09:44:45,311 INFO [train.py:1198] (0/2) Epoch 11, batch 3600, loss[loss=0.2563, ctc_loss=0.1612, cr_loss=0.396, attn_decoder_loss=0.2581, over 29503.00 frames. ], tot_loss[loss=0.2627, ctc_loss=0.1657, cr_loss=0.4032, attn_decoder_loss=0.2646, over 5793398.36 frames. 
], batch size: 77, lr: 1.01e-02, grad_scale: 16.0 +2024-09-17 09:44:47,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=195400.0, ans=0.0 +2024-09-17 09:44:48,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=195400.0, ans=0.0 +2024-09-17 09:44:58,575 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.269e+01 9.046e+01 9.949e+01 1.066e+02 3.484e+02, threshold=1.990e+02, percent-clipped=1.0 +2024-09-17 09:45:04,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=195440.0, ans=0.0 +2024-09-17 09:45:10,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=195440.0, ans=0.0 +2024-09-17 09:45:24,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=195480.0, ans=0.125 +2024-09-17 09:45:28,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=195520.0, ans=0.0 +2024-09-17 09:45:37,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=195520.0, ans=0.125 +2024-09-17 09:45:39,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.87 vs. limit=15.0 +2024-09-17 09:45:44,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.17 vs. limit=12.0 +2024-09-17 09:45:49,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=195560.0, ans=0.1 +2024-09-17 09:45:59,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=195600.0, ans=0.2 +2024-09-17 09:46:01,226 INFO [train.py:1198] (0/2) Epoch 11, batch 3650, loss[loss=0.2818, ctc_loss=0.1807, cr_loss=0.4038, attn_decoder_loss=0.284, over 29513.00 frames. ], tot_loss[loss=0.2622, ctc_loss=0.165, cr_loss=0.4019, attn_decoder_loss=0.264, over 5795049.37 frames. ], batch size: 90, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:46:01,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=195600.0, ans=0.125 +2024-09-17 09:46:09,203 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.86 vs. limit=10.0 +2024-09-17 09:46:14,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=195640.0, ans=0.2 +2024-09-17 09:46:20,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=195640.0, ans=0.2 +2024-09-17 09:46:23,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=195640.0, ans=0.0 +2024-09-17 09:46:43,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.60 vs. 
limit=15.0 +2024-09-17 09:46:49,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=195720.0, ans=0.0 +2024-09-17 09:47:03,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=195760.0, ans=0.0 +2024-09-17 09:47:05,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=195760.0, ans=0.0 +2024-09-17 09:47:15,840 INFO [train.py:1198] (0/2) Epoch 11, batch 3700, loss[loss=0.2684, ctc_loss=0.1689, cr_loss=0.3907, attn_decoder_loss=0.2708, over 29709.00 frames. ], tot_loss[loss=0.2624, ctc_loss=0.1653, cr_loss=0.4025, attn_decoder_loss=0.2643, over 5806028.67 frames. ], batch size: 84, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:47:20,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=195800.0, ans=0.0 +2024-09-17 09:47:29,199 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.150e+01 9.197e+01 9.899e+01 1.076e+02 2.230e+02, threshold=1.980e+02, percent-clipped=1.0 +2024-09-17 09:47:29,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=195840.0, ans=0.125 +2024-09-17 09:47:39,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.18 vs. limit=15.0 +2024-09-17 09:47:43,338 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.53 vs. limit=15.0 +2024-09-17 09:48:10,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=195920.0, ans=0.025 +2024-09-17 09:48:22,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=195960.0, ans=0.0 +2024-09-17 09:48:30,211 INFO [train.py:1198] (0/2) Epoch 11, batch 3750, loss[loss=0.2371, ctc_loss=0.1461, cr_loss=0.3559, attn_decoder_loss=0.2393, over 29359.00 frames. ], tot_loss[loss=0.2622, ctc_loss=0.165, cr_loss=0.4017, attn_decoder_loss=0.264, over 5809283.93 frames. ], batch size: 67, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:48:49,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=196040.0, ans=0.1 +2024-09-17 09:49:10,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=196080.0, ans=0.1 +2024-09-17 09:49:11,153 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.54 vs. limit=15.0 +2024-09-17 09:49:34,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=196160.0, ans=0.125 +2024-09-17 09:49:36,422 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.28 vs. limit=15.0 +2024-09-17 09:49:44,089 INFO [train.py:1198] (0/2) Epoch 11, batch 3800, loss[loss=0.2594, ctc_loss=0.1631, cr_loss=0.3918, attn_decoder_loss=0.2615, over 29620.00 frames. ], tot_loss[loss=0.2616, ctc_loss=0.1646, cr_loss=0.401, attn_decoder_loss=0.2635, over 5799597.88 frames. 
], batch size: 86, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:49:47,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=196200.0, ans=0.2 +2024-09-17 09:49:54,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=196200.0, ans=0.125 +2024-09-17 09:49:57,597 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.230e+01 9.992e+01 1.078e+02 1.190e+02 1.793e+02, threshold=2.156e+02, percent-clipped=0.0 +2024-09-17 09:50:03,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=196240.0, ans=0.125 +2024-09-17 09:50:36,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=196320.0, ans=0.125 +2024-09-17 09:50:38,771 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.34 vs. limit=15.0 +2024-09-17 09:51:00,333 INFO [train.py:1198] (0/2) Epoch 11, batch 3850, loss[loss=0.272, ctc_loss=0.1713, cr_loss=0.4005, attn_decoder_loss=0.2743, over 29268.00 frames. ], tot_loss[loss=0.2614, ctc_loss=0.1644, cr_loss=0.4008, attn_decoder_loss=0.2633, over 5812701.42 frames. ], batch size: 100, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:51:14,718 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.70 vs. limit=22.5 +2024-09-17 09:51:26,486 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.29 vs. limit=15.0 +2024-09-17 09:51:58,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=196520.0, ans=0.125 +2024-09-17 09:51:58,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=196520.0, ans=0.125 +2024-09-17 09:52:05,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=196560.0, ans=0.025 +2024-09-17 09:52:16,108 INFO [train.py:1198] (0/2) Epoch 11, batch 3900, loss[loss=0.2813, ctc_loss=0.1814, cr_loss=0.4221, attn_decoder_loss=0.283, over 29620.00 frames. ], tot_loss[loss=0.262, ctc_loss=0.1649, cr_loss=0.402, attn_decoder_loss=0.2638, over 5816717.29 frames. ], batch size: 86, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:52:25,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=196600.0, ans=0.09899494936611666 +2024-09-17 09:52:29,224 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.740e+01 9.236e+01 9.717e+01 1.078e+02 1.405e+02, threshold=1.943e+02, percent-clipped=0.0 +2024-09-17 09:52:51,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=196680.0, ans=0.0 +2024-09-17 09:53:30,389 INFO [train.py:1198] (0/2) Epoch 11, batch 3950, loss[loss=0.2718, ctc_loss=0.17, cr_loss=0.4286, attn_decoder_loss=0.2735, over 29438.00 frames. ], tot_loss[loss=0.2619, ctc_loss=0.1645, cr_loss=0.402, attn_decoder_loss=0.2638, over 5835963.12 frames. 
], batch size: 97, lr: 1.00e-02, grad_scale: 4.0 +2024-09-17 09:54:04,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=196880.0, ans=0.125 +2024-09-17 09:54:16,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=196920.0, ans=0.0 +2024-09-17 09:54:35,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=196960.0, ans=0.0 +2024-09-17 09:54:43,897 INFO [train.py:1198] (0/2) Epoch 11, batch 4000, loss[loss=0.2555, ctc_loss=0.1645, cr_loss=0.4011, attn_decoder_loss=0.2567, over 29503.00 frames. ], tot_loss[loss=0.262, ctc_loss=0.1649, cr_loss=0.402, attn_decoder_loss=0.2639, over 5813709.51 frames. ], batch size: 74, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:54:47,565 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.53 vs. limit=22.5 +2024-09-17 09:54:58,574 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.223e+01 9.139e+01 9.851e+01 1.070e+02 1.973e+02, threshold=1.970e+02, percent-clipped=1.0 +2024-09-17 09:54:58,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=197040.0, ans=0.025 +2024-09-17 09:55:03,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=197040.0, ans=0.0 +2024-09-17 09:55:20,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=197080.0, ans=0.125 +2024-09-17 09:55:24,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=197080.0, ans=0.2 +2024-09-17 09:55:38,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=197120.0, ans=0.0 +2024-09-17 09:55:52,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=197160.0, ans=0.1 +2024-09-17 09:55:59,402 INFO [train.py:1198] (0/2) Epoch 11, batch 4050, loss[loss=0.3126, ctc_loss=0.2353, cr_loss=0.4227, attn_decoder_loss=0.3118, over 19807.00 frames. ], tot_loss[loss=0.2617, ctc_loss=0.1648, cr_loss=0.4013, attn_decoder_loss=0.2636, over 5796320.19 frames. ], batch size: 210, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:56:02,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=197200.0, ans=0.125 +2024-09-17 09:56:08,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=197200.0, ans=0.125 +2024-09-17 09:56:16,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=197240.0, ans=0.0 +2024-09-17 09:56:31,926 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.19 vs. 
limit=15.0 +2024-09-17 09:56:43,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=197320.0, ans=0.2 +2024-09-17 09:56:47,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=197320.0, ans=0.2 +2024-09-17 09:56:55,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=197320.0, ans=0.125 +2024-09-17 09:57:09,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=197360.0, ans=0.125 +2024-09-17 09:57:12,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=197400.0, ans=0.0 +2024-09-17 09:57:13,997 INFO [train.py:1198] (0/2) Epoch 11, batch 4100, loss[loss=0.2738, ctc_loss=0.1673, cr_loss=0.4028, attn_decoder_loss=0.2767, over 29541.00 frames. ], tot_loss[loss=0.2619, ctc_loss=0.1647, cr_loss=0.4013, attn_decoder_loss=0.2638, over 5792508.45 frames. ], batch size: 90, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:57:17,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=197400.0, ans=0.1 +2024-09-17 09:57:30,189 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.589e+01 9.399e+01 1.013e+02 1.118e+02 3.429e+02, threshold=2.026e+02, percent-clipped=2.0 +2024-09-17 09:57:42,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=197480.0, ans=0.2 +2024-09-17 09:57:55,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=197480.0, ans=0.2 +2024-09-17 09:58:05,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=197520.0, ans=0.0 +2024-09-17 09:58:05,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=197520.0, ans=0.125 +2024-09-17 09:58:22,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.82 vs. limit=22.5 +2024-09-17 09:58:28,185 INFO [train.py:1198] (0/2) Epoch 11, batch 4150, loss[loss=0.2611, ctc_loss=0.1755, cr_loss=0.4162, attn_decoder_loss=0.2613, over 29495.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.165, cr_loss=0.4021, attn_decoder_loss=0.2636, over 5798602.36 frames. ], batch size: 77, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:58:57,395 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.65 vs. limit=15.0 +2024-09-17 09:59:34,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=197760.0, ans=0.025 +2024-09-17 09:59:37,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=197760.0, ans=0.0 +2024-09-17 09:59:42,929 INFO [train.py:1198] (0/2) Epoch 11, batch 4200, loss[loss=0.2771, ctc_loss=0.1836, cr_loss=0.4513, attn_decoder_loss=0.2775, over 29515.00 frames. ], tot_loss[loss=0.2622, ctc_loss=0.1653, cr_loss=0.4028, attn_decoder_loss=0.2641, over 5800223.46 frames. 
], batch size: 90, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:59:59,216 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.965e+01 8.983e+01 9.678e+01 1.042e+02 2.526e+02, threshold=1.936e+02, percent-clipped=1.0 +2024-09-17 09:59:59,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=197840.0, ans=0.125 +2024-09-17 10:00:22,834 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.49 vs. limit=10.0 +2024-09-17 10:00:31,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=197920.0, ans=0.125 +2024-09-17 10:00:57,507 INFO [train.py:1198] (0/2) Epoch 11, batch 4250, loss[loss=0.2517, ctc_loss=0.1547, cr_loss=0.3908, attn_decoder_loss=0.2538, over 29517.00 frames. ], tot_loss[loss=0.2624, ctc_loss=0.1651, cr_loss=0.4026, attn_decoder_loss=0.2643, over 5805909.02 frames. ], batch size: 74, lr: 1.00e-02, grad_scale: 4.0 +2024-09-17 10:01:21,209 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:02:02,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=198160.0, ans=15.0 +2024-09-17 10:02:09,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=198200.0, ans=0.125 +2024-09-17 10:02:11,149 INFO [train.py:1198] (0/2) Epoch 11, batch 4300, loss[loss=0.2852, ctc_loss=0.1843, cr_loss=0.4495, attn_decoder_loss=0.2864, over 29541.00 frames. ], tot_loss[loss=0.263, ctc_loss=0.1655, cr_loss=0.4039, attn_decoder_loss=0.2648, over 5794604.47 frames. ], batch size: 87, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 10:02:19,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=198200.0, ans=0.125 +2024-09-17 10:02:19,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=198200.0, ans=0.125 +2024-09-17 10:02:23,969 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.71 vs. limit=15.0 +2024-09-17 10:02:29,144 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.197e+01 9.375e+01 1.006e+02 1.083e+02 2.279e+02, threshold=2.011e+02, percent-clipped=1.0 +2024-09-17 10:02:33,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=198240.0, ans=0.025 +2024-09-17 10:03:13,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.89 vs. limit=12.0 +2024-09-17 10:03:17,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=198360.0, ans=0.025 +2024-09-17 10:03:20,689 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.70 vs. limit=15.0 +2024-09-17 10:03:23,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.93 vs. 
limit=15.0 +2024-09-17 10:03:27,241 INFO [train.py:1198] (0/2) Epoch 11, batch 4350, loss[loss=0.284, ctc_loss=0.1819, cr_loss=0.4354, attn_decoder_loss=0.2856, over 29471.00 frames. ], tot_loss[loss=0.2666, ctc_loss=0.1686, cr_loss=0.4084, attn_decoder_loss=0.2684, over 5796074.70 frames. ], batch size: 97, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 10:03:27,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=198400.0, ans=0.125 +2024-09-17 10:03:29,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=198400.0, ans=0.2 +2024-09-17 10:03:35,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=198400.0, ans=0.025 +2024-09-17 10:03:40,267 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.32 vs. limit=12.0 +2024-09-17 10:03:42,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=198440.0, ans=0.125 +2024-09-17 10:03:59,025 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.12 vs. limit=12.0 +2024-09-17 10:04:18,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=198520.0, ans=0.1 +2024-09-17 10:04:19,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.30 vs. limit=15.0 +2024-09-17 10:04:24,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=198560.0, ans=0.125 +2024-09-17 10:04:26,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=198560.0, ans=10.0 +2024-09-17 10:04:31,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=198560.0, ans=0.0 +2024-09-17 10:04:40,225 INFO [train.py:1198] (0/2) Epoch 11, batch 4400, loss[loss=0.2749, ctc_loss=0.1854, cr_loss=0.41, attn_decoder_loss=0.2757, over 27294.00 frames. ], tot_loss[loss=0.2688, ctc_loss=0.1704, cr_loss=0.411, attn_decoder_loss=0.2706, over 5766234.78 frames. ], batch size: 124, lr: 1.00e-02, grad_scale: 16.0 +2024-09-17 10:04:49,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=198600.0, ans=0.0 +2024-09-17 10:04:56,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=198640.0, ans=0.0 +2024-09-17 10:04:59,277 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.516e+01 9.761e+01 1.030e+02 1.162e+02 9.107e+02, threshold=2.060e+02, percent-clipped=3.0 +2024-09-17 10:05:30,097 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.12 vs. 
limit=22.5 +2024-09-17 10:05:40,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=198760.0, ans=0.125 +2024-09-17 10:05:54,741 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.84 vs. limit=15.0 +2024-09-17 10:05:55,218 INFO [train.py:1198] (0/2) Epoch 11, batch 4450, loss[loss=0.2922, ctc_loss=0.2213, cr_loss=0.4232, attn_decoder_loss=0.2907, over 19948.00 frames. ], tot_loss[loss=0.2724, ctc_loss=0.1765, cr_loss=0.416, attn_decoder_loss=0.2738, over 5570670.87 frames. ], batch size: 209, lr: 9.99e-03, grad_scale: 8.0 +2024-09-17 10:05:57,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=198800.0, ans=0.0 +2024-09-17 10:05:57,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.35 vs. limit=22.5 +2024-09-17 10:06:23,474 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.92 vs. limit=15.0 +2024-09-17 10:06:34,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=198880.0, ans=0.125 +2024-09-17 10:06:43,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=198920.0, ans=0.125 +2024-09-17 10:06:45,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=198920.0, ans=0.0 +2024-09-17 10:06:45,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=198920.0, ans=0.125 +2024-09-17 10:06:45,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=198920.0, ans=0.125 +2024-09-17 10:07:08,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=198960.0, ans=0.0 +2024-09-17 10:07:08,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=198960.0, ans=0.0 +2024-09-17 10:07:11,094 INFO [train.py:1198] (0/2) Epoch 11, batch 4500, loss[loss=0.2945, ctc_loss=0.2246, cr_loss=0.4396, attn_decoder_loss=0.2925, over 20357.00 frames. ], tot_loss[loss=0.276, ctc_loss=0.1831, cr_loss=0.4184, attn_decoder_loss=0.277, over 5228962.96 frames. 
], batch size: 210, lr: 9.99e-03, grad_scale: 8.0 +2024-09-17 10:07:11,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=199000.0, ans=0.125 +2024-09-17 10:07:21,754 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:07:31,558 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.514e+01 1.082e+02 1.141e+02 1.239e+02 5.446e+02, threshold=2.282e+02, percent-clipped=2.0 +2024-09-17 10:07:47,669 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-11.pt +2024-09-17 10:08:38,959 INFO [train.py:1198] (0/2) Epoch 12, batch 0, loss[loss=0.2504, ctc_loss=0.1528, cr_loss=0.3984, attn_decoder_loss=0.2524, over 29594.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1528, cr_loss=0.3984, attn_decoder_loss=0.2524, over 29594.00 frames. ], batch size: 73, lr: 9.56e-03, grad_scale: 16.0 +2024-09-17 10:08:38,960 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 10:08:46,432 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([4.0889, 3.9492, 3.6319, 3.4240], device='cuda:0') +2024-09-17 10:08:57,356 INFO [train.py:1230] (0/2) Epoch 12, validation: loss=0.2149, ctc_loss=0.04611, cr_loss=4.481e-15, attn_decoder_loss=0.2337, over 944034.00 frames. +2024-09-17 10:08:57,357 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 10:09:08,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=199100.0, ans=0.0 +2024-09-17 10:09:32,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=199180.0, ans=0.1 +2024-09-17 10:09:50,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=199220.0, ans=15.0 +2024-09-17 10:09:54,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=199220.0, ans=10.0 +2024-09-17 10:09:58,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=199260.0, ans=0.0 +2024-09-17 10:10:13,553 INFO [train.py:1198] (0/2) Epoch 12, batch 50, loss[loss=0.2316, ctc_loss=0.1392, cr_loss=0.3646, attn_decoder_loss=0.2338, over 29436.00 frames. ], tot_loss[loss=0.2654, ctc_loss=0.1689, cr_loss=0.4098, attn_decoder_loss=0.267, over 1267159.47 frames. 
], batch size: 70, lr: 9.56e-03, grad_scale: 8.0 +2024-09-17 10:10:32,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_ff2.min_abs, batch_count=199340.0, ans=0.1 +2024-09-17 10:10:37,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=199340.0, ans=0.125 +2024-09-17 10:10:40,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=199340.0, ans=0.0 +2024-09-17 10:10:44,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=199380.0, ans=0.0 +2024-09-17 10:10:50,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=199380.0, ans=0.125 +2024-09-17 10:11:16,127 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.442e+01 9.517e+01 1.012e+02 1.140e+02 5.609e+02, threshold=2.023e+02, percent-clipped=2.0 +2024-09-17 10:11:33,306 INFO [train.py:1198] (0/2) Epoch 12, batch 100, loss[loss=0.2563, ctc_loss=0.1647, cr_loss=0.3945, attn_decoder_loss=0.2577, over 29569.00 frames. ], tot_loss[loss=0.2657, ctc_loss=0.1686, cr_loss=0.4084, attn_decoder_loss=0.2674, over 2250980.15 frames. ], batch size: 76, lr: 9.56e-03, grad_scale: 8.0 +2024-09-17 10:11:38,778 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.66 vs. limit=22.5 +2024-09-17 10:11:50,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=199540.0, ans=0.1 +2024-09-17 10:11:56,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=199540.0, ans=0.125 +2024-09-17 10:11:59,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=199540.0, ans=0.0 +2024-09-17 10:12:03,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=199580.0, ans=0.0 +2024-09-17 10:12:47,661 INFO [train.py:1198] (0/2) Epoch 12, batch 150, loss[loss=0.238, ctc_loss=0.14, cr_loss=0.3718, attn_decoder_loss=0.2407, over 29446.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1656, cr_loss=0.4038, attn_decoder_loss=0.2651, over 3046390.88 frames. 
], batch size: 70, lr: 9.55e-03, grad_scale: 8.0 +2024-09-17 10:12:52,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=199700.0, ans=0.1 +2024-09-17 10:13:04,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=199740.0, ans=0.125 +2024-09-17 10:13:07,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=199740.0, ans=0.125 +2024-09-17 10:13:34,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=199820.0, ans=0.0 +2024-09-17 10:13:34,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=199820.0, ans=0.1 +2024-09-17 10:13:36,467 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.02 vs. limit=10.0 +2024-09-17 10:13:40,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=199820.0, ans=0.125 +2024-09-17 10:13:41,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=199820.0, ans=0.0 +2024-09-17 10:13:41,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=199820.0, ans=0.2 +2024-09-17 10:13:47,582 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.984e+01 9.054e+01 9.523e+01 1.007e+02 1.391e+02, threshold=1.905e+02, percent-clipped=0.0 +2024-09-17 10:13:49,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.47 vs. limit=22.5 +2024-09-17 10:13:51,350 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.60 vs. limit=15.0 +2024-09-17 10:13:52,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=199860.0, ans=0.0 +2024-09-17 10:13:53,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=199860.0, ans=0.0 +2024-09-17 10:13:56,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=199860.0, ans=0.025 +2024-09-17 10:14:02,683 INFO [train.py:1198] (0/2) Epoch 12, batch 200, loss[loss=0.2827, ctc_loss=0.1858, cr_loss=0.4394, attn_decoder_loss=0.2837, over 27407.00 frames. ], tot_loss[loss=0.2621, ctc_loss=0.1641, cr_loss=0.4027, attn_decoder_loss=0.264, over 3657176.50 frames. 
], batch size: 124, lr: 9.55e-03, grad_scale: 8.0 +2024-09-17 10:14:04,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=199900.0, ans=0.125 +2024-09-17 10:14:32,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=199940.0, ans=10.0 +2024-09-17 10:14:38,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=199980.0, ans=0.05 +2024-09-17 10:14:43,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=199980.0, ans=0.125 +2024-09-17 10:14:45,653 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=6.35 vs. limit=12.0 +2024-09-17 10:14:57,694 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.72 vs. limit=6.0 +2024-09-17 10:15:01,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=200020.0, ans=0.125 +2024-09-17 10:15:20,793 INFO [train.py:1198] (0/2) Epoch 12, batch 250, loss[loss=0.2695, ctc_loss=0.1643, cr_loss=0.3994, attn_decoder_loss=0.2723, over 29249.00 frames. ], tot_loss[loss=0.2616, ctc_loss=0.1634, cr_loss=0.4032, attn_decoder_loss=0.2636, over 4139777.95 frames. ], batch size: 100, lr: 9.54e-03, grad_scale: 8.0 +2024-09-17 10:15:28,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=200100.0, ans=0.05 +2024-09-17 10:15:35,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=200100.0, ans=0.125 +2024-09-17 10:15:37,314 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.98 vs. limit=12.0 +2024-09-17 10:15:54,383 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.59 vs. limit=15.0 +2024-09-17 10:16:23,314 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.843e+01 9.074e+01 9.707e+01 1.061e+02 3.060e+02, threshold=1.941e+02, percent-clipped=1.0 +2024-09-17 10:16:28,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=200260.0, ans=0.025 +2024-09-17 10:16:37,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=200300.0, ans=0.125 +2024-09-17 10:16:38,674 INFO [train.py:1198] (0/2) Epoch 12, batch 300, loss[loss=0.2725, ctc_loss=0.1766, cr_loss=0.4138, attn_decoder_loss=0.274, over 29543.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1639, cr_loss=0.4035, attn_decoder_loss=0.2637, over 4507694.14 frames. ], batch size: 92, lr: 9.54e-03, grad_scale: 8.0 +2024-09-17 10:17:31,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=200420.0, ans=0.09899494936611666 +2024-09-17 10:17:32,476 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.24 vs. 
limit=15.0 +2024-09-17 10:17:54,250 INFO [train.py:1198] (0/2) Epoch 12, batch 350, loss[loss=0.2424, ctc_loss=0.1381, cr_loss=0.3731, attn_decoder_loss=0.2457, over 29278.00 frames. ], tot_loss[loss=0.2623, ctc_loss=0.1643, cr_loss=0.4039, attn_decoder_loss=0.2642, over 4793592.44 frames. ], batch size: 71, lr: 9.53e-03, grad_scale: 8.0 +2024-09-17 10:17:57,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=200500.0, ans=0.0 +2024-09-17 10:18:31,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=200580.0, ans=0.0 +2024-09-17 10:18:45,448 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.33 vs. limit=15.0 +2024-09-17 10:18:56,615 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.651e+01 9.289e+01 1.000e+02 1.114e+02 4.401e+02, threshold=2.000e+02, percent-clipped=4.0 +2024-09-17 10:19:02,401 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.49 vs. limit=15.0 +2024-09-17 10:19:11,730 INFO [train.py:1198] (0/2) Epoch 12, batch 400, loss[loss=0.2707, ctc_loss=0.175, cr_loss=0.4476, attn_decoder_loss=0.2714, over 29696.00 frames. ], tot_loss[loss=0.2619, ctc_loss=0.1637, cr_loss=0.4034, attn_decoder_loss=0.2638, over 5022792.67 frames. ], batch size: 82, lr: 9.53e-03, grad_scale: 16.0 +2024-09-17 10:19:11,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=200700.0, ans=0.125 +2024-09-17 10:19:15,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=200700.0, ans=0.5 +2024-09-17 10:19:15,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=200700.0, ans=0.125 +2024-09-17 10:19:16,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=200700.0, ans=0.025 +2024-09-17 10:19:18,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=200700.0, ans=0.2 +2024-09-17 10:19:35,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=200740.0, ans=0.0 +2024-09-17 10:19:55,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=200780.0, ans=0.0 +2024-09-17 10:20:03,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=200820.0, ans=0.0 +2024-09-17 10:20:16,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=200860.0, ans=0.0 +2024-09-17 10:20:18,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=200860.0, ans=0.125 +2024-09-17 10:20:26,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=200860.0, ans=0.125 +2024-09-17 10:20:30,351 INFO [train.py:1198] (0/2) Epoch 12, batch 450, loss[loss=0.2779, ctc_loss=0.1795, cr_loss=0.4294, attn_decoder_loss=0.2793, over 29695.00 frames. 
], tot_loss[loss=0.2621, ctc_loss=0.164, cr_loss=0.4034, attn_decoder_loss=0.264, over 5186766.53 frames. ], batch size: 83, lr: 9.52e-03, grad_scale: 8.0 +2024-09-17 10:20:49,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=200940.0, ans=0.125 +2024-09-17 10:20:50,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=200940.0, ans=0.125 +2024-09-17 10:20:56,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=200940.0, ans=0.0 +2024-09-17 10:20:58,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=200940.0, ans=0.125 +2024-09-17 10:20:59,734 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:21:16,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=201020.0, ans=0.125 +2024-09-17 10:21:30,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=201060.0, ans=0.0 +2024-09-17 10:21:32,802 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.842e+01 9.090e+01 9.686e+01 1.023e+02 4.799e+02, threshold=1.937e+02, percent-clipped=1.0 +2024-09-17 10:21:40,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.80 vs. limit=15.0 +2024-09-17 10:21:46,302 INFO [train.py:1198] (0/2) Epoch 12, batch 500, loss[loss=0.2764, ctc_loss=0.1762, cr_loss=0.4326, attn_decoder_loss=0.2779, over 29445.00 frames. ], tot_loss[loss=0.2613, ctc_loss=0.1634, cr_loss=0.4026, attn_decoder_loss=0.2632, over 5329922.58 frames. ], batch size: 94, lr: 9.52e-03, grad_scale: 8.0 +2024-09-17 10:21:57,408 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:22:01,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=201140.0, ans=0.0 +2024-09-17 10:22:13,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=201140.0, ans=0.125 +2024-09-17 10:22:16,645 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.94 vs. limit=22.5 +2024-09-17 10:22:20,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=201180.0, ans=0.125 +2024-09-17 10:22:26,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=201180.0, ans=0.1 +2024-09-17 10:22:52,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=201260.0, ans=15.0 +2024-09-17 10:22:56,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=201260.0, ans=0.2 +2024-09-17 10:23:03,902 INFO [train.py:1198] (0/2) Epoch 12, batch 550, loss[loss=0.2745, ctc_loss=0.1771, cr_loss=0.4369, attn_decoder_loss=0.2756, over 28794.00 frames. 
], tot_loss[loss=0.2614, ctc_loss=0.1635, cr_loss=0.4029, attn_decoder_loss=0.2633, over 5421586.34 frames. ], batch size: 104, lr: 9.51e-03, grad_scale: 4.0 +2024-09-17 10:23:05,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=201300.0, ans=0.125 +2024-09-17 10:23:46,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=201380.0, ans=0.125 +2024-09-17 10:24:01,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=201420.0, ans=0.125 +2024-09-17 10:24:02,141 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.64 vs. limit=15.0 +2024-09-17 10:24:06,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=201460.0, ans=0.125 +2024-09-17 10:24:06,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.95 vs. limit=15.0 +2024-09-17 10:24:09,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=201460.0, ans=6.0 +2024-09-17 10:24:10,193 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.800e+01 9.023e+01 9.660e+01 1.069e+02 2.891e+02, threshold=1.932e+02, percent-clipped=2.0 +2024-09-17 10:24:10,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=201460.0, ans=0.0 +2024-09-17 10:24:18,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=201460.0, ans=0.125 +2024-09-17 10:24:22,362 INFO [train.py:1198] (0/2) Epoch 12, batch 600, loss[loss=0.2733, ctc_loss=0.1716, cr_loss=0.3822, attn_decoder_loss=0.2761, over 29254.00 frames. ], tot_loss[loss=0.2619, ctc_loss=0.1638, cr_loss=0.4029, attn_decoder_loss=0.2639, over 5509166.16 frames. ], batch size: 100, lr: 9.51e-03, grad_scale: 8.0 +2024-09-17 10:24:28,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=201500.0, ans=0.1 +2024-09-17 10:24:40,757 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:25:18,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=201620.0, ans=0.05 +2024-09-17 10:25:23,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=201660.0, ans=0.1 +2024-09-17 10:25:23,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=201660.0, ans=0.125 +2024-09-17 10:25:28,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=201660.0, ans=0.2 +2024-09-17 10:25:38,287 INFO [train.py:1198] (0/2) Epoch 12, batch 650, loss[loss=0.256, ctc_loss=0.1615, cr_loss=0.3886, attn_decoder_loss=0.2578, over 29752.00 frames. ], tot_loss[loss=0.261, ctc_loss=0.1626, cr_loss=0.4011, attn_decoder_loss=0.263, over 5586415.78 frames. 
], batch size: 81, lr: 9.50e-03, grad_scale: 8.0 +2024-09-17 10:26:23,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=201780.0, ans=0.0 +2024-09-17 10:26:23,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=201780.0, ans=0.125 +2024-09-17 10:26:29,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=201820.0, ans=0.125 +2024-09-17 10:26:32,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=201820.0, ans=0.0 +2024-09-17 10:26:35,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=201820.0, ans=0.2 +2024-09-17 10:26:43,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.44 vs. limit=15.0 +2024-09-17 10:26:43,924 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.946e+01 9.154e+01 9.702e+01 1.037e+02 1.837e+02, threshold=1.940e+02, percent-clipped=0.0 +2024-09-17 10:26:56,072 INFO [train.py:1198] (0/2) Epoch 12, batch 700, loss[loss=0.2513, ctc_loss=0.1543, cr_loss=0.4142, attn_decoder_loss=0.2529, over 29520.00 frames. ], tot_loss[loss=0.2613, ctc_loss=0.1629, cr_loss=0.4014, attn_decoder_loss=0.2633, over 5634595.34 frames. ], batch size: 76, lr: 9.50e-03, grad_scale: 8.0 +2024-09-17 10:27:03,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=201900.0, ans=0.0 +2024-09-17 10:27:11,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=201940.0, ans=0.0 +2024-09-17 10:27:12,102 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.97 vs. limit=15.0 +2024-09-17 10:27:19,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.91 vs. limit=6.0 +2024-09-17 10:27:23,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=201940.0, ans=0.0 +2024-09-17 10:27:31,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=201980.0, ans=0.125 +2024-09-17 10:27:31,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=201980.0, ans=0.2 +2024-09-17 10:27:43,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=202020.0, ans=0.2 +2024-09-17 10:27:44,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.13 vs. 
limit=15.0 +2024-09-17 10:27:54,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=202020.0, ans=0.025 +2024-09-17 10:28:03,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=202060.0, ans=0.125 +2024-09-17 10:28:14,116 INFO [train.py:1198] (0/2) Epoch 12, batch 750, loss[loss=0.2721, ctc_loss=0.1715, cr_loss=0.4226, attn_decoder_loss=0.2739, over 29718.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1624, cr_loss=0.4004, attn_decoder_loss=0.2627, over 5673991.79 frames. ], batch size: 82, lr: 9.49e-03, grad_scale: 8.0 +2024-09-17 10:28:29,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=202140.0, ans=0.125 +2024-09-17 10:28:36,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=202140.0, ans=0.125 +2024-09-17 10:28:39,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=202140.0, ans=0.1 +2024-09-17 10:28:55,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=202180.0, ans=0.1 +2024-09-17 10:29:07,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=202220.0, ans=0.125 +2024-09-17 10:29:11,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=202220.0, ans=0.1 +2024-09-17 10:29:17,540 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.082e+01 9.622e+01 1.037e+02 1.120e+02 2.104e+02, threshold=2.074e+02, percent-clipped=1.0 +2024-09-17 10:29:29,713 INFO [train.py:1198] (0/2) Epoch 12, batch 800, loss[loss=0.2373, ctc_loss=0.1343, cr_loss=0.3651, attn_decoder_loss=0.2406, over 29584.00 frames. ], tot_loss[loss=0.2606, ctc_loss=0.1623, cr_loss=0.4002, attn_decoder_loss=0.2627, over 5704209.08 frames. ], batch size: 73, lr: 9.49e-03, grad_scale: 16.0 +2024-09-17 10:30:08,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=202380.0, ans=0.2 +2024-09-17 10:30:47,505 INFO [train.py:1198] (0/2) Epoch 12, batch 850, loss[loss=0.278, ctc_loss=0.1773, cr_loss=0.443, attn_decoder_loss=0.2793, over 29708.00 frames. ], tot_loss[loss=0.26, ctc_loss=0.1614, cr_loss=0.3993, attn_decoder_loss=0.2621, over 5734490.50 frames. ], batch size: 89, lr: 9.49e-03, grad_scale: 4.0 +2024-09-17 10:31:03,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.19 vs. limit=15.0 +2024-09-17 10:31:38,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=202620.0, ans=0.07 +2024-09-17 10:31:55,954 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.132e+01 9.370e+01 1.044e+02 1.217e+02 3.517e+02, threshold=2.088e+02, percent-clipped=3.0 +2024-09-17 10:32:04,986 INFO [train.py:1198] (0/2) Epoch 12, batch 900, loss[loss=0.2437, ctc_loss=0.1531, cr_loss=0.3867, attn_decoder_loss=0.2452, over 29592.00 frames. ], tot_loss[loss=0.2606, ctc_loss=0.1621, cr_loss=0.4004, attn_decoder_loss=0.2627, over 5738378.15 frames. 
], batch size: 73, lr: 9.48e-03, grad_scale: 8.0 +2024-09-17 10:32:32,878 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.47 vs. limit=15.0 +2024-09-17 10:32:33,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=202780.0, ans=0.125 +2024-09-17 10:33:02,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=202820.0, ans=0.025 +2024-09-17 10:33:20,313 INFO [train.py:1198] (0/2) Epoch 12, batch 950, loss[loss=0.2435, ctc_loss=0.1466, cr_loss=0.3897, attn_decoder_loss=0.2456, over 29545.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1627, cr_loss=0.4018, attn_decoder_loss=0.2633, over 5740413.56 frames. ], batch size: 74, lr: 9.48e-03, grad_scale: 8.0 +2024-09-17 10:33:20,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=202900.0, ans=0.125 +2024-09-17 10:33:33,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=202940.0, ans=0.1 +2024-09-17 10:34:19,201 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.66 vs. limit=15.0 +2024-09-17 10:34:28,763 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.408e+01 9.367e+01 1.035e+02 1.151e+02 3.076e+02, threshold=2.071e+02, percent-clipped=5.0 +2024-09-17 10:34:37,634 INFO [train.py:1198] (0/2) Epoch 12, batch 1000, loss[loss=0.2508, ctc_loss=0.1562, cr_loss=0.3762, attn_decoder_loss=0.253, over 29484.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1638, cr_loss=0.4023, attn_decoder_loss=0.2638, over 5734450.83 frames. ], batch size: 77, lr: 9.47e-03, grad_scale: 8.0 +2024-09-17 10:35:03,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=203140.0, ans=0.125 +2024-09-17 10:35:55,928 INFO [train.py:1198] (0/2) Epoch 12, batch 1050, loss[loss=0.271, ctc_loss=0.1713, cr_loss=0.4121, attn_decoder_loss=0.273, over 29673.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.1629, cr_loss=0.4011, attn_decoder_loss=0.2631, over 5741932.12 frames. ], batch size: 85, lr: 9.47e-03, grad_scale: 4.0 +2024-09-17 10:36:03,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=203300.0, ans=0.0 +2024-09-17 10:36:10,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=203340.0, ans=0.125 +2024-09-17 10:36:15,166 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.15 vs. 
limit=22.5 +2024-09-17 10:36:25,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=203380.0, ans=0.125 +2024-09-17 10:36:26,661 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:36:38,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=203380.0, ans=0.2 +2024-09-17 10:37:04,276 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.982e+01 9.127e+01 9.739e+01 1.067e+02 1.550e+02, threshold=1.948e+02, percent-clipped=0.0 +2024-09-17 10:37:06,555 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.79 vs. limit=15.0 +2024-09-17 10:37:11,894 INFO [train.py:1198] (0/2) Epoch 12, batch 1100, loss[loss=0.2558, ctc_loss=0.1587, cr_loss=0.4178, attn_decoder_loss=0.2573, over 29434.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.1623, cr_loss=0.4006, attn_decoder_loss=0.2625, over 5754298.05 frames. ], batch size: 78, lr: 9.46e-03, grad_scale: 8.0 +2024-09-17 10:37:13,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=203500.0, ans=0.125 +2024-09-17 10:37:27,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=203540.0, ans=0.125 +2024-09-17 10:37:46,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=203580.0, ans=0.025 +2024-09-17 10:37:46,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=203580.0, ans=0.0 +2024-09-17 10:38:03,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=203620.0, ans=0.0 +2024-09-17 10:38:10,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=203620.0, ans=0.125 +2024-09-17 10:38:30,356 INFO [train.py:1198] (0/2) Epoch 12, batch 1150, loss[loss=0.2634, ctc_loss=0.1646, cr_loss=0.4047, attn_decoder_loss=0.2654, over 29457.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1625, cr_loss=0.4002, attn_decoder_loss=0.2627, over 5753240.27 frames. ], batch size: 78, lr: 9.46e-03, grad_scale: 8.0 +2024-09-17 10:38:36,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=203700.0, ans=0.1 +2024-09-17 10:38:47,693 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.04 vs. limit=12.0 +2024-09-17 10:38:53,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=203740.0, ans=0.125 +2024-09-17 10:38:59,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=203780.0, ans=0.0 +2024-09-17 10:39:01,912 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.05 vs. limit=15.0 +2024-09-17 10:39:03,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.58 vs. 
limit=15.0 +2024-09-17 10:39:05,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=203780.0, ans=0.125 +2024-09-17 10:39:30,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=203820.0, ans=0.0 +2024-09-17 10:39:40,830 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.985e+01 9.529e+01 1.043e+02 1.150e+02 3.679e+02, threshold=2.085e+02, percent-clipped=2.0 +2024-09-17 10:39:48,418 INFO [train.py:1198] (0/2) Epoch 12, batch 1200, loss[loss=0.2561, ctc_loss=0.1529, cr_loss=0.3975, attn_decoder_loss=0.2587, over 29660.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1628, cr_loss=0.4006, attn_decoder_loss=0.2632, over 5746732.94 frames. ], batch size: 85, lr: 9.45e-03, grad_scale: 16.0 +2024-09-17 10:39:50,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=203900.0, ans=0.0 +2024-09-17 10:39:54,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=203900.0, ans=0.125 +2024-09-17 10:40:36,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=204020.0, ans=0.125 +2024-09-17 10:41:04,794 INFO [train.py:1198] (0/2) Epoch 12, batch 1250, loss[loss=0.2682, ctc_loss=0.1659, cr_loss=0.4034, attn_decoder_loss=0.2706, over 29523.00 frames. ], tot_loss[loss=0.2616, ctc_loss=0.1629, cr_loss=0.4014, attn_decoder_loss=0.2637, over 5774692.62 frames. ], batch size: 92, lr: 9.45e-03, grad_scale: 8.0 +2024-09-17 10:41:45,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=204180.0, ans=0.125 +2024-09-17 10:41:45,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.96 vs. limit=15.0 +2024-09-17 10:41:58,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=204220.0, ans=0.025 +2024-09-17 10:42:08,722 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.08 vs. limit=15.0 +2024-09-17 10:42:15,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=204260.0, ans=0.125 +2024-09-17 10:42:16,783 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.687e+01 9.051e+01 9.485e+01 1.007e+02 2.061e+02, threshold=1.897e+02, percent-clipped=0.0 +2024-09-17 10:42:22,709 INFO [train.py:1198] (0/2) Epoch 12, batch 1300, loss[loss=0.2712, ctc_loss=0.1634, cr_loss=0.3919, attn_decoder_loss=0.2745, over 28166.00 frames. ], tot_loss[loss=0.2613, ctc_loss=0.1627, cr_loss=0.4017, attn_decoder_loss=0.2633, over 5779840.78 frames. ], batch size: 111, lr: 9.44e-03, grad_scale: 8.0 +2024-09-17 10:42:50,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=204340.0, ans=0.5 +2024-09-17 10:42:57,306 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=6.08 vs. 
limit=12.0 +2024-09-17 10:43:02,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=204380.0, ans=0.04949747468305833 +2024-09-17 10:43:04,952 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.71 vs. limit=15.0 +2024-09-17 10:43:20,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=204420.0, ans=0.125 +2024-09-17 10:43:39,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=204500.0, ans=0.125 +2024-09-17 10:43:40,857 INFO [train.py:1198] (0/2) Epoch 12, batch 1350, loss[loss=0.2588, ctc_loss=0.1571, cr_loss=0.3895, attn_decoder_loss=0.2615, over 29769.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.1621, cr_loss=0.401, attn_decoder_loss=0.2629, over 5797949.68 frames. ], batch size: 81, lr: 9.44e-03, grad_scale: 8.0 +2024-09-17 10:44:22,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=204580.0, ans=0.07 +2024-09-17 10:44:49,480 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.879e+01 9.112e+01 9.581e+01 1.019e+02 1.292e+02, threshold=1.916e+02, percent-clipped=0.0 +2024-09-17 10:44:55,537 INFO [train.py:1198] (0/2) Epoch 12, batch 1400, loss[loss=0.2214, ctc_loss=0.1276, cr_loss=0.3526, attn_decoder_loss=0.224, over 29573.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1621, cr_loss=0.4015, attn_decoder_loss=0.263, over 5808011.04 frames. ], batch size: 69, lr: 9.44e-03, grad_scale: 8.0 +2024-09-17 10:45:01,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=204700.0, ans=0.5 +2024-09-17 10:45:14,352 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.82 vs. limit=15.0 +2024-09-17 10:45:15,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=204740.0, ans=0.125 +2024-09-17 10:45:30,323 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.02 vs. limit=22.5 +2024-09-17 10:45:54,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=204820.0, ans=0.2 +2024-09-17 10:46:06,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=204860.0, ans=0.0 +2024-09-17 10:46:13,815 INFO [train.py:1198] (0/2) Epoch 12, batch 1450, loss[loss=0.2775, ctc_loss=0.1758, cr_loss=0.4179, attn_decoder_loss=0.2795, over 29467.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1623, cr_loss=0.4015, attn_decoder_loss=0.2632, over 5804863.14 frames. ], batch size: 94, lr: 9.43e-03, grad_scale: 4.0 +2024-09-17 10:46:22,510 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.37 vs. 
limit=10.0 +2024-09-17 10:46:27,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=204940.0, ans=0.125 +2024-09-17 10:46:40,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.15 vs. limit=15.0 +2024-09-17 10:46:55,700 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.79 vs. limit=15.0 +2024-09-17 10:47:08,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=205020.0, ans=0.0 +2024-09-17 10:47:09,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=205020.0, ans=0.125 +2024-09-17 10:47:14,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=205060.0, ans=0.0 +2024-09-17 10:47:25,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=205060.0, ans=0.025 +2024-09-17 10:47:26,854 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.748e+01 9.404e+01 1.003e+02 1.073e+02 8.206e+02, threshold=2.005e+02, percent-clipped=2.0 +2024-09-17 10:47:28,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=205060.0, ans=0.025 +2024-09-17 10:47:30,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=205100.0, ans=0.1 +2024-09-17 10:47:31,567 INFO [train.py:1198] (0/2) Epoch 12, batch 1500, loss[loss=0.2694, ctc_loss=0.1632, cr_loss=0.3906, attn_decoder_loss=0.2725, over 29633.00 frames. ], tot_loss[loss=0.2613, ctc_loss=0.1623, cr_loss=0.4013, attn_decoder_loss=0.2634, over 5805494.17 frames. ], batch size: 86, lr: 9.43e-03, grad_scale: 8.0 +2024-09-17 10:47:33,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=205100.0, ans=0.125 +2024-09-17 10:47:38,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=205100.0, ans=0.1 +2024-09-17 10:47:42,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=205100.0, ans=0.0 +2024-09-17 10:47:47,545 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.09 vs. limit=15.0 +2024-09-17 10:47:56,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=205140.0, ans=0.125 +2024-09-17 10:48:07,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=205180.0, ans=0.2 +2024-09-17 10:48:22,776 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.67 vs. 
limit=15.0 +2024-09-17 10:48:35,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=205260.0, ans=0.0 +2024-09-17 10:48:47,594 INFO [train.py:1198] (0/2) Epoch 12, batch 1550, loss[loss=0.2766, ctc_loss=0.174, cr_loss=0.4268, attn_decoder_loss=0.2785, over 29505.00 frames. ], tot_loss[loss=0.2616, ctc_loss=0.1628, cr_loss=0.4023, attn_decoder_loss=0.2637, over 5782574.07 frames. ], batch size: 90, lr: 9.42e-03, grad_scale: 8.0 +2024-09-17 10:48:47,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=205300.0, ans=0.125 +2024-09-17 10:48:48,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=205300.0, ans=0.125 +2024-09-17 10:49:23,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.42 vs. limit=15.0 +2024-09-17 10:49:25,386 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.24 vs. limit=22.5 +2024-09-17 10:49:48,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=205460.0, ans=0.1 +2024-09-17 10:49:59,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=205460.0, ans=0.1 +2024-09-17 10:50:01,909 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.192e+01 9.435e+01 1.085e+02 1.264e+02 1.596e+02, threshold=2.170e+02, percent-clipped=0.0 +2024-09-17 10:50:04,927 INFO [train.py:1198] (0/2) Epoch 12, batch 1600, loss[loss=0.2671, ctc_loss=0.1645, cr_loss=0.3928, attn_decoder_loss=0.2698, over 29679.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1631, cr_loss=0.4024, attn_decoder_loss=0.2638, over 5764157.60 frames. ], batch size: 85, lr: 9.42e-03, grad_scale: 8.0 +2024-09-17 10:50:05,700 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.34 vs. limit=22.5 +2024-09-17 10:50:09,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=205500.0, ans=0.0 +2024-09-17 10:50:10,366 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.69 vs. limit=15.0 +2024-09-17 10:50:46,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=205580.0, ans=0.0 +2024-09-17 10:50:48,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=205580.0, ans=0.125 +2024-09-17 10:51:05,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.26 vs. limit=15.0 +2024-09-17 10:51:06,106 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.35 vs. 
limit=15.0 +2024-09-17 10:51:08,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=205660.0, ans=0.1 +2024-09-17 10:51:14,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=205660.0, ans=0.0 +2024-09-17 10:51:15,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.73 vs. limit=15.0 +2024-09-17 10:51:19,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=205660.0, ans=0.07 +2024-09-17 10:51:23,146 INFO [train.py:1198] (0/2) Epoch 12, batch 1650, loss[loss=0.274, ctc_loss=0.1665, cr_loss=0.414, attn_decoder_loss=0.2768, over 29670.00 frames. ], tot_loss[loss=0.2615, ctc_loss=0.163, cr_loss=0.4022, attn_decoder_loss=0.2635, over 5758245.06 frames. ], batch size: 89, lr: 9.41e-03, grad_scale: 4.0 +2024-09-17 10:51:42,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=205740.0, ans=0.1 +2024-09-17 10:51:53,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=205780.0, ans=0.125 +2024-09-17 10:52:10,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=205820.0, ans=0.2 +2024-09-17 10:52:36,820 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 9.373e+01 9.988e+01 1.108e+02 2.072e+02, threshold=1.998e+02, percent-clipped=0.0 +2024-09-17 10:52:38,333 INFO [train.py:1198] (0/2) Epoch 12, batch 1700, loss[loss=0.2294, ctc_loss=0.1332, cr_loss=0.3624, attn_decoder_loss=0.232, over 29610.00 frames. ], tot_loss[loss=0.2614, ctc_loss=0.1627, cr_loss=0.4022, attn_decoder_loss=0.2634, over 5780361.23 frames. ], batch size: 69, lr: 9.41e-03, grad_scale: 8.0 +2024-09-17 10:52:46,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=205900.0, ans=0.2 +2024-09-17 10:53:28,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=206020.0, ans=0.95 +2024-09-17 10:53:39,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=206060.0, ans=0.1 +2024-09-17 10:53:46,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=206060.0, ans=0.125 +2024-09-17 10:53:49,175 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.49 vs. limit=15.0 +2024-09-17 10:53:51,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=206060.0, ans=0.0 +2024-09-17 10:53:55,950 INFO [train.py:1198] (0/2) Epoch 12, batch 1750, loss[loss=0.2294, ctc_loss=0.1376, cr_loss=0.3611, attn_decoder_loss=0.2316, over 29323.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.162, cr_loss=0.4009, attn_decoder_loss=0.2628, over 5788488.71 frames. 
], batch size: 67, lr: 9.40e-03, grad_scale: 8.0 +2024-09-17 10:54:00,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=206100.0, ans=0.1 +2024-09-17 10:54:18,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=206140.0, ans=0.125 +2024-09-17 10:54:39,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=206220.0, ans=0.0 +2024-09-17 10:54:44,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=206220.0, ans=0.0 +2024-09-17 10:55:11,662 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.922e+01 9.583e+01 1.012e+02 1.403e+02, threshold=1.917e+02, percent-clipped=0.0 +2024-09-17 10:55:13,154 INFO [train.py:1198] (0/2) Epoch 12, batch 1800, loss[loss=0.2784, ctc_loss=0.1733, cr_loss=0.4454, attn_decoder_loss=0.2802, over 29689.00 frames. ], tot_loss[loss=0.261, ctc_loss=0.1623, cr_loss=0.4016, attn_decoder_loss=0.263, over 5790504.00 frames. ], batch size: 83, lr: 9.40e-03, grad_scale: 8.0 +2024-09-17 10:55:16,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=206300.0, ans=0.2 +2024-09-17 10:55:22,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=206300.0, ans=0.07 +2024-09-17 10:55:31,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=206340.0, ans=0.1 +2024-09-17 10:55:31,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=206340.0, ans=0.125 +2024-09-17 10:55:33,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.63 vs. limit=12.0 +2024-09-17 10:55:34,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=206340.0, ans=0.0 +2024-09-17 10:55:54,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff2.min_abs, batch_count=206380.0, ans=0.1 +2024-09-17 10:55:57,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=206420.0, ans=0.025 +2024-09-17 10:55:57,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=206420.0, ans=0.125 +2024-09-17 10:56:05,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=206420.0, ans=0.125 +2024-09-17 10:56:15,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=206460.0, ans=0.125 +2024-09-17 10:56:29,237 INFO [train.py:1198] (0/2) Epoch 12, batch 1850, loss[loss=0.2641, ctc_loss=0.1591, cr_loss=0.4073, attn_decoder_loss=0.2667, over 29638.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.1617, cr_loss=0.4004, attn_decoder_loss=0.2626, over 5795036.01 frames. 
], batch size: 86, lr: 9.40e-03, grad_scale: 8.0 +2024-09-17 10:56:50,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=206540.0, ans=0.125 +2024-09-17 10:56:57,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=206540.0, ans=0.125 +2024-09-17 10:57:26,465 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.94 vs. limit=22.5 +2024-09-17 10:57:32,359 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.41 vs. limit=22.5 +2024-09-17 10:57:44,672 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.500e+01 9.093e+01 9.711e+01 1.054e+02 1.569e+02, threshold=1.942e+02, percent-clipped=0.0 +2024-09-17 10:57:46,184 INFO [train.py:1198] (0/2) Epoch 12, batch 1900, loss[loss=0.269, ctc_loss=0.1731, cr_loss=0.3999, attn_decoder_loss=0.2708, over 29677.00 frames. ], tot_loss[loss=0.261, ctc_loss=0.1619, cr_loss=0.4007, attn_decoder_loss=0.2631, over 5802388.24 frames. ], batch size: 89, lr: 9.39e-03, grad_scale: 8.0 +2024-09-17 10:58:01,653 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:58:09,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=206740.0, ans=0.0 +2024-09-17 10:58:13,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=206740.0, ans=0.05 +2024-09-17 10:58:26,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=206780.0, ans=0.1 +2024-09-17 10:58:51,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=206860.0, ans=0.0 +2024-09-17 10:59:01,592 INFO [train.py:1198] (0/2) Epoch 12, batch 1950, loss[loss=0.263, ctc_loss=0.1621, cr_loss=0.4187, attn_decoder_loss=0.2649, over 29439.00 frames. ], tot_loss[loss=0.2621, ctc_loss=0.1626, cr_loss=0.4028, attn_decoder_loss=0.2642, over 5817328.13 frames. ], batch size: 78, lr: 9.39e-03, grad_scale: 8.0 +2024-09-17 10:59:30,452 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.61 vs. limit=15.0 +2024-09-17 10:59:31,874 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.79 vs. limit=15.0 +2024-09-17 10:59:37,823 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.73 vs. limit=22.5 +2024-09-17 10:59:43,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=206980.0, ans=0.125 +2024-09-17 10:59:48,237 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=9.60 vs. 
limit=15.0 +2024-09-17 10:59:50,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=207020.0, ans=0.125 +2024-09-17 10:59:52,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=207020.0, ans=0.025 +2024-09-17 11:00:17,490 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.829e+01 9.308e+01 9.738e+01 1.045e+02 2.594e+02, threshold=1.948e+02, percent-clipped=1.0 +2024-09-17 11:00:19,010 INFO [train.py:1198] (0/2) Epoch 12, batch 2000, loss[loss=0.2281, ctc_loss=0.1375, cr_loss=0.3511, attn_decoder_loss=0.2304, over 29297.00 frames. ], tot_loss[loss=0.2629, ctc_loss=0.1637, cr_loss=0.4036, attn_decoder_loss=0.2649, over 5795539.78 frames. ], batch size: 67, lr: 9.38e-03, grad_scale: 16.0 +2024-09-17 11:00:21,777 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.40 vs. limit=15.0 +2024-09-17 11:00:53,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=207180.0, ans=0.125 +2024-09-17 11:01:02,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=207180.0, ans=0.125 +2024-09-17 11:01:05,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=207220.0, ans=0.0 +2024-09-17 11:01:07,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=207220.0, ans=0.2 +2024-09-17 11:01:37,086 INFO [train.py:1198] (0/2) Epoch 12, batch 2050, loss[loss=0.2403, ctc_loss=0.1439, cr_loss=0.3844, attn_decoder_loss=0.2425, over 29432.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1629, cr_loss=0.4017, attn_decoder_loss=0.2639, over 5788049.78 frames. ], batch size: 70, lr: 9.38e-03, grad_scale: 4.0 +2024-09-17 11:01:43,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=207300.0, ans=0.1 +2024-09-17 11:02:12,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=207380.0, ans=0.125 +2024-09-17 11:02:16,161 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.64 vs. limit=15.0 +2024-09-17 11:02:18,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.23 vs. limit=15.0 +2024-09-17 11:02:19,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=207380.0, ans=0.1 +2024-09-17 11:02:36,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.76 vs. 
limit=15.0 +2024-09-17 11:02:39,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=207460.0, ans=0.125 +2024-09-17 11:02:42,710 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:02:44,455 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.15 vs. limit=10.0 +2024-09-17 11:02:48,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=207460.0, ans=0.05 +2024-09-17 11:02:52,887 INFO [train.py:1198] (0/2) Epoch 12, batch 2100, loss[loss=0.265, ctc_loss=0.164, cr_loss=0.3953, attn_decoder_loss=0.2674, over 29766.00 frames. ], tot_loss[loss=0.261, ctc_loss=0.1621, cr_loss=0.4009, attn_decoder_loss=0.263, over 5799784.53 frames. ], batch size: 81, lr: 9.37e-03, grad_scale: 8.0 +2024-09-17 11:02:54,457 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.611e+01 9.176e+01 9.560e+01 1.030e+02 1.406e+02, threshold=1.912e+02, percent-clipped=0.0 +2024-09-17 11:03:04,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=207500.0, ans=0.125 +2024-09-17 11:03:21,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=207540.0, ans=0.1 +2024-09-17 11:03:51,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff3.min_abs, batch_count=207620.0, ans=0.2 +2024-09-17 11:04:00,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=207660.0, ans=0.04949747468305833 +2024-09-17 11:04:01,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=207660.0, ans=0.125 +2024-09-17 11:04:02,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=207660.0, ans=0.125 +2024-09-17 11:04:09,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=207700.0, ans=0.125 +2024-09-17 11:04:10,887 INFO [train.py:1198] (0/2) Epoch 12, batch 2150, loss[loss=0.2778, ctc_loss=0.1791, cr_loss=0.4628, attn_decoder_loss=0.2784, over 29452.00 frames. ], tot_loss[loss=0.2603, ctc_loss=0.1613, cr_loss=0.3992, attn_decoder_loss=0.2625, over 5815169.54 frames. 
], batch size: 78, lr: 9.37e-03, grad_scale: 4.0 +2024-09-17 11:04:35,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=207740.0, ans=0.0 +2024-09-17 11:04:58,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=207820.0, ans=0.1 +2024-09-17 11:04:59,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=207820.0, ans=0.0 +2024-09-17 11:05:01,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=207820.0, ans=0.04949747468305833 +2024-09-17 11:05:28,908 INFO [train.py:1198] (0/2) Epoch 12, batch 2200, loss[loss=0.2617, ctc_loss=0.172, cr_loss=0.404, attn_decoder_loss=0.2626, over 29639.00 frames. ], tot_loss[loss=0.2602, ctc_loss=0.1611, cr_loss=0.3993, attn_decoder_loss=0.2623, over 5811610.08 frames. ], batch size: 86, lr: 9.36e-03, grad_scale: 8.0 +2024-09-17 11:05:31,920 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 9.159e+01 9.816e+01 1.050e+02 6.382e+02, threshold=1.963e+02, percent-clipped=1.0 +2024-09-17 11:05:32,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=207900.0, ans=15.0 +2024-09-17 11:05:42,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=207940.0, ans=0.0 +2024-09-17 11:06:05,712 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-52000.pt +2024-09-17 11:06:16,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=207980.0, ans=0.125 +2024-09-17 11:06:30,176 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.14 vs. limit=12.0 +2024-09-17 11:06:49,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=208060.0, ans=0.0 +2024-09-17 11:06:52,015 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.42 vs. limit=15.0 +2024-09-17 11:06:52,587 INFO [train.py:1198] (0/2) Epoch 12, batch 2250, loss[loss=0.2733, ctc_loss=0.1784, cr_loss=0.4018, attn_decoder_loss=0.2749, over 29699.00 frames. ], tot_loss[loss=0.2599, ctc_loss=0.1609, cr_loss=0.3987, attn_decoder_loss=0.2621, over 5809694.74 frames. ], batch size: 82, lr: 9.36e-03, grad_scale: 8.0 +2024-09-17 11:06:52,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=208100.0, ans=0.125 +2024-09-17 11:06:56,665 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.65 vs. 
limit=15.0 +2024-09-17 11:07:15,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=208140.0, ans=0.125 +2024-09-17 11:07:34,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=208180.0, ans=0.125 +2024-09-17 11:07:37,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=208180.0, ans=0.125 +2024-09-17 11:07:37,808 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.82 vs. limit=12.0 +2024-09-17 11:08:10,390 INFO [train.py:1198] (0/2) Epoch 12, batch 2300, loss[loss=0.2434, ctc_loss=0.1455, cr_loss=0.3803, attn_decoder_loss=0.2458, over 29345.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.1609, cr_loss=0.3984, attn_decoder_loss=0.2614, over 5797100.49 frames. ], batch size: 71, lr: 9.36e-03, grad_scale: 8.0 +2024-09-17 11:08:13,459 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.929e+01 9.033e+01 9.553e+01 1.076e+02 7.023e+02, threshold=1.911e+02, percent-clipped=3.0 +2024-09-17 11:08:32,528 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.82 vs. limit=15.0 +2024-09-17 11:08:44,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=208380.0, ans=0.125 +2024-09-17 11:08:50,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.47 vs. limit=15.0 +2024-09-17 11:08:56,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=208420.0, ans=0.125 +2024-09-17 11:08:58,889 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.84 vs. limit=10.0 +2024-09-17 11:09:01,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=208420.0, ans=0.125 +2024-09-17 11:09:14,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=208460.0, ans=0.125 +2024-09-17 11:09:28,438 INFO [train.py:1198] (0/2) Epoch 12, batch 2350, loss[loss=0.2754, ctc_loss=0.1712, cr_loss=0.4265, attn_decoder_loss=0.2775, over 29690.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1615, cr_loss=0.3991, attn_decoder_loss=0.2618, over 5802717.16 frames. ], batch size: 83, lr: 9.35e-03, grad_scale: 8.0 +2024-09-17 11:09:30,630 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.44 vs. 
limit=22.5 +2024-09-17 11:09:33,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=208500.0, ans=0.0 +2024-09-17 11:10:03,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=208580.0, ans=0.0 +2024-09-17 11:10:10,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=208580.0, ans=0.125 +2024-09-17 11:10:18,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=208620.0, ans=0.0 +2024-09-17 11:10:39,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=208660.0, ans=0.0 +2024-09-17 11:10:43,835 INFO [train.py:1198] (0/2) Epoch 12, batch 2400, loss[loss=0.2627, ctc_loss=0.1693, cr_loss=0.4045, attn_decoder_loss=0.2641, over 29549.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.1615, cr_loss=0.3993, attn_decoder_loss=0.2622, over 5807264.33 frames. ], batch size: 76, lr: 9.35e-03, grad_scale: 16.0 +2024-09-17 11:10:49,768 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 9.246e+01 9.641e+01 1.033e+02 3.378e+02, threshold=1.928e+02, percent-clipped=1.0 +2024-09-17 11:11:02,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=208740.0, ans=0.2 +2024-09-17 11:11:44,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=208820.0, ans=0.05 +2024-09-17 11:12:02,486 INFO [train.py:1198] (0/2) Epoch 12, batch 2450, loss[loss=0.265, ctc_loss=0.1576, cr_loss=0.4003, attn_decoder_loss=0.268, over 29713.00 frames. ], tot_loss[loss=0.2614, ctc_loss=0.1627, cr_loss=0.4015, attn_decoder_loss=0.2634, over 5785247.68 frames. ], batch size: 82, lr: 9.34e-03, grad_scale: 4.0 +2024-09-17 11:12:02,900 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:12:04,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=208900.0, ans=0.125 +2024-09-17 11:12:16,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=208940.0, ans=0.0 +2024-09-17 11:12:31,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=208940.0, ans=0.0 +2024-09-17 11:12:36,811 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.03 vs. 
limit=6.0 +2024-09-17 11:12:37,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=208980.0, ans=0.125 +2024-09-17 11:12:54,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=209020.0, ans=0.125 +2024-09-17 11:13:09,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=209060.0, ans=0.125 +2024-09-17 11:13:11,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=209060.0, ans=0.125 +2024-09-17 11:13:18,007 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=11.69 vs. limit=15.0 +2024-09-17 11:13:19,879 INFO [train.py:1198] (0/2) Epoch 12, batch 2500, loss[loss=0.2758, ctc_loss=0.176, cr_loss=0.4058, attn_decoder_loss=0.2779, over 29628.00 frames. ], tot_loss[loss=0.2613, ctc_loss=0.1627, cr_loss=0.4017, attn_decoder_loss=0.2633, over 5795523.80 frames. ], batch size: 86, lr: 9.34e-03, grad_scale: 8.0 +2024-09-17 11:13:25,836 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.198e+01 9.207e+01 9.738e+01 1.065e+02 1.820e+02, threshold=1.948e+02, percent-clipped=0.0 +2024-09-17 11:13:33,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=209140.0, ans=0.125 +2024-09-17 11:13:35,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=209140.0, ans=0.0 +2024-09-17 11:14:04,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=209220.0, ans=0.2 +2024-09-17 11:14:05,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=209220.0, ans=0.025 +2024-09-17 11:14:06,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.81 vs. limit=15.0 +2024-09-17 11:14:29,010 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.05 vs. limit=15.0 +2024-09-17 11:14:29,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=209260.0, ans=0.125 +2024-09-17 11:14:36,063 INFO [train.py:1198] (0/2) Epoch 12, batch 2550, loss[loss=0.2298, ctc_loss=0.1367, cr_loss=0.3709, attn_decoder_loss=0.2319, over 29362.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.1622, cr_loss=0.4004, attn_decoder_loss=0.2632, over 5797980.27 frames. ], batch size: 67, lr: 9.33e-03, grad_scale: 8.0 +2024-09-17 11:14:53,722 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.54 vs. limit=15.0 +2024-09-17 11:14:56,453 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.11 vs. limit=15.0 +2024-09-17 11:14:56,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.01 vs. 
limit=12.0 +2024-09-17 11:15:35,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=209420.0, ans=0.0 +2024-09-17 11:15:37,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=209460.0, ans=0.125 +2024-09-17 11:15:47,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=209460.0, ans=0.0 +2024-09-17 11:15:49,898 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.26 vs. limit=15.0 +2024-09-17 11:15:53,672 INFO [train.py:1198] (0/2) Epoch 12, batch 2600, loss[loss=0.2415, ctc_loss=0.1368, cr_loss=0.366, attn_decoder_loss=0.245, over 29457.00 frames. ], tot_loss[loss=0.2614, ctc_loss=0.1625, cr_loss=0.4015, attn_decoder_loss=0.2635, over 5794233.94 frames. ], batch size: 78, lr: 9.33e-03, grad_scale: 8.0 +2024-09-17 11:15:56,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=209500.0, ans=0.125 +2024-09-17 11:16:01,133 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.249e+01 9.008e+01 9.501e+01 1.038e+02 1.745e+02, threshold=1.900e+02, percent-clipped=0.0 +2024-09-17 11:16:01,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=209500.0, ans=0.0 +2024-09-17 11:16:02,272 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.67 vs. limit=22.5 +2024-09-17 11:16:13,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=209540.0, ans=0.2 +2024-09-17 11:16:23,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.10 vs. limit=15.0 +2024-09-17 11:16:29,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=209580.0, ans=0.0 +2024-09-17 11:16:53,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=209620.0, ans=0.2 +2024-09-17 11:16:58,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=209660.0, ans=0.125 +2024-09-17 11:17:01,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=209660.0, ans=0.125 +2024-09-17 11:17:11,200 INFO [train.py:1198] (0/2) Epoch 12, batch 2650, loss[loss=0.2868, ctc_loss=0.1774, cr_loss=0.4283, attn_decoder_loss=0.2895, over 29285.00 frames. ], tot_loss[loss=0.2616, ctc_loss=0.1624, cr_loss=0.4019, attn_decoder_loss=0.2637, over 5801021.06 frames. ], batch size: 100, lr: 9.32e-03, grad_scale: 8.0 +2024-09-17 11:17:14,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=209700.0, ans=0.025 +2024-09-17 11:17:36,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.20 vs. 
limit=22.5 +2024-09-17 11:17:40,099 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:17:44,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=209780.0, ans=0.09899494936611666 +2024-09-17 11:17:59,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=209820.0, ans=0.0 +2024-09-17 11:18:16,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=209860.0, ans=0.025 +2024-09-17 11:18:19,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=209860.0, ans=0.1 +2024-09-17 11:18:20,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=209860.0, ans=0.125 +2024-09-17 11:18:26,269 INFO [train.py:1198] (0/2) Epoch 12, batch 2700, loss[loss=0.2801, ctc_loss=0.1806, cr_loss=0.441, attn_decoder_loss=0.2814, over 29524.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1626, cr_loss=0.4022, attn_decoder_loss=0.2638, over 5796400.65 frames. ], batch size: 87, lr: 9.32e-03, grad_scale: 8.0 +2024-09-17 11:18:32,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=209900.0, ans=0.125 +2024-09-17 11:18:35,137 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.890e+01 9.487e+01 1.014e+02 1.859e+02, threshold=1.897e+02, percent-clipped=0.0 +2024-09-17 11:18:36,152 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.87 vs. limit=15.0 +2024-09-17 11:19:04,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.69 vs. limit=15.0 +2024-09-17 11:19:12,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=210020.0, ans=0.1 +2024-09-17 11:19:14,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=210020.0, ans=0.125 +2024-09-17 11:19:16,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.96 vs. limit=6.0 +2024-09-17 11:19:40,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=210060.0, ans=0.1 +2024-09-17 11:19:44,376 INFO [train.py:1198] (0/2) Epoch 12, batch 2750, loss[loss=0.2486, ctc_loss=0.1564, cr_loss=0.3888, attn_decoder_loss=0.2502, over 29525.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.1617, cr_loss=0.4009, attn_decoder_loss=0.2626, over 5796045.41 frames. 
], batch size: 75, lr: 9.32e-03, grad_scale: 8.0 +2024-09-17 11:19:55,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=210100.0, ans=0.0 +2024-09-17 11:19:55,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=210100.0, ans=0.125 +2024-09-17 11:20:07,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=210140.0, ans=0.1 +2024-09-17 11:20:38,487 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.49 vs. limit=22.5 +2024-09-17 11:20:44,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=210220.0, ans=0.0 +2024-09-17 11:20:55,642 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.37 vs. limit=15.0 +2024-09-17 11:21:02,205 INFO [train.py:1198] (0/2) Epoch 12, batch 2800, loss[loss=0.2927, ctc_loss=0.213, cr_loss=0.4275, attn_decoder_loss=0.2921, over 20080.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1624, cr_loss=0.4014, attn_decoder_loss=0.263, over 5775761.35 frames. ], batch size: 209, lr: 9.31e-03, grad_scale: 16.0 +2024-09-17 11:21:12,652 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.172e+01 9.480e+01 1.026e+02 1.256e+02 4.560e+02, threshold=2.052e+02, percent-clipped=3.0 +2024-09-17 11:21:31,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=210380.0, ans=0.125 +2024-09-17 11:22:07,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=210460.0, ans=0.0 +2024-09-17 11:22:18,260 INFO [train.py:1198] (0/2) Epoch 12, batch 2850, loss[loss=0.2424, ctc_loss=0.1419, cr_loss=0.3667, attn_decoder_loss=0.2454, over 29524.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.1624, cr_loss=0.4008, attn_decoder_loss=0.2631, over 5762222.69 frames. ], batch size: 77, lr: 9.31e-03, grad_scale: 4.0 +2024-09-17 11:22:29,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=210500.0, ans=0.0 +2024-09-17 11:22:36,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=210540.0, ans=0.125 +2024-09-17 11:22:55,670 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:22:55,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=210580.0, ans=0.025 +2024-09-17 11:22:57,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=210580.0, ans=0.125 +2024-09-17 11:23:04,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=210620.0, ans=0.2 +2024-09-17 11:23:06,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=210620.0, ans=0.2 +2024-09-17 11:23:36,113 INFO [train.py:1198] (0/2) Epoch 12, batch 2900, loss[loss=0.2543, ctc_loss=0.1494, cr_loss=0.3867, attn_decoder_loss=0.2574, over 29409.00 frames. 
], tot_loss[loss=0.2623, ctc_loss=0.1631, cr_loss=0.4036, attn_decoder_loss=0.2643, over 5787796.24 frames. ], batch size: 79, lr: 9.30e-03, grad_scale: 8.0 +2024-09-17 11:23:46,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=210700.0, ans=0.2 +2024-09-17 11:23:48,035 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.952e+01 8.957e+01 9.627e+01 1.010e+02 3.114e+02, threshold=1.925e+02, percent-clipped=2.0 +2024-09-17 11:24:20,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=210780.0, ans=0.0 +2024-09-17 11:24:28,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=210820.0, ans=0.125 +2024-09-17 11:24:53,856 INFO [train.py:1198] (0/2) Epoch 12, batch 2950, loss[loss=0.2464, ctc_loss=0.1531, cr_loss=0.3952, attn_decoder_loss=0.2479, over 29519.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1616, cr_loss=0.401, attn_decoder_loss=0.2629, over 5782971.46 frames. ], batch size: 75, lr: 9.30e-03, grad_scale: 8.0 +2024-09-17 11:25:27,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=210980.0, ans=0.125 +2024-09-17 11:26:09,713 INFO [train.py:1198] (0/2) Epoch 12, batch 3000, loss[loss=0.2676, ctc_loss=0.1746, cr_loss=0.4194, attn_decoder_loss=0.2686, over 29734.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1618, cr_loss=0.4018, attn_decoder_loss=0.2628, over 5783520.39 frames. ], batch size: 81, lr: 9.29e-03, grad_scale: 8.0 +2024-09-17 11:26:09,714 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 11:26:15,328 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([6.7040, 6.6246, 6.0556, 6.3602], device='cuda:0') +2024-09-17 11:26:28,165 INFO [train.py:1230] (0/2) Epoch 12, validation: loss=0.2128, ctc_loss=0.04571, cr_loss=4.818e-15, attn_decoder_loss=0.2314, over 944034.00 frames. +2024-09-17 11:26:28,165 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 11:26:34,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=211100.0, ans=0.125 +2024-09-17 11:26:42,640 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 9.212e+01 9.963e+01 1.087e+02 2.371e+02, threshold=1.993e+02, percent-clipped=1.0 +2024-09-17 11:26:44,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=211140.0, ans=0.0 +2024-09-17 11:27:36,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=211260.0, ans=0.125 +2024-09-17 11:27:42,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=211260.0, ans=0.125 +2024-09-17 11:27:48,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.59 vs. limit=15.0 +2024-09-17 11:27:48,913 INFO [train.py:1198] (0/2) Epoch 12, batch 3050, loss[loss=0.2502, ctc_loss=0.1478, cr_loss=0.3888, attn_decoder_loss=0.253, over 29521.00 frames. ], tot_loss[loss=0.2616, ctc_loss=0.1627, cr_loss=0.4024, attn_decoder_loss=0.2637, over 5776808.83 frames. 
], batch size: 76, lr: 9.29e-03, grad_scale: 8.0 +2024-09-17 11:27:54,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=211300.0, ans=0.0 +2024-09-17 11:28:00,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=211300.0, ans=0.2 +2024-09-17 11:29:04,429 INFO [train.py:1198] (0/2) Epoch 12, batch 3100, loss[loss=0.2753, ctc_loss=0.1768, cr_loss=0.4312, attn_decoder_loss=0.2767, over 29290.00 frames. ], tot_loss[loss=0.261, ctc_loss=0.1621, cr_loss=0.4015, attn_decoder_loss=0.2631, over 5776052.73 frames. ], batch size: 100, lr: 9.29e-03, grad_scale: 8.0 +2024-09-17 11:29:12,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=211500.0, ans=0.2 +2024-09-17 11:29:16,535 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.116e+01 9.262e+01 9.866e+01 1.070e+02 1.746e+02, threshold=1.973e+02, percent-clipped=0.0 +2024-09-17 11:29:21,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=211540.0, ans=0.2 +2024-09-17 11:29:31,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=211540.0, ans=0.125 +2024-09-17 11:29:40,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=211580.0, ans=0.07 +2024-09-17 11:29:54,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=211620.0, ans=0.125 +2024-09-17 11:30:02,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=211620.0, ans=0.125 +2024-09-17 11:30:05,625 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.22 vs. limit=15.0 +2024-09-17 11:30:19,858 INFO [train.py:1198] (0/2) Epoch 12, batch 3150, loss[loss=0.2857, ctc_loss=0.1858, cr_loss=0.4263, attn_decoder_loss=0.2873, over 28740.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.1617, cr_loss=0.4016, attn_decoder_loss=0.2629, over 5781737.47 frames. ], batch size: 104, lr: 9.28e-03, grad_scale: 8.0 +2024-09-17 11:30:45,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=211740.0, ans=0.0 +2024-09-17 11:31:05,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=211780.0, ans=0.125 +2024-09-17 11:31:08,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.58 vs. limit=6.0 +2024-09-17 11:31:28,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=211860.0, ans=0.0 +2024-09-17 11:31:39,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=211900.0, ans=0.025 +2024-09-17 11:31:40,401 INFO [train.py:1198] (0/2) Epoch 12, batch 3200, loss[loss=0.248, ctc_loss=0.1408, cr_loss=0.3808, attn_decoder_loss=0.2514, over 29420.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.161, cr_loss=0.4004, attn_decoder_loss=0.2622, over 5792530.98 frames. 
], batch size: 79, lr: 9.28e-03, grad_scale: 16.0 +2024-09-17 11:31:43,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=211900.0, ans=0.125 +2024-09-17 11:31:45,981 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.13 vs. limit=15.0 +2024-09-17 11:31:53,891 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.727e+01 9.023e+01 9.600e+01 1.061e+02 2.809e+02, threshold=1.920e+02, percent-clipped=1.0 +2024-09-17 11:32:19,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=211980.0, ans=0.2 +2024-09-17 11:32:29,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=212020.0, ans=10.0 +2024-09-17 11:32:38,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=212020.0, ans=0.125 +2024-09-17 11:32:56,861 INFO [train.py:1198] (0/2) Epoch 12, batch 3250, loss[loss=0.2857, ctc_loss=0.1819, cr_loss=0.4415, attn_decoder_loss=0.2874, over 29702.00 frames. ], tot_loss[loss=0.261, ctc_loss=0.1615, cr_loss=0.4017, attn_decoder_loss=0.2631, over 5799069.98 frames. ], batch size: 84, lr: 9.27e-03, grad_scale: 8.0 +2024-09-17 11:33:30,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=212180.0, ans=0.0 +2024-09-17 11:33:51,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=212220.0, ans=0.125 +2024-09-17 11:34:00,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=212260.0, ans=0.125 +2024-09-17 11:34:12,003 INFO [train.py:1198] (0/2) Epoch 12, batch 3300, loss[loss=0.276, ctc_loss=0.1696, cr_loss=0.3986, attn_decoder_loss=0.279, over 28338.00 frames. ], tot_loss[loss=0.2596, ctc_loss=0.1604, cr_loss=0.3991, attn_decoder_loss=0.2617, over 5796384.07 frames. 
], batch size: 111, lr: 9.27e-03, grad_scale: 8.0 +2024-09-17 11:34:14,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=212300.0, ans=0.125 +2024-09-17 11:34:23,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=212300.0, ans=0.125 +2024-09-17 11:34:27,366 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.743e+01 9.323e+01 1.013e+02 1.133e+02 3.364e+02, threshold=2.026e+02, percent-clipped=1.0 +2024-09-17 11:34:43,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=212380.0, ans=0.125 +2024-09-17 11:34:52,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=212380.0, ans=0.2 +2024-09-17 11:34:57,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=212380.0, ans=0.125 +2024-09-17 11:35:04,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=212420.0, ans=0.0 +2024-09-17 11:35:28,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=212460.0, ans=0.1 +2024-09-17 11:35:32,396 INFO [train.py:1198] (0/2) Epoch 12, batch 3350, loss[loss=0.2814, ctc_loss=0.1834, cr_loss=0.4154, attn_decoder_loss=0.283, over 28881.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.1614, cr_loss=0.4005, attn_decoder_loss=0.2626, over 5773547.28 frames. ], batch size: 104, lr: 9.26e-03, grad_scale: 8.0 +2024-09-17 11:35:34,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=212500.0, ans=0.1 +2024-09-17 11:35:35,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=212500.0, ans=0.1 +2024-09-17 11:35:58,651 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:36:06,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=212580.0, ans=0.125 +2024-09-17 11:36:24,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=212620.0, ans=0.0 +2024-09-17 11:36:48,158 INFO [train.py:1198] (0/2) Epoch 12, batch 3400, loss[loss=0.2334, ctc_loss=0.1451, cr_loss=0.3925, attn_decoder_loss=0.2345, over 29351.00 frames. ], tot_loss[loss=0.2604, ctc_loss=0.1617, cr_loss=0.4008, attn_decoder_loss=0.2625, over 5767012.89 frames. 
], batch size: 67, lr: 9.26e-03, grad_scale: 8.0 +2024-09-17 11:37:03,318 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.571e+01 9.423e+01 1.002e+02 1.091e+02 2.670e+02, threshold=2.004e+02, percent-clipped=1.0 +2024-09-17 11:37:05,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=212740.0, ans=0.2 +2024-09-17 11:37:21,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=212780.0, ans=0.125 +2024-09-17 11:37:32,227 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.77 vs. limit=12.0 +2024-09-17 11:37:35,937 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:38:02,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=212900.0, ans=0.125 +2024-09-17 11:38:04,056 INFO [train.py:1198] (0/2) Epoch 12, batch 3450, loss[loss=0.2676, ctc_loss=0.1666, cr_loss=0.391, attn_decoder_loss=0.2702, over 28306.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1621, cr_loss=0.4016, attn_decoder_loss=0.2629, over 5774725.46 frames. ], batch size: 111, lr: 9.26e-03, grad_scale: 8.0 +2024-09-17 11:38:59,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=213020.0, ans=0.125 +2024-09-17 11:39:04,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=213020.0, ans=0.125 +2024-09-17 11:39:16,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=213060.0, ans=0.125 +2024-09-17 11:39:23,612 INFO [train.py:1198] (0/2) Epoch 12, batch 3500, loss[loss=0.2466, ctc_loss=0.1597, cr_loss=0.4104, attn_decoder_loss=0.2471, over 29312.00 frames. ], tot_loss[loss=0.2604, ctc_loss=0.1623, cr_loss=0.4022, attn_decoder_loss=0.2624, over 5777918.90 frames. ], batch size: 71, lr: 9.25e-03, grad_scale: 8.0 +2024-09-17 11:39:23,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=213100.0, ans=0.125 +2024-09-17 11:39:36,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.98 vs. limit=15.0 +2024-09-17 11:39:40,421 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.771e+01 8.906e+01 9.633e+01 1.043e+02 3.728e+02, threshold=1.927e+02, percent-clipped=3.0 +2024-09-17 11:39:42,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.14 vs. limit=10.0 +2024-09-17 11:39:45,689 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.54 vs. limit=22.5 +2024-09-17 11:39:58,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=213180.0, ans=0.0 +2024-09-17 11:40:04,998 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.66 vs. 
limit=12.0 +2024-09-17 11:40:13,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=213220.0, ans=0.5 +2024-09-17 11:40:26,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=213260.0, ans=0.1 +2024-09-17 11:40:38,548 INFO [train.py:1198] (0/2) Epoch 12, batch 3550, loss[loss=0.2547, ctc_loss=0.1459, cr_loss=0.4088, attn_decoder_loss=0.2577, over 29718.00 frames. ], tot_loss[loss=0.26, ctc_loss=0.1615, cr_loss=0.4011, attn_decoder_loss=0.262, over 5784372.48 frames. ], batch size: 89, lr: 9.25e-03, grad_scale: 8.0 +2024-09-17 11:40:41,822 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:40:44,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=213300.0, ans=0.1 +2024-09-17 11:40:53,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=213340.0, ans=0.125 +2024-09-17 11:41:02,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=213340.0, ans=0.025 +2024-09-17 11:41:15,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=213380.0, ans=0.125 +2024-09-17 11:41:15,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=213380.0, ans=0.0 +2024-09-17 11:41:23,761 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.39 vs. limit=6.0 +2024-09-17 11:41:27,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=213420.0, ans=0.125 +2024-09-17 11:41:52,784 INFO [train.py:1198] (0/2) Epoch 12, batch 3600, loss[loss=0.2667, ctc_loss=0.1765, cr_loss=0.4118, attn_decoder_loss=0.2676, over 29494.00 frames. ], tot_loss[loss=0.2603, ctc_loss=0.1613, cr_loss=0.401, attn_decoder_loss=0.2624, over 5792475.19 frames. ], batch size: 77, lr: 9.24e-03, grad_scale: 16.0 +2024-09-17 11:41:54,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=213500.0, ans=0.125 +2024-09-17 11:42:10,825 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.945e+01 9.059e+01 9.779e+01 1.035e+02 3.079e+02, threshold=1.956e+02, percent-clipped=1.0 +2024-09-17 11:42:26,538 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.12 vs. limit=15.0 +2024-09-17 11:42:35,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=213580.0, ans=0.125 +2024-09-17 11:42:36,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=213620.0, ans=0.0 +2024-09-17 11:43:05,986 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.97 vs. 
limit=15.0 +2024-09-17 11:43:08,013 INFO [train.py:1198] (0/2) Epoch 12, batch 3650, loss[loss=0.2667, ctc_loss=0.1594, cr_loss=0.4071, attn_decoder_loss=0.2695, over 29511.00 frames. ], tot_loss[loss=0.2596, ctc_loss=0.1607, cr_loss=0.3998, attn_decoder_loss=0.2617, over 5794304.20 frames. ], batch size: 90, lr: 9.24e-03, grad_scale: 8.0 +2024-09-17 11:43:18,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=213700.0, ans=0.0 +2024-09-17 11:43:29,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=213740.0, ans=0.2 +2024-09-17 11:44:23,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=213900.0, ans=0.025 +2024-09-17 11:44:24,701 INFO [train.py:1198] (0/2) Epoch 12, batch 3700, loss[loss=0.2754, ctc_loss=0.1744, cr_loss=0.4143, attn_decoder_loss=0.2774, over 29718.00 frames. ], tot_loss[loss=0.2596, ctc_loss=0.1603, cr_loss=0.3999, attn_decoder_loss=0.2617, over 5804564.03 frames. ], batch size: 84, lr: 9.23e-03, grad_scale: 8.0 +2024-09-17 11:44:42,611 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.674e+01 9.238e+01 9.737e+01 1.052e+02 3.934e+02, threshold=1.947e+02, percent-clipped=3.0 +2024-09-17 11:45:11,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=214020.0, ans=0.125 +2024-09-17 11:45:38,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=214060.0, ans=0.125 +2024-09-17 11:45:40,794 INFO [train.py:1198] (0/2) Epoch 12, batch 3750, loss[loss=0.2312, ctc_loss=0.1342, cr_loss=0.3622, attn_decoder_loss=0.234, over 29399.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1604, cr_loss=0.3995, attn_decoder_loss=0.2618, over 5807922.45 frames. ], batch size: 67, lr: 9.23e-03, grad_scale: 8.0 +2024-09-17 11:45:41,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=214100.0, ans=0.125 +2024-09-17 11:45:43,077 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.29 vs. limit=15.0 +2024-09-17 11:46:21,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=214180.0, ans=0.1 +2024-09-17 11:46:22,846 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:46:22,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=214180.0, ans=0.0 +2024-09-17 11:46:31,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=214220.0, ans=0.0 +2024-09-17 11:46:40,955 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.71 vs. limit=15.0 +2024-09-17 11:46:46,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=214260.0, ans=0.125 +2024-09-17 11:46:55,366 INFO [train.py:1198] (0/2) Epoch 12, batch 3800, loss[loss=0.2791, ctc_loss=0.1787, cr_loss=0.4438, attn_decoder_loss=0.2804, over 29645.00 frames. 
], tot_loss[loss=0.2592, ctc_loss=0.1603, cr_loss=0.399, attn_decoder_loss=0.2614, over 5799019.34 frames. ], batch size: 86, lr: 9.23e-03, grad_scale: 8.0 +2024-09-17 11:46:59,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=214300.0, ans=0.125 +2024-09-17 11:46:59,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=214300.0, ans=0.0 +2024-09-17 11:47:13,094 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.095e+01 9.449e+01 1.046e+02 1.140e+02 2.045e+02, threshold=2.093e+02, percent-clipped=1.0 +2024-09-17 11:47:20,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=214340.0, ans=0.125 +2024-09-17 11:47:26,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=214380.0, ans=0.125 +2024-09-17 11:47:36,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=214380.0, ans=0.025 +2024-09-17 11:47:43,486 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:47:43,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=214420.0, ans=0.0 +2024-09-17 11:47:56,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.03 vs. limit=15.0 +2024-09-17 11:48:09,978 INFO [train.py:1198] (0/2) Epoch 12, batch 3850, loss[loss=0.2693, ctc_loss=0.1608, cr_loss=0.4175, attn_decoder_loss=0.2721, over 29263.00 frames. ], tot_loss[loss=0.2592, ctc_loss=0.1599, cr_loss=0.3988, attn_decoder_loss=0.2614, over 5813076.11 frames. ], batch size: 100, lr: 9.22e-03, grad_scale: 8.0 +2024-09-17 11:48:11,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=214500.0, ans=0.1 +2024-09-17 11:48:26,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=214540.0, ans=0.125 +2024-09-17 11:48:29,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=214540.0, ans=0.2 +2024-09-17 11:48:39,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=214580.0, ans=0.125 +2024-09-17 11:48:55,576 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.43 vs. limit=15.0 +2024-09-17 11:48:56,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=214620.0, ans=0.0 +2024-09-17 11:48:59,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=214620.0, ans=0.125 +2024-09-17 11:49:04,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.36 vs. 
limit=22.5 +2024-09-17 11:49:17,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=214660.0, ans=0.1 +2024-09-17 11:49:24,514 INFO [train.py:1198] (0/2) Epoch 12, batch 3900, loss[loss=0.2722, ctc_loss=0.1711, cr_loss=0.4252, attn_decoder_loss=0.274, over 29623.00 frames. ], tot_loss[loss=0.2599, ctc_loss=0.1607, cr_loss=0.4, attn_decoder_loss=0.262, over 5817441.25 frames. ], batch size: 86, lr: 9.22e-03, grad_scale: 8.0 +2024-09-17 11:49:34,374 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.94 vs. limit=15.0 +2024-09-17 11:49:36,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=214700.0, ans=0.1 +2024-09-17 11:49:42,147 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.696e+01 9.064e+01 9.520e+01 1.003e+02 3.590e+02, threshold=1.904e+02, percent-clipped=1.0 +2024-09-17 11:50:03,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=214780.0, ans=0.125 +2024-09-17 11:50:38,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=214860.0, ans=0.125 +2024-09-17 11:50:40,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=214900.0, ans=0.1 +2024-09-17 11:50:41,382 INFO [train.py:1198] (0/2) Epoch 12, batch 3950, loss[loss=0.2733, ctc_loss=0.1643, cr_loss=0.4063, attn_decoder_loss=0.2764, over 29420.00 frames. ], tot_loss[loss=0.2596, ctc_loss=0.1598, cr_loss=0.3995, attn_decoder_loss=0.2618, over 5836506.16 frames. ], batch size: 97, lr: 9.21e-03, grad_scale: 8.0 +2024-09-17 11:50:45,510 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.52 vs. limit=15.0 +2024-09-17 11:50:47,037 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.95 vs. limit=10.0 +2024-09-17 11:50:56,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=214940.0, ans=0.0 +2024-09-17 11:51:08,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.42 vs. limit=15.0 +2024-09-17 11:51:21,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=214980.0, ans=0.0 +2024-09-17 11:51:40,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=215060.0, ans=0.125 +2024-09-17 11:51:42,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=215060.0, ans=0.0 +2024-09-17 11:51:55,353 INFO [train.py:1198] (0/2) Epoch 12, batch 4000, loss[loss=0.2394, ctc_loss=0.1393, cr_loss=0.3793, attn_decoder_loss=0.2421, over 29489.00 frames. ], tot_loss[loss=0.26, ctc_loss=0.1606, cr_loss=0.4002, attn_decoder_loss=0.2621, over 5812185.62 frames. 
], batch size: 74, lr: 9.21e-03, grad_scale: 16.0 +2024-09-17 11:51:56,609 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.38 vs. limit=5.0 +2024-09-17 11:52:14,267 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.967e+01 9.592e+01 1.062e+02 2.028e+02, threshold=1.918e+02, percent-clipped=1.0 +2024-09-17 11:52:46,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=215220.0, ans=0.0 +2024-09-17 11:53:10,002 INFO [train.py:1198] (0/2) Epoch 12, batch 4050, loss[loss=0.2966, ctc_loss=0.2172, cr_loss=0.4422, attn_decoder_loss=0.2955, over 19684.00 frames. ], tot_loss[loss=0.2598, ctc_loss=0.1606, cr_loss=0.3995, attn_decoder_loss=0.262, over 5795612.58 frames. ], batch size: 210, lr: 9.21e-03, grad_scale: 8.0 +2024-09-17 11:53:13,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.37 vs. limit=15.0 +2024-09-17 11:53:27,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=215340.0, ans=0.0 +2024-09-17 11:53:41,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=215380.0, ans=0.05 +2024-09-17 11:53:45,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=215380.0, ans=0.2 +2024-09-17 11:54:09,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=215460.0, ans=0.09899494936611666 +2024-09-17 11:54:23,751 INFO [train.py:1198] (0/2) Epoch 12, batch 4100, loss[loss=0.2694, ctc_loss=0.1624, cr_loss=0.3855, attn_decoder_loss=0.2728, over 29512.00 frames. ], tot_loss[loss=0.2602, ctc_loss=0.1609, cr_loss=0.4, attn_decoder_loss=0.2624, over 5791537.94 frames. ], batch size: 90, lr: 9.20e-03, grad_scale: 8.0 +2024-09-17 11:54:27,703 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.33 vs. 
limit=10.0 +2024-09-17 11:54:43,939 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.986e+01 9.324e+01 9.990e+01 1.134e+02 3.141e+02, threshold=1.998e+02, percent-clipped=1.0 +2024-09-17 11:54:55,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=215580.0, ans=0.125 +2024-09-17 11:55:05,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=215580.0, ans=0.0 +2024-09-17 11:55:14,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=215620.0, ans=0.05 +2024-09-17 11:55:16,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=215620.0, ans=0.07 +2024-09-17 11:55:29,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=215660.0, ans=0.0 +2024-09-17 11:55:35,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=215660.0, ans=0.125 +2024-09-17 11:55:38,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=215700.0, ans=0.125 +2024-09-17 11:55:39,604 INFO [train.py:1198] (0/2) Epoch 12, batch 4150, loss[loss=0.2549, ctc_loss=0.1508, cr_loss=0.3842, attn_decoder_loss=0.258, over 29487.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1605, cr_loss=0.399, attn_decoder_loss=0.2618, over 5798059.24 frames. ], batch size: 77, lr: 9.20e-03, grad_scale: 8.0 +2024-09-17 11:55:53,861 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.25 vs. limit=22.5 +2024-09-17 11:55:57,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=215740.0, ans=0.2 +2024-09-17 11:56:00,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=215740.0, ans=0.2 +2024-09-17 11:56:05,271 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.60 vs. limit=15.0 +2024-09-17 11:56:15,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=215780.0, ans=0.0 +2024-09-17 11:56:43,782 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.80 vs. limit=10.0 +2024-09-17 11:56:53,538 INFO [train.py:1198] (0/2) Epoch 12, batch 4200, loss[loss=0.2863, ctc_loss=0.1849, cr_loss=0.4494, attn_decoder_loss=0.2876, over 29488.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.1608, cr_loss=0.3996, attn_decoder_loss=0.2623, over 5800043.24 frames. ], batch size: 90, lr: 9.19e-03, grad_scale: 8.0 +2024-09-17 11:57:04,991 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.19 vs. 
limit=15.0 +2024-09-17 11:57:12,888 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.094e+01 9.529e+01 1.014e+02 1.072e+02 1.789e+02, threshold=2.028e+02, percent-clipped=0.0 +2024-09-17 11:57:53,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.75 vs. limit=15.0 +2024-09-17 11:58:07,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.55 vs. limit=6.0 +2024-09-17 11:58:07,691 INFO [train.py:1198] (0/2) Epoch 12, batch 4250, loss[loss=0.2349, ctc_loss=0.1338, cr_loss=0.3496, attn_decoder_loss=0.2384, over 29496.00 frames. ], tot_loss[loss=0.2603, ctc_loss=0.1607, cr_loss=0.3989, attn_decoder_loss=0.2625, over 5805892.81 frames. ], batch size: 74, lr: 9.19e-03, grad_scale: 8.0 +2024-09-17 11:58:09,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=216100.0, ans=0.125 +2024-09-17 11:58:10,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=216100.0, ans=0.125 +2024-09-17 11:58:14,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.51 vs. limit=15.0 +2024-09-17 11:58:15,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=216100.0, ans=0.0 +2024-09-17 11:58:16,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=216100.0, ans=0.0 +2024-09-17 11:58:49,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=216180.0, ans=0.2 +2024-09-17 11:59:02,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=216220.0, ans=0.125 +2024-09-17 11:59:23,269 INFO [train.py:1198] (0/2) Epoch 12, batch 4300, loss[loss=0.2635, ctc_loss=0.1591, cr_loss=0.403, attn_decoder_loss=0.2661, over 29528.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.1602, cr_loss=0.3981, attn_decoder_loss=0.2624, over 5795914.46 frames. ], batch size: 87, lr: 9.18e-03, grad_scale: 8.0 +2024-09-17 11:59:31,640 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.44 vs. limit=15.0 +2024-09-17 11:59:32,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=216300.0, ans=0.2 +2024-09-17 11:59:36,072 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.67 vs. limit=15.0 +2024-09-17 11:59:37,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=23.01 vs. limit=22.5 +2024-09-17 11:59:44,352 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.076e+01 9.375e+01 1.010e+02 1.083e+02 2.799e+02, threshold=2.019e+02, percent-clipped=3.0 +2024-09-17 12:00:14,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=9.04 vs. 
limit=15.0 +2024-09-17 12:00:30,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=216460.0, ans=0.0 +2024-09-17 12:00:37,748 INFO [train.py:1198] (0/2) Epoch 12, batch 4350, loss[loss=0.2815, ctc_loss=0.1809, cr_loss=0.4292, attn_decoder_loss=0.2832, over 29515.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1628, cr_loss=0.4033, attn_decoder_loss=0.2654, over 5798105.91 frames. ], batch size: 97, lr: 9.18e-03, grad_scale: 4.0 +2024-09-17 12:00:47,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=216500.0, ans=0.0 +2024-09-17 12:00:54,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=216540.0, ans=0.2 +2024-09-17 12:01:10,681 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.77 vs. limit=15.0 +2024-09-17 12:01:13,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=216580.0, ans=0.07 +2024-09-17 12:01:16,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=216580.0, ans=0.0 +2024-09-17 12:01:21,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=216620.0, ans=0.125 +2024-09-17 12:01:28,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=216620.0, ans=0.125 +2024-09-17 12:01:34,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=216620.0, ans=0.125 +2024-09-17 12:01:37,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=216660.0, ans=0.125 +2024-09-17 12:01:45,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=216660.0, ans=0.125 +2024-09-17 12:01:52,274 INFO [train.py:1198] (0/2) Epoch 12, batch 4400, loss[loss=0.2729, ctc_loss=0.1757, cr_loss=0.4361, attn_decoder_loss=0.274, over 27296.00 frames. ], tot_loss[loss=0.2655, ctc_loss=0.1647, cr_loss=0.4059, attn_decoder_loss=0.2677, over 5768845.56 frames. ], batch size: 125, lr: 9.18e-03, grad_scale: 8.0 +2024-09-17 12:02:12,851 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.351e+01 9.429e+01 9.897e+01 1.056e+02 1.811e+02, threshold=1.979e+02, percent-clipped=0.0 +2024-09-17 12:02:41,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=216820.0, ans=0.125 +2024-09-17 12:03:02,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=216860.0, ans=0.0 +2024-09-17 12:03:03,037 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.71 vs. limit=22.5 +2024-09-17 12:03:06,651 INFO [train.py:1198] (0/2) Epoch 12, batch 4450, loss[loss=0.298, ctc_loss=0.225, cr_loss=0.4492, attn_decoder_loss=0.2962, over 20492.00 frames. ], tot_loss[loss=0.2689, ctc_loss=0.1698, cr_loss=0.4103, attn_decoder_loss=0.2708, over 5584792.55 frames. 
], batch size: 209, lr: 9.17e-03, grad_scale: 8.0 +2024-09-17 12:03:08,597 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:03:42,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=216980.0, ans=0.125 +2024-09-17 12:03:55,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.05 vs. limit=15.0 +2024-09-17 12:04:12,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=217060.0, ans=0.0 +2024-09-17 12:04:17,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=217060.0, ans=0.125 +2024-09-17 12:04:23,109 INFO [train.py:1198] (0/2) Epoch 12, batch 4500, loss[loss=0.3003, ctc_loss=0.2235, cr_loss=0.4326, attn_decoder_loss=0.2992, over 20064.00 frames. ], tot_loss[loss=0.2722, ctc_loss=0.1759, cr_loss=0.4123, attn_decoder_loss=0.2738, over 5244026.66 frames. ], batch size: 210, lr: 9.17e-03, grad_scale: 8.0 +2024-09-17 12:04:45,821 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.368e+01 1.035e+02 1.137e+02 1.264e+02 3.702e+02, threshold=2.273e+02, percent-clipped=1.0 +2024-09-17 12:05:00,373 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-12.pt +2024-09-17 12:05:52,066 INFO [train.py:1198] (0/2) Epoch 13, batch 0, loss[loss=0.2425, ctc_loss=0.1363, cr_loss=0.3769, attn_decoder_loss=0.2459, over 29595.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1363, cr_loss=0.3769, attn_decoder_loss=0.2459, over 29595.00 frames. ], batch size: 73, lr: 8.81e-03, grad_scale: 16.0 +2024-09-17 12:05:52,067 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 12:06:10,485 INFO [train.py:1230] (0/2) Epoch 13, validation: loss=0.214, ctc_loss=0.04435, cr_loss=4.652e-15, attn_decoder_loss=0.2329, over 944034.00 frames. 
+2024-09-17 12:06:10,485 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 12:06:10,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=217200.0, ans=0.1 +2024-09-17 12:06:10,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=217200.0, ans=0.05 +2024-09-17 12:06:13,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=217200.0, ans=0.125 +2024-09-17 12:06:33,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=217240.0, ans=0.125 +2024-09-17 12:06:50,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=217280.0, ans=0.0 +2024-09-17 12:07:19,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=217360.0, ans=0.1 +2024-09-17 12:07:22,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=217360.0, ans=0.2 +2024-09-17 12:07:28,728 INFO [train.py:1198] (0/2) Epoch 13, batch 50, loss[loss=0.2398, ctc_loss=0.1449, cr_loss=0.3748, attn_decoder_loss=0.242, over 29444.00 frames. ], tot_loss[loss=0.2647, ctc_loss=0.1665, cr_loss=0.4088, attn_decoder_loss=0.2665, over 1269537.14 frames. ], batch size: 70, lr: 8.80e-03, grad_scale: 8.0 +2024-09-17 12:07:51,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=217440.0, ans=0.2 +2024-09-17 12:07:59,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=217480.0, ans=0.0 +2024-09-17 12:08:03,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=217480.0, ans=0.0 +2024-09-17 12:08:12,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=8.16 vs. limit=15.0 +2024-09-17 12:08:23,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=217520.0, ans=0.0 +2024-09-17 12:08:26,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=217520.0, ans=0.125 +2024-09-17 12:08:30,976 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.263e+01 9.622e+01 1.023e+02 1.146e+02 3.788e+02, threshold=2.046e+02, percent-clipped=2.0 +2024-09-17 12:08:45,179 INFO [train.py:1198] (0/2) Epoch 13, batch 100, loss[loss=0.2547, ctc_loss=0.1572, cr_loss=0.4009, attn_decoder_loss=0.2566, over 29516.00 frames. ], tot_loss[loss=0.2649, ctc_loss=0.1653, cr_loss=0.4066, attn_decoder_loss=0.267, over 2251992.19 frames. ], batch size: 76, lr: 8.80e-03, grad_scale: 8.0 +2024-09-17 12:08:46,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=217600.0, ans=0.125 +2024-09-17 12:08:59,618 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.45 vs. 
limit=15.0 +2024-09-17 12:09:06,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=217640.0, ans=0.125 +2024-09-17 12:09:12,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=217640.0, ans=0.1 +2024-09-17 12:09:13,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=217680.0, ans=0.0 +2024-09-17 12:09:33,847 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:09:41,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=217720.0, ans=0.0 +2024-09-17 12:09:51,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=217760.0, ans=0.125 +2024-09-17 12:10:01,871 INFO [train.py:1198] (0/2) Epoch 13, batch 150, loss[loss=0.2369, ctc_loss=0.1441, cr_loss=0.3816, attn_decoder_loss=0.2387, over 29429.00 frames. ], tot_loss[loss=0.261, ctc_loss=0.1609, cr_loss=0.4008, attn_decoder_loss=0.2632, over 3046915.09 frames. ], batch size: 70, lr: 8.80e-03, grad_scale: 8.0 +2024-09-17 12:10:32,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=217880.0, ans=0.125 +2024-09-17 12:10:59,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=217920.0, ans=0.125 +2024-09-17 12:11:06,257 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.881e+01 9.835e+01 1.094e+02 1.657e+02, threshold=1.967e+02, percent-clipped=0.0 +2024-09-17 12:11:14,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=217960.0, ans=0.5 +2024-09-17 12:11:20,034 INFO [train.py:1198] (0/2) Epoch 13, batch 200, loss[loss=0.2862, ctc_loss=0.191, cr_loss=0.4541, attn_decoder_loss=0.2867, over 27351.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1601, cr_loss=0.4001, attn_decoder_loss=0.2619, over 3659107.09 frames. ], batch size: 124, lr: 8.79e-03, grad_scale: 8.0 +2024-09-17 12:11:21,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=218000.0, ans=0.125 +2024-09-17 12:11:33,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=218040.0, ans=0.2 +2024-09-17 12:11:59,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=218080.0, ans=0.0 +2024-09-17 12:12:01,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_ff2.min_abs, batch_count=218080.0, ans=0.1 +2024-09-17 12:12:20,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=218160.0, ans=0.125 +2024-09-17 12:12:35,451 INFO [train.py:1198] (0/2) Epoch 13, batch 250, loss[loss=0.2768, ctc_loss=0.1787, cr_loss=0.4112, attn_decoder_loss=0.2786, over 29275.00 frames. ], tot_loss[loss=0.2595, ctc_loss=0.1592, cr_loss=0.3994, attn_decoder_loss=0.2618, over 4141365.92 frames. 
], batch size: 100, lr: 8.79e-03, grad_scale: 8.0 +2024-09-17 12:12:43,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=218200.0, ans=0.0 +2024-09-17 12:12:50,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=218240.0, ans=0.2 +2024-09-17 12:13:01,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=218240.0, ans=0.1 +2024-09-17 12:13:02,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=218240.0, ans=0.0 +2024-09-17 12:13:05,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=218280.0, ans=0.0 +2024-09-17 12:13:11,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=218280.0, ans=0.125 +2024-09-17 12:13:11,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=218280.0, ans=0.025 +2024-09-17 12:13:12,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=218280.0, ans=0.95 +2024-09-17 12:13:14,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=218280.0, ans=0.025 +2024-09-17 12:13:16,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=218280.0, ans=0.0 +2024-09-17 12:13:32,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=218320.0, ans=0.1 +2024-09-17 12:13:34,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=218320.0, ans=0.125 +2024-09-17 12:13:38,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=218360.0, ans=0.125 +2024-09-17 12:13:39,915 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.631e+01 9.019e+01 9.647e+01 1.091e+02 1.389e+02, threshold=1.929e+02, percent-clipped=0.0 +2024-09-17 12:13:53,954 INFO [train.py:1198] (0/2) Epoch 13, batch 300, loss[loss=0.2807, ctc_loss=0.1738, cr_loss=0.4294, attn_decoder_loss=0.2831, over 29528.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.159, cr_loss=0.3993, attn_decoder_loss=0.2612, over 4510866.46 frames. ], batch size: 92, lr: 8.78e-03, grad_scale: 8.0 +2024-09-17 12:14:16,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=218440.0, ans=0.125 +2024-09-17 12:14:27,763 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.94 vs. 
limit=22.5 +2024-09-17 12:14:30,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=218480.0, ans=0.1 +2024-09-17 12:14:50,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=218520.0, ans=0.0 +2024-09-17 12:14:59,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=218560.0, ans=10.0 +2024-09-17 12:15:09,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=218560.0, ans=0.0 +2024-09-17 12:15:11,640 INFO [train.py:1198] (0/2) Epoch 13, batch 350, loss[loss=0.2271, ctc_loss=0.139, cr_loss=0.3527, attn_decoder_loss=0.229, over 29328.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.1591, cr_loss=0.3991, attn_decoder_loss=0.2615, over 4794884.81 frames. ], batch size: 71, lr: 8.78e-03, grad_scale: 8.0 +2024-09-17 12:15:13,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=218600.0, ans=0.1 +2024-09-17 12:15:27,031 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:15:35,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.01 vs. limit=22.5 +2024-09-17 12:15:37,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=218640.0, ans=0.125 +2024-09-17 12:15:37,531 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:15:52,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=218680.0, ans=0.0 +2024-09-17 12:16:08,088 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.09 vs. limit=15.0 +2024-09-17 12:16:13,452 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 9.251e+01 9.818e+01 1.107e+02 7.103e+02, threshold=1.964e+02, percent-clipped=3.0 +2024-09-17 12:16:27,031 INFO [train.py:1198] (0/2) Epoch 13, batch 400, loss[loss=0.2607, ctc_loss=0.1591, cr_loss=0.4134, attn_decoder_loss=0.2629, over 29715.00 frames. ], tot_loss[loss=0.2588, ctc_loss=0.1586, cr_loss=0.3978, attn_decoder_loss=0.2611, over 5024477.02 frames. 
], batch size: 82, lr: 8.78e-03, grad_scale: 16.0 +2024-09-17 12:16:34,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_ff2.min_abs, batch_count=218800.0, ans=0.1 +2024-09-17 12:16:36,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=218800.0, ans=0.2 +2024-09-17 12:16:53,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=218840.0, ans=0.2 +2024-09-17 12:16:54,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=218840.0, ans=0.0 +2024-09-17 12:16:57,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=218880.0, ans=0.025 +2024-09-17 12:16:58,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.37 vs. limit=22.5 +2024-09-17 12:17:19,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=218920.0, ans=0.0 +2024-09-17 12:17:39,041 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.87 vs. limit=15.0 +2024-09-17 12:17:40,604 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.89 vs. limit=15.0 +2024-09-17 12:17:42,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=218960.0, ans=0.0 +2024-09-17 12:17:45,480 INFO [train.py:1198] (0/2) Epoch 13, batch 450, loss[loss=0.2442, ctc_loss=0.125, cr_loss=0.3513, attn_decoder_loss=0.2496, over 29716.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1586, cr_loss=0.3973, attn_decoder_loss=0.2612, over 5186895.45 frames. 
], batch size: 83, lr: 8.77e-03, grad_scale: 8.0 +2024-09-17 12:17:47,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=219000.0, ans=0.2 +2024-09-17 12:18:00,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=219040.0, ans=0.0 +2024-09-17 12:18:03,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=219040.0, ans=0.125 +2024-09-17 12:18:08,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=219040.0, ans=0.1 +2024-09-17 12:18:29,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_abs, batch_count=219120.0, ans=0.5 +2024-09-17 12:18:31,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=219120.0, ans=0.1 +2024-09-17 12:18:47,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=219160.0, ans=0.125 +2024-09-17 12:18:51,589 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.792e+01 8.793e+01 9.370e+01 9.843e+01 2.913e+02, threshold=1.874e+02, percent-clipped=1.0 +2024-09-17 12:19:04,120 INFO [train.py:1198] (0/2) Epoch 13, batch 500, loss[loss=0.2806, ctc_loss=0.182, cr_loss=0.4562, attn_decoder_loss=0.2814, over 29407.00 frames. ], tot_loss[loss=0.258, ctc_loss=0.1579, cr_loss=0.3967, attn_decoder_loss=0.2603, over 5330132.75 frames. ], batch size: 94, lr: 8.77e-03, grad_scale: 8.0 +2024-09-17 12:19:05,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=219200.0, ans=0.125 +2024-09-17 12:19:28,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_abs, batch_count=219240.0, ans=0.5 +2024-09-17 12:19:37,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=219280.0, ans=0.0 +2024-09-17 12:19:38,200 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.76 vs. limit=6.0 +2024-09-17 12:19:57,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=219320.0, ans=0.125 +2024-09-17 12:20:19,694 INFO [train.py:1198] (0/2) Epoch 13, batch 550, loss[loss=0.2683, ctc_loss=0.1585, cr_loss=0.4123, attn_decoder_loss=0.2713, over 28828.00 frames. ], tot_loss[loss=0.2582, ctc_loss=0.1582, cr_loss=0.3972, attn_decoder_loss=0.2605, over 5423798.61 frames. 
], batch size: 104, lr: 8.76e-03, grad_scale: 8.0 +2024-09-17 12:20:19,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=219400.0, ans=0.125 +2024-09-17 12:20:30,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=219400.0, ans=0.025 +2024-09-17 12:20:56,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=219480.0, ans=0.2 +2024-09-17 12:21:08,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=219520.0, ans=0.125 +2024-09-17 12:21:11,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=219520.0, ans=0.125 +2024-09-17 12:21:26,147 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.042e+01 9.312e+01 1.008e+02 1.110e+02 1.901e+02, threshold=2.017e+02, percent-clipped=1.0 +2024-09-17 12:21:29,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=219560.0, ans=0.125 +2024-09-17 12:21:31,980 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.47 vs. limit=22.5 +2024-09-17 12:21:38,306 INFO [train.py:1198] (0/2) Epoch 13, batch 600, loss[loss=0.2666, ctc_loss=0.1583, cr_loss=0.3866, attn_decoder_loss=0.27, over 29261.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1587, cr_loss=0.3979, attn_decoder_loss=0.2611, over 5511558.11 frames. ], batch size: 100, lr: 8.76e-03, grad_scale: 8.0 +2024-09-17 12:21:58,982 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.98 vs. limit=15.0 +2024-09-17 12:22:08,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=219680.0, ans=0.2 +2024-09-17 12:22:08,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=219680.0, ans=0.0 +2024-09-17 12:22:28,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=219720.0, ans=0.2 +2024-09-17 12:22:43,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=219760.0, ans=0.125 +2024-09-17 12:22:49,295 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.87 vs. limit=15.0 +2024-09-17 12:22:53,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=219760.0, ans=0.125 +2024-09-17 12:22:55,878 INFO [train.py:1198] (0/2) Epoch 13, batch 650, loss[loss=0.262, ctc_loss=0.1593, cr_loss=0.3958, attn_decoder_loss=0.2646, over 29755.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.1572, cr_loss=0.3957, attn_decoder_loss=0.2599, over 5588068.26 frames. ], batch size: 81, lr: 8.76e-03, grad_scale: 8.0 +2024-09-17 12:23:06,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.96 vs. 
limit=10.0 +2024-09-17 12:23:08,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=219800.0, ans=0.2 +2024-09-17 12:23:11,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=219840.0, ans=0.125 +2024-09-17 12:23:15,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=219840.0, ans=0.125 +2024-09-17 12:23:29,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=219880.0, ans=0.0 +2024-09-17 12:23:30,561 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.98 vs. limit=15.0 +2024-09-17 12:23:42,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.61 vs. limit=15.0 +2024-09-17 12:23:43,395 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:23:59,542 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 9.144e+01 9.957e+01 1.061e+02 1.597e+02, threshold=1.991e+02, percent-clipped=0.0 +2024-09-17 12:24:12,258 INFO [train.py:1198] (0/2) Epoch 13, batch 700, loss[loss=0.239, ctc_loss=0.135, cr_loss=0.359, attn_decoder_loss=0.2426, over 29536.00 frames. ], tot_loss[loss=0.2583, ctc_loss=0.1576, cr_loss=0.3967, attn_decoder_loss=0.2607, over 5637638.60 frames. ], batch size: 76, lr: 8.75e-03, grad_scale: 8.0 +2024-09-17 12:24:24,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=220000.0, ans=0.125 +2024-09-17 12:24:24,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=220000.0, ans=0.025 +2024-09-17 12:24:24,952 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.48 vs. limit=22.5 +2024-09-17 12:24:26,640 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.29 vs. limit=10.0 +2024-09-17 12:24:35,044 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:24:36,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=220040.0, ans=0.125 +2024-09-17 12:24:53,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=220080.0, ans=0.0 +2024-09-17 12:25:30,054 INFO [train.py:1198] (0/2) Epoch 13, batch 750, loss[loss=0.2696, ctc_loss=0.1705, cr_loss=0.4244, attn_decoder_loss=0.2711, over 29724.00 frames. ], tot_loss[loss=0.258, ctc_loss=0.1574, cr_loss=0.3968, attn_decoder_loss=0.2604, over 5675423.52 frames. 
], batch size: 82, lr: 8.75e-03, grad_scale: 8.0 +2024-09-17 12:25:37,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=220200.0, ans=0.0 +2024-09-17 12:25:40,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=220200.0, ans=10.0 +2024-09-17 12:25:45,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=220240.0, ans=0.125 +2024-09-17 12:25:57,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=220240.0, ans=0.07 +2024-09-17 12:26:21,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=220320.0, ans=0.0 +2024-09-17 12:26:30,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=220360.0, ans=0.125 +2024-09-17 12:26:33,426 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.682e+01 9.372e+01 1.007e+02 1.108e+02 5.289e+02, threshold=2.013e+02, percent-clipped=1.0 +2024-09-17 12:26:41,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=220360.0, ans=0.0 +2024-09-17 12:26:42,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=220360.0, ans=0.125 +2024-09-17 12:26:45,666 INFO [train.py:1198] (0/2) Epoch 13, batch 800, loss[loss=0.2305, ctc_loss=0.1279, cr_loss=0.3447, attn_decoder_loss=0.2342, over 29576.00 frames. ], tot_loss[loss=0.258, ctc_loss=0.1575, cr_loss=0.3973, attn_decoder_loss=0.2603, over 5706516.52 frames. ], batch size: 73, lr: 8.74e-03, grad_scale: 16.0 +2024-09-17 12:26:45,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=220400.0, ans=0.1 +2024-09-17 12:27:27,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=220480.0, ans=0.125 +2024-09-17 12:27:32,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=220520.0, ans=0.125 +2024-09-17 12:27:33,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=220520.0, ans=0.125 +2024-09-17 12:28:03,606 INFO [train.py:1198] (0/2) Epoch 13, batch 850, loss[loss=0.271, ctc_loss=0.1667, cr_loss=0.4136, attn_decoder_loss=0.2733, over 29692.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1567, cr_loss=0.3958, attn_decoder_loss=0.2598, over 5736895.10 frames. 
], batch size: 89, lr: 8.74e-03, grad_scale: 8.0 +2024-09-17 12:28:11,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=220600.0, ans=0.125 +2024-09-17 12:28:11,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=220600.0, ans=0.0 +2024-09-17 12:28:38,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=220680.0, ans=0.0 +2024-09-17 12:28:55,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=220720.0, ans=0.025 +2024-09-17 12:29:11,979 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.727e+01 8.923e+01 9.332e+01 1.023e+02 2.147e+02, threshold=1.866e+02, percent-clipped=2.0 +2024-09-17 12:29:23,103 INFO [train.py:1198] (0/2) Epoch 13, batch 900, loss[loss=0.231, ctc_loss=0.1221, cr_loss=0.3393, attn_decoder_loss=0.2355, over 29587.00 frames. ], tot_loss[loss=0.258, ctc_loss=0.1571, cr_loss=0.3966, attn_decoder_loss=0.2604, over 5741553.95 frames. ], batch size: 73, lr: 8.74e-03, grad_scale: 8.0 +2024-09-17 12:29:59,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=220880.0, ans=0.0 +2024-09-17 12:30:38,551 INFO [train.py:1198] (0/2) Epoch 13, batch 950, loss[loss=0.2404, ctc_loss=0.1394, cr_loss=0.3586, attn_decoder_loss=0.2436, over 29523.00 frames. ], tot_loss[loss=0.2583, ctc_loss=0.1576, cr_loss=0.3972, attn_decoder_loss=0.2607, over 5743906.01 frames. ], batch size: 74, lr: 8.73e-03, grad_scale: 8.0 +2024-09-17 12:30:40,973 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.95 vs. limit=15.0 +2024-09-17 12:30:47,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=221000.0, ans=10.0 +2024-09-17 12:30:48,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=221000.0, ans=0.0 +2024-09-17 12:31:05,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=221040.0, ans=0.025 +2024-09-17 12:31:46,093 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.726e+01 9.884e+01 1.087e+02 1.225e+02 3.377e+02, threshold=2.174e+02, percent-clipped=3.0 +2024-09-17 12:31:47,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=221160.0, ans=0.0 +2024-09-17 12:31:50,144 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.07 vs. limit=10.0 +2024-09-17 12:31:56,570 INFO [train.py:1198] (0/2) Epoch 13, batch 1000, loss[loss=0.2558, ctc_loss=0.1548, cr_loss=0.415, attn_decoder_loss=0.2578, over 29509.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.159, cr_loss=0.3986, attn_decoder_loss=0.2616, over 5736927.58 frames. 
], batch size: 77, lr: 8.73e-03, grad_scale: 8.0 +2024-09-17 12:31:58,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=221200.0, ans=0.0 +2024-09-17 12:32:32,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=221280.0, ans=0.1 +2024-09-17 12:33:01,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=221360.0, ans=0.2 +2024-09-17 12:33:13,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=221400.0, ans=0.2 +2024-09-17 12:33:15,011 INFO [train.py:1198] (0/2) Epoch 13, batch 1050, loss[loss=0.2715, ctc_loss=0.1679, cr_loss=0.4192, attn_decoder_loss=0.2737, over 29669.00 frames. ], tot_loss[loss=0.2585, ctc_loss=0.1585, cr_loss=0.3978, attn_decoder_loss=0.2608, over 5744739.55 frames. ], batch size: 85, lr: 8.73e-03, grad_scale: 8.0 +2024-09-17 12:33:16,178 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.18 vs. limit=15.0 +2024-09-17 12:33:33,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=221440.0, ans=0.125 +2024-09-17 12:33:55,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=221480.0, ans=0.0 +2024-09-17 12:34:04,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=221520.0, ans=0.1 +2024-09-17 12:34:19,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=221560.0, ans=0.125 +2024-09-17 12:34:20,746 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.817e+01 9.337e+01 1.034e+02 1.952e+02, threshold=1.867e+02, percent-clipped=0.0 +2024-09-17 12:34:27,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=221560.0, ans=0.125 +2024-09-17 12:34:32,019 INFO [train.py:1198] (0/2) Epoch 13, batch 1100, loss[loss=0.247, ctc_loss=0.1489, cr_loss=0.3576, attn_decoder_loss=0.2499, over 29468.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1584, cr_loss=0.3975, attn_decoder_loss=0.2607, over 5757738.88 frames. ], batch size: 78, lr: 8.72e-03, grad_scale: 8.0 +2024-09-17 12:34:39,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=221600.0, ans=0.125 +2024-09-17 12:34:54,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=221640.0, ans=0.04949747468305833 +2024-09-17 12:34:55,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=221640.0, ans=0.125 +2024-09-17 12:35:14,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=221680.0, ans=0.0 +2024-09-17 12:35:23,955 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.82 vs. 
limit=15.0 +2024-09-17 12:35:38,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=221760.0, ans=22.5 +2024-09-17 12:35:49,961 INFO [train.py:1198] (0/2) Epoch 13, batch 1150, loss[loss=0.2574, ctc_loss=0.1614, cr_loss=0.3921, attn_decoder_loss=0.2593, over 29443.00 frames. ], tot_loss[loss=0.2585, ctc_loss=0.1587, cr_loss=0.3981, attn_decoder_loss=0.2607, over 5755295.33 frames. ], batch size: 78, lr: 8.72e-03, grad_scale: 8.0 +2024-09-17 12:35:57,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=221800.0, ans=0.1 +2024-09-17 12:36:08,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=221840.0, ans=0.125 +2024-09-17 12:36:57,523 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.374e+01 9.019e+01 9.917e+01 1.067e+02 1.578e+02, threshold=1.983e+02, percent-clipped=0.0 +2024-09-17 12:37:00,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=221960.0, ans=0.125 +2024-09-17 12:37:08,002 INFO [train.py:1198] (0/2) Epoch 13, batch 1200, loss[loss=0.2676, ctc_loss=0.157, cr_loss=0.4101, attn_decoder_loss=0.2707, over 29695.00 frames. ], tot_loss[loss=0.2591, ctc_loss=0.159, cr_loss=0.399, attn_decoder_loss=0.2614, over 5747285.07 frames. ], batch size: 85, lr: 8.71e-03, grad_scale: 16.0 +2024-09-17 12:37:19,102 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:37:31,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_ff3.min_abs, batch_count=222040.0, ans=0.2 +2024-09-17 12:37:37,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=222080.0, ans=0.125 +2024-09-17 12:37:38,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=222080.0, ans=0.125 +2024-09-17 12:37:39,638 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.66 vs. limit=6.0 +2024-09-17 12:37:43,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=222080.0, ans=0.1 +2024-09-17 12:37:57,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=222120.0, ans=0.09899494936611666 +2024-09-17 12:38:00,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=222120.0, ans=0.0 +2024-09-17 12:38:00,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=222120.0, ans=0.2 +2024-09-17 12:38:09,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=222160.0, ans=0.125 +2024-09-17 12:38:24,383 INFO [train.py:1198] (0/2) Epoch 13, batch 1250, loss[loss=0.2895, ctc_loss=0.196, cr_loss=0.4554, attn_decoder_loss=0.2898, over 29539.00 frames. ], tot_loss[loss=0.2598, ctc_loss=0.1596, cr_loss=0.4005, attn_decoder_loss=0.262, over 5774717.62 frames. 
], batch size: 92, lr: 8.71e-03, grad_scale: 8.0 +2024-09-17 12:38:48,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=222240.0, ans=0.125 +2024-09-17 12:38:54,763 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.95 vs. limit=10.0 +2024-09-17 12:39:00,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=222280.0, ans=0.1 +2024-09-17 12:39:21,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=222320.0, ans=0.0 +2024-09-17 12:39:24,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=222320.0, ans=0.125 +2024-09-17 12:39:26,226 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:39:33,605 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.458e+01 9.226e+01 9.923e+01 1.052e+02 2.205e+02, threshold=1.985e+02, percent-clipped=1.0 +2024-09-17 12:39:35,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_ff2.min_abs, batch_count=222360.0, ans=0.1 +2024-09-17 12:39:36,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=222360.0, ans=0.0 +2024-09-17 12:39:42,944 INFO [train.py:1198] (0/2) Epoch 13, batch 1300, loss[loss=0.2781, ctc_loss=0.1772, cr_loss=0.4246, attn_decoder_loss=0.2799, over 28150.00 frames. ], tot_loss[loss=0.2594, ctc_loss=0.1594, cr_loss=0.4, attn_decoder_loss=0.2616, over 5779372.09 frames. ], batch size: 111, lr: 8.71e-03, grad_scale: 8.0 +2024-09-17 12:39:56,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.55 vs. limit=22.5 +2024-09-17 12:39:58,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=222440.0, ans=0.125 +2024-09-17 12:40:22,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=222480.0, ans=0.2 +2024-09-17 12:40:32,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=222520.0, ans=0.125 +2024-09-17 12:40:50,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=222560.0, ans=0.2 +2024-09-17 12:40:54,402 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.35 vs. limit=15.0 +2024-09-17 12:41:00,772 INFO [train.py:1198] (0/2) Epoch 13, batch 1350, loss[loss=0.2553, ctc_loss=0.1582, cr_loss=0.3962, attn_decoder_loss=0.2573, over 29757.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1583, cr_loss=0.3988, attn_decoder_loss=0.2608, over 5796347.14 frames. 
], batch size: 81, lr: 8.70e-03, grad_scale: 8.0 +2024-09-17 12:41:20,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=222640.0, ans=0.125 +2024-09-17 12:41:32,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=222680.0, ans=0.025 +2024-09-17 12:41:35,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=222680.0, ans=0.125 +2024-09-17 12:41:46,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.32 vs. limit=6.0 +2024-09-17 12:41:59,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=222760.0, ans=0.125 +2024-09-17 12:42:06,533 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.825e+01 9.390e+01 1.007e+02 1.307e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-17 12:42:15,644 INFO [train.py:1198] (0/2) Epoch 13, batch 1400, loss[loss=0.2253, ctc_loss=0.1258, cr_loss=0.3299, attn_decoder_loss=0.2291, over 29574.00 frames. ], tot_loss[loss=0.2583, ctc_loss=0.1579, cr_loss=0.398, attn_decoder_loss=0.2606, over 5806884.53 frames. ], batch size: 69, lr: 8.70e-03, grad_scale: 8.0 +2024-09-17 12:42:26,938 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.34 vs. limit=12.0 +2024-09-17 12:42:49,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=222880.0, ans=0.125 +2024-09-17 12:42:51,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=222880.0, ans=0.2 +2024-09-17 12:43:05,626 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.05 vs. limit=22.5 +2024-09-17 12:43:33,422 INFO [train.py:1198] (0/2) Epoch 13, batch 1450, loss[loss=0.2729, ctc_loss=0.1704, cr_loss=0.4372, attn_decoder_loss=0.2746, over 29478.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1582, cr_loss=0.3987, attn_decoder_loss=0.2612, over 5803433.11 frames. ], batch size: 94, lr: 8.69e-03, grad_scale: 8.0 +2024-09-17 12:43:35,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=223000.0, ans=0.2 +2024-09-17 12:43:52,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=223040.0, ans=0.125 +2024-09-17 12:43:59,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=223040.0, ans=0.125 +2024-09-17 12:43:59,638 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:44:02,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=223080.0, ans=0.0 +2024-09-17 12:44:09,494 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.75 vs. 
limit=22.5 +2024-09-17 12:44:28,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=223120.0, ans=0.0 +2024-09-17 12:44:31,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=223120.0, ans=0.125 +2024-09-17 12:44:38,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.57 vs. limit=22.5 +2024-09-17 12:44:39,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=223160.0, ans=0.2 +2024-09-17 12:44:41,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=223160.0, ans=0.125 +2024-09-17 12:44:42,126 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.708e+01 9.179e+01 9.900e+01 1.065e+02 2.201e+02, threshold=1.980e+02, percent-clipped=1.0 +2024-09-17 12:44:51,611 INFO [train.py:1198] (0/2) Epoch 13, batch 1500, loss[loss=0.2649, ctc_loss=0.1612, cr_loss=0.398, attn_decoder_loss=0.2676, over 29615.00 frames. ], tot_loss[loss=0.2592, ctc_loss=0.1582, cr_loss=0.3988, attn_decoder_loss=0.2615, over 5804074.34 frames. ], batch size: 86, lr: 8.69e-03, grad_scale: 8.0 +2024-09-17 12:45:07,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=223240.0, ans=0.0 +2024-09-17 12:45:14,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=223240.0, ans=0.0 +2024-09-17 12:45:16,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=223240.0, ans=0.125 +2024-09-17 12:45:20,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.96 vs. limit=15.0 +2024-09-17 12:45:26,171 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.39 vs. limit=15.0 +2024-09-17 12:45:43,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=223320.0, ans=0.125 +2024-09-17 12:45:48,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=223320.0, ans=0.0 +2024-09-17 12:46:07,360 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.66 vs. limit=15.0 +2024-09-17 12:46:08,024 INFO [train.py:1198] (0/2) Epoch 13, batch 1550, loss[loss=0.2726, ctc_loss=0.1636, cr_loss=0.4123, attn_decoder_loss=0.2756, over 29536.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1581, cr_loss=0.3985, attn_decoder_loss=0.2612, over 5781224.51 frames. ], batch size: 90, lr: 8.69e-03, grad_scale: 8.0 +2024-09-17 12:46:10,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.83 vs. 
limit=12.0 +2024-09-17 12:46:27,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=223440.0, ans=0.125 +2024-09-17 12:46:42,680 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.40 vs. limit=22.5 +2024-09-17 12:46:49,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=223480.0, ans=0.2 +2024-09-17 12:46:52,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=223480.0, ans=0.125 +2024-09-17 12:47:01,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=223520.0, ans=0.125 +2024-09-17 12:47:16,424 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.020e+01 9.851e+01 1.181e+02 1.437e+02 2.605e+02, threshold=2.361e+02, percent-clipped=3.0 +2024-09-17 12:47:19,028 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.51 vs. limit=6.0 +2024-09-17 12:47:25,512 INFO [train.py:1198] (0/2) Epoch 13, batch 1600, loss[loss=0.2693, ctc_loss=0.1629, cr_loss=0.4184, attn_decoder_loss=0.2718, over 29678.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1582, cr_loss=0.3972, attn_decoder_loss=0.261, over 5764523.17 frames. ], batch size: 85, lr: 8.68e-03, grad_scale: 16.0 +2024-09-17 12:47:30,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=223600.0, ans=0.07 +2024-09-17 12:47:54,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=223680.0, ans=0.0 +2024-09-17 12:48:43,161 INFO [train.py:1198] (0/2) Epoch 13, batch 1650, loss[loss=0.2753, ctc_loss=0.1708, cr_loss=0.3982, attn_decoder_loss=0.2781, over 29716.00 frames. ], tot_loss[loss=0.2581, ctc_loss=0.1577, cr_loss=0.3962, attn_decoder_loss=0.2605, over 5758598.81 frames. ], batch size: 89, lr: 8.68e-03, grad_scale: 8.0 +2024-09-17 12:48:49,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=223800.0, ans=0.2 +2024-09-17 12:49:29,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=223920.0, ans=0.07 +2024-09-17 12:49:30,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=223920.0, ans=0.025 +2024-09-17 12:49:41,689 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.40 vs. 
limit=15.0 +2024-09-17 12:49:50,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=223960.0, ans=0.125 +2024-09-17 12:49:51,351 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.673e+01 9.178e+01 9.964e+01 1.088e+02 2.882e+02, threshold=1.993e+02, percent-clipped=2.0 +2024-09-17 12:49:57,808 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-56000.pt +2024-09-17 12:50:06,240 INFO [train.py:1198] (0/2) Epoch 13, batch 1700, loss[loss=0.2347, ctc_loss=0.1495, cr_loss=0.3696, attn_decoder_loss=0.2359, over 29610.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.1571, cr_loss=0.3961, attn_decoder_loss=0.26, over 5781013.65 frames. ], batch size: 69, lr: 8.68e-03, grad_scale: 8.0 +2024-09-17 12:50:22,646 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.54 vs. limit=15.0 +2024-09-17 12:50:30,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=224040.0, ans=0.125 +2024-09-17 12:51:00,523 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.71 vs. limit=22.5 +2024-09-17 12:51:13,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=224160.0, ans=0.2 +2024-09-17 12:51:21,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=224160.0, ans=0.125 +2024-09-17 12:51:24,020 INFO [train.py:1198] (0/2) Epoch 13, batch 1750, loss[loss=0.2366, ctc_loss=0.1416, cr_loss=0.3711, attn_decoder_loss=0.2389, over 29355.00 frames. ], tot_loss[loss=0.2571, ctc_loss=0.1565, cr_loss=0.3946, attn_decoder_loss=0.2595, over 5789810.81 frames. ], batch size: 67, lr: 8.67e-03, grad_scale: 8.0 +2024-09-17 12:51:46,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=224240.0, ans=0.2 +2024-09-17 12:52:08,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=224280.0, ans=0.125 +2024-09-17 12:52:19,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=224320.0, ans=0.125 +2024-09-17 12:52:33,950 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.807e+01 8.908e+01 9.548e+01 1.035e+02 2.424e+02, threshold=1.910e+02, percent-clipped=1.0 +2024-09-17 12:52:40,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=224400.0, ans=0.125 +2024-09-17 12:52:41,365 INFO [train.py:1198] (0/2) Epoch 13, batch 1800, loss[loss=0.2739, ctc_loss=0.1682, cr_loss=0.4026, attn_decoder_loss=0.2766, over 29685.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1568, cr_loss=0.3951, attn_decoder_loss=0.26, over 5792227.07 frames. 
], batch size: 83, lr: 8.67e-03, grad_scale: 8.0 +2024-09-17 12:52:49,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=224400.0, ans=0.125 +2024-09-17 12:52:53,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=224400.0, ans=0.0 +2024-09-17 12:52:56,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=224440.0, ans=0.125 +2024-09-17 12:53:22,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=224480.0, ans=0.125 +2024-09-17 12:53:35,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=224520.0, ans=0.125 +2024-09-17 12:53:38,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.58 vs. limit=12.0 +2024-09-17 12:53:44,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=224560.0, ans=0.2 +2024-09-17 12:53:44,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=224560.0, ans=0.125 +2024-09-17 12:53:57,464 INFO [train.py:1198] (0/2) Epoch 13, batch 1850, loss[loss=0.2644, ctc_loss=0.1485, cr_loss=0.3851, attn_decoder_loss=0.2687, over 29640.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1567, cr_loss=0.3952, attn_decoder_loss=0.2599, over 5799699.76 frames. ], batch size: 86, lr: 8.66e-03, grad_scale: 8.0 +2024-09-17 12:54:05,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=224600.0, ans=0.07 +2024-09-17 12:54:08,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=224600.0, ans=0.125 +2024-09-17 12:54:24,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=224640.0, ans=0.0 +2024-09-17 12:54:45,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=224720.0, ans=0.125 +2024-09-17 12:54:48,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=224720.0, ans=0.1 +2024-09-17 12:54:57,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=224720.0, ans=0.125 +2024-09-17 12:55:07,362 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.145e+01 8.966e+01 9.738e+01 1.037e+02 3.444e+02, threshold=1.948e+02, percent-clipped=2.0 +2024-09-17 12:55:12,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=224760.0, ans=0.2 +2024-09-17 12:55:15,199 INFO [train.py:1198] (0/2) Epoch 13, batch 1900, loss[loss=0.2622, ctc_loss=0.1564, cr_loss=0.4115, attn_decoder_loss=0.2649, over 29732.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1574, cr_loss=0.396, attn_decoder_loss=0.2608, over 5806774.79 frames. 
], batch size: 89, lr: 8.66e-03, grad_scale: 8.0 +2024-09-17 12:55:21,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=224800.0, ans=0.0 +2024-09-17 12:55:26,871 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.56 vs. limit=15.0 +2024-09-17 12:55:29,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=224840.0, ans=0.025 +2024-09-17 12:55:32,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=224840.0, ans=0.0 +2024-09-17 12:55:33,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=224840.0, ans=0.2 +2024-09-17 12:55:35,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=224840.0, ans=0.2 +2024-09-17 12:55:42,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=224840.0, ans=0.125 +2024-09-17 12:55:51,458 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.68 vs. limit=15.0 +2024-09-17 12:55:53,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=224880.0, ans=10.0 +2024-09-17 12:55:53,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=224880.0, ans=0.07 +2024-09-17 12:56:10,008 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.17 vs. limit=15.0 +2024-09-17 12:56:29,064 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:56:33,262 INFO [train.py:1198] (0/2) Epoch 13, batch 1950, loss[loss=0.2538, ctc_loss=0.1469, cr_loss=0.3907, attn_decoder_loss=0.257, over 29452.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1583, cr_loss=0.3981, attn_decoder_loss=0.2622, over 5820807.59 frames. ], batch size: 78, lr: 8.66e-03, grad_scale: 8.0 +2024-09-17 12:56:50,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=225040.0, ans=0.0 +2024-09-17 12:57:12,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=225080.0, ans=0.0 +2024-09-17 12:57:20,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys.whitening_limit, batch_count=225120.0, ans=6.0 +2024-09-17 12:57:30,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=225120.0, ans=0.1 +2024-09-17 12:57:40,939 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.542e+01 9.122e+01 9.575e+01 1.024e+02 4.346e+02, threshold=1.915e+02, percent-clipped=1.0 +2024-09-17 12:57:48,512 INFO [train.py:1198] (0/2) Epoch 13, batch 2000, loss[loss=0.226, ctc_loss=0.1322, cr_loss=0.3582, attn_decoder_loss=0.2285, over 29344.00 frames. 
], tot_loss[loss=0.26, ctc_loss=0.1587, cr_loss=0.3986, attn_decoder_loss=0.2624, over 5798326.15 frames. ], batch size: 67, lr: 8.65e-03, grad_scale: 16.0 +2024-09-17 12:57:56,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=225200.0, ans=0.07 +2024-09-17 12:57:58,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.55 vs. limit=6.0 +2024-09-17 12:58:15,515 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.28 vs. limit=10.0 +2024-09-17 12:58:54,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=225360.0, ans=0.0 +2024-09-17 12:59:06,707 INFO [train.py:1198] (0/2) Epoch 13, batch 2050, loss[loss=0.2327, ctc_loss=0.1339, cr_loss=0.3661, attn_decoder_loss=0.2356, over 29454.00 frames. ], tot_loss[loss=0.2591, ctc_loss=0.1582, cr_loss=0.3972, attn_decoder_loss=0.2615, over 5788991.15 frames. ], batch size: 70, lr: 8.65e-03, grad_scale: 8.0 +2024-09-17 12:59:37,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=225480.0, ans=0.125 +2024-09-17 12:59:48,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=225480.0, ans=0.125 +2024-09-17 13:00:06,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=225520.0, ans=0.0 +2024-09-17 13:00:18,377 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.810e+01 8.858e+01 9.418e+01 1.005e+02 1.765e+02, threshold=1.884e+02, percent-clipped=0.0 +2024-09-17 13:00:18,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=225560.0, ans=0.2 +2024-09-17 13:00:24,865 INFO [train.py:1198] (0/2) Epoch 13, batch 2100, loss[loss=0.2625, ctc_loss=0.1581, cr_loss=0.3888, attn_decoder_loss=0.2654, over 29777.00 frames. ], tot_loss[loss=0.2583, ctc_loss=0.1575, cr_loss=0.3963, attn_decoder_loss=0.2607, over 5799424.64 frames. ], batch size: 81, lr: 8.65e-03, grad_scale: 8.0 +2024-09-17 13:00:25,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=225600.0, ans=0.125 +2024-09-17 13:00:37,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=225600.0, ans=0.5 +2024-09-17 13:01:23,974 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.54 vs. limit=15.0 +2024-09-17 13:01:29,008 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.94 vs. limit=12.0 +2024-09-17 13:01:36,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.87 vs. limit=6.0 +2024-09-17 13:01:40,020 INFO [train.py:1198] (0/2) Epoch 13, batch 2150, loss[loss=0.2509, ctc_loss=0.1501, cr_loss=0.4056, attn_decoder_loss=0.2531, over 29454.00 frames. 
], tot_loss[loss=0.2574, ctc_loss=0.1563, cr_loss=0.3949, attn_decoder_loss=0.2599, over 5815450.86 frames. ], batch size: 78, lr: 8.64e-03, grad_scale: 8.0 +2024-09-17 13:01:41,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=225800.0, ans=0.125 +2024-09-17 13:02:01,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=19.94 vs. limit=22.5 +2024-09-17 13:02:35,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=225920.0, ans=0.0 +2024-09-17 13:02:51,961 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.903e+01 9.039e+01 9.591e+01 1.017e+02 1.428e+02, threshold=1.918e+02, percent-clipped=0.0 +2024-09-17 13:02:52,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=225960.0, ans=0.0 +2024-09-17 13:02:58,182 INFO [train.py:1198] (0/2) Epoch 13, batch 2200, loss[loss=0.2604, ctc_loss=0.1549, cr_loss=0.3997, attn_decoder_loss=0.2632, over 29642.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.1569, cr_loss=0.3963, attn_decoder_loss=0.2601, over 5813621.14 frames. ], batch size: 86, lr: 8.64e-03, grad_scale: 8.0 +2024-09-17 13:03:02,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=226000.0, ans=0.125 +2024-09-17 13:03:04,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=226000.0, ans=0.125 +2024-09-17 13:03:27,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=226080.0, ans=0.125 +2024-09-17 13:04:09,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=226160.0, ans=0.125 +2024-09-17 13:04:16,304 INFO [train.py:1198] (0/2) Epoch 13, batch 2250, loss[loss=0.2698, ctc_loss=0.1661, cr_loss=0.4401, attn_decoder_loss=0.2715, over 29723.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.1567, cr_loss=0.3964, attn_decoder_loss=0.2602, over 5810761.88 frames. ], batch size: 82, lr: 8.63e-03, grad_scale: 4.0 +2024-09-17 13:04:39,454 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:05:00,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=226320.0, ans=0.0 +2024-09-17 13:05:00,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=226320.0, ans=0.1 +2024-09-17 13:05:11,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=226320.0, ans=0.0 +2024-09-17 13:05:27,519 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.643e+01 9.102e+01 9.665e+01 1.015e+02 1.637e+02, threshold=1.933e+02, percent-clipped=0.0 +2024-09-17 13:05:32,600 INFO [train.py:1198] (0/2) Epoch 13, batch 2300, loss[loss=0.2235, ctc_loss=0.125, cr_loss=0.3429, attn_decoder_loss=0.2268, over 29304.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.1562, cr_loss=0.395, attn_decoder_loss=0.2592, over 5797021.96 frames. 
], batch size: 71, lr: 8.63e-03, grad_scale: 8.0 +2024-09-17 13:05:55,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=226440.0, ans=0.0 +2024-09-17 13:06:09,818 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.68 vs. limit=15.0 +2024-09-17 13:06:21,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=226520.0, ans=0.125 +2024-09-17 13:06:25,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=226520.0, ans=0.2 +2024-09-17 13:06:27,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.54 vs. limit=15.0 +2024-09-17 13:06:31,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=226520.0, ans=0.2 +2024-09-17 13:06:31,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.03 vs. limit=15.0 +2024-09-17 13:06:50,457 INFO [train.py:1198] (0/2) Epoch 13, batch 2350, loss[loss=0.2711, ctc_loss=0.1663, cr_loss=0.4301, attn_decoder_loss=0.2732, over 29697.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1566, cr_loss=0.3961, attn_decoder_loss=0.2595, over 5802689.70 frames. ], batch size: 83, lr: 8.63e-03, grad_scale: 8.0 +2024-09-17 13:06:56,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=226600.0, ans=0.0 +2024-09-17 13:07:21,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.32 vs. limit=15.0 +2024-09-17 13:07:22,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=226680.0, ans=0.1 +2024-09-17 13:07:33,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=226680.0, ans=0.0 +2024-09-17 13:07:46,914 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.19 vs. limit=12.0 +2024-09-17 13:08:05,463 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.113e+01 9.470e+01 1.028e+02 1.156e+02 2.779e+02, threshold=2.056e+02, percent-clipped=1.0 +2024-09-17 13:08:05,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=226760.0, ans=0.125 +2024-09-17 13:08:08,454 INFO [train.py:1198] (0/2) Epoch 13, batch 2400, loss[loss=0.2446, ctc_loss=0.1446, cr_loss=0.3698, attn_decoder_loss=0.2475, over 29518.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.157, cr_loss=0.3971, attn_decoder_loss=0.26, over 5806733.11 frames. 
], batch size: 76, lr: 8.62e-03, grad_scale: 8.0 +2024-09-17 13:08:26,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=226840.0, ans=0.1 +2024-09-17 13:08:28,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=226840.0, ans=0.125 +2024-09-17 13:08:51,687 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.83 vs. limit=15.0 +2024-09-17 13:08:51,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.60 vs. limit=6.0 +2024-09-17 13:09:15,700 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=19.83 vs. limit=22.5 +2024-09-17 13:09:22,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=227000.0, ans=0.1 +2024-09-17 13:09:24,019 INFO [train.py:1198] (0/2) Epoch 13, batch 2450, loss[loss=0.2622, ctc_loss=0.1519, cr_loss=0.3932, attn_decoder_loss=0.2657, over 29718.00 frames. ], tot_loss[loss=0.2588, ctc_loss=0.1578, cr_loss=0.398, attn_decoder_loss=0.2612, over 5783114.47 frames. ], batch size: 82, lr: 8.62e-03, grad_scale: 8.0 +2024-09-17 13:09:25,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=227000.0, ans=0.1 +2024-09-17 13:09:30,891 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.04 vs. limit=12.0 +2024-09-17 13:09:31,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=227000.0, ans=0.125 +2024-09-17 13:09:57,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=227080.0, ans=0.07 +2024-09-17 13:10:36,594 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.15 vs. limit=10.0 +2024-09-17 13:10:38,634 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.760e+01 9.238e+01 9.776e+01 1.100e+02 2.445e+02, threshold=1.955e+02, percent-clipped=1.0 +2024-09-17 13:10:40,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=227200.0, ans=0.125 +2024-09-17 13:10:41,687 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.67 vs. limit=22.5 +2024-09-17 13:10:42,079 INFO [train.py:1198] (0/2) Epoch 13, batch 2500, loss[loss=0.2615, ctc_loss=0.1458, cr_loss=0.3776, attn_decoder_loss=0.266, over 29622.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.158, cr_loss=0.3983, attn_decoder_loss=0.2612, over 5793535.65 frames. 
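The `grad_scale` field in these entries (4.0 at batch 2250, 8.0 for most batches, 16.0 at batch 2000) is the mixed-precision loss scale, not a model quantity. As far as we can tell it follows standard dynamic loss scaling: halved after a step whose gradients contain inf/nan, grown again after a run of clean steps. A generic sketch with PyTorch's stock scaler; the constructor arguments are illustrative assumptions, not values read from this run, and icefall's train.py wraps the scaler in its own update logic:

```python
import torch

# Illustrative dynamic loss scaling; argument values are assumptions.
scaler = torch.cuda.amp.GradScaler(
    init_scale=8.0,      # matches the most common grad_scale in this log
    growth_factor=2.0,   # doubling, e.g. 8.0 -> 16.0 around batch 2000
    backoff_factor=0.5,  # halving after overflow, e.g. 8.0 -> 4.0 by batch 2250
    growth_interval=2000,
)
# In a training loop one calls scaler.scale(loss).backward(),
# scaler.step(optimizer) and scaler.update(); scaler.get_scale() is the
# value corresponding to the logged grad_scale field.
```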
], batch size: 86, lr: 8.62e-03, grad_scale: 8.0 +2024-09-17 13:10:45,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=227200.0, ans=0.0 +2024-09-17 13:10:47,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=227200.0, ans=0.0 +2024-09-17 13:10:47,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=227200.0, ans=0.1 +2024-09-17 13:10:53,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=227200.0, ans=0.025 +2024-09-17 13:11:08,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=227240.0, ans=0.09899494936611666 +2024-09-17 13:11:11,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=227280.0, ans=0.95 +2024-09-17 13:11:14,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=227280.0, ans=0.0 +2024-09-17 13:11:31,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=227320.0, ans=0.125 +2024-09-17 13:11:57,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=227360.0, ans=0.2 +2024-09-17 13:11:59,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=227400.0, ans=0.025 +2024-09-17 13:12:00,449 INFO [train.py:1198] (0/2) Epoch 13, batch 2550, loss[loss=0.2259, ctc_loss=0.1314, cr_loss=0.3645, attn_decoder_loss=0.2283, over 29361.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1579, cr_loss=0.3985, attn_decoder_loss=0.2613, over 5797032.29 frames. ], batch size: 67, lr: 8.61e-03, grad_scale: 8.0 +2024-09-17 13:12:31,388 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.13 vs. 
limit=15.0 +2024-09-17 13:12:35,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=227480.0, ans=0.07 +2024-09-17 13:12:36,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=227480.0, ans=0.125 +2024-09-17 13:12:47,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=227520.0, ans=0.1 +2024-09-17 13:12:48,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=227520.0, ans=0.1 +2024-09-17 13:12:52,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=227520.0, ans=0.125 +2024-09-17 13:12:56,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=227520.0, ans=0.0 +2024-09-17 13:13:07,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=227560.0, ans=0.2 +2024-09-17 13:13:13,202 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.947e+01 9.236e+01 9.928e+01 1.060e+02 5.337e+02, threshold=1.986e+02, percent-clipped=3.0 +2024-09-17 13:13:16,294 INFO [train.py:1198] (0/2) Epoch 13, batch 2600, loss[loss=0.2539, ctc_loss=0.15, cr_loss=0.4147, attn_decoder_loss=0.2563, over 29431.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.158, cr_loss=0.3984, attn_decoder_loss=0.2614, over 5793828.63 frames. ], batch size: 78, lr: 8.61e-03, grad_scale: 8.0 +2024-09-17 13:13:33,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=227640.0, ans=0.125 +2024-09-17 13:13:37,885 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:13:37,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=227640.0, ans=0.025 +2024-09-17 13:13:51,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=227680.0, ans=0.0 +2024-09-17 13:13:54,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=227680.0, ans=0.0 +2024-09-17 13:13:58,210 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.15 vs. limit=15.0 +2024-09-17 13:13:58,323 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.22 vs. limit=12.0 +2024-09-17 13:14:26,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=227760.0, ans=0.125 +2024-09-17 13:14:32,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=227800.0, ans=0.125 +2024-09-17 13:14:34,059 INFO [train.py:1198] (0/2) Epoch 13, batch 2650, loss[loss=0.2856, ctc_loss=0.1826, cr_loss=0.4439, attn_decoder_loss=0.2872, over 29270.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.1577, cr_loss=0.3984, attn_decoder_loss=0.2614, over 5800571.80 frames. 
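The many `ScheduledFloat` lines report hyperparameters (skip rates, balancer probabilities, dropout and similar) whose value depends on the training batch count; `ans=` is the value in effect at the logged `batch_count`. To our understanding, in icefall's `scaling.py` these are piecewise-linear schedules over batch count. A self-contained sketch of that idea; the function name and example breakpoints below are ours, not taken from this run:

```python
from bisect import bisect_right

def scheduled_float(points: list[tuple[float, float]], batch_count: float) -> float:
    """Piecewise-linear schedule: `points` is [(batch_count, value), ...]
    sorted by batch_count; the value is held constant outside the range."""
    xs = [x for x, _ in points]
    i = bisect_right(xs, batch_count)
    if i == 0:
        return points[0][1]
    if i == len(points):
        return points[-1][1]
    (x0, y0), (x1, y1) = points[i - 1], points[i]
    return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

# E.g. a skip-rate decaying from 0.5 to 0.0 over the first 20k batches
# (illustrative breakpoints, not read from this log):
print(scheduled_float([(0.0, 0.5), (20000.0, 0.0)], 5000.0))  # -> 0.375
```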
], batch size: 100, lr: 8.60e-03, grad_scale: 8.0 +2024-09-17 13:14:37,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=227800.0, ans=0.1 +2024-09-17 13:14:38,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=227800.0, ans=0.0 +2024-09-17 13:14:51,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.49 vs. limit=15.0 +2024-09-17 13:14:59,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=227840.0, ans=0.07 +2024-09-17 13:15:27,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=227920.0, ans=0.125 +2024-09-17 13:15:39,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=227960.0, ans=0.0 +2024-09-17 13:15:41,977 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.07 vs. limit=12.0 +2024-09-17 13:15:48,540 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.882e+01 9.244e+01 9.728e+01 1.060e+02 3.050e+02, threshold=1.946e+02, percent-clipped=2.0 +2024-09-17 13:15:52,081 INFO [train.py:1198] (0/2) Epoch 13, batch 2700, loss[loss=0.2604, ctc_loss=0.1543, cr_loss=0.4058, attn_decoder_loss=0.2632, over 29530.00 frames. ], tot_loss[loss=0.2591, ctc_loss=0.1578, cr_loss=0.3982, attn_decoder_loss=0.2615, over 5797366.61 frames. ], batch size: 87, lr: 8.60e-03, grad_scale: 8.0 +2024-09-17 13:16:11,157 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.29 vs. limit=15.0 +2024-09-17 13:16:28,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=228080.0, ans=0.125 +2024-09-17 13:16:47,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=228120.0, ans=0.125 +2024-09-17 13:17:07,973 INFO [train.py:1198] (0/2) Epoch 13, batch 2750, loss[loss=0.2563, ctc_loss=0.164, cr_loss=0.4011, attn_decoder_loss=0.2576, over 29509.00 frames. ], tot_loss[loss=0.2582, ctc_loss=0.1573, cr_loss=0.3968, attn_decoder_loss=0.2606, over 5795452.74 frames. ], batch size: 75, lr: 8.60e-03, grad_scale: 8.0 +2024-09-17 13:17:40,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=228280.0, ans=0.125 +2024-09-17 13:17:41,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=228280.0, ans=0.025 +2024-09-17 13:18:20,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=228360.0, ans=0.125 +2024-09-17 13:18:23,264 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.445e+01 9.348e+01 1.004e+02 1.120e+02 2.904e+02, threshold=2.008e+02, percent-clipped=2.0 +2024-09-17 13:18:26,304 INFO [train.py:1198] (0/2) Epoch 13, batch 2800, loss[loss=0.2848, ctc_loss=0.2144, cr_loss=0.4482, attn_decoder_loss=0.2827, over 19866.00 frames. 
], tot_loss[loss=0.2587, ctc_loss=0.1579, cr_loss=0.3971, attn_decoder_loss=0.2611, over 5776913.60 frames. ], batch size: 209, lr: 8.59e-03, grad_scale: 16.0 +2024-09-17 13:18:41,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=228440.0, ans=0.125 +2024-09-17 13:19:00,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=228480.0, ans=0.0 +2024-09-17 13:19:29,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=228560.0, ans=0.05 +2024-09-17 13:19:30,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=228560.0, ans=0.09899494936611666 +2024-09-17 13:19:37,474 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.73 vs. limit=22.5 +2024-09-17 13:19:44,134 INFO [train.py:1198] (0/2) Epoch 13, batch 2850, loss[loss=0.2412, ctc_loss=0.1384, cr_loss=0.3553, attn_decoder_loss=0.2447, over 29513.00 frames. ], tot_loss[loss=0.2592, ctc_loss=0.1585, cr_loss=0.3983, attn_decoder_loss=0.2616, over 5763029.28 frames. ], batch size: 77, lr: 8.59e-03, grad_scale: 8.0 +2024-09-17 13:19:51,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=228600.0, ans=0.125 +2024-09-17 13:20:07,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=228640.0, ans=0.2 +2024-09-17 13:20:15,757 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.15 vs. limit=22.5 +2024-09-17 13:20:18,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=228680.0, ans=0.0 +2024-09-17 13:20:20,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=228680.0, ans=0.1 +2024-09-17 13:20:30,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=228720.0, ans=0.125 +2024-09-17 13:20:32,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=228720.0, ans=0.0 +2024-09-17 13:20:51,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=228760.0, ans=0.125 +2024-09-17 13:21:00,202 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.673e+01 9.259e+01 1.060e+02 1.394e+02 3.143e+02, threshold=2.120e+02, percent-clipped=6.0 +2024-09-17 13:21:00,235 INFO [train.py:1198] (0/2) Epoch 13, batch 2900, loss[loss=0.2516, ctc_loss=0.1559, cr_loss=0.3903, attn_decoder_loss=0.2535, over 29420.00 frames. ], tot_loss[loss=0.2599, ctc_loss=0.1586, cr_loss=0.3997, attn_decoder_loss=0.2623, over 5788822.76 frames. 
], batch size: 79, lr: 8.59e-03, grad_scale: 8.0 +2024-09-17 13:21:04,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=228800.0, ans=0.025 +2024-09-17 13:21:27,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=228840.0, ans=0.125 +2024-09-17 13:21:53,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=228920.0, ans=0.125 +2024-09-17 13:21:55,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.20 vs. limit=15.0 +2024-09-17 13:22:18,343 INFO [train.py:1198] (0/2) Epoch 13, batch 2950, loss[loss=0.2458, ctc_loss=0.1547, cr_loss=0.4209, attn_decoder_loss=0.2466, over 29507.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1577, cr_loss=0.3982, attn_decoder_loss=0.2609, over 5783263.48 frames. ], batch size: 75, lr: 8.58e-03, grad_scale: 4.0 +2024-09-17 13:22:20,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=229000.0, ans=0.0 +2024-09-17 13:23:15,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=229120.0, ans=0.0 +2024-09-17 13:23:36,399 INFO [train.py:1198] (0/2) Epoch 13, batch 3000, loss[loss=0.2526, ctc_loss=0.1424, cr_loss=0.3677, attn_decoder_loss=0.2567, over 29754.00 frames. ], tot_loss[loss=0.2583, ctc_loss=0.1574, cr_loss=0.398, attn_decoder_loss=0.2607, over 5784730.47 frames. ], batch size: 81, lr: 8.58e-03, grad_scale: 8.0 +2024-09-17 13:23:36,400 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 13:23:49,537 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([6.2314, 5.9274, 5.8425, 5.5486], device='cuda:0') +2024-09-17 13:23:54,824 INFO [train.py:1230] (0/2) Epoch 13, validation: loss=0.212, ctc_loss=0.04384, cr_loss=4.97e-15, attn_decoder_loss=0.2307, over 944034.00 frames. +2024-09-17 13:23:54,824 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 13:23:56,318 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.701e+01 8.967e+01 9.683e+01 1.075e+02 2.883e+02, threshold=1.937e+02, percent-clipped=1.0 +2024-09-17 13:23:57,275 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.05 vs. 
limit=15.0 +2024-09-17 13:24:02,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=229200.0, ans=0.125 +2024-09-17 13:24:07,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=229200.0, ans=0.0 +2024-09-17 13:24:11,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=229240.0, ans=0.1 +2024-09-17 13:24:16,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=229240.0, ans=0.125 +2024-09-17 13:24:25,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=229280.0, ans=0.2 +2024-09-17 13:24:29,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.00 vs. limit=10.0 +2024-09-17 13:24:31,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=229280.0, ans=0.0 +2024-09-17 13:24:33,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=229280.0, ans=0.0 +2024-09-17 13:24:33,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=229280.0, ans=0.07 +2024-09-17 13:24:34,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=229280.0, ans=0.125 +2024-09-17 13:24:39,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=229320.0, ans=0.1 +2024-09-17 13:25:04,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=229360.0, ans=0.125 +2024-09-17 13:25:10,620 INFO [train.py:1198] (0/2) Epoch 13, batch 3050, loss[loss=0.2498, ctc_loss=0.1511, cr_loss=0.3774, attn_decoder_loss=0.2524, over 29531.00 frames. ], tot_loss[loss=0.2594, ctc_loss=0.1582, cr_loss=0.399, attn_decoder_loss=0.2618, over 5777319.31 frames. ], batch size: 76, lr: 8.57e-03, grad_scale: 8.0 +2024-09-17 13:25:18,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=229400.0, ans=0.0 +2024-09-17 13:25:35,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=229440.0, ans=0.125 +2024-09-17 13:25:40,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.69 vs. limit=10.0 +2024-09-17 13:26:08,729 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.60 vs. limit=15.0 +2024-09-17 13:26:29,228 INFO [train.py:1198] (0/2) Epoch 13, batch 3100, loss[loss=0.2794, ctc_loss=0.1827, cr_loss=0.4287, attn_decoder_loss=0.2806, over 29350.00 frames. ], tot_loss[loss=0.2591, ctc_loss=0.1583, cr_loss=0.3988, attn_decoder_loss=0.2614, over 5777495.62 frames. 
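The `Whitening` lines come from a diagnostic/regularization module that compares a whiteness metric of a layer's activations against a configured limit (e.g. `metric=6.05 vs. limit=15.0` above); per our reading, the metric is 1.0 when the feature covariance has equal eigenvalues (perfectly "white") and grows with the eigenvalue spread, and exceeding the limit is what triggers the module's corrective behavior. A hedged, simplified single-group reimplementation of the metric as we understand it from icefall's `scaling.py` (function name ours):

```python
import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    """x: (num_frames, num_channels). Returns ~1.0 for whitened features
    (covariance ~ scalar * identity), larger as eigenvalues spread out."""
    num_frames, num_channels = x.shape
    x = x - x.mean(dim=0)            # use the centered covariance
    cov = (x.t() @ x) / num_frames   # (C, C)
    # num_channels * sum(eigvals**2) / sum(eigvals)**2, computed via traces:
    # trace(cov @ cov) == (cov * cov).sum() for symmetric cov.
    return num_channels * (cov * cov).sum() / (torch.diagonal(cov).sum() ** 2)

x = torch.randn(4000, 256)                      # nearly white -> metric near 1
print(float(whitening_metric(x)))
print(float(whitening_metric(x * torch.linspace(0.2, 3.0, 256))))  # larger
```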
], batch size: 100, lr: 8.57e-03, grad_scale: 8.0 +2024-09-17 13:26:32,980 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.799e+01 9.409e+01 1.035e+02 1.210e+02 2.103e+02, threshold=2.070e+02, percent-clipped=1.0 +2024-09-17 13:27:20,281 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:27:29,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.28 vs. limit=15.0 +2024-09-17 13:27:47,054 INFO [train.py:1198] (0/2) Epoch 13, batch 3150, loss[loss=0.2722, ctc_loss=0.1576, cr_loss=0.3875, attn_decoder_loss=0.2763, over 28812.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.158, cr_loss=0.3981, attn_decoder_loss=0.2613, over 5784431.40 frames. ], batch size: 104, lr: 8.57e-03, grad_scale: 8.0 +2024-09-17 13:27:47,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=229800.0, ans=0.025 +2024-09-17 13:27:57,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=229800.0, ans=0.125 +2024-09-17 13:28:03,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=229840.0, ans=0.125 +2024-09-17 13:28:03,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=229840.0, ans=0.0 +2024-09-17 13:28:27,090 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.74 vs. limit=22.5 +2024-09-17 13:28:45,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=229960.0, ans=0.125 +2024-09-17 13:28:48,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=229960.0, ans=0.125 +2024-09-17 13:28:50,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=229960.0, ans=0.1 +2024-09-17 13:29:02,205 INFO [train.py:1198] (0/2) Epoch 13, batch 3200, loss[loss=0.2566, ctc_loss=0.1686, cr_loss=0.4068, attn_decoder_loss=0.2573, over 29420.00 frames. ], tot_loss[loss=0.2581, ctc_loss=0.1572, cr_loss=0.3976, attn_decoder_loss=0.2605, over 5793648.43 frames. ], batch size: 79, lr: 8.56e-03, grad_scale: 16.0 +2024-09-17 13:29:05,036 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.957e+01 9.025e+01 9.709e+01 1.089e+02 2.819e+02, threshold=1.942e+02, percent-clipped=2.0 +2024-09-17 13:29:20,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer_ff2.min_abs, batch_count=230040.0, ans=0.1 +2024-09-17 13:29:56,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.84 vs. limit=15.0 +2024-09-17 13:30:17,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=230160.0, ans=0.0 +2024-09-17 13:30:20,633 INFO [train.py:1198] (0/2) Epoch 13, batch 3250, loss[loss=0.2675, ctc_loss=0.1608, cr_loss=0.427, attn_decoder_loss=0.2699, over 29704.00 frames. ], tot_loss[loss=0.2588, ctc_loss=0.1576, cr_loss=0.3982, attn_decoder_loss=0.2612, over 5799797.33 frames. 
], batch size: 84, lr: 8.56e-03, grad_scale: 8.0 +2024-09-17 13:30:20,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=230200.0, ans=0.125 +2024-09-17 13:30:39,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=230240.0, ans=0.125 +2024-09-17 13:30:51,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=230280.0, ans=0.0 +2024-09-17 13:31:07,644 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.59 vs. limit=15.0 +2024-09-17 13:31:08,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=230320.0, ans=0.07 +2024-09-17 13:31:14,935 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.85 vs. limit=22.5 +2024-09-17 13:31:38,613 INFO [train.py:1198] (0/2) Epoch 13, batch 3300, loss[loss=0.2611, ctc_loss=0.1565, cr_loss=0.3723, attn_decoder_loss=0.2644, over 28169.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1566, cr_loss=0.396, attn_decoder_loss=0.2598, over 5796810.28 frames. ], batch size: 111, lr: 8.56e-03, grad_scale: 8.0 +2024-09-17 13:31:39,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=230400.0, ans=0.1 +2024-09-17 13:31:41,815 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.455e+01 8.916e+01 9.519e+01 1.032e+02 2.087e+02, threshold=1.904e+02, percent-clipped=1.0 +2024-09-17 13:32:10,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=230480.0, ans=0.09899494936611666 +2024-09-17 13:32:27,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=230520.0, ans=0.5 +2024-09-17 13:32:29,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=230520.0, ans=0.2 +2024-09-17 13:32:39,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=230560.0, ans=0.125 +2024-09-17 13:32:42,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=230560.0, ans=0.2 +2024-09-17 13:32:53,828 INFO [train.py:1198] (0/2) Epoch 13, batch 3350, loss[loss=0.2638, ctc_loss=0.1632, cr_loss=0.4085, attn_decoder_loss=0.2659, over 28862.00 frames. ], tot_loss[loss=0.2583, ctc_loss=0.1574, cr_loss=0.3969, attn_decoder_loss=0.2607, over 5773126.06 frames. 
], batch size: 104, lr: 8.55e-03, grad_scale: 8.0 +2024-09-17 13:33:03,254 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:33:07,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=230640.0, ans=0.125 +2024-09-17 13:33:12,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=230640.0, ans=0.0 +2024-09-17 13:33:37,570 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.43 vs. limit=6.0 +2024-09-17 13:33:57,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=230760.0, ans=0.125 +2024-09-17 13:34:14,315 INFO [train.py:1198] (0/2) Epoch 13, batch 3400, loss[loss=0.2264, ctc_loss=0.1304, cr_loss=0.3573, attn_decoder_loss=0.2292, over 29318.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1577, cr_loss=0.3976, attn_decoder_loss=0.2607, over 5765588.45 frames. ], batch size: 67, lr: 8.55e-03, grad_scale: 8.0 +2024-09-17 13:34:14,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=230800.0, ans=0.0 +2024-09-17 13:34:17,283 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.853e+01 8.984e+01 9.781e+01 1.096e+02 3.563e+02, threshold=1.956e+02, percent-clipped=2.0 +2024-09-17 13:34:19,984 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.90 vs. limit=15.0 +2024-09-17 13:34:20,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=230800.0, ans=0.125 +2024-09-17 13:34:23,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=230800.0, ans=0.125 +2024-09-17 13:34:26,985 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.90 vs. limit=15.0 +2024-09-17 13:34:32,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=230840.0, ans=0.125 +2024-09-17 13:34:46,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=230880.0, ans=0.1 +2024-09-17 13:34:51,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=230880.0, ans=0.0 +2024-09-17 13:34:55,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=230880.0, ans=0.2 +2024-09-17 13:35:22,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=230960.0, ans=0.0 +2024-09-17 13:35:23,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=230960.0, ans=0.125 +2024-09-17 13:35:26,736 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=25.20 vs. 
limit=22.5 +2024-09-17 13:35:30,141 INFO [train.py:1198] (0/2) Epoch 13, batch 3450, loss[loss=0.2691, ctc_loss=0.1643, cr_loss=0.4234, attn_decoder_loss=0.2714, over 28547.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1575, cr_loss=0.3978, attn_decoder_loss=0.2608, over 5774511.30 frames. ], batch size: 112, lr: 8.55e-03, grad_scale: 8.0 +2024-09-17 13:35:32,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.42 vs. limit=15.0 +2024-09-17 13:35:36,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=231000.0, ans=0.1 +2024-09-17 13:35:41,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=231000.0, ans=0.0 +2024-09-17 13:35:43,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=231040.0, ans=0.1 +2024-09-17 13:35:44,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=231040.0, ans=0.125 +2024-09-17 13:36:03,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=231080.0, ans=0.2 +2024-09-17 13:36:03,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=231080.0, ans=0.125 +2024-09-17 13:36:11,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=231080.0, ans=0.125 +2024-09-17 13:36:14,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=231120.0, ans=0.1 +2024-09-17 13:36:32,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=231160.0, ans=0.1 +2024-09-17 13:36:45,950 INFO [train.py:1198] (0/2) Epoch 13, batch 3500, loss[loss=0.233, ctc_loss=0.1339, cr_loss=0.3554, attn_decoder_loss=0.2361, over 29343.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1571, cr_loss=0.397, attn_decoder_loss=0.2598, over 5777909.79 frames. 
], batch size: 71, lr: 8.54e-03, grad_scale: 8.0 +2024-09-17 13:36:49,015 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.194e+01 9.091e+01 9.756e+01 1.067e+02 1.863e+02, threshold=1.951e+02, percent-clipped=0.0 +2024-09-17 13:36:50,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=231200.0, ans=0.125 +2024-09-17 13:36:58,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=231200.0, ans=0.0 +2024-09-17 13:37:05,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=231240.0, ans=0.125 +2024-09-17 13:37:22,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=231280.0, ans=0.0 +2024-09-17 13:37:32,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=231320.0, ans=0.05 +2024-09-17 13:37:37,653 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.70 vs. limit=6.0 +2024-09-17 13:37:53,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=231360.0, ans=0.0 +2024-09-17 13:38:00,821 INFO [train.py:1198] (0/2) Epoch 13, batch 3550, loss[loss=0.2615, ctc_loss=0.1516, cr_loss=0.3886, attn_decoder_loss=0.2651, over 29680.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1567, cr_loss=0.3966, attn_decoder_loss=0.2598, over 5783075.48 frames. ], batch size: 89, lr: 8.54e-03, grad_scale: 8.0 +2024-09-17 13:38:14,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=231440.0, ans=0.125 +2024-09-17 13:38:26,093 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.57 vs. limit=15.0 +2024-09-17 13:38:48,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=231520.0, ans=0.125 +2024-09-17 13:39:01,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=231520.0, ans=0.2 +2024-09-17 13:39:18,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=231600.0, ans=0.125 +2024-09-17 13:39:19,457 INFO [train.py:1198] (0/2) Epoch 13, batch 3600, loss[loss=0.2558, ctc_loss=0.1538, cr_loss=0.4067, attn_decoder_loss=0.2581, over 29498.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.1566, cr_loss=0.3969, attn_decoder_loss=0.2601, over 5792395.21 frames. ], batch size: 77, lr: 8.53e-03, grad_scale: 16.0 +2024-09-17 13:39:23,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.96 vs. 
limit=22.5 +2024-09-17 13:39:24,014 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.860e+01 8.866e+01 9.672e+01 1.060e+02 2.375e+02, threshold=1.934e+02, percent-clipped=1.0 +2024-09-17 13:39:41,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=231640.0, ans=0.04949747468305833 +2024-09-17 13:40:21,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=231760.0, ans=0.0 +2024-09-17 13:40:21,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=231760.0, ans=0.125 +2024-09-17 13:40:24,617 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=19.84 vs. limit=22.5 +2024-09-17 13:40:34,315 INFO [train.py:1198] (0/2) Epoch 13, batch 3650, loss[loss=0.2804, ctc_loss=0.1824, cr_loss=0.4416, attn_decoder_loss=0.2815, over 29485.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1562, cr_loss=0.3962, attn_decoder_loss=0.2599, over 5794459.49 frames. ], batch size: 90, lr: 8.53e-03, grad_scale: 8.0 +2024-09-17 13:40:44,052 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.44 vs. limit=15.0 +2024-09-17 13:40:47,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=231840.0, ans=0.125 +2024-09-17 13:41:10,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=231880.0, ans=0.05 +2024-09-17 13:41:17,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=231920.0, ans=0.125 +2024-09-17 13:41:30,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=231920.0, ans=0.0 +2024-09-17 13:41:35,903 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.53 vs. limit=15.0 +2024-09-17 13:41:36,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=231960.0, ans=0.0 +2024-09-17 13:41:47,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=232000.0, ans=0.0 +2024-09-17 13:41:48,931 INFO [train.py:1198] (0/2) Epoch 13, batch 3700, loss[loss=0.2705, ctc_loss=0.162, cr_loss=0.42, attn_decoder_loss=0.2732, over 29725.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.1565, cr_loss=0.3968, attn_decoder_loss=0.2602, over 5805104.57 frames. 
], batch size: 84, lr: 8.53e-03, grad_scale: 8.0 +2024-09-17 13:41:53,428 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.974e+01 9.152e+01 9.843e+01 1.065e+02 3.437e+02, threshold=1.969e+02, percent-clipped=3.0 +2024-09-17 13:42:29,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=232080.0, ans=0.07 +2024-09-17 13:42:38,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=232120.0, ans=0.025 +2024-09-17 13:42:48,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=232160.0, ans=0.125 +2024-09-17 13:42:48,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.90 vs. limit=22.5 +2024-09-17 13:42:51,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=232160.0, ans=0.0 +2024-09-17 13:42:59,474 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.77 vs. limit=22.5 +2024-09-17 13:43:02,995 INFO [train.py:1198] (0/2) Epoch 13, batch 3750, loss[loss=0.227, ctc_loss=0.1294, cr_loss=0.3443, attn_decoder_loss=0.2302, over 29358.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1563, cr_loss=0.3967, attn_decoder_loss=0.2598, over 5807994.64 frames. ], batch size: 67, lr: 8.52e-03, grad_scale: 4.0 +2024-09-17 13:43:05,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten.whitening_limit, batch_count=232200.0, ans=15.0 +2024-09-17 13:43:15,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=232200.0, ans=0.125 +2024-09-17 13:43:21,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=232240.0, ans=0.125 +2024-09-17 13:43:46,911 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.83 vs. limit=15.0 +2024-09-17 13:43:47,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=232320.0, ans=0.1 +2024-09-17 13:43:49,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=232320.0, ans=0.1 +2024-09-17 13:44:17,438 INFO [train.py:1198] (0/2) Epoch 13, batch 3800, loss[loss=0.2672, ctc_loss=0.1556, cr_loss=0.425, attn_decoder_loss=0.2702, over 29623.00 frames. ], tot_loss[loss=0.2571, ctc_loss=0.1562, cr_loss=0.3966, attn_decoder_loss=0.2595, over 5798528.12 frames. 
], batch size: 86, lr: 8.52e-03, grad_scale: 8.0 +2024-09-17 13:44:23,394 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.990e+01 9.154e+01 9.685e+01 1.039e+02 2.233e+02, threshold=1.937e+02, percent-clipped=1.0 +2024-09-17 13:44:25,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=232400.0, ans=0.125 +2024-09-17 13:44:27,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys.whitening_limit, batch_count=232400.0, ans=6.0 +2024-09-17 13:44:31,700 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.94 vs. limit=15.0 +2024-09-17 13:44:35,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=232440.0, ans=0.0 +2024-09-17 13:45:01,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=232480.0, ans=0.025 +2024-09-17 13:45:05,274 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.48 vs. limit=15.0 +2024-09-17 13:45:06,765 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=24.53 vs. limit=15.0 +2024-09-17 13:45:22,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=232560.0, ans=0.04949747468305833 +2024-09-17 13:45:23,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=232560.0, ans=0.0 +2024-09-17 13:45:35,422 INFO [train.py:1198] (0/2) Epoch 13, batch 3850, loss[loss=0.2741, ctc_loss=0.1726, cr_loss=0.4097, attn_decoder_loss=0.2762, over 29261.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.156, cr_loss=0.3966, attn_decoder_loss=0.2594, over 5810100.79 frames. ], batch size: 100, lr: 8.52e-03, grad_scale: 4.0 +2024-09-17 13:45:37,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=232600.0, ans=0.2 +2024-09-17 13:45:50,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=232640.0, ans=0.0 +2024-09-17 13:45:52,738 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.65 vs. limit=6.0 +2024-09-17 13:46:02,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=232640.0, ans=0.125 +2024-09-17 13:46:09,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=232680.0, ans=0.0 +2024-09-17 13:46:12,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.90 vs. 
limit=10.0 +2024-09-17 13:46:17,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=232680.0, ans=0.0 +2024-09-17 13:46:19,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=232720.0, ans=0.125 +2024-09-17 13:46:26,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=232720.0, ans=0.0 +2024-09-17 13:46:50,403 INFO [train.py:1198] (0/2) Epoch 13, batch 3900, loss[loss=0.2607, ctc_loss=0.1449, cr_loss=0.3745, attn_decoder_loss=0.2652, over 29640.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.1563, cr_loss=0.3972, attn_decoder_loss=0.2601, over 5815219.94 frames. ], batch size: 86, lr: 8.51e-03, grad_scale: 8.0 +2024-09-17 13:46:52,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=232800.0, ans=0.0 +2024-09-17 13:46:56,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=232800.0, ans=0.2 +2024-09-17 13:46:57,792 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.883e+01 8.946e+01 9.477e+01 1.034e+02 1.292e+02, threshold=1.895e+02, percent-clipped=0.0 +2024-09-17 13:47:44,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=232920.0, ans=0.0 +2024-09-17 13:47:55,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=232960.0, ans=0.125 +2024-09-17 13:48:04,562 INFO [train.py:1198] (0/2) Epoch 13, batch 3950, loss[loss=0.2674, ctc_loss=0.164, cr_loss=0.4052, attn_decoder_loss=0.2699, over 29487.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1558, cr_loss=0.3971, attn_decoder_loss=0.26, over 5834877.80 frames. ], batch size: 97, lr: 8.51e-03, grad_scale: 8.0 +2024-09-17 13:48:19,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=233040.0, ans=0.0 +2024-09-17 13:48:32,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=233080.0, ans=0.125 +2024-09-17 13:48:34,972 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.20 vs. limit=6.0 +2024-09-17 13:48:35,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=233080.0, ans=0.0 +2024-09-17 13:48:43,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=233080.0, ans=0.2 +2024-09-17 13:48:46,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=233080.0, ans=0.025 +2024-09-17 13:49:05,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=233160.0, ans=0.125 +2024-09-17 13:49:15,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=233160.0, ans=0.125 +2024-09-17 13:49:18,341 INFO [train.py:1198] (0/2) Epoch 13, batch 4000, loss[loss=0.2378, ctc_loss=0.1377, cr_loss=0.3628, attn_decoder_loss=0.2408, over 29512.00 frames. 
], tot_loss[loss=0.2575, ctc_loss=0.1562, cr_loss=0.3971, attn_decoder_loss=0.2599, over 5811630.90 frames. ], batch size: 74, lr: 8.51e-03, grad_scale: 16.0 +2024-09-17 13:49:19,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=233200.0, ans=0.125 +2024-09-17 13:49:22,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=233200.0, ans=0.0 +2024-09-17 13:49:24,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=233200.0, ans=0.1 +2024-09-17 13:49:25,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=233200.0, ans=0.0 +2024-09-17 13:49:27,095 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.753e+01 9.222e+01 9.816e+01 1.053e+02 2.750e+02, threshold=1.963e+02, percent-clipped=1.0 +2024-09-17 13:49:39,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=233240.0, ans=0.0 +2024-09-17 13:49:45,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=233240.0, ans=0.0 +2024-09-17 13:49:49,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=233280.0, ans=0.125 +2024-09-17 13:49:52,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=233280.0, ans=0.125 +2024-09-17 13:49:58,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=233280.0, ans=0.125 +2024-09-17 13:50:00,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=233280.0, ans=0.2 +2024-09-17 13:50:20,107 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.87 vs. limit=22.5 +2024-09-17 13:50:23,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=233360.0, ans=0.0 +2024-09-17 13:50:34,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=233400.0, ans=0.125 +2024-09-17 13:50:35,265 INFO [train.py:1198] (0/2) Epoch 13, batch 4050, loss[loss=0.2821, ctc_loss=0.1975, cr_loss=0.4011, attn_decoder_loss=0.2826, over 20300.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1562, cr_loss=0.3966, attn_decoder_loss=0.2596, over 5796041.71 frames. ], batch size: 209, lr: 8.50e-03, grad_scale: 8.0 +2024-09-17 13:50:48,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=233440.0, ans=0.125 +2024-09-17 13:51:16,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=233480.0, ans=0.125 +2024-09-17 13:51:27,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=233520.0, ans=0.125 +2024-09-17 13:51:49,528 INFO [train.py:1198] (0/2) Epoch 13, batch 4100, loss[loss=0.2679, ctc_loss=0.1593, cr_loss=0.404, attn_decoder_loss=0.2709, over 29516.00 frames. 
], tot_loss[loss=0.2573, ctc_loss=0.1561, cr_loss=0.3961, attn_decoder_loss=0.2598, over 5791704.27 frames. ], batch size: 90, lr: 8.50e-03, grad_scale: 8.0 +2024-09-17 13:51:59,586 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.947e+01 9.234e+01 9.794e+01 1.124e+02 2.298e+02, threshold=1.959e+02, percent-clipped=3.0 +2024-09-17 13:52:07,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=233640.0, ans=0.0 +2024-09-17 13:52:11,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=233640.0, ans=0.025 +2024-09-17 13:52:12,506 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.70 vs. limit=15.0 +2024-09-17 13:52:14,570 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:52:27,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=233680.0, ans=0.125 +2024-09-17 13:52:49,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=233760.0, ans=0.2 +2024-09-17 13:53:00,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=233760.0, ans=0.125 +2024-09-17 13:53:02,944 INFO [train.py:1198] (0/2) Epoch 13, batch 4150, loss[loss=0.2471, ctc_loss=0.1518, cr_loss=0.3869, attn_decoder_loss=0.249, over 29487.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1562, cr_loss=0.3955, attn_decoder_loss=0.2596, over 5798081.59 frames. ], batch size: 77, lr: 8.49e-03, grad_scale: 8.0 +2024-09-17 13:53:19,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=233840.0, ans=0.125 +2024-09-17 13:53:20,220 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.88 vs. limit=15.0 +2024-09-17 13:53:22,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=233840.0, ans=0.07 +2024-09-17 13:53:51,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.43 vs. limit=15.0 +2024-09-17 13:54:18,774 INFO [train.py:1198] (0/2) Epoch 13, batch 4200, loss[loss=0.2771, ctc_loss=0.1712, cr_loss=0.4374, attn_decoder_loss=0.2791, over 29506.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1561, cr_loss=0.396, attn_decoder_loss=0.2597, over 5799614.80 frames. ], batch size: 90, lr: 8.49e-03, grad_scale: 8.0 +2024-09-17 13:54:30,795 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.689e+01 8.618e+01 9.139e+01 9.691e+01 3.040e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-17 13:55:20,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.whiten.whitening_limit, batch_count=234160.0, ans=12.0 +2024-09-17 13:55:28,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=234160.0, ans=0.025 +2024-09-17 13:55:32,366 INFO [train.py:1198] (0/2) Epoch 13, batch 4250, loss[loss=0.2418, ctc_loss=0.1357, cr_loss=0.3777, attn_decoder_loss=0.2453, over 29537.00 frames. 
], tot_loss[loss=0.2575, ctc_loss=0.1561, cr_loss=0.3962, attn_decoder_loss=0.26, over 5805963.13 frames. ], batch size: 74, lr: 8.49e-03, grad_scale: 8.0 +2024-09-17 13:55:32,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=234200.0, ans=0.125 +2024-09-17 13:55:38,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=234200.0, ans=0.125 +2024-09-17 13:55:45,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=234240.0, ans=0.2 +2024-09-17 13:55:59,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=234240.0, ans=0.1 +2024-09-17 13:55:59,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=234240.0, ans=0.125 +2024-09-17 13:56:46,323 INFO [train.py:1198] (0/2) Epoch 13, batch 4300, loss[loss=0.2642, ctc_loss=0.1552, cr_loss=0.4133, attn_decoder_loss=0.2672, over 29530.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1562, cr_loss=0.3957, attn_decoder_loss=0.2599, over 5795333.35 frames. ], batch size: 87, lr: 8.48e-03, grad_scale: 8.0 +2024-09-17 13:56:52,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=234400.0, ans=0.125 +2024-09-17 13:56:58,272 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.092e+01 9.409e+01 9.956e+01 1.092e+02 6.321e+02, threshold=1.991e+02, percent-clipped=4.0 +2024-09-17 13:57:03,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=234440.0, ans=0.125 +2024-09-17 13:57:09,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=234440.0, ans=0.125 +2024-09-17 13:57:10,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=234440.0, ans=0.09899494936611666 +2024-09-17 13:57:38,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=234520.0, ans=0.1 +2024-09-17 13:57:54,047 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.47 vs. limit=15.0 +2024-09-17 13:58:02,348 INFO [train.py:1198] (0/2) Epoch 13, batch 4350, loss[loss=0.2653, ctc_loss=0.1658, cr_loss=0.4134, attn_decoder_loss=0.2672, over 29490.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1594, cr_loss=0.4013, attn_decoder_loss=0.2636, over 5797594.59 frames. ], batch size: 97, lr: 8.48e-03, grad_scale: 8.0 +2024-09-17 13:58:10,440 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.49 vs. 
limit=6.0 +2024-09-17 13:58:14,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=234600.0, ans=0.0 +2024-09-17 13:58:23,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=234640.0, ans=0.0 +2024-09-17 13:58:24,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=234640.0, ans=0.1 +2024-09-17 13:58:34,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=234680.0, ans=0.125 +2024-09-17 13:58:42,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=234680.0, ans=0.025 +2024-09-17 13:58:49,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=234720.0, ans=0.125 +2024-09-17 13:58:59,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=234760.0, ans=0.125 +2024-09-17 13:59:15,471 INFO [train.py:1198] (0/2) Epoch 13, batch 4400, loss[loss=0.2726, ctc_loss=0.1728, cr_loss=0.4252, attn_decoder_loss=0.2743, over 27198.00 frames. ], tot_loss[loss=0.2634, ctc_loss=0.1611, cr_loss=0.4037, attn_decoder_loss=0.2658, over 5768238.50 frames. ], batch size: 124, lr: 8.48e-03, grad_scale: 16.0 +2024-09-17 13:59:15,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=234800.0, ans=0.125 +2024-09-17 13:59:15,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=234800.0, ans=0.1 +2024-09-17 13:59:15,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=234800.0, ans=0.2 +2024-09-17 13:59:17,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.88 vs. limit=15.0 +2024-09-17 13:59:26,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.55 vs. limit=15.0 +2024-09-17 13:59:26,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.67 vs. limit=22.5 +2024-09-17 13:59:28,465 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.453e+01 9.581e+01 9.987e+01 1.106e+02 2.626e+02, threshold=1.997e+02, percent-clipped=1.0 +2024-09-17 14:00:06,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=234920.0, ans=0.025 +2024-09-17 14:00:15,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=234960.0, ans=0.025 +2024-09-17 14:00:30,055 INFO [train.py:1198] (0/2) Epoch 13, batch 4450, loss[loss=0.2841, ctc_loss=0.201, cr_loss=0.4171, attn_decoder_loss=0.2841, over 19480.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.1669, cr_loss=0.4092, attn_decoder_loss=0.2693, over 5580623.82 frames. 
], batch size: 210, lr: 8.47e-03, grad_scale: 8.0 +2024-09-17 14:00:34,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=235000.0, ans=0.95 +2024-09-17 14:01:03,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=235080.0, ans=0.0 +2024-09-17 14:01:33,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.45 vs. limit=15.0 +2024-09-17 14:01:46,529 INFO [train.py:1198] (0/2) Epoch 13, batch 4500, loss[loss=0.2799, ctc_loss=0.1926, cr_loss=0.4141, attn_decoder_loss=0.2804, over 20601.00 frames. ], tot_loss[loss=0.2704, ctc_loss=0.1728, cr_loss=0.4105, attn_decoder_loss=0.2721, over 5236862.99 frames. ], batch size: 211, lr: 8.47e-03, grad_scale: 8.0 +2024-09-17 14:02:00,043 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.936e+01 1.022e+02 1.119e+02 1.227e+02 3.439e+02, threshold=2.238e+02, percent-clipped=3.0 +2024-09-17 14:02:02,117 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:02:07,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=235240.0, ans=0.125 +2024-09-17 14:02:16,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=235280.0, ans=0.0 +2024-09-17 14:02:23,768 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-13.pt +2024-09-17 14:03:16,468 INFO [train.py:1198] (0/2) Epoch 14, batch 0, loss[loss=0.2307, ctc_loss=0.1288, cr_loss=0.3515, attn_decoder_loss=0.2342, over 29584.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1288, cr_loss=0.3515, attn_decoder_loss=0.2342, over 29584.00 frames. ], batch size: 73, lr: 8.16e-03, grad_scale: 16.0 +2024-09-17 14:03:16,468 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 14:03:34,828 INFO [train.py:1230] (0/2) Epoch 14, validation: loss=0.2137, ctc_loss=0.04354, cr_loss=5.325e-15, attn_decoder_loss=0.2326, over 944034.00 frames. +2024-09-17 14:03:34,828 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 14:03:42,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=235300.0, ans=0.125 +2024-09-17 14:03:56,717 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.55 vs. limit=12.0 +2024-09-17 14:04:01,428 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.23 vs. 
limit=15.0 +2024-09-17 14:04:21,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=235420.0, ans=0.0 +2024-09-17 14:04:33,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=235420.0, ans=0.1 +2024-09-17 14:04:39,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=235460.0, ans=0.0 +2024-09-17 14:04:39,803 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.56 vs. limit=10.0 +2024-09-17 14:04:40,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=235460.0, ans=0.09899494936611666 +2024-09-17 14:04:50,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=235460.0, ans=0.125 +2024-09-17 14:04:52,727 INFO [train.py:1198] (0/2) Epoch 14, batch 50, loss[loss=0.2291, ctc_loss=0.1286, cr_loss=0.3466, attn_decoder_loss=0.2326, over 29434.00 frames. ], tot_loss[loss=0.2594, ctc_loss=0.1598, cr_loss=0.4001, attn_decoder_loss=0.2616, over 1267581.49 frames. ], batch size: 70, lr: 8.16e-03, grad_scale: 8.0 +2024-09-17 14:05:04,955 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:05:07,165 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.35 vs. limit=15.0 +2024-09-17 14:05:28,562 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.49 vs. limit=10.0 +2024-09-17 14:05:29,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_na.min_abs, batch_count=235580.0, ans=0.02 +2024-09-17 14:05:45,794 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.961e+01 9.193e+01 1.002e+02 1.099e+02 2.018e+02, threshold=2.003e+02, percent-clipped=0.0 +2024-09-17 14:05:56,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=235660.0, ans=0.0 +2024-09-17 14:06:04,829 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.53 vs. limit=15.0 +2024-09-17 14:06:05,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=235660.0, ans=0.025 +2024-09-17 14:06:08,487 INFO [train.py:1198] (0/2) Epoch 14, batch 100, loss[loss=0.2376, ctc_loss=0.1404, cr_loss=0.3589, attn_decoder_loss=0.2404, over 29519.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1601, cr_loss=0.402, attn_decoder_loss=0.2632, over 2251805.97 frames. 
], batch size: 76, lr: 8.15e-03, grad_scale: 8.0 +2024-09-17 14:06:20,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=235700.0, ans=0.125 +2024-09-17 14:06:22,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=235740.0, ans=0.1 +2024-09-17 14:06:45,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=235780.0, ans=0.125 +2024-09-17 14:06:45,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=235780.0, ans=0.125 +2024-09-17 14:06:48,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=235780.0, ans=0.125 +2024-09-17 14:06:50,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=235780.0, ans=0.125 +2024-09-17 14:07:00,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=235820.0, ans=0.125 +2024-09-17 14:07:10,020 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.98 vs. limit=15.0 +2024-09-17 14:07:25,367 INFO [train.py:1198] (0/2) Epoch 14, batch 150, loss[loss=0.2303, ctc_loss=0.1337, cr_loss=0.368, attn_decoder_loss=0.2329, over 29412.00 frames. ], tot_loss[loss=0.2579, ctc_loss=0.1567, cr_loss=0.3969, attn_decoder_loss=0.2603, over 3046189.15 frames. ], batch size: 70, lr: 8.15e-03, grad_scale: 8.0 +2024-09-17 14:07:36,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=235900.0, ans=0.2 +2024-09-17 14:07:43,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=235940.0, ans=0.125 +2024-09-17 14:08:01,291 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.71 vs. limit=15.0 +2024-09-17 14:08:20,474 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 9.181e+01 9.587e+01 1.009e+02 1.798e+02, threshold=1.917e+02, percent-clipped=0.0 +2024-09-17 14:08:29,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=236060.0, ans=0.125 +2024-09-17 14:08:34,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=236060.0, ans=0.125 +2024-09-17 14:08:34,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.27 vs. limit=15.0 +2024-09-17 14:08:40,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=236060.0, ans=0.0 +2024-09-17 14:08:43,261 INFO [train.py:1198] (0/2) Epoch 14, batch 200, loss[loss=0.2765, ctc_loss=0.1798, cr_loss=0.4266, attn_decoder_loss=0.2777, over 27232.00 frames. ], tot_loss[loss=0.2571, ctc_loss=0.1558, cr_loss=0.3955, attn_decoder_loss=0.2595, over 3658354.32 frames. 
], batch size: 124, lr: 8.15e-03, grad_scale: 8.0 +2024-09-17 14:09:10,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=236140.0, ans=0.0 +2024-09-17 14:09:58,992 INFO [train.py:1198] (0/2) Epoch 14, batch 250, loss[loss=0.2697, ctc_loss=0.1668, cr_loss=0.4115, attn_decoder_loss=0.272, over 29219.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1556, cr_loss=0.3958, attn_decoder_loss=0.2594, over 4140504.95 frames. ], batch size: 100, lr: 8.14e-03, grad_scale: 8.0 +2024-09-17 14:09:59,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=236300.0, ans=0.0 +2024-09-17 14:10:07,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=236300.0, ans=15.0 +2024-09-17 14:10:09,017 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.79 vs. limit=6.0 +2024-09-17 14:10:16,386 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.29 vs. limit=12.0 +2024-09-17 14:10:17,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_ff2.min_abs, batch_count=236340.0, ans=0.1 +2024-09-17 14:10:38,440 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.33 vs. limit=15.0 +2024-09-17 14:10:47,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=236420.0, ans=0.2 +2024-09-17 14:10:54,472 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.415e+01 8.995e+01 9.389e+01 1.000e+02 1.684e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-17 14:11:16,983 INFO [train.py:1198] (0/2) Epoch 14, batch 300, loss[loss=0.2625, ctc_loss=0.1575, cr_loss=0.3873, attn_decoder_loss=0.2656, over 29558.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1544, cr_loss=0.394, attn_decoder_loss=0.2588, over 4508197.05 frames. ], batch size: 92, lr: 8.14e-03, grad_scale: 8.0 +2024-09-17 14:12:05,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=236620.0, ans=0.0 +2024-09-17 14:12:10,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.23 vs. limit=12.0 +2024-09-17 14:12:10,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=236620.0, ans=0.125 +2024-09-17 14:12:17,549 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.76 vs. limit=22.5 +2024-09-17 14:12:18,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=236660.0, ans=10.0 +2024-09-17 14:12:35,064 INFO [train.py:1198] (0/2) Epoch 14, batch 350, loss[loss=0.2407, ctc_loss=0.1424, cr_loss=0.3721, attn_decoder_loss=0.2434, over 29740.00 frames. ], tot_loss[loss=0.2566, ctc_loss=0.1546, cr_loss=0.395, attn_decoder_loss=0.2591, over 4795616.44 frames. 
], batch size: 72, lr: 8.14e-03, grad_scale: 8.0 +2024-09-17 14:13:00,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=236740.0, ans=0.125 +2024-09-17 14:13:01,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=236740.0, ans=0.07 +2024-09-17 14:13:28,295 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.514e+01 8.698e+01 9.344e+01 1.025e+02 1.871e+02, threshold=1.869e+02, percent-clipped=0.0 +2024-09-17 14:13:50,803 INFO [train.py:1198] (0/2) Epoch 14, batch 400, loss[loss=0.2628, ctc_loss=0.1577, cr_loss=0.4206, attn_decoder_loss=0.2652, over 29676.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1544, cr_loss=0.3946, attn_decoder_loss=0.2588, over 5025228.99 frames. ], batch size: 82, lr: 8.13e-03, grad_scale: 16.0 +2024-09-17 14:14:01,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=236900.0, ans=0.125 +2024-09-17 14:14:06,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=236940.0, ans=0.0 +2024-09-17 14:14:13,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=236940.0, ans=0.2 +2024-09-17 14:14:21,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=236980.0, ans=0.125 +2024-09-17 14:14:47,171 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.78 vs. limit=22.5 +2024-09-17 14:14:58,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=237060.0, ans=0.125 +2024-09-17 14:15:08,978 INFO [train.py:1198] (0/2) Epoch 14, batch 450, loss[loss=0.2706, ctc_loss=0.1589, cr_loss=0.4138, attn_decoder_loss=0.2738, over 29709.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1541, cr_loss=0.3942, attn_decoder_loss=0.2588, over 5186979.11 frames. 
], batch size: 83, lr: 8.13e-03, grad_scale: 8.0 +2024-09-17 14:15:16,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=237100.0, ans=0.0 +2024-09-17 14:15:30,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=237140.0, ans=0.0 +2024-09-17 14:15:36,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=237140.0, ans=0.125 +2024-09-17 14:15:39,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=237180.0, ans=0.1 +2024-09-17 14:15:42,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=237180.0, ans=0.04949747468305833 +2024-09-17 14:15:48,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=237180.0, ans=0.125 +2024-09-17 14:15:50,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=237180.0, ans=0.125 +2024-09-17 14:16:03,313 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:16:05,940 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.900e+01 9.763e+01 1.081e+02 1.650e+02, threshold=1.953e+02, percent-clipped=0.0 +2024-09-17 14:16:13,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=237260.0, ans=0.2 +2024-09-17 14:16:14,305 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.38 vs. limit=22.5 +2024-09-17 14:16:27,058 INFO [train.py:1198] (0/2) Epoch 14, batch 500, loss[loss=0.2728, ctc_loss=0.1678, cr_loss=0.4188, attn_decoder_loss=0.2752, over 29428.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1537, cr_loss=0.3936, attn_decoder_loss=0.2585, over 5330049.56 frames. ], batch size: 94, lr: 8.13e-03, grad_scale: 8.0 +2024-09-17 14:16:36,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=237300.0, ans=0.125 +2024-09-17 14:16:46,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=237340.0, ans=0.2 +2024-09-17 14:16:54,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=237340.0, ans=0.125 +2024-09-17 14:17:05,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=237380.0, ans=0.04949747468305833 +2024-09-17 14:17:21,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.32 vs. limit=6.0 +2024-09-17 14:17:23,762 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=20.23 vs. limit=22.5 +2024-09-17 14:17:42,723 INFO [train.py:1198] (0/2) Epoch 14, batch 550, loss[loss=0.2753, ctc_loss=0.1714, cr_loss=0.4285, attn_decoder_loss=0.2773, over 28851.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1543, cr_loss=0.3944, attn_decoder_loss=0.2588, over 5421815.96 frames. 
], batch size: 104, lr: 8.12e-03, grad_scale: 8.0 +2024-09-17 14:18:14,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=237580.0, ans=0.1 +2024-09-17 14:18:35,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=237620.0, ans=0.07 +2024-09-17 14:18:40,100 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.566e+01 8.963e+01 9.623e+01 1.012e+02 2.800e+02, threshold=1.925e+02, percent-clipped=3.0 +2024-09-17 14:18:41,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.47 vs. limit=6.0 +2024-09-17 14:19:01,495 INFO [train.py:1198] (0/2) Epoch 14, batch 600, loss[loss=0.2731, ctc_loss=0.1737, cr_loss=0.4371, attn_decoder_loss=0.2744, over 29207.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1539, cr_loss=0.3939, attn_decoder_loss=0.2586, over 5509942.19 frames. ], batch size: 100, lr: 8.12e-03, grad_scale: 8.0 +2024-09-17 14:19:01,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=237700.0, ans=0.125 +2024-09-17 14:19:21,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=237740.0, ans=0.0 +2024-09-17 14:19:29,174 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.03 vs. limit=15.0 +2024-09-17 14:19:35,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.93 vs. limit=15.0 +2024-09-17 14:20:05,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=237860.0, ans=0.125 +2024-09-17 14:20:08,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=237860.0, ans=0.125 +2024-09-17 14:20:13,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=237860.0, ans=0.125 +2024-09-17 14:20:19,153 INFO [train.py:1198] (0/2) Epoch 14, batch 650, loss[loss=0.2507, ctc_loss=0.1493, cr_loss=0.3664, attn_decoder_loss=0.2538, over 29753.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1534, cr_loss=0.3932, attn_decoder_loss=0.2581, over 5586895.04 frames. 
], batch size: 81, lr: 8.12e-03, grad_scale: 8.0 +2024-09-17 14:20:19,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=237900.0, ans=0.125 +2024-09-17 14:20:35,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=237940.0, ans=0.125 +2024-09-17 14:20:43,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=237940.0, ans=0.0 +2024-09-17 14:20:45,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=237940.0, ans=0.0 +2024-09-17 14:20:46,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=237940.0, ans=0.1 +2024-09-17 14:20:48,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=237980.0, ans=0.1 +2024-09-17 14:21:05,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=238020.0, ans=0.125 +2024-09-17 14:21:11,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=238020.0, ans=0.07 +2024-09-17 14:21:13,714 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 8.771e+01 9.255e+01 1.013e+02 1.766e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-17 14:21:19,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=238060.0, ans=0.0 +2024-09-17 14:21:19,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=238060.0, ans=0.1 +2024-09-17 14:21:33,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=238100.0, ans=0.025 +2024-09-17 14:21:34,761 INFO [train.py:1198] (0/2) Epoch 14, batch 700, loss[loss=0.2444, ctc_loss=0.1415, cr_loss=0.3678, attn_decoder_loss=0.2477, over 29533.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1541, cr_loss=0.3943, attn_decoder_loss=0.2588, over 5637624.88 frames. ], batch size: 76, lr: 8.11e-03, grad_scale: 8.0 +2024-09-17 14:21:36,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=238100.0, ans=0.025 +2024-09-17 14:21:43,067 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.13 vs. limit=22.5 +2024-09-17 14:21:44,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=238100.0, ans=0.0 +2024-09-17 14:21:45,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=238100.0, ans=0.09899494936611666 +2024-09-17 14:22:01,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.71 vs. 
limit=15.0 +2024-09-17 14:22:21,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=238220.0, ans=0.2 +2024-09-17 14:22:23,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=238220.0, ans=0.125 +2024-09-17 14:22:30,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=238220.0, ans=0.125 +2024-09-17 14:22:32,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=238220.0, ans=0.0 +2024-09-17 14:22:37,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=238260.0, ans=0.125 +2024-09-17 14:22:52,564 INFO [train.py:1198] (0/2) Epoch 14, batch 750, loss[loss=0.2513, ctc_loss=0.146, cr_loss=0.3978, attn_decoder_loss=0.2542, over 29725.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1538, cr_loss=0.3938, attn_decoder_loss=0.2585, over 5675731.37 frames. ], batch size: 82, lr: 8.11e-03, grad_scale: 8.0 +2024-09-17 14:23:34,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.15 vs. limit=22.5 +2024-09-17 14:23:49,572 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.719e+01 9.200e+01 9.849e+01 1.104e+02 2.206e+02, threshold=1.970e+02, percent-clipped=2.0 +2024-09-17 14:23:54,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=238460.0, ans=0.125 +2024-09-17 14:24:10,889 INFO [train.py:1198] (0/2) Epoch 14, batch 800, loss[loss=0.2313, ctc_loss=0.1247, cr_loss=0.3366, attn_decoder_loss=0.2357, over 29594.00 frames. ], tot_loss[loss=0.2557, ctc_loss=0.1535, cr_loss=0.3924, attn_decoder_loss=0.2583, over 5706662.91 frames. ], batch size: 73, lr: 8.11e-03, grad_scale: 16.0 +2024-09-17 14:24:14,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=238500.0, ans=0.125 +2024-09-17 14:25:10,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=238660.0, ans=0.125 +2024-09-17 14:25:11,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=238660.0, ans=0.2 +2024-09-17 14:25:26,169 INFO [train.py:1198] (0/2) Epoch 14, batch 850, loss[loss=0.277, ctc_loss=0.1683, cr_loss=0.4331, attn_decoder_loss=0.2794, over 29717.00 frames. ], tot_loss[loss=0.2554, ctc_loss=0.1532, cr_loss=0.3927, attn_decoder_loss=0.258, over 5735619.19 frames. ], batch size: 89, lr: 8.10e-03, grad_scale: 8.0 +2024-09-17 14:25:36,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.31 vs. 
limit=8.0 +2024-09-17 14:25:41,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=238740.0, ans=0.125 +2024-09-17 14:26:04,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=238780.0, ans=0.2 +2024-09-17 14:26:11,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=238820.0, ans=0.125 +2024-09-17 14:26:22,028 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.470e+01 9.039e+01 9.635e+01 1.057e+02 1.739e+02, threshold=1.927e+02, percent-clipped=0.0 +2024-09-17 14:26:31,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=238860.0, ans=0.125 +2024-09-17 14:26:31,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=238860.0, ans=0.125 +2024-09-17 14:26:44,209 INFO [train.py:1198] (0/2) Epoch 14, batch 900, loss[loss=0.2334, ctc_loss=0.1291, cr_loss=0.3512, attn_decoder_loss=0.2372, over 29616.00 frames. ], tot_loss[loss=0.2556, ctc_loss=0.1535, cr_loss=0.3931, attn_decoder_loss=0.2582, over 5741470.53 frames. ], batch size: 73, lr: 8.10e-03, grad_scale: 8.0 +2024-09-17 14:26:44,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=238900.0, ans=0.1 +2024-09-17 14:26:52,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=238900.0, ans=0.125 +2024-09-17 14:27:01,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.88 vs. limit=10.0 +2024-09-17 14:27:02,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=238940.0, ans=0.1 +2024-09-17 14:27:16,350 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:27:27,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=238980.0, ans=0.125 +2024-09-17 14:27:34,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=239020.0, ans=0.125 +2024-09-17 14:27:47,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=239060.0, ans=0.125 +2024-09-17 14:27:57,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=239060.0, ans=0.1 +2024-09-17 14:28:01,802 INFO [train.py:1198] (0/2) Epoch 14, batch 950, loss[loss=0.2277, ctc_loss=0.1341, cr_loss=0.3612, attn_decoder_loss=0.2301, over 29527.00 frames. ], tot_loss[loss=0.2557, ctc_loss=0.1537, cr_loss=0.3938, attn_decoder_loss=0.2582, over 5743491.56 frames. ], batch size: 74, lr: 8.10e-03, grad_scale: 8.0 +2024-09-17 14:28:19,074 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.63 vs. 
limit=6.0 +2024-09-17 14:28:19,179 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.06 vs. limit=15.0 +2024-09-17 14:28:32,899 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.38 vs. limit=15.0 +2024-09-17 14:28:49,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=239220.0, ans=0.125 +2024-09-17 14:28:52,980 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.12 vs. limit=22.5 +2024-09-17 14:28:58,295 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.677e+01 9.217e+01 9.958e+01 1.123e+02 9.034e+02, threshold=1.992e+02, percent-clipped=2.0 +2024-09-17 14:29:03,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=239260.0, ans=0.125 +2024-09-17 14:29:17,716 INFO [train.py:1198] (0/2) Epoch 14, batch 1000, loss[loss=0.2381, ctc_loss=0.14, cr_loss=0.3717, attn_decoder_loss=0.2407, over 29525.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1545, cr_loss=0.3944, attn_decoder_loss=0.2589, over 5736575.52 frames. ], batch size: 77, lr: 8.09e-03, grad_scale: 8.0 +2024-09-17 14:29:19,840 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.74 vs. limit=22.5 +2024-09-17 14:29:34,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=239340.0, ans=0.09899494936611666 +2024-09-17 14:29:52,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=239380.0, ans=0.0 +2024-09-17 14:29:57,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=239380.0, ans=0.125 +2024-09-17 14:30:08,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=239420.0, ans=0.125 +2024-09-17 14:30:35,791 INFO [train.py:1198] (0/2) Epoch 14, batch 1050, loss[loss=0.2621, ctc_loss=0.1539, cr_loss=0.4007, attn_decoder_loss=0.2652, over 29690.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1545, cr_loss=0.395, attn_decoder_loss=0.2587, over 5742600.60 frames. ], batch size: 85, lr: 8.09e-03, grad_scale: 8.0 +2024-09-17 14:30:36,668 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.61 vs. 
limit=15.0 +2024-09-17 14:30:52,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=239540.0, ans=0.125 +2024-09-17 14:30:56,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=239540.0, ans=0.1 +2024-09-17 14:30:56,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=239540.0, ans=0.0 +2024-09-17 14:31:03,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=239540.0, ans=0.07 +2024-09-17 14:31:09,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=239580.0, ans=0.2 +2024-09-17 14:31:31,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=239620.0, ans=0.125 +2024-09-17 14:31:34,117 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.452e+01 8.789e+01 9.469e+01 1.013e+02 1.494e+02, threshold=1.894e+02, percent-clipped=0.0 +2024-09-17 14:31:45,602 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.57 vs. limit=15.0 +2024-09-17 14:31:52,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=239700.0, ans=0.0 +2024-09-17 14:31:53,880 INFO [train.py:1198] (0/2) Epoch 14, batch 1100, loss[loss=0.251, ctc_loss=0.1449, cr_loss=0.3784, attn_decoder_loss=0.2543, over 29444.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1542, cr_loss=0.3939, attn_decoder_loss=0.2584, over 5756596.13 frames. ], batch size: 78, lr: 8.09e-03, grad_scale: 8.0 +2024-09-17 14:32:23,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=239780.0, ans=0.025 +2024-09-17 14:32:31,074 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.84 vs. limit=10.0 +2024-09-17 14:32:51,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=239820.0, ans=0.125 +2024-09-17 14:32:56,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=239860.0, ans=0.2 +2024-09-17 14:33:09,748 INFO [train.py:1198] (0/2) Epoch 14, batch 1150, loss[loss=0.2514, ctc_loss=0.1504, cr_loss=0.3884, attn_decoder_loss=0.2539, over 29432.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1544, cr_loss=0.394, attn_decoder_loss=0.2584, over 5755133.88 frames. 
], batch size: 78, lr: 8.08e-03, grad_scale: 8.0 +2024-09-17 14:33:30,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=239940.0, ans=0.125 +2024-09-17 14:33:37,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=239940.0, ans=0.125 +2024-09-17 14:33:47,026 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-60000.pt +2024-09-17 14:33:59,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=239980.0, ans=0.0 +2024-09-17 14:34:09,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=240020.0, ans=0.125 +2024-09-17 14:34:12,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=240020.0, ans=0.0 +2024-09-17 14:34:13,791 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.548e+01 9.029e+01 9.820e+01 1.050e+02 2.109e+02, threshold=1.964e+02, percent-clipped=1.0 +2024-09-17 14:34:21,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=240060.0, ans=0.0 +2024-09-17 14:34:36,161 INFO [train.py:1198] (0/2) Epoch 14, batch 1200, loss[loss=0.2613, ctc_loss=0.1533, cr_loss=0.3977, attn_decoder_loss=0.2645, over 29658.00 frames. ], tot_loss[loss=0.2565, ctc_loss=0.155, cr_loss=0.3949, attn_decoder_loss=0.259, over 5749203.46 frames. ], batch size: 85, lr: 8.08e-03, grad_scale: 16.0 +2024-09-17 14:34:37,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=240100.0, ans=0.1 +2024-09-17 14:35:54,244 INFO [train.py:1198] (0/2) Epoch 14, batch 1250, loss[loss=0.2727, ctc_loss=0.1725, cr_loss=0.4223, attn_decoder_loss=0.2744, over 29550.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1556, cr_loss=0.3967, attn_decoder_loss=0.2598, over 5776692.86 frames. ], batch size: 92, lr: 8.08e-03, grad_scale: 8.0 +2024-09-17 14:35:59,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=240300.0, ans=0.1 +2024-09-17 14:36:09,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=240340.0, ans=0.125 +2024-09-17 14:36:13,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=240340.0, ans=0.2 +2024-09-17 14:36:25,873 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.90 vs. limit=12.0 +2024-09-17 14:36:52,184 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.786e+01 9.275e+01 9.951e+01 3.249e+02, threshold=1.855e+02, percent-clipped=3.0 +2024-09-17 14:37:09,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=240500.0, ans=0.2 +2024-09-17 14:37:10,335 INFO [train.py:1198] (0/2) Epoch 14, batch 1300, loss[loss=0.2614, ctc_loss=0.1532, cr_loss=0.385, attn_decoder_loss=0.2649, over 28227.00 frames. 
], tot_loss[loss=0.2564, ctc_loss=0.1545, cr_loss=0.3952, attn_decoder_loss=0.259, over 5778962.28 frames. ], batch size: 111, lr: 8.07e-03, grad_scale: 8.0 +2024-09-17 14:37:30,630 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.79 vs. limit=6.0 +2024-09-17 14:37:31,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=240540.0, ans=0.2 +2024-09-17 14:37:38,045 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.12 vs. limit=22.5 +2024-09-17 14:38:00,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=240620.0, ans=0.0 +2024-09-17 14:38:09,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=240660.0, ans=0.025 +2024-09-17 14:38:12,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=240660.0, ans=0.125 +2024-09-17 14:38:19,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=240660.0, ans=0.1 +2024-09-17 14:38:24,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=240700.0, ans=0.0 +2024-09-17 14:38:25,787 INFO [train.py:1198] (0/2) Epoch 14, batch 1350, loss[loss=0.2595, ctc_loss=0.1526, cr_loss=0.4093, attn_decoder_loss=0.2623, over 29794.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1541, cr_loss=0.3954, attn_decoder_loss=0.2587, over 5796414.13 frames. ], batch size: 81, lr: 8.07e-03, grad_scale: 8.0 +2024-09-17 14:38:26,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.03 vs. limit=15.0 +2024-09-17 14:38:38,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=240700.0, ans=0.0 +2024-09-17 14:38:49,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.22 vs. limit=15.0 +2024-09-17 14:38:53,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=240740.0, ans=0.0 +2024-09-17 14:39:07,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=240780.0, ans=0.125 +2024-09-17 14:39:12,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.17 vs. limit=22.5 +2024-09-17 14:39:15,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=240820.0, ans=0.05 +2024-09-17 14:39:27,567 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.403e+01 8.834e+01 9.288e+01 9.876e+01 1.389e+02, threshold=1.858e+02, percent-clipped=0.0 +2024-09-17 14:39:45,916 INFO [train.py:1198] (0/2) Epoch 14, batch 1400, loss[loss=0.2229, ctc_loss=0.128, cr_loss=0.3447, attn_decoder_loss=0.2258, over 29579.00 frames. 
], tot_loss[loss=0.2558, ctc_loss=0.1537, cr_loss=0.3941, attn_decoder_loss=0.2584, over 5807958.36 frames. ], batch size: 69, lr: 8.07e-03, grad_scale: 8.0 +2024-09-17 14:41:01,373 INFO [train.py:1198] (0/2) Epoch 14, batch 1450, loss[loss=0.2642, ctc_loss=0.1591, cr_loss=0.4143, attn_decoder_loss=0.2667, over 29453.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1535, cr_loss=0.3939, attn_decoder_loss=0.2587, over 5805378.07 frames. ], batch size: 94, lr: 8.06e-03, grad_scale: 8.0 +2024-09-17 14:41:10,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=241100.0, ans=0.125 +2024-09-17 14:41:17,267 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.46 vs. limit=15.0 +2024-09-17 14:41:28,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=241140.0, ans=0.125 +2024-09-17 14:41:42,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=241180.0, ans=0.125 +2024-09-17 14:41:53,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.61 vs. limit=15.0 +2024-09-17 14:41:58,396 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.760e+01 9.187e+01 9.748e+01 1.026e+02 3.155e+02, threshold=1.950e+02, percent-clipped=2.0 +2024-09-17 14:42:06,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=241260.0, ans=0.1 +2024-09-17 14:42:16,803 INFO [train.py:1198] (0/2) Epoch 14, batch 1500, loss[loss=0.2727, ctc_loss=0.1602, cr_loss=0.4277, attn_decoder_loss=0.2757, over 29644.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.154, cr_loss=0.3952, attn_decoder_loss=0.2595, over 5807204.68 frames. ], batch size: 86, lr: 8.06e-03, grad_scale: 8.0 +2024-09-17 14:42:35,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=241340.0, ans=0.2 +2024-09-17 14:43:11,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=241420.0, ans=0.1 +2024-09-17 14:43:16,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=241420.0, ans=0.125 +2024-09-17 14:43:24,495 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.20 vs. limit=15.0 +2024-09-17 14:43:26,069 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.71 vs. limit=15.0 +2024-09-17 14:43:33,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=241460.0, ans=0.125 +2024-09-17 14:43:37,276 INFO [train.py:1198] (0/2) Epoch 14, batch 1550, loss[loss=0.2745, ctc_loss=0.1689, cr_loss=0.4126, attn_decoder_loss=0.2771, over 29501.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1545, cr_loss=0.3957, attn_decoder_loss=0.2598, over 5782855.59 frames. 
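
Each WARNING line from optim.py reports five quantiles of recently observed gradient norms together with a clipping threshold, and throughout this log the threshold equals Clipping_scale times the middle (median) value: for example 2.0 × 9.820e+01 = 1.964e+02 and 2.0 × 9.288e+01 ≈ 1.858e+02 in the warnings above. A minimal sketch of such a median-based clipping rule follows; the function name and structure are illustrative, not icefall's actual optimizer code:

    import torch

    def clip_to_scaled_median(parameters, recent_norms, clipping_scale=2.0):
        # Threshold is clipping_scale times the median of recent grad norms,
        # matching the "threshold = 2.0 x middle quartile" pattern in the log.
        threshold = clipping_scale * torch.tensor(recent_norms).median()
        grads = [p.grad for p in parameters if p.grad is not None]
        total_norm = torch.norm(torch.stack([g.norm() for g in grads]))
        if total_norm > threshold:  # such batches feed the "percent-clipped" stat
            for g in grads:
                g.mul_(threshold / total_norm)
        return total_norm, threshold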
], batch size: 90, lr: 8.06e-03, grad_scale: 8.0 +2024-09-17 14:43:51,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=241540.0, ans=0.0 +2024-09-17 14:43:52,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=241540.0, ans=0.0 +2024-09-17 14:43:57,660 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.87 vs. limit=15.0 +2024-09-17 14:44:09,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=241580.0, ans=0.125 +2024-09-17 14:44:10,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=241580.0, ans=0.1 +2024-09-17 14:44:34,866 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.788e+01 9.004e+01 9.910e+01 1.078e+02 4.071e+02, threshold=1.982e+02, percent-clipped=2.0 +2024-09-17 14:44:44,728 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.52 vs. limit=22.5 +2024-09-17 14:44:46,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=241660.0, ans=0.025 +2024-09-17 14:44:50,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=241660.0, ans=0.125 +2024-09-17 14:44:53,190 INFO [train.py:1198] (0/2) Epoch 14, batch 1600, loss[loss=0.275, ctc_loss=0.1744, cr_loss=0.426, attn_decoder_loss=0.2767, over 29670.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1548, cr_loss=0.3955, attn_decoder_loss=0.2595, over 5765408.60 frames. ], batch size: 85, lr: 8.05e-03, grad_scale: 16.0 +2024-09-17 14:45:31,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=241780.0, ans=0.0 +2024-09-17 14:45:35,061 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.11 vs. limit=15.0 +2024-09-17 14:45:43,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=241820.0, ans=0.1 +2024-09-17 14:45:57,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=241860.0, ans=0.0 +2024-09-17 14:46:04,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=241860.0, ans=0.0 +2024-09-17 14:46:06,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=241860.0, ans=0.2 +2024-09-17 14:46:08,674 INFO [train.py:1198] (0/2) Epoch 14, batch 1650, loss[loss=0.2581, ctc_loss=0.1638, cr_loss=0.4097, attn_decoder_loss=0.2595, over 29696.00 frames. ], tot_loss[loss=0.2566, ctc_loss=0.1547, cr_loss=0.3951, attn_decoder_loss=0.2592, over 5760055.59 frames. 
], batch size: 89, lr: 8.05e-03, grad_scale: 8.0 +2024-09-17 14:46:52,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=241980.0, ans=0.1 +2024-09-17 14:47:12,401 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.670e+01 8.773e+01 9.391e+01 1.036e+02 1.444e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-17 14:47:28,904 INFO [train.py:1198] (0/2) Epoch 14, batch 1700, loss[loss=0.2196, ctc_loss=0.1229, cr_loss=0.3368, attn_decoder_loss=0.2228, over 29590.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1539, cr_loss=0.3943, attn_decoder_loss=0.2586, over 5781712.23 frames. ], batch size: 69, lr: 8.05e-03, grad_scale: 8.0 +2024-09-17 14:47:48,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.64 vs. limit=22.5 +2024-09-17 14:48:04,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=242180.0, ans=0.07 +2024-09-17 14:48:05,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=242180.0, ans=0.125 +2024-09-17 14:48:17,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.06 vs. limit=15.0 +2024-09-17 14:48:25,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=242220.0, ans=0.0 +2024-09-17 14:48:28,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=242260.0, ans=0.04949747468305833 +2024-09-17 14:48:34,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=242260.0, ans=0.1 +2024-09-17 14:48:40,585 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:48:40,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=242260.0, ans=0.125 +2024-09-17 14:48:44,838 INFO [train.py:1198] (0/2) Epoch 14, batch 1750, loss[loss=0.2282, ctc_loss=0.1337, cr_loss=0.3677, attn_decoder_loss=0.2305, over 29351.00 frames. ], tot_loss[loss=0.2558, ctc_loss=0.1537, cr_loss=0.3944, attn_decoder_loss=0.2584, over 5788070.99 frames. 
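
The scaling.py lines above record module hyperparameters (dropout probabilities, skip rates, balancer probabilities) that are re-evaluated as batch_count grows, so quantities like dropout_p follow a schedule rather than staying fixed. A minimal stand-in for such a batch-count-keyed schedule is sketched below, assuming piecewise-linear interpolation between breakpoints; the real ScheduledFloat lives in icefall's zipformer scaling.py and may differ in detail:

    # Minimal sketch of a value scheduled on the global batch counter.
    # Piecewise-linear interpolation between (batch_count, value) pairs is
    # an assumption about the schedule's shape.
    import numpy as np

    class ScheduledValue:
        def __init__(self, *points):
            self.xs = [float(x) for x, _ in points]
            self.ys = [float(y) for _, y in points]

        def __call__(self, batch_count):
            return float(np.interp(batch_count, self.xs, self.ys))

    dropout_p = ScheduledValue((0.0, 0.3), (20000.0, 0.1))
    print(dropout_p(240100))  # 0.1, as in the dropout_p entries above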
], batch size: 67, lr: 8.04e-03, grad_scale: 8.0 +2024-09-17 14:49:07,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=242340.0, ans=0.125 +2024-09-17 14:49:30,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=242420.0, ans=0.1 +2024-09-17 14:49:35,532 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:49:39,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=242420.0, ans=0.1 +2024-09-17 14:49:41,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=242420.0, ans=0.0 +2024-09-17 14:49:44,145 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.412e+01 8.812e+01 9.337e+01 1.025e+02 2.569e+02, threshold=1.867e+02, percent-clipped=1.0 +2024-09-17 14:49:53,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=242460.0, ans=0.09899494936611666 +2024-09-17 14:50:00,705 INFO [train.py:1198] (0/2) Epoch 14, batch 1800, loss[loss=0.2816, ctc_loss=0.1807, cr_loss=0.449, attn_decoder_loss=0.2828, over 29688.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.154, cr_loss=0.3948, attn_decoder_loss=0.2587, over 5790977.29 frames. ], batch size: 83, lr: 8.04e-03, grad_scale: 8.0 +2024-09-17 14:50:23,164 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.14 vs. limit=15.0 +2024-09-17 14:50:24,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten.whitening_limit, batch_count=242540.0, ans=15.0 +2024-09-17 14:50:32,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=242580.0, ans=0.125 +2024-09-17 14:50:33,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=242580.0, ans=0.2 +2024-09-17 14:50:33,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=242580.0, ans=0.025 +2024-09-17 14:50:36,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=242580.0, ans=0.125 +2024-09-17 14:50:48,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=242620.0, ans=0.1 +2024-09-17 14:50:48,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=242620.0, ans=0.2 +2024-09-17 14:50:56,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=242620.0, ans=0.025 +2024-09-17 14:51:17,475 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.33 vs. limit=6.0 +2024-09-17 14:51:20,988 INFO [train.py:1198] (0/2) Epoch 14, batch 1850, loss[loss=0.2667, ctc_loss=0.1542, cr_loss=0.3967, attn_decoder_loss=0.2704, over 29649.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1538, cr_loss=0.3948, attn_decoder_loss=0.2588, over 5797135.17 frames. 
], batch size: 86, lr: 8.04e-03, grad_scale: 8.0 +2024-09-17 14:51:28,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=242700.0, ans=0.1 +2024-09-17 14:51:34,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=242740.0, ans=0.0 +2024-09-17 14:51:35,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.00 vs. limit=15.0 +2024-09-17 14:51:40,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=242740.0, ans=0.1 +2024-09-17 14:52:05,202 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:52:11,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=242820.0, ans=0.125 +2024-09-17 14:52:15,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=242820.0, ans=0.05 +2024-09-17 14:52:18,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=242820.0, ans=0.0 +2024-09-17 14:52:19,872 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.993e+01 9.601e+01 1.027e+02 2.401e+02, threshold=1.920e+02, percent-clipped=1.0 +2024-09-17 14:52:36,268 INFO [train.py:1198] (0/2) Epoch 14, batch 1900, loss[loss=0.259, ctc_loss=0.1392, cr_loss=0.3807, attn_decoder_loss=0.2638, over 29710.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1538, cr_loss=0.3948, attn_decoder_loss=0.2591, over 5804903.88 frames. ], batch size: 89, lr: 8.03e-03, grad_scale: 8.0 +2024-09-17 14:53:10,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=242980.0, ans=0.1 +2024-09-17 14:53:36,798 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.04 vs. limit=22.5 +2024-09-17 14:53:39,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=243060.0, ans=0.125 +2024-09-17 14:53:43,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=243060.0, ans=0.0 +2024-09-17 14:53:52,712 INFO [train.py:1198] (0/2) Epoch 14, batch 1950, loss[loss=0.2474, ctc_loss=0.1539, cr_loss=0.4006, attn_decoder_loss=0.2489, over 29438.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1546, cr_loss=0.3971, attn_decoder_loss=0.2601, over 5819482.67 frames. ], batch size: 78, lr: 8.03e-03, grad_scale: 8.0 +2024-09-17 14:54:26,005 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.10 vs. limit=22.5 +2024-09-17 14:54:33,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=243180.0, ans=0.125 +2024-09-17 14:54:48,072 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.88 vs. 
limit=10.0 +2024-09-17 14:54:57,843 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.855e+01 9.181e+01 9.574e+01 1.007e+02 1.903e+02, threshold=1.915e+02, percent-clipped=0.0 +2024-09-17 14:55:11,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=243300.0, ans=0.0 +2024-09-17 14:55:13,071 INFO [train.py:1198] (0/2) Epoch 14, batch 2000, loss[loss=0.2287, ctc_loss=0.1252, cr_loss=0.3569, attn_decoder_loss=0.2323, over 29346.00 frames. ], tot_loss[loss=0.2578, ctc_loss=0.155, cr_loss=0.3971, attn_decoder_loss=0.2604, over 5796240.74 frames. ], batch size: 67, lr: 8.03e-03, grad_scale: 8.0 +2024-09-17 14:55:28,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=243340.0, ans=0.025 +2024-09-17 14:56:02,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.83 vs. limit=15.0 +2024-09-17 14:56:06,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=243420.0, ans=0.1 +2024-09-17 14:56:29,003 INFO [train.py:1198] (0/2) Epoch 14, batch 2050, loss[loss=0.2256, ctc_loss=0.1263, cr_loss=0.3531, attn_decoder_loss=0.2288, over 29422.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1547, cr_loss=0.3957, attn_decoder_loss=0.2596, over 5787800.97 frames. ], batch size: 70, lr: 8.02e-03, grad_scale: 8.0 +2024-09-17 14:56:29,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=243500.0, ans=0.0 +2024-09-17 14:56:37,594 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.73 vs. limit=15.0 +2024-09-17 14:57:19,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=243620.0, ans=0.2 +2024-09-17 14:57:29,378 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.444e+01 8.883e+01 9.401e+01 1.013e+02 1.488e+02, threshold=1.880e+02, percent-clipped=0.0 +2024-09-17 14:57:37,686 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.61 vs. limit=15.0 +2024-09-17 14:57:44,643 INFO [train.py:1198] (0/2) Epoch 14, batch 2100, loss[loss=0.255, ctc_loss=0.1526, cr_loss=0.4039, attn_decoder_loss=0.2574, over 29754.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1536, cr_loss=0.3942, attn_decoder_loss=0.2586, over 5799297.89 frames. ], batch size: 81, lr: 8.02e-03, grad_scale: 8.0 +2024-09-17 14:57:46,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=243700.0, ans=0.125 +2024-09-17 14:58:27,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=243780.0, ans=0.2 +2024-09-17 14:59:04,622 INFO [train.py:1198] (0/2) Epoch 14, batch 2150, loss[loss=0.2626, ctc_loss=0.1643, cr_loss=0.4281, attn_decoder_loss=0.264, over 29436.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1527, cr_loss=0.3932, attn_decoder_loss=0.2578, over 5814861.69 frames. 
], batch size: 78, lr: 8.02e-03, grad_scale: 8.0 +2024-09-17 14:59:20,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=243940.0, ans=0.0 +2024-09-17 14:59:27,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=243940.0, ans=0.0 +2024-09-17 14:59:32,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=243940.0, ans=0.1 +2024-09-17 14:59:33,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=243980.0, ans=0.125 +2024-09-17 14:59:46,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=243980.0, ans=0.025 +2024-09-17 14:59:56,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=244020.0, ans=0.125 +2024-09-17 15:00:05,494 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.957e+01 9.631e+01 1.031e+02 4.379e+02, threshold=1.926e+02, percent-clipped=1.0 +2024-09-17 15:00:10,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=244060.0, ans=0.125 +2024-09-17 15:00:20,631 INFO [train.py:1198] (0/2) Epoch 14, batch 2200, loss[loss=0.2599, ctc_loss=0.1542, cr_loss=0.3899, attn_decoder_loss=0.263, over 29629.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1526, cr_loss=0.3932, attn_decoder_loss=0.2578, over 5811169.34 frames. ], batch size: 86, lr: 8.01e-03, grad_scale: 8.0 +2024-09-17 15:00:24,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=244100.0, ans=0.2 +2024-09-17 15:00:28,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=244100.0, ans=0.125 +2024-09-17 15:00:46,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=244140.0, ans=0.2 +2024-09-17 15:00:54,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=244180.0, ans=0.125 +2024-09-17 15:01:10,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=244220.0, ans=0.0 +2024-09-17 15:01:31,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.38 vs. limit=15.0 +2024-09-17 15:01:32,827 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.88 vs. limit=15.0 +2024-09-17 15:01:36,352 INFO [train.py:1198] (0/2) Epoch 14, batch 2250, loss[loss=0.2662, ctc_loss=0.1578, cr_loss=0.423, attn_decoder_loss=0.2688, over 29701.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.1528, cr_loss=0.3934, attn_decoder_loss=0.258, over 5810835.89 frames. 
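
The grad_scale value in the tot_loss lines moves between 4.0, 8.0 and 16.0 over the course of this excerpt, which is characteristic of dynamic loss scaling under fp16 training: the scale grows after a run of finite-gradient steps and is halved when an overflow is detected. With stock PyTorch AMP the corresponding loop looks roughly like the sketch below (generic code, not the actual icefall train.py; the batch fields are placeholders):

    import torch

    scaler = torch.cuda.amp.GradScaler(init_scale=8.0)

    def train_step(model, batch, optimizer, criterion):
        optimizer.zero_grad()
        with torch.cuda.amp.autocast(dtype=torch.float16):
            loss = criterion(model(batch["inputs"]), batch["targets"])
        scaler.scale(loss).backward()
        scaler.step(optimizer)  # the step is skipped if gradients overflowed
        scaler.update()         # grows the scale periodically, halves on overflow
        return loss.detach(), scaler.get_scale()  # get_scale() ~ "grad_scale"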
], batch size: 82, lr: 8.01e-03, grad_scale: 8.0 +2024-09-17 15:01:44,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=244300.0, ans=0.0 +2024-09-17 15:01:54,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=244340.0, ans=0.125 +2024-09-17 15:02:06,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=244380.0, ans=0.1 +2024-09-17 15:02:10,392 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.30 vs. limit=12.0 +2024-09-17 15:02:19,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.11 vs. limit=15.0 +2024-09-17 15:02:40,758 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.786e+01 8.724e+01 9.348e+01 1.021e+02 5.677e+02, threshold=1.870e+02, percent-clipped=2.0 +2024-09-17 15:02:49,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.94 vs. limit=15.0 +2024-09-17 15:02:56,102 INFO [train.py:1198] (0/2) Epoch 14, batch 2300, loss[loss=0.2329, ctc_loss=0.1261, cr_loss=0.3595, attn_decoder_loss=0.2368, over 29302.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1518, cr_loss=0.3915, attn_decoder_loss=0.2569, over 5797724.07 frames. ], batch size: 71, lr: 8.01e-03, grad_scale: 8.0 +2024-09-17 15:02:58,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=244500.0, ans=0.025 +2024-09-17 15:03:01,405 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.61 vs. limit=6.0 +2024-09-17 15:03:12,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=244540.0, ans=0.0 +2024-09-17 15:03:49,635 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.92 vs. limit=15.0 +2024-09-17 15:04:00,249 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.80 vs. limit=22.5 +2024-09-17 15:04:10,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=244700.0, ans=0.125 +2024-09-17 15:04:11,746 INFO [train.py:1198] (0/2) Epoch 14, batch 2350, loss[loss=0.2702, ctc_loss=0.1685, cr_loss=0.4192, attn_decoder_loss=0.2722, over 29704.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1524, cr_loss=0.3929, attn_decoder_loss=0.2576, over 5804128.79 frames. ], batch size: 83, lr: 8.00e-03, grad_scale: 8.0 +2024-09-17 15:04:29,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=244740.0, ans=0.125 +2024-09-17 15:04:33,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.68 vs. limit=12.0 +2024-09-17 15:04:45,703 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.13 vs. 
limit=22.5 +2024-09-17 15:05:12,275 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.793e+01 8.941e+01 9.524e+01 1.022e+02 1.702e+02, threshold=1.905e+02, percent-clipped=0.0 +2024-09-17 15:05:15,038 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.09 vs. limit=12.0 +2024-09-17 15:05:20,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=244860.0, ans=0.1 +2024-09-17 15:05:24,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=244860.0, ans=0.0 +2024-09-17 15:05:26,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=244900.0, ans=0.09899494936611666 +2024-09-17 15:05:27,589 INFO [train.py:1198] (0/2) Epoch 14, batch 2400, loss[loss=0.2515, ctc_loss=0.1552, cr_loss=0.4046, attn_decoder_loss=0.2532, over 29539.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1533, cr_loss=0.3941, attn_decoder_loss=0.2585, over 5806919.45 frames. ], batch size: 76, lr: 8.00e-03, grad_scale: 16.0 +2024-09-17 15:05:27,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=244900.0, ans=0.125 +2024-09-17 15:05:39,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=244900.0, ans=0.07 +2024-09-17 15:05:56,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=244980.0, ans=0.125 +2024-09-17 15:06:08,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=244980.0, ans=0.125 +2024-09-17 15:06:13,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=245020.0, ans=0.125 +2024-09-17 15:06:14,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=245020.0, ans=0.125 +2024-09-17 15:06:19,465 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.48 vs. limit=22.5 +2024-09-17 15:06:26,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=245020.0, ans=0.125 +2024-09-17 15:06:29,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=245060.0, ans=0.0 +2024-09-17 15:06:32,903 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.73 vs. limit=15.0 +2024-09-17 15:06:44,548 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:06:45,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.37 vs. limit=15.0 +2024-09-17 15:06:45,766 INFO [train.py:1198] (0/2) Epoch 14, batch 2450, loss[loss=0.254, ctc_loss=0.145, cr_loss=0.3713, attn_decoder_loss=0.2579, over 29712.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1543, cr_loss=0.3957, attn_decoder_loss=0.2596, over 5785425.57 frames. 
], batch size: 82, lr: 8.00e-03, grad_scale: 4.0 +2024-09-17 15:06:50,944 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.71 vs. limit=10.0 +2024-09-17 15:06:55,573 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.91 vs. limit=15.0 +2024-09-17 15:06:59,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=245140.0, ans=0.0 +2024-09-17 15:07:03,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.97 vs. limit=15.0 +2024-09-17 15:07:03,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=245140.0, ans=0.0 +2024-09-17 15:07:06,429 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.72 vs. limit=15.0 +2024-09-17 15:07:31,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=245220.0, ans=0.0 +2024-09-17 15:07:34,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=245220.0, ans=0.125 +2024-09-17 15:07:34,827 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.90 vs. limit=15.0 +2024-09-17 15:07:44,665 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.47 vs. limit=15.0 +2024-09-17 15:07:46,133 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.36 vs. limit=15.0 +2024-09-17 15:07:49,617 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.704e+01 8.888e+01 9.584e+01 1.028e+02 5.136e+02, threshold=1.917e+02, percent-clipped=2.0 +2024-09-17 15:08:01,731 INFO [train.py:1198] (0/2) Epoch 14, batch 2500, loss[loss=0.2616, ctc_loss=0.1494, cr_loss=0.3827, attn_decoder_loss=0.2656, over 29658.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1543, cr_loss=0.3955, attn_decoder_loss=0.2596, over 5796886.75 frames. ], batch size: 86, lr: 7.99e-03, grad_scale: 8.0 +2024-09-17 15:08:14,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=245300.0, ans=0.125 +2024-09-17 15:08:38,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=245380.0, ans=0.125 +2024-09-17 15:09:16,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=245500.0, ans=0.2 +2024-09-17 15:09:18,071 INFO [train.py:1198] (0/2) Epoch 14, batch 2550, loss[loss=0.24, ctc_loss=0.1484, cr_loss=0.3863, attn_decoder_loss=0.2416, over 29348.00 frames. ], tot_loss[loss=0.2567, ctc_loss=0.154, cr_loss=0.3949, attn_decoder_loss=0.2594, over 5800765.02 frames. 
], batch size: 67, lr: 7.99e-03, grad_scale: 8.0 +2024-09-17 15:09:18,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=245500.0, ans=0.1 +2024-09-17 15:09:28,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.80 vs. limit=15.0 +2024-09-17 15:09:36,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=245540.0, ans=0.0 +2024-09-17 15:09:37,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=245540.0, ans=0.2 +2024-09-17 15:09:42,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=245540.0, ans=0.0 +2024-09-17 15:10:13,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=245620.0, ans=0.0 +2024-09-17 15:10:22,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=245660.0, ans=0.125 +2024-09-17 15:10:27,974 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.823e+01 9.211e+01 1.016e+02 2.509e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-17 15:10:31,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=245660.0, ans=0.1 +2024-09-17 15:10:38,504 INFO [train.py:1198] (0/2) Epoch 14, batch 2600, loss[loss=0.2449, ctc_loss=0.1398, cr_loss=0.3626, attn_decoder_loss=0.2485, over 29469.00 frames. ], tot_loss[loss=0.2569, ctc_loss=0.154, cr_loss=0.3946, attn_decoder_loss=0.2596, over 5796834.07 frames. ], batch size: 78, lr: 7.99e-03, grad_scale: 8.0 +2024-09-17 15:11:18,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=245780.0, ans=0.125 +2024-09-17 15:11:18,625 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.76 vs. limit=22.5 +2024-09-17 15:11:31,017 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.25 vs. limit=15.0 +2024-09-17 15:11:33,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=245820.0, ans=0.125 +2024-09-17 15:11:54,236 INFO [train.py:1198] (0/2) Epoch 14, batch 2650, loss[loss=0.2764, ctc_loss=0.1739, cr_loss=0.4298, attn_decoder_loss=0.2782, over 29308.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1543, cr_loss=0.3954, attn_decoder_loss=0.2598, over 5802504.93 frames. 
], batch size: 100, lr: 7.98e-03, grad_scale: 8.0 +2024-09-17 15:11:54,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=245900.0, ans=0.0 +2024-09-17 15:11:54,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_na.min_abs, batch_count=245900.0, ans=0.02 +2024-09-17 15:11:55,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=245900.0, ans=0.1 +2024-09-17 15:12:32,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=245980.0, ans=0.125 +2024-09-17 15:12:41,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=246020.0, ans=0.125 +2024-09-17 15:12:51,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.28 vs. limit=15.0 +2024-09-17 15:12:59,459 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.821e+01 9.405e+01 9.920e+01 1.834e+02, threshold=1.881e+02, percent-clipped=0.0 +2024-09-17 15:12:59,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=246060.0, ans=0.125 +2024-09-17 15:13:02,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=246060.0, ans=0.125 +2024-09-17 15:13:10,174 INFO [train.py:1198] (0/2) Epoch 14, batch 2700, loss[loss=0.2668, ctc_loss=0.1636, cr_loss=0.4017, attn_decoder_loss=0.2693, over 29513.00 frames. ], tot_loss[loss=0.2571, ctc_loss=0.1541, cr_loss=0.3948, attn_decoder_loss=0.2598, over 5797414.77 frames. ], batch size: 87, lr: 7.98e-03, grad_scale: 8.0 +2024-09-17 15:13:35,063 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.88 vs. limit=15.0 +2024-09-17 15:13:37,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=246140.0, ans=0.125 +2024-09-17 15:13:55,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=246220.0, ans=0.1 +2024-09-17 15:13:55,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=246220.0, ans=0.125 +2024-09-17 15:14:04,044 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:14:30,686 INFO [train.py:1198] (0/2) Epoch 14, batch 2750, loss[loss=0.243, ctc_loss=0.1475, cr_loss=0.3977, attn_decoder_loss=0.2448, over 29531.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.153, cr_loss=0.3937, attn_decoder_loss=0.2585, over 5794672.57 frames. 
], batch size: 75, lr: 7.98e-03, grad_scale: 8.0 +2024-09-17 15:14:53,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=246340.0, ans=0.07 +2024-09-17 15:15:01,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=246380.0, ans=0.2 +2024-09-17 15:15:35,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=246460.0, ans=0.025 +2024-09-17 15:15:36,359 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.348e+01 8.822e+01 9.426e+01 1.011e+02 2.167e+02, threshold=1.885e+02, percent-clipped=1.0 +2024-09-17 15:15:41,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=246460.0, ans=0.025 +2024-09-17 15:15:42,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=246460.0, ans=0.125 +2024-09-17 15:15:47,144 INFO [train.py:1198] (0/2) Epoch 14, batch 2800, loss[loss=0.277, ctc_loss=0.1901, cr_loss=0.4241, attn_decoder_loss=0.2773, over 19850.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1534, cr_loss=0.3939, attn_decoder_loss=0.2587, over 5776466.45 frames. ], batch size: 209, lr: 7.97e-03, grad_scale: 16.0 +2024-09-17 15:15:57,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=246500.0, ans=0.1 +2024-09-17 15:16:01,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=246540.0, ans=0.0 +2024-09-17 15:16:09,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=246540.0, ans=0.1 +2024-09-17 15:16:11,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=246540.0, ans=0.0 +2024-09-17 15:16:22,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=246580.0, ans=0.125 +2024-09-17 15:16:32,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=246620.0, ans=0.0 +2024-09-17 15:16:46,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=246660.0, ans=0.0 +2024-09-17 15:16:46,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=246660.0, ans=0.0 +2024-09-17 15:16:58,492 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.20 vs. limit=15.0 +2024-09-17 15:17:02,338 INFO [train.py:1198] (0/2) Epoch 14, batch 2850, loss[loss=0.2367, ctc_loss=0.1362, cr_loss=0.3719, attn_decoder_loss=0.2396, over 29516.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1539, cr_loss=0.3942, attn_decoder_loss=0.259, over 5761316.69 frames. 
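
The Whitening lines compare a per-module statistic against a limit (for example metric=9.28 vs. limit=15.0 above); a whitening constraint of this kind only intervenes once the measured metric exceeds its limit, which is why most entries sit below it. One plausible reading of the metric, shown below as a speculative sketch rather than the definition used in scaling.py, is a measure of how far the feature covariance is from a multiple of the identity (exactly 1.0 for perfectly "white" features):

    import torch

    def whitening_metric(x: torch.Tensor) -> torch.Tensor:
        # Speculative proxy for the logged metric: mean squared eigenvalue of
        # the feature covariance divided by the squared mean eigenvalue.
        # Equals 1.0 when all eigenvalues are equal (whitened features) and
        # grows as the covariance becomes ill-conditioned.
        feats = x.reshape(-1, x.shape[-1])
        cov = feats.T @ feats / feats.shape[0]
        eigs = torch.linalg.eigvalsh(cov)
        return (eigs ** 2).mean() / eigs.mean() ** 2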
], batch size: 77, lr: 7.97e-03, grad_scale: 8.0 +2024-09-17 15:17:22,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=246740.0, ans=0.09899494936611666 +2024-09-17 15:17:27,476 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.28 vs. limit=15.0 +2024-09-17 15:18:06,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=246860.0, ans=0.125 +2024-09-17 15:18:13,427 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.023e+01 9.108e+01 9.602e+01 1.037e+02 1.624e+02, threshold=1.920e+02, percent-clipped=0.0 +2024-09-17 15:18:22,639 INFO [train.py:1198] (0/2) Epoch 14, batch 2900, loss[loss=0.2389, ctc_loss=0.1355, cr_loss=0.3863, attn_decoder_loss=0.2419, over 29400.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.1547, cr_loss=0.3966, attn_decoder_loss=0.2602, over 5786979.97 frames. ], batch size: 79, lr: 7.97e-03, grad_scale: 8.0 +2024-09-17 15:18:28,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=246900.0, ans=0.125 +2024-09-17 15:18:44,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=246940.0, ans=0.125 +2024-09-17 15:18:54,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=246980.0, ans=0.025 +2024-09-17 15:19:38,652 INFO [train.py:1198] (0/2) Epoch 14, batch 2950, loss[loss=0.2314, ctc_loss=0.127, cr_loss=0.3582, attn_decoder_loss=0.2351, over 29519.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1535, cr_loss=0.3947, attn_decoder_loss=0.2589, over 5781576.78 frames. ], batch size: 75, lr: 7.97e-03, grad_scale: 8.0 +2024-09-17 15:19:40,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.17 vs. limit=15.0 +2024-09-17 15:19:43,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=247100.0, ans=0.125 +2024-09-17 15:20:03,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=247140.0, ans=0.125 +2024-09-17 15:20:07,000 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.23 vs. limit=22.5 +2024-09-17 15:20:09,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=247180.0, ans=0.05 +2024-09-17 15:20:21,250 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.66 vs. limit=15.0 +2024-09-17 15:20:25,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=247220.0, ans=0.125 +2024-09-17 15:20:46,137 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.197e+01 8.963e+01 9.607e+01 1.034e+02 4.390e+02, threshold=1.921e+02, percent-clipped=3.0 +2024-09-17 15:20:55,398 INFO [train.py:1198] (0/2) Epoch 14, batch 3000, loss[loss=0.2541, ctc_loss=0.1491, cr_loss=0.3749, attn_decoder_loss=0.2574, over 29737.00 frames. 
], tot_loss[loss=0.2561, ctc_loss=0.1535, cr_loss=0.3947, attn_decoder_loss=0.2587, over 5783242.02 frames. ], batch size: 81, lr: 7.96e-03, grad_scale: 8.0 +2024-09-17 15:20:55,399 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 15:21:06,924 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.8895, 5.6860, 5.5123, 5.1472], device='cuda:0') +2024-09-17 15:21:13,886 INFO [train.py:1230] (0/2) Epoch 14, validation: loss=0.212, ctc_loss=0.04343, cr_loss=5.03e-15, attn_decoder_loss=0.2308, over 944034.00 frames. +2024-09-17 15:21:13,886 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 15:21:23,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=247300.0, ans=0.125 +2024-09-17 15:21:25,914 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.65 vs. limit=15.0 +2024-09-17 15:21:37,132 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.83 vs. limit=15.0 +2024-09-17 15:22:16,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=247420.0, ans=0.0 +2024-09-17 15:22:34,534 INFO [train.py:1198] (0/2) Epoch 14, batch 3050, loss[loss=0.2484, ctc_loss=0.1519, cr_loss=0.3955, attn_decoder_loss=0.2503, over 29541.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1546, cr_loss=0.3968, attn_decoder_loss=0.2599, over 5777703.82 frames. ], batch size: 76, lr: 7.96e-03, grad_scale: 8.0 +2024-09-17 15:22:55,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=247540.0, ans=0.125 +2024-09-17 15:22:56,538 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.54 vs. limit=15.0 +2024-09-17 15:23:02,773 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.23 vs. limit=15.0 +2024-09-17 15:23:12,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=247580.0, ans=0.125 +2024-09-17 15:23:40,609 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.297e+01 9.179e+01 9.620e+01 1.029e+02 1.592e+02, threshold=1.924e+02, percent-clipped=0.0 +2024-09-17 15:23:49,652 INFO [train.py:1198] (0/2) Epoch 14, batch 3100, loss[loss=0.2578, ctc_loss=0.1534, cr_loss=0.3913, attn_decoder_loss=0.2607, over 29250.00 frames. ], tot_loss[loss=0.2566, ctc_loss=0.1539, cr_loss=0.3954, attn_decoder_loss=0.2592, over 5777800.97 frames. 
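
Two details in the validation block above are worth noting: cr_loss collapses to roughly 5e-15, i.e. the consistency-regularization term is effectively zero in eval mode, and the same weighted-sum reading used earlier reproduces the headline number. A one-line check with the assumed 0.1/0.9/0.02 scales:

    # Validation entry above: loss=0.212, ctc_loss=0.04343, cr_loss~0,
    # attn_decoder_loss=0.2308. Weights assumed from the exp-dir name.
    val = 0.1 * 0.04343 + 0.9 * 0.2308 + 0.02 * 5.03e-15
    print(round(val, 3))  # 0.212 -- the logged validation loss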
], batch size: 100, lr: 7.96e-03, grad_scale: 8.0 +2024-09-17 15:23:54,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=247700.0, ans=0.125 +2024-09-17 15:23:55,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=247700.0, ans=0.0 +2024-09-17 15:24:14,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=247740.0, ans=0.125 +2024-09-17 15:24:15,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=247740.0, ans=0.0 +2024-09-17 15:24:42,275 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.80 vs. limit=15.0 +2024-09-17 15:24:46,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=247820.0, ans=0.125 +2024-09-17 15:24:50,046 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.40 vs. limit=6.0 +2024-09-17 15:25:02,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=247860.0, ans=0.2 +2024-09-17 15:25:05,570 INFO [train.py:1198] (0/2) Epoch 14, batch 3150, loss[loss=0.2793, ctc_loss=0.1758, cr_loss=0.439, attn_decoder_loss=0.2811, over 28852.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.1541, cr_loss=0.396, attn_decoder_loss=0.2594, over 5784014.90 frames. ], batch size: 104, lr: 7.95e-03, grad_scale: 4.0 +2024-09-17 15:25:14,106 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.91 vs. limit=10.0 +2024-09-17 15:25:17,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=247900.0, ans=0.025 +2024-09-17 15:25:24,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=247940.0, ans=0.0 +2024-09-17 15:25:26,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=247940.0, ans=0.125 +2024-09-17 15:25:27,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=247940.0, ans=0.125 +2024-09-17 15:25:35,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=247940.0, ans=0.125 +2024-09-17 15:25:44,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_na.min_abs, batch_count=247980.0, ans=0.02 +2024-09-17 15:26:03,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=248020.0, ans=0.125 +2024-09-17 15:26:18,271 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.880e+01 9.083e+01 9.655e+01 1.039e+02 2.253e+02, threshold=1.931e+02, percent-clipped=1.0 +2024-09-17 15:26:25,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.28 vs. 
limit=15.0 +2024-09-17 15:26:25,943 INFO [train.py:1198] (0/2) Epoch 14, batch 3200, loss[loss=0.2542, ctc_loss=0.1498, cr_loss=0.393, attn_decoder_loss=0.2571, over 29426.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1538, cr_loss=0.3952, attn_decoder_loss=0.2589, over 5795067.13 frames. ], batch size: 79, lr: 7.95e-03, grad_scale: 8.0 +2024-09-17 15:26:26,823 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.47 vs. limit=6.0 +2024-09-17 15:26:53,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=248140.0, ans=0.125 +2024-09-17 15:27:08,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=248180.0, ans=0.125 +2024-09-17 15:27:12,356 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.08 vs. limit=22.5 +2024-09-17 15:27:26,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=248260.0, ans=0.125 +2024-09-17 15:27:42,187 INFO [train.py:1198] (0/2) Epoch 14, batch 3250, loss[loss=0.2588, ctc_loss=0.1524, cr_loss=0.3927, attn_decoder_loss=0.2619, over 29725.00 frames. ], tot_loss[loss=0.2566, ctc_loss=0.1538, cr_loss=0.3952, attn_decoder_loss=0.2592, over 5800572.53 frames. ], batch size: 84, lr: 7.95e-03, grad_scale: 8.0 +2024-09-17 15:27:43,059 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.85 vs. limit=22.5 +2024-09-17 15:28:15,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=248380.0, ans=0.125 +2024-09-17 15:28:24,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=248380.0, ans=0.0 +2024-09-17 15:28:37,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=248420.0, ans=0.125 +2024-09-17 15:28:49,497 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.916e+01 9.075e+01 9.653e+01 1.031e+02 3.050e+02, threshold=1.931e+02, percent-clipped=1.0 +2024-09-17 15:28:57,457 INFO [train.py:1198] (0/2) Epoch 14, batch 3300, loss[loss=0.2639, ctc_loss=0.1552, cr_loss=0.3807, attn_decoder_loss=0.2675, over 28328.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1523, cr_loss=0.3927, attn_decoder_loss=0.2575, over 5798526.54 frames. ], batch size: 111, lr: 7.94e-03, grad_scale: 8.0 +2024-09-17 15:29:00,145 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.99 vs. 
limit=10.0 +2024-09-17 15:29:13,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=248540.0, ans=0.025 +2024-09-17 15:29:13,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=248540.0, ans=0.125 +2024-09-17 15:29:13,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=248540.0, ans=0.2 +2024-09-17 15:29:24,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=248540.0, ans=0.0 +2024-09-17 15:29:29,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=248580.0, ans=0.125 +2024-09-17 15:30:09,803 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.43 vs. limit=15.0 +2024-09-17 15:30:17,821 INFO [train.py:1198] (0/2) Epoch 14, batch 3350, loss[loss=0.2702, ctc_loss=0.1761, cr_loss=0.4186, attn_decoder_loss=0.2714, over 28813.00 frames. ], tot_loss[loss=0.2557, ctc_loss=0.1534, cr_loss=0.3943, attn_decoder_loss=0.2583, over 5774783.68 frames. ], batch size: 104, lr: 7.94e-03, grad_scale: 8.0 +2024-09-17 15:30:22,726 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:30:22,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=248700.0, ans=0.1 +2024-09-17 15:30:36,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=248740.0, ans=0.0 +2024-09-17 15:30:50,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=248780.0, ans=0.1 +2024-09-17 15:30:53,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=248780.0, ans=0.0 +2024-09-17 15:30:59,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=248780.0, ans=0.125 +2024-09-17 15:31:03,095 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.53 vs. limit=12.0 +2024-09-17 15:31:08,927 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.64 vs. limit=15.0 +2024-09-17 15:31:26,422 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.815e+01 9.128e+01 9.625e+01 1.037e+02 1.571e+02, threshold=1.925e+02, percent-clipped=0.0 +2024-09-17 15:31:27,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.55 vs. limit=15.0 +2024-09-17 15:31:34,176 INFO [train.py:1198] (0/2) Epoch 14, batch 3400, loss[loss=0.2318, ctc_loss=0.1405, cr_loss=0.361, attn_decoder_loss=0.2339, over 29341.00 frames. ], tot_loss[loss=0.2557, ctc_loss=0.1535, cr_loss=0.3941, attn_decoder_loss=0.2583, over 5768521.76 frames. 
], batch size: 67, lr: 7.94e-03, grad_scale: 8.0 +2024-09-17 15:31:36,473 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.18 vs. limit=15.0 +2024-09-17 15:31:43,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=248900.0, ans=0.125 +2024-09-17 15:31:51,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=248940.0, ans=0.125 +2024-09-17 15:31:55,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=248940.0, ans=0.2 +2024-09-17 15:32:16,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=248980.0, ans=0.1 +2024-09-17 15:32:50,236 INFO [train.py:1198] (0/2) Epoch 14, batch 3450, loss[loss=0.2789, ctc_loss=0.1687, cr_loss=0.4164, attn_decoder_loss=0.2819, over 28254.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1538, cr_loss=0.3949, attn_decoder_loss=0.2588, over 5775546.64 frames. ], batch size: 111, lr: 7.93e-03, grad_scale: 8.0 +2024-09-17 15:32:51,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=249100.0, ans=0.125 +2024-09-17 15:33:09,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=249140.0, ans=0.125 +2024-09-17 15:33:14,482 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.00 vs. limit=6.0 +2024-09-17 15:33:32,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=249180.0, ans=0.125 +2024-09-17 15:33:37,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.55 vs. limit=22.5 +2024-09-17 15:33:41,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer_na.min_abs, batch_count=249220.0, ans=0.02 +2024-09-17 15:33:51,741 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.75 vs. limit=15.0 +2024-09-17 15:34:03,115 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.438e+01 8.913e+01 9.467e+01 9.956e+01 4.435e+02, threshold=1.893e+02, percent-clipped=2.0 +2024-09-17 15:34:10,846 INFO [train.py:1198] (0/2) Epoch 14, batch 3500, loss[loss=0.2394, ctc_loss=0.1417, cr_loss=0.3817, attn_decoder_loss=0.2418, over 29355.00 frames. ], tot_loss[loss=0.2556, ctc_loss=0.1532, cr_loss=0.3937, attn_decoder_loss=0.2583, over 5776803.72 frames. ], batch size: 71, lr: 7.93e-03, grad_scale: 8.0 +2024-09-17 15:34:35,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=249340.0, ans=0.125 +2024-09-17 15:34:49,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.81 vs. 
limit=15.0 +2024-09-17 15:35:04,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=249420.0, ans=0.0 +2024-09-17 15:35:11,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=249460.0, ans=0.0 +2024-09-17 15:35:22,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=249460.0, ans=10.0 +2024-09-17 15:35:25,488 INFO [train.py:1198] (0/2) Epoch 14, batch 3550, loss[loss=0.2655, ctc_loss=0.1504, cr_loss=0.3891, attn_decoder_loss=0.2697, over 29737.00 frames. ], tot_loss[loss=0.2552, ctc_loss=0.1526, cr_loss=0.3934, attn_decoder_loss=0.2579, over 5782880.64 frames. ], batch size: 89, lr: 7.93e-03, grad_scale: 8.0 +2024-09-17 15:35:27,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=249500.0, ans=0.025 +2024-09-17 15:35:33,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.27 vs. limit=10.0 +2024-09-17 15:35:36,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=249500.0, ans=0.1 +2024-09-17 15:35:43,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=249540.0, ans=0.2 +2024-09-17 15:35:52,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=249540.0, ans=0.125 +2024-09-17 15:35:58,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=249580.0, ans=0.125 +2024-09-17 15:36:31,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=249660.0, ans=0.125 +2024-09-17 15:36:33,062 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.058e+01 8.867e+01 9.428e+01 1.003e+02 3.029e+02, threshold=1.886e+02, percent-clipped=1.0 +2024-09-17 15:36:40,437 INFO [train.py:1198] (0/2) Epoch 14, batch 3600, loss[loss=0.2377, ctc_loss=0.1372, cr_loss=0.3609, attn_decoder_loss=0.2408, over 29516.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1527, cr_loss=0.3932, attn_decoder_loss=0.2582, over 5791676.02 frames. ], batch size: 77, lr: 7.92e-03, grad_scale: 16.0 +2024-09-17 15:37:00,897 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.32 vs. limit=15.0 +2024-09-17 15:37:02,706 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.42 vs. 
limit=22.5 +2024-09-17 15:37:04,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=249740.0, ans=0.0 +2024-09-17 15:37:07,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=249740.0, ans=0.125 +2024-09-17 15:37:12,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=249780.0, ans=0.0 +2024-09-17 15:37:12,722 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.93 vs. limit=15.0 +2024-09-17 15:37:16,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=249780.0, ans=0.1 +2024-09-17 15:37:26,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.72 vs. limit=22.5 +2024-09-17 15:37:33,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=249820.0, ans=0.125 +2024-09-17 15:37:36,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=249820.0, ans=0.09899494936611666 +2024-09-17 15:37:40,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=249860.0, ans=0.125 +2024-09-17 15:37:55,265 INFO [train.py:1198] (0/2) Epoch 14, batch 3650, loss[loss=0.2654, ctc_loss=0.1609, cr_loss=0.4105, attn_decoder_loss=0.2679, over 29483.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1522, cr_loss=0.3931, attn_decoder_loss=0.2578, over 5793872.93 frames. ], batch size: 90, lr: 7.92e-03, grad_scale: 8.0 +2024-09-17 15:38:18,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.23 vs. limit=22.5 +2024-09-17 15:38:20,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=249940.0, ans=0.125 +2024-09-17 15:38:46,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=250020.0, ans=0.1 +2024-09-17 15:38:54,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=250020.0, ans=0.1 +2024-09-17 15:38:55,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=250060.0, ans=0.125 +2024-09-17 15:39:05,889 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.411e+01 8.885e+01 9.531e+01 1.024e+02 1.907e+02, threshold=1.906e+02, percent-clipped=1.0 +2024-09-17 15:39:06,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=250060.0, ans=0.0 +2024-09-17 15:39:11,991 INFO [train.py:1198] (0/2) Epoch 14, batch 3700, loss[loss=0.267, ctc_loss=0.1593, cr_loss=0.4002, attn_decoder_loss=0.27, over 29705.00 frames. ], tot_loss[loss=0.2557, ctc_loss=0.1528, cr_loss=0.3939, attn_decoder_loss=0.2584, over 5803668.44 frames. 
], batch size: 84, lr: 7.92e-03, grad_scale: 8.0 +2024-09-17 15:39:15,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=250100.0, ans=0.1 +2024-09-17 15:39:15,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=250100.0, ans=0.0 +2024-09-17 15:39:18,671 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=12.0 +2024-09-17 15:39:23,370 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.60 vs. limit=15.0 +2024-09-17 15:39:28,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=250140.0, ans=0.125 +2024-09-17 15:39:31,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.19 vs. limit=6.0 +2024-09-17 15:39:37,119 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.63 vs. limit=15.0 +2024-09-17 15:39:49,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=250180.0, ans=0.07 +2024-09-17 15:40:22,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=250260.0, ans=0.125 +2024-09-17 15:40:28,282 INFO [train.py:1198] (0/2) Epoch 14, batch 3750, loss[loss=0.224, ctc_loss=0.1268, cr_loss=0.3418, attn_decoder_loss=0.2272, over 29348.00 frames. ], tot_loss[loss=0.2554, ctc_loss=0.1525, cr_loss=0.3936, attn_decoder_loss=0.2581, over 5806852.01 frames. ], batch size: 67, lr: 7.92e-03, grad_scale: 8.0 +2024-09-17 15:40:41,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=250340.0, ans=0.125 +2024-09-17 15:40:55,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=250340.0, ans=0.0 +2024-09-17 15:41:10,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=250380.0, ans=0.0 +2024-09-17 15:41:21,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=250420.0, ans=0.125 +2024-09-17 15:41:27,092 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:41:37,288 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.832e+01 9.041e+01 9.799e+01 1.080e+02 3.062e+02, threshold=1.960e+02, percent-clipped=2.0 +2024-09-17 15:41:42,593 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.84 vs. limit=10.0 +2024-09-17 15:41:43,358 INFO [train.py:1198] (0/2) Epoch 14, batch 3800, loss[loss=0.2611, ctc_loss=0.1454, cr_loss=0.3696, attn_decoder_loss=0.2657, over 29633.00 frames. ], tot_loss[loss=0.2552, ctc_loss=0.1523, cr_loss=0.3926, attn_decoder_loss=0.2579, over 5796680.26 frames. 
], batch size: 86, lr: 7.91e-03, grad_scale: 8.0 +2024-09-17 15:41:47,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=250500.0, ans=0.125 +2024-09-17 15:41:48,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=250500.0, ans=0.2 +2024-09-17 15:41:54,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=250500.0, ans=0.125 +2024-09-17 15:42:07,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_ff3.min_abs, batch_count=250540.0, ans=0.2 +2024-09-17 15:42:24,557 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.62 vs. limit=15.0 +2024-09-17 15:42:25,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.54 vs. limit=15.0 +2024-09-17 15:42:43,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=250660.0, ans=0.1 +2024-09-17 15:42:50,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=250660.0, ans=0.2 +2024-09-17 15:42:57,842 INFO [train.py:1198] (0/2) Epoch 14, batch 3850, loss[loss=0.2755, ctc_loss=0.1716, cr_loss=0.4305, attn_decoder_loss=0.2774, over 29295.00 frames. ], tot_loss[loss=0.2548, ctc_loss=0.1517, cr_loss=0.3925, attn_decoder_loss=0.2576, over 5809725.84 frames. ], batch size: 100, lr: 7.91e-03, grad_scale: 8.0 +2024-09-17 15:43:12,908 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:43:18,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=250740.0, ans=0.0 +2024-09-17 15:43:37,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.62 vs. limit=15.0 +2024-09-17 15:43:54,529 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.85 vs. limit=8.0 +2024-09-17 15:43:59,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=250860.0, ans=0.125 +2024-09-17 15:44:01,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=250860.0, ans=0.0 +2024-09-17 15:44:06,771 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.174e+01 8.879e+01 9.449e+01 1.016e+02 1.639e+02, threshold=1.890e+02, percent-clipped=0.0 +2024-09-17 15:44:12,012 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.28 vs. limit=15.0 +2024-09-17 15:44:14,352 INFO [train.py:1198] (0/2) Epoch 14, batch 3900, loss[loss=0.2556, ctc_loss=0.1512, cr_loss=0.3891, attn_decoder_loss=0.2585, over 29623.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.152, cr_loss=0.3933, attn_decoder_loss=0.258, over 5815157.84 frames. 
], batch size: 86, lr: 7.91e-03, grad_scale: 8.0 +2024-09-17 15:44:26,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=250900.0, ans=15.0 +2024-09-17 15:44:28,887 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.45 vs. limit=15.0 +2024-09-17 15:44:38,611 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.20 vs. limit=15.0 +2024-09-17 15:45:02,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=251020.0, ans=0.125 +2024-09-17 15:45:25,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=251060.0, ans=0.025 +2024-09-17 15:45:28,451 INFO [train.py:1198] (0/2) Epoch 14, batch 3950, loss[loss=0.2686, ctc_loss=0.1653, cr_loss=0.4261, attn_decoder_loss=0.2706, over 29473.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1512, cr_loss=0.3925, attn_decoder_loss=0.2577, over 5835004.13 frames. ], batch size: 97, lr: 7.90e-03, grad_scale: 8.0 +2024-09-17 15:45:42,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=251100.0, ans=0.0 +2024-09-17 15:45:49,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=251140.0, ans=0.2 +2024-09-17 15:46:07,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=251180.0, ans=0.125 +2024-09-17 15:46:07,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=251180.0, ans=0.09899494936611666 +2024-09-17 15:46:20,061 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.30 vs. limit=6.0 +2024-09-17 15:46:28,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=251260.0, ans=0.2 +2024-09-17 15:46:29,240 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.44 vs. limit=15.0 +2024-09-17 15:46:38,426 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.473e+01 8.707e+01 9.297e+01 1.013e+02 1.953e+02, threshold=1.859e+02, percent-clipped=1.0 +2024-09-17 15:46:44,344 INFO [train.py:1198] (0/2) Epoch 14, batch 4000, loss[loss=0.2384, ctc_loss=0.1397, cr_loss=0.3676, attn_decoder_loss=0.2411, over 29489.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1516, cr_loss=0.3924, attn_decoder_loss=0.2577, over 5811830.55 frames. 
], batch size: 74, lr: 7.90e-03, grad_scale: 16.0 +2024-09-17 15:46:44,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=251300.0, ans=0.025 +2024-09-17 15:46:47,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=251300.0, ans=0.125 +2024-09-17 15:46:53,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=251300.0, ans=0.125 +2024-09-17 15:47:02,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=251340.0, ans=0.0 +2024-09-17 15:47:19,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=251380.0, ans=0.95 +2024-09-17 15:47:27,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=251420.0, ans=0.125 +2024-09-17 15:47:39,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=251420.0, ans=0.05 +2024-09-17 15:47:42,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=251460.0, ans=0.0 +2024-09-17 15:47:57,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=251500.0, ans=0.2 +2024-09-17 15:47:58,628 INFO [train.py:1198] (0/2) Epoch 14, batch 4050, loss[loss=0.2935, ctc_loss=0.214, cr_loss=0.4319, attn_decoder_loss=0.2927, over 19526.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1519, cr_loss=0.3923, attn_decoder_loss=0.2578, over 5794382.49 frames. ], batch size: 209, lr: 7.90e-03, grad_scale: 8.0 +2024-09-17 15:48:01,295 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.90 vs. limit=8.0 +2024-09-17 15:48:01,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=251500.0, ans=0.04949747468305833 +2024-09-17 15:48:11,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=251540.0, ans=0.125 +2024-09-17 15:48:12,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.42 vs. limit=15.0 +2024-09-17 15:48:19,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=251540.0, ans=0.125 +2024-09-17 15:48:22,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=251540.0, ans=0.125 +2024-09-17 15:48:31,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=251580.0, ans=0.125 +2024-09-17 15:48:34,391 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.96 vs. 
limit=22.5 +2024-09-17 15:48:41,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=251620.0, ans=0.125 +2024-09-17 15:48:41,652 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.70 vs. limit=22.5 +2024-09-17 15:48:45,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=251620.0, ans=0.125 +2024-09-17 15:49:04,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=251660.0, ans=0.1 +2024-09-17 15:49:08,786 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.922e+01 9.172e+01 9.805e+01 1.134e+02 3.956e+02, threshold=1.961e+02, percent-clipped=2.0 +2024-09-17 15:49:13,316 INFO [train.py:1198] (0/2) Epoch 14, batch 4100, loss[loss=0.2777, ctc_loss=0.1684, cr_loss=0.4152, attn_decoder_loss=0.2807, over 29496.00 frames. ], tot_loss[loss=0.2554, ctc_loss=0.1524, cr_loss=0.393, attn_decoder_loss=0.2581, over 5791613.65 frames. ], batch size: 90, lr: 7.89e-03, grad_scale: 8.0 +2024-09-17 15:49:28,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=251740.0, ans=0.0 +2024-09-17 15:49:42,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=251780.0, ans=0.2 +2024-09-17 15:49:43,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=251780.0, ans=0.0 +2024-09-17 15:49:51,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=251780.0, ans=0.0 +2024-09-17 15:50:09,941 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.49 vs. limit=12.0 +2024-09-17 15:50:28,383 INFO [train.py:1198] (0/2) Epoch 14, batch 4150, loss[loss=0.2511, ctc_loss=0.1545, cr_loss=0.4005, attn_decoder_loss=0.2529, over 29526.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1521, cr_loss=0.3928, attn_decoder_loss=0.2577, over 5796987.72 frames. ], batch size: 77, lr: 7.89e-03, grad_scale: 8.0 +2024-09-17 15:50:46,922 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.75 vs. limit=22.5 +2024-09-17 15:50:47,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=251940.0, ans=0.2 +2024-09-17 15:51:08,149 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.03 vs. 
limit=10.0 +2024-09-17 15:51:14,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=252020.0, ans=0.025 +2024-09-17 15:51:22,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=252020.0, ans=0.125 +2024-09-17 15:51:29,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=252060.0, ans=0.0 +2024-09-17 15:51:29,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.45 vs. limit=15.0 +2024-09-17 15:51:37,954 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.595e+01 8.795e+01 9.407e+01 9.935e+01 3.892e+02, threshold=1.881e+02, percent-clipped=3.0 +2024-09-17 15:51:42,419 INFO [train.py:1198] (0/2) Epoch 14, batch 4200, loss[loss=0.2751, ctc_loss=0.1785, cr_loss=0.4195, attn_decoder_loss=0.2765, over 29532.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.1523, cr_loss=0.3938, attn_decoder_loss=0.258, over 5798155.89 frames. ], batch size: 90, lr: 7.89e-03, grad_scale: 8.0 +2024-09-17 15:51:47,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=252100.0, ans=0.125 +2024-09-17 15:51:50,629 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.30 vs. limit=15.0 +2024-09-17 15:51:58,477 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.03 vs. limit=10.0 +2024-09-17 15:52:22,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=252180.0, ans=0.0 +2024-09-17 15:52:24,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=252180.0, ans=0.2 +2024-09-17 15:52:25,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=252220.0, ans=0.0 +2024-09-17 15:52:28,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=252220.0, ans=10.0 +2024-09-17 15:52:38,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=252220.0, ans=0.125 +2024-09-17 15:52:56,848 INFO [train.py:1198] (0/2) Epoch 14, batch 4250, loss[loss=0.2294, ctc_loss=0.1222, cr_loss=0.3553, attn_decoder_loss=0.2334, over 29518.00 frames. ], tot_loss[loss=0.2554, ctc_loss=0.1518, cr_loss=0.3934, attn_decoder_loss=0.2581, over 5804732.15 frames. 
], batch size: 74, lr: 7.88e-03, grad_scale: 8.0 +2024-09-17 15:53:31,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=252380.0, ans=0.0 +2024-09-17 15:53:57,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=252460.0, ans=0.0 +2024-09-17 15:54:06,787 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.779e+01 8.931e+01 9.548e+01 1.031e+02 6.441e+02, threshold=1.910e+02, percent-clipped=3.0 +2024-09-17 15:54:08,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=252460.0, ans=0.125 +2024-09-17 15:54:11,264 INFO [train.py:1198] (0/2) Epoch 14, batch 4300, loss[loss=0.2599, ctc_loss=0.1546, cr_loss=0.425, attn_decoder_loss=0.2622, over 29533.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1522, cr_loss=0.3936, attn_decoder_loss=0.2583, over 5793951.36 frames. ], batch size: 87, lr: 7.88e-03, grad_scale: 8.0 +2024-09-17 15:54:12,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.50 vs. limit=6.0 +2024-09-17 15:54:21,011 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.87 vs. limit=15.0 +2024-09-17 15:54:49,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.39 vs. limit=15.0 +2024-09-17 15:55:15,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=252660.0, ans=0.0 +2024-09-17 15:55:25,799 INFO [train.py:1198] (0/2) Epoch 14, batch 4350, loss[loss=0.2722, ctc_loss=0.1589, cr_loss=0.4135, attn_decoder_loss=0.2756, over 29489.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1549, cr_loss=0.3985, attn_decoder_loss=0.2616, over 5795244.14 frames. ], batch size: 97, lr: 7.88e-03, grad_scale: 8.0 +2024-09-17 15:55:30,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=252700.0, ans=0.125 +2024-09-17 15:55:54,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=252780.0, ans=0.0 +2024-09-17 15:55:56,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=252780.0, ans=0.0 +2024-09-17 15:56:09,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=252820.0, ans=0.125 +2024-09-17 15:56:12,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=252820.0, ans=0.0 +2024-09-17 15:56:16,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=252820.0, ans=0.0 +2024-09-17 15:56:27,371 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.87 vs. limit=15.0 +2024-09-17 15:56:35,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.19 vs. 
limit=6.0 +2024-09-17 15:56:35,458 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.459e+01 9.298e+01 9.711e+01 1.038e+02 2.895e+02, threshold=1.942e+02, percent-clipped=1.0 +2024-09-17 15:56:39,910 INFO [train.py:1198] (0/2) Epoch 14, batch 4400, loss[loss=0.2583, ctc_loss=0.1569, cr_loss=0.3871, attn_decoder_loss=0.261, over 27302.00 frames. ], tot_loss[loss=0.2613, ctc_loss=0.1568, cr_loss=0.4012, attn_decoder_loss=0.264, over 5766591.99 frames. ], batch size: 124, lr: 7.87e-03, grad_scale: 16.0 +2024-09-17 15:56:45,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=252900.0, ans=0.1 +2024-09-17 15:57:12,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=252980.0, ans=0.0 +2024-09-17 15:57:27,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=253020.0, ans=0.125 +2024-09-17 15:57:34,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=253020.0, ans=0.1 +2024-09-17 15:57:36,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=253020.0, ans=10.0 +2024-09-17 15:57:55,065 INFO [train.py:1198] (0/2) Epoch 14, batch 4450, loss[loss=0.2945, ctc_loss=0.2137, cr_loss=0.448, attn_decoder_loss=0.2935, over 19987.00 frames. ], tot_loss[loss=0.2645, ctc_loss=0.1618, cr_loss=0.4055, attn_decoder_loss=0.2669, over 5579062.46 frames. ], batch size: 210, lr: 7.87e-03, grad_scale: 8.0 +2024-09-17 15:58:07,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=253100.0, ans=0.0 +2024-09-17 15:58:13,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=253140.0, ans=0.125 +2024-09-17 15:58:30,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=253180.0, ans=0.025 +2024-09-17 15:58:48,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=253220.0, ans=0.0 +2024-09-17 15:58:48,602 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=8.37 vs. limit=10.0 +2024-09-17 15:58:54,667 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=10.75 vs. limit=15.0 +2024-09-17 15:59:02,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=253260.0, ans=0.1 +2024-09-17 15:59:08,786 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.421e+01 1.004e+02 1.135e+02 1.248e+02 2.199e+02, threshold=2.271e+02, percent-clipped=1.0 +2024-09-17 15:59:09,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=253300.0, ans=0.0 +2024-09-17 15:59:10,364 INFO [train.py:1198] (0/2) Epoch 14, batch 4500, loss[loss=0.2795, ctc_loss=0.1869, cr_loss=0.3981, attn_decoder_loss=0.2809, over 20589.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.1674, cr_loss=0.4073, attn_decoder_loss=0.2693, over 5236849.44 frames. 
], batch size: 209, lr: 7.87e-03, grad_scale: 8.0 +2024-09-17 15:59:27,258 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:59:34,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=253340.0, ans=0.125 +2024-09-17 15:59:47,763 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-14.pt +2024-09-17 16:00:38,581 INFO [train.py:1198] (0/2) Epoch 15, batch 0, loss[loss=0.2413, ctc_loss=0.1374, cr_loss=0.3759, attn_decoder_loss=0.2445, over 29563.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1374, cr_loss=0.3759, attn_decoder_loss=0.2445, over 29563.00 frames. ], batch size: 73, lr: 7.60e-03, grad_scale: 16.0 +2024-09-17 16:00:38,582 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 16:00:56,922 INFO [train.py:1230] (0/2) Epoch 15, validation: loss=0.2128, ctc_loss=0.04201, cr_loss=5.567e-15, attn_decoder_loss=0.2317, over 944034.00 frames. +2024-09-17 16:00:56,923 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 16:00:58,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=253400.0, ans=0.0 +2024-09-17 16:01:02,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.75 vs. limit=15.0 +2024-09-17 16:01:19,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=253440.0, ans=0.125 +2024-09-17 16:01:39,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=253480.0, ans=0.2 +2024-09-17 16:01:48,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=253520.0, ans=0.125 +2024-09-17 16:02:03,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=253560.0, ans=0.025 +2024-09-17 16:02:09,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.11 vs. limit=10.0 +2024-09-17 16:02:15,230 INFO [train.py:1198] (0/2) Epoch 15, batch 50, loss[loss=0.2306, ctc_loss=0.1325, cr_loss=0.356, attn_decoder_loss=0.2336, over 29463.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.154, cr_loss=0.3981, attn_decoder_loss=0.2588, over 1269337.52 frames. ], batch size: 70, lr: 7.60e-03, grad_scale: 8.0 +2024-09-17 16:02:17,582 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.89 vs. 
limit=15.0 +2024-09-17 16:02:20,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=253600.0, ans=0.0 +2024-09-17 16:02:24,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=253600.0, ans=0.2 +2024-09-17 16:02:53,177 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.523e+01 9.844e+01 1.055e+02 1.171e+02 3.873e+02, threshold=2.109e+02, percent-clipped=1.0 +2024-09-17 16:03:21,738 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.89 vs. limit=22.5 +2024-09-17 16:03:33,144 INFO [train.py:1198] (0/2) Epoch 15, batch 100, loss[loss=0.2503, ctc_loss=0.1535, cr_loss=0.3923, attn_decoder_loss=0.2523, over 29538.00 frames. ], tot_loss[loss=0.2592, ctc_loss=0.1565, cr_loss=0.4014, attn_decoder_loss=0.2617, over 2252512.09 frames. ], batch size: 76, lr: 7.59e-03, grad_scale: 8.0 +2024-09-17 16:03:39,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=253800.0, ans=0.1 +2024-09-17 16:03:56,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=253840.0, ans=0.0 +2024-09-17 16:04:00,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=253840.0, ans=0.125 +2024-09-17 16:04:31,007 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.40 vs. limit=15.0 +2024-09-17 16:04:47,906 INFO [train.py:1198] (0/2) Epoch 15, batch 150, loss[loss=0.2342, ctc_loss=0.1379, cr_loss=0.3832, attn_decoder_loss=0.2364, over 29405.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1529, cr_loss=0.3957, attn_decoder_loss=0.2587, over 3046931.10 frames. ], batch size: 70, lr: 7.59e-03, grad_scale: 8.0 +2024-09-17 16:04:50,446 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.94 vs. limit=10.0 +2024-09-17 16:05:08,446 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.33 vs. limit=15.0 +2024-09-17 16:05:10,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=254040.0, ans=0.2 +2024-09-17 16:05:25,782 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.856e+01 8.837e+01 9.448e+01 1.022e+02 1.353e+02, threshold=1.890e+02, percent-clipped=0.0 +2024-09-17 16:05:59,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=254160.0, ans=0.125 +2024-09-17 16:06:03,418 INFO [train.py:1198] (0/2) Epoch 15, batch 200, loss[loss=0.2586, ctc_loss=0.1502, cr_loss=0.3838, attn_decoder_loss=0.2621, over 27273.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1521, cr_loss=0.3947, attn_decoder_loss=0.2577, over 3658384.43 frames. 
], batch size: 124, lr: 7.59e-03, grad_scale: 8.0 +2024-09-17 16:06:10,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=254200.0, ans=0.125 +2024-09-17 16:06:44,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=254280.0, ans=0.5 +2024-09-17 16:07:19,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=254360.0, ans=0.125 +2024-09-17 16:07:24,607 INFO [train.py:1198] (0/2) Epoch 15, batch 250, loss[loss=0.2749, ctc_loss=0.1682, cr_loss=0.4344, attn_decoder_loss=0.2771, over 29250.00 frames. ], tot_loss[loss=0.2552, ctc_loss=0.152, cr_loss=0.3949, attn_decoder_loss=0.2578, over 4140721.06 frames. ], batch size: 100, lr: 7.58e-03, grad_scale: 8.0 +2024-09-17 16:07:27,145 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.58 vs. limit=15.0 +2024-09-17 16:07:45,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=254440.0, ans=15.0 +2024-09-17 16:08:02,186 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.896e+01 9.283e+01 1.022e+02 2.095e+02, threshold=1.857e+02, percent-clipped=1.0 +2024-09-17 16:08:26,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=254560.0, ans=0.125 +2024-09-17 16:08:39,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=254600.0, ans=0.125 +2024-09-17 16:08:40,127 INFO [train.py:1198] (0/2) Epoch 15, batch 300, loss[loss=0.2755, ctc_loss=0.1735, cr_loss=0.4303, attn_decoder_loss=0.2773, over 29516.00 frames. ], tot_loss[loss=0.2548, ctc_loss=0.1519, cr_loss=0.3943, attn_decoder_loss=0.2575, over 4510343.75 frames. ], batch size: 92, lr: 7.58e-03, grad_scale: 8.0 +2024-09-17 16:08:47,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=254600.0, ans=0.05 +2024-09-17 16:08:52,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=254600.0, ans=0.0 +2024-09-17 16:09:45,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=254760.0, ans=0.0 +2024-09-17 16:09:49,508 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.56 vs. limit=15.0 +2024-09-17 16:09:50,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=254760.0, ans=0.125 +2024-09-17 16:09:54,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=254800.0, ans=0.125 +2024-09-17 16:09:54,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=254800.0, ans=0.05 +2024-09-17 16:09:56,022 INFO [train.py:1198] (0/2) Epoch 15, batch 350, loss[loss=0.2247, ctc_loss=0.1305, cr_loss=0.3487, attn_decoder_loss=0.2274, over 29329.00 frames. 
], tot_loss[loss=0.2552, ctc_loss=0.1518, cr_loss=0.3951, attn_decoder_loss=0.2579, over 4795972.86 frames. ], batch size: 71, lr: 7.58e-03, grad_scale: 8.0 +2024-09-17 16:10:00,056 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.22 vs. limit=22.5 +2024-09-17 16:10:15,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=254840.0, ans=0.125 +2024-09-17 16:10:15,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=254840.0, ans=0.125 +2024-09-17 16:10:16,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=254840.0, ans=0.125 +2024-09-17 16:10:18,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=254840.0, ans=0.2 +2024-09-17 16:10:23,013 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.07 vs. limit=22.5 +2024-09-17 16:10:32,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.89 vs. limit=15.0 +2024-09-17 16:10:35,870 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.547e+01 8.706e+01 9.394e+01 1.041e+02 2.813e+02, threshold=1.879e+02, percent-clipped=1.0 +2024-09-17 16:10:38,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.12 vs. limit=6.0 +2024-09-17 16:10:51,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=254920.0, ans=0.0 +2024-09-17 16:10:55,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=254920.0, ans=0.125 +2024-09-17 16:11:16,018 INFO [train.py:1198] (0/2) Epoch 15, batch 400, loss[loss=0.2605, ctc_loss=0.1531, cr_loss=0.3978, attn_decoder_loss=0.2636, over 29730.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1518, cr_loss=0.3947, attn_decoder_loss=0.2578, over 5024796.63 frames. ], batch size: 82, lr: 7.58e-03, grad_scale: 16.0 +2024-09-17 16:11:17,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=255000.0, ans=0.0 +2024-09-17 16:11:32,512 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.61 vs. limit=15.0 +2024-09-17 16:11:37,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=255040.0, ans=0.125 +2024-09-17 16:12:15,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=255160.0, ans=0.125 +2024-09-17 16:12:31,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=255200.0, ans=0.125 +2024-09-17 16:12:32,477 INFO [train.py:1198] (0/2) Epoch 15, batch 450, loss[loss=0.2729, ctc_loss=0.1576, cr_loss=0.4117, attn_decoder_loss=0.2766, over 29704.00 frames. 
], tot_loss[loss=0.2553, ctc_loss=0.1519, cr_loss=0.3942, attn_decoder_loss=0.258, over 5187054.86 frames. ], batch size: 83, lr: 7.57e-03, grad_scale: 8.0 +2024-09-17 16:12:35,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=255200.0, ans=0.05 +2024-09-17 16:12:46,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=255240.0, ans=0.125 +2024-09-17 16:12:50,241 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.29 vs. limit=22.5 +2024-09-17 16:12:54,905 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.72 vs. limit=22.5 +2024-09-17 16:13:09,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=255280.0, ans=0.125 +2024-09-17 16:13:10,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=255280.0, ans=0.0 +2024-09-17 16:13:11,919 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.489e+01 8.843e+01 9.415e+01 1.015e+02 2.907e+02, threshold=1.883e+02, percent-clipped=1.0 +2024-09-17 16:13:16,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=255320.0, ans=0.125 +2024-09-17 16:13:37,352 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.33 vs. limit=15.0 +2024-09-17 16:13:38,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=255360.0, ans=0.2 +2024-09-17 16:13:48,522 INFO [train.py:1198] (0/2) Epoch 15, batch 500, loss[loss=0.2755, ctc_loss=0.1678, cr_loss=0.4305, attn_decoder_loss=0.2778, over 29436.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1509, cr_loss=0.3928, attn_decoder_loss=0.257, over 5329737.46 frames. ], batch size: 94, lr: 7.57e-03, grad_scale: 8.0 +2024-09-17 16:13:50,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=255400.0, ans=0.0 +2024-09-17 16:14:08,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=255440.0, ans=0.04949747468305833 +2024-09-17 16:14:21,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=255480.0, ans=0.0 +2024-09-17 16:14:22,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=255480.0, ans=0.0 +2024-09-17 16:15:07,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=255600.0, ans=0.125 +2024-09-17 16:15:08,807 INFO [train.py:1198] (0/2) Epoch 15, batch 550, loss[loss=0.2684, ctc_loss=0.159, cr_loss=0.4169, attn_decoder_loss=0.2713, over 28792.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.151, cr_loss=0.3923, attn_decoder_loss=0.2569, over 5421703.10 frames. 
], batch size: 104, lr: 7.57e-03, grad_scale: 8.0 +2024-09-17 16:15:48,133 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.773e+01 8.993e+01 9.917e+01 1.076e+02 7.641e+02, threshold=1.983e+02, percent-clipped=4.0 +2024-09-17 16:15:51,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=255680.0, ans=0.0 +2024-09-17 16:16:06,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=255720.0, ans=0.125 +2024-09-17 16:16:10,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.56 vs. limit=12.0 +2024-09-17 16:16:24,510 INFO [train.py:1198] (0/2) Epoch 15, batch 600, loss[loss=0.2647, ctc_loss=0.158, cr_loss=0.4115, attn_decoder_loss=0.2674, over 29196.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1504, cr_loss=0.3913, attn_decoder_loss=0.2568, over 5507484.68 frames. ], batch size: 100, lr: 7.56e-03, grad_scale: 8.0 +2024-09-17 16:16:30,256 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.75 vs. limit=12.0 +2024-09-17 16:16:31,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=255800.0, ans=0.125 +2024-09-17 16:16:37,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.88 vs. limit=15.0 +2024-09-17 16:16:49,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=255840.0, ans=0.0 +2024-09-17 16:17:17,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=255920.0, ans=0.125 +2024-09-17 16:17:25,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=255960.0, ans=0.0 +2024-09-17 16:17:35,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=255960.0, ans=0.0 +2024-09-17 16:17:38,683 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-64000.pt +2024-09-17 16:17:47,272 INFO [train.py:1198] (0/2) Epoch 15, batch 650, loss[loss=0.2421, ctc_loss=0.139, cr_loss=0.3733, attn_decoder_loss=0.2453, over 29746.00 frames. ], tot_loss[loss=0.2536, ctc_loss=0.15, cr_loss=0.3907, attn_decoder_loss=0.2564, over 5584868.09 frames. 
], batch size: 81, lr: 7.56e-03, grad_scale: 8.0 +2024-09-17 16:18:08,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=256040.0, ans=0.125 +2024-09-17 16:18:22,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=256080.0, ans=10.0 +2024-09-17 16:18:28,867 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.348e+01 8.547e+01 9.070e+01 9.577e+01 1.264e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-17 16:18:42,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=256120.0, ans=0.0 +2024-09-17 16:18:53,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=256160.0, ans=0.0 +2024-09-17 16:19:07,304 INFO [train.py:1198] (0/2) Epoch 15, batch 700, loss[loss=0.243, ctc_loss=0.1436, cr_loss=0.3752, attn_decoder_loss=0.2457, over 29529.00 frames. ], tot_loss[loss=0.2541, ctc_loss=0.1504, cr_loss=0.3918, attn_decoder_loss=0.257, over 5635969.80 frames. ], batch size: 76, lr: 7.56e-03, grad_scale: 8.0 +2024-09-17 16:20:08,006 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.05 vs. limit=22.5 +2024-09-17 16:20:10,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=256360.0, ans=10.0 +2024-09-17 16:20:23,553 INFO [train.py:1198] (0/2) Epoch 15, batch 750, loss[loss=0.256, ctc_loss=0.1482, cr_loss=0.3851, attn_decoder_loss=0.2595, over 29710.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1501, cr_loss=0.3911, attn_decoder_loss=0.2569, over 5675552.67 frames. ], batch size: 82, lr: 7.55e-03, grad_scale: 8.0 +2024-09-17 16:20:25,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=256400.0, ans=0.0 +2024-09-17 16:21:02,922 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.520e+01 8.739e+01 9.295e+01 9.820e+01 3.813e+02, threshold=1.859e+02, percent-clipped=2.0 +2024-09-17 16:21:10,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=256520.0, ans=0.1 +2024-09-17 16:21:39,086 INFO [train.py:1198] (0/2) Epoch 15, batch 800, loss[loss=0.2232, ctc_loss=0.1239, cr_loss=0.3518, attn_decoder_loss=0.2265, over 29618.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1506, cr_loss=0.3919, attn_decoder_loss=0.2571, over 5706763.84 frames. ], batch size: 73, lr: 7.55e-03, grad_scale: 16.0 +2024-09-17 16:22:26,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=256720.0, ans=0.0 +2024-09-17 16:22:34,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=256720.0, ans=0.2 +2024-09-17 16:22:37,701 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.34 vs. limit=15.0 +2024-09-17 16:22:56,861 INFO [train.py:1198] (0/2) Epoch 15, batch 850, loss[loss=0.2681, ctc_loss=0.1592, cr_loss=0.4215, attn_decoder_loss=0.2708, over 29732.00 frames. ], tot_loss[loss=0.2535, ctc_loss=0.1496, cr_loss=0.3903, attn_decoder_loss=0.2564, over 5736590.34 frames. 
], batch size: 89, lr: 7.55e-03, grad_scale: 8.0 +2024-09-17 16:23:00,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=256800.0, ans=0.125 +2024-09-17 16:23:06,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=256800.0, ans=0.125 +2024-09-17 16:23:12,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=256840.0, ans=0.0 +2024-09-17 16:23:18,038 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.59 vs. limit=12.0 +2024-09-17 16:23:32,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=256880.0, ans=0.0 +2024-09-17 16:23:33,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=256880.0, ans=0.125 +2024-09-17 16:23:36,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=256880.0, ans=0.125 +2024-09-17 16:23:39,536 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.676e+01 9.240e+01 9.818e+01 3.041e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-17 16:23:46,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=256920.0, ans=0.07 +2024-09-17 16:23:51,576 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.51 vs. limit=15.0 +2024-09-17 16:24:14,685 INFO [train.py:1198] (0/2) Epoch 15, batch 900, loss[loss=0.2342, ctc_loss=0.1326, cr_loss=0.3706, attn_decoder_loss=0.2372, over 29592.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.1501, cr_loss=0.3905, attn_decoder_loss=0.2568, over 5740625.06 frames. ], batch size: 73, lr: 7.55e-03, grad_scale: 8.0 +2024-09-17 16:24:15,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=257000.0, ans=0.2 +2024-09-17 16:24:37,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=257040.0, ans=0.2 +2024-09-17 16:24:59,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=257120.0, ans=0.0 +2024-09-17 16:25:14,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=257160.0, ans=0.5 +2024-09-17 16:25:17,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=257160.0, ans=0.07 +2024-09-17 16:25:27,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=257160.0, ans=0.0 +2024-09-17 16:25:30,433 INFO [train.py:1198] (0/2) Epoch 15, batch 950, loss[loss=0.23, ctc_loss=0.1265, cr_loss=0.3501, attn_decoder_loss=0.2337, over 29518.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1499, cr_loss=0.3902, attn_decoder_loss=0.2569, over 5742006.31 frames. ], batch size: 74, lr: 7.54e-03, grad_scale: 8.0 +2024-09-17 16:25:32,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.69 vs. 
limit=15.0 +2024-09-17 16:25:45,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=257240.0, ans=0.125 +2024-09-17 16:25:47,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=257240.0, ans=0.0 +2024-09-17 16:25:53,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=257240.0, ans=0.0 +2024-09-17 16:25:57,067 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.11 vs. limit=12.0 +2024-09-17 16:26:13,743 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.071e+01 9.347e+01 1.011e+02 1.116e+02 3.125e+02, threshold=2.021e+02, percent-clipped=4.0 +2024-09-17 16:26:19,479 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.60 vs. limit=10.0 +2024-09-17 16:26:50,490 INFO [train.py:1198] (0/2) Epoch 15, batch 1000, loss[loss=0.2416, ctc_loss=0.1357, cr_loss=0.3761, attn_decoder_loss=0.245, over 29519.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1511, cr_loss=0.3918, attn_decoder_loss=0.2577, over 5736406.60 frames. ], batch size: 77, lr: 7.54e-03, grad_scale: 8.0 +2024-09-17 16:26:55,901 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.22 vs. limit=10.0 +2024-09-17 16:27:04,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=257440.0, ans=0.125 +2024-09-17 16:27:05,291 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.87 vs. limit=22.5 +2024-09-17 16:28:02,463 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 16:28:06,975 INFO [train.py:1198] (0/2) Epoch 15, batch 1050, loss[loss=0.2621, ctc_loss=0.1541, cr_loss=0.3909, attn_decoder_loss=0.2655, over 29691.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1503, cr_loss=0.39, attn_decoder_loss=0.2569, over 5743830.88 frames. ], batch size: 85, lr: 7.54e-03, grad_scale: 8.0 +2024-09-17 16:28:27,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=257640.0, ans=0.125 +2024-09-17 16:28:28,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=257640.0, ans=0.0 +2024-09-17 16:28:28,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=257640.0, ans=0.0 +2024-09-17 16:28:30,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=257640.0, ans=0.0 +2024-09-17 16:28:48,451 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.880e+01 9.520e+01 1.043e+02 1.808e+02, threshold=1.904e+02, percent-clipped=0.0 +2024-09-17 16:28:55,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.33 vs. 
limit=6.0 +2024-09-17 16:29:23,481 INFO [train.py:1198] (0/2) Epoch 15, batch 1100, loss[loss=0.2488, ctc_loss=0.1476, cr_loss=0.3769, attn_decoder_loss=0.2517, over 29444.00 frames. ], tot_loss[loss=0.2541, ctc_loss=0.1504, cr_loss=0.3906, attn_decoder_loss=0.257, over 5755748.26 frames. ], batch size: 78, lr: 7.53e-03, grad_scale: 8.0 +2024-09-17 16:29:28,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=257800.0, ans=0.0 +2024-09-17 16:29:35,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=257800.0, ans=0.07 +2024-09-17 16:30:03,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=257880.0, ans=0.125 +2024-09-17 16:30:05,366 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.54 vs. limit=15.0 +2024-09-17 16:30:43,787 INFO [train.py:1198] (0/2) Epoch 15, batch 1150, loss[loss=0.2485, ctc_loss=0.1492, cr_loss=0.4006, attn_decoder_loss=0.2506, over 29458.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1504, cr_loss=0.3904, attn_decoder_loss=0.2569, over 5755823.87 frames. ], batch size: 78, lr: 7.53e-03, grad_scale: 8.0 +2024-09-17 16:30:50,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=258000.0, ans=0.0 +2024-09-17 16:30:53,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=258000.0, ans=0.125 +2024-09-17 16:31:03,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=258040.0, ans=0.125 +2024-09-17 16:31:08,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=258040.0, ans=0.2 +2024-09-17 16:31:10,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=258040.0, ans=0.0 +2024-09-17 16:31:16,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=258080.0, ans=0.0 +2024-09-17 16:31:22,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=258080.0, ans=10.0 +2024-09-17 16:31:25,063 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.854e+01 8.950e+01 9.429e+01 1.052e+02 4.091e+02, threshold=1.886e+02, percent-clipped=2.0 +2024-09-17 16:31:43,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=258160.0, ans=0.125 +2024-09-17 16:31:47,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=258160.0, ans=0.125 +2024-09-17 16:31:56,163 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.29 vs. limit=15.0 +2024-09-17 16:31:57,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=258160.0, ans=0.125 +2024-09-17 16:32:00,022 INFO [train.py:1198] (0/2) Epoch 15, batch 1200, loss[loss=0.262, ctc_loss=0.1578, cr_loss=0.3966, attn_decoder_loss=0.2648, over 29686.00 frames. 
], tot_loss[loss=0.2551, ctc_loss=0.1513, cr_loss=0.3921, attn_decoder_loss=0.2579, over 5748659.94 frames. ], batch size: 85, lr: 7.53e-03, grad_scale: 16.0 +2024-09-17 16:32:18,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=258240.0, ans=0.0 +2024-09-17 16:32:34,643 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.53 vs. limit=22.5 +2024-09-17 16:32:44,167 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.99 vs. limit=6.0 +2024-09-17 16:32:56,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=258320.0, ans=0.2 +2024-09-17 16:32:56,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=258320.0, ans=0.0 +2024-09-17 16:33:06,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=258360.0, ans=0.95 +2024-09-17 16:33:15,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=258400.0, ans=0.125 +2024-09-17 16:33:16,675 INFO [train.py:1198] (0/2) Epoch 15, batch 1250, loss[loss=0.2705, ctc_loss=0.1683, cr_loss=0.4284, attn_decoder_loss=0.2723, over 29544.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1516, cr_loss=0.3932, attn_decoder_loss=0.2583, over 5776231.78 frames. ], batch size: 92, lr: 7.53e-03, grad_scale: 8.0 +2024-09-17 16:33:26,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=258400.0, ans=0.125 +2024-09-17 16:33:29,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=258400.0, ans=0.0 +2024-09-17 16:33:37,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=258440.0, ans=0.025 +2024-09-17 16:33:46,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=258480.0, ans=0.0 +2024-09-17 16:33:59,437 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.854e+01 8.868e+01 9.518e+01 1.036e+02 1.703e+02, threshold=1.904e+02, percent-clipped=0.0 +2024-09-17 16:34:09,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=258520.0, ans=0.0 +2024-09-17 16:34:18,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=258560.0, ans=0.2 +2024-09-17 16:34:20,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=258560.0, ans=0.1 +2024-09-17 16:34:24,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=258560.0, ans=0.5 +2024-09-17 16:34:37,185 INFO [train.py:1198] (0/2) Epoch 15, batch 1300, loss[loss=0.2741, ctc_loss=0.166, cr_loss=0.4204, attn_decoder_loss=0.2768, over 28290.00 frames. ], tot_loss[loss=0.2548, ctc_loss=0.151, cr_loss=0.3925, attn_decoder_loss=0.2577, over 5780101.10 frames. 
], batch size: 111, lr: 7.52e-03, grad_scale: 8.0 +2024-09-17 16:35:10,215 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.10 vs. limit=15.0 +2024-09-17 16:35:14,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=258680.0, ans=0.125 +2024-09-17 16:35:35,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=258720.0, ans=0.0 +2024-09-17 16:35:53,263 INFO [train.py:1198] (0/2) Epoch 15, batch 1350, loss[loss=0.2473, ctc_loss=0.1468, cr_loss=0.3856, attn_decoder_loss=0.2499, over 29761.00 frames. ], tot_loss[loss=0.2544, ctc_loss=0.1503, cr_loss=0.3921, attn_decoder_loss=0.2572, over 5796704.36 frames. ], batch size: 81, lr: 7.52e-03, grad_scale: 8.0 +2024-09-17 16:36:18,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=258840.0, ans=22.5 +2024-09-17 16:36:32,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=258880.0, ans=0.125 +2024-09-17 16:36:35,392 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.506e+01 8.925e+01 9.317e+01 1.009e+02 1.483e+02, threshold=1.863e+02, percent-clipped=0.0 +2024-09-17 16:36:40,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=258920.0, ans=0.125 +2024-09-17 16:36:43,789 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.99 vs. limit=15.0 +2024-09-17 16:37:04,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=258960.0, ans=0.05 +2024-09-17 16:37:08,673 INFO [train.py:1198] (0/2) Epoch 15, batch 1400, loss[loss=0.2263, ctc_loss=0.1256, cr_loss=0.3269, attn_decoder_loss=0.2303, over 29559.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1499, cr_loss=0.391, attn_decoder_loss=0.257, over 5807329.97 frames. ], batch size: 69, lr: 7.52e-03, grad_scale: 8.0 +2024-09-17 16:37:31,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=259040.0, ans=0.125 +2024-09-17 16:37:51,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=259080.0, ans=0.1 +2024-09-17 16:38:19,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=259160.0, ans=0.125 +2024-09-17 16:38:19,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=259160.0, ans=0.1 +2024-09-17 16:38:27,012 INFO [train.py:1198] (0/2) Epoch 15, batch 1450, loss[loss=0.2669, ctc_loss=0.1698, cr_loss=0.4141, attn_decoder_loss=0.2685, over 29428.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1507, cr_loss=0.3921, attn_decoder_loss=0.2577, over 5803354.57 frames. 
], batch size: 94, lr: 7.51e-03, grad_scale: 8.0 +2024-09-17 16:38:28,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=259200.0, ans=0.125 +2024-09-17 16:38:50,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=259240.0, ans=0.125 +2024-09-17 16:38:58,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=259280.0, ans=0.125 +2024-09-17 16:39:05,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=259280.0, ans=0.125 +2024-09-17 16:39:11,108 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.490e+01 8.827e+01 9.578e+01 1.049e+02 2.248e+02, threshold=1.916e+02, percent-clipped=2.0 +2024-09-17 16:39:41,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=259360.0, ans=0.125 +2024-09-17 16:39:44,393 INFO [train.py:1198] (0/2) Epoch 15, batch 1500, loss[loss=0.2647, ctc_loss=0.1536, cr_loss=0.413, attn_decoder_loss=0.2678, over 29647.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.1511, cr_loss=0.3931, attn_decoder_loss=0.2582, over 5805023.82 frames. ], batch size: 86, lr: 7.51e-03, grad_scale: 8.0 +2024-09-17 16:39:44,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=259400.0, ans=0.125 +2024-09-17 16:39:47,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=259400.0, ans=0.125 +2024-09-17 16:39:49,718 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.01 vs. limit=6.0 +2024-09-17 16:40:35,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=259520.0, ans=0.125 +2024-09-17 16:40:40,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=259520.0, ans=0.125 +2024-09-17 16:40:44,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=259560.0, ans=0.125 +2024-09-17 16:40:50,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=259560.0, ans=0.1 +2024-09-17 16:40:55,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=259560.0, ans=0.125 +2024-09-17 16:41:00,899 INFO [train.py:1198] (0/2) Epoch 15, batch 1550, loss[loss=0.279, ctc_loss=0.1746, cr_loss=0.4483, attn_decoder_loss=0.2806, over 29518.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1518, cr_loss=0.3937, attn_decoder_loss=0.2583, over 5779703.75 frames. ], batch size: 90, lr: 7.51e-03, grad_scale: 8.0 +2024-09-17 16:41:09,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.05 vs. limit=8.0 +2024-09-17 16:41:25,861 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.58 vs. 
limit=12.0 +2024-09-17 16:41:28,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.46 vs. limit=15.0 +2024-09-17 16:41:31,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=259680.0, ans=0.125 +2024-09-17 16:41:42,748 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.565e+01 8.774e+01 9.466e+01 1.042e+02 2.668e+02, threshold=1.893e+02, percent-clipped=3.0 +2024-09-17 16:41:59,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=259720.0, ans=0.125 +2024-09-17 16:42:03,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=259760.0, ans=0.125 +2024-09-17 16:42:12,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=259760.0, ans=0.125 +2024-09-17 16:42:19,850 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.12 vs. limit=15.0 +2024-09-17 16:42:20,434 INFO [train.py:1198] (0/2) Epoch 15, batch 1600, loss[loss=0.2586, ctc_loss=0.1542, cr_loss=0.4028, attn_decoder_loss=0.2612, over 29660.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1515, cr_loss=0.393, attn_decoder_loss=0.2579, over 5761772.11 frames. ], batch size: 85, lr: 7.51e-03, grad_scale: 16.0 +2024-09-17 16:42:35,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=259840.0, ans=0.025 +2024-09-17 16:42:52,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=259880.0, ans=0.125 +2024-09-17 16:42:58,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=259880.0, ans=0.125 +2024-09-17 16:43:01,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=259880.0, ans=0.125 +2024-09-17 16:43:04,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=259920.0, ans=0.0 +2024-09-17 16:43:10,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=259920.0, ans=0.125 +2024-09-17 16:43:15,807 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.34 vs. limit=12.0 +2024-09-17 16:43:15,999 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.76 vs. limit=15.0 +2024-09-17 16:43:36,457 INFO [train.py:1198] (0/2) Epoch 15, batch 1650, loss[loss=0.2681, ctc_loss=0.1583, cr_loss=0.3969, attn_decoder_loss=0.2715, over 29718.00 frames. ], tot_loss[loss=0.2552, ctc_loss=0.1517, cr_loss=0.3931, attn_decoder_loss=0.258, over 5757871.51 frames. 
], batch size: 89, lr: 7.50e-03, grad_scale: 8.0 +2024-09-17 16:43:56,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=260040.0, ans=0.2 +2024-09-17 16:44:08,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=260080.0, ans=0.2 +2024-09-17 16:44:11,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=260080.0, ans=0.2 +2024-09-17 16:44:17,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=260080.0, ans=0.025 +2024-09-17 16:44:20,238 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.336e+01 8.810e+01 9.523e+01 1.053e+02 2.945e+02, threshold=1.905e+02, percent-clipped=2.0 +2024-09-17 16:44:26,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=260120.0, ans=0.0 +2024-09-17 16:44:28,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=260120.0, ans=0.2 +2024-09-17 16:44:36,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.48 vs. limit=15.0 +2024-09-17 16:44:41,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=260160.0, ans=0.2 +2024-09-17 16:44:44,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=260160.0, ans=0.035 +2024-09-17 16:44:45,158 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.30 vs. limit=15.0 +2024-09-17 16:44:46,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=260160.0, ans=0.125 +2024-09-17 16:44:50,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=260200.0, ans=0.09899494936611666 +2024-09-17 16:44:51,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.57 vs. limit=15.0 +2024-09-17 16:44:51,638 INFO [train.py:1198] (0/2) Epoch 15, batch 1700, loss[loss=0.2294, ctc_loss=0.138, cr_loss=0.3753, attn_decoder_loss=0.2313, over 29560.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1515, cr_loss=0.3931, attn_decoder_loss=0.2579, over 5779649.02 frames. ], batch size: 69, lr: 7.50e-03, grad_scale: 8.0 +2024-09-17 16:44:54,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=260200.0, ans=0.2 +2024-09-17 16:44:54,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=260200.0, ans=0.0 +2024-09-17 16:44:56,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=260200.0, ans=0.2 +2024-09-17 16:45:04,466 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.48 vs. 
limit=15.0 +2024-09-17 16:45:16,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=260240.0, ans=0.125 +2024-09-17 16:45:17,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=260240.0, ans=0.0 +2024-09-17 16:45:23,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=260280.0, ans=0.125 +2024-09-17 16:45:23,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=260280.0, ans=0.0 +2024-09-17 16:45:52,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=260360.0, ans=0.1 +2024-09-17 16:46:11,460 INFO [train.py:1198] (0/2) Epoch 15, batch 1750, loss[loss=0.2236, ctc_loss=0.1288, cr_loss=0.3571, attn_decoder_loss=0.2262, over 29390.00 frames. ], tot_loss[loss=0.2544, ctc_loss=0.1507, cr_loss=0.3922, attn_decoder_loss=0.2572, over 5789162.43 frames. ], batch size: 67, lr: 7.50e-03, grad_scale: 8.0 +2024-09-17 16:46:25,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=260440.0, ans=0.025 +2024-09-17 16:46:27,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=260440.0, ans=0.025 +2024-09-17 16:46:27,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.22 vs. limit=15.0 +2024-09-17 16:46:44,292 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.49 vs. limit=22.5 +2024-09-17 16:46:48,763 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.45 vs. limit=12.0 +2024-09-17 16:46:54,618 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.81 vs. limit=10.0 +2024-09-17 16:46:55,375 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.411e+01 8.854e+01 9.659e+01 1.042e+02 2.660e+02, threshold=1.932e+02, percent-clipped=4.0 +2024-09-17 16:47:01,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=260520.0, ans=0.07 +2024-09-17 16:47:08,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.28 vs. limit=12.0 +2024-09-17 16:47:26,582 INFO [train.py:1198] (0/2) Epoch 15, batch 1800, loss[loss=0.2603, ctc_loss=0.1563, cr_loss=0.3984, attn_decoder_loss=0.263, over 29688.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1504, cr_loss=0.392, attn_decoder_loss=0.2571, over 5790405.36 frames. 
], batch size: 83, lr: 7.49e-03, grad_scale: 8.0 +2024-09-17 16:47:39,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=260600.0, ans=0.0 +2024-09-17 16:47:49,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=260640.0, ans=0.0 +2024-09-17 16:47:56,723 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.96 vs. limit=12.0 +2024-09-17 16:48:08,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=260680.0, ans=0.125 +2024-09-17 16:48:29,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=260760.0, ans=0.125 +2024-09-17 16:48:35,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=260760.0, ans=0.125 +2024-09-17 16:48:37,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=260760.0, ans=0.2 +2024-09-17 16:48:43,406 INFO [train.py:1198] (0/2) Epoch 15, batch 1850, loss[loss=0.2576, ctc_loss=0.1452, cr_loss=0.3923, attn_decoder_loss=0.2614, over 29620.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1502, cr_loss=0.3921, attn_decoder_loss=0.2571, over 5797155.55 frames. ], batch size: 86, lr: 7.49e-03, grad_scale: 8.0 +2024-09-17 16:49:27,360 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.276e+01 8.760e+01 9.313e+01 1.001e+02 1.511e+02, threshold=1.863e+02, percent-clipped=0.0 +2024-09-17 16:49:36,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=260920.0, ans=0.0 +2024-09-17 16:50:01,057 INFO [train.py:1198] (0/2) Epoch 15, batch 1900, loss[loss=0.2616, ctc_loss=0.1449, cr_loss=0.3835, attn_decoder_loss=0.2661, over 29666.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1501, cr_loss=0.3928, attn_decoder_loss=0.2574, over 5805517.69 frames. ], batch size: 89, lr: 7.49e-03, grad_scale: 8.0 +2024-09-17 16:50:23,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=261040.0, ans=0.2 +2024-09-17 16:51:03,201 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.93 vs. limit=15.0 +2024-09-17 16:51:07,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=261160.0, ans=0.2 +2024-09-17 16:51:10,997 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.29 vs. limit=6.0 +2024-09-17 16:51:13,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=261160.0, ans=0.125 +2024-09-17 16:51:18,992 INFO [train.py:1198] (0/2) Epoch 15, batch 1950, loss[loss=0.2577, ctc_loss=0.1562, cr_loss=0.4006, attn_decoder_loss=0.2601, over 29449.00 frames. ], tot_loss[loss=0.2558, ctc_loss=0.1511, cr_loss=0.3953, attn_decoder_loss=0.2587, over 5820124.12 frames. 
], batch size: 78, lr: 7.49e-03, grad_scale: 8.0 +2024-09-17 16:51:35,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=261240.0, ans=0.0 +2024-09-17 16:51:56,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=261280.0, ans=0.2 +2024-09-17 16:52:02,591 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.191e+01 8.962e+01 9.463e+01 1.031e+02 5.545e+02, threshold=1.893e+02, percent-clipped=1.0 +2024-09-17 16:52:27,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=261360.0, ans=0.1 +2024-09-17 16:52:34,222 INFO [train.py:1198] (0/2) Epoch 15, batch 2000, loss[loss=0.2207, ctc_loss=0.125, cr_loss=0.3377, attn_decoder_loss=0.2238, over 29363.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1517, cr_loss=0.3955, attn_decoder_loss=0.2591, over 5797425.66 frames. ], batch size: 67, lr: 7.48e-03, grad_scale: 16.0 +2024-09-17 16:53:00,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=261440.0, ans=0.125 +2024-09-17 16:53:11,757 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.25 vs. limit=10.0 +2024-09-17 16:53:16,170 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.62 vs. limit=15.0 +2024-09-17 16:53:34,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.16 vs. limit=15.0 +2024-09-17 16:53:54,998 INFO [train.py:1198] (0/2) Epoch 15, batch 2050, loss[loss=0.2258, ctc_loss=0.1226, cr_loss=0.3393, attn_decoder_loss=0.2297, over 29450.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.151, cr_loss=0.3934, attn_decoder_loss=0.2581, over 5790028.81 frames. 
], batch size: 70, lr: 7.48e-03, grad_scale: 8.0 +2024-09-17 16:53:56,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=261600.0, ans=0.125 +2024-09-17 16:54:07,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=261600.0, ans=0.0 +2024-09-17 16:54:19,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=261640.0, ans=0.2 +2024-09-17 16:54:40,624 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.670e+01 9.073e+01 9.648e+01 1.067e+02 2.180e+02, threshold=1.930e+02, percent-clipped=1.0 +2024-09-17 16:54:46,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=261720.0, ans=0.2 +2024-09-17 16:54:46,954 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 16:54:48,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=261720.0, ans=0.125 +2024-09-17 16:54:58,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=261760.0, ans=0.1 +2024-09-17 16:55:10,759 INFO [train.py:1198] (0/2) Epoch 15, batch 2100, loss[loss=0.2566, ctc_loss=0.1466, cr_loss=0.3848, attn_decoder_loss=0.2603, over 29762.00 frames. ], tot_loss[loss=0.2541, ctc_loss=0.1499, cr_loss=0.391, attn_decoder_loss=0.257, over 5802229.02 frames. ], batch size: 81, lr: 7.48e-03, grad_scale: 8.0 +2024-09-17 16:55:12,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=261800.0, ans=0.0 +2024-09-17 16:55:13,326 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.95 vs. limit=22.5 +2024-09-17 16:55:42,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=261880.0, ans=0.2 +2024-09-17 16:55:50,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=261880.0, ans=10.0 +2024-09-17 16:55:56,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=261920.0, ans=0.0 +2024-09-17 16:56:25,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=262000.0, ans=0.125 +2024-09-17 16:56:26,348 INFO [train.py:1198] (0/2) Epoch 15, batch 2150, loss[loss=0.2517, ctc_loss=0.1524, cr_loss=0.4101, attn_decoder_loss=0.2536, over 29455.00 frames. ], tot_loss[loss=0.2532, ctc_loss=0.1491, cr_loss=0.3902, attn_decoder_loss=0.2561, over 5816688.95 frames. ], batch size: 78, lr: 7.47e-03, grad_scale: 8.0 +2024-09-17 16:56:31,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=262000.0, ans=0.1 +2024-09-17 16:56:33,554 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.08 vs. 
limit=15.0 +2024-09-17 16:57:11,851 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.582e+01 8.623e+01 9.076e+01 9.705e+01 5.465e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-17 16:57:14,045 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.25 vs. limit=15.0 +2024-09-17 16:57:40,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=262200.0, ans=0.125 +2024-09-17 16:57:41,951 INFO [train.py:1198] (0/2) Epoch 15, batch 2200, loss[loss=0.2733, ctc_loss=0.1596, cr_loss=0.4107, attn_decoder_loss=0.2768, over 29628.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.1496, cr_loss=0.3913, attn_decoder_loss=0.2568, over 5812763.10 frames. ], batch size: 86, lr: 7.47e-03, grad_scale: 8.0 +2024-09-17 16:57:45,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=262200.0, ans=0.125 +2024-09-17 16:57:59,570 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.48 vs. limit=6.0 +2024-09-17 16:58:03,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=262240.0, ans=0.1 +2024-09-17 16:58:18,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=262280.0, ans=0.025 +2024-09-17 16:58:18,894 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.04 vs. limit=15.0 +2024-09-17 16:58:48,096 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.87 vs. limit=22.5 +2024-09-17 16:58:54,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=262360.0, ans=0.125 +2024-09-17 16:59:02,695 INFO [train.py:1198] (0/2) Epoch 15, batch 2250, loss[loss=0.2589, ctc_loss=0.1521, cr_loss=0.4103, attn_decoder_loss=0.2616, over 29686.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1494, cr_loss=0.3911, attn_decoder_loss=0.2566, over 5813074.02 frames. 
], batch size: 82, lr: 7.47e-03, grad_scale: 8.0 +2024-09-17 16:59:07,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=262400.0, ans=0.1 +2024-09-17 16:59:15,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=262400.0, ans=0.0 +2024-09-17 16:59:30,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=262440.0, ans=0.0 +2024-09-17 16:59:30,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=262440.0, ans=0.0 +2024-09-17 16:59:37,743 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 16:59:39,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=262480.0, ans=0.0 +2024-09-17 16:59:40,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=262480.0, ans=0.125 +2024-09-17 16:59:47,797 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.637e+01 9.303e+01 1.004e+02 1.390e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-17 17:00:04,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=262560.0, ans=0.125 +2024-09-17 17:00:06,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=262560.0, ans=0.125 +2024-09-17 17:00:10,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=262560.0, ans=0.025 +2024-09-17 17:00:18,400 INFO [train.py:1198] (0/2) Epoch 15, batch 2300, loss[loss=0.2306, ctc_loss=0.1286, cr_loss=0.3606, attn_decoder_loss=0.2339, over 29309.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1491, cr_loss=0.3902, attn_decoder_loss=0.2559, over 5800010.59 frames. ], batch size: 71, lr: 7.47e-03, grad_scale: 8.0 +2024-09-17 17:00:18,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=262600.0, ans=0.2 +2024-09-17 17:00:26,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=262600.0, ans=0.2 +2024-09-17 17:00:45,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=262640.0, ans=0.0 +2024-09-17 17:00:54,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=262680.0, ans=0.125 +2024-09-17 17:01:17,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=262760.0, ans=0.1 +2024-09-17 17:01:26,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=262760.0, ans=0.0 +2024-09-17 17:01:30,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=262760.0, ans=0.0 +2024-09-17 17:01:34,196 INFO [train.py:1198] (0/2) Epoch 15, batch 2350, loss[loss=0.2664, ctc_loss=0.1648, cr_loss=0.4124, attn_decoder_loss=0.2685, over 29679.00 frames. 
], tot_loss[loss=0.2535, ctc_loss=0.1498, cr_loss=0.3916, attn_decoder_loss=0.2563, over 5804849.86 frames. ], batch size: 83, lr: 7.46e-03, grad_scale: 8.0 +2024-09-17 17:01:58,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=262840.0, ans=0.0 +2024-09-17 17:02:06,644 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.35 vs. limit=12.0 +2024-09-17 17:02:22,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=262880.0, ans=15.0 +2024-09-17 17:02:24,197 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.558e+01 8.797e+01 9.531e+01 1.053e+02 3.289e+02, threshold=1.906e+02, percent-clipped=2.0 +2024-09-17 17:02:29,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=262920.0, ans=0.125 +2024-09-17 17:02:30,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=262920.0, ans=0.0 +2024-09-17 17:02:39,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=262960.0, ans=0.125 +2024-09-17 17:02:54,793 INFO [train.py:1198] (0/2) Epoch 15, batch 2400, loss[loss=0.2388, ctc_loss=0.1325, cr_loss=0.361, attn_decoder_loss=0.2426, over 29531.00 frames. ], tot_loss[loss=0.2541, ctc_loss=0.1502, cr_loss=0.392, attn_decoder_loss=0.2569, over 5809103.59 frames. ], batch size: 76, lr: 7.46e-03, grad_scale: 16.0 +2024-09-17 17:02:55,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=263000.0, ans=0.125 +2024-09-17 17:03:02,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=263000.0, ans=0.125 +2024-09-17 17:03:25,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=263080.0, ans=0.1 +2024-09-17 17:03:39,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=263120.0, ans=0.0 +2024-09-17 17:04:11,096 INFO [train.py:1198] (0/2) Epoch 15, batch 2450, loss[loss=0.2682, ctc_loss=0.1595, cr_loss=0.4289, attn_decoder_loss=0.2708, over 29702.00 frames. ], tot_loss[loss=0.2547, ctc_loss=0.1508, cr_loss=0.3926, attn_decoder_loss=0.2576, over 5785484.69 frames. ], batch size: 82, lr: 7.46e-03, grad_scale: 8.0 +2024-09-17 17:04:27,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=263240.0, ans=0.125 +2024-09-17 17:04:57,816 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.779e+01 9.083e+01 9.931e+01 1.099e+02 3.144e+02, threshold=1.986e+02, percent-clipped=3.0 +2024-09-17 17:05:08,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=263320.0, ans=0.125 +2024-09-17 17:05:10,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=263360.0, ans=0.025 +2024-09-17 17:05:14,417 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.98 vs. 
limit=10.0 +2024-09-17 17:05:26,721 INFO [train.py:1198] (0/2) Epoch 15, batch 2500, loss[loss=0.2719, ctc_loss=0.16, cr_loss=0.4033, attn_decoder_loss=0.2754, over 29629.00 frames. ], tot_loss[loss=0.2548, ctc_loss=0.1507, cr_loss=0.393, attn_decoder_loss=0.2576, over 5795320.07 frames. ], batch size: 86, lr: 7.46e-03, grad_scale: 8.0 +2024-09-17 17:05:29,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=263400.0, ans=0.125 +2024-09-17 17:05:41,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=263400.0, ans=0.125 +2024-09-17 17:06:12,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=263480.0, ans=0.125 +2024-09-17 17:06:26,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=263520.0, ans=0.125 +2024-09-17 17:06:36,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=263560.0, ans=0.0 +2024-09-17 17:06:41,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=263560.0, ans=0.0 +2024-09-17 17:06:44,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=263560.0, ans=0.0 +2024-09-17 17:06:47,418 INFO [train.py:1198] (0/2) Epoch 15, batch 2550, loss[loss=0.2165, ctc_loss=0.1169, cr_loss=0.337, attn_decoder_loss=0.22, over 29359.00 frames. ], tot_loss[loss=0.2547, ctc_loss=0.1506, cr_loss=0.393, attn_decoder_loss=0.2576, over 5798390.17 frames. ], batch size: 67, lr: 7.45e-03, grad_scale: 8.0 +2024-09-17 17:07:25,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=263680.0, ans=0.125 +2024-09-17 17:07:25,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=263680.0, ans=0.0 +2024-09-17 17:07:32,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=263720.0, ans=0.125 +2024-09-17 17:07:34,221 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.398e+01 8.892e+01 9.357e+01 1.015e+02 2.489e+02, threshold=1.871e+02, percent-clipped=2.0 +2024-09-17 17:07:35,477 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.58 vs. limit=15.0 +2024-09-17 17:07:39,976 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.37 vs. limit=15.0 +2024-09-17 17:07:43,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=263720.0, ans=0.2 +2024-09-17 17:07:57,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=263760.0, ans=0.125 +2024-09-17 17:08:02,867 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.30 vs. limit=22.5 +2024-09-17 17:08:03,405 INFO [train.py:1198] (0/2) Epoch 15, batch 2600, loss[loss=0.2536, ctc_loss=0.1521, cr_loss=0.3994, attn_decoder_loss=0.256, over 29447.00 frames. 
], tot_loss[loss=0.2554, ctc_loss=0.1513, cr_loss=0.394, attn_decoder_loss=0.2582, over 5794505.68 frames. ], batch size: 78, lr: 7.45e-03, grad_scale: 8.0 +2024-09-17 17:08:11,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=263800.0, ans=0.0 +2024-09-17 17:08:12,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=263800.0, ans=0.0 +2024-09-17 17:08:20,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=263840.0, ans=0.125 +2024-09-17 17:08:37,096 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.24 vs. limit=12.0 +2024-09-17 17:08:48,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=263920.0, ans=0.125 +2024-09-17 17:09:06,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=263960.0, ans=0.07 +2024-09-17 17:09:10,088 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.40 vs. limit=10.0 +2024-09-17 17:09:15,529 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:09:15,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=263960.0, ans=0.0 +2024-09-17 17:09:18,684 INFO [train.py:1198] (0/2) Epoch 15, batch 2650, loss[loss=0.2671, ctc_loss=0.1604, cr_loss=0.4012, attn_decoder_loss=0.27, over 29282.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1512, cr_loss=0.3936, attn_decoder_loss=0.2584, over 5801075.20 frames. ], batch size: 100, lr: 7.45e-03, grad_scale: 8.0 +2024-09-17 17:09:31,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.16 vs. limit=15.0 +2024-09-17 17:09:33,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=264000.0, ans=0.1 +2024-09-17 17:09:39,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=264040.0, ans=0.125 +2024-09-17 17:09:46,422 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.55 vs. limit=15.0 +2024-09-17 17:10:00,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=264080.0, ans=0.0 +2024-09-17 17:10:09,780 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.262e+01 9.042e+01 9.386e+01 1.019e+02 2.005e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-17 17:10:10,873 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.68 vs. 
limit=22.5 +2024-09-17 17:10:12,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=264120.0, ans=15.0 +2024-09-17 17:10:28,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=264160.0, ans=0.125 +2024-09-17 17:10:38,713 INFO [train.py:1198] (0/2) Epoch 15, batch 2700, loss[loss=0.2666, ctc_loss=0.1605, cr_loss=0.4013, attn_decoder_loss=0.2694, over 29537.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1518, cr_loss=0.3943, attn_decoder_loss=0.2587, over 5797393.42 frames. ], batch size: 87, lr: 7.44e-03, grad_scale: 8.0 +2024-09-17 17:10:38,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=264200.0, ans=0.025 +2024-09-17 17:10:54,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=264240.0, ans=0.1 +2024-09-17 17:11:07,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=264280.0, ans=0.025 +2024-09-17 17:11:38,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=264360.0, ans=0.0 +2024-09-17 17:11:48,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer_ff3.min_abs, batch_count=264360.0, ans=0.2 +2024-09-17 17:11:52,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=264360.0, ans=0.125 +2024-09-17 17:11:55,013 INFO [train.py:1198] (0/2) Epoch 15, batch 2750, loss[loss=0.25, ctc_loss=0.1545, cr_loss=0.3859, attn_decoder_loss=0.252, over 29520.00 frames. ], tot_loss[loss=0.2546, ctc_loss=0.1506, cr_loss=0.3925, attn_decoder_loss=0.2574, over 5796200.17 frames. ], batch size: 75, lr: 7.44e-03, grad_scale: 8.0 +2024-09-17 17:12:12,101 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:12:24,939 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.04 vs. limit=12.0 +2024-09-17 17:12:33,680 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.46 vs. limit=15.0 +2024-09-17 17:12:41,851 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.170e+01 8.996e+01 9.846e+01 1.075e+02 1.941e+02, threshold=1.969e+02, percent-clipped=1.0 +2024-09-17 17:12:47,217 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=12.0 +2024-09-17 17:13:13,279 INFO [train.py:1198] (0/2) Epoch 15, batch 2800, loss[loss=0.267, ctc_loss=0.1715, cr_loss=0.3922, attn_decoder_loss=0.2689, over 20280.00 frames. ], tot_loss[loss=0.2547, ctc_loss=0.1509, cr_loss=0.3927, attn_decoder_loss=0.2575, over 5776975.42 frames. 
], batch size: 210, lr: 7.44e-03, grad_scale: 16.0 +2024-09-17 17:13:13,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=264600.0, ans=0.125 +2024-09-17 17:13:19,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=264600.0, ans=0.1 +2024-09-17 17:13:19,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=264600.0, ans=0.1 +2024-09-17 17:13:25,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=264600.0, ans=0.025 +2024-09-17 17:14:25,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=264760.0, ans=0.1 +2024-09-17 17:14:31,449 INFO [train.py:1198] (0/2) Epoch 15, batch 2850, loss[loss=0.2396, ctc_loss=0.1432, cr_loss=0.3893, attn_decoder_loss=0.2417, over 29498.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1513, cr_loss=0.393, attn_decoder_loss=0.2578, over 5763409.72 frames. ], batch size: 77, lr: 7.44e-03, grad_scale: 8.0 +2024-09-17 17:14:45,447 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:14:47,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.42 vs. limit=10.0 +2024-09-17 17:15:05,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=264880.0, ans=0.2 +2024-09-17 17:15:11,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=264880.0, ans=0.125 +2024-09-17 17:15:11,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=264880.0, ans=0.125 +2024-09-17 17:15:20,060 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.176e+01 9.061e+01 9.943e+01 1.094e+02 2.532e+02, threshold=1.989e+02, percent-clipped=2.0 +2024-09-17 17:15:47,574 INFO [train.py:1198] (0/2) Epoch 15, batch 2900, loss[loss=0.2446, ctc_loss=0.1425, cr_loss=0.3768, attn_decoder_loss=0.2476, over 29415.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1515, cr_loss=0.3943, attn_decoder_loss=0.2589, over 5788003.58 frames. ], batch size: 79, lr: 7.43e-03, grad_scale: 8.0 +2024-09-17 17:15:56,789 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:16:38,914 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.90 vs. limit=15.0 +2024-09-17 17:16:43,471 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.05 vs. limit=22.5 +2024-09-17 17:16:49,417 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.99 vs. limit=15.0 +2024-09-17 17:17:05,973 INFO [train.py:1198] (0/2) Epoch 15, batch 2950, loss[loss=0.2418, ctc_loss=0.141, cr_loss=0.3738, attn_decoder_loss=0.2447, over 29501.00 frames. 
], tot_loss[loss=0.2544, ctc_loss=0.1502, cr_loss=0.3922, attn_decoder_loss=0.2572, over 5782760.38 frames. ], batch size: 75, lr: 7.43e-03, grad_scale: 8.0 +2024-09-17 17:17:10,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=265200.0, ans=0.0 +2024-09-17 17:17:13,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=265200.0, ans=0.0 +2024-09-17 17:17:15,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=265200.0, ans=0.025 +2024-09-17 17:17:27,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=265240.0, ans=0.0 +2024-09-17 17:17:56,690 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.763e+01 8.981e+01 9.731e+01 1.093e+02 3.344e+02, threshold=1.946e+02, percent-clipped=1.0 +2024-09-17 17:18:05,417 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.45 vs. limit=6.0 +2024-09-17 17:18:07,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=265360.0, ans=0.0 +2024-09-17 17:18:10,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=265360.0, ans=0.0 +2024-09-17 17:18:13,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=265360.0, ans=0.1 +2024-09-17 17:18:24,078 INFO [train.py:1198] (0/2) Epoch 15, batch 3000, loss[loss=0.259, ctc_loss=0.1534, cr_loss=0.4145, attn_decoder_loss=0.2615, over 29751.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1499, cr_loss=0.3915, attn_decoder_loss=0.2572, over 5784078.60 frames. ], batch size: 81, lr: 7.43e-03, grad_scale: 8.0 +2024-09-17 17:18:24,079 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 17:18:42,411 INFO [train.py:1230] (0/2) Epoch 15, validation: loss=0.2111, ctc_loss=0.04175, cr_loss=4.872e-15, attn_decoder_loss=0.23, over 944034.00 frames. +2024-09-17 17:18:42,411 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 17:18:48,317 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.69 vs. limit=22.5 +2024-09-17 17:18:56,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=265440.0, ans=0.0 +2024-09-17 17:19:54,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=265560.0, ans=0.0 +2024-09-17 17:19:57,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=265600.0, ans=0.1 +2024-09-17 17:19:58,993 INFO [train.py:1198] (0/2) Epoch 15, batch 3050, loss[loss=0.239, ctc_loss=0.1377, cr_loss=0.3894, attn_decoder_loss=0.2416, over 29540.00 frames. ], tot_loss[loss=0.2552, ctc_loss=0.1508, cr_loss=0.3933, attn_decoder_loss=0.2581, over 5777185.16 frames. 
], batch size: 76, lr: 7.42e-03, grad_scale: 8.0 +2024-09-17 17:20:11,620 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:20:13,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=265640.0, ans=0.0 +2024-09-17 17:20:15,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=265640.0, ans=0.1 +2024-09-17 17:20:45,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=265720.0, ans=0.2 +2024-09-17 17:20:49,578 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.921e+01 9.345e+01 1.016e+02 1.110e+02 2.723e+02, threshold=2.032e+02, percent-clipped=2.0 +2024-09-17 17:20:51,926 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.30 vs. limit=10.0 +2024-09-17 17:20:59,465 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.43 vs. limit=6.0 +2024-09-17 17:21:03,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=265760.0, ans=0.0 +2024-09-17 17:21:16,563 INFO [train.py:1198] (0/2) Epoch 15, batch 3100, loss[loss=0.2721, ctc_loss=0.1563, cr_loss=0.4015, attn_decoder_loss=0.2761, over 29300.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1502, cr_loss=0.3927, attn_decoder_loss=0.2574, over 5777404.79 frames. ], batch size: 100, lr: 7.42e-03, grad_scale: 8.0 +2024-09-17 17:21:46,453 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.39 vs. limit=6.0 +2024-09-17 17:21:49,422 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.38 vs. limit=15.0 +2024-09-17 17:22:12,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=265920.0, ans=0.125 +2024-09-17 17:22:16,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=265920.0, ans=0.04949747468305833 +2024-09-17 17:22:34,714 INFO [train.py:1198] (0/2) Epoch 15, batch 3150, loss[loss=0.2675, ctc_loss=0.161, cr_loss=0.3955, attn_decoder_loss=0.2706, over 28827.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1498, cr_loss=0.3915, attn_decoder_loss=0.2572, over 5783964.54 frames. ], batch size: 104, lr: 7.42e-03, grad_scale: 8.0 +2024-09-17 17:22:38,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=266000.0, ans=0.1 +2024-09-17 17:22:41,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=266000.0, ans=0.0 +2024-09-17 17:22:48,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=266040.0, ans=0.0 +2024-09-17 17:22:50,724 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.41 vs. 
limit=15.0 +2024-09-17 17:23:01,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.99 vs. limit=15.0 +2024-09-17 17:23:23,257 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.617e+01 8.876e+01 9.219e+01 9.735e+01 3.011e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-17 17:23:34,598 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.71 vs. limit=15.0 +2024-09-17 17:23:43,824 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.15 vs. limit=15.0 +2024-09-17 17:23:49,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=266200.0, ans=0.125 +2024-09-17 17:23:50,659 INFO [train.py:1198] (0/2) Epoch 15, batch 3200, loss[loss=0.2563, ctc_loss=0.1521, cr_loss=0.4137, attn_decoder_loss=0.2587, over 29430.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.1494, cr_loss=0.3912, attn_decoder_loss=0.2568, over 5793834.60 frames. ], batch size: 79, lr: 7.42e-03, grad_scale: 16.0 +2024-09-17 17:24:12,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=266240.0, ans=0.0 +2024-09-17 17:24:14,442 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.75 vs. limit=22.5 +2024-09-17 17:24:22,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=266280.0, ans=0.125 +2024-09-17 17:24:30,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=266280.0, ans=0.125 +2024-09-17 17:24:36,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=266320.0, ans=0.1 +2024-09-17 17:24:55,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=266360.0, ans=0.025 +2024-09-17 17:25:09,281 INFO [train.py:1198] (0/2) Epoch 15, batch 3250, loss[loss=0.2574, ctc_loss=0.1425, cr_loss=0.3785, attn_decoder_loss=0.2617, over 29704.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1496, cr_loss=0.3919, attn_decoder_loss=0.2572, over 5800882.32 frames. ], batch size: 84, lr: 7.41e-03, grad_scale: 8.0 +2024-09-17 17:25:17,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=266400.0, ans=0.0 +2024-09-17 17:25:31,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=266440.0, ans=0.125 +2024-09-17 17:26:00,413 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.12 vs. limit=15.0 +2024-09-17 17:26:01,021 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.078e+01 8.753e+01 9.181e+01 1.001e+02 1.564e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-17 17:26:26,784 INFO [train.py:1198] (0/2) Epoch 15, batch 3300, loss[loss=0.2573, ctc_loss=0.1457, cr_loss=0.3755, attn_decoder_loss=0.2613, over 28326.00 frames. 
], tot_loss[loss=0.2527, ctc_loss=0.1484, cr_loss=0.39, attn_decoder_loss=0.2557, over 5798298.49 frames. ], batch size: 111, lr: 7.41e-03, grad_scale: 8.0 +2024-09-17 17:26:38,440 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.76 vs. limit=22.5 +2024-09-17 17:26:59,820 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.43 vs. limit=15.0 +2024-09-17 17:27:03,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=266680.0, ans=0.05 +2024-09-17 17:27:05,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=266680.0, ans=0.125 +2024-09-17 17:27:19,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=266720.0, ans=0.0 +2024-09-17 17:27:20,929 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.25 vs. limit=22.5 +2024-09-17 17:27:21,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=266720.0, ans=0.0 +2024-09-17 17:27:22,318 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.26 vs. limit=22.5 +2024-09-17 17:27:29,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=266760.0, ans=0.0 +2024-09-17 17:27:38,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=266760.0, ans=0.125 +2024-09-17 17:27:42,413 INFO [train.py:1198] (0/2) Epoch 15, batch 3350, loss[loss=0.2736, ctc_loss=0.1714, cr_loss=0.4245, attn_decoder_loss=0.2756, over 28936.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.1495, cr_loss=0.3915, attn_decoder_loss=0.2568, over 5775258.89 frames. ], batch size: 104, lr: 7.41e-03, grad_scale: 8.0 +2024-09-17 17:27:47,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=266800.0, ans=0.025 +2024-09-17 17:27:47,928 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.12 vs. 
limit=10.0 +2024-09-17 17:28:05,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=266840.0, ans=0.0 +2024-09-17 17:28:08,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=266840.0, ans=0.05 +2024-09-17 17:28:11,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=266880.0, ans=0.95 +2024-09-17 17:28:23,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=266880.0, ans=0.2 +2024-09-17 17:28:34,702 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.002e+01 8.982e+01 9.740e+01 1.080e+02 2.374e+02, threshold=1.948e+02, percent-clipped=1.0 +2024-09-17 17:28:35,371 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.52 vs. limit=15.0 +2024-09-17 17:28:42,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=266920.0, ans=0.1 +2024-09-17 17:28:48,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=266960.0, ans=0.125 +2024-09-17 17:28:57,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=266960.0, ans=0.125 +2024-09-17 17:29:00,482 INFO [train.py:1198] (0/2) Epoch 15, batch 3400, loss[loss=0.2286, ctc_loss=0.135, cr_loss=0.3675, attn_decoder_loss=0.2308, over 29323.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.1496, cr_loss=0.3908, attn_decoder_loss=0.2568, over 5767254.96 frames. ], batch size: 67, lr: 7.41e-03, grad_scale: 8.0 +2024-09-17 17:29:06,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=267000.0, ans=0.2 +2024-09-17 17:29:14,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=267040.0, ans=0.125 +2024-09-17 17:29:27,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=267040.0, ans=0.0 +2024-09-17 17:29:30,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=267040.0, ans=0.125 +2024-09-17 17:29:42,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=267080.0, ans=0.125 +2024-09-17 17:29:56,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=267120.0, ans=0.025 +2024-09-17 17:30:01,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.94 vs. limit=22.5 +2024-09-17 17:30:05,680 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.51 vs. limit=15.0 +2024-09-17 17:30:18,827 INFO [train.py:1198] (0/2) Epoch 15, batch 3450, loss[loss=0.2599, ctc_loss=0.1528, cr_loss=0.3888, attn_decoder_loss=0.2632, over 28277.00 frames. ], tot_loss[loss=0.2544, ctc_loss=0.15, cr_loss=0.3919, attn_decoder_loss=0.2573, over 5776079.01 frames. 
], batch size: 111, lr: 7.40e-03, grad_scale: 8.0 +2024-09-17 17:30:21,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.18 vs. limit=22.5 +2024-09-17 17:30:22,162 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:30:25,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=267200.0, ans=0.07 +2024-09-17 17:30:35,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=267240.0, ans=0.125 +2024-09-17 17:30:40,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=267240.0, ans=0.2 +2024-09-17 17:31:08,470 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.414e+01 8.765e+01 9.280e+01 9.883e+01 2.461e+02, threshold=1.856e+02, percent-clipped=1.0 +2024-09-17 17:31:30,592 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.41 vs. limit=15.0 +2024-09-17 17:31:34,397 INFO [train.py:1198] (0/2) Epoch 15, batch 3500, loss[loss=0.2362, ctc_loss=0.1369, cr_loss=0.3832, attn_decoder_loss=0.2388, over 29327.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1493, cr_loss=0.3908, attn_decoder_loss=0.2566, over 5778309.39 frames. ], batch size: 71, lr: 7.40e-03, grad_scale: 8.0 +2024-09-17 17:31:45,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=267400.0, ans=0.0 +2024-09-17 17:31:54,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=267440.0, ans=0.0 +2024-09-17 17:31:58,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=267440.0, ans=0.0 +2024-09-17 17:32:11,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=267480.0, ans=0.125 +2024-09-17 17:32:19,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=267520.0, ans=0.125 +2024-09-17 17:32:28,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=267520.0, ans=0.2 +2024-09-17 17:32:33,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=267560.0, ans=0.125 +2024-09-17 17:32:42,544 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:32:51,219 INFO [train.py:1198] (0/2) Epoch 15, batch 3550, loss[loss=0.2562, ctc_loss=0.1403, cr_loss=0.3888, attn_decoder_loss=0.2605, over 29713.00 frames. ], tot_loss[loss=0.2538, ctc_loss=0.1491, cr_loss=0.3903, attn_decoder_loss=0.2567, over 5784464.16 frames. 
], batch size: 89, lr: 7.40e-03, grad_scale: 8.0 +2024-09-17 17:33:04,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=267640.0, ans=0.0 +2024-09-17 17:33:10,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=267640.0, ans=0.125 +2024-09-17 17:33:13,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.21 vs. limit=22.5 +2024-09-17 17:33:39,995 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.842e+01 9.279e+01 9.951e+01 4.838e+02, threshold=1.856e+02, percent-clipped=2.0 +2024-09-17 17:34:00,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=267760.0, ans=0.125 +2024-09-17 17:34:05,140 INFO [train.py:1198] (0/2) Epoch 15, batch 3600, loss[loss=0.2483, ctc_loss=0.1351, cr_loss=0.3697, attn_decoder_loss=0.2527, over 29523.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.149, cr_loss=0.3908, attn_decoder_loss=0.2569, over 5792742.25 frames. ], batch size: 77, lr: 7.39e-03, grad_scale: 16.0 +2024-09-17 17:34:15,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=267800.0, ans=0.025 +2024-09-17 17:34:34,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=267840.0, ans=0.025 +2024-09-17 17:34:37,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=267880.0, ans=0.125 +2024-09-17 17:34:41,267 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.43 vs. limit=15.0 +2024-09-17 17:34:49,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=267880.0, ans=0.125 +2024-09-17 17:34:51,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=267920.0, ans=0.1 +2024-09-17 17:35:22,549 INFO [train.py:1198] (0/2) Epoch 15, batch 3650, loss[loss=0.26, ctc_loss=0.1472, cr_loss=0.3816, attn_decoder_loss=0.2641, over 29519.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1484, cr_loss=0.3896, attn_decoder_loss=0.2563, over 5795247.96 frames. 
], batch size: 90, lr: 7.39e-03, grad_scale: 8.0 +2024-09-17 17:35:25,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=268000.0, ans=0.1 +2024-09-17 17:35:27,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=268000.0, ans=0.1 +2024-09-17 17:35:28,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=268000.0, ans=0.0 +2024-09-17 17:35:30,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=268000.0, ans=0.1 +2024-09-17 17:35:34,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=268000.0, ans=0.125 +2024-09-17 17:35:38,631 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.65 vs. limit=6.0 +2024-09-17 17:35:39,385 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:35:44,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.62 vs. limit=12.0 +2024-09-17 17:36:00,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=268080.0, ans=0.04949747468305833 +2024-09-17 17:36:13,525 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.685e+01 9.367e+01 9.867e+01 1.459e+02, threshold=1.873e+02, percent-clipped=0.0 +2024-09-17 17:36:24,820 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.04 vs. limit=22.5 +2024-09-17 17:36:29,424 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.66 vs. limit=22.5 +2024-09-17 17:36:30,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=268160.0, ans=0.125 +2024-09-17 17:36:37,436 INFO [train.py:1198] (0/2) Epoch 15, batch 3700, loss[loss=0.261, ctc_loss=0.1531, cr_loss=0.39, attn_decoder_loss=0.2643, over 29698.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1483, cr_loss=0.3895, attn_decoder_loss=0.2563, over 5804548.43 frames. ], batch size: 84, lr: 7.39e-03, grad_scale: 8.0 +2024-09-17 17:36:39,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=268200.0, ans=0.04949747468305833 +2024-09-17 17:36:48,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=268200.0, ans=0.125 +2024-09-17 17:37:02,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=268240.0, ans=0.1 +2024-09-17 17:37:43,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=268360.0, ans=0.0 +2024-09-17 17:37:51,820 INFO [train.py:1198] (0/2) Epoch 15, batch 3750, loss[loss=0.2284, ctc_loss=0.1328, cr_loss=0.3598, attn_decoder_loss=0.231, over 29324.00 frames. 
], tot_loss[loss=0.2531, ctc_loss=0.1483, cr_loss=0.3895, attn_decoder_loss=0.2561, over 5808547.96 frames. ], batch size: 67, lr: 7.39e-03, grad_scale: 8.0 +2024-09-17 17:38:03,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=268400.0, ans=0.0 +2024-09-17 17:38:05,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=268440.0, ans=0.125 +2024-09-17 17:38:08,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=268440.0, ans=0.125 +2024-09-17 17:38:25,106 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.13 vs. limit=15.0 +2024-09-17 17:38:42,232 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 8.742e+01 9.217e+01 9.952e+01 4.415e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-17 17:38:48,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=268520.0, ans=0.2 +2024-09-17 17:39:07,748 INFO [train.py:1198] (0/2) Epoch 15, batch 3800, loss[loss=0.2613, ctc_loss=0.1544, cr_loss=0.4144, attn_decoder_loss=0.2639, over 29639.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1483, cr_loss=0.3888, attn_decoder_loss=0.2558, over 5798923.00 frames. ], batch size: 86, lr: 7.38e-03, grad_scale: 8.0 +2024-09-17 17:39:35,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=268640.0, ans=0.1 +2024-09-17 17:39:42,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=268680.0, ans=0.0 +2024-09-17 17:39:53,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.97 vs. limit=6.0 +2024-09-17 17:39:56,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=268720.0, ans=0.0 +2024-09-17 17:40:06,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=268760.0, ans=0.0 +2024-09-17 17:40:12,957 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.58 vs. limit=10.0 +2024-09-17 17:40:15,414 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:40:24,555 INFO [train.py:1198] (0/2) Epoch 15, batch 3850, loss[loss=0.2711, ctc_loss=0.1621, cr_loss=0.4053, attn_decoder_loss=0.2743, over 29173.00 frames. ], tot_loss[loss=0.2526, ctc_loss=0.148, cr_loss=0.3886, attn_decoder_loss=0.2556, over 5812029.28 frames. ], batch size: 100, lr: 7.38e-03, grad_scale: 8.0 +2024-09-17 17:40:30,071 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.67 vs. limit=15.0 +2024-09-17 17:40:42,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=268840.0, ans=0.0 +2024-09-17 17:41:01,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.83 vs. 
limit=15.0 +2024-09-17 17:41:08,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=268920.0, ans=0.2 +2024-09-17 17:41:16,762 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 9.057e+01 9.751e+01 1.063e+02 2.027e+02, threshold=1.950e+02, percent-clipped=1.0 +2024-09-17 17:41:17,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=268920.0, ans=0.125 +2024-09-17 17:41:19,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=268920.0, ans=0.125 +2024-09-17 17:41:36,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=268960.0, ans=0.125 +2024-09-17 17:41:38,864 INFO [train.py:1198] (0/2) Epoch 15, batch 3900, loss[loss=0.256, ctc_loss=0.1493, cr_loss=0.3943, attn_decoder_loss=0.2591, over 29631.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1483, cr_loss=0.389, attn_decoder_loss=0.2559, over 5817144.81 frames. ], batch size: 86, lr: 7.38e-03, grad_scale: 8.0 +2024-09-17 17:41:42,257 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:41:42,524 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.78 vs. limit=15.0 +2024-09-17 17:41:55,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=269040.0, ans=0.125 +2024-09-17 17:42:08,993 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.81 vs. limit=22.5 +2024-09-17 17:42:24,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=269120.0, ans=0.0 +2024-09-17 17:42:39,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=269160.0, ans=0.125 +2024-09-17 17:42:49,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=269160.0, ans=0.5 +2024-09-17 17:42:52,609 INFO [train.py:1198] (0/2) Epoch 15, batch 3950, loss[loss=0.2598, ctc_loss=0.1508, cr_loss=0.3809, attn_decoder_loss=0.2635, over 29481.00 frames. ], tot_loss[loss=0.2534, ctc_loss=0.1487, cr_loss=0.3904, attn_decoder_loss=0.2563, over 5836455.69 frames. ], batch size: 97, lr: 7.38e-03, grad_scale: 8.0 +2024-09-17 17:42:52,998 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:43:10,049 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.92 vs. 
limit=15.0 +2024-09-17 17:43:16,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=269240.0, ans=0.1 +2024-09-17 17:43:19,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=269240.0, ans=0.1 +2024-09-17 17:43:32,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=269280.0, ans=0.125 +2024-09-17 17:43:44,314 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.673e+01 9.088e+01 9.576e+01 1.054e+02 2.878e+02, threshold=1.915e+02, percent-clipped=1.0 +2024-09-17 17:43:45,464 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.97 vs. limit=22.5 +2024-09-17 17:44:07,787 INFO [train.py:1198] (0/2) Epoch 15, batch 4000, loss[loss=0.2367, ctc_loss=0.129, cr_loss=0.348, attn_decoder_loss=0.241, over 29512.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.149, cr_loss=0.3899, attn_decoder_loss=0.2562, over 5812695.14 frames. ], batch size: 74, lr: 7.37e-03, grad_scale: 16.0 +2024-09-17 17:44:09,930 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.10 vs. limit=12.0 +2024-09-17 17:44:14,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.01 vs. limit=22.5 +2024-09-17 17:44:18,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=269400.0, ans=0.125 +2024-09-17 17:44:27,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=269440.0, ans=0.125 +2024-09-17 17:44:49,987 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.75 vs. limit=15.0 +2024-09-17 17:44:50,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=269520.0, ans=0.05 +2024-09-17 17:44:53,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=269520.0, ans=0.125 +2024-09-17 17:45:08,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=269560.0, ans=0.1 +2024-09-17 17:45:22,419 INFO [train.py:1198] (0/2) Epoch 15, batch 4050, loss[loss=0.2871, ctc_loss=0.2084, cr_loss=0.4481, attn_decoder_loss=0.2859, over 20184.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1489, cr_loss=0.3891, attn_decoder_loss=0.256, over 5796553.32 frames. ], batch size: 210, lr: 7.37e-03, grad_scale: 8.0 +2024-09-17 17:46:16,373 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.785e+01 8.913e+01 9.474e+01 1.030e+02 4.406e+02, threshold=1.895e+02, percent-clipped=2.0 +2024-09-17 17:46:25,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=269760.0, ans=0.125 +2024-09-17 17:46:37,091 INFO [train.py:1198] (0/2) Epoch 15, batch 4100, loss[loss=0.2635, ctc_loss=0.1552, cr_loss=0.3957, attn_decoder_loss=0.2667, over 29503.00 frames. 
], tot_loss[loss=0.2532, ctc_loss=0.149, cr_loss=0.3892, attn_decoder_loss=0.2561, over 5791044.48 frames. ], batch size: 90, lr: 7.37e-03, grad_scale: 8.0 +2024-09-17 17:46:53,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=269840.0, ans=0.2 +2024-09-17 17:46:55,471 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.61 vs. limit=6.0 +2024-09-17 17:47:00,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=269840.0, ans=0.125 +2024-09-17 17:47:09,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=269880.0, ans=0.0 +2024-09-17 17:47:14,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=269880.0, ans=0.0 +2024-09-17 17:47:36,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=269960.0, ans=0.125 +2024-09-17 17:47:50,742 INFO [train.py:1198] (0/2) Epoch 15, batch 4150, loss[loss=0.2539, ctc_loss=0.1519, cr_loss=0.3934, attn_decoder_loss=0.2565, over 29511.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1488, cr_loss=0.389, attn_decoder_loss=0.256, over 5796622.69 frames. ], batch size: 77, lr: 7.36e-03, grad_scale: 8.0 +2024-09-17 17:47:58,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=270000.0, ans=0.0 +2024-09-17 17:48:21,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=270080.0, ans=0.0 +2024-09-17 17:48:41,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=270120.0, ans=0.125 +2024-09-17 17:48:43,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.23 vs. limit=15.0 +2024-09-17 17:48:45,089 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.781e+01 9.267e+01 9.931e+01 2.534e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-17 17:49:00,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=270160.0, ans=0.1 +2024-09-17 17:49:02,507 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.51 vs. limit=22.5 +2024-09-17 17:49:06,155 INFO [train.py:1198] (0/2) Epoch 15, batch 4200, loss[loss=0.2727, ctc_loss=0.1663, cr_loss=0.4252, attn_decoder_loss=0.2751, over 29483.00 frames. ], tot_loss[loss=0.2536, ctc_loss=0.1492, cr_loss=0.3906, attn_decoder_loss=0.2565, over 5799218.26 frames. 
], batch size: 90, lr: 7.36e-03, grad_scale: 8.0 +2024-09-17 17:49:07,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=270200.0, ans=0.125 +2024-09-17 17:49:18,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=270200.0, ans=0.0 +2024-09-17 17:49:22,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=270240.0, ans=0.125 +2024-09-17 17:49:23,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=15.0 +2024-09-17 17:49:51,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=270320.0, ans=0.05 +2024-09-17 17:49:55,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=270320.0, ans=0.2 +2024-09-17 17:49:59,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=270320.0, ans=0.025 +2024-09-17 17:50:01,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=270320.0, ans=0.0 +2024-09-17 17:50:21,303 INFO [train.py:1198] (0/2) Epoch 15, batch 4250, loss[loss=0.2338, ctc_loss=0.1343, cr_loss=0.3669, attn_decoder_loss=0.2367, over 29511.00 frames. ], tot_loss[loss=0.2535, ctc_loss=0.149, cr_loss=0.3903, attn_decoder_loss=0.2564, over 5805216.19 frames. ], batch size: 74, lr: 7.36e-03, grad_scale: 8.0 +2024-09-17 17:50:31,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=270400.0, ans=0.1 +2024-09-17 17:50:44,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=270440.0, ans=0.125 +2024-09-17 17:51:14,267 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.533e+01 8.903e+01 9.469e+01 1.020e+02 2.237e+02, threshold=1.894e+02, percent-clipped=2.0 +2024-09-17 17:51:35,157 INFO [train.py:1198] (0/2) Epoch 15, batch 4300, loss[loss=0.2652, ctc_loss=0.1595, cr_loss=0.3996, attn_decoder_loss=0.2681, over 29538.00 frames. ], tot_loss[loss=0.2538, ctc_loss=0.1493, cr_loss=0.3907, attn_decoder_loss=0.2567, over 5794554.82 frames. ], batch size: 87, lr: 7.36e-03, grad_scale: 8.0 +2024-09-17 17:51:35,824 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.57 vs. limit=12.0 +2024-09-17 17:51:40,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=270600.0, ans=0.125 +2024-09-17 17:51:45,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=270600.0, ans=0.125 +2024-09-17 17:52:11,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.49 vs. limit=10.0 +2024-09-17 17:52:30,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.43 vs. 
limit=6.0 +2024-09-17 17:52:47,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=270760.0, ans=0.07 +2024-09-17 17:52:50,150 INFO [train.py:1198] (0/2) Epoch 15, batch 4350, loss[loss=0.2686, ctc_loss=0.1691, cr_loss=0.4199, attn_decoder_loss=0.2703, over 29480.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1521, cr_loss=0.396, attn_decoder_loss=0.2601, over 5796972.83 frames. ], batch size: 97, lr: 7.35e-03, grad_scale: 8.0 +2024-09-17 17:53:10,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=270840.0, ans=0.2 +2024-09-17 17:53:11,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=270840.0, ans=0.1 +2024-09-17 17:53:26,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=270880.0, ans=0.04949747468305833 +2024-09-17 17:53:33,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=270920.0, ans=0.0 +2024-09-17 17:53:43,653 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.462e+01 9.074e+01 9.575e+01 1.004e+02 1.676e+02, threshold=1.915e+02, percent-clipped=0.0 +2024-09-17 17:53:46,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=270920.0, ans=0.125 +2024-09-17 17:54:04,393 INFO [train.py:1198] (0/2) Epoch 15, batch 4400, loss[loss=0.2732, ctc_loss=0.1709, cr_loss=0.4242, attn_decoder_loss=0.2752, over 27497.00 frames. ], tot_loss[loss=0.2595, ctc_loss=0.1537, cr_loss=0.3984, attn_decoder_loss=0.2624, over 5768045.99 frames. ], batch size: 124, lr: 7.35e-03, grad_scale: 16.0 +2024-09-17 17:54:26,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.48 vs. limit=15.0 +2024-09-17 17:54:59,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=271120.0, ans=0.125 +2024-09-17 17:55:06,265 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.62 vs. limit=10.0 +2024-09-17 17:55:07,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=271160.0, ans=0.125 +2024-09-17 17:55:12,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=271160.0, ans=0.0 +2024-09-17 17:55:15,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=271160.0, ans=0.1 +2024-09-17 17:55:15,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.20 vs. limit=15.0 +2024-09-17 17:55:19,424 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.81 vs. limit=15.0 +2024-09-17 17:55:19,873 INFO [train.py:1198] (0/2) Epoch 15, batch 4450, loss[loss=0.2903, ctc_loss=0.1907, cr_loss=0.4169, attn_decoder_loss=0.2921, over 19964.00 frames. 
], tot_loss[loss=0.2626, ctc_loss=0.1585, cr_loss=0.4026, attn_decoder_loss=0.2652, over 5582213.43 frames. ], batch size: 210, lr: 7.35e-03, grad_scale: 8.0 +2024-09-17 17:55:21,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=271200.0, ans=0.07 +2024-09-17 17:55:33,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=271240.0, ans=0.125 +2024-09-17 17:55:38,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=271240.0, ans=0.0 +2024-09-17 17:55:47,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=271240.0, ans=10.0 +2024-09-17 17:56:17,157 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.262e+01 9.506e+01 1.069e+02 1.178e+02 1.981e+02, threshold=2.138e+02, percent-clipped=1.0 +2024-09-17 17:56:17,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=271320.0, ans=0.0 +2024-09-17 17:56:26,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=271360.0, ans=0.125 +2024-09-17 17:56:28,272 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.03 vs. limit=15.0 +2024-09-17 17:56:34,790 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.36 vs. limit=15.0 +2024-09-17 17:56:35,243 INFO [train.py:1198] (0/2) Epoch 15, batch 4500, loss[loss=0.2829, ctc_loss=0.1976, cr_loss=0.429, attn_decoder_loss=0.2828, over 20017.00 frames. ], tot_loss[loss=0.2658, ctc_loss=0.1646, cr_loss=0.4051, attn_decoder_loss=0.2681, over 5242031.97 frames. ], batch size: 209, lr: 7.35e-03, grad_scale: 8.0 +2024-09-17 17:56:41,954 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.58 vs. limit=15.0 +2024-09-17 17:57:03,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=271480.0, ans=0.1 +2024-09-17 17:57:12,404 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-15.pt +2024-09-17 17:58:05,194 INFO [train.py:1198] (0/2) Epoch 16, batch 0, loss[loss=0.2381, ctc_loss=0.1341, cr_loss=0.3719, attn_decoder_loss=0.2414, over 29607.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1341, cr_loss=0.3719, attn_decoder_loss=0.2414, over 29607.00 frames. ], batch size: 73, lr: 7.11e-03, grad_scale: 16.0 +2024-09-17 17:58:05,195 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 17:58:11,275 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.3.encoder.layers.4.self_attn_weights, attn_weights_entropy = tensor([4.6519, 4.1508, 3.7418, 4.4602, 3.5596, 3.6036, 3.7345, 3.8424], + device='cuda:0') +2024-09-17 17:58:23,633 INFO [train.py:1230] (0/2) Epoch 16, validation: loss=0.2124, ctc_loss=0.04089, cr_loss=4.638e-15, attn_decoder_loss=0.2315, over 944034.00 frames. 
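The `loss` field in these entries appears to be a weighted sum of the three logged components, with weights matching the scales encoded in the experiment directory name in the checkpoint-save entry above (`ctc-loss-scale-0.1`, `aed-loss-scale-0.9`, `cr-loss-scale-0.02`; `cr` is presumably a consistency-regularization term). A minimal sketch under that assumption follows; it is an illustration inferred from the logged numbers, not the icefall training code, and the helper name `combined_loss` is hypothetical. It reproduces the logged totals to roughly four decimal places:

```python
# Hypothetical reconstruction of the logged `loss` field. The three scales come
# from the experiment directory name in this log; the combination rule itself
# is inferred from the logged numbers, not taken from the training code.
CTC_SCALE = 0.1   # ctc-loss-scale-0.1 in the exp dir name
AED_SCALE = 0.9   # aed-loss-scale-0.9
CR_SCALE = 0.02   # cr-loss-scale-0.02

def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float) -> float:
    """Weighted sum that appears to reproduce the logged `loss` values."""
    return CTC_SCALE * ctc_loss + AED_SCALE * attn_decoder_loss + CR_SCALE * cr_loss

# Epoch 16 validation entry directly above:
# loss=0.2124, ctc_loss=0.04089, cr_loss=4.638e-15, attn_decoder_loss=0.2315
assert abs(combined_loss(0.04089, 0.2315, 4.638e-15) - 0.2124) < 5e-4

# A training entry (Epoch 15, batch 2650):
# tot_loss[loss=0.2555, ctc_loss=0.1512, cr_loss=0.3936, attn_decoder_loss=0.2584]
assert abs(combined_loss(0.1512, 0.2584, 0.3936) - 0.2555) < 5e-4
```

Both checks pass to within rounding of the four-decimal log output, which is why the small `cr_loss` contribution (scale 0.02) barely moves the total, and why the validation `loss` tracks `attn_decoder_loss` so closely when `cr_loss` is effectively zero.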
+2024-09-17 17:58:23,633 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB
+2024-09-17 17:59:07,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=271580.0, ans=0.1
+2024-09-17 17:59:31,701 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 17:59:34,718 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 17:59:37,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=271660.0, ans=0.2
+2024-09-17 17:59:40,418 INFO [train.py:1198] (0/2) Epoch 16, batch 50, loss[loss=0.2235, ctc_loss=0.1261, cr_loss=0.3493, attn_decoder_loss=0.2265, over 29473.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1504, cr_loss=0.3931, attn_decoder_loss=0.2571, over 1269053.65 frames. ], batch size: 70, lr: 7.11e-03, grad_scale: 8.0
+2024-09-17 17:59:45,669 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.15 vs. limit=15.0
+2024-09-17 17:59:52,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=271700.0, ans=0.025
+2024-09-17 18:00:01,932 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.262e+01 1.005e+02 1.104e+02 1.206e+02 4.510e+02, threshold=2.208e+02, percent-clipped=2.0
+2024-09-17 18:00:27,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=271820.0, ans=0.1
+2024-09-17 18:00:29,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=271820.0, ans=0.025
+2024-09-17 18:00:49,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=271860.0, ans=0.125
+2024-09-17 18:00:56,278 INFO [train.py:1198] (0/2) Epoch 16, batch 100, loss[loss=0.2485, ctc_loss=0.1447, cr_loss=0.3974, attn_decoder_loss=0.2512, over 29537.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1534, cr_loss=0.3982, attn_decoder_loss=0.2602, over 2254360.75 frames. ], batch size: 76, lr: 7.10e-03, grad_scale: 8.0
+2024-09-17 18:01:01,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=271900.0, ans=0.04949747468305833
+2024-09-17 18:01:08,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=271900.0, ans=0.0
+2024-09-17 18:01:13,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=271940.0, ans=0.0
+2024-09-17 18:01:20,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=271940.0, ans=0.5
+2024-09-17 18:01:35,169 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-68000.pt
+2024-09-17 18:01:53,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.91 vs. limit=22.5
+2024-09-17 18:01:54,958 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.62 vs. limit=22.5
+2024-09-17 18:02:00,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=272020.0, ans=0.0
+2024-09-17 18:02:00,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=272020.0, ans=0.05
+2024-09-17 18:02:16,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=272060.0, ans=0.125
+2024-09-17 18:02:21,070 INFO [train.py:1198] (0/2) Epoch 16, batch 150, loss[loss=0.2335, ctc_loss=0.1354, cr_loss=0.3821, attn_decoder_loss=0.2359, over 29424.00 frames. ], tot_loss[loss=0.2548, ctc_loss=0.1503, cr_loss=0.3947, attn_decoder_loss=0.2577, over 3048836.26 frames. ], batch size: 70, lr: 7.10e-03, grad_scale: 8.0
+2024-09-17 18:02:32,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=272100.0, ans=0.125
+2024-09-17 18:02:34,186 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.46 vs. limit=12.0
+2024-09-17 18:02:42,307 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.742e+01 8.615e+01 9.462e+01 1.007e+02 3.571e+02, threshold=1.892e+02, percent-clipped=1.0
+2024-09-17 18:02:49,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=272140.0, ans=0.125
+2024-09-17 18:02:53,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=272180.0, ans=0.125
+2024-09-17 18:02:53,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=272180.0, ans=0.95
+2024-09-17 18:03:05,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=272180.0, ans=0.125
+2024-09-17 18:03:13,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=272220.0, ans=0.0
+2024-09-17 18:03:13,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=272220.0, ans=0.0
+2024-09-17 18:03:22,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=272260.0, ans=0.0
+2024-09-17 18:03:26,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=272260.0, ans=0.2
+2024-09-17 18:03:32,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=272260.0, ans=0.0
+2024-09-17 18:03:38,613 INFO [train.py:1198] (0/2) Epoch 16, batch 200, loss[loss=0.26, ctc_loss=0.1474, cr_loss=0.3844, attn_decoder_loss=0.264, over 27525.00 frames. ], tot_loss[loss=0.2536, ctc_loss=0.149, cr_loss=0.392, attn_decoder_loss=0.2565, over 3661025.70 frames. ], batch size: 125, lr: 7.10e-03, grad_scale: 8.0
+2024-09-17 18:03:44,013 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.17 vs. limit=15.0
+2024-09-17 18:03:54,029 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 18:04:31,350 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.92 vs. limit=15.0
+2024-09-17 18:04:49,206 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.44 vs. limit=15.0
+2024-09-17 18:04:54,435 INFO [train.py:1198] (0/2) Epoch 16, batch 250, loss[loss=0.2656, ctc_loss=0.1601, cr_loss=0.3972, attn_decoder_loss=0.2685, over 29222.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.149, cr_loss=0.3921, attn_decoder_loss=0.2566, over 4140752.08 frames. ], batch size: 100, lr: 7.10e-03, grad_scale: 8.0
+2024-09-17 18:05:15,431 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.756e+01 8.691e+01 9.311e+01 9.688e+01 2.016e+02, threshold=1.862e+02, percent-clipped=1.0
+2024-09-17 18:05:17,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=272540.0, ans=0.125
+2024-09-17 18:05:17,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=272540.0, ans=0.125
+2024-09-17 18:05:46,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=272620.0, ans=0.1
+2024-09-17 18:06:00,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=272660.0, ans=0.2
+2024-09-17 18:06:00,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.55 vs. limit=15.0
+2024-09-17 18:06:01,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=272660.0, ans=0.0
+2024-09-17 18:06:08,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.19 vs. limit=15.0
+2024-09-17 18:06:12,051 INFO [train.py:1198] (0/2) Epoch 16, batch 300, loss[loss=0.2698, ctc_loss=0.1632, cr_loss=0.4016, attn_decoder_loss=0.2727, over 29513.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1483, cr_loss=0.3904, attn_decoder_loss=0.2561, over 4508665.41 frames. ], batch size: 92, lr: 7.09e-03, grad_scale: 8.0
+2024-09-17 18:06:49,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=272780.0, ans=0.0
+2024-09-17 18:06:52,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=272780.0, ans=0.2
+2024-09-17 18:06:58,812 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 18:07:09,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=272820.0, ans=0.0
+2024-09-17 18:07:16,456 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.00 vs. limit=15.0
+2024-09-17 18:07:25,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.54 vs. limit=15.0
+2024-09-17 18:07:30,500 INFO [train.py:1198] (0/2) Epoch 16, batch 350, loss[loss=0.2245, ctc_loss=0.1252, cr_loss=0.3522, attn_decoder_loss=0.2277, over 29358.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1481, cr_loss=0.39, attn_decoder_loss=0.2563, over 4794839.04 frames. ], batch size: 71, lr: 7.09e-03, grad_scale: 8.0
+2024-09-17 18:07:51,719 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.443e+01 8.832e+01 9.583e+01 1.052e+02 2.461e+02, threshold=1.917e+02, percent-clipped=3.0
+2024-09-17 18:08:06,293 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.12 vs. limit=15.0
+2024-09-17 18:08:11,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=272980.0, ans=0.125
+2024-09-17 18:08:14,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=273020.0, ans=0.125
+2024-09-17 18:08:19,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=273020.0, ans=0.0
+2024-09-17 18:08:22,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=273020.0, ans=0.125
+2024-09-17 18:08:28,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=273020.0, ans=0.2
+2024-09-17 18:08:45,896 INFO [train.py:1198] (0/2) Epoch 16, batch 400, loss[loss=0.2575, ctc_loss=0.1454, cr_loss=0.3875, attn_decoder_loss=0.2614, over 29700.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1476, cr_loss=0.389, attn_decoder_loss=0.2559, over 5024708.72 frames. ], batch size: 82, lr: 7.09e-03, grad_scale: 16.0
+2024-09-17 18:08:53,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=273100.0, ans=0.0
+2024-09-17 18:09:19,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=273180.0, ans=0.07
+2024-09-17 18:09:32,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=273220.0, ans=0.125
+2024-09-17 18:09:35,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=273220.0, ans=0.2
+2024-09-17 18:09:37,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=273220.0, ans=0.125
+2024-09-17 18:09:56,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=273260.0, ans=0.125
+2024-09-17 18:10:04,238 INFO [train.py:1198] (0/2) Epoch 16, batch 450, loss[loss=0.2514, ctc_loss=0.14, cr_loss=0.3828, attn_decoder_loss=0.2553, over 29695.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1479, cr_loss=0.3893, attn_decoder_loss=0.2562, over 5186869.79 frames. ], batch size: 83, lr: 7.09e-03, grad_scale: 8.0
+2024-09-17 18:10:09,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.47 vs. limit=15.0
+2024-09-17 18:10:10,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=273300.0, ans=0.5
+2024-09-17 18:10:19,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=273340.0, ans=0.04949747468305833
+2024-09-17 18:10:26,870 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.756e+01 9.412e+01 1.001e+02 2.554e+02, threshold=1.882e+02, percent-clipped=1.0
+2024-09-17 18:11:19,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=273460.0, ans=0.0
+2024-09-17 18:11:22,632 INFO [train.py:1198] (0/2) Epoch 16, batch 500, loss[loss=0.2749, ctc_loss=0.1688, cr_loss=0.4389, attn_decoder_loss=0.2769, over 29398.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1467, cr_loss=0.3877, attn_decoder_loss=0.2553, over 5329425.74 frames. ], batch size: 94, lr: 7.08e-03, grad_scale: 8.0
+2024-09-17 18:11:27,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=273500.0, ans=0.1
+2024-09-17 18:11:32,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=273500.0, ans=0.05
+2024-09-17 18:11:44,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=273540.0, ans=0.0
+2024-09-17 18:11:56,666 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.19 vs. limit=15.0
+2024-09-17 18:12:12,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=273620.0, ans=0.0
+2024-09-17 18:12:39,146 INFO [train.py:1198] (0/2) Epoch 16, batch 550, loss[loss=0.2645, ctc_loss=0.155, cr_loss=0.3878, attn_decoder_loss=0.2681, over 28739.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1469, cr_loss=0.388, attn_decoder_loss=0.2555, over 5423177.23 frames. ], batch size: 104, lr: 7.08e-03, grad_scale: 8.0
+2024-09-17 18:12:42,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=273700.0, ans=0.125
+2024-09-17 18:12:57,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=273740.0, ans=0.1
+2024-09-17 18:12:58,327 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.35 vs. limit=6.0
+2024-09-17 18:13:01,896 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.696e+01 8.801e+01 9.400e+01 1.011e+02 1.613e+02, threshold=1.880e+02, percent-clipped=0.0
+2024-09-17 18:13:15,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=273780.0, ans=0.0
+2024-09-17 18:13:24,490 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.42 vs. limit=10.0
+2024-09-17 18:13:33,867 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.49 vs. limit=22.5
+2024-09-17 18:13:40,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=273860.0, ans=0.0
+2024-09-17 18:13:57,164 INFO [train.py:1198] (0/2) Epoch 16, batch 600, loss[loss=0.2657, ctc_loss=0.1564, cr_loss=0.4048, attn_decoder_loss=0.2689, over 29204.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1474, cr_loss=0.389, attn_decoder_loss=0.2559, over 5509145.01 frames. ], batch size: 100, lr: 7.08e-03, grad_scale: 8.0
+2024-09-17 18:14:42,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=273980.0, ans=0.2
+2024-09-17 18:14:43,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=274020.0, ans=0.0
+2024-09-17 18:14:49,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=274020.0, ans=0.125
+2024-09-17 18:15:03,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=274060.0, ans=0.0
+2024-09-17 18:15:04,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=274060.0, ans=0.2
+2024-09-17 18:15:15,061 INFO [train.py:1198] (0/2) Epoch 16, batch 650, loss[loss=0.2463, ctc_loss=0.1346, cr_loss=0.3688, attn_decoder_loss=0.2506, over 29742.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1465, cr_loss=0.3882, attn_decoder_loss=0.2552, over 5587059.91 frames. ], batch size: 81, lr: 7.08e-03, grad_scale: 8.0
+2024-09-17 18:15:21,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=274100.0, ans=0.125
+2024-09-17 18:15:22,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=274100.0, ans=0.125
+2024-09-17 18:15:28,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=274140.0, ans=0.015
+2024-09-17 18:15:32,396 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.74 vs. limit=10.0
+2024-09-17 18:15:37,740 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.963e+01 8.981e+01 9.378e+01 1.004e+02 1.703e+02, threshold=1.876e+02, percent-clipped=0.0
+2024-09-17 18:15:56,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=274180.0, ans=0.125
+2024-09-17 18:15:56,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=274180.0, ans=0.0
+2024-09-17 18:16:08,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=274220.0, ans=22.5
+2024-09-17 18:16:14,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=274260.0, ans=0.125
+2024-09-17 18:16:30,948 INFO [train.py:1198] (0/2) Epoch 16, batch 700, loss[loss=0.2418, ctc_loss=0.1377, cr_loss=0.3823, attn_decoder_loss=0.2449, over 29539.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1472, cr_loss=0.3894, attn_decoder_loss=0.2557, over 5636577.89 frames. ], batch size: 76, lr: 7.07e-03, grad_scale: 8.0
+2024-09-17 18:16:35,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=274300.0, ans=0.0
+2024-09-17 18:16:40,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=274300.0, ans=0.125
+2024-09-17 18:16:44,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=274340.0, ans=0.1
+2024-09-17 18:16:52,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=274340.0, ans=0.07
+2024-09-17 18:17:03,582 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.08 vs. limit=15.0
+2024-09-17 18:17:05,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=274380.0, ans=0.125
+2024-09-17 18:17:21,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=274420.0, ans=0.015
+2024-09-17 18:17:23,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=274420.0, ans=0.125
+2024-09-17 18:17:34,790 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.68 vs. limit=22.5
+2024-09-17 18:17:49,231 INFO [train.py:1198] (0/2) Epoch 16, batch 750, loss[loss=0.2581, ctc_loss=0.1551, cr_loss=0.4156, attn_decoder_loss=0.2603, over 29693.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1468, cr_loss=0.3887, attn_decoder_loss=0.2554, over 5675367.89 frames. ], batch size: 82, lr: 7.07e-03, grad_scale: 8.0
+2024-09-17 18:17:50,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=274500.0, ans=0.025
+2024-09-17 18:17:52,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=274500.0, ans=0.125
+2024-09-17 18:17:55,662 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 18:18:05,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.61 vs. limit=15.0
+2024-09-17 18:18:05,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=274540.0, ans=0.125
+2024-09-17 18:18:07,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=274540.0, ans=0.025
+2024-09-17 18:18:11,564 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.869e+01 8.564e+01 9.225e+01 9.974e+01 3.199e+02, threshold=1.845e+02, percent-clipped=1.0
+2024-09-17 18:18:16,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=274540.0, ans=0.1
+2024-09-17 18:18:40,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=274620.0, ans=0.125
+2024-09-17 18:18:47,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=274620.0, ans=0.0
+2024-09-17 18:18:56,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=274660.0, ans=0.0
+2024-09-17 18:19:02,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=274660.0, ans=0.0
+2024-09-17 18:19:06,958 INFO [train.py:1198] (0/2) Epoch 16, batch 800, loss[loss=0.2278, ctc_loss=0.1285, cr_loss=0.3665, attn_decoder_loss=0.2307, over 29625.00 frames. ], tot_loss[loss=0.2519, ctc_loss=0.1463, cr_loss=0.3878, attn_decoder_loss=0.255, over 5705197.54 frames. ], batch size: 73, lr: 7.07e-03, grad_scale: 16.0
+2024-09-17 18:19:10,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=274700.0, ans=0.1
+2024-09-17 18:19:15,325 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.08 vs. limit=10.0
+2024-09-17 18:19:25,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=274740.0, ans=0.0
+2024-09-17 18:19:26,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=274740.0, ans=0.125
+2024-09-17 18:19:32,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=274740.0, ans=0.125
+2024-09-17 18:19:46,902 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.13 vs. limit=15.0
+2024-09-17 18:19:50,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=274820.0, ans=0.0
+2024-09-17 18:19:53,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=274820.0, ans=0.025
+2024-09-17 18:19:55,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=274820.0, ans=0.125
+2024-09-17 18:20:02,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=274820.0, ans=0.125
+2024-09-17 18:20:22,032 INFO [train.py:1198] (0/2) Epoch 16, batch 850, loss[loss=0.2642, ctc_loss=0.1459, cr_loss=0.3819, attn_decoder_loss=0.2689, over 29715.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1459, cr_loss=0.3867, attn_decoder_loss=0.2545, over 5734938.20 frames. ], batch size: 89, lr: 7.07e-03, grad_scale: 8.0
+2024-09-17 18:20:37,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=274940.0, ans=0.125
+2024-09-17 18:20:40,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=274940.0, ans=0.2
+2024-09-17 18:20:45,875 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.953e+01 9.515e+01 1.010e+02 2.580e+02, threshold=1.903e+02, percent-clipped=2.0
+2024-09-17 18:20:52,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=274980.0, ans=0.025
+2024-09-17 18:21:04,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=274980.0, ans=0.2
+2024-09-17 18:21:13,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=275020.0, ans=0.125
+2024-09-17 18:21:28,645 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=14.41 vs. limit=15.0
+2024-09-17 18:21:39,955 INFO [train.py:1198] (0/2) Epoch 16, batch 900, loss[loss=0.2319, ctc_loss=0.1322, cr_loss=0.3775, attn_decoder_loss=0.2346, over 29620.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1467, cr_loss=0.3882, attn_decoder_loss=0.2554, over 5739658.09 frames. ], batch size: 73, lr: 7.06e-03, grad_scale: 8.0
+2024-09-17 18:21:49,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=275100.0, ans=0.025
+2024-09-17 18:22:22,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=275180.0, ans=0.025
+2024-09-17 18:22:32,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=275220.0, ans=0.1
+2024-09-17 18:22:39,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.76 vs. limit=15.0
+2024-09-17 18:22:43,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=275260.0, ans=0.0
+2024-09-17 18:22:43,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=275260.0, ans=0.0
+2024-09-17 18:22:57,941 INFO [train.py:1198] (0/2) Epoch 16, batch 950, loss[loss=0.2373, ctc_loss=0.1331, cr_loss=0.3602, attn_decoder_loss=0.2408, over 29521.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1472, cr_loss=0.3884, attn_decoder_loss=0.2558, over 5742554.00 frames. ], batch size: 74, lr: 7.06e-03, grad_scale: 8.0
+2024-09-17 18:22:59,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=275300.0, ans=0.125
+2024-09-17 18:23:13,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=275340.0, ans=0.0
+2024-09-17 18:23:21,954 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.870e+01 9.132e+01 9.762e+01 1.082e+02 2.725e+02, threshold=1.952e+02, percent-clipped=3.0
+2024-09-17 18:23:35,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=275380.0, ans=0.05
+2024-09-17 18:23:50,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=275420.0, ans=0.125
+2024-09-17 18:24:13,029 INFO [train.py:1198] (0/2) Epoch 16, batch 1000, loss[loss=0.2485, ctc_loss=0.1492, cr_loss=0.3975, attn_decoder_loss=0.2507, over 29492.00 frames. ], tot_loss[loss=0.2535, ctc_loss=0.1481, cr_loss=0.3898, attn_decoder_loss=0.2566, over 5736603.71 frames. ], batch size: 77, lr: 7.06e-03, grad_scale: 8.0
+2024-09-17 18:24:22,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.48 vs. limit=15.0
+2024-09-17 18:25:01,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=275620.0, ans=0.0
+2024-09-17 18:25:30,816 INFO [train.py:1198] (0/2) Epoch 16, batch 1050, loss[loss=0.2672, ctc_loss=0.157, cr_loss=0.3932, attn_decoder_loss=0.2707, over 29668.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.1474, cr_loss=0.3882, attn_decoder_loss=0.2556, over 5743351.25 frames. ], batch size: 85, lr: 7.06e-03, grad_scale: 8.0
+2024-09-17 18:25:36,196 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.87 vs. limit=12.0
+2024-09-17 18:25:46,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=275740.0, ans=0.125
+2024-09-17 18:25:55,380 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.705e+01 8.680e+01 9.093e+01 1.030e+02 1.882e+02, threshold=1.819e+02, percent-clipped=0.0
+2024-09-17 18:26:04,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=275780.0, ans=0.025
+2024-09-17 18:26:04,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=275780.0, ans=0.1
+2024-09-17 18:26:13,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=275780.0, ans=0.0
+2024-09-17 18:26:13,683 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.46 vs. limit=15.0
+2024-09-17 18:26:49,383 INFO [train.py:1198] (0/2) Epoch 16, batch 1100, loss[loss=0.2622, ctc_loss=0.1655, cr_loss=0.4261, attn_decoder_loss=0.2635, over 29443.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.1476, cr_loss=0.3888, attn_decoder_loss=0.2555, over 5757095.14 frames. ], batch size: 78, lr: 7.05e-03, grad_scale: 8.0
+2024-09-17 18:26:58,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=275900.0, ans=0.125
+2024-09-17 18:27:00,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=275900.0, ans=0.2
+2024-09-17 18:27:07,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=275940.0, ans=0.0
+2024-09-17 18:27:15,345 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 18:27:18,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=275980.0, ans=0.0
+2024-09-17 18:27:44,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=276020.0, ans=0.0
+2024-09-17 18:28:05,490 INFO [train.py:1198] (0/2) Epoch 16, batch 1150, loss[loss=0.2499, ctc_loss=0.1527, cr_loss=0.3898, attn_decoder_loss=0.252, over 29457.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1476, cr_loss=0.3884, attn_decoder_loss=0.2554, over 5755959.48 frames. ], batch size: 78, lr: 7.05e-03, grad_scale: 8.0
+2024-09-17 18:28:25,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=276140.0, ans=0.125
+2024-09-17 18:28:29,846 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.723e+01 8.744e+01 9.236e+01 1.006e+02 2.528e+02, threshold=1.847e+02, percent-clipped=1.0
+2024-09-17 18:28:54,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=276220.0, ans=0.0
+2024-09-17 18:29:19,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=276260.0, ans=0.0
+2024-09-17 18:29:23,625 INFO [train.py:1198] (0/2) Epoch 16, batch 1200, loss[loss=0.2621, ctc_loss=0.1573, cr_loss=0.4058, attn_decoder_loss=0.2647, over 29684.00 frames. ], tot_loss[loss=0.2532, ctc_loss=0.1482, cr_loss=0.3898, attn_decoder_loss=0.2563, over 5748902.78 frames. ], batch size: 85, lr: 7.05e-03, grad_scale: 16.0
+2024-09-17 18:29:31,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=276300.0, ans=0.0
+2024-09-17 18:29:42,709 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.92 vs. limit=15.0
+2024-09-17 18:30:05,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=276380.0, ans=0.1
+2024-09-17 18:30:22,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=276420.0, ans=0.09899494936611666
+2024-09-17 18:30:33,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=276460.0, ans=0.1
+2024-09-17 18:30:34,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=276460.0, ans=0.125
+2024-09-17 18:30:41,713 INFO [train.py:1198] (0/2) Epoch 16, batch 1250, loss[loss=0.2754, ctc_loss=0.1716, cr_loss=0.4463, attn_decoder_loss=0.277, over 29534.00 frames. ], tot_loss[loss=0.2538, ctc_loss=0.1485, cr_loss=0.391, attn_decoder_loss=0.2568, over 5776132.50 frames. ], batch size: 92, lr: 7.05e-03, grad_scale: 8.0
+2024-09-17 18:30:44,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=276500.0, ans=0.0
+2024-09-17 18:30:54,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=276500.0, ans=0.125
+2024-09-17 18:31:06,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=276540.0, ans=0.0
+2024-09-17 18:31:07,625 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.409e+01 8.801e+01 9.250e+01 9.945e+01 2.307e+02, threshold=1.850e+02, percent-clipped=1.0
+2024-09-17 18:31:13,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=276580.0, ans=0.1
+2024-09-17 18:31:15,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=276580.0, ans=0.125
+2024-09-17 18:31:38,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=276620.0, ans=0.025
+2024-09-17 18:31:41,935 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.85 vs. limit=22.5
+2024-09-17 18:31:43,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=276660.0, ans=0.1
+2024-09-17 18:31:57,698 INFO [train.py:1198] (0/2) Epoch 16, batch 1300, loss[loss=0.2639, ctc_loss=0.1513, cr_loss=0.3964, attn_decoder_loss=0.2676, over 28321.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1477, cr_loss=0.3899, attn_decoder_loss=0.2562, over 5780147.25 frames. ], batch size: 111, lr: 7.04e-03, grad_scale: 8.0
+2024-09-17 18:31:58,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=276700.0, ans=0.2
+2024-09-17 18:32:01,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=276700.0, ans=0.0
+2024-09-17 18:32:04,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.64 vs. limit=6.0
+2024-09-17 18:32:31,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=276780.0, ans=0.0
+2024-09-17 18:32:40,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=276780.0, ans=0.125
+2024-09-17 18:32:41,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=276780.0, ans=0.125
+2024-09-17 18:32:43,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=276820.0, ans=0.125
+2024-09-17 18:33:14,164 INFO [train.py:1198] (0/2) Epoch 16, batch 1350, loss[loss=0.2488, ctc_loss=0.1406, cr_loss=0.3784, attn_decoder_loss=0.2524, over 29782.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1477, cr_loss=0.3901, attn_decoder_loss=0.256, over 5799631.17 frames. ], batch size: 81, lr: 7.04e-03, grad_scale: 8.0
+2024-09-17 18:33:24,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=276900.0, ans=0.1
+2024-09-17 18:33:28,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=276900.0, ans=0.125
+2024-09-17 18:33:31,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=276940.0, ans=0.025
+2024-09-17 18:33:31,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=276940.0, ans=0.125
+2024-09-17 18:33:41,947 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.411e+01 8.515e+01 9.086e+01 9.689e+01 1.239e+02, threshold=1.817e+02, percent-clipped=0.0
+2024-09-17 18:33:59,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=276980.0, ans=0.0
+2024-09-17 18:34:10,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=277020.0, ans=0.125
+2024-09-17 18:34:19,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=277060.0, ans=0.125
+2024-09-17 18:34:21,463 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.33 vs. limit=6.0
+2024-09-17 18:34:30,540 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.29 vs. limit=15.0
+2024-09-17 18:34:34,350 INFO [train.py:1198] (0/2) Epoch 16, batch 1400, loss[loss=0.2291, ctc_loss=0.1264, cr_loss=0.3568, attn_decoder_loss=0.2325, over 29610.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.147, cr_loss=0.3888, attn_decoder_loss=0.2553, over 5809866.56 frames. ], batch size: 69, lr: 7.04e-03, grad_scale: 8.0
+2024-09-17 18:35:01,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=277140.0, ans=0.0
+2024-09-17 18:35:06,949 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.00 vs. limit=6.0
+2024-09-17 18:35:15,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=277180.0, ans=0.1
+2024-09-17 18:35:30,926 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.52 vs. limit=15.0
+2024-09-17 18:35:35,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=277260.0, ans=0.05
+2024-09-17 18:35:38,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=277260.0, ans=0.2
+2024-09-17 18:35:49,912 INFO [train.py:1198] (0/2) Epoch 16, batch 1450, loss[loss=0.272, ctc_loss=0.1667, cr_loss=0.428, attn_decoder_loss=0.2742, over 29466.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1473, cr_loss=0.3895, attn_decoder_loss=0.256, over 5806372.22 frames. ], batch size: 94, lr: 7.04e-03, grad_scale: 8.0
+2024-09-17 18:36:15,713 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.712e+01 8.888e+01 9.569e+01 1.025e+02 2.533e+02, threshold=1.914e+02, percent-clipped=1.0
+2024-09-17 18:36:34,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=277420.0, ans=0.125
+2024-09-17 18:36:59,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=277460.0, ans=0.025
+2024-09-17 18:37:05,624 INFO [train.py:1198] (0/2) Epoch 16, batch 1500, loss[loss=0.2518, ctc_loss=0.1436, cr_loss=0.3741, attn_decoder_loss=0.2555, over 29648.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1473, cr_loss=0.3899, attn_decoder_loss=0.2561, over 5805872.59 frames. ], batch size: 86, lr: 7.03e-03, grad_scale: 8.0
+2024-09-17 18:37:06,410 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.17 vs. limit=22.5
+2024-09-17 18:37:28,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.54 vs. limit=15.0
+2024-09-17 18:37:50,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=277580.0, ans=0.125
+2024-09-17 18:37:55,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=277620.0, ans=0.07
+2024-09-17 18:37:55,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=277620.0, ans=0.125
+2024-09-17 18:37:58,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=277620.0, ans=0.125
+2024-09-17 18:38:26,894 INFO [train.py:1198] (0/2) Epoch 16, batch 1550, loss[loss=0.2541, ctc_loss=0.1512, cr_loss=0.4036, attn_decoder_loss=0.2565, over 29504.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1479, cr_loss=0.3902, attn_decoder_loss=0.2559, over 5781039.01 frames. ], batch size: 90, lr: 7.03e-03, grad_scale: 8.0
+2024-09-17 18:38:30,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=277700.0, ans=0.125
+2024-09-17 18:38:48,832 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.28 vs. limit=15.0
+2024-09-17 18:38:52,452 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.047e+01 8.973e+01 9.587e+01 1.017e+02 1.956e+02, threshold=1.917e+02, percent-clipped=1.0
+2024-09-17 18:38:54,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.73 vs. limit=10.0
+2024-09-17 18:39:03,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=277780.0, ans=0.0
+2024-09-17 18:39:16,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=277820.0, ans=0.125
+2024-09-17 18:39:24,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=277820.0, ans=0.125
+2024-09-17 18:39:28,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=277860.0, ans=0.125
+2024-09-17 18:39:41,473 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.47 vs. limit=15.0
+2024-09-17 18:39:42,135 INFO [train.py:1198] (0/2) Epoch 16, batch 1600, loss[loss=0.2592, ctc_loss=0.1453, cr_loss=0.3701, attn_decoder_loss=0.2636, over 29673.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1477, cr_loss=0.3894, attn_decoder_loss=0.2558, over 5764150.76 frames. ], batch size: 85, lr: 7.03e-03, grad_scale: 16.0
+2024-09-17 18:39:43,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=277900.0, ans=0.125
+2024-09-17 18:39:59,835 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.38 vs. limit=15.0
+2024-09-17 18:40:08,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.17 vs. limit=10.0
+2024-09-17 18:40:09,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=277940.0, ans=0.125
+2024-09-17 18:40:17,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=277980.0, ans=0.0
+2024-09-17 18:40:23,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=277980.0, ans=0.2
+2024-09-17 18:40:42,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=278060.0, ans=0.125
+2024-09-17 18:40:56,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=278100.0, ans=0.125
+2024-09-17 18:40:57,568 INFO [train.py:1198] (0/2) Epoch 16, batch 1650, loss[loss=0.2716, ctc_loss=0.1602, cr_loss=0.4149, attn_decoder_loss=0.2748, over 29720.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.1476, cr_loss=0.389, attn_decoder_loss=0.2555, over 5760374.03 frames. ], batch size: 89, lr: 7.02e-03, grad_scale: 8.0
+2024-09-17 18:41:26,854 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.637e+01 9.438e+01 1.013e+02 1.642e+02, threshold=1.888e+02, percent-clipped=0.0
+2024-09-17 18:41:55,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=278220.0, ans=0.0
+2024-09-17 18:41:57,250 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.90 vs. limit=15.0
+2024-09-17 18:41:59,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=278220.0, ans=0.09899494936611666
+2024-09-17 18:42:17,015 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.44 vs. limit=22.5
+2024-09-17 18:42:17,549 INFO [train.py:1198] (0/2) Epoch 16, batch 1700, loss[loss=0.2239, ctc_loss=0.1198, cr_loss=0.3455, attn_decoder_loss=0.2278, over 29588.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1471, cr_loss=0.3886, attn_decoder_loss=0.2552, over 5780645.65 frames. ], batch size: 69, lr: 7.02e-03, grad_scale: 8.0
+2024-09-17 18:42:23,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=278300.0, ans=0.2
+2024-09-17 18:42:47,075 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.93 vs. limit=6.0
+2024-09-17 18:42:55,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=278380.0, ans=0.035
+2024-09-17 18:43:06,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=278420.0, ans=0.5
+2024-09-17 18:43:08,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.45 vs. limit=10.0
+2024-09-17 18:43:33,563 INFO [train.py:1198] (0/2) Epoch 16, batch 1750, loss[loss=0.2275, ctc_loss=0.1338, cr_loss=0.3626, attn_decoder_loss=0.2298, over 29360.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.1466, cr_loss=0.3883, attn_decoder_loss=0.2548, over 5786877.18 frames. ], batch size: 67, lr: 7.02e-03, grad_scale: 8.0
+2024-09-17 18:43:43,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=278500.0, ans=0.0
+2024-09-17 18:43:59,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=278540.0, ans=0.125
+2024-09-17 18:44:00,899 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.179e+01 8.367e+01 8.955e+01 9.624e+01 1.381e+02, threshold=1.791e+02, percent-clipped=0.0
+2024-09-17 18:44:31,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=278620.0, ans=0.125
+2024-09-17 18:44:49,086 INFO [train.py:1198] (0/2) Epoch 16, batch 1800, loss[loss=0.2632, ctc_loss=0.1549, cr_loss=0.4071, attn_decoder_loss=0.2662, over 29686.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1472, cr_loss=0.3889, attn_decoder_loss=0.2551, over 5790621.66 frames. ], batch size: 83, lr: 7.02e-03, grad_scale: 8.0
+2024-09-17 18:44:53,332 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.75 vs. limit=15.0
+2024-09-17 18:45:06,856 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 18:45:11,799 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.47 vs. limit=15.0
+2024-09-17 18:45:36,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=278780.0, ans=0.1
+2024-09-17 18:45:44,912 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.48 vs. limit=15.0
+2024-09-17 18:46:09,899 INFO [train.py:1198] (0/2) Epoch 16, batch 1850, loss[loss=0.2616, ctc_loss=0.1561, cr_loss=0.3947, attn_decoder_loss=0.2646, over 29639.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1473, cr_loss=0.39, attn_decoder_loss=0.2552, over 5796361.04 frames. ], batch size: 86, lr: 7.02e-03, grad_scale: 8.0
+2024-09-17 18:46:23,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=278940.0, ans=0.125
+2024-09-17 18:46:36,824 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.695e+01 8.719e+01 9.438e+01 1.018e+02 2.897e+02, threshold=1.888e+02, percent-clipped=1.0
+2024-09-17 18:46:47,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=278980.0, ans=0.0
+2024-09-17 18:46:58,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=279020.0, ans=0.125
+2024-09-17 18:47:24,870 INFO [train.py:1198] (0/2) Epoch 16, batch 1900, loss[loss=0.2612, ctc_loss=0.1541, cr_loss=0.4219, attn_decoder_loss=0.2637, over 29685.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1478, cr_loss=0.3913, attn_decoder_loss=0.2559, over 5804609.83 frames. ], batch size: 89, lr: 7.01e-03, grad_scale: 8.0
+2024-09-17 18:47:28,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=279100.0, ans=0.125
+2024-09-17 18:47:32,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=279100.0, ans=0.125
+2024-09-17 18:47:44,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.11 vs. limit=15.0
+2024-09-17 18:48:32,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=279260.0, ans=0.2
+2024-09-17 18:48:37,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=279260.0, ans=0.125
+2024-09-17 18:48:41,361 INFO [train.py:1198] (0/2) Epoch 16, batch 1950, loss[loss=0.2548, ctc_loss=0.1555, cr_loss=0.404, attn_decoder_loss=0.2569, over 29446.00 frames. ], tot_loss[loss=0.2538, ctc_loss=0.1482, cr_loss=0.3925, attn_decoder_loss=0.2568, over 5819413.50 frames. ], batch size: 78, lr: 7.01e-03, grad_scale: 8.0
+2024-09-17 18:48:41,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=279300.0, ans=0.2
+2024-09-17 18:48:47,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=279300.0, ans=0.125
+2024-09-17 18:48:47,755 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 18:48:48,582 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.99 vs. limit=15.0
+2024-09-17 18:49:01,627 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.84 vs. limit=15.0
+2024-09-17 18:49:11,015 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.737e+01 8.858e+01 9.399e+01 1.005e+02 1.788e+02, threshold=1.880e+02, percent-clipped=1.0
+2024-09-17 18:49:22,679 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 18:49:26,518 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.56 vs. limit=22.5
+2024-09-17 18:49:56,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=279460.0, ans=0.125
+2024-09-17 18:50:01,808 INFO [train.py:1198] (0/2) Epoch 16, batch 2000, loss[loss=0.2251, ctc_loss=0.1302, cr_loss=0.3628, attn_decoder_loss=0.2275, over 29342.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1491, cr_loss=0.3934, attn_decoder_loss=0.2575, over 5796717.16 frames. ], batch size: 67, lr: 7.01e-03, grad_scale: 16.0
+2024-09-17 18:50:20,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=279540.0, ans=0.0
+2024-09-17 18:50:28,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=279540.0, ans=0.125
+2024-09-17 18:50:48,513 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.85 vs. limit=15.0
+2024-09-17 18:50:50,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=279620.0, ans=0.125
+2024-09-17 18:50:56,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=279620.0, ans=0.0
+2024-09-17 18:51:17,831 INFO [train.py:1198] (0/2) Epoch 16, batch 2050, loss[loss=0.2229, ctc_loss=0.1252, cr_loss=0.3575, attn_decoder_loss=0.2259, over 29440.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1481, cr_loss=0.3911, attn_decoder_loss=0.2561, over 5788642.51 frames. ], batch size: 70, lr: 7.01e-03, grad_scale: 8.0
+2024-09-17 18:51:20,232 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.83 vs. limit=15.0
+2024-09-17 18:51:39,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=279740.0, ans=0.125
+2024-09-17 18:51:46,761 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.240e+01 9.024e+01 1.001e+02 1.116e+02 1.891e+02, threshold=2.001e+02, percent-clipped=1.0
+2024-09-17 18:51:50,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=279780.0, ans=0.0
+2024-09-17 18:51:56,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=279780.0, ans=0.1
+2024-09-17 18:52:11,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=279820.0, ans=0.125
+2024-09-17 18:52:33,739 INFO [train.py:1198] (0/2) Epoch 16, batch 2100, loss[loss=0.2513, ctc_loss=0.1399, cr_loss=0.4035, attn_decoder_loss=0.2547, over 29766.00 frames. ], tot_loss[loss=0.2526, ctc_loss=0.1474, cr_loss=0.3896, attn_decoder_loss=0.2556, over 5801259.92 frames. ], batch size: 81, lr: 7.00e-03, grad_scale: 8.0
+2024-09-17 18:52:34,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=279900.0, ans=0.125
+2024-09-17 18:53:22,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=280020.0, ans=0.025
+2024-09-17 18:53:28,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=280020.0, ans=0.125
+2024-09-17 18:53:53,808 INFO [train.py:1198] (0/2) Epoch 16, batch 2150, loss[loss=0.2494, ctc_loss=0.1389, cr_loss=0.3917, attn_decoder_loss=0.253, over 29442.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1467, cr_loss=0.3888, attn_decoder_loss=0.2551, over 5816222.00 frames. ], batch size: 78, lr: 7.00e-03, grad_scale: 8.0
+2024-09-17 18:54:18,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=280140.0, ans=0.0
+2024-09-17 18:54:21,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=280140.0, ans=0.0
+2024-09-17 18:54:22,947 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.825e+01 8.753e+01 9.321e+01 9.810e+01 1.786e+02, threshold=1.864e+02, percent-clipped=0.0
+2024-09-17 18:54:30,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=280180.0, ans=0.0
+2024-09-17 18:54:58,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=280260.0, ans=0.0
+2024-09-17 18:55:10,162 INFO [train.py:1198] (0/2) Epoch 16, batch 2200, loss[loss=0.267, ctc_loss=0.1649, cr_loss=0.4167, attn_decoder_loss=0.2691, over 29602.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1469, cr_loss=0.3892, attn_decoder_loss=0.2552, over 5812594.36 frames. ], batch size: 86, lr: 7.00e-03, grad_scale: 8.0
+2024-09-17 18:55:12,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.98 vs. limit=15.0
+2024-09-17 18:55:16,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=280300.0, ans=0.125
+2024-09-17 18:55:57,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=280420.0, ans=0.125
+2024-09-17 18:56:03,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=280420.0, ans=0.125
+2024-09-17 18:56:03,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=280420.0, ans=0.125
+2024-09-17 18:56:06,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=280420.0, ans=0.125
+2024-09-17 18:56:25,749 INFO [train.py:1198] (0/2) Epoch 16, batch 2250, loss[loss=0.2484, ctc_loss=0.1387, cr_loss=0.3715, attn_decoder_loss=0.2523, over 29699.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1467, cr_loss=0.3891, attn_decoder_loss=0.2552, over 5812252.83 frames. ], batch size: 82, lr: 7.00e-03, grad_scale: 8.0
], batch size: 82, lr: 7.00e-03, grad_scale: 8.0 +2024-09-17 18:56:26,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=280500.0, ans=0.125 +2024-09-17 18:56:50,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=280540.0, ans=0.025 +2024-09-17 18:56:53,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=280540.0, ans=0.125 +2024-09-17 18:56:53,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=280540.0, ans=0.0 +2024-09-17 18:56:54,263 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.834e+01 9.325e+01 1.002e+02 2.125e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-17 18:57:10,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=280580.0, ans=0.015 +2024-09-17 18:57:12,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=280580.0, ans=0.04949747468305833 +2024-09-17 18:57:16,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=280620.0, ans=0.0 +2024-09-17 18:57:24,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=280620.0, ans=0.125 +2024-09-17 18:57:35,742 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.36 vs. limit=22.5 +2024-09-17 18:57:45,346 INFO [train.py:1198] (0/2) Epoch 16, batch 2300, loss[loss=0.2262, ctc_loss=0.1223, cr_loss=0.3323, attn_decoder_loss=0.2303, over 29328.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1463, cr_loss=0.3872, attn_decoder_loss=0.2543, over 5800807.70 frames. 
], batch size: 71, lr: 6.99e-03, grad_scale: 8.0 +2024-09-17 18:57:58,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=280740.0, ans=0.0 +2024-09-17 18:58:23,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=280780.0, ans=0.05 +2024-09-17 18:58:31,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=280820.0, ans=0.2 +2024-09-17 18:58:38,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=280820.0, ans=0.125 +2024-09-17 18:58:38,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=280820.0, ans=0.125 +2024-09-17 18:58:40,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=280820.0, ans=0.125 +2024-09-17 18:58:46,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=280860.0, ans=0.125 +2024-09-17 18:58:54,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=280860.0, ans=0.0 +2024-09-17 18:59:01,558 INFO [train.py:1198] (0/2) Epoch 16, batch 2350, loss[loss=0.2569, ctc_loss=0.1493, cr_loss=0.392, attn_decoder_loss=0.2601, over 29694.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1463, cr_loss=0.3875, attn_decoder_loss=0.2544, over 5805997.70 frames. ], batch size: 83, lr: 6.99e-03, grad_scale: 8.0 +2024-09-17 18:59:09,941 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.69 vs. limit=15.0 +2024-09-17 18:59:30,274 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.817e+01 8.837e+01 9.442e+01 1.004e+02 6.270e+02, threshold=1.888e+02, percent-clipped=1.0 +2024-09-17 18:59:42,367 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.12 vs. limit=6.0 +2024-09-17 18:59:43,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.23 vs. limit=22.5 +2024-09-17 19:00:13,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=281060.0, ans=0.125 +2024-09-17 19:00:17,727 INFO [train.py:1198] (0/2) Epoch 16, batch 2400, loss[loss=0.237, ctc_loss=0.1328, cr_loss=0.3747, attn_decoder_loss=0.2402, over 29544.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1464, cr_loss=0.3885, attn_decoder_loss=0.2549, over 5809008.32 frames. 
], batch size: 76, lr: 6.99e-03, grad_scale: 16.0 +2024-09-17 19:00:18,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=281100.0, ans=0.125 +2024-09-17 19:00:25,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=281100.0, ans=0.025 +2024-09-17 19:00:58,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=281180.0, ans=0.025 +2024-09-17 19:00:59,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=281180.0, ans=0.0 +2024-09-17 19:01:01,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=281180.0, ans=0.0 +2024-09-17 19:01:22,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=281260.0, ans=0.1 +2024-09-17 19:01:36,124 INFO [train.py:1198] (0/2) Epoch 16, batch 2450, loss[loss=0.2723, ctc_loss=0.1692, cr_loss=0.4378, attn_decoder_loss=0.274, over 29715.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1473, cr_loss=0.3894, attn_decoder_loss=0.2558, over 5786125.56 frames. ], batch size: 82, lr: 6.99e-03, grad_scale: 8.0 +2024-09-17 19:01:43,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=281300.0, ans=0.125 +2024-09-17 19:02:06,405 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 9.367e+01 1.015e+02 1.200e+02 3.423e+02, threshold=2.029e+02, percent-clipped=2.0 +2024-09-17 19:02:08,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=281380.0, ans=0.125 +2024-09-17 19:02:20,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=281420.0, ans=0.1 +2024-09-17 19:02:34,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=281420.0, ans=0.125 +2024-09-17 19:02:51,887 INFO [train.py:1198] (0/2) Epoch 16, batch 2500, loss[loss=0.2644, ctc_loss=0.147, cr_loss=0.3932, attn_decoder_loss=0.2687, over 29631.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1472, cr_loss=0.3893, attn_decoder_loss=0.2557, over 5796359.09 frames. ], batch size: 86, lr: 6.98e-03, grad_scale: 8.0 +2024-09-17 19:02:59,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=281500.0, ans=0.0 +2024-09-17 19:03:13,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=281540.0, ans=0.125 +2024-09-17 19:04:01,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=281660.0, ans=0.125 +2024-09-17 19:04:07,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=281700.0, ans=0.125 +2024-09-17 19:04:08,348 INFO [train.py:1198] (0/2) Epoch 16, batch 2550, loss[loss=0.2243, ctc_loss=0.1295, cr_loss=0.3531, attn_decoder_loss=0.227, over 29296.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.147, cr_loss=0.3889, attn_decoder_loss=0.2558, over 5800194.08 frames. 
], batch size: 67, lr: 6.98e-03, grad_scale: 8.0 +2024-09-17 19:04:21,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.90 vs. limit=12.0 +2024-09-17 19:04:26,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=281740.0, ans=0.0 +2024-09-17 19:04:29,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=281740.0, ans=0.0 +2024-09-17 19:04:31,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=281740.0, ans=0.125 +2024-09-17 19:04:37,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=281780.0, ans=0.0 +2024-09-17 19:04:38,396 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.661e+01 9.348e+01 1.013e+02 3.774e+02, threshold=1.870e+02, percent-clipped=2.0 +2024-09-17 19:04:41,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=281780.0, ans=0.0 +2024-09-17 19:04:43,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=281780.0, ans=0.125 +2024-09-17 19:04:43,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=281780.0, ans=0.125 +2024-09-17 19:04:55,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=281780.0, ans=0.125 +2024-09-17 19:05:28,338 INFO [train.py:1198] (0/2) Epoch 16, batch 2600, loss[loss=0.2494, ctc_loss=0.1358, cr_loss=0.3805, attn_decoder_loss=0.2535, over 29420.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1473, cr_loss=0.39, attn_decoder_loss=0.2561, over 5796004.96 frames. ], batch size: 78, lr: 6.98e-03, grad_scale: 8.0 +2024-09-17 19:05:30,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=281900.0, ans=0.125 +2024-09-17 19:05:30,850 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.72 vs. limit=22.5 +2024-09-17 19:05:43,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=281940.0, ans=0.125 +2024-09-17 19:05:49,996 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.13 vs. limit=15.0 +2024-09-17 19:05:53,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=281940.0, ans=0.2 +2024-09-17 19:06:04,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=281980.0, ans=0.125 +2024-09-17 19:06:16,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=282020.0, ans=0.125 +2024-09-17 19:06:33,743 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.23 vs. 
limit=15.0 +2024-09-17 19:06:43,662 INFO [train.py:1198] (0/2) Epoch 16, batch 2650, loss[loss=0.2651, ctc_loss=0.1523, cr_loss=0.3971, attn_decoder_loss=0.2688, over 29211.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1475, cr_loss=0.3904, attn_decoder_loss=0.2564, over 5802902.76 frames. ], batch size: 100, lr: 6.98e-03, grad_scale: 8.0 +2024-09-17 19:06:48,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=282100.0, ans=0.1 +2024-09-17 19:07:01,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.34 vs. limit=22.5 +2024-09-17 19:07:09,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=282140.0, ans=0.2 +2024-09-17 19:07:12,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=282180.0, ans=0.125 +2024-09-17 19:07:13,814 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.875e+01 8.834e+01 9.287e+01 9.746e+01 1.582e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-17 19:07:17,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=282180.0, ans=0.1 +2024-09-17 19:07:19,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=282180.0, ans=0.025 +2024-09-17 19:07:51,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.26 vs. limit=15.0 +2024-09-17 19:07:59,131 INFO [train.py:1198] (0/2) Epoch 16, batch 2700, loss[loss=0.2549, ctc_loss=0.1452, cr_loss=0.3854, attn_decoder_loss=0.2585, over 29560.00 frames. ], tot_loss[loss=0.2532, ctc_loss=0.1471, cr_loss=0.3897, attn_decoder_loss=0.2564, over 5797904.31 frames. ], batch size: 87, lr: 6.97e-03, grad_scale: 8.0 +2024-09-17 19:08:07,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten.whitening_limit, batch_count=282300.0, ans=15.0 +2024-09-17 19:08:42,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=282380.0, ans=0.125 +2024-09-17 19:08:49,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=282420.0, ans=0.0 +2024-09-17 19:08:52,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=282420.0, ans=0.1 +2024-09-17 19:09:04,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.35 vs. limit=15.0 +2024-09-17 19:09:16,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=282460.0, ans=0.2 +2024-09-17 19:09:19,236 INFO [train.py:1198] (0/2) Epoch 16, batch 2750, loss[loss=0.2441, ctc_loss=0.1405, cr_loss=0.3686, attn_decoder_loss=0.2474, over 29534.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1462, cr_loss=0.3881, attn_decoder_loss=0.2552, over 5796513.51 frames. 
], batch size: 75, lr: 6.97e-03, grad_scale: 8.0 +2024-09-17 19:09:21,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=282500.0, ans=0.125 +2024-09-17 19:09:23,534 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.42 vs. limit=22.5 +2024-09-17 19:09:24,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=282500.0, ans=0.1 +2024-09-17 19:09:49,726 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.861e+01 8.782e+01 9.545e+01 1.036e+02 3.066e+02, threshold=1.909e+02, percent-clipped=3.0 +2024-09-17 19:10:35,385 INFO [train.py:1198] (0/2) Epoch 16, batch 2800, loss[loss=0.2658, ctc_loss=0.1693, cr_loss=0.385, attn_decoder_loss=0.2679, over 20683.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1466, cr_loss=0.388, attn_decoder_loss=0.2553, over 5776866.88 frames. ], batch size: 213, lr: 6.97e-03, grad_scale: 16.0 +2024-09-17 19:10:43,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=282700.0, ans=0.05 +2024-09-17 19:10:59,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=282740.0, ans=0.2 +2024-09-17 19:11:16,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.76 vs. limit=10.0 +2024-09-17 19:11:17,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=282780.0, ans=0.0 +2024-09-17 19:11:28,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=282820.0, ans=0.1 +2024-09-17 19:11:39,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=282860.0, ans=0.125 +2024-09-17 19:11:42,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=282860.0, ans=0.025 +2024-09-17 19:11:50,921 INFO [train.py:1198] (0/2) Epoch 16, batch 2850, loss[loss=0.2437, ctc_loss=0.1374, cr_loss=0.3687, attn_decoder_loss=0.2473, over 29496.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1471, cr_loss=0.3887, attn_decoder_loss=0.2559, over 5761665.89 frames. 
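The WARNING records from optim.py report the quartiles of recently observed gradient norms together with a clipping threshold. In every such record in this log the threshold equals Clipping_scale times the logged median (for the warning just above, 2.0 x 9.545e+01 = 1.909e+02). A minimal sketch of that rule, assuming only what the log shows; the function name and the idea of keeping a buffer of recent norms are illustrative, not taken from optim.py:

```python
import torch

def clipping_threshold(recent_grad_norms: torch.Tensor,
                       clipping_scale: float = 2.0) -> float:
    # Matches the WARNING records above: the threshold is clipping_scale
    # times the median of recent gradient norms (the middle logged quartile).
    return clipping_scale * recent_grad_norms.median().item()

# Quartiles from the warning just above:
# 7.861e+01 8.782e+01 9.545e+01 1.036e+02 3.066e+02, threshold=1.909e+02
quartiles = torch.tensor([78.61, 87.82, 95.45, 103.6, 306.6])
print(clipping_threshold(quartiles))  # -> 190.9, the logged threshold
```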
], batch size: 77, lr: 6.97e-03, grad_scale: 8.0 +2024-09-17 19:12:01,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=282900.0, ans=0.125 +2024-09-17 19:12:04,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=282940.0, ans=0.0 +2024-09-17 19:12:09,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=282940.0, ans=0.07 +2024-09-17 19:12:24,809 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.778e+01 8.830e+01 9.442e+01 1.037e+02 2.855e+02, threshold=1.888e+02, percent-clipped=3.0 +2024-09-17 19:12:32,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=282980.0, ans=0.1 +2024-09-17 19:13:10,860 INFO [train.py:1198] (0/2) Epoch 16, batch 2900, loss[loss=0.2331, ctc_loss=0.1247, cr_loss=0.3349, attn_decoder_loss=0.2377, over 29412.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1476, cr_loss=0.3908, attn_decoder_loss=0.2572, over 5787685.95 frames. ], batch size: 79, lr: 6.96e-03, grad_scale: 8.0 +2024-09-17 19:13:26,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=283140.0, ans=0.125 +2024-09-17 19:13:44,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=283180.0, ans=0.025 +2024-09-17 19:13:50,944 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:13:59,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=283220.0, ans=0.1 +2024-09-17 19:14:02,089 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.68 vs. limit=22.5 +2024-09-17 19:14:07,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=283220.0, ans=0.125 +2024-09-17 19:14:15,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.73 vs. limit=6.0 +2024-09-17 19:14:18,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=283260.0, ans=0.125 +2024-09-17 19:14:27,151 INFO [train.py:1198] (0/2) Epoch 16, batch 2950, loss[loss=0.2466, ctc_loss=0.1436, cr_loss=0.3891, attn_decoder_loss=0.2494, over 29511.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1472, cr_loss=0.3898, attn_decoder_loss=0.2559, over 5781383.24 frames. 
], batch size: 75, lr: 6.96e-03, grad_scale: 8.0 +2024-09-17 19:14:39,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_positive, batch_count=283300.0, ans=0.05 +2024-09-17 19:14:58,915 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.449e+01 8.668e+01 9.077e+01 9.673e+01 1.448e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-17 19:15:05,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=283380.0, ans=0.1 +2024-09-17 19:15:27,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=283460.0, ans=0.2 +2024-09-17 19:15:42,143 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=16.43 vs. limit=15.0 +2024-09-17 19:15:42,946 INFO [train.py:1198] (0/2) Epoch 16, batch 3000, loss[loss=0.2522, ctc_loss=0.1476, cr_loss=0.4124, attn_decoder_loss=0.2547, over 29760.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1475, cr_loss=0.3905, attn_decoder_loss=0.2561, over 5782068.62 frames. ], batch size: 81, lr: 6.96e-03, grad_scale: 8.0 +2024-09-17 19:15:42,947 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 19:16:01,439 INFO [train.py:1230] (0/2) Epoch 16, validation: loss=0.2115, ctc_loss=0.04131, cr_loss=4.919e-15, attn_decoder_loss=0.2304, over 944034.00 frames. +2024-09-17 19:16:01,439 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 19:16:48,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=283580.0, ans=0.0 +2024-09-17 19:16:50,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=283620.0, ans=0.0 +2024-09-17 19:16:54,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=283620.0, ans=0.125 +2024-09-17 19:17:16,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=283660.0, ans=0.125 +2024-09-17 19:17:22,004 INFO [train.py:1198] (0/2) Epoch 16, batch 3050, loss[loss=0.2496, ctc_loss=0.1521, cr_loss=0.3951, attn_decoder_loss=0.2517, over 29539.00 frames. ], tot_loss[loss=0.2535, ctc_loss=0.1479, cr_loss=0.3907, attn_decoder_loss=0.2566, over 5776071.40 frames. ], batch size: 76, lr: 6.96e-03, grad_scale: 8.0 +2024-09-17 19:17:27,564 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=16.22 vs. limit=15.0 +2024-09-17 19:17:43,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=283740.0, ans=0.125 +2024-09-17 19:17:53,959 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.977e+01 8.926e+01 9.487e+01 1.019e+02 3.855e+02, threshold=1.897e+02, percent-clipped=1.0 +2024-09-17 19:18:05,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.43 vs. 
limit=22.5 +2024-09-17 19:18:06,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=283820.0, ans=0.125 +2024-09-17 19:18:22,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=283860.0, ans=0.125 +2024-09-17 19:18:37,844 INFO [train.py:1198] (0/2) Epoch 16, batch 3100, loss[loss=0.2691, ctc_loss=0.1604, cr_loss=0.4174, attn_decoder_loss=0.2718, over 29204.00 frames. ], tot_loss[loss=0.2532, ctc_loss=0.1477, cr_loss=0.3906, attn_decoder_loss=0.2562, over 5776328.13 frames. ], batch size: 100, lr: 6.95e-03, grad_scale: 8.0 +2024-09-17 19:18:57,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=283940.0, ans=0.0 +2024-09-17 19:19:11,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=283980.0, ans=0.0 +2024-09-17 19:19:54,440 INFO [train.py:1198] (0/2) Epoch 16, batch 3150, loss[loss=0.2757, ctc_loss=0.164, cr_loss=0.4316, attn_decoder_loss=0.2785, over 28844.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1475, cr_loss=0.39, attn_decoder_loss=0.256, over 5782777.57 frames. ], batch size: 104, lr: 6.95e-03, grad_scale: 8.0 +2024-09-17 19:20:10,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.23 vs. limit=6.0 +2024-09-17 19:20:17,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=284140.0, ans=0.125 +2024-09-17 19:20:26,927 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:20:28,020 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.755e+01 8.635e+01 9.420e+01 9.793e+01 2.697e+02, threshold=1.884e+02, percent-clipped=2.0 +2024-09-17 19:20:31,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=284180.0, ans=0.1 +2024-09-17 19:20:46,071 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.98 vs. limit=15.0 +2024-09-17 19:20:56,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=284220.0, ans=0.125 +2024-09-17 19:21:06,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=284260.0, ans=0.0 +2024-09-17 19:21:13,978 INFO [train.py:1198] (0/2) Epoch 16, batch 3200, loss[loss=0.2489, ctc_loss=0.1419, cr_loss=0.3788, attn_decoder_loss=0.2524, over 29428.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1464, cr_loss=0.3886, attn_decoder_loss=0.2551, over 5793938.76 frames. ], batch size: 79, lr: 6.95e-03, grad_scale: 16.0 +2024-09-17 19:21:20,820 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.19 vs. 
limit=15.0 +2024-09-17 19:21:21,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=284300.0, ans=0.125 +2024-09-17 19:21:30,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=284340.0, ans=0.5 +2024-09-17 19:21:44,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=284380.0, ans=0.0 +2024-09-17 19:21:46,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=284380.0, ans=0.1 +2024-09-17 19:21:56,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=284380.0, ans=0.0 +2024-09-17 19:22:01,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=284420.0, ans=0.1 +2024-09-17 19:22:06,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=284420.0, ans=0.1 +2024-09-17 19:22:15,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=284460.0, ans=0.125 +2024-09-17 19:22:25,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=284460.0, ans=0.0 +2024-09-17 19:22:29,947 INFO [train.py:1198] (0/2) Epoch 16, batch 3250, loss[loss=0.2587, ctc_loss=0.1489, cr_loss=0.4002, attn_decoder_loss=0.262, over 29703.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1462, cr_loss=0.3889, attn_decoder_loss=0.2555, over 5800888.20 frames. ], batch size: 84, lr: 6.95e-03, grad_scale: 8.0 +2024-09-17 19:22:42,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=284500.0, ans=0.0 +2024-09-17 19:22:48,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=284540.0, ans=0.0 +2024-09-17 19:23:03,115 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.613e+01 8.619e+01 9.155e+01 9.687e+01 2.235e+02, threshold=1.831e+02, percent-clipped=1.0 +2024-09-17 19:23:07,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=284580.0, ans=0.125 +2024-09-17 19:23:16,289 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.62 vs. limit=15.0 +2024-09-17 19:23:45,584 INFO [train.py:1198] (0/2) Epoch 16, batch 3300, loss[loss=0.2626, ctc_loss=0.1521, cr_loss=0.4003, attn_decoder_loss=0.266, over 28533.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1456, cr_loss=0.3874, attn_decoder_loss=0.2544, over 5797880.63 frames. ], batch size: 112, lr: 6.94e-03, grad_scale: 8.0 +2024-09-17 19:23:49,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=284700.0, ans=0.125 +2024-09-17 19:23:51,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=284700.0, ans=0.125 +2024-09-17 19:23:52,601 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.96 vs. 
limit=15.0 +2024-09-17 19:24:12,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=284740.0, ans=0.025 +2024-09-17 19:24:28,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=284780.0, ans=0.125 +2024-09-17 19:24:36,720 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.20 vs. limit=15.0 +2024-09-17 19:24:37,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=284820.0, ans=0.125 +2024-09-17 19:24:45,937 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.63 vs. limit=15.0 +2024-09-17 19:25:01,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=284860.0, ans=0.0 +2024-09-17 19:25:06,083 INFO [train.py:1198] (0/2) Epoch 16, batch 3350, loss[loss=0.2645, ctc_loss=0.1574, cr_loss=0.3966, attn_decoder_loss=0.2676, over 28931.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1468, cr_loss=0.3891, attn_decoder_loss=0.2553, over 5773658.95 frames. ], batch size: 104, lr: 6.94e-03, grad_scale: 8.0 +2024-09-17 19:25:09,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=284900.0, ans=0.04949747468305833 +2024-09-17 19:25:39,363 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.447e+01 8.948e+01 9.628e+01 1.043e+02 1.952e+02, threshold=1.926e+02, percent-clipped=2.0 +2024-09-17 19:25:49,453 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.76 vs. limit=15.0 +2024-09-17 19:26:03,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=285020.0, ans=0.125 +2024-09-17 19:26:21,750 INFO [train.py:1198] (0/2) Epoch 16, batch 3400, loss[loss=0.2202, ctc_loss=0.1221, cr_loss=0.3509, attn_decoder_loss=0.2233, over 29384.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.147, cr_loss=0.3894, attn_decoder_loss=0.2553, over 5765994.03 frames. 
], batch size: 67, lr: 6.94e-03, grad_scale: 8.0 +2024-09-17 19:26:35,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=285140.0, ans=0.1 +2024-09-17 19:26:38,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=285140.0, ans=0.05 +2024-09-17 19:26:44,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=285140.0, ans=0.0 +2024-09-17 19:26:46,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=285140.0, ans=0.125 +2024-09-17 19:27:01,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=285180.0, ans=0.125 +2024-09-17 19:27:01,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=285180.0, ans=0.07 +2024-09-17 19:27:13,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=285220.0, ans=0.0 +2024-09-17 19:27:14,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.64 vs. limit=6.0 +2024-09-17 19:27:20,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=285260.0, ans=0.125 +2024-09-17 19:27:34,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=285260.0, ans=0.125 +2024-09-17 19:27:37,294 INFO [train.py:1198] (0/2) Epoch 16, batch 3450, loss[loss=0.2738, ctc_loss=0.1597, cr_loss=0.3921, attn_decoder_loss=0.2777, over 28214.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1473, cr_loss=0.39, attn_decoder_loss=0.2558, over 5773387.09 frames. ], batch size: 111, lr: 6.94e-03, grad_scale: 8.0 +2024-09-17 19:27:56,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=285340.0, ans=0.125 +2024-09-17 19:28:12,412 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.860e+01 9.055e+01 9.633e+01 1.034e+02 1.561e+02, threshold=1.927e+02, percent-clipped=0.0 +2024-09-17 19:28:57,136 INFO [train.py:1198] (0/2) Epoch 16, batch 3500, loss[loss=0.2232, ctc_loss=0.122, cr_loss=0.3388, attn_decoder_loss=0.2269, over 29357.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1468, cr_loss=0.3889, attn_decoder_loss=0.2552, over 5775426.32 frames. 
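The bulk of the scaling.py:214 records track ScheduledFloat parameters: regularization knobs (skip rates, balancer probabilities, attention rates) whose current value (ans=...) is looked up from the global batch_count. A minimal sketch of such a batch-count-keyed, piecewise-linear schedule; the class name and the breakpoints below are illustrative, not the schedule actually used in this run:

```python
import bisect

class PiecewiseLinearSchedule:
    """Illustrative stand-in for the ScheduledFloat values logged above:
    linearly interpolate between (batch_count, value) breakpoints and
    clamp to the end values outside the covered range."""

    def __init__(self, *points: tuple) -> None:
        points = sorted(points)
        self.xs = [p[0] for p in points]
        self.ys = [p[1] for p in points]

    def __call__(self, batch_count: float) -> float:
        if batch_count <= self.xs[0]:
            return self.ys[0]
        if batch_count >= self.xs[-1]:
            return self.ys[-1]
        i = bisect.bisect_right(self.xs, batch_count)
        x0, x1 = self.xs[i - 1], self.xs[i]
        y0, y1 = self.ys[i - 1], self.ys[i]
        return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

# Illustrative breakpoints: a skip rate that decays to zero during training.
conv_skip_rate = PiecewiseLinearSchedule((0.0, 0.5), (20000.0, 0.05), (50000.0, 0.0))
print(conv_skip_rate(285220.0))  # -> 0.0, as in the conv_skip_rate records above
```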
], batch size: 71, lr: 6.93e-03, grad_scale: 8.0 +2024-09-17 19:29:06,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=285500.0, ans=0.1 +2024-09-17 19:29:18,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=285540.0, ans=0.05 +2024-09-17 19:30:02,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=285660.0, ans=0.025 +2024-09-17 19:30:06,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=285660.0, ans=0.0 +2024-09-17 19:30:12,389 INFO [train.py:1198] (0/2) Epoch 16, batch 3550, loss[loss=0.2576, ctc_loss=0.149, cr_loss=0.4023, attn_decoder_loss=0.2607, over 29710.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1468, cr_loss=0.3887, attn_decoder_loss=0.2553, over 5781765.41 frames. ], batch size: 89, lr: 6.93e-03, grad_scale: 8.0 +2024-09-17 19:30:23,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=285700.0, ans=0.0 +2024-09-17 19:30:39,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=285740.0, ans=0.025 +2024-09-17 19:30:45,353 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.292e+01 8.552e+01 9.135e+01 9.623e+01 1.565e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-17 19:30:51,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=285780.0, ans=0.0 +2024-09-17 19:30:51,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=285780.0, ans=0.125 +2024-09-17 19:31:06,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=285820.0, ans=0.0 +2024-09-17 19:31:15,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=285860.0, ans=0.1 +2024-09-17 19:31:26,865 INFO [train.py:1198] (0/2) Epoch 16, batch 3600, loss[loss=0.2523, ctc_loss=0.1439, cr_loss=0.3603, attn_decoder_loss=0.2564, over 29481.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1466, cr_loss=0.3882, attn_decoder_loss=0.2555, over 5791055.38 frames. ], batch size: 77, lr: 6.93e-03, grad_scale: 16.0 +2024-09-17 19:31:38,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=285900.0, ans=0.125 +2024-09-17 19:31:48,186 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:31:56,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=285980.0, ans=0.125 +2024-09-17 19:32:00,515 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.09 vs. limit=15.0 +2024-09-17 19:32:13,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=286020.0, ans=0.0 +2024-09-17 19:32:15,431 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.18 vs. 
limit=15.0 +2024-09-17 19:32:40,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=286100.0, ans=0.125 +2024-09-17 19:32:41,299 INFO [train.py:1198] (0/2) Epoch 16, batch 3650, loss[loss=0.2727, ctc_loss=0.1672, cr_loss=0.4431, attn_decoder_loss=0.2746, over 29510.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.146, cr_loss=0.3874, attn_decoder_loss=0.2547, over 5793005.86 frames. ], batch size: 90, lr: 6.93e-03, grad_scale: 8.0 +2024-09-17 19:32:41,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=286100.0, ans=0.125 +2024-09-17 19:32:43,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=286100.0, ans=0.125 +2024-09-17 19:32:45,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=286100.0, ans=0.125 +2024-09-17 19:33:15,469 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.18 vs. limit=15.0 +2024-09-17 19:33:17,578 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.371e+01 8.668e+01 9.269e+01 9.880e+01 1.402e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-17 19:33:17,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=286180.0, ans=0.125 +2024-09-17 19:33:19,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=286180.0, ans=0.125 +2024-09-17 19:33:23,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=286180.0, ans=0.125 +2024-09-17 19:33:40,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=286220.0, ans=0.0 +2024-09-17 19:33:57,799 INFO [train.py:1198] (0/2) Epoch 16, batch 3700, loss[loss=0.2531, ctc_loss=0.138, cr_loss=0.3819, attn_decoder_loss=0.2574, over 29700.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1454, cr_loss=0.3865, attn_decoder_loss=0.2544, over 5803462.36 frames. ], batch size: 84, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:34:10,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=286300.0, ans=0.0 +2024-09-17 19:34:21,382 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.83 vs. limit=15.0 +2024-09-17 19:34:41,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=286380.0, ans=0.025 +2024-09-17 19:34:43,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=286420.0, ans=0.0 +2024-09-17 19:35:07,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=286460.0, ans=0.125 +2024-09-17 19:35:14,229 INFO [train.py:1198] (0/2) Epoch 16, batch 3750, loss[loss=0.2148, ctc_loss=0.1153, cr_loss=0.3368, attn_decoder_loss=0.2184, over 29324.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1458, cr_loss=0.3873, attn_decoder_loss=0.2545, over 5807711.39 frames. 
], batch size: 67, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:35:22,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=286500.0, ans=0.125 +2024-09-17 19:35:25,489 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.33 vs. limit=12.0 +2024-09-17 19:35:47,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=286580.0, ans=0.0 +2024-09-17 19:35:48,295 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.804e+01 8.849e+01 9.329e+01 1.007e+02 6.454e+02, threshold=1.866e+02, percent-clipped=5.0 +2024-09-17 19:35:51,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=286580.0, ans=0.0 +2024-09-17 19:36:03,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=286620.0, ans=0.125 +2024-09-17 19:36:12,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=286660.0, ans=0.1 +2024-09-17 19:36:18,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=286660.0, ans=0.125 +2024-09-17 19:36:24,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=286660.0, ans=0.025 +2024-09-17 19:36:28,745 INFO [train.py:1198] (0/2) Epoch 16, batch 3800, loss[loss=0.2662, ctc_loss=0.1513, cr_loss=0.421, attn_decoder_loss=0.2696, over 29635.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1455, cr_loss=0.3867, attn_decoder_loss=0.254, over 5798501.19 frames. ], batch size: 86, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:36:39,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=286700.0, ans=0.0 +2024-09-17 19:36:43,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=286740.0, ans=0.025 +2024-09-17 19:36:49,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=286740.0, ans=0.125 +2024-09-17 19:36:54,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=286740.0, ans=0.2 +2024-09-17 19:36:55,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=286740.0, ans=0.125 +2024-09-17 19:37:10,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=286780.0, ans=0.125 +2024-09-17 19:37:20,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=286820.0, ans=0.0 +2024-09-17 19:37:28,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=286860.0, ans=0.125 +2024-09-17 19:37:42,782 INFO [train.py:1198] (0/2) Epoch 16, batch 3850, loss[loss=0.2782, ctc_loss=0.1776, cr_loss=0.4328, attn_decoder_loss=0.2798, over 29247.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.1456, cr_loss=0.3868, attn_decoder_loss=0.2542, over 5813362.39 frames. 
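The grad_scale field in the tot_loss records (switching between 8.0 and 16.0 in this stretch) is the dynamic loss-scaling factor for mixed-precision training: it typically doubles while steps keep succeeding and halves when an overflow is detected. A minimal sketch of the standard PyTorch pattern, under the assumption that this run follows torch.cuda.amp-style scaling; model, optimizer, batch and compute_loss are placeholders:

```python
import torch

scaler = torch.cuda.amp.GradScaler(init_scale=8.0)  # cf. grad_scale: 8.0 above

def train_step(model, optimizer, batch, compute_loss):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = compute_loss(model, batch)
    scaler.scale(loss).backward()  # backprop the scaled loss
    scaler.step(optimizer)         # unscales grads; skips the step on inf/nan
    scaler.update()                # grows/shrinks the scale (e.g. 8.0 <-> 16.0)
    return loss.detach()
```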
], batch size: 100, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:37:43,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=286900.0, ans=0.125 +2024-09-17 19:37:46,814 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.21 vs. limit=22.5 +2024-09-17 19:37:48,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=286900.0, ans=0.125 +2024-09-17 19:37:48,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=286900.0, ans=0.0 +2024-09-17 19:38:16,695 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.678e+01 9.163e+01 9.754e+01 1.076e+02 2.177e+02, threshold=1.951e+02, percent-clipped=1.0 +2024-09-17 19:38:26,800 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.69 vs. limit=15.0 +2024-09-17 19:38:39,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=287020.0, ans=0.5 +2024-09-17 19:38:56,413 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.61 vs. limit=22.5 +2024-09-17 19:38:58,597 INFO [train.py:1198] (0/2) Epoch 16, batch 3900, loss[loss=0.2582, ctc_loss=0.149, cr_loss=0.3795, attn_decoder_loss=0.2618, over 29631.00 frames. ], tot_loss[loss=0.2519, ctc_loss=0.1463, cr_loss=0.3882, attn_decoder_loss=0.255, over 5817229.83 frames. ], batch size: 86, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:39:09,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=287100.0, ans=0.0 +2024-09-17 19:39:36,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=287180.0, ans=0.125 +2024-09-17 19:39:49,956 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.78 vs. limit=10.0 +2024-09-17 19:40:03,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.30 vs. limit=15.0 +2024-09-17 19:40:07,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=287260.0, ans=0.125 +2024-09-17 19:40:14,827 INFO [train.py:1198] (0/2) Epoch 16, batch 3950, loss[loss=0.277, ctc_loss=0.1676, cr_loss=0.4295, attn_decoder_loss=0.2796, over 29444.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1464, cr_loss=0.389, attn_decoder_loss=0.2552, over 5836337.45 frames. ], batch size: 97, lr: 6.91e-03, grad_scale: 8.0 +2024-09-17 19:40:30,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.37 vs. 
limit=12.0 +2024-09-17 19:40:41,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=287340.0, ans=0.025 +2024-09-17 19:40:42,301 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.86 vs. limit=15.0 +2024-09-17 19:40:43,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=287380.0, ans=0.0 +2024-09-17 19:40:48,603 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.423e+01 8.783e+01 9.413e+01 1.005e+02 2.800e+02, threshold=1.883e+02, percent-clipped=1.0 +2024-09-17 19:41:09,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=287420.0, ans=0.125 +2024-09-17 19:41:25,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=287460.0, ans=0.07 +2024-09-17 19:41:27,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=287500.0, ans=0.2 +2024-09-17 19:41:28,447 INFO [train.py:1198] (0/2) Epoch 16, batch 4000, loss[loss=0.2371, ctc_loss=0.1296, cr_loss=0.356, attn_decoder_loss=0.2411, over 29498.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1463, cr_loss=0.3882, attn_decoder_loss=0.2552, over 5813896.97 frames. ], batch size: 74, lr: 6.91e-03, grad_scale: 16.0 +2024-09-17 19:41:30,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=287500.0, ans=0.2 +2024-09-17 19:42:09,224 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.09 vs. limit=15.0 +2024-09-17 19:42:14,873 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.16 vs. limit=15.0 +2024-09-17 19:42:15,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=287620.0, ans=0.2 +2024-09-17 19:42:42,630 INFO [train.py:1198] (0/2) Epoch 16, batch 4050, loss[loss=0.283, ctc_loss=0.1954, cr_loss=0.424, attn_decoder_loss=0.2833, over 19357.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1463, cr_loss=0.388, attn_decoder_loss=0.2551, over 5796254.64 frames. ], batch size: 209, lr: 6.91e-03, grad_scale: 8.0 +2024-09-17 19:42:45,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=287700.0, ans=0.0 +2024-09-17 19:42:57,020 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=8.32 vs. limit=15.0 +2024-09-17 19:43:01,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.48 vs. 
limit=12.0 +2024-09-17 19:43:17,958 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.851e+01 8.954e+01 9.709e+01 1.044e+02 2.247e+02, threshold=1.942e+02, percent-clipped=1.0 +2024-09-17 19:43:18,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=287780.0, ans=0.125 +2024-09-17 19:43:18,666 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.34 vs. limit=22.5 +2024-09-17 19:43:38,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=287820.0, ans=0.125 +2024-09-17 19:43:41,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=287860.0, ans=0.025 +2024-09-17 19:43:57,678 INFO [train.py:1198] (0/2) Epoch 16, batch 4100, loss[loss=0.2673, ctc_loss=0.1619, cr_loss=0.4284, attn_decoder_loss=0.2695, over 29508.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1464, cr_loss=0.3878, attn_decoder_loss=0.2549, over 5792714.26 frames. ], batch size: 90, lr: 6.91e-03, grad_scale: 8.0 +2024-09-17 19:44:11,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.05 vs. limit=15.0 +2024-09-17 19:44:33,217 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-72000.pt +2024-09-17 19:44:49,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=288020.0, ans=10.0 +2024-09-17 19:44:50,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=288020.0, ans=0.07 +2024-09-17 19:45:03,259 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.01 vs. limit=10.0 +2024-09-17 19:45:08,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=288060.0, ans=0.0 +2024-09-17 19:45:20,188 INFO [train.py:1198] (0/2) Epoch 16, batch 4150, loss[loss=0.2379, ctc_loss=0.136, cr_loss=0.3732, attn_decoder_loss=0.241, over 29498.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.1464, cr_loss=0.3881, attn_decoder_loss=0.2548, over 5798203.19 frames. 
], batch size: 77, lr: 6.90e-03, grad_scale: 8.0 +2024-09-17 19:45:32,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=288100.0, ans=0.125 +2024-09-17 19:45:54,966 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.454e+01 9.164e+01 9.745e+01 4.465e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-17 19:45:55,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=288180.0, ans=0.125 +2024-09-17 19:45:59,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=288180.0, ans=0.0 +2024-09-17 19:46:14,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=288220.0, ans=0.125 +2024-09-17 19:46:15,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.17 vs. limit=15.0 +2024-09-17 19:46:20,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=288260.0, ans=0.125 +2024-09-17 19:46:32,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=288300.0, ans=0.125 +2024-09-17 19:46:33,304 INFO [train.py:1198] (0/2) Epoch 16, batch 4200, loss[loss=0.2754, ctc_loss=0.1661, cr_loss=0.4084, attn_decoder_loss=0.2785, over 29481.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1467, cr_loss=0.3889, attn_decoder_loss=0.2553, over 5800196.64 frames. ], batch size: 90, lr: 6.90e-03, grad_scale: 8.0 +2024-09-17 19:46:42,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=288300.0, ans=0.125 +2024-09-17 19:46:50,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=288340.0, ans=0.2 +2024-09-17 19:46:54,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=288340.0, ans=0.2 +2024-09-17 19:46:55,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=288340.0, ans=0.125 +2024-09-17 19:46:58,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=288340.0, ans=0.125 +2024-09-17 19:47:00,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=288340.0, ans=0.125 +2024-09-17 19:47:04,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=288380.0, ans=0.2 +2024-09-17 19:47:13,287 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:47:22,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=288420.0, ans=0.025 +2024-09-17 19:47:30,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=288420.0, ans=0.0 +2024-09-17 19:47:36,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=288460.0, ans=0.07 +2024-09-17 19:47:40,603 INFO [scaling.py:214] (0/2) 
ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=288460.0, ans=0.125 +2024-09-17 19:47:47,733 INFO [train.py:1198] (0/2) Epoch 16, batch 4250, loss[loss=0.2369, ctc_loss=0.1292, cr_loss=0.3561, attn_decoder_loss=0.241, over 29509.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1465, cr_loss=0.389, attn_decoder_loss=0.2554, over 5806454.89 frames. ], batch size: 74, lr: 6.90e-03, grad_scale: 4.0 +2024-09-17 19:47:48,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=288500.0, ans=0.125 +2024-09-17 19:47:53,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.10 vs. limit=8.0 +2024-09-17 19:47:53,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=288500.0, ans=0.125 +2024-09-17 19:48:15,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=288580.0, ans=0.0 +2024-09-17 19:48:24,161 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.118e+01 8.844e+01 9.399e+01 1.005e+02 1.682e+02, threshold=1.880e+02, percent-clipped=0.0 +2024-09-17 19:48:35,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=288620.0, ans=0.05 +2024-09-17 19:48:43,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=288620.0, ans=0.125 +2024-09-17 19:49:01,841 INFO [train.py:1198] (0/2) Epoch 16, batch 4300, loss[loss=0.26, ctc_loss=0.1514, cr_loss=0.4004, attn_decoder_loss=0.2632, over 29540.00 frames. ], tot_loss[loss=0.2526, ctc_loss=0.1466, cr_loss=0.3893, attn_decoder_loss=0.2557, over 5795294.69 frames. ], batch size: 87, lr: 6.90e-03, grad_scale: 8.0 +2024-09-17 19:49:32,316 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.50 vs. limit=15.0 +2024-09-17 19:49:57,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=288820.0, ans=0.025 +2024-09-17 19:50:12,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=288860.0, ans=0.125 +2024-09-17 19:50:16,551 INFO [train.py:1198] (0/2) Epoch 16, batch 4350, loss[loss=0.2705, ctc_loss=0.1613, cr_loss=0.4074, attn_decoder_loss=0.2735, over 29542.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1497, cr_loss=0.3951, attn_decoder_loss=0.2591, over 5796785.43 frames. ], batch size: 97, lr: 6.89e-03, grad_scale: 8.0 +2024-09-17 19:50:25,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=288900.0, ans=0.0 +2024-09-17 19:50:30,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=288940.0, ans=0.0 +2024-09-17 19:50:34,429 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.93 vs. 
limit=22.5 +2024-09-17 19:50:53,800 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.345e+01 8.913e+01 9.427e+01 9.937e+01 2.646e+02, threshold=1.885e+02, percent-clipped=2.0 +2024-09-17 19:51:12,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=289020.0, ans=0.0 +2024-09-17 19:51:21,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=289060.0, ans=0.1 +2024-09-17 19:51:27,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=289060.0, ans=0.0 +2024-09-17 19:51:31,264 INFO [train.py:1198] (0/2) Epoch 16, batch 4400, loss[loss=0.2696, ctc_loss=0.1628, cr_loss=0.431, attn_decoder_loss=0.2719, over 27178.00 frames. ], tot_loss[loss=0.2582, ctc_loss=0.1514, cr_loss=0.3972, attn_decoder_loss=0.2613, over 5767594.20 frames. ], batch size: 124, lr: 6.89e-03, grad_scale: 16.0 +2024-09-17 19:51:49,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=289140.0, ans=0.2 +2024-09-17 19:51:50,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.48 vs. limit=15.0 +2024-09-17 19:51:53,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=289140.0, ans=0.2 +2024-09-17 19:52:04,313 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.93 vs. limit=22.5 +2024-09-17 19:52:25,985 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=5.93 vs. limit=12.0 +2024-09-17 19:52:33,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=289260.0, ans=0.0 +2024-09-17 19:52:44,526 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:52:45,507 INFO [train.py:1198] (0/2) Epoch 16, batch 4450, loss[loss=0.2774, ctc_loss=0.1871, cr_loss=0.4243, attn_decoder_loss=0.278, over 20105.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.1562, cr_loss=0.4015, attn_decoder_loss=0.2638, over 5576562.96 frames. ], batch size: 209, lr: 6.89e-03, grad_scale: 4.0 +2024-09-17 19:53:05,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=289340.0, ans=0.0 +2024-09-17 19:53:16,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=9.81 vs. limit=12.0 +2024-09-17 19:53:25,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=289380.0, ans=0.0 +2024-09-17 19:53:26,477 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.172e+01 9.461e+01 1.058e+02 1.169e+02 3.185e+02, threshold=2.116e+02, percent-clipped=2.0 +2024-09-17 19:53:59,075 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.56 vs. limit=10.0 +2024-09-17 19:54:01,252 INFO [train.py:1198] (0/2) Epoch 16, batch 4500, loss[loss=0.2798, ctc_loss=0.1902, cr_loss=0.4156, attn_decoder_loss=0.2805, over 20427.00 frames. 
], tot_loss[loss=0.2642, ctc_loss=0.1615, cr_loss=0.404, attn_decoder_loss=0.2666, over 5237425.48 frames. ], batch size: 209, lr: 6.89e-03, grad_scale: 8.0 +2024-09-17 19:54:03,734 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.38 vs. limit=15.0 +2024-09-17 19:54:23,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=289540.0, ans=0.2 +2024-09-17 19:54:38,429 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-16.pt +2024-09-17 19:55:24,188 INFO [train.py:1198] (0/2) Epoch 17, batch 0, loss[loss=0.2367, ctc_loss=0.1266, cr_loss=0.3529, attn_decoder_loss=0.2411, over 29605.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1266, cr_loss=0.3529, attn_decoder_loss=0.2411, over 29605.00 frames. ], batch size: 73, lr: 6.68e-03, grad_scale: 16.0 +2024-09-17 19:55:24,189 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 19:55:42,761 INFO [train.py:1230] (0/2) Epoch 17, validation: loss=0.2133, ctc_loss=0.04137, cr_loss=4.881e-15, attn_decoder_loss=0.2324, over 944034.00 frames. +2024-09-17 19:55:42,762 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 19:55:43,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.96 vs. limit=15.0 +2024-09-17 19:55:50,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=289600.0, ans=0.0 +2024-09-17 19:55:53,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=289600.0, ans=0.05 +2024-09-17 19:55:59,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=289640.0, ans=0.125 +2024-09-17 19:56:01,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=289640.0, ans=0.0 +2024-09-17 19:56:01,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=289640.0, ans=0.0 +2024-09-17 19:56:17,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=289680.0, ans=0.125 +2024-09-17 19:56:29,716 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:56:31,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=289720.0, ans=0.1 +2024-09-17 19:56:33,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.39 vs. limit=22.5 +2024-09-17 19:56:57,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=289760.0, ans=0.125 +2024-09-17 19:56:58,418 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.48 vs. 
limit=22.5 +2024-09-17 19:57:00,372 INFO [train.py:1198] (0/2) Epoch 17, batch 50, loss[loss=0.2295, ctc_loss=0.1296, cr_loss=0.3507, attn_decoder_loss=0.2328, over 29411.00 frames. ], tot_loss[loss=0.2536, ctc_loss=0.1479, cr_loss=0.3932, attn_decoder_loss=0.2566, over 1266247.01 frames. ], batch size: 70, lr: 6.68e-03, grad_scale: 8.0 +2024-09-17 19:57:05,050 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.027e+01 9.620e+01 1.078e+02 1.162e+02 4.794e+02, threshold=2.156e+02, percent-clipped=2.0 +2024-09-17 19:57:06,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=289800.0, ans=0.09899494936611666 +2024-09-17 19:57:42,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.48 vs. limit=15.0 +2024-09-17 19:58:10,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=289960.0, ans=15.0 +2024-09-17 19:58:18,302 INFO [train.py:1198] (0/2) Epoch 17, batch 100, loss[loss=0.238, ctc_loss=0.1339, cr_loss=0.3767, attn_decoder_loss=0.2412, over 29534.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1481, cr_loss=0.3922, attn_decoder_loss=0.2576, over 2251685.56 frames. ], batch size: 76, lr: 6.67e-03, grad_scale: 8.0 +2024-09-17 19:58:23,523 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.78 vs. limit=10.0 +2024-09-17 19:58:38,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=290040.0, ans=0.125 +2024-09-17 19:58:45,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.41 vs. limit=15.0 +2024-09-17 19:58:52,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=290080.0, ans=0.125 +2024-09-17 19:59:01,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=290120.0, ans=0.035 +2024-09-17 19:59:04,075 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.68 vs. limit=15.0 +2024-09-17 19:59:10,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=290120.0, ans=0.125 +2024-09-17 19:59:12,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=290120.0, ans=0.125 +2024-09-17 19:59:14,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=290120.0, ans=0.125 +2024-09-17 19:59:15,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=290120.0, ans=0.015 +2024-09-17 19:59:19,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=290160.0, ans=0.125 +2024-09-17 19:59:32,868 INFO [train.py:1198] (0/2) Epoch 17, batch 150, loss[loss=0.2281, ctc_loss=0.1236, cr_loss=0.3459, attn_decoder_loss=0.232, over 29425.00 frames. 
], tot_loss[loss=0.2516, ctc_loss=0.1454, cr_loss=0.3874, attn_decoder_loss=0.2548, over 3046282.09 frames. ], batch size: 70, lr: 6.67e-03, grad_scale: 8.0 +2024-09-17 19:59:36,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=290200.0, ans=0.1 +2024-09-17 19:59:37,318 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.030e+01 8.871e+01 9.281e+01 1.009e+02 2.332e+02, threshold=1.856e+02, percent-clipped=1.0 +2024-09-17 19:59:54,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=290240.0, ans=0.1 +2024-09-17 19:59:55,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=290240.0, ans=0.0 +2024-09-17 20:00:20,336 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.76 vs. limit=10.0 +2024-09-17 20:00:30,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=290320.0, ans=0.025 +2024-09-17 20:00:47,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=290360.0, ans=0.0 +2024-09-17 20:00:50,789 INFO [train.py:1198] (0/2) Epoch 17, batch 200, loss[loss=0.2642, ctc_loss=0.1583, cr_loss=0.3874, attn_decoder_loss=0.2674, over 27204.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.1453, cr_loss=0.3871, attn_decoder_loss=0.2543, over 3657952.42 frames. ], batch size: 124, lr: 6.67e-03, grad_scale: 8.0 +2024-09-17 20:01:00,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=290400.0, ans=0.09899494936611666 +2024-09-17 20:01:14,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=290440.0, ans=0.125 +2024-09-17 20:01:33,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=290480.0, ans=0.125 +2024-09-17 20:01:42,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=290520.0, ans=0.1 +2024-09-17 20:01:50,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.21 vs. limit=15.0 +2024-09-17 20:02:09,186 INFO [train.py:1198] (0/2) Epoch 17, batch 250, loss[loss=0.2673, ctc_loss=0.1571, cr_loss=0.4192, attn_decoder_loss=0.2703, over 29289.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1454, cr_loss=0.3876, attn_decoder_loss=0.2547, over 4139569.96 frames. ], batch size: 100, lr: 6.67e-03, grad_scale: 8.0 +2024-09-17 20:02:11,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.55 vs. 
limit=6.0 +2024-09-17 20:02:13,832 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.681e+01 8.517e+01 9.040e+01 9.817e+01 1.381e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-17 20:02:14,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=290600.0, ans=0.125 +2024-09-17 20:02:20,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=290600.0, ans=0.125 +2024-09-17 20:02:27,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=290640.0, ans=10.0 +2024-09-17 20:02:27,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=290640.0, ans=0.1 +2024-09-17 20:02:27,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=290640.0, ans=10.0 +2024-09-17 20:02:35,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=290640.0, ans=0.09899494936611666 +2024-09-17 20:02:35,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=290640.0, ans=0.0 +2024-09-17 20:02:44,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.76 vs. limit=15.0 +2024-09-17 20:02:53,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=290720.0, ans=0.0 +2024-09-17 20:02:59,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=290720.0, ans=0.125 +2024-09-17 20:03:01,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.63 vs. limit=22.5 +2024-09-17 20:03:19,928 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.44 vs. limit=22.5 +2024-09-17 20:03:24,667 INFO [train.py:1198] (0/2) Epoch 17, batch 300, loss[loss=0.2681, ctc_loss=0.1588, cr_loss=0.4063, attn_decoder_loss=0.2712, over 29563.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1446, cr_loss=0.3865, attn_decoder_loss=0.2539, over 4508307.28 frames. ], batch size: 92, lr: 6.66e-03, grad_scale: 8.0 +2024-09-17 20:03:37,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=290800.0, ans=0.125 +2024-09-17 20:03:46,643 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.48 vs. 
limit=6.0 +2024-09-17 20:04:04,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=290880.0, ans=0.1 +2024-09-17 20:04:07,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=290880.0, ans=0.125 +2024-09-17 20:04:19,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=290920.0, ans=0.025 +2024-09-17 20:04:42,400 INFO [train.py:1198] (0/2) Epoch 17, batch 350, loss[loss=0.2211, ctc_loss=0.1236, cr_loss=0.3476, attn_decoder_loss=0.2242, over 29345.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.145, cr_loss=0.3877, attn_decoder_loss=0.2547, over 4794779.12 frames. ], batch size: 71, lr: 6.66e-03, grad_scale: 8.0 +2024-09-17 20:04:46,785 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.070e+01 8.690e+01 9.264e+01 9.789e+01 1.817e+02, threshold=1.853e+02, percent-clipped=1.0 +2024-09-17 20:04:48,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=291000.0, ans=0.0 +2024-09-17 20:05:02,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=291040.0, ans=0.0 +2024-09-17 20:05:09,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=291040.0, ans=0.0 +2024-09-17 20:05:15,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=291080.0, ans=0.2 +2024-09-17 20:05:33,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=291120.0, ans=0.1 +2024-09-17 20:05:40,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.43 vs. limit=15.0 +2024-09-17 20:05:53,152 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.50 vs. limit=15.0 +2024-09-17 20:05:58,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=291200.0, ans=0.1 +2024-09-17 20:06:00,102 INFO [train.py:1198] (0/2) Epoch 17, batch 400, loss[loss=0.2562, ctc_loss=0.1478, cr_loss=0.3908, attn_decoder_loss=0.2596, over 29678.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1446, cr_loss=0.3868, attn_decoder_loss=0.2545, over 5024044.36 frames. 
], batch size: 82, lr: 6.66e-03, grad_scale: 16.0 +2024-09-17 20:06:37,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.whiten.whitening_limit, batch_count=291280.0, ans=12.0 +2024-09-17 20:06:45,851 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:06:47,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=291320.0, ans=0.125 +2024-09-17 20:07:08,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=291360.0, ans=0.0 +2024-09-17 20:07:15,622 INFO [train.py:1198] (0/2) Epoch 17, batch 450, loss[loss=0.2702, ctc_loss=0.1646, cr_loss=0.4285, attn_decoder_loss=0.2724, over 29700.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1447, cr_loss=0.3868, attn_decoder_loss=0.2544, over 5186332.54 frames. ], batch size: 83, lr: 6.66e-03, grad_scale: 8.0 +2024-09-17 20:07:21,595 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.042e+01 8.659e+01 9.188e+01 9.784e+01 2.602e+02, threshold=1.838e+02, percent-clipped=1.0 +2024-09-17 20:07:28,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=291400.0, ans=0.2 +2024-09-17 20:07:29,946 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.86 vs. limit=10.0 +2024-09-17 20:07:42,308 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.01 vs. limit=15.0 +2024-09-17 20:07:47,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=291480.0, ans=0.0 +2024-09-17 20:08:19,966 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:08:33,722 INFO [train.py:1198] (0/2) Epoch 17, batch 500, loss[loss=0.2715, ctc_loss=0.1627, cr_loss=0.4325, attn_decoder_loss=0.274, over 29447.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1444, cr_loss=0.3871, attn_decoder_loss=0.2539, over 5329065.67 frames. ], batch size: 94, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:08:44,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=291600.0, ans=0.0 +2024-09-17 20:08:45,413 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.41 vs. limit=22.5 +2024-09-17 20:09:17,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=291720.0, ans=0.1 +2024-09-17 20:09:19,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=291720.0, ans=0.2 +2024-09-17 20:09:36,438 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.97 vs. 
limit=15.0 +2024-09-17 20:09:40,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=291760.0, ans=0.0 +2024-09-17 20:09:40,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=291760.0, ans=0.125 +2024-09-17 20:09:51,608 INFO [train.py:1198] (0/2) Epoch 17, batch 550, loss[loss=0.2636, ctc_loss=0.15, cr_loss=0.4055, attn_decoder_loss=0.2672, over 28803.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1446, cr_loss=0.3867, attn_decoder_loss=0.254, over 5423617.86 frames. ], batch size: 104, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:09:57,725 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.623e+01 9.075e+01 9.597e+01 1.052e+02 1.735e+02, threshold=1.919e+02, percent-clipped=0.0 +2024-09-17 20:09:58,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=291800.0, ans=0.0 +2024-09-17 20:10:20,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.52 vs. limit=12.0 +2024-09-17 20:10:20,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=291880.0, ans=0.0 +2024-09-17 20:10:26,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=291880.0, ans=0.0 +2024-09-17 20:10:30,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=291880.0, ans=0.0 +2024-09-17 20:10:49,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=291920.0, ans=0.1 +2024-09-17 20:10:56,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=291960.0, ans=0.0 +2024-09-17 20:11:08,250 INFO [train.py:1198] (0/2) Epoch 17, batch 600, loss[loss=0.261, ctc_loss=0.1457, cr_loss=0.3917, attn_decoder_loss=0.2651, over 29320.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.145, cr_loss=0.3878, attn_decoder_loss=0.2547, over 5509737.78 frames. ], batch size: 100, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:11:08,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=292000.0, ans=0.025 +2024-09-17 20:11:17,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=292000.0, ans=0.125 +2024-09-17 20:11:25,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=292040.0, ans=0.2 +2024-09-17 20:11:30,286 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.81 vs. 
limit=15.0 +2024-09-17 20:11:45,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=292080.0, ans=0.2 +2024-09-17 20:11:56,081 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:11:57,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=292120.0, ans=0.07 +2024-09-17 20:12:11,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=292160.0, ans=0.125 +2024-09-17 20:12:15,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.93 vs. limit=15.0 +2024-09-17 20:12:23,226 INFO [train.py:1198] (0/2) Epoch 17, batch 650, loss[loss=0.257, ctc_loss=0.1421, cr_loss=0.4108, attn_decoder_loss=0.2607, over 29768.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.144, cr_loss=0.3859, attn_decoder_loss=0.2538, over 5586661.30 frames. ], batch size: 81, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:12:29,211 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.544e+01 8.569e+01 9.101e+01 9.967e+01 2.303e+02, threshold=1.820e+02, percent-clipped=2.0 +2024-09-17 20:12:34,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=292200.0, ans=0.125 +2024-09-17 20:12:57,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=292280.0, ans=0.025 +2024-09-17 20:12:58,179 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.58 vs. limit=22.5 +2024-09-17 20:13:17,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=292320.0, ans=0.0 +2024-09-17 20:13:18,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=292320.0, ans=0.1 +2024-09-17 20:13:19,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=292320.0, ans=0.125 +2024-09-17 20:13:21,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=292320.0, ans=0.125 +2024-09-17 20:13:24,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=292360.0, ans=0.0 +2024-09-17 20:13:30,034 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.20 vs. limit=15.0 +2024-09-17 20:13:43,857 INFO [train.py:1198] (0/2) Epoch 17, batch 700, loss[loss=0.2393, ctc_loss=0.1393, cr_loss=0.3805, attn_decoder_loss=0.242, over 29545.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1449, cr_loss=0.3876, attn_decoder_loss=0.2547, over 5637985.84 frames. 
], batch size: 76, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:13:48,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=292400.0, ans=0.025 +2024-09-17 20:14:06,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=292440.0, ans=0.025 +2024-09-17 20:14:13,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.38 vs. limit=6.0 +2024-09-17 20:14:17,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=292480.0, ans=0.125 +2024-09-17 20:14:29,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=292520.0, ans=0.125 +2024-09-17 20:14:40,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=292520.0, ans=0.0 +2024-09-17 20:14:58,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=292600.0, ans=0.0 +2024-09-17 20:14:59,468 INFO [train.py:1198] (0/2) Epoch 17, batch 750, loss[loss=0.25, ctc_loss=0.1421, cr_loss=0.3897, attn_decoder_loss=0.2533, over 29710.00 frames. ], tot_loss[loss=0.251, ctc_loss=0.1449, cr_loss=0.3874, attn_decoder_loss=0.2542, over 5676910.20 frames. ], batch size: 82, lr: 6.64e-03, grad_scale: 8.0 +2024-09-17 20:15:05,330 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.641e+01 8.913e+01 9.464e+01 1.024e+02 2.439e+02, threshold=1.893e+02, percent-clipped=2.0 +2024-09-17 20:15:05,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=292600.0, ans=0.0 +2024-09-17 20:15:14,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=292640.0, ans=0.0 +2024-09-17 20:15:24,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.11 vs. limit=15.0 +2024-09-17 20:15:25,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=292640.0, ans=0.125 +2024-09-17 20:15:36,456 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.66 vs. limit=22.5 +2024-09-17 20:15:49,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=292720.0, ans=0.125 +2024-09-17 20:15:57,817 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.18 vs. limit=15.0 +2024-09-17 20:16:07,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=292760.0, ans=0.0 +2024-09-17 20:16:15,492 INFO [train.py:1198] (0/2) Epoch 17, batch 800, loss[loss=0.2232, ctc_loss=0.1176, cr_loss=0.3294, attn_decoder_loss=0.2276, over 29606.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1448, cr_loss=0.3874, attn_decoder_loss=0.2538, over 5707700.67 frames. 
], batch size: 73, lr: 6.64e-03, grad_scale: 16.0 +2024-09-17 20:16:18,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=292800.0, ans=0.0 +2024-09-17 20:16:44,305 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.25 vs. limit=10.0 +2024-09-17 20:17:07,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=292920.0, ans=0.125 +2024-09-17 20:17:14,465 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.70 vs. limit=15.0 +2024-09-17 20:17:17,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=292960.0, ans=0.125 +2024-09-17 20:17:33,057 INFO [train.py:1198] (0/2) Epoch 17, batch 850, loss[loss=0.2586, ctc_loss=0.1485, cr_loss=0.3985, attn_decoder_loss=0.2619, over 29710.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1441, cr_loss=0.3859, attn_decoder_loss=0.2534, over 5737157.47 frames. ], batch size: 89, lr: 6.64e-03, grad_scale: 8.0 +2024-09-17 20:17:33,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=293000.0, ans=0.2 +2024-09-17 20:17:42,755 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.757e+01 8.745e+01 9.386e+01 1.018e+02 1.977e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-17 20:17:52,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.48 vs. limit=10.0 +2024-09-17 20:17:58,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=293040.0, ans=0.2 +2024-09-17 20:18:16,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=293080.0, ans=0.0 +2024-09-17 20:18:27,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=293120.0, ans=0.1 +2024-09-17 20:18:30,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=293120.0, ans=0.07 +2024-09-17 20:18:46,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=293160.0, ans=0.2 +2024-09-17 20:18:47,326 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.66 vs. limit=12.0 +2024-09-17 20:18:51,106 INFO [train.py:1198] (0/2) Epoch 17, batch 900, loss[loss=0.2208, ctc_loss=0.1142, cr_loss=0.3311, attn_decoder_loss=0.2253, over 29633.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1445, cr_loss=0.3861, attn_decoder_loss=0.2536, over 5739813.89 frames. 
], batch size: 73, lr: 6.64e-03, grad_scale: 8.0 +2024-09-17 20:19:00,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=293200.0, ans=0.015 +2024-09-17 20:19:14,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=293240.0, ans=0.0 +2024-09-17 20:19:15,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=293240.0, ans=0.125 +2024-09-17 20:19:52,697 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.31 vs. limit=22.5 +2024-09-17 20:19:59,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=293360.0, ans=0.125 +2024-09-17 20:20:06,843 INFO [train.py:1198] (0/2) Epoch 17, batch 950, loss[loss=0.2352, ctc_loss=0.1279, cr_loss=0.3525, attn_decoder_loss=0.2393, over 29507.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1451, cr_loss=0.387, attn_decoder_loss=0.2539, over 5741936.81 frames. ], batch size: 74, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:20:14,254 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.889e+01 9.768e+01 1.117e+02 1.855e+02, threshold=1.954e+02, percent-clipped=0.0 +2024-09-17 20:20:14,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=293400.0, ans=0.0 +2024-09-17 20:20:32,625 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.96 vs. limit=15.0 +2024-09-17 20:21:14,161 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:21:26,829 INFO [train.py:1198] (0/2) Epoch 17, batch 1000, loss[loss=0.2437, ctc_loss=0.1376, cr_loss=0.384, attn_decoder_loss=0.2469, over 29517.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.1459, cr_loss=0.3882, attn_decoder_loss=0.2548, over 5735828.37 frames. ], batch size: 77, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:21:28,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=293600.0, ans=0.0 +2024-09-17 20:21:45,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=293640.0, ans=0.125 +2024-09-17 20:21:59,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=293680.0, ans=0.0 +2024-09-17 20:22:04,830 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.62 vs. 
limit=15.0 +2024-09-17 20:22:10,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=293680.0, ans=0.2 +2024-09-17 20:22:21,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=293720.0, ans=0.1 +2024-09-17 20:22:22,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=293720.0, ans=0.0 +2024-09-17 20:22:34,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=293760.0, ans=0.0 +2024-09-17 20:22:35,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=293760.0, ans=0.0 +2024-09-17 20:22:42,665 INFO [train.py:1198] (0/2) Epoch 17, batch 1050, loss[loss=0.2505, ctc_loss=0.1366, cr_loss=0.3657, attn_decoder_loss=0.255, over 29697.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.1447, cr_loss=0.3863, attn_decoder_loss=0.2537, over 5744695.28 frames. ], batch size: 85, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:22:44,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=293800.0, ans=0.0 +2024-09-17 20:22:50,128 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.535e+01 8.852e+01 9.385e+01 1.035e+02 1.958e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-17 20:23:01,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=293840.0, ans=0.0 +2024-09-17 20:23:25,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=293880.0, ans=0.1 +2024-09-17 20:23:40,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=293920.0, ans=0.0 +2024-09-17 20:23:42,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.76 vs. limit=12.0 +2024-09-17 20:23:43,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=293960.0, ans=0.0 +2024-09-17 20:23:58,419 INFO [train.py:1198] (0/2) Epoch 17, batch 1100, loss[loss=0.2414, ctc_loss=0.1341, cr_loss=0.3684, attn_decoder_loss=0.2451, over 29458.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1447, cr_loss=0.3868, attn_decoder_loss=0.2541, over 5757298.26 frames. 
], batch size: 78, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:24:12,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=294040.0, ans=0.125 +2024-09-17 20:24:13,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=294040.0, ans=0.125 +2024-09-17 20:24:28,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=294080.0, ans=0.125 +2024-09-17 20:24:43,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=294080.0, ans=0.125 +2024-09-17 20:24:58,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=294120.0, ans=0.1 +2024-09-17 20:25:01,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=294160.0, ans=0.1 +2024-09-17 20:25:10,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=294160.0, ans=0.025 +2024-09-17 20:25:18,662 INFO [train.py:1198] (0/2) Epoch 17, batch 1150, loss[loss=0.2391, ctc_loss=0.1368, cr_loss=0.3774, attn_decoder_loss=0.242, over 29432.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.1443, cr_loss=0.3861, attn_decoder_loss=0.2537, over 5755154.96 frames. ], batch size: 78, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:25:26,292 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.857e+01 8.746e+01 9.258e+01 9.833e+01 4.199e+02, threshold=1.852e+02, percent-clipped=3.0 +2024-09-17 20:25:38,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=294240.0, ans=0.125 +2024-09-17 20:25:45,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=294240.0, ans=0.125 +2024-09-17 20:26:09,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=294320.0, ans=0.2 +2024-09-17 20:26:34,629 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.88 vs. limit=15.0 +2024-09-17 20:26:34,875 INFO [train.py:1198] (0/2) Epoch 17, batch 1200, loss[loss=0.2584, ctc_loss=0.1382, cr_loss=0.3832, attn_decoder_loss=0.2632, over 29670.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1449, cr_loss=0.3872, attn_decoder_loss=0.2546, over 5748065.58 frames. 
], batch size: 85, lr: 6.62e-03, grad_scale: 16.0 +2024-09-17 20:26:39,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=294400.0, ans=0.0 +2024-09-17 20:26:42,909 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:26:48,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=294440.0, ans=0.125 +2024-09-17 20:26:50,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=294440.0, ans=0.0 +2024-09-17 20:27:20,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=294520.0, ans=0.125 +2024-09-17 20:27:32,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.72 vs. limit=15.0 +2024-09-17 20:27:36,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.22 vs. limit=15.0 +2024-09-17 20:27:50,825 INFO [train.py:1198] (0/2) Epoch 17, batch 1250, loss[loss=0.2635, ctc_loss=0.1556, cr_loss=0.4229, attn_decoder_loss=0.266, over 29484.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1451, cr_loss=0.3885, attn_decoder_loss=0.2553, over 5775309.35 frames. ], batch size: 92, lr: 6.62e-03, grad_scale: 8.0 +2024-09-17 20:27:54,560 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.42 vs. limit=15.0 +2024-09-17 20:27:59,838 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.670e+01 8.886e+01 9.388e+01 9.868e+01 1.541e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-17 20:28:06,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=294640.0, ans=0.125 +2024-09-17 20:28:19,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=294680.0, ans=0.125 +2024-09-17 20:28:19,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=294680.0, ans=0.0 +2024-09-17 20:28:32,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=294680.0, ans=0.0 +2024-09-17 20:29:01,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=294760.0, ans=0.2 +2024-09-17 20:29:01,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=294760.0, ans=0.0 +2024-09-17 20:29:07,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=294760.0, ans=0.125 +2024-09-17 20:29:10,540 INFO [train.py:1198] (0/2) Epoch 17, batch 1300, loss[loss=0.2618, ctc_loss=0.1501, cr_loss=0.3928, attn_decoder_loss=0.2655, over 28248.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1445, cr_loss=0.3867, attn_decoder_loss=0.2545, over 5779838.36 frames. 
], batch size: 111, lr: 6.62e-03, grad_scale: 8.0 +2024-09-17 20:29:12,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=294800.0, ans=0.1 +2024-09-17 20:29:19,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.80 vs. limit=15.0 +2024-09-17 20:29:31,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=294840.0, ans=0.1 +2024-09-17 20:29:41,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=294880.0, ans=0.125 +2024-09-17 20:29:50,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=294880.0, ans=0.125 +2024-09-17 20:29:50,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=294880.0, ans=0.125 +2024-09-17 20:30:11,255 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:30:17,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=294960.0, ans=0.125 +2024-09-17 20:30:24,218 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.15 vs. limit=15.0 +2024-09-17 20:30:26,392 INFO [train.py:1198] (0/2) Epoch 17, batch 1350, loss[loss=0.2597, ctc_loss=0.1498, cr_loss=0.3808, attn_decoder_loss=0.2634, over 29762.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.144, cr_loss=0.3862, attn_decoder_loss=0.254, over 5798142.37 frames. 
], batch size: 81, lr: 6.62e-03, grad_scale: 8.0 +2024-09-17 20:30:26,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=295000.0, ans=0.125 +2024-09-17 20:30:29,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=295000.0, ans=0.125 +2024-09-17 20:30:35,309 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.389e+01 8.707e+01 9.188e+01 9.676e+01 1.559e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-17 20:30:43,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=295040.0, ans=0.125 +2024-09-17 20:30:43,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=295040.0, ans=0.125 +2024-09-17 20:30:49,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=295040.0, ans=0.1 +2024-09-17 20:30:53,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=295040.0, ans=0.0 +2024-09-17 20:30:54,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=295080.0, ans=0.2 +2024-09-17 20:31:02,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=295080.0, ans=0.125 +2024-09-17 20:31:15,166 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.55 vs. limit=22.5 +2024-09-17 20:31:31,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=295160.0, ans=0.2 +2024-09-17 20:31:35,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=295160.0, ans=0.0 +2024-09-17 20:31:35,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=295160.0, ans=0.1 +2024-09-17 20:31:41,837 INFO [train.py:1198] (0/2) Epoch 17, batch 1400, loss[loss=0.229, ctc_loss=0.1287, cr_loss=0.3675, attn_decoder_loss=0.232, over 29600.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.1437, cr_loss=0.3855, attn_decoder_loss=0.2538, over 5809301.54 frames. ], batch size: 69, lr: 6.61e-03, grad_scale: 8.0 +2024-09-17 20:32:03,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=295240.0, ans=0.125 +2024-09-17 20:32:28,868 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.20 vs. limit=10.0 +2024-09-17 20:32:43,926 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.23 vs. limit=15.0 +2024-09-17 20:32:55,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.68 vs. limit=15.0 +2024-09-17 20:33:01,972 INFO [train.py:1198] (0/2) Epoch 17, batch 1450, loss[loss=0.2592, ctc_loss=0.1448, cr_loss=0.3958, attn_decoder_loss=0.2632, over 29394.00 frames. 
], tot_loss[loss=0.2509, ctc_loss=0.1438, cr_loss=0.3858, attn_decoder_loss=0.2543, over 5805868.02 frames. ], batch size: 94, lr: 6.61e-03, grad_scale: 8.0 +2024-09-17 20:33:10,927 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.631e+01 9.209e+01 9.989e+01 1.746e+02, threshold=1.842e+02, percent-clipped=0.0 +2024-09-17 20:33:18,255 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.29 vs. limit=15.0 +2024-09-17 20:33:49,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=295520.0, ans=0.1 +2024-09-17 20:34:00,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=295560.0, ans=0.125 +2024-09-17 20:34:08,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=295560.0, ans=0.125 +2024-09-17 20:34:17,470 INFO [train.py:1198] (0/2) Epoch 17, batch 1500, loss[loss=0.2618, ctc_loss=0.1466, cr_loss=0.3878, attn_decoder_loss=0.266, over 29637.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1437, cr_loss=0.3863, attn_decoder_loss=0.2545, over 5807127.13 frames. ], batch size: 86, lr: 6.61e-03, grad_scale: 8.0 +2024-09-17 20:34:25,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=295600.0, ans=0.125 +2024-09-17 20:34:43,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=295640.0, ans=0.025 +2024-09-17 20:34:55,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=295680.0, ans=0.0 +2024-09-17 20:34:56,472 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.22 vs. limit=15.0 +2024-09-17 20:34:57,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=295680.0, ans=0.5 +2024-09-17 20:35:07,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.96 vs. limit=15.0 +2024-09-17 20:35:14,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten.whitening_limit, batch_count=295720.0, ans=15.0 +2024-09-17 20:35:27,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=295760.0, ans=0.125 +2024-09-17 20:35:28,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.01 vs. limit=15.0 +2024-09-17 20:35:32,364 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:35:33,484 INFO [train.py:1198] (0/2) Epoch 17, batch 1550, loss[loss=0.2579, ctc_loss=0.149, cr_loss=0.3931, attn_decoder_loss=0.2612, over 29513.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.1439, cr_loss=0.3858, attn_decoder_loss=0.2544, over 5782081.95 frames. 
], batch size: 90, lr: 6.61e-03, grad_scale: 8.0 +2024-09-17 20:35:42,525 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.733e+01 9.019e+01 9.707e+01 1.076e+02 7.268e+02, threshold=1.941e+02, percent-clipped=2.0 +2024-09-17 20:35:44,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=295800.0, ans=0.125 +2024-09-17 20:35:50,827 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.29 vs. limit=12.0 +2024-09-17 20:36:02,646 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.23 vs. limit=22.5 +2024-09-17 20:36:43,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=295960.0, ans=0.0 +2024-09-17 20:36:43,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=295960.0, ans=0.05 +2024-09-17 20:36:53,524 INFO [train.py:1198] (0/2) Epoch 17, batch 1600, loss[loss=0.2557, ctc_loss=0.1435, cr_loss=0.3949, attn_decoder_loss=0.2594, over 29682.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1449, cr_loss=0.387, attn_decoder_loss=0.2548, over 5763617.13 frames. ], batch size: 85, lr: 6.61e-03, grad_scale: 16.0 +2024-09-17 20:37:43,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=296120.0, ans=0.09899494936611666 +2024-09-17 20:37:51,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=296120.0, ans=10.0 +2024-09-17 20:37:51,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=296120.0, ans=0.125 +2024-09-17 20:38:08,991 INFO [train.py:1198] (0/2) Epoch 17, batch 1650, loss[loss=0.2647, ctc_loss=0.151, cr_loss=0.4132, attn_decoder_loss=0.2681, over 29708.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.1441, cr_loss=0.3859, attn_decoder_loss=0.2544, over 5757173.09 frames. ], batch size: 89, lr: 6.60e-03, grad_scale: 8.0 +2024-09-17 20:38:19,709 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.329e+01 8.617e+01 9.352e+01 1.025e+02 5.265e+02, threshold=1.870e+02, percent-clipped=3.0 +2024-09-17 20:38:22,059 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.71 vs. limit=15.0 +2024-09-17 20:38:29,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=296240.0, ans=0.025 +2024-09-17 20:39:24,821 INFO [train.py:1198] (0/2) Epoch 17, batch 1700, loss[loss=0.2251, ctc_loss=0.1258, cr_loss=0.3556, attn_decoder_loss=0.2282, over 29591.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.1435, cr_loss=0.3849, attn_decoder_loss=0.2538, over 5779372.50 frames. 
], batch size: 69, lr: 6.60e-03, grad_scale: 8.0 +2024-09-17 20:39:46,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=296440.0, ans=0.125 +2024-09-17 20:39:55,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=296480.0, ans=0.2 +2024-09-17 20:40:07,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=296480.0, ans=0.0 +2024-09-17 20:40:25,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=296560.0, ans=0.0 +2024-09-17 20:40:27,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=296560.0, ans=0.125 +2024-09-17 20:40:31,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.99 vs. limit=15.0 +2024-09-17 20:40:33,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=296560.0, ans=0.1 +2024-09-17 20:40:35,468 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.85 vs. limit=12.0 +2024-09-17 20:40:44,386 INFO [train.py:1198] (0/2) Epoch 17, batch 1750, loss[loss=0.2244, ctc_loss=0.1243, cr_loss=0.3519, attn_decoder_loss=0.2277, over 29368.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1436, cr_loss=0.3853, attn_decoder_loss=0.2537, over 5787502.18 frames. ], batch size: 67, lr: 6.60e-03, grad_scale: 8.0 +2024-09-17 20:40:54,994 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.408e+01 8.563e+01 9.059e+01 9.719e+01 2.142e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-17 20:41:04,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=296640.0, ans=0.2 +2024-09-17 20:41:13,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=296680.0, ans=0.0 +2024-09-17 20:41:16,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=296680.0, ans=0.125 +2024-09-17 20:41:21,248 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.54 vs. 
limit=15.0 +2024-09-17 20:41:26,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=296680.0, ans=0.025 +2024-09-17 20:41:29,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=296720.0, ans=0.125 +2024-09-17 20:41:35,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=296720.0, ans=0.125 +2024-09-17 20:41:40,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=296720.0, ans=0.0 +2024-09-17 20:41:46,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_na.min_abs, batch_count=296760.0, ans=0.02 +2024-09-17 20:41:49,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=296760.0, ans=0.0 +2024-09-17 20:42:00,034 INFO [train.py:1198] (0/2) Epoch 17, batch 1800, loss[loss=0.2656, ctc_loss=0.1542, cr_loss=0.414, attn_decoder_loss=0.2688, over 29715.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1439, cr_loss=0.3862, attn_decoder_loss=0.2539, over 5791716.06 frames. ], batch size: 83, lr: 6.60e-03, grad_scale: 8.0 +2024-09-17 20:42:11,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=296800.0, ans=0.125 +2024-09-17 20:42:29,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=296880.0, ans=0.125 +2024-09-17 20:42:30,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=296880.0, ans=0.125 +2024-09-17 20:42:54,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.66 vs. limit=15.0 +2024-09-17 20:43:11,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=296960.0, ans=0.125 +2024-09-17 20:43:16,002 INFO [train.py:1198] (0/2) Epoch 17, batch 1850, loss[loss=0.2499, ctc_loss=0.1376, cr_loss=0.3883, attn_decoder_loss=0.2538, over 29608.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1432, cr_loss=0.3852, attn_decoder_loss=0.2533, over 5796413.63 frames. ], batch size: 86, lr: 6.59e-03, grad_scale: 8.0 +2024-09-17 20:43:24,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.75 vs. limit=12.0 +2024-09-17 20:43:26,337 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.783e+01 8.992e+01 9.506e+01 1.016e+02 2.077e+02, threshold=1.901e+02, percent-clipped=1.0 +2024-09-17 20:43:27,507 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.44 vs. 
limit=15.0 +2024-09-17 20:43:40,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=297040.0, ans=0.0 +2024-09-17 20:43:48,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=297080.0, ans=0.1 +2024-09-17 20:43:49,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=297080.0, ans=0.2 +2024-09-17 20:44:03,916 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.10 vs. limit=15.0 +2024-09-17 20:44:16,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=297120.0, ans=0.125 +2024-09-17 20:44:16,776 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.19 vs. limit=15.0 +2024-09-17 20:44:26,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=297160.0, ans=0.125 +2024-09-17 20:44:27,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=297160.0, ans=0.125 +2024-09-17 20:44:27,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=297160.0, ans=0.1 +2024-09-17 20:44:31,563 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.09 vs. limit=15.0 +2024-09-17 20:44:35,820 INFO [train.py:1198] (0/2) Epoch 17, batch 1900, loss[loss=0.2672, ctc_loss=0.161, cr_loss=0.4189, attn_decoder_loss=0.2697, over 29701.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1443, cr_loss=0.387, attn_decoder_loss=0.2546, over 5805065.92 frames. ], batch size: 89, lr: 6.59e-03, grad_scale: 8.0 +2024-09-17 20:44:49,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=297240.0, ans=0.0 +2024-09-17 20:45:23,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=297320.0, ans=0.125 +2024-09-17 20:45:33,355 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.79 vs. limit=15.0 +2024-09-17 20:45:49,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=297360.0, ans=0.125 +2024-09-17 20:45:50,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=297400.0, ans=0.125 +2024-09-17 20:45:52,068 INFO [train.py:1198] (0/2) Epoch 17, batch 1950, loss[loss=0.2489, ctc_loss=0.151, cr_loss=0.3893, attn_decoder_loss=0.2511, over 29441.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1447, cr_loss=0.388, attn_decoder_loss=0.2556, over 5819226.68 frames. 
], batch size: 78, lr: 6.59e-03, grad_scale: 8.0 +2024-09-17 20:46:02,795 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 8.875e+01 9.464e+01 9.894e+01 2.247e+02, threshold=1.893e+02, percent-clipped=1.0 +2024-09-17 20:46:16,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=297440.0, ans=0.125 +2024-09-17 20:46:25,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=297480.0, ans=0.1 +2024-09-17 20:46:33,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=297480.0, ans=0.0 +2024-09-17 20:47:00,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=297560.0, ans=0.125 +2024-09-17 20:47:08,298 INFO [train.py:1198] (0/2) Epoch 17, batch 2000, loss[loss=0.2225, ctc_loss=0.1232, cr_loss=0.3587, attn_decoder_loss=0.2255, over 29335.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1453, cr_loss=0.3887, attn_decoder_loss=0.256, over 5795625.69 frames. ], batch size: 67, lr: 6.59e-03, grad_scale: 16.0 +2024-09-17 20:47:12,074 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.17 vs. limit=15.0 +2024-09-17 20:47:33,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.46 vs. limit=10.0 +2024-09-17 20:47:34,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=297640.0, ans=0.0 +2024-09-17 20:47:50,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=297680.0, ans=0.125 +2024-09-17 20:48:27,879 INFO [train.py:1198] (0/2) Epoch 17, batch 2050, loss[loss=0.2231, ctc_loss=0.1224, cr_loss=0.347, attn_decoder_loss=0.2266, over 29408.00 frames. ], tot_loss[loss=0.2519, ctc_loss=0.1448, cr_loss=0.3879, attn_decoder_loss=0.2552, over 5788726.52 frames. ], batch size: 70, lr: 6.59e-03, grad_scale: 8.0 +2024-09-17 20:48:34,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=297800.0, ans=0.025 +2024-09-17 20:48:40,029 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.290e+01 8.707e+01 9.110e+01 9.757e+01 1.726e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-17 20:48:46,391 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:49:08,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=297880.0, ans=0.125 +2024-09-17 20:49:14,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=297920.0, ans=0.0 +2024-09-17 20:49:37,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=297960.0, ans=0.5 +2024-09-17 20:49:43,052 INFO [train.py:1198] (0/2) Epoch 17, batch 2100, loss[loss=0.261, ctc_loss=0.1504, cr_loss=0.3985, attn_decoder_loss=0.2644, over 29784.00 frames. 
], tot_loss[loss=0.2513, ctc_loss=0.1443, cr_loss=0.3874, attn_decoder_loss=0.2545, over 5800465.65 frames. ], batch size: 81, lr: 6.58e-03, grad_scale: 8.0 +2024-09-17 20:49:47,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=298000.0, ans=0.1 +2024-09-17 20:49:50,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=298000.0, ans=0.125 +2024-09-17 20:50:04,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.81 vs. limit=10.0 +2024-09-17 20:50:08,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=298040.0, ans=0.2 +2024-09-17 20:50:10,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=298040.0, ans=0.125 +2024-09-17 20:50:34,551 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.18 vs. limit=22.5 +2024-09-17 20:50:34,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.21 vs. limit=15.0 +2024-09-17 20:50:58,161 INFO [train.py:1198] (0/2) Epoch 17, batch 2150, loss[loss=0.2437, ctc_loss=0.1341, cr_loss=0.3651, attn_decoder_loss=0.2478, over 29462.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1438, cr_loss=0.3869, attn_decoder_loss=0.2538, over 5814872.94 frames. ], batch size: 78, lr: 6.58e-03, grad_scale: 8.0 +2024-09-17 20:51:01,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=298200.0, ans=0.125 +2024-09-17 20:51:10,385 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.066e+01 8.718e+01 9.185e+01 9.940e+01 1.615e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-17 20:51:34,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=298280.0, ans=0.125 +2024-09-17 20:51:40,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=298280.0, ans=0.125 +2024-09-17 20:51:54,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=298320.0, ans=0.2 +2024-09-17 20:51:55,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=298320.0, ans=0.125 +2024-09-17 20:52:10,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=298360.0, ans=0.125 +2024-09-17 20:52:18,711 INFO [train.py:1198] (0/2) Epoch 17, batch 2200, loss[loss=0.2586, ctc_loss=0.1481, cr_loss=0.3894, attn_decoder_loss=0.2623, over 29626.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1443, cr_loss=0.3871, attn_decoder_loss=0.2541, over 5811417.99 frames. 
], batch size: 86, lr: 6.58e-03, grad_scale: 8.0 +2024-09-17 20:52:28,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=298400.0, ans=0.125 +2024-09-17 20:52:40,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.78 vs. limit=22.5 +2024-09-17 20:52:45,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=298440.0, ans=0.2 +2024-09-17 20:53:01,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=298480.0, ans=0.1 +2024-09-17 20:53:33,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.63 vs. limit=6.0 +2024-09-17 20:53:34,402 INFO [train.py:1198] (0/2) Epoch 17, batch 2250, loss[loss=0.2501, ctc_loss=0.1434, cr_loss=0.3989, attn_decoder_loss=0.2531, over 29721.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.1439, cr_loss=0.386, attn_decoder_loss=0.2538, over 5810205.05 frames. ], batch size: 82, lr: 6.58e-03, grad_scale: 8.0 +2024-09-17 20:53:36,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=298600.0, ans=0.0 +2024-09-17 20:53:37,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=298600.0, ans=0.125 +2024-09-17 20:53:46,691 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.540e+01 9.223e+01 9.820e+01 2.780e+02, threshold=1.845e+02, percent-clipped=3.0 +2024-09-17 20:53:56,308 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.81 vs. limit=22.5 +2024-09-17 20:54:14,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.43 vs. limit=15.0 +2024-09-17 20:54:44,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=298760.0, ans=0.125 +2024-09-17 20:54:50,230 INFO [train.py:1198] (0/2) Epoch 17, batch 2300, loss[loss=0.2365, ctc_loss=0.1391, cr_loss=0.3811, attn_decoder_loss=0.2389, over 29295.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1437, cr_loss=0.3859, attn_decoder_loss=0.2531, over 5797890.23 frames. ], batch size: 71, lr: 6.57e-03, grad_scale: 8.0 +2024-09-17 20:54:55,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=298800.0, ans=0.125 +2024-09-17 20:55:03,110 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.04 vs. limit=15.0 +2024-09-17 20:55:04,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=298840.0, ans=0.125 +2024-09-17 20:55:19,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=298880.0, ans=0.1 +2024-09-17 20:55:21,032 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.10 vs. 
limit=15.0 +2024-09-17 20:55:23,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=298880.0, ans=0.125 +2024-09-17 20:55:26,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=298880.0, ans=0.1 +2024-09-17 20:55:54,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=298960.0, ans=0.125 +2024-09-17 20:55:58,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=298960.0, ans=0.125 +2024-09-17 20:56:03,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=298960.0, ans=0.125 +2024-09-17 20:56:07,995 INFO [train.py:1198] (0/2) Epoch 17, batch 2350, loss[loss=0.262, ctc_loss=0.1494, cr_loss=0.4111, attn_decoder_loss=0.2654, over 29697.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1434, cr_loss=0.3856, attn_decoder_loss=0.2532, over 5803368.27 frames. ], batch size: 83, lr: 6.57e-03, grad_scale: 8.0 +2024-09-17 20:56:15,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=299000.0, ans=0.0 +2024-09-17 20:56:21,979 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.665e+01 8.873e+01 9.644e+01 1.055e+02 1.144e+03, threshold=1.929e+02, percent-clipped=2.0 +2024-09-17 20:56:49,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=299080.0, ans=0.125 +2024-09-17 20:56:49,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=299080.0, ans=0.1 +2024-09-17 20:57:04,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=299120.0, ans=0.1 +2024-09-17 20:57:04,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=299120.0, ans=0.2 +2024-09-17 20:57:26,179 INFO [train.py:1198] (0/2) Epoch 17, batch 2400, loss[loss=0.2393, ctc_loss=0.1356, cr_loss=0.3689, attn_decoder_loss=0.2427, over 29532.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1443, cr_loss=0.387, attn_decoder_loss=0.254, over 5807626.88 frames. ], batch size: 76, lr: 6.57e-03, grad_scale: 16.0 +2024-09-17 20:57:50,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=299240.0, ans=0.2 +2024-09-17 20:57:52,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=299240.0, ans=0.0 +2024-09-17 20:58:02,005 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.89 vs. limit=15.0 +2024-09-17 20:58:13,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=299320.0, ans=0.125 +2024-09-17 20:58:13,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.69 vs. 
limit=22.5 +2024-09-17 20:58:14,858 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:58:16,798 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.26 vs. limit=15.0 +2024-09-17 20:58:25,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=299360.0, ans=0.0 +2024-09-17 20:58:34,957 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.45 vs. limit=15.0 +2024-09-17 20:58:41,909 INFO [train.py:1198] (0/2) Epoch 17, batch 2450, loss[loss=0.2594, ctc_loss=0.1495, cr_loss=0.4, attn_decoder_loss=0.2627, over 29730.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1453, cr_loss=0.3884, attn_decoder_loss=0.2552, over 5784646.38 frames. ], batch size: 82, lr: 6.57e-03, grad_scale: 8.0 +2024-09-17 20:58:45,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=299400.0, ans=0.125 +2024-09-17 20:58:55,497 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 9.066e+01 9.720e+01 1.171e+02 1.991e+02, threshold=1.944e+02, percent-clipped=1.0 +2024-09-17 20:59:13,219 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.36 vs. limit=6.0 +2024-09-17 20:59:21,994 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.29 vs. limit=15.0 +2024-09-17 20:59:33,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=299520.0, ans=0.0 +2024-09-17 20:59:37,313 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.03 vs. limit=22.5 +2024-09-17 20:59:41,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=299560.0, ans=0.1 +2024-09-17 20:59:59,629 INFO [train.py:1198] (0/2) Epoch 17, batch 2500, loss[loss=0.2567, ctc_loss=0.1448, cr_loss=0.3626, attn_decoder_loss=0.261, over 29630.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1454, cr_loss=0.3887, attn_decoder_loss=0.2552, over 5794424.14 frames. 
], batch size: 86, lr: 6.57e-03, grad_scale: 8.0 +2024-09-17 21:00:01,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=299600.0, ans=0.125 +2024-09-17 21:00:11,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=299600.0, ans=0.2 +2024-09-17 21:00:32,725 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:00:34,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=299680.0, ans=0.1 +2024-09-17 21:00:35,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=299680.0, ans=0.125 +2024-09-17 21:00:58,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=299720.0, ans=0.025 +2024-09-17 21:01:00,621 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.33 vs. limit=15.0 +2024-09-17 21:01:18,018 INFO [train.py:1198] (0/2) Epoch 17, batch 2550, loss[loss=0.2227, ctc_loss=0.1243, cr_loss=0.3479, attn_decoder_loss=0.2259, over 29312.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1447, cr_loss=0.3875, attn_decoder_loss=0.2547, over 5797491.95 frames. ], batch size: 67, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:01:31,611 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.868e+01 8.659e+01 9.126e+01 9.764e+01 1.342e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-17 21:01:52,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=299880.0, ans=0.125 +2024-09-17 21:02:08,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=299920.0, ans=0.125 +2024-09-17 21:02:22,356 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.51 vs. limit=15.0 +2024-09-17 21:02:26,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=299960.0, ans=0.0 +2024-09-17 21:02:27,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=299960.0, ans=0.125 +2024-09-17 21:02:30,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=299960.0, ans=0.0 +2024-09-17 21:02:30,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=299960.0, ans=0.125 +2024-09-17 21:02:34,197 INFO [train.py:1198] (0/2) Epoch 17, batch 2600, loss[loss=0.2493, ctc_loss=0.1509, cr_loss=0.4101, attn_decoder_loss=0.2511, over 29435.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1454, cr_loss=0.3887, attn_decoder_loss=0.2552, over 5794620.53 frames. 
], batch size: 78, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:02:36,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=300000.0, ans=0.0 +2024-09-17 21:02:42,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=300000.0, ans=0.1 +2024-09-17 21:03:10,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=300080.0, ans=0.2 +2024-09-17 21:03:33,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.81 vs. limit=15.0 +2024-09-17 21:03:38,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=300160.0, ans=0.125 +2024-09-17 21:03:51,168 INFO [train.py:1198] (0/2) Epoch 17, batch 2650, loss[loss=0.2624, ctc_loss=0.1546, cr_loss=0.4168, attn_decoder_loss=0.2651, over 29347.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1454, cr_loss=0.3893, attn_decoder_loss=0.2556, over 5800801.04 frames. ], batch size: 100, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:04:06,989 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 8.955e+01 9.384e+01 9.945e+01 2.228e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-17 21:04:18,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=300240.0, ans=0.0 +2024-09-17 21:04:22,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=300280.0, ans=0.2 +2024-09-17 21:05:01,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=300360.0, ans=0.125 +2024-09-17 21:05:06,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=300360.0, ans=0.125 +2024-09-17 21:05:09,160 INFO [train.py:1198] (0/2) Epoch 17, batch 2700, loss[loss=0.2625, ctc_loss=0.1415, cr_loss=0.3916, attn_decoder_loss=0.2672, over 29495.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1453, cr_loss=0.3893, attn_decoder_loss=0.2556, over 5795946.09 frames. ], batch size: 87, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:05:16,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.86 vs. limit=15.0 +2024-09-17 21:06:03,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=300520.0, ans=0.1 +2024-09-17 21:06:24,705 INFO [train.py:1198] (0/2) Epoch 17, batch 2750, loss[loss=0.2377, ctc_loss=0.1408, cr_loss=0.3957, attn_decoder_loss=0.2397, over 29506.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.144, cr_loss=0.3865, attn_decoder_loss=0.2541, over 5794646.24 frames. ], batch size: 75, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:06:38,342 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 8.681e+01 9.439e+01 1.052e+02 4.745e+02, threshold=1.888e+02, percent-clipped=3.0 +2024-09-17 21:07:07,900 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.38 vs. 
limit=6.0 +2024-09-17 21:07:13,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=300720.0, ans=0.2 +2024-09-17 21:07:29,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=300760.0, ans=0.125 +2024-09-17 21:07:43,693 INFO [train.py:1198] (0/2) Epoch 17, batch 2800, loss[loss=0.2732, ctc_loss=0.1806, cr_loss=0.3908, attn_decoder_loss=0.2748, over 20466.00 frames. ], tot_loss[loss=0.251, ctc_loss=0.1443, cr_loss=0.3867, attn_decoder_loss=0.2542, over 5775472.51 frames. ], batch size: 209, lr: 6.55e-03, grad_scale: 16.0 +2024-09-17 21:07:56,171 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.90 vs. limit=15.0 +2024-09-17 21:08:04,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=300840.0, ans=0.125 +2024-09-17 21:08:07,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=300840.0, ans=0.0 +2024-09-17 21:08:07,789 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.23 vs. limit=22.5 +2024-09-17 21:08:17,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=300880.0, ans=0.1 +2024-09-17 21:08:23,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=300880.0, ans=0.125 +2024-09-17 21:08:28,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=300880.0, ans=0.07 +2024-09-17 21:08:34,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=300920.0, ans=0.125 +2024-09-17 21:08:46,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=300960.0, ans=0.125 +2024-09-17 21:08:54,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=300960.0, ans=0.125 +2024-09-17 21:09:00,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=301000.0, ans=0.0 +2024-09-17 21:09:01,392 INFO [train.py:1198] (0/2) Epoch 17, batch 2850, loss[loss=0.242, ctc_loss=0.1305, cr_loss=0.3617, attn_decoder_loss=0.2464, over 29495.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1447, cr_loss=0.3869, attn_decoder_loss=0.2548, over 5761255.37 frames. 
], batch size: 77, lr: 6.55e-03, grad_scale: 8.0 +2024-09-17 21:09:01,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=301000.0, ans=0.0 +2024-09-17 21:09:12,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=301000.0, ans=0.0 +2024-09-17 21:09:16,420 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.088e+01 8.947e+01 9.466e+01 1.049e+02 1.883e+02, threshold=1.893e+02, percent-clipped=0.0 +2024-09-17 21:09:51,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=301120.0, ans=0.125 +2024-09-17 21:09:52,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=301120.0, ans=0.125 +2024-09-17 21:10:17,125 INFO [train.py:1198] (0/2) Epoch 17, batch 2900, loss[loss=0.2481, ctc_loss=0.1422, cr_loss=0.3717, attn_decoder_loss=0.2517, over 29399.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1451, cr_loss=0.3886, attn_decoder_loss=0.2557, over 5786254.18 frames. ], batch size: 79, lr: 6.55e-03, grad_scale: 8.0 +2024-09-17 21:10:24,057 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.90 vs. limit=22.5 +2024-09-17 21:10:26,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=301200.0, ans=0.125 +2024-09-17 21:10:32,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=301240.0, ans=0.1 +2024-09-17 21:10:41,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=301240.0, ans=0.2 +2024-09-17 21:10:47,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=301280.0, ans=0.1 +2024-09-17 21:10:52,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=301280.0, ans=0.1 +2024-09-17 21:10:53,144 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.44 vs. limit=22.5 +2024-09-17 21:11:04,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=301320.0, ans=0.125 +2024-09-17 21:11:13,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=301320.0, ans=0.2 +2024-09-17 21:11:17,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.05 vs. limit=15.0 +2024-09-17 21:11:35,005 INFO [train.py:1198] (0/2) Epoch 17, batch 2950, loss[loss=0.2371, ctc_loss=0.1328, cr_loss=0.36, attn_decoder_loss=0.2406, over 29502.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1443, cr_loss=0.387, attn_decoder_loss=0.2546, over 5781933.98 frames. 
], batch size: 75, lr: 6.55e-03, grad_scale: 8.0 +2024-09-17 21:11:39,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=301400.0, ans=0.0 +2024-09-17 21:11:52,335 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.744e+01 8.656e+01 9.103e+01 9.738e+01 1.377e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-17 21:12:30,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=301520.0, ans=0.0 +2024-09-17 21:12:33,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=301520.0, ans=0.125 +2024-09-17 21:12:36,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.40 vs. limit=6.0 +2024-09-17 21:12:51,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=301600.0, ans=0.025 +2024-09-17 21:12:52,858 INFO [train.py:1198] (0/2) Epoch 17, batch 3000, loss[loss=0.2565, ctc_loss=0.1498, cr_loss=0.3735, attn_decoder_loss=0.2601, over 29726.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1438, cr_loss=0.3854, attn_decoder_loss=0.2542, over 5782913.88 frames. ], batch size: 81, lr: 6.54e-03, grad_scale: 8.0 +2024-09-17 21:12:52,858 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 21:13:11,357 INFO [train.py:1230] (0/2) Epoch 17, validation: loss=0.2115, ctc_loss=0.04066, cr_loss=4.995e-15, attn_decoder_loss=0.2305, over 944034.00 frames. +2024-09-17 21:13:11,357 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 21:13:16,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=301600.0, ans=0.2 +2024-09-17 21:13:25,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.15 vs. limit=15.0 +2024-09-17 21:13:31,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=301640.0, ans=0.0 +2024-09-17 21:13:31,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.73 vs. limit=10.0 +2024-09-17 21:13:32,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=301640.0, ans=0.125 +2024-09-17 21:13:57,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=301720.0, ans=0.07 +2024-09-17 21:14:10,002 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.70 vs. limit=22.5 +2024-09-17 21:14:10,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=301760.0, ans=0.2 +2024-09-17 21:14:18,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=301760.0, ans=0.025 +2024-09-17 21:14:20,938 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.37 vs. 
limit=5.0 +2024-09-17 21:14:27,363 INFO [train.py:1198] (0/2) Epoch 17, batch 3050, loss[loss=0.2461, ctc_loss=0.1366, cr_loss=0.377, attn_decoder_loss=0.2498, over 29552.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1446, cr_loss=0.387, attn_decoder_loss=0.2549, over 5776293.57 frames. ], batch size: 76, lr: 6.54e-03, grad_scale: 8.0 +2024-09-17 21:14:28,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.38 vs. limit=15.0 +2024-09-17 21:14:41,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=301840.0, ans=0.1 +2024-09-17 21:14:42,352 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.786e+01 9.363e+01 1.016e+02 1.140e+02 2.796e+02, threshold=2.033e+02, percent-clipped=4.0 +2024-09-17 21:14:47,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=301840.0, ans=0.0 +2024-09-17 21:14:50,457 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.10 vs. limit=22.5 +2024-09-17 21:15:02,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=301880.0, ans=0.1 +2024-09-17 21:15:11,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=301880.0, ans=0.1 +2024-09-17 21:15:15,698 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.06 vs. limit=22.5 +2024-09-17 21:15:20,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=301920.0, ans=0.125 +2024-09-17 21:15:46,821 INFO [train.py:1198] (0/2) Epoch 17, batch 3100, loss[loss=0.2618, ctc_loss=0.1541, cr_loss=0.4122, attn_decoder_loss=0.2646, over 29286.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1449, cr_loss=0.3874, attn_decoder_loss=0.2549, over 5776997.24 frames. ], batch size: 100, lr: 6.54e-03, grad_scale: 8.0 +2024-09-17 21:15:50,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=302000.0, ans=0.0 +2024-09-17 21:16:02,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=302040.0, ans=0.0 +2024-09-17 21:16:53,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=302160.0, ans=0.07 +2024-09-17 21:16:54,317 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.01 vs. limit=12.0 +2024-09-17 21:16:58,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=302160.0, ans=0.125 +2024-09-17 21:17:02,401 INFO [train.py:1198] (0/2) Epoch 17, batch 3150, loss[loss=0.2729, ctc_loss=0.1601, cr_loss=0.4242, attn_decoder_loss=0.276, over 28854.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1445, cr_loss=0.387, attn_decoder_loss=0.2546, over 5782652.84 frames. 
], batch size: 104, lr: 6.54e-03, grad_scale: 8.0 +2024-09-17 21:17:17,548 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 8.912e+01 9.257e+01 9.921e+01 1.761e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-17 21:17:23,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=302240.0, ans=0.125 +2024-09-17 21:17:37,947 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.12 vs. limit=12.0 +2024-09-17 21:18:09,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=302360.0, ans=0.125 +2024-09-17 21:18:18,472 INFO [train.py:1198] (0/2) Epoch 17, batch 3200, loss[loss=0.2508, ctc_loss=0.1402, cr_loss=0.3736, attn_decoder_loss=0.2548, over 29401.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1439, cr_loss=0.3857, attn_decoder_loss=0.254, over 5792875.79 frames. ], batch size: 79, lr: 6.54e-03, grad_scale: 16.0 +2024-09-17 21:18:20,788 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.03 vs. limit=22.5 +2024-09-17 21:18:23,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=302400.0, ans=0.125 +2024-09-17 21:18:32,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=302440.0, ans=0.125 +2024-09-17 21:18:49,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.34 vs. limit=15.0 +2024-09-17 21:18:50,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=302480.0, ans=0.2 +2024-09-17 21:18:51,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.25 vs. limit=22.5 +2024-09-17 21:19:01,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=302480.0, ans=0.0 +2024-09-17 21:19:27,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=302560.0, ans=0.125 +2024-09-17 21:19:38,257 INFO [train.py:1198] (0/2) Epoch 17, batch 3250, loss[loss=0.2546, ctc_loss=0.1408, cr_loss=0.374, attn_decoder_loss=0.259, over 29694.00 frames. ], tot_loss[loss=0.251, ctc_loss=0.144, cr_loss=0.3864, attn_decoder_loss=0.2543, over 5798816.63 frames. 
], batch size: 84, lr: 6.53e-03, grad_scale: 8.0 +2024-09-17 21:19:41,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=302600.0, ans=0.125 +2024-09-17 21:19:46,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=302600.0, ans=0.125 +2024-09-17 21:19:54,940 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.314e+01 8.527e+01 9.036e+01 9.665e+01 1.223e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-17 21:20:07,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=302680.0, ans=0.2 +2024-09-17 21:20:49,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=302760.0, ans=0.2 +2024-09-17 21:20:53,932 INFO [train.py:1198] (0/2) Epoch 17, batch 3300, loss[loss=0.2555, ctc_loss=0.1587, cr_loss=0.3821, attn_decoder_loss=0.2578, over 28177.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.143, cr_loss=0.3844, attn_decoder_loss=0.2532, over 5796316.58 frames. ], batch size: 111, lr: 6.53e-03, grad_scale: 8.0 +2024-09-17 21:21:15,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=302840.0, ans=0.0 +2024-09-17 21:21:26,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=302880.0, ans=0.1 +2024-09-17 21:21:47,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=302920.0, ans=0.2 +2024-09-17 21:22:01,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=302960.0, ans=0.05 +2024-09-17 21:22:09,783 INFO [train.py:1198] (0/2) Epoch 17, batch 3350, loss[loss=0.2662, ctc_loss=0.1529, cr_loss=0.4045, attn_decoder_loss=0.2698, over 28765.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1438, cr_loss=0.3856, attn_decoder_loss=0.2541, over 5771790.42 frames. 
], batch size: 104, lr: 6.53e-03, grad_scale: 4.0 +2024-09-17 21:22:20,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=303000.0, ans=0.0 +2024-09-17 21:22:28,069 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.973e+01 8.919e+01 9.576e+01 1.043e+02 2.558e+02, threshold=1.915e+02, percent-clipped=2.0 +2024-09-17 21:22:31,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=303040.0, ans=0.125 +2024-09-17 21:22:38,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=303080.0, ans=0.2 +2024-09-17 21:22:50,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=303080.0, ans=0.0 +2024-09-17 21:23:03,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=303120.0, ans=0.125 +2024-09-17 21:23:23,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=303160.0, ans=0.125 +2024-09-17 21:23:29,943 INFO [train.py:1198] (0/2) Epoch 17, batch 3400, loss[loss=0.22, ctc_loss=0.1214, cr_loss=0.3465, attn_decoder_loss=0.2233, over 29356.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1441, cr_loss=0.3859, attn_decoder_loss=0.2541, over 5764131.15 frames. ], batch size: 67, lr: 6.53e-03, grad_scale: 8.0 +2024-09-17 21:23:44,536 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.38 vs. limit=15.0 +2024-09-17 21:23:48,014 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.82 vs. limit=10.0 +2024-09-17 21:23:53,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=303240.0, ans=0.125 +2024-09-17 21:24:11,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=303280.0, ans=0.125 +2024-09-17 21:24:19,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.37 vs. limit=12.0 +2024-09-17 21:24:35,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=303360.0, ans=0.0 +2024-09-17 21:24:40,227 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:24:45,977 INFO [train.py:1198] (0/2) Epoch 17, batch 3450, loss[loss=0.26, ctc_loss=0.1464, cr_loss=0.3916, attn_decoder_loss=0.2639, over 28561.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1437, cr_loss=0.3853, attn_decoder_loss=0.2542, over 5773406.34 frames. 
], batch size: 112, lr: 6.53e-03, grad_scale: 8.0 +2024-09-17 21:25:04,362 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.501e+01 9.059e+01 9.380e+01 1.001e+02 2.094e+02, threshold=1.876e+02, percent-clipped=1.0 +2024-09-17 21:25:22,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=303480.0, ans=0.125 +2024-09-17 21:25:33,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=303520.0, ans=0.125 +2024-09-17 21:25:36,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=303520.0, ans=0.125 +2024-09-17 21:25:41,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=303520.0, ans=0.0 +2024-09-17 21:25:47,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=303560.0, ans=0.0 +2024-09-17 21:26:00,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=303600.0, ans=0.2 +2024-09-17 21:26:01,989 INFO [train.py:1198] (0/2) Epoch 17, batch 3500, loss[loss=0.216, ctc_loss=0.1078, cr_loss=0.3113, attn_decoder_loss=0.2211, over 29320.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1432, cr_loss=0.3845, attn_decoder_loss=0.2535, over 5775091.45 frames. ], batch size: 71, lr: 6.52e-03, grad_scale: 8.0 +2024-09-17 21:26:12,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=303600.0, ans=0.0 +2024-09-17 21:26:35,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.43 vs. limit=12.0 +2024-09-17 21:26:41,593 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.37 vs. limit=15.0 +2024-09-17 21:27:08,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=303760.0, ans=0.95 +2024-09-17 21:27:18,813 INFO [train.py:1198] (0/2) Epoch 17, batch 3550, loss[loss=0.2545, ctc_loss=0.1388, cr_loss=0.3781, attn_decoder_loss=0.259, over 29692.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1433, cr_loss=0.3852, attn_decoder_loss=0.2535, over 5782345.25 frames. ], batch size: 89, lr: 6.52e-03, grad_scale: 8.0 +2024-09-17 21:27:22,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=303800.0, ans=0.2 +2024-09-17 21:27:36,516 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.165e+01 8.716e+01 9.254e+01 9.841e+01 2.209e+02, threshold=1.851e+02, percent-clipped=2.0 +2024-09-17 21:27:36,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=303840.0, ans=0.0 +2024-09-17 21:28:11,146 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.42 vs. 
limit=22.5 +2024-09-17 21:28:17,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=303920.0, ans=0.025 +2024-09-17 21:28:23,986 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.29 vs. limit=15.0 +2024-09-17 21:28:25,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=303960.0, ans=0.09899494936611666 +2024-09-17 21:28:25,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=303960.0, ans=0.125 +2024-09-17 21:28:34,212 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-76000.pt +2024-09-17 21:28:43,008 INFO [train.py:1198] (0/2) Epoch 17, batch 3600, loss[loss=0.2422, ctc_loss=0.1365, cr_loss=0.367, attn_decoder_loss=0.2458, over 29488.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1432, cr_loss=0.3854, attn_decoder_loss=0.2536, over 5791096.16 frames. ], batch size: 77, lr: 6.52e-03, grad_scale: 16.0 +2024-09-17 21:29:25,894 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.99 vs. limit=6.0 +2024-09-17 21:29:31,255 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:29:37,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=304120.0, ans=0.125 +2024-09-17 21:29:37,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=304120.0, ans=0.125 +2024-09-17 21:29:38,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=304120.0, ans=0.0 +2024-09-17 21:29:41,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=304160.0, ans=0.125 +2024-09-17 21:29:57,761 INFO [train.py:1198] (0/2) Epoch 17, batch 3650, loss[loss=0.2696, ctc_loss=0.1558, cr_loss=0.4176, attn_decoder_loss=0.273, over 29501.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.143, cr_loss=0.3853, attn_decoder_loss=0.2529, over 5793711.44 frames. ], batch size: 90, lr: 6.52e-03, grad_scale: 8.0 +2024-09-17 21:29:58,835 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.70 vs. 
limit=22.5 +2024-09-17 21:30:14,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=304240.0, ans=0.1 +2024-09-17 21:30:17,007 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.700e+01 8.843e+01 9.212e+01 9.798e+01 3.342e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-17 21:30:26,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=304280.0, ans=0.125 +2024-09-17 21:30:39,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=304280.0, ans=0.0 +2024-09-17 21:30:40,299 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.79 vs. limit=15.0 +2024-09-17 21:30:44,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=304320.0, ans=0.0 +2024-09-17 21:31:08,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=304360.0, ans=0.2 +2024-09-17 21:31:12,237 INFO [train.py:1198] (0/2) Epoch 17, batch 3700, loss[loss=0.2466, ctc_loss=0.1347, cr_loss=0.381, attn_decoder_loss=0.2506, over 29704.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1429, cr_loss=0.3853, attn_decoder_loss=0.2531, over 5803591.63 frames. ], batch size: 84, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:31:18,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=304400.0, ans=0.125 +2024-09-17 21:31:18,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=304400.0, ans=0.1 +2024-09-17 21:31:42,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=304480.0, ans=0.025 +2024-09-17 21:31:45,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=304480.0, ans=0.1 +2024-09-17 21:31:46,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=304480.0, ans=0.125 +2024-09-17 21:31:50,233 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.67 vs. limit=10.0 +2024-09-17 21:31:59,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=304520.0, ans=10.0 +2024-09-17 21:32:07,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=304520.0, ans=0.125 +2024-09-17 21:32:14,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=304560.0, ans=0.1 +2024-09-17 21:32:14,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=304560.0, ans=0.125 +2024-09-17 21:32:16,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.79 vs. 
limit=15.0 +2024-09-17 21:32:23,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=304560.0, ans=0.09899494936611666 +2024-09-17 21:32:26,341 INFO [train.py:1198] (0/2) Epoch 17, batch 3750, loss[loss=0.2215, ctc_loss=0.1221, cr_loss=0.3657, attn_decoder_loss=0.2244, over 29308.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1428, cr_loss=0.3855, attn_decoder_loss=0.2531, over 5807789.49 frames. ], batch size: 67, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:32:45,808 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 8.729e+01 9.186e+01 9.795e+01 2.542e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-17 21:33:43,100 INFO [train.py:1198] (0/2) Epoch 17, batch 3800, loss[loss=0.2588, ctc_loss=0.1483, cr_loss=0.4047, attn_decoder_loss=0.2621, over 29639.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1421, cr_loss=0.3843, attn_decoder_loss=0.2525, over 5798135.34 frames. ], batch size: 86, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:33:46,936 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.37 vs. limit=6.0 +2024-09-17 21:33:54,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=304800.0, ans=15.0 +2024-09-17 21:34:13,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=304880.0, ans=0.0 +2024-09-17 21:34:23,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=304880.0, ans=0.125 +2024-09-17 21:34:33,667 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.94 vs. limit=10.0 +2024-09-17 21:34:42,051 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.84 vs. limit=15.0 +2024-09-17 21:34:44,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=304960.0, ans=0.1 +2024-09-17 21:34:56,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=304960.0, ans=0.125 +2024-09-17 21:34:59,147 INFO [train.py:1198] (0/2) Epoch 17, batch 3850, loss[loss=0.2677, ctc_loss=0.1564, cr_loss=0.4035, attn_decoder_loss=0.2711, over 29263.00 frames. ], tot_loss[loss=0.2494, ctc_loss=0.1423, cr_loss=0.3848, attn_decoder_loss=0.2527, over 5812220.78 frames. 
], batch size: 100, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:35:00,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=305000.0, ans=0.2 +2024-09-17 21:35:15,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=305040.0, ans=0.2 +2024-09-17 21:35:18,453 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.679e+01 8.722e+01 9.215e+01 9.828e+01 1.401e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-17 21:35:20,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=305040.0, ans=0.125 +2024-09-17 21:35:21,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=305040.0, ans=0.125 +2024-09-17 21:35:23,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.14 vs. limit=15.0 +2024-09-17 21:35:30,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=305080.0, ans=0.0 +2024-09-17 21:35:43,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.88 vs. limit=15.0 +2024-09-17 21:35:47,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=305120.0, ans=0.125 +2024-09-17 21:35:53,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=305120.0, ans=0.2 +2024-09-17 21:35:58,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=305160.0, ans=0.0 +2024-09-17 21:36:13,796 INFO [train.py:1198] (0/2) Epoch 17, batch 3900, loss[loss=0.2622, ctc_loss=0.1457, cr_loss=0.4016, attn_decoder_loss=0.2663, over 29642.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1428, cr_loss=0.3854, attn_decoder_loss=0.2534, over 5816899.45 frames. ], batch size: 86, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:36:15,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=305200.0, ans=0.025 +2024-09-17 21:36:20,508 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.63 vs. limit=15.0 +2024-09-17 21:36:22,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=305200.0, ans=0.0 +2024-09-17 21:36:39,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=305240.0, ans=0.125 +2024-09-17 21:36:41,257 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.97 vs. limit=10.0 +2024-09-17 21:37:01,985 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.58 vs. limit=10.0 +2024-09-17 21:37:05,016 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.15 vs. 
limit=22.5 +2024-09-17 21:37:16,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=305360.0, ans=0.0 +2024-09-17 21:37:26,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=305400.0, ans=0.125 +2024-09-17 21:37:28,046 INFO [train.py:1198] (0/2) Epoch 17, batch 3950, loss[loss=0.2632, ctc_loss=0.156, cr_loss=0.4379, attn_decoder_loss=0.2654, over 29466.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1423, cr_loss=0.3848, attn_decoder_loss=0.2531, over 5836385.71 frames. ], batch size: 97, lr: 6.50e-03, grad_scale: 8.0 +2024-09-17 21:37:34,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=305400.0, ans=0.125 +2024-09-17 21:37:47,422 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.275e+01 8.762e+01 9.164e+01 9.964e+01 1.868e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-17 21:37:59,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=305480.0, ans=0.125 +2024-09-17 21:38:15,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=305520.0, ans=0.125 +2024-09-17 21:38:36,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=305560.0, ans=0.0 +2024-09-17 21:38:39,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=305560.0, ans=0.0 +2024-09-17 21:38:39,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=305560.0, ans=0.125 +2024-09-17 21:38:44,017 INFO [train.py:1198] (0/2) Epoch 17, batch 4000, loss[loss=0.2189, ctc_loss=0.1149, cr_loss=0.3269, attn_decoder_loss=0.2232, over 29497.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1425, cr_loss=0.384, attn_decoder_loss=0.2531, over 5814099.91 frames. ], batch size: 74, lr: 6.50e-03, grad_scale: 16.0 +2024-09-17 21:38:58,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=305640.0, ans=0.0 +2024-09-17 21:39:02,675 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.90 vs. limit=15.0 +2024-09-17 21:39:03,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=305640.0, ans=0.025 +2024-09-17 21:39:29,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=305720.0, ans=10.0 +2024-09-17 21:39:57,367 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.04 vs. limit=10.0 +2024-09-17 21:39:59,430 INFO [train.py:1198] (0/2) Epoch 17, batch 4050, loss[loss=0.2829, ctc_loss=0.189, cr_loss=0.4341, attn_decoder_loss=0.2837, over 21019.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1424, cr_loss=0.384, attn_decoder_loss=0.2529, over 5798794.33 frames. 
], batch size: 210, lr: 6.50e-03, grad_scale: 8.0 +2024-09-17 21:40:02,209 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.59 vs. limit=5.0 +2024-09-17 21:40:08,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=305800.0, ans=0.0 +2024-09-17 21:40:12,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.24 vs. limit=15.0 +2024-09-17 21:40:19,858 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.516e+01 8.726e+01 9.314e+01 1.066e+02 2.595e+02, threshold=1.863e+02, percent-clipped=1.0 +2024-09-17 21:40:31,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=305880.0, ans=0.125 +2024-09-17 21:40:53,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=305920.0, ans=0.0 +2024-09-17 21:40:55,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=305920.0, ans=0.1 +2024-09-17 21:41:07,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=305960.0, ans=0.95 +2024-09-17 21:41:12,872 INFO [train.py:1198] (0/2) Epoch 17, batch 4100, loss[loss=0.2693, ctc_loss=0.1569, cr_loss=0.4108, attn_decoder_loss=0.2727, over 29503.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1429, cr_loss=0.3843, attn_decoder_loss=0.2533, over 5795021.44 frames. ], batch size: 90, lr: 6.50e-03, grad_scale: 8.0 +2024-09-17 21:41:38,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=306040.0, ans=0.0 +2024-09-17 21:41:58,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=306120.0, ans=0.125 +2024-09-17 21:42:16,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=306160.0, ans=0.125 +2024-09-17 21:42:16,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=306160.0, ans=0.0 +2024-09-17 21:42:22,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=306160.0, ans=0.0 +2024-09-17 21:42:26,512 INFO [train.py:1198] (0/2) Epoch 17, batch 4150, loss[loss=0.2438, ctc_loss=0.1366, cr_loss=0.3676, attn_decoder_loss=0.2476, over 29508.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1426, cr_loss=0.384, attn_decoder_loss=0.2531, over 5800570.82 frames. ], batch size: 77, lr: 6.50e-03, grad_scale: 8.0 +2024-09-17 21:42:34,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=306200.0, ans=0.125 +2024-09-17 21:42:48,272 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.408e+01 8.896e+01 9.299e+01 9.873e+01 2.442e+02, threshold=1.860e+02, percent-clipped=1.0 +2024-09-17 21:43:22,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.10 vs. 
limit=22.5 +2024-09-17 21:43:27,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=306360.0, ans=0.125 +2024-09-17 21:43:38,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=306360.0, ans=0.125 +2024-09-17 21:43:41,501 INFO [train.py:1198] (0/2) Epoch 17, batch 4200, loss[loss=0.2684, ctc_loss=0.1669, cr_loss=0.4313, attn_decoder_loss=0.2701, over 29520.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1429, cr_loss=0.3849, attn_decoder_loss=0.2534, over 5802492.57 frames. ], batch size: 90, lr: 6.49e-03, grad_scale: 8.0 +2024-09-17 21:43:43,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=306400.0, ans=0.125 +2024-09-17 21:43:59,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=306440.0, ans=0.1 +2024-09-17 21:44:00,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_positive, batch_count=306440.0, ans=0.05 +2024-09-17 21:44:05,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=306440.0, ans=0.0 +2024-09-17 21:44:05,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=306440.0, ans=0.1 +2024-09-17 21:44:08,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=306440.0, ans=0.125 +2024-09-17 21:44:56,159 INFO [train.py:1198] (0/2) Epoch 17, batch 4250, loss[loss=0.2343, ctc_loss=0.1221, cr_loss=0.3561, attn_decoder_loss=0.2389, over 29501.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1426, cr_loss=0.3848, attn_decoder_loss=0.2534, over 5808361.63 frames. ], batch size: 74, lr: 6.49e-03, grad_scale: 8.0 +2024-09-17 21:45:03,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=306600.0, ans=10.0 +2024-09-17 21:45:04,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.65 vs. 
limit=15.0 +2024-09-17 21:45:16,394 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.648e+01 8.736e+01 9.267e+01 9.996e+01 5.774e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-17 21:45:35,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=306680.0, ans=0.125 +2024-09-17 21:45:37,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=306680.0, ans=0.125 +2024-09-17 21:45:40,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=306720.0, ans=0.125 +2024-09-17 21:45:49,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=306720.0, ans=0.125 +2024-09-17 21:45:52,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=306720.0, ans=0.07 +2024-09-17 21:45:53,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=306760.0, ans=0.1 +2024-09-17 21:45:57,050 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.74 vs. limit=15.0 +2024-09-17 21:46:09,569 INFO [train.py:1198] (0/2) Epoch 17, batch 4300, loss[loss=0.259, ctc_loss=0.1503, cr_loss=0.3898, attn_decoder_loss=0.2624, over 29527.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1428, cr_loss=0.3847, attn_decoder_loss=0.2538, over 5798023.06 frames. ], batch size: 87, lr: 6.49e-03, grad_scale: 8.0 +2024-09-17 21:46:30,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=306840.0, ans=0.125 +2024-09-17 21:46:31,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=306840.0, ans=0.2 +2024-09-17 21:46:33,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=306840.0, ans=0.125 +2024-09-17 21:46:34,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=306840.0, ans=0.1 +2024-09-17 21:46:45,082 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:46:47,120 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.71 vs. limit=10.0 +2024-09-17 21:46:59,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=306920.0, ans=0.09899494936611666 +2024-09-17 21:47:25,582 INFO [train.py:1198] (0/2) Epoch 17, batch 4350, loss[loss=0.2744, ctc_loss=0.1654, cr_loss=0.4352, attn_decoder_loss=0.2768, over 29462.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1457, cr_loss=0.3904, attn_decoder_loss=0.2574, over 5799885.99 frames. 
], batch size: 97, lr: 6.49e-03, grad_scale: 8.0 +2024-09-17 21:47:39,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=307040.0, ans=0.125 +2024-09-17 21:47:46,023 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.975e+01 9.056e+01 9.451e+01 1.005e+02 2.709e+02, threshold=1.890e+02, percent-clipped=3.0 +2024-09-17 21:47:53,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=307080.0, ans=0.125 +2024-09-17 21:47:56,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=307080.0, ans=0.0 +2024-09-17 21:48:19,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=307120.0, ans=0.1 +2024-09-17 21:48:33,796 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.35 vs. limit=15.0 +2024-09-17 21:48:39,028 INFO [train.py:1198] (0/2) Epoch 17, batch 4400, loss[loss=0.2664, ctc_loss=0.1561, cr_loss=0.4021, attn_decoder_loss=0.2697, over 27428.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1475, cr_loss=0.3927, attn_decoder_loss=0.2594, over 5769998.93 frames. ], batch size: 124, lr: 6.49e-03, grad_scale: 16.0 +2024-09-17 21:48:53,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=307240.0, ans=0.125 +2024-09-17 21:49:01,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=307240.0, ans=0.125 +2024-09-17 21:49:01,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=307240.0, ans=0.125 +2024-09-17 21:49:09,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=307280.0, ans=0.04949747468305833 +2024-09-17 21:49:36,270 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.01 vs. limit=15.0 +2024-09-17 21:49:43,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=307360.0, ans=0.125 +2024-09-17 21:49:46,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.11 vs. limit=6.0 +2024-09-17 21:49:48,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=307360.0, ans=0.07 +2024-09-17 21:49:54,142 INFO [train.py:1198] (0/2) Epoch 17, batch 4450, loss[loss=0.2783, ctc_loss=0.1924, cr_loss=0.432, attn_decoder_loss=0.2783, over 20154.00 frames. ], tot_loss[loss=0.2591, ctc_loss=0.1525, cr_loss=0.3983, attn_decoder_loss=0.2621, over 5574068.38 frames. 
], batch size: 209, lr: 6.48e-03, grad_scale: 8.0 +2024-09-17 21:50:02,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=307400.0, ans=0.0 +2024-09-17 21:50:16,944 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.924e+01 9.297e+01 9.769e+01 1.205e+02 1.699e+02, threshold=1.954e+02, percent-clipped=0.0 +2024-09-17 21:50:21,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=307440.0, ans=0.2 +2024-09-17 21:50:31,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=307480.0, ans=0.125 +2024-09-17 21:50:53,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=307560.0, ans=0.125 +2024-09-17 21:51:05,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=307560.0, ans=0.0 +2024-09-17 21:51:10,082 INFO [train.py:1198] (0/2) Epoch 17, batch 4500, loss[loss=0.2735, ctc_loss=0.1848, cr_loss=0.4315, attn_decoder_loss=0.2738, over 20248.00 frames. ], tot_loss[loss=0.262, ctc_loss=0.1578, cr_loss=0.4005, attn_decoder_loss=0.2647, over 5237255.77 frames. ], batch size: 210, lr: 6.48e-03, grad_scale: 8.0 +2024-09-17 21:51:26,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=307640.0, ans=0.0 +2024-09-17 21:51:38,163 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=10.85 vs. limit=12.0 +2024-09-17 21:51:44,063 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.52 vs. limit=6.0 +2024-09-17 21:51:47,560 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-17.pt +2024-09-17 21:52:34,424 INFO [train.py:1198] (0/2) Epoch 18, batch 0, loss[loss=0.2364, ctc_loss=0.1369, cr_loss=0.3919, attn_decoder_loss=0.2388, over 29594.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1369, cr_loss=0.3919, attn_decoder_loss=0.2388, over 29594.00 frames. ], batch size: 73, lr: 6.29e-03, grad_scale: 16.0 +2024-09-17 21:52:34,424 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 21:52:52,855 INFO [train.py:1230] (0/2) Epoch 18, validation: loss=0.2122, ctc_loss=0.03991, cr_loss=4.926e-15, attn_decoder_loss=0.2314, over 944034.00 frames. 
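The validation entry above makes it easy to check how the `loss=` field in these `train.py` lines is put together. The experiment directory name written by `checkpoint.py` (`...ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02...`) suggests weights of 0.1, 0.9 and 0.02 on the CTC, attention-decoder and consistency-regularization terms, and the logged numbers match that reading throughout this log. A minimal sketch under that assumption; `combined_loss` is a made-up helper, not icefall's actual code:

```python
# Weights read off the experiment directory name in the checkpoint.py lines
# (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02). This is an
# inference from the logged values, not a quote of icefall's train.py.
CTC_SCALE, AED_SCALE, CR_SCALE = 0.1, 0.9, 0.02

def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float) -> float:
    """Reproduce the 'loss=' field of a train.py log line from its parts."""
    return CTC_SCALE * ctc_loss + AED_SCALE * attn_decoder_loss + CR_SCALE * cr_loss

# Validation entry above: loss=0.2122 from ctc_loss=0.03991,
# cr_loss=4.926e-15, attn_decoder_loss=0.2314.
assert abs(combined_loss(0.03991, 0.2314, 4.926e-15) - 0.2122) < 5e-4
# A training entry (Epoch 17, batch 3400): loss=0.2508 from ctc_loss=0.1441,
# cr_loss=0.3859, attn_decoder_loss=0.2541.
assert abs(combined_loss(0.1441, 0.2541, 0.3859) - 0.2508) < 5e-4
```

Note also that `cr_loss` is effectively zero here (4.926e-15): the consistency-regularization term plausibly has nothing to compare during validation, where the time masking implied by the `time-mask-ratio-2.5` tag in the directory name is not applied. That is an inference from the log, not a statement about icefall internals.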
+2024-09-17 21:52:52,855 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 21:53:02,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=307700.0, ans=0.125 +2024-09-17 21:53:11,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=307740.0, ans=0.0 +2024-09-17 21:53:37,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=307780.0, ans=0.125 +2024-09-17 21:53:56,814 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.012e+01 9.686e+01 1.126e+02 1.212e+02 3.801e+02, threshold=2.253e+02, percent-clipped=2.0 +2024-09-17 21:54:00,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=307860.0, ans=0.2 +2024-09-17 21:54:10,449 INFO [train.py:1198] (0/2) Epoch 18, batch 50, loss[loss=0.2243, ctc_loss=0.1174, cr_loss=0.325, attn_decoder_loss=0.229, over 29422.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1459, cr_loss=0.3888, attn_decoder_loss=0.2546, over 1268301.28 frames. ], batch size: 70, lr: 6.29e-03, grad_scale: 8.0 +2024-09-17 21:54:12,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=307900.0, ans=0.125 +2024-09-17 21:54:15,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=307900.0, ans=0.1 +2024-09-17 21:54:25,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=307940.0, ans=0.0 +2024-09-17 21:54:57,385 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.73 vs. limit=15.0 +2024-09-17 21:55:22,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=308060.0, ans=0.0 +2024-09-17 21:55:26,560 INFO [train.py:1198] (0/2) Epoch 18, batch 100, loss[loss=0.2463, ctc_loss=0.1394, cr_loss=0.3702, attn_decoder_loss=0.2499, over 29545.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1468, cr_loss=0.3921, attn_decoder_loss=0.2568, over 2253368.39 frames. ], batch size: 76, lr: 6.29e-03, grad_scale: 8.0 +2024-09-17 21:55:50,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=308140.0, ans=0.0 +2024-09-17 21:55:52,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=308140.0, ans=0.04949747468305833 +2024-09-17 21:56:27,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=308260.0, ans=0.1 +2024-09-17 21:56:27,951 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.57 vs. limit=15.0 +2024-09-17 21:56:30,073 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.573e+01 8.618e+01 9.118e+01 9.635e+01 1.582e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-17 21:56:43,549 INFO [train.py:1198] (0/2) Epoch 18, batch 150, loss[loss=0.2306, ctc_loss=0.1251, cr_loss=0.3355, attn_decoder_loss=0.2349, over 29423.00 frames. 
], tot_loss[loss=0.2513, ctc_loss=0.1442, cr_loss=0.3876, attn_decoder_loss=0.2546, over 3048627.94 frames. ], batch size: 70, lr: 6.29e-03, grad_scale: 8.0 +2024-09-17 21:56:48,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=308300.0, ans=0.07 +2024-09-17 21:56:53,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.01 vs. limit=22.5 +2024-09-17 21:57:22,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=308380.0, ans=0.0 +2024-09-17 21:57:40,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=308420.0, ans=0.1 +2024-09-17 21:57:43,591 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:57:49,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=308460.0, ans=0.0 +2024-09-17 21:57:51,533 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.84 vs. limit=15.0 +2024-09-17 21:57:58,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=308460.0, ans=0.0 +2024-09-17 21:58:01,101 INFO [train.py:1198] (0/2) Epoch 18, batch 200, loss[loss=0.261, ctc_loss=0.1552, cr_loss=0.3994, attn_decoder_loss=0.2639, over 27295.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1438, cr_loss=0.3864, attn_decoder_loss=0.254, over 3660314.48 frames. ], batch size: 125, lr: 6.29e-03, grad_scale: 8.0 +2024-09-17 21:58:15,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=308540.0, ans=0.1 +2024-09-17 21:58:17,276 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.66 vs. limit=15.0 +2024-09-17 21:58:30,097 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:58:48,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=308620.0, ans=0.125 +2024-09-17 21:58:50,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.91 vs. limit=12.0 +2024-09-17 21:58:52,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=308620.0, ans=0.125 +2024-09-17 21:58:53,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.94 vs. limit=15.0 +2024-09-17 21:59:03,039 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.405e+01 8.718e+01 9.535e+01 1.012e+02 1.370e+02, threshold=1.907e+02, percent-clipped=0.0 +2024-09-17 21:59:16,551 INFO [train.py:1198] (0/2) Epoch 18, batch 250, loss[loss=0.2799, ctc_loss=0.1811, cr_loss=0.4628, attn_decoder_loss=0.2806, over 29251.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1429, cr_loss=0.3858, attn_decoder_loss=0.2535, over 4142435.30 frames. 
], batch size: 100, lr: 6.28e-03, grad_scale: 8.0 +2024-09-17 21:59:29,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=308700.0, ans=0.1 +2024-09-17 21:59:29,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=308700.0, ans=0.0 +2024-09-17 21:59:35,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=308740.0, ans=0.125 +2024-09-17 21:59:36,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=308740.0, ans=0.2 +2024-09-17 21:59:50,642 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.46 vs. limit=6.0 +2024-09-17 22:00:10,527 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.36 vs. limit=22.5 +2024-09-17 22:00:24,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.12 vs. limit=10.0 +2024-09-17 22:00:35,429 INFO [train.py:1198] (0/2) Epoch 18, batch 300, loss[loss=0.2671, ctc_loss=0.1503, cr_loss=0.3955, attn_decoder_loss=0.2713, over 29553.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.142, cr_loss=0.3844, attn_decoder_loss=0.253, over 4510235.92 frames. ], batch size: 92, lr: 6.28e-03, grad_scale: 8.0 +2024-09-17 22:00:40,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=308900.0, ans=0.0 +2024-09-17 22:00:59,842 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.46 vs. limit=15.0 +2024-09-17 22:01:02,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=308940.0, ans=0.0 +2024-09-17 22:01:14,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=308980.0, ans=0.0 +2024-09-17 22:01:17,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=308980.0, ans=0.0 +2024-09-17 22:01:39,753 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.552e+01 9.008e+01 9.448e+01 1.517e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-17 22:01:41,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=309060.0, ans=0.1 +2024-09-17 22:01:43,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=309060.0, ans=0.1 +2024-09-17 22:01:53,396 INFO [train.py:1198] (0/2) Epoch 18, batch 350, loss[loss=0.2285, ctc_loss=0.1274, cr_loss=0.3454, attn_decoder_loss=0.232, over 29317.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1422, cr_loss=0.3846, attn_decoder_loss=0.2535, over 4794597.09 frames. 
], batch size: 71, lr: 6.28e-03, grad_scale: 8.0 +2024-09-17 22:02:05,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=309100.0, ans=0.1 +2024-09-17 22:02:55,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=309260.0, ans=0.125 +2024-09-17 22:03:08,711 INFO [train.py:1198] (0/2) Epoch 18, batch 400, loss[loss=0.2428, ctc_loss=0.1333, cr_loss=0.3755, attn_decoder_loss=0.2466, over 29662.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1418, cr_loss=0.3844, attn_decoder_loss=0.2532, over 5024247.19 frames. ], batch size: 82, lr: 6.28e-03, grad_scale: 16.0 +2024-09-17 22:03:12,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=309300.0, ans=0.025 +2024-09-17 22:03:22,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=309340.0, ans=0.1 +2024-09-17 22:03:31,508 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.35 vs. limit=15.0 +2024-09-17 22:03:32,885 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.93 vs. limit=10.0 +2024-09-17 22:03:51,095 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.43 vs. limit=15.0 +2024-09-17 22:03:58,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=309420.0, ans=0.025 +2024-09-17 22:04:15,311 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.825e+01 9.596e+01 1.056e+02 3.642e+02, threshold=1.919e+02, percent-clipped=2.0 +2024-09-17 22:04:15,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=309460.0, ans=0.1 +2024-09-17 22:04:27,498 INFO [train.py:1198] (0/2) Epoch 18, batch 450, loss[loss=0.2574, ctc_loss=0.1423, cr_loss=0.3824, attn_decoder_loss=0.2617, over 29694.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1419, cr_loss=0.3838, attn_decoder_loss=0.2533, over 5187876.96 frames. ], batch size: 83, lr: 6.28e-03, grad_scale: 8.0 +2024-09-17 22:04:32,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=309500.0, ans=0.2 +2024-09-17 22:04:34,573 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.46 vs. limit=12.0 +2024-09-17 22:04:45,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.57 vs. 
limit=15.0 +2024-09-17 22:04:49,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=309540.0, ans=0.0 +2024-09-17 22:05:03,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=309580.0, ans=0.125 +2024-09-17 22:05:16,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=309620.0, ans=0.0 +2024-09-17 22:05:17,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=309620.0, ans=0.1 +2024-09-17 22:05:19,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=309620.0, ans=0.125 +2024-09-17 22:05:46,478 INFO [train.py:1198] (0/2) Epoch 18, batch 500, loss[loss=0.2663, ctc_loss=0.1642, cr_loss=0.4206, attn_decoder_loss=0.2683, over 29440.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1415, cr_loss=0.3834, attn_decoder_loss=0.2526, over 5330261.76 frames. ], batch size: 94, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:05:50,320 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.61 vs. limit=6.0 +2024-09-17 22:05:52,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=309700.0, ans=0.125 +2024-09-17 22:06:05,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=309740.0, ans=0.05 +2024-09-17 22:06:12,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=309740.0, ans=0.1 +2024-09-17 22:06:27,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=309780.0, ans=0.125 +2024-09-17 22:06:36,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=309820.0, ans=0.2 +2024-09-17 22:06:45,051 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.20 vs. limit=15.0 +2024-09-17 22:06:50,089 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.535e+01 8.497e+01 9.185e+01 1.006e+02 4.777e+02, threshold=1.837e+02, percent-clipped=3.0 +2024-09-17 22:06:51,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=309860.0, ans=0.2 +2024-09-17 22:07:02,206 INFO [train.py:1198] (0/2) Epoch 18, batch 550, loss[loss=0.2553, ctc_loss=0.1357, cr_loss=0.3699, attn_decoder_loss=0.2604, over 28778.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1417, cr_loss=0.3838, attn_decoder_loss=0.2527, over 5422721.09 frames. ], batch size: 104, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:07:04,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.71 vs. 
limit=15.0 +2024-09-17 22:07:16,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=309940.0, ans=0.05 +2024-09-17 22:07:59,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=310020.0, ans=0.05 +2024-09-17 22:08:20,549 INFO [train.py:1198] (0/2) Epoch 18, batch 600, loss[loss=0.2806, ctc_loss=0.1695, cr_loss=0.4491, attn_decoder_loss=0.2829, over 29292.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1423, cr_loss=0.3852, attn_decoder_loss=0.253, over 5509475.60 frames. ], batch size: 100, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:08:20,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=310100.0, ans=0.125 +2024-09-17 22:08:25,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=310100.0, ans=0.1 +2024-09-17 22:08:33,645 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.69 vs. limit=15.0 +2024-09-17 22:08:37,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=310140.0, ans=0.0 +2024-09-17 22:08:48,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=310140.0, ans=0.2 +2024-09-17 22:08:51,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=310180.0, ans=0.1 +2024-09-17 22:09:12,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=310220.0, ans=0.07 +2024-09-17 22:09:21,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=310260.0, ans=0.125 +2024-09-17 22:09:25,966 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.219e+01 8.500e+01 9.114e+01 9.640e+01 1.427e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-17 22:09:26,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=310260.0, ans=0.2 +2024-09-17 22:09:32,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=310260.0, ans=0.0 +2024-09-17 22:09:38,126 INFO [train.py:1198] (0/2) Epoch 18, batch 650, loss[loss=0.2423, ctc_loss=0.1367, cr_loss=0.3953, attn_decoder_loss=0.2452, over 29779.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.141, cr_loss=0.3832, attn_decoder_loss=0.2519, over 5585863.90 frames. ], batch size: 81, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:09:42,129 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.79 vs. limit=15.0 +2024-09-17 22:09:43,560 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.70 vs. 
limit=15.0 +2024-09-17 22:10:27,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=310420.0, ans=0.0 +2024-09-17 22:10:45,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=310460.0, ans=0.125 +2024-09-17 22:10:48,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=310460.0, ans=0.0 +2024-09-17 22:10:49,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=310460.0, ans=0.2 +2024-09-17 22:10:54,174 INFO [train.py:1198] (0/2) Epoch 18, batch 700, loss[loss=0.2405, ctc_loss=0.1357, cr_loss=0.3839, attn_decoder_loss=0.2436, over 29541.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1418, cr_loss=0.3845, attn_decoder_loss=0.2527, over 5636170.48 frames. ], batch size: 76, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:10:57,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=310500.0, ans=0.0 +2024-09-17 22:11:12,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=310540.0, ans=0.1 +2024-09-17 22:11:16,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=310540.0, ans=0.0 +2024-09-17 22:11:16,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=310540.0, ans=0.125 +2024-09-17 22:11:51,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=310620.0, ans=0.0 +2024-09-17 22:11:54,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=310660.0, ans=0.0 +2024-09-17 22:11:57,451 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.839e+01 8.847e+01 9.240e+01 9.883e+01 4.255e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-17 22:11:58,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.35 vs. limit=22.5 +2024-09-17 22:12:11,912 INFO [train.py:1198] (0/2) Epoch 18, batch 750, loss[loss=0.2523, ctc_loss=0.1353, cr_loss=0.3721, attn_decoder_loss=0.257, over 29712.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1417, cr_loss=0.3844, attn_decoder_loss=0.2526, over 5675070.55 frames. ], batch size: 82, lr: 6.26e-03, grad_scale: 8.0 +2024-09-17 22:12:18,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=310700.0, ans=0.0 +2024-09-17 22:12:18,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=310700.0, ans=0.125 +2024-09-17 22:12:31,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=310740.0, ans=0.0 +2024-09-17 22:12:46,763 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.57 vs. limit=15.0 +2024-09-17 22:12:54,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.95 vs. 
limit=22.5 +2024-09-17 22:12:55,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=310780.0, ans=0.2 +2024-09-17 22:13:10,573 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.25 vs. limit=15.0 +2024-09-17 22:13:23,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=310860.0, ans=0.0 +2024-09-17 22:13:25,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=310860.0, ans=0.125 +2024-09-17 22:13:29,310 INFO [train.py:1198] (0/2) Epoch 18, batch 800, loss[loss=0.2198, ctc_loss=0.111, cr_loss=0.3272, attn_decoder_loss=0.2246, over 29606.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1411, cr_loss=0.3834, attn_decoder_loss=0.2521, over 5705996.53 frames. ], batch size: 73, lr: 6.26e-03, grad_scale: 16.0 +2024-09-17 22:13:37,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=310900.0, ans=0.0 +2024-09-17 22:14:10,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=310980.0, ans=0.0 +2024-09-17 22:14:12,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=310980.0, ans=0.2 +2024-09-17 22:14:28,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=311060.0, ans=0.125 +2024-09-17 22:14:34,571 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.324e+01 8.772e+01 9.230e+01 9.952e+01 3.129e+02, threshold=1.846e+02, percent-clipped=1.0 +2024-09-17 22:14:43,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=311100.0, ans=0.125 +2024-09-17 22:14:44,935 INFO [train.py:1198] (0/2) Epoch 18, batch 850, loss[loss=0.2584, ctc_loss=0.1429, cr_loss=0.3894, attn_decoder_loss=0.2626, over 29712.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1407, cr_loss=0.3826, attn_decoder_loss=0.2519, over 5734606.06 frames. ], batch size: 89, lr: 6.26e-03, grad_scale: 8.0 +2024-09-17 22:15:06,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=311140.0, ans=0.2 +2024-09-17 22:15:19,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=311180.0, ans=0.1 +2024-09-17 22:15:29,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=311220.0, ans=0.07 +2024-09-17 22:15:41,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=311220.0, ans=0.0 +2024-09-17 22:16:03,556 INFO [train.py:1198] (0/2) Epoch 18, batch 900, loss[loss=0.2217, ctc_loss=0.1174, cr_loss=0.3299, attn_decoder_loss=0.226, over 29607.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1414, cr_loss=0.3835, attn_decoder_loss=0.2523, over 5739519.84 frames. 
], batch size: 73, lr: 6.26e-03, grad_scale: 8.0 +2024-09-17 22:16:24,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=311340.0, ans=0.2 +2024-09-17 22:16:27,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=311340.0, ans=0.125 +2024-09-17 22:16:29,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=311340.0, ans=0.125 +2024-09-17 22:16:39,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=311380.0, ans=0.0 +2024-09-17 22:17:10,604 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.085e+01 8.926e+01 9.503e+01 1.103e+02 2.746e+02, threshold=1.901e+02, percent-clipped=1.0 +2024-09-17 22:17:21,151 INFO [train.py:1198] (0/2) Epoch 18, batch 950, loss[loss=0.2333, ctc_loss=0.1344, cr_loss=0.3847, attn_decoder_loss=0.2358, over 29520.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1416, cr_loss=0.3838, attn_decoder_loss=0.2526, over 5742880.60 frames. ], batch size: 74, lr: 6.26e-03, grad_scale: 8.0 +2024-09-17 22:17:29,551 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.08 vs. limit=6.0 +2024-09-17 22:17:35,574 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.78 vs. limit=12.0 +2024-09-17 22:18:29,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=311660.0, ans=0.0 +2024-09-17 22:18:36,543 INFO [train.py:1198] (0/2) Epoch 18, batch 1000, loss[loss=0.2267, ctc_loss=0.1224, cr_loss=0.355, attn_decoder_loss=0.2304, over 29489.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1423, cr_loss=0.3849, attn_decoder_loss=0.2533, over 5737153.99 frames. ], batch size: 77, lr: 6.25e-03, grad_scale: 8.0 +2024-09-17 22:18:37,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.15 vs. limit=10.0 +2024-09-17 22:18:38,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=311700.0, ans=0.1 +2024-09-17 22:18:49,753 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.33 vs. 
limit=15.0 +2024-09-17 22:19:14,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=311780.0, ans=0.125 +2024-09-17 22:19:19,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=311780.0, ans=0.0 +2024-09-17 22:19:28,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=311820.0, ans=0.0 +2024-09-17 22:19:41,472 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.285e+01 8.748e+01 9.283e+01 1.021e+02 2.281e+02, threshold=1.857e+02, percent-clipped=1.0 +2024-09-17 22:19:44,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=311860.0, ans=0.2 +2024-09-17 22:19:51,944 INFO [train.py:1198] (0/2) Epoch 18, batch 1050, loss[loss=0.249, ctc_loss=0.1354, cr_loss=0.3841, attn_decoder_loss=0.2531, over 29662.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1416, cr_loss=0.3831, attn_decoder_loss=0.2526, over 5744257.62 frames. ], batch size: 85, lr: 6.25e-03, grad_scale: 8.0 +2024-09-17 22:20:28,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=311980.0, ans=10.0 +2024-09-17 22:20:28,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=311980.0, ans=0.0 +2024-09-17 22:20:28,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=311980.0, ans=0.125 +2024-09-17 22:20:35,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.18 vs. limit=15.0 +2024-09-17 22:20:36,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=311980.0, ans=0.125 +2024-09-17 22:20:43,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=312020.0, ans=0.125 +2024-09-17 22:20:48,390 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.93 vs. limit=10.0 +2024-09-17 22:21:04,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=312060.0, ans=0.0 +2024-09-17 22:21:10,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=312060.0, ans=0.125 +2024-09-17 22:21:13,191 INFO [train.py:1198] (0/2) Epoch 18, batch 1100, loss[loss=0.2474, ctc_loss=0.1429, cr_loss=0.3897, attn_decoder_loss=0.2504, over 29458.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1416, cr_loss=0.3834, attn_decoder_loss=0.2524, over 5754970.74 frames. 
], batch size: 78, lr: 6.25e-03, grad_scale: 8.0 +2024-09-17 22:21:33,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=312140.0, ans=0.0 +2024-09-17 22:21:43,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=312180.0, ans=0.125 +2024-09-17 22:22:18,322 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.460e+01 8.510e+01 9.386e+01 9.841e+01 2.672e+02, threshold=1.877e+02, percent-clipped=2.0 +2024-09-17 22:22:23,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=312260.0, ans=0.125 +2024-09-17 22:22:28,966 INFO [train.py:1198] (0/2) Epoch 18, batch 1150, loss[loss=0.2439, ctc_loss=0.1359, cr_loss=0.3803, attn_decoder_loss=0.2474, over 29482.00 frames. ], tot_loss[loss=0.249, ctc_loss=0.1415, cr_loss=0.3839, attn_decoder_loss=0.2524, over 5753171.49 frames. ], batch size: 78, lr: 6.25e-03, grad_scale: 8.0 +2024-09-17 22:22:38,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=312300.0, ans=0.125 +2024-09-17 22:22:59,911 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:23:05,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=312380.0, ans=0.0 +2024-09-17 22:23:17,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=312420.0, ans=0.2 +2024-09-17 22:23:33,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=312460.0, ans=0.125 +2024-09-17 22:23:44,704 INFO [train.py:1198] (0/2) Epoch 18, batch 1200, loss[loss=0.2548, ctc_loss=0.1461, cr_loss=0.4095, attn_decoder_loss=0.2577, over 29671.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1422, cr_loss=0.3843, attn_decoder_loss=0.2533, over 5745648.90 frames. ], batch size: 85, lr: 6.25e-03, grad_scale: 16.0 +2024-09-17 22:23:50,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=312500.0, ans=0.035 +2024-09-17 22:23:57,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=312500.0, ans=0.09899494936611666 +2024-09-17 22:24:04,423 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.23 vs. limit=22.5 +2024-09-17 22:24:05,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=312540.0, ans=0.1 +2024-09-17 22:24:35,520 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.14 vs. limit=22.5 +2024-09-17 22:24:41,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.16 vs. 
limit=22.5 +2024-09-17 22:24:45,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=312620.0, ans=0.125 +2024-09-17 22:24:48,704 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:24:54,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=312660.0, ans=0.125 +2024-09-17 22:24:55,875 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.397e+01 9.002e+01 9.543e+01 1.051e+02 1.930e+02, threshold=1.909e+02, percent-clipped=1.0 +2024-09-17 22:25:04,860 INFO [train.py:1198] (0/2) Epoch 18, batch 1250, loss[loss=0.2694, ctc_loss=0.1583, cr_loss=0.4159, attn_decoder_loss=0.2725, over 29552.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1425, cr_loss=0.3853, attn_decoder_loss=0.2536, over 5773448.35 frames. ], batch size: 92, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:25:06,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=312700.0, ans=0.07 +2024-09-17 22:25:07,242 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.20 vs. limit=15.0 +2024-09-17 22:25:07,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.74 vs. limit=15.0 +2024-09-17 22:25:08,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=312700.0, ans=0.0 +2024-09-17 22:25:29,703 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:26:06,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=312860.0, ans=0.2 +2024-09-17 22:26:11,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=312860.0, ans=0.95 +2024-09-17 22:26:20,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.43 vs. limit=15.0 +2024-09-17 22:26:21,429 INFO [train.py:1198] (0/2) Epoch 18, batch 1300, loss[loss=0.2677, ctc_loss=0.1521, cr_loss=0.4118, attn_decoder_loss=0.2714, over 28286.00 frames. ], tot_loss[loss=0.2494, ctc_loss=0.1418, cr_loss=0.3839, attn_decoder_loss=0.2529, over 5778803.67 frames. ], batch size: 111, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:26:21,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=312900.0, ans=0.125 +2024-09-17 22:26:26,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=312900.0, ans=0.0 +2024-09-17 22:26:27,271 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.95 vs. limit=15.0 +2024-09-17 22:26:51,486 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.95 vs. 
limit=15.0 +2024-09-17 22:26:56,132 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.89 vs. limit=15.0 +2024-09-17 22:26:58,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=312980.0, ans=0.0 +2024-09-17 22:27:05,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=313020.0, ans=0.04949747468305833 +2024-09-17 22:27:05,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=313020.0, ans=0.125 +2024-09-17 22:27:17,335 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.04 vs. limit=22.5 +2024-09-17 22:27:28,525 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.616e+01 9.113e+01 9.632e+01 1.228e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-17 22:27:28,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=313060.0, ans=0.04949747468305833 +2024-09-17 22:27:30,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=313060.0, ans=0.05 +2024-09-17 22:27:32,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=313060.0, ans=0.125 +2024-09-17 22:27:36,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=313100.0, ans=0.07 +2024-09-17 22:27:37,658 INFO [train.py:1198] (0/2) Epoch 18, batch 1350, loss[loss=0.254, ctc_loss=0.1503, cr_loss=0.4249, attn_decoder_loss=0.2561, over 29767.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.141, cr_loss=0.3829, attn_decoder_loss=0.2523, over 5795203.96 frames. ], batch size: 81, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:27:42,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=313100.0, ans=0.1 +2024-09-17 22:27:51,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=313140.0, ans=0.125 +2024-09-17 22:27:53,498 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.34 vs. limit=15.0 +2024-09-17 22:28:17,620 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.94 vs. limit=15.0 +2024-09-17 22:28:30,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=313220.0, ans=0.2 +2024-09-17 22:28:44,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=313260.0, ans=0.0 +2024-09-17 22:28:57,963 INFO [train.py:1198] (0/2) Epoch 18, batch 1400, loss[loss=0.2209, ctc_loss=0.1236, cr_loss=0.3415, attn_decoder_loss=0.2242, over 29561.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1408, cr_loss=0.3823, attn_decoder_loss=0.2522, over 5807002.89 frames. 
], batch size: 69, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:29:33,695 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.49 vs. limit=15.0 +2024-09-17 22:29:50,101 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.97 vs. limit=15.0 +2024-09-17 22:30:00,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=313460.0, ans=0.0 +2024-09-17 22:30:04,775 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.307e+01 8.593e+01 9.088e+01 9.649e+01 1.870e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-17 22:30:05,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=313460.0, ans=0.2 +2024-09-17 22:30:11,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=313460.0, ans=0.0 +2024-09-17 22:30:13,983 INFO [train.py:1198] (0/2) Epoch 18, batch 1450, loss[loss=0.2697, ctc_loss=0.1536, cr_loss=0.4106, attn_decoder_loss=0.2735, over 29481.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1417, cr_loss=0.384, attn_decoder_loss=0.2531, over 5803146.41 frames. ], batch size: 94, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:30:34,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=313540.0, ans=0.0 +2024-09-17 22:30:41,039 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.63 vs. limit=10.0 +2024-09-17 22:30:41,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=313540.0, ans=0.125 +2024-09-17 22:31:03,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=313620.0, ans=0.125 +2024-09-17 22:31:28,087 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.42 vs. limit=15.0 +2024-09-17 22:31:30,413 INFO [train.py:1198] (0/2) Epoch 18, batch 1500, loss[loss=0.2545, ctc_loss=0.1449, cr_loss=0.379, attn_decoder_loss=0.2583, over 29642.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1423, cr_loss=0.3852, attn_decoder_loss=0.2535, over 5804318.85 frames. ], batch size: 86, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:31:31,291 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.05 vs. limit=6.0 +2024-09-17 22:31:58,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=313740.0, ans=0.125 +2024-09-17 22:32:13,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=313780.0, ans=0.2 +2024-09-17 22:32:24,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.89 vs. 
limit=15.0 +2024-09-17 22:32:33,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=313820.0, ans=0.035 +2024-09-17 22:32:37,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=313860.0, ans=0.2 +2024-09-17 22:32:42,099 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.418e+01 8.582e+01 9.289e+01 9.829e+01 2.134e+02, threshold=1.858e+02, percent-clipped=2.0 +2024-09-17 22:32:51,081 INFO [train.py:1198] (0/2) Epoch 18, batch 1550, loss[loss=0.2761, ctc_loss=0.1626, cr_loss=0.4474, attn_decoder_loss=0.2787, over 29477.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1422, cr_loss=0.3842, attn_decoder_loss=0.2534, over 5781931.49 frames. ], batch size: 90, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:33:07,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=313940.0, ans=0.0 +2024-09-17 22:33:18,017 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.62 vs. limit=15.0 +2024-09-17 22:33:24,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=313980.0, ans=0.0 +2024-09-17 22:33:36,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=314020.0, ans=0.125 +2024-09-17 22:33:38,213 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:33:41,782 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.60 vs. limit=15.0 +2024-09-17 22:33:51,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=314060.0, ans=0.1 +2024-09-17 22:33:58,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=314060.0, ans=0.0 +2024-09-17 22:33:59,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=314060.0, ans=0.0 +2024-09-17 22:34:06,771 INFO [train.py:1198] (0/2) Epoch 18, batch 1600, loss[loss=0.2439, ctc_loss=0.1302, cr_loss=0.3665, attn_decoder_loss=0.2484, over 29665.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1423, cr_loss=0.384, attn_decoder_loss=0.2532, over 5765828.51 frames. ], batch size: 85, lr: 6.23e-03, grad_scale: 16.0 +2024-09-17 22:34:15,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=314100.0, ans=0.125 +2024-09-17 22:34:34,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=314140.0, ans=0.0 +2024-09-17 22:34:38,315 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.75 vs. 
limit=22.5 +2024-09-17 22:34:43,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=314180.0, ans=0.05 +2024-09-17 22:34:54,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=314220.0, ans=0.025 +2024-09-17 22:35:00,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=314220.0, ans=0.1 +2024-09-17 22:35:03,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=314220.0, ans=10.0 +2024-09-17 22:35:14,878 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.294e+01 8.911e+01 9.926e+01 1.138e+02 4.601e+02, threshold=1.985e+02, percent-clipped=5.0 +2024-09-17 22:35:22,532 INFO [train.py:1198] (0/2) Epoch 18, batch 1650, loss[loss=0.2573, ctc_loss=0.1458, cr_loss=0.3958, attn_decoder_loss=0.2609, over 29699.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1425, cr_loss=0.3848, attn_decoder_loss=0.2532, over 5761616.25 frames. ], batch size: 89, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:35:47,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=314340.0, ans=0.125 +2024-09-17 22:36:14,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=314420.0, ans=0.0 +2024-09-17 22:36:24,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=314460.0, ans=0.125 +2024-09-17 22:36:29,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=314460.0, ans=0.125 +2024-09-17 22:36:29,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=314460.0, ans=0.0 +2024-09-17 22:36:30,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=314460.0, ans=0.0 +2024-09-17 22:36:40,864 INFO [train.py:1198] (0/2) Epoch 18, batch 1700, loss[loss=0.2249, ctc_loss=0.1213, cr_loss=0.3342, attn_decoder_loss=0.229, over 29608.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1418, cr_loss=0.3835, attn_decoder_loss=0.253, over 5781514.64 frames. ], batch size: 69, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:36:45,088 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.75 vs. limit=15.0 +2024-09-17 22:36:54,120 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.98 vs. limit=15.0 +2024-09-17 22:36:54,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.08 vs. 
limit=15.0 +2024-09-17 22:36:56,515 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:37:04,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=314540.0, ans=0.1 +2024-09-17 22:37:04,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=314540.0, ans=0.07 +2024-09-17 22:37:14,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=314580.0, ans=0.125 +2024-09-17 22:37:20,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=314580.0, ans=0.0 +2024-09-17 22:37:29,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=314620.0, ans=0.025 +2024-09-17 22:37:35,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=314620.0, ans=0.0 +2024-09-17 22:37:35,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=314620.0, ans=0.04949747468305833 +2024-09-17 22:37:49,120 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.791e+01 8.699e+01 9.289e+01 9.898e+01 1.574e+02, threshold=1.858e+02, percent-clipped=0.0 +2024-09-17 22:37:56,803 INFO [train.py:1198] (0/2) Epoch 18, batch 1750, loss[loss=0.2259, ctc_loss=0.1291, cr_loss=0.3617, attn_decoder_loss=0.2286, over 29304.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1416, cr_loss=0.3836, attn_decoder_loss=0.2526, over 5790128.61 frames. ], batch size: 67, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:38:00,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.59 vs. limit=22.5 +2024-09-17 22:38:03,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=314700.0, ans=0.125 +2024-09-17 22:38:23,278 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.69 vs. limit=10.0 +2024-09-17 22:38:30,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=314780.0, ans=0.125 +2024-09-17 22:38:30,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=314780.0, ans=0.07 +2024-09-17 22:38:35,488 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.39 vs. limit=15.0 +2024-09-17 22:38:37,000 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.89 vs. 
limit=15.0 +2024-09-17 22:38:42,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=314820.0, ans=0.1 +2024-09-17 22:38:47,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=314820.0, ans=0.2 +2024-09-17 22:39:12,153 INFO [train.py:1198] (0/2) Epoch 18, batch 1800, loss[loss=0.2589, ctc_loss=0.1414, cr_loss=0.3807, attn_decoder_loss=0.2635, over 29696.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1416, cr_loss=0.3842, attn_decoder_loss=0.2529, over 5793199.21 frames. ], batch size: 83, lr: 6.22e-03, grad_scale: 8.0 +2024-09-17 22:40:18,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=315060.0, ans=10.0 +2024-09-17 22:40:24,496 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.543e+01 8.622e+01 9.178e+01 9.904e+01 1.304e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-17 22:40:32,253 INFO [train.py:1198] (0/2) Epoch 18, batch 1850, loss[loss=0.2597, ctc_loss=0.1455, cr_loss=0.3867, attn_decoder_loss=0.2638, over 29614.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1412, cr_loss=0.384, attn_decoder_loss=0.2526, over 5795971.06 frames. ], batch size: 86, lr: 6.22e-03, grad_scale: 8.0 +2024-09-17 22:40:35,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=315100.0, ans=0.09899494936611666 +2024-09-17 22:40:40,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.36 vs. limit=6.0 +2024-09-17 22:40:41,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=315100.0, ans=0.5 +2024-09-17 22:40:57,196 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.36 vs. limit=15.0 +2024-09-17 22:41:02,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=315180.0, ans=0.125 +2024-09-17 22:41:13,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=315180.0, ans=0.07 +2024-09-17 22:41:27,962 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.96 vs. limit=15.0 +2024-09-17 22:41:33,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=315260.0, ans=0.125 +2024-09-17 22:41:41,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=315260.0, ans=0.125 +2024-09-17 22:41:47,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=315300.0, ans=0.0 +2024-09-17 22:41:48,286 INFO [train.py:1198] (0/2) Epoch 18, batch 1900, loss[loss=0.2566, ctc_loss=0.1435, cr_loss=0.4012, attn_decoder_loss=0.2603, over 29733.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1419, cr_loss=0.3854, attn_decoder_loss=0.2533, over 5804126.64 frames. 
], batch size: 89, lr: 6.22e-03, grad_scale: 8.0 +2024-09-17 22:41:54,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=315300.0, ans=0.125 +2024-09-17 22:42:10,457 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.60 vs. limit=15.0 +2024-09-17 22:42:17,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=315380.0, ans=0.1 +2024-09-17 22:42:22,567 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.32 vs. limit=15.0 +2024-09-17 22:42:44,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=315420.0, ans=0.0 +2024-09-17 22:42:55,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=315460.0, ans=0.0 +2024-09-17 22:42:56,527 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.577e+01 8.839e+01 9.337e+01 9.876e+01 1.224e+02, threshold=1.867e+02, percent-clipped=0.0 +2024-09-17 22:43:04,092 INFO [train.py:1198] (0/2) Epoch 18, batch 1950, loss[loss=0.242, ctc_loss=0.1359, cr_loss=0.3761, attn_decoder_loss=0.2454, over 29436.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1427, cr_loss=0.3867, attn_decoder_loss=0.2542, over 5818627.54 frames. ], batch size: 78, lr: 6.22e-03, grad_scale: 8.0 +2024-09-17 22:43:13,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=315500.0, ans=0.1 +2024-09-17 22:43:13,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=315500.0, ans=0.125 +2024-09-17 22:43:20,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=315540.0, ans=0.0 +2024-09-17 22:43:56,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=315620.0, ans=0.125 +2024-09-17 22:44:06,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=315660.0, ans=0.025 +2024-09-17 22:44:23,109 INFO [train.py:1198] (0/2) Epoch 18, batch 2000, loss[loss=0.2141, ctc_loss=0.1164, cr_loss=0.3357, attn_decoder_loss=0.2175, over 29339.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1428, cr_loss=0.3866, attn_decoder_loss=0.2543, over 5795590.60 frames. 
], batch size: 67, lr: 6.22e-03, grad_scale: 16.0 +2024-09-17 22:44:23,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=315700.0, ans=0.125 +2024-09-17 22:44:46,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=315740.0, ans=0.125 +2024-09-17 22:44:52,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=315780.0, ans=0.1 +2024-09-17 22:45:00,197 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:45:30,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=315860.0, ans=0.025 +2024-09-17 22:45:31,344 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.333e+01 8.725e+01 9.284e+01 1.004e+02 4.329e+02, threshold=1.857e+02, percent-clipped=1.0 +2024-09-17 22:45:38,901 INFO [train.py:1198] (0/2) Epoch 18, batch 2050, loss[loss=0.229, ctc_loss=0.1313, cr_loss=0.369, attn_decoder_loss=0.2317, over 29442.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1421, cr_loss=0.3849, attn_decoder_loss=0.2533, over 5788479.11 frames. ], batch size: 70, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:45:40,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=315900.0, ans=0.125 +2024-09-17 22:45:44,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=315900.0, ans=0.0 +2024-09-17 22:46:02,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=315940.0, ans=0.1 +2024-09-17 22:46:32,934 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.24 vs. limit=15.0 +2024-09-17 22:46:53,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=316100.0, ans=0.125 +2024-09-17 22:46:54,761 INFO [train.py:1198] (0/2) Epoch 18, batch 2100, loss[loss=0.2576, ctc_loss=0.1393, cr_loss=0.4039, attn_decoder_loss=0.2618, over 29755.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1417, cr_loss=0.3842, attn_decoder_loss=0.2529, over 5800257.60 frames. ], batch size: 81, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:48:02,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=316260.0, ans=0.125 +2024-09-17 22:48:05,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=316260.0, ans=0.04949747468305833 +2024-09-17 22:48:08,069 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.905e+01 8.595e+01 9.065e+01 9.620e+01 1.690e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-17 22:48:14,164 INFO [train.py:1198] (0/2) Epoch 18, batch 2150, loss[loss=0.2438, ctc_loss=0.141, cr_loss=0.38, attn_decoder_loss=0.2468, over 29455.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1409, cr_loss=0.3828, attn_decoder_loss=0.252, over 5815119.35 frames. 
], batch size: 78, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:48:19,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.17 vs. limit=15.0 +2024-09-17 22:48:57,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.12 vs. limit=15.0 +2024-09-17 22:49:13,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=316460.0, ans=0.1 +2024-09-17 22:49:25,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=316460.0, ans=0.125 +2024-09-17 22:49:25,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=316460.0, ans=0.125 +2024-09-17 22:49:29,890 INFO [train.py:1198] (0/2) Epoch 18, batch 2200, loss[loss=0.256, ctc_loss=0.1436, cr_loss=0.3777, attn_decoder_loss=0.26, over 29601.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1408, cr_loss=0.3827, attn_decoder_loss=0.2521, over 5811238.50 frames. ], batch size: 86, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:49:30,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=316500.0, ans=0.0 +2024-09-17 22:49:37,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=316500.0, ans=0.125 +2024-09-17 22:49:39,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=316500.0, ans=0.2 +2024-09-17 22:49:51,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=316540.0, ans=0.125 +2024-09-17 22:50:07,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.46 vs. limit=15.0 +2024-09-17 22:50:08,016 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:50:30,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=316660.0, ans=0.125 +2024-09-17 22:50:39,196 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.790e+01 9.365e+01 1.003e+02 3.289e+02, threshold=1.873e+02, percent-clipped=2.0 +2024-09-17 22:50:41,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=316660.0, ans=0.125 +2024-09-17 22:50:45,434 INFO [train.py:1198] (0/2) Epoch 18, batch 2250, loss[loss=0.2474, ctc_loss=0.1311, cr_loss=0.3582, attn_decoder_loss=0.2523, over 29696.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1402, cr_loss=0.3818, attn_decoder_loss=0.2517, over 5810528.58 frames. 
], batch size: 82, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:50:50,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=316700.0, ans=0.125 +2024-09-17 22:50:50,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=316700.0, ans=0.1 +2024-09-17 22:51:14,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=316780.0, ans=0.125 +2024-09-17 22:51:18,096 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.93 vs. limit=15.0 +2024-09-17 22:51:33,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=316820.0, ans=0.125 +2024-09-17 22:51:37,283 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.55 vs. limit=10.0 +2024-09-17 22:51:47,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=316820.0, ans=0.0 +2024-09-17 22:51:55,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=316860.0, ans=0.1 +2024-09-17 22:52:05,592 INFO [train.py:1198] (0/2) Epoch 18, batch 2300, loss[loss=0.2228, ctc_loss=0.1163, cr_loss=0.3352, attn_decoder_loss=0.2271, over 29301.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1399, cr_loss=0.3811, attn_decoder_loss=0.251, over 5797674.93 frames. ], batch size: 71, lr: 6.20e-03, grad_scale: 8.0 +2024-09-17 22:52:07,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=316900.0, ans=0.1 +2024-09-17 22:52:20,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=316940.0, ans=0.1 +2024-09-17 22:52:20,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=316940.0, ans=0.1 +2024-09-17 22:52:29,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.13 vs. limit=15.0 +2024-09-17 22:52:32,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=316940.0, ans=0.0 +2024-09-17 22:52:57,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=317020.0, ans=0.025 +2024-09-17 22:52:57,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=317020.0, ans=0.125 +2024-09-17 22:53:10,201 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.78 vs. limit=10.0 +2024-09-17 22:53:15,288 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.054e+01 8.549e+01 8.919e+01 9.975e+01 1.965e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-17 22:53:21,365 INFO [train.py:1198] (0/2) Epoch 18, batch 2350, loss[loss=0.2605, ctc_loss=0.1498, cr_loss=0.4024, attn_decoder_loss=0.2638, over 29687.00 frames. 
], tot_loss[loss=0.2477, ctc_loss=0.14, cr_loss=0.3816, attn_decoder_loss=0.2512, over 5802612.47 frames. ], batch size: 83, lr: 6.20e-03, grad_scale: 8.0 +2024-09-17 22:53:30,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=317100.0, ans=0.1 +2024-09-17 22:53:56,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=317180.0, ans=0.125 +2024-09-17 22:54:08,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=317220.0, ans=0.0 +2024-09-17 22:54:11,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=317220.0, ans=0.125 +2024-09-17 22:54:14,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=317220.0, ans=0.125 +2024-09-17 22:54:15,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=317220.0, ans=0.2 +2024-09-17 22:54:31,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=317260.0, ans=0.125 +2024-09-17 22:54:35,201 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.79 vs. limit=15.0 +2024-09-17 22:54:37,095 INFO [train.py:1198] (0/2) Epoch 18, batch 2400, loss[loss=0.2373, ctc_loss=0.1226, cr_loss=0.3516, attn_decoder_loss=0.2422, over 29543.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1406, cr_loss=0.3826, attn_decoder_loss=0.2517, over 5805544.25 frames. ], batch size: 76, lr: 6.20e-03, grad_scale: 16.0 +2024-09-17 22:55:07,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=317380.0, ans=0.125 +2024-09-17 22:55:32,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=317420.0, ans=0.1 +2024-09-17 22:55:35,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=317420.0, ans=0.5 +2024-09-17 22:55:52,118 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=9.94 vs. limit=12.0 +2024-09-17 22:55:52,493 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.296e+01 8.608e+01 9.226e+01 9.922e+01 2.120e+02, threshold=1.845e+02, percent-clipped=2.0 +2024-09-17 22:55:57,153 INFO [train.py:1198] (0/2) Epoch 18, batch 2450, loss[loss=0.2535, ctc_loss=0.1465, cr_loss=0.394, attn_decoder_loss=0.2566, over 29698.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1416, cr_loss=0.3844, attn_decoder_loss=0.253, over 5782246.46 frames. ], batch size: 82, lr: 6.20e-03, grad_scale: 8.0 +2024-09-17 22:57:13,353 INFO [train.py:1198] (0/2) Epoch 18, batch 2500, loss[loss=0.2613, ctc_loss=0.1472, cr_loss=0.3931, attn_decoder_loss=0.2652, over 29649.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.142, cr_loss=0.3851, attn_decoder_loss=0.2532, over 5793396.81 frames. 
], batch size: 86, lr: 6.20e-03, grad_scale: 8.0 +2024-09-17 22:57:27,677 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.78 vs. limit=15.0 +2024-09-17 22:57:28,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=317740.0, ans=0.125 +2024-09-17 22:57:53,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=317780.0, ans=0.0 +2024-09-17 22:57:53,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=317780.0, ans=0.125 +2024-09-17 22:58:09,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=317820.0, ans=0.0 +2024-09-17 22:58:24,508 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.912e+01 8.741e+01 9.201e+01 9.902e+01 1.726e+02, threshold=1.840e+02, percent-clipped=0.0 +2024-09-17 22:58:29,152 INFO [train.py:1198] (0/2) Epoch 18, batch 2550, loss[loss=0.2211, ctc_loss=0.1183, cr_loss=0.3389, attn_decoder_loss=0.225, over 29304.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1423, cr_loss=0.3855, attn_decoder_loss=0.2534, over 5796707.71 frames. ], batch size: 67, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 22:58:32,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=317900.0, ans=0.0 +2024-09-17 22:58:41,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=317900.0, ans=0.125 +2024-09-17 22:58:50,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=317940.0, ans=0.0 +2024-09-17 22:58:50,876 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.92 vs. limit=15.0 +2024-09-17 22:59:07,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=317980.0, ans=0.0 +2024-09-17 22:59:43,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=318060.0, ans=0.0 +2024-09-17 22:59:45,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=318060.0, ans=0.125 +2024-09-17 22:59:46,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=318060.0, ans=0.1 +2024-09-17 22:59:49,439 INFO [train.py:1198] (0/2) Epoch 18, batch 2600, loss[loss=0.2475, ctc_loss=0.1399, cr_loss=0.3811, attn_decoder_loss=0.251, over 29442.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1424, cr_loss=0.386, attn_decoder_loss=0.2537, over 5793517.16 frames. ], batch size: 78, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 23:00:05,382 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.24 vs. 
limit=12.0 +2024-09-17 23:00:12,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=318140.0, ans=0.125 +2024-09-17 23:00:32,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=318180.0, ans=0.0 +2024-09-17 23:01:00,211 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.565e+01 8.599e+01 9.133e+01 9.930e+01 1.773e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-17 23:01:04,597 INFO [train.py:1198] (0/2) Epoch 18, batch 2650, loss[loss=0.2684, ctc_loss=0.1561, cr_loss=0.4335, attn_decoder_loss=0.2712, over 29178.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1417, cr_loss=0.3852, attn_decoder_loss=0.2536, over 5801044.93 frames. ], batch size: 100, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 23:01:09,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=318300.0, ans=0.0 +2024-09-17 23:01:09,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=318300.0, ans=0.125 +2024-09-17 23:01:33,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=318380.0, ans=0.125 +2024-09-17 23:02:00,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=318420.0, ans=0.125 +2024-09-17 23:02:10,740 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.53 vs. limit=6.0 +2024-09-17 23:02:20,279 INFO [train.py:1198] (0/2) Epoch 18, batch 2700, loss[loss=0.2578, ctc_loss=0.1462, cr_loss=0.3924, attn_decoder_loss=0.2615, over 29527.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1421, cr_loss=0.3863, attn_decoder_loss=0.2542, over 5796833.38 frames. ], batch size: 87, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 23:02:24,305 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.76 vs. limit=15.0 +2024-09-17 23:02:41,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=318540.0, ans=0.125 +2024-09-17 23:03:11,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=318620.0, ans=0.125 +2024-09-17 23:03:27,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=318660.0, ans=0.1 +2024-09-17 23:03:28,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=318660.0, ans=0.015 +2024-09-17 23:03:36,478 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.684e+01 8.599e+01 9.139e+01 9.802e+01 1.659e+02, threshold=1.828e+02, percent-clipped=0.0 +2024-09-17 23:03:41,088 INFO [train.py:1198] (0/2) Epoch 18, batch 2750, loss[loss=0.2417, ctc_loss=0.1344, cr_loss=0.3741, attn_decoder_loss=0.2453, over 29505.00 frames. ], tot_loss[loss=0.2494, ctc_loss=0.1411, cr_loss=0.3841, attn_decoder_loss=0.2529, over 5795395.17 frames. 
], batch size: 75, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 23:03:51,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=318700.0, ans=0.1 +2024-09-17 23:03:57,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=318740.0, ans=0.0 +2024-09-17 23:04:02,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=318740.0, ans=0.1 +2024-09-17 23:04:21,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=318780.0, ans=0.1 +2024-09-17 23:04:29,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=318820.0, ans=0.125 +2024-09-17 23:04:41,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=318860.0, ans=0.0 +2024-09-17 23:04:57,081 INFO [train.py:1198] (0/2) Epoch 18, batch 2800, loss[loss=0.2715, ctc_loss=0.1814, cr_loss=0.3997, attn_decoder_loss=0.2727, over 20263.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1415, cr_loss=0.3843, attn_decoder_loss=0.253, over 5775974.89 frames. ], batch size: 210, lr: 6.18e-03, grad_scale: 16.0 +2024-09-17 23:05:08,905 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.83 vs. limit=10.0 +2024-09-17 23:05:11,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=318940.0, ans=0.2 +2024-09-17 23:05:23,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=318940.0, ans=0.0 +2024-09-17 23:05:31,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=318980.0, ans=0.1 +2024-09-17 23:05:38,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=318980.0, ans=0.125 +2024-09-17 23:06:10,006 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.902e+01 8.963e+01 9.729e+01 1.060e+02 3.606e+02, threshold=1.946e+02, percent-clipped=3.0 +2024-09-17 23:06:13,100 INFO [train.py:1198] (0/2) Epoch 18, batch 2850, loss[loss=0.2443, ctc_loss=0.1429, cr_loss=0.4056, attn_decoder_loss=0.2465, over 29515.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.142, cr_loss=0.3852, attn_decoder_loss=0.2534, over 5761703.10 frames. ], batch size: 77, lr: 6.18e-03, grad_scale: 8.0 +2024-09-17 23:06:17,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.67 vs. limit=15.0 +2024-09-17 23:06:20,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.45 vs. limit=15.0 +2024-09-17 23:06:26,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.40 vs. 
limit=15.0 +2024-09-17 23:06:31,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=319140.0, ans=0.125 +2024-09-17 23:06:46,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=319180.0, ans=0.125 +2024-09-17 23:07:12,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.45 vs. limit=12.0 +2024-09-17 23:07:26,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.87 vs. limit=12.0 +2024-09-17 23:07:30,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=319260.0, ans=0.0 +2024-09-17 23:07:33,503 INFO [train.py:1198] (0/2) Epoch 18, batch 2900, loss[loss=0.2375, ctc_loss=0.132, cr_loss=0.3795, attn_decoder_loss=0.2408, over 29406.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.1428, cr_loss=0.3871, attn_decoder_loss=0.2545, over 5787509.68 frames. ], batch size: 79, lr: 6.18e-03, grad_scale: 8.0 +2024-09-17 23:07:42,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=319300.0, ans=0.2 +2024-09-17 23:07:46,567 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.35 vs. limit=22.5 +2024-09-17 23:07:56,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=319340.0, ans=0.0 +2024-09-17 23:08:02,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=319380.0, ans=10.0 +2024-09-17 23:08:15,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=319380.0, ans=0.125 +2024-09-17 23:08:46,531 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.577e+01 9.174e+01 9.696e+01 1.530e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-17 23:08:49,582 INFO [train.py:1198] (0/2) Epoch 18, batch 2950, loss[loss=0.2482, ctc_loss=0.1463, cr_loss=0.403, attn_decoder_loss=0.2506, over 29559.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1419, cr_loss=0.385, attn_decoder_loss=0.2534, over 5780262.89 frames. ], batch size: 75, lr: 6.18e-03, grad_scale: 8.0 +2024-09-17 23:09:02,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.97 vs. limit=22.5 +2024-09-17 23:09:16,613 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.06 vs. limit=15.0 +2024-09-17 23:09:18,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=319580.0, ans=0.2 +2024-09-17 23:09:36,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=319620.0, ans=0.1 +2024-09-17 23:10:05,408 INFO [train.py:1198] (0/2) Epoch 18, batch 3000, loss[loss=0.2441, ctc_loss=0.1337, cr_loss=0.3685, attn_decoder_loss=0.2481, over 29748.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1417, cr_loss=0.3838, attn_decoder_loss=0.2532, over 5781857.33 frames. 
], batch size: 81, lr: 6.18e-03, grad_scale: 8.0 +2024-09-17 23:10:05,408 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 23:10:24,020 INFO [train.py:1230] (0/2) Epoch 18, validation: loss=0.211, ctc_loss=0.04071, cr_loss=4.994e-15, attn_decoder_loss=0.23, over 944034.00 frames. +2024-09-17 23:10:24,020 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 23:10:40,377 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:10:47,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=319740.0, ans=0.025 +2024-09-17 23:10:57,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=319780.0, ans=0.125 +2024-09-17 23:11:01,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.00 vs. limit=15.0 +2024-09-17 23:11:32,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=319860.0, ans=0.0 +2024-09-17 23:11:34,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=319860.0, ans=0.125 +2024-09-17 23:11:41,318 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.342e+01 9.075e+01 9.591e+01 1.002e+02 5.340e+02, threshold=1.918e+02, percent-clipped=4.0 +2024-09-17 23:11:44,494 INFO [train.py:1198] (0/2) Epoch 18, batch 3050, loss[loss=0.2467, ctc_loss=0.1398, cr_loss=0.364, attn_decoder_loss=0.2505, over 29541.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1427, cr_loss=0.386, attn_decoder_loss=0.2541, over 5776783.09 frames. ], batch size: 76, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:12:21,286 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-80000.pt +2024-09-17 23:12:33,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=319980.0, ans=0.125 +2024-09-17 23:12:36,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=320020.0, ans=0.125 +2024-09-17 23:12:54,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=320060.0, ans=0.09899494936611666 +2024-09-17 23:12:55,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=320060.0, ans=0.1 +2024-09-17 23:13:07,356 INFO [train.py:1198] (0/2) Epoch 18, batch 3100, loss[loss=0.2707, ctc_loss=0.1582, cr_loss=0.4045, attn_decoder_loss=0.2742, over 29226.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1422, cr_loss=0.3848, attn_decoder_loss=0.2535, over 5777066.19 frames. 
], batch size: 100, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:13:07,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=320100.0, ans=0.0 +2024-09-17 23:13:30,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=320140.0, ans=0.125 +2024-09-17 23:13:54,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=320220.0, ans=0.025 +2024-09-17 23:13:57,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=320220.0, ans=0.125 +2024-09-17 23:13:58,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=320220.0, ans=0.125 +2024-09-17 23:14:12,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=320260.0, ans=0.125 +2024-09-17 23:14:13,270 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.17 vs. limit=15.0 +2024-09-17 23:14:21,536 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.202e+01 8.516e+01 8.988e+01 9.440e+01 2.574e+02, threshold=1.798e+02, percent-clipped=3.0 +2024-09-17 23:14:23,080 INFO [train.py:1198] (0/2) Epoch 18, batch 3150, loss[loss=0.2573, ctc_loss=0.1412, cr_loss=0.3744, attn_decoder_loss=0.2619, over 28743.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1421, cr_loss=0.3846, attn_decoder_loss=0.2535, over 5783800.35 frames. ], batch size: 104, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:14:33,368 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.47 vs. limit=15.0 +2024-09-17 23:14:48,255 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:14:48,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=320340.0, ans=0.0 +2024-09-17 23:14:54,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=320380.0, ans=0.125 +2024-09-17 23:15:03,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=320380.0, ans=0.0 +2024-09-17 23:15:03,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=320380.0, ans=0.025 +2024-09-17 23:15:06,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.25 vs. limit=15.0 +2024-09-17 23:15:15,658 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.88 vs. limit=10.0 +2024-09-17 23:15:32,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=320460.0, ans=0.2 +2024-09-17 23:15:38,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=320460.0, ans=0.0 +2024-09-17 23:15:43,028 INFO [train.py:1198] (0/2) Epoch 18, batch 3200, loss[loss=0.2376, ctc_loss=0.1346, cr_loss=0.3703, attn_decoder_loss=0.2408, over 29425.00 frames. 
], tot_loss[loss=0.25, ctc_loss=0.1421, cr_loss=0.3851, attn_decoder_loss=0.2534, over 5793477.74 frames. ], batch size: 79, lr: 6.17e-03, grad_scale: 16.0 +2024-09-17 23:15:49,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=320500.0, ans=0.1 +2024-09-17 23:16:18,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=320580.0, ans=0.125 +2024-09-17 23:16:19,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=320580.0, ans=0.125 +2024-09-17 23:16:21,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=320580.0, ans=0.0 +2024-09-17 23:16:25,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=320580.0, ans=0.1 +2024-09-17 23:16:49,125 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.83 vs. limit=12.0 +2024-09-17 23:16:58,812 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.899e+01 8.773e+01 9.216e+01 9.587e+01 1.476e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-17 23:16:58,833 INFO [train.py:1198] (0/2) Epoch 18, batch 3250, loss[loss=0.2607, ctc_loss=0.1453, cr_loss=0.3989, attn_decoder_loss=0.2647, over 29721.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1419, cr_loss=0.3853, attn_decoder_loss=0.2535, over 5800785.54 frames. ], batch size: 84, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:17:05,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=320700.0, ans=0.1 +2024-09-17 23:17:24,920 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:17:44,904 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:17:54,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten.whitening_limit, batch_count=320820.0, ans=15.0 +2024-09-17 23:18:12,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=320860.0, ans=0.125 +2024-09-17 23:18:12,222 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:18:14,952 INFO [train.py:1198] (0/2) Epoch 18, batch 3300, loss[loss=0.2605, ctc_loss=0.1482, cr_loss=0.4051, attn_decoder_loss=0.264, over 28325.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1408, cr_loss=0.3836, attn_decoder_loss=0.2523, over 5797946.48 frames. 
], batch size: 111, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:18:20,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=320900.0, ans=0.1 +2024-09-17 23:18:32,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=320940.0, ans=0.125 +2024-09-17 23:18:35,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=320940.0, ans=0.125 +2024-09-17 23:18:49,966 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.93 vs. limit=15.0 +2024-09-17 23:18:57,091 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:19:01,855 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.83 vs. limit=15.0 +2024-09-17 23:19:02,197 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.96 vs. limit=12.0 +2024-09-17 23:19:14,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=321020.0, ans=0.125 +2024-09-17 23:19:29,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=321060.0, ans=0.125 +2024-09-17 23:19:35,431 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 8.646e+01 9.242e+01 9.946e+01 1.965e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-17 23:19:35,453 INFO [train.py:1198] (0/2) Epoch 18, batch 3350, loss[loss=0.2647, ctc_loss=0.1498, cr_loss=0.4029, attn_decoder_loss=0.2685, over 28755.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1416, cr_loss=0.3845, attn_decoder_loss=0.2531, over 5774386.44 frames. ], batch size: 104, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:20:12,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=321180.0, ans=0.125 +2024-09-17 23:20:21,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=321220.0, ans=0.125 +2024-09-17 23:20:51,642 INFO [train.py:1198] (0/2) Epoch 18, batch 3400, loss[loss=0.2141, ctc_loss=0.1124, cr_loss=0.3266, attn_decoder_loss=0.2182, over 29318.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1413, cr_loss=0.3831, attn_decoder_loss=0.2525, over 5766115.50 frames. ], batch size: 67, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:21:03,153 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.95 vs. 
limit=22.5 +2024-09-17 23:21:38,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=321420.0, ans=0.025 +2024-09-17 23:22:05,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=321460.0, ans=0.0 +2024-09-17 23:22:09,825 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 8.690e+01 9.213e+01 9.933e+01 2.095e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-17 23:22:09,848 INFO [train.py:1198] (0/2) Epoch 18, batch 3450, loss[loss=0.2632, ctc_loss=0.1526, cr_loss=0.3901, attn_decoder_loss=0.2668, over 28138.00 frames. ], tot_loss[loss=0.2494, ctc_loss=0.1412, cr_loss=0.3834, attn_decoder_loss=0.2529, over 5773617.76 frames. ], batch size: 111, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:22:23,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=321540.0, ans=0.0 +2024-09-17 23:22:27,475 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.09 vs. limit=15.0 +2024-09-17 23:22:52,022 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.57 vs. limit=22.5 +2024-09-17 23:23:16,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=321660.0, ans=0.025 +2024-09-17 23:23:28,091 INFO [train.py:1198] (0/2) Epoch 18, batch 3500, loss[loss=0.2173, ctc_loss=0.1216, cr_loss=0.3567, attn_decoder_loss=0.22, over 29310.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1406, cr_loss=0.3826, attn_decoder_loss=0.2522, over 5775591.99 frames. ], batch size: 71, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:23:34,867 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.03 vs. limit=22.5 +2024-09-17 23:23:45,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=321740.0, ans=0.0 +2024-09-17 23:23:49,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=321740.0, ans=0.2 +2024-09-17 23:23:58,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=321780.0, ans=0.0 +2024-09-17 23:23:59,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=321780.0, ans=0.1 +2024-09-17 23:24:01,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=321780.0, ans=0.125 +2024-09-17 23:24:04,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.38 vs. 
limit=15.0 +2024-09-17 23:24:34,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=321860.0, ans=0.1 +2024-09-17 23:24:42,808 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.524e+01 9.219e+01 9.966e+01 2.449e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-17 23:24:42,830 INFO [train.py:1198] (0/2) Epoch 18, batch 3550, loss[loss=0.262, ctc_loss=0.143, cr_loss=0.3928, attn_decoder_loss=0.2665, over 29704.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1406, cr_loss=0.3821, attn_decoder_loss=0.2522, over 5782377.76 frames. ], batch size: 89, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:24:43,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=321900.0, ans=0.0 +2024-09-17 23:24:44,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=321900.0, ans=0.0 +2024-09-17 23:24:53,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=321900.0, ans=0.125 +2024-09-17 23:24:58,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.83 vs. limit=15.0 +2024-09-17 23:25:20,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=321980.0, ans=0.0 +2024-09-17 23:25:41,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=322060.0, ans=0.125 +2024-09-17 23:25:41,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=322060.0, ans=0.0 +2024-09-17 23:25:52,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=322060.0, ans=0.015 +2024-09-17 23:25:57,195 INFO [train.py:1198] (0/2) Epoch 18, batch 3600, loss[loss=0.2454, ctc_loss=0.1371, cr_loss=0.3873, attn_decoder_loss=0.2488, over 29494.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1404, cr_loss=0.3819, attn_decoder_loss=0.2525, over 5792002.85 frames. 
], batch size: 77, lr: 6.15e-03, grad_scale: 16.0 +2024-09-17 23:26:06,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=322100.0, ans=0.0 +2024-09-17 23:26:13,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=322140.0, ans=0.125 +2024-09-17 23:26:15,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=322140.0, ans=0.125 +2024-09-17 23:26:18,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=322140.0, ans=0.2 +2024-09-17 23:26:36,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=322180.0, ans=0.1 +2024-09-17 23:27:04,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=322260.0, ans=0.1 +2024-09-17 23:27:11,828 INFO [train.py:1198] (0/2) Epoch 18, batch 3650, loss[loss=0.2789, ctc_loss=0.1764, cr_loss=0.4497, attn_decoder_loss=0.2803, over 29515.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1401, cr_loss=0.3811, attn_decoder_loss=0.2521, over 5792698.77 frames. ], batch size: 90, lr: 6.15e-03, grad_scale: 8.0 +2024-09-17 23:27:13,216 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.531e+01 8.529e+01 9.051e+01 9.513e+01 1.639e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-17 23:27:34,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.51 vs. limit=15.0 +2024-09-17 23:27:35,342 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.80 vs. limit=15.0 +2024-09-17 23:27:57,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=322420.0, ans=0.1 +2024-09-17 23:27:59,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=322420.0, ans=0.125 +2024-09-17 23:28:13,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=322460.0, ans=0.0 +2024-09-17 23:28:13,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=322460.0, ans=0.0 +2024-09-17 23:28:25,709 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.95 vs. limit=15.0 +2024-09-17 23:28:29,089 INFO [train.py:1198] (0/2) Epoch 18, batch 3700, loss[loss=0.2599, ctc_loss=0.1543, cr_loss=0.4198, attn_decoder_loss=0.2623, over 29705.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1404, cr_loss=0.3822, attn_decoder_loss=0.2524, over 5803408.95 frames. 
], batch size: 84, lr: 6.15e-03, grad_scale: 8.0 +2024-09-17 23:28:44,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=322540.0, ans=0.125 +2024-09-17 23:28:51,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=322540.0, ans=0.1 +2024-09-17 23:28:56,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=322540.0, ans=0.125 +2024-09-17 23:29:45,331 INFO [train.py:1198] (0/2) Epoch 18, batch 3750, loss[loss=0.2238, ctc_loss=0.1296, cr_loss=0.3542, attn_decoder_loss=0.2264, over 29348.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1403, cr_loss=0.3819, attn_decoder_loss=0.2521, over 5807019.52 frames. ], batch size: 67, lr: 6.15e-03, grad_scale: 8.0 +2024-09-17 23:29:46,815 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.942e+01 8.687e+01 9.263e+01 1.001e+02 2.346e+02, threshold=1.853e+02, percent-clipped=1.0 +2024-09-17 23:29:52,100 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.47 vs. limit=15.0 +2024-09-17 23:29:56,413 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.47 vs. limit=15.0 +2024-09-17 23:30:12,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=322740.0, ans=0.125 +2024-09-17 23:30:32,714 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.93 vs. limit=15.0 +2024-09-17 23:30:42,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=322820.0, ans=0.125 +2024-09-17 23:30:43,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=322860.0, ans=0.125 +2024-09-17 23:31:00,144 INFO [train.py:1198] (0/2) Epoch 18, batch 3800, loss[loss=0.2463, ctc_loss=0.1283, cr_loss=0.3586, attn_decoder_loss=0.2515, over 29643.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1399, cr_loss=0.3806, attn_decoder_loss=0.2515, over 5799056.09 frames. ], batch size: 86, lr: 6.15e-03, grad_scale: 8.0 +2024-09-17 23:31:11,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.82 vs. limit=22.5 +2024-09-17 23:31:13,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=322940.0, ans=0.07 +2024-09-17 23:31:45,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=323020.0, ans=0.125 +2024-09-17 23:31:53,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.73 vs. limit=22.5 +2024-09-17 23:32:14,622 INFO [train.py:1198] (0/2) Epoch 18, batch 3850, loss[loss=0.2661, ctc_loss=0.1564, cr_loss=0.4328, attn_decoder_loss=0.2687, over 29258.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1396, cr_loss=0.3808, attn_decoder_loss=0.2514, over 5811914.34 frames. 
], batch size: 100, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:32:16,117 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.747e+01 8.799e+01 9.187e+01 9.877e+01 1.493e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-17 23:32:19,914 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.54 vs. limit=15.0 +2024-09-17 23:32:29,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=323140.0, ans=0.5 +2024-09-17 23:32:35,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=323140.0, ans=0.1 +2024-09-17 23:32:38,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=323140.0, ans=0.025 +2024-09-17 23:33:12,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=323220.0, ans=10.0 +2024-09-17 23:33:15,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=323260.0, ans=0.0 +2024-09-17 23:33:17,081 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.41 vs. limit=22.5 +2024-09-17 23:33:31,053 INFO [train.py:1198] (0/2) Epoch 18, batch 3900, loss[loss=0.2609, ctc_loss=0.1387, cr_loss=0.3916, attn_decoder_loss=0.2657, over 29629.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1405, cr_loss=0.3825, attn_decoder_loss=0.2521, over 5817571.01 frames. ], batch size: 86, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:33:44,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=323340.0, ans=0.125 +2024-09-17 23:33:59,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=323380.0, ans=0.2 +2024-09-17 23:34:01,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=323380.0, ans=0.125 +2024-09-17 23:34:08,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=323380.0, ans=0.0 +2024-09-17 23:34:44,941 INFO [train.py:1198] (0/2) Epoch 18, batch 3950, loss[loss=0.2625, ctc_loss=0.1567, cr_loss=0.4102, attn_decoder_loss=0.2651, over 29465.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1403, cr_loss=0.3823, attn_decoder_loss=0.2522, over 5836895.65 frames. 
], batch size: 97, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:34:46,427 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.658e+01 8.710e+01 9.175e+01 9.677e+01 1.510e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-17 23:34:48,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=323500.0, ans=0.0 +2024-09-17 23:34:48,252 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:34:55,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=323500.0, ans=0.0 +2024-09-17 23:35:06,508 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.14 vs. limit=15.0 +2024-09-17 23:35:13,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=323540.0, ans=0.025 +2024-09-17 23:35:14,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=13.42 vs. limit=22.5 +2024-09-17 23:35:14,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=323580.0, ans=0.0 +2024-09-17 23:35:17,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=323580.0, ans=0.025 +2024-09-17 23:35:19,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=323580.0, ans=0.5 +2024-09-17 23:35:22,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=323580.0, ans=0.0 +2024-09-17 23:35:29,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=323620.0, ans=0.0 +2024-09-17 23:35:32,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.69 vs. limit=22.5 +2024-09-17 23:35:47,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.89 vs. limit=6.0 +2024-09-17 23:36:00,072 INFO [train.py:1198] (0/2) Epoch 18, batch 4000, loss[loss=0.2301, ctc_loss=0.1305, cr_loss=0.3667, attn_decoder_loss=0.2331, over 29506.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1407, cr_loss=0.3823, attn_decoder_loss=0.2524, over 5814696.51 frames. ], batch size: 74, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:36:05,603 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.40 vs. limit=15.0 +2024-09-17 23:36:43,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=323820.0, ans=0.05 +2024-09-17 23:36:48,545 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.50 vs. 
limit=5.0 +2024-09-17 23:36:50,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=323820.0, ans=0.0 +2024-09-17 23:37:14,028 INFO [train.py:1198] (0/2) Epoch 18, batch 4050, loss[loss=0.2737, ctc_loss=0.1727, cr_loss=0.4204, attn_decoder_loss=0.2756, over 19843.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1405, cr_loss=0.3816, attn_decoder_loss=0.2523, over 5796895.58 frames. ], batch size: 209, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:37:16,868 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.693e+01 8.741e+01 9.284e+01 9.840e+01 3.533e+02, threshold=1.857e+02, percent-clipped=2.0 +2024-09-17 23:37:21,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=323900.0, ans=0.125 +2024-09-17 23:37:24,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=323900.0, ans=0.125 +2024-09-17 23:37:25,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=323900.0, ans=0.125 +2024-09-17 23:37:38,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=323940.0, ans=0.125 +2024-09-17 23:37:41,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=323980.0, ans=0.95 +2024-09-17 23:37:49,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.39 vs. limit=15.0 +2024-09-17 23:37:53,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=323980.0, ans=0.2 +2024-09-17 23:37:58,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=324020.0, ans=0.0 +2024-09-17 23:38:15,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=324060.0, ans=0.2 +2024-09-17 23:38:17,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=324060.0, ans=0.125 +2024-09-17 23:38:28,776 INFO [train.py:1198] (0/2) Epoch 18, batch 4100, loss[loss=0.2617, ctc_loss=0.151, cr_loss=0.4061, attn_decoder_loss=0.2649, over 29497.00 frames. ], tot_loss[loss=0.249, ctc_loss=0.1409, cr_loss=0.3822, attn_decoder_loss=0.2526, over 5792479.79 frames. ], batch size: 90, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:38:29,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=324100.0, ans=0.0 +2024-09-17 23:38:48,395 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.92 vs. limit=10.0 +2024-09-17 23:38:58,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=324180.0, ans=0.0 +2024-09-17 23:39:01,928 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.05 vs. 
limit=10.0 +2024-09-17 23:39:07,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=324180.0, ans=0.125 +2024-09-17 23:39:07,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=324180.0, ans=0.025 +2024-09-17 23:39:09,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.10 vs. limit=15.0 +2024-09-17 23:39:14,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=324220.0, ans=0.125 +2024-09-17 23:39:20,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=324220.0, ans=0.125 +2024-09-17 23:39:20,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=324220.0, ans=0.025 +2024-09-17 23:39:38,194 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.35 vs. limit=15.0 +2024-09-17 23:39:40,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=324260.0, ans=0.0 +2024-09-17 23:39:43,551 INFO [train.py:1198] (0/2) Epoch 18, batch 4150, loss[loss=0.2417, ctc_loss=0.1353, cr_loss=0.3681, attn_decoder_loss=0.2454, over 29490.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1408, cr_loss=0.3825, attn_decoder_loss=0.2523, over 5797291.30 frames. ], batch size: 77, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:39:46,518 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.404e+01 8.386e+01 9.045e+01 9.725e+01 1.428e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-17 23:39:47,115 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.80 vs. limit=15.0 +2024-09-17 23:40:14,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=324380.0, ans=0.1 +2024-09-17 23:40:19,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=324380.0, ans=0.125 +2024-09-17 23:40:29,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=324420.0, ans=0.0 +2024-09-17 23:40:37,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.18 vs. limit=15.0 +2024-09-17 23:40:48,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=324460.0, ans=0.0 +2024-09-17 23:40:57,212 INFO [train.py:1198] (0/2) Epoch 18, batch 4200, loss[loss=0.2716, ctc_loss=0.1737, cr_loss=0.4533, attn_decoder_loss=0.2723, over 29499.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.141, cr_loss=0.3833, attn_decoder_loss=0.2527, over 5798403.16 frames. 
], batch size: 90, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:40:58,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=324500.0, ans=0.0 +2024-09-17 23:41:13,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.90 vs. limit=10.0 +2024-09-17 23:41:21,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=324540.0, ans=0.2 +2024-09-17 23:41:33,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=324580.0, ans=0.0 +2024-09-17 23:42:06,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=324660.0, ans=0.125 +2024-09-17 23:42:06,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=324660.0, ans=0.025 +2024-09-17 23:42:11,839 INFO [train.py:1198] (0/2) Epoch 18, batch 4250, loss[loss=0.2389, ctc_loss=0.1351, cr_loss=0.3871, attn_decoder_loss=0.2418, over 29504.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.141, cr_loss=0.3832, attn_decoder_loss=0.2529, over 5805148.88 frames. ], batch size: 74, lr: 6.13e-03, grad_scale: 4.0 +2024-09-17 23:42:16,129 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.452e+01 8.827e+01 9.431e+01 1.016e+02 4.056e+02, threshold=1.886e+02, percent-clipped=2.0 +2024-09-17 23:42:16,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=324700.0, ans=0.125 +2024-09-17 23:42:59,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=324820.0, ans=0.0 +2024-09-17 23:43:01,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=324820.0, ans=0.125 +2024-09-17 23:43:08,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=324820.0, ans=0.5 +2024-09-17 23:43:12,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=324860.0, ans=0.125 +2024-09-17 23:43:27,302 INFO [train.py:1198] (0/2) Epoch 18, batch 4300, loss[loss=0.2586, ctc_loss=0.1373, cr_loss=0.384, attn_decoder_loss=0.2635, over 29546.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1409, cr_loss=0.3828, attn_decoder_loss=0.253, over 5794049.95 frames. ], batch size: 87, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:43:33,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=324900.0, ans=0.0 +2024-09-17 23:43:35,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=324900.0, ans=0.1 +2024-09-17 23:43:39,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=324900.0, ans=0.0 +2024-09-17 23:44:02,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.67 vs. 
limit=15.0 +2024-09-17 23:44:10,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=325020.0, ans=0.0 +2024-09-17 23:44:41,172 INFO [train.py:1198] (0/2) Epoch 18, batch 4350, loss[loss=0.2627, ctc_loss=0.153, cr_loss=0.4239, attn_decoder_loss=0.2655, over 29500.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.1432, cr_loss=0.3875, attn_decoder_loss=0.256, over 5795891.09 frames. ], batch size: 97, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:44:46,380 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.876e+01 8.831e+01 9.306e+01 9.822e+01 6.484e+02, threshold=1.861e+02, percent-clipped=2.0 +2024-09-17 23:45:08,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=325140.0, ans=0.2 +2024-09-17 23:45:11,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=325180.0, ans=0.0 +2024-09-17 23:45:42,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=325260.0, ans=0.125 +2024-09-17 23:45:43,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=325260.0, ans=0.1 +2024-09-17 23:45:54,951 INFO [train.py:1198] (0/2) Epoch 18, batch 4400, loss[loss=0.2512, ctc_loss=0.144, cr_loss=0.3893, attn_decoder_loss=0.2545, over 27414.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1446, cr_loss=0.3897, attn_decoder_loss=0.258, over 5768819.32 frames. ], batch size: 124, lr: 6.12e-03, grad_scale: 16.0 +2024-09-17 23:46:00,937 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.22 vs. limit=15.0 +2024-09-17 23:46:59,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=325460.0, ans=0.125 +2024-09-17 23:47:10,206 INFO [train.py:1198] (0/2) Epoch 18, batch 4450, loss[loss=0.2709, ctc_loss=0.1828, cr_loss=0.3999, attn_decoder_loss=0.2718, over 20133.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1489, cr_loss=0.3948, attn_decoder_loss=0.2606, over 5582217.17 frames. ], batch size: 210, lr: 6.12e-03, grad_scale: 8.0 +2024-09-17 23:47:16,217 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.181e+01 9.154e+01 9.637e+01 1.052e+02 1.489e+02, threshold=1.927e+02, percent-clipped=0.0 +2024-09-17 23:47:22,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=325500.0, ans=0.125 +2024-09-17 23:47:25,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=325540.0, ans=0.125 +2024-09-17 23:47:27,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=23.86 vs. limit=22.5 +2024-09-17 23:47:27,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=14.13 vs. 
limit=22.5 +2024-09-17 23:47:34,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=325540.0, ans=0.1 +2024-09-17 23:47:59,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.44 vs. limit=6.0 +2024-09-17 23:48:04,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.60 vs. limit=15.0 +2024-09-17 23:48:26,307 INFO [train.py:1198] (0/2) Epoch 18, batch 4500, loss[loss=0.2791, ctc_loss=0.1843, cr_loss=0.4493, attn_decoder_loss=0.2796, over 19723.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.1541, cr_loss=0.3976, attn_decoder_loss=0.263, over 5240094.54 frames. ], batch size: 209, lr: 6.12e-03, grad_scale: 8.0 +2024-09-17 23:48:30,084 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.87 vs. limit=22.5 +2024-09-17 23:48:31,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=325700.0, ans=0.04949747468305833 +2024-09-17 23:48:37,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=325700.0, ans=0.025 +2024-09-17 23:48:46,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=325740.0, ans=0.125 +2024-09-17 23:48:58,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=325780.0, ans=0.125 +2024-09-17 23:48:59,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=325780.0, ans=0.125 +2024-09-17 23:48:59,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=325780.0, ans=0.0 +2024-09-17 23:49:03,625 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-18.pt +2024-09-17 23:49:55,588 INFO [train.py:1198] (0/2) Epoch 19, batch 0, loss[loss=0.2316, ctc_loss=0.1232, cr_loss=0.3617, attn_decoder_loss=0.2356, over 29606.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1232, cr_loss=0.3617, attn_decoder_loss=0.2356, over 29606.00 frames. ], batch size: 73, lr: 5.95e-03, grad_scale: 16.0 +2024-09-17 23:49:55,589 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-17 23:50:13,880 INFO [train.py:1230] (0/2) Epoch 19, validation: loss=0.2122, ctc_loss=0.03932, cr_loss=5e-15, attn_decoder_loss=0.2315, over 944034.00 frames. 
+2024-09-17 23:50:13,881 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-17 23:50:14,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=325800.0, ans=0.0 +2024-09-17 23:50:30,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=325840.0, ans=0.125 +2024-09-17 23:50:44,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=325880.0, ans=0.125 +2024-09-17 23:50:48,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=325880.0, ans=0.125 +2024-09-17 23:50:59,126 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.957e+01 1.057e+02 1.132e+02 1.239e+02 3.685e+02, threshold=2.265e+02, percent-clipped=3.0 +2024-09-17 23:51:00,042 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.31 vs. limit=15.0 +2024-09-17 23:51:01,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=325920.0, ans=0.125 +2024-09-17 23:51:07,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=325920.0, ans=0.125 +2024-09-17 23:51:31,615 INFO [train.py:1198] (0/2) Epoch 19, batch 50, loss[loss=0.2246, ctc_loss=0.1255, cr_loss=0.3655, attn_decoder_loss=0.2275, over 29487.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.146, cr_loss=0.3928, attn_decoder_loss=0.2547, over 1267258.49 frames. ], batch size: 70, lr: 5.95e-03, grad_scale: 8.0 +2024-09-17 23:51:42,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=326000.0, ans=0.125 +2024-09-17 23:51:53,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=326040.0, ans=0.125 +2024-09-17 23:52:05,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=326080.0, ans=0.0 +2024-09-17 23:52:07,647 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.60 vs. limit=15.0 +2024-09-17 23:52:08,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=326080.0, ans=0.0 +2024-09-17 23:52:16,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=326080.0, ans=0.0 +2024-09-17 23:52:28,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=326120.0, ans=0.2 +2024-09-17 23:52:49,544 INFO [train.py:1198] (0/2) Epoch 19, batch 100, loss[loss=0.2375, ctc_loss=0.1282, cr_loss=0.3585, attn_decoder_loss=0.2417, over 29566.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1456, cr_loss=0.393, attn_decoder_loss=0.2561, over 2252771.49 frames. 
], batch size: 76, lr: 5.95e-03, grad_scale: 8.0 +2024-09-17 23:53:18,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=326280.0, ans=0.125 +2024-09-17 23:53:21,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=326280.0, ans=0.2 +2024-09-17 23:53:29,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=326280.0, ans=15.0 +2024-09-17 23:53:31,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=326280.0, ans=0.0 +2024-09-17 23:53:34,402 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.770e+01 8.614e+01 9.117e+01 9.815e+01 1.763e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-17 23:53:39,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=326320.0, ans=0.2 +2024-09-17 23:53:57,476 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.55 vs. limit=22.5 +2024-09-17 23:54:04,658 INFO [train.py:1198] (0/2) Epoch 19, batch 150, loss[loss=0.2291, ctc_loss=0.1272, cr_loss=0.3658, attn_decoder_loss=0.2323, over 29467.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1429, cr_loss=0.3875, attn_decoder_loss=0.2539, over 3047411.51 frames. ], batch size: 70, lr: 5.95e-03, grad_scale: 8.0 +2024-09-17 23:54:04,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=326400.0, ans=0.125 +2024-09-17 23:54:08,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=326400.0, ans=0.2 +2024-09-17 23:54:29,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=326440.0, ans=0.125 +2024-09-17 23:54:35,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.05 vs. limit=15.0 +2024-09-17 23:54:40,218 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.67 vs. limit=15.0 +2024-09-17 23:54:47,633 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.93 vs. limit=15.0 +2024-09-17 23:54:57,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=326520.0, ans=0.04949747468305833 +2024-09-17 23:55:05,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=326560.0, ans=0.1 +2024-09-17 23:55:20,168 INFO [train.py:1198] (0/2) Epoch 19, batch 200, loss[loss=0.2612, ctc_loss=0.1551, cr_loss=0.4037, attn_decoder_loss=0.264, over 27254.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1423, cr_loss=0.3865, attn_decoder_loss=0.253, over 3660252.05 frames. 
], batch size: 124, lr: 5.95e-03, grad_scale: 8.0 +2024-09-17 23:55:22,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=326600.0, ans=0.025 +2024-09-17 23:55:40,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=326640.0, ans=0.0 +2024-09-17 23:55:59,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=326680.0, ans=0.125 +2024-09-17 23:56:10,480 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.639e+01 8.598e+01 9.185e+01 9.838e+01 1.653e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-17 23:56:17,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.02 vs. limit=15.0 +2024-09-17 23:56:23,270 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.73 vs. limit=12.0 +2024-09-17 23:56:36,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.min_positive, batch_count=326760.0, ans=0.025 +2024-09-17 23:56:38,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=326760.0, ans=0.0 +2024-09-17 23:56:40,795 INFO [train.py:1198] (0/2) Epoch 19, batch 250, loss[loss=0.2552, ctc_loss=0.1418, cr_loss=0.391, attn_decoder_loss=0.2591, over 29204.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.141, cr_loss=0.3854, attn_decoder_loss=0.2527, over 4142320.36 frames. ], batch size: 100, lr: 5.94e-03, grad_scale: 8.0 +2024-09-17 23:56:42,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=326800.0, ans=0.2 +2024-09-17 23:56:51,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=326800.0, ans=0.0 +2024-09-17 23:56:59,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=326840.0, ans=0.125 +2024-09-17 23:57:02,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=326840.0, ans=15.0 +2024-09-17 23:57:03,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=326840.0, ans=0.125 +2024-09-17 23:57:08,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=326840.0, ans=0.125 +2024-09-17 23:57:32,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=326920.0, ans=0.125 +2024-09-17 23:57:40,988 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.58 vs. limit=15.0 +2024-09-17 23:57:56,491 INFO [train.py:1198] (0/2) Epoch 19, batch 300, loss[loss=0.2589, ctc_loss=0.1484, cr_loss=0.4061, attn_decoder_loss=0.2622, over 29561.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.14, cr_loss=0.3837, attn_decoder_loss=0.252, over 4509933.11 frames. 
], batch size: 92, lr: 5.94e-03, grad_scale: 8.0 +2024-09-17 23:58:09,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=327000.0, ans=0.125 +2024-09-17 23:58:09,705 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.80 vs. limit=22.5 +2024-09-17 23:58:10,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=327040.0, ans=0.125 +2024-09-17 23:58:18,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=327040.0, ans=0.1 +2024-09-17 23:58:22,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=327040.0, ans=0.2 +2024-09-17 23:58:25,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=327080.0, ans=0.0 +2024-09-17 23:58:36,198 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:58:37,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=327080.0, ans=0.125 +2024-09-17 23:58:41,752 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.383e+01 8.487e+01 9.041e+01 9.802e+01 3.671e+02, threshold=1.808e+02, percent-clipped=2.0 +2024-09-17 23:58:49,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=327120.0, ans=0.2 +2024-09-17 23:59:02,457 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.02 vs. limit=12.0 +2024-09-17 23:59:03,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=327160.0, ans=0.0 +2024-09-17 23:59:12,437 INFO [train.py:1198] (0/2) Epoch 19, batch 350, loss[loss=0.2221, ctc_loss=0.1219, cr_loss=0.3541, attn_decoder_loss=0.2254, over 29302.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1404, cr_loss=0.3844, attn_decoder_loss=0.2527, over 4795786.75 frames. ], batch size: 71, lr: 5.94e-03, grad_scale: 8.0 +2024-09-17 23:59:17,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=327200.0, ans=10.0 +2024-09-17 23:59:19,507 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.05 vs. 
limit=10.0 +2024-09-17 23:59:28,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=327240.0, ans=0.125 +2024-09-17 23:59:30,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=327240.0, ans=0.0 +2024-09-17 23:59:37,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=327240.0, ans=0.125 +2024-09-17 23:59:39,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=327240.0, ans=0.07 +2024-09-17 23:59:40,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=327240.0, ans=0.025 +2024-09-17 23:59:46,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=327280.0, ans=0.025 +2024-09-17 23:59:55,156 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.86 vs. limit=10.0 +2024-09-18 00:00:15,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff2.min_abs, batch_count=327320.0, ans=0.1 +2024-09-18 00:00:15,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten.whitening_limit, batch_count=327320.0, ans=15.0 +2024-09-18 00:00:19,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=327360.0, ans=0.1 +2024-09-18 00:00:32,751 INFO [train.py:1198] (0/2) Epoch 19, batch 400, loss[loss=0.2481, ctc_loss=0.1347, cr_loss=0.3821, attn_decoder_loss=0.2522, over 29694.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1394, cr_loss=0.3829, attn_decoder_loss=0.2521, over 5024769.94 frames. ], batch size: 82, lr: 5.94e-03, grad_scale: 16.0 +2024-09-18 00:00:51,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=327440.0, ans=0.0 +2024-09-18 00:00:57,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=327440.0, ans=0.125 +2024-09-18 00:00:58,012 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.20 vs. limit=22.5 +2024-09-18 00:01:14,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.00 vs. limit=22.5 +2024-09-18 00:01:19,971 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.098e+01 8.676e+01 9.493e+01 1.045e+02 1.663e+02, threshold=1.899e+02, percent-clipped=0.0 +2024-09-18 00:01:20,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=327520.0, ans=0.125 +2024-09-18 00:01:26,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=327520.0, ans=0.0 +2024-09-18 00:01:48,883 INFO [train.py:1198] (0/2) Epoch 19, batch 450, loss[loss=0.254, ctc_loss=0.1423, cr_loss=0.3869, attn_decoder_loss=0.2578, over 29675.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1397, cr_loss=0.3827, attn_decoder_loss=0.2522, over 5187705.82 frames. 
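Note on the ScheduledFloat entries above: each one reports the current value (`ans=`) of a tunable regularisation knob at the given `batch_count`. A minimal sketch of such a batch-count-keyed schedule, assuming plain piecewise-linear interpolation between breakpoints; the class name and the example breakpoints below are illustrative only, not the actual scaling.py API:

from bisect import bisect_right

class PiecewiseLinearSchedule:
    """Maps a global batch_count to a float by linear interpolation
    between (batch_count, value) breakpoints, clamping at the ends."""

    def __init__(self, *points):
        # points: (batch_count, value) pairs, ordered by batch_count
        self.xs = [p[0] for p in points]
        self.ys = [p[1] for p in points]

    def value_at(self, batch_count: float) -> float:
        if batch_count <= self.xs[0]:
            return self.ys[0]
        if batch_count >= self.xs[-1]:
            return self.ys[-1]
        i = bisect_right(self.xs, batch_count)
        x0, x1 = self.xs[i - 1], self.xs[i]
        y0, y1 = self.ys[i - 1], self.ys[i]
        return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)

# e.g. a skip-rate that decays from 0.2 to 0.0 over the first 4000 batches
# would print ans=0.0 at the batch counts above (all > 300000):
skip_rate = PiecewiseLinearSchedule((0.0, 0.2), (4000.0, 0.0))
print(skip_rate.value_at(327000.0))  # -> 0.0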
], batch size: 83, lr: 5.94e-03, grad_scale: 8.0 +2024-09-18 00:02:10,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=327640.0, ans=0.1 +2024-09-18 00:02:19,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=327680.0, ans=0.125 +2024-09-18 00:02:36,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=327720.0, ans=0.1 +2024-09-18 00:02:49,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=327760.0, ans=0.0 +2024-09-18 00:03:04,335 INFO [train.py:1198] (0/2) Epoch 19, batch 500, loss[loss=0.2648, ctc_loss=0.149, cr_loss=0.3976, attn_decoder_loss=0.2688, over 29450.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1393, cr_loss=0.3818, attn_decoder_loss=0.2513, over 5330119.54 frames. ], batch size: 94, lr: 5.94e-03, grad_scale: 8.0 +2024-09-18 00:03:15,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=327800.0, ans=0.95 +2024-09-18 00:03:21,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer_na.min_abs, batch_count=327840.0, ans=0.02 +2024-09-18 00:03:30,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.whiten.whitening_limit, batch_count=327840.0, ans=12.0 +2024-09-18 00:03:56,183 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.669e+01 8.703e+01 9.333e+01 1.015e+02 2.225e+02, threshold=1.867e+02, percent-clipped=2.0 +2024-09-18 00:04:03,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.67 vs. limit=15.0 +2024-09-18 00:04:16,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=327960.0, ans=0.125 +2024-09-18 00:04:17,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=327960.0, ans=0.0 +2024-09-18 00:04:20,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=327960.0, ans=0.125 +2024-09-18 00:04:25,793 INFO [train.py:1198] (0/2) Epoch 19, batch 550, loss[loss=0.2603, ctc_loss=0.1371, cr_loss=0.3935, attn_decoder_loss=0.2653, over 28812.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1397, cr_loss=0.3823, attn_decoder_loss=0.2517, over 5421769.02 frames. ], batch size: 104, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:04:29,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.15 vs. limit=10.0 +2024-09-18 00:04:37,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.94 vs. 
limit=15.0 +2024-09-18 00:04:39,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=328040.0, ans=0.125 +2024-09-18 00:04:59,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=328080.0, ans=0.125 +2024-09-18 00:05:02,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=328080.0, ans=0.125 +2024-09-18 00:05:11,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=328120.0, ans=0.125 +2024-09-18 00:05:17,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=328120.0, ans=0.0 +2024-09-18 00:05:26,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=328160.0, ans=0.1 +2024-09-18 00:05:41,273 INFO [train.py:1198] (0/2) Epoch 19, batch 600, loss[loss=0.2648, ctc_loss=0.1495, cr_loss=0.3818, attn_decoder_loss=0.2691, over 29259.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1399, cr_loss=0.3822, attn_decoder_loss=0.2521, over 5507333.62 frames. ], batch size: 100, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:05:44,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=328200.0, ans=0.0 +2024-09-18 00:06:14,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=328280.0, ans=0.1 +2024-09-18 00:06:27,696 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.610e+01 8.945e+01 9.378e+01 9.831e+01 2.043e+02, threshold=1.876e+02, percent-clipped=1.0 +2024-09-18 00:06:56,861 INFO [train.py:1198] (0/2) Epoch 19, batch 650, loss[loss=0.2521, ctc_loss=0.1439, cr_loss=0.3964, attn_decoder_loss=0.2554, over 29735.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1381, cr_loss=0.3795, attn_decoder_loss=0.2508, over 5585375.31 frames. ], batch size: 81, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:07:02,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.31 vs. limit=22.5 +2024-09-18 00:07:10,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=328440.0, ans=0.125 +2024-09-18 00:07:11,055 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.24 vs. limit=6.0 +2024-09-18 00:07:15,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=328440.0, ans=0.125 +2024-09-18 00:07:23,638 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.84 vs. limit=15.0 +2024-09-18 00:07:24,995 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.49 vs. 
limit=15.0 +2024-09-18 00:07:48,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=328520.0, ans=0.125 +2024-09-18 00:08:17,458 INFO [train.py:1198] (0/2) Epoch 19, batch 700, loss[loss=0.2328, ctc_loss=0.1284, cr_loss=0.3705, attn_decoder_loss=0.2362, over 29543.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1385, cr_loss=0.3806, attn_decoder_loss=0.2513, over 5637468.76 frames. ], batch size: 76, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:08:25,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=328600.0, ans=0.2 +2024-09-18 00:08:31,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=328640.0, ans=0.1 +2024-09-18 00:08:41,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=328640.0, ans=0.125 +2024-09-18 00:09:04,106 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.262e+01 8.484e+01 8.986e+01 9.600e+01 2.397e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 00:09:33,274 INFO [train.py:1198] (0/2) Epoch 19, batch 750, loss[loss=0.26, ctc_loss=0.1508, cr_loss=0.4297, attn_decoder_loss=0.2626, over 29720.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1386, cr_loss=0.3807, attn_decoder_loss=0.2512, over 5676570.82 frames. ], batch size: 82, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:09:54,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=328840.0, ans=0.2 +2024-09-18 00:10:11,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.63 vs. limit=22.5 +2024-09-18 00:10:14,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=328880.0, ans=0.0 +2024-09-18 00:10:24,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=328920.0, ans=0.0 +2024-09-18 00:10:29,378 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:10:48,710 INFO [train.py:1198] (0/2) Epoch 19, batch 800, loss[loss=0.2115, ctc_loss=0.09974, cr_loss=0.3018, attn_decoder_loss=0.2172, over 29597.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1386, cr_loss=0.3802, attn_decoder_loss=0.2511, over 5707327.14 frames. 
], batch size: 73, lr: 5.92e-03, grad_scale: 16.0 +2024-09-18 00:11:01,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=329000.0, ans=0.1 +2024-09-18 00:11:02,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=329040.0, ans=0.125 +2024-09-18 00:11:39,693 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.487e+01 8.734e+01 9.110e+01 9.840e+01 2.381e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 00:11:43,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=329120.0, ans=0.0 +2024-09-18 00:11:55,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=329160.0, ans=0.125 +2024-09-18 00:12:09,128 INFO [train.py:1198] (0/2) Epoch 19, batch 850, loss[loss=0.2555, ctc_loss=0.1372, cr_loss=0.3835, attn_decoder_loss=0.2601, over 29686.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1383, cr_loss=0.3798, attn_decoder_loss=0.2508, over 5736441.80 frames. ], batch size: 89, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:12:13,123 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.20 vs. limit=15.0 +2024-09-18 00:12:30,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=329240.0, ans=0.1 +2024-09-18 00:12:31,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=329240.0, ans=0.125 +2024-09-18 00:12:33,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=329240.0, ans=0.1 +2024-09-18 00:12:41,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=329280.0, ans=0.0 +2024-09-18 00:13:25,354 INFO [train.py:1198] (0/2) Epoch 19, batch 900, loss[loss=0.2307, ctc_loss=0.1291, cr_loss=0.3627, attn_decoder_loss=0.2339, over 29614.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1392, cr_loss=0.3816, attn_decoder_loss=0.2515, over 5742704.52 frames. ], batch size: 73, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:13:48,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=329440.0, ans=0.125 +2024-09-18 00:13:57,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=329480.0, ans=0.125 +2024-09-18 00:14:14,238 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.000e+01 8.696e+01 9.115e+01 9.955e+01 6.704e+02, threshold=1.823e+02, percent-clipped=4.0 +2024-09-18 00:14:28,970 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.44 vs. limit=15.0 +2024-09-18 00:14:34,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=329560.0, ans=0.0 +2024-09-18 00:14:41,598 INFO [train.py:1198] (0/2) Epoch 19, batch 950, loss[loss=0.2365, ctc_loss=0.129, cr_loss=0.3711, attn_decoder_loss=0.2402, over 29523.00 frames. 
], tot_loss[loss=0.2482, ctc_loss=0.1392, cr_loss=0.3817, attn_decoder_loss=0.2518, over 5743088.42 frames. ], batch size: 74, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:14:44,145 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.77 vs. limit=6.0 +2024-09-18 00:14:58,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=329640.0, ans=0.1 +2024-09-18 00:15:18,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=329680.0, ans=0.025 +2024-09-18 00:15:29,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=329720.0, ans=0.0 +2024-09-18 00:15:45,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=329760.0, ans=0.125 +2024-09-18 00:15:50,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.71 vs. limit=15.0 +2024-09-18 00:15:59,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=329760.0, ans=0.125 +2024-09-18 00:16:01,870 INFO [train.py:1198] (0/2) Epoch 19, batch 1000, loss[loss=0.2422, ctc_loss=0.1347, cr_loss=0.3759, attn_decoder_loss=0.2458, over 29481.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1398, cr_loss=0.3823, attn_decoder_loss=0.2524, over 5736374.13 frames. ], batch size: 77, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:16:09,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=329800.0, ans=0.125 +2024-09-18 00:16:17,544 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:16:33,402 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.66 vs. limit=15.0 +2024-09-18 00:16:45,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.28 vs. limit=22.5 +2024-09-18 00:16:50,537 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.634e+01 8.872e+01 9.584e+01 1.048e+02 1.890e+02, threshold=1.917e+02, percent-clipped=1.0 +2024-09-18 00:16:56,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=329920.0, ans=0.125 +2024-09-18 00:17:10,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=329960.0, ans=0.125 +2024-09-18 00:17:14,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=329960.0, ans=0.1 +2024-09-18 00:17:17,655 INFO [train.py:1198] (0/2) Epoch 19, batch 1050, loss[loss=0.2528, ctc_loss=0.1375, cr_loss=0.3729, attn_decoder_loss=0.2573, over 29670.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.1391, cr_loss=0.3813, attn_decoder_loss=0.2516, over 5743676.91 frames. 
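Note on the optim.py WARNING lines: the reported `threshold` consistently sits at about Clipping_scale (2.0) times the reported median grad-norm (e.g. 9.115e+01 vs. threshold=1.823e+02 just above), which suggests a dynamic clipping threshold derived from a sliding window of recent gradient norms. The sketch below reproduces that behaviour under those assumptions; it is not the exact optim.py logic, and `window` is a made-up parameter:

import torch

def clip_by_running_quartiles(params, norm_history, clipping_scale=2.0, window=1024):
    """Clip the global grad norm against clipping_scale x the running
    median of recent norms; returns the quartiles and threshold that the
    WARNING lines would report. `params` must be a list of Parameters."""
    grads = [p.grad.detach().flatten() for p in params if p.grad is not None]
    total_norm = torch.cat(grads).norm().item()
    norm_history.append(total_norm)
    del norm_history[:-window]  # keep only the most recent `window` norms
    s = sorted(norm_history)
    q = lambda f: s[int(f * (len(s) - 1))]
    quartiles = [q(0.0), q(0.25), q(0.5), q(0.75), q(1.0)]
    threshold = clipping_scale * q(0.5)  # assumption: scale x median
    if total_norm > threshold:  # such batches count into `percent-clipped`
        for p in params:
            if p.grad is not None:
                p.grad.mul_(threshold / total_norm)
    return quartiles, threshold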
], batch size: 85, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:17:19,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=330000.0, ans=0.125 +2024-09-18 00:17:20,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=330000.0, ans=0.125 +2024-09-18 00:17:33,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=330040.0, ans=0.2 +2024-09-18 00:17:39,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=330040.0, ans=0.0 +2024-09-18 00:17:47,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=330080.0, ans=0.0 +2024-09-18 00:17:51,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.13 vs. limit=10.0 +2024-09-18 00:17:51,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=330080.0, ans=0.1 +2024-09-18 00:18:25,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=330160.0, ans=0.0 +2024-09-18 00:18:34,359 INFO [train.py:1198] (0/2) Epoch 19, batch 1100, loss[loss=0.2472, ctc_loss=0.1359, cr_loss=0.38, attn_decoder_loss=0.2512, over 29459.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1388, cr_loss=0.3812, attn_decoder_loss=0.2515, over 5756361.94 frames. ], batch size: 78, lr: 5.91e-03, grad_scale: 8.0 +2024-09-18 00:18:42,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=330200.0, ans=0.1 +2024-09-18 00:18:45,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=330200.0, ans=0.0 +2024-09-18 00:18:45,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=330200.0, ans=0.0 +2024-09-18 00:19:05,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=330280.0, ans=0.1 +2024-09-18 00:19:25,408 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.301e+01 8.385e+01 8.690e+01 9.252e+01 1.167e+02, threshold=1.738e+02, percent-clipped=0.0 +2024-09-18 00:19:35,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=330320.0, ans=0.07 +2024-09-18 00:19:38,206 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.95 vs. limit=22.5 +2024-09-18 00:19:46,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=330360.0, ans=0.07 +2024-09-18 00:19:49,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=330360.0, ans=0.0 +2024-09-18 00:19:55,677 INFO [train.py:1198] (0/2) Epoch 19, batch 1150, loss[loss=0.2388, ctc_loss=0.1318, cr_loss=0.3735, attn_decoder_loss=0.2424, over 29461.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1388, cr_loss=0.3809, attn_decoder_loss=0.2511, over 5755423.13 frames. 
], batch size: 78, lr: 5.91e-03, grad_scale: 8.0 +2024-09-18 00:20:26,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=330480.0, ans=0.125 +2024-09-18 00:20:32,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=330480.0, ans=0.125 +2024-09-18 00:20:34,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=330480.0, ans=0.2 +2024-09-18 00:20:35,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=330480.0, ans=0.1 +2024-09-18 00:20:37,867 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.84 vs. limit=12.0 +2024-09-18 00:21:03,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=330560.0, ans=0.0 +2024-09-18 00:21:09,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=330560.0, ans=0.0 +2024-09-18 00:21:11,973 INFO [train.py:1198] (0/2) Epoch 19, batch 1200, loss[loss=0.2539, ctc_loss=0.1407, cr_loss=0.3857, attn_decoder_loss=0.2579, over 29672.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1395, cr_loss=0.3818, attn_decoder_loss=0.252, over 5747105.59 frames. ], batch size: 85, lr: 5.91e-03, grad_scale: 16.0 +2024-09-18 00:21:12,972 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.50 vs. limit=22.5 +2024-09-18 00:21:22,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=330600.0, ans=0.0 +2024-09-18 00:21:30,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=330640.0, ans=0.1 +2024-09-18 00:21:41,884 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.36 vs. limit=6.0 +2024-09-18 00:22:02,447 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.547e+01 8.778e+01 9.349e+01 9.833e+01 1.592e+02, threshold=1.870e+02, percent-clipped=0.0 +2024-09-18 00:22:15,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=330760.0, ans=0.025 +2024-09-18 00:22:28,396 INFO [train.py:1198] (0/2) Epoch 19, batch 1250, loss[loss=0.257, ctc_loss=0.1553, cr_loss=0.41, attn_decoder_loss=0.2592, over 29549.00 frames. ], tot_loss[loss=0.249, ctc_loss=0.1402, cr_loss=0.3832, attn_decoder_loss=0.2526, over 5775084.08 frames. 
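Note on the per-batch loss breakdown: the logged totals are consistent with a fixed weighted sum of the three components, loss ≈ 0.1*ctc_loss + 0.9*attn_decoder_loss + 0.02*cr_loss; e.g. for batch 1200 above, 0.1*0.1407 + 0.9*0.2579 + 0.02*0.3857 ≈ 0.2539. These weights are inferred from the logged numbers, not read out of train.py:

def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float,
                  ctc_weight: float = 0.1,
                  attn_weight: float = 0.9,
                  cr_weight: float = 0.02) -> float:
    """Weighted sum matching the `loss=` field in the log (weights inferred)."""
    return ctc_weight * ctc_loss + attn_weight * attn_decoder_loss + cr_weight * cr_loss

print(round(combined_loss(0.1407, 0.2579, 0.3857), 4))  # -> 0.2539, as logged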
], batch size: 92, lr: 5.91e-03, grad_scale: 8.0 +2024-09-18 00:22:40,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=330800.0, ans=0.1 +2024-09-18 00:22:41,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=330800.0, ans=0.0 +2024-09-18 00:23:40,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=330960.0, ans=0.125 +2024-09-18 00:23:48,824 INFO [train.py:1198] (0/2) Epoch 19, batch 1300, loss[loss=0.2652, ctc_loss=0.1532, cr_loss=0.3994, attn_decoder_loss=0.2687, over 28438.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1397, cr_loss=0.3817, attn_decoder_loss=0.2518, over 5780963.77 frames. ], batch size: 111, lr: 5.91e-03, grad_scale: 8.0 +2024-09-18 00:23:58,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=331000.0, ans=0.125 +2024-09-18 00:24:13,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=331040.0, ans=0.125 +2024-09-18 00:24:24,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=331080.0, ans=0.125 +2024-09-18 00:24:39,241 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.389e+01 8.628e+01 9.058e+01 9.767e+01 1.420e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 00:24:41,604 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.54 vs. limit=22.5 +2024-09-18 00:24:42,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=331120.0, ans=0.125 +2024-09-18 00:25:05,571 INFO [train.py:1198] (0/2) Epoch 19, batch 1350, loss[loss=0.2486, ctc_loss=0.1359, cr_loss=0.3745, attn_decoder_loss=0.2528, over 29770.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1388, cr_loss=0.3804, attn_decoder_loss=0.2514, over 5797060.28 frames. ], batch size: 81, lr: 5.90e-03, grad_scale: 8.0 +2024-09-18 00:25:09,124 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.67 vs. limit=15.0 +2024-09-18 00:25:19,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=331240.0, ans=0.5 +2024-09-18 00:25:30,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=331240.0, ans=0.1 +2024-09-18 00:25:59,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=331320.0, ans=0.04949747468305833 +2024-09-18 00:26:03,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer_na.min_abs, batch_count=331320.0, ans=0.02 +2024-09-18 00:26:19,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=331360.0, ans=0.125 +2024-09-18 00:26:21,736 INFO [train.py:1198] (0/2) Epoch 19, batch 1400, loss[loss=0.2089, ctc_loss=0.1112, cr_loss=0.3159, attn_decoder_loss=0.2127, over 29597.00 frames. 
], tot_loss[loss=0.2472, ctc_loss=0.1384, cr_loss=0.3797, attn_decoder_loss=0.2509, over 5807996.46 frames. ], batch size: 69, lr: 5.90e-03, grad_scale: 8.0 +2024-09-18 00:26:22,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=331400.0, ans=0.1 +2024-09-18 00:26:43,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=331440.0, ans=0.1 +2024-09-18 00:26:50,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=331480.0, ans=0.0 +2024-09-18 00:26:55,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=331480.0, ans=0.07 +2024-09-18 00:27:07,640 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:27:07,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.33 vs. limit=15.0 +2024-09-18 00:27:09,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=331520.0, ans=0.0 +2024-09-18 00:27:11,766 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.236e+01 8.640e+01 9.143e+01 9.808e+01 1.570e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-18 00:27:12,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=331520.0, ans=0.0 +2024-09-18 00:27:12,808 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.68 vs. limit=15.0 +2024-09-18 00:27:13,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=331520.0, ans=0.1 +2024-09-18 00:27:15,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=331520.0, ans=0.125 +2024-09-18 00:27:28,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=331560.0, ans=0.125 +2024-09-18 00:27:40,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=331600.0, ans=0.0 +2024-09-18 00:27:42,237 INFO [train.py:1198] (0/2) Epoch 19, batch 1450, loss[loss=0.2601, ctc_loss=0.1497, cr_loss=0.4224, attn_decoder_loss=0.263, over 29422.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.139, cr_loss=0.3813, attn_decoder_loss=0.2518, over 5804523.94 frames. 
], batch size: 94, lr: 5.90e-03, grad_scale: 8.0 +2024-09-18 00:27:54,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=331600.0, ans=0.125 +2024-09-18 00:28:03,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=331640.0, ans=0.125 +2024-09-18 00:28:09,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=331640.0, ans=0.0 +2024-09-18 00:28:20,884 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.19 vs. limit=15.0 +2024-09-18 00:28:21,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=331680.0, ans=0.2 +2024-09-18 00:28:37,257 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.03 vs. limit=15.0 +2024-09-18 00:28:47,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=331760.0, ans=0.0 +2024-09-18 00:28:47,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=331760.0, ans=0.1 +2024-09-18 00:28:57,909 INFO [train.py:1198] (0/2) Epoch 19, batch 1500, loss[loss=0.2561, ctc_loss=0.1435, cr_loss=0.3788, attn_decoder_loss=0.2602, over 29623.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1391, cr_loss=0.3812, attn_decoder_loss=0.2519, over 5805904.38 frames. ], batch size: 86, lr: 5.90e-03, grad_scale: 8.0 +2024-09-18 00:29:05,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=331800.0, ans=0.2 +2024-09-18 00:29:15,110 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:29:40,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=331880.0, ans=0.1 +2024-09-18 00:29:41,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=331880.0, ans=0.125 +2024-09-18 00:29:48,930 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.295e+01 8.706e+01 9.242e+01 9.878e+01 2.158e+02, threshold=1.848e+02, percent-clipped=2.0 +2024-09-18 00:29:59,063 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.22 vs. limit=6.0 +2024-09-18 00:30:12,891 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.74 vs. limit=15.0 +2024-09-18 00:30:15,118 INFO [train.py:1198] (0/2) Epoch 19, batch 1550, loss[loss=0.2674, ctc_loss=0.1607, cr_loss=0.4099, attn_decoder_loss=0.2702, over 29508.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1395, cr_loss=0.3814, attn_decoder_loss=0.2521, over 5781915.14 frames. 
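Note on the Whitening lines: each reports an anisotropy `metric` for a layer's activations against a (scheduled) `limit`; the whitening penalty only engages once the metric exceeds the limit, which is why most lines read like metric=11.03 vs. limit=15.0 above. An illustrative metric with the same flavour, equal to 1.0 for perfectly white (isotropic) features and growing as the covariance eigenvalues spread out; this captures the idea, not icefall's exact formula:

import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> float:
    """Anisotropy of the feature covariance, averaged over channel groups:
    mean(eig^2) / mean(eig)^2 is 1.0 when all eigenvalues are equal and
    approaches the group size when one direction dominates."""
    metrics = []
    for c in x.chunk(num_groups, dim=1):  # x: (frames, channels)
        c = c - c.mean(dim=0, keepdim=True)
        cov = (c.T @ c) / c.shape[0]
        eig = torch.linalg.eigvalsh(cov)
        metrics.append((eig.pow(2).mean() / eig.mean().pow(2)).item())
    return sum(metrics) / len(metrics)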
], batch size: 90, lr: 5.90e-03, grad_scale: 8.0 +2024-09-18 00:30:36,528 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:30:57,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=332080.0, ans=0.125 +2024-09-18 00:31:10,921 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=16.60 vs. limit=15.0 +2024-09-18 00:31:18,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=332160.0, ans=0.025 +2024-09-18 00:31:30,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=332160.0, ans=0.0 +2024-09-18 00:31:35,035 INFO [train.py:1198] (0/2) Epoch 19, batch 1600, loss[loss=0.2537, ctc_loss=0.1425, cr_loss=0.3876, attn_decoder_loss=0.2574, over 29696.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1401, cr_loss=0.3819, attn_decoder_loss=0.252, over 5763907.52 frames. ], batch size: 85, lr: 5.90e-03, grad_scale: 16.0 +2024-09-18 00:31:47,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=332200.0, ans=0.125 +2024-09-18 00:32:03,233 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.36 vs. limit=15.0 +2024-09-18 00:32:08,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=332280.0, ans=0.2 +2024-09-18 00:32:10,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=332280.0, ans=0.2 +2024-09-18 00:32:14,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=332280.0, ans=0.125 +2024-09-18 00:32:16,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=332280.0, ans=0.1 +2024-09-18 00:32:25,390 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:32:26,522 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.753e+01 8.856e+01 9.608e+01 1.051e+02 2.791e+02, threshold=1.922e+02, percent-clipped=1.0 +2024-09-18 00:32:28,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.87 vs. limit=15.0 +2024-09-18 00:32:49,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=332400.0, ans=0.04949747468305833 +2024-09-18 00:32:50,570 INFO [train.py:1198] (0/2) Epoch 19, batch 1650, loss[loss=0.2563, ctc_loss=0.1444, cr_loss=0.374, attn_decoder_loss=0.2604, over 29701.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1403, cr_loss=0.3828, attn_decoder_loss=0.2521, over 5757422.59 frames. 
], batch size: 89, lr: 5.89e-03, grad_scale: 8.0 +2024-09-18 00:32:50,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=332400.0, ans=0.025 +2024-09-18 00:32:54,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=332400.0, ans=0.95 +2024-09-18 00:33:10,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.61 vs. limit=22.5 +2024-09-18 00:33:11,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=332440.0, ans=0.1 +2024-09-18 00:33:30,908 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.11 vs. limit=22.5 +2024-09-18 00:33:36,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=332520.0, ans=0.125 +2024-09-18 00:34:06,071 INFO [train.py:1198] (0/2) Epoch 19, batch 1700, loss[loss=0.2178, ctc_loss=0.1113, cr_loss=0.3292, attn_decoder_loss=0.2223, over 29563.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1396, cr_loss=0.3821, attn_decoder_loss=0.2517, over 5779143.30 frames. ], batch size: 69, lr: 5.89e-03, grad_scale: 8.0 +2024-09-18 00:34:06,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=332600.0, ans=0.125 +2024-09-18 00:34:33,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=332640.0, ans=0.125 +2024-09-18 00:34:59,434 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.383e+01 8.557e+01 9.059e+01 9.709e+01 1.358e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 00:35:05,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=332720.0, ans=0.125 +2024-09-18 00:35:13,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=332760.0, ans=0.1 +2024-09-18 00:35:26,341 INFO [train.py:1198] (0/2) Epoch 19, batch 1750, loss[loss=0.219, ctc_loss=0.1185, cr_loss=0.3489, attn_decoder_loss=0.2224, over 29340.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1391, cr_loss=0.3813, attn_decoder_loss=0.2513, over 5788444.80 frames. ], batch size: 67, lr: 5.89e-03, grad_scale: 8.0 +2024-09-18 00:35:29,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=332800.0, ans=0.125 +2024-09-18 00:35:41,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=332840.0, ans=0.0 +2024-09-18 00:35:43,985 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.81 vs. limit=15.0 +2024-09-18 00:36:20,133 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.63 vs. 
limit=22.5 +2024-09-18 00:36:25,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=332960.0, ans=0.125 +2024-09-18 00:36:27,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=332960.0, ans=0.0 +2024-09-18 00:36:31,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=332960.0, ans=0.2 +2024-09-18 00:36:41,691 INFO [train.py:1198] (0/2) Epoch 19, batch 1800, loss[loss=0.2514, ctc_loss=0.1378, cr_loss=0.3744, attn_decoder_loss=0.2558, over 29693.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1389, cr_loss=0.3812, attn_decoder_loss=0.2515, over 5790576.93 frames. ], batch size: 83, lr: 5.89e-03, grad_scale: 8.0 +2024-09-18 00:37:04,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=333040.0, ans=0.125 +2024-09-18 00:37:33,205 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.520e+01 8.534e+01 9.002e+01 9.561e+01 2.098e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-18 00:37:51,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.89 vs. limit=15.0 +2024-09-18 00:37:57,829 INFO [train.py:1198] (0/2) Epoch 19, batch 1850, loss[loss=0.2568, ctc_loss=0.1423, cr_loss=0.3895, attn_decoder_loss=0.2609, over 29633.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1386, cr_loss=0.3808, attn_decoder_loss=0.2513, over 5797633.58 frames. ], batch size: 86, lr: 5.89e-03, grad_scale: 8.0 +2024-09-18 00:38:22,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=333240.0, ans=0.0 +2024-09-18 00:38:31,627 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:38:32,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.09 vs. limit=15.0 +2024-09-18 00:38:36,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=333280.0, ans=0.07 +2024-09-18 00:39:02,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=333360.0, ans=0.0 +2024-09-18 00:39:10,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.10 vs. limit=15.0 +2024-09-18 00:39:15,878 INFO [train.py:1198] (0/2) Epoch 19, batch 1900, loss[loss=0.2645, ctc_loss=0.1533, cr_loss=0.4106, attn_decoder_loss=0.2677, over 29711.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1389, cr_loss=0.3814, attn_decoder_loss=0.2518, over 5804780.63 frames. 
], batch size: 89, lr: 5.89e-03, grad_scale: 8.0 +2024-09-18 00:39:28,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=333400.0, ans=0.2 +2024-09-18 00:40:10,193 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.673e+01 8.878e+01 9.424e+01 1.001e+02 2.862e+02, threshold=1.885e+02, percent-clipped=2.0 +2024-09-18 00:40:33,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=333600.0, ans=0.125 +2024-09-18 00:40:34,855 INFO [train.py:1198] (0/2) Epoch 19, batch 1950, loss[loss=0.2442, ctc_loss=0.1398, cr_loss=0.394, attn_decoder_loss=0.247, over 29471.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1395, cr_loss=0.3828, attn_decoder_loss=0.2528, over 5819699.83 frames. ], batch size: 78, lr: 5.88e-03, grad_scale: 8.0 +2024-09-18 00:40:38,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=333600.0, ans=0.125 +2024-09-18 00:40:40,673 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.44 vs. limit=12.0 +2024-09-18 00:40:56,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=333640.0, ans=0.0 +2024-09-18 00:41:02,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.72 vs. limit=15.0 +2024-09-18 00:41:32,942 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.73 vs. limit=15.0 +2024-09-18 00:41:38,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=333760.0, ans=0.125 +2024-09-18 00:41:38,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=333760.0, ans=0.1 +2024-09-18 00:41:50,438 INFO [train.py:1198] (0/2) Epoch 19, batch 2000, loss[loss=0.2171, ctc_loss=0.1207, cr_loss=0.3547, attn_decoder_loss=0.22, over 29346.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1397, cr_loss=0.3824, attn_decoder_loss=0.253, over 5797569.81 frames. 
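Note on the `grad_scale:` field in the batch lines: it is the fp16 loss-scaling factor for mixed-precision training; in this log it sits at 8.0 and rises to 16.0 on every 400th batch (batches 400, 800, 1200, 1600, 2000) before settling back. The generic PyTorch mechanism behind such a field looks like the sketch below; the exact growth/backoff policy in this run is train.py's own:

import torch

scaler = torch.cuda.amp.GradScaler(init_scale=8.0)  # matches grad_scale: 8.0

def train_step(model, batch, optimizer):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(batch)  # the combined loss logged per batch
    scaler.scale(loss).backward()  # scale up so fp16 grads don't underflow
    scaler.step(optimizer)  # unscales grads, skips the step on inf/nan
    scaler.update()  # grows or backs off grad_scale over time
    return loss.item()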
], batch size: 67, lr: 5.88e-03, grad_scale: 16.0 +2024-09-18 00:42:06,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=333840.0, ans=0.0 +2024-09-18 00:42:19,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=333880.0, ans=0.025 +2024-09-18 00:42:31,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=333880.0, ans=0.1 +2024-09-18 00:42:40,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=333920.0, ans=0.1 +2024-09-18 00:42:41,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=333920.0, ans=0.125 +2024-09-18 00:42:46,024 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.622e+01 8.666e+01 9.128e+01 9.687e+01 2.181e+02, threshold=1.826e+02, percent-clipped=3.0 +2024-09-18 00:42:53,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=333960.0, ans=0.0 +2024-09-18 00:42:55,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=333960.0, ans=0.1 +2024-09-18 00:43:07,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=334000.0, ans=0.125 +2024-09-18 00:43:08,937 INFO [train.py:1198] (0/2) Epoch 19, batch 2050, loss[loss=0.2298, ctc_loss=0.1297, cr_loss=0.3721, attn_decoder_loss=0.2327, over 29421.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1394, cr_loss=0.3818, attn_decoder_loss=0.2521, over 5790757.28 frames. ], batch size: 70, lr: 5.88e-03, grad_scale: 8.0 +2024-09-18 00:43:17,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=334000.0, ans=0.0 +2024-09-18 00:43:31,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=334040.0, ans=0.0 +2024-09-18 00:43:56,262 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.23 vs. limit=22.5 +2024-09-18 00:43:58,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=334120.0, ans=0.2 +2024-09-18 00:44:16,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=334160.0, ans=0.07 +2024-09-18 00:44:27,284 INFO [train.py:1198] (0/2) Epoch 19, batch 2100, loss[loss=0.238, ctc_loss=0.1228, cr_loss=0.3591, attn_decoder_loss=0.2428, over 29744.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1387, cr_loss=0.381, attn_decoder_loss=0.2514, over 5802249.08 frames. ], batch size: 81, lr: 5.88e-03, grad_scale: 8.0 +2024-09-18 00:44:58,865 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.93 vs. 
limit=12.0 +2024-09-18 00:45:20,462 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.479e+01 8.379e+01 9.013e+01 9.583e+01 3.257e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-18 00:45:23,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=334320.0, ans=0.025 +2024-09-18 00:45:29,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.23 vs. limit=22.5 +2024-09-18 00:45:42,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=334400.0, ans=0.125 +2024-09-18 00:45:44,008 INFO [train.py:1198] (0/2) Epoch 19, batch 2150, loss[loss=0.2525, ctc_loss=0.1449, cr_loss=0.4004, attn_decoder_loss=0.2556, over 29470.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1382, cr_loss=0.3809, attn_decoder_loss=0.2507, over 5816748.26 frames. ], batch size: 78, lr: 5.88e-03, grad_scale: 8.0 +2024-09-18 00:46:13,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=334480.0, ans=0.1 +2024-09-18 00:46:29,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=334520.0, ans=0.09899494936611666 +2024-09-18 00:46:36,550 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.48 vs. limit=15.0 +2024-09-18 00:46:41,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=334520.0, ans=0.0 +2024-09-18 00:46:42,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=334520.0, ans=0.2 +2024-09-18 00:46:48,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=334560.0, ans=0.2 +2024-09-18 00:46:49,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=334560.0, ans=0.125 +2024-09-18 00:46:58,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=334560.0, ans=0.0 +2024-09-18 00:47:02,261 INFO [train.py:1198] (0/2) Epoch 19, batch 2200, loss[loss=0.2507, ctc_loss=0.1387, cr_loss=0.3793, attn_decoder_loss=0.2547, over 29632.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1389, cr_loss=0.3815, attn_decoder_loss=0.2512, over 5814531.90 frames. ], batch size: 86, lr: 5.87e-03, grad_scale: 8.0 +2024-09-18 00:47:26,642 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.80 vs. 
limit=15.0 +2024-09-18 00:47:36,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=334680.0, ans=0.2 +2024-09-18 00:47:45,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=334680.0, ans=0.125 +2024-09-18 00:47:57,756 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.316e+01 8.512e+01 9.076e+01 9.778e+01 1.780e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-18 00:48:13,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=334760.0, ans=0.0 +2024-09-18 00:48:20,719 INFO [train.py:1198] (0/2) Epoch 19, batch 2250, loss[loss=0.2496, ctc_loss=0.134, cr_loss=0.3697, attn_decoder_loss=0.2542, over 29691.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1383, cr_loss=0.3809, attn_decoder_loss=0.2508, over 5813579.94 frames. ], batch size: 82, lr: 5.87e-03, grad_scale: 8.0 +2024-09-18 00:48:30,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=334800.0, ans=0.1 +2024-09-18 00:48:33,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=334800.0, ans=0.2 +2024-09-18 00:48:42,738 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.53 vs. limit=15.0 +2024-09-18 00:48:52,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=334880.0, ans=0.125 +2024-09-18 00:48:54,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=334880.0, ans=0.1 +2024-09-18 00:48:58,197 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.19 vs. limit=15.0 +2024-09-18 00:48:58,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=334880.0, ans=0.0 +2024-09-18 00:49:24,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=334960.0, ans=0.0 +2024-09-18 00:49:27,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=334960.0, ans=0.0 +2024-09-18 00:49:36,427 INFO [train.py:1198] (0/2) Epoch 19, batch 2300, loss[loss=0.2236, ctc_loss=0.1191, cr_loss=0.3605, attn_decoder_loss=0.2272, over 29697.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1376, cr_loss=0.3794, attn_decoder_loss=0.2499, over 5799942.45 frames. 
], batch size: 72, lr: 5.87e-03, grad_scale: 8.0 +2024-09-18 00:49:38,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=335000.0, ans=0.125 +2024-09-18 00:50:05,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=335080.0, ans=0.125 +2024-09-18 00:50:18,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=335080.0, ans=0.0 +2024-09-18 00:50:29,716 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.467e+01 8.590e+01 9.155e+01 9.781e+01 6.273e+02, threshold=1.831e+02, percent-clipped=2.0 +2024-09-18 00:50:32,316 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.13 vs. limit=12.0 +2024-09-18 00:50:42,248 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.43 vs. limit=15.0 +2024-09-18 00:50:47,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=335160.0, ans=0.0 +2024-09-18 00:50:55,497 INFO [train.py:1198] (0/2) Epoch 19, batch 2350, loss[loss=0.264, ctc_loss=0.1495, cr_loss=0.3919, attn_decoder_loss=0.268, over 29701.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.1382, cr_loss=0.3804, attn_decoder_loss=0.2504, over 5805732.69 frames. ], batch size: 83, lr: 5.87e-03, grad_scale: 8.0 +2024-09-18 00:51:01,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=335200.0, ans=0.1 +2024-09-18 00:51:09,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=335240.0, ans=0.125 +2024-09-18 00:51:38,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=335280.0, ans=0.125 +2024-09-18 00:52:13,757 INFO [train.py:1198] (0/2) Epoch 19, batch 2400, loss[loss=0.2331, ctc_loss=0.1264, cr_loss=0.363, attn_decoder_loss=0.2369, over 29541.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1391, cr_loss=0.3824, attn_decoder_loss=0.2512, over 5809545.44 frames. ], batch size: 76, lr: 5.87e-03, grad_scale: 16.0 +2024-09-18 00:52:17,927 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.74 vs. limit=15.0 +2024-09-18 00:52:29,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=335440.0, ans=0.025 +2024-09-18 00:52:35,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=335440.0, ans=0.0 +2024-09-18 00:52:36,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=7.35 vs. limit=15.0 +2024-09-18 00:52:55,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=335480.0, ans=0.125 +2024-09-18 00:53:07,662 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.40 vs. 
limit=15.0 +2024-09-18 00:53:08,372 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.399e+01 8.603e+01 9.064e+01 9.775e+01 3.534e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-18 00:53:10,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=335520.0, ans=0.125 +2024-09-18 00:53:11,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=335520.0, ans=0.125 +2024-09-18 00:53:22,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=335560.0, ans=0.0 +2024-09-18 00:53:27,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=335560.0, ans=0.025 +2024-09-18 00:53:29,740 INFO [train.py:1198] (0/2) Epoch 19, batch 2450, loss[loss=0.2499, ctc_loss=0.1391, cr_loss=0.3836, attn_decoder_loss=0.2537, over 29708.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1394, cr_loss=0.3827, attn_decoder_loss=0.252, over 5785638.54 frames. ], batch size: 82, lr: 5.87e-03, grad_scale: 8.0 +2024-09-18 00:53:32,231 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.68 vs. limit=22.5 +2024-09-18 00:53:43,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=335640.0, ans=0.125 +2024-09-18 00:53:45,926 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.12 vs. limit=6.0 +2024-09-18 00:54:09,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=335680.0, ans=0.1 +2024-09-18 00:54:12,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=335680.0, ans=15.0 +2024-09-18 00:54:23,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=335720.0, ans=0.0 +2024-09-18 00:54:23,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=335720.0, ans=0.1 +2024-09-18 00:54:31,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=335760.0, ans=0.0 +2024-09-18 00:54:32,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=335760.0, ans=0.125 +2024-09-18 00:54:47,583 INFO [train.py:1198] (0/2) Epoch 19, batch 2500, loss[loss=0.2548, ctc_loss=0.1453, cr_loss=0.3942, attn_decoder_loss=0.2582, over 29636.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1395, cr_loss=0.3832, attn_decoder_loss=0.2521, over 5795828.19 frames. 
], batch size: 86, lr: 5.86e-03, grad_scale: 8.0 +2024-09-18 00:54:54,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=335800.0, ans=0.125 +2024-09-18 00:55:22,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=335880.0, ans=0.125 +2024-09-18 00:55:44,877 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.526e+01 9.010e+01 9.846e+01 5.892e+02, threshold=1.802e+02, percent-clipped=2.0 +2024-09-18 00:55:51,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=335960.0, ans=0.125 +2024-09-18 00:55:51,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=335960.0, ans=0.1 +2024-09-18 00:56:05,423 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-84000.pt +2024-09-18 00:56:13,768 INFO [train.py:1198] (0/2) Epoch 19, batch 2550, loss[loss=0.215, ctc_loss=0.1143, cr_loss=0.3409, attn_decoder_loss=0.2187, over 29327.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1391, cr_loss=0.3826, attn_decoder_loss=0.2519, over 5799470.69 frames. ], batch size: 67, lr: 5.86e-03, grad_scale: 8.0 +2024-09-18 00:56:35,635 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.44 vs. limit=12.0 +2024-09-18 00:56:42,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=336080.0, ans=0.2 +2024-09-18 00:56:47,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=336080.0, ans=0.025 +2024-09-18 00:56:53,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=336080.0, ans=0.0 +2024-09-18 00:56:58,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=336120.0, ans=0.125 +2024-09-18 00:57:04,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=336120.0, ans=0.0 +2024-09-18 00:57:05,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_positive, batch_count=336120.0, ans=0.05 +2024-09-18 00:57:14,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=336160.0, ans=0.125 +2024-09-18 00:57:19,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.36 vs. limit=15.0 +2024-09-18 00:57:29,655 INFO [train.py:1198] (0/2) Epoch 19, batch 2600, loss[loss=0.2402, ctc_loss=0.1287, cr_loss=0.3751, attn_decoder_loss=0.2442, over 29426.00 frames. ], tot_loss[loss=0.249, ctc_loss=0.1396, cr_loss=0.3834, attn_decoder_loss=0.2526, over 5795382.32 frames. 
], batch size: 78, lr: 5.86e-03, grad_scale: 8.0 +2024-09-18 00:57:58,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=336280.0, ans=0.125 +2024-09-18 00:58:00,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=336280.0, ans=0.125 +2024-09-18 00:58:01,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=336280.0, ans=0.125 +2024-09-18 00:58:03,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=336280.0, ans=0.125 +2024-09-18 00:58:26,740 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.488e+01 8.566e+01 8.963e+01 9.636e+01 1.354e+02, threshold=1.793e+02, percent-clipped=0.0 +2024-09-18 00:58:33,112 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:58:43,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=336360.0, ans=0.07 +2024-09-18 00:58:47,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.36 vs. limit=22.5 +2024-09-18 00:58:47,576 INFO [train.py:1198] (0/2) Epoch 19, batch 2650, loss[loss=0.2657, ctc_loss=0.1562, cr_loss=0.4106, attn_decoder_loss=0.2687, over 29261.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1396, cr_loss=0.3836, attn_decoder_loss=0.2527, over 5800960.15 frames. ], batch size: 100, lr: 5.86e-03, grad_scale: 8.0 +2024-09-18 00:58:54,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=336400.0, ans=0.125 +2024-09-18 00:58:58,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=336400.0, ans=0.125 +2024-09-18 00:59:29,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=336480.0, ans=0.125 +2024-09-18 00:59:29,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=336480.0, ans=0.0 +2024-09-18 00:59:58,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=336560.0, ans=0.0 +2024-09-18 01:00:00,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=336560.0, ans=0.125 +2024-09-18 01:00:05,872 INFO [train.py:1198] (0/2) Epoch 19, batch 2700, loss[loss=0.2489, ctc_loss=0.1279, cr_loss=0.3681, attn_decoder_loss=0.2541, over 29534.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1399, cr_loss=0.3844, attn_decoder_loss=0.253, over 5796997.58 frames. ], batch size: 87, lr: 5.86e-03, grad_scale: 8.0 +2024-09-18 01:00:06,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=336600.0, ans=0.0 +2024-09-18 01:00:15,720 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.84 vs. 
limit=15.0 +2024-09-18 01:00:25,711 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:00:28,045 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=11.31 vs. limit=15.0 +2024-09-18 01:00:28,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=336640.0, ans=0.125 +2024-09-18 01:00:43,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=336680.0, ans=0.0 +2024-09-18 01:01:00,333 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.452e+01 8.475e+01 9.059e+01 9.583e+01 2.142e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-18 01:01:22,325 INFO [train.py:1198] (0/2) Epoch 19, batch 2750, loss[loss=0.2437, ctc_loss=0.1493, cr_loss=0.4151, attn_decoder_loss=0.2449, over 29532.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.139, cr_loss=0.3824, attn_decoder_loss=0.2517, over 5795108.48 frames. ], batch size: 75, lr: 5.86e-03, grad_scale: 8.0 +2024-09-18 01:01:48,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=336840.0, ans=0.125 +2024-09-18 01:01:49,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=336840.0, ans=0.2 +2024-09-18 01:01:51,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=336880.0, ans=0.125 +2024-09-18 01:01:51,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=336880.0, ans=0.0 +2024-09-18 01:01:53,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=336880.0, ans=0.04949747468305833 +2024-09-18 01:02:00,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=336880.0, ans=0.2 +2024-09-18 01:02:02,095 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:02:02,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.58 vs. limit=22.5 +2024-09-18 01:02:04,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.62 vs. limit=15.0 +2024-09-18 01:02:09,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=336920.0, ans=0.025 +2024-09-18 01:02:19,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=336920.0, ans=0.1 +2024-09-18 01:02:24,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=336960.0, ans=0.07 +2024-09-18 01:02:24,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=336960.0, ans=0.125 +2024-09-18 01:02:36,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.73 vs. 
limit=15.0 +2024-09-18 01:02:39,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=337000.0, ans=0.125 +2024-09-18 01:02:40,762 INFO [train.py:1198] (0/2) Epoch 19, batch 2800, loss[loss=0.2764, ctc_loss=0.1691, cr_loss=0.3678, attn_decoder_loss=0.2801, over 20147.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1395, cr_loss=0.3822, attn_decoder_loss=0.2519, over 5776815.19 frames. ], batch size: 210, lr: 5.85e-03, grad_scale: 16.0 +2024-09-18 01:02:40,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=337000.0, ans=0.125 +2024-09-18 01:02:43,109 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.66 vs. limit=15.0 +2024-09-18 01:02:51,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=337000.0, ans=0.125 +2024-09-18 01:03:02,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=337040.0, ans=0.0 +2024-09-18 01:03:11,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.73 vs. limit=15.0 +2024-09-18 01:03:12,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=337080.0, ans=0.0 +2024-09-18 01:03:36,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=337120.0, ans=0.0 +2024-09-18 01:03:38,942 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.828e+01 9.014e+01 9.335e+01 1.020e+02 1.618e+02, threshold=1.867e+02, percent-clipped=0.0 +2024-09-18 01:03:58,617 INFO [train.py:1198] (0/2) Epoch 19, batch 2850, loss[loss=0.2326, ctc_loss=0.1264, cr_loss=0.3672, attn_decoder_loss=0.2363, over 29492.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1398, cr_loss=0.3827, attn_decoder_loss=0.2523, over 5762014.80 frames. ], batch size: 77, lr: 5.85e-03, grad_scale: 8.0 +2024-09-18 01:04:04,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=337200.0, ans=0.0 +2024-09-18 01:04:11,692 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.83 vs. 
limit=12.0 +2024-09-18 01:04:17,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=337240.0, ans=0.0 +2024-09-18 01:04:20,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=337240.0, ans=0.125 +2024-09-18 01:04:21,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=337240.0, ans=0.2 +2024-09-18 01:04:34,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=337280.0, ans=0.0 +2024-09-18 01:04:46,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=337320.0, ans=0.125 +2024-09-18 01:04:57,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=337320.0, ans=0.125 +2024-09-18 01:05:06,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=337360.0, ans=0.1 +2024-09-18 01:05:15,134 INFO [train.py:1198] (0/2) Epoch 19, batch 2900, loss[loss=0.2403, ctc_loss=0.1338, cr_loss=0.3688, attn_decoder_loss=0.2439, over 29439.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1404, cr_loss=0.384, attn_decoder_loss=0.2532, over 5787343.58 frames. ], batch size: 79, lr: 5.85e-03, grad_scale: 8.0 +2024-09-18 01:05:25,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=337400.0, ans=0.07 +2024-09-18 01:05:51,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=337480.0, ans=0.0 +2024-09-18 01:06:05,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.21 vs. limit=6.0 +2024-09-18 01:06:13,398 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.580e+01 8.650e+01 9.061e+01 9.798e+01 5.022e+02, threshold=1.812e+02, percent-clipped=2.0 +2024-09-18 01:06:15,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=337520.0, ans=0.125 +2024-09-18 01:06:18,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=337560.0, ans=0.025 +2024-09-18 01:06:23,484 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.66 vs. limit=15.0 +2024-09-18 01:06:25,000 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.30 vs. limit=15.0 +2024-09-18 01:06:33,635 INFO [train.py:1198] (0/2) Epoch 19, batch 2950, loss[loss=0.2434, ctc_loss=0.1444, cr_loss=0.3962, attn_decoder_loss=0.2456, over 29519.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1396, cr_loss=0.3821, attn_decoder_loss=0.2521, over 5781292.61 frames. 
], batch size: 75, lr: 5.85e-03, grad_scale: 8.0 +2024-09-18 01:06:44,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=337600.0, ans=0.025 +2024-09-18 01:06:47,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=337640.0, ans=0.2 +2024-09-18 01:07:14,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=337680.0, ans=0.125 +2024-09-18 01:07:52,306 INFO [train.py:1198] (0/2) Epoch 19, batch 3000, loss[loss=0.2509, ctc_loss=0.1408, cr_loss=0.3941, attn_decoder_loss=0.2543, over 29774.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1395, cr_loss=0.3818, attn_decoder_loss=0.252, over 5781734.16 frames. ], batch size: 81, lr: 5.85e-03, grad_scale: 8.0 +2024-09-18 01:07:52,307 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 01:08:10,715 INFO [train.py:1230] (0/2) Epoch 19, validation: loss=0.2115, ctc_loss=0.0393, cr_loss=5.039e-15, attn_decoder_loss=0.2306, over 944034.00 frames. +2024-09-18 01:08:10,715 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 01:08:46,147 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:08:47,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=337880.0, ans=0.125 +2024-09-18 01:08:53,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=337880.0, ans=0.0 +2024-09-18 01:08:55,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=337920.0, ans=0.1 +2024-09-18 01:08:59,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=337920.0, ans=0.025 +2024-09-18 01:09:07,057 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.545e+01 8.664e+01 9.190e+01 9.808e+01 2.398e+02, threshold=1.838e+02, percent-clipped=1.0 +2024-09-18 01:09:08,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=337920.0, ans=0.1 +2024-09-18 01:09:09,180 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.88 vs. limit=15.0 +2024-09-18 01:09:20,097 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.99 vs. limit=12.0 +2024-09-18 01:09:26,831 INFO [train.py:1198] (0/2) Epoch 19, batch 3050, loss[loss=0.2406, ctc_loss=0.1381, cr_loss=0.3899, attn_decoder_loss=0.2434, over 29517.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1402, cr_loss=0.3835, attn_decoder_loss=0.2528, over 5776057.49 frames. ], batch size: 76, lr: 5.85e-03, grad_scale: 8.0 +2024-09-18 01:09:53,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=338040.0, ans=0.125 +2024-09-18 01:10:27,664 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.23 vs. 
limit=15.0 +2024-09-18 01:10:27,873 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.27 vs. limit=6.0 +2024-09-18 01:10:31,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=338160.0, ans=0.025 +2024-09-18 01:10:34,810 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:10:45,194 INFO [train.py:1198] (0/2) Epoch 19, batch 3100, loss[loss=0.2581, ctc_loss=0.1463, cr_loss=0.4052, attn_decoder_loss=0.2615, over 29293.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1397, cr_loss=0.3831, attn_decoder_loss=0.2524, over 5776086.36 frames. ], batch size: 100, lr: 5.84e-03, grad_scale: 8.0 +2024-09-18 01:11:03,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.66 vs. limit=12.0 +2024-09-18 01:11:06,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=338240.0, ans=15.0 +2024-09-18 01:11:14,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=338240.0, ans=0.07 +2024-09-18 01:11:19,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=338280.0, ans=0.125 +2024-09-18 01:11:27,825 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.67 vs. limit=15.0 +2024-09-18 01:11:28,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=338280.0, ans=0.0 +2024-09-18 01:11:43,973 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.533e+01 9.118e+01 9.870e+01 1.992e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-18 01:11:50,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=338360.0, ans=0.125 +2024-09-18 01:11:54,823 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:12:04,280 INFO [train.py:1198] (0/2) Epoch 19, batch 3150, loss[loss=0.2544, ctc_loss=0.1313, cr_loss=0.354, attn_decoder_loss=0.2602, over 28794.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1399, cr_loss=0.3834, attn_decoder_loss=0.2525, over 5782716.24 frames. ], batch size: 104, lr: 5.84e-03, grad_scale: 8.0 +2024-09-18 01:12:22,075 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.37 vs. 
limit=15.0 +2024-09-18 01:12:33,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=338480.0, ans=0.0 +2024-09-18 01:12:54,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=338520.0, ans=0.1 +2024-09-18 01:12:56,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=338520.0, ans=0.125 +2024-09-18 01:12:59,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=338520.0, ans=0.2 +2024-09-18 01:13:03,292 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.90 vs. limit=10.0 +2024-09-18 01:13:08,567 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:13:17,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=338560.0, ans=0.07 +2024-09-18 01:13:20,357 INFO [train.py:1198] (0/2) Epoch 19, batch 3200, loss[loss=0.2421, ctc_loss=0.1328, cr_loss=0.3794, attn_decoder_loss=0.2458, over 29393.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1395, cr_loss=0.3828, attn_decoder_loss=0.2519, over 5793521.08 frames. ], batch size: 79, lr: 5.84e-03, grad_scale: 16.0 +2024-09-18 01:13:35,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=338640.0, ans=0.0 +2024-09-18 01:13:36,434 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.62 vs. limit=22.5 +2024-09-18 01:13:55,610 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.80 vs. limit=15.0 +2024-09-18 01:14:00,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=338680.0, ans=0.2 +2024-09-18 01:14:00,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=338680.0, ans=0.0 +2024-09-18 01:14:14,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=338720.0, ans=0.025 +2024-09-18 01:14:20,072 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.580e+01 9.076e+01 9.687e+01 2.351e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-18 01:14:31,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=338760.0, ans=0.125 +2024-09-18 01:14:38,513 INFO [train.py:1198] (0/2) Epoch 19, batch 3250, loss[loss=0.2539, ctc_loss=0.134, cr_loss=0.3724, attn_decoder_loss=0.2589, over 29689.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1393, cr_loss=0.3831, attn_decoder_loss=0.2521, over 5799907.71 frames. ], batch size: 84, lr: 5.84e-03, grad_scale: 8.0 +2024-09-18 01:14:42,414 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.86 vs. 
limit=22.5 +2024-09-18 01:14:46,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=338800.0, ans=0.0 +2024-09-18 01:14:47,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=338800.0, ans=0.125 +2024-09-18 01:15:06,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=338840.0, ans=0.0 +2024-09-18 01:15:29,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=338920.0, ans=0.125 +2024-09-18 01:15:30,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=338920.0, ans=10.0 +2024-09-18 01:15:32,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=338920.0, ans=0.0 +2024-09-18 01:15:33,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=338920.0, ans=0.125 +2024-09-18 01:15:33,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=338920.0, ans=0.125 +2024-09-18 01:15:41,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=338960.0, ans=0.1 +2024-09-18 01:15:46,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.92 vs. limit=15.0 +2024-09-18 01:15:56,362 INFO [train.py:1198] (0/2) Epoch 19, batch 3300, loss[loss=0.2517, ctc_loss=0.1394, cr_loss=0.3689, attn_decoder_loss=0.256, over 28336.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1382, cr_loss=0.381, attn_decoder_loss=0.2509, over 5797066.12 frames. ], batch size: 111, lr: 5.84e-03, grad_scale: 8.0 +2024-09-18 01:15:58,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=339000.0, ans=0.125 +2024-09-18 01:16:01,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=339000.0, ans=0.0 +2024-09-18 01:16:32,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=339080.0, ans=0.0 +2024-09-18 01:16:53,881 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 8.663e+01 9.126e+01 9.763e+01 2.623e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-18 01:17:12,571 INFO [train.py:1198] (0/2) Epoch 19, batch 3350, loss[loss=0.2746, ctc_loss=0.1638, cr_loss=0.4372, attn_decoder_loss=0.2772, over 28800.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1394, cr_loss=0.3825, attn_decoder_loss=0.2519, over 5774027.54 frames. ], batch size: 104, lr: 5.84e-03, grad_scale: 8.0 +2024-09-18 01:17:30,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=339240.0, ans=0.125 +2024-09-18 01:17:32,580 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.88 vs. 
limit=15.0 +2024-09-18 01:17:42,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=339240.0, ans=0.05 +2024-09-18 01:17:42,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=339240.0, ans=0.125 +2024-09-18 01:17:44,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=339280.0, ans=0.0 +2024-09-18 01:18:03,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=339320.0, ans=0.0 +2024-09-18 01:18:14,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=339360.0, ans=0.125 +2024-09-18 01:18:22,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=339360.0, ans=0.125 +2024-09-18 01:18:23,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=339360.0, ans=0.125 +2024-09-18 01:18:25,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=339360.0, ans=0.0 +2024-09-18 01:18:28,663 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.89 vs. limit=22.5 +2024-09-18 01:18:30,833 INFO [train.py:1198] (0/2) Epoch 19, batch 3400, loss[loss=0.2195, ctc_loss=0.1257, cr_loss=0.3674, attn_decoder_loss=0.2218, over 29339.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1394, cr_loss=0.3822, attn_decoder_loss=0.2518, over 5766468.41 frames. ], batch size: 67, lr: 5.83e-03, grad_scale: 8.0 +2024-09-18 01:18:47,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=339440.0, ans=0.125 +2024-09-18 01:19:10,143 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.68 vs. limit=10.0 +2024-09-18 01:19:15,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=339480.0, ans=0.125 +2024-09-18 01:19:27,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=339520.0, ans=0.125 +2024-09-18 01:19:30,515 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.395e+01 8.511e+01 9.195e+01 9.878e+01 2.681e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-18 01:19:44,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=339560.0, ans=0.0 +2024-09-18 01:19:45,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=339560.0, ans=0.125 +2024-09-18 01:19:48,775 INFO [train.py:1198] (0/2) Epoch 19, batch 3450, loss[loss=0.2656, ctc_loss=0.155, cr_loss=0.3924, attn_decoder_loss=0.2691, over 28328.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1394, cr_loss=0.3827, attn_decoder_loss=0.2521, over 5774687.01 frames. 
], batch size: 111, lr: 5.83e-03, grad_scale: 8.0 +2024-09-18 01:19:48,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=339600.0, ans=0.0 +2024-09-18 01:20:17,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=339680.0, ans=0.2 +2024-09-18 01:20:28,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=339680.0, ans=0.125 +2024-09-18 01:20:49,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=339760.0, ans=0.0 +2024-09-18 01:20:54,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=339760.0, ans=0.1 +2024-09-18 01:20:55,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=339760.0, ans=0.125 +2024-09-18 01:21:04,601 INFO [train.py:1198] (0/2) Epoch 19, batch 3500, loss[loss=0.2133, ctc_loss=0.1045, cr_loss=0.3062, attn_decoder_loss=0.2186, over 29332.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.139, cr_loss=0.3816, attn_decoder_loss=0.2513, over 5775661.99 frames. ], batch size: 71, lr: 5.83e-03, grad_scale: 8.0 +2024-09-18 01:21:08,606 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.45 vs. limit=15.0 +2024-09-18 01:21:46,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=339880.0, ans=0.2 +2024-09-18 01:22:04,060 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.228e+01 8.478e+01 8.934e+01 9.584e+01 2.565e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 01:22:16,868 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.23 vs. limit=15.0 +2024-09-18 01:22:22,259 INFO [train.py:1198] (0/2) Epoch 19, batch 3550, loss[loss=0.2606, ctc_loss=0.1401, cr_loss=0.3966, attn_decoder_loss=0.2651, over 29731.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.139, cr_loss=0.3819, attn_decoder_loss=0.2514, over 5782115.28 frames. 
], batch size: 89, lr: 5.83e-03, grad_scale: 8.0 +2024-09-18 01:22:22,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=340000.0, ans=0.0 +2024-09-18 01:22:38,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=340040.0, ans=0.05 +2024-09-18 01:22:44,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=340040.0, ans=0.0 +2024-09-18 01:22:46,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=340040.0, ans=0.2 +2024-09-18 01:22:59,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=340080.0, ans=0.0 +2024-09-18 01:23:04,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=340080.0, ans=0.0 +2024-09-18 01:23:09,302 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.35 vs. limit=15.0 +2024-09-18 01:23:19,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=340120.0, ans=0.0 +2024-09-18 01:23:28,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=340160.0, ans=0.025 +2024-09-18 01:23:38,726 INFO [train.py:1198] (0/2) Epoch 19, batch 3600, loss[loss=0.2448, ctc_loss=0.1339, cr_loss=0.3589, attn_decoder_loss=0.2492, over 29532.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1391, cr_loss=0.3817, attn_decoder_loss=0.2518, over 5791026.17 frames. ], batch size: 77, lr: 5.83e-03, grad_scale: 16.0 +2024-09-18 01:23:43,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=340200.0, ans=0.0 +2024-09-18 01:23:46,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=340200.0, ans=0.2 +2024-09-18 01:24:12,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=340280.0, ans=0.025 +2024-09-18 01:24:31,988 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.83 vs. limit=10.0 +2024-09-18 01:24:37,099 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.610e+01 9.225e+01 9.925e+01 8.683e+02, threshold=1.845e+02, percent-clipped=1.0 +2024-09-18 01:24:40,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=340360.0, ans=0.125 +2024-09-18 01:24:42,641 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.52 vs. limit=12.0 +2024-09-18 01:24:53,605 INFO [train.py:1198] (0/2) Epoch 19, batch 3650, loss[loss=0.2655, ctc_loss=0.1451, cr_loss=0.4055, attn_decoder_loss=0.2699, over 29514.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1381, cr_loss=0.3803, attn_decoder_loss=0.2508, over 5791858.87 frames. 
], batch size: 90, lr: 5.83e-03, grad_scale: 8.0 +2024-09-18 01:25:14,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=340440.0, ans=0.125 +2024-09-18 01:25:37,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=340520.0, ans=0.125 +2024-09-18 01:25:44,472 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.63 vs. limit=15.0 +2024-09-18 01:25:46,900 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.06 vs. limit=15.0 +2024-09-18 01:25:52,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=340560.0, ans=0.2 +2024-09-18 01:26:00,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=340560.0, ans=0.0 +2024-09-18 01:26:08,880 INFO [train.py:1198] (0/2) Epoch 19, batch 3700, loss[loss=0.2593, ctc_loss=0.1501, cr_loss=0.3898, attn_decoder_loss=0.2628, over 29696.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1381, cr_loss=0.3802, attn_decoder_loss=0.2509, over 5800885.86 frames. ], batch size: 84, lr: 5.82e-03, grad_scale: 8.0 +2024-09-18 01:26:10,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=340600.0, ans=0.1 +2024-09-18 01:26:15,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=340600.0, ans=0.125 +2024-09-18 01:26:22,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=340640.0, ans=0.1 +2024-09-18 01:26:39,734 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.64 vs. limit=10.0 +2024-09-18 01:27:07,341 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.588e+01 9.238e+01 9.671e+01 4.711e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-18 01:27:09,758 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.14 vs. limit=15.0 +2024-09-18 01:27:24,464 INFO [train.py:1198] (0/2) Epoch 19, batch 3750, loss[loss=0.2182, ctc_loss=0.1168, cr_loss=0.3422, attn_decoder_loss=0.2218, over 29356.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1384, cr_loss=0.381, attn_decoder_loss=0.251, over 5805059.54 frames. 
], batch size: 67, lr: 5.82e-03, grad_scale: 8.0 +2024-09-18 01:27:27,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=340800.0, ans=0.125 +2024-09-18 01:27:29,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=340800.0, ans=0.0 +2024-09-18 01:27:44,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=340840.0, ans=0.125 +2024-09-18 01:27:59,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=340880.0, ans=0.125 +2024-09-18 01:28:01,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=340880.0, ans=0.125 +2024-09-18 01:28:17,096 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.87 vs. limit=15.0 +2024-09-18 01:28:30,527 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.23 vs. limit=15.0 +2024-09-18 01:28:41,212 INFO [train.py:1198] (0/2) Epoch 19, batch 3800, loss[loss=0.2665, ctc_loss=0.1505, cr_loss=0.4011, attn_decoder_loss=0.2705, over 29634.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1383, cr_loss=0.3801, attn_decoder_loss=0.2508, over 5797336.56 frames. ], batch size: 86, lr: 5.82e-03, grad_scale: 8.0 +2024-09-18 01:28:41,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=341000.0, ans=0.125 +2024-09-18 01:29:05,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=341040.0, ans=0.125 +2024-09-18 01:29:16,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=341080.0, ans=0.0 +2024-09-18 01:29:25,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=341120.0, ans=0.0 +2024-09-18 01:29:39,592 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 8.913e+01 9.389e+01 9.954e+01 1.370e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-18 01:29:45,040 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.76 vs. limit=15.0 +2024-09-18 01:29:49,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=341160.0, ans=0.125 +2024-09-18 01:29:51,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=341160.0, ans=0.125 +2024-09-18 01:29:53,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=341160.0, ans=0.0 +2024-09-18 01:29:57,755 INFO [train.py:1198] (0/2) Epoch 19, batch 3850, loss[loss=0.2724, ctc_loss=0.1619, cr_loss=0.4002, attn_decoder_loss=0.2758, over 29297.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1381, cr_loss=0.3799, attn_decoder_loss=0.2506, over 5811062.32 frames. 
], batch size: 100, lr: 5.82e-03, grad_scale: 8.0 +2024-09-18 01:30:01,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=341200.0, ans=0.125 +2024-09-18 01:30:11,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=341240.0, ans=0.125 +2024-09-18 01:30:21,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=341240.0, ans=0.125 +2024-09-18 01:30:35,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=341280.0, ans=0.1 +2024-09-18 01:30:59,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=341360.0, ans=0.0 +2024-09-18 01:31:12,335 INFO [train.py:1198] (0/2) Epoch 19, batch 3900, loss[loss=0.2652, ctc_loss=0.1505, cr_loss=0.4217, attn_decoder_loss=0.2686, over 29628.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1385, cr_loss=0.3813, attn_decoder_loss=0.2512, over 5815180.10 frames. ], batch size: 86, lr: 5.82e-03, grad_scale: 8.0 +2024-09-18 01:31:21,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=341400.0, ans=0.0 +2024-09-18 01:31:29,537 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.18 vs. limit=15.0 +2024-09-18 01:31:37,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=341440.0, ans=0.125 +2024-09-18 01:31:39,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=341440.0, ans=0.0 +2024-09-18 01:31:58,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=341520.0, ans=0.05 +2024-09-18 01:32:10,246 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.279e+01 8.574e+01 8.925e+01 9.348e+01 1.659e+02, threshold=1.785e+02, percent-clipped=0.0 +2024-09-18 01:32:12,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=341560.0, ans=0.95 +2024-09-18 01:32:13,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=341560.0, ans=0.0 +2024-09-18 01:32:18,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=341560.0, ans=0.2 +2024-09-18 01:32:18,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=341560.0, ans=0.125 +2024-09-18 01:32:27,248 INFO [train.py:1198] (0/2) Epoch 19, batch 3950, loss[loss=0.2606, ctc_loss=0.1425, cr_loss=0.3952, attn_decoder_loss=0.2649, over 29450.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1378, cr_loss=0.3811, attn_decoder_loss=0.2512, over 5834807.87 frames. 
], batch size: 97, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:32:40,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=341640.0, ans=0.2 +2024-09-18 01:32:54,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=341640.0, ans=0.0 +2024-09-18 01:32:58,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=341680.0, ans=0.025 +2024-09-18 01:33:34,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=341760.0, ans=0.0 +2024-09-18 01:33:40,201 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:33:42,799 INFO [train.py:1198] (0/2) Epoch 19, batch 4000, loss[loss=0.2372, ctc_loss=0.1347, cr_loss=0.382, attn_decoder_loss=0.2401, over 29499.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1385, cr_loss=0.3814, attn_decoder_loss=0.2514, over 5811936.68 frames. ], batch size: 74, lr: 5.81e-03, grad_scale: 16.0 +2024-09-18 01:33:43,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=341800.0, ans=0.125 +2024-09-18 01:33:45,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=341800.0, ans=0.0 +2024-09-18 01:33:53,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=341800.0, ans=0.035 +2024-09-18 01:33:56,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=341840.0, ans=0.125 +2024-09-18 01:33:59,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=341840.0, ans=0.0 +2024-09-18 01:34:06,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=341840.0, ans=0.125 +2024-09-18 01:34:26,377 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.80 vs. limit=15.0 +2024-09-18 01:34:41,825 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.874e+01 9.386e+01 1.032e+02 2.674e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-18 01:34:50,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=341960.0, ans=0.125 +2024-09-18 01:34:57,877 INFO [train.py:1198] (0/2) Epoch 19, batch 4050, loss[loss=0.2775, ctc_loss=0.1817, cr_loss=0.4095, attn_decoder_loss=0.2791, over 20582.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1387, cr_loss=0.381, attn_decoder_loss=0.2513, over 5795892.34 frames. ], batch size: 209, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:35:00,242 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.73 vs. 
limit=6.0 +2024-09-18 01:35:06,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=342000.0, ans=0.125 +2024-09-18 01:35:11,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=342040.0, ans=0.125 +2024-09-18 01:35:11,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=342040.0, ans=0.1 +2024-09-18 01:35:46,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=342120.0, ans=0.125 +2024-09-18 01:35:52,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=342120.0, ans=0.0 +2024-09-18 01:35:59,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.14 vs. limit=22.5 +2024-09-18 01:36:05,907 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:36:06,181 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.53 vs. limit=15.0 +2024-09-18 01:36:11,638 INFO [train.py:1198] (0/2) Epoch 19, batch 4100, loss[loss=0.2688, ctc_loss=0.1603, cr_loss=0.4115, attn_decoder_loss=0.2718, over 29534.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1387, cr_loss=0.3809, attn_decoder_loss=0.2514, over 5791409.23 frames. ], batch size: 90, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:36:16,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=342200.0, ans=0.125 +2024-09-18 01:36:41,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=342280.0, ans=0.04949747468305833 +2024-09-18 01:36:47,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_na.min_abs, batch_count=342280.0, ans=0.02 +2024-09-18 01:37:02,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.43 vs. limit=15.0 +2024-09-18 01:37:06,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=342320.0, ans=0.125 +2024-09-18 01:37:11,591 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.528e+01 8.625e+01 9.215e+01 9.767e+01 2.484e+02, threshold=1.843e+02, percent-clipped=3.0 +2024-09-18 01:37:12,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=342360.0, ans=0.125 +2024-09-18 01:37:27,164 INFO [train.py:1198] (0/2) Epoch 19, batch 4150, loss[loss=0.2428, ctc_loss=0.1369, cr_loss=0.3842, attn_decoder_loss=0.246, over 29471.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1385, cr_loss=0.3801, attn_decoder_loss=0.2509, over 5797376.13 frames. ], batch size: 77, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:37:27,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=342400.0, ans=0.125 +2024-09-18 01:37:37,102 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.75 vs. 
limit=15.0 +2024-09-18 01:37:55,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=342480.0, ans=0.125 +2024-09-18 01:38:23,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.82 vs. limit=22.5 +2024-09-18 01:38:28,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.98 vs. limit=15.0 +2024-09-18 01:38:39,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=342600.0, ans=0.1 +2024-09-18 01:38:40,957 INFO [train.py:1198] (0/2) Epoch 19, batch 4200, loss[loss=0.2702, ctc_loss=0.1547, cr_loss=0.4237, attn_decoder_loss=0.2737, over 29534.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1381, cr_loss=0.3799, attn_decoder_loss=0.251, over 5799443.61 frames. ], batch size: 90, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:38:42,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=342600.0, ans=0.1 +2024-09-18 01:38:51,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=342600.0, ans=0.125 +2024-09-18 01:39:01,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=342640.0, ans=0.1 +2024-09-18 01:39:13,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=342680.0, ans=10.0 +2024-09-18 01:39:41,093 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.293e+01 8.502e+01 9.115e+01 9.695e+01 2.005e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-18 01:39:55,927 INFO [train.py:1198] (0/2) Epoch 19, batch 4250, loss[loss=0.2382, ctc_loss=0.126, cr_loss=0.3465, attn_decoder_loss=0.243, over 29515.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1386, cr_loss=0.3805, attn_decoder_loss=0.2514, over 5805464.58 frames. ], batch size: 74, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:39:58,450 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.64 vs. limit=10.0 +2024-09-18 01:40:03,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=342800.0, ans=0.125 +2024-09-18 01:40:27,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=342880.0, ans=0.5 +2024-09-18 01:40:52,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=342920.0, ans=0.0 +2024-09-18 01:41:02,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=342960.0, ans=0.125 +2024-09-18 01:41:03,141 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.19 vs. limit=22.5 +2024-09-18 01:41:11,123 INFO [train.py:1198] (0/2) Epoch 19, batch 4300, loss[loss=0.2618, ctc_loss=0.1465, cr_loss=0.3836, attn_decoder_loss=0.2661, over 29574.00 frames. 
], tot_loss[loss=0.2478, ctc_loss=0.1382, cr_loss=0.3798, attn_decoder_loss=0.2515, over 5795035.70 frames. ], batch size: 87, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:41:27,950 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:41:41,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=343080.0, ans=0.125 +2024-09-18 01:41:53,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=343080.0, ans=0.0 +2024-09-18 01:41:54,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=343120.0, ans=0.025 +2024-09-18 01:41:56,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=343120.0, ans=0.0 +2024-09-18 01:41:57,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=343120.0, ans=0.0 +2024-09-18 01:42:10,630 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.597e+01 8.890e+01 9.360e+01 1.027e+02 1.828e+02, threshold=1.872e+02, percent-clipped=1.0 +2024-09-18 01:42:10,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=343160.0, ans=10.0 +2024-09-18 01:42:18,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=343160.0, ans=0.0 +2024-09-18 01:42:27,030 INFO [train.py:1198] (0/2) Epoch 19, batch 4350, loss[loss=0.2769, ctc_loss=0.1679, cr_loss=0.4427, attn_decoder_loss=0.2791, over 29539.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1406, cr_loss=0.3843, attn_decoder_loss=0.2545, over 5797664.17 frames. ], batch size: 97, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:42:30,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=343200.0, ans=0.125 +2024-09-18 01:42:37,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=343200.0, ans=0.125 +2024-09-18 01:42:48,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=343240.0, ans=0.125 +2024-09-18 01:42:56,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=343280.0, ans=0.0 +2024-09-18 01:43:01,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=343280.0, ans=0.125 +2024-09-18 01:43:16,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.47 vs. limit=15.0 +2024-09-18 01:43:34,456 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.22 vs. limit=22.5 +2024-09-18 01:43:38,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=343360.0, ans=0.2 +2024-09-18 01:43:40,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.46 vs. 
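limit=6.0

Note on the Whitening lines from scaling.py: each compares a measured whiteness metric for a module's activations against that module's configured limit (metric=3.46 vs. limit=6.0 just above). A plausible reading, stated as an assumption based on Zipformer-style whitening rather than a transcription of this scaling.py: the metric is the mean squared eigenvalue of the per-group feature covariance divided by the square of its mean eigenvalue, so it equals 1.0 for perfectly decorrelated, equal-variance channels and grows as the spectrum becomes lopsided, and a corrective gradient is applied only while the metric exceeds the limit. A sketch of that metric:

import numpy as np

def whitening_metric(x):
    # x: (num_frames, num_channels) activations for one whitening group.
    x = x - x.mean(axis=0)
    cov = (x.T @ x) / len(x)                        # channel covariance
    mean_eig = np.trace(cov) / cov.shape[0]         # mean eigenvalue
    mean_sq_eig = (cov * cov).sum() / cov.shape[0]  # mean squared eigenvalue
    return float(mean_sq_eig / (mean_eig ** 2 + 1e-20))

rng = np.random.default_rng(0)
white = rng.standard_normal((2000, 128))
skewed = white.copy()
skewed[:, 0] *= 20.0                  # one dominant channel
print(whitening_metric(white))        # ~1.0: comfortably under the limits here
print(whitening_metric(skewed))       # large: would trigger the penalty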
+2024-09-18 01:43:41,024 INFO [train.py:1198] (0/2) Epoch 19, batch 4400, loss[loss=0.2699, ctc_loss=0.1691, cr_loss=0.4207, attn_decoder_loss=0.2718, over 27098.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1422, cr_loss=0.3874, attn_decoder_loss=0.2567, over 5769029.54 frames. ], batch size: 124, lr: 5.80e-03, grad_scale: 16.0 +2024-09-18 01:44:04,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.81 vs. limit=15.0 +2024-09-18 01:44:27,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=343520.0, ans=0.125 +2024-09-18 01:44:28,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=343520.0, ans=0.0 +2024-09-18 01:44:28,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=343520.0, ans=0.025 +2024-09-18 01:44:32,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=343520.0, ans=0.125 +2024-09-18 01:44:34,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=343520.0, ans=0.125 +2024-09-18 01:44:35,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=343520.0, ans=0.1 +2024-09-18 01:44:41,230 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.011e+01 9.170e+01 9.647e+01 1.019e+02 1.899e+02, threshold=1.929e+02, percent-clipped=1.0 +2024-09-18 01:44:55,098 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=10.98 vs. limit=15.0 +2024-09-18 01:44:55,330 INFO [train.py:1198] (0/2) Epoch 19, batch 4450, loss[loss=0.2763, ctc_loss=0.1854, cr_loss=0.4312, attn_decoder_loss=0.2768, over 20408.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1469, cr_loss=0.3928, attn_decoder_loss=0.2596, over 5585000.48 frames.
], batch size: 209, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:45:07,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=343600.0, ans=0.125 +2024-09-18 01:45:20,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=343640.0, ans=0.04949747468305833 +2024-09-18 01:45:20,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=343640.0, ans=0.125 +2024-09-18 01:45:23,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=343640.0, ans=0.125 +2024-09-18 01:45:24,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff3.min_abs, batch_count=343680.0, ans=0.2 +2024-09-18 01:45:34,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=343680.0, ans=0.2 +2024-09-18 01:45:54,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=343760.0, ans=0.1 +2024-09-18 01:46:00,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=343760.0, ans=0.125 +2024-09-18 01:46:11,295 INFO [train.py:1198] (0/2) Epoch 19, batch 4500, loss[loss=0.2821, ctc_loss=0.194, cr_loss=0.4192, attn_decoder_loss=0.2826, over 19830.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1518, cr_loss=0.3955, attn_decoder_loss=0.262, over 5240343.05 frames. ], batch size: 209, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:46:14,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=343800.0, ans=0.0 +2024-09-18 01:46:20,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=343800.0, ans=0.0 +2024-09-18 01:46:29,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=343840.0, ans=0.0 +2024-09-18 01:46:31,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=343840.0, ans=0.2 +2024-09-18 01:46:41,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=343880.0, ans=0.1 +2024-09-18 01:46:43,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=343880.0, ans=0.025 +2024-09-18 01:46:45,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=343880.0, ans=15.0 +2024-09-18 01:46:48,754 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-19.pt +2024-09-18 01:47:35,798 INFO [train.py:1198] (0/2) Epoch 20, batch 0, loss[loss=0.2242, ctc_loss=0.1163, cr_loss=0.3464, attn_decoder_loss=0.2285, over 29626.00 frames. ], tot_loss[loss=0.2242, ctc_loss=0.1163, cr_loss=0.3464, attn_decoder_loss=0.2285, over 29626.00 frames. 
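], batch size: 73, lr: 5.65e-03, grad_scale: 16.0

Note on the optim.py WARNING lines: each reports five grad-norm quartiles (min / 25% / median / 75% / max), a clipping threshold, and the percentage of recent updates that were clipped. In every instance in this log the threshold equals Clipping_scale times the median quartile (for the warning a few lines below, 2.0 * 1.165e+02 = 2.331e+02), which suggests a median-tracking adaptive clipping rule. A hedged sketch of such a rule (function names and the exact bookkeeping are illustrative assumptions):

from statistics import median

def clip_threshold(recent_grad_norms, clipping_scale=2.0):
    # Matches the logged pattern: threshold = clipping_scale * median norm.
    return clipping_scale * median(recent_grad_norms)

def clip_factor(grad_norm, threshold):
    # Gradients whose norm exceeds the threshold are scaled down; the
    # log's "percent-clipped" is the share of updates where this fires.
    return min(1.0, threshold / grad_norm)

# Using the quartiles from the WARNING below as stand-in recent norms:
norms = [90.11, 109.4, 116.5, 125.7, 339.7]
print(clip_threshold(norms))          # 233.0, i.e. the logged 2.331e+02
print(clip_factor(339.7, 233.0))      # < 1.0: this update would be clipped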
+2024-09-18 01:47:35,799 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 01:47:41,085 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.4.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([3.0105, 2.9573, 3.0339, 3.3353], device='cuda:0') +2024-09-18 01:47:52,984 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.7348, 4.6365, 4.4636, 4.2583], device='cuda:0') +2024-09-18 01:47:54,257 INFO [train.py:1230] (0/2) Epoch 20, validation: loss=0.2118, ctc_loss=0.0395, cr_loss=4.878e-15, attn_decoder_loss=0.2309, over 944034.00 frames. +2024-09-18 01:47:54,257 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 01:48:23,230 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.011e+01 1.094e+02 1.165e+02 1.257e+02 3.397e+02, threshold=2.331e+02, percent-clipped=2.0 +2024-09-18 01:48:26,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=343980.0, ans=0.125 +2024-09-18 01:48:31,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=343980.0, ans=0.07 +2024-09-18 01:48:37,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=343980.0, ans=0.1 +2024-09-18 01:49:12,431 INFO [train.py:1198] (0/2) Epoch 20, batch 50, loss[loss=0.2231, ctc_loss=0.1199, cr_loss=0.3623, attn_decoder_loss=0.2265, over 29427.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1408, cr_loss=0.3845, attn_decoder_loss=0.2527, over 1267027.01 frames. ], batch size: 70, lr: 5.64e-03, grad_scale: 4.0 +2024-09-18 01:49:19,473 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.10 vs. limit=15.0 +2024-09-18 01:49:31,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.55 vs. limit=22.5 +2024-09-18 01:49:40,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.43 vs. limit=15.0 +2024-09-18 01:49:44,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=344180.0, ans=0.125 +2024-09-18 01:49:50,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=344180.0, ans=0.0 +2024-09-18 01:50:09,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=344220.0, ans=0.0 +2024-09-18 01:50:09,553 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.61 vs. limit=15.0 +2024-09-18 01:50:10,597 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:50:11,246 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.16 vs.
limit=15.0 +2024-09-18 01:50:24,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=344260.0, ans=0.09899494936611666 +2024-09-18 01:50:24,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=344260.0, ans=0.125 +2024-09-18 01:50:28,289 INFO [train.py:1198] (0/2) Epoch 20, batch 100, loss[loss=0.2462, ctc_loss=0.1381, cr_loss=0.3805, attn_decoder_loss=0.2498, over 29537.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1428, cr_loss=0.3888, attn_decoder_loss=0.255, over 2252985.71 frames. ], batch size: 76, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:50:51,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=344340.0, ans=0.5 +2024-09-18 01:50:55,267 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.781e+01 9.298e+01 1.012e+02 1.493e+02, threshold=1.860e+02, percent-clipped=0.0 +2024-09-18 01:51:03,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=344380.0, ans=0.0 +2024-09-18 01:51:10,672 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.92 vs. limit=8.0 +2024-09-18 01:51:24,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=344420.0, ans=0.1 +2024-09-18 01:51:35,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=344460.0, ans=0.0 +2024-09-18 01:51:42,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=344460.0, ans=0.125 +2024-09-18 01:51:44,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=344500.0, ans=0.125 +2024-09-18 01:51:45,547 INFO [train.py:1198] (0/2) Epoch 20, batch 150, loss[loss=0.2302, ctc_loss=0.1297, cr_loss=0.3543, attn_decoder_loss=0.2335, over 29436.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1393, cr_loss=0.3824, attn_decoder_loss=0.2522, over 3048574.84 frames. ], batch size: 70, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:51:48,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=344500.0, ans=0.0 +2024-09-18 01:51:57,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=344500.0, ans=0.04949747468305833 +2024-09-18 01:52:09,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=344540.0, ans=0.125 +2024-09-18 01:52:19,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=344580.0, ans=0.125 +2024-09-18 01:52:30,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=344580.0, ans=0.125 +2024-09-18 01:52:57,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.66 vs. limit=22.5 +2024-09-18 01:53:00,917 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.86 vs. 
limit=12.0 +2024-09-18 01:53:03,337 INFO [train.py:1198] (0/2) Epoch 20, batch 200, loss[loss=0.2557, ctc_loss=0.1437, cr_loss=0.3875, attn_decoder_loss=0.2595, over 27342.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.138, cr_loss=0.3807, attn_decoder_loss=0.2509, over 3660866.33 frames. ], batch size: 124, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:53:12,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=344700.0, ans=0.125 +2024-09-18 01:53:30,636 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.380e+01 8.894e+01 9.610e+01 1.111e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-18 01:53:35,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=344780.0, ans=0.125 +2024-09-18 01:53:42,813 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.80 vs. limit=15.0 +2024-09-18 01:53:50,043 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.22 vs. limit=22.5 +2024-09-18 01:53:54,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=344820.0, ans=0.0 +2024-09-18 01:54:19,408 INFO [train.py:1198] (0/2) Epoch 20, batch 250, loss[loss=0.2619, ctc_loss=0.1474, cr_loss=0.4084, attn_decoder_loss=0.2656, over 29233.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1381, cr_loss=0.3816, attn_decoder_loss=0.2509, over 4143634.82 frames. ], batch size: 100, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:54:19,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=344900.0, ans=0.0 +2024-09-18 01:54:21,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=344900.0, ans=0.05 +2024-09-18 01:54:36,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=344940.0, ans=0.125 +2024-09-18 01:54:49,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=344980.0, ans=0.125 +2024-09-18 01:54:54,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=344980.0, ans=0.1 +2024-09-18 01:54:54,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=344980.0, ans=0.2 +2024-09-18 01:55:06,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=345020.0, ans=0.05 +2024-09-18 01:55:14,384 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.37 vs. limit=15.0 +2024-09-18 01:55:34,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=345060.0, ans=0.0 +2024-09-18 01:55:37,650 INFO [train.py:1198] (0/2) Epoch 20, batch 300, loss[loss=0.2725, ctc_loss=0.1647, cr_loss=0.451, attn_decoder_loss=0.2745, over 29532.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1373, cr_loss=0.3804, attn_decoder_loss=0.2504, over 4511045.12 frames. 
], batch size: 92, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:55:45,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=345100.0, ans=0.125 +2024-09-18 01:56:07,394 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.480e+01 8.946e+01 9.469e+01 2.628e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-18 01:56:17,428 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.50 vs. limit=15.0 +2024-09-18 01:56:56,017 INFO [train.py:1198] (0/2) Epoch 20, batch 350, loss[loss=0.2194, ctc_loss=0.1089, cr_loss=0.3294, attn_decoder_loss=0.2244, over 29343.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1375, cr_loss=0.3806, attn_decoder_loss=0.2509, over 4795450.48 frames. ], batch size: 71, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 01:56:58,421 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.58 vs. limit=15.0 +2024-09-18 01:57:14,977 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.69 vs. limit=22.5 +2024-09-18 01:57:24,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=345380.0, ans=0.0 +2024-09-18 01:57:50,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=345420.0, ans=0.125 +2024-09-18 01:57:53,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=345420.0, ans=0.0 +2024-09-18 01:58:04,456 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.84 vs. limit=15.0 +2024-09-18 01:58:11,300 INFO [train.py:1198] (0/2) Epoch 20, batch 400, loss[loss=0.2529, ctc_loss=0.1453, cr_loss=0.3959, attn_decoder_loss=0.2561, over 29713.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1364, cr_loss=0.3782, attn_decoder_loss=0.2502, over 5026525.13 frames. ], batch size: 82, lr: 5.63e-03, grad_scale: 16.0 +2024-09-18 01:58:13,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=345500.0, ans=0.125 +2024-09-18 01:58:27,543 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=10.68 vs. limit=12.0 +2024-09-18 01:58:40,352 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.527e+01 8.703e+01 9.237e+01 1.010e+02 2.283e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-18 01:58:48,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=345580.0, ans=0.0 +2024-09-18 01:59:20,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=345660.0, ans=0.09899494936611666 +2024-09-18 01:59:30,512 INFO [train.py:1198] (0/2) Epoch 20, batch 450, loss[loss=0.2557, ctc_loss=0.1478, cr_loss=0.3998, attn_decoder_loss=0.2588, over 29684.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1368, cr_loss=0.3783, attn_decoder_loss=0.2504, over 5187987.42 frames. 
], batch size: 83, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:00:05,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=345780.0, ans=0.0 +2024-09-18 02:00:05,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=345780.0, ans=0.125 +2024-09-18 02:00:14,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=345780.0, ans=0.125 +2024-09-18 02:00:20,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=345820.0, ans=0.0 +2024-09-18 02:00:34,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0 +2024-09-18 02:00:43,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=345860.0, ans=0.025 +2024-09-18 02:00:48,894 INFO [train.py:1198] (0/2) Epoch 20, batch 500, loss[loss=0.2682, ctc_loss=0.155, cr_loss=0.4101, attn_decoder_loss=0.2716, over 29458.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1367, cr_loss=0.3784, attn_decoder_loss=0.2498, over 5331128.41 frames. ], batch size: 94, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:01:13,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=345940.0, ans=0.09899494936611666 +2024-09-18 02:01:17,859 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.440e+01 8.932e+01 9.633e+01 1.955e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-18 02:01:21,264 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:01:23,557 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.89 vs. limit=10.0 +2024-09-18 02:02:05,130 INFO [train.py:1198] (0/2) Epoch 20, batch 550, loss[loss=0.2524, ctc_loss=0.142, cr_loss=0.402, attn_decoder_loss=0.2557, over 28883.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1373, cr_loss=0.3793, attn_decoder_loss=0.2503, over 5421758.17 frames. ], batch size: 104, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:02:09,115 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.97 vs. limit=22.5 +2024-09-18 02:02:14,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=346100.0, ans=0.0 +2024-09-18 02:02:22,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=346140.0, ans=0.1 +2024-09-18 02:02:29,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=346140.0, ans=0.125 +2024-09-18 02:02:31,727 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.98 vs. 
limit=15.0 +2024-09-18 02:02:49,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=346220.0, ans=0.125 +2024-09-18 02:03:16,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=346260.0, ans=0.125 +2024-09-18 02:03:23,206 INFO [train.py:1198] (0/2) Epoch 20, batch 600, loss[loss=0.2664, ctc_loss=0.1538, cr_loss=0.4188, attn_decoder_loss=0.2696, over 29172.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.1377, cr_loss=0.381, attn_decoder_loss=0.2504, over 5508352.30 frames. ], batch size: 100, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:03:25,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=346300.0, ans=0.0 +2024-09-18 02:03:54,168 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.587e+01 8.611e+01 9.331e+01 1.005e+02 2.865e+02, threshold=1.866e+02, percent-clipped=3.0 +2024-09-18 02:03:56,716 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.85 vs. limit=10.0 +2024-09-18 02:04:17,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=346420.0, ans=0.0 +2024-09-18 02:04:18,107 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.07 vs. limit=12.0 +2024-09-18 02:04:25,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=346460.0, ans=0.1 +2024-09-18 02:04:26,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=346460.0, ans=0.1 +2024-09-18 02:04:41,259 INFO [train.py:1198] (0/2) Epoch 20, batch 650, loss[loss=0.2446, ctc_loss=0.1308, cr_loss=0.3677, attn_decoder_loss=0.249, over 29782.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1371, cr_loss=0.3802, attn_decoder_loss=0.2501, over 5585563.32 frames. ], batch size: 81, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:04:44,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=346500.0, ans=0.0 +2024-09-18 02:04:44,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=346500.0, ans=0.1 +2024-09-18 02:04:46,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=346500.0, ans=0.2 +2024-09-18 02:04:49,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=346500.0, ans=0.0 +2024-09-18 02:04:49,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=346500.0, ans=0.125 +2024-09-18 02:05:45,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.56 vs. 
limit=6.0 +2024-09-18 02:05:47,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=346660.0, ans=0.125 +2024-09-18 02:05:56,825 INFO [train.py:1198] (0/2) Epoch 20, batch 700, loss[loss=0.2573, ctc_loss=0.1475, cr_loss=0.4012, attn_decoder_loss=0.2606, over 29562.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1376, cr_loss=0.3814, attn_decoder_loss=0.2509, over 5636756.44 frames. ], batch size: 76, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:06:12,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=346740.0, ans=0.2 +2024-09-18 02:06:15,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=346740.0, ans=0.0 +2024-09-18 02:06:25,603 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.807e+01 8.542e+01 8.952e+01 9.567e+01 1.859e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-18 02:06:32,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=346780.0, ans=0.125 +2024-09-18 02:06:45,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=346820.0, ans=0.05 +2024-09-18 02:06:56,249 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:07:14,613 INFO [train.py:1198] (0/2) Epoch 20, batch 750, loss[loss=0.2431, ctc_loss=0.1367, cr_loss=0.3882, attn_decoder_loss=0.2463, over 29723.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1376, cr_loss=0.381, attn_decoder_loss=0.2507, over 5677044.51 frames. ], batch size: 82, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:07:30,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=346940.0, ans=0.125 +2024-09-18 02:07:31,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=346940.0, ans=0.1 +2024-09-18 02:07:32,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.56 vs. limit=15.0 +2024-09-18 02:07:54,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=346980.0, ans=0.0 +2024-09-18 02:08:02,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.70 vs. limit=15.0 +2024-09-18 02:08:20,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=347060.0, ans=0.0 +2024-09-18 02:08:22,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.81 vs. 
limit=15.0 +2024-09-18 02:08:25,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=347060.0, ans=0.1 +2024-09-18 02:08:29,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=347060.0, ans=0.125 +2024-09-18 02:08:32,357 INFO [train.py:1198] (0/2) Epoch 20, batch 800, loss[loss=0.2268, ctc_loss=0.1204, cr_loss=0.3471, attn_decoder_loss=0.2309, over 29621.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1373, cr_loss=0.3801, attn_decoder_loss=0.2504, over 5707625.72 frames. ], batch size: 73, lr: 5.62e-03, grad_scale: 16.0 +2024-09-18 02:09:02,551 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.412e+01 8.904e+01 9.473e+01 1.507e+02, threshold=1.781e+02, percent-clipped=0.0 +2024-09-18 02:09:12,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=347180.0, ans=0.125 +2024-09-18 02:09:32,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.67 vs. limit=6.0 +2024-09-18 02:09:48,236 INFO [train.py:1198] (0/2) Epoch 20, batch 850, loss[loss=0.2491, ctc_loss=0.1286, cr_loss=0.3516, attn_decoder_loss=0.2546, over 29693.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1368, cr_loss=0.3792, attn_decoder_loss=0.25, over 5736558.62 frames. ], batch size: 89, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:10:03,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=347340.0, ans=0.2 +2024-09-18 02:10:09,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=347340.0, ans=0.125 +2024-09-18 02:10:25,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=347380.0, ans=0.125 +2024-09-18 02:10:31,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=347420.0, ans=0.2 +2024-09-18 02:10:31,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=347420.0, ans=0.125 +2024-09-18 02:10:37,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=6.97 vs. limit=12.0 +2024-09-18 02:10:47,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=347460.0, ans=0.0 +2024-09-18 02:10:59,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=347460.0, ans=0.1 +2024-09-18 02:11:03,669 INFO [train.py:1198] (0/2) Epoch 20, batch 900, loss[loss=0.2238, ctc_loss=0.1201, cr_loss=0.3563, attn_decoder_loss=0.2274, over 29627.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.1372, cr_loss=0.3802, attn_decoder_loss=0.2506, over 5740436.80 frames. 
], batch size: 73, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:11:09,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=347500.0, ans=0.1 +2024-09-18 02:11:33,075 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.02 vs. limit=12.0 +2024-09-18 02:11:35,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=347540.0, ans=0.125 +2024-09-18 02:11:38,371 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.746e+01 8.646e+01 9.308e+01 1.001e+02 2.040e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-18 02:11:40,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=347580.0, ans=0.1 +2024-09-18 02:11:43,854 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=9.91 vs. limit=12.0 +2024-09-18 02:11:56,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=347620.0, ans=0.125 +2024-09-18 02:11:59,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=347620.0, ans=0.125 +2024-09-18 02:12:01,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=347620.0, ans=0.125 +2024-09-18 02:12:11,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=347660.0, ans=0.125 +2024-09-18 02:12:14,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=347660.0, ans=0.1 +2024-09-18 02:12:22,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten.whitening_limit, batch_count=347700.0, ans=15.0 +2024-09-18 02:12:23,500 INFO [train.py:1198] (0/2) Epoch 20, batch 950, loss[loss=0.2211, ctc_loss=0.1124, cr_loss=0.353, attn_decoder_loss=0.2253, over 29521.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1372, cr_loss=0.3797, attn_decoder_loss=0.2507, over 5740257.45 frames. ], batch size: 74, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:12:37,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=347740.0, ans=0.0 +2024-09-18 02:13:04,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=347780.0, ans=0.1 +2024-09-18 02:13:06,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=347780.0, ans=0.05 +2024-09-18 02:13:38,986 INFO [train.py:1198] (0/2) Epoch 20, batch 1000, loss[loss=0.2371, ctc_loss=0.1289, cr_loss=0.3719, attn_decoder_loss=0.2408, over 29496.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.1385, cr_loss=0.3814, attn_decoder_loss=0.2517, over 5734693.99 frames. 
], batch size: 77, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:13:42,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=347900.0, ans=0.0 +2024-09-18 02:14:07,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=347980.0, ans=0.125 +2024-09-18 02:14:09,210 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.603e+01 8.704e+01 9.397e+01 1.040e+02 1.771e+02, threshold=1.879e+02, percent-clipped=0.0 +2024-09-18 02:14:11,568 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.97 vs. limit=12.0 +2024-09-18 02:14:17,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=347980.0, ans=0.0 +2024-09-18 02:14:41,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=348060.0, ans=0.2 +2024-09-18 02:14:50,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=348060.0, ans=0.05 +2024-09-18 02:14:54,854 INFO [train.py:1198] (0/2) Epoch 20, batch 1050, loss[loss=0.2633, ctc_loss=0.1501, cr_loss=0.3983, attn_decoder_loss=0.267, over 29670.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1378, cr_loss=0.3805, attn_decoder_loss=0.251, over 5744286.71 frames. ], batch size: 85, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:14:56,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=348100.0, ans=0.125 +2024-09-18 02:15:19,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=348140.0, ans=10.0 +2024-09-18 02:15:24,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=348140.0, ans=0.0 +2024-09-18 02:15:28,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=348180.0, ans=0.125 +2024-09-18 02:15:33,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=348180.0, ans=0.125 +2024-09-18 02:15:47,714 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.53 vs. limit=15.0 +2024-09-18 02:16:08,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=348260.0, ans=0.125 +2024-09-18 02:16:14,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=348300.0, ans=0.125 +2024-09-18 02:16:15,295 INFO [train.py:1198] (0/2) Epoch 20, batch 1100, loss[loss=0.241, ctc_loss=0.1369, cr_loss=0.375, attn_decoder_loss=0.2442, over 29458.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1376, cr_loss=0.3802, attn_decoder_loss=0.2506, over 5756047.67 frames. 
], batch size: 78, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:16:21,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=348300.0, ans=0.0 +2024-09-18 02:16:32,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=348340.0, ans=0.2 +2024-09-18 02:16:38,738 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.33 vs. limit=15.0 +2024-09-18 02:16:45,744 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.272e+01 8.537e+01 9.169e+01 9.929e+01 2.148e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-18 02:17:01,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=348420.0, ans=0.09899494936611666 +2024-09-18 02:17:05,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=348420.0, ans=0.0 +2024-09-18 02:17:07,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=348420.0, ans=0.125 +2024-09-18 02:17:11,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=348420.0, ans=0.125 +2024-09-18 02:17:19,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=348460.0, ans=0.07 +2024-09-18 02:17:31,318 INFO [train.py:1198] (0/2) Epoch 20, batch 1150, loss[loss=0.2363, ctc_loss=0.1297, cr_loss=0.3513, attn_decoder_loss=0.2404, over 29466.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1376, cr_loss=0.3798, attn_decoder_loss=0.2506, over 5753317.00 frames. ], batch size: 78, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:17:43,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=348500.0, ans=0.0 +2024-09-18 02:18:24,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=348620.0, ans=0.125 +2024-09-18 02:18:46,993 INFO [train.py:1198] (0/2) Epoch 20, batch 1200, loss[loss=0.257, ctc_loss=0.1449, cr_loss=0.3909, attn_decoder_loss=0.2608, over 29683.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1385, cr_loss=0.3811, attn_decoder_loss=0.2514, over 5747066.43 frames. 
], batch size: 85, lr: 5.61e-03, grad_scale: 16.0 +2024-09-18 02:18:47,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=348700.0, ans=0.0 +2024-09-18 02:18:47,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=348700.0, ans=0.1 +2024-09-18 02:19:03,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=348700.0, ans=0.125 +2024-09-18 02:19:08,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=348740.0, ans=0.125 +2024-09-18 02:19:21,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=348780.0, ans=0.025 +2024-09-18 02:19:23,034 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.725e+01 9.303e+01 1.008e+02 1.601e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-18 02:19:33,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=348780.0, ans=0.05 +2024-09-18 02:19:33,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=348780.0, ans=0.2 +2024-09-18 02:19:45,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=348820.0, ans=0.125 +2024-09-18 02:20:07,688 INFO [train.py:1198] (0/2) Epoch 20, batch 1250, loss[loss=0.2565, ctc_loss=0.1465, cr_loss=0.3961, attn_decoder_loss=0.2599, over 29550.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1385, cr_loss=0.3815, attn_decoder_loss=0.2518, over 5773851.17 frames. ], batch size: 92, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:20:13,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=348900.0, ans=0.0 +2024-09-18 02:20:31,616 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.96 vs. limit=15.0 +2024-09-18 02:20:41,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=348980.0, ans=0.1 +2024-09-18 02:20:50,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=348980.0, ans=0.1 +2024-09-18 02:20:51,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=349020.0, ans=0.125 +2024-09-18 02:21:23,272 INFO [train.py:1198] (0/2) Epoch 20, batch 1300, loss[loss=0.256, ctc_loss=0.1429, cr_loss=0.3877, attn_decoder_loss=0.26, over 28663.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1381, cr_loss=0.3811, attn_decoder_loss=0.2512, over 5778210.46 frames. 
], batch size: 112, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:21:28,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=349100.0, ans=0.0 +2024-09-18 02:21:29,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=349100.0, ans=0.125 +2024-09-18 02:21:55,242 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.474e+01 8.542e+01 9.047e+01 9.656e+01 1.934e+02, threshold=1.809e+02, percent-clipped=1.0 +2024-09-18 02:22:00,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=349180.0, ans=0.95 +2024-09-18 02:22:06,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=349180.0, ans=0.2 +2024-09-18 02:22:10,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=349220.0, ans=0.125 +2024-09-18 02:22:38,975 INFO [train.py:1198] (0/2) Epoch 20, batch 1350, loss[loss=0.254, ctc_loss=0.142, cr_loss=0.3818, attn_decoder_loss=0.258, over 29784.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1373, cr_loss=0.3799, attn_decoder_loss=0.2508, over 5797128.66 frames. ], batch size: 81, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:22:52,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=349340.0, ans=0.2 +2024-09-18 02:22:55,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=349340.0, ans=0.2 +2024-09-18 02:23:46,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=349460.0, ans=0.0 +2024-09-18 02:23:56,657 INFO [train.py:1198] (0/2) Epoch 20, batch 1400, loss[loss=0.2125, ctc_loss=0.1113, cr_loss=0.3166, attn_decoder_loss=0.2167, over 29606.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.1369, cr_loss=0.3794, attn_decoder_loss=0.2506, over 5808266.14 frames. ], batch size: 69, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:24:28,081 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.145e+01 8.400e+01 8.906e+01 9.445e+01 1.188e+02, threshold=1.781e+02, percent-clipped=0.0 +2024-09-18 02:24:40,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=349620.0, ans=0.1 +2024-09-18 02:24:48,977 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.85 vs. limit=15.0 +2024-09-18 02:24:53,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.60 vs. limit=15.0 +2024-09-18 02:24:56,167 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.77 vs. limit=15.0 +2024-09-18 02:24:58,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=349660.0, ans=0.09899494936611666 +2024-09-18 02:25:12,149 INFO [train.py:1198] (0/2) Epoch 20, batch 1450, loss[loss=0.2591, ctc_loss=0.1436, cr_loss=0.3949, attn_decoder_loss=0.2631, over 29465.00 frames. 
], tot_loss[loss=0.2472, ctc_loss=0.1372, cr_loss=0.3805, attn_decoder_loss=0.2509, over 5804520.07 frames. ], batch size: 94, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:25:16,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=349700.0, ans=0.125 +2024-09-18 02:25:37,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=349740.0, ans=0.125 +2024-09-18 02:25:39,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=349740.0, ans=0.2 +2024-09-18 02:25:41,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=349780.0, ans=0.125 +2024-09-18 02:25:44,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=349780.0, ans=0.125 +2024-09-18 02:25:57,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=349820.0, ans=0.125 +2024-09-18 02:26:05,993 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.45 vs. limit=15.0 +2024-09-18 02:26:16,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.50 vs. limit=15.0 +2024-09-18 02:26:27,545 INFO [train.py:1198] (0/2) Epoch 20, batch 1500, loss[loss=0.2584, ctc_loss=0.1493, cr_loss=0.4002, attn_decoder_loss=0.2616, over 29643.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.1379, cr_loss=0.3822, attn_decoder_loss=0.2518, over 5804815.17 frames. ], batch size: 86, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:26:58,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.10 vs. limit=15.0 +2024-09-18 02:27:04,147 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.814e+01 9.450e+01 1.000e+02 1.461e+02, threshold=1.890e+02, percent-clipped=0.0 +2024-09-18 02:27:06,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=349980.0, ans=0.125 +2024-09-18 02:27:13,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=349980.0, ans=0.09899494936611666 +2024-09-18 02:27:33,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=350060.0, ans=0.0 +2024-09-18 02:27:41,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=350060.0, ans=0.125 +2024-09-18 02:27:48,324 INFO [train.py:1198] (0/2) Epoch 20, batch 1550, loss[loss=0.2528, ctc_loss=0.141, cr_loss=0.3985, attn_decoder_loss=0.2563, over 29528.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.138, cr_loss=0.3822, attn_decoder_loss=0.2516, over 5781896.51 frames. ], batch size: 90, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:27:54,005 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.01 vs. 
limit=15.0 +2024-09-18 02:27:57,051 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.70 vs. limit=15.0 +2024-09-18 02:28:02,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=350140.0, ans=0.07 +2024-09-18 02:28:12,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=350140.0, ans=0.1 +2024-09-18 02:28:22,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.42 vs. limit=6.0 +2024-09-18 02:28:39,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=350220.0, ans=0.125 +2024-09-18 02:28:52,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=350260.0, ans=0.125 +2024-09-18 02:28:58,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=350260.0, ans=0.125 +2024-09-18 02:29:01,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=350260.0, ans=0.125 +2024-09-18 02:29:03,923 INFO [train.py:1198] (0/2) Epoch 20, batch 1600, loss[loss=0.2547, ctc_loss=0.1387, cr_loss=0.367, attn_decoder_loss=0.2594, over 29673.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1376, cr_loss=0.3808, attn_decoder_loss=0.2509, over 5765078.39 frames. ], batch size: 85, lr: 5.59e-03, grad_scale: 16.0 +2024-09-18 02:29:12,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.35 vs. limit=22.5 +2024-09-18 02:29:16,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=350300.0, ans=0.125 +2024-09-18 02:29:17,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=350340.0, ans=0.1 +2024-09-18 02:29:30,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=350340.0, ans=0.025 +2024-09-18 02:29:37,442 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.469e+01 8.748e+01 9.299e+01 1.007e+02 2.517e+02, threshold=1.860e+02, percent-clipped=3.0 +2024-09-18 02:29:44,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=350380.0, ans=0.1 +2024-09-18 02:30:15,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=350460.0, ans=0.125 +2024-09-18 02:30:16,513 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.11 vs. limit=15.0 +2024-09-18 02:30:17,552 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.36 vs. limit=12.0 +2024-09-18 02:30:19,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.20 vs. 
limit=12.0 +2024-09-18 02:30:19,926 INFO [train.py:1198] (0/2) Epoch 20, batch 1650, loss[loss=0.2573, ctc_loss=0.1378, cr_loss=0.3867, attn_decoder_loss=0.262, over 29701.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1376, cr_loss=0.3808, attn_decoder_loss=0.2508, over 5761142.65 frames. ], batch size: 89, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:30:20,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=350500.0, ans=0.125 +2024-09-18 02:30:36,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.19 vs. limit=22.5 +2024-09-18 02:31:13,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=350620.0, ans=0.1 +2024-09-18 02:31:16,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=350620.0, ans=0.125 +2024-09-18 02:31:17,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=350620.0, ans=0.125 +2024-09-18 02:31:28,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=350660.0, ans=0.025 +2024-09-18 02:31:39,888 INFO [train.py:1198] (0/2) Epoch 20, batch 1700, loss[loss=0.2193, ctc_loss=0.1115, cr_loss=0.3329, attn_decoder_loss=0.2239, over 29589.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1369, cr_loss=0.38, attn_decoder_loss=0.2504, over 5782815.44 frames. ], batch size: 69, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:31:44,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=350700.0, ans=0.1 +2024-09-18 02:31:57,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.58 vs. limit=15.0 +2024-09-18 02:32:13,142 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.259e+01 8.537e+01 9.114e+01 9.746e+01 1.208e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-18 02:32:13,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=350780.0, ans=0.025 +2024-09-18 02:32:16,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=350780.0, ans=0.1 +2024-09-18 02:32:53,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=350860.0, ans=0.125 +2024-09-18 02:32:55,910 INFO [train.py:1198] (0/2) Epoch 20, batch 1750, loss[loss=0.2183, ctc_loss=0.1095, cr_loss=0.324, attn_decoder_loss=0.2231, over 29358.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1367, cr_loss=0.3798, attn_decoder_loss=0.25, over 5790414.98 frames. 
], batch size: 67, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:33:09,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=350940.0, ans=0.025 +2024-09-18 02:33:09,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=350940.0, ans=0.125 +2024-09-18 02:33:11,835 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.10 vs. limit=15.0 +2024-09-18 02:33:14,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=350940.0, ans=0.125 +2024-09-18 02:33:18,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=350940.0, ans=0.125 +2024-09-18 02:33:32,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=350980.0, ans=0.025 +2024-09-18 02:33:56,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=351060.0, ans=0.125 +2024-09-18 02:34:11,443 INFO [train.py:1198] (0/2) Epoch 20, batch 1800, loss[loss=0.252, ctc_loss=0.1427, cr_loss=0.3947, attn_decoder_loss=0.2553, over 29697.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1368, cr_loss=0.3802, attn_decoder_loss=0.2504, over 5792789.38 frames. ], batch size: 83, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:34:16,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=351100.0, ans=0.125 +2024-09-18 02:34:33,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=351140.0, ans=10.0 +2024-09-18 02:34:48,919 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.074e+01 8.564e+01 9.228e+01 9.746e+01 1.564e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-18 02:35:00,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=351220.0, ans=0.125 +2024-09-18 02:35:01,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=351220.0, ans=0.125 +2024-09-18 02:35:32,098 INFO [train.py:1198] (0/2) Epoch 20, batch 1850, loss[loss=0.2563, ctc_loss=0.1397, cr_loss=0.3806, attn_decoder_loss=0.2608, over 29627.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1367, cr_loss=0.3799, attn_decoder_loss=0.2504, over 5798053.69 frames. ], batch size: 86, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:36:23,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=351420.0, ans=0.0 +2024-09-18 02:36:27,496 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.23 vs. 
limit=15.0 +2024-09-18 02:36:28,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=351420.0, ans=0.2 +2024-09-18 02:36:32,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=351460.0, ans=0.125 +2024-09-18 02:36:32,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=351460.0, ans=0.0 +2024-09-18 02:36:47,372 INFO [train.py:1198] (0/2) Epoch 20, batch 1900, loss[loss=0.2528, ctc_loss=0.1383, cr_loss=0.3826, attn_decoder_loss=0.257, over 29729.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1372, cr_loss=0.3809, attn_decoder_loss=0.251, over 5805997.81 frames. ], batch size: 89, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:36:49,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=351500.0, ans=0.125 +2024-09-18 02:37:02,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=351540.0, ans=0.1 +2024-09-18 02:37:13,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=351540.0, ans=0.0 +2024-09-18 02:37:17,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=351580.0, ans=0.125 +2024-09-18 02:37:18,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=351580.0, ans=10.0 +2024-09-18 02:37:20,731 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.814e+01 8.754e+01 9.062e+01 9.837e+01 1.384e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 02:37:22,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=351580.0, ans=0.1 +2024-09-18 02:37:30,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=351580.0, ans=0.125 +2024-09-18 02:37:43,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=351620.0, ans=0.125 +2024-09-18 02:37:52,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=351660.0, ans=0.0 +2024-09-18 02:38:00,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=351660.0, ans=0.0 +2024-09-18 02:38:03,054 INFO [train.py:1198] (0/2) Epoch 20, batch 1950, loss[loss=0.2454, ctc_loss=0.1412, cr_loss=0.3794, attn_decoder_loss=0.2486, over 29451.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1379, cr_loss=0.382, attn_decoder_loss=0.2521, over 5820208.86 frames. ], batch size: 78, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:38:53,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=351820.0, ans=0.0 +2024-09-18 02:39:23,351 INFO [train.py:1198] (0/2) Epoch 20, batch 2000, loss[loss=0.218, ctc_loss=0.1063, cr_loss=0.325, attn_decoder_loss=0.2232, over 29379.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1386, cr_loss=0.383, attn_decoder_loss=0.253, over 5796734.97 frames. 
], batch size: 67, lr: 5.58e-03, grad_scale: 16.0 +2024-09-18 02:39:33,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.45 vs. limit=15.0 +2024-09-18 02:39:56,873 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.619e+01 9.159e+01 9.729e+01 7.125e+02, threshold=1.832e+02, percent-clipped=2.0 +2024-09-18 02:40:00,364 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-88000.pt +2024-09-18 02:40:09,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=351980.0, ans=0.1 +2024-09-18 02:40:20,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=352020.0, ans=0.1 +2024-09-18 02:40:24,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=352020.0, ans=0.0 +2024-09-18 02:40:25,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=352020.0, ans=0.05 +2024-09-18 02:40:27,101 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:40:28,614 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:40:43,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=352060.0, ans=0.125 +2024-09-18 02:40:46,374 INFO [train.py:1198] (0/2) Epoch 20, batch 2050, loss[loss=0.2206, ctc_loss=0.1145, cr_loss=0.3312, attn_decoder_loss=0.225, over 29423.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1381, cr_loss=0.3816, attn_decoder_loss=0.252, over 5787097.09 frames. ], batch size: 70, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:40:46,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=352100.0, ans=0.95 +2024-09-18 02:40:48,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.16 vs. limit=15.0 +2024-09-18 02:40:57,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=352100.0, ans=0.0 +2024-09-18 02:40:59,405 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.75 vs. limit=22.5 +2024-09-18 02:41:04,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.57 vs. limit=6.0 +2024-09-18 02:41:17,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=352180.0, ans=0.125 +2024-09-18 02:41:26,761 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.49 vs. 
limit=15.0 +2024-09-18 02:41:37,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=352220.0, ans=0.025 +2024-09-18 02:41:39,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=352220.0, ans=0.125 +2024-09-18 02:41:39,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=352220.0, ans=0.0 +2024-09-18 02:41:48,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=352260.0, ans=0.1 +2024-09-18 02:41:54,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=352260.0, ans=0.125 +2024-09-18 02:42:01,889 INFO [train.py:1198] (0/2) Epoch 20, batch 2100, loss[loss=0.2535, ctc_loss=0.144, cr_loss=0.404, attn_decoder_loss=0.2567, over 29745.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1373, cr_loss=0.3804, attn_decoder_loss=0.251, over 5798375.86 frames. ], batch size: 81, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:42:12,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=352300.0, ans=0.125 +2024-09-18 02:42:22,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=352340.0, ans=0.015 +2024-09-18 02:42:22,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=352340.0, ans=0.04949747468305833 +2024-09-18 02:42:32,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=352380.0, ans=0.5 +2024-09-18 02:42:38,581 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.126e+01 8.424e+01 8.970e+01 9.709e+01 1.410e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 02:43:01,021 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.67 vs. limit=22.5 +2024-09-18 02:43:18,689 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:43:21,525 INFO [train.py:1198] (0/2) Epoch 20, batch 2150, loss[loss=0.2437, ctc_loss=0.1295, cr_loss=0.3659, attn_decoder_loss=0.2483, over 29438.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1365, cr_loss=0.3792, attn_decoder_loss=0.2504, over 5813614.13 frames. ], batch size: 78, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:43:23,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=352500.0, ans=0.0 +2024-09-18 02:43:25,554 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.17 vs. limit=10.0 +2024-09-18 02:43:26,920 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.71 vs. limit=22.5 +2024-09-18 02:43:35,690 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:43:36,475 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.65 vs. 
limit=15.0 +2024-09-18 02:43:41,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=352540.0, ans=0.2 +2024-09-18 02:43:47,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=352540.0, ans=0.0 +2024-09-18 02:43:50,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=352580.0, ans=0.1 +2024-09-18 02:44:05,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=352620.0, ans=0.025 +2024-09-18 02:44:08,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=352620.0, ans=0.125 +2024-09-18 02:44:13,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=352620.0, ans=0.0 +2024-09-18 02:44:31,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=352660.0, ans=0.1 +2024-09-18 02:44:37,544 INFO [train.py:1198] (0/2) Epoch 20, batch 2200, loss[loss=0.2543, ctc_loss=0.1411, cr_loss=0.4038, attn_decoder_loss=0.2579, over 29608.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1368, cr_loss=0.3798, attn_decoder_loss=0.2504, over 5810636.82 frames. ], batch size: 86, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:44:42,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=352700.0, ans=0.125 +2024-09-18 02:45:08,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=352780.0, ans=0.125 +2024-09-18 02:45:12,311 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.647e+01 9.174e+01 9.915e+01 1.896e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-18 02:45:17,794 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:45:22,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=352820.0, ans=0.125 +2024-09-18 02:45:25,472 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.44 vs. limit=12.0 +2024-09-18 02:45:38,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=352860.0, ans=0.125 +2024-09-18 02:45:53,595 INFO [train.py:1198] (0/2) Epoch 20, batch 2250, loss[loss=0.2503, ctc_loss=0.1398, cr_loss=0.3811, attn_decoder_loss=0.2541, over 29726.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1364, cr_loss=0.3791, attn_decoder_loss=0.2502, over 5810498.26 frames. 
], batch size: 82, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:45:56,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=352900.0, ans=0.125 +2024-09-18 02:46:55,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=353020.0, ans=0.0 +2024-09-18 02:47:12,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=353100.0, ans=0.0 +2024-09-18 02:47:13,751 INFO [train.py:1198] (0/2) Epoch 20, batch 2300, loss[loss=0.2074, ctc_loss=0.1006, cr_loss=0.3184, attn_decoder_loss=0.2122, over 29315.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1358, cr_loss=0.3774, attn_decoder_loss=0.2492, over 5798414.10 frames. ], batch size: 71, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:47:15,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=353100.0, ans=0.0 +2024-09-18 02:47:46,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.95 vs. limit=15.0 +2024-09-18 02:47:48,557 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.567e+01 8.594e+01 9.374e+01 1.007e+02 2.489e+02, threshold=1.875e+02, percent-clipped=2.0 +2024-09-18 02:47:59,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=353220.0, ans=0.0 +2024-09-18 02:48:08,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=353220.0, ans=0.125 +2024-09-18 02:48:13,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=353260.0, ans=0.2 +2024-09-18 02:48:29,461 INFO [train.py:1198] (0/2) Epoch 20, batch 2350, loss[loss=0.2681, ctc_loss=0.1525, cr_loss=0.4275, attn_decoder_loss=0.2714, over 29682.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1356, cr_loss=0.3774, attn_decoder_loss=0.2493, over 5803855.60 frames. ], batch size: 83, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:48:43,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=353340.0, ans=0.125 +2024-09-18 02:48:49,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=353340.0, ans=0.125 +2024-09-18 02:48:52,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=353340.0, ans=0.125 +2024-09-18 02:48:54,340 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.89 vs. 
limit=15.0 +2024-09-18 02:48:56,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=353340.0, ans=0.025 +2024-09-18 02:49:06,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=353380.0, ans=0.0 +2024-09-18 02:49:07,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=353380.0, ans=0.1 +2024-09-18 02:49:30,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=353460.0, ans=0.2 +2024-09-18 02:49:45,311 INFO [train.py:1198] (0/2) Epoch 20, batch 2400, loss[loss=0.2375, ctc_loss=0.1323, cr_loss=0.3744, attn_decoder_loss=0.2408, over 29520.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1362, cr_loss=0.3784, attn_decoder_loss=0.2498, over 5807549.93 frames. ], batch size: 76, lr: 5.57e-03, grad_scale: 16.0 +2024-09-18 02:49:59,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=353540.0, ans=0.0 +2024-09-18 02:50:12,532 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.52 vs. limit=15.0 +2024-09-18 02:50:20,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=353580.0, ans=0.1 +2024-09-18 02:50:23,694 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.283e+01 8.660e+01 9.243e+01 9.853e+01 2.252e+02, threshold=1.849e+02, percent-clipped=1.0 +2024-09-18 02:50:54,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=353660.0, ans=0.09899494936611666 +2024-09-18 02:50:54,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=353660.0, ans=0.0 +2024-09-18 02:51:05,906 INFO [train.py:1198] (0/2) Epoch 20, batch 2450, loss[loss=0.2478, ctc_loss=0.1341, cr_loss=0.3916, attn_decoder_loss=0.2518, over 29719.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.1367, cr_loss=0.3794, attn_decoder_loss=0.2506, over 5784039.22 frames. ], batch size: 82, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:51:13,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=353700.0, ans=0.2 +2024-09-18 02:51:53,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=353820.0, ans=0.1 +2024-09-18 02:52:09,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.97 vs. limit=15.0 +2024-09-18 02:52:11,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=353860.0, ans=0.05 +2024-09-18 02:52:17,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=353860.0, ans=0.2 +2024-09-18 02:52:21,951 INFO [train.py:1198] (0/2) Epoch 20, batch 2500, loss[loss=0.2647, ctc_loss=0.1478, cr_loss=0.4101, attn_decoder_loss=0.2686, over 29648.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1367, cr_loss=0.3802, attn_decoder_loss=0.2508, over 5794405.68 frames. 
], batch size: 86, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:52:42,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.29 vs. limit=10.0 +2024-09-18 02:52:55,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=353980.0, ans=0.0 +2024-09-18 02:52:58,484 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.450e+01 8.592e+01 8.974e+01 9.558e+01 1.231e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-18 02:53:18,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=354020.0, ans=0.0 +2024-09-18 02:53:38,056 INFO [train.py:1198] (0/2) Epoch 20, batch 2550, loss[loss=0.2302, ctc_loss=0.1233, cr_loss=0.3521, attn_decoder_loss=0.2343, over 29334.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1368, cr_loss=0.3805, attn_decoder_loss=0.2509, over 5797502.10 frames. ], batch size: 67, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:53:47,714 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.98 vs. limit=6.0 +2024-09-18 02:54:16,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=354180.0, ans=0.125 +2024-09-18 02:54:20,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=354180.0, ans=0.125 +2024-09-18 02:54:26,248 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.91 vs. limit=15.0 +2024-09-18 02:54:37,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=354220.0, ans=0.125 +2024-09-18 02:54:58,067 INFO [train.py:1198] (0/2) Epoch 20, batch 2600, loss[loss=0.2464, ctc_loss=0.1373, cr_loss=0.3978, attn_decoder_loss=0.2497, over 29422.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1369, cr_loss=0.3804, attn_decoder_loss=0.2513, over 5794595.17 frames. ], batch size: 78, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 02:55:04,886 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.13 vs. limit=22.5 +2024-09-18 02:55:28,931 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.70 vs. limit=6.0 +2024-09-18 02:55:34,145 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.411e+01 8.665e+01 9.316e+01 9.977e+01 1.565e+02, threshold=1.863e+02, percent-clipped=0.0 +2024-09-18 02:55:46,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.72 vs. 
limit=15.0 +2024-09-18 02:55:47,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=354420.0, ans=0.0 +2024-09-18 02:55:57,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=354460.0, ans=0.2 +2024-09-18 02:55:59,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=354460.0, ans=0.09899494936611666 +2024-09-18 02:56:13,768 INFO [train.py:1198] (0/2) Epoch 20, batch 2650, loss[loss=0.2594, ctc_loss=0.1408, cr_loss=0.3804, attn_decoder_loss=0.2642, over 29201.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.137, cr_loss=0.3809, attn_decoder_loss=0.2517, over 5800268.29 frames. ], batch size: 100, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 02:56:27,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=354540.0, ans=0.125 +2024-09-18 02:56:50,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=354580.0, ans=0.125 +2024-09-18 02:57:03,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=354620.0, ans=0.025 +2024-09-18 02:57:29,350 INFO [train.py:1198] (0/2) Epoch 20, batch 2700, loss[loss=0.2617, ctc_loss=0.1464, cr_loss=0.3939, attn_decoder_loss=0.2658, over 29546.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1375, cr_loss=0.3813, attn_decoder_loss=0.2521, over 5794869.41 frames. ], batch size: 87, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 02:57:34,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=354700.0, ans=0.07 +2024-09-18 02:57:35,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=354700.0, ans=0.0 +2024-09-18 02:57:35,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=354700.0, ans=0.2 +2024-09-18 02:57:38,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=354700.0, ans=15.0 +2024-09-18 02:57:51,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=354740.0, ans=0.125 +2024-09-18 02:58:07,619 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.392e+01 8.506e+01 9.049e+01 9.472e+01 1.287e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-18 02:58:15,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=354820.0, ans=0.2 +2024-09-18 02:58:20,800 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.67 vs. 
limit=6.0 +2024-09-18 02:58:24,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=354820.0, ans=0.0 +2024-09-18 02:58:27,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=354820.0, ans=0.125 +2024-09-18 02:58:33,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=354860.0, ans=0.0 +2024-09-18 02:58:45,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=354860.0, ans=0.0 +2024-09-18 02:58:49,462 INFO [train.py:1198] (0/2) Epoch 20, batch 2750, loss[loss=0.2412, ctc_loss=0.1359, cr_loss=0.387, attn_decoder_loss=0.2443, over 29524.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1366, cr_loss=0.3797, attn_decoder_loss=0.2508, over 5793027.81 frames. ], batch size: 75, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 02:58:54,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=354900.0, ans=0.125 +2024-09-18 02:59:04,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=354940.0, ans=0.2 +2024-09-18 02:59:12,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=354940.0, ans=0.0 +2024-09-18 02:59:14,879 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.62 vs. limit=15.0 +2024-09-18 02:59:21,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=354980.0, ans=0.125 +2024-09-18 02:59:32,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=354980.0, ans=0.1 +2024-09-18 02:59:41,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=355020.0, ans=0.0 +2024-09-18 02:59:49,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=355060.0, ans=0.125 +2024-09-18 03:00:05,928 INFO [train.py:1198] (0/2) Epoch 20, batch 2800, loss[loss=0.2833, ctc_loss=0.1919, cr_loss=0.4092, attn_decoder_loss=0.2843, over 20086.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1373, cr_loss=0.3802, attn_decoder_loss=0.2511, over 5774720.83 frames. ], batch size: 209, lr: 5.56e-03, grad_scale: 16.0 +2024-09-18 03:00:22,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=355140.0, ans=0.1 +2024-09-18 03:00:22,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=355140.0, ans=0.125 +2024-09-18 03:00:31,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=355140.0, ans=0.1 +2024-09-18 03:00:36,962 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.74 vs. 
limit=15.0 +2024-09-18 03:00:44,107 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.403e+01 8.732e+01 9.172e+01 1.024e+02 2.809e+02, threshold=1.834e+02, percent-clipped=3.0 +2024-09-18 03:00:46,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=355180.0, ans=0.2 +2024-09-18 03:00:56,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=355220.0, ans=0.07 +2024-09-18 03:01:21,683 INFO [train.py:1198] (0/2) Epoch 20, batch 2850, loss[loss=0.2331, ctc_loss=0.1213, cr_loss=0.3478, attn_decoder_loss=0.2378, over 29488.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1378, cr_loss=0.3806, attn_decoder_loss=0.2514, over 5760308.24 frames. ], batch size: 77, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 03:01:24,193 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.66 vs. limit=15.0 +2024-09-18 03:01:45,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=355340.0, ans=0.025 +2024-09-18 03:01:57,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=355380.0, ans=0.0 +2024-09-18 03:01:59,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=355380.0, ans=0.025 +2024-09-18 03:02:17,216 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:02:24,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=355460.0, ans=0.125 +2024-09-18 03:02:37,547 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:02:41,736 INFO [train.py:1198] (0/2) Epoch 20, batch 2900, loss[loss=0.2375, ctc_loss=0.1219, cr_loss=0.3527, attn_decoder_loss=0.2425, over 29440.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1381, cr_loss=0.3824, attn_decoder_loss=0.2524, over 5786180.06 frames. ], batch size: 79, lr: 5.55e-03, grad_scale: 8.0 +2024-09-18 03:03:11,219 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.62 vs. limit=12.0 +2024-09-18 03:03:19,773 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.493e+01 9.196e+01 9.952e+01 2.490e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-18 03:03:51,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=355660.0, ans=0.125 +2024-09-18 03:03:57,577 INFO [train.py:1198] (0/2) Epoch 20, batch 2950, loss[loss=0.2269, ctc_loss=0.1298, cr_loss=0.3745, attn_decoder_loss=0.2294, over 29533.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.137, cr_loss=0.3805, attn_decoder_loss=0.2508, over 5781980.62 frames. 
], batch size: 75, lr: 5.55e-03, grad_scale: 8.0 +2024-09-18 03:04:02,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=355700.0, ans=0.1 +2024-09-18 03:04:05,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=355700.0, ans=0.1 +2024-09-18 03:04:08,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=355700.0, ans=0.125 +2024-09-18 03:04:19,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.99 vs. limit=10.0 +2024-09-18 03:04:27,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=355780.0, ans=0.0 +2024-09-18 03:04:31,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=355780.0, ans=0.1 +2024-09-18 03:04:35,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=355780.0, ans=0.125 +2024-09-18 03:04:52,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=355820.0, ans=0.2 +2024-09-18 03:05:05,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=355860.0, ans=0.1 +2024-09-18 03:05:13,041 INFO [train.py:1198] (0/2) Epoch 20, batch 3000, loss[loss=0.2591, ctc_loss=0.1553, cr_loss=0.4206, attn_decoder_loss=0.2612, over 29757.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1372, cr_loss=0.3807, attn_decoder_loss=0.251, over 5783366.14 frames. ], batch size: 81, lr: 5.55e-03, grad_scale: 8.0 +2024-09-18 03:05:13,042 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 03:05:32,384 INFO [train.py:1230] (0/2) Epoch 20, validation: loss=0.2111, ctc_loss=0.03914, cr_loss=5.228e-15, attn_decoder_loss=0.2302, over 944034.00 frames. +2024-09-18 03:05:32,384 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 03:05:41,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=355900.0, ans=0.125 +2024-09-18 03:05:53,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=355940.0, ans=0.2 +2024-09-18 03:06:10,670 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.213e+01 8.598e+01 9.158e+01 9.918e+01 2.557e+02, threshold=1.832e+02, percent-clipped=1.0 +2024-09-18 03:06:44,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=356060.0, ans=0.2 +2024-09-18 03:06:47,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=356060.0, ans=0.125 +2024-09-18 03:06:50,699 INFO [train.py:1198] (0/2) Epoch 20, batch 3050, loss[loss=0.2354, ctc_loss=0.1342, cr_loss=0.3808, attn_decoder_loss=0.2382, over 29515.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1381, cr_loss=0.3822, attn_decoder_loss=0.2519, over 5776682.07 frames. 
], batch size: 76, lr: 5.55e-03, grad_scale: 8.0 +2024-09-18 03:07:16,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=356140.0, ans=0.125 +2024-09-18 03:07:17,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.72 vs. limit=22.5 +2024-09-18 03:07:36,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=356220.0, ans=0.125 +2024-09-18 03:07:46,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=356220.0, ans=0.0 +2024-09-18 03:08:00,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=356260.0, ans=0.0 +2024-09-18 03:08:05,864 INFO [train.py:1198] (0/2) Epoch 20, batch 3100, loss[loss=0.2727, ctc_loss=0.1509, cr_loss=0.4011, attn_decoder_loss=0.2773, over 29233.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1377, cr_loss=0.3815, attn_decoder_loss=0.2513, over 5777445.85 frames. ], batch size: 100, lr: 5.55e-03, grad_scale: 8.0 +2024-09-18 03:08:07,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=356300.0, ans=0.5 +2024-09-18 03:08:08,423 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.08 vs. limit=10.0 +2024-09-18 03:08:12,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=356300.0, ans=0.125 +2024-09-18 03:08:18,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=356300.0, ans=0.125 +2024-09-18 03:08:30,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=356340.0, ans=0.125 +2024-09-18 03:08:33,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=356340.0, ans=0.125 +2024-09-18 03:08:38,593 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.61 vs. limit=22.5 +2024-09-18 03:08:43,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=356380.0, ans=15.0 +2024-09-18 03:08:43,843 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.464e+01 9.160e+01 9.747e+01 2.632e+02, threshold=1.832e+02, percent-clipped=3.0 +2024-09-18 03:08:49,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.06 vs. limit=10.0 +2024-09-18 03:08:51,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=356420.0, ans=0.09899494936611666 +2024-09-18 03:09:23,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=356500.0, ans=0.025 +2024-09-18 03:09:24,110 INFO [train.py:1198] (0/2) Epoch 20, batch 3150, loss[loss=0.2668, ctc_loss=0.1513, cr_loss=0.3997, attn_decoder_loss=0.2707, over 28761.00 frames. 
], tot_loss[loss=0.2478, ctc_loss=0.1377, cr_loss=0.3814, attn_decoder_loss=0.2515, over 5782549.93 frames. ], batch size: 104, lr: 5.55e-03, grad_scale: 8.0 +2024-09-18 03:09:31,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=356500.0, ans=0.0 +2024-09-18 03:09:59,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=356580.0, ans=0.125 +2024-09-18 03:10:00,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=356580.0, ans=0.1 +2024-09-18 03:10:02,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=356580.0, ans=0.125 +2024-09-18 03:10:02,918 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.26 vs. limit=10.0 +2024-09-18 03:10:42,208 INFO [train.py:1198] (0/2) Epoch 20, batch 3200, loss[loss=0.2458, ctc_loss=0.1394, cr_loss=0.3796, attn_decoder_loss=0.2492, over 29422.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1376, cr_loss=0.3811, attn_decoder_loss=0.2512, over 5792079.01 frames. ], batch size: 79, lr: 5.54e-03, grad_scale: 16.0 +2024-09-18 03:10:59,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=356740.0, ans=0.2 +2024-09-18 03:11:00,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=356740.0, ans=0.2 +2024-09-18 03:11:08,928 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.27 vs. limit=22.5 +2024-09-18 03:11:20,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=356780.0, ans=0.125 +2024-09-18 03:11:21,933 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.707e+01 8.428e+01 9.069e+01 9.579e+01 2.573e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-18 03:11:29,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=356820.0, ans=0.125 +2024-09-18 03:11:42,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=356860.0, ans=0.1 +2024-09-18 03:11:58,533 INFO [train.py:1198] (0/2) Epoch 20, batch 3250, loss[loss=0.2544, ctc_loss=0.1412, cr_loss=0.3622, attn_decoder_loss=0.2589, over 29727.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1371, cr_loss=0.3804, attn_decoder_loss=0.2511, over 5799014.64 frames. 
], batch size: 84, lr: 5.54e-03, grad_scale: 8.0 +2024-09-18 03:12:13,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=356940.0, ans=0.125 +2024-09-18 03:12:21,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=356940.0, ans=0.125 +2024-09-18 03:12:42,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=357020.0, ans=0.125 +2024-09-18 03:13:15,875 INFO [train.py:1198] (0/2) Epoch 20, batch 3300, loss[loss=0.2605, ctc_loss=0.1531, cr_loss=0.3989, attn_decoder_loss=0.2636, over 28537.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1362, cr_loss=0.3787, attn_decoder_loss=0.2498, over 5797535.21 frames. ], batch size: 112, lr: 5.54e-03, grad_scale: 8.0 +2024-09-18 03:13:37,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=357140.0, ans=0.05 +2024-09-18 03:13:44,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=357180.0, ans=0.025 +2024-09-18 03:13:55,065 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.345e+01 8.624e+01 9.196e+01 9.884e+01 4.402e+02, threshold=1.839e+02, percent-clipped=2.0 +2024-09-18 03:14:09,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=357220.0, ans=0.0 +2024-09-18 03:14:32,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=357300.0, ans=0.2 +2024-09-18 03:14:33,205 INFO [train.py:1198] (0/2) Epoch 20, batch 3350, loss[loss=0.2598, ctc_loss=0.1449, cr_loss=0.3894, attn_decoder_loss=0.264, over 28851.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1372, cr_loss=0.38, attn_decoder_loss=0.2508, over 5772753.96 frames. ], batch size: 104, lr: 5.54e-03, grad_scale: 8.0 +2024-09-18 03:14:33,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=357300.0, ans=0.125 +2024-09-18 03:14:45,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=357300.0, ans=0.025 +2024-09-18 03:14:47,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=357340.0, ans=0.2 +2024-09-18 03:15:00,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=357340.0, ans=0.125 +2024-09-18 03:15:30,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.19 vs. limit=15.0 +2024-09-18 03:15:47,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=357500.0, ans=0.125 +2024-09-18 03:15:48,959 INFO [train.py:1198] (0/2) Epoch 20, batch 3400, loss[loss=0.2174, ctc_loss=0.1151, cr_loss=0.3433, attn_decoder_loss=0.2211, over 29348.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1373, cr_loss=0.3795, attn_decoder_loss=0.2507, over 5763884.14 frames. 
], batch size: 67, lr: 5.54e-03, grad_scale: 8.0 +2024-09-18 03:16:28,758 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.571e+01 8.602e+01 9.311e+01 9.873e+01 3.083e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-18 03:16:35,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=357620.0, ans=0.0 +2024-09-18 03:16:36,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=357620.0, ans=0.125 +2024-09-18 03:16:38,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=357620.0, ans=0.1 +2024-09-18 03:16:44,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=357620.0, ans=0.2 +2024-09-18 03:16:47,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=357620.0, ans=0.125 +2024-09-18 03:16:57,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=357660.0, ans=0.125 +2024-09-18 03:17:00,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=357660.0, ans=0.125 +2024-09-18 03:17:07,410 INFO [train.py:1198] (0/2) Epoch 20, batch 3450, loss[loss=0.2503, ctc_loss=0.1392, cr_loss=0.3575, attn_decoder_loss=0.2548, over 28163.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1376, cr_loss=0.3805, attn_decoder_loss=0.2512, over 5772855.07 frames. ], batch size: 111, lr: 5.54e-03, grad_scale: 8.0 +2024-09-18 03:17:15,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=357700.0, ans=0.125 +2024-09-18 03:17:24,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=357740.0, ans=0.125 +2024-09-18 03:17:32,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=357740.0, ans=0.09899494936611666 +2024-09-18 03:17:35,760 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.52 vs. limit=15.0 +2024-09-18 03:17:36,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=357780.0, ans=0.2 +2024-09-18 03:17:48,850 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.76 vs. limit=22.5 +2024-09-18 03:18:09,187 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.24 vs. limit=15.0 +2024-09-18 03:18:17,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=357860.0, ans=0.125 +2024-09-18 03:18:25,103 INFO [train.py:1198] (0/2) Epoch 20, batch 3500, loss[loss=0.2212, ctc_loss=0.1248, cr_loss=0.3493, attn_decoder_loss=0.2241, over 29280.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1372, cr_loss=0.3796, attn_decoder_loss=0.2506, over 5775548.01 frames. 
], batch size: 71, lr: 5.54e-03, grad_scale: 8.0 +2024-09-18 03:18:27,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=357900.0, ans=0.0 +2024-09-18 03:18:39,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=357940.0, ans=0.0 +2024-09-18 03:18:45,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=357940.0, ans=15.0 +2024-09-18 03:19:03,916 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.402e+01 8.576e+01 9.185e+01 9.795e+01 1.651e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-18 03:19:10,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=358020.0, ans=0.05 +2024-09-18 03:19:11,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=358020.0, ans=0.125 +2024-09-18 03:19:13,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=358020.0, ans=0.2 +2024-09-18 03:19:28,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=358060.0, ans=0.1 +2024-09-18 03:19:34,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=358060.0, ans=0.0 +2024-09-18 03:19:35,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=358060.0, ans=0.125 +2024-09-18 03:19:39,927 INFO [train.py:1198] (0/2) Epoch 20, batch 3550, loss[loss=0.2591, ctc_loss=0.1456, cr_loss=0.3872, attn_decoder_loss=0.2631, over 29741.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1371, cr_loss=0.3792, attn_decoder_loss=0.2506, over 5781627.15 frames. 
], batch size: 89, lr: 5.53e-03, grad_scale: 8.0 +2024-09-18 03:19:43,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=358100.0, ans=0.125 +2024-09-18 03:19:47,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=358100.0, ans=0.1 +2024-09-18 03:19:50,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=358100.0, ans=0.125 +2024-09-18 03:19:59,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=358140.0, ans=0.2 +2024-09-18 03:20:03,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=358140.0, ans=0.0 +2024-09-18 03:20:09,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=358180.0, ans=0.1 +2024-09-18 03:20:21,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=358180.0, ans=0.125 +2024-09-18 03:20:21,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=358180.0, ans=0.0 +2024-09-18 03:20:21,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=358180.0, ans=0.1 +2024-09-18 03:20:23,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=358220.0, ans=0.0 +2024-09-18 03:20:24,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=358220.0, ans=0.125 +2024-09-18 03:20:29,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=358220.0, ans=0.125 +2024-09-18 03:20:34,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=358220.0, ans=0.0 +2024-09-18 03:20:53,756 INFO [train.py:1198] (0/2) Epoch 20, batch 3600, loss[loss=0.2345, ctc_loss=0.1255, cr_loss=0.3581, attn_decoder_loss=0.2387, over 29500.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1368, cr_loss=0.379, attn_decoder_loss=0.2505, over 5790735.34 frames. 
], batch size: 77, lr: 5.53e-03, grad_scale: 16.0 +2024-09-18 03:21:18,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=358340.0, ans=0.125 +2024-09-18 03:21:33,254 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.423e+01 8.608e+01 9.165e+01 9.950e+01 3.634e+02, threshold=1.833e+02, percent-clipped=2.0 +2024-09-18 03:21:48,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=358420.0, ans=0.0 +2024-09-18 03:21:52,781 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:22:08,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=358460.0, ans=0.0 +2024-09-18 03:22:08,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=358460.0, ans=0.0 +2024-09-18 03:22:10,953 INFO [train.py:1198] (0/2) Epoch 20, batch 3650, loss[loss=0.2608, ctc_loss=0.143, cr_loss=0.4143, attn_decoder_loss=0.2646, over 29515.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1365, cr_loss=0.3781, attn_decoder_loss=0.2501, over 5792247.20 frames. ], batch size: 90, lr: 5.53e-03, grad_scale: 8.0 +2024-09-18 03:22:38,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=358540.0, ans=0.05 +2024-09-18 03:22:41,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=358580.0, ans=0.0 +2024-09-18 03:22:45,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=358580.0, ans=0.0 +2024-09-18 03:22:48,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=358580.0, ans=0.1 +2024-09-18 03:23:10,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=358660.0, ans=0.125 +2024-09-18 03:23:12,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=358660.0, ans=0.125 +2024-09-18 03:23:21,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=358660.0, ans=0.0 +2024-09-18 03:23:24,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=358700.0, ans=0.125 +2024-09-18 03:23:25,633 INFO [train.py:1198] (0/2) Epoch 20, batch 3700, loss[loss=0.2404, ctc_loss=0.1214, cr_loss=0.358, attn_decoder_loss=0.2457, over 29714.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1362, cr_loss=0.3773, attn_decoder_loss=0.2502, over 5802277.12 frames. ], batch size: 84, lr: 5.53e-03, grad_scale: 8.0 +2024-09-18 03:23:32,366 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.74 vs. 
limit=10.0 +2024-09-18 03:23:52,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_ff2.min_abs, batch_count=358740.0, ans=0.1 +2024-09-18 03:24:05,520 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.342e+01 8.568e+01 9.154e+01 9.793e+01 1.686e+02, threshold=1.831e+02, percent-clipped=0.0 +2024-09-18 03:24:34,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=358860.0, ans=0.125 +2024-09-18 03:24:41,704 INFO [train.py:1198] (0/2) Epoch 20, batch 3750, loss[loss=0.2244, ctc_loss=0.1174, cr_loss=0.3346, attn_decoder_loss=0.2289, over 29336.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1358, cr_loss=0.3767, attn_decoder_loss=0.2497, over 5806094.86 frames. ], batch size: 67, lr: 5.53e-03, grad_scale: 8.0 +2024-09-18 03:24:43,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=358900.0, ans=0.05 +2024-09-18 03:24:43,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=358900.0, ans=0.05 +2024-09-18 03:24:57,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=358940.0, ans=0.125 +2024-09-18 03:25:01,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=358940.0, ans=0.1 +2024-09-18 03:25:07,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=358940.0, ans=0.04949747468305833 +2024-09-18 03:25:37,518 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.56 vs. limit=15.0 +2024-09-18 03:25:50,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=359060.0, ans=0.05 +2024-09-18 03:25:56,007 INFO [train.py:1198] (0/2) Epoch 20, batch 3800, loss[loss=0.2457, ctc_loss=0.1365, cr_loss=0.3981, attn_decoder_loss=0.249, over 29627.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1358, cr_loss=0.3766, attn_decoder_loss=0.2492, over 5796796.54 frames. ], batch size: 86, lr: 5.53e-03, grad_scale: 8.0 +2024-09-18 03:26:20,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=359140.0, ans=0.125 +2024-09-18 03:26:20,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.61 vs. limit=15.0 +2024-09-18 03:26:36,575 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.575e+01 9.018e+01 9.556e+01 1.555e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-18 03:26:38,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=359180.0, ans=0.1 +2024-09-18 03:26:41,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=359220.0, ans=0.125 +2024-09-18 03:26:50,925 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.47 vs. 
limit=15.0 +2024-09-18 03:26:59,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=359260.0, ans=0.125 +2024-09-18 03:27:01,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=359260.0, ans=0.1 +2024-09-18 03:27:10,603 INFO [train.py:1198] (0/2) Epoch 20, batch 3850, loss[loss=0.2557, ctc_loss=0.1392, cr_loss=0.3737, attn_decoder_loss=0.2603, over 29243.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1355, cr_loss=0.3763, attn_decoder_loss=0.2492, over 5812225.67 frames. ], batch size: 100, lr: 5.52e-03, grad_scale: 8.0 +2024-09-18 03:27:10,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=359300.0, ans=0.125 +2024-09-18 03:27:28,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=359340.0, ans=0.035 +2024-09-18 03:27:43,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=359380.0, ans=0.0 +2024-09-18 03:27:52,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=359380.0, ans=0.125 +2024-09-18 03:28:00,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.20 vs. limit=15.0 +2024-09-18 03:28:22,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=359460.0, ans=0.2 +2024-09-18 03:28:22,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=359460.0, ans=0.0 +2024-09-18 03:28:22,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.87 vs. limit=15.0 +2024-09-18 03:28:26,449 INFO [train.py:1198] (0/2) Epoch 20, batch 3900, loss[loss=0.2468, ctc_loss=0.139, cr_loss=0.394, attn_decoder_loss=0.2501, over 29612.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.136, cr_loss=0.3775, attn_decoder_loss=0.2498, over 5816826.54 frames. 
], batch size: 86, lr: 5.52e-03, grad_scale: 8.0 +2024-09-18 03:28:26,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=359500.0, ans=0.125 +2024-09-18 03:28:37,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=359500.0, ans=0.125 +2024-09-18 03:28:59,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=359580.0, ans=0.07 +2024-09-18 03:29:06,327 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.505e+01 9.019e+01 9.664e+01 2.565e+02, threshold=1.804e+02, percent-clipped=1.0 +2024-09-18 03:29:28,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=359660.0, ans=0.125 +2024-09-18 03:29:37,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=359660.0, ans=0.125 +2024-09-18 03:29:40,549 INFO [train.py:1198] (0/2) Epoch 20, batch 3950, loss[loss=0.2593, ctc_loss=0.1406, cr_loss=0.413, attn_decoder_loss=0.2633, over 29509.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1354, cr_loss=0.3767, attn_decoder_loss=0.2497, over 5836225.77 frames. ], batch size: 97, lr: 5.52e-03, grad_scale: 8.0 +2024-09-18 03:29:51,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.18 vs. limit=15.0 +2024-09-18 03:29:54,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=359700.0, ans=0.1 +2024-09-18 03:30:01,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=359740.0, ans=0.125 +2024-09-18 03:30:01,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=359740.0, ans=0.1 +2024-09-18 03:30:03,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.59 vs. limit=22.5 +2024-09-18 03:30:25,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=359820.0, ans=0.0 +2024-09-18 03:30:56,095 INFO [train.py:1198] (0/2) Epoch 20, batch 4000, loss[loss=0.2305, ctc_loss=0.1213, cr_loss=0.3394, attn_decoder_loss=0.2351, over 29523.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1357, cr_loss=0.3767, attn_decoder_loss=0.2498, over 5814596.54 frames. ], batch size: 74, lr: 5.52e-03, grad_scale: 16.0 +2024-09-18 03:31:21,844 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.45 vs. 
limit=15.0 +2024-09-18 03:31:25,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=359980.0, ans=0.1 +2024-09-18 03:31:38,100 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.709e+01 8.706e+01 9.188e+01 9.943e+01 2.259e+02, threshold=1.838e+02, percent-clipped=3.0 +2024-09-18 03:31:51,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=360020.0, ans=0.035 +2024-09-18 03:32:05,815 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.82 vs. limit=15.0 +2024-09-18 03:32:10,749 INFO [train.py:1198] (0/2) Epoch 20, batch 4050, loss[loss=0.2802, ctc_loss=0.1897, cr_loss=0.4252, attn_decoder_loss=0.2808, over 20119.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1362, cr_loss=0.3773, attn_decoder_loss=0.2499, over 5797871.26 frames. ], batch size: 209, lr: 5.52e-03, grad_scale: 8.0 +2024-09-18 03:32:11,659 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.24 vs. limit=15.0 +2024-09-18 03:32:32,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=360140.0, ans=0.125 +2024-09-18 03:32:33,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=360140.0, ans=0.0 +2024-09-18 03:32:36,244 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.65 vs. limit=15.0 +2024-09-18 03:33:03,569 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:33:25,676 INFO [train.py:1198] (0/2) Epoch 20, batch 4100, loss[loss=0.2731, ctc_loss=0.1605, cr_loss=0.4499, attn_decoder_loss=0.2756, over 29492.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1366, cr_loss=0.3778, attn_decoder_loss=0.2502, over 5792640.59 frames. ], batch size: 90, lr: 5.52e-03, grad_scale: 8.0 +2024-09-18 03:33:47,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=360340.0, ans=0.125 +2024-09-18 03:33:48,215 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.85 vs. limit=6.0 +2024-09-18 03:34:06,721 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.535e+01 8.554e+01 9.204e+01 1.015e+02 1.958e+02, threshold=1.841e+02, percent-clipped=1.0 +2024-09-18 03:34:07,181 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:34:27,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=360460.0, ans=0.025 +2024-09-18 03:34:28,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=360460.0, ans=0.2 +2024-09-18 03:34:29,621 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=8.99 vs. 
limit=15.0 +2024-09-18 03:34:40,437 INFO [train.py:1198] (0/2) Epoch 20, batch 4150, loss[loss=0.2358, ctc_loss=0.1315, cr_loss=0.3555, attn_decoder_loss=0.2395, over 29503.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1365, cr_loss=0.3775, attn_decoder_loss=0.2501, over 5798400.21 frames. ], batch size: 77, lr: 5.52e-03, grad_scale: 8.0 +2024-09-18 03:35:08,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=360580.0, ans=0.1 +2024-09-18 03:35:08,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.04 vs. limit=10.0 +2024-09-18 03:35:29,815 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.07 vs. limit=22.5 +2024-09-18 03:35:53,872 INFO [train.py:1198] (0/2) Epoch 20, batch 4200, loss[loss=0.2629, ctc_loss=0.147, cr_loss=0.4052, attn_decoder_loss=0.2668, over 29471.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.137, cr_loss=0.3795, attn_decoder_loss=0.2506, over 5800654.87 frames. ], batch size: 90, lr: 5.51e-03, grad_scale: 8.0 +2024-09-18 03:36:19,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=360740.0, ans=0.125 +2024-09-18 03:36:21,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=360780.0, ans=0.125 +2024-09-18 03:36:36,269 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.598e+01 9.049e+01 1.004e+02 1.437e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-18 03:36:48,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=360820.0, ans=0.0 +2024-09-18 03:36:49,428 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.53 vs. limit=15.0 +2024-09-18 03:36:58,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=360860.0, ans=0.125 +2024-09-18 03:37:09,129 INFO [train.py:1198] (0/2) Epoch 20, batch 4250, loss[loss=0.2341, ctc_loss=0.1201, cr_loss=0.3408, attn_decoder_loss=0.2391, over 29528.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.1367, cr_loss=0.379, attn_decoder_loss=0.2506, over 5805582.43 frames. ], batch size: 74, lr: 5.51e-03, grad_scale: 8.0 +2024-09-18 03:37:10,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=360900.0, ans=0.125 +2024-09-18 03:37:12,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=360900.0, ans=0.2 +2024-09-18 03:37:28,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=360940.0, ans=0.2 +2024-09-18 03:37:30,520 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.26 vs. 
limit=22.5 +2024-09-18 03:37:37,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=360980.0, ans=0.0 +2024-09-18 03:37:43,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=360980.0, ans=0.125 +2024-09-18 03:38:09,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=361060.0, ans=0.125 +2024-09-18 03:38:19,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=361060.0, ans=0.125 +2024-09-18 03:38:19,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=361060.0, ans=0.1 +2024-09-18 03:38:22,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=361100.0, ans=0.0 +2024-09-18 03:38:23,849 INFO [train.py:1198] (0/2) Epoch 20, batch 4300, loss[loss=0.2596, ctc_loss=0.1425, cr_loss=0.4027, attn_decoder_loss=0.2637, over 29554.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1364, cr_loss=0.3783, attn_decoder_loss=0.2504, over 5793494.58 frames. ], batch size: 87, lr: 5.51e-03, grad_scale: 8.0 +2024-09-18 03:38:27,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.52 vs. limit=12.0 +2024-09-18 03:38:30,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=361100.0, ans=0.125 +2024-09-18 03:38:34,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=361100.0, ans=0.025 +2024-09-18 03:38:42,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=361140.0, ans=0.125 +2024-09-18 03:38:56,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=361180.0, ans=0.125 +2024-09-18 03:39:01,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=361180.0, ans=0.1 +2024-09-18 03:39:01,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=361180.0, ans=0.125 +2024-09-18 03:39:05,351 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.631e+01 8.736e+01 9.238e+01 9.877e+01 2.557e+02, threshold=1.848e+02, percent-clipped=2.0 +2024-09-18 03:39:07,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=361220.0, ans=0.125 +2024-09-18 03:39:08,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=361220.0, ans=0.125 +2024-09-18 03:39:11,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=361220.0, ans=0.125 +2024-09-18 03:39:23,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=361260.0, ans=0.125 +2024-09-18 03:39:38,013 INFO [train.py:1198] (0/2) Epoch 20, batch 4350, loss[loss=0.2572, ctc_loss=0.1445, cr_loss=0.3813, attn_decoder_loss=0.2612, over 
29463.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1386, cr_loss=0.3831, attn_decoder_loss=0.2536, over 5795513.96 frames. ], batch size: 97, lr: 5.51e-03, grad_scale: 8.0 +2024-09-18 03:39:40,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=361300.0, ans=0.0 +2024-09-18 03:39:42,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.74 vs. limit=12.0 +2024-09-18 03:39:55,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=361340.0, ans=0.125 +2024-09-18 03:40:05,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=361340.0, ans=0.125 +2024-09-18 03:40:09,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=361380.0, ans=0.035 +2024-09-18 03:40:17,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=361380.0, ans=0.0 +2024-09-18 03:40:27,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=361420.0, ans=0.125 +2024-09-18 03:40:29,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=361420.0, ans=0.1 +2024-09-18 03:40:51,533 INFO [train.py:1198] (0/2) Epoch 20, batch 4400, loss[loss=0.2524, ctc_loss=0.146, cr_loss=0.3801, attn_decoder_loss=0.2558, over 27153.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.14, cr_loss=0.3854, attn_decoder_loss=0.2557, over 5766620.01 frames. ], batch size: 124, lr: 5.51e-03, grad_scale: 16.0 +2024-09-18 03:41:19,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=361540.0, ans=0.1 +2024-09-18 03:41:34,939 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.117e+01 8.833e+01 9.166e+01 9.784e+01 1.631e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-18 03:41:58,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=361660.0, ans=0.125 +2024-09-18 03:42:01,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=361660.0, ans=0.0 +2024-09-18 03:42:05,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=361700.0, ans=0.125 +2024-09-18 03:42:06,985 INFO [train.py:1198] (0/2) Epoch 20, batch 4450, loss[loss=0.2757, ctc_loss=0.1804, cr_loss=0.4216, attn_decoder_loss=0.2769, over 20068.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1442, cr_loss=0.3904, attn_decoder_loss=0.2581, over 5575625.71 frames. 
], batch size: 211, lr: 5.51e-03, grad_scale: 8.0 +2024-09-18 03:42:07,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=361700.0, ans=6.0 +2024-09-18 03:42:59,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=361820.0, ans=0.125 +2024-09-18 03:43:00,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=361820.0, ans=0.125 +2024-09-18 03:43:05,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=361820.0, ans=0.125 +2024-09-18 03:43:22,881 INFO [train.py:1198] (0/2) Epoch 20, batch 4500, loss[loss=0.267, ctc_loss=0.1702, cr_loss=0.403, attn_decoder_loss=0.2688, over 20674.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1492, cr_loss=0.393, attn_decoder_loss=0.2605, over 5234870.80 frames. ], batch size: 210, lr: 5.51e-03, grad_scale: 8.0 +2024-09-18 03:43:35,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=361900.0, ans=0.0 +2024-09-18 03:44:00,126 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-20.pt +2024-09-18 03:44:52,337 INFO [train.py:1198] (0/2) Epoch 21, batch 0, loss[loss=0.2207, ctc_loss=0.1079, cr_loss=0.3422, attn_decoder_loss=0.2256, over 29626.00 frames. ], tot_loss[loss=0.2207, ctc_loss=0.1079, cr_loss=0.3422, attn_decoder_loss=0.2256, over 29626.00 frames. ], batch size: 73, lr: 5.37e-03, grad_scale: 16.0 +2024-09-18 03:44:52,338 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 03:45:10,776 INFO [train.py:1230] (0/2) Epoch 21, validation: loss=0.2126, ctc_loss=0.0391, cr_loss=5.275e-15, attn_decoder_loss=0.2319, over 944034.00 frames. +2024-09-18 03:45:10,776 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 03:45:19,729 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.930e+01 1.076e+02 1.145e+02 1.241e+02 1.705e+02, threshold=2.291e+02, percent-clipped=0.0 +2024-09-18 03:45:29,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=362040.0, ans=0.0 +2024-09-18 03:45:31,311 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.77 vs. limit=12.0 +2024-09-18 03:45:42,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=362080.0, ans=0.015 +2024-09-18 03:46:00,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=362120.0, ans=0.1 +2024-09-18 03:46:02,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=362120.0, ans=0.0 +2024-09-18 03:46:04,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=362120.0, ans=0.0 +2024-09-18 03:46:23,291 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.70 vs. 
limit=6.0 +2024-09-18 03:46:24,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=362160.0, ans=0.125 +2024-09-18 03:46:28,349 INFO [train.py:1198] (0/2) Epoch 21, batch 50, loss[loss=0.2237, ctc_loss=0.1233, cr_loss=0.3545, attn_decoder_loss=0.227, over 29401.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1376, cr_loss=0.3786, attn_decoder_loss=0.251, over 1268268.75 frames. ], batch size: 70, lr: 5.37e-03, grad_scale: 8.0 +2024-09-18 03:46:31,905 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:46:39,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=362200.0, ans=0.1 +2024-09-18 03:47:00,572 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=11.91 vs. limit=15.0 +2024-09-18 03:47:22,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=362320.0, ans=0.0 +2024-09-18 03:47:23,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=362320.0, ans=0.125 +2024-09-18 03:47:25,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=362320.0, ans=0.07 +2024-09-18 03:47:46,702 INFO [train.py:1198] (0/2) Epoch 21, batch 100, loss[loss=0.2303, ctc_loss=0.1205, cr_loss=0.342, attn_decoder_loss=0.2349, over 29553.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1379, cr_loss=0.3814, attn_decoder_loss=0.2527, over 2253007.91 frames. ], batch size: 76, lr: 5.37e-03, grad_scale: 8.0 +2024-09-18 03:47:55,562 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.793e+01 9.358e+01 9.884e+01 2.727e+02, threshold=1.872e+02, percent-clipped=1.0 +2024-09-18 03:47:57,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=362400.0, ans=0.125 +2024-09-18 03:48:13,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=362440.0, ans=0.125 +2024-09-18 03:48:46,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=362560.0, ans=0.025 +2024-09-18 03:48:49,452 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:48:50,110 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.02 vs. limit=22.5 +2024-09-18 03:49:01,016 INFO [train.py:1198] (0/2) Epoch 21, batch 150, loss[loss=0.2134, ctc_loss=0.104, cr_loss=0.317, attn_decoder_loss=0.2185, over 29427.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1363, cr_loss=0.3803, attn_decoder_loss=0.2509, over 3048374.36 frames. 
], batch size: 70, lr: 5.36e-03, grad_scale: 8.0 +2024-09-18 03:49:10,456 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:49:21,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=362640.0, ans=0.125 +2024-09-18 03:49:25,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=362640.0, ans=0.125 +2024-09-18 03:49:54,548 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.65 vs. limit=15.0 +2024-09-18 03:50:18,547 INFO [train.py:1198] (0/2) Epoch 21, batch 200, loss[loss=0.2622, ctc_loss=0.153, cr_loss=0.4061, attn_decoder_loss=0.2653, over 27535.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1361, cr_loss=0.3799, attn_decoder_loss=0.2502, over 3660214.80 frames. ], batch size: 124, lr: 5.36e-03, grad_scale: 8.0 +2024-09-18 03:50:21,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=362800.0, ans=0.09899494936611666 +2024-09-18 03:50:27,602 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.138e+01 8.461e+01 9.001e+01 9.601e+01 1.394e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-18 03:50:28,918 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.05 vs. limit=8.0 +2024-09-18 03:50:29,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=362800.0, ans=0.125 +2024-09-18 03:50:29,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.86 vs. limit=6.0 +2024-09-18 03:50:32,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=362840.0, ans=0.0 +2024-09-18 03:50:37,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=362840.0, ans=0.125 +2024-09-18 03:50:38,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=362840.0, ans=0.04949747468305833 +2024-09-18 03:51:34,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=362960.0, ans=0.0 +2024-09-18 03:51:37,236 INFO [train.py:1198] (0/2) Epoch 21, batch 250, loss[loss=0.2607, ctc_loss=0.1476, cr_loss=0.3991, attn_decoder_loss=0.2643, over 29256.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1355, cr_loss=0.3787, attn_decoder_loss=0.2499, over 4142547.94 frames. 
], batch size: 100, lr: 5.36e-03, grad_scale: 8.0 +2024-09-18 03:51:39,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=363000.0, ans=0.07 +2024-09-18 03:51:46,462 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:51:47,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=363000.0, ans=0.1 +2024-09-18 03:51:55,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=363040.0, ans=10.0 +2024-09-18 03:52:02,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.53 vs. limit=15.0 +2024-09-18 03:52:04,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=363040.0, ans=0.025 +2024-09-18 03:52:04,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=363040.0, ans=0.125 +2024-09-18 03:52:19,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=363080.0, ans=0.5 +2024-09-18 03:52:29,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=363120.0, ans=0.1 +2024-09-18 03:52:50,429 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 03:52:52,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=363200.0, ans=0.125 +2024-09-18 03:52:53,505 INFO [train.py:1198] (0/2) Epoch 21, batch 300, loss[loss=0.2644, ctc_loss=0.155, cr_loss=0.4306, attn_decoder_loss=0.267, over 29512.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1356, cr_loss=0.3789, attn_decoder_loss=0.2499, over 4511800.60 frames. 
], batch size: 92, lr: 5.36e-03, grad_scale: 8.0 +2024-09-18 03:53:02,585 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.788e+01 8.424e+01 9.085e+01 9.553e+01 2.134e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-18 03:53:10,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=363240.0, ans=0.2 +2024-09-18 03:53:15,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=363240.0, ans=0.0 +2024-09-18 03:53:30,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=363280.0, ans=0.1 +2024-09-18 03:53:36,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=363280.0, ans=0.125 +2024-09-18 03:53:43,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=363320.0, ans=0.0 +2024-09-18 03:53:56,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=363360.0, ans=0.125 +2024-09-18 03:54:11,641 INFO [train.py:1198] (0/2) Epoch 21, batch 350, loss[loss=0.216, ctc_loss=0.1061, cr_loss=0.3084, attn_decoder_loss=0.2213, over 29324.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.136, cr_loss=0.3797, attn_decoder_loss=0.2503, over 4797821.13 frames. ], batch size: 71, lr: 5.36e-03, grad_scale: 8.0 +2024-09-18 03:54:17,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=363400.0, ans=0.125 +2024-09-18 03:54:28,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=363440.0, ans=0.0 +2024-09-18 03:54:37,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=363440.0, ans=0.125 +2024-09-18 03:54:56,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=363480.0, ans=0.04949747468305833 +2024-09-18 03:55:23,151 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.36 vs. limit=15.0 +2024-09-18 03:55:25,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=363560.0, ans=0.0 +2024-09-18 03:55:29,551 INFO [train.py:1198] (0/2) Epoch 21, batch 400, loss[loss=0.2536, ctc_loss=0.1421, cr_loss=0.3881, attn_decoder_loss=0.2573, over 29692.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1356, cr_loss=0.3789, attn_decoder_loss=0.2497, over 5026006.51 frames. 
], batch size: 82, lr: 5.36e-03, grad_scale: 16.0 +2024-09-18 03:55:38,689 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.579e+01 8.497e+01 9.045e+01 9.813e+01 2.448e+02, threshold=1.809e+02, percent-clipped=2.0 +2024-09-18 03:55:57,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=363640.0, ans=0.0 +2024-09-18 03:56:15,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=363720.0, ans=0.0 +2024-09-18 03:56:39,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=363760.0, ans=0.0 +2024-09-18 03:56:45,199 INFO [train.py:1198] (0/2) Epoch 21, batch 450, loss[loss=0.2692, ctc_loss=0.1566, cr_loss=0.4096, attn_decoder_loss=0.2726, over 29684.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1359, cr_loss=0.379, attn_decoder_loss=0.25, over 5189002.55 frames. ], batch size: 83, lr: 5.36e-03, grad_scale: 8.0 +2024-09-18 03:56:45,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=363800.0, ans=0.025 +2024-09-18 03:56:50,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=363800.0, ans=0.0 +2024-09-18 03:56:57,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=363800.0, ans=0.125 +2024-09-18 03:56:57,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=363800.0, ans=0.125 +2024-09-18 03:57:00,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=363840.0, ans=0.2 +2024-09-18 03:57:12,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=363840.0, ans=0.125 +2024-09-18 03:57:45,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=363960.0, ans=0.125 +2024-09-18 03:58:01,733 INFO [train.py:1198] (0/2) Epoch 21, batch 500, loss[loss=0.2593, ctc_loss=0.1432, cr_loss=0.3987, attn_decoder_loss=0.2633, over 29420.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1351, cr_loss=0.3777, attn_decoder_loss=0.2492, over 5331923.08 frames. 
], batch size: 94, lr: 5.35e-03, grad_scale: 8.0 +2024-09-18 03:58:08,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=364000.0, ans=0.0 +2024-09-18 03:58:14,711 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.527e+01 8.455e+01 8.968e+01 9.588e+01 2.224e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-18 03:58:21,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=364040.0, ans=0.0 +2024-09-18 03:58:37,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=364080.0, ans=0.025 +2024-09-18 03:59:07,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=364160.0, ans=0.07 +2024-09-18 03:59:14,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=364160.0, ans=0.125 +2024-09-18 03:59:22,169 INFO [train.py:1198] (0/2) Epoch 21, batch 550, loss[loss=0.2494, ctc_loss=0.1394, cr_loss=0.3659, attn_decoder_loss=0.2535, over 28779.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1351, cr_loss=0.3775, attn_decoder_loss=0.2491, over 5424965.66 frames. ], batch size: 104, lr: 5.35e-03, grad_scale: 8.0 +2024-09-18 03:59:23,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=364200.0, ans=22.5 +2024-09-18 03:59:28,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=364200.0, ans=0.1 +2024-09-18 03:59:33,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=364200.0, ans=0.025 +2024-09-18 04:00:06,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=364320.0, ans=0.1 +2024-09-18 04:00:11,733 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.87 vs. limit=15.0 +2024-09-18 04:00:12,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=364320.0, ans=0.125 +2024-09-18 04:00:31,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=364360.0, ans=0.05 +2024-09-18 04:00:31,620 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.68 vs. limit=6.0 +2024-09-18 04:00:38,401 INFO [train.py:1198] (0/2) Epoch 21, batch 600, loss[loss=0.2581, ctc_loss=0.1482, cr_loss=0.3993, attn_decoder_loss=0.2615, over 29200.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1355, cr_loss=0.3784, attn_decoder_loss=0.2496, over 5509218.45 frames. 
], batch size: 100, lr: 5.35e-03, grad_scale: 8.0 +2024-09-18 04:00:43,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=364400.0, ans=0.2 +2024-09-18 04:00:48,932 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.580e+01 8.547e+01 9.115e+01 9.764e+01 2.691e+02, threshold=1.823e+02, percent-clipped=3.0 +2024-09-18 04:01:10,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=364480.0, ans=0.2 +2024-09-18 04:01:19,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=364480.0, ans=0.1 +2024-09-18 04:01:22,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=364520.0, ans=0.0 +2024-09-18 04:01:28,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=364520.0, ans=0.125 +2024-09-18 04:01:34,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=364520.0, ans=0.1 +2024-09-18 04:01:53,835 INFO [train.py:1198] (0/2) Epoch 21, batch 650, loss[loss=0.2513, ctc_loss=0.1373, cr_loss=0.3884, attn_decoder_loss=0.2554, over 29762.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1343, cr_loss=0.3768, attn_decoder_loss=0.2488, over 5586571.95 frames. ], batch size: 81, lr: 5.35e-03, grad_scale: 8.0 +2024-09-18 04:02:01,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=364600.0, ans=0.0 +2024-09-18 04:02:06,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=364600.0, ans=0.125 +2024-09-18 04:02:07,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=364640.0, ans=0.1 +2024-09-18 04:02:11,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=364640.0, ans=0.025 +2024-09-18 04:02:13,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=364640.0, ans=0.0 +2024-09-18 04:02:13,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=364640.0, ans=0.025 +2024-09-18 04:02:29,205 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.46 vs. limit=15.0 +2024-09-18 04:03:14,773 INFO [train.py:1198] (0/2) Epoch 21, batch 700, loss[loss=0.2437, ctc_loss=0.137, cr_loss=0.3927, attn_decoder_loss=0.2469, over 29544.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1349, cr_loss=0.3776, attn_decoder_loss=0.2495, over 5636277.38 frames. 
], batch size: 76, lr: 5.35e-03, grad_scale: 8.0 +2024-09-18 04:03:21,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=364800.0, ans=0.125 +2024-09-18 04:03:25,121 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.583e+01 8.553e+01 9.088e+01 9.665e+01 1.426e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-18 04:03:28,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=364840.0, ans=0.025 +2024-09-18 04:04:03,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=364920.0, ans=0.125 +2024-09-18 04:04:04,286 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.79 vs. limit=12.0 +2024-09-18 04:04:11,662 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.88 vs. limit=15.0 +2024-09-18 04:04:18,730 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 04:04:26,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=364960.0, ans=0.2 +2024-09-18 04:04:26,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=364960.0, ans=0.125 +2024-09-18 04:04:30,490 INFO [train.py:1198] (0/2) Epoch 21, batch 750, loss[loss=0.2441, ctc_loss=0.135, cr_loss=0.3783, attn_decoder_loss=0.2478, over 29712.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1347, cr_loss=0.3769, attn_decoder_loss=0.2492, over 5675264.33 frames. ], batch size: 82, lr: 5.35e-03, grad_scale: 8.0 +2024-09-18 04:04:32,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=365000.0, ans=0.125 +2024-09-18 04:04:37,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.22 vs. limit=15.0 +2024-09-18 04:04:41,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=365000.0, ans=0.2 +2024-09-18 04:04:46,563 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.32 vs. 
limit=15.0 +2024-09-18 04:04:56,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=365040.0, ans=0.125 +2024-09-18 04:04:57,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=365040.0, ans=0.125 +2024-09-18 04:05:03,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=365080.0, ans=0.125 +2024-09-18 04:05:03,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=365080.0, ans=0.1 +2024-09-18 04:05:13,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=365080.0, ans=0.125 +2024-09-18 04:05:14,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=365120.0, ans=0.0 +2024-09-18 04:05:16,864 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=14.65 vs. limit=15.0 +2024-09-18 04:05:24,445 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.81 vs. limit=22.5 +2024-09-18 04:05:25,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=365120.0, ans=0.2 +2024-09-18 04:05:29,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=365160.0, ans=0.1 +2024-09-18 04:05:31,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=365160.0, ans=0.1 +2024-09-18 04:05:46,211 INFO [train.py:1198] (0/2) Epoch 21, batch 800, loss[loss=0.2328, ctc_loss=0.1244, cr_loss=0.3493, attn_decoder_loss=0.237, over 29624.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1342, cr_loss=0.3764, attn_decoder_loss=0.2489, over 5706105.37 frames. ], batch size: 73, lr: 5.35e-03, grad_scale: 16.0 +2024-09-18 04:05:51,887 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.62 vs. limit=15.0 +2024-09-18 04:05:56,668 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 8.575e+01 9.275e+01 9.797e+01 6.839e+02, threshold=1.855e+02, percent-clipped=2.0 +2024-09-18 04:06:03,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=365240.0, ans=0.2 +2024-09-18 04:06:09,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.64 vs. limit=12.0 +2024-09-18 04:06:14,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=365240.0, ans=0.1 +2024-09-18 04:06:16,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=15.18 vs. limit=15.0 +2024-09-18 04:06:44,804 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.09 vs. 
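limit=22.5

The optim.py:487 warnings report gradient-norm statistics over a recent window of steps: five quantiles (min, 25%, median, 75%, max) plus a clipping threshold, and in every record here the threshold equals Clipping_scale (2.0) times the median quantile (e.g. 2.0 x 9.275e+01 = 1.855e+02 just above). A sketch of clipping against such a running-median threshold; the window size and the exact bookkeeping are assumptions:

from collections import deque

import torch

class QuantileClipper:
    """Illustrative: clip the global gradient norm to clipping_scale times
    the median of recently observed norms, as the logged thresholds imply."""

    def __init__(self, clipping_scale: float = 2.0, window: int = 1000):
        self.scale = clipping_scale
        self.history = deque(maxlen=window)

    def __call__(self, params) -> float:
        grads = [p.grad for p in params if p.grad is not None]
        norm = torch.linalg.vector_norm(
            torch.stack([torch.linalg.vector_norm(g) for g in grads]))
        self.history.append(norm.item())
        qs = torch.quantile(torch.tensor(list(self.history)),
                            torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = self.scale * qs[2].item()  # 2.0 * median, as in the log
        if norm.item() > threshold:            # counted as "percent-clipped"
            for g in grads:
                g.mul_(threshold / norm.item())
        return threshold
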
+2024-09-18 04:07:06,319 INFO [train.py:1198] (0/2) Epoch 21, batch 850, loss[loss=0.2523, ctc_loss=0.1347, cr_loss=0.3792, attn_decoder_loss=0.2569, over 29725.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1341, cr_loss=0.3759, attn_decoder_loss=0.2488, over 5736131.57 frames. ], batch size: 89, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:07:12,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=365400.0, ans=0.0 +2024-09-18 04:07:49,355 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.66 vs. limit=15.0 +2024-09-18 04:07:50,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=365520.0, ans=0.0 +2024-09-18 04:08:04,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=365520.0, ans=0.125 +2024-09-18 04:08:18,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=365560.0, ans=0.0 +2024-09-18 04:08:22,631 INFO [train.py:1198] (0/2) Epoch 21, batch 900, loss[loss=0.2274, ctc_loss=0.1249, cr_loss=0.3463, attn_decoder_loss=0.231, over 29586.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1341, cr_loss=0.3759, attn_decoder_loss=0.2491, over 5739978.17 frames. ], batch size: 73, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:08:34,648 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.737e+01 8.573e+01 9.119e+01 9.639e+01 3.066e+02, threshold=1.824e+02, percent-clipped=3.0 +2024-09-18 04:08:48,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=365640.0, ans=0.0 +2024-09-18 04:08:51,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=365680.0, ans=0.1 +2024-09-18 04:08:53,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=365680.0, ans=0.125 +2024-09-18 04:08:57,127 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=16.13 vs. limit=22.5 +2024-09-18 04:09:23,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=365760.0, ans=0.125 +2024-09-18 04:09:35,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=365760.0, ans=0.95 +2024-09-18 04:09:38,096 INFO [train.py:1198] (0/2) Epoch 21, batch 950, loss[loss=0.2284, ctc_loss=0.121, cr_loss=0.3508, attn_decoder_loss=0.2325, over 29523.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1344, cr_loss=0.3761, attn_decoder_loss=0.2493, over 5741870.51 frames.
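], batch size: 74, lr: 5.34e-03, grad_scale: 8.0

Each train.py:1198 record prints a per-batch loss[...] and a running tot_loss[...], each broken into loss, ctc_loss, cr_loss (consistency regularization) and attn_decoder_loss. The experiment-directory name that appears later in this log encodes the weights (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02), and the printed totals are consistent with that weighted sum, e.g. for the batch 950 totals just above:

# tot_loss components for Epoch 21, batch 950, from the record above:
ctc_loss, cr_loss, attn_decoder_loss = 0.1344, 0.3761, 0.2493

# Weights read off the experiment-directory name saved later in this log:
loss = 0.1 * ctc_loss + 0.9 * attn_decoder_loss + 0.02 * cr_loss
print(round(loss, 4))  # 0.2453 -- the tot_loss reported above
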
+2024-09-18 04:10:10,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=365880.0, ans=0.1 +2024-09-18 04:10:26,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=365920.0, ans=0.015 +2024-09-18 04:10:47,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=365960.0, ans=0.125 +2024-09-18 04:10:53,254 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.16 vs. limit=15.0 +2024-09-18 04:10:58,218 INFO [train.py:1198] (0/2) Epoch 21, batch 1000, loss[loss=0.237, ctc_loss=0.1223, cr_loss=0.3637, attn_decoder_loss=0.2417, over 29525.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1355, cr_loss=0.3775, attn_decoder_loss=0.2502, over 5737720.63 frames. ], batch size: 77, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:11:10,224 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.656e+01 8.708e+01 9.150e+01 9.911e+01 2.107e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-18 04:11:25,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=366040.0, ans=0.125 +2024-09-18 04:11:39,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=366080.0, ans=0.95 +2024-09-18 04:11:44,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=366120.0, ans=0.0 +2024-09-18 04:12:13,877 INFO [train.py:1198] (0/2) Epoch 21, batch 1050, loss[loss=0.2422, ctc_loss=0.1187, cr_loss=0.3332, attn_decoder_loss=0.2485, over 29680.00 frames. ], tot_loss[loss=0.2457, ctc_loss=0.1351, cr_loss=0.3768, attn_decoder_loss=0.2496, over 5744884.25 frames. ], batch size: 85, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:12:49,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=366280.0, ans=0.125 +2024-09-18 04:13:30,146 INFO [train.py:1198] (0/2) Epoch 21, batch 1100, loss[loss=0.2345, ctc_loss=0.1221, cr_loss=0.3579, attn_decoder_loss=0.239, over 29428.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1347, cr_loss=0.3768, attn_decoder_loss=0.2493, over 5756321.89 frames. ], batch size: 78, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:13:37,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=366400.0, ans=0.1 +2024-09-18 04:13:42,196 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.168e+01 8.489e+01 9.148e+01 9.741e+01 7.755e+02, threshold=1.830e+02, percent-clipped=3.0 +2024-09-18 04:13:53,960 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.64 vs.
limit=8.0 +2024-09-18 04:14:12,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=366480.0, ans=0.2 +2024-09-18 04:14:35,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=366560.0, ans=0.125 +2024-09-18 04:14:50,298 INFO [train.py:1198] (0/2) Epoch 21, batch 1150, loss[loss=0.2448, ctc_loss=0.1362, cr_loss=0.3895, attn_decoder_loss=0.2483, over 29437.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1346, cr_loss=0.3765, attn_decoder_loss=0.2492, over 5754288.63 frames. ], batch size: 78, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:15:34,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=366720.0, ans=0.125 +2024-09-18 04:15:58,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=366760.0, ans=0.125 +2024-09-18 04:16:05,938 INFO [train.py:1198] (0/2) Epoch 21, batch 1200, loss[loss=0.2609, ctc_loss=0.1439, cr_loss=0.389, attn_decoder_loss=0.2653, over 29670.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1352, cr_loss=0.3776, attn_decoder_loss=0.25, over 5746495.98 frames. ], batch size: 85, lr: 5.33e-03, grad_scale: 16.0 +2024-09-18 04:16:06,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=366800.0, ans=0.125 +2024-09-18 04:16:10,097 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.21 vs. limit=15.0 +2024-09-18 04:16:13,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=366800.0, ans=0.5 +2024-09-18 04:16:13,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=366800.0, ans=0.125 +2024-09-18 04:16:19,654 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.533e+01 8.603e+01 9.203e+01 9.910e+01 1.694e+02, threshold=1.841e+02, percent-clipped=0.0 +2024-09-18 04:16:20,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=366840.0, ans=0.125 +2024-09-18 04:16:20,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=366840.0, ans=0.125 +2024-09-18 04:16:21,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=366840.0, ans=0.025 +2024-09-18 04:16:24,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=366840.0, ans=0.1 +2024-09-18 04:16:27,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=366840.0, ans=0.125 +2024-09-18 04:17:20,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=367000.0, ans=0.125 +2024-09-18 04:17:22,070 INFO [train.py:1198] (0/2) Epoch 21, batch 1250, loss[loss=0.2676, ctc_loss=0.1571, cr_loss=0.4356, attn_decoder_loss=0.2702, over 29544.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1355, cr_loss=0.3787, attn_decoder_loss=0.2504, over 5773152.46 frames. 
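], batch size: 92, lr: 5.33e-03, grad_scale: 8.0

Many of the scheduled values in these records belong to balancer modules (balancer1.prob, balancer.min_positive, balancer.max_positive, min_abs): constraints on activation statistics that leave the forward value unchanged and only nudge gradients when a channel drifts out of range, applied stochastically with probability prob. A much-simplified sketch of that idea, constraining only the fraction of positive values per channel (the real scaling.py Balancer handles more statistics and computes its correction differently):

import random

import torch

class _NudgeGrad(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, correction):
        ctx.save_for_backward(correction)
        return x  # identity in the forward pass

    @staticmethod
    def backward(ctx, grad):
        (correction,) = ctx.saved_tensors
        # Subtracting `correction` acts like an extra loss term -correction*x,
        # so positive corrections push activations up, negative ones down.
        return grad - correction, None

class SimpleBalancer(torch.nn.Module):
    def __init__(self, min_positive=0.05, max_positive=0.95,
                 prob=0.125, grad_scale=0.01):
        super().__init__()
        self.min_positive, self.max_positive = min_positive, max_positive
        self.prob, self.grad_scale = prob, grad_scale

    def forward(self, x):  # x: (num_frames, num_channels)
        if not self.training or random.random() >= self.prob:
            return x
        pos_frac = (x > 0).float().mean(dim=0)  # per-channel positive fraction
        correction = self.grad_scale * (
            (pos_frac < self.min_positive).float()
            - (pos_frac > self.max_positive).float())
        return _NudgeGrad.apply(x, correction)
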
+2024-09-18 04:17:52,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=367080.0, ans=0.1 +2024-09-18 04:17:56,834 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.10 vs. limit=22.5 +2024-09-18 04:18:34,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=367160.0, ans=0.125 +2024-09-18 04:18:40,395 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.25 vs. limit=15.0 +2024-09-18 04:18:41,094 INFO [train.py:1198] (0/2) Epoch 21, batch 1300, loss[loss=0.2528, ctc_loss=0.1407, cr_loss=0.3992, attn_decoder_loss=0.2563, over 28318.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1352, cr_loss=0.3776, attn_decoder_loss=0.2499, over 5777995.78 frames. ], batch size: 112, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:18:52,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=367200.0, ans=0.125 +2024-09-18 04:18:54,777 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.511e+01 8.475e+01 9.131e+01 9.688e+01 1.292e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-18 04:19:03,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.53 vs. limit=6.0 +2024-09-18 04:19:23,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=367280.0, ans=0.0 +2024-09-18 04:19:26,031 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.19 vs. limit=15.0 +2024-09-18 04:19:29,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.42 vs. limit=15.0 +2024-09-18 04:19:30,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=367320.0, ans=0.0 +2024-09-18 04:19:40,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=367360.0, ans=0.0 +2024-09-18 04:19:57,012 INFO [train.py:1198] (0/2) Epoch 21, batch 1350, loss[loss=0.2492, ctc_loss=0.1382, cr_loss=0.4028, attn_decoder_loss=0.2525, over 29746.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1348, cr_loss=0.3772, attn_decoder_loss=0.2496, over 5795295.43 frames.
], batch size: 81, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:20:18,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=367440.0, ans=0.035 +2024-09-18 04:20:36,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=367480.0, ans=0.0 +2024-09-18 04:20:45,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=367520.0, ans=0.125 +2024-09-18 04:20:45,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=367520.0, ans=0.125 +2024-09-18 04:20:55,373 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.20 vs. limit=12.0 +2024-09-18 04:21:12,480 INFO [train.py:1198] (0/2) Epoch 21, batch 1400, loss[loss=0.2159, ctc_loss=0.1177, cr_loss=0.346, attn_decoder_loss=0.2192, over 29596.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1348, cr_loss=0.3773, attn_decoder_loss=0.2494, over 5807043.58 frames. ], batch size: 69, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:21:24,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=367600.0, ans=0.2 +2024-09-18 04:21:25,902 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.911e+01 8.438e+01 9.001e+01 9.853e+01 2.309e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-18 04:21:29,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=367640.0, ans=0.2 +2024-09-18 04:21:56,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=367720.0, ans=0.025 +2024-09-18 04:21:59,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=367720.0, ans=0.0 +2024-09-18 04:22:14,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=367720.0, ans=0.125 +2024-09-18 04:22:31,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=367800.0, ans=0.0 +2024-09-18 04:22:32,120 INFO [train.py:1198] (0/2) Epoch 21, batch 1450, loss[loss=0.2623, ctc_loss=0.1504, cr_loss=0.4141, attn_decoder_loss=0.2656, over 29418.00 frames. ], tot_loss[loss=0.2457, ctc_loss=0.1348, cr_loss=0.3777, attn_decoder_loss=0.2496, over 5803048.04 frames. ], batch size: 94, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:22:52,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=367840.0, ans=0.05 +2024-09-18 04:23:07,882 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=15.99 vs. 
limit=15.0 +2024-09-18 04:23:17,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=367920.0, ans=0.1 +2024-09-18 04:23:19,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=367920.0, ans=0.2 +2024-09-18 04:23:37,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=367960.0, ans=0.125 +2024-09-18 04:23:46,784 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-92000.pt +2024-09-18 04:23:55,039 INFO [train.py:1198] (0/2) Epoch 21, batch 1500, loss[loss=0.251, ctc_loss=0.1369, cr_loss=0.3836, attn_decoder_loss=0.2552, over 29608.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1352, cr_loss=0.3784, attn_decoder_loss=0.2501, over 5804032.41 frames. ], batch size: 86, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:24:08,787 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.505e+01 8.602e+01 9.157e+01 9.632e+01 2.068e+02, threshold=1.831e+02, percent-clipped=2.0 +2024-09-18 04:24:27,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=368080.0, ans=0.1 +2024-09-18 04:24:28,048 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.93 vs. limit=15.0 +2024-09-18 04:24:32,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=368080.0, ans=0.07 +2024-09-18 04:24:34,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.75 vs. limit=10.0 +2024-09-18 04:24:47,494 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 04:24:55,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=368160.0, ans=0.0 +2024-09-18 04:24:56,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=368160.0, ans=0.0 +2024-09-18 04:25:11,419 INFO [train.py:1198] (0/2) Epoch 21, batch 1550, loss[loss=0.2681, ctc_loss=0.1524, cr_loss=0.4118, attn_decoder_loss=0.2718, over 29504.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1358, cr_loss=0.3792, attn_decoder_loss=0.2503, over 5780367.76 frames. ], batch size: 90, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:25:16,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=368200.0, ans=0.0 +2024-09-18 04:25:21,436 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.06 vs. limit=15.0 +2024-09-18 04:25:28,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=368240.0, ans=0.125 +2024-09-18 04:25:31,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.69 vs. 
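limit=15.0

The checkpoint.py:75 record above saves a mid-epoch snapshot keyed by the cumulative batch counter (checkpoint-92000.pt) into an experiment directory whose name encodes the configured loss scales. A sketch of periodic batch-count checkpointing; the 4000-batch interval and the saved payload below are assumptions (92000 happens to be divisible by 4000, but the log does not state the interval):

import torch

def maybe_save_checkpoint(model, optimizer, scheduler, exp_dir: str,
                          batch_idx_train: int, save_every_n: int = 4000):
    """Save a resumable snapshot every `save_every_n` global batches."""
    if batch_idx_train == 0 or batch_idx_train % save_every_n != 0:
        return None
    path = f"{exp_dir}/checkpoint-{batch_idx_train}.pt"
    torch.save(
        {
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
            "batch_idx_train": batch_idx_train,
        },
        path,
    )
    return path
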
+2024-09-18 04:25:40,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=368280.0, ans=0.2 +2024-09-18 04:25:57,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=368320.0, ans=0.125 +2024-09-18 04:26:14,036 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.01 vs. limit=15.0 +2024-09-18 04:26:17,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.44 vs. limit=12.0 +2024-09-18 04:26:24,913 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.42 vs. limit=15.0 +2024-09-18 04:26:31,469 INFO [train.py:1198] (0/2) Epoch 21, batch 1600, loss[loss=0.2542, ctc_loss=0.1386, cr_loss=0.3837, attn_decoder_loss=0.2585, over 29670.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1358, cr_loss=0.3792, attn_decoder_loss=0.25, over 5764326.21 frames. ], batch size: 85, lr: 5.32e-03, grad_scale: 16.0 +2024-09-18 04:26:33,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=368400.0, ans=0.125 +2024-09-18 04:26:39,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=368400.0, ans=0.125 +2024-09-18 04:26:39,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=368400.0, ans=0.025 +2024-09-18 04:26:46,820 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.333e+01 8.519e+01 9.030e+01 9.960e+01 2.636e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-18 04:27:23,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=368520.0, ans=0.0 +2024-09-18 04:27:29,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=368520.0, ans=0.05 +2024-09-18 04:27:41,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=368560.0, ans=0.04949747468305833 +2024-09-18 04:27:47,304 INFO [train.py:1198] (0/2) Epoch 21, batch 1650, loss[loss=0.2437, ctc_loss=0.121, cr_loss=0.3539, attn_decoder_loss=0.2494, over 29704.00 frames. ], tot_loss[loss=0.2457, ctc_loss=0.1354, cr_loss=0.3786, attn_decoder_loss=0.2495, over 5758713.34 frames. ], batch size: 89, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:27:53,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=368600.0, ans=0.125 +2024-09-18 04:28:02,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=368640.0, ans=0.125 +2024-09-18 04:28:33,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=368720.0, ans=0.125 +2024-09-18 04:28:47,626 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.35 vs.
limit=6.0 +2024-09-18 04:28:50,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.37 vs. limit=15.0 +2024-09-18 04:29:03,548 INFO [train.py:1198] (0/2) Epoch 21, batch 1700, loss[loss=0.2152, ctc_loss=0.1165, cr_loss=0.3589, attn_decoder_loss=0.2182, over 29575.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1346, cr_loss=0.3774, attn_decoder_loss=0.2493, over 5782511.51 frames. ], batch size: 69, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:29:18,920 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.057e+01 8.456e+01 9.072e+01 9.555e+01 1.411e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-18 04:30:08,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=368960.0, ans=0.025 +2024-09-18 04:30:10,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=368960.0, ans=0.0 +2024-09-18 04:30:13,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=368960.0, ans=0.125 +2024-09-18 04:30:19,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=368960.0, ans=0.0 +2024-09-18 04:30:23,632 INFO [train.py:1198] (0/2) Epoch 21, batch 1750, loss[loss=0.2159, ctc_loss=0.111, cr_loss=0.3279, attn_decoder_loss=0.2203, over 29384.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1346, cr_loss=0.3782, attn_decoder_loss=0.2494, over 5789846.95 frames. ], batch size: 67, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:30:24,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=369000.0, ans=0.125 +2024-09-18 04:30:35,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=369000.0, ans=0.0 +2024-09-18 04:30:51,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=369040.0, ans=0.125 +2024-09-18 04:30:55,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=369080.0, ans=0.125 +2024-09-18 04:30:55,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=369080.0, ans=0.05 +2024-09-18 04:31:32,492 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.44 vs. limit=15.0 +2024-09-18 04:31:37,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=369200.0, ans=0.1 +2024-09-18 04:31:38,782 INFO [train.py:1198] (0/2) Epoch 21, batch 1800, loss[loss=0.2666, ctc_loss=0.1566, cr_loss=0.4359, attn_decoder_loss=0.2692, over 29683.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1344, cr_loss=0.3773, attn_decoder_loss=0.2492, over 5792322.69 frames. ], batch size: 83, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:31:47,577 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.47 vs. 
limit=15.0 +2024-09-18 04:31:49,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=369200.0, ans=0.125 +2024-09-18 04:31:51,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=369200.0, ans=0.0 +2024-09-18 04:31:54,056 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.817e+01 8.570e+01 9.201e+01 9.986e+01 1.467e+02, threshold=1.840e+02, percent-clipped=0.0 +2024-09-18 04:32:11,534 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.87 vs. limit=15.0 +2024-09-18 04:32:20,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=369280.0, ans=0.1 +2024-09-18 04:32:54,989 INFO [train.py:1198] (0/2) Epoch 21, batch 1850, loss[loss=0.2527, ctc_loss=0.1371, cr_loss=0.3844, attn_decoder_loss=0.257, over 29628.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1346, cr_loss=0.3777, attn_decoder_loss=0.2492, over 5797270.39 frames. ], batch size: 86, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:33:11,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=369440.0, ans=0.125 +2024-09-18 04:33:21,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=369440.0, ans=0.0 +2024-09-18 04:33:26,603 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.24 vs. limit=15.0 +2024-09-18 04:33:46,617 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.38 vs. limit=15.0 +2024-09-18 04:34:12,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=369560.0, ans=0.125 +2024-09-18 04:34:12,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=369560.0, ans=0.125 +2024-09-18 04:34:15,227 INFO [train.py:1198] (0/2) Epoch 21, batch 1900, loss[loss=0.2619, ctc_loss=0.143, cr_loss=0.4013, attn_decoder_loss=0.2662, over 29718.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1352, cr_loss=0.3784, attn_decoder_loss=0.2501, over 5805525.55 frames. ], batch size: 89, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:34:17,742 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.76 vs. limit=15.0 +2024-09-18 04:34:30,317 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.394e+01 8.618e+01 9.006e+01 9.728e+01 3.211e+02, threshold=1.801e+02, percent-clipped=2.0 +2024-09-18 04:35:13,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=369720.0, ans=0.125 +2024-09-18 04:35:25,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=369760.0, ans=0.2 +2024-09-18 04:35:31,126 INFO [train.py:1198] (0/2) Epoch 21, batch 1950, loss[loss=0.2425, ctc_loss=0.1335, cr_loss=0.3849, attn_decoder_loss=0.2461, over 29434.00 frames. 
], tot_loss[loss=0.2471, ctc_loss=0.136, cr_loss=0.3802, attn_decoder_loss=0.251, over 5820038.99 frames. ], batch size: 78, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:35:40,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=369800.0, ans=0.125 +2024-09-18 04:36:00,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=369880.0, ans=0.07 +2024-09-18 04:36:05,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.04 vs. limit=10.0 +2024-09-18 04:36:45,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=370000.0, ans=0.125 +2024-09-18 04:36:46,465 INFO [train.py:1198] (0/2) Epoch 21, batch 2000, loss[loss=0.2208, ctc_loss=0.1189, cr_loss=0.3429, attn_decoder_loss=0.2245, over 29340.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1362, cr_loss=0.3802, attn_decoder_loss=0.2511, over 5798736.27 frames. ], batch size: 67, lr: 5.31e-03, grad_scale: 16.0 +2024-09-18 04:37:01,560 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.001e+01 8.831e+01 9.227e+01 9.765e+01 5.439e+02, threshold=1.845e+02, percent-clipped=1.0 +2024-09-18 04:37:04,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=370040.0, ans=0.0 +2024-09-18 04:37:49,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.79 vs. limit=12.0 +2024-09-18 04:38:05,839 INFO [train.py:1198] (0/2) Epoch 21, batch 2050, loss[loss=0.2112, ctc_loss=0.1036, cr_loss=0.3155, attn_decoder_loss=0.2161, over 29405.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1352, cr_loss=0.378, attn_decoder_loss=0.2499, over 5788255.34 frames. ], batch size: 70, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:38:12,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=370200.0, ans=0.125 +2024-09-18 04:38:33,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=370240.0, ans=0.0 +2024-09-18 04:38:39,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=370280.0, ans=0.125 +2024-09-18 04:38:44,072 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 04:38:44,690 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.22 vs. limit=22.5 +2024-09-18 04:39:21,719 INFO [train.py:1198] (0/2) Epoch 21, batch 2100, loss[loss=0.239, ctc_loss=0.1282, cr_loss=0.35, attn_decoder_loss=0.2435, over 29756.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1351, cr_loss=0.3776, attn_decoder_loss=0.2497, over 5800597.69 frames. 
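], batch size: 81, lr: 5.31e-03, grad_scale: 8.0

The grad_scale field in the batch records is the mixed-precision loss scale: it sits at 8.0, reads 16.0 at batch 2000 above, and is back to 8.0 by batch 2050, which is the signature of a dynamic scaler that doubles after a run of overflow-free steps and halves when gradients overflow. A sketch of one training step with PyTorch's GradScaler; the growth/backoff settings below are assumptions, not values read from the log:

import torch

scaler = torch.cuda.amp.GradScaler(init_scale=8.0, growth_factor=2.0,
                                   backoff_factor=0.5, growth_interval=400)

def train_step(model, optimizer, batch, compute_loss):
    optimizer.zero_grad(set_to_none=True)
    with torch.cuda.amp.autocast(dtype=torch.float16):
        loss = compute_loss(model, batch)
    scaler.scale(loss).backward()   # backward on the scaled loss
    scaler.step(optimizer)          # skips the update if grads overflowed
    scaler.update()                 # doubles or halves the scale as needed
    return loss.detach(), scaler.get_scale()  # the logged "grad_scale"
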
+2024-09-18 04:39:38,257 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.389e+01 8.297e+01 8.835e+01 9.326e+01 1.551e+02, threshold=1.767e+02, percent-clipped=0.0 +2024-09-18 04:40:10,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=370520.0, ans=0.125 +2024-09-18 04:40:11,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=370520.0, ans=0.1 +2024-09-18 04:40:33,421 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.34 vs. limit=15.0 +2024-09-18 04:40:37,246 INFO [train.py:1198] (0/2) Epoch 21, batch 2150, loss[loss=0.237, ctc_loss=0.1317, cr_loss=0.3855, attn_decoder_loss=0.2402, over 29463.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1344, cr_loss=0.3765, attn_decoder_loss=0.2491, over 5815216.16 frames. ], batch size: 78, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:41:00,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=370640.0, ans=0.0 +2024-09-18 04:41:03,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=370640.0, ans=0.125 +2024-09-18 04:41:11,115 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.09 vs. limit=15.0 +2024-09-18 04:41:20,504 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.36 vs. limit=6.0 +2024-09-18 04:41:54,703 INFO [train.py:1198] (0/2) Epoch 21, batch 2200, loss[loss=0.252, ctc_loss=0.1367, cr_loss=0.364, attn_decoder_loss=0.2567, over 29640.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.135, cr_loss=0.3775, attn_decoder_loss=0.2494, over 5811852.56 frames. ], batch size: 86, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:42:02,120 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.91 vs. limit=6.0 +2024-09-18 04:42:13,472 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.585e+01 9.031e+01 9.683e+01 2.928e+02, threshold=1.806e+02, percent-clipped=3.0 +2024-09-18 04:42:27,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=370880.0, ans=0.025 +2024-09-18 04:42:50,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=370920.0, ans=0.125 +2024-09-18 04:42:52,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.66 vs. limit=15.0 +2024-09-18 04:43:12,504 INFO [train.py:1198] (0/2) Epoch 21, batch 2250, loss[loss=0.2617, ctc_loss=0.149, cr_loss=0.4164, attn_decoder_loss=0.265, over 29713.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1347, cr_loss=0.3768, attn_decoder_loss=0.2494, over 5810428.16 frames.
], batch size: 82, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:43:20,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=371000.0, ans=0.1 +2024-09-18 04:43:24,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=371000.0, ans=0.125 +2024-09-18 04:43:33,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=371040.0, ans=0.0 +2024-09-18 04:43:35,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=371040.0, ans=0.125 +2024-09-18 04:43:54,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=371080.0, ans=0.1 +2024-09-18 04:44:06,431 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.14 vs. limit=22.5 +2024-09-18 04:44:28,725 INFO [train.py:1198] (0/2) Epoch 21, batch 2300, loss[loss=0.2097, ctc_loss=0.1065, cr_loss=0.3099, attn_decoder_loss=0.2142, over 29308.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1339, cr_loss=0.3749, attn_decoder_loss=0.2483, over 5799808.97 frames. ], batch size: 71, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:44:33,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=371200.0, ans=0.125 +2024-09-18 04:44:40,305 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.73 vs. limit=15.0 +2024-09-18 04:44:45,172 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.457e+01 8.450e+01 8.935e+01 9.776e+01 2.210e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 04:45:10,626 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.17 vs. limit=22.5 +2024-09-18 04:45:12,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=371320.0, ans=0.0 +2024-09-18 04:45:31,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=371360.0, ans=0.125 +2024-09-18 04:45:46,540 INFO [train.py:1198] (0/2) Epoch 21, batch 2350, loss[loss=0.2625, ctc_loss=0.1527, cr_loss=0.4052, attn_decoder_loss=0.2657, over 29697.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1339, cr_loss=0.3755, attn_decoder_loss=0.2485, over 5805105.83 frames. ], batch size: 83, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:45:54,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=371400.0, ans=0.1 +2024-09-18 04:45:55,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=371400.0, ans=0.2 +2024-09-18 04:45:58,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=371400.0, ans=0.04949747468305833 +2024-09-18 04:46:02,920 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.04 vs. 
limit=15.0 +2024-09-18 04:46:08,880 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 04:46:22,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=371480.0, ans=0.0 +2024-09-18 04:46:36,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=371520.0, ans=0.05 +2024-09-18 04:47:00,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=371560.0, ans=0.125 +2024-09-18 04:47:04,812 INFO [train.py:1198] (0/2) Epoch 21, batch 2400, loss[loss=0.2303, ctc_loss=0.1245, cr_loss=0.3388, attn_decoder_loss=0.2346, over 29556.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1346, cr_loss=0.3772, attn_decoder_loss=0.2491, over 5808906.62 frames. ], batch size: 76, lr: 5.30e-03, grad_scale: 16.0 +2024-09-18 04:47:11,682 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.05 vs. limit=12.0 +2024-09-18 04:47:21,500 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.073e+01 8.473e+01 9.186e+01 9.665e+01 3.026e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-18 04:47:37,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=371680.0, ans=0.125 +2024-09-18 04:47:37,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=371680.0, ans=0.125 +2024-09-18 04:47:40,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=371680.0, ans=0.07 +2024-09-18 04:47:55,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=371720.0, ans=0.125 +2024-09-18 04:48:04,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=371760.0, ans=0.125 +2024-09-18 04:48:20,901 INFO [train.py:1198] (0/2) Epoch 21, batch 2450, loss[loss=0.2486, ctc_loss=0.1296, cr_loss=0.3744, attn_decoder_loss=0.2535, over 29699.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1352, cr_loss=0.3777, attn_decoder_loss=0.25, over 5786183.88 frames. ], batch size: 82, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:48:27,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.74 vs. limit=15.0 +2024-09-18 04:48:33,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=371800.0, ans=0.125 +2024-09-18 04:49:14,985 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.36 vs. limit=22.5 +2024-09-18 04:49:19,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=371920.0, ans=0.125 +2024-09-18 04:49:38,820 INFO [train.py:1198] (0/2) Epoch 21, batch 2500, loss[loss=0.2543, ctc_loss=0.1367, cr_loss=0.3625, attn_decoder_loss=0.2593, over 29627.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1349, cr_loss=0.3775, attn_decoder_loss=0.2497, over 5795549.00 frames. 
], batch size: 86, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:49:45,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=372000.0, ans=0.125 +2024-09-18 04:49:55,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=372040.0, ans=0.1 +2024-09-18 04:49:59,136 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.020e+01 8.495e+01 9.101e+01 9.738e+01 1.875e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-18 04:50:07,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=372040.0, ans=0.0 +2024-09-18 04:50:19,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=372080.0, ans=0.125 +2024-09-18 04:50:57,254 INFO [train.py:1198] (0/2) Epoch 21, batch 2550, loss[loss=0.2214, ctc_loss=0.1122, cr_loss=0.357, attn_decoder_loss=0.2256, over 29308.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1343, cr_loss=0.3767, attn_decoder_loss=0.2496, over 5797809.81 frames. ], batch size: 67, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:51:10,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=372240.0, ans=0.0 +2024-09-18 04:51:15,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=372240.0, ans=0.125 +2024-09-18 04:51:22,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=372240.0, ans=0.125 +2024-09-18 04:51:23,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=372240.0, ans=0.2 +2024-09-18 04:51:30,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=372280.0, ans=0.1 +2024-09-18 04:52:04,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=372360.0, ans=0.125 +2024-09-18 04:52:09,321 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.32 vs. limit=15.0 +2024-09-18 04:52:13,127 INFO [train.py:1198] (0/2) Epoch 21, batch 2600, loss[loss=0.2404, ctc_loss=0.1301, cr_loss=0.3978, attn_decoder_loss=0.2439, over 29467.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1345, cr_loss=0.3773, attn_decoder_loss=0.2498, over 5794428.99 frames. 
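], batch size: 78, lr: 5.29e-03, grad_scale: 8.0

The scaling.py:1024 Whitening records compare a per-module metric against a limit (metric=11.32 vs. limit=15.0 just above). One metric with this behavior is E[eig^2] / E[eig]^2 over the eigenvalues of the feature covariance: it equals 1.0 for perfectly white features and grows as the covariance concentrates in a few directions, so a module would apply a corrective gradient only while its metric exceeds the limit. A sketch under that assumption (icefall's exact formula and the handling of num_groups may differ):

import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    """x: (num_frames, num_channels) activations for one whitening group.
    Returns E[eig^2] / E[eig]^2 of the covariance, computed without an
    eigendecomposition as (||C||_F^2 / d) / (trace(C) / d)^2."""
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.t() @ x) / x.shape[0]
    d = cov.shape[0]
    mean_eig_sq = (cov * cov).sum() / d      # sum of squared eigenvalues / d
    mean_eig = torch.diagonal(cov).mean()    # trace / d
    return mean_eig_sq / (mean_eig ** 2 + 1e-20)

x = torch.randn(10000, 512)                  # near-white features
print(float(whitening_metric(x)))            # close to 1.0, far below limit=15.0
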
+2024-09-18 04:52:16,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=372400.0, ans=0.2 +2024-09-18 04:52:28,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=372440.0, ans=0.125 +2024-09-18 04:52:31,053 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.711e+01 8.657e+01 9.187e+01 9.794e+01 2.069e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-18 04:52:34,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=372440.0, ans=0.125 +2024-09-18 04:52:44,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=372480.0, ans=0.0 +2024-09-18 04:52:54,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=372480.0, ans=0.1 +2024-09-18 04:53:00,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=372520.0, ans=0.125 +2024-09-18 04:53:02,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=372520.0, ans=0.025 +2024-09-18 04:53:02,650 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.05 vs. limit=15.0 +2024-09-18 04:53:08,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=372520.0, ans=0.1 +2024-09-18 04:53:18,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=372560.0, ans=0.125 +2024-09-18 04:53:30,485 INFO [train.py:1198] (0/2) Epoch 21, batch 2650, loss[loss=0.2641, ctc_loss=0.1499, cr_loss=0.4085, attn_decoder_loss=0.2678, over 29302.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1349, cr_loss=0.378, attn_decoder_loss=0.2502, over 5801669.82 frames. ], batch size: 100, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:53:43,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=372600.0, ans=0.05 +2024-09-18 04:53:56,658 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.47 vs. limit=10.0 +2024-09-18 04:54:01,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=372680.0, ans=0.0 +2024-09-18 04:54:08,256 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.86 vs. limit=15.0 +2024-09-18 04:54:12,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=372680.0, ans=0.125 +2024-09-18 04:54:15,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.22 vs.
limit=22.5 +2024-09-18 04:54:22,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=372720.0, ans=0.125 +2024-09-18 04:54:29,358 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.88 vs. limit=15.0 +2024-09-18 04:54:31,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=372760.0, ans=0.125 +2024-09-18 04:54:42,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=372760.0, ans=0.0 +2024-09-18 04:54:48,398 INFO [train.py:1198] (0/2) Epoch 21, batch 2700, loss[loss=0.2517, ctc_loss=0.1462, cr_loss=0.3928, attn_decoder_loss=0.2547, over 29544.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1351, cr_loss=0.3785, attn_decoder_loss=0.2506, over 5798070.67 frames. ], batch size: 87, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:54:51,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=372800.0, ans=0.07 +2024-09-18 04:55:06,490 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.257e+01 8.585e+01 9.069e+01 9.661e+01 1.375e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-18 04:55:12,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=372840.0, ans=0.125 +2024-09-18 04:55:16,594 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.36 vs. limit=6.0 +2024-09-18 04:55:36,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=372920.0, ans=0.125 +2024-09-18 04:55:55,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=372960.0, ans=0.2 +2024-09-18 04:56:04,540 INFO [train.py:1198] (0/2) Epoch 21, batch 2750, loss[loss=0.2408, ctc_loss=0.136, cr_loss=0.385, attn_decoder_loss=0.2439, over 29493.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1342, cr_loss=0.3768, attn_decoder_loss=0.2493, over 5795978.54 frames. ], batch size: 75, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:56:06,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer_ff2.min_abs, batch_count=373000.0, ans=0.1 +2024-09-18 04:56:06,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=373000.0, ans=0.1 +2024-09-18 04:56:29,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=373040.0, ans=0.0 +2024-09-18 04:56:32,360 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.32 vs. 
limit=22.5 +2024-09-18 04:56:48,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=373120.0, ans=0.125 +2024-09-18 04:57:17,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=373160.0, ans=0.0 +2024-09-18 04:57:19,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=373160.0, ans=0.125 +2024-09-18 04:57:22,200 INFO [train.py:1198] (0/2) Epoch 21, batch 2800, loss[loss=0.2648, ctc_loss=0.1701, cr_loss=0.3938, attn_decoder_loss=0.2666, over 20423.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1345, cr_loss=0.3771, attn_decoder_loss=0.2495, over 5776161.96 frames. ], batch size: 209, lr: 5.29e-03, grad_scale: 16.0 +2024-09-18 04:57:22,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=373200.0, ans=0.125 +2024-09-18 04:57:22,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=373200.0, ans=0.0 +2024-09-18 04:57:24,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=373200.0, ans=0.0 +2024-09-18 04:57:26,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=373200.0, ans=0.0 +2024-09-18 04:57:43,971 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.572e+01 8.642e+01 9.187e+01 1.013e+02 2.371e+02, threshold=1.837e+02, percent-clipped=3.0 +2024-09-18 04:57:54,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=373280.0, ans=0.125 +2024-09-18 04:58:01,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=373280.0, ans=0.0 +2024-09-18 04:58:10,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=373320.0, ans=0.125 +2024-09-18 04:58:19,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=373320.0, ans=0.2 +2024-09-18 04:58:25,886 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.54 vs. limit=15.0 +2024-09-18 04:58:29,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=373360.0, ans=0.125 +2024-09-18 04:58:35,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=373360.0, ans=0.0 +2024-09-18 04:58:40,012 INFO [train.py:1198] (0/2) Epoch 21, batch 2850, loss[loss=0.2373, ctc_loss=0.1332, cr_loss=0.3872, attn_decoder_loss=0.2403, over 29500.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1353, cr_loss=0.3784, attn_decoder_loss=0.2502, over 5763090.41 frames. 
], batch size: 77, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:58:52,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=373400.0, ans=0.1 +2024-09-18 04:59:01,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=373440.0, ans=0.125 +2024-09-18 04:59:21,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=373480.0, ans=0.0 +2024-09-18 04:59:21,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=373480.0, ans=0.125 +2024-09-18 04:59:24,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=373520.0, ans=0.1 +2024-09-18 04:59:36,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.73 vs. limit=15.0 +2024-09-18 04:59:56,343 INFO [train.py:1198] (0/2) Epoch 21, batch 2900, loss[loss=0.2397, ctc_loss=0.1203, cr_loss=0.3394, attn_decoder_loss=0.2454, over 29448.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1362, cr_loss=0.3803, attn_decoder_loss=0.2515, over 5787794.53 frames. ], batch size: 79, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:59:58,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=373600.0, ans=0.125 +2024-09-18 05:00:07,391 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.02 vs. limit=15.0 +2024-09-18 05:00:15,830 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.274e+01 8.588e+01 9.125e+01 9.672e+01 3.101e+02, threshold=1.825e+02, percent-clipped=2.0 +2024-09-18 05:00:23,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=373640.0, ans=0.0 +2024-09-18 05:00:36,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=373680.0, ans=0.5 +2024-09-18 05:00:39,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=373680.0, ans=0.0 +2024-09-18 05:01:05,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=373760.0, ans=0.125 +2024-09-18 05:01:11,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=373760.0, ans=0.125 +2024-09-18 05:01:13,987 INFO [train.py:1198] (0/2) Epoch 21, batch 2950, loss[loss=0.2414, ctc_loss=0.1381, cr_loss=0.3949, attn_decoder_loss=0.2441, over 29520.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1349, cr_loss=0.3776, attn_decoder_loss=0.2501, over 5782732.05 frames. 
], batch size: 75, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:01:17,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=373800.0, ans=0.025 +2024-09-18 05:01:26,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=373800.0, ans=0.125 +2024-09-18 05:01:30,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=373840.0, ans=0.125 +2024-09-18 05:01:54,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=373880.0, ans=0.07 +2024-09-18 05:02:00,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=373920.0, ans=0.125 +2024-09-18 05:02:06,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=373920.0, ans=0.0 +2024-09-18 05:02:06,724 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.34 vs. limit=12.0 +2024-09-18 05:02:10,898 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:02:14,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.74 vs. limit=15.0 +2024-09-18 05:02:31,643 INFO [train.py:1198] (0/2) Epoch 21, batch 3000, loss[loss=0.2381, ctc_loss=0.1232, cr_loss=0.3447, attn_decoder_loss=0.2433, over 29763.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1347, cr_loss=0.3776, attn_decoder_loss=0.2498, over 5784376.39 frames. ], batch size: 81, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:02:31,643 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 05:02:50,165 INFO [train.py:1230] (0/2) Epoch 21, validation: loss=0.2116, ctc_loss=0.03952, cr_loss=5.001e-15, attn_decoder_loss=0.2307, over 944034.00 frames. +2024-09-18 05:02:50,165 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 05:02:56,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=374000.0, ans=0.0 +2024-09-18 05:03:07,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=374040.0, ans=0.0 +2024-09-18 05:03:10,259 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.360e+01 8.620e+01 9.343e+01 9.937e+01 2.049e+02, threshold=1.869e+02, percent-clipped=2.0 +2024-09-18 05:03:23,058 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.53 vs. limit=15.0 +2024-09-18 05:03:24,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=374080.0, ans=0.125 +2024-09-18 05:03:36,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=374120.0, ans=0.125 +2024-09-18 05:04:06,327 INFO [train.py:1198] (0/2) Epoch 21, batch 3050, loss[loss=0.2358, ctc_loss=0.1261, cr_loss=0.3658, attn_decoder_loss=0.2399, over 29547.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1352, cr_loss=0.3785, attn_decoder_loss=0.2502, over 5778337.82 frames. 
], batch size: 76, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:04:14,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=374200.0, ans=0.125 +2024-09-18 05:04:29,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=374240.0, ans=0.0 +2024-09-18 05:04:32,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=374240.0, ans=0.0 +2024-09-18 05:04:50,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=374280.0, ans=0.0 +2024-09-18 05:05:07,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=374360.0, ans=0.0 +2024-09-18 05:05:15,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=374360.0, ans=0.0 +2024-09-18 05:05:26,567 INFO [train.py:1198] (0/2) Epoch 21, batch 3100, loss[loss=0.2656, ctc_loss=0.152, cr_loss=0.4122, attn_decoder_loss=0.2691, over 29273.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1355, cr_loss=0.3795, attn_decoder_loss=0.2503, over 5776717.52 frames. ], batch size: 100, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:05:26,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=374400.0, ans=0.2 +2024-09-18 05:05:45,909 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.679e+01 8.504e+01 9.125e+01 9.577e+01 2.431e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-18 05:05:48,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.32 vs. limit=15.0 +2024-09-18 05:05:50,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=374440.0, ans=0.1 +2024-09-18 05:06:03,359 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.07 vs. limit=15.0 +2024-09-18 05:06:03,474 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.35 vs. limit=15.0 +2024-09-18 05:06:21,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.24 vs. limit=15.0 +2024-09-18 05:06:35,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.98 vs. limit=15.0 +2024-09-18 05:06:36,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=374560.0, ans=0.0 +2024-09-18 05:06:36,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=374560.0, ans=0.0 +2024-09-18 05:06:41,991 INFO [train.py:1198] (0/2) Epoch 21, batch 3150, loss[loss=0.2636, ctc_loss=0.1513, cr_loss=0.4108, attn_decoder_loss=0.267, over 28842.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1355, cr_loss=0.379, attn_decoder_loss=0.2503, over 5782803.36 frames. 
], batch size: 104, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:06:49,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=374600.0, ans=0.0 +2024-09-18 05:06:58,219 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.71 vs. limit=22.5 +2024-09-18 05:07:23,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=374680.0, ans=0.5 +2024-09-18 05:07:44,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=374760.0, ans=0.125 +2024-09-18 05:07:47,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=374760.0, ans=0.1 +2024-09-18 05:07:57,657 INFO [train.py:1198] (0/2) Epoch 21, batch 3200, loss[loss=0.2377, ctc_loss=0.1256, cr_loss=0.3625, attn_decoder_loss=0.2421, over 29412.00 frames. ], tot_loss[loss=0.2457, ctc_loss=0.1348, cr_loss=0.3777, attn_decoder_loss=0.2496, over 5792494.88 frames. ], batch size: 79, lr: 5.28e-03, grad_scale: 16.0 +2024-09-18 05:08:00,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=374800.0, ans=0.125 +2024-09-18 05:08:04,818 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.13 vs. limit=15.0 +2024-09-18 05:08:06,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=374800.0, ans=0.1 +2024-09-18 05:08:20,849 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.806e+01 8.671e+01 9.297e+01 1.015e+02 2.448e+02, threshold=1.859e+02, percent-clipped=1.0 +2024-09-18 05:08:35,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=374880.0, ans=0.125 +2024-09-18 05:08:38,677 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.13 vs. limit=15.0 +2024-09-18 05:08:50,257 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.98 vs. limit=10.0 +2024-09-18 05:09:15,156 INFO [train.py:1198] (0/2) Epoch 21, batch 3250, loss[loss=0.2509, ctc_loss=0.13, cr_loss=0.3631, attn_decoder_loss=0.2563, over 29694.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.135, cr_loss=0.3784, attn_decoder_loss=0.2502, over 5799255.11 frames. 
], batch size: 84, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:09:33,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=375040.0, ans=0.05 +2024-09-18 05:09:36,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=375040.0, ans=0.0 +2024-09-18 05:09:46,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=375080.0, ans=0.1 +2024-09-18 05:09:46,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=375080.0, ans=0.025 +2024-09-18 05:09:49,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=375080.0, ans=0.125 +2024-09-18 05:10:06,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=375120.0, ans=0.125 +2024-09-18 05:10:26,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=375160.0, ans=0.2 +2024-09-18 05:10:33,521 INFO [train.py:1198] (0/2) Epoch 21, batch 3300, loss[loss=0.2513, ctc_loss=0.1335, cr_loss=0.3561, attn_decoder_loss=0.2565, over 28294.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1342, cr_loss=0.3767, attn_decoder_loss=0.2491, over 5797491.37 frames. ], batch size: 111, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:10:52,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=375240.0, ans=0.0 +2024-09-18 05:10:54,883 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.354e+01 8.586e+01 9.172e+01 9.727e+01 2.274e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-18 05:10:58,601 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.34 vs. limit=15.0 +2024-09-18 05:11:37,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=375360.0, ans=0.0 +2024-09-18 05:11:48,833 INFO [train.py:1198] (0/2) Epoch 21, batch 3350, loss[loss=0.2599, ctc_loss=0.1377, cr_loss=0.3905, attn_decoder_loss=0.2648, over 28890.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1349, cr_loss=0.3779, attn_decoder_loss=0.2498, over 5773235.32 frames. 
], batch size: 104, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:12:04,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=375440.0, ans=0.5 +2024-09-18 05:12:12,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=375440.0, ans=0.1 +2024-09-18 05:12:36,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=375520.0, ans=0.125 +2024-09-18 05:12:54,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=375560.0, ans=0.0 +2024-09-18 05:13:00,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=375560.0, ans=0.0 +2024-09-18 05:13:05,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=375600.0, ans=0.0 +2024-09-18 05:13:06,609 INFO [train.py:1198] (0/2) Epoch 21, batch 3400, loss[loss=0.2189, ctc_loss=0.1085, cr_loss=0.3326, attn_decoder_loss=0.2238, over 29337.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1349, cr_loss=0.3778, attn_decoder_loss=0.2499, over 5766623.43 frames. ], batch size: 67, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:13:25,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=375640.0, ans=0.125 +2024-09-18 05:13:29,674 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.372e+01 8.485e+01 9.062e+01 9.587e+01 1.561e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 05:13:38,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.51 vs. limit=10.0 +2024-09-18 05:13:45,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=375680.0, ans=0.125 +2024-09-18 05:14:02,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=375720.0, ans=0.1 +2024-09-18 05:14:08,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=375760.0, ans=0.025 +2024-09-18 05:14:24,505 INFO [train.py:1198] (0/2) Epoch 21, batch 3450, loss[loss=0.2485, ctc_loss=0.1359, cr_loss=0.3723, attn_decoder_loss=0.2527, over 28304.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1352, cr_loss=0.3787, attn_decoder_loss=0.2502, over 5774468.58 frames. ], batch size: 111, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:14:32,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=375800.0, ans=0.0 +2024-09-18 05:14:57,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=375880.0, ans=0.125 +2024-09-18 05:15:20,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=375920.0, ans=0.1 +2024-09-18 05:15:21,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=6.00 vs. 
limit=6.0 +2024-09-18 05:15:37,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=375960.0, ans=0.125 +2024-09-18 05:15:40,542 INFO [train.py:1198] (0/2) Epoch 21, batch 3500, loss[loss=0.2194, ctc_loss=0.1096, cr_loss=0.3308, attn_decoder_loss=0.2242, over 29299.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1348, cr_loss=0.378, attn_decoder_loss=0.2494, over 5776519.19 frames. ], batch size: 71, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:16:03,720 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.357e+01 8.729e+01 9.303e+01 9.808e+01 4.681e+02, threshold=1.861e+02, percent-clipped=2.0 +2024-09-18 05:16:05,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=376040.0, ans=0.05 +2024-09-18 05:16:26,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=376120.0, ans=0.1 +2024-09-18 05:16:27,191 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.61 vs. limit=12.0 +2024-09-18 05:16:34,566 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.77 vs. limit=15.0 +2024-09-18 05:16:57,298 INFO [train.py:1198] (0/2) Epoch 21, batch 3550, loss[loss=0.2606, ctc_loss=0.1394, cr_loss=0.39, attn_decoder_loss=0.2654, over 29715.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1344, cr_loss=0.3773, attn_decoder_loss=0.2493, over 5783278.99 frames. ], batch size: 89, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:16:57,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=376200.0, ans=0.1 +2024-09-18 05:17:02,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=376200.0, ans=0.125 +2024-09-18 05:17:04,298 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.22 vs. limit=6.0 +2024-09-18 05:17:19,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=376240.0, ans=0.2 +2024-09-18 05:17:34,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=376280.0, ans=0.05 +2024-09-18 05:18:00,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=376360.0, ans=0.125 +2024-09-18 05:18:13,899 INFO [train.py:1198] (0/2) Epoch 21, batch 3600, loss[loss=0.2313, ctc_loss=0.1187, cr_loss=0.3314, attn_decoder_loss=0.2365, over 29508.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1344, cr_loss=0.3773, attn_decoder_loss=0.2495, over 5793314.89 frames. ], batch size: 77, lr: 5.27e-03, grad_scale: 16.0 +2024-09-18 05:18:29,821 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.45 vs. 
limit=15.0 +2024-09-18 05:18:34,845 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.502e+01 8.337e+01 8.787e+01 9.364e+01 1.302e+02, threshold=1.757e+02, percent-clipped=0.0 +2024-09-18 05:18:47,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=12.0 +2024-09-18 05:19:10,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=376520.0, ans=0.0 +2024-09-18 05:19:28,201 INFO [train.py:1198] (0/2) Epoch 21, batch 3650, loss[loss=0.259, ctc_loss=0.143, cr_loss=0.4022, attn_decoder_loss=0.263, over 29497.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.134, cr_loss=0.3765, attn_decoder_loss=0.249, over 5794509.55 frames. ], batch size: 90, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:19:36,021 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:19:38,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=376600.0, ans=0.025 +2024-09-18 05:19:56,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=376680.0, ans=0.0 +2024-09-18 05:20:01,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=376680.0, ans=0.2 +2024-09-18 05:20:20,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=376720.0, ans=0.2 +2024-09-18 05:20:28,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=376760.0, ans=0.2 +2024-09-18 05:20:42,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=376800.0, ans=0.125 +2024-09-18 05:20:43,489 INFO [train.py:1198] (0/2) Epoch 21, batch 3700, loss[loss=0.2511, ctc_loss=0.1319, cr_loss=0.3778, attn_decoder_loss=0.2559, over 29697.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1341, cr_loss=0.3766, attn_decoder_loss=0.2491, over 5804887.64 frames. ], batch size: 84, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:21:05,914 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.170e+01 8.517e+01 9.022e+01 9.849e+01 1.949e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-18 05:21:16,657 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:21:23,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=376880.0, ans=0.09899494936611666 +2024-09-18 05:21:33,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.66 vs. limit=22.5 +2024-09-18 05:21:57,754 INFO [train.py:1198] (0/2) Epoch 21, batch 3750, loss[loss=0.2197, ctc_loss=0.1098, cr_loss=0.3279, attn_decoder_loss=0.2246, over 29330.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1342, cr_loss=0.3766, attn_decoder_loss=0.2487, over 5808621.54 frames. 
], batch size: 67, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:21:58,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=377000.0, ans=0.025 +2024-09-18 05:22:07,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=377000.0, ans=0.125 +2024-09-18 05:22:07,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=377000.0, ans=0.025 +2024-09-18 05:22:20,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=377040.0, ans=0.0 +2024-09-18 05:22:35,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=377080.0, ans=0.125 +2024-09-18 05:22:40,419 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.10 vs. limit=15.0 +2024-09-18 05:22:43,845 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.24 vs. limit=15.0 +2024-09-18 05:23:14,090 INFO [train.py:1198] (0/2) Epoch 21, batch 3800, loss[loss=0.2527, ctc_loss=0.1427, cr_loss=0.3801, attn_decoder_loss=0.2565, over 29638.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.134, cr_loss=0.3761, attn_decoder_loss=0.2484, over 5798593.74 frames. ], batch size: 86, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:23:36,536 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.482e+01 8.558e+01 9.240e+01 9.922e+01 2.766e+02, threshold=1.848e+02, percent-clipped=2.0 +2024-09-18 05:23:38,932 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.80 vs. limit=15.0 +2024-09-18 05:23:39,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=377240.0, ans=0.04949747468305833 +2024-09-18 05:23:45,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=377280.0, ans=0.125 +2024-09-18 05:23:48,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=377280.0, ans=0.125 +2024-09-18 05:23:59,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn2.whiten.whitening_limit, batch_count=377320.0, ans=22.5 +2024-09-18 05:24:00,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=377320.0, ans=0.0 +2024-09-18 05:24:18,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=377360.0, ans=0.125 +2024-09-18 05:24:20,970 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.83 vs. limit=15.0 +2024-09-18 05:24:30,327 INFO [train.py:1198] (0/2) Epoch 21, batch 3850, loss[loss=0.2609, ctc_loss=0.1464, cr_loss=0.3972, attn_decoder_loss=0.2649, over 29293.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1338, cr_loss=0.3762, attn_decoder_loss=0.2484, over 5811831.24 frames. 
], batch size: 100, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:24:30,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=377400.0, ans=0.1 +2024-09-18 05:24:36,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=377400.0, ans=0.0 +2024-09-18 05:24:42,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=377400.0, ans=0.09899494936611666 +2024-09-18 05:24:49,946 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:25:01,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=377480.0, ans=0.125 +2024-09-18 05:25:03,930 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.58 vs. limit=15.0 +2024-09-18 05:25:09,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.94 vs. limit=22.5 +2024-09-18 05:25:36,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=377560.0, ans=0.025 +2024-09-18 05:25:43,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=377600.0, ans=0.035 +2024-09-18 05:25:45,158 INFO [train.py:1198] (0/2) Epoch 21, batch 3900, loss[loss=0.2651, ctc_loss=0.1464, cr_loss=0.4038, attn_decoder_loss=0.2693, over 29647.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1343, cr_loss=0.3769, attn_decoder_loss=0.2489, over 5816091.42 frames. ], batch size: 86, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:25:55,030 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.10 vs. limit=10.0 +2024-09-18 05:26:07,259 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.452e+01 8.671e+01 9.111e+01 9.603e+01 1.300e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-18 05:26:10,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.68 vs. limit=12.0 +2024-09-18 05:26:34,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=377720.0, ans=0.125 +2024-09-18 05:26:51,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=377760.0, ans=0.0 +2024-09-18 05:26:57,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.75 vs. limit=22.5 +2024-09-18 05:26:59,572 INFO [train.py:1198] (0/2) Epoch 21, batch 3950, loss[loss=0.2454, ctc_loss=0.1282, cr_loss=0.3519, attn_decoder_loss=0.2506, over 29494.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1339, cr_loss=0.3759, attn_decoder_loss=0.2491, over 5835666.96 frames. 
], batch size: 97, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:27:52,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=377920.0, ans=0.0 +2024-09-18 05:28:09,785 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.00 vs. limit=22.5 +2024-09-18 05:28:14,618 INFO [train.py:1198] (0/2) Epoch 21, batch 4000, loss[loss=0.228, ctc_loss=0.1166, cr_loss=0.3577, attn_decoder_loss=0.2324, over 29499.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1343, cr_loss=0.3763, attn_decoder_loss=0.2494, over 5813263.39 frames. ], batch size: 74, lr: 5.26e-03, grad_scale: 16.0 +2024-09-18 05:28:26,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=378000.0, ans=0.0 +2024-09-18 05:28:38,247 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.613e+01 8.637e+01 9.105e+01 9.736e+01 3.809e+02, threshold=1.821e+02, percent-clipped=2.0 +2024-09-18 05:28:40,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.79 vs. limit=15.0 +2024-09-18 05:28:56,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=378080.0, ans=0.125 +2024-09-18 05:29:03,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=378120.0, ans=0.2 +2024-09-18 05:29:06,016 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.94 vs. limit=12.0 +2024-09-18 05:29:17,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=378160.0, ans=0.1 +2024-09-18 05:29:18,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=378160.0, ans=0.125 +2024-09-18 05:29:29,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=378200.0, ans=0.2 +2024-09-18 05:29:30,122 INFO [train.py:1198] (0/2) Epoch 21, batch 4050, loss[loss=0.2778, ctc_loss=0.1843, cr_loss=0.4239, attn_decoder_loss=0.2788, over 20109.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.134, cr_loss=0.3754, attn_decoder_loss=0.249, over 5795487.91 frames. 
], batch size: 209, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:29:47,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=378240.0, ans=0.025 +2024-09-18 05:29:49,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=378240.0, ans=0.125 +2024-09-18 05:29:55,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=378240.0, ans=0.125 +2024-09-18 05:29:58,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=378280.0, ans=0.0 +2024-09-18 05:30:28,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=378360.0, ans=0.0 +2024-09-18 05:30:39,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=378360.0, ans=0.025 +2024-09-18 05:30:44,000 INFO [train.py:1198] (0/2) Epoch 21, batch 4100, loss[loss=0.2565, ctc_loss=0.1427, cr_loss=0.4077, attn_decoder_loss=0.2601, over 29501.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1341, cr_loss=0.3755, attn_decoder_loss=0.249, over 5790750.47 frames. ], batch size: 90, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:31:07,487 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.357e+01 8.642e+01 9.337e+01 1.033e+02 5.468e+02, threshold=1.867e+02, percent-clipped=3.0 +2024-09-18 05:31:11,473 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.34 vs. limit=22.5 +2024-09-18 05:31:32,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=378520.0, ans=0.1 +2024-09-18 05:31:38,162 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.44 vs. limit=15.0 +2024-09-18 05:31:40,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=378520.0, ans=0.0 +2024-09-18 05:31:47,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.25 vs. limit=15.0 +2024-09-18 05:31:50,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=378560.0, ans=0.1 +2024-09-18 05:31:58,985 INFO [train.py:1198] (0/2) Epoch 21, batch 4150, loss[loss=0.2401, ctc_loss=0.1358, cr_loss=0.3545, attn_decoder_loss=0.2438, over 29488.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1339, cr_loss=0.3755, attn_decoder_loss=0.2488, over 5796929.89 frames. ], batch size: 77, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:32:09,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=378600.0, ans=0.1 +2024-09-18 05:32:12,929 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.10 vs. limit=15.0 +2024-09-18 05:32:37,730 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.79 vs. 
limit=22.5 +2024-09-18 05:32:58,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=378760.0, ans=0.125 +2024-09-18 05:32:59,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=378760.0, ans=0.1 +2024-09-18 05:33:11,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=378800.0, ans=0.05 +2024-09-18 05:33:12,774 INFO [train.py:1198] (0/2) Epoch 21, batch 4200, loss[loss=0.2593, ctc_loss=0.1498, cr_loss=0.4038, attn_decoder_loss=0.2625, over 29517.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1341, cr_loss=0.3759, attn_decoder_loss=0.249, over 5798684.74 frames. ], batch size: 90, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:33:18,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=378800.0, ans=0.125 +2024-09-18 05:33:36,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=378840.0, ans=0.1 +2024-09-18 05:33:37,365 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.551e+01 8.402e+01 9.063e+01 9.513e+01 1.420e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-18 05:33:43,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=378880.0, ans=0.2 +2024-09-18 05:34:03,570 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.58 vs. limit=15.0 +2024-09-18 05:34:07,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=378920.0, ans=0.2 +2024-09-18 05:34:27,309 INFO [train.py:1198] (0/2) Epoch 21, batch 4250, loss[loss=0.232, ctc_loss=0.1254, cr_loss=0.3643, attn_decoder_loss=0.2357, over 29515.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1339, cr_loss=0.3763, attn_decoder_loss=0.2492, over 5805102.14 frames. ], batch size: 74, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:34:28,326 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.48 vs. limit=15.0 +2024-09-18 05:34:33,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=379000.0, ans=0.09899494936611666 +2024-09-18 05:34:37,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=379000.0, ans=0.125 +2024-09-18 05:34:56,131 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.17 vs. 
limit=15.0 +2024-09-18 05:35:01,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=379080.0, ans=0.125 +2024-09-18 05:35:04,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=379080.0, ans=0.09899494936611666 +2024-09-18 05:35:17,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=379120.0, ans=0.1 +2024-09-18 05:35:41,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=379200.0, ans=0.1 +2024-09-18 05:35:42,510 INFO [train.py:1198] (0/2) Epoch 21, batch 4300, loss[loss=0.2547, ctc_loss=0.1359, cr_loss=0.3904, attn_decoder_loss=0.2592, over 29523.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1338, cr_loss=0.3756, attn_decoder_loss=0.2495, over 5794897.86 frames. ], batch size: 87, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:35:54,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=379200.0, ans=0.1 +2024-09-18 05:36:05,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=379240.0, ans=0.125 +2024-09-18 05:36:06,493 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.491e+01 8.631e+01 9.482e+01 1.010e+02 4.284e+02, threshold=1.896e+02, percent-clipped=4.0 +2024-09-18 05:36:16,274 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.58 vs. limit=22.5 +2024-09-18 05:36:45,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=379360.0, ans=0.025 +2024-09-18 05:36:51,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=379360.0, ans=0.0 +2024-09-18 05:36:57,603 INFO [train.py:1198] (0/2) Epoch 21, batch 4350, loss[loss=0.2615, ctc_loss=0.156, cr_loss=0.4167, attn_decoder_loss=0.2639, over 29429.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1365, cr_loss=0.3811, attn_decoder_loss=0.2526, over 5797367.74 frames. ], batch size: 97, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:37:02,993 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.82 vs. limit=22.5 +2024-09-18 05:37:05,700 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.77 vs. limit=22.5 +2024-09-18 05:37:06,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=379400.0, ans=0.1 +2024-09-18 05:37:24,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=379440.0, ans=0.125 +2024-09-18 05:37:27,966 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.62 vs. 
limit=6.0 +2024-09-18 05:37:30,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=379480.0, ans=0.125 +2024-09-18 05:37:58,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=379560.0, ans=0.0 +2024-09-18 05:38:06,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=379560.0, ans=0.125 +2024-09-18 05:38:08,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.02 vs. limit=15.0 +2024-09-18 05:38:11,715 INFO [train.py:1198] (0/2) Epoch 21, batch 4400, loss[loss=0.2535, ctc_loss=0.1549, cr_loss=0.4119, attn_decoder_loss=0.2554, over 27265.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.1379, cr_loss=0.3831, attn_decoder_loss=0.2545, over 5767837.93 frames. ], batch size: 124, lr: 5.24e-03, grad_scale: 16.0 +2024-09-18 05:38:29,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=379640.0, ans=0.0 +2024-09-18 05:38:34,953 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.049e+01 8.987e+01 9.326e+01 1.008e+02 3.021e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-18 05:38:35,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=379640.0, ans=0.025 +2024-09-18 05:38:39,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=379680.0, ans=0.0 +2024-09-18 05:38:41,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=379680.0, ans=0.2 +2024-09-18 05:38:54,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=379720.0, ans=0.2 +2024-09-18 05:39:17,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=379760.0, ans=0.07 +2024-09-18 05:39:18,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=379760.0, ans=0.0 +2024-09-18 05:39:25,856 INFO [train.py:1198] (0/2) Epoch 21, batch 4450, loss[loss=0.2695, ctc_loss=0.1702, cr_loss=0.4001, attn_decoder_loss=0.2716, over 19956.00 frames. ], tot_loss[loss=0.2534, ctc_loss=0.1423, cr_loss=0.3887, attn_decoder_loss=0.2571, over 5579452.04 frames. 
], batch size: 210, lr: 5.24e-03, grad_scale: 8.0 +2024-09-18 05:39:26,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=379800.0, ans=0.04949747468305833 +2024-09-18 05:39:29,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=379800.0, ans=0.1 +2024-09-18 05:40:01,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=379880.0, ans=0.0 +2024-09-18 05:40:30,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=379960.0, ans=0.125 +2024-09-18 05:40:30,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=379960.0, ans=0.0 +2024-09-18 05:40:41,877 INFO [train.py:1198] (0/2) Epoch 21, batch 4500, loss[loss=0.2713, ctc_loss=0.174, cr_loss=0.403, attn_decoder_loss=0.2732, over 19971.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1471, cr_loss=0.3914, attn_decoder_loss=0.2595, over 5237584.87 frames. ], batch size: 209, lr: 5.24e-03, grad_scale: 8.0 +2024-09-18 05:40:57,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=380040.0, ans=0.125 +2024-09-18 05:41:00,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=380040.0, ans=0.0 +2024-09-18 05:41:07,254 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.842e+01 1.023e+02 1.116e+02 1.184e+02 1.723e+02, threshold=2.233e+02, percent-clipped=0.0 +2024-09-18 05:41:18,838 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-21.pt +2024-09-18 05:42:06,205 INFO [train.py:1198] (0/2) Epoch 22, batch 0, loss[loss=0.2248, ctc_loss=0.1267, cr_loss=0.3587, attn_decoder_loss=0.2278, over 29623.00 frames. ], tot_loss[loss=0.2248, ctc_loss=0.1267, cr_loss=0.3587, attn_decoder_loss=0.2278, over 29623.00 frames. ], batch size: 73, lr: 5.12e-03, grad_scale: 16.0 +2024-09-18 05:42:06,206 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 05:42:13,684 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.0586, 3.5827, 3.9292, 3.5171], device='cuda:0') +2024-09-18 05:42:24,648 INFO [train.py:1230] (0/2) Epoch 22, validation: loss=0.212, ctc_loss=0.0382, cr_loss=5.087e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-18 05:42:24,649 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 05:42:26,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=380100.0, ans=0.0 +2024-09-18 05:43:12,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.18 vs. limit=15.0 +2024-09-18 05:43:35,377 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.49 vs. 
limit=15.0 +2024-09-18 05:43:38,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=380260.0, ans=0.125 +2024-09-18 05:43:42,237 INFO [train.py:1198] (0/2) Epoch 22, batch 50, loss[loss=0.2047, ctc_loss=0.1007, cr_loss=0.3103, attn_decoder_loss=0.2094, over 29422.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1376, cr_loss=0.3838, attn_decoder_loss=0.2508, over 1268560.95 frames. ], batch size: 70, lr: 5.12e-03, grad_scale: 8.0 +2024-09-18 05:43:47,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=380300.0, ans=0.0 +2024-09-18 05:44:12,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.47 vs. limit=22.5 +2024-09-18 05:44:14,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=380380.0, ans=0.125 +2024-09-18 05:44:15,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=380380.0, ans=15.0 +2024-09-18 05:44:28,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=380420.0, ans=0.1 +2024-09-18 05:44:29,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=380420.0, ans=0.2 +2024-09-18 05:44:41,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=380460.0, ans=0.125 +2024-09-18 05:44:47,549 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.106e+01 8.759e+01 9.355e+01 1.030e+02 2.527e+02, threshold=1.871e+02, percent-clipped=1.0 +2024-09-18 05:44:52,852 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.90 vs. limit=15.0 +2024-09-18 05:44:56,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=380500.0, ans=0.1 +2024-09-18 05:44:57,980 INFO [train.py:1198] (0/2) Epoch 22, batch 100, loss[loss=0.2304, ctc_loss=0.1275, cr_loss=0.3432, attn_decoder_loss=0.2342, over 29540.00 frames. ], tot_loss[loss=0.249, ctc_loss=0.1379, cr_loss=0.3852, attn_decoder_loss=0.2528, over 2253339.33 frames. ], batch size: 76, lr: 5.12e-03, grad_scale: 8.0 +2024-09-18 05:45:14,840 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:45:19,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=380540.0, ans=0.0 +2024-09-18 05:45:40,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.87 vs. 
limit=22.5 +2024-09-18 05:45:58,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=380660.0, ans=0.1 +2024-09-18 05:46:09,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=380660.0, ans=0.0 +2024-09-18 05:46:12,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=380660.0, ans=0.2 +2024-09-18 05:46:12,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=380660.0, ans=0.0 +2024-09-18 05:46:17,498 INFO [train.py:1198] (0/2) Epoch 22, batch 150, loss[loss=0.2182, ctc_loss=0.1151, cr_loss=0.3388, attn_decoder_loss=0.2222, over 29425.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1352, cr_loss=0.3797, attn_decoder_loss=0.2502, over 3048314.36 frames. ], batch size: 70, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:46:49,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=380780.0, ans=0.1 +2024-09-18 05:47:17,398 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.13 vs. limit=22.5 +2024-09-18 05:47:22,580 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.751e+01 8.602e+01 9.163e+01 9.915e+01 1.341e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-18 05:47:32,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=380900.0, ans=0.025 +2024-09-18 05:47:33,203 INFO [train.py:1198] (0/2) Epoch 22, batch 200, loss[loss=0.258, ctc_loss=0.1491, cr_loss=0.4133, attn_decoder_loss=0.2609, over 27451.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1342, cr_loss=0.3776, attn_decoder_loss=0.249, over 3660568.58 frames. ], batch size: 124, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:47:39,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=380900.0, ans=0.2 +2024-09-18 05:48:01,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.25 vs. limit=15.0 +2024-09-18 05:48:11,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=380980.0, ans=0.125 +2024-09-18 05:48:12,003 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.92 vs. limit=15.0 +2024-09-18 05:48:12,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=380980.0, ans=0.0 +2024-09-18 05:48:21,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=381020.0, ans=0.1 +2024-09-18 05:48:39,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=381060.0, ans=0.125 +2024-09-18 05:48:47,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=381100.0, ans=0.125 +2024-09-18 05:48:48,679 INFO [train.py:1198] (0/2) Epoch 22, batch 250, loss[loss=0.2719, ctc_loss=0.149, cr_loss=0.4063, attn_decoder_loss=0.2765, over 29251.00 frames. 
], tot_loss[loss=0.2447, ctc_loss=0.1333, cr_loss=0.3756, attn_decoder_loss=0.2487, over 4142327.25 frames. ], batch size: 100, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:49:01,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=381100.0, ans=0.0 +2024-09-18 05:49:12,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=381140.0, ans=0.125 +2024-09-18 05:49:19,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.93 vs. limit=12.0 +2024-09-18 05:49:37,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=381220.0, ans=0.125 +2024-09-18 05:49:52,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=381260.0, ans=0.0 +2024-09-18 05:49:56,360 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.584e+01 8.533e+01 8.896e+01 9.505e+01 2.232e+02, threshold=1.779e+02, percent-clipped=1.0 +2024-09-18 05:50:06,898 INFO [train.py:1198] (0/2) Epoch 22, batch 300, loss[loss=0.2616, ctc_loss=0.1495, cr_loss=0.4099, attn_decoder_loss=0.265, over 29500.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1331, cr_loss=0.3754, attn_decoder_loss=0.2486, over 4510616.87 frames. ], batch size: 92, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:50:26,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=381340.0, ans=0.2 +2024-09-18 05:50:38,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=381380.0, ans=0.2 +2024-09-18 05:50:51,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=381380.0, ans=0.125 +2024-09-18 05:50:53,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=381420.0, ans=0.5 +2024-09-18 05:51:08,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=381460.0, ans=0.1 +2024-09-18 05:51:14,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=381460.0, ans=0.0 +2024-09-18 05:51:18,206 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.49 vs. limit=10.0 +2024-09-18 05:51:24,701 INFO [train.py:1198] (0/2) Epoch 22, batch 350, loss[loss=0.2179, ctc_loss=0.1063, cr_loss=0.3224, attn_decoder_loss=0.2231, over 29312.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1335, cr_loss=0.3764, attn_decoder_loss=0.2492, over 4795628.38 frames. 
], batch size: 71, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:51:27,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=381500.0, ans=0.1 +2024-09-18 05:51:50,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=381540.0, ans=0.125 +2024-09-18 05:51:51,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=381540.0, ans=0.125 +2024-09-18 05:51:53,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=381580.0, ans=0.125 +2024-09-18 05:52:29,879 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.189e+01 8.396e+01 8.841e+01 9.371e+01 8.849e+02, threshold=1.768e+02, percent-clipped=1.0 +2024-09-18 05:52:33,938 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.77 vs. limit=15.0 +2024-09-18 05:52:40,327 INFO [train.py:1198] (0/2) Epoch 22, batch 400, loss[loss=0.2444, ctc_loss=0.1316, cr_loss=0.3775, attn_decoder_loss=0.2486, over 29707.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1334, cr_loss=0.3762, attn_decoder_loss=0.2489, over 5025787.21 frames. ], batch size: 82, lr: 5.11e-03, grad_scale: 16.0 +2024-09-18 05:52:57,947 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.86 vs. limit=15.0 +2024-09-18 05:53:08,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=381740.0, ans=0.1 +2024-09-18 05:53:09,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=381780.0, ans=0.125 +2024-09-18 05:53:14,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=381780.0, ans=0.125 +2024-09-18 05:53:17,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=381780.0, ans=0.0 +2024-09-18 05:53:19,625 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.78 vs. limit=15.0 +2024-09-18 05:53:59,069 INFO [train.py:1198] (0/2) Epoch 22, batch 450, loss[loss=0.2563, ctc_loss=0.1371, cr_loss=0.3925, attn_decoder_loss=0.2608, over 29697.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1337, cr_loss=0.3769, attn_decoder_loss=0.2493, over 5188038.91 frames. ], batch size: 83, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:54:29,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=381940.0, ans=0.125 +2024-09-18 05:54:33,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.63 vs. 
limit=22.5 +2024-09-18 05:54:44,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=381980.0, ans=0.125 +2024-09-18 05:54:58,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=382020.0, ans=0.125 +2024-09-18 05:55:08,440 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.444e+01 8.472e+01 8.899e+01 9.397e+01 1.729e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-18 05:55:14,837 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:55:17,489 INFO [train.py:1198] (0/2) Epoch 22, batch 500, loss[loss=0.2632, ctc_loss=0.1546, cr_loss=0.4179, attn_decoder_loss=0.266, over 29471.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1333, cr_loss=0.3766, attn_decoder_loss=0.2487, over 5329891.18 frames. ], batch size: 94, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 05:55:30,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=382100.0, ans=0.125 +2024-09-18 05:55:47,500 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.96 vs. limit=22.5 +2024-09-18 05:55:49,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=382180.0, ans=0.1 +2024-09-18 05:55:52,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=382180.0, ans=0.1 +2024-09-18 05:56:01,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=382220.0, ans=0.0 +2024-09-18 05:56:15,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=382220.0, ans=0.0 +2024-09-18 05:56:15,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=382220.0, ans=0.125 +2024-09-18 05:56:33,391 INFO [train.py:1198] (0/2) Epoch 22, batch 550, loss[loss=0.2651, ctc_loss=0.1454, cr_loss=0.3908, attn_decoder_loss=0.2697, over 28750.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1331, cr_loss=0.3762, attn_decoder_loss=0.2486, over 5423352.79 frames. 
], batch size: 104, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 05:56:56,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=382340.0, ans=0.0 +2024-09-18 05:56:58,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=382340.0, ans=0.125 +2024-09-18 05:57:39,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=382460.0, ans=0.1 +2024-09-18 05:57:39,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=382460.0, ans=0.025 +2024-09-18 05:57:40,969 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.068e+01 8.705e+01 9.082e+01 9.823e+01 4.645e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-18 05:57:44,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=382460.0, ans=0.125 +2024-09-18 05:57:52,558 INFO [train.py:1198] (0/2) Epoch 22, batch 600, loss[loss=0.2575, ctc_loss=0.1498, cr_loss=0.4075, attn_decoder_loss=0.2604, over 29282.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1335, cr_loss=0.3769, attn_decoder_loss=0.2492, over 5510802.67 frames. ], batch size: 100, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 05:58:41,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=382620.0, ans=0.1 +2024-09-18 05:58:50,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=382620.0, ans=0.2 +2024-09-18 05:59:03,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=382660.0, ans=0.1 +2024-09-18 05:59:09,506 INFO [train.py:1198] (0/2) Epoch 22, batch 650, loss[loss=0.2487, ctc_loss=0.1342, cr_loss=0.392, attn_decoder_loss=0.2527, over 29754.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1327, cr_loss=0.3761, attn_decoder_loss=0.2486, over 5587773.30 frames. ], batch size: 81, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 05:59:21,071 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.31 vs. limit=15.0 +2024-09-18 05:59:34,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=382740.0, ans=0.0 +2024-09-18 05:59:58,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=382820.0, ans=0.125 +2024-09-18 05:59:59,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=382820.0, ans=0.125 +2024-09-18 06:00:15,695 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.721e+01 8.434e+01 8.895e+01 9.353e+01 1.142e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-18 06:00:19,561 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.31 vs. limit=15.0 +2024-09-18 06:00:24,705 INFO [train.py:1198] (0/2) Epoch 22, batch 700, loss[loss=0.2339, ctc_loss=0.1233, cr_loss=0.3688, attn_decoder_loss=0.238, over 29520.00 frames. 
], tot_loss[loss=0.245, ctc_loss=0.133, cr_loss=0.377, attn_decoder_loss=0.249, over 5638919.75 frames. ], batch size: 76, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 06:00:32,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=382900.0, ans=0.125 +2024-09-18 06:00:43,620 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.23 vs. limit=22.5 +2024-09-18 06:00:44,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=382940.0, ans=0.1 +2024-09-18 06:00:47,511 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:00:52,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=382940.0, ans=0.05 +2024-09-18 06:00:58,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=382980.0, ans=0.125 +2024-09-18 06:00:58,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=382980.0, ans=0.1 +2024-09-18 06:01:01,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=382980.0, ans=0.125 +2024-09-18 06:01:11,445 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.56 vs. limit=10.0 +2024-09-18 06:01:40,815 INFO [train.py:1198] (0/2) Epoch 22, batch 750, loss[loss=0.2497, ctc_loss=0.1323, cr_loss=0.3811, attn_decoder_loss=0.2543, over 29691.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1327, cr_loss=0.3762, attn_decoder_loss=0.2484, over 5678131.40 frames. 
], batch size: 82, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 06:01:44,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=383100.0, ans=0.125 +2024-09-18 06:01:55,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=383100.0, ans=0.125 +2024-09-18 06:01:58,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=383140.0, ans=0.1 +2024-09-18 06:02:18,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=383180.0, ans=0.125 +2024-09-18 06:02:34,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=383220.0, ans=0.0 +2024-09-18 06:02:34,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=383220.0, ans=0.125 +2024-09-18 06:02:38,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=383220.0, ans=0.125 +2024-09-18 06:02:52,143 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.347e+01 8.642e+01 9.168e+01 9.743e+01 1.816e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-18 06:03:01,139 INFO [train.py:1198] (0/2) Epoch 22, batch 800, loss[loss=0.2318, ctc_loss=0.1259, cr_loss=0.3564, attn_decoder_loss=0.2356, over 29606.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1328, cr_loss=0.3765, attn_decoder_loss=0.2483, over 5707925.00 frames. ], batch size: 73, lr: 5.10e-03, grad_scale: 16.0 +2024-09-18 06:03:19,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=383340.0, ans=0.125 +2024-09-18 06:03:21,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=383340.0, ans=0.1 +2024-09-18 06:03:24,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=383340.0, ans=0.04949747468305833 +2024-09-18 06:03:30,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=383380.0, ans=0.025 +2024-09-18 06:03:36,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=383380.0, ans=0.125 +2024-09-18 06:03:52,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=383420.0, ans=0.0 +2024-09-18 06:04:12,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=383460.0, ans=0.1 +2024-09-18 06:04:16,285 INFO [train.py:1198] (0/2) Epoch 22, batch 850, loss[loss=0.2439, ctc_loss=0.1249, cr_loss=0.3494, attn_decoder_loss=0.2493, over 29713.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1326, cr_loss=0.376, attn_decoder_loss=0.248, over 5736615.10 frames. 
], batch size: 89, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 06:04:35,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=383540.0, ans=0.125 +2024-09-18 06:04:38,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=383540.0, ans=0.125 +2024-09-18 06:04:54,669 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.01 vs. limit=15.0 +2024-09-18 06:05:24,336 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.172e+01 8.714e+01 9.138e+01 9.767e+01 2.023e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-18 06:05:32,035 INFO [train.py:1198] (0/2) Epoch 22, batch 900, loss[loss=0.2244, ctc_loss=0.118, cr_loss=0.3512, attn_decoder_loss=0.2284, over 29592.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1328, cr_loss=0.3758, attn_decoder_loss=0.248, over 5742101.92 frames. ], batch size: 73, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:05:39,124 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.64 vs. limit=6.0 +2024-09-18 06:05:53,965 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.58 vs. limit=15.0 +2024-09-18 06:06:01,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.14 vs. limit=6.0 +2024-09-18 06:06:05,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=383780.0, ans=0.1 +2024-09-18 06:06:38,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=383860.0, ans=0.125 +2024-09-18 06:06:52,150 INFO [train.py:1198] (0/2) Epoch 22, batch 950, loss[loss=0.2231, ctc_loss=0.1143, cr_loss=0.343, attn_decoder_loss=0.2276, over 29509.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1328, cr_loss=0.3755, attn_decoder_loss=0.2483, over 5744501.83 frames. ], batch size: 74, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:07:28,873 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-96000.pt +2024-09-18 06:08:01,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=384060.0, ans=0.125 +2024-09-18 06:08:06,681 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.106e+01 8.991e+01 9.492e+01 1.022e+02 3.198e+02, threshold=1.898e+02, percent-clipped=2.0 +2024-09-18 06:08:13,999 INFO [train.py:1198] (0/2) Epoch 22, batch 1000, loss[loss=0.2424, ctc_loss=0.1312, cr_loss=0.3629, attn_decoder_loss=0.2467, over 29497.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1338, cr_loss=0.3767, attn_decoder_loss=0.2491, over 5738641.10 frames. 
], batch size: 77, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:08:17,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=384100.0, ans=0.125 +2024-09-18 06:08:24,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=384100.0, ans=0.0 +2024-09-18 06:08:32,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=384140.0, ans=0.1 +2024-09-18 06:09:29,787 INFO [train.py:1198] (0/2) Epoch 22, batch 1050, loss[loss=0.255, ctc_loss=0.1335, cr_loss=0.3714, attn_decoder_loss=0.2603, over 29684.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1332, cr_loss=0.3757, attn_decoder_loss=0.2484, over 5745869.57 frames. ], batch size: 85, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:09:44,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=384300.0, ans=0.125 +2024-09-18 06:09:55,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=384340.0, ans=0.09899494936611666 +2024-09-18 06:09:57,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=384340.0, ans=0.0 +2024-09-18 06:10:03,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=384380.0, ans=0.0 +2024-09-18 06:10:14,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=384380.0, ans=0.1 +2024-09-18 06:10:17,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=384380.0, ans=0.0 +2024-09-18 06:10:19,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=384420.0, ans=0.125 +2024-09-18 06:10:42,980 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.462e+01 8.419e+01 8.971e+01 9.530e+01 1.277e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 06:10:50,699 INFO [train.py:1198] (0/2) Epoch 22, batch 1100, loss[loss=0.2396, ctc_loss=0.1321, cr_loss=0.3851, attn_decoder_loss=0.243, over 29446.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1329, cr_loss=0.3748, attn_decoder_loss=0.2482, over 5757292.82 frames. 
], batch size: 78, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:11:10,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=384540.0, ans=0.2 +2024-09-18 06:11:16,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=384540.0, ans=0.0 +2024-09-18 06:11:27,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=384580.0, ans=0.125 +2024-09-18 06:11:42,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=384620.0, ans=0.0 +2024-09-18 06:11:50,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=384660.0, ans=0.1 +2024-09-18 06:11:54,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=384660.0, ans=0.125 +2024-09-18 06:11:54,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=384660.0, ans=0.2 +2024-09-18 06:12:06,664 INFO [train.py:1198] (0/2) Epoch 22, batch 1150, loss[loss=0.238, ctc_loss=0.1294, cr_loss=0.3586, attn_decoder_loss=0.2421, over 29461.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1329, cr_loss=0.3746, attn_decoder_loss=0.2481, over 5757336.96 frames. ], batch size: 78, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:12:11,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=384700.0, ans=0.2 +2024-09-18 06:12:19,712 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.21 vs. limit=15.0 +2024-09-18 06:12:28,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=384740.0, ans=0.5 +2024-09-18 06:12:32,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.33 vs. limit=12.0 +2024-09-18 06:12:42,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=384780.0, ans=0.125 +2024-09-18 06:12:45,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=384780.0, ans=0.125 +2024-09-18 06:12:50,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=384780.0, ans=0.07 +2024-09-18 06:13:02,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=384820.0, ans=0.125 +2024-09-18 06:13:15,274 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.605e+01 9.127e+01 9.575e+01 1.863e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-18 06:13:22,817 INFO [train.py:1198] (0/2) Epoch 22, batch 1200, loss[loss=0.261, ctc_loss=0.1455, cr_loss=0.3981, attn_decoder_loss=0.2649, over 29657.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1336, cr_loss=0.3758, attn_decoder_loss=0.2488, over 5749327.97 frames. 
], batch size: 85, lr: 5.09e-03, grad_scale: 16.0 +2024-09-18 06:13:58,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.81 vs. limit=22.5 +2024-09-18 06:14:14,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=385020.0, ans=0.0 +2024-09-18 06:14:21,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=385020.0, ans=0.2 +2024-09-18 06:14:42,679 INFO [train.py:1198] (0/2) Epoch 22, batch 1250, loss[loss=0.2564, ctc_loss=0.1361, cr_loss=0.4004, attn_decoder_loss=0.2609, over 29552.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1339, cr_loss=0.3773, attn_decoder_loss=0.2494, over 5775330.58 frames. ], batch size: 92, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:14:48,262 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.42 vs. limit=6.0 +2024-09-18 06:14:52,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=385100.0, ans=0.2 +2024-09-18 06:14:53,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=385100.0, ans=0.125 +2024-09-18 06:15:18,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=385180.0, ans=0.1 +2024-09-18 06:15:40,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=385220.0, ans=0.0 +2024-09-18 06:15:42,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=385260.0, ans=0.125 +2024-09-18 06:15:43,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=385260.0, ans=0.125 +2024-09-18 06:15:47,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=385260.0, ans=0.125 +2024-09-18 06:15:52,594 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.078e+01 8.234e+01 8.912e+01 9.418e+01 2.045e+02, threshold=1.782e+02, percent-clipped=1.0 +2024-09-18 06:15:58,556 INFO [train.py:1198] (0/2) Epoch 22, batch 1300, loss[loss=0.2524, ctc_loss=0.1344, cr_loss=0.3623, attn_decoder_loss=0.2574, over 28150.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1333, cr_loss=0.3762, attn_decoder_loss=0.2488, over 5780813.43 frames. 
], batch size: 111, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:16:00,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=385300.0, ans=0.125 +2024-09-18 06:16:38,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=385380.0, ans=0.04949747468305833 +2024-09-18 06:16:48,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=385420.0, ans=0.0 +2024-09-18 06:17:11,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=385460.0, ans=0.2 +2024-09-18 06:17:14,132 INFO [train.py:1198] (0/2) Epoch 22, batch 1350, loss[loss=0.2414, ctc_loss=0.1244, cr_loss=0.3505, attn_decoder_loss=0.2466, over 29749.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1325, cr_loss=0.3746, attn_decoder_loss=0.2482, over 5798098.58 frames. ], batch size: 81, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:17:15,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.24 vs. limit=6.0 +2024-09-18 06:17:15,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=385500.0, ans=0.07 +2024-09-18 06:17:16,241 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.15 vs. limit=15.0 +2024-09-18 06:17:17,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=385500.0, ans=0.125 +2024-09-18 06:17:21,072 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.49 vs. limit=15.0 +2024-09-18 06:17:29,465 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:17:35,282 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.35 vs. limit=22.5 +2024-09-18 06:17:56,630 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.74 vs. limit=12.0 +2024-09-18 06:17:57,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=385580.0, ans=0.125 +2024-09-18 06:18:06,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=385620.0, ans=0.09899494936611666 +2024-09-18 06:18:13,392 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.25 vs. 
limit=22.5 +2024-09-18 06:18:20,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=385660.0, ans=0.125 +2024-09-18 06:18:24,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=385660.0, ans=0.1 +2024-09-18 06:18:27,472 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.549e+01 8.459e+01 9.043e+01 9.728e+01 1.319e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-18 06:18:29,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=385660.0, ans=0.125 +2024-09-18 06:18:33,610 INFO [train.py:1198] (0/2) Epoch 22, batch 1400, loss[loss=0.2198, ctc_loss=0.1146, cr_loss=0.3512, attn_decoder_loss=0.2237, over 29597.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1325, cr_loss=0.3749, attn_decoder_loss=0.2481, over 5808648.42 frames. ], batch size: 69, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:18:44,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=385700.0, ans=0.0 +2024-09-18 06:18:55,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=385740.0, ans=0.1 +2024-09-18 06:19:04,225 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:19:10,615 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.22 vs. limit=15.0 +2024-09-18 06:19:22,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=385820.0, ans=0.2 +2024-09-18 06:19:22,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=385820.0, ans=0.125 +2024-09-18 06:19:23,107 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.50 vs. limit=15.0 +2024-09-18 06:19:25,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=385820.0, ans=0.1 +2024-09-18 06:19:26,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=385820.0, ans=0.125 +2024-09-18 06:19:37,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=385860.0, ans=0.07 +2024-09-18 06:19:49,086 INFO [train.py:1198] (0/2) Epoch 22, batch 1450, loss[loss=0.2558, ctc_loss=0.1414, cr_loss=0.3781, attn_decoder_loss=0.2601, over 29435.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1329, cr_loss=0.3752, attn_decoder_loss=0.2487, over 5804554.94 frames. ], batch size: 94, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:19:59,131 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.88 vs. 
limit=15.0 +2024-09-18 06:20:27,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=385980.0, ans=0.0 +2024-09-18 06:20:33,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=386020.0, ans=0.0 +2024-09-18 06:20:47,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=386020.0, ans=0.0 +2024-09-18 06:20:50,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.18 vs. limit=6.0 +2024-09-18 06:20:51,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=386060.0, ans=0.2 +2024-09-18 06:20:58,987 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.476e+01 9.077e+01 9.872e+01 2.572e+02, threshold=1.815e+02, percent-clipped=2.0 +2024-09-18 06:20:59,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=386060.0, ans=0.0 +2024-09-18 06:21:05,105 INFO [train.py:1198] (0/2) Epoch 22, batch 1500, loss[loss=0.2403, ctc_loss=0.1224, cr_loss=0.357, attn_decoder_loss=0.2455, over 29617.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1332, cr_loss=0.3765, attn_decoder_loss=0.2492, over 5805570.34 frames. ], batch size: 86, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:21:41,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=386180.0, ans=0.2 +2024-09-18 06:21:56,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=386220.0, ans=6.0 +2024-09-18 06:22:04,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=386220.0, ans=0.125 +2024-09-18 06:22:05,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=386220.0, ans=0.125 +2024-09-18 06:22:17,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=386260.0, ans=0.1 +2024-09-18 06:22:17,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.80 vs. limit=22.5 +2024-09-18 06:22:25,762 INFO [train.py:1198] (0/2) Epoch 22, batch 1550, loss[loss=0.2508, ctc_loss=0.1458, cr_loss=0.4013, attn_decoder_loss=0.2535, over 29517.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1339, cr_loss=0.3777, attn_decoder_loss=0.2494, over 5781311.36 frames. 
], batch size: 90, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:22:27,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=386300.0, ans=0.125 +2024-09-18 06:22:47,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=386340.0, ans=0.0 +2024-09-18 06:23:09,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=386380.0, ans=0.07 +2024-09-18 06:23:25,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=386460.0, ans=0.125 +2024-09-18 06:23:35,905 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.093e+01 8.667e+01 9.294e+01 9.875e+01 4.781e+02, threshold=1.859e+02, percent-clipped=2.0 +2024-09-18 06:23:41,955 INFO [train.py:1198] (0/2) Epoch 22, batch 1600, loss[loss=0.2488, ctc_loss=0.1306, cr_loss=0.3735, attn_decoder_loss=0.2536, over 29672.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1342, cr_loss=0.3777, attn_decoder_loss=0.2493, over 5764196.48 frames. ], batch size: 85, lr: 5.08e-03, grad_scale: 16.0 +2024-09-18 06:23:43,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=386500.0, ans=0.05 +2024-09-18 06:23:51,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=386500.0, ans=0.125 +2024-09-18 06:24:01,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=386540.0, ans=0.0 +2024-09-18 06:24:03,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=386540.0, ans=0.1 +2024-09-18 06:24:06,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=386540.0, ans=0.07 +2024-09-18 06:24:29,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=386620.0, ans=0.1 +2024-09-18 06:24:32,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=386620.0, ans=0.1 +2024-09-18 06:24:51,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=386660.0, ans=0.2 +2024-09-18 06:24:57,626 INFO [train.py:1198] (0/2) Epoch 22, batch 1650, loss[loss=0.2536, ctc_loss=0.1292, cr_loss=0.3773, attn_decoder_loss=0.259, over 29712.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1337, cr_loss=0.3765, attn_decoder_loss=0.2489, over 5759738.61 frames. ], batch size: 89, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:25:16,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=386740.0, ans=0.125 +2024-09-18 06:25:19,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=386740.0, ans=0.025 +2024-09-18 06:25:28,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=386780.0, ans=0.125 +2024-09-18 06:25:30,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.95 vs. 
limit=15.0 +2024-09-18 06:25:40,631 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.94 vs. limit=22.5 +2024-09-18 06:25:46,385 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.71 vs. limit=15.0 +2024-09-18 06:26:12,695 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 8.418e+01 9.168e+01 9.653e+01 1.530e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-18 06:26:17,120 INFO [train.py:1198] (0/2) Epoch 22, batch 1700, loss[loss=0.2158, ctc_loss=0.1149, cr_loss=0.3439, attn_decoder_loss=0.2194, over 29609.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1333, cr_loss=0.3762, attn_decoder_loss=0.2487, over 5781193.14 frames. ], batch size: 69, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:26:21,332 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.89 vs. limit=8.0 +2024-09-18 06:26:49,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=386980.0, ans=0.125 +2024-09-18 06:27:02,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=387020.0, ans=0.125 +2024-09-18 06:27:07,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=387020.0, ans=0.0 +2024-09-18 06:27:13,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=387020.0, ans=0.125 +2024-09-18 06:27:19,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=387060.0, ans=0.0 +2024-09-18 06:27:32,820 INFO [train.py:1198] (0/2) Epoch 22, batch 1750, loss[loss=0.2174, ctc_loss=0.1168, cr_loss=0.3587, attn_decoder_loss=0.2206, over 29319.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1326, cr_loss=0.3753, attn_decoder_loss=0.2481, over 5788432.75 frames. ], batch size: 67, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:28:03,178 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.61 vs. limit=15.0 +2024-09-18 06:28:05,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=387180.0, ans=0.125 +2024-09-18 06:28:05,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=387180.0, ans=0.0 +2024-09-18 06:28:19,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=387220.0, ans=0.0 +2024-09-18 06:28:24,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=387220.0, ans=15.0 +2024-09-18 06:28:31,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=387220.0, ans=0.0 +2024-09-18 06:28:42,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.47 vs. 
limit=15.0 +2024-09-18 06:28:44,543 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.338e+01 8.803e+01 9.481e+01 6.567e+02, threshold=1.761e+02, percent-clipped=1.0 +2024-09-18 06:28:49,102 INFO [train.py:1198] (0/2) Epoch 22, batch 1800, loss[loss=0.2387, ctc_loss=0.1187, cr_loss=0.3492, attn_decoder_loss=0.2443, over 29675.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1328, cr_loss=0.3756, attn_decoder_loss=0.2484, over 5791347.65 frames. ], batch size: 83, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:28:54,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=387300.0, ans=0.0 +2024-09-18 06:29:19,870 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.47 vs. limit=22.5 +2024-09-18 06:29:26,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=387380.0, ans=0.125 +2024-09-18 06:29:49,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.12 vs. limit=10.0 +2024-09-18 06:29:59,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=387460.0, ans=0.125 +2024-09-18 06:30:09,420 INFO [train.py:1198] (0/2) Epoch 22, batch 1850, loss[loss=0.2603, ctc_loss=0.1413, cr_loss=0.3846, attn_decoder_loss=0.265, over 29639.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1327, cr_loss=0.3752, attn_decoder_loss=0.2485, over 5798387.37 frames. ], batch size: 86, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:30:14,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=387500.0, ans=0.1 +2024-09-18 06:30:20,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=387500.0, ans=0.1 +2024-09-18 06:30:24,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=387540.0, ans=0.0 +2024-09-18 06:30:31,363 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.60 vs. limit=15.0 +2024-09-18 06:30:32,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=387540.0, ans=0.5 +2024-09-18 06:30:34,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=387540.0, ans=0.2 +2024-09-18 06:30:34,830 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.78 vs. 
limit=22.5 +2024-09-18 06:30:38,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=387580.0, ans=0.95 +2024-09-18 06:30:39,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=387580.0, ans=0.125 +2024-09-18 06:30:50,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=387580.0, ans=0.035 +2024-09-18 06:31:11,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=387660.0, ans=0.125 +2024-09-18 06:31:19,662 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.84 vs. limit=10.0 +2024-09-18 06:31:20,354 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.630e+01 9.053e+01 9.518e+01 1.576e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-18 06:31:24,769 INFO [train.py:1198] (0/2) Epoch 22, batch 1900, loss[loss=0.2592, ctc_loss=0.1388, cr_loss=0.3918, attn_decoder_loss=0.2638, over 29719.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1332, cr_loss=0.3766, attn_decoder_loss=0.2492, over 5806194.55 frames. ], batch size: 89, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:31:36,353 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.83 vs. limit=22.5 +2024-09-18 06:31:41,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=387740.0, ans=0.0 +2024-09-18 06:31:52,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=387740.0, ans=0.1 +2024-09-18 06:32:00,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=387780.0, ans=0.0 +2024-09-18 06:32:18,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=387820.0, ans=0.125 +2024-09-18 06:32:30,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=387860.0, ans=0.125 +2024-09-18 06:32:31,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=387860.0, ans=0.0 +2024-09-18 06:32:32,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.81 vs. limit=22.5 +2024-09-18 06:32:33,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=387860.0, ans=0.025 +2024-09-18 06:32:40,942 INFO [train.py:1198] (0/2) Epoch 22, batch 1950, loss[loss=0.234, ctc_loss=0.1244, cr_loss=0.3527, attn_decoder_loss=0.2384, over 29435.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1337, cr_loss=0.3776, attn_decoder_loss=0.2502, over 5820237.69 frames. 
], batch size: 78, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:32:50,501 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:33:45,240 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.42 vs. limit=6.0 +2024-09-18 06:33:56,805 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.872e+01 8.668e+01 9.187e+01 9.705e+01 3.737e+02, threshold=1.837e+02, percent-clipped=2.0 +2024-09-18 06:33:57,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=15.0 +2024-09-18 06:34:01,454 INFO [train.py:1198] (0/2) Epoch 22, batch 2000, loss[loss=0.2214, ctc_loss=0.1202, cr_loss=0.3654, attn_decoder_loss=0.2246, over 29311.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1343, cr_loss=0.3787, attn_decoder_loss=0.2508, over 5796983.45 frames. ], batch size: 67, lr: 5.07e-03, grad_scale: 16.0 +2024-09-18 06:34:06,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=388100.0, ans=0.2 +2024-09-18 06:34:32,130 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:34:33,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=388180.0, ans=0.2 +2024-09-18 06:34:46,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.81 vs. limit=15.0 +2024-09-18 06:34:56,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=388220.0, ans=0.125 +2024-09-18 06:35:14,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=388260.0, ans=0.125 +2024-09-18 06:35:16,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=388300.0, ans=0.125 +2024-09-18 06:35:17,274 INFO [train.py:1198] (0/2) Epoch 22, batch 2050, loss[loss=0.226, ctc_loss=0.1206, cr_loss=0.3611, attn_decoder_loss=0.2297, over 29438.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1335, cr_loss=0.3767, attn_decoder_loss=0.2497, over 5788976.32 frames. 
], batch size: 70, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:35:23,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=388300.0, ans=0.1 +2024-09-18 06:35:57,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=388380.0, ans=0.0 +2024-09-18 06:36:13,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=388420.0, ans=0.125 +2024-09-18 06:36:15,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=388420.0, ans=0.125 +2024-09-18 06:36:30,011 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.598e+01 9.133e+01 9.835e+01 1.696e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-18 06:36:33,137 INFO [train.py:1198] (0/2) Epoch 22, batch 2100, loss[loss=0.2327, ctc_loss=0.1225, cr_loss=0.3626, attn_decoder_loss=0.2368, over 29750.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1327, cr_loss=0.3756, attn_decoder_loss=0.2487, over 5800850.84 frames. ], batch size: 81, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:36:44,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=388500.0, ans=0.125 +2024-09-18 06:36:55,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten.whitening_limit, batch_count=388540.0, ans=15.0 +2024-09-18 06:37:05,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=388580.0, ans=0.1 +2024-09-18 06:37:31,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=388620.0, ans=0.125 +2024-09-18 06:37:52,733 INFO [train.py:1198] (0/2) Epoch 22, batch 2150, loss[loss=0.2394, ctc_loss=0.1236, cr_loss=0.3565, attn_decoder_loss=0.2444, over 29442.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1319, cr_loss=0.3747, attn_decoder_loss=0.2481, over 5815393.06 frames. ], batch size: 78, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:38:02,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=388700.0, ans=0.125 +2024-09-18 06:38:05,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=388700.0, ans=0.125 +2024-09-18 06:38:23,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=388780.0, ans=0.0 +2024-09-18 06:38:32,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=388780.0, ans=0.125 +2024-09-18 06:38:34,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=388780.0, ans=0.125 +2024-09-18 06:38:40,611 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.83 vs. 
limit=10.0 +2024-09-18 06:38:55,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=388860.0, ans=0.125 +2024-09-18 06:39:02,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=388860.0, ans=0.0 +2024-09-18 06:39:04,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=388860.0, ans=0.2 +2024-09-18 06:39:04,917 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.77 vs. limit=6.0 +2024-09-18 06:39:05,560 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.464e+01 8.588e+01 8.944e+01 9.592e+01 1.412e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-18 06:39:08,645 INFO [train.py:1198] (0/2) Epoch 22, batch 2200, loss[loss=0.2525, ctc_loss=0.1379, cr_loss=0.4001, attn_decoder_loss=0.2563, over 29628.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1322, cr_loss=0.375, attn_decoder_loss=0.2483, over 5811304.21 frames. ], batch size: 86, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:39:22,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=388940.0, ans=0.1 +2024-09-18 06:39:25,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=388940.0, ans=0.07 +2024-09-18 06:39:28,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=388940.0, ans=0.0 +2024-09-18 06:39:47,417 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.66 vs. limit=15.0 +2024-09-18 06:40:08,718 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.10 vs. limit=12.0 +2024-09-18 06:40:23,970 INFO [train.py:1198] (0/2) Epoch 22, batch 2250, loss[loss=0.2415, ctc_loss=0.1227, cr_loss=0.3459, attn_decoder_loss=0.247, over 29687.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.132, cr_loss=0.374, attn_decoder_loss=0.2481, over 5810993.19 frames. ], batch size: 82, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:40:27,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=389100.0, ans=0.2 +2024-09-18 06:40:36,553 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:41:14,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=389220.0, ans=0.125 +2024-09-18 06:41:39,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=389260.0, ans=0.2 +2024-09-18 06:41:41,017 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.612e+01 9.109e+01 9.746e+01 4.316e+02, threshold=1.822e+02, percent-clipped=5.0 +2024-09-18 06:41:44,068 INFO [train.py:1198] (0/2) Epoch 22, batch 2300, loss[loss=0.2127, ctc_loss=0.1095, cr_loss=0.3204, attn_decoder_loss=0.217, over 29332.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1316, cr_loss=0.3735, attn_decoder_loss=0.2472, over 5798665.08 frames. 
], batch size: 71, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:41:52,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.30 vs. limit=12.0 +2024-09-18 06:42:11,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=389340.0, ans=10.0 +2024-09-18 06:42:12,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=389380.0, ans=0.0 +2024-09-18 06:42:14,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=389380.0, ans=0.125 +2024-09-18 06:42:47,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=389460.0, ans=0.125 +2024-09-18 06:42:59,650 INFO [train.py:1198] (0/2) Epoch 22, batch 2350, loss[loss=0.2553, ctc_loss=0.1414, cr_loss=0.3865, attn_decoder_loss=0.2594, over 29675.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1314, cr_loss=0.3734, attn_decoder_loss=0.2474, over 5803853.39 frames. ], batch size: 83, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:44:04,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=389660.0, ans=0.125 +2024-09-18 06:44:13,259 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.909e+01 8.749e+01 9.346e+01 1.024e+02 1.570e+02, threshold=1.869e+02, percent-clipped=0.0 +2024-09-18 06:44:16,230 INFO [train.py:1198] (0/2) Epoch 22, batch 2400, loss[loss=0.2342, ctc_loss=0.1256, cr_loss=0.3746, attn_decoder_loss=0.238, over 29527.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1322, cr_loss=0.3744, attn_decoder_loss=0.2482, over 5807491.14 frames. ], batch size: 76, lr: 5.05e-03, grad_scale: 16.0 +2024-09-18 06:44:18,568 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.98 vs. 
limit=22.5 +2024-09-18 06:44:39,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=389740.0, ans=0.125 +2024-09-18 06:44:46,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=389740.0, ans=0.125 +2024-09-18 06:44:50,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=389780.0, ans=0.1 +2024-09-18 06:45:03,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=389820.0, ans=0.1 +2024-09-18 06:45:14,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=389820.0, ans=0.2 +2024-09-18 06:45:17,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=389860.0, ans=0.125 +2024-09-18 06:45:19,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=389860.0, ans=0.125 +2024-09-18 06:45:27,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=389860.0, ans=0.2 +2024-09-18 06:45:32,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=389860.0, ans=0.0 +2024-09-18 06:45:36,554 INFO [train.py:1198] (0/2) Epoch 22, batch 2450, loss[loss=0.2442, ctc_loss=0.1266, cr_loss=0.3622, attn_decoder_loss=0.2492, over 29727.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1329, cr_loss=0.3757, attn_decoder_loss=0.2491, over 5784180.74 frames. ], batch size: 82, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:45:50,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=389940.0, ans=0.125 +2024-09-18 06:46:16,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.72 vs. limit=15.0 +2024-09-18 06:46:50,165 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.482e+01 8.749e+01 9.206e+01 9.779e+01 5.372e+02, threshold=1.841e+02, percent-clipped=2.0 +2024-09-18 06:46:51,739 INFO [train.py:1198] (0/2) Epoch 22, batch 2500, loss[loss=0.2512, ctc_loss=0.1339, cr_loss=0.3699, attn_decoder_loss=0.2561, over 29611.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1332, cr_loss=0.3762, attn_decoder_loss=0.249, over 5793778.15 frames. ], batch size: 86, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:46:54,117 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.68 vs. limit=6.0 +2024-09-18 06:46:58,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=390100.0, ans=0.125 +2024-09-18 06:47:04,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=390100.0, ans=0.025 +2024-09-18 06:47:07,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.81 vs. 
limit=15.0 +2024-09-18 06:47:13,878 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.26 vs. limit=15.0 +2024-09-18 06:47:28,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=390180.0, ans=0.125 +2024-09-18 06:47:49,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=390220.0, ans=0.1 +2024-09-18 06:48:07,756 INFO [train.py:1198] (0/2) Epoch 22, batch 2550, loss[loss=0.2181, ctc_loss=0.1115, cr_loss=0.3292, attn_decoder_loss=0.2226, over 29388.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1333, cr_loss=0.3764, attn_decoder_loss=0.2494, over 5797056.18 frames. ], batch size: 67, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:48:09,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=390300.0, ans=0.0 +2024-09-18 06:48:30,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.11 vs. limit=15.0 +2024-09-18 06:48:32,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=390340.0, ans=0.07 +2024-09-18 06:48:40,440 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:48:50,306 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.87 vs. limit=15.0 +2024-09-18 06:49:02,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=390420.0, ans=0.0 +2024-09-18 06:49:23,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=390460.0, ans=0.125 +2024-09-18 06:49:24,706 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.271e+01 8.515e+01 9.035e+01 9.623e+01 2.254e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-18 06:49:26,249 INFO [train.py:1198] (0/2) Epoch 22, batch 2600, loss[loss=0.236, ctc_loss=0.1329, cr_loss=0.4046, attn_decoder_loss=0.2385, over 29450.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1337, cr_loss=0.377, attn_decoder_loss=0.2499, over 5792976.39 frames. 
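The recurring optim.py WARNING lines print five statistics of recent gradient norms followed by a clipping threshold. In every instance the threshold equals Clipping_scale times the middle value (for the line above: 2.0 × 9.035e+01 = 1.807e+02), so the five numbers read as [min, 25%, 50%, 75%, max]. A sketch of that bookkeeping, assuming a simple rolling buffer of per-step norms rather than icefall's exact optim.py logic:

```python
import torch

# Sketch of the statistic behind "grad-norm quartiles ... threshold=...",
# assuming a rolling buffer of per-step gradient norms. Only the printed
# relationship (threshold = Clipping_scale * median) is taken from the log;
# icefall's optim.py differs in detail.
def clipping_report(recent_grad_norms: list, clipping_scale: float = 2.0):
    norms = torch.tensor(recent_grad_norms)
    quartiles = torch.quantile(norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * quartiles[2]  # scale times the median
    percent_clipped = 100.0 * (norms > threshold).float().mean()
    return quartiles, threshold, percent_clipped
```

Tying the threshold to the median rather than a fixed constant makes clipping self-calibrating: percent-clipped stays near zero in steady state (as in most of the warnings above) and only spikes when an outlier batch pushes the max far above the quartiles.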
], batch size: 78, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:49:30,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=390500.0, ans=0.0 +2024-09-18 06:49:46,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=390540.0, ans=0.0 +2024-09-18 06:49:54,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=390540.0, ans=0.1 +2024-09-18 06:50:20,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=390620.0, ans=0.0 +2024-09-18 06:50:24,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=390620.0, ans=0.1 +2024-09-18 06:50:43,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.72 vs. limit=15.0 +2024-09-18 06:50:43,871 INFO [train.py:1198] (0/2) Epoch 22, batch 2650, loss[loss=0.2547, ctc_loss=0.1385, cr_loss=0.3707, attn_decoder_loss=0.2594, over 29256.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1336, cr_loss=0.377, attn_decoder_loss=0.2499, over 5799180.96 frames. ], batch size: 100, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:50:54,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=390700.0, ans=0.025 +2024-09-18 06:51:08,906 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.54 vs. limit=15.0 +2024-09-18 06:51:18,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=390780.0, ans=0.125 +2024-09-18 06:51:32,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=390820.0, ans=0.125 +2024-09-18 06:51:40,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=390820.0, ans=0.0 +2024-09-18 06:51:41,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=390820.0, ans=0.025 +2024-09-18 06:51:51,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=390860.0, ans=0.0 +2024-09-18 06:51:51,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=390860.0, ans=0.125 +2024-09-18 06:51:57,484 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.636e+01 8.432e+01 9.013e+01 9.580e+01 2.667e+02, threshold=1.803e+02, percent-clipped=2.0 +2024-09-18 06:51:59,086 INFO [train.py:1198] (0/2) Epoch 22, batch 2700, loss[loss=0.2532, ctc_loss=0.1333, cr_loss=0.3584, attn_decoder_loss=0.2585, over 29536.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1337, cr_loss=0.3774, attn_decoder_loss=0.2502, over 5794354.89 frames. 
], batch size: 87, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:52:08,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=390900.0, ans=0.05 +2024-09-18 06:52:10,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=390900.0, ans=0.025 +2024-09-18 06:52:28,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=390940.0, ans=0.0 +2024-09-18 06:52:29,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.04 vs. limit=22.5 +2024-09-18 06:53:06,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=391060.0, ans=0.0 +2024-09-18 06:53:17,094 INFO [train.py:1198] (0/2) Epoch 22, batch 2750, loss[loss=0.2333, ctc_loss=0.1312, cr_loss=0.367, attn_decoder_loss=0.2365, over 29531.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.133, cr_loss=0.3761, attn_decoder_loss=0.2489, over 5793863.10 frames. ], batch size: 75, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:53:37,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=391140.0, ans=0.0 +2024-09-18 06:53:49,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=391180.0, ans=0.0 +2024-09-18 06:53:54,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=391180.0, ans=0.1 +2024-09-18 06:54:26,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=391260.0, ans=0.1 +2024-09-18 06:54:29,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=391260.0, ans=0.125 +2024-09-18 06:54:34,213 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.791e+01 9.407e+01 1.009e+02 2.763e+02, threshold=1.881e+02, percent-clipped=2.0 +2024-09-18 06:54:35,685 INFO [train.py:1198] (0/2) Epoch 22, batch 2800, loss[loss=0.2779, ctc_loss=0.1767, cr_loss=0.4262, attn_decoder_loss=0.2797, over 20368.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1332, cr_loss=0.3761, attn_decoder_loss=0.249, over 5775664.38 frames. ], batch size: 211, lr: 5.04e-03, grad_scale: 16.0 +2024-09-18 06:54:57,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=391340.0, ans=0.1 +2024-09-18 06:55:00,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=391340.0, ans=0.0 +2024-09-18 06:55:30,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=391420.0, ans=0.125 +2024-09-18 06:55:35,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=391460.0, ans=0.07 +2024-09-18 06:55:45,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=391460.0, ans=0.0 +2024-09-18 06:55:51,525 INFO [train.py:1198] (0/2) Epoch 22, batch 2850, loss[loss=0.2442, ctc_loss=0.1359, cr_loss=0.3949, attn_decoder_loss=0.2474, over 29520.00 frames. 
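The Whitening lines report a metric against a whitening_limit for a named activation. A plausible reading, paraphrased from the spirit of icefall's scaling.py rather than copied from it: the metric measures how far the channel covariance of the activations is from a multiple of the identity, and the module pushes the metric back under the limit when it is exceeded. A sketch under that assumption:

```python
import torch

# Assumed form of the Whitening "metric": the ratio of the mean squared
# eigenvalue of the channel covariance to its squared mean eigenvalue.
# It is >= 1, and equals 1.0 exactly when the covariance is a multiple of
# the identity, i.e. the activations are perfectly "white"; the logged
# whitening_limit is the bound the module enforces.
def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    # x: (num_frames, num_channels) activations for one named module
    x = x - x.mean(dim=0)
    cov = (x.t() @ x) / x.shape[0]      # channel covariance
    eigs = torch.linalg.eigvalsh(cov)   # real eigenvalues of a symmetric matrix
    return (eigs ** 2).mean() / eigs.mean() ** 2
```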
], tot_loss[loss=0.2453, ctc_loss=0.1333, cr_loss=0.3762, attn_decoder_loss=0.2494, over 5761859.23 frames. ], batch size: 77, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 06:56:13,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=391540.0, ans=0.125 +2024-09-18 06:56:35,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=391580.0, ans=0.0 +2024-09-18 06:56:42,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=391620.0, ans=0.0 +2024-09-18 06:56:51,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=391620.0, ans=0.125 +2024-09-18 06:56:57,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=391660.0, ans=0.0 +2024-09-18 06:57:03,895 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.22 vs. limit=22.5 +2024-09-18 06:57:06,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=391660.0, ans=0.125 +2024-09-18 06:57:09,049 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.734e+01 8.934e+01 9.738e+01 1.096e+02 2.741e+02, threshold=1.948e+02, percent-clipped=1.0 +2024-09-18 06:57:09,071 INFO [train.py:1198] (0/2) Epoch 22, batch 2900, loss[loss=0.2354, ctc_loss=0.1238, cr_loss=0.374, attn_decoder_loss=0.2395, over 29427.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1336, cr_loss=0.3776, attn_decoder_loss=0.2502, over 5787904.04 frames. ], batch size: 79, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 06:57:40,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=391780.0, ans=0.125 +2024-09-18 06:57:42,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=391780.0, ans=0.025 +2024-09-18 06:57:52,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten.whitening_limit, batch_count=391780.0, ans=15.0 +2024-09-18 06:57:55,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=391820.0, ans=0.125 +2024-09-18 06:58:22,722 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:58:27,107 INFO [train.py:1198] (0/2) Epoch 22, batch 2950, loss[loss=0.2407, ctc_loss=0.1296, cr_loss=0.3705, attn_decoder_loss=0.2448, over 29546.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1326, cr_loss=0.3755, attn_decoder_loss=0.2487, over 5782191.72 frames. ], batch size: 75, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 06:58:30,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=391900.0, ans=0.015 +2024-09-18 06:58:47,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.73 vs. limit=15.0 +2024-09-18 06:59:06,796 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=7.49 vs. 
limit=10.0 +2024-09-18 06:59:15,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=392020.0, ans=0.2 +2024-09-18 06:59:31,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=392060.0, ans=0.0 +2024-09-18 06:59:43,571 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.515e+01 8.512e+01 8.926e+01 9.722e+01 3.359e+02, threshold=1.785e+02, percent-clipped=2.0 +2024-09-18 06:59:43,593 INFO [train.py:1198] (0/2) Epoch 22, batch 3000, loss[loss=0.2505, ctc_loss=0.1362, cr_loss=0.4077, attn_decoder_loss=0.2542, over 29769.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1332, cr_loss=0.3767, attn_decoder_loss=0.2491, over 5783346.98 frames. ], batch size: 81, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 06:59:43,594 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 06:59:51,655 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.5.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([4.3017, 4.2715, 3.7968, 4.2151], device='cuda:0') +2024-09-18 07:00:03,080 INFO [train.py:1230] (0/2) Epoch 22, validation: loss=0.2118, ctc_loss=0.03901, cr_loss=5.241e-15, attn_decoder_loss=0.231, over 944034.00 frames. +2024-09-18 07:00:03,080 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 07:00:14,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=392100.0, ans=0.125 +2024-09-18 07:00:30,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=392140.0, ans=0.125 +2024-09-18 07:00:31,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=392140.0, ans=0.0 +2024-09-18 07:01:06,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=392260.0, ans=0.125 +2024-09-18 07:01:21,517 INFO [train.py:1198] (0/2) Epoch 22, batch 3050, loss[loss=0.236, ctc_loss=0.1256, cr_loss=0.3484, attn_decoder_loss=0.2405, over 29537.00 frames. ], tot_loss[loss=0.2457, ctc_loss=0.1337, cr_loss=0.3773, attn_decoder_loss=0.2497, over 5776725.81 frames. ], batch size: 76, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 07:01:50,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=392380.0, ans=0.0 +2024-09-18 07:01:52,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=392380.0, ans=0.2 +2024-09-18 07:02:01,782 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.07 vs. limit=12.0 +2024-09-18 07:02:12,253 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.80 vs. 
limit=15.0 +2024-09-18 07:02:14,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=392420.0, ans=0.125 +2024-09-18 07:02:17,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=392420.0, ans=0.0 +2024-09-18 07:02:20,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=392460.0, ans=0.5 +2024-09-18 07:02:21,259 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.91 vs. limit=15.0 +2024-09-18 07:02:22,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=392460.0, ans=0.0 +2024-09-18 07:02:29,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=392460.0, ans=0.125 +2024-09-18 07:02:31,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=392460.0, ans=0.2 +2024-09-18 07:02:37,019 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.586e+01 8.738e+01 9.227e+01 9.918e+01 5.288e+02, threshold=1.845e+02, percent-clipped=2.0 +2024-09-18 07:02:37,041 INFO [train.py:1198] (0/2) Epoch 22, batch 3100, loss[loss=0.266, ctc_loss=0.1519, cr_loss=0.4075, attn_decoder_loss=0.2696, over 29299.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1339, cr_loss=0.3775, attn_decoder_loss=0.2495, over 5777160.55 frames. ], batch size: 100, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 07:02:57,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=392540.0, ans=0.05 +2024-09-18 07:02:58,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=392540.0, ans=0.125 +2024-09-18 07:03:00,709 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.75 vs. limit=10.0 +2024-09-18 07:03:10,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=392580.0, ans=0.2 +2024-09-18 07:03:46,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=392660.0, ans=0.0 +2024-09-18 07:03:55,333 INFO [train.py:1198] (0/2) Epoch 22, batch 3150, loss[loss=0.2581, ctc_loss=0.1336, cr_loss=0.3768, attn_decoder_loss=0.2635, over 28794.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1339, cr_loss=0.3779, attn_decoder_loss=0.2493, over 5783534.18 frames. ], batch size: 104, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 07:04:10,977 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:04:12,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=392740.0, ans=0.125 +2024-09-18 07:04:20,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=392740.0, ans=0.2 +2024-09-18 07:04:42,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.94 vs. 
limit=15.0 +2024-09-18 07:04:44,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=392820.0, ans=0.025 +2024-09-18 07:04:50,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=392820.0, ans=0.09899494936611666 +2024-09-18 07:05:09,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=392860.0, ans=0.125 +2024-09-18 07:05:13,334 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.207e+01 8.635e+01 9.167e+01 9.821e+01 1.751e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-18 07:05:13,356 INFO [train.py:1198] (0/2) Epoch 22, batch 3200, loss[loss=0.2448, ctc_loss=0.1305, cr_loss=0.3869, attn_decoder_loss=0.2489, over 29430.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1331, cr_loss=0.3763, attn_decoder_loss=0.2485, over 5793742.32 frames. ], batch size: 79, lr: 5.03e-03, grad_scale: 16.0 +2024-09-18 07:05:30,653 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.60 vs. limit=15.0 +2024-09-18 07:05:38,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=392940.0, ans=0.0 +2024-09-18 07:05:43,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=392980.0, ans=0.025 +2024-09-18 07:05:44,638 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.98 vs. limit=22.5 +2024-09-18 07:05:47,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=392980.0, ans=0.125 +2024-09-18 07:05:49,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=392980.0, ans=0.07 +2024-09-18 07:06:15,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=393060.0, ans=0.0 +2024-09-18 07:06:24,031 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=15.0 +2024-09-18 07:06:28,649 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.32 vs. limit=15.0 +2024-09-18 07:06:29,127 INFO [train.py:1198] (0/2) Epoch 22, batch 3250, loss[loss=0.2494, ctc_loss=0.1282, cr_loss=0.3808, attn_decoder_loss=0.2544, over 29685.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1332, cr_loss=0.3766, attn_decoder_loss=0.2488, over 5799315.35 frames. 
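Most of the log volume is scaling.py printing ScheduledFloat values: hyperparameters such as dropout_p, skip rates and balancer probabilities that are functions of batch_count instead of constants. A minimal sketch of the idea, assuming piecewise-linear interpolation between (batch_count, value) breakpoints; the real class in scaling.py carries more machinery, and the breakpoints below are illustrative only:

```python
# Minimal sketch of a batch-count-scheduled hyperparameter in the spirit of
# scaling.py's ScheduledFloat; the real class supports more (defaults,
# arithmetic on schedules). Breakpoints here are illustrative only.
class ScheduledFloat:
    def __init__(self, *points):
        self.points = sorted(points)  # (batch_count, value) breakpoints
        self.batch_count = 0.0        # advanced by the training loop

    def __float__(self):
        pts = self.points
        if self.batch_count <= pts[0][0]:
            return pts[0][1]
        if self.batch_count >= pts[-1][0]:
            return pts[-1][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if x0 <= self.batch_count <= x1:
                t = (self.batch_count - x0) / (x1 - x0)
                return y0 + t * (y1 - y0)

# e.g. a dropout rate decaying from 0.3 to 0.1 over the first 20k batches
dropout_p = ScheduledFloat((0.0, 0.3), (20000.0, 0.1))
dropout_p.batch_count = 388300.0      # a batch_count seen in the log above
assert float(dropout_p) == 0.1
```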
], batch size: 84, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:06:44,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=393140.0, ans=0.0 +2024-09-18 07:06:52,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=393140.0, ans=0.0 +2024-09-18 07:07:05,635 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:07:07,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=393180.0, ans=0.125 +2024-09-18 07:07:17,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=393220.0, ans=0.125 +2024-09-18 07:07:17,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=393220.0, ans=0.125 +2024-09-18 07:07:26,078 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.05 vs. limit=15.0 +2024-09-18 07:07:26,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=393220.0, ans=0.0 +2024-09-18 07:07:31,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=393260.0, ans=0.125 +2024-09-18 07:07:33,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=393260.0, ans=0.0 +2024-09-18 07:07:47,130 INFO [train.py:1198] (0/2) Epoch 22, batch 3300, loss[loss=0.2536, ctc_loss=0.1432, cr_loss=0.3902, attn_decoder_loss=0.2572, over 28543.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1324, cr_loss=0.3748, attn_decoder_loss=0.2474, over 5797414.32 frames. ], batch size: 112, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:07:48,691 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.338e+01 8.576e+01 9.104e+01 9.607e+01 2.025e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-18 07:07:54,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=393300.0, ans=0.125 +2024-09-18 07:07:56,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=393300.0, ans=0.1 +2024-09-18 07:08:07,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=393340.0, ans=0.125 +2024-09-18 07:08:15,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=393380.0, ans=0.125 +2024-09-18 07:08:19,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=393380.0, ans=0.125 +2024-09-18 07:08:44,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.87 vs. 
limit=22.5 +2024-09-18 07:08:46,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=393460.0, ans=0.125 +2024-09-18 07:08:46,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=393460.0, ans=0.125 +2024-09-18 07:09:04,459 INFO [train.py:1198] (0/2) Epoch 22, batch 3350, loss[loss=0.2585, ctc_loss=0.1399, cr_loss=0.3875, attn_decoder_loss=0.2631, over 28779.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.133, cr_loss=0.3752, attn_decoder_loss=0.2482, over 5775421.47 frames. ], batch size: 104, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:09:21,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=393540.0, ans=0.05 +2024-09-18 07:09:30,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=393540.0, ans=0.0 +2024-09-18 07:09:33,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=393580.0, ans=0.09899494936611666 +2024-09-18 07:09:40,647 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.57 vs. limit=22.5 +2024-09-18 07:09:41,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=393580.0, ans=0.125 +2024-09-18 07:09:49,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=393620.0, ans=0.0 +2024-09-18 07:09:53,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=393620.0, ans=0.0 +2024-09-18 07:10:20,923 INFO [train.py:1198] (0/2) Epoch 22, batch 3400, loss[loss=0.2113, ctc_loss=0.1101, cr_loss=0.3183, attn_decoder_loss=0.2155, over 29342.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.133, cr_loss=0.3748, attn_decoder_loss=0.2481, over 5766593.11 frames. ], batch size: 67, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:10:22,302 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.700e+01 8.673e+01 9.256e+01 9.754e+01 2.312e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-18 07:10:30,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=393700.0, ans=0.125 +2024-09-18 07:10:31,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=393700.0, ans=0.1 +2024-09-18 07:11:07,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=393820.0, ans=0.035 +2024-09-18 07:11:08,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=393820.0, ans=0.0 +2024-09-18 07:11:14,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=393820.0, ans=0.2 +2024-09-18 07:11:18,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.37 vs. 
limit=10.0 +2024-09-18 07:11:37,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=393900.0, ans=0.1 +2024-09-18 07:11:38,638 INFO [train.py:1198] (0/2) Epoch 22, batch 3450, loss[loss=0.25, ctc_loss=0.1316, cr_loss=0.3732, attn_decoder_loss=0.2549, over 28321.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.133, cr_loss=0.3748, attn_decoder_loss=0.2484, over 5774615.23 frames. ], batch size: 111, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:12:00,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=393940.0, ans=0.0 +2024-09-18 07:12:05,638 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.52 vs. limit=22.5 +2024-09-18 07:12:08,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.44 vs. limit=15.0 +2024-09-18 07:12:17,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=393980.0, ans=0.125 +2024-09-18 07:12:36,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=394020.0, ans=0.025 +2024-09-18 07:12:56,515 INFO [train.py:1198] (0/2) Epoch 22, batch 3500, loss[loss=0.2222, ctc_loss=0.1188, cr_loss=0.3558, attn_decoder_loss=0.2258, over 29318.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1329, cr_loss=0.3749, attn_decoder_loss=0.2482, over 5777030.80 frames. ], batch size: 71, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:12:58,047 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.251e+01 8.509e+01 8.992e+01 9.710e+01 6.035e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-18 07:12:58,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=394100.0, ans=0.1 +2024-09-18 07:13:04,582 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:13:07,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=394100.0, ans=0.125 +2024-09-18 07:13:31,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=394180.0, ans=0.2 +2024-09-18 07:13:53,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=394220.0, ans=0.0 +2024-09-18 07:13:55,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=394260.0, ans=0.125 +2024-09-18 07:14:11,193 INFO [train.py:1198] (0/2) Epoch 22, batch 3550, loss[loss=0.2501, ctc_loss=0.1277, cr_loss=0.3708, attn_decoder_loss=0.2555, over 29703.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1323, cr_loss=0.3743, attn_decoder_loss=0.2481, over 5782829.52 frames. 
], batch size: 89, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:14:12,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=394300.0, ans=0.125 +2024-09-18 07:14:18,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=394300.0, ans=0.1 +2024-09-18 07:14:39,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=394380.0, ans=0.125 +2024-09-18 07:14:48,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.34 vs. limit=22.5 +2024-09-18 07:14:59,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=394420.0, ans=0.0 +2024-09-18 07:15:11,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=394460.0, ans=0.0 +2024-09-18 07:15:24,735 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:15:25,910 INFO [train.py:1198] (0/2) Epoch 22, batch 3600, loss[loss=0.2359, ctc_loss=0.1305, cr_loss=0.3794, attn_decoder_loss=0.2392, over 29501.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1323, cr_loss=0.3747, attn_decoder_loss=0.2481, over 5791977.58 frames. ], batch size: 77, lr: 5.02e-03, grad_scale: 16.0 +2024-09-18 07:15:27,409 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.441e+01 8.945e+01 9.412e+01 1.487e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-18 07:16:08,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=394580.0, ans=0.0 +2024-09-18 07:16:40,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=394700.0, ans=0.125 +2024-09-18 07:16:42,262 INFO [train.py:1198] (0/2) Epoch 22, batch 3650, loss[loss=0.2608, ctc_loss=0.1449, cr_loss=0.4032, attn_decoder_loss=0.2647, over 29522.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1322, cr_loss=0.3745, attn_decoder_loss=0.2478, over 5793274.24 frames. ], batch size: 90, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:17:39,645 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.88 vs. limit=15.0 +2024-09-18 07:17:40,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=394860.0, ans=0.125 +2024-09-18 07:17:46,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=394860.0, ans=0.0 +2024-09-18 07:17:56,477 INFO [train.py:1198] (0/2) Epoch 22, batch 3700, loss[loss=0.2552, ctc_loss=0.1397, cr_loss=0.3929, attn_decoder_loss=0.2593, over 29714.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1316, cr_loss=0.3742, attn_decoder_loss=0.2476, over 5803053.40 frames. 
], batch size: 84, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:17:59,516 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.466e+01 8.986e+01 9.824e+01 1.367e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-18 07:18:10,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=394940.0, ans=0.025 +2024-09-18 07:18:11,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=394940.0, ans=0.125 +2024-09-18 07:18:22,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=394940.0, ans=0.1 +2024-09-18 07:18:22,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=394940.0, ans=0.2 +2024-09-18 07:18:35,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=394980.0, ans=0.2 +2024-09-18 07:19:12,625 INFO [train.py:1198] (0/2) Epoch 22, batch 3750, loss[loss=0.2206, ctc_loss=0.1252, cr_loss=0.3682, attn_decoder_loss=0.223, over 29339.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1322, cr_loss=0.3752, attn_decoder_loss=0.2478, over 5806760.96 frames. ], batch size: 67, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:19:26,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=395140.0, ans=0.125 +2024-09-18 07:19:54,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=395180.0, ans=0.125 +2024-09-18 07:19:56,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=395220.0, ans=0.1 +2024-09-18 07:20:05,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=395220.0, ans=0.2 +2024-09-18 07:20:07,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=395220.0, ans=0.125 +2024-09-18 07:20:08,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=395220.0, ans=0.025 +2024-09-18 07:20:19,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=395260.0, ans=0.125 +2024-09-18 07:20:20,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=395260.0, ans=0.05 +2024-09-18 07:20:26,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=395300.0, ans=0.125 +2024-09-18 07:20:27,749 INFO [train.py:1198] (0/2) Epoch 22, batch 3800, loss[loss=0.2571, ctc_loss=0.1359, cr_loss=0.3944, attn_decoder_loss=0.2618, over 29636.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1316, cr_loss=0.3739, attn_decoder_loss=0.2472, over 5797279.93 frames. 
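The grad_scale field in the batch records alternates between 8.0 and 16.0, which is consistent with dynamic loss scaling under automatic mixed precision: the scale grows after a run of overflow-free steps and halves on overflow. A sketch of the standard torch.cuda.amp pattern under that assumption; the actual wiring in train.py may differ:

```python
import torch

# Sketch of the dynamic loss scaling suggested by the alternating
# "grad_scale: 8.0 / 16.0" fields, using the standard torch.cuda.amp
# pattern; the actual wiring in train.py may differ.
scaler = torch.cuda.amp.GradScaler(init_scale=8.0, growth_interval=2000)

def train_step(model, optimizer, batch, compute_loss):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = compute_loss(model, batch)
    scaler.scale(loss).backward()  # backprop at the current loss scale
    scaler.step(optimizer)         # unscales grads; skips step on inf/nan
    scaler.update()                # doubles the scale after a clean run,
                                   # halves it on overflow
    return loss.detach(), scaler.get_scale()  # the value logged as grad_scale
```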
], batch size: 86, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:20:30,688 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.290e+01 8.441e+01 9.008e+01 9.541e+01 1.561e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-18 07:20:59,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=395380.0, ans=10.0 +2024-09-18 07:20:59,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=395380.0, ans=0.0 +2024-09-18 07:21:18,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=395420.0, ans=0.0 +2024-09-18 07:21:25,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=395460.0, ans=0.1 +2024-09-18 07:21:41,882 INFO [train.py:1198] (0/2) Epoch 22, batch 3850, loss[loss=0.2642, ctc_loss=0.1515, cr_loss=0.4257, attn_decoder_loss=0.2673, over 29305.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1316, cr_loss=0.3741, attn_decoder_loss=0.2475, over 5812245.84 frames. ], batch size: 100, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:21:49,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=395500.0, ans=0.015 +2024-09-18 07:21:55,781 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.57 vs. limit=15.0 +2024-09-18 07:21:58,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=395540.0, ans=0.0 +2024-09-18 07:22:27,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.24 vs. limit=22.5 +2024-09-18 07:22:49,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=395660.0, ans=0.125 +2024-09-18 07:22:57,687 INFO [train.py:1198] (0/2) Epoch 22, batch 3900, loss[loss=0.2478, ctc_loss=0.1298, cr_loss=0.3667, attn_decoder_loss=0.2527, over 29615.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1318, cr_loss=0.3743, attn_decoder_loss=0.2479, over 5816353.60 frames. ], batch size: 86, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:23:00,754 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.375e+01 8.669e+01 9.089e+01 9.620e+01 1.531e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-18 07:23:02,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=395700.0, ans=0.025 +2024-09-18 07:23:14,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=395740.0, ans=0.025 +2024-09-18 07:23:39,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=395780.0, ans=0.07 +2024-09-18 07:23:39,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=395780.0, ans=0.125 +2024-09-18 07:23:57,388 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.42 vs. 
limit=15.0 +2024-09-18 07:24:04,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=395860.0, ans=0.09899494936611666 +2024-09-18 07:24:11,541 INFO [train.py:1198] (0/2) Epoch 22, batch 3950, loss[loss=0.2579, ctc_loss=0.1431, cr_loss=0.3862, attn_decoder_loss=0.262, over 29497.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1316, cr_loss=0.3748, attn_decoder_loss=0.248, over 5835824.65 frames. ], batch size: 97, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:24:15,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=395900.0, ans=0.125 +2024-09-18 07:25:01,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=396020.0, ans=0.1 +2024-09-18 07:25:02,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=396020.0, ans=0.125 +2024-09-18 07:25:06,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=396020.0, ans=0.035 +2024-09-18 07:25:15,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=396060.0, ans=0.125 +2024-09-18 07:25:17,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=396060.0, ans=0.2 +2024-09-18 07:25:27,277 INFO [train.py:1198] (0/2) Epoch 22, batch 4000, loss[loss=0.2289, ctc_loss=0.1226, cr_loss=0.3475, attn_decoder_loss=0.233, over 29553.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1324, cr_loss=0.3757, attn_decoder_loss=0.2483, over 5813026.98 frames. ], batch size: 74, lr: 5.01e-03, grad_scale: 16.0 +2024-09-18 07:25:30,130 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.472e+01 8.530e+01 8.952e+01 9.583e+01 2.635e+02, threshold=1.790e+02, percent-clipped=1.0 +2024-09-18 07:25:46,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=396140.0, ans=0.05 +2024-09-18 07:25:58,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=396180.0, ans=0.1 +2024-09-18 07:26:06,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=396180.0, ans=0.0 +2024-09-18 07:26:10,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=396220.0, ans=0.125 +2024-09-18 07:26:20,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=396220.0, ans=0.125 +2024-09-18 07:26:23,227 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.15 vs. limit=15.0 +2024-09-18 07:26:29,187 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.77 vs. 
limit=15.0 +2024-09-18 07:26:32,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff3.min_abs, batch_count=396260.0, ans=0.2 +2024-09-18 07:26:34,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=396260.0, ans=0.125 +2024-09-18 07:26:41,541 INFO [train.py:1198] (0/2) Epoch 22, batch 4050, loss[loss=0.2746, ctc_loss=0.1727, cr_loss=0.4072, attn_decoder_loss=0.2769, over 19797.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1321, cr_loss=0.3747, attn_decoder_loss=0.2481, over 5796783.07 frames. ], batch size: 209, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:26:51,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=396300.0, ans=0.125 +2024-09-18 07:26:52,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=396300.0, ans=0.125 +2024-09-18 07:27:10,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=396380.0, ans=0.125 +2024-09-18 07:27:35,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=396420.0, ans=0.0 +2024-09-18 07:27:46,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=396460.0, ans=0.1 +2024-09-18 07:27:56,314 INFO [train.py:1198] (0/2) Epoch 22, batch 4100, loss[loss=0.2613, ctc_loss=0.1447, cr_loss=0.3919, attn_decoder_loss=0.2656, over 29469.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1326, cr_loss=0.3756, attn_decoder_loss=0.2484, over 5792163.96 frames. ], batch size: 90, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:28:00,764 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.483e+01 8.697e+01 9.214e+01 1.008e+02 3.653e+02, threshold=1.843e+02, percent-clipped=2.0 +2024-09-18 07:28:03,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=396500.0, ans=0.0 +2024-09-18 07:28:04,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=396500.0, ans=0.0 +2024-09-18 07:28:34,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=396580.0, ans=0.09899494936611666 +2024-09-18 07:28:39,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=396620.0, ans=0.125 +2024-09-18 07:28:42,908 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.28 vs. limit=22.5 +2024-09-18 07:28:45,879 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.86 vs. limit=15.0 +2024-09-18 07:28:53,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.85 vs. limit=15.0 +2024-09-18 07:29:11,101 INFO [train.py:1198] (0/2) Epoch 22, batch 4150, loss[loss=0.2399, ctc_loss=0.1331, cr_loss=0.3785, attn_decoder_loss=0.2434, over 29501.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1323, cr_loss=0.3754, attn_decoder_loss=0.2479, over 5797913.39 frames. 
], batch size: 77, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:29:11,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=396700.0, ans=0.1 +2024-09-18 07:29:15,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=396700.0, ans=0.5 +2024-09-18 07:29:32,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=396740.0, ans=15.0 +2024-09-18 07:29:42,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=396780.0, ans=0.1 +2024-09-18 07:29:52,353 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.04 vs. limit=15.0 +2024-09-18 07:30:25,223 INFO [train.py:1198] (0/2) Epoch 22, batch 4200, loss[loss=0.2596, ctc_loss=0.1416, cr_loss=0.3861, attn_decoder_loss=0.2642, over 29523.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1326, cr_loss=0.376, attn_decoder_loss=0.2484, over 5799342.44 frames. ], batch size: 90, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:30:29,584 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.447e+01 9.085e+01 9.593e+01 1.747e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-18 07:30:31,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=396900.0, ans=0.125 +2024-09-18 07:30:43,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=396940.0, ans=0.5 +2024-09-18 07:31:07,248 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.01 vs. limit=15.0 +2024-09-18 07:31:32,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=397060.0, ans=0.0 +2024-09-18 07:31:39,886 INFO [train.py:1198] (0/2) Epoch 22, batch 4250, loss[loss=0.2233, ctc_loss=0.1108, cr_loss=0.3368, attn_decoder_loss=0.2283, over 29485.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1322, cr_loss=0.3754, attn_decoder_loss=0.2485, over 5805171.56 frames. ], batch size: 74, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:31:48,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=397100.0, ans=0.125 +2024-09-18 07:31:57,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=397140.0, ans=0.0 +2024-09-18 07:32:21,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=397180.0, ans=0.0 +2024-09-18 07:32:22,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=397220.0, ans=0.2 +2024-09-18 07:32:53,988 INFO [train.py:1198] (0/2) Epoch 22, batch 4300, loss[loss=0.2593, ctc_loss=0.1448, cr_loss=0.3904, attn_decoder_loss=0.2634, over 29560.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1318, cr_loss=0.3742, attn_decoder_loss=0.2485, over 5794848.07 frames. 
], batch size: 87, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:32:58,452 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.737e+01 9.479e+01 1.036e+02 1.602e+02, threshold=1.896e+02, percent-clipped=0.0 +2024-09-18 07:32:59,274 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.14 vs. limit=22.5 +2024-09-18 07:33:03,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=397300.0, ans=0.025 +2024-09-18 07:33:09,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=397340.0, ans=0.0 +2024-09-18 07:33:17,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=397340.0, ans=0.0 +2024-09-18 07:33:45,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=397420.0, ans=0.0 +2024-09-18 07:33:47,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=397420.0, ans=0.0 +2024-09-18 07:33:57,909 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.92 vs. limit=22.5 +2024-09-18 07:33:58,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=397460.0, ans=0.0 +2024-09-18 07:34:03,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=397460.0, ans=0.0 +2024-09-18 07:34:07,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=397500.0, ans=0.125 +2024-09-18 07:34:08,149 INFO [train.py:1198] (0/2) Epoch 22, batch 4350, loss[loss=0.2643, ctc_loss=0.147, cr_loss=0.4032, attn_decoder_loss=0.2684, over 29424.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1346, cr_loss=0.3798, attn_decoder_loss=0.2519, over 5797519.36 frames. ], batch size: 97, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:34:11,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=397500.0, ans=0.2 +2024-09-18 07:34:17,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=397500.0, ans=0.0 +2024-09-18 07:34:26,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.51 vs. limit=15.0 +2024-09-18 07:34:33,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=397540.0, ans=0.125 +2024-09-18 07:34:50,542 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.08 vs. limit=22.5 +2024-09-18 07:35:03,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.64 vs. limit=15.0 +2024-09-18 07:35:21,398 INFO [train.py:1198] (0/2) Epoch 22, batch 4400, loss[loss=0.2665, ctc_loss=0.1553, cr_loss=0.4323, attn_decoder_loss=0.2693, over 27482.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1367, cr_loss=0.384, attn_decoder_loss=0.2544, over 5768049.45 frames. 
], batch size: 124, lr: 5.00e-03, grad_scale: 16.0 +2024-09-18 07:35:25,704 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.295e+01 9.019e+01 9.432e+01 1.021e+02 4.096e+02, threshold=1.886e+02, percent-clipped=2.0 +2024-09-18 07:35:35,977 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.14 vs. limit=6.0 +2024-09-18 07:35:48,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=397740.0, ans=0.015 +2024-09-18 07:36:04,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=397820.0, ans=0.0 +2024-09-18 07:36:07,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=397820.0, ans=0.0 +2024-09-18 07:36:18,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=397820.0, ans=0.125 +2024-09-18 07:36:36,342 INFO [train.py:1198] (0/2) Epoch 22, batch 4450, loss[loss=0.2671, ctc_loss=0.1668, cr_loss=0.3972, attn_decoder_loss=0.2695, over 20594.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1409, cr_loss=0.389, attn_decoder_loss=0.2567, over 5572868.78 frames. ], batch size: 209, lr: 5.00e-03, grad_scale: 8.0 +2024-09-18 07:36:36,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=397900.0, ans=0.125 +2024-09-18 07:36:41,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=397900.0, ans=0.0 +2024-09-18 07:36:45,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=397900.0, ans=0.025 +2024-09-18 07:36:51,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=397940.0, ans=0.025 +2024-09-18 07:37:19,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=397980.0, ans=0.125 +2024-09-18 07:37:20,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=398020.0, ans=0.2 +2024-09-18 07:37:44,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=398060.0, ans=0.0 +2024-09-18 07:37:51,783 INFO [train.py:1198] (0/2) Epoch 22, batch 4500, loss[loss=0.2693, ctc_loss=0.1663, cr_loss=0.4054, attn_decoder_loss=0.2717, over 19576.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1453, cr_loss=0.3912, attn_decoder_loss=0.259, over 5233750.18 frames. ], batch size: 210, lr: 5.00e-03, grad_scale: 8.0 +2024-09-18 07:37:53,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=398100.0, ans=0.125 +2024-09-18 07:37:57,651 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.580e+01 1.014e+02 1.103e+02 1.223e+02 2.065e+02, threshold=2.205e+02, percent-clipped=1.0 +2024-09-18 07:37:59,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_na.min_abs, batch_count=398100.0, ans=0.02 +2024-09-18 07:38:01,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.69 vs. 
limit=22.5 +2024-09-18 07:38:08,891 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.13 vs. limit=15.0 +2024-09-18 07:38:10,881 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.81 vs. limit=8.0 +2024-09-18 07:38:20,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=398180.0, ans=0.125 +2024-09-18 07:38:29,076 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-22.pt +2024-09-18 07:39:14,512 INFO [train.py:1198] (0/2) Epoch 23, batch 0, loss[loss=0.225, ctc_loss=0.1159, cr_loss=0.3369, attn_decoder_loss=0.2296, over 29596.00 frames. ], tot_loss[loss=0.225, ctc_loss=0.1159, cr_loss=0.3369, attn_decoder_loss=0.2296, over 29596.00 frames. ], batch size: 73, lr: 4.89e-03, grad_scale: 16.0 +2024-09-18 07:39:14,513 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 07:39:33,043 INFO [train.py:1230] (0/2) Epoch 23, validation: loss=0.212, ctc_loss=0.03823, cr_loss=5.578e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-18 07:39:33,044 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 07:39:39,675 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.46 vs. limit=15.0 +2024-09-18 07:39:58,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=398240.0, ans=0.1 +2024-09-18 07:40:27,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=398320.0, ans=0.0 +2024-09-18 07:40:32,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=398360.0, ans=0.125 +2024-09-18 07:40:49,048 INFO [train.py:1198] (0/2) Epoch 23, batch 50, loss[loss=0.2233, ctc_loss=0.1242, cr_loss=0.3615, attn_decoder_loss=0.2262, over 29432.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1355, cr_loss=0.3819, attn_decoder_loss=0.25, over 1266805.41 frames. ], batch size: 70, lr: 4.89e-03, grad_scale: 8.0 +2024-09-18 07:41:05,119 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:41:38,768 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.725e+01 8.809e+01 9.782e+01 1.101e+02 2.337e+02, threshold=1.956e+02, percent-clipped=1.0 +2024-09-18 07:41:52,115 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.93 vs. 
limit=22.5 +2024-09-18 07:41:54,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=398560.0, ans=0.2 +2024-09-18 07:41:57,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=398560.0, ans=0.125 +2024-09-18 07:42:06,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=398560.0, ans=0.0 +2024-09-18 07:42:08,982 INFO [train.py:1198] (0/2) Epoch 23, batch 100, loss[loss=0.2427, ctc_loss=0.1377, cr_loss=0.3851, attn_decoder_loss=0.2458, over 29526.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1366, cr_loss=0.3824, attn_decoder_loss=0.2515, over 2252552.34 frames. ], batch size: 76, lr: 4.89e-03, grad_scale: 8.0 +2024-09-18 07:42:36,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=398640.0, ans=0.125 +2024-09-18 07:43:08,046 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.43 vs. limit=15.0 +2024-09-18 07:43:19,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=398760.0, ans=0.2 +2024-09-18 07:43:22,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=398800.0, ans=0.0 +2024-09-18 07:43:23,648 INFO [train.py:1198] (0/2) Epoch 23, batch 150, loss[loss=0.2189, ctc_loss=0.1141, cr_loss=0.3509, attn_decoder_loss=0.2228, over 29417.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1336, cr_loss=0.3773, attn_decoder_loss=0.2492, over 3047785.53 frames. ], batch size: 70, lr: 4.89e-03, grad_scale: 8.0 +2024-09-18 07:43:23,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=398800.0, ans=0.0 +2024-09-18 07:43:34,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=398800.0, ans=0.0 +2024-09-18 07:43:46,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=398840.0, ans=0.125 +2024-09-18 07:43:56,529 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.88 vs. limit=22.5 +2024-09-18 07:44:08,098 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.84 vs. limit=6.0 +2024-09-18 07:44:08,890 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.565e+01 8.443e+01 9.031e+01 9.523e+01 1.308e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-18 07:44:12,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=398920.0, ans=0.1 +2024-09-18 07:44:21,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=398920.0, ans=0.0 +2024-09-18 07:44:31,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.73 vs. limit=12.0 +2024-09-18 07:44:38,861 INFO [train.py:1198] (0/2) Epoch 23, batch 200, loss[loss=0.2604, ctc_loss=0.1421, cr_loss=0.3946, attn_decoder_loss=0.2648, over 27267.00 frames. 
], tot_loss[loss=0.2442, ctc_loss=0.1323, cr_loss=0.3762, attn_decoder_loss=0.2482, over 3658162.74 frames. ], batch size: 125, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:45:05,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=399040.0, ans=10.0 +2024-09-18 07:45:07,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=399040.0, ans=0.125 +2024-09-18 07:45:18,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=399080.0, ans=0.125 +2024-09-18 07:45:27,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=399120.0, ans=0.1 +2024-09-18 07:45:43,793 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=10.92 vs. limit=15.0 +2024-09-18 07:45:45,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=399160.0, ans=0.2 +2024-09-18 07:45:59,853 INFO [train.py:1198] (0/2) Epoch 23, batch 250, loss[loss=0.2441, ctc_loss=0.1306, cr_loss=0.3807, attn_decoder_loss=0.2483, over 29216.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.132, cr_loss=0.3759, attn_decoder_loss=0.2478, over 4140112.02 frames. ], batch size: 100, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:46:01,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_abs, batch_count=399200.0, ans=0.5 +2024-09-18 07:46:03,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=399200.0, ans=0.125 +2024-09-18 07:46:05,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.82 vs. limit=8.0 +2024-09-18 07:46:18,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=399240.0, ans=0.125 +2024-09-18 07:46:21,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=399240.0, ans=0.0 +2024-09-18 07:46:22,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=399240.0, ans=0.1 +2024-09-18 07:46:32,521 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.28 vs. limit=15.0 +2024-09-18 07:46:45,299 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.537e+01 9.009e+01 9.547e+01 2.225e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-18 07:46:56,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=399320.0, ans=0.1 +2024-09-18 07:47:03,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=399360.0, ans=0.0 +2024-09-18 07:47:15,459 INFO [train.py:1198] (0/2) Epoch 23, batch 300, loss[loss=0.2628, ctc_loss=0.1405, cr_loss=0.4001, attn_decoder_loss=0.2674, over 29520.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1313, cr_loss=0.3743, attn_decoder_loss=0.2475, over 4509692.85 frames. 
], batch size: 92, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:47:23,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=399400.0, ans=0.125 +2024-09-18 07:47:25,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.52 vs. limit=15.0 +2024-09-18 07:47:46,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=399480.0, ans=0.2 +2024-09-18 07:47:46,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=399480.0, ans=0.2 +2024-09-18 07:48:29,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=399600.0, ans=0.025 +2024-09-18 07:48:31,060 INFO [train.py:1198] (0/2) Epoch 23, batch 350, loss[loss=0.2212, ctc_loss=0.1132, cr_loss=0.3324, attn_decoder_loss=0.2258, over 29328.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.132, cr_loss=0.375, attn_decoder_loss=0.2482, over 4794541.37 frames. ], batch size: 71, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:48:37,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=399600.0, ans=0.025 +2024-09-18 07:49:01,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.46 vs. limit=15.0 +2024-09-18 07:49:19,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=399720.0, ans=0.1 +2024-09-18 07:49:20,802 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.416e+01 8.727e+01 9.232e+01 2.116e+02, threshold=1.745e+02, percent-clipped=2.0 +2024-09-18 07:49:22,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=399720.0, ans=0.125 +2024-09-18 07:49:27,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=399720.0, ans=0.125 +2024-09-18 07:49:28,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=399720.0, ans=0.09899494936611666 +2024-09-18 07:49:43,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=399760.0, ans=0.125 +2024-09-18 07:49:48,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=399760.0, ans=0.0 +2024-09-18 07:49:50,841 INFO [train.py:1198] (0/2) Epoch 23, batch 400, loss[loss=0.2498, ctc_loss=0.1312, cr_loss=0.3826, attn_decoder_loss=0.2544, over 29696.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.131, cr_loss=0.3733, attn_decoder_loss=0.2475, over 5024319.81 frames. ], batch size: 82, lr: 4.88e-03, grad_scale: 16.0 +2024-09-18 07:49:52,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=399800.0, ans=0.1 +2024-09-18 07:50:57,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.78 vs. 
limit=6.0 +2024-09-18 07:51:05,994 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-100000.pt +2024-09-18 07:51:14,608 INFO [train.py:1198] (0/2) Epoch 23, batch 450, loss[loss=0.2568, ctc_loss=0.1334, cr_loss=0.3993, attn_decoder_loss=0.2616, over 29690.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1314, cr_loss=0.3741, attn_decoder_loss=0.2481, over 5187304.74 frames. ], batch size: 83, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:51:17,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=400000.0, ans=0.125 +2024-09-18 07:51:17,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=400000.0, ans=0.125 +2024-09-18 07:51:34,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=400040.0, ans=0.0 +2024-09-18 07:51:58,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=400120.0, ans=0.125 +2024-09-18 07:52:01,483 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.566e+01 8.450e+01 8.997e+01 9.501e+01 2.678e+02, threshold=1.799e+02, percent-clipped=1.0 +2024-09-18 07:52:21,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=400160.0, ans=0.1 +2024-09-18 07:52:30,172 INFO [train.py:1198] (0/2) Epoch 23, batch 500, loss[loss=0.2565, ctc_loss=0.1384, cr_loss=0.3835, attn_decoder_loss=0.2611, over 29461.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.131, cr_loss=0.3736, attn_decoder_loss=0.2473, over 5330331.38 frames. ], batch size: 94, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:52:45,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=400240.0, ans=0.125 +2024-09-18 07:52:55,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=400240.0, ans=0.125 +2024-09-18 07:52:57,693 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.07 vs. limit=15.0 +2024-09-18 07:53:19,845 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.38 vs. limit=15.0 +2024-09-18 07:53:25,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=400320.0, ans=0.125 +2024-09-18 07:53:44,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=400360.0, ans=0.2 +2024-09-18 07:53:50,362 INFO [train.py:1198] (0/2) Epoch 23, batch 550, loss[loss=0.257, ctc_loss=0.1387, cr_loss=0.4059, attn_decoder_loss=0.2612, over 28763.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.131, cr_loss=0.3735, attn_decoder_loss=0.2473, over 5422777.38 frames. 
], batch size: 104, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:53:58,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=400400.0, ans=0.0 +2024-09-18 07:54:05,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=400440.0, ans=0.125 +2024-09-18 07:54:10,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=400440.0, ans=0.0 +2024-09-18 07:54:37,034 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.508e+01 8.525e+01 9.043e+01 9.907e+01 2.945e+02, threshold=1.809e+02, percent-clipped=3.0 +2024-09-18 07:55:05,779 INFO [train.py:1198] (0/2) Epoch 23, batch 600, loss[loss=0.2589, ctc_loss=0.141, cr_loss=0.3726, attn_decoder_loss=0.2637, over 29269.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1311, cr_loss=0.3742, attn_decoder_loss=0.2476, over 5510368.62 frames. ], batch size: 100, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 07:55:31,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=400640.0, ans=0.125 +2024-09-18 07:55:34,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=400680.0, ans=0.1 +2024-09-18 07:55:36,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=400680.0, ans=0.0 +2024-09-18 07:55:36,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=400680.0, ans=0.125 +2024-09-18 07:55:39,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=400680.0, ans=0.5 +2024-09-18 07:56:21,501 INFO [train.py:1198] (0/2) Epoch 23, batch 650, loss[loss=0.2479, ctc_loss=0.1275, cr_loss=0.3694, attn_decoder_loss=0.253, over 29778.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1304, cr_loss=0.373, attn_decoder_loss=0.247, over 5587446.78 frames. ], batch size: 81, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 07:56:32,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=400800.0, ans=0.125 +2024-09-18 07:56:37,328 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.31 vs. limit=15.0 +2024-09-18 07:56:47,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=400840.0, ans=0.0 +2024-09-18 07:56:48,084 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.48 vs. limit=22.5 +2024-09-18 07:57:07,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=400880.0, ans=0.1 +2024-09-18 07:57:10,591 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.62 vs. limit=15.0 +2024-09-18 07:57:12,447 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.07 vs. 
limit=6.0 +2024-09-18 07:57:12,930 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.280e+01 8.571e+01 9.065e+01 9.710e+01 2.691e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-18 07:57:30,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.39 vs. limit=15.0 +2024-09-18 07:57:41,626 INFO [train.py:1198] (0/2) Epoch 23, batch 700, loss[loss=0.2354, ctc_loss=0.1244, cr_loss=0.3698, attn_decoder_loss=0.2395, over 29518.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1307, cr_loss=0.3734, attn_decoder_loss=0.2475, over 5637807.99 frames. ], batch size: 76, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 07:57:55,454 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:57:58,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=401040.0, ans=0.125 +2024-09-18 07:58:27,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=401120.0, ans=0.07 +2024-09-18 07:58:33,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=401120.0, ans=0.0 +2024-09-18 07:58:35,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=401120.0, ans=0.125 +2024-09-18 07:58:36,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=401120.0, ans=0.125 +2024-09-18 07:58:47,806 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.99 vs. limit=15.0 +2024-09-18 07:58:52,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=401160.0, ans=10.0 +2024-09-18 07:58:54,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=401160.0, ans=0.125 +2024-09-18 07:58:57,478 INFO [train.py:1198] (0/2) Epoch 23, batch 750, loss[loss=0.2552, ctc_loss=0.1385, cr_loss=0.3796, attn_decoder_loss=0.2598, over 29723.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1307, cr_loss=0.3729, attn_decoder_loss=0.2472, over 5676771.75 frames. ], batch size: 82, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 07:59:05,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=401200.0, ans=0.125 +2024-09-18 07:59:05,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.52 vs. 
limit=22.5 +2024-09-18 07:59:06,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=401200.0, ans=0.0 +2024-09-18 07:59:35,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=401280.0, ans=0.0 +2024-09-18 07:59:43,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=401320.0, ans=0.125 +2024-09-18 07:59:44,084 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.383e+01 8.454e+01 8.911e+01 9.640e+01 3.418e+02, threshold=1.782e+02, percent-clipped=1.0 +2024-09-18 08:00:04,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=401360.0, ans=0.125 +2024-09-18 08:00:12,898 INFO [train.py:1198] (0/2) Epoch 23, batch 800, loss[loss=0.2255, ctc_loss=0.1183, cr_loss=0.3594, attn_decoder_loss=0.2295, over 29639.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1308, cr_loss=0.3734, attn_decoder_loss=0.2472, over 5705433.12 frames. ], batch size: 73, lr: 4.87e-03, grad_scale: 16.0 +2024-09-18 08:00:17,675 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:00:21,369 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.45 vs. limit=10.0 +2024-09-18 08:01:09,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=401520.0, ans=0.07 +2024-09-18 08:01:15,198 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.30 vs. limit=15.0 +2024-09-18 08:01:18,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=401560.0, ans=0.0 +2024-09-18 08:01:30,985 INFO [train.py:1198] (0/2) Epoch 23, batch 850, loss[loss=0.2436, ctc_loss=0.1227, cr_loss=0.3555, attn_decoder_loss=0.2492, over 29720.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1303, cr_loss=0.3725, attn_decoder_loss=0.2469, over 5734237.32 frames. 
], batch size: 89, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 08:01:32,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=401600.0, ans=0.125 +2024-09-18 08:01:34,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=401600.0, ans=0.125 +2024-09-18 08:01:41,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=401600.0, ans=0.1 +2024-09-18 08:01:56,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=401640.0, ans=0.125 +2024-09-18 08:02:18,871 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.183e+01 8.350e+01 8.947e+01 9.398e+01 1.136e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-18 08:02:31,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=401760.0, ans=0.0 +2024-09-18 08:02:40,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=401760.0, ans=0.0 +2024-09-18 08:02:46,367 INFO [train.py:1198] (0/2) Epoch 23, batch 900, loss[loss=0.2228, ctc_loss=0.1146, cr_loss=0.3316, attn_decoder_loss=0.2274, over 29598.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1312, cr_loss=0.3742, attn_decoder_loss=0.2474, over 5739863.91 frames. ], batch size: 73, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 08:02:48,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=401800.0, ans=0.5 +2024-09-18 08:03:31,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=401920.0, ans=0.125 +2024-09-18 08:03:37,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=401920.0, ans=0.2 +2024-09-18 08:03:39,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=401920.0, ans=0.125 +2024-09-18 08:03:46,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=401960.0, ans=0.125 +2024-09-18 08:04:01,327 INFO [train.py:1198] (0/2) Epoch 23, batch 950, loss[loss=0.2313, ctc_loss=0.1181, cr_loss=0.334, attn_decoder_loss=0.2365, over 29513.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1314, cr_loss=0.3737, attn_decoder_loss=0.2476, over 5741611.14 frames. ], batch size: 74, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 08:04:19,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=402040.0, ans=0.2 +2024-09-18 08:04:30,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=402080.0, ans=0.1 +2024-09-18 08:04:33,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.25 vs. 
limit=15.0 +2024-09-18 08:04:48,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff3.min_abs, batch_count=402080.0, ans=0.2 +2024-09-18 08:04:54,188 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.348e+01 8.814e+01 9.447e+01 1.062e+02 2.466e+02, threshold=1.889e+02, percent-clipped=1.0 +2024-09-18 08:05:00,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=402120.0, ans=0.2 +2024-09-18 08:05:09,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=402160.0, ans=0.125 +2024-09-18 08:05:21,287 INFO [train.py:1198] (0/2) Epoch 23, batch 1000, loss[loss=0.235, ctc_loss=0.1299, cr_loss=0.3715, attn_decoder_loss=0.2385, over 29490.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1323, cr_loss=0.3755, attn_decoder_loss=0.2485, over 5735108.81 frames. ], batch size: 77, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:05:23,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=402200.0, ans=0.125 +2024-09-18 08:05:43,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=402240.0, ans=0.0 +2024-09-18 08:05:50,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=402280.0, ans=0.125 +2024-09-18 08:06:06,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=402320.0, ans=0.125 +2024-09-18 08:06:06,811 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.05 vs. limit=15.0 +2024-09-18 08:06:31,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=402360.0, ans=0.025 +2024-09-18 08:06:31,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.32 vs. limit=15.0 +2024-09-18 08:06:33,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=402360.0, ans=0.125 +2024-09-18 08:06:37,691 INFO [train.py:1198] (0/2) Epoch 23, batch 1050, loss[loss=0.2476, ctc_loss=0.1306, cr_loss=0.3837, attn_decoder_loss=0.2521, over 29663.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1317, cr_loss=0.3747, attn_decoder_loss=0.2478, over 5743650.84 frames. 
], batch size: 85, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:06:51,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=402440.0, ans=0.2 +2024-09-18 08:07:19,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=402480.0, ans=0.1 +2024-09-18 08:07:19,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=402480.0, ans=0.025 +2024-09-18 08:07:26,535 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.610e+01 8.303e+01 8.731e+01 9.470e+01 1.420e+02, threshold=1.746e+02, percent-clipped=0.0 +2024-09-18 08:07:49,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.00 vs. limit=6.0 +2024-09-18 08:07:54,156 INFO [train.py:1198] (0/2) Epoch 23, batch 1100, loss[loss=0.2407, ctc_loss=0.1329, cr_loss=0.3728, attn_decoder_loss=0.2443, over 29462.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1312, cr_loss=0.3737, attn_decoder_loss=0.2474, over 5756094.58 frames. ], batch size: 78, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:08:01,854 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:08:13,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.51 vs. limit=22.5 +2024-09-18 08:08:27,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.69 vs. limit=15.0 +2024-09-18 08:08:32,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=402680.0, ans=0.2 +2024-09-18 08:08:56,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=402720.0, ans=0.0 +2024-09-18 08:09:11,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=402760.0, ans=0.125 +2024-09-18 08:09:14,480 INFO [train.py:1198] (0/2) Epoch 23, batch 1150, loss[loss=0.2311, ctc_loss=0.1164, cr_loss=0.3555, attn_decoder_loss=0.236, over 29471.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1313, cr_loss=0.3737, attn_decoder_loss=0.2474, over 5755449.67 frames. ], batch size: 78, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:09:21,398 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.32 vs. 
limit=15.0 +2024-09-18 08:09:40,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=402840.0, ans=0.125 +2024-09-18 08:10:03,216 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.173e+01 8.564e+01 9.109e+01 9.682e+01 1.953e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 08:10:03,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=402920.0, ans=0.0 +2024-09-18 08:10:17,202 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:10:30,401 INFO [train.py:1198] (0/2) Epoch 23, batch 1200, loss[loss=0.2643, ctc_loss=0.145, cr_loss=0.4138, attn_decoder_loss=0.2683, over 29683.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1318, cr_loss=0.3749, attn_decoder_loss=0.2483, over 5747716.77 frames. ], batch size: 85, lr: 4.86e-03, grad_scale: 16.0 +2024-09-18 08:10:44,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=403040.0, ans=0.0 +2024-09-18 08:10:51,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=403040.0, ans=0.125 +2024-09-18 08:11:02,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=403080.0, ans=0.125 +2024-09-18 08:11:16,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=403120.0, ans=0.125 +2024-09-18 08:11:20,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=403120.0, ans=0.025 +2024-09-18 08:11:23,138 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.51 vs. limit=15.0 +2024-09-18 08:11:33,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.47 vs. limit=10.0 +2024-09-18 08:11:43,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=403160.0, ans=0.1 +2024-09-18 08:11:46,535 INFO [train.py:1198] (0/2) Epoch 23, batch 1250, loss[loss=0.2563, ctc_loss=0.1429, cr_loss=0.3926, attn_decoder_loss=0.2601, over 29523.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1325, cr_loss=0.3766, attn_decoder_loss=0.2489, over 5775177.51 frames. ], batch size: 92, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:12:11,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=403240.0, ans=0.125 +2024-09-18 08:12:24,781 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.96 vs. limit=15.0 +2024-09-18 08:12:35,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.13 vs. 
limit=15.0 +2024-09-18 08:12:39,079 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.079e+01 8.243e+01 8.772e+01 9.696e+01 1.858e+02, threshold=1.754e+02, percent-clipped=1.0 +2024-09-18 08:12:53,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=403360.0, ans=0.09899494936611666 +2024-09-18 08:13:06,960 INFO [train.py:1198] (0/2) Epoch 23, batch 1300, loss[loss=0.2516, ctc_loss=0.1297, cr_loss=0.3569, attn_decoder_loss=0.2572, over 28160.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.132, cr_loss=0.3757, attn_decoder_loss=0.2482, over 5778215.38 frames. ], batch size: 111, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:13:14,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=403400.0, ans=0.0 +2024-09-18 08:13:16,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=403400.0, ans=0.125 +2024-09-18 08:13:32,359 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.74 vs. limit=22.5 +2024-09-18 08:13:36,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=403480.0, ans=0.125 +2024-09-18 08:13:43,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=403480.0, ans=0.1 +2024-09-18 08:13:48,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=403480.0, ans=0.125 +2024-09-18 08:13:51,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff2.min_abs, batch_count=403520.0, ans=0.1 +2024-09-18 08:13:56,889 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.40 vs. limit=6.0 +2024-09-18 08:13:59,929 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.08 vs. limit=12.0 +2024-09-18 08:14:22,782 INFO [train.py:1198] (0/2) Epoch 23, batch 1350, loss[loss=0.2387, ctc_loss=0.1159, cr_loss=0.3508, attn_decoder_loss=0.2446, over 29736.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1316, cr_loss=0.3753, attn_decoder_loss=0.2481, over 5796398.52 frames. ], batch size: 81, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:14:26,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.75 vs. 
limit=15.0 +2024-09-18 08:14:55,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=403680.0, ans=0.2 +2024-09-18 08:15:04,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff2.min_abs, batch_count=403680.0, ans=0.1 +2024-09-18 08:15:12,105 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.310e+01 8.370e+01 8.788e+01 9.254e+01 1.206e+02, threshold=1.758e+02, percent-clipped=0.0 +2024-09-18 08:15:13,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=403720.0, ans=0.125 +2024-09-18 08:15:14,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=403720.0, ans=0.1 +2024-09-18 08:15:15,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=403720.0, ans=0.2 +2024-09-18 08:15:37,899 INFO [train.py:1198] (0/2) Epoch 23, batch 1400, loss[loss=0.2094, ctc_loss=0.1077, cr_loss=0.3285, attn_decoder_loss=0.2134, over 29575.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1314, cr_loss=0.3748, attn_decoder_loss=0.2481, over 5807181.51 frames. ], batch size: 69, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:15:38,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=403800.0, ans=0.0 +2024-09-18 08:15:45,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=403800.0, ans=0.125 +2024-09-18 08:15:51,777 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:16:52,579 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.38 vs. limit=22.5 +2024-09-18 08:16:58,245 INFO [train.py:1198] (0/2) Epoch 23, batch 1450, loss[loss=0.2496, ctc_loss=0.1291, cr_loss=0.3717, attn_decoder_loss=0.2547, over 29417.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1315, cr_loss=0.3742, attn_decoder_loss=0.2483, over 5802430.50 frames. ], batch size: 94, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:17:25,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=404040.0, ans=0.125 +2024-09-18 08:17:47,799 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.505e+01 8.721e+01 9.213e+01 9.736e+01 2.438e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-18 08:18:13,670 INFO [train.py:1198] (0/2) Epoch 23, batch 1500, loss[loss=0.2465, ctc_loss=0.133, cr_loss=0.3858, attn_decoder_loss=0.2505, over 29626.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1318, cr_loss=0.3749, attn_decoder_loss=0.2486, over 5802638.98 frames. ], batch size: 86, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:18:17,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=404200.0, ans=0.0 +2024-09-18 08:18:52,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=404280.0, ans=0.125 +2024-09-18 08:19:29,876 INFO [train.py:1198] (0/2) Epoch 23, batch 1550, loss[loss=0.2626, ctc_loss=0.1461, cr_loss=0.417, attn_decoder_loss=0.2663, over 29491.00 frames. 
], tot_loss[loss=0.2446, ctc_loss=0.1325, cr_loss=0.376, attn_decoder_loss=0.2487, over 5778428.34 frames. ], batch size: 90, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:19:33,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=404400.0, ans=0.1 +2024-09-18 08:19:54,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=404440.0, ans=0.1 +2024-09-18 08:20:22,179 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.739e+01 8.646e+01 9.169e+01 9.938e+01 3.341e+02, threshold=1.834e+02, percent-clipped=2.0 +2024-09-18 08:20:41,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=404560.0, ans=0.125 +2024-09-18 08:20:50,182 INFO [train.py:1198] (0/2) Epoch 23, batch 1600, loss[loss=0.2484, ctc_loss=0.1263, cr_loss=0.3638, attn_decoder_loss=0.2538, over 29677.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1322, cr_loss=0.3754, attn_decoder_loss=0.2484, over 5761868.24 frames. ], batch size: 85, lr: 4.85e-03, grad_scale: 16.0 +2024-09-18 08:21:04,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=404640.0, ans=0.0 +2024-09-18 08:21:28,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=404680.0, ans=0.0 +2024-09-18 08:21:34,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=404720.0, ans=0.0 +2024-09-18 08:21:57,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=404760.0, ans=0.2 +2024-09-18 08:22:02,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.18 vs. limit=22.5 +2024-09-18 08:22:06,223 INFO [train.py:1198] (0/2) Epoch 23, batch 1650, loss[loss=0.2581, ctc_loss=0.1343, cr_loss=0.3922, attn_decoder_loss=0.2631, over 29723.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1319, cr_loss=0.3751, attn_decoder_loss=0.2482, over 5756361.75 frames. ], batch size: 89, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:22:30,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=404840.0, ans=0.07 +2024-09-18 08:22:30,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=404840.0, ans=0.1 +2024-09-18 08:22:32,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=404840.0, ans=0.025 +2024-09-18 08:22:37,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=404880.0, ans=0.0 +2024-09-18 08:22:53,202 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.94 vs. 
limit=15.0 +2024-09-18 08:22:58,234 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.577e+01 9.100e+01 9.886e+01 2.579e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-18 08:22:58,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=404920.0, ans=0.125 +2024-09-18 08:23:07,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=404960.0, ans=0.125 +2024-09-18 08:23:22,335 INFO [train.py:1198] (0/2) Epoch 23, batch 1700, loss[loss=0.2004, ctc_loss=0.09551, cr_loss=0.306, attn_decoder_loss=0.2053, over 29584.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1315, cr_loss=0.3749, attn_decoder_loss=0.248, over 5778634.77 frames. ], batch size: 69, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:23:37,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=405040.0, ans=0.125 +2024-09-18 08:23:39,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=405040.0, ans=0.125 +2024-09-18 08:23:40,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=405040.0, ans=0.0 +2024-09-18 08:23:47,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=405040.0, ans=0.125 +2024-09-18 08:23:52,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=405080.0, ans=0.025 +2024-09-18 08:23:54,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=405080.0, ans=0.1 +2024-09-18 08:24:25,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=405160.0, ans=0.1 +2024-09-18 08:24:31,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=405160.0, ans=0.0 +2024-09-18 08:24:42,501 INFO [train.py:1198] (0/2) Epoch 23, batch 1750, loss[loss=0.2138, ctc_loss=0.1119, cr_loss=0.3477, attn_decoder_loss=0.2174, over 29329.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1312, cr_loss=0.3743, attn_decoder_loss=0.2478, over 5788260.60 frames. ], batch size: 67, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:25:18,456 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.76 vs. limit=10.0 +2024-09-18 08:25:33,638 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.000e+01 8.508e+01 9.190e+01 9.615e+01 2.377e+02, threshold=1.838e+02, percent-clipped=1.0 +2024-09-18 08:25:57,522 INFO [train.py:1198] (0/2) Epoch 23, batch 1800, loss[loss=0.2455, ctc_loss=0.1352, cr_loss=0.3994, attn_decoder_loss=0.2489, over 29690.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1317, cr_loss=0.3748, attn_decoder_loss=0.2482, over 5790778.10 frames. 
], batch size: 83, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:26:14,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=405440.0, ans=0.0 +2024-09-18 08:26:18,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=405440.0, ans=0.125 +2024-09-18 08:26:59,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=405560.0, ans=15.0 +2024-09-18 08:27:13,787 INFO [train.py:1198] (0/2) Epoch 23, batch 1850, loss[loss=0.259, ctc_loss=0.1395, cr_loss=0.3881, attn_decoder_loss=0.2636, over 29635.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1317, cr_loss=0.3748, attn_decoder_loss=0.2483, over 5795255.43 frames. ], batch size: 86, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:27:23,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.26 vs. limit=15.0 +2024-09-18 08:27:24,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=405600.0, ans=0.025 +2024-09-18 08:27:29,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=405640.0, ans=0.07 +2024-09-18 08:27:32,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=405640.0, ans=0.125 +2024-09-18 08:27:39,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=405640.0, ans=0.125 +2024-09-18 08:27:45,100 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.25 vs. limit=15.0 +2024-09-18 08:28:02,237 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:28:07,771 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.439e+01 8.499e+01 9.020e+01 9.564e+01 1.401e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-18 08:28:11,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=405720.0, ans=0.125 +2024-09-18 08:28:14,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=405720.0, ans=0.025 +2024-09-18 08:28:23,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=405760.0, ans=0.0 +2024-09-18 08:28:31,649 INFO [train.py:1198] (0/2) Epoch 23, batch 1900, loss[loss=0.2555, ctc_loss=0.1372, cr_loss=0.3868, attn_decoder_loss=0.26, over 29707.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1319, cr_loss=0.3752, attn_decoder_loss=0.2489, over 5803028.32 frames. ], batch size: 89, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:28:40,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=405800.0, ans=0.125 +2024-09-18 08:29:17,825 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.90 vs. 
limit=10.0 +2024-09-18 08:29:20,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=405920.0, ans=0.125 +2024-09-18 08:29:22,554 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.55 vs. limit=6.0 +2024-09-18 08:29:33,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=405960.0, ans=0.0 +2024-09-18 08:29:38,840 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.16 vs. limit=22.5 +2024-09-18 08:29:50,195 INFO [train.py:1198] (0/2) Epoch 23, batch 1950, loss[loss=0.2382, ctc_loss=0.1384, cr_loss=0.389, attn_decoder_loss=0.2407, over 29471.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1328, cr_loss=0.3768, attn_decoder_loss=0.25, over 5817734.88 frames. ], batch size: 78, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:29:58,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=406000.0, ans=0.0 +2024-09-18 08:29:59,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=406000.0, ans=0.2 +2024-09-18 08:30:05,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=406040.0, ans=0.2 +2024-09-18 08:30:29,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=406080.0, ans=0.125 +2024-09-18 08:30:34,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=406120.0, ans=0.0 +2024-09-18 08:30:39,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=406120.0, ans=0.025 +2024-09-18 08:30:41,519 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.546e+01 8.634e+01 9.173e+01 9.833e+01 1.215e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-18 08:31:05,558 INFO [train.py:1198] (0/2) Epoch 23, batch 2000, loss[loss=0.2204, ctc_loss=0.1203, cr_loss=0.3797, attn_decoder_loss=0.2231, over 29328.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1327, cr_loss=0.3765, attn_decoder_loss=0.25, over 5797292.88 frames. 
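The periodic optim.py WARNING lines summarize recent gradient norms as five values (min, 25th percentile, median, 75th percentile, max) plus a clipping threshold and the percentage of batches clipped. In every entry the threshold equals Clipping_scale times the median norm (for the warning just above, 2.0 * 9.173e+01 ~= 1.835e+02), so the bookkeeping is presumably along the lines sketched here; this is a reconstruction of that relationship, not the optimizer's actual code.

```python
import torch

def clipping_summary(recent_grad_norms: torch.Tensor, clipping_scale: float = 2.0):
    # recent_grad_norms: a 1-D buffer of per-batch gradient norms.
    quartiles = torch.quantile(
        recent_grad_norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0])
    )
    # Threshold = clipping_scale * median, matching every WARNING in this log.
    threshold = clipping_scale * quartiles[2]
    percent_clipped = 100.0 * (recent_grad_norms > threshold).float().mean()
    return quartiles, threshold, percent_clipped
```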
], batch size: 67, lr: 4.84e-03, grad_scale: 16.0 +2024-09-18 08:31:13,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=406200.0, ans=0.125 +2024-09-18 08:31:19,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=406240.0, ans=0.0 +2024-09-18 08:31:22,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=406240.0, ans=0.125 +2024-09-18 08:31:25,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=406240.0, ans=0.2 +2024-09-18 08:31:48,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=406280.0, ans=0.0 +2024-09-18 08:31:59,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=406320.0, ans=0.125 +2024-09-18 08:32:20,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=406360.0, ans=0.125 +2024-09-18 08:32:20,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=406360.0, ans=0.125 +2024-09-18 08:32:23,716 INFO [train.py:1198] (0/2) Epoch 23, batch 2050, loss[loss=0.2091, ctc_loss=0.1098, cr_loss=0.3292, attn_decoder_loss=0.2128, over 29435.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1321, cr_loss=0.3749, attn_decoder_loss=0.2488, over 5788082.02 frames. ], batch size: 70, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:32:49,640 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.37 vs. limit=15.0 +2024-09-18 08:33:05,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=406480.0, ans=0.125 +2024-09-18 08:33:13,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=406520.0, ans=0.0 +2024-09-18 08:33:18,756 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.624e+01 8.483e+01 9.027e+01 9.590e+01 1.679e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-18 08:33:22,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=406520.0, ans=0.0 +2024-09-18 08:33:25,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=406560.0, ans=0.125 +2024-09-18 08:33:37,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=406560.0, ans=0.125 +2024-09-18 08:33:37,775 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.38 vs. limit=15.0 +2024-09-18 08:33:41,476 INFO [train.py:1198] (0/2) Epoch 23, batch 2100, loss[loss=0.2487, ctc_loss=0.1306, cr_loss=0.3807, attn_decoder_loss=0.2534, over 29744.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1315, cr_loss=0.3742, attn_decoder_loss=0.2483, over 5798905.85 frames. 
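Most of the scaling.py INFO lines report a ScheduledFloat: a hyperparameter (dropout probabilities, skip rates, bypass scales, ...) whose current value `ans` is a function of the global `batch_count`. A minimal sketch of such a schedule follows, assuming piecewise-linear interpolation between (batch_count, value) breakpoints; the breakpoints used below are invented for illustration. Late in training, as here around batch_count 406000, such schedules sit at their final constant values, which is why the logged `ans` values repeat.

```python
class ScheduledFloatSketch:
    """Piecewise-linear schedule over the global batch count (a sketch)."""

    def __init__(self, *points):
        # points: (batch_count, value) pairs, e.g. (0.0, 0.3), (20000.0, 0.1)
        self.points = sorted(points)
        self.batch_count = 0.0  # advanced once per training batch

    def value(self) -> float:
        pts, x = self.points, self.batch_count
        if x <= pts[0][0]:
            return pts[0][1]
        if x >= pts[-1][0]:
            return pts[-1][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if x0 <= x <= x1:
                return y0 + (x - x0) / (x1 - x0) * (y1 - y0)

# Illustrative breakpoints (assumed, not from the log):
sched = ScheduledFloatSketch((0.0, 0.3), (20000.0, 0.1))
sched.batch_count = 406240.0   # far past the last breakpoint
assert sched.value() == 0.1    # would be logged as "ans=0.1"
```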
], batch size: 81, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:34:04,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=406640.0, ans=0.1 +2024-09-18 08:34:11,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=406680.0, ans=0.125 +2024-09-18 08:34:17,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=406680.0, ans=0.0 +2024-09-18 08:34:35,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=406720.0, ans=0.0 +2024-09-18 08:34:43,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=406760.0, ans=0.07 +2024-09-18 08:34:50,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.61 vs. limit=15.0 +2024-09-18 08:34:56,596 INFO [train.py:1198] (0/2) Epoch 23, batch 2150, loss[loss=0.2444, ctc_loss=0.1329, cr_loss=0.3763, attn_decoder_loss=0.2484, over 29453.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1307, cr_loss=0.3729, attn_decoder_loss=0.2476, over 5813727.82 frames. ], batch size: 78, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:34:56,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=406800.0, ans=0.1 +2024-09-18 08:35:04,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=406800.0, ans=0.0 +2024-09-18 08:35:05,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=406800.0, ans=0.5 +2024-09-18 08:35:12,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=406840.0, ans=0.07 +2024-09-18 08:35:28,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=406880.0, ans=0.125 +2024-09-18 08:35:44,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=406920.0, ans=0.0 +2024-09-18 08:35:51,679 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.371e+01 8.476e+01 8.832e+01 9.481e+01 1.697e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-18 08:35:55,563 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.80 vs. limit=10.0 +2024-09-18 08:36:14,434 INFO [train.py:1198] (0/2) Epoch 23, batch 2200, loss[loss=0.2534, ctc_loss=0.1296, cr_loss=0.3764, attn_decoder_loss=0.2588, over 29628.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1311, cr_loss=0.3738, attn_decoder_loss=0.2477, over 5810070.72 frames. 
], batch size: 86, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:37:06,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=407120.0, ans=0.125 +2024-09-18 08:37:09,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=407120.0, ans=0.0 +2024-09-18 08:37:11,575 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:37:32,957 INFO [train.py:1198] (0/2) Epoch 23, batch 2250, loss[loss=0.2511, ctc_loss=0.1276, cr_loss=0.3606, attn_decoder_loss=0.2569, over 29705.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1307, cr_loss=0.3731, attn_decoder_loss=0.2476, over 5810142.32 frames. ], batch size: 82, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:37:39,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=407200.0, ans=0.0 +2024-09-18 08:37:40,258 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.93 vs. limit=15.0 +2024-09-18 08:37:51,604 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:38:03,721 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.00 vs. limit=22.5 +2024-09-18 08:38:09,817 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.20 vs. limit=15.0 +2024-09-18 08:38:25,782 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.661e+01 8.566e+01 9.041e+01 9.811e+01 1.660e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-18 08:38:46,104 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.05 vs. limit=15.0 +2024-09-18 08:38:48,510 INFO [train.py:1198] (0/2) Epoch 23, batch 2300, loss[loss=0.2163, ctc_loss=0.1051, cr_loss=0.34, attn_decoder_loss=0.2211, over 29304.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1302, cr_loss=0.3716, attn_decoder_loss=0.2466, over 5797904.98 frames. 
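The grad_scale value at the end of each train.py line flips between 8.0 and 16.0 across this section, which is the signature of dynamic loss scaling in mixed-precision training: the scale doubles after a run of overflow-free steps and is halved when a step overflows. A hedged sketch using PyTorch's stock GradScaler follows; the trainer may well manage the scale itself, and the constructor arguments here are assumptions.

```python
import torch

scaler = torch.cuda.amp.GradScaler(
    init_scale=8.0,        # assumed starting point; the log shows 8.0 <-> 16.0
    growth_factor=2.0,     # double the scale after a run of finite steps
    backoff_factor=0.5,    # halve it when gradients overflow
    growth_interval=2000,  # assumed; how many clean steps before growing
)

# Typical step, given a model, optimizer, and loss as in any AMP loop:
#   with torch.cuda.amp.autocast():
#       loss = compute_loss(...)
#   scaler.scale(loss).backward()
#   scaler.step(optimizer)
#   scaler.update()   # grows or backs off the scale logged as grad_scale
```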
], batch size: 71, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:38:50,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=407400.0, ans=0.125 +2024-09-18 08:39:03,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=407440.0, ans=0.0 +2024-09-18 08:39:08,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=407440.0, ans=0.125 +2024-09-18 08:39:09,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=407440.0, ans=0.125 +2024-09-18 08:39:11,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=407440.0, ans=0.09899494936611666 +2024-09-18 08:39:11,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=407440.0, ans=0.2 +2024-09-18 08:39:18,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=407480.0, ans=0.0 +2024-09-18 08:39:25,410 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.94 vs. limit=22.5 +2024-09-18 08:39:29,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=407480.0, ans=0.125 +2024-09-18 08:39:37,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=407520.0, ans=0.025 +2024-09-18 08:39:39,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=407520.0, ans=0.0 +2024-09-18 08:39:45,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=407520.0, ans=0.2 +2024-09-18 08:40:03,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=407560.0, ans=0.1 +2024-09-18 08:40:06,190 INFO [train.py:1198] (0/2) Epoch 23, batch 2350, loss[loss=0.2538, ctc_loss=0.1381, cr_loss=0.3932, attn_decoder_loss=0.2579, over 29685.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1306, cr_loss=0.3727, attn_decoder_loss=0.2468, over 5803846.35 frames. ], batch size: 83, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:40:35,004 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.45 vs. limit=15.0 +2024-09-18 08:41:01,418 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.649e+01 9.243e+01 9.923e+01 8.680e+02, threshold=1.849e+02, percent-clipped=2.0 +2024-09-18 08:41:08,711 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.31 vs. limit=6.0 +2024-09-18 08:41:15,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=407760.0, ans=0.0 +2024-09-18 08:41:24,574 INFO [train.py:1198] (0/2) Epoch 23, batch 2400, loss[loss=0.2454, ctc_loss=0.1349, cr_loss=0.3834, attn_decoder_loss=0.2492, over 29548.00 frames. 
], tot_loss[loss=0.2433, ctc_loss=0.131, cr_loss=0.3737, attn_decoder_loss=0.2475, over 5807232.08 frames. ], batch size: 76, lr: 4.83e-03, grad_scale: 16.0 +2024-09-18 08:41:30,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=407800.0, ans=0.125 +2024-09-18 08:41:39,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=407840.0, ans=0.1 +2024-09-18 08:41:45,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=407840.0, ans=0.0 +2024-09-18 08:41:45,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=407840.0, ans=0.2 +2024-09-18 08:42:05,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=407880.0, ans=0.0 +2024-09-18 08:42:08,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=407920.0, ans=0.2 +2024-09-18 08:42:28,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=407960.0, ans=0.025 +2024-09-18 08:42:40,652 INFO [train.py:1198] (0/2) Epoch 23, batch 2450, loss[loss=0.2441, ctc_loss=0.1295, cr_loss=0.3825, attn_decoder_loss=0.2483, over 29701.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1323, cr_loss=0.376, attn_decoder_loss=0.2487, over 5784250.71 frames. ], batch size: 82, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:42:41,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=408000.0, ans=0.1 +2024-09-18 08:42:51,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=408000.0, ans=0.0 +2024-09-18 08:42:55,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=408040.0, ans=0.125 +2024-09-18 08:43:03,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=408040.0, ans=0.0 +2024-09-18 08:43:11,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=408080.0, ans=0.1 +2024-09-18 08:43:12,218 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.83 vs. limit=10.0 +2024-09-18 08:43:37,145 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.992e+01 9.709e+01 1.062e+02 3.982e+02, threshold=1.942e+02, percent-clipped=1.0 +2024-09-18 08:43:52,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=408160.0, ans=0.0 +2024-09-18 08:43:58,461 INFO [train.py:1198] (0/2) Epoch 23, batch 2500, loss[loss=0.2537, ctc_loss=0.1339, cr_loss=0.3849, attn_decoder_loss=0.2585, over 29632.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1319, cr_loss=0.3756, attn_decoder_loss=0.2485, over 5794697.06 frames. ], batch size: 86, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:45:06,988 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.43 vs. 
limit=15.0 +2024-09-18 08:45:12,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.49 vs. limit=15.0 +2024-09-18 08:45:16,708 INFO [train.py:1198] (0/2) Epoch 23, batch 2550, loss[loss=0.2104, ctc_loss=0.108, cr_loss=0.314, attn_decoder_loss=0.2148, over 29312.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1316, cr_loss=0.3753, attn_decoder_loss=0.2482, over 5798278.41 frames. ], batch size: 67, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:45:26,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=408400.0, ans=0.125 +2024-09-18 08:45:43,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.41 vs. limit=15.0 +2024-09-18 08:46:11,117 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.224e+01 8.424e+01 8.872e+01 9.650e+01 4.846e+02, threshold=1.774e+02, percent-clipped=2.0 +2024-09-18 08:46:19,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=408560.0, ans=0.2 +2024-09-18 08:46:22,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=408560.0, ans=0.05 +2024-09-18 08:46:32,480 INFO [train.py:1198] (0/2) Epoch 23, batch 2600, loss[loss=0.2318, ctc_loss=0.1254, cr_loss=0.3634, attn_decoder_loss=0.2355, over 29443.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1316, cr_loss=0.3748, attn_decoder_loss=0.2485, over 5794908.46 frames. ], batch size: 78, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:46:32,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=408600.0, ans=0.1 +2024-09-18 08:46:46,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=408640.0, ans=0.1 +2024-09-18 08:46:59,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=408640.0, ans=0.1 +2024-09-18 08:47:16,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=408680.0, ans=0.0 +2024-09-18 08:47:40,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=408760.0, ans=0.125 +2024-09-18 08:47:43,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=408760.0, ans=0.05 +2024-09-18 08:47:45,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=408760.0, ans=0.125 +2024-09-18 08:47:50,070 INFO [train.py:1198] (0/2) Epoch 23, batch 2650, loss[loss=0.2595, ctc_loss=0.1456, cr_loss=0.415, attn_decoder_loss=0.2629, over 29249.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1313, cr_loss=0.3749, attn_decoder_loss=0.2486, over 5801011.13 frames. 
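The Whitening lines compare a per-module "metric" against a limit (e.g. metric=2.49 vs. limit=15.0 just above); presumably the module only intervenes, nudging activations back toward a white, identity-like covariance, when the metric exceeds its limit. One standard whitening measure with exactly this behaviour is E[lambda^2] / E[lambda]^2 over the eigenvalues lambda of the channel covariance: it equals 1 for perfectly white features and grows as the covariance becomes ill-conditioned. The sketch below computes that ratio; it is an illustration of the idea, not necessarily the exact formula behind these log lines.

```python
import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    # x: (num_frames, num_channels) activations for one module.
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.T @ x) / x.shape[0]                 # channel covariance
    eigs = torch.linalg.eigvalsh(cov)            # real eigenvalues, ascending
    return (eigs ** 2).mean() / eigs.mean() ** 2  # 1.0 iff cov ~ c * I
```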
], batch size: 100, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:47:58,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=408800.0, ans=0.025 +2024-09-18 08:48:02,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=408800.0, ans=0.125 +2024-09-18 08:48:19,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=408880.0, ans=0.09899494936611666 +2024-09-18 08:48:31,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=408880.0, ans=0.1 +2024-09-18 08:48:32,387 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.65 vs. limit=15.0 +2024-09-18 08:48:37,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=408920.0, ans=0.1 +2024-09-18 08:48:46,441 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.322e+01 8.846e+01 9.392e+01 1.397e+02, threshold=1.769e+02, percent-clipped=0.0 +2024-09-18 08:48:48,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=408920.0, ans=0.2 +2024-09-18 08:48:59,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=408960.0, ans=0.125 +2024-09-18 08:49:07,646 INFO [train.py:1198] (0/2) Epoch 23, batch 2700, loss[loss=0.2411, ctc_loss=0.1223, cr_loss=0.3727, attn_decoder_loss=0.2461, over 29511.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1317, cr_loss=0.3755, attn_decoder_loss=0.2489, over 5796099.06 frames. ], batch size: 87, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:49:07,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=409000.0, ans=0.125 +2024-09-18 08:49:10,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=409000.0, ans=0.125 +2024-09-18 08:49:16,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=409000.0, ans=0.035 +2024-09-18 08:49:25,151 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.79 vs. limit=10.0 +2024-09-18 08:49:53,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=409120.0, ans=0.0 +2024-09-18 08:50:16,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.58 vs. limit=15.0 +2024-09-18 08:50:23,173 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.18 vs. limit=22.5 +2024-09-18 08:50:23,524 INFO [train.py:1198] (0/2) Epoch 23, batch 2750, loss[loss=0.2388, ctc_loss=0.1321, cr_loss=0.3847, attn_decoder_loss=0.2421, over 29506.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1305, cr_loss=0.3731, attn_decoder_loss=0.2476, over 5793806.72 frames. 
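tot_loss is reported "over" a fractional frame count (5793806.72 frames just above, and similar elsewhere), which points to a decayed running sum rather than a plain cumulative total: both the loss numerator and the frame denominator are multiplied by a forgetting factor before each new batch is added, so the denominator drifts off the integers. A sketch of that bookkeeping, with the decay constant being an assumption:

```python
class RunningAverage:
    """Exponentially decayed, frame-weighted loss average (a sketch)."""

    def __init__(self, decay: float = 0.999):  # assumed decay constant
        self.decay = decay
        self.weighted_loss = 0.0
        self.frames = 0.0

    def update(self, batch_loss: float, batch_frames: float) -> float:
        # Decay the history, then fold in the new batch, frame-weighted.
        self.weighted_loss = self.decay * self.weighted_loss + batch_loss * batch_frames
        self.frames = self.decay * self.frames + batch_frames
        return self.weighted_loss / self.frames  # the logged tot_loss value
```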
], batch size: 75, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:50:31,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=409200.0, ans=0.2 +2024-09-18 08:50:37,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=409240.0, ans=0.0 +2024-09-18 08:51:00,808 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.10 vs. limit=12.0 +2024-09-18 08:51:19,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=409320.0, ans=0.2 +2024-09-18 08:51:20,299 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.385e+01 8.421e+01 8.872e+01 9.349e+01 6.581e+02, threshold=1.774e+02, percent-clipped=3.0 +2024-09-18 08:51:37,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=409360.0, ans=0.0 +2024-09-18 08:51:41,683 INFO [train.py:1198] (0/2) Epoch 23, batch 2800, loss[loss=0.2619, ctc_loss=0.163, cr_loss=0.3741, attn_decoder_loss=0.2645, over 20507.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1314, cr_loss=0.3748, attn_decoder_loss=0.2483, over 5775033.13 frames. ], batch size: 210, lr: 4.82e-03, grad_scale: 16.0 +2024-09-18 08:52:00,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=409440.0, ans=0.0 +2024-09-18 08:52:03,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=409440.0, ans=0.125 +2024-09-18 08:52:06,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=409440.0, ans=0.1 +2024-09-18 08:52:10,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=409480.0, ans=0.0 +2024-09-18 08:52:13,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=409480.0, ans=0.0 +2024-09-18 08:52:59,484 INFO [train.py:1198] (0/2) Epoch 23, batch 2850, loss[loss=0.2241, ctc_loss=0.1112, cr_loss=0.3334, attn_decoder_loss=0.2293, over 29543.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.132, cr_loss=0.3756, attn_decoder_loss=0.2487, over 5759859.22 frames. ], batch size: 77, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:53:02,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=409600.0, ans=0.1 +2024-09-18 08:53:19,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=409640.0, ans=0.125 +2024-09-18 08:53:21,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=409640.0, ans=0.1 +2024-09-18 08:53:30,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=409680.0, ans=0.125 +2024-09-18 08:53:43,104 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.85 vs. 
limit=15.0 +2024-09-18 08:53:51,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=409720.0, ans=0.125 +2024-09-18 08:53:55,294 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.299e+01 8.592e+01 9.017e+01 9.666e+01 1.557e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-18 08:54:15,015 INFO [train.py:1198] (0/2) Epoch 23, batch 2900, loss[loss=0.2418, ctc_loss=0.129, cr_loss=0.3758, attn_decoder_loss=0.2459, over 29410.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1322, cr_loss=0.3762, attn_decoder_loss=0.2495, over 5786269.68 frames. ], batch size: 79, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:54:58,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.30 vs. limit=12.0 +2024-09-18 08:55:09,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=409920.0, ans=0.125 +2024-09-18 08:55:15,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=409920.0, ans=0.0 +2024-09-18 08:55:33,242 INFO [train.py:1198] (0/2) Epoch 23, batch 2950, loss[loss=0.2413, ctc_loss=0.1334, cr_loss=0.3929, attn_decoder_loss=0.2445, over 29532.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1314, cr_loss=0.3748, attn_decoder_loss=0.2481, over 5781818.95 frames. ], batch size: 75, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:55:56,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=410040.0, ans=0.0 +2024-09-18 08:56:25,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=410120.0, ans=0.04949747468305833 +2024-09-18 08:56:27,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=410120.0, ans=0.2 +2024-09-18 08:56:31,679 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.552e+01 9.299e+01 9.927e+01 2.795e+02, threshold=1.860e+02, percent-clipped=1.0 +2024-09-18 08:56:38,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_ff2.min_abs, batch_count=410160.0, ans=0.1 +2024-09-18 08:56:51,633 INFO [train.py:1198] (0/2) Epoch 23, batch 3000, loss[loss=0.2494, ctc_loss=0.1384, cr_loss=0.387, attn_decoder_loss=0.2532, over 29751.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1315, cr_loss=0.3753, attn_decoder_loss=0.2483, over 5783184.32 frames. ], batch size: 81, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:56:51,634 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 08:57:10,057 INFO [train.py:1230] (0/2) Epoch 23, validation: loss=0.2116, ctc_loss=0.03932, cr_loss=5.516e-15, attn_decoder_loss=0.2308, over 944034.00 frames. +2024-09-18 08:57:10,058 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 08:57:17,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=410200.0, ans=0.0 +2024-09-18 08:57:53,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.46 vs. 
limit=22.5 +2024-09-18 08:58:03,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=410320.0, ans=0.95 +2024-09-18 08:58:25,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=410400.0, ans=0.04949747468305833 +2024-09-18 08:58:26,566 INFO [train.py:1198] (0/2) Epoch 23, batch 3050, loss[loss=0.2436, ctc_loss=0.1288, cr_loss=0.3888, attn_decoder_loss=0.2477, over 29541.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1318, cr_loss=0.3759, attn_decoder_loss=0.2488, over 5777593.71 frames. ], batch size: 76, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:58:34,157 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=14.07 vs. limit=15.0 +2024-09-18 08:58:40,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.94 vs. limit=15.0 +2024-09-18 08:58:53,668 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:59:20,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=410520.0, ans=0.0 +2024-09-18 08:59:24,740 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.520e+01 8.808e+01 9.332e+01 1.013e+02 4.220e+02, threshold=1.866e+02, percent-clipped=2.0 +2024-09-18 08:59:25,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=410520.0, ans=0.1 +2024-09-18 08:59:32,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=410560.0, ans=0.125 +2024-09-18 08:59:35,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=410560.0, ans=0.125 +2024-09-18 08:59:40,970 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.42 vs. limit=22.5 +2024-09-18 08:59:44,268 INFO [train.py:1198] (0/2) Epoch 23, batch 3100, loss[loss=0.2677, ctc_loss=0.1514, cr_loss=0.4049, attn_decoder_loss=0.2716, over 29253.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1319, cr_loss=0.3763, attn_decoder_loss=0.2485, over 5777820.02 frames. ], batch size: 100, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 08:59:54,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=410600.0, ans=0.125 +2024-09-18 09:00:15,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=410680.0, ans=0.1 +2024-09-18 09:00:42,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=410720.0, ans=0.125 +2024-09-18 09:01:02,026 INFO [train.py:1198] (0/2) Epoch 23, batch 3150, loss[loss=0.2613, ctc_loss=0.1395, cr_loss=0.3946, attn_decoder_loss=0.266, over 28822.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1316, cr_loss=0.3754, attn_decoder_loss=0.2485, over 5784242.33 frames. 
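The batch-3000 entry a little above interleaves a validation pass ("Computing validation loss", then a single line aggregated over 944034.00 frames). Note that cr_loss collapses to ~5.5e-15 there: consistency regularization compares two differently perturbed forward passes, and with perturbations disabled in eval mode the two passes agree up to numerical noise. A rough sketch of such a pass follows; `compute_loss` and its return convention are hypothetical stand-ins, not the repository's actual helpers.

```python
import torch

def compute_validation_loss(model, valid_loader, device):
    model.eval()
    tot = {"loss": 0.0, "ctc_loss": 0.0, "cr_loss": 0.0,
           "attn_decoder_loss": 0.0, "frames": 0.0}
    with torch.no_grad():
        for batch in valid_loader:
            # compute_loss is a hypothetical helper returning per-frame
            # loss components and the number of frames in the batch.
            info, num_frames = compute_loss(model, batch, device)
            for k in ("loss", "ctc_loss", "cr_loss", "attn_decoder_loss"):
                tot[k] += info[k] * num_frames
            tot["frames"] += num_frames
    model.train()
    return {k: v / tot["frames"] for k, v in tot.items() if k != "frames"}
```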
], batch size: 104, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:01:26,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=410840.0, ans=0.125 +2024-09-18 09:01:35,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=410880.0, ans=0.0 +2024-09-18 09:01:57,780 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.407e+01 8.637e+01 9.168e+01 9.786e+01 2.272e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-18 09:02:17,396 INFO [train.py:1198] (0/2) Epoch 23, batch 3200, loss[loss=0.2372, ctc_loss=0.122, cr_loss=0.3753, attn_decoder_loss=0.2416, over 29430.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1309, cr_loss=0.3738, attn_decoder_loss=0.248, over 5794132.34 frames. ], batch size: 79, lr: 4.81e-03, grad_scale: 16.0 +2024-09-18 09:02:20,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=411000.0, ans=0.2 +2024-09-18 09:02:28,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=411000.0, ans=0.1 +2024-09-18 09:02:28,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=411000.0, ans=0.0 +2024-09-18 09:02:36,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=411040.0, ans=0.0 +2024-09-18 09:02:50,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=411080.0, ans=0.0 +2024-09-18 09:02:59,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=411080.0, ans=0.1 +2024-09-18 09:03:15,180 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.60 vs. limit=15.0 +2024-09-18 09:03:35,720 INFO [train.py:1198] (0/2) Epoch 23, batch 3250, loss[loss=0.2553, ctc_loss=0.133, cr_loss=0.378, attn_decoder_loss=0.2605, over 29712.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1314, cr_loss=0.375, attn_decoder_loss=0.2484, over 5800240.47 frames. ], batch size: 84, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:03:38,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=411200.0, ans=0.1 +2024-09-18 09:03:45,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=411200.0, ans=0.2 +2024-09-18 09:03:45,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=411200.0, ans=0.0 +2024-09-18 09:03:46,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=411200.0, ans=0.09899494936611666 +2024-09-18 09:04:34,986 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.655e+01 9.272e+01 9.823e+01 1.322e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-18 09:04:49,510 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.42 vs. 
limit=15.0 +2024-09-18 09:04:53,301 INFO [train.py:1198] (0/2) Epoch 23, batch 3300, loss[loss=0.2501, ctc_loss=0.1236, cr_loss=0.3621, attn_decoder_loss=0.2561, over 28405.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1306, cr_loss=0.3731, attn_decoder_loss=0.2473, over 5796544.97 frames. ], batch size: 111, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:05:01,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=411400.0, ans=0.125 +2024-09-18 09:06:04,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=411560.0, ans=0.0 +2024-09-18 09:06:08,663 INFO [train.py:1198] (0/2) Epoch 23, batch 3350, loss[loss=0.2553, ctc_loss=0.147, cr_loss=0.4158, attn_decoder_loss=0.2581, over 28863.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1314, cr_loss=0.3744, attn_decoder_loss=0.2481, over 5773198.81 frames. ], batch size: 104, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:06:21,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=411600.0, ans=0.0 +2024-09-18 09:06:24,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=411640.0, ans=0.0 +2024-09-18 09:06:31,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=411640.0, ans=0.07 +2024-09-18 09:06:44,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=411680.0, ans=0.125 +2024-09-18 09:06:55,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=411720.0, ans=0.125 +2024-09-18 09:07:08,457 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.442e+01 8.663e+01 9.206e+01 9.789e+01 2.075e+02, threshold=1.841e+02, percent-clipped=1.0 +2024-09-18 09:07:10,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=411760.0, ans=0.0 +2024-09-18 09:07:19,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=411760.0, ans=0.125 +2024-09-18 09:07:23,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=411760.0, ans=0.125 +2024-09-18 09:07:26,467 INFO [train.py:1198] (0/2) Epoch 23, batch 3400, loss[loss=0.2197, ctc_loss=0.117, cr_loss=0.3496, attn_decoder_loss=0.2234, over 29364.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1316, cr_loss=0.3742, attn_decoder_loss=0.248, over 5766013.70 frames. ], batch size: 67, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:07:30,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.10 vs. limit=15.0 +2024-09-18 09:07:32,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=411800.0, ans=0.125 +2024-09-18 09:08:06,323 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.15 vs. limit=15.0 +2024-09-18 09:08:44,752 INFO [train.py:1198] (0/2) Epoch 23, batch 3450, loss[loss=0.2521, ctc_loss=0.1318, cr_loss=0.3774, attn_decoder_loss=0.2571, over 28191.00 frames. 
], tot_loss[loss=0.244, ctc_loss=0.1313, cr_loss=0.3737, attn_decoder_loss=0.2482, over 5774069.30 frames. ], batch size: 111, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:08:51,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=412000.0, ans=0.125 +2024-09-18 09:09:06,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=412040.0, ans=0.125 +2024-09-18 09:09:09,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=412040.0, ans=0.5 +2024-09-18 09:09:22,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=412080.0, ans=0.025 +2024-09-18 09:09:22,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=412080.0, ans=0.0 +2024-09-18 09:09:29,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=412120.0, ans=0.125 +2024-09-18 09:09:35,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=412120.0, ans=0.125 +2024-09-18 09:09:42,461 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.259e+01 8.670e+01 9.056e+01 9.530e+01 1.937e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-18 09:09:47,514 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:10:02,667 INFO [train.py:1198] (0/2) Epoch 23, batch 3500, loss[loss=0.2213, ctc_loss=0.1147, cr_loss=0.3444, attn_decoder_loss=0.2255, over 29341.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1312, cr_loss=0.374, attn_decoder_loss=0.2477, over 5776840.32 frames. ], batch size: 71, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:10:41,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.08 vs. limit=12.0 +2024-09-18 09:10:46,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_abs, batch_count=412320.0, ans=0.5 +2024-09-18 09:10:47,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=412320.0, ans=0.0 +2024-09-18 09:10:53,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=412320.0, ans=0.0 +2024-09-18 09:11:17,256 INFO [train.py:1198] (0/2) Epoch 23, batch 3550, loss[loss=0.251, ctc_loss=0.1347, cr_loss=0.385, attn_decoder_loss=0.2554, over 29705.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1309, cr_loss=0.3738, attn_decoder_loss=0.2476, over 5783488.07 frames. 
], batch size: 89, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:11:54,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=412480.0, ans=0.125 +2024-09-18 09:12:06,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=412520.0, ans=0.1 +2024-09-18 09:12:09,840 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:12:14,057 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.214e+01 8.498e+01 8.967e+01 9.754e+01 1.546e+02, threshold=1.793e+02, percent-clipped=1.0 +2024-09-18 09:12:31,829 INFO [train.py:1198] (0/2) Epoch 23, batch 3600, loss[loss=0.2346, ctc_loss=0.1217, cr_loss=0.3633, attn_decoder_loss=0.2391, over 29497.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1307, cr_loss=0.3732, attn_decoder_loss=0.2474, over 5791557.08 frames. ], batch size: 77, lr: 4.80e-03, grad_scale: 16.0 +2024-09-18 09:13:17,587 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:13:26,732 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.96 vs. limit=15.0 +2024-09-18 09:13:38,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=412760.0, ans=0.0 +2024-09-18 09:13:48,738 INFO [train.py:1198] (0/2) Epoch 23, batch 3650, loss[loss=0.2463, ctc_loss=0.1334, cr_loss=0.3894, attn_decoder_loss=0.2502, over 29518.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1305, cr_loss=0.3727, attn_decoder_loss=0.2471, over 5793234.61 frames. ], batch size: 90, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:13:53,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=412800.0, ans=0.0 +2024-09-18 09:14:06,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=412840.0, ans=0.0 +2024-09-18 09:14:14,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=412840.0, ans=10.0 +2024-09-18 09:14:15,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=412840.0, ans=0.0 +2024-09-18 09:14:17,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=412880.0, ans=0.2 +2024-09-18 09:14:38,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=412920.0, ans=0.0 +2024-09-18 09:14:39,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=412920.0, ans=0.95 +2024-09-18 09:14:46,461 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.590e+01 8.375e+01 8.989e+01 9.606e+01 2.045e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-18 09:14:51,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=412960.0, ans=0.1 +2024-09-18 09:15:02,931 INFO [train.py:1198] (0/2) Epoch 23, batch 3700, loss[loss=0.2542, ctc_loss=0.1409, cr_loss=0.4041, attn_decoder_loss=0.2578, over 29704.00 frames. 
], tot_loss[loss=0.2429, ctc_loss=0.1302, cr_loss=0.3724, attn_decoder_loss=0.2471, over 5803249.18 frames. ], batch size: 84, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:15:22,251 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.48 vs. limit=5.0 +2024-09-18 09:15:41,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=413080.0, ans=0.125 +2024-09-18 09:15:42,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=413080.0, ans=0.125 +2024-09-18 09:15:57,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.04 vs. limit=6.0 +2024-09-18 09:16:01,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=413160.0, ans=0.0 +2024-09-18 09:16:16,983 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:16:19,392 INFO [train.py:1198] (0/2) Epoch 23, batch 3750, loss[loss=0.2178, ctc_loss=0.1182, cr_loss=0.3385, attn_decoder_loss=0.2214, over 29335.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1299, cr_loss=0.3715, attn_decoder_loss=0.2466, over 5807302.11 frames. ], batch size: 67, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:16:31,587 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:16:45,082 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.56 vs. limit=22.5 +2024-09-18 09:16:46,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.84 vs. limit=22.5 +2024-09-18 09:16:59,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=413280.0, ans=0.125 +2024-09-18 09:17:13,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=413320.0, ans=0.125 +2024-09-18 09:17:17,040 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.618e+01 9.156e+01 9.859e+01 5.134e+02, threshold=1.831e+02, percent-clipped=3.0 +2024-09-18 09:17:19,492 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.19 vs. limit=15.0 +2024-09-18 09:17:33,575 INFO [train.py:1198] (0/2) Epoch 23, batch 3800, loss[loss=0.2488, ctc_loss=0.1259, cr_loss=0.357, attn_decoder_loss=0.2545, over 29624.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1297, cr_loss=0.3711, attn_decoder_loss=0.2463, over 5797274.70 frames. ], batch size: 86, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:17:41,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=413400.0, ans=0.2 +2024-09-18 09:17:43,332 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.30 vs. 
limit=15.0 +2024-09-18 09:17:50,920 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.73 vs. limit=22.5 +2024-09-18 09:17:54,224 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.11 vs. limit=6.0 +2024-09-18 09:18:15,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=413480.0, ans=0.125 +2024-09-18 09:18:15,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=413480.0, ans=0.125 +2024-09-18 09:18:23,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=413520.0, ans=0.125 +2024-09-18 09:18:35,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=413560.0, ans=0.1 +2024-09-18 09:18:36,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=413560.0, ans=0.025 +2024-09-18 09:18:39,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=413560.0, ans=0.0 +2024-09-18 09:18:41,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=413560.0, ans=0.125 +2024-09-18 09:18:44,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=413560.0, ans=0.2 +2024-09-18 09:18:45,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=413560.0, ans=0.2 +2024-09-18 09:18:48,727 INFO [train.py:1198] (0/2) Epoch 23, batch 3850, loss[loss=0.2579, ctc_loss=0.1366, cr_loss=0.4009, attn_decoder_loss=0.2624, over 29265.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1298, cr_loss=0.3715, attn_decoder_loss=0.2465, over 5811821.05 frames. ], batch size: 100, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:18:56,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=413600.0, ans=0.125 +2024-09-18 09:19:04,365 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.12 vs. limit=6.0 +2024-09-18 09:19:48,638 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.512e+01 9.090e+01 9.629e+01 1.233e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-18 09:19:51,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=413760.0, ans=0.0 +2024-09-18 09:19:53,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=413760.0, ans=0.0 +2024-09-18 09:20:05,023 INFO [train.py:1198] (0/2) Epoch 23, batch 3900, loss[loss=0.258, ctc_loss=0.1349, cr_loss=0.3876, attn_decoder_loss=0.263, over 29610.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1303, cr_loss=0.3723, attn_decoder_loss=0.2471, over 5815852.05 frames. 
], batch size: 86, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:20:18,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=413840.0, ans=0.0 +2024-09-18 09:20:18,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=413840.0, ans=0.125 +2024-09-18 09:20:30,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=413840.0, ans=0.1 +2024-09-18 09:20:32,276 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.95 vs. limit=15.0 +2024-09-18 09:20:34,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=413880.0, ans=0.125 +2024-09-18 09:20:37,745 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:20:47,140 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.60 vs. limit=10.0 +2024-09-18 09:20:53,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.04 vs. limit=22.5 +2024-09-18 09:20:55,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=413920.0, ans=0.125 +2024-09-18 09:21:19,073 INFO [train.py:1198] (0/2) Epoch 23, batch 3950, loss[loss=0.2523, ctc_loss=0.1345, cr_loss=0.3897, attn_decoder_loss=0.2567, over 29481.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.13, cr_loss=0.3731, attn_decoder_loss=0.247, over 5835374.69 frames. ], batch size: 97, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:21:44,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=414040.0, ans=0.025 +2024-09-18 09:21:57,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=414080.0, ans=0.1 +2024-09-18 09:22:04,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.25 vs. limit=15.0 +2024-09-18 09:22:18,242 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.449e+01 8.566e+01 9.101e+01 9.931e+01 2.734e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-18 09:22:24,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=414160.0, ans=0.125 +2024-09-18 09:22:30,852 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.33 vs. limit=22.5 +2024-09-18 09:22:34,496 INFO [train.py:1198] (0/2) Epoch 23, batch 4000, loss[loss=0.2329, ctc_loss=0.1245, cr_loss=0.3646, attn_decoder_loss=0.2368, over 29522.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1304, cr_loss=0.3729, attn_decoder_loss=0.2471, over 5814414.70 frames. 
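Each tot_loss record above carries four numbers: the combined loss and its CTC, consistency-regularization (cr) and attention-decoder components. The scales in the experiment directory name that appears in the checkpoint paths further down (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02) reproduce the combined value; checking against the "Epoch 23, batch 4000" record just above (the authoritative combination lives in the recipe's train.py, so this is a consistency check, not its code):

    # Scales from the experiment directory name; component losses from the
    # "Epoch 23, batch 4000" tot_loss record above.
    ctc_scale, aed_scale, cr_scale = 0.1, 0.9, 0.02
    ctc_loss, attn_decoder_loss, cr_loss = 0.1304, 0.2471, 0.3729

    loss = ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss
    print(round(loss, 4))   # 0.2429, matching the logged loss=0.2429
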
], batch size: 74, lr: 4.79e-03, grad_scale: 16.0 +2024-09-18 09:22:36,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=414200.0, ans=0.125 +2024-09-18 09:23:04,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=414280.0, ans=0.1 +2024-09-18 09:23:26,804 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.82 vs. limit=15.0 +2024-09-18 09:23:43,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.63 vs. limit=10.0 +2024-09-18 09:23:48,958 INFO [train.py:1198] (0/2) Epoch 23, batch 4050, loss[loss=0.2726, ctc_loss=0.1757, cr_loss=0.4093, attn_decoder_loss=0.2743, over 20008.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1307, cr_loss=0.3725, attn_decoder_loss=0.247, over 5797963.48 frames. ], batch size: 210, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:24:03,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=414440.0, ans=0.125 +2024-09-18 09:24:08,091 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:24:42,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=414520.0, ans=0.125 +2024-09-18 09:24:49,138 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.508e+01 8.682e+01 9.236e+01 9.757e+01 1.586e+02, threshold=1.847e+02, percent-clipped=0.0 +2024-09-18 09:25:03,928 INFO [train.py:1198] (0/2) Epoch 23, batch 4100, loss[loss=0.2592, ctc_loss=0.147, cr_loss=0.3935, attn_decoder_loss=0.2629, over 29495.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1309, cr_loss=0.3732, attn_decoder_loss=0.2473, over 5793134.35 frames. ], batch size: 90, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:25:26,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=414640.0, ans=0.125 +2024-09-18 09:25:29,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=414640.0, ans=0.125 +2024-09-18 09:25:58,900 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.07 vs. limit=10.0 +2024-09-18 09:26:02,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=414760.0, ans=0.025 +2024-09-18 09:26:18,785 INFO [train.py:1198] (0/2) Epoch 23, batch 4150, loss[loss=0.2343, ctc_loss=0.1283, cr_loss=0.3739, attn_decoder_loss=0.2377, over 29491.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1308, cr_loss=0.3734, attn_decoder_loss=0.2471, over 5798404.60 frames. 
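The dense "ScheduledFloat: name=..., batch_count=..., ans=..." lines report the current values of hyperparameters (dropout probabilities, skip rates, bypass scale floors, balancer probabilities) that follow piecewise-linear schedules in the global batch count instead of being fixed. By batch_count ~414k most of them have settled at their final values (skip rates at 0.0, dropout_p at 0.1, and so on). A minimal sketch of such a schedule; the class name and breakpoints below are invented for illustration and are not the recipe's actual ones:

    import numpy as np

    class ScheduledFloatSketch:
        """Piecewise-linear hyperparameter value as a function of batch count."""
        def __init__(self, *points):              # points: (batch_count, value)
            self.xs = [p[0] for p in points]
            self.ys = [p[1] for p in points]
        def __call__(self, batch_count: float) -> float:
            # np.interp clamps outside the breakpoints, so the last value holds.
            return float(np.interp(batch_count, self.xs, self.ys))

    dropout_p = ScheduledFloatSketch((0, 0.3), (20000, 0.1))   # hypothetical
    print(dropout_p(0), dropout_p(10000), dropout_p(414000))   # 0.3 0.2 0.1
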
], batch size: 77, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:26:48,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=414880.0, ans=0.125 +2024-09-18 09:27:16,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=414960.0, ans=0.1 +2024-09-18 09:27:17,891 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.679e+01 8.495e+01 8.961e+01 9.619e+01 1.585e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-18 09:27:19,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=414960.0, ans=0.1 +2024-09-18 09:27:25,953 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.91 vs. limit=15.0 +2024-09-18 09:27:32,591 INFO [train.py:1198] (0/2) Epoch 23, batch 4200, loss[loss=0.2682, ctc_loss=0.1535, cr_loss=0.411, attn_decoder_loss=0.2718, over 29486.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1311, cr_loss=0.3737, attn_decoder_loss=0.2473, over 5800598.11 frames. ], batch size: 90, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:27:45,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=415000.0, ans=0.125 +2024-09-18 09:28:18,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=415120.0, ans=0.125 +2024-09-18 09:28:18,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=415120.0, ans=0.125 +2024-09-18 09:28:20,786 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.37 vs. limit=6.0 +2024-09-18 09:28:43,089 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.74 vs. limit=15.0 +2024-09-18 09:28:48,193 INFO [train.py:1198] (0/2) Epoch 23, batch 4250, loss[loss=0.2262, ctc_loss=0.117, cr_loss=0.3413, attn_decoder_loss=0.2308, over 29522.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1308, cr_loss=0.3733, attn_decoder_loss=0.2476, over 5805553.05 frames. ], batch size: 74, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:29:04,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=415240.0, ans=0.07 +2024-09-18 09:29:07,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=415240.0, ans=0.2 +2024-09-18 09:29:19,677 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.69 vs. limit=22.5 +2024-09-18 09:29:21,090 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.09 vs. 
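The periodic WARNING lines from optim.py summarize the distribution of recent gradient norms. In every record in this section the reported threshold is Clipping_scale (2.0) times the logged median; for the warning just above, 2.0 x 8.961e+01 = 1.792e+02. So clipping appears to be relative to a running median of grad norms rather than a fixed constant, with percent-clipped reporting how often the threshold was hit. A minimal sketch of median-relative clipping (icefall's ScaledAdam optimizer is the authoritative implementation; the class below is illustrative):

    from collections import deque
    import statistics

    import torch

    class MedianGradClipper:
        def __init__(self, clipping_scale: float = 2.0, window: int = 1000):
            self.clipping_scale = clipping_scale
            self.norms = deque(maxlen=window)    # recent global grad norms

        def clip_(self, parameters) -> float:
            params = [p for p in parameters if p.grad is not None]
            norm = torch.norm(torch.stack([p.grad.norm() for p in params])).item()
            self.norms.append(norm)
            threshold = self.clipping_scale * statistics.median(self.norms)
            if norm > threshold:                 # rescale all grads in place
                for p in params:
                    p.grad.mul_(threshold / norm)
            return norm
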
limit=15.0 +2024-09-18 09:29:24,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=415280.0, ans=0.1 +2024-09-18 09:29:28,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=415280.0, ans=0.2 +2024-09-18 09:29:38,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=415320.0, ans=0.0 +2024-09-18 09:29:47,861 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.786e+01 8.728e+01 9.274e+01 9.904e+01 2.860e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-18 09:30:02,719 INFO [train.py:1198] (0/2) Epoch 23, batch 4300, loss[loss=0.2576, ctc_loss=0.1452, cr_loss=0.3932, attn_decoder_loss=0.2613, over 29531.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1307, cr_loss=0.3734, attn_decoder_loss=0.2478, over 5794632.24 frames. ], batch size: 87, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:30:13,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=415400.0, ans=0.125 +2024-09-18 09:30:19,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=415440.0, ans=0.125 +2024-09-18 09:30:26,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=415440.0, ans=0.125 +2024-09-18 09:30:28,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=415440.0, ans=0.1 +2024-09-18 09:30:44,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=415480.0, ans=0.125 +2024-09-18 09:31:16,768 INFO [train.py:1198] (0/2) Epoch 23, batch 4350, loss[loss=0.2623, ctc_loss=0.146, cr_loss=0.4098, attn_decoder_loss=0.2661, over 29478.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1334, cr_loss=0.3795, attn_decoder_loss=0.2512, over 5797691.14 frames. ], batch size: 97, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:31:33,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=415640.0, ans=0.125 +2024-09-18 09:31:36,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=415640.0, ans=0.125 +2024-09-18 09:31:54,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.51 vs. limit=12.0 +2024-09-18 09:32:16,354 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.737e+01 8.901e+01 9.212e+01 9.767e+01 1.363e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-18 09:32:20,297 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.95 vs. limit=6.0 +2024-09-18 09:32:30,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.63 vs. limit=10.0 +2024-09-18 09:32:31,048 INFO [train.py:1198] (0/2) Epoch 23, batch 4400, loss[loss=0.2544, ctc_loss=0.1347, cr_loss=0.3792, attn_decoder_loss=0.2593, over 27084.00 frames. ], tot_loss[loss=0.249, ctc_loss=0.1349, cr_loss=0.382, attn_decoder_loss=0.2532, over 5767420.41 frames. 
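The grad_scale field in these records alternates between 8.0 and 16.0. That is the dynamic loss scale of mixed-precision training: the scale is halved whenever an inf/nan gradient is detected and grows back after a run of clean steps. With PyTorch's stock GradScaler the mechanism looks like the sketch below (the training script may configure or wrap it differently, and compute_loss is a hypothetical helper):

    import torch

    scaler = torch.cuda.amp.GradScaler(init_scale=16.0, growth_interval=2000)

    # Inside the training loop:
    #   with torch.cuda.amp.autocast():
    #       loss = compute_loss(model, batch)    # hypothetical helper
    #   scaler.scale(loss).backward()
    #   scaler.step(optimizer)   # skips the update and halves the scale on inf/nan
    #   scaler.update()          # doubles the scale after growth_interval clean steps
    print(scaler.get_scale())    # 16.0 on a CUDA machine
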
], batch size: 124, lr: 4.78e-03, grad_scale: 16.0 +2024-09-18 09:32:46,724 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.78 vs. limit=10.0 +2024-09-18 09:33:15,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=415920.0, ans=0.025 +2024-09-18 09:33:34,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=415960.0, ans=0.125 +2024-09-18 09:33:44,565 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-104000.pt +2024-09-18 09:33:53,116 INFO [train.py:1198] (0/2) Epoch 23, batch 4450, loss[loss=0.2671, ctc_loss=0.166, cr_loss=0.4067, attn_decoder_loss=0.2694, over 20485.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1388, cr_loss=0.3865, attn_decoder_loss=0.2555, over 5576389.12 frames. ], batch size: 210, lr: 4.78e-03, grad_scale: 16.0 +2024-09-18 09:34:48,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=416120.0, ans=10.0 +2024-09-18 09:34:52,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=416160.0, ans=0.0 +2024-09-18 09:34:54,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=416160.0, ans=0.0 +2024-09-18 09:34:55,164 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.365e+01 9.450e+01 1.070e+02 1.179e+02 4.631e+02, threshold=2.141e+02, percent-clipped=3.0 +2024-09-18 09:35:06,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=416160.0, ans=0.025 +2024-09-18 09:35:09,063 INFO [train.py:1198] (0/2) Epoch 23, batch 4500, loss[loss=0.2635, ctc_loss=0.1602, cr_loss=0.3813, attn_decoder_loss=0.2665, over 20071.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.143, cr_loss=0.3891, attn_decoder_loss=0.2577, over 5237689.90 frames. ], batch size: 210, lr: 4.78e-03, grad_scale: 8.0 +2024-09-18 09:35:11,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.61 vs. limit=22.5 +2024-09-18 09:35:11,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.22 vs. limit=15.0 +2024-09-18 09:35:22,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=416240.0, ans=0.09899494936611666 +2024-09-18 09:35:31,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=416240.0, ans=0.1 +2024-09-18 09:35:46,056 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-23.pt +2024-09-18 09:36:38,043 INFO [train.py:1198] (0/2) Epoch 24, batch 0, loss[loss=0.2248, ctc_loss=0.113, cr_loss=0.3443, attn_decoder_loss=0.2295, over 29608.00 frames. ], tot_loss[loss=0.2248, ctc_loss=0.113, cr_loss=0.3443, attn_decoder_loss=0.2295, over 29608.00 frames. 
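The checkpoint.py lines above show two kinds of saves: a rolling batch checkpoint (checkpoint-104000.pt) and an end-of-epoch one (epoch-23.pt). Zipformer recipes also maintain a running average of the weights alongside the live model, which is what later decoding with an averaged model consumes. The update below is a sketch under the assumption of a uniform running average over all steps; the actual mixing weight and update period are set in the recipe:

    import torch

    @torch.no_grad()
    def update_averaged_model(model_avg, model, step: int) -> None:
        # Uniform running average over steps 1..step:
        #   avg_new = avg_old * (step - 1) / step + params / step
        # Parameters only; buffers are omitted for brevity.
        w = 1.0 / step
        for p_avg, p in zip(model_avg.parameters(), model.parameters()):
            p_avg.mul_(1.0 - w).add_(p, alpha=w)
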
], batch size: 73, lr: 4.68e-03, grad_scale: 16.0 +2024-09-18 09:36:38,044 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 09:36:58,732 INFO [train.py:1230] (0/2) Epoch 24, validation: loss=0.2127, ctc_loss=0.03777, cr_loss=4.976e-15, attn_decoder_loss=0.2321, over 944034.00 frames. +2024-09-18 09:36:58,732 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 09:37:11,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=416300.0, ans=0.2 +2024-09-18 09:37:14,382 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:37:32,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=416380.0, ans=0.0 +2024-09-18 09:38:14,645 INFO [train.py:1198] (0/2) Epoch 24, batch 50, loss[loss=0.2245, ctc_loss=0.1171, cr_loss=0.3634, attn_decoder_loss=0.2284, over 29414.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1326, cr_loss=0.378, attn_decoder_loss=0.2492, over 1267441.45 frames. ], batch size: 70, lr: 4.68e-03, grad_scale: 8.0 +2024-09-18 09:38:40,703 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.542e+01 8.838e+01 9.694e+01 1.103e+02 3.363e+02, threshold=1.939e+02, percent-clipped=1.0 +2024-09-18 09:39:30,895 INFO [train.py:1198] (0/2) Epoch 24, batch 100, loss[loss=0.2225, ctc_loss=0.1116, cr_loss=0.3367, attn_decoder_loss=0.2273, over 29528.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1333, cr_loss=0.3791, attn_decoder_loss=0.2505, over 2250592.16 frames. ], batch size: 76, lr: 4.68e-03, grad_scale: 8.0 +2024-09-18 09:39:55,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=416740.0, ans=0.125 +2024-09-18 09:39:57,415 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.26 vs. limit=15.0 +2024-09-18 09:40:09,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=416780.0, ans=0.2 +2024-09-18 09:40:11,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=416780.0, ans=0.1 +2024-09-18 09:40:15,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=416780.0, ans=0.05 +2024-09-18 09:40:21,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=416820.0, ans=0.125 +2024-09-18 09:40:23,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=416820.0, ans=0.125 +2024-09-18 09:40:50,682 INFO [train.py:1198] (0/2) Epoch 24, batch 150, loss[loss=0.222, ctc_loss=0.1179, cr_loss=0.3604, attn_decoder_loss=0.2256, over 29435.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1306, cr_loss=0.3747, attn_decoder_loss=0.2476, over 3046501.94 frames. 
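At the start of epoch 24 the script pauses to compute a validation loss (logged above as loss=0.2127 over 944034 frames). Note that cr_loss is ~5e-15 there: the consistency-regularization term compares outputs under time masking, and it plausibly collapses to zero in eval mode when masking is disabled; that reading is inferred from the logs, not from the code. A minimal sketch of such a validation pass, with compute_loss as a hypothetical helper:

    import torch

    @torch.no_grad()
    def validate(model, dataloader, device) -> float:
        model.eval()
        tot_loss, tot_frames = 0.0, 0.0
        for batch in dataloader:
            # compute_loss is a hypothetical helper returning the summed loss
            # and the number of acoustic frames in the batch.
            loss, num_frames = compute_loss(model, batch, device)
            tot_loss += float(loss)
            tot_frames += float(num_frames)
        model.train()
        return tot_loss / tot_frames    # per-frame loss, as logged
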
], batch size: 70, lr: 4.68e-03, grad_scale: 8.0 +2024-09-18 09:41:16,582 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.498e+01 9.006e+01 9.810e+01 1.466e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-18 09:41:31,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=416980.0, ans=0.0 +2024-09-18 09:41:50,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=417060.0, ans=0.125 +2024-09-18 09:42:06,295 INFO [train.py:1198] (0/2) Epoch 24, batch 200, loss[loss=0.2539, ctc_loss=0.1453, cr_loss=0.3997, attn_decoder_loss=0.257, over 27222.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1301, cr_loss=0.3739, attn_decoder_loss=0.2471, over 3659772.09 frames. ], batch size: 124, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:42:06,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=417100.0, ans=0.0 +2024-09-18 09:42:38,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=417180.0, ans=0.0 +2024-09-18 09:42:47,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=417180.0, ans=0.05 +2024-09-18 09:42:52,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.33 vs. limit=15.0 +2024-09-18 09:42:56,697 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:43:00,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.38 vs. limit=15.0 +2024-09-18 09:43:13,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=417260.0, ans=0.0 +2024-09-18 09:43:22,141 INFO [train.py:1198] (0/2) Epoch 24, batch 250, loss[loss=0.263, ctc_loss=0.1455, cr_loss=0.4044, attn_decoder_loss=0.267, over 29326.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1301, cr_loss=0.3738, attn_decoder_loss=0.2471, over 4142618.33 frames. ], batch size: 100, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:43:40,119 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.20 vs. limit=15.0 +2024-09-18 09:43:47,873 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 8.833e+01 9.396e+01 1.002e+02 2.195e+02, threshold=1.879e+02, percent-clipped=2.0 +2024-09-18 09:44:01,078 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.86 vs. limit=15.0 +2024-09-18 09:44:08,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=417420.0, ans=0.125 +2024-09-18 09:44:23,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=417460.0, ans=0.025 +2024-09-18 09:44:42,567 INFO [train.py:1198] (0/2) Epoch 24, batch 300, loss[loss=0.2694, ctc_loss=0.1517, cr_loss=0.4356, attn_decoder_loss=0.2728, over 29501.00 frames. 
], tot_loss[loss=0.2429, ctc_loss=0.1299, cr_loss=0.3737, attn_decoder_loss=0.2472, over 4511016.49 frames. ], batch size: 92, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:44:42,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=417500.0, ans=0.035 +2024-09-18 09:44:43,610 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.82 vs. limit=15.0 +2024-09-18 09:44:52,119 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:45:17,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=417580.0, ans=0.125 +2024-09-18 09:45:34,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=417620.0, ans=0.2 +2024-09-18 09:45:36,962 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.46 vs. limit=15.0 +2024-09-18 09:45:58,770 INFO [train.py:1198] (0/2) Epoch 24, batch 350, loss[loss=0.2165, ctc_loss=0.103, cr_loss=0.3045, attn_decoder_loss=0.2223, over 29326.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1301, cr_loss=0.3742, attn_decoder_loss=0.2476, over 4796809.02 frames. ], batch size: 71, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:46:04,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.29 vs. limit=10.0 +2024-09-18 09:46:25,838 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.494e+01 8.951e+01 9.745e+01 1.329e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-18 09:46:37,442 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.12 vs. limit=15.0 +2024-09-18 09:46:42,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=417820.0, ans=0.125 +2024-09-18 09:46:53,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.10 vs. limit=15.0 +2024-09-18 09:46:58,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=417860.0, ans=0.1 +2024-09-18 09:47:14,296 INFO [train.py:1198] (0/2) Epoch 24, batch 400, loss[loss=0.2546, ctc_loss=0.14, cr_loss=0.3908, attn_decoder_loss=0.2587, over 29716.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1294, cr_loss=0.3725, attn_decoder_loss=0.247, over 5026817.28 frames. 
], batch size: 82, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:47:25,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=417900.0, ans=10.0 +2024-09-18 09:47:58,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=418020.0, ans=0.0 +2024-09-18 09:48:00,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=418020.0, ans=0.125 +2024-09-18 09:48:24,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=418060.0, ans=0.125 +2024-09-18 09:48:32,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=418060.0, ans=0.0 +2024-09-18 09:48:35,438 INFO [train.py:1198] (0/2) Epoch 24, batch 450, loss[loss=0.2491, ctc_loss=0.129, cr_loss=0.3675, attn_decoder_loss=0.2542, over 29706.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1294, cr_loss=0.3729, attn_decoder_loss=0.247, over 5189502.84 frames. ], batch size: 83, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:48:37,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=418100.0, ans=0.125 +2024-09-18 09:49:02,619 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.182e+01 8.530e+01 9.135e+01 9.796e+01 4.658e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-18 09:49:24,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=418220.0, ans=0.05 +2024-09-18 09:49:37,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=418260.0, ans=0.125 +2024-09-18 09:49:50,901 INFO [train.py:1198] (0/2) Epoch 24, batch 500, loss[loss=0.2534, ctc_loss=0.1356, cr_loss=0.3822, attn_decoder_loss=0.258, over 29472.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1288, cr_loss=0.3717, attn_decoder_loss=0.2463, over 5332204.13 frames. 
], batch size: 94, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:50:00,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=418300.0, ans=0.0 +2024-09-18 09:50:06,745 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:50:11,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=418340.0, ans=0.125 +2024-09-18 09:50:18,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=418340.0, ans=0.125 +2024-09-18 09:50:34,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=418380.0, ans=0.2 +2024-09-18 09:50:41,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=418420.0, ans=0.125 +2024-09-18 09:50:49,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=418420.0, ans=0.2 +2024-09-18 09:51:05,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=418500.0, ans=0.125 +2024-09-18 09:51:07,118 INFO [train.py:1198] (0/2) Epoch 24, batch 550, loss[loss=0.2666, ctc_loss=0.1456, cr_loss=0.4107, attn_decoder_loss=0.2709, over 28906.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1288, cr_loss=0.3713, attn_decoder_loss=0.2463, over 5424609.82 frames. ], batch size: 104, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:51:07,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=418500.0, ans=0.2 +2024-09-18 09:51:10,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=418500.0, ans=0.1 +2024-09-18 09:51:34,481 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.569e+01 9.031e+01 9.630e+01 1.358e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-18 09:51:43,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=418580.0, ans=0.125 +2024-09-18 09:51:59,547 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.40 vs. limit=15.0 +2024-09-18 09:52:03,079 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.43 vs. limit=22.5 +2024-09-18 09:52:14,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.31 vs. limit=15.0 +2024-09-18 09:52:16,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=418660.0, ans=0.125 +2024-09-18 09:52:27,743 INFO [train.py:1198] (0/2) Epoch 24, batch 600, loss[loss=0.2494, ctc_loss=0.1327, cr_loss=0.3802, attn_decoder_loss=0.2539, over 29268.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1293, cr_loss=0.372, attn_decoder_loss=0.2467, over 5512126.11 frames. 
], batch size: 100, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:52:54,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=418740.0, ans=0.0 +2024-09-18 09:53:29,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=418860.0, ans=0.0 +2024-09-18 09:53:34,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=418860.0, ans=0.0 +2024-09-18 09:53:36,317 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.90 vs. limit=12.0 +2024-09-18 09:53:42,743 INFO [train.py:1198] (0/2) Epoch 24, batch 650, loss[loss=0.2549, ctc_loss=0.1413, cr_loss=0.4025, attn_decoder_loss=0.2586, over 29771.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1285, cr_loss=0.3705, attn_decoder_loss=0.2461, over 5588441.15 frames. ], batch size: 81, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 09:53:43,757 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.65 vs. limit=15.0 +2024-09-18 09:53:49,512 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.02 vs. limit=10.0 +2024-09-18 09:54:10,121 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.400e+01 8.656e+01 8.941e+01 9.589e+01 2.067e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 09:54:16,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=418980.0, ans=0.125 +2024-09-18 09:54:37,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=419020.0, ans=0.0 +2024-09-18 09:54:42,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=419060.0, ans=0.0 +2024-09-18 09:54:47,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.66 vs. limit=12.0 +2024-09-18 09:54:49,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_na.min_abs, batch_count=419060.0, ans=0.02 +2024-09-18 09:54:51,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=419060.0, ans=0.0 +2024-09-18 09:54:58,685 INFO [train.py:1198] (0/2) Epoch 24, batch 700, loss[loss=0.2262, ctc_loss=0.1121, cr_loss=0.3439, attn_decoder_loss=0.2312, over 29538.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1297, cr_loss=0.3727, attn_decoder_loss=0.247, over 5638803.96 frames. 
], batch size: 76, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 09:55:02,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=419100.0, ans=0.025 +2024-09-18 09:55:03,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=419100.0, ans=0.125 +2024-09-18 09:55:33,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=419180.0, ans=0.125 +2024-09-18 09:55:53,280 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.98 vs. limit=10.0 +2024-09-18 09:56:17,350 INFO [train.py:1198] (0/2) Epoch 24, batch 750, loss[loss=0.2403, ctc_loss=0.1267, cr_loss=0.3668, attn_decoder_loss=0.2447, over 29717.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1297, cr_loss=0.3724, attn_decoder_loss=0.2464, over 5677730.35 frames. ], batch size: 82, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 09:56:17,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=419300.0, ans=0.125 +2024-09-18 09:56:17,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=419300.0, ans=0.125 +2024-09-18 09:56:20,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=419300.0, ans=0.0 +2024-09-18 09:56:37,588 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.62 vs. limit=22.5 +2024-09-18 09:56:39,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=419340.0, ans=0.0 +2024-09-18 09:56:46,707 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.739e+01 8.693e+01 9.112e+01 9.779e+01 2.514e+02, threshold=1.822e+02, percent-clipped=3.0 +2024-09-18 09:56:49,328 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.98 vs. limit=22.5 +2024-09-18 09:57:22,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=419460.0, ans=0.125 +2024-09-18 09:57:36,009 INFO [train.py:1198] (0/2) Epoch 24, batch 800, loss[loss=0.2246, ctc_loss=0.1178, cr_loss=0.3603, attn_decoder_loss=0.2284, over 29593.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1291, cr_loss=0.3718, attn_decoder_loss=0.2462, over 5708557.20 frames. ], batch size: 73, lr: 4.66e-03, grad_scale: 16.0 +2024-09-18 09:57:45,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=419500.0, ans=0.125 +2024-09-18 09:57:49,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=419540.0, ans=0.025 +2024-09-18 09:58:52,056 INFO [train.py:1198] (0/2) Epoch 24, batch 850, loss[loss=0.246, ctc_loss=0.125, cr_loss=0.3583, attn_decoder_loss=0.2515, over 29687.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1288, cr_loss=0.3711, attn_decoder_loss=0.246, over 5738208.29 frames. 
], batch size: 89, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 09:59:21,101 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.453e+01 8.516e+01 9.029e+01 9.587e+01 2.043e+02, threshold=1.806e+02, percent-clipped=2.0 +2024-09-18 09:59:36,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=419820.0, ans=0.025 +2024-09-18 09:59:37,488 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.30 vs. limit=6.0 +2024-09-18 09:59:40,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=419820.0, ans=0.125 +2024-09-18 09:59:52,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=419860.0, ans=0.035 +2024-09-18 10:00:00,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=419860.0, ans=0.1 +2024-09-18 10:00:03,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=419860.0, ans=0.125 +2024-09-18 10:00:08,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=419900.0, ans=0.125 +2024-09-18 10:00:09,271 INFO [train.py:1198] (0/2) Epoch 24, batch 900, loss[loss=0.2297, ctc_loss=0.1175, cr_loss=0.3547, attn_decoder_loss=0.2343, over 29587.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1289, cr_loss=0.3711, attn_decoder_loss=0.2463, over 5742960.81 frames. ], batch size: 73, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 10:00:22,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=419900.0, ans=0.0 +2024-09-18 10:00:27,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=419900.0, ans=0.125 +2024-09-18 10:00:32,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.97 vs. limit=6.0 +2024-09-18 10:00:35,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.48 vs. limit=22.5 +2024-09-18 10:00:37,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=419940.0, ans=0.0 +2024-09-18 10:00:41,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=419940.0, ans=0.0 +2024-09-18 10:00:47,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=419980.0, ans=0.2 +2024-09-18 10:00:47,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=419980.0, ans=0.125 +2024-09-18 10:00:55,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=419980.0, ans=0.0 +2024-09-18 10:01:31,745 INFO [train.py:1198] (0/2) Epoch 24, batch 950, loss[loss=0.2171, ctc_loss=0.0974, cr_loss=0.3009, attn_decoder_loss=0.2237, over 29526.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1291, cr_loss=0.3711, attn_decoder_loss=0.2466, over 5743560.79 frames. 
], batch size: 74, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 10:01:32,720 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.32 vs. limit=12.0 +2024-09-18 10:01:33,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=420100.0, ans=0.2 +2024-09-18 10:01:50,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=420140.0, ans=0.1 +2024-09-18 10:02:00,914 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.680e+01 8.541e+01 9.200e+01 9.747e+01 2.326e+02, threshold=1.840e+02, percent-clipped=1.0 +2024-09-18 10:02:18,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=420220.0, ans=0.125 +2024-09-18 10:02:36,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=420260.0, ans=0.125 +2024-09-18 10:02:47,906 INFO [train.py:1198] (0/2) Epoch 24, batch 1000, loss[loss=0.2398, ctc_loss=0.1268, cr_loss=0.3646, attn_decoder_loss=0.2442, over 29487.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1303, cr_loss=0.3731, attn_decoder_loss=0.2474, over 5738176.24 frames. ], batch size: 77, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 10:03:02,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=420340.0, ans=0.125 +2024-09-18 10:03:07,251 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.47 vs. limit=15.0 +2024-09-18 10:03:12,047 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.80 vs. limit=15.0 +2024-09-18 10:03:15,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=420340.0, ans=0.125 +2024-09-18 10:03:20,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=420380.0, ans=0.0 +2024-09-18 10:03:20,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=420380.0, ans=0.0 +2024-09-18 10:03:25,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=420380.0, ans=0.125 +2024-09-18 10:03:44,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=420420.0, ans=0.125 +2024-09-18 10:04:02,062 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.58 vs. limit=12.0 +2024-09-18 10:04:03,941 INFO [train.py:1198] (0/2) Epoch 24, batch 1050, loss[loss=0.2483, ctc_loss=0.1352, cr_loss=0.3756, attn_decoder_loss=0.2525, over 29664.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1298, cr_loss=0.3721, attn_decoder_loss=0.2466, over 5745384.06 frames. ], batch size: 85, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 10:04:21,166 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.91 vs. 
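The tot_loss printed every 50 batches is not the current batch's loss but a smoothed, frame-weighted average of recent batches: after the epoch 24 reset, the "over N frames" count climbs from about 1.27M at batch 50 to 5.74M by batch 950 in ever-smaller increments, which matches an exponentially decayed sum rather than a plain epoch average. A sketch of that bookkeeping; the decay constant is an assumption, chosen so the frame count saturates near the ~5.8M seen in these logs:

    class RunningLoss:
        """Frame-weighted, exponentially decayed loss average."""
        def __init__(self, decay: float = 1.0 / 200):
            self.decay = decay
            self.loss_sum = 0.0
            self.frames = 0.0

        def update(self, batch_loss: float, batch_frames: float) -> None:
            keep = 1.0 - self.decay
            self.loss_sum = self.loss_sum * keep + batch_loss * batch_frames
            self.frames = self.frames * keep + batch_frames

        @property
        def value(self) -> float:
            return self.loss_sum / self.frames
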
limit=15.0 +2024-09-18 10:04:23,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=420540.0, ans=0.0 +2024-09-18 10:04:35,588 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.029e+01 8.418e+01 8.849e+01 9.632e+01 1.961e+02, threshold=1.770e+02, percent-clipped=1.0 +2024-09-18 10:04:45,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=420580.0, ans=0.125 +2024-09-18 10:04:53,560 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.36 vs. limit=22.5 +2024-09-18 10:04:58,069 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.38 vs. limit=15.0 +2024-09-18 10:05:12,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.58 vs. limit=6.0 +2024-09-18 10:05:22,855 INFO [train.py:1198] (0/2) Epoch 24, batch 1100, loss[loss=0.2269, ctc_loss=0.1185, cr_loss=0.3492, attn_decoder_loss=0.2312, over 29479.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1297, cr_loss=0.372, attn_decoder_loss=0.2465, over 5757411.86 frames. ], batch size: 78, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:05:33,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=420700.0, ans=0.2 +2024-09-18 10:05:49,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.59 vs. limit=22.5 +2024-09-18 10:06:08,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=420820.0, ans=0.125 +2024-09-18 10:06:20,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=420820.0, ans=0.0 +2024-09-18 10:06:39,953 INFO [train.py:1198] (0/2) Epoch 24, batch 1150, loss[loss=0.2401, ctc_loss=0.1312, cr_loss=0.3648, attn_decoder_loss=0.2441, over 29452.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1299, cr_loss=0.3722, attn_decoder_loss=0.2467, over 5755628.32 frames. 
], batch size: 78, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:06:44,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=420900.0, ans=0.125 +2024-09-18 10:07:08,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=420940.0, ans=10.0 +2024-09-18 10:07:08,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=420940.0, ans=15.0 +2024-09-18 10:07:09,200 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.376e+01 8.363e+01 8.865e+01 9.557e+01 3.982e+02, threshold=1.773e+02, percent-clipped=2.0 +2024-09-18 10:07:42,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=421060.0, ans=0.125 +2024-09-18 10:07:50,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=421060.0, ans=0.1 +2024-09-18 10:07:58,432 INFO [train.py:1198] (0/2) Epoch 24, batch 1200, loss[loss=0.2321, ctc_loss=0.1111, cr_loss=0.3149, attn_decoder_loss=0.2386, over 29680.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1304, cr_loss=0.3732, attn_decoder_loss=0.2476, over 5748607.40 frames. ], batch size: 85, lr: 4.65e-03, grad_scale: 16.0 +2024-09-18 10:08:19,904 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.29 vs. limit=6.0 +2024-09-18 10:08:37,806 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.93 vs. limit=15.0 +2024-09-18 10:08:52,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=421220.0, ans=0.125 +2024-09-18 10:09:00,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=421260.0, ans=0.125 +2024-09-18 10:09:00,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=421260.0, ans=0.125 +2024-09-18 10:09:10,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=421260.0, ans=0.0 +2024-09-18 10:09:13,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=421260.0, ans=0.125 +2024-09-18 10:09:16,336 INFO [train.py:1198] (0/2) Epoch 24, batch 1250, loss[loss=0.2584, ctc_loss=0.1443, cr_loss=0.3926, attn_decoder_loss=0.2624, over 29508.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1312, cr_loss=0.3755, attn_decoder_loss=0.2482, over 5776089.99 frames. 
], batch size: 92, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:09:21,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=421300.0, ans=0.125 +2024-09-18 10:09:25,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=421300.0, ans=0.1 +2024-09-18 10:09:46,763 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.304e+01 8.716e+01 9.105e+01 9.689e+01 1.606e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-18 10:10:08,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=421420.0, ans=15.0 +2024-09-18 10:10:10,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=421420.0, ans=22.5 +2024-09-18 10:10:19,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=421460.0, ans=0.125 +2024-09-18 10:10:25,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=421460.0, ans=0.1 +2024-09-18 10:10:31,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=421500.0, ans=0.2 +2024-09-18 10:10:32,048 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.73 vs. limit=6.0 +2024-09-18 10:10:32,443 INFO [train.py:1198] (0/2) Epoch 24, batch 1300, loss[loss=0.2524, ctc_loss=0.1363, cr_loss=0.3874, attn_decoder_loss=0.2566, over 28516.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1305, cr_loss=0.3743, attn_decoder_loss=0.2476, over 5779994.26 frames. ], batch size: 112, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:10:49,968 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.70 vs. limit=22.5 +2024-09-18 10:11:01,202 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.46 vs. limit=22.5 +2024-09-18 10:11:17,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=421620.0, ans=0.025 +2024-09-18 10:11:23,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=421620.0, ans=0.2 +2024-09-18 10:11:31,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=421620.0, ans=0.1 +2024-09-18 10:11:39,667 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.96 vs. limit=15.0 +2024-09-18 10:11:49,283 INFO [train.py:1198] (0/2) Epoch 24, batch 1350, loss[loss=0.2343, ctc_loss=0.1272, cr_loss=0.3766, attn_decoder_loss=0.2378, over 29770.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.13, cr_loss=0.3734, attn_decoder_loss=0.247, over 5798677.17 frames. 
], batch size: 81, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:11:59,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=421700.0, ans=0.2 +2024-09-18 10:12:24,088 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.522e+01 8.854e+01 9.285e+01 9.935e+01 1.189e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-18 10:12:24,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=421780.0, ans=0.125 +2024-09-18 10:12:42,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=421820.0, ans=0.1 +2024-09-18 10:12:50,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=421820.0, ans=0.125 +2024-09-18 10:13:09,369 INFO [train.py:1198] (0/2) Epoch 24, batch 1400, loss[loss=0.2137, ctc_loss=0.111, cr_loss=0.3455, attn_decoder_loss=0.2175, over 29591.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1298, cr_loss=0.3733, attn_decoder_loss=0.2468, over 5809033.63 frames. ], batch size: 69, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:13:12,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=421900.0, ans=0.0 +2024-09-18 10:13:21,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=421900.0, ans=0.125 +2024-09-18 10:13:31,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=421940.0, ans=0.125 +2024-09-18 10:13:38,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=421980.0, ans=0.125 +2024-09-18 10:13:51,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=421980.0, ans=0.125 +2024-09-18 10:14:16,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=422060.0, ans=0.125 +2024-09-18 10:14:24,951 INFO [train.py:1198] (0/2) Epoch 24, batch 1450, loss[loss=0.2598, ctc_loss=0.1433, cr_loss=0.4112, attn_decoder_loss=0.2636, over 29399.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1298, cr_loss=0.373, attn_decoder_loss=0.2473, over 5805151.75 frames. 
], batch size: 94, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:14:26,817 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:14:32,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=422100.0, ans=0.125 +2024-09-18 10:14:38,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=422140.0, ans=0.0 +2024-09-18 10:14:54,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=422180.0, ans=0.0 +2024-09-18 10:14:55,265 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.177e+01 8.531e+01 9.051e+01 9.633e+01 1.306e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-18 10:15:03,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=422180.0, ans=0.0 +2024-09-18 10:15:06,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=422180.0, ans=0.125 +2024-09-18 10:15:19,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=422220.0, ans=0.125 +2024-09-18 10:15:39,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=422300.0, ans=0.125 +2024-09-18 10:15:40,538 INFO [train.py:1198] (0/2) Epoch 24, batch 1500, loss[loss=0.252, ctc_loss=0.1379, cr_loss=0.3867, attn_decoder_loss=0.256, over 29644.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.13, cr_loss=0.3736, attn_decoder_loss=0.2477, over 5804915.64 frames. ], batch size: 86, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:16:17,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=422380.0, ans=0.125 +2024-09-18 10:16:36,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=422420.0, ans=0.05 +2024-09-18 10:16:43,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=422420.0, ans=0.125 +2024-09-18 10:16:52,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.20 vs. limit=22.5 +2024-09-18 10:17:01,784 INFO [train.py:1198] (0/2) Epoch 24, batch 1550, loss[loss=0.2561, ctc_loss=0.143, cr_loss=0.4161, attn_decoder_loss=0.2595, over 29502.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1302, cr_loss=0.3734, attn_decoder_loss=0.2478, over 5780208.38 frames. ], batch size: 90, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:17:19,134 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.14 vs. limit=15.0 +2024-09-18 10:17:29,974 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.54 vs. 
limit=15.0 +2024-09-18 10:17:32,008 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 8.697e+01 9.200e+01 9.648e+01 4.928e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-18 10:17:36,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=422580.0, ans=0.1 +2024-09-18 10:17:54,120 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.99 vs. limit=22.5 +2024-09-18 10:18:17,225 INFO [train.py:1198] (0/2) Epoch 24, batch 1600, loss[loss=0.2472, ctc_loss=0.1257, cr_loss=0.3581, attn_decoder_loss=0.2527, over 29665.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1307, cr_loss=0.374, attn_decoder_loss=0.2477, over 5763021.28 frames. ], batch size: 85, lr: 4.64e-03, grad_scale: 16.0 +2024-09-18 10:18:18,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.13 vs. limit=12.0 +2024-09-18 10:18:38,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=422740.0, ans=0.0 +2024-09-18 10:18:38,644 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:18:46,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.51 vs. limit=12.0 +2024-09-18 10:18:53,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=422780.0, ans=0.025 +2024-09-18 10:19:15,546 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.89 vs. limit=22.5 +2024-09-18 10:19:17,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=422860.0, ans=0.1 +2024-09-18 10:19:21,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=422860.0, ans=0.1 +2024-09-18 10:19:21,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=422860.0, ans=0.125 +2024-09-18 10:19:28,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=422860.0, ans=0.0 +2024-09-18 10:19:35,112 INFO [train.py:1198] (0/2) Epoch 24, batch 1650, loss[loss=0.2575, ctc_loss=0.1365, cr_loss=0.3915, attn_decoder_loss=0.2622, over 29725.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1303, cr_loss=0.3729, attn_decoder_loss=0.2474, over 5759119.10 frames. ], batch size: 89, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:19:38,938 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.32 vs. limit=6.0 +2024-09-18 10:19:42,128 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.78 vs. 
limit=22.5 +2024-09-18 10:19:53,590 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:20:00,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=422940.0, ans=0.125 +2024-09-18 10:20:08,932 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.940e+01 8.438e+01 9.287e+01 9.952e+01 1.595e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-18 10:20:12,970 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.73 vs. limit=15.0 +2024-09-18 10:20:26,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=423020.0, ans=0.0 +2024-09-18 10:20:27,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=423020.0, ans=0.025 +2024-09-18 10:20:32,966 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.70 vs. limit=10.0 +2024-09-18 10:20:35,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=423020.0, ans=0.125 +2024-09-18 10:20:50,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=423060.0, ans=0.125 +2024-09-18 10:20:52,744 INFO [train.py:1198] (0/2) Epoch 24, batch 1700, loss[loss=0.2165, ctc_loss=0.1123, cr_loss=0.3359, attn_decoder_loss=0.2206, over 29575.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1298, cr_loss=0.3718, attn_decoder_loss=0.247, over 5780925.31 frames. ], batch size: 69, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:20:56,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=423100.0, ans=0.125 +2024-09-18 10:20:59,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=423100.0, ans=0.125 +2024-09-18 10:21:21,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=423180.0, ans=0.025 +2024-09-18 10:21:26,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=423180.0, ans=0.025 +2024-09-18 10:21:31,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=423180.0, ans=0.025 +2024-09-18 10:21:40,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=423220.0, ans=0.125 +2024-09-18 10:22:08,371 INFO [train.py:1198] (0/2) Epoch 24, batch 1750, loss[loss=0.2197, ctc_loss=0.1127, cr_loss=0.3226, attn_decoder_loss=0.2244, over 29302.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1294, cr_loss=0.3713, attn_decoder_loss=0.2467, over 5788848.91 frames. 
], batch size: 67, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:22:13,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=423300.0, ans=0.125 +2024-09-18 10:22:33,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=423340.0, ans=0.0 +2024-09-18 10:22:39,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=423380.0, ans=0.125 +2024-09-18 10:22:40,206 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.086e+01 8.547e+01 8.974e+01 9.351e+01 1.739e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-18 10:22:57,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=423420.0, ans=0.1 +2024-09-18 10:23:13,013 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.26 vs. limit=15.0 +2024-09-18 10:23:19,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=423460.0, ans=0.0 +2024-09-18 10:23:25,962 INFO [train.py:1198] (0/2) Epoch 24, batch 1800, loss[loss=0.2484, ctc_loss=0.1286, cr_loss=0.3747, attn_decoder_loss=0.2534, over 29682.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1295, cr_loss=0.3716, attn_decoder_loss=0.2468, over 5792001.08 frames. ], batch size: 83, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:24:20,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.63 vs. limit=15.0 +2024-09-18 10:24:43,910 INFO [train.py:1198] (0/2) Epoch 24, batch 1850, loss[loss=0.2576, ctc_loss=0.1377, cr_loss=0.3617, attn_decoder_loss=0.2628, over 29631.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1294, cr_loss=0.3713, attn_decoder_loss=0.2467, over 5796273.76 frames. ], batch size: 86, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:25:15,527 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.229e+01 8.631e+01 9.392e+01 8.263e+02, threshold=1.726e+02, percent-clipped=1.0 +2024-09-18 10:25:17,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=423780.0, ans=0.125 +2024-09-18 10:25:23,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=423780.0, ans=0.0 +2024-09-18 10:25:24,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=423780.0, ans=0.125 +2024-09-18 10:25:38,005 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.62 vs. limit=15.0 +2024-09-18 10:25:56,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=423860.0, ans=0.0 +2024-09-18 10:25:59,342 INFO [train.py:1198] (0/2) Epoch 24, batch 1900, loss[loss=0.2521, ctc_loss=0.1304, cr_loss=0.3786, attn_decoder_loss=0.2572, over 29716.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1298, cr_loss=0.3725, attn_decoder_loss=0.2474, over 5803860.40 frames. 
], batch size: 89, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:26:24,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=423940.0, ans=0.125 +2024-09-18 10:26:36,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=423980.0, ans=0.025 +2024-09-18 10:26:52,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten.whitening_limit, batch_count=424020.0, ans=15.0 +2024-09-18 10:26:56,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=424020.0, ans=0.05 +2024-09-18 10:27:11,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=424060.0, ans=0.07 +2024-09-18 10:27:15,260 INFO [train.py:1198] (0/2) Epoch 24, batch 1950, loss[loss=0.2406, ctc_loss=0.1266, cr_loss=0.3796, attn_decoder_loss=0.2448, over 29448.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1306, cr_loss=0.3744, attn_decoder_loss=0.2486, over 5818807.57 frames. ], batch size: 78, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:27:19,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=424100.0, ans=0.2 +2024-09-18 10:27:27,611 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.83 vs. limit=15.0 +2024-09-18 10:27:34,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=424140.0, ans=0.125 +2024-09-18 10:27:34,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=424140.0, ans=0.125 +2024-09-18 10:27:38,948 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:27:42,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.05 vs. limit=15.0 +2024-09-18 10:27:49,222 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.703e+01 9.158e+01 9.577e+01 1.650e+02, threshold=1.832e+02, percent-clipped=0.0 +2024-09-18 10:27:58,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.63 vs. limit=15.0 +2024-09-18 10:28:04,429 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.77 vs. limit=12.0 +2024-09-18 10:28:21,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=424260.0, ans=0.05 +2024-09-18 10:28:21,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=424260.0, ans=0.125 +2024-09-18 10:28:34,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.93 vs. limit=15.0 +2024-09-18 10:28:35,225 INFO [train.py:1198] (0/2) Epoch 24, batch 2000, loss[loss=0.2216, ctc_loss=0.1126, cr_loss=0.3554, attn_decoder_loss=0.2258, over 29364.00 frames. 
], tot_loss[loss=0.2446, ctc_loss=0.1309, cr_loss=0.3749, attn_decoder_loss=0.2489, over 5795650.51 frames. ], batch size: 67, lr: 4.64e-03, grad_scale: 16.0 +2024-09-18 10:28:43,727 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.59 vs. limit=6.0 +2024-09-18 10:29:17,260 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.51 vs. limit=15.0 +2024-09-18 10:29:22,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=424420.0, ans=0.0 +2024-09-18 10:29:51,157 INFO [train.py:1198] (0/2) Epoch 24, batch 2050, loss[loss=0.2107, ctc_loss=0.1024, cr_loss=0.3271, attn_decoder_loss=0.2154, over 29439.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1301, cr_loss=0.3733, attn_decoder_loss=0.2477, over 5788134.05 frames. ], batch size: 70, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:29:54,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=424500.0, ans=0.04949747468305833 +2024-09-18 10:29:55,112 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.63 vs. limit=12.0 +2024-09-18 10:30:08,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=424540.0, ans=0.1 +2024-09-18 10:30:24,692 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.145e+01 8.469e+01 9.021e+01 9.794e+01 2.013e+02, threshold=1.804e+02, percent-clipped=1.0 +2024-09-18 10:30:29,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=424580.0, ans=0.0 +2024-09-18 10:30:50,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=424660.0, ans=0.1 +2024-09-18 10:31:01,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=424660.0, ans=0.125 +2024-09-18 10:31:08,887 INFO [train.py:1198] (0/2) Epoch 24, batch 2100, loss[loss=0.2504, ctc_loss=0.1315, cr_loss=0.3965, attn_decoder_loss=0.2548, over 29761.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1296, cr_loss=0.373, attn_decoder_loss=0.2472, over 5800045.68 frames. 
], batch size: 81, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:31:25,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=424740.0, ans=0.1 +2024-09-18 10:31:27,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=424740.0, ans=0.1 +2024-09-18 10:31:40,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=424780.0, ans=0.0 +2024-09-18 10:31:50,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=424780.0, ans=0.0 +2024-09-18 10:31:53,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=424820.0, ans=0.125 +2024-09-18 10:32:05,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=424820.0, ans=0.125 +2024-09-18 10:32:16,619 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:32:19,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=424860.0, ans=0.1 +2024-09-18 10:32:22,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=424860.0, ans=0.0 +2024-09-18 10:32:25,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=424900.0, ans=0.2 +2024-09-18 10:32:26,893 INFO [train.py:1198] (0/2) Epoch 24, batch 2150, loss[loss=0.2406, ctc_loss=0.1292, cr_loss=0.391, attn_decoder_loss=0.2443, over 29462.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1289, cr_loss=0.3721, attn_decoder_loss=0.2466, over 5815500.55 frames. ], batch size: 78, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:32:29,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.03 vs. limit=6.0 +2024-09-18 10:32:57,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=424980.0, ans=0.2 +2024-09-18 10:33:00,317 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.319e+01 8.375e+01 8.762e+01 9.510e+01 1.706e+02, threshold=1.752e+02, percent-clipped=0.0 +2024-09-18 10:33:05,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=424980.0, ans=0.1 +2024-09-18 10:33:22,292 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.61 vs. limit=15.0 +2024-09-18 10:33:27,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=425060.0, ans=0.0 +2024-09-18 10:33:42,641 INFO [train.py:1198] (0/2) Epoch 24, batch 2200, loss[loss=0.2507, ctc_loss=0.1308, cr_loss=0.3765, attn_decoder_loss=0.2556, over 29627.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.129, cr_loss=0.372, attn_decoder_loss=0.2467, over 5811841.05 frames. 
], batch size: 86, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:33:46,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=425100.0, ans=0.05 +2024-09-18 10:33:57,230 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=10.24 vs. limit=12.0 +2024-09-18 10:34:16,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=425180.0, ans=0.0 +2024-09-18 10:34:17,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=425180.0, ans=0.125 +2024-09-18 10:34:19,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=425180.0, ans=0.125 +2024-09-18 10:34:24,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=425180.0, ans=0.025 +2024-09-18 10:34:52,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=425260.0, ans=0.125 +2024-09-18 10:34:58,654 INFO [train.py:1198] (0/2) Epoch 24, batch 2250, loss[loss=0.2397, ctc_loss=0.1171, cr_loss=0.3319, attn_decoder_loss=0.246, over 29725.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1283, cr_loss=0.3709, attn_decoder_loss=0.2463, over 5811897.67 frames. ], batch size: 82, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:35:30,717 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.51 vs. limit=5.0 +2024-09-18 10:35:33,884 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.375e+01 8.517e+01 9.003e+01 9.651e+01 2.176e+02, threshold=1.801e+02, percent-clipped=2.0 +2024-09-18 10:35:43,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=425380.0, ans=0.125 +2024-09-18 10:35:46,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=425420.0, ans=0.125 +2024-09-18 10:35:52,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.69 vs. limit=10.0 +2024-09-18 10:36:10,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=425460.0, ans=0.125 +2024-09-18 10:36:12,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=425460.0, ans=0.2 +2024-09-18 10:36:18,241 INFO [train.py:1198] (0/2) Epoch 24, batch 2300, loss[loss=0.2155, ctc_loss=0.111, cr_loss=0.3466, attn_decoder_loss=0.2194, over 29344.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1277, cr_loss=0.3696, attn_decoder_loss=0.2452, over 5797584.95 frames. 
], batch size: 71, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:36:53,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=425580.0, ans=0.125 +2024-09-18 10:36:55,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=425580.0, ans=0.125 +2024-09-18 10:37:00,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=425580.0, ans=0.125 +2024-09-18 10:37:34,641 INFO [train.py:1198] (0/2) Epoch 24, batch 2350, loss[loss=0.2527, ctc_loss=0.1366, cr_loss=0.388, attn_decoder_loss=0.257, over 29682.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1282, cr_loss=0.3706, attn_decoder_loss=0.2455, over 5803412.52 frames. ], batch size: 83, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:37:51,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=425740.0, ans=0.125 +2024-09-18 10:38:01,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=425740.0, ans=0.125 +2024-09-18 10:38:07,919 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.350e+01 8.476e+01 9.011e+01 9.684e+01 2.166e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-18 10:38:20,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=425820.0, ans=0.0 +2024-09-18 10:38:29,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=425820.0, ans=0.0 +2024-09-18 10:38:41,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=425860.0, ans=0.125 +2024-09-18 10:38:50,527 INFO [train.py:1198] (0/2) Epoch 24, batch 2400, loss[loss=0.2341, ctc_loss=0.1169, cr_loss=0.3593, attn_decoder_loss=0.2391, over 29555.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1288, cr_loss=0.3714, attn_decoder_loss=0.2461, over 5807652.88 frames. ], batch size: 76, lr: 4.63e-03, grad_scale: 16.0 +2024-09-18 10:39:38,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=426020.0, ans=0.1 +2024-09-18 10:39:52,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=426020.0, ans=0.125 +2024-09-18 10:39:58,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=426060.0, ans=0.1 +2024-09-18 10:40:02,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.89 vs. limit=6.0 +2024-09-18 10:40:10,574 INFO [train.py:1198] (0/2) Epoch 24, batch 2450, loss[loss=0.239, ctc_loss=0.1186, cr_loss=0.3404, attn_decoder_loss=0.2448, over 29713.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1298, cr_loss=0.3734, attn_decoder_loss=0.2472, over 5785688.83 frames. ], batch size: 82, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:40:11,372 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.21 vs. 
limit=6.0 +2024-09-18 10:40:14,308 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.99 vs. limit=15.0 +2024-09-18 10:40:28,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=426140.0, ans=0.1 +2024-09-18 10:40:28,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=426140.0, ans=0.5 +2024-09-18 10:40:30,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=426140.0, ans=0.1 +2024-09-18 10:40:38,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=426140.0, ans=0.125 +2024-09-18 10:40:45,374 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.473e+01 8.992e+01 9.865e+01 1.103e+02 3.120e+02, threshold=1.973e+02, percent-clipped=1.0 +2024-09-18 10:40:50,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=426180.0, ans=0.1 +2024-09-18 10:40:56,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=426220.0, ans=0.1 +2024-09-18 10:41:26,495 INFO [train.py:1198] (0/2) Epoch 24, batch 2500, loss[loss=0.2507, ctc_loss=0.1403, cr_loss=0.394, attn_decoder_loss=0.2542, over 29622.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1303, cr_loss=0.375, attn_decoder_loss=0.2476, over 5795107.56 frames. ], batch size: 86, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:41:29,002 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.62 vs. limit=22.5 +2024-09-18 10:41:40,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=426340.0, ans=0.0 +2024-09-18 10:41:46,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=426340.0, ans=0.125 +2024-09-18 10:41:47,281 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.59 vs. limit=15.0 +2024-09-18 10:42:35,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.60 vs. limit=15.0 +2024-09-18 10:42:41,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=426500.0, ans=0.09899494936611666 +2024-09-18 10:42:45,179 INFO [train.py:1198] (0/2) Epoch 24, batch 2550, loss[loss=0.2164, ctc_loss=0.1073, cr_loss=0.3341, attn_decoder_loss=0.2211, over 29317.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1305, cr_loss=0.3753, attn_decoder_loss=0.2474, over 5797145.47 frames. 
], batch size: 67, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:42:48,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=426500.0, ans=0.125 +2024-09-18 10:43:00,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=426540.0, ans=0.125 +2024-09-18 10:43:19,607 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.669e+01 9.245e+01 9.655e+01 1.436e+02, threshold=1.849e+02, percent-clipped=0.0 +2024-09-18 10:43:20,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=426580.0, ans=0.0 +2024-09-18 10:43:32,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=426620.0, ans=0.125 +2024-09-18 10:43:36,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=426620.0, ans=0.5 +2024-09-18 10:44:01,779 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:44:03,056 INFO [train.py:1198] (0/2) Epoch 24, batch 2600, loss[loss=0.2464, ctc_loss=0.1288, cr_loss=0.3539, attn_decoder_loss=0.2516, over 29426.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1304, cr_loss=0.375, attn_decoder_loss=0.2475, over 5793207.93 frames. ], batch size: 78, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:44:09,370 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:44:13,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=426700.0, ans=0.0 +2024-09-18 10:44:15,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=426700.0, ans=0.125 +2024-09-18 10:44:21,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=426740.0, ans=0.0 +2024-09-18 10:44:24,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=426740.0, ans=0.035 +2024-09-18 10:44:39,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=426780.0, ans=0.0 +2024-09-18 10:44:59,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=426820.0, ans=0.0 +2024-09-18 10:45:05,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=426860.0, ans=0.125 +2024-09-18 10:45:08,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=426860.0, ans=0.125 +2024-09-18 10:45:11,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=426860.0, ans=0.125 +2024-09-18 10:45:18,307 INFO [train.py:1198] (0/2) Epoch 24, batch 2650, loss[loss=0.2706, ctc_loss=0.1545, cr_loss=0.4093, attn_decoder_loss=0.2744, over 29221.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1303, cr_loss=0.3749, attn_decoder_loss=0.2477, over 5799137.57 frames. 
], batch size: 100, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:45:29,915 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.94 vs. limit=15.0 +2024-09-18 10:45:32,528 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.64 vs. limit=15.0 +2024-09-18 10:45:49,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=426980.0, ans=0.0 +2024-09-18 10:45:53,135 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.692e+01 8.422e+01 8.884e+01 9.489e+01 2.051e+02, threshold=1.777e+02, percent-clipped=1.0 +2024-09-18 10:46:24,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=427060.0, ans=0.0 +2024-09-18 10:46:35,889 INFO [train.py:1198] (0/2) Epoch 24, batch 2700, loss[loss=0.2431, ctc_loss=0.1179, cr_loss=0.3313, attn_decoder_loss=0.2496, over 29515.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1308, cr_loss=0.3752, attn_decoder_loss=0.2483, over 5795158.44 frames. ], batch size: 87, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:46:52,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=427140.0, ans=0.2 +2024-09-18 10:47:03,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=427140.0, ans=0.125 +2024-09-18 10:47:29,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=427220.0, ans=0.0 +2024-09-18 10:47:35,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=427260.0, ans=0.0 +2024-09-18 10:47:38,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=427260.0, ans=0.0 +2024-09-18 10:47:43,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=427260.0, ans=0.0 +2024-09-18 10:47:54,353 INFO [train.py:1198] (0/2) Epoch 24, batch 2750, loss[loss=0.2427, ctc_loss=0.1337, cr_loss=0.3823, attn_decoder_loss=0.2463, over 29522.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1301, cr_loss=0.373, attn_decoder_loss=0.2471, over 5793811.76 frames. ], batch size: 75, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:48:02,937 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.41 vs. 
limit=15.0 +2024-09-18 10:48:03,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=427300.0, ans=0.125 +2024-09-18 10:48:28,938 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.406e+01 8.612e+01 9.140e+01 9.786e+01 3.109e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-18 10:48:33,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=427380.0, ans=0.0 +2024-09-18 10:48:35,242 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:48:38,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=427420.0, ans=0.125 +2024-09-18 10:48:48,141 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.57 vs. limit=15.0 +2024-09-18 10:49:02,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=427460.0, ans=0.95 +2024-09-18 10:49:07,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=427460.0, ans=0.125 +2024-09-18 10:49:10,160 INFO [train.py:1198] (0/2) Epoch 24, batch 2800, loss[loss=0.2697, ctc_loss=0.161, cr_loss=0.4071, attn_decoder_loss=0.2727, over 20269.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1308, cr_loss=0.3742, attn_decoder_loss=0.2475, over 5773574.38 frames. ], batch size: 210, lr: 4.62e-03, grad_scale: 16.0 +2024-09-18 10:49:13,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=427500.0, ans=0.125 +2024-09-18 10:49:42,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=427580.0, ans=0.125 +2024-09-18 10:49:52,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=427580.0, ans=0.1 +2024-09-18 10:49:55,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=427620.0, ans=0.125 +2024-09-18 10:50:06,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=427620.0, ans=0.0 +2024-09-18 10:50:17,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=427660.0, ans=0.125 +2024-09-18 10:50:27,678 INFO [train.py:1198] (0/2) Epoch 24, batch 2850, loss[loss=0.2362, ctc_loss=0.1232, cr_loss=0.3846, attn_decoder_loss=0.2402, over 29486.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1309, cr_loss=0.3742, attn_decoder_loss=0.2477, over 5760524.14 frames. ], batch size: 77, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:50:57,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.86 vs. limit=22.5 +2024-09-18 10:51:00,249 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.45 vs. 
limit=22.5 +2024-09-18 10:51:04,121 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.609e+01 8.759e+01 9.407e+01 9.943e+01 3.710e+02, threshold=1.881e+02, percent-clipped=1.0 +2024-09-18 10:51:04,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.26 vs. limit=6.0 +2024-09-18 10:51:10,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=427780.0, ans=0.025 +2024-09-18 10:51:13,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=427820.0, ans=0.0 +2024-09-18 10:51:40,702 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.29 vs. limit=10.0 +2024-09-18 10:51:42,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=427860.0, ans=0.0 +2024-09-18 10:51:45,750 INFO [train.py:1198] (0/2) Epoch 24, batch 2900, loss[loss=0.2386, ctc_loss=0.121, cr_loss=0.3622, attn_decoder_loss=0.2437, over 29426.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1312, cr_loss=0.3754, attn_decoder_loss=0.2487, over 5786616.71 frames. ], batch size: 79, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:51:53,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=427900.0, ans=0.125 +2024-09-18 10:52:24,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=427980.0, ans=0.0 +2024-09-18 10:52:39,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=428020.0, ans=0.0 +2024-09-18 10:52:45,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=428060.0, ans=0.125 +2024-09-18 10:53:02,261 INFO [train.py:1198] (0/2) Epoch 24, batch 2950, loss[loss=0.2355, ctc_loss=0.1286, cr_loss=0.3676, attn_decoder_loss=0.2392, over 29522.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.13, cr_loss=0.373, attn_decoder_loss=0.2472, over 5782077.49 frames. ], batch size: 75, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:53:07,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=428100.0, ans=0.2 +2024-09-18 10:53:38,589 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.384e+01 8.398e+01 8.942e+01 9.654e+01 3.446e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 10:53:42,355 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.56 vs. limit=15.0 +2024-09-18 10:54:09,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=428260.0, ans=0.0 +2024-09-18 10:54:19,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=428300.0, ans=0.09899494936611666 +2024-09-18 10:54:20,525 INFO [train.py:1198] (0/2) Epoch 24, batch 3000, loss[loss=0.2432, ctc_loss=0.1258, cr_loss=0.362, attn_decoder_loss=0.2482, over 29770.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1301, cr_loss=0.3733, attn_decoder_loss=0.2475, over 5782659.19 frames. 
], batch size: 81, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:54:20,526 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 10:54:39,002 INFO [train.py:1230] (0/2) Epoch 24, validation: loss=0.2118, ctc_loss=0.03891, cr_loss=5.525e-15, attn_decoder_loss=0.231, over 944034.00 frames. +2024-09-18 10:54:39,002 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 10:54:40,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=428300.0, ans=0.125 +2024-09-18 10:54:50,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=428300.0, ans=0.125 +2024-09-18 10:54:54,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.91 vs. limit=15.0 +2024-09-18 10:55:11,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=428380.0, ans=0.0 +2024-09-18 10:55:20,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=428380.0, ans=0.125 +2024-09-18 10:55:24,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=428420.0, ans=0.125 +2024-09-18 10:55:33,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=428420.0, ans=0.125 +2024-09-18 10:55:51,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=428460.0, ans=0.2 +2024-09-18 10:55:56,752 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.72 vs. limit=15.0 +2024-09-18 10:55:57,318 INFO [train.py:1198] (0/2) Epoch 24, batch 3050, loss[loss=0.2443, ctc_loss=0.1301, cr_loss=0.3718, attn_decoder_loss=0.2488, over 29539.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1307, cr_loss=0.3744, attn_decoder_loss=0.2483, over 5775731.11 frames. ], batch size: 76, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:56:08,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=428500.0, ans=0.5 +2024-09-18 10:56:21,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=428540.0, ans=0.0 +2024-09-18 10:56:23,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=428540.0, ans=0.1 +2024-09-18 10:56:33,592 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.271e+01 8.657e+01 9.220e+01 9.690e+01 1.587e+02, threshold=1.844e+02, percent-clipped=0.0 +2024-09-18 10:56:40,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=428580.0, ans=0.125 +2024-09-18 10:56:49,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=428620.0, ans=0.2 +2024-09-18 10:57:13,018 INFO [train.py:1198] (0/2) Epoch 24, batch 3100, loss[loss=0.2591, ctc_loss=0.1414, cr_loss=0.4077, attn_decoder_loss=0.2631, over 29236.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1306, cr_loss=0.3746, attn_decoder_loss=0.248, over 5776390.23 frames. 
], batch size: 100, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:57:16,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=428700.0, ans=0.125 +2024-09-18 10:57:40,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=428740.0, ans=0.1 +2024-09-18 10:58:04,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=428820.0, ans=0.0 +2024-09-18 10:58:31,280 INFO [train.py:1198] (0/2) Epoch 24, batch 3150, loss[loss=0.2577, ctc_loss=0.1392, cr_loss=0.396, attn_decoder_loss=0.262, over 28925.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1306, cr_loss=0.3742, attn_decoder_loss=0.2482, over 5782684.56 frames. ], batch size: 104, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:58:34,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=428900.0, ans=0.1 +2024-09-18 10:58:50,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.21 vs. limit=15.0 +2024-09-18 10:58:56,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.80 vs. limit=10.0 +2024-09-18 10:59:07,549 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.567e+01 8.611e+01 9.043e+01 9.612e+01 2.237e+02, threshold=1.809e+02, percent-clipped=2.0 +2024-09-18 10:59:28,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=429020.0, ans=0.125 +2024-09-18 10:59:49,143 INFO [train.py:1198] (0/2) Epoch 24, batch 3200, loss[loss=0.2332, ctc_loss=0.1217, cr_loss=0.3486, attn_decoder_loss=0.2378, over 29758.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1301, cr_loss=0.3731, attn_decoder_loss=0.2476, over 5793189.46 frames. ], batch size: 80, lr: 4.61e-03, grad_scale: 16.0 +2024-09-18 10:59:53,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=429100.0, ans=0.0 +2024-09-18 10:59:56,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=429100.0, ans=0.07 +2024-09-18 10:59:59,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=429100.0, ans=0.125 +2024-09-18 11:00:01,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.39 vs. 
limit=12.0 +2024-09-18 11:00:19,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=429180.0, ans=0.5 +2024-09-18 11:00:35,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=429220.0, ans=0.0 +2024-09-18 11:00:36,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=429220.0, ans=0.125 +2024-09-18 11:00:48,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=429260.0, ans=0.05 +2024-09-18 11:01:00,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=429260.0, ans=0.1 +2024-09-18 11:01:04,861 INFO [train.py:1198] (0/2) Epoch 24, batch 3250, loss[loss=0.2392, ctc_loss=0.1206, cr_loss=0.3554, attn_decoder_loss=0.2445, over 29728.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1302, cr_loss=0.3737, attn_decoder_loss=0.2477, over 5800023.58 frames. ], batch size: 84, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 11:01:06,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=429300.0, ans=0.025 +2024-09-18 11:01:06,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=429300.0, ans=0.1 +2024-09-18 11:01:42,458 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.459e+01 8.528e+01 8.996e+01 9.575e+01 1.279e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-18 11:02:12,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer_ff3.min_abs, batch_count=429460.0, ans=0.2 +2024-09-18 11:02:22,378 INFO [train.py:1198] (0/2) Epoch 24, batch 3300, loss[loss=0.2558, ctc_loss=0.1388, cr_loss=0.3839, attn_decoder_loss=0.2603, over 28349.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1297, cr_loss=0.3722, attn_decoder_loss=0.2466, over 5797962.65 frames. ], batch size: 112, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 11:02:25,174 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.06 vs. limit=15.0 +2024-09-18 11:02:45,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=429540.0, ans=0.125 +2024-09-18 11:02:53,887 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.91 vs. 
limit=15.0
+2024-09-18 11:02:54,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=429580.0, ans=0.05
+2024-09-18 11:02:56,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=429580.0, ans=0.0
+2024-09-18 11:02:57,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=429580.0, ans=0.125
+2024-09-18 11:03:24,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=429660.0, ans=0.025
+2024-09-18 11:03:34,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=429660.0, ans=0.2
+2024-09-18 11:03:40,490 INFO [train.py:1198] (0/2) Epoch 24, batch 3350, loss[loss=0.2495, ctc_loss=0.1285, cr_loss=0.3667, attn_decoder_loss=0.2548, over 28837.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1302, cr_loss=0.373, attn_decoder_loss=0.2473, over 5774319.05 frames. ], batch size: 104, lr: 4.61e-03, grad_scale: 8.0
+2024-09-18 11:03:53,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=429700.0, ans=0.025
+2024-09-18 11:03:57,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=429740.0, ans=0.125
+2024-09-18 11:04:03,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=429740.0, ans=0.1
+2024-09-18 11:04:10,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=429780.0, ans=0.0
+2024-09-18 11:04:10,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.42 vs. limit=15.0
+2024-09-18 11:04:16,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=429780.0, ans=0.0
+2024-09-18 11:04:16,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=429780.0, ans=0.125
+2024-09-18 11:04:16,540 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.72 vs. limit=15.0
+2024-09-18 11:04:18,811 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.747e+01 8.492e+01 9.207e+01 9.979e+01 1.773e+02, threshold=1.841e+02, percent-clipped=0.0
+2024-09-18 11:04:29,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=429820.0, ans=0.07
+2024-09-18 11:04:30,515 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.27 vs. limit=15.0
+2024-09-18 11:04:35,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=429820.0, ans=0.125
+2024-09-18 11:04:51,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.40 vs. limit=22.5
+2024-09-18 11:04:55,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=429900.0, ans=0.125
+2024-09-18 11:04:56,631 INFO [train.py:1198] (0/2) Epoch 24, batch 3400, loss[loss=0.2166, ctc_loss=0.1146, cr_loss=0.3412, attn_decoder_loss=0.2204, over 29313.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1303, cr_loss=0.3735, attn_decoder_loss=0.2475, over 5766875.38 frames. ], batch size: 67, lr: 4.61e-03, grad_scale: 8.0
+2024-09-18 11:04:56,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=429900.0, ans=0.0
+2024-09-18 11:05:02,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=429900.0, ans=0.1
+2024-09-18 11:05:27,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=429980.0, ans=0.0
+2024-09-18 11:05:40,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=429980.0, ans=0.07
+2024-09-18 11:05:40,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=429980.0, ans=0.125
+2024-09-18 11:05:41,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=429980.0, ans=0.5
+2024-09-18 11:05:44,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=430020.0, ans=0.125
+2024-09-18 11:05:46,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=430020.0, ans=0.2
+2024-09-18 11:05:50,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=430020.0, ans=0.125
+2024-09-18 11:05:52,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.63 vs. limit=15.0
+2024-09-18 11:05:58,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=430060.0, ans=0.1
+2024-09-18 11:06:14,382 INFO [train.py:1198] (0/2) Epoch 24, batch 3450, loss[loss=0.2568, ctc_loss=0.1348, cr_loss=0.3872, attn_decoder_loss=0.2617, over 28221.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1306, cr_loss=0.3746, attn_decoder_loss=0.2482, over 5775274.65 frames. ], batch size: 111, lr: 4.60e-03, grad_scale: 8.0
+2024-09-18 11:06:35,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=430140.0, ans=0.1
+2024-09-18 11:06:47,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=430180.0, ans=0.2
+2024-09-18 11:06:51,990 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.937e+01 8.449e+01 8.954e+01 9.468e+01 1.386e+02, threshold=1.791e+02, percent-clipped=0.0
+2024-09-18 11:07:13,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=430220.0, ans=0.025
+2024-09-18 11:07:19,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=430260.0, ans=0.0
+2024-09-18 11:07:32,348 INFO [train.py:1198] (0/2) Epoch 24, batch 3500, loss[loss=0.2168, ctc_loss=0.1139, cr_loss=0.3357, attn_decoder_loss=0.2207, over 29329.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1307, cr_loss=0.3748, attn_decoder_loss=0.2478, over 5777206.17 frames. ], batch size: 71, lr: 4.60e-03, grad_scale: 8.0
+2024-09-18 11:07:46,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=430340.0, ans=0.1
+2024-09-18 11:08:20,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=430420.0, ans=0.125
+2024-09-18 11:08:28,679 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.62 vs. limit=15.0
+2024-09-18 11:08:31,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=430460.0, ans=0.1
+2024-09-18 11:08:47,394 INFO [train.py:1198] (0/2) Epoch 24, batch 3550, loss[loss=0.2562, ctc_loss=0.1375, cr_loss=0.3997, attn_decoder_loss=0.2605, over 29725.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1302, cr_loss=0.3739, attn_decoder_loss=0.2474, over 5783532.74 frames. ], batch size: 89, lr: 4.60e-03, grad_scale: 8.0
+2024-09-18 11:09:01,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=430540.0, ans=0.1
+2024-09-18 11:09:09,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=430540.0, ans=0.0
+2024-09-18 11:09:24,137 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.484e+01 9.073e+01 9.801e+01 1.561e+02, threshold=1.815e+02, percent-clipped=0.0
+2024-09-18 11:09:24,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=430580.0, ans=0.0
+2024-09-18 11:09:48,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=430660.0, ans=0.0
+2024-09-18 11:10:00,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.19 vs. limit=22.5
+2024-09-18 11:10:01,186 INFO [train.py:1198] (0/2) Epoch 24, batch 3600, loss[loss=0.2404, ctc_loss=0.1307, cr_loss=0.3777, attn_decoder_loss=0.2442, over 29504.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1296, cr_loss=0.3732, attn_decoder_loss=0.2471, over 5792393.72 frames. ], batch size: 77, lr: 4.60e-03, grad_scale: 16.0
+2024-09-18 11:10:07,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=430700.0, ans=0.0
+2024-09-18 11:10:08,006 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.44 vs. limit=6.0
+2024-09-18 11:10:20,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=430740.0, ans=0.1
+2024-09-18 11:10:25,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=430740.0, ans=0.2
+2024-09-18 11:10:28,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=430740.0, ans=0.0
+2024-09-18 11:10:33,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=430780.0, ans=0.125
+2024-09-18 11:10:39,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=430780.0, ans=0.2
+2024-09-18 11:10:43,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=430780.0, ans=0.125
+2024-09-18 11:10:52,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=430820.0, ans=0.125
+2024-09-18 11:11:16,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=430900.0, ans=0.1
+2024-09-18 11:11:17,346 INFO [train.py:1198] (0/2) Epoch 24, batch 3650, loss[loss=0.2535, ctc_loss=0.1461, cr_loss=0.3955, attn_decoder_loss=0.2566, over 29507.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.129, cr_loss=0.3716, attn_decoder_loss=0.2464, over 5793870.69 frames. ], batch size: 90, lr: 4.60e-03, grad_scale: 16.0
+2024-09-18 11:11:29,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=430900.0, ans=0.0
+2024-09-18 11:11:44,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=430940.0, ans=0.0
+2024-09-18 11:11:56,436 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.267e+01 8.409e+01 9.046e+01 9.842e+01 1.750e+02, threshold=1.809e+02, percent-clipped=0.0
+2024-09-18 11:12:03,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=431020.0, ans=0.07
+2024-09-18 11:12:15,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=431060.0, ans=0.0
+2024-09-18 11:12:24,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=431060.0, ans=0.125
+2024-09-18 11:12:32,073 INFO [train.py:1198] (0/2) Epoch 24, batch 3700, loss[loss=0.2445, ctc_loss=0.1311, cr_loss=0.3836, attn_decoder_loss=0.2486, over 29697.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1287, cr_loss=0.3711, attn_decoder_loss=0.2464, over 5804203.92 frames. ], batch size: 84, lr: 4.60e-03, grad_scale: 8.0
+2024-09-18 11:12:53,815 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.76 vs. limit=15.0
+2024-09-18 11:13:20,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=431220.0, ans=0.0
+2024-09-18 11:13:48,722 INFO [train.py:1198] (0/2) Epoch 24, batch 3750, loss[loss=0.217, ctc_loss=0.1101, cr_loss=0.3528, attn_decoder_loss=0.221, over 29362.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.129, cr_loss=0.3714, attn_decoder_loss=0.2464, over 5807245.38 frames. ], batch size: 67, lr: 4.60e-03, grad_scale: 8.0
+2024-09-18 11:13:51,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=431300.0, ans=0.125
+2024-09-18 11:13:53,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=431300.0, ans=0.0
+2024-09-18 11:13:55,329 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.68 vs. limit=22.5
+2024-09-18 11:13:55,873 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.53 vs. limit=15.0
+2024-09-18 11:13:57,044 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.04 vs. limit=15.0
+2024-09-18 11:14:12,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=431340.0, ans=0.125
+2024-09-18 11:14:20,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=431380.0, ans=0.125
+2024-09-18 11:14:21,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=431380.0, ans=0.125
+2024-09-18 11:14:27,338 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.117e+01 8.348e+01 8.770e+01 9.473e+01 2.105e+02, threshold=1.754e+02, percent-clipped=1.0
+2024-09-18 11:14:30,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=431380.0, ans=0.025
+2024-09-18 11:15:03,203 INFO [train.py:1198] (0/2) Epoch 24, batch 3800, loss[loss=0.2574, ctc_loss=0.1386, cr_loss=0.4126, attn_decoder_loss=0.2615, over 29617.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1287, cr_loss=0.3714, attn_decoder_loss=0.2462, over 5798321.18 frames. ], batch size: 86, lr: 4.60e-03, grad_scale: 8.0
+2024-09-18 11:15:21,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=431540.0, ans=0.125
+2024-09-18 11:15:25,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=431540.0, ans=0.015
+2024-09-18 11:15:32,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.66 vs. limit=22.5
+2024-09-18 11:15:33,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=431580.0, ans=0.1
+2024-09-18 11:16:14,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=431660.0, ans=0.2
+2024-09-18 11:16:19,235 INFO [train.py:1198] (0/2) Epoch 24, batch 3850, loss[loss=0.262, ctc_loss=0.1457, cr_loss=0.4267, attn_decoder_loss=0.2654, over 29221.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1284, cr_loss=0.3709, attn_decoder_loss=0.246, over 5811907.24 frames. ], batch size: 100, lr: 4.60e-03, grad_scale: 8.0
+2024-09-18 11:16:37,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=431740.0, ans=0.0
+2024-09-18 11:16:46,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=431740.0, ans=0.125
+2024-09-18 11:16:49,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=431780.0, ans=0.5
+2024-09-18 11:16:52,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=431780.0, ans=0.0
+2024-09-18 11:16:52,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=431780.0, ans=0.0
+2024-09-18 11:16:57,861 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.248e+01 8.427e+01 9.024e+01 9.626e+01 1.408e+02, threshold=1.805e+02, percent-clipped=0.0
+2024-09-18 11:17:01,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=431780.0, ans=0.0
+2024-09-18 11:17:21,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.51 vs. limit=15.0
+2024-09-18 11:17:33,516 INFO [train.py:1198] (0/2) Epoch 24, batch 3900, loss[loss=0.2556, ctc_loss=0.1319, cr_loss=0.3758, attn_decoder_loss=0.2609, over 29619.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1289, cr_loss=0.3718, attn_decoder_loss=0.2468, over 5815656.59 frames. ], batch size: 86, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:17:41,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=431900.0, ans=0.1
+2024-09-18 11:17:50,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=431940.0, ans=0.2
+2024-09-18 11:17:57,421 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:18:00,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=431940.0, ans=0.1
+2024-09-18 11:18:03,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=431980.0, ans=0.2
+2024-09-18 11:18:09,529 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-108000.pt
+2024-09-18 11:18:19,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=431980.0, ans=0.1
+2024-09-18 11:18:20,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.34 vs. limit=10.0
+2024-09-18 11:18:42,732 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.34 vs. limit=15.0
+2024-09-18 11:18:52,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=432060.0, ans=0.2
+2024-09-18 11:18:55,351 INFO [train.py:1198] (0/2) Epoch 24, batch 3950, loss[loss=0.2556, ctc_loss=0.146, cr_loss=0.4153, attn_decoder_loss=0.2585, over 29548.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1285, cr_loss=0.3715, attn_decoder_loss=0.2466, over 5835301.94 frames. ], batch size: 97, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:19:20,693 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:19:28,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=432180.0, ans=0.0
+2024-09-18 11:19:35,356 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.178e+01 8.348e+01 8.902e+01 9.353e+01 3.258e+02, threshold=1.780e+02, percent-clipped=1.0
+2024-09-18 11:19:40,658 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.43 vs. limit=15.0
+2024-09-18 11:19:42,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=432220.0, ans=0.2
+2024-09-18 11:20:08,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=432260.0, ans=0.125
+2024-09-18 11:20:10,533 INFO [train.py:1198] (0/2) Epoch 24, batch 4000, loss[loss=0.232, ctc_loss=0.1221, cr_loss=0.3545, attn_decoder_loss=0.2364, over 29509.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.129, cr_loss=0.3718, attn_decoder_loss=0.2467, over 5813476.72 frames. ], batch size: 74, lr: 4.59e-03, grad_scale: 16.0
+2024-09-18 11:20:17,359 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.72 vs. limit=22.5
+2024-09-18 11:20:22,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.38 vs. limit=15.0
+2024-09-18 11:20:29,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=432340.0, ans=0.09899494936611666
+2024-09-18 11:20:38,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=432380.0, ans=0.0
+2024-09-18 11:20:52,249 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:20:59,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=432420.0, ans=0.2
+2024-09-18 11:21:21,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=432460.0, ans=0.125
+2024-09-18 11:21:25,719 INFO [train.py:1198] (0/2) Epoch 24, batch 4050, loss[loss=0.2571, ctc_loss=0.1487, cr_loss=0.3752, attn_decoder_loss=0.2608, over 20136.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1286, cr_loss=0.3709, attn_decoder_loss=0.2463, over 5797914.52 frames. ], batch size: 210, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:21:30,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.70 vs. limit=6.0
+2024-09-18 11:21:43,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=432540.0, ans=0.125
+2024-09-18 11:21:45,411 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.84 vs. limit=15.0
+2024-09-18 11:21:49,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=432540.0, ans=0.125
+2024-09-18 11:21:52,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=432540.0, ans=0.125
+2024-09-18 11:22:05,482 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.397e+01 8.379e+01 9.029e+01 9.565e+01 1.787e+02, threshold=1.806e+02, percent-clipped=1.0
+2024-09-18 11:22:10,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=432620.0, ans=0.09899494936611666
+2024-09-18 11:22:22,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=432620.0, ans=0.1
+2024-09-18 11:22:24,134 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.36 vs. limit=15.0
+2024-09-18 11:22:30,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=432660.0, ans=0.0
+2024-09-18 11:22:39,423 INFO [train.py:1198] (0/2) Epoch 24, batch 4100, loss[loss=0.267, ctc_loss=0.1535, cr_loss=0.4323, attn_decoder_loss=0.27, over 29483.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.129, cr_loss=0.3719, attn_decoder_loss=0.2468, over 5792472.61 frames. ], batch size: 90, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:22:47,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=432700.0, ans=0.07
+2024-09-18 11:23:07,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=432780.0, ans=0.0
+2024-09-18 11:23:15,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=432780.0, ans=0.125
+2024-09-18 11:23:18,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=432780.0, ans=0.125
+2024-09-18 11:23:38,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=432860.0, ans=0.125
+2024-09-18 11:23:49,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=432860.0, ans=0.125
+2024-09-18 11:23:54,706 INFO [train.py:1198] (0/2) Epoch 24, batch 4150, loss[loss=0.2352, ctc_loss=0.1294, cr_loss=0.3681, attn_decoder_loss=0.2387, over 29488.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1296, cr_loss=0.3724, attn_decoder_loss=0.247, over 5798126.50 frames. ], batch size: 77, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:24:06,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=432900.0, ans=0.125
+2024-09-18 11:24:14,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=432940.0, ans=0.0
+2024-09-18 11:24:23,549 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.43 vs. limit=10.0
+2024-09-18 11:24:34,387 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.403e+01 8.706e+01 9.310e+01 1.000e+02 1.548e+02, threshold=1.862e+02, percent-clipped=0.0
+2024-09-18 11:25:00,309 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.21 vs. limit=22.5
+2024-09-18 11:25:04,426 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:25:04,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=433060.0, ans=0.125
+2024-09-18 11:25:08,561 INFO [train.py:1198] (0/2) Epoch 24, batch 4200, loss[loss=0.2667, ctc_loss=0.1462, cr_loss=0.4112, attn_decoder_loss=0.2709, over 29507.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1298, cr_loss=0.373, attn_decoder_loss=0.2472, over 5801208.39 frames. ], batch size: 90, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:25:42,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=433180.0, ans=0.1
+2024-09-18 11:25:48,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=433180.0, ans=0.0
+2024-09-18 11:25:52,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=433220.0, ans=0.125
+2024-09-18 11:26:12,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=433260.0, ans=0.2
+2024-09-18 11:26:17,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=433260.0, ans=0.0
+2024-09-18 11:26:21,292 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0
+2024-09-18 11:26:23,505 INFO [train.py:1198] (0/2) Epoch 24, batch 4250, loss[loss=0.2259, ctc_loss=0.1151, cr_loss=0.3489, attn_decoder_loss=0.2305, over 29511.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1295, cr_loss=0.3728, attn_decoder_loss=0.2474, over 5806496.94 frames. ], batch size: 74, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:26:35,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=433300.0, ans=0.0
+2024-09-18 11:26:57,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=433380.0, ans=0.125
+2024-09-18 11:27:03,541 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.615e+01 9.136e+01 9.637e+01 1.647e+02, threshold=1.827e+02, percent-clipped=0.0
+2024-09-18 11:27:05,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=433380.0, ans=0.0
+2024-09-18 11:27:27,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=433460.0, ans=0.125
+2024-09-18 11:27:38,619 INFO [train.py:1198] (0/2) Epoch 24, batch 4300, loss[loss=0.2514, ctc_loss=0.1339, cr_loss=0.386, attn_decoder_loss=0.2559, over 29518.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1292, cr_loss=0.3719, attn_decoder_loss=0.2474, over 5795361.45 frames. ], batch size: 87, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:27:41,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=433500.0, ans=0.125
+2024-09-18 11:27:48,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.50 vs. limit=15.0
+2024-09-18 11:27:55,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=433540.0, ans=0.2
+2024-09-18 11:28:04,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=433540.0, ans=0.0
+2024-09-18 11:28:19,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=433580.0, ans=0.0
+2024-09-18 11:28:19,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=433580.0, ans=0.125
+2024-09-18 11:28:21,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=433580.0, ans=0.07
+2024-09-18 11:28:44,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=433660.0, ans=0.1
+2024-09-18 11:28:54,031 INFO [train.py:1198] (0/2) Epoch 24, batch 4350, loss[loss=0.2623, ctc_loss=0.1493, cr_loss=0.4152, attn_decoder_loss=0.2656, over 29484.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1312, cr_loss=0.3764, attn_decoder_loss=0.2504, over 5798060.25 frames. ], batch size: 97, lr: 4.59e-03, grad_scale: 8.0
+2024-09-18 11:29:03,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=433700.0, ans=0.0
+2024-09-18 11:29:03,709 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.80 vs. limit=10.0
+2024-09-18 11:29:08,314 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.86 vs. limit=15.0
+2024-09-18 11:29:26,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=433780.0, ans=0.0
+2024-09-18 11:29:33,322 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.959e+01 8.974e+01 9.434e+01 1.011e+02 1.996e+02, threshold=1.887e+02, percent-clipped=1.0
+2024-09-18 11:29:47,301 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0
+2024-09-18 11:29:55,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=433860.0, ans=0.035
+2024-09-18 11:30:05,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=433900.0, ans=0.0
+2024-09-18 11:30:07,005 INFO [train.py:1198] (0/2) Epoch 24, batch 4400, loss[loss=0.2518, ctc_loss=0.141, cr_loss=0.4097, attn_decoder_loss=0.255, over 27469.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1323, cr_loss=0.3783, attn_decoder_loss=0.2521, over 5769411.00 frames. ], batch size: 125, lr: 4.58e-03, grad_scale: 16.0
+2024-09-18 11:30:15,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=433900.0, ans=0.125
+2024-09-18 11:30:27,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten.whitening_limit, batch_count=433940.0, ans=22.5
+2024-09-18 11:30:28,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=433940.0, ans=0.04949747468305833
+2024-09-18 11:30:38,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=433980.0, ans=0.0
+2024-09-18 11:30:44,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=433980.0, ans=0.125
+2024-09-18 11:31:00,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=434020.0, ans=0.04949747468305833
+2024-09-18 11:31:21,146 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.14 vs. limit=10.0
+2024-09-18 11:31:21,665 INFO [train.py:1198] (0/2) Epoch 24, batch 4450, loss[loss=0.2658, ctc_loss=0.1598, cr_loss=0.3752, attn_decoder_loss=0.2692, over 20223.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1364, cr_loss=0.3835, attn_decoder_loss=0.2544, over 5584158.32 frames. ], batch size: 210, lr: 4.58e-03, grad_scale: 8.0
+2024-09-18 11:31:47,788 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.64 vs. limit=15.0
+2024-09-18 11:31:48,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=434140.0, ans=0.09899494936611666
+2024-09-18 11:31:50,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=434180.0, ans=0.125
+2024-09-18 11:32:04,072 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 9.021e+01 9.778e+01 1.211e+02 1.854e+02, threshold=1.956e+02, percent-clipped=0.0
+2024-09-18 11:32:07,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=434220.0, ans=0.05
+2024-09-18 11:32:15,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=434220.0, ans=0.2
+2024-09-18 11:32:18,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=434220.0, ans=0.2
+2024-09-18 11:32:21,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=434260.0, ans=0.125
+2024-09-18 11:32:30,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=434260.0, ans=0.125
+2024-09-18 11:32:37,517 INFO [train.py:1198] (0/2) Epoch 24, batch 4500, loss[loss=0.2705, ctc_loss=0.1671, cr_loss=0.4141, attn_decoder_loss=0.2728, over 20188.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1411, cr_loss=0.3875, attn_decoder_loss=0.2569, over 5240242.20 frames. ], batch size: 211, lr: 4.58e-03, grad_scale: 8.0
+2024-09-18 11:33:14,949 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-24.pt
+2024-09-18 11:34:00,534 INFO [train.py:1198] (0/2) Epoch 25, batch 0, loss[loss=0.2124, ctc_loss=0.09974, cr_loss=0.3218, attn_decoder_loss=0.2178, over 29624.00 frames. ], tot_loss[loss=0.2124, ctc_loss=0.09974, cr_loss=0.3218, attn_decoder_loss=0.2178, over 29624.00 frames. ], batch size: 73, lr: 4.49e-03, grad_scale: 16.0
+2024-09-18 11:34:00,535 INFO [train.py:1221] (0/2) Computing validation loss
+2024-09-18 11:34:18,958 INFO [train.py:1230] (0/2) Epoch 25, validation: loss=0.2119, ctc_loss=0.03765, cr_loss=5.538e-15, attn_decoder_loss=0.2313, over 944034.00 frames.
+2024-09-18 11:34:18,958 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB
+2024-09-18 11:34:53,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=434480.0, ans=0.0
+2024-09-18 11:34:56,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=434480.0, ans=0.125
+2024-09-18 11:35:08,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.85 vs. limit=10.0
+2024-09-18 11:35:18,762 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.22 vs. limit=15.0
+2024-09-18 11:35:36,620 INFO [train.py:1198] (0/2) Epoch 25, batch 50, loss[loss=0.222, ctc_loss=0.1096, cr_loss=0.3395, attn_decoder_loss=0.2269, over 29453.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1314, cr_loss=0.3783, attn_decoder_loss=0.2479, over 1268992.76 frames. ], batch size: 70, lr: 4.49e-03, grad_scale: 8.0
+2024-09-18 11:35:37,410 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.64 vs. limit=15.0
+2024-09-18 11:35:41,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=434600.0, ans=0.0
+2024-09-18 11:35:42,739 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.115e+01 8.952e+01 1.043e+02 1.177e+02 2.373e+02, threshold=2.086e+02, percent-clipped=2.0
+2024-09-18 11:35:55,826 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.54 vs. limit=15.0
+2024-09-18 11:36:23,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=434720.0, ans=0.0
+2024-09-18 11:36:53,484 INFO [train.py:1198] (0/2) Epoch 25, batch 100, loss[loss=0.2381, ctc_loss=0.1196, cr_loss=0.3604, attn_decoder_loss=0.2432, over 29523.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1327, cr_loss=0.3801, attn_decoder_loss=0.2503, over 2253706.56 frames. ], batch size: 76, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:37:01,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=434800.0, ans=0.1
+2024-09-18 11:37:01,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=434800.0, ans=10.0
+2024-09-18 11:37:08,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=434840.0, ans=0.125
+2024-09-18 11:37:31,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=434880.0, ans=0.2
+2024-09-18 11:37:44,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=434920.0, ans=0.09899494936611666
+2024-09-18 11:37:55,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=434960.0, ans=0.125
+2024-09-18 11:37:55,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=434960.0, ans=0.0
+2024-09-18 11:37:58,189 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:38:02,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=434960.0, ans=0.0
+2024-09-18 11:38:08,137 INFO [train.py:1198] (0/2) Epoch 25, batch 150, loss[loss=0.2211, ctc_loss=0.1186, cr_loss=0.3383, attn_decoder_loss=0.225, over 29418.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1301, cr_loss=0.3745, attn_decoder_loss=0.2483, over 3048095.50 frames. ], batch size: 70, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:38:08,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=435000.0, ans=0.0
+2024-09-18 11:38:14,096 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.274e+01 8.648e+01 9.269e+01 9.917e+01 1.697e+02, threshold=1.854e+02, percent-clipped=0.0
+2024-09-18 11:38:14,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=435000.0, ans=0.1
+2024-09-18 11:38:34,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=435040.0, ans=0.0
+2024-09-18 11:38:41,780 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:39:05,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=435120.0, ans=0.125
+2024-09-18 11:39:21,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=435160.0, ans=0.125
+2024-09-18 11:39:24,084 INFO [train.py:1198] (0/2) Epoch 25, batch 200, loss[loss=0.2536, ctc_loss=0.138, cr_loss=0.3873, attn_decoder_loss=0.2578, over 27556.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1294, cr_loss=0.3731, attn_decoder_loss=0.247, over 3659578.51 frames. ], batch size: 125, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:39:31,700 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.52 vs. limit=15.0
+2024-09-18 11:39:34,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=435200.0, ans=0.1
+2024-09-18 11:39:54,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=435240.0, ans=0.0
+2024-09-18 11:40:00,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=435280.0, ans=0.1
+2024-09-18 11:40:15,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.64 vs. limit=15.0
+2024-09-18 11:40:37,418 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:40:42,769 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.57 vs. limit=22.5
+2024-09-18 11:40:44,516 INFO [train.py:1198] (0/2) Epoch 25, batch 250, loss[loss=0.2486, ctc_loss=0.1294, cr_loss=0.3634, attn_decoder_loss=0.2538, over 29228.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1288, cr_loss=0.3722, attn_decoder_loss=0.2466, over 4142482.82 frames. ], batch size: 100, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:40:46,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=435400.0, ans=0.2
+2024-09-18 11:40:50,550 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.660e+01 8.459e+01 8.857e+01 9.365e+01 1.077e+02, threshold=1.771e+02, percent-clipped=0.0
+2024-09-18 11:41:30,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=435520.0, ans=0.07
+2024-09-18 11:41:38,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=435520.0, ans=0.0
+2024-09-18 11:41:43,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.25 vs. limit=15.0
+2024-09-18 11:41:49,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.87 vs. limit=15.0
+2024-09-18 11:41:50,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.whiten.whitening_limit, batch_count=435560.0, ans=15.0
+2024-09-18 11:41:56,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=435560.0, ans=0.125
+2024-09-18 11:41:59,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.95 vs. limit=10.0
+2024-09-18 11:42:00,695 INFO [train.py:1198] (0/2) Epoch 25, batch 300, loss[loss=0.2482, ctc_loss=0.1299, cr_loss=0.3776, attn_decoder_loss=0.2529, over 29507.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1289, cr_loss=0.3721, attn_decoder_loss=0.2466, over 4511644.29 frames. ], batch size: 92, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:42:23,579 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:42:31,510 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.02 vs. limit=10.0
+2024-09-18 11:42:36,390 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.06 vs. limit=12.0
+2024-09-18 11:42:37,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=435680.0, ans=0.125
+2024-09-18 11:43:08,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=435760.0, ans=0.2
+2024-09-18 11:43:09,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.75 vs. limit=22.5
+2024-09-18 11:43:15,964 INFO [train.py:1198] (0/2) Epoch 25, batch 350, loss[loss=0.2232, ctc_loss=0.111, cr_loss=0.3191, attn_decoder_loss=0.2286, over 29332.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1285, cr_loss=0.3712, attn_decoder_loss=0.2465, over 4796369.32 frames. ], batch size: 71, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:43:21,920 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.327e+01 8.434e+01 8.932e+01 9.530e+01 2.745e+02, threshold=1.786e+02, percent-clipped=1.0
+2024-09-18 11:43:30,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=435800.0, ans=0.125
+2024-09-18 11:43:39,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=435840.0, ans=0.0
+2024-09-18 11:43:57,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.43 vs. limit=15.0
+2024-09-18 11:44:27,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=435960.0, ans=0.125
+2024-09-18 11:44:36,416 INFO [train.py:1198] (0/2) Epoch 25, batch 400, loss[loss=0.2586, ctc_loss=0.142, cr_loss=0.4138, attn_decoder_loss=0.2624, over 29720.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1282, cr_loss=0.3709, attn_decoder_loss=0.2463, over 5027159.78 frames. ], batch size: 82, lr: 4.48e-03, grad_scale: 16.0
+2024-09-18 11:45:06,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.80 vs. limit=15.0
+2024-09-18 11:45:26,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.70 vs. limit=8.0
+2024-09-18 11:45:29,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=436120.0, ans=0.125
+2024-09-18 11:45:32,136 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.17 vs. limit=22.5
+2024-09-18 11:45:52,038 INFO [train.py:1198] (0/2) Epoch 25, batch 450, loss[loss=0.2438, ctc_loss=0.1333, cr_loss=0.3816, attn_decoder_loss=0.2476, over 29679.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1286, cr_loss=0.3716, attn_decoder_loss=0.2463, over 5188697.01 frames. ], batch size: 83, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:45:52,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=436200.0, ans=0.0
+2024-09-18 11:45:59,436 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.181e+01 8.625e+01 9.050e+01 9.660e+01 1.722e+02, threshold=1.810e+02, percent-clipped=0.0
+2024-09-18 11:46:48,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=436320.0, ans=0.1
+2024-09-18 11:47:07,835 INFO [train.py:1198] (0/2) Epoch 25, batch 500, loss[loss=0.2706, ctc_loss=0.1583, cr_loss=0.427, attn_decoder_loss=0.2736, over 29437.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1282, cr_loss=0.3712, attn_decoder_loss=0.2459, over 5330368.47 frames. ], batch size: 94, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:47:29,789 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.04 vs. limit=15.0
+2024-09-18 11:48:08,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=436520.0, ans=0.0
+2024-09-18 11:48:25,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=436560.0, ans=0.125
+2024-09-18 11:48:28,229 INFO [train.py:1198] (0/2) Epoch 25, batch 550, loss[loss=0.2568, ctc_loss=0.1361, cr_loss=0.3926, attn_decoder_loss=0.2615, over 28858.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1277, cr_loss=0.3702, attn_decoder_loss=0.246, over 5422095.63 frames. ], batch size: 104, lr: 4.48e-03, grad_scale: 8.0
+2024-09-18 11:48:33,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=436600.0, ans=0.2
+2024-09-18 11:48:35,874 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.562e+01 9.108e+01 9.510e+01 4.336e+02, threshold=1.822e+02, percent-clipped=3.0
+2024-09-18 11:48:56,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=436640.0, ans=0.125
+2024-09-18 11:48:56,709 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.85 vs. limit=15.0
+2024-09-18 11:49:05,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=436680.0, ans=0.0
+2024-09-18 11:49:11,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=436680.0, ans=0.125
+2024-09-18 11:49:14,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=436720.0, ans=0.125
+2024-09-18 11:49:16,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=436720.0, ans=0.2
+2024-09-18 11:49:27,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=436720.0, ans=15.0
+2024-09-18 11:49:34,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=436760.0, ans=10.0
+2024-09-18 11:49:45,358 INFO [train.py:1198] (0/2) Epoch 25, batch 600, loss[loss=0.2558, ctc_loss=0.1416, cr_loss=0.3816, attn_decoder_loss=0.26, over 29262.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1274, cr_loss=0.3699, attn_decoder_loss=0.2463, over 5509206.82 frames. ], batch size: 100, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 11:49:48,849 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 11:49:56,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=436800.0, ans=0.125
+2024-09-18 11:49:58,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=436840.0, ans=0.025
+2024-09-18 11:50:13,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=436880.0, ans=0.125
+2024-09-18 11:50:18,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=436880.0, ans=0.125
+2024-09-18 11:50:37,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=436920.0, ans=0.2
+2024-09-18 11:50:53,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=436960.0, ans=0.0
+2024-09-18 11:50:56,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=436960.0, ans=0.05
+2024-09-18 11:51:00,491 INFO [train.py:1198] (0/2) Epoch 25, batch 650, loss[loss=0.234, ctc_loss=0.1205, cr_loss=0.3645, attn_decoder_loss=0.2385, over 29772.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1267, cr_loss=0.3684, attn_decoder_loss=0.2456, over 5587379.03 frames. ], batch size: 81, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 11:51:05,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=437000.0, ans=0.0
+2024-09-18 11:51:08,131 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 8.416e+01 8.904e+01 9.509e+01 2.097e+02, threshold=1.781e+02, percent-clipped=1.0
+2024-09-18 11:51:16,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=437040.0, ans=0.125
+2024-09-18 11:51:22,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=437040.0, ans=0.125
+2024-09-18 11:51:26,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=437040.0, ans=0.125
+2024-09-18 11:51:31,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=437080.0, ans=0.125
+2024-09-18 11:51:49,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=437120.0, ans=0.125
+2024-09-18 11:51:55,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=437120.0, ans=0.125
+2024-09-18 11:52:03,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=437120.0, ans=0.125
+2024-09-18 11:52:05,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.54 vs. limit=15.0
+2024-09-18 11:52:09,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=437160.0, ans=0.025
+2024-09-18 11:52:21,114 INFO [train.py:1198] (0/2) Epoch 25, batch 700, loss[loss=0.2384, ctc_loss=0.1324, cr_loss=0.3713, attn_decoder_loss=0.2419, over 29536.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1275, cr_loss=0.3693, attn_decoder_loss=0.2461, over 5636449.29 frames. ], batch size: 76, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 11:52:27,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=437200.0, ans=0.0
+2024-09-18 11:52:28,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=437200.0, ans=0.125
+2024-09-18 11:52:43,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.77 vs. limit=15.0
+2024-09-18 11:53:30,645 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.86 vs. limit=22.5
+2024-09-18 11:53:33,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.92 vs. limit=15.0
+2024-09-18 11:53:37,357 INFO [train.py:1198] (0/2) Epoch 25, batch 750, loss[loss=0.2291, ctc_loss=0.1112, cr_loss=0.3316, attn_decoder_loss=0.2349, over 29710.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1274, cr_loss=0.369, attn_decoder_loss=0.2457, over 5674865.06 frames. ], batch size: 82, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 11:53:43,906 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.16 vs. limit=15.0
+2024-09-18 11:53:44,707 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.480e+01 8.436e+01 8.901e+01 9.527e+01 2.571e+02, threshold=1.780e+02, percent-clipped=1.0
+2024-09-18 11:54:07,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=437480.0, ans=0.125
+2024-09-18 11:54:33,109 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.88 vs. limit=15.0
+2024-09-18 11:54:35,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=437520.0, ans=0.1
+2024-09-18 11:54:53,473 INFO [train.py:1198] (0/2) Epoch 25, batch 800, loss[loss=0.2197, ctc_loss=0.1064, cr_loss=0.3124, attn_decoder_loss=0.2253, over 29597.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1274, cr_loss=0.3688, attn_decoder_loss=0.2457, over 5705268.78 frames. ], batch size: 73, lr: 4.47e-03, grad_scale: 16.0
+2024-09-18 11:55:03,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=437600.0, ans=0.0
+2024-09-18 11:55:04,805 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.72 vs. limit=15.0
+2024-09-18 11:55:06,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=437600.0, ans=0.125
+2024-09-18 11:55:09,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=437640.0, ans=0.025
+2024-09-18 11:55:35,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=437680.0, ans=0.0
+2024-09-18 11:55:45,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=437720.0, ans=0.0
+2024-09-18 11:55:52,559 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.38 vs. limit=22.5
+2024-09-18 11:56:13,703 INFO [train.py:1198] (0/2) Epoch 25, batch 850, loss[loss=0.2416, ctc_loss=0.1165, cr_loss=0.3412, attn_decoder_loss=0.2479, over 29696.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1271, cr_loss=0.3686, attn_decoder_loss=0.2454, over 5734676.18 frames. ], batch size: 89, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 11:56:22,488 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.420e+01 8.934e+01 9.567e+01 3.952e+02, threshold=1.787e+02, percent-clipped=1.0
+2024-09-18 11:56:28,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=437840.0, ans=0.0
+2024-09-18 11:56:36,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=437840.0, ans=0.0
+2024-09-18 11:56:40,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=437840.0, ans=0.2
+2024-09-18 11:57:00,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=437920.0, ans=0.125
+2024-09-18 11:57:23,981 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.72 vs. limit=15.0
+2024-09-18 11:57:29,246 INFO [train.py:1198] (0/2) Epoch 25, batch 900, loss[loss=0.2224, ctc_loss=0.1097, cr_loss=0.3312, attn_decoder_loss=0.2275, over 29645.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1278, cr_loss=0.3695, attn_decoder_loss=0.2459, over 5740099.17 frames. ], batch size: 73, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 11:57:41,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=438000.0, ans=0.125
+2024-09-18 11:57:52,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=438040.0, ans=0.125
+2024-09-18 11:57:55,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=438040.0, ans=0.125
+2024-09-18 11:58:01,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=438080.0, ans=0.0
+2024-09-18 11:58:01,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=438080.0, ans=0.0
+2024-09-18 11:58:11,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=438080.0, ans=0.125
+2024-09-18 11:58:14,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=438120.0, ans=0.125
+2024-09-18 11:58:22,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=438120.0, ans=0.025
+2024-09-18 11:58:44,546 INFO [train.py:1198] (0/2) Epoch 25, batch 950, loss[loss=0.2245, ctc_loss=0.1153, cr_loss=0.342, attn_decoder_loss=0.229, over 29522.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1274, cr_loss=0.3685, attn_decoder_loss=0.2457, over 5743839.52 frames. ], batch size: 74, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 11:58:53,518 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.515e+01 8.540e+01 9.168e+01 9.959e+01 1.680e+02, threshold=1.834e+02, percent-clipped=0.0
+2024-09-18 11:59:13,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=438280.0, ans=0.2
+2024-09-18 11:59:18,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=438280.0, ans=0.125
+2024-09-18 11:59:54,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.69 vs. limit=10.0
+2024-09-18 12:00:04,898 INFO [train.py:1198] (0/2) Epoch 25, batch 1000, loss[loss=0.2332, ctc_loss=0.1218, cr_loss=0.3643, attn_decoder_loss=0.2374, over 29494.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1284, cr_loss=0.3713, attn_decoder_loss=0.2467, over 5739432.10 frames. ], batch size: 77, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 12:00:08,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=438400.0, ans=0.2
+2024-09-18 12:00:09,294 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.99 vs. limit=6.0
+2024-09-18 12:00:15,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=438400.0, ans=0.125
+2024-09-18 12:00:18,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=438440.0, ans=0.125
+2024-09-18 12:00:34,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=438480.0, ans=0.95
+2024-09-18 12:00:51,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.20 vs. limit=22.5
+2024-09-18 12:00:52,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=438520.0, ans=0.125
+2024-09-18 12:00:58,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=438520.0, ans=0.0
+2024-09-18 12:01:20,738 INFO [train.py:1198] (0/2) Epoch 25, batch 1050, loss[loss=0.2428, ctc_loss=0.1231, cr_loss=0.3473, attn_decoder_loss=0.2484, over 29671.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1281, cr_loss=0.3707, attn_decoder_loss=0.2461, over 5746860.94 frames. ], batch size: 85, lr: 4.47e-03, grad_scale: 8.0
+2024-09-18 12:01:25,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=438600.0, ans=0.1
+2024-09-18 12:01:29,762 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.101e+01 8.550e+01 9.112e+01 9.812e+01 2.455e+02, threshold=1.822e+02, percent-clipped=1.0
+2024-09-18 12:01:30,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=438600.0, ans=0.125
+2024-09-18 12:01:44,539 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.38 vs. limit=15.0
+2024-09-18 12:01:49,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=438680.0, ans=0.125
+2024-09-18 12:01:57,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=438680.0, ans=0.125
+2024-09-18 12:02:17,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=438720.0, ans=0.1
+2024-09-18 12:02:20,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=438760.0, ans=0.125
+2024-09-18 12:02:23,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=438760.0, ans=0.025
+2024-09-18 12:02:35,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=438800.0, ans=0.1
+2024-09-18 12:02:36,548 INFO [train.py:1198] (0/2) Epoch 25, batch 1100, loss[loss=0.2395, ctc_loss=0.1227, cr_loss=0.3704, attn_decoder_loss=0.2442, over 29475.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1282, cr_loss=0.3712, attn_decoder_loss=0.2462, over 5759597.11 frames.
], batch size: 78, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:02:36,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=438800.0, ans=0.125 +2024-09-18 12:02:41,780 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.81 vs. limit=15.0 +2024-09-18 12:02:53,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.25 vs. limit=15.0 +2024-09-18 12:02:55,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=438840.0, ans=0.125 +2024-09-18 12:02:59,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=438840.0, ans=0.035 +2024-09-18 12:02:59,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=438840.0, ans=0.025 +2024-09-18 12:03:03,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=438840.0, ans=0.125 +2024-09-18 12:03:08,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=438880.0, ans=0.125 +2024-09-18 12:03:13,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=438880.0, ans=0.2 +2024-09-18 12:03:13,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=438880.0, ans=0.0 +2024-09-18 12:03:28,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=438920.0, ans=0.125 +2024-09-18 12:03:50,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=438960.0, ans=0.0 +2024-09-18 12:03:56,686 INFO [train.py:1198] (0/2) Epoch 25, batch 1150, loss[loss=0.2296, ctc_loss=0.1196, cr_loss=0.3646, attn_decoder_loss=0.2337, over 29435.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1282, cr_loss=0.3711, attn_decoder_loss=0.2458, over 5755390.58 frames. ], batch size: 78, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:04:05,924 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.230e+01 8.574e+01 9.064e+01 9.855e+01 2.778e+02, threshold=1.813e+02, percent-clipped=2.0 +2024-09-18 12:04:08,234 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.61 vs. limit=12.0 +2024-09-18 12:04:13,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=439040.0, ans=0.0 +2024-09-18 12:04:43,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=439120.0, ans=0.0 +2024-09-18 12:04:56,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=439160.0, ans=0.025 +2024-09-18 12:05:13,553 INFO [train.py:1198] (0/2) Epoch 25, batch 1200, loss[loss=0.2502, ctc_loss=0.1347, cr_loss=0.384, attn_decoder_loss=0.2545, over 29672.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.129, cr_loss=0.3726, attn_decoder_loss=0.2466, over 5747811.89 frames. 
], batch size: 85, lr: 4.46e-03, grad_scale: 16.0 +2024-09-18 12:05:13,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=439200.0, ans=0.125 +2024-09-18 12:05:48,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.47 vs. limit=15.0 +2024-09-18 12:06:01,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=439320.0, ans=0.125 +2024-09-18 12:06:16,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=439360.0, ans=0.0 +2024-09-18 12:06:24,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=439360.0, ans=0.0 +2024-09-18 12:06:29,669 INFO [train.py:1198] (0/2) Epoch 25, batch 1250, loss[loss=0.2472, ctc_loss=0.1232, cr_loss=0.3687, attn_decoder_loss=0.2528, over 29536.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1294, cr_loss=0.3734, attn_decoder_loss=0.2473, over 5775494.57 frames. ], batch size: 92, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:06:31,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=439400.0, ans=0.125 +2024-09-18 12:06:40,447 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.589e+01 8.697e+01 9.266e+01 9.820e+01 4.128e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-18 12:06:55,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=439440.0, ans=0.125 +2024-09-18 12:07:03,367 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.86 vs. limit=15.0 +2024-09-18 12:07:04,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=439480.0, ans=0.125 +2024-09-18 12:07:30,211 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.41 vs. limit=15.0 +2024-09-18 12:07:46,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=439560.0, ans=0.2 +2024-09-18 12:07:50,372 INFO [train.py:1198] (0/2) Epoch 25, batch 1300, loss[loss=0.2526, ctc_loss=0.1329, cr_loss=0.3888, attn_decoder_loss=0.2572, over 28238.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1291, cr_loss=0.3734, attn_decoder_loss=0.2468, over 5780532.04 frames. ], batch size: 111, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:07:50,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=439600.0, ans=0.125 +2024-09-18 12:08:07,989 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.20 vs. limit=22.5 +2024-09-18 12:08:42,781 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.07 vs. limit=22.5 +2024-09-18 12:09:06,235 INFO [train.py:1198] (0/2) Epoch 25, batch 1350, loss[loss=0.2508, ctc_loss=0.1352, cr_loss=0.3904, attn_decoder_loss=0.255, over 29755.00 frames. 
], tot_loss[loss=0.2423, ctc_loss=0.1289, cr_loss=0.3735, attn_decoder_loss=0.2466, over 5797388.83 frames. ], batch size: 81, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:09:16,805 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.561e+01 9.293e+01 1.003e+02 2.081e+02, threshold=1.859e+02, percent-clipped=1.0 +2024-09-18 12:09:28,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=439840.0, ans=0.125 +2024-09-18 12:10:03,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=439920.0, ans=0.125 +2024-09-18 12:10:21,586 INFO [train.py:1198] (0/2) Epoch 25, batch 1400, loss[loss=0.2128, ctc_loss=0.1093, cr_loss=0.3223, attn_decoder_loss=0.2171, over 29573.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1285, cr_loss=0.372, attn_decoder_loss=0.2463, over 5808018.07 frames. ], batch size: 69, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:10:29,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=440000.0, ans=0.125 +2024-09-18 12:10:32,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=440000.0, ans=10.0 +2024-09-18 12:10:53,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=440080.0, ans=0.125 +2024-09-18 12:11:12,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=440120.0, ans=0.1 +2024-09-18 12:11:29,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=440160.0, ans=0.2 +2024-09-18 12:11:30,383 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.24 vs. limit=15.0 +2024-09-18 12:11:34,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=440160.0, ans=0.125 +2024-09-18 12:11:38,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=440160.0, ans=0.1 +2024-09-18 12:11:38,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=440160.0, ans=0.2 +2024-09-18 12:11:41,708 INFO [train.py:1198] (0/2) Epoch 25, batch 1450, loss[loss=0.2658, ctc_loss=0.1523, cr_loss=0.4289, attn_decoder_loss=0.2688, over 29428.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1288, cr_loss=0.3726, attn_decoder_loss=0.2468, over 5804225.06 frames. ], batch size: 94, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:11:42,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.70 vs. 
limit=15.0 +2024-09-18 12:11:52,214 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.790e+01 8.701e+01 9.305e+01 9.884e+01 1.753e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-18 12:11:57,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=440240.0, ans=0.1 +2024-09-18 12:12:01,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=440240.0, ans=0.125 +2024-09-18 12:12:08,143 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.40 vs. limit=22.5 +2024-09-18 12:12:19,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=440280.0, ans=0.2 +2024-09-18 12:12:21,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_na.min_abs, batch_count=440280.0, ans=0.02 +2024-09-18 12:12:34,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=440320.0, ans=0.125 +2024-09-18 12:12:43,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=440360.0, ans=10.0 +2024-09-18 12:12:53,505 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.91 vs. limit=15.0 +2024-09-18 12:12:57,321 INFO [train.py:1198] (0/2) Epoch 25, batch 1500, loss[loss=0.2475, ctc_loss=0.1252, cr_loss=0.3688, attn_decoder_loss=0.2529, over 29630.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.129, cr_loss=0.3732, attn_decoder_loss=0.2474, over 5806151.31 frames. ], batch size: 86, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:13:23,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=440440.0, ans=0.125 +2024-09-18 12:13:46,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.77 vs. 
limit=15.0 +2024-09-18 12:13:47,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=440520.0, ans=0.1 +2024-09-18 12:13:49,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=440520.0, ans=0.1 +2024-09-18 12:13:50,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=440520.0, ans=0.0 +2024-09-18 12:13:52,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=440520.0, ans=0.125 +2024-09-18 12:13:55,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=440520.0, ans=0.1 +2024-09-18 12:13:57,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=440560.0, ans=0.1 +2024-09-18 12:14:10,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=440560.0, ans=0.0 +2024-09-18 12:14:11,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=440600.0, ans=0.0 +2024-09-18 12:14:13,207 INFO [train.py:1198] (0/2) Epoch 25, batch 1550, loss[loss=0.254, ctc_loss=0.1365, cr_loss=0.3838, attn_decoder_loss=0.2586, over 29535.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1292, cr_loss=0.373, attn_decoder_loss=0.2471, over 5782306.12 frames. ], batch size: 90, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:14:16,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=440600.0, ans=0.0 +2024-09-18 12:14:23,710 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.442e+01 8.497e+01 9.186e+01 9.794e+01 2.835e+02, threshold=1.837e+02, percent-clipped=2.0 +2024-09-18 12:14:35,291 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.63 vs. limit=15.0 +2024-09-18 12:14:41,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=440680.0, ans=0.0 +2024-09-18 12:15:33,647 INFO [train.py:1198] (0/2) Epoch 25, batch 1600, loss[loss=0.2497, ctc_loss=0.1272, cr_loss=0.3527, attn_decoder_loss=0.2555, over 29689.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1298, cr_loss=0.3738, attn_decoder_loss=0.2474, over 5765450.97 frames. ], batch size: 85, lr: 4.45e-03, grad_scale: 16.0 +2024-09-18 12:15:36,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.28 vs. 
limit=6.0 +2024-09-18 12:15:43,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=440800.0, ans=0.07 +2024-09-18 12:15:53,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=440840.0, ans=0.125 +2024-09-18 12:15:59,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=440840.0, ans=0.125 +2024-09-18 12:16:07,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=440880.0, ans=0.125 +2024-09-18 12:16:08,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=440880.0, ans=0.125 +2024-09-18 12:16:33,438 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.43 vs. limit=6.0 +2024-09-18 12:16:36,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.29 vs. limit=15.0 +2024-09-18 12:16:49,308 INFO [train.py:1198] (0/2) Epoch 25, batch 1650, loss[loss=0.2504, ctc_loss=0.1352, cr_loss=0.3896, attn_decoder_loss=0.2546, over 29728.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1295, cr_loss=0.3733, attn_decoder_loss=0.2471, over 5758936.36 frames. ], batch size: 89, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:17:01,297 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.666e+01 8.636e+01 9.380e+01 1.005e+02 4.034e+02, threshold=1.876e+02, percent-clipped=3.0 +2024-09-18 12:17:13,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=441040.0, ans=0.1 +2024-09-18 12:17:30,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=441080.0, ans=0.2 +2024-09-18 12:17:36,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=441120.0, ans=0.0 +2024-09-18 12:18:03,084 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.16 vs. limit=22.5 +2024-09-18 12:18:04,970 INFO [train.py:1198] (0/2) Epoch 25, batch 1700, loss[loss=0.2153, ctc_loss=0.1143, cr_loss=0.3431, attn_decoder_loss=0.2189, over 29565.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1287, cr_loss=0.3719, attn_decoder_loss=0.2466, over 5780835.66 frames. 
], batch size: 69, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:18:20,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=441240.0, ans=0.125 +2024-09-18 12:18:39,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=441280.0, ans=0.1 +2024-09-18 12:18:45,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=441280.0, ans=0.025 +2024-09-18 12:18:48,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=441280.0, ans=0.125 +2024-09-18 12:19:04,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=441320.0, ans=0.125 +2024-09-18 12:19:25,315 INFO [train.py:1198] (0/2) Epoch 25, batch 1750, loss[loss=0.2121, ctc_loss=0.1113, cr_loss=0.3436, attn_decoder_loss=0.2157, over 29333.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1284, cr_loss=0.3716, attn_decoder_loss=0.2463, over 5789593.89 frames. ], batch size: 67, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:19:37,503 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.136e+01 8.435e+01 8.870e+01 9.715e+01 1.342e+02, threshold=1.774e+02, percent-clipped=0.0 +2024-09-18 12:19:41,438 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.29 vs. limit=6.0 +2024-09-18 12:19:42,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=441440.0, ans=0.2 +2024-09-18 12:20:14,396 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:20:20,633 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.09 vs. limit=6.0 +2024-09-18 12:20:41,483 INFO [train.py:1198] (0/2) Epoch 25, batch 1800, loss[loss=0.2574, ctc_loss=0.1394, cr_loss=0.4003, attn_decoder_loss=0.2616, over 29692.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1284, cr_loss=0.371, attn_decoder_loss=0.2464, over 5792895.72 frames. ], batch size: 83, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:20:49,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=441600.0, ans=0.0 +2024-09-18 12:21:06,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.94 vs. limit=22.5 +2024-09-18 12:21:15,953 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.30 vs. 
limit=15.0 +2024-09-18 12:21:25,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=441720.0, ans=0.2 +2024-09-18 12:21:27,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=441720.0, ans=0.0 +2024-09-18 12:21:36,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=441720.0, ans=0.5 +2024-09-18 12:21:45,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=441760.0, ans=0.125 +2024-09-18 12:21:56,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=441800.0, ans=0.125 +2024-09-18 12:21:57,727 INFO [train.py:1198] (0/2) Epoch 25, batch 1850, loss[loss=0.2457, ctc_loss=0.1278, cr_loss=0.3651, attn_decoder_loss=0.2507, over 29607.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1278, cr_loss=0.3703, attn_decoder_loss=0.2462, over 5798928.66 frames. ], batch size: 86, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:22:06,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=441800.0, ans=0.1 +2024-09-18 12:22:09,694 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.990e+01 8.488e+01 8.939e+01 9.551e+01 1.184e+02, threshold=1.788e+02, percent-clipped=0.0 +2024-09-18 12:22:34,186 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.77 vs. limit=10.0 +2024-09-18 12:22:45,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=441920.0, ans=0.0 +2024-09-18 12:22:52,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=12.0 +2024-09-18 12:22:59,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.05 vs. limit=15.0 +2024-09-18 12:23:06,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=441960.0, ans=0.2 +2024-09-18 12:23:11,411 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.76 vs. limit=15.0 +2024-09-18 12:23:12,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=441960.0, ans=0.125 +2024-09-18 12:23:14,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=442000.0, ans=0.025 +2024-09-18 12:23:15,236 INFO [train.py:1198] (0/2) Epoch 25, batch 1900, loss[loss=0.2552, ctc_loss=0.1347, cr_loss=0.3823, attn_decoder_loss=0.26, over 29717.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.128, cr_loss=0.3712, attn_decoder_loss=0.2467, over 5806835.88 frames. 
], batch size: 89, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:23:25,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_positive, batch_count=442000.0, ans=0.05 +2024-09-18 12:23:33,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=442040.0, ans=0.125 +2024-09-18 12:23:39,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1.whitening_limit, batch_count=442040.0, ans=10.0 +2024-09-18 12:23:52,391 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.73 vs. limit=15.0 +2024-09-18 12:24:04,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.37 vs. limit=15.0 +2024-09-18 12:24:21,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=442160.0, ans=0.0 +2024-09-18 12:24:33,478 INFO [train.py:1198] (0/2) Epoch 25, batch 1950, loss[loss=0.2379, ctc_loss=0.1313, cr_loss=0.3931, attn_decoder_loss=0.2411, over 29447.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1287, cr_loss=0.3725, attn_decoder_loss=0.2479, over 5820961.61 frames. ], batch size: 78, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:24:45,600 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.674e+01 8.609e+01 9.254e+01 9.710e+01 4.424e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-18 12:24:47,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=442240.0, ans=0.125 +2024-09-18 12:24:54,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=442240.0, ans=0.2 +2024-09-18 12:25:01,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=442240.0, ans=0.2 +2024-09-18 12:25:39,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=442360.0, ans=0.125 +2024-09-18 12:25:41,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten.whitening_limit, batch_count=442360.0, ans=22.5 +2024-09-18 12:25:46,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=442360.0, ans=0.2 +2024-09-18 12:25:49,611 INFO [train.py:1198] (0/2) Epoch 25, batch 2000, loss[loss=0.2191, ctc_loss=0.1159, cr_loss=0.3499, attn_decoder_loss=0.2228, over 29340.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1291, cr_loss=0.373, attn_decoder_loss=0.2481, over 5796983.38 frames. 
], batch size: 67, lr: 4.45e-03, grad_scale: 16.0 +2024-09-18 12:25:57,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=442400.0, ans=10.0 +2024-09-18 12:26:18,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=442440.0, ans=0.0 +2024-09-18 12:26:30,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=442480.0, ans=0.2 +2024-09-18 12:26:32,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=442480.0, ans=0.0 +2024-09-18 12:26:34,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=442480.0, ans=0.0 +2024-09-18 12:26:34,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=442480.0, ans=0.125 +2024-09-18 12:26:48,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=442520.0, ans=0.1 +2024-09-18 12:26:49,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=442520.0, ans=0.07 +2024-09-18 12:26:54,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=442560.0, ans=0.0 +2024-09-18 12:27:03,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=442560.0, ans=0.125 +2024-09-18 12:27:03,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=442560.0, ans=0.0 +2024-09-18 12:27:07,724 INFO [train.py:1198] (0/2) Epoch 25, batch 2050, loss[loss=0.22, ctc_loss=0.1139, cr_loss=0.3447, attn_decoder_loss=0.2242, over 29420.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1288, cr_loss=0.3722, attn_decoder_loss=0.2474, over 5789234.25 frames. ], batch size: 70, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:27:19,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=442600.0, ans=0.125 +2024-09-18 12:27:23,554 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.313e+01 8.436e+01 8.905e+01 9.396e+01 1.982e+02, threshold=1.781e+02, percent-clipped=1.0 +2024-09-18 12:27:29,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=442640.0, ans=0.0 +2024-09-18 12:27:58,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=442720.0, ans=0.125 +2024-09-18 12:28:12,921 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.54 vs. limit=22.5 +2024-09-18 12:28:16,272 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.47 vs. limit=15.0 +2024-09-18 12:28:25,816 INFO [train.py:1198] (0/2) Epoch 25, batch 2100, loss[loss=0.2481, ctc_loss=0.1348, cr_loss=0.3834, attn_decoder_loss=0.2521, over 29779.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1285, cr_loss=0.3713, attn_decoder_loss=0.2468, over 5801232.31 frames. 
], batch size: 81, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:28:26,644 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.76 vs. limit=6.0 +2024-09-18 12:28:35,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=442800.0, ans=0.95 +2024-09-18 12:28:36,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=442800.0, ans=0.2 +2024-09-18 12:28:44,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.45 vs. limit=15.0 +2024-09-18 12:28:47,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=442840.0, ans=0.125 +2024-09-18 12:29:09,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=442920.0, ans=0.0 +2024-09-18 12:29:26,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=442960.0, ans=0.125 +2024-09-18 12:29:29,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=442960.0, ans=0.2 +2024-09-18 12:29:32,382 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:29:38,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_ff3.min_abs, batch_count=442960.0, ans=0.2 +2024-09-18 12:29:39,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=443000.0, ans=0.2 +2024-09-18 12:29:41,142 INFO [train.py:1198] (0/2) Epoch 25, batch 2150, loss[loss=0.2368, ctc_loss=0.13, cr_loss=0.3788, attn_decoder_loss=0.2402, over 29457.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1274, cr_loss=0.3695, attn_decoder_loss=0.246, over 5816463.15 frames. ], batch size: 78, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:29:41,965 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.94 vs. 
limit=10.0 +2024-09-18 12:29:43,108 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:29:52,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=443000.0, ans=0.125 +2024-09-18 12:29:54,895 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.193e+01 8.437e+01 8.935e+01 9.622e+01 1.303e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-18 12:30:07,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=443040.0, ans=0.2 +2024-09-18 12:30:18,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=443080.0, ans=0.1 +2024-09-18 12:30:20,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=443080.0, ans=0.0 +2024-09-18 12:30:35,082 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:30:53,790 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.95 vs. limit=15.0 +2024-09-18 12:30:59,574 INFO [train.py:1198] (0/2) Epoch 25, batch 2200, loss[loss=0.2508, ctc_loss=0.127, cr_loss=0.3521, attn_decoder_loss=0.2567, over 29628.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.128, cr_loss=0.3706, attn_decoder_loss=0.2463, over 5812665.67 frames. ], batch size: 86, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:31:09,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=443200.0, ans=0.0 +2024-09-18 12:31:54,309 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.43 vs. limit=15.0 +2024-09-18 12:31:59,815 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:32:16,851 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.95 vs. limit=22.5 +2024-09-18 12:32:17,484 INFO [train.py:1198] (0/2) Epoch 25, batch 2250, loss[loss=0.2516, ctc_loss=0.131, cr_loss=0.3862, attn_decoder_loss=0.2565, over 29689.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1279, cr_loss=0.3707, attn_decoder_loss=0.2463, over 5811821.71 frames. ], batch size: 82, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:32:31,059 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.266e+01 8.316e+01 8.883e+01 9.424e+01 4.658e+02, threshold=1.777e+02, percent-clipped=2.0 +2024-09-18 12:33:03,861 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.48 vs. limit=12.0 +2024-09-18 12:33:07,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=443520.0, ans=0.2 +2024-09-18 12:33:15,990 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=13.85 vs. 
limit=15.0 +2024-09-18 12:33:22,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=443560.0, ans=0.05 +2024-09-18 12:33:33,023 INFO [train.py:1198] (0/2) Epoch 25, batch 2300, loss[loss=0.2241, ctc_loss=0.1141, cr_loss=0.3552, attn_decoder_loss=0.2284, over 29315.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.127, cr_loss=0.369, attn_decoder_loss=0.2454, over 5799523.79 frames. ], batch size: 71, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:33:46,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=443640.0, ans=0.0 +2024-09-18 12:34:09,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=443680.0, ans=0.0 +2024-09-18 12:34:28,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=443720.0, ans=0.125 +2024-09-18 12:34:51,415 INFO [train.py:1198] (0/2) Epoch 25, batch 2350, loss[loss=0.2534, ctc_loss=0.1338, cr_loss=0.3852, attn_decoder_loss=0.2581, over 29691.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1273, cr_loss=0.3695, attn_decoder_loss=0.2456, over 5804728.64 frames. ], batch size: 83, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:34:53,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=443800.0, ans=0.125 +2024-09-18 12:35:04,930 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.160e+01 8.571e+01 9.088e+01 9.554e+01 1.522e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-18 12:35:27,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=443880.0, ans=0.125 +2024-09-18 12:35:38,894 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.06 vs. limit=22.5 +2024-09-18 12:35:43,196 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.37 vs. limit=15.0 +2024-09-18 12:35:59,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=443960.0, ans=0.125 +2024-09-18 12:36:02,793 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:36:10,419 INFO [train.py:1198] (0/2) Epoch 25, batch 2400, loss[loss=0.2296, ctc_loss=0.1194, cr_loss=0.3475, attn_decoder_loss=0.2341, over 29518.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1277, cr_loss=0.3701, attn_decoder_loss=0.2458, over 5808367.93 frames. ], batch size: 76, lr: 4.44e-03, grad_scale: 16.0 +2024-09-18 12:36:13,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=444000.0, ans=0.05 +2024-09-18 12:36:19,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=444000.0, ans=0.125 +2024-09-18 12:36:43,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.17 vs. 
limit=22.5 +2024-09-18 12:36:45,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=444080.0, ans=0.125 +2024-09-18 12:36:59,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=444120.0, ans=0.0 +2024-09-18 12:37:08,432 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:37:26,070 INFO [train.py:1198] (0/2) Epoch 25, batch 2450, loss[loss=0.2454, ctc_loss=0.1256, cr_loss=0.3734, attn_decoder_loss=0.2504, over 29720.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1283, cr_loss=0.3714, attn_decoder_loss=0.2468, over 5786358.89 frames. ], batch size: 82, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:37:34,224 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.83 vs. limit=15.0 +2024-09-18 12:37:40,940 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.533e+01 8.902e+01 9.594e+01 1.053e+02 2.320e+02, threshold=1.919e+02, percent-clipped=3.0 +2024-09-18 12:38:07,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=444280.0, ans=0.125 +2024-09-18 12:38:36,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=444360.0, ans=0.125 +2024-09-18 12:38:43,402 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.10 vs. limit=15.0 +2024-09-18 12:38:43,749 INFO [train.py:1198] (0/2) Epoch 25, batch 2500, loss[loss=0.2446, ctc_loss=0.1192, cr_loss=0.3593, attn_decoder_loss=0.2506, over 29614.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1285, cr_loss=0.3721, attn_decoder_loss=0.2468, over 5796785.94 frames. ], batch size: 86, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:39:19,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=444480.0, ans=0.125 +2024-09-18 12:39:34,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=444520.0, ans=0.025 +2024-09-18 12:39:35,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=444520.0, ans=0.125 +2024-09-18 12:39:41,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=444520.0, ans=0.95 +2024-09-18 12:39:45,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=444560.0, ans=0.1 +2024-09-18 12:40:02,052 INFO [train.py:1198] (0/2) Epoch 25, batch 2550, loss[loss=0.2273, ctc_loss=0.1176, cr_loss=0.3568, attn_decoder_loss=0.2316, over 29355.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1281, cr_loss=0.3714, attn_decoder_loss=0.2466, over 5798937.52 frames. 
], batch size: 67, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:40:17,178 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.300e+01 8.248e+01 8.745e+01 9.244e+01 1.627e+02, threshold=1.749e+02, percent-clipped=0.0 +2024-09-18 12:41:18,551 INFO [train.py:1198] (0/2) Epoch 25, batch 2600, loss[loss=0.2345, ctc_loss=0.1232, cr_loss=0.3658, attn_decoder_loss=0.2387, over 29449.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1282, cr_loss=0.3711, attn_decoder_loss=0.2469, over 5794332.44 frames. ], batch size: 78, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:41:22,338 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.20 vs. limit=15.0 +2024-09-18 12:41:27,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=444800.0, ans=0.0 +2024-09-18 12:41:44,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=444840.0, ans=0.125 +2024-09-18 12:42:03,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=444880.0, ans=0.1 +2024-09-18 12:42:15,879 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.41 vs. limit=6.0 +2024-09-18 12:42:24,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=444960.0, ans=0.95 +2024-09-18 12:42:25,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=444960.0, ans=0.125 +2024-09-18 12:42:36,073 INFO [train.py:1198] (0/2) Epoch 25, batch 2650, loss[loss=0.2559, ctc_loss=0.1412, cr_loss=0.4102, attn_decoder_loss=0.2595, over 29261.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1279, cr_loss=0.3711, attn_decoder_loss=0.2467, over 5801391.12 frames. ], batch size: 100, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:42:42,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.69 vs. 
limit=12.0
+2024-09-18 12:42:48,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=445000.0, ans=0.1
+2024-09-18 12:42:51,111 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.400e+01 8.486e+01 8.913e+01 9.474e+01 1.768e+02, threshold=1.783e+02, percent-clipped=1.0
+2024-09-18 12:43:02,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten.whitening_limit, batch_count=445040.0, ans=15.0
+2024-09-18 12:43:03,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=445040.0, ans=0.2
+2024-09-18 12:43:04,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=445080.0, ans=0.0
+2024-09-18 12:43:34,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=445120.0, ans=0.125
+2024-09-18 12:43:38,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=445160.0, ans=0.025
+2024-09-18 12:43:53,472 INFO [train.py:1198] (0/2) Epoch 25, batch 2700, loss[loss=0.2535, ctc_loss=0.1305, cr_loss=0.3848, attn_decoder_loss=0.2586, over 29517.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1288, cr_loss=0.3725, attn_decoder_loss=0.2474, over 5795328.70 frames. ], batch size: 87, lr: 4.43e-03, grad_scale: 8.0
+2024-09-18 12:44:26,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=445280.0, ans=0.0
+2024-09-18 12:44:36,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=445280.0, ans=0.125
+2024-09-18 12:44:43,070 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.11 vs. limit=22.5
+2024-09-18 12:45:02,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_ff2.min_abs, batch_count=445360.0, ans=0.1
+2024-09-18 12:45:09,283 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.27 vs. limit=8.0
+2024-09-18 12:45:09,582 INFO [train.py:1198] (0/2) Epoch 25, batch 2750, loss[loss=0.2308, ctc_loss=0.1182, cr_loss=0.3492, attn_decoder_loss=0.2355, over 29517.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1281, cr_loss=0.3713, attn_decoder_loss=0.2463, over 5793317.83 frames. ], batch size: 75, lr: 4.43e-03, grad_scale: 8.0
+2024-09-18 12:45:13,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=445400.0, ans=0.0
+2024-09-18 12:45:17,536 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 12:45:24,761 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.540e+01 9.041e+01 9.626e+01 3.086e+02, threshold=1.808e+02, percent-clipped=2.0
+2024-09-18 12:45:35,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=445440.0, ans=0.0
+2024-09-18 12:45:41,371 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.95 vs. limit=22.5
+2024-09-18 12:45:49,276 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.50 vs. limit=15.0
+2024-09-18 12:45:58,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.67 vs. limit=22.5
+2024-09-18 12:45:59,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=445520.0, ans=0.125
+2024-09-18 12:46:04,299 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.37 vs. limit=22.5
+2024-09-18 12:46:28,563 INFO [train.py:1198] (0/2) Epoch 25, batch 2800, loss[loss=0.2624, ctc_loss=0.1542, cr_loss=0.3821, attn_decoder_loss=0.266, over 19898.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1286, cr_loss=0.3719, attn_decoder_loss=0.2465, over 5772895.37 frames. ], batch size: 210, lr: 4.43e-03, grad_scale: 16.0
+2024-09-18 12:46:28,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=445600.0, ans=0.125
+2024-09-18 12:46:33,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=445600.0, ans=0.125
+2024-09-18 12:46:52,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=445640.0, ans=0.125
+2024-09-18 12:47:10,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=445680.0, ans=0.07
+2024-09-18 12:47:14,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=445720.0, ans=0.125
+2024-09-18 12:47:17,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=445720.0, ans=0.125
+2024-09-18 12:47:28,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=445720.0, ans=0.0
+2024-09-18 12:47:46,147 INFO [train.py:1198] (0/2) Epoch 25, batch 2850, loss[loss=0.2244, ctc_loss=0.1178, cr_loss=0.356, attn_decoder_loss=0.2284, over 29505.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1287, cr_loss=0.3724, attn_decoder_loss=0.2467, over 5758423.92 frames. ], batch size: 77, lr: 4.43e-03, grad_scale: 8.0
+2024-09-18 12:47:49,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=445800.0, ans=0.125
+2024-09-18 12:47:50,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=445800.0, ans=0.2
+2024-09-18 12:47:53,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=445800.0, ans=0.1
+2024-09-18 12:47:57,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=445800.0, ans=0.125
+2024-09-18 12:48:02,747 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.484e+01 8.566e+01 9.294e+01 9.797e+01 4.897e+02, threshold=1.859e+02, percent-clipped=4.0
+2024-09-18 12:48:12,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=445840.0, ans=0.2
+2024-09-18 12:48:35,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=445920.0, ans=0.025
+2024-09-18 12:49:01,845 INFO [train.py:1198] (0/2) Epoch 25, batch 2900, loss[loss=0.243, ctc_loss=0.1356, cr_loss=0.3923, attn_decoder_loss=0.2462, over 29412.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1294, cr_loss=0.3744, attn_decoder_loss=0.248, over 5784675.55 frames. ], batch size: 79, lr: 4.43e-03, grad_scale: 8.0
+2024-09-18 12:49:03,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=446000.0, ans=0.0
+2024-09-18 12:49:04,501 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.38 vs. limit=15.0
+2024-09-18 12:49:09,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=446000.0, ans=0.2
+2024-09-18 12:49:23,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=446040.0, ans=0.1
+2024-09-18 12:49:45,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=446080.0, ans=0.125
+2024-09-18 12:50:03,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=446160.0, ans=0.1
+2024-09-18 12:50:06,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=446160.0, ans=0.125
+2024-09-18 12:50:19,439 INFO [train.py:1198] (0/2) Epoch 25, batch 2950, loss[loss=0.2314, ctc_loss=0.1189, cr_loss=0.3632, attn_decoder_loss=0.2358, over 29501.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.128, cr_loss=0.3716, attn_decoder_loss=0.2465, over 5780423.70 frames. ], batch size: 75, lr: 4.43e-03, grad_scale: 8.0
+2024-09-18 12:50:19,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=446200.0, ans=0.1
+2024-09-18 12:50:22,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=446200.0, ans=0.0
+2024-09-18 12:50:25,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=446200.0, ans=0.125
+2024-09-18 12:50:25,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=446200.0, ans=0.1
+2024-09-18 12:50:34,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=446240.0, ans=0.125
+2024-09-18 12:50:36,057 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.338e+01 8.376e+01 8.898e+01 9.637e+01 1.288e+02, threshold=1.780e+02, percent-clipped=0.0
+2024-09-18 12:50:41,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=446240.0, ans=0.0
+2024-09-18 12:50:45,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=446240.0, ans=0.0
+2024-09-18 12:50:52,030 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.75 vs. limit=15.0
+2024-09-18 12:50:59,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=446280.0, ans=0.125
+2024-09-18 12:51:07,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=446320.0, ans=0.95
+2024-09-18 12:51:38,084 INFO [train.py:1198] (0/2) Epoch 25, batch 3000, loss[loss=0.2422, ctc_loss=0.1289, cr_loss=0.3786, attn_decoder_loss=0.2464, over 29730.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1278, cr_loss=0.3711, attn_decoder_loss=0.2464, over 5780386.62 frames. ], batch size: 81, lr: 4.43e-03, grad_scale: 8.0
+2024-09-18 12:51:38,085 INFO [train.py:1221] (0/2) Computing validation loss
+2024-09-18 12:51:56,649 INFO [train.py:1230] (0/2) Epoch 25, validation: loss=0.2113, ctc_loss=0.03809, cr_loss=5.582e-15, attn_decoder_loss=0.2305, over 944034.00 frames.
+2024-09-18 12:51:56,650 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB
+2024-09-18 12:51:58,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=446400.0, ans=0.0
+2024-09-18 12:52:01,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=446400.0, ans=0.125
+2024-09-18 12:52:22,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=446440.0, ans=0.125
+2024-09-18 12:52:24,352 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 12:52:34,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=446480.0, ans=0.125
+2024-09-18 12:53:05,824 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.22 vs. limit=22.5
+2024-09-18 12:53:12,532 INFO [train.py:1198] (0/2) Epoch 25, batch 3050, loss[loss=0.231, ctc_loss=0.119, cr_loss=0.3568, attn_decoder_loss=0.2355, over 29529.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1283, cr_loss=0.3727, attn_decoder_loss=0.2473, over 5774679.08 frames. ], batch size: 76, lr: 4.43e-03, grad_scale: 8.0
+2024-09-18 12:53:18,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=446600.0, ans=0.125
+2024-09-18 12:53:19,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=446600.0, ans=0.0
+2024-09-18 12:53:31,788 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 8.617e+01 9.221e+01 9.973e+01 3.035e+02, threshold=1.844e+02, percent-clipped=2.0
+2024-09-18 12:53:42,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=446640.0, ans=0.2
+2024-09-18 12:53:54,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=446680.0, ans=0.125
+2024-09-18 12:54:08,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=446720.0, ans=0.0
+2024-09-18 12:54:10,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=446720.0, ans=0.035
+2024-09-18 12:54:30,187 INFO [train.py:1198] (0/2) Epoch 25, batch 3100, loss[loss=0.2542, ctc_loss=0.1379, cr_loss=0.3973, attn_decoder_loss=0.2583, over 29311.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1283, cr_loss=0.3725, attn_decoder_loss=0.2469, over 5774939.09 frames. ], batch size: 100, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 12:54:36,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=446800.0, ans=0.125
+2024-09-18 12:54:46,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=446840.0, ans=0.125
+2024-09-18 12:55:01,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=446880.0, ans=0.1
+2024-09-18 12:55:05,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=446880.0, ans=0.125
+2024-09-18 12:55:16,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=446920.0, ans=0.0
+2024-09-18 12:55:21,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=446920.0, ans=0.125
+2024-09-18 12:55:27,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=446920.0, ans=0.125
+2024-09-18 12:55:28,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=446920.0, ans=0.125
+2024-09-18 12:55:48,326 INFO [train.py:1198] (0/2) Epoch 25, batch 3150, loss[loss=0.2533, ctc_loss=0.1388, cr_loss=0.3788, attn_decoder_loss=0.2576, over 28882.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1285, cr_loss=0.3731, attn_decoder_loss=0.2472, over 5781386.18 frames. ], batch size: 104, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 12:56:05,069 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.834e+01 8.618e+01 9.043e+01 9.824e+01 1.542e+02, threshold=1.809e+02, percent-clipped=0.0
+2024-09-18 12:56:22,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=447080.0, ans=0.025
+2024-09-18 12:56:32,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=447120.0, ans=0.125
+2024-09-18 12:56:32,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=447120.0, ans=0.0
+2024-09-18 12:56:42,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=447120.0, ans=0.125
+2024-09-18 12:56:46,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=447120.0, ans=0.125
+2024-09-18 12:56:47,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=447160.0, ans=0.1
+2024-09-18 12:56:52,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.67 vs. limit=15.0
+2024-09-18 12:57:04,425 INFO [train.py:1198] (0/2) Epoch 25, batch 3200, loss[loss=0.2463, ctc_loss=0.1307, cr_loss=0.3501, attn_decoder_loss=0.2514, over 29392.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1284, cr_loss=0.3719, attn_decoder_loss=0.2468, over 5791620.51 frames. ], batch size: 79, lr: 4.42e-03, grad_scale: 16.0
+2024-09-18 12:57:17,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=447200.0, ans=0.05
+2024-09-18 12:57:35,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=447280.0, ans=0.025
+2024-09-18 12:57:58,756 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 12:58:18,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=447360.0, ans=0.125
+2024-09-18 12:58:22,740 INFO [train.py:1198] (0/2) Epoch 25, batch 3250, loss[loss=0.2478, ctc_loss=0.1275, cr_loss=0.3754, attn_decoder_loss=0.2528, over 29720.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1286, cr_loss=0.3721, attn_decoder_loss=0.247, over 5798919.73 frames. ], batch size: 84, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 12:58:23,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=447400.0, ans=0.0
+2024-09-18 12:58:40,941 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.276e+01 8.602e+01 9.212e+01 9.778e+01 1.600e+02, threshold=1.842e+02, percent-clipped=0.0
+2024-09-18 12:58:47,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=447440.0, ans=0.1
+2024-09-18 12:58:49,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=447440.0, ans=0.2
+2024-09-18 12:58:55,918 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.05 vs. limit=12.0
+2024-09-18 12:58:56,090 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.56 vs. limit=15.0
+2024-09-18 12:58:58,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=447480.0, ans=0.1
+2024-09-18 12:59:01,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=447480.0, ans=0.1
+2024-09-18 12:59:10,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=447520.0, ans=0.0
+2024-09-18 12:59:13,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=447520.0, ans=0.125
+2024-09-18 12:59:21,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=447520.0, ans=0.125
+2024-09-18 12:59:30,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=447560.0, ans=0.1
+2024-09-18 12:59:33,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=447560.0, ans=0.05
+2024-09-18 12:59:40,969 INFO [train.py:1198] (0/2) Epoch 25, batch 3300, loss[loss=0.2429, ctc_loss=0.1221, cr_loss=0.3657, attn_decoder_loss=0.2482, over 28419.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1274, cr_loss=0.3699, attn_decoder_loss=0.2455, over 5797153.84 frames. ], batch size: 111, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 12:59:43,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=447600.0, ans=10.0
+2024-09-18 13:00:05,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=447640.0, ans=0.0
+2024-09-18 13:00:28,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=447720.0, ans=0.0
+2024-09-18 13:00:34,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=447720.0, ans=0.1
+2024-09-18 13:00:37,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=447720.0, ans=0.0
+2024-09-18 13:00:48,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=447760.0, ans=0.125
+2024-09-18 13:00:58,871 INFO [train.py:1198] (0/2) Epoch 25, batch 3350, loss[loss=0.2529, ctc_loss=0.1391, cr_loss=0.3976, attn_decoder_loss=0.2567, over 28864.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.128, cr_loss=0.3713, attn_decoder_loss=0.2462, over 5773403.49 frames. ], batch size: 104, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 13:01:17,273 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.442e+01 8.840e+01 9.298e+01 1.002e+02 3.178e+02, threshold=1.860e+02, percent-clipped=4.0
+2024-09-18 13:01:19,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=447840.0, ans=22.5
+2024-09-18 13:01:25,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=447840.0, ans=0.125
+2024-09-18 13:01:27,458 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.65 vs. limit=12.0
+2024-09-18 13:01:28,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=447880.0, ans=0.0
+2024-09-18 13:01:35,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=447880.0, ans=0.125
+2024-09-18 13:01:48,003 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:02:04,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=447960.0, ans=0.125
+2024-09-18 13:02:13,999 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-112000.pt
+2024-09-18 13:02:22,682 INFO [train.py:1198] (0/2) Epoch 25, batch 3400, loss[loss=0.2113, ctc_loss=0.1139, cr_loss=0.3381, attn_decoder_loss=0.2146, over 29380.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1284, cr_loss=0.3719, attn_decoder_loss=0.2463, over 5766485.63 frames. ], batch size: 67, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 13:02:22,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=448000.0, ans=0.125
+2024-09-18 13:02:33,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=448000.0, ans=0.125
+2024-09-18 13:02:46,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.10 vs. limit=15.0
+2024-09-18 13:02:59,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=448080.0, ans=0.125
+2024-09-18 13:03:02,436 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.67 vs. limit=15.0
+2024-09-18 13:03:10,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=448120.0, ans=0.0
+2024-09-18 13:03:17,851 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.18 vs. limit=15.0
+2024-09-18 13:03:18,755 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:03:27,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=448160.0, ans=0.1
+2024-09-18 13:03:30,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=448160.0, ans=0.2
+2024-09-18 13:03:38,933 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.36 vs. limit=15.0
+2024-09-18 13:03:40,799 INFO [train.py:1198] (0/2) Epoch 25, batch 3450, loss[loss=0.2586, ctc_loss=0.1349, cr_loss=0.3881, attn_decoder_loss=0.2637, over 28121.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1286, cr_loss=0.3723, attn_decoder_loss=0.2469, over 5773500.77 frames. ], batch size: 111, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 13:03:44,284 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:03:58,830 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.900e+01 8.450e+01 9.075e+01 9.587e+01 1.383e+02, threshold=1.815e+02, percent-clipped=0.0
+2024-09-18 13:04:06,272 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.07 vs. limit=22.5
+2024-09-18 13:04:08,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=448240.0, ans=0.1
+2024-09-18 13:04:31,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=448320.0, ans=0.035
+2024-09-18 13:04:37,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=448320.0, ans=10.0
+2024-09-18 13:04:47,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=448360.0, ans=0.125
+2024-09-18 13:04:57,768 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:04:58,899 INFO [train.py:1198] (0/2) Epoch 25, batch 3500, loss[loss=0.2171, ctc_loss=0.1025, cr_loss=0.3181, attn_decoder_loss=0.2228, over 29333.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1281, cr_loss=0.3711, attn_decoder_loss=0.2462, over 5776705.94 frames. ], batch size: 71, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 13:05:14,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=448440.0, ans=0.125
+2024-09-18 13:05:14,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=448440.0, ans=0.04949747468305833
+2024-09-18 13:05:30,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.34 vs. limit=22.5
+2024-09-18 13:05:48,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=448520.0, ans=0.07
+2024-09-18 13:05:54,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=448520.0, ans=0.125
+2024-09-18 13:05:55,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=448520.0, ans=0.1
+2024-09-18 13:06:13,960 INFO [train.py:1198] (0/2) Epoch 25, batch 3550, loss[loss=0.2577, ctc_loss=0.1353, cr_loss=0.3942, attn_decoder_loss=0.2625, over 29706.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1279, cr_loss=0.371, attn_decoder_loss=0.2462, over 5784502.79 frames. ], batch size: 89, lr: 4.42e-03, grad_scale: 8.0
+2024-09-18 13:06:15,725 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:06:21,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=448600.0, ans=0.0
+2024-09-18 13:06:28,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=448640.0, ans=0.125
+2024-09-18 13:06:31,448 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.716e+01 8.657e+01 9.167e+01 9.744e+01 2.782e+02, threshold=1.833e+02, percent-clipped=1.0
+2024-09-18 13:06:34,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=448640.0, ans=0.0
+2024-09-18 13:07:04,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=448720.0, ans=0.0
+2024-09-18 13:07:07,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=448720.0, ans=0.025
+2024-09-18 13:07:16,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=448760.0, ans=0.2
+2024-09-18 13:07:24,533 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.07 vs. limit=15.0
+2024-09-18 13:07:28,554 INFO [train.py:1198] (0/2) Epoch 25, batch 3600, loss[loss=0.2367, ctc_loss=0.123, cr_loss=0.3573, attn_decoder_loss=0.2414, over 29478.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1276, cr_loss=0.3709, attn_decoder_loss=0.2462, over 5793848.34 frames. ], batch size: 77, lr: 4.41e-03, grad_scale: 16.0
+2024-09-18 13:07:41,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=448800.0, ans=0.0
+2024-09-18 13:08:03,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=448880.0, ans=0.0
+2024-09-18 13:08:05,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=448880.0, ans=0.125
+2024-09-18 13:08:18,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=448920.0, ans=0.125
+2024-09-18 13:08:21,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=448920.0, ans=0.125
+2024-09-18 13:08:23,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=448920.0, ans=0.5
+2024-09-18 13:08:30,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=448960.0, ans=0.0
+2024-09-18 13:08:33,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=448960.0, ans=0.125
+2024-09-18 13:08:39,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=448960.0, ans=0.2
+2024-09-18 13:08:44,785 INFO [train.py:1198] (0/2) Epoch 25, batch 3650, loss[loss=0.2662, ctc_loss=0.1461, cr_loss=0.4305, attn_decoder_loss=0.27, over 29509.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1274, cr_loss=0.3698, attn_decoder_loss=0.2459, over 5795317.69 frames. ], batch size: 90, lr: 4.41e-03, grad_scale: 16.0
+2024-09-18 13:08:53,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten.whitening_limit, batch_count=449000.0, ans=22.5
+2024-09-18 13:08:55,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=449000.0, ans=0.07
+2024-09-18 13:09:02,518 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.394e+01 8.539e+01 8.955e+01 9.424e+01 1.447e+02, threshold=1.791e+02, percent-clipped=0.0
+2024-09-18 13:09:18,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.11 vs. limit=22.5
+2024-09-18 13:09:41,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=449120.0, ans=0.2
+2024-09-18 13:09:44,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=449160.0, ans=0.125
+2024-09-18 13:09:59,592 INFO [train.py:1198] (0/2) Epoch 25, batch 3700, loss[loss=0.261, ctc_loss=0.1398, cr_loss=0.3904, attn_decoder_loss=0.2658, over 29712.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1275, cr_loss=0.3703, attn_decoder_loss=0.2462, over 5805356.94 frames. ], batch size: 84, lr: 4.41e-03, grad_scale: 8.0
+2024-09-18 13:09:59,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=449200.0, ans=0.0
+2024-09-18 13:10:16,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=449240.0, ans=0.025
+2024-09-18 13:10:16,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=449240.0, ans=0.0
+2024-09-18 13:10:18,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=449240.0, ans=0.1
+2024-09-18 13:10:22,927 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.24 vs. limit=15.0
+2024-09-18 13:10:31,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=449280.0, ans=0.125
+2024-09-18 13:10:44,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=449320.0, ans=0.1
+2024-09-18 13:10:56,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=449320.0, ans=0.2
+2024-09-18 13:11:15,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=449400.0, ans=0.0
+2024-09-18 13:11:16,099 INFO [train.py:1198] (0/2) Epoch 25, batch 3750, loss[loss=0.2194, ctc_loss=0.1201, cr_loss=0.3559, attn_decoder_loss=0.2225, over 29358.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1278, cr_loss=0.3709, attn_decoder_loss=0.2461, over 5808679.54 frames. ], batch size: 67, lr: 4.41e-03, grad_scale: 8.0
+2024-09-18 13:11:25,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=449400.0, ans=0.125
+2024-09-18 13:11:25,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=449400.0, ans=0.125
+2024-09-18 13:11:26,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=449400.0, ans=0.0
+2024-09-18 13:11:31,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=449440.0, ans=0.1
+2024-09-18 13:11:35,578 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.392e+01 8.983e+01 9.467e+01 5.174e+02, threshold=1.797e+02, percent-clipped=1.0
+2024-09-18 13:11:37,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.08 vs. limit=22.5
+2024-09-18 13:11:43,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=449440.0, ans=0.125
+2024-09-18 13:11:58,620 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.41 vs. limit=15.0
+2024-09-18 13:12:20,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=449560.0, ans=0.125
+2024-09-18 13:12:22,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=449560.0, ans=0.0
+2024-09-18 13:12:23,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=449560.0, ans=0.2
+2024-09-18 13:12:26,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=449560.0, ans=0.125
+2024-09-18 13:12:28,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=449560.0, ans=0.95
+2024-09-18 13:12:31,189 INFO [train.py:1198] (0/2) Epoch 25, batch 3800, loss[loss=0.246, ctc_loss=0.124, cr_loss=0.3757, attn_decoder_loss=0.2512, over 29606.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1274, cr_loss=0.3699, attn_decoder_loss=0.2457, over 5800381.58 frames. ], batch size: 86, lr: 4.41e-03, grad_scale: 8.0
+2024-09-18 13:12:34,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=449600.0, ans=0.125
+2024-09-18 13:12:36,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=449600.0, ans=0.125
+2024-09-18 13:12:58,851 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.08 vs. limit=10.0
+2024-09-18 13:13:19,498 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.80 vs. limit=10.0
+2024-09-18 13:13:35,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=449760.0, ans=0.125
+2024-09-18 13:13:47,363 INFO [train.py:1198] (0/2) Epoch 25, batch 3850, loss[loss=0.2638, ctc_loss=0.1488, cr_loss=0.4071, attn_decoder_loss=0.2675, over 29272.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1272, cr_loss=0.3698, attn_decoder_loss=0.2454, over 5815269.13 frames. ], batch size: 100, lr: 4.41e-03, grad_scale: 8.0
+2024-09-18 13:13:47,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=449800.0, ans=0.125
+2024-09-18 13:13:53,896 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.78 vs. limit=15.0
+2024-09-18 13:14:06,411 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.121e+01 8.692e+01 9.184e+01 9.971e+01 1.957e+02, threshold=1.837e+02, percent-clipped=1.0
+2024-09-18 13:14:17,358 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:14:24,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=449880.0, ans=0.125
+2024-09-18 13:14:31,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=449920.0, ans=0.125
+2024-09-18 13:14:33,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.88 vs. limit=12.0
+2024-09-18 13:14:37,567 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.18 vs. limit=10.0
+2024-09-18 13:15:02,115 INFO [train.py:1198] (0/2) Epoch 25, batch 3900, loss[loss=0.2513, ctc_loss=0.1365, cr_loss=0.3823, attn_decoder_loss=0.2556, over 29631.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1276, cr_loss=0.3707, attn_decoder_loss=0.246, over 5818984.38 frames. ], batch size: 86, lr: 4.41e-03, grad_scale: 8.0
+2024-09-18 13:15:05,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=450000.0, ans=0.0
+2024-09-18 13:15:15,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=450040.0, ans=0.125
+2024-09-18 13:15:17,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.23 vs. limit=22.5
+2024-09-18 13:15:29,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=450040.0, ans=0.2
+2024-09-18 13:15:37,041 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.86 vs. limit=12.0
+2024-09-18 13:15:49,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=450120.0, ans=0.125
+2024-09-18 13:15:51,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=450120.0, ans=0.0
+2024-09-18 13:16:10,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=450160.0, ans=0.95
+2024-09-18 13:16:16,554 INFO [train.py:1198] (0/2) Epoch 25, batch 3950, loss[loss=0.2576, ctc_loss=0.1443, cr_loss=0.4002, attn_decoder_loss=0.2613, over 29447.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1273, cr_loss=0.3702, attn_decoder_loss=0.246, over 5837898.56 frames. ], batch size: 97, lr: 4.41e-03, grad_scale: 8.0
+2024-09-18 13:16:32,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.60 vs. limit=22.5
+2024-09-18 13:16:37,511 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.531e+01 8.544e+01 9.055e+01 9.627e+01 1.387e+02, threshold=1.811e+02, percent-clipped=0.0
+2024-09-18 13:16:48,690 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.41 vs. limit=22.5
+2024-09-18 13:16:54,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=450280.0, ans=0.0
+2024-09-18 13:17:32,514 INFO [train.py:1198] (0/2) Epoch 25, batch 4000, loss[loss=0.2277, ctc_loss=0.1119, cr_loss=0.329, attn_decoder_loss=0.2333, over 29510.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1276, cr_loss=0.3702, attn_decoder_loss=0.246, over 5814711.41 frames. ], batch size: 74, lr: 4.41e-03, grad_scale: 16.0
+2024-09-18 13:17:40,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=450400.0, ans=0.0
+2024-09-18 13:17:45,298 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.91 vs. limit=22.5
+2024-09-18 13:18:00,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=450480.0, ans=0.0
+2024-09-18 13:18:15,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=450520.0, ans=0.2
+2024-09-18 13:18:48,056 INFO [train.py:1198] (0/2) Epoch 25, batch 4050, loss[loss=0.2633, ctc_loss=0.1592, cr_loss=0.3871, attn_decoder_loss=0.2663, over 20209.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1279, cr_loss=0.3708, attn_decoder_loss=0.2461, over 5798214.66 frames. ], batch size: 210, lr: 4.41e-03, grad_scale: 8.0
+2024-09-18 13:18:51,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=450600.0, ans=0.1
+2024-09-18 13:18:54,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.39 vs. limit=15.0
+2024-09-18 13:18:57,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=450600.0, ans=0.125
+2024-09-18 13:19:05,188 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=9.05 vs. limit=15.0
+2024-09-18 13:19:05,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=450640.0, ans=0.1
+2024-09-18 13:19:08,360 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.463e+01 8.851e+01 9.697e+01 1.095e+02 3.076e+02, threshold=1.939e+02, percent-clipped=2.0
+2024-09-18 13:19:28,209 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.90 vs. limit=22.5
+2024-09-18 13:19:31,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=450720.0, ans=0.125
+2024-09-18 13:19:37,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=450720.0, ans=0.0
+2024-09-18 13:19:37,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=450720.0, ans=0.125
+2024-09-18 13:19:54,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=450760.0, ans=0.1
+2024-09-18 13:20:01,374 INFO [train.py:1198] (0/2) Epoch 25, batch 4100, loss[loss=0.2624, ctc_loss=0.1437, cr_loss=0.396, attn_decoder_loss=0.2668, over 29522.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1284, cr_loss=0.3716, attn_decoder_loss=0.2463, over 5793822.08 frames. ], batch size: 90, lr: 4.40e-03, grad_scale: 8.0
+2024-09-18 13:20:06,832 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.89 vs. limit=22.5
+2024-09-18 13:20:08,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=450800.0, ans=0.0
+2024-09-18 13:20:15,798 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.15 vs. limit=6.0
+2024-09-18 13:20:23,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=450840.0, ans=0.125
+2024-09-18 13:20:32,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=450880.0, ans=0.125
+2024-09-18 13:20:37,628 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.77 vs. limit=22.5
+2024-09-18 13:20:39,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=450880.0, ans=0.0
+2024-09-18 13:20:40,226 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.48 vs. limit=22.5
+2024-09-18 13:20:51,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=450920.0, ans=0.025
+2024-09-18 13:21:00,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=450960.0, ans=0.1
+2024-09-18 13:21:01,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.65 vs. limit=22.5
+2024-09-18 13:21:01,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=450960.0, ans=0.025
+2024-09-18 13:21:14,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=451000.0, ans=0.125
+2024-09-18 13:21:16,040 INFO [train.py:1198] (0/2) Epoch 25, batch 4150, loss[loss=0.2338, ctc_loss=0.1288, cr_loss=0.3636, attn_decoder_loss=0.2374, over 29495.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1281, cr_loss=0.3715, attn_decoder_loss=0.246, over 5799471.11 frames. ], batch size: 77, lr: 4.40e-03, grad_scale: 8.0
+2024-09-18 13:21:36,737 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.849e+01 8.352e+01 8.963e+01 9.819e+01 3.617e+02, threshold=1.793e+02, percent-clipped=2.0
+2024-09-18 13:21:44,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=451080.0, ans=0.125
+2024-09-18 13:21:47,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=451080.0, ans=0.0
+2024-09-18 13:21:47,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=451080.0, ans=0.2
+2024-09-18 13:22:02,560 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.40 vs. limit=22.5
+2024-09-18 13:22:19,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=451160.0, ans=0.125
+2024-09-18 13:22:30,265 INFO [train.py:1198] (0/2) Epoch 25, batch 4200, loss[loss=0.2655, ctc_loss=0.152, cr_loss=0.4201, attn_decoder_loss=0.2688, over 29506.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1283, cr_loss=0.372, attn_decoder_loss=0.2463, over 5801636.99 frames. ], batch size: 90, lr: 4.40e-03, grad_scale: 8.0
+2024-09-18 13:22:32,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=451200.0, ans=0.025
+2024-09-18 13:23:15,501 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.87 vs. limit=22.5
+2024-09-18 13:23:35,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=451360.0, ans=0.2
+2024-09-18 13:23:36,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=451360.0, ans=0.125
+2024-09-18 13:23:45,266 INFO [train.py:1198] (0/2) Epoch 25, batch 4250, loss[loss=0.2316, ctc_loss=0.1156, cr_loss=0.3418, attn_decoder_loss=0.2369, over 29511.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1278, cr_loss=0.3713, attn_decoder_loss=0.2464, over 5807612.38 frames. ], batch size: 74, lr: 4.40e-03, grad_scale: 8.0
+2024-09-18 13:23:45,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=451400.0, ans=0.5
+2024-09-18 13:24:03,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=451440.0, ans=0.0
+2024-09-18 13:24:04,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=451440.0, ans=0.125
+2024-09-18 13:24:05,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.84 vs. limit=22.5
+2024-09-18 13:24:05,762 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.479e+01 8.410e+01 8.848e+01 9.485e+01 3.555e+02, threshold=1.770e+02, percent-clipped=1.0
+2024-09-18 13:24:17,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=451480.0, ans=0.0
+2024-09-18 13:24:25,221 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:24:26,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=451480.0, ans=0.1
+2024-09-18 13:24:36,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=451520.0, ans=0.0
+2024-09-18 13:24:51,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=451560.0, ans=0.0
+2024-09-18 13:24:57,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=451560.0, ans=0.1
+2024-09-18 13:24:58,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=451600.0, ans=0.0
+2024-09-18 13:24:59,841 INFO [train.py:1198] (0/2) Epoch 25, batch 4300, loss[loss=0.2433, ctc_loss=0.1265, cr_loss=0.3685, attn_decoder_loss=0.2481, over 29549.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1275, cr_loss=0.3705, attn_decoder_loss=0.2464, over 5796859.01 frames. ], batch size: 87, lr: 4.40e-03, grad_scale: 8.0
+2024-09-18 13:25:04,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=451600.0, ans=0.0
+2024-09-18 13:25:05,365 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.68 vs. limit=6.0
+2024-09-18 13:25:18,610 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.46 vs. limit=15.0
+2024-09-18 13:25:35,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=451680.0, ans=0.2
+2024-09-18 13:25:44,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=451720.0, ans=0.125
+2024-09-18 13:25:49,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=451720.0, ans=0.125
+2024-09-18 13:26:00,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=451760.0, ans=0.125
+2024-09-18 13:26:13,088 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.13 vs. limit=15.0
+2024-09-18 13:26:15,314 INFO [train.py:1198] (0/2) Epoch 25, batch 4350, loss[loss=0.2521, ctc_loss=0.1367, cr_loss=0.3751, attn_decoder_loss=0.2566, over 29518.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1306, cr_loss=0.3768, attn_decoder_loss=0.2498, over 5799080.20 frames. ], batch size: 97, lr: 4.40e-03, grad_scale: 8.0
+2024-09-18 13:26:20,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=451800.0, ans=0.1
+2024-09-18 13:26:24,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=451800.0, ans=0.0
+2024-09-18 13:26:29,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=451840.0, ans=0.1
+2024-09-18 13:26:30,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=451840.0, ans=0.0
+2024-09-18 13:26:36,119 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.060e+01 8.772e+01 9.206e+01 9.719e+01 3.076e+02, threshold=1.841e+02, percent-clipped=2.0
+2024-09-18 13:26:45,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=451880.0, ans=0.1
+2024-09-18 13:27:00,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=451920.0, ans=0.125
+2024-09-18 13:27:04,141 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:27:18,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=451960.0, ans=0.125
+2024-09-18 13:27:21,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=451960.0, ans=0.1
+2024-09-18 13:27:29,898 INFO [train.py:1198] (0/2) Epoch 25, batch 4400, loss[loss=0.2537, ctc_loss=0.1395, cr_loss=0.3822, attn_decoder_loss=0.2579, over 27221.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1322, cr_loss=0.3801, attn_decoder_loss=0.2519, over 5769166.07 frames. ], batch size: 124, lr: 4.40e-03, grad_scale: 16.0
+2024-09-18 13:27:37,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=452000.0, ans=0.07
+2024-09-18 13:27:40,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=452000.0, ans=0.125
+2024-09-18 13:27:45,165 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.57 vs. limit=12.0
+2024-09-18 13:27:50,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=452040.0, ans=0.125
+2024-09-18 13:27:52,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=452040.0, ans=0.1
+2024-09-18 13:27:54,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.46 vs. limit=15.0
+2024-09-18 13:28:09,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=452080.0, ans=0.09899494936611666
+2024-09-18 13:28:11,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=452080.0, ans=0.2
+2024-09-18 13:28:44,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.98 vs. limit=22.5
+2024-09-18 13:28:44,345 INFO [train.py:1198] (0/2) Epoch 25, batch 4450, loss[loss=0.2669, ctc_loss=0.158, cr_loss=0.3915, attn_decoder_loss=0.2703, over 20212.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1364, cr_loss=0.385, attn_decoder_loss=0.2545, over 5575090.92 frames. ], batch size: 209, lr: 4.40e-03, grad_scale: 8.0
+2024-09-18 13:28:44,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=452200.0, ans=0.025
+2024-09-18 13:28:49,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=452200.0, ans=0.1
+2024-09-18 13:28:50,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=452200.0, ans=0.0
+2024-09-18 13:29:07,252 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.138e+01 9.104e+01 9.870e+01 1.187e+02 3.111e+02, threshold=1.974e+02, percent-clipped=3.0
+2024-09-18 13:29:10,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=452240.0, ans=0.2
+2024-09-18 13:29:31,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=12.42 vs. limit=15.0
+2024-09-18 13:29:34,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=452320.0, ans=0.125
+2024-09-18 13:29:42,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=452320.0, ans=0.0
+2024-09-18 13:30:00,168 INFO [train.py:1198] (0/2) Epoch 25, batch 4500, loss[loss=0.2546, ctc_loss=0.1459, cr_loss=0.3798, attn_decoder_loss=0.2582, over 19677.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1404, cr_loss=0.3873, attn_decoder_loss=0.2566, over 5235547.44 frames. ], batch size: 209, lr: 4.40e-03, grad_scale: 8.0
+2024-09-18 13:30:20,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=452440.0, ans=0.125
+2024-09-18 13:30:23,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=452440.0, ans=0.1
+2024-09-18 13:30:30,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=452480.0, ans=0.0
+2024-09-18 13:30:37,649 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-25.pt
+2024-09-18 13:31:31,899 INFO [train.py:1198] (0/2) Epoch 26, batch 0, loss[loss=0.2292, ctc_loss=0.1129, cr_loss=0.3446, attn_decoder_loss=0.2344, over 29599.00 frames. ], tot_loss[loss=0.2292, ctc_loss=0.1129, cr_loss=0.3446, attn_decoder_loss=0.2344, over 29599.00 frames. ], batch size: 73, lr: 4.31e-03, grad_scale: 16.0
+2024-09-18 13:31:31,900 INFO [train.py:1221] (0/2) Computing validation loss
+2024-09-18 13:31:52,393 INFO [train.py:1230] (0/2) Epoch 26, validation: loss=0.2126, ctc_loss=0.03779, cr_loss=5.994e-15, attn_decoder_loss=0.232, over 944034.00 frames.
+2024-09-18 13:31:52,394 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB
+2024-09-18 13:31:58,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=452500.0, ans=0.0
+2024-09-18 13:32:06,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=452540.0, ans=0.1
+2024-09-18 13:32:15,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=452540.0, ans=0.1
+2024-09-18 13:32:39,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=452620.0, ans=0.2
+2024-09-18 13:32:45,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=452620.0, ans=0.1
+2024-09-18 13:32:52,739 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.560e+01 9.299e+01 1.068e+02 1.174e+02 2.339e+02, threshold=2.135e+02, percent-clipped=1.0
+2024-09-18 13:33:07,811 INFO [train.py:1198] (0/2) Epoch 26, batch 50, loss[loss=0.2047, ctc_loss=0.09705, cr_loss=0.3127, attn_decoder_loss=0.2097, over 29467.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1302, cr_loss=0.3758, attn_decoder_loss=0.2481, over 1267884.01 frames. ], batch size: 70, lr: 4.31e-03, grad_scale: 16.0
+2024-09-18 13:33:08,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=452700.0, ans=0.0
+2024-09-18 13:33:53,156 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.43 vs. limit=6.0
+2024-09-18 13:34:18,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=452860.0, ans=0.0
+2024-09-18 13:34:24,115 INFO [train.py:1198] (0/2) Epoch 26, batch 100, loss[loss=0.231, ctc_loss=0.1259, cr_loss=0.3824, attn_decoder_loss=0.2341, over 29546.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1302, cr_loss=0.3753, attn_decoder_loss=0.2494, over 2252493.90 frames. ], batch size: 76, lr: 4.31e-03, grad_scale: 8.0
+2024-09-18 13:34:42,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=452940.0, ans=0.0
+2024-09-18 13:34:57,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=452980.0, ans=0.2
+2024-09-18 13:35:27,635 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.345e+01 8.544e+01 8.982e+01 9.348e+01 1.241e+02, threshold=1.796e+02, percent-clipped=0.0
+2024-09-18 13:35:43,420 INFO [train.py:1198] (0/2) Epoch 26, batch 150, loss[loss=0.2223, ctc_loss=0.1209, cr_loss=0.3668, attn_decoder_loss=0.2254, over 29457.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1283, cr_loss=0.3717, attn_decoder_loss=0.2473, over 3048354.46 frames. ], batch size: 70, lr: 4.31e-03, grad_scale: 8.0
+2024-09-18 13:35:55,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=453100.0, ans=0.1
+2024-09-18 13:35:58,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=453140.0, ans=0.1
+2024-09-18 13:36:02,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=453140.0, ans=0.1
+2024-09-18 13:36:22,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=453180.0, ans=10.0
+2024-09-18 13:36:29,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=453220.0, ans=0.125
+2024-09-18 13:36:31,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.02 vs. limit=15.0
+2024-09-18 13:36:50,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=453260.0, ans=0.0
+2024-09-18 13:36:58,956 INFO [train.py:1198] (0/2) Epoch 26, batch 200, loss[loss=0.2519, ctc_loss=0.1368, cr_loss=0.3876, attn_decoder_loss=0.2561, over 27426.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.128, cr_loss=0.371, attn_decoder_loss=0.2467, over 3660028.03 frames. ], batch size: 124, lr: 4.31e-03, grad_scale: 8.0
+2024-09-18 13:37:27,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=453380.0, ans=0.0
+2024-09-18 13:37:36,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=453380.0, ans=0.125
+2024-09-18 13:37:36,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=453380.0, ans=0.125
+2024-09-18 13:37:55,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=453420.0, ans=0.2
+2024-09-18 13:38:00,824 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.772e+01 8.385e+01 8.934e+01 9.482e+01 1.708e+02, threshold=1.787e+02, percent-clipped=0.0
+2024-09-18 13:38:14,315 INFO [train.py:1198] (0/2) Epoch 26, batch 250, loss[loss=0.2556, ctc_loss=0.1406, cr_loss=0.3865, attn_decoder_loss=0.2597, over 29276.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1271, cr_loss=0.3695, attn_decoder_loss=0.2462, over 4142769.39 frames. ], batch size: 100, lr: 4.30e-03, grad_scale: 8.0
+2024-09-18 13:38:28,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=453540.0, ans=0.125
+2024-09-18 13:38:46,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=453580.0, ans=0.2
+2024-09-18 13:38:57,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=453580.0, ans=0.125
+2024-09-18 13:39:03,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=453620.0, ans=0.1
+2024-09-18 13:39:08,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=453620.0, ans=0.125
+2024-09-18 13:39:17,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=453660.0, ans=0.1
+2024-09-18 13:39:34,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=453700.0, ans=0.2
+2024-09-18 13:39:35,292 INFO [train.py:1198] (0/2) Epoch 26, batch 300, loss[loss=0.2462, ctc_loss=0.1332, cr_loss=0.3864, attn_decoder_loss=0.2501, over 29494.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.127, cr_loss=0.3695, attn_decoder_loss=0.2459, over 4510845.22 frames. ], batch size: 92, lr: 4.30e-03, grad_scale: 8.0
+2024-09-18 13:40:01,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=453740.0, ans=0.0
+2024-09-18 13:40:04,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=453780.0, ans=0.1
+2024-09-18 13:40:22,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=453820.0, ans=0.0
+2024-09-18 13:40:29,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=453820.0, ans=0.025
+2024-09-18 13:40:37,316 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.478e+01 8.450e+01 8.925e+01 9.500e+01 1.325e+02, threshold=1.785e+02, percent-clipped=0.0
+2024-09-18 13:40:50,747 INFO [train.py:1198] (0/2) Epoch 26, batch 350, loss[loss=0.2134, ctc_loss=0.1044, cr_loss=0.3349, attn_decoder_loss=0.218, over 29337.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1272, cr_loss=0.3698, attn_decoder_loss=0.2461, over 4796190.69 frames. ], batch size: 71, lr: 4.30e-03, grad_scale: 8.0
+2024-09-18 13:41:09,167 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 13:41:43,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=454020.0, ans=0.125
+2024-09-18 13:41:47,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=454020.0, ans=0.1
+2024-09-18 13:42:05,698 INFO [train.py:1198] (0/2) Epoch 26, batch 400, loss[loss=0.2451, ctc_loss=0.1314, cr_loss=0.3728, attn_decoder_loss=0.2495, over 29724.00 frames. 
], tot_loss[loss=0.2409, ctc_loss=0.1265, cr_loss=0.3688, attn_decoder_loss=0.2455, over 5025975.27 frames. ], batch size: 82, lr: 4.30e-03, grad_scale: 16.0 +2024-09-18 13:42:24,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=454140.0, ans=0.125 +2024-09-18 13:42:50,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=454220.0, ans=0.0 +2024-09-18 13:43:07,925 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.062e+01 8.396e+01 8.968e+01 9.786e+01 1.327e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 13:43:26,151 INFO [train.py:1198] (0/2) Epoch 26, batch 450, loss[loss=0.2478, ctc_loss=0.1286, cr_loss=0.3747, attn_decoder_loss=0.2527, over 29682.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1269, cr_loss=0.37, attn_decoder_loss=0.2456, over 5187358.23 frames. ], batch size: 83, lr: 4.30e-03, grad_scale: 16.0 +2024-09-18 13:44:12,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=454420.0, ans=0.125 +2024-09-18 13:44:28,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.29 vs. limit=6.0 +2024-09-18 13:44:32,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=454460.0, ans=0.125 +2024-09-18 13:44:42,486 INFO [train.py:1198] (0/2) Epoch 26, batch 500, loss[loss=0.2564, ctc_loss=0.13, cr_loss=0.3716, attn_decoder_loss=0.2621, over 29486.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1265, cr_loss=0.3693, attn_decoder_loss=0.2451, over 5330875.21 frames. ], batch size: 94, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:44:42,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=454500.0, ans=0.125 +2024-09-18 13:44:45,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=454500.0, ans=0.0 +2024-09-18 13:44:53,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=454500.0, ans=0.0 +2024-09-18 13:44:55,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=454500.0, ans=0.2 +2024-09-18 13:45:07,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=454540.0, ans=0.04949747468305833 +2024-09-18 13:45:24,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=454580.0, ans=0.0 +2024-09-18 13:45:40,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=454620.0, ans=0.0 +2024-09-18 13:45:46,394 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.542e+01 8.427e+01 8.869e+01 9.503e+01 2.659e+02, threshold=1.774e+02, percent-clipped=2.0 +2024-09-18 13:45:48,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=454660.0, ans=0.125 +2024-09-18 13:45:58,555 INFO [train.py:1198] (0/2) Epoch 26, batch 550, loss[loss=0.2658, ctc_loss=0.1479, cr_loss=0.4119, attn_decoder_loss=0.2698, over 28867.00 frames. 
], tot_loss[loss=0.2408, ctc_loss=0.1267, cr_loss=0.3698, attn_decoder_loss=0.2453, over 5422545.43 frames. ], batch size: 104, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:46:09,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=454700.0, ans=0.125 +2024-09-18 13:46:38,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=454780.0, ans=0.125 +2024-09-18 13:46:47,726 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:46:49,757 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.92 vs. limit=10.0 +2024-09-18 13:46:51,125 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.64 vs. limit=15.0 +2024-09-18 13:47:04,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=454860.0, ans=0.0 +2024-09-18 13:47:16,789 INFO [train.py:1198] (0/2) Epoch 26, batch 600, loss[loss=0.2472, ctc_loss=0.1307, cr_loss=0.3698, attn_decoder_loss=0.2519, over 29314.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1266, cr_loss=0.3696, attn_decoder_loss=0.2454, over 5509413.92 frames. ], batch size: 100, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:47:21,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=454900.0, ans=0.1 +2024-09-18 13:47:21,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=454900.0, ans=0.125 +2024-09-18 13:47:23,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=454900.0, ans=0.04949747468305833 +2024-09-18 13:47:24,550 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.95 vs. limit=22.5 +2024-09-18 13:47:31,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=454900.0, ans=0.2 +2024-09-18 13:47:31,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=454900.0, ans=0.0 +2024-09-18 13:47:48,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.68 vs. 
limit=15.0 +2024-09-18 13:48:22,378 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.100e+01 8.526e+01 8.982e+01 9.575e+01 5.252e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-18 13:48:24,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=455060.0, ans=0.1 +2024-09-18 13:48:30,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=455060.0, ans=0.125 +2024-09-18 13:48:31,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=455060.0, ans=0.0 +2024-09-18 13:48:34,586 INFO [train.py:1198] (0/2) Epoch 26, batch 650, loss[loss=0.2438, ctc_loss=0.1331, cr_loss=0.3941, attn_decoder_loss=0.2474, over 29772.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1258, cr_loss=0.3681, attn_decoder_loss=0.2447, over 5587581.50 frames. ], batch size: 81, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:48:34,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=455100.0, ans=0.1 +2024-09-18 13:48:50,431 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.24 vs. limit=15.0 +2024-09-18 13:49:05,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=455180.0, ans=0.1 +2024-09-18 13:49:16,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=455180.0, ans=0.0 +2024-09-18 13:49:27,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=455220.0, ans=0.1 +2024-09-18 13:49:36,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.41 vs. limit=15.0 +2024-09-18 13:49:40,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=455260.0, ans=0.2 +2024-09-18 13:49:43,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=455260.0, ans=0.125 +2024-09-18 13:49:50,939 INFO [train.py:1198] (0/2) Epoch 26, batch 700, loss[loss=0.2233, ctc_loss=0.1142, cr_loss=0.3488, attn_decoder_loss=0.2277, over 29537.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.126, cr_loss=0.3686, attn_decoder_loss=0.2449, over 5637348.01 frames. ], batch size: 76, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:50:07,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=455340.0, ans=0.125 +2024-09-18 13:50:08,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.95 vs. limit=15.0 +2024-09-18 13:50:08,529 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.47 vs. 
limit=15.0 +2024-09-18 13:50:27,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=455380.0, ans=0.0 +2024-09-18 13:50:44,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=455420.0, ans=0.125 +2024-09-18 13:50:44,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=455420.0, ans=0.1 +2024-09-18 13:50:45,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=455420.0, ans=0.125 +2024-09-18 13:50:51,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=455460.0, ans=0.125 +2024-09-18 13:50:54,637 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.101e+01 8.356e+01 8.785e+01 9.330e+01 1.328e+02, threshold=1.757e+02, percent-clipped=0.0 +2024-09-18 13:51:02,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=455460.0, ans=0.0 +2024-09-18 13:51:06,759 INFO [train.py:1198] (0/2) Epoch 26, batch 750, loss[loss=0.2347, ctc_loss=0.1209, cr_loss=0.3634, attn_decoder_loss=0.2393, over 29702.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.126, cr_loss=0.3686, attn_decoder_loss=0.2448, over 5676949.46 frames. ], batch size: 82, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:51:13,338 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.25 vs. limit=12.0 +2024-09-18 13:51:23,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.50 vs. limit=15.0 +2024-09-18 13:51:35,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=455540.0, ans=0.2 +2024-09-18 13:51:37,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=455540.0, ans=0.125 +2024-09-18 13:51:44,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=455580.0, ans=0.125 +2024-09-18 13:51:48,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.08 vs. limit=15.0 +2024-09-18 13:51:52,551 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:52:10,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=455660.0, ans=0.0 +2024-09-18 13:52:10,952 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.43 vs. limit=15.0 +2024-09-18 13:52:18,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.18 vs. limit=10.0 +2024-09-18 13:52:26,845 INFO [train.py:1198] (0/2) Epoch 26, batch 800, loss[loss=0.2172, ctc_loss=0.1095, cr_loss=0.344, attn_decoder_loss=0.2215, over 29607.00 frames. 
], tot_loss[loss=0.2405, ctc_loss=0.1264, cr_loss=0.3691, attn_decoder_loss=0.245, over 5707558.59 frames. ], batch size: 73, lr: 4.29e-03, grad_scale: 16.0 +2024-09-18 13:52:36,838 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.80 vs. limit=12.0 +2024-09-18 13:52:55,111 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.82 vs. limit=6.0 +2024-09-18 13:52:55,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=455780.0, ans=0.1 +2024-09-18 13:53:01,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=455780.0, ans=0.125 +2024-09-18 13:53:13,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=455820.0, ans=0.125 +2024-09-18 13:53:18,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.59 vs. limit=15.0 +2024-09-18 13:53:27,852 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.27 vs. limit=15.0 +2024-09-18 13:53:29,323 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.94 vs. limit=15.0 +2024-09-18 13:53:31,736 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.399e+01 8.438e+01 9.008e+01 9.520e+01 4.430e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-18 13:53:38,740 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.59 vs. limit=15.0 +2024-09-18 13:53:42,296 INFO [train.py:1198] (0/2) Epoch 26, batch 850, loss[loss=0.2536, ctc_loss=0.1305, cr_loss=0.3908, attn_decoder_loss=0.2586, over 29708.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1259, cr_loss=0.3681, attn_decoder_loss=0.2446, over 5736023.77 frames. ], batch size: 89, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:53:44,678 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.69 vs. limit=12.0 +2024-09-18 13:54:08,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=455940.0, ans=0.0 +2024-09-18 13:54:14,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=455980.0, ans=0.125 +2024-09-18 13:54:36,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=456020.0, ans=0.1 +2024-09-18 13:54:58,548 INFO [train.py:1198] (0/2) Epoch 26, batch 900, loss[loss=0.2163, ctc_loss=0.09761, cr_loss=0.3113, attn_decoder_loss=0.2226, over 29604.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1262, cr_loss=0.3684, attn_decoder_loss=0.2449, over 5741124.95 frames. 
], batch size: 73, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:55:03,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=456100.0, ans=0.1 +2024-09-18 13:55:26,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=456140.0, ans=0.025 +2024-09-18 13:55:42,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=456180.0, ans=0.125 +2024-09-18 13:56:07,883 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.077e+01 8.650e+01 9.071e+01 9.568e+01 1.657e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-18 13:56:18,483 INFO [train.py:1198] (0/2) Epoch 26, batch 950, loss[loss=0.2306, ctc_loss=0.1161, cr_loss=0.3401, attn_decoder_loss=0.2358, over 29509.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1265, cr_loss=0.3693, attn_decoder_loss=0.2451, over 5742303.03 frames. ], batch size: 74, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:56:29,602 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.37 vs. limit=15.0 +2024-09-18 13:56:38,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=456340.0, ans=0.1 +2024-09-18 13:56:55,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=456380.0, ans=0.125 +2024-09-18 13:56:56,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=456380.0, ans=0.07 +2024-09-18 13:57:01,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=456380.0, ans=0.0 +2024-09-18 13:57:14,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=456420.0, ans=0.125 +2024-09-18 13:57:29,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=456460.0, ans=0.2 +2024-09-18 13:57:33,408 INFO [train.py:1198] (0/2) Epoch 26, batch 1000, loss[loss=0.2248, ctc_loss=0.1107, cr_loss=0.3187, attn_decoder_loss=0.2304, over 29494.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1271, cr_loss=0.3702, attn_decoder_loss=0.2459, over 5735852.97 frames. ], batch size: 77, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:57:38,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=456500.0, ans=0.1 +2024-09-18 13:57:44,636 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.38 vs. 
limit=15.0 +2024-09-18 13:57:51,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=456540.0, ans=0.0 +2024-09-18 13:57:55,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=456540.0, ans=0.0 +2024-09-18 13:57:59,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=456540.0, ans=0.0 +2024-09-18 13:58:01,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=456540.0, ans=0.2 +2024-09-18 13:58:31,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=456620.0, ans=0.0 +2024-09-18 13:58:33,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=456660.0, ans=0.0 +2024-09-18 13:58:38,766 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.192e+01 8.379e+01 9.044e+01 9.595e+01 2.964e+02, threshold=1.809e+02, percent-clipped=3.0 +2024-09-18 13:58:48,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=456700.0, ans=0.0 +2024-09-18 13:58:49,374 INFO [train.py:1198] (0/2) Epoch 26, batch 1050, loss[loss=0.25, ctc_loss=0.1365, cr_loss=0.3944, attn_decoder_loss=0.2539, over 29700.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1268, cr_loss=0.3695, attn_decoder_loss=0.2454, over 5745278.78 frames. ], batch size: 85, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:59:10,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten.whitening_limit, batch_count=456740.0, ans=15.0 +2024-09-18 13:59:47,106 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.32 vs. limit=15.0 +2024-09-18 13:59:49,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.66 vs. limit=10.0 +2024-09-18 13:59:59,406 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.45 vs. limit=12.0 +2024-09-18 14:00:10,565 INFO [train.py:1198] (0/2) Epoch 26, batch 1100, loss[loss=0.2398, ctc_loss=0.1234, cr_loss=0.3844, attn_decoder_loss=0.2442, over 29444.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.127, cr_loss=0.3697, attn_decoder_loss=0.2455, over 5757979.78 frames. ], batch size: 78, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 14:00:18,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=456900.0, ans=0.0 +2024-09-18 14:00:26,473 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.03 vs. 
limit=15.0 +2024-09-18 14:00:30,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=456940.0, ans=0.125 +2024-09-18 14:00:39,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=456980.0, ans=0.0 +2024-09-18 14:00:39,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=456980.0, ans=0.125 +2024-09-18 14:00:56,536 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:01:07,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.09 vs. limit=22.5 +2024-09-18 14:01:10,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=457060.0, ans=0.125 +2024-09-18 14:01:15,843 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.069e+01 8.599e+01 9.010e+01 9.619e+01 1.920e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-18 14:01:20,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=457060.0, ans=0.1 +2024-09-18 14:01:26,671 INFO [train.py:1198] (0/2) Epoch 26, batch 1150, loss[loss=0.2267, ctc_loss=0.111, cr_loss=0.3348, attn_decoder_loss=0.2321, over 29448.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1267, cr_loss=0.369, attn_decoder_loss=0.2453, over 5756301.35 frames. ], batch size: 78, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 14:01:30,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=457100.0, ans=0.2 +2024-09-18 14:01:48,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=457140.0, ans=0.0 +2024-09-18 14:02:05,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=457180.0, ans=0.125 +2024-09-18 14:02:44,780 INFO [train.py:1198] (0/2) Epoch 26, batch 1200, loss[loss=0.2553, ctc_loss=0.1397, cr_loss=0.3926, attn_decoder_loss=0.2595, over 29673.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1274, cr_loss=0.3701, attn_decoder_loss=0.2461, over 5748206.62 frames. ], batch size: 85, lr: 4.29e-03, grad_scale: 16.0 +2024-09-18 14:02:49,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=457300.0, ans=0.125 +2024-09-18 14:02:50,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.46 vs. limit=10.0 +2024-09-18 14:02:55,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=457300.0, ans=0.025 +2024-09-18 14:03:07,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=457340.0, ans=0.0 +2024-09-18 14:03:10,072 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.95 vs. 
limit=15.0 +2024-09-18 14:03:10,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=457340.0, ans=0.125 +2024-09-18 14:03:38,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten.whitening_limit, batch_count=457420.0, ans=15.0 +2024-09-18 14:03:40,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=457420.0, ans=0.95 +2024-09-18 14:03:52,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=457460.0, ans=0.125 +2024-09-18 14:03:53,662 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.217e+01 8.704e+01 9.142e+01 9.758e+01 1.993e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-18 14:03:57,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=457460.0, ans=0.125 +2024-09-18 14:03:58,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=457460.0, ans=0.1 +2024-09-18 14:04:02,760 INFO [train.py:1198] (0/2) Epoch 26, batch 1250, loss[loss=0.2602, ctc_loss=0.1421, cr_loss=0.4031, attn_decoder_loss=0.2644, over 29556.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1277, cr_loss=0.3708, attn_decoder_loss=0.2465, over 5775552.15 frames. ], batch size: 92, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 14:04:23,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.77 vs. limit=22.5 +2024-09-18 14:04:31,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=457580.0, ans=0.125 +2024-09-18 14:04:50,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=457620.0, ans=0.0 +2024-09-18 14:05:15,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=457660.0, ans=0.0 +2024-09-18 14:05:18,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=457700.0, ans=0.125 +2024-09-18 14:05:19,427 INFO [train.py:1198] (0/2) Epoch 26, batch 1300, loss[loss=0.2669, ctc_loss=0.144, cr_loss=0.41, attn_decoder_loss=0.2715, over 28304.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1272, cr_loss=0.3697, attn_decoder_loss=0.2461, over 5780223.15 frames. 
], batch size: 111, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 14:05:22,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=457700.0, ans=0.1 +2024-09-18 14:05:42,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=457740.0, ans=0.0 +2024-09-18 14:06:02,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=457780.0, ans=0.1 +2024-09-18 14:06:25,981 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.546e+01 8.454e+01 9.061e+01 9.465e+01 1.475e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 14:06:26,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=457860.0, ans=0.125 +2024-09-18 14:06:35,253 INFO [train.py:1198] (0/2) Epoch 26, batch 1350, loss[loss=0.2438, ctc_loss=0.1307, cr_loss=0.3685, attn_decoder_loss=0.2482, over 29742.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1263, cr_loss=0.3682, attn_decoder_loss=0.2455, over 5797226.96 frames. ], batch size: 81, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:06:46,052 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.91 vs. limit=22.5 +2024-09-18 14:06:53,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=457940.0, ans=0.0 +2024-09-18 14:06:54,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=457940.0, ans=0.0 +2024-09-18 14:07:45,125 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.16 vs. limit=15.0 +2024-09-18 14:07:55,073 INFO [train.py:1198] (0/2) Epoch 26, batch 1400, loss[loss=0.2098, ctc_loss=0.1029, cr_loss=0.3118, attn_decoder_loss=0.2148, over 29593.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.126, cr_loss=0.3682, attn_decoder_loss=0.2452, over 5807541.58 frames. ], batch size: 69, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:08:14,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=458140.0, ans=0.125 +2024-09-18 14:09:01,509 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.058e+01 8.404e+01 8.959e+01 9.350e+01 1.926e+02, threshold=1.792e+02, percent-clipped=1.0 +2024-09-18 14:09:10,655 INFO [train.py:1198] (0/2) Epoch 26, batch 1450, loss[loss=0.2654, ctc_loss=0.1464, cr_loss=0.4052, attn_decoder_loss=0.2696, over 29448.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1261, cr_loss=0.3681, attn_decoder_loss=0.2454, over 5805120.99 frames. ], batch size: 94, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:09:20,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=458300.0, ans=0.125 +2024-09-18 14:09:32,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=458340.0, ans=0.125 +2024-09-18 14:10:16,041 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.99 vs. 
limit=12.0 +2024-09-18 14:10:27,089 INFO [train.py:1198] (0/2) Epoch 26, batch 1500, loss[loss=0.2539, ctc_loss=0.1281, cr_loss=0.3826, attn_decoder_loss=0.2594, over 29636.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1266, cr_loss=0.3689, attn_decoder_loss=0.246, over 5806856.28 frames. ], batch size: 86, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:10:27,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=458500.0, ans=0.2 +2024-09-18 14:10:28,284 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.85 vs. limit=15.0 +2024-09-18 14:10:31,279 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.70 vs. limit=12.0 +2024-09-18 14:10:34,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=458500.0, ans=0.0 +2024-09-18 14:10:43,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=458540.0, ans=0.125 +2024-09-18 14:10:45,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=458540.0, ans=0.125 +2024-09-18 14:11:38,955 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.523e+01 8.611e+01 9.025e+01 9.915e+01 2.823e+02, threshold=1.805e+02, percent-clipped=2.0 +2024-09-18 14:11:48,168 INFO [train.py:1198] (0/2) Epoch 26, batch 1550, loss[loss=0.2558, ctc_loss=0.1349, cr_loss=0.3963, attn_decoder_loss=0.2604, over 29476.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1268, cr_loss=0.369, attn_decoder_loss=0.246, over 5782401.85 frames. ], batch size: 90, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:12:12,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=458740.0, ans=0.125 +2024-09-18 14:12:19,059 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.07 vs. limit=15.0 +2024-09-18 14:12:22,202 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.03 vs. limit=15.0 +2024-09-18 14:12:36,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=458820.0, ans=0.125 +2024-09-18 14:12:43,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.47 vs. limit=15.0 +2024-09-18 14:12:46,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.31 vs. limit=15.0 +2024-09-18 14:12:46,711 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.23 vs. limit=6.0 +2024-09-18 14:13:03,831 INFO [train.py:1198] (0/2) Epoch 26, batch 1600, loss[loss=0.2443, ctc_loss=0.1289, cr_loss=0.3573, attn_decoder_loss=0.2492, over 29660.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1271, cr_loss=0.3691, attn_decoder_loss=0.2459, over 5764546.68 frames. 
], batch size: 85, lr: 4.28e-03, grad_scale: 16.0 +2024-09-18 14:13:05,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=458900.0, ans=0.0 +2024-09-18 14:13:14,987 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.17 vs. limit=22.5 +2024-09-18 14:13:22,348 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:13:26,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=458940.0, ans=0.2 +2024-09-18 14:13:42,641 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.28 vs. limit=15.0 +2024-09-18 14:13:43,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=458980.0, ans=0.125 +2024-09-18 14:13:46,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=458980.0, ans=0.125 +2024-09-18 14:13:52,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=459020.0, ans=0.125 +2024-09-18 14:13:54,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=459020.0, ans=0.1 +2024-09-18 14:14:09,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=459060.0, ans=0.125 +2024-09-18 14:14:12,105 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.510e+01 8.327e+01 8.927e+01 9.503e+01 2.372e+02, threshold=1.785e+02, percent-clipped=2.0 +2024-09-18 14:14:19,377 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.84 vs. limit=15.0 +2024-09-18 14:14:21,599 INFO [train.py:1198] (0/2) Epoch 26, batch 1650, loss[loss=0.2556, ctc_loss=0.1319, cr_loss=0.384, attn_decoder_loss=0.2608, over 29684.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1272, cr_loss=0.3692, attn_decoder_loss=0.2458, over 5759985.89 frames. ], batch size: 89, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:15:22,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=459220.0, ans=0.0 +2024-09-18 14:15:23,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=459260.0, ans=0.125 +2024-09-18 14:15:25,715 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.88 vs. limit=10.0 +2024-09-18 14:15:28,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=459260.0, ans=0.125 +2024-09-18 14:15:29,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=459260.0, ans=0.1 +2024-09-18 14:15:39,936 INFO [train.py:1198] (0/2) Epoch 26, batch 1700, loss[loss=0.2074, ctc_loss=0.09927, cr_loss=0.3267, attn_decoder_loss=0.2122, over 29583.00 frames. 
], tot_loss[loss=0.2409, ctc_loss=0.1267, cr_loss=0.369, attn_decoder_loss=0.2454, over 5781630.93 frames. ], batch size: 69, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:15:40,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_na.min_abs, batch_count=459300.0, ans=0.02 +2024-09-18 14:15:50,067 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.00 vs. limit=15.0 +2024-09-18 14:15:54,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.61 vs. limit=6.0 +2024-09-18 14:15:56,094 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.28 vs. limit=22.5 +2024-09-18 14:16:34,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=459420.0, ans=0.0 +2024-09-18 14:16:37,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=459420.0, ans=0.025 +2024-09-18 14:16:48,024 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.462e+01 8.426e+01 8.833e+01 9.428e+01 1.268e+02, threshold=1.767e+02, percent-clipped=0.0 +2024-09-18 14:16:48,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=459460.0, ans=0.0 +2024-09-18 14:16:51,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=459460.0, ans=0.2 +2024-09-18 14:16:52,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=459460.0, ans=0.0 +2024-09-18 14:16:55,735 INFO [train.py:1198] (0/2) Epoch 26, batch 1750, loss[loss=0.2131, ctc_loss=0.1123, cr_loss=0.3382, attn_decoder_loss=0.2168, over 29326.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1264, cr_loss=0.3681, attn_decoder_loss=0.2449, over 5789604.65 frames. ], batch size: 67, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:17:03,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=459500.0, ans=0.125 +2024-09-18 14:17:09,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=459540.0, ans=0.125 +2024-09-18 14:17:11,451 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:17:28,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=459580.0, ans=0.1 +2024-09-18 14:17:30,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.whiten.whitening_limit, batch_count=459580.0, ans=12.0 +2024-09-18 14:17:50,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=459620.0, ans=0.125 +2024-09-18 14:17:57,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.40 vs. 
limit=15.0 +2024-09-18 14:18:02,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=459660.0, ans=0.125 +2024-09-18 14:18:02,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=459660.0, ans=0.1 +2024-09-18 14:18:07,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=459660.0, ans=0.0 +2024-09-18 14:18:11,488 INFO [train.py:1198] (0/2) Epoch 26, batch 1800, loss[loss=0.2547, ctc_loss=0.1335, cr_loss=0.3726, attn_decoder_loss=0.2599, over 29695.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1266, cr_loss=0.3689, attn_decoder_loss=0.2455, over 5792854.94 frames. ], batch size: 83, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:18:23,649 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.03 vs. limit=10.0 +2024-09-18 14:19:00,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=459820.0, ans=0.125 +2024-09-18 14:19:23,995 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.402e+01 8.569e+01 8.931e+01 9.347e+01 1.247e+02, threshold=1.786e+02, percent-clipped=0.0 +2024-09-18 14:19:31,602 INFO [train.py:1198] (0/2) Epoch 26, batch 1850, loss[loss=0.2455, ctc_loss=0.1305, cr_loss=0.373, attn_decoder_loss=0.25, over 29633.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1266, cr_loss=0.3689, attn_decoder_loss=0.2454, over 5799651.61 frames. ], batch size: 86, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:19:34,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=459900.0, ans=0.025 +2024-09-18 14:19:37,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.76 vs. limit=15.0 +2024-09-18 14:19:42,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=459900.0, ans=0.1 +2024-09-18 14:19:48,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=459940.0, ans=0.0 +2024-09-18 14:19:52,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.15 vs. limit=10.0 +2024-09-18 14:20:10,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=459980.0, ans=0.1 +2024-09-18 14:20:45,616 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.08 vs. limit=6.0 +2024-09-18 14:20:47,440 INFO [train.py:1198] (0/2) Epoch 26, batch 1900, loss[loss=0.2476, ctc_loss=0.1257, cr_loss=0.3749, attn_decoder_loss=0.2528, over 29679.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1269, cr_loss=0.3698, attn_decoder_loss=0.246, over 5805868.17 frames. 
], batch size: 89, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:20:58,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=460100.0, ans=0.1 +2024-09-18 14:21:09,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=460140.0, ans=0.2 +2024-09-18 14:21:10,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=460140.0, ans=0.1 +2024-09-18 14:21:19,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=460180.0, ans=0.2 +2024-09-18 14:21:26,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=460180.0, ans=0.1 +2024-09-18 14:21:38,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=460220.0, ans=0.0 +2024-09-18 14:21:44,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=460220.0, ans=0.125 +2024-09-18 14:21:56,071 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.154e+01 8.718e+01 9.273e+01 9.664e+01 1.625e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-18 14:22:03,703 INFO [train.py:1198] (0/2) Epoch 26, batch 1950, loss[loss=0.2272, ctc_loss=0.116, cr_loss=0.3666, attn_decoder_loss=0.2314, over 29449.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1277, cr_loss=0.3719, attn_decoder_loss=0.2471, over 5820949.63 frames. ], batch size: 78, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:22:31,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=460340.0, ans=0.125 +2024-09-18 14:22:45,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=460380.0, ans=0.125 +2024-09-18 14:22:48,990 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.37 vs. limit=6.0 +2024-09-18 14:23:11,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=460460.0, ans=0.125 +2024-09-18 14:23:14,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=460460.0, ans=0.125 +2024-09-18 14:23:16,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=460460.0, ans=0.0 +2024-09-18 14:23:21,280 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.28 vs. limit=15.0 +2024-09-18 14:23:23,251 INFO [train.py:1198] (0/2) Epoch 26, batch 2000, loss[loss=0.2142, ctc_loss=0.1087, cr_loss=0.3372, attn_decoder_loss=0.2184, over 29361.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1277, cr_loss=0.3721, attn_decoder_loss=0.2476, over 5798444.38 frames. 
], batch size: 67, lr: 4.27e-03, grad_scale: 16.0 +2024-09-18 14:23:34,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=460500.0, ans=0.0 +2024-09-18 14:23:37,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=460540.0, ans=0.1 +2024-09-18 14:24:06,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=460580.0, ans=0.07 +2024-09-18 14:24:09,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=460620.0, ans=0.125 +2024-09-18 14:24:33,229 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.635e+01 8.701e+01 9.104e+01 9.478e+01 2.564e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-18 14:24:38,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=460700.0, ans=0.2 +2024-09-18 14:24:39,263 INFO [train.py:1198] (0/2) Epoch 26, batch 2050, loss[loss=0.2154, ctc_loss=0.1144, cr_loss=0.35, attn_decoder_loss=0.2189, over 29471.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1272, cr_loss=0.3709, attn_decoder_loss=0.2465, over 5791073.23 frames. ], batch size: 70, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:25:27,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.64 vs. limit=15.0 +2024-09-18 14:25:31,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=460820.0, ans=0.0 +2024-09-18 14:25:43,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=460860.0, ans=0.0 +2024-09-18 14:25:55,184 INFO [train.py:1198] (0/2) Epoch 26, batch 2100, loss[loss=0.2358, ctc_loss=0.1216, cr_loss=0.3531, attn_decoder_loss=0.2407, over 29772.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1264, cr_loss=0.3694, attn_decoder_loss=0.2456, over 5803097.02 frames. ], batch size: 81, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:26:05,283 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:26:06,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=460900.0, ans=0.07 +2024-09-18 14:26:10,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.03 vs. 
+2024-09-18 14:26:19,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten.whitening_limit, batch_count=460940.0, ans=15.0
+2024-09-18 14:26:39,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=460980.0, ans=0.2
+2024-09-18 14:26:44,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=461020.0, ans=0.0
+2024-09-18 14:26:56,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=461060.0, ans=0.125
+2024-09-18 14:27:08,387 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.265e+01 8.897e+01 9.459e+01 1.093e+02, threshold=1.779e+02, percent-clipped=0.0
+2024-09-18 14:27:10,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=461060.0, ans=0.1
+2024-09-18 14:27:10,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=461060.0, ans=0.025
+2024-09-18 14:27:14,484 INFO [train.py:1198] (0/2) Epoch 26, batch 2150, loss[loss=0.2351, ctc_loss=0.1264, cr_loss=0.3799, attn_decoder_loss=0.2387, over 29461.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1259, cr_loss=0.3683, attn_decoder_loss=0.245, over 5816756.37 frames. ], batch size: 78, lr: 4.27e-03, grad_scale: 8.0
+2024-09-18 14:27:24,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=461100.0, ans=0.1
+2024-09-18 14:27:46,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=461180.0, ans=0.125
+2024-09-18 14:27:52,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=461180.0, ans=0.0
+2024-09-18 14:27:52,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=461180.0, ans=0.05
+2024-09-18 14:28:01,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=461220.0, ans=0.0
+2024-09-18 14:28:03,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=461220.0, ans=0.0
+2024-09-18 14:28:14,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=461260.0, ans=0.0
+2024-09-18 14:28:27,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=461260.0, ans=0.025
+2024-09-18 14:28:30,689 INFO [train.py:1198] (0/2) Epoch 26, batch 2200, loss[loss=0.2557, ctc_loss=0.1365, cr_loss=0.3971, attn_decoder_loss=0.2601, over 29646.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1265, cr_loss=0.3695, attn_decoder_loss=0.2455, over 5814432.55 frames. ], batch size: 86, lr: 4.27e-03, grad_scale: 8.0
+2024-09-18 14:28:40,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=461300.0, ans=0.125
+2024-09-18 14:28:46,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=461340.0, ans=0.125
+2024-09-18 14:28:48,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.05 vs. limit=22.5
+2024-09-18 14:29:10,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.11 vs. limit=15.0
+2024-09-18 14:29:22,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=461420.0, ans=0.025
+2024-09-18 14:29:31,812 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 14:29:33,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=461460.0, ans=0.125
+2024-09-18 14:29:36,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=461460.0, ans=0.0
+2024-09-18 14:29:40,322 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.263e+01 8.743e+01 9.086e+01 9.862e+01 3.457e+02, threshold=1.817e+02, percent-clipped=3.0
+2024-09-18 14:29:40,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=461460.0, ans=0.125
+2024-09-18 14:29:46,507 INFO [train.py:1198] (0/2) Epoch 26, batch 2250, loss[loss=0.2422, ctc_loss=0.1226, cr_loss=0.3527, attn_decoder_loss=0.2477, over 29678.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1262, cr_loss=0.3687, attn_decoder_loss=0.2454, over 5812523.50 frames. ], batch size: 82, lr: 4.27e-03, grad_scale: 8.0
+2024-09-18 14:29:58,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=461500.0, ans=0.0
+2024-09-18 14:30:01,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=461500.0, ans=0.0
+2024-09-18 14:30:31,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=461580.0, ans=0.0
+2024-09-18 14:30:45,932 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.88 vs. limit=22.5
+2024-09-18 14:30:54,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=461660.0, ans=0.125
+2024-09-18 14:31:07,030 INFO [train.py:1198] (0/2) Epoch 26, batch 2300, loss[loss=0.2097, ctc_loss=0.1004, cr_loss=0.3222, attn_decoder_loss=0.2147, over 29341.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1257, cr_loss=0.3675, attn_decoder_loss=0.2444, over 5798382.48 frames. ], batch size: 71, lr: 4.27e-03, grad_scale: 8.0
+2024-09-18 14:31:08,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=461700.0, ans=0.0
+2024-09-18 14:31:12,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.90 vs. limit=15.0
+2024-09-18 14:31:17,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=461700.0, ans=0.125
+2024-09-18 14:31:22,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=461740.0, ans=0.125
+2024-09-18 14:31:28,899 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.08 vs. limit=6.0
+2024-09-18 14:31:33,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=461740.0, ans=0.0
+2024-09-18 14:31:48,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=461780.0, ans=0.1
+2024-09-18 14:31:50,565 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.76 vs. limit=15.0
+2024-09-18 14:32:09,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=461860.0, ans=0.2
+2024-09-18 14:32:16,746 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.100e+01 8.397e+01 8.981e+01 9.856e+01 3.624e+02, threshold=1.796e+02, percent-clipped=1.0
+2024-09-18 14:32:17,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=461860.0, ans=0.1
+2024-09-18 14:32:21,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=461900.0, ans=0.125
+2024-09-18 14:32:22,734 INFO [train.py:1198] (0/2) Epoch 26, batch 2350, loss[loss=0.2384, ctc_loss=0.1282, cr_loss=0.373, attn_decoder_loss=0.2423, over 29683.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1262, cr_loss=0.3685, attn_decoder_loss=0.2446, over 5803578.79 frames. ], batch size: 83, lr: 4.27e-03, grad_scale: 8.0
+2024-09-18 14:32:24,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=461900.0, ans=0.1
+2024-09-18 14:32:27,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=461900.0, ans=0.125
+2024-09-18 14:32:36,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=461940.0, ans=0.2
+2024-09-18 14:33:02,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=461980.0, ans=0.125
+2024-09-18 14:33:38,602 INFO [train.py:1198] (0/2) Epoch 26, batch 2400, loss[loss=0.23, ctc_loss=0.1193, cr_loss=0.3569, attn_decoder_loss=0.2344, over 29527.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.127, cr_loss=0.3699, attn_decoder_loss=0.2453, over 5807680.64 frames. ], batch size: 76, lr: 4.26e-03, grad_scale: 16.0
+2024-09-18 14:33:58,118 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.91 vs. limit=10.0
+2024-09-18 14:34:49,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=462260.0, ans=0.0
+2024-09-18 14:34:51,669 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.393e+01 8.579e+01 9.212e+01 9.914e+01 2.760e+02, threshold=1.842e+02, percent-clipped=1.0
+2024-09-18 14:34:57,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=462300.0, ans=0.125
+2024-09-18 14:34:58,432 INFO [train.py:1198] (0/2) Epoch 26, batch 2450, loss[loss=0.2419, ctc_loss=0.1365, cr_loss=0.3994, attn_decoder_loss=0.2447, over 29702.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1275, cr_loss=0.3708, attn_decoder_loss=0.2462, over 5784374.25 frames. ], batch size: 82, lr: 4.26e-03, grad_scale: 8.0
+2024-09-18 14:34:58,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=462300.0, ans=0.125
+2024-09-18 14:35:07,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=462300.0, ans=0.125
+2024-09-18 14:35:35,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=462380.0, ans=0.0
+2024-09-18 14:36:13,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=462500.0, ans=0.125
+2024-09-18 14:36:14,445 INFO [train.py:1198] (0/2) Epoch 26, batch 2500, loss[loss=0.2463, ctc_loss=0.1278, cr_loss=0.3684, attn_decoder_loss=0.2513, over 29651.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1272, cr_loss=0.3704, attn_decoder_loss=0.2462, over 5795372.21 frames. ], batch size: 86, lr: 4.26e-03, grad_scale: 8.0
+2024-09-18 14:36:19,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=462500.0, ans=0.125
+2024-09-18 14:36:30,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=462540.0, ans=0.125
+2024-09-18 14:36:34,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=462540.0, ans=0.125
+2024-09-18 14:36:52,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=462580.0, ans=0.95
+2024-09-18 14:37:14,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=462660.0, ans=0.0
+2024-09-18 14:37:24,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=462660.0, ans=0.2
+2024-09-18 14:37:25,674 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.473e+01 8.987e+01 9.500e+01 1.769e+02, threshold=1.797e+02, percent-clipped=1.0
+2024-09-18 14:37:26,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=462660.0, ans=0.125
+2024-09-18 14:37:30,365 INFO [train.py:1198] (0/2) Epoch 26, batch 2550, loss[loss=0.2187, ctc_loss=0.1091, cr_loss=0.3347, attn_decoder_loss=0.2235, over 29340.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1271, cr_loss=0.3706, attn_decoder_loss=0.2462, over 5798500.58 frames. ], batch size: 67, lr: 4.26e-03, grad_scale: 8.0
+2024-09-18 14:37:37,955 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.98 vs. limit=15.0
+2024-09-18 14:38:05,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=462780.0, ans=0.125
+2024-09-18 14:38:45,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=462860.0, ans=0.125
+2024-09-18 14:38:48,021 INFO [train.py:1198] (0/2) Epoch 26, batch 2600, loss[loss=0.2279, ctc_loss=0.1123, cr_loss=0.3424, attn_decoder_loss=0.2331, over 29435.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1271, cr_loss=0.3711, attn_decoder_loss=0.2465, over 5794113.55 frames. ], batch size: 78, lr: 4.26e-03, grad_scale: 8.0
+2024-09-18 14:38:48,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=462900.0, ans=0.0
+2024-09-18 14:38:55,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=462900.0, ans=0.0
+2024-09-18 14:39:02,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=462900.0, ans=0.2
+2024-09-18 14:39:05,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=462940.0, ans=0.125
+2024-09-18 14:39:08,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=462940.0, ans=0.1
+2024-09-18 14:39:14,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=462940.0, ans=0.1
+2024-09-18 14:39:14,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=462940.0, ans=0.125
+2024-09-18 14:39:47,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=463020.0, ans=0.2
+2024-09-18 14:39:58,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=463060.0, ans=0.125
+2024-09-18 14:40:01,215 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.375e+01 8.942e+01 9.564e+01 2.475e+02, threshold=1.788e+02, percent-clipped=1.0
+2024-09-18 14:40:05,706 INFO [train.py:1198] (0/2) Epoch 26, batch 2650, loss[loss=0.252, ctc_loss=0.1313, cr_loss=0.3862, attn_decoder_loss=0.2568, over 29232.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1271, cr_loss=0.3711, attn_decoder_loss=0.2467, over 5800730.06 frames. ], batch size: 100, lr: 4.26e-03, grad_scale: 8.0
+2024-09-18 14:40:16,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=463100.0, ans=0.1
+2024-09-18 14:40:37,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.54 vs. limit=15.0
+2024-09-18 14:40:43,209 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.23 vs. limit=15.0
+2024-09-18 14:41:07,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=463260.0, ans=0.0
+2024-09-18 14:41:10,791 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.21 vs. limit=15.0
+2024-09-18 14:41:11,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=463260.0, ans=0.0
+2024-09-18 14:41:18,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=463260.0, ans=0.2
+2024-09-18 14:41:19,671 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.22 vs. limit=15.0
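The WARNING lines from optim.py summarize the distribution of recent per-batch gradient norms as five quantiles (min, 25%, median, 75%, max). In every such line in this log the reported threshold equals Clipping_scale times the median, e.g. 2.0 x 8.942e+01 = 1.788e+02 in the line above, and percent-clipped reports how often recent gradients exceeded the threshold. A small sketch of that relationship; the helper is ours, not the optim.py implementation:

```python
import torch

# Illustrative helper reproducing the logged threshold from the quantiles;
# not the actual optim.py code.
def clipping_threshold(recent_grad_norms, clipping_scale=2.0):
    norms = torch.tensor(recent_grad_norms, dtype=torch.float32)
    q = torch.quantile(norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    return clipping_scale * q[2]  # threshold = Clipping_scale x median

# A batch of norms whose median is 89.42 yields a threshold of ~178.8,
# matching "quartiles ... 8.942e+01 ... threshold=1.788e+02" above.
print(clipping_threshold([76.38, 83.75, 89.42, 95.64, 247.5]))
```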
+2024-09-18 14:41:23,924 INFO [train.py:1198] (0/2) Epoch 26, batch 2700, loss[loss=0.2388, ctc_loss=0.1166, cr_loss=0.3453, attn_decoder_loss=0.2447, over 29555.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1277, cr_loss=0.3723, attn_decoder_loss=0.2473, over 5796330.18 frames. ], batch size: 87, lr: 4.26e-03, grad_scale: 8.0
+2024-09-18 14:41:55,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=463380.0, ans=0.0
+2024-09-18 14:41:57,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=463380.0, ans=0.0
+2024-09-18 14:42:00,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=463380.0, ans=0.2
+2024-09-18 14:42:12,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=463420.0, ans=0.125
+2024-09-18 14:42:35,434 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.497e+01 8.933e+01 9.409e+01 1.999e+02, threshold=1.787e+02, percent-clipped=1.0
+2024-09-18 14:42:37,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=463460.0, ans=0.125
+2024-09-18 14:42:40,225 INFO [train.py:1198] (0/2) Epoch 26, batch 2750, loss[loss=0.2263, ctc_loss=0.1115, cr_loss=0.3233, attn_decoder_loss=0.2319, over 29519.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.127, cr_loss=0.3707, attn_decoder_loss=0.2463, over 5795594.39 frames. ], batch size: 75, lr: 4.26e-03, grad_scale: 8.0
+2024-09-18 14:42:52,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.89 vs. limit=22.5
+2024-09-18 14:42:59,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=463540.0, ans=0.0
+2024-09-18 14:42:59,950 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.24 vs. limit=15.0
+2024-09-18 14:43:23,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=463580.0, ans=0.0
+2024-09-18 14:43:26,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=463620.0, ans=0.125
+2024-09-18 14:43:28,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=463620.0, ans=0.125
+2024-09-18 14:43:36,265 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.39 vs. limit=22.5
+2024-09-18 14:43:58,318 INFO [train.py:1198] (0/2) Epoch 26, batch 2800, loss[loss=0.259, ctc_loss=0.1529, cr_loss=0.4057, attn_decoder_loss=0.2618, over 20060.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1273, cr_loss=0.3717, attn_decoder_loss=0.2465, over 5777221.82 frames. ], batch size: 209, lr: 4.26e-03, grad_scale: 16.0
+2024-09-18 14:44:00,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=463700.0, ans=0.0
+2024-09-18 14:44:01,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=463700.0, ans=0.125
+2024-09-18 14:44:06,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=463700.0, ans=0.0
+2024-09-18 14:44:10,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=463700.0, ans=0.125
+2024-09-18 14:44:18,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=463740.0, ans=0.125
+2024-09-18 14:44:36,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=463780.0, ans=0.125
+2024-09-18 14:44:46,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=463820.0, ans=0.2
+2024-09-18 14:44:54,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=463820.0, ans=0.125
+2024-09-18 14:45:01,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=463860.0, ans=0.2
+2024-09-18 14:45:06,125 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 14:45:10,998 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.701e+01 9.139e+01 9.864e+01 2.017e+02, threshold=1.828e+02, percent-clipped=1.0
+2024-09-18 14:45:15,526 INFO [train.py:1198] (0/2) Epoch 26, batch 2850, loss[loss=0.2414, ctc_loss=0.1359, cr_loss=0.3929, attn_decoder_loss=0.2443, over 29480.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.128, cr_loss=0.3725, attn_decoder_loss=0.2471, over 5762069.85 frames. ], batch size: 77, lr: 4.26e-03, grad_scale: 16.0
+2024-09-18 14:45:29,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=463940.0, ans=0.125
+2024-09-18 14:45:47,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=463980.0, ans=0.0
+2024-09-18 14:45:52,413 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-116000.pt
+2024-09-18 14:46:06,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.83 vs. limit=6.0
+2024-09-18 14:46:10,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.16 vs. limit=15.0
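The checkpoint path above encodes the loss weighting used in this run (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02), and the logged totals are consistent with a plain weighted sum of the three components. A sketch with the scales read off the directory name; the helper is ours, not the code in train.py:

```python
# Weighted loss combination implied by the experiment directory name.
def total_loss(ctc_loss, attn_decoder_loss, cr_loss,
               ctc_scale=0.1, aed_scale=0.9, cr_scale=0.02):
    return ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss

# Cross-check against the batch 2850 summary above:
# 0.1 * 0.128 + 0.9 * 0.2471 + 0.02 * 0.3725 = 0.2426, the logged loss.
print(total_loss(0.128, 0.2471, 0.3725))
```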
+2024-09-18 14:46:23,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=464060.0, ans=0.09899494936611666
+2024-09-18 14:46:26,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=464060.0, ans=0.0
+2024-09-18 14:46:36,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=464060.0, ans=0.2
+2024-09-18 14:46:37,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=464100.0, ans=0.0
+2024-09-18 14:46:40,983 INFO [train.py:1198] (0/2) Epoch 26, batch 2900, loss[loss=0.231, ctc_loss=0.1226, cr_loss=0.3473, attn_decoder_loss=0.2353, over 29423.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1284, cr_loss=0.3734, attn_decoder_loss=0.2477, over 5787462.71 frames. ], batch size: 79, lr: 4.26e-03, grad_scale: 8.0
+2024-09-18 14:46:56,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=464140.0, ans=0.125
+2024-09-18 14:47:13,179 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 14:47:23,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=464180.0, ans=0.125
+2024-09-18 14:47:52,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=464260.0, ans=0.125
+2024-09-18 14:47:53,453 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.170e+01 8.512e+01 9.090e+01 9.867e+01 2.207e+02, threshold=1.818e+02, percent-clipped=1.0
+2024-09-18 14:47:56,514 INFO [train.py:1198] (0/2) Epoch 26, batch 2950, loss[loss=0.2317, ctc_loss=0.1237, cr_loss=0.3678, attn_decoder_loss=0.2356, over 29518.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1273, cr_loss=0.3708, attn_decoder_loss=0.2462, over 5781604.82 frames. ], batch size: 75, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 14:48:02,864 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 14:48:03,431 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.82 vs. limit=15.0
+2024-09-18 14:48:25,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=464380.0, ans=0.1
+2024-09-18 14:48:33,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=464380.0, ans=0.0
+2024-09-18 14:48:50,487 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.22 vs. limit=15.0
+2024-09-18 14:48:54,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=464420.0, ans=0.125
+2024-09-18 14:49:02,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=464460.0, ans=0.015
+2024-09-18 14:49:15,210 INFO [train.py:1198] (0/2) Epoch 26, batch 3000, loss[loss=0.2394, ctc_loss=0.1272, cr_loss=0.384, attn_decoder_loss=0.2434, over 29750.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1269, cr_loss=0.3701, attn_decoder_loss=0.2458, over 5783244.11 frames. ], batch size: 81, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 14:49:15,210 INFO [train.py:1221] (0/2) Computing validation loss
+2024-09-18 14:49:33,743 INFO [train.py:1230] (0/2) Epoch 26, validation: loss=0.2113, ctc_loss=0.03775, cr_loss=5.571e-15, attn_decoder_loss=0.2305, over 944034.00 frames.
+2024-09-18 14:49:33,743 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB
+2024-09-18 14:49:40,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=464500.0, ans=0.125
+2024-09-18 14:49:56,669 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.47 vs. limit=22.5
+2024-09-18 14:50:15,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=464580.0, ans=0.1
+2024-09-18 14:50:40,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=464660.0, ans=0.025
+2024-09-18 14:50:46,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=464660.0, ans=0.04949747468305833
+2024-09-18 14:50:49,033 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.963e+01 8.504e+01 9.014e+01 9.631e+01 1.549e+02, threshold=1.803e+02, percent-clipped=0.0
+2024-09-18 14:50:52,203 INFO [train.py:1198] (0/2) Epoch 26, batch 3050, loss[loss=0.2343, ctc_loss=0.1182, cr_loss=0.3484, attn_decoder_loss=0.2394, over 29521.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.127, cr_loss=0.3703, attn_decoder_loss=0.2464, over 5778111.13 frames. ], batch size: 76, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 14:50:52,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=464700.0, ans=0.125
+2024-09-18 14:50:54,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=464700.0, ans=0.2
+2024-09-18 14:50:57,671 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.64 vs. limit=22.5
+2024-09-18 14:51:06,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=464740.0, ans=0.1
+2024-09-18 14:51:06,586 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.32 vs. limit=15.0
+2024-09-18 14:51:09,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=464740.0, ans=0.1
+2024-09-18 14:51:18,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=464740.0, ans=0.025
+2024-09-18 14:51:54,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=464860.0, ans=0.2
+2024-09-18 14:51:55,453 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.77 vs. limit=15.0
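At batch 3000 train.py pauses to compute a validation loss over 944034.00 frames. The same 0.1/0.9/0.02 weighting reproduces the reported total, and cr_loss comes out as 5.571e-15, i.e. numerically zero; one plausible reading (our assumption, not stated in the log) is that the consistency-regularization term compares two differently-augmented passes over each batch, and that augmentation is disabled during validation, leaving nothing to compare.

```python
# Recomputing the validation total from its logged components, reusing the
# scales from the experiment directory name:
val = 0.1 * 0.03775 + 0.9 * 0.2305 + 0.02 * 5.571e-15
print(round(val, 4))  # 0.2112, matching the logged 0.2113 up to rounding
```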
+2024-09-18 14:52:08,312 INFO [train.py:1198] (0/2) Epoch 26, batch 3100, loss[loss=0.2603, ctc_loss=0.1394, cr_loss=0.3839, attn_decoder_loss=0.2652, over 29234.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1269, cr_loss=0.3699, attn_decoder_loss=0.2461, over 5777581.80 frames. ], batch size: 100, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 14:52:13,336 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 14:52:29,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=464940.0, ans=0.025
+2024-09-18 14:53:09,190 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.46 vs. limit=12.0
+2024-09-18 14:53:09,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.43 vs. limit=15.0
+2024-09-18 14:53:23,452 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.570e+01 9.069e+01 9.533e+01 2.948e+02, threshold=1.814e+02, percent-clipped=1.0
+2024-09-18 14:53:26,520 INFO [train.py:1198] (0/2) Epoch 26, batch 3150, loss[loss=0.2551, ctc_loss=0.1364, cr_loss=0.4029, attn_decoder_loss=0.2593, over 28849.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1267, cr_loss=0.3692, attn_decoder_loss=0.246, over 5784088.12 frames. ], batch size: 104, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 14:53:26,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=465100.0, ans=0.2
+2024-09-18 14:53:39,523 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.43 vs. limit=22.5
+2024-09-18 14:53:49,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=465140.0, ans=0.2
+2024-09-18 14:54:19,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=465220.0, ans=0.125
+2024-09-18 14:54:21,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=465220.0, ans=0.2
+2024-09-18 14:54:32,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=465260.0, ans=0.2
+2024-09-18 14:54:44,452 INFO [train.py:1198] (0/2) Epoch 26, batch 3200, loss[loss=0.2302, ctc_loss=0.1203, cr_loss=0.3585, attn_decoder_loss=0.2344, over 29420.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1264, cr_loss=0.3687, attn_decoder_loss=0.2457, over 5793399.83 frames. ], batch size: 79, lr: 4.25e-03, grad_scale: 16.0
+2024-09-18 14:54:54,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=465300.0, ans=0.1
+2024-09-18 14:55:03,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.54 vs. limit=6.0
+2024-09-18 14:55:03,619 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.96 vs. limit=12.0
+2024-09-18 14:55:25,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=465380.0, ans=0.1
+2024-09-18 14:55:25,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=465380.0, ans=0.125
+2024-09-18 14:55:33,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_positive, batch_count=465420.0, ans=0.05
+2024-09-18 14:55:35,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=465420.0, ans=0.025
+2024-09-18 14:55:39,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=465420.0, ans=0.025
+2024-09-18 14:55:46,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.29 vs. limit=15.0
+2024-09-18 14:55:53,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=465460.0, ans=0.125
+2024-09-18 14:55:58,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=465460.0, ans=0.0
+2024-09-18 14:55:59,027 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.879e+01 8.413e+01 8.869e+01 9.551e+01 1.271e+02, threshold=1.774e+02, percent-clipped=0.0
+2024-09-18 14:56:00,551 INFO [train.py:1198] (0/2) Epoch 26, batch 3250, loss[loss=0.2544, ctc_loss=0.1369, cr_loss=0.3886, attn_decoder_loss=0.2588, over 29716.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1265, cr_loss=0.3689, attn_decoder_loss=0.2461, over 5800623.61 frames. ], batch size: 84, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 14:56:04,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=465500.0, ans=0.0
+2024-09-18 14:56:26,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=465540.0, ans=0.125
+2024-09-18 14:57:01,304 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.25 vs. limit=15.0
+2024-09-18 14:57:05,267 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 14:57:06,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=465660.0, ans=0.0
+2024-09-18 14:57:18,690 INFO [train.py:1198] (0/2) Epoch 26, batch 3300, loss[loss=0.2441, ctc_loss=0.1233, cr_loss=0.3499, attn_decoder_loss=0.2497, over 28658.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1259, cr_loss=0.3673, attn_decoder_loss=0.2449, over 5796952.11 frames. ], batch size: 112, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 14:57:47,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=465780.0, ans=0.035
+2024-09-18 14:57:53,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=465780.0, ans=0.125
+2024-09-18 14:57:53,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=465780.0, ans=10.0
+2024-09-18 14:57:55,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=465780.0, ans=0.125
+2024-09-18 14:58:02,109 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.58 vs. limit=15.0
+2024-09-18 14:58:14,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=465820.0, ans=0.2
+2024-09-18 14:58:20,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.61 vs. limit=10.0
+2024-09-18 14:58:34,538 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.093e+01 8.655e+01 9.163e+01 9.654e+01 2.275e+02, threshold=1.833e+02, percent-clipped=2.0
+2024-09-18 14:58:36,151 INFO [train.py:1198] (0/2) Epoch 26, batch 3350, loss[loss=0.2551, ctc_loss=0.1336, cr_loss=0.3968, attn_decoder_loss=0.2598, over 28898.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.127, cr_loss=0.3693, attn_decoder_loss=0.246, over 5774070.84 frames. ], batch size: 104, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 14:59:03,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=465940.0, ans=0.0
+2024-09-18 14:59:50,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=466100.0, ans=0.0
+2024-09-18 14:59:51,879 INFO [train.py:1198] (0/2) Epoch 26, batch 3400, loss[loss=0.2096, ctc_loss=0.1033, cr_loss=0.3325, attn_decoder_loss=0.214, over 29333.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1272, cr_loss=0.3699, attn_decoder_loss=0.2459, over 5766835.19 frames. ], batch size: 67, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 15:00:02,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=466100.0, ans=0.0
+2024-09-18 15:00:10,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=466140.0, ans=0.125
+2024-09-18 15:00:19,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=466140.0, ans=0.125
+2024-09-18 15:00:22,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=466180.0, ans=0.125
+2024-09-18 15:00:25,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=466180.0, ans=0.5
+2024-09-18 15:01:08,280 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.379e+01 8.854e+01 9.422e+01 2.123e+02, threshold=1.771e+02, percent-clipped=1.0
+2024-09-18 15:01:09,876 INFO [train.py:1198] (0/2) Epoch 26, batch 3450, loss[loss=0.2414, ctc_loss=0.1196, cr_loss=0.3621, attn_decoder_loss=0.2469, over 28401.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.127, cr_loss=0.3698, attn_decoder_loss=0.246, over 5774994.57 frames. ], batch size: 111, lr: 4.25e-03, grad_scale: 8.0
+2024-09-18 15:01:31,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=466340.0, ans=0.125
+2024-09-18 15:01:40,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=466380.0, ans=0.125
+2024-09-18 15:02:05,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=466420.0, ans=0.0
+2024-09-18 15:02:15,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=466460.0, ans=0.0
+2024-09-18 15:02:23,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=466460.0, ans=0.0
+2024-09-18 15:02:28,044 INFO [train.py:1198] (0/2) Epoch 26, batch 3500, loss[loss=0.2309, ctc_loss=0.1209, cr_loss=0.3608, attn_decoder_loss=0.2351, over 29326.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1271, cr_loss=0.3697, attn_decoder_loss=0.2459, over 5775493.93 frames. ], batch size: 71, lr: 4.24e-03, grad_scale: 8.0
+2024-09-18 15:02:28,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=466500.0, ans=0.125
+2024-09-18 15:02:56,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=466580.0, ans=0.1
+2024-09-18 15:03:32,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=466660.0, ans=0.125
+2024-09-18 15:03:32,372 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:03:40,388 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.52 vs. limit=10.0
+2024-09-18 15:03:40,928 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.124e+01 8.579e+01 9.256e+01 9.884e+01 2.781e+02, threshold=1.851e+02, percent-clipped=2.0
+2024-09-18 15:03:42,420 INFO [train.py:1198] (0/2) Epoch 26, batch 3550, loss[loss=0.2596, ctc_loss=0.1395, cr_loss=0.3918, attn_decoder_loss=0.2642, over 29711.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1271, cr_loss=0.3698, attn_decoder_loss=0.2461, over 5784115.72 frames. ], batch size: 89, lr: 4.24e-03, grad_scale: 8.0
+2024-09-18 15:03:50,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=466700.0, ans=0.125
+2024-09-18 15:04:07,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=466740.0, ans=0.0
+2024-09-18 15:04:09,895 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.97 vs. limit=6.0
+2024-09-18 15:04:18,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=466780.0, ans=0.0
+2024-09-18 15:04:34,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=466820.0, ans=0.125
+2024-09-18 15:04:56,568 INFO [train.py:1198] (0/2) Epoch 26, batch 3600, loss[loss=0.2394, ctc_loss=0.1314, cr_loss=0.3753, attn_decoder_loss=0.2431, over 29502.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1273, cr_loss=0.3706, attn_decoder_loss=0.2462, over 5793401.10 frames. ], batch size: 77, lr: 4.24e-03, grad_scale: 16.0
+2024-09-18 15:05:07,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=466900.0, ans=0.1
+2024-09-18 15:05:08,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=466900.0, ans=0.1
+2024-09-18 15:05:25,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=466980.0, ans=0.125
+2024-09-18 15:05:28,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=466980.0, ans=0.125
+2024-09-18 15:05:28,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=466980.0, ans=0.125
+2024-09-18 15:05:43,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=467020.0, ans=0.1
+2024-09-18 15:05:58,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=467060.0, ans=0.125
+2024-09-18 15:06:12,878 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.659e+01 8.525e+01 9.113e+01 9.643e+01 7.477e+02, threshold=1.823e+02, percent-clipped=1.0
+2024-09-18 15:06:12,899 INFO [train.py:1198] (0/2) Epoch 26, batch 3650, loss[loss=0.2579, ctc_loss=0.1378, cr_loss=0.3848, attn_decoder_loss=0.2627, over 29502.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1263, cr_loss=0.3688, attn_decoder_loss=0.2454, over 5794468.21 frames. ], batch size: 90, lr: 4.24e-03, grad_scale: 8.0
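The grad_scale field in the batch summaries toggles between 8.0 and 16.0 (16.0 at batch 3600 above, back to 8.0 by batch 3650). That pattern is consistent with PyTorch's automatic-mixed-precision loss scaler, which doubles its scale after a stretch of overflow-free steps and halves it when an overflow is hit. A hedged sketch of the standard pattern, with illustrative hyperparameters rather than the ones train.py uses:

```python
import torch

# Standard AMP loss-scaling loop; hyperparameters are illustrative.
scaler = torch.cuda.amp.GradScaler(init_scale=8.0, growth_factor=2.0,
                                   backoff_factor=0.5, growth_interval=2000)

def training_step(model, optimizer, batch):
    optimizer.zero_grad()
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        loss = model(batch)            # hypothetical forward returning a loss
    scaler.scale(loss).backward()      # backprop through the scaled loss
    scaler.step(optimizer)             # skipped internally if grads overflowed
    scaler.update()                    # grows or backs off the scale
    return scaler.get_scale()          # the value logged as grad_scale
```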
+2024-09-18 15:06:15,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=467100.0, ans=0.1
+2024-09-18 15:06:23,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=467100.0, ans=0.09899494936611666
+2024-09-18 15:06:36,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=467140.0, ans=0.125
+2024-09-18 15:07:07,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=467220.0, ans=0.95
+2024-09-18 15:07:09,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=467220.0, ans=0.025
+2024-09-18 15:07:24,934 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:07:27,490 INFO [train.py:1198] (0/2) Epoch 26, batch 3700, loss[loss=0.2471, ctc_loss=0.1237, cr_loss=0.3631, attn_decoder_loss=0.2528, over 29698.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.126, cr_loss=0.3683, attn_decoder_loss=0.2454, over 5803164.54 frames. ], batch size: 84, lr: 4.24e-03, grad_scale: 8.0
+2024-09-18 15:07:27,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=467300.0, ans=0.0
+2024-09-18 15:07:32,449 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:07:35,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=467300.0, ans=0.125
+2024-09-18 15:07:38,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=467300.0, ans=0.125
+2024-09-18 15:08:21,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=467420.0, ans=0.2
+2024-09-18 15:08:21,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=467420.0, ans=0.09899494936611666
+2024-09-18 15:08:25,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=467460.0, ans=0.125
+2024-09-18 15:08:26,208 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.00 vs. limit=10.0
+2024-09-18 15:08:28,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=467460.0, ans=0.0
+2024-09-18 15:08:35,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=467460.0, ans=0.125
+2024-09-18 15:08:39,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=467460.0, ans=0.5
+2024-09-18 15:08:43,719 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.467e+01 8.301e+01 8.766e+01 9.365e+01 1.565e+02, threshold=1.753e+02, percent-clipped=0.0
+2024-09-18 15:08:43,746 INFO [train.py:1198] (0/2) Epoch 26, batch 3750, loss[loss=0.2179, ctc_loss=0.1166, cr_loss=0.345, attn_decoder_loss=0.2215, over 29334.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1261, cr_loss=0.3688, attn_decoder_loss=0.2451, over 5806394.34 frames. ], batch size: 67, lr: 4.24e-03, grad_scale: 8.0
+2024-09-18 15:08:47,038 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:08:52,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=467500.0, ans=0.125
+2024-09-18 15:09:04,969 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:09:15,865 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.63 vs. limit=15.0
+2024-09-18 15:09:45,889 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.60 vs. limit=22.5
+2024-09-18 15:09:49,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=467660.0, ans=0.125
+2024-09-18 15:09:58,322 INFO [train.py:1198] (0/2) Epoch 26, batch 3800, loss[loss=0.245, ctc_loss=0.1203, cr_loss=0.3533, attn_decoder_loss=0.251, over 29630.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1257, cr_loss=0.3678, attn_decoder_loss=0.2447, over 5797275.57 frames. ], batch size: 86, lr: 4.24e-03, grad_scale: 8.0
+2024-09-18 15:10:00,247 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:10:00,838 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.96 vs. limit=15.0
+2024-09-18 15:10:21,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=467740.0, ans=0.125
+2024-09-18 15:10:27,030 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:10:27,671 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.64 vs. limit=6.0
+2024-09-18 15:10:31,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=467780.0, ans=0.0
+2024-09-18 15:10:35,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=467780.0, ans=0.1
+2024-09-18 15:10:44,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=467820.0, ans=0.025
+2024-09-18 15:11:00,665 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0
+2024-09-18 15:11:12,680 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.531e+01 8.723e+01 9.262e+01 9.820e+01 3.411e+02, threshold=1.852e+02, percent-clipped=3.0
+2024-09-18 15:11:12,706 INFO [train.py:1198] (0/2) Epoch 26, batch 3850, loss[loss=0.2581, ctc_loss=0.1438, cr_loss=0.4129, attn_decoder_loss=0.2616, over 29250.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1259, cr_loss=0.3681, attn_decoder_loss=0.2448, over 5811310.59 frames. ], batch size: 100, lr: 4.24e-03, grad_scale: 8.0
+2024-09-18 15:11:15,414 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.64 vs. limit=15.0
+2024-09-18 15:11:16,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=467900.0, ans=0.125
+2024-09-18 15:11:27,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=467940.0, ans=0.125
+2024-09-18 15:11:30,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=467940.0, ans=0.125
+2024-09-18 15:11:32,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=467940.0, ans=0.125
+2024-09-18 15:11:38,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=467940.0, ans=0.2
+2024-09-18 15:11:44,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=467980.0, ans=0.025
+2024-09-18 15:12:08,939 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:12:20,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=468060.0, ans=0.125
+2024-09-18 15:12:29,508 INFO [train.py:1198] (0/2) Epoch 26, batch 3900, loss[loss=0.2526, ctc_loss=0.1325, cr_loss=0.3785, attn_decoder_loss=0.2575, over 29632.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1259, cr_loss=0.3678, attn_decoder_loss=0.2453, over 5815012.37 frames. ], batch size: 86, lr: 4.24e-03, grad_scale: 8.0
+2024-09-18 15:12:36,293 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.20 vs. limit=22.5
+2024-09-18 15:12:48,177 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.73 vs. limit=15.0
+2024-09-18 15:12:57,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=468180.0, ans=0.1
+2024-09-18 15:13:11,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=468180.0, ans=0.0
+2024-09-18 15:13:12,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=468220.0, ans=0.125
+2024-09-18 15:13:15,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=468220.0, ans=0.125
+2024-09-18 15:13:26,922 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.10 vs. limit=12.0
+2024-09-18 15:13:43,704 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.582e+01 9.076e+01 9.520e+01 1.404e+02, threshold=1.815e+02, percent-clipped=0.0
+2024-09-18 15:13:43,725 INFO [train.py:1198] (0/2) Epoch 26, batch 3950, loss[loss=0.2597, ctc_loss=0.1409, cr_loss=0.4061, attn_decoder_loss=0.2639, over 29444.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1259, cr_loss=0.3684, attn_decoder_loss=0.2453, over 5834648.99 frames. ], batch size: 97, lr: 4.24e-03, grad_scale: 8.0
+2024-09-18 15:13:53,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=468300.0, ans=0.125
+2024-09-18 15:14:26,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=468380.0, ans=0.125
+2024-09-18 15:14:31,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.33 vs. limit=12.0
+2024-09-18 15:14:41,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=468420.0, ans=0.0
+2024-09-18 15:14:54,218 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.39 vs. limit=5.0
+2024-09-18 15:14:58,757 INFO [train.py:1198] (0/2) Epoch 26, batch 4000, loss[loss=0.2152, ctc_loss=0.1032, cr_loss=0.3282, attn_decoder_loss=0.2204, over 29524.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1257, cr_loss=0.3671, attn_decoder_loss=0.245, over 5812346.31 frames. ], batch size: 74, lr: 4.24e-03, grad_scale: 16.0
+2024-09-18 15:15:07,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=468500.0, ans=0.2
+2024-09-18 15:15:10,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=468500.0, ans=0.125
+2024-09-18 15:15:29,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=468580.0, ans=0.125
+2024-09-18 15:15:31,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=468580.0, ans=0.125
+2024-09-18 15:15:45,329 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.51 vs. limit=12.0
+2024-09-18 15:15:53,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=468620.0, ans=0.05
+2024-09-18 15:16:14,193 INFO [train.py:1198] (0/2) Epoch 26, batch 4050, loss[loss=0.2614, ctc_loss=0.1525, cr_loss=0.3686, attn_decoder_loss=0.2653, over 20875.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1256, cr_loss=0.3665, attn_decoder_loss=0.2448, over 5796882.15 frames. ], batch size: 209, lr: 4.23e-03, grad_scale: 8.0
+2024-09-18 15:16:15,589 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.012e+01 8.606e+01 9.122e+01 9.849e+01 6.037e+02, threshold=1.824e+02, percent-clipped=3.0
+2024-09-18 15:16:18,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=468700.0, ans=0.2
+2024-09-18 15:16:23,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=468700.0, ans=0.125
+2024-09-18 15:16:59,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=468820.0, ans=0.2
+2024-09-18 15:17:28,159 INFO [train.py:1198] (0/2) Epoch 26, batch 4100, loss[loss=0.2502, ctc_loss=0.1318, cr_loss=0.3941, attn_decoder_loss=0.2546, over 29530.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1263, cr_loss=0.3679, attn_decoder_loss=0.245, over 5791512.62 frames. ], batch size: 90, lr: 4.23e-03, grad_scale: 8.0
+2024-09-18 15:17:32,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=468900.0, ans=0.125
+2024-09-18 15:17:34,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=468900.0, ans=0.125
+2024-09-18 15:17:55,501 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.36 vs. limit=15.0
+2024-09-18 15:18:05,817 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.54 vs. limit=6.0
+2024-09-18 15:18:09,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=468980.0, ans=0.0
+2024-09-18 15:18:11,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=469020.0, ans=0.0
+2024-09-18 15:18:12,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=469020.0, ans=0.1
+2024-09-18 15:18:29,928 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 15:18:40,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=469060.0, ans=0.125
+2024-09-18 15:18:42,763 INFO [train.py:1198] (0/2) Epoch 26, batch 4150, loss[loss=0.2325, ctc_loss=0.1228, cr_loss=0.3861, attn_decoder_loss=0.2361, over 29503.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1261, cr_loss=0.368, attn_decoder_loss=0.2447, over 5798104.96 frames. ], batch size: 77, lr: 4.23e-03, grad_scale: 8.0
+2024-09-18 15:18:44,182 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.354e+01 8.459e+01 8.973e+01 9.469e+01 6.878e+02, threshold=1.795e+02, percent-clipped=1.0
+2024-09-18 15:18:53,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=469100.0, ans=0.2
+2024-09-18 15:19:00,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=469140.0, ans=0.2
+2024-09-18 15:19:09,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=469140.0, ans=0.1
+2024-09-18 15:19:22,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=469180.0, ans=0.1
+2024-09-18 15:19:27,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=469220.0, ans=0.2
+2024-09-18 15:19:43,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=469260.0, ans=0.125
+2024-09-18 15:19:56,319 INFO [train.py:1198] (0/2) Epoch 26, batch 4200, loss[loss=0.2666, ctc_loss=0.1534, cr_loss=0.4232, attn_decoder_loss=0.2698, over 29512.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1264, cr_loss=0.3687, attn_decoder_loss=0.2451, over 5799729.12 frames. ], batch size: 90, lr: 4.23e-03, grad_scale: 8.0
+2024-09-18 15:20:00,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=469300.0, ans=0.1
+2024-09-18 15:20:08,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=469300.0, ans=0.0
+2024-09-18 15:20:10,696 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.31 vs. limit=15.0
+2024-09-18 15:20:33,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=469380.0, ans=0.125
+2024-09-18 15:20:38,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=469380.0, ans=0.035
+2024-09-18 15:20:46,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=469420.0, ans=0.0
+2024-09-18 15:21:02,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=469460.0, ans=0.125
+2024-09-18 15:21:10,803 INFO [train.py:1198] (0/2) Epoch 26, batch 4250, loss[loss=0.235, ctc_loss=0.1264, cr_loss=0.3844, attn_decoder_loss=0.2386, over 29529.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1262, cr_loss=0.3683, attn_decoder_loss=0.2453, over 5805367.35 frames.
], batch size: 74, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:21:12,222 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.377e+01 8.717e+01 9.053e+01 9.730e+01 2.394e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-18 15:21:34,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=469540.0, ans=0.125 +2024-09-18 15:22:06,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=469620.0, ans=0.07 +2024-09-18 15:22:07,964 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.35 vs. limit=22.5 +2024-09-18 15:22:25,794 INFO [train.py:1198] (0/2) Epoch 26, batch 4300, loss[loss=0.2492, ctc_loss=0.1262, cr_loss=0.3686, attn_decoder_loss=0.2547, over 29500.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.126, cr_loss=0.3683, attn_decoder_loss=0.2455, over 5794747.27 frames. ], batch size: 87, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:22:35,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=469700.0, ans=0.0 +2024-09-18 15:22:36,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=469700.0, ans=0.125 +2024-09-18 15:22:41,791 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.08 vs. limit=15.0 +2024-09-18 15:22:48,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=469740.0, ans=0.125 +2024-09-18 15:22:55,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=469780.0, ans=0.125 +2024-09-18 15:23:10,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=469820.0, ans=0.125 +2024-09-18 15:23:17,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=469820.0, ans=0.1 +2024-09-18 15:23:30,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=469860.0, ans=10.0 +2024-09-18 15:23:40,793 INFO [train.py:1198] (0/2) Epoch 26, batch 4350, loss[loss=0.2465, ctc_loss=0.1258, cr_loss=0.3609, attn_decoder_loss=0.2519, over 29533.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1285, cr_loss=0.3741, attn_decoder_loss=0.2487, over 5796754.15 frames. 
], batch size: 97, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:23:42,286 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.612e+01 9.127e+01 9.671e+01 1.308e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-18 15:23:48,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=469900.0, ans=0.0 +2024-09-18 15:24:09,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=469980.0, ans=0.125 +2024-09-18 15:24:10,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=469980.0, ans=0.0 +2024-09-18 15:24:23,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=470020.0, ans=0.1 +2024-09-18 15:24:38,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=470060.0, ans=0.125 +2024-09-18 15:24:44,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=470060.0, ans=0.125 +2024-09-18 15:24:47,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=470060.0, ans=0.1 +2024-09-18 15:24:54,005 INFO [train.py:1198] (0/2) Epoch 26, batch 4400, loss[loss=0.2563, ctc_loss=0.1456, cr_loss=0.4085, attn_decoder_loss=0.2596, over 27246.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1303, cr_loss=0.3769, attn_decoder_loss=0.2509, over 5767840.50 frames. ], batch size: 124, lr: 4.23e-03, grad_scale: 16.0 +2024-09-18 15:24:55,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=470100.0, ans=0.1 +2024-09-18 15:24:57,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=470100.0, ans=0.1 +2024-09-18 15:25:00,864 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.49 vs. limit=6.0 +2024-09-18 15:25:20,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=470140.0, ans=0.2 +2024-09-18 15:25:22,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=470180.0, ans=0.125 +2024-09-18 15:25:37,767 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:26:06,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=470260.0, ans=0.025 +2024-09-18 15:26:09,418 INFO [train.py:1198] (0/2) Epoch 26, batch 4450, loss[loss=0.267, ctc_loss=0.1538, cr_loss=0.3973, attn_decoder_loss=0.2707, over 20287.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1345, cr_loss=0.3825, attn_decoder_loss=0.2534, over 5573461.36 frames. 
], batch size: 210, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:26:12,367 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.301e+01 9.167e+01 9.608e+01 1.048e+02 2.652e+02, threshold=1.922e+02, percent-clipped=1.0 +2024-09-18 15:26:26,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.54 vs. limit=15.0 +2024-09-18 15:27:07,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=470420.0, ans=0.1 +2024-09-18 15:27:25,445 INFO [train.py:1198] (0/2) Epoch 26, batch 4500, loss[loss=0.2607, ctc_loss=0.1547, cr_loss=0.3889, attn_decoder_loss=0.2639, over 19626.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.1386, cr_loss=0.3851, attn_decoder_loss=0.2557, over 5233363.15 frames. ], batch size: 210, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:27:44,601 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.90 vs. limit=15.0 +2024-09-18 15:28:02,400 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-26.pt +2024-09-18 15:28:54,202 INFO [train.py:1198] (0/2) Epoch 27, batch 0, loss[loss=0.2194, ctc_loss=0.104, cr_loss=0.3239, attn_decoder_loss=0.225, over 29570.00 frames. ], tot_loss[loss=0.2194, ctc_loss=0.104, cr_loss=0.3239, attn_decoder_loss=0.225, over 29570.00 frames. ], batch size: 73, lr: 4.15e-03, grad_scale: 16.0 +2024-09-18 15:28:54,202 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 15:29:01,849 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([2.4264, 3.6627, 3.6778, 3.6779], device='cuda:0') +2024-09-18 15:29:10,365 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.4.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([4.1557, 4.4262, 3.8561, 4.0711], device='cuda:0') +2024-09-18 15:29:12,732 INFO [train.py:1230] (0/2) Epoch 27, validation: loss=0.2127, ctc_loss=0.03797, cr_loss=5.907e-15, attn_decoder_loss=0.2322, over 944034.00 frames. +2024-09-18 15:29:12,733 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 15:29:46,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=470680.0, ans=0.0 +2024-09-18 15:29:53,212 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.260e+01 1.034e+02 1.128e+02 1.240e+02 3.218e+02, threshold=2.256e+02, percent-clipped=3.0 +2024-09-18 15:30:04,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=470720.0, ans=0.125 +2024-09-18 15:30:05,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=470720.0, ans=0.0 +2024-09-18 15:30:28,359 INFO [train.py:1198] (0/2) Epoch 27, batch 50, loss[loss=0.2172, ctc_loss=0.1071, cr_loss=0.3277, attn_decoder_loss=0.2221, over 29435.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1271, cr_loss=0.3695, attn_decoder_loss=0.2455, over 1269115.75 frames. 
], batch size: 70, lr: 4.14e-03, grad_scale: 16.0 +2024-09-18 15:30:30,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=470800.0, ans=0.0 +2024-09-18 15:30:55,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=470840.0, ans=0.2 +2024-09-18 15:31:04,567 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.13 vs. limit=15.0 +2024-09-18 15:31:07,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=470880.0, ans=0.0 +2024-09-18 15:31:08,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=470880.0, ans=0.125 +2024-09-18 15:31:37,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=470960.0, ans=0.125 +2024-09-18 15:31:47,308 INFO [train.py:1198] (0/2) Epoch 27, batch 100, loss[loss=0.2239, ctc_loss=0.1107, cr_loss=0.332, attn_decoder_loss=0.2291, over 29529.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.128, cr_loss=0.3726, attn_decoder_loss=0.2474, over 2253429.13 frames. ], batch size: 76, lr: 4.14e-03, grad_scale: 16.0 +2024-09-18 15:31:52,577 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.60 vs. limit=15.0 +2024-09-18 15:32:07,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=471040.0, ans=0.1 +2024-09-18 15:32:12,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=471040.0, ans=0.07 +2024-09-18 15:32:14,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=471040.0, ans=0.125 +2024-09-18 15:32:28,931 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.524e+01 9.170e+01 9.614e+01 1.417e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-18 15:32:40,445 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.52 vs. limit=22.5 +2024-09-18 15:32:45,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=471160.0, ans=0.1 +2024-09-18 15:32:58,085 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.13 vs. limit=10.0 +2024-09-18 15:33:02,264 INFO [train.py:1198] (0/2) Epoch 27, batch 150, loss[loss=0.2149, ctc_loss=0.1051, cr_loss=0.3327, attn_decoder_loss=0.2197, over 29417.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1259, cr_loss=0.3683, attn_decoder_loss=0.2454, over 3048209.46 frames. 
], batch size: 70, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:33:13,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=471200.0, ans=0.125 +2024-09-18 15:33:28,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=471240.0, ans=0.125 +2024-09-18 15:33:44,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=471280.0, ans=0.2 +2024-09-18 15:33:47,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=471320.0, ans=0.0 +2024-09-18 15:33:52,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.60 vs. limit=22.5 +2024-09-18 15:33:52,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.57 vs. limit=6.0 +2024-09-18 15:33:55,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=471320.0, ans=0.125 +2024-09-18 15:34:17,455 INFO [train.py:1198] (0/2) Epoch 27, batch 200, loss[loss=0.2534, ctc_loss=0.1325, cr_loss=0.3964, attn_decoder_loss=0.2581, over 27183.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1252, cr_loss=0.3677, attn_decoder_loss=0.2444, over 3659797.50 frames. ], batch size: 124, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:34:23,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=471400.0, ans=0.1 +2024-09-18 15:34:29,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=471400.0, ans=0.1 +2024-09-18 15:34:33,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.45 vs. limit=22.5 +2024-09-18 15:34:59,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=471480.0, ans=0.1 +2024-09-18 15:35:04,162 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.694e+01 8.473e+01 8.928e+01 9.557e+01 1.148e+02, threshold=1.786e+02, percent-clipped=0.0 +2024-09-18 15:35:10,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=471520.0, ans=0.1 +2024-09-18 15:35:18,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=471520.0, ans=0.09899494936611666 +2024-09-18 15:35:18,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=471520.0, ans=0.2 +2024-09-18 15:35:28,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=471560.0, ans=0.125 +2024-09-18 15:35:37,439 INFO [train.py:1198] (0/2) Epoch 27, batch 250, loss[loss=0.2577, ctc_loss=0.1411, cr_loss=0.4084, attn_decoder_loss=0.2615, over 29269.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1255, cr_loss=0.3676, attn_decoder_loss=0.2445, over 4141133.67 frames. 
], batch size: 100, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:35:40,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=471600.0, ans=0.125 +2024-09-18 15:36:13,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=471680.0, ans=0.0 +2024-09-18 15:36:38,467 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.34 vs. limit=15.0 +2024-09-18 15:36:53,042 INFO [train.py:1198] (0/2) Epoch 27, batch 300, loss[loss=0.2508, ctc_loss=0.1355, cr_loss=0.3946, attn_decoder_loss=0.2548, over 29537.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.125, cr_loss=0.366, attn_decoder_loss=0.2442, over 4508899.19 frames. ], batch size: 92, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:36:56,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=471800.0, ans=0.125 +2024-09-18 15:37:11,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=471840.0, ans=0.125 +2024-09-18 15:37:19,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=471840.0, ans=0.2 +2024-09-18 15:37:31,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=471880.0, ans=0.125 +2024-09-18 15:37:35,276 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.584e+01 8.118e+01 8.847e+01 9.359e+01 3.678e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-18 15:37:44,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=471920.0, ans=0.125 +2024-09-18 15:37:48,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=471920.0, ans=0.125 +2024-09-18 15:37:52,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=471960.0, ans=0.05 +2024-09-18 15:37:54,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=471960.0, ans=0.025 +2024-09-18 15:37:57,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=471960.0, ans=0.09899494936611666 +2024-09-18 15:38:06,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=471960.0, ans=0.1 +2024-09-18 15:38:08,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=472000.0, ans=0.125 +2024-09-18 15:38:09,252 INFO [train.py:1198] (0/2) Epoch 27, batch 350, loss[loss=0.2154, ctc_loss=0.1084, cr_loss=0.3306, attn_decoder_loss=0.2199, over 29314.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1256, cr_loss=0.3669, attn_decoder_loss=0.2449, over 4794809.73 frames. 
], batch size: 71, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:38:35,194 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:38:43,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=472080.0, ans=0.125 +2024-09-18 15:38:59,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=472120.0, ans=0.0 +2024-09-18 15:39:07,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=472120.0, ans=0.1 +2024-09-18 15:39:29,370 INFO [train.py:1198] (0/2) Epoch 27, batch 400, loss[loss=0.2417, ctc_loss=0.1192, cr_loss=0.3633, attn_decoder_loss=0.2472, over 29701.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1255, cr_loss=0.367, attn_decoder_loss=0.2446, over 5023935.81 frames. ], batch size: 82, lr: 4.14e-03, grad_scale: 16.0 +2024-09-18 15:39:31,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=472200.0, ans=0.125 +2024-09-18 15:39:34,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=472200.0, ans=0.2 +2024-09-18 15:39:55,682 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:40:11,987 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.649e+01 9.100e+01 9.719e+01 1.502e+02, threshold=1.820e+02, percent-clipped=0.0 +2024-09-18 15:40:27,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=472320.0, ans=0.125 +2024-09-18 15:40:30,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=472360.0, ans=0.0 +2024-09-18 15:40:45,450 INFO [train.py:1198] (0/2) Epoch 27, batch 450, loss[loss=0.2554, ctc_loss=0.1418, cr_loss=0.4002, attn_decoder_loss=0.2591, over 29695.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.126, cr_loss=0.3685, attn_decoder_loss=0.2453, over 5186742.95 frames. ], batch size: 83, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:41:30,884 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.32 vs. limit=12.0 +2024-09-18 15:41:34,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=472520.0, ans=0.0 +2024-09-18 15:41:42,957 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.42 vs. limit=15.0 +2024-09-18 15:42:02,023 INFO [train.py:1198] (0/2) Epoch 27, batch 500, loss[loss=0.2619, ctc_loss=0.1448, cr_loss=0.425, attn_decoder_loss=0.2655, over 29427.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1258, cr_loss=0.3686, attn_decoder_loss=0.2449, over 5329472.10 frames. ], batch size: 94, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:42:03,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=472600.0, ans=0.125 +2024-09-18 15:42:10,628 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.72 vs. 
limit=15.0 +2024-09-18 15:42:41,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=472680.0, ans=0.0 +2024-09-18 15:42:50,898 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.521e+01 8.545e+01 8.912e+01 9.466e+01 2.661e+02, threshold=1.782e+02, percent-clipped=0.0 +2024-09-18 15:42:51,927 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.49 vs. limit=12.0 +2024-09-18 15:42:52,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=472720.0, ans=0.2 +2024-09-18 15:43:05,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=472720.0, ans=0.125 +2024-09-18 15:43:06,763 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.09 vs. limit=22.5 +2024-09-18 15:43:23,226 INFO [train.py:1198] (0/2) Epoch 27, batch 550, loss[loss=0.2452, ctc_loss=0.1259, cr_loss=0.362, attn_decoder_loss=0.2504, over 28904.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1254, cr_loss=0.3673, attn_decoder_loss=0.245, over 5422509.15 frames. ], batch size: 104, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:43:25,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=472800.0, ans=0.0 +2024-09-18 15:43:44,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=472840.0, ans=0.1 +2024-09-18 15:43:44,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=472840.0, ans=0.125 +2024-09-18 15:43:52,534 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:43:56,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.10 vs. limit=6.0 +2024-09-18 15:44:19,217 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.58 vs. limit=15.0 +2024-09-18 15:44:21,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=472920.0, ans=0.1 +2024-09-18 15:44:39,398 INFO [train.py:1198] (0/2) Epoch 27, batch 600, loss[loss=0.2623, ctc_loss=0.1381, cr_loss=0.3906, attn_decoder_loss=0.2674, over 29249.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1258, cr_loss=0.3686, attn_decoder_loss=0.2453, over 5509367.75 frames. 
], batch size: 100, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:44:41,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=473000.0, ans=0.0 +2024-09-18 15:44:42,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=473000.0, ans=0.125 +2024-09-18 15:45:11,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=473080.0, ans=0.0 +2024-09-18 15:45:14,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=473080.0, ans=0.0 +2024-09-18 15:45:15,768 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:45:22,886 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.064e+01 8.416e+01 8.737e+01 9.314e+01 1.829e+02, threshold=1.747e+02, percent-clipped=2.0 +2024-09-18 15:45:55,002 INFO [train.py:1198] (0/2) Epoch 27, batch 650, loss[loss=0.2361, ctc_loss=0.1118, cr_loss=0.3327, attn_decoder_loss=0.2425, over 29747.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1245, cr_loss=0.3659, attn_decoder_loss=0.2444, over 5585989.04 frames. ], batch size: 81, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:45:58,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=473200.0, ans=0.05 +2024-09-18 15:46:02,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=473200.0, ans=0.1 +2024-09-18 15:46:10,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=473240.0, ans=0.05 +2024-09-18 15:46:27,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=473280.0, ans=0.1 +2024-09-18 15:47:15,997 INFO [train.py:1198] (0/2) Epoch 27, batch 700, loss[loss=0.2306, ctc_loss=0.1217, cr_loss=0.3617, attn_decoder_loss=0.2347, over 29563.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1247, cr_loss=0.3664, attn_decoder_loss=0.2448, over 5636723.76 frames. ], batch size: 76, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:47:37,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=473440.0, ans=0.125 +2024-09-18 15:47:39,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=473440.0, ans=0.1 +2024-09-18 15:47:58,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=473480.0, ans=0.125 +2024-09-18 15:48:00,170 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.368e+01 8.450e+01 9.020e+01 9.619e+01 3.078e+02, threshold=1.804e+02, percent-clipped=1.0 +2024-09-18 15:48:32,684 INFO [train.py:1198] (0/2) Epoch 27, batch 750, loss[loss=0.2465, ctc_loss=0.1288, cr_loss=0.3753, attn_decoder_loss=0.2513, over 29679.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1244, cr_loss=0.3656, attn_decoder_loss=0.2444, over 5676381.75 frames. 
], batch size: 82, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:48:54,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=473640.0, ans=0.1 +2024-09-18 15:48:55,080 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.44 vs. limit=6.0 +2024-09-18 15:49:01,766 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:49:08,315 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.50 vs. limit=22.5 +2024-09-18 15:49:11,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=473680.0, ans=0.0 +2024-09-18 15:49:20,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=473720.0, ans=0.1 +2024-09-18 15:49:26,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=473720.0, ans=0.2 +2024-09-18 15:49:45,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.64 vs. limit=22.5 +2024-09-18 15:49:45,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.06 vs. limit=22.5 +2024-09-18 15:49:46,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=473760.0, ans=0.125 +2024-09-18 15:49:47,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=473800.0, ans=0.0 +2024-09-18 15:49:48,910 INFO [train.py:1198] (0/2) Epoch 27, batch 800, loss[loss=0.224, ctc_loss=0.1121, cr_loss=0.3499, attn_decoder_loss=0.2287, over 29628.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1249, cr_loss=0.3664, attn_decoder_loss=0.2447, over 5706884.86 frames. ], batch size: 73, lr: 4.13e-03, grad_scale: 16.0 +2024-09-18 15:49:51,452 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.57 vs. limit=15.0 +2024-09-18 15:49:53,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=473800.0, ans=0.125 +2024-09-18 15:50:11,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=473840.0, ans=0.125 +2024-09-18 15:50:14,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=473840.0, ans=0.125 +2024-09-18 15:50:18,628 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.39 vs. 
limit=12.0 +2024-09-18 15:50:27,256 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:50:28,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=473880.0, ans=0.035 +2024-09-18 15:50:35,330 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.276e+01 8.485e+01 9.104e+01 9.795e+01 7.519e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-18 15:50:41,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=473920.0, ans=0.0 +2024-09-18 15:50:41,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=473920.0, ans=0.1 +2024-09-18 15:51:09,194 INFO [train.py:1198] (0/2) Epoch 27, batch 850, loss[loss=0.2538, ctc_loss=0.1296, cr_loss=0.3738, attn_decoder_loss=0.2593, over 29708.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1243, cr_loss=0.3656, attn_decoder_loss=0.2443, over 5736343.07 frames. ], batch size: 89, lr: 4.13e-03, grad_scale: 16.0 +2024-09-18 15:51:16,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=474000.0, ans=0.125 +2024-09-18 15:51:19,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=474000.0, ans=0.2 +2024-09-18 15:51:39,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=474080.0, ans=0.0 +2024-09-18 15:51:48,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=474080.0, ans=0.2 +2024-09-18 15:51:53,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=474120.0, ans=0.125 +2024-09-18 15:52:07,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=474120.0, ans=0.125 +2024-09-18 15:52:10,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=474160.0, ans=0.0 +2024-09-18 15:52:12,445 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.83 vs. limit=6.0 +2024-09-18 15:52:16,907 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.83 vs. limit=22.5 +2024-09-18 15:52:24,937 INFO [train.py:1198] (0/2) Epoch 27, batch 900, loss[loss=0.2111, ctc_loss=0.09848, cr_loss=0.3115, attn_decoder_loss=0.2167, over 29572.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1245, cr_loss=0.3661, attn_decoder_loss=0.2443, over 5741025.85 frames. ], batch size: 73, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:52:28,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=474200.0, ans=0.1 +2024-09-18 15:52:40,946 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.38 vs. 
limit=15.0 +2024-09-18 15:53:10,287 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.261e+01 8.501e+01 8.938e+01 9.467e+01 2.355e+02, threshold=1.788e+02, percent-clipped=2.0 +2024-09-18 15:53:11,048 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.17 vs. limit=22.5 +2024-09-18 15:53:16,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=474320.0, ans=0.1 +2024-09-18 15:53:37,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.21 vs. limit=22.5 +2024-09-18 15:53:41,188 INFO [train.py:1198] (0/2) Epoch 27, batch 950, loss[loss=0.2306, ctc_loss=0.1159, cr_loss=0.3477, attn_decoder_loss=0.2356, over 29514.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1248, cr_loss=0.3663, attn_decoder_loss=0.2446, over 5742537.43 frames. ], batch size: 74, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:53:43,950 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.94 vs. limit=8.0 +2024-09-18 15:53:44,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=474400.0, ans=0.1 +2024-09-18 15:54:26,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=474480.0, ans=0.125 +2024-09-18 15:54:29,952 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.69 vs. limit=12.0 +2024-09-18 15:55:01,211 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.06 vs. limit=15.0 +2024-09-18 15:55:01,614 INFO [train.py:1198] (0/2) Epoch 27, batch 1000, loss[loss=0.2306, ctc_loss=0.1166, cr_loss=0.3559, attn_decoder_loss=0.2353, over 29504.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1259, cr_loss=0.3679, attn_decoder_loss=0.2453, over 5737418.41 frames. 
], batch size: 77, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:55:09,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=474600.0, ans=0.09899494936611666 +2024-09-18 15:55:20,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=474640.0, ans=0.0 +2024-09-18 15:55:23,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=474640.0, ans=0.0 +2024-09-18 15:55:47,617 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.698e+01 8.547e+01 9.112e+01 9.993e+01 2.254e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 15:56:00,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=474720.0, ans=0.04949747468305833 +2024-09-18 15:56:04,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=474760.0, ans=0.125 +2024-09-18 15:56:10,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=474760.0, ans=0.1 +2024-09-18 15:56:13,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=474760.0, ans=0.0 +2024-09-18 15:56:17,920 INFO [train.py:1198] (0/2) Epoch 27, batch 1050, loss[loss=0.2583, ctc_loss=0.1366, cr_loss=0.4031, attn_decoder_loss=0.2629, over 29702.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1256, cr_loss=0.3676, attn_decoder_loss=0.2448, over 5744773.64 frames. ], batch size: 85, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:56:21,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=474800.0, ans=0.2 +2024-09-18 15:56:38,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=474840.0, ans=0.125 +2024-09-18 15:56:45,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=474840.0, ans=0.05 +2024-09-18 15:57:07,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=474920.0, ans=0.1 +2024-09-18 15:57:34,521 INFO [train.py:1198] (0/2) Epoch 27, batch 1100, loss[loss=0.2319, ctc_loss=0.119, cr_loss=0.3462, attn_decoder_loss=0.2367, over 29439.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1254, cr_loss=0.3676, attn_decoder_loss=0.2446, over 5756406.35 frames. ], batch size: 78, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:57:36,443 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:57:43,172 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.46 vs. limit=15.0 +2024-09-18 15:58:08,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=475080.0, ans=0.1 +2024-09-18 15:58:09,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=475080.0, ans=0.1 +2024-09-18 15:58:10,534 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.44 vs. 
limit=15.0 +2024-09-18 15:58:18,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=475080.0, ans=0.025 +2024-09-18 15:58:18,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=475080.0, ans=0.125 +2024-09-18 15:58:22,629 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.448e+01 9.006e+01 9.632e+01 1.338e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-18 15:58:28,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=475120.0, ans=0.07 +2024-09-18 15:58:28,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=475120.0, ans=0.0 +2024-09-18 15:58:31,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=475120.0, ans=0.2 +2024-09-18 15:58:41,539 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.39 vs. limit=15.0 +2024-09-18 15:58:55,779 INFO [train.py:1198] (0/2) Epoch 27, batch 1150, loss[loss=0.2363, ctc_loss=0.12, cr_loss=0.3586, attn_decoder_loss=0.2413, over 29439.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1248, cr_loss=0.3658, attn_decoder_loss=0.2442, over 5754164.93 frames. ], batch size: 78, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:59:11,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=475240.0, ans=10.0 +2024-09-18 15:59:12,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=475240.0, ans=0.125 +2024-09-18 15:59:15,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=475240.0, ans=0.125 +2024-09-18 15:59:23,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=475240.0, ans=0.2 +2024-09-18 15:59:47,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=475320.0, ans=0.04949747468305833 +2024-09-18 15:59:54,390 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.14 vs. limit=15.0 +2024-09-18 16:00:04,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=475360.0, ans=0.125 +2024-09-18 16:00:10,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=475400.0, ans=0.1 +2024-09-18 16:00:11,933 INFO [train.py:1198] (0/2) Epoch 27, batch 1200, loss[loss=0.2496, ctc_loss=0.1328, cr_loss=0.3708, attn_decoder_loss=0.2543, over 29696.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1259, cr_loss=0.3676, attn_decoder_loss=0.2453, over 5746382.68 frames. ], batch size: 85, lr: 4.12e-03, grad_scale: 16.0 +2024-09-18 16:00:29,970 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.53 vs. 
limit=6.0 +2024-09-18 16:00:45,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=475480.0, ans=0.025 +2024-09-18 16:00:59,251 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.652e+01 9.107e+01 9.727e+01 1.637e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-18 16:01:04,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=475520.0, ans=10.0 +2024-09-18 16:01:04,309 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:01:25,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=475560.0, ans=0.0 +2024-09-18 16:01:27,920 INFO [train.py:1198] (0/2) Epoch 27, batch 1250, loss[loss=0.2572, ctc_loss=0.1349, cr_loss=0.3853, attn_decoder_loss=0.2622, over 29508.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1264, cr_loss=0.3694, attn_decoder_loss=0.2461, over 5773981.05 frames. ], batch size: 92, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:01:48,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.71 vs. limit=22.5 +2024-09-18 16:01:51,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=475640.0, ans=0.1 +2024-09-18 16:01:59,280 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.73 vs. limit=6.0 +2024-09-18 16:02:00,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=475680.0, ans=0.125 +2024-09-18 16:02:13,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=475680.0, ans=0.2 +2024-09-18 16:02:17,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=475720.0, ans=0.1 +2024-09-18 16:02:48,700 INFO [train.py:1198] (0/2) Epoch 27, batch 1300, loss[loss=0.2456, ctc_loss=0.1269, cr_loss=0.3701, attn_decoder_loss=0.2506, over 28188.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1261, cr_loss=0.3689, attn_decoder_loss=0.2455, over 5779113.18 frames. 
], batch size: 111, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:02:52,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=475800.0, ans=0.0 +2024-09-18 16:02:58,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=475800.0, ans=0.125 +2024-09-18 16:03:11,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=475840.0, ans=0.125 +2024-09-18 16:03:20,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=475880.0, ans=0.5 +2024-09-18 16:03:28,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=475880.0, ans=0.2 +2024-09-18 16:03:31,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=475880.0, ans=0.035 +2024-09-18 16:03:35,951 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.290e+01 8.380e+01 8.992e+01 9.418e+01 1.555e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-18 16:03:40,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=475920.0, ans=0.2 +2024-09-18 16:03:53,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=475960.0, ans=0.1 +2024-09-18 16:03:56,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=475960.0, ans=10.0 +2024-09-18 16:04:01,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=475960.0, ans=0.125 +2024-09-18 16:04:04,453 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.57 vs. limit=15.0 +2024-09-18 16:04:05,251 INFO [train.py:1198] (0/2) Epoch 27, batch 1350, loss[loss=0.246, ctc_loss=0.1244, cr_loss=0.3474, attn_decoder_loss=0.2518, over 29762.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1253, cr_loss=0.3672, attn_decoder_loss=0.2449, over 5796113.87 frames. ], batch size: 81, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:04:10,598 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.67 vs. 
limit=10.0 +2024-09-18 16:04:17,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=476000.0, ans=0.0 +2024-09-18 16:04:20,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=476040.0, ans=0.125 +2024-09-18 16:04:33,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=476080.0, ans=0.125 +2024-09-18 16:04:38,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=476080.0, ans=0.2 +2024-09-18 16:04:45,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=476080.0, ans=0.125 +2024-09-18 16:04:53,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=476120.0, ans=0.125 +2024-09-18 16:05:19,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=476200.0, ans=0.1 +2024-09-18 16:05:20,666 INFO [train.py:1198] (0/2) Epoch 27, batch 1400, loss[loss=0.21, ctc_loss=0.1116, cr_loss=0.333, attn_decoder_loss=0.2135, over 29586.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1252, cr_loss=0.367, attn_decoder_loss=0.2447, over 5807359.02 frames. ], batch size: 69, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:05:28,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=476200.0, ans=0.0 +2024-09-18 16:05:30,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=476200.0, ans=0.125 +2024-09-18 16:05:34,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=476240.0, ans=0.2 +2024-09-18 16:06:09,970 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.340e+01 8.342e+01 8.774e+01 9.500e+01 1.505e+02, threshold=1.755e+02, percent-clipped=0.0 +2024-09-18 16:06:38,572 INFO [train.py:1198] (0/2) Epoch 27, batch 1450, loss[loss=0.2516, ctc_loss=0.1374, cr_loss=0.3934, attn_decoder_loss=0.2555, over 29475.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1254, cr_loss=0.3675, attn_decoder_loss=0.245, over 5804397.22 frames. ], batch size: 94, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:06:45,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=476400.0, ans=0.2 +2024-09-18 16:07:11,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=476480.0, ans=0.1 +2024-09-18 16:07:23,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff2.min_abs, batch_count=476480.0, ans=0.1 +2024-09-18 16:07:26,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=476520.0, ans=0.125 +2024-09-18 16:07:56,834 INFO [train.py:1198] (0/2) Epoch 27, batch 1500, loss[loss=0.2413, ctc_loss=0.124, cr_loss=0.3767, attn_decoder_loss=0.246, over 29614.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1253, cr_loss=0.3679, attn_decoder_loss=0.2452, over 5805453.38 frames. 
], batch size: 86, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:08:27,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=476680.0, ans=0.0 +2024-09-18 16:08:38,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=476680.0, ans=0.1 +2024-09-18 16:08:41,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=476720.0, ans=0.04949747468305833 +2024-09-18 16:08:44,404 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.602e+01 8.578e+01 9.265e+01 1.012e+02 4.469e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-18 16:09:01,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=476760.0, ans=0.1 +2024-09-18 16:09:13,770 INFO [train.py:1198] (0/2) Epoch 27, batch 1550, loss[loss=0.2539, ctc_loss=0.1427, cr_loss=0.4061, attn_decoder_loss=0.2573, over 29472.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1256, cr_loss=0.3684, attn_decoder_loss=0.2453, over 5781541.65 frames. ], batch size: 90, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:09:47,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=476880.0, ans=0.2 +2024-09-18 16:09:57,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=476880.0, ans=0.0 +2024-09-18 16:10:00,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=476920.0, ans=0.125 +2024-09-18 16:10:08,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.48 vs. limit=15.0 +2024-09-18 16:10:15,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=476960.0, ans=0.1 +2024-09-18 16:10:17,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=476960.0, ans=0.0 +2024-09-18 16:10:31,961 INFO [train.py:1198] (0/2) Epoch 27, batch 1600, loss[loss=0.2441, ctc_loss=0.126, cr_loss=0.3702, attn_decoder_loss=0.249, over 29669.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1256, cr_loss=0.368, attn_decoder_loss=0.2452, over 5764444.30 frames. ], batch size: 85, lr: 4.12e-03, grad_scale: 16.0 +2024-09-18 16:10:51,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.43 vs. 
limit=15.0 +2024-09-18 16:10:55,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=477040.0, ans=0.125 +2024-09-18 16:10:55,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=477040.0, ans=0.125 +2024-09-18 16:11:23,076 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.401e+01 8.546e+01 9.000e+01 9.569e+01 2.285e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-18 16:11:26,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=477120.0, ans=0.125 +2024-09-18 16:11:37,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=477160.0, ans=0.125 +2024-09-18 16:11:40,694 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.94 vs. limit=15.0 +2024-09-18 16:11:50,231 INFO [train.py:1198] (0/2) Epoch 27, batch 1650, loss[loss=0.2503, ctc_loss=0.1274, cr_loss=0.3691, attn_decoder_loss=0.2558, over 29709.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1253, cr_loss=0.3674, attn_decoder_loss=0.2448, over 5759113.86 frames. ], batch size: 89, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:12:08,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=477240.0, ans=0.125 +2024-09-18 16:12:23,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=477280.0, ans=0.0 +2024-09-18 16:12:24,306 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.45 vs. limit=15.0 +2024-09-18 16:12:55,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=477360.0, ans=0.1 +2024-09-18 16:13:05,974 INFO [train.py:1198] (0/2) Epoch 27, batch 1700, loss[loss=0.2117, ctc_loss=0.1099, cr_loss=0.3377, attn_decoder_loss=0.2155, over 29579.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.125, cr_loss=0.3674, attn_decoder_loss=0.2448, over 5779223.19 frames. ], batch size: 69, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:13:20,226 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:13:45,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=477480.0, ans=0.0 +2024-09-18 16:13:56,897 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.259e+01 8.516e+01 9.095e+01 9.729e+01 1.325e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-18 16:14:07,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=477560.0, ans=0.125 +2024-09-18 16:14:24,658 INFO [train.py:1198] (0/2) Epoch 27, batch 1750, loss[loss=0.2147, ctc_loss=0.1116, cr_loss=0.3515, attn_decoder_loss=0.2184, over 29344.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1248, cr_loss=0.3669, attn_decoder_loss=0.2444, over 5787471.15 frames. 
], batch size: 67, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:14:42,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=477640.0, ans=0.125 +2024-09-18 16:14:45,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=477640.0, ans=0.04949747468305833 +2024-09-18 16:14:51,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=477640.0, ans=0.025 +2024-09-18 16:15:42,637 INFO [train.py:1198] (0/2) Epoch 27, batch 1800, loss[loss=0.2507, ctc_loss=0.1272, cr_loss=0.3613, attn_decoder_loss=0.2564, over 29691.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.125, cr_loss=0.3673, attn_decoder_loss=0.2447, over 5790555.29 frames. ], batch size: 83, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:15:50,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=477800.0, ans=0.2 +2024-09-18 16:16:00,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.33 vs. limit=15.0 +2024-09-18 16:16:17,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=477880.0, ans=0.125 +2024-09-18 16:16:31,638 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.468e+01 8.306e+01 8.965e+01 9.478e+01 1.194e+02, threshold=1.793e+02, percent-clipped=0.0 +2024-09-18 16:16:35,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=477920.0, ans=0.125 +2024-09-18 16:16:35,801 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.43 vs. limit=15.0 +2024-09-18 16:16:45,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=477960.0, ans=0.125 +2024-09-18 16:16:55,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=477960.0, ans=0.95 +2024-09-18 16:16:59,471 INFO [train.py:1198] (0/2) Epoch 27, batch 1850, loss[loss=0.2491, ctc_loss=0.1188, cr_loss=0.359, attn_decoder_loss=0.2555, over 29627.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1246, cr_loss=0.3663, attn_decoder_loss=0.2442, over 5795156.91 frames. ], batch size: 86, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:17:06,226 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.28 vs. limit=12.0 +2024-09-18 16:17:16,750 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.29 vs. 
limit=22.5 +2024-09-18 16:17:19,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=478040.0, ans=0.125 +2024-09-18 16:17:44,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=478080.0, ans=0.125 +2024-09-18 16:17:47,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=478120.0, ans=0.0 +2024-09-18 16:17:55,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.07 vs. limit=22.5 +2024-09-18 16:18:12,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=478160.0, ans=0.035 +2024-09-18 16:18:17,764 INFO [train.py:1198] (0/2) Epoch 27, batch 1900, loss[loss=0.2456, ctc_loss=0.1226, cr_loss=0.3718, attn_decoder_loss=0.2509, over 29713.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1249, cr_loss=0.367, attn_decoder_loss=0.2448, over 5803278.60 frames. ], batch size: 89, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:18:31,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=478240.0, ans=0.125 +2024-09-18 16:18:44,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=478240.0, ans=0.125 +2024-09-18 16:18:57,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=478280.0, ans=0.1 +2024-09-18 16:19:01,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=478280.0, ans=0.0 +2024-09-18 16:19:01,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_ff3.min_abs, batch_count=478280.0, ans=0.2 +2024-09-18 16:19:08,922 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.433e+01 8.594e+01 9.103e+01 9.777e+01 2.715e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-18 16:19:09,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=478320.0, ans=0.1 +2024-09-18 16:19:35,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=478400.0, ans=0.1 +2024-09-18 16:19:35,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=478400.0, ans=0.125 +2024-09-18 16:19:36,746 INFO [train.py:1198] (0/2) Epoch 27, batch 1950, loss[loss=0.2423, ctc_loss=0.1278, cr_loss=0.3703, attn_decoder_loss=0.2468, over 29458.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1259, cr_loss=0.3698, attn_decoder_loss=0.2462, over 5818434.88 frames. 
], batch size: 78, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:19:38,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=478400.0, ans=0.1 +2024-09-18 16:20:12,024 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:20:25,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=478520.0, ans=0.125 +2024-09-18 16:20:26,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=478520.0, ans=0.025 +2024-09-18 16:20:52,625 INFO [train.py:1198] (0/2) Epoch 27, batch 2000, loss[loss=0.2188, ctc_loss=0.1169, cr_loss=0.3494, attn_decoder_loss=0.2224, over 29325.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1262, cr_loss=0.3699, attn_decoder_loss=0.2465, over 5795938.91 frames. ], batch size: 67, lr: 4.11e-03, grad_scale: 16.0 +2024-09-18 16:21:05,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=478600.0, ans=0.0 +2024-09-18 16:21:38,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.53 vs. limit=10.0 +2024-09-18 16:21:45,122 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.111e+01 8.586e+01 9.013e+01 9.702e+01 5.300e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-18 16:21:53,629 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.14 vs. limit=15.0 +2024-09-18 16:21:55,288 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.56 vs. limit=15.0 +2024-09-18 16:22:00,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=478760.0, ans=0.0 +2024-09-18 16:22:10,908 INFO [train.py:1198] (0/2) Epoch 27, batch 2050, loss[loss=0.211, ctc_loss=0.1039, cr_loss=0.3165, attn_decoder_loss=0.2159, over 29448.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1255, cr_loss=0.3684, attn_decoder_loss=0.2454, over 5788679.82 frames. ], batch size: 70, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:22:11,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=478800.0, ans=0.2 +2024-09-18 16:22:19,716 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.36 vs. limit=22.5 +2024-09-18 16:22:36,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=478840.0, ans=0.125 +2024-09-18 16:22:52,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=478880.0, ans=0.125 +2024-09-18 16:22:57,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=478920.0, ans=0.125 +2024-09-18 16:23:09,951 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.67 vs. 
limit=12.0 +2024-09-18 16:23:19,293 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.35 vs. limit=22.5 +2024-09-18 16:23:20,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=478960.0, ans=0.0 +2024-09-18 16:23:21,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=478960.0, ans=0.125 +2024-09-18 16:23:28,893 INFO [train.py:1198] (0/2) Epoch 27, batch 2100, loss[loss=0.2335, ctc_loss=0.1145, cr_loss=0.3557, attn_decoder_loss=0.2389, over 29750.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1249, cr_loss=0.3678, attn_decoder_loss=0.245, over 5802062.48 frames. ], batch size: 81, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:23:41,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=479000.0, ans=0.025 +2024-09-18 16:23:49,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=479040.0, ans=0.125 +2024-09-18 16:23:57,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=479080.0, ans=0.2 +2024-09-18 16:24:04,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.17 vs. limit=10.0 +2024-09-18 16:24:18,658 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.428e+01 8.253e+01 8.787e+01 9.429e+01 1.232e+02, threshold=1.757e+02, percent-clipped=0.0 +2024-09-18 16:24:38,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=479160.0, ans=0.1 +2024-09-18 16:24:45,041 INFO [train.py:1198] (0/2) Epoch 27, batch 2150, loss[loss=0.2394, ctc_loss=0.1233, cr_loss=0.3604, attn_decoder_loss=0.2443, over 29444.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1244, cr_loss=0.3666, attn_decoder_loss=0.2444, over 5816418.53 frames. ], batch size: 78, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:24:56,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=479200.0, ans=0.125 +2024-09-18 16:25:02,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=479240.0, ans=0.125 +2024-09-18 16:25:28,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=479280.0, ans=0.125 +2024-09-18 16:25:51,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=479360.0, ans=0.125 +2024-09-18 16:25:58,611 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.29 vs. limit=6.0 +2024-09-18 16:26:03,687 INFO [train.py:1198] (0/2) Epoch 27, batch 2200, loss[loss=0.2577, ctc_loss=0.1342, cr_loss=0.402, attn_decoder_loss=0.2625, over 29636.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1252, cr_loss=0.3684, attn_decoder_loss=0.2448, over 5812586.48 frames. 
], batch size: 86, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:26:07,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=479400.0, ans=0.0 +2024-09-18 16:26:23,555 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:26:49,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=479480.0, ans=15.0 +2024-09-18 16:26:55,792 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.516e+01 8.471e+01 9.024e+01 9.757e+01 3.508e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-18 16:27:11,780 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.88 vs. limit=15.0 +2024-09-18 16:27:20,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=479600.0, ans=0.125 +2024-09-18 16:27:21,641 INFO [train.py:1198] (0/2) Epoch 27, batch 2250, loss[loss=0.2365, ctc_loss=0.1119, cr_loss=0.3285, attn_decoder_loss=0.243, over 29701.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.125, cr_loss=0.368, attn_decoder_loss=0.2447, over 5810801.06 frames. ], batch size: 82, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:27:44,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=479640.0, ans=0.025 +2024-09-18 16:27:53,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=479680.0, ans=0.5 +2024-09-18 16:28:01,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=479680.0, ans=0.0 +2024-09-18 16:28:10,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=479720.0, ans=0.95 +2024-09-18 16:28:22,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=479760.0, ans=0.025 +2024-09-18 16:28:24,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=479760.0, ans=0.0 +2024-09-18 16:28:37,661 INFO [train.py:1198] (0/2) Epoch 27, batch 2300, loss[loss=0.2152, ctc_loss=0.1078, cr_loss=0.3431, attn_decoder_loss=0.2195, over 29291.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1248, cr_loss=0.3675, attn_decoder_loss=0.244, over 5798696.72 frames. 
], batch size: 71, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:28:42,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=479800.0, ans=0.1 +2024-09-18 16:28:55,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=479840.0, ans=0.125 +2024-09-18 16:29:24,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=479920.0, ans=0.0 +2024-09-18 16:29:29,673 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.039e+01 8.211e+01 8.889e+01 9.358e+01 1.563e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-18 16:29:31,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=479920.0, ans=0.2 +2024-09-18 16:29:42,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=479960.0, ans=0.1 +2024-09-18 16:29:54,606 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-120000.pt +2024-09-18 16:30:02,918 INFO [train.py:1198] (0/2) Epoch 27, batch 2350, loss[loss=0.2498, ctc_loss=0.1235, cr_loss=0.365, attn_decoder_loss=0.2557, over 29697.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1248, cr_loss=0.3673, attn_decoder_loss=0.2443, over 5803673.30 frames. ], batch size: 83, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:30:15,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=480000.0, ans=0.2 +2024-09-18 16:30:19,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=480040.0, ans=0.0 +2024-09-18 16:30:25,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_na.min_abs, batch_count=480040.0, ans=0.02 +2024-09-18 16:30:27,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=480040.0, ans=0.125 +2024-09-18 16:30:29,255 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.91 vs. limit=12.0 +2024-09-18 16:30:45,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=480080.0, ans=0.2 +2024-09-18 16:31:11,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=480160.0, ans=0.0 +2024-09-18 16:31:15,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=480160.0, ans=0.1 +2024-09-18 16:31:20,880 INFO [train.py:1198] (0/2) Epoch 27, batch 2400, loss[loss=0.2373, ctc_loss=0.1238, cr_loss=0.3798, attn_decoder_loss=0.2414, over 29540.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1251, cr_loss=0.368, attn_decoder_loss=0.2448, over 5807651.55 frames. 
], batch size: 76, lr: 4.10e-03, grad_scale: 16.0 +2024-09-18 16:31:22,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=480200.0, ans=0.1 +2024-09-18 16:31:22,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=480200.0, ans=0.125 +2024-09-18 16:31:25,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=480200.0, ans=0.125 +2024-09-18 16:31:38,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=480240.0, ans=0.125 +2024-09-18 16:31:43,234 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.93 vs. limit=6.0 +2024-09-18 16:31:55,170 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.45 vs. limit=22.5 +2024-09-18 16:32:02,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=480280.0, ans=0.025 +2024-09-18 16:32:12,455 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.316e+01 8.717e+01 9.101e+01 9.636e+01 2.464e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-18 16:32:24,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=480360.0, ans=0.0 +2024-09-18 16:32:36,819 INFO [train.py:1198] (0/2) Epoch 27, batch 2450, loss[loss=0.2473, ctc_loss=0.1347, cr_loss=0.3619, attn_decoder_loss=0.2518, over 29708.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1257, cr_loss=0.3693, attn_decoder_loss=0.2457, over 5783287.67 frames. ], batch size: 82, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:32:54,190 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.44 vs. limit=6.0 +2024-09-18 16:33:01,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=480440.0, ans=0.07 +2024-09-18 16:33:04,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=480440.0, ans=0.0 +2024-09-18 16:33:20,489 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.94 vs. limit=22.5 +2024-09-18 16:33:30,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=480520.0, ans=0.125 +2024-09-18 16:33:35,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=480520.0, ans=0.0 +2024-09-18 16:33:54,966 INFO [train.py:1198] (0/2) Epoch 27, batch 2500, loss[loss=0.246, ctc_loss=0.1175, cr_loss=0.3533, attn_decoder_loss=0.2524, over 29636.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1255, cr_loss=0.3689, attn_decoder_loss=0.2456, over 5794617.62 frames. 
], batch size: 86, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:33:59,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=480600.0, ans=0.125 +2024-09-18 16:34:05,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=480600.0, ans=0.025 +2024-09-18 16:34:21,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=480640.0, ans=0.2 +2024-09-18 16:34:24,612 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:34:24,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=480680.0, ans=0.125 +2024-09-18 16:34:35,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.15 vs. limit=12.0 +2024-09-18 16:34:36,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=480680.0, ans=0.0 +2024-09-18 16:34:43,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=480720.0, ans=0.2 +2024-09-18 16:34:49,374 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.100e+01 8.457e+01 8.825e+01 9.370e+01 1.600e+02, threshold=1.765e+02, percent-clipped=0.0 +2024-09-18 16:35:14,346 INFO [train.py:1198] (0/2) Epoch 27, batch 2550, loss[loss=0.2202, ctc_loss=0.1125, cr_loss=0.3374, attn_decoder_loss=0.2246, over 29326.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1254, cr_loss=0.3684, attn_decoder_loss=0.2454, over 5797330.63 frames. ], batch size: 67, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:35:19,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=480800.0, ans=0.125 +2024-09-18 16:35:25,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=480800.0, ans=0.0 +2024-09-18 16:35:34,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=480840.0, ans=0.1 +2024-09-18 16:35:37,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=480840.0, ans=0.04949747468305833 +2024-09-18 16:35:49,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=480880.0, ans=0.125 +2024-09-18 16:35:55,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=480880.0, ans=0.07 +2024-09-18 16:36:01,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=480920.0, ans=0.125 +2024-09-18 16:36:03,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=480920.0, ans=0.125 +2024-09-18 16:36:30,359 INFO [train.py:1198] (0/2) Epoch 27, batch 2600, loss[loss=0.2382, ctc_loss=0.126, cr_loss=0.3867, attn_decoder_loss=0.242, over 29446.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1252, cr_loss=0.3683, attn_decoder_loss=0.2457, over 5794059.86 frames. 
], batch size: 78, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:36:47,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=481040.0, ans=0.125 +2024-09-18 16:36:56,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=481040.0, ans=0.1 +2024-09-18 16:37:15,300 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.49 vs. limit=15.0 +2024-09-18 16:37:15,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=481080.0, ans=0.125 +2024-09-18 16:37:24,585 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.268e+01 8.374e+01 8.989e+01 9.651e+01 1.905e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-18 16:37:48,743 INFO [train.py:1198] (0/2) Epoch 27, batch 2650, loss[loss=0.2677, ctc_loss=0.1472, cr_loss=0.4027, attn_decoder_loss=0.2721, over 29241.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1255, cr_loss=0.3691, attn_decoder_loss=0.246, over 5800935.59 frames. ], batch size: 100, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:38:02,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=481240.0, ans=0.0 +2024-09-18 16:38:16,921 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.34 vs. limit=12.0 +2024-09-18 16:38:19,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=481280.0, ans=0.125 +2024-09-18 16:38:22,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=481280.0, ans=0.5 +2024-09-18 16:38:28,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=481280.0, ans=0.0 +2024-09-18 16:38:45,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=481320.0, ans=0.0 +2024-09-18 16:38:53,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=481360.0, ans=0.1 +2024-09-18 16:39:05,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=481400.0, ans=0.05 +2024-09-18 16:39:06,855 INFO [train.py:1198] (0/2) Epoch 27, batch 2700, loss[loss=0.2663, ctc_loss=0.1447, cr_loss=0.4078, attn_decoder_loss=0.2707, over 29536.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.126, cr_loss=0.3697, attn_decoder_loss=0.2464, over 5796443.58 frames. 
], batch size: 87, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:39:22,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=481440.0, ans=0.125 +2024-09-18 16:39:22,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=481440.0, ans=0.125 +2024-09-18 16:39:22,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=481440.0, ans=0.0 +2024-09-18 16:39:58,199 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.481e+01 8.531e+01 8.958e+01 9.495e+01 1.703e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-18 16:40:03,579 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.31 vs. limit=15.0 +2024-09-18 16:40:19,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=481560.0, ans=0.125 +2024-09-18 16:40:22,498 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.30 vs. limit=12.0 +2024-09-18 16:40:23,152 INFO [train.py:1198] (0/2) Epoch 27, batch 2750, loss[loss=0.2253, ctc_loss=0.1131, cr_loss=0.3509, attn_decoder_loss=0.2299, over 29523.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1252, cr_loss=0.3678, attn_decoder_loss=0.245, over 5794909.23 frames. ], batch size: 75, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:40:33,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.46 vs. limit=22.5 +2024-09-18 16:40:36,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.37 vs. limit=15.0 +2024-09-18 16:40:43,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=481640.0, ans=0.125 +2024-09-18 16:40:44,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=481640.0, ans=0.05 +2024-09-18 16:40:46,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=481640.0, ans=0.125 +2024-09-18 16:41:12,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=481720.0, ans=0.125 +2024-09-18 16:41:15,104 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.39 vs. limit=22.5 +2024-09-18 16:41:17,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=481720.0, ans=0.0 +2024-09-18 16:41:29,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=481760.0, ans=0.2 +2024-09-18 16:41:41,644 INFO [train.py:1198] (0/2) Epoch 27, batch 2800, loss[loss=0.2631, ctc_loss=0.157, cr_loss=0.4018, attn_decoder_loss=0.266, over 20048.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1253, cr_loss=0.3675, attn_decoder_loss=0.245, over 5776233.41 frames. 
], batch size: 209, lr: 4.10e-03, grad_scale: 16.0 +2024-09-18 16:41:55,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=481840.0, ans=0.0 +2024-09-18 16:41:57,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=481840.0, ans=0.0 +2024-09-18 16:42:01,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=481840.0, ans=0.0 +2024-09-18 16:42:09,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=481840.0, ans=0.125 +2024-09-18 16:42:13,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=481880.0, ans=0.125 +2024-09-18 16:42:25,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=481920.0, ans=0.1 +2024-09-18 16:42:34,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=481920.0, ans=10.0 +2024-09-18 16:42:35,147 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.963e+01 8.581e+01 9.268e+01 9.879e+01 2.017e+02, threshold=1.854e+02, percent-clipped=1.0 +2024-09-18 16:42:43,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=481960.0, ans=0.2 +2024-09-18 16:42:47,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=481960.0, ans=0.0 +2024-09-18 16:42:59,618 INFO [train.py:1198] (0/2) Epoch 27, batch 2850, loss[loss=0.2342, ctc_loss=0.1266, cr_loss=0.3715, attn_decoder_loss=0.2379, over 29503.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1263, cr_loss=0.3695, attn_decoder_loss=0.2457, over 5761849.93 frames. ], batch size: 77, lr: 4.10e-03, grad_scale: 16.0 +2024-09-18 16:43:34,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=482080.0, ans=0.0 +2024-09-18 16:43:43,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=482120.0, ans=0.125 +2024-09-18 16:43:52,028 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.01 vs. limit=15.0 +2024-09-18 16:43:57,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=482120.0, ans=0.125 +2024-09-18 16:44:07,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=482160.0, ans=0.125 +2024-09-18 16:44:15,381 INFO [train.py:1198] (0/2) Epoch 27, batch 2900, loss[loss=0.2326, ctc_loss=0.1167, cr_loss=0.356, attn_decoder_loss=0.2376, over 29424.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1266, cr_loss=0.3705, attn_decoder_loss=0.2466, over 5787281.01 frames. 
], batch size: 79, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:44:18,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=482200.0, ans=0.0 +2024-09-18 16:44:20,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=482200.0, ans=0.1 +2024-09-18 16:44:36,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_na.min_abs, batch_count=482240.0, ans=0.02 +2024-09-18 16:44:40,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.40 vs. limit=15.0 +2024-09-18 16:45:04,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=482320.0, ans=0.125 +2024-09-18 16:45:12,316 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.581e+01 8.546e+01 8.987e+01 9.686e+01 7.083e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 16:45:15,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=482320.0, ans=0.09899494936611666 +2024-09-18 16:45:21,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=482360.0, ans=10.0 +2024-09-18 16:45:28,314 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.61 vs. limit=15.0 +2024-09-18 16:45:33,863 INFO [train.py:1198] (0/2) Epoch 27, batch 2950, loss[loss=0.234, ctc_loss=0.1211, cr_loss=0.3629, attn_decoder_loss=0.2385, over 29540.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1254, cr_loss=0.3677, attn_decoder_loss=0.2452, over 5782023.57 frames. ], batch size: 75, lr: 4.09e-03, grad_scale: 4.0 +2024-09-18 16:45:35,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=482400.0, ans=0.125 +2024-09-18 16:45:50,865 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:46:15,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=482480.0, ans=0.1 +2024-09-18 16:46:38,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=482560.0, ans=0.09899494936611666 +2024-09-18 16:46:48,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=482560.0, ans=0.2 +2024-09-18 16:46:52,446 INFO [train.py:1198] (0/2) Epoch 27, batch 3000, loss[loss=0.2291, ctc_loss=0.1205, cr_loss=0.3475, attn_decoder_loss=0.2334, over 29770.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1251, cr_loss=0.3671, attn_decoder_loss=0.245, over 5783858.03 frames. ], batch size: 81, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:46:52,446 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 16:47:10,904 INFO [train.py:1230] (0/2) Epoch 27, validation: loss=0.212, ctc_loss=0.03868, cr_loss=6.15e-15, attn_decoder_loss=0.2313, over 944034.00 frames. 
+2024-09-18 16:47:10,905 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 16:47:11,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=482600.0, ans=0.0 +2024-09-18 16:47:11,711 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. limit=6.0 +2024-09-18 16:47:40,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.52 vs. limit=15.0 +2024-09-18 16:47:43,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=482680.0, ans=0.0 +2024-09-18 16:47:52,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=482680.0, ans=0.1 +2024-09-18 16:48:05,514 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.731e+01 8.646e+01 9.161e+01 1.019e+02 2.247e+02, threshold=1.832e+02, percent-clipped=1.0 +2024-09-18 16:48:16,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=482760.0, ans=0.1 +2024-09-18 16:48:26,847 INFO [train.py:1198] (0/2) Epoch 27, batch 3050, loss[loss=0.2301, ctc_loss=0.113, cr_loss=0.3387, attn_decoder_loss=0.2356, over 29529.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1256, cr_loss=0.3673, attn_decoder_loss=0.2458, over 5776509.67 frames. ], batch size: 76, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:48:38,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=482800.0, ans=0.05 +2024-09-18 16:48:40,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=482800.0, ans=0.0 +2024-09-18 16:48:58,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=482880.0, ans=0.125 +2024-09-18 16:49:20,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=482920.0, ans=0.0 +2024-09-18 16:49:30,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=482960.0, ans=0.125 +2024-09-18 16:49:40,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=482960.0, ans=0.1 +2024-09-18 16:49:42,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=482960.0, ans=0.125 +2024-09-18 16:49:44,756 INFO [train.py:1198] (0/2) Epoch 27, batch 3100, loss[loss=0.2479, ctc_loss=0.1343, cr_loss=0.3847, attn_decoder_loss=0.252, over 29307.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1251, cr_loss=0.3666, attn_decoder_loss=0.2452, over 5776045.98 frames. 
], batch size: 100, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:49:46,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=483000.0, ans=0.125 +2024-09-18 16:50:12,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=483040.0, ans=0.125 +2024-09-18 16:50:13,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=483080.0, ans=0.125 +2024-09-18 16:50:14,466 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.85 vs. limit=12.0 +2024-09-18 16:50:15,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=483080.0, ans=0.125 +2024-09-18 16:50:22,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.45 vs. limit=22.5 +2024-09-18 16:50:32,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=483120.0, ans=0.0 +2024-09-18 16:50:41,518 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.455e+01 8.612e+01 9.047e+01 9.758e+01 3.006e+02, threshold=1.809e+02, percent-clipped=2.0 +2024-09-18 16:50:43,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=483120.0, ans=0.0 +2024-09-18 16:51:03,302 INFO [train.py:1198] (0/2) Epoch 27, batch 3150, loss[loss=0.2599, ctc_loss=0.1402, cr_loss=0.388, attn_decoder_loss=0.2646, over 28832.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1251, cr_loss=0.3669, attn_decoder_loss=0.2452, over 5782399.03 frames. ], batch size: 104, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:51:51,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=483320.0, ans=0.0 +2024-09-18 16:51:59,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=483320.0, ans=0.125 +2024-09-18 16:52:03,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=483360.0, ans=0.125 +2024-09-18 16:52:16,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=483360.0, ans=0.025 +2024-09-18 16:52:18,921 INFO [train.py:1198] (0/2) Epoch 27, batch 3200, loss[loss=0.2404, ctc_loss=0.1303, cr_loss=0.3872, attn_decoder_loss=0.244, over 29412.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1247, cr_loss=0.3668, attn_decoder_loss=0.2445, over 5792648.46 frames. ], batch size: 79, lr: 4.09e-03, grad_scale: 16.0 +2024-09-18 16:52:22,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=483400.0, ans=0.2 +2024-09-18 16:52:24,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.74 vs. 
limit=22.5 +2024-09-18 16:52:30,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=483400.0, ans=0.0 +2024-09-18 16:52:52,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=483480.0, ans=0.0 +2024-09-18 16:53:16,077 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.193e+01 8.478e+01 8.969e+01 9.595e+01 1.807e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 16:53:24,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=483560.0, ans=0.125 +2024-09-18 16:53:27,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=483560.0, ans=0.2 +2024-09-18 16:53:34,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.16 vs. limit=22.5 +2024-09-18 16:53:37,346 INFO [train.py:1198] (0/2) Epoch 27, batch 3250, loss[loss=0.2437, ctc_loss=0.1227, cr_loss=0.356, attn_decoder_loss=0.2492, over 29696.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1248, cr_loss=0.3674, attn_decoder_loss=0.245, over 5799421.93 frames. ], batch size: 84, lr: 4.09e-03, grad_scale: 16.0 +2024-09-18 16:53:46,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=483600.0, ans=0.125 +2024-09-18 16:53:49,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=483600.0, ans=0.0 +2024-09-18 16:53:49,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=483600.0, ans=0.125 +2024-09-18 16:53:49,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=483600.0, ans=0.1 +2024-09-18 16:54:07,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=483680.0, ans=0.2 +2024-09-18 16:54:34,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.12 vs. limit=15.0 +2024-09-18 16:54:54,880 INFO [train.py:1198] (0/2) Epoch 27, batch 3300, loss[loss=0.2473, ctc_loss=0.1255, cr_loss=0.3667, attn_decoder_loss=0.2526, over 28316.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.124, cr_loss=0.3657, attn_decoder_loss=0.2439, over 5795434.50 frames. ], batch size: 111, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:55:04,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=483800.0, ans=0.125 +2024-09-18 16:55:22,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=483840.0, ans=0.1 +2024-09-18 16:55:33,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=483880.0, ans=0.125 +2024-09-18 16:55:33,387 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.50 vs. 
limit=15.0 +2024-09-18 16:55:50,532 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.603e+01 8.485e+01 9.035e+01 9.621e+01 1.592e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-18 16:56:00,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=483960.0, ans=0.0 +2024-09-18 16:56:09,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=484000.0, ans=0.05 +2024-09-18 16:56:10,664 INFO [train.py:1198] (0/2) Epoch 27, batch 3350, loss[loss=0.2529, ctc_loss=0.13, cr_loss=0.3661, attn_decoder_loss=0.2585, over 28822.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1251, cr_loss=0.3674, attn_decoder_loss=0.2449, over 5774139.39 frames. ], batch size: 104, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:56:17,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=484000.0, ans=0.125 +2024-09-18 16:56:25,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=484000.0, ans=0.1 +2024-09-18 16:56:37,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=484040.0, ans=0.0 +2024-09-18 16:57:05,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=484120.0, ans=0.125 +2024-09-18 16:57:11,032 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.50 vs. limit=10.0 +2024-09-18 16:57:12,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=484160.0, ans=0.1 +2024-09-18 16:57:28,520 INFO [train.py:1198] (0/2) Epoch 27, batch 3400, loss[loss=0.218, ctc_loss=0.1137, cr_loss=0.3378, attn_decoder_loss=0.2221, over 29368.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1256, cr_loss=0.368, attn_decoder_loss=0.2451, over 5767629.20 frames. ], batch size: 67, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:58:00,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=484280.0, ans=0.125 +2024-09-18 16:58:07,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=484280.0, ans=0.5 +2024-09-18 16:58:26,756 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.482e+01 8.585e+01 9.028e+01 9.662e+01 1.590e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-18 16:58:32,368 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.54 vs. limit=6.0 +2024-09-18 16:58:36,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.44 vs. limit=15.0 +2024-09-18 16:58:39,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=484360.0, ans=0.0 +2024-09-18 16:58:46,435 INFO [train.py:1198] (0/2) Epoch 27, batch 3450, loss[loss=0.2433, ctc_loss=0.119, cr_loss=0.3456, attn_decoder_loss=0.2494, over 28326.00 frames. 
], tot_loss[loss=0.2406, ctc_loss=0.1256, cr_loss=0.3679, attn_decoder_loss=0.2452, over 5774992.32 frames. ], batch size: 111, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:58:58,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=484400.0, ans=0.035 +2024-09-18 16:59:09,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=484440.0, ans=0.125 +2024-09-18 16:59:14,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=484440.0, ans=0.1 +2024-09-18 16:59:15,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=484480.0, ans=0.1 +2024-09-18 16:59:18,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=484480.0, ans=0.0 +2024-09-18 16:59:39,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=484520.0, ans=0.1 +2024-09-18 16:59:50,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=484560.0, ans=0.1 +2024-09-18 17:00:04,106 INFO [train.py:1198] (0/2) Epoch 27, batch 3500, loss[loss=0.2197, ctc_loss=0.1048, cr_loss=0.3193, attn_decoder_loss=0.2254, over 29308.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1255, cr_loss=0.3675, attn_decoder_loss=0.2448, over 5777172.87 frames. ], batch size: 71, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 17:00:44,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=484680.0, ans=0.125 +2024-09-18 17:00:59,675 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.562e+01 8.977e+01 9.669e+01 2.220e+02, threshold=1.795e+02, percent-clipped=2.0 +2024-09-18 17:01:02,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=484760.0, ans=0.0 +2024-09-18 17:01:19,574 INFO [train.py:1198] (0/2) Epoch 27, batch 3550, loss[loss=0.2569, ctc_loss=0.1339, cr_loss=0.4018, attn_decoder_loss=0.2617, over 29727.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1253, cr_loss=0.3677, attn_decoder_loss=0.2448, over 5783914.34 frames. ], batch size: 89, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:02:08,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=484920.0, ans=0.125 +2024-09-18 17:02:33,738 INFO [train.py:1198] (0/2) Epoch 27, batch 3600, loss[loss=0.2446, ctc_loss=0.1298, cr_loss=0.3763, attn_decoder_loss=0.249, over 29498.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1251, cr_loss=0.3673, attn_decoder_loss=0.2449, over 5792005.68 frames. 
], batch size: 77, lr: 4.08e-03, grad_scale: 16.0 +2024-09-18 17:02:43,084 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:02:50,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=485040.0, ans=0.05 +2024-09-18 17:03:19,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=485120.0, ans=0.125 +2024-09-18 17:03:30,862 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.309e+01 8.400e+01 9.013e+01 9.523e+01 1.334e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-18 17:03:32,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=485120.0, ans=0.125 +2024-09-18 17:03:35,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=485160.0, ans=0.125 +2024-09-18 17:03:42,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=485160.0, ans=0.125 +2024-09-18 17:03:45,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=485160.0, ans=0.0 +2024-09-18 17:03:50,129 INFO [train.py:1198] (0/2) Epoch 27, batch 3650, loss[loss=0.2578, ctc_loss=0.138, cr_loss=0.3956, attn_decoder_loss=0.2623, over 29495.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1251, cr_loss=0.3674, attn_decoder_loss=0.2446, over 5794148.14 frames. ], batch size: 90, lr: 4.08e-03, grad_scale: 16.0 +2024-09-18 17:03:50,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=485200.0, ans=0.125 +2024-09-18 17:03:53,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=485200.0, ans=0.025 +2024-09-18 17:04:06,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=485240.0, ans=0.0 +2024-09-18 17:04:19,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.80 vs. limit=15.0 +2024-09-18 17:04:23,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=485280.0, ans=0.125 +2024-09-18 17:04:23,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=485280.0, ans=0.1 +2024-09-18 17:04:29,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_ff2.min_abs, batch_count=485280.0, ans=0.1 +2024-09-18 17:04:29,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=485280.0, ans=0.125 +2024-09-18 17:04:36,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.99 vs. 
limit=15.0 +2024-09-18 17:04:38,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=485320.0, ans=0.0 +2024-09-18 17:05:04,962 INFO [train.py:1198] (0/2) Epoch 27, batch 3700, loss[loss=0.2461, ctc_loss=0.1258, cr_loss=0.3747, attn_decoder_loss=0.2511, over 29697.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1252, cr_loss=0.3679, attn_decoder_loss=0.2449, over 5804420.64 frames. ], batch size: 84, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:05:12,122 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.29 vs. limit=15.0 +2024-09-18 17:05:49,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=485520.0, ans=0.125 +2024-09-18 17:05:55,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=485520.0, ans=0.0 +2024-09-18 17:06:00,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=485520.0, ans=0.0 +2024-09-18 17:06:00,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=485520.0, ans=0.0 +2024-09-18 17:06:01,403 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.031e+01 8.546e+01 8.927e+01 9.450e+01 1.781e+02, threshold=1.785e+02, percent-clipped=0.0 +2024-09-18 17:06:04,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=485560.0, ans=0.125 +2024-09-18 17:06:18,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=485560.0, ans=0.0 +2024-09-18 17:06:21,381 INFO [train.py:1198] (0/2) Epoch 27, batch 3750, loss[loss=0.2184, ctc_loss=0.1088, cr_loss=0.3288, attn_decoder_loss=0.2233, over 29311.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1251, cr_loss=0.368, attn_decoder_loss=0.2446, over 5807492.06 frames. ], batch size: 67, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:06:23,845 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.84 vs. limit=15.0 +2024-09-18 17:06:24,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=485600.0, ans=0.125 +2024-09-18 17:06:32,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=485600.0, ans=0.2 +2024-09-18 17:06:43,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=485640.0, ans=0.125 +2024-09-18 17:06:51,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=485680.0, ans=0.125 +2024-09-18 17:07:06,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=485720.0, ans=0.125 +2024-09-18 17:07:35,760 INFO [train.py:1198] (0/2) Epoch 27, batch 3800, loss[loss=0.2502, ctc_loss=0.1296, cr_loss=0.3728, attn_decoder_loss=0.2553, over 29641.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1254, cr_loss=0.3687, attn_decoder_loss=0.2446, over 5798210.51 frames. 
], batch size: 86, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:07:36,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=485800.0, ans=0.2 +2024-09-18 17:07:48,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=485800.0, ans=0.0 +2024-09-18 17:08:22,946 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.65 vs. limit=15.0 +2024-09-18 17:08:25,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=485920.0, ans=0.0 +2024-09-18 17:08:31,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=485920.0, ans=0.125 +2024-09-18 17:08:32,466 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.128e+01 8.550e+01 9.227e+01 9.705e+01 1.468e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-18 17:08:35,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=485960.0, ans=0.0 +2024-09-18 17:08:37,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=485960.0, ans=0.125 +2024-09-18 17:08:49,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=486000.0, ans=0.0 +2024-09-18 17:08:50,198 INFO [train.py:1198] (0/2) Epoch 27, batch 3850, loss[loss=0.255, ctc_loss=0.1325, cr_loss=0.3768, attn_decoder_loss=0.2602, over 29310.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1252, cr_loss=0.3688, attn_decoder_loss=0.2447, over 5812804.19 frames. ], batch size: 100, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:08:57,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=486000.0, ans=0.1 +2024-09-18 17:08:57,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=486000.0, ans=0.09899494936611666 +2024-09-18 17:09:14,552 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.03 vs. limit=15.0 +2024-09-18 17:09:30,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=486080.0, ans=0.125 +2024-09-18 17:09:52,227 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.63 vs. limit=22.5 +2024-09-18 17:10:06,033 INFO [train.py:1198] (0/2) Epoch 27, batch 3900, loss[loss=0.2488, ctc_loss=0.1242, cr_loss=0.3373, attn_decoder_loss=0.2551, over 29635.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1252, cr_loss=0.3687, attn_decoder_loss=0.2449, over 5816900.57 frames. 
], batch size: 86, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:10:26,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=486240.0, ans=0.1 +2024-09-18 17:10:35,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=486280.0, ans=0.125 +2024-09-18 17:10:41,807 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:10:43,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer_ff3.min_abs, batch_count=486280.0, ans=0.2 +2024-09-18 17:11:02,351 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.307e+01 8.580e+01 9.073e+01 9.587e+01 1.534e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-18 17:11:07,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=486360.0, ans=10.0 +2024-09-18 17:11:20,657 INFO [train.py:1198] (0/2) Epoch 27, batch 3950, loss[loss=0.2541, ctc_loss=0.1306, cr_loss=0.3947, attn_decoder_loss=0.2591, over 29499.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1246, cr_loss=0.3679, attn_decoder_loss=0.2445, over 5836080.57 frames. ], batch size: 97, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:11:25,412 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:12:11,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=486520.0, ans=0.125 +2024-09-18 17:12:28,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=486560.0, ans=0.0 +2024-09-18 17:12:35,440 INFO [train.py:1198] (0/2) Epoch 27, batch 4000, loss[loss=0.2239, ctc_loss=0.1078, cr_loss=0.3326, attn_decoder_loss=0.2295, over 29531.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1247, cr_loss=0.3675, attn_decoder_loss=0.2446, over 5812415.69 frames. ], batch size: 74, lr: 4.08e-03, grad_scale: 16.0 +2024-09-18 17:13:02,999 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.44 vs. limit=10.0 +2024-09-18 17:13:06,770 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:13:08,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=486680.0, ans=0.125 +2024-09-18 17:13:33,403 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.629e+01 8.740e+01 9.217e+01 9.696e+01 1.612e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-18 17:13:49,531 INFO [train.py:1198] (0/2) Epoch 27, batch 4050, loss[loss=0.2636, ctc_loss=0.1596, cr_loss=0.3991, attn_decoder_loss=0.2663, over 20779.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1248, cr_loss=0.3674, attn_decoder_loss=0.2444, over 5796739.94 frames. 
], batch size: 211, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:13:52,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=486800.0, ans=0.125 +2024-09-18 17:13:54,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=486800.0, ans=0.125 +2024-09-18 17:13:55,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=486800.0, ans=0.0 +2024-09-18 17:14:07,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=486840.0, ans=0.09899494936611666 +2024-09-18 17:14:26,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=486880.0, ans=0.1 +2024-09-18 17:15:03,891 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.65 vs. limit=22.5 +2024-09-18 17:15:04,289 INFO [train.py:1198] (0/2) Epoch 27, batch 4100, loss[loss=0.2573, ctc_loss=0.139, cr_loss=0.4108, attn_decoder_loss=0.2613, over 29492.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1252, cr_loss=0.3679, attn_decoder_loss=0.2448, over 5792473.89 frames. ], batch size: 90, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:15:17,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=487040.0, ans=0.125 +2024-09-18 17:15:37,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=487080.0, ans=0.1 +2024-09-18 17:15:46,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.35 vs. limit=15.0 +2024-09-18 17:16:03,239 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.595e+01 8.410e+01 8.915e+01 9.592e+01 1.452e+02, threshold=1.783e+02, percent-clipped=0.0 +2024-09-18 17:16:13,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=487160.0, ans=0.1 +2024-09-18 17:16:19,986 INFO [train.py:1198] (0/2) Epoch 27, batch 4150, loss[loss=0.231, ctc_loss=0.1116, cr_loss=0.3421, attn_decoder_loss=0.2367, over 29503.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1249, cr_loss=0.3674, attn_decoder_loss=0.2447, over 5798520.97 frames. 
], batch size: 77, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:16:29,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=487200.0, ans=0.0 +2024-09-18 17:16:58,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=487280.0, ans=0.025 +2024-09-18 17:16:59,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=487280.0, ans=0.125 +2024-09-18 17:17:06,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=487320.0, ans=0.2 +2024-09-18 17:17:20,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=487360.0, ans=0.125 +2024-09-18 17:17:32,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=487400.0, ans=0.0 +2024-09-18 17:17:33,697 INFO [train.py:1198] (0/2) Epoch 27, batch 4200, loss[loss=0.262, ctc_loss=0.1476, cr_loss=0.4338, attn_decoder_loss=0.2651, over 29519.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1251, cr_loss=0.3678, attn_decoder_loss=0.2451, over 5800317.11 frames. ], batch size: 90, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:17:37,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.49 vs. limit=15.0 +2024-09-18 17:17:47,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=487440.0, ans=0.0 +2024-09-18 17:17:55,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.71 vs. limit=12.0 +2024-09-18 17:18:32,352 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.317e+01 8.514e+01 8.963e+01 9.288e+01 3.975e+02, threshold=1.793e+02, percent-clipped=1.0 +2024-09-18 17:18:48,514 INFO [train.py:1198] (0/2) Epoch 27, batch 4250, loss[loss=0.2266, ctc_loss=0.1141, cr_loss=0.3409, attn_decoder_loss=0.2315, over 29520.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1253, cr_loss=0.3683, attn_decoder_loss=0.2452, over 5806147.75 frames. ], batch size: 74, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:18:54,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=487600.0, ans=0.07 +2024-09-18 17:19:06,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=487640.0, ans=0.0 +2024-09-18 17:19:09,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=487640.0, ans=0.125 +2024-09-18 17:19:25,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=487680.0, ans=0.125 +2024-09-18 17:20:02,926 INFO [train.py:1198] (0/2) Epoch 27, batch 4300, loss[loss=0.2575, ctc_loss=0.1386, cr_loss=0.4013, attn_decoder_loss=0.2618, over 29536.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1255, cr_loss=0.3693, attn_decoder_loss=0.2455, over 5795622.57 frames. 
], batch size: 87, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:20:04,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=487800.0, ans=0.125 +2024-09-18 17:20:19,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=487840.0, ans=0.0 +2024-09-18 17:20:46,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=487920.0, ans=0.5 +2024-09-18 17:21:00,647 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.078e+01 8.751e+01 9.154e+01 9.778e+01 2.419e+02, threshold=1.831e+02, percent-clipped=1.0 +2024-09-18 17:21:17,483 INFO [train.py:1198] (0/2) Epoch 27, batch 4350, loss[loss=0.2539, ctc_loss=0.1349, cr_loss=0.4017, attn_decoder_loss=0.2582, over 29423.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.128, cr_loss=0.3741, attn_decoder_loss=0.2485, over 5797573.24 frames. ], batch size: 97, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:21:20,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=488000.0, ans=0.2 +2024-09-18 17:21:25,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=488000.0, ans=0.125 +2024-09-18 17:21:43,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=488040.0, ans=0.125 +2024-09-18 17:21:44,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=488040.0, ans=0.125 +2024-09-18 17:21:53,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=488080.0, ans=0.125 +2024-09-18 17:21:55,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=488080.0, ans=0.125 +2024-09-18 17:21:56,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=488080.0, ans=0.1 +2024-09-18 17:22:03,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=488120.0, ans=0.0 +2024-09-18 17:22:22,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=488160.0, ans=0.1 +2024-09-18 17:22:28,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.72 vs. limit=10.0 +2024-09-18 17:22:32,264 INFO [train.py:1198] (0/2) Epoch 27, batch 4400, loss[loss=0.2621, ctc_loss=0.144, cr_loss=0.4197, attn_decoder_loss=0.2659, over 27691.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1293, cr_loss=0.3765, attn_decoder_loss=0.2506, over 5768792.46 frames. ], batch size: 125, lr: 4.07e-03, grad_scale: 16.0 +2024-09-18 17:22:42,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=488200.0, ans=0.125 +2024-09-18 17:22:47,979 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.67 vs. 
limit=15.0 +2024-09-18 17:22:53,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=488240.0, ans=0.125 +2024-09-18 17:22:56,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=488240.0, ans=0.125 +2024-09-18 17:23:00,928 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.34 vs. limit=15.0 +2024-09-18 17:23:07,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=488280.0, ans=0.0 +2024-09-18 17:23:12,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=488280.0, ans=0.1 +2024-09-18 17:23:29,590 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.043e+01 8.897e+01 9.375e+01 9.833e+01 4.108e+02, threshold=1.875e+02, percent-clipped=1.0 +2024-09-18 17:23:42,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=488360.0, ans=0.125 +2024-09-18 17:23:46,272 INFO [train.py:1198] (0/2) Epoch 27, batch 4450, loss[loss=0.2672, ctc_loss=0.1664, cr_loss=0.416, attn_decoder_loss=0.2692, over 20209.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1333, cr_loss=0.3819, attn_decoder_loss=0.2528, over 5583409.54 frames. ], batch size: 210, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:23:50,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=488400.0, ans=0.07 +2024-09-18 17:24:00,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=488440.0, ans=0.125 +2024-09-18 17:24:03,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=488440.0, ans=0.0 +2024-09-18 17:24:28,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=488480.0, ans=0.125 +2024-09-18 17:24:38,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=488520.0, ans=0.0 +2024-09-18 17:24:43,596 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.90 vs. limit=6.0 +2024-09-18 17:24:55,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=488560.0, ans=0.125 +2024-09-18 17:24:55,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=488560.0, ans=0.125 +2024-09-18 17:25:02,154 INFO [train.py:1198] (0/2) Epoch 27, batch 4500, loss[loss=0.2558, ctc_loss=0.1455, cr_loss=0.3984, attn_decoder_loss=0.2592, over 20015.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1366, cr_loss=0.3828, attn_decoder_loss=0.2548, over 5238580.84 frames. 
], batch size: 209, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:25:25,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=488640.0, ans=0.1 +2024-09-18 17:25:29,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=488640.0, ans=0.05 +2024-09-18 17:25:39,617 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-27.pt +2024-09-18 17:26:25,154 INFO [train.py:1198] (0/2) Epoch 28, batch 0, loss[loss=0.2157, ctc_loss=0.09692, cr_loss=0.3006, attn_decoder_loss=0.2222, over 29581.00 frames. ], tot_loss[loss=0.2157, ctc_loss=0.09692, cr_loss=0.3006, attn_decoder_loss=0.2222, over 29581.00 frames. ], batch size: 73, lr: 3.99e-03, grad_scale: 16.0 +2024-09-18 17:26:25,155 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 17:26:45,481 INFO [train.py:1230] (0/2) Epoch 28, validation: loss=0.2131, ctc_loss=0.0377, cr_loss=5.605e-15, attn_decoder_loss=0.2326, over 944034.00 frames. +2024-09-18 17:26:45,481 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 17:27:07,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=488740.0, ans=0.025 +2024-09-18 17:27:07,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=488740.0, ans=0.125 +2024-09-18 17:27:07,701 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.53 vs. limit=15.0 +2024-09-18 17:27:09,720 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.684e+01 1.052e+02 1.136e+02 1.230e+02 3.342e+02, threshold=2.271e+02, percent-clipped=3.0 +2024-09-18 17:27:10,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=10.36 vs. limit=12.0 +2024-09-18 17:27:23,295 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.55 vs. limit=15.0 +2024-09-18 17:27:34,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=488820.0, ans=0.125 +2024-09-18 17:27:54,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=488860.0, ans=0.0 +2024-09-18 17:28:01,705 INFO [train.py:1198] (0/2) Epoch 28, batch 50, loss[loss=0.2166, ctc_loss=0.1101, cr_loss=0.3457, attn_decoder_loss=0.2208, over 29403.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1269, cr_loss=0.3721, attn_decoder_loss=0.2462, over 1268783.47 frames. ], batch size: 70, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:28:17,802 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.46 vs. 
limit=15.0 +2024-09-18 17:28:20,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=488940.0, ans=0.025 +2024-09-18 17:28:21,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=488940.0, ans=0.2 +2024-09-18 17:28:29,599 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:28:35,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.11 vs. limit=15.0 +2024-09-18 17:28:52,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.35 vs. limit=22.5 +2024-09-18 17:28:58,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=489020.0, ans=0.0 +2024-09-18 17:29:07,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=489060.0, ans=0.125 +2024-09-18 17:29:17,675 INFO [train.py:1198] (0/2) Epoch 28, batch 100, loss[loss=0.2333, ctc_loss=0.1266, cr_loss=0.3769, attn_decoder_loss=0.2368, over 29552.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.128, cr_loss=0.3737, attn_decoder_loss=0.2482, over 2253421.04 frames. ], batch size: 76, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:29:23,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=489100.0, ans=0.0 +2024-09-18 17:29:41,539 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.772e+01 8.514e+01 8.987e+01 9.639e+01 1.687e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-18 17:30:05,124 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.97 vs. limit=15.0 +2024-09-18 17:30:08,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=489220.0, ans=0.0 +2024-09-18 17:30:17,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=489260.0, ans=0.025 +2024-09-18 17:30:35,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=489300.0, ans=0.125 +2024-09-18 17:30:36,915 INFO [train.py:1198] (0/2) Epoch 28, batch 150, loss[loss=0.2077, ctc_loss=0.1001, cr_loss=0.3322, attn_decoder_loss=0.2123, over 29446.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1253, cr_loss=0.3688, attn_decoder_loss=0.2455, over 3048532.00 frames. ], batch size: 70, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:30:45,278 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.28 vs. 
limit=15.0 +2024-09-18 17:30:47,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=489300.0, ans=0.125 +2024-09-18 17:31:01,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=489340.0, ans=0.125 +2024-09-18 17:31:07,423 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:31:33,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.66 vs. limit=15.0 +2024-09-18 17:31:35,255 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.53 vs. limit=15.0 +2024-09-18 17:31:44,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=489460.0, ans=0.0 +2024-09-18 17:31:52,051 INFO [train.py:1198] (0/2) Epoch 28, batch 200, loss[loss=0.2543, ctc_loss=0.1332, cr_loss=0.3744, attn_decoder_loss=0.2595, over 27159.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.124, cr_loss=0.3663, attn_decoder_loss=0.2442, over 3659567.05 frames. ], batch size: 124, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:31:54,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=489500.0, ans=0.2 +2024-09-18 17:32:00,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=489500.0, ans=0.0 +2024-09-18 17:32:06,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=489540.0, ans=0.09899494936611666 +2024-09-18 17:32:08,134 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.55 vs. limit=15.0 +2024-09-18 17:32:09,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=489540.0, ans=0.1 +2024-09-18 17:32:09,873 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.79 vs. limit=15.0 +2024-09-18 17:32:16,586 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.284e+01 8.292e+01 9.011e+01 9.460e+01 1.346e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-18 17:32:33,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=489580.0, ans=0.125 +2024-09-18 17:32:37,773 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.50 vs. limit=22.5 +2024-09-18 17:32:43,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=489620.0, ans=0.125 +2024-09-18 17:33:08,529 INFO [train.py:1198] (0/2) Epoch 28, batch 250, loss[loss=0.2506, ctc_loss=0.1364, cr_loss=0.3843, attn_decoder_loss=0.2548, over 29260.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1239, cr_loss=0.3664, attn_decoder_loss=0.2441, over 4141398.11 frames. 
], batch size: 100, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:33:09,689 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.23 vs. limit=15.0 +2024-09-18 17:33:16,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=489700.0, ans=0.0 +2024-09-18 17:33:21,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=489700.0, ans=0.0 +2024-09-18 17:33:27,733 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.09 vs. limit=15.0 +2024-09-18 17:33:30,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=489740.0, ans=0.125 +2024-09-18 17:33:31,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=489740.0, ans=0.0 +2024-09-18 17:33:47,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=489780.0, ans=0.0 +2024-09-18 17:33:48,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=489780.0, ans=0.0 +2024-09-18 17:34:00,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=489820.0, ans=0.125 +2024-09-18 17:34:20,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=489860.0, ans=0.125 +2024-09-18 17:34:25,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=489900.0, ans=10.0 +2024-09-18 17:34:26,611 INFO [train.py:1198] (0/2) Epoch 28, batch 300, loss[loss=0.263, ctc_loss=0.1378, cr_loss=0.4058, attn_decoder_loss=0.2679, over 29533.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1234, cr_loss=0.3652, attn_decoder_loss=0.2438, over 4510818.57 frames. ], batch size: 92, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:34:41,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=489900.0, ans=0.125 +2024-09-18 17:34:50,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=489940.0, ans=0.0 +2024-09-18 17:34:50,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=489940.0, ans=0.125 +2024-09-18 17:34:53,044 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.323e+01 8.453e+01 8.832e+01 9.524e+01 1.905e+02, threshold=1.766e+02, percent-clipped=1.0 +2024-09-18 17:34:58,621 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.68 vs. limit=15.0 +2024-09-18 17:35:09,185 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.31 vs. limit=6.0 +2024-09-18 17:35:12,059 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.89 vs. 
limit=15.0 +2024-09-18 17:35:37,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=490060.0, ans=0.0 +2024-09-18 17:35:44,882 INFO [train.py:1198] (0/2) Epoch 28, batch 350, loss[loss=0.219, ctc_loss=0.1044, cr_loss=0.3239, attn_decoder_loss=0.2246, over 29718.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1237, cr_loss=0.366, attn_decoder_loss=0.2445, over 4796395.22 frames. ], batch size: 72, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:36:06,826 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=12.79 vs. limit=15.0 +2024-09-18 17:36:29,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=490220.0, ans=0.2 +2024-09-18 17:36:32,753 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.15 vs. limit=15.0 +2024-09-18 17:36:50,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=490260.0, ans=0.0 +2024-09-18 17:36:56,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=490260.0, ans=0.0 +2024-09-18 17:37:00,262 INFO [train.py:1198] (0/2) Epoch 28, batch 400, loss[loss=0.2418, ctc_loss=0.1233, cr_loss=0.3626, attn_decoder_loss=0.2469, over 29725.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1234, cr_loss=0.366, attn_decoder_loss=0.2444, over 5025156.70 frames. ], batch size: 82, lr: 3.99e-03, grad_scale: 16.0 +2024-09-18 17:37:22,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=490340.0, ans=0.0 +2024-09-18 17:37:26,414 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.544e+01 8.632e+01 9.035e+01 9.717e+01 2.941e+02, threshold=1.807e+02, percent-clipped=3.0 +2024-09-18 17:37:43,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=490380.0, ans=0.125 +2024-09-18 17:37:52,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.75 vs. limit=15.0 +2024-09-18 17:38:19,616 INFO [train.py:1198] (0/2) Epoch 28, batch 450, loss[loss=0.2424, ctc_loss=0.1284, cr_loss=0.3797, attn_decoder_loss=0.2467, over 29689.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1241, cr_loss=0.3674, attn_decoder_loss=0.2446, over 5185589.89 frames. 
], batch size: 83, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:38:43,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=490540.0, ans=0.0 +2024-09-18 17:38:54,590 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:39:00,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=490580.0, ans=0.0 +2024-09-18 17:39:29,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=490660.0, ans=0.125 +2024-09-18 17:39:32,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=490660.0, ans=0.0 +2024-09-18 17:39:38,418 INFO [train.py:1198] (0/2) Epoch 28, batch 500, loss[loss=0.2613, ctc_loss=0.135, cr_loss=0.3838, attn_decoder_loss=0.2668, over 29420.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1232, cr_loss=0.3657, attn_decoder_loss=0.2437, over 5328287.09 frames. ], batch size: 94, lr: 3.99e-03, grad_scale: 8.0 +2024-09-18 17:39:54,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=490740.0, ans=0.125 +2024-09-18 17:39:59,194 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.08 vs. limit=22.5 +2024-09-18 17:40:04,209 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.021e+01 8.478e+01 8.864e+01 9.440e+01 1.535e+02, threshold=1.773e+02, percent-clipped=0.0 +2024-09-18 17:40:16,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=490780.0, ans=0.125 +2024-09-18 17:40:41,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=490860.0, ans=0.125 +2024-09-18 17:40:42,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=490860.0, ans=0.125 +2024-09-18 17:40:54,418 INFO [train.py:1198] (0/2) Epoch 28, batch 550, loss[loss=0.2478, ctc_loss=0.1275, cr_loss=0.3793, attn_decoder_loss=0.2528, over 28909.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1237, cr_loss=0.3662, attn_decoder_loss=0.244, over 5422723.98 frames. ], batch size: 104, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:40:56,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=490900.0, ans=0.0 +2024-09-18 17:41:01,777 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.75 vs. limit=15.0 +2024-09-18 17:41:08,785 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.17 vs. limit=15.0 +2024-09-18 17:41:22,311 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.00 vs. 
limit=15.0 +2024-09-18 17:41:56,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=491060.0, ans=0.125 +2024-09-18 17:41:56,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=491060.0, ans=0.0 +2024-09-18 17:42:06,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=491060.0, ans=0.025 +2024-09-18 17:42:12,517 INFO [train.py:1198] (0/2) Epoch 28, batch 600, loss[loss=0.2572, ctc_loss=0.1376, cr_loss=0.3932, attn_decoder_loss=0.2617, over 29324.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.124, cr_loss=0.3669, attn_decoder_loss=0.2442, over 5508291.78 frames. ], batch size: 100, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:42:21,807 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:42:29,274 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:42:40,165 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.281e+01 8.877e+01 9.486e+01 1.809e+02, threshold=1.775e+02, percent-clipped=1.0 +2024-09-18 17:42:53,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.61 vs. limit=22.5 +2024-09-18 17:43:05,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=491220.0, ans=0.04949747468305833 +2024-09-18 17:43:15,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=491260.0, ans=0.95 +2024-09-18 17:43:22,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=491260.0, ans=0.125 +2024-09-18 17:43:29,932 INFO [train.py:1198] (0/2) Epoch 28, batch 650, loss[loss=0.2402, ctc_loss=0.1324, cr_loss=0.3889, attn_decoder_loss=0.2436, over 29778.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1231, cr_loss=0.3659, attn_decoder_loss=0.2435, over 5585636.97 frames. ], batch size: 81, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:43:45,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=491340.0, ans=0.2 +2024-09-18 17:43:45,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=491340.0, ans=0.04949747468305833 +2024-09-18 17:44:14,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=491420.0, ans=0.0 +2024-09-18 17:44:19,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=491420.0, ans=0.125 +2024-09-18 17:44:21,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=491420.0, ans=0.0 +2024-09-18 17:44:34,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=491460.0, ans=0.125 +2024-09-18 17:44:46,049 INFO [train.py:1198] (0/2) Epoch 28, batch 700, loss[loss=0.2385, ctc_loss=0.1223, cr_loss=0.3821, attn_decoder_loss=0.243, over 29538.00 frames. 
], tot_loss[loss=0.2393, ctc_loss=0.1234, cr_loss=0.3664, attn_decoder_loss=0.244, over 5636398.80 frames. ], batch size: 76, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:44:46,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=491500.0, ans=0.125 +2024-09-18 17:44:58,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=491500.0, ans=0.0 +2024-09-18 17:44:58,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=491500.0, ans=0.0 +2024-09-18 17:45:11,727 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.262e+01 8.777e+01 9.267e+01 2.724e+02, threshold=1.755e+02, percent-clipped=1.0 +2024-09-18 17:45:29,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=491580.0, ans=0.125 +2024-09-18 17:45:40,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.52 vs. limit=15.0 +2024-09-18 17:45:53,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=491660.0, ans=0.125 +2024-09-18 17:46:01,809 INFO [train.py:1198] (0/2) Epoch 28, batch 750, loss[loss=0.2545, ctc_loss=0.1407, cr_loss=0.4031, attn_decoder_loss=0.2582, over 29691.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1232, cr_loss=0.3659, attn_decoder_loss=0.2437, over 5676151.64 frames. ], batch size: 82, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:46:13,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=491700.0, ans=0.0 +2024-09-18 17:46:20,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=491740.0, ans=0.0 +2024-09-18 17:46:51,020 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.22 vs. limit=15.0 +2024-09-18 17:47:17,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=491860.0, ans=0.0 +2024-09-18 17:47:21,495 INFO [train.py:1198] (0/2) Epoch 28, batch 800, loss[loss=0.215, ctc_loss=0.1035, cr_loss=0.3151, attn_decoder_loss=0.2204, over 29575.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1231, cr_loss=0.3655, attn_decoder_loss=0.2435, over 5707356.34 frames. ], batch size: 73, lr: 3.98e-03, grad_scale: 16.0 +2024-09-18 17:47:32,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=491900.0, ans=0.0 +2024-09-18 17:47:47,343 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.515e+01 8.491e+01 9.037e+01 9.523e+01 1.873e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-18 17:47:49,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=491940.0, ans=0.125 +2024-09-18 17:48:27,592 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.81 vs. 
limit=15.0 +2024-09-18 17:48:32,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=492060.0, ans=0.5 +2024-09-18 17:48:37,062 INFO [train.py:1198] (0/2) Epoch 28, batch 850, loss[loss=0.2565, ctc_loss=0.1305, cr_loss=0.3827, attn_decoder_loss=0.262, over 29703.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1232, cr_loss=0.3658, attn_decoder_loss=0.2436, over 5736830.54 frames. ], batch size: 89, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:48:37,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=492100.0, ans=0.1 +2024-09-18 17:49:03,417 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.89 vs. limit=12.0 +2024-09-18 17:49:14,302 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.19 vs. limit=15.0 +2024-09-18 17:49:16,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=492180.0, ans=0.05 +2024-09-18 17:49:19,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=492180.0, ans=0.125 +2024-09-18 17:49:37,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=492260.0, ans=0.125 +2024-09-18 17:49:52,779 INFO [train.py:1198] (0/2) Epoch 28, batch 900, loss[loss=0.2133, ctc_loss=0.1, cr_loss=0.319, attn_decoder_loss=0.2188, over 29628.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1235, cr_loss=0.3663, attn_decoder_loss=0.244, over 5741806.84 frames. ], batch size: 73, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:50:17,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=492340.0, ans=0.0 +2024-09-18 17:50:19,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=492340.0, ans=0.1 +2024-09-18 17:50:21,989 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.372e+01 8.505e+01 9.006e+01 9.829e+01 2.830e+02, threshold=1.801e+02, percent-clipped=3.0 +2024-09-18 17:50:22,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=492340.0, ans=0.125 +2024-09-18 17:50:26,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=492380.0, ans=0.125 +2024-09-18 17:50:37,496 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.95 vs. 
limit=12.0 +2024-09-18 17:50:44,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=492420.0, ans=0.0 +2024-09-18 17:50:44,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=492420.0, ans=0.2 +2024-09-18 17:50:59,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=492460.0, ans=0.125 +2024-09-18 17:51:12,933 INFO [train.py:1198] (0/2) Epoch 28, batch 950, loss[loss=0.222, ctc_loss=0.1138, cr_loss=0.3498, attn_decoder_loss=0.2263, over 29538.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1239, cr_loss=0.3672, attn_decoder_loss=0.2444, over 5742777.37 frames. ], batch size: 74, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:51:13,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=492500.0, ans=0.0 +2024-09-18 17:51:17,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=492500.0, ans=0.125 +2024-09-18 17:51:19,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=492500.0, ans=0.125 +2024-09-18 17:51:23,921 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:51:42,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=492580.0, ans=0.125 +2024-09-18 17:51:45,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=492580.0, ans=0.125 +2024-09-18 17:52:13,766 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:52:15,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=492660.0, ans=0.125 +2024-09-18 17:52:28,271 INFO [train.py:1198] (0/2) Epoch 28, batch 1000, loss[loss=0.2315, ctc_loss=0.1205, cr_loss=0.3629, attn_decoder_loss=0.2358, over 29492.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1244, cr_loss=0.3678, attn_decoder_loss=0.245, over 5737258.43 frames. ], batch size: 77, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:52:41,215 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.90 vs. limit=15.0 +2024-09-18 17:52:55,756 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.508e+01 8.563e+01 9.173e+01 1.012e+02 1.591e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-18 17:53:07,579 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.51 vs. 
limit=15.0 +2024-09-18 17:53:08,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=492780.0, ans=0.1 +2024-09-18 17:53:11,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=492780.0, ans=0.0 +2024-09-18 17:53:27,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=492860.0, ans=0.025 +2024-09-18 17:53:37,057 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:53:46,488 INFO [train.py:1198] (0/2) Epoch 28, batch 1050, loss[loss=0.2466, ctc_loss=0.1224, cr_loss=0.3601, attn_decoder_loss=0.2524, over 29676.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1237, cr_loss=0.3663, attn_decoder_loss=0.2442, over 5745139.33 frames. ], batch size: 85, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:54:22,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.63 vs. limit=15.0 +2024-09-18 17:54:28,184 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:54:33,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=493020.0, ans=0.04949747468305833 +2024-09-18 17:54:48,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.96 vs. limit=22.5 +2024-09-18 17:54:54,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=493060.0, ans=0.025 +2024-09-18 17:54:57,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=493060.0, ans=10.0 +2024-09-18 17:55:04,308 INFO [train.py:1198] (0/2) Epoch 28, batch 1100, loss[loss=0.2335, ctc_loss=0.1238, cr_loss=0.3552, attn_decoder_loss=0.2378, over 29446.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1237, cr_loss=0.3661, attn_decoder_loss=0.2439, over 5758345.28 frames. ], batch size: 78, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:55:12,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=493100.0, ans=0.125 +2024-09-18 17:55:29,536 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.31 vs. 
limit=15.0 +2024-09-18 17:55:31,739 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.956e+01 8.310e+01 8.930e+01 9.558e+01 2.939e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-18 17:55:32,155 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:55:54,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=493220.0, ans=0.125 +2024-09-18 17:56:02,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=493220.0, ans=0.0 +2024-09-18 17:56:14,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=493260.0, ans=0.0 +2024-09-18 17:56:19,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=493300.0, ans=0.125 +2024-09-18 17:56:20,581 INFO [train.py:1198] (0/2) Epoch 28, batch 1150, loss[loss=0.229, ctc_loss=0.1113, cr_loss=0.3349, attn_decoder_loss=0.2346, over 29464.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1236, cr_loss=0.3659, attn_decoder_loss=0.2439, over 5755219.47 frames. ], batch size: 78, lr: 3.98e-03, grad_scale: 8.0 +2024-09-18 17:56:33,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=493300.0, ans=0.125 +2024-09-18 17:56:42,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=493340.0, ans=0.1 +2024-09-18 17:56:42,217 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:57:38,536 INFO [train.py:1198] (0/2) Epoch 28, batch 1200, loss[loss=0.2504, ctc_loss=0.136, cr_loss=0.3568, attn_decoder_loss=0.2552, over 29671.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1244, cr_loss=0.3675, attn_decoder_loss=0.2446, over 5747577.92 frames. ], batch size: 85, lr: 3.97e-03, grad_scale: 16.0 +2024-09-18 17:58:06,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=493540.0, ans=0.1 +2024-09-18 17:58:07,207 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.372e+01 8.554e+01 9.030e+01 9.625e+01 2.213e+02, threshold=1.806e+02, percent-clipped=2.0 +2024-09-18 17:58:12,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=493580.0, ans=0.2 +2024-09-18 17:58:13,015 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.90 vs. limit=10.0 +2024-09-18 17:58:25,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=493620.0, ans=0.1 +2024-09-18 17:58:39,291 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:58:45,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=493660.0, ans=0.125 +2024-09-18 17:58:56,918 INFO [train.py:1198] (0/2) Epoch 28, batch 1250, loss[loss=0.2539, ctc_loss=0.1277, cr_loss=0.3895, attn_decoder_loss=0.2593, over 29546.00 frames. 
], tot_loss[loss=0.2402, ctc_loss=0.1247, cr_loss=0.3685, attn_decoder_loss=0.2448, over 5775109.85 frames. ], batch size: 92, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 17:58:58,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=493700.0, ans=0.07 +2024-09-18 17:58:58,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=493700.0, ans=0.1 +2024-09-18 17:59:12,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=493740.0, ans=0.125 +2024-09-18 17:59:42,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=493820.0, ans=0.0 +2024-09-18 17:59:42,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=493820.0, ans=0.125 +2024-09-18 17:59:52,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=493820.0, ans=0.125 +2024-09-18 17:59:59,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=493860.0, ans=0.125 +2024-09-18 18:00:13,105 INFO [train.py:1198] (0/2) Epoch 28, batch 1300, loss[loss=0.2512, ctc_loss=0.1244, cr_loss=0.3546, attn_decoder_loss=0.2574, over 28422.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1245, cr_loss=0.3675, attn_decoder_loss=0.2444, over 5780605.07 frames. ], batch size: 111, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:00:19,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=493900.0, ans=0.0 +2024-09-18 18:00:25,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=493900.0, ans=0.125 +2024-09-18 18:00:36,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=493940.0, ans=0.025 +2024-09-18 18:00:40,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=493940.0, ans=0.125 +2024-09-18 18:00:41,990 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.448e+01 8.590e+01 9.154e+01 9.575e+01 1.829e+02, threshold=1.831e+02, percent-clipped=1.0 +2024-09-18 18:00:48,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=493980.0, ans=0.1 +2024-09-18 18:01:00,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=494020.0, ans=0.0 +2024-09-18 18:01:02,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=494020.0, ans=0.025 +2024-09-18 18:01:07,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=494020.0, ans=0.0 +2024-09-18 18:01:18,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=494060.0, ans=0.1 +2024-09-18 18:01:29,044 INFO [train.py:1198] (0/2) Epoch 28, batch 1350, loss[loss=0.2379, ctc_loss=0.1179, cr_loss=0.3579, attn_decoder_loss=0.2433, over 29764.00 frames. 
], tot_loss[loss=0.2392, ctc_loss=0.1238, cr_loss=0.3659, attn_decoder_loss=0.2439, over 5799246.92 frames. ], batch size: 81, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:01:39,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=494100.0, ans=0.125 +2024-09-18 18:01:48,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=494140.0, ans=0.1 +2024-09-18 18:01:53,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=494140.0, ans=0.125 +2024-09-18 18:01:58,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=494140.0, ans=0.0 +2024-09-18 18:02:22,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=494220.0, ans=0.125 +2024-09-18 18:02:26,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=494220.0, ans=0.125 +2024-09-18 18:02:32,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=494260.0, ans=0.035 +2024-09-18 18:02:48,567 INFO [train.py:1198] (0/2) Epoch 28, batch 1400, loss[loss=0.2064, ctc_loss=0.1028, cr_loss=0.3101, attn_decoder_loss=0.211, over 29606.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1237, cr_loss=0.3659, attn_decoder_loss=0.2437, over 5809316.60 frames. ], batch size: 69, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:03:07,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.92 vs. limit=15.0 +2024-09-18 18:03:17,503 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.696e+01 8.548e+01 9.065e+01 9.786e+01 1.272e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-18 18:03:24,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.62 vs. limit=15.0 +2024-09-18 18:03:42,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=494420.0, ans=0.125 +2024-09-18 18:03:47,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=494420.0, ans=0.0 +2024-09-18 18:04:04,979 INFO [train.py:1198] (0/2) Epoch 28, batch 1450, loss[loss=0.2595, ctc_loss=0.1345, cr_loss=0.4021, attn_decoder_loss=0.2645, over 29469.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.124, cr_loss=0.3663, attn_decoder_loss=0.2444, over 5804923.36 frames. 
], batch size: 94, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:04:22,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=494540.0, ans=0.0 +2024-09-18 18:04:44,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=494580.0, ans=0.1 +2024-09-18 18:04:49,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=494620.0, ans=0.1 +2024-09-18 18:05:13,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=494660.0, ans=0.1 +2024-09-18 18:05:15,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=494660.0, ans=0.025 +2024-09-18 18:05:20,930 INFO [train.py:1198] (0/2) Epoch 28, batch 1500, loss[loss=0.2539, ctc_loss=0.1239, cr_loss=0.3694, attn_decoder_loss=0.2601, over 29636.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1243, cr_loss=0.3668, attn_decoder_loss=0.2448, over 5806175.37 frames. ], batch size: 86, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:05:31,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.61 vs. limit=15.0 +2024-09-18 18:05:38,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=494740.0, ans=0.125 +2024-09-18 18:05:49,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=494740.0, ans=0.1 +2024-09-18 18:05:52,395 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.464e+01 8.636e+01 9.142e+01 9.701e+01 7.436e+02, threshold=1.828e+02, percent-clipped=2.0 +2024-09-18 18:06:12,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.whiten.whitening_limit, batch_count=494820.0, ans=12.0 +2024-09-18 18:06:13,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=494820.0, ans=0.0 +2024-09-18 18:06:41,533 INFO [train.py:1198] (0/2) Epoch 28, batch 1550, loss[loss=0.2532, ctc_loss=0.1378, cr_loss=0.3838, attn_decoder_loss=0.2575, over 29482.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1246, cr_loss=0.3667, attn_decoder_loss=0.2449, over 5780978.22 frames. ], batch size: 90, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:07:04,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=494940.0, ans=0.125 +2024-09-18 18:07:04,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=494940.0, ans=0.07 +2024-09-18 18:07:06,917 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.06 vs. limit=12.0 +2024-09-18 18:07:19,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=494980.0, ans=0.0 +2024-09-18 18:07:33,862 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.58 vs. 
limit=10.0 +2024-09-18 18:07:38,421 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.50 vs. limit=6.0 +2024-09-18 18:07:45,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=495060.0, ans=0.125 +2024-09-18 18:07:53,476 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.07 vs. limit=15.0 +2024-09-18 18:07:57,363 INFO [train.py:1198] (0/2) Epoch 28, batch 1600, loss[loss=0.2478, ctc_loss=0.1233, cr_loss=0.3663, attn_decoder_loss=0.2535, over 29655.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1246, cr_loss=0.3665, attn_decoder_loss=0.2447, over 5763807.06 frames. ], batch size: 85, lr: 3.97e-03, grad_scale: 16.0 +2024-09-18 18:08:14,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=495140.0, ans=0.1 +2024-09-18 18:08:18,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=495140.0, ans=0.0 +2024-09-18 18:08:27,526 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.529e+01 9.034e+01 9.836e+01 1.943e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-18 18:08:45,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=495220.0, ans=0.125 +2024-09-18 18:09:05,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=495260.0, ans=0.0 +2024-09-18 18:09:15,398 INFO [train.py:1198] (0/2) Epoch 28, batch 1650, loss[loss=0.258, ctc_loss=0.1394, cr_loss=0.4175, attn_decoder_loss=0.2619, over 29707.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1245, cr_loss=0.3666, attn_decoder_loss=0.2446, over 5758153.19 frames. 
], batch size: 89, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:09:18,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=495300.0, ans=0.125 +2024-09-18 18:09:24,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=495300.0, ans=0.125 +2024-09-18 18:09:24,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=495300.0, ans=0.0 +2024-09-18 18:09:27,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=495300.0, ans=0.0 +2024-09-18 18:09:29,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=495340.0, ans=0.0 +2024-09-18 18:09:31,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=495340.0, ans=0.125 +2024-09-18 18:09:31,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=495340.0, ans=0.125 +2024-09-18 18:09:31,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=495340.0, ans=0.025 +2024-09-18 18:09:40,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=495340.0, ans=0.125 +2024-09-18 18:09:47,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=495380.0, ans=0.125 +2024-09-18 18:09:58,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=495380.0, ans=0.0 +2024-09-18 18:10:10,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=495420.0, ans=0.0 +2024-09-18 18:10:18,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=495460.0, ans=0.0 +2024-09-18 18:10:23,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=495460.0, ans=0.125 +2024-09-18 18:10:23,916 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.85 vs. limit=15.0 +2024-09-18 18:10:33,326 INFO [train.py:1198] (0/2) Epoch 28, batch 1700, loss[loss=0.2217, ctc_loss=0.1071, cr_loss=0.3307, attn_decoder_loss=0.2271, over 29582.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1246, cr_loss=0.3675, attn_decoder_loss=0.2445, over 5779217.84 frames. 
], batch size: 69, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:10:36,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=495500.0, ans=0.125 +2024-09-18 18:10:45,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=495500.0, ans=0.0 +2024-09-18 18:10:57,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=495540.0, ans=0.0 +2024-09-18 18:11:03,302 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.621e+01 8.597e+01 9.283e+01 9.916e+01 1.626e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-18 18:11:08,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=495580.0, ans=0.0 +2024-09-18 18:11:08,989 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.02 vs. limit=15.0 +2024-09-18 18:11:09,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=495580.0, ans=0.125 +2024-09-18 18:11:26,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=495620.0, ans=0.1 +2024-09-18 18:11:49,156 INFO [train.py:1198] (0/2) Epoch 28, batch 1750, loss[loss=0.2141, ctc_loss=0.1066, cr_loss=0.3323, attn_decoder_loss=0.2186, over 29357.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1238, cr_loss=0.3666, attn_decoder_loss=0.2439, over 5787504.55 frames. ], batch size: 67, lr: 3.97e-03, grad_scale: 8.0 +2024-09-18 18:11:53,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=495700.0, ans=0.125 +2024-09-18 18:11:57,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=495700.0, ans=0.125 +2024-09-18 18:12:17,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=495740.0, ans=0.1 +2024-09-18 18:12:32,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.48 vs. limit=22.5 +2024-09-18 18:12:50,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=495860.0, ans=0.1 +2024-09-18 18:13:05,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=495900.0, ans=0.0 +2024-09-18 18:13:07,144 INFO [train.py:1198] (0/2) Epoch 28, batch 1800, loss[loss=0.2711, ctc_loss=0.1563, cr_loss=0.457, attn_decoder_loss=0.2737, over 29681.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1243, cr_loss=0.3675, attn_decoder_loss=0.2443, over 5790967.49 frames. 
], batch size: 83, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:13:07,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=495900.0, ans=0.0 +2024-09-18 18:13:15,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=495900.0, ans=0.0 +2024-09-18 18:13:25,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=495940.0, ans=0.125 +2024-09-18 18:13:37,653 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.001e+01 8.359e+01 8.858e+01 9.396e+01 1.273e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-18 18:13:44,064 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-124000.pt +2024-09-18 18:14:02,539 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.14 vs. limit=22.5 +2024-09-18 18:14:12,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=496020.0, ans=0.0 +2024-09-18 18:14:20,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=496060.0, ans=0.0 +2024-09-18 18:14:32,075 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.26 vs. limit=6.0 +2024-09-18 18:14:32,849 INFO [train.py:1198] (0/2) Epoch 28, batch 1850, loss[loss=0.2554, ctc_loss=0.1345, cr_loss=0.3809, attn_decoder_loss=0.2604, over 29653.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1239, cr_loss=0.3667, attn_decoder_loss=0.2442, over 5797187.42 frames. ], batch size: 86, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:14:39,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.10 vs. limit=15.0 +2024-09-18 18:14:49,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=496140.0, ans=0.09899494936611666 +2024-09-18 18:14:53,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=496140.0, ans=0.0 +2024-09-18 18:14:54,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=496140.0, ans=0.1 +2024-09-18 18:15:12,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=496180.0, ans=0.2 +2024-09-18 18:15:28,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.90 vs. limit=15.0 +2024-09-18 18:15:48,303 INFO [train.py:1198] (0/2) Epoch 28, batch 1900, loss[loss=0.2471, ctc_loss=0.1235, cr_loss=0.3644, attn_decoder_loss=0.2528, over 29708.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1243, cr_loss=0.3676, attn_decoder_loss=0.2447, over 5804662.39 frames. ], batch size: 89, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:15:50,703 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.74 vs. 
limit=15.0 +2024-09-18 18:16:01,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.32 vs. limit=15.0 +2024-09-18 18:16:06,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=496340.0, ans=0.125 +2024-09-18 18:16:14,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=496340.0, ans=0.125 +2024-09-18 18:16:18,825 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.546e+01 8.544e+01 9.072e+01 9.391e+01 1.587e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-18 18:16:28,731 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.05 vs. limit=10.0 +2024-09-18 18:16:46,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=496420.0, ans=0.125 +2024-09-18 18:16:46,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=496420.0, ans=0.1 +2024-09-18 18:16:54,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.38 vs. limit=6.0 +2024-09-18 18:17:06,241 INFO [train.py:1198] (0/2) Epoch 28, batch 1950, loss[loss=0.2384, ctc_loss=0.1292, cr_loss=0.3873, attn_decoder_loss=0.2419, over 29449.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.125, cr_loss=0.3692, attn_decoder_loss=0.2458, over 5819832.79 frames. ], batch size: 78, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:17:25,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.46 vs. limit=6.0 +2024-09-18 18:17:28,446 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.15 vs. limit=15.0 +2024-09-18 18:17:45,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=496580.0, ans=22.5 +2024-09-18 18:17:45,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=496580.0, ans=0.2 +2024-09-18 18:18:00,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=496620.0, ans=0.125 +2024-09-18 18:18:13,788 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.15 vs. limit=15.0 +2024-09-18 18:18:24,157 INFO [train.py:1198] (0/2) Epoch 28, batch 2000, loss[loss=0.2205, ctc_loss=0.1166, cr_loss=0.3461, attn_decoder_loss=0.2244, over 29324.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1255, cr_loss=0.3695, attn_decoder_loss=0.2462, over 5797610.63 frames. 
], batch size: 67, lr: 3.96e-03, grad_scale: 16.0 +2024-09-18 18:18:55,935 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.651e+01 8.591e+01 9.006e+01 9.471e+01 1.475e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-18 18:19:31,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=496860.0, ans=0.125 +2024-09-18 18:19:40,000 INFO [train.py:1198] (0/2) Epoch 28, batch 2050, loss[loss=0.2185, ctc_loss=0.1104, cr_loss=0.3364, attn_decoder_loss=0.223, over 29455.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1251, cr_loss=0.3688, attn_decoder_loss=0.2455, over 5788274.19 frames. ], batch size: 70, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:19:40,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=496900.0, ans=0.5 +2024-09-18 18:19:59,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=496940.0, ans=0.125 +2024-09-18 18:20:36,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=497020.0, ans=0.0 +2024-09-18 18:20:45,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=497060.0, ans=0.2 +2024-09-18 18:20:58,299 INFO [train.py:1198] (0/2) Epoch 28, batch 2100, loss[loss=0.2402, ctc_loss=0.1186, cr_loss=0.3512, attn_decoder_loss=0.2459, over 29760.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1244, cr_loss=0.3677, attn_decoder_loss=0.2449, over 5800164.97 frames. ], batch size: 81, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:21:29,768 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.088e+01 8.428e+01 8.818e+01 9.232e+01 1.075e+02, threshold=1.764e+02, percent-clipped=0.0 +2024-09-18 18:21:37,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=497180.0, ans=0.125 +2024-09-18 18:21:43,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=497220.0, ans=0.2 +2024-09-18 18:21:55,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=497220.0, ans=0.1 +2024-09-18 18:22:00,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=497260.0, ans=0.125 +2024-09-18 18:22:04,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=497260.0, ans=0.125 +2024-09-18 18:22:13,527 INFO [train.py:1198] (0/2) Epoch 28, batch 2150, loss[loss=0.2358, ctc_loss=0.13, cr_loss=0.387, attn_decoder_loss=0.2389, over 29458.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1235, cr_loss=0.3663, attn_decoder_loss=0.2441, over 5813959.48 frames. ], batch size: 78, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:22:15,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=497300.0, ans=0.125 +2024-09-18 18:22:17,422 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.27 vs. 
limit=15.0 +2024-09-18 18:22:22,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=497300.0, ans=0.2 +2024-09-18 18:23:01,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=497420.0, ans=0.125 +2024-09-18 18:23:01,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=497420.0, ans=0.125 +2024-09-18 18:23:06,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.82 vs. limit=15.0 +2024-09-18 18:23:06,890 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.49 vs. limit=15.0 +2024-09-18 18:23:31,677 INFO [train.py:1198] (0/2) Epoch 28, batch 2200, loss[loss=0.2538, ctc_loss=0.1393, cr_loss=0.3941, attn_decoder_loss=0.2577, over 29624.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1238, cr_loss=0.3671, attn_decoder_loss=0.2443, over 5809929.24 frames. ], batch size: 86, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:23:48,965 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.35 vs. limit=12.0 +2024-09-18 18:23:54,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=497540.0, ans=0.125 +2024-09-18 18:23:59,475 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.56 vs. limit=6.0 +2024-09-18 18:24:03,366 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.572e+01 8.974e+01 9.491e+01 1.804e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-18 18:24:03,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=497580.0, ans=0.0 +2024-09-18 18:24:19,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=497620.0, ans=0.07 +2024-09-18 18:24:19,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=497620.0, ans=0.07 +2024-09-18 18:24:30,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=497620.0, ans=0.0 +2024-09-18 18:24:42,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.47 vs. limit=15.0 +2024-09-18 18:24:46,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=497700.0, ans=0.125 +2024-09-18 18:24:47,950 INFO [train.py:1198] (0/2) Epoch 28, batch 2250, loss[loss=0.2345, ctc_loss=0.1229, cr_loss=0.3496, attn_decoder_loss=0.2391, over 29722.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1238, cr_loss=0.3666, attn_decoder_loss=0.244, over 5809044.57 frames. 
], batch size: 82, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:24:56,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=497700.0, ans=0.0 +2024-09-18 18:25:04,196 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:25:05,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=497740.0, ans=0.0 +2024-09-18 18:25:17,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=497740.0, ans=0.2 +2024-09-18 18:25:23,983 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:25:28,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=497780.0, ans=0.025 +2024-09-18 18:25:43,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.05 vs. limit=22.5 +2024-09-18 18:26:05,886 INFO [train.py:1198] (0/2) Epoch 28, batch 2300, loss[loss=0.21, ctc_loss=0.1003, cr_loss=0.3077, attn_decoder_loss=0.2154, over 29318.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1233, cr_loss=0.3655, attn_decoder_loss=0.2432, over 5797540.21 frames. ], batch size: 71, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:26:06,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=497900.0, ans=0.025 +2024-09-18 18:26:32,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=497940.0, ans=0.0 +2024-09-18 18:26:39,374 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.522e+01 8.383e+01 8.665e+01 9.441e+01 6.698e+02, threshold=1.733e+02, percent-clipped=3.0 +2024-09-18 18:26:45,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=497980.0, ans=0.1 +2024-09-18 18:26:49,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=497980.0, ans=0.125 +2024-09-18 18:26:49,580 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.85 vs. limit=15.0 +2024-09-18 18:26:53,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=498020.0, ans=0.2 +2024-09-18 18:27:01,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=498020.0, ans=0.0 +2024-09-18 18:27:19,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=498060.0, ans=0.125 +2024-09-18 18:27:23,864 INFO [train.py:1198] (0/2) Epoch 28, batch 2350, loss[loss=0.2511, ctc_loss=0.1359, cr_loss=0.3997, attn_decoder_loss=0.255, over 29705.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1234, cr_loss=0.3657, attn_decoder_loss=0.2433, over 5803527.76 frames. 
], batch size: 83, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:27:32,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=498100.0, ans=0.125 +2024-09-18 18:27:54,973 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.58 vs. limit=22.5 +2024-09-18 18:28:17,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=498220.0, ans=0.125 +2024-09-18 18:28:18,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=498220.0, ans=0.07 +2024-09-18 18:28:38,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=498300.0, ans=0.125 +2024-09-18 18:28:39,728 INFO [train.py:1198] (0/2) Epoch 28, batch 2400, loss[loss=0.2367, ctc_loss=0.127, cr_loss=0.3836, attn_decoder_loss=0.2404, over 29544.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.124, cr_loss=0.3666, attn_decoder_loss=0.244, over 5807095.92 frames. ], batch size: 76, lr: 3.96e-03, grad_scale: 16.0 +2024-09-18 18:28:48,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer_na.min_abs, batch_count=498300.0, ans=0.02 +2024-09-18 18:28:51,665 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.90 vs. limit=22.5 +2024-09-18 18:28:57,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=498340.0, ans=0.05 +2024-09-18 18:29:07,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=498340.0, ans=0.0 +2024-09-18 18:29:09,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=498340.0, ans=0.0 +2024-09-18 18:29:15,197 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.035e+01 8.714e+01 9.180e+01 9.673e+01 2.821e+02, threshold=1.836e+02, percent-clipped=1.0 +2024-09-18 18:29:30,007 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.05 vs. limit=12.0 +2024-09-18 18:29:58,171 INFO [train.py:1198] (0/2) Epoch 28, batch 2450, loss[loss=0.2512, ctc_loss=0.1321, cr_loss=0.3812, attn_decoder_loss=0.2559, over 29687.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1245, cr_loss=0.3675, attn_decoder_loss=0.2445, over 5784158.61 frames. ], batch size: 82, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:29:58,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=498500.0, ans=0.1 +2024-09-18 18:30:08,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=498500.0, ans=0.125 +2024-09-18 18:30:13,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=498540.0, ans=0.1 +2024-09-18 18:30:16,373 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.09 vs. 
limit=15.0 +2024-09-18 18:30:42,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=498580.0, ans=0.125 +2024-09-18 18:30:52,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=498620.0, ans=0.0 +2024-09-18 18:31:10,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=498660.0, ans=0.1 +2024-09-18 18:31:16,314 INFO [train.py:1198] (0/2) Epoch 28, batch 2500, loss[loss=0.2481, ctc_loss=0.1206, cr_loss=0.3645, attn_decoder_loss=0.2541, over 29652.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1244, cr_loss=0.367, attn_decoder_loss=0.2445, over 5794223.49 frames. ], batch size: 86, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:31:24,031 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:31:48,930 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.76 vs. limit=15.0 +2024-09-18 18:31:49,772 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.126e+01 8.525e+01 9.051e+01 9.521e+01 3.075e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-18 18:31:51,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=498780.0, ans=0.125 +2024-09-18 18:32:05,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=498820.0, ans=0.125 +2024-09-18 18:32:08,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=498820.0, ans=0.125 +2024-09-18 18:32:11,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=498820.0, ans=0.2 +2024-09-18 18:32:31,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=498900.0, ans=0.0 +2024-09-18 18:32:32,428 INFO [train.py:1198] (0/2) Epoch 28, batch 2550, loss[loss=0.2219, ctc_loss=0.112, cr_loss=0.3525, attn_decoder_loss=0.2263, over 29343.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1243, cr_loss=0.3668, attn_decoder_loss=0.2444, over 5797108.41 frames. ], batch size: 67, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:33:01,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.31 vs. limit=15.0 +2024-09-18 18:33:06,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=498980.0, ans=0.1 +2024-09-18 18:33:08,527 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.45 vs. limit=15.0 +2024-09-18 18:33:12,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=498980.0, ans=0.1 +2024-09-18 18:33:50,490 INFO [train.py:1198] (0/2) Epoch 28, batch 2600, loss[loss=0.2379, ctc_loss=0.1243, cr_loss=0.3798, attn_decoder_loss=0.2421, over 29439.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1244, cr_loss=0.3672, attn_decoder_loss=0.2447, over 5793344.62 frames. 
], batch size: 78, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:33:53,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=499100.0, ans=0.125 +2024-09-18 18:34:04,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=499140.0, ans=0.0 +2024-09-18 18:34:13,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.94 vs. limit=10.0 +2024-09-18 18:34:14,796 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.69 vs. limit=12.0 +2024-09-18 18:34:25,550 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.747e+01 8.719e+01 9.111e+01 9.618e+01 2.208e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 18:34:26,390 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.00 vs. limit=15.0 +2024-09-18 18:34:44,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=499220.0, ans=0.125 +2024-09-18 18:34:54,136 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.79 vs. limit=22.5 +2024-09-18 18:35:02,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=499260.0, ans=0.125 +2024-09-18 18:35:05,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=499260.0, ans=0.0 +2024-09-18 18:35:07,770 INFO [train.py:1198] (0/2) Epoch 28, batch 2650, loss[loss=0.2583, ctc_loss=0.1344, cr_loss=0.4043, attn_decoder_loss=0.2631, over 29314.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1242, cr_loss=0.3672, attn_decoder_loss=0.2449, over 5801197.34 frames. ], batch size: 100, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:35:14,547 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.61 vs. limit=22.5 +2024-09-18 18:35:18,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=499300.0, ans=0.025 +2024-09-18 18:35:23,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=499340.0, ans=0.125 +2024-09-18 18:35:36,231 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.32 vs. limit=8.0 +2024-09-18 18:35:41,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=499380.0, ans=0.125 +2024-09-18 18:35:43,490 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.87 vs. 
limit=15.0 +2024-09-18 18:36:05,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=499420.0, ans=0.1 +2024-09-18 18:36:16,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=499460.0, ans=0.0 +2024-09-18 18:36:22,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=499500.0, ans=0.125 +2024-09-18 18:36:25,507 INFO [train.py:1198] (0/2) Epoch 28, batch 2700, loss[loss=0.2445, ctc_loss=0.1132, cr_loss=0.3558, attn_decoder_loss=0.2511, over 29533.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1248, cr_loss=0.3682, attn_decoder_loss=0.2454, over 5796859.82 frames. ], batch size: 87, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:36:37,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.33 vs. limit=10.0 +2024-09-18 18:36:37,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=499500.0, ans=0.125 +2024-09-18 18:36:47,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.85 vs. limit=15.0 +2024-09-18 18:36:56,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=499580.0, ans=0.2 +2024-09-18 18:36:58,805 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.370e+01 8.414e+01 8.942e+01 9.601e+01 1.842e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 18:37:08,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=499580.0, ans=0.125 +2024-09-18 18:37:32,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=499660.0, ans=0.1 +2024-09-18 18:37:33,321 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.51 vs. limit=10.0 +2024-09-18 18:37:37,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=499660.0, ans=0.5 +2024-09-18 18:37:38,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=499660.0, ans=0.125 +2024-09-18 18:37:38,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=499660.0, ans=0.0 +2024-09-18 18:37:41,533 INFO [train.py:1198] (0/2) Epoch 28, batch 2750, loss[loss=0.2338, ctc_loss=0.1178, cr_loss=0.3392, attn_decoder_loss=0.2392, over 29507.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1237, cr_loss=0.3661, attn_decoder_loss=0.2443, over 5796605.73 frames. ], batch size: 75, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:37:55,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=499740.0, ans=0.0 +2024-09-18 18:38:11,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=499740.0, ans=0.2 +2024-09-18 18:38:19,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.42 vs. 
limit=15.0 +2024-09-18 18:38:27,806 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:38:29,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=499820.0, ans=0.1 +2024-09-18 18:38:40,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=499820.0, ans=0.125 +2024-09-18 18:38:42,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.whiten.whitening_limit, batch_count=499820.0, ans=15.0 +2024-09-18 18:38:57,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=499860.0, ans=0.0 +2024-09-18 18:38:59,707 INFO [train.py:1198] (0/2) Epoch 28, batch 2800, loss[loss=0.2579, ctc_loss=0.1493, cr_loss=0.3827, attn_decoder_loss=0.2615, over 20329.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.124, cr_loss=0.3667, attn_decoder_loss=0.2445, over 5777438.67 frames. ], batch size: 213, lr: 3.95e-03, grad_scale: 16.0 +2024-09-18 18:39:03,900 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.25 vs. limit=10.0 +2024-09-18 18:39:07,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=499900.0, ans=0.0 +2024-09-18 18:39:15,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=499940.0, ans=0.125 +2024-09-18 18:39:34,536 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.662e+01 9.200e+01 9.823e+01 1.916e+02, threshold=1.840e+02, percent-clipped=1.0 +2024-09-18 18:39:36,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=499980.0, ans=0.1 +2024-09-18 18:39:40,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.24 vs. limit=15.0 +2024-09-18 18:39:43,443 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.30 vs. limit=10.0 +2024-09-18 18:40:17,494 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.13 vs. limit=22.5 +2024-09-18 18:40:18,068 INFO [train.py:1198] (0/2) Epoch 28, batch 2850, loss[loss=0.234, ctc_loss=0.1178, cr_loss=0.351, attn_decoder_loss=0.2391, over 29508.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1244, cr_loss=0.367, attn_decoder_loss=0.2448, over 5762628.05 frames. 
], batch size: 77, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:40:19,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=500100.0, ans=0.125 +2024-09-18 18:40:28,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=500100.0, ans=0.125 +2024-09-18 18:40:45,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=500140.0, ans=0.125 +2024-09-18 18:40:48,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=500180.0, ans=0.0 +2024-09-18 18:40:51,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=500180.0, ans=0.1 +2024-09-18 18:40:51,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=500180.0, ans=0.0 +2024-09-18 18:41:03,460 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.42 vs. limit=8.0 +2024-09-18 18:41:04,351 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.61 vs. limit=15.0 +2024-09-18 18:41:33,987 INFO [train.py:1198] (0/2) Epoch 28, batch 2900, loss[loss=0.2401, ctc_loss=0.1278, cr_loss=0.3883, attn_decoder_loss=0.2439, over 29392.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1253, cr_loss=0.3692, attn_decoder_loss=0.2461, over 5787904.27 frames. ], batch size: 79, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:41:50,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=500340.0, ans=0.125 +2024-09-18 18:42:10,954 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.093e+01 8.571e+01 8.982e+01 9.611e+01 1.691e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-18 18:42:22,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=500420.0, ans=6.0 +2024-09-18 18:42:23,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=500420.0, ans=0.125 +2024-09-18 18:42:35,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=500460.0, ans=0.025 +2024-09-18 18:42:51,879 INFO [train.py:1198] (0/2) Epoch 28, batch 2950, loss[loss=0.2227, ctc_loss=0.1094, cr_loss=0.333, attn_decoder_loss=0.2279, over 29517.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1245, cr_loss=0.3675, attn_decoder_loss=0.2448, over 5782167.21 frames. ], batch size: 75, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:42:59,115 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.51 vs. limit=12.0 +2024-09-18 18:43:06,415 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.80 vs. 
limit=15.0 +2024-09-18 18:43:14,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=500540.0, ans=0.125 +2024-09-18 18:43:33,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten.whitening_limit, batch_count=500580.0, ans=15.0 +2024-09-18 18:43:37,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=500620.0, ans=0.1 +2024-09-18 18:43:51,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=500660.0, ans=0.125 +2024-09-18 18:43:59,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=500660.0, ans=0.2 +2024-09-18 18:44:06,307 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.25 vs. limit=22.5 +2024-09-18 18:44:10,171 INFO [train.py:1198] (0/2) Epoch 28, batch 3000, loss[loss=0.232, ctc_loss=0.1159, cr_loss=0.342, attn_decoder_loss=0.2373, over 29756.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1244, cr_loss=0.3674, attn_decoder_loss=0.2449, over 5783475.66 frames. ], batch size: 81, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:44:10,171 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 18:44:28,709 INFO [train.py:1230] (0/2) Epoch 28, validation: loss=0.2115, ctc_loss=0.03821, cr_loss=5.852e-15, attn_decoder_loss=0.2307, over 944034.00 frames. +2024-09-18 18:44:28,709 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 18:44:44,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=500740.0, ans=0.125 +2024-09-18 18:44:50,942 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.33 vs. limit=15.0 +2024-09-18 18:45:03,651 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.528e+01 8.580e+01 9.034e+01 9.618e+01 2.130e+02, threshold=1.807e+02, percent-clipped=2.0 +2024-09-18 18:45:07,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=500780.0, ans=0.125 +2024-09-18 18:45:40,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.53 vs. limit=6.0 +2024-09-18 18:45:45,098 INFO [train.py:1198] (0/2) Epoch 28, batch 3050, loss[loss=0.2237, ctc_loss=0.1216, cr_loss=0.3597, attn_decoder_loss=0.227, over 29540.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1247, cr_loss=0.3675, attn_decoder_loss=0.2452, over 5776660.39 frames. 
], batch size: 76, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:46:02,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=500940.0, ans=0.0 +2024-09-18 18:46:05,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=500940.0, ans=0.125 +2024-09-18 18:46:07,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=500940.0, ans=0.1 +2024-09-18 18:46:24,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=500980.0, ans=0.0 +2024-09-18 18:46:44,217 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.64 vs. limit=15.0 +2024-09-18 18:46:49,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=501060.0, ans=0.2 +2024-09-18 18:46:50,573 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.80 vs. limit=22.5 +2024-09-18 18:46:51,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=501060.0, ans=0.125 +2024-09-18 18:47:02,941 INFO [train.py:1198] (0/2) Epoch 28, batch 3100, loss[loss=0.2552, ctc_loss=0.1411, cr_loss=0.4208, attn_decoder_loss=0.2585, over 29269.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1246, cr_loss=0.3674, attn_decoder_loss=0.2449, over 5776854.86 frames. ], batch size: 100, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:47:04,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=501100.0, ans=0.025 +2024-09-18 18:47:27,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=501140.0, ans=0.125 +2024-09-18 18:47:37,590 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.256e+01 8.481e+01 8.983e+01 9.463e+01 1.324e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-18 18:48:19,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=501300.0, ans=0.125 +2024-09-18 18:48:20,792 INFO [train.py:1198] (0/2) Epoch 28, batch 3150, loss[loss=0.251, ctc_loss=0.1259, cr_loss=0.3646, attn_decoder_loss=0.2568, over 28888.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1243, cr_loss=0.3665, attn_decoder_loss=0.2448, over 5784126.65 frames. ], batch size: 104, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:48:37,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=501340.0, ans=0.0 +2024-09-18 18:49:15,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=501420.0, ans=0.05 +2024-09-18 18:49:33,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=501460.0, ans=0.0 +2024-09-18 18:49:36,033 INFO [train.py:1198] (0/2) Epoch 28, batch 3200, loss[loss=0.2344, ctc_loss=0.1202, cr_loss=0.3635, attn_decoder_loss=0.239, over 29415.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.124, cr_loss=0.3662, attn_decoder_loss=0.2445, over 5794840.10 frames. 
], batch size: 79, lr: 3.94e-03, grad_scale: 16.0 +2024-09-18 18:49:52,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=501540.0, ans=0.125 +2024-09-18 18:49:53,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=501540.0, ans=0.125 +2024-09-18 18:50:13,238 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.202e+01 8.510e+01 8.995e+01 9.300e+01 1.777e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-18 18:50:18,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=501580.0, ans=0.2 +2024-09-18 18:50:26,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=501620.0, ans=0.125 +2024-09-18 18:50:44,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=501660.0, ans=0.125 +2024-09-18 18:50:44,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=501660.0, ans=10.0 +2024-09-18 18:50:54,444 INFO [train.py:1198] (0/2) Epoch 28, batch 3250, loss[loss=0.2594, ctc_loss=0.1422, cr_loss=0.419, attn_decoder_loss=0.2631, over 29723.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1238, cr_loss=0.3665, attn_decoder_loss=0.2448, over 5801708.42 frames. ], batch size: 84, lr: 3.94e-03, grad_scale: 16.0 +2024-09-18 18:51:00,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=501700.0, ans=0.1 +2024-09-18 18:51:06,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=501700.0, ans=0.0 +2024-09-18 18:51:19,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.38 vs. limit=22.5 +2024-09-18 18:51:39,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=501820.0, ans=0.1 +2024-09-18 18:51:50,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=501820.0, ans=0.0 +2024-09-18 18:51:52,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=501820.0, ans=0.125 +2024-09-18 18:52:02,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=501860.0, ans=0.07 +2024-09-18 18:52:11,768 INFO [train.py:1198] (0/2) Epoch 28, batch 3300, loss[loss=0.247, ctc_loss=0.1233, cr_loss=0.3717, attn_decoder_loss=0.2525, over 28231.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1232, cr_loss=0.3647, attn_decoder_loss=0.2438, over 5797954.20 frames. 
], batch size: 111, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:52:12,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=501900.0, ans=0.125 +2024-09-18 18:52:21,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=501900.0, ans=0.05 +2024-09-18 18:52:25,956 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:52:38,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=501940.0, ans=0.025 +2024-09-18 18:52:44,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=501980.0, ans=0.125 +2024-09-18 18:52:48,162 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.375e+01 8.472e+01 9.021e+01 9.788e+01 2.409e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-18 18:53:27,371 INFO [train.py:1198] (0/2) Epoch 28, batch 3350, loss[loss=0.2509, ctc_loss=0.1344, cr_loss=0.3869, attn_decoder_loss=0.2552, over 28809.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1242, cr_loss=0.3669, attn_decoder_loss=0.2447, over 5774198.39 frames. ], batch size: 104, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:53:30,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=502100.0, ans=0.125 +2024-09-18 18:53:35,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=502100.0, ans=0.125 +2024-09-18 18:53:42,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=502100.0, ans=0.125 +2024-09-18 18:53:45,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=502140.0, ans=0.2 +2024-09-18 18:53:54,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=502140.0, ans=0.125 +2024-09-18 18:54:03,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=502180.0, ans=0.2 +2024-09-18 18:54:10,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.69 vs. limit=5.0 +2024-09-18 18:54:13,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=502220.0, ans=0.0 +2024-09-18 18:54:22,167 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.42 vs. limit=22.5 +2024-09-18 18:54:45,498 INFO [train.py:1198] (0/2) Epoch 28, batch 3400, loss[loss=0.2101, ctc_loss=0.1103, cr_loss=0.3282, attn_decoder_loss=0.2139, over 29358.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1244, cr_loss=0.3673, attn_decoder_loss=0.2448, over 5766045.93 frames. 
], batch size: 67, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:54:57,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=502300.0, ans=0.1 +2024-09-18 18:55:09,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=502340.0, ans=0.025 +2024-09-18 18:55:14,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=502380.0, ans=0.0 +2024-09-18 18:55:20,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=502380.0, ans=0.2 +2024-09-18 18:55:21,609 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.432e+01 8.459e+01 8.977e+01 9.782e+01 2.197e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-18 18:55:25,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=502380.0, ans=0.125 +2024-09-18 18:55:30,015 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.60 vs. limit=6.0 +2024-09-18 18:55:54,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=502460.0, ans=0.125 +2024-09-18 18:55:57,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=502460.0, ans=0.0 +2024-09-18 18:56:03,380 INFO [train.py:1198] (0/2) Epoch 28, batch 3450, loss[loss=0.2498, ctc_loss=0.1215, cr_loss=0.362, attn_decoder_loss=0.256, over 28386.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1245, cr_loss=0.3679, attn_decoder_loss=0.2452, over 5774539.48 frames. ], batch size: 111, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:56:21,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=502540.0, ans=0.125 +2024-09-18 18:56:23,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=502540.0, ans=0.025 +2024-09-18 18:56:23,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=502540.0, ans=0.125 +2024-09-18 18:56:33,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=502580.0, ans=0.2 +2024-09-18 18:57:18,866 INFO [train.py:1198] (0/2) Epoch 28, batch 3500, loss[loss=0.2173, ctc_loss=0.1061, cr_loss=0.3361, attn_decoder_loss=0.2222, over 29315.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1242, cr_loss=0.3671, attn_decoder_loss=0.2447, over 5776732.07 frames. ], batch size: 71, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:57:22,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=502700.0, ans=0.1 +2024-09-18 18:57:25,941 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.99 vs. 
limit=10.0 +2024-09-18 18:57:26,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=502700.0, ans=0.1 +2024-09-18 18:57:50,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=502780.0, ans=0.09899494936611666 +2024-09-18 18:57:57,151 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.602e+01 9.014e+01 9.488e+01 1.440e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-18 18:58:01,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=502780.0, ans=0.1 +2024-09-18 18:58:05,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=502820.0, ans=0.125 +2024-09-18 18:58:26,467 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.48 vs. limit=15.0 +2024-09-18 18:58:30,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=502860.0, ans=0.125 +2024-09-18 18:58:35,919 INFO [train.py:1198] (0/2) Epoch 28, batch 3550, loss[loss=0.2478, ctc_loss=0.1243, cr_loss=0.3623, attn_decoder_loss=0.2535, over 29698.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1241, cr_loss=0.3671, attn_decoder_loss=0.2447, over 5782739.46 frames. ], batch size: 89, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:58:45,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.57 vs. limit=6.0 +2024-09-18 18:59:12,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=502980.0, ans=0.125 +2024-09-18 18:59:12,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=502980.0, ans=0.125 +2024-09-18 18:59:13,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=502980.0, ans=0.0 +2024-09-18 18:59:17,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=502980.0, ans=0.125 +2024-09-18 18:59:40,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=503060.0, ans=0.2 +2024-09-18 18:59:40,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=503060.0, ans=0.1 +2024-09-18 18:59:43,527 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.73 vs. limit=15.0 +2024-09-18 18:59:50,231 INFO [train.py:1198] (0/2) Epoch 28, batch 3600, loss[loss=0.2448, ctc_loss=0.1248, cr_loss=0.3574, attn_decoder_loss=0.2502, over 29497.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1239, cr_loss=0.3665, attn_decoder_loss=0.2445, over 5792869.46 frames. ], batch size: 77, lr: 3.94e-03, grad_scale: 16.0 +2024-09-18 18:59:57,420 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.45 vs. 
limit=15.0 +2024-09-18 19:00:02,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=503100.0, ans=0.5 +2024-09-18 19:00:05,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=503140.0, ans=0.2 +2024-09-18 19:00:09,008 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.39 vs. limit=10.0 +2024-09-18 19:00:18,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=503180.0, ans=0.125 +2024-09-18 19:00:21,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=503180.0, ans=0.05 +2024-09-18 19:00:22,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.36 vs. limit=10.0 +2024-09-18 19:00:23,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=503180.0, ans=0.0 +2024-09-18 19:00:25,999 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.400e+01 8.358e+01 8.868e+01 9.352e+01 4.010e+02, threshold=1.774e+02, percent-clipped=1.0 +2024-09-18 19:00:28,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=503180.0, ans=0.0 +2024-09-18 19:00:28,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=503180.0, ans=0.125 +2024-09-18 19:00:56,166 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.63 vs. limit=15.0 +2024-09-18 19:00:59,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=503260.0, ans=10.0 +2024-09-18 19:01:04,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=503260.0, ans=0.0 +2024-09-18 19:01:07,132 INFO [train.py:1198] (0/2) Epoch 28, batch 3650, loss[loss=0.2533, ctc_loss=0.132, cr_loss=0.3903, attn_decoder_loss=0.2582, over 29506.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1236, cr_loss=0.3664, attn_decoder_loss=0.244, over 5794803.36 frames. ], batch size: 90, lr: 3.94e-03, grad_scale: 16.0 +2024-09-18 19:01:08,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=503300.0, ans=0.09899494936611666 +2024-09-18 19:01:34,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=503340.0, ans=0.1 +2024-09-18 19:01:41,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=503380.0, ans=0.1 +2024-09-18 19:01:47,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=503380.0, ans=0.2 +2024-09-18 19:01:48,282 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.96 vs. 
limit=22.5 +2024-09-18 19:02:07,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=503460.0, ans=0.1 +2024-09-18 19:02:15,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=503460.0, ans=0.0 +2024-09-18 19:02:21,716 INFO [train.py:1198] (0/2) Epoch 28, batch 3700, loss[loss=0.2426, ctc_loss=0.1281, cr_loss=0.3688, attn_decoder_loss=0.2471, over 29698.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1237, cr_loss=0.3663, attn_decoder_loss=0.244, over 5804670.52 frames. ], batch size: 84, lr: 3.93e-03, grad_scale: 16.0 +2024-09-18 19:02:27,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=503500.0, ans=0.1 +2024-09-18 19:02:32,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.80 vs. limit=22.5 +2024-09-18 19:02:34,762 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.31 vs. limit=6.0 +2024-09-18 19:02:58,673 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.285e+01 8.604e+01 9.187e+01 9.989e+01 2.860e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-18 19:03:07,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=503620.0, ans=0.0 +2024-09-18 19:03:10,090 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.63 vs. limit=15.0 +2024-09-18 19:03:16,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=503620.0, ans=0.125 +2024-09-18 19:03:26,248 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.38 vs. limit=15.0 +2024-09-18 19:03:36,037 INFO [train.py:1198] (0/2) Epoch 28, batch 3750, loss[loss=0.2046, ctc_loss=0.09922, cr_loss=0.2978, attn_decoder_loss=0.2097, over 29399.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1234, cr_loss=0.3658, attn_decoder_loss=0.2438, over 5808098.48 frames. ], batch size: 67, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:03:49,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=503740.0, ans=0.125 +2024-09-18 19:03:52,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=503740.0, ans=0.125 +2024-09-18 19:04:03,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=503740.0, ans=0.0 +2024-09-18 19:04:05,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=503780.0, ans=0.125 +2024-09-18 19:04:22,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=503820.0, ans=0.1 +2024-09-18 19:04:52,049 INFO [train.py:1198] (0/2) Epoch 28, batch 3800, loss[loss=0.251, ctc_loss=0.1273, cr_loss=0.3685, attn_decoder_loss=0.2565, over 29638.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1233, cr_loss=0.3653, attn_decoder_loss=0.2435, over 5798447.89 frames. 
], batch size: 86, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:05:15,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.84 vs. limit=22.5 +2024-09-18 19:05:18,436 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.24 vs. limit=15.0 +2024-09-18 19:05:25,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=503980.0, ans=0.0 +2024-09-18 19:05:29,965 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.108e+01 8.413e+01 8.933e+01 9.626e+01 3.409e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 19:05:57,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=504060.0, ans=0.0 +2024-09-18 19:06:01,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=504060.0, ans=0.125 +2024-09-18 19:06:01,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=504060.0, ans=0.1 +2024-09-18 19:06:01,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=504060.0, ans=0.125 +2024-09-18 19:06:07,082 INFO [train.py:1198] (0/2) Epoch 28, batch 3850, loss[loss=0.2592, ctc_loss=0.1268, cr_loss=0.3826, attn_decoder_loss=0.2654, over 29308.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1229, cr_loss=0.365, attn_decoder_loss=0.2434, over 5811688.22 frames. ], batch size: 100, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:06:13,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=504100.0, ans=0.125 +2024-09-18 19:06:20,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=504100.0, ans=0.0 +2024-09-18 19:06:34,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=504140.0, ans=0.0 +2024-09-18 19:06:37,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=504180.0, ans=0.0 +2024-09-18 19:06:42,032 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.65 vs. limit=15.0 +2024-09-18 19:06:59,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=504220.0, ans=0.035 +2024-09-18 19:07:14,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=504260.0, ans=0.0 +2024-09-18 19:07:23,016 INFO [train.py:1198] (0/2) Epoch 28, batch 3900, loss[loss=0.2608, ctc_loss=0.1369, cr_loss=0.3969, attn_decoder_loss=0.2658, over 29648.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1235, cr_loss=0.3664, attn_decoder_loss=0.244, over 5815810.11 frames. ], batch size: 86, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:07:36,038 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.05 vs. 
limit=15.0 +2024-09-18 19:07:48,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=504340.0, ans=0.125 +2024-09-18 19:07:56,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.24 vs. limit=6.0 +2024-09-18 19:08:00,192 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.164e+01 8.446e+01 8.921e+01 9.410e+01 1.233e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-18 19:08:03,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=504380.0, ans=0.2 +2024-09-18 19:08:37,164 INFO [train.py:1198] (0/2) Epoch 28, batch 3950, loss[loss=0.2503, ctc_loss=0.133, cr_loss=0.3781, attn_decoder_loss=0.2549, over 29475.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1234, cr_loss=0.3665, attn_decoder_loss=0.2441, over 5835375.78 frames. ], batch size: 97, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:08:37,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=504500.0, ans=0.05 +2024-09-18 19:08:50,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=504540.0, ans=0.125 +2024-09-18 19:08:52,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=504540.0, ans=0.125 +2024-09-18 19:08:58,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=504540.0, ans=0.05 +2024-09-18 19:09:01,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=504540.0, ans=0.125 +2024-09-18 19:09:06,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.54 vs. limit=22.5 +2024-09-18 19:09:15,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=504580.0, ans=0.0 +2024-09-18 19:09:30,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=504620.0, ans=0.025 +2024-09-18 19:09:48,620 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.07 vs. limit=15.0 +2024-09-18 19:09:52,195 INFO [train.py:1198] (0/2) Epoch 28, batch 4000, loss[loss=0.224, ctc_loss=0.1108, cr_loss=0.3488, attn_decoder_loss=0.2288, over 29490.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1234, cr_loss=0.3661, attn_decoder_loss=0.2441, over 5813130.76 frames. 
], batch size: 74, lr: 3.93e-03, grad_scale: 16.0 +2024-09-18 19:09:52,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=504700.0, ans=0.2 +2024-09-18 19:09:59,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=504700.0, ans=0.0 +2024-09-18 19:10:15,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=504740.0, ans=0.125 +2024-09-18 19:10:28,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=504780.0, ans=0.0 +2024-09-18 19:10:29,489 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.096e+01 8.633e+01 9.036e+01 9.608e+01 3.784e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-18 19:10:34,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=504780.0, ans=0.1 +2024-09-18 19:10:42,325 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.52 vs. limit=15.0 +2024-09-18 19:10:43,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=504820.0, ans=0.125 +2024-09-18 19:11:08,042 INFO [train.py:1198] (0/2) Epoch 28, batch 4050, loss[loss=0.2603, ctc_loss=0.1538, cr_loss=0.3771, attn_decoder_loss=0.2637, over 20134.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1231, cr_loss=0.3652, attn_decoder_loss=0.2438, over 5797189.24 frames. ], batch size: 209, lr: 3.93e-03, grad_scale: 16.0 +2024-09-18 19:11:11,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=504900.0, ans=0.09899494936611666 +2024-09-18 19:11:21,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=504940.0, ans=0.025 +2024-09-18 19:11:27,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=504940.0, ans=0.0 +2024-09-18 19:11:34,055 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.80 vs. limit=15.0 +2024-09-18 19:11:50,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=505020.0, ans=0.125 +2024-09-18 19:12:05,253 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.27 vs. limit=15.0 +2024-09-18 19:12:21,990 INFO [train.py:1198] (0/2) Epoch 28, batch 4100, loss[loss=0.2532, ctc_loss=0.1367, cr_loss=0.4024, attn_decoder_loss=0.2572, over 29516.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1236, cr_loss=0.3662, attn_decoder_loss=0.2442, over 5792516.23 frames. 
], batch size: 90, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:12:30,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=505100.0, ans=0.2 +2024-09-18 19:12:58,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=505180.0, ans=0.05 +2024-09-18 19:13:00,178 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.579e+01 8.509e+01 9.171e+01 9.842e+01 2.303e+02, threshold=1.834e+02, percent-clipped=2.0 +2024-09-18 19:13:13,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=505220.0, ans=0.1 +2024-09-18 19:13:21,152 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:13:35,870 INFO [train.py:1198] (0/2) Epoch 28, batch 4150, loss[loss=0.2361, ctc_loss=0.1182, cr_loss=0.3577, attn_decoder_loss=0.2412, over 29520.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1231, cr_loss=0.3652, attn_decoder_loss=0.2437, over 5797279.06 frames. ], batch size: 77, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:13:49,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=505340.0, ans=0.1 +2024-09-18 19:13:49,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.79 vs. limit=15.0 +2024-09-18 19:14:19,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=505420.0, ans=0.125 +2024-09-18 19:14:31,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=505420.0, ans=0.125 +2024-09-18 19:14:33,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=505420.0, ans=0.0 +2024-09-18 19:14:48,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=505460.0, ans=10.0 +2024-09-18 19:14:50,771 INFO [train.py:1198] (0/2) Epoch 28, batch 4200, loss[loss=0.2499, ctc_loss=0.1316, cr_loss=0.3802, attn_decoder_loss=0.2546, over 29502.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1233, cr_loss=0.3655, attn_decoder_loss=0.244, over 5800059.80 frames. ], batch size: 90, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:15:30,601 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.561e+01 9.045e+01 9.717e+01 1.244e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-18 19:15:35,730 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.19 vs. 
limit=12.0 +2024-09-18 19:15:55,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=505660.0, ans=0.125 +2024-09-18 19:15:56,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=505660.0, ans=0.0 +2024-09-18 19:16:00,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=505660.0, ans=0.1 +2024-09-18 19:16:04,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=505700.0, ans=0.1 +2024-09-18 19:16:06,011 INFO [train.py:1198] (0/2) Epoch 28, batch 4250, loss[loss=0.2272, ctc_loss=0.1114, cr_loss=0.3443, attn_decoder_loss=0.2325, over 29495.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1233, cr_loss=0.3652, attn_decoder_loss=0.2441, over 5805515.06 frames. ], batch size: 74, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:16:15,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=505700.0, ans=0.95 +2024-09-18 19:16:17,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=505700.0, ans=0.2 +2024-09-18 19:16:20,971 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:16:44,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=505780.0, ans=0.0 +2024-09-18 19:16:51,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=505820.0, ans=0.0 +2024-09-18 19:17:06,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=505860.0, ans=0.125 +2024-09-18 19:17:06,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=505860.0, ans=0.2 +2024-09-18 19:17:19,609 INFO [train.py:1198] (0/2) Epoch 28, batch 4300, loss[loss=0.2499, ctc_loss=0.1328, cr_loss=0.3975, attn_decoder_loss=0.2541, over 29508.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1231, cr_loss=0.3646, attn_decoder_loss=0.2442, over 5794061.05 frames. ], batch size: 87, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:17:23,623 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.02 vs. 
limit=15.0 +2024-09-18 19:17:46,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=505940.0, ans=0.125 +2024-09-18 19:17:58,958 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.536e+01 8.600e+01 9.054e+01 9.453e+01 1.609e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-18 19:17:59,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=505980.0, ans=0.125 +2024-09-18 19:18:15,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=506020.0, ans=0.125 +2024-09-18 19:18:18,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=506060.0, ans=0.2 +2024-09-18 19:18:35,179 INFO [train.py:1198] (0/2) Epoch 28, batch 4350, loss[loss=0.2574, ctc_loss=0.1333, cr_loss=0.3825, attn_decoder_loss=0.2627, over 29479.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1261, cr_loss=0.3708, attn_decoder_loss=0.2476, over 5796466.00 frames. ], batch size: 97, lr: 3.92e-03, grad_scale: 8.0 +2024-09-18 19:18:41,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=506100.0, ans=0.1 +2024-09-18 19:18:51,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=506140.0, ans=0.0 +2024-09-18 19:19:04,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.13 vs. limit=15.0 +2024-09-18 19:19:09,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=506180.0, ans=0.125 +2024-09-18 19:19:12,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=506180.0, ans=0.125 +2024-09-18 19:19:24,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=506220.0, ans=0.125 +2024-09-18 19:19:27,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=506220.0, ans=0.0 +2024-09-18 19:19:35,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=506260.0, ans=0.0 +2024-09-18 19:19:43,912 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.27 vs. limit=15.0 +2024-09-18 19:19:45,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.74 vs. limit=12.0 +2024-09-18 19:19:47,497 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:19:48,761 INFO [train.py:1198] (0/2) Epoch 28, batch 4400, loss[loss=0.258, ctc_loss=0.1499, cr_loss=0.4174, attn_decoder_loss=0.2607, over 27663.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1279, cr_loss=0.3741, attn_decoder_loss=0.25, over 5767731.37 frames. 
], batch size: 125, lr: 3.92e-03, grad_scale: 16.0 +2024-09-18 19:20:09,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.84 vs. limit=22.5 +2024-09-18 19:20:20,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.11 vs. limit=15.0 +2024-09-18 19:20:28,784 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.311e+01 8.874e+01 9.241e+01 9.772e+01 1.532e+02, threshold=1.848e+02, percent-clipped=0.0 +2024-09-18 19:20:30,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=506380.0, ans=0.2 +2024-09-18 19:20:50,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=506460.0, ans=0.0 +2024-09-18 19:20:51,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=506460.0, ans=0.125 +2024-09-18 19:20:53,459 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:21:03,587 INFO [train.py:1198] (0/2) Epoch 28, batch 4450, loss[loss=0.2586, ctc_loss=0.1486, cr_loss=0.3782, attn_decoder_loss=0.2624, over 20815.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1318, cr_loss=0.3791, attn_decoder_loss=0.2523, over 5582972.97 frames. ], batch size: 209, lr: 3.92e-03, grad_scale: 8.0 +2024-09-18 19:21:11,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=506500.0, ans=0.0 +2024-09-18 19:21:17,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=506540.0, ans=0.07 +2024-09-18 19:21:26,531 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:21:44,838 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.12 vs. limit=15.0 +2024-09-18 19:21:59,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=506620.0, ans=0.125 +2024-09-18 19:22:12,184 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.76 vs. limit=10.0 +2024-09-18 19:22:18,779 INFO [train.py:1198] (0/2) Epoch 28, batch 4500, loss[loss=0.2588, ctc_loss=0.1528, cr_loss=0.3801, attn_decoder_loss=0.2622, over 19913.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1358, cr_loss=0.382, attn_decoder_loss=0.2544, over 5239747.58 frames. 
], batch size: 209, lr: 3.92e-03, grad_scale: 8.0 +2024-09-18 19:22:19,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=506700.0, ans=0.035 +2024-09-18 19:22:29,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=506700.0, ans=0.025 +2024-09-18 19:22:38,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=506740.0, ans=0.0 +2024-09-18 19:22:47,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=506780.0, ans=0.1 +2024-09-18 19:22:55,543 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-28.pt +2024-09-18 19:23:47,611 INFO [train.py:1198] (0/2) Epoch 29, batch 0, loss[loss=0.2176, ctc_loss=0.1034, cr_loss=0.3261, attn_decoder_loss=0.2231, over 29605.00 frames. ], tot_loss[loss=0.2176, ctc_loss=0.1034, cr_loss=0.3261, attn_decoder_loss=0.2231, over 29605.00 frames. ], batch size: 73, lr: 3.85e-03, grad_scale: 16.0 +2024-09-18 19:23:47,612 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 19:23:50,221 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.3.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([3.2259, 3.5959, 3.4950, 3.7498, 3.6269, 3.7059, 2.9567, 3.8993], + device='cuda:0') +2024-09-18 19:23:53,418 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.4153, 4.8573, 5.4239, 5.1193], device='cuda:0') +2024-09-18 19:24:06,126 INFO [train.py:1230] (0/2) Epoch 29, validation: loss=0.2126, ctc_loss=0.03746, cr_loss=5.58e-15, attn_decoder_loss=0.2321, over 944034.00 frames. +2024-09-18 19:24:06,126 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 19:24:09,039 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.797e+01 1.050e+02 1.169e+02 1.299e+02 2.763e+02, threshold=2.337e+02, percent-clipped=3.0 +2024-09-18 19:24:15,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=506800.0, ans=0.125 +2024-09-18 19:24:47,401 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:25:03,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=506920.0, ans=0.0 +2024-09-18 19:25:08,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=506960.0, ans=0.0 +2024-09-18 19:25:12,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=506960.0, ans=0.125 +2024-09-18 19:25:21,688 INFO [train.py:1198] (0/2) Epoch 29, batch 50, loss[loss=0.2132, ctc_loss=0.1017, cr_loss=0.3097, attn_decoder_loss=0.2187, over 29425.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1267, cr_loss=0.3706, attn_decoder_loss=0.2456, over 1266174.42 frames. ], batch size: 70, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:25:23,919 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.21 vs. 
limit=15.0 +2024-09-18 19:25:31,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=507000.0, ans=0.125 +2024-09-18 19:25:46,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=507040.0, ans=0.125 +2024-09-18 19:26:07,548 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.72 vs. limit=22.5 +2024-09-18 19:26:09,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.38 vs. limit=12.0 +2024-09-18 19:26:13,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=507120.0, ans=0.2 +2024-09-18 19:26:16,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.22 vs. limit=22.5 +2024-09-18 19:26:41,684 INFO [train.py:1198] (0/2) Epoch 29, batch 100, loss[loss=0.2289, ctc_loss=0.1162, cr_loss=0.3557, attn_decoder_loss=0.2335, over 29527.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.127, cr_loss=0.3722, attn_decoder_loss=0.2471, over 2251765.49 frames. ], batch size: 76, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:26:46,194 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.735e+01 9.318e+01 1.000e+02 1.586e+02, threshold=1.864e+02, percent-clipped=0.0 +2024-09-18 19:26:52,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=507200.0, ans=0.0 +2024-09-18 19:27:09,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=507240.0, ans=0.125 +2024-09-18 19:27:24,609 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.21 vs. limit=6.0 +2024-09-18 19:27:24,829 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.44 vs. limit=15.0 +2024-09-18 19:27:49,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=507360.0, ans=0.2 +2024-09-18 19:27:55,320 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:27:56,472 INFO [train.py:1198] (0/2) Epoch 29, batch 150, loss[loss=0.22, ctc_loss=0.1109, cr_loss=0.349, attn_decoder_loss=0.2244, over 29423.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1247, cr_loss=0.3675, attn_decoder_loss=0.2446, over 3046372.31 frames. ], batch size: 70, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:27:58,993 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.97 vs. limit=15.0 +2024-09-18 19:27:59,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=507400.0, ans=0.0 +2024-09-18 19:28:01,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.90 vs. 
limit=10.0 +2024-09-18 19:28:02,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=507400.0, ans=0.125 +2024-09-18 19:28:14,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=507440.0, ans=0.2 +2024-09-18 19:28:37,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.13 vs. limit=15.0 +2024-09-18 19:29:02,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=507560.0, ans=0.125 +2024-09-18 19:29:07,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.22 vs. limit=22.5 +2024-09-18 19:29:11,259 INFO [train.py:1198] (0/2) Epoch 29, batch 200, loss[loss=0.2569, ctc_loss=0.1385, cr_loss=0.403, attn_decoder_loss=0.2611, over 27264.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1244, cr_loss=0.3673, attn_decoder_loss=0.2444, over 3658782.65 frames. ], batch size: 124, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:29:15,707 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.478e+01 8.328e+01 8.818e+01 9.310e+01 1.091e+02, threshold=1.764e+02, percent-clipped=0.0 +2024-09-18 19:29:29,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.69 vs. limit=22.5 +2024-09-18 19:29:37,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.31 vs. limit=22.5 +2024-09-18 19:29:53,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=507680.0, ans=0.125 +2024-09-18 19:30:04,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=507720.0, ans=0.0 +2024-09-18 19:30:12,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=507720.0, ans=0.2 +2024-09-18 19:30:15,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=507760.0, ans=0.2 +2024-09-18 19:30:26,603 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.20 vs. limit=10.0 +2024-09-18 19:30:31,825 INFO [train.py:1198] (0/2) Epoch 29, batch 250, loss[loss=0.2508, ctc_loss=0.1343, cr_loss=0.3801, attn_decoder_loss=0.2553, over 29268.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1242, cr_loss=0.3674, attn_decoder_loss=0.2442, over 4140732.98 frames. ], batch size: 100, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:30:33,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=507800.0, ans=0.125 +2024-09-18 19:30:33,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=507800.0, ans=0.1 +2024-09-18 19:30:37,162 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.03 vs. 
limit=15.0 +2024-09-18 19:30:44,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.18 vs. limit=22.5 +2024-09-18 19:30:47,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.13 vs. limit=10.0 +2024-09-18 19:30:53,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=507840.0, ans=0.125 +2024-09-18 19:30:57,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=507840.0, ans=0.1 +2024-09-18 19:31:18,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=507920.0, ans=0.125 +2024-09-18 19:31:26,619 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:31:30,099 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.53 vs. limit=15.0 +2024-09-18 19:31:47,704 INFO [train.py:1198] (0/2) Epoch 29, batch 300, loss[loss=0.2572, ctc_loss=0.1365, cr_loss=0.3873, attn_decoder_loss=0.262, over 29560.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1235, cr_loss=0.3662, attn_decoder_loss=0.2437, over 4509501.48 frames. ], batch size: 92, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:31:50,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=508000.0, ans=0.1 +2024-09-18 19:31:52,192 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.405e+01 8.844e+01 9.472e+01 2.622e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-18 19:31:57,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=508000.0, ans=0.125 +2024-09-18 19:32:33,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=508120.0, ans=0.125 +2024-09-18 19:32:39,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=508120.0, ans=0.125 +2024-09-18 19:33:03,340 INFO [train.py:1198] (0/2) Epoch 29, batch 350, loss[loss=0.2187, ctc_loss=0.1077, cr_loss=0.3303, attn_decoder_loss=0.2237, over 29326.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1239, cr_loss=0.3673, attn_decoder_loss=0.2442, over 4794953.18 frames. 
], batch size: 71, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:33:25,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=508240.0, ans=0.125 +2024-09-18 19:33:36,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=508280.0, ans=15.0 +2024-09-18 19:33:46,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=508280.0, ans=0.07 +2024-09-18 19:34:10,078 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:34:17,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=508360.0, ans=0.2 +2024-09-18 19:34:23,108 INFO [train.py:1198] (0/2) Epoch 29, batch 400, loss[loss=0.2399, ctc_loss=0.1231, cr_loss=0.3818, attn_decoder_loss=0.2444, over 29692.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1234, cr_loss=0.3669, attn_decoder_loss=0.2441, over 5024398.64 frames. ], batch size: 82, lr: 3.85e-03, grad_scale: 16.0 +2024-09-18 19:34:27,752 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.140e+01 8.478e+01 8.916e+01 9.451e+01 2.866e+02, threshold=1.783e+02, percent-clipped=2.0 +2024-09-18 19:34:31,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=508400.0, ans=0.1 +2024-09-18 19:35:04,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=508480.0, ans=10.0 +2024-09-18 19:35:13,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=508520.0, ans=0.125 +2024-09-18 19:35:21,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=508520.0, ans=0.0 +2024-09-18 19:35:21,760 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.64 vs. limit=15.0 +2024-09-18 19:35:30,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=508560.0, ans=0.5 +2024-09-18 19:35:39,004 INFO [train.py:1198] (0/2) Epoch 29, batch 450, loss[loss=0.2427, ctc_loss=0.134, cr_loss=0.381, attn_decoder_loss=0.2463, over 29707.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1238, cr_loss=0.3674, attn_decoder_loss=0.2442, over 5187217.84 frames. ], batch size: 83, lr: 3.85e-03, grad_scale: 16.0 +2024-09-18 19:35:48,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=508600.0, ans=0.0 +2024-09-18 19:36:01,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.59 vs. limit=15.0 +2024-09-18 19:36:12,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=508680.0, ans=0.2 +2024-09-18 19:36:35,278 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.93 vs. 
limit=15.0 +2024-09-18 19:36:42,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=508760.0, ans=0.0 +2024-09-18 19:36:46,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=508760.0, ans=0.0 +2024-09-18 19:36:55,947 INFO [train.py:1198] (0/2) Epoch 29, batch 500, loss[loss=0.2564, ctc_loss=0.1331, cr_loss=0.3897, attn_decoder_loss=0.2614, over 29470.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1231, cr_loss=0.3659, attn_decoder_loss=0.2435, over 5330764.93 frames. ], batch size: 94, lr: 3.84e-03, grad_scale: 16.0 +2024-09-18 19:37:00,505 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.214e+01 8.526e+01 8.926e+01 9.589e+01 3.622e+02, threshold=1.785e+02, percent-clipped=3.0 +2024-09-18 19:37:15,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=508840.0, ans=0.125 +2024-09-18 19:37:34,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=508880.0, ans=0.125 +2024-09-18 19:38:14,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=509000.0, ans=0.0 +2024-09-18 19:38:15,951 INFO [train.py:1198] (0/2) Epoch 29, batch 550, loss[loss=0.2451, ctc_loss=0.1235, cr_loss=0.3577, attn_decoder_loss=0.2507, over 28788.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1231, cr_loss=0.3651, attn_decoder_loss=0.2435, over 5423925.04 frames. ], batch size: 104, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:38:25,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=509000.0, ans=0.125 +2024-09-18 19:38:37,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=509040.0, ans=0.0 +2024-09-18 19:38:56,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=509080.0, ans=0.0 +2024-09-18 19:38:59,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=509120.0, ans=0.125 +2024-09-18 19:39:31,419 INFO [train.py:1198] (0/2) Epoch 29, batch 600, loss[loss=0.253, ctc_loss=0.1328, cr_loss=0.3841, attn_decoder_loss=0.2578, over 29176.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.123, cr_loss=0.3654, attn_decoder_loss=0.2435, over 5508904.25 frames. ], batch size: 100, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:39:37,603 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.349e+01 8.468e+01 8.932e+01 9.529e+01 2.879e+02, threshold=1.786e+02, percent-clipped=3.0 +2024-09-18 19:39:47,487 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.96 vs. 
limit=15.0 +2024-09-18 19:39:55,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=509240.0, ans=0.125 +2024-09-18 19:40:18,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=509320.0, ans=0.2 +2024-09-18 19:40:25,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=509320.0, ans=0.07 +2024-09-18 19:40:28,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=509320.0, ans=0.2 +2024-09-18 19:40:36,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=509360.0, ans=0.125 +2024-09-18 19:40:46,516 INFO [train.py:1198] (0/2) Epoch 29, batch 650, loss[loss=0.2401, ctc_loss=0.1189, cr_loss=0.3609, attn_decoder_loss=0.2456, over 29747.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1223, cr_loss=0.3641, attn_decoder_loss=0.243, over 5586583.62 frames. ], batch size: 81, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:40:54,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=509400.0, ans=0.125 +2024-09-18 19:41:20,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=509480.0, ans=0.125 +2024-09-18 19:41:23,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=509480.0, ans=0.125 +2024-09-18 19:41:40,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=509520.0, ans=10.0 +2024-09-18 19:41:41,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=509520.0, ans=0.025 +2024-09-18 19:41:44,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=509520.0, ans=0.1 +2024-09-18 19:41:48,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=509560.0, ans=0.125 +2024-09-18 19:41:50,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=509560.0, ans=0.125 +2024-09-18 19:41:55,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=509560.0, ans=0.125 +2024-09-18 19:42:06,945 INFO [train.py:1198] (0/2) Epoch 29, batch 700, loss[loss=0.2286, ctc_loss=0.1199, cr_loss=0.3862, attn_decoder_loss=0.2321, over 29546.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1226, cr_loss=0.3648, attn_decoder_loss=0.2434, over 5637917.47 frames. 
], batch size: 76, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:42:07,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=509600.0, ans=0.0 +2024-09-18 19:42:12,943 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.515e+01 8.488e+01 8.956e+01 9.496e+01 1.572e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-18 19:42:16,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=509600.0, ans=0.0 +2024-09-18 19:42:22,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=509640.0, ans=0.0 +2024-09-18 19:42:27,011 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:42:30,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=509640.0, ans=0.125 +2024-09-18 19:42:36,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=509680.0, ans=0.0 +2024-09-18 19:42:43,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=509680.0, ans=0.125 +2024-09-18 19:42:51,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=509720.0, ans=0.0 +2024-09-18 19:42:51,776 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.11 vs. limit=15.0 +2024-09-18 19:42:53,435 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.42 vs. limit=15.0 +2024-09-18 19:42:59,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=509720.0, ans=0.2 +2024-09-18 19:43:23,118 INFO [train.py:1198] (0/2) Epoch 29, batch 750, loss[loss=0.2398, ctc_loss=0.1217, cr_loss=0.3667, attn_decoder_loss=0.2448, over 29703.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1221, cr_loss=0.3642, attn_decoder_loss=0.2429, over 5676840.54 frames. ], batch size: 82, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:43:40,477 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.22 vs. limit=15.0 +2024-09-18 19:43:45,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=509840.0, ans=0.0 +2024-09-18 19:44:11,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=509920.0, ans=0.0 +2024-09-18 19:44:11,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=509920.0, ans=0.125 +2024-09-18 19:44:38,393 INFO [train.py:1198] (0/2) Epoch 29, batch 800, loss[loss=0.2372, ctc_loss=0.1255, cr_loss=0.3862, attn_decoder_loss=0.241, over 29631.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1222, cr_loss=0.3645, attn_decoder_loss=0.2428, over 5708809.82 frames. 
], batch size: 73, lr: 3.84e-03, grad_scale: 16.0 +2024-09-18 19:44:44,456 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.358e+01 8.377e+01 8.861e+01 9.386e+01 4.532e+02, threshold=1.772e+02, percent-clipped=1.0 +2024-09-18 19:44:44,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=510000.0, ans=0.125 +2024-09-18 19:44:47,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=510000.0, ans=0.2 +2024-09-18 19:44:50,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=510000.0, ans=0.125 +2024-09-18 19:45:30,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=510120.0, ans=0.125 +2024-09-18 19:45:37,230 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.51 vs. limit=12.0 +2024-09-18 19:45:55,658 INFO [train.py:1198] (0/2) Epoch 29, batch 850, loss[loss=0.2504, ctc_loss=0.1304, cr_loss=0.3692, attn_decoder_loss=0.2556, over 29706.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1219, cr_loss=0.3634, attn_decoder_loss=0.2426, over 5736928.35 frames. ], batch size: 89, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:46:18,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=510240.0, ans=0.125 +2024-09-18 19:46:21,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=510240.0, ans=0.0 +2024-09-18 19:46:41,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=510320.0, ans=0.125 +2024-09-18 19:46:54,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=510320.0, ans=0.0 +2024-09-18 19:46:58,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=510360.0, ans=0.125 +2024-09-18 19:47:06,888 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.16 vs. limit=15.0 +2024-09-18 19:47:07,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=510360.0, ans=0.2 +2024-09-18 19:47:13,955 INFO [train.py:1198] (0/2) Epoch 29, batch 900, loss[loss=0.2249, ctc_loss=0.1218, cr_loss=0.3693, attn_decoder_loss=0.2281, over 29601.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1224, cr_loss=0.3644, attn_decoder_loss=0.2431, over 5740888.19 frames. ], batch size: 73, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:47:15,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=510400.0, ans=0.1 +2024-09-18 19:47:21,302 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.562e+01 8.540e+01 9.030e+01 9.336e+01 1.932e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-18 19:47:22,390 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.26 vs. 
limit=15.0 +2024-09-18 19:47:53,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.49 vs. limit=5.0 +2024-09-18 19:47:55,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=510480.0, ans=0.1 +2024-09-18 19:48:05,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=510520.0, ans=0.0 +2024-09-18 19:48:10,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=510520.0, ans=0.125 +2024-09-18 19:48:29,425 INFO [train.py:1198] (0/2) Epoch 29, batch 950, loss[loss=0.2207, ctc_loss=0.1067, cr_loss=0.347, attn_decoder_loss=0.2257, over 29519.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1223, cr_loss=0.3643, attn_decoder_loss=0.2431, over 5742586.58 frames. ], batch size: 74, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:48:44,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=510640.0, ans=0.2 +2024-09-18 19:49:15,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=510720.0, ans=0.125 +2024-09-18 19:49:18,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=510720.0, ans=0.125 +2024-09-18 19:49:19,422 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.29 vs. limit=6.0 +2024-09-18 19:49:30,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.whiten.whitening_limit, batch_count=510720.0, ans=15.0 +2024-09-18 19:49:46,882 INFO [train.py:1198] (0/2) Epoch 29, batch 1000, loss[loss=0.2411, ctc_loss=0.1288, cr_loss=0.3697, attn_decoder_loss=0.2454, over 29522.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1235, cr_loss=0.3665, attn_decoder_loss=0.2442, over 5736686.37 frames. ], batch size: 77, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:49:56,628 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.108e+01 8.627e+01 9.386e+01 1.009e+02 2.634e+02, threshold=1.877e+02, percent-clipped=2.0 +2024-09-18 19:50:21,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.91 vs. limit=15.0 +2024-09-18 19:50:33,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_ff2.min_abs, batch_count=510920.0, ans=0.1 +2024-09-18 19:50:45,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=510920.0, ans=0.035 +2024-09-18 19:50:51,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=510960.0, ans=0.0 +2024-09-18 19:51:04,703 INFO [train.py:1198] (0/2) Epoch 29, batch 1050, loss[loss=0.2552, ctc_loss=0.1383, cr_loss=0.3859, attn_decoder_loss=0.2597, over 29677.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1234, cr_loss=0.3663, attn_decoder_loss=0.2437, over 5744088.07 frames. 
], batch size: 85, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:51:29,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=511040.0, ans=0.025 +2024-09-18 19:51:30,107 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.42 vs. limit=6.0 +2024-09-18 19:51:42,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.63 vs. limit=12.0 +2024-09-18 19:52:13,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=511160.0, ans=0.125 +2024-09-18 19:52:21,211 INFO [train.py:1198] (0/2) Epoch 29, batch 1100, loss[loss=0.2353, ctc_loss=0.1222, cr_loss=0.3697, attn_decoder_loss=0.2396, over 29446.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1232, cr_loss=0.3661, attn_decoder_loss=0.2433, over 5756633.96 frames. ], batch size: 78, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:52:28,710 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.758e+01 8.572e+01 8.922e+01 9.420e+01 4.206e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-18 19:52:38,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=511240.0, ans=0.0 +2024-09-18 19:52:41,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=511240.0, ans=0.1 +2024-09-18 19:52:57,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.04 vs. limit=15.0 +2024-09-18 19:52:59,113 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.14 vs. limit=15.0 +2024-09-18 19:53:17,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=511320.0, ans=0.0 +2024-09-18 19:53:28,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=511360.0, ans=0.0 +2024-09-18 19:53:38,729 INFO [train.py:1198] (0/2) Epoch 29, batch 1150, loss[loss=0.2392, ctc_loss=0.1231, cr_loss=0.3727, attn_decoder_loss=0.2438, over 29474.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1231, cr_loss=0.3656, attn_decoder_loss=0.2433, over 5754156.62 frames. ], batch size: 78, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:53:47,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.74 vs. limit=15.0 +2024-09-18 19:53:53,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=511400.0, ans=0.2 +2024-09-18 19:53:54,164 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.03 vs. 
limit=15.0 +2024-09-18 19:54:04,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=511440.0, ans=0.1 +2024-09-18 19:54:05,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=511440.0, ans=0.125 +2024-09-18 19:54:08,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.86 vs. limit=15.0 +2024-09-18 19:54:11,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=511480.0, ans=0.125 +2024-09-18 19:54:11,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=511480.0, ans=0.0 +2024-09-18 19:54:42,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=511560.0, ans=0.2 +2024-09-18 19:54:45,492 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.38 vs. limit=15.0 +2024-09-18 19:54:54,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=511560.0, ans=0.07 +2024-09-18 19:54:56,981 INFO [train.py:1198] (0/2) Epoch 29, batch 1200, loss[loss=0.2536, ctc_loss=0.135, cr_loss=0.4027, attn_decoder_loss=0.2578, over 29678.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1237, cr_loss=0.3662, attn_decoder_loss=0.2442, over 5746772.75 frames. ], batch size: 85, lr: 3.83e-03, grad_scale: 16.0 +2024-09-18 19:54:58,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=511600.0, ans=0.125 +2024-09-18 19:55:04,484 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.596e+01 8.543e+01 9.016e+01 9.683e+01 2.653e+02, threshold=1.803e+02, percent-clipped=3.0 +2024-09-18 19:55:36,249 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.69 vs. limit=15.0 +2024-09-18 19:55:36,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=511680.0, ans=0.125 +2024-09-18 19:55:41,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=511720.0, ans=0.125 +2024-09-18 19:55:47,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=511720.0, ans=0.125 +2024-09-18 19:55:51,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=511720.0, ans=0.2 +2024-09-18 19:56:09,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=511760.0, ans=0.125 +2024-09-18 19:56:12,586 INFO [train.py:1198] (0/2) Epoch 29, batch 1250, loss[loss=0.2493, ctc_loss=0.1279, cr_loss=0.3834, attn_decoder_loss=0.2543, over 29486.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1238, cr_loss=0.367, attn_decoder_loss=0.2448, over 5774760.91 frames. 
], batch size: 92, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 19:56:14,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=511800.0, ans=0.1 +2024-09-18 19:56:22,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=511800.0, ans=0.025 +2024-09-18 19:56:35,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=511840.0, ans=0.0 +2024-09-18 19:57:05,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=511920.0, ans=0.125 +2024-09-18 19:57:09,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=511920.0, ans=0.0 +2024-09-18 19:57:29,765 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-128000.pt +2024-09-18 19:57:38,311 INFO [train.py:1198] (0/2) Epoch 29, batch 1300, loss[loss=0.2469, ctc_loss=0.1162, cr_loss=0.3499, attn_decoder_loss=0.2536, over 28228.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1237, cr_loss=0.3671, attn_decoder_loss=0.2443, over 5779861.04 frames. ], batch size: 111, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 19:57:47,484 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.526e+01 8.940e+01 9.401e+01 4.173e+02, threshold=1.788e+02, percent-clipped=2.0 +2024-09-18 19:58:17,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=512080.0, ans=0.2 +2024-09-18 19:58:19,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=512080.0, ans=0.125 +2024-09-18 19:58:23,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=512080.0, ans=0.0 +2024-09-18 19:58:29,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=512120.0, ans=0.0 +2024-09-18 19:58:38,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=512120.0, ans=0.1 +2024-09-18 19:58:52,709 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.16 vs. limit=12.0 +2024-09-18 19:58:56,558 INFO [train.py:1198] (0/2) Epoch 29, batch 1350, loss[loss=0.2392, ctc_loss=0.1222, cr_loss=0.3634, attn_decoder_loss=0.2441, over 29760.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1232, cr_loss=0.3665, attn_decoder_loss=0.244, over 5796580.64 frames. 
], batch size: 81, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 19:59:02,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=512200.0, ans=0.2 +2024-09-18 19:59:19,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=512240.0, ans=0.125 +2024-09-18 19:59:19,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=512240.0, ans=0.125 +2024-09-18 19:59:23,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=512240.0, ans=0.125 +2024-09-18 19:59:29,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=512280.0, ans=0.1 +2024-09-18 19:59:31,734 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=10.33 vs. limit=15.0 +2024-09-18 19:59:32,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=512280.0, ans=0.025 +2024-09-18 19:59:34,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=512280.0, ans=0.0 +2024-09-18 19:59:56,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=512360.0, ans=0.1 +2024-09-18 20:00:11,672 INFO [train.py:1198] (0/2) Epoch 29, batch 1400, loss[loss=0.2157, ctc_loss=0.1152, cr_loss=0.3427, attn_decoder_loss=0.2193, over 29617.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1229, cr_loss=0.3656, attn_decoder_loss=0.2439, over 5807736.05 frames. ], batch size: 69, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:00:19,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=512400.0, ans=0.0 +2024-09-18 20:00:20,755 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.135e+01 8.361e+01 8.836e+01 9.387e+01 1.190e+02, threshold=1.767e+02, percent-clipped=0.0 +2024-09-18 20:00:27,918 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.39 vs. limit=15.0 +2024-09-18 20:00:31,793 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:00:32,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.42 vs. limit=10.0 +2024-09-18 20:00:39,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=512440.0, ans=0.125 +2024-09-18 20:01:13,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.99 vs. 
limit=12.0 +2024-09-18 20:01:26,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=512560.0, ans=0.0 +2024-09-18 20:01:27,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=512600.0, ans=0.1 +2024-09-18 20:01:29,200 INFO [train.py:1198] (0/2) Epoch 29, batch 1450, loss[loss=0.2419, ctc_loss=0.1209, cr_loss=0.3728, attn_decoder_loss=0.2471, over 29402.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.123, cr_loss=0.3655, attn_decoder_loss=0.2442, over 5804756.77 frames. ], batch size: 94, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:01:32,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=512600.0, ans=0.0 +2024-09-18 20:01:37,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=512600.0, ans=0.025 +2024-09-18 20:01:40,990 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.26 vs. limit=15.0 +2024-09-18 20:02:06,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.79 vs. limit=15.0 +2024-09-18 20:02:12,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=512680.0, ans=0.0 +2024-09-18 20:02:39,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=512760.0, ans=0.2 +2024-09-18 20:02:47,663 INFO [train.py:1198] (0/2) Epoch 29, batch 1500, loss[loss=0.2471, ctc_loss=0.1326, cr_loss=0.4037, attn_decoder_loss=0.2508, over 29635.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1236, cr_loss=0.3667, attn_decoder_loss=0.2447, over 5804937.60 frames. ], batch size: 86, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:02:58,365 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.696e+01 9.136e+01 9.651e+01 1.564e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-18 20:02:58,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=512800.0, ans=0.025 +2024-09-18 20:02:59,384 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=9.38 vs. limit=12.0 +2024-09-18 20:03:02,097 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.06 vs. 
limit=15.0 +2024-09-18 20:03:21,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=512880.0, ans=0.0 +2024-09-18 20:03:21,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=512880.0, ans=0.95 +2024-09-18 20:03:32,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=512920.0, ans=0.125 +2024-09-18 20:03:34,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=512920.0, ans=0.125 +2024-09-18 20:03:35,954 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.57 vs. limit=15.0 +2024-09-18 20:03:41,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=512920.0, ans=0.1 +2024-09-18 20:04:03,793 INFO [train.py:1198] (0/2) Epoch 29, batch 1550, loss[loss=0.254, ctc_loss=0.1388, cr_loss=0.4093, attn_decoder_loss=0.2577, over 29517.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.124, cr_loss=0.3672, attn_decoder_loss=0.2448, over 5780508.16 frames. ], batch size: 90, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:04:14,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=513000.0, ans=0.0 +2024-09-18 20:04:14,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=513000.0, ans=0.2 +2024-09-18 20:04:39,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=513080.0, ans=0.125 +2024-09-18 20:05:18,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=513160.0, ans=0.0 +2024-09-18 20:05:21,279 INFO [train.py:1198] (0/2) Epoch 29, batch 1600, loss[loss=0.2425, ctc_loss=0.1187, cr_loss=0.3567, attn_decoder_loss=0.2483, over 29688.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.124, cr_loss=0.3668, attn_decoder_loss=0.2446, over 5763845.48 frames. ], batch size: 85, lr: 3.83e-03, grad_scale: 16.0 +2024-09-18 20:05:28,090 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.90 vs. limit=15.0 +2024-09-18 20:05:31,643 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.318e+01 8.586e+01 9.089e+01 9.783e+01 2.042e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-18 20:05:37,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=513240.0, ans=0.025 +2024-09-18 20:05:38,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=513240.0, ans=0.0 +2024-09-18 20:05:48,442 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.44 vs. 
limit=15.0 +2024-09-18 20:05:57,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=513280.0, ans=0.1 +2024-09-18 20:06:04,958 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.90 vs. limit=15.0 +2024-09-18 20:06:19,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=513320.0, ans=0.125 +2024-09-18 20:06:34,413 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.79 vs. limit=10.0 +2024-09-18 20:06:39,180 INFO [train.py:1198] (0/2) Epoch 29, batch 1650, loss[loss=0.254, ctc_loss=0.1346, cr_loss=0.4047, attn_decoder_loss=0.2583, over 29698.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1237, cr_loss=0.3665, attn_decoder_loss=0.2441, over 5758142.04 frames. ], batch size: 89, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:06:56,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.11 vs. limit=15.0 +2024-09-18 20:07:08,878 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.03 vs. limit=22.5 +2024-09-18 20:07:11,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=513480.0, ans=0.125 +2024-09-18 20:07:25,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=513520.0, ans=0.125 +2024-09-18 20:07:33,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=513520.0, ans=10.0 +2024-09-18 20:07:39,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=513560.0, ans=0.05 +2024-09-18 20:07:41,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=513560.0, ans=0.125 +2024-09-18 20:07:43,702 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.96 vs. limit=10.0 +2024-09-18 20:07:53,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=513600.0, ans=0.125 +2024-09-18 20:07:55,034 INFO [train.py:1198] (0/2) Epoch 29, batch 1700, loss[loss=0.215, ctc_loss=0.1026, cr_loss=0.3152, attn_decoder_loss=0.2205, over 29596.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.123, cr_loss=0.3651, attn_decoder_loss=0.2437, over 5779929.53 frames. ], batch size: 69, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:08:07,209 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.650e+01 8.371e+01 8.901e+01 9.499e+01 1.304e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-18 20:08:09,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.16 vs. 
limit=15.0 +2024-09-18 20:08:13,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=513640.0, ans=0.0 +2024-09-18 20:08:32,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=513680.0, ans=0.125 +2024-09-18 20:08:41,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=513720.0, ans=0.125 +2024-09-18 20:08:47,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=513720.0, ans=0.125 +2024-09-18 20:09:00,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer_ff3.min_abs, batch_count=513760.0, ans=0.2 +2024-09-18 20:09:02,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=513760.0, ans=0.0 +2024-09-18 20:09:12,834 INFO [train.py:1198] (0/2) Epoch 29, batch 1750, loss[loss=0.2158, ctc_loss=0.1045, cr_loss=0.3304, attn_decoder_loss=0.2208, over 29348.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1229, cr_loss=0.3652, attn_decoder_loss=0.2433, over 5788115.85 frames. ], batch size: 67, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:09:19,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=513800.0, ans=0.0 +2024-09-18 20:09:23,762 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:09:35,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=513840.0, ans=0.0 +2024-09-18 20:10:06,944 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.96 vs. limit=10.0 +2024-09-18 20:10:21,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=513960.0, ans=0.1 +2024-09-18 20:10:30,207 INFO [train.py:1198] (0/2) Epoch 29, batch 1800, loss[loss=0.2548, ctc_loss=0.1402, cr_loss=0.396, attn_decoder_loss=0.2588, over 29692.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1231, cr_loss=0.366, attn_decoder_loss=0.2436, over 5791570.14 frames. ], batch size: 83, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:10:38,855 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.02 vs. limit=15.0 +2024-09-18 20:10:42,240 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.472e+01 8.834e+01 9.561e+01 3.303e+02, threshold=1.767e+02, percent-clipped=1.0 +2024-09-18 20:11:08,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=514080.0, ans=0.1 +2024-09-18 20:11:15,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=514120.0, ans=0.0 +2024-09-18 20:11:31,359 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.68 vs. limit=6.0 +2024-09-18 20:11:46,039 INFO [train.py:1198] (0/2) Epoch 29, batch 1850, loss[loss=0.2428, ctc_loss=0.1185, cr_loss=0.3443, attn_decoder_loss=0.2489, over 29632.00 frames. 
], tot_loss[loss=0.2388, ctc_loss=0.1231, cr_loss=0.3659, attn_decoder_loss=0.2435, over 5797679.78 frames. ], batch size: 86, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:11:50,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=514200.0, ans=0.125 +2024-09-18 20:11:50,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=514200.0, ans=0.125 +2024-09-18 20:11:58,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=514200.0, ans=0.125 +2024-09-18 20:12:05,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514240.0, ans=0.1 +2024-09-18 20:12:07,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=514240.0, ans=0.125 +2024-09-18 20:12:18,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=514280.0, ans=0.0 +2024-09-18 20:12:32,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=514320.0, ans=0.125 +2024-09-18 20:12:42,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=514320.0, ans=0.1 +2024-09-18 20:12:42,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=514320.0, ans=0.125 +2024-09-18 20:13:00,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=514360.0, ans=0.0 +2024-09-18 20:13:01,190 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.73 vs. limit=15.0 +2024-09-18 20:13:03,716 INFO [train.py:1198] (0/2) Epoch 29, batch 1900, loss[loss=0.2559, ctc_loss=0.1263, cr_loss=0.3784, attn_decoder_loss=0.2619, over 29696.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.123, cr_loss=0.3658, attn_decoder_loss=0.244, over 5805094.32 frames. 
], batch size: 89, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:13:10,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=514400.0, ans=0.1 +2024-09-18 20:13:15,863 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.747e+01 8.630e+01 9.084e+01 9.711e+01 2.750e+02, threshold=1.817e+02, percent-clipped=3.0 +2024-09-18 20:13:17,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=514440.0, ans=0.0 +2024-09-18 20:13:20,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=514440.0, ans=0.0 +2024-09-18 20:13:28,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=514440.0, ans=0.125 +2024-09-18 20:14:17,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=514560.0, ans=0.2 +2024-09-18 20:14:20,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=514600.0, ans=0.125 +2024-09-18 20:14:20,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=514600.0, ans=0.125 +2024-09-18 20:14:22,102 INFO [train.py:1198] (0/2) Epoch 29, batch 1950, loss[loss=0.2383, ctc_loss=0.128, cr_loss=0.3786, attn_decoder_loss=0.2422, over 29442.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1235, cr_loss=0.3672, attn_decoder_loss=0.2449, over 5819456.59 frames. ], batch size: 78, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:14:24,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=514600.0, ans=0.0 +2024-09-18 20:14:39,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=514640.0, ans=0.07 +2024-09-18 20:14:43,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=514640.0, ans=0.0 +2024-09-18 20:15:04,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=514680.0, ans=0.125 +2024-09-18 20:15:37,448 INFO [train.py:1198] (0/2) Epoch 29, batch 2000, loss[loss=0.2062, ctc_loss=0.09518, cr_loss=0.3054, attn_decoder_loss=0.2117, over 29349.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1237, cr_loss=0.3673, attn_decoder_loss=0.2452, over 5795977.24 frames. 
], batch size: 67, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:15:45,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=514800.0, ans=0.0 +2024-09-18 20:15:49,642 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.501e+01 8.639e+01 9.197e+01 9.637e+01 2.415e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-18 20:16:14,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=514880.0, ans=0.125 +2024-09-18 20:16:14,789 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:16:25,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=514920.0, ans=0.025 +2024-09-18 20:16:28,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=514920.0, ans=0.125 +2024-09-18 20:16:55,177 INFO [train.py:1198] (0/2) Epoch 29, batch 2050, loss[loss=0.2193, ctc_loss=0.1017, cr_loss=0.3221, attn_decoder_loss=0.2252, over 29452.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1232, cr_loss=0.3662, attn_decoder_loss=0.2443, over 5788378.58 frames. ], batch size: 70, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:17:06,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=515000.0, ans=0.125 +2024-09-18 20:17:07,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=515000.0, ans=0.125 +2024-09-18 20:17:59,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=515160.0, ans=0.125 +2024-09-18 20:18:01,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=515160.0, ans=0.05 +2024-09-18 20:18:08,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=515160.0, ans=0.0 +2024-09-18 20:18:12,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=515200.0, ans=0.1 +2024-09-18 20:18:13,444 INFO [train.py:1198] (0/2) Epoch 29, batch 2100, loss[loss=0.2402, ctc_loss=0.1256, cr_loss=0.3615, attn_decoder_loss=0.2449, over 29755.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1232, cr_loss=0.3661, attn_decoder_loss=0.2441, over 5800610.41 frames. 
], batch size: 81, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:18:15,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=515200.0, ans=0.05 +2024-09-18 20:18:19,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=515200.0, ans=0.2 +2024-09-18 20:18:25,549 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.305e+01 8.420e+01 8.993e+01 9.361e+01 1.152e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-18 20:18:28,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=515240.0, ans=0.125 +2024-09-18 20:18:57,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=515320.0, ans=0.125 +2024-09-18 20:19:09,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.61 vs. limit=10.0 +2024-09-18 20:19:18,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=515360.0, ans=0.125 +2024-09-18 20:19:27,974 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.99 vs. limit=12.0 +2024-09-18 20:19:28,669 INFO [train.py:1198] (0/2) Epoch 29, batch 2150, loss[loss=0.234, ctc_loss=0.1242, cr_loss=0.3732, attn_decoder_loss=0.2379, over 29446.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1228, cr_loss=0.3657, attn_decoder_loss=0.2437, over 5815449.35 frames. ], batch size: 78, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:19:47,868 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.12 vs. limit=12.0 +2024-09-18 20:19:54,119 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.65 vs. limit=6.0 +2024-09-18 20:19:58,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=515440.0, ans=0.0 +2024-09-18 20:20:06,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=515480.0, ans=0.125 +2024-09-18 20:20:09,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=515480.0, ans=0.125 +2024-09-18 20:20:12,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=515480.0, ans=0.125 +2024-09-18 20:20:19,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=515520.0, ans=0.125 +2024-09-18 20:20:30,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=515560.0, ans=0.1 +2024-09-18 20:20:46,531 INFO [train.py:1198] (0/2) Epoch 29, batch 2200, loss[loss=0.2553, ctc_loss=0.1362, cr_loss=0.3962, attn_decoder_loss=0.2597, over 29597.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1232, cr_loss=0.3663, attn_decoder_loss=0.2438, over 5811764.77 frames. 
], batch size: 86, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:20:48,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=515600.0, ans=0.1 +2024-09-18 20:20:51,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=515600.0, ans=0.025 +2024-09-18 20:20:58,449 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.069e+01 8.349e+01 8.970e+01 9.403e+01 1.511e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 20:21:01,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=515640.0, ans=0.125 +2024-09-18 20:21:01,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=515640.0, ans=0.125 +2024-09-18 20:21:13,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=515640.0, ans=0.125 +2024-09-18 20:21:21,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=515680.0, ans=0.0 +2024-09-18 20:21:52,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=515760.0, ans=0.125 +2024-09-18 20:22:04,185 INFO [train.py:1198] (0/2) Epoch 29, batch 2250, loss[loss=0.2431, ctc_loss=0.1181, cr_loss=0.3532, attn_decoder_loss=0.2492, over 29713.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1224, cr_loss=0.3649, attn_decoder_loss=0.2433, over 5812020.93 frames. ], batch size: 82, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:22:25,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=515840.0, ans=0.125 +2024-09-18 20:22:35,033 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.09 vs. limit=10.0 +2024-09-18 20:22:55,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=515920.0, ans=0.125 +2024-09-18 20:23:19,804 INFO [train.py:1198] (0/2) Epoch 29, batch 2300, loss[loss=0.2286, ctc_loss=0.1154, cr_loss=0.3594, attn_decoder_loss=0.2332, over 29316.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1221, cr_loss=0.3646, attn_decoder_loss=0.2427, over 5798927.51 frames. ], batch size: 71, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:23:23,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=516000.0, ans=0.0 +2024-09-18 20:23:29,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=516000.0, ans=0.1 +2024-09-18 20:23:31,709 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.460e+01 8.964e+01 9.608e+01 5.700e+02, threshold=1.793e+02, percent-clipped=2.0 +2024-09-18 20:23:46,033 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.67 vs. limit=22.5 +2024-09-18 20:23:53,133 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.62 vs. 
limit=12.0 +2024-09-18 20:24:02,220 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=10.62 vs. limit=15.0 +2024-09-18 20:24:12,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.45 vs. limit=10.0 +2024-09-18 20:24:22,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=516160.0, ans=0.0 +2024-09-18 20:24:28,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=516160.0, ans=0.125 +2024-09-18 20:24:37,598 INFO [train.py:1198] (0/2) Epoch 29, batch 2350, loss[loss=0.2406, ctc_loss=0.1195, cr_loss=0.3588, attn_decoder_loss=0.2461, over 29705.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1224, cr_loss=0.3651, attn_decoder_loss=0.243, over 5804819.02 frames. ], batch size: 83, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:24:46,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=516200.0, ans=0.125 +2024-09-18 20:25:09,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=516280.0, ans=0.0 +2024-09-18 20:25:55,367 INFO [train.py:1198] (0/2) Epoch 29, batch 2400, loss[loss=0.2303, ctc_loss=0.1211, cr_loss=0.3905, attn_decoder_loss=0.2337, over 29551.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1224, cr_loss=0.3649, attn_decoder_loss=0.2436, over 5807924.62 frames. ], batch size: 76, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:26:00,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.36 vs. limit=12.0 +2024-09-18 20:26:08,939 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.530e+01 8.500e+01 8.937e+01 9.634e+01 2.540e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 20:26:35,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=516480.0, ans=0.125 +2024-09-18 20:26:47,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=516520.0, ans=0.1 +2024-09-18 20:27:02,871 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.49 vs. limit=15.0 +2024-09-18 20:27:06,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=516560.0, ans=0.0 +2024-09-18 20:27:09,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=516600.0, ans=0.125 +2024-09-18 20:27:11,186 INFO [train.py:1198] (0/2) Epoch 29, batch 2450, loss[loss=0.2391, ctc_loss=0.1268, cr_loss=0.3819, attn_decoder_loss=0.2431, over 29721.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1231, cr_loss=0.3657, attn_decoder_loss=0.2447, over 5785398.60 frames. 
], batch size: 82, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:27:11,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=516600.0, ans=0.2 +2024-09-18 20:27:27,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=516640.0, ans=0.0 +2024-09-18 20:27:38,415 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.92 vs. limit=15.0 +2024-09-18 20:27:49,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=516680.0, ans=0.0 +2024-09-18 20:27:54,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=516680.0, ans=0.0 +2024-09-18 20:28:10,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=516720.0, ans=0.2 +2024-09-18 20:28:26,837 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.95 vs. limit=22.5 +2024-09-18 20:28:29,440 INFO [train.py:1198] (0/2) Epoch 29, batch 2500, loss[loss=0.2533, ctc_loss=0.1252, cr_loss=0.3774, attn_decoder_loss=0.2592, over 29621.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1229, cr_loss=0.3651, attn_decoder_loss=0.2444, over 5795332.25 frames. ], batch size: 86, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:28:37,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=516800.0, ans=0.125 +2024-09-18 20:28:44,581 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.409e+01 8.372e+01 8.869e+01 9.573e+01 2.936e+02, threshold=1.774e+02, percent-clipped=2.0 +2024-09-18 20:28:49,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=516840.0, ans=0.1 +2024-09-18 20:29:12,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=516880.0, ans=0.0 +2024-09-18 20:29:25,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=516920.0, ans=0.0 +2024-09-18 20:29:46,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=517000.0, ans=0.125 +2024-09-18 20:29:47,318 INFO [train.py:1198] (0/2) Epoch 29, batch 2550, loss[loss=0.2129, ctc_loss=0.1066, cr_loss=0.3275, attn_decoder_loss=0.2175, over 29364.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1229, cr_loss=0.3653, attn_decoder_loss=0.2445, over 5798752.43 frames. 
], batch size: 67, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:30:05,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=517040.0, ans=0.125 +2024-09-18 20:30:13,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=517040.0, ans=0.2 +2024-09-18 20:30:37,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=517120.0, ans=0.125 +2024-09-18 20:30:39,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=517120.0, ans=0.0 +2024-09-18 20:30:46,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=517160.0, ans=0.95 +2024-09-18 20:30:51,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=517160.0, ans=0.1 +2024-09-18 20:30:57,594 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.09 vs. limit=15.0 +2024-09-18 20:31:02,910 INFO [train.py:1198] (0/2) Epoch 29, batch 2600, loss[loss=0.2283, ctc_loss=0.1148, cr_loss=0.3403, attn_decoder_loss=0.2334, over 29431.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1234, cr_loss=0.3661, attn_decoder_loss=0.245, over 5794903.68 frames. ], batch size: 78, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:31:10,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=517200.0, ans=0.2 +2024-09-18 20:31:17,758 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.549e+01 8.951e+01 9.409e+01 2.372e+02, threshold=1.790e+02, percent-clipped=2.0 +2024-09-18 20:31:30,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=517240.0, ans=0.125 +2024-09-18 20:31:33,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=517280.0, ans=0.125 +2024-09-18 20:31:47,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=517280.0, ans=0.025 +2024-09-18 20:32:17,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=517360.0, ans=0.125 +2024-09-18 20:32:20,505 INFO [train.py:1198] (0/2) Epoch 29, batch 2650, loss[loss=0.255, ctc_loss=0.1306, cr_loss=0.381, attn_decoder_loss=0.2604, over 29273.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1238, cr_loss=0.3675, attn_decoder_loss=0.2455, over 5800314.90 frames. ], batch size: 100, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:32:25,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=517400.0, ans=0.09899494936611666 +2024-09-18 20:32:41,188 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.82 vs. 
limit=22.5 +2024-09-18 20:33:21,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=517560.0, ans=0.0 +2024-09-18 20:33:33,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=517560.0, ans=0.1 +2024-09-18 20:33:35,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=517560.0, ans=0.1 +2024-09-18 20:33:38,528 INFO [train.py:1198] (0/2) Epoch 29, batch 2700, loss[loss=0.2417, ctc_loss=0.1243, cr_loss=0.3715, attn_decoder_loss=0.2465, over 29547.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1241, cr_loss=0.3682, attn_decoder_loss=0.2457, over 5795806.03 frames. ], batch size: 87, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:33:38,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=517600.0, ans=0.0 +2024-09-18 20:33:38,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=517600.0, ans=0.0 +2024-09-18 20:33:53,537 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.043e+01 8.653e+01 9.179e+01 9.808e+01 2.021e+02, threshold=1.836e+02, percent-clipped=2.0 +2024-09-18 20:34:01,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.12 vs. limit=12.0 +2024-09-18 20:34:05,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=517640.0, ans=0.125 +2024-09-18 20:34:27,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=517720.0, ans=0.125 +2024-09-18 20:34:39,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=517760.0, ans=0.1 +2024-09-18 20:34:54,588 INFO [train.py:1198] (0/2) Epoch 29, batch 2750, loss[loss=0.2409, ctc_loss=0.1304, cr_loss=0.3891, attn_decoder_loss=0.2446, over 29503.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1229, cr_loss=0.3657, attn_decoder_loss=0.2443, over 5795330.67 frames. ], batch size: 75, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:35:06,085 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.32 vs. limit=15.0 +2024-09-18 20:35:17,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.04 vs. limit=15.0 +2024-09-18 20:35:41,171 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.86 vs. limit=22.5 +2024-09-18 20:35:49,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=517920.0, ans=0.0 +2024-09-18 20:35:50,026 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.65 vs. 
limit=10.0 +2024-09-18 20:35:58,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=517960.0, ans=0.125 +2024-09-18 20:35:58,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=517960.0, ans=0.0 +2024-09-18 20:36:08,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=517960.0, ans=0.125 +2024-09-18 20:36:12,345 INFO [train.py:1198] (0/2) Epoch 29, batch 2800, loss[loss=0.2553, ctc_loss=0.1516, cr_loss=0.3784, attn_decoder_loss=0.2584, over 20434.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1232, cr_loss=0.3656, attn_decoder_loss=0.2445, over 5776290.60 frames. ], batch size: 209, lr: 3.81e-03, grad_scale: 16.0 +2024-09-18 20:36:21,557 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:36:28,864 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.249e+01 8.371e+01 8.942e+01 9.579e+01 2.215e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 20:36:39,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=518040.0, ans=0.125 +2024-09-18 20:36:48,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=518080.0, ans=0.125 +2024-09-18 20:37:30,079 INFO [train.py:1198] (0/2) Epoch 29, batch 2850, loss[loss=0.2311, ctc_loss=0.1183, cr_loss=0.3491, attn_decoder_loss=0.2359, over 29515.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1234, cr_loss=0.3655, attn_decoder_loss=0.2448, over 5762084.16 frames. ], batch size: 77, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:37:37,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=518200.0, ans=0.0 +2024-09-18 20:37:40,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=518200.0, ans=0.1 +2024-09-18 20:37:59,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=518280.0, ans=0.1 +2024-09-18 20:38:22,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=518320.0, ans=0.125 +2024-09-18 20:38:26,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=518320.0, ans=0.125 +2024-09-18 20:38:32,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=518360.0, ans=0.0 +2024-09-18 20:38:46,511 INFO [train.py:1198] (0/2) Epoch 29, batch 2900, loss[loss=0.2377, ctc_loss=0.1208, cr_loss=0.3478, attn_decoder_loss=0.243, over 29417.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.124, cr_loss=0.3672, attn_decoder_loss=0.2456, over 5786964.94 frames. 
], batch size: 79, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:38:55,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=518400.0, ans=0.5 +2024-09-18 20:39:05,311 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.605e+01 8.499e+01 8.947e+01 9.458e+01 2.522e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-18 20:39:25,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=518480.0, ans=0.0 +2024-09-18 20:39:26,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=518480.0, ans=0.2 +2024-09-18 20:40:04,388 INFO [train.py:1198] (0/2) Epoch 29, batch 2950, loss[loss=0.233, ctc_loss=0.1263, cr_loss=0.3776, attn_decoder_loss=0.2365, over 29508.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1231, cr_loss=0.3654, attn_decoder_loss=0.2442, over 5782247.62 frames. ], batch size: 75, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:40:07,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=518600.0, ans=0.125 +2024-09-18 20:40:12,803 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.10 vs. limit=22.5 +2024-09-18 20:40:58,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=518720.0, ans=0.125 +2024-09-18 20:41:01,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=518720.0, ans=0.125 +2024-09-18 20:41:22,999 INFO [train.py:1198] (0/2) Epoch 29, batch 3000, loss[loss=0.2466, ctc_loss=0.1228, cr_loss=0.3508, attn_decoder_loss=0.2525, over 29755.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.123, cr_loss=0.3654, attn_decoder_loss=0.2442, over 5782942.76 frames. ], batch size: 81, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:41:22,999 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 20:41:37,626 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.4.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.9122, 4.4108, 3.8202, 3.8674], device='cuda:0') +2024-09-18 20:41:41,474 INFO [train.py:1230] (0/2) Epoch 29, validation: loss=0.2115, ctc_loss=0.03752, cr_loss=5.604e-15, attn_decoder_loss=0.2309, over 944034.00 frames. +2024-09-18 20:41:41,474 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 20:41:58,314 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.278e+01 8.694e+01 9.323e+01 9.820e+01 2.000e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-18 20:42:15,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=518880.0, ans=0.1 +2024-09-18 20:42:24,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=518880.0, ans=0.125 +2024-09-18 20:42:49,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=518960.0, ans=0.125 +2024-09-18 20:42:59,580 INFO [train.py:1198] (0/2) Epoch 29, batch 3050, loss[loss=0.2249, ctc_loss=0.1095, cr_loss=0.3466, attn_decoder_loss=0.23, over 29534.00 frames. 
], tot_loss[loss=0.2403, ctc_loss=0.1237, cr_loss=0.3668, attn_decoder_loss=0.2451, over 5776876.80 frames. ], batch size: 76, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:43:02,901 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:43:12,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=519000.0, ans=0.1 +2024-09-18 20:43:31,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=519080.0, ans=0.125 +2024-09-18 20:43:47,144 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.92 vs. limit=22.5 +2024-09-18 20:43:58,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=519160.0, ans=0.0 +2024-09-18 20:44:15,291 INFO [train.py:1198] (0/2) Epoch 29, batch 3100, loss[loss=0.2584, ctc_loss=0.142, cr_loss=0.4027, attn_decoder_loss=0.2624, over 29206.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1235, cr_loss=0.366, attn_decoder_loss=0.2445, over 5778094.67 frames. ], batch size: 100, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:44:15,735 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:44:31,778 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.551e+01 8.578e+01 9.222e+01 9.783e+01 2.939e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-18 20:44:35,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=519240.0, ans=0.125 +2024-09-18 20:44:42,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=519240.0, ans=0.125 +2024-09-18 20:44:57,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=519280.0, ans=0.125 +2024-09-18 20:45:21,485 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:45:29,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=519360.0, ans=0.0 +2024-09-18 20:45:33,288 INFO [train.py:1198] (0/2) Epoch 29, batch 3150, loss[loss=0.2541, ctc_loss=0.1264, cr_loss=0.362, attn_decoder_loss=0.2603, over 28846.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1235, cr_loss=0.3663, attn_decoder_loss=0.2446, over 5783897.02 frames. ], batch size: 104, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:45:52,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.07 vs. limit=8.0 +2024-09-18 20:45:59,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=519440.0, ans=0.0 +2024-09-18 20:46:50,886 INFO [train.py:1198] (0/2) Epoch 29, batch 3200, loss[loss=0.2279, ctc_loss=0.1149, cr_loss=0.3494, attn_decoder_loss=0.2327, over 29430.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1227, cr_loss=0.3644, attn_decoder_loss=0.2437, over 5793621.97 frames. 
], batch size: 79, lr: 3.80e-03, grad_scale: 16.0 +2024-09-18 20:46:57,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=519600.0, ans=0.1 +2024-09-18 20:47:03,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=519600.0, ans=0.0 +2024-09-18 20:47:06,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=519640.0, ans=0.1 +2024-09-18 20:47:07,587 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.168e+01 8.418e+01 8.919e+01 9.479e+01 2.582e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-18 20:47:23,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=519680.0, ans=0.0 +2024-09-18 20:47:40,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=519720.0, ans=0.0 +2024-09-18 20:48:07,096 INFO [train.py:1198] (0/2) Epoch 29, batch 3250, loss[loss=0.2429, ctc_loss=0.1188, cr_loss=0.3645, attn_decoder_loss=0.2485, over 29702.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1226, cr_loss=0.3648, attn_decoder_loss=0.244, over 5799803.73 frames. ], batch size: 84, lr: 3.80e-03, grad_scale: 16.0 +2024-09-18 20:48:26,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=519840.0, ans=0.1 +2024-09-18 20:48:33,633 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.14 vs. limit=10.0 +2024-09-18 20:48:33,643 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.15 vs. limit=6.0 +2024-09-18 20:48:57,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=519920.0, ans=0.125 +2024-09-18 20:49:25,266 INFO [train.py:1198] (0/2) Epoch 29, batch 3300, loss[loss=0.2492, ctc_loss=0.1236, cr_loss=0.3556, attn_decoder_loss=0.2552, over 28485.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1221, cr_loss=0.3638, attn_decoder_loss=0.243, over 5796725.08 frames. ], batch size: 112, lr: 3.80e-03, grad_scale: 16.0 +2024-09-18 20:49:33,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=520000.0, ans=0.1 +2024-09-18 20:49:42,287 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.574e+01 8.577e+01 8.993e+01 9.559e+01 2.414e+02, threshold=1.799e+02, percent-clipped=3.0 +2024-09-18 20:50:04,080 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.64 vs. 
limit=12.0 +2024-09-18 20:50:08,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=520080.0, ans=0.0 +2024-09-18 20:50:30,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=520160.0, ans=0.125 +2024-09-18 20:50:37,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=520160.0, ans=0.0 +2024-09-18 20:50:43,264 INFO [train.py:1198] (0/2) Epoch 29, batch 3350, loss[loss=0.2623, ctc_loss=0.14, cr_loss=0.3911, attn_decoder_loss=0.2672, over 28918.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1227, cr_loss=0.3646, attn_decoder_loss=0.2437, over 5773914.16 frames. ], batch size: 104, lr: 3.80e-03, grad_scale: 16.0 +2024-09-18 20:50:51,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=520200.0, ans=0.125 +2024-09-18 20:50:57,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=520240.0, ans=0.0 +2024-09-18 20:51:00,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=520240.0, ans=0.125 +2024-09-18 20:51:02,431 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.02 vs. limit=15.0 +2024-09-18 20:51:23,390 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.68 vs. limit=6.0 +2024-09-18 20:51:24,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=520280.0, ans=0.025 +2024-09-18 20:51:38,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.36 vs. limit=12.0 +2024-09-18 20:51:50,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=520360.0, ans=0.125 +2024-09-18 20:51:51,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=520360.0, ans=0.2 +2024-09-18 20:51:55,381 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.27 vs. limit=15.0 +2024-09-18 20:51:58,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=520400.0, ans=15.0 +2024-09-18 20:51:59,172 INFO [train.py:1198] (0/2) Epoch 29, batch 3400, loss[loss=0.2139, ctc_loss=0.1057, cr_loss=0.3537, attn_decoder_loss=0.218, over 29343.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1232, cr_loss=0.3657, attn_decoder_loss=0.2439, over 5767416.84 frames. 
], batch size: 67, lr: 3.80e-03, grad_scale: 16.0 +2024-09-18 20:52:06,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=520400.0, ans=0.125 +2024-09-18 20:52:11,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=520400.0, ans=0.0 +2024-09-18 20:52:13,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.30 vs. limit=15.0 +2024-09-18 20:52:14,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=520440.0, ans=0.125 +2024-09-18 20:52:15,975 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.276e+01 8.635e+01 9.188e+01 1.005e+02 1.629e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-18 20:52:26,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=520440.0, ans=0.125 +2024-09-18 20:52:29,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=520480.0, ans=0.0 +2024-09-18 20:52:36,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=520480.0, ans=0.125 +2024-09-18 20:52:45,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=520520.0, ans=0.125 +2024-09-18 20:52:51,693 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.30 vs. limit=22.5 +2024-09-18 20:53:12,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=520560.0, ans=0.125 +2024-09-18 20:53:16,964 INFO [train.py:1198] (0/2) Epoch 29, batch 3450, loss[loss=0.2437, ctc_loss=0.1229, cr_loss=0.3756, attn_decoder_loss=0.2487, over 28573.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1234, cr_loss=0.3664, attn_decoder_loss=0.2441, over 5774606.26 frames. ], batch size: 112, lr: 3.80e-03, grad_scale: 8.0 +2024-09-18 20:53:17,669 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.18 vs. limit=15.0 +2024-09-18 20:53:32,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=520640.0, ans=0.125 +2024-09-18 20:53:32,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=520640.0, ans=0.125 +2024-09-18 20:53:34,476 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.82 vs. 
limit=10.0 +2024-09-18 20:53:37,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=520640.0, ans=0.0 +2024-09-18 20:53:47,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=520680.0, ans=15.0 +2024-09-18 20:53:59,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=520680.0, ans=0.125 +2024-09-18 20:54:35,153 INFO [train.py:1198] (0/2) Epoch 29, batch 3500, loss[loss=0.2184, ctc_loss=0.1068, cr_loss=0.3374, attn_decoder_loss=0.2233, over 29731.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1231, cr_loss=0.3658, attn_decoder_loss=0.2436, over 5777940.83 frames. ], batch size: 72, lr: 3.80e-03, grad_scale: 8.0 +2024-09-18 20:54:40,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=520800.0, ans=0.07 +2024-09-18 20:54:50,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=520840.0, ans=0.125 +2024-09-18 20:54:53,389 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.549e+01 8.236e+01 8.769e+01 9.566e+01 1.320e+02, threshold=1.754e+02, percent-clipped=0.0 +2024-09-18 20:54:58,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=520840.0, ans=0.1 +2024-09-18 20:54:58,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=520840.0, ans=0.0 +2024-09-18 20:55:13,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=520880.0, ans=0.2 +2024-09-18 20:55:19,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=520920.0, ans=0.1 +2024-09-18 20:55:29,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=520920.0, ans=0.0 +2024-09-18 20:55:47,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=520960.0, ans=0.025 +2024-09-18 20:55:50,173 INFO [train.py:1198] (0/2) Epoch 29, batch 3550, loss[loss=0.2432, ctc_loss=0.1145, cr_loss=0.3357, attn_decoder_loss=0.25, over 29713.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1228, cr_loss=0.3649, attn_decoder_loss=0.2437, over 5782411.41 frames. ], batch size: 89, lr: 3.80e-03, grad_scale: 8.0 +2024-09-18 20:56:08,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=521040.0, ans=0.2 +2024-09-18 20:56:46,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=521120.0, ans=0.0 +2024-09-18 20:56:55,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.68 vs. limit=12.0 +2024-09-18 20:57:01,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=521160.0, ans=0.05 +2024-09-18 20:57:04,197 INFO [train.py:1198] (0/2) Epoch 29, batch 3600, loss[loss=0.2413, ctc_loss=0.135, cr_loss=0.4152, attn_decoder_loss=0.2439, over 29515.00 frames. 
], tot_loss[loss=0.2388, ctc_loss=0.1224, cr_loss=0.3644, attn_decoder_loss=0.2437, over 5791820.03 frames. ], batch size: 77, lr: 3.80e-03, grad_scale: 16.0 +2024-09-18 20:57:10,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=521200.0, ans=0.1 +2024-09-18 20:57:18,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=521240.0, ans=0.125 +2024-09-18 20:57:19,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=521240.0, ans=0.125 +2024-09-18 20:57:22,271 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.410e+01 8.552e+01 8.984e+01 9.474e+01 4.897e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 20:57:50,800 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:57:54,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.44 vs. limit=15.0 +2024-09-18 20:57:56,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=521320.0, ans=0.1 +2024-09-18 20:57:58,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=521320.0, ans=0.05 +2024-09-18 20:58:18,774 INFO [train.py:1198] (0/2) Epoch 29, batch 3650, loss[loss=0.2497, ctc_loss=0.1367, cr_loss=0.3968, attn_decoder_loss=0.2535, over 29504.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.122, cr_loss=0.3634, attn_decoder_loss=0.2429, over 5794681.48 frames. ], batch size: 90, lr: 3.80e-03, grad_scale: 16.0 +2024-09-18 20:58:19,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=521400.0, ans=0.0 +2024-09-18 20:58:23,884 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.94 vs. limit=15.0 +2024-09-18 20:58:28,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=521400.0, ans=0.0 +2024-09-18 20:58:37,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=521440.0, ans=10.0 +2024-09-18 20:58:46,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=521440.0, ans=0.0 +2024-09-18 20:58:52,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=521480.0, ans=0.125 +2024-09-18 20:59:05,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=521520.0, ans=0.125 +2024-09-18 20:59:06,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=521520.0, ans=0.0 +2024-09-18 20:59:23,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=521560.0, ans=0.125 +2024-09-18 20:59:36,062 INFO [train.py:1198] (0/2) Epoch 29, batch 3700, loss[loss=0.2492, ctc_loss=0.1265, cr_loss=0.3715, attn_decoder_loss=0.2545, over 29724.00 frames. 
], tot_loss[loss=0.2385, ctc_loss=0.1224, cr_loss=0.3647, attn_decoder_loss=0.2433, over 5805140.46 frames. ], batch size: 84, lr: 3.80e-03, grad_scale: 8.0 +2024-09-18 20:59:55,283 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.566e+01 8.397e+01 8.875e+01 9.405e+01 1.712e+02, threshold=1.775e+02, percent-clipped=0.0 +2024-09-18 21:00:04,775 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.43 vs. limit=12.0 +2024-09-18 21:00:05,850 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:00:16,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.30 vs. limit=22.5 +2024-09-18 21:00:25,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=521720.0, ans=0.0 +2024-09-18 21:00:32,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=521720.0, ans=0.125 +2024-09-18 21:00:35,026 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.88 vs. limit=6.0 +2024-09-18 21:00:52,046 INFO [train.py:1198] (0/2) Epoch 29, batch 3750, loss[loss=0.2175, ctc_loss=0.112, cr_loss=0.347, attn_decoder_loss=0.2215, over 29318.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1223, cr_loss=0.3649, attn_decoder_loss=0.2431, over 5808722.10 frames. ], batch size: 67, lr: 3.80e-03, grad_scale: 8.0 +2024-09-18 21:00:59,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=521800.0, ans=0.0 +2024-09-18 21:01:17,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=521840.0, ans=0.1 +2024-09-18 21:01:24,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=521880.0, ans=0.125 +2024-09-18 21:01:28,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=521880.0, ans=0.125 +2024-09-18 21:01:29,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=521880.0, ans=0.05 +2024-09-18 21:01:46,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=521920.0, ans=0.125 +2024-09-18 21:02:01,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=521960.0, ans=0.1 +2024-09-18 21:02:04,724 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.62 vs. limit=6.0 +2024-09-18 21:02:06,640 INFO [train.py:1198] (0/2) Epoch 29, batch 3800, loss[loss=0.2403, ctc_loss=0.1194, cr_loss=0.3507, attn_decoder_loss=0.2459, over 29619.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1221, cr_loss=0.3641, attn_decoder_loss=0.2427, over 5799618.40 frames. 
], batch size: 86, lr: 3.80e-03, grad_scale: 8.0 +2024-09-18 21:02:25,923 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.282e+01 8.472e+01 8.859e+01 9.703e+01 1.383e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-18 21:02:28,353 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.13 vs. limit=15.0 +2024-09-18 21:03:00,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=522120.0, ans=0.2 +2024-09-18 21:03:06,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=522160.0, ans=0.0 +2024-09-18 21:03:13,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=522160.0, ans=0.2 +2024-09-18 21:03:20,793 INFO [train.py:1198] (0/2) Epoch 29, batch 3850, loss[loss=0.2586, ctc_loss=0.1425, cr_loss=0.4161, attn_decoder_loss=0.2623, over 29327.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.122, cr_loss=0.3644, attn_decoder_loss=0.2427, over 5814091.29 frames. ], batch size: 100, lr: 3.80e-03, grad_scale: 8.0 +2024-09-18 21:03:30,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=522200.0, ans=0.2 +2024-09-18 21:03:49,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=522280.0, ans=0.0 +2024-09-18 21:04:07,884 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.05 vs. limit=6.0 +2024-09-18 21:04:20,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=522360.0, ans=0.0 +2024-09-18 21:04:31,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=522360.0, ans=0.07 +2024-09-18 21:04:32,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=522360.0, ans=0.0 +2024-09-18 21:04:34,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=522360.0, ans=0.0 +2024-09-18 21:04:37,290 INFO [train.py:1198] (0/2) Epoch 29, batch 3900, loss[loss=0.2578, ctc_loss=0.1336, cr_loss=0.3897, attn_decoder_loss=0.2629, over 29641.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1228, cr_loss=0.3658, attn_decoder_loss=0.2434, over 5818166.81 frames. 
], batch size: 86, lr: 3.79e-03, grad_scale: 8.0 +2024-09-18 21:04:47,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=522400.0, ans=0.0 +2024-09-18 21:04:56,254 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.564e+01 8.663e+01 9.050e+01 9.576e+01 3.697e+02, threshold=1.810e+02, percent-clipped=2.0 +2024-09-18 21:04:59,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=522440.0, ans=0.1 +2024-09-18 21:05:21,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=522520.0, ans=0.125 +2024-09-18 21:05:30,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=522520.0, ans=0.09899494936611666 +2024-09-18 21:05:36,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=522560.0, ans=0.0 +2024-09-18 21:05:49,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=522560.0, ans=0.0 +2024-09-18 21:05:52,004 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=12.0 +2024-09-18 21:05:52,776 INFO [train.py:1198] (0/2) Epoch 29, batch 3950, loss[loss=0.2525, ctc_loss=0.1294, cr_loss=0.3827, attn_decoder_loss=0.2577, over 29421.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1224, cr_loss=0.3653, attn_decoder_loss=0.2431, over 5837208.25 frames. ], batch size: 97, lr: 3.79e-03, grad_scale: 8.0 +2024-09-18 21:06:09,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.65 vs. limit=15.0 +2024-09-18 21:06:22,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=522680.0, ans=0.2 +2024-09-18 21:06:32,955 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:06:46,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=522720.0, ans=0.125 +2024-09-18 21:06:49,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=522720.0, ans=0.125 +2024-09-18 21:07:06,730 INFO [train.py:1198] (0/2) Epoch 29, batch 4000, loss[loss=0.2247, ctc_loss=0.1093, cr_loss=0.3458, attn_decoder_loss=0.2299, over 29516.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1226, cr_loss=0.3653, attn_decoder_loss=0.2433, over 5813621.32 frames. ], batch size: 74, lr: 3.79e-03, grad_scale: 16.0 +2024-09-18 21:07:20,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=522840.0, ans=0.125 +2024-09-18 21:07:25,861 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.254e+01 8.544e+01 9.038e+01 9.843e+01 4.905e+02, threshold=1.808e+02, percent-clipped=2.0 +2024-09-18 21:07:52,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=11.69 vs. 
limit=15.0 +2024-09-18 21:08:00,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=522920.0, ans=0.0 +2024-09-18 21:08:21,178 INFO [train.py:1198] (0/2) Epoch 29, batch 4050, loss[loss=0.2633, ctc_loss=0.1549, cr_loss=0.3989, attn_decoder_loss=0.2665, over 19867.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1226, cr_loss=0.3645, attn_decoder_loss=0.2433, over 5797446.17 frames. ], batch size: 209, lr: 3.79e-03, grad_scale: 16.0 +2024-09-18 21:08:22,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=523000.0, ans=0.0 +2024-09-18 21:08:32,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.87 vs. limit=15.0 +2024-09-18 21:08:37,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=523040.0, ans=10.0 +2024-09-18 21:08:43,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=523040.0, ans=0.0 +2024-09-18 21:08:50,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=523080.0, ans=0.125 +2024-09-18 21:08:53,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=523080.0, ans=0.0 +2024-09-18 21:08:54,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=523080.0, ans=0.1 +2024-09-18 21:09:22,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=523160.0, ans=0.0 +2024-09-18 21:09:29,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=523160.0, ans=0.125 +2024-09-18 21:09:30,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=523160.0, ans=0.0 +2024-09-18 21:09:30,440 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.74 vs. limit=10.0 +2024-09-18 21:09:33,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=523160.0, ans=0.0 +2024-09-18 21:09:35,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=523200.0, ans=0.07 +2024-09-18 21:09:36,356 INFO [train.py:1198] (0/2) Epoch 29, batch 4100, loss[loss=0.2592, ctc_loss=0.1417, cr_loss=0.4046, attn_decoder_loss=0.2633, over 29503.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1224, cr_loss=0.364, attn_decoder_loss=0.2432, over 5793667.09 frames. 
], batch size: 90, lr: 3.79e-03, grad_scale: 16.0 +2024-09-18 21:09:48,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=523200.0, ans=0.125 +2024-09-18 21:09:52,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=523240.0, ans=0.125 +2024-09-18 21:09:56,919 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.375e+01 8.631e+01 9.111e+01 9.616e+01 2.001e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 21:10:00,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.94 vs. limit=15.0 +2024-09-18 21:10:02,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=523240.0, ans=0.0 +2024-09-18 21:10:22,714 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.80 vs. limit=15.0 +2024-09-18 21:10:29,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=523320.0, ans=0.0 +2024-09-18 21:10:43,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=523360.0, ans=0.125 +2024-09-18 21:10:43,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=523360.0, ans=0.2 +2024-09-18 21:10:51,017 INFO [train.py:1198] (0/2) Epoch 29, batch 4150, loss[loss=0.2407, ctc_loss=0.1275, cr_loss=0.374, attn_decoder_loss=0.2449, over 29494.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1228, cr_loss=0.365, attn_decoder_loss=0.2435, over 5799101.90 frames. ], batch size: 77, lr: 3.79e-03, grad_scale: 8.0 +2024-09-18 21:10:55,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=523400.0, ans=0.125 +2024-09-18 21:11:03,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=523400.0, ans=0.0 +2024-09-18 21:11:03,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=523400.0, ans=0.125 +2024-09-18 21:11:06,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=523440.0, ans=0.125 +2024-09-18 21:11:07,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=523440.0, ans=0.025 +2024-09-18 21:11:12,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.98 vs. limit=15.0 +2024-09-18 21:11:19,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=523480.0, ans=0.125 +2024-09-18 21:11:26,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=523480.0, ans=0.125 +2024-09-18 21:11:27,087 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.05 vs. 
limit=10.0 +2024-09-18 21:11:36,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=523520.0, ans=0.07 +2024-09-18 21:11:39,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=523520.0, ans=0.0 +2024-09-18 21:11:44,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=523520.0, ans=0.05 +2024-09-18 21:12:04,653 INFO [train.py:1198] (0/2) Epoch 29, batch 4200, loss[loss=0.2785, ctc_loss=0.1588, cr_loss=0.445, attn_decoder_loss=0.282, over 29477.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1226, cr_loss=0.3646, attn_decoder_loss=0.2436, over 5800323.36 frames. ], batch size: 90, lr: 3.79e-03, grad_scale: 8.0 +2024-09-18 21:12:14,672 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.78 vs. limit=15.0 +2024-09-18 21:12:24,984 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.93 vs. limit=12.0 +2024-09-18 21:12:25,447 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.320e+01 8.488e+01 8.959e+01 9.406e+01 1.586e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-18 21:12:31,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=523640.0, ans=0.1 +2024-09-18 21:12:40,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=523680.0, ans=0.125 +2024-09-18 21:12:41,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=523680.0, ans=0.125 +2024-09-18 21:13:00,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=523720.0, ans=0.1 +2024-09-18 21:13:19,320 INFO [train.py:1198] (0/2) Epoch 29, batch 4250, loss[loss=0.2288, ctc_loss=0.1149, cr_loss=0.3555, attn_decoder_loss=0.2336, over 29516.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1225, cr_loss=0.365, attn_decoder_loss=0.2438, over 5806126.12 frames. ], batch size: 74, lr: 3.79e-03, grad_scale: 8.0 +2024-09-18 21:13:20,278 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=17.96 vs. limit=15.0 +2024-09-18 21:13:48,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=523880.0, ans=0.125 +2024-09-18 21:14:02,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=523920.0, ans=0.1 +2024-09-18 21:14:28,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.76 vs. limit=10.0 +2024-09-18 21:14:33,927 INFO [train.py:1198] (0/2) Epoch 29, batch 4300, loss[loss=0.2559, ctc_loss=0.1387, cr_loss=0.4091, attn_decoder_loss=0.2598, over 29526.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1224, cr_loss=0.3646, attn_decoder_loss=0.244, over 5796687.86 frames. 
], batch size: 87, lr: 3.79e-03, grad_scale: 8.0 +2024-09-18 21:14:54,744 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.701e+01 8.566e+01 9.093e+01 9.563e+01 1.622e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-18 21:14:55,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=524040.0, ans=0.0 +2024-09-18 21:15:08,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=524080.0, ans=0.0 +2024-09-18 21:15:16,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.57 vs. limit=15.0 +2024-09-18 21:15:18,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=524120.0, ans=0.2 +2024-09-18 21:15:20,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=524120.0, ans=0.125 +2024-09-18 21:15:24,738 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:15:25,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.63 vs. limit=15.0 +2024-09-18 21:15:46,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=524200.0, ans=0.2 +2024-09-18 21:15:48,233 INFO [train.py:1198] (0/2) Epoch 29, batch 4350, loss[loss=0.255, ctc_loss=0.1343, cr_loss=0.4019, attn_decoder_loss=0.2595, over 29504.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1249, cr_loss=0.3697, attn_decoder_loss=0.2471, over 5798108.56 frames. ], batch size: 97, lr: 3.79e-03, grad_scale: 8.0 +2024-09-18 21:15:49,318 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.24 vs. limit=22.5 +2024-09-18 21:16:01,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=524240.0, ans=0.2 +2024-09-18 21:16:04,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=524240.0, ans=0.025 +2024-09-18 21:16:07,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=524240.0, ans=0.1 +2024-09-18 21:16:29,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=524280.0, ans=0.1 +2024-09-18 21:16:30,724 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.29 vs. limit=15.0 +2024-09-18 21:16:39,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=524320.0, ans=0.125 +2024-09-18 21:16:53,623 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.68 vs. limit=15.0 +2024-09-18 21:17:02,842 INFO [train.py:1198] (0/2) Epoch 29, batch 4400, loss[loss=0.2571, ctc_loss=0.1459, cr_loss=0.4042, attn_decoder_loss=0.2605, over 27608.00 frames. 
], tot_loss[loss=0.2441, ctc_loss=0.1259, cr_loss=0.3715, attn_decoder_loss=0.249, over 5768382.67 frames. ], batch size: 124, lr: 3.79e-03, grad_scale: 16.0 +2024-09-18 21:17:05,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.86 vs. limit=15.0 +2024-09-18 21:17:22,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=524440.0, ans=0.1 +2024-09-18 21:17:23,258 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.178e+01 8.977e+01 9.367e+01 9.862e+01 3.705e+02, threshold=1.873e+02, percent-clipped=1.0 +2024-09-18 21:17:29,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=524440.0, ans=0.1 +2024-09-18 21:17:29,632 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:17:50,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=524520.0, ans=0.0 +2024-09-18 21:17:55,311 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.94 vs. limit=22.5 +2024-09-18 21:17:56,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=524520.0, ans=0.2 +2024-09-18 21:18:16,812 INFO [train.py:1198] (0/2) Epoch 29, batch 4450, loss[loss=0.2618, ctc_loss=0.1488, cr_loss=0.4023, attn_decoder_loss=0.2654, over 20522.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.13, cr_loss=0.3775, attn_decoder_loss=0.2515, over 5583724.45 frames. ], batch size: 209, lr: 3.79e-03, grad_scale: 16.0 +2024-09-18 21:18:45,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=524640.0, ans=0.025 +2024-09-18 21:18:48,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=524680.0, ans=0.125 +2024-09-18 21:18:51,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=524680.0, ans=0.2 +2024-09-18 21:18:52,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=524680.0, ans=0.125 +2024-09-18 21:18:58,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=524680.0, ans=0.1 +2024-09-18 21:19:03,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=524720.0, ans=0.125 +2024-09-18 21:19:18,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=524760.0, ans=0.0 +2024-09-18 21:19:31,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=524800.0, ans=0.0 +2024-09-18 21:19:33,099 INFO [train.py:1198] (0/2) Epoch 29, batch 4500, loss[loss=0.2669, ctc_loss=0.1588, cr_loss=0.4112, attn_decoder_loss=0.2698, over 20450.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1337, cr_loss=0.3801, attn_decoder_loss=0.2535, over 5240174.55 frames. 
], batch size: 210, lr: 3.79e-03, grad_scale: 8.0 +2024-09-18 21:19:42,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=524800.0, ans=0.125 +2024-09-18 21:19:55,855 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.478e+01 1.036e+02 1.116e+02 1.208e+02 3.141e+02, threshold=2.233e+02, percent-clipped=1.0 +2024-09-18 21:20:00,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=524840.0, ans=10.0 +2024-09-18 21:20:10,760 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-29.pt +2024-09-18 21:21:03,276 INFO [train.py:1198] (0/2) Epoch 30, batch 0, loss[loss=0.219, ctc_loss=0.1109, cr_loss=0.345, attn_decoder_loss=0.2234, over 29597.00 frames. ], tot_loss[loss=0.219, ctc_loss=0.1109, cr_loss=0.345, attn_decoder_loss=0.2234, over 29597.00 frames. ], batch size: 73, lr: 3.72e-03, grad_scale: 16.0 +2024-09-18 21:21:03,276 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 21:21:23,765 INFO [train.py:1230] (0/2) Epoch 30, validation: loss=0.2119, ctc_loss=0.03754, cr_loss=5.775e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-18 21:21:23,765 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 21:21:27,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=524900.0, ans=0.125 +2024-09-18 21:21:33,753 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.32 vs. limit=10.0 +2024-09-18 21:21:39,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=524940.0, ans=0.125 +2024-09-18 21:21:39,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=524940.0, ans=0.1 +2024-09-18 21:21:43,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=524940.0, ans=0.2 +2024-09-18 21:22:30,594 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.45 vs. limit=15.0 +2024-09-18 21:22:32,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=525060.0, ans=0.125 +2024-09-18 21:22:36,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=525060.0, ans=0.125 +2024-09-18 21:22:39,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=525100.0, ans=0.025 +2024-09-18 21:22:40,135 INFO [train.py:1198] (0/2) Epoch 30, batch 50, loss[loss=0.2138, ctc_loss=0.1045, cr_loss=0.3278, attn_decoder_loss=0.2187, over 29456.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1233, cr_loss=0.3678, attn_decoder_loss=0.2431, over 1267299.47 frames. 
], batch size: 70, lr: 3.72e-03, grad_scale: 16.0 +2024-09-18 21:22:48,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=525100.0, ans=0.125 +2024-09-18 21:23:09,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=525180.0, ans=0.04949747468305833 +2024-09-18 21:23:11,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=525180.0, ans=0.125 +2024-09-18 21:23:17,191 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:23:36,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=525220.0, ans=0.05 +2024-09-18 21:23:39,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=525260.0, ans=0.0 +2024-09-18 21:23:42,502 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.716e+01 8.848e+01 9.545e+01 1.010e+02 1.497e+02, threshold=1.909e+02, percent-clipped=0.0 +2024-09-18 21:23:53,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=525260.0, ans=0.1 +2024-09-18 21:23:54,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=525300.0, ans=0.05 +2024-09-18 21:23:56,179 INFO [train.py:1198] (0/2) Epoch 30, batch 100, loss[loss=0.2252, ctc_loss=0.1145, cr_loss=0.3536, attn_decoder_loss=0.2297, over 29520.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1253, cr_loss=0.3719, attn_decoder_loss=0.246, over 2253142.01 frames. ], batch size: 76, lr: 3.72e-03, grad_scale: 8.0 +2024-09-18 21:24:06,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=525300.0, ans=0.125 +2024-09-18 21:24:21,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=525340.0, ans=0.0 +2024-09-18 21:24:27,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=525380.0, ans=0.0 +2024-09-18 21:24:29,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=525380.0, ans=0.125 +2024-09-18 21:24:33,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=525380.0, ans=0.0 +2024-09-18 21:24:49,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=525420.0, ans=0.2 +2024-09-18 21:24:51,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=525420.0, ans=0.125 +2024-09-18 21:25:13,043 INFO [train.py:1198] (0/2) Epoch 30, batch 150, loss[loss=0.2193, ctc_loss=0.1092, cr_loss=0.3445, attn_decoder_loss=0.2239, over 29437.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1232, cr_loss=0.3677, attn_decoder_loss=0.2443, over 3047980.48 frames. 
], batch size: 70, lr: 3.72e-03, grad_scale: 8.0 +2024-09-18 21:25:22,736 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.23 vs. limit=15.0 +2024-09-18 21:25:25,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=525500.0, ans=0.1 +2024-09-18 21:26:13,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.whiten.whitening_limit, batch_count=525620.0, ans=12.0 +2024-09-18 21:26:17,353 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.316e+01 8.425e+01 8.976e+01 9.725e+01 1.408e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-18 21:26:29,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=525700.0, ans=0.125 +2024-09-18 21:26:30,930 INFO [train.py:1198] (0/2) Epoch 30, batch 200, loss[loss=0.2543, ctc_loss=0.1363, cr_loss=0.3934, attn_decoder_loss=0.2587, over 27321.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1224, cr_loss=0.3664, attn_decoder_loss=0.2434, over 3659272.88 frames. ], batch size: 124, lr: 3.72e-03, grad_scale: 8.0 +2024-09-18 21:26:43,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=525700.0, ans=0.125 +2024-09-18 21:26:46,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=525740.0, ans=0.125 +2024-09-18 21:26:57,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=525740.0, ans=6.0 +2024-09-18 21:26:59,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.whiten.whitening_limit, batch_count=525740.0, ans=15.0 +2024-09-18 21:27:00,157 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:27:09,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=525780.0, ans=0.04949747468305833 +2024-09-18 21:27:24,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=525820.0, ans=0.2 +2024-09-18 21:27:37,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=525860.0, ans=0.125 +2024-09-18 21:27:42,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=525860.0, ans=0.0 +2024-09-18 21:27:44,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=525860.0, ans=0.0 +2024-09-18 21:27:46,552 INFO [train.py:1198] (0/2) Epoch 30, batch 250, loss[loss=0.2615, ctc_loss=0.1354, cr_loss=0.3863, attn_decoder_loss=0.267, over 29252.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1225, cr_loss=0.3657, attn_decoder_loss=0.2436, over 4142376.32 frames. 
], batch size: 100, lr: 3.72e-03, grad_scale: 8.0 +2024-09-18 21:27:49,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=525900.0, ans=0.2 +2024-09-18 21:28:08,858 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.56 vs. limit=22.5 +2024-09-18 21:28:12,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=525940.0, ans=0.125 +2024-09-18 21:28:20,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=525980.0, ans=0.0 +2024-09-18 21:28:29,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=525980.0, ans=0.0 +2024-09-18 21:28:36,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.46 vs. limit=15.0 +2024-09-18 21:28:39,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=526020.0, ans=0.025 +2024-09-18 21:28:43,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=526020.0, ans=0.1 +2024-09-18 21:28:50,801 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.169e+01 8.439e+01 8.914e+01 9.350e+01 1.362e+02, threshold=1.783e+02, percent-clipped=0.0 +2024-09-18 21:29:00,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=526060.0, ans=0.125 +2024-09-18 21:29:04,748 INFO [train.py:1198] (0/2) Epoch 30, batch 300, loss[loss=0.2479, ctc_loss=0.1267, cr_loss=0.3882, attn_decoder_loss=0.2528, over 29514.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1223, cr_loss=0.366, attn_decoder_loss=0.2433, over 4508734.64 frames. ], batch size: 92, lr: 3.72e-03, grad_scale: 8.0 +2024-09-18 21:29:05,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=526100.0, ans=0.0 +2024-09-18 21:29:15,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=526100.0, ans=0.0 +2024-09-18 21:29:16,102 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.81 vs. 
limit=22.5 +2024-09-18 21:29:33,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=526140.0, ans=0.125 +2024-09-18 21:29:45,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=526180.0, ans=0.125 +2024-09-18 21:30:00,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=526220.0, ans=0.2 +2024-09-18 21:30:08,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=526260.0, ans=0.1 +2024-09-18 21:30:21,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=526300.0, ans=0.025 +2024-09-18 21:30:22,650 INFO [train.py:1198] (0/2) Epoch 30, batch 350, loss[loss=0.2144, ctc_loss=0.09692, cr_loss=0.2959, attn_decoder_loss=0.2208, over 29333.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1224, cr_loss=0.3655, attn_decoder_loss=0.2437, over 4794140.64 frames. ], batch size: 71, lr: 3.72e-03, grad_scale: 8.0 +2024-09-18 21:31:10,287 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:31:20,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=526420.0, ans=0.125 +2024-09-18 21:31:22,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=526460.0, ans=0.025 +2024-09-18 21:31:24,892 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.499e+01 8.639e+01 9.253e+01 9.920e+01 3.039e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-18 21:31:38,413 INFO [train.py:1198] (0/2) Epoch 30, batch 400, loss[loss=0.2448, ctc_loss=0.1206, cr_loss=0.3706, attn_decoder_loss=0.2504, over 29706.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1223, cr_loss=0.365, attn_decoder_loss=0.2437, over 5025692.44 frames. ], batch size: 82, lr: 3.72e-03, grad_scale: 16.0 +2024-09-18 21:31:55,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=526540.0, ans=0.0 +2024-09-18 21:32:42,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=526660.0, ans=0.125 +2024-09-18 21:32:47,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=526660.0, ans=0.1 +2024-09-18 21:32:56,028 INFO [train.py:1198] (0/2) Epoch 30, batch 450, loss[loss=0.2464, ctc_loss=0.1238, cr_loss=0.3671, attn_decoder_loss=0.2519, over 29685.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.122, cr_loss=0.3645, attn_decoder_loss=0.2434, over 5188016.55 frames. ], batch size: 83, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:32:56,653 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.38 vs. 
limit=12.0 +2024-09-18 21:33:00,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=526700.0, ans=0.125 +2024-09-18 21:33:07,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=526700.0, ans=0.125 +2024-09-18 21:33:07,617 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.95 vs. limit=10.0 +2024-09-18 21:33:11,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=526740.0, ans=0.125 +2024-09-18 21:33:29,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=526780.0, ans=0.125 +2024-09-18 21:33:39,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=526780.0, ans=0.1 +2024-09-18 21:33:44,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=526820.0, ans=0.125 +2024-09-18 21:33:48,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.28 vs. limit=12.0 +2024-09-18 21:34:01,950 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.512e+01 8.936e+01 9.488e+01 1.864e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 21:34:06,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=526860.0, ans=0.0 +2024-09-18 21:34:13,945 INFO [train.py:1198] (0/2) Epoch 30, batch 500, loss[loss=0.2637, ctc_loss=0.1444, cr_loss=0.4224, attn_decoder_loss=0.2676, over 29442.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1215, cr_loss=0.3636, attn_decoder_loss=0.2427, over 5330605.84 frames. ], batch size: 94, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:34:53,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=526980.0, ans=0.5 +2024-09-18 21:35:30,023 INFO [train.py:1198] (0/2) Epoch 30, batch 550, loss[loss=0.2569, ctc_loss=0.1279, cr_loss=0.3925, attn_decoder_loss=0.2625, over 28775.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1211, cr_loss=0.3625, attn_decoder_loss=0.2426, over 5422706.90 frames. ], batch size: 104, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:35:51,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=527140.0, ans=0.125 +2024-09-18 21:36:15,444 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.70 vs. 
limit=22.5 +2024-09-18 21:36:16,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=527220.0, ans=0.0 +2024-09-18 21:36:24,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=527220.0, ans=0.125 +2024-09-18 21:36:34,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=527260.0, ans=15.0 +2024-09-18 21:36:36,358 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.634e+01 8.972e+01 9.427e+01 2.186e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-18 21:36:38,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=527260.0, ans=0.07 +2024-09-18 21:36:44,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=527260.0, ans=0.125 +2024-09-18 21:36:44,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=527260.0, ans=0.125 +2024-09-18 21:36:48,688 INFO [train.py:1198] (0/2) Epoch 30, batch 600, loss[loss=0.2587, ctc_loss=0.1384, cr_loss=0.402, attn_decoder_loss=0.2632, over 29206.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1212, cr_loss=0.3633, attn_decoder_loss=0.243, over 5510178.63 frames. ], batch size: 100, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:36:55,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=527300.0, ans=0.0 +2024-09-18 21:37:08,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=527340.0, ans=0.125 +2024-09-18 21:37:13,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=527340.0, ans=0.125 +2024-09-18 21:37:16,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=527340.0, ans=0.125 +2024-09-18 21:37:25,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=527380.0, ans=0.0 +2024-09-18 21:37:31,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=527380.0, ans=10.0 +2024-09-18 21:37:37,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.91 vs. limit=12.0 +2024-09-18 21:37:43,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=527420.0, ans=0.125 +2024-09-18 21:37:53,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=527460.0, ans=0.2 +2024-09-18 21:38:06,372 INFO [train.py:1198] (0/2) Epoch 30, batch 650, loss[loss=0.2402, ctc_loss=0.1206, cr_loss=0.364, attn_decoder_loss=0.2454, over 29740.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1208, cr_loss=0.3625, attn_decoder_loss=0.2424, over 5587077.19 frames. ], batch size: 81, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:39:01,448 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.23 vs. 
limit=12.0 +2024-09-18 21:39:09,640 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.386e+01 8.897e+01 9.302e+01 1.225e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-18 21:39:09,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=527660.0, ans=0.0 +2024-09-18 21:39:21,810 INFO [train.py:1198] (0/2) Epoch 30, batch 700, loss[loss=0.2317, ctc_loss=0.1223, cr_loss=0.3486, attn_decoder_loss=0.2361, over 29540.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1209, cr_loss=0.3629, attn_decoder_loss=0.2427, over 5637501.27 frames. ], batch size: 76, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:39:28,572 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.94 vs. limit=15.0 +2024-09-18 21:39:40,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=527740.0, ans=0.07 +2024-09-18 21:40:06,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=527820.0, ans=0.125 +2024-09-18 21:40:14,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.99 vs. limit=15.0 +2024-09-18 21:40:25,484 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.04 vs. limit=15.0 +2024-09-18 21:40:39,933 INFO [train.py:1198] (0/2) Epoch 30, batch 750, loss[loss=0.2439, ctc_loss=0.1221, cr_loss=0.3634, attn_decoder_loss=0.2493, over 29711.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1207, cr_loss=0.3618, attn_decoder_loss=0.2423, over 5677379.27 frames. ], batch size: 82, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:40:47,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=527900.0, ans=0.2 +2024-09-18 21:40:49,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=527900.0, ans=0.0 +2024-09-18 21:40:52,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=527900.0, ans=0.07 +2024-09-18 21:41:16,424 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-132000.pt +2024-09-18 21:41:28,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=527980.0, ans=0.125 +2024-09-18 21:41:38,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=528020.0, ans=0.125 +2024-09-18 21:41:41,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.96 vs. limit=10.0 +2024-09-18 21:41:46,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.96 vs. 
limit=15.0 +2024-09-18 21:41:52,809 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.182e+01 8.561e+01 8.909e+01 9.515e+01 3.316e+02, threshold=1.782e+02, percent-clipped=2.0 +2024-09-18 21:42:04,147 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.92 vs. limit=22.5 +2024-09-18 21:42:04,968 INFO [train.py:1198] (0/2) Epoch 30, batch 800, loss[loss=0.2214, ctc_loss=0.1132, cr_loss=0.3289, attn_decoder_loss=0.2261, over 29606.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1208, cr_loss=0.3622, attn_decoder_loss=0.2421, over 5708120.81 frames. ], batch size: 73, lr: 3.71e-03, grad_scale: 16.0 +2024-09-18 21:42:23,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=528140.0, ans=0.125 +2024-09-18 21:42:23,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=528140.0, ans=0.0 +2024-09-18 21:42:26,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=528140.0, ans=0.2 +2024-09-18 21:42:39,931 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:43:08,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=528260.0, ans=0.1 +2024-09-18 21:43:20,040 INFO [train.py:1198] (0/2) Epoch 30, batch 850, loss[loss=0.251, ctc_loss=0.1287, cr_loss=0.3777, attn_decoder_loss=0.2562, over 29712.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1205, cr_loss=0.3614, attn_decoder_loss=0.242, over 5734294.14 frames. ], batch size: 89, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:43:23,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=528300.0, ans=0.125 +2024-09-18 21:43:33,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=528340.0, ans=0.125 +2024-09-18 21:43:39,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=528340.0, ans=0.125 +2024-09-18 21:43:55,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=528380.0, ans=0.2 +2024-09-18 21:44:10,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=528420.0, ans=0.0 +2024-09-18 21:44:21,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=528460.0, ans=0.125 +2024-09-18 21:44:26,800 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.70 vs. 
limit=22.5 +2024-09-18 21:44:27,504 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.440e+01 8.960e+01 9.629e+01 1.513e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-18 21:44:33,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=528460.0, ans=0.1 +2024-09-18 21:44:37,931 INFO [train.py:1198] (0/2) Epoch 30, batch 900, loss[loss=0.2207, ctc_loss=0.1104, cr_loss=0.3385, attn_decoder_loss=0.2255, over 29600.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1207, cr_loss=0.3616, attn_decoder_loss=0.2423, over 5739196.92 frames. ], batch size: 73, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:44:39,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=528500.0, ans=0.5 +2024-09-18 21:44:54,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=528540.0, ans=0.125 +2024-09-18 21:45:00,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=528540.0, ans=0.1 +2024-09-18 21:45:02,965 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.11 vs. limit=15.0 +2024-09-18 21:45:06,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=528580.0, ans=0.0 +2024-09-18 21:45:17,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=528580.0, ans=0.125 +2024-09-18 21:45:31,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.17 vs. limit=6.0 +2024-09-18 21:45:32,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=528620.0, ans=0.125 +2024-09-18 21:45:36,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=528620.0, ans=0.125 +2024-09-18 21:45:37,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=528620.0, ans=0.125 +2024-09-18 21:45:43,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=528660.0, ans=0.1 +2024-09-18 21:45:45,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=528660.0, ans=0.95 +2024-09-18 21:45:55,578 INFO [train.py:1198] (0/2) Epoch 30, batch 950, loss[loss=0.2204, ctc_loss=0.1093, cr_loss=0.3473, attn_decoder_loss=0.225, over 29523.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1207, cr_loss=0.3618, attn_decoder_loss=0.2425, over 5741816.45 frames. ], batch size: 74, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:46:20,636 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.69 vs. 
limit=15.0 +2024-09-18 21:46:49,013 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:46:50,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=528820.0, ans=0.125 +2024-09-18 21:47:00,634 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.476e+01 9.054e+01 9.594e+01 4.825e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-18 21:47:11,026 INFO [train.py:1198] (0/2) Epoch 30, batch 1000, loss[loss=0.2421, ctc_loss=0.1265, cr_loss=0.3747, attn_decoder_loss=0.2466, over 29526.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1215, cr_loss=0.3633, attn_decoder_loss=0.2433, over 5736587.22 frames. ], batch size: 77, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:47:19,426 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.30 vs. limit=15.0 +2024-09-18 21:47:35,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=528940.0, ans=10.0 +2024-09-18 21:47:37,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=528940.0, ans=0.0 +2024-09-18 21:48:01,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=529020.0, ans=0.0 +2024-09-18 21:48:17,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=529060.0, ans=0.125 +2024-09-18 21:48:24,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=529060.0, ans=0.125 +2024-09-18 21:48:28,834 INFO [train.py:1198] (0/2) Epoch 30, batch 1050, loss[loss=0.2473, ctc_loss=0.1274, cr_loss=0.3758, attn_decoder_loss=0.2523, over 29680.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1209, cr_loss=0.3624, attn_decoder_loss=0.2426, over 5744557.33 frames. ], batch size: 85, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:48:39,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=529100.0, ans=0.0 +2024-09-18 21:48:44,932 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.86 vs. limit=15.0 +2024-09-18 21:48:48,974 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:48:51,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=529140.0, ans=0.2 +2024-09-18 21:48:53,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=529140.0, ans=0.0 +2024-09-18 21:49:36,321 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.145e+01 8.411e+01 8.824e+01 9.446e+01 1.337e+02, threshold=1.765e+02, percent-clipped=0.0 +2024-09-18 21:49:38,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=529260.0, ans=0.2 +2024-09-18 21:49:46,959 INFO [train.py:1198] (0/2) Epoch 30, batch 1100, loss[loss=0.2296, ctc_loss=0.1157, cr_loss=0.3611, attn_decoder_loss=0.2342, over 29439.00 frames. 
], tot_loss[loss=0.2374, ctc_loss=0.1208, cr_loss=0.3625, attn_decoder_loss=0.2423, over 5757634.80 frames. ], batch size: 78, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:49:54,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=529300.0, ans=0.125 +2024-09-18 21:50:05,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=529340.0, ans=0.0 +2024-09-18 21:50:06,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=529340.0, ans=0.0 +2024-09-18 21:50:13,648 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.81 vs. limit=15.0 +2024-09-18 21:50:13,776 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.71 vs. limit=10.0 +2024-09-18 21:50:19,936 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.75 vs. limit=15.0 +2024-09-18 21:50:20,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=529380.0, ans=0.125 +2024-09-18 21:50:34,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=529420.0, ans=0.125 +2024-09-18 21:51:00,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=529460.0, ans=6.0 +2024-09-18 21:51:02,533 INFO [train.py:1198] (0/2) Epoch 30, batch 1150, loss[loss=0.2275, ctc_loss=0.1137, cr_loss=0.3515, attn_decoder_loss=0.2323, over 29441.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1212, cr_loss=0.3634, attn_decoder_loss=0.2425, over 5756979.78 frames. ], batch size: 78, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 21:51:39,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=529580.0, ans=0.125 +2024-09-18 21:51:47,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=529620.0, ans=0.025 +2024-09-18 21:51:52,921 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.04 vs. limit=22.5 +2024-09-18 21:52:04,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=529660.0, ans=0.1 +2024-09-18 21:52:10,742 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.168e+01 8.491e+01 9.020e+01 1.005e+02 1.994e+02, threshold=1.804e+02, percent-clipped=1.0 +2024-09-18 21:52:10,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=529660.0, ans=0.125 +2024-09-18 21:52:21,418 INFO [train.py:1198] (0/2) Epoch 30, batch 1200, loss[loss=0.2451, ctc_loss=0.1233, cr_loss=0.3638, attn_decoder_loss=0.2506, over 29665.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1216, cr_loss=0.3641, attn_decoder_loss=0.2434, over 5749111.71 frames. 
], batch size: 85, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 21:52:26,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=529700.0, ans=0.0 +2024-09-18 21:52:39,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.49 vs. limit=15.0 +2024-09-18 21:52:43,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=529740.0, ans=0.0 +2024-09-18 21:52:58,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=529780.0, ans=0.1 +2024-09-18 21:53:07,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=529820.0, ans=0.1 +2024-09-18 21:53:38,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=529900.0, ans=0.1 +2024-09-18 21:53:40,018 INFO [train.py:1198] (0/2) Epoch 30, batch 1250, loss[loss=0.2442, ctc_loss=0.1182, cr_loss=0.3644, attn_decoder_loss=0.2501, over 29511.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1216, cr_loss=0.3641, attn_decoder_loss=0.2439, over 5776062.80 frames. ], batch size: 92, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 21:53:50,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.79 vs. limit=6.0 +2024-09-18 21:54:23,563 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=15.0 +2024-09-18 21:54:45,398 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.969e+01 8.456e+01 8.891e+01 9.459e+01 2.793e+02, threshold=1.778e+02, percent-clipped=2.0 +2024-09-18 21:54:55,979 INFO [train.py:1198] (0/2) Epoch 30, batch 1300, loss[loss=0.2491, ctc_loss=0.1199, cr_loss=0.3664, attn_decoder_loss=0.2553, over 28477.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1215, cr_loss=0.3637, attn_decoder_loss=0.2433, over 5780801.92 frames. ], batch size: 112, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 21:55:20,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=530140.0, ans=0.0 +2024-09-18 21:55:47,196 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.92 vs. limit=15.0 +2024-09-18 21:56:02,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=530260.0, ans=0.1 +2024-09-18 21:56:03,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=530260.0, ans=0.125 +2024-09-18 21:56:14,324 INFO [train.py:1198] (0/2) Epoch 30, batch 1350, loss[loss=0.2421, ctc_loss=0.1218, cr_loss=0.3766, attn_decoder_loss=0.2471, over 29734.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1214, cr_loss=0.3636, attn_decoder_loss=0.2433, over 5797901.68 frames. 
], batch size: 81, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 21:56:44,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=530380.0, ans=0.0 +2024-09-18 21:56:51,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.79 vs. limit=22.5 +2024-09-18 21:57:09,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=530420.0, ans=0.0 +2024-09-18 21:57:12,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=530420.0, ans=0.2 +2024-09-18 21:57:20,869 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.324e+01 8.385e+01 8.847e+01 9.362e+01 1.529e+02, threshold=1.769e+02, percent-clipped=0.0 +2024-09-18 21:57:30,064 INFO [train.py:1198] (0/2) Epoch 30, batch 1400, loss[loss=0.2149, ctc_loss=0.1105, cr_loss=0.3362, attn_decoder_loss=0.219, over 29564.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1215, cr_loss=0.3641, attn_decoder_loss=0.243, over 5808926.92 frames. ], batch size: 69, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 21:57:37,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=530500.0, ans=0.125 +2024-09-18 21:57:55,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=530540.0, ans=0.125 +2024-09-18 21:58:02,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=530580.0, ans=0.125 +2024-09-18 21:58:04,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=530580.0, ans=0.125 +2024-09-18 21:58:25,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=530620.0, ans=0.2 +2024-09-18 21:58:48,074 INFO [train.py:1198] (0/2) Epoch 30, batch 1450, loss[loss=0.2607, ctc_loss=0.1417, cr_loss=0.3979, attn_decoder_loss=0.2651, over 29436.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1218, cr_loss=0.3649, attn_decoder_loss=0.2436, over 5805950.89 frames. ], batch size: 94, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 21:58:57,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=530700.0, ans=0.125 +2024-09-18 21:59:20,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=530780.0, ans=0.2 +2024-09-18 21:59:39,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=530820.0, ans=0.1 +2024-09-18 21:59:56,666 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.738e+01 8.746e+01 9.204e+01 9.882e+01 6.648e+02, threshold=1.841e+02, percent-clipped=1.0 +2024-09-18 22:00:04,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=530900.0, ans=0.2 +2024-09-18 22:00:05,880 INFO [train.py:1198] (0/2) Epoch 30, batch 1500, loss[loss=0.2405, ctc_loss=0.1209, cr_loss=0.3645, attn_decoder_loss=0.2457, over 29632.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.122, cr_loss=0.3654, attn_decoder_loss=0.2441, over 5805108.31 frames. 
], batch size: 86, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 22:00:10,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=530900.0, ans=0.125 +2024-09-18 22:00:24,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.23 vs. limit=22.5 +2024-09-18 22:00:42,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=530980.0, ans=0.125 +2024-09-18 22:00:45,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=530980.0, ans=0.0 +2024-09-18 22:00:52,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=531020.0, ans=0.125 +2024-09-18 22:00:53,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=531020.0, ans=0.125 +2024-09-18 22:01:02,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=531020.0, ans=0.1 +2024-09-18 22:01:22,000 INFO [train.py:1198] (0/2) Epoch 30, batch 1550, loss[loss=0.253, ctc_loss=0.1356, cr_loss=0.4033, attn_decoder_loss=0.2571, over 29503.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1222, cr_loss=0.3656, attn_decoder_loss=0.2441, over 5780208.61 frames. ], batch size: 90, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 22:01:45,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=531140.0, ans=0.0 +2024-09-18 22:01:45,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=531140.0, ans=0.125 +2024-09-18 22:02:00,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.75 vs. limit=22.5 +2024-09-18 22:02:27,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=531260.0, ans=0.2 +2024-09-18 22:02:28,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=531260.0, ans=0.0 +2024-09-18 22:02:31,378 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.245e+01 8.587e+01 9.165e+01 1.006e+02 3.566e+02, threshold=1.833e+02, percent-clipped=2.0 +2024-09-18 22:02:40,583 INFO [train.py:1198] (0/2) Epoch 30, batch 1600, loss[loss=0.2434, ctc_loss=0.113, cr_loss=0.3379, attn_decoder_loss=0.2504, over 29679.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1222, cr_loss=0.3655, attn_decoder_loss=0.2438, over 5763362.81 frames. 
], batch size: 85, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:02:42,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=531300.0, ans=0.125 +2024-09-18 22:02:53,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=531300.0, ans=0.125 +2024-09-18 22:02:54,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=531340.0, ans=0.09899494936611666 +2024-09-18 22:03:05,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=531340.0, ans=0.2 +2024-09-18 22:03:15,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=531380.0, ans=0.125 +2024-09-18 22:03:20,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=531380.0, ans=0.125 +2024-09-18 22:03:23,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=531380.0, ans=0.125 +2024-09-18 22:03:23,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=531380.0, ans=0.0 +2024-09-18 22:03:49,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=531460.0, ans=0.125 +2024-09-18 22:03:50,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=531460.0, ans=0.0 +2024-09-18 22:03:58,188 INFO [train.py:1198] (0/2) Epoch 30, batch 1650, loss[loss=0.243, ctc_loss=0.1239, cr_loss=0.3647, attn_decoder_loss=0.2482, over 29726.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.122, cr_loss=0.3647, attn_decoder_loss=0.2435, over 5756722.27 frames. ], batch size: 89, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:04:34,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff3.min_abs, batch_count=531580.0, ans=0.2 +2024-09-18 22:04:45,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=531620.0, ans=0.125 +2024-09-18 22:04:58,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=531660.0, ans=0.1 +2024-09-18 22:05:04,521 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.948e+01 8.547e+01 8.983e+01 9.697e+01 1.906e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 22:05:13,433 INFO [train.py:1198] (0/2) Epoch 30, batch 1700, loss[loss=0.2035, ctc_loss=0.09007, cr_loss=0.299, attn_decoder_loss=0.2094, over 29569.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1218, cr_loss=0.3643, attn_decoder_loss=0.2432, over 5778162.51 frames. 
], batch size: 69, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:05:15,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=531700.0, ans=0.125 +2024-09-18 22:05:52,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=531780.0, ans=0.0 +2024-09-18 22:06:05,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.36 vs. limit=10.0 +2024-09-18 22:06:08,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=531820.0, ans=0.125 +2024-09-18 22:06:31,438 INFO [train.py:1198] (0/2) Epoch 30, batch 1750, loss[loss=0.2125, ctc_loss=0.1105, cr_loss=0.3454, attn_decoder_loss=0.2162, over 29329.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1215, cr_loss=0.3641, attn_decoder_loss=0.2428, over 5786631.16 frames. ], batch size: 67, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:06:55,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.69 vs. limit=22.5 +2024-09-18 22:07:13,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=531980.0, ans=0.125 +2024-09-18 22:07:17,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=532020.0, ans=0.07 +2024-09-18 22:07:28,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=532020.0, ans=0.125 +2024-09-18 22:07:40,551 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.985e+01 8.324e+01 8.730e+01 9.634e+01 1.252e+02, threshold=1.746e+02, percent-clipped=0.0 +2024-09-18 22:07:46,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=532060.0, ans=0.125 +2024-09-18 22:07:47,308 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.00 vs. limit=10.0 +2024-09-18 22:07:48,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=532100.0, ans=0.025 +2024-09-18 22:07:49,663 INFO [train.py:1198] (0/2) Epoch 30, batch 1800, loss[loss=0.2476, ctc_loss=0.129, cr_loss=0.38, attn_decoder_loss=0.2523, over 29698.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1215, cr_loss=0.3639, attn_decoder_loss=0.243, over 5788954.96 frames. ], batch size: 83, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:07:53,557 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.88 vs. limit=15.0 +2024-09-18 22:07:55,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.26 vs. 
limit=10.0 +2024-09-18 22:07:56,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=532100.0, ans=0.125 +2024-09-18 22:07:56,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=532100.0, ans=0.0 +2024-09-18 22:08:31,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=532180.0, ans=0.125 +2024-09-18 22:08:52,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.53 vs. limit=15.0 +2024-09-18 22:09:05,418 INFO [train.py:1198] (0/2) Epoch 30, batch 1850, loss[loss=0.2474, ctc_loss=0.1161, cr_loss=0.3613, attn_decoder_loss=0.254, over 29632.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1216, cr_loss=0.3643, attn_decoder_loss=0.243, over 5795266.65 frames. ], batch size: 86, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:09:08,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=532300.0, ans=0.125 +2024-09-18 22:09:14,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=532300.0, ans=0.1 +2024-09-18 22:09:31,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=532340.0, ans=0.0 +2024-09-18 22:09:56,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=532420.0, ans=0.125 +2024-09-18 22:10:06,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=532460.0, ans=0.2 +2024-09-18 22:10:15,372 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.201e+01 8.313e+01 8.937e+01 9.417e+01 1.433e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-18 22:10:22,899 INFO [train.py:1198] (0/2) Epoch 30, batch 1900, loss[loss=0.2487, ctc_loss=0.131, cr_loss=0.3821, attn_decoder_loss=0.2533, over 29701.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1217, cr_loss=0.3641, attn_decoder_loss=0.2435, over 5802692.56 frames. ], batch size: 89, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:10:24,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=532500.0, ans=0.0 +2024-09-18 22:10:38,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=532540.0, ans=0.2 +2024-09-18 22:10:42,397 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.92 vs. 
limit=15.0 +2024-09-18 22:10:44,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=532540.0, ans=0.125 +2024-09-18 22:11:10,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=532620.0, ans=0.0 +2024-09-18 22:11:27,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=532660.0, ans=0.125 +2024-09-18 22:11:32,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=532660.0, ans=0.1 +2024-09-18 22:11:39,201 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.79 vs. limit=15.0 +2024-09-18 22:11:41,039 INFO [train.py:1198] (0/2) Epoch 30, batch 1950, loss[loss=0.2299, ctc_loss=0.1115, cr_loss=0.3412, attn_decoder_loss=0.2354, over 29450.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1224, cr_loss=0.3665, attn_decoder_loss=0.2447, over 5817507.78 frames. ], batch size: 78, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:12:05,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=532740.0, ans=0.125 +2024-09-18 22:12:09,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=532780.0, ans=0.125 +2024-09-18 22:12:11,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=532780.0, ans=0.0 +2024-09-18 22:12:25,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=532820.0, ans=0.125 +2024-09-18 22:12:34,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=532820.0, ans=0.125 +2024-09-18 22:12:46,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=532860.0, ans=0.0 +2024-09-18 22:12:46,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=532860.0, ans=0.0 +2024-09-18 22:12:49,465 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.636e+01 8.712e+01 9.082e+01 9.590e+01 8.305e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-18 22:12:56,994 INFO [train.py:1198] (0/2) Epoch 30, batch 2000, loss[loss=0.2152, ctc_loss=0.102, cr_loss=0.339, attn_decoder_loss=0.2203, over 29332.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1231, cr_loss=0.3672, attn_decoder_loss=0.245, over 5794262.31 frames. ], batch size: 67, lr: 3.69e-03, grad_scale: 16.0 +2024-09-18 22:13:16,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=532940.0, ans=0.5 +2024-09-18 22:13:18,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=532940.0, ans=0.2 +2024-09-18 22:13:18,598 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.47 vs. 
limit=6.0 +2024-09-18 22:13:31,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=532980.0, ans=0.05 +2024-09-18 22:13:37,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=532980.0, ans=0.125 +2024-09-18 22:13:39,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=532980.0, ans=0.1 +2024-09-18 22:13:53,529 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.01 vs. limit=15.0 +2024-09-18 22:13:55,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=533020.0, ans=0.125 +2024-09-18 22:14:01,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=533060.0, ans=0.0 +2024-09-18 22:14:15,044 INFO [train.py:1198] (0/2) Epoch 30, batch 2050, loss[loss=0.2141, ctc_loss=0.1045, cr_loss=0.3384, attn_decoder_loss=0.2187, over 29419.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1226, cr_loss=0.3658, attn_decoder_loss=0.2441, over 5787038.28 frames. ], batch size: 70, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:14:40,292 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.89 vs. limit=15.0 +2024-09-18 22:14:54,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=533180.0, ans=0.125 +2024-09-18 22:15:02,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=533220.0, ans=0.2 +2024-09-18 22:15:03,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=533220.0, ans=0.0 +2024-09-18 22:15:05,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=533220.0, ans=0.025 +2024-09-18 22:15:26,831 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.796e+01 8.326e+01 8.954e+01 9.784e+01 1.550e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-18 22:15:31,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=533300.0, ans=0.125 +2024-09-18 22:15:33,017 INFO [train.py:1198] (0/2) Epoch 30, batch 2100, loss[loss=0.2313, ctc_loss=0.1137, cr_loss=0.3377, attn_decoder_loss=0.2369, over 29743.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1213, cr_loss=0.3633, attn_decoder_loss=0.2431, over 5799197.02 frames. ], batch size: 81, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:15:34,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=533300.0, ans=0.125 +2024-09-18 22:15:45,807 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.79 vs. limit=15.0 +2024-09-18 22:15:47,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.90 vs. 
limit=15.0 +2024-09-18 22:16:06,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=533380.0, ans=0.0 +2024-09-18 22:16:34,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=533460.0, ans=0.0 +2024-09-18 22:16:48,303 INFO [train.py:1198] (0/2) Epoch 30, batch 2150, loss[loss=0.2259, ctc_loss=0.1135, cr_loss=0.3482, attn_decoder_loss=0.2306, over 29449.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.121, cr_loss=0.3635, attn_decoder_loss=0.2425, over 5814478.71 frames. ], batch size: 78, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:17:13,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=533540.0, ans=0.1 +2024-09-18 22:17:21,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=533580.0, ans=0.0 +2024-09-18 22:17:21,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.81 vs. limit=22.5 +2024-09-18 22:17:33,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=533580.0, ans=0.025 +2024-09-18 22:17:33,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=533580.0, ans=0.0 +2024-09-18 22:17:45,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=533620.0, ans=0.025 +2024-09-18 22:17:54,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=533660.0, ans=0.125 +2024-09-18 22:18:00,805 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 8.351e+01 8.919e+01 9.432e+01 1.434e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-18 22:18:07,002 INFO [train.py:1198] (0/2) Epoch 30, batch 2200, loss[loss=0.247, ctc_loss=0.126, cr_loss=0.3574, attn_decoder_loss=0.2525, over 29639.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1211, cr_loss=0.3635, attn_decoder_loss=0.2426, over 5812166.69 frames. 
], batch size: 86, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:18:30,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=533740.0, ans=0.125 +2024-09-18 22:18:30,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=533740.0, ans=0.1 +2024-09-18 22:18:30,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=533740.0, ans=0.1 +2024-09-18 22:18:37,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=533780.0, ans=0.0 +2024-09-18 22:19:00,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=533820.0, ans=0.0 +2024-09-18 22:19:01,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=533820.0, ans=0.125 +2024-09-18 22:19:03,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=533820.0, ans=0.0 +2024-09-18 22:19:14,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=533860.0, ans=0.1 +2024-09-18 22:19:24,812 INFO [train.py:1198] (0/2) Epoch 30, batch 2250, loss[loss=0.2404, ctc_loss=0.1233, cr_loss=0.371, attn_decoder_loss=0.2452, over 29718.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1212, cr_loss=0.3633, attn_decoder_loss=0.2426, over 5811729.32 frames. ], batch size: 82, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:19:29,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=533900.0, ans=0.0 +2024-09-18 22:19:30,313 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.46 vs. limit=15.0 +2024-09-18 22:19:51,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=533940.0, ans=0.125 +2024-09-18 22:19:54,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=533980.0, ans=0.0 +2024-09-18 22:19:55,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=533980.0, ans=0.1 +2024-09-18 22:19:55,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff2.min_abs, batch_count=533980.0, ans=0.1 +2024-09-18 22:19:58,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=533980.0, ans=0.125 +2024-09-18 22:20:00,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=533980.0, ans=0.0 +2024-09-18 22:20:01,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=533980.0, ans=0.1 +2024-09-18 22:20:17,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.84 vs. 
limit=15.0 +2024-09-18 22:20:19,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=534020.0, ans=0.0 +2024-09-18 22:20:21,437 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:20:24,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=534060.0, ans=0.125 +2024-09-18 22:20:34,700 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.594e+01 8.443e+01 9.095e+01 9.654e+01 4.299e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-18 22:20:39,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=534100.0, ans=0.0 +2024-09-18 22:20:40,901 INFO [train.py:1198] (0/2) Epoch 30, batch 2300, loss[loss=0.2162, ctc_loss=0.1027, cr_loss=0.3189, attn_decoder_loss=0.2218, over 29331.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1207, cr_loss=0.3624, attn_decoder_loss=0.242, over 5798687.90 frames. ], batch size: 71, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:20:47,997 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.14 vs. limit=6.0 +2024-09-18 22:20:50,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=534100.0, ans=0.125 +2024-09-18 22:21:22,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=534180.0, ans=0.1 +2024-09-18 22:21:28,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=534220.0, ans=0.0 +2024-09-18 22:21:33,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=534220.0, ans=0.125 +2024-09-18 22:21:54,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=534260.0, ans=0.125 +2024-09-18 22:21:58,861 INFO [train.py:1198] (0/2) Epoch 30, batch 2350, loss[loss=0.2548, ctc_loss=0.135, cr_loss=0.3788, attn_decoder_loss=0.2597, over 29704.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1212, cr_loss=0.363, attn_decoder_loss=0.2424, over 5804636.73 frames. ], batch size: 83, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:21:59,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=534300.0, ans=0.125 +2024-09-18 22:22:08,834 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.85 vs. limit=15.0 +2024-09-18 22:22:10,073 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.97 vs. 
limit=15.0 +2024-09-18 22:22:29,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=534380.0, ans=0.0 +2024-09-18 22:22:32,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=534380.0, ans=0.025 +2024-09-18 22:22:38,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=534380.0, ans=15.0 +2024-09-18 22:22:40,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=534380.0, ans=0.04949747468305833 +2024-09-18 22:22:49,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=534420.0, ans=0.04949747468305833 +2024-09-18 22:23:03,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=534460.0, ans=0.2 +2024-09-18 22:23:03,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=534460.0, ans=0.2 +2024-09-18 22:23:08,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=534460.0, ans=0.0 +2024-09-18 22:23:11,011 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.566e+01 9.166e+01 9.835e+01 1.994e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-18 22:23:11,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=534460.0, ans=0.025 +2024-09-18 22:23:17,299 INFO [train.py:1198] (0/2) Epoch 30, batch 2400, loss[loss=0.2293, ctc_loss=0.1124, cr_loss=0.3417, attn_decoder_loss=0.2347, over 29545.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1214, cr_loss=0.3636, attn_decoder_loss=0.2426, over 5808832.62 frames. ], batch size: 76, lr: 3.69e-03, grad_scale: 16.0 +2024-09-18 22:23:28,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=534500.0, ans=0.1 +2024-09-18 22:23:32,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=534540.0, ans=0.125 +2024-09-18 22:23:46,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=534580.0, ans=0.95 +2024-09-18 22:23:51,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=534580.0, ans=0.0 +2024-09-18 22:24:00,947 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.12 vs. limit=15.0 +2024-09-18 22:24:13,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=534620.0, ans=0.2 +2024-09-18 22:24:33,295 INFO [train.py:1198] (0/2) Epoch 30, batch 2450, loss[loss=0.2363, ctc_loss=0.1187, cr_loss=0.3744, attn_decoder_loss=0.2411, over 29711.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1223, cr_loss=0.3648, attn_decoder_loss=0.2438, over 5784680.72 frames. 
], batch size: 82, lr: 3.69e-03, grad_scale: 16.0 +2024-09-18 22:24:35,693 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.71 vs. limit=22.5 +2024-09-18 22:24:57,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=534740.0, ans=0.125 +2024-09-18 22:25:10,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=534780.0, ans=0.0 +2024-09-18 22:25:31,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.12 vs. limit=15.0 +2024-09-18 22:25:40,928 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.74 vs. limit=6.0 +2024-09-18 22:25:44,668 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.314e+01 8.707e+01 9.216e+01 9.749e+01 1.884e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-18 22:25:50,707 INFO [train.py:1198] (0/2) Epoch 30, batch 2500, loss[loss=0.2474, ctc_loss=0.1234, cr_loss=0.3633, attn_decoder_loss=0.2531, over 29616.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1223, cr_loss=0.3647, attn_decoder_loss=0.2438, over 5795687.74 frames. ], batch size: 86, lr: 3.69e-03, grad_scale: 16.0 +2024-09-18 22:25:59,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=534900.0, ans=0.125 +2024-09-18 22:26:07,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=534940.0, ans=0.0 +2024-09-18 22:26:15,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=534940.0, ans=0.0 +2024-09-18 22:26:16,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=534940.0, ans=0.125 +2024-09-18 22:26:28,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=534980.0, ans=0.125 +2024-09-18 22:26:48,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=535020.0, ans=0.125 +2024-09-18 22:27:01,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=535060.0, ans=0.0 +2024-09-18 22:27:04,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=535060.0, ans=0.0 +2024-09-18 22:27:09,176 INFO [train.py:1198] (0/2) Epoch 30, batch 2550, loss[loss=0.2108, ctc_loss=0.1015, cr_loss=0.3205, attn_decoder_loss=0.2158, over 29387.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1224, cr_loss=0.3655, attn_decoder_loss=0.2439, over 5798796.41 frames. ], batch size: 67, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:27:18,773 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.39 vs. 
limit=15.0 +2024-09-18 22:27:19,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=535100.0, ans=0.2 +2024-09-18 22:28:10,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=535260.0, ans=0.1 +2024-09-18 22:28:19,917 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.83 vs. limit=15.0 +2024-09-18 22:28:20,584 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.046e+01 8.542e+01 9.079e+01 9.680e+01 2.807e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-18 22:28:22,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=535260.0, ans=0.1 +2024-09-18 22:28:25,097 INFO [train.py:1198] (0/2) Epoch 30, batch 2600, loss[loss=0.2332, ctc_loss=0.1232, cr_loss=0.3756, attn_decoder_loss=0.2371, over 29470.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1222, cr_loss=0.3652, attn_decoder_loss=0.244, over 5795471.67 frames. ], batch size: 78, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:28:31,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=535300.0, ans=0.1 +2024-09-18 22:28:32,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=535300.0, ans=0.09899494936611666 +2024-09-18 22:28:46,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=535340.0, ans=0.2 +2024-09-18 22:28:56,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=535380.0, ans=0.125 +2024-09-18 22:29:06,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=535380.0, ans=0.0 +2024-09-18 22:29:18,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=535420.0, ans=0.1 +2024-09-18 22:29:24,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=535420.0, ans=0.1 +2024-09-18 22:29:36,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=535460.0, ans=0.125 +2024-09-18 22:29:37,369 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.95 vs. limit=12.0 +2024-09-18 22:29:38,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=535460.0, ans=0.025 +2024-09-18 22:29:41,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=535500.0, ans=0.09899494936611666 +2024-09-18 22:29:42,492 INFO [train.py:1198] (0/2) Epoch 30, batch 2650, loss[loss=0.2548, ctc_loss=0.1307, cr_loss=0.3949, attn_decoder_loss=0.2598, over 29231.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1223, cr_loss=0.3662, attn_decoder_loss=0.2445, over 5802875.23 frames. 
], batch size: 100, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:30:06,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=535540.0, ans=0.125 +2024-09-18 22:30:06,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=535540.0, ans=0.1 +2024-09-18 22:30:27,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=535620.0, ans=0.025 +2024-09-18 22:30:30,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=535620.0, ans=0.125 +2024-09-18 22:30:38,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=535620.0, ans=0.125 +2024-09-18 22:30:55,370 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.645e+01 8.677e+01 9.089e+01 9.646e+01 4.909e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-18 22:31:00,015 INFO [train.py:1198] (0/2) Epoch 30, batch 2700, loss[loss=0.2453, ctc_loss=0.1213, cr_loss=0.36, attn_decoder_loss=0.2511, over 29529.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1225, cr_loss=0.3663, attn_decoder_loss=0.2448, over 5798336.44 frames. ], batch size: 87, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:31:03,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=535700.0, ans=0.125 +2024-09-18 22:31:03,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=535700.0, ans=0.125 +2024-09-18 22:31:27,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=535740.0, ans=0.025 +2024-09-18 22:31:34,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=535780.0, ans=0.1 +2024-09-18 22:31:42,984 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.71 vs. limit=15.0 +2024-09-18 22:31:56,446 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:32:00,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=535860.0, ans=0.2 +2024-09-18 22:32:15,740 INFO [train.py:1198] (0/2) Epoch 30, batch 2750, loss[loss=0.2235, ctc_loss=0.1093, cr_loss=0.332, attn_decoder_loss=0.2288, over 29493.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1216, cr_loss=0.3645, attn_decoder_loss=0.2437, over 5797070.69 frames. ], batch size: 75, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:32:22,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=535900.0, ans=0.025 +2024-09-18 22:32:49,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=535980.0, ans=0.1 +2024-09-18 22:33:10,363 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.06 vs. 
limit=15.0 +2024-09-18 22:33:12,251 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.08 vs. limit=22.5 +2024-09-18 22:33:14,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=536020.0, ans=0.125 +2024-09-18 22:33:15,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.68 vs. limit=15.0 +2024-09-18 22:33:29,620 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.131e+01 8.488e+01 8.995e+01 9.694e+01 2.537e+02, threshold=1.799e+02, percent-clipped=1.0 +2024-09-18 22:33:34,348 INFO [train.py:1198] (0/2) Epoch 30, batch 2800, loss[loss=0.2636, ctc_loss=0.1532, cr_loss=0.3792, attn_decoder_loss=0.2674, over 20761.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1221, cr_loss=0.365, attn_decoder_loss=0.2438, over 5779949.49 frames. ], batch size: 209, lr: 3.68e-03, grad_scale: 16.0 +2024-09-18 22:34:07,754 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:34:15,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=536180.0, ans=0.2 +2024-09-18 22:34:38,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.29 vs. limit=15.0 +2024-09-18 22:34:47,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=536260.0, ans=0.125 +2024-09-18 22:34:51,679 INFO [train.py:1198] (0/2) Epoch 30, batch 2850, loss[loss=0.2317, ctc_loss=0.1125, cr_loss=0.3514, attn_decoder_loss=0.2371, over 29492.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1224, cr_loss=0.3654, attn_decoder_loss=0.2441, over 5765824.64 frames. ], batch size: 77, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:34:54,193 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.41 vs. limit=15.0 +2024-09-18 22:35:08,085 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.89 vs. 
limit=15.0 +2024-09-18 22:35:08,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=536340.0, ans=0.0 +2024-09-18 22:35:10,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=536340.0, ans=0.0 +2024-09-18 22:35:26,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=536380.0, ans=10.0 +2024-09-18 22:35:28,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=536380.0, ans=0.025 +2024-09-18 22:35:37,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=536420.0, ans=0.125 +2024-09-18 22:36:04,435 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.865e+01 8.533e+01 9.000e+01 9.896e+01 2.723e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-18 22:36:07,490 INFO [train.py:1198] (0/2) Epoch 30, batch 2900, loss[loss=0.2376, ctc_loss=0.1235, cr_loss=0.3767, attn_decoder_loss=0.2419, over 29409.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1227, cr_loss=0.3666, attn_decoder_loss=0.2449, over 5790456.80 frames. ], batch size: 79, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:36:10,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=536500.0, ans=0.125 +2024-09-18 22:36:18,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=536500.0, ans=0.07 +2024-09-18 22:36:21,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=536540.0, ans=0.0 +2024-09-18 22:36:30,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=536540.0, ans=0.125 +2024-09-18 22:36:44,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=536580.0, ans=0.1 +2024-09-18 22:36:50,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_na.min_abs, batch_count=536580.0, ans=0.02 +2024-09-18 22:37:01,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=536620.0, ans=0.025 +2024-09-18 22:37:11,358 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.22 vs. limit=22.5 +2024-09-18 22:37:25,628 INFO [train.py:1198] (0/2) Epoch 30, batch 2950, loss[loss=0.2142, ctc_loss=0.1101, cr_loss=0.3293, attn_decoder_loss=0.2184, over 29530.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1222, cr_loss=0.3651, attn_decoder_loss=0.2438, over 5784616.09 frames. ], batch size: 75, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:37:52,576 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.04 vs. 
limit=15.0 +2024-09-18 22:37:58,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=536780.0, ans=0.125 +2024-09-18 22:38:41,262 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.475e+01 9.079e+01 9.790e+01 2.714e+02, threshold=1.816e+02, percent-clipped=3.0 +2024-09-18 22:38:43,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=536900.0, ans=0.125 +2024-09-18 22:38:44,466 INFO [train.py:1198] (0/2) Epoch 30, batch 3000, loss[loss=0.2408, ctc_loss=0.1235, cr_loss=0.3704, attn_decoder_loss=0.2456, over 29740.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1221, cr_loss=0.3649, attn_decoder_loss=0.2437, over 5784887.35 frames. ], batch size: 81, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:38:44,467 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 22:38:53,643 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.4863, 4.0234, 4.3950, 3.9746], device='cuda:0') +2024-09-18 22:38:56,198 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([2.5319, 4.0698, 4.1271, 4.1949], device='cuda:0') +2024-09-18 22:39:02,889 INFO [train.py:1230] (0/2) Epoch 30, validation: loss=0.2118, ctc_loss=0.03796, cr_loss=5.626e-15, attn_decoder_loss=0.2311, over 944034.00 frames. +2024-09-18 22:39:02,889 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 22:39:03,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=536900.0, ans=0.125 +2024-09-18 22:39:38,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=536980.0, ans=0.125 +2024-09-18 22:39:41,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=536980.0, ans=0.0 +2024-09-18 22:39:44,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=536980.0, ans=0.125 +2024-09-18 22:39:55,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=537020.0, ans=0.125 +2024-09-18 22:40:14,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=537060.0, ans=0.0 +2024-09-18 22:40:16,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=537060.0, ans=0.125 +2024-09-18 22:40:19,085 INFO [train.py:1198] (0/2) Epoch 30, batch 3050, loss[loss=0.2295, ctc_loss=0.1135, cr_loss=0.3567, attn_decoder_loss=0.2345, over 29534.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1223, cr_loss=0.3648, attn_decoder_loss=0.2442, over 5778863.30 frames. 
], batch size: 76, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:40:20,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=537100.0, ans=0.09899494936611666 +2024-09-18 22:40:32,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=537140.0, ans=0.035 +2024-09-18 22:40:34,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=537140.0, ans=0.0 +2024-09-18 22:40:56,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=537180.0, ans=0.1 +2024-09-18 22:41:02,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=537180.0, ans=0.1 +2024-09-18 22:41:10,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=537220.0, ans=0.0 +2024-09-18 22:41:23,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=537260.0, ans=0.2 +2024-09-18 22:41:33,829 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.707e+01 8.461e+01 8.902e+01 9.446e+01 1.923e+02, threshold=1.780e+02, percent-clipped=1.0 +2024-09-18 22:41:36,775 INFO [train.py:1198] (0/2) Epoch 30, batch 3100, loss[loss=0.2524, ctc_loss=0.1265, cr_loss=0.382, attn_decoder_loss=0.2579, over 29249.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1222, cr_loss=0.3648, attn_decoder_loss=0.2438, over 5778206.77 frames. ], batch size: 100, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:41:49,685 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.94 vs. limit=15.0 +2024-09-18 22:41:49,741 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.42 vs. limit=15.0 +2024-09-18 22:41:55,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=537340.0, ans=0.1 +2024-09-18 22:41:55,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=537340.0, ans=0.0 +2024-09-18 22:42:05,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=537380.0, ans=0.5 +2024-09-18 22:42:20,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=537380.0, ans=0.0 +2024-09-18 22:42:33,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=537420.0, ans=0.2 +2024-09-18 22:42:36,321 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.65 vs. limit=5.0 +2024-09-18 22:42:54,847 INFO [train.py:1198] (0/2) Epoch 30, batch 3150, loss[loss=0.2538, ctc_loss=0.1314, cr_loss=0.3983, attn_decoder_loss=0.2585, over 28799.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1219, cr_loss=0.3642, attn_decoder_loss=0.2438, over 5783969.00 frames. 
], batch size: 104, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:43:10,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=537540.0, ans=0.025 +2024-09-18 22:43:22,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=537540.0, ans=0.0 +2024-09-18 22:43:26,218 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.02 vs. limit=22.5 +2024-09-18 22:44:00,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=537660.0, ans=0.2 +2024-09-18 22:44:06,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=537660.0, ans=0.09899494936611666 +2024-09-18 22:44:07,778 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.272e+01 8.378e+01 8.875e+01 9.441e+01 1.254e+02, threshold=1.775e+02, percent-clipped=0.0 +2024-09-18 22:44:10,848 INFO [train.py:1198] (0/2) Epoch 30, batch 3200, loss[loss=0.2406, ctc_loss=0.1215, cr_loss=0.3807, attn_decoder_loss=0.2453, over 29787.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1217, cr_loss=0.364, attn_decoder_loss=0.2435, over 5793872.66 frames. ], batch size: 80, lr: 3.68e-03, grad_scale: 16.0 +2024-09-18 22:44:16,217 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.13 vs. limit=6.0 +2024-09-18 22:44:31,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=537740.0, ans=0.0 +2024-09-18 22:44:37,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=537740.0, ans=0.125 +2024-09-18 22:44:53,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=537780.0, ans=0.125 +2024-09-18 22:44:57,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=537820.0, ans=0.1 +2024-09-18 22:45:01,282 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.08 vs. limit=15.0 +2024-09-18 22:45:21,279 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.83 vs. limit=22.5 +2024-09-18 22:45:28,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=537900.0, ans=0.2 +2024-09-18 22:45:29,319 INFO [train.py:1198] (0/2) Epoch 30, batch 3250, loss[loss=0.2497, ctc_loss=0.1224, cr_loss=0.3674, attn_decoder_loss=0.2557, over 29696.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1221, cr_loss=0.3651, attn_decoder_loss=0.2439, over 5800578.72 frames. ], batch size: 84, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:45:29,947 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.36 vs. 
limit=15.0 +2024-09-18 22:45:38,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=537900.0, ans=0.0 +2024-09-18 22:45:55,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=537940.0, ans=0.125 +2024-09-18 22:46:05,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=537980.0, ans=0.2 +2024-09-18 22:46:42,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=538060.0, ans=0.0 +2024-09-18 22:46:45,605 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.587e+01 8.918e+01 9.595e+01 3.976e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-18 22:46:45,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=538100.0, ans=0.0 +2024-09-18 22:46:47,126 INFO [train.py:1198] (0/2) Epoch 30, batch 3300, loss[loss=0.2453, ctc_loss=0.1198, cr_loss=0.3585, attn_decoder_loss=0.2513, over 28537.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1211, cr_loss=0.3632, attn_decoder_loss=0.2426, over 5798033.94 frames. ], batch size: 111, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:47:01,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=538140.0, ans=0.125 +2024-09-18 22:47:05,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=538140.0, ans=0.0 +2024-09-18 22:47:10,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=538140.0, ans=0.1 +2024-09-18 22:47:17,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=538180.0, ans=0.125 +2024-09-18 22:47:19,850 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.63 vs. limit=22.5 +2024-09-18 22:47:21,736 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.51 vs. limit=6.0 +2024-09-18 22:47:25,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=538180.0, ans=0.0 +2024-09-18 22:47:40,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=538220.0, ans=0.125 +2024-09-18 22:47:40,399 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:47:55,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=538260.0, ans=0.0 +2024-09-18 22:48:02,461 INFO [train.py:1198] (0/2) Epoch 30, batch 3350, loss[loss=0.2502, ctc_loss=0.1292, cr_loss=0.3852, attn_decoder_loss=0.255, over 28747.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1219, cr_loss=0.3644, attn_decoder_loss=0.2434, over 5774617.31 frames. 
], batch size: 104, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:48:33,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=538380.0, ans=0.0 +2024-09-18 22:48:41,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=538380.0, ans=0.07 +2024-09-18 22:49:01,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=538420.0, ans=0.125 +2024-09-18 22:49:11,141 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.44 vs. limit=15.0 +2024-09-18 22:49:19,305 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.502e+01 8.711e+01 9.247e+01 9.714e+01 4.351e+02, threshold=1.849e+02, percent-clipped=3.0 +2024-09-18 22:49:19,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=538500.0, ans=0.125 +2024-09-18 22:49:20,832 INFO [train.py:1198] (0/2) Epoch 30, batch 3400, loss[loss=0.2146, ctc_loss=0.107, cr_loss=0.3354, attn_decoder_loss=0.2191, over 29338.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1217, cr_loss=0.3642, attn_decoder_loss=0.2433, over 5767949.64 frames. ], batch size: 67, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:49:33,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=538500.0, ans=0.04949747468305833 +2024-09-18 22:50:12,696 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.92 vs. limit=22.5 +2024-09-18 22:50:19,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=538620.0, ans=0.125 +2024-09-18 22:50:22,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=538660.0, ans=0.125 +2024-09-18 22:50:38,217 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.57 vs. limit=22.5 +2024-09-18 22:50:38,778 INFO [train.py:1198] (0/2) Epoch 30, batch 3450, loss[loss=0.2516, ctc_loss=0.1288, cr_loss=0.3639, attn_decoder_loss=0.2572, over 28277.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1218, cr_loss=0.3641, attn_decoder_loss=0.2435, over 5775385.41 frames. ], batch size: 111, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:50:43,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=538700.0, ans=0.0 +2024-09-18 22:50:46,171 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.86 vs. limit=15.0 +2024-09-18 22:51:05,294 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.48 vs. 
limit=15.0 +2024-09-18 22:51:35,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=538820.0, ans=0.125 +2024-09-18 22:51:52,847 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.290e+01 8.505e+01 9.176e+01 9.588e+01 2.343e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-18 22:51:54,378 INFO [train.py:1198] (0/2) Epoch 30, batch 3500, loss[loss=0.2258, ctc_loss=0.1123, cr_loss=0.3466, attn_decoder_loss=0.2307, over 29337.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1215, cr_loss=0.364, attn_decoder_loss=0.2432, over 5777113.79 frames. ], batch size: 71, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:51:56,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=538900.0, ans=0.125 +2024-09-18 22:52:08,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=538940.0, ans=0.0 +2024-09-18 22:52:55,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=539060.0, ans=0.0 +2024-09-18 22:53:00,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=539060.0, ans=0.125 +2024-09-18 22:53:07,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=539060.0, ans=0.0 +2024-09-18 22:53:08,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=539060.0, ans=0.125 +2024-09-18 22:53:11,184 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.26 vs. limit=10.0 +2024-09-18 22:53:11,561 INFO [train.py:1198] (0/2) Epoch 30, batch 3550, loss[loss=0.2456, ctc_loss=0.1216, cr_loss=0.3781, attn_decoder_loss=0.251, over 29701.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1216, cr_loss=0.3645, attn_decoder_loss=0.2436, over 5783166.84 frames. ], batch size: 89, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:53:48,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=539180.0, ans=10.0 +2024-09-18 22:54:09,178 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.28 vs. limit=15.0 +2024-09-18 22:54:14,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=539260.0, ans=0.125 +2024-09-18 22:54:26,534 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.244e+01 8.421e+01 8.886e+01 9.459e+01 1.383e+02, threshold=1.777e+02, percent-clipped=0.0 +2024-09-18 22:54:28,091 INFO [train.py:1198] (0/2) Epoch 30, batch 3600, loss[loss=0.2211, ctc_loss=0.1068, cr_loss=0.3317, attn_decoder_loss=0.2264, over 29479.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1213, cr_loss=0.3642, attn_decoder_loss=0.2434, over 5792451.98 frames. 
], batch size: 77, lr: 3.67e-03, grad_scale: 16.0 +2024-09-18 22:54:29,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=539300.0, ans=0.025 +2024-09-18 22:54:41,760 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:54:47,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=539340.0, ans=0.2 +2024-09-18 22:55:25,381 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.57 vs. limit=15.0 +2024-09-18 22:55:26,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=539460.0, ans=0.2 +2024-09-18 22:55:29,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=539460.0, ans=0.125 +2024-09-18 22:55:29,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=539460.0, ans=0.125 +2024-09-18 22:55:42,275 INFO [train.py:1198] (0/2) Epoch 30, batch 3650, loss[loss=0.25, ctc_loss=0.125, cr_loss=0.3825, attn_decoder_loss=0.2554, over 29510.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.121, cr_loss=0.3634, attn_decoder_loss=0.243, over 5794866.27 frames. ], batch size: 90, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:55:42,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=539500.0, ans=0.125 +2024-09-18 22:55:43,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=539500.0, ans=0.125 +2024-09-18 22:55:57,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=539540.0, ans=0.0 +2024-09-18 22:56:02,291 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.95 vs. limit=15.0 +2024-09-18 22:56:10,176 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.11 vs. limit=15.0 +2024-09-18 22:56:18,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=539580.0, ans=0.125 +2024-09-18 22:56:22,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=539580.0, ans=0.2 +2024-09-18 22:56:56,905 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.499e+01 8.540e+01 9.082e+01 9.609e+01 1.779e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-18 22:56:56,927 INFO [train.py:1198] (0/2) Epoch 30, batch 3700, loss[loss=0.2544, ctc_loss=0.131, cr_loss=0.3829, attn_decoder_loss=0.2596, over 29680.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.121, cr_loss=0.363, attn_decoder_loss=0.2431, over 5803902.89 frames. 
], batch size: 84, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:57:01,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=539700.0, ans=0.05 +2024-09-18 22:57:08,266 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.35 vs. limit=15.0 +2024-09-18 22:57:10,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=539740.0, ans=0.125 +2024-09-18 22:57:15,613 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:57:21,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=539740.0, ans=0.0 +2024-09-18 22:57:26,181 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.30 vs. limit=15.0 +2024-09-18 22:57:27,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=539780.0, ans=0.125 +2024-09-18 22:57:29,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.96 vs. limit=15.0 +2024-09-18 22:57:36,500 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.91 vs. limit=10.0 +2024-09-18 22:58:07,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=539860.0, ans=0.0 +2024-09-18 22:58:11,625 INFO [train.py:1198] (0/2) Epoch 30, batch 3750, loss[loss=0.2169, ctc_loss=0.1089, cr_loss=0.3235, attn_decoder_loss=0.2217, over 29344.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.121, cr_loss=0.3635, attn_decoder_loss=0.2432, over 5808013.38 frames. ], batch size: 67, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:58:15,636 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.19 vs. limit=22.5 +2024-09-18 22:58:29,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=539940.0, ans=0.2 +2024-09-18 22:58:34,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=539940.0, ans=0.125 +2024-09-18 22:58:41,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=539980.0, ans=0.0 +2024-09-18 22:58:45,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=539980.0, ans=0.0 +2024-09-18 22:59:00,793 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.81 vs. 
limit=12.0 +2024-09-18 22:59:21,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=540060.0, ans=0.0 +2024-09-18 22:59:28,014 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.475e+01 8.896e+01 9.603e+01 2.511e+02, threshold=1.779e+02, percent-clipped=2.0 +2024-09-18 22:59:28,041 INFO [train.py:1198] (0/2) Epoch 30, batch 3800, loss[loss=0.2523, ctc_loss=0.1307, cr_loss=0.3797, attn_decoder_loss=0.2573, over 29630.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1208, cr_loss=0.3624, attn_decoder_loss=0.2428, over 5798288.53 frames. ], batch size: 86, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:59:28,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=540100.0, ans=0.0 +2024-09-18 22:59:40,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.76 vs. limit=22.5 +2024-09-18 22:59:48,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=540140.0, ans=0.125 +2024-09-18 22:59:58,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=540180.0, ans=0.1 +2024-09-18 23:00:13,615 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.01 vs. limit=22.5 +2024-09-18 23:00:15,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=540220.0, ans=0.0 +2024-09-18 23:00:39,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=540260.0, ans=0.0 +2024-09-18 23:00:42,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=540300.0, ans=0.125 +2024-09-18 23:00:42,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=540300.0, ans=0.125 +2024-09-18 23:00:44,096 INFO [train.py:1198] (0/2) Epoch 30, batch 3850, loss[loss=0.2602, ctc_loss=0.1385, cr_loss=0.4049, attn_decoder_loss=0.2647, over 29240.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1209, cr_loss=0.3625, attn_decoder_loss=0.2428, over 5812306.55 frames. ], batch size: 100, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 23:00:53,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=540300.0, ans=0.125 +2024-09-18 23:01:09,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=540340.0, ans=0.2 +2024-09-18 23:01:21,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=540380.0, ans=0.2 +2024-09-18 23:01:31,442 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.26 vs. limit=10.0 +2024-09-18 23:01:39,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=540420.0, ans=0.125 +2024-09-18 23:01:56,580 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.68 vs. 
limit=10.0 +2024-09-18 23:01:58,762 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.853e+01 8.547e+01 9.033e+01 9.737e+01 1.184e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-18 23:01:58,784 INFO [train.py:1198] (0/2) Epoch 30, batch 3900, loss[loss=0.2488, ctc_loss=0.1192, cr_loss=0.3517, attn_decoder_loss=0.2554, over 29626.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1214, cr_loss=0.3634, attn_decoder_loss=0.2432, over 5816424.36 frames. ], batch size: 86, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 23:02:21,319 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.12 vs. limit=22.5 +2024-09-18 23:02:45,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.48 vs. limit=22.5 +2024-09-18 23:02:49,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=540620.0, ans=0.125 +2024-09-18 23:02:53,010 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.80 vs. limit=15.0 +2024-09-18 23:03:13,096 INFO [train.py:1198] (0/2) Epoch 30, batch 3950, loss[loss=0.2595, ctc_loss=0.1455, cr_loss=0.4014, attn_decoder_loss=0.2632, over 29476.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1211, cr_loss=0.3634, attn_decoder_loss=0.2432, over 5835732.16 frames. ], batch size: 97, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 23:03:38,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=540740.0, ans=0.0 +2024-09-18 23:03:38,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.89 vs. limit=15.0 +2024-09-18 23:04:05,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=540820.0, ans=0.015 +2024-09-18 23:04:19,684 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.48 vs. limit=15.0 +2024-09-18 23:04:27,383 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.361e+01 8.475e+01 8.885e+01 9.495e+01 1.627e+02, threshold=1.777e+02, percent-clipped=0.0 +2024-09-18 23:04:27,404 INFO [train.py:1198] (0/2) Epoch 30, batch 4000, loss[loss=0.2307, ctc_loss=0.1101, cr_loss=0.3418, attn_decoder_loss=0.2366, over 29507.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1212, cr_loss=0.3631, attn_decoder_loss=0.2432, over 5812206.88 frames. ], batch size: 74, lr: 3.67e-03, grad_scale: 16.0 +2024-09-18 23:04:29,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=540900.0, ans=0.1 +2024-09-18 23:04:29,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=540900.0, ans=0.0 +2024-09-18 23:04:44,647 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.89 vs. limit=22.5 +2024-09-18 23:05:00,823 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.77 vs. 
limit=15.0 +2024-09-18 23:05:03,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=540980.0, ans=0.1 +2024-09-18 23:05:22,979 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.34 vs. limit=22.5 +2024-09-18 23:05:25,622 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:05:43,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=541100.0, ans=0.125 +2024-09-18 23:05:44,346 INFO [train.py:1198] (0/2) Epoch 30, batch 4050, loss[loss=0.2547, ctc_loss=0.1415, cr_loss=0.3694, attn_decoder_loss=0.259, over 20967.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1214, cr_loss=0.3633, attn_decoder_loss=0.2431, over 5796941.83 frames. ], batch size: 210, lr: 3.66e-03, grad_scale: 16.0 +2024-09-18 23:06:46,093 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:06:53,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=541260.0, ans=0.125 +2024-09-18 23:06:57,234 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.51 vs. limit=15.0 +2024-09-18 23:06:57,913 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.018e+01 8.748e+01 9.261e+01 9.930e+01 1.570e+02, threshold=1.852e+02, percent-clipped=0.0 +2024-09-18 23:06:57,939 INFO [train.py:1198] (0/2) Epoch 30, batch 4100, loss[loss=0.2551, ctc_loss=0.1346, cr_loss=0.3932, attn_decoder_loss=0.2597, over 29528.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1214, cr_loss=0.3631, attn_decoder_loss=0.2431, over 5792719.84 frames. ], batch size: 90, lr: 3.66e-03, grad_scale: 16.0 +2024-09-18 23:07:02,063 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.75 vs. limit=15.0 +2024-09-18 23:07:26,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=541380.0, ans=0.125 +2024-09-18 23:08:07,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=541460.0, ans=0.0 +2024-09-18 23:08:11,980 INFO [train.py:1198] (0/2) Epoch 30, batch 4150, loss[loss=0.2294, ctc_loss=0.1118, cr_loss=0.3381, attn_decoder_loss=0.2349, over 29508.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1211, cr_loss=0.3626, attn_decoder_loss=0.2428, over 5798068.09 frames. 
], batch size: 77, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:08:18,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=541500.0, ans=0.0 +2024-09-18 23:08:19,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=541500.0, ans=0.0 +2024-09-18 23:08:27,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=541540.0, ans=0.125 +2024-09-18 23:08:34,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=541540.0, ans=0.125 +2024-09-18 23:08:39,177 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.04 vs. limit=15.0 +2024-09-18 23:09:05,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=541620.0, ans=0.0 +2024-09-18 23:09:27,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.80 vs. limit=6.0 +2024-09-18 23:09:28,243 INFO [train.py:1198] (0/2) Epoch 30, batch 4200, loss[loss=0.2541, ctc_loss=0.149, cr_loss=0.4278, attn_decoder_loss=0.2562, over 29494.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1212, cr_loss=0.3626, attn_decoder_loss=0.243, over 5799197.27 frames. ], batch size: 90, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:09:28,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=541700.0, ans=0.0 +2024-09-18 23:09:29,682 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.353e+01 8.390e+01 9.004e+01 9.409e+01 1.747e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-18 23:09:29,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=541700.0, ans=0.1 +2024-09-18 23:09:32,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=541700.0, ans=0.2 +2024-09-18 23:09:43,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=541740.0, ans=0.125 +2024-09-18 23:09:50,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=541740.0, ans=0.2 +2024-09-18 23:09:55,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=541740.0, ans=0.2 +2024-09-18 23:09:59,814 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.29 vs. limit=22.5 +2024-09-18 23:10:28,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=541860.0, ans=0.2 +2024-09-18 23:10:34,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=541860.0, ans=0.0 +2024-09-18 23:10:40,665 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:10:41,862 INFO [train.py:1198] (0/2) Epoch 30, batch 4250, loss[loss=0.2245, ctc_loss=0.1065, cr_loss=0.3455, attn_decoder_loss=0.2299, over 29514.00 frames. 
], tot_loss[loss=0.238, ctc_loss=0.1208, cr_loss=0.362, attn_decoder_loss=0.243, over 5804535.28 frames. ], batch size: 74, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:10:50,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=541900.0, ans=0.1 +2024-09-18 23:10:53,136 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.25 vs. limit=15.0 +2024-09-18 23:10:53,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=541900.0, ans=0.2 +2024-09-18 23:10:58,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=541940.0, ans=0.125 +2024-09-18 23:11:23,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=541980.0, ans=0.025 +2024-09-18 23:11:25,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=542020.0, ans=0.125 +2024-09-18 23:11:45,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=542060.0, ans=0.125 +2024-09-18 23:11:55,755 INFO [train.py:1198] (0/2) Epoch 30, batch 4300, loss[loss=0.2369, ctc_loss=0.1128, cr_loss=0.3436, attn_decoder_loss=0.2431, over 29518.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.121, cr_loss=0.3626, attn_decoder_loss=0.2434, over 5794024.15 frames. ], batch size: 87, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:11:57,300 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.934e+01 8.627e+01 9.132e+01 9.730e+01 6.693e+02, threshold=1.826e+02, percent-clipped=2.0 +2024-09-18 23:13:11,892 INFO [train.py:1198] (0/2) Epoch 30, batch 4350, loss[loss=0.2539, ctc_loss=0.1375, cr_loss=0.398, attn_decoder_loss=0.258, over 29436.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1239, cr_loss=0.3684, attn_decoder_loss=0.2467, over 5796300.32 frames. ], batch size: 97, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:13:26,309 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.94 vs. limit=15.0 +2024-09-18 23:13:38,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=542340.0, ans=0.1 +2024-09-18 23:13:51,026 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.73 vs. 
limit=15.0 +2024-09-18 23:13:56,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=542420.0, ans=0.0 +2024-09-18 23:14:02,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=542420.0, ans=0.125 +2024-09-18 23:14:09,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=542460.0, ans=0.125 +2024-09-18 23:14:22,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=542460.0, ans=0.1 +2024-09-18 23:14:25,121 INFO [train.py:1198] (0/2) Epoch 30, batch 4400, loss[loss=0.2425, ctc_loss=0.1273, cr_loss=0.3731, attn_decoder_loss=0.247, over 27177.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1252, cr_loss=0.3714, attn_decoder_loss=0.2487, over 5766987.37 frames. ], batch size: 124, lr: 3.66e-03, grad_scale: 16.0 +2024-09-18 23:14:26,525 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.984e+01 8.805e+01 9.147e+01 9.646e+01 3.836e+02, threshold=1.829e+02, percent-clipped=2.0 +2024-09-18 23:14:32,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=542500.0, ans=0.125 +2024-09-18 23:14:53,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=542580.0, ans=0.125 +2024-09-18 23:15:35,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=542660.0, ans=0.0 +2024-09-18 23:15:35,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=542660.0, ans=0.0 +2024-09-18 23:15:39,905 INFO [train.py:1198] (0/2) Epoch 30, batch 4450, loss[loss=0.2518, ctc_loss=0.1462, cr_loss=0.3916, attn_decoder_loss=0.2549, over 20155.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1289, cr_loss=0.3762, attn_decoder_loss=0.2507, over 5574486.38 frames. ], batch size: 209, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:15:50,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=542700.0, ans=0.125 +2024-09-18 23:15:52,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.92 vs. limit=15.0 +2024-09-18 23:15:54,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=542740.0, ans=0.125 +2024-09-18 23:15:55,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=542740.0, ans=0.2 +2024-09-18 23:16:09,621 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.42 vs. limit=15.0 +2024-09-18 23:16:18,863 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=10.12 vs. 
limit=15.0 +2024-09-18 23:16:21,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=542780.0, ans=0.07 +2024-09-18 23:16:30,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=542820.0, ans=0.0 +2024-09-18 23:16:31,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=542820.0, ans=0.125 +2024-09-18 23:16:50,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=542860.0, ans=0.1 +2024-09-18 23:16:53,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.82 vs. limit=10.0 +2024-09-18 23:16:54,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=542900.0, ans=0.125 +2024-09-18 23:16:55,901 INFO [train.py:1198] (0/2) Epoch 30, batch 4500, loss[loss=0.2598, ctc_loss=0.1451, cr_loss=0.3758, attn_decoder_loss=0.2642, over 20538.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1325, cr_loss=0.3786, attn_decoder_loss=0.2526, over 5232450.92 frames. ], batch size: 209, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:16:58,807 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.613e+01 9.634e+01 1.118e+02 1.226e+02 1.647e+02, threshold=2.235e+02, percent-clipped=0.0 +2024-09-18 23:17:11,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=542940.0, ans=0.125 +2024-09-18 23:17:18,581 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:17:30,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=542980.0, ans=0.025 +2024-09-18 23:17:32,829 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-30.pt +2024-09-18 23:18:19,544 INFO [train.py:1198] (0/2) Epoch 31, batch 0, loss[loss=0.2182, ctc_loss=0.1054, cr_loss=0.3214, attn_decoder_loss=0.2236, over 29607.00 frames. ], tot_loss[loss=0.2182, ctc_loss=0.1054, cr_loss=0.3214, attn_decoder_loss=0.2236, over 29607.00 frames. ], batch size: 73, lr: 3.60e-03, grad_scale: 16.0 +2024-09-18 23:18:19,545 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-18 23:18:37,943 INFO [train.py:1230] (0/2) Epoch 31, validation: loss=0.2119, ctc_loss=0.03668, cr_loss=5.946e-15, attn_decoder_loss=0.2314, over 944034.00 frames. +2024-09-18 23:18:37,944 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-18 23:18:39,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=543000.0, ans=0.125 +2024-09-18 23:19:01,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.17 vs. 
limit=22.5 +2024-09-18 23:19:13,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=543080.0, ans=10.0 +2024-09-18 23:19:20,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=543080.0, ans=0.0 +2024-09-18 23:19:25,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=543120.0, ans=0.05 +2024-09-18 23:19:31,004 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:19:56,000 INFO [train.py:1198] (0/2) Epoch 31, batch 50, loss[loss=0.2126, ctc_loss=0.1048, cr_loss=0.3216, attn_decoder_loss=0.2175, over 29416.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1235, cr_loss=0.3683, attn_decoder_loss=0.2447, over 1267633.91 frames. ], batch size: 70, lr: 3.60e-03, grad_scale: 8.0 +2024-09-18 23:19:57,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=543200.0, ans=0.0 +2024-09-18 23:20:06,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=543200.0, ans=0.0 +2024-09-18 23:20:31,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.16 vs. limit=15.0 +2024-09-18 23:20:36,124 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:20:38,049 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.40 vs. limit=6.0 +2024-09-18 23:20:38,772 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.848e+01 9.634e+01 1.110e+02 1.417e+02, threshold=1.927e+02, percent-clipped=0.0 +2024-09-18 23:21:06,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.89 vs. limit=12.0 +2024-09-18 23:21:12,421 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.75 vs. limit=15.0 +2024-09-18 23:21:13,386 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:21:14,584 INFO [train.py:1198] (0/2) Epoch 31, batch 100, loss[loss=0.2388, ctc_loss=0.1323, cr_loss=0.3787, attn_decoder_loss=0.2422, over 29521.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1248, cr_loss=0.3697, attn_decoder_loss=0.2463, over 2252189.91 frames. ], batch size: 76, lr: 3.60e-03, grad_scale: 8.0 +2024-09-18 23:21:36,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.04 vs. 
limit=15.0 +2024-09-18 23:21:38,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=543440.0, ans=0.125 +2024-09-18 23:21:52,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=543480.0, ans=0.125 +2024-09-18 23:21:55,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=543480.0, ans=0.125 +2024-09-18 23:22:00,694 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.43 vs. limit=5.0 +2024-09-18 23:22:09,396 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.04 vs. limit=12.0 +2024-09-18 23:22:17,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=543560.0, ans=0.125 +2024-09-18 23:22:29,484 INFO [train.py:1198] (0/2) Epoch 31, batch 150, loss[loss=0.2196, ctc_loss=0.1082, cr_loss=0.354, attn_decoder_loss=0.2241, over 29437.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1218, cr_loss=0.365, attn_decoder_loss=0.2435, over 3046565.01 frames. ], batch size: 70, lr: 3.60e-03, grad_scale: 8.0 +2024-09-18 23:22:52,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=543640.0, ans=0.2 +2024-09-18 23:23:11,517 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.390e+01 8.450e+01 8.920e+01 9.351e+01 1.507e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-18 23:23:17,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=543720.0, ans=0.125 +2024-09-18 23:23:24,620 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.38 vs. limit=12.0 +2024-09-18 23:23:47,251 INFO [train.py:1198] (0/2) Epoch 31, batch 200, loss[loss=0.2504, ctc_loss=0.1305, cr_loss=0.38, attn_decoder_loss=0.2553, over 27379.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1211, cr_loss=0.3639, attn_decoder_loss=0.2427, over 3659581.94 frames. ], batch size: 124, lr: 3.60e-03, grad_scale: 8.0 +2024-09-18 23:23:49,807 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.68 vs. limit=15.0 +2024-09-18 23:23:52,523 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.29 vs. 
limit=15.0 +2024-09-18 23:24:05,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=543840.0, ans=0.125 +2024-09-18 23:24:26,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=543880.0, ans=0.125 +2024-09-18 23:24:32,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=543920.0, ans=0.125 +2024-09-18 23:25:04,443 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-136000.pt +2024-09-18 23:25:13,221 INFO [train.py:1198] (0/2) Epoch 31, batch 250, loss[loss=0.2597, ctc_loss=0.1384, cr_loss=0.4089, attn_decoder_loss=0.2641, over 29265.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1211, cr_loss=0.364, attn_decoder_loss=0.2429, over 4142728.68 frames. ], batch size: 100, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:25:42,283 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:25:45,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_abs, batch_count=544080.0, ans=0.5 +2024-09-18 23:25:45,553 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.73 vs. limit=15.0 +2024-09-18 23:25:48,948 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.22 vs. limit=15.0 +2024-09-18 23:25:55,606 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.468e+01 8.439e+01 8.894e+01 9.430e+01 6.449e+02, threshold=1.779e+02, percent-clipped=1.0 +2024-09-18 23:25:56,464 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.85 vs. limit=22.5 +2024-09-18 23:26:12,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=544160.0, ans=0.0 +2024-09-18 23:26:13,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=544160.0, ans=0.0 +2024-09-18 23:26:28,640 INFO [train.py:1198] (0/2) Epoch 31, batch 300, loss[loss=0.2516, ctc_loss=0.1321, cr_loss=0.3944, attn_decoder_loss=0.2562, over 29544.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1207, cr_loss=0.3628, attn_decoder_loss=0.2424, over 4511344.09 frames. ], batch size: 92, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:26:59,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=544280.0, ans=0.025 +2024-09-18 23:27:08,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=544280.0, ans=0.125 +2024-09-18 23:27:16,757 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.71 vs. 
limit=10.0 +2024-09-18 23:27:35,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=544360.0, ans=0.125 +2024-09-18 23:27:41,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=544360.0, ans=0.1 +2024-09-18 23:27:46,735 INFO [train.py:1198] (0/2) Epoch 31, batch 350, loss[loss=0.2147, ctc_loss=0.1023, cr_loss=0.3191, attn_decoder_loss=0.2201, over 29321.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1212, cr_loss=0.3633, attn_decoder_loss=0.2433, over 4797360.95 frames. ], batch size: 71, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:27:46,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=544400.0, ans=0.125 +2024-09-18 23:28:06,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=544440.0, ans=0.1 +2024-09-18 23:28:14,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.31 vs. limit=12.0 +2024-09-18 23:28:15,921 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.18 vs. limit=22.5 +2024-09-18 23:28:28,468 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.589e+01 8.389e+01 8.860e+01 9.607e+01 2.348e+02, threshold=1.772e+02, percent-clipped=3.0 +2024-09-18 23:28:39,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=544520.0, ans=0.1 +2024-09-18 23:29:01,560 INFO [train.py:1198] (0/2) Epoch 31, batch 400, loss[loss=0.2369, ctc_loss=0.1173, cr_loss=0.3738, attn_decoder_loss=0.2419, over 29712.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1209, cr_loss=0.3624, attn_decoder_loss=0.2432, over 5025681.81 frames. ], batch size: 82, lr: 3.59e-03, grad_scale: 16.0 +2024-09-18 23:29:10,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=544600.0, ans=0.125 +2024-09-18 23:29:39,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.66 vs. limit=15.0 +2024-09-18 23:29:48,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=544720.0, ans=0.125 +2024-09-18 23:29:54,761 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.58 vs. limit=12.0 +2024-09-18 23:30:13,206 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.60 vs. limit=15.0 +2024-09-18 23:30:20,324 INFO [train.py:1198] (0/2) Epoch 31, batch 450, loss[loss=0.2429, ctc_loss=0.1241, cr_loss=0.3749, attn_decoder_loss=0.2478, over 29705.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1207, cr_loss=0.3626, attn_decoder_loss=0.243, over 5188156.66 frames. 
], batch size: 83, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:30:20,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=544800.0, ans=0.1
+2024-09-18 23:30:35,080 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.38 vs. limit=15.0
+2024-09-18 23:31:04,394 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.382e+01 8.525e+01 8.811e+01 9.438e+01 1.510e+02, threshold=1.762e+02, percent-clipped=0.0
+2024-09-18 23:31:15,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=544920.0, ans=0.0
+2024-09-18 23:31:16,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=544920.0, ans=0.125
+2024-09-18 23:31:38,407 INFO [train.py:1198] (0/2) Epoch 31, batch 500, loss[loss=0.2518, ctc_loss=0.1337, cr_loss=0.398, attn_decoder_loss=0.2561, over 29426.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1206, cr_loss=0.3628, attn_decoder_loss=0.2425, over 5330485.22 frames. ], batch size: 94, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:31:46,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=545000.0, ans=0.125
+2024-09-18 23:31:46,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=545000.0, ans=0.125
+2024-09-18 23:31:48,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=545000.0, ans=0.1
+2024-09-18 23:31:53,488 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.75 vs. limit=12.0
+2024-09-18 23:32:18,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=545080.0, ans=0.1
+2024-09-18 23:32:25,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=545120.0, ans=0.0
+2024-09-18 23:32:39,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=545160.0, ans=0.125
+2024-09-18 23:32:54,166 INFO [train.py:1198] (0/2) Epoch 31, batch 550, loss[loss=0.2603, ctc_loss=0.1433, cr_loss=0.4057, attn_decoder_loss=0.2643, over 28753.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1206, cr_loss=0.362, attn_decoder_loss=0.2425, over 5423482.42 frames. ], batch size: 104, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:33:06,381 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.44 vs. limit=15.0
+2024-09-18 23:33:40,334 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 8.523e+01 8.948e+01 9.609e+01 1.463e+02, threshold=1.790e+02, percent-clipped=0.0
+2024-09-18 23:34:12,442 INFO [train.py:1198] (0/2) Epoch 31, batch 600, loss[loss=0.2486, ctc_loss=0.1278, cr_loss=0.3734, attn_decoder_loss=0.2537, over 29279.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.121, cr_loss=0.3632, attn_decoder_loss=0.2429, over 5511217.78 frames. ], batch size: 100, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:34:12,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=545400.0, ans=0.125
+2024-09-18 23:34:15,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=545400.0, ans=0.125
+2024-09-18 23:34:35,483 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=12.0
+2024-09-18 23:34:37,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.52 vs. limit=15.0
+2024-09-18 23:34:52,505 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.61 vs. limit=15.0
+2024-09-18 23:35:06,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=545520.0, ans=0.2
+2024-09-18 23:35:30,097 INFO [train.py:1198] (0/2) Epoch 31, batch 650, loss[loss=0.2441, ctc_loss=0.1251, cr_loss=0.3792, attn_decoder_loss=0.2489, over 29772.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1206, cr_loss=0.3625, attn_decoder_loss=0.2422, over 5588205.08 frames. ], batch size: 81, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:35:31,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=545600.0, ans=0.125
+2024-09-18 23:35:42,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=545600.0, ans=0.1
+2024-09-18 23:35:57,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=545640.0, ans=0.1
+2024-09-18 23:36:03,257 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.14 vs. limit=8.0
+2024-09-18 23:36:14,178 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.345e+01 8.324e+01 8.831e+01 9.249e+01 1.386e+02, threshold=1.766e+02, percent-clipped=0.0
+2024-09-18 23:36:37,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=545760.0, ans=0.05
+2024-09-18 23:36:44,010 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.79 vs. limit=10.0
+2024-09-18 23:36:46,087 INFO [train.py:1198] (0/2) Epoch 31, batch 700, loss[loss=0.2403, ctc_loss=0.128, cr_loss=0.3898, attn_decoder_loss=0.2441, over 29533.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1209, cr_loss=0.3636, attn_decoder_loss=0.2428, over 5636073.64 frames. ], batch size: 76, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:36:47,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=545800.0, ans=0.1
+2024-09-18 23:37:52,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=545960.0, ans=0.2
+2024-09-18 23:37:58,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=545960.0, ans=0.125
+2024-09-18 23:38:04,178 INFO [train.py:1198] (0/2) Epoch 31, batch 750, loss[loss=0.2527, ctc_loss=0.127, cr_loss=0.3908, attn_decoder_loss=0.258, over 29700.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1209, cr_loss=0.3636, attn_decoder_loss=0.2428, over 5676180.18 frames. ], batch size: 82, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:38:15,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=546000.0, ans=0.125
+2024-09-18 23:38:47,926 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.065e+01 8.604e+01 9.058e+01 9.496e+01 1.707e+02, threshold=1.812e+02, percent-clipped=0.0
+2024-09-18 23:38:51,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=546120.0, ans=0.1
+2024-09-18 23:38:52,811 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 23:39:03,474 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.69 vs. limit=22.5
+2024-09-18 23:39:12,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=546160.0, ans=0.0
+2024-09-18 23:39:19,499 INFO [train.py:1198] (0/2) Epoch 31, batch 800, loss[loss=0.2123, ctc_loss=0.09193, cr_loss=0.2874, attn_decoder_loss=0.2193, over 29603.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1208, cr_loss=0.3627, attn_decoder_loss=0.2428, over 5706081.86 frames. ], batch size: 73, lr: 3.59e-03, grad_scale: 16.0
+2024-09-18 23:39:32,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=546200.0, ans=0.2
+2024-09-18 23:39:44,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=546240.0, ans=0.125
+2024-09-18 23:40:04,914 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.69 vs. limit=15.0
+2024-09-18 23:40:37,652 INFO [train.py:1198] (0/2) Epoch 31, batch 850, loss[loss=0.2511, ctc_loss=0.1249, cr_loss=0.3802, attn_decoder_loss=0.2567, over 29710.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1202, cr_loss=0.3616, attn_decoder_loss=0.2423, over 5734567.76 frames. ], batch size: 89, lr: 3.59e-03, grad_scale: 16.0
+2024-09-18 23:40:37,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=546400.0, ans=0.0
+2024-09-18 23:40:42,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=546400.0, ans=0.1
+2024-09-18 23:40:48,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=546400.0, ans=0.125
+2024-09-18 23:40:58,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.99 vs. limit=6.0
+2024-09-18 23:40:58,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.whiten.whitening_limit, batch_count=546440.0, ans=12.0
+2024-09-18 23:41:02,834 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.35 vs. limit=15.0
+2024-09-18 23:41:22,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=546480.0, ans=0.125
+2024-09-18 23:41:22,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=546480.0, ans=0.05
+2024-09-18 23:41:23,561 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.223e+01 8.645e+01 9.090e+01 9.691e+01 3.180e+02, threshold=1.818e+02, percent-clipped=2.0
+2024-09-18 23:41:45,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=546560.0, ans=0.0
+2024-09-18 23:41:46,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=546560.0, ans=0.0
+2024-09-18 23:41:55,485 INFO [train.py:1198] (0/2) Epoch 31, batch 900, loss[loss=0.2214, ctc_loss=0.1111, cr_loss=0.3564, attn_decoder_loss=0.2258, over 29595.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1207, cr_loss=0.3624, attn_decoder_loss=0.2427, over 5740075.65 frames. ], batch size: 73, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:42:09,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=546640.0, ans=0.125
+2024-09-18 23:42:10,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=546640.0, ans=0.0
+2024-09-18 23:42:16,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=546640.0, ans=0.125
+2024-09-18 23:42:52,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=546720.0, ans=0.2
+2024-09-18 23:42:58,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=546760.0, ans=0.2
+2024-09-18 23:43:07,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=546760.0, ans=0.125
+2024-09-18 23:43:10,547 INFO [train.py:1198] (0/2) Epoch 31, batch 950, loss[loss=0.2157, ctc_loss=0.1115, cr_loss=0.3462, attn_decoder_loss=0.2196, over 29543.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1205, cr_loss=0.3623, attn_decoder_loss=0.2427, over 5741276.83 frames. ], batch size: 74, lr: 3.59e-03, grad_scale: 8.0
+2024-09-18 23:43:23,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=546800.0, ans=0.1
+2024-09-18 23:43:57,153 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 23:43:58,298 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.439e+01 8.530e+01 9.181e+01 9.954e+01 1.509e+02, threshold=1.836e+02, percent-clipped=0.0
+2024-09-18 23:43:59,340 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.75 vs. limit=15.0
+2024-09-18 23:44:06,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=546920.0, ans=0.125
+2024-09-18 23:44:23,222 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.33 vs. limit=15.0
+2024-09-18 23:44:28,412 INFO [train.py:1198] (0/2) Epoch 31, batch 1000, loss[loss=0.2298, ctc_loss=0.1151, cr_loss=0.355, attn_decoder_loss=0.2347, over 29483.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1214, cr_loss=0.3639, attn_decoder_loss=0.2434, over 5734371.93 frames. ], batch size: 77, lr: 3.58e-03, grad_scale: 8.0
+2024-09-18 23:44:45,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=547040.0, ans=0.1
+2024-09-18 23:44:54,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=547040.0, ans=0.125
+2024-09-18 23:45:00,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=547080.0, ans=0.0
+2024-09-18 23:45:06,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=547080.0, ans=0.0
+2024-09-18 23:45:14,523 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.43 vs. limit=15.0
+2024-09-18 23:45:47,380 INFO [train.py:1198] (0/2) Epoch 31, batch 1050, loss[loss=0.2485, ctc_loss=0.133, cr_loss=0.3906, attn_decoder_loss=0.2526, over 29675.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1211, cr_loss=0.3631, attn_decoder_loss=0.243, over 5742972.28 frames. ], batch size: 85, lr: 3.58e-03, grad_scale: 8.0
+2024-09-18 23:46:10,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=547240.0, ans=0.2
+2024-09-18 23:46:14,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=15.0
+2024-09-18 23:46:21,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=547280.0, ans=10.0
+2024-09-18 23:46:33,059 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.494e+01 8.412e+01 9.049e+01 9.703e+01 1.961e+02, threshold=1.810e+02, percent-clipped=1.0
+2024-09-18 23:46:36,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.59 vs. limit=15.0
+2024-09-18 23:46:42,512 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 23:46:42,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=547320.0, ans=0.0
+2024-09-18 23:47:03,202 INFO [train.py:1198] (0/2) Epoch 31, batch 1100, loss[loss=0.2407, ctc_loss=0.1228, cr_loss=0.3709, attn_decoder_loss=0.2456, over 29442.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1206, cr_loss=0.3621, attn_decoder_loss=0.2425, over 5756269.53 frames. ], batch size: 78, lr: 3.58e-03, grad_scale: 8.0
+2024-09-18 23:47:25,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=547440.0, ans=0.2
+2024-09-18 23:47:25,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=547440.0, ans=0.2
+2024-09-18 23:47:51,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=547520.0, ans=0.1
+2024-09-18 23:47:51,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.07 vs. limit=22.5
+2024-09-18 23:48:08,266 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 23:48:09,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=547560.0, ans=0.2
+2024-09-18 23:48:11,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=547560.0, ans=0.125
+2024-09-18 23:48:12,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=547560.0, ans=0.125
+2024-09-18 23:48:21,595 INFO [train.py:1198] (0/2) Epoch 31, batch 1150, loss[loss=0.2376, ctc_loss=0.1253, cr_loss=0.3756, attn_decoder_loss=0.2418, over 29451.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1204, cr_loss=0.3614, attn_decoder_loss=0.2422, over 5755095.50 frames. ], batch size: 78, lr: 3.58e-03, grad_scale: 8.0
+2024-09-18 23:48:25,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=547600.0, ans=0.0
+2024-09-18 23:48:43,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=547640.0, ans=0.125
+2024-09-18 23:48:57,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=547680.0, ans=0.05
+2024-09-18 23:48:57,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=547680.0, ans=0.0
+2024-09-18 23:49:09,381 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.218e+01 8.554e+01 9.013e+01 9.585e+01 3.112e+02, threshold=1.803e+02, percent-clipped=2.0
+2024-09-18 23:49:30,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=547760.0, ans=0.0
+2024-09-18 23:49:39,573 INFO [train.py:1198] (0/2) Epoch 31, batch 1200, loss[loss=0.2406, ctc_loss=0.1133, cr_loss=0.3605, attn_decoder_loss=0.2468, over 29661.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1208, cr_loss=0.3626, attn_decoder_loss=0.2429, over 5747212.38 frames. ], batch size: 85, lr: 3.58e-03, grad_scale: 16.0
+2024-09-18 23:49:53,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=547840.0, ans=0.07
+2024-09-18 23:50:29,301 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.32 vs. limit=15.0
+2024-09-18 23:50:35,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.02 vs. limit=6.0
+2024-09-18 23:50:42,426 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 23:50:42,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=547960.0, ans=0.2
+2024-09-18 23:50:42,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=547960.0, ans=0.1
+2024-09-18 23:50:58,373 INFO [train.py:1198] (0/2) Epoch 31, batch 1250, loss[loss=0.2598, ctc_loss=0.1411, cr_loss=0.4142, attn_decoder_loss=0.2638, over 29514.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1212, cr_loss=0.3633, attn_decoder_loss=0.2436, over 5774348.56 frames. ], batch size: 92, lr: 3.58e-03, grad_scale: 16.0
+2024-09-18 23:51:01,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=548000.0, ans=0.125
+2024-09-18 23:51:03,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=548000.0, ans=0.125
+2024-09-18 23:51:03,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.15 vs. limit=10.0
+2024-09-18 23:51:12,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=548040.0, ans=0.1
+2024-09-18 23:51:18,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=548040.0, ans=0.125
+2024-09-18 23:51:25,515 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.67 vs. limit=22.5
+2024-09-18 23:51:43,981 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.542e+01 8.406e+01 8.800e+01 9.095e+01 1.339e+02, threshold=1.760e+02, percent-clipped=0.0
+2024-09-18 23:51:58,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.84 vs. limit=15.0
+2024-09-18 23:52:06,440 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.85 vs. limit=22.5
+2024-09-18 23:52:14,291 INFO [train.py:1198] (0/2) Epoch 31, batch 1300, loss[loss=0.2477, ctc_loss=0.1237, cr_loss=0.3725, attn_decoder_loss=0.2532, over 28555.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1205, cr_loss=0.3623, attn_decoder_loss=0.2429, over 5778575.17 frames. ], batch size: 112, lr: 3.58e-03, grad_scale: 16.0
+2024-09-18 23:52:18,318 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.01 vs. limit=15.0
+2024-09-18 23:53:04,711 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.43 vs. limit=22.5
+2024-09-18 23:53:08,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=548320.0, ans=0.0
+2024-09-18 23:53:25,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=548360.0, ans=0.025
+2024-09-18 23:53:32,531 INFO [train.py:1198] (0/2) Epoch 31, batch 1350, loss[loss=0.2354, ctc_loss=0.1179, cr_loss=0.3645, attn_decoder_loss=0.2404, over 29768.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1204, cr_loss=0.362, attn_decoder_loss=0.2426, over 5795840.48 frames. ], batch size: 81, lr: 3.58e-03, grad_scale: 16.0
+2024-09-18 23:53:41,810 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 23:53:49,751 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.22 vs. limit=22.5
+2024-09-18 23:53:55,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=548440.0, ans=0.0
+2024-09-18 23:53:56,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=548440.0, ans=0.125
+2024-09-18 23:54:17,462 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.430e+01 8.480e+01 8.970e+01 9.556e+01 1.739e+02, threshold=1.794e+02, percent-clipped=0.0
+2024-09-18 23:54:25,739 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.25 vs. limit=12.0
+2024-09-18 23:54:31,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=548560.0, ans=0.0
+2024-09-18 23:54:43,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=548560.0, ans=0.025
+2024-09-18 23:54:47,699 INFO [train.py:1198] (0/2) Epoch 31, batch 1400, loss[loss=0.2045, ctc_loss=0.09596, cr_loss=0.3023, attn_decoder_loss=0.2099, over 29605.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1201, cr_loss=0.3615, attn_decoder_loss=0.2422, over 5806151.50 frames. ], batch size: 69, lr: 3.58e-03, grad_scale: 16.0
+2024-09-18 23:54:56,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=548600.0, ans=0.09899494936611666
+2024-09-18 23:55:08,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=548640.0, ans=0.2
+2024-09-18 23:55:12,759 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 23:55:20,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=548680.0, ans=0.125
+2024-09-18 23:55:24,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_ff2.min_abs, batch_count=548680.0, ans=0.1
+2024-09-18 23:55:32,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=548680.0, ans=0.1
+2024-09-18 23:55:38,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=548720.0, ans=0.1
+2024-09-18 23:55:53,987 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.18 vs. limit=22.5
+2024-09-18 23:56:05,900 INFO [train.py:1198] (0/2) Epoch 31, batch 1450, loss[loss=0.2518, ctc_loss=0.1342, cr_loss=0.3758, attn_decoder_loss=0.2566, over 29473.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1201, cr_loss=0.3614, attn_decoder_loss=0.2428, over 5803555.60 frames. ], batch size: 94, lr: 3.58e-03, grad_scale: 8.0
+2024-09-18 23:56:13,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=548800.0, ans=0.1
+2024-09-18 23:56:17,065 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.43 vs. limit=15.0
+2024-09-18 23:56:31,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=548840.0, ans=0.0
+2024-09-18 23:56:34,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=548880.0, ans=0.025
+2024-09-18 23:56:45,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=548880.0, ans=0.2
+2024-09-18 23:56:52,504 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.058e+01 8.514e+01 9.000e+01 9.465e+01 1.182e+02, threshold=1.800e+02, percent-clipped=0.0
+2024-09-18 23:57:12,965 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 23:57:18,183 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.59 vs. limit=15.0
+2024-09-18 23:57:18,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=548960.0, ans=0.2
+2024-09-18 23:57:23,230 INFO [train.py:1198] (0/2) Epoch 31, batch 1500, loss[loss=0.2418, ctc_loss=0.1219, cr_loss=0.3596, attn_decoder_loss=0.2471, over 29651.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1202, cr_loss=0.3618, attn_decoder_loss=0.2431, over 5804589.12 frames. ], batch size: 86, lr: 3.58e-03, grad_scale: 8.0
+2024-09-18 23:57:26,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=549000.0, ans=0.125
+2024-09-18 23:57:51,200 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.64 vs. limit=15.0
+2024-09-18 23:58:14,559 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.89 vs. limit=15.0
+2024-09-18 23:58:15,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=549120.0, ans=0.2
+2024-09-18 23:58:20,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=549120.0, ans=0.125
+2024-09-18 23:58:20,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=549120.0, ans=0.125
+2024-09-18 23:58:41,412 INFO [train.py:1198] (0/2) Epoch 31, batch 1550, loss[loss=0.2485, ctc_loss=0.1317, cr_loss=0.3929, attn_decoder_loss=0.2527, over 29512.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1206, cr_loss=0.3622, attn_decoder_loss=0.2431, over 5782000.45 frames. ], batch size: 90, lr: 3.58e-03, grad_scale: 8.0
+2024-09-18 23:58:52,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=549200.0, ans=0.125
+2024-09-18 23:59:19,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=549280.0, ans=0.125
+2024-09-18 23:59:19,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=549280.0, ans=0.1
+2024-09-18 23:59:20,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=549280.0, ans=0.125
+2024-09-18 23:59:26,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=549320.0, ans=0.125
+2024-09-18 23:59:26,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=549320.0, ans=0.07
+2024-09-18 23:59:27,893 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.398e+01 8.813e+01 9.564e+01 2.152e+02, threshold=1.763e+02, percent-clipped=1.0
+2024-09-18 23:59:46,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=549360.0, ans=0.2
+2024-09-18 23:59:48,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=549360.0, ans=0.125
+2024-09-18 23:59:56,784 INFO [train.py:1198] (0/2) Epoch 31, batch 1600, loss[loss=0.2448, ctc_loss=0.1214, cr_loss=0.3588, attn_decoder_loss=0.2506, over 29675.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1209, cr_loss=0.3628, attn_decoder_loss=0.2429, over 5765377.91 frames. ], batch size: 85, lr: 3.58e-03, grad_scale: 16.0
+2024-09-19 00:00:39,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=549480.0, ans=0.125
+2024-09-19 00:00:55,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=549520.0, ans=0.125
+2024-09-19 00:00:59,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=549560.0, ans=0.0
+2024-09-19 00:01:02,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=549560.0, ans=0.025
+2024-09-19 00:01:07,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=549560.0, ans=0.0
+2024-09-19 00:01:10,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=549560.0, ans=0.1
+2024-09-19 00:01:10,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=549560.0, ans=0.125
+2024-09-19 00:01:14,876 INFO [train.py:1198] (0/2) Epoch 31, batch 1650, loss[loss=0.2498, ctc_loss=0.1298, cr_loss=0.3732, attn_decoder_loss=0.2549, over 29725.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1205, cr_loss=0.3622, attn_decoder_loss=0.2428, over 5758833.38 frames. ], batch size: 89, lr: 3.58e-03, grad_scale: 8.0
+2024-09-19 00:02:03,350 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.469e+01 8.401e+01 8.981e+01 9.648e+01 1.683e+02, threshold=1.796e+02, percent-clipped=0.0
+2024-09-19 00:02:05,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=549720.0, ans=0.125
+2024-09-19 00:02:28,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=549760.0, ans=0.025
+2024-09-19 00:02:31,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=549800.0, ans=0.025
+2024-09-19 00:02:32,506 INFO [train.py:1198] (0/2) Epoch 31, batch 1700, loss[loss=0.2157, ctc_loss=0.1065, cr_loss=0.3381, attn_decoder_loss=0.2203, over 29583.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1203, cr_loss=0.3621, attn_decoder_loss=0.2427, over 5778722.78 frames. ], batch size: 69, lr: 3.58e-03, grad_scale: 8.0
+2024-09-19 00:02:56,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.95 vs. limit=15.0
+2024-09-19 00:03:11,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=549880.0, ans=0.0
+2024-09-19 00:03:11,564 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.07 vs. limit=15.0
+2024-09-19 00:03:42,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=549960.0, ans=0.125
+2024-09-19 00:03:42,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=549960.0, ans=0.125
+2024-09-19 00:03:43,576 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.78 vs. limit=12.0
+2024-09-19 00:03:48,476 INFO [train.py:1198] (0/2) Epoch 31, batch 1750, loss[loss=0.2134, ctc_loss=0.1004, cr_loss=0.3099, attn_decoder_loss=0.2191, over 29337.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1202, cr_loss=0.3619, attn_decoder_loss=0.2424, over 5787513.21 frames. ], batch size: 67, lr: 3.57e-03, grad_scale: 8.0
+2024-09-19 00:03:53,603 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.49 vs. limit=12.0
+2024-09-19 00:04:35,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=550120.0, ans=0.0
+2024-09-19 00:04:36,769 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.248e+01 8.450e+01 9.086e+01 9.663e+01 1.697e+02, threshold=1.817e+02, percent-clipped=0.0
+2024-09-19 00:05:01,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=550160.0, ans=0.125
+2024-09-19 00:05:05,953 INFO [train.py:1198] (0/2) Epoch 31, batch 1800, loss[loss=0.2469, ctc_loss=0.1261, cr_loss=0.3752, attn_decoder_loss=0.252, over 29698.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1204, cr_loss=0.362, attn_decoder_loss=0.2423, over 5790935.79 frames. ], batch size: 83, lr: 3.57e-03, grad_scale: 8.0
+2024-09-19 00:05:16,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=550200.0, ans=0.025
+2024-09-19 00:05:26,782 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.95 vs. limit=15.0
+2024-09-19 00:05:38,609 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.81 vs. limit=22.5
+2024-09-19 00:06:00,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=550320.0, ans=0.5
+2024-09-19 00:06:17,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=550360.0, ans=0.125
+2024-09-19 00:06:23,828 INFO [train.py:1198] (0/2) Epoch 31, batch 1850, loss[loss=0.2452, ctc_loss=0.1232, cr_loss=0.3525, attn_decoder_loss=0.2509, over 29623.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1203, cr_loss=0.3619, attn_decoder_loss=0.2424, over 5797363.68 frames. ], batch size: 86, lr: 3.57e-03, grad_scale: 4.0
+2024-09-19 00:06:48,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=550440.0, ans=0.0
+2024-09-19 00:07:14,101 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.739e+01 8.638e+01 9.110e+01 9.627e+01 2.703e+02, threshold=1.822e+02, percent-clipped=1.0
+2024-09-19 00:07:15,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=550520.0, ans=10.0
+2024-09-19 00:07:39,689 INFO [train.py:1198] (0/2) Epoch 31, batch 1900, loss[loss=0.2451, ctc_loss=0.1184, cr_loss=0.3563, attn_decoder_loss=0.2513, over 29715.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1205, cr_loss=0.3623, attn_decoder_loss=0.2429, over 5805547.58 frames. ], batch size: 89, lr: 3.57e-03, grad_scale: 8.0
+2024-09-19 00:07:43,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=550600.0, ans=0.125
+2024-09-19 00:07:46,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=550600.0, ans=0.2
+2024-09-19 00:08:05,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=550640.0, ans=0.125
+2024-09-19 00:08:31,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=550720.0, ans=0.0
+2024-09-19 00:08:57,883 INFO [train.py:1198] (0/2) Epoch 31, batch 1950, loss[loss=0.2325, ctc_loss=0.1191, cr_loss=0.3684, attn_decoder_loss=0.2369, over 29446.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.121, cr_loss=0.364, attn_decoder_loss=0.244, over 5820138.71 frames. ], batch size: 78, lr: 3.57e-03, grad_scale: 8.0
+2024-09-19 00:09:11,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=550840.0, ans=0.0
+2024-09-19 00:09:23,065 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.33 vs. limit=15.0
+2024-09-19 00:09:36,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.59 vs. limit=15.0
+2024-09-19 00:09:42,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=550920.0, ans=0.125
+2024-09-19 00:09:42,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=550920.0, ans=0.0
+2024-09-19 00:09:47,671 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.604e+01 9.078e+01 9.873e+01 2.917e+02, threshold=1.816e+02, percent-clipped=2.0
+2024-09-19 00:09:55,407 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 00:10:09,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=550960.0, ans=0.025
+2024-09-19 00:10:15,518 INFO [train.py:1198] (0/2) Epoch 31, batch 2000, loss[loss=0.2186, ctc_loss=0.1103, cr_loss=0.3437, attn_decoder_loss=0.223, over 29328.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1212, cr_loss=0.3642, attn_decoder_loss=0.2442, over 5797911.02 frames. ], batch size: 67, lr: 3.57e-03, grad_scale: 16.0
+2024-09-19 00:10:20,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=551000.0, ans=0.125
+2024-09-19 00:10:37,398 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.17 vs. limit=22.5
+2024-09-19 00:10:39,172 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=15.0
+2024-09-19 00:10:53,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=551080.0, ans=0.2
+2024-09-19 00:10:58,009 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 00:11:04,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=551120.0, ans=0.0
+2024-09-19 00:11:11,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=551120.0, ans=0.0
+2024-09-19 00:11:31,489 INFO [train.py:1198] (0/2) Epoch 31, batch 2050, loss[loss=0.215, ctc_loss=0.1074, cr_loss=0.3453, attn_decoder_loss=0.2193, over 29463.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1206, cr_loss=0.3627, attn_decoder_loss=0.2432, over 5789733.65 frames. ], batch size: 70, lr: 3.57e-03, grad_scale: 16.0
+2024-09-19 00:11:48,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=551240.0, ans=0.125
+2024-09-19 00:12:00,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=551280.0, ans=0.125
+2024-09-19 00:12:06,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=551280.0, ans=0.125
+2024-09-19 00:12:21,279 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.718e+01 8.532e+01 9.006e+01 9.562e+01 1.976e+02, threshold=1.801e+02, percent-clipped=1.0
+2024-09-19 00:12:27,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=551320.0, ans=0.125
+2024-09-19 00:12:38,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=551360.0, ans=0.125
+2024-09-19 00:12:48,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=551400.0, ans=0.125
+2024-09-19 00:12:49,613 INFO [train.py:1198] (0/2) Epoch 31, batch 2100, loss[loss=0.2334, ctc_loss=0.118, cr_loss=0.359, attn_decoder_loss=0.2382, over 29770.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1205, cr_loss=0.3627, attn_decoder_loss=0.2428, over 5800843.37 frames. ], batch size: 81, lr: 3.57e-03, grad_scale: 16.0
+2024-09-19 00:12:56,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=551400.0, ans=0.0
+2024-09-19 00:12:58,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=551400.0, ans=0.0
+2024-09-19 00:13:02,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=551400.0, ans=0.1
+2024-09-19 00:13:27,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=551480.0, ans=0.125
+2024-09-19 00:13:43,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=551520.0, ans=0.025
+2024-09-19 00:13:45,693 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.34 vs. limit=15.0
+2024-09-19 00:14:07,010 INFO [train.py:1198] (0/2) Epoch 31, batch 2150, loss[loss=0.2314, ctc_loss=0.1164, cr_loss=0.3695, attn_decoder_loss=0.236, over 29450.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.12, cr_loss=0.3623, attn_decoder_loss=0.2424, over 5816075.18 frames. ], batch size: 78, lr: 3.57e-03, grad_scale: 16.0
+2024-09-19 00:14:11,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=551600.0, ans=0.125
+2024-09-19 00:14:31,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=551640.0, ans=0.125
+2024-09-19 00:14:43,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=551680.0, ans=0.125
+2024-09-19 00:14:44,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=551680.0, ans=0.125
+2024-09-19 00:14:57,048 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.507e+01 8.560e+01 8.969e+01 9.441e+01 3.216e+02, threshold=1.794e+02, percent-clipped=1.0
+2024-09-19 00:15:18,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=551760.0, ans=0.125
+2024-09-19 00:15:23,015 INFO [train.py:1198] (0/2) Epoch 31, batch 2200, loss[loss=0.2448, ctc_loss=0.1127, cr_loss=0.3387, attn_decoder_loss=0.252, over 29646.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.12, cr_loss=0.3617, attn_decoder_loss=0.2424, over 5813250.19 frames. ], batch size: 86, lr: 3.57e-03, grad_scale: 16.0
+2024-09-19 00:15:23,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=551800.0, ans=0.035
+2024-09-19 00:15:36,200 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.32 vs. limit=10.0
+2024-09-19 00:15:42,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=551840.0, ans=0.2
+2024-09-19 00:16:05,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=551880.0, ans=0.1
+2024-09-19 00:16:12,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.39 vs. limit=15.0
+2024-09-19 00:16:19,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=551920.0, ans=0.0
+2024-09-19 00:16:22,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=551960.0, ans=0.95
+2024-09-19 00:16:39,386 INFO [train.py:1198] (0/2) Epoch 31, batch 2250, loss[loss=0.2382, ctc_loss=0.112, cr_loss=0.3404, attn_decoder_loss=0.2446, over 29684.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1195, cr_loss=0.361, attn_decoder_loss=0.2422, over 5812221.56 frames. ], batch size: 82, lr: 3.57e-03, grad_scale: 16.0
+2024-09-19 00:16:39,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=552000.0, ans=0.125
+2024-09-19 00:16:42,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=552000.0, ans=0.125
+2024-09-19 00:16:49,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=552000.0, ans=0.125
+2024-09-19 00:17:00,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=552040.0, ans=0.2
+2024-09-19 00:17:03,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=552040.0, ans=0.05
+2024-09-19 00:17:09,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.92 vs. limit=15.0
+2024-09-19 00:17:32,962 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.331e+01 8.496e+01 8.948e+01 9.461e+01 2.809e+02, threshold=1.790e+02, percent-clipped=1.0
+2024-09-19 00:17:38,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.25 vs. limit=15.0
+2024-09-19 00:17:57,272 INFO [train.py:1198] (0/2) Epoch 31, batch 2300, loss[loss=0.2132, ctc_loss=0.0993, cr_loss=0.3028, attn_decoder_loss=0.2191, over 29739.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1192, cr_loss=0.3598, attn_decoder_loss=0.2413, over 5798736.49 frames. ], batch size: 72, lr: 3.57e-03, grad_scale: 8.0
+2024-09-19 00:18:05,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=552200.0, ans=0.05
+2024-09-19 00:19:01,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=552360.0, ans=0.125
+2024-09-19 00:19:15,043 INFO [train.py:1198] (0/2) Epoch 31, batch 2350, loss[loss=0.2527, ctc_loss=0.1355, cr_loss=0.3843, attn_decoder_loss=0.2572, over 29685.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1194, cr_loss=0.3602, attn_decoder_loss=0.2416, over 5804134.29 frames. ], batch size: 83, lr: 3.57e-03, grad_scale: 8.0
+2024-09-19 00:19:27,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=552400.0, ans=0.2
+2024-09-19 00:19:28,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=552440.0, ans=0.025
+2024-09-19 00:19:35,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=552440.0, ans=0.125
+2024-09-19 00:19:56,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=552480.0, ans=0.025
+2024-09-19 00:19:59,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=552520.0, ans=0.125
+2024-09-19 00:19:59,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=552520.0, ans=0.125
+2024-09-19 00:20:06,504 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.333e+01 8.603e+01 9.093e+01 9.793e+01 1.880e+02, threshold=1.819e+02, percent-clipped=1.0
+2024-09-19 00:20:28,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=552560.0, ans=0.0
+2024-09-19 00:20:31,125 INFO [train.py:1198] (0/2) Epoch 31, batch 2400, loss[loss=0.2237, ctc_loss=0.106, cr_loss=0.3211, attn_decoder_loss=0.2296, over 29547.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1201, cr_loss=0.3617, attn_decoder_loss=0.2422, over 5807617.74 frames. ], batch size: 76, lr: 3.57e-03, grad_scale: 16.0
+2024-09-19 00:20:40,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=552600.0, ans=0.025
+2024-09-19 00:21:08,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.61 vs. limit=15.0
+2024-09-19 00:21:09,644 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 00:21:23,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=552720.0, ans=0.125
+2024-09-19 00:21:23,870 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.30 vs. limit=15.0
+2024-09-19 00:21:24,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=552720.0, ans=0.2
+2024-09-19 00:21:29,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=552720.0, ans=0.125
+2024-09-19 00:21:35,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=552760.0, ans=0.125
+2024-09-19 00:21:51,231 INFO [train.py:1198] (0/2) Epoch 31, batch 2450, loss[loss=0.2446, ctc_loss=0.1208, cr_loss=0.3679, attn_decoder_loss=0.2502, over 29697.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1207, cr_loss=0.3627, attn_decoder_loss=0.2429, over 5783437.67 frames. ], batch size: 82, lr: 3.57e-03, grad_scale: 8.0
+2024-09-19 00:22:44,294 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.732e+01 9.075e+01 9.673e+01 2.868e+02, threshold=1.815e+02, percent-clipped=2.0
+2024-09-19 00:22:46,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=552920.0, ans=0.1
+2024-09-19 00:22:49,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=552920.0, ans=0.1
+2024-09-19 00:22:53,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=552960.0, ans=0.2
+2024-09-19 00:22:59,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=552960.0, ans=0.125
+2024-09-19 00:23:06,988 INFO [train.py:1198] (0/2) Epoch 31, batch 2500, loss[loss=0.2435, ctc_loss=0.1136, cr_loss=0.3395, attn_decoder_loss=0.2504, over 29653.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1209, cr_loss=0.3634, attn_decoder_loss=0.2432, over 5793156.71 frames. ], batch size: 86, lr: 3.57e-03, grad_scale: 8.0
+2024-09-19 00:23:14,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=553000.0, ans=0.125
+2024-09-19 00:23:36,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=553080.0, ans=0.0
+2024-09-19 00:23:46,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=553080.0, ans=0.0
+2024-09-19 00:23:51,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=553120.0, ans=0.0
+2024-09-19 00:24:04,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=553120.0, ans=0.1
+2024-09-19 00:24:08,540 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.88 vs. limit=22.5
+2024-09-19 00:24:10,077 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.43 vs. limit=15.0
+2024-09-19 00:24:22,898 INFO [train.py:1198] (0/2) Epoch 31, batch 2550, loss[loss=0.208, ctc_loss=0.09618, cr_loss=0.3072, attn_decoder_loss=0.2136, over 29305.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1208, cr_loss=0.3634, attn_decoder_loss=0.2431, over 5796970.77 frames. ], batch size: 67, lr: 3.56e-03, grad_scale: 8.0
+2024-09-19 00:24:40,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=553240.0, ans=0.5
+2024-09-19 00:24:41,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=553240.0, ans=0.2
+2024-09-19 00:24:58,861 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.47 vs. limit=12.0
+2024-09-19 00:25:18,168 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.379e+01 8.309e+01 8.709e+01 9.331e+01 1.370e+02, threshold=1.742e+02, percent-clipped=0.0
+2024-09-19 00:25:21,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=553320.0, ans=0.0
+2024-09-19 00:25:35,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=553360.0, ans=0.125
+2024-09-19 00:25:43,196 INFO [train.py:1198] (0/2) Epoch 31, batch 2600, loss[loss=0.2374, ctc_loss=0.1197, cr_loss=0.3636, attn_decoder_loss=0.2424, over 29456.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1207, cr_loss=0.3629, attn_decoder_loss=0.2431, over 5793332.72 frames. ], batch size: 78, lr: 3.56e-03, grad_scale: 8.0
+2024-09-19 00:25:48,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=553400.0, ans=0.0
+2024-09-19 00:25:48,899 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.94 vs. limit=12.0
+2024-09-19 00:25:53,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=553400.0, ans=0.2
+2024-09-19 00:25:54,423 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.73 vs. limit=15.0
+2024-09-19 00:26:19,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=553480.0, ans=0.0
+2024-09-19 00:26:19,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=553480.0, ans=0.125
+2024-09-19 00:26:19,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=553480.0, ans=0.125
+2024-09-19 00:26:33,830 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.77 vs. limit=15.0
+2024-09-19 00:26:48,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=553560.0, ans=0.125
+2024-09-19 00:26:58,907 INFO [train.py:1198] (0/2) Epoch 31, batch 2650, loss[loss=0.2503, ctc_loss=0.1263, cr_loss=0.3781, attn_decoder_loss=0.2557, over 29216.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1207, cr_loss=0.3631, attn_decoder_loss=0.2434, over 5800012.88 frames. ], batch size: 100, lr: 3.56e-03, grad_scale: 8.0
+2024-09-19 00:27:06,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=553600.0, ans=10.0
+2024-09-19 00:27:15,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=553640.0, ans=0.125
+2024-09-19 00:27:35,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=553680.0, ans=0.125
+2024-09-19 00:27:41,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=553680.0, ans=0.125
+2024-09-19 00:27:41,993 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.03 vs. limit=22.5
+2024-09-19 00:27:48,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=553720.0, ans=0.0
+2024-09-19 00:27:51,191 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.524e+01 8.581e+01 9.000e+01 9.310e+01 1.740e+02, threshold=1.800e+02, percent-clipped=0.0
+2024-09-19 00:27:56,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=553720.0, ans=0.1
+2024-09-19 00:28:05,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=553760.0, ans=0.125
+2024-09-19 00:28:14,277 INFO [train.py:1198] (0/2) Epoch 31, batch 2700, loss[loss=0.2512, ctc_loss=0.1264, cr_loss=0.371, attn_decoder_loss=0.2569, over 29534.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1211, cr_loss=0.3639, attn_decoder_loss=0.2438, over 5796578.46 frames. ], batch size: 87, lr: 3.56e-03, grad_scale: 8.0
+2024-09-19 00:28:14,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=553800.0, ans=0.025
+2024-09-19 00:28:20,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=553800.0, ans=0.125
+2024-09-19 00:28:45,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=553880.0, ans=0.025
+2024-09-19 00:28:59,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=553880.0, ans=0.2
+2024-09-19 00:29:12,902 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 00:29:31,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=553960.0, ans=0.2
+2024-09-19 00:29:34,469 INFO [train.py:1198] (0/2) Epoch 31, batch 2750, loss[loss=0.2317, ctc_loss=0.1286, cr_loss=0.3781, attn_decoder_loss=0.2347, over 29522.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1208, cr_loss=0.3633, attn_decoder_loss=0.2427, over 5795324.11 frames. ], batch size: 75, lr: 3.56e-03, grad_scale: 8.0
+2024-09-19 00:30:23,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=554120.0, ans=0.1
+2024-09-19 00:30:27,598 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.479e+01 8.490e+01 8.925e+01 9.454e+01 1.870e+02, threshold=1.785e+02, percent-clipped=1.0
+2024-09-19 00:30:50,894 INFO [train.py:1198] (0/2) Epoch 31, batch 2800, loss[loss=0.2606, ctc_loss=0.149, cr_loss=0.3854, attn_decoder_loss=0.2644, over 20503.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1215, cr_loss=0.3647, attn_decoder_loss=0.2432, over 5776355.72 frames. ], batch size: 209, lr: 3.56e-03, grad_scale: 16.0
+2024-09-19 00:30:57,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.94 vs. limit=22.5
+2024-09-19 00:31:12,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=554240.0, ans=0.5
+2024-09-19 00:31:25,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=554280.0, ans=0.0
+2024-09-19 00:31:27,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=554280.0, ans=0.125
+2024-09-19 00:32:00,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=554360.0, ans=0.125
+2024-09-19 00:32:06,638 INFO [train.py:1198] (0/2) Epoch 31, batch 2850, loss[loss=0.2297, ctc_loss=0.1155, cr_loss=0.3761, attn_decoder_loss=0.234, over 29521.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.122, cr_loss=0.3653, attn_decoder_loss=0.2436, over 5762943.17 frames. ], batch size: 77, lr: 3.56e-03, grad_scale: 16.0
+2024-09-19 00:32:13,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=554400.0, ans=0.0
+2024-09-19 00:32:15,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=554400.0, ans=0.0
+2024-09-19 00:32:50,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=554480.0, ans=0.2
+2024-09-19 00:32:55,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=554520.0, ans=0.125
+2024-09-19 00:33:02,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=554520.0, ans=0.2
+2024-09-19 00:33:03,237 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.488e+01 8.652e+01 9.121e+01 9.681e+01 2.307e+02, threshold=1.824e+02, percent-clipped=1.0
+2024-09-19 00:33:19,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=554560.0, ans=0.125
+2024-09-19 00:33:24,927 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.47 vs. limit=15.0
+2024-09-19 00:33:26,765 INFO [train.py:1198] (0/2) Epoch 31, batch 2900, loss[loss=0.2361, ctc_loss=0.12, cr_loss=0.3726, attn_decoder_loss=0.2407, over 29412.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1221, cr_loss=0.3662, attn_decoder_loss=0.2445, over 5788998.06 frames. 
], batch size: 79, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:33:27,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=554600.0, ans=0.09899494936611666 +2024-09-19 00:33:57,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=554680.0, ans=0.0 +2024-09-19 00:33:57,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=554680.0, ans=0.1 +2024-09-19 00:34:09,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=554680.0, ans=0.1 +2024-09-19 00:34:12,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=554720.0, ans=0.5 +2024-09-19 00:34:14,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=554720.0, ans=0.125 +2024-09-19 00:34:40,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.16 vs. limit=15.0 +2024-09-19 00:34:42,832 INFO [train.py:1198] (0/2) Epoch 31, batch 2950, loss[loss=0.2211, ctc_loss=0.1071, cr_loss=0.3303, attn_decoder_loss=0.2264, over 29541.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1209, cr_loss=0.3635, attn_decoder_loss=0.2432, over 5783661.58 frames. ], batch size: 75, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:35:08,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=554840.0, ans=0.125 +2024-09-19 00:35:20,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=554880.0, ans=0.125 +2024-09-19 00:35:22,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=554880.0, ans=0.1 +2024-09-19 00:35:34,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=554920.0, ans=0.125 +2024-09-19 00:35:37,428 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.875e+01 8.492e+01 9.114e+01 9.567e+01 2.273e+02, threshold=1.823e+02, percent-clipped=2.0 +2024-09-19 00:35:38,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.55 vs. limit=22.5 +2024-09-19 00:35:45,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=554960.0, ans=0.2 +2024-09-19 00:35:45,883 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.59 vs. limit=15.0 +2024-09-19 00:35:50,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=554960.0, ans=0.0 +2024-09-19 00:35:50,613 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.94 vs. limit=15.0 +2024-09-19 00:35:59,048 INFO [train.py:1198] (0/2) Epoch 31, batch 3000, loss[loss=0.2361, ctc_loss=0.1236, cr_loss=0.387, attn_decoder_loss=0.24, over 29760.00 frames. 
], tot_loss[loss=0.2383, ctc_loss=0.1211, cr_loss=0.3634, attn_decoder_loss=0.2432, over 5784419.92 frames. ], batch size: 81, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:35:59,049 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 00:36:03,199 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.1.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.8752, 3.3226, 3.6253, 3.7246], device='cuda:0') +2024-09-19 00:36:14,661 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.1.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.3077, 4.6332, 5.1183, 5.2325], device='cuda:0') +2024-09-19 00:36:19,672 INFO [train.py:1230] (0/2) Epoch 31, validation: loss=0.2117, ctc_loss=0.03748, cr_loss=5.925e-15, attn_decoder_loss=0.2311, over 944034.00 frames. +2024-09-19 00:36:19,673 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 00:36:32,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=555000.0, ans=0.5 +2024-09-19 00:37:02,186 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.70 vs. limit=15.0 +2024-09-19 00:37:03,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=555080.0, ans=0.0 +2024-09-19 00:37:19,319 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.20 vs. limit=22.5 +2024-09-19 00:37:37,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.16 vs. limit=15.0 +2024-09-19 00:37:38,277 INFO [train.py:1198] (0/2) Epoch 31, batch 3050, loss[loss=0.2309, ctc_loss=0.1221, cr_loss=0.3887, attn_decoder_loss=0.2344, over 29527.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1216, cr_loss=0.3645, attn_decoder_loss=0.2439, over 5779081.78 frames. ], batch size: 76, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:37:40,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=555200.0, ans=0.1 +2024-09-19 00:37:53,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=555240.0, ans=0.025 +2024-09-19 00:38:15,017 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:38:15,442 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.88 vs. limit=15.0 +2024-09-19 00:38:21,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=555280.0, ans=0.0 +2024-09-19 00:38:32,843 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.264e+01 8.564e+01 9.261e+01 9.873e+01 2.101e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-19 00:38:42,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=555360.0, ans=0.2 +2024-09-19 00:38:54,005 INFO [train.py:1198] (0/2) Epoch 31, batch 3100, loss[loss=0.2474, ctc_loss=0.1284, cr_loss=0.3982, attn_decoder_loss=0.2518, over 29255.00 frames. 
], tot_loss[loss=0.2384, ctc_loss=0.1211, cr_loss=0.3632, attn_decoder_loss=0.2434, over 5778677.53 frames. ], batch size: 100, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:38:54,758 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.85 vs. limit=15.0 +2024-09-19 00:39:03,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=555400.0, ans=0.1 +2024-09-19 00:39:08,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=555440.0, ans=0.125 +2024-09-19 00:39:24,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=555480.0, ans=0.0 +2024-09-19 00:39:26,564 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.82 vs. limit=22.5 +2024-09-19 00:39:31,111 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:39:44,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=555520.0, ans=0.125 +2024-09-19 00:39:50,081 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.40 vs. limit=15.0 +2024-09-19 00:40:10,294 INFO [train.py:1198] (0/2) Epoch 31, batch 3150, loss[loss=0.2506, ctc_loss=0.1247, cr_loss=0.362, attn_decoder_loss=0.2565, over 28847.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1211, cr_loss=0.3632, attn_decoder_loss=0.2434, over 5785226.08 frames. ], batch size: 104, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:40:14,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=555600.0, ans=0.125 +2024-09-19 00:40:29,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=555640.0, ans=0.125 +2024-09-19 00:40:44,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=555680.0, ans=0.0 +2024-09-19 00:40:46,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=555680.0, ans=0.0 +2024-09-19 00:41:09,176 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.063e+01 8.684e+01 9.147e+01 9.580e+01 2.256e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-19 00:41:09,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=555720.0, ans=0.1 +2024-09-19 00:41:12,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=555720.0, ans=0.125 +2024-09-19 00:41:23,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=555760.0, ans=0.125 +2024-09-19 00:41:30,419 INFO [train.py:1198] (0/2) Epoch 31, batch 3200, loss[loss=0.2249, ctc_loss=0.09647, cr_loss=0.3118, attn_decoder_loss=0.2322, over 29394.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1207, cr_loss=0.3633, attn_decoder_loss=0.2429, over 5795084.08 frames. 
], batch size: 79, lr: 3.56e-03, grad_scale: 16.0 +2024-09-19 00:41:47,948 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.00 vs. limit=15.0 +2024-09-19 00:41:48,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=555840.0, ans=0.125 +2024-09-19 00:42:01,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=555880.0, ans=0.0 +2024-09-19 00:42:30,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=555960.0, ans=0.125 +2024-09-19 00:42:37,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=555960.0, ans=0.0 +2024-09-19 00:42:46,900 INFO [train.py:1198] (0/2) Epoch 31, batch 3250, loss[loss=0.2396, ctc_loss=0.1206, cr_loss=0.3617, attn_decoder_loss=0.2448, over 29710.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1207, cr_loss=0.3636, attn_decoder_loss=0.2434, over 5801416.62 frames. ], batch size: 84, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:43:13,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.63 vs. limit=12.0 +2024-09-19 00:43:42,508 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.526e+01 8.541e+01 8.932e+01 9.487e+01 3.275e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-19 00:43:55,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=556160.0, ans=0.1 +2024-09-19 00:43:55,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=556160.0, ans=0.2 +2024-09-19 00:44:02,474 INFO [train.py:1198] (0/2) Epoch 31, batch 3300, loss[loss=0.247, ctc_loss=0.1263, cr_loss=0.3663, attn_decoder_loss=0.2522, over 28110.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1199, cr_loss=0.3617, attn_decoder_loss=0.2422, over 5797155.25 frames. ], batch size: 111, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:44:15,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=556200.0, ans=0.0 +2024-09-19 00:44:46,750 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.95 vs. limit=15.0 +2024-09-19 00:44:52,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=556320.0, ans=0.025 +2024-09-19 00:45:01,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=556320.0, ans=0.125 +2024-09-19 00:45:03,647 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.28 vs. 
limit=15.0 +2024-09-19 00:45:07,555 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:45:17,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=556360.0, ans=0.07 +2024-09-19 00:45:21,978 INFO [train.py:1198] (0/2) Epoch 31, batch 3350, loss[loss=0.2489, ctc_loss=0.1246, cr_loss=0.3703, attn_decoder_loss=0.2545, over 28822.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1204, cr_loss=0.3624, attn_decoder_loss=0.2427, over 5773423.15 frames. ], batch size: 104, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:45:23,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=556400.0, ans=0.0 +2024-09-19 00:45:29,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=556400.0, ans=0.2 +2024-09-19 00:45:39,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=556440.0, ans=0.1 +2024-09-19 00:46:04,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=556480.0, ans=0.0 +2024-09-19 00:46:07,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=556520.0, ans=0.0 +2024-09-19 00:46:17,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=556520.0, ans=0.125 +2024-09-19 00:46:18,236 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.574e+01 8.469e+01 9.027e+01 9.591e+01 1.739e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 00:46:18,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=556520.0, ans=0.125 +2024-09-19 00:46:22,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=556560.0, ans=0.2 +2024-09-19 00:46:38,053 INFO [train.py:1198] (0/2) Epoch 31, batch 3400, loss[loss=0.2187, ctc_loss=0.108, cr_loss=0.3305, attn_decoder_loss=0.2237, over 29359.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1207, cr_loss=0.3625, attn_decoder_loss=0.2426, over 5766102.54 frames. ], batch size: 67, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:46:39,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=556600.0, ans=0.0 +2024-09-19 00:46:53,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=556640.0, ans=0.0 +2024-09-19 00:47:45,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=556760.0, ans=0.0 +2024-09-19 00:47:49,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=556760.0, ans=0.1 +2024-09-19 00:47:54,284 INFO [train.py:1198] (0/2) Epoch 31, batch 3450, loss[loss=0.2399, ctc_loss=0.1162, cr_loss=0.3496, attn_decoder_loss=0.2459, over 28214.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1206, cr_loss=0.3626, attn_decoder_loss=0.2427, over 5773405.30 frames. 
], batch size: 111, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:48:13,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=556840.0, ans=0.125 +2024-09-19 00:48:35,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.43 vs. limit=6.0 +2024-09-19 00:48:42,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=556920.0, ans=0.125 +2024-09-19 00:48:54,519 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.500e+01 8.684e+01 9.223e+01 9.839e+01 1.576e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-19 00:48:57,197 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.26 vs. limit=15.0 +2024-09-19 00:49:11,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=556960.0, ans=0.0 +2024-09-19 00:49:13,818 INFO [train.py:1198] (0/2) Epoch 31, batch 3500, loss[loss=0.2174, ctc_loss=0.1038, cr_loss=0.3196, attn_decoder_loss=0.2229, over 29339.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1203, cr_loss=0.3619, attn_decoder_loss=0.2424, over 5774981.12 frames. ], batch size: 71, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:49:17,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=557000.0, ans=0.1 +2024-09-19 00:49:51,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=557080.0, ans=0.125 +2024-09-19 00:49:53,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=557080.0, ans=0.025 +2024-09-19 00:50:09,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=557120.0, ans=0.0 +2024-09-19 00:50:25,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=557160.0, ans=0.0 +2024-09-19 00:50:28,515 INFO [train.py:1198] (0/2) Epoch 31, batch 3550, loss[loss=0.2501, ctc_loss=0.1271, cr_loss=0.376, attn_decoder_loss=0.2554, over 29702.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1195, cr_loss=0.3602, attn_decoder_loss=0.242, over 5781415.77 frames. 
], batch size: 89, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:50:56,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=557280.0, ans=0.125 +2024-09-19 00:51:05,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=557280.0, ans=0.125 +2024-09-19 00:51:10,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=557280.0, ans=0.07 +2024-09-19 00:51:23,620 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.439e+01 8.575e+01 9.105e+01 9.496e+01 5.708e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-19 00:51:29,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=557360.0, ans=0.2 +2024-09-19 00:51:35,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=557360.0, ans=0.125 +2024-09-19 00:51:41,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=557400.0, ans=0.2 +2024-09-19 00:51:42,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.52 vs. limit=22.5 +2024-09-19 00:51:43,114 INFO [train.py:1198] (0/2) Epoch 31, batch 3600, loss[loss=0.2297, ctc_loss=0.1088, cr_loss=0.3412, attn_decoder_loss=0.2355, over 29487.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1197, cr_loss=0.3608, attn_decoder_loss=0.2422, over 5791516.72 frames. ], batch size: 77, lr: 3.55e-03, grad_scale: 16.0 +2024-09-19 00:51:52,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=557400.0, ans=0.125 +2024-09-19 00:52:03,677 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.50 vs. limit=12.0 +2024-09-19 00:52:22,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=557480.0, ans=0.2 +2024-09-19 00:52:55,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=557560.0, ans=0.125 +2024-09-19 00:52:58,315 INFO [train.py:1198] (0/2) Epoch 31, batch 3650, loss[loss=0.2519, ctc_loss=0.1235, cr_loss=0.3545, attn_decoder_loss=0.2583, over 29557.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1195, cr_loss=0.3605, attn_decoder_loss=0.242, over 5794158.13 frames. ], batch size: 90, lr: 3.55e-03, grad_scale: 16.0 +2024-09-19 00:53:23,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=557640.0, ans=0.0 +2024-09-19 00:53:29,017 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.43 vs. 
limit=15.0 +2024-09-19 00:53:34,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=557680.0, ans=0.1 +2024-09-19 00:53:36,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=557680.0, ans=0.0 +2024-09-19 00:53:49,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=557720.0, ans=0.0 +2024-09-19 00:53:55,288 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.468e+01 9.016e+01 9.512e+01 1.613e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-19 00:54:16,694 INFO [train.py:1198] (0/2) Epoch 31, batch 3700, loss[loss=0.2378, ctc_loss=0.1204, cr_loss=0.3537, attn_decoder_loss=0.243, over 29698.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1198, cr_loss=0.3609, attn_decoder_loss=0.2422, over 5803591.46 frames. ], batch size: 84, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:54:24,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=557800.0, ans=0.125 +2024-09-19 00:54:56,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=557880.0, ans=0.125 +2024-09-19 00:55:13,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=557920.0, ans=0.0 +2024-09-19 00:55:30,747 INFO [train.py:1198] (0/2) Epoch 31, batch 3750, loss[loss=0.2115, ctc_loss=0.1051, cr_loss=0.3312, attn_decoder_loss=0.216, over 29356.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1201, cr_loss=0.3618, attn_decoder_loss=0.2422, over 5807835.14 frames. ], batch size: 67, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:55:47,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=558040.0, ans=0.125 +2024-09-19 00:56:18,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=558120.0, ans=0.0 +2024-09-19 00:56:27,504 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.596e+01 8.551e+01 9.139e+01 9.962e+01 3.532e+02, threshold=1.828e+02, percent-clipped=2.0 +2024-09-19 00:56:38,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=558160.0, ans=0.05 +2024-09-19 00:56:45,329 INFO [train.py:1198] (0/2) Epoch 31, batch 3800, loss[loss=0.2527, ctc_loss=0.1422, cr_loss=0.404, attn_decoder_loss=0.256, over 29639.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1203, cr_loss=0.362, attn_decoder_loss=0.2423, over 5797166.80 frames. 
], batch size: 86, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:56:57,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=558200.0, ans=0.125 +2024-09-19 00:57:08,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=558240.0, ans=0.07 +2024-09-19 00:57:16,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=558280.0, ans=22.5 +2024-09-19 00:57:32,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=558320.0, ans=0.125 +2024-09-19 00:57:33,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=558320.0, ans=10.0 +2024-09-19 00:57:38,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.03 vs. limit=15.0 +2024-09-19 00:57:40,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=558320.0, ans=0.0 +2024-09-19 00:57:51,737 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.36 vs. limit=15.0 +2024-09-19 00:57:59,984 INFO [train.py:1198] (0/2) Epoch 31, batch 3850, loss[loss=0.2518, ctc_loss=0.1361, cr_loss=0.4007, attn_decoder_loss=0.2558, over 29275.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1199, cr_loss=0.3614, attn_decoder_loss=0.242, over 5811463.95 frames. ], batch size: 100, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:58:22,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=558440.0, ans=0.125 +2024-09-19 00:58:40,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=558480.0, ans=0.125 +2024-09-19 00:58:56,238 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.624e+01 8.602e+01 9.005e+01 9.471e+01 1.448e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-19 00:58:59,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=558560.0, ans=0.125 +2024-09-19 00:59:02,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=558560.0, ans=0.125 +2024-09-19 00:59:14,165 INFO [train.py:1198] (0/2) Epoch 31, batch 3900, loss[loss=0.2514, ctc_loss=0.1275, cr_loss=0.396, attn_decoder_loss=0.2563, over 29625.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.12, cr_loss=0.362, attn_decoder_loss=0.2422, over 5816153.37 frames. ], batch size: 86, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 01:00:19,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=558760.0, ans=0.0 +2024-09-19 01:00:28,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=558760.0, ans=0.125 +2024-09-19 01:00:30,739 INFO [train.py:1198] (0/2) Epoch 31, batch 3950, loss[loss=0.2542, ctc_loss=0.1276, cr_loss=0.3831, attn_decoder_loss=0.2597, over 29521.00 frames. 
], tot_loss[loss=0.2375, ctc_loss=0.12, cr_loss=0.3623, attn_decoder_loss=0.2425, over 5835656.53 frames. ], batch size: 97, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 01:00:47,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=558840.0, ans=0.125 +2024-09-19 01:00:55,073 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.51 vs. limit=6.0 +2024-09-19 01:01:06,951 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.63 vs. limit=15.0 +2024-09-19 01:01:18,891 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.02 vs. limit=22.5 +2024-09-19 01:01:28,227 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.387e+01 8.418e+01 8.953e+01 9.483e+01 1.231e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 01:01:31,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=558960.0, ans=0.0 +2024-09-19 01:01:32,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=558960.0, ans=0.125 +2024-09-19 01:01:35,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=558960.0, ans=0.0 +2024-09-19 01:01:37,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=558960.0, ans=0.1 +2024-09-19 01:01:43,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.09 vs. limit=15.0 +2024-09-19 01:01:44,284 INFO [train.py:1198] (0/2) Epoch 31, batch 4000, loss[loss=0.2169, ctc_loss=0.1021, cr_loss=0.3338, attn_decoder_loss=0.2222, over 29521.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1203, cr_loss=0.3628, attn_decoder_loss=0.2426, over 5812702.37 frames. 
], batch size: 74, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 01:02:00,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=559040.0, ans=0.0 +2024-09-19 01:02:02,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=559040.0, ans=0.125 +2024-09-19 01:02:09,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=559040.0, ans=0.2 +2024-09-19 01:02:09,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=559040.0, ans=0.1 +2024-09-19 01:02:14,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=559080.0, ans=0.125 +2024-09-19 01:02:24,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=559080.0, ans=0.1 +2024-09-19 01:02:26,263 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:02:36,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=559120.0, ans=0.2 +2024-09-19 01:02:38,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=559120.0, ans=0.0 +2024-09-19 01:02:59,110 INFO [train.py:1198] (0/2) Epoch 31, batch 4050, loss[loss=0.2497, ctc_loss=0.1362, cr_loss=0.3769, attn_decoder_loss=0.2539, over 20089.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1204, cr_loss=0.3628, attn_decoder_loss=0.2424, over 5796457.92 frames. ], batch size: 209, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 01:03:33,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=559280.0, ans=0.125 +2024-09-19 01:03:43,339 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:03:56,630 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.819e+01 8.784e+01 9.351e+01 1.021e+02 4.182e+02, threshold=1.870e+02, percent-clipped=0.0 +2024-09-19 01:04:14,215 INFO [train.py:1198] (0/2) Epoch 31, batch 4100, loss[loss=0.2587, ctc_loss=0.1381, cr_loss=0.4214, attn_decoder_loss=0.2627, over 29507.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1207, cr_loss=0.3632, attn_decoder_loss=0.2426, over 5792989.06 frames. 
], batch size: 90, lr: 3.54e-03, grad_scale: 8.0 +2024-09-19 01:04:18,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=559400.0, ans=0.1 +2024-09-19 01:04:25,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=559400.0, ans=0.125 +2024-09-19 01:04:46,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=559480.0, ans=0.0 +2024-09-19 01:04:49,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=559480.0, ans=0.0 +2024-09-19 01:04:56,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=559480.0, ans=0.2 +2024-09-19 01:05:05,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=559520.0, ans=0.1 +2024-09-19 01:05:08,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=559520.0, ans=0.1 +2024-09-19 01:05:27,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=559600.0, ans=0.125 +2024-09-19 01:05:28,645 INFO [train.py:1198] (0/2) Epoch 31, batch 4150, loss[loss=0.2321, ctc_loss=0.1157, cr_loss=0.3578, attn_decoder_loss=0.2371, over 29512.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1205, cr_loss=0.3625, attn_decoder_loss=0.2424, over 5798627.09 frames. ], batch size: 77, lr: 3.54e-03, grad_scale: 8.0 +2024-09-19 01:05:29,336 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.76 vs. limit=22.5 +2024-09-19 01:06:00,096 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.37 vs. limit=10.0 +2024-09-19 01:06:10,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=559680.0, ans=0.0 +2024-09-19 01:06:14,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=559720.0, ans=0.125 +2024-09-19 01:06:26,031 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.331e+01 8.844e+01 9.480e+01 1.340e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-19 01:06:42,213 INFO [train.py:1198] (0/2) Epoch 31, batch 4200, loss[loss=0.2545, ctc_loss=0.1422, cr_loss=0.4025, attn_decoder_loss=0.2581, over 29515.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1208, cr_loss=0.3636, attn_decoder_loss=0.2427, over 5800020.69 frames. 
], batch size: 90, lr: 3.54e-03, grad_scale: 8.0 +2024-09-19 01:07:01,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=559840.0, ans=0.0 +2024-09-19 01:07:13,708 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:07:25,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=559920.0, ans=0.0 +2024-09-19 01:07:25,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=559920.0, ans=0.125 +2024-09-19 01:07:38,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=559920.0, ans=0.025 +2024-09-19 01:07:47,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=559960.0, ans=0.125 +2024-09-19 01:07:51,164 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.24 vs. limit=10.0 +2024-09-19 01:07:55,216 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-140000.pt +2024-09-19 01:08:04,341 INFO [train.py:1198] (0/2) Epoch 31, batch 4250, loss[loss=0.2298, ctc_loss=0.1139, cr_loss=0.3393, attn_decoder_loss=0.2352, over 29531.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1202, cr_loss=0.3626, attn_decoder_loss=0.2425, over 5805715.66 frames. ], batch size: 74, lr: 3.54e-03, grad_scale: 8.0 +2024-09-19 01:08:04,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=560000.0, ans=0.125 +2024-09-19 01:08:16,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.78 vs. limit=22.5 +2024-09-19 01:08:34,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=560080.0, ans=0.125 +2024-09-19 01:08:38,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.48 vs. limit=15.0 +2024-09-19 01:08:41,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.26 vs. limit=15.0 +2024-09-19 01:08:46,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=560080.0, ans=0.1 +2024-09-19 01:08:54,466 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.18 vs. limit=22.5 +2024-09-19 01:09:03,969 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.505e+01 8.697e+01 9.428e+01 9.992e+01 2.936e+02, threshold=1.886e+02, percent-clipped=1.0 +2024-09-19 01:09:16,903 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.04 vs. 
limit=12.0 +2024-09-19 01:09:20,359 INFO [train.py:1198] (0/2) Epoch 31, batch 4300, loss[loss=0.2529, ctc_loss=0.13, cr_loss=0.3769, attn_decoder_loss=0.2582, over 29525.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1197, cr_loss=0.3615, attn_decoder_loss=0.2425, over 5794656.54 frames. ], batch size: 87, lr: 3.54e-03, grad_scale: 8.0 +2024-09-19 01:09:44,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=560240.0, ans=0.0 +2024-09-19 01:09:51,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=560280.0, ans=0.1 +2024-09-19 01:10:06,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=560320.0, ans=0.0 +2024-09-19 01:10:23,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=560360.0, ans=0.0 +2024-09-19 01:10:34,938 INFO [train.py:1198] (0/2) Epoch 31, batch 4350, loss[loss=0.2528, ctc_loss=0.1309, cr_loss=0.3758, attn_decoder_loss=0.258, over 29442.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.122, cr_loss=0.3663, attn_decoder_loss=0.2455, over 5798249.66 frames. ], batch size: 97, lr: 3.54e-03, grad_scale: 8.0 +2024-09-19 01:10:35,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=560400.0, ans=0.2 +2024-09-19 01:10:41,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=560400.0, ans=0.0 +2024-09-19 01:11:34,000 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.066e+01 8.755e+01 9.230e+01 9.613e+01 3.743e+02, threshold=1.846e+02, percent-clipped=1.0 +2024-09-19 01:11:47,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=560560.0, ans=0.125 +2024-09-19 01:11:50,363 INFO [train.py:1198] (0/2) Epoch 31, batch 4400, loss[loss=0.2546, ctc_loss=0.1366, cr_loss=0.4002, attn_decoder_loss=0.2588, over 27305.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1236, cr_loss=0.3694, attn_decoder_loss=0.2477, over 5769731.83 frames. ], batch size: 124, lr: 3.54e-03, grad_scale: 16.0 +2024-09-19 01:12:23,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.53 vs. limit=15.0 +2024-09-19 01:12:49,094 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.62 vs. limit=15.0 +2024-09-19 01:13:05,147 INFO [train.py:1198] (0/2) Epoch 31, batch 4450, loss[loss=0.2649, ctc_loss=0.1595, cr_loss=0.3967, attn_decoder_loss=0.2678, over 19833.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1276, cr_loss=0.3752, attn_decoder_loss=0.25, over 5582974.73 frames. ], batch size: 210, lr: 3.54e-03, grad_scale: 8.0 +2024-09-19 01:13:17,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=10.35 vs. 
limit=12.0 +2024-09-19 01:13:18,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=560800.0, ans=0.025 +2024-09-19 01:13:32,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=560840.0, ans=0.1 +2024-09-19 01:13:33,735 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:13:49,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=560920.0, ans=0.0 +2024-09-19 01:13:56,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=560920.0, ans=0.2 +2024-09-19 01:14:00,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=560920.0, ans=0.1 +2024-09-19 01:14:06,071 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.287e+01 9.493e+01 1.082e+02 1.219e+02 3.408e+02, threshold=2.163e+02, percent-clipped=1.0 +2024-09-19 01:14:09,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=560960.0, ans=0.0 +2024-09-19 01:14:10,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=560960.0, ans=0.125 +2024-09-19 01:14:19,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=561000.0, ans=0.0 +2024-09-19 01:14:21,377 INFO [train.py:1198] (0/2) Epoch 31, batch 4500, loss[loss=0.2533, ctc_loss=0.1426, cr_loss=0.3664, attn_decoder_loss=0.2575, over 20748.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.131, cr_loss=0.3772, attn_decoder_loss=0.2518, over 5244502.52 frames. ], batch size: 210, lr: 3.54e-03, grad_scale: 8.0 +2024-09-19 01:14:29,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=561000.0, ans=0.125 +2024-09-19 01:14:58,626 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-31.pt +2024-09-19 01:15:44,697 INFO [train.py:1198] (0/2) Epoch 32, batch 0, loss[loss=0.2194, ctc_loss=0.107, cr_loss=0.3323, attn_decoder_loss=0.2245, over 29618.00 frames. ], tot_loss[loss=0.2194, ctc_loss=0.107, cr_loss=0.3323, attn_decoder_loss=0.2245, over 29618.00 frames. ], batch size: 73, lr: 3.48e-03, grad_scale: 16.0 +2024-09-19 01:15:44,697 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 01:15:58,089 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([4.9515, 4.7430, 4.4457, 4.2535], device='cuda:0') +2024-09-19 01:16:03,114 INFO [train.py:1230] (0/2) Epoch 32, validation: loss=0.2127, ctc_loss=0.03714, cr_loss=6.101e-15, attn_decoder_loss=0.2322, over 944034.00 frames. 
+2024-09-19 01:16:03,114 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 01:16:03,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=561100.0, ans=0.1 +2024-09-19 01:16:22,157 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.54 vs. limit=15.0 +2024-09-19 01:16:25,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=561140.0, ans=0.125 +2024-09-19 01:16:27,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=561140.0, ans=0.125 +2024-09-19 01:16:45,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=561180.0, ans=0.125 +2024-09-19 01:16:45,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=561180.0, ans=0.0 +2024-09-19 01:17:05,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=561260.0, ans=0.0 +2024-09-19 01:17:17,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff2.min_abs, batch_count=561260.0, ans=0.1 +2024-09-19 01:17:20,697 INFO [train.py:1198] (0/2) Epoch 32, batch 50, loss[loss=0.2077, ctc_loss=0.09286, cr_loss=0.3135, attn_decoder_loss=0.2135, over 29435.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1223, cr_loss=0.3673, attn_decoder_loss=0.244, over 1266455.14 frames. ], batch size: 70, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:17:22,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=561300.0, ans=0.125 +2024-09-19 01:17:32,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.88 vs. limit=15.0 +2024-09-19 01:17:40,119 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.24 vs. limit=15.0 +2024-09-19 01:17:45,061 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.792e+01 8.848e+01 9.833e+01 1.147e+02 1.812e+02, threshold=1.967e+02, percent-clipped=0.0 +2024-09-19 01:17:59,641 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.85 vs. 
limit=22.5 +2024-09-19 01:18:00,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=561380.0, ans=0.07 +2024-09-19 01:18:09,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=561420.0, ans=0.125 +2024-09-19 01:18:21,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=561460.0, ans=0.125 +2024-09-19 01:18:27,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=561460.0, ans=0.0 +2024-09-19 01:18:27,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=561460.0, ans=0.1 +2024-09-19 01:18:36,353 INFO [train.py:1198] (0/2) Epoch 32, batch 100, loss[loss=0.226, ctc_loss=0.1144, cr_loss=0.337, attn_decoder_loss=0.2309, over 29521.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1234, cr_loss=0.3704, attn_decoder_loss=0.2455, over 2251671.75 frames. ], batch size: 76, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:19:13,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=561580.0, ans=0.125 +2024-09-19 01:19:15,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=561580.0, ans=0.2 +2024-09-19 01:19:34,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=561620.0, ans=0.125 +2024-09-19 01:19:39,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=561660.0, ans=0.1 +2024-09-19 01:19:53,959 INFO [train.py:1198] (0/2) Epoch 32, batch 150, loss[loss=0.2123, ctc_loss=0.1034, cr_loss=0.3253, attn_decoder_loss=0.2172, over 29433.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1205, cr_loss=0.3633, attn_decoder_loss=0.2428, over 3047528.34 frames. ], batch size: 70, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:19:55,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=561700.0, ans=0.2 +2024-09-19 01:19:55,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=561700.0, ans=0.125 +2024-09-19 01:19:55,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=561700.0, ans=0.125 +2024-09-19 01:20:00,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=561700.0, ans=0.125 +2024-09-19 01:20:00,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=561700.0, ans=0.0 +2024-09-19 01:20:07,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.00 vs. 
limit=10.0 +2024-09-19 01:20:18,131 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.224e+01 8.368e+01 8.757e+01 9.262e+01 1.493e+02, threshold=1.751e+02, percent-clipped=0.0 +2024-09-19 01:20:35,047 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:20:44,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=561820.0, ans=0.125 +2024-09-19 01:20:58,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=561860.0, ans=0.05 +2024-09-19 01:20:58,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=561860.0, ans=0.0 +2024-09-19 01:21:03,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=561860.0, ans=0.125 +2024-09-19 01:21:08,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=561860.0, ans=0.0 +2024-09-19 01:21:11,625 INFO [train.py:1198] (0/2) Epoch 32, batch 200, loss[loss=0.2552, ctc_loss=0.1376, cr_loss=0.3899, attn_decoder_loss=0.2596, over 27147.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1206, cr_loss=0.3639, attn_decoder_loss=0.2423, over 3660373.51 frames. ], batch size: 124, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:21:17,055 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.08 vs. limit=15.0 +2024-09-19 01:21:32,172 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.53 vs. limit=22.5 +2024-09-19 01:21:37,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=561940.0, ans=0.04949747468305833 +2024-09-19 01:21:37,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=561940.0, ans=0.1 +2024-09-19 01:21:45,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=561980.0, ans=0.125 +2024-09-19 01:21:46,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=561980.0, ans=0.0 +2024-09-19 01:21:53,328 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.00 vs. limit=22.5 +2024-09-19 01:22:16,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=562060.0, ans=0.125 +2024-09-19 01:22:17,626 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.58 vs. limit=12.0 +2024-09-19 01:22:27,316 INFO [train.py:1198] (0/2) Epoch 32, batch 250, loss[loss=0.2563, ctc_loss=0.1374, cr_loss=0.4122, attn_decoder_loss=0.2604, over 29244.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.12, cr_loss=0.3635, attn_decoder_loss=0.242, over 4142599.52 frames. 
], batch size: 100, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:22:45,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.55 vs. limit=12.0 +2024-09-19 01:22:46,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=562140.0, ans=0.0 +2024-09-19 01:22:53,931 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.223e+01 8.466e+01 9.044e+01 9.662e+01 1.743e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 01:23:05,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.35 vs. limit=22.5 +2024-09-19 01:23:12,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=562180.0, ans=0.125 +2024-09-19 01:23:14,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=562220.0, ans=0.125 +2024-09-19 01:23:20,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=562220.0, ans=0.2 +2024-09-19 01:23:23,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=562220.0, ans=0.125 +2024-09-19 01:23:27,197 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.20 vs. limit=10.0 +2024-09-19 01:23:37,234 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.52 vs. limit=15.0 +2024-09-19 01:23:41,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=562260.0, ans=0.125 +2024-09-19 01:23:41,788 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.08 vs. limit=12.0 +2024-09-19 01:23:45,522 INFO [train.py:1198] (0/2) Epoch 32, batch 300, loss[loss=0.2588, ctc_loss=0.1355, cr_loss=0.3991, attn_decoder_loss=0.2636, over 29519.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1198, cr_loss=0.3634, attn_decoder_loss=0.2418, over 4509583.44 frames. ], batch size: 92, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:24:10,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=562340.0, ans=0.125 +2024-09-19 01:24:17,142 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=15.0 +2024-09-19 01:24:22,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=562380.0, ans=0.0 +2024-09-19 01:24:33,855 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.70 vs. limit=15.0 +2024-09-19 01:25:01,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=562460.0, ans=0.125 +2024-09-19 01:25:04,393 INFO [train.py:1198] (0/2) Epoch 32, batch 350, loss[loss=0.2063, ctc_loss=0.09845, cr_loss=0.3005, attn_decoder_loss=0.2116, over 29333.00 frames. 
], tot_loss[loss=0.237, ctc_loss=0.1198, cr_loss=0.3628, attn_decoder_loss=0.242, over 4794921.21 frames. ], batch size: 71, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:25:07,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=562500.0, ans=0.1 +2024-09-19 01:25:15,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=562500.0, ans=0.1 +2024-09-19 01:25:28,323 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.269e+01 8.336e+01 8.922e+01 9.619e+01 6.149e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-19 01:25:36,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=562580.0, ans=0.125 +2024-09-19 01:25:37,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=562580.0, ans=0.0 +2024-09-19 01:25:40,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=562580.0, ans=0.05 +2024-09-19 01:25:50,642 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.56 vs. limit=15.0 +2024-09-19 01:25:59,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=562620.0, ans=0.125 +2024-09-19 01:26:00,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=562620.0, ans=0.125 +2024-09-19 01:26:02,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=562620.0, ans=0.1 +2024-09-19 01:26:06,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=562660.0, ans=0.0 +2024-09-19 01:26:13,165 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.64 vs. limit=6.0 +2024-09-19 01:26:13,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.81 vs. limit=15.0 +2024-09-19 01:26:19,649 INFO [train.py:1198] (0/2) Epoch 32, batch 400, loss[loss=0.2378, ctc_loss=0.1178, cr_loss=0.3566, attn_decoder_loss=0.2432, over 29714.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1195, cr_loss=0.3618, attn_decoder_loss=0.2419, over 5025208.09 frames. ], batch size: 82, lr: 3.48e-03, grad_scale: 16.0 +2024-09-19 01:26:32,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=562700.0, ans=0.0 +2024-09-19 01:26:48,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.24 vs. limit=15.0 +2024-09-19 01:26:52,076 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.19 vs. 
limit=15.0 +2024-09-19 01:27:21,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=562860.0, ans=0.125 +2024-09-19 01:27:30,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=562860.0, ans=0.125 +2024-09-19 01:27:33,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=562860.0, ans=0.2 +2024-09-19 01:27:38,244 INFO [train.py:1198] (0/2) Epoch 32, batch 450, loss[loss=0.2442, ctc_loss=0.1165, cr_loss=0.3531, attn_decoder_loss=0.2505, over 29695.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1198, cr_loss=0.3621, attn_decoder_loss=0.2422, over 5186833.58 frames. ], batch size: 83, lr: 3.48e-03, grad_scale: 16.0 +2024-09-19 01:27:38,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=562900.0, ans=0.125 +2024-09-19 01:27:56,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=562940.0, ans=0.125 +2024-09-19 01:28:00,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=562940.0, ans=0.0 +2024-09-19 01:28:02,738 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.307e+01 8.493e+01 8.894e+01 9.370e+01 1.465e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-19 01:28:03,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=562940.0, ans=0.125 +2024-09-19 01:28:08,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.06 vs. limit=15.0 +2024-09-19 01:28:11,056 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.47 vs. limit=6.0 +2024-09-19 01:28:30,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=563020.0, ans=0.125 +2024-09-19 01:28:31,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=563020.0, ans=0.0 +2024-09-19 01:28:54,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.49 vs. limit=15.0 +2024-09-19 01:28:56,470 INFO [train.py:1198] (0/2) Epoch 32, batch 500, loss[loss=0.26, ctc_loss=0.1385, cr_loss=0.3995, attn_decoder_loss=0.2646, over 29428.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1199, cr_loss=0.3622, attn_decoder_loss=0.2419, over 5330566.09 frames. ], batch size: 94, lr: 3.48e-03, grad_scale: 16.0 +2024-09-19 01:29:16,699 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:29:25,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=563180.0, ans=0.2 +2024-09-19 01:29:29,117 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.96 vs. 
limit=15.0 +2024-09-19 01:29:41,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=563220.0, ans=0.125 +2024-09-19 01:29:42,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=563220.0, ans=0.125 +2024-09-19 01:29:49,430 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.95 vs. limit=10.0 +2024-09-19 01:30:02,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=563260.0, ans=0.95 +2024-09-19 01:30:05,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=563260.0, ans=0.125 +2024-09-19 01:30:12,546 INFO [train.py:1198] (0/2) Epoch 32, batch 550, loss[loss=0.2455, ctc_loss=0.1227, cr_loss=0.3704, attn_decoder_loss=0.251, over 28777.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1195, cr_loss=0.3611, attn_decoder_loss=0.2419, over 5422555.26 frames. ], batch size: 104, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:30:23,732 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.28 vs. limit=15.0 +2024-09-19 01:30:40,433 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.146e+01 8.423e+01 9.076e+01 9.566e+01 2.311e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-19 01:30:42,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=563340.0, ans=0.0 +2024-09-19 01:30:45,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=563380.0, ans=0.1 +2024-09-19 01:31:11,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=563420.0, ans=0.0 +2024-09-19 01:31:30,749 INFO [train.py:1198] (0/2) Epoch 32, batch 600, loss[loss=0.2518, ctc_loss=0.1322, cr_loss=0.3805, attn_decoder_loss=0.2567, over 29295.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1193, cr_loss=0.3607, attn_decoder_loss=0.242, over 5509315.66 frames. ], batch size: 100, lr: 3.48e-03, grad_scale: 8.0 +2024-09-19 01:31:41,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=563500.0, ans=0.05 +2024-09-19 01:31:58,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=563540.0, ans=0.2 +2024-09-19 01:31:59,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=563580.0, ans=0.0 +2024-09-19 01:32:01,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=563580.0, ans=0.0 +2024-09-19 01:32:17,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=563620.0, ans=0.125 +2024-09-19 01:32:34,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=563660.0, ans=0.0 +2024-09-19 01:32:45,656 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.86 vs. 
limit=8.0 +2024-09-19 01:32:45,911 INFO [train.py:1198] (0/2) Epoch 32, batch 650, loss[loss=0.2335, ctc_loss=0.1093, cr_loss=0.3423, attn_decoder_loss=0.2397, over 29780.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1186, cr_loss=0.3595, attn_decoder_loss=0.2414, over 5586494.66 frames. ], batch size: 81, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:32:55,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=563700.0, ans=0.1 +2024-09-19 01:32:58,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=563700.0, ans=0.1 +2024-09-19 01:33:08,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=563740.0, ans=0.2 +2024-09-19 01:33:08,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=563740.0, ans=0.1 +2024-09-19 01:33:11,744 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.52 vs. limit=6.0 +2024-09-19 01:33:13,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=563740.0, ans=0.0 +2024-09-19 01:33:14,110 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.233e+01 8.421e+01 8.815e+01 9.543e+01 5.182e+02, threshold=1.763e+02, percent-clipped=1.0 +2024-09-19 01:33:30,026 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.57 vs. limit=15.0 +2024-09-19 01:33:34,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=563820.0, ans=0.0 +2024-09-19 01:33:34,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=563820.0, ans=0.125 +2024-09-19 01:33:43,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=563820.0, ans=0.0 +2024-09-19 01:33:47,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=563860.0, ans=0.0 +2024-09-19 01:33:49,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.87 vs. limit=12.0 +2024-09-19 01:33:59,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=563860.0, ans=0.0 +2024-09-19 01:34:00,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.55 vs. limit=22.5 +2024-09-19 01:34:01,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=563860.0, ans=0.125 +2024-09-19 01:34:01,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=563860.0, ans=0.0 +2024-09-19 01:34:04,048 INFO [train.py:1198] (0/2) Epoch 32, batch 700, loss[loss=0.2394, ctc_loss=0.1271, cr_loss=0.3814, attn_decoder_loss=0.2434, over 29543.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1197, cr_loss=0.3619, attn_decoder_loss=0.2424, over 5636863.26 frames. 
], batch size: 76, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:34:12,680 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.79 vs. limit=15.0 +2024-09-19 01:34:13,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=563900.0, ans=0.0 +2024-09-19 01:34:13,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=563900.0, ans=0.125 +2024-09-19 01:34:22,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=563940.0, ans=0.05 +2024-09-19 01:34:54,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=564020.0, ans=0.125 +2024-09-19 01:35:10,187 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.65 vs. limit=15.0 +2024-09-19 01:35:16,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=564060.0, ans=0.0 +2024-09-19 01:35:22,665 INFO [train.py:1198] (0/2) Epoch 32, batch 750, loss[loss=0.2433, ctc_loss=0.1223, cr_loss=0.3478, attn_decoder_loss=0.249, over 29697.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1195, cr_loss=0.3614, attn_decoder_loss=0.2419, over 5676111.23 frames. ], batch size: 82, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:35:25,210 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.47 vs. limit=15.0 +2024-09-19 01:35:30,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=564100.0, ans=0.2 +2024-09-19 01:35:33,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=564100.0, ans=0.125 +2024-09-19 01:35:43,008 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.67 vs. limit=15.0 +2024-09-19 01:35:48,089 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 8.447e+01 8.933e+01 9.518e+01 3.479e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-19 01:35:55,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=564180.0, ans=0.0 +2024-09-19 01:35:58,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=564180.0, ans=0.125 +2024-09-19 01:36:02,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=564180.0, ans=0.125 +2024-09-19 01:36:14,950 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.86 vs. 
limit=15.0 +2024-09-19 01:36:15,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_ff2.min_abs, batch_count=564220.0, ans=0.1 +2024-09-19 01:36:17,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=564220.0, ans=0.2 +2024-09-19 01:36:38,476 INFO [train.py:1198] (0/2) Epoch 32, batch 800, loss[loss=0.2168, ctc_loss=0.1027, cr_loss=0.336, attn_decoder_loss=0.222, over 29615.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1197, cr_loss=0.362, attn_decoder_loss=0.2421, over 5706136.75 frames. ], batch size: 73, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:36:45,696 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.07 vs. limit=22.5 +2024-09-19 01:36:51,156 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.88 vs. limit=22.5 +2024-09-19 01:37:05,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=564340.0, ans=0.125 +2024-09-19 01:37:26,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=564420.0, ans=0.125 +2024-09-19 01:37:29,850 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.21 vs. limit=6.0 +2024-09-19 01:37:46,645 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.64 vs. limit=6.0 +2024-09-19 01:37:48,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=564460.0, ans=0.2 +2024-09-19 01:37:51,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=564460.0, ans=0.125 +2024-09-19 01:37:54,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=564500.0, ans=0.2 +2024-09-19 01:37:56,058 INFO [train.py:1198] (0/2) Epoch 32, batch 850, loss[loss=0.2401, ctc_loss=0.1266, cr_loss=0.3819, attn_decoder_loss=0.2442, over 29695.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1195, cr_loss=0.3613, attn_decoder_loss=0.2417, over 5735991.02 frames. 
], batch size: 89, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:38:15,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=564540.0, ans=0.04949747468305833 +2024-09-19 01:38:23,001 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.469e+01 8.587e+01 9.050e+01 9.701e+01 1.930e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-19 01:38:31,865 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:38:46,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=564620.0, ans=0.125 +2024-09-19 01:38:59,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=564660.0, ans=0.2 +2024-09-19 01:39:04,249 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.03 vs. limit=10.0 +2024-09-19 01:39:05,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=564660.0, ans=0.1 +2024-09-19 01:39:11,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=564660.0, ans=0.0 +2024-09-19 01:39:13,909 INFO [train.py:1198] (0/2) Epoch 32, batch 900, loss[loss=0.2195, ctc_loss=0.1009, cr_loss=0.3293, attn_decoder_loss=0.2254, over 29608.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1196, cr_loss=0.3616, attn_decoder_loss=0.2419, over 5740975.28 frames. ], batch size: 73, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:39:14,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=564700.0, ans=0.0 +2024-09-19 01:39:17,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=564700.0, ans=0.025 +2024-09-19 01:39:18,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=564700.0, ans=0.0 +2024-09-19 01:39:49,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=564780.0, ans=0.0 +2024-09-19 01:39:50,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=564780.0, ans=0.2 +2024-09-19 01:39:54,373 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.69 vs. limit=15.0 +2024-09-19 01:39:55,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=564780.0, ans=0.125 +2024-09-19 01:40:01,854 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.12 vs. 
limit=15.0 +2024-09-19 01:40:13,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=564860.0, ans=0.0 +2024-09-19 01:40:14,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=564860.0, ans=0.025 +2024-09-19 01:40:29,654 INFO [train.py:1198] (0/2) Epoch 32, batch 950, loss[loss=0.2278, ctc_loss=0.1095, cr_loss=0.325, attn_decoder_loss=0.2337, over 29514.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1199, cr_loss=0.362, attn_decoder_loss=0.2422, over 5743132.69 frames. ], batch size: 74, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:40:36,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=564900.0, ans=0.125 +2024-09-19 01:40:40,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=564900.0, ans=0.125 +2024-09-19 01:40:41,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=564900.0, ans=0.09899494936611666 +2024-09-19 01:40:53,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=564940.0, ans=0.1 +2024-09-19 01:40:59,189 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.678e+01 9.053e+01 9.826e+01 2.124e+02, threshold=1.811e+02, percent-clipped=3.0 +2024-09-19 01:41:16,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.85 vs. limit=15.0 +2024-09-19 01:41:19,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=565020.0, ans=0.0 +2024-09-19 01:41:25,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=565020.0, ans=0.2 +2024-09-19 01:41:34,500 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:41:35,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=565060.0, ans=0.0 +2024-09-19 01:41:47,541 INFO [train.py:1198] (0/2) Epoch 32, batch 1000, loss[loss=0.2213, ctc_loss=0.1081, cr_loss=0.3287, attn_decoder_loss=0.2266, over 29488.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1204, cr_loss=0.363, attn_decoder_loss=0.2427, over 5738069.66 frames. 
], batch size: 77, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:42:03,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=565140.0, ans=0.2 +2024-09-19 01:42:06,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=565140.0, ans=0.0 +2024-09-19 01:42:19,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=565180.0, ans=0.04949747468305833 +2024-09-19 01:42:31,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=565180.0, ans=10.0 +2024-09-19 01:42:54,981 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:43:05,369 INFO [train.py:1198] (0/2) Epoch 32, batch 1050, loss[loss=0.2464, ctc_loss=0.1235, cr_loss=0.387, attn_decoder_loss=0.2515, over 29690.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1202, cr_loss=0.3629, attn_decoder_loss=0.2423, over 5747486.47 frames. ], batch size: 85, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:43:22,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.38 vs. limit=22.5 +2024-09-19 01:43:27,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=565340.0, ans=0.04949747468305833 +2024-09-19 01:43:30,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.05 vs. limit=15.0 +2024-09-19 01:43:31,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=565340.0, ans=0.125 +2024-09-19 01:43:32,742 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.148e+01 8.597e+01 9.014e+01 9.453e+01 2.467e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-19 01:43:40,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=565380.0, ans=0.0 +2024-09-19 01:43:54,948 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.29 vs. limit=15.0 +2024-09-19 01:44:21,432 INFO [train.py:1198] (0/2) Epoch 32, batch 1100, loss[loss=0.2285, ctc_loss=0.1125, cr_loss=0.3567, attn_decoder_loss=0.2335, over 29427.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1201, cr_loss=0.3625, attn_decoder_loss=0.242, over 5757355.57 frames. 
], batch size: 78, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:44:21,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=565500.0, ans=0.2 +2024-09-19 01:44:43,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=565540.0, ans=0.125 +2024-09-19 01:44:57,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=565580.0, ans=0.0 +2024-09-19 01:45:22,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=565620.0, ans=0.0 +2024-09-19 01:45:31,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=565660.0, ans=0.125 +2024-09-19 01:45:31,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=565660.0, ans=0.1 +2024-09-19 01:45:40,254 INFO [train.py:1198] (0/2) Epoch 32, batch 1150, loss[loss=0.2368, ctc_loss=0.122, cr_loss=0.3689, attn_decoder_loss=0.2414, over 29462.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1201, cr_loss=0.3625, attn_decoder_loss=0.2418, over 5755691.86 frames. ], batch size: 78, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:45:45,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=565700.0, ans=0.025 +2024-09-19 01:45:52,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=565700.0, ans=0.04949747468305833 +2024-09-19 01:45:56,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=565740.0, ans=0.025 +2024-09-19 01:45:57,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=565740.0, ans=0.125 +2024-09-19 01:46:02,055 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:46:10,198 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.327e+01 8.460e+01 8.830e+01 9.335e+01 1.572e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-19 01:46:16,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=565780.0, ans=0.0 +2024-09-19 01:46:24,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=565780.0, ans=0.125 +2024-09-19 01:46:25,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=565780.0, ans=0.125 +2024-09-19 01:46:34,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=565820.0, ans=0.0 +2024-09-19 01:46:47,728 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.42 vs. limit=15.0 +2024-09-19 01:46:58,702 INFO [train.py:1198] (0/2) Epoch 32, batch 1200, loss[loss=0.2518, ctc_loss=0.125, cr_loss=0.3833, attn_decoder_loss=0.2574, over 29676.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1205, cr_loss=0.3635, attn_decoder_loss=0.2426, over 5747669.00 frames. 
], batch size: 85, lr: 3.47e-03, grad_scale: 16.0 +2024-09-19 01:46:58,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=565900.0, ans=0.125 +2024-09-19 01:47:06,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=565900.0, ans=0.125 +2024-09-19 01:47:09,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=565900.0, ans=0.0 +2024-09-19 01:47:30,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=565980.0, ans=0.125 +2024-09-19 01:47:34,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=565980.0, ans=0.125 +2024-09-19 01:47:52,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=566020.0, ans=0.0 +2024-09-19 01:48:14,673 INFO [train.py:1198] (0/2) Epoch 32, batch 1250, loss[loss=0.2558, ctc_loss=0.1311, cr_loss=0.3914, attn_decoder_loss=0.2609, over 29516.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1208, cr_loss=0.3645, attn_decoder_loss=0.2431, over 5774262.56 frames. ], batch size: 92, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:48:34,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=566140.0, ans=0.0 +2024-09-19 01:48:43,429 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.280e+01 8.626e+01 9.127e+01 9.598e+01 1.741e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-19 01:48:52,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=566180.0, ans=0.1 +2024-09-19 01:48:55,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=566180.0, ans=0.5 +2024-09-19 01:49:07,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=566220.0, ans=0.0 +2024-09-19 01:49:32,736 INFO [train.py:1198] (0/2) Epoch 32, batch 1300, loss[loss=0.2436, ctc_loss=0.1161, cr_loss=0.3545, attn_decoder_loss=0.2499, over 28391.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1204, cr_loss=0.3631, attn_decoder_loss=0.2425, over 5780191.12 frames. ], batch size: 111, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:49:33,739 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.65 vs. 
limit=22.5 +2024-09-19 01:49:37,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=566300.0, ans=0.025 +2024-09-19 01:49:48,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=566340.0, ans=0.025 +2024-09-19 01:50:05,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=566380.0, ans=0.1 +2024-09-19 01:50:16,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=566380.0, ans=0.09899494936611666 +2024-09-19 01:50:26,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=566420.0, ans=0.125 +2024-09-19 01:50:45,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=566460.0, ans=0.125 +2024-09-19 01:50:51,302 INFO [train.py:1198] (0/2) Epoch 32, batch 1350, loss[loss=0.2448, ctc_loss=0.125, cr_loss=0.3688, attn_decoder_loss=0.2499, over 29737.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1198, cr_loss=0.3624, attn_decoder_loss=0.242, over 5797837.80 frames. ], batch size: 81, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:51:19,499 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.271e+01 8.117e+01 8.702e+01 9.337e+01 1.229e+02, threshold=1.740e+02, percent-clipped=0.0 +2024-09-19 01:51:21,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=566580.0, ans=0.05 +2024-09-19 01:51:24,968 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.21 vs. limit=22.5 +2024-09-19 01:51:42,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=566620.0, ans=0.125 +2024-09-19 01:52:06,517 INFO [train.py:1198] (0/2) Epoch 32, batch 1400, loss[loss=0.2097, ctc_loss=0.09831, cr_loss=0.3106, attn_decoder_loss=0.2151, over 29574.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1198, cr_loss=0.3628, attn_decoder_loss=0.242, over 5808864.39 frames. ], batch size: 69, lr: 3.47e-03, grad_scale: 8.0 +2024-09-19 01:52:15,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=566700.0, ans=0.0 +2024-09-19 01:52:46,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=566780.0, ans=0.0 +2024-09-19 01:52:52,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=566820.0, ans=0.125 +2024-09-19 01:52:54,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=566820.0, ans=0.125 +2024-09-19 01:53:04,389 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.50 vs. 
limit=12.0 +2024-09-19 01:53:06,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=566820.0, ans=0.1 +2024-09-19 01:53:08,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=566860.0, ans=0.2 +2024-09-19 01:53:24,559 INFO [train.py:1198] (0/2) Epoch 32, batch 1450, loss[loss=0.263, ctc_loss=0.1416, cr_loss=0.4136, attn_decoder_loss=0.2673, over 29424.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1197, cr_loss=0.3625, attn_decoder_loss=0.2423, over 5807296.71 frames. ], batch size: 94, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 01:53:26,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=566900.0, ans=0.125 +2024-09-19 01:53:30,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=566900.0, ans=15.0 +2024-09-19 01:53:32,550 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:53:53,182 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.579e+01 8.580e+01 8.959e+01 9.480e+01 1.633e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-19 01:54:07,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=566980.0, ans=0.0 +2024-09-19 01:54:10,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=567020.0, ans=0.125 +2024-09-19 01:54:36,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=567060.0, ans=0.125 +2024-09-19 01:54:36,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=567060.0, ans=0.0 +2024-09-19 01:54:38,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=567060.0, ans=0.0 +2024-09-19 01:54:41,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=567100.0, ans=0.0 +2024-09-19 01:54:42,379 INFO [train.py:1198] (0/2) Epoch 32, batch 1500, loss[loss=0.2522, ctc_loss=0.1298, cr_loss=0.3754, attn_decoder_loss=0.2575, over 29639.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1201, cr_loss=0.3632, attn_decoder_loss=0.2428, over 5807608.84 frames. 
], batch size: 86, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 01:54:57,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff2.min_abs, batch_count=567140.0, ans=0.1 +2024-09-19 01:55:12,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=567180.0, ans=0.0 +2024-09-19 01:55:14,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=567180.0, ans=0.2 +2024-09-19 01:55:17,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=567180.0, ans=0.125 +2024-09-19 01:55:27,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=567220.0, ans=0.125 +2024-09-19 01:55:30,877 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.46 vs. limit=6.0 +2024-09-19 01:55:36,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=567220.0, ans=0.0 +2024-09-19 01:55:48,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=567260.0, ans=0.125 +2024-09-19 01:55:55,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=567260.0, ans=0.2 +2024-09-19 01:55:58,503 INFO [train.py:1198] (0/2) Epoch 32, batch 1550, loss[loss=0.2631, ctc_loss=0.1481, cr_loss=0.4005, attn_decoder_loss=0.2669, over 29469.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1206, cr_loss=0.364, attn_decoder_loss=0.2429, over 5783454.24 frames. ], batch size: 90, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 01:56:00,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=567300.0, ans=0.025 +2024-09-19 01:56:16,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=567340.0, ans=0.0 +2024-09-19 01:56:17,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=567340.0, ans=0.0 +2024-09-19 01:56:27,115 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.568e+01 8.583e+01 9.090e+01 9.539e+01 2.299e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-19 01:56:46,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=567420.0, ans=0.125 +2024-09-19 01:57:03,952 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.67 vs. limit=6.0 +2024-09-19 01:57:12,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=567460.0, ans=0.125 +2024-09-19 01:57:16,197 INFO [train.py:1198] (0/2) Epoch 32, batch 1600, loss[loss=0.2417, ctc_loss=0.1148, cr_loss=0.3494, attn_decoder_loss=0.2481, over 29662.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1204, cr_loss=0.3633, attn_decoder_loss=0.2429, over 5766529.19 frames. 
], batch size: 85, lr: 3.46e-03, grad_scale: 16.0 +2024-09-19 01:57:16,542 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:58:04,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=567620.0, ans=0.1 +2024-09-19 01:58:13,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=567620.0, ans=0.125 +2024-09-19 01:58:34,170 INFO [train.py:1198] (0/2) Epoch 32, batch 1650, loss[loss=0.2446, ctc_loss=0.1275, cr_loss=0.3706, attn_decoder_loss=0.2494, over 29716.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1199, cr_loss=0.3618, attn_decoder_loss=0.2424, over 5762219.95 frames. ], batch size: 89, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 01:58:39,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=567700.0, ans=0.0 +2024-09-19 01:58:48,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=567740.0, ans=0.2 +2024-09-19 01:58:52,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=567740.0, ans=0.125 +2024-09-19 01:59:00,018 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 01:59:03,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=567780.0, ans=0.0 +2024-09-19 01:59:04,225 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.210e+01 8.351e+01 8.988e+01 9.892e+01 1.504e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-19 01:59:06,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=567780.0, ans=0.0 +2024-09-19 01:59:21,864 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.13 vs. limit=15.0 +2024-09-19 01:59:49,522 INFO [train.py:1198] (0/2) Epoch 32, batch 1700, loss[loss=0.2148, ctc_loss=0.09969, cr_loss=0.3216, attn_decoder_loss=0.2204, over 29598.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1196, cr_loss=0.3611, attn_decoder_loss=0.2422, over 5783371.95 frames. ], batch size: 69, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:00:05,466 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.86 vs. limit=15.0 +2024-09-19 02:00:12,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=567940.0, ans=0.125 +2024-09-19 02:00:17,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.62 vs. limit=15.0 +2024-09-19 02:00:40,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=568020.0, ans=0.0 +2024-09-19 02:00:53,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.88 vs. 
limit=22.5 +2024-09-19 02:01:08,120 INFO [train.py:1198] (0/2) Epoch 32, batch 1750, loss[loss=0.2145, ctc_loss=0.1088, cr_loss=0.3366, attn_decoder_loss=0.2188, over 29329.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1199, cr_loss=0.3617, attn_decoder_loss=0.2422, over 5790792.74 frames. ], batch size: 67, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:01:10,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.85 vs. limit=6.0 +2024-09-19 02:01:20,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=568100.0, ans=0.0 +2024-09-19 02:01:33,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.88 vs. limit=15.0 +2024-09-19 02:01:40,653 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.109e+01 8.621e+01 8.991e+01 9.586e+01 2.043e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-19 02:01:44,424 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=16.66 vs. limit=15.0 +2024-09-19 02:02:23,200 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.30 vs. limit=15.0 +2024-09-19 02:02:25,099 INFO [train.py:1198] (0/2) Epoch 32, batch 1800, loss[loss=0.2442, ctc_loss=0.1253, cr_loss=0.373, attn_decoder_loss=0.2491, over 29698.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1201, cr_loss=0.3625, attn_decoder_loss=0.2425, over 5792478.40 frames. ], batch size: 83, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:03:09,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=568420.0, ans=0.0 +2024-09-19 02:03:13,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=568420.0, ans=10.0 +2024-09-19 02:03:14,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=568420.0, ans=0.125 +2024-09-19 02:03:15,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=568420.0, ans=0.025 +2024-09-19 02:03:20,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=568420.0, ans=22.5 +2024-09-19 02:03:36,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=568460.0, ans=0.07 +2024-09-19 02:03:41,086 INFO [train.py:1198] (0/2) Epoch 32, batch 1850, loss[loss=0.2464, ctc_loss=0.1282, cr_loss=0.364, attn_decoder_loss=0.2515, over 29636.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1194, cr_loss=0.3612, attn_decoder_loss=0.242, over 5796025.41 frames. 
], batch size: 86, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 02:03:47,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=568500.0, ans=0.0
+2024-09-19 02:03:47,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=568500.0, ans=0.125
+2024-09-19 02:03:50,533 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 02:03:51,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.08 vs. limit=15.0
+2024-09-19 02:03:53,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=568500.0, ans=0.125
+2024-09-19 02:03:59,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=568540.0, ans=0.0
+2024-09-19 02:04:06,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=568540.0, ans=0.0
+2024-09-19 02:04:09,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=568580.0, ans=0.0
+2024-09-19 02:04:11,112 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.765e+01 8.550e+01 9.044e+01 9.477e+01 1.404e+02, threshold=1.809e+02, percent-clipped=0.0
+2024-09-19 02:04:48,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=568660.0, ans=0.125
+2024-09-19 02:04:48,801 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.03 vs. limit=6.0
+2024-09-19 02:04:58,450 INFO [train.py:1198] (0/2) Epoch 32, batch 1900, loss[loss=0.2451, ctc_loss=0.1199, cr_loss=0.3599, attn_decoder_loss=0.251, over 29701.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1201, cr_loss=0.3627, attn_decoder_loss=0.2428, over 5803064.09 frames. ], batch size: 89, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 02:05:00,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=568700.0, ans=0.2
+2024-09-19 02:05:01,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=568700.0, ans=0.125
+2024-09-19 02:05:26,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=568740.0, ans=0.015
+2024-09-19 02:05:29,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=568780.0, ans=0.1
+2024-09-19 02:05:36,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=568780.0, ans=0.125
+2024-09-19 02:05:39,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=568780.0, ans=0.2
+2024-09-19 02:05:48,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=568820.0, ans=0.0
+2024-09-19 02:05:54,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=568820.0, ans=0.0
+2024-09-19 02:05:56,528 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.40 vs. limit=22.5
+2024-09-19 02:06:03,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=568860.0, ans=0.05
+2024-09-19 02:06:09,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=568860.0, ans=0.125
+2024-09-19 02:06:11,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=568860.0, ans=0.07
+2024-09-19 02:06:16,862 INFO [train.py:1198] (0/2) Epoch 32, batch 1950, loss[loss=0.2395, ctc_loss=0.1269, cr_loss=0.3902, attn_decoder_loss=0.2433, over 29450.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1209, cr_loss=0.3646, attn_decoder_loss=0.244, over 5818477.84 frames. ], batch size: 78, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 02:06:22,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.91 vs. limit=6.0
+2024-09-19 02:06:23,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=568900.0, ans=0.125
+2024-09-19 02:06:31,858 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.20 vs. limit=8.0
+2024-09-19 02:06:47,104 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.678e+01 9.081e+01 9.709e+01 1.589e+02, threshold=1.816e+02, percent-clipped=0.0
+2024-09-19 02:07:02,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=569020.0, ans=0.125
+2024-09-19 02:07:32,377 INFO [train.py:1198] (0/2) Epoch 32, batch 2000, loss[loss=0.2099, ctc_loss=0.09901, cr_loss=0.3133, attn_decoder_loss=0.2152, over 29297.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1211, cr_loss=0.3647, attn_decoder_loss=0.2443, over 5796015.56 frames. ], batch size: 67, lr: 3.46e-03, grad_scale: 16.0
+2024-09-19 02:07:40,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.64 vs. limit=15.0
+2024-09-19 02:07:52,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=569140.0, ans=0.125
+2024-09-19 02:07:57,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=569140.0, ans=0.0
+2024-09-19 02:07:58,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=569140.0, ans=10.0
+2024-09-19 02:08:03,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=569180.0, ans=0.2
+2024-09-19 02:08:08,383 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.17 vs. limit=10.0
+2024-09-19 02:08:22,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=569220.0, ans=0.125
+2024-09-19 02:08:38,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=569260.0, ans=0.0
+2024-09-19 02:08:50,443 INFO [train.py:1198] (0/2) Epoch 32, batch 2050, loss[loss=0.2078, ctc_loss=0.1015, cr_loss=0.3058, attn_decoder_loss=0.2128, over 29431.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1203, cr_loss=0.3633, attn_decoder_loss=0.2433, over 5788399.31 frames. ], batch size: 70, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 02:09:07,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=569340.0, ans=0.0
+2024-09-19 02:09:21,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_ff3.min_abs, batch_count=569380.0, ans=0.2
+2024-09-19 02:09:21,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=569380.0, ans=0.025
+2024-09-19 02:09:24,448 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.363e+01 8.440e+01 8.893e+01 9.652e+01 5.207e+02, threshold=1.779e+02, percent-clipped=1.0
+2024-09-19 02:09:32,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=569380.0, ans=0.2
+2024-09-19 02:09:47,682 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.94 vs. limit=15.0
+2024-09-19 02:09:50,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=569420.0, ans=0.0
+2024-09-19 02:10:00,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=569460.0, ans=0.125
+2024-09-19 02:10:01,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=569460.0, ans=0.125
+2024-09-19 02:10:08,280 INFO [train.py:1198] (0/2) Epoch 32, batch 2100, loss[loss=0.2365, ctc_loss=0.1136, cr_loss=0.3439, attn_decoder_loss=0.2425, over 29755.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1197, cr_loss=0.362, attn_decoder_loss=0.2427, over 5799201.05 frames. ], batch size: 81, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 02:10:14,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=569500.0, ans=0.125
+2024-09-19 02:10:29,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=569540.0, ans=0.0
+2024-09-19 02:10:29,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=569540.0, ans=0.125
+2024-09-19 02:10:35,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=569540.0, ans=0.1
+2024-09-19 02:10:38,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=569580.0, ans=0.125
+2024-09-19 02:10:52,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=569620.0, ans=0.125
+2024-09-19 02:10:52,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=569620.0, ans=0.1
+2024-09-19 02:10:58,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.54 vs. limit=15.0
+2024-09-19 02:11:04,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=569620.0, ans=0.125
+2024-09-19 02:11:23,559 INFO [train.py:1198] (0/2) Epoch 32, batch 2150, loss[loss=0.2369, ctc_loss=0.1243, cr_loss=0.3696, attn_decoder_loss=0.2411, over 29447.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1193, cr_loss=0.3611, attn_decoder_loss=0.2419, over 5813979.27 frames. ], batch size: 78, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 02:11:43,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=569740.0, ans=0.125
+2024-09-19 02:11:55,486 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.521e+01 8.476e+01 8.874e+01 9.335e+01 1.569e+02, threshold=1.775e+02, percent-clipped=0.0
+2024-09-19 02:11:57,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=569780.0, ans=0.0
+2024-09-19 02:11:57,812 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.61 vs. limit=15.0
+2024-09-19 02:12:04,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=569780.0, ans=0.0
+2024-09-19 02:12:24,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=569860.0, ans=0.0
+2024-09-19 02:12:24,927 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.88 vs. limit=15.0
+2024-09-19 02:12:41,303 INFO [train.py:1198] (0/2) Epoch 32, batch 2200, loss[loss=0.2461, ctc_loss=0.1215, cr_loss=0.3586, attn_decoder_loss=0.252, over 29620.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1192, cr_loss=0.3606, attn_decoder_loss=0.2418, over 5811052.55 frames. ], batch size: 86, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 02:13:02,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=569940.0, ans=0.2
+2024-09-19 02:13:04,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=569940.0, ans=0.1
+2024-09-19 02:13:10,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=569980.0, ans=0.125
+2024-09-19 02:13:23,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=569980.0, ans=0.125
+2024-09-19 02:13:36,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.46 vs. limit=8.0
+2024-09-19 02:13:46,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=570060.0, ans=0.0
+2024-09-19 02:13:58,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=570100.0, ans=0.0
+2024-09-19 02:13:59,411 INFO [train.py:1198] (0/2) Epoch 32, batch 2250, loss[loss=0.2424, ctc_loss=0.1229, cr_loss=0.3619, attn_decoder_loss=0.2476, over 29694.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1187, cr_loss=0.36, attn_decoder_loss=0.2416, over 5811805.26 frames. ], batch size: 82, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 02:14:09,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.87 vs. limit=10.0
+2024-09-19 02:14:13,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=570140.0, ans=0.0
+2024-09-19 02:14:21,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=570140.0, ans=10.0
+2024-09-19 02:14:31,247 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.789e+01 8.483e+01 9.181e+01 9.844e+01 2.065e+02, threshold=1.836e+02, percent-clipped=2.0
+2024-09-19 02:14:36,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.07 vs. limit=22.5
+2024-09-19 02:14:43,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=570220.0, ans=0.0
+2024-09-19 02:14:43,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=570220.0, ans=0.0
+2024-09-19 02:15:15,151 INFO [train.py:1198] (0/2) Epoch 32, batch 2300, loss[loss=0.199, ctc_loss=0.09008, cr_loss=0.2916, attn_decoder_loss=0.2047, over 29317.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1185, cr_loss=0.3595, attn_decoder_loss=0.2409, over 5798775.77 frames. ], batch size: 71, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:15:39,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=570340.0, ans=0.0
+2024-09-19 02:15:39,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=570340.0, ans=0.0
+2024-09-19 02:15:50,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=570380.0, ans=0.1
+2024-09-19 02:16:00,261 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.21 vs. limit=6.0
+2024-09-19 02:16:19,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=570460.0, ans=0.125
+2024-09-19 02:16:23,340 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.04 vs. limit=15.0
+2024-09-19 02:16:31,319 INFO [train.py:1198] (0/2) Epoch 32, batch 2350, loss[loss=0.2526, ctc_loss=0.1303, cr_loss=0.3708, attn_decoder_loss=0.258, over 29676.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1188, cr_loss=0.36, attn_decoder_loss=0.2412, over 5803717.88 frames. ], batch size: 83, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:16:41,668 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.56 vs. limit=15.0
+2024-09-19 02:16:43,338 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.46 vs. limit=15.0
+2024-09-19 02:16:48,204 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.39 vs. limit=15.0
+2024-09-19 02:17:07,393 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.359e+01 8.588e+01 9.289e+01 9.851e+01 1.770e+02, threshold=1.858e+02, percent-clipped=0.0
+2024-09-19 02:17:13,013 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.36 vs. limit=22.5
+2024-09-19 02:17:21,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=570620.0, ans=0.125
+2024-09-19 02:17:51,218 INFO [train.py:1198] (0/2) Epoch 32, batch 2400, loss[loss=0.2301, ctc_loss=0.1212, cr_loss=0.3814, attn_decoder_loss=0.2337, over 29523.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1196, cr_loss=0.3612, attn_decoder_loss=0.2419, over 5807819.60 frames. ], batch size: 76, lr: 3.45e-03, grad_scale: 16.0
+2024-09-19 02:17:54,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=570700.0, ans=0.125
+2024-09-19 02:18:03,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=570700.0, ans=0.125
+2024-09-19 02:18:19,504 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.94 vs. limit=15.0
+2024-09-19 02:18:49,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=570820.0, ans=0.2
+2024-09-19 02:19:05,116 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=15.0
+2024-09-19 02:19:07,320 INFO [train.py:1198] (0/2) Epoch 32, batch 2450, loss[loss=0.2439, ctc_loss=0.1171, cr_loss=0.3568, attn_decoder_loss=0.2501, over 29729.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1199, cr_loss=0.3617, attn_decoder_loss=0.2427, over 5784745.51 frames. ], batch size: 82, lr: 3.45e-03, grad_scale: 16.0
+2024-09-19 02:19:11,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=570900.0, ans=0.125
+2024-09-19 02:19:11,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=570900.0, ans=0.1
+2024-09-19 02:19:12,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.00 vs. limit=15.0
+2024-09-19 02:19:27,854 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.17 vs. limit=15.0
+2024-09-19 02:19:39,210 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.233e+01 8.392e+01 8.947e+01 9.639e+01 2.320e+02, threshold=1.789e+02, percent-clipped=2.0
+2024-09-19 02:19:45,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=570980.0, ans=0.025
+2024-09-19 02:20:06,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=571060.0, ans=0.125
+2024-09-19 02:20:10,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.29 vs. limit=15.0
+2024-09-19 02:20:16,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=571060.0, ans=0.125
+2024-09-19 02:20:23,383 INFO [train.py:1198] (0/2) Epoch 32, batch 2500, loss[loss=0.2454, ctc_loss=0.1216, cr_loss=0.3752, attn_decoder_loss=0.2509, over 29646.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1199, cr_loss=0.3619, attn_decoder_loss=0.2427, over 5794706.97 frames. ], batch size: 86, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:20:36,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=571100.0, ans=0.125
+2024-09-19 02:20:39,682 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 02:20:53,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=571140.0, ans=0.0
+2024-09-19 02:21:06,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=571180.0, ans=0.09899494936611666
+2024-09-19 02:21:11,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=571180.0, ans=0.125
+2024-09-19 02:21:24,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=571220.0, ans=0.125
+2024-09-19 02:21:28,533 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.23 vs. limit=15.0
+2024-09-19 02:21:41,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=571260.0, ans=0.0
+2024-09-19 02:21:44,430 INFO [train.py:1198] (0/2) Epoch 32, batch 2550, loss[loss=0.2194, ctc_loss=0.1134, cr_loss=0.3563, attn_decoder_loss=0.2233, over 29366.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1197, cr_loss=0.3617, attn_decoder_loss=0.2424, over 5798583.78 frames. ], batch size: 67, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:22:13,999 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.51 vs. limit=15.0
+2024-09-19 02:22:17,609 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.182e+01 8.677e+01 9.042e+01 9.632e+01 1.838e+02, threshold=1.808e+02, percent-clipped=1.0
+2024-09-19 02:23:00,327 INFO [train.py:1198] (0/2) Epoch 32, batch 2600, loss[loss=0.2309, ctc_loss=0.1151, cr_loss=0.3733, attn_decoder_loss=0.2355, over 29448.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1197, cr_loss=0.3621, attn_decoder_loss=0.2428, over 5794317.76 frames. ], batch size: 78, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:23:00,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=571500.0, ans=0.125
+2024-09-19 02:23:06,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=571500.0, ans=0.0
+2024-09-19 02:23:11,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=571500.0, ans=0.1
+2024-09-19 02:23:21,732 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.68 vs. limit=15.0
+2024-09-19 02:23:28,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=571580.0, ans=0.0
+2024-09-19 02:23:53,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=571620.0, ans=0.125
+2024-09-19 02:24:11,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.44 vs. limit=15.0
+2024-09-19 02:24:15,312 INFO [train.py:1198] (0/2) Epoch 32, batch 2650, loss[loss=0.2598, ctc_loss=0.1355, cr_loss=0.3937, attn_decoder_loss=0.2649, over 29256.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.12, cr_loss=0.3624, attn_decoder_loss=0.2431, over 5800284.07 frames. ], batch size: 100, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:24:17,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=571700.0, ans=0.0
+2024-09-19 02:24:35,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=571740.0, ans=0.125
+2024-09-19 02:24:52,692 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.029e+01 8.436e+01 8.918e+01 9.348e+01 1.627e+02, threshold=1.784e+02, percent-clipped=0.0
+2024-09-19 02:25:00,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=571780.0, ans=0.5
+2024-09-19 02:25:03,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=571820.0, ans=0.0
+2024-09-19 02:25:12,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=571820.0, ans=0.0
+2024-09-19 02:25:18,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=571860.0, ans=0.0
+2024-09-19 02:25:21,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=571860.0, ans=0.1
+2024-09-19 02:25:26,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.03 vs. limit=10.0
+2024-09-19 02:25:34,876 INFO [train.py:1198] (0/2) Epoch 32, batch 2700, loss[loss=0.2473, ctc_loss=0.1167, cr_loss=0.3612, attn_decoder_loss=0.2537, over 29540.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1199, cr_loss=0.3621, attn_decoder_loss=0.2433, over 5796575.24 frames. ], batch size: 87, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:25:42,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=571900.0, ans=0.125
+2024-09-19 02:25:48,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=571940.0, ans=0.0
+2024-09-19 02:25:54,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=571940.0, ans=0.2
+2024-09-19 02:26:21,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=572020.0, ans=0.0
+2024-09-19 02:26:51,226 INFO [train.py:1198] (0/2) Epoch 32, batch 2750, loss[loss=0.2305, ctc_loss=0.1196, cr_loss=0.362, attn_decoder_loss=0.2348, over 29502.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1194, cr_loss=0.3611, attn_decoder_loss=0.2423, over 5796528.23 frames. ], batch size: 75, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:27:08,695 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.21 vs. limit=15.0
+2024-09-19 02:27:12,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=572140.0, ans=0.125
+2024-09-19 02:27:24,115 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.561e+01 8.589e+01 9.060e+01 9.796e+01 2.270e+02, threshold=1.812e+02, percent-clipped=2.0
+2024-09-19 02:27:26,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=572180.0, ans=0.125
+2024-09-19 02:27:29,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.52 vs. limit=15.0
+2024-09-19 02:27:44,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=572220.0, ans=0.0
+2024-09-19 02:28:07,088 INFO [train.py:1198] (0/2) Epoch 32, batch 2800, loss[loss=0.2482, ctc_loss=0.1333, cr_loss=0.374, attn_decoder_loss=0.2526, over 20421.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1198, cr_loss=0.3614, attn_decoder_loss=0.2426, over 5777527.12 frames. ], batch size: 210, lr: 3.45e-03, grad_scale: 16.0
+2024-09-19 02:28:07,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=572300.0, ans=0.2
+2024-09-19 02:28:37,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=572340.0, ans=0.125
+2024-09-19 02:28:39,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=572340.0, ans=0.0
+2024-09-19 02:28:39,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=572340.0, ans=0.125
+2024-09-19 02:28:56,244 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.85 vs. limit=22.5
+2024-09-19 02:28:58,116 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.29 vs. limit=5.0
+2024-09-19 02:29:13,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=572460.0, ans=0.0
+2024-09-19 02:29:18,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=572460.0, ans=0.125
+2024-09-19 02:29:26,874 INFO [train.py:1198] (0/2) Epoch 32, batch 2850, loss[loss=0.234, ctc_loss=0.1167, cr_loss=0.3515, attn_decoder_loss=0.2392, over 29526.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1204, cr_loss=0.3626, attn_decoder_loss=0.2431, over 5762170.78 frames. ], batch size: 77, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:29:31,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=572500.0, ans=0.2
+2024-09-19 02:29:43,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=572540.0, ans=0.1
+2024-09-19 02:30:00,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=572580.0, ans=0.1
+2024-09-19 02:30:01,858 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.632e+01 8.715e+01 9.222e+01 9.934e+01 2.539e+02, threshold=1.844e+02, percent-clipped=1.0
+2024-09-19 02:30:15,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=572620.0, ans=0.125
+2024-09-19 02:30:20,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=572620.0, ans=0.1
+2024-09-19 02:30:32,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=572660.0, ans=0.0
+2024-09-19 02:30:33,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=572660.0, ans=0.025
+2024-09-19 02:30:39,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=572660.0, ans=0.125
+2024-09-19 02:30:42,446 INFO [train.py:1198] (0/2) Epoch 32, batch 2900, loss[loss=0.2353, ctc_loss=0.1207, cr_loss=0.3678, attn_decoder_loss=0.2399, over 29435.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.121, cr_loss=0.3643, attn_decoder_loss=0.2443, over 5787927.33 frames. ], batch size: 79, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:31:27,976 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.07 vs. limit=22.5
+2024-09-19 02:31:28,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=572820.0, ans=0.1
+2024-09-19 02:31:42,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=572860.0, ans=0.125
+2024-09-19 02:31:58,403 INFO [train.py:1198] (0/2) Epoch 32, batch 2950, loss[loss=0.2269, ctc_loss=0.1175, cr_loss=0.3438, attn_decoder_loss=0.2315, over 29525.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1202, cr_loss=0.3626, attn_decoder_loss=0.2428, over 5782785.14 frames. ], batch size: 75, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:31:58,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=572900.0, ans=0.125
+2024-09-19 02:32:37,782 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.544e+01 8.997e+01 9.588e+01 2.155e+02, threshold=1.799e+02, percent-clipped=1.0
+2024-09-19 02:32:45,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=572980.0, ans=0.125
+2024-09-19 02:32:56,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=573020.0, ans=0.07
+2024-09-19 02:33:03,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=573060.0, ans=0.1
+2024-09-19 02:33:06,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=573060.0, ans=0.125
+2024-09-19 02:33:08,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=573060.0, ans=0.125
+2024-09-19 02:33:08,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=573060.0, ans=0.125
+2024-09-19 02:33:10,395 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.92 vs. limit=15.0
+2024-09-19 02:33:11,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=573060.0, ans=0.0
+2024-09-19 02:33:18,456 INFO [train.py:1198] (0/2) Epoch 32, batch 3000, loss[loss=0.2374, ctc_loss=0.1178, cr_loss=0.3647, attn_decoder_loss=0.2426, over 29766.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1201, cr_loss=0.3619, attn_decoder_loss=0.2426, over 5784061.52 frames. ], batch size: 81, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:33:18,456 INFO [train.py:1221] (0/2) Computing validation loss
+2024-09-19 02:33:36,936 INFO [train.py:1230] (0/2) Epoch 32, validation: loss=0.2117, ctc_loss=0.0367, cr_loss=5.626e-15, attn_decoder_loss=0.2311, over 944034.00 frames.
+2024-09-19 02:33:36,937 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB
+2024-09-19 02:33:41,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=573100.0, ans=0.2
+2024-09-19 02:33:59,574 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.12 vs. limit=15.0
+2024-09-19 02:33:59,587 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.38 vs. limit=15.0
+2024-09-19 02:34:01,777 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 02:34:03,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=573140.0, ans=0.1
+2024-09-19 02:34:04,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=573140.0, ans=0.1
+2024-09-19 02:34:11,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.85 vs. limit=15.0
+2024-09-19 02:34:13,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=573180.0, ans=0.0
+2024-09-19 02:34:18,760 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.23 vs. limit=6.0
+2024-09-19 02:34:34,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=573220.0, ans=0.125
+2024-09-19 02:34:34,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=573220.0, ans=0.125
+2024-09-19 02:34:52,857 INFO [train.py:1198] (0/2) Epoch 32, batch 3050, loss[loss=0.2254, ctc_loss=0.1135, cr_loss=0.3308, attn_decoder_loss=0.2305, over 29539.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1208, cr_loss=0.3635, attn_decoder_loss=0.2433, over 5777459.39 frames. ], batch size: 76, lr: 3.45e-03, grad_scale: 8.0
+2024-09-19 02:35:11,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=573340.0, ans=0.125
+2024-09-19 02:35:19,391 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.46 vs. limit=15.0
+2024-09-19 02:35:27,641 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.361e+01 8.755e+01 9.253e+01 9.957e+01 1.667e+02, threshold=1.851e+02, percent-clipped=0.0
+2024-09-19 02:35:42,112 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.39 vs. limit=22.5
+2024-09-19 02:35:47,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=573420.0, ans=0.2
+2024-09-19 02:35:53,800 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.74 vs. limit=22.5
+2024-09-19 02:36:11,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=573500.0, ans=0.125
+2024-09-19 02:36:12,286 INFO [train.py:1198] (0/2) Epoch 32, batch 3100, loss[loss=0.2542, ctc_loss=0.1323, cr_loss=0.4011, attn_decoder_loss=0.2588, over 29200.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1201, cr_loss=0.3623, attn_decoder_loss=0.2429, over 5776365.54 frames. ], batch size: 100, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:36:23,046 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 02:36:23,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=573500.0, ans=0.0
+2024-09-19 02:36:42,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=573580.0, ans=0.125
+2024-09-19 02:36:44,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=573580.0, ans=0.0
+2024-09-19 02:37:15,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=573660.0, ans=0.0
+2024-09-19 02:37:28,501 INFO [train.py:1198] (0/2) Epoch 32, batch 3150, loss[loss=0.2581, ctc_loss=0.1453, cr_loss=0.4177, attn_decoder_loss=0.2613, over 28789.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1199, cr_loss=0.3623, attn_decoder_loss=0.243, over 5783084.71 frames. ], batch size: 104, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:38:03,495 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.454e+01 8.918e+01 9.492e+01 5.119e+02, threshold=1.784e+02, percent-clipped=1.0
+2024-09-19 02:38:09,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=573780.0, ans=0.1
+2024-09-19 02:38:20,777 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.58 vs. limit=15.0
+2024-09-19 02:38:21,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=573820.0, ans=0.0
+2024-09-19 02:38:44,610 INFO [train.py:1198] (0/2) Epoch 32, batch 3200, loss[loss=0.249, ctc_loss=0.1333, cr_loss=0.3938, attn_decoder_loss=0.2531, over 29393.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1196, cr_loss=0.3618, attn_decoder_loss=0.2427, over 5794026.96 frames. ], batch size: 79, lr: 3.44e-03, grad_scale: 16.0
+2024-09-19 02:38:55,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=573900.0, ans=0.0
+2024-09-19 02:39:11,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.71 vs. limit=22.5
+2024-09-19 02:39:13,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=573980.0, ans=0.1
+2024-09-19 02:39:40,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=574020.0, ans=0.0
+2024-09-19 02:39:48,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=574060.0, ans=0.125
+2024-09-19 02:39:53,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=574060.0, ans=0.2
+2024-09-19 02:40:04,582 INFO [train.py:1198] (0/2) Epoch 32, batch 3250, loss[loss=0.2432, ctc_loss=0.1202, cr_loss=0.3592, attn_decoder_loss=0.2489, over 29684.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1194, cr_loss=0.3614, attn_decoder_loss=0.2427, over 5800718.10 frames. ], batch size: 84, lr: 3.44e-03, grad_scale: 16.0
+2024-09-19 02:40:09,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=574100.0, ans=0.0
+2024-09-19 02:40:16,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=574100.0, ans=0.1
+2024-09-19 02:40:19,883 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 02:40:26,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=574140.0, ans=15.0
+2024-09-19 02:40:40,324 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.274e+01 8.537e+01 9.027e+01 9.508e+01 1.850e+02, threshold=1.805e+02, percent-clipped=1.0
+2024-09-19 02:40:40,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=574180.0, ans=0.125
+2024-09-19 02:40:44,194 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.54 vs. limit=15.0
+2024-09-19 02:40:55,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=574220.0, ans=0.125
+2024-09-19 02:41:10,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=574260.0, ans=0.125
+2024-09-19 02:41:19,689 INFO [train.py:1198] (0/2) Epoch 32, batch 3300, loss[loss=0.2456, ctc_loss=0.1139, cr_loss=0.3574, attn_decoder_loss=0.2523, over 28220.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1187, cr_loss=0.3603, attn_decoder_loss=0.2416, over 5798071.09 frames. ], batch size: 111, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:41:23,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=574300.0, ans=0.0
+2024-09-19 02:41:37,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.21 vs. limit=22.5
+2024-09-19 02:42:10,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=574420.0, ans=0.0
+2024-09-19 02:42:21,227 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=14.02 vs. limit=15.0
+2024-09-19 02:42:35,362 INFO [train.py:1198] (0/2) Epoch 32, batch 3350, loss[loss=0.2472, ctc_loss=0.127, cr_loss=0.3597, attn_decoder_loss=0.2526, over 28858.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1199, cr_loss=0.3619, attn_decoder_loss=0.2426, over 5774855.08 frames. ], batch size: 104, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:43:03,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=574540.0, ans=0.125
+2024-09-19 02:43:11,942 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.423e+01 8.605e+01 8.988e+01 9.712e+01 2.177e+02, threshold=1.798e+02, percent-clipped=2.0
+2024-09-19 02:43:29,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.26 vs. limit=15.0
+2024-09-19 02:43:39,205 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.54 vs. limit=15.0
+2024-09-19 02:43:46,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=574660.0, ans=0.025
+2024-09-19 02:43:55,669 INFO [train.py:1198] (0/2) Epoch 32, batch 3400, loss[loss=0.2088, ctc_loss=0.09359, cr_loss=0.3084, attn_decoder_loss=0.2148, over 29332.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1198, cr_loss=0.3616, attn_decoder_loss=0.2425, over 5767137.85 frames. ], batch size: 67, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:43:56,738 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.38 vs. limit=15.0
+2024-09-19 02:44:08,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.08 vs. limit=15.0
+2024-09-19 02:44:22,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=574740.0, ans=0.125
+2024-09-19 02:44:35,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=574780.0, ans=0.1
+2024-09-19 02:44:48,221 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.88 vs. limit=6.0
+2024-09-19 02:45:00,390 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.17 vs. limit=15.0
+2024-09-19 02:45:11,594 INFO [train.py:1198] (0/2) Epoch 32, batch 3450, loss[loss=0.2545, ctc_loss=0.1269, cr_loss=0.3624, attn_decoder_loss=0.2606, over 28533.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1199, cr_loss=0.3615, attn_decoder_loss=0.2427, over 5775170.89 frames. ], batch size: 112, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:45:14,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=574900.0, ans=0.125
+2024-09-19 02:45:16,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=574900.0, ans=0.125
+2024-09-19 02:45:30,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=574940.0, ans=0.0
+2024-09-19 02:45:47,947 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.235e+01 8.523e+01 9.077e+01 9.652e+01 1.976e+02, threshold=1.815e+02, percent-clipped=1.0
+2024-09-19 02:46:13,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=575060.0, ans=0.125
+2024-09-19 02:46:21,901 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.80 vs. limit=15.0
+2024-09-19 02:46:24,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=575060.0, ans=0.1
+2024-09-19 02:46:27,033 INFO [train.py:1198] (0/2) Epoch 32, batch 3500, loss[loss=0.2136, ctc_loss=0.09588, cr_loss=0.3229, attn_decoder_loss=0.2195, over 29316.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1197, cr_loss=0.3614, attn_decoder_loss=0.2421, over 5777122.96 frames. ], batch size: 71, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:46:27,371 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 02:46:48,916 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.37 vs. limit=15.0
+2024-09-19 02:46:51,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=575140.0, ans=0.2
+2024-09-19 02:47:23,761 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.45 vs. limit=15.0
+2024-09-19 02:47:34,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=575260.0, ans=0.1
+2024-09-19 02:47:42,065 INFO [train.py:1198] (0/2) Epoch 32, batch 3550, loss[loss=0.2422, ctc_loss=0.1162, cr_loss=0.3667, attn_decoder_loss=0.2481, over 29719.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1192, cr_loss=0.3606, attn_decoder_loss=0.242, over 5782094.82 frames. ], batch size: 89, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:47:47,192 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.33 vs. limit=6.0
+2024-09-19 02:48:03,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=575340.0, ans=0.1
+2024-09-19 02:48:19,432 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.300e+01 8.288e+01 8.961e+01 9.598e+01 1.614e+02, threshold=1.792e+02, percent-clipped=0.0
+2024-09-19 02:48:48,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=575460.0, ans=0.125
+2024-09-19 02:48:52,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=575460.0, ans=0.125
+2024-09-19 02:48:58,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=575500.0, ans=0.125
+2024-09-19 02:49:00,121 INFO [train.py:1198] (0/2) Epoch 32, batch 3600, loss[loss=0.2216, ctc_loss=0.1079, cr_loss=0.351, attn_decoder_loss=0.2264, over 29496.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1189, cr_loss=0.3609, attn_decoder_loss=0.242, over 5791555.63 frames. ], batch size: 77, lr: 3.44e-03, grad_scale: 16.0
+2024-09-19 02:49:03,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=575500.0, ans=0.0
+2024-09-19 02:49:19,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=575540.0, ans=0.125
+2024-09-19 02:49:30,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=575580.0, ans=0.125
+2024-09-19 02:49:34,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=575580.0, ans=0.125
+2024-09-19 02:50:14,517 INFO [train.py:1198] (0/2) Epoch 32, batch 3650, loss[loss=0.255, ctc_loss=0.1304, cr_loss=0.3743, attn_decoder_loss=0.2605, over 29490.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1189, cr_loss=0.3612, attn_decoder_loss=0.2417, over 5793270.74 frames. ], batch size: 90, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:50:23,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=575700.0, ans=0.0
+2024-09-19 02:50:46,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=575780.0, ans=0.125
+2024-09-19 02:50:51,849 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.010e+01 8.427e+01 8.894e+01 9.403e+01 1.898e+02, threshold=1.779e+02, percent-clipped=1.0
+2024-09-19 02:51:06,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=575820.0, ans=0.0
+2024-09-19 02:51:29,100 INFO [train.py:1198] (0/2) Epoch 32, batch 3700, loss[loss=0.2522, ctc_loss=0.1276, cr_loss=0.3707, attn_decoder_loss=0.2578, over 29703.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1191, cr_loss=0.3617, attn_decoder_loss=0.2421, over 5802838.59 frames. ], batch size: 84, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:51:31,440 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.47 vs. limit=15.0
+2024-09-19 02:51:42,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=575940.0, ans=0.125
+2024-09-19 02:52:05,314 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-144000.pt
+2024-09-19 02:52:27,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=576020.0, ans=0.1
+2024-09-19 02:52:33,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=576020.0, ans=0.0
+2024-09-19 02:52:51,309 INFO [train.py:1198] (0/2) Epoch 32, batch 3750, loss[loss=0.21, ctc_loss=0.1003, cr_loss=0.3261, attn_decoder_loss=0.215, over 29290.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1193, cr_loss=0.362, attn_decoder_loss=0.2421, over 5807171.82 frames. ], batch size: 67, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:53:05,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.65 vs. limit=6.0
+2024-09-19 02:53:19,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=576180.0, ans=0.125
+2024-09-19 02:53:28,121 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.053e+01 8.581e+01 9.002e+01 9.610e+01 1.544e+02, threshold=1.800e+02, percent-clipped=0.0
+2024-09-19 02:53:42,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=576220.0, ans=0.1
+2024-09-19 02:54:05,629 INFO [train.py:1198] (0/2) Epoch 32, batch 3800, loss[loss=0.2331, ctc_loss=0.1127, cr_loss=0.3425, attn_decoder_loss=0.2389, over 29618.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1194, cr_loss=0.3617, attn_decoder_loss=0.2418, over 5795896.76 frames. ], batch size: 86, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:54:13,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=576300.0, ans=0.125
+2024-09-19 02:54:14,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.31 vs. limit=6.0
+2024-09-19 02:54:25,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=576340.0, ans=0.07
+2024-09-19 02:54:28,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=576340.0, ans=0.95
+2024-09-19 02:54:34,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_ff2.min_abs, batch_count=576340.0, ans=0.1
+2024-09-19 02:54:46,621 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.42 vs. limit=22.5
+2024-09-19 02:54:50,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=576380.0, ans=0.125
+2024-09-19 02:54:58,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=576420.0, ans=0.2
+2024-09-19 02:55:10,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=576460.0, ans=0.0
+2024-09-19 02:55:23,185 INFO [train.py:1198] (0/2) Epoch 32, batch 3850, loss[loss=0.2576, ctc_loss=0.1345, cr_loss=0.3925, attn_decoder_loss=0.2625, over 29323.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.119, cr_loss=0.3611, attn_decoder_loss=0.2414, over 5810051.81 frames. ], batch size: 100, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:55:47,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=576540.0, ans=0.0
+2024-09-19 02:55:51,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=576580.0, ans=0.1
+2024-09-19 02:56:00,141 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.855e+01 8.481e+01 8.994e+01 9.437e+01 1.418e+02, threshold=1.799e+02, percent-clipped=0.0
+2024-09-19 02:56:34,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=576660.0, ans=0.025
+2024-09-19 02:56:37,438 INFO [train.py:1198] (0/2) Epoch 32, batch 3900, loss[loss=0.2471, ctc_loss=0.1231, cr_loss=0.3666, attn_decoder_loss=0.2527, over 29644.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1194, cr_loss=0.3614, attn_decoder_loss=0.2419, over 5815060.67 frames. ], batch size: 86, lr: 3.44e-03, grad_scale: 8.0
+2024-09-19 02:56:41,374 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.97 vs. limit=15.0
+2024-09-19 02:56:45,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=576700.0, ans=10.0
+2024-09-19 02:56:49,753 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 02:57:22,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=576820.0, ans=0.2
+2024-09-19 02:57:52,056 INFO [train.py:1198] (0/2) Epoch 32, batch 3950, loss[loss=0.2596, ctc_loss=0.1339, cr_loss=0.403, attn_decoder_loss=0.2646, over 29494.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1196, cr_loss=0.3623, attn_decoder_loss=0.2425, over 5834987.77 frames. ], batch size: 97, lr: 3.43e-03, grad_scale: 8.0
+2024-09-19 02:57:56,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=576900.0, ans=0.1
+2024-09-19 02:57:57,370 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.54 vs. limit=22.5
+2024-09-19 02:58:05,027 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.80 vs. limit=5.0
+2024-09-19 02:58:25,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.26 vs. limit=15.0
+2024-09-19 02:58:28,775 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.521e+01 9.029e+01 9.542e+01 2.820e+02, threshold=1.806e+02, percent-clipped=2.0
+2024-09-19 02:58:31,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=576980.0, ans=0.0
+2024-09-19 02:58:32,896 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.45 vs. limit=22.5
+2024-09-19 02:58:40,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=577020.0, ans=0.125
+2024-09-19 02:58:42,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=577020.0, ans=0.1
+2024-09-19 02:59:00,463 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.75 vs. limit=15.0
+2024-09-19 02:59:05,620 INFO [train.py:1198] (0/2) Epoch 32, batch 4000, loss[loss=0.219, ctc_loss=0.102, cr_loss=0.3152, attn_decoder_loss=0.225, over 29505.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1197, cr_loss=0.3622, attn_decoder_loss=0.2423, over 5811754.00 frames. ], batch size: 74, lr: 3.43e-03, grad_scale: 16.0
+2024-09-19 02:59:38,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=577180.0, ans=0.125
+2024-09-19 02:59:46,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=577180.0, ans=0.125
+2024-09-19 02:59:47,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=577180.0, ans=0.2
+2024-09-19 03:00:21,893 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.21 vs. limit=10.0
+2024-09-19 03:00:22,472 INFO [train.py:1198] (0/2) Epoch 32, batch 4050, loss[loss=0.2522, ctc_loss=0.137, cr_loss=0.3817, attn_decoder_loss=0.2565, over 20682.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1197, cr_loss=0.3621, attn_decoder_loss=0.242, over 5796045.76 frames. ], batch size: 210, lr: 3.43e-03, grad_scale: 16.0
+2024-09-19 03:00:53,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=577380.0, ans=0.125
+2024-09-19 03:00:59,176 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.569e+01 8.593e+01 9.238e+01 9.964e+01 1.548e+02, threshold=1.848e+02, percent-clipped=0.0
+2024-09-19 03:01:10,251 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.57 vs. limit=6.0
+2024-09-19 03:01:25,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.97 vs. limit=12.0
+2024-09-19 03:01:36,060 INFO [train.py:1198] (0/2) Epoch 32, batch 4100, loss[loss=0.2507, ctc_loss=0.1241, cr_loss=0.3772, attn_decoder_loss=0.2564, over 29539.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1195, cr_loss=0.3619, attn_decoder_loss=0.242, over 5791807.89 frames. ], batch size: 90, lr: 3.43e-03, grad_scale: 16.0
+2024-09-19 03:01:42,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=577500.0, ans=0.07
+2024-09-19 03:01:49,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=577540.0, ans=0.1
+2024-09-19 03:01:57,725 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.10 vs. limit=12.0
+2024-09-19 03:02:08,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=577580.0, ans=0.1
+2024-09-19 03:02:16,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=577580.0, ans=0.0
+2024-09-19 03:02:19,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=577620.0, ans=0.1
+2024-09-19 03:02:27,476 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.88 vs. limit=15.0
+2024-09-19 03:02:50,340 INFO [train.py:1198] (0/2) Epoch 32, batch 4150, loss[loss=0.2269, ctc_loss=0.1113, cr_loss=0.3386, attn_decoder_loss=0.2322, over 29517.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1192, cr_loss=0.3612, attn_decoder_loss=0.2415, over 5798028.74 frames. ], batch size: 77, lr: 3.43e-03, grad_scale: 16.0
+2024-09-19 03:03:03,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.90 vs. limit=22.5
+2024-09-19 03:03:08,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=577740.0, ans=0.125
+2024-09-19 03:03:11,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=577740.0, ans=0.125
+2024-09-19 03:03:12,044 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.85 vs. limit=12.0
+2024-09-19 03:03:12,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=577740.0, ans=0.09899494936611666
+2024-09-19 03:03:14,736 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.29 vs. limit=6.0
+2024-09-19 03:03:15,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=577740.0, ans=0.125
+2024-09-19 03:03:19,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=577780.0, ans=0.0
+2024-09-19 03:03:24,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=577780.0, ans=0.0
+2024-09-19 03:03:26,926 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.373e+01 8.412e+01 8.911e+01 9.455e+01 1.648e+02, threshold=1.782e+02, percent-clipped=0.0
+2024-09-19 03:03:27,381 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 03:03:30,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=577780.0, ans=10.0
+2024-09-19 03:03:46,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=577820.0, ans=0.125
+2024-09-19 03:03:59,851 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.72 vs.
limit=10.0 +2024-09-19 03:04:02,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=577860.0, ans=0.125 +2024-09-19 03:04:04,960 INFO [train.py:1198] (0/2) Epoch 32, batch 4200, loss[loss=0.2514, ctc_loss=0.133, cr_loss=0.3924, attn_decoder_loss=0.2558, over 29507.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1193, cr_loss=0.361, attn_decoder_loss=0.2418, over 5799966.27 frames. ], batch size: 90, lr: 3.43e-03, grad_scale: 16.0 +2024-09-19 03:04:13,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=577900.0, ans=0.0 +2024-09-19 03:04:21,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=577940.0, ans=0.125 +2024-09-19 03:04:34,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=577980.0, ans=0.125 +2024-09-19 03:04:51,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=578020.0, ans=0.125 +2024-09-19 03:04:59,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=578020.0, ans=0.025 +2024-09-19 03:05:06,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=578060.0, ans=0.1 +2024-09-19 03:05:10,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=578060.0, ans=0.025 +2024-09-19 03:05:19,408 INFO [train.py:1198] (0/2) Epoch 32, batch 4250, loss[loss=0.2108, ctc_loss=0.09751, cr_loss=0.3212, attn_decoder_loss=0.2162, over 29527.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1192, cr_loss=0.3609, attn_decoder_loss=0.2422, over 5805296.48 frames. ], batch size: 74, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:05:19,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=578100.0, ans=0.125 +2024-09-19 03:05:28,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=578100.0, ans=0.0 +2024-09-19 03:05:30,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=578100.0, ans=0.2 +2024-09-19 03:05:39,505 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.22 vs. limit=15.0 +2024-09-19 03:05:57,462 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.485e+01 9.060e+01 9.670e+01 1.862e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 03:06:08,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.63 vs. limit=22.5 +2024-09-19 03:06:13,239 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.94 vs. limit=6.0 +2024-09-19 03:06:33,361 INFO [train.py:1198] (0/2) Epoch 32, batch 4300, loss[loss=0.2497, ctc_loss=0.1253, cr_loss=0.3731, attn_decoder_loss=0.2553, over 29495.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1192, cr_loss=0.361, attn_decoder_loss=0.2425, over 5794684.64 frames. 
], batch size: 87, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:06:41,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=578300.0, ans=0.125 +2024-09-19 03:06:53,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=578340.0, ans=0.035 +2024-09-19 03:07:01,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=578340.0, ans=0.125 +2024-09-19 03:07:47,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=578460.0, ans=0.125 +2024-09-19 03:07:48,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=578500.0, ans=0.2 +2024-09-19 03:07:50,081 INFO [train.py:1198] (0/2) Epoch 32, batch 4350, loss[loss=0.2521, ctc_loss=0.1357, cr_loss=0.3939, attn_decoder_loss=0.2562, over 29485.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1216, cr_loss=0.3659, attn_decoder_loss=0.2455, over 5796735.53 frames. ], batch size: 97, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:07:51,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=578500.0, ans=0.125 +2024-09-19 03:08:14,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=578540.0, ans=0.125 +2024-09-19 03:08:18,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=578580.0, ans=0.125 +2024-09-19 03:08:28,241 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.091e+01 8.949e+01 9.418e+01 9.976e+01 1.682e+02, threshold=1.884e+02, percent-clipped=0.0 +2024-09-19 03:08:35,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=578620.0, ans=0.125 +2024-09-19 03:08:53,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=578660.0, ans=0.125 +2024-09-19 03:09:03,605 INFO [train.py:1198] (0/2) Epoch 32, batch 4400, loss[loss=0.2513, ctc_loss=0.1311, cr_loss=0.3746, attn_decoder_loss=0.2564, over 27372.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.123, cr_loss=0.3683, attn_decoder_loss=0.2475, over 5768591.09 frames. ], batch size: 124, lr: 3.43e-03, grad_scale: 16.0 +2024-09-19 03:09:20,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=578740.0, ans=0.1 +2024-09-19 03:09:54,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=578820.0, ans=0.125 +2024-09-19 03:09:55,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=578820.0, ans=0.125 +2024-09-19 03:10:04,455 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.99 vs. limit=6.0 +2024-09-19 03:10:12,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=578860.0, ans=0.025 +2024-09-19 03:10:18,565 INFO [train.py:1198] (0/2) Epoch 32, batch 4450, loss[loss=0.2591, ctc_loss=0.1508, cr_loss=0.388, attn_decoder_loss=0.2625, over 19486.00 frames. 
], tot_loss[loss=0.2449, ctc_loss=0.1269, cr_loss=0.3742, attn_decoder_loss=0.2497, over 5582169.92 frames. ], batch size: 209, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:10:19,096 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:10:38,833 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.02 vs. limit=15.0 +2024-09-19 03:10:41,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=578940.0, ans=0.025 +2024-09-19 03:10:55,393 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.52 vs. limit=6.0 +2024-09-19 03:10:58,946 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.256e+01 9.217e+01 9.990e+01 1.147e+02 3.633e+02, threshold=1.998e+02, percent-clipped=4.0 +2024-09-19 03:11:04,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=579020.0, ans=0.125 +2024-09-19 03:11:08,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=579020.0, ans=0.2 +2024-09-19 03:11:17,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=579060.0, ans=10.0 +2024-09-19 03:11:34,149 INFO [train.py:1198] (0/2) Epoch 32, batch 4500, loss[loss=0.2538, ctc_loss=0.1486, cr_loss=0.3782, attn_decoder_loss=0.2571, over 20100.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1306, cr_loss=0.3769, attn_decoder_loss=0.2518, over 5243910.12 frames. ], batch size: 209, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:11:49,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=579140.0, ans=0.0 +2024-09-19 03:11:58,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=579140.0, ans=0.2 +2024-09-19 03:12:00,489 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.88 vs. limit=15.0 +2024-09-19 03:12:11,671 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-32.pt +2024-09-19 03:13:03,888 INFO [train.py:1198] (0/2) Epoch 33, batch 0, loss[loss=0.2071, ctc_loss=0.08985, cr_loss=0.2948, attn_decoder_loss=0.2135, over 29641.00 frames. ], tot_loss[loss=0.2071, ctc_loss=0.08985, cr_loss=0.2948, attn_decoder_loss=0.2135, over 29641.00 frames. ], batch size: 73, lr: 3.37e-03, grad_scale: 16.0 +2024-09-19 03:13:03,888 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 03:13:20,825 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.0481, 3.6285, 3.9033, 3.5478], device='cuda:0') +2024-09-19 03:13:22,384 INFO [train.py:1230] (0/2) Epoch 33, validation: loss=0.2131, ctc_loss=0.03625, cr_loss=6.2e-15, attn_decoder_loss=0.2327, over 944034.00 frames. 
+2024-09-19 03:13:22,385 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 03:13:24,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=579200.0, ans=0.125 +2024-09-19 03:13:27,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=579200.0, ans=0.0 +2024-09-19 03:13:43,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=579240.0, ans=0.125 +2024-09-19 03:14:08,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=579320.0, ans=0.0 +2024-09-19 03:14:22,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=579360.0, ans=0.125 +2024-09-19 03:14:26,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=579360.0, ans=0.125 +2024-09-19 03:14:29,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.82 vs. limit=5.0 +2024-09-19 03:14:33,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=579360.0, ans=0.025 +2024-09-19 03:14:38,708 INFO [train.py:1198] (0/2) Epoch 33, batch 50, loss[loss=0.209, ctc_loss=0.09614, cr_loss=0.3074, attn_decoder_loss=0.2147, over 29467.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1208, cr_loss=0.3655, attn_decoder_loss=0.2427, over 1268171.71 frames. ], batch size: 70, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:14:43,365 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.810e+01 9.277e+01 1.031e+02 1.119e+02 2.001e+02, threshold=2.062e+02, percent-clipped=1.0 +2024-09-19 03:15:06,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=579440.0, ans=0.0 +2024-09-19 03:15:09,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=579480.0, ans=0.025 +2024-09-19 03:15:32,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=579520.0, ans=0.1 +2024-09-19 03:15:38,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=579560.0, ans=0.025 +2024-09-19 03:15:54,960 INFO [train.py:1198] (0/2) Epoch 33, batch 100, loss[loss=0.2286, ctc_loss=0.1135, cr_loss=0.3475, attn_decoder_loss=0.2337, over 29528.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1226, cr_loss=0.3697, attn_decoder_loss=0.2452, over 2252997.21 frames. ], batch size: 76, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:15:55,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=579600.0, ans=0.0 +2024-09-19 03:16:02,902 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.93 vs. limit=15.0 +2024-09-19 03:16:07,724 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.24 vs. 
limit=6.0 +2024-09-19 03:16:13,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=579640.0, ans=0.2 +2024-09-19 03:16:24,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=579640.0, ans=0.2 +2024-09-19 03:16:24,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=579640.0, ans=0.125 +2024-09-19 03:16:43,097 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.17 vs. limit=15.0 +2024-09-19 03:16:48,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=579720.0, ans=0.125 +2024-09-19 03:17:03,798 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.49 vs. limit=6.0 +2024-09-19 03:17:10,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=579800.0, ans=0.035 +2024-09-19 03:17:11,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.23 vs. limit=6.0 +2024-09-19 03:17:11,883 INFO [train.py:1198] (0/2) Epoch 33, batch 150, loss[loss=0.2195, ctc_loss=0.1079, cr_loss=0.3584, attn_decoder_loss=0.2239, over 29422.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1204, cr_loss=0.365, attn_decoder_loss=0.2431, over 3047719.18 frames. ], batch size: 70, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:17:12,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=579800.0, ans=0.1 +2024-09-19 03:17:16,295 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.420e+01 8.477e+01 8.945e+01 9.593e+01 9.750e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-19 03:17:22,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=579800.0, ans=0.0 +2024-09-19 03:17:33,564 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.57 vs. limit=12.0 +2024-09-19 03:17:36,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=579840.0, ans=0.125 +2024-09-19 03:17:42,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.71 vs. 
limit=12.0 +2024-09-19 03:17:43,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=579880.0, ans=0.125 +2024-09-19 03:17:56,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=579920.0, ans=0.2 +2024-09-19 03:18:07,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=579920.0, ans=0.05 +2024-09-19 03:18:16,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=579960.0, ans=0.1 +2024-09-19 03:18:23,643 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:18:28,277 INFO [train.py:1198] (0/2) Epoch 33, batch 200, loss[loss=0.238, ctc_loss=0.1236, cr_loss=0.3649, attn_decoder_loss=0.2426, over 27293.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1197, cr_loss=0.3637, attn_decoder_loss=0.2422, over 3660162.64 frames. ], batch size: 124, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:19:16,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=580120.0, ans=0.125 +2024-09-19 03:19:20,565 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.82 vs. limit=15.0 +2024-09-19 03:19:30,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=580160.0, ans=0.025 +2024-09-19 03:19:32,527 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.61 vs. limit=22.5 +2024-09-19 03:19:43,965 INFO [train.py:1198] (0/2) Epoch 33, batch 250, loss[loss=0.2495, ctc_loss=0.1268, cr_loss=0.3698, attn_decoder_loss=0.2549, over 29203.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1191, cr_loss=0.362, attn_decoder_loss=0.2423, over 4142921.87 frames. ], batch size: 100, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:19:48,586 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.257e+01 8.698e+01 9.269e+01 2.011e+02, threshold=1.740e+02, percent-clipped=1.0 +2024-09-19 03:19:48,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=580200.0, ans=0.0 +2024-09-19 03:19:59,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=580240.0, ans=0.1 +2024-09-19 03:20:15,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=580280.0, ans=0.1 +2024-09-19 03:20:18,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=580280.0, ans=0.0 +2024-09-19 03:20:18,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=580280.0, ans=0.1 +2024-09-19 03:20:35,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=580320.0, ans=0.125 +2024-09-19 03:20:39,004 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.64 vs. 
limit=22.5 +2024-09-19 03:20:54,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=580360.0, ans=0.0 +2024-09-19 03:21:02,353 INFO [train.py:1198] (0/2) Epoch 33, batch 300, loss[loss=0.2415, ctc_loss=0.1219, cr_loss=0.3713, attn_decoder_loss=0.2466, over 29535.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1187, cr_loss=0.3603, attn_decoder_loss=0.2418, over 4510469.32 frames. ], batch size: 92, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:21:25,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=580440.0, ans=0.07 +2024-09-19 03:21:28,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=580440.0, ans=0.0 +2024-09-19 03:21:42,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=580480.0, ans=0.0 +2024-09-19 03:21:58,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.17 vs. limit=15.0 +2024-09-19 03:22:16,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=580560.0, ans=0.05 +2024-09-19 03:22:20,290 INFO [train.py:1198] (0/2) Epoch 33, batch 350, loss[loss=0.2065, ctc_loss=0.09489, cr_loss=0.3116, attn_decoder_loss=0.212, over 29325.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1188, cr_loss=0.3603, attn_decoder_loss=0.2418, over 4795854.79 frames. ], batch size: 71, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:22:24,713 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.206e+01 8.463e+01 8.888e+01 9.398e+01 1.588e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 03:22:40,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.70 vs. limit=22.5 +2024-09-19 03:22:51,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=580680.0, ans=0.2 +2024-09-19 03:22:51,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.37 vs. limit=12.0 +2024-09-19 03:23:26,423 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.56 vs. limit=15.0 +2024-09-19 03:23:36,311 INFO [train.py:1198] (0/2) Epoch 33, batch 400, loss[loss=0.2359, ctc_loss=0.1151, cr_loss=0.3539, attn_decoder_loss=0.2414, over 29712.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1186, cr_loss=0.3605, attn_decoder_loss=0.2416, over 5025143.34 frames. ], batch size: 82, lr: 3.37e-03, grad_scale: 16.0 +2024-09-19 03:23:43,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.02 vs. 
limit=15.0 +2024-09-19 03:23:49,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=580800.0, ans=0.2 +2024-09-19 03:24:05,566 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:24:15,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=580880.0, ans=0.2 +2024-09-19 03:24:22,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=580920.0, ans=0.125 +2024-09-19 03:24:38,830 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.86 vs. limit=10.0 +2024-09-19 03:24:54,731 INFO [train.py:1198] (0/2) Epoch 33, batch 450, loss[loss=0.2506, ctc_loss=0.1302, cr_loss=0.3757, attn_decoder_loss=0.2556, over 29700.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.119, cr_loss=0.3614, attn_decoder_loss=0.2418, over 5187179.12 frames. ], batch size: 83, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:24:55,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=581000.0, ans=0.1 +2024-09-19 03:25:00,690 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.321e+01 8.447e+01 9.007e+01 9.616e+01 1.601e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-19 03:25:04,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=581000.0, ans=0.025 +2024-09-19 03:25:11,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=581040.0, ans=0.1 +2024-09-19 03:25:16,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_positive, batch_count=581040.0, ans=0.05 +2024-09-19 03:25:22,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=581040.0, ans=0.125 +2024-09-19 03:25:39,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=581080.0, ans=0.125 +2024-09-19 03:25:49,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=581120.0, ans=0.125 +2024-09-19 03:25:59,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=581160.0, ans=0.125 +2024-09-19 03:26:12,853 INFO [train.py:1198] (0/2) Epoch 33, batch 500, loss[loss=0.2599, ctc_loss=0.1437, cr_loss=0.4232, attn_decoder_loss=0.2635, over 29448.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1189, cr_loss=0.361, attn_decoder_loss=0.2413, over 5330199.58 frames. ], batch size: 94, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:26:17,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=581200.0, ans=0.07 +2024-09-19 03:26:36,930 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.71 vs. 
limit=8.0 +2024-09-19 03:26:37,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=581240.0, ans=0.0 +2024-09-19 03:26:38,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=581240.0, ans=0.0 +2024-09-19 03:26:48,536 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.05 vs. limit=15.0 +2024-09-19 03:26:57,253 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:27:12,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=581360.0, ans=0.125 +2024-09-19 03:27:12,552 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.73 vs. limit=15.0 +2024-09-19 03:27:15,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=581360.0, ans=0.0 +2024-09-19 03:27:21,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=581360.0, ans=0.0 +2024-09-19 03:27:28,653 INFO [train.py:1198] (0/2) Epoch 33, batch 550, loss[loss=0.2496, ctc_loss=0.117, cr_loss=0.3739, attn_decoder_loss=0.256, over 28884.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1189, cr_loss=0.3614, attn_decoder_loss=0.2415, over 5424127.96 frames. ], batch size: 104, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:27:34,831 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.333e+01 9.017e+01 9.436e+01 4.024e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-19 03:28:06,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=581480.0, ans=0.025 +2024-09-19 03:28:15,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=581520.0, ans=0.125 +2024-09-19 03:28:18,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=581520.0, ans=0.0 +2024-09-19 03:28:18,947 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.42 vs. limit=15.0 +2024-09-19 03:28:29,956 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.93 vs. limit=15.0 +2024-09-19 03:28:30,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=581560.0, ans=0.1 +2024-09-19 03:28:30,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=581560.0, ans=0.0 +2024-09-19 03:28:47,669 INFO [train.py:1198] (0/2) Epoch 33, batch 600, loss[loss=0.2534, ctc_loss=0.1294, cr_loss=0.3821, attn_decoder_loss=0.2587, over 29264.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.119, cr_loss=0.3616, attn_decoder_loss=0.2417, over 5511289.67 frames. 
], batch size: 100, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:28:54,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=581600.0, ans=0.125 +2024-09-19 03:30:03,004 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.64 vs. limit=15.0 +2024-09-19 03:30:05,159 INFO [train.py:1198] (0/2) Epoch 33, batch 650, loss[loss=0.2336, ctc_loss=0.1129, cr_loss=0.3552, attn_decoder_loss=0.2391, over 29784.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1183, cr_loss=0.36, attn_decoder_loss=0.241, over 5588646.29 frames. ], batch size: 81, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:30:11,215 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.439e+01 8.577e+01 8.986e+01 9.488e+01 1.360e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-19 03:30:31,408 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:30:31,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=581840.0, ans=0.125 +2024-09-19 03:30:39,901 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.27 vs. limit=15.0 +2024-09-19 03:30:54,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=581920.0, ans=0.07 +2024-09-19 03:30:55,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=581920.0, ans=0.0 +2024-09-19 03:31:07,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.97 vs. limit=15.0 +2024-09-19 03:31:07,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=581960.0, ans=0.1 +2024-09-19 03:31:21,262 INFO [train.py:1198] (0/2) Epoch 33, batch 700, loss[loss=0.2336, ctc_loss=0.1216, cr_loss=0.3731, attn_decoder_loss=0.2378, over 29545.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1185, cr_loss=0.3602, attn_decoder_loss=0.2415, over 5639275.00 frames. ], batch size: 76, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:31:33,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=582000.0, ans=0.125 +2024-09-19 03:31:42,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=582040.0, ans=0.025 +2024-09-19 03:31:51,087 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.01 vs. limit=12.0 +2024-09-19 03:32:26,315 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.31 vs. limit=15.0 +2024-09-19 03:32:35,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=582160.0, ans=0.1 +2024-09-19 03:32:39,398 INFO [train.py:1198] (0/2) Epoch 33, batch 750, loss[loss=0.2467, ctc_loss=0.1261, cr_loss=0.3831, attn_decoder_loss=0.2516, over 29709.00 frames. 
], tot_loss[loss=0.2361, ctc_loss=0.1183, cr_loss=0.36, attn_decoder_loss=0.2412, over 5676936.62 frames. ], batch size: 82, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:32:46,699 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.188e+01 8.541e+01 8.897e+01 9.394e+01 1.704e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-19 03:32:50,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=582200.0, ans=0.0 +2024-09-19 03:33:00,104 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.06 vs. limit=6.0 +2024-09-19 03:33:02,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=582240.0, ans=0.125 +2024-09-19 03:33:15,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=582280.0, ans=0.125 +2024-09-19 03:33:30,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=582320.0, ans=0.125 +2024-09-19 03:33:51,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=582360.0, ans=0.125 +2024-09-19 03:33:57,740 INFO [train.py:1198] (0/2) Epoch 33, batch 800, loss[loss=0.2226, ctc_loss=0.1074, cr_loss=0.3449, attn_decoder_loss=0.2277, over 29642.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1185, cr_loss=0.3601, attn_decoder_loss=0.2412, over 5708018.36 frames. ], batch size: 73, lr: 3.37e-03, grad_scale: 16.0 +2024-09-19 03:34:20,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=582440.0, ans=0.1 +2024-09-19 03:35:06,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=582560.0, ans=0.1 +2024-09-19 03:35:13,253 INFO [train.py:1198] (0/2) Epoch 33, batch 850, loss[loss=0.2436, ctc_loss=0.1232, cr_loss=0.3444, attn_decoder_loss=0.2493, over 29692.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1178, cr_loss=0.3587, attn_decoder_loss=0.2407, over 5736735.16 frames. ], batch size: 89, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:35:14,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=582600.0, ans=0.05 +2024-09-19 03:35:20,667 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.308e+01 8.452e+01 8.956e+01 9.635e+01 2.624e+02, threshold=1.791e+02, percent-clipped=1.0 +2024-09-19 03:35:22,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=582600.0, ans=0.0 +2024-09-19 03:35:26,216 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.85 vs. 
limit=6.0 +2024-09-19 03:35:28,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=582640.0, ans=0.0 +2024-09-19 03:36:11,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=582720.0, ans=0.125 +2024-09-19 03:36:20,269 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.71 vs. limit=15.0 +2024-09-19 03:36:23,049 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.79 vs. limit=15.0 +2024-09-19 03:36:31,365 INFO [train.py:1198] (0/2) Epoch 33, batch 900, loss[loss=0.2148, ctc_loss=0.1005, cr_loss=0.3266, attn_decoder_loss=0.2202, over 29642.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1185, cr_loss=0.3595, attn_decoder_loss=0.2414, over 5741792.23 frames. ], batch size: 73, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:36:36,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=582800.0, ans=0.125 +2024-09-19 03:36:48,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=582840.0, ans=0.125 +2024-09-19 03:36:49,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=582840.0, ans=0.125 +2024-09-19 03:37:04,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=582880.0, ans=0.1 +2024-09-19 03:37:06,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=582880.0, ans=0.0 +2024-09-19 03:37:07,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=582880.0, ans=0.1 +2024-09-19 03:37:34,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=582960.0, ans=0.5 +2024-09-19 03:37:37,920 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.54 vs. limit=15.0 +2024-09-19 03:37:40,919 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.10 vs. limit=15.0 +2024-09-19 03:37:48,801 INFO [train.py:1198] (0/2) Epoch 33, batch 950, loss[loss=0.224, ctc_loss=0.1061, cr_loss=0.3466, attn_decoder_loss=0.2294, over 29503.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1185, cr_loss=0.36, attn_decoder_loss=0.2417, over 5741539.66 frames. 
], batch size: 74, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:37:55,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=583000.0, ans=0.0 +2024-09-19 03:37:56,284 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 8.465e+01 9.060e+01 1.004e+02 2.208e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 03:37:58,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=583000.0, ans=0.125 +2024-09-19 03:37:58,731 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.74 vs. limit=15.0 +2024-09-19 03:38:02,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=583040.0, ans=0.0 +2024-09-19 03:38:37,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=583120.0, ans=0.0 +2024-09-19 03:38:48,593 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.06 vs. limit=15.0 +2024-09-19 03:39:04,622 INFO [train.py:1198] (0/2) Epoch 33, batch 1000, loss[loss=0.2356, ctc_loss=0.1224, cr_loss=0.401, attn_decoder_loss=0.2393, over 29533.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1195, cr_loss=0.3624, attn_decoder_loss=0.2424, over 5735557.99 frames. ], batch size: 77, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:39:19,660 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.25 vs. limit=15.0 +2024-09-19 03:39:38,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=583280.0, ans=0.5 +2024-09-19 03:39:40,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=583280.0, ans=0.0 +2024-09-19 03:39:49,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=583320.0, ans=0.125 +2024-09-19 03:40:13,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=583360.0, ans=0.125 +2024-09-19 03:40:15,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=583360.0, ans=0.0 +2024-09-19 03:40:22,705 INFO [train.py:1198] (0/2) Epoch 33, batch 1050, loss[loss=0.2342, ctc_loss=0.1115, cr_loss=0.3621, attn_decoder_loss=0.2398, over 29698.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1189, cr_loss=0.3612, attn_decoder_loss=0.2416, over 5745125.72 frames. 
], batch size: 85, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:40:33,202 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.180e+01 8.604e+01 9.076e+01 9.577e+01 3.537e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-19 03:40:45,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=583440.0, ans=0.2 +2024-09-19 03:40:47,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=583440.0, ans=0.07 +2024-09-19 03:41:25,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=583560.0, ans=0.0 +2024-09-19 03:41:40,567 INFO [train.py:1198] (0/2) Epoch 33, batch 1100, loss[loss=0.2311, ctc_loss=0.1187, cr_loss=0.3674, attn_decoder_loss=0.2355, over 29421.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1185, cr_loss=0.3606, attn_decoder_loss=0.2411, over 5758048.79 frames. ], batch size: 78, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:41:57,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=583640.0, ans=0.125 +2024-09-19 03:42:03,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=583640.0, ans=0.0 +2024-09-19 03:42:11,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=583680.0, ans=0.0 +2024-09-19 03:42:12,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=583680.0, ans=0.0 +2024-09-19 03:42:18,180 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.08 vs. limit=15.0 +2024-09-19 03:42:18,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=583680.0, ans=0.0 +2024-09-19 03:42:23,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=583680.0, ans=0.0 +2024-09-19 03:42:23,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=583680.0, ans=0.125 +2024-09-19 03:42:31,304 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.97 vs. limit=15.0 +2024-09-19 03:42:33,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=583720.0, ans=0.0 +2024-09-19 03:42:56,359 INFO [train.py:1198] (0/2) Epoch 33, batch 1150, loss[loss=0.2165, ctc_loss=0.1039, cr_loss=0.3317, attn_decoder_loss=0.2216, over 29445.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1187, cr_loss=0.3608, attn_decoder_loss=0.2414, over 5754239.77 frames. 
], batch size: 78, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:43:01,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=583800.0, ans=0.025 +2024-09-19 03:43:04,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=583800.0, ans=0.0 +2024-09-19 03:43:06,974 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.330e+01 8.417e+01 8.891e+01 9.458e+01 2.719e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 03:43:12,401 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.21 vs. limit=15.0 +2024-09-19 03:43:13,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.50 vs. limit=15.0 +2024-09-19 03:43:36,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=583880.0, ans=0.1 +2024-09-19 03:43:40,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=583920.0, ans=10.0 +2024-09-19 03:43:45,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=583920.0, ans=0.1 +2024-09-19 03:43:53,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=583920.0, ans=15.0 +2024-09-19 03:44:14,594 INFO [train.py:1198] (0/2) Epoch 33, batch 1200, loss[loss=0.2362, ctc_loss=0.1144, cr_loss=0.3549, attn_decoder_loss=0.2418, over 29689.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1192, cr_loss=0.3613, attn_decoder_loss=0.2422, over 5746328.68 frames. ], batch size: 85, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:44:31,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=584040.0, ans=0.015 +2024-09-19 03:44:36,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=584040.0, ans=0.2 +2024-09-19 03:44:57,296 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.34 vs. limit=15.0 +2024-09-19 03:45:02,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=584120.0, ans=0.0 +2024-09-19 03:45:08,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=584120.0, ans=0.0 +2024-09-19 03:45:14,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=584120.0, ans=0.0 +2024-09-19 03:45:32,636 INFO [train.py:1198] (0/2) Epoch 33, batch 1250, loss[loss=0.2519, ctc_loss=0.1299, cr_loss=0.3888, attn_decoder_loss=0.2568, over 29552.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1191, cr_loss=0.3616, attn_decoder_loss=0.2427, over 5774446.47 frames. 
], batch size: 92, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:45:43,422 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.734e+01 8.568e+01 9.117e+01 9.876e+01 2.169e+02, threshold=1.823e+02, percent-clipped=3.0 +2024-09-19 03:45:50,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=584240.0, ans=0.1 +2024-09-19 03:45:56,582 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.19 vs. limit=22.5 +2024-09-19 03:46:15,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=584280.0, ans=0.125 +2024-09-19 03:46:26,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=584320.0, ans=0.125 +2024-09-19 03:46:34,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=584360.0, ans=0.2 +2024-09-19 03:46:35,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=584360.0, ans=0.0 +2024-09-19 03:46:48,683 INFO [train.py:1198] (0/2) Epoch 33, batch 1300, loss[loss=0.2397, ctc_loss=0.1114, cr_loss=0.3362, attn_decoder_loss=0.2465, over 28226.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1186, cr_loss=0.3599, attn_decoder_loss=0.2419, over 5779246.89 frames. ], batch size: 111, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:46:50,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=584400.0, ans=0.0 +2024-09-19 03:47:16,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=584440.0, ans=0.125 +2024-09-19 03:47:28,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=584480.0, ans=0.125 +2024-09-19 03:47:33,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=584520.0, ans=0.2 +2024-09-19 03:47:39,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=584520.0, ans=0.2 +2024-09-19 03:47:57,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.90 vs. limit=22.5 +2024-09-19 03:48:03,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=584600.0, ans=0.05 +2024-09-19 03:48:04,740 INFO [train.py:1198] (0/2) Epoch 33, batch 1350, loss[loss=0.2411, ctc_loss=0.1239, cr_loss=0.3822, attn_decoder_loss=0.2456, over 29767.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1183, cr_loss=0.36, attn_decoder_loss=0.2415, over 5795666.87 frames. 
], batch size: 81, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:48:06,565 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:48:17,536 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.543e+01 8.274e+01 8.749e+01 9.320e+01 1.394e+02, threshold=1.750e+02, percent-clipped=0.0 +2024-09-19 03:48:17,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=584600.0, ans=0.125 +2024-09-19 03:48:17,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=584600.0, ans=0.025 +2024-09-19 03:49:04,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=584720.0, ans=0.125 +2024-09-19 03:49:17,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=584760.0, ans=0.125 +2024-09-19 03:49:24,348 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.04 vs. limit=12.0 +2024-09-19 03:49:24,983 INFO [train.py:1198] (0/2) Epoch 33, batch 1400, loss[loss=0.2102, ctc_loss=0.09253, cr_loss=0.2917, attn_decoder_loss=0.2168, over 29593.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.118, cr_loss=0.3591, attn_decoder_loss=0.2414, over 5806627.12 frames. ], batch size: 69, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:49:25,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.60 vs. limit=15.0 +2024-09-19 03:49:43,824 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.82 vs. limit=22.5 +2024-09-19 03:50:12,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=584920.0, ans=0.1 +2024-09-19 03:50:25,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=584960.0, ans=0.125 +2024-09-19 03:50:25,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=584960.0, ans=0.125 +2024-09-19 03:50:28,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=584960.0, ans=0.2 +2024-09-19 03:50:34,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=584960.0, ans=0.07 +2024-09-19 03:50:40,323 INFO [train.py:1198] (0/2) Epoch 33, batch 1450, loss[loss=0.2546, ctc_loss=0.1296, cr_loss=0.3929, attn_decoder_loss=0.2598, over 29424.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1181, cr_loss=0.3596, attn_decoder_loss=0.2418, over 5802721.15 frames. ], batch size: 94, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:50:41,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.80 vs. 
limit=15.0 +2024-09-19 03:50:50,834 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.352e+01 8.392e+01 8.954e+01 9.384e+01 1.541e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 03:50:55,026 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=7.95 vs. limit=22.5 +2024-09-19 03:51:13,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=585080.0, ans=0.125 +2024-09-19 03:51:16,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=585080.0, ans=0.0 +2024-09-19 03:51:25,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=585120.0, ans=0.125 +2024-09-19 03:51:26,756 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.94 vs. limit=15.0 +2024-09-19 03:51:42,979 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.86 vs. limit=15.0 +2024-09-19 03:51:55,822 INFO [train.py:1198] (0/2) Epoch 33, batch 1500, loss[loss=0.2426, ctc_loss=0.1193, cr_loss=0.3513, attn_decoder_loss=0.2484, over 29637.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1188, cr_loss=0.3606, attn_decoder_loss=0.2423, over 5802458.67 frames. ], batch size: 86, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:52:18,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=585240.0, ans=0.025 +2024-09-19 03:52:32,370 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.91 vs. limit=12.0 +2024-09-19 03:52:33,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=585280.0, ans=0.125 +2024-09-19 03:52:38,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=585280.0, ans=0.125 +2024-09-19 03:52:57,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=585320.0, ans=0.125 +2024-09-19 03:53:07,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=585360.0, ans=0.0 +2024-09-19 03:53:07,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=585360.0, ans=0.125 +2024-09-19 03:53:16,551 INFO [train.py:1198] (0/2) Epoch 33, batch 1550, loss[loss=0.2602, ctc_loss=0.1372, cr_loss=0.3985, attn_decoder_loss=0.265, over 29527.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.119, cr_loss=0.361, attn_decoder_loss=0.2423, over 5778831.96 frames. 
], batch size: 90, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:53:20,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=585400.0, ans=0.0 +2024-09-19 03:53:28,679 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.266e+01 8.624e+01 9.001e+01 9.537e+01 4.675e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-19 03:53:52,176 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.50 vs. limit=15.0 +2024-09-19 03:54:07,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=585520.0, ans=0.125 +2024-09-19 03:54:15,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=585560.0, ans=0.125 +2024-09-19 03:54:19,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=585560.0, ans=0.0 +2024-09-19 03:54:19,424 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.70 vs. limit=15.0 +2024-09-19 03:54:32,530 INFO [train.py:1198] (0/2) Epoch 33, batch 1600, loss[loss=0.2425, ctc_loss=0.1202, cr_loss=0.3664, attn_decoder_loss=0.2479, over 29679.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1189, cr_loss=0.3608, attn_decoder_loss=0.2421, over 5761979.15 frames. ], batch size: 85, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:54:38,093 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.61 vs. limit=10.0 +2024-09-19 03:54:41,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=585600.0, ans=0.2 +2024-09-19 03:54:48,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=585640.0, ans=0.125 +2024-09-19 03:54:54,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=585640.0, ans=0.025 +2024-09-19 03:55:06,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=585680.0, ans=0.1 +2024-09-19 03:55:16,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=585720.0, ans=0.1 +2024-09-19 03:55:33,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=585760.0, ans=0.2 +2024-09-19 03:55:48,321 INFO [train.py:1198] (0/2) Epoch 33, batch 1650, loss[loss=0.2467, ctc_loss=0.1214, cr_loss=0.3819, attn_decoder_loss=0.2521, over 29714.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1188, cr_loss=0.3605, attn_decoder_loss=0.2419, over 5756719.03 frames. 
], batch size: 89, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:56:02,791 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.482e+01 8.668e+01 9.020e+01 9.711e+01 1.996e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-19 03:56:06,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=585840.0, ans=0.0 +2024-09-19 03:56:13,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=585840.0, ans=0.0 +2024-09-19 03:56:20,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.85 vs. limit=15.0 +2024-09-19 03:56:22,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=585880.0, ans=0.025 +2024-09-19 03:56:46,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=585920.0, ans=0.2 +2024-09-19 03:56:57,735 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.51 vs. limit=15.0 +2024-09-19 03:57:08,350 INFO [train.py:1198] (0/2) Epoch 33, batch 1700, loss[loss=0.2121, ctc_loss=0.109, cr_loss=0.341, attn_decoder_loss=0.216, over 29578.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1184, cr_loss=0.36, attn_decoder_loss=0.2417, over 5779375.60 frames. ], batch size: 69, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:57:08,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=586000.0, ans=0.2 +2024-09-19 03:57:12,100 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.84 vs. limit=22.5 +2024-09-19 03:57:25,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=586040.0, ans=0.2 +2024-09-19 03:57:26,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=586040.0, ans=0.1 +2024-09-19 03:57:44,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=586080.0, ans=0.125 +2024-09-19 03:57:46,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=586080.0, ans=0.2 +2024-09-19 03:57:56,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=586120.0, ans=0.125 +2024-09-19 03:57:58,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=586120.0, ans=0.0 +2024-09-19 03:58:01,919 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.27 vs. limit=15.0 +2024-09-19 03:58:06,899 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.73 vs. 
limit=15.0 +2024-09-19 03:58:18,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=586160.0, ans=0.0 +2024-09-19 03:58:23,871 INFO [train.py:1198] (0/2) Epoch 33, batch 1750, loss[loss=0.2147, ctc_loss=0.1024, cr_loss=0.3324, attn_decoder_loss=0.2197, over 29346.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1179, cr_loss=0.3593, attn_decoder_loss=0.2414, over 5788018.48 frames. ], batch size: 67, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 03:58:37,584 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.205e+01 8.452e+01 8.998e+01 9.448e+01 1.573e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-19 03:58:56,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.92 vs. limit=15.0 +2024-09-19 03:59:07,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=586280.0, ans=0.0 +2024-09-19 03:59:11,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=586320.0, ans=0.025 +2024-09-19 03:59:12,360 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.52 vs. limit=22.5 +2024-09-19 03:59:40,303 INFO [train.py:1198] (0/2) Epoch 33, batch 1800, loss[loss=0.2463, ctc_loss=0.1341, cr_loss=0.4142, attn_decoder_loss=0.2496, over 29704.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1178, cr_loss=0.3589, attn_decoder_loss=0.2411, over 5791289.25 frames. ], batch size: 83, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 03:59:45,905 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.33 vs. limit=15.0 +2024-09-19 03:59:47,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.50 vs. limit=22.5 +2024-09-19 04:00:40,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=586520.0, ans=0.125 +2024-09-19 04:00:43,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=586560.0, ans=0.0 +2024-09-19 04:00:59,188 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:01:00,455 INFO [train.py:1198] (0/2) Epoch 33, batch 1850, loss[loss=0.2435, ctc_loss=0.1184, cr_loss=0.3487, attn_decoder_loss=0.2496, over 29661.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1177, cr_loss=0.3585, attn_decoder_loss=0.2408, over 5796561.01 frames. ], batch size: 86, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:01:00,882 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:01:02,977 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.72 vs. 
limit=15.0 +2024-09-19 04:01:03,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=586600.0, ans=0.125 +2024-09-19 04:01:13,842 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.226e+01 8.429e+01 8.891e+01 9.502e+01 1.976e+02, threshold=1.778e+02, percent-clipped=1.0 +2024-09-19 04:01:54,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=586720.0, ans=0.125 +2024-09-19 04:01:59,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=586760.0, ans=0.2 +2024-09-19 04:02:08,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=586760.0, ans=0.2 +2024-09-19 04:02:15,704 INFO [train.py:1198] (0/2) Epoch 33, batch 1900, loss[loss=0.2458, ctc_loss=0.1216, cr_loss=0.3703, attn_decoder_loss=0.2514, over 29716.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1182, cr_loss=0.3595, attn_decoder_loss=0.2414, over 5804611.49 frames. ], batch size: 89, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:02:24,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=586800.0, ans=0.125 +2024-09-19 04:02:24,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=586800.0, ans=0.0 +2024-09-19 04:02:39,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.47 vs. limit=10.0 +2024-09-19 04:02:54,052 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:02:58,854 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.32 vs. limit=22.5 +2024-09-19 04:03:22,607 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:03:31,186 INFO [train.py:1198] (0/2) Epoch 33, batch 1950, loss[loss=0.237, ctc_loss=0.1102, cr_loss=0.3432, attn_decoder_loss=0.2434, over 29469.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1191, cr_loss=0.3612, attn_decoder_loss=0.2428, over 5819056.05 frames. ], batch size: 78, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:03:44,772 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.530e+01 9.165e+01 9.739e+01 1.607e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-19 04:03:59,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=587040.0, ans=0.125 +2024-09-19 04:04:05,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=587080.0, ans=0.05 +2024-09-19 04:04:12,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=587080.0, ans=0.07 +2024-09-19 04:04:13,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.61 vs. 
limit=15.0 +2024-09-19 04:04:15,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=587080.0, ans=0.2 +2024-09-19 04:04:43,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=587160.0, ans=0.09899494936611666 +2024-09-19 04:04:47,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.70 vs. limit=12.0 +2024-09-19 04:04:51,445 INFO [train.py:1198] (0/2) Epoch 33, batch 2000, loss[loss=0.2092, ctc_loss=0.1019, cr_loss=0.3183, attn_decoder_loss=0.2141, over 29343.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1197, cr_loss=0.3623, attn_decoder_loss=0.2434, over 5797045.21 frames. ], batch size: 67, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:04:59,652 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:05:05,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=587240.0, ans=0.125 +2024-09-19 04:05:07,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=587240.0, ans=0.125 +2024-09-19 04:05:30,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=587280.0, ans=0.1 +2024-09-19 04:05:31,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_ff2.min_abs, batch_count=587280.0, ans=0.1 +2024-09-19 04:05:37,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=587320.0, ans=0.1 +2024-09-19 04:05:49,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=587320.0, ans=0.0 +2024-09-19 04:05:52,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=587360.0, ans=0.2 +2024-09-19 04:05:55,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=587360.0, ans=0.125 +2024-09-19 04:06:01,275 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.82 vs. limit=15.0 +2024-09-19 04:06:07,508 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=10.41 vs. limit=10.0 +2024-09-19 04:06:07,791 INFO [train.py:1198] (0/2) Epoch 33, batch 2050, loss[loss=0.2096, ctc_loss=0.09914, cr_loss=0.3199, attn_decoder_loss=0.2148, over 29461.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1188, cr_loss=0.3603, attn_decoder_loss=0.2422, over 5789582.28 frames. 
], batch size: 70, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:06:08,024 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:06:14,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=587400.0, ans=0.2 +2024-09-19 04:06:14,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=587400.0, ans=0.0 +2024-09-19 04:06:21,352 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.514e+01 8.725e+01 9.262e+01 9.868e+01 2.043e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-19 04:06:52,259 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:06:52,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.06 vs. limit=15.0 +2024-09-19 04:07:23,430 INFO [train.py:1198] (0/2) Epoch 33, batch 2100, loss[loss=0.2432, ctc_loss=0.1259, cr_loss=0.3859, attn_decoder_loss=0.2477, over 29758.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1186, cr_loss=0.3598, attn_decoder_loss=0.2419, over 5801533.99 frames. ], batch size: 81, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:07:31,702 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.10 vs. limit=10.0 +2024-09-19 04:07:50,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=587640.0, ans=0.0 +2024-09-19 04:08:19,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=587720.0, ans=0.0 +2024-09-19 04:08:23,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=587720.0, ans=0.125 +2024-09-19 04:08:42,918 INFO [train.py:1198] (0/2) Epoch 33, batch 2150, loss[loss=0.246, ctc_loss=0.1222, cr_loss=0.3673, attn_decoder_loss=0.2516, over 29466.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1183, cr_loss=0.3598, attn_decoder_loss=0.2415, over 5816290.52 frames. ], batch size: 78, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:08:56,491 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.296e+01 8.476e+01 8.968e+01 9.482e+01 1.071e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 04:09:01,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=587840.0, ans=0.1 +2024-09-19 04:09:05,180 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.32 vs. limit=15.0 +2024-09-19 04:09:19,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=587880.0, ans=0.0 +2024-09-19 04:09:58,815 INFO [train.py:1198] (0/2) Epoch 33, batch 2200, loss[loss=0.2387, ctc_loss=0.1162, cr_loss=0.3474, attn_decoder_loss=0.2446, over 29631.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1183, cr_loss=0.3598, attn_decoder_loss=0.2414, over 5812705.31 frames. 
], batch size: 86, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:10:09,964 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.44 vs. limit=15.0 +2024-09-19 04:10:10,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=588000.0, ans=0.1 +2024-09-19 04:10:39,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=588080.0, ans=0.0 +2024-09-19 04:10:58,523 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.10 vs. limit=10.0 +2024-09-19 04:11:00,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=588160.0, ans=0.125 +2024-09-19 04:11:11,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=588160.0, ans=0.0 +2024-09-19 04:11:14,406 INFO [train.py:1198] (0/2) Epoch 33, batch 2250, loss[loss=0.2465, ctc_loss=0.1263, cr_loss=0.3698, attn_decoder_loss=0.2517, over 29718.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1184, cr_loss=0.36, attn_decoder_loss=0.2414, over 5813084.98 frames. ], batch size: 82, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:11:17,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=588200.0, ans=0.1 +2024-09-19 04:11:26,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=588200.0, ans=0.0 +2024-09-19 04:11:29,578 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.468e+01 9.055e+01 9.587e+01 2.332e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-19 04:11:34,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=588240.0, ans=0.125 +2024-09-19 04:11:35,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=588240.0, ans=0.5 +2024-09-19 04:11:45,631 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.31 vs. limit=15.0 +2024-09-19 04:11:48,726 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.99 vs. limit=15.0 +2024-09-19 04:12:34,491 INFO [train.py:1198] (0/2) Epoch 33, batch 2300, loss[loss=0.2222, ctc_loss=0.1041, cr_loss=0.3258, attn_decoder_loss=0.2281, over 29322.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1177, cr_loss=0.358, attn_decoder_loss=0.2404, over 5799909.87 frames. 
], batch size: 71, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:12:37,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=588400.0, ans=0.125 +2024-09-19 04:12:42,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=588400.0, ans=0.1 +2024-09-19 04:13:09,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=588480.0, ans=0.0 +2024-09-19 04:13:15,904 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.12 vs. limit=15.0 +2024-09-19 04:13:33,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=588560.0, ans=0.0 +2024-09-19 04:13:38,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=588560.0, ans=0.125 +2024-09-19 04:13:49,828 INFO [train.py:1198] (0/2) Epoch 33, batch 2350, loss[loss=0.2479, ctc_loss=0.1271, cr_loss=0.3811, attn_decoder_loss=0.2528, over 29690.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.118, cr_loss=0.3587, attn_decoder_loss=0.2409, over 5805301.47 frames. ], batch size: 83, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:13:53,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=588600.0, ans=0.125 +2024-09-19 04:14:00,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=588600.0, ans=0.125 +2024-09-19 04:14:00,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=588600.0, ans=0.125 +2024-09-19 04:14:04,757 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.984e+01 8.296e+01 8.859e+01 9.524e+01 1.352e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 04:14:06,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=588640.0, ans=0.0 +2024-09-19 04:14:11,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=588640.0, ans=0.2 +2024-09-19 04:14:27,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=588680.0, ans=0.125 +2024-09-19 04:14:38,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=588720.0, ans=0.2 +2024-09-19 04:14:47,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=588720.0, ans=0.125 +2024-09-19 04:15:03,799 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.39 vs. limit=22.5 +2024-09-19 04:15:06,269 INFO [train.py:1198] (0/2) Epoch 33, batch 2400, loss[loss=0.2361, ctc_loss=0.1183, cr_loss=0.3652, attn_decoder_loss=0.2411, over 29527.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1181, cr_loss=0.3588, attn_decoder_loss=0.2414, over 5809482.73 frames. 
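Many of the scheduled names above belong to balancers (min_positive, min_abs, max_abs, prob); the logged ans values are the scheduled constraint bounds, not activation statistics. The diagnostic sketch below shows the kind of per-channel statistics such a balancer watches, the fraction of positive values and the mean absolute value; icefall's balancer enforces these bounds by modifying gradients with probability prob, which this sketch does not attempt to reproduce:

```python
# Diagnostic sketch only: report per-channel statistics that a balancer
# constrains. The actual balancer nudges activations back inside the bounds
# via gradient modification, applied stochastically with probability `prob`.
import torch


def balancer_stats(x: torch.Tensor, min_positive=0.05, max_abs=10.0):
    """x: (num_frames, num_channels) activations."""
    frac_positive = (x > 0).float().mean(dim=0)  # per channel
    mean_abs = x.abs().mean(dim=0)
    return {
        "below_min_positive": int((frac_positive < min_positive).sum()),
        "above_max_abs": int((mean_abs > max_abs).sum()),
    }


acts = torch.randn(2000, 256)
print(balancer_stats(acts))  # healthy activations violate neither bound
```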
], batch size: 76, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:15:12,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=588800.0, ans=0.125 +2024-09-19 04:15:18,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=588800.0, ans=0.125 +2024-09-19 04:15:19,101 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=13.26 vs. limit=15.0 +2024-09-19 04:15:20,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=588840.0, ans=0.125 +2024-09-19 04:15:32,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=588840.0, ans=0.0 +2024-09-19 04:15:39,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=588880.0, ans=0.0 +2024-09-19 04:15:45,196 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.11 vs. limit=15.0 +2024-09-19 04:16:24,266 INFO [train.py:1198] (0/2) Epoch 33, batch 2450, loss[loss=0.243, ctc_loss=0.1265, cr_loss=0.3818, attn_decoder_loss=0.2474, over 29713.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.119, cr_loss=0.3606, attn_decoder_loss=0.2424, over 5786029.45 frames. ], batch size: 82, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:16:26,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.20 vs. 
limit=15.0 +2024-09-19 04:16:30,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=589000.0, ans=0.5 +2024-09-19 04:16:33,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=589000.0, ans=0.0 +2024-09-19 04:16:39,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=589040.0, ans=0.125 +2024-09-19 04:16:40,789 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.743e+01 8.663e+01 9.079e+01 9.765e+01 4.096e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 04:16:53,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=589080.0, ans=0.5 +2024-09-19 04:16:56,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=589080.0, ans=0.125 +2024-09-19 04:17:00,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=589080.0, ans=0.125 +2024-09-19 04:17:02,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=589080.0, ans=0.1 +2024-09-19 04:17:08,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=589120.0, ans=0.1 +2024-09-19 04:17:31,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=589160.0, ans=0.0 +2024-09-19 04:17:40,042 INFO [train.py:1198] (0/2) Epoch 33, batch 2500, loss[loss=0.2405, ctc_loss=0.1155, cr_loss=0.3627, attn_decoder_loss=0.2463, over 29625.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1186, cr_loss=0.3599, attn_decoder_loss=0.2422, over 5795647.64 frames. ], batch size: 86, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:18:21,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=589280.0, ans=0.025 +2024-09-19 04:18:32,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=589320.0, ans=0.125 +2024-09-19 04:18:36,047 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.48 vs. limit=15.0 +2024-09-19 04:18:52,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=589360.0, ans=0.1 +2024-09-19 04:18:56,407 INFO [train.py:1198] (0/2) Epoch 33, batch 2550, loss[loss=0.2139, ctc_loss=0.1009, cr_loss=0.3422, attn_decoder_loss=0.2188, over 29344.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1186, cr_loss=0.3601, attn_decoder_loss=0.2421, over 5799156.50 frames. 
], batch size: 67, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:19:12,962 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.839e+01 8.542e+01 8.992e+01 9.541e+01 1.643e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-19 04:19:13,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=589440.0, ans=0.125 +2024-09-19 04:19:41,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=589520.0, ans=0.1 +2024-09-19 04:19:49,270 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.79 vs. limit=12.0 +2024-09-19 04:19:59,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=589560.0, ans=0.0 +2024-09-19 04:20:02,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=589560.0, ans=0.0 +2024-09-19 04:20:07,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=589560.0, ans=0.0 +2024-09-19 04:20:08,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=589560.0, ans=0.125 +2024-09-19 04:20:10,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=589560.0, ans=0.0 +2024-09-19 04:20:16,545 INFO [train.py:1198] (0/2) Epoch 33, batch 2600, loss[loss=0.2242, ctc_loss=0.1044, cr_loss=0.3262, attn_decoder_loss=0.2302, over 29459.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1188, cr_loss=0.3606, attn_decoder_loss=0.2422, over 5794825.99 frames. ], batch size: 78, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:20:37,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=589640.0, ans=0.2 +2024-09-19 04:20:39,073 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:20:55,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=589680.0, ans=0.1 +2024-09-19 04:21:10,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=589720.0, ans=0.125 +2024-09-19 04:21:17,284 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0 +2024-09-19 04:21:31,655 INFO [train.py:1198] (0/2) Epoch 33, batch 2650, loss[loss=0.2378, ctc_loss=0.1147, cr_loss=0.3447, attn_decoder_loss=0.2438, over 29262.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1181, cr_loss=0.3593, attn_decoder_loss=0.242, over 5801391.51 frames. ], batch size: 100, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:21:41,735 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.57 vs. 
limit=22.5 +2024-09-19 04:21:48,460 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.311e+01 8.528e+01 8.946e+01 9.384e+01 1.299e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-19 04:21:52,467 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.53 vs. limit=12.0 +2024-09-19 04:22:07,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=589880.0, ans=0.1 +2024-09-19 04:22:47,777 INFO [train.py:1198] (0/2) Epoch 33, batch 2700, loss[loss=0.2365, ctc_loss=0.1084, cr_loss=0.3399, attn_decoder_loss=0.2432, over 29528.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1187, cr_loss=0.3603, attn_decoder_loss=0.2423, over 5795332.82 frames. ], batch size: 87, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:23:06,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=590040.0, ans=0.0 +2024-09-19 04:23:22,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=590080.0, ans=0.1 +2024-09-19 04:23:22,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=590080.0, ans=0.1 +2024-09-19 04:24:00,915 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.66 vs. limit=15.0 +2024-09-19 04:24:07,395 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.34 vs. limit=15.0 +2024-09-19 04:24:07,847 INFO [train.py:1198] (0/2) Epoch 33, batch 2750, loss[loss=0.2217, ctc_loss=0.1109, cr_loss=0.3584, attn_decoder_loss=0.2261, over 29546.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1179, cr_loss=0.359, attn_decoder_loss=0.2413, over 5793696.50 frames. ], batch size: 75, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:24:20,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=590200.0, ans=0.125 +2024-09-19 04:24:24,590 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.200e+01 8.542e+01 8.884e+01 9.570e+01 2.810e+02, threshold=1.777e+02, percent-clipped=3.0 +2024-09-19 04:24:44,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=590280.0, ans=0.125 +2024-09-19 04:24:48,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=590280.0, ans=0.125 +2024-09-19 04:24:49,667 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.33 vs. limit=15.0 +2024-09-19 04:25:01,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=590320.0, ans=0.0 +2024-09-19 04:25:14,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=590360.0, ans=0.2 +2024-09-19 04:25:24,063 INFO [train.py:1198] (0/2) Epoch 33, batch 2800, loss[loss=0.254, ctc_loss=0.1504, cr_loss=0.3995, attn_decoder_loss=0.2567, over 20419.00 frames. 
], tot_loss[loss=0.2364, ctc_loss=0.1184, cr_loss=0.3593, attn_decoder_loss=0.2416, over 5776206.54 frames. ], batch size: 210, lr: 3.34e-03, grad_scale: 16.0 +2024-09-19 04:25:39,645 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:25:45,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=590440.0, ans=0.125 +2024-09-19 04:25:50,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.58 vs. limit=6.0 +2024-09-19 04:25:51,829 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.24 vs. limit=15.0 +2024-09-19 04:25:52,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=590480.0, ans=0.2 +2024-09-19 04:26:11,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=590520.0, ans=0.035 +2024-09-19 04:26:39,485 INFO [train.py:1198] (0/2) Epoch 33, batch 2850, loss[loss=0.2295, ctc_loss=0.1195, cr_loss=0.353, attn_decoder_loss=0.2339, over 29518.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1188, cr_loss=0.3599, attn_decoder_loss=0.242, over 5761717.51 frames. ], batch size: 77, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:26:39,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=590600.0, ans=0.2 +2024-09-19 04:26:45,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.34 vs. limit=10.0 +2024-09-19 04:26:52,837 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.18 vs. limit=15.0 +2024-09-19 04:26:57,780 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.590e+01 8.701e+01 9.298e+01 9.945e+01 2.152e+02, threshold=1.860e+02, percent-clipped=1.0 +2024-09-19 04:27:01,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_ff2.min_abs, batch_count=590640.0, ans=0.1 +2024-09-19 04:27:14,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=590680.0, ans=0.125 +2024-09-19 04:27:24,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.56 vs. limit=5.0 +2024-09-19 04:27:41,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=590720.0, ans=0.2 +2024-09-19 04:27:52,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=590760.0, ans=0.2 +2024-09-19 04:27:59,740 INFO [train.py:1198] (0/2) Epoch 33, batch 2900, loss[loss=0.2306, ctc_loss=0.1085, cr_loss=0.3255, attn_decoder_loss=0.237, over 29436.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1192, cr_loss=0.3614, attn_decoder_loss=0.2427, over 5786558.33 frames. 
], batch size: 79, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:28:18,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=590840.0, ans=0.2 +2024-09-19 04:28:24,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=590840.0, ans=0.125 +2024-09-19 04:28:28,111 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.39 vs. limit=15.0 +2024-09-19 04:28:30,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=590880.0, ans=0.125 +2024-09-19 04:28:39,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=590880.0, ans=0.125 +2024-09-19 04:28:51,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=590920.0, ans=0.125 +2024-09-19 04:28:53,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=590920.0, ans=0.125 +2024-09-19 04:28:53,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.18 vs. limit=12.0 +2024-09-19 04:28:56,862 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.30 vs. limit=6.0 +2024-09-19 04:29:08,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=590960.0, ans=0.125 +2024-09-19 04:29:15,410 INFO [train.py:1198] (0/2) Epoch 33, batch 2950, loss[loss=0.2307, ctc_loss=0.112, cr_loss=0.3607, attn_decoder_loss=0.2359, over 29508.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1182, cr_loss=0.3592, attn_decoder_loss=0.2414, over 5780959.72 frames. ], batch size: 75, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:29:25,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=591000.0, ans=22.5 +2024-09-19 04:29:33,869 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.422e+01 8.881e+01 9.248e+01 1.525e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-19 04:29:48,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=591080.0, ans=0.125 +2024-09-19 04:30:13,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=591120.0, ans=0.0 +2024-09-19 04:30:20,423 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.75 vs. limit=15.0 +2024-09-19 04:30:32,269 INFO [train.py:1198] (0/2) Epoch 33, batch 3000, loss[loss=0.2439, ctc_loss=0.1242, cr_loss=0.3711, attn_decoder_loss=0.249, over 29769.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1184, cr_loss=0.359, attn_decoder_loss=0.2416, over 5781762.77 frames. 
], batch size: 81, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:30:32,270 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 04:30:50,728 INFO [train.py:1230] (0/2) Epoch 33, validation: loss=0.2119, ctc_loss=0.03704, cr_loss=5.931e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-19 04:30:50,729 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 04:31:22,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.76 vs. limit=15.0 +2024-09-19 04:31:30,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=591280.0, ans=0.2 +2024-09-19 04:31:50,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=591320.0, ans=0.125 +2024-09-19 04:31:51,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=591320.0, ans=0.125 +2024-09-19 04:32:00,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=591360.0, ans=0.125 +2024-09-19 04:32:11,227 INFO [train.py:1198] (0/2) Epoch 33, batch 3050, loss[loss=0.2223, ctc_loss=0.1044, cr_loss=0.3289, attn_decoder_loss=0.2281, over 29518.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1188, cr_loss=0.3601, attn_decoder_loss=0.2423, over 5776094.42 frames. ], batch size: 76, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:32:13,715 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.11 vs. limit=22.5 +2024-09-19 04:32:28,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=591440.0, ans=0.2 +2024-09-19 04:32:29,479 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.592e+01 9.144e+01 9.827e+01 2.461e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-19 04:32:29,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=591440.0, ans=0.125 +2024-09-19 04:32:41,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=591480.0, ans=0.125 +2024-09-19 04:32:45,667 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.27 vs. limit=15.0 +2024-09-19 04:33:02,048 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.31 vs. limit=22.5 +2024-09-19 04:33:03,450 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.78 vs. 
limit=15.0 +2024-09-19 04:33:11,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=591560.0, ans=0.125 +2024-09-19 04:33:12,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=591560.0, ans=0.125 +2024-09-19 04:33:19,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=591560.0, ans=0.2 +2024-09-19 04:33:24,479 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.63 vs. limit=10.0 +2024-09-19 04:33:26,769 INFO [train.py:1198] (0/2) Epoch 33, batch 3100, loss[loss=0.2539, ctc_loss=0.1278, cr_loss=0.3681, attn_decoder_loss=0.2597, over 29248.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1183, cr_loss=0.3591, attn_decoder_loss=0.2417, over 5775472.65 frames. ], batch size: 100, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:33:52,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=591640.0, ans=0.125 +2024-09-19 04:34:31,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.03 vs. limit=15.0 +2024-09-19 04:34:42,674 INFO [train.py:1198] (0/2) Epoch 33, batch 3150, loss[loss=0.263, ctc_loss=0.1388, cr_loss=0.4227, attn_decoder_loss=0.2674, over 28854.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1185, cr_loss=0.36, attn_decoder_loss=0.242, over 5782880.62 frames. ], batch size: 104, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:35:03,072 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.384e+01 8.559e+01 9.035e+01 9.509e+01 1.493e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-19 04:35:19,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=591880.0, ans=0.125 +2024-09-19 04:35:19,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=591880.0, ans=0.1 +2024-09-19 04:35:34,316 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:35:48,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.whiten.whitening_limit, batch_count=591960.0, ans=12.0 +2024-09-19 04:36:01,782 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-148000.pt +2024-09-19 04:36:10,887 INFO [train.py:1198] (0/2) Epoch 33, batch 3200, loss[loss=0.2393, ctc_loss=0.1147, cr_loss=0.3656, attn_decoder_loss=0.245, over 29425.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1178, cr_loss=0.3587, attn_decoder_loss=0.2414, over 5794269.34 frames. 
], batch size: 79, lr: 3.34e-03, grad_scale: 16.0 +2024-09-19 04:36:37,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=592040.0, ans=0.1 +2024-09-19 04:36:54,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=592080.0, ans=0.2 +2024-09-19 04:37:09,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=592120.0, ans=0.2 +2024-09-19 04:37:15,733 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.15 vs. limit=6.0 +2024-09-19 04:37:21,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=592160.0, ans=0.125 +2024-09-19 04:37:26,889 INFO [train.py:1198] (0/2) Epoch 33, batch 3250, loss[loss=0.2423, ctc_loss=0.1188, cr_loss=0.357, attn_decoder_loss=0.2481, over 29716.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1183, cr_loss=0.3602, attn_decoder_loss=0.242, over 5801289.22 frames. ], batch size: 84, lr: 3.34e-03, grad_scale: 16.0 +2024-09-19 04:37:27,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=592200.0, ans=0.125 +2024-09-19 04:37:43,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=592240.0, ans=0.0 +2024-09-19 04:37:44,971 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.600e+01 8.640e+01 9.097e+01 9.766e+01 4.487e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-19 04:38:42,519 INFO [train.py:1198] (0/2) Epoch 33, batch 3300, loss[loss=0.253, ctc_loss=0.1372, cr_loss=0.393, attn_decoder_loss=0.2571, over 28402.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1176, cr_loss=0.3583, attn_decoder_loss=0.2409, over 5798124.56 frames. ], batch size: 111, lr: 3.34e-03, grad_scale: 16.0 +2024-09-19 04:39:23,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=592480.0, ans=0.0 +2024-09-19 04:39:41,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=592520.0, ans=0.0 +2024-09-19 04:39:44,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=592520.0, ans=0.0 +2024-09-19 04:39:52,608 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:40:02,668 INFO [train.py:1198] (0/2) Epoch 33, batch 3350, loss[loss=0.2546, ctc_loss=0.1291, cr_loss=0.3838, attn_decoder_loss=0.26, over 28796.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1187, cr_loss=0.36, attn_decoder_loss=0.2418, over 5773903.30 frames. 
], batch size: 104, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:40:16,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=592640.0, ans=0.125 +2024-09-19 04:40:22,487 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.734e+01 8.878e+01 9.274e+01 9.993e+01 2.283e+02, threshold=1.855e+02, percent-clipped=2.0 +2024-09-19 04:40:57,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=592720.0, ans=0.1 +2024-09-19 04:41:03,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=592760.0, ans=0.0 +2024-09-19 04:41:05,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=592760.0, ans=0.125 +2024-09-19 04:41:07,392 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.98 vs. limit=10.0 +2024-09-19 04:41:08,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.57 vs. limit=15.0 +2024-09-19 04:41:10,443 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.89 vs. limit=15.0 +2024-09-19 04:41:14,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=592760.0, ans=0.1 +2024-09-19 04:41:19,059 INFO [train.py:1198] (0/2) Epoch 33, batch 3400, loss[loss=0.2069, ctc_loss=0.09915, cr_loss=0.3214, attn_decoder_loss=0.2117, over 29349.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1183, cr_loss=0.3589, attn_decoder_loss=0.2415, over 5766150.92 frames. ], batch size: 67, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:41:19,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.20 vs. limit=6.0 +2024-09-19 04:41:37,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=592840.0, ans=0.0 +2024-09-19 04:41:46,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=592840.0, ans=0.125 +2024-09-19 04:41:49,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=592880.0, ans=0.1 +2024-09-19 04:42:01,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=592880.0, ans=0.0 +2024-09-19 04:42:09,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=592920.0, ans=0.125 +2024-09-19 04:42:18,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=592960.0, ans=0.2 +2024-09-19 04:42:27,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=592960.0, ans=0.125 +2024-09-19 04:42:34,749 INFO [train.py:1198] (0/2) Epoch 33, batch 3450, loss[loss=0.2497, ctc_loss=0.1128, cr_loss=0.338, attn_decoder_loss=0.2574, over 28776.00 frames. 
], tot_loss[loss=0.2369, ctc_loss=0.1185, cr_loss=0.3593, attn_decoder_loss=0.242, over 5774617.61 frames. ], batch size: 112, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:42:56,751 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.423e+01 8.686e+01 9.141e+01 9.790e+01 2.387e+02, threshold=1.828e+02, percent-clipped=2.0 +2024-09-19 04:43:01,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=593040.0, ans=0.0 +2024-09-19 04:43:20,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=593080.0, ans=0.05 +2024-09-19 04:43:30,265 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=17.14 vs. limit=15.0 +2024-09-19 04:43:52,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=593160.0, ans=0.125 +2024-09-19 04:43:55,210 INFO [train.py:1198] (0/2) Epoch 33, batch 3500, loss[loss=0.2213, ctc_loss=0.1099, cr_loss=0.3573, attn_decoder_loss=0.2257, over 29331.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1181, cr_loss=0.3586, attn_decoder_loss=0.2414, over 5776369.97 frames. ], batch size: 71, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 04:44:20,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=593240.0, ans=0.0 +2024-09-19 04:44:22,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=593240.0, ans=0.125 +2024-09-19 04:44:33,373 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.74 vs. limit=22.5 +2024-09-19 04:44:38,180 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.85 vs. limit=22.5 +2024-09-19 04:44:44,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=593320.0, ans=0.125 +2024-09-19 04:44:56,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=593360.0, ans=0.0 +2024-09-19 04:45:09,792 INFO [train.py:1198] (0/2) Epoch 33, batch 3550, loss[loss=0.2439, ctc_loss=0.1259, cr_loss=0.3768, attn_decoder_loss=0.2487, over 29726.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1179, cr_loss=0.3585, attn_decoder_loss=0.2414, over 5782222.21 frames. ], batch size: 89, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 04:45:23,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=593440.0, ans=0.1 +2024-09-19 04:45:28,726 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.090e+01 8.555e+01 9.089e+01 9.583e+01 3.040e+02, threshold=1.818e+02, percent-clipped=2.0 +2024-09-19 04:45:35,723 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.83 vs. 
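The dense scaling.py:214 lines trace ScheduledFloat values: module hyperparameters (dropout_p, balancer prob, skip rates, scale_min, ...) that are functions of the global batch_count rather than constants, logged as ans=<current value>. A hedged sketch of the piecewise-linear idea, not the exact scaling.py implementation; the schedule points below are invented for illustration:

    from bisect import bisect_right

    def scheduled_float(points: list[tuple[float, float]], batch_count: float) -> float:
        # points: [(batch_count, value), ...] sorted by batch_count; linear
        # interpolation between points, clamped to the end values outside them.
        if batch_count <= points[0][0]:
            return points[0][1]
        if batch_count >= points[-1][0]:
            return points[-1][1]
        i = bisect_right([x for x, _ in points], batch_count)
        (x0, y0), (x1, y1) = points[i - 1], points[i]
        return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

    # e.g. a dropout_p that has long since decayed to its floor of 0.1:
    print(scheduled_float([(0.0, 0.3), (20000.0, 0.1)], 592040.0))  # 0.1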
limit=15.0 +2024-09-19 04:45:38,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=593480.0, ans=0.2 +2024-09-19 04:45:54,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=593520.0, ans=0.125 +2024-09-19 04:45:55,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=593520.0, ans=0.125 +2024-09-19 04:45:59,486 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.44 vs. limit=15.0 +2024-09-19 04:46:09,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=593560.0, ans=0.2 +2024-09-19 04:46:24,264 INFO [train.py:1198] (0/2) Epoch 33, batch 3600, loss[loss=0.2276, ctc_loss=0.1211, cr_loss=0.3785, attn_decoder_loss=0.2311, over 29486.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1178, cr_loss=0.359, attn_decoder_loss=0.2416, over 5791359.08 frames. ], batch size: 77, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:46:46,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.61 vs. limit=15.0 +2024-09-19 04:46:57,630 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:46:59,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=593680.0, ans=0.1 +2024-09-19 04:47:21,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=593720.0, ans=0.125 +2024-09-19 04:47:21,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=593720.0, ans=0.125 +2024-09-19 04:47:34,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=593760.0, ans=0.125 +2024-09-19 04:47:38,883 INFO [train.py:1198] (0/2) Epoch 33, batch 3650, loss[loss=0.2526, ctc_loss=0.1303, cr_loss=0.3985, attn_decoder_loss=0.2573, over 29505.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1173, cr_loss=0.3577, attn_decoder_loss=0.2409, over 5793522.54 frames. ], batch size: 90, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:47:42,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=593800.0, ans=0.0 +2024-09-19 04:47:58,207 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.325e+01 8.858e+01 9.502e+01 1.563e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 04:47:59,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=593840.0, ans=0.125 +2024-09-19 04:48:33,881 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.13 vs. 
limit=6.0 +2024-09-19 04:48:48,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=593960.0, ans=0.125 +2024-09-19 04:48:52,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=593960.0, ans=0.1 +2024-09-19 04:48:55,547 INFO [train.py:1198] (0/2) Epoch 33, batch 3700, loss[loss=0.2343, ctc_loss=0.1158, cr_loss=0.3436, attn_decoder_loss=0.2399, over 29696.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1172, cr_loss=0.3577, attn_decoder_loss=0.2409, over 5805037.23 frames. ], batch size: 84, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:49:07,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=594000.0, ans=0.125 +2024-09-19 04:49:10,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.33 vs. limit=15.0 +2024-09-19 04:49:16,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=594040.0, ans=0.0 +2024-09-19 04:49:24,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=594040.0, ans=0.0 +2024-09-19 04:49:54,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=594120.0, ans=0.0 +2024-09-19 04:49:56,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=594160.0, ans=0.125 +2024-09-19 04:50:06,939 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.03 vs. limit=6.0 +2024-09-19 04:50:11,530 INFO [train.py:1198] (0/2) Epoch 33, batch 3750, loss[loss=0.2094, ctc_loss=0.1003, cr_loss=0.3332, attn_decoder_loss=0.2141, over 29371.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1175, cr_loss=0.3585, attn_decoder_loss=0.241, over 5808391.87 frames. ], batch size: 67, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:50:16,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=594200.0, ans=0.125 +2024-09-19 04:50:30,937 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.235e+01 8.561e+01 9.006e+01 9.475e+01 6.465e+02, threshold=1.801e+02, percent-clipped=2.0 +2024-09-19 04:50:51,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=594280.0, ans=0.025 +2024-09-19 04:50:54,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=594320.0, ans=0.2 +2024-09-19 04:51:02,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=594320.0, ans=0.0 +2024-09-19 04:51:26,179 INFO [train.py:1198] (0/2) Epoch 33, batch 3800, loss[loss=0.2525, ctc_loss=0.1276, cr_loss=0.3731, attn_decoder_loss=0.2581, over 29616.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1174, cr_loss=0.3581, attn_decoder_loss=0.2409, over 5798861.05 frames. 
], batch size: 86, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:51:28,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=594400.0, ans=0.125 +2024-09-19 04:51:30,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=594400.0, ans=0.2 +2024-09-19 04:51:32,428 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:51:45,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=594440.0, ans=0.125 +2024-09-19 04:52:21,520 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:52:40,418 INFO [train.py:1198] (0/2) Epoch 33, batch 3850, loss[loss=0.2528, ctc_loss=0.1298, cr_loss=0.3884, attn_decoder_loss=0.2578, over 29264.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1172, cr_loss=0.3576, attn_decoder_loss=0.2407, over 5813138.43 frames. ], batch size: 100, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:52:47,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.82 vs. limit=22.5 +2024-09-19 04:52:59,670 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.228e+01 8.527e+01 9.047e+01 9.575e+01 1.638e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 04:53:01,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=594640.0, ans=0.125 +2024-09-19 04:53:13,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=594680.0, ans=0.125 +2024-09-19 04:53:23,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=594720.0, ans=0.0 +2024-09-19 04:53:55,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=594800.0, ans=0.0 +2024-09-19 04:53:56,281 INFO [train.py:1198] (0/2) Epoch 33, batch 3900, loss[loss=0.2539, ctc_loss=0.1321, cr_loss=0.4044, attn_decoder_loss=0.2584, over 29618.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1177, cr_loss=0.3586, attn_decoder_loss=0.2414, over 5817345.91 frames. ], batch size: 86, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 04:54:02,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=594800.0, ans=0.025 +2024-09-19 04:54:06,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=594800.0, ans=0.1 +2024-09-19 04:54:08,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=594800.0, ans=0.125 +2024-09-19 04:54:10,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.89 vs. 
limit=10.0 +2024-09-19 04:54:11,324 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:54:30,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=594880.0, ans=0.125 +2024-09-19 04:54:40,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=594920.0, ans=0.125 +2024-09-19 04:54:55,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=594960.0, ans=0.125 +2024-09-19 04:55:11,406 INFO [train.py:1198] (0/2) Epoch 33, batch 3950, loss[loss=0.2514, ctc_loss=0.1277, cr_loss=0.3787, attn_decoder_loss=0.2567, over 29464.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1179, cr_loss=0.3595, attn_decoder_loss=0.2415, over 5836355.13 frames. ], batch size: 97, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 04:55:19,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=595000.0, ans=0.0 +2024-09-19 04:55:20,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=595000.0, ans=0.0 +2024-09-19 04:55:32,035 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.582e+01 8.607e+01 9.033e+01 9.637e+01 1.585e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-19 04:55:49,198 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.04 vs. limit=6.0 +2024-09-19 04:55:50,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=595080.0, ans=0.0 +2024-09-19 04:56:25,736 INFO [train.py:1198] (0/2) Epoch 33, batch 4000, loss[loss=0.2202, ctc_loss=0.1045, cr_loss=0.3358, attn_decoder_loss=0.2255, over 29520.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1185, cr_loss=0.3599, attn_decoder_loss=0.2417, over 5813199.41 frames. ], batch size: 74, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:56:26,741 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.23 vs. limit=15.0 +2024-09-19 04:56:30,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=595200.0, ans=0.125 +2024-09-19 04:56:51,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=595240.0, ans=0.2 +2024-09-19 04:57:00,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=595280.0, ans=0.0 +2024-09-19 04:57:01,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=595280.0, ans=0.125 +2024-09-19 04:57:25,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=595360.0, ans=0.035 +2024-09-19 04:57:31,815 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.13 vs. 
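The scaling.py:1024 "Whitening" lines compare a measured whitening metric against a scheduled limit; the attached penalty only pushes on activations while the metric exceeds the limit, which is why many records show a metric well below it. One way such a metric can be defined, equal to 1.0 when the feature covariance is a multiple of the identity; this is a sketch of the idea, not necessarily the exact scaling.py formula:

    import torch

    def whitening_metric(x: torch.Tensor) -> float:
        # x: (num_frames, num_channels); returns 1.0 iff cov(x) = c * I.
        x = x - x.mean(dim=0)
        cov = (x.t() @ x) / x.shape[0]
        d = cov.shape[0]
        return (d * (cov ** 2).sum() / cov.diag().sum() ** 2).item()

    print(whitening_metric(torch.randn(10000, 64)))  # ~1.0 for white noise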
limit=6.0 +2024-09-19 04:57:34,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=595360.0, ans=0.0 +2024-09-19 04:57:37,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=595360.0, ans=0.025 +2024-09-19 04:57:39,807 INFO [train.py:1198] (0/2) Epoch 33, batch 4050, loss[loss=0.2575, ctc_loss=0.1417, cr_loss=0.365, attn_decoder_loss=0.2623, over 20591.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1181, cr_loss=0.359, attn_decoder_loss=0.2412, over 5797523.60 frames. ], batch size: 209, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:58:00,218 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.406e+01 8.573e+01 9.185e+01 9.893e+01 2.518e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-19 04:58:00,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=595440.0, ans=0.0 +2024-09-19 04:58:10,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=595480.0, ans=0.0 +2024-09-19 04:58:24,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=595520.0, ans=0.125 +2024-09-19 04:58:37,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=595520.0, ans=0.125 +2024-09-19 04:58:52,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=595560.0, ans=0.125 +2024-09-19 04:58:55,014 INFO [train.py:1198] (0/2) Epoch 33, batch 4100, loss[loss=0.2465, ctc_loss=0.1267, cr_loss=0.3837, attn_decoder_loss=0.2512, over 29504.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1184, cr_loss=0.3596, attn_decoder_loss=0.2416, over 5793946.99 frames. ], batch size: 90, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:59:17,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=595640.0, ans=0.0 +2024-09-19 04:59:41,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.30 vs. limit=10.0 +2024-09-19 04:59:45,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=595720.0, ans=0.05 +2024-09-19 04:59:46,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=595720.0, ans=0.2 +2024-09-19 05:00:02,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=595760.0, ans=0.0 +2024-09-19 05:00:02,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=595760.0, ans=0.1 +2024-09-19 05:00:09,891 INFO [train.py:1198] (0/2) Epoch 33, batch 4150, loss[loss=0.2276, ctc_loss=0.1138, cr_loss=0.341, attn_decoder_loss=0.2327, over 29473.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1183, cr_loss=0.3593, attn_decoder_loss=0.2413, over 5799297.35 frames. 
], batch size: 77, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 05:00:21,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=595800.0, ans=0.125 +2024-09-19 05:00:31,921 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.198e+01 8.400e+01 8.837e+01 9.482e+01 1.626e+02, threshold=1.767e+02, percent-clipped=0.0 +2024-09-19 05:00:33,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=595840.0, ans=0.1 +2024-09-19 05:00:38,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=595880.0, ans=0.07 +2024-09-19 05:00:40,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=595880.0, ans=0.0 +2024-09-19 05:00:51,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=595880.0, ans=0.125 +2024-09-19 05:01:07,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=595960.0, ans=0.05 +2024-09-19 05:01:23,828 INFO [train.py:1198] (0/2) Epoch 33, batch 4200, loss[loss=0.2543, ctc_loss=0.1396, cr_loss=0.3987, attn_decoder_loss=0.2582, over 29525.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1183, cr_loss=0.3598, attn_decoder_loss=0.2417, over 5801314.76 frames. ], batch size: 90, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 05:01:33,811 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.77 vs. limit=22.5 +2024-09-19 05:01:45,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=596040.0, ans=0.0 +2024-09-19 05:01:47,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=596040.0, ans=0.125 +2024-09-19 05:02:06,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=596120.0, ans=0.2 +2024-09-19 05:02:09,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=596120.0, ans=0.125 +2024-09-19 05:02:17,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=596120.0, ans=0.125 +2024-09-19 05:02:23,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=596160.0, ans=0.0 +2024-09-19 05:02:38,337 INFO [train.py:1198] (0/2) Epoch 33, batch 4250, loss[loss=0.2179, ctc_loss=0.09707, cr_loss=0.3259, attn_decoder_loss=0.2241, over 29529.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1182, cr_loss=0.3601, attn_decoder_loss=0.2417, over 5806596.31 frames. ], batch size: 74, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 05:02:44,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=596200.0, ans=0.125 +2024-09-19 05:02:59,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.79 vs. 
limit=12.0 +2024-09-19 05:02:59,965 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.204e+01 8.601e+01 9.024e+01 9.699e+01 1.912e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-19 05:03:00,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=596240.0, ans=0.1 +2024-09-19 05:03:03,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=596240.0, ans=0.0 +2024-09-19 05:03:31,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=596320.0, ans=0.2 +2024-09-19 05:03:52,556 INFO [train.py:1198] (0/2) Epoch 33, batch 4300, loss[loss=0.2405, ctc_loss=0.1156, cr_loss=0.358, attn_decoder_loss=0.2464, over 29525.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1177, cr_loss=0.3588, attn_decoder_loss=0.2416, over 5796943.48 frames. ], batch size: 87, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 05:04:16,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=596440.0, ans=0.2 +2024-09-19 05:04:32,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=596480.0, ans=0.09899494936611666 +2024-09-19 05:04:38,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=596520.0, ans=0.125 +2024-09-19 05:04:41,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=596520.0, ans=0.0 +2024-09-19 05:04:52,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=596560.0, ans=0.07 +2024-09-19 05:04:55,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=596560.0, ans=0.125 +2024-09-19 05:04:55,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=596560.0, ans=0.0 +2024-09-19 05:05:07,031 INFO [train.py:1198] (0/2) Epoch 33, batch 4350, loss[loss=0.2632, ctc_loss=0.1437, cr_loss=0.416, attn_decoder_loss=0.2672, over 29489.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1207, cr_loss=0.365, attn_decoder_loss=0.2453, over 5799077.79 frames. ], batch size: 97, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 05:05:13,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=596600.0, ans=0.125 +2024-09-19 05:05:19,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=596600.0, ans=0.125 +2024-09-19 05:05:24,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=596640.0, ans=0.125 +2024-09-19 05:05:29,989 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.368e+01 8.801e+01 9.131e+01 9.765e+01 2.028e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-19 05:05:35,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=596680.0, ans=0.025 +2024-09-19 05:05:39,284 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.84 vs. 
limit=22.5 +2024-09-19 05:05:43,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=596680.0, ans=0.025 +2024-09-19 05:05:46,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=596680.0, ans=0.1 +2024-09-19 05:06:06,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=596760.0, ans=0.125 +2024-09-19 05:06:22,397 INFO [train.py:1198] (0/2) Epoch 33, batch 4400, loss[loss=0.251, ctc_loss=0.1388, cr_loss=0.4044, attn_decoder_loss=0.2545, over 27365.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1218, cr_loss=0.3673, attn_decoder_loss=0.247, over 5769188.47 frames. ], batch size: 124, lr: 3.32e-03, grad_scale: 16.0 +2024-09-19 05:06:23,340 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.91 vs. limit=12.0 +2024-09-19 05:06:25,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=596800.0, ans=0.0 +2024-09-19 05:07:09,633 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.36 vs. limit=12.0 +2024-09-19 05:07:16,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=596920.0, ans=0.0 +2024-09-19 05:07:36,278 INFO [train.py:1198] (0/2) Epoch 33, batch 4450, loss[loss=0.2507, ctc_loss=0.1368, cr_loss=0.3852, attn_decoder_loss=0.2548, over 20180.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1255, cr_loss=0.373, attn_decoder_loss=0.2494, over 5574626.52 frames. ], batch size: 209, lr: 3.32e-03, grad_scale: 8.0 +2024-09-19 05:07:36,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=597000.0, ans=0.125 +2024-09-19 05:07:53,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=597040.0, ans=0.125 +2024-09-19 05:08:00,436 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.114e+01 9.208e+01 9.597e+01 1.124e+02 1.638e+02, threshold=1.919e+02, percent-clipped=0.0 +2024-09-19 05:08:05,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=597080.0, ans=0.125 +2024-09-19 05:08:15,360 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.57 vs. limit=15.0 +2024-09-19 05:08:17,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=597080.0, ans=0.125 +2024-09-19 05:08:49,970 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=10.99 vs. limit=12.0 +2024-09-19 05:08:52,062 INFO [train.py:1198] (0/2) Epoch 33, batch 4500, loss[loss=0.2486, ctc_loss=0.1319, cr_loss=0.3676, attn_decoder_loss=0.2534, over 20311.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1291, cr_loss=0.3758, attn_decoder_loss=0.2512, over 5231995.43 frames. 
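Two kinds of saves appear in this log: a rolling checkpoint-<global_batch>.pt every fixed number of batches (checkpoint-148000.pt earlier) and an epoch-<n>.pt at each epoch boundary (epoch-33.pt just below, right before Epoch 34, batch 0). A sketch; the 4000-batch interval is an assumption that happens to divide 148000:

    from typing import Optional

    def checkpoint_name(global_batch: int, save_every_n: int = 4000) -> Optional[str]:
        # Rolling mid-epoch save; the epoch-<n>.pt save happens separately
        # at the end of each epoch.
        if global_batch > 0 and global_batch % save_every_n == 0:
            return f"checkpoint-{global_batch}.pt"
        return None

    assert checkpoint_name(148000) == "checkpoint-148000.pt"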
], batch size: 209, lr: 3.32e-03, grad_scale: 8.0 +2024-09-19 05:08:53,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=597200.0, ans=0.125 +2024-09-19 05:08:55,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=597200.0, ans=0.0 +2024-09-19 05:08:59,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=597200.0, ans=0.125 +2024-09-19 05:08:59,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=597200.0, ans=0.0 +2024-09-19 05:09:16,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=597240.0, ans=0.125 +2024-09-19 05:09:17,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=597240.0, ans=0.0 +2024-09-19 05:09:23,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=597280.0, ans=0.1 +2024-09-19 05:09:29,391 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-33.pt +2024-09-19 05:10:21,345 INFO [train.py:1198] (0/2) Epoch 34, batch 0, loss[loss=0.2194, ctc_loss=0.1091, cr_loss=0.3431, attn_decoder_loss=0.224, over 29573.00 frames. ], tot_loss[loss=0.2194, ctc_loss=0.1091, cr_loss=0.3431, attn_decoder_loss=0.224, over 29573.00 frames. ], batch size: 73, lr: 3.27e-03, grad_scale: 16.0 +2024-09-19 05:10:21,346 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 05:10:26,212 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.0973, 4.9657, 4.7907, 4.4852], device='cuda:0') +2024-09-19 05:10:39,722 INFO [train.py:1230] (0/2) Epoch 34, validation: loss=0.2115, ctc_loss=0.03706, cr_loss=5.889e-15, attn_decoder_loss=0.2309, over 944034.00 frames. +2024-09-19 05:10:39,723 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 05:10:43,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.84 vs. 
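In the epoch-34 validation record above, cr_loss is ~5.9e-15, effectively zero; a plausible reading (an assumption on my part) is that the consistency-regularization term only compares the two time-masked training copies, so it vanishes on unaugmented validation batches. The logged total still obeys the same 0.1/0.9/0.02 weighting:

    # validation: loss=0.2115, ctc_loss=0.03706, cr_loss=5.889e-15, attn=0.2309
    assert abs(0.1 * 0.03706 + 0.9 * 0.2309 + 0.02 * 5.889e-15 - 0.2115) < 5e-4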
limit=22.5 +2024-09-19 05:11:08,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=597380.0, ans=10.0 +2024-09-19 05:11:14,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=597380.0, ans=0.125 +2024-09-19 05:11:32,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=597420.0, ans=0.0 +2024-09-19 05:11:45,329 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.942e+01 9.532e+01 1.086e+02 1.158e+02 1.194e+03, threshold=2.172e+02, percent-clipped=2.0 +2024-09-19 05:11:48,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=597460.0, ans=0.2 +2024-09-19 05:11:54,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=597460.0, ans=0.1 +2024-09-19 05:11:57,347 INFO [train.py:1198] (0/2) Epoch 34, batch 50, loss[loss=0.2175, ctc_loss=0.1079, cr_loss=0.3256, attn_decoder_loss=0.2224, over 29430.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1195, cr_loss=0.3625, attn_decoder_loss=0.2422, over 1269156.22 frames. ], batch size: 70, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:12:00,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=597500.0, ans=0.0 +2024-09-19 05:12:02,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=597500.0, ans=0.0 +2024-09-19 05:12:06,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=597500.0, ans=0.025 +2024-09-19 05:12:19,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=597540.0, ans=0.2 +2024-09-19 05:12:25,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=597540.0, ans=0.0 +2024-09-19 05:13:04,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=597660.0, ans=0.0 +2024-09-19 05:13:16,055 INFO [train.py:1198] (0/2) Epoch 34, batch 100, loss[loss=0.2249, ctc_loss=0.1087, cr_loss=0.3264, attn_decoder_loss=0.2305, over 29524.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1218, cr_loss=0.3676, attn_decoder_loss=0.245, over 2252911.65 frames. 
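The learning rate decays smoothly within epoch 33 (3.34e-03 down to 3.32e-03) and then steps down to 3.27e-03 at Epoch 34, batch 0, which is consistent with an Eden-style schedule that decays in both batch and epoch counts. A hedged sketch; the lr_batches=5000 / lr_epochs=3.5 constants are assumed defaults, though the epoch-33 to epoch-34 factor they imply (~0.985) does reproduce the observed drop:

    def eden_lr(base_lr: float, batch: int, epoch: int,
                lr_batches: float = 5000.0, lr_epochs: float = 3.5) -> float:
        # lr shrinks smoothly as both the batch index and epoch index grow.
        return (base_lr
                * ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
                * ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25)

    # Epoch factor ratio between epoch 34 and epoch 33:
    print(((34 ** 2 + 3.5 ** 2) / (33 ** 2 + 3.5 ** 2)) ** -0.25)  # ~0.985
    assert abs(0.985 * 3.32e-03 - 3.27e-03) < 1e-05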
], batch size: 76, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:13:22,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=597700.0, ans=0.0 +2024-09-19 05:13:34,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=597740.0, ans=0.0 +2024-09-19 05:13:53,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=597780.0, ans=0.125 +2024-09-19 05:14:02,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=597820.0, ans=0.0 +2024-09-19 05:14:04,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=597820.0, ans=0.0 +2024-09-19 05:14:10,791 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.12 vs. limit=10.0 +2024-09-19 05:14:14,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=597860.0, ans=0.09899494936611666 +2024-09-19 05:14:18,841 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.662e+01 8.574e+01 9.028e+01 9.395e+01 1.381e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-19 05:14:25,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=597860.0, ans=0.0 +2024-09-19 05:14:30,768 INFO [train.py:1198] (0/2) Epoch 34, batch 150, loss[loss=0.2194, ctc_loss=0.1059, cr_loss=0.3478, attn_decoder_loss=0.2243, over 29448.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1193, cr_loss=0.3626, attn_decoder_loss=0.2428, over 3048113.80 frames. ], batch size: 70, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:14:32,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=597900.0, ans=0.125 +2024-09-19 05:14:41,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=597900.0, ans=0.125 +2024-09-19 05:14:46,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=597940.0, ans=0.0 +2024-09-19 05:14:47,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=597940.0, ans=0.125 +2024-09-19 05:14:57,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=597940.0, ans=0.125 +2024-09-19 05:15:06,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=597980.0, ans=0.125 +2024-09-19 05:15:19,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=598020.0, ans=0.125 +2024-09-19 05:15:27,230 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.60 vs. limit=22.5 +2024-09-19 05:15:48,458 INFO [train.py:1198] (0/2) Epoch 34, batch 200, loss[loss=0.2557, ctc_loss=0.1329, cr_loss=0.3876, attn_decoder_loss=0.2608, over 27662.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1183, cr_loss=0.3609, attn_decoder_loss=0.2418, over 3659701.12 frames. 
], batch size: 125, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:16:12,131 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.71 vs. limit=6.0 +2024-09-19 05:16:15,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=598140.0, ans=0.125 +2024-09-19 05:16:21,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=598180.0, ans=0.2 +2024-09-19 05:16:54,092 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.406e+01 8.433e+01 8.957e+01 9.594e+01 1.517e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 05:16:55,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=598260.0, ans=0.125 +2024-09-19 05:17:00,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=598260.0, ans=0.125 +2024-09-19 05:17:06,366 INFO [train.py:1198] (0/2) Epoch 34, batch 250, loss[loss=0.259, ctc_loss=0.1396, cr_loss=0.4061, attn_decoder_loss=0.2632, over 29221.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1183, cr_loss=0.3614, attn_decoder_loss=0.2417, over 4140720.44 frames. ], batch size: 100, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:17:25,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=598340.0, ans=0.1 +2024-09-19 05:17:26,439 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.53 vs. limit=6.0 +2024-09-19 05:17:27,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=598340.0, ans=0.2 +2024-09-19 05:17:29,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=598340.0, ans=0.025 +2024-09-19 05:17:29,905 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.23 vs. limit=10.0 +2024-09-19 05:17:42,151 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.54 vs. limit=12.0 +2024-09-19 05:17:43,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=598380.0, ans=0.125 +2024-09-19 05:18:01,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=598420.0, ans=0.035 +2024-09-19 05:18:10,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=598460.0, ans=0.2 +2024-09-19 05:18:12,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=598460.0, ans=0.0 +2024-09-19 05:18:22,541 INFO [train.py:1198] (0/2) Epoch 34, batch 300, loss[loss=0.2423, ctc_loss=0.1214, cr_loss=0.3659, attn_decoder_loss=0.2476, over 29542.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1173, cr_loss=0.3589, attn_decoder_loss=0.2409, over 4510132.35 frames. 
], batch size: 92, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:18:30,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=598500.0, ans=0.1 +2024-09-19 05:18:32,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.26 vs. limit=6.0 +2024-09-19 05:18:51,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=598580.0, ans=0.125 +2024-09-19 05:19:12,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=598620.0, ans=0.0 +2024-09-19 05:19:21,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=598660.0, ans=0.0 +2024-09-19 05:19:24,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_abs, batch_count=598660.0, ans=0.5 +2024-09-19 05:19:25,507 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.17 vs. limit=15.0 +2024-09-19 05:19:25,588 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.15 vs. limit=12.0 +2024-09-19 05:19:26,025 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.105e+01 8.376e+01 8.844e+01 9.262e+01 3.831e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-19 05:19:40,603 INFO [train.py:1198] (0/2) Epoch 34, batch 350, loss[loss=0.2169, ctc_loss=0.09869, cr_loss=0.3146, attn_decoder_loss=0.223, over 29331.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1177, cr_loss=0.3593, attn_decoder_loss=0.2414, over 4796099.73 frames. ], batch size: 71, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:20:03,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=598740.0, ans=0.125 +2024-09-19 05:20:10,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=598780.0, ans=0.0 +2024-09-19 05:20:12,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.98 vs. limit=15.0 +2024-09-19 05:20:16,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=598780.0, ans=0.2 +2024-09-19 05:20:39,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=598860.0, ans=0.1 +2024-09-19 05:20:52,936 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.31 vs. limit=22.5 +2024-09-19 05:20:58,172 INFO [train.py:1198] (0/2) Epoch 34, batch 400, loss[loss=0.2441, ctc_loss=0.1211, cr_loss=0.3759, attn_decoder_loss=0.2494, over 29705.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1176, cr_loss=0.3594, attn_decoder_loss=0.2412, over 5025811.08 frames. ], batch size: 82, lr: 3.27e-03, grad_scale: 16.0 +2024-09-19 05:21:21,885 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.36 vs. 
limit=15.0 +2024-09-19 05:22:02,034 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.466e+01 8.485e+01 9.014e+01 9.585e+01 2.227e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-19 05:22:14,037 INFO [train.py:1198] (0/2) Epoch 34, batch 450, loss[loss=0.2457, ctc_loss=0.1203, cr_loss=0.3712, attn_decoder_loss=0.2513, over 29690.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1177, cr_loss=0.359, attn_decoder_loss=0.2413, over 5186095.76 frames. ], batch size: 83, lr: 3.27e-03, grad_scale: 16.0 +2024-09-19 05:22:19,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=599100.0, ans=0.0 +2024-09-19 05:22:49,317 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:22:59,539 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.95 vs. limit=22.5 +2024-09-19 05:23:09,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=599220.0, ans=0.125 +2024-09-19 05:23:30,249 INFO [train.py:1198] (0/2) Epoch 34, batch 500, loss[loss=0.2478, ctc_loss=0.1224, cr_loss=0.3485, attn_decoder_loss=0.254, over 29454.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1175, cr_loss=0.3586, attn_decoder_loss=0.2405, over 5328456.03 frames. ], batch size: 94, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:24:04,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=599380.0, ans=0.0 +2024-09-19 05:24:06,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=599380.0, ans=0.0 +2024-09-19 05:24:21,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=599420.0, ans=0.1 +2024-09-19 05:24:21,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.14 vs. limit=15.0 +2024-09-19 05:24:27,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=599420.0, ans=0.125 +2024-09-19 05:24:27,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=599420.0, ans=0.0 +2024-09-19 05:24:37,604 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.414e+01 8.516e+01 9.011e+01 9.672e+01 1.492e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 05:24:43,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=599460.0, ans=0.2 +2024-09-19 05:24:44,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=599460.0, ans=0.035 +2024-09-19 05:24:49,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=599500.0, ans=0.125 +2024-09-19 05:24:50,590 INFO [train.py:1198] (0/2) Epoch 34, batch 550, loss[loss=0.2545, ctc_loss=0.1316, cr_loss=0.3991, attn_decoder_loss=0.2593, over 28919.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1172, cr_loss=0.3583, attn_decoder_loss=0.2405, over 5420942.70 frames. 
], batch size: 104, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:25:27,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=599580.0, ans=0.1 +2024-09-19 05:25:29,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.58 vs. limit=10.0 +2024-09-19 05:26:04,450 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.58 vs. limit=15.0 +2024-09-19 05:26:06,404 INFO [train.py:1198] (0/2) Epoch 34, batch 600, loss[loss=0.2499, ctc_loss=0.1243, cr_loss=0.3615, attn_decoder_loss=0.2559, over 29312.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1173, cr_loss=0.3578, attn_decoder_loss=0.2406, over 5510135.95 frames. ], batch size: 100, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:26:14,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=599700.0, ans=0.125 +2024-09-19 05:26:22,132 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.85 vs. limit=12.0 +2024-09-19 05:26:29,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=599740.0, ans=0.0 +2024-09-19 05:26:44,862 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.49 vs. limit=15.0 +2024-09-19 05:26:48,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=599780.0, ans=0.0 +2024-09-19 05:26:50,909 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.38 vs. limit=15.0 +2024-09-19 05:27:11,322 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.757e+01 8.437e+01 8.830e+01 9.420e+01 2.114e+02, threshold=1.766e+02, percent-clipped=1.0 +2024-09-19 05:27:21,946 INFO [train.py:1198] (0/2) Epoch 34, batch 650, loss[loss=0.2493, ctc_loss=0.138, cr_loss=0.4094, attn_decoder_loss=0.2526, over 29765.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1166, cr_loss=0.3567, attn_decoder_loss=0.2401, over 5586936.75 frames. ], batch size: 81, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:28:14,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=600020.0, ans=0.0 +2024-09-19 05:28:25,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=600060.0, ans=0.07 +2024-09-19 05:28:32,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.78 vs. limit=15.0 +2024-09-19 05:28:33,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=600060.0, ans=0.07 +2024-09-19 05:28:40,698 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.40 vs. limit=15.0 +2024-09-19 05:28:42,765 INFO [train.py:1198] (0/2) Epoch 34, batch 700, loss[loss=0.2255, ctc_loss=0.1094, cr_loss=0.3321, attn_decoder_loss=0.231, over 29538.00 frames. 
], tot_loss[loss=0.2353, ctc_loss=0.1166, cr_loss=0.3569, attn_decoder_loss=0.2406, over 5635995.72 frames. ], batch size: 76, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:28:50,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=600100.0, ans=0.1 +2024-09-19 05:29:24,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=600180.0, ans=0.09899494936611666 +2024-09-19 05:29:48,399 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 8.364e+01 8.809e+01 9.436e+01 2.463e+02, threshold=1.762e+02, percent-clipped=1.0 +2024-09-19 05:29:59,016 INFO [train.py:1198] (0/2) Epoch 34, batch 750, loss[loss=0.2494, ctc_loss=0.1234, cr_loss=0.3739, attn_decoder_loss=0.2551, over 29715.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1168, cr_loss=0.3574, attn_decoder_loss=0.2406, over 5675338.89 frames. ], batch size: 82, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:30:09,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn2.whiten.whitening_limit, batch_count=600300.0, ans=22.5 +2024-09-19 05:30:34,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=600380.0, ans=0.0 +2024-09-19 05:30:54,633 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=10.32 vs. limit=15.0 +2024-09-19 05:31:04,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=600460.0, ans=0.0 +2024-09-19 05:31:07,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=600460.0, ans=0.1 +2024-09-19 05:31:14,725 INFO [train.py:1198] (0/2) Epoch 34, batch 800, loss[loss=0.2286, ctc_loss=0.1183, cr_loss=0.3534, attn_decoder_loss=0.233, over 29609.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1172, cr_loss=0.3578, attn_decoder_loss=0.2409, over 5705851.11 frames. ], batch size: 73, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:31:21,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=600500.0, ans=0.025 +2024-09-19 05:31:43,354 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.53 vs. limit=12.0 +2024-09-19 05:32:08,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=600620.0, ans=0.025 +2024-09-19 05:32:09,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.20 vs. 
limit=22.5 +2024-09-19 05:32:12,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=600620.0, ans=0.2 +2024-09-19 05:32:16,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=600660.0, ans=0.125 +2024-09-19 05:32:21,712 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.053e+01 8.379e+01 9.063e+01 9.651e+01 1.795e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-19 05:32:32,186 INFO [train.py:1198] (0/2) Epoch 34, batch 850, loss[loss=0.2408, ctc_loss=0.1243, cr_loss=0.3671, attn_decoder_loss=0.2456, over 29721.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1172, cr_loss=0.3577, attn_decoder_loss=0.2407, over 5735404.90 frames. ], batch size: 89, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:32:37,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=600700.0, ans=0.1 +2024-09-19 05:32:44,240 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.58 vs. limit=6.0 +2024-09-19 05:32:46,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=600700.0, ans=0.0 +2024-09-19 05:32:57,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=600740.0, ans=0.2 +2024-09-19 05:32:58,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=600740.0, ans=0.125 +2024-09-19 05:33:00,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=600740.0, ans=0.1 +2024-09-19 05:33:00,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=600740.0, ans=0.125 +2024-09-19 05:33:09,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=600780.0, ans=0.125 +2024-09-19 05:33:12,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=600780.0, ans=0.125 +2024-09-19 05:33:24,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=600820.0, ans=0.125 +2024-09-19 05:33:43,804 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.88 vs. limit=15.0 +2024-09-19 05:33:47,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=600860.0, ans=0.125 +2024-09-19 05:33:50,503 INFO [train.py:1198] (0/2) Epoch 34, batch 900, loss[loss=0.2107, ctc_loss=0.1012, cr_loss=0.3282, attn_decoder_loss=0.2156, over 29618.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1176, cr_loss=0.3584, attn_decoder_loss=0.2409, over 5739140.91 frames. 
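The `optim.py:487` WARNING lines share one pattern: the five numbers after `grad-norm quartiles` are the min / 25% / median / 75% / max of a buffer of recently observed gradient norms, and in every instance the reported threshold equals `Clipping_scale` times the median (e.g. 2.0 × 9.063e+01 = 1.813e+02 just above); `percent-clipped` is the share of recent batches whose norm exceeded it. A sketch of that median-relative clipping rule, with a made-up buffer size and a global norm rather than ScaledAdam's actual per-parameter logic:

```python
import collections
import torch

def clip_gradients(params, history, clipping_scale=2.0):
    """Clip the global grad norm at clipping_scale * median of recent norms;
    `history` is a deque of recent norms maintained across steps."""
    grads = [p.grad for p in params if p.grad is not None]
    norm = torch.linalg.vector_norm(
        torch.stack([torch.linalg.vector_norm(g) for g in grads]))
    history.append(norm.item())

    t = torch.tensor(list(history))
    quartiles = [torch.quantile(t, q).item() for q in (0.0, 0.25, 0.5, 0.75, 1.0)]
    threshold = clipping_scale * quartiles[2]   # scale * median
    if norm.item() > threshold:                 # counted by 'percent-clipped'
        for g in grads:
            g.mul_(threshold / norm)
    return quartiles, threshold

history = collections.deque(maxlen=1000)        # buffer length is a guess
```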
], batch size: 73, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:33:58,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=600900.0, ans=0.025 +2024-09-19 05:34:42,787 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.48 vs. limit=10.0 +2024-09-19 05:34:43,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=601020.0, ans=0.1 +2024-09-19 05:34:56,770 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.371e+01 8.479e+01 9.154e+01 9.598e+01 2.436e+02, threshold=1.831e+02, percent-clipped=2.0 +2024-09-19 05:35:05,825 INFO [train.py:1198] (0/2) Epoch 34, batch 950, loss[loss=0.2267, ctc_loss=0.1076, cr_loss=0.3298, attn_decoder_loss=0.2326, over 29523.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1174, cr_loss=0.3582, attn_decoder_loss=0.2409, over 5741451.58 frames. ], batch size: 74, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:35:32,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=601140.0, ans=0.0 +2024-09-19 05:35:41,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=601180.0, ans=0.125 +2024-09-19 05:35:51,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=601180.0, ans=0.05 +2024-09-19 05:36:00,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=601220.0, ans=0.125 +2024-09-19 05:36:01,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=601220.0, ans=0.0 +2024-09-19 05:36:15,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=601260.0, ans=0.125 +2024-09-19 05:36:16,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=601260.0, ans=0.125 +2024-09-19 05:36:26,135 INFO [train.py:1198] (0/2) Epoch 34, batch 1000, loss[loss=0.2301, ctc_loss=0.1137, cr_loss=0.3711, attn_decoder_loss=0.2348, over 29504.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1178, cr_loss=0.3591, attn_decoder_loss=0.2414, over 5736331.22 frames. ], batch size: 77, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:36:26,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=601300.0, ans=0.0 +2024-09-19 05:36:36,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=601300.0, ans=0.125 +2024-09-19 05:36:55,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=601380.0, ans=0.0 +2024-09-19 05:37:13,687 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.61 vs. 
limit=15.0 +2024-09-19 05:37:16,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=601420.0, ans=0.1 +2024-09-19 05:37:23,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=601420.0, ans=0.125 +2024-09-19 05:37:28,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=601460.0, ans=0.125 +2024-09-19 05:37:32,496 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.521e+01 9.169e+01 9.649e+01 1.531e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-19 05:37:40,870 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.47 vs. limit=15.0 +2024-09-19 05:37:41,629 INFO [train.py:1198] (0/2) Epoch 34, batch 1050, loss[loss=0.2458, ctc_loss=0.1238, cr_loss=0.3741, attn_decoder_loss=0.251, over 29681.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1178, cr_loss=0.359, attn_decoder_loss=0.2409, over 5744512.33 frames. ], batch size: 85, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:38:15,082 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.61 vs. limit=5.0 +2024-09-19 05:38:29,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=601620.0, ans=0.125 +2024-09-19 05:38:46,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=601660.0, ans=0.025 +2024-09-19 05:38:50,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=601660.0, ans=0.125 +2024-09-19 05:38:58,091 INFO [train.py:1198] (0/2) Epoch 34, batch 1100, loss[loss=0.2319, ctc_loss=0.117, cr_loss=0.3647, attn_decoder_loss=0.2366, over 29428.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1175, cr_loss=0.3585, attn_decoder_loss=0.2407, over 5757943.74 frames. ], batch size: 78, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:39:24,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=601740.0, ans=0.1 +2024-09-19 05:39:35,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=601780.0, ans=0.125 +2024-09-19 05:39:53,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=601820.0, ans=0.125 +2024-09-19 05:39:59,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=601860.0, ans=0.1 +2024-09-19 05:40:06,790 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.181e+01 8.458e+01 9.005e+01 9.723e+01 2.492e+02, threshold=1.801e+02, percent-clipped=1.0 +2024-09-19 05:40:16,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=601900.0, ans=0.2 +2024-09-19 05:40:18,194 INFO [train.py:1198] (0/2) Epoch 34, batch 1150, loss[loss=0.2339, ctc_loss=0.1154, cr_loss=0.3698, attn_decoder_loss=0.2388, over 29421.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1178, cr_loss=0.359, attn_decoder_loss=0.2409, over 5754408.21 frames. 
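The `scaling.py:1024` Whitening entries compare a per-module statistic against a limit (`metric=4.61 vs. limit=5.0`); the Whiten modules only apply their corrective gradient once the metric crosses the limit, nudging a layer's output covariance toward a multiple of the identity. The metric can be read as E[λ²]/E[λ]² over the eigenvalues of the grouped feature covariance, which equals 1.0 for perfectly white features. A sketch of that statistic; the implementation in `zipformer/scaling.py` differs in detail:

```python
import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> torch.Tensor:
    """E[eig^2] / E[eig]^2 of the per-group covariance of x with shape
    (num_frames, num_channels): 1.0 for white features, larger as the
    covariance spectrum becomes more lopsided."""
    num_frames, num_channels = x.shape
    c = num_channels // num_groups
    x = x.reshape(num_frames, num_groups, c).transpose(0, 1)       # (g, n, c)
    covar = x.transpose(1, 2) @ x / num_frames                     # (g, c, c)
    mean_eig = torch.diagonal(covar, dim1=1, dim2=2).mean(dim=1)   # trace/c
    mean_eig_sq = (covar ** 2).sum(dim=(1, 2)) / c                 # trace(C^2)/c
    return (mean_eig_sq / (mean_eig ** 2 + 1e-20)).mean()

x = torch.randn(2000, 144)                  # roughly white activations
print(whitening_metric(x, num_groups=1))    # close to 1.0, well under limit=10.0
```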
], batch size: 78, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:40:38,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=601940.0, ans=0.125 +2024-09-19 05:40:47,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=601980.0, ans=0.125 +2024-09-19 05:41:04,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=602020.0, ans=0.125 +2024-09-19 05:41:31,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=602060.0, ans=0.125 +2024-09-19 05:41:33,784 INFO [train.py:1198] (0/2) Epoch 34, batch 1200, loss[loss=0.2503, ctc_loss=0.1234, cr_loss=0.3678, attn_decoder_loss=0.2563, over 29675.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1181, cr_loss=0.3593, attn_decoder_loss=0.2415, over 5748110.38 frames. ], batch size: 85, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:41:34,646 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.28 vs. limit=15.0 +2024-09-19 05:41:52,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=602140.0, ans=0.125 +2024-09-19 05:41:55,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=602140.0, ans=0.125 +2024-09-19 05:41:55,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=602140.0, ans=0.0 +2024-09-19 05:42:04,516 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:42:04,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=602180.0, ans=0.125 +2024-09-19 05:42:22,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=602220.0, ans=0.07 +2024-09-19 05:42:24,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=602220.0, ans=0.125 +2024-09-19 05:42:24,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=602220.0, ans=0.125 +2024-09-19 05:42:42,361 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.436e+01 8.575e+01 9.202e+01 9.867e+01 2.398e+02, threshold=1.840e+02, percent-clipped=1.0 +2024-09-19 05:42:49,882 INFO [train.py:1198] (0/2) Epoch 34, batch 1250, loss[loss=0.2427, ctc_loss=0.1183, cr_loss=0.3593, attn_decoder_loss=0.2485, over 29531.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1188, cr_loss=0.3612, attn_decoder_loss=0.2425, over 5775313.63 frames. ], batch size: 92, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:42:59,427 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:43:22,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=8.98 vs. 
limit=15.0 +2024-09-19 05:43:25,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=602380.0, ans=0.1 +2024-09-19 05:43:30,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=602380.0, ans=0.0 +2024-09-19 05:43:34,763 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.09 vs. limit=15.0 +2024-09-19 05:44:03,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.28 vs. limit=15.0 +2024-09-19 05:44:10,635 INFO [train.py:1198] (0/2) Epoch 34, batch 1300, loss[loss=0.2421, ctc_loss=0.1156, cr_loss=0.3493, attn_decoder_loss=0.2484, over 28365.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1183, cr_loss=0.36, attn_decoder_loss=0.2417, over 5780445.01 frames. ], batch size: 111, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:44:32,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=602540.0, ans=0.1 +2024-09-19 05:44:33,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=602540.0, ans=0.125 +2024-09-19 05:45:10,961 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.23 vs. limit=8.0 +2024-09-19 05:45:18,886 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.383e+01 8.885e+01 9.572e+01 2.098e+02, threshold=1.777e+02, percent-clipped=1.0 +2024-09-19 05:45:20,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=602660.0, ans=0.1 +2024-09-19 05:45:25,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=602700.0, ans=0.125 +2024-09-19 05:45:26,503 INFO [train.py:1198] (0/2) Epoch 34, batch 1350, loss[loss=0.2394, ctc_loss=0.1243, cr_loss=0.3943, attn_decoder_loss=0.2434, over 29748.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.118, cr_loss=0.36, attn_decoder_loss=0.2414, over 5796062.09 frames. ], batch size: 81, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:45:26,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=602700.0, ans=0.125 +2024-09-19 05:45:31,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=602700.0, ans=0.125 +2024-09-19 05:45:43,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=602740.0, ans=0.125 +2024-09-19 05:45:48,382 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.30 vs. 
limit=12.0 +2024-09-19 05:45:58,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=602780.0, ans=0.125 +2024-09-19 05:46:01,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=602780.0, ans=0.125 +2024-09-19 05:46:14,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=602820.0, ans=0.2 +2024-09-19 05:46:17,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=602820.0, ans=0.125 +2024-09-19 05:46:24,200 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.13 vs. limit=22.5 +2024-09-19 05:46:36,471 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.51 vs. limit=15.0 +2024-09-19 05:46:41,831 INFO [train.py:1198] (0/2) Epoch 34, batch 1400, loss[loss=0.2011, ctc_loss=0.08877, cr_loss=0.2917, attn_decoder_loss=0.2072, over 29612.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1177, cr_loss=0.3593, attn_decoder_loss=0.241, over 5808434.91 frames. ], batch size: 69, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:47:15,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=602980.0, ans=0.0 +2024-09-19 05:47:51,990 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.415e+01 9.038e+01 9.472e+01 1.467e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-19 05:47:55,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=603060.0, ans=0.1 +2024-09-19 05:47:59,666 INFO [train.py:1198] (0/2) Epoch 34, batch 1450, loss[loss=0.2511, ctc_loss=0.1285, cr_loss=0.3811, attn_decoder_loss=0.2562, over 29476.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1176, cr_loss=0.3588, attn_decoder_loss=0.2413, over 5805225.85 frames. ], batch size: 94, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:48:06,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=603100.0, ans=0.1 +2024-09-19 05:48:09,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=603100.0, ans=0.1 +2024-09-19 05:48:32,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=603180.0, ans=0.0 +2024-09-19 05:48:32,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=603180.0, ans=0.2 +2024-09-19 05:48:50,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=603220.0, ans=0.0 +2024-09-19 05:48:50,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=603220.0, ans=0.0 +2024-09-19 05:49:17,722 INFO [train.py:1198] (0/2) Epoch 34, batch 1500, loss[loss=0.2521, ctc_loss=0.1303, cr_loss=0.3866, attn_decoder_loss=0.257, over 29642.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1178, cr_loss=0.3596, attn_decoder_loss=0.2417, over 5805682.33 frames. 
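The `grad_scale` reported at the end of each `train.py:1198` line (it moves between 8.0 and 16.0 in this stretch) is the current scale of PyTorch's mixed-precision loss scaler: the scaler multiplies the scale up after a long run of overflow-free steps and halves it on overflow, so it drifts between powers of two. The standard pattern is below; this is the stock `torch.cuda.amp` recipe, not a copy of icefall's training loop:

```python
import torch

scaler = torch.cuda.amp.GradScaler(enabled=True)

def training_step(model, optimizer, batch, compute_loss):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast(enabled=True):
        loss = compute_loss(model, batch)   # forward pass in reduced precision
    scaler.scale(loss).backward()           # backward on the scaled loss
    scaler.step(optimizer)                  # unscales grads; skips step on overflow
    scaler.update()                         # grows/shrinks the scale
    return loss.detach(), scaler.get_scale()   # get_scale() -> 'grad_scale' above
```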
], batch size: 86, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:49:23,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.10 vs. limit=6.0 +2024-09-19 05:49:31,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=603340.0, ans=0.125 +2024-09-19 05:49:38,081 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:49:41,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=603340.0, ans=0.09899494936611666 +2024-09-19 05:49:42,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=603340.0, ans=0.125 +2024-09-19 05:50:04,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=603420.0, ans=0.125 +2024-09-19 05:50:05,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=603420.0, ans=0.025 +2024-09-19 05:50:13,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=603420.0, ans=0.2 +2024-09-19 05:50:13,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.09 vs. limit=15.0 +2024-09-19 05:50:26,554 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.633e+01 8.541e+01 9.102e+01 9.733e+01 3.230e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-19 05:50:28,873 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.02 vs. limit=15.0 +2024-09-19 05:50:34,066 INFO [train.py:1198] (0/2) Epoch 34, batch 1550, loss[loss=0.2474, ctc_loss=0.1245, cr_loss=0.3674, attn_decoder_loss=0.2529, over 29508.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1179, cr_loss=0.3595, attn_decoder_loss=0.2415, over 5778841.64 frames. ], batch size: 90, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:50:34,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=603500.0, ans=0.125 +2024-09-19 05:50:51,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=603540.0, ans=0.125 +2024-09-19 05:50:57,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=603540.0, ans=0.0 +2024-09-19 05:51:03,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=603580.0, ans=0.0 +2024-09-19 05:51:12,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=603580.0, ans=0.125 +2024-09-19 05:51:33,964 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:51:37,444 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.70 vs. 
limit=15.0 +2024-09-19 05:51:41,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=603660.0, ans=0.125 +2024-09-19 05:51:53,942 INFO [train.py:1198] (0/2) Epoch 34, batch 1600, loss[loss=0.2353, ctc_loss=0.1059, cr_loss=0.3365, attn_decoder_loss=0.2422, over 29664.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1178, cr_loss=0.3592, attn_decoder_loss=0.2414, over 5762492.51 frames. ], batch size: 85, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:52:24,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=603780.0, ans=0.125 +2024-09-19 05:52:34,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=603780.0, ans=0.1 +2024-09-19 05:52:39,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=603820.0, ans=0.0 +2024-09-19 05:52:41,651 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.85 vs. limit=22.5 +2024-09-19 05:53:01,949 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.513e+01 8.541e+01 8.929e+01 9.524e+01 1.976e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-19 05:53:09,391 INFO [train.py:1198] (0/2) Epoch 34, batch 1650, loss[loss=0.2515, ctc_loss=0.1197, cr_loss=0.3566, attn_decoder_loss=0.2582, over 29712.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1176, cr_loss=0.3583, attn_decoder_loss=0.2413, over 5756436.28 frames. ], batch size: 89, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:53:43,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=603980.0, ans=0.1 +2024-09-19 05:54:11,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=604060.0, ans=0.1 +2024-09-19 05:54:14,218 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=604060.0, ans=0.0 +2024-09-19 05:54:17,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=604060.0, ans=0.0 +2024-09-19 05:54:17,888 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=9.89 vs. limit=22.5 +2024-09-19 05:54:25,724 INFO [train.py:1198] (0/2) Epoch 34, batch 1700, loss[loss=0.2033, ctc_loss=0.09864, cr_loss=0.3037, attn_decoder_loss=0.2081, over 29593.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1173, cr_loss=0.3577, attn_decoder_loss=0.2412, over 5779018.29 frames. ], batch size: 69, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:54:41,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.05 vs. 
limit=15.0 +2024-09-19 05:55:27,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=604260.0, ans=0.2 +2024-09-19 05:55:33,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=604260.0, ans=0.0 +2024-09-19 05:55:35,946 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.641e+01 8.515e+01 9.078e+01 9.556e+01 1.170e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 05:55:45,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.12 vs. limit=12.0 +2024-09-19 05:55:45,694 INFO [train.py:1198] (0/2) Epoch 34, batch 1750, loss[loss=0.1996, ctc_loss=0.0908, cr_loss=0.3046, attn_decoder_loss=0.205, over 29329.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1172, cr_loss=0.3581, attn_decoder_loss=0.2411, over 5787194.69 frames. ], batch size: 67, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:55:57,368 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.98 vs. limit=22.5 +2024-09-19 05:56:01,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=604340.0, ans=0.125 +2024-09-19 05:56:10,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=604340.0, ans=0.1 +2024-09-19 05:56:18,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.78 vs. limit=15.0 +2024-09-19 05:56:31,893 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.89 vs. limit=10.0 +2024-09-19 05:56:46,666 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.66 vs. limit=15.0 +2024-09-19 05:57:00,866 INFO [train.py:1198] (0/2) Epoch 34, batch 1800, loss[loss=0.2539, ctc_loss=0.1305, cr_loss=0.3957, attn_decoder_loss=0.2588, over 29697.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1176, cr_loss=0.3593, attn_decoder_loss=0.2415, over 5789075.60 frames. ], batch size: 83, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:57:04,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.56 vs. limit=22.5 +2024-09-19 05:57:10,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=604500.0, ans=0.125 +2024-09-19 05:57:39,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.20 vs. 
limit=15.0 +2024-09-19 05:57:42,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=604580.0, ans=0.2 +2024-09-19 05:57:51,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=604620.0, ans=0.125 +2024-09-19 05:57:51,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=604620.0, ans=0.125 +2024-09-19 05:58:09,166 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.032e+01 8.453e+01 8.879e+01 9.546e+01 1.316e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-19 05:58:09,888 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.52 vs. limit=22.5 +2024-09-19 05:58:16,918 INFO [train.py:1198] (0/2) Epoch 34, batch 1850, loss[loss=0.2554, ctc_loss=0.1361, cr_loss=0.3976, attn_decoder_loss=0.2598, over 29630.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1171, cr_loss=0.3586, attn_decoder_loss=0.2411, over 5795779.71 frames. ], batch size: 86, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:58:23,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=604700.0, ans=0.125 +2024-09-19 05:58:25,117 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.58 vs. limit=22.5 +2024-09-19 05:58:55,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=604780.0, ans=0.125 +2024-09-19 05:59:01,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=604820.0, ans=0.5 +2024-09-19 05:59:36,980 INFO [train.py:1198] (0/2) Epoch 34, batch 1900, loss[loss=0.2465, ctc_loss=0.1252, cr_loss=0.3884, attn_decoder_loss=0.2514, over 29706.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1176, cr_loss=0.3598, attn_decoder_loss=0.2418, over 5803217.40 frames. ], batch size: 89, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:59:42,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.18 vs. 
limit=6.0 +2024-09-19 05:59:46,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=604900.0, ans=0.125 +2024-09-19 06:00:10,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=604980.0, ans=0.0 +2024-09-19 06:00:15,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=604980.0, ans=0.125 +2024-09-19 06:00:18,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=604980.0, ans=0.5 +2024-09-19 06:00:37,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=605060.0, ans=0.125 +2024-09-19 06:00:39,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=605060.0, ans=0.0 +2024-09-19 06:00:39,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=605060.0, ans=0.1 +2024-09-19 06:00:45,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=605060.0, ans=0.0 +2024-09-19 06:00:45,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=605060.0, ans=0.1 +2024-09-19 06:00:46,819 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.799e+01 9.191e+01 9.672e+01 1.531e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-19 06:00:47,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=605060.0, ans=0.1 +2024-09-19 06:00:52,902 INFO [train.py:1198] (0/2) Epoch 34, batch 1950, loss[loss=0.2361, ctc_loss=0.1257, cr_loss=0.3843, attn_decoder_loss=0.2398, over 29456.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1187, cr_loss=0.3629, attn_decoder_loss=0.2428, over 5818374.05 frames. ], batch size: 78, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:00:54,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=605100.0, ans=0.1 +2024-09-19 06:00:59,917 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.62 vs. 
limit=10.0 +2024-09-19 06:01:02,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=605100.0, ans=0.0 +2024-09-19 06:01:08,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=605140.0, ans=0.025 +2024-09-19 06:01:32,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=605180.0, ans=0.1 +2024-09-19 06:01:35,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=605180.0, ans=0.125 +2024-09-19 06:01:46,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=605220.0, ans=10.0 +2024-09-19 06:01:49,563 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.92 vs. limit=12.0 +2024-09-19 06:01:51,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=605260.0, ans=0.125 +2024-09-19 06:01:56,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=605260.0, ans=0.125 +2024-09-19 06:02:08,569 INFO [train.py:1198] (0/2) Epoch 34, batch 2000, loss[loss=0.206, ctc_loss=0.09164, cr_loss=0.3147, attn_decoder_loss=0.2117, over 29352.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1184, cr_loss=0.3615, attn_decoder_loss=0.2425, over 5797752.63 frames. ], batch size: 67, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:02:10,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=605300.0, ans=0.125 +2024-09-19 06:02:16,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=605300.0, ans=0.125 +2024-09-19 06:03:05,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=605420.0, ans=0.1 +2024-09-19 06:03:13,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=605460.0, ans=0.025 +2024-09-19 06:03:22,872 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.556e+01 8.535e+01 9.098e+01 9.559e+01 2.375e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-19 06:03:26,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=605460.0, ans=0.04949747468305833 +2024-09-19 06:03:28,940 INFO [train.py:1198] (0/2) Epoch 34, batch 2050, loss[loss=0.2142, ctc_loss=0.0997, cr_loss=0.3214, attn_decoder_loss=0.2198, over 29459.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1177, cr_loss=0.36, attn_decoder_loss=0.2415, over 5789702.64 frames. 
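The many `balancer` entries (`balancer1.prob`, `balancer2.min_positive`, `balancer1.max_abs`, ...) belong to Balancer modules, which nudge gradients so that each channel keeps its fraction of positive activations and its mean absolute value inside target ranges; `prob` appears to be the probability that the correction is applied on a given batch. Below is a monitoring helper that merely checks such targets, not the gradient-modifying module itself, and its default bounds are illustrative:

```python
import torch

def balancer_stats(x: torch.Tensor, min_positive=0.05, max_positive=0.95,
                   min_abs=0.2, max_abs=10.0):
    """Per-channel check of activations x (frames, channels) against
    Balancer-style targets."""
    pos_frac = (x > 0).float().mean(dim=0)   # fraction of positive values
    mean_abs = x.abs().mean(dim=0)           # mean magnitude per channel
    ok = ((pos_frac >= min_positive) & (pos_frac <= max_positive)
          & (mean_abs >= min_abs) & (mean_abs <= max_abs))
    return pos_frac, mean_abs, ok

_, _, ok = balancer_stats(torch.randn(1000, 512))
print(ok.float().mean())   # share of channels currently inside the targets
```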
], batch size: 70, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:03:38,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=605500.0, ans=0.125 +2024-09-19 06:03:39,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=605500.0, ans=0.2 +2024-09-19 06:03:47,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=605540.0, ans=0.1 +2024-09-19 06:03:59,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=605580.0, ans=0.5 +2024-09-19 06:04:00,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=605580.0, ans=0.025 +2024-09-19 06:04:03,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=605580.0, ans=0.0 +2024-09-19 06:04:19,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=605620.0, ans=0.125 +2024-09-19 06:04:44,676 INFO [train.py:1198] (0/2) Epoch 34, batch 2100, loss[loss=0.2351, ctc_loss=0.1153, cr_loss=0.3527, attn_decoder_loss=0.2406, over 29758.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.117, cr_loss=0.3586, attn_decoder_loss=0.2411, over 5801947.88 frames. ], batch size: 81, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:04:57,014 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:05:06,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=605740.0, ans=0.125 +2024-09-19 06:05:27,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=605780.0, ans=15.0 +2024-09-19 06:05:30,543 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.09 vs. limit=22.5 +2024-09-19 06:05:31,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=605820.0, ans=0.0 +2024-09-19 06:05:40,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=605820.0, ans=0.125 +2024-09-19 06:05:52,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=605860.0, ans=0.0 +2024-09-19 06:05:53,727 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.782e+01 8.705e+01 9.050e+01 9.610e+01 1.138e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 06:05:54,738 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.08 vs. limit=15.0 +2024-09-19 06:05:59,953 INFO [train.py:1198] (0/2) Epoch 34, batch 2150, loss[loss=0.2218, ctc_loss=0.1099, cr_loss=0.3613, attn_decoder_loss=0.2262, over 29456.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1165, cr_loss=0.3574, attn_decoder_loss=0.2406, over 5816554.39 frames. 
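Each `loss[...] over N frames` entry is a per-frame average for one batch, while `tot_loss[...] over N frames` is a frame-weighted running average over recent batches; its frame count hovers around 5.8M rather than growing without bound, which points to exponential forgetting. A sketch of such a tracker; the decay constant is a guess, and icefall's `MetricsTracker` works differently in detail:

```python
class RunningLoss:
    """Frame-weighted running average of a per-frame loss with
    exponential forgetting of old batches."""

    def __init__(self, decay: float = 0.995):   # decay chosen for illustration
        self.decay = decay
        self.loss_sum = 0.0    # decayed sum of (per-frame loss * frames)
        self.frames = 0.0      # decayed sum of frames

    def update(self, batch_loss: float, batch_frames: float) -> None:
        self.loss_sum = self.decay * self.loss_sum + batch_loss * batch_frames
        self.frames = self.decay * self.frames + batch_frames

    @property
    def value(self) -> float:
        return self.loss_sum / max(self.frames, 1.0)

tracker = RunningLoss()
tracker.update(0.2218, 29456.0)   # 'loss=0.2218 ... over 29456.00 frames' above
print(tracker.value)
```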
], batch size: 78, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:06:07,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.71 vs. limit=15.0 +2024-09-19 06:06:27,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=605940.0, ans=0.0 +2024-09-19 06:06:30,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=605980.0, ans=0.04949747468305833 +2024-09-19 06:06:33,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=605980.0, ans=0.0 +2024-09-19 06:07:17,779 INFO [train.py:1198] (0/2) Epoch 34, batch 2200, loss[loss=0.246, ctc_loss=0.1226, cr_loss=0.384, attn_decoder_loss=0.2512, over 29612.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1169, cr_loss=0.3578, attn_decoder_loss=0.2409, over 5813414.13 frames. ], batch size: 86, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:07:26,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=606100.0, ans=0.1 +2024-09-19 06:07:39,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=606140.0, ans=0.015 +2024-09-19 06:07:43,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=606140.0, ans=0.0 +2024-09-19 06:07:46,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=606140.0, ans=0.125 +2024-09-19 06:08:31,029 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.748e+01 9.126e+01 9.549e+01 2.332e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-19 06:08:35,716 INFO [train.py:1198] (0/2) Epoch 34, batch 2250, loss[loss=0.2384, ctc_loss=0.1161, cr_loss=0.3536, attn_decoder_loss=0.2441, over 29721.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.117, cr_loss=0.3579, attn_decoder_loss=0.2409, over 5813334.29 frames. 
], batch size: 82, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:08:40,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=606300.0, ans=0.125 +2024-09-19 06:08:42,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=606300.0, ans=0.1 +2024-09-19 06:08:52,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=606340.0, ans=0.05 +2024-09-19 06:08:54,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=606340.0, ans=0.125 +2024-09-19 06:09:01,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=606340.0, ans=0.0 +2024-09-19 06:09:03,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=606340.0, ans=0.0 +2024-09-19 06:09:14,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=606380.0, ans=0.0 +2024-09-19 06:09:20,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=606420.0, ans=0.1 +2024-09-19 06:09:20,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=606420.0, ans=0.0 +2024-09-19 06:09:51,737 INFO [train.py:1198] (0/2) Epoch 34, batch 2300, loss[loss=0.215, ctc_loss=0.1026, cr_loss=0.324, attn_decoder_loss=0.2202, over 29752.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1163, cr_loss=0.3563, attn_decoder_loss=0.24, over 5801343.14 frames. ], batch size: 72, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:09:54,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=606500.0, ans=0.2 +2024-09-19 06:10:13,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=606540.0, ans=0.0 +2024-09-19 06:10:22,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=606580.0, ans=0.125 +2024-09-19 06:10:31,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=606580.0, ans=0.0 +2024-09-19 06:10:31,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=606580.0, ans=0.1 +2024-09-19 06:11:00,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=606660.0, ans=0.125 +2024-09-19 06:11:02,991 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.476e+01 8.516e+01 9.072e+01 9.584e+01 2.753e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-19 06:11:07,527 INFO [train.py:1198] (0/2) Epoch 34, batch 2350, loss[loss=0.2409, ctc_loss=0.123, cr_loss=0.3651, attn_decoder_loss=0.2459, over 29665.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1164, cr_loss=0.3566, attn_decoder_loss=0.2402, over 5806138.52 frames. 
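The `bypass.scale_min` and `bypass.skip_rate` entries describe Zipformer's bypass connections: each block's output is a learned per-channel interpolation between the block input and the block output, with the learned scale clamped from below by `scale_min` (scheduled to 0.2 here, so at least 20% of the block output always gets through). A sketch of that interpolation; the real `BypassModule` adds scheduling, and the skip-rate mechanics are omitted:

```python
import torch
import torch.nn as nn

class Bypass(nn.Module):
    """Learned per-channel mix of a block's input x and output y:
    out = x + s * (y - x), with s clamped to [scale_min, 1.0]."""

    def __init__(self, num_channels: int, scale_min: float = 0.2):
        super().__init__()
        self.scale_min = scale_min
        self.scale = nn.Parameter(torch.full((num_channels,), 0.5))

    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        s = self.scale.clamp(min=self.scale_min, max=1.0)
        return x + s * (y - x)   # s at scale_min -> mostly skip; s=1 -> block output

bypass = Bypass(num_channels=512, scale_min=0.2)
x = torch.randn(10, 512)
print(bypass(x, torch.tanh(x)).shape)   # torch.Size([10, 512])
```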
], batch size: 83, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:11:17,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=606700.0, ans=0.125 +2024-09-19 06:11:24,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=606700.0, ans=0.125 +2024-09-19 06:11:50,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.51 vs. limit=22.5 +2024-09-19 06:12:14,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=606860.0, ans=0.125 +2024-09-19 06:12:17,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.74 vs. limit=10.0 +2024-09-19 06:12:23,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=606860.0, ans=0.025 +2024-09-19 06:12:27,789 INFO [train.py:1198] (0/2) Epoch 34, batch 2400, loss[loss=0.2356, ctc_loss=0.1216, cr_loss=0.3724, attn_decoder_loss=0.24, over 29545.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1173, cr_loss=0.3586, attn_decoder_loss=0.2407, over 5809527.64 frames. ], batch size: 76, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:12:48,192 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.86 vs. limit=15.0 +2024-09-19 06:12:49,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=606940.0, ans=0.125 +2024-09-19 06:13:12,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.74 vs. limit=22.5 +2024-09-19 06:13:40,174 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.418e+01 8.501e+01 8.985e+01 9.485e+01 2.487e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 06:13:42,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=607100.0, ans=0.125 +2024-09-19 06:13:43,277 INFO [train.py:1198] (0/2) Epoch 34, batch 2450, loss[loss=0.2482, ctc_loss=0.1265, cr_loss=0.388, attn_decoder_loss=0.2531, over 29669.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.118, cr_loss=0.3595, attn_decoder_loss=0.2416, over 5785645.05 frames. ], batch size: 82, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:13:49,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=607100.0, ans=0.1 +2024-09-19 06:14:03,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=607140.0, ans=0.025 +2024-09-19 06:14:11,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.06 vs. 
limit=15.0 +2024-09-19 06:14:20,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=607180.0, ans=0.0 +2024-09-19 06:14:32,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=607220.0, ans=0.125 +2024-09-19 06:14:42,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=607260.0, ans=0.1 +2024-09-19 06:14:54,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=607260.0, ans=0.1 +2024-09-19 06:14:59,118 INFO [train.py:1198] (0/2) Epoch 34, batch 2500, loss[loss=0.2451, ctc_loss=0.1289, cr_loss=0.381, attn_decoder_loss=0.2496, over 29611.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1179, cr_loss=0.3595, attn_decoder_loss=0.2413, over 5796074.54 frames. ], batch size: 86, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:15:10,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=607300.0, ans=0.125 +2024-09-19 06:15:20,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=607340.0, ans=0.2 +2024-09-19 06:15:46,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=607380.0, ans=0.1 +2024-09-19 06:15:54,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=607420.0, ans=0.125 +2024-09-19 06:16:16,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.53 vs. limit=15.0 +2024-09-19 06:16:16,476 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.480e+01 8.449e+01 8.900e+01 9.375e+01 2.079e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-19 06:16:19,647 INFO [train.py:1198] (0/2) Epoch 34, batch 2550, loss[loss=0.2072, ctc_loss=0.1032, cr_loss=0.3173, attn_decoder_loss=0.2117, over 29355.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1181, cr_loss=0.36, attn_decoder_loss=0.2416, over 5799320.07 frames. ], batch size: 67, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:16:30,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=607500.0, ans=0.125 +2024-09-19 06:16:53,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.44 vs. limit=22.5 +2024-09-19 06:16:54,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=607580.0, ans=0.0 +2024-09-19 06:16:57,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=607580.0, ans=0.125 +2024-09-19 06:17:05,685 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.37 vs. 
limit=22.5 +2024-09-19 06:17:08,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=607620.0, ans=0.1 +2024-09-19 06:17:35,380 INFO [train.py:1198] (0/2) Epoch 34, batch 2600, loss[loss=0.2272, ctc_loss=0.1051, cr_loss=0.331, attn_decoder_loss=0.2334, over 29479.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1178, cr_loss=0.3594, attn_decoder_loss=0.2417, over 5794736.32 frames. ], batch size: 78, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:17:38,983 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.52 vs. limit=22.5 +2024-09-19 06:17:43,832 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.19 vs. limit=22.5 +2024-09-19 06:18:14,575 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:18:43,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=607860.0, ans=0.125 +2024-09-19 06:18:47,317 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.373e+01 8.717e+01 9.275e+01 9.753e+01 1.560e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 06:18:49,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=607900.0, ans=0.0 +2024-09-19 06:18:50,258 INFO [train.py:1198] (0/2) Epoch 34, batch 2650, loss[loss=0.2517, ctc_loss=0.1265, cr_loss=0.3824, attn_decoder_loss=0.2572, over 29195.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1178, cr_loss=0.3594, attn_decoder_loss=0.242, over 5801353.06 frames. ], batch size: 100, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:18:57,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.76 vs. limit=22.5 +2024-09-19 06:18:58,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=607900.0, ans=0.0 +2024-09-19 06:19:31,349 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-152000.pt +2024-09-19 06:20:04,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=608060.0, ans=0.0 +2024-09-19 06:20:10,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=608060.0, ans=0.0 +2024-09-19 06:20:17,682 INFO [train.py:1198] (0/2) Epoch 34, batch 2700, loss[loss=0.2365, ctc_loss=0.1128, cr_loss=0.3434, attn_decoder_loss=0.2426, over 29515.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1183, cr_loss=0.3606, attn_decoder_loss=0.2422, over 5795420.10 frames. 
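The experiment directory in the checkpoint path above encodes how the three logged components are combined: `ctc-loss-scale-0.1`, `aed-loss-scale-0.9`, `cr-loss-scale-0.02`. Those weights reproduce the reported totals; for the batch 2700 entry just above, 0.1 × 0.1128 + 0.9 × 0.2426 + 0.02 × 0.3434 ≈ 0.2365. As a one-line check, with the scales read from the directory name rather than from the training code:

```python
def total_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float,
               ctc_scale: float = 0.1, aed_scale: float = 0.9,
               cr_scale: float = 0.02) -> float:
    """Weighted sum of the three per-frame losses, using the scales encoded in
    'exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02'."""
    return ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss

# Batch 2700: ctc_loss=0.1128, attn_decoder_loss=0.2426, cr_loss=0.3434
print(round(total_loss(0.1128, 0.2426, 0.3434), 4))   # 0.2365, as logged
```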
], batch size: 87, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:20:25,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=608100.0, ans=0.1 +2024-09-19 06:20:32,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=608140.0, ans=0.2 +2024-09-19 06:21:03,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=608220.0, ans=0.125 +2024-09-19 06:21:15,101 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.54 vs. limit=22.5 +2024-09-19 06:21:30,487 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.496e+01 8.546e+01 9.039e+01 9.900e+01 1.946e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 06:21:33,579 INFO [train.py:1198] (0/2) Epoch 34, batch 2750, loss[loss=0.2294, ctc_loss=0.1112, cr_loss=0.3455, attn_decoder_loss=0.2349, over 29526.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1172, cr_loss=0.3582, attn_decoder_loss=0.241, over 5794099.90 frames. ], batch size: 75, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:22:02,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=608380.0, ans=0.125 +2024-09-19 06:22:10,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=608380.0, ans=0.1 +2024-09-19 06:22:11,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.62 vs. limit=22.5 +2024-09-19 06:22:17,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=608420.0, ans=0.0 +2024-09-19 06:22:33,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.89 vs. limit=15.0 +2024-09-19 06:22:36,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=608460.0, ans=0.125 +2024-09-19 06:22:37,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=608460.0, ans=0.1 +2024-09-19 06:22:38,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.20 vs. limit=15.0 +2024-09-19 06:22:42,504 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.01 vs. limit=15.0 +2024-09-19 06:22:42,885 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.81 vs. limit=15.0 +2024-09-19 06:22:47,414 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.17 vs. limit=15.0 +2024-09-19 06:22:51,655 INFO [train.py:1198] (0/2) Epoch 34, batch 2800, loss[loss=0.2591, ctc_loss=0.1473, cr_loss=0.3894, attn_decoder_loss=0.2629, over 20100.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1177, cr_loss=0.3592, attn_decoder_loss=0.2413, over 5774374.08 frames. 
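The `checkpoint.py:75` line above writes `checkpoint-152000.pt`, named after the global batch index rather than the epoch. A sketch of the periodic snapshotting this implies; `save_every_n=4000` divides 152000 evenly but is an assumption, as is the exact set of saved objects:

```python
import torch

def maybe_save_checkpoint(model, optimizer, exp_dir: str,
                          batch_idx_train: int, save_every_n: int = 4000):
    """Write 'checkpoint-{global_batch}.pt' every save_every_n global batches."""
    if batch_idx_train > 0 and batch_idx_train % save_every_n == 0:
        torch.save(
            {
                "model": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "batch_idx_train": batch_idx_train,
            },
            f"{exp_dir}/checkpoint-{batch_idx_train}.pt",
        )
```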
], batch size: 209, lr: 3.24e-03, grad_scale: 16.0 +2024-09-19 06:23:02,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=608500.0, ans=0.0 +2024-09-19 06:23:02,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=608500.0, ans=0.0 +2024-09-19 06:23:07,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=608540.0, ans=0.0 +2024-09-19 06:23:07,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=608540.0, ans=0.125 +2024-09-19 06:23:19,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=608540.0, ans=0.1 +2024-09-19 06:23:37,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=608620.0, ans=0.1 +2024-09-19 06:23:44,095 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.81 vs. limit=15.0 +2024-09-19 06:23:48,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=608620.0, ans=0.125 +2024-09-19 06:24:07,377 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.476e+01 9.039e+01 9.642e+01 3.312e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 06:24:08,977 INFO [train.py:1198] (0/2) Epoch 34, batch 2850, loss[loss=0.2205, ctc_loss=0.1014, cr_loss=0.321, attn_decoder_loss=0.2266, over 29509.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1181, cr_loss=0.3601, attn_decoder_loss=0.2418, over 5761306.59 frames. ], batch size: 77, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:24:09,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=608700.0, ans=0.125 +2024-09-19 06:24:49,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=608780.0, ans=0.125 +2024-09-19 06:24:59,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=608820.0, ans=0.0 +2024-09-19 06:25:05,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=608820.0, ans=0.125 +2024-09-19 06:25:17,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=608860.0, ans=0.0 +2024-09-19 06:25:20,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=608860.0, ans=0.0 +2024-09-19 06:25:25,015 INFO [train.py:1198] (0/2) Epoch 34, batch 2900, loss[loss=0.2308, ctc_loss=0.1137, cr_loss=0.3512, attn_decoder_loss=0.236, over 29422.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1187, cr_loss=0.3616, attn_decoder_loss=0.2429, over 5786338.83 frames. ], batch size: 79, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:25:37,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=608900.0, ans=0.0 +2024-09-19 06:25:40,858 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.34 vs. 
limit=15.0 +2024-09-19 06:25:43,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=608940.0, ans=0.125 +2024-09-19 06:25:57,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=608980.0, ans=0.1 +2024-09-19 06:25:58,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=608980.0, ans=0.125 +2024-09-19 06:26:27,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=609060.0, ans=0.0 +2024-09-19 06:26:41,354 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.489e+01 8.332e+01 8.811e+01 9.212e+01 1.381e+02, threshold=1.762e+02, percent-clipped=0.0 +2024-09-19 06:26:42,923 INFO [train.py:1198] (0/2) Epoch 34, batch 2950, loss[loss=0.2267, ctc_loss=0.1206, cr_loss=0.3745, attn_decoder_loss=0.2301, over 29522.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1176, cr_loss=0.3594, attn_decoder_loss=0.2414, over 5781504.44 frames. ], batch size: 75, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:26:46,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=609100.0, ans=0.1 +2024-09-19 06:26:47,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=609100.0, ans=0.125 +2024-09-19 06:26:52,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=609100.0, ans=0.0 +2024-09-19 06:27:16,539 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.04 vs. limit=15.0 +2024-09-19 06:27:20,838 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.15 vs. limit=6.0 +2024-09-19 06:27:21,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=609180.0, ans=0.0 +2024-09-19 06:27:31,389 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.14 vs. limit=6.0 +2024-09-19 06:27:37,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.02 vs. limit=15.0 +2024-09-19 06:27:43,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=609220.0, ans=0.125 +2024-09-19 06:28:00,851 INFO [train.py:1198] (0/2) Epoch 34, batch 3000, loss[loss=0.2349, ctc_loss=0.1149, cr_loss=0.3591, attn_decoder_loss=0.2403, over 29755.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1175, cr_loss=0.3591, attn_decoder_loss=0.2411, over 5781958.39 frames. ], batch size: 81, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:28:00,852 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 06:28:19,438 INFO [train.py:1230] (0/2) Epoch 34, validation: loss=0.2118, ctc_loss=0.03645, cr_loss=6.088e-15, attn_decoder_loss=0.2313, over 944034.00 frames. 
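Note on reading these entries: the headline loss reported by train.py is a weighted sum of the three logged components, with weights matching the scales encoded in the experiment directory name used for checkpoints above (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02). A minimal sketch that checks this against the validation entry just above; the helper name and the check itself are illustrative, not icefall's actual API:

    def combined_loss(ctc_loss, attn_decoder_loss, cr_loss,
                      ctc_scale=0.1, aed_scale=0.9, cr_scale=0.02):
        # Weighted sum of the CTC, attention-decoder, and consistency-regularization
        # losses; the scales are read off the experiment dir name in this log,
        # not taken from icefall source.
        return ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss

    # Validation entry above: loss=0.2118, ctc_loss=0.03645,
    # cr_loss=6.088e-15, attn_decoder_loss=0.2313
    assert abs(combined_loss(0.03645, 0.2313, 6.088e-15) - 0.2118) < 5e-4

The same relation holds for every tot_loss entry in this excerpt (e.g. batch 2600: 0.1 × 0.1178 + 0.9 × 0.2417 + 0.02 × 0.3594 ≈ 0.2365). The optim.py WARNING lines follow a similar pattern: the reported threshold equals Clipping_scale (2.0) times the median grad-norm quartile, e.g. 2.0 × 9.275e+01 = 1.855e+02 in the first warning of this excerpt.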
+2024-09-19 06:28:19,438 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 06:28:25,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=609300.0, ans=0.0 +2024-09-19 06:28:39,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=609340.0, ans=0.0 +2024-09-19 06:28:39,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=609340.0, ans=0.0 +2024-09-19 06:28:47,262 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:28:48,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=609380.0, ans=0.1 +2024-09-19 06:28:56,782 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.45 vs. limit=15.0 +2024-09-19 06:29:33,591 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.251e+01 8.609e+01 9.134e+01 9.597e+01 3.076e+02, threshold=1.827e+02, percent-clipped=2.0 +2024-09-19 06:29:35,190 INFO [train.py:1198] (0/2) Epoch 34, batch 3050, loss[loss=0.2252, ctc_loss=0.1083, cr_loss=0.3428, attn_decoder_loss=0.2305, over 29534.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1183, cr_loss=0.3608, attn_decoder_loss=0.2418, over 5775736.45 frames. ], batch size: 76, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:29:43,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=609500.0, ans=0.5 +2024-09-19 06:29:59,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=609540.0, ans=0.125 +2024-09-19 06:30:05,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=609580.0, ans=0.0 +2024-09-19 06:30:07,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=609580.0, ans=0.125 +2024-09-19 06:30:23,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=609620.0, ans=0.125 +2024-09-19 06:30:45,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=609660.0, ans=0.0 +2024-09-19 06:30:45,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=609660.0, ans=0.0 +2024-09-19 06:30:53,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=609700.0, ans=0.125 +2024-09-19 06:30:54,917 INFO [train.py:1198] (0/2) Epoch 34, batch 3100, loss[loss=0.2504, ctc_loss=0.1249, cr_loss=0.3771, attn_decoder_loss=0.256, over 29277.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1178, cr_loss=0.3599, attn_decoder_loss=0.2415, over 5774949.34 frames. 
], batch size: 100, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:30:55,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=609700.0, ans=0.0 +2024-09-19 06:30:56,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=609700.0, ans=0.125 +2024-09-19 06:30:56,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=609700.0, ans=0.125 +2024-09-19 06:31:40,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=609820.0, ans=0.125 +2024-09-19 06:32:09,148 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.390e+01 8.517e+01 8.968e+01 9.546e+01 1.931e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-19 06:32:10,672 INFO [train.py:1198] (0/2) Epoch 34, batch 3150, loss[loss=0.2445, ctc_loss=0.1183, cr_loss=0.3632, attn_decoder_loss=0.2504, over 28906.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1177, cr_loss=0.3594, attn_decoder_loss=0.2414, over 5781962.20 frames. ], batch size: 104, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:32:15,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=609900.0, ans=0.0 +2024-09-19 06:32:15,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=609900.0, ans=0.125 +2024-09-19 06:32:15,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=609900.0, ans=0.2 +2024-09-19 06:32:26,207 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:32:29,683 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.38 vs. limit=15.0 +2024-09-19 06:32:45,756 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:32:47,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=609980.0, ans=0.0 +2024-09-19 06:32:54,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=610020.0, ans=0.0 +2024-09-19 06:33:00,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=610020.0, ans=0.0 +2024-09-19 06:33:22,246 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.87 vs. limit=6.0 +2024-09-19 06:33:25,799 INFO [train.py:1198] (0/2) Epoch 34, batch 3200, loss[loss=0.2334, ctc_loss=0.1149, cr_loss=0.3571, attn_decoder_loss=0.2386, over 29404.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1174, cr_loss=0.3585, attn_decoder_loss=0.2411, over 5792859.14 frames. 
], batch size: 79, lr: 3.24e-03, grad_scale: 16.0 +2024-09-19 06:34:24,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=610220.0, ans=0.125 +2024-09-19 06:34:26,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=610220.0, ans=0.1 +2024-09-19 06:34:27,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=610260.0, ans=0.125 +2024-09-19 06:34:42,401 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.785e+01 8.499e+01 9.052e+01 9.605e+01 1.287e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 06:34:43,869 INFO [train.py:1198] (0/2) Epoch 34, batch 3250, loss[loss=0.2494, ctc_loss=0.1257, cr_loss=0.3653, attn_decoder_loss=0.255, over 29710.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1178, cr_loss=0.3597, attn_decoder_loss=0.2417, over 5799477.54 frames. ], batch size: 84, lr: 3.24e-03, grad_scale: 16.0 +2024-09-19 06:35:03,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=610340.0, ans=0.125 +2024-09-19 06:35:13,808 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.36 vs. limit=15.0 +2024-09-19 06:35:16,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=610380.0, ans=0.0 +2024-09-19 06:35:24,896 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.74 vs. limit=15.0 +2024-09-19 06:35:42,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=610420.0, ans=0.125 +2024-09-19 06:35:43,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.74 vs. limit=6.0 +2024-09-19 06:35:57,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=610460.0, ans=0.125 +2024-09-19 06:36:01,921 INFO [train.py:1198] (0/2) Epoch 34, batch 3300, loss[loss=0.2489, ctc_loss=0.1238, cr_loss=0.3492, attn_decoder_loss=0.2551, over 28385.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1169, cr_loss=0.3575, attn_decoder_loss=0.2405, over 5797321.67 frames. ], batch size: 111, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:36:27,039 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.78 vs. 
limit=15.0 +2024-09-19 06:36:34,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=610580.0, ans=10.0 +2024-09-19 06:36:44,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=610580.0, ans=0.125 +2024-09-19 06:36:55,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=610620.0, ans=0.125 +2024-09-19 06:36:59,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=610620.0, ans=0.125 +2024-09-19 06:37:01,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=610660.0, ans=0.125 +2024-09-19 06:37:17,120 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 8.592e+01 9.077e+01 9.630e+01 2.771e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-19 06:37:17,142 INFO [train.py:1198] (0/2) Epoch 34, batch 3350, loss[loss=0.2474, ctc_loss=0.1272, cr_loss=0.3654, attn_decoder_loss=0.2526, over 28898.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1177, cr_loss=0.3583, attn_decoder_loss=0.2414, over 5775260.55 frames. ], batch size: 104, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:37:25,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=610700.0, ans=0.125 +2024-09-19 06:37:31,172 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:37:58,307 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.93 vs. limit=22.5 +2024-09-19 06:38:06,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=610820.0, ans=0.2 +2024-09-19 06:38:18,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=610860.0, ans=0.1 +2024-09-19 06:38:21,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=610860.0, ans=0.1 +2024-09-19 06:38:29,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=610860.0, ans=0.125 +2024-09-19 06:38:35,445 INFO [train.py:1198] (0/2) Epoch 34, batch 3400, loss[loss=0.2121, ctc_loss=0.1067, cr_loss=0.3267, attn_decoder_loss=0.2166, over 29370.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1179, cr_loss=0.359, attn_decoder_loss=0.2414, over 5767374.81 frames. ], batch size: 67, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:38:41,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=610900.0, ans=0.2 +2024-09-19 06:38:42,982 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.28 vs. 
limit=22.5 +2024-09-19 06:38:51,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=610940.0, ans=0.125 +2024-09-19 06:38:54,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=610940.0, ans=0.0 +2024-09-19 06:38:59,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=610940.0, ans=0.025 +2024-09-19 06:39:53,416 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.710e+01 9.261e+01 9.751e+01 2.657e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-19 06:39:53,439 INFO [train.py:1198] (0/2) Epoch 34, batch 3450, loss[loss=0.2451, ctc_loss=0.1237, cr_loss=0.3472, attn_decoder_loss=0.2509, over 28225.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1181, cr_loss=0.3595, attn_decoder_loss=0.2417, over 5775752.82 frames. ], batch size: 111, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:39:55,825 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.15 vs. limit=6.0 +2024-09-19 06:40:03,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=611100.0, ans=0.125 +2024-09-19 06:40:32,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=611180.0, ans=0.1 +2024-09-19 06:40:36,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=611180.0, ans=0.95 +2024-09-19 06:40:41,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=611220.0, ans=0.0 +2024-09-19 06:40:51,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=611220.0, ans=0.1 +2024-09-19 06:41:09,771 INFO [train.py:1198] (0/2) Epoch 34, batch 3500, loss[loss=0.2043, ctc_loss=0.09207, cr_loss=0.2917, attn_decoder_loss=0.2103, over 29343.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1177, cr_loss=0.3583, attn_decoder_loss=0.2411, over 5776833.37 frames. ], batch size: 71, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:41:10,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=611300.0, ans=0.125 +2024-09-19 06:41:50,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=611380.0, ans=0.125 +2024-09-19 06:42:14,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=611460.0, ans=0.125 +2024-09-19 06:42:20,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=611460.0, ans=0.0 +2024-09-19 06:42:25,532 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.57 vs. 
limit=22.5 +2024-09-19 06:42:26,135 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.418e+01 8.638e+01 9.255e+01 9.995e+01 3.984e+02, threshold=1.851e+02, percent-clipped=2.0 +2024-09-19 06:42:26,157 INFO [train.py:1198] (0/2) Epoch 34, batch 3550, loss[loss=0.246, ctc_loss=0.1184, cr_loss=0.3623, attn_decoder_loss=0.2521, over 29714.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1173, cr_loss=0.3579, attn_decoder_loss=0.2411, over 5781753.99 frames. ], batch size: 89, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:42:33,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=611500.0, ans=0.1 +2024-09-19 06:42:45,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=611540.0, ans=0.0 +2024-09-19 06:42:58,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=611580.0, ans=0.125 +2024-09-19 06:43:14,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.34 vs. limit=15.0 +2024-09-19 06:43:42,115 INFO [train.py:1198] (0/2) Epoch 34, batch 3600, loss[loss=0.2252, ctc_loss=0.1095, cr_loss=0.3478, attn_decoder_loss=0.2304, over 29487.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.117, cr_loss=0.3576, attn_decoder_loss=0.2412, over 5791255.91 frames. ], batch size: 77, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 06:44:04,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=611740.0, ans=0.035 +2024-09-19 06:44:33,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=611820.0, ans=0.125 +2024-09-19 06:44:33,698 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.71 vs. limit=15.0 +2024-09-19 06:44:41,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=611860.0, ans=0.1 +2024-09-19 06:44:56,827 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.640e+01 9.081e+01 9.603e+01 2.325e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 06:44:56,853 INFO [train.py:1198] (0/2) Epoch 34, batch 3650, loss[loss=0.2478, ctc_loss=0.1263, cr_loss=0.376, attn_decoder_loss=0.2529, over 29529.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1167, cr_loss=0.3574, attn_decoder_loss=0.2406, over 5793945.81 frames. ], batch size: 90, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 06:45:12,348 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.99 vs. limit=12.0 +2024-09-19 06:45:27,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=611980.0, ans=0.0 +2024-09-19 06:45:29,474 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.43 vs. 
limit=6.0 +2024-09-19 06:45:58,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=612060.0, ans=0.125 +2024-09-19 06:46:11,758 INFO [train.py:1198] (0/2) Epoch 34, batch 3700, loss[loss=0.2469, ctc_loss=0.121, cr_loss=0.3542, attn_decoder_loss=0.253, over 29713.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1171, cr_loss=0.3579, attn_decoder_loss=0.2409, over 5804572.50 frames. ], batch size: 84, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:46:24,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=612100.0, ans=0.0 +2024-09-19 06:46:45,258 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.46 vs. limit=15.0 +2024-09-19 06:46:52,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=612180.0, ans=0.1 +2024-09-19 06:47:02,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=612220.0, ans=0.0 +2024-09-19 06:47:11,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=612260.0, ans=0.05 +2024-09-19 06:47:25,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.56 vs. limit=15.0 +2024-09-19 06:47:26,247 INFO [train.py:1198] (0/2) Epoch 34, batch 3750, loss[loss=0.2075, ctc_loss=0.09346, cr_loss=0.3142, attn_decoder_loss=0.2132, over 29385.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.117, cr_loss=0.3576, attn_decoder_loss=0.2408, over 5808777.09 frames. ], batch size: 67, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:47:27,709 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.425e+01 8.454e+01 8.933e+01 9.373e+01 1.602e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-19 06:47:30,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=612300.0, ans=0.2 +2024-09-19 06:47:32,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=612300.0, ans=0.125 +2024-09-19 06:47:44,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=612340.0, ans=0.125 +2024-09-19 06:48:08,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=612380.0, ans=0.2 +2024-09-19 06:48:17,858 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.80 vs. limit=15.0 +2024-09-19 06:48:26,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=612460.0, ans=0.125 +2024-09-19 06:48:42,284 INFO [train.py:1198] (0/2) Epoch 34, batch 3800, loss[loss=0.2511, ctc_loss=0.1253, cr_loss=0.3749, attn_decoder_loss=0.2567, over 29628.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1168, cr_loss=0.3576, attn_decoder_loss=0.2404, over 5799262.89 frames. 
], batch size: 86, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:48:42,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=612500.0, ans=0.2 +2024-09-19 06:49:01,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=612540.0, ans=0.2 +2024-09-19 06:49:05,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=612540.0, ans=0.1 +2024-09-19 06:49:08,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.18 vs. limit=22.5 +2024-09-19 06:49:39,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=612620.0, ans=0.125 +2024-09-19 06:49:48,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=612660.0, ans=10.0 +2024-09-19 06:49:58,141 INFO [train.py:1198] (0/2) Epoch 34, batch 3850, loss[loss=0.2471, ctc_loss=0.1164, cr_loss=0.3667, attn_decoder_loss=0.2535, over 29231.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1165, cr_loss=0.3568, attn_decoder_loss=0.2402, over 5812783.31 frames. ], batch size: 100, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:49:59,607 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.333e+01 8.389e+01 8.951e+01 9.412e+01 1.497e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-19 06:50:16,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=612740.0, ans=0.0 +2024-09-19 06:50:16,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=612740.0, ans=0.1 +2024-09-19 06:50:19,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=612740.0, ans=0.0 +2024-09-19 06:50:22,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=612740.0, ans=0.0 +2024-09-19 06:50:46,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=612820.0, ans=0.0 +2024-09-19 06:51:07,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=612860.0, ans=0.0 +2024-09-19 06:51:12,887 INFO [train.py:1198] (0/2) Epoch 34, batch 3900, loss[loss=0.2449, ctc_loss=0.1235, cr_loss=0.3596, attn_decoder_loss=0.2504, over 29667.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1171, cr_loss=0.3579, attn_decoder_loss=0.241, over 5816973.05 frames. 
], batch size: 86, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:51:17,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=612900.0, ans=0.2 +2024-09-19 06:51:36,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_na.min_abs, batch_count=612940.0, ans=0.02 +2024-09-19 06:51:36,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=612940.0, ans=0.125 +2024-09-19 06:51:47,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=612980.0, ans=0.125 +2024-09-19 06:51:57,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=613020.0, ans=0.2 +2024-09-19 06:52:10,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=613060.0, ans=0.2 +2024-09-19 06:52:26,846 INFO [train.py:1198] (0/2) Epoch 34, batch 3950, loss[loss=0.2507, ctc_loss=0.131, cr_loss=0.3736, attn_decoder_loss=0.2557, over 29469.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1172, cr_loss=0.3583, attn_decoder_loss=0.2413, over 5836260.80 frames. ], batch size: 97, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:52:28,321 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 8.556e+01 9.009e+01 9.395e+01 1.816e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-19 06:52:29,161 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.55 vs. limit=6.0 +2024-09-19 06:52:30,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=613100.0, ans=0.125 +2024-09-19 06:52:32,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=613100.0, ans=0.125 +2024-09-19 06:52:56,627 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:53:42,106 INFO [train.py:1198] (0/2) Epoch 34, batch 4000, loss[loss=0.2209, ctc_loss=0.1051, cr_loss=0.3291, attn_decoder_loss=0.2265, over 29524.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1173, cr_loss=0.3584, attn_decoder_loss=0.2414, over 5813036.17 frames. ], batch size: 74, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 06:53:52,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=613300.0, ans=0.025 +2024-09-19 06:53:55,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=613340.0, ans=0.0 +2024-09-19 06:54:22,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=613380.0, ans=0.0 +2024-09-19 06:54:33,674 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.45 vs. 
limit=10.0 +2024-09-19 06:54:47,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=613460.0, ans=0.125 +2024-09-19 06:54:49,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys.whitening_limit, batch_count=613460.0, ans=6.0 +2024-09-19 06:54:55,423 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.81 vs. limit=15.0 +2024-09-19 06:54:57,764 INFO [train.py:1198] (0/2) Epoch 34, batch 4050, loss[loss=0.2581, ctc_loss=0.1477, cr_loss=0.3782, attn_decoder_loss=0.262, over 20349.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1175, cr_loss=0.3583, attn_decoder_loss=0.2412, over 5796039.38 frames. ], batch size: 209, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:55:00,713 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.471e+01 9.121e+01 9.639e+01 2.999e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 06:55:00,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=613500.0, ans=0.125 +2024-09-19 06:55:14,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=613540.0, ans=0.125 +2024-09-19 06:55:30,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=613580.0, ans=0.1 +2024-09-19 06:55:38,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=613580.0, ans=0.125 +2024-09-19 06:56:00,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=613660.0, ans=0.125 +2024-09-19 06:56:11,834 INFO [train.py:1198] (0/2) Epoch 34, batch 4100, loss[loss=0.2538, ctc_loss=0.1374, cr_loss=0.4112, attn_decoder_loss=0.2575, over 29490.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1178, cr_loss=0.359, attn_decoder_loss=0.2415, over 5791912.71 frames. ], batch size: 90, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:56:13,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=613700.0, ans=0.0 +2024-09-19 06:57:00,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=613820.0, ans=0.125 +2024-09-19 06:57:00,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=613820.0, ans=0.2 +2024-09-19 06:57:15,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=613860.0, ans=0.0 +2024-09-19 06:57:24,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=613900.0, ans=0.0 +2024-09-19 06:57:25,788 INFO [train.py:1198] (0/2) Epoch 34, batch 4150, loss[loss=0.2313, ctc_loss=0.1147, cr_loss=0.3529, attn_decoder_loss=0.2364, over 29493.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1176, cr_loss=0.3587, attn_decoder_loss=0.2412, over 5796813.55 frames. 
], batch size: 77, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:57:28,807 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.811e+01 8.506e+01 8.901e+01 9.635e+01 1.346e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-19 06:57:33,722 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:57:38,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=613900.0, ans=0.125 +2024-09-19 06:57:43,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=613940.0, ans=0.0 +2024-09-19 06:57:55,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=613980.0, ans=0.0 +2024-09-19 06:57:59,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=613980.0, ans=0.125 +2024-09-19 06:58:24,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=614060.0, ans=0.2 +2024-09-19 06:58:27,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=614060.0, ans=0.1 +2024-09-19 06:58:40,903 INFO [train.py:1198] (0/2) Epoch 34, batch 4200, loss[loss=0.2402, ctc_loss=0.1176, cr_loss=0.3527, attn_decoder_loss=0.246, over 29478.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1176, cr_loss=0.3587, attn_decoder_loss=0.2415, over 5798309.20 frames. ], batch size: 90, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:58:41,230 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:58:44,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=614100.0, ans=0.1 +2024-09-19 06:58:53,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.87 vs. limit=15.0 +2024-09-19 06:59:03,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=614140.0, ans=0.125 +2024-09-19 06:59:11,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=614180.0, ans=0.0 +2024-09-19 06:59:13,803 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.06 vs. limit=22.5 +2024-09-19 06:59:55,719 INFO [train.py:1198] (0/2) Epoch 34, batch 4250, loss[loss=0.2207, ctc_loss=0.1032, cr_loss=0.3243, attn_decoder_loss=0.2265, over 29508.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1172, cr_loss=0.3584, attn_decoder_loss=0.2417, over 5804980.99 frames. 
], batch size: 74, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:59:58,622 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.615e+01 8.496e+01 8.853e+01 9.381e+01 2.444e+02, threshold=1.771e+02, percent-clipped=1.0 +2024-09-19 07:00:09,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=614340.0, ans=0.0 +2024-09-19 07:00:49,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=614420.0, ans=0.125 +2024-09-19 07:00:56,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=614460.0, ans=0.1 +2024-09-19 07:01:02,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=614460.0, ans=0.125 +2024-09-19 07:01:05,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=614460.0, ans=0.125 +2024-09-19 07:01:09,461 INFO [train.py:1198] (0/2) Epoch 34, batch 4300, loss[loss=0.251, ctc_loss=0.1277, cr_loss=0.3973, attn_decoder_loss=0.2559, over 29533.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1172, cr_loss=0.3583, attn_decoder_loss=0.2418, over 5793731.67 frames. ], batch size: 87, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 07:01:11,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=614500.0, ans=0.125 +2024-09-19 07:01:18,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=614500.0, ans=0.125 +2024-09-19 07:01:59,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=614620.0, ans=0.0 +2024-09-19 07:02:22,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=614660.0, ans=0.2 +2024-09-19 07:02:25,331 INFO [train.py:1198] (0/2) Epoch 34, batch 4350, loss[loss=0.2549, ctc_loss=0.1303, cr_loss=0.3852, attn_decoder_loss=0.2601, over 29510.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1199, cr_loss=0.3639, attn_decoder_loss=0.245, over 5796555.71 frames. ], batch size: 97, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 07:02:28,308 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.836e+01 9.274e+01 9.839e+01 5.976e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 07:02:42,613 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.78 vs. limit=15.0 +2024-09-19 07:02:48,204 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.58 vs. limit=22.5 +2024-09-19 07:02:57,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=614780.0, ans=0.125 +2024-09-19 07:03:12,803 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.43 vs. limit=15.0 +2024-09-19 07:03:15,674 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.52 vs. 
limit=15.0 +2024-09-19 07:03:19,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=614820.0, ans=0.1 +2024-09-19 07:03:27,981 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.06 vs. limit=22.5 +2024-09-19 07:03:38,642 INFO [train.py:1198] (0/2) Epoch 34, batch 4400, loss[loss=0.2302, ctc_loss=0.1032, cr_loss=0.3193, attn_decoder_loss=0.2373, over 27349.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1207, cr_loss=0.3654, attn_decoder_loss=0.2466, over 5767013.94 frames. ], batch size: 124, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 07:03:39,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.02 vs. limit=15.0 +2024-09-19 07:03:41,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=614900.0, ans=0.1 +2024-09-19 07:03:43,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=614900.0, ans=0.0 +2024-09-19 07:03:55,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=614940.0, ans=0.125 +2024-09-19 07:04:10,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=614980.0, ans=0.0 +2024-09-19 07:04:22,565 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.28 vs. limit=15.0 +2024-09-19 07:04:31,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=615020.0, ans=0.2 +2024-09-19 07:04:35,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=615020.0, ans=0.0 +2024-09-19 07:04:35,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=615020.0, ans=0.0 +2024-09-19 07:04:53,554 INFO [train.py:1198] (0/2) Epoch 34, batch 4450, loss[loss=0.2565, ctc_loss=0.144, cr_loss=0.3897, attn_decoder_loss=0.2603, over 20484.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1245, cr_loss=0.3712, attn_decoder_loss=0.2487, over 5580829.15 frames. 
], batch size: 210, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 07:04:55,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=615100.0, ans=0.2 +2024-09-19 07:04:56,493 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.888e+01 9.045e+01 9.501e+01 1.052e+02 3.870e+02, threshold=1.900e+02, percent-clipped=1.0 +2024-09-19 07:05:05,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=615100.0, ans=0.125 +2024-09-19 07:05:25,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=615180.0, ans=0.2 +2024-09-19 07:05:49,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=615220.0, ans=0.125 +2024-09-19 07:05:50,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=615220.0, ans=0.1 +2024-09-19 07:05:54,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=615260.0, ans=0.125 +2024-09-19 07:06:04,336 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.03 vs. limit=15.0 +2024-09-19 07:06:09,419 INFO [train.py:1198] (0/2) Epoch 34, batch 4500, loss[loss=0.2559, ctc_loss=0.1443, cr_loss=0.3699, attn_decoder_loss=0.2601, over 19663.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1278, cr_loss=0.3739, attn_decoder_loss=0.2508, over 5239247.98 frames. ], batch size: 210, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 07:06:17,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=615300.0, ans=0.025 +2024-09-19 07:06:34,741 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.43 vs. limit=22.5 +2024-09-19 07:06:44,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=615380.0, ans=0.125 +2024-09-19 07:06:47,043 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-34.pt +2024-09-19 07:07:33,727 INFO [train.py:1198] (0/2) Epoch 35, batch 0, loss[loss=0.2093, ctc_loss=0.09147, cr_loss=0.305, attn_decoder_loss=0.2156, over 29640.00 frames. ], tot_loss[loss=0.2093, ctc_loss=0.09147, cr_loss=0.305, attn_decoder_loss=0.2156, over 29640.00 frames. ], batch size: 73, lr: 3.18e-03, grad_scale: 16.0 +2024-09-19 07:07:33,728 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 07:07:52,111 INFO [train.py:1230] (0/2) Epoch 35, validation: loss=0.2125, ctc_loss=0.03615, cr_loss=6.293e-15, attn_decoder_loss=0.232, over 944034.00 frames. +2024-09-19 07:07:52,112 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 07:07:53,317 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=10.68 vs. limit=10.0 +2024-09-19 07:08:23,415 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.75 vs. 
limit=10.0 +2024-09-19 07:08:36,042 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.990e+01 1.018e+02 1.116e+02 1.176e+02 2.643e+02, threshold=2.232e+02, percent-clipped=1.0 +2024-09-19 07:08:59,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=615560.0, ans=0.05 +2024-09-19 07:09:09,395 INFO [train.py:1198] (0/2) Epoch 35, batch 50, loss[loss=0.2132, ctc_loss=0.1022, cr_loss=0.326, attn_decoder_loss=0.2183, over 29396.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1212, cr_loss=0.3657, attn_decoder_loss=0.2431, over 1267901.08 frames. ], batch size: 70, lr: 3.18e-03, grad_scale: 8.0 +2024-09-19 07:09:32,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=615640.0, ans=0.0 +2024-09-19 07:09:38,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=615680.0, ans=0.05 +2024-09-19 07:10:01,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=615720.0, ans=0.1 +2024-09-19 07:10:15,245 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.41 vs. limit=22.5 +2024-09-19 07:10:23,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=615800.0, ans=0.1 +2024-09-19 07:10:25,303 INFO [train.py:1198] (0/2) Epoch 35, batch 100, loss[loss=0.2283, ctc_loss=0.1126, cr_loss=0.3378, attn_decoder_loss=0.2337, over 29516.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1212, cr_loss=0.3653, attn_decoder_loss=0.2441, over 2251406.66 frames. 
], batch size: 76, lr: 3.18e-03, grad_scale: 8.0 +2024-09-19 07:10:31,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=615800.0, ans=0.125 +2024-09-19 07:10:31,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=615800.0, ans=0.2 +2024-09-19 07:10:37,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=615800.0, ans=0.09899494936611666 +2024-09-19 07:10:40,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=615840.0, ans=0.5 +2024-09-19 07:10:40,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=615840.0, ans=0.125 +2024-09-19 07:10:43,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=615840.0, ans=0.125 +2024-09-19 07:10:52,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=615840.0, ans=15.0 +2024-09-19 07:11:11,190 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.567e+01 8.556e+01 9.012e+01 9.778e+01 2.155e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 07:11:23,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=615920.0, ans=0.0 +2024-09-19 07:11:24,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.45 vs. limit=15.0 +2024-09-19 07:11:24,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=615920.0, ans=0.125 +2024-09-19 07:11:32,193 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=615960.0, ans=0.0 +2024-09-19 07:11:42,866 INFO [train.py:1198] (0/2) Epoch 35, batch 150, loss[loss=0.2025, ctc_loss=0.08999, cr_loss=0.2981, attn_decoder_loss=0.2084, over 29427.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.118, cr_loss=0.3591, attn_decoder_loss=0.2414, over 3046816.81 frames. 
], batch size: 70, lr: 3.18e-03, grad_scale: 8.0 +2024-09-19 07:11:46,326 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:11:59,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=616040.0, ans=0.125 +2024-09-19 07:11:59,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=616040.0, ans=0.125 +2024-09-19 07:12:15,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=616080.0, ans=0.125 +2024-09-19 07:12:24,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=616080.0, ans=0.125 +2024-09-19 07:12:26,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=616080.0, ans=0.0 +2024-09-19 07:12:30,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=616120.0, ans=0.125 +2024-09-19 07:12:32,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=616120.0, ans=0.125 +2024-09-19 07:12:35,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=616120.0, ans=0.0 +2024-09-19 07:12:42,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=616120.0, ans=0.0 +2024-09-19 07:12:56,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=616160.0, ans=0.0 +2024-09-19 07:13:00,661 INFO [train.py:1198] (0/2) Epoch 35, batch 200, loss[loss=0.2537, ctc_loss=0.1307, cr_loss=0.3933, attn_decoder_loss=0.2586, over 27197.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.117, cr_loss=0.3579, attn_decoder_loss=0.2405, over 3657373.81 frames. ], batch size: 124, lr: 3.18e-03, grad_scale: 8.0 +2024-09-19 07:13:18,338 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.32 vs. limit=6.0 +2024-09-19 07:13:21,028 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.43 vs. limit=15.0 +2024-09-19 07:13:22,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=616240.0, ans=0.0 +2024-09-19 07:13:32,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=616280.0, ans=0.025 +2024-09-19 07:13:34,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=616280.0, ans=0.0 +2024-09-19 07:13:44,289 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.474e+01 8.322e+01 8.803e+01 9.325e+01 1.291e+02, threshold=1.761e+02, percent-clipped=0.0 +2024-09-19 07:14:14,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=616400.0, ans=0.125 +2024-09-19 07:14:15,873 INFO [train.py:1198] (0/2) Epoch 35, batch 250, loss[loss=0.2429, ctc_loss=0.1166, cr_loss=0.3418, attn_decoder_loss=0.2493, over 29349.00 frames. 
], tot_loss[loss=0.2351, ctc_loss=0.1165, cr_loss=0.3573, attn_decoder_loss=0.2403, over 4139224.24 frames. ], batch size: 100, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:14:41,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=616440.0, ans=0.2 +2024-09-19 07:14:46,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=616480.0, ans=0.125 +2024-09-19 07:15:03,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.72 vs. limit=22.5 +2024-09-19 07:15:10,737 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.41 vs. limit=15.0 +2024-09-19 07:15:26,131 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.52 vs. limit=22.5 +2024-09-19 07:15:34,117 INFO [train.py:1198] (0/2) Epoch 35, batch 300, loss[loss=0.2517, ctc_loss=0.1249, cr_loss=0.3711, attn_decoder_loss=0.2576, over 29516.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1165, cr_loss=0.3573, attn_decoder_loss=0.2403, over 4506829.44 frames. ], batch size: 92, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:15:53,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=616640.0, ans=0.1 +2024-09-19 07:16:04,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=616640.0, ans=0.0 +2024-09-19 07:16:13,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=616680.0, ans=0.125 +2024-09-19 07:16:20,346 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.277e+01 8.384e+01 8.991e+01 9.743e+01 6.934e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-19 07:16:34,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=616720.0, ans=0.125 +2024-09-19 07:16:46,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=616760.0, ans=0.1 +2024-09-19 07:16:49,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=616760.0, ans=0.125 +2024-09-19 07:16:52,561 INFO [train.py:1198] (0/2) Epoch 35, batch 350, loss[loss=0.2117, ctc_loss=0.09936, cr_loss=0.3125, attn_decoder_loss=0.2172, over 29315.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.117, cr_loss=0.3585, attn_decoder_loss=0.2411, over 4792408.53 frames. ], batch size: 71, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:17:08,453 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.19 vs. 
limit=15.0 +2024-09-19 07:17:10,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=616840.0, ans=0.1 +2024-09-19 07:17:21,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=616880.0, ans=0.2 +2024-09-19 07:17:33,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=616880.0, ans=0.0 +2024-09-19 07:17:45,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=616920.0, ans=0.2 +2024-09-19 07:17:45,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=616920.0, ans=0.0 +2024-09-19 07:17:49,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=616920.0, ans=0.0 +2024-09-19 07:18:04,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.75 vs. limit=15.0 +2024-09-19 07:18:07,763 INFO [train.py:1198] (0/2) Epoch 35, batch 400, loss[loss=0.2272, ctc_loss=0.1069, cr_loss=0.3197, attn_decoder_loss=0.2334, over 29699.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1165, cr_loss=0.3574, attn_decoder_loss=0.2407, over 5023166.50 frames. ], batch size: 82, lr: 3.17e-03, grad_scale: 16.0 +2024-09-19 07:18:16,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.92 vs. limit=15.0 +2024-09-19 07:18:35,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=617040.0, ans=0.125 +2024-09-19 07:18:41,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=617080.0, ans=0.0 +2024-09-19 07:18:41,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=617080.0, ans=0.125 +2024-09-19 07:18:54,454 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.777e+01 8.637e+01 9.137e+01 9.905e+01 1.373e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 07:19:11,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=617160.0, ans=0.125 +2024-09-19 07:19:26,444 INFO [train.py:1198] (0/2) Epoch 35, batch 450, loss[loss=0.2406, ctc_loss=0.1197, cr_loss=0.3618, attn_decoder_loss=0.246, over 29690.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1173, cr_loss=0.3595, attn_decoder_loss=0.2412, over 5184796.81 frames. ], batch size: 83, lr: 3.17e-03, grad_scale: 16.0 +2024-09-19 07:19:29,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=617200.0, ans=0.2 +2024-09-19 07:19:42,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=617240.0, ans=0.125 +2024-09-19 07:19:53,801 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.09 vs. 
limit=12.0 +2024-09-19 07:20:05,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=617280.0, ans=0.125 +2024-09-19 07:20:28,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=617360.0, ans=0.025 +2024-09-19 07:20:44,374 INFO [train.py:1198] (0/2) Epoch 35, batch 500, loss[loss=0.257, ctc_loss=0.1344, cr_loss=0.3778, attn_decoder_loss=0.2623, over 29380.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1167, cr_loss=0.3577, attn_decoder_loss=0.2404, over 5328749.36 frames. ], batch size: 94, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:20:58,921 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.49 vs. limit=15.0 +2024-09-19 07:21:01,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=617440.0, ans=0.05 +2024-09-19 07:21:05,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=617440.0, ans=0.125 +2024-09-19 07:21:05,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=617440.0, ans=0.2 +2024-09-19 07:21:27,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.89 vs. limit=6.0 +2024-09-19 07:21:29,903 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.448e+01 8.461e+01 8.901e+01 9.576e+01 2.460e+02, threshold=1.780e+02, percent-clipped=1.0 +2024-09-19 07:21:38,284 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.67 vs. limit=15.0 +2024-09-19 07:21:39,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.73 vs. limit=6.0 +2024-09-19 07:22:00,512 INFO [train.py:1198] (0/2) Epoch 35, batch 550, loss[loss=0.2571, ctc_loss=0.1261, cr_loss=0.3856, attn_decoder_loss=0.2631, over 28808.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1165, cr_loss=0.3575, attn_decoder_loss=0.2405, over 5421776.48 frames. ], batch size: 104, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:22:02,874 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.41 vs. limit=12.0 +2024-09-19 07:22:06,942 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:22:54,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=617720.0, ans=0.1 +2024-09-19 07:23:08,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=617760.0, ans=0.2 +2024-09-19 07:23:12,340 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.46 vs. limit=12.0 +2024-09-19 07:23:18,917 INFO [train.py:1198] (0/2) Epoch 35, batch 600, loss[loss=0.258, ctc_loss=0.1308, cr_loss=0.3866, attn_decoder_loss=0.2635, over 29267.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1169, cr_loss=0.3583, attn_decoder_loss=0.2409, over 5509462.41 frames. 
], batch size: 100, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:23:23,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=617800.0, ans=0.0 +2024-09-19 07:23:26,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=617800.0, ans=0.09899494936611666 +2024-09-19 07:23:56,738 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.90 vs. limit=6.0 +2024-09-19 07:24:06,189 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.466e+01 8.823e+01 9.402e+01 3.791e+02, threshold=1.765e+02, percent-clipped=1.0 +2024-09-19 07:24:13,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=617920.0, ans=0.125 +2024-09-19 07:24:31,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=617960.0, ans=0.0 +2024-09-19 07:24:35,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=618000.0, ans=0.0 +2024-09-19 07:24:36,197 INFO [train.py:1198] (0/2) Epoch 35, batch 650, loss[loss=0.2336, ctc_loss=0.1068, cr_loss=0.35, attn_decoder_loss=0.24, over 29752.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1159, cr_loss=0.3562, attn_decoder_loss=0.24, over 5586305.07 frames. ], batch size: 81, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:24:51,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=618040.0, ans=0.125 +2024-09-19 07:24:54,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=618040.0, ans=0.125 +2024-09-19 07:25:19,762 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.79 vs. limit=15.0 +2024-09-19 07:25:35,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=618160.0, ans=0.125 +2024-09-19 07:25:52,026 INFO [train.py:1198] (0/2) Epoch 35, batch 700, loss[loss=0.2242, ctc_loss=0.1079, cr_loss=0.3439, attn_decoder_loss=0.2295, over 29539.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1162, cr_loss=0.3567, attn_decoder_loss=0.2404, over 5636100.80 frames. ], batch size: 76, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:26:24,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.01 vs. limit=22.5 +2024-09-19 07:26:28,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=618280.0, ans=0.0 +2024-09-19 07:26:30,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.57 vs. 
limit=22.5 +2024-09-19 07:26:37,261 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.181e+01 8.406e+01 8.899e+01 9.421e+01 1.331e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-19 07:26:42,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=618320.0, ans=0.09899494936611666 +2024-09-19 07:26:43,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=618320.0, ans=0.125 +2024-09-19 07:27:10,382 INFO [train.py:1198] (0/2) Epoch 35, batch 750, loss[loss=0.2346, ctc_loss=0.1132, cr_loss=0.3481, attn_decoder_loss=0.2404, over 29680.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.116, cr_loss=0.3563, attn_decoder_loss=0.2401, over 5676190.05 frames. ], batch size: 82, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:27:55,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=618480.0, ans=0.125 +2024-09-19 07:28:00,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=618520.0, ans=0.2 +2024-09-19 07:28:04,165 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.61 vs. limit=6.0 +2024-09-19 07:28:28,642 INFO [train.py:1198] (0/2) Epoch 35, batch 800, loss[loss=0.21, ctc_loss=0.09416, cr_loss=0.2933, attn_decoder_loss=0.2163, over 29600.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1161, cr_loss=0.3563, attn_decoder_loss=0.2401, over 5706988.89 frames. ], batch size: 73, lr: 3.17e-03, grad_scale: 16.0 +2024-09-19 07:28:48,594 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:28:52,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=618640.0, ans=0.125 +2024-09-19 07:29:07,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=618680.0, ans=0.07 +2024-09-19 07:29:15,165 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.859e+01 8.586e+01 8.985e+01 9.600e+01 2.003e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 07:29:24,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.65 vs. limit=15.0 +2024-09-19 07:29:36,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=618760.0, ans=0.125 +2024-09-19 07:29:36,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=618760.0, ans=0.125 +2024-09-19 07:29:42,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=618800.0, ans=0.1 +2024-09-19 07:29:43,482 INFO [train.py:1198] (0/2) Epoch 35, batch 850, loss[loss=0.2461, ctc_loss=0.1245, cr_loss=0.3656, attn_decoder_loss=0.2515, over 29705.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1158, cr_loss=0.3566, attn_decoder_loss=0.24, over 5736245.31 frames. 
], batch size: 89, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:29:48,317 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:29:55,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=618800.0, ans=0.0 +2024-09-19 07:30:17,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=618880.0, ans=0.025 +2024-09-19 07:30:17,742 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.37 vs. limit=15.0 +2024-09-19 07:30:22,208 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.17 vs. limit=15.0 +2024-09-19 07:30:36,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=618920.0, ans=0.125 +2024-09-19 07:30:38,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=618920.0, ans=0.0 +2024-09-19 07:30:40,490 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.95 vs. limit=15.0 +2024-09-19 07:30:53,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=618960.0, ans=0.125 +2024-09-19 07:31:00,890 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.86 vs. limit=15.0 +2024-09-19 07:31:01,541 INFO [train.py:1198] (0/2) Epoch 35, batch 900, loss[loss=0.2177, ctc_loss=0.1036, cr_loss=0.3257, attn_decoder_loss=0.2231, over 29660.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.116, cr_loss=0.3568, attn_decoder_loss=0.2402, over 5741359.32 frames. ], batch size: 73, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:31:01,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=619000.0, ans=0.125 +2024-09-19 07:31:13,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=619000.0, ans=0.125 +2024-09-19 07:31:37,756 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.15 vs. limit=10.0 +2024-09-19 07:31:49,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=619120.0, ans=0.0 +2024-09-19 07:31:50,717 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.352e+01 8.514e+01 9.190e+01 1.005e+02 2.448e+02, threshold=1.838e+02, percent-clipped=2.0 +2024-09-19 07:31:54,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=619120.0, ans=0.1 +2024-09-19 07:31:56,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=619120.0, ans=0.015 +2024-09-19 07:32:01,979 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.59 vs. 
limit=15.0 +2024-09-19 07:32:04,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=619160.0, ans=0.1 +2024-09-19 07:32:19,735 INFO [train.py:1198] (0/2) Epoch 35, batch 950, loss[loss=0.2153, ctc_loss=0.09471, cr_loss=0.3189, attn_decoder_loss=0.2216, over 29507.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1159, cr_loss=0.3564, attn_decoder_loss=0.2404, over 5743112.27 frames. ], batch size: 74, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:33:16,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=619320.0, ans=6.0 +2024-09-19 07:33:35,506 INFO [train.py:1198] (0/2) Epoch 35, batch 1000, loss[loss=0.2237, ctc_loss=0.09942, cr_loss=0.3284, attn_decoder_loss=0.2303, over 29484.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1167, cr_loss=0.358, attn_decoder_loss=0.2413, over 5737116.11 frames. ], batch size: 77, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:33:40,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=619400.0, ans=0.0 +2024-09-19 07:33:43,400 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:33:58,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=619440.0, ans=0.125 +2024-09-19 07:34:04,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=619480.0, ans=0.0 +2024-09-19 07:34:06,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=619480.0, ans=0.125 +2024-09-19 07:34:22,839 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.400e+01 8.920e+01 9.804e+01 1.524e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-19 07:34:32,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_ff2.min_abs, batch_count=619520.0, ans=0.1 +2024-09-19 07:34:42,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=619560.0, ans=0.1 +2024-09-19 07:34:53,686 INFO [train.py:1198] (0/2) Epoch 35, batch 1050, loss[loss=0.2457, ctc_loss=0.1294, cr_loss=0.3821, attn_decoder_loss=0.2501, over 29686.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1167, cr_loss=0.358, attn_decoder_loss=0.2408, over 5745451.70 frames. ], batch size: 85, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:34:56,289 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.41 vs. limit=22.5 +2024-09-19 07:34:57,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=619600.0, ans=0.0 +2024-09-19 07:34:58,498 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:35:00,815 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.13 vs. limit=22.5 +2024-09-19 07:35:03,487 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.69 vs. 
limit=15.0 +2024-09-19 07:35:05,224 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.29 vs. limit=15.0 +2024-09-19 07:35:41,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=619720.0, ans=0.2 +2024-09-19 07:35:55,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=619760.0, ans=0.0 +2024-09-19 07:36:06,622 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.10 vs. limit=15.0 +2024-09-19 07:36:11,770 INFO [train.py:1198] (0/2) Epoch 35, batch 1100, loss[loss=0.2307, ctc_loss=0.1204, cr_loss=0.361, attn_decoder_loss=0.235, over 29429.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1167, cr_loss=0.358, attn_decoder_loss=0.2406, over 5756866.30 frames. ], batch size: 78, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:36:13,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=619800.0, ans=0.025 +2024-09-19 07:36:32,649 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.35 vs. limit=15.0 +2024-09-19 07:36:39,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=619840.0, ans=0.2 +2024-09-19 07:36:45,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=619880.0, ans=0.125 +2024-09-19 07:36:59,136 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.311e+01 8.442e+01 8.888e+01 9.490e+01 5.357e+02, threshold=1.778e+02, percent-clipped=1.0 +2024-09-19 07:36:59,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=619920.0, ans=0.125 +2024-09-19 07:36:59,602 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:37:06,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=619920.0, ans=0.125 +2024-09-19 07:37:14,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=619960.0, ans=0.125 +2024-09-19 07:37:22,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=619960.0, ans=0.0 +2024-09-19 07:37:28,427 INFO [train.py:1198] (0/2) Epoch 35, batch 1150, loss[loss=0.2226, ctc_loss=0.1049, cr_loss=0.3183, attn_decoder_loss=0.2285, over 29463.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1172, cr_loss=0.3588, attn_decoder_loss=0.2409, over 5756046.34 frames. ], batch size: 78, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:37:53,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=620040.0, ans=0.1 +2024-09-19 07:38:12,055 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.20 vs. 
limit=15.0 +2024-09-19 07:38:19,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=620120.0, ans=0.125 +2024-09-19 07:38:38,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=620160.0, ans=0.0 +2024-09-19 07:38:46,890 INFO [train.py:1198] (0/2) Epoch 35, batch 1200, loss[loss=0.2441, ctc_loss=0.122, cr_loss=0.3802, attn_decoder_loss=0.2492, over 29666.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1174, cr_loss=0.3592, attn_decoder_loss=0.2414, over 5747522.59 frames. ], batch size: 85, lr: 3.17e-03, grad_scale: 16.0 +2024-09-19 07:39:06,340 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.36 vs. limit=15.0 +2024-09-19 07:39:19,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=620280.0, ans=0.2 +2024-09-19 07:39:24,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=620280.0, ans=0.125 +2024-09-19 07:39:31,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=620280.0, ans=0.0 +2024-09-19 07:39:33,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.05 vs. limit=10.0 +2024-09-19 07:39:35,203 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.30 vs. limit=15.0 +2024-09-19 07:39:35,879 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.035e+01 8.532e+01 9.165e+01 9.750e+01 1.443e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-19 07:39:36,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=620320.0, ans=0.2 +2024-09-19 07:39:42,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=620320.0, ans=0.125 +2024-09-19 07:39:44,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.48 vs. limit=12.0 +2024-09-19 07:39:55,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=620360.0, ans=0.125 +2024-09-19 07:39:57,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=620360.0, ans=0.025 +2024-09-19 07:39:58,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=620360.0, ans=0.2 +2024-09-19 07:40:04,452 INFO [train.py:1198] (0/2) Epoch 35, batch 1250, loss[loss=0.2538, ctc_loss=0.1296, cr_loss=0.3802, attn_decoder_loss=0.2592, over 29546.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1176, cr_loss=0.3596, attn_decoder_loss=0.2419, over 5775146.74 frames. 
], batch size: 92, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:40:09,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=620400.0, ans=0.125 +2024-09-19 07:40:12,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=620400.0, ans=0.0 +2024-09-19 07:40:20,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=620440.0, ans=0.125 +2024-09-19 07:40:24,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=620440.0, ans=0.1 +2024-09-19 07:40:24,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=620440.0, ans=0.125 +2024-09-19 07:40:29,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=620440.0, ans=0.1 +2024-09-19 07:40:39,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=620480.0, ans=0.2 +2024-09-19 07:40:50,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=620520.0, ans=0.0 +2024-09-19 07:40:50,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=620520.0, ans=0.125 +2024-09-19 07:40:53,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=620520.0, ans=0.125 +2024-09-19 07:41:10,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=620560.0, ans=0.125 +2024-09-19 07:41:17,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=620560.0, ans=0.025 +2024-09-19 07:41:20,154 INFO [train.py:1198] (0/2) Epoch 35, batch 1300, loss[loss=0.2448, ctc_loss=0.1133, cr_loss=0.3629, attn_decoder_loss=0.2514, over 28132.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1167, cr_loss=0.3578, attn_decoder_loss=0.241, over 5777774.59 frames. 
], batch size: 111, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:41:38,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=620640.0, ans=0.125 +2024-09-19 07:41:49,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=620680.0, ans=0.2 +2024-09-19 07:41:52,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=620680.0, ans=0.125 +2024-09-19 07:41:57,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=620680.0, ans=0.0 +2024-09-19 07:42:01,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=620680.0, ans=0.0 +2024-09-19 07:42:08,927 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.370e+01 8.419e+01 8.887e+01 9.525e+01 1.443e+02, threshold=1.777e+02, percent-clipped=0.0 +2024-09-19 07:42:36,595 INFO [train.py:1198] (0/2) Epoch 35, batch 1350, loss[loss=0.225, ctc_loss=0.1003, cr_loss=0.3383, attn_decoder_loss=0.2314, over 29768.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.116, cr_loss=0.3567, attn_decoder_loss=0.2407, over 5794844.72 frames. ], batch size: 81, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:42:37,452 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.01 vs. limit=6.0 +2024-09-19 07:42:41,894 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.34 vs. limit=12.0 +2024-09-19 07:42:52,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=620840.0, ans=0.125 +2024-09-19 07:42:55,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=620840.0, ans=0.0 +2024-09-19 07:43:05,916 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.52 vs. limit=12.0 +2024-09-19 07:43:17,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=620880.0, ans=0.125 +2024-09-19 07:43:20,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=620880.0, ans=0.0 +2024-09-19 07:43:21,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=620880.0, ans=0.0 +2024-09-19 07:43:21,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=620880.0, ans=0.125 +2024-09-19 07:43:56,461 INFO [train.py:1198] (0/2) Epoch 35, batch 1400, loss[loss=0.2146, ctc_loss=0.107, cr_loss=0.3578, attn_decoder_loss=0.2186, over 29594.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.116, cr_loss=0.3567, attn_decoder_loss=0.2404, over 5806157.07 frames. 
], batch size: 69, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:44:04,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=621000.0, ans=0.125 +2024-09-19 07:44:08,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=621000.0, ans=10.0 +2024-09-19 07:44:31,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=621080.0, ans=0.1 +2024-09-19 07:44:44,725 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.569e+01 8.443e+01 9.009e+01 9.628e+01 2.334e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 07:45:11,970 INFO [train.py:1198] (0/2) Epoch 35, batch 1450, loss[loss=0.2565, ctc_loss=0.1319, cr_loss=0.3868, attn_decoder_loss=0.2617, over 29446.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1163, cr_loss=0.3571, attn_decoder_loss=0.2408, over 5803154.90 frames. ], batch size: 94, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:45:12,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=621200.0, ans=0.2 +2024-09-19 07:45:39,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=621240.0, ans=0.0 +2024-09-19 07:45:48,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=621280.0, ans=0.1 +2024-09-19 07:45:51,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=621280.0, ans=0.2 +2024-09-19 07:45:56,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=621320.0, ans=0.125 +2024-09-19 07:45:58,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.68 vs. limit=6.0 +2024-09-19 07:46:00,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=621320.0, ans=0.04949747468305833 +2024-09-19 07:46:02,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=621320.0, ans=0.2 +2024-09-19 07:46:09,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=621320.0, ans=0.0 +2024-09-19 07:46:23,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=621360.0, ans=0.2 +2024-09-19 07:46:27,761 INFO [train.py:1198] (0/2) Epoch 35, batch 1500, loss[loss=0.2392, ctc_loss=0.1172, cr_loss=0.3614, attn_decoder_loss=0.2447, over 29610.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1162, cr_loss=0.3568, attn_decoder_loss=0.241, over 5805104.22 frames. 
], batch size: 86, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:46:32,705 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:46:34,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=621400.0, ans=0.0 +2024-09-19 07:46:55,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=621440.0, ans=0.0 +2024-09-19 07:47:07,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=621480.0, ans=0.125 +2024-09-19 07:47:16,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=621520.0, ans=0.1 +2024-09-19 07:47:20,907 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.458e+01 9.148e+01 9.758e+01 1.676e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-19 07:47:24,852 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.79 vs. limit=15.0 +2024-09-19 07:47:39,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=621560.0, ans=0.125 +2024-09-19 07:47:48,565 INFO [train.py:1198] (0/2) Epoch 35, batch 1550, loss[loss=0.2565, ctc_loss=0.1302, cr_loss=0.3954, attn_decoder_loss=0.2618, over 29530.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1165, cr_loss=0.3577, attn_decoder_loss=0.2412, over 5780795.35 frames. ], batch size: 90, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:47:59,979 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.43 vs. limit=22.5 +2024-09-19 07:48:05,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=621640.0, ans=0.125 +2024-09-19 07:48:14,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=621640.0, ans=0.1 +2024-09-19 07:48:19,044 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:48:34,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=621720.0, ans=0.0 +2024-09-19 07:48:43,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=621720.0, ans=0.0 +2024-09-19 07:48:43,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=621720.0, ans=0.025 +2024-09-19 07:48:50,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=621760.0, ans=0.2 +2024-09-19 07:48:58,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=621760.0, ans=0.1 +2024-09-19 07:49:04,113 INFO [train.py:1198] (0/2) Epoch 35, batch 1600, loss[loss=0.2411, ctc_loss=0.1161, cr_loss=0.362, attn_decoder_loss=0.247, over 29681.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1163, cr_loss=0.357, attn_decoder_loss=0.2409, over 5763323.91 frames. 
], batch size: 85, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:49:18,348 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.21 vs. limit=10.0 +2024-09-19 07:49:25,632 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:49:52,911 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.384e+01 8.463e+01 9.203e+01 9.882e+01 2.471e+02, threshold=1.841e+02, percent-clipped=1.0 +2024-09-19 07:50:00,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=621920.0, ans=0.0 +2024-09-19 07:50:11,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=621960.0, ans=0.125 +2024-09-19 07:50:11,811 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.87 vs. limit=15.0 +2024-09-19 07:50:20,046 INFO [train.py:1198] (0/2) Epoch 35, batch 1650, loss[loss=0.2456, ctc_loss=0.1209, cr_loss=0.3626, attn_decoder_loss=0.2514, over 29671.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1163, cr_loss=0.357, attn_decoder_loss=0.2408, over 5756845.63 frames. ], batch size: 89, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:50:30,001 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.18 vs. limit=15.0 +2024-09-19 07:50:52,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=622080.0, ans=0.125 +2024-09-19 07:51:00,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=622080.0, ans=0.0 +2024-09-19 07:51:21,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=622120.0, ans=0.125 +2024-09-19 07:51:38,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=622200.0, ans=0.1 +2024-09-19 07:51:39,322 INFO [train.py:1198] (0/2) Epoch 35, batch 1700, loss[loss=0.2086, ctc_loss=0.09704, cr_loss=0.3128, attn_decoder_loss=0.2141, over 29574.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1158, cr_loss=0.3567, attn_decoder_loss=0.2406, over 5780122.51 frames. 
], batch size: 69, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:51:51,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=622200.0, ans=0.125 +2024-09-19 07:52:04,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=622240.0, ans=0.2 +2024-09-19 07:52:10,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=622280.0, ans=0.2 +2024-09-19 07:52:11,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=622280.0, ans=0.0 +2024-09-19 07:52:17,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=622280.0, ans=0.125 +2024-09-19 07:52:27,947 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.208e+01 8.456e+01 8.908e+01 9.428e+01 1.294e+02, threshold=1.782e+02, percent-clipped=0.0 +2024-09-19 07:52:31,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=622320.0, ans=0.1 +2024-09-19 07:52:42,363 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.86 vs. limit=15.0 +2024-09-19 07:52:54,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=622400.0, ans=0.125 +2024-09-19 07:52:55,735 INFO [train.py:1198] (0/2) Epoch 35, batch 1750, loss[loss=0.2075, ctc_loss=0.09858, cr_loss=0.3154, attn_decoder_loss=0.2126, over 29362.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.116, cr_loss=0.3571, attn_decoder_loss=0.2406, over 5786938.76 frames. ], batch size: 67, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:53:10,368 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.65 vs. limit=22.5 +2024-09-19 07:53:11,737 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.11 vs. limit=15.0 +2024-09-19 07:53:15,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=622440.0, ans=0.025 +2024-09-19 07:53:23,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=622440.0, ans=0.0 +2024-09-19 07:53:25,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=622480.0, ans=0.125 +2024-09-19 07:53:30,365 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.12 vs. limit=22.5 +2024-09-19 07:53:36,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=622480.0, ans=0.0 +2024-09-19 07:54:03,256 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.30 vs. 
limit=15.0 +2024-09-19 07:54:04,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=622560.0, ans=0.125 +2024-09-19 07:54:04,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=622560.0, ans=0.125 +2024-09-19 07:54:11,191 INFO [train.py:1198] (0/2) Epoch 35, batch 1800, loss[loss=0.2484, ctc_loss=0.1231, cr_loss=0.3653, attn_decoder_loss=0.2542, over 29694.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1162, cr_loss=0.3573, attn_decoder_loss=0.2407, over 5789053.58 frames. ], batch size: 83, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:54:20,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=622600.0, ans=0.0 +2024-09-19 07:54:33,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=622640.0, ans=0.125 +2024-09-19 07:55:05,583 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.428e+01 8.311e+01 8.892e+01 9.552e+01 1.638e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 07:55:14,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=622760.0, ans=0.0 +2024-09-19 07:55:16,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=622760.0, ans=0.125 +2024-09-19 07:55:26,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=622760.0, ans=0.0 +2024-09-19 07:55:30,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=622800.0, ans=0.125 +2024-09-19 07:55:31,265 INFO [train.py:1198] (0/2) Epoch 35, batch 1850, loss[loss=0.2389, ctc_loss=0.1204, cr_loss=0.3687, attn_decoder_loss=0.2439, over 29614.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1163, cr_loss=0.3578, attn_decoder_loss=0.2406, over 5795303.17 frames. ], batch size: 86, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:55:33,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=622800.0, ans=0.125 +2024-09-19 07:55:36,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=622800.0, ans=0.0 +2024-09-19 07:55:36,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.53 vs. 
limit=12.0 +2024-09-19 07:55:48,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=622840.0, ans=0.025 +2024-09-19 07:56:19,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=622920.0, ans=0.1 +2024-09-19 07:56:33,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=622960.0, ans=0.0 +2024-09-19 07:56:35,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=622960.0, ans=0.2 +2024-09-19 07:56:42,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=622960.0, ans=0.1 +2024-09-19 07:56:46,598 INFO [train.py:1198] (0/2) Epoch 35, batch 1900, loss[loss=0.2439, ctc_loss=0.1196, cr_loss=0.3757, attn_decoder_loss=0.2493, over 29740.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1164, cr_loss=0.3574, attn_decoder_loss=0.2408, over 5802913.74 frames. ], batch size: 89, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:56:52,041 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.71 vs. limit=10.0 +2024-09-19 07:57:13,365 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.28 vs. limit=22.5 +2024-09-19 07:57:15,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=623080.0, ans=0.125 +2024-09-19 07:57:24,840 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:57:35,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=623120.0, ans=0.015 +2024-09-19 07:57:36,457 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.358e+01 8.574e+01 9.165e+01 9.579e+01 2.044e+02, threshold=1.833e+02, percent-clipped=2.0 +2024-09-19 07:57:42,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=623120.0, ans=0.125 +2024-09-19 07:57:43,337 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.37 vs. limit=10.0 +2024-09-19 07:58:01,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=623200.0, ans=0.125 +2024-09-19 07:58:02,546 INFO [train.py:1198] (0/2) Epoch 35, batch 1950, loss[loss=0.2344, ctc_loss=0.1112, cr_loss=0.3402, attn_decoder_loss=0.2406, over 29471.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1176, cr_loss=0.36, attn_decoder_loss=0.2423, over 5817677.05 frames. 
], batch size: 78, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:58:21,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=623240.0, ans=0.0 +2024-09-19 07:58:30,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=623240.0, ans=0.125 +2024-09-19 07:58:34,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=623240.0, ans=0.1 +2024-09-19 07:58:47,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=623280.0, ans=0.025 +2024-09-19 07:58:52,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=623320.0, ans=0.0 +2024-09-19 07:58:53,066 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.29 vs. limit=22.5 +2024-09-19 07:58:55,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=623320.0, ans=0.2 +2024-09-19 07:59:18,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=623360.0, ans=0.125 +2024-09-19 07:59:22,127 INFO [train.py:1198] (0/2) Epoch 35, batch 2000, loss[loss=0.2085, ctc_loss=0.09548, cr_loss=0.3172, attn_decoder_loss=0.214, over 29370.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1177, cr_loss=0.3601, attn_decoder_loss=0.2425, over 5794212.54 frames. ], batch size: 67, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 08:00:10,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=623520.0, ans=0.125 +2024-09-19 08:00:13,533 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.210e+01 8.517e+01 9.042e+01 9.652e+01 2.863e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 08:00:15,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=623520.0, ans=0.125 +2024-09-19 08:00:37,512 INFO [train.py:1198] (0/2) Epoch 35, batch 2050, loss[loss=0.2132, ctc_loss=0.09848, cr_loss=0.3221, attn_decoder_loss=0.2188, over 29433.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1177, cr_loss=0.3602, attn_decoder_loss=0.2419, over 5786264.82 frames. ], batch size: 70, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 08:00:48,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=623600.0, ans=0.0 +2024-09-19 08:01:32,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=623720.0, ans=0.2 +2024-09-19 08:01:35,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=623720.0, ans=0.025 +2024-09-19 08:01:36,081 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.71 vs. limit=15.0 +2024-09-19 08:01:53,588 INFO [train.py:1198] (0/2) Epoch 35, batch 2100, loss[loss=0.235, ctc_loss=0.1156, cr_loss=0.3579, attn_decoder_loss=0.2404, over 29798.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1172, cr_loss=0.359, attn_decoder_loss=0.2412, over 5799598.43 frames. 
], batch size: 81, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 08:02:04,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=623800.0, ans=0.2 +2024-09-19 08:02:08,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=623840.0, ans=0.125 +2024-09-19 08:02:18,868 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:02:49,243 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.573e+01 8.509e+01 9.008e+01 9.603e+01 1.299e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 08:02:55,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=623920.0, ans=0.125 +2024-09-19 08:03:06,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=623960.0, ans=0.125 +2024-09-19 08:03:12,216 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-156000.pt +2024-09-19 08:03:20,893 INFO [train.py:1198] (0/2) Epoch 35, batch 2150, loss[loss=0.2306, ctc_loss=0.1162, cr_loss=0.3582, attn_decoder_loss=0.2353, over 29441.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1168, cr_loss=0.3582, attn_decoder_loss=0.2407, over 5813687.80 frames. ], batch size: 78, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 08:03:21,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=624000.0, ans=0.1 +2024-09-19 08:03:24,876 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.30 vs. limit=12.0 +2024-09-19 08:03:47,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=624040.0, ans=0.0 +2024-09-19 08:04:00,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=624080.0, ans=0.2 +2024-09-19 08:04:03,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=624080.0, ans=0.0 +2024-09-19 08:04:35,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=624200.0, ans=0.0 +2024-09-19 08:04:36,522 INFO [train.py:1198] (0/2) Epoch 35, batch 2200, loss[loss=0.2334, ctc_loss=0.1097, cr_loss=0.3362, attn_decoder_loss=0.2397, over 29648.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1172, cr_loss=0.359, attn_decoder_loss=0.241, over 5810436.23 frames. ], batch size: 86, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 08:04:56,252 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:04:58,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.12 vs. 
limit=15.0 +2024-09-19 08:05:26,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=624320.0, ans=15.0 +2024-09-19 08:05:27,602 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.240e+01 8.597e+01 9.109e+01 9.743e+01 2.251e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-19 08:05:32,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=624320.0, ans=0.05 +2024-09-19 08:05:35,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=624360.0, ans=0.0 +2024-09-19 08:05:41,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=624360.0, ans=0.125 +2024-09-19 08:05:41,909 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.84 vs. limit=15.0 +2024-09-19 08:05:47,554 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:05:49,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=624360.0, ans=0.125 +2024-09-19 08:05:51,064 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.33 vs. limit=15.0 +2024-09-19 08:05:51,831 INFO [train.py:1198] (0/2) Epoch 35, batch 2250, loss[loss=0.2388, ctc_loss=0.1179, cr_loss=0.358, attn_decoder_loss=0.2443, over 29717.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1169, cr_loss=0.3586, attn_decoder_loss=0.2409, over 5810622.40 frames. ], batch size: 82, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:07:09,255 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.29 vs. limit=22.5 +2024-09-19 08:07:10,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.68 vs. limit=15.0 +2024-09-19 08:07:11,523 INFO [train.py:1198] (0/2) Epoch 35, batch 2300, loss[loss=0.2149, ctc_loss=0.1006, cr_loss=0.3184, attn_decoder_loss=0.2205, over 29336.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1163, cr_loss=0.357, attn_decoder_loss=0.2402, over 5797762.68 frames. ], batch size: 71, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:07:15,256 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.32 vs. limit=15.0 +2024-09-19 08:07:35,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=624640.0, ans=0.125 +2024-09-19 08:07:38,076 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.33 vs. limit=15.0 +2024-09-19 08:07:44,146 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.39 vs. 
limit=15.0 +2024-09-19 08:07:48,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=624680.0, ans=0.125 +2024-09-19 08:07:54,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=624680.0, ans=0.2 +2024-09-19 08:08:02,741 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.224e+01 8.578e+01 9.084e+01 9.791e+01 1.309e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-19 08:08:08,198 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.06 vs. limit=22.5 +2024-09-19 08:08:24,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=624760.0, ans=0.07 +2024-09-19 08:08:27,452 INFO [train.py:1198] (0/2) Epoch 35, batch 2350, loss[loss=0.2448, ctc_loss=0.1271, cr_loss=0.3742, attn_decoder_loss=0.2496, over 29684.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1162, cr_loss=0.3569, attn_decoder_loss=0.2402, over 5803954.82 frames. ], batch size: 83, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:08:30,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=624800.0, ans=0.125 +2024-09-19 08:08:35,233 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:09:42,850 INFO [train.py:1198] (0/2) Epoch 35, batch 2400, loss[loss=0.224, ctc_loss=0.1071, cr_loss=0.3412, attn_decoder_loss=0.2295, over 29544.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1165, cr_loss=0.3576, attn_decoder_loss=0.2407, over 5808400.93 frames. ], batch size: 76, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:09:47,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=625000.0, ans=0.025 +2024-09-19 08:10:11,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_na.min_abs, batch_count=625040.0, ans=0.02 +2024-09-19 08:10:12,705 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:10:14,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=625080.0, ans=0.5 +2024-09-19 08:10:24,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=625080.0, ans=0.125 +2024-09-19 08:10:38,549 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.738e+01 8.636e+01 9.212e+01 9.895e+01 1.857e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-19 08:11:02,884 INFO [train.py:1198] (0/2) Epoch 35, batch 2450, loss[loss=0.2385, ctc_loss=0.1173, cr_loss=0.3574, attn_decoder_loss=0.2441, over 29745.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1173, cr_loss=0.359, attn_decoder_loss=0.2416, over 5783057.41 frames. ], batch size: 82, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:11:03,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=625200.0, ans=0.025 +2024-09-19 08:11:15,644 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.29 vs. 
limit=15.0 +2024-09-19 08:11:21,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=625240.0, ans=0.0 +2024-09-19 08:11:39,628 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:11:48,770 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:12:01,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=625320.0, ans=0.1 +2024-09-19 08:12:03,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.53 vs. limit=15.0 +2024-09-19 08:12:11,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=625360.0, ans=0.95 +2024-09-19 08:12:18,949 INFO [train.py:1198] (0/2) Epoch 35, batch 2500, loss[loss=0.244, ctc_loss=0.1196, cr_loss=0.3594, attn_decoder_loss=0.2498, over 29644.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1171, cr_loss=0.3589, attn_decoder_loss=0.2415, over 5794375.72 frames. ], batch size: 86, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:12:29,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=625400.0, ans=0.125 +2024-09-19 08:12:45,603 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.69 vs. limit=22.5 +2024-09-19 08:13:10,578 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.197e+01 8.524e+01 8.980e+01 9.425e+01 1.614e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-19 08:13:12,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=625520.0, ans=0.1 +2024-09-19 08:13:18,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=625560.0, ans=0.1 +2024-09-19 08:13:20,535 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.20 vs. limit=6.0 +2024-09-19 08:13:23,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=625560.0, ans=0.125 +2024-09-19 08:13:35,396 INFO [train.py:1198] (0/2) Epoch 35, batch 2550, loss[loss=0.2036, ctc_loss=0.08984, cr_loss=0.2944, attn_decoder_loss=0.2097, over 29304.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1173, cr_loss=0.3589, attn_decoder_loss=0.2412, over 5797334.09 frames. ], batch size: 67, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:13:57,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=625640.0, ans=0.0 +2024-09-19 08:13:59,454 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.20 vs. 
limit=15.0 +2024-09-19 08:14:12,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=625680.0, ans=0.125 +2024-09-19 08:14:13,145 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.12 vs. limit=15.0 +2024-09-19 08:14:21,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=625720.0, ans=0.125 +2024-09-19 08:14:55,516 INFO [train.py:1198] (0/2) Epoch 35, batch 2600, loss[loss=0.222, ctc_loss=0.1091, cr_loss=0.3437, attn_decoder_loss=0.2269, over 29456.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1172, cr_loss=0.3587, attn_decoder_loss=0.2414, over 5794593.41 frames. ], batch size: 78, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:14:55,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=625800.0, ans=0.125 +2024-09-19 08:14:57,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=625800.0, ans=0.125 +2024-09-19 08:15:24,596 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.59 vs. limit=6.0 +2024-09-19 08:15:33,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=625880.0, ans=0.0 +2024-09-19 08:15:46,689 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.822e+01 8.594e+01 9.058e+01 9.611e+01 1.555e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-19 08:15:49,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=625920.0, ans=0.125 +2024-09-19 08:15:51,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=625920.0, ans=0.07 +2024-09-19 08:15:52,192 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.07 vs. limit=15.0 +2024-09-19 08:16:10,548 INFO [train.py:1198] (0/2) Epoch 35, batch 2650, loss[loss=0.2541, ctc_loss=0.1397, cr_loss=0.4159, attn_decoder_loss=0.2576, over 29229.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1174, cr_loss=0.3591, attn_decoder_loss=0.2415, over 5801171.74 frames. ], batch size: 100, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:16:19,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=626000.0, ans=0.0 +2024-09-19 08:16:35,144 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:17:25,667 INFO [train.py:1198] (0/2) Epoch 35, batch 2700, loss[loss=0.2404, ctc_loss=0.1122, cr_loss=0.3426, attn_decoder_loss=0.247, over 29513.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1176, cr_loss=0.3594, attn_decoder_loss=0.2417, over 5796425.67 frames. 
], batch size: 87, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:17:49,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=626240.0, ans=0.125 +2024-09-19 08:18:05,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=626280.0, ans=0.1 +2024-09-19 08:18:20,758 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.500e+01 8.428e+01 9.037e+01 9.618e+01 3.244e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-19 08:18:22,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=626320.0, ans=0.125 +2024-09-19 08:18:22,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=626320.0, ans=0.025 +2024-09-19 08:18:35,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=626360.0, ans=0.125 +2024-09-19 08:18:37,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=626360.0, ans=0.0 +2024-09-19 08:18:46,429 INFO [train.py:1198] (0/2) Epoch 35, batch 2750, loss[loss=0.2283, ctc_loss=0.1173, cr_loss=0.3447, attn_decoder_loss=0.233, over 29505.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1169, cr_loss=0.3581, attn_decoder_loss=0.2408, over 5795058.38 frames. ], batch size: 75, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:18:55,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.min_positive, batch_count=626400.0, ans=0.025 +2024-09-19 08:18:58,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff2.min_abs, batch_count=626400.0, ans=0.1 +2024-09-19 08:19:01,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=626440.0, ans=0.125 +2024-09-19 08:19:07,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=626440.0, ans=0.125 +2024-09-19 08:19:09,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=626440.0, ans=0.2 +2024-09-19 08:19:25,935 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:19:25,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=626480.0, ans=0.05 +2024-09-19 08:19:27,690 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.04 vs. limit=22.5 +2024-09-19 08:19:29,358 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.13 vs. 
limit=12.0 +2024-09-19 08:19:38,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=626520.0, ans=0.125 +2024-09-19 08:19:50,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=626560.0, ans=0.0 +2024-09-19 08:20:02,182 INFO [train.py:1198] (0/2) Epoch 35, batch 2800, loss[loss=0.2545, ctc_loss=0.1338, cr_loss=0.3813, attn_decoder_loss=0.2594, over 20666.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1171, cr_loss=0.3585, attn_decoder_loss=0.2409, over 5775572.52 frames. ], batch size: 209, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:20:35,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=626680.0, ans=10.0 +2024-09-19 08:20:53,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=626720.0, ans=0.125 +2024-09-19 08:20:54,998 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.727e+01 8.599e+01 9.222e+01 9.663e+01 2.009e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-19 08:21:17,441 INFO [train.py:1198] (0/2) Epoch 35, batch 2850, loss[loss=0.2351, ctc_loss=0.1147, cr_loss=0.3431, attn_decoder_loss=0.2408, over 29489.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1176, cr_loss=0.3598, attn_decoder_loss=0.2413, over 5762325.02 frames. ], batch size: 77, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:21:29,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=626800.0, ans=0.0 +2024-09-19 08:21:42,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=626840.0, ans=0.125 +2024-09-19 08:21:56,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=626880.0, ans=0.0 +2024-09-19 08:22:14,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=626920.0, ans=0.04949747468305833 +2024-09-19 08:22:22,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.27 vs. limit=6.0 +2024-09-19 08:22:36,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=627000.0, ans=0.09899494936611666 +2024-09-19 08:22:37,746 INFO [train.py:1198] (0/2) Epoch 35, batch 2900, loss[loss=0.2328, ctc_loss=0.1162, cr_loss=0.3519, attn_decoder_loss=0.2379, over 29428.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1181, cr_loss=0.3613, attn_decoder_loss=0.2423, over 5787767.56 frames. 
], batch size: 79, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:22:45,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=627000.0, ans=0.125 +2024-09-19 08:22:51,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=627040.0, ans=0.125 +2024-09-19 08:23:02,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=627040.0, ans=0.125 +2024-09-19 08:23:17,242 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:23:32,020 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.291e+01 8.643e+01 9.038e+01 9.732e+01 2.249e+02, threshold=1.808e+02, percent-clipped=2.0 +2024-09-19 08:23:38,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=627160.0, ans=0.125 +2024-09-19 08:23:53,518 INFO [train.py:1198] (0/2) Epoch 35, batch 2950, loss[loss=0.2283, ctc_loss=0.1124, cr_loss=0.3436, attn_decoder_loss=0.2336, over 29517.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1173, cr_loss=0.3598, attn_decoder_loss=0.2412, over 5780913.25 frames. ], batch size: 75, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:23:54,625 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.28 vs. limit=15.0 +2024-09-19 08:24:10,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=627240.0, ans=0.1 +2024-09-19 08:24:52,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=627360.0, ans=0.0 +2024-09-19 08:25:09,411 INFO [train.py:1198] (0/2) Epoch 35, batch 3000, loss[loss=0.2346, ctc_loss=0.1145, cr_loss=0.3644, attn_decoder_loss=0.2399, over 29758.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1167, cr_loss=0.3579, attn_decoder_loss=0.2409, over 5781402.93 frames. ], batch size: 81, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:25:09,412 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 08:25:24,889 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.3.encoder.layers.4.self_attn_weights, attn_weights_entropy = tensor([4.1830, 3.8003, 2.9862, 4.0655, 3.3760, 2.8495, 3.0859, 3.3812], + device='cuda:0') +2024-09-19 08:25:28,764 INFO [train.py:1230] (0/2) Epoch 35, validation: loss=0.2119, ctc_loss=0.03685, cr_loss=6.108e-15, attn_decoder_loss=0.2313, over 944034.00 frames. 
+2024-09-19 08:25:28,765 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 08:25:47,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=627440.0, ans=0.0 +2024-09-19 08:25:56,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=627440.0, ans=0.125 +2024-09-19 08:26:15,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=627520.0, ans=0.2 +2024-09-19 08:26:25,878 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 8.691e+01 9.210e+01 9.887e+01 4.457e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-19 08:26:32,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=627560.0, ans=0.125 +2024-09-19 08:26:36,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=627560.0, ans=0.1 +2024-09-19 08:26:38,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=627560.0, ans=0.125 +2024-09-19 08:26:47,037 INFO [train.py:1198] (0/2) Epoch 35, batch 3050, loss[loss=0.2277, ctc_loss=0.1138, cr_loss=0.3605, attn_decoder_loss=0.2324, over 29522.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1174, cr_loss=0.3593, attn_decoder_loss=0.2419, over 5775506.26 frames. ], batch size: 76, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:26:53,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=627600.0, ans=0.125 +2024-09-19 08:27:22,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=627680.0, ans=0.125 +2024-09-19 08:27:29,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=627680.0, ans=0.5 +2024-09-19 08:27:35,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=627720.0, ans=0.1 +2024-09-19 08:27:41,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=627720.0, ans=0.125 +2024-09-19 08:27:41,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=627720.0, ans=0.1 +2024-09-19 08:27:59,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=627760.0, ans=0.125 +2024-09-19 08:28:02,350 INFO [train.py:1198] (0/2) Epoch 35, batch 3100, loss[loss=0.258, ctc_loss=0.1346, cr_loss=0.4101, attn_decoder_loss=0.2626, over 29245.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1173, cr_loss=0.3595, attn_decoder_loss=0.2417, over 5775866.49 frames. ], batch size: 100, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:28:11,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=627800.0, ans=0.2 +2024-09-19 08:28:36,398 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.56 vs. 
limit=6.0 +2024-09-19 08:28:42,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=627880.0, ans=0.1 +2024-09-19 08:28:45,995 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.27 vs. limit=15.0 +2024-09-19 08:28:55,557 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.58 vs. limit=10.0 +2024-09-19 08:28:57,337 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.618e+01 9.080e+01 9.751e+01 2.675e+02, threshold=1.816e+02, percent-clipped=2.0 +2024-09-19 08:29:01,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.42 vs. limit=15.0 +2024-09-19 08:29:11,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=627960.0, ans=0.0 +2024-09-19 08:29:18,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=627960.0, ans=0.07 +2024-09-19 08:29:21,432 INFO [train.py:1198] (0/2) Epoch 35, batch 3150, loss[loss=0.2429, ctc_loss=0.1196, cr_loss=0.3588, attn_decoder_loss=0.2486, over 28778.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1168, cr_loss=0.3584, attn_decoder_loss=0.2413, over 5781830.64 frames. ], batch size: 104, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:29:53,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=628080.0, ans=0.0 +2024-09-19 08:29:58,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=628080.0, ans=0.125 +2024-09-19 08:30:09,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=628120.0, ans=0.125 +2024-09-19 08:30:27,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=628160.0, ans=0.125 +2024-09-19 08:30:29,394 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.02 vs. limit=15.0 +2024-09-19 08:30:38,989 INFO [train.py:1198] (0/2) Epoch 35, batch 3200, loss[loss=0.2266, ctc_loss=0.1089, cr_loss=0.3365, attn_decoder_loss=0.2322, over 29437.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1164, cr_loss=0.3576, attn_decoder_loss=0.2408, over 5793377.52 frames. 
], batch size: 79, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:30:42,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=628200.0, ans=0.025 +2024-09-19 08:31:06,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=628240.0, ans=0.09899494936611666 +2024-09-19 08:31:19,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=628280.0, ans=0.125 +2024-09-19 08:31:27,870 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:31:34,894 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.308e+01 8.608e+01 9.276e+01 9.756e+01 1.910e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 08:31:49,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.03 vs. limit=10.0 +2024-09-19 08:31:54,881 INFO [train.py:1198] (0/2) Epoch 35, batch 3250, loss[loss=0.2467, ctc_loss=0.1214, cr_loss=0.3614, attn_decoder_loss=0.2526, over 29714.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1166, cr_loss=0.3581, attn_decoder_loss=0.2415, over 5800331.16 frames. ], batch size: 84, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:31:57,331 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.44 vs. limit=10.0 +2024-09-19 08:32:22,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=628440.0, ans=0.125 +2024-09-19 08:32:39,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=628520.0, ans=0.2 +2024-09-19 08:32:43,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=628520.0, ans=0.125 +2024-09-19 08:32:56,236 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.80 vs. limit=22.5 +2024-09-19 08:32:57,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=628560.0, ans=0.0 +2024-09-19 08:33:12,772 INFO [train.py:1198] (0/2) Epoch 35, batch 3300, loss[loss=0.2447, ctc_loss=0.1169, cr_loss=0.336, attn_decoder_loss=0.2514, over 28277.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1157, cr_loss=0.3561, attn_decoder_loss=0.2401, over 5797599.22 frames. ], batch size: 111, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:33:14,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=628600.0, ans=0.0 +2024-09-19 08:34:10,348 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.666e+01 8.715e+01 9.290e+01 9.754e+01 2.928e+02, threshold=1.858e+02, percent-clipped=2.0 +2024-09-19 08:34:30,126 INFO [train.py:1198] (0/2) Epoch 35, batch 3350, loss[loss=0.2613, ctc_loss=0.1352, cr_loss=0.4042, attn_decoder_loss=0.2663, over 28898.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1165, cr_loss=0.3573, attn_decoder_loss=0.241, over 5773025.53 frames. 
], batch size: 104, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:34:30,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=628800.0, ans=0.1 +2024-09-19 08:34:53,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=628840.0, ans=0.09899494936611666 +2024-09-19 08:34:53,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=628840.0, ans=0.2 +2024-09-19 08:34:54,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=628840.0, ans=0.125 +2024-09-19 08:35:03,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=628880.0, ans=0.125 +2024-09-19 08:35:04,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=628880.0, ans=0.0 +2024-09-19 08:35:46,083 INFO [train.py:1198] (0/2) Epoch 35, batch 3400, loss[loss=0.2072, ctc_loss=0.0978, cr_loss=0.3213, attn_decoder_loss=0.2122, over 29338.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1165, cr_loss=0.3574, attn_decoder_loss=0.2409, over 5765291.73 frames. ], batch size: 67, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:35:56,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=629000.0, ans=0.125 +2024-09-19 08:36:04,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=629040.0, ans=0.0 +2024-09-19 08:36:08,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=629040.0, ans=0.125 +2024-09-19 08:36:12,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=629040.0, ans=0.2 +2024-09-19 08:36:12,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=629040.0, ans=0.125 +2024-09-19 08:36:18,661 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.14 vs. limit=15.0 +2024-09-19 08:36:25,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=629080.0, ans=0.125 +2024-09-19 08:36:31,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=629120.0, ans=0.125 +2024-09-19 08:36:36,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=629120.0, ans=0.0 +2024-09-19 08:36:44,223 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.920e+01 8.517e+01 9.055e+01 9.651e+01 2.142e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-19 08:36:58,822 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.98 vs. limit=15.0 +2024-09-19 08:37:03,842 INFO [train.py:1198] (0/2) Epoch 35, batch 3450, loss[loss=0.265, ctc_loss=0.14, cr_loss=0.4148, attn_decoder_loss=0.2697, over 28236.00 frames. 
], tot_loss[loss=0.236, ctc_loss=0.1167, cr_loss=0.3583, attn_decoder_loss=0.2413, over 5774214.52 frames. ], batch size: 111, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:37:12,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.34 vs. limit=15.0 +2024-09-19 08:37:26,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=629240.0, ans=0.125 +2024-09-19 08:37:26,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=629240.0, ans=0.125 +2024-09-19 08:37:32,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=629280.0, ans=0.125 +2024-09-19 08:38:21,920 INFO [train.py:1198] (0/2) Epoch 35, batch 3500, loss[loss=0.2146, ctc_loss=0.09909, cr_loss=0.325, attn_decoder_loss=0.2202, over 29341.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1166, cr_loss=0.3581, attn_decoder_loss=0.241, over 5775770.02 frames. ], batch size: 71, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:38:52,327 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:38:55,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=629480.0, ans=0.0 +2024-09-19 08:39:17,036 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.511e+01 8.529e+01 8.957e+01 9.484e+01 1.276e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 08:39:18,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=629520.0, ans=0.04949747468305833 +2024-09-19 08:39:20,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=629560.0, ans=0.05 +2024-09-19 08:39:29,827 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.48 vs. limit=15.0 +2024-09-19 08:39:36,787 INFO [train.py:1198] (0/2) Epoch 35, batch 3550, loss[loss=0.234, ctc_loss=0.1117, cr_loss=0.3435, attn_decoder_loss=0.24, over 29676.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1164, cr_loss=0.3574, attn_decoder_loss=0.2408, over 5780790.83 frames. ], batch size: 89, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:39:38,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=629600.0, ans=0.125 +2024-09-19 08:40:11,546 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.40 vs. limit=15.0 +2024-09-19 08:40:16,243 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=6.19 vs. limit=12.0 +2024-09-19 08:40:37,791 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:40:50,978 INFO [train.py:1198] (0/2) Epoch 35, batch 3600, loss[loss=0.22, ctc_loss=0.09878, cr_loss=0.3261, attn_decoder_loss=0.2262, over 29522.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1164, cr_loss=0.3574, attn_decoder_loss=0.2408, over 5790720.86 frames. 
], batch size: 77, lr: 3.14e-03, grad_scale: 16.0 +2024-09-19 08:41:02,008 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.39 vs. limit=10.0 +2024-09-19 08:41:05,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.37 vs. limit=10.0 +2024-09-19 08:41:08,384 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.26 vs. limit=15.0 +2024-09-19 08:41:12,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=629840.0, ans=0.07 +2024-09-19 08:41:29,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=629880.0, ans=0.125 +2024-09-19 08:41:47,405 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.602e+01 8.545e+01 9.030e+01 9.736e+01 4.485e+02, threshold=1.806e+02, percent-clipped=2.0 +2024-09-19 08:41:55,500 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:42:01,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=629960.0, ans=0.1 +2024-09-19 08:42:07,127 INFO [train.py:1198] (0/2) Epoch 35, batch 3650, loss[loss=0.2596, ctc_loss=0.1392, cr_loss=0.4067, attn_decoder_loss=0.264, over 29513.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1161, cr_loss=0.3572, attn_decoder_loss=0.2403, over 5793322.79 frames. ], batch size: 90, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:42:08,194 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.49 vs. limit=15.0 +2024-09-19 08:42:23,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=630040.0, ans=0.2 +2024-09-19 08:42:37,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=630080.0, ans=0.0 +2024-09-19 08:43:21,873 INFO [train.py:1198] (0/2) Epoch 35, batch 3700, loss[loss=0.2468, ctc_loss=0.1239, cr_loss=0.37, attn_decoder_loss=0.2523, over 29699.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1166, cr_loss=0.3584, attn_decoder_loss=0.2405, over 5803013.18 frames. ], batch size: 84, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:43:42,220 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.24 vs. 
limit=10.0 +2024-09-19 08:44:05,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=630280.0, ans=0.1 +2024-09-19 08:44:08,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=630320.0, ans=0.0 +2024-09-19 08:44:19,843 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.560e+01 8.511e+01 9.010e+01 9.557e+01 1.443e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 08:44:31,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=630360.0, ans=0.015 +2024-09-19 08:44:38,051 INFO [train.py:1198] (0/2) Epoch 35, batch 3750, loss[loss=0.211, ctc_loss=0.09384, cr_loss=0.3064, attn_decoder_loss=0.2173, over 29349.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1166, cr_loss=0.3587, attn_decoder_loss=0.2404, over 5806660.40 frames. ], batch size: 67, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:44:44,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=630400.0, ans=0.0 +2024-09-19 08:44:55,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.20 vs. limit=6.0 +2024-09-19 08:44:57,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=630440.0, ans=0.05 +2024-09-19 08:45:02,393 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:45:05,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=630440.0, ans=0.025 +2024-09-19 08:45:43,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=630560.0, ans=0.0 +2024-09-19 08:45:52,361 INFO [train.py:1198] (0/2) Epoch 35, batch 3800, loss[loss=0.2393, ctc_loss=0.1133, cr_loss=0.336, attn_decoder_loss=0.2458, over 29603.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1163, cr_loss=0.3574, attn_decoder_loss=0.2403, over 5797049.73 frames. ], batch size: 86, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:46:16,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=630640.0, ans=0.0 +2024-09-19 08:46:29,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=630680.0, ans=0.2 +2024-09-19 08:46:34,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=630680.0, ans=0.125 +2024-09-19 08:46:40,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.76 vs. 
limit=15.0 +2024-09-19 08:46:45,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=630720.0, ans=0.125 +2024-09-19 08:46:46,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=630720.0, ans=0.125 +2024-09-19 08:46:48,666 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.417e+01 9.020e+01 9.508e+01 1.354e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-19 08:46:59,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=630760.0, ans=0.09899494936611666 +2024-09-19 08:46:59,885 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.03 vs. limit=15.0 +2024-09-19 08:47:00,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=15.0 +2024-09-19 08:47:02,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=630760.0, ans=0.2 +2024-09-19 08:47:06,583 INFO [train.py:1198] (0/2) Epoch 35, batch 3850, loss[loss=0.2381, ctc_loss=0.1115, cr_loss=0.3528, attn_decoder_loss=0.2443, over 29237.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1157, cr_loss=0.356, attn_decoder_loss=0.24, over 5811323.05 frames. ], batch size: 100, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:47:12,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=630800.0, ans=0.125 +2024-09-19 08:47:38,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=630880.0, ans=0.125 +2024-09-19 08:47:54,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=630920.0, ans=0.125 +2024-09-19 08:48:08,242 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.62 vs. limit=22.5 +2024-09-19 08:48:12,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=630960.0, ans=0.125 +2024-09-19 08:48:13,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=630960.0, ans=0.0 +2024-09-19 08:48:15,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=630960.0, ans=0.0 +2024-09-19 08:48:22,482 INFO [train.py:1198] (0/2) Epoch 35, batch 3900, loss[loss=0.2473, ctc_loss=0.1262, cr_loss=0.3885, attn_decoder_loss=0.2521, over 29629.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1163, cr_loss=0.3574, attn_decoder_loss=0.2406, over 5815707.28 frames. 
], batch size: 86, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:48:27,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=631000.0, ans=0.1 +2024-09-19 08:48:37,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=631040.0, ans=0.125 +2024-09-19 08:48:40,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten.whitening_limit, batch_count=631040.0, ans=22.5 +2024-09-19 08:49:00,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=631080.0, ans=0.125 +2024-09-19 08:49:18,880 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.482e+01 8.961e+01 9.353e+01 1.224e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-19 08:49:32,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=631160.0, ans=0.125 +2024-09-19 08:49:38,531 INFO [train.py:1198] (0/2) Epoch 35, batch 3950, loss[loss=0.2513, ctc_loss=0.1315, cr_loss=0.397, attn_decoder_loss=0.2558, over 29506.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1164, cr_loss=0.3578, attn_decoder_loss=0.2411, over 5835166.37 frames. ], batch size: 97, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:49:40,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=631200.0, ans=10.0 +2024-09-19 08:50:03,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=631240.0, ans=0.1 +2024-09-19 08:50:24,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=631320.0, ans=0.125 +2024-09-19 08:50:32,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=631320.0, ans=0.125 +2024-09-19 08:50:43,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=631360.0, ans=0.0 +2024-09-19 08:50:48,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=631360.0, ans=0.0 +2024-09-19 08:50:52,166 INFO [train.py:1198] (0/2) Epoch 35, batch 4000, loss[loss=0.2327, ctc_loss=0.1138, cr_loss=0.3604, attn_decoder_loss=0.2379, over 29501.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1164, cr_loss=0.3572, attn_decoder_loss=0.241, over 5812117.12 frames. ], batch size: 74, lr: 3.14e-03, grad_scale: 16.0 +2024-09-19 08:51:14,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=631440.0, ans=0.0 +2024-09-19 08:51:28,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=631480.0, ans=0.2 +2024-09-19 08:51:29,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=631480.0, ans=0.0 +2024-09-19 08:51:42,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.28 vs. 
limit=10.0 +2024-09-19 08:51:49,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=631520.0, ans=0.125 +2024-09-19 08:51:50,542 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.935e+01 8.609e+01 9.049e+01 9.611e+01 2.994e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-19 08:52:01,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=631560.0, ans=0.125 +2024-09-19 08:52:06,771 INFO [train.py:1198] (0/2) Epoch 35, batch 4050, loss[loss=0.2532, ctc_loss=0.1488, cr_loss=0.3919, attn_decoder_loss=0.2561, over 20529.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1166, cr_loss=0.3577, attn_decoder_loss=0.241, over 5796937.43 frames. ], batch size: 212, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:52:08,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=631600.0, ans=0.5 +2024-09-19 08:53:02,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=631720.0, ans=0.035 +2024-09-19 08:53:21,184 INFO [train.py:1198] (0/2) Epoch 35, batch 4100, loss[loss=0.259, ctc_loss=0.1414, cr_loss=0.4167, attn_decoder_loss=0.2628, over 29531.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1165, cr_loss=0.3573, attn_decoder_loss=0.2411, over 5792594.84 frames. ], batch size: 90, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:53:21,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=631800.0, ans=0.125 +2024-09-19 08:53:28,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=631800.0, ans=0.0 +2024-09-19 08:53:31,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=631800.0, ans=0.0 +2024-09-19 08:53:49,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=631880.0, ans=0.125 +2024-09-19 08:53:50,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=631880.0, ans=0.125 +2024-09-19 08:54:12,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=631920.0, ans=0.125 +2024-09-19 08:54:19,449 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.260e+01 8.556e+01 9.136e+01 9.776e+01 2.394e+02, threshold=1.827e+02, percent-clipped=3.0 +2024-09-19 08:54:29,350 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.89 vs. limit=15.0 +2024-09-19 08:54:36,201 INFO [train.py:1198] (0/2) Epoch 35, batch 4150, loss[loss=0.2331, ctc_loss=0.1183, cr_loss=0.3675, attn_decoder_loss=0.2376, over 29474.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1163, cr_loss=0.3568, attn_decoder_loss=0.2405, over 5797616.97 frames. 
], batch size: 77, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:54:45,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=632000.0, ans=0.125 +2024-09-19 08:54:48,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=632000.0, ans=0.125 +2024-09-19 08:54:48,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=632000.0, ans=0.125 +2024-09-19 08:54:51,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=632040.0, ans=0.0 +2024-09-19 08:54:55,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=632040.0, ans=0.0 +2024-09-19 08:55:04,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff3.min_abs, batch_count=632080.0, ans=0.2 +2024-09-19 08:55:04,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=632080.0, ans=0.0 +2024-09-19 08:55:18,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=632120.0, ans=0.025 +2024-09-19 08:55:22,679 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.52 vs. limit=15.0 +2024-09-19 08:55:41,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=632160.0, ans=0.0 +2024-09-19 08:55:49,866 INFO [train.py:1198] (0/2) Epoch 35, batch 4200, loss[loss=0.2433, ctc_loss=0.1284, cr_loss=0.3975, attn_decoder_loss=0.2472, over 29510.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1165, cr_loss=0.3575, attn_decoder_loss=0.241, over 5798981.12 frames. ], batch size: 90, lr: 3.14e-03, grad_scale: 8.0 +2024-09-19 08:55:51,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=632200.0, ans=0.1 +2024-09-19 08:55:54,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=632200.0, ans=0.1 +2024-09-19 08:56:24,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=632280.0, ans=0.125 +2024-09-19 08:56:25,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=632280.0, ans=0.125 +2024-09-19 08:56:26,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=632280.0, ans=0.2 +2024-09-19 08:56:27,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=632280.0, ans=0.0 +2024-09-19 08:56:33,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=632320.0, ans=0.125 +2024-09-19 08:56:33,816 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:56:34,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.72 vs. 
limit=22.5 +2024-09-19 08:56:37,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=632320.0, ans=0.015 +2024-09-19 08:56:38,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=632320.0, ans=15.0 +2024-09-19 08:56:48,204 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.470e+01 8.972e+01 9.495e+01 2.308e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-19 08:57:01,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=632360.0, ans=0.125 +2024-09-19 08:57:04,324 INFO [train.py:1198] (0/2) Epoch 35, batch 4250, loss[loss=0.2189, ctc_loss=0.09502, cr_loss=0.3111, attn_decoder_loss=0.2257, over 29532.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1163, cr_loss=0.357, attn_decoder_loss=0.2411, over 5805071.84 frames. ], batch size: 74, lr: 3.13e-03, grad_scale: 8.0 +2024-09-19 08:57:26,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=632440.0, ans=0.125 +2024-09-19 08:57:46,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=632480.0, ans=0.125 +2024-09-19 08:57:47,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=632520.0, ans=0.1 +2024-09-19 08:57:54,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=632520.0, ans=0.1 +2024-09-19 08:57:58,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=632520.0, ans=0.1 +2024-09-19 08:58:13,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=632560.0, ans=0.0 +2024-09-19 08:58:19,074 INFO [train.py:1198] (0/2) Epoch 35, batch 4300, loss[loss=0.2479, ctc_loss=0.1281, cr_loss=0.3678, attn_decoder_loss=0.2531, over 29559.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1163, cr_loss=0.3571, attn_decoder_loss=0.2416, over 5794623.81 frames. ], batch size: 87, lr: 3.13e-03, grad_scale: 8.0 +2024-09-19 08:58:41,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=632640.0, ans=0.0 +2024-09-19 08:58:46,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.55 vs. 
limit=22.5 +2024-09-19 08:58:52,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=632680.0, ans=0.0 +2024-09-19 08:58:53,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=632680.0, ans=0.1 +2024-09-19 08:59:17,001 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.393e+01 8.841e+01 9.230e+01 9.936e+01 2.115e+02, threshold=1.846e+02, percent-clipped=2.0 +2024-09-19 08:59:20,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=632760.0, ans=0.0 +2024-09-19 08:59:34,555 INFO [train.py:1198] (0/2) Epoch 35, batch 4350, loss[loss=0.2505, ctc_loss=0.1299, cr_loss=0.3742, attn_decoder_loss=0.2556, over 29471.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1189, cr_loss=0.3628, attn_decoder_loss=0.2447, over 5797603.02 frames. ], batch size: 97, lr: 3.13e-03, grad_scale: 8.0 +2024-09-19 08:59:40,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=632800.0, ans=0.125 +2024-09-19 08:59:41,316 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.19 vs. limit=15.0 +2024-09-19 09:00:01,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=632840.0, ans=0.125 +2024-09-19 09:00:23,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=632920.0, ans=0.025 +2024-09-19 09:00:27,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=632920.0, ans=0.2 +2024-09-19 09:00:47,706 INFO [train.py:1198] (0/2) Epoch 35, batch 4400, loss[loss=0.248, ctc_loss=0.1306, cr_loss=0.3922, attn_decoder_loss=0.2523, over 27390.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1203, cr_loss=0.3656, attn_decoder_loss=0.2465, over 5769256.30 frames. ], batch size: 124, lr: 3.13e-03, grad_scale: 16.0 +2024-09-19 09:01:06,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_abs, batch_count=633040.0, ans=0.5 +2024-09-19 09:01:28,761 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.14 vs. limit=15.0 +2024-09-19 09:01:35,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=633120.0, ans=0.07 +2024-09-19 09:01:39,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=633120.0, ans=0.125 +2024-09-19 09:01:45,571 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.031e+01 8.931e+01 9.450e+01 9.933e+01 1.920e+02, threshold=1.890e+02, percent-clipped=1.0 +2024-09-19 09:01:50,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=633160.0, ans=0.125 +2024-09-19 09:01:51,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.13 vs. 
limit=15.0 +2024-09-19 09:02:02,859 INFO [train.py:1198] (0/2) Epoch 35, batch 4450, loss[loss=0.2551, ctc_loss=0.1424, cr_loss=0.4017, attn_decoder_loss=0.2587, over 20507.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1237, cr_loss=0.3711, attn_decoder_loss=0.2485, over 5583854.42 frames. ], batch size: 209, lr: 3.13e-03, grad_scale: 16.0 +2024-09-19 09:02:06,498 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.69 vs. limit=12.0 +2024-09-19 09:02:15,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=633200.0, ans=0.2 +2024-09-19 09:02:15,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=633200.0, ans=0.1 +2024-09-19 09:02:35,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=633280.0, ans=0.125 +2024-09-19 09:03:17,988 INFO [train.py:1198] (0/2) Epoch 35, batch 4500, loss[loss=0.2572, ctc_loss=0.1477, cr_loss=0.3827, attn_decoder_loss=0.2609, over 19135.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.127, cr_loss=0.3735, attn_decoder_loss=0.2504, over 5239063.70 frames. ], batch size: 209, lr: 3.13e-03, grad_scale: 8.0 +2024-09-19 09:03:18,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=633400.0, ans=0.0 +2024-09-19 09:03:45,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=633440.0, ans=0.125 +2024-09-19 09:03:51,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=633480.0, ans=0.125 +2024-09-19 09:03:53,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=15.39 vs. limit=15.0 +2024-09-19 09:03:55,219 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-35.pt +2024-09-19 09:04:39,980 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:04:41,128 INFO [train.py:1198] (0/2) Epoch 36, batch 0, loss[loss=0.2157, ctc_loss=0.1019, cr_loss=0.3389, attn_decoder_loss=0.2208, over 29596.00 frames. ], tot_loss[loss=0.2157, ctc_loss=0.1019, cr_loss=0.3389, attn_decoder_loss=0.2208, over 29596.00 frames. ], batch size: 73, lr: 3.09e-03, grad_scale: 16.0 +2024-09-19 09:04:41,128 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 09:04:49,388 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.2387, 3.8427, 4.1141, 3.7377], device='cuda:0') +2024-09-19 09:04:56,200 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.1522, 4.3278, 4.4782, 4.7765], device='cuda:0') +2024-09-19 09:04:59,474 INFO [train.py:1230] (0/2) Epoch 36, validation: loss=0.2129, ctc_loss=0.03662, cr_loss=5.743e-15, attn_decoder_loss=0.2325, over 944034.00 frames. 
+2024-09-19 09:04:59,474 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 09:05:02,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=633500.0, ans=0.125 +2024-09-19 09:05:08,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=633500.0, ans=0.125 +2024-09-19 09:05:13,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=633540.0, ans=0.125 +2024-09-19 09:05:22,044 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.883e+01 1.073e+02 1.144e+02 1.210e+02 8.768e+02, threshold=2.289e+02, percent-clipped=4.0 +2024-09-19 09:05:33,883 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.36 vs. limit=15.0 +2024-09-19 09:06:08,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=633660.0, ans=0.0 +2024-09-19 09:06:15,586 INFO [train.py:1198] (0/2) Epoch 36, batch 50, loss[loss=0.213, ctc_loss=0.09483, cr_loss=0.3005, attn_decoder_loss=0.2195, over 29427.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1168, cr_loss=0.3594, attn_decoder_loss=0.241, over 1267692.84 frames. ], batch size: 70, lr: 3.09e-03, grad_scale: 16.0 +2024-09-19 09:06:30,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=633700.0, ans=0.1 +2024-09-19 09:06:30,436 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.33 vs. limit=10.0 +2024-09-19 09:06:52,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=633780.0, ans=0.0 +2024-09-19 09:06:52,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=633780.0, ans=0.125 +2024-09-19 09:07:18,246 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.83 vs. limit=22.5 +2024-09-19 09:07:19,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=633860.0, ans=0.1 +2024-09-19 09:07:23,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=633860.0, ans=0.125 +2024-09-19 09:07:28,786 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.86 vs. limit=22.5 +2024-09-19 09:07:34,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=633900.0, ans=0.1 +2024-09-19 09:07:35,509 INFO [train.py:1198] (0/2) Epoch 36, batch 100, loss[loss=0.2234, ctc_loss=0.1034, cr_loss=0.3298, attn_decoder_loss=0.2294, over 29529.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.119, cr_loss=0.3641, attn_decoder_loss=0.2436, over 2251081.11 frames. 
], batch size: 76, lr: 3.09e-03, grad_scale: 16.0 +2024-09-19 09:07:37,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=633900.0, ans=0.2 +2024-09-19 09:07:49,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=633940.0, ans=0.125 +2024-09-19 09:07:49,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=633940.0, ans=0.125 +2024-09-19 09:07:57,922 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.147e+01 8.630e+01 9.046e+01 9.825e+01 1.723e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 09:07:58,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=633940.0, ans=0.025 +2024-09-19 09:08:22,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=634020.0, ans=0.125 +2024-09-19 09:08:36,426 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.80 vs. limit=15.0 +2024-09-19 09:08:44,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=634060.0, ans=0.125 +2024-09-19 09:08:50,177 INFO [train.py:1198] (0/2) Epoch 36, batch 150, loss[loss=0.2111, ctc_loss=0.0983, cr_loss=0.3197, attn_decoder_loss=0.2166, over 29418.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1161, cr_loss=0.3578, attn_decoder_loss=0.2411, over 3047344.17 frames. ], batch size: 70, lr: 3.09e-03, grad_scale: 16.0 +2024-09-19 09:08:54,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=634100.0, ans=0.025 +2024-09-19 09:09:23,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=634180.0, ans=0.2 +2024-09-19 09:09:50,979 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.21 vs. limit=10.0 +2024-09-19 09:10:00,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=634260.0, ans=0.1 +2024-09-19 09:10:04,908 INFO [train.py:1198] (0/2) Epoch 36, batch 200, loss[loss=0.2484, ctc_loss=0.124, cr_loss=0.3775, attn_decoder_loss=0.2538, over 27109.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1155, cr_loss=0.356, attn_decoder_loss=0.24, over 3658860.25 frames. ], batch size: 124, lr: 3.09e-03, grad_scale: 16.0 +2024-09-19 09:10:24,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=634340.0, ans=0.125 +2024-09-19 09:10:29,661 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.434e+01 8.423e+01 8.790e+01 9.226e+01 1.100e+02, threshold=1.758e+02, percent-clipped=0.0 +2024-09-19 09:11:05,591 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.78 vs. limit=6.0 +2024-09-19 09:11:25,654 INFO [train.py:1198] (0/2) Epoch 36, batch 250, loss[loss=0.251, ctc_loss=0.123, cr_loss=0.3658, attn_decoder_loss=0.2571, over 29287.00 frames. 
], tot_loss[loss=0.2344, ctc_loss=0.1153, cr_loss=0.3549, attn_decoder_loss=0.2398, over 4141728.24 frames. ], batch size: 100, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:11:31,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=634500.0, ans=0.025 +2024-09-19 09:11:41,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=634540.0, ans=0.1 +2024-09-19 09:11:56,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=634580.0, ans=0.125 +2024-09-19 09:12:06,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=634580.0, ans=0.125 +2024-09-19 09:12:13,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.18 vs. limit=6.0 +2024-09-19 09:12:27,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=634660.0, ans=0.0 +2024-09-19 09:12:40,875 INFO [train.py:1198] (0/2) Epoch 36, batch 300, loss[loss=0.2546, ctc_loss=0.1355, cr_loss=0.4038, attn_decoder_loss=0.2588, over 29559.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1154, cr_loss=0.3558, attn_decoder_loss=0.2399, over 4509960.68 frames. ], batch size: 92, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:12:47,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=634700.0, ans=0.1 +2024-09-19 09:12:54,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=634740.0, ans=0.2 +2024-09-19 09:12:57,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=634740.0, ans=0.125 +2024-09-19 09:12:57,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=634740.0, ans=0.0 +2024-09-19 09:13:04,720 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.387e+01 8.698e+01 9.076e+01 9.667e+01 1.639e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-19 09:13:06,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=634740.0, ans=0.0 +2024-09-19 09:13:08,227 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:13:24,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=634820.0, ans=0.0 +2024-09-19 09:13:29,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=634820.0, ans=0.5 +2024-09-19 09:13:38,488 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:13:56,338 INFO [train.py:1198] (0/2) Epoch 36, batch 350, loss[loss=0.2069, ctc_loss=0.09585, cr_loss=0.3108, attn_decoder_loss=0.2123, over 29306.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1154, cr_loss=0.3564, attn_decoder_loss=0.2404, over 4796458.08 frames. 
], batch size: 71, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:14:38,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=634980.0, ans=0.125 +2024-09-19 09:14:41,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_abs, batch_count=634980.0, ans=0.5 +2024-09-19 09:14:47,006 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=15.92 vs. limit=15.0 +2024-09-19 09:14:54,524 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.56 vs. limit=6.0 +2024-09-19 09:14:54,750 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.06 vs. limit=6.0 +2024-09-19 09:15:15,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=635100.0, ans=0.125 +2024-09-19 09:15:16,543 INFO [train.py:1198] (0/2) Epoch 36, batch 400, loss[loss=0.2382, ctc_loss=0.1136, cr_loss=0.3659, attn_decoder_loss=0.2439, over 29691.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.115, cr_loss=0.3552, attn_decoder_loss=0.24, over 5025164.44 frames. ], batch size: 82, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:15:22,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=635100.0, ans=0.1 +2024-09-19 09:15:35,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.87 vs. limit=22.5 +2024-09-19 09:15:40,915 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.564e+01 9.194e+01 9.781e+01 3.536e+02, threshold=1.839e+02, percent-clipped=4.0 +2024-09-19 09:16:12,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=635220.0, ans=0.125 +2024-09-19 09:16:33,035 INFO [train.py:1198] (0/2) Epoch 36, batch 450, loss[loss=0.2509, ctc_loss=0.127, cr_loss=0.3812, attn_decoder_loss=0.2562, over 29689.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1153, cr_loss=0.3554, attn_decoder_loss=0.2403, over 5187247.00 frames. ], batch size: 83, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:16:45,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=635300.0, ans=0.95 +2024-09-19 09:16:50,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=635340.0, ans=0.1 +2024-09-19 09:17:26,723 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:17:38,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=635460.0, ans=0.04949747468305833 +2024-09-19 09:17:44,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=635460.0, ans=0.125 +2024-09-19 09:17:48,984 INFO [train.py:1198] (0/2) Epoch 36, batch 500, loss[loss=0.2423, ctc_loss=0.1267, cr_loss=0.3739, attn_decoder_loss=0.2468, over 29378.00 frames. 
], tot_loss[loss=0.2342, ctc_loss=0.1148, cr_loss=0.3546, attn_decoder_loss=0.2395, over 5329421.19 frames. ], batch size: 94, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:18:01,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=635500.0, ans=0.07 +2024-09-19 09:18:13,077 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.195e+01 8.310e+01 8.819e+01 9.519e+01 1.597e+02, threshold=1.764e+02, percent-clipped=0.0 +2024-09-19 09:18:23,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=635580.0, ans=0.125 +2024-09-19 09:18:32,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=635580.0, ans=0.1 +2024-09-19 09:18:32,463 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:18:45,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=635620.0, ans=0.0 +2024-09-19 09:18:51,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=635620.0, ans=0.1 +2024-09-19 09:18:55,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=635660.0, ans=0.2 +2024-09-19 09:19:09,303 INFO [train.py:1198] (0/2) Epoch 36, batch 550, loss[loss=0.2507, ctc_loss=0.1228, cr_loss=0.3936, attn_decoder_loss=0.2562, over 28775.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1148, cr_loss=0.3542, attn_decoder_loss=0.2395, over 5421610.17 frames. ], batch size: 104, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:19:36,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=635740.0, ans=0.025 +2024-09-19 09:19:39,310 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.27 vs. limit=15.0 +2024-09-19 09:20:02,109 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.09 vs. limit=6.0 +2024-09-19 09:20:25,469 INFO [train.py:1198] (0/2) Epoch 36, batch 600, loss[loss=0.2456, ctc_loss=0.1242, cr_loss=0.3747, attn_decoder_loss=0.2508, over 29255.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1151, cr_loss=0.3549, attn_decoder_loss=0.2398, over 5508695.42 frames. ], batch size: 100, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:20:27,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=635900.0, ans=0.125 +2024-09-19 09:20:30,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=635900.0, ans=0.125 +2024-09-19 09:20:30,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=635900.0, ans=0.125 +2024-09-19 09:20:30,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.35 vs. 
limit=15.0 +2024-09-19 09:20:33,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=635900.0, ans=10.0 +2024-09-19 09:20:36,303 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:20:42,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=635940.0, ans=0.1 +2024-09-19 09:20:50,745 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.519e+01 9.044e+01 9.582e+01 1.949e+02, threshold=1.809e+02, percent-clipped=1.0 +2024-09-19 09:20:54,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=635980.0, ans=0.125 +2024-09-19 09:20:54,668 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.63 vs. limit=22.5 +2024-09-19 09:21:36,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=636060.0, ans=0.125 +2024-09-19 09:21:37,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=636060.0, ans=0.0 +2024-09-19 09:21:40,519 INFO [train.py:1198] (0/2) Epoch 36, batch 650, loss[loss=0.2407, ctc_loss=0.1221, cr_loss=0.3797, attn_decoder_loss=0.2454, over 29764.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1143, cr_loss=0.3537, attn_decoder_loss=0.239, over 5586019.14 frames. ], batch size: 81, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:21:49,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=636100.0, ans=0.1 +2024-09-19 09:22:00,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=636140.0, ans=0.0 +2024-09-19 09:22:06,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=636140.0, ans=0.1 +2024-09-19 09:22:37,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=636220.0, ans=0.1 +2024-09-19 09:22:49,750 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.41 vs. limit=15.0 +2024-09-19 09:22:52,725 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.61 vs. limit=15.0 +2024-09-19 09:23:00,741 INFO [train.py:1198] (0/2) Epoch 36, batch 700, loss[loss=0.2307, ctc_loss=0.1208, cr_loss=0.3618, attn_decoder_loss=0.2349, over 29519.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.115, cr_loss=0.3556, attn_decoder_loss=0.2401, over 5635977.64 frames. ], batch size: 76, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:23:13,659 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.97 vs. 
limit=22.5 +2024-09-19 09:23:26,424 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.289e+01 8.520e+01 8.919e+01 9.430e+01 1.206e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-19 09:23:29,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.91 vs. limit=15.0 +2024-09-19 09:23:35,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=636380.0, ans=0.125 +2024-09-19 09:23:36,498 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.15 vs. limit=10.0 +2024-09-19 09:24:04,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=636460.0, ans=0.125 +2024-09-19 09:24:16,384 INFO [train.py:1198] (0/2) Epoch 36, batch 750, loss[loss=0.243, ctc_loss=0.1206, cr_loss=0.371, attn_decoder_loss=0.2484, over 29704.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1154, cr_loss=0.3565, attn_decoder_loss=0.2399, over 5673970.56 frames. ], batch size: 82, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:24:33,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=636540.0, ans=0.125 +2024-09-19 09:24:46,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=636580.0, ans=0.025 +2024-09-19 09:25:16,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.52 vs. limit=15.0 +2024-09-19 09:25:29,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=636660.0, ans=0.125 +2024-09-19 09:25:31,843 INFO [train.py:1198] (0/2) Epoch 36, batch 800, loss[loss=0.2142, ctc_loss=0.09237, cr_loss=0.3157, attn_decoder_loss=0.2207, over 29650.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1156, cr_loss=0.3568, attn_decoder_loss=0.24, over 5704345.64 frames. ], batch size: 73, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:25:38,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=636700.0, ans=0.0 +2024-09-19 09:25:55,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.33 vs. limit=15.0 +2024-09-19 09:25:57,541 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.525e+01 8.447e+01 8.844e+01 9.388e+01 5.453e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-19 09:26:00,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=636780.0, ans=0.025 +2024-09-19 09:26:09,430 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.67 vs. limit=15.0 +2024-09-19 09:26:27,079 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.72 vs. 
limit=15.0 +2024-09-19 09:26:33,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=636820.0, ans=0.2 +2024-09-19 09:26:37,284 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.38 vs. limit=15.0 +2024-09-19 09:26:39,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=636860.0, ans=0.125 +2024-09-19 09:26:46,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=636860.0, ans=0.125 +2024-09-19 09:26:52,628 INFO [train.py:1198] (0/2) Epoch 36, batch 850, loss[loss=0.2288, ctc_loss=0.09834, cr_loss=0.3238, attn_decoder_loss=0.2361, over 29705.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1147, cr_loss=0.3541, attn_decoder_loss=0.2395, over 5733748.95 frames. ], batch size: 89, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:26:54,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=636900.0, ans=0.125 +2024-09-19 09:27:06,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=636940.0, ans=0.125 +2024-09-19 09:27:45,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=637020.0, ans=0.2 +2024-09-19 09:27:50,588 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:27:59,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=637060.0, ans=0.125 +2024-09-19 09:28:08,134 INFO [train.py:1198] (0/2) Epoch 36, batch 900, loss[loss=0.2105, ctc_loss=0.09503, cr_loss=0.3001, attn_decoder_loss=0.2166, over 29637.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1149, cr_loss=0.3547, attn_decoder_loss=0.2395, over 5739515.63 frames. ], batch size: 73, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:28:17,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=637100.0, ans=0.0 +2024-09-19 09:28:28,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=637140.0, ans=0.125 +2024-09-19 09:28:35,109 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.474e+01 8.598e+01 8.959e+01 9.567e+01 2.745e+02, threshold=1.792e+02, percent-clipped=2.0 +2024-09-19 09:28:35,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=637140.0, ans=0.0 +2024-09-19 09:28:35,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=637140.0, ans=0.125 +2024-09-19 09:28:37,856 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.46 vs. 
limit=22.5 +2024-09-19 09:28:41,735 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:28:50,660 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:28:51,332 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.66 vs. limit=15.0 +2024-09-19 09:28:55,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=637220.0, ans=0.1 +2024-09-19 09:28:59,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=637220.0, ans=0.0 +2024-09-19 09:29:07,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=637260.0, ans=0.025 +2024-09-19 09:29:11,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.07 vs. limit=6.0 +2024-09-19 09:29:23,682 INFO [train.py:1198] (0/2) Epoch 36, batch 950, loss[loss=0.226, ctc_loss=0.1079, cr_loss=0.3393, attn_decoder_loss=0.2316, over 29491.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.115, cr_loss=0.355, attn_decoder_loss=0.2397, over 5742272.70 frames. ], batch size: 74, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:29:34,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=637300.0, ans=0.0 +2024-09-19 09:29:37,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=637340.0, ans=0.2 +2024-09-19 09:29:46,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=637340.0, ans=0.2 +2024-09-19 09:29:51,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=637340.0, ans=0.0 +2024-09-19 09:30:00,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=637380.0, ans=10.0 +2024-09-19 09:30:01,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.56 vs. limit=22.5 +2024-09-19 09:30:39,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=637460.0, ans=0.025 +2024-09-19 09:30:40,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=637460.0, ans=0.125 +2024-09-19 09:30:41,302 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.13 vs. limit=6.0 +2024-09-19 09:30:43,640 INFO [train.py:1198] (0/2) Epoch 36, batch 1000, loss[loss=0.22, ctc_loss=0.09931, cr_loss=0.3108, attn_decoder_loss=0.2265, over 29478.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.116, cr_loss=0.3573, attn_decoder_loss=0.2408, over 5736021.56 frames. 
], batch size: 77, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:30:48,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=637500.0, ans=0.0 +2024-09-19 09:31:09,042 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.25 vs. limit=15.0 +2024-09-19 09:31:11,033 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.580e+01 9.134e+01 9.845e+01 2.020e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-19 09:31:26,048 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.78 vs. limit=22.5 +2024-09-19 09:31:32,192 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.44 vs. limit=22.5 +2024-09-19 09:31:37,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=637620.0, ans=0.125 +2024-09-19 09:31:51,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.22 vs. limit=22.5 +2024-09-19 09:31:57,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=637660.0, ans=0.0 +2024-09-19 09:31:59,736 INFO [train.py:1198] (0/2) Epoch 36, batch 1050, loss[loss=0.242, ctc_loss=0.1124, cr_loss=0.3483, attn_decoder_loss=0.2486, over 29662.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1158, cr_loss=0.3566, attn_decoder_loss=0.2404, over 5743386.73 frames. ], batch size: 85, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:32:09,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=637700.0, ans=0.125 +2024-09-19 09:32:23,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=637740.0, ans=0.0 +2024-09-19 09:32:37,438 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.72 vs. limit=22.5 +2024-09-19 09:33:15,954 INFO [train.py:1198] (0/2) Epoch 36, batch 1100, loss[loss=0.2406, ctc_loss=0.1218, cr_loss=0.3646, attn_decoder_loss=0.2457, over 29438.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1161, cr_loss=0.357, attn_decoder_loss=0.2405, over 5755761.42 frames. ], batch size: 78, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:33:30,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.33 vs. limit=15.0 +2024-09-19 09:33:43,104 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.551e+01 8.368e+01 8.851e+01 9.380e+01 2.140e+02, threshold=1.770e+02, percent-clipped=1.0 +2024-09-19 09:34:00,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=638020.0, ans=0.125 +2024-09-19 09:34:33,781 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.87 vs. limit=10.0 +2024-09-19 09:34:35,863 INFO [train.py:1198] (0/2) Epoch 36, batch 1150, loss[loss=0.2303, ctc_loss=0.1147, cr_loss=0.3488, attn_decoder_loss=0.2354, over 29466.00 frames. 
], tot_loss[loss=0.2352, ctc_loss=0.1163, cr_loss=0.3571, attn_decoder_loss=0.2405, over 5754964.15 frames. ], batch size: 78, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:34:37,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=638100.0, ans=0.0 +2024-09-19 09:34:43,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=638100.0, ans=0.125 +2024-09-19 09:34:52,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=638140.0, ans=0.125 +2024-09-19 09:35:12,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=638180.0, ans=0.125 +2024-09-19 09:35:17,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=638180.0, ans=0.125 +2024-09-19 09:35:18,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=638180.0, ans=0.05 +2024-09-19 09:35:23,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=638220.0, ans=0.1 +2024-09-19 09:35:29,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=638220.0, ans=0.125 +2024-09-19 09:35:29,393 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:35:30,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=638220.0, ans=0.125 +2024-09-19 09:35:34,326 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.70 vs. limit=15.0 +2024-09-19 09:35:38,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=638260.0, ans=0.0 +2024-09-19 09:35:51,890 INFO [train.py:1198] (0/2) Epoch 36, batch 1200, loss[loss=0.2468, ctc_loss=0.1224, cr_loss=0.3803, attn_decoder_loss=0.2522, over 29655.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1168, cr_loss=0.3582, attn_decoder_loss=0.2413, over 5748141.53 frames. ], batch size: 85, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:35:53,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=638300.0, ans=0.0 +2024-09-19 09:36:19,079 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.768e+01 8.630e+01 9.163e+01 9.879e+01 2.531e+02, threshold=1.833e+02, percent-clipped=3.0 +2024-09-19 09:36:22,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=638380.0, ans=0.125 +2024-09-19 09:36:32,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=638380.0, ans=0.125 +2024-09-19 09:36:43,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.97 vs. 
limit=15.0 +2024-09-19 09:36:52,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=638460.0, ans=0.1 +2024-09-19 09:36:58,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=638460.0, ans=0.07 +2024-09-19 09:37:01,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=638460.0, ans=0.09899494936611666 +2024-09-19 09:37:08,431 INFO [train.py:1198] (0/2) Epoch 36, batch 1250, loss[loss=0.2503, ctc_loss=0.1309, cr_loss=0.3856, attn_decoder_loss=0.255, over 29542.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.117, cr_loss=0.3584, attn_decoder_loss=0.2416, over 5774372.11 frames. ], batch size: 92, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:37:22,885 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.48 vs. limit=15.0 +2024-09-19 09:37:43,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=638580.0, ans=0.04949747468305833 +2024-09-19 09:38:14,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=638660.0, ans=0.5 +2024-09-19 09:38:20,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=638660.0, ans=0.125 +2024-09-19 09:38:24,289 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.32 vs. limit=10.0 +2024-09-19 09:38:28,268 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.38 vs. limit=10.0 +2024-09-19 09:38:29,054 INFO [train.py:1198] (0/2) Epoch 36, batch 1300, loss[loss=0.2445, ctc_loss=0.1179, cr_loss=0.3656, attn_decoder_loss=0.2505, over 28553.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1166, cr_loss=0.3578, attn_decoder_loss=0.2412, over 5780318.77 frames. ], batch size: 112, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:38:32,906 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.28 vs. limit=22.5 +2024-09-19 09:38:50,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=638740.0, ans=0.1 +2024-09-19 09:38:53,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=638740.0, ans=0.0 +2024-09-19 09:38:56,429 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.514e+01 8.178e+01 8.817e+01 9.661e+01 1.409e+02, threshold=1.763e+02, percent-clipped=0.0 +2024-09-19 09:38:58,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=638780.0, ans=0.1 +2024-09-19 09:39:36,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=638860.0, ans=10.0 +2024-09-19 09:39:45,467 INFO [train.py:1198] (0/2) Epoch 36, batch 1350, loss[loss=0.2355, ctc_loss=0.1109, cr_loss=0.3428, attn_decoder_loss=0.2417, over 29743.00 frames. 
], tot_loss[loss=0.2351, ctc_loss=0.1159, cr_loss=0.3565, attn_decoder_loss=0.2405, over 5799090.44 frames. ], batch size: 81, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:39:47,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=638900.0, ans=0.0 +2024-09-19 09:40:36,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=639020.0, ans=0.0 +2024-09-19 09:40:38,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=639020.0, ans=0.0 +2024-09-19 09:40:42,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=639020.0, ans=0.2 +2024-09-19 09:41:00,592 INFO [train.py:1198] (0/2) Epoch 36, batch 1400, loss[loss=0.2137, ctc_loss=0.1004, cr_loss=0.3308, attn_decoder_loss=0.2189, over 29581.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1158, cr_loss=0.3562, attn_decoder_loss=0.2403, over 5809210.17 frames. ], batch size: 69, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:41:00,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=639100.0, ans=0.125 +2024-09-19 09:41:18,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=639140.0, ans=0.0 +2024-09-19 09:41:25,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=639140.0, ans=22.5 +2024-09-19 09:41:27,786 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.347e+01 8.417e+01 9.024e+01 9.500e+01 1.848e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-19 09:41:42,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=639180.0, ans=0.2 +2024-09-19 09:41:51,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=639220.0, ans=0.2 +2024-09-19 09:42:09,417 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.94 vs. limit=15.0 +2024-09-19 09:42:20,993 INFO [train.py:1198] (0/2) Epoch 36, batch 1450, loss[loss=0.2497, ctc_loss=0.1261, cr_loss=0.3748, attn_decoder_loss=0.2551, over 29446.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1163, cr_loss=0.3575, attn_decoder_loss=0.241, over 5806442.98 frames. ], batch size: 94, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:42:50,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=639380.0, ans=0.125 +2024-09-19 09:42:54,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=639380.0, ans=0.04949747468305833 +2024-09-19 09:43:01,124 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.04 vs. limit=22.5 +2024-09-19 09:43:36,466 INFO [train.py:1198] (0/2) Epoch 36, batch 1500, loss[loss=0.2505, ctc_loss=0.1249, cr_loss=0.3872, attn_decoder_loss=0.2559, over 29631.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1162, cr_loss=0.3576, attn_decoder_loss=0.2411, over 5806107.41 frames. 
], batch size: 86, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:43:47,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=639500.0, ans=0.0 +2024-09-19 09:44:02,834 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.82 vs. limit=22.5 +2024-09-19 09:44:03,549 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.409e+01 8.635e+01 9.112e+01 9.549e+01 2.206e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-19 09:44:05,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=639580.0, ans=0.0 +2024-09-19 09:44:14,673 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:44:23,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=639620.0, ans=0.125 +2024-09-19 09:44:34,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=639620.0, ans=0.0 +2024-09-19 09:44:42,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=639660.0, ans=0.125 +2024-09-19 09:44:49,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=639660.0, ans=0.1 +2024-09-19 09:44:52,260 INFO [train.py:1198] (0/2) Epoch 36, batch 1550, loss[loss=0.254, ctc_loss=0.1323, cr_loss=0.3844, attn_decoder_loss=0.259, over 29509.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1163, cr_loss=0.3578, attn_decoder_loss=0.241, over 5781182.65 frames. ], batch size: 90, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:44:53,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.21 vs. limit=6.0 +2024-09-19 09:44:57,302 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:45:03,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=639700.0, ans=0.0 +2024-09-19 09:45:10,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=639740.0, ans=0.125 +2024-09-19 09:45:12,900 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.65 vs. limit=15.0 +2024-09-19 09:45:27,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=639780.0, ans=0.0 +2024-09-19 09:45:48,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=639820.0, ans=0.125 +2024-09-19 09:45:59,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=639860.0, ans=0.025 +2024-09-19 09:46:03,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.76 vs. limit=15.0 +2024-09-19 09:46:05,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.82 vs. 
limit=15.0 +2024-09-19 09:46:11,765 INFO [train.py:1198] (0/2) Epoch 36, batch 1600, loss[loss=0.2403, ctc_loss=0.114, cr_loss=0.3488, attn_decoder_loss=0.2466, over 29674.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1165, cr_loss=0.3582, attn_decoder_loss=0.2409, over 5763704.37 frames. ], batch size: 85, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:46:13,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=639900.0, ans=0.025 +2024-09-19 09:46:34,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=639940.0, ans=0.125 +2024-09-19 09:46:42,000 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.502e+01 8.623e+01 9.307e+01 9.759e+01 1.491e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-19 09:46:48,614 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-160000.pt +2024-09-19 09:47:03,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.75 vs. limit=22.5 +2024-09-19 09:47:08,222 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:47:11,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.04 vs. limit=15.0 +2024-09-19 09:47:28,529 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.10 vs. limit=15.0 +2024-09-19 09:47:34,969 INFO [train.py:1198] (0/2) Epoch 36, batch 1650, loss[loss=0.2389, ctc_loss=0.1108, cr_loss=0.3413, attn_decoder_loss=0.2456, over 29707.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1163, cr_loss=0.3579, attn_decoder_loss=0.2406, over 5757218.15 frames. ], batch size: 89, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:47:48,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=640140.0, ans=0.125 +2024-09-19 09:47:51,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=640140.0, ans=0.125 +2024-09-19 09:48:05,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=640180.0, ans=0.125 +2024-09-19 09:48:22,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=640220.0, ans=0.125 +2024-09-19 09:48:46,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=640260.0, ans=0.1 +2024-09-19 09:48:50,207 INFO [train.py:1198] (0/2) Epoch 36, batch 1700, loss[loss=0.2064, ctc_loss=0.09674, cr_loss=0.3242, attn_decoder_loss=0.2114, over 29585.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.116, cr_loss=0.358, attn_decoder_loss=0.2406, over 5779835.99 frames. 
], batch size: 69, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:49:05,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=640340.0, ans=0.025 +2024-09-19 09:49:09,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=640340.0, ans=0.1 +2024-09-19 09:49:20,275 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.508e+01 8.971e+01 9.480e+01 1.290e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 09:49:53,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=640460.0, ans=0.125 +2024-09-19 09:50:10,194 INFO [train.py:1198] (0/2) Epoch 36, batch 1750, loss[loss=0.1967, ctc_loss=0.0869, cr_loss=0.3056, attn_decoder_loss=0.2021, over 29397.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1157, cr_loss=0.3574, attn_decoder_loss=0.2403, over 5789130.46 frames. ], batch size: 67, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:50:14,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=640500.0, ans=0.2 +2024-09-19 09:50:16,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=640500.0, ans=0.1 +2024-09-19 09:50:22,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=640500.0, ans=0.0 +2024-09-19 09:50:22,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=640500.0, ans=0.0 +2024-09-19 09:50:54,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=640620.0, ans=0.125 +2024-09-19 09:51:21,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=640660.0, ans=0.1 +2024-09-19 09:51:23,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=640660.0, ans=0.125 +2024-09-19 09:51:24,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=640700.0, ans=0.125 +2024-09-19 09:51:26,090 INFO [train.py:1198] (0/2) Epoch 36, batch 1800, loss[loss=0.2463, ctc_loss=0.1297, cr_loss=0.3865, attn_decoder_loss=0.2507, over 29696.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1159, cr_loss=0.3574, attn_decoder_loss=0.2406, over 5791152.59 frames. ], batch size: 83, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:51:56,469 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.489e+01 9.081e+01 9.519e+01 1.920e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 09:52:32,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=640860.0, ans=0.125 +2024-09-19 09:52:38,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=640860.0, ans=0.125 +2024-09-19 09:52:42,689 INFO [train.py:1198] (0/2) Epoch 36, batch 1850, loss[loss=0.2517, ctc_loss=0.121, cr_loss=0.3643, attn_decoder_loss=0.2581, over 29624.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.116, cr_loss=0.3574, attn_decoder_loss=0.2403, over 5795496.49 frames. 
], batch size: 86, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:52:50,960 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.07 vs. limit=12.0 +2024-09-19 09:52:54,978 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:53:02,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=640940.0, ans=0.0 +2024-09-19 09:53:50,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=641060.0, ans=0.0 +2024-09-19 09:53:55,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.55 vs. limit=6.0 +2024-09-19 09:53:57,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=641060.0, ans=0.2 +2024-09-19 09:54:00,523 INFO [train.py:1198] (0/2) Epoch 36, batch 1900, loss[loss=0.2443, ctc_loss=0.1231, cr_loss=0.3744, attn_decoder_loss=0.2495, over 29685.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1163, cr_loss=0.3586, attn_decoder_loss=0.2407, over 5803958.24 frames. ], batch size: 89, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:54:09,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=641100.0, ans=0.2 +2024-09-19 09:54:27,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=641140.0, ans=0.1 +2024-09-19 09:54:33,087 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.812e+01 8.610e+01 8.955e+01 9.499e+01 1.383e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 09:54:35,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.23 vs. limit=22.5 +2024-09-19 09:54:45,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=641180.0, ans=0.0 +2024-09-19 09:54:57,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=641220.0, ans=0.07 +2024-09-19 09:55:05,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=641260.0, ans=0.1 +2024-09-19 09:55:18,517 INFO [train.py:1198] (0/2) Epoch 36, batch 1950, loss[loss=0.2276, ctc_loss=0.1133, cr_loss=0.3584, attn_decoder_loss=0.2323, over 29431.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1166, cr_loss=0.3594, attn_decoder_loss=0.2416, over 5818979.46 frames. ], batch size: 78, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:55:21,059 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.15 vs. limit=15.0 +2024-09-19 09:55:44,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=641340.0, ans=0.0 +2024-09-19 09:56:10,494 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.05 vs. 
limit=6.0 +2024-09-19 09:56:33,542 INFO [train.py:1198] (0/2) Epoch 36, batch 2000, loss[loss=0.2127, ctc_loss=0.1004, cr_loss=0.3337, attn_decoder_loss=0.2178, over 29371.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1169, cr_loss=0.3596, attn_decoder_loss=0.2419, over 5797113.61 frames. ], batch size: 67, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:56:39,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=641500.0, ans=0.125 +2024-09-19 09:56:49,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=641540.0, ans=0.125 +2024-09-19 09:56:57,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.03 vs. limit=6.0 +2024-09-19 09:57:04,115 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.346e+01 8.610e+01 8.991e+01 9.571e+01 3.322e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-19 09:57:15,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=641580.0, ans=0.0 +2024-09-19 09:57:25,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.01 vs. limit=15.0 +2024-09-19 09:57:28,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=641620.0, ans=0.1 +2024-09-19 09:57:52,162 INFO [train.py:1198] (0/2) Epoch 36, batch 2050, loss[loss=0.2109, ctc_loss=0.09731, cr_loss=0.3141, attn_decoder_loss=0.2165, over 29431.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1163, cr_loss=0.358, attn_decoder_loss=0.2411, over 5788833.87 frames. ], batch size: 70, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:58:02,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=641700.0, ans=0.1 +2024-09-19 09:58:27,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=641780.0, ans=0.125 +2024-09-19 09:59:02,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=641860.0, ans=0.025 +2024-09-19 09:59:09,445 INFO [train.py:1198] (0/2) Epoch 36, batch 2100, loss[loss=0.2337, ctc_loss=0.1091, cr_loss=0.3474, attn_decoder_loss=0.2398, over 29763.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1156, cr_loss=0.3561, attn_decoder_loss=0.2405, over 5801157.10 frames. ], batch size: 81, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:59:24,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=641940.0, ans=0.125 +2024-09-19 09:59:26,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.15 vs. limit=15.0 +2024-09-19 09:59:33,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=641940.0, ans=0.1 +2024-09-19 09:59:39,063 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=13.33 vs. 
limit=22.5 +2024-09-19 09:59:39,339 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.566e+01 8.432e+01 8.828e+01 9.578e+01 1.169e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-19 09:59:50,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=641980.0, ans=0.2 +2024-09-19 10:00:02,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.54 vs. limit=15.0 +2024-09-19 10:00:06,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=642020.0, ans=0.125 +2024-09-19 10:00:09,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=642060.0, ans=0.2 +2024-09-19 10:00:14,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=642060.0, ans=0.0 +2024-09-19 10:00:17,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=642060.0, ans=0.0 +2024-09-19 10:00:24,377 INFO [train.py:1198] (0/2) Epoch 36, batch 2150, loss[loss=0.2163, ctc_loss=0.1029, cr_loss=0.3239, attn_decoder_loss=0.2217, over 29460.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1152, cr_loss=0.3555, attn_decoder_loss=0.24, over 5815432.22 frames. ], batch size: 78, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 10:00:26,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=642100.0, ans=0.0 +2024-09-19 10:00:57,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.35 vs. limit=6.0 +2024-09-19 10:00:59,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=642180.0, ans=0.1 +2024-09-19 10:01:21,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=642220.0, ans=0.125 +2024-09-19 10:01:27,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=642260.0, ans=0.0 +2024-09-19 10:01:36,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=642260.0, ans=0.125 +2024-09-19 10:01:42,317 INFO [train.py:1198] (0/2) Epoch 36, batch 2200, loss[loss=0.2458, ctc_loss=0.1163, cr_loss=0.3668, attn_decoder_loss=0.2521, over 29623.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1154, cr_loss=0.3564, attn_decoder_loss=0.2402, over 5812149.38 frames. ], batch size: 86, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 10:01:54,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=642300.0, ans=0.125 +2024-09-19 10:02:06,424 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.61 vs. 
limit=12.0 +2024-09-19 10:02:10,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=642340.0, ans=0.125 +2024-09-19 10:02:11,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=642340.0, ans=0.125 +2024-09-19 10:02:14,555 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.653e+01 8.688e+01 9.104e+01 9.664e+01 2.107e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-19 10:02:16,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=642380.0, ans=0.0 +2024-09-19 10:02:24,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=642380.0, ans=0.025 +2024-09-19 10:02:30,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=642420.0, ans=0.125 +2024-09-19 10:02:44,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=642460.0, ans=0.2 +2024-09-19 10:02:45,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=642460.0, ans=0.125 +2024-09-19 10:03:00,208 INFO [train.py:1198] (0/2) Epoch 36, batch 2250, loss[loss=0.2334, ctc_loss=0.1121, cr_loss=0.3437, attn_decoder_loss=0.2392, over 29724.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.115, cr_loss=0.3559, attn_decoder_loss=0.2398, over 5810603.86 frames. ], batch size: 82, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 10:03:00,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=642500.0, ans=0.1 +2024-09-19 10:03:05,668 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.70 vs. limit=15.0 +2024-09-19 10:03:11,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=642500.0, ans=0.125 +2024-09-19 10:03:24,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=642540.0, ans=0.05 +2024-09-19 10:03:36,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=642580.0, ans=0.2 +2024-09-19 10:03:51,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=642620.0, ans=0.0 +2024-09-19 10:03:55,079 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.45 vs. limit=15.0 +2024-09-19 10:04:15,306 INFO [train.py:1198] (0/2) Epoch 36, batch 2300, loss[loss=0.2068, ctc_loss=0.09063, cr_loss=0.2979, attn_decoder_loss=0.2131, over 29309.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1145, cr_loss=0.3541, attn_decoder_loss=0.2389, over 5799302.72 frames. 
], batch size: 71, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 10:04:18,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=642700.0, ans=0.0 +2024-09-19 10:04:38,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=642740.0, ans=0.2 +2024-09-19 10:04:46,893 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.402e+01 9.082e+01 9.464e+01 1.800e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 10:04:56,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=642780.0, ans=0.2 +2024-09-19 10:05:07,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=642820.0, ans=0.0 +2024-09-19 10:05:22,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=642860.0, ans=0.025 +2024-09-19 10:05:29,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=642860.0, ans=0.0 +2024-09-19 10:05:33,282 INFO [train.py:1198] (0/2) Epoch 36, batch 2350, loss[loss=0.2434, ctc_loss=0.1205, cr_loss=0.3812, attn_decoder_loss=0.2486, over 29702.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1148, cr_loss=0.3549, attn_decoder_loss=0.2391, over 5804102.85 frames. ], batch size: 83, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:05:36,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=642900.0, ans=0.125 +2024-09-19 10:05:45,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.86 vs. limit=15.0 +2024-09-19 10:06:14,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=642980.0, ans=0.0 +2024-09-19 10:06:32,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=643020.0, ans=0.0 +2024-09-19 10:06:37,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=643060.0, ans=0.0 +2024-09-19 10:06:50,477 INFO [train.py:1198] (0/2) Epoch 36, batch 2400, loss[loss=0.2339, ctc_loss=0.1143, cr_loss=0.3586, attn_decoder_loss=0.2393, over 29542.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1152, cr_loss=0.3555, attn_decoder_loss=0.2397, over 5807886.29 frames. ], batch size: 76, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:06:55,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=643100.0, ans=0.025 +2024-09-19 10:07:17,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.77 vs. 
limit=10.0 +2024-09-19 10:07:19,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=643180.0, ans=0.0 +2024-09-19 10:07:22,303 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.644e+01 9.234e+01 9.836e+01 2.155e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-19 10:07:24,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=643180.0, ans=0.125 +2024-09-19 10:07:35,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=643220.0, ans=0.0 +2024-09-19 10:07:35,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=643220.0, ans=0.025 +2024-09-19 10:07:35,889 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.84 vs. limit=15.0 +2024-09-19 10:07:39,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=643220.0, ans=0.0 +2024-09-19 10:07:41,312 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:07:44,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=643220.0, ans=0.0 +2024-09-19 10:07:45,819 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:08:01,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=643260.0, ans=0.1 +2024-09-19 10:08:07,055 INFO [train.py:1198] (0/2) Epoch 36, batch 2450, loss[loss=0.2352, ctc_loss=0.1032, cr_loss=0.3382, attn_decoder_loss=0.2424, over 29711.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.116, cr_loss=0.3573, attn_decoder_loss=0.2407, over 5783069.05 frames. ], batch size: 82, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:08:07,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=643300.0, ans=0.0 +2024-09-19 10:08:10,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=643300.0, ans=0.0 +2024-09-19 10:08:10,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=643300.0, ans=0.1 +2024-09-19 10:08:14,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=643300.0, ans=0.1 +2024-09-19 10:08:17,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=643300.0, ans=0.125 +2024-09-19 10:08:31,867 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.75 vs. 
limit=10.0 +2024-09-19 10:08:39,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=643380.0, ans=0.0 +2024-09-19 10:08:54,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=643420.0, ans=0.0 +2024-09-19 10:09:13,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.90 vs. limit=15.0 +2024-09-19 10:09:24,640 INFO [train.py:1198] (0/2) Epoch 36, batch 2500, loss[loss=0.2465, ctc_loss=0.1228, cr_loss=0.383, attn_decoder_loss=0.2518, over 29634.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1161, cr_loss=0.3579, attn_decoder_loss=0.2409, over 5793855.93 frames. ], batch size: 86, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:09:29,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=643500.0, ans=0.2 +2024-09-19 10:09:58,825 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.618e+01 8.994e+01 9.637e+01 2.222e+02, threshold=1.799e+02, percent-clipped=1.0 +2024-09-19 10:10:00,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=643580.0, ans=0.125 +2024-09-19 10:10:32,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=643660.0, ans=0.0 +2024-09-19 10:10:42,732 INFO [train.py:1198] (0/2) Epoch 36, batch 2550, loss[loss=0.2052, ctc_loss=0.09095, cr_loss=0.2959, attn_decoder_loss=0.2113, over 29334.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1158, cr_loss=0.3577, attn_decoder_loss=0.2407, over 5797498.65 frames. ], batch size: 67, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:10:50,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=643700.0, ans=0.1 +2024-09-19 10:10:53,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=643700.0, ans=0.1 +2024-09-19 10:11:30,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=643820.0, ans=0.09899494936611666 +2024-09-19 10:11:46,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=643860.0, ans=0.2 +2024-09-19 10:11:50,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.36 vs. limit=10.0 +2024-09-19 10:11:58,151 INFO [train.py:1198] (0/2) Epoch 36, batch 2600, loss[loss=0.2305, ctc_loss=0.1086, cr_loss=0.341, attn_decoder_loss=0.2364, over 29443.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1158, cr_loss=0.3575, attn_decoder_loss=0.2408, over 5794276.33 frames. 
], batch size: 78, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:12:09,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=643900.0, ans=0.1 +2024-09-19 10:12:31,416 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.615e+01 9.147e+01 9.711e+01 1.347e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-19 10:12:39,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=643980.0, ans=0.0 +2024-09-19 10:12:43,301 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.67 vs. limit=15.0 +2024-09-19 10:13:01,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=644060.0, ans=0.125 +2024-09-19 10:13:02,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=644060.0, ans=0.125 +2024-09-19 10:13:10,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=644060.0, ans=0.2 +2024-09-19 10:13:16,044 INFO [train.py:1198] (0/2) Epoch 36, batch 2650, loss[loss=0.2517, ctc_loss=0.1287, cr_loss=0.3823, attn_decoder_loss=0.2568, over 29279.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1161, cr_loss=0.3578, attn_decoder_loss=0.2411, over 5801554.33 frames. ], batch size: 100, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:13:27,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=15.69 vs. limit=15.0 +2024-09-19 10:13:29,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=644140.0, ans=0.025 +2024-09-19 10:13:34,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=644140.0, ans=0.125 +2024-09-19 10:13:35,072 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.36 vs. limit=15.0 +2024-09-19 10:14:02,297 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.91 vs. limit=15.0 +2024-09-19 10:14:12,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=644220.0, ans=0.2 +2024-09-19 10:14:18,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=644260.0, ans=0.125 +2024-09-19 10:14:20,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=644260.0, ans=0.0 +2024-09-19 10:14:33,369 INFO [train.py:1198] (0/2) Epoch 36, batch 2700, loss[loss=0.2451, ctc_loss=0.1217, cr_loss=0.3733, attn_decoder_loss=0.2505, over 29541.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1158, cr_loss=0.3575, attn_decoder_loss=0.241, over 5797849.69 frames. ], batch size: 87, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:14:42,988 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.33 vs. 
limit=15.0 +2024-09-19 10:14:49,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=644340.0, ans=0.2 +2024-09-19 10:14:51,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=644340.0, ans=0.1 +2024-09-19 10:14:55,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=644340.0, ans=0.125 +2024-09-19 10:14:57,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=644340.0, ans=0.125 +2024-09-19 10:15:05,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.56 vs. limit=22.5 +2024-09-19 10:15:06,278 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.180e+01 8.558e+01 9.078e+01 9.683e+01 1.491e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 10:15:25,453 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.82 vs. limit=15.0 +2024-09-19 10:15:35,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=644460.0, ans=0.125 +2024-09-19 10:15:46,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=644460.0, ans=0.2 +2024-09-19 10:15:48,824 INFO [train.py:1198] (0/2) Epoch 36, batch 2750, loss[loss=0.2333, ctc_loss=0.1154, cr_loss=0.3674, attn_decoder_loss=0.2382, over 29515.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1154, cr_loss=0.3561, attn_decoder_loss=0.24, over 5796577.95 frames. ], batch size: 75, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:15:50,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=644500.0, ans=0.125 +2024-09-19 10:16:13,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=644540.0, ans=0.125 +2024-09-19 10:16:17,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=644580.0, ans=0.125 +2024-09-19 10:16:22,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=644580.0, ans=0.125 +2024-09-19 10:16:27,143 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.32 vs. limit=22.5 +2024-09-19 10:16:31,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=644580.0, ans=0.125 +2024-09-19 10:17:03,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=644660.0, ans=0.2 +2024-09-19 10:17:05,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=644700.0, ans=0.1 +2024-09-19 10:17:06,637 INFO [train.py:1198] (0/2) Epoch 36, batch 2800, loss[loss=0.2534, ctc_loss=0.1357, cr_loss=0.3788, attn_decoder_loss=0.2581, over 20883.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1162, cr_loss=0.3577, attn_decoder_loss=0.2404, over 5777967.23 frames. 
], batch size: 209, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:17:43,405 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.665e+01 8.452e+01 9.019e+01 9.554e+01 2.850e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-19 10:17:46,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=644780.0, ans=22.5 +2024-09-19 10:18:01,289 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.43 vs. limit=22.5 +2024-09-19 10:18:24,433 INFO [train.py:1198] (0/2) Epoch 36, batch 2850, loss[loss=0.2232, ctc_loss=0.1113, cr_loss=0.3398, attn_decoder_loss=0.2281, over 29513.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1168, cr_loss=0.3583, attn_decoder_loss=0.2409, over 5764221.51 frames. ], batch size: 77, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:18:32,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=644900.0, ans=0.125 +2024-09-19 10:18:35,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten.whitening_limit, batch_count=644900.0, ans=22.5 +2024-09-19 10:18:44,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=644940.0, ans=0.125 +2024-09-19 10:18:53,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=644980.0, ans=0.0 +2024-09-19 10:19:10,655 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.31 vs. limit=15.0 +2024-09-19 10:19:13,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.08 vs. limit=15.0 +2024-09-19 10:19:19,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=645020.0, ans=0.125 +2024-09-19 10:19:37,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=645060.0, ans=0.125 +2024-09-19 10:19:37,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=645060.0, ans=0.125 +2024-09-19 10:19:39,848 INFO [train.py:1198] (0/2) Epoch 36, batch 2900, loss[loss=0.2397, ctc_loss=0.1209, cr_loss=0.3709, attn_decoder_loss=0.2446, over 29442.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1171, cr_loss=0.3596, attn_decoder_loss=0.2419, over 5789511.64 frames. ], batch size: 79, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:20:05,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=645140.0, ans=0.2 +2024-09-19 10:20:09,300 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:20:14,868 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.477e+01 8.534e+01 8.969e+01 9.435e+01 1.794e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 10:20:37,627 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.78 vs. 
limit=10.0 +2024-09-19 10:20:57,807 INFO [train.py:1198] (0/2) Epoch 36, batch 2950, loss[loss=0.2303, ctc_loss=0.1162, cr_loss=0.3642, attn_decoder_loss=0.2349, over 29523.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1159, cr_loss=0.3567, attn_decoder_loss=0.2407, over 5782911.30 frames. ], batch size: 75, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:21:16,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=645340.0, ans=0.1 +2024-09-19 10:21:16,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.90 vs. limit=15.0 +2024-09-19 10:21:25,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=645340.0, ans=0.0 +2024-09-19 10:21:58,033 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=8.75 vs. limit=12.0 +2024-09-19 10:21:58,078 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.56 vs. limit=15.0 +2024-09-19 10:22:09,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=645460.0, ans=0.05 +2024-09-19 10:22:09,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=645460.0, ans=0.2 +2024-09-19 10:22:15,301 INFO [train.py:1198] (0/2) Epoch 36, batch 3000, loss[loss=0.2382, ctc_loss=0.1192, cr_loss=0.363, attn_decoder_loss=0.2433, over 29737.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1156, cr_loss=0.3558, attn_decoder_loss=0.2404, over 5782851.33 frames. ], batch size: 81, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:22:15,302 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 10:22:33,842 INFO [train.py:1230] (0/2) Epoch 36, validation: loss=0.212, ctc_loss=0.03671, cr_loss=5.93e-15, attn_decoder_loss=0.2315, over 944034.00 frames. 
+2024-09-19 10:22:33,842 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 10:22:49,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=645540.0, ans=0.1 +2024-09-19 10:23:08,438 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.539e+01 8.660e+01 9.002e+01 9.609e+01 4.841e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-19 10:23:10,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=645580.0, ans=0.125 +2024-09-19 10:23:18,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=645620.0, ans=0.125 +2024-09-19 10:23:27,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=645620.0, ans=0.2 +2024-09-19 10:23:27,604 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:23:31,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=645620.0, ans=0.0 +2024-09-19 10:23:50,020 INFO [train.py:1198] (0/2) Epoch 36, batch 3050, loss[loss=0.234, ctc_loss=0.1189, cr_loss=0.3513, attn_decoder_loss=0.239, over 29546.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1167, cr_loss=0.3578, attn_decoder_loss=0.2416, over 5777628.60 frames. ], batch size: 76, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:23:56,873 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.59 vs. limit=15.0 +2024-09-19 10:24:06,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=645740.0, ans=0.125 +2024-09-19 10:24:12,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=645740.0, ans=0.125 +2024-09-19 10:24:13,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=645740.0, ans=0.125 +2024-09-19 10:24:31,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=645780.0, ans=0.025 +2024-09-19 10:24:39,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=645820.0, ans=0.0 +2024-09-19 10:24:42,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=645820.0, ans=0.125 +2024-09-19 10:24:56,802 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.72 vs. limit=15.0 +2024-09-19 10:25:03,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=645860.0, ans=0.0 +2024-09-19 10:25:06,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=645900.0, ans=0.2 +2024-09-19 10:25:07,759 INFO [train.py:1198] (0/2) Epoch 36, batch 3100, loss[loss=0.2439, ctc_loss=0.1174, cr_loss=0.3664, attn_decoder_loss=0.2498, over 29191.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1163, cr_loss=0.3573, attn_decoder_loss=0.2412, over 5776640.73 frames. 
], batch size: 100, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:25:17,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=645900.0, ans=0.125 +2024-09-19 10:25:31,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=645940.0, ans=0.0 +2024-09-19 10:25:44,450 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.450e+01 9.039e+01 9.711e+01 1.761e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-19 10:25:44,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=645980.0, ans=0.125 +2024-09-19 10:25:55,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=646020.0, ans=0.125 +2024-09-19 10:26:25,354 INFO [train.py:1198] (0/2) Epoch 36, batch 3150, loss[loss=0.2434, ctc_loss=0.1186, cr_loss=0.3581, attn_decoder_loss=0.2493, over 28760.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1164, cr_loss=0.3578, attn_decoder_loss=0.2413, over 5782601.30 frames. ], batch size: 104, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:26:31,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=646100.0, ans=0.07 +2024-09-19 10:26:57,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=646180.0, ans=0.0 +2024-09-19 10:27:00,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=646180.0, ans=0.125 +2024-09-19 10:27:10,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=646220.0, ans=0.1 +2024-09-19 10:27:12,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=646220.0, ans=0.125 +2024-09-19 10:27:39,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=646300.0, ans=0.125 +2024-09-19 10:27:40,721 INFO [train.py:1198] (0/2) Epoch 36, batch 3200, loss[loss=0.2243, ctc_loss=0.09994, cr_loss=0.3082, attn_decoder_loss=0.2313, over 29429.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.116, cr_loss=0.357, attn_decoder_loss=0.2406, over 5792712.58 frames. ], batch size: 79, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:28:06,865 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.26 vs. limit=15.0 +2024-09-19 10:28:11,038 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.83 vs. 
limit=15.0 +2024-09-19 10:28:17,944 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.484e+01 8.478e+01 9.056e+01 9.805e+01 1.899e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-19 10:28:18,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=646380.0, ans=0.0 +2024-09-19 10:28:18,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=646380.0, ans=0.0 +2024-09-19 10:28:30,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=646420.0, ans=0.125 +2024-09-19 10:28:53,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.44 vs. limit=15.0 +2024-09-19 10:28:55,679 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.98 vs. limit=15.0 +2024-09-19 10:28:59,141 INFO [train.py:1198] (0/2) Epoch 36, batch 3250, loss[loss=0.242, ctc_loss=0.1239, cr_loss=0.3651, attn_decoder_loss=0.247, over 29707.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1158, cr_loss=0.3568, attn_decoder_loss=0.2408, over 5798751.78 frames. ], batch size: 84, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:28:59,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=646500.0, ans=0.125 +2024-09-19 10:29:00,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=646500.0, ans=0.125 +2024-09-19 10:29:16,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=646540.0, ans=0.125 +2024-09-19 10:29:27,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=646540.0, ans=0.2 +2024-09-19 10:29:27,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=646540.0, ans=0.125 +2024-09-19 10:29:46,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=646620.0, ans=0.125 +2024-09-19 10:30:03,539 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.97 vs. limit=15.0 +2024-09-19 10:30:07,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=646660.0, ans=0.0 +2024-09-19 10:30:15,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=646700.0, ans=0.125 +2024-09-19 10:30:16,478 INFO [train.py:1198] (0/2) Epoch 36, batch 3300, loss[loss=0.2448, ctc_loss=0.1203, cr_loss=0.3618, attn_decoder_loss=0.2506, over 28203.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.115, cr_loss=0.3551, attn_decoder_loss=0.2397, over 5795895.45 frames. 
], batch size: 111, lr: 3.06e-03, grad_scale: 16.0 +2024-09-19 10:30:22,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=646700.0, ans=0.125 +2024-09-19 10:30:44,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=646740.0, ans=0.04949747468305833 +2024-09-19 10:30:46,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.28 vs. limit=15.0 +2024-09-19 10:30:47,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=646780.0, ans=0.125 +2024-09-19 10:30:51,903 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.60 vs. limit=12.0 +2024-09-19 10:30:52,749 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.457e+01 8.565e+01 9.043e+01 9.746e+01 1.474e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 10:30:56,682 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.54 vs. limit=15.0 +2024-09-19 10:31:17,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=646860.0, ans=0.0 +2024-09-19 10:31:23,233 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:31:31,688 INFO [train.py:1198] (0/2) Epoch 36, batch 3350, loss[loss=0.2476, ctc_loss=0.1235, cr_loss=0.3808, attn_decoder_loss=0.253, over 28899.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.116, cr_loss=0.3577, attn_decoder_loss=0.2408, over 5772592.36 frames. ], batch size: 104, lr: 3.06e-03, grad_scale: 8.0 +2024-09-19 10:31:35,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=646900.0, ans=0.0 +2024-09-19 10:31:49,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=646940.0, ans=0.0 +2024-09-19 10:31:50,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=646940.0, ans=0.125 +2024-09-19 10:32:49,154 INFO [train.py:1198] (0/2) Epoch 36, batch 3400, loss[loss=0.2066, ctc_loss=0.09039, cr_loss=0.3004, attn_decoder_loss=0.2129, over 29359.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1164, cr_loss=0.3587, attn_decoder_loss=0.2409, over 5764852.46 frames. 
], batch size: 67, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:33:10,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=647140.0, ans=0.125 +2024-09-19 10:33:18,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=647140.0, ans=0.0 +2024-09-19 10:33:20,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=647180.0, ans=0.2 +2024-09-19 10:33:27,778 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.490e+01 8.557e+01 9.096e+01 9.972e+01 2.860e+02, threshold=1.819e+02, percent-clipped=2.0 +2024-09-19 10:33:37,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=647220.0, ans=0.0 +2024-09-19 10:33:37,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=647220.0, ans=0.2 +2024-09-19 10:33:40,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=647220.0, ans=0.1 +2024-09-19 10:33:55,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=647260.0, ans=0.0 +2024-09-19 10:33:58,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=647260.0, ans=0.07 +2024-09-19 10:34:01,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=647260.0, ans=0.2 +2024-09-19 10:34:03,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=647260.0, ans=0.125 +2024-09-19 10:34:07,344 INFO [train.py:1198] (0/2) Epoch 36, batch 3450, loss[loss=0.2313, ctc_loss=0.1097, cr_loss=0.3441, attn_decoder_loss=0.2372, over 28160.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1164, cr_loss=0.3587, attn_decoder_loss=0.2411, over 5773250.50 frames. ], batch size: 111, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:34:10,562 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:34:37,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=647380.0, ans=0.125 +2024-09-19 10:35:09,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=647460.0, ans=0.125 +2024-09-19 10:35:11,070 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:35:12,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=647460.0, ans=0.125 +2024-09-19 10:35:17,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=647460.0, ans=0.1 +2024-09-19 10:35:21,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=647500.0, ans=0.125 +2024-09-19 10:35:22,928 INFO [train.py:1198] (0/2) Epoch 36, batch 3500, loss[loss=0.2214, ctc_loss=0.103, cr_loss=0.3246, attn_decoder_loss=0.2273, over 29362.00 frames. 
], tot_loss[loss=0.2356, ctc_loss=0.1164, cr_loss=0.3585, attn_decoder_loss=0.2409, over 5775159.53 frames. ], batch size: 71, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:35:37,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=647500.0, ans=0.125 +2024-09-19 10:35:39,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=647540.0, ans=0.2 +2024-09-19 10:35:40,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=647540.0, ans=0.2 +2024-09-19 10:35:44,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=647540.0, ans=0.125 +2024-09-19 10:35:49,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=647540.0, ans=0.125 +2024-09-19 10:35:52,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=647540.0, ans=0.5 +2024-09-19 10:36:00,839 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.526e+01 8.443e+01 8.960e+01 9.445e+01 1.390e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-19 10:36:39,521 INFO [train.py:1198] (0/2) Epoch 36, batch 3550, loss[loss=0.2509, ctc_loss=0.1172, cr_loss=0.3581, attn_decoder_loss=0.2578, over 29720.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1166, cr_loss=0.3591, attn_decoder_loss=0.241, over 5782224.27 frames. ], batch size: 89, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:36:50,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.35 vs. limit=12.0 +2024-09-19 10:37:00,325 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:37:09,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=647780.0, ans=0.125 +2024-09-19 10:37:13,787 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:37:26,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=647820.0, ans=0.125 +2024-09-19 10:37:43,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=647860.0, ans=0.125 +2024-09-19 10:37:53,734 INFO [train.py:1198] (0/2) Epoch 36, batch 3600, loss[loss=0.2233, ctc_loss=0.1048, cr_loss=0.3486, attn_decoder_loss=0.2287, over 29507.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1159, cr_loss=0.3581, attn_decoder_loss=0.2406, over 5791734.20 frames. ], batch size: 77, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:38:06,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=647900.0, ans=0.2 +2024-09-19 10:38:12,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=647940.0, ans=0.1 +2024-09-19 10:38:26,882 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.91 vs. 
limit=12.0 +2024-09-19 10:38:30,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=647980.0, ans=0.1 +2024-09-19 10:38:31,850 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.487e+01 8.382e+01 8.950e+01 9.458e+01 2.043e+02, threshold=1.790e+02, percent-clipped=2.0 +2024-09-19 10:38:47,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=648020.0, ans=0.5 +2024-09-19 10:38:54,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=648060.0, ans=0.0 +2024-09-19 10:38:54,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=648060.0, ans=0.0 +2024-09-19 10:39:10,853 INFO [train.py:1198] (0/2) Epoch 36, batch 3650, loss[loss=0.2404, ctc_loss=0.1101, cr_loss=0.334, attn_decoder_loss=0.2475, over 29484.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1155, cr_loss=0.3574, attn_decoder_loss=0.2399, over 5794354.37 frames. ], batch size: 90, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:39:42,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=648180.0, ans=0.2 +2024-09-19 10:40:14,316 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.08 vs. limit=15.0 +2024-09-19 10:40:18,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=648260.0, ans=0.125 +2024-09-19 10:40:22,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=648260.0, ans=0.0 +2024-09-19 10:40:25,591 INFO [train.py:1198] (0/2) Epoch 36, batch 3700, loss[loss=0.2514, ctc_loss=0.1292, cr_loss=0.3906, attn_decoder_loss=0.2563, over 29708.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1154, cr_loss=0.3573, attn_decoder_loss=0.2402, over 5804680.62 frames. ], batch size: 84, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:41:01,475 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.727e+01 8.595e+01 9.070e+01 9.562e+01 1.267e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-19 10:41:12,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=648420.0, ans=0.125 +2024-09-19 10:41:15,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=648420.0, ans=0.2 +2024-09-19 10:41:34,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=648460.0, ans=0.125 +2024-09-19 10:41:40,472 INFO [train.py:1198] (0/2) Epoch 36, batch 3750, loss[loss=0.2111, ctc_loss=0.1037, cr_loss=0.3384, attn_decoder_loss=0.2155, over 29330.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1152, cr_loss=0.3563, attn_decoder_loss=0.24, over 5807277.98 frames. 
], batch size: 67, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:41:54,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=648500.0, ans=0.125 +2024-09-19 10:42:10,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=648580.0, ans=0.07 +2024-09-19 10:42:31,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=648620.0, ans=0.125 +2024-09-19 10:42:52,330 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:42:53,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=648660.0, ans=0.1 +2024-09-19 10:42:56,339 INFO [train.py:1198] (0/2) Epoch 36, batch 3800, loss[loss=0.2462, ctc_loss=0.1191, cr_loss=0.3666, attn_decoder_loss=0.2521, over 29615.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1155, cr_loss=0.3566, attn_decoder_loss=0.2399, over 5798033.46 frames. ], batch size: 86, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:42:58,587 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.71 vs. limit=22.5 +2024-09-19 10:43:18,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.52 vs. limit=15.0 +2024-09-19 10:43:34,112 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.510e+01 8.360e+01 8.859e+01 9.442e+01 1.706e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 10:43:43,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=648820.0, ans=0.125 +2024-09-19 10:43:52,844 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.24 vs. limit=12.0 +2024-09-19 10:43:53,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=648820.0, ans=0.125 +2024-09-19 10:44:04,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=648860.0, ans=0.2 +2024-09-19 10:44:04,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=648860.0, ans=0.95 +2024-09-19 10:44:04,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=648860.0, ans=0.0 +2024-09-19 10:44:11,224 INFO [train.py:1198] (0/2) Epoch 36, batch 3850, loss[loss=0.2546, ctc_loss=0.1229, cr_loss=0.3829, attn_decoder_loss=0.2607, over 29250.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1153, cr_loss=0.3557, attn_decoder_loss=0.2398, over 5810835.03 frames. ], batch size: 100, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:44:46,042 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.55 vs. limit=12.0 +2024-09-19 10:44:47,515 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.98 vs. 
limit=6.0 +2024-09-19 10:44:48,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=648980.0, ans=0.5 +2024-09-19 10:45:02,445 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.40 vs. limit=15.0 +2024-09-19 10:45:18,643 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.00 vs. limit=15.0 +2024-09-19 10:45:24,473 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.03 vs. limit=15.0 +2024-09-19 10:45:26,936 INFO [train.py:1198] (0/2) Epoch 36, batch 3900, loss[loss=0.2491, ctc_loss=0.1243, cr_loss=0.378, attn_decoder_loss=0.2545, over 29655.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1156, cr_loss=0.356, attn_decoder_loss=0.2402, over 5814835.20 frames. ], batch size: 86, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:45:31,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=649100.0, ans=0.2 +2024-09-19 10:45:37,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=649100.0, ans=0.1 +2024-09-19 10:45:45,739 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.42 vs. limit=15.0 +2024-09-19 10:45:52,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=649140.0, ans=0.0 +2024-09-19 10:45:58,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=649180.0, ans=0.1 +2024-09-19 10:46:03,830 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.759e+01 8.587e+01 8.995e+01 9.649e+01 1.195e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-19 10:46:04,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=649180.0, ans=0.025 +2024-09-19 10:46:05,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=649180.0, ans=0.0 +2024-09-19 10:46:16,550 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.50 vs. limit=15.0 +2024-09-19 10:46:26,344 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:46:40,852 INFO [train.py:1198] (0/2) Epoch 36, batch 3950, loss[loss=0.2427, ctc_loss=0.1242, cr_loss=0.3876, attn_decoder_loss=0.2472, over 29508.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1158, cr_loss=0.357, attn_decoder_loss=0.2405, over 5834476.75 frames. 
], batch size: 97, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:46:41,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=649300.0, ans=0.125 +2024-09-19 10:46:47,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=649300.0, ans=0.0 +2024-09-19 10:46:57,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=649340.0, ans=0.125 +2024-09-19 10:47:04,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=649340.0, ans=0.125 +2024-09-19 10:47:05,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.66 vs. limit=22.5 +2024-09-19 10:47:12,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=649380.0, ans=0.2 +2024-09-19 10:47:21,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=649380.0, ans=0.5 +2024-09-19 10:47:24,638 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.08 vs. limit=15.0 +2024-09-19 10:47:41,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=649460.0, ans=0.2 +2024-09-19 10:47:44,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=649460.0, ans=0.1 +2024-09-19 10:47:52,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=649460.0, ans=0.125 +2024-09-19 10:47:56,080 INFO [train.py:1198] (0/2) Epoch 36, batch 4000, loss[loss=0.23, ctc_loss=0.1139, cr_loss=0.3558, attn_decoder_loss=0.235, over 29507.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.116, cr_loss=0.3569, attn_decoder_loss=0.2404, over 5811264.36 frames. ], batch size: 74, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:48:00,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=649500.0, ans=0.125 +2024-09-19 10:48:05,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.91 vs. limit=15.0 +2024-09-19 10:48:18,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.52 vs. 
limit=15.0 +2024-09-19 10:48:33,457 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.599e+01 9.136e+01 9.707e+01 2.354e+02, threshold=1.827e+02, percent-clipped=2.0 +2024-09-19 10:48:51,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=649620.0, ans=0.0 +2024-09-19 10:49:02,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=649660.0, ans=0.0 +2024-09-19 10:49:02,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=649660.0, ans=0.0 +2024-09-19 10:49:10,626 INFO [train.py:1198] (0/2) Epoch 36, batch 4050, loss[loss=0.2526, ctc_loss=0.1412, cr_loss=0.3906, attn_decoder_loss=0.2563, over 20869.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1158, cr_loss=0.3565, attn_decoder_loss=0.2401, over 5795944.45 frames. ], batch size: 210, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:49:41,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=649780.0, ans=0.0 +2024-09-19 10:50:04,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=649820.0, ans=0.1 +2024-09-19 10:50:13,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=649860.0, ans=0.125 +2024-09-19 10:50:15,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=649860.0, ans=0.125 +2024-09-19 10:50:23,508 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.48 vs. limit=15.0 +2024-09-19 10:50:25,522 INFO [train.py:1198] (0/2) Epoch 36, batch 4100, loss[loss=0.2477, ctc_loss=0.1324, cr_loss=0.4053, attn_decoder_loss=0.2515, over 29492.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1163, cr_loss=0.3569, attn_decoder_loss=0.2405, over 5791150.08 frames. ], batch size: 90, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:50:36,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=649900.0, ans=0.04949747468305833 +2024-09-19 10:50:53,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=649980.0, ans=0.0 +2024-09-19 10:50:54,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=649980.0, ans=0.0 +2024-09-19 10:51:01,956 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.541e+01 9.319e+01 9.811e+01 6.662e+02, threshold=1.864e+02, percent-clipped=1.0 +2024-09-19 10:51:06,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=649980.0, ans=0.09899494936611666 +2024-09-19 10:51:12,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=650020.0, ans=0.2 +2024-09-19 10:51:39,875 INFO [train.py:1198] (0/2) Epoch 36, batch 4150, loss[loss=0.2251, ctc_loss=0.1157, cr_loss=0.3529, attn_decoder_loss=0.2294, over 29478.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1163, cr_loss=0.3572, attn_decoder_loss=0.2403, over 5796294.09 frames. 
], batch size: 77, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:51:44,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=650100.0, ans=0.0 +2024-09-19 10:51:58,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.45 vs. limit=6.0 +2024-09-19 10:52:06,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=650140.0, ans=0.05 +2024-09-19 10:52:08,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=650180.0, ans=0.0 +2024-09-19 10:52:21,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=650180.0, ans=0.0 +2024-09-19 10:52:23,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.31 vs. limit=15.0 +2024-09-19 10:52:27,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=650220.0, ans=0.1 +2024-09-19 10:52:50,215 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.97 vs. limit=12.0 +2024-09-19 10:52:53,752 INFO [train.py:1198] (0/2) Epoch 36, batch 4200, loss[loss=0.25, ctc_loss=0.1279, cr_loss=0.3804, attn_decoder_loss=0.2551, over 29498.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1162, cr_loss=0.357, attn_decoder_loss=0.2405, over 5797725.52 frames. ], batch size: 90, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:53:06,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=650300.0, ans=0.1 +2024-09-19 10:53:16,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=650340.0, ans=0.125 +2024-09-19 10:53:31,779 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.354e+01 8.662e+01 9.257e+01 9.687e+01 2.927e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-19 10:53:47,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.56 vs. limit=15.0 +2024-09-19 10:53:58,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=650460.0, ans=0.0 +2024-09-19 10:53:59,123 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.30 vs. limit=22.5 +2024-09-19 10:54:01,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=650460.0, ans=0.125 +2024-09-19 10:54:08,378 INFO [train.py:1198] (0/2) Epoch 36, batch 4250, loss[loss=0.2205, ctc_loss=0.1073, cr_loss=0.3293, attn_decoder_loss=0.2258, over 29536.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1153, cr_loss=0.3554, attn_decoder_loss=0.2402, over 5804306.45 frames. 
], batch size: 74, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:54:12,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=650500.0, ans=0.2 +2024-09-19 10:54:21,814 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:54:30,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=650540.0, ans=0.125 +2024-09-19 10:55:00,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=650620.0, ans=0.125 +2024-09-19 10:55:02,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=650620.0, ans=0.125 +2024-09-19 10:55:02,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=650620.0, ans=0.0 +2024-09-19 10:55:19,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=650660.0, ans=0.125 +2024-09-19 10:55:22,697 INFO [train.py:1198] (0/2) Epoch 36, batch 4300, loss[loss=0.2343, ctc_loss=0.1137, cr_loss=0.3545, attn_decoder_loss=0.2398, over 29567.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1151, cr_loss=0.3549, attn_decoder_loss=0.2404, over 5794053.28 frames. ], batch size: 87, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:55:52,989 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.70 vs. limit=15.0 +2024-09-19 10:55:58,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=650780.0, ans=0.125 +2024-09-19 10:56:01,045 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.493e+01 8.651e+01 9.063e+01 9.682e+01 5.777e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-19 10:56:29,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=650860.0, ans=0.2 +2024-09-19 10:56:36,544 INFO [train.py:1198] (0/2) Epoch 36, batch 4350, loss[loss=0.242, ctc_loss=0.122, cr_loss=0.3725, attn_decoder_loss=0.2471, over 29516.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1176, cr_loss=0.361, attn_decoder_loss=0.2434, over 5797119.86 frames. ], batch size: 97, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:56:39,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=650900.0, ans=0.125 +2024-09-19 10:56:44,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=650900.0, ans=0.1 +2024-09-19 10:56:56,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=650940.0, ans=0.125 +2024-09-19 10:57:33,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=651020.0, ans=0.125 +2024-09-19 10:57:51,173 INFO [train.py:1198] (0/2) Epoch 36, batch 4400, loss[loss=0.2419, ctc_loss=0.1234, cr_loss=0.3699, attn_decoder_loss=0.2469, over 27603.00 frames. 
], tot_loss[loss=0.2401, ctc_loss=0.119, cr_loss=0.3638, attn_decoder_loss=0.2455, over 5769485.13 frames. ], batch size: 125, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:57:56,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=651100.0, ans=0.1 +2024-09-19 10:58:09,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=651140.0, ans=15.0 +2024-09-19 10:58:22,961 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.13 vs. limit=6.0 +2024-09-19 10:58:23,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=651180.0, ans=0.2 +2024-09-19 10:58:29,468 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.803e+01 8.945e+01 9.277e+01 9.704e+01 3.205e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 10:58:33,450 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.26 vs. limit=15.0 +2024-09-19 10:58:34,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=651220.0, ans=0.1 +2024-09-19 10:58:50,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=651260.0, ans=0.035 +2024-09-19 10:59:05,956 INFO [train.py:1198] (0/2) Epoch 36, batch 4450, loss[loss=0.2595, ctc_loss=0.1477, cr_loss=0.3898, attn_decoder_loss=0.2632, over 20126.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1224, cr_loss=0.3687, attn_decoder_loss=0.2476, over 5583111.17 frames. ], batch size: 209, lr: 3.04e-03, grad_scale: 8.0 +2024-09-19 10:59:10,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=651300.0, ans=0.2 +2024-09-19 10:59:13,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=651300.0, ans=0.125 +2024-09-19 10:59:19,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=651340.0, ans=0.07 +2024-09-19 10:59:40,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=651380.0, ans=0.0 +2024-09-19 10:59:53,840 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.68 vs. limit=15.0 +2024-09-19 11:00:18,826 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:00:21,397 INFO [train.py:1198] (0/2) Epoch 36, batch 4500, loss[loss=0.2485, ctc_loss=0.1334, cr_loss=0.3379, attn_decoder_loss=0.2538, over 20068.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1258, cr_loss=0.3714, attn_decoder_loss=0.2495, over 5239238.40 frames. 
], batch size: 209, lr: 3.04e-03, grad_scale: 8.0 +2024-09-19 11:00:23,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=651500.0, ans=0.125 +2024-09-19 11:00:59,344 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-36.pt +2024-09-19 11:01:50,756 INFO [train.py:1198] (0/2) Epoch 37, batch 0, loss[loss=0.2221, ctc_loss=0.1068, cr_loss=0.3496, attn_decoder_loss=0.2272, over 29610.00 frames. ], tot_loss[loss=0.2221, ctc_loss=0.1068, cr_loss=0.3496, attn_decoder_loss=0.2272, over 29610.00 frames. ], batch size: 73, lr: 3.00e-03, grad_scale: 16.0 +2024-09-19 11:01:50,756 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 11:02:09,657 INFO [train.py:1230] (0/2) Epoch 37, validation: loss=0.2132, ctc_loss=0.03619, cr_loss=6.181e-15, attn_decoder_loss=0.2329, over 944034.00 frames. +2024-09-19 11:02:09,657 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 11:02:12,628 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.774e+01 1.049e+02 1.138e+02 1.230e+02 2.136e+02, threshold=2.276e+02, percent-clipped=1.0 +2024-09-19 11:02:12,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=651600.0, ans=0.0 +2024-09-19 11:02:21,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=651600.0, ans=0.125 +2024-09-19 11:02:22,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=651600.0, ans=0.0 +2024-09-19 11:02:51,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=651680.0, ans=0.125 +2024-09-19 11:02:53,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=651680.0, ans=0.0 +2024-09-19 11:02:54,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=651720.0, ans=0.2 +2024-09-19 11:02:59,825 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.39 vs. limit=12.0 +2024-09-19 11:03:07,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.67 vs. limit=12.0 +2024-09-19 11:03:08,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=651720.0, ans=0.125 +2024-09-19 11:03:08,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=651720.0, ans=0.0 +2024-09-19 11:03:15,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=651760.0, ans=0.5 +2024-09-19 11:03:25,956 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.27 vs. limit=15.0 +2024-09-19 11:03:26,220 INFO [train.py:1198] (0/2) Epoch 37, batch 50, loss[loss=0.2151, ctc_loss=0.103, cr_loss=0.3234, attn_decoder_loss=0.2204, over 29465.00 frames. 
], tot_loss[loss=0.2365, ctc_loss=0.1176, cr_loss=0.3617, attn_decoder_loss=0.2416, over 1267905.63 frames. ], batch size: 70, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:03:29,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=651800.0, ans=0.0 +2024-09-19 11:03:39,149 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.64 vs. limit=10.0 +2024-09-19 11:03:44,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=651840.0, ans=0.125 +2024-09-19 11:03:48,743 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.58 vs. limit=8.0 +2024-09-19 11:04:04,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=651880.0, ans=0.125 +2024-09-19 11:04:19,332 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.22 vs. limit=6.0 +2024-09-19 11:04:42,598 INFO [train.py:1198] (0/2) Epoch 37, batch 100, loss[loss=0.2241, ctc_loss=0.1088, cr_loss=0.3453, attn_decoder_loss=0.2292, over 29519.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1186, cr_loss=0.3633, attn_decoder_loss=0.2437, over 2251832.60 frames. ], batch size: 76, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:04:46,987 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.810e+01 8.722e+01 9.272e+01 9.995e+01 2.422e+02, threshold=1.854e+02, percent-clipped=1.0 +2024-09-19 11:04:54,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=652000.0, ans=0.09899494936611666 +2024-09-19 11:05:02,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=652040.0, ans=0.125 +2024-09-19 11:05:10,621 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.03 vs. limit=15.0 +2024-09-19 11:05:11,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=652080.0, ans=0.2 +2024-09-19 11:05:26,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=652120.0, ans=0.125 +2024-09-19 11:05:29,641 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.24 vs. limit=15.0 +2024-09-19 11:05:57,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=652200.0, ans=0.0 +2024-09-19 11:05:59,137 INFO [train.py:1198] (0/2) Epoch 37, batch 150, loss[loss=0.2018, ctc_loss=0.09433, cr_loss=0.3092, attn_decoder_loss=0.2069, over 29460.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1169, cr_loss=0.3598, attn_decoder_loss=0.2415, over 3046907.45 frames. ], batch size: 70, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:06:29,292 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.34 vs. 
limit=22.5 +2024-09-19 11:06:48,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=652320.0, ans=0.125 +2024-09-19 11:07:08,358 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.11 vs. limit=10.0 +2024-09-19 11:07:16,349 INFO [train.py:1198] (0/2) Epoch 37, batch 200, loss[loss=0.2504, ctc_loss=0.1329, cr_loss=0.3902, attn_decoder_loss=0.2547, over 27305.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1159, cr_loss=0.3581, attn_decoder_loss=0.2403, over 3659653.17 frames. ], batch size: 124, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:07:20,812 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.578e+01 8.412e+01 8.881e+01 9.450e+01 8.334e+02, threshold=1.776e+02, percent-clipped=1.0 +2024-09-19 11:07:25,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=652400.0, ans=0.0 +2024-09-19 11:07:25,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=652400.0, ans=0.2 +2024-09-19 11:07:33,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=652440.0, ans=0.125 +2024-09-19 11:07:34,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=652440.0, ans=0.0 +2024-09-19 11:07:49,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=652480.0, ans=0.025 +2024-09-19 11:07:54,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=652480.0, ans=0.125 +2024-09-19 11:08:04,801 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:08:12,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=652520.0, ans=0.0 +2024-09-19 11:08:31,987 INFO [train.py:1198] (0/2) Epoch 37, batch 250, loss[loss=0.2477, ctc_loss=0.1242, cr_loss=0.3734, attn_decoder_loss=0.2531, over 29235.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1152, cr_loss=0.3568, attn_decoder_loss=0.24, over 4141908.43 frames. 
], batch size: 100, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:08:35,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=652600.0, ans=0.125 +2024-09-19 11:08:49,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=652640.0, ans=0.125 +2024-09-19 11:08:53,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=652640.0, ans=0.1 +2024-09-19 11:09:01,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=652680.0, ans=0.0 +2024-09-19 11:09:13,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=652680.0, ans=0.2 +2024-09-19 11:09:16,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=652720.0, ans=0.2 +2024-09-19 11:09:39,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=652760.0, ans=0.2 +2024-09-19 11:09:50,042 INFO [train.py:1198] (0/2) Epoch 37, batch 300, loss[loss=0.2621, ctc_loss=0.1394, cr_loss=0.4197, attn_decoder_loss=0.2664, over 29575.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1152, cr_loss=0.3567, attn_decoder_loss=0.2396, over 4510415.68 frames. ], batch size: 92, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:09:54,608 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.181e+01 8.309e+01 8.922e+01 9.556e+01 2.479e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-19 11:09:59,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=652800.0, ans=0.125 +2024-09-19 11:10:07,042 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.61 vs. limit=6.0 +2024-09-19 11:10:13,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=652840.0, ans=0.0 +2024-09-19 11:10:27,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=652880.0, ans=0.0 +2024-09-19 11:10:29,536 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.08 vs. limit=15.0 +2024-09-19 11:10:47,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=652920.0, ans=0.2 +2024-09-19 11:10:47,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=652920.0, ans=0.0 +2024-09-19 11:11:00,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=652960.0, ans=0.125 +2024-09-19 11:11:03,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=652960.0, ans=0.125 +2024-09-19 11:11:08,035 INFO [train.py:1198] (0/2) Epoch 37, batch 350, loss[loss=0.2117, ctc_loss=0.1017, cr_loss=0.3278, attn_decoder_loss=0.2166, over 29333.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1157, cr_loss=0.3577, attn_decoder_loss=0.2401, over 4795216.70 frames. 
], batch size: 71, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:11:34,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=653040.0, ans=0.1 +2024-09-19 11:11:34,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=653040.0, ans=0.125 +2024-09-19 11:11:42,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=653080.0, ans=0.0 +2024-09-19 11:11:52,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=653120.0, ans=0.0 +2024-09-19 11:12:07,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=653160.0, ans=0.04949747468305833 +2024-09-19 11:12:14,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=653160.0, ans=0.1 +2024-09-19 11:12:23,525 INFO [train.py:1198] (0/2) Epoch 37, batch 400, loss[loss=0.24, ctc_loss=0.1179, cr_loss=0.385, attn_decoder_loss=0.245, over 29683.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1162, cr_loss=0.3582, attn_decoder_loss=0.2404, over 5025642.26 frames. ], batch size: 82, lr: 3.00e-03, grad_scale: 16.0 +2024-09-19 11:12:28,152 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.730e+01 8.454e+01 8.886e+01 9.286e+01 1.359e+02, threshold=1.777e+02, percent-clipped=0.0 +2024-09-19 11:12:42,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=653240.0, ans=0.125 +2024-09-19 11:12:50,122 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.31 vs. limit=22.5 +2024-09-19 11:12:54,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=653280.0, ans=0.125 +2024-09-19 11:13:00,572 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:13:07,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=653320.0, ans=0.07 +2024-09-19 11:13:08,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.69 vs. limit=22.5 +2024-09-19 11:13:13,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=653320.0, ans=0.125 +2024-09-19 11:13:41,659 INFO [train.py:1198] (0/2) Epoch 37, batch 450, loss[loss=0.2445, ctc_loss=0.1217, cr_loss=0.3708, attn_decoder_loss=0.2499, over 29716.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1163, cr_loss=0.3587, attn_decoder_loss=0.2408, over 5186674.24 frames. ], batch size: 83, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:14:00,232 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.28 vs. 
limit=22.5 +2024-09-19 11:14:06,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=653440.0, ans=0.0 +2024-09-19 11:14:08,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=653440.0, ans=0.0 +2024-09-19 11:14:09,851 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:14:15,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=653480.0, ans=0.035 +2024-09-19 11:14:17,966 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.66 vs. limit=15.0 +2024-09-19 11:14:27,073 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.82 vs. limit=12.0 +2024-09-19 11:14:35,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=653520.0, ans=0.2 +2024-09-19 11:14:35,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=653520.0, ans=0.125 +2024-09-19 11:15:00,247 INFO [train.py:1198] (0/2) Epoch 37, batch 500, loss[loss=0.2557, ctc_loss=0.1317, cr_loss=0.3914, attn_decoder_loss=0.2607, over 29487.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1158, cr_loss=0.3577, attn_decoder_loss=0.2402, over 5328965.95 frames. ], batch size: 94, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:15:00,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=653600.0, ans=0.0 +2024-09-19 11:15:06,232 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.426e+01 9.049e+01 9.525e+01 1.733e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 11:15:07,003 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.39 vs. limit=15.0 +2024-09-19 11:15:18,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=653640.0, ans=0.0 +2024-09-19 11:15:23,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=653640.0, ans=0.125 +2024-09-19 11:15:27,865 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.94 vs. limit=22.5 +2024-09-19 11:16:02,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=653760.0, ans=0.125 +2024-09-19 11:16:07,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=653760.0, ans=0.125 +2024-09-19 11:16:10,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=653760.0, ans=0.1 +2024-09-19 11:16:14,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=653800.0, ans=0.95 +2024-09-19 11:16:15,821 INFO [train.py:1198] (0/2) Epoch 37, batch 550, loss[loss=0.2437, ctc_loss=0.1281, cr_loss=0.382, attn_decoder_loss=0.2481, over 28806.00 frames. 
], tot_loss[loss=0.235, ctc_loss=0.116, cr_loss=0.358, attn_decoder_loss=0.2403, over 5423112.18 frames. ], batch size: 104, lr: 3.00e-03, grad_scale: 8.0
+2024-09-19 11:16:28,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=653800.0, ans=0.1
+2024-09-19 11:16:29,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=653840.0, ans=0.0
+2024-09-19 11:16:34,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=653840.0, ans=0.2
+2024-09-19 11:16:48,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.25 vs. limit=22.5
+2024-09-19 11:16:48,636 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.23 vs. limit=22.5
+2024-09-19 11:16:50,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=653880.0, ans=0.0
+2024-09-19 11:17:04,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=653920.0, ans=0.1
+2024-09-19 11:17:06,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=653920.0, ans=0.0
+2024-09-19 11:17:18,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=653960.0, ans=0.0
+2024-09-19 11:17:25,314 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.27 vs. limit=15.0
+2024-09-19 11:17:27,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=653960.0, ans=0.5
+2024-09-19 11:17:29,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=653960.0, ans=0.125
+2024-09-19 11:17:31,926 INFO [train.py:1198] (0/2) Epoch 37, batch 600, loss[loss=0.2515, ctc_loss=0.1266, cr_loss=0.365, attn_decoder_loss=0.2573, over 29291.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.116, cr_loss=0.3577, attn_decoder_loss=0.2404, over 5510549.40 frames. ], batch size: 100, lr: 3.00e-03, grad_scale: 8.0
+2024-09-19 11:17:32,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=654000.0, ans=0.125
+2024-09-19 11:17:38,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=654000.0, ans=0.125
+2024-09-19 11:17:40,201 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.510e+01 8.494e+01 8.998e+01 9.681e+01 2.744e+02, threshold=1.800e+02, percent-clipped=3.0
+2024-09-19 11:17:54,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=654040.0, ans=0.1
+2024-09-19 11:17:59,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=654040.0, ans=0.125
+2024-09-19 11:18:02,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=654040.0, ans=0.125
+2024-09-19 11:18:12,115 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.24 vs. limit=15.0
+2024-09-19 11:18:17,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=654080.0, ans=10.0
+2024-09-19 11:18:20,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=654120.0, ans=0.0
+2024-09-19 11:18:51,781 INFO [train.py:1198] (0/2) Epoch 37, batch 650, loss[loss=0.2281, ctc_loss=0.1059, cr_loss=0.3265, attn_decoder_loss=0.2344, over 29758.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1149, cr_loss=0.3554, attn_decoder_loss=0.2397, over 5587828.02 frames. ], batch size: 81, lr: 3.00e-03, grad_scale: 8.0
+2024-09-19 11:19:21,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=654280.0, ans=0.125
+2024-09-19 11:19:49,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=654320.0, ans=0.125
+2024-09-19 11:19:58,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=654360.0, ans=0.1
+2024-09-19 11:20:07,721 INFO [train.py:1198] (0/2) Epoch 37, batch 700, loss[loss=0.2347, ctc_loss=0.1182, cr_loss=0.3573, attn_decoder_loss=0.2397, over 29548.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1153, cr_loss=0.3561, attn_decoder_loss=0.2404, over 5639990.92 frames. ], batch size: 76, lr: 3.00e-03, grad_scale: 8.0
+2024-09-19 11:20:11,365 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.44 vs. limit=6.0
+2024-09-19 11:20:13,643 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.549e+01 8.958e+01 9.415e+01 1.725e+02, threshold=1.792e+02, percent-clipped=0.0
+2024-09-19 11:20:54,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=654520.0, ans=0.0
+2024-09-19 11:20:59,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.36 vs. limit=15.0
+2024-09-19 11:21:00,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=654520.0, ans=0.2
+2024-09-19 11:21:22,120 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 11:21:23,289 INFO [train.py:1198] (0/2) Epoch 37, batch 750, loss[loss=0.2403, ctc_loss=0.1113, cr_loss=0.3465, attn_decoder_loss=0.247, over 29710.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1149, cr_loss=0.3555, attn_decoder_loss=0.2401, over 5676686.06 frames. ], batch size: 82, lr: 3.00e-03, grad_scale: 8.0
+2024-09-19 11:21:23,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=654600.0, ans=0.0
+2024-09-19 11:22:27,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=654760.0, ans=0.0
+2024-09-19 11:22:39,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=654760.0, ans=0.07
+2024-09-19 11:22:40,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=654760.0, ans=0.125
+2024-09-19 11:22:43,727 INFO [train.py:1198] (0/2) Epoch 37, batch 800, loss[loss=0.2199, ctc_loss=0.09739, cr_loss=0.3098, attn_decoder_loss=0.2266, over 29621.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1149, cr_loss=0.3553, attn_decoder_loss=0.2401, over 5708114.80 frames. ], batch size: 73, lr: 2.99e-03, grad_scale: 16.0
+2024-09-19 11:22:47,071 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 11:22:49,768 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.191e+01 8.523e+01 9.017e+01 9.581e+01 2.303e+02, threshold=1.803e+02, percent-clipped=1.0
+2024-09-19 11:22:56,593 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.93 vs. limit=15.0
+2024-09-19 11:23:26,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.57 vs. limit=15.0
+2024-09-19 11:23:58,746 INFO [train.py:1198] (0/2) Epoch 37, batch 850, loss[loss=0.2389, ctc_loss=0.1103, cr_loss=0.3579, attn_decoder_loss=0.2452, over 29733.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1141, cr_loss=0.3538, attn_decoder_loss=0.2394, over 5737190.37 frames. ], batch size: 89, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:24:45,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=655120.0, ans=0.1
+2024-09-19 11:24:47,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=655120.0, ans=0.0
+2024-09-19 11:25:15,283 INFO [train.py:1198] (0/2) Epoch 37, batch 900, loss[loss=0.2099, ctc_loss=0.09156, cr_loss=0.2977, attn_decoder_loss=0.2164, over 29630.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1145, cr_loss=0.3542, attn_decoder_loss=0.2395, over 5741406.26 frames. ], batch size: 73, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:25:16,549 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=14.45 vs. limit=22.5
+2024-09-19 11:25:20,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=655200.0, ans=0.125
+2024-09-19 11:25:22,644 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.430e+01 8.623e+01 9.305e+01 9.762e+01 2.031e+02, threshold=1.861e+02, percent-clipped=1.0
+2024-09-19 11:25:29,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=655240.0, ans=0.125
+2024-09-19 11:25:33,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=655240.0, ans=0.0
+2024-09-19 11:25:49,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=655280.0, ans=0.0
+2024-09-19 11:26:08,606 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.50 vs. limit=15.0
+2024-09-19 11:26:11,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=655320.0, ans=0.2
+2024-09-19 11:26:17,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_na.min_abs, batch_count=655320.0, ans=0.02
+2024-09-19 11:26:20,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=655360.0, ans=0.1
+2024-09-19 11:26:29,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=655360.0, ans=0.05
+2024-09-19 11:26:34,858 INFO [train.py:1198] (0/2) Epoch 37, batch 950, loss[loss=0.2209, ctc_loss=0.1026, cr_loss=0.3269, attn_decoder_loss=0.2267, over 29505.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1149, cr_loss=0.355, attn_decoder_loss=0.2397, over 5742394.43 frames. ], batch size: 74, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:27:22,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=655520.0, ans=0.1
+2024-09-19 11:27:22,254 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 11:27:29,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=655520.0, ans=0.0
+2024-09-19 11:27:32,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=655520.0, ans=0.0
+2024-09-19 11:27:50,303 INFO [train.py:1198] (0/2) Epoch 37, batch 1000, loss[loss=0.2257, ctc_loss=0.1073, cr_loss=0.3537, attn_decoder_loss=0.231, over 29512.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1155, cr_loss=0.3566, attn_decoder_loss=0.2402, over 5736013.86 frames. ], batch size: 77, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:27:51,427 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.29 vs. limit=10.0
+2024-09-19 11:27:52,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=655600.0, ans=0.0
+2024-09-19 11:27:53,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=655600.0, ans=0.2
+2024-09-19 11:27:57,723 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.598e+01 8.810e+01 9.265e+01 9.999e+01 4.241e+02, threshold=1.853e+02, percent-clipped=2.0
+2024-09-19 11:28:05,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=655640.0, ans=0.1
+2024-09-19 11:28:49,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=655760.0, ans=0.04949747468305833
+2024-09-19 11:28:52,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=655760.0, ans=0.125
+2024-09-19 11:28:53,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.12 vs. limit=6.0
+2024-09-19 11:29:06,009 INFO [train.py:1198] (0/2) Epoch 37, batch 1050, loss[loss=0.242, ctc_loss=0.1116, cr_loss=0.3544, attn_decoder_loss=0.2486, over 29699.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.115, cr_loss=0.3559, attn_decoder_loss=0.2396, over 5742996.70 frames. ], batch size: 85, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:29:11,327 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.10 vs. limit=6.0
+2024-09-19 11:29:12,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=655800.0, ans=0.125
+2024-09-19 11:29:17,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.38 vs. limit=22.5
+2024-09-19 11:29:23,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=655840.0, ans=0.125
+2024-09-19 11:29:30,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=655840.0, ans=0.2
+2024-09-19 11:29:34,452 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.73 vs. limit=22.5
+2024-09-19 11:29:42,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=655880.0, ans=0.0
+2024-09-19 11:29:47,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=655880.0, ans=0.125
+2024-09-19 11:29:53,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=655880.0, ans=0.2
+2024-09-19 11:30:25,478 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-164000.pt
+2024-09-19 11:30:33,900 INFO [train.py:1198] (0/2) Epoch 37, batch 1100, loss[loss=0.2279, ctc_loss=0.1121, cr_loss=0.3455, attn_decoder_loss=0.2331, over 29435.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1148, cr_loss=0.3555, attn_decoder_loss=0.2394, over 5756406.62 frames. ], batch size: 78, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:30:41,293 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.579e+01 8.502e+01 8.949e+01 9.455e+01 1.229e+02, threshold=1.790e+02, percent-clipped=0.0
+2024-09-19 11:30:41,993 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.19 vs. limit=22.5
+2024-09-19 11:30:55,902 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.14 vs. limit=15.0
+2024-09-19 11:31:05,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=656080.0, ans=0.025
+2024-09-19 11:31:07,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=656080.0, ans=0.2
+2024-09-19 11:31:34,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=656160.0, ans=0.2
+2024-09-19 11:31:36,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=656160.0, ans=0.035
+2024-09-19 11:31:49,460 INFO [train.py:1198] (0/2) Epoch 37, batch 1150, loss[loss=0.2347, ctc_loss=0.116, cr_loss=0.3574, attn_decoder_loss=0.2399, over 29466.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1147, cr_loss=0.3555, attn_decoder_loss=0.2392, over 5755660.52 frames. ], batch size: 78, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:31:49,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=656200.0, ans=0.125
+2024-09-19 11:32:04,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.78 vs. limit=15.0
+2024-09-19 11:32:08,419 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.50 vs. limit=10.0
+2024-09-19 11:32:11,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=656240.0, ans=0.0
+2024-09-19 11:32:17,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=656240.0, ans=0.125
+2024-09-19 11:32:18,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=656280.0, ans=0.2
+2024-09-19 11:32:34,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=656320.0, ans=10.0
+2024-09-19 11:32:43,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=656320.0, ans=0.0
+2024-09-19 11:33:03,452 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=9.36 vs. limit=12.0
+2024-09-19 11:33:04,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=656400.0, ans=0.07
+2024-09-19 11:33:05,431 INFO [train.py:1198] (0/2) Epoch 37, batch 1200, loss[loss=0.2407, ctc_loss=0.1152, cr_loss=0.3511, attn_decoder_loss=0.2468, over 29681.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1151, cr_loss=0.3561, attn_decoder_loss=0.2399, over 5747735.79 frames. ], batch size: 85, lr: 2.99e-03, grad_scale: 16.0
+2024-09-19 11:33:07,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=656400.0, ans=0.125
+2024-09-19 11:33:10,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=656400.0, ans=0.0
+2024-09-19 11:33:12,935 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.373e+01 8.756e+01 9.143e+01 9.785e+01 1.884e+02, threshold=1.829e+02, percent-clipped=2.0
+2024-09-19 11:33:16,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=656400.0, ans=0.025
+2024-09-19 11:33:19,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=656440.0, ans=0.2
+2024-09-19 11:33:42,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.03 vs. limit=12.0
+2024-09-19 11:34:12,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=656560.0, ans=0.125
+2024-09-19 11:34:17,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=656560.0, ans=0.125
+2024-09-19 11:34:19,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=656560.0, ans=0.05
+2024-09-19 11:34:25,527 INFO [train.py:1198] (0/2) Epoch 37, batch 1250, loss[loss=0.2542, ctc_loss=0.1314, cr_loss=0.4099, attn_decoder_loss=0.2587, over 29537.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1159, cr_loss=0.3583, attn_decoder_loss=0.2407, over 5774263.28 frames. ], batch size: 92, lr: 2.99e-03, grad_scale: 16.0
+2024-09-19 11:34:28,047 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.12 vs. limit=15.0
+2024-09-19 11:35:03,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=656680.0, ans=0.0
+2024-09-19 11:35:41,475 INFO [train.py:1198] (0/2) Epoch 37, batch 1300, loss[loss=0.2502, ctc_loss=0.1253, cr_loss=0.3691, attn_decoder_loss=0.2558, over 28229.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1157, cr_loss=0.358, attn_decoder_loss=0.2403, over 5778888.46 frames. ], batch size: 111, lr: 2.99e-03, grad_scale: 16.0
+2024-09-19 11:35:45,604 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.82 vs. limit=12.0
+2024-09-19 11:35:49,096 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.622e+01 8.478e+01 8.951e+01 9.333e+01 1.111e+02, threshold=1.790e+02, percent-clipped=0.0
+2024-09-19 11:35:49,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.22 vs. limit=22.5
+2024-09-19 11:36:56,958 INFO [train.py:1198] (0/2) Epoch 37, batch 1350, loss[loss=0.2336, ctc_loss=0.1151, cr_loss=0.3504, attn_decoder_loss=0.239, over 29750.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1153, cr_loss=0.3576, attn_decoder_loss=0.2399, over 5795810.87 frames. ], batch size: 81, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:37:00,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=657000.0, ans=0.0
+2024-09-19 11:37:04,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=657000.0, ans=0.125
+2024-09-19 11:37:06,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=657000.0, ans=0.0
+2024-09-19 11:37:08,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=657000.0, ans=0.125
+2024-09-19 11:37:13,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=657040.0, ans=0.1
+2024-09-19 11:37:19,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=657040.0, ans=0.05
+2024-09-19 11:37:21,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=657040.0, ans=0.1
+2024-09-19 11:37:35,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=657080.0, ans=0.0
+2024-09-19 11:37:35,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=657080.0, ans=0.1
+2024-09-19 11:37:52,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=657120.0, ans=0.0
+2024-09-19 11:37:52,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=657120.0, ans=0.1
+2024-09-19 11:37:56,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=657120.0, ans=0.125
+2024-09-19 11:37:57,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff2.min_abs, batch_count=657120.0, ans=0.1
+2024-09-19 11:37:59,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=657160.0, ans=0.0
+2024-09-19 11:38:16,411 INFO [train.py:1198] (0/2) Epoch 37, batch 1400, loss[loss=0.2098, ctc_loss=0.1001, cr_loss=0.328, attn_decoder_loss=0.2147, over 29575.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1147, cr_loss=0.3562, attn_decoder_loss=0.2394, over 5807333.71 frames. ], batch size: 69, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:38:25,472 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.397e+01 9.027e+01 9.734e+01 1.349e+02, threshold=1.805e+02, percent-clipped=0.0
+2024-09-19 11:38:25,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=657200.0, ans=0.0
+2024-09-19 11:38:46,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=657280.0, ans=0.025
+2024-09-19 11:39:04,166 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.84 vs. limit=10.0
+2024-09-19 11:39:31,999 INFO [train.py:1198] (0/2) Epoch 37, batch 1450, loss[loss=0.2456, ctc_loss=0.1233, cr_loss=0.3829, attn_decoder_loss=0.2507, over 29424.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.115, cr_loss=0.3566, attn_decoder_loss=0.2398, over 5803997.87 frames. ], batch size: 94, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:39:38,987 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.81 vs. limit=15.0
+2024-09-19 11:39:58,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=657440.0, ans=0.1
+2024-09-19 11:40:05,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=657480.0, ans=0.125
+2024-09-19 11:40:11,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=657480.0, ans=0.125
+2024-09-19 11:40:18,275 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.39 vs. limit=10.0
+2024-09-19 11:40:29,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=657520.0, ans=0.0
+2024-09-19 11:40:43,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=657560.0, ans=0.0
+2024-09-19 11:40:48,388 INFO [train.py:1198] (0/2) Epoch 37, batch 1500, loss[loss=0.2454, ctc_loss=0.1162, cr_loss=0.3532, attn_decoder_loss=0.2519, over 29625.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.115, cr_loss=0.3564, attn_decoder_loss=0.2403, over 5805435.98 frames. ], batch size: 86, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:40:57,334 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.254e+01 8.474e+01 8.863e+01 9.485e+01 5.565e+02, threshold=1.773e+02, percent-clipped=3.0
+2024-09-19 11:41:37,109 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.37 vs. limit=15.0
+2024-09-19 11:41:38,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=657720.0, ans=0.2
+2024-09-19 11:42:07,928 INFO [train.py:1198] (0/2) Epoch 37, batch 1550, loss[loss=0.244, ctc_loss=0.1277, cr_loss=0.3926, attn_decoder_loss=0.2482, over 29498.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1153, cr_loss=0.3571, attn_decoder_loss=0.2402, over 5781436.38 frames. ], batch size: 90, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:42:11,184 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 11:42:17,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=657800.0, ans=0.125
+2024-09-19 11:42:21,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=657840.0, ans=0.07
+2024-09-19 11:42:26,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=657840.0, ans=0.125
+2024-09-19 11:42:42,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=657880.0, ans=0.1
+2024-09-19 11:42:53,527 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.75 vs. limit=22.5
+2024-09-19 11:42:56,857 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.47 vs. limit=22.5
+2024-09-19 11:43:01,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=657920.0, ans=0.1
+2024-09-19 11:43:16,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=657960.0, ans=0.125
+2024-09-19 11:43:17,590 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 11:43:23,372 INFO [train.py:1198] (0/2) Epoch 37, batch 1600, loss[loss=0.2489, ctc_loss=0.1253, cr_loss=0.3814, attn_decoder_loss=0.2541, over 29691.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1156, cr_loss=0.3568, attn_decoder_loss=0.2402, over 5763287.93 frames. ], batch size: 85, lr: 2.99e-03, grad_scale: 16.0
+2024-09-19 11:43:32,327 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.772e+01 8.728e+01 9.146e+01 9.748e+01 2.180e+02, threshold=1.829e+02, percent-clipped=1.0
+2024-09-19 11:43:40,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=658040.0, ans=0.125
+2024-09-19 11:43:41,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=658040.0, ans=0.1
+2024-09-19 11:44:15,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=658120.0, ans=0.125
+2024-09-19 11:44:16,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=658120.0, ans=0.1
+2024-09-19 11:44:33,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=658160.0, ans=0.125
+2024-09-19 11:44:38,993 INFO [train.py:1198] (0/2) Epoch 37, batch 1650, loss[loss=0.2414, ctc_loss=0.1188, cr_loss=0.3664, attn_decoder_loss=0.2468, over 29712.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1152, cr_loss=0.3554, attn_decoder_loss=0.2396, over 5756101.23 frames. ], batch size: 89, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:45:09,435 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.94 vs. limit=10.0
+2024-09-19 11:45:16,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=658280.0, ans=0.0
+2024-09-19 11:45:29,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=658320.0, ans=0.125
+2024-09-19 11:45:44,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=658360.0, ans=0.125
+2024-09-19 11:45:59,549 INFO [train.py:1198] (0/2) Epoch 37, batch 1700, loss[loss=0.2049, ctc_loss=0.09385, cr_loss=0.3058, attn_decoder_loss=0.2105, over 29570.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1151, cr_loss=0.3553, attn_decoder_loss=0.2397, over 5778281.86 frames. ], batch size: 69, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:46:10,139 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.251e+01 8.607e+01 9.068e+01 9.479e+01 1.872e+02, threshold=1.814e+02, percent-clipped=1.0
+2024-09-19 11:46:13,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=658440.0, ans=0.125
+2024-09-19 11:46:18,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=658440.0, ans=0.125
+2024-09-19 11:47:04,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=658560.0, ans=0.125
+2024-09-19 11:47:15,016 INFO [train.py:1198] (0/2) Epoch 37, batch 1750, loss[loss=0.2056, ctc_loss=0.09605, cr_loss=0.3209, attn_decoder_loss=0.2106, over 29345.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.115, cr_loss=0.3551, attn_decoder_loss=0.2395, over 5787040.76 frames. ], batch size: 67, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:48:08,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=658720.0, ans=0.1
+2024-09-19 11:48:15,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.33 vs. limit=12.0
+2024-09-19 11:48:28,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=658800.0, ans=0.125
+2024-09-19 11:48:30,139 INFO [train.py:1198] (0/2) Epoch 37, batch 1800, loss[loss=0.2474, ctc_loss=0.1221, cr_loss=0.3817, attn_decoder_loss=0.2528, over 29689.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.115, cr_loss=0.355, attn_decoder_loss=0.2398, over 5789606.66 frames. ], batch size: 83, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:48:38,775 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.16 vs. limit=6.0
+2024-09-19 11:48:40,891 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.421e+01 8.885e+01 9.322e+01 2.627e+02, threshold=1.777e+02, percent-clipped=1.0
+2024-09-19 11:48:41,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.02 vs. limit=10.0
+2024-09-19 11:49:01,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=658880.0, ans=0.2
+2024-09-19 11:49:02,198 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.83 vs. limit=15.0
+2024-09-19 11:49:08,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=658880.0, ans=0.125
+2024-09-19 11:49:09,397 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.60 vs. limit=15.0
+2024-09-19 11:49:16,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=658920.0, ans=0.0
+2024-09-19 11:49:22,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=658920.0, ans=15.0
+2024-09-19 11:49:29,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=658920.0, ans=0.125
+2024-09-19 11:49:30,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.75 vs. limit=15.0
+2024-09-19 11:49:30,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=658920.0, ans=0.0
+2024-09-19 11:49:38,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=658960.0, ans=0.125
+2024-09-19 11:49:39,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=658960.0, ans=0.125
+2024-09-19 11:49:50,131 INFO [train.py:1198] (0/2) Epoch 37, batch 1850, loss[loss=0.2407, ctc_loss=0.1131, cr_loss=0.3372, attn_decoder_loss=0.2474, over 29611.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1147, cr_loss=0.3548, attn_decoder_loss=0.2396, over 5794486.09 frames. ], batch size: 86, lr: 2.99e-03, grad_scale: 8.0
+2024-09-19 11:49:54,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=659000.0, ans=0.125
+2024-09-19 11:50:38,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=659120.0, ans=0.0
+2024-09-19 11:50:40,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=659120.0, ans=0.1
+2024-09-19 11:50:48,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.03 vs. limit=22.5
+2024-09-19 11:50:52,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=659160.0, ans=0.1
+2024-09-19 11:50:55,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=659160.0, ans=0.0
+2024-09-19 11:51:00,329 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.03 vs. limit=6.0
+2024-09-19 11:51:05,336 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.24 vs. limit=15.0
+2024-09-19 11:51:05,857 INFO [train.py:1198] (0/2) Epoch 37, batch 1900, loss[loss=0.243, ctc_loss=0.1177, cr_loss=0.3596, attn_decoder_loss=0.2489, over 29689.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1147, cr_loss=0.3554, attn_decoder_loss=0.2402, over 5802425.78 frames. ], batch size: 89, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 11:51:16,272 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.346e+01 8.529e+01 8.942e+01 9.570e+01 1.575e+02, threshold=1.788e+02, percent-clipped=0.0
+2024-09-19 11:51:51,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=659320.0, ans=0.2
+2024-09-19 11:51:54,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=659320.0, ans=0.0
+2024-09-19 11:51:57,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.29 vs. limit=12.0
+2024-09-19 11:52:12,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=659360.0, ans=0.2
+2024-09-19 11:52:15,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=659360.0, ans=0.0
+2024-09-19 11:52:17,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=659360.0, ans=0.125
+2024-09-19 11:52:21,363 INFO [train.py:1198] (0/2) Epoch 37, batch 1950, loss[loss=0.2233, ctc_loss=0.1009, cr_loss=0.3347, attn_decoder_loss=0.2295, over 29453.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1151, cr_loss=0.3566, attn_decoder_loss=0.241, over 5817843.46 frames. ], batch size: 78, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 11:52:30,930 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 11:52:35,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=659440.0, ans=0.125
+2024-09-19 11:52:37,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=659440.0, ans=0.0
+2024-09-19 11:52:50,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten.whitening_limit, batch_count=659440.0, ans=22.5
+2024-09-19 11:52:55,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=659480.0, ans=0.125
+2024-09-19 11:53:01,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=659480.0, ans=0.1
+2024-09-19 11:53:06,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=659480.0, ans=0.1
+2024-09-19 11:53:33,182 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.35 vs. limit=15.0
+2024-09-19 11:53:41,381 INFO [train.py:1198] (0/2) Epoch 37, batch 2000, loss[loss=0.2075, ctc_loss=0.1029, cr_loss=0.3259, attn_decoder_loss=0.2119, over 29356.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1158, cr_loss=0.3577, attn_decoder_loss=0.2418, over 5794826.29 frames. ], batch size: 67, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 11:53:51,962 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 8.756e+01 9.402e+01 9.802e+01 1.853e+02, threshold=1.880e+02, percent-clipped=1.0
+2024-09-19 11:53:52,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=659600.0, ans=0.125
+2024-09-19 11:54:18,763 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.37 vs. limit=15.0
+2024-09-19 11:54:19,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=659680.0, ans=0.1
+2024-09-19 11:54:24,901 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.18 vs. limit=15.0
+2024-09-19 11:54:33,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=659720.0, ans=0.1
+2024-09-19 11:54:34,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=659720.0, ans=0.125
+2024-09-19 11:54:42,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=659760.0, ans=0.125
+2024-09-19 11:54:57,546 INFO [train.py:1198] (0/2) Epoch 37, batch 2050, loss[loss=0.2168, ctc_loss=0.1047, cr_loss=0.3497, attn_decoder_loss=0.2215, over 29460.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1156, cr_loss=0.3575, attn_decoder_loss=0.241, over 5787906.48 frames. ], batch size: 70, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 11:55:16,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=659840.0, ans=0.0
+2024-09-19 11:55:20,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=659840.0, ans=0.0
+2024-09-19 11:55:34,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=659880.0, ans=0.1
+2024-09-19 11:55:57,619 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.66 vs. limit=15.0
+2024-09-19 11:56:04,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=659960.0, ans=0.5
+2024-09-19 11:56:07,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=659960.0, ans=0.0
+2024-09-19 11:56:13,773 INFO [train.py:1198] (0/2) Epoch 37, batch 2100, loss[loss=0.2296, ctc_loss=0.1076, cr_loss=0.3531, attn_decoder_loss=0.2353, over 29754.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1146, cr_loss=0.356, attn_decoder_loss=0.24, over 5799731.84 frames. ], batch size: 81, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 11:56:14,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=660000.0, ans=0.025
+2024-09-19 11:56:24,219 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.530e+01 8.399e+01 8.911e+01 9.542e+01 1.204e+02, threshold=1.782e+02, percent-clipped=0.0
+2024-09-19 11:56:26,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=660000.0, ans=0.1
+2024-09-19 11:56:52,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=660080.0, ans=0.125
+2024-09-19 11:57:08,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=660120.0, ans=0.125
+2024-09-19 11:57:18,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=660160.0, ans=0.125
+2024-09-19 11:57:33,569 INFO [train.py:1198] (0/2) Epoch 37, batch 2150, loss[loss=0.2356, ctc_loss=0.1201, cr_loss=0.3653, attn_decoder_loss=0.2403, over 29441.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.114, cr_loss=0.3551, attn_decoder_loss=0.2395, over 5815526.68 frames. ], batch size: 78, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 11:57:46,142 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 11:57:50,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=660240.0, ans=0.125
+2024-09-19 11:58:29,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=660320.0, ans=0.1
+2024-09-19 11:58:49,159 INFO [train.py:1198] (0/2) Epoch 37, batch 2200, loss[loss=0.2495, ctc_loss=0.1227, cr_loss=0.3726, attn_decoder_loss=0.2554, over 29648.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1145, cr_loss=0.3557, attn_decoder_loss=0.2395, over 5811548.42 frames. ], batch size: 86, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 11:58:52,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=660400.0, ans=0.0
+2024-09-19 11:59:01,227 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.189e+01 8.378e+01 8.935e+01 9.603e+01 1.294e+02, threshold=1.787e+02, percent-clipped=0.0
+2024-09-19 11:59:03,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff2.min_abs, batch_count=660440.0, ans=0.1
+2024-09-19 11:59:42,956 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.45 vs. limit=6.0
+2024-09-19 11:59:51,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=660560.0, ans=0.125
+2024-09-19 12:00:01,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=660560.0, ans=0.2
+2024-09-19 12:00:04,895 INFO [train.py:1198] (0/2) Epoch 37, batch 2250, loss[loss=0.232, ctc_loss=0.1051, cr_loss=0.3254, attn_decoder_loss=0.2389, over 29695.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1143, cr_loss=0.3551, attn_decoder_loss=0.2395, over 5811640.08 frames. ], batch size: 82, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 12:00:31,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=660640.0, ans=0.0
+2024-09-19 12:01:13,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=660760.0, ans=0.025
+2024-09-19 12:01:22,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.25 vs. limit=15.0
+2024-09-19 12:01:23,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=660800.0, ans=0.05
+2024-09-19 12:01:25,019 INFO [train.py:1198] (0/2) Epoch 37, batch 2300, loss[loss=0.2111, ctc_loss=0.09468, cr_loss=0.2949, attn_decoder_loss=0.2175, over 29708.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1139, cr_loss=0.3534, attn_decoder_loss=0.2386, over 5798383.24 frames. ], batch size: 72, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 12:01:36,927 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.404e+01 8.548e+01 9.077e+01 9.950e+01 1.821e+02, threshold=1.815e+02, percent-clipped=1.0
+2024-09-19 12:01:49,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=660840.0, ans=0.0
+2024-09-19 12:01:54,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=660880.0, ans=0.1
+2024-09-19 12:01:58,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=660880.0, ans=0.0
+2024-09-19 12:02:03,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=660880.0, ans=0.125
+2024-09-19 12:02:07,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=660880.0, ans=0.125
+2024-09-19 12:02:41,021 INFO [train.py:1198] (0/2) Epoch 37, batch 2350, loss[loss=0.2432, ctc_loss=0.1243, cr_loss=0.3768, attn_decoder_loss=0.2481, over 29676.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1142, cr_loss=0.3544, attn_decoder_loss=0.239, over 5803968.01 frames. ], batch size: 83, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 12:02:59,449 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 12:03:28,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=661120.0, ans=0.0
+2024-09-19 12:03:56,988 INFO [train.py:1198] (0/2) Epoch 37, batch 2400, loss[loss=0.232, ctc_loss=0.1173, cr_loss=0.364, attn_decoder_loss=0.2367, over 29517.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1149, cr_loss=0.3556, attn_decoder_loss=0.2397, over 5808123.17 frames. ], batch size: 76, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 12:04:08,985 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.655e+01 8.594e+01 9.080e+01 9.693e+01 1.252e+02, threshold=1.816e+02, percent-clipped=0.0
+2024-09-19 12:04:35,071 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.53 vs. limit=12.0
+2024-09-19 12:04:38,079 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.14 vs. limit=10.0
+2024-09-19 12:04:47,084 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.66 vs. limit=6.0
+2024-09-19 12:04:59,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=661360.0, ans=0.125
+2024-09-19 12:05:06,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=661360.0, ans=0.025
+2024-09-19 12:05:07,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=661360.0, ans=0.125
+2024-09-19 12:05:17,007 INFO [train.py:1198] (0/2) Epoch 37, batch 2450, loss[loss=0.2411, ctc_loss=0.1214, cr_loss=0.3636, attn_decoder_loss=0.2463, over 29689.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1156, cr_loss=0.357, attn_decoder_loss=0.2406, over 5785080.64 frames. ], batch size: 82, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 12:05:17,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=661400.0, ans=0.0
+2024-09-19 12:05:26,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=661400.0, ans=0.0
+2024-09-19 12:05:36,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=661440.0, ans=0.125
+2024-09-19 12:05:43,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=661440.0, ans=0.125
+2024-09-19 12:05:47,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=661480.0, ans=0.1
+2024-09-19 12:05:47,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=661480.0, ans=0.1
+2024-09-19 12:06:01,817 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.61 vs. limit=15.0
+2024-09-19 12:06:02,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=661520.0, ans=0.125
+2024-09-19 12:06:02,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=661520.0, ans=0.0
+2024-09-19 12:06:16,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=661560.0, ans=0.07
+2024-09-19 12:06:22,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=661560.0, ans=0.125
+2024-09-19 12:06:30,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=661560.0, ans=0.125
+2024-09-19 12:06:33,575 INFO [train.py:1198] (0/2) Epoch 37, batch 2500, loss[loss=0.2343, ctc_loss=0.1127, cr_loss=0.3455, attn_decoder_loss=0.2402, over 29632.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1155, cr_loss=0.3568, attn_decoder_loss=0.2406, over 5795051.87 frames. ], batch size: 86, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 12:06:45,677 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.493e+01 8.640e+01 9.238e+01 1.003e+02 4.668e+02, threshold=1.848e+02, percent-clipped=4.0
+2024-09-19 12:07:01,990 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.64 vs. limit=15.0
+2024-09-19 12:07:05,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=661680.0, ans=0.1
+2024-09-19 12:07:11,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=661680.0, ans=0.0
+2024-09-19 12:07:37,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=661760.0, ans=0.0
+2024-09-19 12:07:49,437 INFO [train.py:1198] (0/2) Epoch 37, batch 2550, loss[loss=0.2086, ctc_loss=0.1012, cr_loss=0.309, attn_decoder_loss=0.2136, over 29350.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1152, cr_loss=0.3564, attn_decoder_loss=0.2403, over 5797218.58 frames. ], batch size: 67, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 12:07:49,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=661800.0, ans=0.0
+2024-09-19 12:07:54,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=661800.0, ans=0.125
+2024-09-19 12:08:37,662 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.27 vs. limit=15.0
+2024-09-19 12:08:41,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=661920.0, ans=0.2
+2024-09-19 12:08:44,165 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.17 vs. limit=22.5
+2024-09-19 12:09:01,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=661960.0, ans=0.0
+2024-09-19 12:09:07,399 INFO [train.py:1198] (0/2) Epoch 37, batch 2600, loss[loss=0.2268, ctc_loss=0.1039, cr_loss=0.3379, attn_decoder_loss=0.2329, over 29450.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1154, cr_loss=0.357, attn_decoder_loss=0.2407, over 5793506.74 frames. ], batch size: 78, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 12:09:18,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=662000.0, ans=0.125
+2024-09-19 12:09:21,436 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.726e+01 8.481e+01 8.933e+01 9.512e+01 2.457e+02, threshold=1.787e+02, percent-clipped=1.0
+2024-09-19 12:09:38,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=662080.0, ans=0.125
+2024-09-19 12:10:03,179 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.30 vs. limit=22.5
+2024-09-19 12:10:12,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=662160.0, ans=0.0
+2024-09-19 12:10:20,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=662160.0, ans=0.2
+2024-09-19 12:10:24,381 INFO [train.py:1198] (0/2) Epoch 37, batch 2650, loss[loss=0.2468, ctc_loss=0.1184, cr_loss=0.3645, attn_decoder_loss=0.2529, over 29314.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1156, cr_loss=0.3575, attn_decoder_loss=0.241, over 5799776.84 frames. ], batch size: 100, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 12:10:35,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=662200.0, ans=0.2
+2024-09-19 12:10:55,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=662280.0, ans=0.025
+2024-09-19 12:11:03,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=662280.0, ans=0.125
+2024-09-19 12:11:14,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=662320.0, ans=0.125
+2024-09-19 12:11:25,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=662360.0, ans=0.125
+2024-09-19 12:11:29,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=662360.0, ans=0.125
+2024-09-19 12:11:39,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=662400.0, ans=0.0
+2024-09-19 12:11:40,389 INFO [train.py:1198] (0/2) Epoch 37, batch 2700, loss[loss=0.2459, ctc_loss=0.1157, cr_loss=0.3629, attn_decoder_loss=0.2523, over 29538.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1154, cr_loss=0.3569, attn_decoder_loss=0.2411, over 5795205.99 frames. ], batch size: 87, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 12:11:52,428 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.281e+01 8.575e+01 9.095e+01 9.529e+01 6.705e+02, threshold=1.819e+02, percent-clipped=1.0
+2024-09-19 12:12:30,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=662520.0, ans=0.1
+2024-09-19 12:12:45,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=662560.0, ans=0.0
+2024-09-19 12:12:51,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=662560.0, ans=0.125
+2024-09-19 12:12:58,482 INFO [train.py:1198] (0/2) Epoch 37, batch 2750, loss[loss=0.229, ctc_loss=0.1192, cr_loss=0.3685, attn_decoder_loss=0.233, over 29529.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1145, cr_loss=0.3547, attn_decoder_loss=0.2397, over 5794998.78 frames. ], batch size: 75, lr: 2.98e-03, grad_scale: 16.0
+2024-09-19 12:13:00,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=662600.0, ans=0.125
+2024-09-19 12:13:15,429 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.67 vs. limit=15.0
+2024-09-19 12:13:16,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=662640.0, ans=0.125
+2024-09-19 12:13:21,623 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.01 vs. limit=15.0
+2024-09-19 12:13:23,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=662640.0, ans=0.5
+2024-09-19 12:13:24,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.92 vs. limit=6.0
+2024-09-19 12:13:26,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=662640.0, ans=0.0
+2024-09-19 12:13:34,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=662680.0, ans=0.125
+2024-09-19 12:13:40,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=662680.0, ans=0.125
+2024-09-19 12:13:43,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=662680.0, ans=0.125
+2024-09-19 12:14:00,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=662760.0, ans=0.125
+2024-09-19 12:14:03,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=662760.0, ans=0.5
+2024-09-19 12:14:16,761 INFO [train.py:1198] (0/2) Epoch 37, batch 2800, loss[loss=0.2576, ctc_loss=0.1425, cr_loss=0.3665, attn_decoder_loss=0.2622, over 20107.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1152, cr_loss=0.3562, attn_decoder_loss=0.2402, over 5775099.05 frames. ], batch size: 210, lr: 2.98e-03, grad_scale: 32.0
+2024-09-19 12:14:19,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.58 vs. limit=15.0
+2024-09-19 12:14:26,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=662800.0, ans=0.125
+2024-09-19 12:14:30,281 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.352e+01 8.502e+01 8.910e+01 9.403e+01 2.471e+02, threshold=1.782e+02, percent-clipped=1.0
+2024-09-19 12:14:42,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=662840.0, ans=0.125
+2024-09-19 12:15:12,313 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.22 vs. limit=15.0
+2024-09-19 12:15:22,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0
+2024-09-19 12:15:32,028 INFO [train.py:1198] (0/2) Epoch 37, batch 2850, loss[loss=0.2279, ctc_loss=0.1131, cr_loss=0.344, attn_decoder_loss=0.233, over 29506.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1156, cr_loss=0.3572, attn_decoder_loss=0.2405, over 5760102.76 frames. ], batch size: 77, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 12:15:41,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=663000.0, ans=0.0
+2024-09-19 12:15:49,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=663040.0, ans=0.1
+2024-09-19 12:16:13,066 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.00 vs. limit=15.0
+2024-09-19 12:16:20,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=663120.0, ans=0.04949747468305833
+2024-09-19 12:16:39,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=663160.0, ans=0.125
+2024-09-19 12:16:49,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=663200.0, ans=0.05
+2024-09-19 12:16:50,708 INFO [train.py:1198] (0/2) Epoch 37, batch 2900, loss[loss=0.2337, ctc_loss=0.1056, cr_loss=0.3327, attn_decoder_loss=0.2405, over 29427.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.116, cr_loss=0.3583, attn_decoder_loss=0.2415, over 5785966.38 frames. ], batch size: 79, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 12:17:07,907 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.541e+01 8.975e+01 9.658e+01 1.927e+02, threshold=1.795e+02, percent-clipped=1.0
+2024-09-19 12:17:08,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=663240.0, ans=0.125
+2024-09-19 12:17:26,812 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.36 vs. limit=12.0
+2024-09-19 12:17:36,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=663320.0, ans=0.0
+2024-09-19 12:18:07,938 INFO [train.py:1198] (0/2) Epoch 37, batch 2950, loss[loss=0.2363, ctc_loss=0.1232, cr_loss=0.368, attn_decoder_loss=0.2407, over 29513.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.115, cr_loss=0.3557, attn_decoder_loss=0.2401, over 5780757.15 frames. ], batch size: 75, lr: 2.98e-03, grad_scale: 8.0
+2024-09-19 12:18:08,935 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.91 vs.
limit=12.0 +2024-09-19 12:18:23,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=663440.0, ans=0.0 +2024-09-19 12:18:26,426 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:18:32,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=663440.0, ans=0.125 +2024-09-19 12:18:34,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=663440.0, ans=0.0 +2024-09-19 12:18:56,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=663520.0, ans=0.05 +2024-09-19 12:18:58,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=663520.0, ans=0.125 +2024-09-19 12:19:02,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=663520.0, ans=0.125 +2024-09-19 12:19:02,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=663520.0, ans=0.125 +2024-09-19 12:19:07,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=663560.0, ans=0.1 +2024-09-19 12:19:21,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=663560.0, ans=0.125 +2024-09-19 12:19:23,863 INFO [train.py:1198] (0/2) Epoch 37, batch 3000, loss[loss=0.231, ctc_loss=0.1149, cr_loss=0.3542, attn_decoder_loss=0.236, over 29772.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1147, cr_loss=0.3553, attn_decoder_loss=0.2397, over 5780628.70 frames. ], batch size: 81, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:19:23,864 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 12:19:35,528 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([4.5817, 3.7650, 3.9748, 4.1670], device='cuda:0') +2024-09-19 12:19:43,120 INFO [train.py:1230] (0/2) Epoch 37, validation: loss=0.212, ctc_loss=0.03675, cr_loss=6.305e-15, attn_decoder_loss=0.2315, over 944034.00 frames. +2024-09-19 12:19:43,120 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 12:19:58,495 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.415e+01 8.507e+01 8.935e+01 9.407e+01 3.949e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-19 12:20:07,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=663640.0, ans=0.1 +2024-09-19 12:20:38,668 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.89 vs. 
limit=15.0 +2024-09-19 12:20:39,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=663720.0, ans=0.125 +2024-09-19 12:20:55,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=663760.0, ans=0.125 +2024-09-19 12:21:01,308 INFO [train.py:1198] (0/2) Epoch 37, batch 3050, loss[loss=0.2261, ctc_loss=0.1067, cr_loss=0.3401, attn_decoder_loss=0.2318, over 29531.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1151, cr_loss=0.3563, attn_decoder_loss=0.2404, over 5775855.39 frames. ], batch size: 76, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:21:03,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=663800.0, ans=0.0 +2024-09-19 12:21:06,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=663800.0, ans=0.125 +2024-09-19 12:21:14,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=663800.0, ans=0.025 +2024-09-19 12:21:35,517 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.63 vs. limit=15.0 +2024-09-19 12:21:56,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=663920.0, ans=0.1 +2024-09-19 12:22:06,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=663960.0, ans=0.1 +2024-09-19 12:22:17,509 INFO [train.py:1198] (0/2) Epoch 37, batch 3100, loss[loss=0.25, ctc_loss=0.1237, cr_loss=0.3753, attn_decoder_loss=0.2557, over 29294.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1154, cr_loss=0.3566, attn_decoder_loss=0.2405, over 5774541.65 frames. ], batch size: 100, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:22:32,778 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.352e+01 8.715e+01 9.369e+01 9.767e+01 1.782e+02, threshold=1.874e+02, percent-clipped=0.0 +2024-09-19 12:23:05,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.46 vs. limit=15.0 +2024-09-19 12:23:15,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=664120.0, ans=0.1 +2024-09-19 12:23:35,721 INFO [train.py:1198] (0/2) Epoch 37, batch 3150, loss[loss=0.2474, ctc_loss=0.1208, cr_loss=0.3641, attn_decoder_loss=0.2534, over 28855.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1154, cr_loss=0.3567, attn_decoder_loss=0.2405, over 5781744.05 frames. ], batch size: 104, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:23:42,606 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.36 vs. limit=15.0 +2024-09-19 12:23:50,484 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.46 vs. 
limit=15.0 +2024-09-19 12:24:01,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=664240.0, ans=0.125 +2024-09-19 12:24:04,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=664280.0, ans=0.125 +2024-09-19 12:24:27,913 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.66 vs. limit=15.0 +2024-09-19 12:24:33,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.72 vs. limit=15.0 +2024-09-19 12:24:53,367 INFO [train.py:1198] (0/2) Epoch 37, batch 3200, loss[loss=0.2329, ctc_loss=0.1129, cr_loss=0.3655, attn_decoder_loss=0.2381, over 29411.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1149, cr_loss=0.3559, attn_decoder_loss=0.2398, over 5792885.22 frames. ], batch size: 79, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:25:03,463 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.87 vs. limit=15.0 +2024-09-19 12:25:07,954 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=12.0 +2024-09-19 12:25:08,497 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.588e+01 8.621e+01 9.120e+01 9.766e+01 2.704e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 12:25:21,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=664440.0, ans=0.2 +2024-09-19 12:25:30,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=664480.0, ans=0.2 +2024-09-19 12:25:33,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=664480.0, ans=0.2 +2024-09-19 12:25:33,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=664480.0, ans=0.2 +2024-09-19 12:25:45,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=664520.0, ans=0.2 +2024-09-19 12:25:57,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=664560.0, ans=0.125 +2024-09-19 12:25:57,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=664560.0, ans=0.0 +2024-09-19 12:26:02,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=664560.0, ans=0.125 +2024-09-19 12:26:09,291 INFO [train.py:1198] (0/2) Epoch 37, batch 3250, loss[loss=0.2396, ctc_loss=0.1106, cr_loss=0.3521, attn_decoder_loss=0.2461, over 29717.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1153, cr_loss=0.3571, attn_decoder_loss=0.2404, over 5799718.85 frames. ], batch size: 84, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:26:12,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=664600.0, ans=0.2 +2024-09-19 12:26:42,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.65 vs. 
limit=15.0 +2024-09-19 12:26:49,094 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.63 vs. limit=15.0 +2024-09-19 12:27:00,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=664720.0, ans=0.125 +2024-09-19 12:27:11,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=664760.0, ans=0.125 +2024-09-19 12:27:27,409 INFO [train.py:1198] (0/2) Epoch 37, batch 3300, loss[loss=0.2369, ctc_loss=0.1114, cr_loss=0.3474, attn_decoder_loss=0.2432, over 28297.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1143, cr_loss=0.3548, attn_decoder_loss=0.2393, over 5798526.83 frames. ], batch size: 111, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:27:27,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=664800.0, ans=0.1 +2024-09-19 12:27:30,132 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=9.87 vs. limit=22.5 +2024-09-19 12:27:39,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=664800.0, ans=0.125 +2024-09-19 12:27:42,600 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.021e+01 8.526e+01 9.078e+01 9.888e+01 1.961e+02, threshold=1.816e+02, percent-clipped=2.0 +2024-09-19 12:27:46,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=664840.0, ans=0.125 +2024-09-19 12:27:46,678 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.17 vs. limit=15.0 +2024-09-19 12:28:11,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=664920.0, ans=0.025 +2024-09-19 12:28:21,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=664920.0, ans=0.125 +2024-09-19 12:28:30,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=664960.0, ans=0.0 +2024-09-19 12:28:45,005 INFO [train.py:1198] (0/2) Epoch 37, batch 3350, loss[loss=0.245, ctc_loss=0.122, cr_loss=0.3654, attn_decoder_loss=0.2505, over 28885.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1148, cr_loss=0.3558, attn_decoder_loss=0.2399, over 5774740.32 frames. ], batch size: 104, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:28:46,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=665000.0, ans=0.0 +2024-09-19 12:29:14,673 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.42 vs. limit=15.0 +2024-09-19 12:29:40,573 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.73 vs. limit=15.0 +2024-09-19 12:29:43,528 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.33 vs. 
limit=15.0 +2024-09-19 12:29:43,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.86 vs. limit=12.0 +2024-09-19 12:29:47,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=665160.0, ans=0.125 +2024-09-19 12:30:00,464 INFO [train.py:1198] (0/2) Epoch 37, batch 3400, loss[loss=0.2097, ctc_loss=0.1067, cr_loss=0.3402, attn_decoder_loss=0.2136, over 29332.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1152, cr_loss=0.3568, attn_decoder_loss=0.24, over 5765811.37 frames. ], batch size: 67, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:30:16,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=665240.0, ans=0.125 +2024-09-19 12:30:17,145 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.972e+01 8.762e+01 9.202e+01 9.777e+01 2.648e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-19 12:30:19,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=665240.0, ans=0.025 +2024-09-19 12:30:23,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=665240.0, ans=0.0 +2024-09-19 12:30:24,309 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.92 vs. limit=15.0 +2024-09-19 12:30:32,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=665280.0, ans=0.125 +2024-09-19 12:30:35,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=665280.0, ans=0.0 +2024-09-19 12:31:11,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=665360.0, ans=0.1 +2024-09-19 12:31:18,360 INFO [train.py:1198] (0/2) Epoch 37, batch 3450, loss[loss=0.2541, ctc_loss=0.1249, cr_loss=0.3561, attn_decoder_loss=0.2606, over 28333.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1154, cr_loss=0.3568, attn_decoder_loss=0.2405, over 5772022.56 frames. ], batch size: 111, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:31:24,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=665400.0, ans=0.2 +2024-09-19 12:31:35,583 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:31:43,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.58 vs. limit=15.0 +2024-09-19 12:32:19,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=665560.0, ans=0.1 +2024-09-19 12:32:35,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=665600.0, ans=0.125 +2024-09-19 12:32:36,779 INFO [train.py:1198] (0/2) Epoch 37, batch 3500, loss[loss=0.2179, ctc_loss=0.1066, cr_loss=0.3418, attn_decoder_loss=0.2227, over 29329.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.115, cr_loss=0.3558, attn_decoder_loss=0.2398, over 5773803.46 frames. 
], batch size: 71, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:32:40,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=665600.0, ans=0.0 +2024-09-19 12:32:44,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=665600.0, ans=0.125 +2024-09-19 12:32:47,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=665600.0, ans=0.125 +2024-09-19 12:32:50,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=665640.0, ans=0.2 +2024-09-19 12:32:53,367 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.540e+01 8.561e+01 8.978e+01 9.459e+01 2.098e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-19 12:32:55,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=665640.0, ans=0.2 +2024-09-19 12:32:59,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=665640.0, ans=0.0 +2024-09-19 12:33:32,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=665720.0, ans=0.1 +2024-09-19 12:33:33,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=665720.0, ans=0.0 +2024-09-19 12:33:51,558 INFO [train.py:1198] (0/2) Epoch 37, batch 3550, loss[loss=0.2383, ctc_loss=0.1061, cr_loss=0.3205, attn_decoder_loss=0.2459, over 29718.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1145, cr_loss=0.3546, attn_decoder_loss=0.2397, over 5780948.14 frames. ], batch size: 89, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:33:54,849 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=665800.0, ans=0.1 +2024-09-19 12:33:56,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=665800.0, ans=0.125 +2024-09-19 12:34:20,861 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.67 vs. limit=15.0 +2024-09-19 12:34:40,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=665920.0, ans=10.0 +2024-09-19 12:34:43,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=665920.0, ans=0.0 +2024-09-19 12:34:52,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=665960.0, ans=0.125 +2024-09-19 12:34:54,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=665960.0, ans=0.0 +2024-09-19 12:34:55,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.41 vs. limit=15.0 +2024-09-19 12:35:05,689 INFO [train.py:1198] (0/2) Epoch 37, batch 3600, loss[loss=0.223, ctc_loss=0.1074, cr_loss=0.3334, attn_decoder_loss=0.2284, over 29489.00 frames. 
], tot_loss[loss=0.2344, ctc_loss=0.1146, cr_loss=0.3549, attn_decoder_loss=0.2398, over 5790318.06 frames. ], batch size: 77, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:35:06,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.60 vs. limit=10.0 +2024-09-19 12:35:14,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=666000.0, ans=0.0 +2024-09-19 12:35:22,126 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.583e+01 9.106e+01 9.636e+01 2.538e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-19 12:35:31,889 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.83 vs. limit=6.0 +2024-09-19 12:35:58,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.29 vs. limit=15.0 +2024-09-19 12:35:59,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=666120.0, ans=0.125 +2024-09-19 12:36:22,096 INFO [train.py:1198] (0/2) Epoch 37, batch 3650, loss[loss=0.2456, ctc_loss=0.1239, cr_loss=0.3848, attn_decoder_loss=0.2506, over 29508.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1142, cr_loss=0.3546, attn_decoder_loss=0.2392, over 5793238.65 frames. ], batch size: 90, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:36:34,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=666200.0, ans=0.1 +2024-09-19 12:36:42,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.64 vs. limit=5.0 +2024-09-19 12:36:57,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=666280.0, ans=0.125 +2024-09-19 12:37:15,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=666320.0, ans=0.2 +2024-09-19 12:37:36,686 INFO [train.py:1198] (0/2) Epoch 37, batch 3700, loss[loss=0.2435, ctc_loss=0.1165, cr_loss=0.3658, attn_decoder_loss=0.2495, over 29702.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1144, cr_loss=0.3549, attn_decoder_loss=0.2396, over 5803643.71 frames. ], batch size: 84, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:37:41,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=666400.0, ans=0.2 +2024-09-19 12:37:48,177 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.54 vs. limit=10.0 +2024-09-19 12:37:48,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=666400.0, ans=0.125 +2024-09-19 12:37:56,005 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.469e+01 9.062e+01 9.671e+01 3.468e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 12:38:16,181 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.05 vs. 
limit=10.0 +2024-09-19 12:38:17,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=666480.0, ans=0.2 +2024-09-19 12:38:24,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=666520.0, ans=0.0 +2024-09-19 12:38:35,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=666560.0, ans=0.125 +2024-09-19 12:38:37,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=666560.0, ans=0.125 +2024-09-19 12:38:42,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=666560.0, ans=0.07 +2024-09-19 12:38:52,752 INFO [train.py:1198] (0/2) Epoch 37, batch 3750, loss[loss=0.2066, ctc_loss=0.09794, cr_loss=0.3292, attn_decoder_loss=0.2114, over 29368.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1147, cr_loss=0.3553, attn_decoder_loss=0.2398, over 5807905.85 frames. ], batch size: 67, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:39:02,137 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=666600.0, ans=0.1 +2024-09-19 12:39:06,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=666640.0, ans=0.2 +2024-09-19 12:39:24,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=666680.0, ans=0.0 +2024-09-19 12:39:44,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=666720.0, ans=0.0 +2024-09-19 12:39:59,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=666760.0, ans=0.0 +2024-09-19 12:40:07,586 INFO [train.py:1198] (0/2) Epoch 37, batch 3800, loss[loss=0.2373, ctc_loss=0.1116, cr_loss=0.3403, attn_decoder_loss=0.2437, over 29611.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1144, cr_loss=0.3541, attn_decoder_loss=0.2393, over 5798578.41 frames. 
], batch size: 86, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:40:19,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=666800.0, ans=0.0 +2024-09-19 12:40:26,998 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.615e+01 8.580e+01 8.987e+01 9.690e+01 1.357e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-19 12:40:43,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=666880.0, ans=0.125 +2024-09-19 12:40:45,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=666880.0, ans=0.1 +2024-09-19 12:40:58,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=666920.0, ans=0.125 +2024-09-19 12:40:59,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=666920.0, ans=0.1 +2024-09-19 12:41:01,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=666920.0, ans=0.0 +2024-09-19 12:41:01,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=666920.0, ans=0.125 +2024-09-19 12:41:08,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=666960.0, ans=0.125 +2024-09-19 12:41:19,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=666960.0, ans=0.125 +2024-09-19 12:41:21,812 INFO [train.py:1198] (0/2) Epoch 37, batch 3850, loss[loss=0.2493, ctc_loss=0.1265, cr_loss=0.3784, attn_decoder_loss=0.2545, over 29302.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1139, cr_loss=0.3534, attn_decoder_loss=0.239, over 5812137.96 frames. ], batch size: 100, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:41:25,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=667000.0, ans=0.1 +2024-09-19 12:41:29,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=667000.0, ans=0.125 +2024-09-19 12:42:20,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=667120.0, ans=0.0 +2024-09-19 12:42:38,731 INFO [train.py:1198] (0/2) Epoch 37, batch 3900, loss[loss=0.2472, ctc_loss=0.1235, cr_loss=0.3497, attn_decoder_loss=0.2532, over 29645.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1145, cr_loss=0.3545, attn_decoder_loss=0.2397, over 5816723.22 frames. ], batch size: 86, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:42:44,244 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.94 vs. 
limit=6.0 +2024-09-19 12:42:57,941 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.596e+01 8.935e+01 9.669e+01 1.380e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-19 12:42:59,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=667240.0, ans=0.1 +2024-09-19 12:43:27,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=667320.0, ans=0.125 +2024-09-19 12:43:38,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=667360.0, ans=0.2 +2024-09-19 12:43:48,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=667360.0, ans=0.125 +2024-09-19 12:43:52,903 INFO [train.py:1198] (0/2) Epoch 37, batch 3950, loss[loss=0.2557, ctc_loss=0.135, cr_loss=0.4039, attn_decoder_loss=0.2601, over 29527.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1144, cr_loss=0.3549, attn_decoder_loss=0.2396, over 5836309.81 frames. ], batch size: 97, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:44:06,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=667440.0, ans=0.2 +2024-09-19 12:44:40,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=667520.0, ans=0.125 +2024-09-19 12:44:52,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=667560.0, ans=0.0 +2024-09-19 12:44:57,479 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.86 vs. limit=22.5 +2024-09-19 12:45:01,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=667560.0, ans=0.125 +2024-09-19 12:45:08,493 INFO [train.py:1198] (0/2) Epoch 37, batch 4000, loss[loss=0.2229, ctc_loss=0.1086, cr_loss=0.3558, attn_decoder_loss=0.2277, over 29505.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1145, cr_loss=0.355, attn_decoder_loss=0.2397, over 5813282.92 frames. ], batch size: 74, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:45:16,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=667600.0, ans=0.0 +2024-09-19 12:45:16,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.67 vs. limit=15.0 +2024-09-19 12:45:17,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=667600.0, ans=0.0 +2024-09-19 12:45:23,775 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.30 vs. limit=10.0 +2024-09-19 12:45:27,465 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.285e+01 8.490e+01 9.030e+01 9.800e+01 2.988e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-19 12:45:31,594 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.85 vs. 
limit=22.5 +2024-09-19 12:45:57,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=667720.0, ans=0.035 +2024-09-19 12:46:04,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=667720.0, ans=0.1 +2024-09-19 12:46:22,187 INFO [train.py:1198] (0/2) Epoch 37, batch 4050, loss[loss=0.2606, ctc_loss=0.1424, cr_loss=0.4059, attn_decoder_loss=0.2647, over 20034.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1142, cr_loss=0.3542, attn_decoder_loss=0.2394, over 5795388.99 frames. ], batch size: 209, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:46:26,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=667800.0, ans=0.07 +2024-09-19 12:46:36,145 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.75 vs. limit=6.0 +2024-09-19 12:46:45,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=667840.0, ans=0.025 +2024-09-19 12:46:48,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=667840.0, ans=0.1 +2024-09-19 12:46:54,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=667880.0, ans=0.2 +2024-09-19 12:47:15,562 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.45 vs. limit=15.0 +2024-09-19 12:47:37,494 INFO [train.py:1198] (0/2) Epoch 37, batch 4100, loss[loss=0.2421, ctc_loss=0.1195, cr_loss=0.3729, attn_decoder_loss=0.2474, over 29480.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1147, cr_loss=0.3558, attn_decoder_loss=0.24, over 5791423.91 frames. ], batch size: 90, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:47:51,463 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.18 vs. 
limit=15.0 +2024-09-19 12:47:56,159 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.370e+01 8.448e+01 9.033e+01 9.875e+01 1.600e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-19 12:47:59,523 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:48:04,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=668040.0, ans=0.125 +2024-09-19 12:48:09,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=668080.0, ans=0.125 +2024-09-19 12:48:09,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=668080.0, ans=0.125 +2024-09-19 12:48:12,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=668080.0, ans=0.125 +2024-09-19 12:48:15,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=668080.0, ans=0.125 +2024-09-19 12:48:15,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=668080.0, ans=0.0 +2024-09-19 12:48:34,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=668160.0, ans=10.0 +2024-09-19 12:48:37,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=668160.0, ans=0.125 +2024-09-19 12:48:37,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=668160.0, ans=0.025 +2024-09-19 12:48:51,882 INFO [train.py:1198] (0/2) Epoch 37, batch 4150, loss[loss=0.2313, ctc_loss=0.1167, cr_loss=0.3537, attn_decoder_loss=0.2361, over 29494.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1143, cr_loss=0.3549, attn_decoder_loss=0.2396, over 5796940.32 frames. ], batch size: 77, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:49:01,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=668200.0, ans=0.125 +2024-09-19 12:49:17,778 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.71 vs. limit=15.0 +2024-09-19 12:49:31,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=668280.0, ans=0.0 +2024-09-19 12:49:37,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=668320.0, ans=0.025 +2024-09-19 12:49:49,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=668360.0, ans=0.125 +2024-09-19 12:49:55,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=668360.0, ans=0.025 +2024-09-19 12:50:05,539 INFO [train.py:1198] (0/2) Epoch 37, batch 4200, loss[loss=0.2523, ctc_loss=0.128, cr_loss=0.3791, attn_decoder_loss=0.2577, over 29503.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1146, cr_loss=0.3558, attn_decoder_loss=0.24, over 5800248.73 frames. 
], batch size: 90, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:50:24,821 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.799e+01 8.584e+01 9.010e+01 9.647e+01 2.583e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-19 12:50:41,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=668480.0, ans=0.125 +2024-09-19 12:50:48,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=668480.0, ans=0.125 +2024-09-19 12:50:51,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=668520.0, ans=0.125 +2024-09-19 12:50:52,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=668520.0, ans=0.1 +2024-09-19 12:51:00,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=668520.0, ans=0.2 +2024-09-19 12:51:10,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=668560.0, ans=0.025 +2024-09-19 12:51:20,677 INFO [train.py:1198] (0/2) Epoch 37, batch 4250, loss[loss=0.2183, ctc_loss=0.1058, cr_loss=0.346, attn_decoder_loss=0.2231, over 29509.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1144, cr_loss=0.3551, attn_decoder_loss=0.2401, over 5805604.64 frames. ], batch size: 74, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:51:41,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=668640.0, ans=0.125 +2024-09-19 12:52:03,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=668720.0, ans=0.125 +2024-09-19 12:52:07,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=668720.0, ans=0.2 +2024-09-19 12:52:16,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.74 vs. limit=15.0 +2024-09-19 12:52:25,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=668760.0, ans=0.0 +2024-09-19 12:52:31,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=668760.0, ans=0.05 +2024-09-19 12:52:35,078 INFO [train.py:1198] (0/2) Epoch 37, batch 4300, loss[loss=0.2352, ctc_loss=0.1086, cr_loss=0.3283, attn_decoder_loss=0.242, over 29525.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1142, cr_loss=0.3547, attn_decoder_loss=0.2401, over 5794431.21 frames. ], batch size: 87, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:52:39,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=668800.0, ans=0.125 +2024-09-19 12:52:50,949 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.19 vs. 
limit=22.5 +2024-09-19 12:52:54,257 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.770e+01 8.796e+01 9.094e+01 9.550e+01 2.475e+02, threshold=1.819e+02, percent-clipped=2.0 +2024-09-19 12:53:05,444 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.75 vs. limit=15.0 +2024-09-19 12:53:12,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=668880.0, ans=0.0 +2024-09-19 12:53:16,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=668880.0, ans=0.125 +2024-09-19 12:53:25,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=668920.0, ans=10.0 +2024-09-19 12:53:30,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=668920.0, ans=0.0 +2024-09-19 12:53:48,878 INFO [train.py:1198] (0/2) Epoch 37, batch 4350, loss[loss=0.2572, ctc_loss=0.133, cr_loss=0.4137, attn_decoder_loss=0.2618, over 29423.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1166, cr_loss=0.3597, attn_decoder_loss=0.2433, over 5796711.97 frames. ], batch size: 97, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:54:11,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=669040.0, ans=0.035 +2024-09-19 12:54:16,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=669040.0, ans=0.1 +2024-09-19 12:54:19,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=669080.0, ans=0.2 +2024-09-19 12:54:19,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=669080.0, ans=0.0 +2024-09-19 12:54:37,447 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.09 vs. limit=22.5 +2024-09-19 12:54:49,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=669160.0, ans=0.09899494936611666 +2024-09-19 12:54:54,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=669160.0, ans=0.025 +2024-09-19 12:55:02,637 INFO [train.py:1198] (0/2) Epoch 37, batch 4400, loss[loss=0.2504, ctc_loss=0.1345, cr_loss=0.4052, attn_decoder_loss=0.2543, over 27505.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1179, cr_loss=0.3621, attn_decoder_loss=0.2453, over 5767648.43 frames. 
], batch size: 125, lr: 2.96e-03, grad_scale: 32.0 +2024-09-19 12:55:11,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=669200.0, ans=0.0 +2024-09-19 12:55:23,898 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.307e+01 8.852e+01 9.362e+01 9.812e+01 1.394e+02, threshold=1.872e+02, percent-clipped=0.0 +2024-09-19 12:55:34,600 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:56:07,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=669360.0, ans=0.1 +2024-09-19 12:56:17,482 INFO [train.py:1198] (0/2) Epoch 37, batch 4450, loss[loss=0.2586, ctc_loss=0.1485, cr_loss=0.3802, attn_decoder_loss=0.2624, over 19822.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1217, cr_loss=0.3673, attn_decoder_loss=0.2473, over 5582007.31 frames. ], batch size: 209, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:56:34,678 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.72 vs. limit=22.5 +2024-09-19 12:57:19,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=669560.0, ans=0.125 +2024-09-19 12:57:32,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=669600.0, ans=0.125 +2024-09-19 12:57:33,295 INFO [train.py:1198] (0/2) Epoch 37, batch 4500, loss[loss=0.2548, ctc_loss=0.1356, cr_loss=0.3863, attn_decoder_loss=0.2595, over 20195.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.125, cr_loss=0.3703, attn_decoder_loss=0.2491, over 5241687.17 frames. ], batch size: 209, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:57:54,010 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.699e+01 1.043e+02 1.161e+02 1.270e+02 1.246e+03, threshold=2.323e+02, percent-clipped=2.0 +2024-09-19 12:57:55,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=669640.0, ans=0.0 +2024-09-19 12:57:55,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=669640.0, ans=0.025 +2024-09-19 12:58:01,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=669680.0, ans=0.125 +2024-09-19 12:58:10,467 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-37.pt +2024-09-19 12:58:56,697 INFO [train.py:1198] (0/2) Epoch 38, batch 0, loss[loss=0.2182, ctc_loss=0.1033, cr_loss=0.3426, attn_decoder_loss=0.2234, over 29603.00 frames. ], tot_loss[loss=0.2182, ctc_loss=0.1033, cr_loss=0.3426, attn_decoder_loss=0.2234, over 29603.00 frames. ], batch size: 73, lr: 2.92e-03, grad_scale: 32.0 +2024-09-19 12:58:56,698 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 12:59:15,172 INFO [train.py:1230] (0/2) Epoch 38, validation: loss=0.2124, ctc_loss=0.03582, cr_loss=6.776e-15, attn_decoder_loss=0.232, over 944034.00 frames. 
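The per-batch records in this log report four loss values: loss, ctc_loss, cr_loss (the consistency-regularization term), and attn_decoder_loss (the attention-decoder/AED term). The experiment-directory name in the checkpoint line above (...ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02...) suggests that the logged loss is a weighted sum of the three components with those scales, and the logged numbers are consistent with that reading (e.g., for epoch 37 batch 3000: 0.1 * 0.1147 + 0.9 * 0.2397 + 0.02 * 0.3553 = 0.2343; the validation records, where cr_loss is ~1e-15 and effectively zero, fit the same formula). A minimal sketch of that combination follows, assuming per-frame-normalized component losses as logged; the scales are read off the directory name, and the helper below is illustrative rather than icefall's actual train.py code:

import torch

# Scales inferred from the experiment-directory name in the checkpoint
# path above (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02).
# The function itself is a hypothetical sketch, not the icefall source.
CTC_SCALE, AED_SCALE, CR_SCALE = 0.1, 0.9, 0.02

def combined_loss(ctc_loss: torch.Tensor,
                  attn_decoder_loss: torch.Tensor,
                  cr_loss: torch.Tensor) -> torch.Tensor:
    """Weighted sum that reproduces the logged 'loss' field."""
    return (CTC_SCALE * ctc_loss
            + AED_SCALE * attn_decoder_loss
            + CR_SCALE * cr_loss)

# Check against a logged record (epoch 37, batch 3000):
# loss=0.2343, ctc_loss=0.1147, cr_loss=0.3553, attn_decoder_loss=0.2397
print(combined_loss(torch.tensor(0.1147),
                    torch.tensor(0.2397),
                    torch.tensor(0.3553)))  # tensor(0.2343)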
+2024-09-19 12:59:15,172 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 12:59:18,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=669700.0, ans=0.2 +2024-09-19 12:59:21,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=669700.0, ans=0.1 +2024-09-19 12:59:22,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=669700.0, ans=0.125 +2024-09-19 12:59:23,188 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.83 vs. limit=15.0 +2024-09-19 12:59:28,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=669740.0, ans=0.0 +2024-09-19 13:00:09,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=669820.0, ans=0.125 +2024-09-19 13:00:13,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=669820.0, ans=15.0 +2024-09-19 13:00:14,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=669860.0, ans=0.2 +2024-09-19 13:00:20,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=669860.0, ans=0.0 +2024-09-19 13:00:32,670 INFO [train.py:1198] (0/2) Epoch 38, batch 50, loss[loss=0.2173, ctc_loss=0.1081, cr_loss=0.3455, attn_decoder_loss=0.2218, over 29438.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.117, cr_loss=0.3605, attn_decoder_loss=0.2416, over 1267202.93 frames. ], batch size: 70, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:00:48,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=669940.0, ans=0.0 +2024-09-19 13:01:02,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=669980.0, ans=0.125 +2024-09-19 13:01:27,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=670020.0, ans=0.0 +2024-09-19 13:01:35,764 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.803e+01 8.673e+01 9.380e+01 1.040e+02 1.745e+02, threshold=1.876e+02, percent-clipped=0.0 +2024-09-19 13:01:50,652 INFO [train.py:1198] (0/2) Epoch 38, batch 100, loss[loss=0.2302, ctc_loss=0.1169, cr_loss=0.3722, attn_decoder_loss=0.2345, over 29536.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1174, cr_loss=0.3617, attn_decoder_loss=0.2432, over 2253653.07 frames. ], batch size: 76, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:02:33,486 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.97 vs. limit=15.0 +2024-09-19 13:03:00,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.04 vs. limit=15.0 +2024-09-19 13:03:05,242 INFO [train.py:1198] (0/2) Epoch 38, batch 150, loss[loss=0.2129, ctc_loss=0.1027, cr_loss=0.3194, attn_decoder_loss=0.218, over 29427.00 frames. 
], tot_loss[loss=0.2352, ctc_loss=0.1152, cr_loss=0.3567, attn_decoder_loss=0.2407, over 3047891.81 frames. ], batch size: 70, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:03:47,315 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.04 vs. limit=22.5 +2024-09-19 13:03:47,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn2.whiten.whitening_limit, batch_count=670380.0, ans=22.5 +2024-09-19 13:03:51,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=670420.0, ans=0.125 +2024-09-19 13:03:58,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=670420.0, ans=0.1 +2024-09-19 13:04:05,658 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.245e+01 8.326e+01 8.770e+01 9.236e+01 1.783e+02, threshold=1.754e+02, percent-clipped=0.0 +2024-09-19 13:04:08,081 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.88 vs. limit=15.0 +2024-09-19 13:04:12,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=670460.0, ans=0.125 +2024-09-19 13:04:20,689 INFO [train.py:1198] (0/2) Epoch 38, batch 200, loss[loss=0.2499, ctc_loss=0.1273, cr_loss=0.3915, attn_decoder_loss=0.2548, over 27306.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1143, cr_loss=0.3552, attn_decoder_loss=0.2399, over 3659314.12 frames. ], batch size: 124, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:04:29,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=670500.0, ans=0.1 +2024-09-19 13:04:32,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=670500.0, ans=0.0 +2024-09-19 13:04:39,061 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=12.0 +2024-09-19 13:04:49,484 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.43 vs. limit=15.0 +2024-09-19 13:04:58,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=670580.0, ans=0.125 +2024-09-19 13:05:10,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=670620.0, ans=0.0 +2024-09-19 13:05:12,178 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.25 vs. limit=15.0 +2024-09-19 13:05:32,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=670660.0, ans=0.09899494936611666 +2024-09-19 13:05:38,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=670660.0, ans=0.04949747468305833 +2024-09-19 13:05:41,274 INFO [train.py:1198] (0/2) Epoch 38, batch 250, loss[loss=0.2465, ctc_loss=0.124, cr_loss=0.3575, attn_decoder_loss=0.2522, over 29246.00 frames. 
], tot_loss[loss=0.2338, ctc_loss=0.1138, cr_loss=0.3537, attn_decoder_loss=0.2393, over 4142040.01 frames. ], batch size: 100, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:05:46,947 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.16 vs. limit=6.0 +2024-09-19 13:06:01,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=670740.0, ans=0.125 +2024-09-19 13:06:07,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=670740.0, ans=0.125 +2024-09-19 13:06:19,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=670780.0, ans=0.125 +2024-09-19 13:06:28,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=670820.0, ans=0.0 +2024-09-19 13:06:33,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=670820.0, ans=0.0 +2024-09-19 13:06:41,640 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.304e+01 8.456e+01 8.891e+01 9.506e+01 1.343e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 13:06:56,768 INFO [train.py:1198] (0/2) Epoch 38, batch 300, loss[loss=0.253, ctc_loss=0.1284, cr_loss=0.4034, attn_decoder_loss=0.2579, over 29510.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1132, cr_loss=0.353, attn_decoder_loss=0.239, over 4510490.16 frames. ], batch size: 92, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:06:57,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=670900.0, ans=0.125 +2024-09-19 13:07:12,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=670940.0, ans=0.125 +2024-09-19 13:07:19,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=670940.0, ans=0.1 +2024-09-19 13:07:21,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=670940.0, ans=0.125 +2024-09-19 13:07:24,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.75 vs. limit=22.5 +2024-09-19 13:07:25,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=670980.0, ans=0.0 +2024-09-19 13:08:02,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=671060.0, ans=0.1 +2024-09-19 13:08:10,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=671100.0, ans=0.0 +2024-09-19 13:08:12,145 INFO [train.py:1198] (0/2) Epoch 38, batch 350, loss[loss=0.2111, ctc_loss=0.09593, cr_loss=0.317, attn_decoder_loss=0.2169, over 29296.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1134, cr_loss=0.3534, attn_decoder_loss=0.2394, over 4795300.60 frames. 
], batch size: 71, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:08:15,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=671100.0, ans=0.125 +2024-09-19 13:08:25,646 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.22 vs. limit=15.0 +2024-09-19 13:08:38,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=671140.0, ans=0.025 +2024-09-19 13:08:44,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=671180.0, ans=0.0 +2024-09-19 13:08:55,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=671180.0, ans=0.125 +2024-09-19 13:08:58,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=671220.0, ans=0.125 +2024-09-19 13:09:16,780 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.520e+01 8.939e+01 9.511e+01 1.277e+02, threshold=1.788e+02, percent-clipped=0.0 +2024-09-19 13:09:32,533 INFO [train.py:1198] (0/2) Epoch 38, batch 400, loss[loss=0.2347, ctc_loss=0.111, cr_loss=0.3474, attn_decoder_loss=0.2407, over 29713.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1135, cr_loss=0.3534, attn_decoder_loss=0.2394, over 5025414.26 frames. ], batch size: 82, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:09:32,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=671300.0, ans=0.0 +2024-09-19 13:09:38,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=671300.0, ans=0.125 +2024-09-19 13:09:46,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=671340.0, ans=0.0 +2024-09-19 13:09:58,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=671340.0, ans=0.2 +2024-09-19 13:10:24,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=671420.0, ans=0.0 +2024-09-19 13:10:24,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.62 vs. limit=10.0 +2024-09-19 13:10:35,665 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=10.35 vs. limit=12.0 +2024-09-19 13:10:39,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=671460.0, ans=0.125 +2024-09-19 13:10:48,148 INFO [train.py:1198] (0/2) Epoch 38, batch 450, loss[loss=0.2509, ctc_loss=0.1265, cr_loss=0.3981, attn_decoder_loss=0.2559, over 29683.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.114, cr_loss=0.3546, attn_decoder_loss=0.2399, over 5187494.44 frames. 
], batch size: 83, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:11:51,640 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.658e+01 9.040e+01 9.546e+01 1.503e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-19 13:12:00,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=671660.0, ans=0.1 +2024-09-19 13:12:03,561 INFO [train.py:1198] (0/2) Epoch 38, batch 500, loss[loss=0.2475, ctc_loss=0.1177, cr_loss=0.3547, attn_decoder_loss=0.254, over 29454.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1137, cr_loss=0.3534, attn_decoder_loss=0.2393, over 5329875.77 frames. ], batch size: 94, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:12:19,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=671740.0, ans=0.125 +2024-09-19 13:12:27,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=671740.0, ans=0.2 +2024-09-19 13:12:29,526 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.17 vs. limit=6.0 +2024-09-19 13:12:32,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.44 vs. limit=15.0 +2024-09-19 13:12:39,386 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:13:00,226 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.21 vs. limit=15.0 +2024-09-19 13:13:23,869 INFO [train.py:1198] (0/2) Epoch 38, batch 550, loss[loss=0.2434, ctc_loss=0.117, cr_loss=0.3578, attn_decoder_loss=0.2495, over 28799.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1138, cr_loss=0.3536, attn_decoder_loss=0.2393, over 5422497.46 frames. 
], batch size: 104, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:13:25,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=671900.0, ans=0.015 +2024-09-19 13:13:31,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=671900.0, ans=0.2 +2024-09-19 13:13:36,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=671900.0, ans=0.125 +2024-09-19 13:13:42,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=671940.0, ans=0.125 +2024-09-19 13:13:44,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=671940.0, ans=0.0 +2024-09-19 13:14:00,913 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-168000.pt +2024-09-19 13:14:14,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=671980.0, ans=0.125 +2024-09-19 13:14:24,892 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=672020.0, ans=0.125 +2024-09-19 13:14:35,171 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 8.563e+01 9.079e+01 9.918e+01 4.106e+02, threshold=1.816e+02, percent-clipped=4.0 +2024-09-19 13:14:43,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=672060.0, ans=0.025 +2024-09-19 13:14:43,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=672060.0, ans=0.2 +2024-09-19 13:14:47,317 INFO [train.py:1198] (0/2) Epoch 38, batch 600, loss[loss=0.2453, ctc_loss=0.1228, cr_loss=0.3685, attn_decoder_loss=0.2507, over 29294.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1142, cr_loss=0.3539, attn_decoder_loss=0.2395, over 5509316.80 frames. ], batch size: 100, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:14:49,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=672100.0, ans=0.125 +2024-09-19 13:15:03,335 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.09 vs. limit=22.5 +2024-09-19 13:15:43,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=672220.0, ans=0.125 +2024-09-19 13:16:02,916 INFO [train.py:1198] (0/2) Epoch 38, batch 650, loss[loss=0.2383, ctc_loss=0.1105, cr_loss=0.3516, attn_decoder_loss=0.2447, over 29754.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1132, cr_loss=0.352, attn_decoder_loss=0.2388, over 5586881.89 frames. 
], batch size: 81, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:16:16,855 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:16:24,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=672340.0, ans=0.0 +2024-09-19 13:16:40,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=672380.0, ans=0.125 +2024-09-19 13:16:51,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=672420.0, ans=0.125 +2024-09-19 13:16:57,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=672420.0, ans=0.125 +2024-09-19 13:17:02,804 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.28 vs. limit=15.0 +2024-09-19 13:17:04,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=672460.0, ans=0.0 +2024-09-19 13:17:04,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=672460.0, ans=0.0 +2024-09-19 13:17:09,155 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 8.584e+01 9.023e+01 9.741e+01 1.282e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 13:17:12,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=672460.0, ans=0.07 +2024-09-19 13:17:23,525 INFO [train.py:1198] (0/2) Epoch 38, batch 700, loss[loss=0.2185, ctc_loss=0.1051, cr_loss=0.3305, attn_decoder_loss=0.2237, over 29521.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1137, cr_loss=0.3533, attn_decoder_loss=0.2395, over 5638645.00 frames. ], batch size: 76, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:17:29,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=672500.0, ans=0.125 +2024-09-19 13:17:38,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=672540.0, ans=0.0 +2024-09-19 13:17:47,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.50 vs. limit=15.0 +2024-09-19 13:17:48,457 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.01 vs. limit=15.0 +2024-09-19 13:17:52,656 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:18:12,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=672620.0, ans=0.125 +2024-09-19 13:18:20,520 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.67 vs. 
limit=12.0 +2024-09-19 13:18:23,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=672660.0, ans=0.125 +2024-09-19 13:18:29,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=672660.0, ans=0.2 +2024-09-19 13:18:33,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=672660.0, ans=0.2 +2024-09-19 13:18:39,488 INFO [train.py:1198] (0/2) Epoch 38, batch 750, loss[loss=0.2366, ctc_loss=0.113, cr_loss=0.3525, attn_decoder_loss=0.2425, over 29708.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1135, cr_loss=0.3528, attn_decoder_loss=0.2392, over 5676044.04 frames. ], batch size: 82, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:18:55,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.08 vs. limit=12.0 +2024-09-19 13:19:17,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=672780.0, ans=0.125 +2024-09-19 13:19:43,312 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.368e+01 8.750e+01 9.083e+01 9.607e+01 5.779e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-19 13:19:45,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=672860.0, ans=0.1 +2024-09-19 13:19:51,601 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.42 vs. limit=15.0 +2024-09-19 13:19:55,421 INFO [train.py:1198] (0/2) Epoch 38, batch 800, loss[loss=0.2207, ctc_loss=0.1039, cr_loss=0.3237, attn_decoder_loss=0.2265, over 29563.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1135, cr_loss=0.3528, attn_decoder_loss=0.239, over 5706142.15 frames. ], batch size: 73, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:20:07,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=672900.0, ans=0.0 +2024-09-19 13:20:13,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=672940.0, ans=0.0 +2024-09-19 13:20:14,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=672940.0, ans=0.0 +2024-09-19 13:20:37,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=672980.0, ans=0.0 +2024-09-19 13:20:58,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=673060.0, ans=0.0 +2024-09-19 13:21:15,060 INFO [train.py:1198] (0/2) Epoch 38, batch 850, loss[loss=0.2458, ctc_loss=0.1232, cr_loss=0.3668, attn_decoder_loss=0.2513, over 29714.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1131, cr_loss=0.3522, attn_decoder_loss=0.2386, over 5736288.16 frames. 
], batch size: 89, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:22:18,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=673260.0, ans=0.0 +2024-09-19 13:22:19,982 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.398e+01 8.440e+01 8.974e+01 9.392e+01 3.199e+02, threshold=1.795e+02, percent-clipped=2.0 +2024-09-19 13:22:23,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=673260.0, ans=0.0 +2024-09-19 13:22:30,668 INFO [train.py:1198] (0/2) Epoch 38, batch 900, loss[loss=0.2121, ctc_loss=0.09601, cr_loss=0.3062, attn_decoder_loss=0.2182, over 29586.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1135, cr_loss=0.3527, attn_decoder_loss=0.2389, over 5739917.94 frames. ], batch size: 73, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:22:42,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=673300.0, ans=0.125 +2024-09-19 13:22:47,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=673340.0, ans=0.125 +2024-09-19 13:23:04,785 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.03 vs. limit=15.0 +2024-09-19 13:23:05,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=673380.0, ans=0.125 +2024-09-19 13:23:05,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=673380.0, ans=0.0 +2024-09-19 13:23:07,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=673380.0, ans=0.0 +2024-09-19 13:23:45,776 INFO [train.py:1198] (0/2) Epoch 38, batch 950, loss[loss=0.2174, ctc_loss=0.09595, cr_loss=0.3119, attn_decoder_loss=0.2239, over 29501.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1137, cr_loss=0.3531, attn_decoder_loss=0.2392, over 5740486.15 frames. ], batch size: 74, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:23:53,471 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:24:15,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=673580.0, ans=0.0 +2024-09-19 13:24:53,674 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.808e+01 9.253e+01 1.008e+02 2.662e+02, threshold=1.851e+02, percent-clipped=5.0 +2024-09-19 13:25:04,186 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.18 vs. limit=15.0 +2024-09-19 13:25:06,339 INFO [train.py:1198] (0/2) Epoch 38, batch 1000, loss[loss=0.2256, ctc_loss=0.1083, cr_loss=0.3608, attn_decoder_loss=0.2307, over 29510.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1149, cr_loss=0.3555, attn_decoder_loss=0.24, over 5735873.73 frames. 
], batch size: 77, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:25:08,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=673700.0, ans=0.125 +2024-09-19 13:25:17,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=673700.0, ans=0.125 +2024-09-19 13:25:30,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=673740.0, ans=0.1 +2024-09-19 13:25:38,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=673780.0, ans=0.2 +2024-09-19 13:25:58,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=673820.0, ans=0.2 +2024-09-19 13:26:20,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=673900.0, ans=0.0 +2024-09-19 13:26:21,866 INFO [train.py:1198] (0/2) Epoch 38, batch 1050, loss[loss=0.2408, ctc_loss=0.1047, cr_loss=0.3265, attn_decoder_loss=0.2487, over 29700.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1145, cr_loss=0.3548, attn_decoder_loss=0.2395, over 5745049.54 frames. ], batch size: 85, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:26:26,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=673900.0, ans=0.1 +2024-09-19 13:26:58,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=673980.0, ans=0.125 +2024-09-19 13:27:10,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=674020.0, ans=0.125 +2024-09-19 13:27:27,109 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.345e+01 8.465e+01 8.973e+01 9.470e+01 1.777e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-19 13:27:37,782 INFO [train.py:1198] (0/2) Epoch 38, batch 1100, loss[loss=0.2258, ctc_loss=0.1076, cr_loss=0.3217, attn_decoder_loss=0.2318, over 29439.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1144, cr_loss=0.354, attn_decoder_loss=0.2394, over 5757461.78 frames. 
], batch size: 78, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:28:08,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=674180.0, ans=0.125 +2024-09-19 13:28:11,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=674180.0, ans=10.0 +2024-09-19 13:28:24,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.whiten.whitening_limit, batch_count=674220.0, ans=12.0 +2024-09-19 13:28:28,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=674220.0, ans=0.0 +2024-09-19 13:28:28,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=674220.0, ans=0.2 +2024-09-19 13:28:42,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=674260.0, ans=0.125 +2024-09-19 13:28:58,021 INFO [train.py:1198] (0/2) Epoch 38, batch 1150, loss[loss=0.2145, ctc_loss=0.101, cr_loss=0.3352, attn_decoder_loss=0.2197, over 29454.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1147, cr_loss=0.3546, attn_decoder_loss=0.2394, over 5755046.51 frames. ], batch size: 78, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:29:05,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=674300.0, ans=0.125 +2024-09-19 13:29:13,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=674340.0, ans=0.0 +2024-09-19 13:29:21,505 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=15.73 vs. limit=15.0 +2024-09-19 13:29:27,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=674380.0, ans=0.1 +2024-09-19 13:29:39,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=674380.0, ans=0.125 +2024-09-19 13:30:03,761 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.627e+01 8.613e+01 9.064e+01 9.591e+01 1.895e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-19 13:30:07,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.70 vs. limit=15.0 +2024-09-19 13:30:08,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=674460.0, ans=0.125 +2024-09-19 13:30:13,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=674500.0, ans=0.2 +2024-09-19 13:30:14,471 INFO [train.py:1198] (0/2) Epoch 38, batch 1200, loss[loss=0.2365, ctc_loss=0.108, cr_loss=0.3484, attn_decoder_loss=0.243, over 29675.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1148, cr_loss=0.3555, attn_decoder_loss=0.2399, over 5747484.42 frames. 
], batch size: 85, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:30:16,267 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:30:19,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=674500.0, ans=0.125 +2024-09-19 13:30:28,397 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:30:40,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=674540.0, ans=0.1 +2024-09-19 13:31:12,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=674620.0, ans=0.025 +2024-09-19 13:31:20,270 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.39 vs. limit=12.0 +2024-09-19 13:31:29,825 INFO [train.py:1198] (0/2) Epoch 38, batch 1250, loss[loss=0.252, ctc_loss=0.1333, cr_loss=0.3997, attn_decoder_loss=0.2564, over 29566.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1153, cr_loss=0.3571, attn_decoder_loss=0.2405, over 5775409.25 frames. ], batch size: 92, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:31:36,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=674700.0, ans=0.125 +2024-09-19 13:31:50,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=674740.0, ans=0.0 +2024-09-19 13:32:19,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=674820.0, ans=0.025 +2024-09-19 13:32:20,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=674820.0, ans=10.0 +2024-09-19 13:32:22,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=674820.0, ans=0.2 +2024-09-19 13:32:30,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=674820.0, ans=0.2 +2024-09-19 13:32:30,752 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.82 vs. limit=15.0 +2024-09-19 13:32:35,129 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.07 vs. limit=15.0 +2024-09-19 13:32:37,392 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.525e+01 9.083e+01 9.622e+01 1.847e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-19 13:32:46,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.52 vs. limit=22.5 +2024-09-19 13:32:50,183 INFO [train.py:1198] (0/2) Epoch 38, batch 1300, loss[loss=0.2426, ctc_loss=0.1205, cr_loss=0.365, attn_decoder_loss=0.2481, over 28068.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1151, cr_loss=0.3568, attn_decoder_loss=0.24, over 5780000.71 frames. 
], batch size: 111, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:32:52,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.51 vs. limit=6.0 +2024-09-19 13:32:58,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=674900.0, ans=0.125 +2024-09-19 13:33:01,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=674900.0, ans=0.125 +2024-09-19 13:33:07,668 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.70 vs. limit=15.0 +2024-09-19 13:33:13,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=674940.0, ans=0.125 +2024-09-19 13:33:22,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=674980.0, ans=0.125 +2024-09-19 13:33:43,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=675020.0, ans=0.125 +2024-09-19 13:33:50,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=675060.0, ans=0.125 +2024-09-19 13:33:52,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=675060.0, ans=0.1 +2024-09-19 13:33:55,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=675060.0, ans=0.2 +2024-09-19 13:34:05,850 INFO [train.py:1198] (0/2) Epoch 38, batch 1350, loss[loss=0.2423, ctc_loss=0.1186, cr_loss=0.3637, attn_decoder_loss=0.248, over 29739.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1145, cr_loss=0.3555, attn_decoder_loss=0.2398, over 5797462.69 frames. ], batch size: 81, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:34:22,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=675140.0, ans=0.1 +2024-09-19 13:34:23,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=675140.0, ans=0.2 +2024-09-19 13:34:25,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=675140.0, ans=0.125 +2024-09-19 13:34:40,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=675180.0, ans=0.0 +2024-09-19 13:35:04,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=675260.0, ans=0.0 +2024-09-19 13:35:11,895 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.058e+01 8.540e+01 8.958e+01 9.553e+01 1.189e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-19 13:35:20,937 INFO [train.py:1198] (0/2) Epoch 38, batch 1400, loss[loss=0.2073, ctc_loss=0.09708, cr_loss=0.3149, attn_decoder_loss=0.2125, over 29600.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1146, cr_loss=0.3559, attn_decoder_loss=0.2397, over 5808010.84 frames. 
], batch size: 69, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:35:24,381 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:35:34,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=675340.0, ans=0.125 +2024-09-19 13:36:02,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=675380.0, ans=0.125 +2024-09-19 13:36:08,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=675420.0, ans=0.2 +2024-09-19 13:36:08,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=675420.0, ans=0.125 +2024-09-19 13:36:13,062 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.93 vs. limit=15.0 +2024-09-19 13:36:14,536 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.31 vs. limit=15.0 +2024-09-19 13:36:23,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=675460.0, ans=0.025 +2024-09-19 13:36:40,630 INFO [train.py:1198] (0/2) Epoch 38, batch 1450, loss[loss=0.2478, ctc_loss=0.1235, cr_loss=0.3706, attn_decoder_loss=0.2534, over 29449.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1146, cr_loss=0.3557, attn_decoder_loss=0.2403, over 5804702.87 frames. ], batch size: 94, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:36:52,984 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:37:09,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=675580.0, ans=0.0 +2024-09-19 13:37:15,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=675580.0, ans=0.125 +2024-09-19 13:37:27,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=675620.0, ans=0.125 +2024-09-19 13:37:41,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=675660.0, ans=0.125 +2024-09-19 13:37:46,830 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 8.618e+01 9.172e+01 9.928e+01 3.328e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-19 13:37:51,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=675660.0, ans=0.0 +2024-09-19 13:37:56,071 INFO [train.py:1198] (0/2) Epoch 38, batch 1500, loss[loss=0.245, ctc_loss=0.1234, cr_loss=0.3724, attn_decoder_loss=0.2502, over 29596.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1149, cr_loss=0.3561, attn_decoder_loss=0.2405, over 5804829.21 frames. 
], batch size: 86, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:37:59,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=675700.0, ans=0.1 +2024-09-19 13:38:25,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=675780.0, ans=0.125 +2024-09-19 13:38:43,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=675820.0, ans=0.1 +2024-09-19 13:38:49,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=675820.0, ans=0.125 +2024-09-19 13:39:07,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=675860.0, ans=0.1 +2024-09-19 13:39:09,962 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.17 vs. limit=15.0 +2024-09-19 13:39:11,913 INFO [train.py:1198] (0/2) Epoch 38, batch 1550, loss[loss=0.2582, ctc_loss=0.1299, cr_loss=0.3778, attn_decoder_loss=0.2641, over 29521.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1148, cr_loss=0.3559, attn_decoder_loss=0.2402, over 5781130.60 frames. ], batch size: 90, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:39:16,237 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.09 vs. limit=6.0 +2024-09-19 13:39:30,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=675940.0, ans=0.125 +2024-09-19 13:39:33,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=675940.0, ans=0.125 +2024-09-19 13:39:35,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=675940.0, ans=0.2 +2024-09-19 13:39:57,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=676020.0, ans=0.2 +2024-09-19 13:40:20,844 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.168e+01 8.489e+01 9.048e+01 9.769e+01 3.941e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-19 13:40:28,183 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.90 vs. limit=15.0 +2024-09-19 13:40:32,003 INFO [train.py:1198] (0/2) Epoch 38, batch 1600, loss[loss=0.2376, ctc_loss=0.1065, cr_loss=0.3261, attn_decoder_loss=0.245, over 29696.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1146, cr_loss=0.355, attn_decoder_loss=0.2399, over 5763915.38 frames. 
], batch size: 85, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:40:33,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=676100.0, ans=0.125 +2024-09-19 13:40:33,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=676100.0, ans=0.125 +2024-09-19 13:40:47,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=676140.0, ans=0.125 +2024-09-19 13:40:56,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=676140.0, ans=0.0 +2024-09-19 13:41:17,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=676220.0, ans=0.125 +2024-09-19 13:41:31,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=676260.0, ans=0.0 +2024-09-19 13:41:34,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=676260.0, ans=0.125 +2024-09-19 13:41:47,496 INFO [train.py:1198] (0/2) Epoch 38, batch 1650, loss[loss=0.2407, ctc_loss=0.1104, cr_loss=0.3314, attn_decoder_loss=0.2478, over 29699.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1141, cr_loss=0.3542, attn_decoder_loss=0.2397, over 5758724.73 frames. ], batch size: 89, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:41:49,844 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.36 vs. limit=22.5 +2024-09-19 13:42:10,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=676340.0, ans=0.125 +2024-09-19 13:42:11,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=676340.0, ans=0.0 +2024-09-19 13:42:14,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=676340.0, ans=0.125 +2024-09-19 13:42:28,439 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:42:38,117 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.12 vs. limit=15.0 +2024-09-19 13:42:49,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=676460.0, ans=0.125 +2024-09-19 13:42:50,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=676460.0, ans=0.2 +2024-09-19 13:42:53,545 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.620e+01 8.638e+01 9.232e+01 9.728e+01 1.403e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-19 13:43:00,251 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.69 vs. limit=15.0 +2024-09-19 13:43:02,488 INFO [train.py:1198] (0/2) Epoch 38, batch 1700, loss[loss=0.215, ctc_loss=0.1022, cr_loss=0.3311, attn_decoder_loss=0.2201, over 29579.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1142, cr_loss=0.3545, attn_decoder_loss=0.2397, over 5780264.06 frames. 
], batch size: 69, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:43:04,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=676500.0, ans=0.025 +2024-09-19 13:43:16,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=676540.0, ans=0.0 +2024-09-19 13:43:22,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=676540.0, ans=0.0 +2024-09-19 13:43:33,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=676580.0, ans=0.125 +2024-09-19 13:43:42,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=676580.0, ans=0.1 +2024-09-19 13:43:43,025 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.82 vs. limit=15.0 +2024-09-19 13:44:22,574 INFO [train.py:1198] (0/2) Epoch 38, batch 1750, loss[loss=0.2134, ctc_loss=0.1006, cr_loss=0.325, attn_decoder_loss=0.2187, over 29380.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.114, cr_loss=0.3543, attn_decoder_loss=0.2395, over 5788566.77 frames. ], batch size: 67, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:44:30,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=676700.0, ans=0.125 +2024-09-19 13:44:30,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=676700.0, ans=0.125 +2024-09-19 13:44:32,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=676700.0, ans=0.125 +2024-09-19 13:44:44,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=676740.0, ans=0.125 +2024-09-19 13:45:04,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=676780.0, ans=0.95 +2024-09-19 13:45:19,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=676820.0, ans=0.1 +2024-09-19 13:45:30,731 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 8.567e+01 9.147e+01 9.670e+01 2.287e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-19 13:45:37,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=676900.0, ans=0.0 +2024-09-19 13:45:38,185 INFO [train.py:1198] (0/2) Epoch 38, batch 1800, loss[loss=0.2406, ctc_loss=0.1222, cr_loss=0.3721, attn_decoder_loss=0.2455, over 29697.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1145, cr_loss=0.3554, attn_decoder_loss=0.24, over 5789591.42 frames. ], batch size: 83, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:46:14,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=676980.0, ans=0.125 +2024-09-19 13:46:19,951 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.89 vs. 
limit=6.0 +2024-09-19 13:46:29,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=677020.0, ans=0.0 +2024-09-19 13:46:41,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=677060.0, ans=0.0 +2024-09-19 13:46:46,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=677060.0, ans=0.0 +2024-09-19 13:46:49,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=677060.0, ans=0.1 +2024-09-19 13:46:52,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=677100.0, ans=0.125 +2024-09-19 13:46:52,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=677100.0, ans=0.2 +2024-09-19 13:46:53,833 INFO [train.py:1198] (0/2) Epoch 38, batch 1850, loss[loss=0.2422, ctc_loss=0.1155, cr_loss=0.3549, attn_decoder_loss=0.2484, over 29642.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1142, cr_loss=0.3548, attn_decoder_loss=0.2397, over 5796514.82 frames. ], batch size: 86, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:46:55,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=677100.0, ans=0.2 +2024-09-19 13:47:03,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.65 vs. limit=10.0 +2024-09-19 13:47:11,383 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.53 vs. limit=15.0 +2024-09-19 13:47:33,833 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.32 vs. limit=12.0 +2024-09-19 13:47:34,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=677180.0, ans=0.1 +2024-09-19 13:47:38,817 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.78 vs. limit=15.0 +2024-09-19 13:47:57,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=677260.0, ans=0.025 +2024-09-19 13:48:01,725 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.336e+01 8.507e+01 9.088e+01 9.545e+01 1.586e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-19 13:48:10,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=677260.0, ans=0.125 +2024-09-19 13:48:13,524 INFO [train.py:1198] (0/2) Epoch 38, batch 1900, loss[loss=0.2452, ctc_loss=0.1148, cr_loss=0.3625, attn_decoder_loss=0.2517, over 29697.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1144, cr_loss=0.3553, attn_decoder_loss=0.2401, over 5803853.61 frames. 
], batch size: 89, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:49:05,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=677420.0, ans=0.125 +2024-09-19 13:49:11,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=677420.0, ans=0.125 +2024-09-19 13:49:15,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=677460.0, ans=0.125 +2024-09-19 13:49:29,173 INFO [train.py:1198] (0/2) Epoch 38, batch 1950, loss[loss=0.2301, ctc_loss=0.1103, cr_loss=0.3525, attn_decoder_loss=0.2356, over 29448.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1147, cr_loss=0.3559, attn_decoder_loss=0.2411, over 5818387.96 frames. ], batch size: 78, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:49:32,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=677500.0, ans=0.0 +2024-09-19 13:49:50,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=677540.0, ans=0.125 +2024-09-19 13:49:57,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.96 vs. limit=22.5 +2024-09-19 13:50:10,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=677580.0, ans=0.0 +2024-09-19 13:50:17,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.99 vs. limit=10.0 +2024-09-19 13:50:18,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=677620.0, ans=0.125 +2024-09-19 13:50:35,328 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.93 vs. limit=15.0 +2024-09-19 13:50:37,363 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.817e+01 8.623e+01 9.104e+01 9.387e+01 1.434e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-19 13:50:37,767 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:50:44,929 INFO [train.py:1198] (0/2) Epoch 38, batch 2000, loss[loss=0.217, ctc_loss=0.1072, cr_loss=0.338, attn_decoder_loss=0.2216, over 29337.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1152, cr_loss=0.3571, attn_decoder_loss=0.2416, over 5795490.13 frames. ], batch size: 67, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 13:50:56,644 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.09 vs. 
limit=22.5 +2024-09-19 13:51:12,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=677740.0, ans=0.125 +2024-09-19 13:51:18,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=677780.0, ans=0.0 +2024-09-19 13:51:35,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=677820.0, ans=0.1 +2024-09-19 13:51:38,750 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.58 vs. limit=15.0 +2024-09-19 13:51:42,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=677820.0, ans=0.2 +2024-09-19 13:51:47,684 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.53 vs. limit=22.5 +2024-09-19 13:52:01,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=677860.0, ans=0.125 +2024-09-19 13:52:04,795 INFO [train.py:1198] (0/2) Epoch 38, batch 2050, loss[loss=0.2199, ctc_loss=0.1035, cr_loss=0.3302, attn_decoder_loss=0.2255, over 29422.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1149, cr_loss=0.3568, attn_decoder_loss=0.2407, over 5787614.44 frames. ], batch size: 70, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 13:52:09,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=677900.0, ans=0.0 +2024-09-19 13:52:38,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=677980.0, ans=0.125 +2024-09-19 13:52:52,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=678020.0, ans=0.0 +2024-09-19 13:52:59,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=678020.0, ans=0.2 +2024-09-19 13:53:04,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=678060.0, ans=0.125 +2024-09-19 13:53:12,679 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.512e+01 8.454e+01 8.907e+01 9.620e+01 4.678e+02, threshold=1.781e+02, percent-clipped=1.0 +2024-09-19 13:53:20,343 INFO [train.py:1198] (0/2) Epoch 38, batch 2100, loss[loss=0.2374, ctc_loss=0.1202, cr_loss=0.3572, attn_decoder_loss=0.2425, over 29752.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1145, cr_loss=0.3561, attn_decoder_loss=0.2401, over 5798534.38 frames. 
], batch size: 81, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 13:53:20,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=678100.0, ans=10.0 +2024-09-19 13:53:28,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=678100.0, ans=0.125 +2024-09-19 13:54:13,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=678220.0, ans=0.2 +2024-09-19 13:54:14,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=678220.0, ans=0.2 +2024-09-19 13:54:35,527 INFO [train.py:1198] (0/2) Epoch 38, batch 2150, loss[loss=0.2488, ctc_loss=0.134, cr_loss=0.4185, attn_decoder_loss=0.2522, over 29471.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1139, cr_loss=0.3552, attn_decoder_loss=0.2395, over 5813553.91 frames. ], batch size: 78, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 13:54:40,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten.whitening_limit, batch_count=678300.0, ans=15.0 +2024-09-19 13:54:42,731 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.25 vs. limit=15.0 +2024-09-19 13:55:10,030 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.41 vs. limit=12.0 +2024-09-19 13:55:14,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=678380.0, ans=0.2 +2024-09-19 13:55:21,036 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.02 vs. limit=12.0 +2024-09-19 13:55:27,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=678420.0, ans=0.125 +2024-09-19 13:55:44,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=678460.0, ans=0.125 +2024-09-19 13:55:45,741 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.394e+01 8.586e+01 8.959e+01 9.597e+01 2.666e+02, threshold=1.792e+02, percent-clipped=1.0 +2024-09-19 13:55:50,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=678500.0, ans=0.0 +2024-09-19 13:55:53,921 INFO [train.py:1198] (0/2) Epoch 38, batch 2200, loss[loss=0.2337, ctc_loss=0.1087, cr_loss=0.325, attn_decoder_loss=0.2404, over 29641.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1142, cr_loss=0.3555, attn_decoder_loss=0.2394, over 5810635.10 frames. ], batch size: 86, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:56:10,591 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.33 vs. 
limit=10.0 +2024-09-19 13:56:25,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=678580.0, ans=0.125 +2024-09-19 13:56:32,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=678580.0, ans=0.125 +2024-09-19 13:56:34,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=678580.0, ans=0.2 +2024-09-19 13:56:44,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=678620.0, ans=0.125 +2024-09-19 13:56:53,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=678620.0, ans=0.2 +2024-09-19 13:56:55,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.60 vs. limit=22.5 +2024-09-19 13:56:57,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=678660.0, ans=0.0 +2024-09-19 13:57:11,729 INFO [train.py:1198] (0/2) Epoch 38, batch 2250, loss[loss=0.2461, ctc_loss=0.1223, cr_loss=0.3582, attn_decoder_loss=0.2519, over 29730.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1142, cr_loss=0.3554, attn_decoder_loss=0.2396, over 5810191.29 frames. ], batch size: 82, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:57:59,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=678820.0, ans=0.2 +2024-09-19 13:58:20,713 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.565e+01 9.062e+01 9.644e+01 4.463e+02, threshold=1.812e+02, percent-clipped=2.0 +2024-09-19 13:58:22,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=678860.0, ans=0.125 +2024-09-19 13:58:23,302 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.04 vs. limit=15.0 +2024-09-19 13:58:26,895 INFO [train.py:1198] (0/2) Epoch 38, batch 2300, loss[loss=0.2126, ctc_loss=0.09438, cr_loss=0.3165, attn_decoder_loss=0.2187, over 29341.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1137, cr_loss=0.3541, attn_decoder_loss=0.2387, over 5797299.42 frames. ], batch size: 71, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:58:49,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=678940.0, ans=0.2 +2024-09-19 13:59:06,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=678980.0, ans=0.1 +2024-09-19 13:59:13,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=679020.0, ans=0.125 +2024-09-19 13:59:23,197 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:59:25,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.37 vs. 
limit=15.0 +2024-09-19 13:59:42,413 INFO [train.py:1198] (0/2) Epoch 38, batch 2350, loss[loss=0.242, ctc_loss=0.1185, cr_loss=0.3691, attn_decoder_loss=0.2475, over 29699.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1137, cr_loss=0.3542, attn_decoder_loss=0.2389, over 5803314.89 frames. ], batch size: 83, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:00:04,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=679140.0, ans=0.1 +2024-09-19 14:00:09,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=679140.0, ans=0.125 +2024-09-19 14:00:26,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=679180.0, ans=0.0 +2024-09-19 14:00:27,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=679180.0, ans=0.125 +2024-09-19 14:00:30,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=679220.0, ans=0.07 +2024-09-19 14:00:39,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=679220.0, ans=0.1 +2024-09-19 14:00:56,208 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.402e+01 8.471e+01 8.979e+01 9.530e+01 2.043e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-19 14:01:02,398 INFO [train.py:1198] (0/2) Epoch 38, batch 2400, loss[loss=0.2361, ctc_loss=0.1166, cr_loss=0.3679, attn_decoder_loss=0.2412, over 29535.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1137, cr_loss=0.3544, attn_decoder_loss=0.2393, over 5807660.08 frames. ], batch size: 76, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 14:01:04,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=679300.0, ans=0.0 +2024-09-19 14:01:32,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=679380.0, ans=0.025 +2024-09-19 14:01:54,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=679420.0, ans=0.125 +2024-09-19 14:01:55,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=679420.0, ans=0.2 +2024-09-19 14:02:10,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=679460.0, ans=0.125 +2024-09-19 14:02:12,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=679460.0, ans=0.025 +2024-09-19 14:02:18,229 INFO [train.py:1198] (0/2) Epoch 38, batch 2450, loss[loss=0.2364, ctc_loss=0.1185, cr_loss=0.3739, attn_decoder_loss=0.2412, over 29713.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1144, cr_loss=0.3553, attn_decoder_loss=0.2402, over 5785240.05 frames. ], batch size: 82, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:02:29,377 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.46 vs. 
limit=15.0 +2024-09-19 14:02:31,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=679540.0, ans=0.09899494936611666 +2024-09-19 14:02:33,840 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.84 vs. limit=15.0 +2024-09-19 14:03:09,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=679620.0, ans=0.0 +2024-09-19 14:03:12,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=679620.0, ans=0.125 +2024-09-19 14:03:28,797 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.520e+01 8.981e+01 9.531e+01 3.262e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-19 14:03:35,430 INFO [train.py:1198] (0/2) Epoch 38, batch 2500, loss[loss=0.2478, ctc_loss=0.1216, cr_loss=0.3605, attn_decoder_loss=0.2538, over 29621.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1144, cr_loss=0.3551, attn_decoder_loss=0.2403, over 5796223.76 frames. ], batch size: 86, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:03:45,163 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.17 vs. limit=6.0 +2024-09-19 14:03:50,032 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.00 vs. limit=22.5 +2024-09-19 14:04:29,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=679820.0, ans=0.0 +2024-09-19 14:04:48,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=679860.0, ans=0.1 +2024-09-19 14:04:53,319 INFO [train.py:1198] (0/2) Epoch 38, batch 2550, loss[loss=0.2029, ctc_loss=0.0925, cr_loss=0.2932, attn_decoder_loss=0.2087, over 29357.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1141, cr_loss=0.3544, attn_decoder_loss=0.2402, over 5798274.98 frames. 
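In each optim.py WARNING record the reported threshold is Clipping_scale (2.0) times the middle of the five logged grad-norm statistics, e.g. 2.0 * 8.981e+01 = 1.796e+02 just above, and percent-clipped reports how often recent gradient norms exceeded it. A sketch of that bookkeeping follows; the windowing details are assumptions, not the actual optim.py logic.

```python
# Quartile-style grad-norm summary and clipping threshold consistent
# with the WARNING records above: threshold = clipping_scale * median.
# The surrounding bookkeeping is an assumption, not the optim.py code.
import torch

def summarize_and_threshold(grad_norms, clipping_scale=2.0):
    norms = torch.tensor(grad_norms, dtype=torch.float32)
    q = torch.quantile(norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * q[2].item()   # 2.0 x median
    pct = (norms > threshold).float().mean().item() * 100.0
    print(f"grad-norm quartiles {[f'{v:.3e}' for v in q.tolist()]}, "
          f"threshold={threshold:.3e}, percent-clipped={pct:.1f}")
    return threshold
```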
], batch size: 67, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:04:59,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=679900.0, ans=0.125 +2024-09-19 14:05:08,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=679940.0, ans=0.95 +2024-09-19 14:05:25,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=679980.0, ans=0.125 +2024-09-19 14:05:49,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=680020.0, ans=0.125 +2024-09-19 14:05:56,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=680060.0, ans=0.1 +2024-09-19 14:06:04,812 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.327e+01 8.447e+01 9.059e+01 9.443e+01 1.451e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-19 14:06:05,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=680060.0, ans=0.125 +2024-09-19 14:06:09,435 INFO [train.py:1198] (0/2) Epoch 38, batch 2600, loss[loss=0.2307, ctc_loss=0.1146, cr_loss=0.3546, attn_decoder_loss=0.2357, over 29478.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1142, cr_loss=0.3548, attn_decoder_loss=0.2403, over 5795629.45 frames. ], batch size: 78, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:06:14,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=680100.0, ans=0.025 +2024-09-19 14:06:17,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=680100.0, ans=0.025 +2024-09-19 14:06:18,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=680100.0, ans=0.1 +2024-09-19 14:06:23,647 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.23 vs. limit=22.5 +2024-09-19 14:06:27,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=680140.0, ans=0.125 +2024-09-19 14:06:30,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=680140.0, ans=0.1 +2024-09-19 14:06:53,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=680220.0, ans=0.2 +2024-09-19 14:06:59,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=680220.0, ans=0.0 +2024-09-19 14:07:17,864 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.19 vs. limit=15.0 +2024-09-19 14:07:26,864 INFO [train.py:1198] (0/2) Epoch 38, batch 2650, loss[loss=0.2502, ctc_loss=0.1269, cr_loss=0.3853, attn_decoder_loss=0.2554, over 29227.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1148, cr_loss=0.3562, attn_decoder_loss=0.2408, over 5801204.94 frames. 
], batch size: 100, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:07:36,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.28 vs. limit=15.0 +2024-09-19 14:08:24,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=680420.0, ans=0.2 +2024-09-19 14:08:32,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=680460.0, ans=0.0 +2024-09-19 14:08:39,404 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.526e+01 9.066e+01 9.711e+01 1.379e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-19 14:08:41,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=680460.0, ans=0.1 +2024-09-19 14:08:44,066 INFO [train.py:1198] (0/2) Epoch 38, batch 2700, loss[loss=0.245, ctc_loss=0.1165, cr_loss=0.3711, attn_decoder_loss=0.251, over 29533.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.115, cr_loss=0.3559, attn_decoder_loss=0.2412, over 5796443.60 frames. ], batch size: 87, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:08:54,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=680500.0, ans=0.2 +2024-09-19 14:09:40,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=680620.0, ans=0.125 +2024-09-19 14:09:46,974 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.25 vs. limit=15.0 +2024-09-19 14:09:59,320 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.23 vs. limit=6.0 +2024-09-19 14:09:59,762 INFO [train.py:1198] (0/2) Epoch 38, batch 2750, loss[loss=0.2218, ctc_loss=0.1063, cr_loss=0.3461, attn_decoder_loss=0.2269, over 29508.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1143, cr_loss=0.3549, attn_decoder_loss=0.2401, over 5796142.31 frames. ], batch size: 75, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:10:18,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=680740.0, ans=0.125 +2024-09-19 14:10:33,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_na.min_abs, batch_count=680780.0, ans=0.02 +2024-09-19 14:10:37,496 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.37 vs. 
limit=10.0 +2024-09-19 14:10:39,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=680780.0, ans=0.1 +2024-09-19 14:10:47,185 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=680820.0, ans=0.125 +2024-09-19 14:10:53,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=680820.0, ans=0.125 +2024-09-19 14:11:13,461 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.135e+01 8.653e+01 9.096e+01 9.746e+01 4.436e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-19 14:11:18,100 INFO [train.py:1198] (0/2) Epoch 38, batch 2800, loss[loss=0.2559, ctc_loss=0.1486, cr_loss=0.3974, attn_decoder_loss=0.259, over 20260.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1148, cr_loss=0.3557, attn_decoder_loss=0.2403, over 5777104.63 frames. ], batch size: 210, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 14:11:18,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=680900.0, ans=0.125 +2024-09-19 14:11:21,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=680900.0, ans=0.125 +2024-09-19 14:12:07,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=681020.0, ans=10.0 +2024-09-19 14:12:17,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=681020.0, ans=0.0 +2024-09-19 14:12:22,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=681060.0, ans=0.0 +2024-09-19 14:12:26,120 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.98 vs. limit=22.5 +2024-09-19 14:12:32,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=681060.0, ans=0.0 +2024-09-19 14:12:35,543 INFO [train.py:1198] (0/2) Epoch 38, batch 2850, loss[loss=0.2352, ctc_loss=0.1148, cr_loss=0.364, attn_decoder_loss=0.2405, over 29498.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1151, cr_loss=0.3561, attn_decoder_loss=0.2405, over 5762478.20 frames. ], batch size: 77, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:12:38,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=681100.0, ans=0.125 +2024-09-19 14:12:58,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=681140.0, ans=0.125 +2024-09-19 14:13:27,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=681220.0, ans=0.125 +2024-09-19 14:13:47,916 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.774e+01 8.614e+01 9.082e+01 1.001e+02 4.152e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 14:13:50,898 INFO [train.py:1198] (0/2) Epoch 38, batch 2900, loss[loss=0.2297, ctc_loss=0.1127, cr_loss=0.351, attn_decoder_loss=0.2349, over 29435.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.116, cr_loss=0.3584, attn_decoder_loss=0.2415, over 5787965.61 frames. 
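Across these records the reported totals are consistent with loss = 0.1 * (ctc_loss + 0.2 * cr_loss) + 0.9 * attn_decoder_loss; for the batch 2900 tot_loss just above, 0.1 * (0.116 + 0.2 * 0.3584) + 0.9 * 0.2415 = 0.2361. These weights are inferred from the logged numbers, not taken from the recipe. The same decomposition fits the validation record further below, where cr_loss is ~6e-15: the consistency-regularization term presumably vanishes when no augmented second pass is run at validation time.

```python
# Check (with inferred, assumed weights) that the logged totals
# decompose as 0.1 * (ctc + 0.2 * cr) + 0.9 * attn_decoder.
records = [
    # (loss, ctc_loss, cr_loss, attn_decoder_loss)
    (0.2361, 0.1160, 0.3584, 0.2415),      # epoch 38, batch 2900 tot_loss
    (0.2355, 0.1147, 0.3559, 0.2411),      # epoch 38, batch 1950 tot_loss
    (0.2118, 0.03653, 5.871e-15, 0.2312),  # epoch 38 validation
]
for loss, ctc, cr, attn in records:
    recon = 0.1 * (ctc + 0.2 * cr) + 0.9 * attn
    assert abs(recon - loss) < 5e-4, (loss, recon)
    print(f"logged={loss:.4f} reconstructed={recon:.4f}")
```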
], batch size: 79, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:14:09,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=681340.0, ans=0.5 +2024-09-19 14:14:11,304 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.89 vs. limit=6.0 +2024-09-19 14:14:37,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=681420.0, ans=0.07 +2024-09-19 14:14:39,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1.whitening_limit, batch_count=681420.0, ans=10.0 +2024-09-19 14:15:08,291 INFO [train.py:1198] (0/2) Epoch 38, batch 2950, loss[loss=0.2299, ctc_loss=0.1219, cr_loss=0.3711, attn_decoder_loss=0.2337, over 29518.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.115, cr_loss=0.3563, attn_decoder_loss=0.2402, over 5781503.71 frames. ], batch size: 75, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:15:08,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=681500.0, ans=0.07 +2024-09-19 14:15:29,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=681540.0, ans=0.125 +2024-09-19 14:15:39,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=681580.0, ans=0.125 +2024-09-19 14:15:42,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.56 vs. limit=22.5 +2024-09-19 14:15:55,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.11 vs. limit=15.0 +2024-09-19 14:15:59,915 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.70 vs. limit=15.0 +2024-09-19 14:16:16,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=681660.0, ans=0.1 +2024-09-19 14:16:20,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=681660.0, ans=0.125 +2024-09-19 14:16:23,458 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.275e+01 8.330e+01 8.968e+01 9.568e+01 1.287e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 14:16:26,715 INFO [train.py:1198] (0/2) Epoch 38, batch 3000, loss[loss=0.2365, ctc_loss=0.1127, cr_loss=0.355, attn_decoder_loss=0.2423, over 29766.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1148, cr_loss=0.3557, attn_decoder_loss=0.2401, over 5782271.67 frames. ], batch size: 81, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:16:26,716 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 14:16:45,081 INFO [train.py:1230] (0/2) Epoch 38, validation: loss=0.2118, ctc_loss=0.03653, cr_loss=5.871e-15, attn_decoder_loss=0.2312, over 944034.00 frames. +2024-09-19 14:16:45,082 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 14:16:47,551 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.41 vs. 
limit=15.0 +2024-09-19 14:16:48,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=681700.0, ans=0.1 +2024-09-19 14:16:50,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=681700.0, ans=0.2 +2024-09-19 14:17:06,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=681740.0, ans=0.0 +2024-09-19 14:17:08,910 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.03 vs. limit=10.0 +2024-09-19 14:17:17,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=681780.0, ans=0.125 +2024-09-19 14:17:23,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=681780.0, ans=0.1 +2024-09-19 14:17:37,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=681820.0, ans=0.125 +2024-09-19 14:17:38,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=681820.0, ans=0.125 +2024-09-19 14:17:39,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=681820.0, ans=0.125 +2024-09-19 14:17:40,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=681820.0, ans=0.1 +2024-09-19 14:17:41,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=681820.0, ans=0.125 +2024-09-19 14:17:47,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=681860.0, ans=0.125 +2024-09-19 14:18:00,867 INFO [train.py:1198] (0/2) Epoch 38, batch 3050, loss[loss=0.2205, ctc_loss=0.1002, cr_loss=0.3334, attn_decoder_loss=0.2264, over 29546.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1144, cr_loss=0.355, attn_decoder_loss=0.2404, over 5776648.11 frames. ], batch size: 76, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:18:03,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.43 vs. limit=22.5 +2024-09-19 14:18:09,753 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.85 vs. 
limit=15.0 +2024-09-19 14:18:11,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=681900.0, ans=0.0 +2024-09-19 14:18:17,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=681940.0, ans=10.0 +2024-09-19 14:18:29,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_na.min_abs, batch_count=681980.0, ans=0.02 +2024-09-19 14:18:52,987 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:19:15,321 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.187e+01 8.484e+01 8.987e+01 9.703e+01 1.967e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 14:19:18,299 INFO [train.py:1198] (0/2) Epoch 38, batch 3100, loss[loss=0.2553, ctc_loss=0.1233, cr_loss=0.3654, attn_decoder_loss=0.2618, over 29272.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.114, cr_loss=0.3539, attn_decoder_loss=0.2399, over 5776135.51 frames. ], batch size: 100, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:19:39,079 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.03 vs. limit=10.0 +2024-09-19 14:19:47,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=682140.0, ans=0.0 +2024-09-19 14:19:51,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.91 vs. limit=15.0 +2024-09-19 14:20:08,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=682220.0, ans=0.2 +2024-09-19 14:20:32,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.28 vs. limit=8.0 +2024-09-19 14:20:33,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=682260.0, ans=0.125 +2024-09-19 14:20:35,964 INFO [train.py:1198] (0/2) Epoch 38, batch 3150, loss[loss=0.2408, ctc_loss=0.1201, cr_loss=0.3687, attn_decoder_loss=0.246, over 28715.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1142, cr_loss=0.3542, attn_decoder_loss=0.2401, over 5781572.31 frames. 
], batch size: 104, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:20:49,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=682340.0, ans=0.0 +2024-09-19 14:21:01,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=682340.0, ans=0.1 +2024-09-19 14:21:25,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=682420.0, ans=0.025 +2024-09-19 14:21:29,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=682420.0, ans=0.125 +2024-09-19 14:21:48,486 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.507e+01 9.169e+01 9.644e+01 2.178e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-19 14:21:51,677 INFO [train.py:1198] (0/2) Epoch 38, batch 3200, loss[loss=0.2236, ctc_loss=0.1042, cr_loss=0.3355, attn_decoder_loss=0.2294, over 29401.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1139, cr_loss=0.3537, attn_decoder_loss=0.2397, over 5791801.45 frames. ], batch size: 79, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:21:54,298 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.36 vs. limit=10.0 +2024-09-19 14:22:01,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=682500.0, ans=0.125 +2024-09-19 14:22:02,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=682500.0, ans=0.125 +2024-09-19 14:22:06,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=682540.0, ans=0.07 +2024-09-19 14:22:22,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=682580.0, ans=0.125 +2024-09-19 14:22:22,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.32 vs. limit=15.0 +2024-09-19 14:22:41,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=682620.0, ans=0.07 +2024-09-19 14:23:09,410 INFO [train.py:1198] (0/2) Epoch 38, batch 3250, loss[loss=0.2443, ctc_loss=0.1203, cr_loss=0.3774, attn_decoder_loss=0.2497, over 29690.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1141, cr_loss=0.3549, attn_decoder_loss=0.2399, over 5797941.35 frames. 
], batch size: 84, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:23:09,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=682700.0, ans=0.2 +2024-09-19 14:23:11,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=682700.0, ans=0.025 +2024-09-19 14:23:14,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=682700.0, ans=0.0 +2024-09-19 14:23:15,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=682700.0, ans=0.125 +2024-09-19 14:23:28,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=682740.0, ans=0.0 +2024-09-19 14:23:38,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_positive, batch_count=682740.0, ans=0.05 +2024-09-19 14:23:50,824 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:23:50,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=682780.0, ans=0.125 +2024-09-19 14:23:51,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.53 vs. limit=12.0 +2024-09-19 14:23:55,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=682820.0, ans=0.025 +2024-09-19 14:24:01,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=682820.0, ans=0.125 +2024-09-19 14:24:14,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=682860.0, ans=0.1 +2024-09-19 14:24:23,686 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.574e+01 8.537e+01 9.091e+01 9.701e+01 1.814e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-19 14:24:26,798 INFO [train.py:1198] (0/2) Epoch 38, batch 3300, loss[loss=0.2534, ctc_loss=0.1194, cr_loss=0.3615, attn_decoder_loss=0.2602, over 28471.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1132, cr_loss=0.3522, attn_decoder_loss=0.2386, over 5795264.04 frames. ], batch size: 112, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:24:28,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=682900.0, ans=0.2 +2024-09-19 14:24:34,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=682900.0, ans=0.2 +2024-09-19 14:24:47,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=682940.0, ans=10.0 +2024-09-19 14:25:26,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=683060.0, ans=0.0 +2024-09-19 14:25:33,566 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:25:42,127 INFO [train.py:1198] (0/2) Epoch 38, batch 3350, loss[loss=0.257, ctc_loss=0.1288, cr_loss=0.3972, attn_decoder_loss=0.2624, over 28795.00 frames. 
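The Whitening records compare a measured whiteness metric of a module's activations against a scheduled limit (metric=X vs. limit=Y), flagging activations whose covariance drifts far from a multiple of the identity. One plausible such metric, sketched below, is the normalized second moment of the covariance eigenvalues, which equals 1.0 for a perfectly white covariance and grows as the spectrum becomes lopsided; this exact formula is an assumption, not necessarily the scaling.py definition.

```python
# A plausible whiteness metric for the "metric=X vs. limit=Y" records:
# mean(eig(C)^2) / mean(eig(C))^2 over the per-group feature covariance C.
# Equals 1.0 iff C is a multiple of the identity; an assumption, not
# necessarily the formula used in scaling.py.
import torch

def whitening_metric(x: torch.Tensor, num_groups: int) -> float:
    n, c = x.shape                      # (num_frames, num_channels)
    x = x.reshape(n, num_groups, c // num_groups)
    x = x - x.mean(dim=0, keepdim=True)
    cov = torch.einsum("ngi,ngj->gij", x, x) / n
    eigs = torch.linalg.eigvalsh(cov)   # (num_groups, channels_per_group)
    ratio = eigs.pow(2).mean(dim=1) / eigs.mean(dim=1).pow(2)
    return ratio.mean().item()

x = torch.randn(4000, 64)                  # approximately white activations
print(whitening_metric(x, num_groups=1))   # slightly above 1.0
```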
], tot_loss[loss=0.234, ctc_loss=0.1138, cr_loss=0.3534, attn_decoder_loss=0.2395, over 5772681.99 frames. ], batch size: 104, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:25:51,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=683100.0, ans=0.0 +2024-09-19 14:25:53,588 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.58 vs. limit=6.0 +2024-09-19 14:26:11,996 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.88 vs. limit=10.0 +2024-09-19 14:26:42,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=683220.0, ans=0.125 +2024-09-19 14:26:51,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=683260.0, ans=0.125 +2024-09-19 14:26:57,159 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.556e+01 8.721e+01 9.366e+01 9.877e+01 4.380e+02, threshold=1.873e+02, percent-clipped=1.0 +2024-09-19 14:27:00,141 INFO [train.py:1198] (0/2) Epoch 38, batch 3400, loss[loss=0.2029, ctc_loss=0.09145, cr_loss=0.2948, attn_decoder_loss=0.2088, over 29360.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1142, cr_loss=0.3534, attn_decoder_loss=0.2395, over 5764779.76 frames. ], batch size: 67, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:27:03,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=683300.0, ans=0.1 +2024-09-19 14:27:09,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=683300.0, ans=0.1 +2024-09-19 14:27:13,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=683300.0, ans=0.0 +2024-09-19 14:27:13,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=683300.0, ans=0.125 +2024-09-19 14:27:17,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=683340.0, ans=0.125 +2024-09-19 14:27:25,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=683340.0, ans=0.07 +2024-09-19 14:27:40,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=683380.0, ans=0.1 +2024-09-19 14:28:10,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=683460.0, ans=0.125 +2024-09-19 14:28:13,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=683460.0, ans=0.125 +2024-09-19 14:28:16,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=683500.0, ans=0.0 +2024-09-19 14:28:17,521 INFO [train.py:1198] (0/2) Epoch 38, batch 3450, loss[loss=0.2352, ctc_loss=0.1097, cr_loss=0.3399, attn_decoder_loss=0.2416, over 28336.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1144, cr_loss=0.3545, attn_decoder_loss=0.24, over 5772652.58 frames. 
], batch size: 111, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:28:20,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=683500.0, ans=0.125 +2024-09-19 14:28:23,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=683500.0, ans=0.0 +2024-09-19 14:28:57,282 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:29:14,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=683620.0, ans=0.125 +2024-09-19 14:29:17,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=683660.0, ans=0.2 +2024-09-19 14:29:31,893 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.708e+01 8.608e+01 9.088e+01 9.945e+01 4.659e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-19 14:29:33,415 INFO [train.py:1198] (0/2) Epoch 38, batch 3500, loss[loss=0.2062, ctc_loss=0.08862, cr_loss=0.2964, attn_decoder_loss=0.2126, over 29345.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1138, cr_loss=0.3538, attn_decoder_loss=0.2393, over 5774557.55 frames. ], batch size: 71, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:29:39,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=683700.0, ans=0.125 +2024-09-19 14:29:42,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=683700.0, ans=0.125 +2024-09-19 14:29:44,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=683700.0, ans=0.125 +2024-09-19 14:29:56,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=683740.0, ans=0.1 +2024-09-19 14:30:03,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=683780.0, ans=0.125 +2024-09-19 14:30:05,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=683780.0, ans=0.125 +2024-09-19 14:30:18,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=683820.0, ans=0.025 +2024-09-19 14:30:21,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=683820.0, ans=0.0 +2024-09-19 14:30:31,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.00 vs. limit=15.0 +2024-09-19 14:30:32,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=683820.0, ans=0.0 +2024-09-19 14:30:40,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=683860.0, ans=0.125 +2024-09-19 14:30:40,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=683860.0, ans=0.0 +2024-09-19 14:30:50,291 INFO [train.py:1198] (0/2) Epoch 38, batch 3550, loss[loss=0.2487, ctc_loss=0.1187, cr_loss=0.383, attn_decoder_loss=0.2546, over 29722.00 frames. 
], tot_loss[loss=0.234, ctc_loss=0.1139, cr_loss=0.3542, attn_decoder_loss=0.2394, over 5781869.47 frames. ], batch size: 89, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:30:50,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=683900.0, ans=0.125 +2024-09-19 14:30:52,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=683900.0, ans=0.125 +2024-09-19 14:31:14,550 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.07 vs. limit=12.0 +2024-09-19 14:31:21,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=683980.0, ans=0.125 +2024-09-19 14:31:41,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=684020.0, ans=0.125 +2024-09-19 14:31:50,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=684060.0, ans=0.125 +2024-09-19 14:31:54,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=684060.0, ans=0.125 +2024-09-19 14:32:05,115 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.324e+01 8.514e+01 8.990e+01 9.421e+01 1.244e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-19 14:32:06,681 INFO [train.py:1198] (0/2) Epoch 38, batch 3600, loss[loss=0.2321, ctc_loss=0.1076, cr_loss=0.3511, attn_decoder_loss=0.2381, over 29488.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.114, cr_loss=0.3542, attn_decoder_loss=0.2398, over 5791104.46 frames. ], batch size: 77, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:32:23,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=684140.0, ans=0.1 +2024-09-19 14:32:35,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=684180.0, ans=0.125 +2024-09-19 14:32:59,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=684220.0, ans=0.125 +2024-09-19 14:33:11,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=684260.0, ans=0.125 +2024-09-19 14:33:21,081 INFO [train.py:1198] (0/2) Epoch 38, batch 3650, loss[loss=0.2382, ctc_loss=0.1194, cr_loss=0.375, attn_decoder_loss=0.243, over 29501.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1136, cr_loss=0.3529, attn_decoder_loss=0.239, over 5792125.76 frames. 
], batch size: 90, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:33:25,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=684300.0, ans=0.125 +2024-09-19 14:33:37,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=684340.0, ans=0.125 +2024-09-19 14:33:57,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=684380.0, ans=0.1 +2024-09-19 14:33:58,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=684380.0, ans=0.125 +2024-09-19 14:34:01,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=684380.0, ans=0.2 +2024-09-19 14:34:32,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=684460.0, ans=0.125 +2024-09-19 14:34:34,296 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.479e+01 8.884e+01 9.372e+01 5.863e+02, threshold=1.777e+02, percent-clipped=1.0 +2024-09-19 14:34:34,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=684500.0, ans=0.07 +2024-09-19 14:34:35,785 INFO [train.py:1198] (0/2) Epoch 38, batch 3700, loss[loss=0.2544, ctc_loss=0.1328, cr_loss=0.4103, attn_decoder_loss=0.2588, over 29711.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1135, cr_loss=0.3533, attn_decoder_loss=0.2392, over 5802941.98 frames. ], batch size: 84, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:34:59,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=684540.0, ans=0.125 +2024-09-19 14:35:01,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=684540.0, ans=0.0 +2024-09-19 14:35:15,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=684580.0, ans=0.125 +2024-09-19 14:35:36,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=684660.0, ans=0.0 +2024-09-19 14:35:39,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=684660.0, ans=0.025 +2024-09-19 14:35:49,914 INFO [train.py:1198] (0/2) Epoch 38, batch 3750, loss[loss=0.2125, ctc_loss=0.09902, cr_loss=0.3135, attn_decoder_loss=0.2181, over 29358.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1135, cr_loss=0.3533, attn_decoder_loss=0.2391, over 5807094.65 frames. ], batch size: 67, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:35:59,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=684700.0, ans=0.125 +2024-09-19 14:36:02,714 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.64 vs. 
limit=15.0 +2024-09-19 14:36:27,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=684780.0, ans=0.2 +2024-09-19 14:36:29,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=684780.0, ans=0.07 +2024-09-19 14:37:01,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.33 vs. limit=22.5 +2024-09-19 14:37:04,811 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.242e+01 8.375e+01 8.926e+01 9.574e+01 1.662e+02, threshold=1.785e+02, percent-clipped=0.0 +2024-09-19 14:37:06,353 INFO [train.py:1198] (0/2) Epoch 38, batch 3800, loss[loss=0.2541, ctc_loss=0.1333, cr_loss=0.4054, attn_decoder_loss=0.2585, over 29619.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1134, cr_loss=0.3528, attn_decoder_loss=0.2388, over 5796603.85 frames. ], batch size: 86, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:37:19,006 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.98 vs. limit=15.0 +2024-09-19 14:37:25,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_abs, batch_count=684940.0, ans=0.5 +2024-09-19 14:37:29,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=684940.0, ans=0.2 +2024-09-19 14:37:38,331 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.65 vs. limit=15.0 +2024-09-19 14:37:40,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=684980.0, ans=0.0 +2024-09-19 14:37:56,656 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.93 vs. limit=8.0 +2024-09-19 14:37:58,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=685020.0, ans=0.125 +2024-09-19 14:38:03,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=685020.0, ans=0.0 +2024-09-19 14:38:04,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=685060.0, ans=0.05 +2024-09-19 14:38:07,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=685060.0, ans=0.125 +2024-09-19 14:38:10,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=685060.0, ans=0.0 +2024-09-19 14:38:19,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=685060.0, ans=0.0 +2024-09-19 14:38:22,255 INFO [train.py:1198] (0/2) Epoch 38, batch 3850, loss[loss=0.2475, ctc_loss=0.1171, cr_loss=0.3502, attn_decoder_loss=0.2542, over 29240.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1133, cr_loss=0.3529, attn_decoder_loss=0.2386, over 5811708.57 frames. 
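grad_scale moves between 8.0, 16.0 and 32.0 across these records, the signature of dynamic mixed-precision loss scaling: the scale is doubled after a run of overflow-free steps and halved when an inf/nan gradient appears. A sketch of that policy follows; the constants are assumptions (torch.cuda.amp.GradScaler implements the real mechanism).

```python
# Dynamic loss-scale policy consistent with grad_scale oscillating
# between 8.0 and 32.0 in the records above: halve on overflow, double
# after `growth_interval` clean steps. Constants are assumptions.
class LossScale:
    def __init__(self, scale: float = 16.0, growth_interval: int = 2000):
        self.scale = scale
        self.growth_interval = growth_interval
        self.clean_steps = 0

    def update(self, found_inf: bool) -> None:
        if found_inf:
            self.scale *= 0.5        # back off immediately on inf/nan
            self.clean_steps = 0
        else:
            self.clean_steps += 1
            if self.clean_steps >= self.growth_interval:
                self.scale *= 2.0    # grow cautiously after clean steps
                self.clean_steps = 0
```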
], batch size: 100, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:38:38,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.42 vs. limit=15.0 +2024-09-19 14:38:56,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=685180.0, ans=0.125 +2024-09-19 14:39:34,606 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.061e+01 8.495e+01 8.957e+01 9.535e+01 1.173e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 14:39:35,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=685300.0, ans=0.125 +2024-09-19 14:39:36,181 INFO [train.py:1198] (0/2) Epoch 38, batch 3900, loss[loss=0.2423, ctc_loss=0.1163, cr_loss=0.3579, attn_decoder_loss=0.2483, over 29644.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1136, cr_loss=0.3536, attn_decoder_loss=0.239, over 5816652.07 frames. ], batch size: 86, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:39:50,406 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.08 vs. limit=15.0 +2024-09-19 14:39:58,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=685340.0, ans=0.1 +2024-09-19 14:39:59,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=685340.0, ans=0.125 +2024-09-19 14:40:08,935 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=685380.0, ans=0.1 +2024-09-19 14:40:16,604 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.91 vs. limit=22.5 +2024-09-19 14:40:38,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=685460.0, ans=0.1 +2024-09-19 14:40:49,773 INFO [train.py:1198] (0/2) Epoch 38, batch 3950, loss[loss=0.247, ctc_loss=0.1179, cr_loss=0.3768, attn_decoder_loss=0.2529, over 29465.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1138, cr_loss=0.354, attn_decoder_loss=0.2393, over 5835549.40 frames. ], batch size: 97, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:40:53,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.97 vs. limit=15.0 +2024-09-19 14:40:59,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=685500.0, ans=0.125 +2024-09-19 14:41:14,418 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.79 vs. limit=10.0 +2024-09-19 14:41:18,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=685580.0, ans=0.1 +2024-09-19 14:41:18,782 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.87 vs. 
limit=15.0 +2024-09-19 14:42:03,852 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.719e+01 9.341e+01 1.012e+02 2.118e+02, threshold=1.868e+02, percent-clipped=1.0 +2024-09-19 14:42:05,293 INFO [train.py:1198] (0/2) Epoch 38, batch 4000, loss[loss=0.2168, ctc_loss=0.09837, cr_loss=0.3018, attn_decoder_loss=0.2233, over 29508.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1139, cr_loss=0.3538, attn_decoder_loss=0.2396, over 5813405.29 frames. ], batch size: 74, lr: 2.89e-03, grad_scale: 32.0 +2024-09-19 14:42:33,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=685780.0, ans=0.0 +2024-09-19 14:42:40,262 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.77 vs. limit=15.0 +2024-09-19 14:42:59,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.03 vs. limit=22.5 +2024-09-19 14:43:00,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=685820.0, ans=0.125 +2024-09-19 14:43:20,736 INFO [train.py:1198] (0/2) Epoch 38, batch 4050, loss[loss=0.2505, ctc_loss=0.1265, cr_loss=0.3574, attn_decoder_loss=0.2563, over 19714.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.114, cr_loss=0.3542, attn_decoder_loss=0.2395, over 5796970.94 frames. ], batch size: 210, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:43:41,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=685940.0, ans=0.0 +2024-09-19 14:43:50,617 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.54 vs. limit=15.0 +2024-09-19 14:43:51,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=685980.0, ans=0.125 +2024-09-19 14:44:19,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=686060.0, ans=0.125 +2024-09-19 14:44:33,761 INFO [train.py:1198] (0/2) Epoch 38, batch 4100, loss[loss=0.2657, ctc_loss=0.1427, cr_loss=0.4175, attn_decoder_loss=0.2701, over 29495.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1144, cr_loss=0.3548, attn_decoder_loss=0.2396, over 5792470.94 frames. 
], batch size: 90, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:44:33,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=686100.0, ans=0.125 +2024-09-19 14:44:34,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=686100.0, ans=0.0 +2024-09-19 14:44:35,193 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.479e+01 8.495e+01 9.024e+01 9.584e+01 1.415e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 14:44:42,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=686100.0, ans=0.125 +2024-09-19 14:44:47,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=686140.0, ans=0.125 +2024-09-19 14:45:16,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=686220.0, ans=0.0 +2024-09-19 14:45:35,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=686260.0, ans=0.07 +2024-09-19 14:45:39,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=686260.0, ans=0.125 +2024-09-19 14:45:47,175 INFO [train.py:1198] (0/2) Epoch 38, batch 4150, loss[loss=0.2301, ctc_loss=0.1162, cr_loss=0.3608, attn_decoder_loss=0.2348, over 29501.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1145, cr_loss=0.3556, attn_decoder_loss=0.2395, over 5797948.24 frames. ], batch size: 77, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:46:03,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=686340.0, ans=0.125 +2024-09-19 14:46:04,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=686340.0, ans=0.2 +2024-09-19 14:46:05,338 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.60 vs. limit=15.0 +2024-09-19 14:46:06,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=686340.0, ans=0.2 +2024-09-19 14:46:16,880 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.12 vs. 
limit=15.0 +2024-09-19 14:46:19,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=686380.0, ans=0.125 +2024-09-19 14:46:29,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=686380.0, ans=0.5 +2024-09-19 14:46:32,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=686420.0, ans=0.1 +2024-09-19 14:46:45,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=686460.0, ans=0.0 +2024-09-19 14:46:53,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=686460.0, ans=0.1 +2024-09-19 14:47:01,643 INFO [train.py:1198] (0/2) Epoch 38, batch 4200, loss[loss=0.2569, ctc_loss=0.1367, cr_loss=0.4165, attn_decoder_loss=0.261, over 29489.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1146, cr_loss=0.356, attn_decoder_loss=0.2398, over 5800034.03 frames. ], batch size: 90, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:47:03,141 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.063e+01 8.618e+01 9.071e+01 9.625e+01 1.972e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-19 14:47:07,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=686500.0, ans=0.2 +2024-09-19 14:47:12,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=686500.0, ans=0.1 +2024-09-19 14:47:29,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=686540.0, ans=0.125 +2024-09-19 14:47:54,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=686620.0, ans=0.0 +2024-09-19 14:48:08,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.71 vs. limit=15.0 +2024-09-19 14:48:09,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=686660.0, ans=0.05 +2024-09-19 14:48:16,371 INFO [train.py:1198] (0/2) Epoch 38, batch 4250, loss[loss=0.2274, ctc_loss=0.1104, cr_loss=0.3546, attn_decoder_loss=0.2325, over 29517.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1143, cr_loss=0.3552, attn_decoder_loss=0.24, over 5805461.84 frames. ], batch size: 74, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:48:17,459 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=8.40 vs. limit=15.0 +2024-09-19 14:48:36,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=686740.0, ans=0.025 +2024-09-19 14:48:36,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=686740.0, ans=0.1 +2024-09-19 14:48:39,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.39 vs. 
limit=15.0 +2024-09-19 14:49:26,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=686860.0, ans=0.125 +2024-09-19 14:49:30,555 INFO [train.py:1198] (0/2) Epoch 38, batch 4300, loss[loss=0.249, ctc_loss=0.1179, cr_loss=0.3604, attn_decoder_loss=0.2556, over 29502.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.114, cr_loss=0.3543, attn_decoder_loss=0.2401, over 5795090.04 frames. ], batch size: 87, lr: 2.88e-03, grad_scale: 8.0 +2024-09-19 14:49:32,034 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.244e+01 8.697e+01 9.242e+01 9.593e+01 9.804e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-19 14:50:05,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=686980.0, ans=0.125 +2024-09-19 14:50:28,974 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.64 vs. limit=15.0 +2024-09-19 14:50:45,378 INFO [train.py:1198] (0/2) Epoch 38, batch 4350, loss[loss=0.2439, ctc_loss=0.125, cr_loss=0.378, attn_decoder_loss=0.2487, over 29545.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1168, cr_loss=0.3605, attn_decoder_loss=0.2435, over 5797332.96 frames. ], batch size: 97, lr: 2.88e-03, grad_scale: 8.0 +2024-09-19 14:50:48,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=687100.0, ans=0.125 +2024-09-19 14:50:49,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.11 vs. limit=15.0 +2024-09-19 14:51:17,203 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.89 vs. limit=6.0 +2024-09-19 14:51:39,021 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.40 vs. limit=10.0 +2024-09-19 14:51:58,779 INFO [train.py:1198] (0/2) Epoch 38, batch 4400, loss[loss=0.2541, ctc_loss=0.1335, cr_loss=0.4015, attn_decoder_loss=0.2585, over 27176.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1178, cr_loss=0.3625, attn_decoder_loss=0.2454, over 5768542.48 frames. ], batch size: 124, lr: 2.88e-03, grad_scale: 16.0 +2024-09-19 14:52:00,221 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.169e+01 8.939e+01 9.261e+01 9.709e+01 1.293e+02, threshold=1.852e+02, percent-clipped=0.0 +2024-09-19 14:52:05,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=687300.0, ans=0.2 +2024-09-19 14:52:50,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=687420.0, ans=0.2 +2024-09-19 14:52:50,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=687420.0, ans=0.2 +2024-09-19 14:53:13,723 INFO [train.py:1198] (0/2) Epoch 38, batch 4450, loss[loss=0.2583, ctc_loss=0.1525, cr_loss=0.395, attn_decoder_loss=0.2613, over 20212.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1212, cr_loss=0.3674, attn_decoder_loss=0.2475, over 5574936.87 frames. 
], batch size: 209, lr: 2.88e-03, grad_scale: 16.0 +2024-09-19 14:53:23,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=687500.0, ans=0.0 +2024-09-19 14:53:55,173 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:54:11,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=687620.0, ans=0.05 +2024-09-19 14:54:15,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=687660.0, ans=0.5 +2024-09-19 14:54:22,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=687660.0, ans=0.125 +2024-09-19 14:54:28,738 INFO [train.py:1198] (0/2) Epoch 38, batch 4500, loss[loss=0.2488, ctc_loss=0.1334, cr_loss=0.3645, attn_decoder_loss=0.2536, over 20543.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1241, cr_loss=0.3696, attn_decoder_loss=0.2491, over 5239572.98 frames. ], batch size: 210, lr: 2.88e-03, grad_scale: 8.0 +2024-09-19 14:54:31,692 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.150e+01 9.974e+01 1.104e+02 1.169e+02 2.298e+02, threshold=2.208e+02, percent-clipped=1.0 +2024-09-19 14:54:38,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=687700.0, ans=0.025 +2024-09-19 14:55:01,570 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:55:05,497 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-38.pt +2024-09-19 14:55:50,038 INFO [train.py:1198] (0/2) Epoch 39, batch 0, loss[loss=0.2131, ctc_loss=0.09408, cr_loss=0.3196, attn_decoder_loss=0.2193, over 29604.00 frames. ], tot_loss[loss=0.2131, ctc_loss=0.09408, cr_loss=0.3196, attn_decoder_loss=0.2193, over 29604.00 frames. ], batch size: 73, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 14:55:50,039 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 14:56:08,891 INFO [train.py:1230] (0/2) Epoch 39, validation: loss=0.2125, ctc_loss=0.03631, cr_loss=6.129e-15, attn_decoder_loss=0.232, over 944034.00 frames. 
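The four loss fields reported in each entry above (loss, ctc_loss, cr_loss, attn_decoder_loss) are consistent with a fixed weighted sum whose weights appear in the checkpoint path (exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-...). A minimal Python sketch of that relationship follows; the function and constant names are illustrative, not icefall's own code, and the weights are assumed from the directory name rather than taken from the training script.

# Sketch only: reproduces the logged `loss` field from its components,
# assuming the scales encoded in the experiment directory name.
CTC_LOSS_SCALE = 0.1   # from "ctc-loss-scale-0.1" in the checkpoint path
AED_LOSS_SCALE = 0.9   # from "aed-loss-scale-0.9"
CR_LOSS_SCALE = 0.02   # from "cr-loss-scale-0.02"

def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float) -> float:
    """Weighted sum that matches the per-batch log lines."""
    return (CTC_LOSS_SCALE * ctc_loss
            + AED_LOSS_SCALE * attn_decoder_loss
            + CR_LOSS_SCALE * cr_loss)

# Cross-check against Epoch 38, batch 4000 above:
# 0.1 * 0.1139 + 0.9 * 0.2396 + 0.02 * 0.3538 = 0.2341 (the logged loss).
assert abs(combined_loss(0.1139, 0.2396, 0.3538) - 0.2341) < 5e-4

# The Epoch 39 validation entry is consistent too: its cr_loss of
# 6.129e-15 is numerically zero, so loss is roughly
# 0.1 * 0.03631 + 0.9 * 0.232 = 0.2125.
assert abs(combined_loss(0.03631, 0.232, 6.129e-15) - 0.2125) < 5e-4

For reading the rest of these entries: loss[...] gives the figures for the single batch being logged, while tot_loss[..., over N frames.] is a frames-weighted aggregate over the recent stretch of training (hence the large and growing frame counts). grad_scale is the dynamic mixed-precision loss scale, which the log shows stepping between 8.0 and 32.0, and the WARNING lines from optim.py report grad-norm quartiles together with the clipping threshold and the fraction of recently clipped batches.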
+2024-09-19 14:56:08,892 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 14:56:19,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=687800.0, ans=0.125 +2024-09-19 14:56:37,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=687840.0, ans=0.125 +2024-09-19 14:56:40,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=687880.0, ans=0.125 +2024-09-19 14:56:43,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=687880.0, ans=0.125 +2024-09-19 14:57:04,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=687920.0, ans=0.125 +2024-09-19 14:57:19,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=687960.0, ans=0.125 +2024-09-19 14:57:24,316 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-172000.pt +2024-09-19 14:57:32,780 INFO [train.py:1198] (0/2) Epoch 39, batch 50, loss[loss=0.2148, ctc_loss=0.1022, cr_loss=0.324, attn_decoder_loss=0.2201, over 29421.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1151, cr_loss=0.3563, attn_decoder_loss=0.2403, over 1270028.75 frames. ], batch size: 70, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 14:57:38,203 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.46 vs. limit=12.0 +2024-09-19 14:57:51,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=688040.0, ans=0.07 +2024-09-19 14:58:02,650 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.92 vs. limit=22.5 +2024-09-19 14:58:06,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=688080.0, ans=0.125 +2024-09-19 14:58:12,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=688080.0, ans=0.09899494936611666 +2024-09-19 14:58:14,953 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.223e+01 8.884e+01 9.468e+01 1.073e+02 2.116e+02, threshold=1.894e+02, percent-clipped=0.0 +2024-09-19 14:58:42,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=688160.0, ans=0.2 +2024-09-19 14:58:47,895 INFO [train.py:1198] (0/2) Epoch 39, batch 100, loss[loss=0.2312, ctc_loss=0.1161, cr_loss=0.3847, attn_decoder_loss=0.2355, over 29539.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1169, cr_loss=0.3608, attn_decoder_loss=0.2428, over 2253958.84 frames. ], batch size: 76, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 14:58:48,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.34 vs. 
limit=22.5 +2024-09-19 14:59:01,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=688240.0, ans=0.0 +2024-09-19 14:59:09,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=688240.0, ans=0.09899494936611666 +2024-09-19 14:59:13,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=688240.0, ans=0.125 +2024-09-19 14:59:24,732 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.50 vs. limit=15.0 +2024-09-19 14:59:30,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=688280.0, ans=0.04949747468305833 +2024-09-19 15:00:04,839 INFO [train.py:1198] (0/2) Epoch 39, batch 150, loss[loss=0.2128, ctc_loss=0.1011, cr_loss=0.3304, attn_decoder_loss=0.2178, over 29424.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1149, cr_loss=0.3567, attn_decoder_loss=0.2405, over 3048202.84 frames. ], batch size: 70, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:00:17,181 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.83 vs. limit=22.5 +2024-09-19 15:00:33,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=688440.0, ans=0.125 +2024-09-19 15:00:37,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=688480.0, ans=0.125 +2024-09-19 15:00:40,323 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:00:46,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=688480.0, ans=0.125 +2024-09-19 15:00:48,859 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 8.419e+01 8.955e+01 9.625e+01 1.555e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 15:01:02,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=688520.0, ans=0.125 +2024-09-19 15:01:10,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=688560.0, ans=0.5 +2024-09-19 15:01:13,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=688560.0, ans=0.2 +2024-09-19 15:01:16,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=688560.0, ans=0.0 +2024-09-19 15:01:22,059 INFO [train.py:1198] (0/2) Epoch 39, batch 200, loss[loss=0.2418, ctc_loss=0.1218, cr_loss=0.3628, attn_decoder_loss=0.2471, over 26978.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1141, cr_loss=0.355, attn_decoder_loss=0.2396, over 3659269.98 frames. 
], batch size: 124, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:01:25,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=688600.0, ans=0.125 +2024-09-19 15:01:43,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=688640.0, ans=0.125 +2024-09-19 15:01:44,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=688640.0, ans=0.05 +2024-09-19 15:01:46,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=688640.0, ans=0.125 +2024-09-19 15:01:58,746 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.22 vs. limit=22.5 +2024-09-19 15:02:01,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=688680.0, ans=0.125 +2024-09-19 15:02:05,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=688720.0, ans=0.0 +2024-09-19 15:02:07,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=688720.0, ans=0.2 +2024-09-19 15:02:11,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=688720.0, ans=0.1 +2024-09-19 15:02:37,647 INFO [train.py:1198] (0/2) Epoch 39, batch 250, loss[loss=0.2449, ctc_loss=0.1254, cr_loss=0.3742, attn_decoder_loss=0.2499, over 29214.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1145, cr_loss=0.3558, attn_decoder_loss=0.2394, over 4142289.88 frames. ], batch size: 100, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:02:54,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten.whitening_limit, batch_count=688840.0, ans=22.5 +2024-09-19 15:03:06,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=688880.0, ans=0.2 +2024-09-19 15:03:19,943 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.148e+01 8.603e+01 9.098e+01 9.821e+01 6.363e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-19 15:03:21,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=688920.0, ans=0.125 +2024-09-19 15:03:52,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=688960.0, ans=0.0 +2024-09-19 15:03:55,541 INFO [train.py:1198] (0/2) Epoch 39, batch 300, loss[loss=0.2454, ctc_loss=0.1275, cr_loss=0.3994, attn_decoder_loss=0.2496, over 29567.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1141, cr_loss=0.3554, attn_decoder_loss=0.2392, over 4512111.14 frames. ], batch size: 92, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:04:06,227 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.85 vs. 
limit=15.0 +2024-09-19 15:04:49,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=689120.0, ans=0.125 +2024-09-19 15:05:11,070 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.76 vs. limit=15.0 +2024-09-19 15:05:13,153 INFO [train.py:1198] (0/2) Epoch 39, batch 350, loss[loss=0.2101, ctc_loss=0.09211, cr_loss=0.3008, attn_decoder_loss=0.2166, over 29331.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1137, cr_loss=0.3544, attn_decoder_loss=0.2392, over 4797053.09 frames. ], batch size: 71, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:05:13,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=689200.0, ans=0.025 +2024-09-19 15:05:16,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=689200.0, ans=0.125 +2024-09-19 15:05:23,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=689200.0, ans=0.125 +2024-09-19 15:05:24,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=689200.0, ans=0.0 +2024-09-19 15:05:50,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=689280.0, ans=0.125 +2024-09-19 15:05:55,258 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.629e+01 8.459e+01 8.983e+01 9.522e+01 3.712e+02, threshold=1.797e+02, percent-clipped=2.0 +2024-09-19 15:05:58,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=689320.0, ans=0.125 +2024-09-19 15:06:08,431 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.00 vs. limit=15.0 +2024-09-19 15:06:18,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.38 vs. limit=22.5 +2024-09-19 15:06:28,537 INFO [train.py:1198] (0/2) Epoch 39, batch 400, loss[loss=0.2328, ctc_loss=0.1126, cr_loss=0.3536, attn_decoder_loss=0.2383, over 29712.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1132, cr_loss=0.353, attn_decoder_loss=0.2387, over 5026081.87 frames. ], batch size: 82, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:06:33,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=689400.0, ans=0.025 +2024-09-19 15:06:39,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=689400.0, ans=0.0 +2024-09-19 15:07:46,832 INFO [train.py:1198] (0/2) Epoch 39, batch 450, loss[loss=0.2426, ctc_loss=0.1116, cr_loss=0.3529, attn_decoder_loss=0.2494, over 29699.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1136, cr_loss=0.354, attn_decoder_loss=0.2392, over 5188118.92 frames. 
], batch size: 83, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:07:47,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=689600.0, ans=0.2 +2024-09-19 15:08:09,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=689640.0, ans=0.125 +2024-09-19 15:08:14,247 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.67 vs. limit=12.0 +2024-09-19 15:08:15,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=689640.0, ans=0.0 +2024-09-19 15:08:27,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=689680.0, ans=0.125 +2024-09-19 15:08:32,803 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.453e+01 8.422e+01 8.949e+01 9.558e+01 1.384e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-19 15:08:45,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=689720.0, ans=0.025 +2024-09-19 15:08:53,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=689760.0, ans=0.125 +2024-09-19 15:09:03,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=689800.0, ans=0.0 +2024-09-19 15:09:04,771 INFO [train.py:1198] (0/2) Epoch 39, batch 500, loss[loss=0.2527, ctc_loss=0.1246, cr_loss=0.3895, attn_decoder_loss=0.2583, over 29461.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1136, cr_loss=0.3544, attn_decoder_loss=0.2389, over 5330745.74 frames. 
], batch size: 94, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:09:05,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=689800.0, ans=0.125 +2024-09-19 15:09:24,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=689840.0, ans=15.0 +2024-09-19 15:09:46,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=689880.0, ans=0.125 +2024-09-19 15:09:47,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=689880.0, ans=0.025 +2024-09-19 15:09:53,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=689920.0, ans=0.0 +2024-09-19 15:09:58,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=689920.0, ans=0.125 +2024-09-19 15:10:04,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=689960.0, ans=0.1 +2024-09-19 15:10:08,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=689960.0, ans=0.125 +2024-09-19 15:10:19,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=690000.0, ans=0.125 +2024-09-19 15:10:20,422 INFO [train.py:1198] (0/2) Epoch 39, batch 550, loss[loss=0.2338, ctc_loss=0.1057, cr_loss=0.3236, attn_decoder_loss=0.2408, over 28829.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1135, cr_loss=0.3536, attn_decoder_loss=0.2389, over 5422767.64 frames. ], batch size: 104, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:10:52,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=690080.0, ans=0.125 +2024-09-19 15:11:02,264 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.21 vs. limit=6.0 +2024-09-19 15:11:04,428 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.487e+01 8.632e+01 8.977e+01 9.526e+01 2.010e+02, threshold=1.795e+02, percent-clipped=2.0 +2024-09-19 15:11:38,686 INFO [train.py:1198] (0/2) Epoch 39, batch 600, loss[loss=0.2535, ctc_loss=0.1241, cr_loss=0.3841, attn_decoder_loss=0.2593, over 29287.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1135, cr_loss=0.354, attn_decoder_loss=0.2391, over 5508124.50 frames. 
], batch size: 100, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:11:53,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=690200.0, ans=0.125 +2024-09-19 15:12:00,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=690240.0, ans=0.125 +2024-09-19 15:12:02,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=690240.0, ans=0.2 +2024-09-19 15:12:24,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=690320.0, ans=0.1 +2024-09-19 15:12:37,074 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.44 vs. limit=15.0 +2024-09-19 15:12:42,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=690360.0, ans=0.0 +2024-09-19 15:12:56,387 INFO [train.py:1198] (0/2) Epoch 39, batch 650, loss[loss=0.2404, ctc_loss=0.1235, cr_loss=0.3813, attn_decoder_loss=0.245, over 29758.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.113, cr_loss=0.3529, attn_decoder_loss=0.2385, over 5586032.50 frames. ], batch size: 81, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:13:02,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=690400.0, ans=0.0 +2024-09-19 15:13:05,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff2.min_abs, batch_count=690400.0, ans=0.1 +2024-09-19 15:13:26,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=690480.0, ans=0.0 +2024-09-19 15:13:40,408 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.365e+01 8.432e+01 8.976e+01 9.547e+01 1.845e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-19 15:14:00,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=690560.0, ans=0.125 +2024-09-19 15:14:12,090 INFO [train.py:1198] (0/2) Epoch 39, batch 700, loss[loss=0.2227, ctc_loss=0.1045, cr_loss=0.3371, attn_decoder_loss=0.2284, over 29531.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1133, cr_loss=0.3532, attn_decoder_loss=0.2388, over 5637095.02 frames. 
], batch size: 76, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:14:12,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=690600.0, ans=0.1 +2024-09-19 15:14:16,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=690600.0, ans=0.0 +2024-09-19 15:14:22,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=690600.0, ans=0.0 +2024-09-19 15:14:24,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=690600.0, ans=0.1 +2024-09-19 15:14:32,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=690640.0, ans=6.0 +2024-09-19 15:14:33,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=690640.0, ans=0.125 +2024-09-19 15:14:34,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=690640.0, ans=0.0 +2024-09-19 15:14:42,959 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.68 vs. limit=22.5 +2024-09-19 15:14:45,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=690680.0, ans=0.0 +2024-09-19 15:14:58,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=690720.0, ans=0.0 +2024-09-19 15:15:01,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=690720.0, ans=0.125 +2024-09-19 15:15:27,466 INFO [train.py:1198] (0/2) Epoch 39, batch 750, loss[loss=0.2413, ctc_loss=0.1154, cr_loss=0.3704, attn_decoder_loss=0.2471, over 29698.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1131, cr_loss=0.3528, attn_decoder_loss=0.2384, over 5675391.50 frames. ], batch size: 82, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:15:34,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=690800.0, ans=0.125 +2024-09-19 15:15:40,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=690800.0, ans=0.125 +2024-09-19 15:15:44,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=690840.0, ans=0.025 +2024-09-19 15:15:53,871 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.64 vs. limit=15.0 +2024-09-19 15:16:13,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.83 vs. limit=15.0 +2024-09-19 15:16:15,751 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.433e+01 8.497e+01 9.078e+01 9.651e+01 1.974e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 15:16:47,095 INFO [train.py:1198] (0/2) Epoch 39, batch 800, loss[loss=0.2039, ctc_loss=0.09287, cr_loss=0.2991, attn_decoder_loss=0.2096, over 29610.00 frames. 
], tot_loss[loss=0.2329, ctc_loss=0.1131, cr_loss=0.3528, attn_decoder_loss=0.2384, over 5705974.29 frames. ], batch size: 73, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:17:14,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=691040.0, ans=0.125 +2024-09-19 15:17:19,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=691080.0, ans=0.2 +2024-09-19 15:17:25,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=691080.0, ans=0.0 +2024-09-19 15:17:37,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=691120.0, ans=0.125 +2024-09-19 15:17:45,253 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.09 vs. limit=6.0 +2024-09-19 15:17:50,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=691160.0, ans=0.0 +2024-09-19 15:18:02,595 INFO [train.py:1198] (0/2) Epoch 39, batch 850, loss[loss=0.2443, ctc_loss=0.1185, cr_loss=0.3588, attn_decoder_loss=0.2503, over 29715.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1128, cr_loss=0.3521, attn_decoder_loss=0.2382, over 5734719.02 frames. ], batch size: 89, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:18:14,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=691200.0, ans=0.05 +2024-09-19 15:18:19,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=691240.0, ans=0.0 +2024-09-19 15:18:25,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=691240.0, ans=0.125 +2024-09-19 15:18:26,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=691240.0, ans=0.0 +2024-09-19 15:18:38,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=691280.0, ans=0.0 +2024-09-19 15:18:41,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=691280.0, ans=0.125 +2024-09-19 15:18:46,234 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.532e+01 8.459e+01 8.825e+01 9.402e+01 1.909e+02, threshold=1.765e+02, percent-clipped=1.0 +2024-09-19 15:18:58,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=691320.0, ans=0.0 +2024-09-19 15:19:06,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=691360.0, ans=0.125 +2024-09-19 15:19:18,058 INFO [train.py:1198] (0/2) Epoch 39, batch 900, loss[loss=0.2122, ctc_loss=0.09792, cr_loss=0.3157, attn_decoder_loss=0.2179, over 29630.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1129, cr_loss=0.3524, attn_decoder_loss=0.2387, over 5739054.48 frames. 
], batch size: 73, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:19:18,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=691400.0, ans=0.125 +2024-09-19 15:20:08,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=691520.0, ans=0.125 +2024-09-19 15:20:23,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=691560.0, ans=0.5 +2024-09-19 15:20:26,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=691560.0, ans=0.05 +2024-09-19 15:20:34,158 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.03 vs. limit=15.0 +2024-09-19 15:20:37,730 INFO [train.py:1198] (0/2) Epoch 39, batch 950, loss[loss=0.2186, ctc_loss=0.09537, cr_loss=0.3165, attn_decoder_loss=0.2253, over 29535.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.113, cr_loss=0.3523, attn_decoder_loss=0.2388, over 5742216.84 frames. ], batch size: 74, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:20:39,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=691600.0, ans=0.125 +2024-09-19 15:21:11,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=691680.0, ans=0.1 +2024-09-19 15:21:17,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=691680.0, ans=0.09899494936611666 +2024-09-19 15:21:20,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=691680.0, ans=0.125 +2024-09-19 15:21:21,646 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.688e+01 9.370e+01 1.012e+02 2.860e+02, threshold=1.874e+02, percent-clipped=2.0 +2024-09-19 15:21:27,134 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.52 vs. limit=22.5 +2024-09-19 15:21:46,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=691760.0, ans=0.2 +2024-09-19 15:21:53,224 INFO [train.py:1198] (0/2) Epoch 39, batch 1000, loss[loss=0.2312, ctc_loss=0.1143, cr_loss=0.3451, attn_decoder_loss=0.2366, over 29507.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1142, cr_loss=0.3547, attn_decoder_loss=0.2399, over 5736475.03 frames. ], batch size: 77, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:21:54,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=691800.0, ans=0.025 +2024-09-19 15:22:02,487 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:22:28,664 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.46 vs. limit=15.0 +2024-09-19 15:22:53,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.99 vs. 
limit=15.0 +2024-09-19 15:22:58,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=691960.0, ans=0.125 +2024-09-19 15:23:08,640 INFO [train.py:1198] (0/2) Epoch 39, batch 1050, loss[loss=0.2456, ctc_loss=0.1213, cr_loss=0.3758, attn_decoder_loss=0.251, over 29678.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1136, cr_loss=0.3535, attn_decoder_loss=0.2393, over 5744592.71 frames. ], batch size: 85, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:23:34,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=692040.0, ans=0.125 +2024-09-19 15:23:41,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=692080.0, ans=0.2 +2024-09-19 15:23:49,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=692080.0, ans=0.0 +2024-09-19 15:23:54,908 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.608e+01 8.552e+01 9.121e+01 9.553e+01 1.921e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 15:24:03,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=692120.0, ans=0.0 +2024-09-19 15:24:05,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=692120.0, ans=0.125 +2024-09-19 15:24:26,500 INFO [train.py:1198] (0/2) Epoch 39, batch 1100, loss[loss=0.225, ctc_loss=0.1054, cr_loss=0.3349, attn_decoder_loss=0.2309, over 29457.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1132, cr_loss=0.3527, attn_decoder_loss=0.239, over 5757123.06 frames. ], batch size: 78, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:24:37,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=692200.0, ans=0.125 +2024-09-19 15:25:17,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.29 vs. limit=22.5 +2024-09-19 15:25:23,883 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.24 vs. limit=6.0 +2024-09-19 15:25:35,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_positive, batch_count=692360.0, ans=0.05 +2024-09-19 15:25:42,571 INFO [train.py:1198] (0/2) Epoch 39, batch 1150, loss[loss=0.2295, ctc_loss=0.1111, cr_loss=0.3489, attn_decoder_loss=0.2349, over 29450.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1134, cr_loss=0.353, attn_decoder_loss=0.239, over 5754981.89 frames. ], batch size: 78, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:25:46,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=692400.0, ans=0.125 +2024-09-19 15:26:05,903 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.33 vs. limit=10.0 +2024-09-19 15:26:23,281 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.11 vs. 
limit=15.0 +2024-09-19 15:26:26,561 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.659e+01 8.488e+01 9.080e+01 9.695e+01 1.564e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 15:26:30,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.51 vs. limit=15.0 +2024-09-19 15:26:42,230 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.86 vs. limit=15.0 +2024-09-19 15:26:43,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=692560.0, ans=0.125 +2024-09-19 15:26:58,174 INFO [train.py:1198] (0/2) Epoch 39, batch 1200, loss[loss=0.2377, ctc_loss=0.1133, cr_loss=0.3424, attn_decoder_loss=0.2439, over 29664.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1138, cr_loss=0.3539, attn_decoder_loss=0.2397, over 5747749.79 frames. ], batch size: 85, lr: 2.83e-03, grad_scale: 32.0 +2024-09-19 15:27:31,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=692680.0, ans=0.125 +2024-09-19 15:27:52,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=692720.0, ans=0.1 +2024-09-19 15:27:52,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=692720.0, ans=0.125 +2024-09-19 15:27:54,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.87 vs. limit=12.0 +2024-09-19 15:27:57,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=692720.0, ans=0.0 +2024-09-19 15:28:05,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.69 vs. limit=15.0 +2024-09-19 15:28:09,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=692760.0, ans=0.0 +2024-09-19 15:28:18,501 INFO [train.py:1198] (0/2) Epoch 39, batch 1250, loss[loss=0.2602, ctc_loss=0.1393, cr_loss=0.4193, attn_decoder_loss=0.2643, over 29533.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1145, cr_loss=0.3558, attn_decoder_loss=0.2405, over 5775392.73 frames. ], batch size: 92, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:28:35,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=692840.0, ans=0.2 +2024-09-19 15:28:46,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=692840.0, ans=0.0 +2024-09-19 15:29:05,370 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.383e+01 8.604e+01 9.074e+01 9.816e+01 4.150e+02, threshold=1.815e+02, percent-clipped=2.0 +2024-09-19 15:29:24,480 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.93 vs. 
limit=15.0 +2024-09-19 15:29:31,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=692960.0, ans=0.125 +2024-09-19 15:29:33,885 INFO [train.py:1198] (0/2) Epoch 39, batch 1300, loss[loss=0.2454, ctc_loss=0.1167, cr_loss=0.3557, attn_decoder_loss=0.2518, over 28193.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1144, cr_loss=0.3557, attn_decoder_loss=0.24, over 5781124.70 frames. ], batch size: 111, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:29:38,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=693000.0, ans=0.0 +2024-09-19 15:29:41,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=693000.0, ans=0.2 +2024-09-19 15:29:43,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=693000.0, ans=0.1 +2024-09-19 15:29:43,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=693000.0, ans=0.2 +2024-09-19 15:29:49,976 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.42 vs. limit=15.0 +2024-09-19 15:30:17,009 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.23 vs. limit=6.0 +2024-09-19 15:30:49,663 INFO [train.py:1198] (0/2) Epoch 39, batch 1350, loss[loss=0.229, ctc_loss=0.1065, cr_loss=0.3481, attn_decoder_loss=0.2349, over 29757.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1136, cr_loss=0.3544, attn_decoder_loss=0.2396, over 5798879.13 frames. ], batch size: 81, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:31:36,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=693280.0, ans=0.125 +2024-09-19 15:31:40,745 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.173e+01 8.563e+01 8.987e+01 9.374e+01 1.474e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-19 15:31:57,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=693360.0, ans=0.125 +2024-09-19 15:32:02,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=693360.0, ans=10.0 +2024-09-19 15:32:09,545 INFO [train.py:1198] (0/2) Epoch 39, batch 1400, loss[loss=0.2032, ctc_loss=0.08939, cr_loss=0.3022, attn_decoder_loss=0.2091, over 29612.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1135, cr_loss=0.3546, attn_decoder_loss=0.2394, over 5808969.58 frames. ], batch size: 69, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:32:22,728 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.29 vs. limit=15.0 +2024-09-19 15:32:29,863 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.73 vs. 
limit=12.0 +2024-09-19 15:32:30,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=693440.0, ans=0.0 +2024-09-19 15:32:32,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=693440.0, ans=0.125 +2024-09-19 15:32:51,220 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.94 vs. limit=15.0 +2024-09-19 15:32:52,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=693480.0, ans=0.0 +2024-09-19 15:33:03,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.57 vs. limit=22.5 +2024-09-19 15:33:17,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=693560.0, ans=0.0 +2024-09-19 15:33:22,986 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.30 vs. limit=15.0 +2024-09-19 15:33:24,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=693600.0, ans=0.1 +2024-09-19 15:33:25,283 INFO [train.py:1198] (0/2) Epoch 39, batch 1450, loss[loss=0.2548, ctc_loss=0.1299, cr_loss=0.4057, attn_decoder_loss=0.2596, over 29452.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1138, cr_loss=0.3554, attn_decoder_loss=0.2398, over 5805491.22 frames. ], batch size: 94, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:33:38,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=693640.0, ans=0.125 +2024-09-19 15:33:39,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.68 vs. limit=15.0 +2024-09-19 15:33:43,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=693640.0, ans=0.125 +2024-09-19 15:33:46,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=693640.0, ans=0.1 +2024-09-19 15:33:55,947 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.88 vs. 
limit=15.0 +2024-09-19 15:33:58,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=693680.0, ans=0.125 +2024-09-19 15:34:00,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=693680.0, ans=0.1 +2024-09-19 15:34:11,639 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.483e+01 8.532e+01 9.213e+01 9.668e+01 2.812e+02, threshold=1.843e+02, percent-clipped=2.0 +2024-09-19 15:34:12,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=693720.0, ans=0.125 +2024-09-19 15:34:13,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=693720.0, ans=0.1 +2024-09-19 15:34:15,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=693720.0, ans=0.0 +2024-09-19 15:34:31,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=693760.0, ans=0.125 +2024-09-19 15:34:38,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=693800.0, ans=0.0 +2024-09-19 15:34:40,279 INFO [train.py:1198] (0/2) Epoch 39, batch 1500, loss[loss=0.2326, ctc_loss=0.1126, cr_loss=0.3571, attn_decoder_loss=0.238, over 29631.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1137, cr_loss=0.3551, attn_decoder_loss=0.2399, over 5805488.03 frames. ], batch size: 86, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:34:48,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=693800.0, ans=0.0 +2024-09-19 15:34:48,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=693800.0, ans=0.125 +2024-09-19 15:34:57,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=693840.0, ans=0.1 +2024-09-19 15:35:11,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.42 vs. limit=15.0 +2024-09-19 15:35:18,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=693880.0, ans=0.0 +2024-09-19 15:35:20,551 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.49 vs. limit=15.0 +2024-09-19 15:35:26,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=693880.0, ans=0.125 +2024-09-19 15:35:29,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=693920.0, ans=0.125 +2024-09-19 15:35:53,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=693960.0, ans=0.2 +2024-09-19 15:35:56,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=693960.0, ans=0.0 +2024-09-19 15:36:00,504 INFO [train.py:1198] (0/2) Epoch 39, batch 1550, loss[loss=0.2358, ctc_loss=0.118, cr_loss=0.3708, attn_decoder_loss=0.2406, over 29515.00 frames. 
], tot_loss[loss=0.2344, ctc_loss=0.114, cr_loss=0.3554, attn_decoder_loss=0.2399, over 5781889.75 frames. ], batch size: 90, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:36:09,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=694000.0, ans=0.07 +2024-09-19 15:36:10,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=694000.0, ans=0.125 +2024-09-19 15:36:30,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.05 vs. limit=22.5 +2024-09-19 15:36:47,241 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.408e+01 8.376e+01 8.823e+01 9.525e+01 1.389e+02, threshold=1.765e+02, percent-clipped=0.0 +2024-09-19 15:36:50,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=694120.0, ans=0.125 +2024-09-19 15:36:55,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=694120.0, ans=0.2 +2024-09-19 15:36:58,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=694120.0, ans=0.2 +2024-09-19 15:37:02,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=694160.0, ans=0.1 +2024-09-19 15:37:13,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=694160.0, ans=0.125 +2024-09-19 15:37:16,088 INFO [train.py:1198] (0/2) Epoch 39, batch 1600, loss[loss=0.2278, ctc_loss=0.09962, cr_loss=0.3234, attn_decoder_loss=0.2348, over 29687.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1139, cr_loss=0.3547, attn_decoder_loss=0.2396, over 5764443.69 frames. ], batch size: 85, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:37:18,577 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.05 vs. limit=15.0 +2024-09-19 15:37:19,964 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.27 vs. 
limit=15.0 +2024-09-19 15:37:32,943 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:37:55,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=694280.0, ans=0.0 +2024-09-19 15:38:04,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=694320.0, ans=0.125 +2024-09-19 15:38:14,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=694360.0, ans=0.125 +2024-09-19 15:38:19,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=694360.0, ans=0.1 +2024-09-19 15:38:20,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=694360.0, ans=0.0 +2024-09-19 15:38:27,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=694360.0, ans=0.0 +2024-09-19 15:38:31,547 INFO [train.py:1198] (0/2) Epoch 39, batch 1650, loss[loss=0.2434, ctc_loss=0.1172, cr_loss=0.3673, attn_decoder_loss=0.2493, over 29727.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1133, cr_loss=0.3534, attn_decoder_loss=0.2393, over 5758025.67 frames. ], batch size: 89, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:38:36,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=694400.0, ans=0.125 +2024-09-19 15:38:58,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=694440.0, ans=0.125 +2024-09-19 15:39:01,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=694440.0, ans=0.0 +2024-09-19 15:39:12,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=694480.0, ans=0.0 +2024-09-19 15:39:17,886 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.87 vs. limit=15.0 +2024-09-19 15:39:22,826 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.314e+01 8.471e+01 8.887e+01 9.578e+01 2.740e+02, threshold=1.777e+02, percent-clipped=2.0 +2024-09-19 15:39:33,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=694520.0, ans=0.125 +2024-09-19 15:39:33,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=694520.0, ans=0.125 +2024-09-19 15:39:49,192 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.28 vs. limit=15.0 +2024-09-19 15:39:51,161 INFO [train.py:1198] (0/2) Epoch 39, batch 1700, loss[loss=0.2027, ctc_loss=0.09043, cr_loss=0.3017, attn_decoder_loss=0.2085, over 29588.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1128, cr_loss=0.3523, attn_decoder_loss=0.2389, over 5780843.16 frames. 
], batch size: 69, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:40:02,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=694600.0, ans=0.125 +2024-09-19 15:40:02,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=694600.0, ans=0.025 +2024-09-19 15:40:39,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=694720.0, ans=0.125 +2024-09-19 15:40:45,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=694720.0, ans=0.0 +2024-09-19 15:40:51,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.11 vs. limit=8.0 +2024-09-19 15:41:06,503 INFO [train.py:1198] (0/2) Epoch 39, batch 1750, loss[loss=0.2092, ctc_loss=0.09943, cr_loss=0.3176, attn_decoder_loss=0.2143, over 29354.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1129, cr_loss=0.353, attn_decoder_loss=0.2388, over 5789273.76 frames. ], batch size: 67, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:41:26,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=694840.0, ans=0.125 +2024-09-19 15:41:34,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=694840.0, ans=0.0 +2024-09-19 15:41:37,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=694880.0, ans=0.0 +2024-09-19 15:41:38,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=694880.0, ans=0.0 +2024-09-19 15:41:53,569 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.741e+01 9.226e+01 9.687e+01 1.772e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-19 15:42:06,666 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.54 vs. limit=15.0 +2024-09-19 15:42:22,079 INFO [train.py:1198] (0/2) Epoch 39, batch 1800, loss[loss=0.2493, ctc_loss=0.1205, cr_loss=0.3825, attn_decoder_loss=0.2551, over 29709.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1132, cr_loss=0.353, attn_decoder_loss=0.239, over 5791863.09 frames. ], batch size: 83, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:42:39,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=695040.0, ans=0.125 +2024-09-19 15:43:21,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=695120.0, ans=0.1 +2024-09-19 15:43:31,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=695160.0, ans=0.125 +2024-09-19 15:43:41,999 INFO [train.py:1198] (0/2) Epoch 39, batch 1850, loss[loss=0.2399, ctc_loss=0.1163, cr_loss=0.3621, attn_decoder_loss=0.2456, over 29642.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1133, cr_loss=0.3537, attn_decoder_loss=0.2388, over 5799274.73 frames. ], batch size: 86, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:43:50,385 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.68 vs. 
limit=15.0 +2024-09-19 15:43:51,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=695200.0, ans=0.125 +2024-09-19 15:44:01,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=695240.0, ans=0.125 +2024-09-19 15:44:04,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=695240.0, ans=0.125 +2024-09-19 15:44:11,185 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:44:17,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=695280.0, ans=0.0 +2024-09-19 15:44:26,593 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.41 vs. limit=15.0 +2024-09-19 15:44:28,637 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.257e+01 8.566e+01 9.030e+01 9.513e+01 1.502e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-19 15:44:30,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=695320.0, ans=0.0 +2024-09-19 15:44:37,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=695320.0, ans=0.125 +2024-09-19 15:44:47,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=695360.0, ans=0.1 +2024-09-19 15:44:48,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=695360.0, ans=0.125 +2024-09-19 15:44:57,268 INFO [train.py:1198] (0/2) Epoch 39, batch 1900, loss[loss=0.2333, ctc_loss=0.1036, cr_loss=0.34, attn_decoder_loss=0.2401, over 29713.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1136, cr_loss=0.3546, attn_decoder_loss=0.2394, over 5807173.15 frames. ], batch size: 89, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:45:03,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=695400.0, ans=0.0 +2024-09-19 15:45:09,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=695400.0, ans=0.0 +2024-09-19 15:45:20,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=695440.0, ans=0.0 +2024-09-19 15:46:07,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=695560.0, ans=0.0 +2024-09-19 15:46:13,431 INFO [train.py:1198] (0/2) Epoch 39, batch 1950, loss[loss=0.2277, ctc_loss=0.1123, cr_loss=0.34, attn_decoder_loss=0.233, over 29460.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1143, cr_loss=0.3563, attn_decoder_loss=0.2408, over 5821590.84 frames. ], batch size: 78, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:46:13,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=695600.0, ans=0.125 +2024-09-19 15:46:21,809 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.86 vs. 
limit=15.0 +2024-09-19 15:46:22,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=695600.0, ans=0.125 +2024-09-19 15:46:25,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=695600.0, ans=0.1 +2024-09-19 15:46:37,687 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.07 vs. limit=15.0 +2024-09-19 15:46:56,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.95 vs. limit=15.0 +2024-09-19 15:47:02,236 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.05 vs. limit=6.0 +2024-09-19 15:47:04,134 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 8.858e+01 9.313e+01 9.741e+01 2.178e+02, threshold=1.863e+02, percent-clipped=1.0 +2024-09-19 15:47:07,454 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:47:08,043 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.74 vs. limit=15.0 +2024-09-19 15:47:31,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=695800.0, ans=0.1 +2024-09-19 15:47:32,837 INFO [train.py:1198] (0/2) Epoch 39, batch 2000, loss[loss=0.2008, ctc_loss=0.09166, cr_loss=0.2977, attn_decoder_loss=0.2063, over 29303.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1147, cr_loss=0.3566, attn_decoder_loss=0.2413, over 5796932.16 frames. ], batch size: 67, lr: 2.83e-03, grad_scale: 32.0 +2024-09-19 15:48:08,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=695880.0, ans=0.125 +2024-09-19 15:48:12,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=695880.0, ans=0.125 +2024-09-19 15:48:48,812 INFO [train.py:1198] (0/2) Epoch 39, batch 2050, loss[loss=0.2113, ctc_loss=0.1042, cr_loss=0.3442, attn_decoder_loss=0.2155, over 29416.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1143, cr_loss=0.3556, attn_decoder_loss=0.2405, over 5789189.90 frames. ], batch size: 70, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:49:03,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.29 vs. limit=15.0 +2024-09-19 15:49:11,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=696040.0, ans=0.0 +2024-09-19 15:49:20,058 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.18 vs. 
limit=6.0 +2024-09-19 15:49:31,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=696080.0, ans=0.125 +2024-09-19 15:49:37,160 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.009e+01 8.542e+01 8.929e+01 9.648e+01 1.386e+02, threshold=1.786e+02, percent-clipped=0.0 +2024-09-19 15:49:48,592 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.24 vs. limit=15.0 +2024-09-19 15:50:02,430 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.57 vs. limit=10.0 +2024-09-19 15:50:04,425 INFO [train.py:1198] (0/2) Epoch 39, batch 2100, loss[loss=0.2371, ctc_loss=0.1161, cr_loss=0.3557, attn_decoder_loss=0.2426, over 29750.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1139, cr_loss=0.3556, attn_decoder_loss=0.2401, over 5800887.58 frames. ], batch size: 81, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:50:09,662 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.23 vs. limit=15.0 +2024-09-19 15:50:09,717 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.25 vs. limit=12.0 +2024-09-19 15:50:23,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=696240.0, ans=0.1 +2024-09-19 15:50:28,585 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.13 vs. limit=10.0 +2024-09-19 15:50:32,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=696240.0, ans=0.5 +2024-09-19 15:50:56,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=696320.0, ans=0.125 +2024-09-19 15:51:09,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=696360.0, ans=0.1 +2024-09-19 15:51:23,876 INFO [train.py:1198] (0/2) Epoch 39, batch 2150, loss[loss=0.2329, ctc_loss=0.1177, cr_loss=0.3737, attn_decoder_loss=0.2374, over 29443.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1137, cr_loss=0.3557, attn_decoder_loss=0.2396, over 5814785.15 frames. 
], batch size: 78, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:51:25,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=696400.0, ans=0.0 +2024-09-19 15:51:28,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=696400.0, ans=0.2 +2024-09-19 15:51:28,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=696400.0, ans=0.0 +2024-09-19 15:51:45,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=696440.0, ans=0.0 +2024-09-19 15:51:54,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=696480.0, ans=0.0 +2024-09-19 15:52:12,126 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 8.545e+01 9.048e+01 9.484e+01 1.799e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-19 15:52:17,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.48 vs. limit=22.5 +2024-09-19 15:52:21,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=696520.0, ans=0.0 +2024-09-19 15:52:24,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=696560.0, ans=0.125 +2024-09-19 15:52:39,526 INFO [train.py:1198] (0/2) Epoch 39, batch 2200, loss[loss=0.2364, ctc_loss=0.1139, cr_loss=0.3527, attn_decoder_loss=0.2422, over 29641.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.114, cr_loss=0.3558, attn_decoder_loss=0.2398, over 5810928.35 frames. ], batch size: 86, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:52:53,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=696640.0, ans=0.125 +2024-09-19 15:53:09,859 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:53:54,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=696800.0, ans=0.1 +2024-09-19 15:53:55,433 INFO [train.py:1198] (0/2) Epoch 39, batch 2250, loss[loss=0.2405, ctc_loss=0.1122, cr_loss=0.3498, attn_decoder_loss=0.247, over 29684.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1136, cr_loss=0.3549, attn_decoder_loss=0.2397, over 5810493.44 frames. 
], batch size: 82, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:54:04,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=696800.0, ans=0.1 +2024-09-19 15:54:09,506 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:54:35,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=696880.0, ans=0.0 +2024-09-19 15:54:40,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=696880.0, ans=0.0 +2024-09-19 15:54:45,788 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.531e+01 8.499e+01 9.039e+01 9.530e+01 1.426e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-19 15:55:04,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=696960.0, ans=0.125 +2024-09-19 15:55:15,221 INFO [train.py:1198] (0/2) Epoch 39, batch 2300, loss[loss=0.2061, ctc_loss=0.08873, cr_loss=0.2944, attn_decoder_loss=0.2126, over 29302.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1129, cr_loss=0.353, attn_decoder_loss=0.2386, over 5797936.49 frames. ], batch size: 71, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:55:16,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.50 vs. limit=6.0 +2024-09-19 15:55:51,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=697080.0, ans=0.0 +2024-09-19 15:55:54,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=697080.0, ans=0.0 +2024-09-19 15:55:55,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.43 vs. limit=15.0 +2024-09-19 15:56:30,109 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.34 vs. limit=12.0 +2024-09-19 15:56:30,684 INFO [train.py:1198] (0/2) Epoch 39, batch 2350, loss[loss=0.2368, ctc_loss=0.1157, cr_loss=0.3706, attn_decoder_loss=0.242, over 29686.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1134, cr_loss=0.3538, attn_decoder_loss=0.239, over 5804213.54 frames. ], batch size: 83, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:56:42,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.20 vs. limit=22.5 +2024-09-19 15:56:56,490 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:56:58,247 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.62 vs. 
limit=10.0 +2024-09-19 15:57:11,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=697280.0, ans=0.0 +2024-09-19 15:57:13,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=697280.0, ans=0.1 +2024-09-19 15:57:20,257 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.672e+01 9.121e+01 9.858e+01 6.738e+02, threshold=1.824e+02, percent-clipped=2.0 +2024-09-19 15:57:45,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.34 vs. limit=22.5 +2024-09-19 15:57:45,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.60 vs. limit=15.0 +2024-09-19 15:57:46,027 INFO [train.py:1198] (0/2) Epoch 39, batch 2400, loss[loss=0.2221, ctc_loss=0.1017, cr_loss=0.3371, attn_decoder_loss=0.228, over 29527.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1139, cr_loss=0.3548, attn_decoder_loss=0.2395, over 5808492.50 frames. ], batch size: 76, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:57:49,776 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.72 vs. limit=15.0 +2024-09-19 15:58:03,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=697440.0, ans=0.0 +2024-09-19 15:58:14,316 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:58:20,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=697480.0, ans=0.1 +2024-09-19 15:58:46,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=697520.0, ans=0.025 +2024-09-19 15:58:52,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.17 vs. limit=6.0 +2024-09-19 15:59:06,350 INFO [train.py:1198] (0/2) Epoch 39, batch 2450, loss[loss=0.2442, ctc_loss=0.1228, cr_loss=0.3692, attn_decoder_loss=0.2495, over 29674.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1143, cr_loss=0.3551, attn_decoder_loss=0.2401, over 5785343.74 frames. ], batch size: 82, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 15:59:08,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=697600.0, ans=0.0 +2024-09-19 15:59:36,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=697680.0, ans=0.1 +2024-09-19 15:59:48,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=697680.0, ans=0.2 +2024-09-19 15:59:55,611 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.313e+01 8.647e+01 9.273e+01 9.890e+01 2.382e+02, threshold=1.855e+02, percent-clipped=2.0 +2024-09-19 15:59:59,669 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.26 vs. 
limit=10.0 +2024-09-19 16:00:09,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=697760.0, ans=0.125 +2024-09-19 16:00:14,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=697760.0, ans=0.125 +2024-09-19 16:00:20,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=697800.0, ans=0.0 +2024-09-19 16:00:21,285 INFO [train.py:1198] (0/2) Epoch 39, batch 2500, loss[loss=0.2392, ctc_loss=0.1093, cr_loss=0.3547, attn_decoder_loss=0.2458, over 29611.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1148, cr_loss=0.3567, attn_decoder_loss=0.2404, over 5795486.30 frames. ], batch size: 86, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:00:21,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=697800.0, ans=0.0 +2024-09-19 16:00:32,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=697800.0, ans=0.0 +2024-09-19 16:00:53,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=697880.0, ans=0.0 +2024-09-19 16:01:08,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=697920.0, ans=0.0 +2024-09-19 16:01:20,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=697960.0, ans=0.07 +2024-09-19 16:01:37,124 INFO [train.py:1198] (0/2) Epoch 39, batch 2550, loss[loss=0.2085, ctc_loss=0.09364, cr_loss=0.305, attn_decoder_loss=0.2145, over 29340.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1142, cr_loss=0.3554, attn_decoder_loss=0.2399, over 5799297.67 frames. ], batch size: 67, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:01:42,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=698000.0, ans=0.0 +2024-09-19 16:01:46,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=698000.0, ans=0.05 +2024-09-19 16:01:56,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=698040.0, ans=0.025 +2024-09-19 16:02:12,897 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.55 vs. limit=15.0 +2024-09-19 16:02:20,623 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.88 vs. limit=12.0 +2024-09-19 16:02:28,817 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.383e+01 8.876e+01 9.415e+01 4.021e+02, threshold=1.775e+02, percent-clipped=1.0 +2024-09-19 16:02:36,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=698120.0, ans=0.0 +2024-09-19 16:02:42,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=698160.0, ans=0.0 +2024-09-19 16:02:56,979 INFO [train.py:1198] (0/2) Epoch 39, batch 2600, loss[loss=0.2322, ctc_loss=0.1105, cr_loss=0.3465, attn_decoder_loss=0.2381, over 29456.00 frames. 
], tot_loss[loss=0.2348, ctc_loss=0.1143, cr_loss=0.3561, attn_decoder_loss=0.2403, over 5795324.99 frames. ], batch size: 78, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:03:00,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=698200.0, ans=0.1 +2024-09-19 16:03:00,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=698200.0, ans=0.125 +2024-09-19 16:03:02,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.32 vs. limit=12.0 +2024-09-19 16:03:12,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=698240.0, ans=0.0 +2024-09-19 16:03:22,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=698240.0, ans=0.0 +2024-09-19 16:03:29,241 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.19 vs. limit=22.5 +2024-09-19 16:03:36,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=698280.0, ans=0.1 +2024-09-19 16:03:36,614 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:04:12,659 INFO [train.py:1198] (0/2) Epoch 39, batch 2650, loss[loss=0.252, ctc_loss=0.1284, cr_loss=0.3921, attn_decoder_loss=0.257, over 29242.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1143, cr_loss=0.356, attn_decoder_loss=0.2405, over 5801836.55 frames. ], batch size: 100, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:04:27,049 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.22 vs. 
limit=10.0 +2024-09-19 16:04:58,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=698520.0, ans=0.2 +2024-09-19 16:05:02,269 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.675e+01 8.983e+01 9.685e+01 2.002e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 16:05:02,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=698520.0, ans=0.0 +2024-09-19 16:05:02,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=698520.0, ans=0.025 +2024-09-19 16:05:04,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=698520.0, ans=0.0 +2024-09-19 16:05:11,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=698560.0, ans=0.1 +2024-09-19 16:05:11,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=698560.0, ans=0.0 +2024-09-19 16:05:11,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=698560.0, ans=0.5 +2024-09-19 16:05:14,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=698560.0, ans=0.125 +2024-09-19 16:05:17,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=698560.0, ans=0.1 +2024-09-19 16:05:23,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=698560.0, ans=0.0 +2024-09-19 16:05:27,840 INFO [train.py:1198] (0/2) Epoch 39, batch 2700, loss[loss=0.2297, ctc_loss=0.1093, cr_loss=0.3434, attn_decoder_loss=0.2355, over 29519.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1146, cr_loss=0.3568, attn_decoder_loss=0.2409, over 5797846.18 frames. ], batch size: 87, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:05:29,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=698600.0, ans=0.2 +2024-09-19 16:05:31,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.43 vs. limit=15.0 +2024-09-19 16:05:38,619 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:05:48,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=698640.0, ans=0.125 +2024-09-19 16:06:18,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=698720.0, ans=0.1 +2024-09-19 16:06:27,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=698720.0, ans=0.125 +2024-09-19 16:06:38,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=698760.0, ans=0.1 +2024-09-19 16:06:45,669 INFO [train.py:1198] (0/2) Epoch 39, batch 2750, loss[loss=0.2212, ctc_loss=0.103, cr_loss=0.3507, attn_decoder_loss=0.2266, over 29517.00 frames. 
], tot_loss[loss=0.2342, ctc_loss=0.1138, cr_loss=0.3546, attn_decoder_loss=0.2397, over 5796852.71 frames. ], batch size: 75, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:06:45,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=698800.0, ans=0.2 +2024-09-19 16:07:29,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=698880.0, ans=0.1 +2024-09-19 16:07:34,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=698920.0, ans=0.09899494936611666 +2024-09-19 16:07:38,786 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.635e+01 8.394e+01 9.092e+01 9.647e+01 2.225e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-19 16:07:46,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=698960.0, ans=0.07 +2024-09-19 16:08:03,250 INFO [train.py:1198] (0/2) Epoch 39, batch 2800, loss[loss=0.2488, ctc_loss=0.1341, cr_loss=0.3448, attn_decoder_loss=0.2539, over 19535.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.114, cr_loss=0.3545, attn_decoder_loss=0.2399, over 5776372.94 frames. ], batch size: 210, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:08:06,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=699000.0, ans=0.025 +2024-09-19 16:08:09,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=699000.0, ans=0.0 +2024-09-19 16:08:09,926 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.57 vs. limit=15.0 +2024-09-19 16:08:42,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=699080.0, ans=0.125 +2024-09-19 16:08:50,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=699120.0, ans=0.125 +2024-09-19 16:08:50,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=699120.0, ans=0.0 +2024-09-19 16:09:05,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=699160.0, ans=0.2 +2024-09-19 16:09:11,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=699160.0, ans=0.025 +2024-09-19 16:09:12,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=699160.0, ans=0.2 +2024-09-19 16:09:14,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=699160.0, ans=0.0 +2024-09-19 16:09:18,775 INFO [train.py:1198] (0/2) Epoch 39, batch 2850, loss[loss=0.2366, ctc_loss=0.1216, cr_loss=0.3662, attn_decoder_loss=0.2412, over 29510.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1144, cr_loss=0.3554, attn_decoder_loss=0.2404, over 5761953.36 frames. 
], batch size: 77, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:09:33,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=699200.0, ans=0.1 +2024-09-19 16:09:45,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=699240.0, ans=0.125 +2024-09-19 16:10:12,989 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.94 vs. limit=6.0 +2024-09-19 16:10:13,572 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.429e+01 8.628e+01 9.119e+01 9.691e+01 3.191e+02, threshold=1.824e+02, percent-clipped=2.0 +2024-09-19 16:10:36,328 INFO [train.py:1198] (0/2) Epoch 39, batch 2900, loss[loss=0.2327, ctc_loss=0.1151, cr_loss=0.3544, attn_decoder_loss=0.2379, over 29437.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.115, cr_loss=0.3567, attn_decoder_loss=0.2414, over 5787344.23 frames. ], batch size: 79, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:10:44,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=699400.0, ans=0.04949747468305833 +2024-09-19 16:10:59,006 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.88 vs. limit=15.0 +2024-09-19 16:11:18,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=699480.0, ans=0.025 +2024-09-19 16:11:30,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=699520.0, ans=0.1 +2024-09-19 16:11:41,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=699560.0, ans=0.125 +2024-09-19 16:11:53,780 INFO [train.py:1198] (0/2) Epoch 39, batch 2950, loss[loss=0.227, ctc_loss=0.112, cr_loss=0.3642, attn_decoder_loss=0.2317, over 29545.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1139, cr_loss=0.3547, attn_decoder_loss=0.24, over 5782862.58 frames. ], batch size: 75, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:11:54,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.10 vs. limit=22.5 +2024-09-19 16:12:03,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=699600.0, ans=0.0 +2024-09-19 16:12:08,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.33 vs. limit=12.0 +2024-09-19 16:12:09,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=699640.0, ans=0.125 +2024-09-19 16:12:17,563 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.53 vs. limit=15.0 +2024-09-19 16:12:21,939 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.12 vs. 
limit=6.0 +2024-09-19 16:12:22,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=699680.0, ans=0.1 +2024-09-19 16:12:30,998 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.74 vs. limit=15.0 +2024-09-19 16:12:32,793 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.56 vs. limit=22.5 +2024-09-19 16:12:41,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=699720.0, ans=0.0 +2024-09-19 16:12:41,120 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:12:43,125 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.11 vs. limit=15.0 +2024-09-19 16:12:45,557 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:12:46,051 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.38 vs. limit=10.0 +2024-09-19 16:12:46,699 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.521e+01 8.658e+01 9.205e+01 9.936e+01 3.321e+02, threshold=1.841e+02, percent-clipped=1.0 +2024-09-19 16:12:54,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=699760.0, ans=0.0 +2024-09-19 16:12:57,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_positive, batch_count=699760.0, ans=0.05 +2024-09-19 16:13:09,516 INFO [train.py:1198] (0/2) Epoch 39, batch 3000, loss[loss=0.2397, ctc_loss=0.114, cr_loss=0.3782, attn_decoder_loss=0.2452, over 29757.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1137, cr_loss=0.3548, attn_decoder_loss=0.24, over 5783556.34 frames. ], batch size: 81, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:13:09,517 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 16:13:28,815 INFO [train.py:1230] (0/2) Epoch 39, validation: loss=0.2123, ctc_loss=0.03671, cr_loss=6.289e-15, attn_decoder_loss=0.2318, over 944034.00 frames. +2024-09-19 16:13:28,816 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 16:13:29,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=699800.0, ans=0.0 +2024-09-19 16:13:48,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=699840.0, ans=0.0 +2024-09-19 16:13:49,564 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.61 vs. 
limit=15.0 +2024-09-19 16:13:54,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=699840.0, ans=0.0 +2024-09-19 16:13:56,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=699840.0, ans=0.125 +2024-09-19 16:14:01,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.53 vs. limit=15.0 +2024-09-19 16:14:02,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=699880.0, ans=0.0 +2024-09-19 16:14:40,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=699960.0, ans=0.025 +2024-09-19 16:14:46,919 INFO [train.py:1198] (0/2) Epoch 39, batch 3050, loss[loss=0.2253, ctc_loss=0.1086, cr_loss=0.3556, attn_decoder_loss=0.2303, over 29532.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1144, cr_loss=0.3562, attn_decoder_loss=0.2404, over 5776898.58 frames. ], batch size: 76, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:14:48,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=700000.0, ans=0.125 +2024-09-19 16:15:03,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=700040.0, ans=0.025 +2024-09-19 16:15:31,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.86 vs. limit=6.0 +2024-09-19 16:15:39,620 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.938e+01 8.454e+01 9.058e+01 9.630e+01 1.961e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 16:15:45,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=700160.0, ans=0.2 +2024-09-19 16:16:02,121 INFO [train.py:1198] (0/2) Epoch 39, batch 3100, loss[loss=0.2444, ctc_loss=0.1176, cr_loss=0.3627, attn_decoder_loss=0.2504, over 29300.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1141, cr_loss=0.3558, attn_decoder_loss=0.2399, over 5776698.02 frames. ], batch size: 100, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:16:08,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=700200.0, ans=0.025 +2024-09-19 16:16:08,773 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.73 vs. limit=15.0 +2024-09-19 16:16:20,740 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.81 vs. limit=12.0 +2024-09-19 16:16:21,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=700240.0, ans=0.125 +2024-09-19 16:16:29,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=700240.0, ans=0.125 +2024-09-19 16:16:54,209 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.70 vs. 
limit=15.0 +2024-09-19 16:16:58,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=700320.0, ans=0.1 +2024-09-19 16:17:19,625 INFO [train.py:1198] (0/2) Epoch 39, batch 3150, loss[loss=0.2567, ctc_loss=0.1351, cr_loss=0.3982, attn_decoder_loss=0.2614, over 28752.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.114, cr_loss=0.3553, attn_decoder_loss=0.24, over 5783160.27 frames. ], batch size: 104, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:17:39,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=700440.0, ans=0.2 +2024-09-19 16:17:44,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=700440.0, ans=0.125 +2024-09-19 16:18:11,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=700520.0, ans=0.1 +2024-09-19 16:18:12,218 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.334e+01 8.732e+01 9.135e+01 9.638e+01 1.512e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 16:18:32,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=700560.0, ans=0.0 +2024-09-19 16:18:33,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=700560.0, ans=0.5 +2024-09-19 16:18:36,823 INFO [train.py:1198] (0/2) Epoch 39, batch 3200, loss[loss=0.2322, ctc_loss=0.1069, cr_loss=0.3469, attn_decoder_loss=0.2384, over 29392.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1136, cr_loss=0.3544, attn_decoder_loss=0.2394, over 5792188.20 frames. ], batch size: 79, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:19:06,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=700680.0, ans=0.0 +2024-09-19 16:19:19,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=700680.0, ans=0.125 +2024-09-19 16:19:37,120 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.57 vs. limit=22.5 +2024-09-19 16:19:50,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=700760.0, ans=0.125 +2024-09-19 16:19:53,018 INFO [train.py:1198] (0/2) Epoch 39, batch 3250, loss[loss=0.2407, ctc_loss=0.1157, cr_loss=0.357, attn_decoder_loss=0.2467, over 29709.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.114, cr_loss=0.3551, attn_decoder_loss=0.24, over 5799344.29 frames. 
], batch size: 84, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:19:56,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=700800.0, ans=0.125 +2024-09-19 16:20:02,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=700800.0, ans=0.025 +2024-09-19 16:20:15,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=700840.0, ans=0.0 +2024-09-19 16:20:15,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=700840.0, ans=0.2 +2024-09-19 16:20:21,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=700880.0, ans=0.125 +2024-09-19 16:20:42,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=700920.0, ans=0.2 +2024-09-19 16:20:46,553 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.633e+01 8.604e+01 9.197e+01 9.698e+01 1.830e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-19 16:21:09,879 INFO [train.py:1198] (0/2) Epoch 39, batch 3300, loss[loss=0.2449, ctc_loss=0.12, cr_loss=0.3559, attn_decoder_loss=0.2509, over 28477.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1131, cr_loss=0.353, attn_decoder_loss=0.2389, over 5795699.89 frames. ], batch size: 112, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:21:34,769 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.23 vs. limit=12.0 +2024-09-19 16:21:36,662 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.98 vs. limit=15.0 +2024-09-19 16:21:38,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=701080.0, ans=0.0 +2024-09-19 16:21:44,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=701080.0, ans=0.125 +2024-09-19 16:22:05,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=701120.0, ans=0.0 +2024-09-19 16:22:27,046 INFO [train.py:1198] (0/2) Epoch 39, batch 3350, loss[loss=0.2443, ctc_loss=0.1117, cr_loss=0.3412, attn_decoder_loss=0.2515, over 28887.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1137, cr_loss=0.3539, attn_decoder_loss=0.2396, over 5774165.53 frames. 
], batch size: 104, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:22:42,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=701240.0, ans=0.0 +2024-09-19 16:23:08,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=701280.0, ans=0.2 +2024-09-19 16:23:21,142 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.630e+01 9.121e+01 9.700e+01 6.720e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 16:23:22,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=701320.0, ans=0.015 +2024-09-19 16:23:42,603 INFO [train.py:1198] (0/2) Epoch 39, batch 3400, loss[loss=0.2059, ctc_loss=0.09364, cr_loss=0.3127, attn_decoder_loss=0.2114, over 29345.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.114, cr_loss=0.3544, attn_decoder_loss=0.2394, over 5766307.54 frames. ], batch size: 67, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:23:53,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=701400.0, ans=0.1 +2024-09-19 16:23:56,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=701440.0, ans=0.125 +2024-09-19 16:24:14,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=701480.0, ans=0.1 +2024-09-19 16:24:53,613 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.04 vs. limit=12.0 +2024-09-19 16:24:55,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=701560.0, ans=0.125 +2024-09-19 16:25:00,312 INFO [train.py:1198] (0/2) Epoch 39, batch 3450, loss[loss=0.2433, ctc_loss=0.1156, cr_loss=0.3668, attn_decoder_loss=0.2494, over 28567.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1139, cr_loss=0.3543, attn_decoder_loss=0.2397, over 5774616.31 frames. ], batch size: 112, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:25:17,240 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:25:18,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=701640.0, ans=0.025 +2024-09-19 16:25:24,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=701640.0, ans=0.1 +2024-09-19 16:25:48,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=701720.0, ans=0.2 +2024-09-19 16:25:54,481 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.388e+01 8.636e+01 9.201e+01 9.668e+01 2.196e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-19 16:25:56,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=701720.0, ans=0.125 +2024-09-19 16:26:11,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=701760.0, ans=0.0 +2024-09-19 16:26:17,579 INFO [train.py:1198] (0/2) Epoch 39, batch 3500, loss[loss=0.2176, ctc_loss=0.1044, cr_loss=0.3262, attn_decoder_loss=0.2229, over 29737.00 frames. 
], tot_loss[loss=0.2337, ctc_loss=0.1136, cr_loss=0.3541, attn_decoder_loss=0.2391, over 5776677.68 frames. ], batch size: 72, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:26:23,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=701800.0, ans=0.2 +2024-09-19 16:26:34,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=701840.0, ans=0.125 +2024-09-19 16:27:08,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=701920.0, ans=0.0 +2024-09-19 16:27:14,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=701920.0, ans=0.125 +2024-09-19 16:27:31,753 INFO [train.py:1198] (0/2) Epoch 39, batch 3550, loss[loss=0.2539, ctc_loss=0.1316, cr_loss=0.3915, attn_decoder_loss=0.2588, over 29716.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1133, cr_loss=0.3533, attn_decoder_loss=0.2391, over 5783508.33 frames. ], batch size: 89, lr: 2.82e-03, grad_scale: 8.0 +2024-09-19 16:27:33,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=702000.0, ans=0.0 +2024-09-19 16:27:42,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=702000.0, ans=0.0 +2024-09-19 16:27:42,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=702000.0, ans=0.0 +2024-09-19 16:28:08,889 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:28:24,654 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.449e+01 9.039e+01 9.569e+01 2.236e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 16:28:35,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=702160.0, ans=0.125 +2024-09-19 16:28:45,267 INFO [train.py:1198] (0/2) Epoch 39, batch 3600, loss[loss=0.2313, ctc_loss=0.1168, cr_loss=0.3494, attn_decoder_loss=0.2362, over 29514.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1134, cr_loss=0.3533, attn_decoder_loss=0.2393, over 5792342.23 frames. ], batch size: 77, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:29:16,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=702280.0, ans=0.125 +2024-09-19 16:29:47,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.32 vs. limit=12.0 +2024-09-19 16:29:55,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=702360.0, ans=0.05 +2024-09-19 16:29:57,354 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:30:01,926 INFO [train.py:1198] (0/2) Epoch 39, batch 3650, loss[loss=0.2533, ctc_loss=0.129, cr_loss=0.3976, attn_decoder_loss=0.2583, over 29508.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.113, cr_loss=0.3529, attn_decoder_loss=0.2387, over 5794526.90 frames. 
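Most of the `INFO [scaling.py:214]` lines track `ScheduledFloat` values: hyperparameters such as dropout probabilities, skip rates, and balancer probabilities whose value is a function of `batch_count` rather than a fixed constant. A minimal sketch of the idea, assuming piecewise-linear interpolation between (batch_count, value) breakpoints; the class name and API below are illustrative, not icefall's actual scaling.py interface:

```python
from bisect import bisect_right

class ScheduledFloatSketch:
    """A float hyperparameter interpolated piecewise-linearly in batch_count."""

    def __init__(self, *points: tuple[float, float]):
        # points: (batch_count, value) pairs, kept sorted by batch_count
        self.points = sorted(points)

    def value(self, batch_count: float) -> float:
        xs = [x for x, _ in self.points]
        if batch_count <= xs[0]:
            return self.points[0][1]
        if batch_count >= xs[-1]:
            return self.points[-1][1]
        i = bisect_right(xs, batch_count)
        (x0, y0), (x1, y1) = self.points[i - 1], self.points[i]
        return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

# e.g. a dropout probability annealed from 0.3 down to 0.1 over 20k batches
p = ScheduledFloatSketch((0.0, 0.3), (20000.0, 0.1))
assert abs(p.value(10000.0) - 0.2) < 1e-9
```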
], batch size: 90, lr: 2.82e-03, grad_scale: 16.0 +2024-09-19 16:30:18,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=702440.0, ans=0.2 +2024-09-19 16:30:48,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=702520.0, ans=0.2 +2024-09-19 16:30:55,337 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.310e+01 8.559e+01 9.136e+01 9.465e+01 1.942e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-19 16:31:15,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=702600.0, ans=0.0 +2024-09-19 16:31:16,132 INFO [train.py:1198] (0/2) Epoch 39, batch 3700, loss[loss=0.2469, ctc_loss=0.1211, cr_loss=0.3557, attn_decoder_loss=0.253, over 29707.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1135, cr_loss=0.3541, attn_decoder_loss=0.2391, over 5804663.90 frames. ], batch size: 84, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:31:45,258 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.38 vs. limit=15.0 +2024-09-19 16:31:48,028 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.23 vs. limit=22.5 +2024-09-19 16:32:08,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.21 vs. limit=15.0 +2024-09-19 16:32:16,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=702760.0, ans=0.125 +2024-09-19 16:32:22,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=702760.0, ans=0.1 +2024-09-19 16:32:31,624 INFO [train.py:1198] (0/2) Epoch 39, batch 3750, loss[loss=0.2082, ctc_loss=0.09694, cr_loss=0.3104, attn_decoder_loss=0.2137, over 29322.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1133, cr_loss=0.3539, attn_decoder_loss=0.239, over 5808704.86 frames. ], batch size: 67, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:32:34,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=702800.0, ans=0.125 +2024-09-19 16:32:34,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=702800.0, ans=0.0 +2024-09-19 16:32:35,279 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.60 vs. limit=22.5 +2024-09-19 16:32:47,266 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.06 vs. limit=6.0 +2024-09-19 16:33:08,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=702880.0, ans=0.0 +2024-09-19 16:33:09,494 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.82 vs. 
limit=15.0 +2024-09-19 16:33:20,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=702920.0, ans=0.125 +2024-09-19 16:33:26,584 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.605e+01 8.464e+01 8.930e+01 9.588e+01 2.704e+02, threshold=1.786e+02, percent-clipped=2.0 +2024-09-19 16:33:28,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=702920.0, ans=0.125 +2024-09-19 16:33:28,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=702920.0, ans=0.1 +2024-09-19 16:33:37,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=702960.0, ans=0.0 +2024-09-19 16:33:45,858 INFO [train.py:1198] (0/2) Epoch 39, batch 3800, loss[loss=0.2479, ctc_loss=0.124, cr_loss=0.3631, attn_decoder_loss=0.2536, over 29621.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.113, cr_loss=0.3527, attn_decoder_loss=0.2384, over 5798841.12 frames. ], batch size: 86, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:33:47,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=703000.0, ans=0.0 +2024-09-19 16:33:50,144 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.64 vs. limit=5.0 +2024-09-19 16:33:58,696 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.89 vs. limit=15.0 +2024-09-19 16:34:25,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=703080.0, ans=0.2 +2024-09-19 16:34:42,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=703120.0, ans=0.1 +2024-09-19 16:34:45,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=703160.0, ans=0.125 +2024-09-19 16:34:57,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=703160.0, ans=0.125 +2024-09-19 16:35:00,350 INFO [train.py:1198] (0/2) Epoch 39, batch 3850, loss[loss=0.2524, ctc_loss=0.1271, cr_loss=0.3889, attn_decoder_loss=0.2577, over 29214.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1126, cr_loss=0.3521, attn_decoder_loss=0.2383, over 5813585.64 frames. ], batch size: 100, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:35:00,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=703200.0, ans=0.125 +2024-09-19 16:35:26,243 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.72 vs. 
limit=15.0 +2024-09-19 16:35:49,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=703320.0, ans=0.04949747468305833 +2024-09-19 16:35:56,712 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.496e+01 8.648e+01 9.079e+01 9.833e+01 2.007e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 16:36:14,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=703400.0, ans=0.0 +2024-09-19 16:36:15,986 INFO [train.py:1198] (0/2) Epoch 39, batch 3900, loss[loss=0.2484, ctc_loss=0.1186, cr_loss=0.3704, attn_decoder_loss=0.2546, over 29623.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1131, cr_loss=0.3538, attn_decoder_loss=0.239, over 5817611.17 frames. ], batch size: 86, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:36:40,262 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.03 vs. limit=15.0 +2024-09-19 16:36:47,744 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.46 vs. limit=6.0 +2024-09-19 16:36:51,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=703480.0, ans=0.125 +2024-09-19 16:37:06,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=703520.0, ans=0.0 +2024-09-19 16:37:29,853 INFO [train.py:1198] (0/2) Epoch 39, batch 3950, loss[loss=0.2589, ctc_loss=0.1213, cr_loss=0.3857, attn_decoder_loss=0.2656, over 29485.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1129, cr_loss=0.3532, attn_decoder_loss=0.2391, over 5836612.93 frames. ], batch size: 97, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:37:55,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=703640.0, ans=0.125 +2024-09-19 16:38:11,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=703680.0, ans=0.125 +2024-09-19 16:38:12,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=703680.0, ans=0.2 +2024-09-19 16:38:16,435 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.75 vs. limit=15.0 +2024-09-19 16:38:24,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=703720.0, ans=0.2 +2024-09-19 16:38:25,891 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.999e+01 8.633e+01 9.078e+01 9.598e+01 1.411e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 16:38:33,519 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=703760.0, ans=0.0 +2024-09-19 16:38:42,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=703760.0, ans=0.0 +2024-09-19 16:38:44,547 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.03 vs. 
limit=22.5 +2024-09-19 16:38:44,854 INFO [train.py:1198] (0/2) Epoch 39, batch 4000, loss[loss=0.2151, ctc_loss=0.0952, cr_loss=0.3197, attn_decoder_loss=0.2213, over 29499.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1129, cr_loss=0.3529, attn_decoder_loss=0.2389, over 5813254.49 frames. ], batch size: 74, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:38:51,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.03 vs. limit=10.0 +2024-09-19 16:39:01,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.47 vs. limit=15.0 +2024-09-19 16:39:04,780 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.32 vs. limit=15.0 +2024-09-19 16:39:13,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.89 vs. limit=15.0 +2024-09-19 16:39:29,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=703920.0, ans=0.0 +2024-09-19 16:39:30,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=703920.0, ans=0.09899494936611666 +2024-09-19 16:39:39,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=703920.0, ans=0.0 +2024-09-19 16:39:55,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=703960.0, ans=0.125 +2024-09-19 16:39:57,592 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-176000.pt +2024-09-19 16:40:06,228 INFO [train.py:1198] (0/2) Epoch 39, batch 4050, loss[loss=0.2542, ctc_loss=0.1412, cr_loss=0.3726, attn_decoder_loss=0.2585, over 20018.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1131, cr_loss=0.353, attn_decoder_loss=0.2388, over 5796775.96 frames. ], batch size: 209, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:40:15,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=704000.0, ans=0.125 +2024-09-19 16:40:33,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=704040.0, ans=0.125 +2024-09-19 16:40:47,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=704080.0, ans=0.2 +2024-09-19 16:41:01,440 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.594e+01 8.633e+01 9.112e+01 9.845e+01 1.931e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-19 16:41:04,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=704160.0, ans=0.1 +2024-09-19 16:41:18,671 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.75 vs. limit=15.0 +2024-09-19 16:41:20,591 INFO [train.py:1198] (0/2) Epoch 39, batch 4100, loss[loss=0.2395, ctc_loss=0.1125, cr_loss=0.3529, attn_decoder_loss=0.2458, over 29492.00 frames. 
], tot_loss[loss=0.2339, ctc_loss=0.1134, cr_loss=0.354, attn_decoder_loss=0.2394, over 5793187.42 frames. ], batch size: 90, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:41:28,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=704200.0, ans=0.125 +2024-09-19 16:41:32,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=704200.0, ans=0.2 +2024-09-19 16:41:38,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=704240.0, ans=0.025 +2024-09-19 16:41:53,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=704280.0, ans=0.125 +2024-09-19 16:42:13,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=704320.0, ans=0.125 +2024-09-19 16:42:15,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=704320.0, ans=0.125 +2024-09-19 16:42:16,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=704320.0, ans=0.1 +2024-09-19 16:42:35,209 INFO [train.py:1198] (0/2) Epoch 39, batch 4150, loss[loss=0.232, ctc_loss=0.1208, cr_loss=0.3815, attn_decoder_loss=0.2359, over 29490.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1133, cr_loss=0.3539, attn_decoder_loss=0.2391, over 5798153.41 frames. ], batch size: 77, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:42:37,115 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:42:38,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=704400.0, ans=0.125 +2024-09-19 16:42:47,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=704400.0, ans=0.07 +2024-09-19 16:42:50,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=704440.0, ans=0.0 +2024-09-19 16:42:50,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=704440.0, ans=0.1 +2024-09-19 16:43:03,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=704480.0, ans=0.0 +2024-09-19 16:43:22,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=704520.0, ans=0.125 +2024-09-19 16:43:29,362 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.310e+01 8.507e+01 9.056e+01 9.500e+01 2.477e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-19 16:43:43,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.69 vs. limit=15.0 +2024-09-19 16:43:48,514 INFO [train.py:1198] (0/2) Epoch 39, batch 4200, loss[loss=0.2454, ctc_loss=0.1203, cr_loss=0.3706, attn_decoder_loss=0.2511, over 29526.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1135, cr_loss=0.3546, attn_decoder_loss=0.2393, over 5800715.15 frames. 
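The `grad_scale` field (8.0 or 16.0 in these entries) is the current dynamic loss scale for mixed-precision training: it grows after a run of overflow-free steps and backs off when an inf/nan gradient is hit, which is why it moves between 8.0 and 16.0 across these batches. A hedged illustration using PyTorch's stock scaler; whether icefall configures it with exactly these values is an assumption:

```python
import torch

# Dynamic loss scaling consistent with the grad_scale values in this log.
scaler = torch.cuda.amp.GradScaler(
    init_scale=8.0,      # matches the scale seen in the entries above
    growth_factor=2.0,   # 8.0 -> 16.0 after enough overflow-free steps
    backoff_factor=0.5,  # 16.0 -> 8.0 when a step overflows
)
```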
], batch size: 90, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:43:50,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.91 vs. limit=15.0 +2024-09-19 16:43:54,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=704600.0, ans=0.0 +2024-09-19 16:44:06,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=704640.0, ans=0.0 +2024-09-19 16:44:06,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=704640.0, ans=0.04949747468305833 +2024-09-19 16:44:13,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=704640.0, ans=0.0 +2024-09-19 16:44:38,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=704720.0, ans=0.125 +2024-09-19 16:45:03,389 INFO [train.py:1198] (0/2) Epoch 39, batch 4250, loss[loss=0.2154, ctc_loss=0.1006, cr_loss=0.3256, attn_decoder_loss=0.2209, over 29522.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1129, cr_loss=0.3535, attn_decoder_loss=0.2394, over 5806071.03 frames. ], batch size: 74, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:45:10,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=704800.0, ans=0.2 +2024-09-19 16:45:21,103 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=704840.0, ans=0.125 +2024-09-19 16:45:23,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.54 vs. limit=15.0 +2024-09-19 16:45:30,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.02 vs. limit=10.0 +2024-09-19 16:45:35,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=704880.0, ans=0.1 +2024-09-19 16:45:44,020 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.75 vs. limit=12.0 +2024-09-19 16:45:46,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=704920.0, ans=0.125 +2024-09-19 16:45:57,876 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.522e+01 9.039e+01 9.490e+01 2.336e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 16:45:58,687 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.00 vs. limit=6.0 +2024-09-19 16:46:17,663 INFO [train.py:1198] (0/2) Epoch 39, batch 4300, loss[loss=0.2491, ctc_loss=0.1201, cr_loss=0.3713, attn_decoder_loss=0.2552, over 29506.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1129, cr_loss=0.3531, attn_decoder_loss=0.2394, over 5794970.55 frames. 
], batch size: 87, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:46:36,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=705040.0, ans=0.125 +2024-09-19 16:46:46,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=705080.0, ans=0.5 +2024-09-19 16:46:56,871 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.34 vs. limit=15.0 +2024-09-19 16:47:14,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=705120.0, ans=0.125 +2024-09-19 16:47:20,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=705160.0, ans=0.2 +2024-09-19 16:47:24,749 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.63 vs. limit=22.5 +2024-09-19 16:47:32,381 INFO [train.py:1198] (0/2) Epoch 39, batch 4350, loss[loss=0.2506, ctc_loss=0.1231, cr_loss=0.3978, attn_decoder_loss=0.2559, over 29489.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1157, cr_loss=0.3596, attn_decoder_loss=0.2427, over 5797094.98 frames. ], batch size: 97, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:48:13,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.62 vs. limit=15.0 +2024-09-19 16:48:27,651 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.048e+01 8.862e+01 9.354e+01 9.777e+01 1.379e+02, threshold=1.871e+02, percent-clipped=0.0 +2024-09-19 16:48:29,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=705360.0, ans=0.0 +2024-09-19 16:48:45,016 INFO [train.py:1198] (0/2) Epoch 39, batch 4400, loss[loss=0.2471, ctc_loss=0.1314, cr_loss=0.3913, attn_decoder_loss=0.2513, over 27543.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1165, cr_loss=0.3611, attn_decoder_loss=0.2445, over 5768802.71 frames. ], batch size: 125, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:48:47,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.57 vs. limit=10.0 +2024-09-19 16:49:00,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=705440.0, ans=0.125 +2024-09-19 16:49:35,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=705520.0, ans=0.025 +2024-09-19 16:50:00,208 INFO [train.py:1198] (0/2) Epoch 39, batch 4450, loss[loss=0.2529, ctc_loss=0.1392, cr_loss=0.3908, attn_decoder_loss=0.2568, over 20508.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1199, cr_loss=0.3661, attn_decoder_loss=0.2465, over 5574189.99 frames. ], batch size: 209, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:50:11,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=705600.0, ans=0.125 +2024-09-19 16:50:19,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.53 vs. 
limit=15.0 +2024-09-19 16:50:21,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=705640.0, ans=0.125 +2024-09-19 16:50:33,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.47 vs. limit=15.0 +2024-09-19 16:50:35,395 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.04 vs. limit=15.0 +2024-09-19 16:50:39,699 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:50:41,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=705680.0, ans=0.0 +2024-09-19 16:50:42,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=705680.0, ans=0.125 +2024-09-19 16:50:48,994 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.07 vs. limit=10.0 +2024-09-19 16:50:52,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=705720.0, ans=0.035 +2024-09-19 16:50:57,516 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:50:58,884 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.301e+01 9.418e+01 1.029e+02 1.185e+02 3.823e+02, threshold=2.058e+02, percent-clipped=1.0 +2024-09-19 16:50:59,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=705760.0, ans=0.1 +2024-09-19 16:51:03,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=705760.0, ans=0.025 +2024-09-19 16:51:07,098 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=15.0 +2024-09-19 16:51:15,114 INFO [train.py:1198] (0/2) Epoch 39, batch 4500, loss[loss=0.2514, ctc_loss=0.1363, cr_loss=0.3667, attn_decoder_loss=0.256, over 20140.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1226, cr_loss=0.368, attn_decoder_loss=0.2481, over 5233209.78 frames. ], batch size: 209, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:51:16,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=705800.0, ans=0.0 +2024-09-19 16:51:27,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=705800.0, ans=0.2 +2024-09-19 16:51:45,461 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:51:52,033 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-39.pt +2024-09-19 16:52:42,174 INFO [train.py:1198] (0/2) Epoch 40, batch 0, loss[loss=0.2138, ctc_loss=0.1006, cr_loss=0.3157, attn_decoder_loss=0.2194, over 29615.00 frames. ], tot_loss[loss=0.2138, ctc_loss=0.1006, cr_loss=0.3157, attn_decoder_loss=0.2194, over 29615.00 frames. 
], batch size: 73, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 16:52:42,175 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 16:53:00,473 INFO [train.py:1230] (0/2) Epoch 40, validation: loss=0.2128, ctc_loss=0.03605, cr_loss=6.84e-15, attn_decoder_loss=0.2324, over 944034.00 frames. +2024-09-19 16:53:00,473 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 16:53:20,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=705940.0, ans=0.1 +2024-09-19 16:53:35,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=705980.0, ans=0.125 +2024-09-19 16:54:03,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.64 vs. limit=15.0 +2024-09-19 16:54:06,005 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.82 vs. limit=22.5 +2024-09-19 16:54:13,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=706060.0, ans=0.025 +2024-09-19 16:54:17,753 INFO [train.py:1198] (0/2) Epoch 40, batch 50, loss[loss=0.2054, ctc_loss=0.08898, cr_loss=0.2879, attn_decoder_loss=0.212, over 29406.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1152, cr_loss=0.3579, attn_decoder_loss=0.2401, over 1267750.84 frames. ], batch size: 70, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:54:42,488 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.552e+01 8.881e+01 9.876e+01 1.118e+02 1.337e+02, threshold=1.975e+02, percent-clipped=0.0 +2024-09-19 16:54:47,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=706140.0, ans=0.125 +2024-09-19 16:54:51,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=706180.0, ans=0.125 +2024-09-19 16:54:54,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=706180.0, ans=0.125 +2024-09-19 16:54:56,686 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:55:16,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=706220.0, ans=0.2 +2024-09-19 16:55:25,716 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.35 vs. limit=15.0 +2024-09-19 16:55:27,257 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.07 vs. limit=15.0 +2024-09-19 16:55:35,517 INFO [train.py:1198] (0/2) Epoch 40, batch 100, loss[loss=0.2254, ctc_loss=0.1069, cr_loss=0.3525, attn_decoder_loss=0.2308, over 29525.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1166, cr_loss=0.3615, attn_decoder_loss=0.2425, over 2251992.36 frames. 
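The validation entry above fits the same 0.1/0.9/0.02 combination (0.1 × 0.03605 + 0.9 × 0.2324 ≈ 0.2128), and its `cr_loss` is numerically zero (6.84e-15). A plausible reading, though it is an inference from the log rather than a documented fact, is that the consistency-regularization term compares outputs across augmented/masked views and therefore contributes nothing on the unaugmented validation pass:

```python
# Validation entry from above: the cr term vanishes, yet the weighted sum
# still reproduces the logged total (scales as inferred from the exp dir).
val = 0.1 * 0.03605 + 0.9 * 0.2324 + 0.02 * 6.84e-15
assert abs(val - 0.2128) < 5e-4
```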
], batch size: 76, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:55:37,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=706300.0, ans=0.0 +2024-09-19 16:55:48,182 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.31 vs. limit=22.5 +2024-09-19 16:55:52,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.73 vs. limit=15.0 +2024-09-19 16:56:01,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=706340.0, ans=0.0 +2024-09-19 16:56:07,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=706380.0, ans=0.125 +2024-09-19 16:56:10,623 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.49 vs. limit=15.0 +2024-09-19 16:56:28,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=706420.0, ans=0.0 +2024-09-19 16:56:34,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=706460.0, ans=0.1 +2024-09-19 16:56:50,196 INFO [train.py:1198] (0/2) Epoch 40, batch 150, loss[loss=0.2097, ctc_loss=0.08963, cr_loss=0.3019, attn_decoder_loss=0.2163, over 29402.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1147, cr_loss=0.3563, attn_decoder_loss=0.2407, over 3047538.29 frames. ], batch size: 70, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:57:04,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=706540.0, ans=0.125 +2024-09-19 16:57:04,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=706540.0, ans=0.125 +2024-09-19 16:57:05,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=706540.0, ans=0.0 +2024-09-19 16:57:10,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.77 vs. limit=15.0 +2024-09-19 16:57:12,851 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.418e+01 8.727e+01 9.012e+01 9.533e+01 1.739e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 16:57:19,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=706580.0, ans=0.0 +2024-09-19 16:57:22,449 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.70 vs. limit=22.5 +2024-09-19 16:57:23,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=706580.0, ans=0.125 +2024-09-19 16:57:24,414 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.66 vs. 
limit=15.0 +2024-09-19 16:57:41,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=706620.0, ans=0.125 +2024-09-19 16:57:54,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.10 vs. limit=6.0 +2024-09-19 16:58:05,140 INFO [train.py:1198] (0/2) Epoch 40, batch 200, loss[loss=0.253, ctc_loss=0.1314, cr_loss=0.4206, attn_decoder_loss=0.2572, over 27376.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1141, cr_loss=0.3552, attn_decoder_loss=0.2399, over 3659524.53 frames. ], batch size: 124, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:59:13,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=706860.0, ans=0.125 +2024-09-19 16:59:19,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=706860.0, ans=0.0 +2024-09-19 16:59:25,392 INFO [train.py:1198] (0/2) Epoch 40, batch 250, loss[loss=0.2397, ctc_loss=0.1089, cr_loss=0.3473, attn_decoder_loss=0.2465, over 29217.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1139, cr_loss=0.3559, attn_decoder_loss=0.24, over 4142219.56 frames. ], batch size: 100, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:59:28,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=706900.0, ans=0.0 +2024-09-19 16:59:30,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=706900.0, ans=0.125 +2024-09-19 16:59:42,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=706940.0, ans=0.1 +2024-09-19 16:59:47,893 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.197e+01 8.510e+01 9.023e+01 9.427e+01 1.559e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 17:00:40,602 INFO [train.py:1198] (0/2) Epoch 40, batch 300, loss[loss=0.2505, ctc_loss=0.1272, cr_loss=0.3976, attn_decoder_loss=0.2554, over 29516.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1134, cr_loss=0.3545, attn_decoder_loss=0.2393, over 4511658.97 frames. ], batch size: 92, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:00:44,501 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.56 vs. 
limit=15.0 +2024-09-19 17:00:51,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=707100.0, ans=10.0 +2024-09-19 17:00:56,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=707140.0, ans=0.0 +2024-09-19 17:01:14,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=707180.0, ans=0.125 +2024-09-19 17:01:17,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=707180.0, ans=0.1 +2024-09-19 17:01:23,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=707180.0, ans=0.0 +2024-09-19 17:01:37,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=707220.0, ans=0.025 +2024-09-19 17:01:37,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=707220.0, ans=0.125 +2024-09-19 17:01:39,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=707220.0, ans=0.1 +2024-09-19 17:01:46,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=707260.0, ans=0.0 +2024-09-19 17:01:46,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=707260.0, ans=0.2 +2024-09-19 17:01:56,848 INFO [train.py:1198] (0/2) Epoch 40, batch 350, loss[loss=0.2084, ctc_loss=0.09492, cr_loss=0.3157, attn_decoder_loss=0.2139, over 29315.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1135, cr_loss=0.3548, attn_decoder_loss=0.2396, over 4795897.10 frames. ], batch size: 71, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:01:57,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer_ff2.min_abs, batch_count=707300.0, ans=0.1 +2024-09-19 17:02:14,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=707340.0, ans=0.0 +2024-09-19 17:02:21,767 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.315e+01 8.445e+01 8.881e+01 9.307e+01 1.282e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-19 17:02:37,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=707380.0, ans=0.2 +2024-09-19 17:02:44,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=707420.0, ans=0.0 +2024-09-19 17:02:49,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=707420.0, ans=0.0 +2024-09-19 17:02:55,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=707420.0, ans=0.125 +2024-09-19 17:03:02,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=707460.0, ans=0.125 +2024-09-19 17:03:14,413 INFO [train.py:1198] (0/2) Epoch 40, batch 400, loss[loss=0.2332, ctc_loss=0.1101, cr_loss=0.3408, attn_decoder_loss=0.2393, over 29690.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1131, cr_loss=0.3536, attn_decoder_loss=0.2391, over 5025011.80 frames. 
], batch size: 82, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:03:47,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=707580.0, ans=0.025 +2024-09-19 17:03:57,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=707580.0, ans=0.125 +2024-09-19 17:03:57,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=707580.0, ans=0.1 +2024-09-19 17:04:04,977 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.41 vs. limit=15.0 +2024-09-19 17:04:27,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=707660.0, ans=0.125 +2024-09-19 17:04:30,151 INFO [train.py:1198] (0/2) Epoch 40, batch 450, loss[loss=0.2366, ctc_loss=0.1117, cr_loss=0.3591, attn_decoder_loss=0.2425, over 29679.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1131, cr_loss=0.3533, attn_decoder_loss=0.2391, over 5187797.34 frames. ], batch size: 83, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:04:30,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=707700.0, ans=0.125 +2024-09-19 17:04:52,854 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.308e+01 8.522e+01 8.945e+01 9.353e+01 2.975e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-19 17:05:41,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=707860.0, ans=0.125 +2024-09-19 17:05:41,929 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.37 vs. limit=15.0 +2024-09-19 17:05:43,060 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=707860.0, ans=0.125 +2024-09-19 17:05:45,818 INFO [train.py:1198] (0/2) Epoch 40, batch 500, loss[loss=0.247, ctc_loss=0.1186, cr_loss=0.3741, attn_decoder_loss=0.2529, over 29434.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1124, cr_loss=0.3524, attn_decoder_loss=0.2383, over 5329389.88 frames. 
], batch size: 94, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:05:50,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=707900.0, ans=0.025 +2024-09-19 17:05:53,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=707900.0, ans=0.2 +2024-09-19 17:06:01,428 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=707940.0, ans=0.125 +2024-09-19 17:06:14,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=707940.0, ans=0.125 +2024-09-19 17:06:22,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=707980.0, ans=0.1 +2024-09-19 17:06:37,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=708020.0, ans=0.125 +2024-09-19 17:06:48,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=708020.0, ans=0.125 +2024-09-19 17:07:00,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.82 vs. limit=22.5 +2024-09-19 17:07:06,553 INFO [train.py:1198] (0/2) Epoch 40, batch 550, loss[loss=0.2473, ctc_loss=0.1214, cr_loss=0.366, attn_decoder_loss=0.2531, over 28898.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1122, cr_loss=0.3515, attn_decoder_loss=0.2384, over 5423755.80 frames. ], batch size: 104, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:07:24,138 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.26 vs. limit=15.0 +2024-09-19 17:07:26,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=708140.0, ans=0.125 +2024-09-19 17:07:30,881 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.605e+01 8.557e+01 8.930e+01 9.623e+01 2.134e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-19 17:07:53,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=708220.0, ans=0.0 +2024-09-19 17:07:59,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=708220.0, ans=0.125 +2024-09-19 17:08:08,264 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.62 vs. limit=15.0 +2024-09-19 17:08:15,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=708260.0, ans=0.09899494936611666 +2024-09-19 17:08:16,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=708260.0, ans=0.0 +2024-09-19 17:08:21,844 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.27 vs. limit=15.0 +2024-09-19 17:08:22,277 INFO [train.py:1198] (0/2) Epoch 40, batch 600, loss[loss=0.2453, ctc_loss=0.1204, cr_loss=0.3625, attn_decoder_loss=0.2511, over 29292.00 frames. 
], tot_loss[loss=0.2332, ctc_loss=0.1123, cr_loss=0.3519, attn_decoder_loss=0.2388, over 5509728.67 frames. ], batch size: 100, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:08:54,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=708380.0, ans=0.125 +2024-09-19 17:08:58,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=708380.0, ans=0.125 +2024-09-19 17:09:13,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=708420.0, ans=0.125 +2024-09-19 17:09:25,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=708460.0, ans=0.125 +2024-09-19 17:09:37,338 INFO [train.py:1198] (0/2) Epoch 40, batch 650, loss[loss=0.2383, ctc_loss=0.1206, cr_loss=0.3761, attn_decoder_loss=0.243, over 29742.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.112, cr_loss=0.3515, attn_decoder_loss=0.2381, over 5587030.03 frames. ], batch size: 81, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:10:03,863 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.123e+01 8.522e+01 8.894e+01 9.367e+01 2.518e+02, threshold=1.779e+02, percent-clipped=2.0 +2024-09-19 17:10:07,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=708540.0, ans=0.1 +2024-09-19 17:10:11,702 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:10:12,125 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.26 vs. limit=12.0 +2024-09-19 17:10:43,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=708660.0, ans=0.2 +2024-09-19 17:10:57,360 INFO [train.py:1198] (0/2) Epoch 40, batch 700, loss[loss=0.2353, ctc_loss=0.1185, cr_loss=0.3802, attn_decoder_loss=0.2398, over 29533.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1123, cr_loss=0.3523, attn_decoder_loss=0.2387, over 5636691.97 frames. ], batch size: 76, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:11:00,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=708700.0, ans=0.0 +2024-09-19 17:11:11,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=708740.0, ans=0.125 +2024-09-19 17:11:34,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=708780.0, ans=0.025 +2024-09-19 17:11:49,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=708820.0, ans=0.025 +2024-09-19 17:12:06,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=708860.0, ans=0.1 +2024-09-19 17:12:13,437 INFO [train.py:1198] (0/2) Epoch 40, batch 750, loss[loss=0.2336, ctc_loss=0.1096, cr_loss=0.3343, attn_decoder_loss=0.24, over 29721.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1124, cr_loss=0.3527, attn_decoder_loss=0.2387, over 5675988.10 frames. 
], batch size: 82, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:12:13,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=708900.0, ans=0.125 +2024-09-19 17:12:18,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=708900.0, ans=0.1 +2024-09-19 17:12:20,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.14 vs. limit=10.0 +2024-09-19 17:12:25,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=708900.0, ans=0.5 +2024-09-19 17:12:37,371 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.374e+01 9.046e+01 9.655e+01 1.904e+02, threshold=1.809e+02, percent-clipped=1.0 +2024-09-19 17:12:46,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=708980.0, ans=0.0 +2024-09-19 17:12:56,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.66 vs. limit=22.5 +2024-09-19 17:12:57,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=709020.0, ans=0.0 +2024-09-19 17:13:20,081 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=709060.0, ans=0.0 +2024-09-19 17:13:28,941 INFO [train.py:1198] (0/2) Epoch 40, batch 800, loss[loss=0.2185, ctc_loss=0.1008, cr_loss=0.3129, attn_decoder_loss=0.2246, over 29593.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1126, cr_loss=0.3531, attn_decoder_loss=0.2388, over 5706526.95 frames. ], batch size: 73, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:13:29,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=709100.0, ans=0.1 +2024-09-19 17:13:32,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=709100.0, ans=0.125 +2024-09-19 17:13:41,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=709100.0, ans=0.0 +2024-09-19 17:13:41,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=709100.0, ans=0.125 +2024-09-19 17:13:59,194 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.22 vs. 
limit=22.5 +2024-09-19 17:14:03,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=709180.0, ans=0.0 +2024-09-19 17:14:04,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=709180.0, ans=0.1 +2024-09-19 17:14:28,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=709220.0, ans=0.125 +2024-09-19 17:14:28,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=709220.0, ans=0.0 +2024-09-19 17:14:32,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=709260.0, ans=0.0 +2024-09-19 17:14:34,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=709260.0, ans=0.125 +2024-09-19 17:14:47,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=709300.0, ans=0.125 +2024-09-19 17:14:48,727 INFO [train.py:1198] (0/2) Epoch 40, batch 850, loss[loss=0.2382, ctc_loss=0.111, cr_loss=0.3641, attn_decoder_loss=0.2442, over 29719.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1124, cr_loss=0.3527, attn_decoder_loss=0.2384, over 5736360.52 frames. ], batch size: 89, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:14:57,034 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.16 vs. limit=22.5 +2024-09-19 17:14:59,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=709300.0, ans=0.125 +2024-09-19 17:15:00,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=709300.0, ans=0.2 +2024-09-19 17:15:12,635 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.469e+01 8.929e+01 9.566e+01 2.198e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-19 17:16:03,933 INFO [train.py:1198] (0/2) Epoch 40, batch 900, loss[loss=0.2154, ctc_loss=0.09639, cr_loss=0.3132, attn_decoder_loss=0.2216, over 29607.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.113, cr_loss=0.3537, attn_decoder_loss=0.239, over 5741201.72 frames. ], batch size: 73, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:16:10,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=709500.0, ans=15.0 +2024-09-19 17:17:00,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=709620.0, ans=0.95 +2024-09-19 17:17:16,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=709660.0, ans=0.125 +2024-09-19 17:17:19,239 INFO [train.py:1198] (0/2) Epoch 40, batch 950, loss[loss=0.2108, ctc_loss=0.08825, cr_loss=0.2947, attn_decoder_loss=0.2179, over 29531.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1131, cr_loss=0.3537, attn_decoder_loss=0.239, over 5744431.46 frames. 
], batch size: 74, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:17:34,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=709740.0, ans=0.1 +2024-09-19 17:17:45,404 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.548e+01 9.083e+01 9.830e+01 2.215e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-19 17:17:50,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=709780.0, ans=0.0 +2024-09-19 17:17:56,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=709780.0, ans=0.1 +2024-09-19 17:18:22,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=709860.0, ans=0.0 +2024-09-19 17:18:36,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=709860.0, ans=0.0 +2024-09-19 17:18:39,002 INFO [train.py:1198] (0/2) Epoch 40, batch 1000, loss[loss=0.2263, ctc_loss=0.1011, cr_loss=0.3162, attn_decoder_loss=0.2332, over 29502.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1134, cr_loss=0.3549, attn_decoder_loss=0.2395, over 5737489.44 frames. ], batch size: 77, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:18:52,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=709940.0, ans=0.125 +2024-09-19 17:19:09,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=709980.0, ans=0.0 +2024-09-19 17:19:11,665 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.39 vs. limit=15.0 +2024-09-19 17:19:33,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=710020.0, ans=0.125 +2024-09-19 17:19:51,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=710060.0, ans=0.2 +2024-09-19 17:19:52,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=710100.0, ans=0.0 +2024-09-19 17:19:54,081 INFO [train.py:1198] (0/2) Epoch 40, batch 1050, loss[loss=0.2432, ctc_loss=0.1133, cr_loss=0.3584, attn_decoder_loss=0.2496, over 29684.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.113, cr_loss=0.354, attn_decoder_loss=0.2386, over 5745774.36 frames. 
], batch size: 85, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:20:12,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=710140.0, ans=0.125 +2024-09-19 17:20:15,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=710140.0, ans=0.125 +2024-09-19 17:20:15,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=710140.0, ans=0.0 +2024-09-19 17:20:20,065 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.525e+01 8.590e+01 9.048e+01 9.519e+01 1.628e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 17:20:40,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=710220.0, ans=0.07 +2024-09-19 17:20:54,244 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.22 vs. limit=15.0 +2024-09-19 17:20:56,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=710260.0, ans=0.0 +2024-09-19 17:21:09,816 INFO [train.py:1198] (0/2) Epoch 40, batch 1100, loss[loss=0.2298, ctc_loss=0.1084, cr_loss=0.3415, attn_decoder_loss=0.2357, over 29461.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1122, cr_loss=0.3522, attn_decoder_loss=0.2381, over 5758056.75 frames. ], batch size: 78, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:21:19,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=710300.0, ans=0.025 +2024-09-19 17:21:20,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=710300.0, ans=0.0 +2024-09-19 17:21:26,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=710340.0, ans=0.125 +2024-09-19 17:21:39,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=710340.0, ans=0.125 +2024-09-19 17:21:58,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=710420.0, ans=0.125 +2024-09-19 17:22:20,117 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.71 vs. limit=10.0 +2024-09-19 17:22:22,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=710460.0, ans=0.125 +2024-09-19 17:22:29,989 INFO [train.py:1198] (0/2) Epoch 40, batch 1150, loss[loss=0.2273, ctc_loss=0.1053, cr_loss=0.3412, attn_decoder_loss=0.2333, over 29466.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1125, cr_loss=0.3528, attn_decoder_loss=0.2383, over 5755516.35 frames. ], batch size: 78, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:22:36,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=710500.0, ans=0.0 +2024-09-19 17:22:36,905 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.79 vs. 
limit=12.0 +2024-09-19 17:22:38,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=710500.0, ans=0.125 +2024-09-19 17:22:42,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=710500.0, ans=0.125 +2024-09-19 17:22:48,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=710540.0, ans=0.07 +2024-09-19 17:22:55,728 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.177e+01 8.424e+01 8.898e+01 9.617e+01 1.555e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-19 17:23:25,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=710620.0, ans=0.1 +2024-09-19 17:23:45,227 INFO [train.py:1198] (0/2) Epoch 40, batch 1200, loss[loss=0.2434, ctc_loss=0.1131, cr_loss=0.3464, attn_decoder_loss=0.2502, over 29674.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1126, cr_loss=0.3525, attn_decoder_loss=0.2388, over 5748925.05 frames. ], batch size: 85, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:24:11,854 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.87 vs. limit=15.0 +2024-09-19 17:24:25,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=710780.0, ans=15.0 +2024-09-19 17:24:29,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=710820.0, ans=0.0 +2024-09-19 17:24:30,094 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.82 vs. limit=15.0 +2024-09-19 17:24:38,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=710820.0, ans=0.05 +2024-09-19 17:24:44,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=710860.0, ans=0.2 +2024-09-19 17:25:01,056 INFO [train.py:1198] (0/2) Epoch 40, batch 1250, loss[loss=0.2497, ctc_loss=0.1199, cr_loss=0.3723, attn_decoder_loss=0.2559, over 29556.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1132, cr_loss=0.354, attn_decoder_loss=0.2397, over 5776061.68 frames. 
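The WARNING [optim.py] lines ("Clipping_scale=2.0, grad-norm quartiles ... threshold=... percent-clipped=...") summarize recent gradient norms as five quantiles (apparently min, 25%, median, 75%, max) plus the clipping threshold in force. In every record in this section the threshold equals Clipping_scale times the median quartile (for example 2.0 * 8.898e+01 = 1.780e+02 just above), which suggests median-relative gradient clipping. The sketch below illustrates that reading; it is inferred from the logged numbers, not taken from the optimizer's source.

import statistics
import torch

def clip_by_running_median(parameters, norm_history, clipping_scale=2.0):
    # norm_history: caller-owned list of recent global gradient norms,
    # kept to a bounded window here.
    params = [p for p in parameters if p.grad is not None]
    grad_norm = torch.norm(torch.stack([p.grad.norm() for p in params]))
    norm_history.append(grad_norm.item())
    del norm_history[:-1000]
    threshold = clipping_scale * statistics.median(norm_history)
    if grad_norm > threshold:
        # these are the steps counted by "percent-clipped" in the log
        for p in params:
            p.grad.mul_(threshold / grad_norm)
    return grad_norm.item(), threshold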
], batch size: 92, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:25:07,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=710900.0, ans=0.125 +2024-09-19 17:25:12,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=710900.0, ans=0.125 +2024-09-19 17:25:21,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=710940.0, ans=0.125 +2024-09-19 17:25:26,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=710940.0, ans=0.1 +2024-09-19 17:25:29,036 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.896e+01 8.708e+01 9.133e+01 9.581e+01 1.854e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-19 17:26:19,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.25 vs. limit=15.0 +2024-09-19 17:26:21,599 INFO [train.py:1198] (0/2) Epoch 40, batch 1300, loss[loss=0.2388, ctc_loss=0.1077, cr_loss=0.3405, attn_decoder_loss=0.2458, over 28207.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1128, cr_loss=0.353, attn_decoder_loss=0.239, over 5780561.15 frames. ], batch size: 111, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:26:21,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=711100.0, ans=0.125 +2024-09-19 17:27:05,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=711180.0, ans=0.2 +2024-09-19 17:27:08,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=711220.0, ans=0.2 +2024-09-19 17:27:08,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=711220.0, ans=0.2 +2024-09-19 17:27:13,534 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.74 vs. limit=15.0 +2024-09-19 17:27:38,126 INFO [train.py:1198] (0/2) Epoch 40, batch 1350, loss[loss=0.2344, ctc_loss=0.1144, cr_loss=0.3441, attn_decoder_loss=0.24, over 29755.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1124, cr_loss=0.3524, attn_decoder_loss=0.2387, over 5797453.13 frames. ], batch size: 81, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:27:39,045 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.39 vs. 
limit=22.5 +2024-09-19 17:28:03,656 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.480e+01 8.275e+01 9.002e+01 9.355e+01 2.084e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-19 17:28:07,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=711380.0, ans=0.025 +2024-09-19 17:28:11,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=711380.0, ans=0.0 +2024-09-19 17:28:25,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=711420.0, ans=0.125 +2024-09-19 17:28:25,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.13 vs. limit=15.0 +2024-09-19 17:28:28,724 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=12.0 +2024-09-19 17:28:42,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=711460.0, ans=0.125 +2024-09-19 17:28:45,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=711460.0, ans=0.125 +2024-09-19 17:28:52,057 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=711500.0, ans=0.0 +2024-09-19 17:28:53,204 INFO [train.py:1198] (0/2) Epoch 40, batch 1400, loss[loss=0.2022, ctc_loss=0.08693, cr_loss=0.2963, attn_decoder_loss=0.2085, over 29583.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1127, cr_loss=0.3533, attn_decoder_loss=0.2389, over 5808354.62 frames. ], batch size: 69, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:28:59,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=711500.0, ans=0.025 +2024-09-19 17:29:12,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.59 vs. 
limit=12.0 +2024-09-19 17:29:13,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=711540.0, ans=0.0 +2024-09-19 17:29:14,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=711540.0, ans=0.125 +2024-09-19 17:29:17,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=711540.0, ans=0.125 +2024-09-19 17:29:17,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=711540.0, ans=0.125 +2024-09-19 17:29:50,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=711620.0, ans=0.025 +2024-09-19 17:29:53,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=711620.0, ans=0.1 +2024-09-19 17:29:56,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=711660.0, ans=0.1 +2024-09-19 17:30:13,141 INFO [train.py:1198] (0/2) Epoch 40, batch 1450, loss[loss=0.2509, ctc_loss=0.1299, cr_loss=0.3889, attn_decoder_loss=0.2557, over 29423.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1127, cr_loss=0.353, attn_decoder_loss=0.2392, over 5804947.99 frames. ], batch size: 94, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:30:16,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=711700.0, ans=0.125 +2024-09-19 17:30:38,764 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.480e+01 8.710e+01 9.115e+01 9.620e+01 3.738e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-19 17:30:52,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=711780.0, ans=0.125 +2024-09-19 17:31:10,850 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.82 vs. limit=15.0 +2024-09-19 17:31:26,309 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.68 vs. limit=22.5 +2024-09-19 17:31:27,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=711900.0, ans=0.0 +2024-09-19 17:31:28,345 INFO [train.py:1198] (0/2) Epoch 40, batch 1500, loss[loss=0.2406, ctc_loss=0.1204, cr_loss=0.3716, attn_decoder_loss=0.2457, over 29637.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1131, cr_loss=0.3541, attn_decoder_loss=0.2397, over 5804773.98 frames. ], batch size: 86, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:31:39,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=711900.0, ans=0.0 +2024-09-19 17:31:48,903 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.30 vs. 
limit=12.0 +2024-09-19 17:32:00,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=711980.0, ans=0.125 +2024-09-19 17:32:11,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=711980.0, ans=0.1 +2024-09-19 17:32:22,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=712020.0, ans=0.125 +2024-09-19 17:32:40,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=712060.0, ans=0.1 +2024-09-19 17:32:44,594 INFO [train.py:1198] (0/2) Epoch 40, batch 1550, loss[loss=0.252, ctc_loss=0.1374, cr_loss=0.416, attn_decoder_loss=0.2555, over 29504.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1135, cr_loss=0.3544, attn_decoder_loss=0.2397, over 5780995.63 frames. ], batch size: 90, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:33:11,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=712140.0, ans=0.125 +2024-09-19 17:33:14,041 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.700e+01 8.677e+01 9.047e+01 9.758e+01 3.580e+02, threshold=1.809e+02, percent-clipped=1.0 +2024-09-19 17:33:14,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=712140.0, ans=0.2 +2024-09-19 17:33:21,949 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:33:26,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=712180.0, ans=0.025 +2024-09-19 17:33:38,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=712220.0, ans=0.1 +2024-09-19 17:33:41,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=712220.0, ans=0.125 +2024-09-19 17:34:01,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=712260.0, ans=0.2 +2024-09-19 17:34:04,515 INFO [train.py:1198] (0/2) Epoch 40, batch 1600, loss[loss=0.2418, ctc_loss=0.1244, cr_loss=0.3744, attn_decoder_loss=0.2466, over 29657.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1134, cr_loss=0.3538, attn_decoder_loss=0.2394, over 5764385.40 frames. 
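The many ScheduledFloat records above are not errors; they log hyperparameters whose current value (the ans field) is a function of batch_count. Dropout probabilities, skip rates, and balancer probabilities are annealed over training rather than held fixed, which is why most of the skip rates have reached 0.0 by batch_count around 712000 while the dropout_p values sit at 0.1. A minimal sketch of such a schedule, with invented breakpoints:

# Piecewise-linear schedule over batch_count, clamped at both ends. The
# breakpoints here are invented for illustration; each named
# ScheduledFloat in this log has its own.
def scheduled_float(batch_count,
                    points=((0.0, 0.3), (20000.0, 0.1), (50000.0, 0.0))):
    if batch_count <= points[0][0]:
        return points[0][1]
    for (x0, y0), (x1, y1) in zip(points, points[1:]):
        if batch_count <= x1:
            return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)
    return points[-1][1]

# scheduled_float(712000.0) == 0.0, matching the late-training skip-rate
# values of 0.0 logged above.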
], batch size: 85, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:34:13,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=712300.0, ans=0.0 +2024-09-19 17:34:26,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=712340.0, ans=0.125 +2024-09-19 17:34:30,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=712340.0, ans=0.0 +2024-09-19 17:34:33,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=712380.0, ans=0.1 +2024-09-19 17:34:47,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=712380.0, ans=0.0 +2024-09-19 17:34:56,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=712420.0, ans=0.1 +2024-09-19 17:34:56,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=712420.0, ans=0.025 +2024-09-19 17:35:09,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=712460.0, ans=0.0 +2024-09-19 17:35:11,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=712460.0, ans=0.125 +2024-09-19 17:35:20,310 INFO [train.py:1198] (0/2) Epoch 40, batch 1650, loss[loss=0.2461, ctc_loss=0.116, cr_loss=0.3698, attn_decoder_loss=0.2523, over 29704.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1129, cr_loss=0.3529, attn_decoder_loss=0.2389, over 5758515.04 frames. ], batch size: 89, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:35:41,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=712540.0, ans=0.025 +2024-09-19 17:35:46,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=712540.0, ans=0.1 +2024-09-19 17:35:48,736 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.111e+01 8.375e+01 9.140e+01 9.741e+01 3.230e+02, threshold=1.828e+02, percent-clipped=2.0 +2024-09-19 17:35:50,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=712580.0, ans=0.2 +2024-09-19 17:36:09,702 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.20 vs. limit=15.0 +2024-09-19 17:36:09,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.60 vs. limit=22.5 +2024-09-19 17:36:16,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=712620.0, ans=0.1 +2024-09-19 17:36:35,504 INFO [train.py:1198] (0/2) Epoch 40, batch 1700, loss[loss=0.2125, ctc_loss=0.1012, cr_loss=0.3208, attn_decoder_loss=0.2177, over 29604.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1129, cr_loss=0.3533, attn_decoder_loss=0.2388, over 5780718.43 frames. 
], batch size: 69, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:36:40,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=712700.0, ans=0.125 +2024-09-19 17:36:46,111 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:36:52,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=712740.0, ans=0.0 +2024-09-19 17:37:08,946 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.04 vs. limit=8.0 +2024-09-19 17:37:55,713 INFO [train.py:1198] (0/2) Epoch 40, batch 1750, loss[loss=0.2068, ctc_loss=0.1001, cr_loss=0.3344, attn_decoder_loss=0.2112, over 29382.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1125, cr_loss=0.3524, attn_decoder_loss=0.2384, over 5788133.09 frames. ], batch size: 67, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:38:24,561 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.489e+01 8.436e+01 8.990e+01 9.570e+01 1.574e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-19 17:38:29,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=712980.0, ans=0.125 +2024-09-19 17:38:54,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=713060.0, ans=0.025 +2024-09-19 17:39:03,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=713060.0, ans=0.125 +2024-09-19 17:39:04,401 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.87 vs. limit=15.0 +2024-09-19 17:39:10,864 INFO [train.py:1198] (0/2) Epoch 40, batch 1800, loss[loss=0.2438, ctc_loss=0.1162, cr_loss=0.367, attn_decoder_loss=0.2498, over 29708.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1126, cr_loss=0.3523, attn_decoder_loss=0.2385, over 5790713.70 frames. ], batch size: 83, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:39:17,322 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:39:24,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=713140.0, ans=0.125 +2024-09-19 17:39:42,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=713180.0, ans=0.0 +2024-09-19 17:39:51,871 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:40:02,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=713220.0, ans=0.0 +2024-09-19 17:40:15,432 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=9.06 vs. limit=15.0 +2024-09-19 17:40:26,410 INFO [train.py:1198] (0/2) Epoch 40, batch 1850, loss[loss=0.243, ctc_loss=0.1147, cr_loss=0.3575, attn_decoder_loss=0.2493, over 29622.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1128, cr_loss=0.3529, attn_decoder_loss=0.2385, over 5799032.95 frames. 
], batch size: 86, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:40:57,143 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.317e+01 8.615e+01 9.088e+01 9.758e+01 2.205e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-19 17:40:59,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=713380.0, ans=0.2 +2024-09-19 17:41:15,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=713420.0, ans=0.125 +2024-09-19 17:41:28,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=713460.0, ans=0.2 +2024-09-19 17:41:30,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=713460.0, ans=0.125 +2024-09-19 17:41:33,993 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.97 vs. limit=10.0 +2024-09-19 17:41:43,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.27 vs. limit=22.5 +2024-09-19 17:41:43,527 INFO [train.py:1198] (0/2) Epoch 40, batch 1900, loss[loss=0.2385, ctc_loss=0.1142, cr_loss=0.3594, attn_decoder_loss=0.2444, over 29708.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1132, cr_loss=0.3536, attn_decoder_loss=0.239, over 5805992.68 frames. ], batch size: 89, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:41:44,259 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.36 vs. limit=15.0 +2024-09-19 17:41:49,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=713500.0, ans=0.125 +2024-09-19 17:42:02,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=713540.0, ans=10.0 +2024-09-19 17:42:07,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=713540.0, ans=0.1 +2024-09-19 17:42:13,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.86 vs. limit=15.0 +2024-09-19 17:42:18,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.51 vs. limit=15.0 +2024-09-19 17:43:01,613 INFO [train.py:1198] (0/2) Epoch 40, batch 1950, loss[loss=0.2279, ctc_loss=0.1079, cr_loss=0.3546, attn_decoder_loss=0.2333, over 29453.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1138, cr_loss=0.3549, attn_decoder_loss=0.2403, over 5820376.68 frames. ], batch size: 78, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:43:07,398 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.43 vs. 
limit=15.0 +2024-09-19 17:43:09,685 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=713700.0, ans=0.0 +2024-09-19 17:43:27,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=713740.0, ans=0.1 +2024-09-19 17:43:30,165 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.544e+01 8.694e+01 9.094e+01 9.637e+01 1.422e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-19 17:43:36,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=713780.0, ans=0.025 +2024-09-19 17:43:41,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=713780.0, ans=0.125 +2024-09-19 17:43:53,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.91 vs. limit=6.0 +2024-09-19 17:44:16,998 INFO [train.py:1198] (0/2) Epoch 40, batch 2000, loss[loss=0.2108, ctc_loss=0.09089, cr_loss=0.311, attn_decoder_loss=0.2172, over 29348.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1139, cr_loss=0.3549, attn_decoder_loss=0.2407, over 5799444.75 frames. ], batch size: 67, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:44:23,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=713900.0, ans=0.2 +2024-09-19 17:44:44,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=713940.0, ans=0.1 +2024-09-19 17:45:02,776 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.13 vs. limit=15.0 +2024-09-19 17:45:08,527 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.38 vs. limit=10.0 +2024-09-19 17:45:13,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.08 vs. limit=15.0 +2024-09-19 17:45:30,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=714060.0, ans=0.125 +2024-09-19 17:45:33,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=714100.0, ans=0.025 +2024-09-19 17:45:34,773 INFO [train.py:1198] (0/2) Epoch 40, batch 2050, loss[loss=0.2092, ctc_loss=0.09001, cr_loss=0.3153, attn_decoder_loss=0.2154, over 29451.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1131, cr_loss=0.3533, attn_decoder_loss=0.2398, over 5791572.21 frames. 
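The grad_scale field in the batch records keeps moving between 8.0 and 16.0 (compare batch 1950 with grad_scale 8.0 and batch 2000 with grad_scale 16.0 above). That is the signature of dynamic loss scaling for mixed-precision training: the scale doubles after a run of overflow-free steps and halves when an inf or nan gradient is detected. A sketch using stock PyTorch AMP follows; the recipe's actual wrapper and settings may differ.

import torch

scaler = torch.cuda.amp.GradScaler(
    init_scale=8.0, growth_factor=2.0, backoff_factor=0.5,
    growth_interval=2000)

def training_step(model, optimizer, batch, compute_loss):
    # compute_loss is a placeholder for the recipe's loss computation
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = compute_loss(model, batch)
    scaler.scale(loss).backward()
    scaler.step(optimizer)     # skipped internally if gradients overflowed
    scaler.update()            # doubles or halves the scale as needed
    return scaler.get_scale()  # the value reported as grad_scale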
], batch size: 70, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:45:52,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=714140.0, ans=0.04949747468305833 +2024-09-19 17:46:00,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=714140.0, ans=0.125 +2024-09-19 17:46:05,842 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.514e+01 8.392e+01 8.898e+01 9.558e+01 3.245e+02, threshold=1.780e+02, percent-clipped=2.0 +2024-09-19 17:46:15,179 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:46:16,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=714180.0, ans=0.2 +2024-09-19 17:46:34,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=714220.0, ans=0.0 +2024-09-19 17:46:52,706 INFO [train.py:1198] (0/2) Epoch 40, batch 2100, loss[loss=0.2332, ctc_loss=0.1099, cr_loss=0.3612, attn_decoder_loss=0.2389, over 29757.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1129, cr_loss=0.3533, attn_decoder_loss=0.2393, over 5803094.03 frames. ], batch size: 81, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:46:52,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=714300.0, ans=0.025 +2024-09-19 17:47:21,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=714380.0, ans=0.0 +2024-09-19 17:47:26,259 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.74 vs. limit=15.0 +2024-09-19 17:48:04,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=714460.0, ans=0.0 +2024-09-19 17:48:07,616 INFO [train.py:1198] (0/2) Epoch 40, batch 2150, loss[loss=0.2344, ctc_loss=0.1046, cr_loss=0.3303, attn_decoder_loss=0.2415, over 29457.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1123, cr_loss=0.3524, attn_decoder_loss=0.2387, over 5817303.50 frames. 
], batch size: 78, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:48:09,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=714500.0, ans=0.05 +2024-09-19 17:48:37,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=714540.0, ans=0.025 +2024-09-19 17:48:38,291 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.587e+01 9.010e+01 9.804e+01 2.260e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-19 17:49:01,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=714620.0, ans=0.2 +2024-09-19 17:49:02,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=714620.0, ans=0.0 +2024-09-19 17:49:05,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=714620.0, ans=0.125 +2024-09-19 17:49:07,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=714620.0, ans=0.125 +2024-09-19 17:49:22,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=714660.0, ans=0.125 +2024-09-19 17:49:24,944 INFO [train.py:1198] (0/2) Epoch 40, batch 2200, loss[loss=0.2488, ctc_loss=0.123, cr_loss=0.377, attn_decoder_loss=0.2544, over 29613.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1127, cr_loss=0.3525, attn_decoder_loss=0.2388, over 5812614.49 frames. ], batch size: 86, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:49:54,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=714740.0, ans=0.04949747468305833 +2024-09-19 17:49:56,588 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.13 vs. limit=15.0 +2024-09-19 17:49:57,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=714780.0, ans=0.125 +2024-09-19 17:50:15,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=714820.0, ans=0.0 +2024-09-19 17:50:21,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=714820.0, ans=0.125 +2024-09-19 17:50:42,696 INFO [train.py:1198] (0/2) Epoch 40, batch 2250, loss[loss=0.2365, ctc_loss=0.1136, cr_loss=0.3618, attn_decoder_loss=0.2422, over 29715.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1126, cr_loss=0.3528, attn_decoder_loss=0.2389, over 5811883.69 frames. 
], batch size: 82, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 17:50:50,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=714900.0, ans=0.0 +2024-09-19 17:51:08,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=714940.0, ans=0.125 +2024-09-19 17:51:12,518 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.430e+01 8.488e+01 9.052e+01 9.511e+01 5.082e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-19 17:51:12,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=714980.0, ans=0.0 +2024-09-19 17:51:16,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=714980.0, ans=0.125 +2024-09-19 17:51:40,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=715020.0, ans=0.09899494936611666 +2024-09-19 17:51:57,717 INFO [train.py:1198] (0/2) Epoch 40, batch 2300, loss[loss=0.2161, ctc_loss=0.09541, cr_loss=0.3204, attn_decoder_loss=0.2224, over 29305.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1114, cr_loss=0.35, attn_decoder_loss=0.2377, over 5798079.64 frames. ], batch size: 71, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 17:52:17,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=715140.0, ans=0.125 +2024-09-19 17:52:26,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=715140.0, ans=0.1 +2024-09-19 17:52:45,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=715220.0, ans=0.2 +2024-09-19 17:52:48,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=715220.0, ans=0.0 +2024-09-19 17:52:56,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=715220.0, ans=0.125 +2024-09-19 17:53:00,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=715260.0, ans=0.07 +2024-09-19 17:53:02,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=715260.0, ans=0.0 +2024-09-19 17:53:06,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=715260.0, ans=0.0 +2024-09-19 17:53:13,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=715300.0, ans=0.125 +2024-09-19 17:53:15,284 INFO [train.py:1198] (0/2) Epoch 40, batch 2350, loss[loss=0.2268, ctc_loss=0.1028, cr_loss=0.3235, attn_decoder_loss=0.2333, over 29696.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1119, cr_loss=0.3515, attn_decoder_loss=0.238, over 5803152.83 frames. 
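The Whitening records above ("metric=M vs. limit=L") are diagnostics on how far a module's activations are from being white, that is, decorrelated channels with even variance; a metric pushing past its limit is what triggers the corrective behaviour these messages track. One plausible metric with those properties is sketched below purely for illustration; it is not claimed to be the project's exact formula.

import torch

def whitening_metric(x):
    # x: (num_frames, num_channels) activations. Returns 1.0 for
    # perfectly white features and grows as a few directions dominate.
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.t() @ x) / x.shape[0]
    eigs = torch.linalg.eigvalsh(cov)
    return ((eigs ** 2).mean() / eigs.mean() ** 2).item()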
], batch size: 83, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 17:53:17,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=715300.0, ans=0.0 +2024-09-19 17:53:33,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=715340.0, ans=0.025 +2024-09-19 17:53:38,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=715340.0, ans=0.0 +2024-09-19 17:53:47,274 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.564e+01 8.558e+01 9.025e+01 9.597e+01 1.404e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 17:54:02,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=715420.0, ans=0.0 +2024-09-19 17:54:09,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=15.0 +2024-09-19 17:54:32,768 INFO [train.py:1198] (0/2) Epoch 40, batch 2400, loss[loss=0.2146, ctc_loss=0.09992, cr_loss=0.3384, attn_decoder_loss=0.2198, over 29526.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1124, cr_loss=0.3529, attn_decoder_loss=0.2388, over 5806300.73 frames. ], batch size: 76, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:54:46,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=715540.0, ans=0.025 +2024-09-19 17:54:51,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=715540.0, ans=0.1 +2024-09-19 17:55:03,466 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.70 vs. limit=22.5 +2024-09-19 17:55:18,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=715620.0, ans=0.5 +2024-09-19 17:55:18,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=715620.0, ans=0.125 +2024-09-19 17:55:18,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=715620.0, ans=0.125 +2024-09-19 17:55:48,040 INFO [train.py:1198] (0/2) Epoch 40, batch 2450, loss[loss=0.2307, ctc_loss=0.1058, cr_loss=0.3295, attn_decoder_loss=0.2373, over 29716.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1132, cr_loss=0.3546, attn_decoder_loss=0.2395, over 5783726.91 frames. 
], batch size: 82, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:55:55,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=715700.0, ans=0.125 +2024-09-19 17:56:03,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=715740.0, ans=0.0 +2024-09-19 17:56:20,252 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.714e+01 9.274e+01 9.862e+01 1.579e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-19 17:56:42,983 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:57:05,504 INFO [train.py:1198] (0/2) Epoch 40, batch 2500, loss[loss=0.239, ctc_loss=0.108, cr_loss=0.3416, attn_decoder_loss=0.246, over 29608.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1131, cr_loss=0.3541, attn_decoder_loss=0.2395, over 5794633.90 frames. ], batch size: 86, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:57:13,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=715900.0, ans=0.125 +2024-09-19 17:57:19,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=715940.0, ans=0.0 +2024-09-19 17:57:41,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=715980.0, ans=0.1 +2024-09-19 17:58:13,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=716060.0, ans=0.2 +2024-09-19 17:58:24,138 INFO [train.py:1198] (0/2) Epoch 40, batch 2550, loss[loss=0.202, ctc_loss=0.0945, cr_loss=0.3191, attn_decoder_loss=0.2069, over 29351.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1126, cr_loss=0.3534, attn_decoder_loss=0.2392, over 5798417.42 frames. ], batch size: 67, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:58:36,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=716100.0, ans=0.5 +2024-09-19 17:58:40,808 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:58:43,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=716140.0, ans=0.0 +2024-09-19 17:58:48,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=716140.0, ans=0.1 +2024-09-19 17:58:53,981 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.331e+01 8.529e+01 8.996e+01 9.557e+01 1.715e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-19 17:59:07,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=716220.0, ans=0.125 +2024-09-19 17:59:30,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=716260.0, ans=0.125 +2024-09-19 17:59:39,642 INFO [train.py:1198] (0/2) Epoch 40, batch 2600, loss[loss=0.2226, ctc_loss=0.1038, cr_loss=0.3361, attn_decoder_loss=0.2284, over 29465.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1127, cr_loss=0.3535, attn_decoder_loss=0.2395, over 5794737.76 frames. 
], batch size: 78, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:59:58,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=716340.0, ans=0.1 +2024-09-19 18:00:54,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=716500.0, ans=0.125 +2024-09-19 18:00:56,059 INFO [train.py:1198] (0/2) Epoch 40, batch 2650, loss[loss=0.2458, ctc_loss=0.1193, cr_loss=0.3573, attn_decoder_loss=0.2519, over 29194.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1129, cr_loss=0.3542, attn_decoder_loss=0.2397, over 5801569.33 frames. ], batch size: 100, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 18:00:57,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=716500.0, ans=0.1 +2024-09-19 18:01:05,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=716500.0, ans=0.125 +2024-09-19 18:01:28,231 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.306e+01 8.493e+01 9.009e+01 9.595e+01 1.150e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 18:01:28,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=716580.0, ans=0.1 +2024-09-19 18:01:46,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=716620.0, ans=0.1 +2024-09-19 18:01:51,990 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.25 vs. limit=15.0 +2024-09-19 18:02:06,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=716660.0, ans=0.0 +2024-09-19 18:02:13,715 INFO [train.py:1198] (0/2) Epoch 40, batch 2700, loss[loss=0.2432, ctc_loss=0.118, cr_loss=0.3716, attn_decoder_loss=0.2489, over 29509.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1131, cr_loss=0.3541, attn_decoder_loss=0.2402, over 5797132.96 frames. ], batch size: 87, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:02:21,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=716700.0, ans=0.07 +2024-09-19 18:02:31,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=716740.0, ans=0.0 +2024-09-19 18:02:33,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=716740.0, ans=0.125 +2024-09-19 18:02:35,659 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.16 vs. limit=15.0 +2024-09-19 18:02:37,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=716740.0, ans=0.0 +2024-09-19 18:02:42,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=716780.0, ans=0.0 +2024-09-19 18:03:03,677 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.61 vs. 
limit=6.0 +2024-09-19 18:03:07,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=716820.0, ans=0.1 +2024-09-19 18:03:13,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=716860.0, ans=0.125 +2024-09-19 18:03:19,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=716860.0, ans=0.1 +2024-09-19 18:03:29,462 INFO [train.py:1198] (0/2) Epoch 40, batch 2750, loss[loss=0.2108, ctc_loss=0.09994, cr_loss=0.3351, attn_decoder_loss=0.2157, over 29515.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.112, cr_loss=0.3522, attn_decoder_loss=0.2388, over 5795807.17 frames. ], batch size: 75, lr: 2.75e-03, grad_scale: 4.0 +2024-09-19 18:03:41,004 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.94 vs. limit=12.0 +2024-09-19 18:03:41,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=716900.0, ans=0.125 +2024-09-19 18:04:04,577 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.639e+01 8.418e+01 8.972e+01 9.467e+01 1.420e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 18:04:17,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=717020.0, ans=0.125 +2024-09-19 18:04:18,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=717020.0, ans=0.125 +2024-09-19 18:04:29,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=717020.0, ans=0.125 +2024-09-19 18:04:40,106 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.80 vs. limit=15.0 +2024-09-19 18:04:46,937 INFO [train.py:1198] (0/2) Epoch 40, batch 2800, loss[loss=0.2489, ctc_loss=0.1373, cr_loss=0.3897, attn_decoder_loss=0.2526, over 19804.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1125, cr_loss=0.353, attn_decoder_loss=0.2389, over 5776838.83 frames. 
], batch size: 209, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:04:57,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=717100.0, ans=0.125 +2024-09-19 18:05:00,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=717140.0, ans=0.2 +2024-09-19 18:05:08,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=717140.0, ans=0.125 +2024-09-19 18:05:14,387 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:05:20,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=717180.0, ans=0.025 +2024-09-19 18:05:22,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=717180.0, ans=0.2 +2024-09-19 18:05:29,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=717180.0, ans=0.125 +2024-09-19 18:06:03,963 INFO [train.py:1198] (0/2) Epoch 40, batch 2850, loss[loss=0.2219, ctc_loss=0.1017, cr_loss=0.3343, attn_decoder_loss=0.2278, over 29496.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1129, cr_loss=0.3537, attn_decoder_loss=0.2395, over 5762779.30 frames. ], batch size: 77, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:06:05,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=717300.0, ans=0.0 +2024-09-19 18:06:17,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=717340.0, ans=0.125 +2024-09-19 18:06:29,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.69 vs. limit=15.0 +2024-09-19 18:06:37,388 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.647e+01 8.590e+01 9.012e+01 9.613e+01 1.852e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-19 18:06:42,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=717380.0, ans=0.125 +2024-09-19 18:07:19,947 INFO [train.py:1198] (0/2) Epoch 40, batch 2900, loss[loss=0.2342, ctc_loss=0.1153, cr_loss=0.3533, attn_decoder_loss=0.2395, over 29456.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1136, cr_loss=0.3551, attn_decoder_loss=0.2407, over 5788023.15 frames. 
], batch size: 79, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:07:24,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=717500.0, ans=0.04949747468305833 +2024-09-19 18:07:34,285 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:07:46,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=717540.0, ans=0.125 +2024-09-19 18:08:06,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=717620.0, ans=0.125 +2024-09-19 18:08:07,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=717620.0, ans=0.2 +2024-09-19 18:08:07,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=717620.0, ans=0.125 +2024-09-19 18:08:37,796 INFO [train.py:1198] (0/2) Epoch 40, batch 2950, loss[loss=0.2166, ctc_loss=0.09499, cr_loss=0.3023, attn_decoder_loss=0.2234, over 29490.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1129, cr_loss=0.3533, attn_decoder_loss=0.2395, over 5781375.83 frames. ], batch size: 75, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:08:41,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.97 vs. limit=15.0 +2024-09-19 18:09:02,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=717740.0, ans=0.125 +2024-09-19 18:09:11,438 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.157e+01 8.443e+01 9.079e+01 9.666e+01 1.457e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 18:09:56,123 INFO [train.py:1198] (0/2) Epoch 40, batch 3000, loss[loss=0.2263, ctc_loss=0.1033, cr_loss=0.334, attn_decoder_loss=0.2326, over 29754.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1127, cr_loss=0.3531, attn_decoder_loss=0.2393, over 5783270.48 frames. ], batch size: 81, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:09:56,124 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 18:10:14,432 INFO [train.py:1230] (0/2) Epoch 40, validation: loss=0.2122, ctc_loss=0.03685, cr_loss=5.615e-15, attn_decoder_loss=0.2317, over 944034.00 frames. +2024-09-19 18:10:14,432 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 18:10:28,593 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:11:03,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=718020.0, ans=0.0 +2024-09-19 18:11:12,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_na.min_abs, batch_count=718020.0, ans=0.02 +2024-09-19 18:11:17,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=718060.0, ans=0.0 +2024-09-19 18:11:32,566 INFO [train.py:1198] (0/2) Epoch 40, batch 3050, loss[loss=0.2252, ctc_loss=0.1114, cr_loss=0.3369, attn_decoder_loss=0.2303, over 29535.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1132, cr_loss=0.3536, attn_decoder_loss=0.2401, over 5776459.84 frames. 
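Worth flagging in the records above: the "Computing validation loss" pass (train.py:1221) reports loss=0.2122 over 944034 frames with cr_loss=5.615e-15, i.e. numerically zero, which is consistent with a consistency-regularization term that vanishes when there is no augmentation disagreement at evaluation time. A generic sketch of such a validation pass; the helper names are placeholders, not the recipe's API.

import torch

def compute_validation_loss(model, valid_loader, compute_loss):
    # compute_loss(model, batch) -> (summed loss tensor, num_frames)
    model.eval()
    tot_loss, tot_frames = 0.0, 0.0
    with torch.no_grad():
        for batch in valid_loader:
            loss, num_frames = compute_loss(model, batch)
            tot_loss += loss.item()
            tot_frames += num_frames
    model.train()
    return tot_loss / tot_frames  # per-frame average, like loss=0.2122 above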
+2024-09-19 18:11:50,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=718140.0, ans=0.125
+2024-09-19 18:11:56,050 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.59 vs. limit=15.0
+2024-09-19 18:11:58,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=718140.0, ans=0.125
+2024-09-19 18:12:05,628 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.520e+01 9.084e+01 9.934e+01 1.461e+02, threshold=1.817e+02, percent-clipped=0.0
+2024-09-19 18:12:12,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=718180.0, ans=0.025
+2024-09-19 18:12:47,572 INFO [train.py:1198] (0/2) Epoch 40, batch 3100, loss[loss=0.2497, ctc_loss=0.1272, cr_loss=0.381, attn_decoder_loss=0.2549, over 29271.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.113, cr_loss=0.3531, attn_decoder_loss=0.24, over 5777258.43 frames. ], batch size: 100, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:12:52,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=718300.0, ans=0.125
+2024-09-19 18:12:58,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=718300.0, ans=0.0
+2024-09-19 18:13:06,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=718340.0, ans=0.125
+2024-09-19 18:13:17,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=718340.0, ans=0.0
+2024-09-19 18:13:20,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=718380.0, ans=0.0
+2024-09-19 18:13:58,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=718460.0, ans=0.2
+2024-09-19 18:14:06,043 INFO [train.py:1198] (0/2) Epoch 40, batch 3150, loss[loss=0.242, ctc_loss=0.1166, cr_loss=0.366, attn_decoder_loss=0.2478, over 28870.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1131, cr_loss=0.3532, attn_decoder_loss=0.2399, over 5783856.70 frames. ], batch size: 104, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:14:06,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=718500.0, ans=0.125
+2024-09-19 18:14:07,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=718500.0, ans=0.0
+2024-09-19 18:14:13,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.31 vs. limit=6.0
+2024-09-19 18:14:16,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=718500.0, ans=0.025
+2024-09-19 18:14:18,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=718500.0, ans=0.1
+2024-09-19 18:14:30,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=718540.0, ans=0.0
+2024-09-19 18:14:36,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=718580.0, ans=0.2
+2024-09-19 18:14:39,242 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.527e+01 8.506e+01 9.197e+01 9.540e+01 2.562e+02, threshold=1.839e+02, percent-clipped=1.0
+2024-09-19 18:14:46,204 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.25 vs. limit=12.0
+2024-09-19 18:14:47,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=718580.0, ans=0.0
+2024-09-19 18:15:23,524 INFO [train.py:1198] (0/2) Epoch 40, batch 3200, loss[loss=0.2374, ctc_loss=0.1121, cr_loss=0.3476, attn_decoder_loss=0.2436, over 29389.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1131, cr_loss=0.3537, attn_decoder_loss=0.2395, over 5794368.89 frames. ], batch size: 79, lr: 2.75e-03, grad_scale: 16.0
+2024-09-19 18:16:04,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=718780.0, ans=0.125
+2024-09-19 18:16:07,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=718820.0, ans=0.07
+2024-09-19 18:16:12,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=718820.0, ans=0.125
+2024-09-19 18:16:14,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=718820.0, ans=0.0
+2024-09-19 18:16:31,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=718860.0, ans=0.95
+2024-09-19 18:16:38,224 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.24 vs. limit=22.5
+2024-09-19 18:16:38,752 INFO [train.py:1198] (0/2) Epoch 40, batch 3250, loss[loss=0.23, ctc_loss=0.1084, cr_loss=0.3495, attn_decoder_loss=0.2357, over 29717.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.113, cr_loss=0.3538, attn_decoder_loss=0.2397, over 5801180.60 frames. ], batch size: 84, lr: 2.75e-03, grad_scale: 16.0
+2024-09-19 18:16:42,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=718900.0, ans=0.0
+2024-09-19 18:16:45,175 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:16:54,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=718940.0, ans=0.125
+2024-09-19 18:16:58,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=718940.0, ans=0.125
+2024-09-19 18:17:09,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=718980.0, ans=0.125
+2024-09-19 18:17:13,653 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.518e+01 9.005e+01 9.479e+01 1.398e+02, threshold=1.801e+02, percent-clipped=0.0
+2024-09-19 18:17:29,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=719020.0, ans=0.04949747468305833
+2024-09-19 18:17:55,808 INFO [train.py:1198] (0/2) Epoch 40, batch 3300, loss[loss=0.2477, ctc_loss=0.1206, cr_loss=0.3604, attn_decoder_loss=0.2538, over 28342.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1122, cr_loss=0.352, attn_decoder_loss=0.2385, over 5798021.47 frames. ], batch size: 111, lr: 2.75e-03, grad_scale: 16.0
+2024-09-19 18:17:57,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=719100.0, ans=0.1
+2024-09-19 18:18:12,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=719140.0, ans=0.2
+2024-09-19 18:18:37,289 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:19:01,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=719260.0, ans=0.125
+2024-09-19 18:19:13,643 INFO [train.py:1198] (0/2) Epoch 40, batch 3350, loss[loss=0.254, ctc_loss=0.1258, cr_loss=0.3693, attn_decoder_loss=0.26, over 28855.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1134, cr_loss=0.3546, attn_decoder_loss=0.2393, over 5774274.70 frames. ], batch size: 104, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:19:27,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=719340.0, ans=0.95
+2024-09-19 18:19:41,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=719340.0, ans=0.125
+2024-09-19 18:19:48,474 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.582e+01 9.036e+01 9.650e+01 6.119e+02, threshold=1.807e+02, percent-clipped=2.0
+2024-09-19 18:19:50,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=719380.0, ans=0.0
+2024-09-19 18:19:51,000 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.82 vs. limit=15.0
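Each `WARNING [optim.py:487]` line summarizes the recent distribution of global gradient norms as five quantiles (min, 25%, median, 75%, max), the clipping threshold in effect, and the recent percentage of clipped batches. In every such line the threshold equals `Clipping_scale` times the logged median (e.g. 2.0 × 9.036e+01 ≈ 1.807e+02 just above), which keeps rare outliers like the 6.119e+02 maximum from destabilizing updates. A rough sketch of that mechanism; the window size and exact bookkeeping are assumptions, as the real logic lives in icefall's ScaledAdam in optim.py:

```python
import torch

class GradNormClipper:
    """Sketch: track recent global grad norms, clip at scale * running median."""

    def __init__(self, clipping_scale: float = 2.0, window: int = 128):
        self.clipping_scale = clipping_scale
        self.window = window
        self.norms: list[float] = []

    def __call__(self, params) -> float:
        params = [p for p in params if p.grad is not None]
        norm = torch.norm(torch.stack([p.grad.norm() for p in params])).item()
        self.norms = (self.norms + [norm])[-self.window:]
        q = torch.quantile(
            torch.tensor(self.norms), torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0])
        )  # the five "grad-norm quartiles" reported in the WARNING lines
        threshold = self.clipping_scale * q[2].item()  # scale * median
        if norm > threshold:  # rescale this batch's gradients
            for p in params:
                p.grad.mul_(threshold / norm)
        return threshold
```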
+2024-09-19 18:20:26,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=719460.0, ans=0.1
+2024-09-19 18:20:29,156 INFO [train.py:1198] (0/2) Epoch 40, batch 3400, loss[loss=0.2099, ctc_loss=0.09878, cr_loss=0.3238, attn_decoder_loss=0.215, over 29392.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1138, cr_loss=0.3553, attn_decoder_loss=0.2394, over 5767492.07 frames. ], batch size: 67, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:20:46,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=719540.0, ans=0.0
+2024-09-19 18:21:39,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=719660.0, ans=0.0
+2024-09-19 18:21:41,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=719660.0, ans=0.2
+2024-09-19 18:21:41,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=719660.0, ans=0.125
+2024-09-19 18:21:46,898 INFO [train.py:1198] (0/2) Epoch 40, batch 3450, loss[loss=0.2453, ctc_loss=0.1225, cr_loss=0.3743, attn_decoder_loss=0.2507, over 28327.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1135, cr_loss=0.3542, attn_decoder_loss=0.2395, over 5775310.95 frames. ], batch size: 111, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:21:52,393 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.95 vs. limit=10.0
+2024-09-19 18:22:09,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=719740.0, ans=0.025
+2024-09-19 18:22:21,311 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.879e+01 8.580e+01 9.014e+01 9.618e+01 1.900e+02, threshold=1.803e+02, percent-clipped=1.0
+2024-09-19 18:22:23,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=719780.0, ans=0.2
+2024-09-19 18:22:28,105 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.25 vs. limit=15.0
+2024-09-19 18:22:43,734 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.74 vs. limit=15.0
+2024-09-19 18:22:46,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=719820.0, ans=0.0
+2024-09-19 18:23:04,446 INFO [train.py:1198] (0/2) Epoch 40, batch 3500, loss[loss=0.2093, ctc_loss=0.0908, cr_loss=0.3126, attn_decoder_loss=0.2155, over 29303.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1131, cr_loss=0.3533, attn_decoder_loss=0.2389, over 5777795.50 frames. ], batch size: 71, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:23:21,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=719940.0, ans=0.125
+2024-09-19 18:23:32,685 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.35 vs. limit=6.0
+2024-09-19 18:23:33,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=719980.0, ans=0.125
+2024-09-19 18:23:40,939 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-180000.pt
+2024-09-19 18:23:51,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=719980.0, ans=0.125
+2024-09-19 18:23:51,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=719980.0, ans=0.07
+2024-09-19 18:23:55,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=720020.0, ans=0.125
+2024-09-19 18:24:04,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=720020.0, ans=0.04949747468305833
+2024-09-19 18:24:09,736 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.34 vs. limit=22.5
+2024-09-19 18:24:26,508 INFO [train.py:1198] (0/2) Epoch 40, batch 3550, loss[loss=0.241, ctc_loss=0.1111, cr_loss=0.3365, attn_decoder_loss=0.248, over 29719.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1129, cr_loss=0.3529, attn_decoder_loss=0.239, over 5782977.12 frames. ], batch size: 89, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:24:29,240 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.02 vs. limit=15.0
+2024-09-19 18:24:30,344 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.91 vs. limit=6.0
+2024-09-19 18:24:34,778 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.49 vs. limit=15.0
+2024-09-19 18:24:35,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=720100.0, ans=0.125
+2024-09-19 18:24:36,012 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.40 vs. limit=15.0
+2024-09-19 18:24:40,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=720140.0, ans=0.0
+2024-09-19 18:24:40,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=720140.0, ans=0.025
+2024-09-19 18:24:45,252 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.56 vs. limit=15.0
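The experiment directory in the checkpoint path above encodes the loss weighting (`ctc-loss-scale-0.1`, `aed-loss-scale-0.9`, `cr-loss-scale-0.02`), and the per-batch `loss` values in this log are consistent with that weighted sum of the three objectives. Checking it against the batch 3550 `tot_loss` entry just above:

```python
# Weights taken from the experiment directory name; losses from the log.
ctc_loss, cr_loss, attn_decoder_loss = 0.1129, 0.3529, 0.2390
loss = 0.1 * ctc_loss + 0.9 * attn_decoder_loss + 0.02 * cr_loss
print(round(loss, 4))  # 0.2334, matching the logged tot_loss
```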
+2024-09-19 18:24:45,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=720140.0, ans=0.1
+2024-09-19 18:24:56,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=720180.0, ans=0.1
+2024-09-19 18:24:58,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=720180.0, ans=0.125
+2024-09-19 18:24:59,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=720180.0, ans=0.125
+2024-09-19 18:25:00,509 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.309e+01 8.600e+01 9.034e+01 9.634e+01 4.593e+02, threshold=1.807e+02, percent-clipped=2.0
+2024-09-19 18:25:03,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=720180.0, ans=0.2
+2024-09-19 18:25:09,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=720220.0, ans=0.0
+2024-09-19 18:25:13,517 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.07 vs. limit=6.0
+2024-09-19 18:25:14,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=720220.0, ans=0.125
+2024-09-19 18:25:22,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=720220.0, ans=0.0
+2024-09-19 18:25:40,388 INFO [train.py:1198] (0/2) Epoch 40, batch 3600, loss[loss=0.2315, ctc_loss=0.1086, cr_loss=0.3454, attn_decoder_loss=0.2374, over 29500.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.113, cr_loss=0.3536, attn_decoder_loss=0.2393, over 5792287.83 frames. ], batch size: 77, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:25:47,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=720300.0, ans=0.125
+2024-09-19 18:25:48,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=720300.0, ans=0.0
+2024-09-19 18:26:10,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=720380.0, ans=0.025
+2024-09-19 18:26:22,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=720380.0, ans=0.125
+2024-09-19 18:26:24,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=720380.0, ans=0.125
+2024-09-19 18:26:56,394 INFO [train.py:1198] (0/2) Epoch 40, batch 3650, loss[loss=0.2405, ctc_loss=0.1165, cr_loss=0.35, attn_decoder_loss=0.2465, over 29514.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1122, cr_loss=0.3522, attn_decoder_loss=0.2384, over 5793222.66 frames. ], batch size: 90, lr: 2.74e-03, grad_scale: 16.0
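The `Whitening` lines compare a per-module statistic (`metric`) against a limit; when the metric exceeds its limit, the Whiten module in scaling.py applies a penalty that pushes the layer's output covariance back toward isotropy. My reading of the metric is a scale-invariant measure of how uneven the covariance spectrum is, equal to 1.0 for perfectly "white" features; the sketch below computes such a ratio and is an illustration, not necessarily icefall's exact formula:

```python
import torch

def whitening_metric(x: torch.Tensor) -> float:
    """x: (num_frames, num_channels). Ratio of the mean squared eigenvalue to
    the squared mean eigenvalue of the feature covariance; 1.0 when white."""
    x = x - x.mean(dim=0)
    cov = (x.T @ x) / x.shape[0]
    eigs = torch.linalg.eigvalsh(cov)
    return ((eigs ** 2).mean() / eigs.mean() ** 2).item()

print(whitening_metric(torch.randn(1000, 64)))  # ~1.0 for white noise
```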
+2024-09-19 18:27:21,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=720540.0, ans=0.0
+2024-09-19 18:27:24,816 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:27:30,425 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.153e+01 8.608e+01 9.210e+01 9.736e+01 1.315e+02, threshold=1.842e+02, percent-clipped=0.0
+2024-09-19 18:27:56,732 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.53 vs. limit=15.0
+2024-09-19 18:27:59,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=720660.0, ans=0.1
+2024-09-19 18:28:10,685 INFO [train.py:1198] (0/2) Epoch 40, batch 3700, loss[loss=0.2449, ctc_loss=0.1165, cr_loss=0.3686, attn_decoder_loss=0.251, over 29720.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1121, cr_loss=0.3519, attn_decoder_loss=0.2385, over 5804284.66 frames. ], batch size: 84, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:28:21,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=720700.0, ans=0.04949747468305833
+2024-09-19 18:28:30,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=720740.0, ans=0.125
+2024-09-19 18:28:44,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=720780.0, ans=0.0
+2024-09-19 18:29:16,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.60 vs. limit=15.0
+2024-09-19 18:29:18,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.86 vs. limit=12.0
+2024-09-19 18:29:26,574 INFO [train.py:1198] (0/2) Epoch 40, batch 3750, loss[loss=0.2147, ctc_loss=0.09589, cr_loss=0.3043, attn_decoder_loss=0.2212, over 29361.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1122, cr_loss=0.3521, attn_decoder_loss=0.2385, over 5808559.94 frames. ], batch size: 67, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:29:30,711 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=8.33 vs. limit=15.0
+2024-09-19 18:29:34,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=720900.0, ans=0.125
+2024-09-19 18:29:43,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=720940.0, ans=0.125
+2024-09-19 18:30:01,966 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.625e+01 8.539e+01 9.071e+01 9.494e+01 1.651e+02, threshold=1.814e+02, percent-clipped=0.0
+2024-09-19 18:30:13,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=721020.0, ans=0.0
+2024-09-19 18:30:18,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=721020.0, ans=0.125
+2024-09-19 18:30:30,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=721060.0, ans=0.2
+2024-09-19 18:30:40,981 INFO [train.py:1198] (0/2) Epoch 40, batch 3800, loss[loss=0.2386, ctc_loss=0.1057, cr_loss=0.3522, attn_decoder_loss=0.2456, over 29623.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1121, cr_loss=0.3516, attn_decoder_loss=0.2379, over 5799320.15 frames. ], batch size: 86, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:30:44,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=721100.0, ans=0.1
+2024-09-19 18:31:18,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=721180.0, ans=0.125
+2024-09-19 18:31:32,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=721220.0, ans=0.1
+2024-09-19 18:31:32,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=721220.0, ans=0.125
+2024-09-19 18:31:40,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=721260.0, ans=0.125
+2024-09-19 18:31:56,313 INFO [train.py:1198] (0/2) Epoch 40, batch 3850, loss[loss=0.2483, ctc_loss=0.1204, cr_loss=0.3606, attn_decoder_loss=0.2545, over 29316.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1116, cr_loss=0.3507, attn_decoder_loss=0.2378, over 5812392.26 frames. ], batch size: 100, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:32:04,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.20 vs. limit=15.0
+2024-09-19 18:32:17,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=721340.0, ans=0.2
+2024-09-19 18:32:17,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=721340.0, ans=0.1
+2024-09-19 18:32:31,699 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.269e+01 8.429e+01 8.857e+01 9.400e+01 1.753e+02, threshold=1.771e+02, percent-clipped=0.0
+2024-09-19 18:32:36,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=721380.0, ans=0.1
+2024-09-19 18:32:51,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=721420.0, ans=0.05
+2024-09-19 18:32:55,908 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.53 vs. limit=22.5
+2024-09-19 18:32:59,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=721460.0, ans=0.125
+2024-09-19 18:33:02,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=721460.0, ans=0.0
+2024-09-19 18:33:05,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=721460.0, ans=0.125
+2024-09-19 18:33:10,021 INFO [train.py:1198] (0/2) Epoch 40, batch 3900, loss[loss=0.2458, ctc_loss=0.1206, cr_loss=0.3758, attn_decoder_loss=0.2514, over 29635.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1122, cr_loss=0.3524, attn_decoder_loss=0.2387, over 5816778.24 frames. ], batch size: 86, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:33:19,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.31 vs. limit=10.0
+2024-09-19 18:33:37,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=721580.0, ans=0.0
+2024-09-19 18:33:39,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=721580.0, ans=0.125
+2024-09-19 18:33:50,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=721580.0, ans=0.2
+2024-09-19 18:34:09,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=721660.0, ans=0.04949747468305833
+2024-09-19 18:34:10,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.60 vs. limit=6.0
+2024-09-19 18:34:23,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=721660.0, ans=15.0
+2024-09-19 18:34:25,656 INFO [train.py:1198] (0/2) Epoch 40, batch 3950, loss[loss=0.2459, ctc_loss=0.1222, cr_loss=0.3686, attn_decoder_loss=0.2514, over 29447.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1122, cr_loss=0.3527, attn_decoder_loss=0.2389, over 5835855.39 frames. ], batch size: 97, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:34:29,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=721700.0, ans=0.125
+2024-09-19 18:34:42,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=721740.0, ans=0.125
+2024-09-19 18:34:52,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=721740.0, ans=0.0
+2024-09-19 18:34:59,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=721780.0, ans=0.1
+2024-09-19 18:35:00,912 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.605e+01 9.141e+01 9.620e+01 2.736e+02, threshold=1.828e+02, percent-clipped=1.0
+2024-09-19 18:35:12,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=721820.0, ans=0.0
+2024-09-19 18:35:21,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=721820.0, ans=0.0
+2024-09-19 18:35:26,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=721860.0, ans=0.125
+2024-09-19 18:35:31,376 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.24 vs. limit=12.0
+2024-09-19 18:35:39,103 INFO [train.py:1198] (0/2) Epoch 40, batch 4000, loss[loss=0.2241, ctc_loss=0.1082, cr_loss=0.3496, attn_decoder_loss=0.2292, over 29523.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1122, cr_loss=0.3528, attn_decoder_loss=0.239, over 5812913.53 frames. ], batch size: 74, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:36:02,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=721940.0, ans=0.125
+2024-09-19 18:36:06,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=721940.0, ans=0.2
+2024-09-19 18:36:17,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=721980.0, ans=0.2
+2024-09-19 18:36:23,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=722020.0, ans=0.0
+2024-09-19 18:36:23,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=722020.0, ans=0.1
+2024-09-19 18:36:41,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=722060.0, ans=0.1
+2024-09-19 18:36:41,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=722060.0, ans=0.2
+2024-09-19 18:36:54,439 INFO [train.py:1198] (0/2) Epoch 40, batch 4050, loss[loss=0.2487, ctc_loss=0.1355, cr_loss=0.3757, attn_decoder_loss=0.2529, over 19798.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1127, cr_loss=0.3535, attn_decoder_loss=0.2389, over 5796045.58 frames. ], batch size: 209, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:36:56,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=722100.0, ans=0.125
+2024-09-19 18:37:02,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=722100.0, ans=0.2
+2024-09-19 18:37:22,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=722180.0, ans=0.0
+2024-09-19 18:37:29,855 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.600e+01 8.666e+01 9.149e+01 9.737e+01 4.805e+02, threshold=1.830e+02, percent-clipped=1.0
+2024-09-19 18:37:55,535 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.69 vs. limit=15.0
+2024-09-19 18:37:57,317 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.84 vs. limit=15.0
+2024-09-19 18:37:59,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=722260.0, ans=0.0
+2024-09-19 18:38:08,049 INFO [train.py:1198] (0/2) Epoch 40, batch 4100, loss[loss=0.2504, ctc_loss=0.1269, cr_loss=0.3861, attn_decoder_loss=0.2555, over 29517.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1131, cr_loss=0.3546, attn_decoder_loss=0.2392, over 5792544.01 frames. ], batch size: 90, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:38:26,385 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.81 vs. limit=15.0
+2024-09-19 18:38:46,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=722380.0, ans=0.025
+2024-09-19 18:38:49,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=722380.0, ans=0.2
+2024-09-19 18:38:50,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=722380.0, ans=0.125
+2024-09-19 18:38:53,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=722420.0, ans=0.125
+2024-09-19 18:39:13,283 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.67 vs. limit=15.0
+2024-09-19 18:39:14,753 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.12 vs. limit=6.0
+2024-09-19 18:39:23,036 INFO [train.py:1198] (0/2) Epoch 40, batch 4150, loss[loss=0.2211, ctc_loss=0.1035, cr_loss=0.3206, attn_decoder_loss=0.227, over 29488.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1128, cr_loss=0.3539, attn_decoder_loss=0.239, over 5797393.96 frames. ], batch size: 77, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:39:31,245 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.08 vs. limit=22.5
+2024-09-19 18:39:36,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=722540.0, ans=0.0
+2024-09-19 18:39:39,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=722540.0, ans=0.0
+2024-09-19 18:39:57,903 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.961e+01 8.418e+01 8.915e+01 9.615e+01 1.835e+02, threshold=1.783e+02, percent-clipped=1.0
+2024-09-19 18:40:21,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=722660.0, ans=0.125
+2024-09-19 18:40:36,013 INFO [train.py:1198] (0/2) Epoch 40, batch 4200, loss[loss=0.2501, ctc_loss=0.125, cr_loss=0.3816, attn_decoder_loss=0.2555, over 29478.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1127, cr_loss=0.3539, attn_decoder_loss=0.2391, over 5799917.79 frames. ], batch size: 90, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:40:42,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.36 vs. limit=10.0
+2024-09-19 18:41:22,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=722820.0, ans=0.04949747468305833
+2024-09-19 18:41:34,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_na.min_abs, batch_count=722860.0, ans=0.02
+2024-09-19 18:41:37,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=722860.0, ans=0.125
+2024-09-19 18:41:37,629 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.98 vs. limit=15.0
+2024-09-19 18:41:50,104 INFO [train.py:1198] (0/2) Epoch 40, batch 4250, loss[loss=0.2214, ctc_loss=0.1014, cr_loss=0.3211, attn_decoder_loss=0.2276, over 29516.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1123, cr_loss=0.3528, attn_decoder_loss=0.239, over 5805688.66 frames. ], batch size: 74, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:42:27,450 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.534e+01 9.089e+01 9.722e+01 3.339e+02, threshold=1.818e+02, percent-clipped=2.0
+2024-09-19 18:42:28,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.82 vs. limit=15.0
+2024-09-19 18:42:35,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=723020.0, ans=0.125
+2024-09-19 18:42:52,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=723060.0, ans=0.125
+2024-09-19 18:43:04,240 INFO [train.py:1198] (0/2) Epoch 40, batch 4300, loss[loss=0.24, ctc_loss=0.1058, cr_loss=0.3354, attn_decoder_loss=0.2475, over 29528.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1116, cr_loss=0.3511, attn_decoder_loss=0.2389, over 5795303.32 frames. ], batch size: 87, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:43:19,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=723140.0, ans=0.1
+2024-09-19 18:43:22,436 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:43:55,935 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.09 vs. limit=12.0
+2024-09-19 18:44:11,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=723260.0, ans=0.025
+2024-09-19 18:44:19,231 INFO [train.py:1198] (0/2) Epoch 40, batch 4350, loss[loss=0.2438, ctc_loss=0.1119, cr_loss=0.3423, attn_decoder_loss=0.2508, over 29483.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1143, cr_loss=0.3567, attn_decoder_loss=0.2421, over 5798401.02 frames. ], batch size: 97, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:44:55,945 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.669e+01 8.976e+01 9.434e+01 1.012e+02 1.882e+02, threshold=1.887e+02, percent-clipped=1.0
+2024-09-19 18:45:24,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=723460.0, ans=0.0
+2024-09-19 18:45:32,919 INFO [train.py:1198] (0/2) Epoch 40, batch 4400, loss[loss=0.2443, ctc_loss=0.1219, cr_loss=0.3919, attn_decoder_loss=0.2492, over 27121.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1157, cr_loss=0.3597, attn_decoder_loss=0.2444, over 5768628.91 frames. ], batch size: 124, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:45:41,097 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.49 vs. limit=10.0
+2024-09-19 18:46:01,080 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.75 vs. limit=6.0
+2024-09-19 18:46:30,709 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.46 vs. limit=15.0
+2024-09-19 18:46:34,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=723660.0, ans=0.2
+2024-09-19 18:46:46,892 INFO [train.py:1198] (0/2) Epoch 40, batch 4450, loss[loss=0.2606, ctc_loss=0.1472, cr_loss=0.4113, attn_decoder_loss=0.2641, over 20060.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1194, cr_loss=0.3659, attn_decoder_loss=0.2466, over 5583481.43 frames. ], batch size: 210, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:46:50,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=723700.0, ans=0.1
+2024-09-19 18:47:07,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=723740.0, ans=0.125
+2024-09-19 18:47:13,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=723740.0, ans=0.0
+2024-09-19 18:47:26,345 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.232e+01 9.186e+01 1.020e+02 1.192e+02 3.727e+02, threshold=2.040e+02, percent-clipped=2.0
+2024-09-19 18:47:29,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=723780.0, ans=0.125
+2024-09-19 18:47:52,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=723860.0, ans=0.95
+2024-09-19 18:47:56,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=723860.0, ans=0.125
+2024-09-19 18:47:59,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=723860.0, ans=0.1
+2024-09-19 18:48:02,467 INFO [train.py:1198] (0/2) Epoch 40, batch 4500, loss[loss=0.2531, ctc_loss=0.1323, cr_loss=0.3837, attn_decoder_loss=0.258, over 20665.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1226, cr_loss=0.3685, attn_decoder_loss=0.2484, over 5242714.70 frames. ], batch size: 210, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:48:02,898 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:48:23,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=723940.0, ans=0.125
+2024-09-19 18:48:24,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=723940.0, ans=0.0
+2024-09-19 18:48:27,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=723940.0, ans=0.125
+2024-09-19 18:48:39,560 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-40.pt
+2024-09-19 18:49:17,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.04 vs. limit=22.5
+2024-09-19 18:49:17,628 INFO [train.py:1198] (0/2) Epoch 41, batch 0, loss[loss=0.2146, ctc_loss=0.09514, cr_loss=0.3081, attn_decoder_loss=0.2211, over 29626.00 frames. ], tot_loss[loss=0.2146, ctc_loss=0.09514, cr_loss=0.3081, attn_decoder_loss=0.2211, over 29626.00 frames. ], batch size: 73, lr: 2.70e-03, grad_scale: 16.0
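`checkpoint.py:75` saves both step-based checkpoints (`checkpoint-180000.pt`) and per-epoch ones (`epoch-40.pt`); the per-epoch files are what checkpoint averaging for decoding typically consumes. A minimal sketch of what such a checkpoint plausibly contains, with illustrative field names rather than icefall's exact schema:

```python
import torch

def save_checkpoint(path, model, optimizer, scheduler, batch_idx_train):
    # Persist everything needed to resume training and, later, to average
    # model weights across epochs for decoding. Field names are illustrative.
    torch.save(
        {
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
            "batch_idx_train": batch_idx_train,
        },
        path,
    )
```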
+2024-09-19 18:49:17,629 INFO [train.py:1221] (0/2) Computing validation loss
+2024-09-19 18:49:35,953 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.1.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.9509, 3.5523, 3.7480, 3.8470], device='cuda:0')
+2024-09-19 18:49:36,956 INFO [train.py:1230] (0/2) Epoch 41, validation: loss=0.2123, ctc_loss=0.03622, cr_loss=6.741e-15, attn_decoder_loss=0.2319, over 944034.00 frames.
+2024-09-19 18:49:36,956 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB
+2024-09-19 18:50:10,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.02 vs. limit=22.5
+2024-09-19 18:50:34,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=724120.0, ans=0.025
+2024-09-19 18:50:36,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=724160.0, ans=0.125
+2024-09-19 18:50:48,987 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.26 vs. limit=15.0
+2024-09-19 18:50:52,550 INFO [train.py:1198] (0/2) Epoch 41, batch 50, loss[loss=0.2084, ctc_loss=0.09023, cr_loss=0.3064, attn_decoder_loss=0.2147, over 29438.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1149, cr_loss=0.3581, attn_decoder_loss=0.2404, over 1268527.95 frames. ], batch size: 70, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 18:50:54,028 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 9.153e+01 1.062e+02 1.232e+02 3.092e+02, threshold=2.125e+02, percent-clipped=2.0
+2024-09-19 18:51:00,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=724200.0, ans=0.125
+2024-09-19 18:51:06,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=724240.0, ans=0.5
+2024-09-19 18:51:07,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=724240.0, ans=0.0
+2024-09-19 18:51:18,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.73 vs. limit=15.0
+2024-09-19 18:51:18,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.73 vs. limit=15.0
+2024-09-19 18:51:27,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=724280.0, ans=0.025
+2024-09-19 18:51:29,581 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.49 vs. limit=15.0
+2024-09-19 18:51:30,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=724280.0, ans=0.0
+2024-09-19 18:51:33,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=724280.0, ans=0.2
+2024-09-19 18:51:34,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=724280.0, ans=0.2
+2024-09-19 18:51:34,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=724280.0, ans=0.0
+2024-09-19 18:51:57,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=724360.0, ans=0.1
+2024-09-19 18:52:00,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=724360.0, ans=0.0
+2024-09-19 18:52:07,799 INFO [train.py:1198] (0/2) Epoch 41, batch 100, loss[loss=0.2252, ctc_loss=0.1153, cr_loss=0.3636, attn_decoder_loss=0.2293, over 29534.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1161, cr_loss=0.3623, attn_decoder_loss=0.2421, over 2252710.34 frames. ], batch size: 76, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 18:52:17,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=724400.0, ans=0.1
+2024-09-19 18:52:57,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=724520.0, ans=0.0
+2024-09-19 18:53:07,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=724520.0, ans=0.0
+2024-09-19 18:53:10,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=724560.0, ans=0.0
+2024-09-19 18:53:23,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=724560.0, ans=0.125
+2024-09-19 18:53:27,432 INFO [train.py:1198] (0/2) Epoch 41, batch 150, loss[loss=0.2076, ctc_loss=0.09995, cr_loss=0.3373, attn_decoder_loss=0.212, over 29397.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1136, cr_loss=0.3557, attn_decoder_loss=0.2395, over 3047738.22 frames. ], batch size: 70, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:53:30,374 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.711e+01 8.681e+01 9.088e+01 9.657e+01 1.697e+02, threshold=1.818e+02, percent-clipped=0.0
+2024-09-19 18:53:42,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=724640.0, ans=0.125
+2024-09-19 18:54:00,781 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:54:07,214 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.11 vs. limit=15.0
+2024-09-19 18:54:17,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=724720.0, ans=0.1
+2024-09-19 18:54:23,938 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.45 vs. limit=15.0
+2024-09-19 18:54:37,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.09 vs. limit=6.0
+2024-09-19 18:54:42,659 INFO [train.py:1198] (0/2) Epoch 41, batch 200, loss[loss=0.2464, ctc_loss=0.1239, cr_loss=0.3676, attn_decoder_loss=0.2519, over 27668.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.113, cr_loss=0.3543, attn_decoder_loss=0.239, over 3658396.96 frames. ], batch size: 125, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:54:44,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=724800.0, ans=0.125
+2024-09-19 18:55:16,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=724880.0, ans=0.125
+2024-09-19 18:55:26,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=724920.0, ans=0.5
+2024-09-19 18:55:57,744 INFO [train.py:1198] (0/2) Epoch 41, batch 250, loss[loss=0.2548, ctc_loss=0.1286, cr_loss=0.4022, attn_decoder_loss=0.2598, over 29313.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1125, cr_loss=0.3539, attn_decoder_loss=0.2388, over 4141526.74 frames. ], batch size: 100, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:56:00,844 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.416e+01 8.964e+01 9.351e+01 1.561e+02, threshold=1.793e+02, percent-clipped=0.0
+2024-09-19 18:56:31,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=725080.0, ans=0.125
+2024-09-19 18:57:17,570 INFO [train.py:1198] (0/2) Epoch 41, batch 300, loss[loss=0.2446, ctc_loss=0.1264, cr_loss=0.3907, attn_decoder_loss=0.2491, over 29548.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1123, cr_loss=0.353, attn_decoder_loss=0.2383, over 4509944.96 frames. ], batch size: 92, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:57:27,083 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=725200.0, ans=0.125
+2024-09-19 18:57:56,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=725280.0, ans=0.0
+2024-09-19 18:58:10,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=725320.0, ans=0.04949747468305833
+2024-09-19 18:58:19,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=725360.0, ans=0.0
+2024-09-19 18:58:21,679 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.62 vs. limit=15.0
+2024-09-19 18:58:23,394 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.00 vs. limit=22.5
+2024-09-19 18:58:33,157 INFO [train.py:1198] (0/2) Epoch 41, batch 350, loss[loss=0.2076, ctc_loss=0.08828, cr_loss=0.2996, attn_decoder_loss=0.2142, over 29324.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1129, cr_loss=0.3547, attn_decoder_loss=0.2388, over 4796383.44 frames. ], batch size: 71, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:58:34,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=725400.0, ans=0.125
+2024-09-19 18:58:36,040 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.397e+01 8.852e+01 9.608e+01 1.644e+02, threshold=1.770e+02, percent-clipped=0.0
+2024-09-19 18:58:39,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=725400.0, ans=0.2
+2024-09-19 18:58:42,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=725400.0, ans=0.0
+2024-09-19 18:59:24,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=725520.0, ans=0.0
+2024-09-19 18:59:25,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=725520.0, ans=0.0
+2024-09-19 18:59:46,262 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.97 vs. limit=10.0
+2024-09-19 18:59:48,519 INFO [train.py:1198] (0/2) Epoch 41, batch 400, loss[loss=0.2292, ctc_loss=0.1061, cr_loss=0.3445, attn_decoder_loss=0.2352, over 29698.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1122, cr_loss=0.3537, attn_decoder_loss=0.2384, over 5026577.42 frames. ], batch size: 82, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 18:59:48,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=725600.0, ans=0.125
+2024-09-19 18:59:59,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=725600.0, ans=0.125
+2024-09-19 19:00:05,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=725640.0, ans=0.2
+2024-09-19 19:00:05,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=725640.0, ans=0.2
+2024-09-19 19:00:15,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=725640.0, ans=0.125
+2024-09-19 19:00:19,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=725680.0, ans=0.0
+2024-09-19 19:01:08,850 INFO [train.py:1198] (0/2) Epoch 41, batch 450, loss[loss=0.2264, ctc_loss=0.1013, cr_loss=0.334, attn_decoder_loss=0.2329, over 29691.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1121, cr_loss=0.353, attn_decoder_loss=0.2385, over 5189824.24 frames. ], batch size: 83, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:01:11,781 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.338e+01 8.467e+01 8.907e+01 9.504e+01 2.028e+02, threshold=1.781e+02, percent-clipped=1.0
+2024-09-19 19:01:20,727 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.07 vs. limit=12.0
+2024-09-19 19:01:29,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.08 vs. limit=15.0
+2024-09-19 19:01:36,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=725840.0, ans=0.125
+2024-09-19 19:02:18,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=725960.0, ans=0.0
+2024-09-19 19:02:21,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=725960.0, ans=0.0
+2024-09-19 19:02:24,452 INFO [train.py:1198] (0/2) Epoch 41, batch 500, loss[loss=0.2444, ctc_loss=0.1148, cr_loss=0.362, attn_decoder_loss=0.2507, over 29428.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1118, cr_loss=0.3522, attn_decoder_loss=0.2381, over 5332075.58 frames. ], batch size: 94, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:02:29,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=726000.0, ans=0.125
+2024-09-19 19:02:51,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=726040.0, ans=0.09899494936611666
+2024-09-19 19:03:02,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=726080.0, ans=0.0
+2024-09-19 19:03:05,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=726080.0, ans=0.125
+2024-09-19 19:03:10,738 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.79 vs. limit=15.0
+2024-09-19 19:03:25,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=726160.0, ans=0.125
+2024-09-19 19:03:37,054 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=726160.0, ans=0.125
+2024-09-19 19:03:37,381 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.26 vs. limit=15.0
+2024-09-19 19:03:39,812 INFO [train.py:1198] (0/2) Epoch 41, batch 550, loss[loss=0.245, ctc_loss=0.1185, cr_loss=0.3792, attn_decoder_loss=0.2506, over 28702.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1118, cr_loss=0.352, attn_decoder_loss=0.2382, over 5424417.03 frames. ], batch size: 104, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:03:42,905 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.521e+01 8.739e+01 9.193e+01 9.957e+01 2.783e+02, threshold=1.839e+02, percent-clipped=3.0
+2024-09-19 19:03:45,124 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.51 vs. limit=15.0
+2024-09-19 19:04:16,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=726280.0, ans=0.125
+2024-09-19 19:04:17,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=726280.0, ans=0.0
+2024-09-19 19:04:18,746 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.29 vs. limit=15.0
limit=15.0 +2024-09-19 19:04:28,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=726320.0, ans=0.2 +2024-09-19 19:04:30,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=726320.0, ans=0.1 +2024-09-19 19:04:40,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=726360.0, ans=0.0 +2024-09-19 19:04:58,290 INFO [train.py:1198] (0/2) Epoch 41, batch 600, loss[loss=0.2477, ctc_loss=0.1249, cr_loss=0.388, attn_decoder_loss=0.2527, over 29284.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.112, cr_loss=0.3524, attn_decoder_loss=0.2384, over 5510380.86 frames. ], batch size: 100, lr: 2.70e-03, grad_scale: 8.0 +2024-09-19 19:04:59,361 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.68 vs. limit=22.5 +2024-09-19 19:05:03,069 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=16.16 vs. limit=15.0 +2024-09-19 19:05:20,160 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:05:33,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=726480.0, ans=0.1 +2024-09-19 19:05:39,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=726480.0, ans=0.125 +2024-09-19 19:05:40,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=726480.0, ans=0.07 +2024-09-19 19:05:50,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=726520.0, ans=0.125 +2024-09-19 19:05:51,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=726520.0, ans=0.125 +2024-09-19 19:06:15,355 INFO [train.py:1198] (0/2) Epoch 41, batch 650, loss[loss=0.2356, ctc_loss=0.1044, cr_loss=0.3253, attn_decoder_loss=0.2429, over 29775.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1109, cr_loss=0.3502, attn_decoder_loss=0.2378, over 5586482.20 frames. ], batch size: 81, lr: 2.70e-03, grad_scale: 8.0 +2024-09-19 19:06:19,865 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.350e+01 8.880e+01 9.262e+01 1.448e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-19 19:06:36,033 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.44 vs. 
limit=12.0 +2024-09-19 19:06:41,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=726640.0, ans=0.1 +2024-09-19 19:06:45,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=726680.0, ans=0.05 +2024-09-19 19:07:02,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=726720.0, ans=0.125 +2024-09-19 19:07:17,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=726760.0, ans=0.2 +2024-09-19 19:07:29,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=726800.0, ans=0.125 +2024-09-19 19:07:30,717 INFO [train.py:1198] (0/2) Epoch 41, batch 700, loss[loss=0.2251, ctc_loss=0.1052, cr_loss=0.3381, attn_decoder_loss=0.2309, over 29536.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1115, cr_loss=0.3515, attn_decoder_loss=0.2387, over 5636945.53 frames. ], batch size: 76, lr: 2.70e-03, grad_scale: 8.0 +2024-09-19 19:07:34,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=726800.0, ans=0.125 +2024-09-19 19:07:35,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=726800.0, ans=0.2 +2024-09-19 19:07:37,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.50 vs. limit=15.0 +2024-09-19 19:07:49,482 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.73 vs. limit=15.0 +2024-09-19 19:07:54,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=726840.0, ans=0.125 +2024-09-19 19:08:01,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=726880.0, ans=0.125 +2024-09-19 19:08:11,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=726880.0, ans=0.125 +2024-09-19 19:08:22,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=726920.0, ans=0.125 +2024-09-19 19:08:22,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=726920.0, ans=0.125 +2024-09-19 19:08:23,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=726920.0, ans=0.0 +2024-09-19 19:08:25,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=726920.0, ans=0.07 +2024-09-19 19:08:26,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=726920.0, ans=0.0 +2024-09-19 19:08:46,095 INFO [train.py:1198] (0/2) Epoch 41, batch 750, loss[loss=0.2442, ctc_loss=0.1174, cr_loss=0.3612, attn_decoder_loss=0.2502, over 29723.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1113, cr_loss=0.3511, attn_decoder_loss=0.2383, over 5677086.91 frames. 
], batch size: 82, lr: 2.70e-03, grad_scale: 8.0 +2024-09-19 19:08:52,746 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.358e+01 8.416e+01 8.976e+01 9.718e+01 1.767e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-19 19:08:54,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=727000.0, ans=0.125 +2024-09-19 19:08:56,583 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.02 vs. limit=15.0 +2024-09-19 19:09:00,017 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.35 vs. limit=15.0 +2024-09-19 19:09:30,364 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.74 vs. limit=15.0 +2024-09-19 19:09:36,703 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=10.20 vs. limit=15.0 +2024-09-19 19:09:45,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=727120.0, ans=0.0 +2024-09-19 19:10:06,047 INFO [train.py:1198] (0/2) Epoch 41, batch 800, loss[loss=0.2098, ctc_loss=0.09009, cr_loss=0.2969, attn_decoder_loss=0.2165, over 29624.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1117, cr_loss=0.3518, attn_decoder_loss=0.2383, over 5707766.77 frames. ], batch size: 73, lr: 2.70e-03, grad_scale: 16.0 +2024-09-19 19:10:27,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=727240.0, ans=0.2 +2024-09-19 19:10:29,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.60 vs. limit=15.0 +2024-09-19 19:10:30,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=727240.0, ans=0.035 +2024-09-19 19:10:30,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=727240.0, ans=0.025 +2024-09-19 19:10:30,938 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.86 vs. limit=15.0 +2024-09-19 19:10:38,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=727280.0, ans=0.1 +2024-09-19 19:10:51,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=727320.0, ans=0.125 +2024-09-19 19:10:58,405 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.32 vs. 
limit=22.5 +2024-09-19 19:11:00,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=727320.0, ans=0.05 +2024-09-19 19:11:06,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=727360.0, ans=0.125 +2024-09-19 19:11:21,347 INFO [train.py:1198] (0/2) Epoch 41, batch 850, loss[loss=0.2343, ctc_loss=0.111, cr_loss=0.3458, attn_decoder_loss=0.2404, over 29692.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1114, cr_loss=0.3508, attn_decoder_loss=0.2379, over 5737609.00 frames. ], batch size: 89, lr: 2.70e-03, grad_scale: 16.0 +2024-09-19 19:11:25,689 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.681e+01 8.437e+01 9.040e+01 9.490e+01 1.672e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-19 19:11:56,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=727480.0, ans=0.025 +2024-09-19 19:11:57,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=727480.0, ans=0.125 +2024-09-19 19:12:17,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=727520.0, ans=0.125 +2024-09-19 19:12:17,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=727520.0, ans=0.125 +2024-09-19 19:12:31,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=727560.0, ans=0.125 +2024-09-19 19:12:37,296 INFO [train.py:1198] (0/2) Epoch 41, batch 900, loss[loss=0.2031, ctc_loss=0.09108, cr_loss=0.3138, attn_decoder_loss=0.2085, over 29584.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1116, cr_loss=0.3513, attn_decoder_loss=0.2381, over 5742257.53 frames. ], batch size: 73, lr: 2.70e-03, grad_scale: 16.0 +2024-09-19 19:12:45,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=727600.0, ans=0.0 +2024-09-19 19:12:49,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.00 vs. limit=6.0 +2024-09-19 19:12:54,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=727640.0, ans=0.125 +2024-09-19 19:13:26,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=727720.0, ans=0.5 +2024-09-19 19:13:38,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=727720.0, ans=0.1 +2024-09-19 19:13:38,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=727720.0, ans=0.1 +2024-09-19 19:13:56,361 INFO [train.py:1198] (0/2) Epoch 41, batch 950, loss[loss=0.2202, ctc_loss=0.1, cr_loss=0.3232, attn_decoder_loss=0.2264, over 29494.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1118, cr_loss=0.3514, attn_decoder_loss=0.2382, over 5744592.35 frames. 
], batch size: 74, lr: 2.70e-03, grad_scale: 16.0 +2024-09-19 19:14:00,868 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.928e+01 8.606e+01 9.118e+01 9.826e+01 2.095e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 19:14:01,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=727800.0, ans=0.0 +2024-09-19 19:14:08,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=727800.0, ans=0.1 +2024-09-19 19:14:17,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=727840.0, ans=0.025 +2024-09-19 19:14:37,965 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.73 vs. limit=22.5 +2024-09-19 19:14:39,734 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.54 vs. limit=6.0 +2024-09-19 19:14:54,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.15 vs. limit=22.5 +2024-09-19 19:15:07,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=727960.0, ans=0.0 +2024-09-19 19:15:12,356 INFO [train.py:1198] (0/2) Epoch 41, batch 1000, loss[loss=0.2183, ctc_loss=0.1068, cr_loss=0.3313, attn_decoder_loss=0.2234, over 29488.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1127, cr_loss=0.3533, attn_decoder_loss=0.2389, over 5737124.46 frames. ], batch size: 77, lr: 2.70e-03, grad_scale: 16.0 +2024-09-19 19:15:38,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=728040.0, ans=0.125 +2024-09-19 19:15:49,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=728080.0, ans=0.125 +2024-09-19 19:15:49,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=728080.0, ans=0.2 +2024-09-19 19:16:02,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=728120.0, ans=0.125 +2024-09-19 19:16:05,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=728120.0, ans=0.1 +2024-09-19 19:16:11,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=728160.0, ans=0.125 +2024-09-19 19:16:29,739 INFO [train.py:1198] (0/2) Epoch 41, batch 1050, loss[loss=0.2466, ctc_loss=0.1297, cr_loss=0.3965, attn_decoder_loss=0.2508, over 29684.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1123, cr_loss=0.3525, attn_decoder_loss=0.2386, over 5744228.47 frames. 
], batch size: 85, lr: 2.70e-03, grad_scale: 8.0 +2024-09-19 19:16:33,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=728200.0, ans=0.1 +2024-09-19 19:16:34,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=728200.0, ans=0.0 +2024-09-19 19:16:35,723 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.355e+01 8.570e+01 9.055e+01 9.661e+01 1.822e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-19 19:16:40,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=728200.0, ans=0.0 +2024-09-19 19:16:42,246 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:16:52,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=728240.0, ans=0.125 +2024-09-19 19:17:00,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=728280.0, ans=0.125 +2024-09-19 19:17:11,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=728280.0, ans=0.2 +2024-09-19 19:17:13,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=728280.0, ans=0.0 +2024-09-19 19:17:41,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.75 vs. limit=10.0 +2024-09-19 19:17:47,176 INFO [train.py:1198] (0/2) Epoch 41, batch 1100, loss[loss=0.2269, ctc_loss=0.1073, cr_loss=0.3444, attn_decoder_loss=0.2325, over 29454.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1123, cr_loss=0.3524, attn_decoder_loss=0.2384, over 5756379.99 frames. ], batch size: 78, lr: 2.70e-03, grad_scale: 8.0 +2024-09-19 19:17:51,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=728400.0, ans=0.125 +2024-09-19 19:18:02,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=728440.0, ans=0.125 +2024-09-19 19:18:14,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=728440.0, ans=0.1 +2024-09-19 19:18:23,018 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.15 vs. limit=12.0 +2024-09-19 19:18:25,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.31 vs. limit=15.0 +2024-09-19 19:18:45,352 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.08 vs. limit=6.0 +2024-09-19 19:18:50,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=728560.0, ans=0.125 +2024-09-19 19:19:02,689 INFO [train.py:1198] (0/2) Epoch 41, batch 1150, loss[loss=0.226, ctc_loss=0.1107, cr_loss=0.3458, attn_decoder_loss=0.2311, over 29422.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1122, cr_loss=0.3523, attn_decoder_loss=0.2382, over 5754490.22 frames. 
], batch size: 78, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:19:08,825 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.299e+01 8.493e+01 8.986e+01 9.432e+01 3.581e+02, threshold=1.797e+02, percent-clipped=4.0 +2024-09-19 19:19:25,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=728640.0, ans=0.2 +2024-09-19 19:19:28,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=728640.0, ans=0.0 +2024-09-19 19:19:41,703 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.34 vs. limit=10.0 +2024-09-19 19:20:08,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=728760.0, ans=0.2 +2024-09-19 19:20:20,770 INFO [train.py:1198] (0/2) Epoch 41, batch 1200, loss[loss=0.2268, ctc_loss=0.0965, cr_loss=0.312, attn_decoder_loss=0.2344, over 29662.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1125, cr_loss=0.3527, attn_decoder_loss=0.2389, over 5746539.84 frames. ], batch size: 85, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:20:42,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=728840.0, ans=0.1 +2024-09-19 19:20:55,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.56 vs. limit=22.5 +2024-09-19 19:21:08,070 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.20 vs. limit=15.0 +2024-09-19 19:21:16,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=728920.0, ans=0.125 +2024-09-19 19:21:33,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=728960.0, ans=0.2 +2024-09-19 19:21:38,650 INFO [train.py:1198] (0/2) Epoch 41, batch 1250, loss[loss=0.2507, ctc_loss=0.1263, cr_loss=0.4046, attn_decoder_loss=0.2555, over 29561.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1129, cr_loss=0.3537, attn_decoder_loss=0.2396, over 5774193.70 frames. 
], batch size: 92, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:21:40,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=729000.0, ans=0.0 +2024-09-19 19:21:44,545 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 8.620e+01 9.115e+01 9.641e+01 1.627e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-19 19:21:48,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=729000.0, ans=0.2 +2024-09-19 19:21:52,743 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:21:54,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=729040.0, ans=0.2 +2024-09-19 19:21:55,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=729040.0, ans=0.0 +2024-09-19 19:22:03,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=729040.0, ans=0.2 +2024-09-19 19:22:26,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=729120.0, ans=0.0 +2024-09-19 19:22:31,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=729120.0, ans=0.0 +2024-09-19 19:22:32,008 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=729120.0, ans=0.0 +2024-09-19 19:22:47,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=729160.0, ans=0.125 +2024-09-19 19:22:54,565 INFO [train.py:1198] (0/2) Epoch 41, batch 1300, loss[loss=0.2351, ctc_loss=0.1027, cr_loss=0.3207, attn_decoder_loss=0.2427, over 28206.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1125, cr_loss=0.353, attn_decoder_loss=0.2393, over 5778630.18 frames. ], batch size: 111, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:23:02,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=729200.0, ans=0.125 +2024-09-19 19:23:02,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=729200.0, ans=0.125 +2024-09-19 19:23:22,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=729240.0, ans=0.125 +2024-09-19 19:23:32,426 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.29 vs. limit=22.5 +2024-09-19 19:23:54,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=729360.0, ans=0.125 +2024-09-19 19:23:55,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=729360.0, ans=0.0 +2024-09-19 19:24:10,603 INFO [train.py:1198] (0/2) Epoch 41, batch 1350, loss[loss=0.2344, ctc_loss=0.1126, cr_loss=0.3506, attn_decoder_loss=0.2402, over 29737.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1119, cr_loss=0.3521, attn_decoder_loss=0.2387, over 5795556.01 frames. 
], batch size: 81, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:24:17,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=729400.0, ans=0.125 +2024-09-19 19:24:18,638 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.744e+01 8.406e+01 8.862e+01 9.438e+01 1.295e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 19:24:22,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=729400.0, ans=0.125 +2024-09-19 19:24:24,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.93 vs. limit=10.0 +2024-09-19 19:24:29,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=729440.0, ans=0.1 +2024-09-19 19:25:05,574 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.19 vs. limit=6.0 +2024-09-19 19:25:30,641 INFO [train.py:1198] (0/2) Epoch 41, batch 1400, loss[loss=0.2107, ctc_loss=0.09775, cr_loss=0.3177, attn_decoder_loss=0.2162, over 29588.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1117, cr_loss=0.3515, attn_decoder_loss=0.2383, over 5807095.24 frames. ], batch size: 69, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:26:18,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=729720.0, ans=0.0 +2024-09-19 19:26:19,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.29 vs. limit=12.0 +2024-09-19 19:26:45,630 INFO [train.py:1198] (0/2) Epoch 41, batch 1450, loss[loss=0.2412, ctc_loss=0.1154, cr_loss=0.3434, attn_decoder_loss=0.2476, over 29422.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1119, cr_loss=0.3519, attn_decoder_loss=0.2388, over 5805047.48 frames. ], batch size: 94, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:26:51,370 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.433e+01 8.557e+01 9.068e+01 9.745e+01 1.592e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-19 19:27:02,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=729840.0, ans=0.025 +2024-09-19 19:27:37,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=729920.0, ans=0.5 +2024-09-19 19:27:40,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=729920.0, ans=0.025 +2024-09-19 19:27:46,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_na.min_abs, batch_count=729960.0, ans=0.02 +2024-09-19 19:27:47,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=729960.0, ans=0.0 +2024-09-19 19:28:03,357 INFO [train.py:1198] (0/2) Epoch 41, batch 1500, loss[loss=0.2402, ctc_loss=0.1164, cr_loss=0.3627, attn_decoder_loss=0.2459, over 29630.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1118, cr_loss=0.3515, attn_decoder_loss=0.239, over 5804787.69 frames. 
], batch size: 86, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:28:49,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=730120.0, ans=0.125 +2024-09-19 19:29:05,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.05 vs. limit=6.0 +2024-09-19 19:29:15,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=730160.0, ans=0.025 +2024-09-19 19:29:21,291 INFO [train.py:1198] (0/2) Epoch 41, batch 1550, loss[loss=0.2575, ctc_loss=0.1304, cr_loss=0.4053, attn_decoder_loss=0.2626, over 29504.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1123, cr_loss=0.3521, attn_decoder_loss=0.2389, over 5779722.47 frames. ], batch size: 90, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:29:27,257 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.571e+01 8.596e+01 9.016e+01 9.921e+01 2.014e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-19 19:29:35,136 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:29:42,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=730240.0, ans=0.0 +2024-09-19 19:29:58,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=730280.0, ans=0.125 +2024-09-19 19:30:09,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=730320.0, ans=0.2 +2024-09-19 19:30:36,416 INFO [train.py:1198] (0/2) Epoch 41, batch 1600, loss[loss=0.2411, ctc_loss=0.1174, cr_loss=0.3575, attn_decoder_loss=0.2469, over 29684.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1125, cr_loss=0.3525, attn_decoder_loss=0.2388, over 5762624.43 frames. 
], batch size: 85, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:30:42,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=730400.0, ans=0.125 +2024-09-19 19:30:44,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=730400.0, ans=0.125 +2024-09-19 19:31:10,228 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=730480.0, ans=0.0 +2024-09-19 19:31:13,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=730480.0, ans=0.5 +2024-09-19 19:31:13,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=730480.0, ans=0.2 +2024-09-19 19:31:23,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=730520.0, ans=0.0 +2024-09-19 19:31:23,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=730520.0, ans=0.2 +2024-09-19 19:31:37,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=730560.0, ans=0.125 +2024-09-19 19:31:48,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=730560.0, ans=0.125 +2024-09-19 19:31:51,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=730560.0, ans=0.125 +2024-09-19 19:31:54,179 INFO [train.py:1198] (0/2) Epoch 41, batch 1650, loss[loss=0.2437, ctc_loss=0.1172, cr_loss=0.3612, attn_decoder_loss=0.2497, over 29727.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1118, cr_loss=0.3512, attn_decoder_loss=0.2384, over 5757302.44 frames. ], batch size: 89, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:32:03,245 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.694e+01 8.587e+01 9.228e+01 9.861e+01 2.680e+02, threshold=1.846e+02, percent-clipped=1.0 +2024-09-19 19:32:18,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=730640.0, ans=0.0 +2024-09-19 19:32:40,403 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:33:01,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=730760.0, ans=0.125 +2024-09-19 19:33:11,335 INFO [train.py:1198] (0/2) Epoch 41, batch 1700, loss[loss=0.2131, ctc_loss=0.1037, cr_loss=0.3434, attn_decoder_loss=0.2176, over 29578.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1115, cr_loss=0.3503, attn_decoder_loss=0.2384, over 5779870.64 frames. 
], batch size: 69, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:33:11,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=730800.0, ans=0.1 +2024-09-19 19:33:26,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=730840.0, ans=0.125 +2024-09-19 19:33:53,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=730880.0, ans=0.1 +2024-09-19 19:33:59,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=730920.0, ans=0.125 +2024-09-19 19:33:59,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=730920.0, ans=0.125 +2024-09-19 19:34:26,871 INFO [train.py:1198] (0/2) Epoch 41, batch 1750, loss[loss=0.2022, ctc_loss=0.08466, cr_loss=0.2766, attn_decoder_loss=0.2091, over 29308.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1113, cr_loss=0.3496, attn_decoder_loss=0.2379, over 5788345.34 frames. ], batch size: 67, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:34:35,973 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.526e+01 8.612e+01 9.117e+01 9.709e+01 1.098e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-19 19:34:47,123 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.27 vs. limit=10.0 +2024-09-19 19:34:54,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=731040.0, ans=0.125 +2024-09-19 19:35:06,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=731080.0, ans=0.125 +2024-09-19 19:35:21,627 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.84 vs. limit=22.5 +2024-09-19 19:35:22,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=731120.0, ans=0.125 +2024-09-19 19:35:35,020 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.86 vs. limit=22.5 +2024-09-19 19:35:44,369 INFO [train.py:1198] (0/2) Epoch 41, batch 1800, loss[loss=0.2425, ctc_loss=0.1144, cr_loss=0.3708, attn_decoder_loss=0.2485, over 29685.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1117, cr_loss=0.3508, attn_decoder_loss=0.2383, over 5790314.90 frames. 
], batch size: 83, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:35:46,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=731200.0, ans=0.0 +2024-09-19 19:35:55,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=731200.0, ans=0.0 +2024-09-19 19:35:55,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=731200.0, ans=0.2 +2024-09-19 19:35:56,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=731200.0, ans=0.0 +2024-09-19 19:36:00,200 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=167.07 vs. limit=15.0 +2024-09-19 19:36:07,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=731240.0, ans=0.0 +2024-09-19 19:36:12,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=731240.0, ans=0.0 +2024-09-19 19:36:24,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=731280.0, ans=0.2 +2024-09-19 19:36:28,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=731320.0, ans=0.0 +2024-09-19 19:36:34,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=731320.0, ans=0.125 +2024-09-19 19:36:47,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.63 vs. limit=15.0 +2024-09-19 19:37:02,122 INFO [train.py:1198] (0/2) Epoch 41, batch 1850, loss[loss=0.2507, ctc_loss=0.1208, cr_loss=0.3871, attn_decoder_loss=0.2565, over 29625.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1115, cr_loss=0.3506, attn_decoder_loss=0.2381, over 5796257.21 frames. ], batch size: 86, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:37:10,992 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.558e+01 8.675e+01 9.084e+01 9.615e+01 1.395e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-19 19:37:39,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=731480.0, ans=0.125 +2024-09-19 19:37:46,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=731520.0, ans=0.1 +2024-09-19 19:37:48,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=731520.0, ans=0.05 +2024-09-19 19:37:49,528 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.70 vs. 
limit=10.0 +2024-09-19 19:38:00,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=731560.0, ans=0.025 +2024-09-19 19:38:08,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=731560.0, ans=0.125 +2024-09-19 19:38:14,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=731560.0, ans=0.125 +2024-09-19 19:38:14,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.49 vs. limit=22.5 +2024-09-19 19:38:17,062 INFO [train.py:1198] (0/2) Epoch 41, batch 1900, loss[loss=0.2478, ctc_loss=0.12, cr_loss=0.3755, attn_decoder_loss=0.2536, over 29695.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1118, cr_loss=0.3517, attn_decoder_loss=0.239, over 5804709.65 frames. ], batch size: 89, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:38:32,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=731640.0, ans=0.1 +2024-09-19 19:38:38,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=731640.0, ans=0.125 +2024-09-19 19:38:46,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=731680.0, ans=0.125 +2024-09-19 19:39:31,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=731760.0, ans=0.125 +2024-09-19 19:39:34,508 INFO [train.py:1198] (0/2) Epoch 41, batch 1950, loss[loss=0.23, ctc_loss=0.1162, cr_loss=0.3572, attn_decoder_loss=0.2347, over 29470.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.113, cr_loss=0.3542, attn_decoder_loss=0.2403, over 5819411.55 frames. ], batch size: 78, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:39:43,508 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.306e+01 8.775e+01 9.303e+01 9.846e+01 2.591e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-19 19:40:16,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=731880.0, ans=0.125 +2024-09-19 19:40:18,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=731920.0, ans=0.0 +2024-09-19 19:40:36,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=731960.0, ans=0.125 +2024-09-19 19:40:40,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=731960.0, ans=0.125 +2024-09-19 19:40:44,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.92 vs. limit=15.0 +2024-09-19 19:40:47,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.67 vs. limit=15.0 +2024-09-19 19:40:51,606 INFO [train.py:1198] (0/2) Epoch 41, batch 2000, loss[loss=0.2124, ctc_loss=0.1015, cr_loss=0.3245, attn_decoder_loss=0.2175, over 29373.00 frames. 
], tot_loss[loss=0.2349, ctc_loss=0.1131, cr_loss=0.3537, attn_decoder_loss=0.2405, over 5796017.08 frames. ], batch size: 67, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:40:59,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=732000.0, ans=0.125 +2024-09-19 19:41:36,467 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.08 vs. limit=15.0 +2024-09-19 19:41:37,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=732120.0, ans=0.125 +2024-09-19 19:42:07,266 INFO [train.py:1198] (0/2) Epoch 41, batch 2050, loss[loss=0.2122, ctc_loss=0.09986, cr_loss=0.3161, attn_decoder_loss=0.2177, over 29444.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1128, cr_loss=0.3528, attn_decoder_loss=0.2395, over 5788810.10 frames. ], batch size: 70, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:42:16,365 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.729e+01 8.645e+01 9.096e+01 9.473e+01 4.528e+02, threshold=1.819e+02, percent-clipped=2.0 +2024-09-19 19:42:16,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=732200.0, ans=0.125 +2024-09-19 19:42:25,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=732240.0, ans=0.0 +2024-09-19 19:42:28,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=732240.0, ans=0.0 +2024-09-19 19:42:41,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=732280.0, ans=0.0 +2024-09-19 19:42:45,671 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:42:48,815 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:42:52,460 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.72 vs. limit=15.0 +2024-09-19 19:42:54,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=732320.0, ans=0.05 +2024-09-19 19:43:12,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=732360.0, ans=0.125 +2024-09-19 19:43:24,684 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.43 vs. limit=22.5 +2024-09-19 19:43:25,409 INFO [train.py:1198] (0/2) Epoch 41, batch 2100, loss[loss=0.2295, ctc_loss=0.1109, cr_loss=0.3556, attn_decoder_loss=0.2348, over 29769.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1119, cr_loss=0.3515, attn_decoder_loss=0.2389, over 5801428.81 frames. 
], batch size: 81, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:43:43,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=732440.0, ans=0.125 +2024-09-19 19:43:43,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=732440.0, ans=0.125 +2024-09-19 19:43:52,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=732440.0, ans=0.125 +2024-09-19 19:44:00,652 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.49 vs. limit=15.0 +2024-09-19 19:44:03,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=732480.0, ans=0.125 +2024-09-19 19:44:33,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=732560.0, ans=0.0 +2024-09-19 19:44:34,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.88 vs. limit=22.5 +2024-09-19 19:44:42,400 INFO [train.py:1198] (0/2) Epoch 41, batch 2150, loss[loss=0.2297, ctc_loss=0.1109, cr_loss=0.3591, attn_decoder_loss=0.2349, over 29444.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1114, cr_loss=0.3504, attn_decoder_loss=0.2382, over 5816373.07 frames. ], batch size: 78, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:44:48,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=732600.0, ans=0.0 +2024-09-19 19:44:50,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=732600.0, ans=0.0 +2024-09-19 19:44:51,571 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.415e+01 8.227e+01 8.830e+01 9.472e+01 1.149e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-19 19:44:51,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=732600.0, ans=0.125 +2024-09-19 19:45:02,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=732640.0, ans=0.0 +2024-09-19 19:45:13,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=732680.0, ans=0.0 +2024-09-19 19:45:55,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=732760.0, ans=0.0 +2024-09-19 19:45:58,476 INFO [train.py:1198] (0/2) Epoch 41, batch 2200, loss[loss=0.2403, ctc_loss=0.1153, cr_loss=0.3581, attn_decoder_loss=0.2462, over 29610.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1117, cr_loss=0.351, attn_decoder_loss=0.2385, over 5812369.48 frames. 
], batch size: 86, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:46:12,497 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=732840.0, ans=0.0 +2024-09-19 19:46:51,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=732920.0, ans=0.125 +2024-09-19 19:47:16,340 INFO [train.py:1198] (0/2) Epoch 41, batch 2250, loss[loss=0.2354, ctc_loss=0.1137, cr_loss=0.3606, attn_decoder_loss=0.2409, over 29688.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1114, cr_loss=0.351, attn_decoder_loss=0.2383, over 5811424.46 frames. ], batch size: 82, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:47:19,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=733000.0, ans=0.5 +2024-09-19 19:47:25,242 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.035e+01 8.546e+01 9.093e+01 9.694e+01 2.560e+02, threshold=1.819e+02, percent-clipped=3.0 +2024-09-19 19:47:25,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=733000.0, ans=0.125 +2024-09-19 19:47:25,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=733000.0, ans=0.0 +2024-09-19 19:47:53,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=733080.0, ans=0.125 +2024-09-19 19:48:03,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=733120.0, ans=0.1 +2024-09-19 19:48:06,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=733120.0, ans=0.0 +2024-09-19 19:48:10,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=733120.0, ans=0.125 +2024-09-19 19:48:33,637 INFO [train.py:1198] (0/2) Epoch 41, batch 2300, loss[loss=0.2072, ctc_loss=0.09647, cr_loss=0.3145, attn_decoder_loss=0.2125, over 29322.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1111, cr_loss=0.3503, attn_decoder_loss=0.2375, over 5798408.92 frames. ], batch size: 71, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:48:34,977 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.89 vs. limit=6.0 +2024-09-19 19:48:55,824 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.44 vs. limit=10.0 +2024-09-19 19:49:08,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=733280.0, ans=0.95 +2024-09-19 19:49:16,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=733280.0, ans=0.125 +2024-09-19 19:49:19,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=733320.0, ans=0.0 +2024-09-19 19:49:49,348 INFO [train.py:1198] (0/2) Epoch 41, batch 2350, loss[loss=0.249, ctc_loss=0.1226, cr_loss=0.3793, attn_decoder_loss=0.2547, over 29674.00 frames. 
], tot_loss[loss=0.2319, ctc_loss=0.1112, cr_loss=0.3501, attn_decoder_loss=0.2376, over 5804755.30 frames. ], batch size: 83, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:49:58,164 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.660e+01 9.088e+01 9.774e+01 1.601e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-19 19:50:04,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=733440.0, ans=0.1 +2024-09-19 19:50:16,620 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=733440.0, ans=0.0 +2024-09-19 19:50:19,973 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.12 vs. limit=22.5 +2024-09-19 19:50:21,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=733480.0, ans=0.125 +2024-09-19 19:50:24,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=733480.0, ans=0.125 +2024-09-19 19:50:28,702 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:50:39,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=733520.0, ans=0.0 +2024-09-19 19:51:06,692 INFO [train.py:1198] (0/2) Epoch 41, batch 2400, loss[loss=0.2181, ctc_loss=0.1011, cr_loss=0.3239, attn_decoder_loss=0.2239, over 29539.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1119, cr_loss=0.3517, attn_decoder_loss=0.2383, over 5807978.56 frames. ], batch size: 76, lr: 2.69e-03, grad_scale: 32.0 +2024-09-19 19:51:12,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=733600.0, ans=0.125 +2024-09-19 19:51:34,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=733640.0, ans=0.95 +2024-09-19 19:52:01,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=733720.0, ans=0.125 +2024-09-19 19:52:02,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=733720.0, ans=0.125 +2024-09-19 19:52:09,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=733760.0, ans=0.0 +2024-09-19 19:52:18,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=733760.0, ans=0.125 +2024-09-19 19:52:24,348 INFO [train.py:1198] (0/2) Epoch 41, batch 2450, loss[loss=0.256, ctc_loss=0.1339, cr_loss=0.4267, attn_decoder_loss=0.26, over 29680.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1123, cr_loss=0.3527, attn_decoder_loss=0.2389, over 5783457.31 frames. 
], batch size: 82, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:52:33,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=733800.0, ans=0.125 +2024-09-19 19:52:34,693 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.580e+01 8.655e+01 9.209e+01 9.754e+01 2.010e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-19 19:53:12,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=733920.0, ans=0.1 +2024-09-19 19:53:33,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=733960.0, ans=0.1 +2024-09-19 19:53:39,343 INFO [train.py:1198] (0/2) Epoch 41, batch 2500, loss[loss=0.2502, ctc_loss=0.1252, cr_loss=0.3802, attn_decoder_loss=0.2556, over 29619.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1125, cr_loss=0.3536, attn_decoder_loss=0.239, over 5793375.91 frames. ], batch size: 86, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:53:42,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=734000.0, ans=0.0 +2024-09-19 19:53:52,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=734000.0, ans=0.0 +2024-09-19 19:54:17,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=734080.0, ans=0.125 +2024-09-19 19:54:22,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=734080.0, ans=0.035 +2024-09-19 19:54:41,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=734160.0, ans=0.125 +2024-09-19 19:54:45,434 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.28 vs. limit=15.0 +2024-09-19 19:54:46,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=734160.0, ans=0.0 +2024-09-19 19:54:50,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_abs, batch_count=734160.0, ans=0.5 +2024-09-19 19:54:57,246 INFO [train.py:1198] (0/2) Epoch 41, batch 2550, loss[loss=0.2014, ctc_loss=0.09104, cr_loss=0.3116, attn_decoder_loss=0.2067, over 29368.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1122, cr_loss=0.3533, attn_decoder_loss=0.239, over 5797163.21 frames. 
], batch size: 67, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:55:03,622 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:55:07,640 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.421e+01 8.984e+01 9.489e+01 4.917e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 19:55:24,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=734240.0, ans=0.1 +2024-09-19 19:55:25,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=734280.0, ans=0.125 +2024-09-19 19:55:33,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=734280.0, ans=0.025 +2024-09-19 19:55:51,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=734320.0, ans=0.0 +2024-09-19 19:55:58,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=734360.0, ans=0.125 +2024-09-19 19:55:59,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=734360.0, ans=0.125 +2024-09-19 19:56:13,327 INFO [train.py:1198] (0/2) Epoch 41, batch 2600, loss[loss=0.229, ctc_loss=0.1118, cr_loss=0.3474, attn_decoder_loss=0.2343, over 29430.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1122, cr_loss=0.353, attn_decoder_loss=0.239, over 5793901.17 frames. ], batch size: 78, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:56:14,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.72 vs. limit=15.0 +2024-09-19 19:56:33,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=734440.0, ans=0.125 +2024-09-19 19:56:34,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=734440.0, ans=10.0 +2024-09-19 19:56:38,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=734440.0, ans=0.0 +2024-09-19 19:56:44,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=734480.0, ans=0.125 +2024-09-19 19:57:02,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=734520.0, ans=0.125 +2024-09-19 19:57:09,066 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.27 vs. limit=15.0 +2024-09-19 19:57:15,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=734560.0, ans=0.125 +2024-09-19 19:57:20,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=734560.0, ans=0.125 +2024-09-19 19:57:30,508 INFO [train.py:1198] (0/2) Epoch 41, batch 2650, loss[loss=0.2403, ctc_loss=0.1108, cr_loss=0.3417, attn_decoder_loss=0.2471, over 29266.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1121, cr_loss=0.3532, attn_decoder_loss=0.2393, over 5799790.18 frames. 
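The ScheduledFloat lines report hyperparameters (dropout probabilities, skip rates, balancer targets) whose values are functions of `batch_count` rather than constants. In the Zipformer recipe these follow a piecewise-linear schedule over the batch index; a minimal re-implementation of the idea, with illustrative knot points that are not taken from this run:

```python
# Piecewise-linear schedule over batch count, the idea behind ScheduledFloat.
def scheduled_float(batch_count, schedule):
    schedule = sorted(schedule)  # list of (batch_count, value) knots
    if batch_count <= schedule[0][0]:
        return schedule[0][1]
    if batch_count >= schedule[-1][0]:
        return schedule[-1][1]
    for (x0, y0), (x1, y1) in zip(schedule, schedule[1:]):
        if x0 <= batch_count <= x1:
            return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)

# A dropout that decays from 0.3 to 0.1 over the first 20k batches and then
# stays flat is consistent with the constant ans=0.1 readings at
# batch_count ~ 7.3e5 in this log.
print(scheduled_float(733800.0, [(0.0, 0.3), (20000.0, 0.1)]))  # -> 0.1
```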
], batch size: 100, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:57:35,544 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:57:36,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=734600.0, ans=0.2 +2024-09-19 19:57:38,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=734600.0, ans=0.0 +2024-09-19 19:57:41,310 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.192e+01 8.633e+01 9.136e+01 9.710e+01 1.315e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 19:57:52,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=734640.0, ans=0.04949747468305833 +2024-09-19 19:58:22,675 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.24 vs. limit=22.5 +2024-09-19 19:58:35,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=734760.0, ans=0.1 +2024-09-19 19:58:38,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=734760.0, ans=0.125 +2024-09-19 19:58:48,358 INFO [train.py:1198] (0/2) Epoch 41, batch 2700, loss[loss=0.2361, ctc_loss=0.1033, cr_loss=0.3239, attn_decoder_loss=0.2436, over 29516.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1122, cr_loss=0.3529, attn_decoder_loss=0.2394, over 5795354.23 frames. ], batch size: 87, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:58:56,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=734800.0, ans=0.2 +2024-09-19 19:59:00,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=734800.0, ans=0.125 +2024-09-19 19:59:31,153 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.64 vs. limit=15.0 +2024-09-19 19:59:32,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=734920.0, ans=0.125 +2024-09-19 19:59:35,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=734920.0, ans=0.125 +2024-09-19 19:59:36,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=734920.0, ans=0.0 +2024-09-19 19:59:49,475 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.68 vs. limit=15.0 +2024-09-19 20:00:03,693 INFO [train.py:1198] (0/2) Epoch 41, batch 2750, loss[loss=0.2168, ctc_loss=0.09663, cr_loss=0.3127, attn_decoder_loss=0.2233, over 29530.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1114, cr_loss=0.3508, attn_decoder_loss=0.2384, over 5795139.98 frames. 
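The Whitening lines compare a per-module statistic against a limit (e.g. `metric=20.24 vs. limit=22.5` above); the module only pushes activations toward a whiter covariance when the metric exceeds its limit. One metric with the right behaviour, equal to 1.0 for a perfectly whitened (isotropic) covariance and growing as channels become correlated, is sketched below; the exact formula in `scaling.py`, including its per-group handling, may differ:

```python
import torch

def whitening_metric(x: torch.Tensor) -> float:
    # x: (num_frames, num_channels). Returns 1.0 when the channel covariance
    # is proportional to the identity; larger when channels are correlated
    # or unevenly scaled. (Assumed variant; grouping and smoothing omitted.)
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.T @ x) / x.shape[0]
    num_channels = cov.shape[0]
    return ((cov ** 2).sum() / num_channels / torch.diagonal(cov).mean() ** 2).item()

torch.manual_seed(0)
white = torch.randn(10000, 256)
print(whitening_metric(white))  # close to 1.0

lowrank = white[:, :8] @ torch.randn(8, 256)  # rank-8 features in 256 dims
print(whitening_metric(lowrank))  # ~ num_channels / rank (~32 here), far above 1
```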
], batch size: 75, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:00:12,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=735000.0, ans=0.0 +2024-09-19 20:00:14,116 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.948e+01 8.495e+01 8.920e+01 9.727e+01 1.790e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-19 20:00:28,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=735040.0, ans=0.04949747468305833 +2024-09-19 20:00:32,387 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.32 vs. limit=15.0 +2024-09-19 20:01:20,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=735200.0, ans=0.125 +2024-09-19 20:01:21,645 INFO [train.py:1198] (0/2) Epoch 41, batch 2800, loss[loss=0.2517, ctc_loss=0.1334, cr_loss=0.3706, attn_decoder_loss=0.2566, over 20152.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1119, cr_loss=0.3519, attn_decoder_loss=0.2387, over 5776301.33 frames. ], batch size: 209, lr: 2.68e-03, grad_scale: 32.0 +2024-09-19 20:01:34,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=735200.0, ans=0.0 +2024-09-19 20:01:44,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=735240.0, ans=0.125 +2024-09-19 20:01:47,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=735240.0, ans=0.125 +2024-09-19 20:01:48,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=735240.0, ans=0.2 +2024-09-19 20:01:54,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=735280.0, ans=0.1 +2024-09-19 20:02:06,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=735320.0, ans=0.1 +2024-09-19 20:02:08,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=735320.0, ans=0.0 +2024-09-19 20:02:38,653 INFO [train.py:1198] (0/2) Epoch 41, batch 2850, loss[loss=0.2214, ctc_loss=0.1019, cr_loss=0.3253, attn_decoder_loss=0.2274, over 29504.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1121, cr_loss=0.352, attn_decoder_loss=0.2392, over 5761712.31 frames. 
], batch size: 77, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:02:48,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=735400.0, ans=0.025 +2024-09-19 20:02:50,642 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.779e+01 8.742e+01 9.309e+01 1.007e+02 1.847e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-19 20:03:04,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=735440.0, ans=0.125 +2024-09-19 20:03:25,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=735520.0, ans=0.125 +2024-09-19 20:03:53,277 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.39 vs. limit=15.0 +2024-09-19 20:03:53,522 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.07 vs. limit=15.0 +2024-09-19 20:03:54,114 INFO [train.py:1198] (0/2) Epoch 41, batch 2900, loss[loss=0.2282, ctc_loss=0.1077, cr_loss=0.3408, attn_decoder_loss=0.2341, over 29423.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1131, cr_loss=0.3545, attn_decoder_loss=0.2405, over 5787572.51 frames. ], batch size: 79, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:04:05,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.39 vs. limit=15.0 +2024-09-19 20:04:22,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=735640.0, ans=0.125 +2024-09-19 20:04:46,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=735720.0, ans=0.125 +2024-09-19 20:05:01,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=735760.0, ans=0.025 +2024-09-19 20:05:12,053 INFO [train.py:1198] (0/2) Epoch 41, batch 2950, loss[loss=0.231, ctc_loss=0.113, cr_loss=0.3633, attn_decoder_loss=0.2361, over 29522.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1124, cr_loss=0.3527, attn_decoder_loss=0.2394, over 5781734.07 frames. ], batch size: 75, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:05:15,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=735800.0, ans=0.125 +2024-09-19 20:05:24,172 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.467e+01 8.387e+01 8.869e+01 9.638e+01 2.369e+02, threshold=1.774e+02, percent-clipped=2.0 +2024-09-19 20:05:30,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=735840.0, ans=0.0 +2024-09-19 20:05:48,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=735880.0, ans=0.0 +2024-09-19 20:05:48,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=735880.0, ans=0.125 +2024-09-19 20:06:02,917 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.52 vs. 
limit=22.5 +2024-09-19 20:06:11,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=735960.0, ans=0.125 +2024-09-19 20:06:28,787 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-184000.pt +2024-09-19 20:06:37,443 INFO [train.py:1198] (0/2) Epoch 41, batch 3000, loss[loss=0.2456, ctc_loss=0.1209, cr_loss=0.3703, attn_decoder_loss=0.2513, over 29754.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1128, cr_loss=0.3536, attn_decoder_loss=0.2394, over 5781709.65 frames. ], batch size: 81, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:06:37,443 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 20:06:55,725 INFO [train.py:1230] (0/2) Epoch 41, validation: loss=0.2123, ctc_loss=0.03697, cr_loss=6.466e-15, attn_decoder_loss=0.2318, over 944034.00 frames. +2024-09-19 20:06:55,725 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 20:07:12,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=736040.0, ans=0.125 +2024-09-19 20:07:26,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=736080.0, ans=0.125 +2024-09-19 20:07:32,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=736080.0, ans=0.125 +2024-09-19 20:07:38,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=736080.0, ans=0.0 +2024-09-19 20:07:40,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=736120.0, ans=0.07 +2024-09-19 20:07:52,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=736120.0, ans=0.07 +2024-09-19 20:08:13,492 INFO [train.py:1198] (0/2) Epoch 41, batch 3050, loss[loss=0.2354, ctc_loss=0.1199, cr_loss=0.3596, attn_decoder_loss=0.2403, over 29509.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1134, cr_loss=0.3551, attn_decoder_loss=0.2401, over 5775946.74 frames. ], batch size: 76, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:08:14,172 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.30 vs. limit=22.5 +2024-09-19 20:08:25,659 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.453e+01 8.668e+01 9.193e+01 9.788e+01 2.004e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-19 20:08:34,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=736240.0, ans=0.125 +2024-09-19 20:08:38,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=736240.0, ans=0.125 +2024-09-19 20:09:23,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.41 vs. limit=22.5 +2024-09-19 20:09:28,914 INFO [train.py:1198] (0/2) Epoch 41, batch 3100, loss[loss=0.2426, ctc_loss=0.1186, cr_loss=0.366, attn_decoder_loss=0.2483, over 29273.00 frames. 
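Two details in the validation entries above are worth noting. First, `cr_loss` collapses to ~6e-15: the consistency-regularization (CR) term appears to compare outputs across time-masked copies of the input (the experiment directory name mentions `time-mask-ratio-2.5`), so with masking disabled in eval it is numerically zero. Second, the same 0.1/0.9/0.02 weighting inferred earlier reproduces the headline validation loss:

```python
# Assuming the weighting inferred from the experiment directory name:
ctc, attn, cr = 0.03697, 0.2318, 6.466e-15
print(0.1 * ctc + 0.9 * attn + 0.02 * cr)  # ~0.2123, as logged
```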
], tot_loss[loss=0.234, ctc_loss=0.113, cr_loss=0.3539, attn_decoder_loss=0.2396, over 5775907.98 frames. ], batch size: 100, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:09:42,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=736440.0, ans=0.1 +2024-09-19 20:09:45,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=736440.0, ans=0.0 +2024-09-19 20:09:53,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=736440.0, ans=0.2 +2024-09-19 20:10:11,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=736480.0, ans=0.1 +2024-09-19 20:10:25,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=736520.0, ans=0.125 +2024-09-19 20:10:31,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=736560.0, ans=0.1 +2024-09-19 20:10:46,493 INFO [train.py:1198] (0/2) Epoch 41, batch 3150, loss[loss=0.237, ctc_loss=0.1126, cr_loss=0.3494, attn_decoder_loss=0.243, over 28926.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1131, cr_loss=0.3541, attn_decoder_loss=0.2398, over 5782412.71 frames. ], batch size: 104, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:10:58,490 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.553e+01 9.133e+01 9.719e+01 1.833e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 20:10:58,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=736600.0, ans=0.1 +2024-09-19 20:11:03,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=736640.0, ans=0.125 +2024-09-19 20:11:12,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=736640.0, ans=0.125 +2024-09-19 20:11:27,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=736680.0, ans=0.0 +2024-09-19 20:11:49,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=736760.0, ans=0.125 +2024-09-19 20:12:04,119 INFO [train.py:1198] (0/2) Epoch 41, batch 3200, loss[loss=0.2304, ctc_loss=0.1043, cr_loss=0.3269, attn_decoder_loss=0.2371, over 29432.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1125, cr_loss=0.3532, attn_decoder_loss=0.2392, over 5793452.91 frames. 
], batch size: 79, lr: 2.68e-03, grad_scale: 32.0 +2024-09-19 20:12:07,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=736800.0, ans=0.1 +2024-09-19 20:12:21,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=736840.0, ans=0.125 +2024-09-19 20:12:22,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=736840.0, ans=0.125 +2024-09-19 20:12:25,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=736840.0, ans=0.125 +2024-09-19 20:12:52,006 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.43 vs. limit=12.0 +2024-09-19 20:12:57,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=736920.0, ans=0.125 +2024-09-19 20:13:20,182 INFO [train.py:1198] (0/2) Epoch 41, batch 3250, loss[loss=0.232, ctc_loss=0.1125, cr_loss=0.3434, attn_decoder_loss=0.2377, over 29692.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1128, cr_loss=0.3538, attn_decoder_loss=0.2398, over 5799548.25 frames. ], batch size: 84, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:13:33,816 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 8.531e+01 9.147e+01 9.717e+01 1.259e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-19 20:13:40,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=737040.0, ans=0.2 +2024-09-19 20:13:53,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=737080.0, ans=0.0 +2024-09-19 20:14:09,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=737120.0, ans=0.125 +2024-09-19 20:14:09,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=737120.0, ans=0.125 +2024-09-19 20:14:27,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=737160.0, ans=0.125 +2024-09-19 20:14:37,474 INFO [train.py:1198] (0/2) Epoch 41, batch 3300, loss[loss=0.242, ctc_loss=0.1084, cr_loss=0.3539, attn_decoder_loss=0.249, over 28614.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1118, cr_loss=0.3516, attn_decoder_loss=0.2385, over 5797120.50 frames. ], batch size: 112, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:14:44,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=737200.0, ans=0.125 +2024-09-19 20:15:07,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=737280.0, ans=0.125 +2024-09-19 20:15:52,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=737360.0, ans=0.0 +2024-09-19 20:15:52,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.39 vs. 
limit=10.0 +2024-09-19 20:15:53,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=737400.0, ans=0.125 +2024-09-19 20:15:54,696 INFO [train.py:1198] (0/2) Epoch 41, batch 3350, loss[loss=0.2383, ctc_loss=0.1129, cr_loss=0.3516, attn_decoder_loss=0.2444, over 28886.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1125, cr_loss=0.3529, attn_decoder_loss=0.2391, over 5773908.28 frames. ], batch size: 104, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:15:59,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=737400.0, ans=0.0 +2024-09-19 20:15:59,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=737400.0, ans=0.2 +2024-09-19 20:16:08,347 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 8.656e+01 9.093e+01 9.789e+01 1.911e+02, threshold=1.819e+02, percent-clipped=2.0 +2024-09-19 20:16:35,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=737480.0, ans=0.125 +2024-09-19 20:16:46,912 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=12.0 +2024-09-19 20:17:06,486 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.39 vs. limit=15.0 +2024-09-19 20:17:10,491 INFO [train.py:1198] (0/2) Epoch 41, batch 3400, loss[loss=0.2086, ctc_loss=0.09939, cr_loss=0.3146, attn_decoder_loss=0.2137, over 29358.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1125, cr_loss=0.3527, attn_decoder_loss=0.239, over 5766010.78 frames. ], batch size: 67, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:17:15,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=737600.0, ans=0.0 +2024-09-19 20:17:15,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=737600.0, ans=0.1 +2024-09-19 20:17:19,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=737600.0, ans=0.0 +2024-09-19 20:17:56,162 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.71 vs. limit=15.0 +2024-09-19 20:17:56,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=737720.0, ans=0.125 +2024-09-19 20:18:21,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=737760.0, ans=0.125 +2024-09-19 20:18:25,889 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.82 vs. limit=12.0 +2024-09-19 20:18:26,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=737800.0, ans=10.0 +2024-09-19 20:18:28,097 INFO [train.py:1198] (0/2) Epoch 41, batch 3450, loss[loss=0.2445, ctc_loss=0.107, cr_loss=0.3405, attn_decoder_loss=0.2522, over 28187.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1124, cr_loss=0.3526, attn_decoder_loss=0.2392, over 5775145.21 frames. 
], batch size: 111, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:18:28,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=737800.0, ans=0.07 +2024-09-19 20:18:41,841 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.497e+01 9.130e+01 9.574e+01 2.613e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-19 20:18:46,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=737840.0, ans=0.125 +2024-09-19 20:18:58,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=737880.0, ans=0.1 +2024-09-19 20:19:20,426 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.39 vs. limit=15.0 +2024-09-19 20:19:27,756 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.64 vs. limit=22.5 +2024-09-19 20:19:33,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=737960.0, ans=0.125 +2024-09-19 20:19:43,464 INFO [train.py:1198] (0/2) Epoch 41, batch 3500, loss[loss=0.2161, ctc_loss=0.102, cr_loss=0.3541, attn_decoder_loss=0.2209, over 29335.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1122, cr_loss=0.3524, attn_decoder_loss=0.2388, over 5777714.49 frames. ], batch size: 71, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:19:50,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=738000.0, ans=0.2 +2024-09-19 20:20:00,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=738040.0, ans=0.0 +2024-09-19 20:20:17,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_positive, batch_count=738080.0, ans=0.05 +2024-09-19 20:20:28,351 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.42 vs. limit=15.0 +2024-09-19 20:20:35,366 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.71 vs. limit=15.0 +2024-09-19 20:20:42,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=738120.0, ans=0.1 +2024-09-19 20:20:46,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=738160.0, ans=0.125 +2024-09-19 20:20:49,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=738160.0, ans=0.0 +2024-09-19 20:20:51,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=738160.0, ans=0.0 +2024-09-19 20:20:59,911 INFO [train.py:1198] (0/2) Epoch 41, batch 3550, loss[loss=0.2389, ctc_loss=0.1091, cr_loss=0.3343, attn_decoder_loss=0.2459, over 29708.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1117, cr_loss=0.3513, attn_decoder_loss=0.2384, over 5784314.26 frames. 
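The `grad_scale` column halves from 32.0 (batch 3200) to 16.0 and then 8.0 (batch 3500) before growing back to 16.0, which is the standard automatic-mixed-precision dynamic: the loss scale backs off when a scaled gradient overflows and grows again after a run of clean steps. A generic `torch.cuda.amp` sketch of those dynamics; the recipe's actual scaler configuration is assumed, not quoted:

```python
import torch

# On overflow the scale is multiplied by backoff_factor (0.5); after
# growth_interval clean steps it is multiplied by growth_factor (2.0),
# producing the 8 <-> 16 <-> 32 pattern of grad_scale seen in the log.
scaler = torch.cuda.amp.GradScaler(
    init_scale=16.0, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000
)

def training_step(model, optimizer, batch, loss_fn):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = loss_fn(model(batch))
    scaler.scale(loss).backward()
    scaler.step(optimizer)  # skipped internally if the gradients overflowed
    scaler.update()         # adjusts the scale according to the rules above
```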
], batch size: 89, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:21:03,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=738200.0, ans=0.025 +2024-09-19 20:21:04,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=738200.0, ans=0.05 +2024-09-19 20:21:14,692 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.447e+01 8.523e+01 8.996e+01 9.507e+01 2.339e+02, threshold=1.799e+02, percent-clipped=2.0 +2024-09-19 20:21:28,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=738280.0, ans=0.0 +2024-09-19 20:21:34,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.40 vs. limit=22.5 +2024-09-19 20:21:47,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=738320.0, ans=0.125 +2024-09-19 20:22:03,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=738360.0, ans=0.1 +2024-09-19 20:22:14,208 INFO [train.py:1198] (0/2) Epoch 41, batch 3600, loss[loss=0.228, ctc_loss=0.1123, cr_loss=0.3573, attn_decoder_loss=0.2329, over 29518.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1116, cr_loss=0.3513, attn_decoder_loss=0.2386, over 5792162.69 frames. ], batch size: 77, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:22:33,362 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.97 vs. limit=10.0 +2024-09-19 20:22:33,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=738440.0, ans=0.0 +2024-09-19 20:22:50,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=738480.0, ans=0.0 +2024-09-19 20:22:55,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=738480.0, ans=0.1 +2024-09-19 20:23:07,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=738520.0, ans=0.125 +2024-09-19 20:23:17,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=738560.0, ans=0.125 +2024-09-19 20:23:30,391 INFO [train.py:1198] (0/2) Epoch 41, batch 3650, loss[loss=0.2548, ctc_loss=0.135, cr_loss=0.4031, attn_decoder_loss=0.2591, over 29507.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1118, cr_loss=0.3516, attn_decoder_loss=0.2382, over 5794309.16 frames. ], batch size: 90, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:23:44,809 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.32 vs. limit=15.0 +2024-09-19 20:23:46,680 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.200e+01 8.451e+01 9.065e+01 9.454e+01 1.125e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-19 20:23:49,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.54 vs. 
limit=15.0 +2024-09-19 20:23:58,138 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.32 vs. limit=15.0 +2024-09-19 20:24:16,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=738720.0, ans=0.125 +2024-09-19 20:24:16,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=738720.0, ans=0.125 +2024-09-19 20:24:17,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.25 vs. limit=10.0 +2024-09-19 20:24:22,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=738720.0, ans=0.1 +2024-09-19 20:24:31,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=738760.0, ans=0.0 +2024-09-19 20:24:43,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=738800.0, ans=0.0 +2024-09-19 20:24:44,919 INFO [train.py:1198] (0/2) Epoch 41, batch 3700, loss[loss=0.2328, ctc_loss=0.11, cr_loss=0.3408, attn_decoder_loss=0.2388, over 29698.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.112, cr_loss=0.3521, attn_decoder_loss=0.2385, over 5804851.69 frames. ], batch size: 84, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:24:48,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=738800.0, ans=0.0 +2024-09-19 20:24:48,673 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.39 vs. limit=22.5 +2024-09-19 20:25:53,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=738960.0, ans=0.0 +2024-09-19 20:25:58,933 INFO [train.py:1198] (0/2) Epoch 41, batch 3750, loss[loss=0.2126, ctc_loss=0.1055, cr_loss=0.3478, attn_decoder_loss=0.2168, over 29353.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1118, cr_loss=0.3519, attn_decoder_loss=0.2382, over 5808283.97 frames. ], batch size: 67, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:26:11,533 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:26:17,099 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.679e+01 8.549e+01 9.026e+01 9.637e+01 1.696e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 20:26:33,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=739080.0, ans=0.0 +2024-09-19 20:26:35,009 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:26:35,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=739080.0, ans=0.125 +2024-09-19 20:26:44,041 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=739120.0, ans=0.0 +2024-09-19 20:26:50,829 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.54 vs. 
limit=15.0 +2024-09-19 20:26:51,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=739120.0, ans=0.125 +2024-09-19 20:26:51,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=739120.0, ans=0.125 +2024-09-19 20:26:53,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=739120.0, ans=0.05 +2024-09-19 20:26:56,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=739120.0, ans=0.1 +2024-09-19 20:27:14,354 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:27:15,520 INFO [train.py:1198] (0/2) Epoch 41, batch 3800, loss[loss=0.236, ctc_loss=0.1047, cr_loss=0.3276, attn_decoder_loss=0.2433, over 29651.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1117, cr_loss=0.3512, attn_decoder_loss=0.2379, over 5797868.65 frames. ], batch size: 86, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:27:15,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=739200.0, ans=0.025 +2024-09-19 20:27:17,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=739200.0, ans=0.0 +2024-09-19 20:27:30,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=739240.0, ans=0.125 +2024-09-19 20:27:41,222 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:27:53,836 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.54 vs. limit=15.0 +2024-09-19 20:28:11,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=739320.0, ans=0.125 +2024-09-19 20:28:17,695 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.08 vs. limit=15.0 +2024-09-19 20:28:29,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=739400.0, ans=0.2 +2024-09-19 20:28:30,237 INFO [train.py:1198] (0/2) Epoch 41, batch 3850, loss[loss=0.2334, ctc_loss=0.1038, cr_loss=0.3285, attn_decoder_loss=0.2405, over 29202.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1114, cr_loss=0.3507, attn_decoder_loss=0.2378, over 5809614.31 frames. ], batch size: 100, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:28:37,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=739400.0, ans=0.2 +2024-09-19 20:28:45,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.74 vs. 
limit=12.0 +2024-09-19 20:28:47,850 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 8.446e+01 9.109e+01 9.536e+01 1.999e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-19 20:28:49,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=739440.0, ans=0.0 +2024-09-19 20:28:51,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=739440.0, ans=0.025 +2024-09-19 20:28:57,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=739440.0, ans=0.125 +2024-09-19 20:29:03,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=739480.0, ans=0.2 +2024-09-19 20:29:04,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=739480.0, ans=0.125 +2024-09-19 20:29:08,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=739480.0, ans=0.125 +2024-09-19 20:29:15,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=739520.0, ans=0.125 +2024-09-19 20:29:46,225 INFO [train.py:1198] (0/2) Epoch 41, batch 3900, loss[loss=0.2392, ctc_loss=0.1087, cr_loss=0.3513, attn_decoder_loss=0.2458, over 29641.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1114, cr_loss=0.3507, attn_decoder_loss=0.2382, over 5814794.75 frames. ], batch size: 86, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:29:46,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=739600.0, ans=0.2 +2024-09-19 20:29:52,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=739600.0, ans=0.0 +2024-09-19 20:30:13,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.47 vs. limit=15.0 +2024-09-19 20:30:33,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=739720.0, ans=0.125 +2024-09-19 20:30:46,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=739760.0, ans=0.0 +2024-09-19 20:30:49,094 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.50 vs. limit=15.0 +2024-09-19 20:30:55,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=739760.0, ans=0.07 +2024-09-19 20:30:57,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=739760.0, ans=0.0 +2024-09-19 20:30:59,903 INFO [train.py:1198] (0/2) Epoch 41, batch 3950, loss[loss=0.2506, ctc_loss=0.1262, cr_loss=0.383, attn_decoder_loss=0.256, over 29454.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1113, cr_loss=0.3504, attn_decoder_loss=0.2381, over 5834225.96 frames. 
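The `lr` column decays slowly within the epoch (2.69e-03 down to 2.67e-03 across these batches) and steps down again when epoch 42 starts further below. icefall recipes typically drive this with the Eden schedule, whose factor decays in both the batch index and the epoch index; a sketch of that shape, with `base_lr` and the two time constants assumed rather than taken from this run:

```python
def eden_lr(base_lr, batch, epoch, lr_batches=7500.0, lr_epochs=3.5):
    # Eden-style decay: smooth in the batch index, with an extra per-epoch
    # factor. All constants here are illustrative.
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor

# With these illustrative constants this prints ~1.3e-03; the run's own
# base_lr and time constants evidently differ, but the shape is the same.
print(eden_lr(0.045, batch=736000, epoch=41.5))
```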
], batch size: 97, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:31:16,084 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.077e+01 8.615e+01 9.061e+01 9.543e+01 2.103e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 20:31:20,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=739840.0, ans=0.125 +2024-09-19 20:31:45,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=739920.0, ans=0.1 +2024-09-19 20:31:51,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=739920.0, ans=0.125 +2024-09-19 20:32:08,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=739960.0, ans=0.0 +2024-09-19 20:32:12,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=739960.0, ans=0.0 +2024-09-19 20:32:15,341 INFO [train.py:1198] (0/2) Epoch 41, batch 4000, loss[loss=0.2185, ctc_loss=0.09627, cr_loss=0.3143, attn_decoder_loss=0.2251, over 29495.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1114, cr_loss=0.3505, attn_decoder_loss=0.2383, over 5810132.87 frames. ], batch size: 74, lr: 2.67e-03, grad_scale: 16.0 +2024-09-19 20:32:21,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=740000.0, ans=0.0 +2024-09-19 20:32:22,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=740000.0, ans=0.125 +2024-09-19 20:32:27,359 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=740000.0, ans=0.125 +2024-09-19 20:32:37,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=740040.0, ans=0.125 +2024-09-19 20:32:42,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=740040.0, ans=0.0 +2024-09-19 20:33:01,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=740120.0, ans=0.0 +2024-09-19 20:33:01,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=740120.0, ans=0.1 +2024-09-19 20:33:02,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.68 vs. limit=15.0 +2024-09-19 20:33:08,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=740120.0, ans=0.0 +2024-09-19 20:33:22,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=740160.0, ans=0.0 +2024-09-19 20:33:30,563 INFO [train.py:1198] (0/2) Epoch 41, batch 4050, loss[loss=0.2544, ctc_loss=0.1372, cr_loss=0.399, attn_decoder_loss=0.2585, over 20028.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1113, cr_loss=0.3504, attn_decoder_loss=0.2382, over 5795065.28 frames. ], batch size: 209, lr: 2.67e-03, grad_scale: 16.0 +2024-09-19 20:33:35,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.65 vs. 
limit=12.0 +2024-09-19 20:33:44,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=740240.0, ans=0.125 +2024-09-19 20:33:46,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=740240.0, ans=0.1 +2024-09-19 20:33:48,071 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.562e+01 8.566e+01 9.117e+01 9.789e+01 2.862e+02, threshold=1.823e+02, percent-clipped=4.0 +2024-09-19 20:33:52,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=740240.0, ans=0.09899494936611666 +2024-09-19 20:34:04,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=740280.0, ans=0.025 +2024-09-19 20:34:17,805 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.22 vs. limit=22.5 +2024-09-19 20:34:27,769 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=740360.0, ans=0.2 +2024-09-19 20:34:30,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=740360.0, ans=0.125 +2024-09-19 20:34:37,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=740360.0, ans=0.2 +2024-09-19 20:34:42,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=740400.0, ans=0.125 +2024-09-19 20:34:43,681 INFO [train.py:1198] (0/2) Epoch 41, batch 4100, loss[loss=0.2426, ctc_loss=0.1137, cr_loss=0.3564, attn_decoder_loss=0.249, over 29477.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1117, cr_loss=0.351, attn_decoder_loss=0.2388, over 5790568.40 frames. ], batch size: 90, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:34:54,882 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.51 vs. limit=15.0 +2024-09-19 20:35:16,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=740480.0, ans=0.0 +2024-09-19 20:35:25,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=740480.0, ans=6.0 +2024-09-19 20:35:50,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=740560.0, ans=0.1 +2024-09-19 20:35:52,366 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.41 vs. limit=10.0 +2024-09-19 20:35:53,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=740560.0, ans=0.025 +2024-09-19 20:35:57,578 INFO [train.py:1198] (0/2) Epoch 41, batch 4150, loss[loss=0.2226, ctc_loss=0.1061, cr_loss=0.3337, attn_decoder_loss=0.2281, over 29525.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1115, cr_loss=0.3502, attn_decoder_loss=0.2382, over 5796506.38 frames. 
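Batch sizes in this log swing between about 67 and 210 (the 209-utterance batches, like batch 4050 above, carry far fewer total frames, i.e. very short cuts), while typical batches hold a near-constant ~29-30k frames. That is the signature of duration-capped batching, as lhotse's DynamicBucketingSampler provides in icefall recipes: each batch is filled until a total-duration budget is reached, so batches of short cuts hold many more of them. A dependency-free sketch of the packing rule only:

```python
# Hypothetical illustration of duration-capped batching (what a bucketing
# sampler does, minus bucketing and shuffling): accumulate cuts until adding
# one more would exceed max_duration, then emit the batch.
def pack_by_duration(durations, max_duration):
    batches, cur, cur_dur = [], [], 0.0
    for idx, dur in enumerate(durations):
        if cur and cur_dur + dur > max_duration:
            batches.append(cur)
            cur, cur_dur = [], 0.0
        cur.append(idx)
        cur_dur += dur
    if cur:
        batches.append(cur)
    return batches

# Short utterances pack into large batches under a fixed duration cap.
print([len(b) for b in pack_by_duration([1.5] * 500, max_duration=300.0)])
# -> [200, 200, 100]
```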
], batch size: 77, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:36:02,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=740600.0, ans=0.0 +2024-09-19 20:36:16,236 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.325e+01 8.604e+01 9.031e+01 9.625e+01 1.845e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-19 20:36:20,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=740640.0, ans=0.2 +2024-09-19 20:36:30,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=740680.0, ans=0.0 +2024-09-19 20:36:37,413 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.29 vs. limit=22.5 +2024-09-19 20:37:04,596 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=740760.0, ans=0.0 +2024-09-19 20:37:12,297 INFO [train.py:1198] (0/2) Epoch 41, batch 4200, loss[loss=0.2513, ctc_loss=0.134, cr_loss=0.4121, attn_decoder_loss=0.2552, over 29520.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1124, cr_loss=0.352, attn_decoder_loss=0.2389, over 5798180.06 frames. ], batch size: 90, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:37:30,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=740840.0, ans=0.0 +2024-09-19 20:37:33,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=740840.0, ans=0.125 +2024-09-19 20:37:34,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=740840.0, ans=0.0 +2024-09-19 20:37:37,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=740840.0, ans=0.125 +2024-09-19 20:37:37,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=740840.0, ans=0.125 +2024-09-19 20:38:26,690 INFO [train.py:1198] (0/2) Epoch 41, batch 4250, loss[loss=0.2273, ctc_loss=0.1036, cr_loss=0.3332, attn_decoder_loss=0.2337, over 29523.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.112, cr_loss=0.351, attn_decoder_loss=0.2388, over 5804132.99 frames. ], batch size: 74, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:38:37,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=741000.0, ans=0.2 +2024-09-19 20:38:44,065 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.725e+01 8.665e+01 9.196e+01 9.683e+01 5.015e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-19 20:38:46,695 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.34 vs. 
limit=15.0 +2024-09-19 20:38:54,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=741080.0, ans=0.0 +2024-09-19 20:38:57,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=741080.0, ans=0.0 +2024-09-19 20:39:05,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=741080.0, ans=0.125 +2024-09-19 20:39:15,808 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.43 vs. limit=15.0 +2024-09-19 20:39:20,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.42 vs. limit=22.5 +2024-09-19 20:39:40,138 INFO [train.py:1198] (0/2) Epoch 41, batch 4300, loss[loss=0.238, ctc_loss=0.1151, cr_loss=0.3399, attn_decoder_loss=0.2441, over 29512.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1121, cr_loss=0.3512, attn_decoder_loss=0.239, over 5792352.73 frames. ], batch size: 87, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:39:49,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.45 vs. limit=15.0 +2024-09-19 20:39:57,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=741240.0, ans=0.125 +2024-09-19 20:40:12,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=741280.0, ans=0.125 +2024-09-19 20:40:16,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=741280.0, ans=0.125 +2024-09-19 20:40:22,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=741280.0, ans=0.025 +2024-09-19 20:40:33,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=741320.0, ans=0.125 +2024-09-19 20:40:55,598 INFO [train.py:1198] (0/2) Epoch 41, batch 4350, loss[loss=0.2515, ctc_loss=0.124, cr_loss=0.3899, attn_decoder_loss=0.257, over 29519.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1146, cr_loss=0.357, attn_decoder_loss=0.2422, over 5795744.81 frames. ], batch size: 97, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:40:58,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.47 vs. 
limit=15.0 +2024-09-19 20:41:06,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=741400.0, ans=0.0 +2024-09-19 20:41:13,107 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.774e+01 8.893e+01 9.255e+01 9.747e+01 1.701e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-19 20:41:36,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=741480.0, ans=0.125 +2024-09-19 20:41:52,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=741560.0, ans=0.07 +2024-09-19 20:41:59,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=741560.0, ans=0.1 +2024-09-19 20:42:08,702 INFO [train.py:1198] (0/2) Epoch 41, batch 4400, loss[loss=0.2443, ctc_loss=0.1307, cr_loss=0.3968, attn_decoder_loss=0.2481, over 27330.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1156, cr_loss=0.3596, attn_decoder_loss=0.2441, over 5767103.52 frames. ], batch size: 124, lr: 2.67e-03, grad_scale: 16.0 +2024-09-19 20:42:13,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=741600.0, ans=0.125 +2024-09-19 20:42:27,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=741640.0, ans=0.125 +2024-09-19 20:42:40,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=741680.0, ans=0.09899494936611666 +2024-09-19 20:43:10,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=741760.0, ans=0.125 +2024-09-19 20:43:16,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=741760.0, ans=0.2 +2024-09-19 20:43:22,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=741800.0, ans=0.025 +2024-09-19 20:43:23,298 INFO [train.py:1198] (0/2) Epoch 41, batch 4450, loss[loss=0.2496, ctc_loss=0.1312, cr_loss=0.3865, attn_decoder_loss=0.2541, over 20660.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1188, cr_loss=0.3649, attn_decoder_loss=0.2461, over 5579401.19 frames. ], batch size: 209, lr: 2.67e-03, grad_scale: 16.0 +2024-09-19 20:43:37,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=741840.0, ans=0.2 +2024-09-19 20:43:41,189 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.090e+01 9.304e+01 9.971e+01 1.121e+02 2.265e+02, threshold=1.994e+02, percent-clipped=2.0 +2024-09-19 20:44:38,327 INFO [train.py:1198] (0/2) Epoch 41, batch 4500, loss[loss=0.249, ctc_loss=0.134, cr_loss=0.368, attn_decoder_loss=0.2536, over 20785.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1215, cr_loss=0.3661, attn_decoder_loss=0.2476, over 5239476.48 frames. 
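A last bookkeeping note: `tot_loss` is a running, frame-weighted average rather than a single-batch value, and the fractional frame totals (e.g. `over 5239476.48 frames` above) suggest that history is decayed rather than simply summed. A sketch of that kind of tracker; the decay constant is illustrative and icefall's own metrics aggregation may differ:

```python
# Decayed, frame-weighted loss tracker. Down-weighting history by `decay`
# on every update yields fractional frame totals like those in the log.
class RunningLoss:
    def __init__(self, decay=0.999):
        self.decay = decay
        self.loss_sum = 0.0  # decayed sum of loss * frames
        self.frames = 0.0    # decayed sum of frames

    def update(self, batch_loss, batch_frames):
        self.loss_sum = self.loss_sum * self.decay + batch_loss * batch_frames
        self.frames = self.frames * self.decay + batch_frames
        return self.loss_sum / self.frames  # the reported tot_loss

tracker = RunningLoss()
for loss, frames in [(0.25, 29500.0), (0.23, 29600.0), (0.24, 29400.0)]:
    print(f"tot_loss={tracker.update(loss, frames):.4f}, over {tracker.frames:.2f} frames")
```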
], batch size: 210, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:44:59,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=742040.0, ans=0.125 +2024-09-19 20:45:15,160 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-41.pt +2024-09-19 20:46:06,158 INFO [train.py:1198] (0/2) Epoch 42, batch 0, loss[loss=0.2098, ctc_loss=0.09304, cr_loss=0.304, attn_decoder_loss=0.216, over 29618.00 frames. ], tot_loss[loss=0.2098, ctc_loss=0.09304, cr_loss=0.304, attn_decoder_loss=0.216, over 29618.00 frames. ], batch size: 73, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:46:06,159 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 20:46:24,583 INFO [train.py:1230] (0/2) Epoch 42, validation: loss=0.2127, ctc_loss=0.03579, cr_loss=6.428e-15, attn_decoder_loss=0.2324, over 944034.00 frames. +2024-09-19 20:46:24,584 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 20:46:29,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=742100.0, ans=0.0 +2024-09-19 20:46:32,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=742100.0, ans=0.125 +2024-09-19 20:46:36,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=742100.0, ans=0.2 +2024-09-19 20:46:55,618 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.43 vs. limit=22.5 +2024-09-19 20:47:10,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=742220.0, ans=0.125 +2024-09-19 20:47:19,979 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.95 vs. limit=22.5 +2024-09-19 20:47:21,374 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.26 vs. limit=15.0 +2024-09-19 20:47:21,856 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.777e+01 9.381e+01 1.084e+02 1.178e+02 1.554e+02, threshold=2.167e+02, percent-clipped=0.0 +2024-09-19 20:47:25,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.27 vs. limit=15.0 +2024-09-19 20:47:42,178 INFO [train.py:1198] (0/2) Epoch 42, batch 50, loss[loss=0.2102, ctc_loss=0.09328, cr_loss=0.3127, attn_decoder_loss=0.2162, over 29452.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1139, cr_loss=0.3564, attn_decoder_loss=0.24, over 1267510.91 frames. ], batch size: 70, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:47:50,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=742300.0, ans=0.125 +2024-09-19 20:48:13,658 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.63 vs. 
limit=15.0 +2024-09-19 20:48:30,110 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:48:59,798 INFO [train.py:1198] (0/2) Epoch 42, batch 100, loss[loss=0.2223, ctc_loss=0.1037, cr_loss=0.3385, attn_decoder_loss=0.2279, over 29533.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1148, cr_loss=0.3578, attn_decoder_loss=0.2418, over 2251441.59 frames. ], batch size: 76, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:49:03,458 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.27 vs. limit=15.0 +2024-09-19 20:49:10,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=742500.0, ans=0.0 +2024-09-19 20:49:20,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=742540.0, ans=0.0 +2024-09-19 20:49:28,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=742580.0, ans=0.2 +2024-09-19 20:49:30,361 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.64 vs. limit=22.5 +2024-09-19 20:49:56,419 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.813e+01 8.687e+01 8.987e+01 9.639e+01 1.254e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-19 20:50:09,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=742660.0, ans=0.0 +2024-09-19 20:50:14,292 INFO [train.py:1198] (0/2) Epoch 42, batch 150, loss[loss=0.2129, ctc_loss=0.0977, cr_loss=0.3264, attn_decoder_loss=0.2185, over 29403.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1131, cr_loss=0.3548, attn_decoder_loss=0.24, over 3045153.91 frames. ], batch size: 70, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:50:14,689 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:50:20,976 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.71 vs. limit=10.0 +2024-09-19 20:50:38,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=742740.0, ans=0.125 +2024-09-19 20:50:44,617 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:50:47,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=742780.0, ans=0.125 +2024-09-19 20:50:57,132 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.31 vs. 
limit=10.0 +2024-09-19 20:50:59,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=742820.0, ans=0.125 +2024-09-19 20:51:05,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=742820.0, ans=0.1 +2024-09-19 20:51:06,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=742820.0, ans=0.0 +2024-09-19 20:51:21,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=742860.0, ans=0.0 +2024-09-19 20:51:31,401 INFO [train.py:1198] (0/2) Epoch 42, batch 200, loss[loss=0.2486, ctc_loss=0.1238, cr_loss=0.3801, attn_decoder_loss=0.2541, over 27508.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1118, cr_loss=0.3515, attn_decoder_loss=0.2383, over 3657236.37 frames. ], batch size: 124, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:51:57,628 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.81 vs. limit=12.0 +2024-09-19 20:52:15,505 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.72 vs. limit=15.0 +2024-09-19 20:52:31,003 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.655e+01 8.542e+01 9.078e+01 9.443e+01 1.255e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 20:52:31,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=743020.0, ans=0.0 +2024-09-19 20:52:49,282 INFO [train.py:1198] (0/2) Epoch 42, batch 250, loss[loss=0.2424, ctc_loss=0.1104, cr_loss=0.3401, attn_decoder_loss=0.2495, over 29306.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1117, cr_loss=0.3513, attn_decoder_loss=0.2382, over 4139721.23 frames. ], batch size: 100, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:52:51,681 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.90 vs. limit=15.0 +2024-09-19 20:53:13,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=743140.0, ans=10.0 +2024-09-19 20:53:29,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=743180.0, ans=0.0 +2024-09-19 20:53:46,096 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.33 vs. limit=22.5 +2024-09-19 20:53:47,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=743220.0, ans=0.125 +2024-09-19 20:53:53,081 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:54:04,815 INFO [train.py:1198] (0/2) Epoch 42, batch 300, loss[loss=0.2483, ctc_loss=0.1226, cr_loss=0.3916, attn_decoder_loss=0.2536, over 29519.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1114, cr_loss=0.3512, attn_decoder_loss=0.2382, over 4509588.99 frames. 
], batch size: 92, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:54:05,190 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=743300.0, ans=0.0 +2024-09-19 20:54:06,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=743300.0, ans=0.1 +2024-09-19 20:54:27,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=743340.0, ans=0.125 +2024-09-19 20:54:43,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.59 vs. limit=15.0 +2024-09-19 20:54:44,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=743380.0, ans=0.07 +2024-09-19 20:54:49,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.62 vs. limit=15.0 +2024-09-19 20:54:58,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=743420.0, ans=0.0 +2024-09-19 20:55:03,817 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.584e+01 8.625e+01 9.047e+01 9.646e+01 1.583e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 20:55:07,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=743460.0, ans=0.025 +2024-09-19 20:55:17,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=743460.0, ans=0.2 +2024-09-19 20:55:22,738 INFO [train.py:1198] (0/2) Epoch 42, batch 350, loss[loss=0.2083, ctc_loss=0.08809, cr_loss=0.3052, attn_decoder_loss=0.2149, over 29319.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1118, cr_loss=0.3521, attn_decoder_loss=0.2385, over 4795361.75 frames. ], batch size: 71, lr: 2.64e-03, grad_scale: 8.0 +2024-09-19 20:55:23,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=743500.0, ans=0.125 +2024-09-19 20:55:36,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=743540.0, ans=0.09899494936611666 +2024-09-19 20:55:58,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=743580.0, ans=0.125 +2024-09-19 20:56:14,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=743620.0, ans=0.125 +2024-09-19 20:56:17,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=743620.0, ans=0.0 +2024-09-19 20:56:19,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.43 vs. limit=6.0 +2024-09-19 20:56:32,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=743660.0, ans=0.2 +2024-09-19 20:56:40,157 INFO [train.py:1198] (0/2) Epoch 42, batch 400, loss[loss=0.2411, ctc_loss=0.1157, cr_loss=0.3618, attn_decoder_loss=0.247, over 29710.00 frames. 
], tot_loss[loss=0.2326, ctc_loss=0.1114, cr_loss=0.3516, attn_decoder_loss=0.2382, over 5025712.49 frames. ], batch size: 82, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 20:56:42,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.80 vs. limit=15.0 +2024-09-19 20:56:47,347 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.00 vs. limit=12.0 +2024-09-19 20:57:00,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=743740.0, ans=0.0 +2024-09-19 20:57:39,493 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.837e+01 8.484e+01 8.956e+01 9.498e+01 1.659e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 20:57:51,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.30 vs. limit=15.0 +2024-09-19 20:57:56,177 INFO [train.py:1198] (0/2) Epoch 42, batch 450, loss[loss=0.2375, ctc_loss=0.1116, cr_loss=0.361, attn_decoder_loss=0.2435, over 29688.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1115, cr_loss=0.3512, attn_decoder_loss=0.2383, over 5185851.18 frames. ], batch size: 83, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 20:58:09,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=743940.0, ans=0.025 +2024-09-19 20:58:33,981 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.26 vs. limit=15.0 +2024-09-19 20:58:34,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=743980.0, ans=0.025 +2024-09-19 20:58:56,306 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.59 vs. limit=15.0 +2024-09-19 20:58:58,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=744060.0, ans=0.1 +2024-09-19 20:59:12,136 INFO [train.py:1198] (0/2) Epoch 42, batch 500, loss[loss=0.2548, ctc_loss=0.1315, cr_loss=0.3931, attn_decoder_loss=0.2598, over 29441.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1111, cr_loss=0.3504, attn_decoder_loss=0.2375, over 5330611.37 frames. ], batch size: 94, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 20:59:15,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=744100.0, ans=0.125 +2024-09-19 20:59:20,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=744100.0, ans=0.125 +2024-09-19 20:59:27,328 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.36 vs. limit=15.0 +2024-09-19 20:59:43,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=744180.0, ans=0.025 +2024-09-19 20:59:44,492 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.08 vs. 
limit=15.0 +2024-09-19 20:59:49,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=744180.0, ans=0.125 +2024-09-19 20:59:57,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=744180.0, ans=0.1 +2024-09-19 21:00:01,025 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:00:07,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=744220.0, ans=0.1 +2024-09-19 21:00:15,762 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.239e+01 8.359e+01 8.854e+01 9.452e+01 4.385e+02, threshold=1.771e+02, percent-clipped=2.0 +2024-09-19 21:00:23,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=744260.0, ans=0.125 +2024-09-19 21:00:24,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=744260.0, ans=0.0 +2024-09-19 21:00:25,624 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.68 vs. limit=15.0 +2024-09-19 21:00:32,320 INFO [train.py:1198] (0/2) Epoch 42, batch 550, loss[loss=0.2495, ctc_loss=0.1187, cr_loss=0.3615, attn_decoder_loss=0.256, over 28899.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1112, cr_loss=0.3508, attn_decoder_loss=0.2377, over 5423246.40 frames. ], batch size: 104, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:00:34,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=744300.0, ans=0.1 +2024-09-19 21:00:35,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=744300.0, ans=0.015 +2024-09-19 21:00:43,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=744300.0, ans=0.2 +2024-09-19 21:00:53,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=744340.0, ans=0.05 +2024-09-19 21:00:56,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=744340.0, ans=0.125 +2024-09-19 21:00:58,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=744340.0, ans=0.1 +2024-09-19 21:01:20,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=744420.0, ans=0.1 +2024-09-19 21:01:24,462 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.45 vs. limit=15.0 +2024-09-19 21:01:26,154 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.84 vs. 
limit=15.0 +2024-09-19 21:01:42,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=744460.0, ans=0.2 +2024-09-19 21:01:47,810 INFO [train.py:1198] (0/2) Epoch 42, batch 600, loss[loss=0.2522, ctc_loss=0.1275, cr_loss=0.384, attn_decoder_loss=0.2575, over 29263.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1113, cr_loss=0.3511, attn_decoder_loss=0.2381, over 5510875.27 frames. ], batch size: 100, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:02:13,894 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.41 vs. limit=15.0 +2024-09-19 21:02:15,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=744540.0, ans=0.1 +2024-09-19 21:02:22,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=744580.0, ans=0.1 +2024-09-19 21:02:47,683 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 8.400e+01 8.795e+01 9.486e+01 1.602e+02, threshold=1.759e+02, percent-clipped=0.0 +2024-09-19 21:03:02,674 INFO [train.py:1198] (0/2) Epoch 42, batch 650, loss[loss=0.2315, ctc_loss=0.1135, cr_loss=0.359, attn_decoder_loss=0.2366, over 29757.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1107, cr_loss=0.3499, attn_decoder_loss=0.2374, over 5587567.90 frames. ], batch size: 81, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:03:10,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=744700.0, ans=0.125 +2024-09-19 21:03:36,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=744780.0, ans=0.025 +2024-09-19 21:03:41,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=744780.0, ans=0.125 +2024-09-19 21:03:44,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=744780.0, ans=0.1 +2024-09-19 21:03:46,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=744780.0, ans=0.025 +2024-09-19 21:03:47,927 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.80 vs. limit=15.0 +2024-09-19 21:03:53,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=744820.0, ans=0.025 +2024-09-19 21:04:09,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=744860.0, ans=0.025 +2024-09-19 21:04:22,721 INFO [train.py:1198] (0/2) Epoch 42, batch 700, loss[loss=0.2179, ctc_loss=0.09945, cr_loss=0.3067, attn_decoder_loss=0.2243, over 29528.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1107, cr_loss=0.3499, attn_decoder_loss=0.2378, over 5638279.80 frames. ], batch size: 76, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:04:29,874 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.58 vs. 
limit=15.0 +2024-09-19 21:04:32,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.11 vs. limit=6.0 +2024-09-19 21:04:54,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=744980.0, ans=0.125 +2024-09-19 21:05:23,216 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.314e+01 8.486e+01 9.011e+01 9.700e+01 3.654e+02, threshold=1.802e+02, percent-clipped=4.0 +2024-09-19 21:05:23,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.92 vs. limit=15.0 +2024-09-19 21:05:38,323 INFO [train.py:1198] (0/2) Epoch 42, batch 750, loss[loss=0.2422, ctc_loss=0.1126, cr_loss=0.3744, attn_decoder_loss=0.2483, over 29680.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1105, cr_loss=0.3495, attn_decoder_loss=0.2376, over 5676198.42 frames. ], batch size: 82, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:05:43,693 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.65 vs. limit=6.0 +2024-09-19 21:05:44,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=745100.0, ans=15.0 +2024-09-19 21:06:05,641 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=5.34 vs. limit=12.0 +2024-09-19 21:06:39,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=745260.0, ans=0.025 +2024-09-19 21:06:53,497 INFO [train.py:1198] (0/2) Epoch 42, batch 800, loss[loss=0.2099, ctc_loss=0.0875, cr_loss=0.2931, attn_decoder_loss=0.217, over 29599.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1108, cr_loss=0.3504, attn_decoder_loss=0.2378, over 5707084.56 frames. ], batch size: 73, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:07:02,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=745300.0, ans=10.0 +2024-09-19 21:07:05,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=745300.0, ans=0.1 +2024-09-19 21:07:25,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=745380.0, ans=0.1 +2024-09-19 21:07:26,631 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.93 vs. limit=22.5 +2024-09-19 21:07:59,498 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.594e+01 9.081e+01 9.628e+01 1.457e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 21:08:07,093 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=745460.0, ans=0.07 +2024-09-19 21:08:12,821 INFO [train.py:1198] (0/2) Epoch 42, batch 850, loss[loss=0.2547, ctc_loss=0.1244, cr_loss=0.388, attn_decoder_loss=0.2605, over 29717.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1106, cr_loss=0.3497, attn_decoder_loss=0.2377, over 5735788.04 frames. 
], batch size: 89, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:08:14,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=745500.0, ans=0.125 +2024-09-19 21:08:20,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=745500.0, ans=0.025 +2024-09-19 21:08:33,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=745540.0, ans=0.0 +2024-09-19 21:08:34,308 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.15 vs. limit=15.0 +2024-09-19 21:08:39,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=745540.0, ans=0.0 +2024-09-19 21:08:41,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=745580.0, ans=0.125 +2024-09-19 21:08:51,009 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=745580.0, ans=0.1 +2024-09-19 21:08:51,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=745580.0, ans=0.0 +2024-09-19 21:09:28,713 INFO [train.py:1198] (0/2) Epoch 42, batch 900, loss[loss=0.222, ctc_loss=0.1062, cr_loss=0.3506, attn_decoder_loss=0.2271, over 29632.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.111, cr_loss=0.3505, attn_decoder_loss=0.238, over 5740733.25 frames. ], batch size: 73, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:09:29,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=745700.0, ans=0.09899494936611666 +2024-09-19 21:09:29,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=745700.0, ans=0.0 +2024-09-19 21:09:33,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=745700.0, ans=0.0 +2024-09-19 21:09:33,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=745700.0, ans=0.125 +2024-09-19 21:09:45,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=745740.0, ans=0.0 +2024-09-19 21:10:02,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=745780.0, ans=0.125 +2024-09-19 21:10:14,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=745820.0, ans=0.1 +2024-09-19 21:10:17,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=745820.0, ans=0.07 +2024-09-19 21:10:21,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=745820.0, ans=0.125 +2024-09-19 21:10:23,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=745820.0, ans=0.125 +2024-09-19 21:10:30,383 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.128e+01 8.573e+01 9.060e+01 9.874e+01 1.680e+02, threshold=1.812e+02, percent-clipped=0.0 
+2024-09-19 21:10:39,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=745860.0, ans=0.125 +2024-09-19 21:10:43,724 INFO [train.py:1198] (0/2) Epoch 42, batch 950, loss[loss=0.2184, ctc_loss=0.09903, cr_loss=0.3335, attn_decoder_loss=0.2242, over 29503.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1106, cr_loss=0.3494, attn_decoder_loss=0.2379, over 5744311.20 frames. ], batch size: 74, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:10:44,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=745900.0, ans=0.0 +2024-09-19 21:11:17,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=745980.0, ans=0.125 +2024-09-19 21:11:39,559 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.57 vs. limit=6.0 +2024-09-19 21:12:03,071 INFO [train.py:1198] (0/2) Epoch 42, batch 1000, loss[loss=0.2372, ctc_loss=0.1209, cr_loss=0.3881, attn_decoder_loss=0.2415, over 29516.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1112, cr_loss=0.3506, attn_decoder_loss=0.2385, over 5737780.72 frames. ], batch size: 77, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:12:27,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=746140.0, ans=0.125 +2024-09-19 21:12:36,133 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.81 vs. limit=10.0 +2024-09-19 21:13:05,356 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.617e+01 8.540e+01 9.060e+01 9.719e+01 2.106e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 21:13:11,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=746260.0, ans=0.125 +2024-09-19 21:13:19,000 INFO [train.py:1198] (0/2) Epoch 42, batch 1050, loss[loss=0.2249, ctc_loss=0.1039, cr_loss=0.3351, attn_decoder_loss=0.2309, over 29654.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1107, cr_loss=0.3497, attn_decoder_loss=0.2375, over 5746351.38 frames. ], batch size: 85, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:13:23,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=746300.0, ans=0.125 +2024-09-19 21:13:35,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=746340.0, ans=0.1 +2024-09-19 21:13:44,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=746340.0, ans=0.2 +2024-09-19 21:14:23,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=746460.0, ans=0.125 +2024-09-19 21:14:23,913 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.75 vs. limit=22.5 +2024-09-19 21:14:35,086 INFO [train.py:1198] (0/2) Epoch 42, batch 1100, loss[loss=0.2213, ctc_loss=0.1062, cr_loss=0.3405, attn_decoder_loss=0.2265, over 29457.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1106, cr_loss=0.3492, attn_decoder_loss=0.2375, over 5757143.64 frames. 
], batch size: 78, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:14:46,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=746500.0, ans=0.125 +2024-09-19 21:15:06,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=746580.0, ans=0.0 +2024-09-19 21:15:15,923 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.95 vs. limit=10.0 +2024-09-19 21:15:19,067 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.36 vs. limit=10.0 +2024-09-19 21:15:39,240 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.407e+01 8.586e+01 9.042e+01 9.812e+01 2.400e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 21:15:39,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=746660.0, ans=0.125 +2024-09-19 21:15:42,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=746660.0, ans=0.2 +2024-09-19 21:15:50,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=746660.0, ans=0.0 +2024-09-19 21:15:50,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=746660.0, ans=0.125 +2024-09-19 21:15:55,103 INFO [train.py:1198] (0/2) Epoch 42, batch 1150, loss[loss=0.2283, ctc_loss=0.1162, cr_loss=0.3628, attn_decoder_loss=0.2327, over 29457.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1111, cr_loss=0.3501, attn_decoder_loss=0.2378, over 5756536.41 frames. ], batch size: 78, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:15:59,268 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.87 vs. limit=15.0 +2024-09-19 21:15:59,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=746700.0, ans=0.2 +2024-09-19 21:16:07,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=746700.0, ans=0.125 +2024-09-19 21:16:21,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=746740.0, ans=0.025 +2024-09-19 21:16:25,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.27 vs. limit=15.0 +2024-09-19 21:16:37,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=746780.0, ans=0.0 +2024-09-19 21:16:40,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=746820.0, ans=0.0 +2024-09-19 21:17:05,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=746860.0, ans=0.125 +2024-09-19 21:17:10,789 INFO [train.py:1198] (0/2) Epoch 42, batch 1200, loss[loss=0.2386, ctc_loss=0.1135, cr_loss=0.3529, attn_decoder_loss=0.2447, over 29681.00 frames. 
], tot_loss[loss=0.2328, ctc_loss=0.1115, cr_loss=0.3511, attn_decoder_loss=0.2385, over 5749582.75 frames. ], batch size: 85, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:17:12,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=746900.0, ans=0.125 +2024-09-19 21:17:14,171 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:17:14,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=746900.0, ans=10.0 +2024-09-19 21:17:18,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=746900.0, ans=0.125 +2024-09-19 21:17:19,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=746900.0, ans=0.125 +2024-09-19 21:17:29,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=746940.0, ans=0.0 +2024-09-19 21:17:40,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=746980.0, ans=0.0 +2024-09-19 21:18:08,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=747020.0, ans=0.0 +2024-09-19 21:18:13,019 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.675e+01 9.072e+01 9.806e+01 1.661e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-19 21:18:16,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=747060.0, ans=0.2 +2024-09-19 21:18:26,691 INFO [train.py:1198] (0/2) Epoch 42, batch 1250, loss[loss=0.2529, ctc_loss=0.1277, cr_loss=0.3873, attn_decoder_loss=0.2582, over 29506.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.112, cr_loss=0.3526, attn_decoder_loss=0.2393, over 5776729.50 frames. ], batch size: 92, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:18:48,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=747140.0, ans=0.0 +2024-09-19 21:19:02,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=747180.0, ans=0.125 +2024-09-19 21:19:10,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=747180.0, ans=0.0 +2024-09-19 21:19:15,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=747220.0, ans=0.0 +2024-09-19 21:19:38,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=747260.0, ans=0.0 +2024-09-19 21:19:47,438 INFO [train.py:1198] (0/2) Epoch 42, batch 1300, loss[loss=0.2365, ctc_loss=0.1106, cr_loss=0.3562, attn_decoder_loss=0.2426, over 28169.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1117, cr_loss=0.3522, attn_decoder_loss=0.2389, over 5780111.14 frames. ], batch size: 111, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:20:50,662 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.269e+01 8.538e+01 9.081e+01 9.476e+01 1.507e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 21:20:59,451 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.86 vs. 
limit=15.0 +2024-09-19 21:21:02,966 INFO [train.py:1198] (0/2) Epoch 42, batch 1350, loss[loss=0.2298, ctc_loss=0.1121, cr_loss=0.3579, attn_decoder_loss=0.235, over 29748.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1112, cr_loss=0.3514, attn_decoder_loss=0.2384, over 5795691.94 frames. ], batch size: 81, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:21:03,672 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.61 vs. limit=15.0 +2024-09-19 21:21:14,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.16 vs. limit=12.0 +2024-09-19 21:21:16,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=747540.0, ans=0.125 +2024-09-19 21:21:46,221 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:21:46,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=747620.0, ans=0.125 +2024-09-19 21:21:56,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=747620.0, ans=0.125 +2024-09-19 21:22:16,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=747700.0, ans=0.0 +2024-09-19 21:22:17,780 INFO [train.py:1198] (0/2) Epoch 42, batch 1400, loss[loss=0.2021, ctc_loss=0.08377, cr_loss=0.2874, attn_decoder_loss=0.2089, over 29587.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1112, cr_loss=0.3513, attn_decoder_loss=0.2384, over 5806863.25 frames. ], batch size: 69, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:22:19,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=747700.0, ans=0.2 +2024-09-19 21:22:45,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=747740.0, ans=0.2 +2024-09-19 21:22:50,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=747780.0, ans=0.1 +2024-09-19 21:23:03,270 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.45 vs. limit=22.5 +2024-09-19 21:23:14,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=747820.0, ans=0.125 +2024-09-19 21:23:16,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=747820.0, ans=0.2 +2024-09-19 21:23:23,217 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.442e+01 9.058e+01 9.585e+01 2.575e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 21:23:35,307 INFO [train.py:1198] (0/2) Epoch 42, batch 1450, loss[loss=0.2524, ctc_loss=0.1289, cr_loss=0.4019, attn_decoder_loss=0.2572, over 29440.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1114, cr_loss=0.3517, attn_decoder_loss=0.2387, over 5803137.06 frames. ], batch size: 94, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:23:39,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.62 vs. 
limit=22.5 +2024-09-19 21:23:52,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=747940.0, ans=10.0 +2024-09-19 21:23:56,055 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:24:05,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=747940.0, ans=0.0 +2024-09-19 21:24:07,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=747980.0, ans=0.125 +2024-09-19 21:24:12,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=747980.0, ans=0.1 +2024-09-19 21:24:12,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=747980.0, ans=0.025 +2024-09-19 21:24:17,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=747980.0, ans=0.0 +2024-09-19 21:24:18,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=747980.0, ans=0.0 +2024-09-19 21:24:48,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=748060.0, ans=0.125 +2024-09-19 21:24:53,283 INFO [train.py:1198] (0/2) Epoch 42, batch 1500, loss[loss=0.2412, ctc_loss=0.1082, cr_loss=0.335, attn_decoder_loss=0.2485, over 29649.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1115, cr_loss=0.3515, attn_decoder_loss=0.2391, over 5804244.94 frames. ], batch size: 86, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:25:57,691 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.682e+01 8.590e+01 8.992e+01 9.499e+01 3.130e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-19 21:26:00,433 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.13 vs. limit=15.0 +2024-09-19 21:26:09,801 INFO [train.py:1198] (0/2) Epoch 42, batch 1550, loss[loss=0.251, ctc_loss=0.1291, cr_loss=0.4052, attn_decoder_loss=0.2556, over 29475.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1118, cr_loss=0.3516, attn_decoder_loss=0.2392, over 5780668.75 frames. ], batch size: 90, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:26:16,846 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.67 vs. 
limit=12.0 +2024-09-19 21:26:19,017 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:26:25,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=748340.0, ans=0.125 +2024-09-19 21:26:34,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=748340.0, ans=0.0 +2024-09-19 21:26:37,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=748340.0, ans=0.125 +2024-09-19 21:26:42,171 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=748380.0, ans=0.125 +2024-09-19 21:26:45,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=748380.0, ans=0.1 +2024-09-19 21:27:09,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=748420.0, ans=0.125 +2024-09-19 21:27:22,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=748460.0, ans=0.025 +2024-09-19 21:27:26,805 INFO [train.py:1198] (0/2) Epoch 42, batch 1600, loss[loss=0.2404, ctc_loss=0.1137, cr_loss=0.3666, attn_decoder_loss=0.2463, over 29688.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1116, cr_loss=0.3509, attn_decoder_loss=0.2389, over 5763146.36 frames. ], batch size: 85, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:27:27,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=748500.0, ans=0.125 +2024-09-19 21:27:28,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=748500.0, ans=0.0 +2024-09-19 21:27:50,110 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:27:51,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=748540.0, ans=0.1 +2024-09-19 21:27:52,024 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.09 vs. limit=6.0 +2024-09-19 21:28:08,660 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.66 vs. limit=15.0 +2024-09-19 21:28:15,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=748620.0, ans=0.125 +2024-09-19 21:28:23,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=748620.0, ans=0.125 +2024-09-19 21:28:32,061 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.535e+01 9.042e+01 9.603e+01 1.807e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 21:28:43,819 INFO [train.py:1198] (0/2) Epoch 42, batch 1650, loss[loss=0.2301, ctc_loss=0.09968, cr_loss=0.3373, attn_decoder_loss=0.2371, over 29712.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1112, cr_loss=0.35, attn_decoder_loss=0.2385, over 5759972.55 frames. 
], batch size: 89, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:29:04,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=748740.0, ans=0.125 +2024-09-19 21:29:13,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=748780.0, ans=0.125 +2024-09-19 21:29:20,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=748780.0, ans=0.0 +2024-09-19 21:29:47,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=748860.0, ans=0.0 +2024-09-19 21:29:50,804 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.68 vs. limit=22.5 +2024-09-19 21:29:51,259 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.82 vs. limit=22.5 +2024-09-19 21:29:51,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=748860.0, ans=0.0 +2024-09-19 21:29:54,201 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.64 vs. limit=15.0 +2024-09-19 21:29:59,147 INFO [train.py:1198] (0/2) Epoch 42, batch 1700, loss[loss=0.2115, ctc_loss=0.1016, cr_loss=0.3279, attn_decoder_loss=0.2164, over 29563.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1109, cr_loss=0.3497, attn_decoder_loss=0.2382, over 5781003.62 frames. ], batch size: 69, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:29:59,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=748900.0, ans=0.2 +2024-09-19 21:31:04,408 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.536e+01 8.510e+01 9.136e+01 9.466e+01 1.659e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 21:31:16,664 INFO [train.py:1198] (0/2) Epoch 42, batch 1750, loss[loss=0.21, ctc_loss=0.09229, cr_loss=0.3041, attn_decoder_loss=0.2164, over 29334.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1107, cr_loss=0.3494, attn_decoder_loss=0.2378, over 5788122.25 frames. ], batch size: 67, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:31:30,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=749140.0, ans=0.1 +2024-09-19 21:31:32,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=749140.0, ans=0.1 +2024-09-19 21:31:40,884 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.87 vs. 
limit=15.0 +2024-09-19 21:31:44,968 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=749140.0, ans=0.125 +2024-09-19 21:31:52,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=749180.0, ans=0.125 +2024-09-19 21:31:54,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=749180.0, ans=0.125 +2024-09-19 21:32:00,092 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=749180.0, ans=0.2 +2024-09-19 21:32:09,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=749220.0, ans=0.125 +2024-09-19 21:32:16,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=749220.0, ans=0.125 +2024-09-19 21:32:27,844 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.28 vs. limit=10.0 +2024-09-19 21:32:34,212 INFO [train.py:1198] (0/2) Epoch 42, batch 1800, loss[loss=0.2357, ctc_loss=0.1138, cr_loss=0.3623, attn_decoder_loss=0.2412, over 29672.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1109, cr_loss=0.3496, attn_decoder_loss=0.2379, over 5791465.46 frames. ], batch size: 83, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:32:34,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=749300.0, ans=0.0 +2024-09-19 21:33:13,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=749380.0, ans=0.1 +2024-09-19 21:33:20,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_na.min_abs, batch_count=749420.0, ans=0.02 +2024-09-19 21:33:35,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=749460.0, ans=0.0 +2024-09-19 21:33:36,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=749460.0, ans=0.125 +2024-09-19 21:33:39,223 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.425e+01 8.440e+01 8.862e+01 9.428e+01 1.419e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 21:33:47,851 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.73 vs. limit=22.5 +2024-09-19 21:33:49,906 INFO [train.py:1198] (0/2) Epoch 42, batch 1850, loss[loss=0.2486, ctc_loss=0.1166, cr_loss=0.3579, attn_decoder_loss=0.2553, over 29643.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1107, cr_loss=0.3493, attn_decoder_loss=0.2378, over 5796765.49 frames. ], batch size: 86, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:33:50,938 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.84 vs. 
limit=15.0 +2024-09-19 21:34:23,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=749580.0, ans=0.125 +2024-09-19 21:34:35,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.01 vs. limit=22.5 +2024-09-19 21:34:38,226 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.57 vs. limit=10.0 +2024-09-19 21:34:45,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=749620.0, ans=0.0 +2024-09-19 21:35:01,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=749660.0, ans=0.125 +2024-09-19 21:35:07,214 INFO [train.py:1198] (0/2) Epoch 42, batch 1900, loss[loss=0.2425, ctc_loss=0.1143, cr_loss=0.3543, attn_decoder_loss=0.2488, over 29686.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1112, cr_loss=0.3501, attn_decoder_loss=0.2384, over 5804347.85 frames. ], batch size: 89, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:35:07,674 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:35:13,671 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:35:14,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=749700.0, ans=0.125 +2024-09-19 21:35:49,714 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.31 vs. limit=15.0 +2024-09-19 21:35:57,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.33 vs. limit=6.0 +2024-09-19 21:35:59,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=749820.0, ans=0.125 +2024-09-19 21:36:14,505 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.827e+01 8.670e+01 9.049e+01 9.659e+01 1.303e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 21:36:22,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=749860.0, ans=0.125 +2024-09-19 21:36:24,972 INFO [train.py:1198] (0/2) Epoch 42, batch 1950, loss[loss=0.2319, ctc_loss=0.1102, cr_loss=0.3595, attn_decoder_loss=0.2374, over 29445.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1118, cr_loss=0.3522, attn_decoder_loss=0.2396, over 5819765.80 frames. 
], batch size: 78, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:36:28,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=749900.0, ans=0.1 +2024-09-19 21:36:40,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=749940.0, ans=0.025 +2024-09-19 21:36:49,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=749940.0, ans=0.95 +2024-09-19 21:36:58,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=749980.0, ans=0.0 +2024-09-19 21:36:58,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=749980.0, ans=0.125 +2024-09-19 21:37:08,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=750020.0, ans=0.0 +2024-09-19 21:37:40,250 INFO [train.py:1198] (0/2) Epoch 42, batch 2000, loss[loss=0.204, ctc_loss=0.08689, cr_loss=0.2763, attn_decoder_loss=0.2108, over 29326.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1125, cr_loss=0.353, attn_decoder_loss=0.2399, over 5796244.49 frames. ], batch size: 67, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:38:04,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=750140.0, ans=0.125 +2024-09-19 21:38:15,240 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.99 vs. limit=10.0 +2024-09-19 21:38:25,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=750180.0, ans=0.125 +2024-09-19 21:38:36,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.04 vs. limit=15.0 +2024-09-19 21:38:39,258 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.11 vs. limit=15.0 +2024-09-19 21:38:47,687 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.819e+01 8.670e+01 9.136e+01 9.850e+01 1.573e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 21:38:58,270 INFO [train.py:1198] (0/2) Epoch 42, batch 2050, loss[loss=0.2182, ctc_loss=0.1036, cr_loss=0.328, attn_decoder_loss=0.2237, over 29473.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1119, cr_loss=0.3518, attn_decoder_loss=0.2389, over 5787086.27 frames. ], batch size: 70, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:39:19,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=750340.0, ans=0.0 +2024-09-19 21:39:50,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.93 vs. limit=8.0 +2024-09-19 21:39:59,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=750460.0, ans=0.125 +2024-09-19 21:40:16,258 INFO [train.py:1198] (0/2) Epoch 42, batch 2100, loss[loss=0.2282, ctc_loss=0.1096, cr_loss=0.358, attn_decoder_loss=0.2334, over 29772.00 frames. 
], tot_loss[loss=0.2328, ctc_loss=0.1115, cr_loss=0.351, attn_decoder_loss=0.2384, over 5799344.87 frames. ], batch size: 81, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:40:18,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.15 vs. limit=15.0 +2024-09-19 21:40:39,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=750540.0, ans=0.1 +2024-09-19 21:41:17,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=750660.0, ans=0.125 +2024-09-19 21:41:20,506 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.318e+01 8.604e+01 9.019e+01 9.390e+01 1.185e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-19 21:41:20,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=750660.0, ans=0.125 +2024-09-19 21:41:22,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=750660.0, ans=0.2 +2024-09-19 21:41:31,113 INFO [train.py:1198] (0/2) Epoch 42, batch 2150, loss[loss=0.2189, ctc_loss=0.1024, cr_loss=0.3288, attn_decoder_loss=0.2245, over 29460.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1106, cr_loss=0.3494, attn_decoder_loss=0.2376, over 5814307.82 frames. ], batch size: 78, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:41:57,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=750740.0, ans=0.0 +2024-09-19 21:42:11,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=750780.0, ans=0.1 +2024-09-19 21:42:23,603 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.28 vs. limit=12.0 +2024-09-19 21:42:41,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=750860.0, ans=0.125 +2024-09-19 21:42:48,194 INFO [train.py:1198] (0/2) Epoch 42, batch 2200, loss[loss=0.2432, ctc_loss=0.118, cr_loss=0.3714, attn_decoder_loss=0.2488, over 29603.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1108, cr_loss=0.3497, attn_decoder_loss=0.2376, over 5810960.49 frames. 
], batch size: 86, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:42:56,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=750900.0, ans=0.2 +2024-09-19 21:43:06,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=750940.0, ans=0.125 +2024-09-19 21:43:08,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=750940.0, ans=15.0 +2024-09-19 21:43:09,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=750940.0, ans=0.125 +2024-09-19 21:43:46,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=751020.0, ans=0.125 +2024-09-19 21:43:55,415 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.242e+01 8.649e+01 8.991e+01 9.667e+01 4.201e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-19 21:44:01,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=751060.0, ans=0.0 +2024-09-19 21:44:06,040 INFO [train.py:1198] (0/2) Epoch 42, batch 2250, loss[loss=0.2317, ctc_loss=0.107, cr_loss=0.335, attn_decoder_loss=0.2381, over 29700.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1109, cr_loss=0.35, attn_decoder_loss=0.2379, over 5811247.25 frames. ], batch size: 82, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:44:39,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=751180.0, ans=0.125 +2024-09-19 21:45:04,213 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=9.88 vs. limit=12.0 +2024-09-19 21:45:09,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=751260.0, ans=0.125 +2024-09-19 21:45:11,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=751260.0, ans=0.125 +2024-09-19 21:45:21,542 INFO [train.py:1198] (0/2) Epoch 42, batch 2300, loss[loss=0.2105, ctc_loss=0.09396, cr_loss=0.3023, attn_decoder_loss=0.2167, over 29321.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1105, cr_loss=0.3486, attn_decoder_loss=0.2372, over 5797525.02 frames. ], batch size: 71, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:45:23,904 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.71 vs. limit=15.0 +2024-09-19 21:45:32,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=751300.0, ans=0.1 +2024-09-19 21:45:41,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=751340.0, ans=0.125 +2024-09-19 21:46:28,380 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.932e+01 8.459e+01 9.034e+01 9.702e+01 2.715e+02, threshold=1.807e+02, percent-clipped=2.0 +2024-09-19 21:46:39,169 INFO [train.py:1198] (0/2) Epoch 42, batch 2350, loss[loss=0.2411, ctc_loss=0.1139, cr_loss=0.3556, attn_decoder_loss=0.2473, over 29684.00 frames. 
], tot_loss[loss=0.2319, ctc_loss=0.111, cr_loss=0.3498, attn_decoder_loss=0.2375, over 5802071.55 frames. ], batch size: 83, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:46:55,210 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=10.74 vs. limit=10.0 +2024-09-19 21:47:09,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_abs, batch_count=751580.0, ans=0.5 +2024-09-19 21:47:14,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=751580.0, ans=0.125 +2024-09-19 21:47:29,873 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:47:44,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=751660.0, ans=0.125 +2024-09-19 21:47:56,843 INFO [train.py:1198] (0/2) Epoch 42, batch 2400, loss[loss=0.2237, ctc_loss=0.1045, cr_loss=0.337, attn_decoder_loss=0.2295, over 29526.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1107, cr_loss=0.3495, attn_decoder_loss=0.2376, over 5806722.72 frames. ], batch size: 76, lr: 2.62e-03, grad_scale: 32.0 +2024-09-19 21:48:27,812 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.98 vs. limit=22.5 +2024-09-19 21:49:02,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=751860.0, ans=0.0 +2024-09-19 21:49:03,289 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.572e+01 8.663e+01 9.186e+01 9.777e+01 4.524e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-19 21:49:12,375 INFO [train.py:1198] (0/2) Epoch 42, batch 2450, loss[loss=0.2321, ctc_loss=0.1093, cr_loss=0.3427, attn_decoder_loss=0.2381, over 29730.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1112, cr_loss=0.3506, attn_decoder_loss=0.2382, over 5784150.28 frames. ], batch size: 82, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:49:14,665 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.20 vs. limit=15.0 +2024-09-19 21:49:19,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=751900.0, ans=0.125 +2024-09-19 21:49:21,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=751900.0, ans=0.0 +2024-09-19 21:49:51,019 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-188000.pt +2024-09-19 21:50:07,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=752020.0, ans=0.125 +2024-09-19 21:50:09,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.05 vs. 
limit=10.0 +2024-09-19 21:50:11,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=752020.0, ans=0.125 +2024-09-19 21:50:19,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=752020.0, ans=0.125 +2024-09-19 21:50:26,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.38 vs. limit=22.5 +2024-09-19 21:50:31,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=752060.0, ans=0.2 +2024-09-19 21:50:37,334 INFO [train.py:1198] (0/2) Epoch 42, batch 2500, loss[loss=0.2411, ctc_loss=0.1101, cr_loss=0.3404, attn_decoder_loss=0.2481, over 29646.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1113, cr_loss=0.3507, attn_decoder_loss=0.2382, over 5794787.95 frames. ], batch size: 86, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:50:40,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=752100.0, ans=0.125 +2024-09-19 21:50:54,301 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=752140.0, ans=0.1 +2024-09-19 21:50:54,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=752140.0, ans=0.2 +2024-09-19 21:51:03,361 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:51:04,018 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.28 vs. limit=15.0 +2024-09-19 21:51:16,367 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.46 vs. limit=22.5 +2024-09-19 21:51:29,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=752220.0, ans=0.125 +2024-09-19 21:51:46,105 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.657e+01 8.685e+01 9.215e+01 9.799e+01 2.260e+02, threshold=1.843e+02, percent-clipped=2.0 +2024-09-19 21:51:47,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=752260.0, ans=0.125 +2024-09-19 21:51:55,267 INFO [train.py:1198] (0/2) Epoch 42, batch 2550, loss[loss=0.2161, ctc_loss=0.1068, cr_loss=0.3292, attn_decoder_loss=0.2209, over 29396.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1115, cr_loss=0.351, attn_decoder_loss=0.2386, over 5797389.70 frames. ], batch size: 67, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:51:58,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=752300.0, ans=0.125 +2024-09-19 21:52:03,464 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.45 vs. limit=15.0 +2024-09-19 21:52:18,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.71 vs. 
limit=15.0 +2024-09-19 21:52:31,604 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=752380.0, ans=0.025 +2024-09-19 21:52:38,091 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.07 vs. limit=22.5 +2024-09-19 21:52:45,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=752420.0, ans=0.125 +2024-09-19 21:52:52,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=752420.0, ans=0.0 +2024-09-19 21:53:01,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=752460.0, ans=0.125 +2024-09-19 21:53:04,299 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.65 vs. limit=15.0 +2024-09-19 21:53:10,741 INFO [train.py:1198] (0/2) Epoch 42, batch 2600, loss[loss=0.2296, ctc_loss=0.1128, cr_loss=0.3579, attn_decoder_loss=0.2346, over 29446.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1115, cr_loss=0.3509, attn_decoder_loss=0.2389, over 5794840.18 frames. ], batch size: 78, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:53:11,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=752500.0, ans=0.2 +2024-09-19 21:53:12,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=752500.0, ans=0.0 +2024-09-19 21:53:13,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=752500.0, ans=0.125 +2024-09-19 21:53:14,124 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=752500.0, ans=0.125 +2024-09-19 21:53:35,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=752540.0, ans=0.125 +2024-09-19 21:53:35,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=752540.0, ans=0.1 +2024-09-19 21:53:43,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=752580.0, ans=0.125 +2024-09-19 21:53:56,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=752620.0, ans=0.025 +2024-09-19 21:54:10,251 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:54:10,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=752620.0, ans=0.125 +2024-09-19 21:54:14,831 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=752660.0, ans=0.125 +2024-09-19 21:54:18,979 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.622e+01 9.143e+01 9.724e+01 1.437e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-19 21:54:26,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=752700.0, ans=0.125 +2024-09-19 21:54:27,800 INFO 
[train.py:1198] (0/2) Epoch 42, batch 2650, loss[loss=0.2486, ctc_loss=0.1187, cr_loss=0.3773, attn_decoder_loss=0.2546, over 29294.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1115, cr_loss=0.3512, attn_decoder_loss=0.239, over 5801135.67 frames. ], batch size: 100, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:54:34,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=752700.0, ans=0.035 +2024-09-19 21:54:41,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=752740.0, ans=0.125 +2024-09-19 21:54:58,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=752780.0, ans=0.125 +2024-09-19 21:55:00,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.51 vs. limit=15.0 +2024-09-19 21:55:04,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=752780.0, ans=0.0 +2024-09-19 21:55:06,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=752780.0, ans=0.125 +2024-09-19 21:55:18,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=752820.0, ans=0.125 +2024-09-19 21:55:45,576 INFO [train.py:1198] (0/2) Epoch 42, batch 2700, loss[loss=0.2409, ctc_loss=0.1156, cr_loss=0.3702, attn_decoder_loss=0.2466, over 29525.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1121, cr_loss=0.3524, attn_decoder_loss=0.2396, over 5797041.63 frames. ], batch size: 87, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:56:08,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=752940.0, ans=0.1 +2024-09-19 21:56:08,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=752940.0, ans=0.0 +2024-09-19 21:56:20,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=752980.0, ans=0.1 +2024-09-19 21:56:26,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=752980.0, ans=0.125 +2024-09-19 21:56:48,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=753060.0, ans=0.125 +2024-09-19 21:56:50,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=753060.0, ans=0.2 +2024-09-19 21:56:51,999 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.647e+01 8.707e+01 9.259e+01 9.781e+01 2.020e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-19 21:56:55,920 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.93 vs. limit=6.0 +2024-09-19 21:57:01,154 INFO [train.py:1198] (0/2) Epoch 42, batch 2750, loss[loss=0.2331, ctc_loss=0.115, cr_loss=0.3678, attn_decoder_loss=0.2381, over 29524.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1113, cr_loss=0.3511, attn_decoder_loss=0.2385, over 5796874.40 frames. 
], batch size: 75, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:57:03,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=753100.0, ans=0.1 +2024-09-19 21:57:16,301 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.09 vs. limit=15.0 +2024-09-19 21:57:21,072 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.49 vs. limit=15.0 +2024-09-19 21:57:23,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=753140.0, ans=0.125 +2024-09-19 21:57:25,106 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.02 vs. limit=12.0 +2024-09-19 21:57:39,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=753180.0, ans=0.1 +2024-09-19 21:57:40,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=15.0 +2024-09-19 21:58:03,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=753260.0, ans=0.0 +2024-09-19 21:58:05,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=753260.0, ans=0.0 +2024-09-19 21:58:05,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=753260.0, ans=0.0 +2024-09-19 21:58:18,578 INFO [train.py:1198] (0/2) Epoch 42, batch 2800, loss[loss=0.2636, ctc_loss=0.1503, cr_loss=0.4094, attn_decoder_loss=0.2671, over 19953.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1115, cr_loss=0.3513, attn_decoder_loss=0.2385, over 5778317.23 frames. ], batch size: 209, lr: 2.62e-03, grad_scale: 32.0 +2024-09-19 21:58:26,722 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.59 vs. limit=15.0 +2024-09-19 21:58:44,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=753340.0, ans=0.1 +2024-09-19 21:58:47,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=753380.0, ans=0.1 +2024-09-19 21:58:51,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=753380.0, ans=0.125 +2024-09-19 21:59:05,848 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:59:11,660 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=753420.0, ans=0.035 +2024-09-19 21:59:13,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=753420.0, ans=0.0 +2024-09-19 21:59:15,736 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.31 vs. 
limit=15.0 +2024-09-19 21:59:22,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=753460.0, ans=0.125 +2024-09-19 21:59:29,300 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.509e+01 8.790e+01 9.273e+01 9.887e+01 2.081e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 21:59:32,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=753460.0, ans=0.125 +2024-09-19 21:59:34,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=753500.0, ans=0.125 +2024-09-19 21:59:35,331 INFO [train.py:1198] (0/2) Epoch 42, batch 2850, loss[loss=0.2266, ctc_loss=0.1031, cr_loss=0.3346, attn_decoder_loss=0.2329, over 29516.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1117, cr_loss=0.3517, attn_decoder_loss=0.2389, over 5763642.41 frames. ], batch size: 77, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:59:47,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=753500.0, ans=0.125 +2024-09-19 21:59:47,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=753500.0, ans=0.2 +2024-09-19 21:59:49,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=753540.0, ans=0.0 +2024-09-19 22:00:06,552 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.88 vs. limit=15.0 +2024-09-19 22:00:33,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=753620.0, ans=0.125 +2024-09-19 22:00:37,045 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.76 vs. limit=15.0 +2024-09-19 22:00:41,032 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=753660.0, ans=0.07 +2024-09-19 22:00:46,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=753660.0, ans=0.1 +2024-09-19 22:00:51,145 INFO [train.py:1198] (0/2) Epoch 42, batch 2900, loss[loss=0.2267, ctc_loss=0.1032, cr_loss=0.3418, attn_decoder_loss=0.2328, over 29425.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1125, cr_loss=0.3539, attn_decoder_loss=0.2401, over 5788719.71 frames. 
], batch size: 79, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:01:16,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=753740.0, ans=0.2 +2024-09-19 22:01:31,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=753780.0, ans=0.2 +2024-09-19 22:01:43,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=753820.0, ans=0.125 +2024-09-19 22:01:45,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=753820.0, ans=0.0 +2024-09-19 22:01:46,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=753820.0, ans=0.0 +2024-09-19 22:01:54,506 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.15 vs. limit=6.0 +2024-09-19 22:02:02,879 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.702e+01 8.715e+01 9.227e+01 9.833e+01 2.599e+02, threshold=1.845e+02, percent-clipped=1.0 +2024-09-19 22:02:08,881 INFO [train.py:1198] (0/2) Epoch 42, batch 2950, loss[loss=0.2166, ctc_loss=0.1042, cr_loss=0.3422, attn_decoder_loss=0.2215, over 29513.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1118, cr_loss=0.3522, attn_decoder_loss=0.2389, over 5783447.02 frames. ], batch size: 75, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:02:18,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=753900.0, ans=0.125 +2024-09-19 22:02:33,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=753940.0, ans=0.1 +2024-09-19 22:02:39,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=753980.0, ans=0.025 +2024-09-19 22:02:55,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=754020.0, ans=0.125 +2024-09-19 22:03:08,874 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.63 vs. limit=15.0 +2024-09-19 22:03:26,591 INFO [train.py:1198] (0/2) Epoch 42, batch 3000, loss[loss=0.2382, ctc_loss=0.117, cr_loss=0.3576, attn_decoder_loss=0.2438, over 29755.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1119, cr_loss=0.3523, attn_decoder_loss=0.2388, over 5783881.88 frames. ], batch size: 81, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:03:26,592 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-19 22:03:36,046 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.4.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([3.2613, 3.9431, 3.7381, 3.3029], device='cuda:0') +2024-09-19 22:03:44,994 INFO [train.py:1230] (0/2) Epoch 42, validation: loss=0.212, ctc_loss=0.03659, cr_loss=6.044e-15, attn_decoder_loss=0.2315, over 944034.00 frames. +2024-09-19 22:03:44,995 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-19 22:04:18,002 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.02 vs. 
limit=22.5 +2024-09-19 22:04:38,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=754220.0, ans=0.0 +2024-09-19 22:04:40,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=754220.0, ans=0.05 +2024-09-19 22:04:56,824 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 8.635e+01 9.210e+01 9.879e+01 1.269e+02, threshold=1.842e+02, percent-clipped=0.0 +2024-09-19 22:05:03,034 INFO [train.py:1198] (0/2) Epoch 42, batch 3050, loss[loss=0.2186, ctc_loss=0.09853, cr_loss=0.3277, attn_decoder_loss=0.2246, over 29533.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1121, cr_loss=0.3527, attn_decoder_loss=0.2394, over 5777161.23 frames. ], batch size: 76, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:05:04,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=754300.0, ans=0.125 +2024-09-19 22:05:19,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=754340.0, ans=0.125 +2024-09-19 22:05:52,282 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.98 vs. limit=15.0 +2024-09-19 22:06:18,289 INFO [train.py:1198] (0/2) Epoch 42, batch 3100, loss[loss=0.2378, ctc_loss=0.1139, cr_loss=0.33, attn_decoder_loss=0.2442, over 29304.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1119, cr_loss=0.352, attn_decoder_loss=0.239, over 5776917.96 frames. ], batch size: 100, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:06:29,939 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.06 vs. limit=15.0 +2024-09-19 22:07:05,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=754620.0, ans=0.0 +2024-09-19 22:07:12,922 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.46 vs. limit=22.5 +2024-09-19 22:07:16,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=754620.0, ans=0.125 +2024-09-19 22:07:20,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=754660.0, ans=0.125 +2024-09-19 22:07:30,014 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.575e+01 9.075e+01 9.708e+01 6.330e+02, threshold=1.815e+02, percent-clipped=2.0 +2024-09-19 22:07:36,120 INFO [train.py:1198] (0/2) Epoch 42, batch 3150, loss[loss=0.2437, ctc_loss=0.118, cr_loss=0.3659, attn_decoder_loss=0.2496, over 28882.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1117, cr_loss=0.3515, attn_decoder_loss=0.2389, over 5783706.09 frames. 
], batch size: 104, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:07:49,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=754740.0, ans=0.1 +2024-09-19 22:08:03,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=754740.0, ans=0.0 +2024-09-19 22:08:14,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=754780.0, ans=0.125 +2024-09-19 22:08:14,742 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.74 vs. limit=15.0 +2024-09-19 22:08:26,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.93 vs. limit=22.5 +2024-09-19 22:08:40,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=754860.0, ans=0.0 +2024-09-19 22:08:46,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=754860.0, ans=0.0 +2024-09-19 22:08:47,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=754860.0, ans=0.0 +2024-09-19 22:08:52,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=754900.0, ans=0.1 +2024-09-19 22:08:53,271 INFO [train.py:1198] (0/2) Epoch 42, batch 3200, loss[loss=0.2227, ctc_loss=0.1084, cr_loss=0.3485, attn_decoder_loss=0.2276, over 29766.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1111, cr_loss=0.3502, attn_decoder_loss=0.238, over 5794924.87 frames. ], batch size: 80, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 22:09:02,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=754900.0, ans=0.125 +2024-09-19 22:09:11,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=754940.0, ans=0.125 +2024-09-19 22:09:17,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=754940.0, ans=0.125 +2024-09-19 22:09:32,208 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.18 vs. limit=6.0 +2024-09-19 22:09:46,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=755020.0, ans=0.125 +2024-09-19 22:10:03,487 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.581e+01 9.115e+01 9.616e+01 1.393e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-19 22:10:09,484 INFO [train.py:1198] (0/2) Epoch 42, batch 3250, loss[loss=0.2404, ctc_loss=0.1229, cr_loss=0.388, attn_decoder_loss=0.2448, over 29712.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1116, cr_loss=0.3514, attn_decoder_loss=0.2386, over 5801375.90 frames. ], batch size: 84, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:10:12,219 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.26 vs. 
limit=15.0 +2024-09-19 22:10:17,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=755100.0, ans=0.125 +2024-09-19 22:10:20,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=755100.0, ans=0.2 +2024-09-19 22:10:24,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=755140.0, ans=0.07 +2024-09-19 22:10:58,936 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=10.72 vs. limit=12.0 +2024-09-19 22:11:06,500 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.98 vs. limit=15.0 +2024-09-19 22:11:14,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=755260.0, ans=0.2 +2024-09-19 22:11:26,684 INFO [train.py:1198] (0/2) Epoch 42, batch 3300, loss[loss=0.2467, ctc_loss=0.1179, cr_loss=0.3521, attn_decoder_loss=0.2532, over 28235.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1107, cr_loss=0.3497, attn_decoder_loss=0.2375, over 5797790.37 frames. ], batch size: 111, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:11:28,684 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=755300.0, ans=0.125 +2024-09-19 22:11:54,767 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.11 vs. limit=22.5 +2024-09-19 22:12:11,021 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.71 vs. limit=22.5 +2024-09-19 22:12:39,471 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.752e+01 8.624e+01 9.226e+01 9.886e+01 3.496e+02, threshold=1.845e+02, percent-clipped=4.0 +2024-09-19 22:12:39,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=755460.0, ans=0.125 +2024-09-19 22:12:44,137 INFO [train.py:1198] (0/2) Epoch 42, batch 3350, loss[loss=0.2426, ctc_loss=0.1143, cr_loss=0.3629, attn_decoder_loss=0.2487, over 28854.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1114, cr_loss=0.3511, attn_decoder_loss=0.2383, over 5773960.32 frames. ], batch size: 104, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:13:02,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=755540.0, ans=0.125 +2024-09-19 22:13:07,980 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.27 vs. 
limit=22.5 +2024-09-19 22:13:10,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=755540.0, ans=0.1 +2024-09-19 22:13:29,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=755620.0, ans=0.2 +2024-09-19 22:13:34,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=755620.0, ans=0.2 +2024-09-19 22:13:40,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=755620.0, ans=0.0 +2024-09-19 22:13:41,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=755620.0, ans=0.0 +2024-09-19 22:13:49,968 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.68 vs. limit=15.0 +2024-09-19 22:13:59,651 INFO [train.py:1198] (0/2) Epoch 42, batch 3400, loss[loss=0.2066, ctc_loss=0.09239, cr_loss=0.3126, attn_decoder_loss=0.2123, over 29343.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1117, cr_loss=0.3517, attn_decoder_loss=0.2384, over 5764968.46 frames. ], batch size: 67, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:14:13,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=755740.0, ans=0.025 +2024-09-19 22:14:49,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=755820.0, ans=0.1 +2024-09-19 22:14:49,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.93 vs. limit=6.0 +2024-09-19 22:14:50,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=755820.0, ans=0.1 +2024-09-19 22:15:10,682 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=4.87 vs. limit=15.0 +2024-09-19 22:15:12,722 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.652e+01 8.618e+01 8.954e+01 9.599e+01 1.831e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 22:15:17,198 INFO [train.py:1198] (0/2) Epoch 42, batch 3450, loss[loss=0.2434, ctc_loss=0.1251, cr_loss=0.3875, attn_decoder_loss=0.248, over 28502.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1119, cr_loss=0.3523, attn_decoder_loss=0.2386, over 5772882.16 frames. ], batch size: 112, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:15:25,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=755900.0, ans=0.0 +2024-09-19 22:15:47,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=755980.0, ans=0.035 +2024-09-19 22:15:50,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=755980.0, ans=0.1 +2024-09-19 22:15:58,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=755980.0, ans=0.0 +2024-09-19 22:16:03,453 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.33 vs. 
limit=15.0 +2024-09-19 22:16:06,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.66 vs. limit=15.0 +2024-09-19 22:16:14,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=756020.0, ans=0.0 +2024-09-19 22:16:21,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=756060.0, ans=0.025 +2024-09-19 22:16:34,972 INFO [train.py:1198] (0/2) Epoch 42, batch 3500, loss[loss=0.2121, ctc_loss=0.09119, cr_loss=0.2943, attn_decoder_loss=0.219, over 29282.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1116, cr_loss=0.3517, attn_decoder_loss=0.2382, over 5775042.91 frames. ], batch size: 71, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:16:36,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=756100.0, ans=0.0 +2024-09-19 22:16:36,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=756100.0, ans=0.0 +2024-09-19 22:17:02,714 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.36 vs. limit=6.0 +2024-09-19 22:17:03,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=756180.0, ans=0.025 +2024-09-19 22:17:03,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=756180.0, ans=0.2 +2024-09-19 22:17:15,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=756180.0, ans=0.125 +2024-09-19 22:17:26,377 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.09 vs. limit=15.0 +2024-09-19 22:17:37,044 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.22 vs. limit=12.0 +2024-09-19 22:17:43,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=756260.0, ans=0.1 +2024-09-19 22:17:44,786 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.395e+01 8.622e+01 9.000e+01 9.662e+01 3.411e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-19 22:17:49,260 INFO [train.py:1198] (0/2) Epoch 42, batch 3550, loss[loss=0.2513, ctc_loss=0.1207, cr_loss=0.3608, attn_decoder_loss=0.2578, over 29704.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1116, cr_loss=0.3517, attn_decoder_loss=0.2384, over 5782461.41 frames. 
], batch size: 89, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:17:49,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=756300.0, ans=0.0 +2024-09-19 22:18:02,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=756340.0, ans=0.125 +2024-09-19 22:18:02,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=756340.0, ans=0.125 +2024-09-19 22:18:07,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=756340.0, ans=0.0 +2024-09-19 22:18:39,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=756420.0, ans=0.05 +2024-09-19 22:18:51,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.90 vs. limit=10.0 +2024-09-19 22:18:52,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=756460.0, ans=0.1 +2024-09-19 22:19:02,903 INFO [train.py:1198] (0/2) Epoch 42, batch 3600, loss[loss=0.2213, ctc_loss=0.1039, cr_loss=0.3269, attn_decoder_loss=0.2271, over 29525.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1116, cr_loss=0.352, attn_decoder_loss=0.2385, over 5792065.62 frames. ], batch size: 77, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:19:21,191 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=756540.0, ans=0.09899494936611666 +2024-09-19 22:19:22,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=756540.0, ans=0.125 +2024-09-19 22:19:25,007 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.87 vs. limit=15.0 +2024-09-19 22:19:27,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=756540.0, ans=0.125 +2024-09-19 22:19:32,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.99 vs. limit=12.0 +2024-09-19 22:19:38,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=756580.0, ans=0.0 +2024-09-19 22:19:44,188 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.09 vs. limit=15.0 +2024-09-19 22:19:57,594 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.43 vs. 
limit=6.0 +2024-09-19 22:20:04,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=756660.0, ans=0.1 +2024-09-19 22:20:05,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=756660.0, ans=0.0 +2024-09-19 22:20:14,584 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.291e+01 8.526e+01 8.930e+01 9.587e+01 1.613e+02, threshold=1.786e+02, percent-clipped=0.0 +2024-09-19 22:20:19,051 INFO [train.py:1198] (0/2) Epoch 42, batch 3650, loss[loss=0.2365, ctc_loss=0.1246, cr_loss=0.381, attn_decoder_loss=0.2404, over 29489.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.111, cr_loss=0.3507, attn_decoder_loss=0.2379, over 5793452.74 frames. ], batch size: 90, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:20:34,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=756740.0, ans=0.125 +2024-09-19 22:20:46,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=756740.0, ans=0.125 +2024-09-19 22:21:11,454 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.61 vs. limit=15.0 +2024-09-19 22:21:19,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=756860.0, ans=0.07 +2024-09-19 22:21:20,494 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.97 vs. limit=5.0 +2024-09-19 22:21:24,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=756860.0, ans=15.0 +2024-09-19 22:21:28,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=756860.0, ans=0.125 +2024-09-19 22:21:33,535 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.36 vs. limit=6.0 +2024-09-19 22:21:34,296 INFO [train.py:1198] (0/2) Epoch 42, batch 3700, loss[loss=0.2398, ctc_loss=0.1102, cr_loss=0.35, attn_decoder_loss=0.2464, over 29715.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1108, cr_loss=0.3502, attn_decoder_loss=0.2381, over 5803661.14 frames. ], batch size: 84, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:21:44,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=756900.0, ans=0.1 +2024-09-19 22:21:50,319 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.99 vs. limit=12.0 +2024-09-19 22:22:16,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=756980.0, ans=0.125 +2024-09-19 22:22:27,667 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.30 vs. 
+2024-09-19 22:22:33,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=757020.0, ans=0.0
+2024-09-19 22:22:46,054 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.519e+01 8.506e+01 9.125e+01 9.829e+01 2.175e+02, threshold=1.825e+02, percent-clipped=1.0
+2024-09-19 22:22:50,495 INFO [train.py:1198] (0/2) Epoch 42, batch 3750, loss[loss=0.2055, ctc_loss=0.09267, cr_loss=0.3168, attn_decoder_loss=0.211, over 29308.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1102, cr_loss=0.349, attn_decoder_loss=0.2376, over 5806050.15 frames. ], batch size: 67, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:23:11,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=757140.0, ans=0.125
+2024-09-19 22:23:24,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=757180.0, ans=0.125
+2024-09-19 22:23:26,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=757180.0, ans=0.125
+2024-09-19 22:23:38,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=757220.0, ans=10.0
+2024-09-19 22:23:45,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=757220.0, ans=0.0
+2024-09-19 22:24:04,581 INFO [train.py:1198] (0/2) Epoch 42, batch 3800, loss[loss=0.2303, ctc_loss=0.09818, cr_loss=0.3156, attn_decoder_loss=0.238, over 29621.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1101, cr_loss=0.3485, attn_decoder_loss=0.2374, over 5797246.97 frames. ], batch size: 86, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:24:10,018 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.27 vs. limit=15.0
+2024-09-19 22:24:11,429 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.58 vs. limit=22.5
+2024-09-19 22:24:16,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=757300.0, ans=0.125
+2024-09-19 22:24:33,098 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=757380.0, ans=0.125
+2024-09-19 22:24:39,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.78 vs. limit=22.5
+2024-09-19 22:25:13,885 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.649e+01 9.029e+01 9.772e+01 5.131e+02, threshold=1.806e+02, percent-clipped=1.0
+2024-09-19 22:25:16,217 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.10 vs. limit=22.5
+2024-09-19 22:25:17,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=757500.0, ans=0.125
+2024-09-19 22:25:18,240 INFO [train.py:1198] (0/2) Epoch 42, batch 3850, loss[loss=0.2599, ctc_loss=0.132, cr_loss=0.3847, attn_decoder_loss=0.2655, over 29292.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1103, cr_loss=0.3491, attn_decoder_loss=0.2376, over 5810881.80 frames. ], batch size: 100, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:25:20,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=757500.0, ans=0.0
+2024-09-19 22:25:20,501 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.53 vs. limit=15.0
+2024-09-19 22:25:33,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=757540.0, ans=0.125
+2024-09-19 22:25:39,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=757540.0, ans=0.125
+2024-09-19 22:25:46,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=757580.0, ans=0.1
+2024-09-19 22:25:47,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=757580.0, ans=0.0
+2024-09-19 22:25:55,960 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 22:26:06,767 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.00 vs. limit=10.0
+2024-09-19 22:26:33,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=757700.0, ans=15.0
+2024-09-19 22:26:33,899 INFO [train.py:1198] (0/2) Epoch 42, batch 3900, loss[loss=0.2468, ctc_loss=0.1165, cr_loss=0.3668, attn_decoder_loss=0.2531, over 29630.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1107, cr_loss=0.3501, attn_decoder_loss=0.238, over 5815839.82 frames. ], batch size: 86, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:26:37,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=757700.0, ans=0.0
+2024-09-19 22:26:38,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=757700.0, ans=0.125
+2024-09-19 22:26:38,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=757700.0, ans=0.0
+2024-09-19 22:26:40,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=757700.0, ans=0.125
+2024-09-19 22:26:54,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=757740.0, ans=0.0
+2024-09-19 22:27:03,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=757780.0, ans=0.5
+2024-09-19 22:27:11,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=757780.0, ans=0.125
+2024-09-19 22:27:30,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=757820.0, ans=10.0
+2024-09-19 22:27:34,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=757860.0, ans=0.2
+2024-09-19 22:27:41,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=757860.0, ans=0.0
+2024-09-19 22:27:44,479 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.680e+01 8.643e+01 9.033e+01 9.490e+01 1.279e+02, threshold=1.807e+02, percent-clipped=0.0
+2024-09-19 22:27:49,121 INFO [train.py:1198] (0/2) Epoch 42, batch 3950, loss[loss=0.2432, ctc_loss=0.1144, cr_loss=0.3693, attn_decoder_loss=0.2493, over 29461.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1104, cr_loss=0.3495, attn_decoder_loss=0.2378, over 5835145.76 frames. ], batch size: 97, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:27:51,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.14 vs. limit=15.0
+2024-09-19 22:27:52,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=757900.0, ans=0.1
+2024-09-19 22:27:58,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=757900.0, ans=0.0
+2024-09-19 22:28:18,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=757980.0, ans=0.0
+2024-09-19 22:29:02,376 INFO [train.py:1198] (0/2) Epoch 42, batch 4000, loss[loss=0.215, ctc_loss=0.09504, cr_loss=0.3007, attn_decoder_loss=0.2216, over 29516.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1107, cr_loss=0.3499, attn_decoder_loss=0.2381, over 5812832.02 frames. ], batch size: 74, lr: 2.61e-03, grad_scale: 32.0
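Each optim.py:487 WARNING above reports quartiles of recent gradient norms together with the active clipping threshold; in every such line here the threshold equals Clipping_scale times the logged median (e.g. 2.0 × 9.033e+01 ≈ 1.807e+02 just above), which points at a median-based adaptive clip. A hedged sketch of that idea; the class name and window size are illustrative, not icefall's actual optim.py:

```python
# Hedged sketch of median-based adaptive gradient clipping, reverse-engineered
# from the WARNING lines: threshold == Clipping_scale * median of recent norms.
from collections import deque
import torch

class AdaptiveGradClipper:
    def __init__(self, clipping_scale: float = 2.0, window: int = 128):
        self.scale = clipping_scale
        self.norms = deque(maxlen=window)  # recent total grad norms

    def clip_(self, parameters) -> float:
        grads = [p.grad for p in parameters if p.grad is not None]
        # Global norm = sqrt(sum of squared per-tensor norms).
        norm = torch.norm(torch.stack([g.norm() for g in grads])).item()
        self.norms.append(norm)
        median = sorted(self.norms)[len(self.norms) // 2]
        threshold = self.scale * median
        if norm > threshold:  # shrink all grads in place
            for g in grads:
                g.mul_(threshold / norm)
        return norm
```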
+2024-09-19 22:29:20,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=758140.0, ans=0.0
+2024-09-19 22:29:20,235 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 22:29:32,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=758180.0, ans=0.125
+2024-09-19 22:29:40,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=758180.0, ans=0.1
+2024-09-19 22:29:40,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=758180.0, ans=0.125
+2024-09-19 22:29:48,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=758220.0, ans=0.125
+2024-09-19 22:29:49,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=758220.0, ans=0.125
+2024-09-19 22:29:55,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=758220.0, ans=0.125
+2024-09-19 22:29:56,347 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.23 vs. limit=15.0
+2024-09-19 22:30:04,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=758260.0, ans=10.0
+2024-09-19 22:30:13,115 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.365e+01 8.535e+01 8.990e+01 9.708e+01 1.890e+02, threshold=1.798e+02, percent-clipped=2.0
+2024-09-19 22:30:16,060 INFO [train.py:1198] (0/2) Epoch 42, batch 4050, loss[loss=0.2612, ctc_loss=0.1518, cr_loss=0.4075, attn_decoder_loss=0.2643, over 19972.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1113, cr_loss=0.351, attn_decoder_loss=0.2383, over 5795931.20 frames. ], batch size: 210, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:30:23,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=758300.0, ans=0.125
+2024-09-19 22:31:31,088 INFO [train.py:1198] (0/2) Epoch 42, batch 4100, loss[loss=0.2422, ctc_loss=0.1218, cr_loss=0.3598, attn_decoder_loss=0.2476, over 29519.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1116, cr_loss=0.3515, attn_decoder_loss=0.2385, over 5791043.64 frames. ], batch size: 90, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:31:43,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=758500.0, ans=0.125
+2024-09-19 22:32:09,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=758580.0, ans=0.125
+2024-09-19 22:32:16,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=758620.0, ans=0.2
+2024-09-19 22:32:42,743 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.397e+01 8.799e+01 9.217e+01 9.992e+01 1.793e+02, threshold=1.843e+02, percent-clipped=0.0
+2024-09-19 22:32:45,671 INFO [train.py:1198] (0/2) Epoch 42, batch 4150, loss[loss=0.2351, ctc_loss=0.1137, cr_loss=0.3648, attn_decoder_loss=0.2405, over 29496.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1116, cr_loss=0.3516, attn_decoder_loss=0.2384, over 5797299.54 frames. ], batch size: 77, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:32:57,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=758700.0, ans=0.1
+2024-09-19 22:32:57,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=758700.0, ans=0.07
+2024-09-19 22:33:16,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=758780.0, ans=0.125
+2024-09-19 22:33:22,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=758780.0, ans=0.125
+2024-09-19 22:33:58,999 INFO [train.py:1198] (0/2) Epoch 42, batch 4200, loss[loss=0.2577, ctc_loss=0.1402, cr_loss=0.4327, attn_decoder_loss=0.2611, over 29484.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1116, cr_loss=0.352, attn_decoder_loss=0.2385, over 5799671.92 frames. ], batch size: 90, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:34:11,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=758900.0, ans=0.125
+2024-09-19 22:34:30,790 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.23 vs. limit=22.5
+2024-09-19 22:34:31,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=758980.0, ans=0.125
+2024-09-19 22:34:33,619 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.20 vs. limit=15.0
+2024-09-19 22:35:10,285 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.217e+01 8.752e+01 9.321e+01 9.976e+01 3.736e+02, threshold=1.864e+02, percent-clipped=1.0
+2024-09-19 22:35:10,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=759060.0, ans=0.125
+2024-09-19 22:35:13,180 INFO [train.py:1198] (0/2) Epoch 42, batch 4250, loss[loss=0.2114, ctc_loss=0.0876, cr_loss=0.3024, attn_decoder_loss=0.2185, over 29499.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1117, cr_loss=0.3518, attn_decoder_loss=0.2388, over 5806299.39 frames. ], batch size: 74, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:35:23,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=759100.0, ans=0.1
+2024-09-19 22:35:32,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=759140.0, ans=0.2
+2024-09-19 22:35:48,276 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.73 vs. limit=10.0
+2024-09-19 22:35:52,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=759180.0, ans=0.125
+2024-09-19 22:36:11,066 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.54 vs. limit=10.0
+2024-09-19 22:36:27,662 INFO [train.py:1198] (0/2) Epoch 42, batch 4300, loss[loss=0.2399, ctc_loss=0.1161, cr_loss=0.368, attn_decoder_loss=0.2455, over 29525.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1115, cr_loss=0.3512, attn_decoder_loss=0.2388, over 5795488.81 frames. ], batch size: 87, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:36:30,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=759300.0, ans=0.1
+2024-09-19 22:36:34,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.04 vs. limit=15.0
+2024-09-19 22:36:44,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=759340.0, ans=0.1
+2024-09-19 22:36:49,395 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.68 vs. limit=15.0
+2024-09-19 22:37:00,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=759380.0, ans=0.125
+2024-09-19 22:37:22,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=759420.0, ans=0.0
+2024-09-19 22:37:38,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.00 vs. limit=15.0
+2024-09-19 22:37:38,940 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.572e+01 8.652e+01 9.323e+01 9.871e+01 1.907e+02, threshold=1.865e+02, percent-clipped=1.0
+2024-09-19 22:37:41,935 INFO [train.py:1198] (0/2) Epoch 42, batch 4350, loss[loss=0.2557, ctc_loss=0.1235, cr_loss=0.379, attn_decoder_loss=0.2619, over 29525.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1141, cr_loss=0.3565, attn_decoder_loss=0.2421, over 5797781.91 frames. ], batch size: 97, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:37:42,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=759500.0, ans=0.125
+2024-09-19 22:38:17,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=759580.0, ans=0.125
+2024-09-19 22:38:31,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=759620.0, ans=0.2
+2024-09-19 22:38:39,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=759620.0, ans=0.125
+2024-09-19 22:38:55,424 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.75 vs. limit=10.0
+2024-09-19 22:38:56,125 INFO [train.py:1198] (0/2) Epoch 42, batch 4400, loss[loss=0.2399, ctc_loss=0.1204, cr_loss=0.3721, attn_decoder_loss=0.2449, over 27398.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1151, cr_loss=0.3585, attn_decoder_loss=0.244, over 5768697.79 frames. ], batch size: 124, lr: 2.61e-03, grad_scale: 32.0
+2024-09-19 22:39:01,307 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.00 vs. limit=10.0
+2024-09-19 22:39:32,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=759780.0, ans=0.1
+2024-09-19 22:39:42,056 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.73 vs. limit=15.0
+2024-09-19 22:40:06,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=759860.0, ans=0.1
+2024-09-19 22:40:07,764 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.246e+01 9.179e+01 9.547e+01 1.014e+02 2.970e+02, threshold=1.909e+02, percent-clipped=2.0
+2024-09-19 22:40:09,216 INFO [train.py:1198] (0/2) Epoch 42, batch 4450, loss[loss=0.2529, ctc_loss=0.1369, cr_loss=0.3714, attn_decoder_loss=0.2575, over 20018.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1182, cr_loss=0.3636, attn_decoder_loss=0.2458, over 5582453.23 frames. ], batch size: 210, lr: 2.61e-03, grad_scale: 16.0
+2024-09-19 22:40:10,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=759900.0, ans=0.125
+2024-09-19 22:40:34,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=759940.0, ans=0.0
+2024-09-19 22:40:42,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=759980.0, ans=0.2
+2024-09-19 22:40:59,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=760020.0, ans=0.0
+2024-09-19 22:40:59,679 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.39 vs. limit=15.0
+2024-09-19 22:41:04,036 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.87 vs. limit=15.0
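The `grad_scale` field in the train.py lines is the mixed-precision loss scale; it drops 32.0 → 16.0 → 8.0 across batches 4400–4500 above, exactly where the very long utterances (batch size 210) appear, which matches the usual GradScaler backoff on overflowing gradients. A sketch using standard torch.cuda.amp; this is assumed-equivalent boilerplate, not icefall's exact training loop:

```python
# Hedged sketch of the dynamic loss scaling implied by the grad_scale field.
import torch

scaler = torch.cuda.amp.GradScaler(init_scale=32.0)

def training_step(model, optimizer, batch, criterion):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = criterion(model(batch["inputs"]), batch["targets"])
    scaler.scale(loss).backward()
    scaler.step(optimizer)   # skipped if inf/nan gradients were found
    scaler.update()          # halves the scale after a bad step, grows it
                             # again after a run of good steps
    return scaler.get_scale()
```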
+2024-09-19 22:41:25,586 INFO [train.py:1198] (0/2) Epoch 42, batch 4500, loss[loss=0.2598, ctc_loss=0.1444, cr_loss=0.402, attn_decoder_loss=0.2637, over 20598.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1212, cr_loss=0.3664, attn_decoder_loss=0.2476, over 5241073.03 frames. ], batch size: 210, lr: 2.61e-03, grad_scale: 8.0
+2024-09-19 22:41:32,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=760100.0, ans=0.2
+2024-09-19 22:41:39,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=760140.0, ans=0.2
+2024-09-19 22:41:41,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=760140.0, ans=0.125
+2024-09-19 22:41:54,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=760180.0, ans=0.015
+2024-09-19 22:41:57,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=760180.0, ans=0.2
+2024-09-19 22:42:03,068 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-42.pt
+2024-09-19 22:42:41,134 INFO [train.py:1198] (0/2) Epoch 43, batch 0, loss[loss=0.2039, ctc_loss=0.08759, cr_loss=0.293, attn_decoder_loss=0.2103, over 29619.00 frames. ], tot_loss[loss=0.2039, ctc_loss=0.08759, cr_loss=0.293, attn_decoder_loss=0.2103, over 29619.00 frames. ], batch size: 73, lr: 2.58e-03, grad_scale: 16.0
+2024-09-19 22:42:41,135 INFO [train.py:1221] (0/2) Computing validation loss
+2024-09-19 22:42:44,316 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.1.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([4.3358, 3.8196, 4.1357, 4.2389], device='cuda:0')
+2024-09-19 22:43:00,148 INFO [train.py:1230] (0/2) Epoch 43, validation: loss=0.2125, ctc_loss=0.03634, cr_loss=6.648e-15, attn_decoder_loss=0.2321, over 944034.00 frames.
+2024-09-19 22:43:00,148 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB
+2024-09-19 22:43:07,315 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.51 vs. limit=6.0
+2024-09-19 22:43:09,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=760200.0, ans=0.1
+2024-09-19 22:43:11,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=760200.0, ans=0.5
+2024-09-19 22:43:26,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=760240.0, ans=0.1
+2024-09-19 22:43:39,928 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.895e+01 1.042e+02 1.140e+02 1.225e+02 1.755e+02, threshold=2.281e+02, percent-clipped=0.0
+2024-09-19 22:43:49,795 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.88 vs. limit=15.0
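At the epoch 42/43 boundary just above, the trainer saves epoch-42.pt, computes a validation loss over the full validation set, and reports peak GPU memory. A hedged sketch of that sequence; `compute_loss` is an assumed helper and the checkpoint layout is illustrative (the real logic lives in icefall's checkpoint.py and train.py):

```python
# Hedged sketch of the epoch-boundary steps visible in the log above.
import torch

def end_of_epoch(model, optimizer, epoch: int, valid_loader, exp_dir: str):
    # Save an epoch checkpoint (illustrative file layout).
    torch.save(
        {"model": model.state_dict(), "optimizer": optimizer.state_dict()},
        f"{exp_dir}/epoch-{epoch}.pt",
    )
    # Frame-weighted validation loss, as in the "over 944034.00 frames" line.
    model.eval()
    total_loss, total_frames = 0.0, 0
    with torch.no_grad():
        for batch in valid_loader:
            loss, num_frames = compute_loss(model, batch)  # assumed helper
            total_loss += loss.item() * num_frames
            total_frames += num_frames
    model.train()
    print(f"validation: loss={total_loss / total_frames:.4f}, "
          f"over {total_frames} frames")
    print(f"Maximum memory allocated so far is "
          f"{torch.cuda.max_memory_allocated() // 2**20}MB")
```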
+2024-09-19 22:44:11,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=760360.0, ans=0.125
+2024-09-19 22:44:17,477 INFO [train.py:1198] (0/2) Epoch 43, batch 50, loss[loss=0.2085, ctc_loss=0.0911, cr_loss=0.3043, attn_decoder_loss=0.2148, over 29440.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1121, cr_loss=0.3516, attn_decoder_loss=0.2389, over 1267312.63 frames. ], batch size: 70, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:44:55,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=760480.0, ans=0.2
+2024-09-19 22:45:05,359 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.69 vs. limit=6.0
+2024-09-19 22:45:06,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=760520.0, ans=0.035
+2024-09-19 22:45:24,798 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.45 vs. limit=15.0
+2024-09-19 22:45:33,227 INFO [train.py:1198] (0/2) Epoch 43, batch 100, loss[loss=0.2203, ctc_loss=0.1015, cr_loss=0.3325, attn_decoder_loss=0.2261, over 29555.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1135, cr_loss=0.3549, attn_decoder_loss=0.2411, over 2252337.85 frames. ], batch size: 76, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:45:34,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=760600.0, ans=0.2
+2024-09-19 22:45:53,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=760640.0, ans=0.125
+2024-09-19 22:46:10,561 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.246e+01 8.774e+01 9.184e+01 9.707e+01 2.214e+02, threshold=1.837e+02, percent-clipped=0.0
+2024-09-19 22:46:10,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=760680.0, ans=0.1
+2024-09-19 22:46:12,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=760680.0, ans=0.1
+2024-09-19 22:46:53,114 INFO [train.py:1198] (0/2) Epoch 43, batch 150, loss[loss=0.2101, ctc_loss=0.09194, cr_loss=0.3154, attn_decoder_loss=0.2162, over 29421.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1121, cr_loss=0.3528, attn_decoder_loss=0.2394, over 3047524.24 frames. ], batch size: 70, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:47:07,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=760840.0, ans=0.125
+2024-09-19 22:47:13,253 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 22:47:17,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=760840.0, ans=0.0
+2024-09-19 22:47:28,350 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.36 vs. limit=15.0
+2024-09-19 22:47:30,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=760880.0, ans=0.0
+2024-09-19 22:47:42,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=760920.0, ans=0.0
+2024-09-19 22:48:07,816 INFO [train.py:1198] (0/2) Epoch 43, batch 200, loss[loss=0.2424, ctc_loss=0.1197, cr_loss=0.366, attn_decoder_loss=0.2479, over 27593.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1114, cr_loss=0.352, attn_decoder_loss=0.2385, over 3659758.62 frames. ], batch size: 125, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:48:15,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=761000.0, ans=0.1
+2024-09-19 22:48:28,941 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=761040.0, ans=0.125
+2024-09-19 22:48:32,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=761040.0, ans=0.05
+2024-09-19 22:48:34,278 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.93 vs. limit=10.0
+2024-09-19 22:48:44,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=761080.0, ans=0.2
+2024-09-19 22:48:45,409 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.183e+01 8.451e+01 8.919e+01 9.338e+01 1.606e+02, threshold=1.784e+02, percent-clipped=0.0
+2024-09-19 22:49:23,061 INFO [train.py:1198] (0/2) Epoch 43, batch 250, loss[loss=0.2447, ctc_loss=0.1194, cr_loss=0.3815, attn_decoder_loss=0.2502, over 29275.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1115, cr_loss=0.3517, attn_decoder_loss=0.2383, over 4141175.69 frames. ], batch size: 100, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:49:25,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=761200.0, ans=0.1
+2024-09-19 22:50:08,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=761320.0, ans=0.1
+2024-09-19 22:50:35,101 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 22:50:40,744 INFO [train.py:1198] (0/2) Epoch 43, batch 300, loss[loss=0.2406, ctc_loss=0.1178, cr_loss=0.3813, attn_decoder_loss=0.2457, over 29517.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1109, cr_loss=0.351, attn_decoder_loss=0.2378, over 4510601.90 frames. ], batch size: 92, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:50:44,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=761400.0, ans=0.125
+2024-09-19 22:50:55,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=761400.0, ans=0.0
+2024-09-19 22:51:14,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=761480.0, ans=0.125
+2024-09-19 22:51:20,733 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.509e+01 8.675e+01 9.148e+01 9.609e+01 2.085e+02, threshold=1.830e+02, percent-clipped=1.0
+2024-09-19 22:51:25,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=761480.0, ans=0.125
+2024-09-19 22:51:34,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=761520.0, ans=0.125
+2024-09-19 22:51:59,187 INFO [train.py:1198] (0/2) Epoch 43, batch 350, loss[loss=0.2076, ctc_loss=0.09594, cr_loss=0.3221, attn_decoder_loss=0.2129, over 29324.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1112, cr_loss=0.3519, attn_decoder_loss=0.2382, over 4795857.38 frames. ], batch size: 71, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:52:12,168 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.29 vs. limit=22.5
+2024-09-19 22:52:41,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=761680.0, ans=0.125
+2024-09-19 22:53:14,431 INFO [train.py:1198] (0/2) Epoch 43, batch 400, loss[loss=0.2349, ctc_loss=0.1077, cr_loss=0.3404, attn_decoder_loss=0.2415, over 29728.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1106, cr_loss=0.3508, attn_decoder_loss=0.2379, over 5026435.87 frames. ], batch size: 82, lr: 2.57e-03, grad_scale: 32.0
+2024-09-19 22:53:14,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=761800.0, ans=0.125
+2024-09-19 22:53:16,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=761800.0, ans=0.2
+2024-09-19 22:53:29,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=761840.0, ans=0.025
+2024-09-19 22:53:31,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=761840.0, ans=0.125
+2024-09-19 22:53:33,572 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.66 vs. limit=6.0
+2024-09-19 22:53:38,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=761840.0, ans=0.125
+2024-09-19 22:53:52,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=761880.0, ans=0.0
+2024-09-19 22:53:53,010 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.20 vs. limit=12.0
+2024-09-19 22:53:53,724 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.678e+01 9.168e+01 9.670e+01 1.497e+02, threshold=1.834e+02, percent-clipped=0.0
+2024-09-19 22:53:58,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=761920.0, ans=0.125
+2024-09-19 22:54:04,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=761920.0, ans=0.125
+2024-09-19 22:54:10,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=761920.0, ans=0.2
+2024-09-19 22:54:20,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=761960.0, ans=0.025
+2024-09-19 22:54:22,767 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.35 vs. limit=15.0
+2024-09-19 22:54:29,064 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.97 vs. limit=10.0
+2024-09-19 22:54:32,428 INFO [train.py:1198] (0/2) Epoch 43, batch 450, loss[loss=0.2361, ctc_loss=0.1105, cr_loss=0.345, attn_decoder_loss=0.2424, over 29667.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1109, cr_loss=0.3509, attn_decoder_loss=0.2381, over 5186107.89 frames. ], batch size: 83, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:54:46,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=762040.0, ans=0.1
+2024-09-19 22:55:09,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=762080.0, ans=0.07
+2024-09-19 22:55:15,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=762080.0, ans=0.125
+2024-09-19 22:55:21,186 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.81 vs. limit=15.0
+2024-09-19 22:55:21,783 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=762120.0, ans=0.025
+2024-09-19 22:55:23,691 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.56 vs. limit=10.0
+2024-09-19 22:55:50,316 INFO [train.py:1198] (0/2) Epoch 43, batch 500, loss[loss=0.2503, ctc_loss=0.1271, cr_loss=0.389, attn_decoder_loss=0.2553, over 29474.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1107, cr_loss=0.3498, attn_decoder_loss=0.2377, over 5329135.46 frames. ], batch size: 94, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:55:55,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=762200.0, ans=0.125
+2024-09-19 22:55:56,805 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=762200.0, ans=0.125
+2024-09-19 22:56:01,427 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 22:56:19,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=762280.0, ans=0.0
+2024-09-19 22:56:27,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=762280.0, ans=0.2
+2024-09-19 22:56:29,956 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.288e+01 8.617e+01 8.998e+01 9.696e+01 3.544e+02, threshold=1.800e+02, percent-clipped=2.0
+2024-09-19 22:56:33,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=762280.0, ans=0.125
+2024-09-19 22:56:38,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.98 vs. limit=15.0
+2024-09-19 22:56:50,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.54 vs. limit=15.0
+2024-09-19 22:56:50,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.34 vs. limit=15.0
+2024-09-19 22:56:56,299 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.77 vs. limit=22.5
+2024-09-19 22:57:06,486 INFO [train.py:1198] (0/2) Epoch 43, batch 550, loss[loss=0.2413, ctc_loss=0.1158, cr_loss=0.3575, attn_decoder_loss=0.2473, over 28785.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1112, cr_loss=0.351, attn_decoder_loss=0.2379, over 5421798.15 frames. ], batch size: 104, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:57:42,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=762480.0, ans=0.125
+2024-09-19 22:57:59,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=762520.0, ans=0.125
+2024-09-19 22:58:02,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=762520.0, ans=0.125
+2024-09-19 22:58:16,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=762560.0, ans=0.2
+2024-09-19 22:58:16,886 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 22:58:19,980 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 22:58:24,033 INFO [train.py:1198] (0/2) Epoch 43, batch 600, loss[loss=0.2388, ctc_loss=0.1132, cr_loss=0.3587, attn_decoder_loss=0.2448, over 29198.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1113, cr_loss=0.3513, attn_decoder_loss=0.2382, over 5507763.26 frames. ], batch size: 100, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 22:58:43,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=762640.0, ans=0.2
+2024-09-19 22:58:52,556 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.05 vs. limit=22.5
+2024-09-19 22:59:01,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=762680.0, ans=0.0
+2024-09-19 22:59:05,155 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.540e+01 8.510e+01 8.971e+01 9.586e+01 1.722e+02, threshold=1.794e+02, percent-clipped=0.0
+2024-09-19 22:59:20,325 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=762720.0, ans=0.5
+2024-09-19 22:59:24,035 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.47 vs. limit=15.0
+2024-09-19 22:59:29,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=762760.0, ans=0.2
+2024-09-19 22:59:38,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.59 vs. limit=10.0
+2024-09-19 22:59:40,601 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.91 vs. limit=6.0
+2024-09-19 22:59:41,223 INFO [train.py:1198] (0/2) Epoch 43, batch 650, loss[loss=0.2291, ctc_loss=0.1051, cr_loss=0.3337, attn_decoder_loss=0.2355, over 29770.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1106, cr_loss=0.3499, attn_decoder_loss=0.2377, over 5585268.95 frames. ], batch size: 81, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:00:36,294 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.98 vs. limit=15.0
+2024-09-19 23:00:37,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=762920.0, ans=0.0
+2024-09-19 23:00:41,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=762960.0, ans=0.025
+2024-09-19 23:00:43,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=762960.0, ans=0.125
+2024-09-19 23:00:43,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=762960.0, ans=0.025
+2024-09-19 23:00:46,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=762960.0, ans=0.125
+2024-09-19 23:00:56,555 INFO [train.py:1198] (0/2) Epoch 43, batch 700, loss[loss=0.2313, ctc_loss=0.1153, cr_loss=0.3738, attn_decoder_loss=0.2359, over 29542.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1109, cr_loss=0.3507, attn_decoder_loss=0.2381, over 5636719.42 frames. ], batch size: 76, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:01:10,951 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.88 vs. limit=10.0
+2024-09-19 23:01:19,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=763040.0, ans=0.2
+2024-09-19 23:01:28,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=763080.0, ans=0.125
+2024-09-19 23:01:34,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=763080.0, ans=0.125
+2024-09-19 23:01:35,727 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.576e+01 9.155e+01 9.558e+01 1.416e+02, threshold=1.831e+02, percent-clipped=0.0
+2024-09-19 23:01:46,796 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=763120.0, ans=0.125
+2024-09-19 23:01:58,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=763160.0, ans=0.125
+2024-09-19 23:01:58,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=763160.0, ans=0.2
+2024-09-19 23:02:00,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=763160.0, ans=0.025
+2024-09-19 23:02:04,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=763160.0, ans=0.125
+2024-09-19 23:02:14,578 INFO [train.py:1198] (0/2) Epoch 43, batch 750, loss[loss=0.2326, ctc_loss=0.1062, cr_loss=0.3555, attn_decoder_loss=0.2387, over 29723.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1108, cr_loss=0.3505, attn_decoder_loss=0.238, over 5675308.93 frames. ], batch size: 82, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:02:20,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=763200.0, ans=0.125
+2024-09-19 23:02:57,011 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.38 vs. limit=15.0
+2024-09-19 23:03:02,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=763320.0, ans=0.125
+2024-09-19 23:03:03,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=763320.0, ans=0.2
+2024-09-19 23:03:17,341 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 23:03:31,977 INFO [train.py:1198] (0/2) Epoch 43, batch 800, loss[loss=0.2117, ctc_loss=0.0904, cr_loss=0.2983, attn_decoder_loss=0.2185, over 29594.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1107, cr_loss=0.3497, attn_decoder_loss=0.238, over 5706822.83 frames. ], batch size: 73, lr: 2.57e-03, grad_scale: 32.0
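The scaling.py:214 lines throughout this log track ScheduledFloat hyperparameters (dropout probabilities, skip rates, balancer limits) whose values are functions of the global batch count. A minimal sketch of such a piecewise-linear schedule follows; the breakpoints are invented for illustration, and the real schedules are defined in icefall's scaling.py:

```python
# Minimal sketch of a batch-count-keyed schedule like the ScheduledFloat
# values logged above (e.g. dropout_p=0.1, skip_rate=0.0 at batch ~763k).
# The breakpoints below are hypothetical, chosen only to show the mechanism.

def scheduled_float(batch_count: float, points: list[tuple[float, float]]) -> float:
    """Piecewise-linear interpolation over (batch_count, value) breakpoints."""
    if batch_count <= points[0][0]:
        return points[0][1]
    for (x0, y0), (x1, y1) in zip(points, points[1:]):
        if batch_count <= x1:
            t = (batch_count - x0) / (x1 - x0)
            return y0 + t * (y1 - y0)
    return points[-1][1]  # past the last breakpoint, hold the final value

# e.g. a dropout that decays from 0.3 to 0.1 over the first 20k batches
# has long since reached its floor by batch 763400:
assert scheduled_float(763400.0, [(0.0, 0.3), (20000.0, 0.1)]) == 0.1
```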
+2024-09-19 23:03:44,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=763400.0, ans=0.1
+2024-09-19 23:03:44,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=763400.0, ans=0.1
+2024-09-19 23:03:50,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=763440.0, ans=0.0
+2024-09-19 23:04:13,923 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.340e+01 8.564e+01 8.973e+01 9.746e+01 2.709e+02, threshold=1.795e+02, percent-clipped=1.0
+2024-09-19 23:04:17,755 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.35 vs. limit=15.0
+2024-09-19 23:04:33,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=763560.0, ans=0.0
+2024-09-19 23:04:36,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=763560.0, ans=0.125
+2024-09-19 23:04:41,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=763560.0, ans=0.05
+2024-09-19 23:04:47,006 INFO [train.py:1198] (0/2) Epoch 43, batch 850, loss[loss=0.2489, ctc_loss=0.1271, cr_loss=0.3768, attn_decoder_loss=0.2541, over 29680.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1105, cr_loss=0.3496, attn_decoder_loss=0.2377, over 5735965.13 frames. ], batch size: 89, lr: 2.57e-03, grad_scale: 8.0
+2024-09-19 23:04:53,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=763600.0, ans=0.0
+2024-09-19 23:05:33,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=763720.0, ans=0.125
+2024-09-19 23:05:41,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=763720.0, ans=10.0
+2024-09-19 23:05:49,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=763760.0, ans=0.125
+2024-09-19 23:05:52,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=763760.0, ans=0.125
+2024-09-19 23:06:04,721 INFO [train.py:1198] (0/2) Epoch 43, batch 900, loss[loss=0.2156, ctc_loss=0.09557, cr_loss=0.3117, attn_decoder_loss=0.222, over 29596.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1106, cr_loss=0.3495, attn_decoder_loss=0.238, over 5741732.52 frames. ], batch size: 73, lr: 2.57e-03, grad_scale: 8.0
+2024-09-19 23:06:10,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=763800.0, ans=0.05
+2024-09-19 23:06:16,933 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 23:06:16,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=763800.0, ans=0.0
+2024-09-19 23:06:32,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=763840.0, ans=0.0
+2024-09-19 23:06:33,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=763880.0, ans=0.0
+2024-09-19 23:06:46,895 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.378e+01 8.546e+01 9.046e+01 9.640e+01 1.475e+02, threshold=1.809e+02, percent-clipped=0.0
+2024-09-19 23:06:47,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=763880.0, ans=0.125
+2024-09-19 23:07:08,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=763960.0, ans=0.125
+2024-09-19 23:07:22,193 INFO [train.py:1198] (0/2) Epoch 43, batch 950, loss[loss=0.2173, ctc_loss=0.095, cr_loss=0.3149, attn_decoder_loss=0.2239, over 29489.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1108, cr_loss=0.3505, attn_decoder_loss=0.2381, over 5742841.23 frames. ], batch size: 74, lr: 2.57e-03, grad_scale: 8.0
+2024-09-19 23:07:58,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=764080.0, ans=0.025
+2024-09-19 23:08:02,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=764080.0, ans=0.125
+2024-09-19 23:08:03,185 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.48 vs. limit=12.0
+2024-09-19 23:08:20,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=764160.0, ans=0.125
+2024-09-19 23:08:28,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=764160.0, ans=0.0
+2024-09-19 23:08:36,653 INFO [train.py:1198] (0/2) Epoch 43, batch 1000, loss[loss=0.2261, ctc_loss=0.1071, cr_loss=0.3325, attn_decoder_loss=0.232, over 29529.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1114, cr_loss=0.3519, attn_decoder_loss=0.2388, over 5737998.25 frames. ], batch size: 77, lr: 2.57e-03, grad_scale: 8.0
+2024-09-19 23:08:37,797 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=5.87 vs. limit=15.0
+2024-09-19 23:08:39,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=764200.0, ans=0.125
+2024-09-19 23:09:05,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=764280.0, ans=0.125
+2024-09-19 23:09:18,835 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.366e+01 8.655e+01 9.178e+01 9.837e+01 2.417e+02, threshold=1.836e+02, percent-clipped=2.0
+2024-09-19 23:09:19,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=764280.0, ans=0.0
+2024-09-19 23:09:30,387 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.01 vs. limit=15.0
+2024-09-19 23:09:51,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=764360.0, ans=0.125
+2024-09-19 23:09:54,202 INFO [train.py:1198] (0/2) Epoch 43, batch 1050, loss[loss=0.2372, ctc_loss=0.113, cr_loss=0.3557, attn_decoder_loss=0.2431, over 29683.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1112, cr_loss=0.3504, attn_decoder_loss=0.238, over 5745468.84 frames. ], batch size: 85, lr: 2.57e-03, grad_scale: 8.0
+2024-09-19 23:10:01,134 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.19 vs. limit=15.0
+2024-09-19 23:10:18,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=764440.0, ans=0.025
+2024-09-19 23:10:25,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=764480.0, ans=0.125
+2024-09-19 23:10:41,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=764520.0, ans=0.125
+2024-09-19 23:11:11,877 INFO [train.py:1198] (0/2) Epoch 43, batch 1100, loss[loss=0.2399, ctc_loss=0.1184, cr_loss=0.3663, attn_decoder_loss=0.2453, over 29457.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.111, cr_loss=0.3502, attn_decoder_loss=0.238, over 5757734.75 frames. ], batch size: 78, lr: 2.57e-03, grad_scale: 8.0
+2024-09-19 23:11:22,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=764600.0, ans=0.125
+2024-09-19 23:11:39,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=764640.0, ans=0.125
+2024-09-19 23:11:39,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=764640.0, ans=0.025
+2024-09-19 23:11:54,299 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.200e+01 8.378e+01 8.891e+01 9.353e+01 1.322e+02, threshold=1.778e+02, percent-clipped=0.0
+2024-09-19 23:12:27,892 INFO [train.py:1198] (0/2) Epoch 43, batch 1150, loss[loss=0.2231, ctc_loss=0.1045, cr_loss=0.3375, attn_decoder_loss=0.2288, over 29429.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1109, cr_loss=0.3502, attn_decoder_loss=0.2379, over 5756916.04 frames. ], batch size: 78, lr: 2.57e-03, grad_scale: 8.0
+2024-09-19 23:12:28,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=764800.0, ans=0.0
+2024-09-19 23:12:45,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=764840.0, ans=0.1
+2024-09-19 23:12:48,816 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.31 vs. limit=12.0
+2024-09-19 23:12:51,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=764840.0, ans=0.07
+2024-09-19 23:12:54,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=764840.0, ans=0.125
+2024-09-19 23:13:15,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=764920.0, ans=0.1
+2024-09-19 23:13:21,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=764920.0, ans=0.125
+2024-09-19 23:13:45,766 INFO [train.py:1198] (0/2) Epoch 43, batch 1200, loss[loss=0.2476, ctc_loss=0.1203, cr_loss=0.3663, attn_decoder_loss=0.2536, over 29653.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1115, cr_loss=0.3515, attn_decoder_loss=0.2385, over 5750012.39 frames. ], batch size: 85, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:13:49,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=765000.0, ans=0.125
+2024-09-19 23:14:28,141 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.366e+01 8.714e+01 9.128e+01 9.687e+01 4.379e+02, threshold=1.826e+02, percent-clipped=2.0
+2024-09-19 23:14:44,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=765120.0, ans=0.125
+2024-09-19 23:14:54,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=765160.0, ans=0.0
+2024-09-19 23:14:56,245 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=765160.0, ans=0.125
+2024-09-19 23:15:03,375 INFO [train.py:1198] (0/2) Epoch 43, batch 1250, loss[loss=0.2408, ctc_loss=0.1175, cr_loss=0.3653, attn_decoder_loss=0.2464, over 29521.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1118, cr_loss=0.352, attn_decoder_loss=0.239, over 5777671.53 frames. ], batch size: 92, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:15:17,165 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 23:15:28,068 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 23:15:28,447 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.93 vs. limit=12.0
+2024-09-19 23:15:32,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=765280.0, ans=0.0
+2024-09-19 23:15:39,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=765280.0, ans=0.09899494936611666
+2024-09-19 23:15:53,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=765320.0, ans=0.025
+2024-09-19 23:16:02,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=765360.0, ans=0.035
+2024-09-19 23:16:07,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=765360.0, ans=0.1
+2024-09-19 23:16:13,480 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 23:16:17,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=765400.0, ans=0.0
+2024-09-19 23:16:19,057 INFO [train.py:1198] (0/2) Epoch 43, batch 1300, loss[loss=0.2413, ctc_loss=0.1148, cr_loss=0.3656, attn_decoder_loss=0.2472, over 28195.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1112, cr_loss=0.3511, attn_decoder_loss=0.2381, over 5782469.89 frames. ], batch size: 111, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:16:22,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=765400.0, ans=0.125
+2024-09-19 23:16:32,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=765440.0, ans=0.0
+2024-09-19 23:16:52,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=765480.0, ans=0.125
+2024-09-19 23:16:56,452 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.04 vs. limit=15.0
+2024-09-19 23:16:57,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=765480.0, ans=0.125
+2024-09-19 23:17:01,388 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.583e+01 9.046e+01 9.582e+01 1.774e+02, threshold=1.809e+02, percent-clipped=0.0
+2024-09-19 23:17:04,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=765520.0, ans=0.125
+2024-09-19 23:17:13,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=765520.0, ans=0.025
+2024-09-19 23:17:19,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=765560.0, ans=0.1
+2024-09-19 23:17:37,164 INFO [train.py:1198] (0/2) Epoch 43, batch 1350, loss[loss=0.2338, ctc_loss=0.113, cr_loss=0.3499, attn_decoder_loss=0.2394, over 29754.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1109, cr_loss=0.3507, attn_decoder_loss=0.238, over 5798690.41 frames. ], batch size: 81, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:18:54,305 INFO [train.py:1198] (0/2) Epoch 43, batch 1400, loss[loss=0.2063, ctc_loss=0.09094, cr_loss=0.2938, attn_decoder_loss=0.2125, over 29599.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1109, cr_loss=0.3506, attn_decoder_loss=0.238, over 5809586.42 frames. ], batch size: 69, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:19:02,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=765800.0, ans=0.2
+2024-09-19 23:19:03,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=765800.0, ans=0.125
+2024-09-19 23:19:16,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=765840.0, ans=15.0
+2024-09-19 23:19:23,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=765880.0, ans=0.0
+2024-09-19 23:19:29,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=765880.0, ans=0.1
+2024-09-19 23:19:32,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=765880.0, ans=0.0
+2024-09-19 23:19:36,335 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.299e+01 8.462e+01 9.127e+01 9.642e+01 1.340e+02, threshold=1.825e+02, percent-clipped=0.0
+2024-09-19 23:19:56,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=765960.0, ans=0.1
+2024-09-19 23:20:02,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=765960.0, ans=0.125
+2024-09-19 23:20:09,476 INFO [train.py:1198] (0/2) Epoch 43, batch 1450, loss[loss=0.2498, ctc_loss=0.1259, cr_loss=0.3849, attn_decoder_loss=0.255, over 29464.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1107, cr_loss=0.3508, attn_decoder_loss=0.2382, over 5806186.72 frames. ], batch size: 94, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:20:21,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=766000.0, ans=0.0
+2024-09-19 23:20:23,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=766040.0, ans=0.1
+2024-09-19 23:20:33,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=766040.0, ans=0.0
+2024-09-19 23:20:39,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=766080.0, ans=0.2
+2024-09-19 23:20:44,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=766080.0, ans=0.025
+2024-09-19 23:20:47,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=766080.0, ans=0.0
+2024-09-19 23:20:48,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=766080.0, ans=0.125
+2024-09-19 23:21:00,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.25 vs. limit=6.0
+2024-09-19 23:21:26,807 INFO [train.py:1198] (0/2) Epoch 43, batch 1500, loss[loss=0.2406, ctc_loss=0.1156, cr_loss=0.3652, attn_decoder_loss=0.2464, over 29625.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1108, cr_loss=0.3507, attn_decoder_loss=0.2384, over 5806562.00 frames. ], batch size: 86, lr: 2.57e-03, grad_scale: 16.0
+2024-09-19 23:21:30,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=766200.0, ans=0.125
+2024-09-19 23:21:40,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=766240.0, ans=0.2
+2024-09-19 23:21:51,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=766240.0, ans=0.125
+2024-09-19 23:22:09,443 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.081e+01 8.602e+01 9.131e+01 9.560e+01 1.543e+02, threshold=1.826e+02, percent-clipped=0.0
+2024-09-19 23:22:23,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=766320.0, ans=0.0
+2024-09-19 23:22:29,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.41 vs. limit=22.5
+2024-09-19 23:22:45,491 INFO [train.py:1198] (0/2) Epoch 43, batch 1550, loss[loss=0.2446, ctc_loss=0.1254, cr_loss=0.3856, attn_decoder_loss=0.2493, over 29518.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1109, cr_loss=0.3507, attn_decoder_loss=0.2384, over 5782339.06 frames. ], batch size: 90, lr: 2.56e-03, grad_scale: 16.0
+2024-09-19 23:23:36,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=766520.0, ans=0.0
+2024-09-19 23:23:42,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=766520.0, ans=0.1
+2024-09-19 23:23:55,087 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.34 vs. limit=22.5
+2024-09-19 23:24:00,262 INFO [train.py:1198] (0/2) Epoch 43, batch 1600, loss[loss=0.2363, ctc_loss=0.1056, cr_loss=0.3287, attn_decoder_loss=0.2435, over 29670.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1109, cr_loss=0.3506, attn_decoder_loss=0.2381, over 5764558.48 frames. ], batch size: 85, lr: 2.56e-03, grad_scale: 32.0
+2024-09-19 23:24:30,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=766680.0, ans=0.0
+2024-09-19 23:24:44,021 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.796e+01 8.578e+01 9.126e+01 9.935e+01 1.775e+02, threshold=1.825e+02, percent-clipped=0.0
+2024-09-19 23:24:45,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=766720.0, ans=0.0
+2024-09-19 23:24:49,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=766720.0, ans=0.125
+2024-09-19 23:24:51,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.53 vs. limit=12.0
+2024-09-19 23:25:17,716 INFO [train.py:1198] (0/2) Epoch 43, batch 1650, loss[loss=0.2445, ctc_loss=0.1184, cr_loss=0.3613, attn_decoder_loss=0.2504, over 29738.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.111, cr_loss=0.3504, attn_decoder_loss=0.2379, over 5756783.66 frames.
], batch size: 89, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:25:24,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=766800.0, ans=0.1 +2024-09-19 23:25:34,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=766840.0, ans=0.2 +2024-09-19 23:25:36,837 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.40 vs. limit=22.5 +2024-09-19 23:25:41,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=766840.0, ans=0.0 +2024-09-19 23:25:52,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=766880.0, ans=0.0 +2024-09-19 23:26:21,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=766960.0, ans=0.125 +2024-09-19 23:26:34,723 INFO [train.py:1198] (0/2) Epoch 43, batch 1700, loss[loss=0.2114, ctc_loss=0.09423, cr_loss=0.3103, attn_decoder_loss=0.2175, over 29616.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1103, cr_loss=0.3488, attn_decoder_loss=0.2376, over 5779001.63 frames. ], batch size: 69, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:26:39,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=767000.0, ans=0.1 +2024-09-19 23:26:39,635 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=767000.0, ans=0.0 +2024-09-19 23:26:47,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=767000.0, ans=0.125 +2024-09-19 23:26:49,743 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.23 vs. limit=15.0 +2024-09-19 23:26:59,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=767040.0, ans=0.2 +2024-09-19 23:27:18,348 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.484e+01 9.017e+01 9.514e+01 1.146e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-19 23:27:44,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=767160.0, ans=0.0 +2024-09-19 23:27:50,588 INFO [train.py:1198] (0/2) Epoch 43, batch 1750, loss[loss=0.2125, ctc_loss=0.09853, cr_loss=0.3231, attn_decoder_loss=0.218, over 29332.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1101, cr_loss=0.3486, attn_decoder_loss=0.2373, over 5788026.91 frames. 
], batch size: 67, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:27:59,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=767200.0, ans=0.125 +2024-09-19 23:28:10,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=767240.0, ans=0.125 +2024-09-19 23:28:24,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=767280.0, ans=0.1 +2024-09-19 23:28:47,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=767320.0, ans=0.125 +2024-09-19 23:29:07,603 INFO [train.py:1198] (0/2) Epoch 43, batch 1800, loss[loss=0.229, ctc_loss=0.1011, cr_loss=0.3269, attn_decoder_loss=0.2359, over 29679.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1101, cr_loss=0.3484, attn_decoder_loss=0.2374, over 5790689.97 frames. ], batch size: 83, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:29:11,667 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.26 vs. limit=6.0 +2024-09-19 23:29:48,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=767480.0, ans=0.1 +2024-09-19 23:29:50,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=767480.0, ans=0.0 +2024-09-19 23:29:51,300 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.169e+01 8.348e+01 8.939e+01 9.600e+01 1.459e+02, threshold=1.788e+02, percent-clipped=0.0 +2024-09-19 23:29:53,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=767520.0, ans=0.025 +2024-09-19 23:30:05,231 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=767520.0, ans=0.0 +2024-09-19 23:30:17,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=767560.0, ans=0.125 +2024-09-19 23:30:20,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=767560.0, ans=0.125 +2024-09-19 23:30:23,259 INFO [train.py:1198] (0/2) Epoch 43, batch 1850, loss[loss=0.23, ctc_loss=0.1009, cr_loss=0.3018, attn_decoder_loss=0.2376, over 29639.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1102, cr_loss=0.3489, attn_decoder_loss=0.2373, over 5795241.88 frames. ], batch size: 86, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:30:31,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=767600.0, ans=0.0 +2024-09-19 23:30:39,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=767640.0, ans=0.0 +2024-09-19 23:30:39,501 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.46 vs. limit=22.5 +2024-09-19 23:31:35,100 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.19 vs. 
limit=15.0 +2024-09-19 23:31:40,271 INFO [train.py:1198] (0/2) Epoch 43, batch 1900, loss[loss=0.2452, ctc_loss=0.116, cr_loss=0.349, attn_decoder_loss=0.2518, over 29710.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1105, cr_loss=0.3494, attn_decoder_loss=0.2379, over 5803330.94 frames. ], batch size: 89, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:31:49,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=767800.0, ans=0.0 +2024-09-19 23:31:55,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=767840.0, ans=0.125 +2024-09-19 23:32:09,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=767880.0, ans=0.2 +2024-09-19 23:32:13,181 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.17 vs. limit=6.0 +2024-09-19 23:32:24,371 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.677e+01 8.779e+01 9.176e+01 9.742e+01 1.549e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-19 23:32:30,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=767920.0, ans=0.125 +2024-09-19 23:32:45,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=767960.0, ans=0.125 +2024-09-19 23:32:49,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=767960.0, ans=0.125 +2024-09-19 23:32:56,691 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-192000.pt +2024-09-19 23:33:04,946 INFO [train.py:1198] (0/2) Epoch 43, batch 1950, loss[loss=0.2188, ctc_loss=0.1006, cr_loss=0.3224, attn_decoder_loss=0.2248, over 29471.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1111, cr_loss=0.3512, attn_decoder_loss=0.239, over 5818038.45 frames. ], batch size: 78, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:33:05,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=768000.0, ans=0.125 +2024-09-19 23:33:16,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=768000.0, ans=0.0 +2024-09-19 23:33:26,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=768040.0, ans=0.1 +2024-09-19 23:34:07,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=768160.0, ans=0.2 +2024-09-19 23:34:18,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.91 vs. limit=15.0 +2024-09-19 23:34:20,478 INFO [train.py:1198] (0/2) Epoch 43, batch 2000, loss[loss=0.2135, ctc_loss=0.105, cr_loss=0.3483, attn_decoder_loss=0.2178, over 29336.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1117, cr_loss=0.3518, attn_decoder_loss=0.2393, over 5796746.78 frames. 
], batch size: 67, lr: 2.56e-03, grad_scale: 32.0 +2024-09-19 23:34:27,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=768200.0, ans=0.125 +2024-09-19 23:34:34,006 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.08 vs. limit=12.0 +2024-09-19 23:34:36,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=768240.0, ans=0.125 +2024-09-19 23:35:07,942 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.936e+01 8.662e+01 9.256e+01 9.828e+01 2.553e+02, threshold=1.851e+02, percent-clipped=3.0 +2024-09-19 23:35:17,309 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=768320.0, ans=0.0 +2024-09-19 23:35:38,235 INFO [train.py:1198] (0/2) Epoch 43, batch 2050, loss[loss=0.2115, ctc_loss=0.09723, cr_loss=0.3171, attn_decoder_loss=0.2172, over 29442.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1111, cr_loss=0.3502, attn_decoder_loss=0.2383, over 5789279.72 frames. ], batch size: 70, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:36:09,033 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.23 vs. limit=15.0 +2024-09-19 23:36:21,344 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.04 vs. limit=6.0 +2024-09-19 23:36:26,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=768520.0, ans=0.1 +2024-09-19 23:36:27,094 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.83 vs. limit=15.0 +2024-09-19 23:36:43,680 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=768560.0, ans=0.1 +2024-09-19 23:36:43,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=768560.0, ans=0.125 +2024-09-19 23:36:45,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=768560.0, ans=0.0 +2024-09-19 23:36:55,473 INFO [train.py:1198] (0/2) Epoch 43, batch 2100, loss[loss=0.235, ctc_loss=0.1028, cr_loss=0.3492, attn_decoder_loss=0.242, over 29762.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1107, cr_loss=0.3498, attn_decoder_loss=0.2379, over 5800944.88 frames. 
], batch size: 81, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:36:57,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=768600.0, ans=0.2 +2024-09-19 23:37:03,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=768600.0, ans=0.125 +2024-09-19 23:37:10,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=768640.0, ans=0.2 +2024-09-19 23:37:15,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=768640.0, ans=0.125 +2024-09-19 23:37:16,930 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:37:24,611 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.26 vs. limit=15.0 +2024-09-19 23:37:32,193 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.67 vs. limit=15.0 +2024-09-19 23:37:39,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=768720.0, ans=0.1 +2024-09-19 23:37:41,776 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.175e+01 8.395e+01 8.911e+01 9.448e+01 1.160e+02, threshold=1.782e+02, percent-clipped=0.0 +2024-09-19 23:37:53,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=768760.0, ans=10.0 +2024-09-19 23:37:56,296 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.75 vs. limit=6.0 +2024-09-19 23:38:10,685 INFO [train.py:1198] (0/2) Epoch 43, batch 2150, loss[loss=0.2334, ctc_loss=0.1167, cr_loss=0.3579, attn_decoder_loss=0.2384, over 29456.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1106, cr_loss=0.3495, attn_decoder_loss=0.2375, over 5815829.34 frames. ], batch size: 78, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:38:18,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=768800.0, ans=0.0 +2024-09-19 23:38:40,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=768840.0, ans=0.0 +2024-09-19 23:39:04,554 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:39:28,385 INFO [train.py:1198] (0/2) Epoch 43, batch 2200, loss[loss=0.2412, ctc_loss=0.1162, cr_loss=0.3659, attn_decoder_loss=0.2469, over 29625.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1105, cr_loss=0.3494, attn_decoder_loss=0.2375, over 5812053.61 frames. ], batch size: 86, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:39:40,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=769000.0, ans=0.025 +2024-09-19 23:39:45,901 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.73 vs. 
limit=12.0 +2024-09-19 23:39:55,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=769040.0, ans=0.125 +2024-09-19 23:40:10,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=769080.0, ans=0.125 +2024-09-19 23:40:14,955 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.608e+01 8.529e+01 9.034e+01 9.598e+01 1.063e+03, threshold=1.807e+02, percent-clipped=3.0 +2024-09-19 23:40:46,046 INFO [train.py:1198] (0/2) Epoch 43, batch 2250, loss[loss=0.239, ctc_loss=0.1114, cr_loss=0.3491, attn_decoder_loss=0.2454, over 29700.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1104, cr_loss=0.3492, attn_decoder_loss=0.2376, over 5812170.43 frames. ], batch size: 82, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:40:47,953 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:41:10,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=769240.0, ans=0.2 +2024-09-19 23:41:11,239 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.45 vs. limit=22.5 +2024-09-19 23:41:14,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=769280.0, ans=0.1 +2024-09-19 23:41:20,196 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.50 vs. limit=15.0 +2024-09-19 23:41:46,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=769360.0, ans=0.0 +2024-09-19 23:42:01,235 INFO [train.py:1198] (0/2) Epoch 43, batch 2300, loss[loss=0.2034, ctc_loss=0.08519, cr_loss=0.2863, attn_decoder_loss=0.2102, over 29308.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1099, cr_loss=0.3478, attn_decoder_loss=0.2368, over 5800405.72 frames. ], batch size: 71, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:42:29,272 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.91 vs. limit=15.0 +2024-09-19 23:42:49,953 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.561e+01 8.425e+01 9.007e+01 9.590e+01 1.483e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-19 23:42:54,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=769520.0, ans=0.1 +2024-09-19 23:43:12,937 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=769560.0, ans=0.1 +2024-09-19 23:43:19,011 INFO [train.py:1198] (0/2) Epoch 43, batch 2350, loss[loss=0.2377, ctc_loss=0.1082, cr_loss=0.3338, attn_decoder_loss=0.2447, over 29690.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1099, cr_loss=0.3479, attn_decoder_loss=0.237, over 5805140.13 frames. 
], batch size: 83, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:43:32,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=769640.0, ans=0.1 +2024-09-19 23:44:14,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=769720.0, ans=0.5 +2024-09-19 23:44:23,311 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.57 vs. limit=6.0 +2024-09-19 23:44:27,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=769760.0, ans=0.2 +2024-09-19 23:44:33,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=769760.0, ans=0.025 +2024-09-19 23:44:36,555 INFO [train.py:1198] (0/2) Epoch 43, batch 2400, loss[loss=0.2221, ctc_loss=0.103, cr_loss=0.327, attn_decoder_loss=0.228, over 29507.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1102, cr_loss=0.3488, attn_decoder_loss=0.2375, over 5808895.85 frames. ], batch size: 76, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:44:49,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.50 vs. limit=15.0 +2024-09-19 23:44:51,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=769840.0, ans=15.0 +2024-09-19 23:44:52,649 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.02 vs. limit=22.5 +2024-09-19 23:45:17,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=769880.0, ans=0.025 +2024-09-19 23:45:23,259 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.858e+01 8.868e+01 9.245e+01 1.005e+02 2.989e+02, threshold=1.849e+02, percent-clipped=3.0 +2024-09-19 23:45:31,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=769920.0, ans=0.0 +2024-09-19 23:45:38,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=769960.0, ans=0.125 +2024-09-19 23:45:41,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=769960.0, ans=0.1 +2024-09-19 23:45:49,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=769960.0, ans=0.0 +2024-09-19 23:45:52,121 INFO [train.py:1198] (0/2) Epoch 43, batch 2450, loss[loss=0.2326, ctc_loss=0.1128, cr_loss=0.362, attn_decoder_loss=0.2379, over 29692.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1108, cr_loss=0.3503, attn_decoder_loss=0.2382, over 5785894.37 frames. ], batch size: 82, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:45:59,987 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:46:22,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.15 vs. 
limit=6.0 +2024-09-19 23:46:41,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=770120.0, ans=0.1 +2024-09-19 23:47:09,633 INFO [train.py:1198] (0/2) Epoch 43, batch 2500, loss[loss=0.2421, ctc_loss=0.1162, cr_loss=0.3753, attn_decoder_loss=0.2477, over 29597.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1107, cr_loss=0.3505, attn_decoder_loss=0.2381, over 5795959.21 frames. ], batch size: 86, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:47:11,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=770200.0, ans=0.125 +2024-09-19 23:47:17,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=770200.0, ans=0.125 +2024-09-19 23:47:46,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=770280.0, ans=0.125 +2024-09-19 23:47:56,926 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.730e+01 9.095e+01 9.659e+01 1.544e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-19 23:47:58,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=770320.0, ans=0.1 +2024-09-19 23:48:06,901 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.16 vs. limit=15.0 +2024-09-19 23:48:07,242 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.48 vs. limit=15.0 +2024-09-19 23:48:28,168 INFO [train.py:1198] (0/2) Epoch 43, batch 2550, loss[loss=0.2064, ctc_loss=0.09794, cr_loss=0.3185, attn_decoder_loss=0.2114, over 29369.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1107, cr_loss=0.3504, attn_decoder_loss=0.2381, over 5796930.55 frames. ], batch size: 67, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:48:28,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=770400.0, ans=0.05 +2024-09-19 23:48:29,468 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.43 vs. limit=5.0 +2024-09-19 23:48:50,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=770440.0, ans=0.125 +2024-09-19 23:49:04,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=770480.0, ans=0.125 +2024-09-19 23:49:04,922 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.38 vs. limit=12.0 +2024-09-19 23:49:18,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=770520.0, ans=0.125 +2024-09-19 23:49:43,748 INFO [train.py:1198] (0/2) Epoch 43, batch 2600, loss[loss=0.2289, ctc_loss=0.1044, cr_loss=0.3355, attn_decoder_loss=0.2353, over 29458.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1108, cr_loss=0.3503, attn_decoder_loss=0.2384, over 5794572.41 frames. 
], batch size: 78, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:50:03,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=770640.0, ans=0.09899494936611666 +2024-09-19 23:50:06,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=770640.0, ans=0.125 +2024-09-19 23:50:16,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=770680.0, ans=0.125 +2024-09-19 23:50:17,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=770680.0, ans=0.125 +2024-09-19 23:50:19,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=770680.0, ans=0.0 +2024-09-19 23:50:32,557 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.284e+01 8.619e+01 9.177e+01 9.694e+01 1.714e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-19 23:50:33,190 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.62 vs. limit=15.0 +2024-09-19 23:50:37,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.73 vs. limit=12.0 +2024-09-19 23:50:44,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=770760.0, ans=0.2 +2024-09-19 23:50:55,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=770760.0, ans=0.125 +2024-09-19 23:51:01,472 INFO [train.py:1198] (0/2) Epoch 43, batch 2650, loss[loss=0.2409, ctc_loss=0.104, cr_loss=0.3345, attn_decoder_loss=0.2486, over 29313.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1111, cr_loss=0.351, attn_decoder_loss=0.2388, over 5800930.99 frames. 
], batch size: 100, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:51:10,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=770800.0, ans=0.09899494936611666 +2024-09-19 23:51:12,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=770800.0, ans=0.125 +2024-09-19 23:51:15,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=770840.0, ans=0.1 +2024-09-19 23:51:18,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=770840.0, ans=0.0 +2024-09-19 23:51:27,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=770840.0, ans=0.1 +2024-09-19 23:51:27,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=770840.0, ans=0.125 +2024-09-19 23:51:31,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=770880.0, ans=0.0 +2024-09-19 23:51:41,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=770880.0, ans=0.125 +2024-09-19 23:52:18,210 INFO [train.py:1198] (0/2) Epoch 43, batch 2700, loss[loss=0.2426, ctc_loss=0.1136, cr_loss=0.3686, attn_decoder_loss=0.2488, over 29529.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1113, cr_loss=0.3514, attn_decoder_loss=0.2391, over 5797342.03 frames. ], batch size: 87, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:52:26,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=771000.0, ans=0.0 +2024-09-19 23:52:30,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=771000.0, ans=0.125 +2024-09-19 23:52:32,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=771040.0, ans=0.125 +2024-09-19 23:52:35,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=771040.0, ans=0.0 +2024-09-19 23:52:54,028 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.71 vs. limit=15.0 +2024-09-19 23:53:05,377 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.491e+01 8.492e+01 9.068e+01 9.521e+01 1.768e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-19 23:53:23,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=771160.0, ans=0.5 +2024-09-19 23:53:34,516 INFO [train.py:1198] (0/2) Epoch 43, batch 2750, loss[loss=0.2292, ctc_loss=0.1084, cr_loss=0.3468, attn_decoder_loss=0.2349, over 29518.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1108, cr_loss=0.3505, attn_decoder_loss=0.2383, over 5796011.02 frames. ], batch size: 75, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:53:44,408 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.83 vs. 
limit=22.5 +2024-09-19 23:53:47,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=771200.0, ans=15.0 +2024-09-19 23:53:52,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=771240.0, ans=0.07 +2024-09-19 23:54:05,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=771280.0, ans=0.0 +2024-09-19 23:54:18,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=771280.0, ans=0.0 +2024-09-19 23:54:38,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=771360.0, ans=0.1 +2024-09-19 23:54:40,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=771360.0, ans=0.125 +2024-09-19 23:54:40,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=771360.0, ans=0.2 +2024-09-19 23:54:52,209 INFO [train.py:1198] (0/2) Epoch 43, batch 2800, loss[loss=0.2513, ctc_loss=0.1373, cr_loss=0.3869, attn_decoder_loss=0.2553, over 20270.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1112, cr_loss=0.3512, attn_decoder_loss=0.2385, over 5776065.65 frames. ], batch size: 209, lr: 2.56e-03, grad_scale: 32.0 +2024-09-19 23:54:54,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=771400.0, ans=0.2 +2024-09-19 23:54:59,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=771400.0, ans=0.05 +2024-09-19 23:55:01,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=771400.0, ans=0.2 +2024-09-19 23:55:25,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=771480.0, ans=0.0 +2024-09-19 23:55:30,125 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=771480.0, ans=0.125 +2024-09-19 23:55:30,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=771480.0, ans=0.1 +2024-09-19 23:55:34,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=771480.0, ans=0.125 +2024-09-19 23:55:40,240 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.588e+01 8.712e+01 9.201e+01 9.753e+01 5.037e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-19 23:55:58,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=771560.0, ans=0.125 +2024-09-19 23:56:08,851 INFO [train.py:1198] (0/2) Epoch 43, batch 2850, loss[loss=0.2242, ctc_loss=0.1052, cr_loss=0.3417, attn_decoder_loss=0.2298, over 29479.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1118, cr_loss=0.352, attn_decoder_loss=0.2391, over 5761160.95 frames. 
], batch size: 77, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:56:09,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=771600.0, ans=0.5 +2024-09-19 23:56:12,055 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=771600.0, ans=0.025 +2024-09-19 23:56:19,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=771600.0, ans=0.125 +2024-09-19 23:56:24,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=771640.0, ans=0.2 +2024-09-19 23:56:41,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=771680.0, ans=0.125 +2024-09-19 23:56:41,779 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.21 vs. limit=15.0 +2024-09-19 23:56:51,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=771680.0, ans=0.0 +2024-09-19 23:57:05,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=771720.0, ans=0.1 +2024-09-19 23:57:06,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=771720.0, ans=0.125 +2024-09-19 23:57:14,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=771760.0, ans=0.0 +2024-09-19 23:57:16,689 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.60 vs. limit=15.0 +2024-09-19 23:57:24,707 INFO [train.py:1198] (0/2) Epoch 43, batch 2900, loss[loss=0.2267, ctc_loss=0.1033, cr_loss=0.3332, attn_decoder_loss=0.233, over 29419.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1118, cr_loss=0.353, attn_decoder_loss=0.2399, over 5786662.46 frames. ], batch size: 79, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:57:28,366 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.91 vs. limit=6.0 +2024-09-19 23:57:30,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=771800.0, ans=0.95 +2024-09-19 23:57:39,080 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.36 vs. 
limit=15.0 +2024-09-19 23:58:03,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=771880.0, ans=0.95 +2024-09-19 23:58:14,811 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.451e+01 8.980e+01 9.523e+01 1.534e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-19 23:58:24,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=771920.0, ans=0.125 +2024-09-19 23:58:27,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=771960.0, ans=0.025 +2024-09-19 23:58:33,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=771960.0, ans=0.2 +2024-09-19 23:58:42,029 INFO [train.py:1198] (0/2) Epoch 43, batch 2950, loss[loss=0.2276, ctc_loss=0.1106, cr_loss=0.3541, attn_decoder_loss=0.2327, over 29511.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1106, cr_loss=0.3504, attn_decoder_loss=0.2384, over 5782433.07 frames. ], batch size: 75, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:58:51,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=772000.0, ans=0.125 +2024-09-19 23:58:54,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=772000.0, ans=0.125 +2024-09-19 23:59:05,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=772040.0, ans=0.125 +2024-09-19 23:59:17,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=772080.0, ans=0.125 +2024-09-19 23:59:30,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=772120.0, ans=0.125 +2024-09-19 23:59:36,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=772120.0, ans=0.0 +2024-09-19 23:59:51,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.67 vs. limit=15.0 +2024-09-19 23:59:59,897 INFO [train.py:1198] (0/2) Epoch 43, batch 3000, loss[loss=0.2337, ctc_loss=0.1098, cr_loss=0.3518, attn_decoder_loss=0.2396, over 29773.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.11, cr_loss=0.3494, attn_decoder_loss=0.238, over 5783618.13 frames. ], batch size: 81, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:59:59,897 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 00:00:10,486 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.7358, 5.5045, 5.3153, 4.9204], device='cuda:0') +2024-09-20 00:00:10,937 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.4.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([2.7649, 3.2162, 3.3404, 3.4658], device='cuda:0') +2024-09-20 00:00:18,198 INFO [train.py:1230] (0/2) Epoch 43, validation: loss=0.2118, ctc_loss=0.03672, cr_loss=6.551e-15, attn_decoder_loss=0.2313, over 944034.00 frames. 
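[Editor's note] The scaling.py:214 ScheduledFloat records throughout this log report parameter values (dropout_p, skip_rate, balancer prob, const_attention_rate, ...) that are re-evaluated against the global batch_count as training progresses. Below is a minimal sketch of how such a schedule can behave; it assumes piecewise-linear interpolation over (batch_count, value) breakpoints, and the class body, constructor signature, and example breakpoints are illustrative assumptions, not icefall's actual scaling.py implementation.

```python
class ScheduledFloat:
    """Sketch: a float whose value follows a piecewise-linear schedule
    over the global batch count (assumed behavior, not the real class)."""

    def __init__(self, *points):
        # points: (batch_count, value) breakpoints, e.g. (0.0, 0.3), (20000.0, 0.1)
        self.points = sorted(points)
        self.batch_count = 0.0  # advanced by the training loop

    def __float__(self) -> float:
        x, pts = self.batch_count, self.points
        if x <= pts[0][0]:
            return pts[0][1]
        if x >= pts[-1][0]:
            return pts[-1][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if x0 <= x <= x1:
                # Linear interpolation between the surrounding breakpoints.
                return y0 + (y1 - y0) * (x - x0) / (x1 - x0)
        return pts[-1][1]


# Usage: far past the last breakpoint the schedule saturates at its final
# value, consistent with the constant ans=0.1 / ans=0.125 readings at
# batch_count ~ 765k-774k in the records above.
sched = ScheduledFloat((0.0, 0.3), (20000.0, 0.1))
sched.batch_count = 765000.0
assert abs(float(sched) - 0.1) < 1e-9
```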
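[Editor's note] The optim.py:487 WARNING records each report five order statistics (min / 25% / median / 75% / max) of recent gradient norms, a clipping threshold, and the share of recent steps that were clipped. In every record here the threshold equals Clipping_scale times the median (e.g. 2.0 * 9.128e+01 ~= 1.826e+02 in the first record of this section). A hedged sketch of that mechanism follows; the class name QuartileGradClipper, the window size, and the print format are assumptions for illustration, not the actual icefall optimizer code.

```python
from collections import deque

import torch


class QuartileGradClipper:
    """Sketch: clip each step's total grad norm against a threshold of
    clipping_scale * median over a sliding window of recent norms."""

    def __init__(self, clipping_scale: float = 2.0, window: int = 1024):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)    # recent total grad norms
        self.clipped = deque(maxlen=window)  # whether each step was clipped

    def __call__(self, params) -> None:
        params = [p for p in params if p.grad is not None]
        # Total L2 norm over all parameter gradients.
        norm = torch.norm(
            torch.stack([p.grad.detach().norm(2) for p in params])
        ).item()
        self.norms.append(norm)
        hist = torch.tensor(list(self.norms))
        q = torch.quantile(hist, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = self.clipping_scale * q[2].item()  # scale * median
        is_clipped = norm > threshold
        self.clipped.append(is_clipped)
        if is_clipped:
            # Rescale all gradients so the total norm equals the threshold.
            for p in params:
                p.grad.mul_(threshold / (norm + 1e-20))
        pct = 100.0 * sum(self.clipped) / len(self.clipped)
        print(
            "grad-norm quartiles "
            + " ".join(f"{v:.3e}" for v in q.tolist())
            + f", threshold={threshold:.3e}, percent-clipped={pct:.1f}"
        )


# Tiny demo on a single parameter:
p = torch.nn.Parameter(torch.randn(10))
clipper = QuartileGradClipper(clipping_scale=2.0)
for _ in range(5):
    p.grad = torch.randn(10) * torch.rand(1)
    clipper([p])
```

On this reading, percent-clipped=0.0 in most records above simply means no step in the current window exceeded twice the median gradient norm, while the occasional percent-clipped=2.0 or 3.0 coincides with an outlier max (e.g. 4.379e+02 or 1.063e+03) well above the threshold.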
+2024-09-20 00:00:18,199 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 00:00:35,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=772240.0, ans=0.07 +2024-09-20 00:00:35,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=772240.0, ans=0.0 +2024-09-20 00:00:42,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.82 vs. limit=15.0 +2024-09-20 00:01:00,368 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=11.64 vs. limit=15.0 +2024-09-20 00:01:06,794 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.607e+01 9.085e+01 9.850e+01 2.122e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-20 00:01:08,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=772320.0, ans=0.1 +2024-09-20 00:01:22,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=772360.0, ans=0.1 +2024-09-20 00:01:28,204 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:01:31,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=772360.0, ans=0.0 +2024-09-20 00:01:34,008 INFO [train.py:1198] (0/2) Epoch 43, batch 3050, loss[loss=0.2206, ctc_loss=0.1072, cr_loss=0.3534, attn_decoder_loss=0.2254, over 29523.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1105, cr_loss=0.3505, attn_decoder_loss=0.2386, over 5777923.30 frames. ], batch size: 76, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:01:48,674 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=772400.0, ans=0.0 +2024-09-20 00:01:59,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=772440.0, ans=0.2 +2024-09-20 00:02:11,941 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.61 vs. limit=15.0 +2024-09-20 00:02:14,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=772480.0, ans=0.125 +2024-09-20 00:02:17,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=772480.0, ans=15.0 +2024-09-20 00:02:29,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=772520.0, ans=0.1 +2024-09-20 00:02:33,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=772520.0, ans=0.015 +2024-09-20 00:02:42,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=772560.0, ans=0.0 +2024-09-20 00:02:51,506 INFO [train.py:1198] (0/2) Epoch 43, batch 3100, loss[loss=0.2508, ctc_loss=0.125, cr_loss=0.3784, attn_decoder_loss=0.2563, over 29322.00 frames. 
], tot_loss[loss=0.2328, ctc_loss=0.1107, cr_loss=0.351, attn_decoder_loss=0.2386, over 5777143.46 frames. ], batch size: 100, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:02:55,470 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.56 vs. limit=22.5 +2024-09-20 00:03:41,807 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 8.560e+01 8.944e+01 9.719e+01 1.343e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-20 00:04:09,809 INFO [train.py:1198] (0/2) Epoch 43, batch 3150, loss[loss=0.2482, ctc_loss=0.1197, cr_loss=0.3521, attn_decoder_loss=0.2547, over 28850.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1105, cr_loss=0.3502, attn_decoder_loss=0.2385, over 5784555.17 frames. ], batch size: 104, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:04:15,321 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.93 vs. limit=15.0 +2024-09-20 00:04:49,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=772880.0, ans=0.125 +2024-09-20 00:04:54,443 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.85 vs. limit=6.0 +2024-09-20 00:05:18,721 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.36 vs. limit=15.0 +2024-09-20 00:05:25,183 INFO [train.py:1198] (0/2) Epoch 43, batch 3200, loss[loss=0.2262, ctc_loss=0.104, cr_loss=0.3337, attn_decoder_loss=0.2323, over 29408.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1103, cr_loss=0.3498, attn_decoder_loss=0.2381, over 5795604.45 frames. ], batch size: 79, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:05:25,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=773000.0, ans=0.1 +2024-09-20 00:05:42,725 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.06 vs. limit=15.0 +2024-09-20 00:05:53,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=773040.0, ans=0.0 +2024-09-20 00:06:11,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=773120.0, ans=0.025 +2024-09-20 00:06:17,443 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.172e+01 8.459e+01 9.068e+01 9.712e+01 1.068e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 00:06:18,417 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.89 vs. limit=15.0 +2024-09-20 00:06:29,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=773160.0, ans=0.1 +2024-09-20 00:06:43,189 INFO [train.py:1198] (0/2) Epoch 43, batch 3250, loss[loss=0.2429, ctc_loss=0.1185, cr_loss=0.3657, attn_decoder_loss=0.2486, over 29694.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1109, cr_loss=0.3514, attn_decoder_loss=0.2388, over 5801269.20 frames. 
], batch size: 84, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:06:56,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.01 vs. limit=22.5 +2024-09-20 00:07:03,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=773240.0, ans=0.0 +2024-09-20 00:07:04,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=773240.0, ans=0.1 +2024-09-20 00:07:26,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=773280.0, ans=0.125 +2024-09-20 00:07:46,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=773360.0, ans=0.125 +2024-09-20 00:07:55,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=773360.0, ans=0.0 +2024-09-20 00:08:00,882 INFO [train.py:1198] (0/2) Epoch 43, batch 3300, loss[loss=0.2404, ctc_loss=0.1167, cr_loss=0.3724, attn_decoder_loss=0.2458, over 28221.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1106, cr_loss=0.3505, attn_decoder_loss=0.2376, over 5798768.37 frames. ], batch size: 111, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:08:49,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=773520.0, ans=0.125 +2024-09-20 00:08:50,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=773520.0, ans=0.125 +2024-09-20 00:08:52,106 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.624e+01 9.248e+01 9.741e+01 2.844e+02, threshold=1.850e+02, percent-clipped=2.0 +2024-09-20 00:08:55,987 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.53 vs. limit=12.0 +2024-09-20 00:09:11,743 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:09:15,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=773600.0, ans=0.125 +2024-09-20 00:09:16,242 INFO [train.py:1198] (0/2) Epoch 43, batch 3350, loss[loss=0.2461, ctc_loss=0.1194, cr_loss=0.3619, attn_decoder_loss=0.2521, over 28847.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1108, cr_loss=0.3503, attn_decoder_loss=0.238, over 5775840.20 frames. ], batch size: 104, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:09:18,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=773600.0, ans=0.0 +2024-09-20 00:09:23,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.31 vs. 
limit=22.5 +2024-09-20 00:10:05,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=773720.0, ans=0.1 +2024-09-20 00:10:14,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=773720.0, ans=0.07 +2024-09-20 00:10:34,063 INFO [train.py:1198] (0/2) Epoch 43, batch 3400, loss[loss=0.2127, ctc_loss=0.104, cr_loss=0.34, attn_decoder_loss=0.2172, over 29396.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1113, cr_loss=0.3513, attn_decoder_loss=0.2383, over 5767080.38 frames. ], batch size: 67, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:10:37,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=773800.0, ans=0.025 +2024-09-20 00:10:41,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.53 vs. limit=15.0 +2024-09-20 00:10:43,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=773800.0, ans=0.125 +2024-09-20 00:11:01,016 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.38 vs. limit=15.0 +2024-09-20 00:11:12,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=773880.0, ans=0.125 +2024-09-20 00:11:20,247 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:11:26,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=773920.0, ans=0.125 +2024-09-20 00:11:27,406 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.739e+01 8.527e+01 9.240e+01 9.845e+01 1.909e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-20 00:11:30,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=773920.0, ans=0.2 +2024-09-20 00:11:39,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=773960.0, ans=0.2 +2024-09-20 00:11:43,231 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.52 vs. limit=12.0 +2024-09-20 00:11:47,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=773960.0, ans=0.2 +2024-09-20 00:11:51,437 INFO [train.py:1198] (0/2) Epoch 43, batch 3450, loss[loss=0.2399, ctc_loss=0.1121, cr_loss=0.3625, attn_decoder_loss=0.246, over 28345.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1109, cr_loss=0.3503, attn_decoder_loss=0.2383, over 5775069.86 frames. 
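
The many ScheduledFloat lines (for example dropout_p with ans=0.1, skip rates with ans=0.0) report hyperparameters whose values are functions of the global batch_count rather than constants. Below is a hedged sketch of the interpolation such a schedule needs; the class name and the example breakpoints are illustrative, not the definition in icefall's scaling.py.

```python
# Hedged sketch of a piecewise-linear schedule keyed on the global batch
# count, in the spirit of the ScheduledFloat entries in this log. This is not
# icefall's ScheduledFloat; only the interpolation logic is shown.
import bisect

class PiecewiseLinearSchedule:
    """Value that ramps linearly between (batch_count, value) breakpoints."""

    def __init__(self, *points):
        # points: (batch_count, value) pairs, sorted by batch_count.
        self.xs = [p[0] for p in points]
        self.ys = [p[1] for p in points]

    def __call__(self, batch_count):
        if batch_count <= self.xs[0]:
            return self.ys[0]
        if batch_count >= self.xs[-1]:
            return self.ys[-1]
        i = bisect.bisect_right(self.xs, batch_count)
        x0, x1 = self.xs[i - 1], self.xs[i]
        y0, y1 = self.ys[i - 1], self.ys[i]
        return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)

# Illustrative breakpoints: a dropout rate that decays early in training,
# then stays flat, consistent with ans=0.1 at batch_count ~773000 above.
dropout_p = PiecewiseLinearSchedule((0.0, 0.3), (20000.0, 0.1))
print(dropout_p(773000.0))  # -> 0.1
```
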
], batch size: 111, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:12:25,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=774080.0, ans=0.125 +2024-09-20 00:12:52,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=774160.0, ans=0.125 +2024-09-20 00:12:58,941 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.04 vs. limit=22.5 +2024-09-20 00:12:59,806 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=774160.0, ans=0.0 +2024-09-20 00:13:02,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=774160.0, ans=0.0 +2024-09-20 00:13:06,966 INFO [train.py:1198] (0/2) Epoch 43, batch 3500, loss[loss=0.2111, ctc_loss=0.0888, cr_loss=0.3109, attn_decoder_loss=0.2178, over 29349.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1106, cr_loss=0.3492, attn_decoder_loss=0.2377, over 5776399.36 frames. ], batch size: 71, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:13:10,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=774200.0, ans=0.0 +2024-09-20 00:13:20,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=774240.0, ans=0.025 +2024-09-20 00:13:22,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=774240.0, ans=0.125 +2024-09-20 00:13:40,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=774280.0, ans=0.025 +2024-09-20 00:13:42,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=774280.0, ans=0.025 +2024-09-20 00:13:58,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=774320.0, ans=0.025 +2024-09-20 00:13:59,839 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.916e+01 8.502e+01 8.947e+01 9.671e+01 2.846e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-20 00:14:04,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=774320.0, ans=0.125 +2024-09-20 00:14:20,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=774360.0, ans=0.125 +2024-09-20 00:14:23,853 INFO [train.py:1198] (0/2) Epoch 43, batch 3550, loss[loss=0.2385, ctc_loss=0.1047, cr_loss=0.3345, attn_decoder_loss=0.2459, over 29719.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1105, cr_loss=0.3493, attn_decoder_loss=0.2378, over 5784398.12 frames. 
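
The recurring optim.py WARNING lines report five order statistics of recent gradient norms (min, 25%, median, 75%, max), a clipping threshold, and the fraction of recently clipped steps; with Clipping_scale=2.0 the threshold tracks roughly twice the running median (for example median 9.248e+01 against threshold 1.850e+02 above). Below is a simplified illustration of that bookkeeping, assuming a single global-norm clip; icefall's ScaledAdam does this per parameter group, and the exact windowing here is an assumption.

```python
# Simplified illustration (not icefall's optim.py) of the clipping
# bookkeeping behind the WARNING lines: keep a window of recent gradient
# norms, clip at clipping_scale * median, and report order statistics plus
# the percentage of clipped steps.
import torch

class GradNormClipper:
    def __init__(self, clipping_scale=2.0, window=1000):
        self.clipping_scale = clipping_scale
        self.window = window
        self.norms = []     # recent global grad norms
        self.clipped = 0
        self.seen = 0

    def step(self, parameters):
        params = [p for p in parameters if p.grad is not None]
        norm = torch.linalg.vector_norm(
            torch.stack([p.grad.detach().norm() for p in params])).item()
        self.norms = (self.norms + [norm])[-self.window:]
        median = sorted(self.norms)[len(self.norms) // 2]
        threshold = self.clipping_scale * median
        self.seen += 1
        if norm > threshold:
            self.clipped += 1
            for p in params:                 # scale grads down to threshold
                p.grad.mul_(threshold / norm)

    def report(self):
        s = sorted(self.norms)
        q = [s[int(f * (len(s) - 1))] for f in (0.0, 0.25, 0.5, 0.75, 1.0)]
        pct = 100.0 * self.clipped / max(1, self.seen)
        return ("grad-norm quartiles "
                + " ".join(f"{v:.3e}" for v in q)
                + f", threshold={self.clipping_scale * q[2]:.3e}"
                + f", percent-clipped={pct:.1f}")
```

A median-based threshold drifts slowly (in this log it stays near 1.8e+02) while isolated outlier norms such as the 6.609e+02 maximum further down get clipped, which is the intended effect.
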
], batch size: 89, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:14:34,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=774400.0, ans=0.0 +2024-09-20 00:14:50,631 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=774440.0, ans=0.125 +2024-09-20 00:14:52,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=774480.0, ans=0.125 +2024-09-20 00:15:05,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=774480.0, ans=0.09899494936611666 +2024-09-20 00:15:36,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=774600.0, ans=0.125 +2024-09-20 00:15:37,574 INFO [train.py:1198] (0/2) Epoch 43, batch 3600, loss[loss=0.2209, ctc_loss=0.103, cr_loss=0.3453, attn_decoder_loss=0.2264, over 29516.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1111, cr_loss=0.3513, attn_decoder_loss=0.2383, over 5793736.97 frames. ], batch size: 77, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:15:46,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=774600.0, ans=0.125 +2024-09-20 00:15:47,302 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.30 vs. limit=12.0 +2024-09-20 00:15:55,185 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.85 vs. limit=22.5 +2024-09-20 00:16:20,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=774680.0, ans=0.1 +2024-09-20 00:16:25,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=774720.0, ans=0.125 +2024-09-20 00:16:30,157 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.605e+01 8.541e+01 9.175e+01 9.569e+01 2.464e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-20 00:16:39,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=774760.0, ans=0.125 +2024-09-20 00:16:42,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=774760.0, ans=0.125 +2024-09-20 00:16:51,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=774760.0, ans=0.0 +2024-09-20 00:16:52,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=774800.0, ans=0.2 +2024-09-20 00:16:54,084 INFO [train.py:1198] (0/2) Epoch 43, batch 3650, loss[loss=0.2429, ctc_loss=0.1235, cr_loss=0.3826, attn_decoder_loss=0.2476, over 29513.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1104, cr_loss=0.35, attn_decoder_loss=0.2376, over 5794418.80 frames. ], batch size: 90, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:17:02,296 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.03 vs. 
limit=15.0 +2024-09-20 00:17:43,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=774920.0, ans=0.125 +2024-09-20 00:17:49,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=774920.0, ans=0.0 +2024-09-20 00:18:04,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=774960.0, ans=10.0 +2024-09-20 00:18:06,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.40 vs. limit=15.0 +2024-09-20 00:18:08,750 INFO [train.py:1198] (0/2) Epoch 43, batch 3700, loss[loss=0.2352, ctc_loss=0.1161, cr_loss=0.371, attn_decoder_loss=0.2402, over 29705.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1103, cr_loss=0.3501, attn_decoder_loss=0.2378, over 5804658.18 frames. ], batch size: 84, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:18:29,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=775040.0, ans=0.025 +2024-09-20 00:18:59,090 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.832e+01 8.457e+01 9.128e+01 9.477e+01 6.609e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-20 00:19:23,197 INFO [train.py:1198] (0/2) Epoch 43, batch 3750, loss[loss=0.1992, ctc_loss=0.08377, cr_loss=0.2782, attn_decoder_loss=0.2059, over 29311.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1104, cr_loss=0.35, attn_decoder_loss=0.2377, over 5808015.09 frames. ], batch size: 67, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:19:35,319 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=775200.0, ans=0.5 +2024-09-20 00:19:37,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=775240.0, ans=0.125 +2024-09-20 00:19:39,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=775240.0, ans=0.125 +2024-09-20 00:19:46,565 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.85 vs. limit=15.0 +2024-09-20 00:20:15,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=775320.0, ans=0.2 +2024-09-20 00:20:39,071 INFO [train.py:1198] (0/2) Epoch 43, batch 3800, loss[loss=0.2408, ctc_loss=0.1098, cr_loss=0.3467, attn_decoder_loss=0.2477, over 29631.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1103, cr_loss=0.3498, attn_decoder_loss=0.2374, over 5797288.16 frames. ], batch size: 86, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:20:53,112 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.59 vs. limit=10.0 +2024-09-20 00:20:57,730 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.91 vs. limit=15.0 +2024-09-20 00:21:10,932 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.48 vs. 
limit=15.0 +2024-09-20 00:21:24,543 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.65 vs. limit=15.0 +2024-09-20 00:21:29,530 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.047e+01 8.559e+01 9.199e+01 9.773e+01 2.259e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-20 00:21:49,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=775560.0, ans=0.0 +2024-09-20 00:21:50,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=775560.0, ans=0.125 +2024-09-20 00:21:53,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=775600.0, ans=0.025 +2024-09-20 00:21:55,046 INFO [train.py:1198] (0/2) Epoch 43, batch 3850, loss[loss=0.2512, ctc_loss=0.1245, cr_loss=0.3763, attn_decoder_loss=0.2569, over 29245.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.11, cr_loss=0.3496, attn_decoder_loss=0.2373, over 5811011.49 frames. ], batch size: 100, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:22:00,484 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.05 vs. limit=12.0 +2024-09-20 00:22:00,583 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.13 vs. limit=12.0 +2024-09-20 00:22:07,207 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:22:20,618 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:23:09,181 INFO [train.py:1198] (0/2) Epoch 43, batch 3900, loss[loss=0.2438, ctc_loss=0.1124, cr_loss=0.362, attn_decoder_loss=0.2503, over 29627.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1104, cr_loss=0.3507, attn_decoder_loss=0.2377, over 5815995.11 frames. ], batch size: 86, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:23:36,493 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.69 vs. limit=15.0 +2024-09-20 00:23:43,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=775880.0, ans=0.125 +2024-09-20 00:23:47,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=775880.0, ans=0.2 +2024-09-20 00:23:50,843 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=775880.0, ans=0.1 +2024-09-20 00:23:59,430 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 8.663e+01 9.061e+01 9.537e+01 1.215e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-20 00:24:23,507 INFO [train.py:1198] (0/2) Epoch 43, batch 3950, loss[loss=0.2519, ctc_loss=0.1304, cr_loss=0.3895, attn_decoder_loss=0.2567, over 29536.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1103, cr_loss=0.3502, attn_decoder_loss=0.2378, over 5835287.52 frames. 
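
Whitening lines such as the ones above compare a measured statistic of a module's activations against a limit; when metric exceeds limit, the Whiten module in scaling.py pushes the activations back toward a whiter (decorrelated) distribution. The exact formula is icefall's; the sketch below uses an illustrative metric, the eigenvalue dispersion of the per-group covariance, which equals 1.0 for perfectly white activations and grows as channels become correlated, matching the qualitative behavior of the logged values.

```python
# Illustrative whitening metric (the real formula is in icefall's scaling.py):
# eigenvalue dispersion of the per-group feature covariance. It is 1.0 when
# activations are perfectly white and grows as channels become correlated,
# the qualitative behavior of the "metric=X vs. limit=Y" lines.
import torch

def whitening_metric(x, num_groups):
    """x: (num_frames, num_channels). Returns >= 1.0; larger = less white."""
    num_frames, num_channels = x.shape
    cpg = num_channels // num_groups                 # channels per group
    xg = x.reshape(num_frames, num_groups, cpg).transpose(0, 1)
    cov = xg.transpose(1, 2) @ xg / num_frames       # (num_groups, cpg, cpg)
    eigs = torch.linalg.eigvalsh(cov)                # per-group eigenvalues
    # E[lambda^2] / E[lambda]^2 == 1 iff all eigenvalues are equal.
    return ((eigs ** 2).mean() / eigs.mean() ** 2).item()

x = torch.randn(10000, 512)                    # near-white activations
print(whitening_metric(x, num_groups=1))       # ~1.05, far below limit=12.0
x_corr = x @ torch.randn(512, 512) * 0.1       # correlated activations
print(whitening_metric(x_corr, num_groups=1))  # much larger
```
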
], batch size: 97, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:24:34,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=776000.0, ans=0.125 +2024-09-20 00:24:52,445 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=3.92 vs. limit=15.0 +2024-09-20 00:25:04,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=776080.0, ans=0.025 +2024-09-20 00:25:10,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=776120.0, ans=0.025 +2024-09-20 00:25:13,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=776120.0, ans=0.0 +2024-09-20 00:25:15,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=776120.0, ans=0.125 +2024-09-20 00:25:38,024 INFO [train.py:1198] (0/2) Epoch 43, batch 4000, loss[loss=0.2139, ctc_loss=0.09668, cr_loss=0.3236, attn_decoder_loss=0.2197, over 29527.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1104, cr_loss=0.3502, attn_decoder_loss=0.2378, over 5812257.35 frames. ], batch size: 74, lr: 2.55e-03, grad_scale: 32.0 +2024-09-20 00:25:38,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=776200.0, ans=0.1 +2024-09-20 00:26:07,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=776280.0, ans=0.125 +2024-09-20 00:26:25,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=776320.0, ans=0.2 +2024-09-20 00:26:29,829 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.652e+01 8.878e+01 9.363e+01 9.780e+01 3.308e+02, threshold=1.873e+02, percent-clipped=2.0 +2024-09-20 00:26:41,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=776360.0, ans=0.125 +2024-09-20 00:26:53,251 INFO [train.py:1198] (0/2) Epoch 43, batch 4050, loss[loss=0.2562, ctc_loss=0.1427, cr_loss=0.3952, attn_decoder_loss=0.26, over 19875.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1106, cr_loss=0.3506, attn_decoder_loss=0.2378, over 5795305.11 frames. ], batch size: 210, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:27:02,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=776400.0, ans=0.125 +2024-09-20 00:27:06,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=776440.0, ans=0.125 +2024-09-20 00:27:15,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=776440.0, ans=0.0 +2024-09-20 00:27:24,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=776480.0, ans=0.07 +2024-09-20 00:27:48,101 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. 
limit=6.0 +2024-09-20 00:28:06,605 INFO [train.py:1198] (0/2) Epoch 43, batch 4100, loss[loss=0.2463, ctc_loss=0.1227, cr_loss=0.3745, attn_decoder_loss=0.2517, over 29493.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.111, cr_loss=0.351, attn_decoder_loss=0.2382, over 5790697.73 frames. ], batch size: 90, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:28:09,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=776600.0, ans=0.125 +2024-09-20 00:28:25,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=776640.0, ans=0.125 +2024-09-20 00:28:37,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=776680.0, ans=0.0 +2024-09-20 00:28:57,815 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.596e+01 8.624e+01 9.289e+01 9.929e+01 2.714e+02, threshold=1.858e+02, percent-clipped=2.0 +2024-09-20 00:29:04,675 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.78 vs. limit=22.5 +2024-09-20 00:29:20,429 INFO [train.py:1198] (0/2) Epoch 43, batch 4150, loss[loss=0.2227, ctc_loss=0.101, cr_loss=0.3304, attn_decoder_loss=0.2289, over 29506.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1108, cr_loss=0.3506, attn_decoder_loss=0.2382, over 5795861.00 frames. ], batch size: 77, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:29:27,384 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.19 vs. limit=12.0 +2024-09-20 00:29:31,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=776800.0, ans=0.125 +2024-09-20 00:29:51,533 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.47 vs. limit=12.0 +2024-09-20 00:30:03,273 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.53 vs. limit=15.0 +2024-09-20 00:30:10,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=776920.0, ans=0.125 +2024-09-20 00:30:36,192 INFO [train.py:1198] (0/2) Epoch 43, batch 4200, loss[loss=0.2419, ctc_loss=0.1154, cr_loss=0.3602, attn_decoder_loss=0.2479, over 29520.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1107, cr_loss=0.3508, attn_decoder_loss=0.2383, over 5797984.57 frames. ], batch size: 90, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:30:45,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=777000.0, ans=0.125 +2024-09-20 00:30:46,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=777000.0, ans=0.125 +2024-09-20 00:30:59,274 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.30 vs. 
limit=15.0 +2024-09-20 00:31:05,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=777080.0, ans=0.2 +2024-09-20 00:31:14,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=777080.0, ans=0.125 +2024-09-20 00:31:29,059 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.533e+01 8.571e+01 8.984e+01 9.502e+01 1.265e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-20 00:31:30,754 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=777120.0, ans=0.0 +2024-09-20 00:31:40,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=777160.0, ans=0.125 +2024-09-20 00:31:49,477 INFO [train.py:1198] (0/2) Epoch 43, batch 4250, loss[loss=0.2057, ctc_loss=0.08499, cr_loss=0.2871, attn_decoder_loss=0.2127, over 29515.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1103, cr_loss=0.3498, attn_decoder_loss=0.2382, over 5803793.73 frames. ], batch size: 74, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:32:05,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=777240.0, ans=0.125 +2024-09-20 00:32:28,211 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.35 vs. limit=15.0 +2024-09-20 00:32:36,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=777320.0, ans=0.5 +2024-09-20 00:32:49,266 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.55 vs. limit=15.0 +2024-09-20 00:32:55,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=777360.0, ans=0.025 +2024-09-20 00:32:58,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=777360.0, ans=0.1 +2024-09-20 00:33:02,914 INFO [train.py:1198] (0/2) Epoch 43, batch 4300, loss[loss=0.2332, ctc_loss=0.1095, cr_loss=0.3553, attn_decoder_loss=0.2391, over 29557.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1099, cr_loss=0.3486, attn_decoder_loss=0.2383, over 5793710.60 frames. ], batch size: 87, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:33:05,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.94 vs. 
limit=10.0 +2024-09-20 00:33:06,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=777400.0, ans=0.125 +2024-09-20 00:33:26,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=777440.0, ans=0.07 +2024-09-20 00:33:27,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=777440.0, ans=0.125 +2024-09-20 00:33:32,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=777480.0, ans=0.035 +2024-09-20 00:33:55,857 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.44 vs. limit=15.0 +2024-09-20 00:33:57,762 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.855e+01 9.292e+01 9.899e+01 2.383e+02, threshold=1.858e+02, percent-clipped=1.0 +2024-09-20 00:33:58,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=777520.0, ans=0.125 +2024-09-20 00:34:11,631 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.62 vs. limit=22.5 +2024-09-20 00:34:17,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=777600.0, ans=0.125 +2024-09-20 00:34:18,765 INFO [train.py:1198] (0/2) Epoch 43, batch 4350, loss[loss=0.2472, ctc_loss=0.1261, cr_loss=0.4094, attn_decoder_loss=0.2515, over 29489.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1126, cr_loss=0.355, attn_decoder_loss=0.2413, over 5797524.71 frames. ], batch size: 97, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:34:23,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=777600.0, ans=0.125 +2024-09-20 00:34:29,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.min_positive, batch_count=777600.0, ans=0.025 +2024-09-20 00:34:52,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=777680.0, ans=0.0 +2024-09-20 00:35:11,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=777720.0, ans=0.1 +2024-09-20 00:35:29,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=777760.0, ans=0.0 +2024-09-20 00:35:31,779 INFO [train.py:1198] (0/2) Epoch 43, batch 4400, loss[loss=0.2415, ctc_loss=0.1198, cr_loss=0.38, attn_decoder_loss=0.2465, over 27417.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1141, cr_loss=0.3582, attn_decoder_loss=0.2435, over 5769488.55 frames. 
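
The tot_loss[... over N frames.] summaries are running averages weighted by the number of acoustic frames in each batch; "batch size" here counts cuts, and dynamic batching keeps total frames per batch roughly constant (compare 210 short cuts over ~20000 frames with 67 long cuts over ~29000 frames). A minimal frame-weighted tracker is sketched below. The fractional frame totals in the log (for example 5769488.55) suggest older batches are additionally down-weighted over time, a detail omitted here; LossTracker is an illustrative name rather than icefall's MetricsTracker.

```python
# Minimal frame-weighted running average, sketching how the tot_loss
# summaries could be accumulated. Not icefall's MetricsTracker; the decay of
# old batches is not reproduced.
from collections import defaultdict

class LossTracker:
    def __init__(self):
        self.sums = defaultdict(float)   # frame-weighted loss sums
        self.frames = 0.0

    def update(self, frames, **losses):
        for name, value in losses.items():
            self.sums[name] += value * frames
        self.frames += frames

    def summary(self):
        avgs = {k: v / self.frames for k, v in self.sums.items()}
        body = ", ".join(f"{k}={v:.4g}" for k, v in avgs.items())
        return f"tot_loss[{body}, over {self.frames:.2f} frames.]"

tracker = LossTracker()
# Two illustrative batches with values in the range seen above:
tracker.update(29520.0, loss=0.2419, ctc_loss=0.1154, cr_loss=0.3602)
tracker.update(27417.0, loss=0.2415, ctc_loss=0.1198, cr_loss=0.3800)
print(tracker.summary())
```
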
], batch size: 124, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:35:49,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=777840.0, ans=0.125 +2024-09-20 00:36:07,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=777880.0, ans=0.1 +2024-09-20 00:36:21,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=777920.0, ans=0.125 +2024-09-20 00:36:23,939 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.45 vs. limit=15.0 +2024-09-20 00:36:25,884 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.138e+01 9.169e+01 9.548e+01 1.005e+02 2.703e+02, threshold=1.910e+02, percent-clipped=1.0 +2024-09-20 00:36:32,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=777960.0, ans=0.2 +2024-09-20 00:36:46,770 INFO [train.py:1198] (0/2) Epoch 43, batch 4450, loss[loss=0.2478, ctc_loss=0.1393, cr_loss=0.3912, attn_decoder_loss=0.2511, over 20309.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1175, cr_loss=0.3633, attn_decoder_loss=0.2456, over 5577179.81 frames. ], batch size: 209, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:36:47,771 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=9.16 vs. limit=12.0 +2024-09-20 00:36:48,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=778000.0, ans=0.125 +2024-09-20 00:36:53,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=778000.0, ans=0.5 +2024-09-20 00:37:06,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=778040.0, ans=0.2 +2024-09-20 00:37:08,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=778040.0, ans=0.025 +2024-09-20 00:37:18,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=778080.0, ans=0.0 +2024-09-20 00:37:47,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=778160.0, ans=0.125 +2024-09-20 00:37:47,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=778160.0, ans=0.125 +2024-09-20 00:37:56,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=778160.0, ans=0.0 +2024-09-20 00:38:01,077 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.52 vs. limit=15.0 +2024-09-20 00:38:01,715 INFO [train.py:1198] (0/2) Epoch 43, batch 4500, loss[loss=0.2425, ctc_loss=0.1266, cr_loss=0.3587, attn_decoder_loss=0.2475, over 20298.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1201, cr_loss=0.3651, attn_decoder_loss=0.2472, over 5234479.56 frames. 
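
grad_scale in the entries above steps among powers of two (8.0, 16.0, 32.0), the signature of dynamic loss scaling for mixed-precision training: the scale is doubled after a stretch of overflow-free steps and halved when fp16 gradients overflow. A minimal sketch with PyTorch's stock GradScaler follows (requires a CUDA device); the model, data, and growth_interval are placeholders, and whether this run used the stock scaler or a customized one is an assumption.

```python
# Minimal mixed-precision step with PyTorch's stock GradScaler (requires a
# CUDA device). Model, data, and growth_interval are placeholders.
import torch

model = torch.nn.Linear(10, 10).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=2.55e-3)  # lr as logged
scaler = torch.cuda.amp.GradScaler(init_scale=16.0, growth_interval=2000)

x = torch.randn(8, 10, device="cuda")
with torch.autocast("cuda", dtype=torch.float16):
    loss = model(x).square().mean()

optimizer.zero_grad()
scaler.scale(loss).backward()   # backward on loss * scale
scaler.step(optimizer)          # unscales grads; skips the step on inf/nan
scaler.update()                 # doubles or halves the scale -> "grad_scale"
print(scaler.get_scale())
```
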
], batch size: 209, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:38:25,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=778240.0, ans=0.025 +2024-09-20 00:38:38,570 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-43.pt +2024-09-20 00:39:29,384 INFO [train.py:1198] (0/2) Epoch 44, batch 0, loss[loss=0.2193, ctc_loss=0.09658, cr_loss=0.3306, attn_decoder_loss=0.2256, over 29625.00 frames. ], tot_loss[loss=0.2193, ctc_loss=0.09658, cr_loss=0.3306, attn_decoder_loss=0.2256, over 29625.00 frames. ], batch size: 73, lr: 2.52e-03, grad_scale: 16.0 +2024-09-20 00:39:29,385 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 00:39:43,013 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.5.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.1837, 5.2776, 5.0190, 3.0121], device='cuda:0') +2024-09-20 00:39:47,833 INFO [train.py:1230] (0/2) Epoch 44, validation: loss=0.2131, ctc_loss=0.03639, cr_loss=8.375e-15, attn_decoder_loss=0.2327, over 944034.00 frames. +2024-09-20 00:39:47,834 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 00:39:52,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=778300.0, ans=0.125 +2024-09-20 00:40:03,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=778340.0, ans=0.125 +2024-09-20 00:40:03,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=778340.0, ans=0.07 +2024-09-20 00:40:05,916 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.560e+01 1.073e+02 1.152e+02 1.272e+02 3.214e+02, threshold=2.305e+02, percent-clipped=2.0 +2024-09-20 00:40:32,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=778420.0, ans=0.025 +2024-09-20 00:40:39,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=778420.0, ans=0.125 +2024-09-20 00:40:46,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=778420.0, ans=0.2 +2024-09-20 00:40:54,459 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.04 vs. limit=15.0 +2024-09-20 00:40:56,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=778460.0, ans=0.0 +2024-09-20 00:41:03,897 INFO [train.py:1198] (0/2) Epoch 44, batch 50, loss[loss=0.208, ctc_loss=0.09508, cr_loss=0.3109, attn_decoder_loss=0.2136, over 29419.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1129, cr_loss=0.3568, attn_decoder_loss=0.2392, over 1268656.83 frames. 
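
At the start of epoch 44 above, train.py pauses to compute a validation loss over the held-out set (944034 frames). Note that cr_loss collapses to 8.375e-15 there, which is consistent with the consistency-regularization term comparing two differently-augmented passes: without training-time augmentation it is effectively zero. A hedged sketch of such a validation pass follows; model, valid_loader, and compute_loss are placeholders, not icefall's exact API.

```python
# Hedged sketch of a validation pass; model, valid_loader, and compute_loss
# are placeholders. compute_loss is assumed to return frame-weighted loss
# sums for one batch plus that batch's frame count.
import torch

def validate(model, valid_loader, compute_loss):
    model.eval()
    tot = {"loss": 0.0, "ctc_loss": 0.0, "attn_decoder_loss": 0.0}
    frames = 0.0
    with torch.no_grad():                  # no gradients during validation
        for batch in valid_loader:
            sums, num_frames = compute_loss(model, batch)
            for k in tot:
                tot[k] += sums[k]
            frames += num_frames
    model.train()                          # back to training mode
    return {k: v / frames for k, v in tot.items()}, frames
```
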
], batch size: 70, lr: 2.52e-03, grad_scale: 16.0 +2024-09-20 00:41:04,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=778500.0, ans=0.0 +2024-09-20 00:41:07,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=778500.0, ans=0.04949747468305833 +2024-09-20 00:41:11,757 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=778500.0, ans=0.125 +2024-09-20 00:41:33,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=778540.0, ans=0.025 +2024-09-20 00:41:39,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=778580.0, ans=0.125 +2024-09-20 00:41:45,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=778580.0, ans=0.125 +2024-09-20 00:41:56,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=778620.0, ans=0.125 +2024-09-20 00:41:57,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=778620.0, ans=0.0 +2024-09-20 00:42:10,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=778660.0, ans=0.125 +2024-09-20 00:42:13,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=778660.0, ans=0.125 +2024-09-20 00:42:23,326 INFO [train.py:1198] (0/2) Epoch 44, batch 100, loss[loss=0.2305, ctc_loss=0.1122, cr_loss=0.3583, attn_decoder_loss=0.2357, over 29543.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1143, cr_loss=0.3594, attn_decoder_loss=0.2413, over 2254962.01 frames. ], batch size: 76, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:42:38,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=778740.0, ans=0.125 +2024-09-20 00:42:41,355 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 8.747e+01 9.046e+01 9.804e+01 1.542e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-20 00:42:47,100 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.72 vs. limit=6.0 +2024-09-20 00:42:52,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=778780.0, ans=0.125 +2024-09-20 00:42:58,184 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:42:58,592 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.37 vs. limit=22.5 +2024-09-20 00:43:04,931 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.17 vs. 
limit=22.5 +2024-09-20 00:43:22,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=778860.0, ans=0.0 +2024-09-20 00:43:33,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=778860.0, ans=0.0 +2024-09-20 00:43:37,845 INFO [train.py:1198] (0/2) Epoch 44, batch 150, loss[loss=0.2108, ctc_loss=0.09747, cr_loss=0.3133, attn_decoder_loss=0.2164, over 29437.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.112, cr_loss=0.3547, attn_decoder_loss=0.239, over 3048979.00 frames. ], batch size: 70, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:43:45,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=778900.0, ans=0.125 +2024-09-20 00:43:54,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=778940.0, ans=0.125 +2024-09-20 00:43:54,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=778940.0, ans=0.125 +2024-09-20 00:44:17,976 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.47 vs. limit=15.0 +2024-09-20 00:44:18,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=778980.0, ans=0.0 +2024-09-20 00:44:52,621 INFO [train.py:1198] (0/2) Epoch 44, batch 200, loss[loss=0.248, ctc_loss=0.1178, cr_loss=0.3651, attn_decoder_loss=0.2543, over 27173.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1111, cr_loss=0.3524, attn_decoder_loss=0.2379, over 3660229.34 frames. ], batch size: 124, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:44:57,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=779100.0, ans=0.1 +2024-09-20 00:45:10,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=779140.0, ans=0.125 +2024-09-20 00:45:12,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=779140.0, ans=0.125 +2024-09-20 00:45:13,160 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.338e+01 8.429e+01 8.994e+01 9.673e+01 1.827e+02, threshold=1.799e+02, percent-clipped=1.0 +2024-09-20 00:45:39,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=779220.0, ans=0.125 +2024-09-20 00:45:53,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=779220.0, ans=0.125 +2024-09-20 00:46:01,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=779260.0, ans=0.0 +2024-09-20 00:46:02,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=779260.0, ans=0.5 +2024-09-20 00:46:07,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.77 vs. 
limit=15.0 +2024-09-20 00:46:10,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=779260.0, ans=0.125 +2024-09-20 00:46:12,945 INFO [train.py:1198] (0/2) Epoch 44, batch 250, loss[loss=0.2524, ctc_loss=0.127, cr_loss=0.3788, attn_decoder_loss=0.2579, over 29237.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1106, cr_loss=0.3515, attn_decoder_loss=0.2378, over 4142737.39 frames. ], batch size: 100, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:46:28,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=779340.0, ans=0.09899494936611666 +2024-09-20 00:46:31,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=779340.0, ans=0.025 +2024-09-20 00:47:01,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=779420.0, ans=0.125 +2024-09-20 00:47:18,649 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.21 vs. limit=22.5 +2024-09-20 00:47:20,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=779460.0, ans=0.125 +2024-09-20 00:47:22,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=779460.0, ans=0.125 +2024-09-20 00:47:28,089 INFO [train.py:1198] (0/2) Epoch 44, batch 300, loss[loss=0.2463, ctc_loss=0.1207, cr_loss=0.3704, attn_decoder_loss=0.252, over 29531.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1107, cr_loss=0.3515, attn_decoder_loss=0.2378, over 4511243.37 frames. ], batch size: 92, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:47:29,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=779500.0, ans=0.0 +2024-09-20 00:47:44,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.90 vs. limit=12.0 +2024-09-20 00:47:47,519 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.408e+01 8.498e+01 8.969e+01 9.392e+01 3.050e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-20 00:47:49,494 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:47:50,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=779540.0, ans=0.125 +2024-09-20 00:47:58,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=779580.0, ans=0.125 +2024-09-20 00:48:36,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_na.min_abs, batch_count=779660.0, ans=0.02 +2024-09-20 00:48:43,473 INFO [train.py:1198] (0/2) Epoch 44, batch 350, loss[loss=0.207, ctc_loss=0.08909, cr_loss=0.2935, attn_decoder_loss=0.2135, over 29341.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1112, cr_loss=0.3524, attn_decoder_loss=0.2383, over 4796838.46 frames. 
], batch size: 71, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 00:49:23,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=779780.0, ans=0.1 +2024-09-20 00:50:03,122 INFO [train.py:1198] (0/2) Epoch 44, batch 400, loss[loss=0.2291, ctc_loss=0.1098, cr_loss=0.3483, attn_decoder_loss=0.2346, over 29715.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1105, cr_loss=0.3504, attn_decoder_loss=0.2376, over 5026303.07 frames. ], batch size: 82, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:50:10,282 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.23 vs. limit=15.0 +2024-09-20 00:50:18,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=779940.0, ans=10.0 +2024-09-20 00:50:22,886 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.675e+01 8.550e+01 9.066e+01 9.796e+01 2.019e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-20 00:50:55,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=780020.0, ans=0.1 +2024-09-20 00:51:02,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=780020.0, ans=0.0 +2024-09-20 00:51:06,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=780060.0, ans=0.125 +2024-09-20 00:51:06,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=780060.0, ans=0.0 +2024-09-20 00:51:14,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=780060.0, ans=0.2 +2024-09-20 00:51:14,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=780060.0, ans=0.09899494936611666 +2024-09-20 00:51:16,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=780060.0, ans=0.125 +2024-09-20 00:51:19,806 INFO [train.py:1198] (0/2) Epoch 44, batch 450, loss[loss=0.2386, ctc_loss=0.1148, cr_loss=0.3672, attn_decoder_loss=0.2442, over 29698.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1104, cr_loss=0.3497, attn_decoder_loss=0.2377, over 5188611.34 frames. ], batch size: 83, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:51:30,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=780100.0, ans=0.0 +2024-09-20 00:51:59,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=780180.0, ans=0.125 +2024-09-20 00:52:35,600 INFO [train.py:1198] (0/2) Epoch 44, batch 500, loss[loss=0.2544, ctc_loss=0.1252, cr_loss=0.3933, attn_decoder_loss=0.26, over 29438.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1097, cr_loss=0.3482, attn_decoder_loss=0.2368, over 5331645.37 frames. ], batch size: 94, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:52:36,325 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.33 vs. 
limit=15.0 +2024-09-20 00:52:40,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=780300.0, ans=0.2 +2024-09-20 00:52:57,329 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.573e+01 8.977e+01 9.726e+01 1.793e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-20 00:53:02,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=780340.0, ans=0.125 +2024-09-20 00:53:04,110 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.28 vs. limit=15.0 +2024-09-20 00:53:29,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=780420.0, ans=0.0 +2024-09-20 00:53:39,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=780460.0, ans=0.2 +2024-09-20 00:53:49,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=780460.0, ans=0.07 +2024-09-20 00:53:55,482 INFO [train.py:1198] (0/2) Epoch 44, batch 550, loss[loss=0.2379, ctc_loss=0.1043, cr_loss=0.3287, attn_decoder_loss=0.2454, over 28841.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1099, cr_loss=0.348, attn_decoder_loss=0.237, over 5425659.34 frames. ], batch size: 104, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:54:19,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=780540.0, ans=0.125 +2024-09-20 00:54:22,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=780540.0, ans=0.2 +2024-09-20 00:54:34,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=780580.0, ans=0.125 +2024-09-20 00:54:48,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=780620.0, ans=0.125 +2024-09-20 00:54:54,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=780660.0, ans=0.0 +2024-09-20 00:55:10,810 INFO [train.py:1198] (0/2) Epoch 44, batch 600, loss[loss=0.2438, ctc_loss=0.1187, cr_loss=0.3734, attn_decoder_loss=0.2494, over 29229.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.11, cr_loss=0.3481, attn_decoder_loss=0.2372, over 5511875.62 frames. 
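
For working with this file after training, the per-batch summaries are regular enough to parse back out, for example to plot the tot_loss curve across epochs. The helper below follows the exact entry format used throughout this log; the filename "log-train.txt" is hypothetical.

```python
# Self-contained helper for reading this log back, e.g. to plot the training
# curve. The regex follows the exact format of the train.py entries above.
import re

PATTERN = re.compile(
    r"Epoch (?P<epoch>\d+), batch (?P<batch>\d+).*?"
    r"tot_loss\[loss=(?P<loss>[\d.]+), ctc_loss=(?P<ctc>[\d.]+), "
    r"cr_loss=(?P<cr>[\d.]+), attn_decoder_loss=(?P<attn>[\d.]+)"
)

def parse_tot_loss(text):
    return [
        {
            "epoch": int(m["epoch"]),
            "batch": int(m["batch"]),
            "loss": float(m["loss"]),
            "ctc_loss": float(m["ctc"]),
            "cr_loss": float(m["cr"]),
            "attn_decoder_loss": float(m["attn"]),
        }
        for m in PATTERN.finditer(text)
    ]

with open("log-train.txt") as f:   # hypothetical filename for this log
    for row in parse_tot_loss(f.read())[:3]:
        print(row)
```
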
], batch size: 100, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:55:27,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=780740.0, ans=0.125 +2024-09-20 00:55:30,168 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.190e+01 8.511e+01 9.110e+01 9.777e+01 1.650e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-20 00:55:50,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=780780.0, ans=0.125 +2024-09-20 00:55:51,986 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=780780.0, ans=0.125 +2024-09-20 00:56:07,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=780820.0, ans=0.0 +2024-09-20 00:56:10,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=780860.0, ans=0.125 +2024-09-20 00:56:16,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=780860.0, ans=0.0 +2024-09-20 00:56:26,255 INFO [train.py:1198] (0/2) Epoch 44, batch 650, loss[loss=0.2385, ctc_loss=0.1169, cr_loss=0.3568, attn_decoder_loss=0.2441, over 29761.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1091, cr_loss=0.3464, attn_decoder_loss=0.2367, over 5588215.73 frames. ], batch size: 81, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:56:33,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=780900.0, ans=0.0 +2024-09-20 00:56:52,066 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.00 vs. limit=15.0 +2024-09-20 00:56:55,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=780940.0, ans=0.0 +2024-09-20 00:57:14,186 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=781020.0, ans=0.2 +2024-09-20 00:57:14,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=781020.0, ans=22.5 +2024-09-20 00:57:36,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=781060.0, ans=0.1 +2024-09-20 00:57:46,189 INFO [train.py:1198] (0/2) Epoch 44, batch 700, loss[loss=0.2263, ctc_loss=0.1073, cr_loss=0.3309, attn_decoder_loss=0.2321, over 29523.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1093, cr_loss=0.3466, attn_decoder_loss=0.2369, over 5638581.80 frames. ], batch size: 76, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:58:01,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=781140.0, ans=0.1 +2024-09-20 00:58:05,629 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.694e+01 8.523e+01 8.995e+01 9.436e+01 1.726e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-20 00:58:09,388 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.25 vs. limit=6.0 +2024-09-20 00:58:45,647 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.41 vs. 
limit=12.0 +2024-09-20 00:58:46,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=781260.0, ans=0.125 +2024-09-20 00:58:48,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=781260.0, ans=0.125 +2024-09-20 00:59:01,550 INFO [train.py:1198] (0/2) Epoch 44, batch 750, loss[loss=0.2303, ctc_loss=0.1017, cr_loss=0.3201, attn_decoder_loss=0.2375, over 29700.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.109, cr_loss=0.3456, attn_decoder_loss=0.2365, over 5678228.47 frames. ], batch size: 82, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:59:01,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=781300.0, ans=0.0 +2024-09-20 00:59:06,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=781300.0, ans=0.0 +2024-09-20 00:59:06,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.48 vs. limit=22.5 +2024-09-20 00:59:09,531 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.72 vs. limit=12.0 +2024-09-20 00:59:23,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.92 vs. limit=12.0 +2024-09-20 00:59:37,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=781380.0, ans=0.0 +2024-09-20 00:59:37,730 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=781380.0, ans=0.125 +2024-09-20 00:59:50,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=781420.0, ans=0.0 +2024-09-20 01:00:00,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=781460.0, ans=0.05 +2024-09-20 01:00:01,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=781460.0, ans=0.125 +2024-09-20 01:00:04,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=781460.0, ans=0.125 +2024-09-20 01:00:09,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=781460.0, ans=0.125 +2024-09-20 01:00:16,836 INFO [train.py:1198] (0/2) Epoch 44, batch 800, loss[loss=0.2094, ctc_loss=0.09722, cr_loss=0.327, attn_decoder_loss=0.2146, over 29579.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1092, cr_loss=0.3464, attn_decoder_loss=0.2365, over 5707967.39 frames. ], batch size: 73, lr: 2.51e-03, grad_scale: 32.0 +2024-09-20 01:00:37,970 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.478e+01 8.977e+01 9.680e+01 1.726e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-20 01:00:53,058 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.81 vs. 
limit=6.0 +2024-09-20 01:00:53,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=781580.0, ans=0.125 +2024-09-20 01:01:17,491 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.09 vs. limit=15.0 +2024-09-20 01:01:34,654 INFO [train.py:1198] (0/2) Epoch 44, batch 850, loss[loss=0.2448, ctc_loss=0.1154, cr_loss=0.3595, attn_decoder_loss=0.2512, over 29705.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1091, cr_loss=0.3463, attn_decoder_loss=0.2366, over 5736907.03 frames. ], batch size: 89, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:01:34,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=781700.0, ans=0.0 +2024-09-20 01:01:37,894 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:01:51,234 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.11 vs. limit=15.0 +2024-09-20 01:02:13,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=781780.0, ans=0.125 +2024-09-20 01:02:16,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=781780.0, ans=0.2 +2024-09-20 01:02:19,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=781780.0, ans=0.0 +2024-09-20 01:02:19,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=781780.0, ans=0.07 +2024-09-20 01:02:42,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.86 vs. limit=6.0 +2024-09-20 01:02:52,366 INFO [train.py:1198] (0/2) Epoch 44, batch 900, loss[loss=0.2118, ctc_loss=0.09533, cr_loss=0.3258, attn_decoder_loss=0.2176, over 29602.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1095, cr_loss=0.3471, attn_decoder_loss=0.237, over 5742099.74 frames. ], batch size: 73, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:03:03,953 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.35 vs. limit=15.0 +2024-09-20 01:03:15,003 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.724e+01 8.619e+01 9.074e+01 9.618e+01 1.505e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-20 01:03:22,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=781980.0, ans=0.125 +2024-09-20 01:03:27,480 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:03:42,997 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.64 vs. 
limit=15.0 +2024-09-20 01:03:46,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=782020.0, ans=0.025 +2024-09-20 01:03:48,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=782020.0, ans=0.125 +2024-09-20 01:04:07,365 INFO [train.py:1198] (0/2) Epoch 44, batch 950, loss[loss=0.206, ctc_loss=0.08885, cr_loss=0.2955, attn_decoder_loss=0.2124, over 29510.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1095, cr_loss=0.3474, attn_decoder_loss=0.2372, over 5743491.77 frames. ], batch size: 74, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:04:16,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=782100.0, ans=0.1 +2024-09-20 01:04:33,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=782140.0, ans=0.1 +2024-09-20 01:04:39,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=782180.0, ans=0.125 +2024-09-20 01:04:50,422 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=782180.0, ans=0.125 +2024-09-20 01:04:59,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=782220.0, ans=0.1 +2024-09-20 01:05:24,400 INFO [train.py:1198] (0/2) Epoch 44, batch 1000, loss[loss=0.2274, ctc_loss=0.1114, cr_loss=0.3492, attn_decoder_loss=0.2326, over 29494.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1104, cr_loss=0.349, attn_decoder_loss=0.238, over 5737837.87 frames. ], batch size: 77, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:05:30,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=782300.0, ans=0.1 +2024-09-20 01:05:49,521 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.182e+01 8.837e+01 9.355e+01 1.004e+02 2.810e+02, threshold=1.871e+02, percent-clipped=1.0 +2024-09-20 01:06:06,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=782380.0, ans=0.0 +2024-09-20 01:06:11,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=782420.0, ans=0.0 +2024-09-20 01:06:38,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=782460.0, ans=0.125 +2024-09-20 01:06:42,614 INFO [train.py:1198] (0/2) Epoch 44, batch 1050, loss[loss=0.2421, ctc_loss=0.114, cr_loss=0.3415, attn_decoder_loss=0.2487, over 29675.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1102, cr_loss=0.3486, attn_decoder_loss=0.2374, over 5745247.80 frames. ], batch size: 85, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:06:42,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=782500.0, ans=0.125 +2024-09-20 01:06:44,453 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:06:54,173 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.43 vs. 
limit=12.0 +2024-09-20 01:07:01,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=782540.0, ans=0.1 +2024-09-20 01:07:02,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=782540.0, ans=0.0 +2024-09-20 01:07:04,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=782540.0, ans=0.05 +2024-09-20 01:07:07,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=782540.0, ans=0.1 +2024-09-20 01:07:44,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=782660.0, ans=0.125 +2024-09-20 01:07:54,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=782660.0, ans=0.125 +2024-09-20 01:07:58,406 INFO [train.py:1198] (0/2) Epoch 44, batch 1100, loss[loss=0.2281, ctc_loss=0.1067, cr_loss=0.3373, attn_decoder_loss=0.2341, over 29471.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1101, cr_loss=0.3486, attn_decoder_loss=0.2375, over 5757450.82 frames. ], batch size: 78, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:08:23,099 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.650e+01 8.487e+01 8.944e+01 9.556e+01 1.706e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-20 01:08:41,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=782780.0, ans=0.125 +2024-09-20 01:08:44,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=782820.0, ans=0.125 +2024-09-20 01:08:47,223 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.58 vs. limit=15.0 +2024-09-20 01:09:16,487 INFO [train.py:1198] (0/2) Epoch 44, batch 1150, loss[loss=0.2277, ctc_loss=0.1038, cr_loss=0.3335, attn_decoder_loss=0.234, over 29459.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1101, cr_loss=0.3488, attn_decoder_loss=0.2371, over 5754782.39 frames. ], batch size: 78, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:10:17,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=783060.0, ans=0.0 +2024-09-20 01:10:25,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=783060.0, ans=0.0 +2024-09-20 01:10:33,779 INFO [train.py:1198] (0/2) Epoch 44, batch 1200, loss[loss=0.2325, ctc_loss=0.1097, cr_loss=0.3464, attn_decoder_loss=0.2385, over 29674.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1106, cr_loss=0.3497, attn_decoder_loss=0.2379, over 5748006.98 frames. 
], batch size: 85, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:10:37,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=783100.0, ans=0.125 +2024-09-20 01:10:56,606 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.628e+01 8.611e+01 9.178e+01 9.686e+01 1.323e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-20 01:10:58,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=783140.0, ans=0.125 +2024-09-20 01:11:36,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=783260.0, ans=0.015 +2024-09-20 01:11:36,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=783260.0, ans=0.125 +2024-09-20 01:11:44,372 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=783260.0, ans=0.0 +2024-09-20 01:11:50,054 INFO [train.py:1198] (0/2) Epoch 44, batch 1250, loss[loss=0.2485, ctc_loss=0.1291, cr_loss=0.3995, attn_decoder_loss=0.2529, over 29517.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1109, cr_loss=0.3504, attn_decoder_loss=0.2384, over 5775629.13 frames. ], batch size: 92, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:11:59,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=783300.0, ans=0.0 +2024-09-20 01:12:09,529 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.52 vs. limit=22.5 +2024-09-20 01:12:28,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=783380.0, ans=0.125 +2024-09-20 01:12:31,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=783380.0, ans=0.1 +2024-09-20 01:12:37,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=783420.0, ans=0.125 +2024-09-20 01:12:37,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=783420.0, ans=0.125 +2024-09-20 01:12:37,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=783420.0, ans=0.09899494936611666 +2024-09-20 01:12:43,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=783420.0, ans=0.025 +2024-09-20 01:12:43,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=783420.0, ans=0.125 +2024-09-20 01:12:44,398 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.48 vs. limit=15.0 +2024-09-20 01:12:46,040 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.58 vs. 
limit=15.0 +2024-09-20 01:13:02,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=783460.0, ans=0.025 +2024-09-20 01:13:05,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=783460.0, ans=0.1 +2024-09-20 01:13:07,790 INFO [train.py:1198] (0/2) Epoch 44, batch 1300, loss[loss=0.2393, ctc_loss=0.1031, cr_loss=0.3271, attn_decoder_loss=0.2472, over 28290.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1106, cr_loss=0.3494, attn_decoder_loss=0.2378, over 5779139.09 frames. ], batch size: 111, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:13:08,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=783500.0, ans=0.125 +2024-09-20 01:13:12,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=783500.0, ans=0.125 +2024-09-20 01:13:28,758 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.28 vs. limit=15.0 +2024-09-20 01:13:30,709 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.490e+01 8.901e+01 9.557e+01 1.827e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-20 01:13:55,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=783620.0, ans=0.0 +2024-09-20 01:13:58,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=783620.0, ans=0.1 +2024-09-20 01:14:13,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.04 vs. limit=15.0 +2024-09-20 01:14:14,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=783660.0, ans=0.1 +2024-09-20 01:14:23,882 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.25 vs. limit=10.0 +2024-09-20 01:14:25,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.01 vs. limit=12.0 +2024-09-20 01:14:25,848 INFO [train.py:1198] (0/2) Epoch 44, batch 1350, loss[loss=0.2361, ctc_loss=0.114, cr_loss=0.3559, attn_decoder_loss=0.2417, over 29763.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1103, cr_loss=0.3492, attn_decoder_loss=0.2376, over 5796261.51 frames. 
], batch size: 81, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:14:40,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=783740.0, ans=0.125 +2024-09-20 01:14:40,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=783740.0, ans=0.125 +2024-09-20 01:14:40,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=783740.0, ans=0.0 +2024-09-20 01:14:42,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=783740.0, ans=0.125 +2024-09-20 01:14:54,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=783780.0, ans=0.025 +2024-09-20 01:15:01,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=783780.0, ans=0.1 +2024-09-20 01:15:02,205 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.92 vs. limit=12.0 +2024-09-20 01:15:10,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=783820.0, ans=10.0 +2024-09-20 01:15:40,847 INFO [train.py:1198] (0/2) Epoch 44, batch 1400, loss[loss=0.2077, ctc_loss=0.09281, cr_loss=0.3045, attn_decoder_loss=0.2137, over 29600.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.11, cr_loss=0.3487, attn_decoder_loss=0.2374, over 5808125.16 frames. ], batch size: 69, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:15:43,429 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.60 vs. limit=10.0 +2024-09-20 01:15:48,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=783900.0, ans=0.125 +2024-09-20 01:16:03,128 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.318e+01 8.567e+01 9.208e+01 9.655e+01 2.033e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-20 01:16:17,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=783980.0, ans=0.0 +2024-09-20 01:16:19,228 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-196000.pt +2024-09-20 01:16:40,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=784020.0, ans=0.125 +2024-09-20 01:16:41,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=784020.0, ans=0.125 +2024-09-20 01:17:05,608 INFO [train.py:1198] (0/2) Epoch 44, batch 1450, loss[loss=0.2511, ctc_loss=0.125, cr_loss=0.3883, attn_decoder_loss=0.2565, over 29449.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1104, cr_loss=0.3499, attn_decoder_loss=0.2378, over 5805163.55 frames. 
], batch size: 94, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:17:10,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=784100.0, ans=0.125 +2024-09-20 01:18:14,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=784260.0, ans=0.125 +2024-09-20 01:18:23,234 INFO [train.py:1198] (0/2) Epoch 44, batch 1500, loss[loss=0.24, ctc_loss=0.1147, cr_loss=0.3486, attn_decoder_loss=0.2461, over 29628.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1103, cr_loss=0.3497, attn_decoder_loss=0.2381, over 5805354.00 frames. ], batch size: 86, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:18:23,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=784300.0, ans=0.025 +2024-09-20 01:18:34,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=784300.0, ans=0.035 +2024-09-20 01:18:40,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=784340.0, ans=0.0 +2024-09-20 01:18:47,501 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.764e+01 8.610e+01 9.049e+01 9.653e+01 2.114e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-20 01:19:01,952 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.18 vs. limit=15.0 +2024-09-20 01:19:06,040 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=784380.0, ans=0.0 +2024-09-20 01:19:33,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=784460.0, ans=0.125 +2024-09-20 01:19:39,004 INFO [train.py:1198] (0/2) Epoch 44, batch 1550, loss[loss=0.2477, ctc_loss=0.1233, cr_loss=0.3784, attn_decoder_loss=0.2531, over 29524.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1108, cr_loss=0.3499, attn_decoder_loss=0.2385, over 5780568.08 frames. ], batch size: 90, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:19:40,030 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.17 vs. limit=10.0 +2024-09-20 01:19:43,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=784500.0, ans=0.125 +2024-09-20 01:20:24,208 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.58 vs. limit=6.0 +2024-09-20 01:20:47,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=784660.0, ans=0.2 +2024-09-20 01:20:56,207 INFO [train.py:1198] (0/2) Epoch 44, batch 1600, loss[loss=0.2417, ctc_loss=0.1183, cr_loss=0.3677, attn_decoder_loss=0.2472, over 29660.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1108, cr_loss=0.35, attn_decoder_loss=0.2383, over 5762219.35 frames. 
], batch size: 85, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:21:02,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=784700.0, ans=0.2 +2024-09-20 01:21:07,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=784700.0, ans=0.125 +2024-09-20 01:21:14,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=784740.0, ans=0.125 +2024-09-20 01:21:18,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=784740.0, ans=0.2 +2024-09-20 01:21:20,302 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.634e+01 8.674e+01 9.197e+01 9.675e+01 9.690e+02, threshold=1.839e+02, percent-clipped=2.0 +2024-09-20 01:21:20,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=784740.0, ans=0.0 +2024-09-20 01:22:14,099 INFO [train.py:1198] (0/2) Epoch 44, batch 1650, loss[loss=0.2489, ctc_loss=0.1221, cr_loss=0.3825, attn_decoder_loss=0.2544, over 29691.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1104, cr_loss=0.3493, attn_decoder_loss=0.238, over 5756344.99 frames. ], batch size: 89, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:23:07,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=785020.0, ans=0.2 +2024-09-20 01:23:23,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=785060.0, ans=0.125 +2024-09-20 01:23:26,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=785060.0, ans=0.0 +2024-09-20 01:23:29,266 INFO [train.py:1198] (0/2) Epoch 44, batch 1700, loss[loss=0.2033, ctc_loss=0.08759, cr_loss=0.3065, attn_decoder_loss=0.2093, over 29566.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1102, cr_loss=0.3489, attn_decoder_loss=0.238, over 5778423.79 frames. 
], batch size: 69, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:23:38,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=785100.0, ans=0.07 +2024-09-20 01:23:43,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=785140.0, ans=0.0 +2024-09-20 01:23:52,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=785140.0, ans=0.0 +2024-09-20 01:23:55,597 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.621e+01 9.129e+01 9.684e+01 1.448e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-20 01:23:57,413 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:24:04,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=785180.0, ans=0.125 +2024-09-20 01:24:12,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=785180.0, ans=0.09899494936611666 +2024-09-20 01:24:13,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=785180.0, ans=0.125 +2024-09-20 01:24:15,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=785220.0, ans=0.2 +2024-09-20 01:24:20,693 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.51 vs. limit=15.0 +2024-09-20 01:24:46,811 INFO [train.py:1198] (0/2) Epoch 44, batch 1750, loss[loss=0.2085, ctc_loss=0.09494, cr_loss=0.318, attn_decoder_loss=0.214, over 29341.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1099, cr_loss=0.3479, attn_decoder_loss=0.2377, over 5788309.47 frames. ], batch size: 67, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:24:48,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=785300.0, ans=0.125 +2024-09-20 01:25:17,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.30 vs. limit=15.0 +2024-09-20 01:25:21,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=785380.0, ans=0.125 +2024-09-20 01:26:01,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=785460.0, ans=0.5 +2024-09-20 01:26:03,906 INFO [train.py:1198] (0/2) Epoch 44, batch 1800, loss[loss=0.2457, ctc_loss=0.1215, cr_loss=0.3741, attn_decoder_loss=0.2512, over 29690.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1097, cr_loss=0.3479, attn_decoder_loss=0.2377, over 5790349.91 frames. ], batch size: 83, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:26:06,454 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.58 vs. 
limit=12.0 +2024-09-20 01:26:16,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=785500.0, ans=0.07 +2024-09-20 01:26:27,988 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.581e+01 8.530e+01 8.993e+01 9.458e+01 1.310e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-20 01:26:29,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=785540.0, ans=0.125 +2024-09-20 01:26:32,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=785580.0, ans=0.125 +2024-09-20 01:26:56,920 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.95 vs. limit=22.5 +2024-09-20 01:27:10,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=785660.0, ans=10.0 +2024-09-20 01:27:19,805 INFO [train.py:1198] (0/2) Epoch 44, batch 1850, loss[loss=0.2332, ctc_loss=0.1036, cr_loss=0.3238, attn_decoder_loss=0.2403, over 29625.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1097, cr_loss=0.3485, attn_decoder_loss=0.2377, over 5798606.39 frames. ], batch size: 86, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:27:20,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=785700.0, ans=0.0 +2024-09-20 01:27:44,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=785740.0, ans=0.2 +2024-09-20 01:27:46,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=785740.0, ans=0.0 +2024-09-20 01:28:13,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=785820.0, ans=0.0 +2024-09-20 01:28:15,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=785820.0, ans=0.125 +2024-09-20 01:28:16,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=785820.0, ans=0.125 +2024-09-20 01:28:37,305 INFO [train.py:1198] (0/2) Epoch 44, batch 1900, loss[loss=0.2333, ctc_loss=0.09809, cr_loss=0.3168, attn_decoder_loss=0.2413, over 29700.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1099, cr_loss=0.3489, attn_decoder_loss=0.2382, over 5806673.13 frames. ], batch size: 89, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:28:42,684 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.10 vs. limit=6.0 +2024-09-20 01:28:51,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=785940.0, ans=0.0 +2024-09-20 01:28:54,697 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.09 vs. 
limit=6.0 +2024-09-20 01:28:58,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=785940.0, ans=0.0 +2024-09-20 01:29:00,929 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.84 vs. limit=15.0 +2024-09-20 01:29:01,488 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.304e+01 8.601e+01 9.112e+01 9.762e+01 1.549e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-20 01:29:47,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=786060.0, ans=0.125 +2024-09-20 01:29:50,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=786060.0, ans=0.07 +2024-09-20 01:29:54,968 INFO [train.py:1198] (0/2) Epoch 44, batch 1950, loss[loss=0.2329, ctc_loss=0.1137, cr_loss=0.3517, attn_decoder_loss=0.2383, over 29451.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.111, cr_loss=0.3516, attn_decoder_loss=0.2393, over 5820909.90 frames. ], batch size: 78, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:30:19,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.90 vs. limit=10.0 +2024-09-20 01:30:24,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=786180.0, ans=0.125 +2024-09-20 01:30:33,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=786180.0, ans=0.025 +2024-09-20 01:31:07,353 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. limit=6.0 +2024-09-20 01:31:09,160 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.60 vs. limit=15.0 +2024-09-20 01:31:09,701 INFO [train.py:1198] (0/2) Epoch 44, batch 2000, loss[loss=0.2018, ctc_loss=0.08974, cr_loss=0.2936, attn_decoder_loss=0.2077, over 29358.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1112, cr_loss=0.3517, attn_decoder_loss=0.2395, over 5797361.89 frames. ], batch size: 67, lr: 2.50e-03, grad_scale: 32.0 +2024-09-20 01:31:15,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.39 vs. limit=6.0 +2024-09-20 01:31:15,508 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.98 vs. 
limit=6.0 +2024-09-20 01:31:30,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=786340.0, ans=0.0 +2024-09-20 01:31:37,704 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.322e+01 8.592e+01 9.152e+01 9.700e+01 1.620e+02, threshold=1.830e+02, percent-clipped=0.0 +2024-09-20 01:31:48,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=786380.0, ans=0.0 +2024-09-20 01:31:59,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=786420.0, ans=0.1 +2024-09-20 01:32:27,931 INFO [train.py:1198] (0/2) Epoch 44, batch 2050, loss[loss=0.2122, ctc_loss=0.1005, cr_loss=0.3195, attn_decoder_loss=0.2175, over 29443.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1104, cr_loss=0.3503, attn_decoder_loss=0.2383, over 5788997.11 frames. ], batch size: 70, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:32:46,689 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.41 vs. limit=10.0 +2024-09-20 01:33:07,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=786580.0, ans=0.0 +2024-09-20 01:33:07,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=786580.0, ans=0.0 +2024-09-20 01:33:11,849 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:33:13,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=786620.0, ans=0.2 +2024-09-20 01:33:18,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=786620.0, ans=0.125 +2024-09-20 01:33:28,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=786660.0, ans=0.1 +2024-09-20 01:33:44,954 INFO [train.py:1198] (0/2) Epoch 44, batch 2100, loss[loss=0.2273, ctc_loss=0.1086, cr_loss=0.3473, attn_decoder_loss=0.2328, over 29756.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1093, cr_loss=0.3484, attn_decoder_loss=0.2373, over 5801082.62 frames. ], batch size: 81, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:34:00,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=786740.0, ans=0.2 +2024-09-20 01:34:00,858 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.13 vs. limit=15.0 +2024-09-20 01:34:10,461 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.556e+01 8.592e+01 8.953e+01 9.546e+01 1.075e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-20 01:34:30,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=786820.0, ans=0.125 +2024-09-20 01:34:58,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=786900.0, ans=0.025 +2024-09-20 01:34:59,829 INFO [train.py:1198] (0/2) Epoch 44, batch 2150, loss[loss=0.2238, ctc_loss=0.09986, cr_loss=0.3431, attn_decoder_loss=0.23, over 29484.00 frames. 
], tot_loss[loss=0.231, ctc_loss=0.109, cr_loss=0.3478, attn_decoder_loss=0.2368, over 5816411.87 frames. ], batch size: 78, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:35:11,612 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.88 vs. limit=15.0 +2024-09-20 01:35:37,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=786980.0, ans=0.0 +2024-09-20 01:36:02,014 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.39 vs. limit=6.0 +2024-09-20 01:36:17,944 INFO [train.py:1198] (0/2) Epoch 44, batch 2200, loss[loss=0.2315, ctc_loss=0.1053, cr_loss=0.3467, attn_decoder_loss=0.2379, over 29614.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1093, cr_loss=0.348, attn_decoder_loss=0.237, over 5813032.49 frames. ], batch size: 86, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:36:24,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=787100.0, ans=0.0 +2024-09-20 01:36:30,155 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=787100.0, ans=0.1 +2024-09-20 01:36:35,936 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=787140.0, ans=0.125 +2024-09-20 01:36:42,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=787140.0, ans=0.0 +2024-09-20 01:36:43,208 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.567e+01 8.551e+01 8.996e+01 9.508e+01 1.674e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-20 01:36:44,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.11 vs. limit=22.5 +2024-09-20 01:36:54,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=787180.0, ans=0.0 +2024-09-20 01:37:03,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=787220.0, ans=0.2 +2024-09-20 01:37:12,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=787220.0, ans=0.0 +2024-09-20 01:37:34,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=787300.0, ans=0.1 +2024-09-20 01:37:35,608 INFO [train.py:1198] (0/2) Epoch 44, batch 2250, loss[loss=0.2398, ctc_loss=0.1133, cr_loss=0.3482, attn_decoder_loss=0.2461, over 29710.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1089, cr_loss=0.3467, attn_decoder_loss=0.2369, over 5812907.59 frames. ], batch size: 82, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:37:43,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=787300.0, ans=0.125 +2024-09-20 01:38:04,581 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:38:10,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.16 vs. 
limit=6.0 +2024-09-20 01:38:13,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=787380.0, ans=0.0 +2024-09-20 01:38:17,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=787380.0, ans=0.125 +2024-09-20 01:38:35,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=787460.0, ans=0.0 +2024-09-20 01:38:44,060 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.57 vs. limit=22.5 +2024-09-20 01:38:50,742 INFO [train.py:1198] (0/2) Epoch 44, batch 2300, loss[loss=0.2118, ctc_loss=0.103, cr_loss=0.3333, attn_decoder_loss=0.2165, over 29340.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.109, cr_loss=0.3473, attn_decoder_loss=0.2363, over 5800208.47 frames. ], batch size: 71, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:38:54,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=787500.0, ans=0.1 +2024-09-20 01:39:03,575 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.75 vs. limit=12.0 +2024-09-20 01:39:10,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=787540.0, ans=0.04949747468305833 +2024-09-20 01:39:13,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=787540.0, ans=0.125 +2024-09-20 01:39:18,228 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.593e+01 8.668e+01 9.192e+01 9.767e+01 1.748e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-20 01:39:31,642 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.44 vs. limit=15.0 +2024-09-20 01:39:53,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=787660.0, ans=0.025 +2024-09-20 01:40:08,548 INFO [train.py:1198] (0/2) Epoch 44, batch 2350, loss[loss=0.2425, ctc_loss=0.1154, cr_loss=0.3788, attn_decoder_loss=0.2482, over 29685.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1096, cr_loss=0.3487, attn_decoder_loss=0.2368, over 5805614.50 frames. ], batch size: 83, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:40:09,597 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.64 vs. limit=15.0 +2024-09-20 01:40:23,759 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=787740.0, ans=0.025 +2024-09-20 01:40:28,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=787740.0, ans=0.0 +2024-09-20 01:40:30,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.20 vs. limit=22.5 +2024-09-20 01:40:59,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.57 vs. 
limit=15.0 +2024-09-20 01:41:26,339 INFO [train.py:1198] (0/2) Epoch 44, batch 2400, loss[loss=0.2126, ctc_loss=0.09099, cr_loss=0.2975, attn_decoder_loss=0.2195, over 29543.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1098, cr_loss=0.349, attn_decoder_loss=0.2372, over 5808715.85 frames. ], batch size: 76, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:41:38,514 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=787900.0, ans=0.0 +2024-09-20 01:41:50,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=787940.0, ans=0.2 +2024-09-20 01:41:53,424 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.728e+01 9.218e+01 9.758e+01 1.607e+02, threshold=1.844e+02, percent-clipped=0.0 +2024-09-20 01:42:42,339 INFO [train.py:1198] (0/2) Epoch 44, batch 2450, loss[loss=0.2329, ctc_loss=0.1075, cr_loss=0.3539, attn_decoder_loss=0.239, over 29718.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1101, cr_loss=0.3492, attn_decoder_loss=0.2381, over 5785394.76 frames. ], batch size: 82, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:42:48,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=788100.0, ans=0.125 +2024-09-20 01:43:02,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=788140.0, ans=0.07 +2024-09-20 01:43:32,829 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:43:32,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=788220.0, ans=0.0 +2024-09-20 01:43:59,853 INFO [train.py:1198] (0/2) Epoch 44, batch 2500, loss[loss=0.2487, ctc_loss=0.1252, cr_loss=0.382, attn_decoder_loss=0.2539, over 29618.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1104, cr_loss=0.3493, attn_decoder_loss=0.2382, over 5794909.70 frames. ], batch size: 86, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:44:14,301 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.33 vs. limit=22.5 +2024-09-20 01:44:21,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=788340.0, ans=0.125 +2024-09-20 01:44:26,963 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.358e+01 8.639e+01 9.215e+01 9.726e+01 1.262e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-20 01:44:27,957 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.44 vs. limit=15.0 +2024-09-20 01:44:35,441 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.74 vs. 
limit=10.0 +2024-09-20 01:44:39,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=788380.0, ans=0.1 +2024-09-20 01:44:59,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=788460.0, ans=0.125 +2024-09-20 01:45:09,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=788460.0, ans=0.05 +2024-09-20 01:45:17,642 INFO [train.py:1198] (0/2) Epoch 44, batch 2550, loss[loss=0.2073, ctc_loss=0.09275, cr_loss=0.3073, attn_decoder_loss=0.2132, over 29311.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1103, cr_loss=0.3491, attn_decoder_loss=0.2381, over 5798227.42 frames. ], batch size: 67, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:45:17,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=788500.0, ans=0.125 +2024-09-20 01:45:49,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=788580.0, ans=0.5 +2024-09-20 01:45:49,910 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.67 vs. limit=15.0 +2024-09-20 01:46:01,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=788620.0, ans=0.2 +2024-09-20 01:46:10,318 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=788620.0, ans=0.125 +2024-09-20 01:46:26,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=788660.0, ans=0.125 +2024-09-20 01:46:32,703 INFO [train.py:1198] (0/2) Epoch 44, batch 2600, loss[loss=0.2308, ctc_loss=0.1121, cr_loss=0.3634, attn_decoder_loss=0.2359, over 29429.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1108, cr_loss=0.3504, attn_decoder_loss=0.2387, over 5794862.69 frames. ], batch size: 78, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:46:43,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=788700.0, ans=0.0 +2024-09-20 01:46:53,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=788740.0, ans=0.025 +2024-09-20 01:47:03,304 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.447e+01 8.497e+01 9.008e+01 9.570e+01 2.359e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 01:47:09,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=788780.0, ans=0.125 +2024-09-20 01:47:14,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=788780.0, ans=0.125 +2024-09-20 01:47:37,843 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.45 vs. 
limit=15.0 +2024-09-20 01:47:43,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=788860.0, ans=0.025 +2024-09-20 01:47:50,464 INFO [train.py:1198] (0/2) Epoch 44, batch 2650, loss[loss=0.2477, ctc_loss=0.1148, cr_loss=0.3606, attn_decoder_loss=0.2544, over 29274.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1108, cr_loss=0.3502, attn_decoder_loss=0.2389, over 5801028.37 frames. ], batch size: 100, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:48:52,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=789060.0, ans=0.125 +2024-09-20 01:49:07,981 INFO [train.py:1198] (0/2) Epoch 44, batch 2700, loss[loss=0.2504, ctc_loss=0.1197, cr_loss=0.3745, attn_decoder_loss=0.2566, over 29543.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1108, cr_loss=0.3505, attn_decoder_loss=0.2391, over 5796874.56 frames. ], batch size: 87, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:49:20,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=789100.0, ans=0.125 +2024-09-20 01:49:21,700 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=789140.0, ans=0.0 +2024-09-20 01:49:33,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=789140.0, ans=0.125 +2024-09-20 01:49:36,539 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.638e+01 9.038e+01 9.626e+01 7.105e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-20 01:49:38,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=789180.0, ans=0.0 +2024-09-20 01:50:11,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=789260.0, ans=0.125 +2024-09-20 01:50:14,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=789260.0, ans=0.125 +2024-09-20 01:50:23,488 INFO [train.py:1198] (0/2) Epoch 44, batch 2750, loss[loss=0.2217, ctc_loss=0.1096, cr_loss=0.3577, attn_decoder_loss=0.2262, over 29508.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1101, cr_loss=0.3491, attn_decoder_loss=0.2379, over 5794569.28 frames. ], batch size: 75, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:50:37,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=789340.0, ans=0.1 +2024-09-20 01:50:42,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=789340.0, ans=0.025 +2024-09-20 01:50:55,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=789380.0, ans=0.125 +2024-09-20 01:51:10,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=789420.0, ans=0.0 +2024-09-20 01:51:14,393 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.08 vs. 
limit=12.0 +2024-09-20 01:51:19,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=789420.0, ans=0.125 +2024-09-20 01:51:21,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=789420.0, ans=0.125 +2024-09-20 01:51:41,198 INFO [train.py:1198] (0/2) Epoch 44, batch 2800, loss[loss=0.2575, ctc_loss=0.1406, cr_loss=0.3884, attn_decoder_loss=0.2618, over 20265.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1105, cr_loss=0.3496, attn_decoder_loss=0.2381, over 5775863.06 frames. ], batch size: 210, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:51:41,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=789500.0, ans=0.125 +2024-09-20 01:51:48,958 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:51:53,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=789500.0, ans=0.125 +2024-09-20 01:52:03,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=789540.0, ans=0.125 +2024-09-20 01:52:09,703 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.668e+01 8.779e+01 9.114e+01 9.644e+01 1.703e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-20 01:52:19,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=789580.0, ans=0.125 +2024-09-20 01:52:39,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=789620.0, ans=0.04949747468305833 +2024-09-20 01:52:40,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=789660.0, ans=0.125 +2024-09-20 01:52:58,696 INFO [train.py:1198] (0/2) Epoch 44, batch 2850, loss[loss=0.2265, ctc_loss=0.1063, cr_loss=0.3398, attn_decoder_loss=0.2323, over 29523.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1109, cr_loss=0.3505, attn_decoder_loss=0.2384, over 5762266.45 frames. ], batch size: 77, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:53:17,185 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:53:22,029 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.56 vs. 
limit=15.0 +2024-09-20 01:53:30,739 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=789780.0, ans=0.125 +2024-09-20 01:53:41,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=789780.0, ans=0.0 +2024-09-20 01:53:42,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=789820.0, ans=0.2 +2024-09-20 01:53:50,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=789820.0, ans=0.2 +2024-09-20 01:54:08,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=789860.0, ans=0.2 +2024-09-20 01:54:11,717 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.89 vs. limit=10.0 +2024-09-20 01:54:13,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.12 vs. limit=10.0 +2024-09-20 01:54:13,955 INFO [train.py:1198] (0/2) Epoch 44, batch 2900, loss[loss=0.2327, ctc_loss=0.1107, cr_loss=0.3448, attn_decoder_loss=0.2386, over 29401.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1108, cr_loss=0.3508, attn_decoder_loss=0.239, over 5787849.30 frames. ], batch size: 79, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:54:23,278 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=789900.0, ans=0.0 +2024-09-20 01:54:24,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=789900.0, ans=0.125 +2024-09-20 01:54:37,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=789940.0, ans=0.1 +2024-09-20 01:54:46,280 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.529e+01 8.571e+01 8.849e+01 9.680e+01 1.963e+02, threshold=1.770e+02, percent-clipped=2.0 +2024-09-20 01:54:47,082 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.26 vs. limit=6.0 +2024-09-20 01:55:03,163 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:55:03,897 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.59 vs. limit=15.0 +2024-09-20 01:55:13,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=790020.0, ans=0.125 +2024-09-20 01:55:15,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=790060.0, ans=0.0 +2024-09-20 01:55:20,256 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.01 vs. limit=6.0 +2024-09-20 01:55:31,618 INFO [train.py:1198] (0/2) Epoch 44, batch 2950, loss[loss=0.2228, ctc_loss=0.1079, cr_loss=0.3563, attn_decoder_loss=0.2277, over 29510.00 frames. 
], tot_loss[loss=0.2321, ctc_loss=0.1102, cr_loss=0.3495, attn_decoder_loss=0.2379, over 5782380.57 frames. ], batch size: 75, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:55:41,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=790100.0, ans=0.125 +2024-09-20 01:55:50,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=790140.0, ans=0.125 +2024-09-20 01:55:56,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=790140.0, ans=0.125 +2024-09-20 01:56:08,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=790180.0, ans=0.0 +2024-09-20 01:56:14,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=790180.0, ans=0.1 +2024-09-20 01:56:24,202 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.89 vs. limit=22.5 +2024-09-20 01:56:25,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=790220.0, ans=0.125 +2024-09-20 01:56:33,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.33 vs. limit=10.0 +2024-09-20 01:56:49,773 INFO [train.py:1198] (0/2) Epoch 44, batch 3000, loss[loss=0.2281, ctc_loss=0.1076, cr_loss=0.3437, attn_decoder_loss=0.2338, over 29759.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1104, cr_loss=0.35, attn_decoder_loss=0.2378, over 5782390.45 frames. ], batch size: 81, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:56:49,773 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 01:57:08,098 INFO [train.py:1230] (0/2) Epoch 44, validation: loss=0.2127, ctc_loss=0.03705, cr_loss=7.369e-15, attn_decoder_loss=0.2322, over 944034.00 frames. +2024-09-20 01:57:08,098 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 01:57:14,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=790300.0, ans=0.125 +2024-09-20 01:57:18,032 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.14 vs. limit=15.0 +2024-09-20 01:57:26,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=790340.0, ans=0.125 +2024-09-20 01:57:37,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.93 vs. 
limit=12.0 +2024-09-20 01:57:38,255 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.678e+01 8.680e+01 9.147e+01 9.757e+01 3.916e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-20 01:57:55,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=790420.0, ans=0.0 +2024-09-20 01:58:01,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=790420.0, ans=0.125 +2024-09-20 01:58:11,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=790460.0, ans=0.125 +2024-09-20 01:58:12,360 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.52 vs. limit=15.0 +2024-09-20 01:58:26,517 INFO [train.py:1198] (0/2) Epoch 44, batch 3050, loss[loss=0.2276, ctc_loss=0.1086, cr_loss=0.3537, attn_decoder_loss=0.2329, over 29521.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1108, cr_loss=0.3508, attn_decoder_loss=0.2386, over 5776523.32 frames. ], batch size: 76, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:58:28,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=790500.0, ans=0.0 +2024-09-20 01:58:28,928 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.19 vs. limit=15.0 +2024-09-20 01:58:36,355 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.18 vs. limit=15.0 +2024-09-20 01:59:10,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=790620.0, ans=0.125 +2024-09-20 01:59:15,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=790620.0, ans=0.2 +2024-09-20 01:59:21,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=790620.0, ans=0.2 +2024-09-20 01:59:21,063 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=790620.0, ans=0.0 +2024-09-20 01:59:36,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=790660.0, ans=0.125 +2024-09-20 01:59:41,914 INFO [train.py:1198] (0/2) Epoch 44, batch 3100, loss[loss=0.2374, ctc_loss=0.1128, cr_loss=0.3476, attn_decoder_loss=0.2435, over 29244.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1105, cr_loss=0.3504, attn_decoder_loss=0.2381, over 5777408.42 frames. 
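For reading the loss[...] / tot_loss[...] tuples: the experiment directory name recorded later in this log (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02) suggests the total is a weighted sum of the three component losses, and the logged numbers are consistent with exactly that, both in the batch-3100 line immediately above and in the validation lines (where cr_loss is ~7e-15, i.e. effectively zero in eval mode). A quick check, assuming those weights:

```python
# Weights assumed from the exp dir name that appears later in this log.
ctc_scale, aed_scale, cr_scale = 0.1, 0.9, 0.02

# Components of the batch-3100 tot_loss just above:
ctc_loss, cr_loss, attn_decoder_loss = 0.1105, 0.3504, 0.2381
loss = ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss
print(round(loss, 4))  # 0.2323, matching the logged tot_loss

# Validation line above: cr_loss ~ 7e-15, so the cr term vanishes.
print(round(ctc_scale * 0.03705 + aed_scale * 0.2322, 4))  # 0.2127
```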
], batch size: 100, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:59:49,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=790700.0, ans=0.125 +2024-09-20 02:00:11,908 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.289e+01 8.501e+01 8.989e+01 9.639e+01 2.477e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-20 02:00:13,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=790780.0, ans=0.125 +2024-09-20 02:00:16,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=790780.0, ans=0.125 +2024-09-20 02:00:31,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=790820.0, ans=0.125 +2024-09-20 02:00:33,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=790820.0, ans=0.0 +2024-09-20 02:00:41,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=790820.0, ans=10.0 +2024-09-20 02:00:46,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=790860.0, ans=0.2 +2024-09-20 02:00:47,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=790860.0, ans=0.125 +2024-09-20 02:00:49,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=790860.0, ans=0.09899494936611666 +2024-09-20 02:00:59,880 INFO [train.py:1198] (0/2) Epoch 44, batch 3150, loss[loss=0.2501, ctc_loss=0.1259, cr_loss=0.3775, attn_decoder_loss=0.2555, over 28857.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.11, cr_loss=0.3493, attn_decoder_loss=0.2379, over 5783611.69 frames. ], batch size: 104, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 02:01:18,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=790940.0, ans=0.125 +2024-09-20 02:01:24,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=790940.0, ans=0.0 +2024-09-20 02:01:31,915 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=790980.0, ans=0.125 +2024-09-20 02:01:40,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=790980.0, ans=0.125 +2024-09-20 02:01:51,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=791020.0, ans=0.1 +2024-09-20 02:02:17,232 INFO [train.py:1198] (0/2) Epoch 44, batch 3200, loss[loss=0.2358, ctc_loss=0.1133, cr_loss=0.3553, attn_decoder_loss=0.2415, over 29417.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1098, cr_loss=0.3489, attn_decoder_loss=0.2376, over 5794245.09 frames. 
], batch size: 79, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 02:02:23,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=791100.0, ans=0.0 +2024-09-20 02:02:28,131 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:02:37,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=791140.0, ans=0.125 +2024-09-20 02:02:43,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=791140.0, ans=0.0 +2024-09-20 02:02:47,457 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.189e+01 8.647e+01 9.072e+01 9.601e+01 1.731e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 02:03:00,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=791180.0, ans=0.0 +2024-09-20 02:03:33,119 INFO [train.py:1198] (0/2) Epoch 44, batch 3250, loss[loss=0.2364, ctc_loss=0.1132, cr_loss=0.3388, attn_decoder_loss=0.2426, over 29707.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1101, cr_loss=0.3493, attn_decoder_loss=0.2381, over 5800769.10 frames. ], batch size: 84, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:03:48,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=791340.0, ans=0.125 +2024-09-20 02:03:58,487 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.90 vs. limit=15.0 +2024-09-20 02:04:01,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.62 vs. limit=15.0 +2024-09-20 02:04:02,771 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.23 vs. limit=15.0 +2024-09-20 02:04:50,969 INFO [train.py:1198] (0/2) Epoch 44, batch 3300, loss[loss=0.2384, ctc_loss=0.1043, cr_loss=0.3224, attn_decoder_loss=0.2462, over 28464.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1094, cr_loss=0.3474, attn_decoder_loss=0.237, over 5797531.82 frames. ], batch size: 111, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:04:55,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=791500.0, ans=0.125 +2024-09-20 02:05:22,614 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.134e+01 8.597e+01 9.177e+01 9.695e+01 2.585e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-20 02:05:24,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=791580.0, ans=0.025 +2024-09-20 02:05:39,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=791620.0, ans=0.0 +2024-09-20 02:06:07,995 INFO [train.py:1198] (0/2) Epoch 44, batch 3350, loss[loss=0.2408, ctc_loss=0.1106, cr_loss=0.3406, attn_decoder_loss=0.2477, over 28865.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.11, cr_loss=0.3488, attn_decoder_loss=0.2379, over 5774971.14 frames. 
], batch size: 104, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:06:20,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=791700.0, ans=0.1 +2024-09-20 02:06:40,688 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.33 vs. limit=15.0 +2024-09-20 02:06:47,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=791780.0, ans=0.2 +2024-09-20 02:07:06,692 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.41 vs. limit=15.0 +2024-09-20 02:07:18,802 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.02 vs. limit=15.0 +2024-09-20 02:07:23,739 INFO [train.py:1198] (0/2) Epoch 44, batch 3400, loss[loss=0.2045, ctc_loss=0.08879, cr_loss=0.311, attn_decoder_loss=0.2104, over 29345.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1103, cr_loss=0.3492, attn_decoder_loss=0.2379, over 5768646.34 frames. ], batch size: 67, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:07:45,707 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.08 vs. limit=6.0 +2024-09-20 02:07:51,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=791940.0, ans=0.0 +2024-09-20 02:07:55,387 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 8.682e+01 9.111e+01 9.724e+01 2.135e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-20 02:08:07,152 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.86 vs. limit=6.0 +2024-09-20 02:08:09,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=792020.0, ans=0.2 +2024-09-20 02:08:10,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=792020.0, ans=0.05 +2024-09-20 02:08:41,413 INFO [train.py:1198] (0/2) Epoch 44, batch 3450, loss[loss=0.2345, ctc_loss=0.101, cr_loss=0.3214, attn_decoder_loss=0.2422, over 28390.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1103, cr_loss=0.3496, attn_decoder_loss=0.2382, over 5777195.75 frames. ], batch size: 111, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:08:55,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=792140.0, ans=0.95 +2024-09-20 02:09:05,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=792140.0, ans=0.125 +2024-09-20 02:09:31,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=792220.0, ans=0.0 +2024-09-20 02:09:51,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=792260.0, ans=0.125 +2024-09-20 02:09:58,573 INFO [train.py:1198] (0/2) Epoch 44, batch 3500, loss[loss=0.2013, ctc_loss=0.08756, cr_loss=0.3059, attn_decoder_loss=0.2071, over 29314.00 frames. 
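The recurring optim.py warnings report the gradient-norm statistics behind clipping. In every such line in this excerpt, threshold equals Clipping_scale (2.0) times the middle of the five listed values, i.e. twice the median gradient norm, and percent-clipped is the share of recent batches whose norm exceeded it. A hedged reconstruction of that bookkeeping (the actual logic is in icefall's optim.py; this only mirrors the reported relationship):

```python
import torch

def clipping_stats(grad_norms: torch.Tensor, clipping_scale: float = 2.0):
    # Five-number summary, as printed in the warnings above.
    quartiles = torch.quantile(
        grad_norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0])
    )
    threshold = clipping_scale * quartiles[2]  # 2.0 x median grad norm
    percent_clipped = 100.0 * (grad_norms > threshold).float().mean()
    return quartiles, threshold, percent_clipped

# Values from the first warning in this excerpt (median 9.114e+01):
norms = torch.tensor([76.68, 87.79, 91.14, 96.44, 170.3])
_, thr, pct = clipping_stats(norms)
print(thr.item(), pct.item())  # ~182.3 ("threshold=1.823e+02"), 0.0
```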
], tot_loss[loss=0.2319, ctc_loss=0.1098, cr_loss=0.3488, attn_decoder_loss=0.2377, over 5778131.00 frames. ], batch size: 71, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:10:08,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=792300.0, ans=0.0 +2024-09-20 02:10:10,357 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.86 vs. limit=15.0 +2024-09-20 02:10:15,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=792340.0, ans=0.0 +2024-09-20 02:10:30,314 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.361e+01 8.582e+01 8.980e+01 9.639e+01 1.678e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-20 02:10:32,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=792380.0, ans=0.125 +2024-09-20 02:10:49,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=792420.0, ans=0.125 +2024-09-20 02:10:56,431 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.20 vs. limit=12.0 +2024-09-20 02:11:13,201 INFO [train.py:1198] (0/2) Epoch 44, batch 3550, loss[loss=0.2361, ctc_loss=0.1085, cr_loss=0.3522, attn_decoder_loss=0.2424, over 29716.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1097, cr_loss=0.3484, attn_decoder_loss=0.2377, over 5783843.73 frames. ], batch size: 89, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:11:14,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=792500.0, ans=0.125 +2024-09-20 02:11:33,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=792540.0, ans=0.025 +2024-09-20 02:11:44,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=792580.0, ans=0.0 +2024-09-20 02:12:05,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=792620.0, ans=0.125 +2024-09-20 02:12:05,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=792620.0, ans=0.025 +2024-09-20 02:12:07,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=792620.0, ans=0.1 +2024-09-20 02:12:22,786 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=792660.0, ans=0.1 +2024-09-20 02:12:26,921 INFO [train.py:1198] (0/2) Epoch 44, batch 3600, loss[loss=0.2247, ctc_loss=0.1041, cr_loss=0.3353, attn_decoder_loss=0.2307, over 29506.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.11, cr_loss=0.3494, attn_decoder_loss=0.2378, over 5792692.21 frames. 
], batch size: 77, lr: 2.49e-03, grad_scale: 16.0 +2024-09-20 02:12:58,440 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.636e+01 8.550e+01 9.094e+01 9.613e+01 3.759e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-20 02:12:58,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=792780.0, ans=0.125 +2024-09-20 02:13:08,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=792780.0, ans=0.125 +2024-09-20 02:13:30,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=792860.0, ans=0.5 +2024-09-20 02:13:41,887 INFO [train.py:1198] (0/2) Epoch 44, batch 3650, loss[loss=0.2483, ctc_loss=0.1207, cr_loss=0.3688, attn_decoder_loss=0.2543, over 29510.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1096, cr_loss=0.3481, attn_decoder_loss=0.2373, over 5794979.23 frames. ], batch size: 90, lr: 2.49e-03, grad_scale: 16.0 +2024-09-20 02:14:06,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=792940.0, ans=0.125 +2024-09-20 02:14:58,107 INFO [train.py:1198] (0/2) Epoch 44, batch 3700, loss[loss=0.2417, ctc_loss=0.1152, cr_loss=0.3492, attn_decoder_loss=0.248, over 29719.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1095, cr_loss=0.3479, attn_decoder_loss=0.2374, over 5804335.29 frames. ], batch size: 84, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:15:13,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=793140.0, ans=0.1 +2024-09-20 02:15:32,681 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.731e+01 8.629e+01 9.056e+01 9.534e+01 1.565e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-20 02:16:09,061 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.70 vs. limit=15.0 +2024-09-20 02:16:09,323 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.92 vs. limit=12.0 +2024-09-20 02:16:14,384 INFO [train.py:1198] (0/2) Epoch 44, batch 3750, loss[loss=0.2149, ctc_loss=0.0995, cr_loss=0.3329, attn_decoder_loss=0.2203, over 29360.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1095, cr_loss=0.3477, attn_decoder_loss=0.2373, over 5807990.75 frames. ], batch size: 67, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:16:22,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=793300.0, ans=0.0 +2024-09-20 02:16:25,545 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.43 vs. limit=10.0 +2024-09-20 02:16:36,112 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.23 vs. 
limit=15.0 +2024-09-20 02:16:44,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=793380.0, ans=0.0 +2024-09-20 02:16:50,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=793380.0, ans=0.125 +2024-09-20 02:17:24,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=793460.0, ans=0.5 +2024-09-20 02:17:28,370 INFO [train.py:1198] (0/2) Epoch 44, batch 3800, loss[loss=0.2376, ctc_loss=0.1097, cr_loss=0.3594, attn_decoder_loss=0.2438, over 29623.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1092, cr_loss=0.3473, attn_decoder_loss=0.237, over 5798920.21 frames. ], batch size: 86, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:17:39,696 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.23 vs. limit=10.0 +2024-09-20 02:17:44,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=793540.0, ans=0.125 +2024-09-20 02:17:46,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.16 vs. limit=22.5 +2024-09-20 02:17:50,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=793540.0, ans=0.1 +2024-09-20 02:18:01,001 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.351e+01 9.103e+01 9.836e+01 3.154e+02, threshold=1.821e+02, percent-clipped=2.0 +2024-09-20 02:18:18,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.05 vs. limit=15.0 +2024-09-20 02:18:30,299 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.50 vs. limit=15.0 +2024-09-20 02:18:42,695 INFO [train.py:1198] (0/2) Epoch 44, batch 3850, loss[loss=0.2305, ctc_loss=0.1042, cr_loss=0.345, attn_decoder_loss=0.2369, over 29246.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1089, cr_loss=0.347, attn_decoder_loss=0.2367, over 5814184.03 frames. 
], batch size: 100, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:18:53,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=793700.0, ans=0.0 +2024-09-20 02:18:56,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=793740.0, ans=10.0 +2024-09-20 02:19:00,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=793740.0, ans=0.125 +2024-09-20 02:19:06,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=793740.0, ans=0.2 +2024-09-20 02:19:16,944 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=793780.0, ans=0.125 +2024-09-20 02:19:31,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=793820.0, ans=0.125 +2024-09-20 02:19:46,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=793860.0, ans=0.0 +2024-09-20 02:19:53,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.18 vs. limit=15.0 +2024-09-20 02:19:58,452 INFO [train.py:1198] (0/2) Epoch 44, batch 3900, loss[loss=0.2411, ctc_loss=0.1078, cr_loss=0.3461, attn_decoder_loss=0.2482, over 29628.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1093, cr_loss=0.3476, attn_decoder_loss=0.2372, over 5818259.17 frames. ], batch size: 86, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:20:04,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=793900.0, ans=0.125 +2024-09-20 02:20:14,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=793940.0, ans=0.1 +2024-09-20 02:20:15,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=793940.0, ans=0.1 +2024-09-20 02:20:31,106 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.718e+01 8.637e+01 9.253e+01 9.637e+01 1.224e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-20 02:20:31,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=793980.0, ans=0.09899494936611666 +2024-09-20 02:20:33,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=793980.0, ans=0.125 +2024-09-20 02:21:14,101 INFO [train.py:1198] (0/2) Epoch 44, batch 3950, loss[loss=0.2387, ctc_loss=0.1128, cr_loss=0.3647, attn_decoder_loss=0.2446, over 29470.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1093, cr_loss=0.348, attn_decoder_loss=0.2372, over 5837074.36 frames. 
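On loss[...] versus tot_loss[...]: the first is the current batch, while tot_loss is averaged over a frame count that hovers around 5.8M rather than growing without bound, which is what one would expect from an exponentially decayed accumulation of (loss x frames, frames). That is a guess about the bookkeeping, not a quote of train.py, but a toy version reproduces the roughly constant window:

```python
def update(tot_weighted, tot_frames, loss, frames, decay=1.0 - 1.0 / 200):
    # Decayed accumulation: old batches fade out, keeping the window finite.
    tot_weighted = tot_weighted * decay + loss * frames
    tot_frames = tot_frames * decay + frames
    return tot_weighted, tot_frames

w = f = 0.0
for _ in range(2000):  # long past warm-up
    w, f = update(w, f, loss=0.232, frames=29000)
print(round(f), round(w / f, 4))  # ~5.8e6 frames, average 0.232
```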
], batch size: 97, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:21:26,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=794100.0, ans=0.025 +2024-09-20 02:21:42,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=794180.0, ans=0.07 +2024-09-20 02:21:45,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=794180.0, ans=0.125 +2024-09-20 02:21:46,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=794180.0, ans=0.04949747468305833 +2024-09-20 02:21:58,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=794220.0, ans=0.0 +2024-09-20 02:22:16,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=794260.0, ans=0.125 +2024-09-20 02:22:20,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=794260.0, ans=0.2 +2024-09-20 02:22:27,455 INFO [train.py:1198] (0/2) Epoch 44, batch 4000, loss[loss=0.2216, ctc_loss=0.1034, cr_loss=0.3446, attn_decoder_loss=0.2271, over 29500.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1097, cr_loss=0.3484, attn_decoder_loss=0.2374, over 5812205.81 frames. ], batch size: 74, lr: 2.49e-03, grad_scale: 16.0 +2024-09-20 02:22:32,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=794300.0, ans=0.2 +2024-09-20 02:22:33,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=794300.0, ans=0.0 +2024-09-20 02:22:39,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=794300.0, ans=0.125 +2024-09-20 02:22:54,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=794340.0, ans=0.125 +2024-09-20 02:23:01,247 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.802e+01 8.690e+01 9.242e+01 9.635e+01 1.653e+02, threshold=1.848e+02, percent-clipped=0.0 +2024-09-20 02:23:25,290 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.58 vs. limit=22.5 +2024-09-20 02:23:40,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=794500.0, ans=0.1 +2024-09-20 02:23:41,488 INFO [train.py:1198] (0/2) Epoch 44, batch 4050, loss[loss=0.2437, ctc_loss=0.1208, cr_loss=0.3474, attn_decoder_loss=0.2496, over 20775.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1096, cr_loss=0.3481, attn_decoder_loss=0.2372, over 5796790.95 frames. 
], batch size: 209, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:24:03,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=794540.0, ans=0.125 +2024-09-20 02:24:03,737 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=794540.0, ans=0.1 +2024-09-20 02:24:16,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=794580.0, ans=0.125 +2024-09-20 02:24:45,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=794660.0, ans=0.2 +2024-09-20 02:24:51,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=794660.0, ans=0.0 +2024-09-20 02:24:53,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=794660.0, ans=0.125 +2024-09-20 02:24:56,016 INFO [train.py:1198] (0/2) Epoch 44, batch 4100, loss[loss=0.2453, ctc_loss=0.1235, cr_loss=0.3894, attn_decoder_loss=0.2501, over 29487.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1103, cr_loss=0.3502, attn_decoder_loss=0.2376, over 5792236.86 frames. ], batch size: 90, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:24:59,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=794700.0, ans=0.2 +2024-09-20 02:25:04,909 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=794700.0, ans=0.025 +2024-09-20 02:25:06,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=794700.0, ans=0.125 +2024-09-20 02:25:29,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=794780.0, ans=0.125 +2024-09-20 02:25:30,721 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.174e+01 8.703e+01 9.227e+01 9.918e+01 1.839e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-20 02:25:31,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=794780.0, ans=0.1 +2024-09-20 02:25:53,425 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.81 vs. limit=22.5 +2024-09-20 02:25:56,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.08 vs. limit=6.0 +2024-09-20 02:26:09,614 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.83 vs. limit=15.0 +2024-09-20 02:26:10,188 INFO [train.py:1198] (0/2) Epoch 44, batch 4150, loss[loss=0.2256, ctc_loss=0.1056, cr_loss=0.3384, attn_decoder_loss=0.2314, over 29500.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1102, cr_loss=0.3502, attn_decoder_loss=0.2373, over 5798148.74 frames. 
], batch size: 77, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:26:11,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=794900.0, ans=0.2 +2024-09-20 02:26:12,592 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.19 vs. limit=15.0 +2024-09-20 02:26:13,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=794900.0, ans=0.125 +2024-09-20 02:26:27,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=794940.0, ans=0.125 +2024-09-20 02:27:23,536 INFO [train.py:1198] (0/2) Epoch 44, batch 4200, loss[loss=0.2459, ctc_loss=0.1224, cr_loss=0.3779, attn_decoder_loss=0.2512, over 29477.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1103, cr_loss=0.3503, attn_decoder_loss=0.2376, over 5800167.11 frames. ], batch size: 90, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:27:25,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=795100.0, ans=0.5 +2024-09-20 02:27:32,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=795100.0, ans=0.125 +2024-09-20 02:27:47,842 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.92 vs. limit=22.5 +2024-09-20 02:27:51,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=795180.0, ans=0.0 +2024-09-20 02:27:57,349 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.818e+01 8.655e+01 9.286e+01 9.774e+01 5.497e+02, threshold=1.857e+02, percent-clipped=1.0 +2024-09-20 02:28:00,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=795180.0, ans=0.2 +2024-09-20 02:28:27,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=795260.0, ans=0.125 +2024-09-20 02:28:27,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.71 vs. limit=15.0 +2024-09-20 02:28:28,766 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=795260.0, ans=0.09899494936611666 +2024-09-20 02:28:38,191 INFO [train.py:1198] (0/2) Epoch 44, batch 4250, loss[loss=0.212, ctc_loss=0.0886, cr_loss=0.2889, attn_decoder_loss=0.2193, over 29533.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1096, cr_loss=0.3486, attn_decoder_loss=0.2376, over 5805971.82 frames. 
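The Whitening lines (metric=... vs. limit=...) track how isotropic a module's activation covariance is against a scheduled limit; the metric is dimensionless and equals 1.0 for a perfectly white covariance. One plausible form of such a metric, E[lambda^2] / (E[lambda])^2 over the covariance eigenvalues, is sketched below as an assumption rather than a quotation of scaling.py:

```python
import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    # x: (num_frames, num_channels), treated as zero-mean for brevity.
    cov = x.t() @ x / x.shape[0]
    eig = torch.linalg.eigvalsh(cov)
    # 1.0 iff all eigenvalues are equal (isotropic / "white"); grows as
    # the covariance concentrates in a few directions.
    return (eig ** 2).mean() / eig.mean() ** 2

x = torch.randn(20000, 512)          # nearly white features
print(whitening_metric(x).item())    # close to 1.0
# A readout such as "metric=5.56 vs. limit=15.0" is then within its
# limit, so no whitening penalty would be applied for that module.
```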
], batch size: 74, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:28:44,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=795300.0, ans=0.125 +2024-09-20 02:29:01,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=795340.0, ans=0.0 +2024-09-20 02:29:08,355 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=795380.0, ans=0.125 +2024-09-20 02:29:10,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=795380.0, ans=15.0 +2024-09-20 02:29:20,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=795380.0, ans=0.125 +2024-09-20 02:29:21,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=795420.0, ans=0.125 +2024-09-20 02:29:31,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=795420.0, ans=0.1 +2024-09-20 02:29:36,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=795460.0, ans=0.125 +2024-09-20 02:29:36,826 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.04 vs. limit=15.0 +2024-09-20 02:29:39,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=795460.0, ans=0.125 +2024-09-20 02:29:46,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=795460.0, ans=0.0 +2024-09-20 02:29:49,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=795460.0, ans=0.1 +2024-09-20 02:29:52,314 INFO [train.py:1198] (0/2) Epoch 44, batch 4300, loss[loss=0.2331, ctc_loss=0.1096, cr_loss=0.3373, attn_decoder_loss=0.2394, over 29516.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1093, cr_loss=0.348, attn_decoder_loss=0.2376, over 5795712.26 frames. ], batch size: 87, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:30:15,280 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.96 vs. 
limit=6.0 +2024-09-20 02:30:20,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=795580.0, ans=0.125 +2024-09-20 02:30:26,387 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.608e+01 8.755e+01 9.251e+01 9.683e+01 2.005e+02, threshold=1.850e+02, percent-clipped=1.0 +2024-09-20 02:30:32,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=795580.0, ans=0.0 +2024-09-20 02:30:41,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=795620.0, ans=10.0 +2024-09-20 02:30:54,898 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:31:06,388 INFO [train.py:1198] (0/2) Epoch 44, batch 4350, loss[loss=0.2477, ctc_loss=0.1167, cr_loss=0.3563, attn_decoder_loss=0.2543, over 29500.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1119, cr_loss=0.3541, attn_decoder_loss=0.2409, over 5798103.03 frames. ], batch size: 97, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:31:08,160 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=795700.0, ans=0.09899494936611666 +2024-09-20 02:31:27,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=795740.0, ans=0.0 +2024-09-20 02:31:31,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=795740.0, ans=0.95 +2024-09-20 02:31:40,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=795780.0, ans=0.125 +2024-09-20 02:31:53,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=795820.0, ans=0.0 +2024-09-20 02:31:59,888 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.42 vs. limit=15.0 +2024-09-20 02:32:00,672 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=795820.0, ans=0.1 +2024-09-20 02:32:06,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=795860.0, ans=0.125 +2024-09-20 02:32:11,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.11 vs. limit=6.0 +2024-09-20 02:32:17,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=795860.0, ans=0.1 +2024-09-20 02:32:20,752 INFO [train.py:1198] (0/2) Epoch 44, batch 4400, loss[loss=0.2436, ctc_loss=0.1144, cr_loss=0.3569, attn_decoder_loss=0.2501, over 27447.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1131, cr_loss=0.3568, attn_decoder_loss=0.2428, over 5768510.48 frames. 
], batch size: 125, lr: 2.49e-03, grad_scale: 16.0 +2024-09-20 02:32:29,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=795900.0, ans=0.2 +2024-09-20 02:32:34,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=795940.0, ans=0.0 +2024-09-20 02:32:35,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=795940.0, ans=0.125 +2024-09-20 02:32:37,924 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.69 vs. limit=15.0 +2024-09-20 02:32:53,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=795980.0, ans=0.0 +2024-09-20 02:32:54,477 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.213e+01 9.042e+01 9.394e+01 9.819e+01 2.193e+02, threshold=1.879e+02, percent-clipped=1.0 +2024-09-20 02:33:09,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=796020.0, ans=0.0 +2024-09-20 02:33:19,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=796060.0, ans=0.2 +2024-09-20 02:33:29,419 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.24 vs. limit=10.0 +2024-09-20 02:33:34,337 INFO [train.py:1198] (0/2) Epoch 44, batch 4450, loss[loss=0.2622, ctc_loss=0.15, cr_loss=0.4197, attn_decoder_loss=0.2654, over 19813.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1167, cr_loss=0.3621, attn_decoder_loss=0.2449, over 5574107.50 frames. ], batch size: 210, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:33:55,505 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.27 vs. limit=6.0 +2024-09-20 02:34:21,948 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:34:27,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=796220.0, ans=0.025 +2024-09-20 02:34:33,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=796260.0, ans=0.125 +2024-09-20 02:34:41,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=796260.0, ans=0.125 +2024-09-20 02:34:44,690 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.58 vs. limit=6.0 +2024-09-20 02:34:47,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=796260.0, ans=0.025 +2024-09-20 02:34:49,812 INFO [train.py:1198] (0/2) Epoch 44, batch 4500, loss[loss=0.2487, ctc_loss=0.1302, cr_loss=0.3951, attn_decoder_loss=0.2531, over 20424.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1194, cr_loss=0.3641, attn_decoder_loss=0.2465, over 5236993.14 frames. 
], batch size: 210, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:34:53,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=796300.0, ans=0.125 +2024-09-20 02:34:53,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=796300.0, ans=15.0 +2024-09-20 02:35:05,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=796340.0, ans=0.0 +2024-09-20 02:35:11,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=796340.0, ans=0.0 +2024-09-20 02:35:17,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=796340.0, ans=0.025 +2024-09-20 02:35:26,140 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.915e+01 1.070e+02 1.147e+02 1.258e+02 2.122e+02, threshold=2.294e+02, percent-clipped=1.0 +2024-09-20 02:35:27,522 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-44.pt +2024-09-20 02:36:17,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=10.01 vs. limit=15.0 +2024-09-20 02:36:17,461 INFO [train.py:1198] (0/2) Epoch 45, batch 0, loss[loss=0.2139, ctc_loss=0.09865, cr_loss=0.3261, attn_decoder_loss=0.2194, over 29639.00 frames. ], tot_loss[loss=0.2139, ctc_loss=0.09865, cr_loss=0.3261, attn_decoder_loss=0.2194, over 29639.00 frames. ], batch size: 73, lr: 2.46e-03, grad_scale: 16.0 +2024-09-20 02:36:17,461 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 02:36:35,782 INFO [train.py:1230] (0/2) Epoch 45, validation: loss=0.2126, ctc_loss=0.03577, cr_loss=6.589e-15, attn_decoder_loss=0.2323, over 944034.00 frames. +2024-09-20 02:36:35,783 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 02:36:42,111 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=796400.0, ans=0.125 +2024-09-20 02:36:42,802 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.81 vs. limit=15.0 +2024-09-20 02:37:02,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=796440.0, ans=0.0 +2024-09-20 02:37:08,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=796480.0, ans=0.1 +2024-09-20 02:37:11,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=796480.0, ans=0.0 +2024-09-20 02:37:27,157 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.50 vs. limit=10.0 +2024-09-20 02:37:30,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=796520.0, ans=0.125 +2024-09-20 02:37:35,863 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.73 vs. 
limit=15.0 +2024-09-20 02:37:38,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=796560.0, ans=0.2 +2024-09-20 02:37:46,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=796560.0, ans=0.125 +2024-09-20 02:37:51,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.22 vs. limit=15.0 +2024-09-20 02:37:53,214 INFO [train.py:1198] (0/2) Epoch 45, batch 50, loss[loss=0.2135, ctc_loss=0.1006, cr_loss=0.3196, attn_decoder_loss=0.2189, over 29426.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1106, cr_loss=0.3508, attn_decoder_loss=0.2384, over 1264621.16 frames. ], batch size: 70, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:38:07,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=796640.0, ans=0.0 +2024-09-20 02:38:10,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=796640.0, ans=0.0 +2024-09-20 02:38:11,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=796640.0, ans=0.5 +2024-09-20 02:38:26,108 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.00 vs. limit=12.0 +2024-09-20 02:38:28,738 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:38:40,694 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:38:42,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=796720.0, ans=0.125 +2024-09-20 02:38:52,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=796760.0, ans=0.0 +2024-09-20 02:38:54,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=796760.0, ans=0.0 +2024-09-20 02:39:00,711 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.31 vs. limit=12.0 +2024-09-20 02:39:07,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=796800.0, ans=0.125 +2024-09-20 02:39:09,107 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.307e+01 8.517e+01 8.971e+01 9.670e+01 3.092e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-20 02:39:09,133 INFO [train.py:1198] (0/2) Epoch 45, batch 100, loss[loss=0.2234, ctc_loss=0.105, cr_loss=0.3473, attn_decoder_loss=0.2288, over 29534.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1112, cr_loss=0.3513, attn_decoder_loss=0.2397, over 2250092.81 frames. 
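As the epoch boundary above shows, a full checkpoint is written once per epoch (epoch-44.pt) before Epoch 45 opens with a validation pass. A minimal sketch of that pattern, assuming a plain torch.save flow; the real icefall checkpoint.py also persists optimizer, scheduler, sampler, and grad-scaler state:

```python
import torch

def save_epoch_checkpoint(model, optimizer, exp_dir: str, epoch: int) -> None:
    # Minimal sketch; the real helper stores more state than this.
    torch.save(
        {
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "epoch": epoch,
        },
        f"{exp_dir}/epoch-{epoch}.pt",
    )

# e.g. save_epoch_checkpoint(model, optimizer, "zipformer/exp-...", epoch=44)
```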
], batch size: 76, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:39:37,879 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=796880.0, ans=0.125 +2024-09-20 02:39:47,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=796880.0, ans=0.125 +2024-09-20 02:39:56,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=796920.0, ans=0.0 +2024-09-20 02:40:15,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=796960.0, ans=0.0 +2024-09-20 02:40:15,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=796960.0, ans=0.125 +2024-09-20 02:40:25,370 INFO [train.py:1198] (0/2) Epoch 45, batch 150, loss[loss=0.2097, ctc_loss=0.09235, cr_loss=0.3179, attn_decoder_loss=0.2156, over 29447.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1087, cr_loss=0.3462, attn_decoder_loss=0.2374, over 3045881.53 frames. ], batch size: 70, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:40:37,618 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=797000.0, ans=0.125 +2024-09-20 02:40:50,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=797040.0, ans=0.125 +2024-09-20 02:40:51,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=797040.0, ans=0.125 +2024-09-20 02:40:55,418 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.05 vs. limit=12.0 +2024-09-20 02:40:59,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=797080.0, ans=0.125 +2024-09-20 02:41:05,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=797080.0, ans=0.125 +2024-09-20 02:41:26,070 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=797160.0, ans=0.125 +2024-09-20 02:41:38,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=797160.0, ans=0.125 +2024-09-20 02:41:42,434 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.724e+01 8.414e+01 8.795e+01 9.375e+01 1.270e+02, threshold=1.759e+02, percent-clipped=0.0 +2024-09-20 02:41:42,456 INFO [train.py:1198] (0/2) Epoch 45, batch 200, loss[loss=0.2403, ctc_loss=0.1147, cr_loss=0.3657, attn_decoder_loss=0.2461, over 27196.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1082, cr_loss=0.3456, attn_decoder_loss=0.2364, over 3658071.84 frames. ], batch size: 124, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:41:57,933 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=797240.0, ans=0.125 +2024-09-20 02:42:18,410 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.53 vs. 
limit=22.5 +2024-09-20 02:42:26,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=797320.0, ans=0.2 +2024-09-20 02:42:46,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=797360.0, ans=0.125 +2024-09-20 02:42:47,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=797360.0, ans=0.2 +2024-09-20 02:42:55,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=797360.0, ans=0.0 +2024-09-20 02:42:58,112 INFO [train.py:1198] (0/2) Epoch 45, batch 250, loss[loss=0.2496, ctc_loss=0.1234, cr_loss=0.3807, attn_decoder_loss=0.2552, over 29250.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1088, cr_loss=0.3471, attn_decoder_loss=0.2371, over 4139122.97 frames. ], batch size: 100, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:42:58,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=797400.0, ans=0.0 +2024-09-20 02:43:29,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=797480.0, ans=0.125 +2024-09-20 02:43:58,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=797520.0, ans=0.025 +2024-09-20 02:44:16,174 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.164e+01 8.464e+01 8.917e+01 9.593e+01 1.535e+02, threshold=1.783e+02, percent-clipped=0.0 +2024-09-20 02:44:16,202 INFO [train.py:1198] (0/2) Epoch 45, batch 300, loss[loss=0.2458, ctc_loss=0.1184, cr_loss=0.3636, attn_decoder_loss=0.2519, over 29542.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1089, cr_loss=0.3474, attn_decoder_loss=0.2372, over 4506806.23 frames. ], batch size: 92, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:44:46,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=797640.0, ans=0.125 +2024-09-20 02:45:33,893 INFO [train.py:1198] (0/2) Epoch 45, batch 350, loss[loss=0.217, ctc_loss=0.09506, cr_loss=0.3059, attn_decoder_loss=0.2238, over 29736.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1096, cr_loss=0.349, attn_decoder_loss=0.2379, over 4793293.33 frames. 
], batch size: 72, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:45:46,110 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:45:55,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=797840.0, ans=0.1 +2024-09-20 02:45:58,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=797840.0, ans=0.0 +2024-09-20 02:46:19,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=797920.0, ans=0.0 +2024-09-20 02:46:32,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=797960.0, ans=0.125 +2024-09-20 02:46:34,281 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=797960.0, ans=0.125 +2024-09-20 02:46:37,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=797960.0, ans=0.2 +2024-09-20 02:46:42,216 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.49 vs. limit=15.0 +2024-09-20 02:46:48,813 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.652e+01 8.541e+01 8.980e+01 9.725e+01 1.224e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-20 02:46:48,834 INFO [train.py:1198] (0/2) Epoch 45, batch 400, loss[loss=0.2373, ctc_loss=0.123, cr_loss=0.3721, attn_decoder_loss=0.2417, over 29713.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1096, cr_loss=0.3491, attn_decoder_loss=0.2378, over 5022831.86 frames. ], batch size: 82, lr: 2.46e-03, grad_scale: 16.0 +2024-09-20 02:47:05,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=798040.0, ans=0.0 +2024-09-20 02:47:10,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=798040.0, ans=0.025 +2024-09-20 02:47:13,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=798040.0, ans=0.0 +2024-09-20 02:47:16,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=798040.0, ans=0.125 +2024-09-20 02:47:17,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.69 vs. 
limit=15.0 +2024-09-20 02:47:17,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=798080.0, ans=0.1 +2024-09-20 02:47:30,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=798080.0, ans=0.0 +2024-09-20 02:47:35,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=798120.0, ans=0.125 +2024-09-20 02:48:04,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=798160.0, ans=0.2 +2024-09-20 02:48:06,801 INFO [train.py:1198] (0/2) Epoch 45, batch 450, loss[loss=0.2425, ctc_loss=0.116, cr_loss=0.3668, attn_decoder_loss=0.2484, over 29704.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1096, cr_loss=0.3491, attn_decoder_loss=0.2377, over 5185610.35 frames. ], batch size: 83, lr: 2.46e-03, grad_scale: 16.0 +2024-09-20 02:48:23,671 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=798240.0, ans=0.07 +2024-09-20 02:48:34,242 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=798240.0, ans=0.05 +2024-09-20 02:48:45,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=798280.0, ans=0.125 +2024-09-20 02:48:51,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=798280.0, ans=0.025 +2024-09-20 02:48:52,195 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.11 vs. limit=22.5 +2024-09-20 02:48:53,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=798320.0, ans=0.125 +2024-09-20 02:49:08,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=798360.0, ans=0.0 +2024-09-20 02:49:11,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=798360.0, ans=0.0 +2024-09-20 02:49:15,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=798360.0, ans=0.125 +2024-09-20 02:49:24,763 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.897e+01 8.412e+01 8.859e+01 9.470e+01 4.425e+02, threshold=1.772e+02, percent-clipped=1.0 +2024-09-20 02:49:24,789 INFO [train.py:1198] (0/2) Epoch 45, batch 500, loss[loss=0.2521, ctc_loss=0.131, cr_loss=0.3977, attn_decoder_loss=0.2567, over 29485.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1085, cr_loss=0.3473, attn_decoder_loss=0.2366, over 5329419.65 frames. ], batch size: 94, lr: 2.46e-03, grad_scale: 16.0 +2024-09-20 02:49:30,079 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.04 vs. 
limit=6.0 +2024-09-20 02:49:43,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=798440.0, ans=0.125 +2024-09-20 02:49:46,825 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.95 vs. limit=12.0 +2024-09-20 02:49:53,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=798480.0, ans=0.0 +2024-09-20 02:50:10,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=798520.0, ans=0.07 +2024-09-20 02:50:33,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=798560.0, ans=0.1 +2024-09-20 02:50:38,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.61 vs. limit=15.0 +2024-09-20 02:50:40,212 INFO [train.py:1198] (0/2) Epoch 45, batch 550, loss[loss=0.2434, ctc_loss=0.1137, cr_loss=0.3614, attn_decoder_loss=0.2498, over 28835.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1093, cr_loss=0.3484, attn_decoder_loss=0.237, over 5423505.42 frames. ], batch size: 104, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:50:58,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=798640.0, ans=0.125 +2024-09-20 02:51:09,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=798680.0, ans=0.0 +2024-09-20 02:51:10,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=798680.0, ans=0.125 +2024-09-20 02:51:20,673 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.54 vs. limit=12.0 +2024-09-20 02:51:32,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=798720.0, ans=0.0 +2024-09-20 02:51:43,021 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:51:57,990 INFO [train.py:1198] (0/2) Epoch 45, batch 600, loss[loss=0.2393, ctc_loss=0.1157, cr_loss=0.3576, attn_decoder_loss=0.2451, over 29264.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1092, cr_loss=0.3484, attn_decoder_loss=0.237, over 5511075.43 frames. ], batch size: 100, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:51:59,417 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.525e+01 8.595e+01 9.062e+01 9.748e+01 3.862e+02, threshold=1.812e+02, percent-clipped=2.0 +2024-09-20 02:52:35,764 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.20 vs. 
limit=22.5 +2024-09-20 02:52:53,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=798920.0, ans=0.125 +2024-09-20 02:53:01,425 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=798960.0, ans=0.1 +2024-09-20 02:53:02,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=798960.0, ans=0.125 +2024-09-20 02:53:07,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=798960.0, ans=0.0 +2024-09-20 02:53:13,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=799000.0, ans=0.125 +2024-09-20 02:53:14,710 INFO [train.py:1198] (0/2) Epoch 45, batch 650, loss[loss=0.2317, ctc_loss=0.1018, cr_loss=0.3205, attn_decoder_loss=0.239, over 29739.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1085, cr_loss=0.3464, attn_decoder_loss=0.2364, over 5587558.72 frames. ], batch size: 81, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:53:55,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=799080.0, ans=0.125 +2024-09-20 02:54:06,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=799120.0, ans=0.2 +2024-09-20 02:54:30,733 INFO [train.py:1198] (0/2) Epoch 45, batch 700, loss[loss=0.2278, ctc_loss=0.1083, cr_loss=0.3462, attn_decoder_loss=0.2334, over 29534.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1089, cr_loss=0.3477, attn_decoder_loss=0.2369, over 5637988.76 frames. ], batch size: 76, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:54:32,187 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.994e+01 8.670e+01 9.106e+01 9.852e+01 1.537e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-20 02:54:46,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=799240.0, ans=0.0 +2024-09-20 02:55:07,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=799280.0, ans=0.125 +2024-09-20 02:55:16,379 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:55:24,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=799320.0, ans=0.2 +2024-09-20 02:55:39,734 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=799360.0, ans=0.0 +2024-09-20 02:55:48,586 INFO [train.py:1198] (0/2) Epoch 45, batch 750, loss[loss=0.2394, ctc_loss=0.112, cr_loss=0.3589, attn_decoder_loss=0.2456, over 29692.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1089, cr_loss=0.3473, attn_decoder_loss=0.2366, over 5677236.95 frames. 
], batch size: 82, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:55:50,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=799400.0, ans=0.09899494936611666 +2024-09-20 02:55:56,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=799400.0, ans=0.125 +2024-09-20 02:55:57,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=799400.0, ans=0.1 +2024-09-20 02:56:02,985 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.76 vs. limit=15.0 +2024-09-20 02:56:06,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=799440.0, ans=0.125 +2024-09-20 02:56:17,525 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.55 vs. limit=15.0 +2024-09-20 02:56:28,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.23 vs. limit=15.0 +2024-09-20 02:56:29,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=799480.0, ans=0.1 +2024-09-20 02:56:46,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=799520.0, ans=0.125 +2024-09-20 02:57:06,240 INFO [train.py:1198] (0/2) Epoch 45, batch 800, loss[loss=0.1988, ctc_loss=0.08456, cr_loss=0.2962, attn_decoder_loss=0.2049, over 29607.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.109, cr_loss=0.3472, attn_decoder_loss=0.2367, over 5708316.68 frames. ], batch size: 73, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 02:57:07,693 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.784e+01 8.615e+01 9.052e+01 9.760e+01 1.570e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-20 02:57:09,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=799600.0, ans=0.125 +2024-09-20 02:57:11,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=799600.0, ans=0.0 +2024-09-20 02:57:12,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=799600.0, ans=0.125 +2024-09-20 02:57:19,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=799640.0, ans=0.035 +2024-09-20 02:57:59,848 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.14 vs. limit=15.0 +2024-09-20 02:58:01,488 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.66 vs. 
limit=15.0 +2024-09-20 02:58:05,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=799760.0, ans=0.125 +2024-09-20 02:58:08,376 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=799760.0, ans=0.125 +2024-09-20 02:58:21,370 INFO [train.py:1198] (0/2) Epoch 45, batch 850, loss[loss=0.2353, ctc_loss=0.1062, cr_loss=0.3503, attn_decoder_loss=0.2419, over 29686.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1086, cr_loss=0.3464, attn_decoder_loss=0.2364, over 5736135.39 frames. ], batch size: 89, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:58:29,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=799800.0, ans=0.125 +2024-09-20 02:58:39,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=799840.0, ans=0.2 +2024-09-20 02:58:46,125 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.20 vs. limit=6.0 +2024-09-20 02:58:47,434 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.74 vs. limit=22.5 +2024-09-20 02:59:02,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=799880.0, ans=0.125 +2024-09-20 02:59:06,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=799920.0, ans=0.125 +2024-09-20 02:59:08,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.96 vs. limit=15.0 +2024-09-20 02:59:33,922 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.42 vs. limit=15.0 +2024-09-20 02:59:38,140 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-200000.pt +2024-09-20 02:59:46,327 INFO [train.py:1198] (0/2) Epoch 45, batch 900, loss[loss=0.2153, ctc_loss=0.09392, cr_loss=0.3106, attn_decoder_loss=0.2219, over 29615.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1089, cr_loss=0.347, attn_decoder_loss=0.2369, over 5740156.93 frames. 
], batch size: 73, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:59:49,261 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.442e+01 9.122e+01 9.676e+01 4.269e+02, threshold=1.824e+02, percent-clipped=2.0 +2024-09-20 03:00:00,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=800040.0, ans=0.1 +2024-09-20 03:00:30,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=800120.0, ans=0.125 +2024-09-20 03:00:31,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=800120.0, ans=0.125 +2024-09-20 03:00:50,181 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:00:53,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=800160.0, ans=0.0 +2024-09-20 03:01:02,854 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.82 vs. limit=15.0 +2024-09-20 03:01:03,278 INFO [train.py:1198] (0/2) Epoch 45, batch 950, loss[loss=0.2259, ctc_loss=0.1028, cr_loss=0.3377, attn_decoder_loss=0.2321, over 29537.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1091, cr_loss=0.3474, attn_decoder_loss=0.2372, over 5741568.58 frames. ], batch size: 74, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:01:11,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=800200.0, ans=0.125 +2024-09-20 03:01:18,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=800240.0, ans=0.025 +2024-09-20 03:01:20,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=800240.0, ans=0.2 +2024-09-20 03:01:21,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=800240.0, ans=0.025 +2024-09-20 03:01:25,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=800240.0, ans=15.0 +2024-09-20 03:01:32,561 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:01:38,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=800280.0, ans=0.125 +2024-09-20 03:01:44,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.09 vs. limit=22.5 +2024-09-20 03:02:18,180 INFO [train.py:1198] (0/2) Epoch 45, batch 1000, loss[loss=0.2215, ctc_loss=0.1048, cr_loss=0.3348, attn_decoder_loss=0.227, over 29474.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1096, cr_loss=0.3485, attn_decoder_loss=0.2379, over 5735344.27 frames. 
], batch size: 77, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:02:21,229 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.544e+01 8.681e+01 9.118e+01 9.953e+01 2.174e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-20 03:02:45,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=800440.0, ans=0.125 +2024-09-20 03:03:35,521 INFO [train.py:1198] (0/2) Epoch 45, batch 1050, loss[loss=0.2551, ctc_loss=0.1233, cr_loss=0.3763, attn_decoder_loss=0.2613, over 29690.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1099, cr_loss=0.3492, attn_decoder_loss=0.2378, over 5744106.07 frames. ], batch size: 85, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:03:47,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=800600.0, ans=0.2 +2024-09-20 03:03:54,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=800640.0, ans=0.125 +2024-09-20 03:03:55,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=800640.0, ans=0.0 +2024-09-20 03:04:13,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=800680.0, ans=0.0 +2024-09-20 03:04:18,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=800680.0, ans=0.0 +2024-09-20 03:04:53,634 INFO [train.py:1198] (0/2) Epoch 45, batch 1100, loss[loss=0.2249, ctc_loss=0.1003, cr_loss=0.327, attn_decoder_loss=0.2315, over 29460.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1095, cr_loss=0.3485, attn_decoder_loss=0.2373, over 5756249.81 frames. ], batch size: 78, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:04:56,595 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.469e+01 8.955e+01 9.647e+01 1.370e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-20 03:05:03,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=800800.0, ans=0.2 +2024-09-20 03:05:19,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=800840.0, ans=0.0 +2024-09-20 03:05:24,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=800880.0, ans=0.125 +2024-09-20 03:05:39,734 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.93 vs. limit=15.0 +2024-09-20 03:05:57,394 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=800960.0, ans=0.2 +2024-09-20 03:06:00,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=800960.0, ans=0.0 +2024-09-20 03:06:09,170 INFO [train.py:1198] (0/2) Epoch 45, batch 1150, loss[loss=0.2224, ctc_loss=0.1046, cr_loss=0.3338, attn_decoder_loss=0.2281, over 29451.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1096, cr_loss=0.3486, attn_decoder_loss=0.2372, over 5753840.74 frames. 
], batch size: 78, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:06:26,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=801040.0, ans=0.0 +2024-09-20 03:06:33,278 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.89 vs. limit=10.0 +2024-09-20 03:06:34,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=801040.0, ans=0.0 +2024-09-20 03:06:34,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.75 vs. limit=15.0 +2024-09-20 03:06:41,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=801080.0, ans=0.0 +2024-09-20 03:06:44,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_ff3.min_abs, batch_count=801080.0, ans=0.2 +2024-09-20 03:06:44,515 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=801080.0, ans=0.125 +2024-09-20 03:07:01,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=801120.0, ans=0.125 +2024-09-20 03:07:02,043 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.45 vs. limit=15.0 +2024-09-20 03:07:27,000 INFO [train.py:1198] (0/2) Epoch 45, batch 1200, loss[loss=0.2324, ctc_loss=0.09843, cr_loss=0.327, attn_decoder_loss=0.24, over 29671.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1097, cr_loss=0.3489, attn_decoder_loss=0.2376, over 5747592.82 frames. ], batch size: 85, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:07:29,986 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.573e+01 8.448e+01 9.125e+01 9.558e+01 3.990e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-20 03:07:36,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=801200.0, ans=0.2 +2024-09-20 03:08:12,538 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:08:13,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=801320.0, ans=0.0 +2024-09-20 03:08:15,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=801320.0, ans=0.0 +2024-09-20 03:08:41,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=801360.0, ans=0.025 +2024-09-20 03:08:44,562 INFO [train.py:1198] (0/2) Epoch 45, batch 1250, loss[loss=0.2467, ctc_loss=0.1226, cr_loss=0.3757, attn_decoder_loss=0.2521, over 29520.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1105, cr_loss=0.3509, attn_decoder_loss=0.2383, over 5775525.57 frames. 
], batch size: 92, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:08:49,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=801400.0, ans=0.125 +2024-09-20 03:09:19,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=801480.0, ans=0.125 +2024-09-20 03:09:24,188 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=801480.0, ans=0.125 +2024-09-20 03:09:34,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=801520.0, ans=0.125 +2024-09-20 03:09:38,437 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.98 vs. limit=15.0 +2024-09-20 03:09:45,772 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.51 vs. limit=15.0 +2024-09-20 03:09:57,435 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=801560.0, ans=0.125 +2024-09-20 03:10:00,506 INFO [train.py:1198] (0/2) Epoch 45, batch 1300, loss[loss=0.2282, ctc_loss=0.101, cr_loss=0.3177, attn_decoder_loss=0.2353, over 28392.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.11, cr_loss=0.3497, attn_decoder_loss=0.2378, over 5778880.80 frames. ], batch size: 111, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:10:03,560 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.532e+01 8.720e+01 9.060e+01 9.963e+01 1.314e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-20 03:10:23,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=801640.0, ans=0.1 +2024-09-20 03:11:09,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=801760.0, ans=0.09899494936611666 +2024-09-20 03:11:14,078 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=801760.0, ans=0.1 +2024-09-20 03:11:18,210 INFO [train.py:1198] (0/2) Epoch 45, batch 1350, loss[loss=0.2357, ctc_loss=0.1092, cr_loss=0.3458, attn_decoder_loss=0.2421, over 29760.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1094, cr_loss=0.3485, attn_decoder_loss=0.2373, over 5796812.75 frames. 
], batch size: 81, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:11:21,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=801800.0, ans=0.125 +2024-09-20 03:11:48,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=801880.0, ans=0.125 +2024-09-20 03:12:01,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=801920.0, ans=0.0 +2024-09-20 03:12:06,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=801920.0, ans=0.125 +2024-09-20 03:12:31,209 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=801960.0, ans=0.125 +2024-09-20 03:12:35,462 INFO [train.py:1198] (0/2) Epoch 45, batch 1400, loss[loss=0.2055, ctc_loss=0.09484, cr_loss=0.3202, attn_decoder_loss=0.2107, over 29552.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1092, cr_loss=0.348, attn_decoder_loss=0.2371, over 5808167.82 frames. ], batch size: 69, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:12:37,588 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.73 vs. limit=12.0 +2024-09-20 03:12:39,955 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.254e+01 8.353e+01 8.806e+01 9.318e+01 1.165e+02, threshold=1.761e+02, percent-clipped=0.0 +2024-09-20 03:12:43,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=802000.0, ans=0.125 +2024-09-20 03:12:52,714 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.14 vs. limit=15.0 +2024-09-20 03:12:55,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=802040.0, ans=0.125 +2024-09-20 03:13:12,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=802080.0, ans=0.125 +2024-09-20 03:13:34,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=802160.0, ans=0.0 +2024-09-20 03:13:40,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=802160.0, ans=0.1 +2024-09-20 03:13:41,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=802160.0, ans=0.1 +2024-09-20 03:13:41,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=802160.0, ans=0.0 +2024-09-20 03:13:43,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.24 vs. limit=15.0 +2024-09-20 03:13:46,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=802160.0, ans=0.2 +2024-09-20 03:13:49,950 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=4.90 vs. 
limit=15.0 +2024-09-20 03:13:50,619 INFO [train.py:1198] (0/2) Epoch 45, batch 1450, loss[loss=0.2464, ctc_loss=0.1247, cr_loss=0.377, attn_decoder_loss=0.2515, over 29445.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1091, cr_loss=0.3477, attn_decoder_loss=0.2373, over 5804556.60 frames. ], batch size: 94, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:13:50,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=802200.0, ans=0.0 +2024-09-20 03:13:55,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=802200.0, ans=0.04949747468305833 +2024-09-20 03:14:01,870 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.07 vs. limit=22.5 +2024-09-20 03:14:06,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=802240.0, ans=0.0 +2024-09-20 03:14:31,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=802280.0, ans=0.125 +2024-09-20 03:14:37,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=802320.0, ans=0.125 +2024-09-20 03:14:40,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=802320.0, ans=0.2 +2024-09-20 03:14:47,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=802320.0, ans=0.2 +2024-09-20 03:14:55,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=802360.0, ans=0.2 +2024-09-20 03:15:08,098 INFO [train.py:1198] (0/2) Epoch 45, batch 1500, loss[loss=0.2346, ctc_loss=0.105, cr_loss=0.3379, attn_decoder_loss=0.2415, over 29603.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1089, cr_loss=0.3473, attn_decoder_loss=0.2375, over 5803477.40 frames. ], batch size: 86, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:15:12,537 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.856e+01 8.707e+01 9.148e+01 9.626e+01 3.931e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-20 03:15:38,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=802480.0, ans=0.125 +2024-09-20 03:16:04,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=802520.0, ans=0.0 +2024-09-20 03:16:10,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=802560.0, ans=0.125 +2024-09-20 03:16:14,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.85 vs. limit=15.0 +2024-09-20 03:16:14,891 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=802560.0, ans=0.0 +2024-09-20 03:16:25,826 INFO [train.py:1198] (0/2) Epoch 45, batch 1550, loss[loss=0.2463, ctc_loss=0.1243, cr_loss=0.4003, attn_decoder_loss=0.2509, over 29497.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1095, cr_loss=0.3485, attn_decoder_loss=0.2378, over 5779855.82 frames. 
], batch size: 90, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:16:42,746 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:16:47,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=802640.0, ans=0.1 +2024-09-20 03:16:53,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=802640.0, ans=0.125 +2024-09-20 03:17:09,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=802720.0, ans=0.0 +2024-09-20 03:17:15,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=802720.0, ans=0.1 +2024-09-20 03:17:20,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=802720.0, ans=0.2 +2024-09-20 03:17:30,626 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=802760.0, ans=0.0 +2024-09-20 03:17:40,716 INFO [train.py:1198] (0/2) Epoch 45, batch 1600, loss[loss=0.2351, ctc_loss=0.1009, cr_loss=0.3286, attn_decoder_loss=0.2427, over 29680.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1094, cr_loss=0.3476, attn_decoder_loss=0.2375, over 5761621.13 frames. ], batch size: 85, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:17:45,053 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.354e+01 8.517e+01 9.021e+01 9.788e+01 6.298e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-20 03:17:48,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=802800.0, ans=0.125 +2024-09-20 03:18:06,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=802840.0, ans=0.07 +2024-09-20 03:18:18,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=802880.0, ans=0.2 +2024-09-20 03:18:20,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=802880.0, ans=0.1 +2024-09-20 03:18:57,486 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.36 vs. limit=12.0 +2024-09-20 03:18:58,018 INFO [train.py:1198] (0/2) Epoch 45, batch 1650, loss[loss=0.2283, ctc_loss=0.09796, cr_loss=0.3329, attn_decoder_loss=0.2354, over 29706.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1094, cr_loss=0.3479, attn_decoder_loss=0.2375, over 5757384.65 frames. 
], batch size: 89, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:18:58,400 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=803000.0, ans=0.125 +2024-09-20 03:19:10,427 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=803000.0, ans=0.125 +2024-09-20 03:19:13,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=803040.0, ans=0.1 +2024-09-20 03:19:31,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=803080.0, ans=0.0 +2024-09-20 03:19:43,490 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=803120.0, ans=0.1 +2024-09-20 03:20:08,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=803160.0, ans=0.05 +2024-09-20 03:20:15,553 INFO [train.py:1198] (0/2) Epoch 45, batch 1700, loss[loss=0.1988, ctc_loss=0.08167, cr_loss=0.2862, attn_decoder_loss=0.2054, over 29569.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1091, cr_loss=0.3475, attn_decoder_loss=0.2373, over 5779588.83 frames. ], batch size: 69, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:20:21,495 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.570e+01 9.061e+01 9.508e+01 1.721e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-20 03:20:43,419 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.10 vs. limit=6.0 +2024-09-20 03:21:26,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=803360.0, ans=0.07 +2024-09-20 03:21:30,940 INFO [train.py:1198] (0/2) Epoch 45, batch 1750, loss[loss=0.2085, ctc_loss=0.09586, cr_loss=0.3188, attn_decoder_loss=0.2139, over 29321.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.109, cr_loss=0.3476, attn_decoder_loss=0.2369, over 5787416.11 frames. ], batch size: 67, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:22:08,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=803480.0, ans=0.2 +2024-09-20 03:22:19,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=803520.0, ans=0.07 +2024-09-20 03:22:47,902 INFO [train.py:1198] (0/2) Epoch 45, batch 1800, loss[loss=0.2469, ctc_loss=0.1194, cr_loss=0.382, attn_decoder_loss=0.2526, over 29683.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1089, cr_loss=0.3474, attn_decoder_loss=0.2372, over 5790952.00 frames. 
], batch size: 83, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:22:51,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=803600.0, ans=0.04949747468305833 +2024-09-20 03:22:53,958 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.618e+01 8.492e+01 8.891e+01 9.479e+01 1.445e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-20 03:22:54,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=803600.0, ans=0.125 +2024-09-20 03:22:55,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=803600.0, ans=0.125 +2024-09-20 03:22:57,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=803600.0, ans=0.2 +2024-09-20 03:23:09,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=803640.0, ans=0.07 +2024-09-20 03:23:30,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=803680.0, ans=0.2 +2024-09-20 03:23:33,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=803720.0, ans=0.1 +2024-09-20 03:23:53,113 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=803760.0, ans=0.125 +2024-09-20 03:23:53,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=803760.0, ans=0.07 +2024-09-20 03:24:02,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=803800.0, ans=0.1 +2024-09-20 03:24:03,349 INFO [train.py:1198] (0/2) Epoch 45, batch 1850, loss[loss=0.2443, ctc_loss=0.1074, cr_loss=0.3512, attn_decoder_loss=0.2518, over 29620.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1088, cr_loss=0.3471, attn_decoder_loss=0.237, over 5795680.68 frames. ], batch size: 86, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:24:39,159 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=803880.0, ans=0.2 +2024-09-20 03:24:40,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=803880.0, ans=0.5 +2024-09-20 03:24:57,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=803920.0, ans=0.0 +2024-09-20 03:24:57,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=803920.0, ans=0.05 +2024-09-20 03:25:01,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=803920.0, ans=10.0 +2024-09-20 03:25:07,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.65 vs. limit=12.0 +2024-09-20 03:25:20,212 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.98 vs. 
limit=22.5 +2024-09-20 03:25:21,002 INFO [train.py:1198] (0/2) Epoch 45, batch 1900, loss[loss=0.2347, ctc_loss=0.1055, cr_loss=0.329, attn_decoder_loss=0.2417, over 29716.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1093, cr_loss=0.3479, attn_decoder_loss=0.2378, over 5804301.59 frames. ], batch size: 89, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:25:27,048 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.731e+01 8.514e+01 9.088e+01 9.657e+01 1.546e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-20 03:25:28,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=804000.0, ans=0.125 +2024-09-20 03:25:34,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=804040.0, ans=0.125 +2024-09-20 03:25:37,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=804040.0, ans=0.125 +2024-09-20 03:25:51,767 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=804080.0, ans=0.1 +2024-09-20 03:25:59,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=804080.0, ans=0.025 +2024-09-20 03:26:16,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=804120.0, ans=0.2 +2024-09-20 03:26:31,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=804160.0, ans=0.125 +2024-09-20 03:26:34,003 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.52 vs. limit=10.0 +2024-09-20 03:26:38,944 INFO [train.py:1198] (0/2) Epoch 45, batch 1950, loss[loss=0.2209, ctc_loss=0.09681, cr_loss=0.3165, attn_decoder_loss=0.2276, over 29441.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.11, cr_loss=0.3495, attn_decoder_loss=0.2388, over 5819219.77 frames. ], batch size: 78, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:27:25,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=804320.0, ans=0.0 +2024-09-20 03:27:35,962 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=804320.0, ans=0.125 +2024-09-20 03:27:42,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=804360.0, ans=0.1 +2024-09-20 03:27:44,671 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.25 vs. limit=10.0 +2024-09-20 03:27:54,313 INFO [train.py:1198] (0/2) Epoch 45, batch 2000, loss[loss=0.21, ctc_loss=0.09882, cr_loss=0.328, attn_decoder_loss=0.2151, over 29383.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.11, cr_loss=0.3489, attn_decoder_loss=0.2388, over 5798557.57 frames. ], batch size: 67, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:27:59,931 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.21 vs. 
limit=6.0 +2024-09-20 03:28:00,424 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.885e+01 8.761e+01 9.181e+01 9.636e+01 2.089e+02, threshold=1.836e+02, percent-clipped=2.0 +2024-09-20 03:28:18,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.04 vs. limit=15.0 +2024-09-20 03:28:51,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=804520.0, ans=0.025 +2024-09-20 03:29:01,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=804560.0, ans=0.0 +2024-09-20 03:29:11,956 INFO [train.py:1198] (0/2) Epoch 45, batch 2050, loss[loss=0.2129, ctc_loss=0.09694, cr_loss=0.3265, attn_decoder_loss=0.2185, over 29430.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1093, cr_loss=0.3477, attn_decoder_loss=0.2379, over 5789925.05 frames. ], batch size: 70, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:29:14,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.95 vs. limit=10.0 +2024-09-20 03:29:31,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=804640.0, ans=0.0 +2024-09-20 03:30:10,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=804720.0, ans=0.0 +2024-09-20 03:30:29,648 INFO [train.py:1198] (0/2) Epoch 45, batch 2100, loss[loss=0.2306, ctc_loss=0.111, cr_loss=0.3513, attn_decoder_loss=0.2361, over 29729.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.109, cr_loss=0.3473, attn_decoder_loss=0.2375, over 5800700.69 frames. ], batch size: 81, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:30:34,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=804800.0, ans=0.125 +2024-09-20 03:30:35,559 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.221e+01 8.482e+01 9.039e+01 9.529e+01 1.230e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-20 03:30:46,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=804840.0, ans=0.125 +2024-09-20 03:30:49,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=804840.0, ans=0.125 +2024-09-20 03:31:04,143 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=804880.0, ans=0.125 +2024-09-20 03:31:04,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.06 vs. limit=15.0 +2024-09-20 03:31:10,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=804880.0, ans=0.125 +2024-09-20 03:31:44,355 INFO [train.py:1198] (0/2) Epoch 45, batch 2150, loss[loss=0.2297, ctc_loss=0.1159, cr_loss=0.3636, attn_decoder_loss=0.2342, over 29440.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.109, cr_loss=0.3473, attn_decoder_loss=0.2371, over 5813372.65 frames. 
], batch size: 78, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:31:50,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=805000.0, ans=0.025 +2024-09-20 03:31:54,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=805000.0, ans=15.0 +2024-09-20 03:32:04,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=805040.0, ans=0.125 +2024-09-20 03:32:24,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=805080.0, ans=0.2 +2024-09-20 03:32:28,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=805080.0, ans=0.125 +2024-09-20 03:32:30,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.97 vs. limit=12.0 +2024-09-20 03:32:44,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=805120.0, ans=0.025 +2024-09-20 03:33:01,958 INFO [train.py:1198] (0/2) Epoch 45, batch 2200, loss[loss=0.234, ctc_loss=0.1057, cr_loss=0.3542, attn_decoder_loss=0.2404, over 29638.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1093, cr_loss=0.348, attn_decoder_loss=0.2372, over 5809647.24 frames. ], batch size: 86, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:33:09,444 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.511e+01 8.623e+01 8.976e+01 9.604e+01 3.634e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-20 03:33:11,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=805200.0, ans=0.2 +2024-09-20 03:33:21,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=805240.0, ans=0.0 +2024-09-20 03:33:50,959 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.67 vs. limit=15.0 +2024-09-20 03:34:19,497 INFO [train.py:1198] (0/2) Epoch 45, batch 2250, loss[loss=0.2396, ctc_loss=0.1156, cr_loss=0.3636, attn_decoder_loss=0.2452, over 29699.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1089, cr_loss=0.3474, attn_decoder_loss=0.2369, over 5810151.83 frames. ], batch size: 82, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:34:36,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=805440.0, ans=0.1 +2024-09-20 03:35:10,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=805520.0, ans=0.05 +2024-09-20 03:35:24,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=805560.0, ans=0.125 +2024-09-20 03:35:30,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=805560.0, ans=0.125 +2024-09-20 03:35:34,915 INFO [train.py:1198] (0/2) Epoch 45, batch 2300, loss[loss=0.209, ctc_loss=0.09004, cr_loss=0.3007, attn_decoder_loss=0.2155, over 29338.00 frames. 
], tot_loss[loss=0.23, ctc_loss=0.1079, cr_loss=0.345, attn_decoder_loss=0.2358, over 5796926.88 frames. ], batch size: 71, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:35:37,167 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.53 vs. limit=15.0 +2024-09-20 03:35:42,372 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.497e+01 8.561e+01 9.011e+01 9.517e+01 1.725e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-20 03:35:42,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=805600.0, ans=0.1 +2024-09-20 03:35:45,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=805600.0, ans=0.025 +2024-09-20 03:35:51,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=805640.0, ans=0.125 +2024-09-20 03:36:11,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=805680.0, ans=0.0 +2024-09-20 03:36:34,757 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:36:40,322 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.97 vs. limit=15.0 +2024-09-20 03:36:52,632 INFO [train.py:1198] (0/2) Epoch 45, batch 2350, loss[loss=0.2544, ctc_loss=0.1277, cr_loss=0.3829, attn_decoder_loss=0.26, over 29716.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1082, cr_loss=0.3453, attn_decoder_loss=0.2362, over 5803671.28 frames. ], batch size: 83, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:36:56,623 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.13 vs. limit=6.0 +2024-09-20 03:37:31,079 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.53 vs. limit=15.0 +2024-09-20 03:37:46,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=805920.0, ans=0.125 +2024-09-20 03:37:56,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=805960.0, ans=0.05 +2024-09-20 03:38:10,111 INFO [train.py:1198] (0/2) Epoch 45, batch 2400, loss[loss=0.2298, ctc_loss=0.112, cr_loss=0.3674, attn_decoder_loss=0.2347, over 29540.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1086, cr_loss=0.3462, attn_decoder_loss=0.2366, over 5808076.19 frames. 
], batch size: 76, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:38:16,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=806000.0, ans=0.95 +2024-09-20 03:38:17,562 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.497e+01 8.673e+01 8.961e+01 9.495e+01 1.491e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-20 03:38:33,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=806040.0, ans=0.125 +2024-09-20 03:38:45,362 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=806080.0, ans=0.1 +2024-09-20 03:38:52,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=806080.0, ans=0.0 +2024-09-20 03:38:55,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=806120.0, ans=0.2 +2024-09-20 03:39:15,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=806160.0, ans=0.1 +2024-09-20 03:39:23,066 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=806160.0, ans=0.04949747468305833 +2024-09-20 03:39:24,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=806200.0, ans=0.125 +2024-09-20 03:39:25,820 INFO [train.py:1198] (0/2) Epoch 45, batch 2450, loss[loss=0.2433, ctc_loss=0.1225, cr_loss=0.3638, attn_decoder_loss=0.2487, over 29716.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1092, cr_loss=0.3476, attn_decoder_loss=0.2376, over 5785502.25 frames. ], batch size: 82, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:40:08,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=806280.0, ans=0.2 +2024-09-20 03:40:19,989 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.04 vs. limit=6.0 +2024-09-20 03:40:39,069 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=806360.0, ans=0.0 +2024-09-20 03:40:43,797 INFO [train.py:1198] (0/2) Epoch 45, batch 2500, loss[loss=0.2475, ctc_loss=0.1201, cr_loss=0.382, attn_decoder_loss=0.2531, over 29642.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1096, cr_loss=0.3484, attn_decoder_loss=0.2378, over 5795403.14 frames. ], batch size: 86, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:40:50,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=806400.0, ans=0.125 +2024-09-20 03:40:51,304 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.491e+01 8.641e+01 9.220e+01 9.804e+01 1.997e+02, threshold=1.844e+02, percent-clipped=2.0 +2024-09-20 03:40:59,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=806440.0, ans=0.0 +2024-09-20 03:41:01,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.33 vs. 
limit=15.0 +2024-09-20 03:41:01,332 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.94 vs. limit=15.0 +2024-09-20 03:41:08,282 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=806440.0, ans=0.2 +2024-09-20 03:41:46,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=806560.0, ans=0.125 +2024-09-20 03:42:01,671 INFO [train.py:1198] (0/2) Epoch 45, batch 2550, loss[loss=0.2068, ctc_loss=0.09188, cr_loss=0.3068, attn_decoder_loss=0.2127, over 29353.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1097, cr_loss=0.349, attn_decoder_loss=0.2379, over 5798861.01 frames. ], batch size: 67, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:42:33,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=806680.0, ans=0.125 +2024-09-20 03:42:53,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=806720.0, ans=0.0 +2024-09-20 03:42:58,662 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.38 vs. limit=15.0 +2024-09-20 03:43:10,192 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:43:13,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=806760.0, ans=0.2 +2024-09-20 03:43:14,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=806760.0, ans=0.125 +2024-09-20 03:43:17,313 INFO [train.py:1198] (0/2) Epoch 45, batch 2600, loss[loss=0.2312, ctc_loss=0.1124, cr_loss=0.361, attn_decoder_loss=0.2364, over 29449.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1101, cr_loss=0.3498, attn_decoder_loss=0.2385, over 5796322.82 frames. 
], batch size: 78, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:43:20,531 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=806800.0, ans=0.0 +2024-09-20 03:43:23,511 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=806800.0, ans=0.1 +2024-09-20 03:43:26,235 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.746e+01 8.807e+01 9.340e+01 9.891e+01 1.748e+02, threshold=1.868e+02, percent-clipped=0.0 +2024-09-20 03:43:29,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=806800.0, ans=0.09899494936611666 +2024-09-20 03:43:32,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=806840.0, ans=0.1 +2024-09-20 03:43:36,951 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:43:39,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=806840.0, ans=0.125 +2024-09-20 03:43:47,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=806880.0, ans=0.025 +2024-09-20 03:44:00,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=806880.0, ans=0.125 +2024-09-20 03:44:03,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=806920.0, ans=0.025 +2024-09-20 03:44:03,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=806920.0, ans=0.125 +2024-09-20 03:44:03,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=806920.0, ans=0.04949747468305833 +2024-09-20 03:44:09,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.12 vs. limit=10.0 +2024-09-20 03:44:22,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=806960.0, ans=0.125 +2024-09-20 03:44:32,328 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.44 vs. limit=22.5 +2024-09-20 03:44:34,361 INFO [train.py:1198] (0/2) Epoch 45, batch 2650, loss[loss=0.2448, ctc_loss=0.1131, cr_loss=0.3674, attn_decoder_loss=0.2512, over 29222.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1101, cr_loss=0.3502, attn_decoder_loss=0.2387, over 5802170.98 frames. 
], batch size: 100, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:44:34,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=807000.0, ans=0.025 +2024-09-20 03:44:36,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=807000.0, ans=0.0 +2024-09-20 03:44:39,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=807000.0, ans=0.1 +2024-09-20 03:44:48,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=807040.0, ans=0.125 +2024-09-20 03:45:29,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=807120.0, ans=0.025 +2024-09-20 03:45:31,383 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.97 vs. limit=15.0 +2024-09-20 03:45:45,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.68 vs. limit=15.0 +2024-09-20 03:45:45,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=807160.0, ans=0.0 +2024-09-20 03:45:47,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.52 vs. limit=15.0 +2024-09-20 03:45:51,986 INFO [train.py:1198] (0/2) Epoch 45, batch 2700, loss[loss=0.2357, ctc_loss=0.1036, cr_loss=0.3395, attn_decoder_loss=0.2428, over 29525.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1101, cr_loss=0.3504, attn_decoder_loss=0.2389, over 5797901.42 frames. ], batch size: 87, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:46:01,055 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.462e+01 8.586e+01 9.065e+01 9.630e+01 2.449e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-20 03:46:13,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=807240.0, ans=0.2 +2024-09-20 03:46:23,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=807280.0, ans=0.0 +2024-09-20 03:46:24,559 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.46 vs. limit=22.5 +2024-09-20 03:46:35,241 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.60 vs. limit=6.0 +2024-09-20 03:46:40,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=807320.0, ans=0.0 +2024-09-20 03:46:57,721 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.00 vs. limit=10.0 +2024-09-20 03:47:07,354 INFO [train.py:1198] (0/2) Epoch 45, batch 2750, loss[loss=0.216, ctc_loss=0.09659, cr_loss=0.3186, attn_decoder_loss=0.2222, over 29534.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1095, cr_loss=0.3492, attn_decoder_loss=0.2377, over 5796195.82 frames. 
], batch size: 75, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:47:32,114 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.08 vs. limit=15.0 +2024-09-20 03:47:33,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=807440.0, ans=0.0 +2024-09-20 03:47:38,248 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.28 vs. limit=6.0 +2024-09-20 03:47:46,459 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:47:57,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=807520.0, ans=0.125 +2024-09-20 03:47:59,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.10 vs. limit=15.0 +2024-09-20 03:48:25,194 INFO [train.py:1198] (0/2) Epoch 45, batch 2800, loss[loss=0.2564, ctc_loss=0.1384, cr_loss=0.3842, attn_decoder_loss=0.2609, over 19742.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1098, cr_loss=0.35, attn_decoder_loss=0.2379, over 5777025.72 frames. ], batch size: 209, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:48:31,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=807600.0, ans=0.025 +2024-09-20 03:48:34,099 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.700e+01 8.581e+01 9.021e+01 9.905e+01 2.529e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-20 03:48:42,549 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.87 vs. limit=22.5 +2024-09-20 03:48:43,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=807640.0, ans=0.0 +2024-09-20 03:48:48,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=807640.0, ans=0.1 +2024-09-20 03:48:51,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=807640.0, ans=0.125 +2024-09-20 03:48:52,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=807640.0, ans=0.0 +2024-09-20 03:49:12,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=807720.0, ans=0.125 +2024-09-20 03:49:38,616 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.98 vs. limit=15.0 +2024-09-20 03:49:42,384 INFO [train.py:1198] (0/2) Epoch 45, batch 2850, loss[loss=0.2191, ctc_loss=0.1001, cr_loss=0.3286, attn_decoder_loss=0.225, over 29500.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.11, cr_loss=0.35, attn_decoder_loss=0.2381, over 5762506.35 frames. 
], batch size: 77, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:49:48,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=807800.0, ans=0.0 +2024-09-20 03:49:54,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=807800.0, ans=0.125 +2024-09-20 03:50:41,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=807960.0, ans=0.1 +2024-09-20 03:50:57,567 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.49 vs. limit=22.5 +2024-09-20 03:50:58,375 INFO [train.py:1198] (0/2) Epoch 45, batch 2900, loss[loss=0.2288, ctc_loss=0.1037, cr_loss=0.3381, attn_decoder_loss=0.2352, over 29794.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1107, cr_loss=0.3513, attn_decoder_loss=0.2391, over 5788882.80 frames. ], batch size: 80, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:50:59,196 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.13 vs. limit=10.0 +2024-09-20 03:51:06,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.41 vs. limit=10.0 +2024-09-20 03:51:07,242 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.821e+01 8.668e+01 9.103e+01 9.766e+01 1.431e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-20 03:51:19,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=808040.0, ans=0.0 +2024-09-20 03:51:51,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=808120.0, ans=0.025 +2024-09-20 03:52:15,547 INFO [train.py:1198] (0/2) Epoch 45, batch 2950, loss[loss=0.2198, ctc_loss=0.1048, cr_loss=0.3422, attn_decoder_loss=0.225, over 29513.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1099, cr_loss=0.3488, attn_decoder_loss=0.2379, over 5783407.18 frames. ], batch size: 75, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:52:26,976 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.80 vs. limit=15.0 +2024-09-20 03:52:41,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=808240.0, ans=0.125 +2024-09-20 03:52:45,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=808280.0, ans=0.035 +2024-09-20 03:52:52,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=808280.0, ans=0.125 +2024-09-20 03:52:52,195 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=808280.0, ans=0.125 +2024-09-20 03:52:55,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=808280.0, ans=0.0 +2024-09-20 03:53:33,024 INFO [train.py:1198] (0/2) Epoch 45, batch 3000, loss[loss=0.2294, ctc_loss=0.109, cr_loss=0.3489, attn_decoder_loss=0.235, over 29754.00 frames. 
], tot_loss[loss=0.2316, ctc_loss=0.1095, cr_loss=0.3474, attn_decoder_loss=0.2375, over 5783608.43 frames. ], batch size: 81, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:53:33,025 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 03:53:51,274 INFO [train.py:1230] (0/2) Epoch 45, validation: loss=0.213, ctc_loss=0.0366, cr_loss=6.956e-15, attn_decoder_loss=0.2326, over 944034.00 frames. +2024-09-20 03:53:51,275 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 03:54:00,587 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.339e+01 8.498e+01 9.089e+01 9.590e+01 3.857e+02, threshold=1.818e+02, percent-clipped=2.0 +2024-09-20 03:54:05,353 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=808440.0, ans=0.5 +2024-09-20 03:54:29,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=808480.0, ans=0.125 +2024-09-20 03:55:05,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=808600.0, ans=0.125 +2024-09-20 03:55:06,904 INFO [train.py:1198] (0/2) Epoch 45, batch 3050, loss[loss=0.2248, ctc_loss=0.1063, cr_loss=0.3468, attn_decoder_loss=0.2303, over 29525.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1101, cr_loss=0.3492, attn_decoder_loss=0.2383, over 5778038.17 frames. ], batch size: 76, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:55:32,697 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.90 vs. limit=22.5 +2024-09-20 03:55:55,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=808720.0, ans=0.125 +2024-09-20 03:56:04,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=808720.0, ans=0.125 +2024-09-20 03:56:08,490 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.01 vs. limit=6.0 +2024-09-20 03:56:12,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=808760.0, ans=0.0 +2024-09-20 03:56:24,622 INFO [train.py:1198] (0/2) Epoch 45, batch 3100, loss[loss=0.2458, ctc_loss=0.1244, cr_loss=0.3748, attn_decoder_loss=0.2509, over 29251.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1101, cr_loss=0.3495, attn_decoder_loss=0.2379, over 5777083.28 frames. 
], batch size: 100, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:56:35,119 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.756e+01 8.685e+01 9.291e+01 9.894e+01 1.991e+02, threshold=1.858e+02, percent-clipped=1.0 +2024-09-20 03:56:57,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=808880.0, ans=0.125 +2024-09-20 03:57:25,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=808960.0, ans=0.035 +2024-09-20 03:57:36,273 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=808960.0, ans=0.025 +2024-09-20 03:57:42,020 INFO [train.py:1198] (0/2) Epoch 45, batch 3150, loss[loss=0.2417, ctc_loss=0.1118, cr_loss=0.358, attn_decoder_loss=0.2482, over 28812.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1101, cr_loss=0.3496, attn_decoder_loss=0.2379, over 5783057.78 frames. ], batch size: 104, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:57:57,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=809040.0, ans=0.0 +2024-09-20 03:58:16,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=809080.0, ans=0.125 +2024-09-20 03:58:47,157 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.71 vs. limit=15.0 +2024-09-20 03:58:56,895 INFO [train.py:1198] (0/2) Epoch 45, batch 3200, loss[loss=0.2353, ctc_loss=0.1071, cr_loss=0.3366, attn_decoder_loss=0.2421, over 29431.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1099, cr_loss=0.3493, attn_decoder_loss=0.2376, over 5793410.18 frames. ], batch size: 79, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:59:01,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=809200.0, ans=0.0 +2024-09-20 03:59:07,504 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.301e+01 8.632e+01 9.218e+01 9.587e+01 1.920e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-20 03:59:16,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=809240.0, ans=0.0 +2024-09-20 03:59:38,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=809280.0, ans=0.0 +2024-09-20 03:59:45,026 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=809320.0, ans=0.125 +2024-09-20 03:59:52,420 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=809320.0, ans=0.125 +2024-09-20 04:00:07,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=809360.0, ans=0.125 +2024-09-20 04:00:14,541 INFO [train.py:1198] (0/2) Epoch 45, batch 3250, loss[loss=0.2414, ctc_loss=0.1125, cr_loss=0.3642, attn_decoder_loss=0.2476, over 29716.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1101, cr_loss=0.3501, attn_decoder_loss=0.238, over 5800206.74 frames. 
], batch size: 84, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:00:14,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=809400.0, ans=0.0 +2024-09-20 04:00:16,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=809400.0, ans=0.125 +2024-09-20 04:00:32,854 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:00:39,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=809440.0, ans=0.0 +2024-09-20 04:00:59,359 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.22 vs. limit=15.0 +2024-09-20 04:01:08,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.83 vs. limit=10.0 +2024-09-20 04:01:11,678 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.54 vs. limit=10.0 +2024-09-20 04:01:12,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=809520.0, ans=0.125 +2024-09-20 04:01:20,301 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.96 vs. limit=6.0 +2024-09-20 04:01:32,415 INFO [train.py:1198] (0/2) Epoch 45, batch 3300, loss[loss=0.2406, ctc_loss=0.1133, cr_loss=0.3555, attn_decoder_loss=0.2469, over 28259.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1095, cr_loss=0.349, attn_decoder_loss=0.2369, over 5797535.38 frames. ], batch size: 111, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:01:38,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=809600.0, ans=10.0 +2024-09-20 04:01:40,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=809600.0, ans=0.0 +2024-09-20 04:01:42,963 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.316e+01 8.585e+01 9.187e+01 9.677e+01 1.727e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-20 04:01:49,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=809640.0, ans=0.125 +2024-09-20 04:01:55,517 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=809640.0, ans=0.125 +2024-09-20 04:02:04,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=809680.0, ans=0.1 +2024-09-20 04:02:12,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=809680.0, ans=0.125 +2024-09-20 04:02:28,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=809720.0, ans=0.125 +2024-09-20 04:02:44,004 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.07 vs. 
limit=15.0 +2024-09-20 04:02:47,607 INFO [train.py:1198] (0/2) Epoch 45, batch 3350, loss[loss=0.2429, ctc_loss=0.1117, cr_loss=0.3456, attn_decoder_loss=0.2498, over 28774.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1103, cr_loss=0.3506, attn_decoder_loss=0.2378, over 5774750.33 frames. ], batch size: 104, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:02:52,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=809800.0, ans=0.1 +2024-09-20 04:03:14,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=809840.0, ans=0.1 +2024-09-20 04:03:18,683 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=809880.0, ans=0.1 +2024-09-20 04:03:26,568 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:04:05,666 INFO [train.py:1198] (0/2) Epoch 45, batch 3400, loss[loss=0.2091, ctc_loss=0.09998, cr_loss=0.3422, attn_decoder_loss=0.2136, over 29354.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1102, cr_loss=0.3501, attn_decoder_loss=0.2376, over 5768057.15 frames. ], batch size: 67, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:04:07,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=810000.0, ans=0.1 +2024-09-20 04:04:18,538 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 8.782e+01 9.254e+01 9.954e+01 2.335e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-20 04:04:33,057 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.22 vs. limit=15.0 +2024-09-20 04:05:11,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.62 vs. limit=15.0 +2024-09-20 04:05:23,107 INFO [train.py:1198] (0/2) Epoch 45, batch 3450, loss[loss=0.2506, ctc_loss=0.114, cr_loss=0.3571, attn_decoder_loss=0.2578, over 28326.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1103, cr_loss=0.3503, attn_decoder_loss=0.2379, over 5776330.98 frames. ], batch size: 111, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:05:39,263 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.45 vs. limit=15.0 +2024-09-20 04:05:41,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=810240.0, ans=0.125 +2024-09-20 04:05:46,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=810240.0, ans=0.09899494936611666 +2024-09-20 04:05:55,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=810280.0, ans=0.125 +2024-09-20 04:05:58,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=810280.0, ans=0.1 +2024-09-20 04:06:09,479 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.60 vs. 
limit=6.0 +2024-09-20 04:06:32,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=810360.0, ans=0.125 +2024-09-20 04:06:38,593 INFO [train.py:1198] (0/2) Epoch 45, batch 3500, loss[loss=0.2118, ctc_loss=0.09721, cr_loss=0.3205, attn_decoder_loss=0.2174, over 29327.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1101, cr_loss=0.3493, attn_decoder_loss=0.2375, over 5776730.58 frames. ], batch size: 71, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:06:49,187 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.777e+01 9.274e+01 9.867e+01 1.400e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-20 04:06:55,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=810440.0, ans=0.0 +2024-09-20 04:06:58,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=810440.0, ans=0.0 +2024-09-20 04:07:15,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=810480.0, ans=0.125 +2024-09-20 04:07:36,706 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.62 vs. limit=15.0 +2024-09-20 04:07:55,108 INFO [train.py:1198] (0/2) Epoch 45, batch 3550, loss[loss=0.2477, ctc_loss=0.1201, cr_loss=0.3678, attn_decoder_loss=0.2537, over 29684.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1099, cr_loss=0.3492, attn_decoder_loss=0.2376, over 5783317.18 frames. ], batch size: 89, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:07:56,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=810600.0, ans=0.125 +2024-09-20 04:08:01,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=810600.0, ans=0.0 +2024-09-20 04:08:10,071 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=810640.0, ans=0.125 +2024-09-20 04:08:18,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=810640.0, ans=0.0 +2024-09-20 04:08:22,799 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.48 vs. limit=22.5 +2024-09-20 04:08:29,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=810680.0, ans=0.0 +2024-09-20 04:08:37,306 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.60 vs. limit=6.0 +2024-09-20 04:08:43,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.48 vs. 
limit=15.0 +2024-09-20 04:09:00,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=810760.0, ans=0.2 +2024-09-20 04:09:02,161 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=810760.0, ans=0.025 +2024-09-20 04:09:02,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=810760.0, ans=0.1 +2024-09-20 04:09:10,806 INFO [train.py:1198] (0/2) Epoch 45, batch 3600, loss[loss=0.2085, ctc_loss=0.09187, cr_loss=0.2998, attn_decoder_loss=0.2148, over 29507.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1096, cr_loss=0.3485, attn_decoder_loss=0.2373, over 5792992.56 frames. ], batch size: 77, lr: 2.44e-03, grad_scale: 32.0 +2024-09-20 04:09:15,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=810800.0, ans=0.125 +2024-09-20 04:09:22,711 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.574e+01 8.599e+01 9.272e+01 9.719e+01 1.680e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-20 04:09:55,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=810920.0, ans=0.1 +2024-09-20 04:10:10,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=810960.0, ans=0.125 +2024-09-20 04:10:24,865 INFO [train.py:1198] (0/2) Epoch 45, batch 3650, loss[loss=0.2479, ctc_loss=0.1188, cr_loss=0.3688, attn_decoder_loss=0.2541, over 29515.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1092, cr_loss=0.3476, attn_decoder_loss=0.2367, over 5794436.81 frames. ], batch size: 90, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:10:46,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=811040.0, ans=0.04949747468305833 +2024-09-20 04:10:48,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=811040.0, ans=0.1 +2024-09-20 04:10:56,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=811080.0, ans=0.1 +2024-09-20 04:10:59,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=811080.0, ans=0.2 +2024-09-20 04:11:05,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=811080.0, ans=0.0 +2024-09-20 04:11:06,251 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.35 vs. limit=22.5 +2024-09-20 04:11:20,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=811120.0, ans=0.1 +2024-09-20 04:11:27,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=811160.0, ans=0.2 +2024-09-20 04:11:39,703 INFO [train.py:1198] (0/2) Epoch 45, batch 3700, loss[loss=0.238, ctc_loss=0.1068, cr_loss=0.3399, attn_decoder_loss=0.245, over 29696.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1093, cr_loss=0.3476, attn_decoder_loss=0.2371, over 5804812.40 frames. 
], batch size: 84, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:11:51,726 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.848e+01 9.366e+01 9.775e+01 1.224e+02, threshold=1.873e+02, percent-clipped=0.0 +2024-09-20 04:11:53,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=811240.0, ans=0.0 +2024-09-20 04:12:10,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=811280.0, ans=0.125 +2024-09-20 04:12:27,689 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=811320.0, ans=0.125 +2024-09-20 04:12:31,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=811320.0, ans=0.0 +2024-09-20 04:12:39,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=811360.0, ans=0.125 +2024-09-20 04:12:52,021 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.54 vs. limit=15.0 +2024-09-20 04:12:54,016 INFO [train.py:1198] (0/2) Epoch 45, batch 3750, loss[loss=0.2073, ctc_loss=0.09693, cr_loss=0.3101, attn_decoder_loss=0.2127, over 29297.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1094, cr_loss=0.3479, attn_decoder_loss=0.237, over 5809097.88 frames. ], batch size: 67, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:13:00,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=811400.0, ans=0.125 +2024-09-20 04:13:06,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=811400.0, ans=0.125 +2024-09-20 04:13:12,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=811440.0, ans=10.0 +2024-09-20 04:13:29,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=811480.0, ans=0.125 +2024-09-20 04:13:41,067 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.32 vs. limit=22.5 +2024-09-20 04:13:53,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=811560.0, ans=0.125 +2024-09-20 04:14:04,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=811560.0, ans=0.125 +2024-09-20 04:14:05,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=811560.0, ans=0.1 +2024-09-20 04:14:09,800 INFO [train.py:1198] (0/2) Epoch 45, batch 3800, loss[loss=0.246, ctc_loss=0.1116, cr_loss=0.345, attn_decoder_loss=0.2533, over 29624.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1088, cr_loss=0.3468, attn_decoder_loss=0.2366, over 5799778.91 frames. 
], batch size: 86, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:14:21,612 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.415e+01 8.556e+01 8.957e+01 9.574e+01 2.203e+02, threshold=1.791e+02, percent-clipped=1.0 +2024-09-20 04:14:30,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=811640.0, ans=0.07 +2024-09-20 04:15:00,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=811720.0, ans=0.125 +2024-09-20 04:15:08,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=811720.0, ans=0.125 +2024-09-20 04:15:21,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=811760.0, ans=0.0 +2024-09-20 04:15:25,792 INFO [train.py:1198] (0/2) Epoch 45, batch 3850, loss[loss=0.2442, ctc_loss=0.1232, cr_loss=0.3752, attn_decoder_loss=0.2493, over 29250.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1089, cr_loss=0.3469, attn_decoder_loss=0.2368, over 5812194.21 frames. ], batch size: 100, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:15:36,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=811800.0, ans=0.0 +2024-09-20 04:15:40,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=811840.0, ans=0.0 +2024-09-20 04:15:51,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=811840.0, ans=0.125 +2024-09-20 04:15:55,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=811880.0, ans=0.0 +2024-09-20 04:15:57,950 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.05 vs. limit=10.0 +2024-09-20 04:16:00,313 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.27 vs. limit=12.0 +2024-09-20 04:16:11,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=811920.0, ans=0.2 +2024-09-20 04:16:32,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=811960.0, ans=0.125 +2024-09-20 04:16:40,296 INFO [train.py:1198] (0/2) Epoch 45, batch 3900, loss[loss=0.2369, ctc_loss=0.1125, cr_loss=0.3573, attn_decoder_loss=0.2427, over 29627.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1093, cr_loss=0.348, attn_decoder_loss=0.2372, over 5815900.16 frames. 
], batch size: 86, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:16:52,122 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.943e+01 8.765e+01 9.119e+01 9.578e+01 1.365e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-20 04:17:13,080 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:17:23,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=812120.0, ans=0.0 +2024-09-20 04:17:43,878 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=812160.0, ans=0.125 +2024-09-20 04:17:46,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=812160.0, ans=0.05 +2024-09-20 04:17:54,036 INFO [train.py:1198] (0/2) Epoch 45, batch 3950, loss[loss=0.2413, ctc_loss=0.1133, cr_loss=0.3634, attn_decoder_loss=0.2475, over 29470.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.109, cr_loss=0.3476, attn_decoder_loss=0.2372, over 5835179.26 frames. ], batch size: 97, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:18:17,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.48 vs. limit=15.0 +2024-09-20 04:19:08,738 INFO [train.py:1198] (0/2) Epoch 45, batch 4000, loss[loss=0.2232, ctc_loss=0.1062, cr_loss=0.3362, attn_decoder_loss=0.2287, over 29535.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1092, cr_loss=0.3483, attn_decoder_loss=0.2373, over 5813142.82 frames. ], batch size: 74, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:19:11,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=812400.0, ans=0.125 +2024-09-20 04:19:11,874 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=812400.0, ans=0.125 +2024-09-20 04:19:13,337 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=812400.0, ans=0.2 +2024-09-20 04:19:21,819 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.441e+01 9.012e+01 9.623e+01 3.417e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 04:19:26,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=812440.0, ans=0.125 +2024-09-20 04:19:29,879 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.71 vs. limit=15.0 +2024-09-20 04:20:09,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=812560.0, ans=0.0 +2024-09-20 04:20:13,006 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.12 vs. limit=10.0 +2024-09-20 04:20:23,544 INFO [train.py:1198] (0/2) Epoch 45, batch 4050, loss[loss=0.2551, ctc_loss=0.1364, cr_loss=0.3795, attn_decoder_loss=0.2599, over 20715.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1094, cr_loss=0.3486, attn_decoder_loss=0.2374, over 5797479.81 frames. 
], batch size: 209, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:20:39,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=812640.0, ans=0.125 +2024-09-20 04:20:45,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=812640.0, ans=0.125 +2024-09-20 04:21:05,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=812720.0, ans=0.1 +2024-09-20 04:21:11,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=812720.0, ans=0.125 +2024-09-20 04:21:33,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=812760.0, ans=0.125 +2024-09-20 04:21:35,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=812800.0, ans=0.0 +2024-09-20 04:21:36,999 INFO [train.py:1198] (0/2) Epoch 45, batch 4100, loss[loss=0.2432, ctc_loss=0.1201, cr_loss=0.38, attn_decoder_loss=0.2485, over 29508.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1095, cr_loss=0.349, attn_decoder_loss=0.2377, over 5792330.78 frames. ], batch size: 90, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:21:51,600 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.673e+01 9.305e+01 9.853e+01 2.008e+02, threshold=1.861e+02, percent-clipped=1.0 +2024-09-20 04:22:16,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=812880.0, ans=0.125 +2024-09-20 04:22:31,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=812920.0, ans=0.125 +2024-09-20 04:22:31,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=812920.0, ans=0.125 +2024-09-20 04:22:41,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=812960.0, ans=0.025 +2024-09-20 04:22:50,427 INFO [train.py:1198] (0/2) Epoch 45, batch 4150, loss[loss=0.2273, ctc_loss=0.1143, cr_loss=0.3697, attn_decoder_loss=0.2316, over 29528.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1094, cr_loss=0.3484, attn_decoder_loss=0.2374, over 5797807.04 frames. ], batch size: 77, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:22:55,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=813000.0, ans=0.125 +2024-09-20 04:23:07,426 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.94 vs. 
limit=15.0 +2024-09-20 04:23:08,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=813040.0, ans=0.0 +2024-09-20 04:23:09,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=813040.0, ans=0.1 +2024-09-20 04:23:41,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=813120.0, ans=0.5 +2024-09-20 04:23:54,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=813160.0, ans=0.0 +2024-09-20 04:24:06,123 INFO [train.py:1198] (0/2) Epoch 45, batch 4200, loss[loss=0.2505, ctc_loss=0.1294, cr_loss=0.3986, attn_decoder_loss=0.2551, over 29495.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1093, cr_loss=0.3481, attn_decoder_loss=0.2375, over 5800542.81 frames. ], batch size: 90, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:24:20,925 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.773e+01 8.639e+01 8.983e+01 9.636e+01 1.465e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-20 04:24:40,716 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.17 vs. limit=10.0 +2024-09-20 04:25:00,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=813320.0, ans=0.1 +2024-09-20 04:25:04,123 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.94 vs. limit=15.0 +2024-09-20 04:25:12,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=813360.0, ans=0.125 +2024-09-20 04:25:13,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=813360.0, ans=0.1 +2024-09-20 04:25:13,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=813360.0, ans=0.0 +2024-09-20 04:25:14,386 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.23 vs. limit=10.0 +2024-09-20 04:25:19,311 INFO [train.py:1198] (0/2) Epoch 45, batch 4250, loss[loss=0.2129, ctc_loss=0.08413, cr_loss=0.2994, attn_decoder_loss=0.2205, over 29533.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.109, cr_loss=0.3478, attn_decoder_loss=0.2376, over 5806220.48 frames. 
], batch size: 74, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:25:22,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=813400.0, ans=0.0 +2024-09-20 04:25:31,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=813400.0, ans=0.2 +2024-09-20 04:25:32,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=813440.0, ans=0.125 +2024-09-20 04:25:33,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=813440.0, ans=0.0 +2024-09-20 04:25:36,790 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=813440.0, ans=0.125 +2024-09-20 04:25:45,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=813440.0, ans=0.0 +2024-09-20 04:26:03,791 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.60 vs. limit=15.0 +2024-09-20 04:26:06,783 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.69 vs. limit=10.0 +2024-09-20 04:26:09,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=813520.0, ans=10.0 +2024-09-20 04:26:14,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=813520.0, ans=0.025 +2024-09-20 04:26:22,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=813560.0, ans=0.04949747468305833 +2024-09-20 04:26:26,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=813560.0, ans=0.1 +2024-09-20 04:26:29,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=813560.0, ans=0.0 +2024-09-20 04:26:32,857 INFO [train.py:1198] (0/2) Epoch 45, batch 4300, loss[loss=0.231, ctc_loss=0.1076, cr_loss=0.3264, attn_decoder_loss=0.2374, over 29519.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1092, cr_loss=0.3483, attn_decoder_loss=0.238, over 5795193.89 frames. ], batch size: 87, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:26:42,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=813600.0, ans=0.1 +2024-09-20 04:26:43,811 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.53 vs. 
limit=15.0 +2024-09-20 04:26:47,737 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.342e+01 8.880e+01 9.464e+01 1.001e+02 2.468e+02, threshold=1.893e+02, percent-clipped=1.0 +2024-09-20 04:26:49,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=813640.0, ans=0.125 +2024-09-20 04:27:34,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=813760.0, ans=0.1 +2024-09-20 04:27:36,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.32 vs. limit=15.0 +2024-09-20 04:27:39,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=813760.0, ans=0.0 +2024-09-20 04:27:48,500 INFO [train.py:1198] (0/2) Epoch 45, batch 4350, loss[loss=0.2537, ctc_loss=0.1315, cr_loss=0.3866, attn_decoder_loss=0.2586, over 29436.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1117, cr_loss=0.3539, attn_decoder_loss=0.241, over 5797797.70 frames. ], batch size: 97, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:28:13,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=813840.0, ans=0.0 +2024-09-20 04:28:25,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=813880.0, ans=0.125 +2024-09-20 04:28:47,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=813960.0, ans=0.125 +2024-09-20 04:28:47,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.61 vs. limit=15.0 +2024-09-20 04:29:01,436 INFO [train.py:1198] (0/2) Epoch 45, batch 4400, loss[loss=0.2394, ctc_loss=0.1166, cr_loss=0.369, attn_decoder_loss=0.2448, over 27438.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1129, cr_loss=0.356, attn_decoder_loss=0.2429, over 5769621.48 frames. ], batch size: 125, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:29:04,963 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.30 vs. limit=15.0 +2024-09-20 04:29:11,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=814000.0, ans=0.1 +2024-09-20 04:29:15,735 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.358e+01 9.080e+01 9.421e+01 9.945e+01 1.972e+02, threshold=1.884e+02, percent-clipped=1.0 +2024-09-20 04:29:26,246 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=814040.0, ans=0.125 +2024-09-20 04:29:27,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=814040.0, ans=0.125 +2024-09-20 04:30:15,985 INFO [train.py:1198] (0/2) Epoch 45, batch 4450, loss[loss=0.2485, ctc_loss=0.1401, cr_loss=0.404, attn_decoder_loss=0.2516, over 19866.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1161, cr_loss=0.3613, attn_decoder_loss=0.2449, over 5581693.95 frames. 
], batch size: 210, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:30:20,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=814200.0, ans=0.125 +2024-09-20 04:30:31,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=814240.0, ans=0.025 +2024-09-20 04:30:37,134 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=814240.0, ans=0.1 +2024-09-20 04:30:40,542 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.90 vs. limit=15.0 +2024-09-20 04:30:49,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=814280.0, ans=0.125 +2024-09-20 04:31:11,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=814320.0, ans=0.2 +2024-09-20 04:31:15,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=814360.0, ans=0.125 +2024-09-20 04:31:21,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=814360.0, ans=0.025 +2024-09-20 04:31:24,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=814360.0, ans=0.05 +2024-09-20 04:31:31,833 INFO [train.py:1198] (0/2) Epoch 45, batch 4500, loss[loss=0.2495, ctc_loss=0.1297, cr_loss=0.3818, attn_decoder_loss=0.2544, over 19585.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1189, cr_loss=0.3637, attn_decoder_loss=0.2466, over 5238162.55 frames. ], batch size: 209, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:31:39,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=814400.0, ans=0.125 +2024-09-20 04:31:46,992 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:31:48,022 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.758e+01 1.032e+02 1.137e+02 1.254e+02 4.078e+02, threshold=2.275e+02, percent-clipped=1.0 +2024-09-20 04:32:08,830 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-45.pt +2024-09-20 04:32:47,085 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.01 vs. limit=15.0 +2024-09-20 04:32:47,514 INFO [train.py:1198] (0/2) Epoch 46, batch 0, loss[loss=0.2135, ctc_loss=0.0987, cr_loss=0.3329, attn_decoder_loss=0.2188, over 29619.00 frames. ], tot_loss[loss=0.2135, ctc_loss=0.0987, cr_loss=0.3329, attn_decoder_loss=0.2188, over 29619.00 frames. 
], batch size: 73, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:32:47,515 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 04:33:04,846 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.2.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([5.5135, 5.3068, 5.1238, 4.7433], device='cuda:0') +2024-09-20 04:33:07,329 INFO [train.py:1230] (0/2) Epoch 46, validation: loss=0.2132, ctc_loss=0.03625, cr_loss=6.411e-15, attn_decoder_loss=0.2328, over 944034.00 frames. +2024-09-20 04:33:07,329 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 04:33:09,149 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=814500.0, ans=0.0 +2024-09-20 04:33:12,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=814500.0, ans=0.125 +2024-09-20 04:33:16,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=814500.0, ans=0.125 +2024-09-20 04:33:18,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=814500.0, ans=0.2 +2024-09-20 04:33:36,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=814580.0, ans=0.125 +2024-09-20 04:33:45,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=814580.0, ans=0.2 +2024-09-20 04:33:50,315 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.01 vs. limit=15.0 +2024-09-20 04:33:51,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.75 vs. limit=12.0 +2024-09-20 04:34:24,686 INFO [train.py:1198] (0/2) Epoch 46, batch 50, loss[loss=0.2025, ctc_loss=0.09474, cr_loss=0.3192, attn_decoder_loss=0.2074, over 29409.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1114, cr_loss=0.3504, attn_decoder_loss=0.2382, over 1268269.42 frames. ], batch size: 70, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:35:19,989 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.869e+01 8.768e+01 9.324e+01 1.041e+02 2.439e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-20 04:35:26,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=814860.0, ans=0.0 +2024-09-20 04:35:41,095 INFO [train.py:1198] (0/2) Epoch 46, batch 100, loss[loss=0.221, ctc_loss=0.1046, cr_loss=0.3381, attn_decoder_loss=0.2264, over 29548.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1126, cr_loss=0.3537, attn_decoder_loss=0.24, over 2253032.30 frames. ], batch size: 76, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:35:42,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=814900.0, ans=0.125 +2024-09-20 04:35:52,705 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.96 vs. 
limit=12.0 +2024-09-20 04:35:59,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=814940.0, ans=0.125 +2024-09-20 04:36:00,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=814940.0, ans=0.0 +2024-09-20 04:36:21,627 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=814980.0, ans=0.125 +2024-09-20 04:36:35,666 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.48 vs. limit=15.0 +2024-09-20 04:36:36,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=815020.0, ans=0.2 +2024-09-20 04:36:55,412 INFO [train.py:1198] (0/2) Epoch 46, batch 150, loss[loss=0.2052, ctc_loss=0.09145, cr_loss=0.3075, attn_decoder_loss=0.211, over 29418.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1099, cr_loss=0.3488, attn_decoder_loss=0.2376, over 3047719.32 frames. ], batch size: 70, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:37:09,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=815140.0, ans=0.0 +2024-09-20 04:37:51,970 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.524e+01 8.420e+01 9.019e+01 9.584e+01 1.300e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-20 04:37:55,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=815220.0, ans=0.125 +2024-09-20 04:38:01,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=815260.0, ans=0.125 +2024-09-20 04:38:12,905 INFO [train.py:1198] (0/2) Epoch 46, batch 200, loss[loss=0.2384, ctc_loss=0.116, cr_loss=0.3758, attn_decoder_loss=0.2436, over 27307.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1094, cr_loss=0.3481, attn_decoder_loss=0.2372, over 3660184.39 frames. ], batch size: 124, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:38:16,855 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.31 vs. limit=12.0 +2024-09-20 04:38:20,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=815300.0, ans=0.125 +2024-09-20 04:38:26,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=815340.0, ans=0.125 +2024-09-20 04:38:32,666 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=815340.0, ans=0.0 +2024-09-20 04:38:34,823 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.37 vs. 
limit=6.0 +2024-09-20 04:38:49,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=815380.0, ans=0.125 +2024-09-20 04:38:52,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=815380.0, ans=0.125 +2024-09-20 04:38:53,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=815380.0, ans=0.125 +2024-09-20 04:39:16,316 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.12 vs. limit=15.0 +2024-09-20 04:39:21,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=815460.0, ans=0.04949747468305833 +2024-09-20 04:39:30,377 INFO [train.py:1198] (0/2) Epoch 46, batch 250, loss[loss=0.2511, ctc_loss=0.126, cr_loss=0.3759, attn_decoder_loss=0.2566, over 29262.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1086, cr_loss=0.3467, attn_decoder_loss=0.2368, over 4141981.70 frames. ], batch size: 100, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:39:33,716 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=815500.0, ans=0.025 +2024-09-20 04:39:37,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.50 vs. limit=15.0 +2024-09-20 04:39:47,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=815540.0, ans=0.05 +2024-09-20 04:40:02,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten.whitening_limit, batch_count=815580.0, ans=15.0 +2024-09-20 04:40:06,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=815580.0, ans=0.1 +2024-09-20 04:40:11,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=815580.0, ans=0.125 +2024-09-20 04:40:17,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=815620.0, ans=0.0 +2024-09-20 04:40:24,347 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.101e+01 8.615e+01 9.020e+01 9.569e+01 1.385e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-20 04:40:45,339 INFO [train.py:1198] (0/2) Epoch 46, batch 300, loss[loss=0.2549, ctc_loss=0.133, cr_loss=0.3967, attn_decoder_loss=0.2596, over 29520.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1088, cr_loss=0.3471, attn_decoder_loss=0.2369, over 4512202.25 frames. ], batch size: 92, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:40:45,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=815700.0, ans=0.125 +2024-09-20 04:40:59,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=815740.0, ans=0.1 +2024-09-20 04:41:08,700 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.07 vs. 
limit=15.0 +2024-09-20 04:41:09,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=815740.0, ans=0.1 +2024-09-20 04:42:02,790 INFO [train.py:1198] (0/2) Epoch 46, batch 350, loss[loss=0.2118, ctc_loss=0.08988, cr_loss=0.3069, attn_decoder_loss=0.2186, over 29328.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1092, cr_loss=0.348, attn_decoder_loss=0.2378, over 4797694.01 frames. ], batch size: 71, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:42:08,551 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.29 vs. limit=22.5 +2024-09-20 04:42:09,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=815900.0, ans=0.05 +2024-09-20 04:42:09,562 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0 +2024-09-20 04:42:25,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.54 vs. limit=15.0 +2024-09-20 04:42:37,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=815980.0, ans=0.1 +2024-09-20 04:42:39,132 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-204000.pt +2024-09-20 04:42:52,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=815980.0, ans=0.0 +2024-09-20 04:42:56,482 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.32 vs. limit=15.0 +2024-09-20 04:43:04,444 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.235e+01 8.751e+01 9.027e+01 9.740e+01 2.091e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-20 04:43:13,818 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=816060.0, ans=0.125 +2024-09-20 04:43:17,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=816060.0, ans=0.125 +2024-09-20 04:43:27,883 INFO [train.py:1198] (0/2) Epoch 46, batch 400, loss[loss=0.2409, ctc_loss=0.1145, cr_loss=0.3577, attn_decoder_loss=0.247, over 29702.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1085, cr_loss=0.3462, attn_decoder_loss=0.2372, over 5027050.34 frames. 
], batch size: 82, lr: 2.40e-03, grad_scale: 32.0 +2024-09-20 04:43:32,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=816100.0, ans=0.125 +2024-09-20 04:43:47,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=816140.0, ans=0.5 +2024-09-20 04:43:49,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=816140.0, ans=0.2 +2024-09-20 04:44:12,229 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=816220.0, ans=0.125 +2024-09-20 04:44:15,667 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=816220.0, ans=15.0 +2024-09-20 04:44:32,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=816260.0, ans=0.0 +2024-09-20 04:44:33,698 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=816260.0, ans=0.025 +2024-09-20 04:44:36,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=816260.0, ans=0.125 +2024-09-20 04:44:43,873 INFO [train.py:1198] (0/2) Epoch 46, batch 450, loss[loss=0.2432, ctc_loss=0.121, cr_loss=0.3785, attn_decoder_loss=0.2484, over 29707.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1088, cr_loss=0.3469, attn_decoder_loss=0.2376, over 5189556.49 frames. ], batch size: 83, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:44:59,388 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=816340.0, ans=0.0 +2024-09-20 04:45:11,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=816340.0, ans=0.0 +2024-09-20 04:45:19,680 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.08 vs. limit=22.5 +2024-09-20 04:45:40,120 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.987e+01 8.630e+01 9.037e+01 9.631e+01 6.120e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-20 04:45:43,688 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:45:58,886 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.01 vs. limit=6.0 +2024-09-20 04:46:02,236 INFO [train.py:1198] (0/2) Epoch 46, batch 500, loss[loss=0.2487, ctc_loss=0.1271, cr_loss=0.4, attn_decoder_loss=0.2533, over 29425.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1079, cr_loss=0.3452, attn_decoder_loss=0.2364, over 5331717.19 frames. 
], batch size: 94, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:46:06,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=816500.0, ans=0.015 +2024-09-20 04:46:32,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=816580.0, ans=0.2 +2024-09-20 04:46:35,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=816580.0, ans=0.0 +2024-09-20 04:46:37,158 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=816580.0, ans=0.0 +2024-09-20 04:46:53,314 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.44 vs. limit=22.5 +2024-09-20 04:47:01,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=816660.0, ans=0.1 +2024-09-20 04:47:20,144 INFO [train.py:1198] (0/2) Epoch 46, batch 550, loss[loss=0.2445, ctc_loss=0.1162, cr_loss=0.3583, attn_decoder_loss=0.2507, over 28832.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.108, cr_loss=0.3454, attn_decoder_loss=0.2363, over 5423114.66 frames. ], batch size: 104, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:47:28,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=816700.0, ans=0.1 +2024-09-20 04:47:31,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=816700.0, ans=0.07 +2024-09-20 04:47:32,537 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=816700.0, ans=0.0 +2024-09-20 04:47:37,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=816740.0, ans=0.2 +2024-09-20 04:47:38,524 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=816740.0, ans=0.125 +2024-09-20 04:47:40,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=816740.0, ans=0.0 +2024-09-20 04:47:47,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=816740.0, ans=0.2 +2024-09-20 04:47:57,591 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.05 vs. limit=15.0 +2024-09-20 04:48:16,368 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.351e+01 8.519e+01 9.115e+01 9.608e+01 2.263e+02, threshold=1.823e+02, percent-clipped=2.0 +2024-09-20 04:48:33,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=816860.0, ans=0.1 +2024-09-20 04:48:36,083 INFO [train.py:1198] (0/2) Epoch 46, batch 600, loss[loss=0.2451, ctc_loss=0.1222, cr_loss=0.3752, attn_decoder_loss=0.2504, over 29303.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.108, cr_loss=0.3455, attn_decoder_loss=0.2364, over 5509249.85 frames. 
], batch size: 100, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:48:45,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=816900.0, ans=0.125 +2024-09-20 04:48:47,574 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.12 vs. limit=15.0 +2024-09-20 04:49:09,314 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=816980.0, ans=0.025 +2024-09-20 04:49:10,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=816980.0, ans=0.025 +2024-09-20 04:49:13,130 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.37 vs. limit=22.5 +2024-09-20 04:49:49,610 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.42 vs. limit=15.0 +2024-09-20 04:49:53,434 INFO [train.py:1198] (0/2) Epoch 46, batch 650, loss[loss=0.232, ctc_loss=0.1035, cr_loss=0.3241, attn_decoder_loss=0.2391, over 29737.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1072, cr_loss=0.3444, attn_decoder_loss=0.236, over 5586977.24 frames. ], batch size: 81, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:50:11,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=817140.0, ans=0.125 +2024-09-20 04:50:15,369 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.37 vs. limit=6.0 +2024-09-20 04:50:16,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=817140.0, ans=0.2 +2024-09-20 04:50:16,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=817140.0, ans=0.07 +2024-09-20 04:50:23,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=817180.0, ans=0.125 +2024-09-20 04:50:24,075 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=817180.0, ans=0.0 +2024-09-20 04:50:36,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=817180.0, ans=0.125 +2024-09-20 04:50:45,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.90 vs. limit=15.0 +2024-09-20 04:50:49,216 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.561e+01 8.319e+01 8.831e+01 9.492e+01 1.301e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-20 04:51:08,832 INFO [train.py:1198] (0/2) Epoch 46, batch 700, loss[loss=0.2263, ctc_loss=0.1081, cr_loss=0.349, attn_decoder_loss=0.2316, over 29540.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.3455, attn_decoder_loss=0.2366, over 5638544.29 frames. 
], batch size: 76, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:51:13,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=817300.0, ans=0.0 +2024-09-20 04:51:30,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.79 vs. limit=15.0 +2024-09-20 04:51:48,713 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.95 vs. limit=22.5 +2024-09-20 04:51:49,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=817380.0, ans=0.125 +2024-09-20 04:52:04,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=817420.0, ans=0.0 +2024-09-20 04:52:10,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=817460.0, ans=0.125 +2024-09-20 04:52:27,358 INFO [train.py:1198] (0/2) Epoch 46, batch 750, loss[loss=0.2313, ctc_loss=0.1005, cr_loss=0.3292, attn_decoder_loss=0.2385, over 29722.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1075, cr_loss=0.3448, attn_decoder_loss=0.2361, over 5676713.41 frames. ], batch size: 82, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:52:33,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=817500.0, ans=0.125 +2024-09-20 04:52:34,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.18 vs. limit=15.0 +2024-09-20 04:52:44,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=817540.0, ans=0.125 +2024-09-20 04:53:10,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=817580.0, ans=0.0 +2024-09-20 04:53:23,361 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.270e+01 8.548e+01 9.089e+01 9.698e+01 1.282e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-20 04:53:24,336 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.33 vs. limit=22.5 +2024-09-20 04:53:40,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=817660.0, ans=0.125 +2024-09-20 04:53:43,746 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=817700.0, ans=0.125 +2024-09-20 04:53:44,978 INFO [train.py:1198] (0/2) Epoch 46, batch 800, loss[loss=0.2123, ctc_loss=0.0932, cr_loss=0.3111, attn_decoder_loss=0.2186, over 29613.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1077, cr_loss=0.3454, attn_decoder_loss=0.2361, over 5706339.64 frames. ], batch size: 73, lr: 2.40e-03, grad_scale: 32.0 +2024-09-20 04:53:52,136 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.85 vs. limit=12.0 +2024-09-20 04:53:58,036 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.86 vs. 
limit=15.0 +2024-09-20 04:53:58,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=817740.0, ans=0.5 +2024-09-20 04:54:03,890 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.97 vs. limit=15.0 +2024-09-20 04:54:19,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=817780.0, ans=0.125 +2024-09-20 04:54:23,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.04 vs. limit=6.0 +2024-09-20 04:54:27,858 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.01 vs. limit=15.0 +2024-09-20 04:54:38,161 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:54:40,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=817820.0, ans=0.125 +2024-09-20 04:55:00,147 INFO [train.py:1198] (0/2) Epoch 46, batch 850, loss[loss=0.2422, ctc_loss=0.1159, cr_loss=0.3637, attn_decoder_loss=0.2482, over 29723.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1074, cr_loss=0.3449, attn_decoder_loss=0.2358, over 5734901.00 frames. ], batch size: 89, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:55:11,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.25 vs. limit=22.5 +2024-09-20 04:55:13,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=817940.0, ans=0.125 +2024-09-20 04:55:19,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=817940.0, ans=0.0 +2024-09-20 04:55:26,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=817940.0, ans=0.1 +2024-09-20 04:55:29,058 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.63 vs. limit=15.0 +2024-09-20 04:55:36,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=817980.0, ans=0.125 +2024-09-20 04:55:45,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=817980.0, ans=0.0 +2024-09-20 04:55:52,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=818020.0, ans=0.0 +2024-09-20 04:55:57,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=818020.0, ans=0.0 +2024-09-20 04:55:59,849 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.524e+01 9.066e+01 9.505e+01 2.667e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-20 04:56:18,041 INFO [train.py:1198] (0/2) Epoch 46, batch 900, loss[loss=0.2131, ctc_loss=0.08737, cr_loss=0.2951, attn_decoder_loss=0.2205, over 29600.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1078, cr_loss=0.3455, attn_decoder_loss=0.2365, over 5739987.75 frames. 
], batch size: 73, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:56:33,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=818140.0, ans=0.125 +2024-09-20 04:56:57,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=818180.0, ans=0.035 +2024-09-20 04:57:18,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=818260.0, ans=0.125 +2024-09-20 04:57:19,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=818260.0, ans=0.0 +2024-09-20 04:57:30,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=818260.0, ans=0.125 +2024-09-20 04:57:34,894 INFO [train.py:1198] (0/2) Epoch 46, batch 950, loss[loss=0.2148, ctc_loss=0.08928, cr_loss=0.2967, attn_decoder_loss=0.2222, over 29524.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.108, cr_loss=0.3455, attn_decoder_loss=0.2368, over 5742962.42 frames. ], batch size: 74, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:57:42,599 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:57:44,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=818300.0, ans=0.0 +2024-09-20 04:58:09,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=818380.0, ans=0.0 +2024-09-20 04:58:32,435 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.692e+01 9.271e+01 9.926e+01 1.686e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-20 04:58:49,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=818500.0, ans=0.5 +2024-09-20 04:58:50,240 INFO [train.py:1198] (0/2) Epoch 46, batch 1000, loss[loss=0.2208, ctc_loss=0.09965, cr_loss=0.3393, attn_decoder_loss=0.2267, over 29508.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1086, cr_loss=0.3464, attn_decoder_loss=0.2375, over 5736594.08 frames. ], batch size: 77, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:59:54,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=818660.0, ans=0.05 +2024-09-20 05:00:07,746 INFO [train.py:1198] (0/2) Epoch 46, batch 1050, loss[loss=0.2407, ctc_loss=0.1176, cr_loss=0.3665, attn_decoder_loss=0.2462, over 29658.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1084, cr_loss=0.3459, attn_decoder_loss=0.237, over 5743532.19 frames. ], batch size: 85, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:00:15,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=818700.0, ans=0.1 +2024-09-20 05:00:17,339 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.81 vs. limit=12.0 +2024-09-20 05:00:37,492 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.53 vs. 
limit=15.0 +2024-09-20 05:01:05,428 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.267e+01 8.722e+01 9.094e+01 9.715e+01 1.593e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-20 05:01:11,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=818860.0, ans=0.2 +2024-09-20 05:01:25,796 INFO [train.py:1198] (0/2) Epoch 46, batch 1100, loss[loss=0.232, ctc_loss=0.1095, cr_loss=0.3682, attn_decoder_loss=0.2375, over 29469.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1085, cr_loss=0.3466, attn_decoder_loss=0.2371, over 5755735.45 frames. ], batch size: 78, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:01:36,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=818900.0, ans=0.125 +2024-09-20 05:01:47,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=818940.0, ans=0.2 +2024-09-20 05:01:51,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=818940.0, ans=0.025 +2024-09-20 05:01:56,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=818980.0, ans=0.125 +2024-09-20 05:02:02,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=818980.0, ans=0.0 +2024-09-20 05:02:05,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=818980.0, ans=0.125 +2024-09-20 05:02:19,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=819020.0, ans=0.0 +2024-09-20 05:02:41,364 INFO [train.py:1198] (0/2) Epoch 46, batch 1150, loss[loss=0.2247, ctc_loss=0.1119, cr_loss=0.3543, attn_decoder_loss=0.2294, over 29433.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1086, cr_loss=0.3462, attn_decoder_loss=0.2368, over 5754864.75 frames. ], batch size: 78, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:02:51,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=819100.0, ans=0.0 +2024-09-20 05:03:17,616 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=819180.0, ans=0.125 +2024-09-20 05:03:17,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=819180.0, ans=0.125 +2024-09-20 05:03:35,033 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.85 vs. limit=12.0 +2024-09-20 05:03:41,726 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.318e+01 8.675e+01 9.165e+01 9.732e+01 5.471e+02, threshold=1.833e+02, percent-clipped=2.0 +2024-09-20 05:03:59,540 INFO [train.py:1198] (0/2) Epoch 46, batch 1200, loss[loss=0.2251, ctc_loss=0.09975, cr_loss=0.3244, attn_decoder_loss=0.2318, over 29703.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1087, cr_loss=0.3467, attn_decoder_loss=0.2373, over 5748693.47 frames. 
], batch size: 85, lr: 2.40e-03, grad_scale: 32.0 +2024-09-20 05:04:17,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=819340.0, ans=0.0 +2024-09-20 05:04:37,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=819380.0, ans=0.1 +2024-09-20 05:05:02,015 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=819460.0, ans=0.0 +2024-09-20 05:05:08,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=819460.0, ans=0.1 +2024-09-20 05:05:17,174 INFO [train.py:1198] (0/2) Epoch 46, batch 1250, loss[loss=0.2583, ctc_loss=0.1331, cr_loss=0.4009, attn_decoder_loss=0.2633, over 29487.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1096, cr_loss=0.3486, attn_decoder_loss=0.2382, over 5775356.27 frames. ], batch size: 92, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:05:53,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=819580.0, ans=0.125 +2024-09-20 05:06:16,052 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.294e+01 8.479e+01 9.052e+01 9.530e+01 1.493e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-20 05:06:32,583 INFO [train.py:1198] (0/2) Epoch 46, batch 1300, loss[loss=0.2508, ctc_loss=0.1151, cr_loss=0.3614, attn_decoder_loss=0.2578, over 28240.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1092, cr_loss=0.3477, attn_decoder_loss=0.2376, over 5778317.20 frames. ], batch size: 111, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:06:40,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=819700.0, ans=0.0 +2024-09-20 05:06:44,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=819700.0, ans=0.0 +2024-09-20 05:07:01,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=819780.0, ans=10.0 +2024-09-20 05:07:14,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=819780.0, ans=0.1 +2024-09-20 05:07:15,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=819780.0, ans=0.125 +2024-09-20 05:07:18,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.76 vs. limit=15.0 +2024-09-20 05:07:20,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=819820.0, ans=0.0 +2024-09-20 05:07:29,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=819820.0, ans=0.0 +2024-09-20 05:07:40,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=819860.0, ans=0.125 +2024-09-20 05:07:41,025 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.09 vs. 
limit=6.0 +2024-09-20 05:07:49,800 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.97 vs. limit=6.0 +2024-09-20 05:07:50,558 INFO [train.py:1198] (0/2) Epoch 46, batch 1350, loss[loss=0.2356, ctc_loss=0.1143, cr_loss=0.3571, attn_decoder_loss=0.2412, over 29752.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1086, cr_loss=0.3472, attn_decoder_loss=0.2372, over 5795096.60 frames. ], batch size: 81, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:07:58,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=819900.0, ans=0.1 +2024-09-20 05:08:19,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=819980.0, ans=0.125 +2024-09-20 05:08:32,361 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn1.whiten.whitening_limit, batch_count=819980.0, ans=22.5 +2024-09-20 05:08:49,306 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.548e+01 8.548e+01 9.009e+01 9.440e+01 1.283e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-20 05:08:51,448 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.64 vs. limit=15.0 +2024-09-20 05:08:54,268 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:09:05,332 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:09:06,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=820100.0, ans=0.05 +2024-09-20 05:09:08,086 INFO [train.py:1198] (0/2) Epoch 46, batch 1400, loss[loss=0.1997, ctc_loss=0.08563, cr_loss=0.2873, attn_decoder_loss=0.2059, over 29591.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1082, cr_loss=0.346, attn_decoder_loss=0.2369, over 5806080.97 frames. ], batch size: 69, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:09:17,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=820100.0, ans=0.0 +2024-09-20 05:09:18,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=820100.0, ans=0.125 +2024-09-20 05:09:21,302 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.17 vs. 
limit=15.0 +2024-09-20 05:09:35,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=820140.0, ans=0.1 +2024-09-20 05:09:36,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=820180.0, ans=0.0 +2024-09-20 05:09:37,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=820180.0, ans=0.125 +2024-09-20 05:09:38,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=820180.0, ans=0.025 +2024-09-20 05:09:47,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=820180.0, ans=0.125 +2024-09-20 05:09:51,253 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.55 vs. limit=15.0 +2024-09-20 05:09:51,375 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.59 vs. limit=15.0 +2024-09-20 05:09:55,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=820220.0, ans=0.0 +2024-09-20 05:10:20,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=820260.0, ans=0.2 +2024-09-20 05:10:23,185 INFO [train.py:1198] (0/2) Epoch 46, batch 1450, loss[loss=0.2482, ctc_loss=0.1225, cr_loss=0.3836, attn_decoder_loss=0.2537, over 29431.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1082, cr_loss=0.3463, attn_decoder_loss=0.2371, over 5802956.93 frames. ], batch size: 94, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:10:27,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=820300.0, ans=0.0 +2024-09-20 05:10:35,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=820300.0, ans=0.0 +2024-09-20 05:10:41,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=820340.0, ans=0.1 +2024-09-20 05:10:46,220 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=820340.0, ans=0.0 +2024-09-20 05:11:03,244 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.13 vs. limit=22.5 +2024-09-20 05:11:10,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=820420.0, ans=0.125 +2024-09-20 05:11:20,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.86 vs. 
limit=22.5 +2024-09-20 05:11:23,832 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 8.716e+01 9.154e+01 9.658e+01 1.732e+02, threshold=1.831e+02, percent-clipped=0.0 +2024-09-20 05:11:24,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=820460.0, ans=0.07 +2024-09-20 05:11:26,254 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.12 vs. limit=15.0 +2024-09-20 05:11:40,539 INFO [train.py:1198] (0/2) Epoch 46, batch 1500, loss[loss=0.2429, ctc_loss=0.1172, cr_loss=0.3587, attn_decoder_loss=0.2489, over 29619.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.109, cr_loss=0.3482, attn_decoder_loss=0.2378, over 5804043.11 frames. ], batch size: 86, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:11:56,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=820540.0, ans=0.5 +2024-09-20 05:11:58,144 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.76 vs. limit=22.5 +2024-09-20 05:11:59,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=820540.0, ans=0.0 +2024-09-20 05:12:09,848 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=820580.0, ans=0.125 +2024-09-20 05:12:11,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=820580.0, ans=0.125 +2024-09-20 05:12:40,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten.whitening_limit, batch_count=820660.0, ans=15.0 +2024-09-20 05:12:41,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=820660.0, ans=0.0 +2024-09-20 05:12:44,745 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=820660.0, ans=0.2 +2024-09-20 05:12:57,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=820700.0, ans=0.0 +2024-09-20 05:12:58,449 INFO [train.py:1198] (0/2) Epoch 46, batch 1550, loss[loss=0.2467, ctc_loss=0.1291, cr_loss=0.3944, attn_decoder_loss=0.251, over 29499.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1095, cr_loss=0.3492, attn_decoder_loss=0.2379, over 5780323.36 frames. ], batch size: 90, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:13:17,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=820740.0, ans=22.5 +2024-09-20 05:13:18,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.23 vs. limit=15.0 +2024-09-20 05:13:20,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.03 vs. 
limit=12.0 +2024-09-20 05:13:27,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.min_positive, batch_count=820780.0, ans=0.025 +2024-09-20 05:13:31,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=820780.0, ans=0.125 +2024-09-20 05:13:56,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=820820.0, ans=0.0 +2024-09-20 05:13:57,690 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.852e+01 8.666e+01 9.165e+01 9.955e+01 1.733e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-20 05:14:05,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=820860.0, ans=0.125 +2024-09-20 05:14:14,135 INFO [train.py:1198] (0/2) Epoch 46, batch 1600, loss[loss=0.2353, ctc_loss=0.1091, cr_loss=0.3541, attn_decoder_loss=0.2415, over 29670.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1093, cr_loss=0.3481, attn_decoder_loss=0.2374, over 5763542.32 frames. ], batch size: 85, lr: 2.39e-03, grad_scale: 32.0 +2024-09-20 05:14:38,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=820940.0, ans=0.125 +2024-09-20 05:15:10,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=821020.0, ans=0.1 +2024-09-20 05:15:22,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=821060.0, ans=0.95 +2024-09-20 05:15:31,302 INFO [train.py:1198] (0/2) Epoch 46, batch 1650, loss[loss=0.2287, ctc_loss=0.09893, cr_loss=0.338, attn_decoder_loss=0.2356, over 29693.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1092, cr_loss=0.348, attn_decoder_loss=0.2375, over 5756542.63 frames. ], batch size: 89, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:15:33,900 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.14 vs. limit=10.0 +2024-09-20 05:15:39,307 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:15:49,402 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=821140.0, ans=0.07 +2024-09-20 05:15:49,935 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.93 vs. limit=10.0 +2024-09-20 05:16:31,206 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.361e+01 8.592e+01 9.131e+01 9.784e+01 1.419e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-20 05:16:48,221 INFO [train.py:1198] (0/2) Epoch 46, batch 1700, loss[loss=0.2074, ctc_loss=0.0965, cr_loss=0.3345, attn_decoder_loss=0.2123, over 29581.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1088, cr_loss=0.3475, attn_decoder_loss=0.237, over 5778876.77 frames. 
], batch size: 69, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:16:54,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=821300.0, ans=0.0 +2024-09-20 05:17:00,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=821300.0, ans=0.2 +2024-09-20 05:17:08,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=821340.0, ans=0.1 +2024-09-20 05:17:11,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=821340.0, ans=0.0 +2024-09-20 05:17:18,906 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=11.91 vs. limit=15.0 +2024-09-20 05:17:19,171 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.80 vs. limit=15.0 +2024-09-20 05:17:30,491 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=821380.0, ans=0.0 +2024-09-20 05:17:39,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=821420.0, ans=0.0 +2024-09-20 05:17:40,742 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=821420.0, ans=0.125 +2024-09-20 05:17:50,481 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.31 vs. limit=15.0 +2024-09-20 05:18:00,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=821460.0, ans=0.04949747468305833 +2024-09-20 05:18:03,233 INFO [train.py:1198] (0/2) Epoch 46, batch 1750, loss[loss=0.2053, ctc_loss=0.08617, cr_loss=0.2837, attn_decoder_loss=0.2123, over 29351.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1083, cr_loss=0.346, attn_decoder_loss=0.2367, over 5786803.38 frames. ], batch size: 67, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:18:11,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.02 vs. limit=15.0 +2024-09-20 05:19:06,023 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.095e+01 8.682e+01 9.175e+01 9.617e+01 1.208e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-20 05:19:13,705 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=821660.0, ans=0.0 +2024-09-20 05:19:16,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=821660.0, ans=0.125 +2024-09-20 05:19:20,743 INFO [train.py:1198] (0/2) Epoch 46, batch 1800, loss[loss=0.2334, ctc_loss=0.1119, cr_loss=0.3409, attn_decoder_loss=0.2393, over 29671.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1086, cr_loss=0.3467, attn_decoder_loss=0.2369, over 5790404.03 frames. ], batch size: 83, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:19:43,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.05 vs. 
limit=15.0 +2024-09-20 05:20:06,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=821820.0, ans=0.1 +2024-09-20 05:20:18,306 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=821820.0, ans=0.0 +2024-09-20 05:20:38,155 INFO [train.py:1198] (0/2) Epoch 46, batch 1850, loss[loss=0.2324, ctc_loss=0.1036, cr_loss=0.3256, attn_decoder_loss=0.2394, over 29650.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1088, cr_loss=0.3471, attn_decoder_loss=0.2372, over 5796699.03 frames. ], batch size: 86, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:20:39,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=821900.0, ans=0.125 +2024-09-20 05:20:45,077 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0 +2024-09-20 05:20:48,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=821900.0, ans=0.2 +2024-09-20 05:21:14,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=821980.0, ans=0.0 +2024-09-20 05:21:38,309 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.493e+01 8.515e+01 9.175e+01 9.634e+01 2.306e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-20 05:21:49,650 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.09 vs. limit=22.5 +2024-09-20 05:21:53,030 INFO [train.py:1198] (0/2) Epoch 46, batch 1900, loss[loss=0.2456, ctc_loss=0.1143, cr_loss=0.3666, attn_decoder_loss=0.252, over 29710.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.109, cr_loss=0.3475, attn_decoder_loss=0.2376, over 5803691.66 frames. ], batch size: 89, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:21:59,569 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=822100.0, ans=0.125 +2024-09-20 05:22:10,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=822140.0, ans=0.025 +2024-09-20 05:22:19,882 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.35 vs. limit=6.0 +2024-09-20 05:22:22,832 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.73 vs. limit=15.0 +2024-09-20 05:22:57,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=822260.0, ans=0.125 +2024-09-20 05:23:05,564 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.34 vs. limit=22.5 +2024-09-20 05:23:10,796 INFO [train.py:1198] (0/2) Epoch 46, batch 1950, loss[loss=0.2269, ctc_loss=0.1056, cr_loss=0.3451, attn_decoder_loss=0.2327, over 29445.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1096, cr_loss=0.3491, attn_decoder_loss=0.2387, over 5818357.50 frames. 
], batch size: 78, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:23:16,430 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.98 vs. limit=22.5 +2024-09-20 05:23:23,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten.whitening_limit, batch_count=822300.0, ans=15.0 +2024-09-20 05:23:30,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=822340.0, ans=0.1 +2024-09-20 05:23:51,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=822380.0, ans=0.125 +2024-09-20 05:23:54,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=822420.0, ans=0.125 +2024-09-20 05:24:08,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=822420.0, ans=0.0 +2024-09-20 05:24:08,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=822420.0, ans=0.0 +2024-09-20 05:24:10,975 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.887e+01 8.689e+01 9.269e+01 9.722e+01 1.487e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-20 05:24:18,022 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=822460.0, ans=0.0 +2024-09-20 05:24:23,997 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=822460.0, ans=0.1 +2024-09-20 05:24:25,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=822460.0, ans=0.125 +2024-09-20 05:24:28,127 INFO [train.py:1198] (0/2) Epoch 46, batch 2000, loss[loss=0.2075, ctc_loss=0.09306, cr_loss=0.3121, attn_decoder_loss=0.2132, over 29380.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1098, cr_loss=0.3495, attn_decoder_loss=0.239, over 5796736.44 frames. ], batch size: 67, lr: 2.39e-03, grad_scale: 32.0 +2024-09-20 05:24:45,153 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:24:48,361 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.55 vs. limit=15.0 +2024-09-20 05:24:49,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=822540.0, ans=0.125 +2024-09-20 05:24:52,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=822540.0, ans=0.125 +2024-09-20 05:25:03,892 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.27 vs. limit=10.0 +2024-09-20 05:25:10,373 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=822580.0, ans=0.125 +2024-09-20 05:25:17,169 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.82 vs. 
limit=15.0 +2024-09-20 05:25:19,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=822620.0, ans=0.0 +2024-09-20 05:25:19,644 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=822620.0, ans=0.0 +2024-09-20 05:25:31,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=822660.0, ans=0.2 +2024-09-20 05:25:43,655 INFO [train.py:1198] (0/2) Epoch 46, batch 2050, loss[loss=0.2039, ctc_loss=0.09426, cr_loss=0.3426, attn_decoder_loss=0.2085, over 29401.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1092, cr_loss=0.3482, attn_decoder_loss=0.2381, over 5788761.45 frames. ], batch size: 70, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:25:49,224 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.42 vs. limit=15.0 +2024-09-20 05:26:23,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=822780.0, ans=0.0 +2024-09-20 05:26:32,782 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.28 vs. limit=15.0 +2024-09-20 05:26:34,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=822820.0, ans=0.125 +2024-09-20 05:26:35,445 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=822820.0, ans=0.0 +2024-09-20 05:26:41,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=822820.0, ans=0.025 +2024-09-20 05:26:47,715 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.463e+01 8.548e+01 9.000e+01 9.590e+01 1.636e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-20 05:26:55,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=822860.0, ans=0.025 +2024-09-20 05:27:01,504 INFO [train.py:1198] (0/2) Epoch 46, batch 2100, loss[loss=0.2325, ctc_loss=0.1144, cr_loss=0.3636, attn_decoder_loss=0.2375, over 29762.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1088, cr_loss=0.3473, attn_decoder_loss=0.2376, over 5801100.88 frames. ], batch size: 81, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:27:16,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=822940.0, ans=0.025 +2024-09-20 05:27:16,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=822940.0, ans=0.0 +2024-09-20 05:27:25,869 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=822940.0, ans=0.5 +2024-09-20 05:27:29,376 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.10 vs. 
limit=15.0 +2024-09-20 05:27:49,548 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=823020.0, ans=0.125 +2024-09-20 05:27:52,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=823020.0, ans=0.125 +2024-09-20 05:27:53,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=823020.0, ans=0.125 +2024-09-20 05:28:04,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=823060.0, ans=0.0 +2024-09-20 05:28:15,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=823060.0, ans=0.0 +2024-09-20 05:28:18,248 INFO [train.py:1198] (0/2) Epoch 46, batch 2150, loss[loss=0.2278, ctc_loss=0.1088, cr_loss=0.3489, attn_decoder_loss=0.2333, over 29451.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1081, cr_loss=0.3463, attn_decoder_loss=0.2369, over 5816135.80 frames. ], batch size: 78, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:28:47,217 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:28:56,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=823180.0, ans=0.0 +2024-09-20 05:29:01,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=823180.0, ans=0.125 +2024-09-20 05:29:06,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.07 vs. limit=6.0 +2024-09-20 05:29:10,182 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=823220.0, ans=0.125 +2024-09-20 05:29:12,198 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.08 vs. limit=6.0 +2024-09-20 05:29:13,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=823220.0, ans=0.0 +2024-09-20 05:29:20,447 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 8.498e+01 9.079e+01 9.733e+01 1.239e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-20 05:29:25,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=823260.0, ans=0.025 +2024-09-20 05:29:34,168 INFO [train.py:1198] (0/2) Epoch 46, batch 2200, loss[loss=0.253, ctc_loss=0.1249, cr_loss=0.3699, attn_decoder_loss=0.2591, over 29603.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1081, cr_loss=0.3463, attn_decoder_loss=0.237, over 5813419.41 frames. ], batch size: 86, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:29:40,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=823300.0, ans=0.2 +2024-09-20 05:29:45,653 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.40 vs. 
limit=6.0 +2024-09-20 05:29:47,191 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=4.77 vs. limit=15.0 +2024-09-20 05:29:55,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=823340.0, ans=0.0 +2024-09-20 05:29:55,541 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:29:57,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=823340.0, ans=0.025 +2024-09-20 05:30:01,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=823340.0, ans=0.025 +2024-09-20 05:30:03,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=823380.0, ans=0.0 +2024-09-20 05:30:15,302 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=823380.0, ans=0.1 +2024-09-20 05:30:21,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=823420.0, ans=0.125 +2024-09-20 05:30:24,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=823420.0, ans=0.125 +2024-09-20 05:30:30,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=823420.0, ans=0.025 +2024-09-20 05:30:52,018 INFO [train.py:1198] (0/2) Epoch 46, batch 2250, loss[loss=0.2395, ctc_loss=0.1162, cr_loss=0.3738, attn_decoder_loss=0.2448, over 29725.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1082, cr_loss=0.3466, attn_decoder_loss=0.2371, over 5812323.06 frames. ], batch size: 82, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:31:11,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=823540.0, ans=0.1 +2024-09-20 05:31:34,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.57 vs. limit=22.5 +2024-09-20 05:31:35,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=823620.0, ans=0.0 +2024-09-20 05:31:44,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=823620.0, ans=0.09899494936611666 +2024-09-20 05:31:53,371 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.133e+01 8.446e+01 9.116e+01 9.634e+01 2.292e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-20 05:31:55,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=823660.0, ans=0.035 +2024-09-20 05:32:08,792 INFO [train.py:1198] (0/2) Epoch 46, batch 2300, loss[loss=0.2137, ctc_loss=0.09189, cr_loss=0.3222, attn_decoder_loss=0.2201, over 29313.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1076, cr_loss=0.3452, attn_decoder_loss=0.2361, over 5799003.90 frames. ], batch size: 71, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:32:14,079 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.94 vs. 
limit=10.0 +2024-09-20 05:32:16,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=823700.0, ans=0.125 +2024-09-20 05:32:21,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=823700.0, ans=0.0 +2024-09-20 05:32:22,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=823740.0, ans=0.125 +2024-09-20 05:32:30,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=823740.0, ans=0.125 +2024-09-20 05:32:36,656 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=16.41 vs. limit=15.0 +2024-09-20 05:33:06,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=823820.0, ans=0.025 +2024-09-20 05:33:23,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=823900.0, ans=0.125 +2024-09-20 05:33:24,354 INFO [train.py:1198] (0/2) Epoch 46, batch 2350, loss[loss=0.252, ctc_loss=0.1233, cr_loss=0.3879, attn_decoder_loss=0.2577, over 29692.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1082, cr_loss=0.3468, attn_decoder_loss=0.2366, over 5803995.84 frames. ], batch size: 83, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:33:28,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=823900.0, ans=0.2 +2024-09-20 05:33:48,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=823940.0, ans=0.2 +2024-09-20 05:34:26,077 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.494e+01 8.622e+01 9.111e+01 9.786e+01 2.523e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-20 05:34:39,832 INFO [train.py:1198] (0/2) Epoch 46, batch 2400, loss[loss=0.2077, ctc_loss=0.08687, cr_loss=0.2951, attn_decoder_loss=0.2146, over 29547.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1087, cr_loss=0.3479, attn_decoder_loss=0.2371, over 5808766.06 frames. ], batch size: 76, lr: 2.39e-03, grad_scale: 32.0 +2024-09-20 05:35:06,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=824140.0, ans=0.125 +2024-09-20 05:35:14,345 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.68 vs. 
limit=10.0 +2024-09-20 05:35:24,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=824180.0, ans=0.125 +2024-09-20 05:35:32,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=824220.0, ans=0.125 +2024-09-20 05:35:42,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=824260.0, ans=0.0 +2024-09-20 05:35:45,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=824260.0, ans=0.125 +2024-09-20 05:35:52,881 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.75 vs. limit=15.0 +2024-09-20 05:35:59,548 INFO [train.py:1198] (0/2) Epoch 46, batch 2450, loss[loss=0.2407, ctc_loss=0.117, cr_loss=0.3709, attn_decoder_loss=0.2462, over 29703.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1099, cr_loss=0.3501, attn_decoder_loss=0.2384, over 5785591.41 frames. ], batch size: 82, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:36:06,189 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.99 vs. limit=15.0 +2024-09-20 05:36:16,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=824340.0, ans=0.025 +2024-09-20 05:36:24,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=824340.0, ans=22.5 +2024-09-20 05:36:40,817 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.88 vs. limit=15.0 +2024-09-20 05:36:47,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=824420.0, ans=0.0 +2024-09-20 05:36:52,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=824420.0, ans=0.2 +2024-09-20 05:36:55,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=824420.0, ans=0.0 +2024-09-20 05:37:02,670 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.742e+01 8.776e+01 9.478e+01 1.012e+02 4.785e+02, threshold=1.896e+02, percent-clipped=1.0 +2024-09-20 05:37:07,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=824460.0, ans=0.125 +2024-09-20 05:37:14,644 INFO [train.py:1198] (0/2) Epoch 46, batch 2500, loss[loss=0.2433, ctc_loss=0.1214, cr_loss=0.3839, attn_decoder_loss=0.2483, over 29648.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1099, cr_loss=0.35, attn_decoder_loss=0.2383, over 5795592.93 frames. ], batch size: 86, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:37:42,754 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.26 vs. 
limit=6.0 +2024-09-20 05:37:48,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=824580.0, ans=0.0 +2024-09-20 05:38:30,353 INFO [train.py:1198] (0/2) Epoch 46, batch 2550, loss[loss=0.2131, ctc_loss=0.1083, cr_loss=0.3375, attn_decoder_loss=0.2173, over 29344.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1096, cr_loss=0.3495, attn_decoder_loss=0.2381, over 5798486.00 frames. ], batch size: 67, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:38:32,292 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=824700.0, ans=0.125 +2024-09-20 05:38:43,774 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.29 vs. limit=15.0 +2024-09-20 05:38:58,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=824740.0, ans=0.1 +2024-09-20 05:39:02,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=824780.0, ans=0.125 +2024-09-20 05:39:02,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=824780.0, ans=0.2 +2024-09-20 05:39:19,695 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=824820.0, ans=0.0 +2024-09-20 05:39:27,450 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=824820.0, ans=0.0 +2024-09-20 05:39:36,274 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.630e+01 8.555e+01 9.140e+01 9.726e+01 1.841e+02, threshold=1.828e+02, percent-clipped=0.0 +2024-09-20 05:39:44,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=824860.0, ans=0.125 +2024-09-20 05:39:50,515 INFO [train.py:1198] (0/2) Epoch 46, batch 2600, loss[loss=0.2221, ctc_loss=0.1019, cr_loss=0.3426, attn_decoder_loss=0.2278, over 29462.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1094, cr_loss=0.3492, attn_decoder_loss=0.2382, over 5794100.49 frames. ], batch size: 78, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:39:52,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=824900.0, ans=0.015 +2024-09-20 05:39:52,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=824900.0, ans=0.1 +2024-09-20 05:40:19,740 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.42 vs. limit=10.0 +2024-09-20 05:40:28,746 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.63 vs. limit=22.5 +2024-09-20 05:40:43,946 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.69 vs. 
limit=15.0 +2024-09-20 05:40:46,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=825020.0, ans=0.2 +2024-09-20 05:41:05,513 INFO [train.py:1198] (0/2) Epoch 46, batch 2650, loss[loss=0.2372, ctc_loss=0.1094, cr_loss=0.3417, attn_decoder_loss=0.2438, over 29266.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1094, cr_loss=0.3493, attn_decoder_loss=0.2383, over 5800865.16 frames. ], batch size: 100, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:41:26,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.52 vs. limit=15.0 +2024-09-20 05:41:27,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=825140.0, ans=0.1 +2024-09-20 05:41:31,903 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.59 vs. limit=15.0 +2024-09-20 05:41:35,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=825180.0, ans=0.1 +2024-09-20 05:41:46,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=825180.0, ans=0.125 +2024-09-20 05:42:09,969 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.392e+01 8.633e+01 9.169e+01 9.571e+01 1.241e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-20 05:42:20,700 INFO [train.py:1198] (0/2) Epoch 46, batch 2700, loss[loss=0.2392, ctc_loss=0.1101, cr_loss=0.3381, attn_decoder_loss=0.2461, over 29512.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1096, cr_loss=0.3498, attn_decoder_loss=0.2386, over 5795882.21 frames. ], batch size: 87, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:42:50,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=825340.0, ans=0.125 +2024-09-20 05:43:05,277 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=825380.0, ans=0.1 +2024-09-20 05:43:40,492 INFO [train.py:1198] (0/2) Epoch 46, batch 2750, loss[loss=0.2246, ctc_loss=0.1022, cr_loss=0.3486, attn_decoder_loss=0.2305, over 29516.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1089, cr_loss=0.3481, attn_decoder_loss=0.2376, over 5792944.42 frames. 
], batch size: 75, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:43:44,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten.whitening_limit, batch_count=825500.0, ans=15.0 +2024-09-20 05:44:15,452 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:44:32,451 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=825620.0, ans=0.125 +2024-09-20 05:44:38,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=825620.0, ans=0.2 +2024-09-20 05:44:46,008 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.730e+01 8.644e+01 9.121e+01 9.722e+01 2.212e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-20 05:44:55,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=825700.0, ans=0.0 +2024-09-20 05:44:56,658 INFO [train.py:1198] (0/2) Epoch 46, batch 2800, loss[loss=0.2393, ctc_loss=0.116, cr_loss=0.3294, attn_decoder_loss=0.2456, over 20031.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1094, cr_loss=0.3488, attn_decoder_loss=0.2378, over 5775894.28 frames. ], batch size: 209, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:44:56,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=825700.0, ans=0.2 +2024-09-20 05:44:58,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=825700.0, ans=0.125 +2024-09-20 05:44:58,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=825700.0, ans=0.0 +2024-09-20 05:45:20,091 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.40 vs. limit=12.0 +2024-09-20 05:45:35,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=825780.0, ans=0.0 +2024-09-20 05:45:39,350 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.32 vs. limit=22.5 +2024-09-20 05:45:57,347 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.40 vs. limit=15.0 +2024-09-20 05:46:11,555 INFO [train.py:1198] (0/2) Epoch 46, batch 2850, loss[loss=0.2213, ctc_loss=0.1018, cr_loss=0.3195, attn_decoder_loss=0.2275, over 29518.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1098, cr_loss=0.3494, attn_decoder_loss=0.2382, over 5762250.44 frames. 
], batch size: 77, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:46:24,533 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=825900.0, ans=0.2 +2024-09-20 05:46:36,665 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=825940.0, ans=0.125 +2024-09-20 05:47:16,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=826060.0, ans=0.1 +2024-09-20 05:47:22,089 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.622e+01 8.726e+01 9.166e+01 9.745e+01 2.049e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-20 05:47:31,176 INFO [train.py:1198] (0/2) Epoch 46, batch 2900, loss[loss=0.2208, ctc_loss=0.09948, cr_loss=0.3225, attn_decoder_loss=0.2271, over 29417.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1104, cr_loss=0.3506, attn_decoder_loss=0.239, over 5788154.42 frames. ], batch size: 79, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:47:32,254 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.02 vs. limit=22.5 +2024-09-20 05:47:33,476 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.03 vs. limit=15.0 +2024-09-20 05:47:43,444 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=826100.0, ans=0.0 +2024-09-20 05:47:52,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=826140.0, ans=0.025 +2024-09-20 05:48:01,387 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=826180.0, ans=0.125 +2024-09-20 05:48:05,978 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=826180.0, ans=0.1 +2024-09-20 05:48:10,665 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:48:28,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=826220.0, ans=0.0 +2024-09-20 05:48:36,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=826260.0, ans=0.2 +2024-09-20 05:48:39,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=826260.0, ans=0.1 +2024-09-20 05:48:46,536 INFO [train.py:1198] (0/2) Epoch 46, batch 2950, loss[loss=0.2216, ctc_loss=0.1033, cr_loss=0.3437, attn_decoder_loss=0.2271, over 29516.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.109, cr_loss=0.3474, attn_decoder_loss=0.2373, over 5783431.27 frames. ], batch size: 75, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:48:47,370 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.92 vs. 
limit=15.0 +2024-09-20 05:48:51,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=826300.0, ans=0.0 +2024-09-20 05:48:57,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=826300.0, ans=10.0 +2024-09-20 05:49:04,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=826340.0, ans=0.0 +2024-09-20 05:49:27,839 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=826380.0, ans=0.1 +2024-09-20 05:49:30,809 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=826420.0, ans=0.0 +2024-09-20 05:49:50,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=826460.0, ans=0.1 +2024-09-20 05:49:53,238 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.543e+01 8.789e+01 9.210e+01 9.770e+01 1.527e+02, threshold=1.842e+02, percent-clipped=0.0 +2024-09-20 05:50:02,327 INFO [train.py:1198] (0/2) Epoch 46, batch 3000, loss[loss=0.2302, ctc_loss=0.1076, cr_loss=0.3604, attn_decoder_loss=0.2358, over 29756.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1086, cr_loss=0.3472, attn_decoder_loss=0.2372, over 5784250.87 frames. ], batch size: 81, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:50:02,328 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 05:50:11,349 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.5652, 5.3135, 5.1572, 4.7790], device='cuda:0') +2024-09-20 05:50:20,767 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.5.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.8010, 4.8593, 4.5217, 2.5232], device='cuda:0') +2024-09-20 05:50:21,365 INFO [train.py:1230] (0/2) Epoch 46, validation: loss=0.2122, ctc_loss=0.03683, cr_loss=6.872e-15, attn_decoder_loss=0.2317, over 944034.00 frames. +2024-09-20 05:50:21,365 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 05:50:36,990 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=826540.0, ans=0.0 +2024-09-20 05:50:39,194 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.30 vs. limit=6.0 +2024-09-20 05:50:44,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=826540.0, ans=0.0 +2024-09-20 05:51:22,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=826660.0, ans=0.125 +2024-09-20 05:51:39,051 INFO [train.py:1198] (0/2) Epoch 46, batch 3050, loss[loss=0.234, ctc_loss=0.1173, cr_loss=0.3882, attn_decoder_loss=0.2383, over 29528.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1092, cr_loss=0.3482, attn_decoder_loss=0.238, over 5777141.54 frames. ], batch size: 76, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:51:43,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.09 vs. 
limit=15.0 +2024-09-20 05:52:02,142 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=826740.0, ans=0.0 +2024-09-20 05:52:03,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=826740.0, ans=0.2 +2024-09-20 05:52:22,586 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.14 vs. limit=15.0 +2024-09-20 05:52:42,875 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=826860.0, ans=0.2 +2024-09-20 05:52:44,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=826860.0, ans=0.125 +2024-09-20 05:52:45,558 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.649e+01 8.604e+01 9.128e+01 9.681e+01 2.059e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-20 05:52:51,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=826860.0, ans=0.0 +2024-09-20 05:52:51,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=826860.0, ans=0.125 +2024-09-20 05:52:54,449 INFO [train.py:1198] (0/2) Epoch 46, batch 3100, loss[loss=0.2412, ctc_loss=0.1206, cr_loss=0.3838, attn_decoder_loss=0.2461, over 29249.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.109, cr_loss=0.3475, attn_decoder_loss=0.2374, over 5775946.51 frames. ], batch size: 100, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:52:56,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=826900.0, ans=0.0 +2024-09-20 05:53:02,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=826900.0, ans=0.125 +2024-09-20 05:53:12,721 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=826940.0, ans=0.025 +2024-09-20 05:53:29,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=826980.0, ans=0.1 +2024-09-20 05:53:41,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=827020.0, ans=0.1 +2024-09-20 05:54:09,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=827060.0, ans=0.025 +2024-09-20 05:54:12,267 INFO [train.py:1198] (0/2) Epoch 46, batch 3150, loss[loss=0.2449, ctc_loss=0.1108, cr_loss=0.3498, attn_decoder_loss=0.252, over 28864.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1089, cr_loss=0.3473, attn_decoder_loss=0.2371, over 5783291.68 frames. 
], batch size: 104, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:54:12,737 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:54:14,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=827100.0, ans=0.2 +2024-09-20 05:54:15,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=827100.0, ans=0.0 +2024-09-20 05:54:24,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=827100.0, ans=0.125 +2024-09-20 05:54:36,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=827140.0, ans=0.1 +2024-09-20 05:54:44,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=827180.0, ans=0.1 +2024-09-20 05:54:44,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=827180.0, ans=0.1 +2024-09-20 05:54:52,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=827180.0, ans=0.025 +2024-09-20 05:55:16,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=827260.0, ans=0.125 +2024-09-20 05:55:18,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=827260.0, ans=0.125 +2024-09-20 05:55:20,992 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.407e+01 8.661e+01 9.228e+01 9.834e+01 1.754e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-20 05:55:27,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=827260.0, ans=0.125 +2024-09-20 05:55:30,097 INFO [train.py:1198] (0/2) Epoch 46, batch 3200, loss[loss=0.2281, ctc_loss=0.1077, cr_loss=0.3519, attn_decoder_loss=0.2337, over 29422.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1088, cr_loss=0.3471, attn_decoder_loss=0.2368, over 5793912.58 frames. ], batch size: 79, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:55:50,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=827340.0, ans=0.0 +2024-09-20 05:56:03,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=827380.0, ans=0.2 +2024-09-20 05:56:06,359 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.83 vs. 
limit=22.5 +2024-09-20 05:56:06,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=827380.0, ans=0.125 +2024-09-20 05:56:14,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=827420.0, ans=0.125 +2024-09-20 05:56:24,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=827420.0, ans=0.5 +2024-09-20 05:56:26,826 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.53 vs. limit=15.0 +2024-09-20 05:56:45,902 INFO [train.py:1198] (0/2) Epoch 46, batch 3250, loss[loss=0.2461, ctc_loss=0.1219, cr_loss=0.3791, attn_decoder_loss=0.2514, over 29703.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1087, cr_loss=0.3471, attn_decoder_loss=0.2375, over 5802215.58 frames. ], batch size: 84, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:56:52,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=827500.0, ans=0.1 +2024-09-20 05:57:01,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=827540.0, ans=0.025 +2024-09-20 05:57:01,173 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=827540.0, ans=0.0 +2024-09-20 05:57:05,574 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=827540.0, ans=0.125 +2024-09-20 05:57:11,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=827540.0, ans=0.025 +2024-09-20 05:57:36,067 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.36 vs. limit=22.5 +2024-09-20 05:57:44,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=827660.0, ans=0.125 +2024-09-20 05:57:52,296 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=827660.0, ans=0.125 +2024-09-20 05:57:53,416 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.254e+01 8.520e+01 8.954e+01 9.495e+01 2.408e+02, threshold=1.791e+02, percent-clipped=1.0 +2024-09-20 05:57:56,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=827660.0, ans=0.125 +2024-09-20 05:58:01,023 INFO [train.py:1198] (0/2) Epoch 46, batch 3300, loss[loss=0.2457, ctc_loss=0.1204, cr_loss=0.3673, attn_decoder_loss=0.2514, over 28262.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1081, cr_loss=0.3453, attn_decoder_loss=0.2364, over 5799046.48 frames. 
], batch size: 111, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 05:58:01,463 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=827700.0, ans=0.0 +2024-09-20 05:58:18,633 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=827740.0, ans=0.1 +2024-09-20 05:58:37,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=827780.0, ans=0.025 +2024-09-20 05:58:44,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=827780.0, ans=0.125 +2024-09-20 05:58:58,629 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.86 vs. limit=15.0 +2024-09-20 05:59:05,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=827860.0, ans=0.09899494936611666 +2024-09-20 05:59:07,179 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=827860.0, ans=0.1 +2024-09-20 05:59:20,247 INFO [train.py:1198] (0/2) Epoch 46, batch 3350, loss[loss=0.2351, ctc_loss=0.107, cr_loss=0.3237, attn_decoder_loss=0.2421, over 28907.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1089, cr_loss=0.347, attn_decoder_loss=0.2373, over 5776005.25 frames. ], batch size: 104, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 05:59:24,270 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.37 vs. limit=6.0 +2024-09-20 05:59:41,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=827940.0, ans=0.2 +2024-09-20 05:59:43,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=827940.0, ans=0.125 +2024-09-20 06:00:03,145 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=827980.0, ans=0.125 +2024-09-20 06:00:06,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=828020.0, ans=0.125 +2024-09-20 06:00:17,166 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.24 vs. limit=15.0 +2024-09-20 06:00:27,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=828060.0, ans=0.0 +2024-09-20 06:00:28,533 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.460e+01 8.801e+01 9.345e+01 9.836e+01 1.654e+02, threshold=1.869e+02, percent-clipped=0.0 +2024-09-20 06:00:36,123 INFO [train.py:1198] (0/2) Epoch 46, batch 3400, loss[loss=0.2076, ctc_loss=0.09076, cr_loss=0.3016, attn_decoder_loss=0.2138, over 29350.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.109, cr_loss=0.3469, attn_decoder_loss=0.2371, over 5767412.53 frames. 
], batch size: 67, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 06:01:00,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=828140.0, ans=0.125 +2024-09-20 06:01:06,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff3.min_abs, batch_count=828180.0, ans=0.2 +2024-09-20 06:01:12,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=828180.0, ans=0.2 +2024-09-20 06:01:15,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=828180.0, ans=0.125 +2024-09-20 06:01:20,332 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=828220.0, ans=0.025 +2024-09-20 06:01:24,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=828220.0, ans=0.0 +2024-09-20 06:01:30,964 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=828220.0, ans=0.07 +2024-09-20 06:01:32,819 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.31 vs. limit=15.0 +2024-09-20 06:01:35,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=828260.0, ans=10.0 +2024-09-20 06:01:44,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=828260.0, ans=0.125 +2024-09-20 06:01:47,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=828260.0, ans=0.125 +2024-09-20 06:01:51,529 INFO [train.py:1198] (0/2) Epoch 46, batch 3450, loss[loss=0.2396, ctc_loss=0.1092, cr_loss=0.3364, attn_decoder_loss=0.2466, over 28164.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1089, cr_loss=0.3473, attn_decoder_loss=0.2373, over 5776135.20 frames. ], batch size: 111, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 06:01:58,470 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=828300.0, ans=0.125 +2024-09-20 06:02:04,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=828300.0, ans=0.0 +2024-09-20 06:02:53,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.51 vs. limit=15.0 +2024-09-20 06:03:00,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=828460.0, ans=0.0 +2024-09-20 06:03:03,198 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.738e+01 8.466e+01 9.080e+01 9.638e+01 4.809e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-20 06:03:05,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=828460.0, ans=0.2 +2024-09-20 06:03:10,700 INFO [train.py:1198] (0/2) Epoch 46, batch 3500, loss[loss=0.2061, ctc_loss=0.09622, cr_loss=0.3232, attn_decoder_loss=0.2111, over 29315.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1086, cr_loss=0.3463, attn_decoder_loss=0.2367, over 5777228.74 frames. 
], batch size: 71, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 06:03:17,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=828500.0, ans=0.0 +2024-09-20 06:03:26,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=828540.0, ans=0.125 +2024-09-20 06:03:44,535 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.18 vs. limit=15.0 +2024-09-20 06:03:44,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.80 vs. limit=15.0 +2024-09-20 06:03:46,946 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:03:49,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=828580.0, ans=0.0 +2024-09-20 06:04:06,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=828620.0, ans=0.0 +2024-09-20 06:04:12,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=828660.0, ans=0.0 +2024-09-20 06:04:20,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.02 vs. limit=15.0 +2024-09-20 06:04:25,142 INFO [train.py:1198] (0/2) Epoch 46, batch 3550, loss[loss=0.2433, ctc_loss=0.12, cr_loss=0.3784, attn_decoder_loss=0.2486, over 29698.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1084, cr_loss=0.3461, attn_decoder_loss=0.2368, over 5783426.27 frames. ], batch size: 89, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 06:04:25,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=828700.0, ans=0.0 +2024-09-20 06:04:28,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=828700.0, ans=0.0 +2024-09-20 06:04:30,201 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.37 vs. limit=15.0 +2024-09-20 06:04:32,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=828700.0, ans=0.1 +2024-09-20 06:04:35,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=828700.0, ans=0.0 +2024-09-20 06:05:02,348 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.84 vs. 
limit=15.0 +2024-09-20 06:05:20,622 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=828820.0, ans=0.0 +2024-09-20 06:05:21,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=828820.0, ans=15.0 +2024-09-20 06:05:32,134 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.578e+01 8.518e+01 9.140e+01 9.697e+01 1.857e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-20 06:05:39,453 INFO [train.py:1198] (0/2) Epoch 46, batch 3600, loss[loss=0.2252, ctc_loss=0.1023, cr_loss=0.313, attn_decoder_loss=0.2319, over 29493.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1088, cr_loss=0.3473, attn_decoder_loss=0.2372, over 5792252.79 frames. ], batch size: 77, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:05:42,761 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:05:43,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.85 vs. limit=22.5 +2024-09-20 06:05:46,728 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.14 vs. limit=15.0 +2024-09-20 06:06:09,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=828980.0, ans=0.125 +2024-09-20 06:06:28,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=829020.0, ans=0.125 +2024-09-20 06:06:50,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=829060.0, ans=0.0 +2024-09-20 06:06:53,555 INFO [train.py:1198] (0/2) Epoch 46, batch 3650, loss[loss=0.2541, ctc_loss=0.1241, cr_loss=0.3707, attn_decoder_loss=0.2603, over 29485.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1085, cr_loss=0.3463, attn_decoder_loss=0.2366, over 5793970.15 frames. ], batch size: 90, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:07:05,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=829100.0, ans=0.125 +2024-09-20 06:07:22,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=829180.0, ans=0.125 +2024-09-20 06:07:25,011 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=829180.0, ans=0.2 +2024-09-20 06:08:03,943 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.537e+01 9.087e+01 9.420e+01 1.458e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-20 06:08:04,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=829260.0, ans=0.5 +2024-09-20 06:08:08,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=829260.0, ans=0.0 +2024-09-20 06:08:11,548 INFO [train.py:1198] (0/2) Epoch 46, batch 3700, loss[loss=0.2473, ctc_loss=0.1218, cr_loss=0.368, attn_decoder_loss=0.2531, over 29710.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1087, cr_loss=0.3468, attn_decoder_loss=0.2369, over 5804500.53 frames. 
], batch size: 84, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:08:38,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=829340.0, ans=0.0 +2024-09-20 06:09:25,410 INFO [train.py:1198] (0/2) Epoch 46, batch 3750, loss[loss=0.2027, ctc_loss=0.09294, cr_loss=0.2982, attn_decoder_loss=0.2082, over 29371.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1086, cr_loss=0.3468, attn_decoder_loss=0.2367, over 5807811.31 frames. ], batch size: 67, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:09:27,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=829500.0, ans=0.125 +2024-09-20 06:09:47,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=829540.0, ans=0.125 +2024-09-20 06:10:01,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=829580.0, ans=0.2 +2024-09-20 06:10:13,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=829620.0, ans=0.1 +2024-09-20 06:10:30,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=829660.0, ans=0.0 +2024-09-20 06:10:30,170 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=829660.0, ans=0.125 +2024-09-20 06:10:32,723 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.488e+01 8.638e+01 9.232e+01 9.625e+01 1.772e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-20 06:10:40,159 INFO [train.py:1198] (0/2) Epoch 46, batch 3800, loss[loss=0.2473, ctc_loss=0.11, cr_loss=0.3418, attn_decoder_loss=0.255, over 29649.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1083, cr_loss=0.3459, attn_decoder_loss=0.2362, over 5798819.64 frames. ], batch size: 86, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:10:40,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=829700.0, ans=0.2 +2024-09-20 06:10:43,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=829700.0, ans=0.0 +2024-09-20 06:10:49,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=829700.0, ans=0.125 +2024-09-20 06:10:59,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=829740.0, ans=0.125 +2024-09-20 06:11:20,554 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=829780.0, ans=0.0 +2024-09-20 06:11:54,494 INFO [train.py:1198] (0/2) Epoch 46, batch 3850, loss[loss=0.2476, ctc_loss=0.1188, cr_loss=0.3728, attn_decoder_loss=0.2537, over 29244.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1083, cr_loss=0.3464, attn_decoder_loss=0.2365, over 5811355.29 frames. 
], batch size: 100, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:12:15,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=829940.0, ans=0.125 +2024-09-20 06:12:40,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=830020.0, ans=0.2 +2024-09-20 06:12:48,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=830020.0, ans=0.025 +2024-09-20 06:12:52,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=830060.0, ans=0.0 +2024-09-20 06:13:01,125 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.921e+01 8.600e+01 9.111e+01 9.601e+01 1.529e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-20 06:13:07,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=830100.0, ans=0.05 +2024-09-20 06:13:08,427 INFO [train.py:1198] (0/2) Epoch 46, batch 3900, loss[loss=0.2318, ctc_loss=0.09471, cr_loss=0.3145, attn_decoder_loss=0.24, over 29609.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1087, cr_loss=0.3473, attn_decoder_loss=0.2372, over 5816113.08 frames. ], batch size: 86, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:13:11,719 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=830100.0, ans=0.0 +2024-09-20 06:13:14,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=830100.0, ans=0.125 +2024-09-20 06:13:33,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=830140.0, ans=0.125 +2024-09-20 06:13:38,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.72 vs. limit=10.0 +2024-09-20 06:14:08,341 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.37 vs. limit=15.0 +2024-09-20 06:14:09,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=830260.0, ans=0.07 +2024-09-20 06:14:13,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=830260.0, ans=0.125 +2024-09-20 06:14:25,257 INFO [train.py:1198] (0/2) Epoch 46, batch 3950, loss[loss=0.2421, ctc_loss=0.1137, cr_loss=0.3509, attn_decoder_loss=0.2486, over 29499.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1085, cr_loss=0.3469, attn_decoder_loss=0.2373, over 5835460.62 frames. 
], batch size: 97, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:14:31,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=830300.0, ans=0.0 +2024-09-20 06:14:35,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=830300.0, ans=0.125 +2024-09-20 06:14:40,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=830340.0, ans=0.0 +2024-09-20 06:14:43,530 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.64 vs. limit=10.0 +2024-09-20 06:15:11,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=830420.0, ans=0.125 +2024-09-20 06:15:26,857 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.89 vs. limit=22.5 +2024-09-20 06:15:27,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=830460.0, ans=0.125 +2024-09-20 06:15:31,668 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.428e+01 8.756e+01 9.097e+01 9.656e+01 1.303e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-20 06:15:33,883 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.65 vs. limit=10.0 +2024-09-20 06:15:36,488 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=830460.0, ans=0.1 +2024-09-20 06:15:38,973 INFO [train.py:1198] (0/2) Epoch 46, batch 4000, loss[loss=0.2206, ctc_loss=0.09773, cr_loss=0.3227, attn_decoder_loss=0.2271, over 29499.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1085, cr_loss=0.3465, attn_decoder_loss=0.237, over 5812246.59 frames. ], batch size: 74, lr: 2.38e-03, grad_scale: 32.0 +2024-09-20 06:15:45,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=830500.0, ans=0.125 +2024-09-20 06:15:53,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=830540.0, ans=0.1 +2024-09-20 06:15:55,771 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.44 vs. limit=15.0 +2024-09-20 06:15:58,297 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:16:52,994 INFO [train.py:1198] (0/2) Epoch 46, batch 4050, loss[loss=0.2559, ctc_loss=0.1323, cr_loss=0.3898, attn_decoder_loss=0.2609, over 20613.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1089, cr_loss=0.3475, attn_decoder_loss=0.237, over 5796386.24 frames. 
], batch size: 210, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:17:03,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=830700.0, ans=0.2 +2024-09-20 06:17:03,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer_ff2.min_abs, batch_count=830700.0, ans=0.1 +2024-09-20 06:17:07,971 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=830740.0, ans=0.05 +2024-09-20 06:17:12,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=830740.0, ans=0.035 +2024-09-20 06:17:16,709 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=830740.0, ans=0.125 +2024-09-20 06:17:25,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=830780.0, ans=0.0 +2024-09-20 06:17:25,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=830780.0, ans=0.1 +2024-09-20 06:17:34,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=830780.0, ans=0.0 +2024-09-20 06:17:51,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=830860.0, ans=0.125 +2024-09-20 06:17:58,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=830860.0, ans=0.125 +2024-09-20 06:18:01,510 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.547e+01 8.777e+01 9.283e+01 1.003e+02 3.559e+02, threshold=1.857e+02, percent-clipped=2.0 +2024-09-20 06:18:08,723 INFO [train.py:1198] (0/2) Epoch 46, batch 4100, loss[loss=0.2486, ctc_loss=0.1245, cr_loss=0.3929, attn_decoder_loss=0.2536, over 29510.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1091, cr_loss=0.348, attn_decoder_loss=0.2371, over 5791806.65 frames. ], batch size: 90, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:18:33,771 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=830940.0, ans=0.125 +2024-09-20 06:18:35,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=830940.0, ans=0.125 +2024-09-20 06:18:45,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=830980.0, ans=0.125 +2024-09-20 06:18:51,956 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.70 vs. limit=12.0 +2024-09-20 06:18:54,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=831020.0, ans=0.125 +2024-09-20 06:19:21,756 INFO [train.py:1198] (0/2) Epoch 46, batch 4150, loss[loss=0.2256, ctc_loss=0.1046, cr_loss=0.341, attn_decoder_loss=0.2314, over 29508.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1089, cr_loss=0.3476, attn_decoder_loss=0.2367, over 5797404.67 frames. 
], batch size: 77, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:20:10,098 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. limit=6.0 +2024-09-20 06:20:15,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=831220.0, ans=0.125 +2024-09-20 06:20:29,883 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.101e+01 8.815e+01 9.247e+01 9.794e+01 1.755e+02, threshold=1.849e+02, percent-clipped=0.0 +2024-09-20 06:20:35,815 INFO [train.py:1198] (0/2) Epoch 46, batch 4200, loss[loss=0.2538, ctc_loss=0.1249, cr_loss=0.3963, attn_decoder_loss=0.2593, over 29500.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1092, cr_loss=0.3481, attn_decoder_loss=0.2371, over 5799555.81 frames. ], batch size: 90, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:20:53,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=831340.0, ans=0.2 +2024-09-20 06:20:58,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=831340.0, ans=0.1 +2024-09-20 06:21:01,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=831340.0, ans=0.125 +2024-09-20 06:21:11,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=831380.0, ans=0.125 +2024-09-20 06:21:34,570 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=831460.0, ans=0.0 +2024-09-20 06:21:49,886 INFO [train.py:1198] (0/2) Epoch 46, batch 4250, loss[loss=0.2151, ctc_loss=0.09344, cr_loss=0.3136, attn_decoder_loss=0.2216, over 29492.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1089, cr_loss=0.347, attn_decoder_loss=0.2372, over 5805522.79 frames. ], batch size: 74, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:21:50,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=831500.0, ans=0.125 +2024-09-20 06:21:51,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=831500.0, ans=0.0 +2024-09-20 06:21:53,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=831500.0, ans=0.0 +2024-09-20 06:22:14,486 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:22:20,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=831580.0, ans=0.125 +2024-09-20 06:22:22,558 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.65 vs. limit=15.0 +2024-09-20 06:22:25,523 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.22 vs. 
limit=15.0 +2024-09-20 06:22:26,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=831580.0, ans=0.125 +2024-09-20 06:22:36,763 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=831620.0, ans=0.07 +2024-09-20 06:22:58,491 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.527e+01 8.665e+01 9.148e+01 1.004e+02 2.126e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-20 06:23:04,400 INFO [train.py:1198] (0/2) Epoch 46, batch 4300, loss[loss=0.2435, ctc_loss=0.1161, cr_loss=0.3618, attn_decoder_loss=0.2496, over 29503.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1086, cr_loss=0.3461, attn_decoder_loss=0.2373, over 5794092.29 frames. ], batch size: 87, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:23:28,524 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.52 vs. limit=15.0 +2024-09-20 06:23:31,897 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.86 vs. limit=15.0 +2024-09-20 06:23:34,667 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.69 vs. limit=15.0 +2024-09-20 06:24:18,202 INFO [train.py:1198] (0/2) Epoch 46, batch 4350, loss[loss=0.242, ctc_loss=0.1131, cr_loss=0.3533, attn_decoder_loss=0.2484, over 29505.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1105, cr_loss=0.3507, attn_decoder_loss=0.2402, over 5796729.91 frames. ], batch size: 97, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:24:37,580 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=831940.0, ans=0.125 +2024-09-20 06:24:54,534 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-208000.pt +2024-09-20 06:25:07,024 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=831980.0, ans=0.0 +2024-09-20 06:25:20,865 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=832020.0, ans=0.025 +2024-09-20 06:25:20,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=832020.0, ans=0.0 +2024-09-20 06:25:36,408 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.165e+01 9.019e+01 9.377e+01 9.847e+01 2.022e+02, threshold=1.875e+02, percent-clipped=1.0 +2024-09-20 06:25:42,287 INFO [train.py:1198] (0/2) Epoch 46, batch 4400, loss[loss=0.2382, ctc_loss=0.1176, cr_loss=0.3685, attn_decoder_loss=0.2434, over 27242.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1117, cr_loss=0.3533, attn_decoder_loss=0.2421, over 5766651.95 frames. 
], batch size: 124, lr: 2.38e-03, grad_scale: 32.0 +2024-09-20 06:25:44,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=832100.0, ans=0.1 +2024-09-20 06:25:45,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=832100.0, ans=0.1 +2024-09-20 06:25:48,226 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:26:17,446 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=832180.0, ans=0.125 +2024-09-20 06:26:18,254 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.74 vs. limit=22.5 +2024-09-20 06:26:18,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=832180.0, ans=0.125 +2024-09-20 06:26:32,505 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=7.26 vs. limit=12.0 +2024-09-20 06:26:40,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=832260.0, ans=0.125 +2024-09-20 06:26:55,246 INFO [train.py:1198] (0/2) Epoch 46, batch 4450, loss[loss=0.2382, ctc_loss=0.1203, cr_loss=0.3354, attn_decoder_loss=0.2439, over 20283.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1152, cr_loss=0.3597, attn_decoder_loss=0.2443, over 5576992.35 frames. ], batch size: 209, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:27:08,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=832300.0, ans=0.035 +2024-09-20 06:27:16,721 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.20 vs. limit=22.5 +2024-09-20 06:27:25,412 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=11.99 vs. limit=15.0 +2024-09-20 06:27:54,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=832460.0, ans=0.1 +2024-09-20 06:27:54,828 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=832460.0, ans=0.025 +2024-09-20 06:28:06,273 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.388e+01 1.015e+02 1.122e+02 1.210e+02 5.487e+02, threshold=2.243e+02, percent-clipped=3.0 +2024-09-20 06:28:10,697 INFO [train.py:1198] (0/2) Epoch 46, batch 4500, loss[loss=0.2462, ctc_loss=0.1248, cr_loss=0.36, attn_decoder_loss=0.2517, over 20721.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1176, cr_loss=0.3612, attn_decoder_loss=0.2458, over 5235304.38 frames. ], batch size: 210, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:28:17,930 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.49 vs. 
limit=22.5 +2024-09-20 06:28:48,068 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-46.pt +2024-09-20 06:29:38,357 INFO [train.py:1198] (0/2) Epoch 47, batch 0, loss[loss=0.2164, ctc_loss=0.0921, cr_loss=0.3201, attn_decoder_loss=0.223, over 29633.00 frames. ], tot_loss[loss=0.2164, ctc_loss=0.0921, cr_loss=0.3201, attn_decoder_loss=0.223, over 29633.00 frames. ], batch size: 73, lr: 2.35e-03, grad_scale: 32.0 +2024-09-20 06:29:38,358 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 06:29:56,719 INFO [train.py:1230] (0/2) Epoch 47, validation: loss=0.2131, ctc_loss=0.03582, cr_loss=6.765e-15, attn_decoder_loss=0.2328, over 944034.00 frames. +2024-09-20 06:29:56,720 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 06:29:58,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=832600.0, ans=0.2 +2024-09-20 06:30:28,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=832680.0, ans=0.125 +2024-09-20 06:30:28,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=832680.0, ans=0.125 +2024-09-20 06:30:45,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_na.min_abs, batch_count=832720.0, ans=0.02 +2024-09-20 06:30:57,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=832760.0, ans=0.09899494936611666 +2024-09-20 06:31:02,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=832760.0, ans=0.125 +2024-09-20 06:31:08,073 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=832760.0, ans=0.125 +2024-09-20 06:31:13,821 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=832800.0, ans=22.5 +2024-09-20 06:31:14,204 INFO [train.py:1198] (0/2) Epoch 47, batch 50, loss[loss=0.203, ctc_loss=0.09429, cr_loss=0.3112, attn_decoder_loss=0.2081, over 29423.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1106, cr_loss=0.352, attn_decoder_loss=0.2381, over 1266693.12 frames. 
], batch size: 70, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:31:15,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=832800.0, ans=0.125 +2024-09-20 06:31:48,935 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.774e+01 8.892e+01 9.712e+01 1.150e+02 2.007e+02, threshold=1.942e+02, percent-clipped=0.0 +2024-09-20 06:32:11,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=832920.0, ans=0.035 +2024-09-20 06:32:14,898 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=832960.0, ans=0.0 +2024-09-20 06:32:16,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=832960.0, ans=0.1 +2024-09-20 06:32:24,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=832960.0, ans=0.07 +2024-09-20 06:32:29,705 INFO [train.py:1198] (0/2) Epoch 47, batch 100, loss[loss=0.2237, ctc_loss=0.1055, cr_loss=0.3409, attn_decoder_loss=0.2293, over 29536.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1118, cr_loss=0.3542, attn_decoder_loss=0.2405, over 2252171.74 frames. ], batch size: 76, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:32:54,936 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.96 vs. limit=12.0 +2024-09-20 06:32:57,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=833040.0, ans=0.125 +2024-09-20 06:33:18,032 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:33:25,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=833120.0, ans=0.125 +2024-09-20 06:33:45,862 INFO [train.py:1198] (0/2) Epoch 47, batch 150, loss[loss=0.2093, ctc_loss=0.09806, cr_loss=0.3249, attn_decoder_loss=0.2145, over 29429.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1094, cr_loss=0.3483, attn_decoder_loss=0.2379, over 3046893.96 frames. ], batch size: 70, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:33:55,630 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.27 vs. limit=15.0 +2024-09-20 06:34:01,587 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:34:13,481 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=833240.0, ans=0.125 +2024-09-20 06:34:22,974 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.674e+01 8.579e+01 9.254e+01 9.598e+01 1.367e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-20 06:34:26,718 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.73 vs. 
limit=15.0 +2024-09-20 06:34:29,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=833280.0, ans=0.2 +2024-09-20 06:34:35,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=833320.0, ans=0.125 +2024-09-20 06:34:44,326 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff2.min_abs, batch_count=833320.0, ans=0.1 +2024-09-20 06:34:57,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=833360.0, ans=0.0 +2024-09-20 06:35:02,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=833400.0, ans=0.125 +2024-09-20 06:35:03,354 INFO [train.py:1198] (0/2) Epoch 47, batch 200, loss[loss=0.2518, ctc_loss=0.1319, cr_loss=0.398, attn_decoder_loss=0.2562, over 27352.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1083, cr_loss=0.3461, attn_decoder_loss=0.2366, over 3659180.33 frames. ], batch size: 124, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:35:03,756 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=833400.0, ans=0.0 +2024-09-20 06:35:12,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=833400.0, ans=0.125 +2024-09-20 06:35:31,238 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.05 vs. limit=10.0 +2024-09-20 06:35:44,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=833480.0, ans=0.125 +2024-09-20 06:35:50,144 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=833520.0, ans=0.125 +2024-09-20 06:36:00,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=833520.0, ans=0.0 +2024-09-20 06:36:12,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=833560.0, ans=0.0 +2024-09-20 06:36:19,052 INFO [train.py:1198] (0/2) Epoch 47, batch 250, loss[loss=0.244, ctc_loss=0.1107, cr_loss=0.3519, attn_decoder_loss=0.251, over 29213.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1086, cr_loss=0.347, attn_decoder_loss=0.237, over 4140402.48 frames. 
], batch size: 100, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:36:29,886 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=833600.0, ans=0.0 +2024-09-20 06:36:38,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=833640.0, ans=0.125 +2024-09-20 06:36:38,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=833640.0, ans=0.025 +2024-09-20 06:36:47,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=833640.0, ans=0.1 +2024-09-20 06:36:55,948 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.268e+01 8.644e+01 9.308e+01 9.912e+01 1.990e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-20 06:37:06,772 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=833720.0, ans=0.0 +2024-09-20 06:37:21,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=833760.0, ans=0.125 +2024-09-20 06:37:26,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=833760.0, ans=0.125 +2024-09-20 06:37:35,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=833800.0, ans=0.0 +2024-09-20 06:37:36,484 INFO [train.py:1198] (0/2) Epoch 47, batch 300, loss[loss=0.2447, ctc_loss=0.113, cr_loss=0.3606, attn_decoder_loss=0.2513, over 29523.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1083, cr_loss=0.3467, attn_decoder_loss=0.2369, over 4507978.83 frames. ], batch size: 92, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:37:37,205 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.04 vs. limit=15.0 +2024-09-20 06:37:44,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=833800.0, ans=0.125 +2024-09-20 06:38:02,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=833840.0, ans=0.2 +2024-09-20 06:38:06,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=833880.0, ans=0.125 +2024-09-20 06:38:26,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.61 vs. limit=12.0 +2024-09-20 06:38:28,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=833920.0, ans=0.125 +2024-09-20 06:38:37,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=833960.0, ans=0.1 +2024-09-20 06:38:46,908 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=833960.0, ans=0.125 +2024-09-20 06:38:54,087 INFO [train.py:1198] (0/2) Epoch 47, batch 350, loss[loss=0.2095, ctc_loss=0.08746, cr_loss=0.2833, attn_decoder_loss=0.2167, over 29319.00 frames. 
], tot_loss[loss=0.2312, ctc_loss=0.1082, cr_loss=0.3465, attn_decoder_loss=0.2371, over 4794049.83 frames. ], batch size: 71, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:39:28,605 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.398e+01 8.657e+01 9.081e+01 9.524e+01 1.810e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-20 06:39:39,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=834120.0, ans=0.1 +2024-09-20 06:39:58,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=834160.0, ans=0.125 +2024-09-20 06:40:01,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=834160.0, ans=0.025 +2024-09-20 06:40:04,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=834160.0, ans=0.025 +2024-09-20 06:40:08,860 INFO [train.py:1198] (0/2) Epoch 47, batch 400, loss[loss=0.2416, ctc_loss=0.1124, cr_loss=0.3535, attn_decoder_loss=0.2481, over 29713.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1078, cr_loss=0.3453, attn_decoder_loss=0.2367, over 5024044.74 frames. ], batch size: 82, lr: 2.35e-03, grad_scale: 32.0 +2024-09-20 06:40:12,322 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:40:24,493 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=834240.0, ans=0.0 +2024-09-20 06:40:31,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=834240.0, ans=0.0 +2024-09-20 06:40:52,407 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=834280.0, ans=0.0 +2024-09-20 06:41:10,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=834360.0, ans=0.95 +2024-09-20 06:41:14,240 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=10.12 vs. limit=12.0 +2024-09-20 06:41:27,383 INFO [train.py:1198] (0/2) Epoch 47, batch 450, loss[loss=0.2497, ctc_loss=0.1223, cr_loss=0.3837, attn_decoder_loss=0.2554, over 29684.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1079, cr_loss=0.3462, attn_decoder_loss=0.2369, over 5187896.99 frames. 
], batch size: 83, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:41:39,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=834400.0, ans=0.0 +2024-09-20 06:41:45,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=834440.0, ans=0.0 +2024-09-20 06:41:50,167 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=834440.0, ans=0.0 +2024-09-20 06:41:54,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=834440.0, ans=0.0 +2024-09-20 06:41:58,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=834480.0, ans=0.0 +2024-09-20 06:42:03,674 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.609e+01 9.172e+01 9.678e+01 2.074e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-20 06:42:11,718 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=834520.0, ans=0.0 +2024-09-20 06:42:16,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=834520.0, ans=0.1 +2024-09-20 06:42:27,590 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=834520.0, ans=0.0 +2024-09-20 06:42:32,672 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.11 vs. limit=6.0 +2024-09-20 06:42:38,864 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.17 vs. limit=15.0 +2024-09-20 06:42:45,367 INFO [train.py:1198] (0/2) Epoch 47, batch 500, loss[loss=0.2451, ctc_loss=0.1172, cr_loss=0.3812, attn_decoder_loss=0.2508, over 29464.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1072, cr_loss=0.3444, attn_decoder_loss=0.2361, over 5330749.54 frames. ], batch size: 94, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:42:48,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=834600.0, ans=0.125 +2024-09-20 06:42:53,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=834600.0, ans=0.125 +2024-09-20 06:42:53,752 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.61 vs. limit=10.0 +2024-09-20 06:42:54,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=834600.0, ans=0.125 +2024-09-20 06:43:06,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=834640.0, ans=0.0 +2024-09-20 06:43:14,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=834680.0, ans=15.0 +2024-09-20 06:43:22,300 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.65 vs. 
limit=15.0 +2024-09-20 06:43:42,143 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.06 vs. limit=15.0 +2024-09-20 06:43:51,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=834760.0, ans=0.125 +2024-09-20 06:44:00,803 INFO [train.py:1198] (0/2) Epoch 47, batch 550, loss[loss=0.2427, ctc_loss=0.1171, cr_loss=0.3613, attn_decoder_loss=0.2486, over 28828.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1077, cr_loss=0.3454, attn_decoder_loss=0.2364, over 5422970.41 frames. ], batch size: 104, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:44:03,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.42 vs. limit=6.0 +2024-09-20 06:44:32,739 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.41 vs. limit=15.0 +2024-09-20 06:44:36,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=834880.0, ans=0.125 +2024-09-20 06:44:39,387 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.709e+01 8.688e+01 9.011e+01 9.708e+01 1.487e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-20 06:45:04,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=834960.0, ans=0.0 +2024-09-20 06:45:08,694 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=834960.0, ans=0.125 +2024-09-20 06:45:18,980 INFO [train.py:1198] (0/2) Epoch 47, batch 600, loss[loss=0.2437, ctc_loss=0.116, cr_loss=0.3442, attn_decoder_loss=0.2502, over 29272.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1079, cr_loss=0.3458, attn_decoder_loss=0.2368, over 5508956.35 frames. ], batch size: 100, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:45:32,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=835040.0, ans=0.2 +2024-09-20 06:45:36,608 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.87 vs. limit=12.0 +2024-09-20 06:45:39,187 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.43 vs. limit=6.0 +2024-09-20 06:45:53,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=835080.0, ans=0.2 +2024-09-20 06:45:59,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=835080.0, ans=0.125 +2024-09-20 06:46:36,764 INFO [train.py:1198] (0/2) Epoch 47, batch 650, loss[loss=0.2272, ctc_loss=0.09445, cr_loss=0.3155, attn_decoder_loss=0.2349, over 29778.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1073, cr_loss=0.3445, attn_decoder_loss=0.236, over 5585604.67 frames. 
], batch size: 81, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:46:43,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=835200.0, ans=0.0 +2024-09-20 06:47:07,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=835280.0, ans=0.125 +2024-09-20 06:47:13,106 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.705e+01 8.697e+01 9.175e+01 9.724e+01 1.599e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-20 06:47:16,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=835280.0, ans=0.125 +2024-09-20 06:47:16,412 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=835280.0, ans=0.0 +2024-09-20 06:47:16,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=835280.0, ans=0.125 +2024-09-20 06:47:25,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=835320.0, ans=0.0 +2024-09-20 06:47:27,389 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.59 vs. limit=15.0 +2024-09-20 06:47:45,488 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.35 vs. limit=22.5 +2024-09-20 06:47:47,211 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.23 vs. limit=15.0 +2024-09-20 06:47:52,265 INFO [train.py:1198] (0/2) Epoch 47, batch 700, loss[loss=0.2245, ctc_loss=0.1087, cr_loss=0.3452, attn_decoder_loss=0.2297, over 29541.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1081, cr_loss=0.3462, attn_decoder_loss=0.2369, over 5636639.74 frames. ], batch size: 76, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:48:25,095 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.94 vs. limit=15.0 +2024-09-20 06:48:26,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=835480.0, ans=0.125 +2024-09-20 06:48:33,175 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.83 vs. limit=22.5 +2024-09-20 06:48:50,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. limit=6.0 +2024-09-20 06:48:51,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=835520.0, ans=0.125 +2024-09-20 06:49:03,085 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.83 vs. 
limit=15.0 +2024-09-20 06:49:06,842 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=835560.0, ans=0.125 +2024-09-20 06:49:09,606 INFO [train.py:1198] (0/2) Epoch 47, batch 750, loss[loss=0.2341, ctc_loss=0.1099, cr_loss=0.3366, attn_decoder_loss=0.2404, over 29714.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1078, cr_loss=0.3454, attn_decoder_loss=0.2363, over 5674964.02 frames. ], batch size: 82, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:49:21,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=835600.0, ans=0.1 +2024-09-20 06:49:45,342 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.511e+01 8.705e+01 9.081e+01 9.642e+01 1.954e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-20 06:50:07,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=835720.0, ans=0.1 +2024-09-20 06:50:10,135 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=835760.0, ans=0.125 +2024-09-20 06:50:10,421 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.62 vs. limit=12.0 +2024-09-20 06:50:24,780 INFO [train.py:1198] (0/2) Epoch 47, batch 800, loss[loss=0.209, ctc_loss=0.09147, cr_loss=0.3117, attn_decoder_loss=0.2151, over 29636.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.108, cr_loss=0.3464, attn_decoder_loss=0.2365, over 5706406.34 frames. ], batch size: 73, lr: 2.35e-03, grad_scale: 32.0 +2024-09-20 06:50:28,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=835800.0, ans=0.0 +2024-09-20 06:50:35,376 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.94 vs. limit=15.0 +2024-09-20 06:50:41,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.74 vs. limit=15.0 +2024-09-20 06:50:51,994 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.74 vs. limit=10.0 +2024-09-20 06:51:18,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=835920.0, ans=0.025 +2024-09-20 06:51:34,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=835960.0, ans=0.5 +2024-09-20 06:51:41,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=836000.0, ans=0.125 +2024-09-20 06:51:42,576 INFO [train.py:1198] (0/2) Epoch 47, batch 850, loss[loss=0.257, ctc_loss=0.1254, cr_loss=0.3694, attn_decoder_loss=0.2634, over 29676.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1077, cr_loss=0.3458, attn_decoder_loss=0.236, over 5736362.55 frames. 
], batch size: 89, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:51:47,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=836000.0, ans=0.0 +2024-09-20 06:52:06,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=836040.0, ans=0.125 +2024-09-20 06:52:08,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=836040.0, ans=0.125 +2024-09-20 06:52:21,099 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=836080.0, ans=0.0 +2024-09-20 06:52:22,245 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.573e+01 8.624e+01 9.106e+01 9.735e+01 2.135e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-20 06:52:49,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.60 vs. limit=22.5 +2024-09-20 06:53:00,277 INFO [train.py:1198] (0/2) Epoch 47, batch 900, loss[loss=0.2056, ctc_loss=0.08506, cr_loss=0.2924, attn_decoder_loss=0.2125, over 29593.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1077, cr_loss=0.3457, attn_decoder_loss=0.2361, over 5741336.32 frames. ], batch size: 73, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:53:16,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=836240.0, ans=6.0 +2024-09-20 06:53:25,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=836240.0, ans=0.125 +2024-09-20 06:53:51,399 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=836320.0, ans=0.125 +2024-09-20 06:54:01,981 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=836360.0, ans=0.2 +2024-09-20 06:54:05,017 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=836360.0, ans=0.125 +2024-09-20 06:54:15,111 INFO [train.py:1198] (0/2) Epoch 47, batch 950, loss[loss=0.2099, ctc_loss=0.08923, cr_loss=0.3013, attn_decoder_loss=0.2166, over 29517.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1077, cr_loss=0.3456, attn_decoder_loss=0.2362, over 5744613.86 frames. ], batch size: 74, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:54:33,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.68 vs. 
limit=12.0 +2024-09-20 06:54:40,265 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:54:56,610 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.702e+01 9.182e+01 9.933e+01 3.090e+02, threshold=1.836e+02, percent-clipped=1.0 +2024-09-20 06:55:12,034 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=836520.0, ans=0.1 +2024-09-20 06:55:13,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=836520.0, ans=0.0 +2024-09-20 06:55:32,629 INFO [train.py:1198] (0/2) Epoch 47, batch 1000, loss[loss=0.23, ctc_loss=0.1211, cr_loss=0.376, attn_decoder_loss=0.2338, over 29488.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1087, cr_loss=0.3474, attn_decoder_loss=0.237, over 5739433.85 frames. ], batch size: 77, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:55:44,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=836600.0, ans=0.0 +2024-09-20 06:55:45,048 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=836600.0, ans=0.0 +2024-09-20 06:55:47,301 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.70 vs. limit=15.0 +2024-09-20 06:55:52,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=836640.0, ans=0.0 +2024-09-20 06:56:18,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=836720.0, ans=0.125 +2024-09-20 06:56:26,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=836720.0, ans=0.1 +2024-09-20 06:56:46,455 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.89 vs. limit=10.0 +2024-09-20 06:56:50,186 INFO [train.py:1198] (0/2) Epoch 47, batch 1050, loss[loss=0.2365, ctc_loss=0.1028, cr_loss=0.3264, attn_decoder_loss=0.2441, over 29687.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1084, cr_loss=0.347, attn_decoder_loss=0.2366, over 5746373.95 frames. ], batch size: 85, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:57:04,133 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=836840.0, ans=0.125 +2024-09-20 06:57:11,056 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.26 vs. 
limit=6.0 +2024-09-20 06:57:14,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=836840.0, ans=0.125 +2024-09-20 06:57:27,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=836880.0, ans=0.0 +2024-09-20 06:57:29,788 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.447e+01 8.620e+01 9.050e+01 9.577e+01 1.323e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-20 06:57:34,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=836920.0, ans=0.125 +2024-09-20 06:57:37,539 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=836920.0, ans=0.125 +2024-09-20 06:57:39,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=836920.0, ans=0.0 +2024-09-20 06:57:43,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=836920.0, ans=0.125 +2024-09-20 06:58:05,795 INFO [train.py:1198] (0/2) Epoch 47, batch 1100, loss[loss=0.2202, ctc_loss=0.09893, cr_loss=0.3257, attn_decoder_loss=0.2264, over 29464.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1083, cr_loss=0.3462, attn_decoder_loss=0.2365, over 5758359.19 frames. ], batch size: 78, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:58:07,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=837000.0, ans=0.125 +2024-09-20 06:58:18,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=837000.0, ans=0.1 +2024-09-20 06:58:26,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=837040.0, ans=0.04949747468305833 +2024-09-20 06:58:31,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=837040.0, ans=0.1 +2024-09-20 06:58:35,550 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=837040.0, ans=0.1 +2024-09-20 06:58:58,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=837120.0, ans=0.1 +2024-09-20 06:58:59,752 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=837120.0, ans=0.125 +2024-09-20 06:59:01,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=837120.0, ans=0.125 +2024-09-20 06:59:23,430 INFO [train.py:1198] (0/2) Epoch 47, batch 1150, loss[loss=0.2206, ctc_loss=0.1006, cr_loss=0.335, attn_decoder_loss=0.2264, over 29434.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1081, cr_loss=0.3457, attn_decoder_loss=0.2361, over 5756846.31 frames. 
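
The `ScheduledFloat` entries above track hyperparameters (attention/conv skip rates, balancer probabilities, dropout) that are piecewise-linear functions of `batch_count`; by batch_count around 836000 most of them have annealed to fixed final values, such as `ans=0.0` for the skip rates. A minimal sketch of such a schedule, with made-up breakpoints rather than the recipe's actual definitions:

```python
# A minimal piecewise-linear float schedule keyed on batch count, in the
# spirit of the ScheduledFloat values logged above. The breakpoints are
# illustrative, not taken from the zipformer recipe.
import bisect

class PiecewiseLinearFloat:
    def __init__(self, *points):
        # points: (batch_count, value) pairs, sorted by batch_count
        self.xs = [p[0] for p in points]
        self.ys = [p[1] for p in points]

    def value(self, batch_count: float) -> float:
        # Clamp outside the defined range, interpolate linearly inside it.
        if batch_count <= self.xs[0]:
            return self.ys[0]
        if batch_count >= self.xs[-1]:
            return self.ys[-1]
        i = bisect.bisect_right(self.xs, batch_count)
        x0, x1 = self.xs[i - 1], self.xs[i]
        y0, y1 = self.ys[i - 1], self.ys[i]
        return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)

# e.g. a skip rate annealed from 0.1 to 0.0 over the first 20k batches
skip_rate = PiecewiseLinearFloat((0.0, 0.1), (20000.0, 0.0))
print(skip_rate.value(836000.0))  # -> 0.0, as in the late-training lines
```
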
], batch size: 78, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:59:46,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=837240.0, ans=0.125 +2024-09-20 07:00:02,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=837280.0, ans=10.0 +2024-09-20 07:00:04,978 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.978e+01 8.402e+01 9.011e+01 9.514e+01 2.556e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 07:00:24,489 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=837360.0, ans=0.125 +2024-09-20 07:00:31,959 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:00:40,655 INFO [train.py:1198] (0/2) Epoch 47, batch 1200, loss[loss=0.2433, ctc_loss=0.1158, cr_loss=0.349, attn_decoder_loss=0.2497, over 29679.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1084, cr_loss=0.346, attn_decoder_loss=0.237, over 5748968.84 frames. ], batch size: 85, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 07:00:44,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=837400.0, ans=0.125 +2024-09-20 07:00:48,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=837400.0, ans=0.125 +2024-09-20 07:01:09,774 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:01:14,736 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.78 vs. limit=15.0 +2024-09-20 07:01:21,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=837480.0, ans=0.125 +2024-09-20 07:01:56,185 INFO [train.py:1198] (0/2) Epoch 47, batch 1250, loss[loss=0.2433, ctc_loss=0.1209, cr_loss=0.3763, attn_decoder_loss=0.2485, over 29513.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1087, cr_loss=0.3468, attn_decoder_loss=0.2374, over 5776280.12 frames. ], batch size: 92, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 07:02:06,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=837600.0, ans=0.09899494936611666 +2024-09-20 07:02:24,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=837640.0, ans=0.0 +2024-09-20 07:02:26,477 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.34 vs. limit=15.0 +2024-09-20 07:02:35,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.10 vs. limit=15.0 +2024-09-20 07:02:37,735 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.731e+01 8.615e+01 9.145e+01 9.650e+01 1.333e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-20 07:03:13,888 INFO [train.py:1198] (0/2) Epoch 47, batch 1300, loss[loss=0.2437, ctc_loss=0.1125, cr_loss=0.3518, attn_decoder_loss=0.2505, over 28380.00 frames. 
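
The recurring `WARNING [optim.py:487] Clipping_scale=2.0, grad-norm quartiles ...` lines describe adaptive gradient clipping: the five numbers are the min/25%/median/75%/max of recently observed gradient norms, and the clipping threshold follows the recent distribution rather than a fixed constant (in the warning above, threshold 1.802e+02 is exactly 2.0 times the logged median 9.011e+01). A rough sketch of that idea; the window size and the median rule are assumptions for illustration, not the exact icefall logic:

```python
import collections
import torch

class AdaptiveGradClipper:
    """Clip the global grad norm against 'clipping_scale x recent median'."""

    def __init__(self, clipping_scale: float = 2.0, window: int = 128):
        self.clipping_scale = clipping_scale
        self.norms = collections.deque(maxlen=window)

    def clip_(self, parameters):
        params = [p for p in parameters if p.grad is not None]
        norm = torch.norm(torch.stack([p.grad.norm() for p in params])).item()
        self.norms.append(norm)
        ranked = sorted(self.norms)
        threshold = self.clipping_scale * ranked[len(ranked) // 2]
        clipped = norm > threshold
        if clipped:
            for p in params:
                p.grad.mul_(threshold / norm)
        # the quartiles printed in the warnings above
        quartiles = [ranked[int(f * (len(ranked) - 1))]
                     for f in (0.0, 0.25, 0.5, 0.75, 1.0)]
        return norm, threshold, clipped, quartiles

model = torch.nn.Linear(8, 8)
model(torch.randn(4, 8)).sum().backward()
clipper = AdaptiveGradClipper()
print(clipper.clip_(model.parameters()))
```
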
], tot_loss[loss=0.2309, ctc_loss=0.1085, cr_loss=0.3467, attn_decoder_loss=0.2368, over 5781296.89 frames. ], batch size: 112, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:03:17,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.17 vs. limit=22.5 +2024-09-20 07:03:35,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=837840.0, ans=0.0 +2024-09-20 07:04:07,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=837920.0, ans=0.125 +2024-09-20 07:04:10,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=837920.0, ans=0.0 +2024-09-20 07:04:25,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.59 vs. limit=12.0 +2024-09-20 07:04:25,594 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.69 vs. limit=22.5 +2024-09-20 07:04:29,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=837960.0, ans=10.0 +2024-09-20 07:04:31,984 INFO [train.py:1198] (0/2) Epoch 47, batch 1350, loss[loss=0.2309, ctc_loss=0.1089, cr_loss=0.3386, attn_decoder_loss=0.237, over 29769.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1083, cr_loss=0.3463, attn_decoder_loss=0.2368, over 5797616.44 frames. ], batch size: 81, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:04:41,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=838000.0, ans=0.125 +2024-09-20 07:04:45,688 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:05:03,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=838080.0, ans=0.125 +2024-09-20 07:05:03,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=838080.0, ans=0.0 +2024-09-20 07:05:10,468 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.518e+01 8.374e+01 8.876e+01 9.629e+01 1.227e+02, threshold=1.775e+02, percent-clipped=0.0 +2024-09-20 07:05:34,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=838160.0, ans=0.125 +2024-09-20 07:05:39,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1.whitening_limit, batch_count=838160.0, ans=10.0 +2024-09-20 07:05:46,452 INFO [train.py:1198] (0/2) Epoch 47, batch 1400, loss[loss=0.1946, ctc_loss=0.0817, cr_loss=0.2936, attn_decoder_loss=0.2006, over 29583.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1082, cr_loss=0.3467, attn_decoder_loss=0.2365, over 5808545.29 frames. ], batch size: 69, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:05:57,756 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.14 vs. 
limit=15.0 +2024-09-20 07:06:09,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=838240.0, ans=0.1 +2024-09-20 07:06:15,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=838240.0, ans=0.1 +2024-09-20 07:06:22,210 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.11 vs. limit=6.0 +2024-09-20 07:06:47,265 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=838360.0, ans=0.0 +2024-09-20 07:07:03,992 INFO [train.py:1198] (0/2) Epoch 47, batch 1450, loss[loss=0.2473, ctc_loss=0.1259, cr_loss=0.3832, attn_decoder_loss=0.2522, over 29434.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1085, cr_loss=0.3478, attn_decoder_loss=0.2371, over 5805254.63 frames. ], batch size: 94, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:07:35,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=838480.0, ans=0.125 +2024-09-20 07:07:45,072 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.353e+01 8.627e+01 9.137e+01 9.746e+01 6.249e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-20 07:07:49,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=838520.0, ans=0.125 +2024-09-20 07:07:49,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=838520.0, ans=0.125 +2024-09-20 07:07:56,377 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.81 vs. limit=10.0 +2024-09-20 07:07:57,905 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.92 vs. limit=22.5 +2024-09-20 07:08:00,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_na.min_abs, batch_count=838520.0, ans=0.02 +2024-09-20 07:08:06,095 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=838560.0, ans=0.0 +2024-09-20 07:08:09,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=838560.0, ans=0.125 +2024-09-20 07:08:09,636 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.36 vs. limit=12.0 +2024-09-20 07:08:11,974 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=838560.0, ans=0.0 +2024-09-20 07:08:16,545 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=838560.0, ans=0.125 +2024-09-20 07:08:20,890 INFO [train.py:1198] (0/2) Epoch 47, batch 1500, loss[loss=0.2487, ctc_loss=0.1132, cr_loss=0.3669, attn_decoder_loss=0.2556, over 29636.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1086, cr_loss=0.3476, attn_decoder_loss=0.2373, over 5805791.52 frames. 
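
Each `train.py:1198` line decomposes the objective into a CTC term (`ctc_loss`), a consistency-regularization term (`cr_loss`) and an attention-decoder term (`attn_decoder_loss`), logged both for the current batch and as a running total. The printed totals are consistent with a fixed weighted sum of roughly 0.1, 0.02 and 0.9; these weights are inferred from the numbers in the log, not read from the recipe's configuration:

```python
# Weighted combination consistent with the logged numbers; the weights
# are inferred from the log, not taken from the training configuration.
CTC_WEIGHT = 0.1
CR_WEIGHT = 0.02
ATTN_DECODER_WEIGHT = 0.9

def total_loss(ctc_loss: float, cr_loss: float, attn_decoder_loss: float) -> float:
    return (CTC_WEIGHT * ctc_loss
            + CR_WEIGHT * cr_loss
            + ATTN_DECODER_WEIGHT * attn_decoder_loss)

# batch 1450 above: loss=0.2473, ctc=0.1259, cr=0.3832, attn_decoder=0.2522
print(total_loss(0.1259, 0.3832, 0.2522))  # ~0.2472
```
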
], batch size: 86, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:08:31,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=838600.0, ans=0.125 +2024-09-20 07:08:50,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.86 vs. limit=10.0 +2024-09-20 07:08:56,176 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=838680.0, ans=0.0 +2024-09-20 07:09:02,280 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=838680.0, ans=0.2 +2024-09-20 07:09:02,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.44 vs. limit=15.0 +2024-09-20 07:09:23,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=838760.0, ans=0.025 +2024-09-20 07:09:27,246 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=4.92 vs. limit=15.0 +2024-09-20 07:09:36,604 INFO [train.py:1198] (0/2) Epoch 47, batch 1550, loss[loss=0.2413, ctc_loss=0.1192, cr_loss=0.3704, attn_decoder_loss=0.2467, over 29513.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1089, cr_loss=0.3481, attn_decoder_loss=0.2375, over 5781840.48 frames. ], batch size: 90, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:09:38,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=838800.0, ans=0.125 +2024-09-20 07:09:49,294 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.34 vs. limit=10.0 +2024-09-20 07:09:59,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=838840.0, ans=0.0 +2024-09-20 07:10:09,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=838880.0, ans=0.125 +2024-09-20 07:10:17,554 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.425e+01 8.567e+01 9.122e+01 9.785e+01 2.024e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-20 07:10:37,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=838960.0, ans=0.1 +2024-09-20 07:10:53,714 INFO [train.py:1198] (0/2) Epoch 47, batch 1600, loss[loss=0.2385, ctc_loss=0.1111, cr_loss=0.3582, attn_decoder_loss=0.2447, over 29685.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.109, cr_loss=0.3479, attn_decoder_loss=0.2372, over 5763786.92 frames. 
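
`grad_scale` in these lines is the dynamic loss scale of mixed-precision training: gradients are computed on a scaled loss, the scale is halved whenever the scaled gradients overflow and grown back after a stable stretch, which is why it moves between 8.0, 16.0 and 32.0 across this part of the run. A minimal sketch using PyTorch's stock scaler; the model and batch are placeholders and a CUDA device is assumed, as in the `(0/2)` two-GPU run above:

```python
import torch

model = torch.nn.Linear(80, 500).cuda()          # placeholder model
optimizer = torch.optim.AdamW(model.parameters(), lr=2.34e-3)
scaler = torch.cuda.amp.GradScaler()             # owns the dynamic grad_scale

features = torch.randn(32, 80, device="cuda")    # placeholder batch
optimizer.zero_grad()
with torch.cuda.amp.autocast(dtype=torch.float16):
    loss = model(features).square().mean()
scaler.scale(loss).backward()  # backward through the scaled loss
scaler.step(optimizer)         # unscales grads; skips the step on inf/nan
scaler.update()                # halve on overflow, grow again when stable
print(scaler.get_scale())      # the value the log reports as grad_scale
```
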
], batch size: 85, lr: 2.34e-03, grad_scale: 32.0 +2024-09-20 07:10:54,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=839000.0, ans=0.125 +2024-09-20 07:11:20,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=839040.0, ans=0.025 +2024-09-20 07:11:39,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=839120.0, ans=0.0 +2024-09-20 07:11:50,634 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.25 vs. limit=15.0 +2024-09-20 07:11:54,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=839160.0, ans=0.125 +2024-09-20 07:11:54,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=839160.0, ans=0.125 +2024-09-20 07:11:56,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=839160.0, ans=0.2 +2024-09-20 07:11:57,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=839160.0, ans=0.125 +2024-09-20 07:12:11,193 INFO [train.py:1198] (0/2) Epoch 47, batch 1650, loss[loss=0.2428, ctc_loss=0.1126, cr_loss=0.3533, attn_decoder_loss=0.2495, over 29709.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1087, cr_loss=0.3472, attn_decoder_loss=0.2368, over 5758169.05 frames. ], batch size: 89, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:12:17,614 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=839200.0, ans=0.125 +2024-09-20 07:12:20,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=839200.0, ans=0.1 +2024-09-20 07:12:25,715 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.07 vs. limit=15.0 +2024-09-20 07:12:47,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=839280.0, ans=0.0 +2024-09-20 07:12:52,013 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.317e+01 8.634e+01 9.046e+01 9.641e+01 2.969e+02, threshold=1.809e+02, percent-clipped=2.0 +2024-09-20 07:12:57,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=839320.0, ans=0.125 +2024-09-20 07:13:19,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=839360.0, ans=0.125 +2024-09-20 07:13:23,074 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.95 vs. limit=15.0 +2024-09-20 07:13:26,655 INFO [train.py:1198] (0/2) Epoch 47, batch 1700, loss[loss=0.1974, ctc_loss=0.08603, cr_loss=0.3055, attn_decoder_loss=0.203, over 29602.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1081, cr_loss=0.346, attn_decoder_loss=0.2366, over 5779581.65 frames. 
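
The `Whitening:` lines compare a per-module statistic of the activation covariance against a limit (for instance `metric=10.95 vs. limit=15.0` above); roughly speaking, the limit marks the point past which the module starts pushing activations back toward a whiter, more isotropic distribution. One simple way to score non-whiteness, as an illustrative stand-in rather than the exact formula in `scaling.py`:

```python
import torch

def whiteness_metric(feats: torch.Tensor) -> float:
    """Score how far (frames, channels) features are from 'white'.

    Returns ~1.0 when the covariance eigenvalues are all equal, and grows
    as energy concentrates in a few directions. Illustrative stand-in for
    the metric in the Whitening log lines, not the scaling.py formula.
    """
    feats = feats - feats.mean(dim=0, keepdim=True)
    cov = feats.T @ feats / feats.shape[0]
    eigs = torch.linalg.eigvalsh(cov).clamp(min=0.0)
    return float((eigs ** 2).mean() / eigs.mean() ** 2)

white = torch.randn(10000, 64)                  # roughly white features
skewed = white * torch.linspace(0.1, 5.0, 64)   # unequal channel scales
print(whiteness_metric(white))    # close to 1
print(whiteness_metric(skewed))   # noticeably larger, around 1.8
```
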
], batch size: 69, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:13:26,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=839400.0, ans=0.5 +2024-09-20 07:13:36,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.71 vs. limit=15.0 +2024-09-20 07:14:02,855 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.28 vs. limit=10.0 +2024-09-20 07:14:06,557 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:14:39,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.99 vs. limit=15.0 +2024-09-20 07:14:43,708 INFO [train.py:1198] (0/2) Epoch 47, batch 1750, loss[loss=0.2076, ctc_loss=0.09166, cr_loss=0.3124, attn_decoder_loss=0.2136, over 29309.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1078, cr_loss=0.3459, attn_decoder_loss=0.2364, over 5786334.70 frames. ], batch size: 67, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:14:45,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=839600.0, ans=0.07 +2024-09-20 07:15:04,312 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.58 vs. limit=15.0 +2024-09-20 07:15:07,554 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.27 vs. limit=15.0 +2024-09-20 07:15:26,384 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.597e+01 9.141e+01 9.828e+01 1.386e+02, threshold=1.828e+02, percent-clipped=0.0 +2024-09-20 07:15:34,336 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=839720.0, ans=0.125 +2024-09-20 07:15:37,456 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=839720.0, ans=0.125 +2024-09-20 07:16:00,712 INFO [train.py:1198] (0/2) Epoch 47, batch 1800, loss[loss=0.2422, ctc_loss=0.118, cr_loss=0.3749, attn_decoder_loss=0.2477, over 29679.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1083, cr_loss=0.3471, attn_decoder_loss=0.2367, over 5790396.59 frames. ], batch size: 83, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:16:03,303 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.79 vs. 
limit=15.0 +2024-09-20 07:16:13,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=839800.0, ans=0.125 +2024-09-20 07:16:22,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=839840.0, ans=0.0 +2024-09-20 07:16:25,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=839840.0, ans=0.025 +2024-09-20 07:16:55,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=839920.0, ans=0.125 +2024-09-20 07:17:16,946 INFO [train.py:1198] (0/2) Epoch 47, batch 1850, loss[loss=0.2395, ctc_loss=0.1163, cr_loss=0.3517, attn_decoder_loss=0.2454, over 29628.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1082, cr_loss=0.3471, attn_decoder_loss=0.2367, over 5795429.22 frames. ], batch size: 86, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:17:24,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=840000.0, ans=0.0 +2024-09-20 07:17:29,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=840000.0, ans=0.0 +2024-09-20 07:17:39,021 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.10 vs. limit=22.5 +2024-09-20 07:18:01,054 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.631e+01 9.188e+01 9.651e+01 1.430e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-20 07:18:34,074 INFO [train.py:1198] (0/2) Epoch 47, batch 1900, loss[loss=0.2367, ctc_loss=0.1061, cr_loss=0.3406, attn_decoder_loss=0.2437, over 29700.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1085, cr_loss=0.348, attn_decoder_loss=0.2375, over 5803245.97 frames. ], batch size: 89, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:18:36,369 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.10 vs. limit=6.0 +2024-09-20 07:18:39,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.98 vs. limit=22.5 +2024-09-20 07:18:41,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=840200.0, ans=0.125 +2024-09-20 07:18:42,139 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.53 vs. 
limit=22.5 +2024-09-20 07:18:52,525 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=840240.0, ans=0.125 +2024-09-20 07:19:08,154 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer_na.min_abs, batch_count=840280.0, ans=0.02 +2024-09-20 07:19:12,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=840280.0, ans=0.125 +2024-09-20 07:19:21,886 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:19:32,324 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=840320.0, ans=0.0 +2024-09-20 07:19:35,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=840360.0, ans=0.0 +2024-09-20 07:19:51,436 INFO [train.py:1198] (0/2) Epoch 47, batch 1950, loss[loss=0.2259, ctc_loss=0.09598, cr_loss=0.3169, attn_decoder_loss=0.2333, over 29474.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1091, cr_loss=0.3495, attn_decoder_loss=0.2384, over 5818546.31 frames. ], batch size: 78, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:19:54,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=840400.0, ans=0.04949747468305833 +2024-09-20 07:20:05,250 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=840440.0, ans=0.125 +2024-09-20 07:20:06,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=840440.0, ans=0.0 +2024-09-20 07:20:07,396 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.34 vs. limit=15.0 +2024-09-20 07:20:33,255 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.893e+01 9.385e+01 9.948e+01 2.061e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-20 07:20:48,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=840520.0, ans=0.1 +2024-09-20 07:20:54,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=840560.0, ans=0.0 +2024-09-20 07:20:59,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=840560.0, ans=0.125 +2024-09-20 07:21:06,476 INFO [train.py:1198] (0/2) Epoch 47, batch 2000, loss[loss=0.201, ctc_loss=0.0855, cr_loss=0.3055, attn_decoder_loss=0.2071, over 29322.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1092, cr_loss=0.3496, attn_decoder_loss=0.2384, over 5797390.64 frames. ], batch size: 67, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:21:16,062 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=840600.0, ans=0.05 +2024-09-20 07:21:25,707 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.73 vs. 
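
Each progress line pairs the current batch's loss `over` its own frame count with `tot_loss[...]` over a total hovering near 5.8 million frames, so the running figures are frame-weighted averages over a decaying window of recent batches rather than a plain mean. A small sketch of frame-weighted tracking; the decay constant is an assumption, chosen so the steady-state window (about 29.5k frames per batch divided by 0.005) lands near the logged ~5.8M-frame totals:

```python
class FrameWeightedLoss:
    """Running frame-weighted loss like the tot_loss[...] entries above."""

    def __init__(self, decay: float = 0.995):  # decay is an assumption
        self.decay = decay
        self.loss_sum = 0.0
        self.frames = 0.0

    def update(self, batch_loss: float, batch_frames: float) -> float:
        # batches contribute in proportion to how many frames they contain
        self.loss_sum = self.decay * self.loss_sum + batch_loss * batch_frames
        self.frames = self.decay * self.frames + batch_frames
        return self.loss_sum / self.frames  # the value that gets logged

# (loss, frames) for batches 1950, 2000 and 2050 from the nearby lines
tracker = FrameWeightedLoss()
for loss, frames in [(0.2259, 29474.0), (0.201, 29322.0), (0.2023, 29452.0)]:
    print(tracker.update(loss, frames))
```
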
limit=15.0 +2024-09-20 07:21:28,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=840640.0, ans=0.025 +2024-09-20 07:22:02,230 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.31 vs. limit=6.0 +2024-09-20 07:22:10,365 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=840760.0, ans=0.1 +2024-09-20 07:22:24,173 INFO [train.py:1198] (0/2) Epoch 47, batch 2050, loss[loss=0.2023, ctc_loss=0.08672, cr_loss=0.3017, attn_decoder_loss=0.2085, over 29452.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1086, cr_loss=0.3479, attn_decoder_loss=0.2375, over 5789306.08 frames. ], batch size: 70, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:22:24,440 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=840800.0, ans=0.04949747468305833 +2024-09-20 07:22:41,384 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.42 vs. limit=12.0 +2024-09-20 07:22:55,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=840880.0, ans=0.0 +2024-09-20 07:22:55,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=840880.0, ans=0.0 +2024-09-20 07:22:59,503 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=840880.0, ans=0.0 +2024-09-20 07:23:09,568 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.215e+01 8.643e+01 9.120e+01 9.477e+01 1.642e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-20 07:23:19,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.67 vs. limit=15.0 +2024-09-20 07:23:23,601 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=840920.0, ans=0.2 +2024-09-20 07:23:26,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=840960.0, ans=0.125 +2024-09-20 07:23:41,213 INFO [train.py:1198] (0/2) Epoch 47, batch 2100, loss[loss=0.2324, ctc_loss=0.1097, cr_loss=0.3547, attn_decoder_loss=0.2382, over 29733.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1083, cr_loss=0.347, attn_decoder_loss=0.237, over 5800064.23 frames. ], batch size: 81, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:23:43,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.83 vs. limit=15.0 +2024-09-20 07:23:44,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=841000.0, ans=0.1 +2024-09-20 07:24:03,170 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.80 vs. 
limit=12.0 +2024-09-20 07:24:04,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=841040.0, ans=0.07 +2024-09-20 07:24:08,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=841040.0, ans=0.0 +2024-09-20 07:24:10,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=841080.0, ans=0.1 +2024-09-20 07:24:14,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=841080.0, ans=0.2 +2024-09-20 07:24:27,033 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.58 vs. limit=15.0 +2024-09-20 07:24:29,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=841120.0, ans=0.125 +2024-09-20 07:24:29,945 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.00 vs. limit=15.0 +2024-09-20 07:24:40,540 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.16 vs. limit=15.0 +2024-09-20 07:24:49,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=841160.0, ans=0.0 +2024-09-20 07:24:53,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=841160.0, ans=0.125 +2024-09-20 07:24:56,334 INFO [train.py:1198] (0/2) Epoch 47, batch 2150, loss[loss=0.2276, ctc_loss=0.1109, cr_loss=0.3467, attn_decoder_loss=0.2328, over 29448.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1078, cr_loss=0.3463, attn_decoder_loss=0.2363, over 5814989.13 frames. ], batch size: 78, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:25:11,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=841240.0, ans=0.2 +2024-09-20 07:25:21,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.68 vs. limit=15.0 +2024-09-20 07:25:39,992 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.643e+01 8.572e+01 9.031e+01 9.738e+01 1.571e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-20 07:25:41,774 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=841320.0, ans=0.2 +2024-09-20 07:25:44,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=841320.0, ans=0.025 +2024-09-20 07:25:45,092 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.00 vs. limit=12.0 +2024-09-20 07:25:47,785 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=841320.0, ans=0.0 +2024-09-20 07:25:48,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.29 vs. 
limit=12.0 +2024-09-20 07:25:51,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=841320.0, ans=0.0 +2024-09-20 07:25:52,973 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=841320.0, ans=0.2 +2024-09-20 07:26:08,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=841360.0, ans=0.2 +2024-09-20 07:26:12,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=841400.0, ans=0.125 +2024-09-20 07:26:13,833 INFO [train.py:1198] (0/2) Epoch 47, batch 2200, loss[loss=0.2406, ctc_loss=0.1162, cr_loss=0.3563, attn_decoder_loss=0.2466, over 29640.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1079, cr_loss=0.3463, attn_decoder_loss=0.2364, over 5810924.36 frames. ], batch size: 86, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:26:15,792 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=841400.0, ans=0.2 +2024-09-20 07:26:18,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=841400.0, ans=0.125 +2024-09-20 07:26:23,115 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=841400.0, ans=0.125 +2024-09-20 07:26:35,510 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.38 vs. limit=15.0 +2024-09-20 07:26:41,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=841440.0, ans=0.0 +2024-09-20 07:26:57,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=841480.0, ans=0.125 +2024-09-20 07:27:06,729 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.83 vs. limit=15.0 +2024-09-20 07:27:13,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=841520.0, ans=0.0 +2024-09-20 07:27:31,966 INFO [train.py:1198] (0/2) Epoch 47, batch 2250, loss[loss=0.2368, ctc_loss=0.1093, cr_loss=0.3317, attn_decoder_loss=0.2436, over 29732.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1077, cr_loss=0.3458, attn_decoder_loss=0.2363, over 5811443.11 frames. ], batch size: 82, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:27:47,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=841640.0, ans=0.125 +2024-09-20 07:27:49,331 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.83 vs. limit=12.0 +2024-09-20 07:28:02,670 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.66 vs. 
limit=22.5 +2024-09-20 07:28:03,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=841680.0, ans=0.0 +2024-09-20 07:28:09,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=841680.0, ans=0.125 +2024-09-20 07:28:14,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=841680.0, ans=0.2 +2024-09-20 07:28:15,524 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.560e+01 8.630e+01 9.023e+01 9.514e+01 2.412e+02, threshold=1.805e+02, percent-clipped=2.0 +2024-09-20 07:28:28,174 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.59 vs. limit=15.0 +2024-09-20 07:28:34,522 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.55 vs. limit=15.0 +2024-09-20 07:28:38,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=841760.0, ans=0.2 +2024-09-20 07:28:42,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=841760.0, ans=0.125 +2024-09-20 07:28:47,112 INFO [train.py:1198] (0/2) Epoch 47, batch 2300, loss[loss=0.2141, ctc_loss=0.1049, cr_loss=0.3409, attn_decoder_loss=0.2187, over 29303.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1073, cr_loss=0.3445, attn_decoder_loss=0.2356, over 5797323.47 frames. ], batch size: 71, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:29:00,643 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=841840.0, ans=0.1 +2024-09-20 07:29:00,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=841840.0, ans=10.0 +2024-09-20 07:29:28,261 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=841880.0, ans=0.07 +2024-09-20 07:29:29,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=841880.0, ans=0.0 +2024-09-20 07:29:34,598 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.27 vs. limit=15.0 +2024-09-20 07:29:58,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=841960.0, ans=0.125 +2024-09-20 07:29:59,004 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=841960.0, ans=0.0 +2024-09-20 07:30:04,682 INFO [train.py:1198] (0/2) Epoch 47, batch 2350, loss[loss=0.2444, ctc_loss=0.1239, cr_loss=0.3836, attn_decoder_loss=0.2493, over 29698.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1076, cr_loss=0.3454, attn_decoder_loss=0.2357, over 5802668.85 frames. 
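
Many of the `ScheduledFloat` names above belong to `Balancer` modules (`balancer1.prob`, `balancer_na.min_abs`, `min_positive`, `max_abs`, ...): train-time constraints that keep per-channel activation statistics, such as the fraction of positive values or the mean absolute value, inside a target range, applied stochastically with the logged `prob`. A toy diagnostic in that spirit, reusing thresholds that appear in this section (min_positive 0.025, max_positive 0.95, min_abs 0.02, max_abs 10.0); the real module corrects gradients rather than just reporting:

```python
import torch

def balancer_violations(x: torch.Tensor,
                        min_positive: float = 0.025,
                        max_positive: float = 0.95,
                        min_abs: float = 0.02,
                        max_abs: float = 10.0) -> dict:
    """Count channels of x (frames, channels) outside the target ranges."""
    frac_positive = (x > 0).float().mean(dim=0)
    mean_abs = x.abs().mean(dim=0)
    return {
        "too_few_positive": int((frac_positive < min_positive).sum()),
        "too_many_positive": int((frac_positive > max_positive).sum()),
        "too_small_abs": int((mean_abs < min_abs).sum()),
        "too_large_abs": int((mean_abs > max_abs).sum()),
    }

acts = torch.randn(1000, 512)
acts[:, :8] -= 3.0  # a few channels that are almost never positive
print(balancer_violations(acts))  # flags ~8 channels as too_few_positive
```
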
], batch size: 83, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:30:10,887 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=842000.0, ans=0.1 +2024-09-20 07:30:12,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=842000.0, ans=0.1 +2024-09-20 07:30:50,388 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.856e+01 8.800e+01 9.287e+01 9.916e+01 3.475e+02, threshold=1.857e+02, percent-clipped=2.0 +2024-09-20 07:30:51,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.21 vs. limit=15.0 +2024-09-20 07:31:18,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=842160.0, ans=0.125 +2024-09-20 07:31:22,263 INFO [train.py:1198] (0/2) Epoch 47, batch 2400, loss[loss=0.2253, ctc_loss=0.1059, cr_loss=0.3552, attn_decoder_loss=0.2307, over 29526.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1081, cr_loss=0.3465, attn_decoder_loss=0.2364, over 5807643.08 frames. ], batch size: 76, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:31:26,153 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.36 vs. limit=15.0 +2024-09-20 07:31:31,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=842200.0, ans=0.95 +2024-09-20 07:32:00,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=842280.0, ans=0.0 +2024-09-20 07:32:07,250 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.93 vs. limit=15.0 +2024-09-20 07:32:15,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=842320.0, ans=0.0 +2024-09-20 07:32:17,120 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=842320.0, ans=0.125 +2024-09-20 07:32:20,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=842320.0, ans=0.04949747468305833 +2024-09-20 07:32:27,825 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=842360.0, ans=0.0 +2024-09-20 07:32:33,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=842360.0, ans=0.125 +2024-09-20 07:32:38,367 INFO [train.py:1198] (0/2) Epoch 47, batch 2450, loss[loss=0.2331, ctc_loss=0.1081, cr_loss=0.3441, attn_decoder_loss=0.2394, over 29718.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1086, cr_loss=0.3469, attn_decoder_loss=0.2373, over 5784330.63 frames. ], batch size: 82, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:33:03,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.78 vs. 
limit=6.0 +2024-09-20 07:33:05,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=842440.0, ans=0.125 +2024-09-20 07:33:05,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=842440.0, ans=0.04949747468305833 +2024-09-20 07:33:07,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=842480.0, ans=0.1 +2024-09-20 07:33:21,566 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.574e+01 9.096e+01 9.798e+01 1.804e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-20 07:33:29,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=842520.0, ans=0.0 +2024-09-20 07:33:48,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=842560.0, ans=0.125 +2024-09-20 07:33:49,795 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=842560.0, ans=0.125 +2024-09-20 07:33:55,453 INFO [train.py:1198] (0/2) Epoch 47, batch 2500, loss[loss=0.251, ctc_loss=0.1232, cr_loss=0.3979, attn_decoder_loss=0.2563, over 29649.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1089, cr_loss=0.3474, attn_decoder_loss=0.2376, over 5794327.35 frames. ], batch size: 86, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:33:59,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.35 vs. limit=15.0 +2024-09-20 07:34:03,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=842600.0, ans=0.125 +2024-09-20 07:34:09,431 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=842640.0, ans=0.0 +2024-09-20 07:34:16,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=842640.0, ans=0.1 +2024-09-20 07:34:21,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=842640.0, ans=0.0 +2024-09-20 07:34:25,202 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.48 vs. limit=15.0 +2024-09-20 07:34:35,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=842680.0, ans=0.125 +2024-09-20 07:35:13,292 INFO [train.py:1198] (0/2) Epoch 47, batch 2550, loss[loss=0.2014, ctc_loss=0.08292, cr_loss=0.2991, attn_decoder_loss=0.208, over 29356.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1083, cr_loss=0.3464, attn_decoder_loss=0.2372, over 5797609.64 frames. ], batch size: 67, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:35:44,415 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.55 vs. 
limit=6.0 +2024-09-20 07:35:49,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.whiten.whitening_limit, batch_count=842880.0, ans=12.0 +2024-09-20 07:35:51,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=842880.0, ans=0.2 +2024-09-20 07:35:54,131 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=842880.0, ans=0.2 +2024-09-20 07:35:56,714 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.552e+01 8.685e+01 9.047e+01 9.681e+01 1.454e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-20 07:36:28,672 INFO [train.py:1198] (0/2) Epoch 47, batch 2600, loss[loss=0.2231, ctc_loss=0.09966, cr_loss=0.3286, attn_decoder_loss=0.2295, over 29439.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1084, cr_loss=0.3469, attn_decoder_loss=0.2375, over 5794394.49 frames. ], batch size: 78, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:36:54,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=843040.0, ans=0.125 +2024-09-20 07:36:57,477 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=843080.0, ans=0.1 +2024-09-20 07:36:59,499 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.91 vs. limit=10.0 +2024-09-20 07:37:34,071 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.91 vs. limit=12.0 +2024-09-20 07:37:46,284 INFO [train.py:1198] (0/2) Epoch 47, batch 2650, loss[loss=0.2397, ctc_loss=0.114, cr_loss=0.3714, attn_decoder_loss=0.2454, over 29252.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1082, cr_loss=0.3468, attn_decoder_loss=0.2376, over 5801649.14 frames. ], batch size: 100, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:38:00,925 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.96 vs. limit=22.5 +2024-09-20 07:38:04,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=843240.0, ans=0.0 +2024-09-20 07:38:07,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=843240.0, ans=0.0 +2024-09-20 07:38:31,922 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.003e+01 8.688e+01 9.037e+01 9.488e+01 1.743e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-20 07:38:35,652 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.40 vs. limit=22.5 +2024-09-20 07:39:03,847 INFO [train.py:1198] (0/2) Epoch 47, batch 2700, loss[loss=0.2387, ctc_loss=0.1125, cr_loss=0.3602, attn_decoder_loss=0.2447, over 29513.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1084, cr_loss=0.3472, attn_decoder_loss=0.2376, over 5797523.32 frames. 
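
The learning rate in these lines decays smoothly inside the epoch (2.35e-03 up to around batch 1250, 2.34e-03 afterwards, 2.33e-03 by batch 3050) because it is a continuous function of the batch and epoch counters rather than a stepped schedule. A sketch modeled on the shape of icefall's Eden scheduler; `base_lr` and the two time constants are illustrative defaults, not this run's settings:

```python
def eden_lr(base_lr: float, batch: int, epoch: float,
            lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
    """Eden-style lr: a smooth power-law decay in both batch and epoch.

    Functional form modeled on icefall's Eden scheduler; the constants
    here are illustrative, not read from this training run.
    """
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor

# Over a few thousand batches this late in training the change is tiny,
# which is why the printed lr only moves in its third significant digit.
for batch in (836000, 840000, 845000):
    print(f"{eden_lr(0.045, batch, epoch=47):.4e}")
```
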
], batch size: 87, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:39:13,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=843400.0, ans=0.125 +2024-09-20 07:39:19,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=843440.0, ans=0.0 +2024-09-20 07:39:47,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=843520.0, ans=0.2 +2024-09-20 07:40:04,200 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=843560.0, ans=0.125 +2024-09-20 07:40:16,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=843560.0, ans=0.2 +2024-09-20 07:40:19,196 INFO [train.py:1198] (0/2) Epoch 47, batch 2750, loss[loss=0.216, ctc_loss=0.1061, cr_loss=0.3476, attn_decoder_loss=0.2205, over 29523.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1078, cr_loss=0.3459, attn_decoder_loss=0.2366, over 5794815.33 frames. ], batch size: 75, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:40:33,992 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.11 vs. limit=12.0 +2024-09-20 07:40:36,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=843640.0, ans=0.0 +2024-09-20 07:41:02,935 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.699e+01 9.178e+01 9.870e+01 7.766e+02, threshold=1.836e+02, percent-clipped=3.0 +2024-09-20 07:41:10,146 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.09 vs. limit=15.0 +2024-09-20 07:41:12,419 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=843720.0, ans=0.125 +2024-09-20 07:41:21,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=843760.0, ans=0.0 +2024-09-20 07:41:37,199 INFO [train.py:1198] (0/2) Epoch 47, batch 2800, loss[loss=0.2641, ctc_loss=0.1492, cr_loss=0.4008, attn_decoder_loss=0.2679, over 19412.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1081, cr_loss=0.3462, attn_decoder_loss=0.2369, over 5775943.44 frames. ], batch size: 210, lr: 2.34e-03, grad_scale: 32.0 +2024-09-20 07:41:54,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=843840.0, ans=0.125 +2024-09-20 07:42:06,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=843840.0, ans=0.0 +2024-09-20 07:42:14,230 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=843880.0, ans=0.125 +2024-09-20 07:42:17,932 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.38 vs. 
limit=15.0 +2024-09-20 07:42:23,001 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=843920.0, ans=0.125 +2024-09-20 07:42:53,453 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=844000.0, ans=0.0 +2024-09-20 07:42:54,619 INFO [train.py:1198] (0/2) Epoch 47, batch 2850, loss[loss=0.2241, ctc_loss=0.1029, cr_loss=0.3313, attn_decoder_loss=0.2302, over 29494.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1084, cr_loss=0.3469, attn_decoder_loss=0.2376, over 5762714.52 frames. ], batch size: 77, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:43:04,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=844000.0, ans=0.0 +2024-09-20 07:43:25,074 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=844080.0, ans=0.1 +2024-09-20 07:43:32,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=844080.0, ans=0.025 +2024-09-20 07:43:32,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=844080.0, ans=0.0 +2024-09-20 07:43:37,086 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=844080.0, ans=0.125 +2024-09-20 07:43:39,707 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.812e+01 9.340e+01 9.979e+01 3.635e+02, threshold=1.868e+02, percent-clipped=1.0 +2024-09-20 07:43:49,554 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.66 vs. limit=22.5 +2024-09-20 07:44:01,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=844160.0, ans=0.2 +2024-09-20 07:44:04,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=844160.0, ans=0.0 +2024-09-20 07:44:09,787 INFO [train.py:1198] (0/2) Epoch 47, batch 2900, loss[loss=0.233, ctc_loss=0.1153, cr_loss=0.368, attn_decoder_loss=0.2379, over 29417.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.109, cr_loss=0.3486, attn_decoder_loss=0.2388, over 5787896.81 frames. ], batch size: 79, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:45:27,382 INFO [train.py:1198] (0/2) Epoch 47, batch 2950, loss[loss=0.226, ctc_loss=0.1101, cr_loss=0.3562, attn_decoder_loss=0.2309, over 29518.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1081, cr_loss=0.3467, attn_decoder_loss=0.2375, over 5781945.55 frames. ], batch size: 75, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:45:32,716 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.59 vs. 
limit=15.0 +2024-09-20 07:45:35,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=844400.0, ans=0.0 +2024-09-20 07:45:59,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=844480.0, ans=0.1 +2024-09-20 07:46:05,138 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.73 vs. limit=6.0 +2024-09-20 07:46:14,754 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.670e+01 9.173e+01 9.775e+01 4.031e+02, threshold=1.835e+02, percent-clipped=2.0 +2024-09-20 07:46:36,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.72 vs. limit=15.0 +2024-09-20 07:46:45,068 INFO [train.py:1198] (0/2) Epoch 47, batch 3000, loss[loss=0.2306, ctc_loss=0.102, cr_loss=0.3381, attn_decoder_loss=0.2374, over 29772.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1084, cr_loss=0.3472, attn_decoder_loss=0.2373, over 5782197.46 frames. ], batch size: 81, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:46:45,069 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 07:47:03,444 INFO [train.py:1230] (0/2) Epoch 47, validation: loss=0.2127, ctc_loss=0.03692, cr_loss=6.538e-15, attn_decoder_loss=0.2323, over 944034.00 frames. +2024-09-20 07:47:03,445 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 07:47:15,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=844600.0, ans=0.2 +2024-09-20 07:47:29,582 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=844640.0, ans=0.0 +2024-09-20 07:47:35,528 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=844680.0, ans=0.0 +2024-09-20 07:47:36,093 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.39 vs. limit=22.5 +2024-09-20 07:47:38,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=844680.0, ans=0.125 +2024-09-20 07:47:46,239 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=844680.0, ans=0.1 +2024-09-20 07:48:16,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=844760.0, ans=0.0 +2024-09-20 07:48:16,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=844760.0, ans=0.125 +2024-09-20 07:48:19,546 INFO [train.py:1198] (0/2) Epoch 47, batch 3050, loss[loss=0.2301, ctc_loss=0.1077, cr_loss=0.3399, attn_decoder_loss=0.2361, over 29530.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1089, cr_loss=0.3479, attn_decoder_loss=0.2382, over 5776807.71 frames. 
], batch size: 76, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 07:48:22,932 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=844800.0, ans=0.0 +2024-09-20 07:48:36,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=844840.0, ans=0.0 +2024-09-20 07:48:53,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=844880.0, ans=0.0 +2024-09-20 07:49:06,338 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.834e+01 8.765e+01 9.371e+01 9.890e+01 2.296e+02, threshold=1.874e+02, percent-clipped=1.0 +2024-09-20 07:49:38,882 INFO [train.py:1198] (0/2) Epoch 47, batch 3100, loss[loss=0.246, ctc_loss=0.117, cr_loss=0.3721, attn_decoder_loss=0.2521, over 29280.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1091, cr_loss=0.3484, attn_decoder_loss=0.238, over 5776110.11 frames. ], batch size: 100, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 07:50:04,498 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:50:07,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=845080.0, ans=0.0 +2024-09-20 07:50:16,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=845080.0, ans=0.2 +2024-09-20 07:50:16,828 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.15 vs. limit=15.0 +2024-09-20 07:50:22,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=845120.0, ans=0.0 +2024-09-20 07:50:27,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=845120.0, ans=0.1 +2024-09-20 07:50:33,198 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=845120.0, ans=0.025 +2024-09-20 07:50:33,745 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.92 vs. limit=10.0 +2024-09-20 07:50:54,270 INFO [train.py:1198] (0/2) Epoch 47, batch 3150, loss[loss=0.2542, ctc_loss=0.1229, cr_loss=0.3719, attn_decoder_loss=0.2605, over 28861.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1088, cr_loss=0.3477, attn_decoder_loss=0.2379, over 5783552.70 frames. 
], batch size: 104, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 07:50:57,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=845200.0, ans=0.07 +2024-09-20 07:51:11,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=845240.0, ans=0.1 +2024-09-20 07:51:23,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=845280.0, ans=0.0 +2024-09-20 07:51:25,127 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:51:33,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=845280.0, ans=0.1 +2024-09-20 07:51:33,943 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:51:36,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=845280.0, ans=0.1 +2024-09-20 07:51:40,981 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.436e+01 8.583e+01 9.160e+01 9.723e+01 3.463e+02, threshold=1.832e+02, percent-clipped=1.0 +2024-09-20 07:51:55,020 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=845360.0, ans=0.0 +2024-09-20 07:52:09,911 INFO [train.py:1198] (0/2) Epoch 47, batch 3200, loss[loss=0.2173, ctc_loss=0.09778, cr_loss=0.3356, attn_decoder_loss=0.2231, over 29781.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1085, cr_loss=0.3471, attn_decoder_loss=0.2372, over 5795303.42 frames. ], batch size: 80, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:52:17,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=845400.0, ans=0.07 +2024-09-20 07:52:19,238 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=845400.0, ans=0.125 +2024-09-20 07:52:27,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.69 vs. limit=15.0 +2024-09-20 07:52:37,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=845440.0, ans=0.125 +2024-09-20 07:52:52,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=845480.0, ans=0.125 +2024-09-20 07:52:55,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=845520.0, ans=0.125 +2024-09-20 07:52:58,914 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=845520.0, ans=0.125 +2024-09-20 07:53:16,613 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.56 vs. 
limit=22.5 +2024-09-20 07:53:22,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=845560.0, ans=0.2 +2024-09-20 07:53:27,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=845600.0, ans=0.125 +2024-09-20 07:53:28,507 INFO [train.py:1198] (0/2) Epoch 47, batch 3250, loss[loss=0.2401, ctc_loss=0.1099, cr_loss=0.3596, attn_decoder_loss=0.2466, over 29701.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1086, cr_loss=0.3474, attn_decoder_loss=0.2377, over 5801559.56 frames. ], batch size: 84, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:53:59,934 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=4.79 vs. limit=15.0 +2024-09-20 07:54:13,280 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:54:17,342 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.625e+01 8.840e+01 9.453e+01 1.004e+02 3.254e+02, threshold=1.891e+02, percent-clipped=2.0 +2024-09-20 07:54:30,168 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.71 vs. limit=22.5 +2024-09-20 07:54:32,889 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.34 vs. limit=22.5 +2024-09-20 07:54:37,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=845760.0, ans=0.125 +2024-09-20 07:54:38,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=845760.0, ans=0.025 +2024-09-20 07:54:45,561 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.93 vs. limit=12.0 +2024-09-20 07:54:45,884 INFO [train.py:1198] (0/2) Epoch 47, batch 3300, loss[loss=0.2336, ctc_loss=0.101, cr_loss=0.323, attn_decoder_loss=0.2412, over 28386.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1078, cr_loss=0.3455, attn_decoder_loss=0.2364, over 5798708.90 frames. 
], batch size: 111, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:55:07,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=845840.0, ans=0.125 +2024-09-20 07:55:30,920 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=845920.0, ans=0.125 +2024-09-20 07:55:43,203 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=845920.0, ans=0.125 +2024-09-20 07:55:46,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=845960.0, ans=0.05 +2024-09-20 07:55:50,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=845960.0, ans=0.0 +2024-09-20 07:55:55,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=845960.0, ans=0.0 +2024-09-20 07:56:00,877 INFO [train.py:1198] (0/2) Epoch 47, batch 3350, loss[loss=0.2463, ctc_loss=0.1145, cr_loss=0.3628, attn_decoder_loss=0.2529, over 28787.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1087, cr_loss=0.3474, attn_decoder_loss=0.2373, over 5776303.51 frames. ], batch size: 104, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:56:02,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=846000.0, ans=0.2 +2024-09-20 07:56:05,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=846000.0, ans=0.125 +2024-09-20 07:56:26,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=846040.0, ans=0.125 +2024-09-20 07:56:46,664 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.13 vs. limit=22.5 +2024-09-20 07:56:47,170 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.506e+01 8.631e+01 9.228e+01 9.801e+01 1.993e+02, threshold=1.846e+02, percent-clipped=1.0 +2024-09-20 07:56:50,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=846120.0, ans=0.04949747468305833 +2024-09-20 07:57:20,296 INFO [train.py:1198] (0/2) Epoch 47, batch 3400, loss[loss=0.201, ctc_loss=0.08512, cr_loss=0.2924, attn_decoder_loss=0.2073, over 29351.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1088, cr_loss=0.3479, attn_decoder_loss=0.2372, over 5768704.61 frames. ], batch size: 67, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:57:26,754 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:57:30,761 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.95 vs. limit=6.0 +2024-09-20 07:57:35,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=846240.0, ans=10.0 +2024-09-20 07:58:05,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.19 vs. 
limit=15.0 +2024-09-20 07:58:15,201 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=846320.0, ans=0.125 +2024-09-20 07:58:16,712 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=846320.0, ans=0.1 +2024-09-20 07:58:26,287 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.28 vs. limit=12.0 +2024-09-20 07:58:36,095 INFO [train.py:1198] (0/2) Epoch 47, batch 3450, loss[loss=0.2365, ctc_loss=0.1066, cr_loss=0.3315, attn_decoder_loss=0.2436, over 28414.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1089, cr_loss=0.3478, attn_decoder_loss=0.2375, over 5776812.97 frames. ], batch size: 111, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:58:47,496 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.02 vs. limit=12.0 +2024-09-20 07:58:54,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=846440.0, ans=0.025 +2024-09-20 07:59:12,234 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=846480.0, ans=10.0 +2024-09-20 07:59:15,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=846480.0, ans=0.125 +2024-09-20 07:59:16,875 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:59:22,444 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.138e+01 8.718e+01 9.224e+01 9.719e+01 1.765e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-20 07:59:51,158 INFO [train.py:1198] (0/2) Epoch 47, batch 3500, loss[loss=0.2091, ctc_loss=0.08889, cr_loss=0.3034, attn_decoder_loss=0.2157, over 29301.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1085, cr_loss=0.3469, attn_decoder_loss=0.2369, over 5776512.41 frames. ], batch size: 71, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:00:02,168 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=846600.0, ans=0.125 +2024-09-20 08:00:16,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.87 vs. limit=15.0 +2024-09-20 08:00:40,603 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=846720.0, ans=0.125 +2024-09-20 08:01:05,576 INFO [train.py:1198] (0/2) Epoch 47, batch 3550, loss[loss=0.2457, ctc_loss=0.1146, cr_loss=0.3807, attn_decoder_loss=0.2518, over 29704.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1081, cr_loss=0.3464, attn_decoder_loss=0.2368, over 5782290.61 frames. 
], batch size: 89, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:01:19,050 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=846840.0, ans=0.125 +2024-09-20 08:01:48,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=846880.0, ans=0.0 +2024-09-20 08:01:55,423 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.386e+01 8.882e+01 9.420e+01 1.531e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-20 08:02:01,421 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=846920.0, ans=0.0 +2024-09-20 08:02:06,710 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.42 vs. limit=15.0 +2024-09-20 08:02:08,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=846960.0, ans=0.125 +2024-09-20 08:02:08,890 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=846960.0, ans=0.125 +2024-09-20 08:02:10,844 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.16 vs. limit=15.0 +2024-09-20 08:02:23,268 INFO [train.py:1198] (0/2) Epoch 47, batch 3600, loss[loss=0.2229, ctc_loss=0.1046, cr_loss=0.3393, attn_decoder_loss=0.2285, over 29480.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1081, cr_loss=0.3466, attn_decoder_loss=0.237, over 5791866.72 frames. ], batch size: 77, lr: 2.33e-03, grad_scale: 32.0 +2024-09-20 08:02:36,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=847040.0, ans=0.125 +2024-09-20 08:02:47,174 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=847040.0, ans=0.0 +2024-09-20 08:03:08,722 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.50 vs. limit=22.5 +2024-09-20 08:03:19,844 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=847120.0, ans=0.1 +2024-09-20 08:03:21,954 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.18 vs. limit=22.5 +2024-09-20 08:03:28,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=847160.0, ans=10.0 +2024-09-20 08:03:34,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=847160.0, ans=0.2 +2024-09-20 08:03:36,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=847200.0, ans=0.0 +2024-09-20 08:03:37,741 INFO [train.py:1198] (0/2) Epoch 47, batch 3650, loss[loss=0.2513, ctc_loss=0.1294, cr_loss=0.3927, attn_decoder_loss=0.2561, over 29511.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1077, cr_loss=0.3456, attn_decoder_loss=0.2365, over 5794009.18 frames. 
], batch size: 90, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:03:38,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=847200.0, ans=0.125 +2024-09-20 08:03:44,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=847200.0, ans=6.0 +2024-09-20 08:03:57,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=847240.0, ans=0.125 +2024-09-20 08:03:57,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=847240.0, ans=0.125 +2024-09-20 08:04:22,970 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.00 vs. limit=15.0 +2024-09-20 08:04:26,509 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.278e+01 8.685e+01 9.171e+01 9.762e+01 1.576e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-20 08:04:37,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=847360.0, ans=0.125 +2024-09-20 08:04:38,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=847360.0, ans=0.125 +2024-09-20 08:04:51,910 INFO [train.py:1198] (0/2) Epoch 47, batch 3700, loss[loss=0.2518, ctc_loss=0.1288, cr_loss=0.3975, attn_decoder_loss=0.2567, over 29712.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.108, cr_loss=0.3457, attn_decoder_loss=0.2369, over 5803527.79 frames. ], batch size: 84, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:05:05,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=847440.0, ans=0.04949747468305833 +2024-09-20 08:05:26,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=847480.0, ans=0.125 +2024-09-20 08:05:31,080 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=847480.0, ans=0.0 +2024-09-20 08:05:38,448 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=847520.0, ans=0.025 +2024-09-20 08:05:42,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=847520.0, ans=0.0 +2024-09-20 08:05:47,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=847520.0, ans=0.2 +2024-09-20 08:05:50,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=847560.0, ans=0.125 +2024-09-20 08:05:54,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=847560.0, ans=0.0 +2024-09-20 08:06:06,315 INFO [train.py:1198] (0/2) Epoch 47, batch 3750, loss[loss=0.2099, ctc_loss=0.1056, cr_loss=0.3263, attn_decoder_loss=0.2143, over 29356.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.3452, attn_decoder_loss=0.2365, over 5807458.81 frames. 
], batch size: 67, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:06:11,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=847600.0, ans=0.2 +2024-09-20 08:06:39,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=847680.0, ans=0.0 +2024-09-20 08:06:55,606 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.392e+01 8.560e+01 9.134e+01 9.769e+01 1.535e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-20 08:07:15,846 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.45 vs. limit=22.5 +2024-09-20 08:07:20,709 INFO [train.py:1198] (0/2) Epoch 47, batch 3800, loss[loss=0.24, ctc_loss=0.1073, cr_loss=0.344, attn_decoder_loss=0.2471, over 29629.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1076, cr_loss=0.3452, attn_decoder_loss=0.2364, over 5798810.27 frames. ], batch size: 86, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:07:34,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=847840.0, ans=0.0 +2024-09-20 08:07:40,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=847840.0, ans=0.125 +2024-09-20 08:07:46,516 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=847840.0, ans=0.0 +2024-09-20 08:07:46,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=847840.0, ans=0.1 +2024-09-20 08:07:55,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=847880.0, ans=0.1 +2024-09-20 08:08:04,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten.whitening_limit, batch_count=847880.0, ans=22.5 +2024-09-20 08:08:19,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=847920.0, ans=0.95 +2024-09-20 08:08:32,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=847960.0, ans=0.125 +2024-09-20 08:08:34,087 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=847960.0, ans=0.125 +2024-09-20 08:08:35,498 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=847960.0, ans=0.125 +2024-09-20 08:08:37,323 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-212000.pt +2024-09-20 08:08:45,574 INFO [train.py:1198] (0/2) Epoch 47, batch 3850, loss[loss=0.2396, ctc_loss=0.1165, cr_loss=0.3619, attn_decoder_loss=0.2452, over 29233.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1073, cr_loss=0.3444, attn_decoder_loss=0.2362, over 5811828.25 frames. ], batch size: 100, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:08:45,865 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:08:56,723 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.86 vs. 
limit=10.0 +2024-09-20 08:09:16,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=848080.0, ans=0.025 +2024-09-20 08:09:18,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=848080.0, ans=0.07 +2024-09-20 08:09:22,753 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=848080.0, ans=0.1 +2024-09-20 08:09:28,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=848120.0, ans=0.125 +2024-09-20 08:09:34,450 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.721e+01 8.837e+01 9.282e+01 9.780e+01 1.653e+02, threshold=1.856e+02, percent-clipped=0.0 +2024-09-20 08:09:36,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=848120.0, ans=0.125 +2024-09-20 08:09:48,233 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:09:59,714 INFO [train.py:1198] (0/2) Epoch 47, batch 3900, loss[loss=0.2527, ctc_loss=0.1144, cr_loss=0.3605, attn_decoder_loss=0.2601, over 29636.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1077, cr_loss=0.3455, attn_decoder_loss=0.2366, over 5816388.25 frames. ], batch size: 86, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:10:10,465 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=848200.0, ans=0.125 +2024-09-20 08:10:26,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=848240.0, ans=0.0 +2024-09-20 08:10:30,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=848280.0, ans=0.0 +2024-09-20 08:10:32,953 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.00 vs. limit=10.0 +2024-09-20 08:10:33,885 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=848280.0, ans=0.025 +2024-09-20 08:10:41,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=848280.0, ans=0.125 +2024-09-20 08:10:51,515 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:11:03,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=848360.0, ans=10.0 +2024-09-20 08:11:13,513 INFO [train.py:1198] (0/2) Epoch 47, batch 3950, loss[loss=0.2482, ctc_loss=0.1291, cr_loss=0.3987, attn_decoder_loss=0.2525, over 29479.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1074, cr_loss=0.3449, attn_decoder_loss=0.2365, over 5835978.83 frames. ], batch size: 97, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:11:35,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.43 vs. 
limit=15.0 +2024-09-20 08:11:58,031 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=848520.0, ans=0.125 +2024-09-20 08:12:02,179 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.359e+01 8.442e+01 9.066e+01 9.725e+01 6.124e+02, threshold=1.813e+02, percent-clipped=2.0 +2024-09-20 08:12:02,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=848520.0, ans=0.0 +2024-09-20 08:12:27,102 INFO [train.py:1198] (0/2) Epoch 47, batch 4000, loss[loss=0.2161, ctc_loss=0.09348, cr_loss=0.3105, attn_decoder_loss=0.2229, over 29503.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1075, cr_loss=0.3453, attn_decoder_loss=0.2367, over 5812990.51 frames. ], batch size: 74, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:12:55,886 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.16 vs. limit=15.0 +2024-09-20 08:12:57,416 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.61 vs. limit=15.0 +2024-09-20 08:13:43,550 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=9.83 vs. limit=22.5 +2024-09-20 08:13:43,952 INFO [train.py:1198] (0/2) Epoch 47, batch 4050, loss[loss=0.2516, ctc_loss=0.1303, cr_loss=0.3478, attn_decoder_loss=0.2573, over 20475.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1074, cr_loss=0.3445, attn_decoder_loss=0.2364, over 5797036.11 frames. ], batch size: 209, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:14:11,715 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=848880.0, ans=0.0 +2024-09-20 08:14:17,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=848880.0, ans=0.0 +2024-09-20 08:14:24,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=848880.0, ans=0.2 +2024-09-20 08:14:30,468 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=848920.0, ans=0.125 +2024-09-20 08:14:33,187 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.120e+01 8.525e+01 9.075e+01 9.953e+01 1.624e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-20 08:14:38,850 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.26 vs. limit=15.0 +2024-09-20 08:14:56,870 INFO [train.py:1198] (0/2) Epoch 47, batch 4100, loss[loss=0.2475, ctc_loss=0.1262, cr_loss=0.3763, attn_decoder_loss=0.2526, over 29497.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.108, cr_loss=0.3455, attn_decoder_loss=0.2368, over 5792527.85 frames. 
], batch size: 90, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:15:46,563 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=849120.0, ans=0.125 +2024-09-20 08:15:49,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=849120.0, ans=0.1 +2024-09-20 08:15:53,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=849160.0, ans=0.0 +2024-09-20 08:16:10,033 INFO [train.py:1198] (0/2) Epoch 47, batch 4150, loss[loss=0.2293, ctc_loss=0.108, cr_loss=0.3422, attn_decoder_loss=0.2352, over 29481.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1079, cr_loss=0.3449, attn_decoder_loss=0.2366, over 5797588.50 frames. ], batch size: 77, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:16:28,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=849240.0, ans=0.5 +2024-09-20 08:16:51,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=849280.0, ans=0.2 +2024-09-20 08:17:00,946 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.758e+01 9.113e+01 9.755e+01 1.948e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-20 08:17:05,657 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=849320.0, ans=0.125 +2024-09-20 08:17:25,429 INFO [train.py:1198] (0/2) Epoch 47, batch 4200, loss[loss=0.2499, ctc_loss=0.1255, cr_loss=0.3934, attn_decoder_loss=0.255, over 29517.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1085, cr_loss=0.3467, attn_decoder_loss=0.237, over 5799526.02 frames. ], batch size: 90, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:17:31,767 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:17:46,349 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=849440.0, ans=0.125 +2024-09-20 08:17:46,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=849440.0, ans=0.04949747468305833 +2024-09-20 08:17:50,770 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=849440.0, ans=0.1 +2024-09-20 08:17:50,846 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=849440.0, ans=0.0 +2024-09-20 08:17:53,486 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=849480.0, ans=0.015 +2024-09-20 08:18:32,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=849560.0, ans=0.0 +2024-09-20 08:18:39,530 INFO [train.py:1198] (0/2) Epoch 47, batch 4250, loss[loss=0.2095, ctc_loss=0.09108, cr_loss=0.315, attn_decoder_loss=0.2157, over 29520.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1082, cr_loss=0.3467, attn_decoder_loss=0.237, over 5805410.36 frames. 
], batch size: 74, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:18:41,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=849600.0, ans=0.1 +2024-09-20 08:18:42,675 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_positive, batch_count=849600.0, ans=0.05 +2024-09-20 08:19:00,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=849640.0, ans=0.0 +2024-09-20 08:19:09,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=849680.0, ans=0.0 +2024-09-20 08:19:29,566 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.747e+01 8.716e+01 9.310e+01 9.869e+01 2.948e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-20 08:19:53,044 INFO [train.py:1198] (0/2) Epoch 47, batch 4300, loss[loss=0.2439, ctc_loss=0.1177, cr_loss=0.3753, attn_decoder_loss=0.2496, over 29525.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.108, cr_loss=0.3461, attn_decoder_loss=0.2372, over 5794156.62 frames. ], batch size: 87, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:20:05,321 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:20:05,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=849800.0, ans=0.125 +2024-09-20 08:20:07,496 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.50 vs. limit=22.5 +2024-09-20 08:20:12,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=849840.0, ans=0.125 +2024-09-20 08:20:20,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=849840.0, ans=0.2 +2024-09-20 08:20:38,150 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.95 vs. limit=8.0 +2024-09-20 08:21:02,203 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.63 vs. limit=15.0 +2024-09-20 08:21:02,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=849960.0, ans=0.125 +2024-09-20 08:21:05,388 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.24 vs. limit=22.5 +2024-09-20 08:21:08,682 INFO [train.py:1198] (0/2) Epoch 47, batch 4350, loss[loss=0.2496, ctc_loss=0.1243, cr_loss=0.3883, attn_decoder_loss=0.2549, over 29497.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1107, cr_loss=0.3526, attn_decoder_loss=0.2404, over 5796099.97 frames. 
], batch size: 97, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:21:16,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=850000.0, ans=0.1 +2024-09-20 08:21:23,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=850040.0, ans=0.125 +2024-09-20 08:21:29,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=850040.0, ans=0.2 +2024-09-20 08:21:42,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=850080.0, ans=0.125 +2024-09-20 08:21:58,355 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 9.076e+01 9.411e+01 9.982e+01 1.475e+02, threshold=1.882e+02, percent-clipped=0.0 +2024-09-20 08:22:11,854 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:22:21,577 INFO [train.py:1198] (0/2) Epoch 47, batch 4400, loss[loss=0.2348, ctc_loss=0.106, cr_loss=0.3411, attn_decoder_loss=0.2415, over 27561.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1117, cr_loss=0.3547, attn_decoder_loss=0.2421, over 5767884.64 frames. ], batch size: 125, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:22:21,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=850200.0, ans=0.04949747468305833 +2024-09-20 08:22:42,564 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=850240.0, ans=0.125 +2024-09-20 08:22:49,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=850280.0, ans=0.2 +2024-09-20 08:23:03,442 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=850280.0, ans=0.125 +2024-09-20 08:23:10,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=850320.0, ans=0.1 +2024-09-20 08:23:20,224 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=850360.0, ans=0.0 +2024-09-20 08:23:36,660 INFO [train.py:1198] (0/2) Epoch 47, batch 4450, loss[loss=0.2507, ctc_loss=0.126, cr_loss=0.3677, attn_decoder_loss=0.2564, over 20389.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1149, cr_loss=0.3599, attn_decoder_loss=0.2441, over 5584155.34 frames. ], batch size: 209, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:24:17,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=850480.0, ans=0.125 +2024-09-20 08:24:18,751 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=850480.0, ans=0.2 +2024-09-20 08:24:22,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=850520.0, ans=0.0 +2024-09-20 08:24:29,099 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.530e+01 9.504e+01 1.076e+02 1.200e+02 1.579e+02, threshold=2.152e+02, percent-clipped=0.0 +2024-09-20 08:24:35,957 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.76 vs. 
limit=22.5 +2024-09-20 08:24:50,883 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.48 vs. limit=22.5 +2024-09-20 08:24:51,481 INFO [train.py:1198] (0/2) Epoch 47, batch 4500, loss[loss=0.2369, ctc_loss=0.1124, cr_loss=0.3235, attn_decoder_loss=0.2435, over 19704.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1176, cr_loss=0.3623, attn_decoder_loss=0.2458, over 5242171.24 frames. ], batch size: 209, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:24:55,113 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.19 vs. limit=22.5 +2024-09-20 08:25:03,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=850600.0, ans=0.125 +2024-09-20 08:25:09,655 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=850640.0, ans=0.125 +2024-09-20 08:25:13,193 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=6.77 vs. limit=10.0 +2024-09-20 08:25:23,302 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.48 vs. limit=22.5 +2024-09-20 08:25:28,451 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-47.pt +2024-09-20 08:26:14,114 INFO [train.py:1198] (0/2) Epoch 48, batch 0, loss[loss=0.2138, ctc_loss=0.09467, cr_loss=0.3301, attn_decoder_loss=0.2197, over 29591.00 frames. ], tot_loss[loss=0.2138, ctc_loss=0.09467, cr_loss=0.3301, attn_decoder_loss=0.2197, over 29591.00 frames. ], batch size: 73, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:26:14,115 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 08:26:32,431 INFO [train.py:1230] (0/2) Epoch 48, validation: loss=0.2131, ctc_loss=0.03621, cr_loss=7.075e-15, attn_decoder_loss=0.2327, over 944034.00 frames. +2024-09-20 08:26:32,431 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 08:26:40,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=850700.0, ans=0.05 +2024-09-20 08:27:13,934 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.07 vs. limit=15.0 +2024-09-20 08:27:39,690 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:27:41,213 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=850860.0, ans=0.125 +2024-09-20 08:27:49,851 INFO [train.py:1198] (0/2) Epoch 48, batch 50, loss[loss=0.201, ctc_loss=0.08715, cr_loss=0.3126, attn_decoder_loss=0.2067, over 29407.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1101, cr_loss=0.3518, attn_decoder_loss=0.2372, over 1267569.61 frames. ], batch size: 70, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:27:55,191 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.27 vs. 
limit=12.0 +2024-09-20 08:27:56,136 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=850900.0, ans=0.125 +2024-09-20 08:28:02,249 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=850900.0, ans=0.125 +2024-09-20 08:28:04,961 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.285e+01 9.048e+01 9.836e+01 1.173e+02 2.253e+02, threshold=1.967e+02, percent-clipped=1.0 +2024-09-20 08:28:08,759 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.55 vs. limit=10.0 +2024-09-20 08:28:20,551 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=850980.0, ans=0.0 +2024-09-20 08:28:35,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=851020.0, ans=0.125 +2024-09-20 08:28:40,150 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=851020.0, ans=0.0 +2024-09-20 08:29:05,197 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=851060.0, ans=0.125 +2024-09-20 08:29:07,767 INFO [train.py:1198] (0/2) Epoch 48, batch 100, loss[loss=0.2142, ctc_loss=0.08895, cr_loss=0.2942, attn_decoder_loss=0.2215, over 29525.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1112, cr_loss=0.3547, attn_decoder_loss=0.24, over 2250747.43 frames. ], batch size: 76, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:29:08,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=851100.0, ans=0.1 +2024-09-20 08:29:22,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=851140.0, ans=0.2 +2024-09-20 08:29:31,946 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=851140.0, ans=0.0 +2024-09-20 08:29:33,934 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.94 vs. limit=6.0 +2024-09-20 08:29:44,751 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.92 vs. limit=15.0 +2024-09-20 08:30:03,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=851220.0, ans=0.0 +2024-09-20 08:30:22,101 INFO [train.py:1198] (0/2) Epoch 48, batch 150, loss[loss=0.2032, ctc_loss=0.09185, cr_loss=0.3061, attn_decoder_loss=0.2087, over 29466.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1093, cr_loss=0.3502, attn_decoder_loss=0.238, over 3046285.51 frames. ], batch size: 70, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:30:29,314 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.22 vs. 
limit=10.0 +2024-09-20 08:30:38,632 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.481e+01 8.661e+01 9.113e+01 9.779e+01 1.487e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-20 08:30:46,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=851340.0, ans=0.125 +2024-09-20 08:30:46,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=851340.0, ans=0.125 +2024-09-20 08:30:54,882 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.91 vs. limit=6.0 +2024-09-20 08:30:55,487 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=851380.0, ans=0.0 +2024-09-20 08:31:26,091 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=851460.0, ans=0.0 +2024-09-20 08:31:26,652 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.52 vs. limit=15.0 +2024-09-20 08:31:39,270 INFO [train.py:1198] (0/2) Epoch 48, batch 200, loss[loss=0.2461, ctc_loss=0.1194, cr_loss=0.3746, attn_decoder_loss=0.2519, over 27581.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.109, cr_loss=0.3494, attn_decoder_loss=0.2374, over 3658492.92 frames. ], batch size: 125, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:31:50,617 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.69 vs. limit=15.0 +2024-09-20 08:32:35,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=851620.0, ans=0.125 +2024-09-20 08:32:44,620 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.71 vs. limit=22.5 +2024-09-20 08:32:54,544 INFO [train.py:1198] (0/2) Epoch 48, batch 250, loss[loss=0.2464, ctc_loss=0.1106, cr_loss=0.3586, attn_decoder_loss=0.2535, over 29264.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1082, cr_loss=0.348, attn_decoder_loss=0.2373, over 4141286.62 frames. ], batch size: 100, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:33:10,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=851740.0, ans=0.1 +2024-09-20 08:33:13,492 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.583e+01 8.550e+01 9.278e+01 9.687e+01 3.776e+02, threshold=1.856e+02, percent-clipped=1.0 +2024-09-20 08:33:14,436 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.24 vs. 
limit=15.0 +2024-09-20 08:33:30,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=851780.0, ans=0.025 +2024-09-20 08:33:40,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=851820.0, ans=0.0 +2024-09-20 08:33:40,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=851820.0, ans=0.1 +2024-09-20 08:33:40,922 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=851820.0, ans=0.0 +2024-09-20 08:33:45,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=851820.0, ans=0.025 +2024-09-20 08:33:54,459 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=851820.0, ans=0.0 +2024-09-20 08:34:06,878 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.68 vs. limit=22.5 +2024-09-20 08:34:09,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=851860.0, ans=0.07 +2024-09-20 08:34:12,358 INFO [train.py:1198] (0/2) Epoch 48, batch 300, loss[loss=0.241, ctc_loss=0.1118, cr_loss=0.3694, attn_decoder_loss=0.2471, over 29541.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1079, cr_loss=0.3471, attn_decoder_loss=0.2369, over 4508742.30 frames. ], batch size: 92, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:34:15,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=851900.0, ans=0.0 +2024-09-20 08:34:20,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=851900.0, ans=0.0 +2024-09-20 08:34:22,126 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.96 vs. limit=15.0 +2024-09-20 08:34:30,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=851940.0, ans=0.125 +2024-09-20 08:35:00,860 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.28 vs. limit=10.0 +2024-09-20 08:35:01,779 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=852020.0, ans=0.125 +2024-09-20 08:35:08,191 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.22 vs. 
limit=15.0 +2024-09-20 08:35:15,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=852060.0, ans=0.125 +2024-09-20 08:35:15,471 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=852060.0, ans=0.1 +2024-09-20 08:35:21,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=852060.0, ans=0.1 +2024-09-20 08:35:29,401 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.48 vs. limit=22.5 +2024-09-20 08:35:29,937 INFO [train.py:1198] (0/2) Epoch 48, batch 350, loss[loss=0.2061, ctc_loss=0.08786, cr_loss=0.2724, attn_decoder_loss=0.2132, over 29331.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1077, cr_loss=0.3466, attn_decoder_loss=0.2368, over 4795090.57 frames. ], batch size: 71, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:35:37,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=852100.0, ans=0.025 +2024-09-20 08:35:39,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=852100.0, ans=0.1 +2024-09-20 08:35:45,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=852140.0, ans=0.125 +2024-09-20 08:35:46,308 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.632e+01 9.132e+01 9.604e+01 3.712e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-20 08:35:52,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=852140.0, ans=0.1 +2024-09-20 08:36:36,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=852260.0, ans=0.125 +2024-09-20 08:36:45,274 INFO [train.py:1198] (0/2) Epoch 48, batch 400, loss[loss=0.2323, ctc_loss=0.1106, cr_loss=0.3738, attn_decoder_loss=0.2375, over 29711.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1079, cr_loss=0.3466, attn_decoder_loss=0.2367, over 5025215.85 frames. ], batch size: 82, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:36:52,131 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.78 vs. limit=15.0 +2024-09-20 08:36:54,638 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=852300.0, ans=0.125 +2024-09-20 08:37:12,791 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.74 vs. limit=12.0 +2024-09-20 08:37:14,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.56 vs. limit=15.0 +2024-09-20 08:37:15,140 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=852340.0, ans=0.125 +2024-09-20 08:37:26,618 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.58 vs. 
limit=22.5 +2024-09-20 08:37:59,013 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=852460.0, ans=0.0 +2024-09-20 08:38:03,186 INFO [train.py:1198] (0/2) Epoch 48, batch 450, loss[loss=0.2464, ctc_loss=0.1202, cr_loss=0.3695, attn_decoder_loss=0.2523, over 29699.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1083, cr_loss=0.3473, attn_decoder_loss=0.2368, over 5188972.73 frames. ], batch size: 83, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:38:19,720 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.823e+01 8.734e+01 9.234e+01 9.898e+01 1.385e+02, threshold=1.847e+02, percent-clipped=0.0 +2024-09-20 08:38:22,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.99 vs. limit=15.0 +2024-09-20 08:38:24,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=852540.0, ans=0.0 +2024-09-20 08:38:42,639 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=852580.0, ans=0.125 +2024-09-20 08:38:55,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=852620.0, ans=0.2 +2024-09-20 08:39:07,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=852660.0, ans=0.125 +2024-09-20 08:39:10,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=852660.0, ans=0.1 +2024-09-20 08:39:13,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=852660.0, ans=0.2 +2024-09-20 08:39:21,087 INFO [train.py:1198] (0/2) Epoch 48, batch 500, loss[loss=0.2514, ctc_loss=0.1239, cr_loss=0.3655, attn_decoder_loss=0.2574, over 29429.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1074, cr_loss=0.3456, attn_decoder_loss=0.2361, over 5331199.11 frames. ], batch size: 94, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:39:26,509 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.22 vs. limit=6.0 +2024-09-20 08:39:35,571 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.98 vs. limit=15.0 +2024-09-20 08:40:02,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=852780.0, ans=0.2 +2024-09-20 08:40:09,246 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.53 vs. limit=15.0 +2024-09-20 08:40:23,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=852860.0, ans=0.125 +2024-09-20 08:40:36,940 INFO [train.py:1198] (0/2) Epoch 48, batch 550, loss[loss=0.2343, ctc_loss=0.107, cr_loss=0.3598, attn_decoder_loss=0.2404, over 28775.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1076, cr_loss=0.3466, attn_decoder_loss=0.2363, over 5425018.09 frames. 
], batch size: 104, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:40:38,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=852900.0, ans=0.125 +2024-09-20 08:40:53,454 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.397e+01 8.626e+01 8.943e+01 9.744e+01 1.321e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-20 08:40:56,761 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=852940.0, ans=0.09899494936611666 +2024-09-20 08:41:30,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=853020.0, ans=0.1 +2024-09-20 08:41:36,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=853020.0, ans=0.0 +2024-09-20 08:41:44,207 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=853060.0, ans=0.125 +2024-09-20 08:41:44,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=853060.0, ans=0.1 +2024-09-20 08:41:48,122 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.97 vs. limit=15.0 +2024-09-20 08:41:50,383 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=853060.0, ans=0.0 +2024-09-20 08:41:54,600 INFO [train.py:1198] (0/2) Epoch 48, batch 600, loss[loss=0.2459, ctc_loss=0.1153, cr_loss=0.3682, attn_decoder_loss=0.2522, over 29227.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1074, cr_loss=0.3465, attn_decoder_loss=0.2365, over 5510892.85 frames. ], batch size: 100, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:41:57,993 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=853100.0, ans=0.0 +2024-09-20 08:42:02,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=853100.0, ans=0.125 +2024-09-20 08:42:33,041 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.70 vs. limit=15.0 +2024-09-20 08:42:34,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.07 vs. limit=15.0 +2024-09-20 08:42:35,346 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=853180.0, ans=0.025 +2024-09-20 08:42:52,939 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.02 vs. limit=22.5 +2024-09-20 08:43:04,827 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=853260.0, ans=0.2 +2024-09-20 08:43:12,008 INFO [train.py:1198] (0/2) Epoch 48, batch 650, loss[loss=0.2295, ctc_loss=0.1056, cr_loss=0.3489, attn_decoder_loss=0.2355, over 29766.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1067, cr_loss=0.3443, attn_decoder_loss=0.2358, over 5587963.11 frames. 
], batch size: 81, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:43:19,017 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.90 vs. limit=6.0 +2024-09-20 08:43:21,183 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=853300.0, ans=0.05 +2024-09-20 08:43:21,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=853300.0, ans=0.125 +2024-09-20 08:43:28,436 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.698e+01 8.608e+01 8.952e+01 9.536e+01 4.634e+02, threshold=1.790e+02, percent-clipped=1.0 +2024-09-20 08:43:31,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.86 vs. limit=15.0 +2024-09-20 08:43:40,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=853380.0, ans=0.125 +2024-09-20 08:43:53,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=853380.0, ans=0.1 +2024-09-20 08:44:05,212 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=853420.0, ans=0.2 +2024-09-20 08:44:23,836 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=14.42 vs. limit=15.0 +2024-09-20 08:44:27,289 INFO [train.py:1198] (0/2) Epoch 48, batch 700, loss[loss=0.2323, ctc_loss=0.1101, cr_loss=0.3508, attn_decoder_loss=0.238, over 29537.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1072, cr_loss=0.3459, attn_decoder_loss=0.2364, over 5637334.28 frames. ], batch size: 76, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:44:50,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=853540.0, ans=0.125 +2024-09-20 08:45:23,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=853620.0, ans=0.1 +2024-09-20 08:45:29,367 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=853660.0, ans=0.125 +2024-09-20 08:45:41,452 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:45:45,558 INFO [train.py:1198] (0/2) Epoch 48, batch 750, loss[loss=0.2388, ctc_loss=0.1235, cr_loss=0.3988, attn_decoder_loss=0.2427, over 29700.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1075, cr_loss=0.3462, attn_decoder_loss=0.2363, over 5677371.04 frames. ], batch size: 82, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:45:54,577 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=853700.0, ans=0.125 +2024-09-20 08:46:01,951 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.518e+01 8.792e+01 9.309e+01 9.827e+01 1.298e+02, threshold=1.862e+02, percent-clipped=0.0 +2024-09-20 08:46:08,788 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.19 vs. 
limit=15.0 +2024-09-20 08:46:18,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=853780.0, ans=0.0 +2024-09-20 08:46:39,148 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.91 vs. limit=22.5 +2024-09-20 08:47:03,065 INFO [train.py:1198] (0/2) Epoch 48, batch 800, loss[loss=0.2089, ctc_loss=0.0955, cr_loss=0.3158, attn_decoder_loss=0.2145, over 29639.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1078, cr_loss=0.3473, attn_decoder_loss=0.2364, over 5708284.17 frames. ], batch size: 73, lr: 2.30e-03, grad_scale: 32.0 +2024-09-20 08:47:12,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=853900.0, ans=0.125 +2024-09-20 08:47:36,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=853980.0, ans=0.2 +2024-09-20 08:47:47,033 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=854020.0, ans=0.0 +2024-09-20 08:47:49,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=854020.0, ans=0.0 +2024-09-20 08:47:51,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=854020.0, ans=0.125 +2024-09-20 08:47:54,920 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.23 vs. limit=15.0 +2024-09-20 08:48:11,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=854060.0, ans=0.1 +2024-09-20 08:48:18,090 INFO [train.py:1198] (0/2) Epoch 48, batch 850, loss[loss=0.2338, ctc_loss=0.1078, cr_loss=0.3356, attn_decoder_loss=0.2404, over 29704.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.107, cr_loss=0.345, attn_decoder_loss=0.2361, over 5736657.06 frames. ], batch size: 89, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:48:21,241 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=854100.0, ans=0.0 +2024-09-20 08:48:28,040 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.07 vs. limit=15.0 +2024-09-20 08:48:35,990 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.262e+01 8.678e+01 9.128e+01 9.659e+01 1.410e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-20 08:48:42,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=854140.0, ans=0.0 +2024-09-20 08:49:12,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=854220.0, ans=0.1 +2024-09-20 08:49:12,615 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.95 vs. limit=15.0 +2024-09-20 08:49:36,098 INFO [train.py:1198] (0/2) Epoch 48, batch 900, loss[loss=0.212, ctc_loss=0.08857, cr_loss=0.2962, attn_decoder_loss=0.2191, over 29616.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1071, cr_loss=0.3451, attn_decoder_loss=0.2363, over 5740605.75 frames. 
], batch size: 73, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:49:38,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.35 vs. limit=10.0 +2024-09-20 08:49:52,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=854340.0, ans=0.0 +2024-09-20 08:50:23,305 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=854420.0, ans=0.0 +2024-09-20 08:50:49,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=854460.0, ans=0.125 +2024-09-20 08:50:53,779 INFO [train.py:1198] (0/2) Epoch 48, batch 950, loss[loss=0.217, ctc_loss=0.1019, cr_loss=0.3369, attn_decoder_loss=0.2223, over 29506.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1069, cr_loss=0.3443, attn_decoder_loss=0.2362, over 5742532.03 frames. ], batch size: 74, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:51:01,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=854500.0, ans=0.125 +2024-09-20 08:51:04,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=854500.0, ans=0.125 +2024-09-20 08:51:11,718 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.672e+01 8.747e+01 9.386e+01 9.871e+01 2.198e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-20 08:51:12,756 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.74 vs. limit=10.0 +2024-09-20 08:51:20,619 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.94 vs. limit=8.0 +2024-09-20 08:51:22,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=854580.0, ans=0.2 +2024-09-20 08:51:25,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=854580.0, ans=0.0 +2024-09-20 08:51:27,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=854580.0, ans=0.125 +2024-09-20 08:51:37,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=854620.0, ans=0.125 +2024-09-20 08:51:57,894 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.55 vs. limit=12.0 +2024-09-20 08:52:00,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=854660.0, ans=0.0 +2024-09-20 08:52:08,494 INFO [train.py:1198] (0/2) Epoch 48, batch 1000, loss[loss=0.2245, ctc_loss=0.1093, cr_loss=0.343, attn_decoder_loss=0.2297, over 29479.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1076, cr_loss=0.346, attn_decoder_loss=0.2368, over 5736651.27 frames. 
], batch size: 77, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:52:25,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=854740.0, ans=0.0 +2024-09-20 08:52:32,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=854740.0, ans=0.2 +2024-09-20 08:52:46,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=854780.0, ans=0.125 +2024-09-20 08:53:06,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=854820.0, ans=0.1 +2024-09-20 08:53:25,983 INFO [train.py:1198] (0/2) Epoch 48, batch 1050, loss[loss=0.2349, ctc_loss=0.1035, cr_loss=0.3305, attn_decoder_loss=0.2421, over 29681.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1071, cr_loss=0.3447, attn_decoder_loss=0.2361, over 5746171.94 frames. ], batch size: 85, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:53:30,727 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=854900.0, ans=0.1 +2024-09-20 08:53:44,085 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.543e+01 8.686e+01 9.233e+01 9.898e+01 2.337e+02, threshold=1.847e+02, percent-clipped=2.0 +2024-09-20 08:53:53,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=854940.0, ans=0.125 +2024-09-20 08:53:54,012 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.70 vs. limit=15.0 +2024-09-20 08:54:02,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=854980.0, ans=0.1 +2024-09-20 08:54:05,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=854980.0, ans=0.125 +2024-09-20 08:54:11,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=855020.0, ans=0.1 +2024-09-20 08:54:39,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=855060.0, ans=0.125 +2024-09-20 08:54:39,597 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=855060.0, ans=0.125 +2024-09-20 08:54:43,757 INFO [train.py:1198] (0/2) Epoch 48, batch 1100, loss[loss=0.2192, ctc_loss=0.0946, cr_loss=0.3168, attn_decoder_loss=0.226, over 29424.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1066, cr_loss=0.3437, attn_decoder_loss=0.2358, over 5757607.28 frames. ], batch size: 78, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:54:53,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.66 vs. limit=12.0 +2024-09-20 08:55:42,350 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.29 vs. limit=15.0 +2024-09-20 08:55:48,584 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.18 vs. 
limit=15.0 +2024-09-20 08:55:59,696 INFO [train.py:1198] (0/2) Epoch 48, batch 1150, loss[loss=0.2184, ctc_loss=0.1026, cr_loss=0.3302, attn_decoder_loss=0.2239, over 29447.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1071, cr_loss=0.3447, attn_decoder_loss=0.236, over 5755202.63 frames. ], batch size: 78, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:55:59,965 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=855300.0, ans=0.0 +2024-09-20 08:56:03,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.94 vs. limit=10.0 +2024-09-20 08:56:19,238 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.603e+01 9.086e+01 9.808e+01 3.950e+02, threshold=1.817e+02, percent-clipped=2.0 +2024-09-20 08:56:31,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=855380.0, ans=0.125 +2024-09-20 08:56:41,460 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.46 vs. limit=15.0 +2024-09-20 08:57:17,312 INFO [train.py:1198] (0/2) Epoch 48, batch 1200, loss[loss=0.2476, ctc_loss=0.1186, cr_loss=0.3756, attn_decoder_loss=0.2536, over 29667.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1078, cr_loss=0.3461, attn_decoder_loss=0.2369, over 5747638.05 frames. ], batch size: 85, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:57:19,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=855500.0, ans=0.125 +2024-09-20 08:57:34,995 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.35 vs. limit=22.5 +2024-09-20 08:57:52,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=855580.0, ans=0.09899494936611666 +2024-09-20 08:57:54,649 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.29 vs. limit=15.0 +2024-09-20 08:57:58,975 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.69 vs. limit=12.0 +2024-09-20 08:58:23,088 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=855660.0, ans=0.125 +2024-09-20 08:58:30,682 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=855660.0, ans=0.125 +2024-09-20 08:58:34,855 INFO [train.py:1198] (0/2) Epoch 48, batch 1250, loss[loss=0.2581, ctc_loss=0.1329, cr_loss=0.4148, attn_decoder_loss=0.2627, over 29545.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1083, cr_loss=0.347, attn_decoder_loss=0.2373, over 5775405.62 frames. 
], batch size: 92, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:58:36,673 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=855700.0, ans=0.1 +2024-09-20 08:58:54,648 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.675e+01 8.788e+01 9.389e+01 9.946e+01 2.084e+02, threshold=1.878e+02, percent-clipped=1.0 +2024-09-20 08:58:59,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=855740.0, ans=0.2 +2024-09-20 08:59:21,247 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.81 vs. limit=22.5 +2024-09-20 08:59:50,619 INFO [train.py:1198] (0/2) Epoch 48, batch 1300, loss[loss=0.2401, ctc_loss=0.1107, cr_loss=0.3368, attn_decoder_loss=0.247, over 28242.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1078, cr_loss=0.3456, attn_decoder_loss=0.2367, over 5779682.82 frames. ], batch size: 111, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:59:51,035 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=855900.0, ans=0.0 +2024-09-20 08:59:54,478 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.96 vs. limit=15.0 +2024-09-20 09:01:01,591 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=856060.0, ans=0.125 +2024-09-20 09:01:09,003 INFO [train.py:1198] (0/2) Epoch 48, batch 1350, loss[loss=0.2243, ctc_loss=0.09546, cr_loss=0.3213, attn_decoder_loss=0.2315, over 29753.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1074, cr_loss=0.3449, attn_decoder_loss=0.2365, over 5797127.22 frames. ], batch size: 81, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:01:20,070 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.39 vs. limit=22.5 +2024-09-20 09:01:24,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=856140.0, ans=0.0 +2024-09-20 09:01:29,672 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 8.601e+01 8.992e+01 9.491e+01 1.134e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-20 09:01:55,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=856220.0, ans=0.0 +2024-09-20 09:01:59,989 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=856220.0, ans=0.1 +2024-09-20 09:02:23,918 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.14 vs. limit=15.0 +2024-09-20 09:02:25,938 INFO [train.py:1198] (0/2) Epoch 48, batch 1400, loss[loss=0.2054, ctc_loss=0.09145, cr_loss=0.3032, attn_decoder_loss=0.2113, over 29560.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1076, cr_loss=0.3454, attn_decoder_loss=0.2364, over 5808074.38 frames. 
], batch size: 69, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:02:26,344 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=856300.0, ans=0.125 +2024-09-20 09:02:30,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=856300.0, ans=0.125 +2024-09-20 09:02:33,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=856300.0, ans=0.95 +2024-09-20 09:02:51,653 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=856340.0, ans=0.125 +2024-09-20 09:03:08,853 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.37 vs. limit=22.5 +2024-09-20 09:03:11,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=856420.0, ans=0.04949747468305833 +2024-09-20 09:03:12,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=856420.0, ans=0.0 +2024-09-20 09:03:38,467 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=856460.0, ans=0.0 +2024-09-20 09:03:41,084 INFO [train.py:1198] (0/2) Epoch 48, batch 1450, loss[loss=0.2413, ctc_loss=0.1115, cr_loss=0.3268, attn_decoder_loss=0.2484, over 29440.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.108, cr_loss=0.3463, attn_decoder_loss=0.2369, over 5804200.22 frames. ], batch size: 94, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:03:41,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=856500.0, ans=0.0 +2024-09-20 09:03:47,414 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=856500.0, ans=0.2 +2024-09-20 09:03:48,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=856500.0, ans=0.07 +2024-09-20 09:04:01,052 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=856540.0, ans=0.0 +2024-09-20 09:04:02,263 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.617e+01 8.708e+01 9.120e+01 9.678e+01 1.766e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-20 09:04:06,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=856540.0, ans=0.1 +2024-09-20 09:04:08,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=856540.0, ans=0.2 +2024-09-20 09:04:11,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=856580.0, ans=0.0 +2024-09-20 09:04:24,183 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.93 vs. 
limit=15.0 +2024-09-20 09:04:29,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=856620.0, ans=0.125 +2024-09-20 09:04:35,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=856620.0, ans=0.1 +2024-09-20 09:04:48,205 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.39 vs. limit=12.0 +2024-09-20 09:04:58,580 INFO [train.py:1198] (0/2) Epoch 48, batch 1500, loss[loss=0.2349, ctc_loss=0.1041, cr_loss=0.3355, attn_decoder_loss=0.242, over 29635.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1082, cr_loss=0.3472, attn_decoder_loss=0.2374, over 5806445.63 frames. ], batch size: 86, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:05:18,645 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=856740.0, ans=0.125 +2024-09-20 09:05:18,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=856740.0, ans=15.0 +2024-09-20 09:05:24,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=856740.0, ans=0.1 +2024-09-20 09:05:28,447 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.00 vs. limit=15.0 +2024-09-20 09:05:29,472 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.05 vs. limit=22.5 +2024-09-20 09:05:40,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.18 vs. limit=15.0 +2024-09-20 09:05:41,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=856780.0, ans=0.0 +2024-09-20 09:05:45,068 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:05:49,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=856820.0, ans=0.0 +2024-09-20 09:06:17,038 INFO [train.py:1198] (0/2) Epoch 48, batch 1550, loss[loss=0.2504, ctc_loss=0.1299, cr_loss=0.4068, attn_decoder_loss=0.2547, over 29490.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1087, cr_loss=0.3481, attn_decoder_loss=0.2376, over 5781985.88 frames. ], batch size: 90, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:06:18,817 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=856900.0, ans=0.0 +2024-09-20 09:06:26,244 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=856900.0, ans=0.125 +2024-09-20 09:06:38,059 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.059e+01 8.748e+01 9.189e+01 9.595e+01 2.151e+02, threshold=1.838e+02, percent-clipped=1.0 +2024-09-20 09:06:43,299 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.56 vs. 
limit=22.5 +2024-09-20 09:06:49,317 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.95 vs. limit=12.0 +2024-09-20 09:06:57,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=856980.0, ans=0.95 +2024-09-20 09:07:08,148 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=857020.0, ans=0.125 +2024-09-20 09:07:17,635 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.02 vs. limit=15.0 +2024-09-20 09:07:22,082 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=4.79 vs. limit=15.0 +2024-09-20 09:07:31,851 INFO [train.py:1198] (0/2) Epoch 48, batch 1600, loss[loss=0.2373, ctc_loss=0.1087, cr_loss=0.3493, attn_decoder_loss=0.2439, over 29669.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1085, cr_loss=0.347, attn_decoder_loss=0.2374, over 5765229.82 frames. ], batch size: 85, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:07:36,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=857100.0, ans=0.025 +2024-09-20 09:08:49,427 INFO [train.py:1198] (0/2) Epoch 48, batch 1650, loss[loss=0.2423, ctc_loss=0.1021, cr_loss=0.3436, attn_decoder_loss=0.2502, over 29722.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1079, cr_loss=0.3458, attn_decoder_loss=0.2372, over 5758085.80 frames. ], batch size: 89, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:09:10,374 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.501e+01 8.669e+01 9.204e+01 9.828e+01 1.752e+02, threshold=1.841e+02, percent-clipped=0.0 +2024-09-20 09:09:22,887 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:09:35,256 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:09:39,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=857420.0, ans=0.125 +2024-09-20 09:09:47,864 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=857420.0, ans=0.1 +2024-09-20 09:09:52,276 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=857460.0, ans=0.1 +2024-09-20 09:10:07,189 INFO [train.py:1198] (0/2) Epoch 48, batch 1700, loss[loss=0.2024, ctc_loss=0.08735, cr_loss=0.2999, attn_decoder_loss=0.2085, over 29593.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1076, cr_loss=0.3451, attn_decoder_loss=0.2369, over 5778543.55 frames. 
], batch size: 69, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:10:12,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=857500.0, ans=0.0 +2024-09-20 09:10:21,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=857540.0, ans=0.125 +2024-09-20 09:10:48,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=857580.0, ans=0.1 +2024-09-20 09:11:16,479 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.71 vs. limit=15.0 +2024-09-20 09:11:23,281 INFO [train.py:1198] (0/2) Epoch 48, batch 1750, loss[loss=0.218, ctc_loss=0.1032, cr_loss=0.3353, attn_decoder_loss=0.2233, over 29342.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.345, attn_decoder_loss=0.2366, over 5787129.57 frames. ], batch size: 67, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:11:40,287 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=857740.0, ans=0.1 +2024-09-20 09:11:44,443 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.991e+01 8.682e+01 9.026e+01 9.554e+01 1.464e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-20 09:11:52,315 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=857780.0, ans=0.125 +2024-09-20 09:12:17,901 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=857820.0, ans=0.125 +2024-09-20 09:12:28,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=857860.0, ans=0.2 +2024-09-20 09:12:40,544 INFO [train.py:1198] (0/2) Epoch 48, batch 1800, loss[loss=0.2389, ctc_loss=0.1113, cr_loss=0.3637, attn_decoder_loss=0.245, over 29682.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1077, cr_loss=0.3457, attn_decoder_loss=0.2368, over 5790832.54 frames. ], batch size: 83, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:13:37,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=858020.0, ans=0.125 +2024-09-20 09:13:57,387 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.00 vs. limit=6.0 +2024-09-20 09:13:58,142 INFO [train.py:1198] (0/2) Epoch 48, batch 1850, loss[loss=0.2382, ctc_loss=0.1141, cr_loss=0.3617, attn_decoder_loss=0.2439, over 29620.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1077, cr_loss=0.3459, attn_decoder_loss=0.2367, over 5795916.10 frames. ], batch size: 86, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:14:00,001 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:14:02,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=858100.0, ans=0.125 +2024-09-20 09:14:12,940 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.66 vs. 
limit=15.0 +2024-09-20 09:14:19,244 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.602e+01 8.577e+01 9.244e+01 9.733e+01 2.629e+02, threshold=1.849e+02, percent-clipped=1.0 +2024-09-20 09:14:31,690 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=858180.0, ans=0.0 +2024-09-20 09:15:00,340 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=858260.0, ans=0.125 +2024-09-20 09:15:13,440 INFO [train.py:1198] (0/2) Epoch 48, batch 1900, loss[loss=0.242, ctc_loss=0.1191, cr_loss=0.3746, attn_decoder_loss=0.2473, over 29700.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1079, cr_loss=0.3466, attn_decoder_loss=0.2371, over 5803905.48 frames. ], batch size: 89, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:15:39,168 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.51 vs. limit=15.0 +2024-09-20 09:16:13,820 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=858460.0, ans=0.125 +2024-09-20 09:16:16,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=858460.0, ans=0.125 +2024-09-20 09:16:30,242 INFO [train.py:1198] (0/2) Epoch 48, batch 1950, loss[loss=0.2259, ctc_loss=0.1044, cr_loss=0.336, attn_decoder_loss=0.2319, over 29446.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1084, cr_loss=0.3476, attn_decoder_loss=0.238, over 5818537.52 frames. ], batch size: 78, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:16:31,407 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=7.77 vs. limit=22.5 +2024-09-20 09:16:50,750 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=858540.0, ans=0.1 +2024-09-20 09:16:53,370 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.839e+01 9.358e+01 9.818e+01 1.771e+02, threshold=1.872e+02, percent-clipped=0.0 +2024-09-20 09:17:01,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=858580.0, ans=0.1 +2024-09-20 09:17:15,459 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.27 vs. limit=15.0 +2024-09-20 09:17:26,378 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten.whitening_limit, batch_count=858620.0, ans=15.0 +2024-09-20 09:17:31,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=858620.0, ans=0.04949747468305833 +2024-09-20 09:17:36,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=858660.0, ans=0.125 +2024-09-20 09:17:49,886 INFO [train.py:1198] (0/2) Epoch 48, batch 2000, loss[loss=0.2035, ctc_loss=0.0852, cr_loss=0.2953, attn_decoder_loss=0.2101, over 29297.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1086, cr_loss=0.3479, attn_decoder_loss=0.2384, over 5796742.12 frames. 
], batch size: 67, lr: 2.29e-03, grad_scale: 32.0 +2024-09-20 09:17:57,723 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=858700.0, ans=0.125 +2024-09-20 09:18:19,228 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.19 vs. limit=15.0 +2024-09-20 09:18:59,747 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=858860.0, ans=0.0 +2024-09-20 09:19:05,485 INFO [train.py:1198] (0/2) Epoch 48, batch 2050, loss[loss=0.2039, ctc_loss=0.08967, cr_loss=0.3109, attn_decoder_loss=0.2097, over 29421.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1075, cr_loss=0.3454, attn_decoder_loss=0.2369, over 5787963.79 frames. ], batch size: 70, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:19:07,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=858900.0, ans=0.125 +2024-09-20 09:19:21,633 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.54 vs. limit=15.0 +2024-09-20 09:19:22,310 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:19:28,038 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.210e+01 8.559e+01 9.116e+01 9.582e+01 1.621e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-20 09:19:32,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=858940.0, ans=0.025 +2024-09-20 09:19:55,676 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.80 vs. limit=15.0 +2024-09-20 09:20:02,802 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=859020.0, ans=0.0 +2024-09-20 09:20:20,537 INFO [train.py:1198] (0/2) Epoch 48, batch 2100, loss[loss=0.2291, ctc_loss=0.107, cr_loss=0.3557, attn_decoder_loss=0.2347, over 29767.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1071, cr_loss=0.3447, attn_decoder_loss=0.2364, over 5800889.71 frames. ], batch size: 81, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:20:37,768 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=859140.0, ans=0.0 +2024-09-20 09:20:48,836 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=16.91 vs. 
limit=15.0 +2024-09-20 09:20:52,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=859180.0, ans=0.125 +2024-09-20 09:21:22,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=859220.0, ans=0.125 +2024-09-20 09:21:28,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=859260.0, ans=0.125 +2024-09-20 09:21:32,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=859260.0, ans=0.2 +2024-09-20 09:21:40,099 INFO [train.py:1198] (0/2) Epoch 48, batch 2150, loss[loss=0.2304, ctc_loss=0.1006, cr_loss=0.3418, attn_decoder_loss=0.2372, over 29429.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1067, cr_loss=0.344, attn_decoder_loss=0.2359, over 5816616.80 frames. ], batch size: 78, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:21:40,899 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.56 vs. limit=22.5 +2024-09-20 09:21:46,492 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=859300.0, ans=0.125 +2024-09-20 09:22:02,730 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.204e+01 8.576e+01 8.993e+01 9.601e+01 1.335e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-20 09:22:03,042 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_ff2.min_abs, batch_count=859340.0, ans=0.1 +2024-09-20 09:22:05,058 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.65 vs. limit=15.0 +2024-09-20 09:22:25,641 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=859420.0, ans=0.1 +2024-09-20 09:22:25,681 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=859420.0, ans=0.1 +2024-09-20 09:22:39,732 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.30 vs. limit=10.0 +2024-09-20 09:22:52,829 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=859460.0, ans=0.025 +2024-09-20 09:22:55,666 INFO [train.py:1198] (0/2) Epoch 48, batch 2200, loss[loss=0.2306, ctc_loss=0.1059, cr_loss=0.3408, attn_decoder_loss=0.2369, over 29640.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.107, cr_loss=0.3443, attn_decoder_loss=0.236, over 5813781.14 frames. 
], batch size: 86, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:23:06,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=859500.0, ans=0.125 +2024-09-20 09:23:15,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=859540.0, ans=0.0 +2024-09-20 09:23:31,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=859580.0, ans=0.125 +2024-09-20 09:23:35,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=859580.0, ans=0.125 +2024-09-20 09:23:38,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=859580.0, ans=0.125 +2024-09-20 09:23:55,074 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.44 vs. limit=15.0 +2024-09-20 09:23:56,027 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=859660.0, ans=0.125 +2024-09-20 09:24:10,685 INFO [train.py:1198] (0/2) Epoch 48, batch 2250, loss[loss=0.2261, ctc_loss=0.103, cr_loss=0.3285, attn_decoder_loss=0.2325, over 29709.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1072, cr_loss=0.3442, attn_decoder_loss=0.2361, over 5814520.24 frames. ], batch size: 82, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:24:11,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=859700.0, ans=0.0 +2024-09-20 09:24:27,760 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=859740.0, ans=0.125 +2024-09-20 09:24:34,879 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.41 vs. limit=15.0 +2024-09-20 09:24:35,429 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.496e+01 8.683e+01 9.115e+01 9.671e+01 7.163e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-20 09:24:44,823 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=859780.0, ans=0.09899494936611666 +2024-09-20 09:25:30,683 INFO [train.py:1198] (0/2) Epoch 48, batch 2300, loss[loss=0.2158, ctc_loss=0.1003, cr_loss=0.3348, attn_decoder_loss=0.2212, over 29734.00 frames. ], tot_loss[loss=0.2292, ctc_loss=0.1066, cr_loss=0.343, attn_decoder_loss=0.2352, over 5800515.93 frames. ], batch size: 72, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:25:47,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=859940.0, ans=0.2 +2024-09-20 09:25:53,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=859940.0, ans=0.125 +2024-09-20 09:25:57,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=859940.0, ans=0.125 +2024-09-20 09:26:17,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.48 vs. 
limit=10.0 +2024-09-20 09:26:18,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.40 vs. limit=15.0 +2024-09-20 09:26:46,313 INFO [train.py:1198] (0/2) Epoch 48, batch 2350, loss[loss=0.2357, ctc_loss=0.1146, cr_loss=0.3735, attn_decoder_loss=0.2409, over 29689.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1066, cr_loss=0.343, attn_decoder_loss=0.2353, over 5804675.82 frames. ], batch size: 83, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:26:48,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=860100.0, ans=0.2 +2024-09-20 09:27:08,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=860140.0, ans=0.5 +2024-09-20 09:27:10,173 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.540e+01 9.100e+01 9.543e+01 1.555e+02, threshold=1.820e+02, percent-clipped=0.0 +2024-09-20 09:27:24,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=860180.0, ans=0.2 +2024-09-20 09:27:49,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=860260.0, ans=0.125 +2024-09-20 09:28:01,993 INFO [train.py:1198] (0/2) Epoch 48, batch 2400, loss[loss=0.2202, ctc_loss=0.1067, cr_loss=0.3427, attn_decoder_loss=0.2251, over 29545.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1071, cr_loss=0.3446, attn_decoder_loss=0.236, over 5809073.40 frames. ], batch size: 76, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:28:02,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=860300.0, ans=0.0 +2024-09-20 09:28:07,529 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.94 vs. limit=6.0 +2024-09-20 09:28:09,800 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=860300.0, ans=0.125 +2024-09-20 09:28:49,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=860420.0, ans=0.5 +2024-09-20 09:29:21,833 INFO [train.py:1198] (0/2) Epoch 48, batch 2450, loss[loss=0.2475, ctc_loss=0.121, cr_loss=0.388, attn_decoder_loss=0.2529, over 29728.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1079, cr_loss=0.3463, attn_decoder_loss=0.237, over 5785118.11 frames. ], batch size: 82, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:29:22,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=860500.0, ans=0.1 +2024-09-20 09:29:25,100 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=860500.0, ans=0.125 +2024-09-20 09:29:27,159 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.68 vs. limit=22.5 +2024-09-20 09:29:32,888 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.67 vs. 
limit=22.5 +2024-09-20 09:29:44,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=860540.0, ans=0.2 +2024-09-20 09:29:45,558 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.777e+01 8.875e+01 9.472e+01 1.005e+02 1.888e+02, threshold=1.894e+02, percent-clipped=1.0 +2024-09-20 09:29:45,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=860540.0, ans=0.0 +2024-09-20 09:29:55,010 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=860580.0, ans=0.0 +2024-09-20 09:30:36,888 INFO [train.py:1198] (0/2) Epoch 48, batch 2500, loss[loss=0.2361, ctc_loss=0.104, cr_loss=0.3284, attn_decoder_loss=0.2435, over 29647.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.108, cr_loss=0.3467, attn_decoder_loss=0.2372, over 5795019.78 frames. ], batch size: 86, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:30:37,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=860700.0, ans=0.025 +2024-09-20 09:30:46,321 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=860700.0, ans=0.125 +2024-09-20 09:30:55,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=860740.0, ans=0.09899494936611666 +2024-09-20 09:31:03,879 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.95 vs. limit=8.0 +2024-09-20 09:31:09,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=860780.0, ans=0.1 +2024-09-20 09:31:26,175 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=860820.0, ans=0.2 +2024-09-20 09:31:30,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=860820.0, ans=0.125 +2024-09-20 09:31:30,925 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=860820.0, ans=0.1 +2024-09-20 09:31:53,216 INFO [train.py:1198] (0/2) Epoch 48, batch 2550, loss[loss=0.2103, ctc_loss=0.09497, cr_loss=0.3296, attn_decoder_loss=0.2158, over 29376.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.108, cr_loss=0.3465, attn_decoder_loss=0.237, over 5798019.69 frames. ], batch size: 67, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:31:58,019 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=860900.0, ans=0.0 +2024-09-20 09:32:00,982 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:32:02,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=860900.0, ans=0.0 +2024-09-20 09:32:09,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=860940.0, ans=0.125 +2024-09-20 09:32:11,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.83 vs. 
limit=12.0 +2024-09-20 09:32:18,704 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.740e+01 9.125e+01 9.570e+01 1.327e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-20 09:32:25,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=860980.0, ans=0.125 +2024-09-20 09:32:39,723 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.68 vs. limit=22.5 +2024-09-20 09:32:59,235 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=861060.0, ans=0.125 +2024-09-20 09:33:12,682 INFO [train.py:1198] (0/2) Epoch 48, batch 2600, loss[loss=0.2292, ctc_loss=0.1072, cr_loss=0.3502, attn_decoder_loss=0.2349, over 29428.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1079, cr_loss=0.3467, attn_decoder_loss=0.2372, over 5794512.94 frames. ], batch size: 78, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:33:45,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=861180.0, ans=0.125 +2024-09-20 09:33:48,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=861180.0, ans=0.125 +2024-09-20 09:33:58,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=861220.0, ans=0.125 +2024-09-20 09:34:27,409 INFO [train.py:1198] (0/2) Epoch 48, batch 2650, loss[loss=0.2448, ctc_loss=0.1168, cr_loss=0.3723, attn_decoder_loss=0.2507, over 29287.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1081, cr_loss=0.347, attn_decoder_loss=0.2376, over 5800744.33 frames. ], batch size: 100, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:34:35,466 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=861300.0, ans=0.125 +2024-09-20 09:34:53,030 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.627e+01 9.156e+01 9.635e+01 1.174e+02, threshold=1.831e+02, percent-clipped=0.0 +2024-09-20 09:35:14,770 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.07 vs. limit=15.0 +2024-09-20 09:35:22,323 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.63 vs. limit=15.0 +2024-09-20 09:35:30,849 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:35:36,165 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=4.81 vs. limit=15.0 +2024-09-20 09:35:42,636 INFO [train.py:1198] (0/2) Epoch 48, batch 2700, loss[loss=0.2425, ctc_loss=0.1085, cr_loss=0.3392, attn_decoder_loss=0.2498, over 29529.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1086, cr_loss=0.3479, attn_decoder_loss=0.238, over 5795654.92 frames. 
], batch size: 87, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:35:45,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=861500.0, ans=0.125 +2024-09-20 09:35:55,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=861500.0, ans=0.1 +2024-09-20 09:36:07,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=861540.0, ans=0.5 +2024-09-20 09:36:45,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=861620.0, ans=0.1 +2024-09-20 09:36:57,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=861660.0, ans=0.1 +2024-09-20 09:37:03,341 INFO [train.py:1198] (0/2) Epoch 48, batch 2750, loss[loss=0.2165, ctc_loss=0.09687, cr_loss=0.3074, attn_decoder_loss=0.223, over 29512.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1079, cr_loss=0.3459, attn_decoder_loss=0.2369, over 5793238.57 frames. ], batch size: 75, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:37:03,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=861700.0, ans=0.0 +2024-09-20 09:37:08,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=861700.0, ans=0.95 +2024-09-20 09:37:20,272 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=861740.0, ans=0.1 +2024-09-20 09:37:23,210 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=861740.0, ans=0.125 +2024-09-20 09:37:23,803 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.14 vs. limit=22.5 +2024-09-20 09:37:28,903 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.868e+01 9.360e+01 1.005e+02 2.892e+02, threshold=1.872e+02, percent-clipped=3.0 +2024-09-20 09:37:35,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=861780.0, ans=0.1 +2024-09-20 09:37:36,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=861780.0, ans=0.125 +2024-09-20 09:37:57,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=861820.0, ans=0.2 +2024-09-20 09:37:58,300 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.81 vs. 
limit=15.0 +2024-09-20 09:37:59,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=861820.0, ans=0.0 +2024-09-20 09:38:08,621 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=861860.0, ans=0.125 +2024-09-20 09:38:13,126 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=861860.0, ans=0.125 +2024-09-20 09:38:18,880 INFO [train.py:1198] (0/2) Epoch 48, batch 2800, loss[loss=0.2498, ctc_loss=0.1281, cr_loss=0.3772, attn_decoder_loss=0.255, over 20563.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1084, cr_loss=0.3464, attn_decoder_loss=0.2368, over 5774896.62 frames. ], batch size: 211, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:38:22,259 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=861900.0, ans=0.1 +2024-09-20 09:38:23,748 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=861900.0, ans=0.2 +2024-09-20 09:38:41,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=861940.0, ans=0.09899494936611666 +2024-09-20 09:38:41,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=861940.0, ans=0.125 +2024-09-20 09:38:47,724 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=861980.0, ans=0.1 +2024-09-20 09:38:48,267 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.66 vs. limit=15.0 +2024-09-20 09:39:00,385 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.25 vs. limit=15.0 +2024-09-20 09:39:05,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=862020.0, ans=0.125 +2024-09-20 09:39:23,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=862060.0, ans=0.125 +2024-09-20 09:39:34,247 INFO [train.py:1198] (0/2) Epoch 48, batch 2850, loss[loss=0.2288, ctc_loss=0.1042, cr_loss=0.3487, attn_decoder_loss=0.2349, over 29491.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1083, cr_loss=0.3464, attn_decoder_loss=0.2371, over 5761102.55 frames. 
], batch size: 77, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:39:52,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=862140.0, ans=0.04949747468305833 +2024-09-20 09:39:59,997 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.529e+01 8.779e+01 9.246e+01 9.697e+01 4.650e+02, threshold=1.849e+02, percent-clipped=1.0 +2024-09-20 09:40:22,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=862220.0, ans=0.125 +2024-09-20 09:40:33,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=862220.0, ans=0.125 +2024-09-20 09:40:53,971 INFO [train.py:1198] (0/2) Epoch 48, batch 2900, loss[loss=0.222, ctc_loss=0.1002, cr_loss=0.3302, attn_decoder_loss=0.2282, over 29425.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1089, cr_loss=0.3483, attn_decoder_loss=0.2382, over 5787123.47 frames. ], batch size: 79, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:41:15,382 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=862340.0, ans=0.0 +2024-09-20 09:41:21,578 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=862340.0, ans=0.2 +2024-09-20 09:41:38,423 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=862420.0, ans=0.025 +2024-09-20 09:41:39,911 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=862420.0, ans=0.2 +2024-09-20 09:41:42,124 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.67 vs. limit=6.0 +2024-09-20 09:42:10,005 INFO [train.py:1198] (0/2) Epoch 48, batch 2950, loss[loss=0.2223, ctc_loss=0.1095, cr_loss=0.3632, attn_decoder_loss=0.2268, over 29525.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.108, cr_loss=0.3464, attn_decoder_loss=0.237, over 5782312.38 frames. ], batch size: 75, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:42:37,508 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.743e+01 9.257e+01 9.610e+01 1.643e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-20 09:42:52,697 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=862580.0, ans=0.2 +2024-09-20 09:42:57,286 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=862620.0, ans=0.0 +2024-09-20 09:43:00,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.14 vs. limit=15.0 +2024-09-20 09:43:23,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=862660.0, ans=0.1 +2024-09-20 09:43:25,736 INFO [train.py:1198] (0/2) Epoch 48, batch 3000, loss[loss=0.2289, ctc_loss=0.1098, cr_loss=0.3496, attn_decoder_loss=0.2343, over 29755.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1081, cr_loss=0.3465, attn_decoder_loss=0.2369, over 5783999.53 frames. 
], batch size: 81, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:43:25,737 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 09:43:31,636 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.3.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.2051, 3.8085, 3.9051, 4.2579, 4.2995, 4.3676, 3.8244, 4.3401], + device='cuda:0') +2024-09-20 09:43:44,037 INFO [train.py:1230] (0/2) Epoch 48, validation: loss=0.2127, ctc_loss=0.03675, cr_loss=6.55e-15, attn_decoder_loss=0.2323, over 944034.00 frames. +2024-09-20 09:43:44,038 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 09:43:47,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=862700.0, ans=0.125 +2024-09-20 09:43:57,792 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.21 vs. limit=15.0 +2024-09-20 09:44:19,225 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=862780.0, ans=0.1 +2024-09-20 09:44:25,172 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=862780.0, ans=0.125 +2024-09-20 09:44:44,736 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=862820.0, ans=0.125 +2024-09-20 09:45:04,018 INFO [train.py:1198] (0/2) Epoch 48, batch 3050, loss[loss=0.2194, ctc_loss=0.1035, cr_loss=0.3446, attn_decoder_loss=0.2247, over 29506.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1086, cr_loss=0.3478, attn_decoder_loss=0.2376, over 5777530.52 frames. ], batch size: 76, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:45:09,018 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=862900.0, ans=0.125 +2024-09-20 09:45:10,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=862900.0, ans=0.125 +2024-09-20 09:45:23,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=862940.0, ans=0.125 +2024-09-20 09:45:27,193 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.61 vs. limit=15.0 +2024-09-20 09:45:31,015 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.813e+01 8.880e+01 9.329e+01 1.001e+02 1.444e+02, threshold=1.866e+02, percent-clipped=0.0 +2024-09-20 09:45:37,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=862980.0, ans=0.125 +2024-09-20 09:45:44,907 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=862980.0, ans=0.0 +2024-09-20 09:45:46,350 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=862980.0, ans=0.0 +2024-09-20 09:45:59,392 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.24 vs. limit=15.0 +2024-09-20 09:46:19,412 INFO [train.py:1198] (0/2) Epoch 48, batch 3100, loss[loss=0.2401, ctc_loss=0.1158, cr_loss=0.355, attn_decoder_loss=0.246, over 29187.00 frames. 
], tot_loss[loss=0.2313, ctc_loss=0.1083, cr_loss=0.3468, attn_decoder_loss=0.2373, over 5777309.88 frames. ], batch size: 100, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:46:22,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=863100.0, ans=0.125 +2024-09-20 09:46:24,258 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=863100.0, ans=0.95 +2024-09-20 09:46:31,713 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=863100.0, ans=0.0 +2024-09-20 09:46:33,252 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=863140.0, ans=0.09899494936611666 +2024-09-20 09:46:37,654 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=863140.0, ans=0.09899494936611666 +2024-09-20 09:46:51,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=863180.0, ans=0.0 +2024-09-20 09:47:06,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=863220.0, ans=0.125 +2024-09-20 09:47:11,939 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.36 vs. limit=15.0 +2024-09-20 09:47:18,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=863260.0, ans=10.0 +2024-09-20 09:47:35,296 INFO [train.py:1198] (0/2) Epoch 48, batch 3150, loss[loss=0.2484, ctc_loss=0.121, cr_loss=0.377, attn_decoder_loss=0.2541, over 28789.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1079, cr_loss=0.3463, attn_decoder_loss=0.2372, over 5784338.57 frames. ], batch size: 104, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:47:40,227 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=863300.0, ans=0.125 +2024-09-20 09:48:06,679 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.651e+01 9.014e+01 9.549e+01 1.887e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-20 09:48:17,599 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=863380.0, ans=0.125 +2024-09-20 09:48:25,041 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:48:32,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=863420.0, ans=0.125 +2024-09-20 09:48:34,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=863420.0, ans=0.0 +2024-09-20 09:48:38,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=863460.0, ans=0.0 +2024-09-20 09:48:54,877 INFO [train.py:1198] (0/2) Epoch 48, batch 3200, loss[loss=0.2268, ctc_loss=0.1072, cr_loss=0.343, attn_decoder_loss=0.2325, over 29398.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1074, cr_loss=0.3454, attn_decoder_loss=0.2367, over 5795077.27 frames. 
], batch size: 79, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:49:01,441 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=863500.0, ans=0.125 +2024-09-20 09:49:08,199 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.85 vs. limit=22.5 +2024-09-20 09:49:20,245 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.43 vs. limit=10.0 +2024-09-20 09:49:30,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=863580.0, ans=0.125 +2024-09-20 09:49:45,058 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=863620.0, ans=0.125 +2024-09-20 09:49:51,784 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.75 vs. limit=15.0 +2024-09-20 09:49:58,714 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=863660.0, ans=0.125 +2024-09-20 09:50:10,573 INFO [train.py:1198] (0/2) Epoch 48, batch 3250, loss[loss=0.2391, ctc_loss=0.1126, cr_loss=0.3643, attn_decoder_loss=0.245, over 29703.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1076, cr_loss=0.3465, attn_decoder_loss=0.2371, over 5801742.08 frames. ], batch size: 84, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:50:10,902 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=863700.0, ans=0.125 +2024-09-20 09:50:23,128 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=863700.0, ans=0.1 +2024-09-20 09:50:36,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=863740.0, ans=0.2 +2024-09-20 09:50:37,750 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.777e+01 9.225e+01 9.680e+01 2.463e+02, threshold=1.845e+02, percent-clipped=1.0 +2024-09-20 09:50:38,777 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.47 vs. limit=15.0 +2024-09-20 09:50:47,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=863780.0, ans=0.0 +2024-09-20 09:50:56,639 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.54 vs. limit=15.0 +2024-09-20 09:51:09,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=863860.0, ans=0.07 +2024-09-20 09:51:13,626 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.20 vs. limit=10.0 +2024-09-20 09:51:26,310 INFO [train.py:1198] (0/2) Epoch 48, batch 3300, loss[loss=0.2432, ctc_loss=0.1083, cr_loss=0.3266, attn_decoder_loss=0.2509, over 28287.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.107, cr_loss=0.3452, attn_decoder_loss=0.2361, over 5798668.32 frames. 
], batch size: 111, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:52:05,562 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=863980.0, ans=0.0 +2024-09-20 09:52:07,144 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-216000.pt +2024-09-20 09:52:26,375 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=864020.0, ans=0.125 +2024-09-20 09:52:30,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=864020.0, ans=0.2 +2024-09-20 09:52:38,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=864060.0, ans=0.125 +2024-09-20 09:52:48,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=864060.0, ans=0.0 +2024-09-20 09:52:52,953 INFO [train.py:1198] (0/2) Epoch 48, batch 3350, loss[loss=0.2495, ctc_loss=0.1267, cr_loss=0.3735, attn_decoder_loss=0.2548, over 28866.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1077, cr_loss=0.3469, attn_decoder_loss=0.2369, over 5775578.11 frames. ], batch size: 104, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:53:02,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=864100.0, ans=0.025 +2024-09-20 09:53:11,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=864140.0, ans=0.125 +2024-09-20 09:53:20,154 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.868e+01 9.379e+01 9.923e+01 1.602e+02, threshold=1.876e+02, percent-clipped=0.0 +2024-09-20 09:53:20,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=864140.0, ans=0.09899494936611666 +2024-09-20 09:53:23,572 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=864180.0, ans=0.1 +2024-09-20 09:53:29,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=864180.0, ans=0.125 +2024-09-20 09:53:29,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=864180.0, ans=0.125 +2024-09-20 09:53:29,648 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:53:38,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=864220.0, ans=0.125 +2024-09-20 09:53:44,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=864220.0, ans=0.125 +2024-09-20 09:53:58,310 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=864260.0, ans=0.0 +2024-09-20 09:54:04,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=864260.0, ans=0.0 +2024-09-20 09:54:05,792 INFO [scaling.py:214] (0/2) ScheduledFloat: 
name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=864260.0, ans=0.025 +2024-09-20 09:54:08,444 INFO [train.py:1198] (0/2) Epoch 48, batch 3400, loss[loss=0.2076, ctc_loss=0.09971, cr_loss=0.3432, attn_decoder_loss=0.2119, over 29350.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.108, cr_loss=0.3475, attn_decoder_loss=0.2369, over 5767638.10 frames. ], batch size: 67, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:54:43,640 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=864380.0, ans=0.0 +2024-09-20 09:54:46,816 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=864380.0, ans=0.0 +2024-09-20 09:54:49,755 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=864380.0, ans=0.0 +2024-09-20 09:54:52,832 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=864420.0, ans=0.0 +2024-09-20 09:54:55,652 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:54:58,101 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.21 vs. limit=15.0 +2024-09-20 09:55:19,741 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=864460.0, ans=0.125 +2024-09-20 09:55:23,994 INFO [train.py:1198] (0/2) Epoch 48, batch 3450, loss[loss=0.2375, ctc_loss=0.1086, cr_loss=0.3485, attn_decoder_loss=0.244, over 28289.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.108, cr_loss=0.3477, attn_decoder_loss=0.2372, over 5775760.72 frames. ], batch size: 111, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:55:30,253 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=864500.0, ans=0.125 +2024-09-20 09:55:55,191 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.532e+01 9.118e+01 9.502e+01 1.543e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-20 09:56:32,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=864660.0, ans=0.125 +2024-09-20 09:56:35,881 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=864660.0, ans=0.125 +2024-09-20 09:56:43,109 INFO [train.py:1198] (0/2) Epoch 48, batch 3500, loss[loss=0.2164, ctc_loss=0.0936, cr_loss=0.3251, attn_decoder_loss=0.2228, over 29327.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1078, cr_loss=0.3472, attn_decoder_loss=0.2368, over 5777417.73 frames. ], batch size: 71, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:57:16,866 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.25 vs. limit=15.0 +2024-09-20 09:57:52,547 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=864860.0, ans=0.125 +2024-09-20 09:57:58,097 INFO [train.py:1198] (0/2) Epoch 48, batch 3550, loss[loss=0.24, ctc_loss=0.1053, cr_loss=0.3438, attn_decoder_loss=0.2473, over 29723.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1075, cr_loss=0.3463, attn_decoder_loss=0.2366, over 5782701.66 frames. 
], batch size: 89, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:58:24,720 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.597e+01 8.565e+01 9.018e+01 9.505e+01 1.694e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-20 09:58:41,472 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=865020.0, ans=0.0 +2024-09-20 09:58:54,893 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=865020.0, ans=0.125 +2024-09-20 09:58:59,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=865060.0, ans=0.125 +2024-09-20 09:59:01,439 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.38 vs. limit=15.0 +2024-09-20 09:59:11,260 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=865100.0, ans=0.125 +2024-09-20 09:59:12,459 INFO [train.py:1198] (0/2) Epoch 48, batch 3600, loss[loss=0.2295, ctc_loss=0.1118, cr_loss=0.3546, attn_decoder_loss=0.2347, over 29504.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1074, cr_loss=0.3462, attn_decoder_loss=0.2366, over 5790946.56 frames. ], batch size: 77, lr: 2.28e-03, grad_scale: 32.0 +2024-09-20 09:59:25,269 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.85 vs. limit=6.0 +2024-09-20 09:59:56,595 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.11 vs. limit=6.0 +2024-09-20 10:00:10,380 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=865260.0, ans=0.125 +2024-09-20 10:00:26,296 INFO [train.py:1198] (0/2) Epoch 48, batch 3650, loss[loss=0.2479, ctc_loss=0.1182, cr_loss=0.3676, attn_decoder_loss=0.2542, over 29492.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1068, cr_loss=0.3451, attn_decoder_loss=0.236, over 5792926.02 frames. ], batch size: 90, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:00:54,244 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.368e+01 8.575e+01 9.071e+01 9.730e+01 1.168e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 10:01:22,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=865420.0, ans=0.125 +2024-09-20 10:01:25,385 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.47 vs. limit=8.0 +2024-09-20 10:01:44,201 INFO [train.py:1198] (0/2) Epoch 48, batch 3700, loss[loss=0.2411, ctc_loss=0.1073, cr_loss=0.3438, attn_decoder_loss=0.2483, over 29698.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1071, cr_loss=0.346, attn_decoder_loss=0.2364, over 5803291.84 frames. 
], batch size: 84, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:01:57,963 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=865540.0, ans=0.2 +2024-09-20 10:02:20,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=865580.0, ans=0.0 +2024-09-20 10:02:21,980 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=865580.0, ans=0.125 +2024-09-20 10:02:42,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=865660.0, ans=0.025 +2024-09-20 10:02:45,981 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.91 vs. limit=12.0 +2024-09-20 10:02:54,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=865660.0, ans=0.125 +2024-09-20 10:02:58,586 INFO [train.py:1198] (0/2) Epoch 48, batch 3750, loss[loss=0.2074, ctc_loss=0.09622, cr_loss=0.3412, attn_decoder_loss=0.2122, over 29344.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1074, cr_loss=0.3463, attn_decoder_loss=0.2364, over 5807035.26 frames. ], batch size: 67, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:03:01,895 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=865700.0, ans=0.125 +2024-09-20 10:03:09,271 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=865700.0, ans=0.125 +2024-09-20 10:03:14,669 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.18 vs. limit=6.0 +2024-09-20 10:03:19,670 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=865740.0, ans=0.125 +2024-09-20 10:03:28,344 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.331e+01 8.668e+01 9.150e+01 9.729e+01 2.139e+02, threshold=1.830e+02, percent-clipped=2.0 +2024-09-20 10:03:31,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=865780.0, ans=0.125 +2024-09-20 10:03:39,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=865780.0, ans=0.125 +2024-09-20 10:04:00,557 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.07 vs. limit=15.0 +2024-09-20 10:04:12,899 INFO [train.py:1198] (0/2) Epoch 48, batch 3800, loss[loss=0.238, ctc_loss=0.1159, cr_loss=0.3646, attn_decoder_loss=0.2435, over 29631.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1074, cr_loss=0.3462, attn_decoder_loss=0.2364, over 5797711.73 frames. ], batch size: 86, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:04:26,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=865940.0, ans=0.0 +2024-09-20 10:04:30,118 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.72 vs. 
limit=15.0 +2024-09-20 10:04:47,323 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=865980.0, ans=0.0 +2024-09-20 10:04:51,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=865980.0, ans=0.2 +2024-09-20 10:05:23,385 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.86 vs. limit=10.0 +2024-09-20 10:05:27,016 INFO [train.py:1198] (0/2) Epoch 48, batch 3850, loss[loss=0.2351, ctc_loss=0.1075, cr_loss=0.3566, attn_decoder_loss=0.2414, over 29208.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1073, cr_loss=0.3462, attn_decoder_loss=0.2362, over 5811952.56 frames. ], batch size: 100, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:05:38,868 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=866100.0, ans=0.1 +2024-09-20 10:05:41,974 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:05:46,781 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.11 vs. limit=15.0 +2024-09-20 10:05:56,512 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.558e+01 8.668e+01 9.090e+01 9.614e+01 1.900e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-20 10:06:01,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=866180.0, ans=0.0 +2024-09-20 10:06:07,351 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=866180.0, ans=0.0 +2024-09-20 10:06:16,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=866220.0, ans=0.125 +2024-09-20 10:06:17,836 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=866220.0, ans=0.125 +2024-09-20 10:06:23,885 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.35 vs. limit=15.0 +2024-09-20 10:06:40,951 INFO [train.py:1198] (0/2) Epoch 48, batch 3900, loss[loss=0.2385, ctc_loss=0.1102, cr_loss=0.3461, attn_decoder_loss=0.2451, over 29628.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1077, cr_loss=0.3467, attn_decoder_loss=0.2365, over 5816274.87 frames. ], batch size: 86, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:06:41,284 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=866300.0, ans=0.125 +2024-09-20 10:06:45,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=866300.0, ans=0.0 +2024-09-20 10:07:22,118 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.04 vs. 
limit=6.0 +2024-09-20 10:07:26,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=866380.0, ans=0.2 +2024-09-20 10:07:49,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=866460.0, ans=0.1 +2024-09-20 10:07:50,983 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=866460.0, ans=0.0 +2024-09-20 10:07:58,099 INFO [train.py:1198] (0/2) Epoch 48, batch 3950, loss[loss=0.2507, ctc_loss=0.123, cr_loss=0.3739, attn_decoder_loss=0.2566, over 29487.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1073, cr_loss=0.3459, attn_decoder_loss=0.2365, over 5835555.21 frames. ], batch size: 97, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:08:04,308 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=866500.0, ans=0.125 +2024-09-20 10:08:17,312 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=866540.0, ans=0.125 +2024-09-20 10:08:17,506 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:08:27,414 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.623e+01 9.056e+01 9.623e+01 1.586e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-20 10:08:37,221 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.78 vs. limit=15.0 +2024-09-20 10:08:41,403 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.53 vs. limit=15.0 +2024-09-20 10:08:49,725 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=866620.0, ans=0.0 +2024-09-20 10:08:59,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=866660.0, ans=0.125 +2024-09-20 10:09:11,165 INFO [train.py:1198] (0/2) Epoch 48, batch 4000, loss[loss=0.2073, ctc_loss=0.08399, cr_loss=0.2907, attn_decoder_loss=0.2146, over 29509.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1075, cr_loss=0.3461, attn_decoder_loss=0.2366, over 5814001.30 frames. ], batch size: 74, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:09:23,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=866700.0, ans=0.1 +2024-09-20 10:09:27,437 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=866740.0, ans=0.125 +2024-09-20 10:09:36,342 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=866740.0, ans=0.0 +2024-09-20 10:09:45,247 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=866780.0, ans=0.125 +2024-09-20 10:09:50,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=866780.0, ans=0.1 +2024-09-20 10:09:53,135 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.91 vs. 
limit=6.0 +2024-09-20 10:09:56,855 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=866820.0, ans=0.0 +2024-09-20 10:10:02,047 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.72 vs. limit=15.0 +2024-09-20 10:10:20,636 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=866860.0, ans=0.125 +2024-09-20 10:10:24,271 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.04 vs. limit=15.0 +2024-09-20 10:10:24,655 INFO [train.py:1198] (0/2) Epoch 48, batch 4050, loss[loss=0.2433, ctc_loss=0.1206, cr_loss=0.3394, attn_decoder_loss=0.2494, over 19806.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1075, cr_loss=0.346, attn_decoder_loss=0.2363, over 5796888.82 frames. ], batch size: 209, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:10:44,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.89 vs. limit=15.0 +2024-09-20 10:10:46,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=866940.0, ans=0.1 +2024-09-20 10:10:49,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=866940.0, ans=0.125 +2024-09-20 10:10:51,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=866940.0, ans=0.0 +2024-09-20 10:10:53,680 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.713e+01 8.805e+01 9.236e+01 9.679e+01 1.942e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-20 10:11:10,117 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=867020.0, ans=0.125 +2024-09-20 10:11:11,982 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.64 vs. limit=10.0 +2024-09-20 10:11:27,443 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=867060.0, ans=0.025 +2024-09-20 10:11:39,148 INFO [train.py:1198] (0/2) Epoch 48, batch 4100, loss[loss=0.2511, ctc_loss=0.1293, cr_loss=0.3928, attn_decoder_loss=0.2559, over 29519.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1079, cr_loss=0.3469, attn_decoder_loss=0.2366, over 5792418.00 frames. 
], batch size: 90, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:12:02,762 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=867140.0, ans=0.1 +2024-09-20 10:12:05,652 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=867140.0, ans=0.04949747468305833 +2024-09-20 10:12:10,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=867180.0, ans=0.125 +2024-09-20 10:12:26,897 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=867220.0, ans=0.125 +2024-09-20 10:12:50,464 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=867260.0, ans=0.125 +2024-09-20 10:12:54,717 INFO [train.py:1198] (0/2) Epoch 48, batch 4150, loss[loss=0.2215, ctc_loss=0.1054, cr_loss=0.352, attn_decoder_loss=0.2266, over 29500.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.108, cr_loss=0.3473, attn_decoder_loss=0.2362, over 5797162.02 frames. ], batch size: 77, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:13:15,608 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=867340.0, ans=0.125 +2024-09-20 10:13:23,981 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.793e+01 8.808e+01 9.166e+01 9.915e+01 1.612e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-20 10:13:37,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=867420.0, ans=0.0 +2024-09-20 10:13:49,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=867420.0, ans=0.1 +2024-09-20 10:13:54,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=867460.0, ans=0.0 +2024-09-20 10:14:04,336 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.72 vs. limit=22.5 +2024-09-20 10:14:08,054 INFO [train.py:1198] (0/2) Epoch 48, batch 4200, loss[loss=0.2338, ctc_loss=0.1161, cr_loss=0.3581, attn_decoder_loss=0.239, over 29511.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1082, cr_loss=0.348, attn_decoder_loss=0.2367, over 5799486.42 frames. ], batch size: 90, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:14:31,410 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.13 vs. limit=10.0 +2024-09-20 10:15:17,635 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.50 vs. limit=22.5 +2024-09-20 10:15:20,379 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=12.0 +2024-09-20 10:15:21,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=867700.0, ans=0.0 +2024-09-20 10:15:22,555 INFO [train.py:1198] (0/2) Epoch 48, batch 4250, loss[loss=0.2219, ctc_loss=0.09539, cr_loss=0.3149, attn_decoder_loss=0.229, over 29527.00 frames. 
], tot_loss[loss=0.2308, ctc_loss=0.108, cr_loss=0.3472, attn_decoder_loss=0.2368, over 5804572.06 frames. ], batch size: 74, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:15:22,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=867700.0, ans=0.125 +2024-09-20 10:15:24,232 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_ff2.min_abs, batch_count=867700.0, ans=0.1 +2024-09-20 10:15:31,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=867700.0, ans=0.125 +2024-09-20 10:15:54,014 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 8.733e+01 9.174e+01 9.868e+01 2.354e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-20 10:15:56,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.88 vs. limit=10.0 +2024-09-20 10:15:57,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=867780.0, ans=0.125 +2024-09-20 10:16:03,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=867780.0, ans=0.125 +2024-09-20 10:16:03,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=867780.0, ans=0.125 +2024-09-20 10:16:05,448 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.51 vs. limit=15.0 +2024-09-20 10:16:13,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=867820.0, ans=0.5 +2024-09-20 10:16:16,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=867820.0, ans=0.0 +2024-09-20 10:16:22,825 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=6.14 vs. limit=12.0 +2024-09-20 10:16:36,677 INFO [train.py:1198] (0/2) Epoch 48, batch 4300, loss[loss=0.241, ctc_loss=0.1145, cr_loss=0.3516, attn_decoder_loss=0.2473, over 29546.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1079, cr_loss=0.3464, attn_decoder_loss=0.2372, over 5793588.06 frames. ], batch size: 87, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:16:38,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=867900.0, ans=0.125 +2024-09-20 10:17:04,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=867980.0, ans=0.125 +2024-09-20 10:17:24,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=868020.0, ans=0.125 +2024-09-20 10:17:27,978 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.60 vs. 
limit=15.0 +2024-09-20 10:17:31,595 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=868020.0, ans=0.025 +2024-09-20 10:17:45,872 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.09 vs. limit=6.0 +2024-09-20 10:17:50,562 INFO [train.py:1198] (0/2) Epoch 48, batch 4350, loss[loss=0.2489, ctc_loss=0.1276, cr_loss=0.379, attn_decoder_loss=0.2539, over 29529.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1104, cr_loss=0.3519, attn_decoder_loss=0.2403, over 5796940.15 frames. ], batch size: 97, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:18:00,984 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=868100.0, ans=0.125 +2024-09-20 10:18:04,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.48 vs. limit=15.0 +2024-09-20 10:18:14,853 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=868140.0, ans=0.95 +2024-09-20 10:18:16,322 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=868140.0, ans=0.125 +2024-09-20 10:18:21,800 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.740e+01 9.099e+01 9.551e+01 1.026e+02 1.775e+02, threshold=1.910e+02, percent-clipped=0.0 +2024-09-20 10:18:29,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=868180.0, ans=0.1 +2024-09-20 10:18:29,360 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=868180.0, ans=0.125 +2024-09-20 10:18:35,116 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_ff3.min_abs, batch_count=868220.0, ans=0.2 +2024-09-20 10:18:42,685 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.29 vs. limit=22.5 +2024-09-20 10:18:42,805 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.78 vs. limit=22.5 +2024-09-20 10:18:59,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=868260.0, ans=0.125 +2024-09-20 10:19:01,155 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.09 vs. limit=10.0 +2024-09-20 10:19:04,477 INFO [train.py:1198] (0/2) Epoch 48, batch 4400, loss[loss=0.2428, ctc_loss=0.1199, cr_loss=0.3713, attn_decoder_loss=0.2482, over 27268.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1114, cr_loss=0.354, attn_decoder_loss=0.242, over 5767692.62 frames. 
], batch size: 124, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:19:44,380 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:19:45,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=868380.0, ans=0.09899494936611666 +2024-09-20 10:19:48,834 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=868420.0, ans=0.0 +2024-09-20 10:19:53,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=868420.0, ans=0.1 +2024-09-20 10:19:53,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=868420.0, ans=0.125 +2024-09-20 10:20:18,889 INFO [train.py:1198] (0/2) Epoch 48, batch 4450, loss[loss=0.255, ctc_loss=0.1378, cr_loss=0.3876, attn_decoder_loss=0.2594, over 20291.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1148, cr_loss=0.3596, attn_decoder_loss=0.2442, over 5574642.69 frames. ], batch size: 210, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:20:19,328 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=868500.0, ans=0.125 +2024-09-20 10:20:30,333 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.64 vs. limit=10.0 +2024-09-20 10:20:49,452 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=868580.0, ans=0.0 +2024-09-20 10:20:52,107 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.165e+01 9.214e+01 1.004e+02 1.130e+02 1.604e+02, threshold=2.007e+02, percent-clipped=0.0 +2024-09-20 10:20:55,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=868580.0, ans=0.1 +2024-09-20 10:21:03,281 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.71 vs. limit=15.0 +2024-09-20 10:21:22,308 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.22 vs. limit=15.0 +2024-09-20 10:21:33,695 INFO [train.py:1198] (0/2) Epoch 48, batch 4500, loss[loss=0.2521, ctc_loss=0.1374, cr_loss=0.3722, attn_decoder_loss=0.2566, over 20367.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1174, cr_loss=0.3617, attn_decoder_loss=0.2457, over 5236472.66 frames. ], batch size: 210, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:21:39,838 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=868700.0, ans=0.125 +2024-09-20 10:21:44,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=868700.0, ans=0.95 +2024-09-20 10:21:51,153 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=9.75 vs. 
limit=12.0 +2024-09-20 10:22:11,204 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-48.pt +2024-09-20 10:23:01,527 INFO [train.py:1198] (0/2) Epoch 49, batch 0, loss[loss=0.207, ctc_loss=0.08852, cr_loss=0.3078, attn_decoder_loss=0.2133, over 29604.00 frames. ], tot_loss[loss=0.207, ctc_loss=0.08852, cr_loss=0.3078, attn_decoder_loss=0.2133, over 29604.00 frames. ], batch size: 73, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:23:01,527 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 10:23:19,981 INFO [train.py:1230] (0/2) Epoch 49, validation: loss=0.2124, ctc_loss=0.03569, cr_loss=6.554e-15, attn_decoder_loss=0.2321, over 944034.00 frames. +2024-09-20 10:23:19,981 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 10:23:26,339 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=868800.0, ans=0.025 +2024-09-20 10:23:27,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=868800.0, ans=0.125 +2024-09-20 10:24:24,926 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:24:32,177 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.048e+01 9.535e+01 1.078e+02 1.164e+02 4.744e+02, threshold=2.156e+02, percent-clipped=1.0 +2024-09-20 10:24:36,547 INFO [train.py:1198] (0/2) Epoch 49, batch 50, loss[loss=0.2056, ctc_loss=0.09332, cr_loss=0.3106, attn_decoder_loss=0.2112, over 29457.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1086, cr_loss=0.3466, attn_decoder_loss=0.2375, over 1269174.46 frames. ], batch size: 70, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:24:57,910 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=869040.0, ans=0.0 +2024-09-20 10:24:57,929 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=869040.0, ans=0.125 +2024-09-20 10:25:02,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=869040.0, ans=0.0 +2024-09-20 10:25:10,029 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=869080.0, ans=0.125 +2024-09-20 10:25:31,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=869120.0, ans=0.125 +2024-09-20 10:25:53,938 INFO [train.py:1198] (0/2) Epoch 49, batch 100, loss[loss=0.2257, ctc_loss=0.1118, cr_loss=0.3366, attn_decoder_loss=0.2309, over 29545.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1107, cr_loss=0.352, attn_decoder_loss=0.2392, over 2252368.66 frames. ], batch size: 76, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:25:57,105 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=869200.0, ans=0.125 +2024-09-20 10:26:08,121 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.56 vs. 
limit=15.0 +2024-09-20 10:26:34,401 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=869280.0, ans=0.125 +2024-09-20 10:26:56,894 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=869360.0, ans=0.07 +2024-09-20 10:26:58,404 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=869360.0, ans=0.125 +2024-09-20 10:27:05,481 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.667e+01 9.247e+01 9.821e+01 1.649e+02, threshold=1.849e+02, percent-clipped=0.0 +2024-09-20 10:27:08,519 INFO [train.py:1198] (0/2) Epoch 49, batch 150, loss[loss=0.2089, ctc_loss=0.09265, cr_loss=0.3253, attn_decoder_loss=0.2146, over 29440.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1082, cr_loss=0.3472, attn_decoder_loss=0.2371, over 3047414.00 frames. ], batch size: 70, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:27:17,889 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=869400.0, ans=0.0 +2024-09-20 10:27:33,255 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=869440.0, ans=0.125 +2024-09-20 10:27:48,634 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=869480.0, ans=0.2 +2024-09-20 10:27:53,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=869480.0, ans=0.0 +2024-09-20 10:28:00,613 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:28:00,669 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=869520.0, ans=0.0 +2024-09-20 10:28:06,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=869520.0, ans=0.125 +2024-09-20 10:28:16,471 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.79 vs. limit=15.0 +2024-09-20 10:28:26,279 INFO [train.py:1198] (0/2) Epoch 49, batch 200, loss[loss=0.2413, ctc_loss=0.1131, cr_loss=0.3641, attn_decoder_loss=0.2474, over 27089.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1075, cr_loss=0.3466, attn_decoder_loss=0.2362, over 3658689.03 frames. ], batch size: 124, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:28:58,211 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=869680.0, ans=0.1 +2024-09-20 10:29:13,294 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=869720.0, ans=0.125 +2024-09-20 10:29:14,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=869720.0, ans=0.125 +2024-09-20 10:29:18,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.39 vs. 
limit=22.5 +2024-09-20 10:29:38,438 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 8.622e+01 9.248e+01 9.651e+01 1.394e+02, threshold=1.850e+02, percent-clipped=0.0 +2024-09-20 10:29:43,771 INFO [train.py:1198] (0/2) Epoch 49, batch 250, loss[loss=0.2535, ctc_loss=0.1221, cr_loss=0.3761, attn_decoder_loss=0.2598, over 29222.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.3469, attn_decoder_loss=0.2365, over 4141638.23 frames. ], batch size: 100, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:29:46,103 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.65 vs. limit=22.5 +2024-09-20 10:29:48,120 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.81 vs. limit=5.0 +2024-09-20 10:30:15,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=869880.0, ans=0.1 +2024-09-20 10:30:59,203 INFO [train.py:1198] (0/2) Epoch 49, batch 300, loss[loss=0.2428, ctc_loss=0.115, cr_loss=0.3555, attn_decoder_loss=0.2491, over 29525.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1072, cr_loss=0.3459, attn_decoder_loss=0.2362, over 4508627.99 frames. ], batch size: 92, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:31:07,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=870000.0, ans=0.0 +2024-09-20 10:31:21,888 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=870040.0, ans=0.1 +2024-09-20 10:31:52,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=870120.0, ans=0.125 +2024-09-20 10:31:54,329 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=870120.0, ans=0.0 +2024-09-20 10:32:13,610 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.601e+01 9.011e+01 9.321e+01 1.888e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 10:32:16,512 INFO [train.py:1198] (0/2) Epoch 49, batch 350, loss[loss=0.2031, ctc_loss=0.07674, cr_loss=0.2854, attn_decoder_loss=0.2108, over 29315.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1074, cr_loss=0.3465, attn_decoder_loss=0.2367, over 4794983.95 frames. ], batch size: 71, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:32:16,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=870200.0, ans=0.2 +2024-09-20 10:32:31,980 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.05 vs. 
limit=15.0 +2024-09-20 10:32:44,938 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=870280.0, ans=0.125 +2024-09-20 10:32:55,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=870280.0, ans=0.125 +2024-09-20 10:33:22,546 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=870360.0, ans=0.2 +2024-09-20 10:33:25,902 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.48 vs. limit=10.0 +2024-09-20 10:33:31,555 INFO [train.py:1198] (0/2) Epoch 49, batch 400, loss[loss=0.2409, ctc_loss=0.1129, cr_loss=0.3412, attn_decoder_loss=0.2476, over 29727.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.107, cr_loss=0.3458, attn_decoder_loss=0.2363, over 5024658.34 frames. ], batch size: 82, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:33:49,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=870440.0, ans=0.125 +2024-09-20 10:33:52,952 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.41 vs. limit=22.5 +2024-09-20 10:34:31,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=870520.0, ans=0.125 +2024-09-20 10:34:46,411 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.552e+01 9.241e+01 9.788e+01 2.728e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-20 10:34:49,363 INFO [train.py:1198] (0/2) Epoch 49, batch 450, loss[loss=0.2314, ctc_loss=0.1084, cr_loss=0.3601, attn_decoder_loss=0.237, over 29707.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1073, cr_loss=0.3466, attn_decoder_loss=0.2364, over 5188025.74 frames. ], batch size: 83, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:34:55,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=870600.0, ans=0.2 +2024-09-20 10:35:09,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=870640.0, ans=0.2 +2024-09-20 10:35:13,824 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:35:56,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=870760.0, ans=0.0 +2024-09-20 10:36:02,344 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.38 vs. limit=15.0 +2024-09-20 10:36:07,290 INFO [train.py:1198] (0/2) Epoch 49, batch 500, loss[loss=0.2468, ctc_loss=0.1193, cr_loss=0.3703, attn_decoder_loss=0.2527, over 29460.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1068, cr_loss=0.3456, attn_decoder_loss=0.2357, over 5330455.38 frames. ], batch size: 94, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:36:10,588 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=870800.0, ans=0.1 +2024-09-20 10:36:14,347 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.63 vs. 
limit=15.0 +2024-09-20 10:36:16,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=870800.0, ans=0.0 +2024-09-20 10:36:19,796 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:36:22,795 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:36:25,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=870840.0, ans=0.5 +2024-09-20 10:37:18,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=870960.0, ans=0.015 +2024-09-20 10:37:19,724 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.778e+01 8.745e+01 9.074e+01 9.621e+01 1.472e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-20 10:37:22,579 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.08 vs. limit=15.0 +2024-09-20 10:37:25,058 INFO [train.py:1198] (0/2) Epoch 49, batch 550, loss[loss=0.2343, ctc_loss=0.09976, cr_loss=0.3043, attn_decoder_loss=0.2425, over 28842.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1068, cr_loss=0.3453, attn_decoder_loss=0.2356, over 5423933.06 frames. ], batch size: 104, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:37:30,722 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.18 vs. limit=12.0 +2024-09-20 10:37:55,397 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=871080.0, ans=0.125 +2024-09-20 10:38:04,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=871080.0, ans=0.125 +2024-09-20 10:38:10,438 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=871120.0, ans=0.0 +2024-09-20 10:38:40,830 INFO [train.py:1198] (0/2) Epoch 49, batch 600, loss[loss=0.2396, ctc_loss=0.1058, cr_loss=0.3211, attn_decoder_loss=0.2473, over 29210.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1073, cr_loss=0.3459, attn_decoder_loss=0.236, over 5511122.91 frames. ], batch size: 100, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:39:54,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=871360.0, ans=0.125 +2024-09-20 10:39:55,326 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.840e+01 8.578e+01 9.036e+01 9.635e+01 5.589e+02, threshold=1.807e+02, percent-clipped=2.0 +2024-09-20 10:39:58,317 INFO [train.py:1198] (0/2) Epoch 49, batch 650, loss[loss=0.2277, ctc_loss=0.1024, cr_loss=0.3301, attn_decoder_loss=0.2343, over 29748.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1063, cr_loss=0.3435, attn_decoder_loss=0.2353, over 5588481.38 frames. ], batch size: 81, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:40:07,663 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=871400.0, ans=0.025 +2024-09-20 10:40:32,485 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.77 vs. 
limit=15.0 +2024-09-20 10:40:47,065 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=871520.0, ans=0.0 +2024-09-20 10:41:12,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=871600.0, ans=0.125 +2024-09-20 10:41:13,721 INFO [train.py:1198] (0/2) Epoch 49, batch 700, loss[loss=0.2208, ctc_loss=0.09761, cr_loss=0.3295, attn_decoder_loss=0.2271, over 29511.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1069, cr_loss=0.3448, attn_decoder_loss=0.2358, over 5639554.74 frames. ], batch size: 76, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:41:20,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=871600.0, ans=0.2 +2024-09-20 10:41:26,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=871600.0, ans=0.125 +2024-09-20 10:41:26,803 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=871600.0, ans=0.125 +2024-09-20 10:41:29,649 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=871640.0, ans=0.125 +2024-09-20 10:41:32,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=871640.0, ans=0.1 +2024-09-20 10:41:40,240 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=871640.0, ans=0.0 +2024-09-20 10:42:04,352 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=871720.0, ans=0.125 +2024-09-20 10:42:05,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=871720.0, ans=0.0 +2024-09-20 10:42:26,870 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=871760.0, ans=0.125 +2024-09-20 10:42:29,495 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.733e+01 8.695e+01 9.527e+01 1.020e+02 1.538e+02, threshold=1.905e+02, percent-clipped=0.0 +2024-09-20 10:42:31,056 INFO [train.py:1198] (0/2) Epoch 49, batch 750, loss[loss=0.2386, ctc_loss=0.1102, cr_loss=0.3574, attn_decoder_loss=0.245, over 29712.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1069, cr_loss=0.345, attn_decoder_loss=0.2358, over 5677316.70 frames. ], batch size: 82, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:43:06,452 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=12.0 +2024-09-20 10:43:34,581 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=871960.0, ans=0.125 +2024-09-20 10:43:37,521 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=871960.0, ans=0.125 +2024-09-20 10:43:40,484 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=871960.0, ans=0.125 +2024-09-20 10:43:48,839 INFO [train.py:1198] (0/2) Epoch 49, batch 800, loss[loss=0.2128, ctc_loss=0.09813, cr_loss=0.3239, attn_decoder_loss=0.2184, over 29584.00 frames. 
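The scaling.py:214 lines print ScheduledFloat values: module hyper-parameters (balancer probabilities, skip rates, min_abs bounds) that are functions of the running batch_count rather than constants, which is why each record carries a batch_count and an ans. Below is a minimal sketch of such a schedule, assuming piecewise-linear interpolation between breakpoints; the real icefall scaling.py implementation differs in detail.

    # Minimal sketch of a batch-count-dependent hyper-parameter, assuming
    # piecewise-linear interpolation between (batch_count, value) breakpoints.
    # Mirrors the spirit of the `ScheduledFloat ... batch_count=..., ans=...`
    # records above; not the actual icefall implementation.
    import bisect

    class ScheduledFloatSketch:
        def __init__(self, *points):  # points: (batch_count, value), ascending
            self.xs = [p[0] for p in points]
            self.ys = [p[1] for p in points]

        def __call__(self, batch_count: float) -> float:
            if batch_count <= self.xs[0]:
                return self.ys[0]
            if batch_count >= self.xs[-1]:
                return self.ys[-1]
            i = bisect.bisect_right(self.xs, batch_count)
            x0, x1 = self.xs[i - 1], self.xs[i]
            y0, y1 = self.ys[i - 1], self.ys[i]
            return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

    # e.g. a skip rate decaying from 0.3 to 0.1 over the first 20k batches
    skip_rate = ScheduledFloatSketch((0.0, 0.3), (20000.0, 0.1))
    print(skip_rate(872000.0))  # 0.1 -- long since saturated at this batch_count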
], tot_loss[loss=0.2296, ctc_loss=0.1066, cr_loss=0.3443, attn_decoder_loss=0.2357, over 5707598.61 frames. ], batch size: 73, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:44:02,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=872040.0, ans=0.025 +2024-09-20 10:44:05,485 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=872040.0, ans=0.125 +2024-09-20 10:44:05,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=872040.0, ans=0.07 +2024-09-20 10:44:11,967 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.40 vs. limit=15.0 +2024-09-20 10:44:15,378 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.27 vs. limit=12.0 +2024-09-20 10:44:17,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=872080.0, ans=0.1 +2024-09-20 10:44:23,462 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=872080.0, ans=0.05 +2024-09-20 10:44:35,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=872120.0, ans=0.0 +2024-09-20 10:44:37,101 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=872120.0, ans=0.0 +2024-09-20 10:45:03,403 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.964e+01 8.646e+01 9.267e+01 9.884e+01 3.056e+02, threshold=1.853e+02, percent-clipped=1.0 +2024-09-20 10:45:03,424 INFO [train.py:1198] (0/2) Epoch 49, batch 850, loss[loss=0.2347, ctc_loss=0.1099, cr_loss=0.3597, attn_decoder_loss=0.2406, over 29706.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1063, cr_loss=0.3435, attn_decoder_loss=0.2353, over 5734397.38 frames. ], batch size: 89, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:45:10,112 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.83 vs. limit=10.0 +2024-09-20 10:45:17,776 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=872200.0, ans=0.125 +2024-09-20 10:45:18,456 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.06 vs. 
limit=10.0 +2024-09-20 10:45:19,223 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=872240.0, ans=0.125 +2024-09-20 10:45:22,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=872240.0, ans=0.125 +2024-09-20 10:45:32,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=872240.0, ans=0.125 +2024-09-20 10:45:34,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=872280.0, ans=0.0 +2024-09-20 10:45:54,016 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=872320.0, ans=0.0 +2024-09-20 10:46:09,638 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.06 vs. limit=15.0 +2024-09-20 10:46:12,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=872360.0, ans=0.0 +2024-09-20 10:46:21,062 INFO [train.py:1198] (0/2) Epoch 49, batch 900, loss[loss=0.204, ctc_loss=0.0902, cr_loss=0.3127, attn_decoder_loss=0.2097, over 29628.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1064, cr_loss=0.3435, attn_decoder_loss=0.2355, over 5738161.68 frames. ], batch size: 73, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:46:37,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=872440.0, ans=0.125 +2024-09-20 10:46:37,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=872440.0, ans=0.2 +2024-09-20 10:46:40,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=872440.0, ans=0.125 +2024-09-20 10:46:45,637 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.21 vs. limit=15.0 +2024-09-20 10:47:08,036 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=872520.0, ans=0.2 +2024-09-20 10:47:29,122 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=872560.0, ans=0.2 +2024-09-20 10:47:33,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=872560.0, ans=0.125 +2024-09-20 10:47:38,400 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.315e+01 8.666e+01 9.208e+01 1.007e+02 2.481e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-20 10:47:38,427 INFO [train.py:1198] (0/2) Epoch 49, batch 950, loss[loss=0.2154, ctc_loss=0.09565, cr_loss=0.3102, attn_decoder_loss=0.2218, over 29522.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1065, cr_loss=0.3436, attn_decoder_loss=0.2358, over 5738756.57 frames. ], batch size: 74, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:47:41,835 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=872600.0, ans=0.125 +2024-09-20 10:48:00,281 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.69 vs. 
limit=15.0 +2024-09-20 10:48:20,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=872680.0, ans=0.0 +2024-09-20 10:48:43,047 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=872760.0, ans=0.125 +2024-09-20 10:48:44,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=872760.0, ans=0.125 +2024-09-20 10:48:46,607 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.74 vs. limit=15.0 +2024-09-20 10:48:53,489 INFO [train.py:1198] (0/2) Epoch 49, batch 1000, loss[loss=0.2139, ctc_loss=0.09212, cr_loss=0.2995, attn_decoder_loss=0.2208, over 29511.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1074, cr_loss=0.3455, attn_decoder_loss=0.2367, over 5733366.93 frames. ], batch size: 77, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:48:53,860 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=872800.0, ans=0.0 +2024-09-20 10:49:35,657 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.16 vs. limit=12.0 +2024-09-20 10:49:54,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=872960.0, ans=0.125 +2024-09-20 10:50:02,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=872960.0, ans=0.0 +2024-09-20 10:50:10,721 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.566e+01 9.140e+01 9.673e+01 2.370e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-20 10:50:10,746 INFO [train.py:1198] (0/2) Epoch 49, batch 1050, loss[loss=0.232, ctc_loss=0.1007, cr_loss=0.3298, attn_decoder_loss=0.2393, over 29695.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1074, cr_loss=0.3451, attn_decoder_loss=0.2364, over 5741299.63 frames. ], batch size: 85, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:50:43,539 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.97 vs. 
limit=15.0 +2024-09-20 10:50:49,118 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=873080.0, ans=0.1 +2024-09-20 10:50:56,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=873120.0, ans=0.2 +2024-09-20 10:51:01,000 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=873120.0, ans=0.025 +2024-09-20 10:51:03,967 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=873120.0, ans=0.0 +2024-09-20 10:51:14,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=873160.0, ans=0.1 +2024-09-20 10:51:14,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=873160.0, ans=0.125 +2024-09-20 10:51:26,491 INFO [train.py:1198] (0/2) Epoch 49, batch 1100, loss[loss=0.2237, ctc_loss=0.1059, cr_loss=0.3473, attn_decoder_loss=0.2291, over 29458.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1071, cr_loss=0.3448, attn_decoder_loss=0.236, over 5754248.20 frames. ], batch size: 78, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:51:47,184 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=873240.0, ans=0.025 +2024-09-20 10:52:06,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=873280.0, ans=0.09899494936611666 +2024-09-20 10:52:20,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=873320.0, ans=0.125 +2024-09-20 10:52:22,097 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=873320.0, ans=0.125 +2024-09-20 10:52:28,089 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=873360.0, ans=0.125 +2024-09-20 10:52:33,356 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.60 vs. limit=15.0 +2024-09-20 10:52:34,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=873360.0, ans=0.0 +2024-09-20 10:52:40,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=873360.0, ans=0.0 +2024-09-20 10:52:44,485 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.549e+01 9.114e+01 9.620e+01 1.410e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-20 10:52:44,507 INFO [train.py:1198] (0/2) Epoch 49, batch 1150, loss[loss=0.2314, ctc_loss=0.1065, cr_loss=0.3556, attn_decoder_loss=0.2374, over 29418.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1073, cr_loss=0.3448, attn_decoder_loss=0.236, over 5753016.70 frames. ], batch size: 78, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:52:44,775 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=873400.0, ans=0.0 +2024-09-20 10:53:01,482 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.92 vs. 
limit=15.0 +2024-09-20 10:54:02,454 INFO [train.py:1198] (0/2) Epoch 49, batch 1200, loss[loss=0.2303, ctc_loss=0.107, cr_loss=0.3471, attn_decoder_loss=0.2363, over 29686.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1078, cr_loss=0.346, attn_decoder_loss=0.2369, over 5746268.57 frames. ], batch size: 85, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:54:23,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=873640.0, ans=0.125 +2024-09-20 10:54:27,519 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.97 vs. limit=15.0 +2024-09-20 10:54:30,578 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.28 vs. limit=10.0 +2024-09-20 10:54:42,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=873680.0, ans=0.125 +2024-09-20 10:55:07,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=873760.0, ans=0.125 +2024-09-20 10:55:18,095 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.698e+01 8.765e+01 9.274e+01 9.697e+01 1.334e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-20 10:55:18,117 INFO [train.py:1198] (0/2) Epoch 49, batch 1250, loss[loss=0.2442, ctc_loss=0.116, cr_loss=0.382, attn_decoder_loss=0.25, over 29519.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1079, cr_loss=0.3466, attn_decoder_loss=0.2372, over 5775531.99 frames. ], batch size: 92, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:55:21,500 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=873800.0, ans=0.2 +2024-09-20 10:55:22,023 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=4.78 vs. limit=15.0 +2024-09-20 10:55:58,407 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:56:01,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=873880.0, ans=0.125 +2024-09-20 10:56:15,023 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=873920.0, ans=0.1 +2024-09-20 10:56:17,083 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.40 vs. limit=22.5 +2024-09-20 10:56:35,758 INFO [train.py:1198] (0/2) Epoch 49, batch 1300, loss[loss=0.227, ctc_loss=0.1033, cr_loss=0.34, attn_decoder_loss=0.2332, over 28399.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1077, cr_loss=0.3466, attn_decoder_loss=0.2367, over 5779635.32 frames. 
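The optim.py:487 warnings report the quartiles of recent gradient norms together with the clipping threshold and the fraction of batches clipped. The threshold visibly tracks the statistics: with Clipping_scale=2.0 it sits at roughly twice the middle quartile (e.g. threshold=1.823e+02 against a median of 9.114e+01 just above). A hedged sketch of adaptive, median-based clipping in that spirit follows; the window size, the exact rule, and the class name are assumptions, not icefall's precise implementation.

    # Sketch of median-based gradient clipping, assuming
    # threshold = clipping_scale * running median grad-norm.
    from collections import deque
    import torch

    class MedianClipper:
        def __init__(self, clipping_scale=2.0, window=1024):
            self.scale = clipping_scale
            self.norms = deque(maxlen=window)

        def clip_(self, parameters):
            params = [p for p in parameters if p.grad is not None]
            norm = torch.norm(torch.stack([p.grad.norm() for p in params])).item()
            self.norms.append(norm)
            median = sorted(self.norms)[len(self.norms) // 2]
            threshold = self.scale * median
            if norm > threshold:  # such steps are counted as "percent-clipped"
                for p in params:
                    p.grad.mul_(threshold / norm)
            return norm, threshold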
], batch size: 111, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:56:47,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=874000.0, ans=0.5 +2024-09-20 10:56:47,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=874000.0, ans=0.125 +2024-09-20 10:57:05,584 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=874040.0, ans=0.1 +2024-09-20 10:57:07,094 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=874080.0, ans=0.2 +2024-09-20 10:57:23,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=874120.0, ans=0.125 +2024-09-20 10:57:53,497 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.289e+01 8.411e+01 9.030e+01 9.662e+01 1.974e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-20 10:57:53,522 INFO [train.py:1198] (0/2) Epoch 49, batch 1350, loss[loss=0.2298, ctc_loss=0.1091, cr_loss=0.3488, attn_decoder_loss=0.2354, over 29769.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1073, cr_loss=0.3458, attn_decoder_loss=0.2363, over 5795461.62 frames. ], batch size: 81, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:57:58,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=874200.0, ans=0.0 +2024-09-20 10:58:17,555 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=874240.0, ans=0.125 +2024-09-20 10:58:46,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=874320.0, ans=0.0 +2024-09-20 10:58:59,808 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=874360.0, ans=0.1 +2024-09-20 10:59:09,069 INFO [train.py:1198] (0/2) Epoch 49, batch 1400, loss[loss=0.2039, ctc_loss=0.09532, cr_loss=0.3197, attn_decoder_loss=0.2089, over 29597.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1073, cr_loss=0.3455, attn_decoder_loss=0.2363, over 5806949.99 frames. ], batch size: 69, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:59:19,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=874400.0, ans=0.2 +2024-09-20 10:59:27,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=874440.0, ans=0.0 +2024-09-20 10:59:47,522 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=874480.0, ans=0.0 +2024-09-20 10:59:48,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.12 vs. 
limit=22.5 +2024-09-20 10:59:53,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=874480.0, ans=0.09899494936611666 +2024-09-20 11:00:08,658 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=874520.0, ans=0.125 +2024-09-20 11:00:25,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=874600.0, ans=0.0 +2024-09-20 11:00:26,311 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 8.532e+01 9.313e+01 9.693e+01 1.325e+02, threshold=1.863e+02, percent-clipped=0.0 +2024-09-20 11:00:26,332 INFO [train.py:1198] (0/2) Epoch 49, batch 1450, loss[loss=0.2508, ctc_loss=0.1283, cr_loss=0.3969, attn_decoder_loss=0.2556, over 29459.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1078, cr_loss=0.3465, attn_decoder_loss=0.237, over 5805246.86 frames. ], batch size: 94, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 11:00:26,735 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=874600.0, ans=0.125 +2024-09-20 11:00:31,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=874600.0, ans=0.125 +2024-09-20 11:01:09,257 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=874680.0, ans=0.125 +2024-09-20 11:01:18,266 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=874720.0, ans=0.025 +2024-09-20 11:01:25,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=874720.0, ans=0.0 +2024-09-20 11:01:27,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=874760.0, ans=0.125 +2024-09-20 11:01:31,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=874760.0, ans=0.1 +2024-09-20 11:01:43,697 INFO [train.py:1198] (0/2) Epoch 49, batch 1500, loss[loss=0.2399, ctc_loss=0.1129, cr_loss=0.3548, attn_decoder_loss=0.2461, over 29640.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1076, cr_loss=0.346, attn_decoder_loss=0.2372, over 5805899.35 frames. ], batch size: 86, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 11:02:00,589 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=874840.0, ans=0.1 +2024-09-20 11:02:46,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=874960.0, ans=0.025 +2024-09-20 11:02:59,723 INFO [train.py:1198] (0/2) Epoch 49, batch 1550, loss[loss=0.2427, ctc_loss=0.1143, cr_loss=0.3654, attn_decoder_loss=0.2488, over 29523.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.108, cr_loss=0.3466, attn_decoder_loss=0.2372, over 5781309.48 frames. 
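The scaling.py:1024 lines compare a Whitening metric against a per-module limit. One plausible reading, sketched below as an assumption rather than a statement of icefall's actual code: the metric measures how far the channel covariance of a module's output is from a scaled identity (1.0 for perfectly "white" features, larger as activations collapse onto fewer directions), and a penalty is applied only when it exceeds the limit, which is why these lines appear sporadically.

    # Assumed whitening diagnostic: E[lambda^2] / (E[lambda])^2 over the
    # eigenvalues of the channel covariance. Equals 1.0 for isotropic
    # features; grows with anisotropy. Not icefall's exact Whiten module.
    import torch

    def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> float:
        # x: (num_frames, num_channels); channels split into groups as logged
        metrics = []
        for g in x.chunk(num_groups, dim=1):
            g = g - g.mean(dim=0, keepdim=True)
            cov = (g.T @ g) / g.shape[0]
            lam = torch.linalg.eigvalsh(cov)
            metrics.append(((lam ** 2).mean() / lam.mean() ** 2).item())
        return max(metrics)

    x = torch.randn(10000, 64)                        # nearly white features
    print(whitening_metric(x))                        # close to 1.0
    print(whitening_metric(x @ torch.randn(64, 64)))  # anisotropic -> much larger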
], batch size: 90, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 11:03:01,251 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.780e+01 9.221e+01 9.714e+01 1.731e+02, threshold=1.844e+02, percent-clipped=0.0 +2024-09-20 11:03:15,146 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=875040.0, ans=0.025 +2024-09-20 11:03:32,579 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=875080.0, ans=0.0 +2024-09-20 11:03:44,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=875080.0, ans=0.0 +2024-09-20 11:04:17,871 INFO [train.py:1198] (0/2) Epoch 49, batch 1600, loss[loss=0.2441, ctc_loss=0.1084, cr_loss=0.3596, attn_decoder_loss=0.2512, over 29658.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1079, cr_loss=0.3465, attn_decoder_loss=0.237, over 5766687.71 frames. ], batch size: 85, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 11:04:29,334 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=875200.0, ans=0.09899494936611666 +2024-09-20 11:04:55,839 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.34 vs. limit=6.0 +2024-09-20 11:05:02,551 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:05:28,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=875360.0, ans=0.05 +2024-09-20 11:05:35,272 INFO [train.py:1198] (0/2) Epoch 49, batch 1650, loss[loss=0.2518, ctc_loss=0.1257, cr_loss=0.3894, attn_decoder_loss=0.2571, over 29715.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1075, cr_loss=0.3462, attn_decoder_loss=0.2368, over 5759737.54 frames. ], batch size: 89, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 11:05:36,820 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.535e+01 8.731e+01 9.375e+01 1.033e+02 4.600e+02, threshold=1.875e+02, percent-clipped=3.0 +2024-09-20 11:05:44,703 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=875400.0, ans=0.125 +2024-09-20 11:05:59,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=875440.0, ans=0.125 +2024-09-20 11:06:25,374 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=875520.0, ans=0.125 +2024-09-20 11:06:31,406 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=875520.0, ans=0.05 +2024-09-20 11:06:37,692 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.81 vs. limit=12.0 +2024-09-20 11:06:50,358 INFO [train.py:1198] (0/2) Epoch 49, batch 1700, loss[loss=0.1987, ctc_loss=0.08375, cr_loss=0.2947, attn_decoder_loss=0.2049, over 29596.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1069, cr_loss=0.345, attn_decoder_loss=0.2366, over 5782370.64 frames. 
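The lr field decreases from 2.28e-03 at the end of epoch 48 to 2.25e-03 at the start of epoch 49 and to 2.24e-03 later in the epoch, i.e. it depends on both the global batch index and the epoch. This is consistent with the Eden-style schedule used in icefall, sketched below from memory; base_lr, lr_batches and lr_epochs here are placeholder values, not the ones used for this run.

    # Eden-style learning-rate schedule (decays in both step and epoch).
    # Hyper-parameter values below are placeholders.
    def eden_lr(base_lr: float, step: int, epoch: float,
                lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
        batch_factor = ((step ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
        epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
        return base_lr * batch_factor * epoch_factor

    # Late in training both factors change slowly, which is why lr moves only
    # from 2.28e-03 to 2.24e-03 across the epoch 48 -> 49 boundary in this log.
    print(eden_lr(0.045, step=870000, epoch=49))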
], batch size: 69, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 11:07:03,285 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.15 vs. limit=12.0 +2024-09-20 11:07:15,594 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=875640.0, ans=0.125 +2024-09-20 11:07:31,370 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.78 vs. limit=22.5 +2024-09-20 11:07:54,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=875760.0, ans=0.125 +2024-09-20 11:08:05,102 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=875760.0, ans=0.125 +2024-09-20 11:08:07,659 INFO [train.py:1198] (0/2) Epoch 49, batch 1750, loss[loss=0.2073, ctc_loss=0.09342, cr_loss=0.3271, attn_decoder_loss=0.2127, over 29346.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1067, cr_loss=0.3448, attn_decoder_loss=0.2361, over 5791214.79 frames. ], batch size: 67, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 11:08:10,639 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.685e+01 8.566e+01 9.020e+01 9.576e+01 1.474e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-20 11:08:10,923 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=875800.0, ans=0.025 +2024-09-20 11:08:10,960 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=875800.0, ans=0.0 +2024-09-20 11:08:26,704 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=875840.0, ans=0.2 +2024-09-20 11:08:44,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=875880.0, ans=10.0 +2024-09-20 11:08:53,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=875920.0, ans=0.2 +2024-09-20 11:09:24,845 INFO [train.py:1198] (0/2) Epoch 49, batch 1800, loss[loss=0.2419, ctc_loss=0.113, cr_loss=0.3637, attn_decoder_loss=0.2482, over 29703.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1068, cr_loss=0.3448, attn_decoder_loss=0.2361, over 5792708.40 frames. ], batch size: 83, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 11:09:34,330 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=876000.0, ans=0.125 +2024-09-20 11:10:01,433 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=876080.0, ans=0.0 +2024-09-20 11:10:03,268 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=876080.0, ans=15.0 +2024-09-20 11:10:32,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=876160.0, ans=0.0 +2024-09-20 11:10:40,175 INFO [train.py:1198] (0/2) Epoch 49, batch 1850, loss[loss=0.2417, ctc_loss=0.1085, cr_loss=0.3486, attn_decoder_loss=0.2487, over 29630.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.107, cr_loss=0.3455, attn_decoder_loss=0.2362, over 5797393.61 frames. 
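Each batch record ends with a grad_scale field that alternates between 8.0 and 16.0 in this stretch: the loss-scaling factor used for mixed-precision training, which is grown after a run of successful steps and halved when an overflow is detected. A short sketch with torch's GradScaler follows; the growth and backoff settings shown are illustrative defaults, not the ones used for this run, and the helper name is hypothetical.

    # Sketch of the mixed-precision loss scaling behind the `grad_scale`
    # field, using torch.cuda.amp. Settings are illustrative defaults.
    import torch

    scaler = torch.cuda.amp.GradScaler(init_scale=16.0, growth_factor=2.0,
                                       backoff_factor=0.5, growth_interval=2000)

    def train_step(model, optimizer, batch, loss_fn):
        optimizer.zero_grad()
        with torch.cuda.amp.autocast(dtype=torch.float16):
            loss = loss_fn(model, batch)
        scaler.scale(loss).backward()   # backward on the scaled loss
        scaler.step(optimizer)          # skipped if inf/nan gradients were found
        scaler.update()                 # halves the scale on overflow, else grows it
        return loss.detach(), scaler.get_scale()  # the value logged as grad_scale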
], batch size: 86, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:10:43,129 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.398e+01 8.600e+01 9.055e+01 9.654e+01 2.900e+02, threshold=1.811e+02, percent-clipped=2.0 +2024-09-20 11:10:58,862 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.76 vs. limit=10.0 +2024-09-20 11:10:58,971 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.09 vs. limit=15.0 +2024-09-20 11:11:04,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=876240.0, ans=0.2 +2024-09-20 11:11:08,557 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=876280.0, ans=0.125 +2024-09-20 11:11:27,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=876320.0, ans=0.025 +2024-09-20 11:11:36,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=876320.0, ans=0.1 +2024-09-20 11:11:56,967 INFO [train.py:1198] (0/2) Epoch 49, batch 1900, loss[loss=0.251, ctc_loss=0.1227, cr_loss=0.3772, attn_decoder_loss=0.2568, over 29722.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1074, cr_loss=0.3466, attn_decoder_loss=0.2369, over 5805151.49 frames. ], batch size: 89, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:12:06,887 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0 +2024-09-20 11:12:14,430 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=876440.0, ans=0.2 +2024-09-20 11:12:49,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=876520.0, ans=0.04949747468305833 +2024-09-20 11:12:54,652 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.17 vs. limit=12.0 +2024-09-20 11:13:01,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=876560.0, ans=0.125 +2024-09-20 11:13:14,825 INFO [train.py:1198] (0/2) Epoch 49, batch 1950, loss[loss=0.2283, ctc_loss=0.108, cr_loss=0.3493, attn_decoder_loss=0.2339, over 29458.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1081, cr_loss=0.3488, attn_decoder_loss=0.2381, over 5819336.21 frames. ], batch size: 78, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:13:15,599 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.65 vs. 
limit=15.0 +2024-09-20 11:13:17,858 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.639e+01 8.737e+01 9.338e+01 9.931e+01 1.218e+02, threshold=1.868e+02, percent-clipped=0.0 +2024-09-20 11:13:49,642 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=876680.0, ans=0.125 +2024-09-20 11:14:23,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten.whitening_limit, batch_count=876760.0, ans=15.0 +2024-09-20 11:14:27,217 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=876760.0, ans=0.0 +2024-09-20 11:14:30,284 INFO [train.py:1198] (0/2) Epoch 49, batch 2000, loss[loss=0.1971, ctc_loss=0.08514, cr_loss=0.297, attn_decoder_loss=0.2029, over 29357.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1082, cr_loss=0.3486, attn_decoder_loss=0.2382, over 5797484.48 frames. ], batch size: 67, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:14:33,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=876800.0, ans=0.125 +2024-09-20 11:14:33,749 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:14:39,777 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=876800.0, ans=0.125 +2024-09-20 11:14:59,169 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=876880.0, ans=0.0 +2024-09-20 11:15:23,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=876920.0, ans=0.1 +2024-09-20 11:15:26,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten.whitening_limit, batch_count=876920.0, ans=15.0 +2024-09-20 11:15:47,893 INFO [train.py:1198] (0/2) Epoch 49, batch 2050, loss[loss=0.2014, ctc_loss=0.08559, cr_loss=0.3001, attn_decoder_loss=0.2076, over 29459.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1076, cr_loss=0.3469, attn_decoder_loss=0.2368, over 5789206.55 frames. ], batch size: 70, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:15:49,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=877000.0, ans=0.0 +2024-09-20 11:15:50,918 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.506e+01 8.585e+01 9.358e+01 1.005e+02 5.300e+02, threshold=1.872e+02, percent-clipped=1.0 +2024-09-20 11:15:57,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=877000.0, ans=0.0 +2024-09-20 11:16:06,969 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=877040.0, ans=0.2 +2024-09-20 11:17:05,470 INFO [train.py:1198] (0/2) Epoch 49, batch 2100, loss[loss=0.2326, ctc_loss=0.11, cr_loss=0.3685, attn_decoder_loss=0.238, over 29755.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1071, cr_loss=0.3458, attn_decoder_loss=0.2364, over 5800751.88 frames. ], batch size: 81, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:17:12,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.09 vs. 
limit=12.0 +2024-09-20 11:17:28,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=877240.0, ans=0.125 +2024-09-20 11:17:44,628 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=877280.0, ans=0.125 +2024-09-20 11:17:52,295 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=877320.0, ans=0.0 +2024-09-20 11:18:02,637 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=877320.0, ans=0.0 +2024-09-20 11:18:20,702 INFO [train.py:1198] (0/2) Epoch 49, batch 2150, loss[loss=0.2258, ctc_loss=0.1101, cr_loss=0.3423, attn_decoder_loss=0.231, over 29444.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1066, cr_loss=0.3445, attn_decoder_loss=0.2359, over 5815431.29 frames. ], batch size: 78, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:18:23,750 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.478e+01 8.920e+01 9.429e+01 1.261e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-20 11:18:49,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=877480.0, ans=0.125 +2024-09-20 11:18:51,511 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.28 vs. limit=22.5 +2024-09-20 11:18:57,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=877480.0, ans=0.0 +2024-09-20 11:19:15,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=877520.0, ans=0.07 +2024-09-20 11:19:18,045 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=877520.0, ans=22.5 +2024-09-20 11:19:21,112 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.46 vs. limit=15.0 +2024-09-20 11:19:38,774 INFO [train.py:1198] (0/2) Epoch 49, batch 2200, loss[loss=0.2311, ctc_loss=0.09464, cr_loss=0.306, attn_decoder_loss=0.2394, over 29611.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1065, cr_loss=0.3443, attn_decoder_loss=0.236, over 5812292.71 frames. ], batch size: 86, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:19:43,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=877600.0, ans=0.125 +2024-09-20 11:20:05,859 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.96 vs. limit=22.5 +2024-09-20 11:20:44,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=877760.0, ans=0.125 +2024-09-20 11:20:56,320 INFO [train.py:1198] (0/2) Epoch 49, batch 2250, loss[loss=0.2346, ctc_loss=0.1046, cr_loss=0.3325, attn_decoder_loss=0.2417, over 29713.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1064, cr_loss=0.3439, attn_decoder_loss=0.2359, over 5812525.61 frames. 
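The recurring `WARNING [optim.py:487] Clipping_scale=2.0, grad-norm quartiles ...` lines are ScaledAdam's adaptive gradient clipping at work: the five numbers read as (min, 25%, median, 75%, max) of recently observed total gradient norms, and the printed threshold is clipping_scale times the median (e.g. 2.0 × 8.920e+01 = 1.784e+02 in the entry just above); percent-clipped is the fraction of recent batches whose norm exceeded it. A minimal sketch of that rule, with our own helper name and a plain Python list standing in for the optimizer's internal statistics:

```python
import torch

def clip_by_median_norm(params, recent_norms, clipping_scale=2.0):
    """Sketch (our own helper, not icefall's API): clip this batch's total
    gradient norm to clipping_scale x the median of recently seen norms."""
    grads = [p.grad for p in params if p.grad is not None]
    norm = torch.linalg.vector_norm(
        torch.stack([torch.linalg.vector_norm(g) for g in grads]))
    recent_norms.append(float(norm))
    q = torch.quantile(torch.tensor(recent_norms),
                       torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * q[2]   # 2.0 x median, e.g. 2.0 * 8.920e+01 = 1.784e+02
    if norm > threshold:                # such batches are counted in percent-clipped
        for g in grads:
            g.mul_(threshold / norm)
    return q, threshold
```

Clipping to a multiple of the running median, rather than a fixed constant, lets the threshold track the natural scale of the gradients as training progresses, which is why the logged thresholds drift only slowly around 1.8e+02.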
], batch size: 82, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:20:59,107 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.496e+01 8.798e+01 9.192e+01 9.899e+01 1.510e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-20 11:21:02,575 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=877800.0, ans=0.1 +2024-09-20 11:21:17,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=877840.0, ans=0.2 +2024-09-20 11:21:29,541 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=877880.0, ans=0.125 +2024-09-20 11:21:47,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=877920.0, ans=0.1 +2024-09-20 11:21:52,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=877920.0, ans=0.0 +2024-09-20 11:22:11,323 INFO [train.py:1198] (0/2) Epoch 49, batch 2300, loss[loss=0.2131, ctc_loss=0.09251, cr_loss=0.3124, attn_decoder_loss=0.2196, over 29321.00 frames. ], tot_loss[loss=0.2287, ctc_loss=0.1055, cr_loss=0.3413, attn_decoder_loss=0.2348, over 5800015.27 frames. ], batch size: 71, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:22:27,268 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.46 vs. limit=15.0 +2024-09-20 11:23:06,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=878120.0, ans=0.0 +2024-09-20 11:23:21,904 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=878160.0, ans=0.2 +2024-09-20 11:23:29,168 INFO [train.py:1198] (0/2) Epoch 49, batch 2350, loss[loss=0.2421, ctc_loss=0.1153, cr_loss=0.3608, attn_decoder_loss=0.2481, over 29692.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1063, cr_loss=0.3433, attn_decoder_loss=0.2353, over 5805167.92 frames. ], batch size: 83, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:23:32,113 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.643e+01 8.491e+01 9.028e+01 9.631e+01 3.047e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-20 11:23:33,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=878200.0, ans=0.125 +2024-09-20 11:23:52,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=878240.0, ans=0.5 +2024-09-20 11:24:17,210 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.97 vs. limit=6.0 +2024-09-20 11:24:19,744 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=878320.0, ans=0.125 +2024-09-20 11:24:21,307 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=878320.0, ans=0.125 +2024-09-20 11:24:35,654 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.93 vs. 
limit=12.0 +2024-09-20 11:24:47,391 INFO [train.py:1198] (0/2) Epoch 49, batch 2400, loss[loss=0.2228, ctc_loss=0.1058, cr_loss=0.3549, attn_decoder_loss=0.2279, over 29526.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1063, cr_loss=0.3437, attn_decoder_loss=0.2358, over 5807592.66 frames. ], batch size: 76, lr: 2.24e-03, grad_scale: 32.0 +2024-09-20 11:25:01,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=878440.0, ans=0.0 +2024-09-20 11:25:04,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=878440.0, ans=0.1 +2024-09-20 11:25:19,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=878480.0, ans=0.2 +2024-09-20 11:25:19,509 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=878480.0, ans=0.2 +2024-09-20 11:25:56,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.97 vs. limit=22.5 +2024-09-20 11:26:02,944 INFO [train.py:1198] (0/2) Epoch 49, batch 2450, loss[loss=0.247, ctc_loss=0.1244, cr_loss=0.3844, attn_decoder_loss=0.2521, over 29708.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1068, cr_loss=0.3453, attn_decoder_loss=0.2367, over 5784855.24 frames. ], batch size: 82, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:26:07,323 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.282e+01 8.775e+01 9.341e+01 9.851e+01 1.765e+02, threshold=1.868e+02, percent-clipped=0.0 +2024-09-20 11:26:16,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=878640.0, ans=0.125 +2024-09-20 11:26:40,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=878680.0, ans=0.125 +2024-09-20 11:26:43,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=878680.0, ans=0.0 +2024-09-20 11:26:51,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_na.min_abs, batch_count=878720.0, ans=0.02 +2024-09-20 11:26:55,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=878720.0, ans=0.2 +2024-09-20 11:27:00,617 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=878720.0, ans=0.125 +2024-09-20 11:27:19,937 INFO [train.py:1198] (0/2) Epoch 49, batch 2500, loss[loss=0.2371, ctc_loss=0.108, cr_loss=0.3397, attn_decoder_loss=0.2439, over 29616.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1068, cr_loss=0.3451, attn_decoder_loss=0.2367, over 5795340.54 frames. 
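Most of the `ScheduledFloat: name=..., batch_count=..., ans=...` lines report hyperparameters (dropout probabilities, skip rates, bypass scale minima) that are not constants but functions of the global batch count; `ans` is the value in effect at that batch. A sketch of piecewise-linear scheduling in that spirit, with illustrative breakpoints rather than the recipe's actual ones:

```python
def scheduled_float(batch_count, schedule):
    """Sketch of a piecewise-linear schedule: `schedule` is a list of
    (batch_count, value) breakpoints; outside the range the endpoint
    value is held constant."""
    b0, v0 = schedule[0]
    if batch_count <= b0:
        return v0
    for b1, v1 in schedule[1:]:
        if batch_count <= b1:
            t = (batch_count - b0) / (b1 - b0)
            return v0 + t * (v1 - v0)   # linear interpolation between breakpoints
        b0, v0 = b1, v1
    return v0

# e.g. a dropout that decays from 0.3 to 0.1 over the first 20k batches
# (breakpoints are illustrative, not the recipe's actual values):
print(scheduled_float(876280.0, [(0.0, 0.3), (20000.0, 0.1)]))  # -> 0.1
```

At batch counts this late in training (roughly 8.8e5 here) most schedules have long since reached their final values, which is why the logged `ans` values are stable from entry to entry.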
], batch size: 86, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:27:32,880 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_ff2.min_abs, batch_count=878800.0, ans=0.1 +2024-09-20 11:27:38,985 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=878840.0, ans=0.2 +2024-09-20 11:27:46,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=878840.0, ans=0.125 +2024-09-20 11:27:47,279 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.32 vs. limit=6.0 +2024-09-20 11:28:01,678 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=878880.0, ans=0.125 +2024-09-20 11:28:15,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=878920.0, ans=0.05 +2024-09-20 11:28:37,631 INFO [train.py:1198] (0/2) Epoch 49, batch 2550, loss[loss=0.2091, ctc_loss=0.09666, cr_loss=0.3156, attn_decoder_loss=0.2146, over 29304.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1075, cr_loss=0.3463, attn_decoder_loss=0.2369, over 5799431.54 frames. ], batch size: 67, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:28:42,020 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.025e+01 8.758e+01 9.202e+01 9.559e+01 1.179e+02, threshold=1.840e+02, percent-clipped=0.0 +2024-09-20 11:28:47,225 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.75 vs. limit=15.0 +2024-09-20 11:28:49,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=879000.0, ans=0.025 +2024-09-20 11:29:22,330 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.14 vs. limit=15.0 +2024-09-20 11:29:43,085 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=879160.0, ans=0.125 +2024-09-20 11:29:50,613 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=879160.0, ans=0.125 +2024-09-20 11:29:50,615 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=879160.0, ans=0.0 +2024-09-20 11:29:53,788 INFO [train.py:1198] (0/2) Epoch 49, batch 2600, loss[loss=0.2215, ctc_loss=0.1011, cr_loss=0.3403, attn_decoder_loss=0.2273, over 29440.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1073, cr_loss=0.346, attn_decoder_loss=0.2371, over 5794316.93 frames. ], batch size: 78, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:30:08,765 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=879240.0, ans=0.1 +2024-09-20 11:30:11,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=879240.0, ans=0.125 +2024-09-20 11:30:33,086 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.59 vs. 
limit=22.5 +2024-09-20 11:31:03,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=879360.0, ans=0.1 +2024-09-20 11:31:10,893 INFO [train.py:1198] (0/2) Epoch 49, batch 2650, loss[loss=0.2432, ctc_loss=0.12, cr_loss=0.3718, attn_decoder_loss=0.2486, over 29248.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1073, cr_loss=0.346, attn_decoder_loss=0.2373, over 5800680.22 frames. ], batch size: 100, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:31:15,433 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.678e+01 8.630e+01 9.011e+01 9.615e+01 2.139e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 11:31:15,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=879400.0, ans=0.0 +2024-09-20 11:31:34,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=879440.0, ans=0.0 +2024-09-20 11:31:36,391 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.49 vs. limit=12.0 +2024-09-20 11:31:37,202 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=879440.0, ans=0.125 +2024-09-20 11:31:43,747 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.40 vs. limit=15.0 +2024-09-20 11:31:46,206 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=879480.0, ans=0.2 +2024-09-20 11:31:57,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=879520.0, ans=0.0 +2024-09-20 11:32:03,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=879520.0, ans=0.125 +2024-09-20 11:32:06,872 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=879520.0, ans=0.1 +2024-09-20 11:32:24,931 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=879560.0, ans=0.125 +2024-09-20 11:32:27,641 INFO [train.py:1198] (0/2) Epoch 49, batch 2700, loss[loss=0.2402, ctc_loss=0.1133, cr_loss=0.3585, attn_decoder_loss=0.2464, over 29530.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1077, cr_loss=0.3469, attn_decoder_loss=0.2377, over 5796569.28 frames. ], batch size: 87, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:32:46,785 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.60 vs. limit=22.5 +2024-09-20 11:32:47,598 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=879640.0, ans=0.125 +2024-09-20 11:32:56,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=879680.0, ans=0.1 +2024-09-20 11:32:57,434 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.85 vs. 
limit=15.0 +2024-09-20 11:33:43,443 INFO [train.py:1198] (0/2) Epoch 49, batch 2750, loss[loss=0.2224, ctc_loss=0.1054, cr_loss=0.3294, attn_decoder_loss=0.2281, over 29534.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.107, cr_loss=0.3453, attn_decoder_loss=0.2365, over 5795588.88 frames. ], batch size: 75, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:33:49,654 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 8.773e+01 9.217e+01 9.860e+01 5.240e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-20 11:33:51,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=879800.0, ans=10.0 +2024-09-20 11:33:56,815 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=879800.0, ans=15.0 +2024-09-20 11:34:01,306 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.48 vs. limit=15.0 +2024-09-20 11:34:12,807 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=879880.0, ans=0.2 +2024-09-20 11:34:20,192 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=879880.0, ans=0.125 +2024-09-20 11:34:36,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=879920.0, ans=0.125 +2024-09-20 11:34:52,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=879960.0, ans=0.1 +2024-09-20 11:34:57,847 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.37 vs. limit=15.0 +2024-09-20 11:35:00,239 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-220000.pt +2024-09-20 11:35:09,261 INFO [train.py:1198] (0/2) Epoch 49, batch 2800, loss[loss=0.2405, ctc_loss=0.1197, cr_loss=0.344, attn_decoder_loss=0.2463, over 20099.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1073, cr_loss=0.3457, attn_decoder_loss=0.2366, over 5775052.55 frames. ], batch size: 209, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:35:11,791 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.61 vs. limit=15.0 +2024-09-20 11:35:19,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=880000.0, ans=0.125 +2024-09-20 11:35:19,216 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=880000.0, ans=0.1 +2024-09-20 11:35:43,474 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=880080.0, ans=0.0 +2024-09-20 11:35:44,883 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=880080.0, ans=0.035 +2024-09-20 11:35:54,372 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.13 vs. 
limit=6.0 +2024-09-20 11:35:58,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=880120.0, ans=0.125 +2024-09-20 11:36:26,662 INFO [train.py:1198] (0/2) Epoch 49, batch 2850, loss[loss=0.2212, ctc_loss=0.1009, cr_loss=0.3224, attn_decoder_loss=0.2273, over 29515.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1077, cr_loss=0.3463, attn_decoder_loss=0.2369, over 5761996.16 frames. ], batch size: 77, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:36:32,610 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.815e+01 9.180e+01 9.751e+01 2.075e+02, threshold=1.836e+02, percent-clipped=1.0 +2024-09-20 11:36:57,254 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=880280.0, ans=0.125 +2024-09-20 11:37:01,906 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=880280.0, ans=0.05 +2024-09-20 11:37:21,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=880320.0, ans=0.0 +2024-09-20 11:37:42,429 INFO [train.py:1198] (0/2) Epoch 49, batch 2900, loss[loss=0.2264, ctc_loss=0.1041, cr_loss=0.3442, attn_decoder_loss=0.2324, over 29411.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1082, cr_loss=0.3477, attn_decoder_loss=0.2378, over 5787764.78 frames. ], batch size: 79, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:37:44,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=880400.0, ans=0.2 +2024-09-20 11:37:51,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=880400.0, ans=0.125 +2024-09-20 11:38:01,897 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.23 vs. limit=22.5 +2024-09-20 11:38:31,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=880520.0, ans=0.5 +2024-09-20 11:38:35,930 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=880520.0, ans=0.125 +2024-09-20 11:38:35,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=880520.0, ans=0.125 +2024-09-20 11:38:51,647 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=880560.0, ans=0.025 +2024-09-20 11:39:02,491 INFO [train.py:1198] (0/2) Epoch 49, batch 2950, loss[loss=0.2189, ctc_loss=0.09776, cr_loss=0.3214, attn_decoder_loss=0.2253, over 29513.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1075, cr_loss=0.3463, attn_decoder_loss=0.2368, over 5782502.80 frames. 
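The checkpoint path logged above (`...ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02...`) encodes the weights of the three loss terms, and the printed fields are consistent with the total being a weighted sum: loss = 0.1 · ctc_loss + 0.9 · attn_decoder_loss + 0.02 · cr_loss. A quick check against the batch 2950 running totals just above:

```python
ctc, cr, aed = 0.1075, 0.3463, 0.2368      # tot_loss fields from batch 2950 above
loss = 0.1 * ctc + 0.9 * aed + 0.02 * cr   # scales taken from the exp-dir name
print(round(loss, 4))                      # 0.2308 -- matches the logged loss
```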
], batch size: 75, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:39:02,778 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=880600.0, ans=0.1 +2024-09-20 11:39:07,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=880600.0, ans=0.125 +2024-09-20 11:39:07,661 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.57 vs. limit=15.0 +2024-09-20 11:39:08,383 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.497e+01 8.608e+01 9.182e+01 9.827e+01 1.689e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-20 11:39:19,915 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.95 vs. limit=10.0 +2024-09-20 11:39:27,080 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:39:31,544 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=880680.0, ans=0.0 +2024-09-20 11:39:35,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=880680.0, ans=0.125 +2024-09-20 11:39:48,666 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.57 vs. limit=15.0 +2024-09-20 11:39:56,428 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.49 vs. limit=12.0 +2024-09-20 11:40:18,501 INFO [train.py:1198] (0/2) Epoch 49, batch 3000, loss[loss=0.2262, ctc_loss=0.1013, cr_loss=0.3401, attn_decoder_loss=0.2325, over 29743.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1073, cr_loss=0.346, attn_decoder_loss=0.2367, over 5782749.31 frames. ], batch size: 81, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:40:18,502 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 11:40:29,309 INFO [zipformer.py:1858] (0/2) name=encoder.encoders.3.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([2.8820, 3.2156, 3.1212, 3.2973, 3.3037, 3.3074, 2.6312, 3.4788], + device='cuda:0') +2024-09-20 11:40:36,852 INFO [train.py:1230] (0/2) Epoch 49, validation: loss=0.2126, ctc_loss=0.03669, cr_loss=6.618e-15, attn_decoder_loss=0.2322, over 944034.00 frames. +2024-09-20 11:40:36,853 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 11:41:01,279 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=880840.0, ans=0.025 +2024-09-20 11:41:01,768 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.52 vs. limit=15.0 +2024-09-20 11:41:28,263 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=880920.0, ans=0.125 +2024-09-20 11:41:41,926 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=880960.0, ans=0.125 +2024-09-20 11:41:52,485 INFO [train.py:1198] (0/2) Epoch 49, batch 3050, loss[loss=0.218, ctc_loss=0.09656, cr_loss=0.3226, attn_decoder_loss=0.2244, over 29536.00 frames. 
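The validation entry above fits the same weighted sum, as the one-liner below checks. Note that `cr_loss` collapses to ~1e-15 at validation; a plausible reading is that the consistency-regularization term compares two differently time-masked encoder passes, and with masking disabled in eval the two branches coincide, leaving only floating-point noise.

```python
print(round(0.1 * 0.03669 + 0.9 * 0.2322 + 0.02 * 6.618e-15, 4))  # 0.2126
```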
], tot_loss[loss=0.2313, ctc_loss=0.1074, cr_loss=0.3461, attn_decoder_loss=0.2373, over 5777046.59 frames. ], batch size: 76, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:41:58,503 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.217e+01 8.535e+01 9.070e+01 9.568e+01 1.381e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 11:42:03,565 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=881000.0, ans=0.125 +2024-09-20 11:42:05,219 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:42:06,587 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=881040.0, ans=0.125 +2024-09-20 11:42:13,447 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.78 vs. limit=5.0 +2024-09-20 11:42:19,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=881040.0, ans=0.125 +2024-09-20 11:42:22,919 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=881080.0, ans=0.125 +2024-09-20 11:42:29,100 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:42:55,619 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=881160.0, ans=0.2 +2024-09-20 11:43:11,822 INFO [train.py:1198] (0/2) Epoch 49, batch 3100, loss[loss=0.235, ctc_loss=0.109, cr_loss=0.3334, attn_decoder_loss=0.2416, over 29235.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1072, cr_loss=0.3458, attn_decoder_loss=0.2368, over 5777249.57 frames. ], batch size: 100, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:43:30,851 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.96 vs. limit=15.0 +2024-09-20 11:43:32,129 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.72 vs. limit=22.5 +2024-09-20 11:43:42,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=881280.0, ans=0.125 +2024-09-20 11:43:45,345 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=881280.0, ans=0.125 +2024-09-20 11:44:12,538 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=881360.0, ans=0.0 +2024-09-20 11:44:16,353 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.28 vs. limit=15.0 +2024-09-20 11:44:20,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=881360.0, ans=0.125 +2024-09-20 11:44:21,845 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=881360.0, ans=0.125 +2024-09-20 11:44:22,590 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.12 vs. 
limit=22.5 +2024-09-20 11:44:23,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=881360.0, ans=0.2 +2024-09-20 11:44:27,457 INFO [train.py:1198] (0/2) Epoch 49, batch 3150, loss[loss=0.2402, ctc_loss=0.1037, cr_loss=0.3306, attn_decoder_loss=0.248, over 28808.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1072, cr_loss=0.3455, attn_decoder_loss=0.2368, over 5782422.22 frames. ], batch size: 104, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:44:33,449 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.545e+01 8.538e+01 9.268e+01 9.767e+01 2.524e+02, threshold=1.854e+02, percent-clipped=1.0 +2024-09-20 11:44:57,731 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=881480.0, ans=0.125 +2024-09-20 11:45:06,624 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=881480.0, ans=0.0 +2024-09-20 11:45:11,291 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=881520.0, ans=0.0 +2024-09-20 11:45:15,612 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=881520.0, ans=0.125 +2024-09-20 11:45:35,104 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=881560.0, ans=0.1 +2024-09-20 11:45:42,745 INFO [train.py:1198] (0/2) Epoch 49, batch 3200, loss[loss=0.2322, ctc_loss=0.1077, cr_loss=0.352, attn_decoder_loss=0.2382, over 29409.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1071, cr_loss=0.3452, attn_decoder_loss=0.2364, over 5792307.86 frames. ], batch size: 79, lr: 2.24e-03, grad_scale: 32.0 +2024-09-20 11:45:51,010 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.97 vs. limit=6.0 +2024-09-20 11:45:58,107 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=881640.0, ans=0.125 +2024-09-20 11:46:04,181 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=881640.0, ans=0.1 +2024-09-20 11:46:31,662 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=881720.0, ans=0.0 +2024-09-20 11:46:41,743 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=881720.0, ans=0.0 +2024-09-20 11:46:55,333 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=881760.0, ans=0.0 +2024-09-20 11:47:02,660 INFO [train.py:1198] (0/2) Epoch 49, batch 3250, loss[loss=0.2434, ctc_loss=0.1222, cr_loss=0.3899, attn_decoder_loss=0.2483, over 29698.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1074, cr_loss=0.346, attn_decoder_loss=0.2371, over 5799130.34 frames. 
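The `grad_scale` field in the batch headers (8.0 → 16.0 → 32.0, then back down to 16.0) is mixed-precision loss scaling: the scale grows while gradients stay finite and is cut when non-finite gradients appear. icefall drives this from train.py; the standard PyTorch AMP pattern it corresponds to, with a toy model and optimizer standing in for the real ones, looks like:

```python
import torch

model = torch.nn.Linear(10, 1).cuda()                # toy stand-ins, not the recipe's model
opt = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler(init_scale=8.0)   # matches "grad_scale: 8.0"

for _ in range(100):
    x = torch.randn(4, 10, device="cuda")
    opt.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(x).pow(2).mean()
    scaler.scale(loss).backward()
    scaler.step(opt)    # skipped when inf/nan gradients are detected
    scaler.update()     # halves the scale on overflow, grows it periodically otherwise
    # scaler.get_scale() would show jumps like 8.0 -> 16.0 -> 32.0 -> 16.0
```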
], batch size: 84, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:47:10,204 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.221e+01 8.742e+01 9.266e+01 9.794e+01 1.259e+02, threshold=1.853e+02, percent-clipped=0.0 +2024-09-20 11:47:34,379 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=881880.0, ans=0.2 +2024-09-20 11:47:54,325 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.22 vs. limit=15.0 +2024-09-20 11:48:17,515 INFO [train.py:1198] (0/2) Epoch 49, batch 3300, loss[loss=0.232, ctc_loss=0.09773, cr_loss=0.3106, attn_decoder_loss=0.24, over 28143.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1066, cr_loss=0.3439, attn_decoder_loss=0.2359, over 5798176.13 frames. ], batch size: 111, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:48:32,187 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.05 vs. limit=15.0 +2024-09-20 11:48:57,109 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=882080.0, ans=0.0 +2024-09-20 11:49:06,038 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=882120.0, ans=0.1 +2024-09-20 11:49:32,239 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=3.94 vs. limit=12.0 +2024-09-20 11:49:32,907 INFO [train.py:1198] (0/2) Epoch 49, batch 3350, loss[loss=0.2473, ctc_loss=0.1158, cr_loss=0.3741, attn_decoder_loss=0.2536, over 28881.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1071, cr_loss=0.3448, attn_decoder_loss=0.2365, over 5775708.57 frames. ], batch size: 104, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:49:40,361 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.602e+01 8.704e+01 9.230e+01 9.837e+01 1.570e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-20 11:50:05,513 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.00 vs. limit=10.0 +2024-09-20 11:50:11,794 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.22 vs. limit=12.0 +2024-09-20 11:50:25,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=882320.0, ans=0.125 +2024-09-20 11:50:40,732 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=882360.0, ans=0.1 +2024-09-20 11:50:42,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=882360.0, ans=0.125 +2024-09-20 11:50:52,872 INFO [train.py:1198] (0/2) Epoch 49, batch 3400, loss[loss=0.1955, ctc_loss=0.08902, cr_loss=0.3084, attn_decoder_loss=0.2005, over 29352.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1074, cr_loss=0.3453, attn_decoder_loss=0.2364, over 5767325.53 frames. ], batch size: 67, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:51:05,627 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.55 vs. 
limit=15.0 +2024-09-20 11:51:30,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=882480.0, ans=0.125 +2024-09-20 11:51:36,053 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.59 vs. limit=22.5 +2024-09-20 11:52:05,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=882560.0, ans=0.0 +2024-09-20 11:52:08,161 INFO [train.py:1198] (0/2) Epoch 49, batch 3450, loss[loss=0.236, ctc_loss=0.1003, cr_loss=0.3321, attn_decoder_loss=0.2437, over 28316.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1074, cr_loss=0.3456, attn_decoder_loss=0.2369, over 5774800.56 frames. ], batch size: 111, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:52:15,010 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.48 vs. limit=15.0 +2024-09-20 11:52:15,709 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.614e+01 8.667e+01 9.196e+01 9.628e+01 1.869e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-20 11:52:23,552 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=882640.0, ans=0.125 +2024-09-20 11:52:23,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=882640.0, ans=0.125 +2024-09-20 11:52:31,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.93 vs. limit=10.0 +2024-09-20 11:52:54,852 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=882720.0, ans=0.125 +2024-09-20 11:53:01,180 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=882720.0, ans=0.125 +2024-09-20 11:53:11,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=882760.0, ans=0.025 +2024-09-20 11:53:22,836 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.37 vs. limit=15.0 +2024-09-20 11:53:23,233 INFO [train.py:1198] (0/2) Epoch 49, batch 3500, loss[loss=0.2092, ctc_loss=0.0916, cr_loss=0.3212, attn_decoder_loss=0.2151, over 29348.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1075, cr_loss=0.3459, attn_decoder_loss=0.2364, over 5776004.65 frames. ], batch size: 71, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:53:43,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=882840.0, ans=10.0 +2024-09-20 11:53:57,861 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=882880.0, ans=0.125 +2024-09-20 11:54:05,396 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=882880.0, ans=0.09899494936611666 +2024-09-20 11:54:39,746 INFO [train.py:1198] (0/2) Epoch 49, batch 3550, loss[loss=0.2486, ctc_loss=0.1219, cr_loss=0.3952, attn_decoder_loss=0.2539, over 29702.00 frames. 
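The `Whitening: name=... metric=X vs. limit=Y` lines are diagnostics from zipformer's Whiten modules: each tracks how far a layer's feature covariance is from a multiple of the identity and only applies a corrective penalty when the metric exceeds its limit (the logged cases). A sketch of one such metric, normalized so that perfectly white features score 1.0; treat this as the idea rather than the module's exact formula:

```python
import torch

def whitening_metric(x: torch.Tensor) -> float:
    """Sketch: >= 1.0, equal to 1.0 iff the covariance of x is a multiple
    of the identity; grows with the eigenvalue spread. x: (frames, dim)."""
    x = x - x.mean(dim=0)
    cov = (x.t() @ x) / x.shape[0]        # (dim, dim) sample covariance
    dim = cov.shape[0]
    return float((cov * cov).sum() / dim / (cov.diagonal().mean() ** 2))

x = torch.randn(1000, 384)                # near-white random features
print(whitening_metric(x))                # roughly 1 + dim/frames here, ~1.4
```

By Cauchy-Schwarz the ratio is at least 1 and grows with the anisotropy of the covariance, so limits like 15.0 or 22.5 tolerate a fair amount of spread before the penalty kicks in.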
], tot_loss[loss=0.2304, ctc_loss=0.1072, cr_loss=0.3455, attn_decoder_loss=0.2364, over 5781917.16 frames. ], batch size: 89, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:54:41,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=883000.0, ans=0.0 +2024-09-20 11:54:42,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=883000.0, ans=0.125 +2024-09-20 11:54:47,103 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.606e+01 9.040e+01 9.689e+01 1.934e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-20 11:55:16,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=883080.0, ans=0.125 +2024-09-20 11:55:18,988 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=883080.0, ans=0.025 +2024-09-20 11:55:21,270 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.83 vs. limit=15.0 +2024-09-20 11:55:50,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=883160.0, ans=0.125 +2024-09-20 11:55:51,500 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:55:53,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=883160.0, ans=0.125 +2024-09-20 11:55:56,049 INFO [train.py:1198] (0/2) Epoch 49, batch 3600, loss[loss=0.2239, ctc_loss=0.09737, cr_loss=0.3262, attn_decoder_loss=0.2307, over 29494.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1072, cr_loss=0.3455, attn_decoder_loss=0.2366, over 5792115.95 frames. ], batch size: 77, lr: 2.24e-03, grad_scale: 32.0 +2024-09-20 11:55:59,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=883200.0, ans=0.0 +2024-09-20 11:55:59,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=883200.0, ans=0.125 +2024-09-20 11:56:26,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=883280.0, ans=0.1 +2024-09-20 11:56:32,541 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.81 vs. limit=22.5 +2024-09-20 11:56:52,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=883320.0, ans=0.1 +2024-09-20 11:56:52,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=883320.0, ans=0.125 +2024-09-20 11:57:03,139 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=883360.0, ans=0.0 +2024-09-20 11:57:03,699 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.24 vs. 
limit=22.5 +2024-09-20 11:57:07,610 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=883360.0, ans=0.0 +2024-09-20 11:57:10,213 INFO [train.py:1198] (0/2) Epoch 49, batch 3650, loss[loss=0.2399, ctc_loss=0.1196, cr_loss=0.3831, attn_decoder_loss=0.2448, over 29492.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1065, cr_loss=0.3443, attn_decoder_loss=0.2359, over 5794264.05 frames. ], batch size: 90, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:57:13,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=883400.0, ans=0.1 +2024-09-20 11:57:17,905 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=883400.0, ans=0.025 +2024-09-20 11:57:19,160 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.791e+01 8.757e+01 9.183e+01 9.714e+01 2.760e+02, threshold=1.837e+02, percent-clipped=2.0 +2024-09-20 11:57:23,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=883440.0, ans=0.125 +2024-09-20 11:57:38,957 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=883480.0, ans=0.125 +2024-09-20 11:57:49,168 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:57:56,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=883520.0, ans=0.0 +2024-09-20 11:58:00,078 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.64 vs. limit=10.0 +2024-09-20 11:58:02,313 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=883520.0, ans=0.1 +2024-09-20 11:58:24,147 INFO [train.py:1198] (0/2) Epoch 49, batch 3700, loss[loss=0.2412, ctc_loss=0.1088, cr_loss=0.358, attn_decoder_loss=0.2479, over 29691.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1067, cr_loss=0.3446, attn_decoder_loss=0.2361, over 5804307.68 frames. ], batch size: 84, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:58:31,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=883600.0, ans=0.2 +2024-09-20 11:58:44,034 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.83 vs. limit=15.0 +2024-09-20 11:58:55,381 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=883680.0, ans=0.125 +2024-09-20 11:59:35,798 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=883760.0, ans=0.125 +2024-09-20 11:59:38,359 INFO [train.py:1198] (0/2) Epoch 49, batch 3750, loss[loss=0.206, ctc_loss=0.09756, cr_loss=0.3345, attn_decoder_loss=0.2106, over 29311.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1065, cr_loss=0.3443, attn_decoder_loss=0.2359, over 5808094.56 frames. 
], batch size: 67, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:59:47,374 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.555e+01 9.159e+01 9.903e+01 1.587e+02, threshold=1.832e+02, percent-clipped=0.0 +2024-09-20 12:00:21,728 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=883920.0, ans=0.125 +2024-09-20 12:00:40,185 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.40 vs. limit=22.5 +2024-09-20 12:00:42,787 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=883960.0, ans=0.125 +2024-09-20 12:00:50,896 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.90 vs. limit=15.0 +2024-09-20 12:00:54,870 INFO [train.py:1198] (0/2) Epoch 49, batch 3800, loss[loss=0.2405, ctc_loss=0.1141, cr_loss=0.3476, attn_decoder_loss=0.2469, over 29637.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1064, cr_loss=0.3436, attn_decoder_loss=0.2356, over 5797707.13 frames. ], batch size: 86, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:00:55,215 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=884000.0, ans=0.2 +2024-09-20 12:01:09,189 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.18 vs. limit=15.0 +2024-09-20 12:01:36,780 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=884080.0, ans=0.125 +2024-09-20 12:01:48,609 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=884120.0, ans=0.0 +2024-09-20 12:01:53,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=884120.0, ans=0.0 +2024-09-20 12:01:58,069 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.37 vs. limit=22.5 +2024-09-20 12:02:06,447 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=884160.0, ans=0.125 +2024-09-20 12:02:10,425 INFO [train.py:1198] (0/2) Epoch 49, batch 3850, loss[loss=0.2426, ctc_loss=0.1167, cr_loss=0.366, attn_decoder_loss=0.2485, over 29260.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1062, cr_loss=0.3432, attn_decoder_loss=0.2356, over 5811759.11 frames. ], batch size: 100, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:02:18,341 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=884200.0, ans=0.125 +2024-09-20 12:02:18,348 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=884200.0, ans=0.0 +2024-09-20 12:02:19,394 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.454e+01 8.680e+01 9.125e+01 9.699e+01 1.289e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-20 12:02:24,518 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.96 vs. 
limit=22.5 +2024-09-20 12:02:26,059 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.37 vs. limit=15.0 +2024-09-20 12:02:38,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=884280.0, ans=0.125 +2024-09-20 12:02:47,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=884280.0, ans=0.0 +2024-09-20 12:02:49,947 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.25 vs. limit=12.0 +2024-09-20 12:03:24,867 INFO [train.py:1198] (0/2) Epoch 49, batch 3900, loss[loss=0.2422, ctc_loss=0.1111, cr_loss=0.3591, attn_decoder_loss=0.2488, over 29634.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1067, cr_loss=0.3443, attn_decoder_loss=0.2361, over 5815944.97 frames. ], batch size: 86, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:03:26,729 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=884400.0, ans=0.2 +2024-09-20 12:03:32,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=884400.0, ans=0.125 +2024-09-20 12:03:32,482 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=884400.0, ans=0.07 +2024-09-20 12:04:14,100 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:04:23,399 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.30 vs. limit=22.5 +2024-09-20 12:04:30,106 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=884560.0, ans=10.0 +2024-09-20 12:04:38,693 INFO [train.py:1198] (0/2) Epoch 49, batch 3950, loss[loss=0.2481, ctc_loss=0.1234, cr_loss=0.3867, attn_decoder_loss=0.2534, over 29456.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1071, cr_loss=0.3449, attn_decoder_loss=0.2363, over 5835747.44 frames. 
], batch size: 97, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:04:47,484 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.284e+01 8.739e+01 9.137e+01 9.584e+01 1.763e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-20 12:04:47,738 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=884600.0, ans=0.1 +2024-09-20 12:04:53,530 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=884640.0, ans=0.0 +2024-09-20 12:04:53,668 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=884640.0, ans=0.125 +2024-09-20 12:05:25,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=884720.0, ans=0.09899494936611666 +2024-09-20 12:05:31,896 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=884720.0, ans=0.125 +2024-09-20 12:05:33,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=884720.0, ans=0.0 +2024-09-20 12:05:33,760 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.28 vs. limit=15.0 +2024-09-20 12:05:36,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=884760.0, ans=0.125 +2024-09-20 12:05:44,733 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=884760.0, ans=0.125 +2024-09-20 12:05:53,639 INFO [train.py:1198] (0/2) Epoch 49, batch 4000, loss[loss=0.2117, ctc_loss=0.09292, cr_loss=0.3133, attn_decoder_loss=0.2179, over 29508.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1071, cr_loss=0.3447, attn_decoder_loss=0.2365, over 5813399.78 frames. ], batch size: 74, lr: 2.23e-03, grad_scale: 32.0 +2024-09-20 12:05:58,788 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.86 vs. limit=15.0 +2024-09-20 12:06:01,043 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer_na.min_abs, batch_count=884800.0, ans=0.02 +2024-09-20 12:06:06,348 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=4.95 vs. limit=15.0 +2024-09-20 12:06:17,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=884840.0, ans=0.2 +2024-09-20 12:06:23,293 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=884880.0, ans=0.125 +2024-09-20 12:06:48,504 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=884920.0, ans=0.2 +2024-09-20 12:07:08,775 INFO [train.py:1198] (0/2) Epoch 49, batch 4050, loss[loss=0.2552, ctc_loss=0.1355, cr_loss=0.358, attn_decoder_loss=0.2605, over 20012.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1069, cr_loss=0.3437, attn_decoder_loss=0.236, over 5796404.90 frames. 
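Each batch header pairs a per-batch loss ("over 29508.00 frames") with a `tot_loss` ("over 5.8 million frames"); the latter aggregates recent batches weighted by their frame counts, which is why a single outlier batch (such as the 210-utterance one above) barely moves it. A frame-weighted mean in that spirit, with variable names of our own (icefall's tracker also decays older batches, which this sketch omits):

```python
# (per-batch loss, frames) pairs taken from the two entries above:
history = [(0.2117, 29508.0), (0.2552, 20012.0)]
tot = sum(loss * n for loss, n in history)
frames = sum(n for _, n in history)
print(tot / frames)   # frame-weighted mean, reported as "tot_loss ... over N frames"
```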
], batch size: 210, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:07:18,926 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.846e+01 8.931e+01 9.287e+01 9.798e+01 1.744e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-20 12:07:20,687 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=885000.0, ans=0.125 +2024-09-20 12:07:20,985 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.98 vs. limit=15.0 +2024-09-20 12:07:36,847 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=885080.0, ans=0.1 +2024-09-20 12:07:52,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=885120.0, ans=0.2 +2024-09-20 12:08:04,710 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=885120.0, ans=0.1 +2024-09-20 12:08:07,837 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:08:22,202 INFO [train.py:1198] (0/2) Epoch 49, batch 4100, loss[loss=0.2505, ctc_loss=0.1302, cr_loss=0.4, attn_decoder_loss=0.255, over 29497.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1071, cr_loss=0.3444, attn_decoder_loss=0.2364, over 5791806.99 frames. ], batch size: 90, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:08:50,543 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=885280.0, ans=0.0 +2024-09-20 12:09:12,475 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=885320.0, ans=0.2 +2024-09-20 12:09:35,737 INFO [train.py:1198] (0/2) Epoch 49, batch 4150, loss[loss=0.2295, ctc_loss=0.1096, cr_loss=0.3377, attn_decoder_loss=0.2354, over 29495.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.107, cr_loss=0.3448, attn_decoder_loss=0.2361, over 5797329.50 frames. ], batch size: 77, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:09:46,203 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.506e+01 8.769e+01 9.382e+01 9.981e+01 1.562e+02, threshold=1.876e+02, percent-clipped=0.0 +2024-09-20 12:09:56,417 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=885440.0, ans=0.125 +2024-09-20 12:10:49,012 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=885560.0, ans=0.0 +2024-09-20 12:10:52,077 INFO [train.py:1198] (0/2) Epoch 49, batch 4200, loss[loss=0.2471, ctc_loss=0.1191, cr_loss=0.3607, attn_decoder_loss=0.2533, over 29501.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.107, cr_loss=0.3449, attn_decoder_loss=0.2363, over 5799002.89 frames. ], batch size: 90, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:11:16,151 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=885640.0, ans=0.125 +2024-09-20 12:11:37,177 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.64 vs. 
limit=15.0 +2024-09-20 12:12:01,748 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.50 vs. limit=15.0 +2024-09-20 12:12:05,523 INFO [train.py:1198] (0/2) Epoch 49, batch 4250, loss[loss=0.213, ctc_loss=0.09217, cr_loss=0.3179, attn_decoder_loss=0.2194, over 29517.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.107, cr_loss=0.3447, attn_decoder_loss=0.2364, over 5804671.14 frames. ], batch size: 74, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:12:15,831 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.509e+01 8.756e+01 9.233e+01 9.751e+01 2.001e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-20 12:12:26,911 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.43 vs. limit=22.5 +2024-09-20 12:12:40,948 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=885880.0, ans=0.1 +2024-09-20 12:12:56,138 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=15.26 vs. limit=15.0 +2024-09-20 12:13:11,995 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=885960.0, ans=0.125 +2024-09-20 12:13:19,048 INFO [train.py:1198] (0/2) Epoch 49, batch 4300, loss[loss=0.2377, ctc_loss=0.1083, cr_loss=0.3455, attn_decoder_loss=0.2444, over 29525.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1068, cr_loss=0.3441, attn_decoder_loss=0.2365, over 5794049.94 frames. ], batch size: 87, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:13:36,899 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=886040.0, ans=0.125 +2024-09-20 12:13:37,719 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.10 vs. limit=15.0 +2024-09-20 12:13:47,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=886040.0, ans=0.125 +2024-09-20 12:13:47,600 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.19 vs. limit=22.5 +2024-09-20 12:14:31,535 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.27 vs. limit=22.5 +2024-09-20 12:14:34,869 INFO [train.py:1198] (0/2) Epoch 49, batch 4350, loss[loss=0.2496, ctc_loss=0.1269, cr_loss=0.3883, attn_decoder_loss=0.2546, over 29538.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1091, cr_loss=0.3494, attn_decoder_loss=0.2395, over 5797077.62 frames. 
], batch size: 97, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:14:38,236 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:14:45,117 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.682e+01 9.096e+01 9.498e+01 1.000e+02 1.959e+02, threshold=1.900e+02, percent-clipped=0.0 +2024-09-20 12:14:45,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=886200.0, ans=0.125 +2024-09-20 12:14:51,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=886240.0, ans=0.125 +2024-09-20 12:15:05,324 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.50 vs. limit=22.5 +2024-09-20 12:15:08,702 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=886280.0, ans=0.015 +2024-09-20 12:15:11,659 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=886280.0, ans=0.0 +2024-09-20 12:15:24,699 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=886320.0, ans=0.0 +2024-09-20 12:15:24,822 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=886320.0, ans=0.125 +2024-09-20 12:15:42,289 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=886360.0, ans=0.125 +2024-09-20 12:15:48,110 INFO [train.py:1198] (0/2) Epoch 49, batch 4400, loss[loss=0.2426, ctc_loss=0.1117, cr_loss=0.3585, attn_decoder_loss=0.2492, over 27385.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1104, cr_loss=0.3521, attn_decoder_loss=0.2414, over 5768956.01 frames. ], batch size: 124, lr: 2.23e-03, grad_scale: 32.0 +2024-09-20 12:16:12,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=886440.0, ans=0.0 +2024-09-20 12:16:18,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=886480.0, ans=0.1 +2024-09-20 12:16:25,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=886480.0, ans=0.125 +2024-09-20 12:16:39,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=886520.0, ans=0.035 +2024-09-20 12:16:44,807 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.71 vs. limit=15.0 +2024-09-20 12:16:51,304 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=886560.0, ans=0.125 +2024-09-20 12:17:00,343 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=886560.0, ans=0.0 +2024-09-20 12:17:03,039 INFO [train.py:1198] (0/2) Epoch 49, batch 4450, loss[loss=0.2504, ctc_loss=0.13, cr_loss=0.3695, attn_decoder_loss=0.2556, over 19549.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1134, cr_loss=0.3566, attn_decoder_loss=0.2433, over 5581764.67 frames. 
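The recurring optim.py WARNING lines can be read as follows: the five numbers after "grad-norm quartiles" are the min / 25% / 50% / 75% / max of recent gradient norms, and in every entry in this log the reported threshold equals Clipping_scale times the median, e.g. 2.0 x 9.498e+01 = 1.900e+02 in the warning at 12:14:45 above. A small sketch of that relationship (a reader's interpretation of the log, not a quote of optim.py):

# Reader's sketch of the 'Clipping_scale=... grad-norm quartiles ...' lines.
# The five logged values appear to be min/25%/50%/75%/max of recent gradient
# norms; every entry in this log satisfies threshold == clipping_scale * median.
def clip_threshold(grad_norm_quartiles, clipping_scale=2.0):
    median = grad_norm_quartiles[2]  # middle of the five logged values
    return clipping_scale * median

# Quartiles from the WARNING at 12:14:45 above:
q = [7.682e+01, 9.096e+01, 9.498e+01, 1.000e+02, 1.959e+02]
print(clip_threshold(q))  # 189.96 -> logged as threshold=1.900e+02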
], batch size: 209, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:17:04,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=886600.0, ans=0.1 +2024-09-20 12:17:16,287 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.166e+01 9.203e+01 9.654e+01 1.067e+02 3.742e+02, threshold=1.931e+02, percent-clipped=2.0 +2024-09-20 12:17:26,278 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.37 vs. limit=22.5 +2024-09-20 12:17:29,409 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.34 vs. limit=15.0 +2024-09-20 12:17:48,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=886720.0, ans=0.125 +2024-09-20 12:18:08,343 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=17.02 vs. limit=15.0 +2024-09-20 12:18:10,826 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=886760.0, ans=0.125 +2024-09-20 12:18:18,101 INFO [train.py:1198] (0/2) Epoch 49, batch 4500, loss[loss=0.2459, ctc_loss=0.1381, cr_loss=0.3762, attn_decoder_loss=0.2496, over 20530.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1162, cr_loss=0.3597, attn_decoder_loss=0.245, over 5243866.58 frames. ], batch size: 209, lr: 2.23e-03, grad_scale: 8.0 +2024-09-20 12:18:36,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=886840.0, ans=0.125 +2024-09-20 12:18:54,879 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-49.pt +2024-09-20 12:19:45,526 INFO [train.py:1198] (0/2) Epoch 50, batch 0, loss[loss=0.2136, ctc_loss=0.09389, cr_loss=0.3176, attn_decoder_loss=0.2198, over 29592.00 frames. ], tot_loss[loss=0.2136, ctc_loss=0.09389, cr_loss=0.3176, attn_decoder_loss=0.2198, over 29592.00 frames. ], batch size: 73, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:19:45,526 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 12:20:03,817 INFO [train.py:1230] (0/2) Epoch 50, validation: loss=0.2133, ctc_loss=0.03558, cr_loss=6.519e-15, attn_decoder_loss=0.2331, over 944034.00 frames. +2024-09-20 12:20:03,818 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 12:20:15,075 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.72 vs. limit=12.0 +2024-09-20 12:20:40,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=886980.0, ans=0.125 +2024-09-20 12:20:50,785 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.79 vs. 
limit=15.0 +2024-09-20 12:20:51,283 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=887020.0, ans=0.125 +2024-09-20 12:20:57,069 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.187e+01 1.009e+02 1.098e+02 1.200e+02 1.487e+02, threshold=2.197e+02, percent-clipped=0.0 +2024-09-20 12:21:21,355 INFO [train.py:1198] (0/2) Epoch 50, batch 50, loss[loss=0.1985, ctc_loss=0.08559, cr_loss=0.291, attn_decoder_loss=0.2045, over 29413.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1092, cr_loss=0.3487, attn_decoder_loss=0.2376, over 1267418.53 frames. ], batch size: 70, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:21:29,943 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.96 vs. limit=15.0 +2024-09-20 12:21:51,928 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=887180.0, ans=0.05 +2024-09-20 12:21:57,101 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.70 vs. limit=22.5 +2024-09-20 12:22:02,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=887180.0, ans=0.125 +2024-09-20 12:22:02,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=887180.0, ans=0.125 +2024-09-20 12:22:04,204 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=887180.0, ans=0.0 +2024-09-20 12:22:13,347 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=887220.0, ans=0.125 +2024-09-20 12:22:18,757 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.89 vs. limit=12.0 +2024-09-20 12:22:20,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=887220.0, ans=15.0 +2024-09-20 12:22:22,469 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=887260.0, ans=0.125 +2024-09-20 12:22:23,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=887260.0, ans=0.125 +2024-09-20 12:22:33,082 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=887260.0, ans=0.125 +2024-09-20 12:22:37,261 INFO [train.py:1198] (0/2) Epoch 50, batch 100, loss[loss=0.2149, ctc_loss=0.0939, cr_loss=0.3158, attn_decoder_loss=0.2213, over 29543.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1099, cr_loss=0.3513, attn_decoder_loss=0.2392, over 2253526.90 frames. 
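The "Maximum memory allocated so far is 52576MB" line above is the standard way PyTorch reports its peak CUDA allocation. A generic illustration of how such a line can be produced (the exact call in train.py may differ):

import torch

# Generic PyTorch peak-memory report; prints a line of the same shape as the
# 'Maximum memory allocated so far' entry above. Illustrative only.
if torch.cuda.is_available():
    peak_bytes = torch.cuda.max_memory_allocated(device=0)
    print(f"Maximum memory allocated so far is {peak_bytes // (1024 * 1024)}MB")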
], batch size: 76, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:22:37,480 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=887300.0, ans=0.125 +2024-09-20 12:23:29,880 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.360e+01 8.808e+01 9.273e+01 9.833e+01 1.804e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-20 12:23:33,411 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=887420.0, ans=0.1 +2024-09-20 12:23:36,316 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=887420.0, ans=0.0 +2024-09-20 12:23:53,825 INFO [train.py:1198] (0/2) Epoch 50, batch 150, loss[loss=0.2066, ctc_loss=0.08658, cr_loss=0.2946, attn_decoder_loss=0.2134, over 29439.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1074, cr_loss=0.3465, attn_decoder_loss=0.2368, over 3048310.59 frames. ], batch size: 70, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:24:07,797 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=887540.0, ans=0.125 +2024-09-20 12:24:29,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=887580.0, ans=0.025 +2024-09-20 12:24:41,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=887620.0, ans=0.0 +2024-09-20 12:24:42,006 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=887620.0, ans=0.1 +2024-09-20 12:24:44,996 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=887620.0, ans=0.125 +2024-09-20 12:24:50,970 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=887620.0, ans=0.0 +2024-09-20 12:25:02,940 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=887660.0, ans=0.125 +2024-09-20 12:25:11,629 INFO [train.py:1198] (0/2) Epoch 50, batch 200, loss[loss=0.2393, ctc_loss=0.1096, cr_loss=0.3616, attn_decoder_loss=0.2457, over 27358.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1069, cr_loss=0.3454, attn_decoder_loss=0.2359, over 3660121.18 frames. 
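Most INFO lines from scaling.py print a ScheduledFloat: a hyperparameter (dropout rate, skip rate, balancer probability, and so on) whose value "ans" is a function of the global batch_count. A minimal sketch of such a batch-count-keyed schedule, with made-up breakpoints for illustration (the real schedules for this run are not shown in the log):

# Reader's sketch of a batch-count-keyed schedule like the ScheduledFloat
# entries in this log (name=..., batch_count=..., ans=...). The breakpoints
# below are invented for illustration.
def scheduled_float(batch_count, points):
    """Piecewise-linear interpolation over (batch_count, value) breakpoints."""
    if batch_count <= points[0][0]:
        return points[0][1]
    for (x0, y0), (x1, y1) in zip(points, points[1:]):
        if batch_count <= x1:
            return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)
    return points[-1][1]

# A rate that decays early in training, then holds at its final value:
schedule = [(0.0, 0.3), (20000.0, 0.1)]
print(scheduled_float(887540.0, schedule))  # 0.1: far past the last breakpoint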
], batch size: 124, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:25:19,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=887700.0, ans=0.125 +2024-09-20 12:25:19,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=887700.0, ans=10.0 +2024-09-20 12:25:25,592 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=887740.0, ans=0.125 +2024-09-20 12:25:27,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=887740.0, ans=0.1 +2024-09-20 12:25:27,025 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=887740.0, ans=0.125 +2024-09-20 12:25:35,166 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.08 vs. limit=22.5 +2024-09-20 12:26:04,372 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 6.975e+01 8.480e+01 9.009e+01 9.638e+01 2.120e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 12:26:26,848 INFO [train.py:1198] (0/2) Epoch 50, batch 250, loss[loss=0.2331, ctc_loss=0.1026, cr_loss=0.3333, attn_decoder_loss=0.2402, over 29195.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1066, cr_loss=0.3445, attn_decoder_loss=0.2357, over 4141757.61 frames. ], batch size: 100, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:26:34,942 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=887900.0, ans=0.125 +2024-09-20 12:26:50,560 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=887940.0, ans=0.0 +2024-09-20 12:26:50,676 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:27:32,516 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.20 vs. limit=10.0 +2024-09-20 12:27:43,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=888100.0, ans=0.0 +2024-09-20 12:27:45,106 INFO [train.py:1198] (0/2) Epoch 50, batch 300, loss[loss=0.2431, ctc_loss=0.1135, cr_loss=0.3725, attn_decoder_loss=0.2492, over 29549.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1063, cr_loss=0.3441, attn_decoder_loss=0.2357, over 4509386.68 frames. 
], batch size: 92, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:28:25,479 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=888180.0, ans=0.125 +2024-09-20 12:28:40,139 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.872e+01 8.884e+01 9.251e+01 9.818e+01 2.212e+02, threshold=1.850e+02, percent-clipped=1.0 +2024-09-20 12:28:40,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=888220.0, ans=0.125 +2024-09-20 12:28:40,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=888220.0, ans=0.07 +2024-09-20 12:29:02,961 INFO [train.py:1198] (0/2) Epoch 50, batch 350, loss[loss=0.2049, ctc_loss=0.08557, cr_loss=0.2965, attn_decoder_loss=0.2116, over 29328.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1067, cr_loss=0.3449, attn_decoder_loss=0.2362, over 4794483.32 frames. ], batch size: 71, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:29:58,804 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=888420.0, ans=0.125 +2024-09-20 12:30:17,910 INFO [train.py:1198] (0/2) Epoch 50, batch 400, loss[loss=0.2314, ctc_loss=0.1067, cr_loss=0.3577, attn_decoder_loss=0.2373, over 29712.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1061, cr_loss=0.3437, attn_decoder_loss=0.2358, over 5025051.33 frames. ], batch size: 82, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:30:21,408 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=888500.0, ans=0.1 +2024-09-20 12:30:35,152 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=888540.0, ans=0.125 +2024-09-20 12:30:36,553 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=888540.0, ans=0.0 +2024-09-20 12:30:43,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=888540.0, ans=0.125 +2024-09-20 12:31:04,501 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=888620.0, ans=0.1 +2024-09-20 12:31:11,994 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=888620.0, ans=0.0 +2024-09-20 12:31:14,780 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.622e+01 9.023e+01 9.604e+01 1.265e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-20 12:31:35,882 INFO [train.py:1198] (0/2) Epoch 50, batch 450, loss[loss=0.2442, ctc_loss=0.1196, cr_loss=0.3621, attn_decoder_loss=0.25, over 29684.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1067, cr_loss=0.3444, attn_decoder_loss=0.2361, over 5188054.26 frames. 
], batch size: 83, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:31:36,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=888700.0, ans=0.1 +2024-09-20 12:32:15,177 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=888780.0, ans=0.0 +2024-09-20 12:32:31,782 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=888820.0, ans=0.125 +2024-09-20 12:32:33,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=888820.0, ans=0.125 +2024-09-20 12:32:45,366 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=888860.0, ans=0.2 +2024-09-20 12:32:51,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=888860.0, ans=0.1 +2024-09-20 12:32:54,119 INFO [train.py:1198] (0/2) Epoch 50, batch 500, loss[loss=0.2391, ctc_loss=0.1108, cr_loss=0.3576, attn_decoder_loss=0.2454, over 29454.00 frames. ], tot_loss[loss=0.2294, ctc_loss=0.1064, cr_loss=0.3434, attn_decoder_loss=0.2354, over 5330500.17 frames. ], batch size: 94, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:32:55,917 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=888900.0, ans=0.0 +2024-09-20 12:33:11,505 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.47 vs. limit=15.0 +2024-09-20 12:33:21,369 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=888940.0, ans=0.0 +2024-09-20 12:33:36,583 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=888980.0, ans=0.125 +2024-09-20 12:33:48,196 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.453e+01 8.714e+01 9.199e+01 9.608e+01 6.151e+02, threshold=1.840e+02, percent-clipped=1.0 +2024-09-20 12:34:09,225 INFO [train.py:1198] (0/2) Epoch 50, batch 550, loss[loss=0.2401, ctc_loss=0.1088, cr_loss=0.3283, attn_decoder_loss=0.2474, over 28801.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1064, cr_loss=0.3437, attn_decoder_loss=0.2357, over 5423083.85 frames. 
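The Whitening lines track how close each layer's features are to a white, isotropic covariance; each logged metric is compared against a limit (e.g. metric=14.47 vs. limit=15.0 above), and a value of 1.0 would mean perfectly white features. A simplified stand-in for such a whiteness statistic, using the common mean-of-squared-eigenvalues form (an assumption for illustration, not the exact formula from scaling.py):

import torch

# Reader's sketch of a whiteness statistic like the 'metric=... vs. limit=...'
# Whitening lines in this log: 1.0 for perfectly white (isotropic) features,
# larger when a few directions dominate the covariance. A simplified stand-in,
# not the exact formula from scaling.py.
def whitening_metric(x):
    """x: (num_frames, num_channels); returns mean(eig^2) / mean(eig)^2 >= 1."""
    x = x - x.mean(dim=0)
    cov = (x.T @ x) / x.shape[0]
    eigs = torch.linalg.eigvalsh(cov)
    return float((eigs ** 2).mean() / eigs.mean() ** 2)

x_white = torch.randn(20000, 512)                    # near-isotropic features
print(whitening_metric(x_white))                     # close to 1.0
print(whitening_metric(x_white * torch.linspace(0.1, 3.0, 512)))  # clearly larger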
], batch size: 104, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:34:17,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=889100.0, ans=0.125 +2024-09-20 12:34:20,256 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=889100.0, ans=0.0 +2024-09-20 12:34:42,290 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=889180.0, ans=0.0 +2024-09-20 12:34:43,717 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=889180.0, ans=0.125 +2024-09-20 12:34:52,625 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=889180.0, ans=0.2 +2024-09-20 12:35:19,977 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=889260.0, ans=0.125 +2024-09-20 12:35:27,190 INFO [train.py:1198] (0/2) Epoch 50, batch 600, loss[loss=0.2503, ctc_loss=0.1228, cr_loss=0.3779, attn_decoder_loss=0.2561, over 29242.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1066, cr_loss=0.3447, attn_decoder_loss=0.236, over 5508142.50 frames. ], batch size: 100, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:35:39,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=889300.0, ans=0.07 +2024-09-20 12:35:48,623 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=889340.0, ans=0.025 +2024-09-20 12:36:10,331 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_ff3.min_abs, batch_count=889380.0, ans=0.2 +2024-09-20 12:36:23,308 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.988e+01 8.691e+01 9.098e+01 9.563e+01 1.951e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-20 12:36:23,696 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=889420.0, ans=0.2 +2024-09-20 12:36:25,236 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=889420.0, ans=0.125 +2024-09-20 12:36:44,428 INFO [train.py:1198] (0/2) Epoch 50, batch 650, loss[loss=0.2289, ctc_loss=0.1022, cr_loss=0.335, attn_decoder_loss=0.2356, over 29762.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.106, cr_loss=0.3433, attn_decoder_loss=0.2353, over 5586809.37 frames. ], batch size: 81, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:37:02,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=889540.0, ans=0.2 +2024-09-20 12:37:59,039 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=889700.0, ans=0.1 +2024-09-20 12:38:00,257 INFO [train.py:1198] (0/2) Epoch 50, batch 700, loss[loss=0.2291, ctc_loss=0.1129, cr_loss=0.3417, attn_decoder_loss=0.2345, over 29542.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1063, cr_loss=0.3439, attn_decoder_loss=0.2359, over 5636366.11 frames. 
], batch size: 76, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:38:09,562 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:38:28,123 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.06 vs. limit=15.0 +2024-09-20 12:38:43,980 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.67 vs. limit=15.0 +2024-09-20 12:38:54,486 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.77 vs. limit=15.0 +2024-09-20 12:38:56,327 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=889820.0, ans=22.5 +2024-09-20 12:38:56,770 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.632e+01 8.634e+01 9.067e+01 9.623e+01 1.303e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-20 12:38:58,651 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=889820.0, ans=0.125 +2024-09-20 12:39:17,925 INFO [train.py:1198] (0/2) Epoch 50, batch 750, loss[loss=0.2302, ctc_loss=0.1107, cr_loss=0.362, attn_decoder_loss=0.2354, over 29722.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1061, cr_loss=0.3437, attn_decoder_loss=0.2355, over 5674027.81 frames. ], batch size: 82, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:39:19,707 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=889900.0, ans=0.0 +2024-09-20 12:39:30,221 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=889900.0, ans=0.025 +2024-09-20 12:39:31,773 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=889940.0, ans=0.125 +2024-09-20 12:39:33,262 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=889940.0, ans=0.125 +2024-09-20 12:40:07,157 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=890020.0, ans=0.015 +2024-09-20 12:40:16,384 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=890020.0, ans=0.0 +2024-09-20 12:40:23,702 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:40:29,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=890060.0, ans=0.1 +2024-09-20 12:40:35,475 INFO [train.py:1198] (0/2) Epoch 50, batch 800, loss[loss=0.2116, ctc_loss=0.09415, cr_loss=0.3078, attn_decoder_loss=0.2178, over 29640.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1065, cr_loss=0.3444, attn_decoder_loss=0.2359, over 5706395.14 frames. 
], batch size: 73, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:40:43,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=890100.0, ans=0.0 +2024-09-20 12:41:13,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=890180.0, ans=0.125 +2024-09-20 12:41:14,950 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=890180.0, ans=0.0 +2024-09-20 12:41:20,784 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=890220.0, ans=0.2 +2024-09-20 12:41:23,054 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.04 vs. limit=15.0 +2024-09-20 12:41:29,705 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.455e+01 8.785e+01 9.269e+01 9.766e+01 2.898e+02, threshold=1.854e+02, percent-clipped=1.0 +2024-09-20 12:41:46,559 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=890260.0, ans=0.125 +2024-09-20 12:41:50,577 INFO [train.py:1198] (0/2) Epoch 50, batch 850, loss[loss=0.2388, ctc_loss=0.108, cr_loss=0.3564, attn_decoder_loss=0.2454, over 29749.00 frames. ], tot_loss[loss=0.2294, ctc_loss=0.1061, cr_loss=0.3433, attn_decoder_loss=0.2354, over 5734323.32 frames. ], batch size: 89, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:41:58,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=890300.0, ans=0.1 +2024-09-20 12:42:14,720 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=890340.0, ans=0.5 +2024-09-20 12:42:17,851 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=890340.0, ans=0.125 +2024-09-20 12:42:27,385 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=890380.0, ans=0.1 +2024-09-20 12:42:34,943 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=890420.0, ans=0.1 +2024-09-20 12:42:46,147 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=890420.0, ans=0.07 +2024-09-20 12:43:05,954 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=890460.0, ans=0.125 +2024-09-20 12:43:08,696 INFO [train.py:1198] (0/2) Epoch 50, batch 900, loss[loss=0.2073, ctc_loss=0.08363, cr_loss=0.2979, attn_decoder_loss=0.2144, over 29621.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1064, cr_loss=0.3439, attn_decoder_loss=0.2358, over 5740005.92 frames. 
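The grad_scale field in the tot_loss entries moves between 8.0, 16.0 and 32.0 over this stretch of training, the signature of dynamic mixed-precision loss scaling: the scale is doubled after a long enough run of finite steps and halved whenever a step overflows. A generic PyTorch illustration of that mechanism (not the actual loop from train.py):

import torch

# Generic dynamic loss-scaling loop; illustrates why the grad_scale values in
# this log move in powers of two (doubling after stable stretches, halving on
# overflow). Not the actual train.py training loop.
scaler = torch.cuda.amp.GradScaler(
    init_scale=16.0,      # comparable to the grad_scale values logged here
    growth_factor=2.0,    # 16 -> 32 after a stable stretch
    backoff_factor=0.5,   # 32 -> 16 when a step produces inf/nan grads
    growth_interval=2000, # steps between growth attempts
)

def training_step(model, optimizer, batch, loss_fn):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = loss_fn(model(batch["inputs"]), batch["targets"])
    scaler.scale(loss).backward()  # scale the loss up before backward
    scaler.step(optimizer)         # skips the update on inf/nan grads
    scaler.update()                # grows or backs off the scale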
], batch size: 73, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:43:25,582 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:43:45,084 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=890580.0, ans=0.95 +2024-09-20 12:44:05,274 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=890620.0, ans=0.125 +2024-09-20 12:44:06,530 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.711e+01 8.704e+01 9.222e+01 9.610e+01 2.090e+02, threshold=1.844e+02, percent-clipped=2.0 +2024-09-20 12:44:11,385 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:44:25,910 INFO [train.py:1198] (0/2) Epoch 50, batch 950, loss[loss=0.2146, ctc_loss=0.0931, cr_loss=0.3242, attn_decoder_loss=0.2209, over 29516.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1063, cr_loss=0.3437, attn_decoder_loss=0.236, over 5743418.77 frames. ], batch size: 74, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:44:29,285 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=890700.0, ans=0.1 +2024-09-20 12:44:32,178 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=890700.0, ans=0.125 +2024-09-20 12:45:04,068 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=890780.0, ans=0.0 +2024-09-20 12:45:08,630 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=890780.0, ans=0.125 +2024-09-20 12:45:30,947 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=890860.0, ans=0.05 +2024-09-20 12:45:39,882 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=890900.0, ans=0.125 +2024-09-20 12:45:41,064 INFO [train.py:1198] (0/2) Epoch 50, batch 1000, loss[loss=0.2195, ctc_loss=0.09449, cr_loss=0.3107, attn_decoder_loss=0.2264, over 29517.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1069, cr_loss=0.3446, attn_decoder_loss=0.2364, over 5737991.09 frames. ], batch size: 77, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:46:11,611 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=890980.0, ans=0.125 +2024-09-20 12:46:13,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=890980.0, ans=0.025 +2024-09-20 12:46:21,976 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=890980.0, ans=0.125 +2024-09-20 12:46:32,323 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.08 vs. 
limit=15.0 +2024-09-20 12:46:38,847 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.499e+01 8.665e+01 9.186e+01 9.812e+01 1.609e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-20 12:46:48,037 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=891060.0, ans=0.1 +2024-09-20 12:46:56,369 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.61 vs. limit=12.0 +2024-09-20 12:46:58,380 INFO [train.py:1198] (0/2) Epoch 50, batch 1050, loss[loss=0.2325, ctc_loss=0.1053, cr_loss=0.3318, attn_decoder_loss=0.2393, over 29662.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1063, cr_loss=0.3431, attn_decoder_loss=0.2357, over 5745560.16 frames. ], batch size: 85, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:47:06,288 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_na.min_abs, batch_count=891100.0, ans=0.02 +2024-09-20 12:47:31,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=891180.0, ans=0.125 +2024-09-20 12:47:52,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=891220.0, ans=0.2 +2024-09-20 12:48:09,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=891260.0, ans=0.125 +2024-09-20 12:48:16,840 INFO [train.py:1198] (0/2) Epoch 50, batch 1100, loss[loss=0.2313, ctc_loss=0.1063, cr_loss=0.3544, attn_decoder_loss=0.2373, over 29411.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1061, cr_loss=0.3427, attn_decoder_loss=0.2354, over 5756472.67 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:48:52,003 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=891380.0, ans=0.125 +2024-09-20 12:48:53,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=891380.0, ans=0.1 +2024-09-20 12:49:02,600 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=891420.0, ans=0.0 +2024-09-20 12:49:12,783 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.273e+01 8.673e+01 9.174e+01 9.700e+01 1.224e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-20 12:49:32,504 INFO [train.py:1198] (0/2) Epoch 50, batch 1150, loss[loss=0.2271, ctc_loss=0.1079, cr_loss=0.3499, attn_decoder_loss=0.2326, over 29459.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1064, cr_loss=0.3432, attn_decoder_loss=0.2356, over 5753371.39 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:49:38,999 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=891500.0, ans=0.1 +2024-09-20 12:49:50,858 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=891540.0, ans=0.125 +2024-09-20 12:49:51,722 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.00 vs. 
limit=15.0 +2024-09-20 12:50:37,124 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:50:42,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=891660.0, ans=0.2 +2024-09-20 12:50:49,629 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.84 vs. limit=15.0 +2024-09-20 12:50:50,190 INFO [train.py:1198] (0/2) Epoch 50, batch 1200, loss[loss=0.2226, ctc_loss=0.09905, cr_loss=0.3275, attn_decoder_loss=0.2291, over 29702.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1069, cr_loss=0.3444, attn_decoder_loss=0.2363, over 5745543.74 frames. ], batch size: 85, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:51:10,508 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=891740.0, ans=0.04949747468305833 +2024-09-20 12:51:48,362 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.180e+01 8.827e+01 9.334e+01 1.012e+02 2.490e+02, threshold=1.867e+02, percent-clipped=1.0 +2024-09-20 12:52:07,778 INFO [train.py:1198] (0/2) Epoch 50, batch 1250, loss[loss=0.2376, ctc_loss=0.1067, cr_loss=0.3415, attn_decoder_loss=0.2446, over 29515.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1073, cr_loss=0.3455, attn_decoder_loss=0.2368, over 5773771.06 frames. ], batch size: 92, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:52:34,506 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten.whitening_limit, batch_count=891940.0, ans=15.0 +2024-09-20 12:53:06,354 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=892020.0, ans=0.0 +2024-09-20 12:53:21,377 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=892060.0, ans=0.0 +2024-09-20 12:53:24,061 INFO [train.py:1198] (0/2) Epoch 50, batch 1300, loss[loss=0.2424, ctc_loss=0.1032, cr_loss=0.3353, attn_decoder_loss=0.2504, over 28389.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1071, cr_loss=0.3452, attn_decoder_loss=0.2365, over 5779069.98 frames. ], batch size: 111, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:53:40,913 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=892140.0, ans=0.025 +2024-09-20 12:54:02,199 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=892180.0, ans=0.125 +2024-09-20 12:54:02,219 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=892180.0, ans=0.0 +2024-09-20 12:54:18,646 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=892220.0, ans=0.07 +2024-09-20 12:54:19,788 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.029e+01 8.576e+01 8.998e+01 9.559e+01 1.394e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-20 12:54:28,424 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=892260.0, ans=0.125 +2024-09-20 12:54:41,684 INFO [train.py:1198] (0/2) Epoch 50, batch 1350, loss[loss=0.2352, ctc_loss=0.1143, cr_loss=0.367, attn_decoder_loss=0.2405, over 29762.00 frames. 
], tot_loss[loss=0.2302, ctc_loss=0.1067, cr_loss=0.3444, attn_decoder_loss=0.2363, over 5795597.22 frames. ], batch size: 81, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:54:49,368 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:54:52,403 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=892300.0, ans=0.0 +2024-09-20 12:55:00,611 INFO [scaling.py:1024] (0/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.46 vs. limit=5.0 +2024-09-20 12:55:07,164 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=892340.0, ans=0.125 +2024-09-20 12:55:12,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=892380.0, ans=0.0 +2024-09-20 12:55:35,927 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=892420.0, ans=0.125 +2024-09-20 12:55:47,951 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=892460.0, ans=0.125 +2024-09-20 12:55:58,268 INFO [train.py:1198] (0/2) Epoch 50, batch 1400, loss[loss=0.212, ctc_loss=0.09596, cr_loss=0.314, attn_decoder_loss=0.2179, over 29574.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1064, cr_loss=0.344, attn_decoder_loss=0.2361, over 5806865.35 frames. ], batch size: 69, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:56:24,647 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.18 vs. limit=6.0 +2024-09-20 12:56:40,566 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=892580.0, ans=0.025 +2024-09-20 12:56:48,028 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=892620.0, ans=0.0 +2024-09-20 12:56:53,531 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.789e+01 8.988e+01 9.426e+01 9.888e+01 1.632e+02, threshold=1.885e+02, percent-clipped=0.0 +2024-09-20 12:56:55,398 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=892620.0, ans=0.125 +2024-09-20 12:56:56,026 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.60 vs. limit=15.0 +2024-09-20 12:56:58,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=892660.0, ans=0.125 +2024-09-20 12:57:13,308 INFO [train.py:1198] (0/2) Epoch 50, batch 1450, loss[loss=0.2485, ctc_loss=0.1268, cr_loss=0.3835, attn_decoder_loss=0.2535, over 29420.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1065, cr_loss=0.3441, attn_decoder_loss=0.2366, over 5803597.61 frames. ], batch size: 94, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:57:34,451 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:57:34,871 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.54 vs. 
limit=22.5 +2024-09-20 12:57:38,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=892740.0, ans=0.0 +2024-09-20 12:57:46,527 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=892780.0, ans=0.125 +2024-09-20 12:58:30,655 INFO [train.py:1198] (0/2) Epoch 50, batch 1500, loss[loss=0.231, ctc_loss=0.09964, cr_loss=0.3205, attn_decoder_loss=0.2385, over 29617.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1065, cr_loss=0.3438, attn_decoder_loss=0.2367, over 5804988.70 frames. ], batch size: 86, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:58:56,534 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=892940.0, ans=0.0 +2024-09-20 12:58:59,558 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=892980.0, ans=0.125 +2024-09-20 12:59:28,764 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.732e+01 9.209e+01 9.748e+01 2.356e+02, threshold=1.842e+02, percent-clipped=2.0 +2024-09-20 12:59:29,053 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=893020.0, ans=0.125 +2024-09-20 12:59:48,133 INFO [train.py:1198] (0/2) Epoch 50, batch 1550, loss[loss=0.2533, ctc_loss=0.1272, cr_loss=0.3946, attn_decoder_loss=0.2586, over 29542.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1069, cr_loss=0.3446, attn_decoder_loss=0.2368, over 5780405.86 frames. ], batch size: 90, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:00:18,214 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=893180.0, ans=0.95 +2024-09-20 13:00:30,067 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=893180.0, ans=0.125 +2024-09-20 13:00:58,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=893260.0, ans=0.2 +2024-09-20 13:01:02,175 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.36 vs. limit=22.5 +2024-09-20 13:01:02,885 INFO [train.py:1198] (0/2) Epoch 50, batch 1600, loss[loss=0.2453, ctc_loss=0.122, cr_loss=0.3814, attn_decoder_loss=0.2505, over 29666.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.107, cr_loss=0.345, attn_decoder_loss=0.2364, over 5762558.53 frames. ], batch size: 85, lr: 2.20e-03, grad_scale: 32.0 +2024-09-20 13:01:20,588 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=9.81 vs. limit=22.5 +2024-09-20 13:01:35,715 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.09 vs. 
limit=22.5 +2024-09-20 13:01:36,693 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=893380.0, ans=0.125 +2024-09-20 13:01:36,726 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=893380.0, ans=0.0 +2024-09-20 13:01:41,079 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=893380.0, ans=0.0 +2024-09-20 13:01:48,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=893420.0, ans=0.125 +2024-09-20 13:02:00,397 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.886e+01 8.710e+01 9.146e+01 9.958e+01 1.437e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-20 13:02:06,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=893460.0, ans=0.2 +2024-09-20 13:02:16,415 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=893460.0, ans=0.125 +2024-09-20 13:02:17,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=893460.0, ans=0.0 +2024-09-20 13:02:18,443 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.44 vs. limit=15.0 +2024-09-20 13:02:20,546 INFO [train.py:1198] (0/2) Epoch 50, batch 1650, loss[loss=0.2368, ctc_loss=0.1105, cr_loss=0.3603, attn_decoder_loss=0.2428, over 29705.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1068, cr_loss=0.3445, attn_decoder_loss=0.2363, over 5757343.48 frames. ], batch size: 89, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:02:28,535 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=893500.0, ans=0.125 +2024-09-20 13:02:29,921 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=893500.0, ans=0.125 +2024-09-20 13:02:34,298 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=893540.0, ans=0.1 +2024-09-20 13:02:37,937 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.68 vs. limit=12.0 +2024-09-20 13:02:43,773 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.55 vs. limit=15.0 +2024-09-20 13:02:59,478 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=893580.0, ans=0.125 +2024-09-20 13:03:03,924 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=893580.0, ans=0.2 +2024-09-20 13:03:32,233 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=893660.0, ans=0.0 +2024-09-20 13:03:37,898 INFO [train.py:1198] (0/2) Epoch 50, batch 1700, loss[loss=0.198, ctc_loss=0.08267, cr_loss=0.2941, attn_decoder_loss=0.2042, over 29567.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1064, cr_loss=0.3439, attn_decoder_loss=0.2361, over 5779734.91 frames. 
], batch size: 69, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:03:38,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=893700.0, ans=0.025 +2024-09-20 13:03:57,854 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_positive, batch_count=893740.0, ans=0.05 +2024-09-20 13:04:09,801 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=893780.0, ans=0.025 +2024-09-20 13:04:17,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=893780.0, ans=0.125 +2024-09-20 13:04:21,975 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=893820.0, ans=0.2 +2024-09-20 13:04:29,429 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=893820.0, ans=0.1 +2024-09-20 13:04:35,201 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.624e+01 8.662e+01 9.260e+01 9.838e+01 1.206e+02, threshold=1.852e+02, percent-clipped=0.0 +2024-09-20 13:04:38,556 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=893860.0, ans=0.2 +2024-09-20 13:04:53,291 INFO [train.py:1198] (0/2) Epoch 50, batch 1750, loss[loss=0.212, ctc_loss=0.1023, cr_loss=0.3357, attn_decoder_loss=0.2167, over 29384.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1061, cr_loss=0.3438, attn_decoder_loss=0.2357, over 5787766.13 frames. ], batch size: 67, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:05:02,593 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=893900.0, ans=0.1 +2024-09-20 13:05:10,110 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=893940.0, ans=0.125 +2024-09-20 13:05:20,841 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=893940.0, ans=0.2 +2024-09-20 13:05:55,343 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:05:58,130 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=894060.0, ans=0.1 +2024-09-20 13:06:04,226 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=894060.0, ans=0.125 +2024-09-20 13:06:08,369 INFO [train.py:1198] (0/2) Epoch 50, batch 1800, loss[loss=0.2331, ctc_loss=0.1071, cr_loss=0.3364, attn_decoder_loss=0.2397, over 29693.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1067, cr_loss=0.3442, attn_decoder_loss=0.2363, over 5790559.24 frames. 
], batch size: 83, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:06:18,507 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=894100.0, ans=0.125 +2024-09-20 13:06:23,005 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=894100.0, ans=0.125 +2024-09-20 13:06:23,613 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.23 vs. limit=15.0 +2024-09-20 13:06:28,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=894140.0, ans=0.0 +2024-09-20 13:06:36,960 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.38 vs. limit=6.0 +2024-09-20 13:06:43,956 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=894180.0, ans=0.0 +2024-09-20 13:07:10,022 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.815e+01 8.701e+01 9.171e+01 9.771e+01 2.069e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-20 13:07:16,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=894260.0, ans=0.125 +2024-09-20 13:07:20,862 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=894260.0, ans=0.025 +2024-09-20 13:07:23,884 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=894260.0, ans=0.2 +2024-09-20 13:07:28,093 INFO [train.py:1198] (0/2) Epoch 50, batch 1850, loss[loss=0.2441, ctc_loss=0.1092, cr_loss=0.3476, attn_decoder_loss=0.2513, over 29614.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1062, cr_loss=0.3433, attn_decoder_loss=0.2359, over 5796216.29 frames. ], batch size: 86, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:07:49,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=894340.0, ans=0.1 +2024-09-20 13:07:52,529 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=894340.0, ans=10.0 +2024-09-20 13:07:54,187 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=894340.0, ans=0.125 +2024-09-20 13:08:27,734 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.25 vs. limit=22.5 +2024-09-20 13:08:34,830 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=894460.0, ans=0.125 +2024-09-20 13:08:43,298 INFO [train.py:1198] (0/2) Epoch 50, batch 1900, loss[loss=0.2478, ctc_loss=0.119, cr_loss=0.3899, attn_decoder_loss=0.2535, over 29694.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1066, cr_loss=0.3448, attn_decoder_loss=0.2365, over 5805273.08 frames. 
], batch size: 89, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:08:51,243 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=894500.0, ans=0.125 +2024-09-20 13:09:06,730 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.43 vs. limit=12.0 +2024-09-20 13:09:13,040 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.59 vs. limit=6.0 +2024-09-20 13:09:15,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=894580.0, ans=0.0 +2024-09-20 13:09:32,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=894620.0, ans=0.07 +2024-09-20 13:09:37,552 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.25 vs. limit=15.0 +2024-09-20 13:09:42,597 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.927e+01 9.438e+01 9.973e+01 1.317e+02, threshold=1.888e+02, percent-clipped=0.0 +2024-09-20 13:09:53,540 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=894660.0, ans=0.125 +2024-09-20 13:09:59,249 INFO [train.py:1198] (0/2) Epoch 50, batch 1950, loss[loss=0.2243, ctc_loss=0.1037, cr_loss=0.3408, attn_decoder_loss=0.2301, over 29445.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1073, cr_loss=0.3467, attn_decoder_loss=0.2374, over 5819907.47 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:10:10,677 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=894700.0, ans=0.125 +2024-09-20 13:10:14,388 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.62 vs. limit=15.0 +2024-09-20 13:10:28,409 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=894740.0, ans=0.1 +2024-09-20 13:10:28,869 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.05 vs. limit=6.0 +2024-09-20 13:10:29,235 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.28 vs. limit=10.0 +2024-09-20 13:10:47,014 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=894820.0, ans=0.125 +2024-09-20 13:10:51,405 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=894820.0, ans=0.1 +2024-09-20 13:11:13,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=894860.0, ans=0.125 +2024-09-20 13:11:15,473 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=894860.0, ans=0.025 +2024-09-20 13:11:18,240 INFO [train.py:1198] (0/2) Epoch 50, batch 2000, loss[loss=0.2082, ctc_loss=0.09163, cr_loss=0.3084, attn_decoder_loss=0.2143, over 29349.00 frames. 
], tot_loss[loss=0.2319, ctc_loss=0.1078, cr_loss=0.3472, attn_decoder_loss=0.238, over 5797761.53 frames. ], batch size: 67, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:11:26,165 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=894900.0, ans=0.125 +2024-09-20 13:11:45,992 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=894940.0, ans=0.2 +2024-09-20 13:11:59,311 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=894980.0, ans=0.125 +2024-09-20 13:12:17,047 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.737e+01 8.718e+01 9.150e+01 9.752e+01 2.823e+02, threshold=1.830e+02, percent-clipped=2.0 +2024-09-20 13:12:17,460 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=895060.0, ans=0.125 +2024-09-20 13:12:26,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=895060.0, ans=0.1 +2024-09-20 13:12:34,065 INFO [train.py:1198] (0/2) Epoch 50, batch 2050, loss[loss=0.206, ctc_loss=0.08871, cr_loss=0.2974, attn_decoder_loss=0.2124, over 29435.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1068, cr_loss=0.345, attn_decoder_loss=0.2368, over 5789649.11 frames. ], batch size: 70, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:12:37,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=895100.0, ans=0.0 +2024-09-20 13:12:37,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=895100.0, ans=0.125 +2024-09-20 13:12:38,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=895100.0, ans=0.125 +2024-09-20 13:12:39,380 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.04 vs. limit=15.0 +2024-09-20 13:12:47,934 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=895140.0, ans=0.125 +2024-09-20 13:12:52,959 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.79 vs. limit=15.0 +2024-09-20 13:13:01,439 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=895140.0, ans=0.125 +2024-09-20 13:13:18,455 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=895220.0, ans=0.125 +2024-09-20 13:13:34,661 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=895260.0, ans=0.0 +2024-09-20 13:13:46,788 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=895260.0, ans=0.2 +2024-09-20 13:13:49,482 INFO [train.py:1198] (0/2) Epoch 50, batch 2100, loss[loss=0.2295, ctc_loss=0.1136, cr_loss=0.361, attn_decoder_loss=0.2344, over 29789.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1063, cr_loss=0.344, attn_decoder_loss=0.2363, over 5800109.06 frames. 
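Each Clipping_scale warning from optim.py summarizes the recent distribution of gradient norms: the five numbers are the min / 25% / median / 75% / max quantiles, and in every entry in this log the threshold equals clipping_scale times the logged median (for the entry above, 2.0 * 9.150e+01 = 1.830e+02), with percent-clipped the share of recent batches whose norm exceeded it. A sketch under that assumption; the bookkeeping inside the actual optimizer (running windows, update period) is not reproduced, and clipping_summary is an illustrative name, not a function from the codebase.

```python
import torch

def clipping_summary(grad_norms, clipping_scale=2.0):
    """Summarize gradient norms the way the optim.py warnings above do.
    Assumption: the clipping threshold is clipping_scale * median, which
    matches every logged entry (e.g. 2.0 * 9.150e+01 = 1.830e+02)."""
    norms = torch.tensor(grad_norms)
    quartiles = torch.quantile(norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * quartiles[2]               # scale * median
    percent_clipped = 100.0 * (norms > threshold).float().mean()
    return quartiles.tolist(), threshold.item(), percent_clipped.item()

# Toy sample shaped like the quartiles in the entry above:
quartiles, threshold, pct = clipping_summary([77.4, 87.2, 91.5, 97.5, 282.3])
print(threshold)  # 183.0, i.e. 2.0 * the median norm
```

The occasional large max quantile (e.g. 2.823e+02 above) with percent-clipped > 0 is a single outlier batch being clipped back to the threshold, not a divergence.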
], batch size: 81, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:13:55,749 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=895300.0, ans=0.1 +2024-09-20 13:13:55,812 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=895300.0, ans=0.0 +2024-09-20 13:14:08,383 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:14:17,968 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.32 vs. limit=22.5 +2024-09-20 13:14:51,864 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.597e+01 8.617e+01 9.072e+01 9.604e+01 1.170e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 13:15:02,791 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=895460.0, ans=0.2 +2024-09-20 13:15:05,819 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=895460.0, ans=0.0 +2024-09-20 13:15:07,329 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:15:08,554 INFO [train.py:1198] (0/2) Epoch 50, batch 2150, loss[loss=0.2301, ctc_loss=0.1066, cr_loss=0.3479, attn_decoder_loss=0.2361, over 29422.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1059, cr_loss=0.3435, attn_decoder_loss=0.2357, over 5815808.45 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:15:16,426 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=895500.0, ans=0.0 +2024-09-20 13:15:34,936 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.04 vs. limit=6.0 +2024-09-20 13:15:46,436 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=895580.0, ans=0.2 +2024-09-20 13:16:03,926 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.50 vs. limit=15.0 +2024-09-20 13:16:15,141 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=895660.0, ans=0.125 +2024-09-20 13:16:23,847 INFO [train.py:1198] (0/2) Epoch 50, batch 2200, loss[loss=0.2342, ctc_loss=0.1123, cr_loss=0.3471, attn_decoder_loss=0.24, over 29659.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1059, cr_loss=0.3431, attn_decoder_loss=0.2356, over 5813062.70 frames. ], batch size: 86, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:16:24,756 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.66 vs. 
limit=6.0 +2024-09-20 13:16:44,840 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=895740.0, ans=0.125 +2024-09-20 13:16:46,358 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_abs, batch_count=895740.0, ans=0.5 +2024-09-20 13:16:52,479 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:17:03,119 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=895780.0, ans=0.125 +2024-09-20 13:17:12,404 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.75 vs. limit=15.0 +2024-09-20 13:17:18,342 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:17:23,882 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.920e+01 8.609e+01 9.131e+01 9.597e+01 2.793e+02, threshold=1.826e+02, percent-clipped=2.0 +2024-09-20 13:17:31,003 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.73 vs. limit=22.5 +2024-09-20 13:17:31,068 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.79 vs. limit=22.5 +2024-09-20 13:17:38,980 INFO [train.py:1198] (0/2) Epoch 50, batch 2250, loss[loss=0.2414, ctc_loss=0.1102, cr_loss=0.3521, attn_decoder_loss=0.2481, over 29712.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.106, cr_loss=0.3435, attn_decoder_loss=0.2357, over 5812161.97 frames. ], batch size: 82, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:17:39,416 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=895900.0, ans=0.0 +2024-09-20 13:17:47,544 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.13 vs. limit=15.0 +2024-09-20 13:18:17,387 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/checkpoint-224000.pt +2024-09-20 13:18:34,605 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.32 vs. limit=15.0 +2024-09-20 13:18:55,299 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=896060.0, ans=0.1 +2024-09-20 13:19:05,562 INFO [train.py:1198] (0/2) Epoch 50, batch 2300, loss[loss=0.2016, ctc_loss=0.08571, cr_loss=0.3127, attn_decoder_loss=0.2075, over 29340.00 frames. ], tot_loss[loss=0.2287, ctc_loss=0.1055, cr_loss=0.3424, attn_decoder_loss=0.2347, over 5800645.12 frames. 
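The Whitening entries (e.g. "metric=6.75 vs. limit=15.0") come from the Whiten modules in scaling.py, which penalize activations whose per-group covariance drifts too far from a multiple of the identity; a message is printed when the measured metric approaches or exceeds its configured limit. The following is a plausible reconstruction, assuming the metric is the ratio of the mean squared eigenvalue of the covariance to the squared mean eigenvalue, which is 1.0 for perfectly white features and grows as the spectrum spreads; the normalization in the real scaling.py code may differ in detail.

```python
import torch

def whitening_metric(x: torch.Tensor, num_groups: int) -> torch.Tensor:
    """Assumed form of the whitening metric: for each group of D channels
    with covariance C, compute D * trace(C @ C) / trace(C)^2. This equals
    1.0 when C is a multiple of the identity (fully 'white') and increases
    as the eigenvalues of C spread apart."""
    n, c = x.shape
    d = c // num_groups
    x = x.reshape(n, num_groups, d).transpose(0, 1)       # (groups, n, d)
    cov = torch.matmul(x.transpose(1, 2), x) / n          # (groups, d, d)
    trace_cov = cov.diagonal(dim1=1, dim2=2).sum(dim=1)   # trace(C) per group
    trace_cov_sq = (cov ** 2).sum(dim=(1, 2))             # trace(C @ C), C symmetric
    return (trace_cov_sq * d / trace_cov ** 2).mean()

x = torch.randn(1000, 512)                 # white Gaussian input
print(whitening_metric(x, num_groups=1))   # close to 1.0
```

Under this reading, a logged "metric=11.73 vs. limit=22.5" means the attention keys in that layer are still comfortably inside the allowed spread, so only the measurement is reported and no penalty gradient is applied.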
], batch size: 71, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:19:10,461 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=896100.0, ans=0.125 +2024-09-20 13:19:13,338 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=896100.0, ans=0.1 +2024-09-20 13:19:18,998 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=896140.0, ans=0.0 +2024-09-20 13:19:50,949 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=896220.0, ans=0.0 +2024-09-20 13:19:58,123 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=896220.0, ans=0.125 +2024-09-20 13:20:01,390 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=896220.0, ans=0.125 +2024-09-20 13:20:05,620 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.458e+01 8.477e+01 9.129e+01 9.785e+01 2.320e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-20 13:20:06,502 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.58 vs. limit=15.0 +2024-09-20 13:20:20,684 INFO [train.py:1198] (0/2) Epoch 50, batch 2350, loss[loss=0.2391, ctc_loss=0.111, cr_loss=0.3657, attn_decoder_loss=0.2452, over 29683.00 frames. ], tot_loss[loss=0.2287, ctc_loss=0.1056, cr_loss=0.3426, attn_decoder_loss=0.2348, over 5805896.02 frames. ], batch size: 83, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:20:26,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=896300.0, ans=0.125 +2024-09-20 13:20:29,876 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=896300.0, ans=0.2 +2024-09-20 13:20:44,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=896340.0, ans=0.125 +2024-09-20 13:21:11,945 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=896420.0, ans=0.0 +2024-09-20 13:21:33,251 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=896460.0, ans=0.125 +2024-09-20 13:21:36,032 INFO [train.py:1198] (0/2) Epoch 50, batch 2400, loss[loss=0.2247, ctc_loss=0.1089, cr_loss=0.3538, attn_decoder_loss=0.2297, over 29538.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1064, cr_loss=0.3448, attn_decoder_loss=0.2355, over 5809543.85 frames. 
], batch size: 76, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:21:37,799 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=896500.0, ans=0.1 +2024-09-20 13:22:22,303 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=896580.0, ans=0.125 +2024-09-20 13:22:25,356 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=896620.0, ans=0.1 +2024-09-20 13:22:26,877 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=896620.0, ans=0.0 +2024-09-20 13:22:40,129 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.900e+01 8.885e+01 9.266e+01 9.766e+01 1.218e+02, threshold=1.853e+02, percent-clipped=0.0 +2024-09-20 13:22:43,495 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=896660.0, ans=0.125 +2024-09-20 13:22:55,251 INFO [train.py:1198] (0/2) Epoch 50, batch 2450, loss[loss=0.23, ctc_loss=0.1059, cr_loss=0.3485, attn_decoder_loss=0.2361, over 29716.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1071, cr_loss=0.3456, attn_decoder_loss=0.2365, over 5785629.85 frames. ], batch size: 82, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:22:59,873 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=896700.0, ans=0.2 +2024-09-20 13:23:04,364 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=896700.0, ans=0.035 +2024-09-20 13:23:04,505 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=896700.0, ans=0.1 +2024-09-20 13:23:05,900 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=896700.0, ans=0.1 +2024-09-20 13:23:13,476 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=896740.0, ans=0.0 +2024-09-20 13:23:26,979 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=896780.0, ans=0.125 +2024-09-20 13:23:58,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=896860.0, ans=0.1 +2024-09-20 13:24:10,523 INFO [train.py:1198] (0/2) Epoch 50, batch 2500, loss[loss=0.2375, ctc_loss=0.1108, cr_loss=0.3726, attn_decoder_loss=0.2433, over 29623.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1073, cr_loss=0.3462, attn_decoder_loss=0.2364, over 5795594.10 frames. ], batch size: 86, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:24:19,740 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=896900.0, ans=0.025 +2024-09-20 13:24:48,549 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=896980.0, ans=0.125 +2024-09-20 13:24:48,875 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.21 vs. 
limit=6.0 +2024-09-20 13:25:00,576 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=897020.0, ans=0.035 +2024-09-20 13:25:10,796 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.343e+01 8.696e+01 9.094e+01 9.539e+01 5.829e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-20 13:25:25,882 INFO [train.py:1198] (0/2) Epoch 50, batch 2550, loss[loss=0.2094, ctc_loss=0.09646, cr_loss=0.3209, attn_decoder_loss=0.2148, over 29338.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1072, cr_loss=0.3461, attn_decoder_loss=0.2363, over 5799225.30 frames. ], batch size: 67, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:25:33,758 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=897100.0, ans=0.0 +2024-09-20 13:25:47,127 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=897140.0, ans=0.2 +2024-09-20 13:25:55,156 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=897140.0, ans=0.125 +2024-09-20 13:26:31,302 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.88 vs. limit=22.5 +2024-09-20 13:26:45,538 INFO [train.py:1198] (0/2) Epoch 50, batch 2600, loss[loss=0.228, ctc_loss=0.1066, cr_loss=0.3502, attn_decoder_loss=0.2337, over 29463.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.107, cr_loss=0.3455, attn_decoder_loss=0.2366, over 5794751.67 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:27:35,297 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=897420.0, ans=0.0 +2024-09-20 13:27:45,538 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.691e+01 8.589e+01 9.158e+01 9.861e+01 1.661e+02, threshold=1.832e+02, percent-clipped=0.0 +2024-09-20 13:27:54,676 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=897460.0, ans=0.0 +2024-09-20 13:28:00,332 INFO [train.py:1198] (0/2) Epoch 50, batch 2650, loss[loss=0.245, ctc_loss=0.1208, cr_loss=0.3738, attn_decoder_loss=0.2505, over 29232.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1075, cr_loss=0.3466, attn_decoder_loss=0.2372, over 5802545.07 frames. ], batch size: 100, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:28:00,691 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=897500.0, ans=0.0 +2024-09-20 13:28:00,692 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=897500.0, ans=0.125 +2024-09-20 13:28:13,250 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.19 vs. limit=22.5 +2024-09-20 13:28:14,264 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=897540.0, ans=0.125 +2024-09-20 13:28:16,402 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.16 vs. 
limit=15.0 +2024-09-20 13:28:32,196 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=897580.0, ans=0.2 +2024-09-20 13:28:33,510 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=897580.0, ans=0.025 +2024-09-20 13:28:36,062 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.74 vs. limit=10.0 +2024-09-20 13:28:44,410 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=897620.0, ans=0.125 +2024-09-20 13:29:05,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=897660.0, ans=0.125 +2024-09-20 13:29:15,401 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.14 vs. limit=12.0 +2024-09-20 13:29:15,764 INFO [train.py:1198] (0/2) Epoch 50, batch 2700, loss[loss=0.2411, ctc_loss=0.1139, cr_loss=0.3663, attn_decoder_loss=0.247, over 29532.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1076, cr_loss=0.347, attn_decoder_loss=0.2373, over 5797877.90 frames. ], batch size: 87, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:29:28,030 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=897700.0, ans=0.125 +2024-09-20 13:29:38,450 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:29:50,903 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=897780.0, ans=0.025 +2024-09-20 13:29:52,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=897780.0, ans=0.0 +2024-09-20 13:30:00,664 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=897780.0, ans=0.125 +2024-09-20 13:30:06,650 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=897820.0, ans=0.0 +2024-09-20 13:30:12,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=897820.0, ans=0.025 +2024-09-20 13:30:19,834 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.526e+01 8.670e+01 9.155e+01 9.600e+01 1.586e+02, threshold=1.831e+02, percent-clipped=0.0 +2024-09-20 13:30:26,730 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.72 vs. limit=12.0 +2024-09-20 13:30:32,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=897860.0, ans=0.2 +2024-09-20 13:30:35,022 INFO [train.py:1198] (0/2) Epoch 50, batch 2750, loss[loss=0.2184, ctc_loss=0.09926, cr_loss=0.3309, attn_decoder_loss=0.2243, over 29526.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1067, cr_loss=0.3451, attn_decoder_loss=0.236, over 5796275.49 frames. ], batch size: 75, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:30:46,400 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.54 vs. 
limit=15.0 +2024-09-20 13:30:50,386 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=897940.0, ans=0.125 +2024-09-20 13:31:02,432 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=897940.0, ans=0.125 +2024-09-20 13:31:21,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=898020.0, ans=0.0 +2024-09-20 13:31:40,237 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=898060.0, ans=0.125 +2024-09-20 13:31:41,794 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=898060.0, ans=0.2 +2024-09-20 13:31:47,867 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=898060.0, ans=0.1 +2024-09-20 13:31:50,612 INFO [train.py:1198] (0/2) Epoch 50, batch 2800, loss[loss=0.2567, ctc_loss=0.1332, cr_loss=0.3895, attn_decoder_loss=0.2618, over 19419.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1071, cr_loss=0.3458, attn_decoder_loss=0.2365, over 5776445.49 frames. ], batch size: 210, lr: 2.19e-03, grad_scale: 32.0 +2024-09-20 13:31:50,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=898100.0, ans=0.05 +2024-09-20 13:31:54,352 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.72 vs. limit=12.0 +2024-09-20 13:32:17,824 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=898140.0, ans=0.025 +2024-09-20 13:32:47,656 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=898220.0, ans=0.125 +2024-09-20 13:32:53,359 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.751e+01 9.267e+01 9.840e+01 2.500e+02, threshold=1.853e+02, percent-clipped=1.0 +2024-09-20 13:33:05,423 INFO [train.py:1198] (0/2) Epoch 50, batch 2850, loss[loss=0.2241, ctc_loss=0.101, cr_loss=0.3221, attn_decoder_loss=0.2306, over 29519.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1072, cr_loss=0.3459, attn_decoder_loss=0.2368, over 5761785.29 frames. ], batch size: 77, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:33:38,300 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=898380.0, ans=0.0 +2024-09-20 13:33:49,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=898380.0, ans=0.125 +2024-09-20 13:34:00,632 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.91 vs. limit=15.0 +2024-09-20 13:34:14,289 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.89 vs. 
limit=15.0 +2024-09-20 13:34:22,363 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=898500.0, ans=0.0 +2024-09-20 13:34:23,631 INFO [train.py:1198] (0/2) Epoch 50, batch 2900, loss[loss=0.2327, ctc_loss=0.1116, cr_loss=0.3792, attn_decoder_loss=0.2378, over 29455.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1076, cr_loss=0.3471, attn_decoder_loss=0.2377, over 5786813.83 frames. ], batch size: 79, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:34:46,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=898540.0, ans=0.125 +2024-09-20 13:35:03,194 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=898580.0, ans=0.125 +2024-09-20 13:35:18,921 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.29 vs. limit=15.0 +2024-09-20 13:35:26,851 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.844e+01 8.699e+01 9.096e+01 9.563e+01 1.472e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-20 13:35:38,791 INFO [train.py:1198] (0/2) Epoch 50, batch 2950, loss[loss=0.21, ctc_loss=0.09216, cr_loss=0.2988, attn_decoder_loss=0.2164, over 29502.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1068, cr_loss=0.3453, attn_decoder_loss=0.2367, over 5780721.70 frames. ], batch size: 75, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:35:47,334 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.70 vs. limit=15.0 +2024-09-20 13:35:56,349 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.01 vs. limit=12.0 +2024-09-20 13:36:01,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=898740.0, ans=0.05 +2024-09-20 13:36:15,129 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=898780.0, ans=0.125 +2024-09-20 13:36:31,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=898820.0, ans=0.015 +2024-09-20 13:36:48,393 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=898860.0, ans=0.025 +2024-09-20 13:36:51,956 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.98 vs. limit=22.5 +2024-09-20 13:36:53,059 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=898900.0, ans=0.0 +2024-09-20 13:36:53,077 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=898900.0, ans=0.125 +2024-09-20 13:36:54,244 INFO [train.py:1198] (0/2) Epoch 50, batch 3000, loss[loss=0.2398, ctc_loss=0.119, cr_loss=0.3718, attn_decoder_loss=0.2449, over 29748.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1064, cr_loss=0.344, attn_decoder_loss=0.2364, over 5780910.52 frames. 
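The loss[...] and tot_loss[...] fields decompose as a weighted sum of the three logged components, with weights matching the experiment directory saved with checkpoint-224000.pt earlier in this log (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02): for the tot_loss just above, 0.1 * 0.1064 + 0.9 * 0.2364 + 0.02 * 0.344 is approximately 0.2303. The helper below is illustrative (combined_loss is not a function from the codebase), but the scales and the arithmetic come straight from this log.

```python
def combined_loss(ctc_loss, attn_decoder_loss, cr_loss,
                  ctc_scale=0.1, aed_scale=0.9, cr_scale=0.02):
    """Recombine the per-batch losses the way the tot_loss fields imply.
    Scales are taken from the experiment directory name logged above
    (ctc-loss-scale-0.1, aed-loss-scale-0.9, cr-loss-scale-0.02)."""
    return ctc_scale * ctc_loss + aed_scale * attn_decoder_loss + cr_scale * cr_loss

# tot_loss of the "Epoch 50, batch 3000" entry above:
print(combined_loss(0.1064, 0.2364, 0.344))   # ~0.2303, matching loss=0.2303
# validation entry just below, where cr_loss is ~0 (7.081e-15):
print(combined_loss(0.03629, 0.2326, 0.0))    # ~0.213, matching loss=0.213
```

The essentially zero validation cr_loss is consistent with a consistency-regularization term computed between augmented views: with the training-time masking (cf. time-mask-ratio-2.5 in the directory name) disabled during validation, the two views coincide and the term vanishes.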
], batch size: 81, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:36:54,245 INFO [train.py:1221] (0/2) Computing validation loss +2024-09-20 13:37:12,393 INFO [train.py:1230] (0/2) Epoch 50, validation: loss=0.213, ctc_loss=0.03629, cr_loss=7.081e-15, attn_decoder_loss=0.2326, over 944034.00 frames. +2024-09-20 13:37:12,394 INFO [train.py:1231] (0/2) Maximum memory allocated so far is 52576MB +2024-09-20 13:37:14,930 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.18 vs. limit=6.0 +2024-09-20 13:37:20,991 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=898900.0, ans=0.1 +2024-09-20 13:37:28,207 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.72 vs. limit=15.0 +2024-09-20 13:37:39,518 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=898940.0, ans=0.125 +2024-09-20 13:37:47,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.58 vs. limit=6.0 +2024-09-20 13:37:50,177 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.65 vs. limit=12.0 +2024-09-20 13:38:00,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=899020.0, ans=0.025 +2024-09-20 13:38:04,987 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=899020.0, ans=0.1 +2024-09-20 13:38:15,270 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=899060.0, ans=0.0 +2024-09-20 13:38:19,545 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.783e+01 8.906e+01 9.324e+01 9.722e+01 1.754e+02, threshold=1.865e+02, percent-clipped=0.0 +2024-09-20 13:38:20,049 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=899060.0, ans=0.125 +2024-09-20 13:38:31,730 INFO [train.py:1198] (0/2) Epoch 50, batch 3050, loss[loss=0.2284, ctc_loss=0.1065, cr_loss=0.3333, attn_decoder_loss=0.2346, over 29547.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1072, cr_loss=0.3456, attn_decoder_loss=0.2373, over 5776508.44 frames. ], batch size: 76, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:38:47,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=899140.0, ans=0.2 +2024-09-20 13:39:06,983 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.29 vs. limit=15.0 +2024-09-20 13:39:25,703 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.18 vs. 
limit=15.0 +2024-09-20 13:39:40,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=899260.0, ans=0.0 +2024-09-20 13:39:47,283 INFO [train.py:1198] (0/2) Epoch 50, batch 3100, loss[loss=0.2453, ctc_loss=0.1109, cr_loss=0.3531, attn_decoder_loss=0.2524, over 29258.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1073, cr_loss=0.3457, attn_decoder_loss=0.2371, over 5776397.07 frames. ], batch size: 100, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:40:01,046 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=899340.0, ans=0.025 +2024-09-20 13:40:12,187 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.31 vs. limit=15.0 +2024-09-20 13:40:50,445 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.647e+01 8.844e+01 9.233e+01 9.806e+01 2.846e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-20 13:40:58,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=899460.0, ans=0.125 +2024-09-20 13:41:02,636 INFO [train.py:1198] (0/2) Epoch 50, batch 3150, loss[loss=0.2411, ctc_loss=0.114, cr_loss=0.3658, attn_decoder_loss=0.2471, over 28864.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1073, cr_loss=0.3459, attn_decoder_loss=0.2372, over 5782010.22 frames. ], batch size: 104, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:41:14,002 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=899500.0, ans=0.125 +2024-09-20 13:41:40,868 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.89 vs. limit=10.0 +2024-09-20 13:41:49,208 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=899580.0, ans=0.025 +2024-09-20 13:42:21,892 INFO [train.py:1198] (0/2) Epoch 50, batch 3200, loss[loss=0.2241, ctc_loss=0.1037, cr_loss=0.3342, attn_decoder_loss=0.2301, over 29391.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1068, cr_loss=0.345, attn_decoder_loss=0.2365, over 5792765.16 frames. ], batch size: 79, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:42:46,849 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.46 vs. limit=15.0 +2024-09-20 13:43:04,551 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:43:12,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=899820.0, ans=0.125 +2024-09-20 13:43:25,248 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.493e+01 8.416e+01 9.001e+01 9.640e+01 1.386e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-20 13:43:31,769 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:43:37,445 INFO [train.py:1198] (0/2) Epoch 50, batch 3250, loss[loss=0.2383, ctc_loss=0.112, cr_loss=0.355, attn_decoder_loss=0.2445, over 29725.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1071, cr_loss=0.3457, attn_decoder_loss=0.2372, over 5799733.26 frames. 
], batch size: 84, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:43:46,863 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=899900.0, ans=0.1 +2024-09-20 13:44:05,918 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=899980.0, ans=0.125 +2024-09-20 13:44:41,064 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=900060.0, ans=0.2 +2024-09-20 13:44:53,053 INFO [train.py:1198] (0/2) Epoch 50, batch 3300, loss[loss=0.2394, ctc_loss=0.109, cr_loss=0.3474, attn_decoder_loss=0.2461, over 28378.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1063, cr_loss=0.3439, attn_decoder_loss=0.2359, over 5797901.01 frames. ], batch size: 111, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:45:04,632 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=900100.0, ans=0.95 +2024-09-20 13:45:06,658 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.87 vs. limit=15.0 +2024-09-20 13:45:19,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=900140.0, ans=0.0 +2024-09-20 13:45:38,976 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.98 vs. limit=15.0 +2024-09-20 13:45:47,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=900220.0, ans=0.5 +2024-09-20 13:46:01,978 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.677e+01 8.683e+01 9.254e+01 9.837e+01 3.581e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-20 13:46:12,356 INFO [train.py:1198] (0/2) Epoch 50, batch 3350, loss[loss=0.2454, ctc_loss=0.1113, cr_loss=0.3472, attn_decoder_loss=0.2526, over 28855.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1069, cr_loss=0.3447, attn_decoder_loss=0.2367, over 5774119.94 frames. ], batch size: 104, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:46:18,648 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=900300.0, ans=0.09899494936611666 +2024-09-20 13:46:27,585 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=900340.0, ans=0.07 +2024-09-20 13:46:29,966 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.22 vs. limit=10.0 +2024-09-20 13:46:54,708 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=900380.0, ans=0.125 +2024-09-20 13:47:27,587 INFO [train.py:1198] (0/2) Epoch 50, batch 3400, loss[loss=0.2084, ctc_loss=0.09511, cr_loss=0.3192, attn_decoder_loss=0.2139, over 29341.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1076, cr_loss=0.3462, attn_decoder_loss=0.2368, over 5766118.44 frames. 
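The balancer fields above (min_positive, max_positive, min_abs, max_abs, prob) belong to the Balancer modules in scaling.py, which keep per-channel activation statistics inside configured bounds; ans= is the currently scheduled value of the named bound or application probability. A rough sketch of the statistics being constrained, assuming the module targets the fraction of positive values and the mean absolute value per channel; the real module enforces this through gradient-level corrections applied with probability prob, whereas the hypothetical helper below only reports violations. The bound values mirror ones seen in this log (0.05, 0.95, 0.5, 10.0).

```python
import torch

def balancer_violations(x, min_positive=0.05, max_positive=0.95,
                        min_abs=0.5, max_abs=10.0):
    """Per-channel statistics a Balancer-style module keeps in range:
    which channels are saturating negative/positive, and which have
    collapsed or exploded magnitudes. Illustrative only."""
    positive_frac = (x > 0).float().mean(dim=0)  # fraction positive per channel
    mean_abs = x.abs().mean(dim=0)               # mean magnitude per channel
    return {
        "too_negative": (positive_frac < min_positive).nonzero().flatten(),
        "too_positive": (positive_frac > max_positive).nonzero().flatten(),
        "too_small":    (mean_abs < min_abs).nonzero().flatten(),
        "too_large":    (mean_abs > max_abs).nonzero().flatten(),
    }

x = torch.randn(1000, 512)
print({k: v.numel() for k, v in balancer_violations(x).items()})
```

This explains why the same balancer names recur with stable ans= values late in training: the bounds themselves are scheduled constants by this point, and the module only intervenes on channels that drift outside them.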
], batch size: 67, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:47:30,856 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=900500.0, ans=0.125 +2024-09-20 13:47:41,532 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=900540.0, ans=0.125 +2024-09-20 13:47:52,153 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=900540.0, ans=0.125 +2024-09-20 13:48:08,814 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=900580.0, ans=0.2 +2024-09-20 13:48:16,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=900620.0, ans=0.125 +2024-09-20 13:48:32,608 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.588e+01 8.696e+01 9.262e+01 1.002e+02 2.353e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-20 13:48:44,943 INFO [train.py:1198] (0/2) Epoch 50, batch 3450, loss[loss=0.2363, ctc_loss=0.1044, cr_loss=0.3378, attn_decoder_loss=0.2435, over 28209.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1075, cr_loss=0.3463, attn_decoder_loss=0.2373, over 5774468.91 frames. ], batch size: 111, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:48:51,267 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=900700.0, ans=0.125 +2024-09-20 13:49:01,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=900740.0, ans=0.2 +2024-09-20 13:49:06,269 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=900740.0, ans=0.2 +2024-09-20 13:49:09,781 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=900740.0, ans=0.1 +2024-09-20 13:49:17,469 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:49:22,503 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=4.68 vs. limit=15.0 +2024-09-20 13:49:25,320 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=900780.0, ans=0.025 +2024-09-20 13:49:37,602 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=900820.0, ans=0.125 +2024-09-20 13:49:49,686 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=900860.0, ans=0.125 +2024-09-20 13:50:02,673 INFO [train.py:1198] (0/2) Epoch 50, batch 3500, loss[loss=0.2104, ctc_loss=0.1009, cr_loss=0.3248, attn_decoder_loss=0.2153, over 29330.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1069, cr_loss=0.3446, attn_decoder_loss=0.2364, over 5775771.68 frames. ], batch size: 71, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:50:06,619 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.24 vs. 
limit=6.0 +2024-09-20 13:50:20,871 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=900940.0, ans=0.1 +2024-09-20 13:50:49,162 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=901020.0, ans=0.2 +2024-09-20 13:51:06,274 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.056e+01 8.687e+01 9.132e+01 9.623e+01 1.623e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-20 13:51:14,589 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.02 vs. limit=15.0 +2024-09-20 13:51:16,620 INFO [train.py:1198] (0/2) Epoch 50, batch 3550, loss[loss=0.2437, ctc_loss=0.1134, cr_loss=0.3711, attn_decoder_loss=0.25, over 29704.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1067, cr_loss=0.3442, attn_decoder_loss=0.236, over 5782839.12 frames. ], batch size: 89, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:51:19,961 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=901100.0, ans=0.1 +2024-09-20 13:51:28,606 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=901100.0, ans=0.125 +2024-09-20 13:51:43,520 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=901140.0, ans=0.125 +2024-09-20 13:51:45,044 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=901180.0, ans=0.125 +2024-09-20 13:51:52,275 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=901180.0, ans=0.035 +2024-09-20 13:52:06,966 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=901220.0, ans=0.0 +2024-09-20 13:52:07,704 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.57 vs. limit=10.0 +2024-09-20 13:52:22,112 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.91 vs. limit=15.0 +2024-09-20 13:52:28,137 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.84 vs. limit=15.0 +2024-09-20 13:52:30,415 INFO [train.py:1198] (0/2) Epoch 50, batch 3600, loss[loss=0.2275, ctc_loss=0.1038, cr_loss=0.3333, attn_decoder_loss=0.2338, over 29495.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1068, cr_loss=0.3446, attn_decoder_loss=0.2363, over 5792820.40 frames. 
], batch size: 77, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:52:35,114 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=901300.0, ans=0.125 +2024-09-20 13:52:46,972 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=901340.0, ans=0.1 +2024-09-20 13:52:55,789 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=901340.0, ans=0.1 +2024-09-20 13:52:59,708 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.79 vs. limit=15.0 +2024-09-20 13:53:33,076 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=901460.0, ans=0.125 +2024-09-20 13:53:34,116 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.501e+01 8.620e+01 9.031e+01 9.703e+01 1.754e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-20 13:53:44,527 INFO [train.py:1198] (0/2) Epoch 50, batch 3650, loss[loss=0.2465, ctc_loss=0.1159, cr_loss=0.3807, attn_decoder_loss=0.2525, over 29492.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1067, cr_loss=0.3447, attn_decoder_loss=0.2359, over 5794745.94 frames. ], batch size: 90, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:53:54,229 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.55 vs. limit=15.0 +2024-09-20 13:54:02,586 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=901540.0, ans=0.125 +2024-09-20 13:54:08,598 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:54:13,090 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=901540.0, ans=0.0 +2024-09-20 13:54:19,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=4.49 vs. limit=15.0 +2024-09-20 13:54:25,494 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=901580.0, ans=0.2 +2024-09-20 13:54:36,514 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.10 vs. limit=15.0 +2024-09-20 13:54:38,857 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=901620.0, ans=0.125 +2024-09-20 13:55:02,719 INFO [train.py:1198] (0/2) Epoch 50, batch 3700, loss[loss=0.2356, ctc_loss=0.1026, cr_loss=0.3417, attn_decoder_loss=0.2428, over 29708.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1062, cr_loss=0.3437, attn_decoder_loss=0.2359, over 5803830.28 frames. ], batch size: 84, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:55:08,037 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=4.91 vs. 
limit=15.0 +2024-09-20 13:55:09,056 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=901700.0, ans=0.125 +2024-09-20 13:55:19,413 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=901740.0, ans=0.2 +2024-09-20 13:55:31,350 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.55 vs. limit=15.0 +2024-09-20 13:55:33,866 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=901780.0, ans=0.125 +2024-09-20 13:55:36,953 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=901780.0, ans=0.0 +2024-09-20 13:55:44,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.82 vs. limit=15.0 +2024-09-20 13:55:45,044 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.09 vs. limit=15.0 +2024-09-20 13:55:46,218 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.06 vs. limit=12.0 +2024-09-20 13:55:54,454 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=901820.0, ans=0.0 +2024-09-20 13:56:05,876 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.463e+01 8.521e+01 9.140e+01 9.589e+01 3.115e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-20 13:56:16,331 INFO [train.py:1198] (0/2) Epoch 50, batch 3750, loss[loss=0.2042, ctc_loss=0.08649, cr_loss=0.3014, attn_decoder_loss=0.2106, over 29328.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1064, cr_loss=0.3443, attn_decoder_loss=0.2359, over 5808121.57 frames. ], batch size: 67, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:56:30,132 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=901940.0, ans=0.2 +2024-09-20 13:56:52,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=901980.0, ans=0.0 +2024-09-20 13:56:55,121 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=901980.0, ans=0.0 +2024-09-20 13:57:30,712 INFO [train.py:1198] (0/2) Epoch 50, batch 3800, loss[loss=0.2449, ctc_loss=0.1172, cr_loss=0.3568, attn_decoder_loss=0.2512, over 29635.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1061, cr_loss=0.3436, attn_decoder_loss=0.2354, over 5798036.49 frames. 
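The grad_scale value in the batch summaries (8.0, 16.0, and 32.0 across the batches above) moves in powers of two, which is the behavior of a mixed-precision loss scaler: halve the scale when an overflow is detected, double it after a stretch of stable steps. A minimal sketch using PyTorch's own torch.cuda.amp.GradScaler, assuming the training loop uses standard AMP on a CUDA device; the model and optimizer here are placeholders, not the actual zipformer training objects.

```python
import torch

# Placeholder model/optimizer; only the scaler mechanics matter here.
model = torch.nn.Linear(80, 500).cuda()
optimizer = torch.optim.AdamW(model.parameters(), lr=2.2e-3)  # lr as logged
scaler = torch.cuda.amp.GradScaler(init_scale=16.0)

features = torch.randn(8, 80, device="cuda")
targets = torch.randint(0, 500, (8,), device="cuda")

with torch.cuda.amp.autocast():
    loss = torch.nn.functional.cross_entropy(model(features), targets)

scaler.scale(loss).backward()   # backprop on the scaled loss
scaler.step(optimizer)          # unscales grads, skips step on inf/nan
scaler.update()                 # halve scale on overflow, grow when stable
print(scaler.get_scale())       # the value reported as grad_scale in the log
```

Read this way, the drop from 32.0 to 8.0 and recovery to 16.0 over the batches above is routine fp16 overflow handling, not a training problem; the loss curves in the surrounding tot_loss entries stay flat through it.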
], batch size: 86, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:57:30,958 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=902100.0, ans=0.125 +2024-09-20 13:57:36,912 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=902100.0, ans=0.0 +2024-09-20 13:57:38,483 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=902100.0, ans=0.2 +2024-09-20 13:57:39,833 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=902100.0, ans=0.2 +2024-09-20 13:57:48,764 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=902140.0, ans=0.0 +2024-09-20 13:58:09,811 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=902180.0, ans=0.95 +2024-09-20 13:58:12,536 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=902180.0, ans=0.125 +2024-09-20 13:58:14,489 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.76 vs. limit=22.5 +2024-09-20 13:58:22,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=902220.0, ans=0.05 +2024-09-20 13:58:22,837 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=902220.0, ans=0.1 +2024-09-20 13:58:34,453 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.410e+01 8.593e+01 9.127e+01 9.556e+01 1.815e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-20 13:58:43,523 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=902300.0, ans=0.0 +2024-09-20 13:58:44,682 INFO [train.py:1198] (0/2) Epoch 50, batch 3850, loss[loss=0.2521, ctc_loss=0.1202, cr_loss=0.3833, attn_decoder_loss=0.2582, over 29308.00 frames. ], tot_loss[loss=0.2294, ctc_loss=0.1061, cr_loss=0.3439, attn_decoder_loss=0.2355, over 5811545.07 frames. ], batch size: 100, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:58:56,571 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=902300.0, ans=0.125 +2024-09-20 13:59:06,952 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=902340.0, ans=0.125 +2024-09-20 13:59:23,711 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=902380.0, ans=0.1 +2024-09-20 13:59:39,916 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=902420.0, ans=0.0 +2024-09-20 13:59:50,112 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=902460.0, ans=0.1 +2024-09-20 14:00:00,224 INFO [train.py:1198] (0/2) Epoch 50, batch 3900, loss[loss=0.2479, ctc_loss=0.115, cr_loss=0.3285, attn_decoder_loss=0.2554, over 29636.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1065, cr_loss=0.3444, attn_decoder_loss=0.2359, over 5815582.47 frames. 
], batch size: 86, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:00:05,473 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.91 vs. limit=15.0 +2024-09-20 14:00:12,392 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=902500.0, ans=0.0 +2024-09-20 14:00:40,555 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.92 vs. limit=22.5 +2024-09-20 14:00:46,138 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=902620.0, ans=0.0 +2024-09-20 14:00:59,317 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=902660.0, ans=0.0 +2024-09-20 14:01:05,029 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.795e+01 8.747e+01 9.246e+01 9.668e+01 1.412e+02, threshold=1.849e+02, percent-clipped=0.0 +2024-09-20 14:01:06,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=902660.0, ans=0.125 +2024-09-20 14:01:15,505 INFO [train.py:1198] (0/2) Epoch 50, batch 3950, loss[loss=0.2515, ctc_loss=0.1265, cr_loss=0.3954, attn_decoder_loss=0.2566, over 29475.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1067, cr_loss=0.3451, attn_decoder_loss=0.2363, over 5835275.30 frames. ], batch size: 97, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:01:29,021 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=902740.0, ans=0.125 +2024-09-20 14:01:42,248 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=902740.0, ans=0.0 +2024-09-20 14:01:46,813 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=902780.0, ans=0.1 +2024-09-20 14:01:54,051 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=902780.0, ans=0.0 +2024-09-20 14:02:03,337 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=10.74 vs. limit=15.0 +2024-09-20 14:02:16,163 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=902860.0, ans=0.1 +2024-09-20 14:02:17,607 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=902860.0, ans=0.125 +2024-09-20 14:02:20,542 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=902860.0, ans=0.0 +2024-09-20 14:02:28,914 INFO [train.py:1198] (0/2) Epoch 50, batch 4000, loss[loss=0.2067, ctc_loss=0.09226, cr_loss=0.3133, attn_decoder_loss=0.2124, over 29519.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1066, cr_loss=0.3444, attn_decoder_loss=0.2361, over 5813223.71 frames. 
], batch size: 74, lr: 2.19e-03, grad_scale: 32.0 +2024-09-20 14:02:33,567 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=902900.0, ans=0.0 +2024-09-20 14:02:36,371 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=902900.0, ans=0.2 +2024-09-20 14:02:39,457 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=902900.0, ans=0.0 +2024-09-20 14:02:39,496 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=902900.0, ans=0.0 +2024-09-20 14:02:46,722 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=902940.0, ans=0.2 +2024-09-20 14:02:57,096 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=902980.0, ans=0.125 +2024-09-20 14:03:01,561 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=902980.0, ans=0.025 +2024-09-20 14:03:17,859 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=903020.0, ans=0.125 +2024-09-20 14:03:22,512 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=903020.0, ans=0.2 +2024-09-20 14:03:22,953 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.39 vs. limit=15.0 +2024-09-20 14:03:32,688 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=903060.0, ans=0.125 +2024-09-20 14:03:33,843 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.945e+01 8.787e+01 9.327e+01 9.838e+01 2.486e+02, threshold=1.865e+02, percent-clipped=3.0 +2024-09-20 14:03:37,007 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=903060.0, ans=0.125 +2024-09-20 14:03:42,695 INFO [train.py:1198] (0/2) Epoch 50, batch 4050, loss[loss=0.2593, ctc_loss=0.1382, cr_loss=0.3817, attn_decoder_loss=0.2642, over 19583.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1067, cr_loss=0.3446, attn_decoder_loss=0.2361, over 5794791.88 frames. ], batch size: 209, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:03:53,108 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=903100.0, ans=0.0 +2024-09-20 14:04:13,513 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=903180.0, ans=0.2 +2024-09-20 14:04:44,418 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=903260.0, ans=0.0 +2024-09-20 14:04:57,484 INFO [train.py:1198] (0/2) Epoch 50, batch 4100, loss[loss=0.25, ctc_loss=0.1222, cr_loss=0.3898, attn_decoder_loss=0.2555, over 29515.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1066, cr_loss=0.344, attn_decoder_loss=0.2361, over 5790534.14 frames. 
], batch size: 90, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:05:28,222 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=903380.0, ans=0.125 +2024-09-20 14:05:45,955 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=903420.0, ans=0.1 +2024-09-20 14:05:57,391 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=903460.0, ans=0.1 +2024-09-20 14:06:04,477 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.737e+01 9.246e+01 9.591e+01 2.033e+02, threshold=1.849e+02, percent-clipped=1.0 +2024-09-20 14:06:11,996 INFO [train.py:1198] (0/2) Epoch 50, batch 4150, loss[loss=0.2292, ctc_loss=0.1104, cr_loss=0.3436, attn_decoder_loss=0.2347, over 29504.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1064, cr_loss=0.3444, attn_decoder_loss=0.2359, over 5796037.13 frames. ], batch size: 77, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:06:29,939 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=903540.0, ans=0.0 +2024-09-20 14:06:40,944 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.58 vs. limit=15.0 +2024-09-20 14:06:41,499 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=903580.0, ans=0.125 +2024-09-20 14:06:45,701 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=903580.0, ans=0.1 +2024-09-20 14:06:47,368 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=903580.0, ans=0.2 +2024-09-20 14:07:03,458 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=903620.0, ans=0.125 +2024-09-20 14:07:06,389 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=903620.0, ans=0.2 +2024-09-20 14:07:25,114 INFO [train.py:1198] (0/2) Epoch 50, batch 4200, loss[loss=0.2462, ctc_loss=0.1175, cr_loss=0.3781, attn_decoder_loss=0.2521, over 29507.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1066, cr_loss=0.345, attn_decoder_loss=0.2363, over 5799445.03 frames. ], batch size: 90, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:07:29,019 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.99 vs. 
limit=22.5 +2024-09-20 14:07:29,793 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=903700.0, ans=0.0 +2024-09-20 14:07:38,850 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=903740.0, ans=0.1 +2024-09-20 14:07:40,198 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 14:07:47,526 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=903740.0, ans=0.1 +2024-09-20 14:07:50,568 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=903740.0, ans=0.125 +2024-09-20 14:08:05,810 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.69 vs. limit=15.0 +2024-09-20 14:08:11,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=903820.0, ans=0.125 +2024-09-20 14:08:29,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=903860.0, ans=0.125 +2024-09-20 14:08:32,189 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.954e+01 8.657e+01 9.068e+01 9.554e+01 1.385e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 14:08:32,502 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=903860.0, ans=0.125 +2024-09-20 14:08:39,515 INFO [train.py:1198] (0/2) Epoch 50, batch 4250, loss[loss=0.2156, ctc_loss=0.09524, cr_loss=0.3291, attn_decoder_loss=0.2216, over 29528.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1062, cr_loss=0.344, attn_decoder_loss=0.2363, over 5806101.16 frames. ], batch size: 74, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:08:47,061 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=903900.0, ans=0.125 +2024-09-20 14:09:08,706 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=903980.0, ans=0.125 +2024-09-20 14:09:30,573 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=904020.0, ans=0.125 +2024-09-20 14:09:53,370 INFO [train.py:1198] (0/2) Epoch 50, batch 4300, loss[loss=0.238, ctc_loss=0.1084, cr_loss=0.3295, attn_decoder_loss=0.2451, over 29531.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1061, cr_loss=0.3438, attn_decoder_loss=0.2363, over 5794814.95 frames. 
], batch size: 87, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:10:17,334 INFO [scaling.py:1120] (0/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 14:10:17,357 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=904140.0, ans=0.0 +2024-09-20 14:10:59,382 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.761e+01 8.735e+01 9.239e+01 9.870e+01 1.478e+02, threshold=1.848e+02, percent-clipped=0.0 +2024-09-20 14:10:59,605 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=904260.0, ans=0.125 +2024-09-20 14:11:04,205 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=904260.0, ans=0.125 +2024-09-20 14:11:06,791 INFO [train.py:1198] (0/2) Epoch 50, batch 4350, loss[loss=0.2534, ctc_loss=0.1225, cr_loss=0.3832, attn_decoder_loss=0.2594, over 29470.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1084, cr_loss=0.3486, attn_decoder_loss=0.2394, over 5797554.90 frames. ], batch size: 97, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:11:34,166 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=904340.0, ans=0.2 +2024-09-20 14:12:06,810 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=904460.0, ans=0.125 +2024-09-20 14:12:12,629 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=904460.0, ans=0.125 +2024-09-20 14:12:18,434 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=904460.0, ans=0.025 +2024-09-20 14:12:21,024 INFO [train.py:1198] (0/2) Epoch 50, batch 4400, loss[loss=0.2408, ctc_loss=0.1191, cr_loss=0.3746, attn_decoder_loss=0.246, over 27240.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1096, cr_loss=0.3512, attn_decoder_loss=0.2411, over 5769723.67 frames. ], batch size: 124, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:12:35,959 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=904540.0, ans=0.95 +2024-09-20 14:13:25,695 INFO [scaling.py:1024] (0/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=10.80 vs. limit=12.0 +2024-09-20 14:13:27,637 WARNING [optim.py:487] (0/2) Clipping_scale=2.0, grad-norm quartiles 8.488e+01 9.232e+01 9.764e+01 1.027e+02 1.631e+02, threshold=1.953e+02, percent-clipped=0.0 +2024-09-20 14:13:35,035 INFO [train.py:1198] (0/2) Epoch 50, batch 4450, loss[loss=0.2438, ctc_loss=0.1251, cr_loss=0.3629, attn_decoder_loss=0.2489, over 20354.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.113, cr_loss=0.3571, attn_decoder_loss=0.2432, over 5584724.76 frames. 
], batch size: 210, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:14:11,982 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=904780.0, ans=0.125 +2024-09-20 14:14:22,395 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=904820.0, ans=0.2 +2024-09-20 14:14:28,335 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=904820.0, ans=0.2 +2024-09-20 14:14:29,679 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=904820.0, ans=0.0 +2024-09-20 14:14:31,189 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=904820.0, ans=0.125 +2024-09-20 14:14:40,072 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=904860.0, ans=0.0 +2024-09-20 14:14:43,449 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=904860.0, ans=0.1 +2024-09-20 14:14:50,474 INFO [train.py:1198] (0/2) Epoch 50, batch 4500, loss[loss=0.2529, ctc_loss=0.1331, cr_loss=0.3812, attn_decoder_loss=0.2577, over 19615.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1156, cr_loss=0.3592, attn_decoder_loss=0.2449, over 5240240.45 frames. ], batch size: 210, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:14:52,370 INFO [scaling.py:214] (0/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=904900.0, ans=0.95 +2024-09-20 14:15:27,054 INFO [checkpoint.py:75] (0/2) Saving checkpoint to zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1/epoch-50.pt +2024-09-20 14:15:39,984 INFO [train.py:1496] (0/2) Done! 
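Editor's note on reading the training-log entries above: each `loss[...]`/`tot_loss[...]` record reports a weighted combination of the three objectives that are also logged individually (`ctc_loss`, `cr_loss`, `attn_decoder_loss`). The weights are encoded in the checkpoint path just above (`ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02`) and repeated in the configuration dump of the companion log that follows (`'ctc_loss_scale': 0.1`, `'attention_decoder_loss_scale': 0.9`, `'cr_loss_scale': 0.02`). Below is a minimal sketch of that combination, assuming only those logged scales; the helper name `combine_losses` is ours for illustration, not an icefall API.

```python
def combine_losses(
    ctc_loss: float,
    attn_decoder_loss: float,
    cr_loss: float,
    ctc_loss_scale: float = 0.1,
    attention_decoder_loss_scale: float = 0.9,
    cr_loss_scale: float = 0.02,
) -> float:
    """Weighted sum that should reproduce the logged `loss` field."""
    return (
        ctc_loss_scale * ctc_loss
        + attention_decoder_loss_scale * attn_decoder_loss
        + cr_loss_scale * cr_loss
    )

# Cross-check against the "Epoch 50, batch 3900" tot_loss entry above:
# 0.1 * 0.1065 + 0.9 * 0.2359 + 0.02 * 0.3444 = 0.229848,
# i.e. the logged loss=0.2299 up to the 4-digit rounding of the components.
print(combine_losses(ctc_loss=0.1065, attn_decoder_loss=0.2359, cr_loss=0.3444))
```

Note that `cr_loss` (the consistency-regularization term of CR-CTC) enters with a scale of only 0.02, which is why the total tracks `attn_decoder_loss` so closely throughout these entries. The `grad_scale` field in the same records is unrelated to these loss weights; it appears to be the automatic-mixed-precision loss-scaler value (the companion log below reports `Use AMP=True` and `'use_fp16': True`), which is consistent with its doubling and halving (8.0, 16.0, 32.0) across batches.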
diff --git a/exp/log/log-train-2024-09-16-12-25-21-1 b/exp/log/log-train-2024-09-16-12-25-21-1 new file mode 100644 index 0000000000000000000000000000000000000000..fd0ecd5555777cea150034c079483c746482daa6 --- /dev/null +++ b/exp/log/log-train-2024-09-16-12-25-21-1 @@ -0,0 +1,35804 @@ +2024-09-16 12:25:21,768 INFO [train.py:1266] (1/2) Training started +2024-09-16 12:25:21,768 INFO [train.py:1276] (1/2) Device: cuda:1 +2024-09-16 12:25:21,770 INFO [train.py:1307] (1/2) Using dtype=torch.float16 +2024-09-16 12:25:21,770 INFO [train.py:1308] (1/2) Use AMP=True +2024-09-16 12:25:21,770 INFO [train.py:1310] (1/2) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'ignore_id': -1, 'label_smoothing': 0.1, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '9f6206b565b833d71e19b4411493d04d99f0a308', 'k2-git-date': 'Thu Mar 28 09:46:54 2024', 'lhotse-version': '1.27.0', 'torch-version': '2.2.2+cu118', 'torch-cuda-available': True, 'torch-cuda-version': '11.8', 'python-version': '3.10', 'icefall-git-branch': 'cr-ctc', 'icefall-git-sha1': '07d6b123-dirty', 'icefall-git-date': 'Wed Sep 4 19:33:41 2024', 'icefall-path': '/zw/mnt/yaozengwei/workspace/icefall_cr_ctc', 'k2-path': '/root/anaconda3/envs/python3.10/lib/python3.10/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/envs/python3.10/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'NGK_zengwei'}, 'world_size': 2, 'master_port': 12341, 'tensorboard': True, 'num_epochs': 50, 'start_epoch': 1, 'start_batch': 0, 'exp_dir': PosixPath('zipformer/exp-large-ctc-aed-ctc-loss-scale-0.1-aed-loss-scale-0.9-cr-loss-scale-0.02-time-mask-ratio-2.5-scaled-masked-1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'context_size': 2, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'ctc_loss_scale': 0.1, 'cr_loss_scale': 0.02, 'time_mask_ratio': 2.5, 'cr_loss_masked_scale': 1.0, 'attention_decoder_loss_scale': 0.9, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_bf16': False, 'num_encoder_layers': '2,2,4,5,4,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1536,2048,1536,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,512,768,512,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,320,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'attention_decoder_dim': 512, 'attention_decoder_num_layers': 6, 'attention_decoder_attention_dim': 512, 'attention_decoder_num_heads': 8, 'attention_decoder_feedforward_dim': 2048, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'use_transducer': False, 'use_ctc': True, 'use_attention_decoder': True, 'use_cr_ctc': True, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': False, 'spec_aug_time_warp_factor': 
80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'blank_id': 0, 'sos_id': 1, 'eos_id': 1, 'vocab_size': 500, 'dtype': torch.float16, 'use_autocast': True} +2024-09-16 12:25:21,771 INFO [train.py:1312] (1/2) About to create model +2024-09-16 12:25:22,546 INFO [train.py:1316] (1/2) Number of model parameters: 174319650 +2024-09-16 12:25:22,546 INFO [train.py:752] (1/2) num_frame_masks: 25.0, max_frames_mask_fraction: 0.375 +2024-09-16 12:25:24,271 INFO [train.py:1338] (1/2) Using DDP +2024-09-16 12:25:26,292 INFO [asr_datamodule.py:436] (1/2) About to get the shuffled train-clean-100, train-clean-360 and train-other-500 cuts +2024-09-16 12:25:26,295 INFO [asr_datamodule.py:232] (1/2) Enable MUSAN +2024-09-16 12:25:26,295 INFO [asr_datamodule.py:233] (1/2) About to get Musan cuts +2024-09-16 12:25:28,332 INFO [asr_datamodule.py:279] (1/2) Disable SpecAugment +2024-09-16 12:25:28,332 INFO [asr_datamodule.py:281] (1/2) About to create train dataset +2024-09-16 12:25:28,332 INFO [asr_datamodule.py:308] (1/2) Using DynamicBucketingSampler. +2024-09-16 12:25:29,137 INFO [asr_datamodule.py:325] (1/2) About to create train dataloader +2024-09-16 12:25:29,138 INFO [asr_datamodule.py:453] (1/2) About to get dev-clean cuts +2024-09-16 12:25:29,140 INFO [asr_datamodule.py:460] (1/2) About to get dev-other cuts +2024-09-16 12:25:29,141 INFO [asr_datamodule.py:356] (1/2) About to create dev dataset +2024-09-16 12:25:29,338 INFO [asr_datamodule.py:373] (1/2) About to create dev dataloader +2024-09-16 12:25:29,338 INFO [train.py:1545] (1/2) Sanity check -- see if any of the batches in epoch 1 would cause OOM. +2024-09-16 12:28:13,904 INFO [train.py:1576] (1/2) Maximum memory allocated so far is 47142MB +2024-09-16 12:28:15,781 INFO [train.py:1576] (1/2) Maximum memory allocated so far is 47224MB +2024-09-16 12:28:17,892 INFO [train.py:1576] (1/2) Maximum memory allocated so far is 47710MB +2024-09-16 12:28:19,143 INFO [scaling.py:1024] (1/2) Whitening: name=None, num_groups=1, num_channels=512, metric=116.89 vs. limit=7.5 +2024-09-16 12:28:20,063 INFO [train.py:1576] (1/2) Maximum memory allocated so far is 48343MB +2024-09-16 12:28:22,108 INFO [scaling.py:1024] (1/2) Whitening: name=None, num_groups=4, num_channels=128, metric=9.14 vs. limit=3.0 +2024-09-16 12:28:22,391 INFO [train.py:1576] (1/2) Maximum memory allocated so far is 48343MB +2024-09-16 12:28:24,634 INFO [train.py:1576] (1/2) Maximum memory allocated so far is 48343MB +2024-09-16 12:28:54,377 INFO [train.py:1198] (1/2) Epoch 1, batch 0, loss[loss=8.202, ctc_loss=4.745, cr_loss=0.5637, attn_decoder_loss=8.573, over 29616.00 frames. ], tot_loss[loss=8.202, ctc_loss=4.745, cr_loss=0.5637, attn_decoder_loss=8.573, over 29616.00 frames. ], batch size: 73, lr: 2.25e-02, grad_scale: 2.0 +2024-09-16 12:28:54,378 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 12:29:13,592 INFO [train.py:1230] (1/2) Epoch 1, validation: loss=8.234, ctc_loss=4.87, cr_loss=1.182e-15, attn_decoder_loss=8.607, over 944034.00 frames. +2024-09-16 12:29:13,593 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 48575MB +2024-09-16 12:29:14,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.69 vs. limit=5.0 +2024-09-16 12:29:14,503 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.59 vs. 
limit=7.5 +2024-09-16 12:29:15,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=0.0, ans=0.1 +2024-09-16 12:29:17,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=0.0, ans=0.5 +2024-09-16 12:29:24,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=0.0, ans=0.5 +2024-09-16 12:29:31,005 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 2.329e+03 2.556e+03 3.012e+03 3.068e+03 4.530e+03, threshold=1.205e+04, percent-clipped=0.0 +2024-09-16 12:29:31,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=40.0, ans=0.498125 +2024-09-16 12:29:44,996 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=108.63 vs. limit=7.515 +2024-09-16 12:29:51,255 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.743e+03 2.098e+03 2.580e+03 3.037e+03 5.426e+03, threshold=1.032e+04, percent-clipped=0.0 +2024-09-16 12:29:57,810 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=15.37 vs. limit=7.53 +2024-09-16 12:30:05,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.19 vs. limit=4.032 +2024-09-16 12:30:15,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=120.0, ans=0.8958 +2024-09-16 12:30:25,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=120.0, ans=0.09925 +2024-09-16 12:30:28,473 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.091e+02 1.328e+03 1.895e+03 2.580e+03 5.426e+03, threshold=7.580e+03, percent-clipped=0.0 +2024-09-16 12:30:28,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=160.0, ans=0.20240000000000002 +2024-09-16 12:30:28,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=160.0, ans=0.0495 +2024-09-16 12:30:30,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=16.52 vs. limit=7.56 +2024-09-16 12:30:37,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=31.42 vs. limit=7.56 +2024-09-16 12:30:47,304 INFO [train.py:1198] (1/2) Epoch 1, batch 50, loss[loss=1.808, ctc_loss=1.139, cr_loss=0.1645, attn_decoder_loss=1.878, over 29441.00 frames. ], tot_loss[loss=3.656, ctc_loss=2.001, cr_loss=0.2576, attn_decoder_loss=3.834, over 1266883.16 frames. ], batch size: 70, lr: 2.48e-02, grad_scale: 2.0 +2024-09-16 12:30:57,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=200.0, ans=0.1925 +2024-09-16 12:31:00,007 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.04 vs. 
limit=3.03 +2024-09-16 12:31:07,607 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=24.84 vs. limit=7.59 +2024-09-16 12:31:10,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=240.0, ans=0.04925 +2024-09-16 12:31:11,182 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=20.91 vs. limit=7.59 +2024-09-16 12:31:11,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=29.65 vs. limit=7.68 +2024-09-16 12:31:12,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=240.0, ans=0.2476 +2024-09-16 12:31:18,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=194.75 vs. limit=7.59 +2024-09-16 12:31:26,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=19.49 vs. limit=5.07 +2024-09-16 12:31:26,645 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=20.56 vs. limit=5.07 +2024-09-16 12:31:31,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=280.0, ans=0.486875 +2024-09-16 12:31:36,260 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=14.97 vs. limit=7.605 +2024-09-16 12:31:47,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=24.93 vs. limit=7.74 +2024-09-16 12:32:00,597 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=23.07 vs. limit=7.62 +2024-09-16 12:32:02,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=25.85 vs. limit=7.77 +2024-09-16 12:32:04,321 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=12.43 vs. 
limit=5.18 +2024-09-16 12:32:05,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=360.0, ans=0.2464 +2024-09-16 12:32:09,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_na.min_abs, batch_count=360.0, ans=0.00544 +2024-09-16 12:32:11,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=360.0, ans=0.7536 +2024-09-16 12:32:18,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=360.0, ans=0.455 +2024-09-16 12:32:22,337 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 4.399e+02 6.271e+02 8.906e+02 1.633e+03 5.426e+03, threshold=1.781e+03, percent-clipped=0.0 +2024-09-16 12:32:22,360 INFO [train.py:1198] (1/2) Epoch 1, batch 100, loss[loss=1.164, ctc_loss=1.146, cr_loss=0.144, attn_decoder_loss=1.163, over 29531.00 frames. ], tot_loss[loss=2.448, ctc_loss=1.559, cr_loss=0.1868, attn_decoder_loss=2.543, over 2252420.33 frames. ], batch size: 76, lr: 2.70e-02, grad_scale: 4.0 +2024-09-16 12:32:29,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=25.52 vs. limit=7.8 +2024-09-16 12:32:34,388 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=69.45 vs. limit=5.2 +2024-09-16 12:32:40,227 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=14.56 vs. limit=5.22 +2024-09-16 12:32:47,747 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=45.77 vs. limit=5.22 +2024-09-16 12:32:57,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.min_positive, batch_count=440.0, ans=0.048625 +2024-09-16 12:32:57,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.84 vs. limit=7.665 +2024-09-16 12:33:01,891 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=13.79 vs. limit=7.86 +2024-09-16 12:33:02,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=124.81 vs. limit=5.24 +2024-09-16 12:33:13,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=480.0, ans=7.68 +2024-09-16 12:33:16,171 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=73.62 vs. limit=7.68 +2024-09-16 12:33:24,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=13.71 vs. limit=5.13 +2024-09-16 12:33:26,234 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=186.24 vs. 
limit=7.695 +2024-09-16 12:33:29,387 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.19 vs. limit=5.13 +2024-09-16 12:33:29,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten.whitening_limit, batch_count=520.0, ans=7.695 +2024-09-16 12:33:35,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.46 vs. limit=7.89 +2024-09-16 12:33:42,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=560.0, ans=0.0965 +2024-09-16 12:33:43,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=11.76 vs. limit=5.14 +2024-09-16 12:33:48,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=560.0, ans=7.92 +2024-09-16 12:33:51,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=560.0, ans=0.2944 +2024-09-16 12:33:55,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=560.0, ans=0.43 +2024-09-16 12:33:55,369 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.40 vs. limit=4.224 +2024-09-16 12:33:57,370 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.46 vs. limit=5.15 +2024-09-16 12:33:58,496 INFO [train.py:1198] (1/2) Epoch 1, batch 150, loss[loss=0.9927, ctc_loss=1.104, cr_loss=0.1141, attn_decoder_loss=0.9778, over 29389.00 frames. ], tot_loss[loss=1.878, ctc_loss=1.396, cr_loss=0.1601, attn_decoder_loss=1.928, over 3047358.29 frames. ], batch size: 70, lr: 2.93e-02, grad_scale: 4.0 +2024-09-16 12:34:08,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=186.05 vs. limit=7.725 +2024-09-16 12:34:12,484 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=29.67 vs. limit=7.95 +2024-09-16 12:34:15,819 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=10.63 vs. limit=5.3 +2024-09-16 12:34:19,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.96 vs. limit=7.74 +2024-09-16 12:34:21,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=99.27 vs. limit=7.74 +2024-09-16 12:34:27,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=12.05 vs. limit=7.74 +2024-09-16 12:34:29,523 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=165.26 vs. 
limit=7.74 +2024-09-16 12:34:31,339 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=7.12 vs. limit=4.256 +2024-09-16 12:34:34,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=640.0, ans=0.048 +2024-09-16 12:34:42,176 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.95 vs. limit=8.01 +2024-09-16 12:34:42,288 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=26.97 vs. limit=7.755 +2024-09-16 12:34:47,800 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.16 vs. limit=5.17 +2024-09-16 12:34:47,985 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.48 vs. limit=8.01 +2024-09-16 12:34:51,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=33.57 vs. limit=7.755 +2024-09-16 12:34:51,949 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.86 vs. limit=8.01 +2024-09-16 12:35:03,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.06 vs. limit=8.04 +2024-09-16 12:35:07,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=720.0, ans=0.0838 +2024-09-16 12:35:07,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=720.0, ans=0.8748 +2024-09-16 12:35:10,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=28.19 vs. limit=8.04 +2024-09-16 12:35:20,261 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.01 vs. limit=8.07 +2024-09-16 12:35:22,339 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=146.22 vs. limit=7.785 +2024-09-16 12:35:33,417 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=12.84 vs. limit=7.785 +2024-09-16 12:35:36,603 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.861e+02 2.379e+02 2.686e+02 3.220e+02 5.129e+02, threshold=5.373e+02, percent-clipped=0.0 +2024-09-16 12:35:36,626 INFO [train.py:1198] (1/2) Epoch 1, batch 200, loss[loss=1.047, ctc_loss=1.222, cr_loss=0.1246, attn_decoder_loss=1.024, over 27198.00 frames. ], tot_loss[loss=1.577, ctc_loss=1.316, cr_loss=0.1467, attn_decoder_loss=1.603, over 3658843.15 frames. ], batch size: 124, lr: 3.15e-02, grad_scale: 8.0 +2024-09-16 12:35:39,408 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=23.60 vs. 
limit=7.8 +2024-09-16 12:35:45,327 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.95 vs. limit=5.2 +2024-09-16 12:35:47,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=17.84 vs. limit=7.8 +2024-09-16 12:35:49,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=136.17 vs. limit=5.4 +2024-09-16 12:36:02,610 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=16.04 vs. limit=5.21 +2024-09-16 12:36:07,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=98.50 vs. limit=7.815 +2024-09-16 12:36:09,964 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=54.95 vs. limit=7.815 +2024-09-16 12:36:15,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=93.61 vs. limit=5.44 +2024-09-16 12:36:19,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.77 vs. limit=4.352 +2024-09-16 12:36:30,591 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=181.72 vs. limit=7.83 +2024-09-16 12:36:34,559 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=98.41 vs. limit=7.845 +2024-09-16 12:36:42,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=42.86 vs. limit=7.845 +2024-09-16 12:36:44,111 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.94 vs. limit=8.19 +2024-09-16 12:36:49,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=19.54 vs. limit=7.845 +2024-09-16 12:36:52,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.12 vs. limit=8.22 +2024-09-16 12:36:55,832 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=27.69 vs. limit=7.86 +2024-09-16 12:36:59,518 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.88 vs. limit=5.24 +2024-09-16 12:37:11,721 INFO [train.py:1198] (1/2) Epoch 1, batch 250, loss[loss=1.036, ctc_loss=1.222, cr_loss=0.1226, attn_decoder_loss=1.013, over 29354.00 frames. ], tot_loss[loss=1.397, ctc_loss=1.271, cr_loss=0.1401, attn_decoder_loss=1.408, over 4140934.70 frames. ], batch size: 100, lr: 3.38e-02, grad_scale: 8.0 +2024-09-16 12:37:12,769 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=7.70 vs. 
limit=4.4 +2024-09-16 12:37:14,686 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.93 vs. limit=7.875 +2024-09-16 12:37:20,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=56.59 vs. limit=5.5 +2024-09-16 12:37:25,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.06 vs. limit=8.25 +2024-09-16 12:37:35,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.69 vs. limit=8.28 +2024-09-16 12:37:42,580 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=22.58 vs. limit=5.52 +2024-09-16 12:37:46,488 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.39 vs. limit=7.89 +2024-09-16 12:37:54,079 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=22.02 vs. limit=7.905 +2024-09-16 12:38:01,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.68 vs. limit=3.162 +2024-09-16 12:38:07,193 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=64.20 vs. limit=7.905 +2024-09-16 12:38:09,073 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=111.23 vs. limit=7.92 +2024-09-16 12:38:14,543 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.11 vs. limit=8.34 +2024-09-16 12:38:15,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=1120.0, ans=0.4475 +2024-09-16 12:38:15,749 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=42.19 vs. limit=8.34 +2024-09-16 12:38:19,414 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=56.07 vs. limit=7.92 +2024-09-16 12:38:24,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=1120.0, ans=0.4475 +2024-09-16 12:38:25,633 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.66 vs. limit=8.34 +2024-09-16 12:38:43,602 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=117.70 vs. limit=7.935 +2024-09-16 12:38:45,788 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=182.69 vs. 
limit=7.935 +2024-09-16 12:38:48,400 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.367e+02 1.660e+02 1.791e+02 1.982e+02 5.267e+02, threshold=3.582e+02, percent-clipped=0.0 +2024-09-16 12:38:48,422 INFO [train.py:1198] (1/2) Epoch 1, batch 300, loss[loss=0.9973, ctc_loss=1.216, cr_loss=0.1478, attn_decoder_loss=0.9697, over 29561.00 frames. ], tot_loss[loss=1.273, ctc_loss=1.239, cr_loss=0.1387, attn_decoder_loss=1.273, over 4509441.95 frames. ], batch size: 92, lr: 3.60e-02, grad_scale: 8.0 +2024-09-16 12:38:51,085 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=79.84 vs. limit=7.95 +2024-09-16 12:38:55,567 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=53.69 vs. limit=7.95 +2024-09-16 12:38:59,260 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.86 vs. limit=5.3 +2024-09-16 12:39:03,030 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.10 vs. limit=5.3 +2024-09-16 12:39:05,266 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=8.95 vs. limit=8.4 +2024-09-16 12:39:16,196 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.09 vs. limit=7.965 +2024-09-16 12:39:22,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=1240.0, ans=0.21860000000000002 +2024-09-16 12:39:22,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=1240.0, ans=0.441875 +2024-09-16 12:39:26,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=1280.0, ans=0.178 +2024-09-16 12:39:41,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=1280.0, ans=0.2872 +2024-09-16 12:39:42,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=22.55 vs. limit=7.98 +2024-09-16 12:39:57,144 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=17.48 vs. limit=7.995 +2024-09-16 12:40:01,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=1320.0, ans=0.438125 +2024-09-16 12:40:02,860 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.82 vs. limit=8.49 +2024-09-16 12:40:08,972 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=32.31 vs. 
limit=8.01 +2024-09-16 12:40:09,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=1360.0, ans=0.8524 +2024-09-16 12:40:14,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=11.43 vs. limit=8.01 +2024-09-16 12:40:16,576 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.03 vs. limit=8.52 +2024-09-16 12:40:24,353 INFO [train.py:1198] (1/2) Epoch 1, batch 350, loss[loss=0.8701, ctc_loss=1.057, cr_loss=0.173, attn_decoder_loss=0.8455, over 29365.00 frames. ], tot_loss[loss=1.186, ctc_loss=1.217, cr_loss=0.1438, attn_decoder_loss=1.18, over 4794592.24 frames. ], batch size: 71, lr: 3.83e-02, grad_scale: 8.0 +2024-09-16 12:40:24,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=1400.0, ans=0.5 +2024-09-16 12:40:33,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=1400.0, ans=0.434375 +2024-09-16 12:40:38,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=14.83 vs. limit=5.35 +2024-09-16 12:40:54,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.72 vs. limit=8.04 +2024-09-16 12:40:55,350 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=155.97 vs. limit=8.04 +2024-09-16 12:41:04,091 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.44 vs. limit=8.61 +2024-09-16 12:41:05,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=1480.0, ans=0.09075000000000001 +2024-09-16 12:41:10,018 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.31 vs. limit=8.055 +2024-09-16 12:41:22,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=1520.0, ans=0.42875 +2024-09-16 12:41:39,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=1560.0, ans=0.22340000000000002 +2024-09-16 12:41:39,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.62 vs. limit=8.085 +2024-09-16 12:41:42,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=1560.0, ans=0.06490000000000001 +2024-09-16 12:41:45,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.97 vs. limit=8.085 +2024-09-16 12:41:47,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.19 vs. 
limit=8.67 +2024-09-16 12:41:54,500 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=25.85 vs. limit=8.085 +2024-09-16 12:41:57,757 INFO [train.py:1198] (1/2) Epoch 1, batch 400, loss[loss=0.939, ctc_loss=1.148, cr_loss=0.2013, attn_decoder_loss=0.9113, over 29723.00 frames. ], tot_loss[loss=1.118, ctc_loss=1.196, cr_loss=0.156, attn_decoder_loss=1.106, over 5024260.41 frames. ], batch size: 82, lr: 4.05e-02, grad_scale: 8.0 +2024-09-16 12:41:59,565 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.379e+02 1.617e+02 1.838e+02 2.123e+02 1.289e+03, threshold=3.677e+02, percent-clipped=4.0 +2024-09-16 12:42:14,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=1600.0, ans=0.09000000000000001 +2024-09-16 12:42:15,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.38 vs. limit=8.7 +2024-09-16 12:42:16,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=1640.0, ans=0.423125 +2024-09-16 12:42:26,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=1640.0, ans=0.1385 +2024-09-16 12:42:36,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=14.33 vs. limit=8.76 +2024-09-16 12:42:36,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=1680.0, ans=0.0622 +2024-09-16 12:42:41,770 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=20.29 vs. limit=8.13 +2024-09-16 12:42:50,876 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=13.13 vs. limit=8.13 +2024-09-16 12:42:57,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=1720.0, ans=6.075 +2024-09-16 12:42:59,563 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=6.25 vs. limit=4.688 +2024-09-16 12:43:03,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=9.12 vs. limit=8.145 +2024-09-16 12:43:29,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=45.74 vs. limit=8.16 +2024-09-16 12:43:33,846 INFO [train.py:1198] (1/2) Epoch 1, batch 450, loss[loss=0.9004, ctc_loss=1.122, cr_loss=0.281, attn_decoder_loss=0.8696, over 29687.00 frames. ], tot_loss[loss=1.065, ctc_loss=1.174, cr_loss=0.1714, attn_decoder_loss=1.049, over 5187486.67 frames. ], batch size: 83, lr: 4.28e-02, grad_scale: 8.0 +2024-09-16 12:43:41,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=1800.0, ans=0.282 +2024-09-16 12:44:00,243 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=31.25 vs. 
limit=8.19 +2024-09-16 12:44:03,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=1840.0, ans=0.41375 +2024-09-16 12:44:04,101 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.98 vs. limit=8.879999999999999 +2024-09-16 12:44:09,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.98 vs. limit=8.91 +2024-09-16 12:44:14,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=1880.0, ans=0.14425 +2024-09-16 12:44:16,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=1880.0, ans=6.175 +2024-09-16 12:44:29,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=101.12 vs. limit=8.22 +2024-09-16 12:44:34,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=1920.0, ans=0.26 +2024-09-16 12:44:44,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.86 vs. limit=8.94 +2024-09-16 12:44:51,176 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=61.13 vs. limit=8.235 +2024-09-16 12:44:58,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=17.68 vs. limit=8.235 +2024-09-16 12:45:01,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=1960.0, ans=0.2804 +2024-09-16 12:45:03,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.76 vs. limit=9.0 +2024-09-16 12:45:04,717 INFO [train.py:1198] (1/2) Epoch 1, batch 500, loss[loss=0.9344, ctc_loss=1.145, cr_loss=0.2492, attn_decoder_loss=0.9055, over 29440.00 frames. ], tot_loss[loss=1.021, ctc_loss=1.152, cr_loss=0.1904, attn_decoder_loss=1.003, over 5329203.13 frames. ], batch size: 94, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:45:06,532 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.102e+02 1.448e+02 1.612e+02 2.007e+02 3.487e+02, threshold=3.225e+02, percent-clipped=0.0 +2024-09-16 12:45:11,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.58 vs. limit=8.25 +2024-09-16 12:45:11,691 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=19.74 vs. limit=8.25 +2024-09-16 12:45:13,489 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.10 vs. limit=5.5 +2024-09-16 12:45:17,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=4.77 vs. 
limit=4.4 +2024-09-16 12:45:30,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=2040.0, ans=0.404375 +2024-09-16 12:45:42,566 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=11.63 vs. limit=8.28 +2024-09-16 12:45:43,847 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=23.93 vs. limit=8.28 +2024-09-16 12:45:44,384 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=8.74 vs. limit=8.28 +2024-09-16 12:45:45,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=36.98 vs. limit=8.28 +2024-09-16 12:45:53,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=75.23 vs. limit=8.28 +2024-09-16 12:46:10,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=2120.0, ans=0.1205 +2024-09-16 12:46:13,427 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=20.05 vs. limit=8.295 +2024-09-16 12:46:19,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=2160.0, ans=0.39875 +2024-09-16 12:46:20,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.80 vs. limit=5.54 +2024-09-16 12:46:25,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=2160.0, ans=0.39875 +2024-09-16 12:46:31,257 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=32.70 vs. limit=8.31 +2024-09-16 12:46:34,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=77.55 vs. limit=8.325 +2024-09-16 12:46:35,772 INFO [train.py:1198] (1/2) Epoch 1, batch 550, loss[loss=0.8929, ctc_loss=1.072, cr_loss=0.3298, attn_decoder_loss=0.8657, over 28897.00 frames. ], tot_loss[loss=0.9889, ctc_loss=1.13, cr_loss=0.211, attn_decoder_loss=0.9685, over 5422479.21 frames. ], batch size: 104, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:46:38,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=2200.0, ans=0.22499999999999998 +2024-09-16 12:46:44,056 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=29.44 vs. limit=8.325 +2024-09-16 12:46:46,287 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=20.21 vs. 
limit=8.325 +2024-09-16 12:46:51,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=2200.0, ans=0.396875 +2024-09-16 12:46:59,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten.whitening_limit, batch_count=2240.0, ans=8.34 +2024-09-16 12:47:26,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=2280.0, ans=0.2772 +2024-09-16 12:47:34,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=13.56 vs. limit=9.24 +2024-09-16 12:47:43,374 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=13.60 vs. limit=9.24 +2024-09-16 12:47:52,592 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.83 vs. limit=9.27 +2024-09-16 12:48:03,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.09 vs. limit=4.944 +2024-09-16 12:48:04,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=2360.0, ans=0.2764 +2024-09-16 12:48:05,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.21 vs. limit=8.385 +2024-09-16 12:48:12,274 INFO [train.py:1198] (1/2) Epoch 1, batch 600, loss[loss=0.9101, ctc_loss=1.052, cr_loss=0.3234, attn_decoder_loss=0.8871, over 29327.00 frames. ], tot_loss[loss=0.9609, ctc_loss=1.105, cr_loss=0.2342, attn_decoder_loss=0.9397, over 5509506.32 frames. ], batch size: 100, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:48:12,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=2400.0, ans=0.3875 +2024-09-16 12:48:14,063 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.060e+02 1.236e+02 1.461e+02 1.874e+02 1.065e+03, threshold=2.921e+02, percent-clipped=6.0 +2024-09-16 12:48:21,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=2400.0, ans=0.046 +2024-09-16 12:48:31,034 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=28.44 vs. limit=8.415 +2024-09-16 12:48:39,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=2440.0, ans=0.385625 +2024-09-16 12:48:44,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=15.00 vs. limit=8.415 +2024-09-16 12:48:48,729 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.37 vs. limit=4.992 +2024-09-16 12:48:48,965 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.62 vs. 
limit=8.43 +2024-09-16 12:48:49,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=2480.0, ans=8.43 +2024-09-16 12:49:03,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.07 vs. limit=8.43 +2024-09-16 12:49:03,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.97 vs. limit=8.43 +2024-09-16 12:49:04,733 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.13 vs. limit=9.39 +2024-09-16 12:49:10,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=16.83 vs. limit=8.445 +2024-09-16 12:49:16,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=2520.0, ans=0.1055 +2024-09-16 12:49:19,158 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=13.28 vs. limit=9.39 +2024-09-16 12:49:19,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.82 vs. limit=8.445 +2024-09-16 12:49:29,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=2560.0, ans=0.38 +2024-09-16 12:49:34,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=2560.0, ans=0.38 +2024-09-16 12:49:38,819 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.98 vs. limit=5.024 +2024-09-16 12:49:41,293 INFO [train.py:1198] (1/2) Epoch 1, batch 650, loss[loss=0.8594, ctc_loss=0.9786, cr_loss=0.3531, attn_decoder_loss=0.8383, over 29748.00 frames. ], tot_loss[loss=0.932, ctc_loss=1.071, cr_loss=0.2569, attn_decoder_loss=0.9108, over 5586870.07 frames. ], batch size: 81, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:49:46,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=2600.0, ans=0.809 +2024-09-16 12:49:53,040 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.12 vs. limit=9.45 +2024-09-16 12:50:08,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.12 vs. limit=6.32 +2024-09-16 12:50:10,798 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.61 vs. limit=5.056 +2024-09-16 12:50:12,517 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=44.71 vs. limit=8.49 +2024-09-16 12:50:14,856 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=3.95 vs. 
limit=4.5280000000000005 +2024-09-16 12:50:18,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=2680.0, ans=0.374375 +2024-09-16 12:50:26,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=2680.0, ans=0.374375 +2024-09-16 12:50:27,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=2680.0, ans=0.374375 +2024-09-16 12:50:29,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=2680.0, ans=0.09949999999999999 +2024-09-16 12:50:39,505 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.18 vs. limit=8.52 +2024-09-16 12:50:54,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.75 vs. limit=9.57 +2024-09-16 12:51:06,418 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=11.80 vs. limit=9.57 +2024-09-16 12:51:06,607 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.52 vs. limit=9.57 +2024-09-16 12:51:08,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=5.77 vs. limit=4.552 +2024-09-16 12:51:12,610 INFO [train.py:1198] (1/2) Epoch 1, batch 700, loss[loss=0.7594, ctc_loss=0.8418, cr_loss=0.327, attn_decoder_loss=0.743, over 29566.00 frames. ], tot_loss[loss=0.9094, ctc_loss=1.043, cr_loss=0.2789, attn_decoder_loss=0.8884, over 5637769.27 frames. ], batch size: 76, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:51:14,379 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.893e+01 1.304e+02 1.539e+02 2.330e+02 9.417e+02, threshold=3.077e+02, percent-clipped=6.0 +2024-09-16 12:51:17,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=9.20 vs. limit=8.55 +2024-09-16 12:51:30,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=2840.0, ans=0.2216 +2024-09-16 12:51:35,258 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.47 vs. limit=5.71 +2024-09-16 12:51:39,851 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.34 vs. limit=9.629999999999999 +2024-09-16 12:51:49,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=18.12 vs. limit=8.58 +2024-09-16 12:51:52,898 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.00 vs. 
limit=8.58 +2024-09-16 12:51:57,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=2880.0, ans=0.365 +2024-09-16 12:52:05,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=2920.0, ans=0.36312500000000003 +2024-09-16 12:52:10,289 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.53 vs. limit=8.595 +2024-09-16 12:52:18,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=2920.0, ans=0.36312500000000003 +2024-09-16 12:52:18,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.12 vs. limit=8.595 +2024-09-16 12:52:22,217 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.82 vs. limit=8.595 +2024-09-16 12:52:28,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.33 vs. limit=5.74 +2024-09-16 12:52:33,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=2960.0, ans=0.13 +2024-09-16 12:52:41,069 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.64 vs. limit=9.72 +2024-09-16 12:52:43,662 INFO [train.py:1198] (1/2) Epoch 1, batch 750, loss[loss=0.7579, ctc_loss=0.8974, cr_loss=0.3135, attn_decoder_loss=0.7355, over 29698.00 frames. ], tot_loss[loss=0.8801, ctc_loss=1.01, cr_loss=0.2922, attn_decoder_loss=0.8591, over 5676368.83 frames. ], batch size: 82, lr: 4.49e-02, grad_scale: 8.0 +2024-09-16 12:52:44,362 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=6.51 vs. limit=5.75 +2024-09-16 12:52:44,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=11.26 vs. limit=9.75 +2024-09-16 12:52:49,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=3000.0, ans=0.245 +2024-09-16 12:52:51,828 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.29 vs. limit=8.625 +2024-09-16 12:53:00,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=3000.0, ans=0.040625 +2024-09-16 12:53:22,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=3080.0, ans=0.08449999999999999 +2024-09-16 12:53:43,195 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=21.32 vs. limit=6.5600000000000005 +2024-09-16 12:53:44,978 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.79 vs. 
limit=3.468 +2024-09-16 12:53:49,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=3120.0, ans=0.2688 +2024-09-16 12:53:50,198 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.71 vs. limit=6.5600000000000005 +2024-09-16 12:53:55,236 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=10.68 vs. limit=9.870000000000001 +2024-09-16 12:54:12,619 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.50 vs. limit=5.8 +2024-09-16 12:54:14,212 INFO [train.py:1198] (1/2) Epoch 1, batch 800, loss[loss=0.6653, ctc_loss=0.7983, cr_loss=0.3214, attn_decoder_loss=0.6434, over 29608.00 frames. ], tot_loss[loss=0.8458, ctc_loss=0.9755, cr_loss=0.3007, attn_decoder_loss=0.8247, over 5706530.28 frames. ], batch size: 73, lr: 4.49e-02, grad_scale: 16.0 +2024-09-16 12:54:15,975 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.656e+02 2.537e+02 3.189e+02 4.432e+02 8.958e+02, threshold=6.378e+02, percent-clipped=52.0 +2024-09-16 12:54:25,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.72 vs. limit=8.7 +2024-09-16 12:54:42,939 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.55 vs. limit=8.715 +2024-09-16 12:54:47,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=3280.0, ans=0.2172 +2024-09-16 12:54:51,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=3280.0, ans=0.34625 +2024-09-16 12:54:54,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=3280.0, ans=0.2672 +2024-09-16 12:54:58,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=3280.0, ans=0.34625 +2024-09-16 12:55:38,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=3360.0, ans=0.3425 +2024-09-16 12:55:40,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_na.min_abs, batch_count=3360.0, ans=0.017439999999999997 +2024-09-16 12:55:41,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=3400.0, ans=0.340625 +2024-09-16 12:55:43,347 INFO [train.py:1198] (1/2) Epoch 1, batch 850, loss[loss=0.7134, ctc_loss=0.8627, cr_loss=0.3776, attn_decoder_loss=0.6884, over 29710.00 frames. ], tot_loss[loss=0.8067, ctc_loss=0.9395, cr_loss=0.3063, attn_decoder_loss=0.7852, over 5736035.44 frames. ], batch size: 89, lr: 4.49e-02, grad_scale: 16.0 +2024-09-16 12:55:51,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=50.71 vs. 
limit=10.05 +2024-09-16 12:55:58,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.97 vs. limit=10.05 +2024-09-16 12:56:00,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=3440.0, ans=0.2656 +2024-09-16 12:56:07,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=3440.0, ans=0.022599999999999995 +2024-09-16 12:56:09,886 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.78 vs. limit=6.72 +2024-09-16 12:56:18,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.78 vs. limit=10.11 +2024-09-16 12:56:22,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.71 vs. limit=8.805 +2024-09-16 12:56:24,075 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.31 vs. limit=10.11 +2024-09-16 12:56:27,501 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.10 vs. limit=10.11 +2024-09-16 12:56:30,405 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.31 vs. limit=3.5220000000000002 +2024-09-16 12:56:30,863 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.92 vs. limit=10.11 +2024-09-16 12:56:52,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=3560.0, ans=0.333125 +2024-09-16 12:57:07,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=3600.0, ans=0.03875000000000001 +2024-09-16 12:57:08,688 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.27 vs. limit=5.9 +2024-09-16 12:57:08,916 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.30 vs. limit=5.9 +2024-09-16 12:57:11,500 INFO [train.py:1198] (1/2) Epoch 1, batch 900, loss[loss=0.5746, ctc_loss=0.7215, cr_loss=0.3425, attn_decoder_loss=0.5507, over 29594.00 frames. ], tot_loss[loss=0.7685, ctc_loss=0.9048, cr_loss=0.3125, attn_decoder_loss=0.7464, over 5741083.57 frames. 
], batch size: 73, lr: 4.48e-02, grad_scale: 16.0 +2024-09-16 12:57:13,150 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.606e+02 2.694e+02 3.422e+02 4.565e+02 1.517e+03, threshold=6.845e+02, percent-clipped=7.0 +2024-09-16 12:57:15,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=3600.0, ans=0.33125 +2024-09-16 12:57:20,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=3600.0, ans=0.214 +2024-09-16 12:57:28,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=3640.0, ans=0.329375 +2024-09-16 12:57:36,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=8.26 vs. limit=8.865 +2024-09-16 12:57:55,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.92 vs. limit=8.879999999999999 +2024-09-16 12:58:17,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.22 vs. limit=8.895 +2024-09-16 12:58:23,852 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.14 vs. limit=8.91 +2024-09-16 12:58:26,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=3760.0, ans=0.015399999999999997 +2024-09-16 12:58:31,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=3760.0, ans=0.015399999999999997 +2024-09-16 12:58:36,753 INFO [train.py:1198] (1/2) Epoch 1, batch 950, loss[loss=0.5325, ctc_loss=0.6755, cr_loss=0.3531, attn_decoder_loss=0.5088, over 29511.00 frames. ], tot_loss[loss=0.7311, ctc_loss=0.8712, cr_loss=0.318, attn_decoder_loss=0.7085, over 5742693.85 frames. ], batch size: 74, lr: 4.48e-02, grad_scale: 16.0 +2024-09-16 12:58:42,837 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=93.57 vs. limit=10.35 +2024-09-16 12:58:43,155 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.90 vs. limit=8.925 +2024-09-16 12:58:45,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=3800.0, ans=0.014499999999999985 +2024-09-16 12:58:49,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=3800.0, ans=0.014499999999999985 +2024-09-16 12:58:53,490 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.70 vs. limit=10.379999999999999 +2024-09-16 12:59:03,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.92 vs. 
limit=5.96 +2024-09-16 12:59:12,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=3880.0, ans=0.7642 +2024-09-16 12:59:52,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=3960.0, ans=0.0050000000000000044 +2024-09-16 12:59:54,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=3960.0, ans=0.31437499999999996 +2024-09-16 13:00:00,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=3960.0, ans=0.16337000000000002 +2024-09-16 13:00:04,837 INFO [train.py:1198] (1/2) Epoch 1, batch 1000, loss[loss=0.5231, ctc_loss=0.6536, cr_loss=0.3227, attn_decoder_loss=0.5015, over 29487.00 frames. ], tot_loss[loss=0.6962, ctc_loss=0.8375, cr_loss=0.3274, attn_decoder_loss=0.6732, over 5736289.50 frames. ], batch size: 77, lr: 4.48e-02, grad_scale: 8.0 +2024-09-16 13:00:08,131 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.514e+02 2.266e+02 2.878e+02 3.816e+02 1.272e+03, threshold=5.756e+02, percent-clipped=5.0 +2024-09-16 13:00:17,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.28 vs. limit=3.6 +2024-09-16 13:00:29,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.07 vs. limit=9.015 +2024-09-16 13:00:33,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=9.97 vs. limit=9.015 +2024-09-16 13:00:51,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=4080.0, ans=0.037250000000000005 +2024-09-16 13:00:52,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=4080.0, ans=0.009982608695652173 +2024-09-16 13:00:56,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=4120.0, ans=0.025 +2024-09-16 13:00:57,062 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.97 vs. limit=9.045 +2024-09-16 13:01:04,549 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:01:16,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=4160.0, ans=0.07400000000000001 +2024-09-16 13:01:21,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=4160.0, ans=0.7544 +2024-09-16 13:01:31,753 INFO [train.py:1198] (1/2) Epoch 1, batch 1050, loss[loss=0.5569, ctc_loss=0.7004, cr_loss=0.3932, attn_decoder_loss=0.5322, over 29692.00 frames. ], tot_loss[loss=0.6601, ctc_loss=0.8003, cr_loss=0.3366, attn_decoder_loss=0.637, over 5744605.50 frames. 
], batch size: 85, lr: 4.48e-02, grad_scale: 8.0 +2024-09-16 13:01:49,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=4240.0, ans=0.7516 +2024-09-16 13:02:08,660 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.13 vs. limit=7.140000000000001 +2024-09-16 13:02:18,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.25 vs. limit=9.105 +2024-09-16 13:02:22,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=4320.0, ans=0.7488 +2024-09-16 13:02:29,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=4320.0, ans=0.2975 +2024-09-16 13:02:30,057 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=12.68 vs. limit=10.74 +2024-09-16 13:02:34,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=4320.0, ans=0.7488 +2024-09-16 13:02:37,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=4360.0, ans=0.0485 +2024-09-16 13:02:55,799 INFO [train.py:1198] (1/2) Epoch 1, batch 1100, loss[loss=0.5542, ctc_loss=0.6697, cr_loss=0.3214, attn_decoder_loss=0.5342, over 29456.00 frames. ], tot_loss[loss=0.6286, ctc_loss=0.7654, cr_loss=0.3465, attn_decoder_loss=0.6056, over 5757496.03 frames. ], batch size: 78, lr: 4.48e-02, grad_scale: 8.0 +2024-09-16 13:02:59,004 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.372e+02 1.990e+02 2.415e+02 3.242e+02 8.137e+02, threshold=4.830e+02, percent-clipped=5.0 +2024-09-16 13:03:06,803 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.68 vs. limit=10.8 +2024-09-16 13:03:20,181 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=55.49 vs. limit=10.83 +2024-09-16 13:03:31,997 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=107.79 vs. limit=10.86 +2024-09-16 13:03:41,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=4480.0, ans=0.009895652173913043 +2024-09-16 13:03:45,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.09 vs. limit=6.13 +2024-09-16 13:03:55,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=8.30 vs. limit=9.195 +2024-09-16 13:03:59,428 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.52 vs. limit=7.26 +2024-09-16 13:04:21,895 INFO [train.py:1198] (1/2) Epoch 1, batch 1150, loss[loss=0.4815, ctc_loss=0.5987, cr_loss=0.3819, attn_decoder_loss=0.46, over 29444.00 frames. ], tot_loss[loss=0.6012, ctc_loss=0.7336, cr_loss=0.3545, attn_decoder_loss=0.5786, over 5754175.58 frames. 
], batch size: 78, lr: 4.47e-02, grad_scale: 8.0 +2024-09-16 13:04:24,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=4600.0, ans=0.009869565217391305 +2024-09-16 13:04:40,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=4640.0, ans=0.04733333333333334 +2024-09-16 13:04:41,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.98 vs. limit=6.16 +2024-09-16 13:05:01,366 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=13.43 vs. limit=11.01 +2024-09-16 13:05:03,094 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.72 vs. limit=11.01 +2024-09-16 13:05:06,488 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=11.57 vs. limit=11.01 +2024-09-16 13:05:20,921 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.95 vs. limit=5.888 +2024-09-16 13:05:47,981 INFO [train.py:1198] (1/2) Epoch 1, batch 1200, loss[loss=0.5262, ctc_loss=0.6367, cr_loss=0.37, attn_decoder_loss=0.5057, over 29691.00 frames. ], tot_loss[loss=0.5784, ctc_loss=0.7061, cr_loss=0.3629, attn_decoder_loss=0.5561, over 5746542.15 frames. ], batch size: 85, lr: 4.47e-02, grad_scale: 16.0 +2024-09-16 13:05:51,298 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.465e+02 1.904e+02 2.227e+02 2.860e+02 9.470e+02, threshold=4.454e+02, percent-clipped=3.0 +2024-09-16 13:06:05,544 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.28 vs. limit=11.129999999999999 +2024-09-16 13:06:13,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=4840.0, ans=0.273125 +2024-09-16 13:06:27,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=4880.0, ans=7.4399999999999995 +2024-09-16 13:06:28,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=4880.0, ans=0.009808695652173913 +2024-09-16 13:06:33,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer_ff2.min_abs, batch_count=4880.0, ans=0.1 +2024-09-16 13:06:35,639 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.15 vs. limit=9.33 +2024-09-16 13:06:55,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1.whitening_limit, batch_count=4960.0, ans=6.24 +2024-09-16 13:06:58,720 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.98 vs. 
limit=9.36 +2024-09-16 13:07:07,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=4960.0, ans=0.26749999999999996 +2024-09-16 13:07:10,530 INFO [train.py:1198] (1/2) Epoch 1, batch 1250, loss[loss=0.5045, ctc_loss=0.6007, cr_loss=0.4467, attn_decoder_loss=0.4839, over 29495.00 frames. ], tot_loss[loss=0.5576, ctc_loss=0.6793, cr_loss=0.3722, attn_decoder_loss=0.5358, over 5774089.14 frames. ], batch size: 92, lr: 4.47e-02, grad_scale: 16.0 +2024-09-16 13:07:18,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.79 vs. limit=11.25 +2024-09-16 13:07:24,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=5000.0, ans=0.265625 +2024-09-16 13:07:35,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=5040.0, ans=0.26375000000000004 +2024-09-16 13:07:40,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=5040.0, ans=0.2496 +2024-09-16 13:07:44,504 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.41 vs. limit=9.405 +2024-09-16 13:07:45,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=5080.0, ans=0.2762 +2024-09-16 13:07:45,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=5080.0, ans=11.31 +2024-09-16 13:07:59,783 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.27 vs. limit=6.27 +2024-09-16 13:08:05,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=5120.0, ans=0.26 +2024-09-16 13:08:08,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=5120.0, ans=0.04533333333333334 +2024-09-16 13:08:31,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=5160.0, ans=0.258125 +2024-09-16 13:08:34,868 INFO [train.py:1198] (1/2) Epoch 1, batch 1300, loss[loss=0.4924, ctc_loss=0.5938, cr_loss=0.4348, attn_decoder_loss=0.4714, over 28309.00 frames. ], tot_loss[loss=0.5372, ctc_loss=0.6517, cr_loss=0.378, attn_decoder_loss=0.5161, over 5778963.42 frames. ], batch size: 111, lr: 4.47e-02, grad_scale: 16.0 +2024-09-16 13:08:37,362 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.22 vs. limit=6.3 +2024-09-16 13:08:38,070 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.446e+02 1.796e+02 2.066e+02 2.551e+02 7.251e+02, threshold=4.131e+02, percent-clipped=4.0 +2024-09-16 13:08:42,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.40 vs. 
limit=6.3 +2024-09-16 13:09:18,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=5280.0, ans=0.2525 +2024-09-16 13:09:19,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=5280.0, ans=0.2525 +2024-09-16 13:09:44,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=5360.0, ans=0.24875000000000003 +2024-09-16 13:09:45,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=5360.0, ans=0.044333333333333336 +2024-09-16 13:09:59,473 INFO [train.py:1198] (1/2) Epoch 1, batch 1350, loss[loss=0.4423, ctc_loss=0.5136, cr_loss=0.413, attn_decoder_loss=0.4252, over 29768.00 frames. ], tot_loss[loss=0.5193, ctc_loss=0.627, cr_loss=0.3838, attn_decoder_loss=0.4988, over 5795618.54 frames. ], batch size: 81, lr: 4.46e-02, grad_scale: 16.0 +2024-09-16 13:10:02,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=5400.0, ans=0.04416666666666667 +2024-09-16 13:10:03,295 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.40 vs. limit=11.55 +2024-09-16 13:10:06,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.73 vs. limit=7.7 +2024-09-16 13:10:16,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=11.65 vs. limit=11.58 +2024-09-16 13:10:53,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=5520.0, ans=0.04949747468305833 +2024-09-16 13:10:56,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=5520.0, ans=0.24125000000000002 +2024-09-16 13:10:59,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=5520.0, ans=0.7068000000000001 +2024-09-16 13:11:08,375 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.78 vs. limit=3.834 +2024-09-16 13:11:21,352 INFO [train.py:1198] (1/2) Epoch 1, batch 1400, loss[loss=0.4029, ctc_loss=0.4551, cr_loss=0.3809, attn_decoder_loss=0.3887, over 29609.00 frames. ], tot_loss[loss=0.5048, ctc_loss=0.6054, cr_loss=0.3899, attn_decoder_loss=0.4849, over 5806538.94 frames. ], batch size: 69, lr: 4.46e-02, grad_scale: 16.0 +2024-09-16 13:11:24,540 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.351e+02 1.700e+02 1.984e+02 2.487e+02 6.195e+02, threshold=3.968e+02, percent-clipped=5.0 +2024-09-16 13:11:27,109 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=12.32 vs. 
limit=11.7 +2024-09-16 13:11:34,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=5600.0, ans=0.8059999999999999 +2024-09-16 13:11:34,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=5600.0, ans=0.025 +2024-09-16 13:11:42,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=5640.0, ans=0.23562499999999997 +2024-09-16 13:11:44,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=5640.0, ans=0.23562499999999997 +2024-09-16 13:11:45,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=5640.0, ans=0.04316666666666667 +2024-09-16 13:11:46,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.02 vs. limit=9.615 +2024-09-16 13:11:57,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=5680.0, ans=9.629999999999999 +2024-09-16 13:12:05,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=5680.0, ans=0.23375 +2024-09-16 13:12:37,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=5760.0, ans=0.2424 +2024-09-16 13:12:42,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=5800.0, ans=0.22812500000000002 +2024-09-16 13:12:44,063 INFO [train.py:1198] (1/2) Epoch 1, batch 1450, loss[loss=0.4753, ctc_loss=0.5469, cr_loss=0.4441, attn_decoder_loss=0.4575, over 29433.00 frames. ], tot_loss[loss=0.4929, ctc_loss=0.5869, cr_loss=0.3951, attn_decoder_loss=0.4736, over 5805067.71 frames. ], batch size: 94, lr: 4.46e-02, grad_scale: 16.0 +2024-09-16 13:12:50,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=5800.0, ans=0.06375 +2024-09-16 13:13:18,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.52 vs. limit=11.91 +2024-09-16 13:13:42,635 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.83 vs. limit=9.72 +2024-09-16 13:14:06,651 INFO [train.py:1198] (1/2) Epoch 1, batch 1500, loss[loss=0.4375, ctc_loss=0.4964, cr_loss=0.3905, attn_decoder_loss=0.4223, over 29636.00 frames. ], tot_loss[loss=0.4822, ctc_loss=0.5701, cr_loss=0.4005, attn_decoder_loss=0.4636, over 5805817.46 frames. ], batch size: 86, lr: 4.46e-02, grad_scale: 16.0 +2024-09-16 13:14:09,809 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.205e+02 1.657e+02 1.840e+02 2.318e+02 6.248e+02, threshold=3.680e+02, percent-clipped=4.0 +2024-09-16 13:14:25,059 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.55 vs. 
limit=9.765 +2024-09-16 13:14:27,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=6040.0, ans=0.21687499999999998 +2024-09-16 13:14:28,663 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.68 vs. limit=6.416 +2024-09-16 13:14:32,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=6040.0, ans=12.030000000000001 +2024-09-16 13:14:43,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.61 vs. limit=6.52 +2024-09-16 13:14:58,002 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.67 vs. limit=3.918 +2024-09-16 13:15:05,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=6120.0, ans=0.6858 +2024-09-16 13:15:12,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=6160.0, ans=0.2384 +2024-09-16 13:15:20,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=6160.0, ans=0.21125 +2024-09-16 13:15:27,846 INFO [train.py:1198] (1/2) Epoch 1, batch 1550, loss[loss=0.4431, ctc_loss=0.5033, cr_loss=0.4189, attn_decoder_loss=0.4271, over 29487.00 frames. ], tot_loss[loss=0.4731, ctc_loss=0.5551, cr_loss=0.4039, attn_decoder_loss=0.455, over 5781683.42 frames. ], batch size: 90, lr: 4.45e-02, grad_scale: 16.0 +2024-09-16 13:15:44,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=6240.0, ans=0.20750000000000002 +2024-09-16 13:16:02,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=6280.0, ans=0.009504347826086957 +2024-09-16 13:16:07,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=6280.0, ans=0.205625 +2024-09-16 13:16:20,520 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.68 vs. limit=9.870000000000001 +2024-09-16 13:16:26,951 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.07 vs. limit=6.58 +2024-09-16 13:16:29,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=6320.0, ans=0.025 +2024-09-16 13:16:36,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=6360.0, ans=0.20187500000000003 +2024-09-16 13:16:50,876 INFO [train.py:1198] (1/2) Epoch 1, batch 1600, loss[loss=0.4347, ctc_loss=0.4913, cr_loss=0.4494, attn_decoder_loss=0.4184, over 29660.00 frames. ], tot_loss[loss=0.4642, ctc_loss=0.5404, cr_loss=0.4075, attn_decoder_loss=0.4467, over 5764191.38 frames. 
], batch size: 85, lr: 4.45e-02, grad_scale: 32.0 +2024-09-16 13:16:51,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=6400.0, ans=0.060000000000000005 +2024-09-16 13:16:52,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=6400.0, ans=0.2 +2024-09-16 13:16:53,977 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.353e+02 1.789e+02 2.003e+02 2.671e+02 7.111e+02, threshold=4.005e+02, percent-clipped=7.0 +2024-09-16 13:17:10,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=6440.0, ans=0.03983333333333334 +2024-09-16 13:17:14,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=6440.0, ans=0.0 +2024-09-16 13:17:18,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=6440.0, ans=0.23559999999999998 +2024-09-16 13:17:38,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.84 vs. limit=6.6080000000000005 +2024-09-16 13:18:03,180 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=4.82 vs. limit=9.96 +2024-09-16 13:18:07,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=6560.0, ans=0.8156 +2024-09-16 13:18:13,094 INFO [train.py:1198] (1/2) Epoch 1, batch 1650, loss[loss=0.4584, ctc_loss=0.5195, cr_loss=0.4559, attn_decoder_loss=0.4414, over 29716.00 frames. ], tot_loss[loss=0.4557, ctc_loss=0.5259, cr_loss=0.4101, attn_decoder_loss=0.4387, over 5758841.60 frames. ], batch size: 89, lr: 4.45e-02, grad_scale: 32.0 +2024-09-16 13:18:34,375 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:18:55,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.75 vs. limit=12.51 +2024-09-16 13:19:12,804 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:19:32,655 INFO [train.py:1198] (1/2) Epoch 1, batch 1700, loss[loss=0.3721, ctc_loss=0.4059, cr_loss=0.3889, attn_decoder_loss=0.3597, over 29594.00 frames. ], tot_loss[loss=0.4472, ctc_loss=0.511, cr_loss=0.4136, attn_decoder_loss=0.431, over 5779740.81 frames. ], batch size: 69, lr: 4.44e-02, grad_scale: 16.0 +2024-09-16 13:19:37,456 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.203e+02 1.510e+02 1.749e+02 2.059e+02 5.300e+02, threshold=3.498e+02, percent-clipped=2.0 +2024-09-16 13:19:41,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=6800.0, ans=0.18125000000000002 +2024-09-16 13:19:44,709 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.89 vs. 
limit=10.05 +2024-09-16 13:19:49,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=6840.0, ans=0.179375 +2024-09-16 13:20:10,870 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.59 vs. limit=10.08 +2024-09-16 13:20:21,606 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.90 vs. limit=8.46 +2024-09-16 13:20:23,353 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.51 vs. limit=10.095 +2024-09-16 13:20:26,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=6920.0, ans=0.03783333333333334 +2024-09-16 13:20:34,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.29 vs. limit=12.690000000000001 +2024-09-16 13:20:41,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=6960.0, ans=0.17375000000000002 +2024-09-16 13:20:47,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=13.75 vs. limit=12.719999999999999 +2024-09-16 13:20:54,500 INFO [train.py:1198] (1/2) Epoch 1, batch 1750, loss[loss=0.3574, ctc_loss=0.3785, cr_loss=0.3536, attn_decoder_loss=0.3471, over 29372.00 frames. ], tot_loss[loss=0.4398, ctc_loss=0.4974, cr_loss=0.416, attn_decoder_loss=0.4241, over 5788763.85 frames. ], batch size: 67, lr: 4.44e-02, grad_scale: 16.0 +2024-09-16 13:20:57,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=7000.0, ans=0.037500000000000006 +2024-09-16 13:21:01,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=7000.0, ans=0.0 +2024-09-16 13:21:13,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=7040.0, ans=0.16999999999999998 +2024-09-16 13:21:23,993 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=8.83 vs. limit=6.816 +2024-09-16 13:21:26,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=7080.0, ans=0.16812500000000002 +2024-09-16 13:21:44,534 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.92 vs. limit=12.84 +2024-09-16 13:21:55,563 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.83 vs. limit=6.848 +2024-09-16 13:22:00,390 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.98 vs. 
limit=4.074 +2024-09-16 13:22:01,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=7160.0, ans=0.164375 +2024-09-16 13:22:16,327 INFO [train.py:1198] (1/2) Epoch 1, batch 1800, loss[loss=0.4329, ctc_loss=0.4635, cr_loss=0.4553, attn_decoder_loss=0.4194, over 29691.00 frames. ], tot_loss[loss=0.4348, ctc_loss=0.4879, cr_loss=0.4193, attn_decoder_loss=0.4196, over 5790892.75 frames. ], batch size: 83, lr: 4.44e-02, grad_scale: 16.0 +2024-09-16 13:22:21,099 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.247e+02 1.571e+02 1.759e+02 2.049e+02 3.849e+02, threshold=3.518e+02, percent-clipped=1.0 +2024-09-16 13:22:40,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=7240.0, ans=0.2276 +2024-09-16 13:22:56,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=7280.0, ans=0.2272 +2024-09-16 13:22:57,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=7280.0, ans=0.2272 +2024-09-16 13:23:05,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=7320.0, ans=0.025 +2024-09-16 13:23:35,118 INFO [train.py:1198] (1/2) Epoch 1, batch 1850, loss[loss=0.4232, ctc_loss=0.4505, cr_loss=0.4654, attn_decoder_loss=0.4099, over 29636.00 frames. ], tot_loss[loss=0.4289, ctc_loss=0.4767, cr_loss=0.4213, attn_decoder_loss=0.4143, over 5795186.17 frames. ], batch size: 86, lr: 4.43e-02, grad_scale: 16.0 +2024-09-16 13:23:41,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=7400.0, ans=0.009260869565217392 +2024-09-16 13:23:46,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=7400.0, ans=0.153125 +2024-09-16 13:23:49,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=7440.0, ans=0.15125 +2024-09-16 13:23:53,071 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.87 vs. limit=13.08 +2024-09-16 13:23:54,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=7440.0, ans=0.025 +2024-09-16 13:24:21,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=7480.0, ans=0.14937499999999998 +2024-09-16 13:24:33,713 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.74 vs. limit=10.32 +2024-09-16 13:24:44,644 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.18 vs. limit=13.17 +2024-09-16 13:24:54,517 INFO [train.py:1198] (1/2) Epoch 1, batch 1900, loss[loss=0.4093, ctc_loss=0.434, cr_loss=0.4424, attn_decoder_loss=0.3967, over 29697.00 frames. ], tot_loss[loss=0.425, ctc_loss=0.4687, cr_loss=0.4244, attn_decoder_loss=0.4107, over 5803239.03 frames. 
], batch size: 89, lr: 4.43e-02, grad_scale: 16.0 +2024-09-16 13:24:56,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=7600.0, ans=0.025 +2024-09-16 13:24:59,248 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.162e+02 1.597e+02 1.785e+02 2.217e+02 4.479e+02, threshold=3.571e+02, percent-clipped=3.0 +2024-09-16 13:25:10,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=7640.0, ans=0.07 +2024-09-16 13:25:45,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=7720.0, ans=0.051750000000000004 +2024-09-16 13:26:07,636 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.83 vs. limit=10.41 +2024-09-16 13:26:14,694 INFO [train.py:1198] (1/2) Epoch 1, batch 1950, loss[loss=0.3983, ctc_loss=0.4292, cr_loss=0.4451, attn_decoder_loss=0.385, over 29453.00 frames. ], tot_loss[loss=0.4219, ctc_loss=0.461, cr_loss=0.428, attn_decoder_loss=0.408, over 5818422.84 frames. ], batch size: 78, lr: 4.43e-02, grad_scale: 16.0 +2024-09-16 13:26:21,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=7800.0, ans=0.222 +2024-09-16 13:26:23,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=7800.0, ans=0.034166666666666665 +2024-09-16 13:26:24,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=7800.0, ans=0.034166666666666665 +2024-09-16 13:26:25,416 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=5.97 vs. limit=10.425 +2024-09-16 13:26:29,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=7840.0, ans=0.1325 +2024-09-16 13:26:58,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=7880.0, ans=0.009156521739130435 +2024-09-16 13:27:23,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=7960.0, ans=0.12687500000000002 +2024-09-16 13:27:26,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=7960.0, ans=0.025 +2024-09-16 13:27:28,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=7960.0, ans=0.07 +2024-09-16 13:27:33,405 INFO [train.py:1198] (1/2) Epoch 1, batch 2000, loss[loss=0.345, ctc_loss=0.3565, cr_loss=0.3486, attn_decoder_loss=0.336, over 29339.00 frames. ], tot_loss[loss=0.419, ctc_loss=0.4544, cr_loss=0.4304, attn_decoder_loss=0.4055, over 5796423.13 frames. 
], batch size: 67, lr: 4.42e-02, grad_scale: 32.0 +2024-09-16 13:27:38,130 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.167e+02 1.451e+02 1.684e+02 2.248e+02 3.741e+02, threshold=3.368e+02, percent-clipped=1.0 +2024-09-16 13:27:44,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=8000.0, ans=0.03333333333333334 +2024-09-16 13:27:53,947 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.20 vs. limit=4.2059999999999995 +2024-09-16 13:28:01,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=8040.0, ans=0.125 +2024-09-16 13:28:16,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=8080.0, ans=0.8308 +2024-09-16 13:28:24,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=8120.0, ans=0.2188 +2024-09-16 13:28:24,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=8120.0, ans=0.125 +2024-09-16 13:28:28,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=8120.0, ans=0.125 +2024-09-16 13:28:30,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=8120.0, ans=0.125 +2024-09-16 13:28:31,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.72 vs. limit=7.029999999999999 +2024-09-16 13:28:52,988 INFO [train.py:1198] (1/2) Epoch 1, batch 2050, loss[loss=0.3787, ctc_loss=0.3994, cr_loss=0.4053, attn_decoder_loss=0.3674, over 29415.00 frames. ], tot_loss[loss=0.4147, ctc_loss=0.4466, cr_loss=0.4303, attn_decoder_loss=0.4016, over 5788353.58 frames. ], batch size: 70, lr: 4.42e-02, grad_scale: 16.0 +2024-09-16 13:28:55,303 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.24 vs. limit=4.23 +2024-09-16 13:29:05,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=8200.0, ans=0.125 +2024-09-16 13:29:12,359 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.89 vs. limit=10.59 +2024-09-16 13:29:13,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=8240.0, ans=0.0 +2024-09-16 13:29:13,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=8240.0, ans=0.21760000000000002 +2024-09-16 13:29:21,684 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.35 vs. 
limit=10.59 +2024-09-16 13:29:27,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=8280.0, ans=0.125 +2024-09-16 13:29:45,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=8320.0, ans=0.2168 +2024-09-16 13:29:59,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=8360.0, ans=0.125 +2024-09-16 13:30:12,263 INFO [train.py:1198] (1/2) Epoch 1, batch 2100, loss[loss=0.3778, ctc_loss=0.3777, cr_loss=0.4142, attn_decoder_loss=0.3686, over 29748.00 frames. ], tot_loss[loss=0.4105, ctc_loss=0.4389, cr_loss=0.431, attn_decoder_loss=0.3978, over 5800463.16 frames. ], batch size: 81, lr: 4.42e-02, grad_scale: 16.0 +2024-09-16 13:30:18,321 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.222e+02 1.523e+02 1.725e+02 2.064e+02 6.365e+02, threshold=3.449e+02, percent-clipped=2.0 +2024-09-16 13:30:20,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=8400.0, ans=0.125 +2024-09-16 13:30:40,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=8440.0, ans=0.2156 +2024-09-16 13:30:47,306 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.61 vs. limit=10.68 +2024-09-16 13:30:53,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.39 vs. limit=4.272 +2024-09-16 13:30:57,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=8520.0, ans=0.03116666666666667 +2024-09-16 13:31:08,612 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.70 vs. limit=13.89 +2024-09-16 13:31:23,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=8560.0, ans=0.6004 +2024-09-16 13:31:29,556 INFO [train.py:1198] (1/2) Epoch 1, batch 2150, loss[loss=0.368, ctc_loss=0.3858, cr_loss=0.3878, attn_decoder_loss=0.3574, over 29465.00 frames. ], tot_loss[loss=0.407, ctc_loss=0.4324, cr_loss=0.4326, attn_decoder_loss=0.3946, over 5814510.90 frames. ], batch size: 78, lr: 4.41e-02, grad_scale: 16.0 +2024-09-16 13:31:37,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=8600.0, ans=0.599 +2024-09-16 13:31:39,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.00 vs. limit=10.725 +2024-09-16 13:31:50,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=8640.0, ans=0.125 +2024-09-16 13:31:55,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=8640.0, ans=0.2136 +2024-09-16 13:32:00,587 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=8.96 vs. 
limit=10.74 +2024-09-16 13:32:12,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=8680.0, ans=0.008982608695652174 +2024-09-16 13:32:49,789 INFO [train.py:1198] (1/2) Epoch 1, batch 2200, loss[loss=0.4015, ctc_loss=0.4106, cr_loss=0.4098, attn_decoder_loss=0.3914, over 29627.00 frames. ], tot_loss[loss=0.4049, ctc_loss=0.4273, cr_loss=0.4332, attn_decoder_loss=0.3927, over 5811659.69 frames. ], batch size: 86, lr: 4.41e-02, grad_scale: 16.0 +2024-09-16 13:32:55,854 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.205e+02 1.455e+02 1.695e+02 2.050e+02 4.766e+02, threshold=3.390e+02, percent-clipped=3.0 +2024-09-16 13:33:09,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=8840.0, ans=0.008947826086956523 +2024-09-16 13:33:23,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=8880.0, ans=0.0 +2024-09-16 13:33:37,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=8920.0, ans=0.07 +2024-09-16 13:33:45,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.81 vs. limit=7.23 +2024-09-16 13:33:56,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=8960.0, ans=0.008921739130434782 +2024-09-16 13:34:02,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=8960.0, ans=0.025 +2024-09-16 13:34:05,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=9000.0, ans=0.02916666666666667 +2024-09-16 13:34:09,143 INFO [train.py:1198] (1/2) Epoch 1, batch 2250, loss[loss=0.3939, ctc_loss=0.3955, cr_loss=0.4315, attn_decoder_loss=0.3841, over 29736.00 frames. ], tot_loss[loss=0.4022, ctc_loss=0.4215, cr_loss=0.4337, attn_decoder_loss=0.3904, over 5811802.14 frames. ], batch size: 82, lr: 4.40e-02, grad_scale: 16.0 +2024-09-16 13:34:09,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=9000.0, ans=0.025 +2024-09-16 13:34:37,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=9040.0, ans=0.025 +2024-09-16 13:34:40,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=9080.0, ans=0.008895652173913044 +2024-09-16 13:34:57,574 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.29 vs. limit=10.92 +2024-09-16 13:35:15,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=9160.0, ans=0.0 +2024-09-16 13:35:26,221 INFO [train.py:1198] (1/2) Epoch 1, batch 2300, loss[loss=0.3458, ctc_loss=0.3446, cr_loss=0.4192, attn_decoder_loss=0.3366, over 29284.00 frames. ], tot_loss[loss=0.398, ctc_loss=0.4146, cr_loss=0.4332, attn_decoder_loss=0.3866, over 5800132.23 frames. 
], batch size: 71, lr: 4.40e-02, grad_scale: 16.0 +2024-09-16 13:35:28,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.30 vs. limit=14.4 +2024-09-16 13:35:32,249 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.236e+02 1.494e+02 1.712e+02 1.992e+02 4.170e+02, threshold=3.424e+02, percent-clipped=4.0 +2024-09-16 13:35:36,274 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.87 vs. limit=9.6 +2024-09-16 13:35:37,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=9200.0, ans=0.125 +2024-09-16 13:35:51,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=9240.0, ans=0.5766 +2024-09-16 13:35:59,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=9280.0, ans=0.008852173913043479 +2024-09-16 13:36:03,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=9280.0, ans=0.008852173913043479 +2024-09-16 13:36:12,520 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.21 vs. limit=7.32 +2024-09-16 13:36:22,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten.whitening_limit, batch_count=9320.0, ans=14.49 +2024-09-16 13:36:45,228 INFO [train.py:1198] (1/2) Epoch 1, batch 2350, loss[loss=0.4032, ctc_loss=0.4086, cr_loss=0.435, attn_decoder_loss=0.393, over 29678.00 frames. ], tot_loss[loss=0.3965, ctc_loss=0.4107, cr_loss=0.4345, attn_decoder_loss=0.3852, over 5804717.46 frames. ], batch size: 83, lr: 4.40e-02, grad_scale: 16.0 +2024-09-16 13:36:45,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=9400.0, ans=0.008826086956521739 +2024-09-16 13:36:47,717 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.53 vs. limit=14.55 +2024-09-16 13:36:49,418 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.00 vs. limit=14.55 +2024-09-16 13:36:50,708 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=6.40 vs. limit=7.76 +2024-09-16 13:36:57,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=9400.0, ans=0.20600000000000002 +2024-09-16 13:37:28,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=9480.0, ans=0.125 +2024-09-16 13:37:35,118 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.29 vs. limit=14.64 +2024-09-16 13:37:46,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=9560.0, ans=0.125 +2024-09-16 13:37:55,192 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.94 vs. 
limit=14.67 +2024-09-16 13:37:59,475 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:38:02,853 INFO [train.py:1198] (1/2) Epoch 1, batch 2400, loss[loss=0.3491, ctc_loss=0.3398, cr_loss=0.4278, attn_decoder_loss=0.3406, over 29511.00 frames. ], tot_loss[loss=0.3949, ctc_loss=0.4067, cr_loss=0.4354, attn_decoder_loss=0.3839, over 5808501.15 frames. ], batch size: 76, lr: 4.39e-02, grad_scale: 32.0 +2024-09-16 13:38:10,878 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.192e+02 1.445e+02 1.624e+02 1.930e+02 3.418e+02, threshold=3.248e+02, percent-clipped=0.0 +2024-09-16 13:38:12,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=9600.0, ans=0.5640000000000001 +2024-09-16 13:38:17,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=9600.0, ans=0.125 +2024-09-16 13:38:20,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=9640.0, ans=0.2036 +2024-09-16 13:38:28,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=9640.0, ans=0.2036 +2024-09-16 13:38:31,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=9640.0, ans=0.026500000000000003 +2024-09-16 13:38:34,687 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=5.992e-02 +2024-09-16 13:38:42,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=9680.0, ans=0.0 +2024-09-16 13:38:47,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=9680.0, ans=0.07 +2024-09-16 13:38:47,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=9680.0, ans=0.125 +2024-09-16 13:38:51,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=9720.0, ans=0.5598000000000001 +2024-09-16 13:39:05,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=9760.0, ans=0.5584 +2024-09-16 13:39:07,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=9760.0, ans=0.026000000000000002 +2024-09-16 13:39:14,981 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:39:17,407 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.63 vs. limit=7.4399999999999995 +2024-09-16 13:39:22,242 INFO [train.py:1198] (1/2) Epoch 1, batch 2450, loss[loss=0.3912, ctc_loss=0.392, cr_loss=0.4398, attn_decoder_loss=0.3813, over 29707.00 frames. ], tot_loss[loss=0.3946, ctc_loss=0.4049, cr_loss=0.437, attn_decoder_loss=0.3838, over 5785548.26 frames. 
], batch size: 82, lr: 4.39e-02, grad_scale: 16.0 +2024-09-16 13:39:24,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=9800.0, ans=0.202 +2024-09-16 13:39:36,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=9800.0, ans=0.125 +2024-09-16 13:40:05,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=9880.0, ans=0.125 +2024-09-16 13:40:09,940 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.31 vs. limit=5.0 +2024-09-16 13:40:13,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=9920.0, ans=0.04949747468305833 +2024-09-16 13:40:15,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=9920.0, ans=0.025 +2024-09-16 13:40:19,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=9920.0, ans=0.125 +2024-09-16 13:40:35,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=9960.0, ans=0.125 +2024-09-16 13:40:41,273 INFO [train.py:1198] (1/2) Epoch 1, batch 2500, loss[loss=0.4077, ctc_loss=0.4112, cr_loss=0.4596, attn_decoder_loss=0.3971, over 29644.00 frames. ], tot_loss[loss=0.3918, ctc_loss=0.3993, cr_loss=0.4375, attn_decoder_loss=0.3813, over 5795354.24 frames. ], batch size: 86, lr: 4.38e-02, grad_scale: 16.0 +2024-09-16 13:40:48,951 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.151e+02 1.428e+02 1.613e+02 1.938e+02 4.379e+02, threshold=3.227e+02, percent-clipped=3.0 +2024-09-16 13:40:49,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=10000.0, ans=0.025 +2024-09-16 13:40:51,505 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=5.16 vs. limit=8.0 +2024-09-16 13:41:05,432 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.03 vs. limit=15.030000000000001 +2024-09-16 13:41:14,880 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.71 vs. limit=15.059999999999999 +2024-09-16 13:41:31,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=10120.0, ans=0.5458000000000001 +2024-09-16 13:41:34,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=10120.0, ans=0.0 +2024-09-16 13:41:49,609 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=7.16 vs. limit=11.31 +2024-09-16 13:41:54,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=10160.0, ans=0.5444 +2024-09-16 13:42:01,547 INFO [train.py:1198] (1/2) Epoch 1, batch 2550, loss[loss=0.342, ctc_loss=0.3328, cr_loss=0.4006, attn_decoder_loss=0.3342, over 29326.00 frames. 
], tot_loss[loss=0.3897, ctc_loss=0.3952, cr_loss=0.4382, attn_decoder_loss=0.3793, over 5798349.02 frames. ], batch size: 67, lr: 4.38e-02, grad_scale: 16.0 +2024-09-16 13:42:01,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=10200.0, ans=0.198 +2024-09-16 13:42:08,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=10200.0, ans=0.198 +2024-09-16 13:42:36,377 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.82 vs. limit=11.355 +2024-09-16 13:42:49,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=10320.0, ans=0.125 +2024-09-16 13:43:02,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=10360.0, ans=0.023500000000000004 +2024-09-16 13:43:19,972 INFO [train.py:1198] (1/2) Epoch 1, batch 2600, loss[loss=0.3541, ctc_loss=0.337, cr_loss=0.4327, attn_decoder_loss=0.3464, over 29443.00 frames. ], tot_loss[loss=0.3893, ctc_loss=0.3933, cr_loss=0.4397, attn_decoder_loss=0.3791, over 5794721.19 frames. ], batch size: 78, lr: 4.37e-02, grad_scale: 16.0 +2024-09-16 13:43:25,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=10400.0, ans=0.025 +2024-09-16 13:43:29,539 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.186e+02 1.430e+02 1.543e+02 1.954e+02 3.702e+02, threshold=3.087e+02, percent-clipped=5.0 +2024-09-16 13:43:37,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=10440.0, ans=0.125 +2024-09-16 13:43:37,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=10440.0, ans=0.125 +2024-09-16 13:43:41,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=10440.0, ans=0.02316666666666667 +2024-09-16 13:43:48,212 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.45 vs. limit=11.415 +2024-09-16 13:43:52,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=10480.0, ans=0.19519999999999998 +2024-09-16 13:43:58,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=10480.0, ans=0.09899494936611666 +2024-09-16 13:44:08,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=10520.0, ans=0.022833333333333337 +2024-09-16 13:44:23,868 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.78 vs. 
limit=15.42 +2024-09-16 13:44:26,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=10560.0, ans=0.125 +2024-09-16 13:44:26,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=10560.0, ans=0.125 +2024-09-16 13:44:27,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=10560.0, ans=0.025 +2024-09-16 13:44:33,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=10560.0, ans=0.125 +2024-09-16 13:44:37,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=10600.0, ans=0.022500000000000003 +2024-09-16 13:44:38,239 INFO [train.py:1198] (1/2) Epoch 1, batch 2650, loss[loss=0.4177, ctc_loss=0.4145, cr_loss=0.4524, attn_decoder_loss=0.408, over 29269.00 frames. ], tot_loss[loss=0.3884, ctc_loss=0.3903, cr_loss=0.4405, attn_decoder_loss=0.3784, over 5801941.26 frames. ], batch size: 100, lr: 4.37e-02, grad_scale: 16.0 +2024-09-16 13:45:39,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=10760.0, ans=0.125 +2024-09-16 13:45:57,792 INFO [train.py:1198] (1/2) Epoch 1, batch 2700, loss[loss=0.3899, ctc_loss=0.3733, cr_loss=0.4592, attn_decoder_loss=0.3815, over 29535.00 frames. ], tot_loss[loss=0.3867, ctc_loss=0.3866, cr_loss=0.4411, attn_decoder_loss=0.3769, over 5797882.24 frames. ], batch size: 87, lr: 4.36e-02, grad_scale: 16.0 +2024-09-16 13:46:01,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=10800.0, ans=0.008521739130434783 +2024-09-16 13:46:04,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=10800.0, ans=0.02166666666666667 +2024-09-16 13:46:05,443 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.205e+02 1.417e+02 1.675e+02 2.035e+02 4.386e+02, threshold=3.351e+02, percent-clipped=4.0 +2024-09-16 13:46:11,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=10840.0, ans=0.0 +2024-09-16 13:46:11,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=10840.0, ans=0.125 +2024-09-16 13:46:18,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=10840.0, ans=0.1916 +2024-09-16 13:46:24,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=10840.0, ans=0.125 +2024-09-16 13:46:33,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=10880.0, ans=0.125 +2024-09-16 13:47:17,505 INFO [train.py:1198] (1/2) Epoch 1, batch 2750, loss[loss=0.3595, ctc_loss=0.355, cr_loss=0.4235, attn_decoder_loss=0.3506, over 29525.00 frames. ], tot_loss[loss=0.3837, ctc_loss=0.3819, cr_loss=0.44, attn_decoder_loss=0.3741, over 5796808.42 frames. 
], batch size: 75, lr: 4.36e-02, grad_scale: 16.0 +2024-09-16 13:47:39,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=11040.0, ans=0.125 +2024-09-16 13:47:42,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=11040.0, ans=0.5136000000000001 +2024-09-16 13:47:43,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=11040.0, ans=0.5136000000000001 +2024-09-16 13:47:48,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=11080.0, ans=0.008460869565217391 +2024-09-16 13:48:00,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.48 vs. limit=11.655000000000001 +2024-09-16 13:48:25,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=11160.0, ans=0.18839999999999998 +2024-09-16 13:48:25,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=11160.0, ans=0.125 +2024-09-16 13:48:32,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=11160.0, ans=0.0 +2024-09-16 13:48:35,668 INFO [train.py:1198] (1/2) Epoch 1, batch 2800, loss[loss=0.4385, ctc_loss=0.4721, cr_loss=0.4677, attn_decoder_loss=0.4244, over 20196.00 frames. ], tot_loss[loss=0.3825, ctc_loss=0.3795, cr_loss=0.4404, attn_decoder_loss=0.373, over 5778641.68 frames. ], batch size: 209, lr: 4.36e-02, grad_scale: 32.0 +2024-09-16 13:48:38,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=11200.0, ans=0.125 +2024-09-16 13:48:43,100 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.102e+02 1.389e+02 1.617e+02 2.129e+02 5.220e+02, threshold=3.235e+02, percent-clipped=5.0 +2024-09-16 13:48:43,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=11200.0, ans=0.008434782608695653 +2024-09-16 13:48:58,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=11240.0, ans=0.125 +2024-09-16 13:49:14,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=11280.0, ans=10.0 +2024-09-16 13:49:32,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=11320.0, ans=0.1868 +2024-09-16 13:49:54,509 INFO [train.py:1198] (1/2) Epoch 1, batch 2850, loss[loss=0.3667, ctc_loss=0.3613, cr_loss=0.4395, attn_decoder_loss=0.3575, over 29484.00 frames. ], tot_loss[loss=0.382, ctc_loss=0.3782, cr_loss=0.4413, attn_decoder_loss=0.3726, over 5764924.36 frames. 
], batch size: 77, lr: 4.35e-02, grad_scale: 32.0 +2024-09-16 13:49:59,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=11400.0, ans=0.186 +2024-09-16 13:50:10,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=11440.0, ans=0.019000000000000003 +2024-09-16 13:50:14,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=11440.0, ans=0.008382608695652174 +2024-09-16 13:50:22,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=11440.0, ans=0.019000000000000003 +2024-09-16 13:51:13,789 INFO [train.py:1198] (1/2) Epoch 1, batch 2900, loss[loss=0.369, ctc_loss=0.3457, cr_loss=0.4496, attn_decoder_loss=0.3616, over 29429.00 frames. ], tot_loss[loss=0.3816, ctc_loss=0.376, cr_loss=0.4425, attn_decoder_loss=0.3724, over 5789534.44 frames. ], batch size: 79, lr: 4.35e-02, grad_scale: 16.0 +2024-09-16 13:51:22,929 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.082e+02 1.352e+02 1.492e+02 1.728e+02 4.022e+02, threshold=2.985e+02, percent-clipped=1.0 +2024-09-16 13:51:42,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=11640.0, ans=0.125 +2024-09-16 13:51:47,257 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.63 vs. limit=11.879999999999999 +2024-09-16 13:52:01,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=11720.0, ans=0.09899494936611666 +2024-09-16 13:52:18,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.72 vs. limit=7.9399999999999995 +2024-09-16 13:52:22,335 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.56 vs. limit=8.704 +2024-09-16 13:52:26,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=11760.0, ans=0.1824 +2024-09-16 13:52:30,701 INFO [train.py:1198] (1/2) Epoch 1, batch 2950, loss[loss=0.343, ctc_loss=0.3164, cr_loss=0.4204, attn_decoder_loss=0.3366, over 29498.00 frames. ], tot_loss[loss=0.3782, ctc_loss=0.3709, cr_loss=0.4405, attn_decoder_loss=0.3692, over 5783701.76 frames. ], batch size: 75, lr: 4.34e-02, grad_scale: 16.0 +2024-09-16 13:52:51,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=11840.0, ans=0.125 +2024-09-16 13:52:52,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=11840.0, ans=0.125 +2024-09-16 13:52:58,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=11840.0, ans=0.125 +2024-09-16 13:53:01,083 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.01 vs. 
limit=8.751999999999999 +2024-09-16 13:53:05,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=11880.0, ans=0.125 +2024-09-16 13:53:05,527 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.12 vs. limit=16.41 +2024-09-16 13:53:11,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.70 vs. limit=16.41 +2024-09-16 13:53:29,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=11920.0, ans=0.125 +2024-09-16 13:53:32,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=11960.0, ans=0.8695999999999999 +2024-09-16 13:53:50,808 INFO [train.py:1198] (1/2) Epoch 1, batch 3000, loss[loss=0.393, ctc_loss=0.3849, cr_loss=0.4603, attn_decoder_loss=0.3837, over 29752.00 frames. ], tot_loss[loss=0.3772, ctc_loss=0.3689, cr_loss=0.441, attn_decoder_loss=0.3684, over 5784310.75 frames. ], batch size: 81, lr: 4.34e-02, grad_scale: 16.0 +2024-09-16 13:53:50,808 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 13:54:09,119 INFO [train.py:1230] (1/2) Epoch 1, validation: loss=0.2655, ctc_loss=0.1548, cr_loss=4.113e-15, attn_decoder_loss=0.2778, over 944034.00 frames. +2024-09-16 13:54:09,119 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 13:54:12,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=12000.0, ans=0.125 +2024-09-16 13:54:18,439 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.153e+02 1.470e+02 1.654e+02 2.017e+02 3.240e+02, threshold=3.308e+02, percent-clipped=3.0 +2024-09-16 13:54:36,397 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.95 vs. limit=4.806 +2024-09-16 13:54:39,297 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.78 vs. limit=16.560000000000002 +2024-09-16 13:54:48,706 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:54:58,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=12120.0, ans=0.008234782608695652 +2024-09-16 13:55:04,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=12120.0, ans=0.008234782608695652 +2024-09-16 13:55:28,452 INFO [train.py:1198] (1/2) Epoch 1, batch 3050, loss[loss=0.3466, ctc_loss=0.3257, cr_loss=0.4404, attn_decoder_loss=0.3391, over 29527.00 frames. ], tot_loss[loss=0.3772, ctc_loss=0.3678, cr_loss=0.442, attn_decoder_loss=0.3684, over 5777895.37 frames. ], batch size: 76, lr: 4.33e-02, grad_scale: 16.0 +2024-09-16 13:55:38,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.73 vs. 
limit=11.1 +2024-09-16 13:55:42,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=12240.0, ans=0.125 +2024-09-16 13:56:31,270 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.06 vs. limit=12.135 +2024-09-16 13:56:45,427 INFO [train.py:1198] (1/2) Epoch 1, batch 3100, loss[loss=0.3795, ctc_loss=0.362, cr_loss=0.4763, attn_decoder_loss=0.3708, over 29202.00 frames. ], tot_loss[loss=0.3753, ctc_loss=0.3645, cr_loss=0.4422, attn_decoder_loss=0.3667, over 5780076.59 frames. ], batch size: 100, lr: 4.33e-02, grad_scale: 16.0 +2024-09-16 13:56:54,617 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.036e+02 1.315e+02 1.501e+02 1.811e+02 4.491e+02, threshold=3.002e+02, percent-clipped=4.0 +2024-09-16 13:56:56,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten.whitening_limit, batch_count=12400.0, ans=12.15 +2024-09-16 13:57:04,631 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.23 vs. limit=8.11 +2024-09-16 13:57:05,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=12440.0, ans=0.3866 +2024-09-16 13:57:42,060 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 13:57:43,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=12520.0, ans=0.46180000000000004 +2024-09-16 13:58:04,871 INFO [train.py:1198] (1/2) Epoch 1, batch 3150, loss[loss=0.394, ctc_loss=0.3867, cr_loss=0.4438, attn_decoder_loss=0.3849, over 28814.00 frames. ], tot_loss[loss=0.3741, ctc_loss=0.3622, cr_loss=0.4422, attn_decoder_loss=0.3655, over 5786374.93 frames. ], batch size: 104, lr: 4.32e-02, grad_scale: 16.0 +2024-09-16 13:58:23,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=12640.0, ans=0.125 +2024-09-16 13:58:28,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=12640.0, ans=0.17359999999999998 +2024-09-16 13:58:38,478 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.73 vs. limit=17.009999999999998 +2024-09-16 13:58:39,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=12680.0, ans=0.125 +2024-09-16 13:58:44,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=12680.0, ans=0.013833333333333336 +2024-09-16 13:58:45,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=12680.0, ans=0.013833333333333336 +2024-09-16 13:58:52,331 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.32 vs. 
limit=12.27 +2024-09-16 13:58:53,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=12720.0, ans=0.0 +2024-09-16 13:58:54,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=12720.0, ans=0.125 +2024-09-16 13:59:10,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=12760.0, ans=0.125 +2024-09-16 13:59:14,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=12760.0, ans=0.125 +2024-09-16 13:59:24,638 INFO [train.py:1198] (1/2) Epoch 1, batch 3200, loss[loss=0.3606, ctc_loss=0.3317, cr_loss=0.4158, attn_decoder_loss=0.3545, over 29402.00 frames. ], tot_loss[loss=0.3723, ctc_loss=0.3594, cr_loss=0.4417, attn_decoder_loss=0.3639, over 5796361.84 frames. ], batch size: 79, lr: 4.32e-02, grad_scale: 32.0 +2024-09-16 13:59:33,918 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.115e+02 1.352e+02 1.572e+02 1.941e+02 4.814e+02, threshold=3.143e+02, percent-clipped=7.0 +2024-09-16 13:59:46,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=12840.0, ans=0.125 +2024-09-16 14:00:19,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=12920.0, ans=0.125 +2024-09-16 14:00:30,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=12960.0, ans=0.125 +2024-09-16 14:00:42,056 INFO [train.py:1198] (1/2) Epoch 1, batch 3250, loss[loss=0.3831, ctc_loss=0.3547, cr_loss=0.4679, attn_decoder_loss=0.3758, over 29701.00 frames. ], tot_loss[loss=0.3707, ctc_loss=0.3561, cr_loss=0.4417, attn_decoder_loss=0.3625, over 5802480.60 frames. ], batch size: 84, lr: 4.31e-02, grad_scale: 32.0 +2024-09-16 14:01:40,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=13120.0, ans=0.125 +2024-09-16 14:01:51,485 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.66 vs. limit=4.974 +2024-09-16 14:01:55,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=13160.0, ans=0.125 +2024-09-16 14:02:00,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=13200.0, ans=0.882 +2024-09-16 14:02:00,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=13200.0, ans=0.125 +2024-09-16 14:02:01,645 INFO [train.py:1198] (1/2) Epoch 1, batch 3300, loss[loss=0.3929, ctc_loss=0.3844, cr_loss=0.4515, attn_decoder_loss=0.3838, over 28266.00 frames. ], tot_loss[loss=0.3691, ctc_loss=0.3544, cr_loss=0.4409, attn_decoder_loss=0.361, over 5798925.91 frames. ], batch size: 111, lr: 4.31e-02, grad_scale: 16.0 +2024-09-16 14:02:02,759 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.74 vs. 
limit=17.4 +2024-09-16 14:02:10,134 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.63 vs. limit=17.4 +2024-09-16 14:02:12,356 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.074e+02 1.388e+02 1.553e+02 1.864e+02 4.414e+02, threshold=3.106e+02, percent-clipped=4.0 +2024-09-16 14:02:23,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=13240.0, ans=0.007991304347826087 +2024-09-16 14:02:42,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=13280.0, ans=0.025 +2024-09-16 14:02:44,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=13280.0, ans=0.125 +2024-09-16 14:03:20,276 INFO [train.py:1198] (1/2) Epoch 1, batch 3350, loss[loss=0.3992, ctc_loss=0.3782, cr_loss=0.4881, attn_decoder_loss=0.3907, over 28805.00 frames. ], tot_loss[loss=0.3703, ctc_loss=0.3552, cr_loss=0.4418, attn_decoder_loss=0.3622, over 5774653.90 frames. ], batch size: 104, lr: 4.30e-02, grad_scale: 16.0 +2024-09-16 14:03:28,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=13400.0, ans=0.125 +2024-09-16 14:03:36,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=13440.0, ans=0.0 +2024-09-16 14:04:38,391 INFO [train.py:1198] (1/2) Epoch 1, batch 3400, loss[loss=0.3301, ctc_loss=0.3145, cr_loss=0.4295, attn_decoder_loss=0.3223, over 29342.00 frames. ], tot_loss[loss=0.3687, ctc_loss=0.3526, cr_loss=0.4416, attn_decoder_loss=0.3606, over 5768301.56 frames. ], batch size: 67, lr: 4.29e-02, grad_scale: 16.0 +2024-09-16 14:04:48,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.73 vs. limit=17.7 +2024-09-16 14:04:49,183 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.135e+02 1.397e+02 1.601e+02 1.904e+02 5.092e+02, threshold=3.203e+02, percent-clipped=2.0 +2024-09-16 14:04:57,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=13640.0, ans=0.1136 +2024-09-16 14:04:58,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=13640.0, ans=0.125 +2024-09-16 14:05:01,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=13640.0, ans=0.09899494936611666 +2024-09-16 14:05:04,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.18 vs. limit=17.73 +2024-09-16 14:05:15,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=13680.0, ans=0.125 +2024-09-16 14:05:57,559 INFO [train.py:1198] (1/2) Epoch 1, batch 3450, loss[loss=0.3896, ctc_loss=0.3775, cr_loss=0.4516, attn_decoder_loss=0.3809, over 28336.00 frames. ], tot_loss[loss=0.3684, ctc_loss=0.3511, cr_loss=0.4421, attn_decoder_loss=0.3605, over 5776272.13 frames. 
], batch size: 111, lr: 4.29e-02, grad_scale: 16.0 +2024-09-16 14:06:08,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=13800.0, ans=0.125 +2024-09-16 14:06:17,587 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.27 vs. limit=17.880000000000003 +2024-09-16 14:06:27,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=13840.0, ans=0.125 +2024-09-16 14:06:41,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=13880.0, ans=0.125 +2024-09-16 14:06:44,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=13920.0, ans=0.09899494936611666 +2024-09-16 14:07:03,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=13960.0, ans=0.025 +2024-09-16 14:07:15,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=14000.0, ans=0.125 +2024-09-16 14:07:16,933 INFO [train.py:1198] (1/2) Epoch 1, batch 3500, loss[loss=0.3277, ctc_loss=0.3018, cr_loss=0.4183, attn_decoder_loss=0.3213, over 29336.00 frames. ], tot_loss[loss=0.3666, ctc_loss=0.3482, cr_loss=0.4412, attn_decoder_loss=0.3589, over 5778045.95 frames. ], batch size: 71, lr: 4.28e-02, grad_scale: 16.0 +2024-09-16 14:07:27,728 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.089e+02 1.349e+02 1.530e+02 1.819e+02 5.462e+02, threshold=3.060e+02, percent-clipped=1.0 +2024-09-16 14:07:47,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=14080.0, ans=0.008 +2024-09-16 14:08:01,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=14120.0, ans=0.125 +2024-09-16 14:08:27,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=14160.0, ans=0.1584 +2024-09-16 14:08:32,825 INFO [train.py:1198] (1/2) Epoch 1, batch 3550, loss[loss=0.3757, ctc_loss=0.3545, cr_loss=0.4522, attn_decoder_loss=0.368, over 29746.00 frames. ], tot_loss[loss=0.3654, ctc_loss=0.346, cr_loss=0.4412, attn_decoder_loss=0.3578, over 5784597.27 frames. 
], batch size: 89, lr: 4.28e-02, grad_scale: 16.0 +2024-09-16 14:08:33,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=14200.0, ans=0.125 +2024-09-16 14:08:42,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=14200.0, ans=0.125 +2024-09-16 14:08:48,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=14240.0, ans=0.125 +2024-09-16 14:08:54,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=14240.0, ans=0.007773913043478261 +2024-09-16 14:08:54,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=14240.0, ans=0.0 +2024-09-16 14:08:54,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=14240.0, ans=0.0073333333333333375 +2024-09-16 14:08:55,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=14240.0, ans=0.05 +2024-09-16 14:09:02,466 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.77 vs. limit=18.21 +2024-09-16 14:09:16,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=14320.0, ans=0.125 +2024-09-16 14:09:49,088 INFO [train.py:1198] (1/2) Epoch 1, batch 3600, loss[loss=0.3551, ctc_loss=0.3425, cr_loss=0.4266, attn_decoder_loss=0.347, over 29529.00 frames. ], tot_loss[loss=0.3653, ctc_loss=0.3454, cr_loss=0.4416, attn_decoder_loss=0.3577, over 5793546.64 frames. ], batch size: 77, lr: 4.27e-02, grad_scale: 32.0 +2024-09-16 14:09:52,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=14400.0, ans=0.125 +2024-09-16 14:09:59,802 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.101e+02 1.344e+02 1.491e+02 1.790e+02 3.419e+02, threshold=2.982e+02, percent-clipped=2.0 +2024-09-16 14:10:07,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=14440.0, ans=0.125 +2024-09-16 14:10:43,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=14520.0, ans=0.39180000000000004 +2024-09-16 14:10:45,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=14520.0, ans=0.39180000000000004 +2024-09-16 14:10:56,975 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.46 vs. limit=12.96 +2024-09-16 14:11:06,524 INFO [train.py:1198] (1/2) Epoch 1, batch 3650, loss[loss=0.3773, ctc_loss=0.3472, cr_loss=0.4695, attn_decoder_loss=0.3702, over 29531.00 frames. ], tot_loss[loss=0.3634, ctc_loss=0.3425, cr_loss=0.4403, attn_decoder_loss=0.3559, over 5794741.81 frames. ], batch size: 90, lr: 4.27e-02, grad_scale: 32.0 +2024-09-16 14:11:07,327 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.73 vs. 
limit=12.975 +2024-09-16 14:11:16,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=14600.0, ans=0.0076956521739130436 +2024-09-16 14:11:20,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=14640.0, ans=0.007686956521739131 +2024-09-16 14:11:34,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=14640.0, ans=0.38760000000000006 +2024-09-16 14:11:48,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=4.58 vs. limit=13.004999999999999 +2024-09-16 14:12:16,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=14760.0, ans=0.1524 +2024-09-16 14:12:22,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=14800.0, ans=0.125 +2024-09-16 14:12:24,269 INFO [train.py:1198] (1/2) Epoch 1, batch 3700, loss[loss=0.3527, ctc_loss=0.315, cr_loss=0.4384, attn_decoder_loss=0.3471, over 29709.00 frames. ], tot_loss[loss=0.3628, ctc_loss=0.3407, cr_loss=0.4412, attn_decoder_loss=0.3554, over 5804682.75 frames. ], batch size: 84, lr: 4.26e-02, grad_scale: 32.0 +2024-09-16 14:12:30,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=14800.0, ans=0.125 +2024-09-16 14:12:34,993 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.086e+02 1.317e+02 1.543e+02 1.858e+02 5.259e+02, threshold=3.086e+02, percent-clipped=2.0 +2024-09-16 14:12:36,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=14800.0, ans=0.15200000000000002 +2024-09-16 14:12:41,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=14840.0, ans=0.125 +2024-09-16 14:12:48,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=14840.0, ans=0.125 +2024-09-16 14:12:50,314 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:13:02,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=14880.0, ans=0.125 +2024-09-16 14:13:06,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=14880.0, ans=0.125 +2024-09-16 14:13:17,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=14920.0, ans=0.004500000000000004 +2024-09-16 14:13:20,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=14920.0, ans=0.0 +2024-09-16 14:13:34,620 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.64 vs. limit=13.11 +2024-09-16 14:13:40,151 INFO [train.py:1198] (1/2) Epoch 1, batch 3750, loss[loss=0.3246, ctc_loss=0.3043, cr_loss=0.3873, attn_decoder_loss=0.3182, over 29314.00 frames. ], tot_loss[loss=0.3618, ctc_loss=0.339, cr_loss=0.441, attn_decoder_loss=0.3546, over 5807432.80 frames. 
], batch size: 67, lr: 4.26e-02, grad_scale: 32.0 +2024-09-16 14:13:55,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=15040.0, ans=0.125 +2024-09-16 14:14:06,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=15040.0, ans=0.0040000000000000036 +2024-09-16 14:14:17,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.26 vs. limit=18.810000000000002 +2024-09-16 14:14:21,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=15080.0, ans=0.025 +2024-09-16 14:14:29,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.91 vs. limit=13.17 +2024-09-16 14:14:30,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=15120.0, ans=0.125 +2024-09-16 14:14:35,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=15120.0, ans=0.05 +2024-09-16 14:14:40,146 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=21.61 vs. limit=13.184999999999999 +2024-09-16 14:14:45,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=15160.0, ans=0.125 +2024-09-16 14:14:54,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=15200.0, ans=0.125 +2024-09-16 14:14:56,686 INFO [train.py:1198] (1/2) Epoch 1, batch 3800, loss[loss=0.3596, ctc_loss=0.3236, cr_loss=0.4534, attn_decoder_loss=0.3535, over 29621.00 frames. ], tot_loss[loss=0.3607, ctc_loss=0.3374, cr_loss=0.4406, attn_decoder_loss=0.3536, over 5798637.30 frames. ], batch size: 86, lr: 4.25e-02, grad_scale: 32.0 +2024-09-16 14:15:07,356 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.116e+02 1.372e+02 1.609e+02 1.860e+02 5.053e+02, threshold=3.218e+02, percent-clipped=1.0 +2024-09-16 14:15:12,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=15240.0, ans=0.003166666666666672 +2024-09-16 14:15:30,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=15280.0, ans=0.125 +2024-09-16 14:15:50,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.43 vs. limit=13.245000000000001 +2024-09-16 14:16:12,331 INFO [train.py:1198] (1/2) Epoch 1, batch 3850, loss[loss=0.3911, ctc_loss=0.3696, cr_loss=0.4564, attn_decoder_loss=0.3834, over 29305.00 frames. ], tot_loss[loss=0.3601, ctc_loss=0.3358, cr_loss=0.4405, attn_decoder_loss=0.3531, over 5812588.15 frames. ], batch size: 100, lr: 4.24e-02, grad_scale: 32.0 +2024-09-16 14:16:29,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=15440.0, ans=0.1456 +2024-09-16 14:16:31,106 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.22 vs. 
limit=19.08 +2024-09-16 14:16:39,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=15440.0, ans=0.125 +2024-09-16 14:16:58,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=15520.0, ans=0.3568 +2024-09-16 14:17:15,495 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.84 vs. limit=13.335 +2024-09-16 14:17:22,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=15560.0, ans=0.125 +2024-09-16 14:17:26,167 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.05 vs. limit=13.335 +2024-09-16 14:17:30,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=15600.0, ans=0.354 +2024-09-16 14:17:31,258 INFO [train.py:1198] (1/2) Epoch 1, batch 3900, loss[loss=0.3826, ctc_loss=0.3477, cr_loss=0.4768, attn_decoder_loss=0.3759, over 29624.00 frames. ], tot_loss[loss=0.36, ctc_loss=0.3347, cr_loss=0.4404, attn_decoder_loss=0.3531, over 5816519.14 frames. ], batch size: 86, lr: 4.24e-02, grad_scale: 32.0 +2024-09-16 14:17:33,699 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.37 vs. limit=13.35 +2024-09-16 14:17:41,709 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.095e+02 1.343e+02 1.512e+02 1.794e+02 6.576e+02, threshold=3.024e+02, percent-clipped=3.0 +2024-09-16 14:17:47,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.75 vs. limit=19.23 +2024-09-16 14:18:13,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=15680.0, ans=0.125 +2024-09-16 14:18:16,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=15720.0, ans=0.125 +2024-09-16 14:18:19,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=15720.0, ans=0.00116666666666667 +2024-09-16 14:18:28,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=15720.0, ans=0.125 +2024-09-16 14:18:30,668 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.39 vs. limit=13.41 +2024-09-16 14:18:31,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.15 vs. 
limit=10.304 +2024-09-16 14:18:33,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=15760.0, ans=0.125 +2024-09-16 14:18:39,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=15760.0, ans=0.125 +2024-09-16 14:18:42,121 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:18:46,496 INFO [train.py:1198] (1/2) Epoch 1, batch 3950, loss[loss=0.3807, ctc_loss=0.3475, cr_loss=0.472, attn_decoder_loss=0.3739, over 29526.00 frames. ], tot_loss[loss=0.3592, ctc_loss=0.3331, cr_loss=0.4403, attn_decoder_loss=0.3523, over 5836065.60 frames. ], batch size: 97, lr: 4.23e-02, grad_scale: 32.0 +2024-09-16 14:18:46,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=15800.0, ans=0.125 +2024-09-16 14:19:27,649 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.85 vs. limit=13.455 +2024-09-16 14:19:45,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=15960.0, ans=0.125 +2024-09-16 14:19:52,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=15960.0, ans=0.14040000000000002 +2024-09-16 14:20:09,358 INFO [train.py:1198] (1/2) Epoch 1, batch 4000, loss[loss=0.3437, ctc_loss=0.3135, cr_loss=0.4249, attn_decoder_loss=0.3376, over 29517.00 frames. ], tot_loss[loss=0.3592, ctc_loss=0.333, cr_loss=0.4411, attn_decoder_loss=0.3523, over 5814234.21 frames. ], batch size: 74, lr: 4.23e-02, grad_scale: 32.0 +2024-09-16 14:20:19,714 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.138e+02 1.414e+02 1.598e+02 1.942e+02 7.205e+02, threshold=3.195e+02, percent-clipped=1.0 +2024-09-16 14:20:55,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=16120.0, ans=0.125 +2024-09-16 14:20:57,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=16120.0, ans=0.025 +2024-09-16 14:21:05,644 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.98 vs. limit=19.59 +2024-09-16 14:21:24,263 INFO [train.py:1198] (1/2) Epoch 1, batch 4050, loss[loss=0.4181, ctc_loss=0.428, cr_loss=0.4519, attn_decoder_loss=0.4069, over 20715.00 frames. ], tot_loss[loss=0.3588, ctc_loss=0.332, cr_loss=0.4398, attn_decoder_loss=0.352, over 5798691.78 frames. ], batch size: 209, lr: 4.22e-02, grad_scale: 32.0 +2024-09-16 14:21:28,056 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.79 vs. 
limit=10.48 +2024-09-16 14:22:20,093 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:22:27,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=16360.0, ans=13.18 +2024-09-16 14:22:33,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=16360.0, ans=0.125 +2024-09-16 14:22:40,685 INFO [train.py:1198] (1/2) Epoch 1, batch 4100, loss[loss=0.3559, ctc_loss=0.3231, cr_loss=0.4021, attn_decoder_loss=0.3506, over 29494.00 frames. ], tot_loss[loss=0.3582, ctc_loss=0.3311, cr_loss=0.4396, attn_decoder_loss=0.3514, over 5794389.84 frames. ], batch size: 90, lr: 4.22e-02, grad_scale: 32.0 +2024-09-16 14:22:51,003 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.063e+02 1.366e+02 1.525e+02 1.800e+02 4.946e+02, threshold=3.051e+02, percent-clipped=3.0 +2024-09-16 14:22:51,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=9.64 vs. limit=13.2 +2024-09-16 14:23:00,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=16440.0, ans=0.0 +2024-09-16 14:23:00,627 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.53 vs. limit=19.83 +2024-09-16 14:23:06,775 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.50 vs. limit=19.83 +2024-09-16 14:23:23,122 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.87 vs. limit=19.86 +2024-09-16 14:23:24,554 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.88 vs. limit=19.89 +2024-09-16 14:23:26,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=16520.0, ans=0.125 +2024-09-16 14:23:37,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=16520.0, ans=0.0072782608695652175 +2024-09-16 14:23:45,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.90 vs. limit=9.14 +2024-09-16 14:23:49,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=16560.0, ans=0.125 +2024-09-16 14:23:54,724 INFO [train.py:1198] (1/2) Epoch 1, batch 4150, loss[loss=0.3506, ctc_loss=0.3328, cr_loss=0.4268, attn_decoder_loss=0.3431, over 29506.00 frames. ], tot_loss[loss=0.3569, ctc_loss=0.3292, cr_loss=0.4392, attn_decoder_loss=0.3502, over 5799535.60 frames. 
], batch size: 77, lr: 4.21e-02, grad_scale: 32.0 +2024-09-16 14:24:04,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=16600.0, ans=0.134 +2024-09-16 14:24:21,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=16640.0, ans=0.125 +2024-09-16 14:24:45,804 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.61 vs. limit=20.04 +2024-09-16 14:24:46,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=16720.0, ans=0.125 +2024-09-16 14:24:51,753 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.50 vs. limit=20.04 +2024-09-16 14:24:56,132 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.22 vs. limit=20.07 +2024-09-16 14:25:09,104 INFO [train.py:1198] (1/2) Epoch 1, batch 4200, loss[loss=0.3863, ctc_loss=0.374, cr_loss=0.4513, attn_decoder_loss=0.3776, over 29514.00 frames. ], tot_loss[loss=0.3566, ctc_loss=0.3286, cr_loss=0.4392, attn_decoder_loss=0.3499, over 5801619.86 frames. ], batch size: 90, lr: 4.20e-02, grad_scale: 32.0 +2024-09-16 14:25:19,663 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.040e+02 1.356e+02 1.563e+02 1.936e+02 3.144e+02, threshold=3.127e+02, percent-clipped=1.0 +2024-09-16 14:25:22,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=33.57 vs. limit=13.8 +2024-09-16 14:25:29,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=6.75 vs. limit=13.815000000000001 +2024-09-16 14:26:13,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=16960.0, ans=0.125 +2024-09-16 14:26:23,037 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.49 vs. limit=13.86 +2024-09-16 14:26:25,320 INFO [train.py:1198] (1/2) Epoch 1, batch 4250, loss[loss=0.3238, ctc_loss=0.2851, cr_loss=0.4299, attn_decoder_loss=0.3185, over 29500.00 frames. ], tot_loss[loss=0.3561, ctc_loss=0.3273, cr_loss=0.4403, attn_decoder_loss=0.3495, over 5806977.32 frames. ], batch size: 74, lr: 4.20e-02, grad_scale: 32.0 +2024-09-16 14:26:27,699 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.91 vs. limit=13.875 +2024-09-16 14:26:35,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=17000.0, ans=0.0071739130434782614 +2024-09-16 14:26:49,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=17040.0, ans=0.0 +2024-09-16 14:26:51,470 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.59 vs. 
limit=9.26 +2024-09-16 14:27:07,759 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.76 vs. limit=20.310000000000002 +2024-09-16 14:27:18,025 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.69 vs. limit=13.92 +2024-09-16 14:27:29,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=17160.0, ans=0.125 +2024-09-16 14:27:34,065 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.90 vs. limit=13.934999999999999 +2024-09-16 14:27:39,369 INFO [train.py:1198] (1/2) Epoch 1, batch 4300, loss[loss=0.361, ctc_loss=0.3217, cr_loss=0.3916, attn_decoder_loss=0.3567, over 29529.00 frames. ], tot_loss[loss=0.3567, ctc_loss=0.3278, cr_loss=0.441, attn_decoder_loss=0.3501, over 5794749.32 frames. ], batch size: 87, lr: 4.19e-02, grad_scale: 32.0 +2024-09-16 14:27:49,860 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.095e+02 1.364e+02 1.537e+02 1.919e+02 5.209e+02, threshold=3.074e+02, percent-clipped=5.0 +2024-09-16 14:27:53,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=17240.0, ans=0.125 +2024-09-16 14:28:15,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=17280.0, ans=0.125 +2024-09-16 14:28:16,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=17280.0, ans=0.025 +2024-09-16 14:28:18,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=17280.0, ans=0.0 +2024-09-16 14:28:36,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=17320.0, ans=0.007104347826086957 +2024-09-16 14:28:43,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.80 vs. limit=9.34 +2024-09-16 14:28:53,573 INFO [train.py:1198] (1/2) Epoch 1, batch 4350, loss[loss=0.3681, ctc_loss=0.3322, cr_loss=0.4877, attn_decoder_loss=0.3612, over 29472.00 frames. ], tot_loss[loss=0.3604, ctc_loss=0.3311, cr_loss=0.4455, attn_decoder_loss=0.3537, over 5796064.83 frames. ], batch size: 97, lr: 4.19e-02, grad_scale: 32.0 +2024-09-16 14:29:18,357 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.61 vs. limit=14.04 +2024-09-16 14:29:24,195 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.55 vs. limit=10.992 +2024-09-16 14:29:24,309 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.30 vs. 
limit=14.055 +2024-09-16 14:29:29,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=17480.0, ans=0.125 +2024-09-16 14:30:06,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=17560.0, ans=0.007052173913043479 +2024-09-16 14:30:09,474 INFO [train.py:1198] (1/2) Epoch 1, batch 4400, loss[loss=0.3785, ctc_loss=0.3576, cr_loss=0.459, attn_decoder_loss=0.3706, over 27634.00 frames. ], tot_loss[loss=0.3634, ctc_loss=0.3345, cr_loss=0.448, attn_decoder_loss=0.3566, over 5767336.86 frames. ], batch size: 125, lr: 4.18e-02, grad_scale: 32.0 +2024-09-16 14:30:17,695 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=19.54 vs. limit=14.1 +2024-09-16 14:30:19,702 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.121e+02 1.315e+02 1.467e+02 1.766e+02 6.671e+02, threshold=2.933e+02, percent-clipped=1.0 +2024-09-16 14:30:21,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=17600.0, ans=0.125 +2024-09-16 14:30:22,306 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.69 vs. limit=20.7 +2024-09-16 14:30:43,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=17680.0, ans=0.1232 +2024-09-16 14:30:54,093 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.03 vs. limit=20.79 +2024-09-16 14:31:02,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=17720.0, ans=0.0 +2024-09-16 14:31:24,219 INFO [train.py:1198] (1/2) Epoch 1, batch 4450, loss[loss=0.3996, ctc_loss=0.4028, cr_loss=0.4591, attn_decoder_loss=0.3891, over 20129.00 frames. ], tot_loss[loss=0.3671, ctc_loss=0.3411, cr_loss=0.4496, attn_decoder_loss=0.36, over 5571434.34 frames. ], batch size: 211, lr: 4.17e-02, grad_scale: 32.0 +2024-09-16 14:31:51,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=17840.0, ans=0.27560000000000007 +2024-09-16 14:31:54,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=17880.0, ans=0.125 +2024-09-16 14:31:57,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=17880.0, ans=0.0 +2024-09-16 14:32:21,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=17920.0, ans=0.9291999999999999 +2024-09-16 14:32:40,074 INFO [train.py:1198] (1/2) Epoch 1, batch 4500, loss[loss=0.4013, ctc_loss=0.4238, cr_loss=0.4781, attn_decoder_loss=0.3882, over 19448.00 frames. ], tot_loss[loss=0.3719, ctc_loss=0.351, cr_loss=0.4478, attn_decoder_loss=0.3642, over 5230953.38 frames. 
], batch size: 209, lr: 4.17e-02, grad_scale: 32.0 +2024-09-16 14:32:50,391 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.043e+02 1.290e+02 1.458e+02 1.671e+02 6.229e+02, threshold=2.915e+02, percent-clipped=1.0 +2024-09-16 14:33:08,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=18080.0, ans=0.125 +2024-09-16 14:33:14,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=18080.0, ans=0.006939130434782609 +2024-09-16 14:34:13,795 INFO [train.py:1198] (1/2) Epoch 2, batch 0, loss[loss=0.4708, ctc_loss=0.2886, cr_loss=0.4265, attn_decoder_loss=0.4815, over 29594.00 frames. ], tot_loss[loss=0.4708, ctc_loss=0.2886, cr_loss=0.4265, attn_decoder_loss=0.4815, over 29594.00 frames. ], batch size: 73, lr: 4.08e-02, grad_scale: 32.0 +2024-09-16 14:34:13,795 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 14:34:17,158 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([1.5251, 2.5152, 2.5979, 2.5003, 2.3752, 2.2946, 1.8668, 2.5723], + device='cuda:1') +2024-09-16 14:34:32,033 INFO [train.py:1230] (1/2) Epoch 2, validation: loss=0.3071, ctc_loss=0.1367, cr_loss=4.721e-15, attn_decoder_loss=0.326, over 944034.00 frames. +2024-09-16 14:34:32,034 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 14:34:52,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=18140.0, ans=0.125 +2024-09-16 14:35:04,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=18180.0, ans=0.0 +2024-09-16 14:35:14,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=18180.0, ans=0.025 +2024-09-16 14:35:23,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=18220.0, ans=0.0 +2024-09-16 14:35:36,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=18260.0, ans=0.125 +2024-09-16 14:35:37,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=18260.0, ans=0.125 +2024-09-16 14:35:48,040 INFO [train.py:1198] (1/2) Epoch 2, batch 50, loss[loss=0.3224, ctc_loss=0.2918, cr_loss=0.3775, attn_decoder_loss=0.3174, over 29433.00 frames. ], tot_loss[loss=0.3767, ctc_loss=0.3397, cr_loss=0.4457, attn_decoder_loss=0.3709, over 1270305.95 frames. ], batch size: 70, lr: 4.08e-02, grad_scale: 16.0 +2024-09-16 14:35:59,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=18300.0, ans=0.125 +2024-09-16 14:36:10,316 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.81 vs. 
limit=9.585 +2024-09-16 14:36:11,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=18340.0, ans=0.125 +2024-09-16 14:36:12,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=18340.0, ans=0.11660000000000001 +2024-09-16 14:36:42,199 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.088e+02 1.396e+02 1.768e+02 2.293e+02 2.873e+03, threshold=3.536e+02, percent-clipped=13.0 +2024-09-16 14:36:48,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=18420.0, ans=0.006865217391304348 +2024-09-16 14:37:06,514 INFO [train.py:1198] (1/2) Epoch 2, batch 100, loss[loss=0.3445, ctc_loss=0.314, cr_loss=0.4464, attn_decoder_loss=0.3379, over 29528.00 frames. ], tot_loss[loss=0.3678, ctc_loss=0.3345, cr_loss=0.4466, attn_decoder_loss=0.3616, over 2255162.46 frames. ], batch size: 76, lr: 4.07e-02, grad_scale: 16.0 +2024-09-16 14:37:08,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=18500.0, ans=0.006847826086956521 +2024-09-16 14:37:11,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=18500.0, ans=0.125 +2024-09-16 14:37:17,459 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:37:34,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=18540.0, ans=0.125 +2024-09-16 14:37:37,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=18580.0, ans=0.0 +2024-09-16 14:37:38,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=18580.0, ans=0.125 +2024-09-16 14:38:20,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=18660.0, ans=0.125 +2024-09-16 14:38:24,322 INFO [train.py:1198] (1/2) Epoch 2, batch 150, loss[loss=0.3233, ctc_loss=0.292, cr_loss=0.3967, attn_decoder_loss=0.3179, over 29458.00 frames. ], tot_loss[loss=0.3605, ctc_loss=0.3274, cr_loss=0.4437, attn_decoder_loss=0.3543, over 3050116.44 frames. ], batch size: 70, lr: 4.06e-02, grad_scale: 16.0 +2024-09-16 14:38:38,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=18740.0, ans=0.125 +2024-09-16 14:39:07,537 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=26.14 vs. 
limit=21.585 +2024-09-16 14:39:14,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=18820.0, ans=0.0 +2024-09-16 14:39:15,766 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.006e+02 1.312e+02 1.456e+02 1.615e+02 4.569e+02, threshold=2.911e+02, percent-clipped=2.0 +2024-09-16 14:39:17,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=18820.0, ans=0.125 +2024-09-16 14:39:31,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=18860.0, ans=0.125 +2024-09-16 14:39:37,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=18860.0, ans=0.1114 +2024-09-16 14:39:40,016 INFO [train.py:1198] (1/2) Epoch 2, batch 200, loss[loss=0.3792, ctc_loss=0.3595, cr_loss=0.4661, attn_decoder_loss=0.3711, over 27317.00 frames. ], tot_loss[loss=0.3561, ctc_loss=0.3229, cr_loss=0.4423, attn_decoder_loss=0.35, over 3661663.80 frames. ], batch size: 124, lr: 4.06e-02, grad_scale: 16.0 +2024-09-16 14:39:49,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=18900.0, ans=0.025 +2024-09-16 14:39:49,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.38 vs. limit=21.675 +2024-09-16 14:39:56,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=18940.0, ans=0.07 +2024-09-16 14:40:09,761 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=14.38 vs. limit=14.49 +2024-09-16 14:40:20,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.83 vs. limit=5.8469999999999995 +2024-09-16 14:40:30,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=19020.0, ans=0.05979999999999999 +2024-09-16 14:40:35,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=19020.0, ans=0.0 +2024-09-16 14:40:43,445 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=23.45 vs. limit=21.795 +2024-09-16 14:40:52,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=19060.0, ans=0.125 +2024-09-16 14:40:58,056 INFO [train.py:1198] (1/2) Epoch 2, batch 250, loss[loss=0.3685, ctc_loss=0.3332, cr_loss=0.4475, attn_decoder_loss=0.3625, over 29234.00 frames. ], tot_loss[loss=0.3541, ctc_loss=0.3204, cr_loss=0.442, attn_decoder_loss=0.3481, over 4142732.57 frames. 
], batch size: 100, lr: 4.05e-02, grad_scale: 16.0 +2024-09-16 14:41:11,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=19140.0, ans=0.1086 +2024-09-16 14:41:19,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=19140.0, ans=0.025 +2024-09-16 14:41:28,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=19180.0, ans=0.125 +2024-09-16 14:41:29,158 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.42 vs. limit=14.692499999999999 +2024-09-16 14:41:41,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=19180.0, ans=0.125 +2024-09-16 14:41:50,140 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.071e+02 1.356e+02 1.504e+02 1.757e+02 3.092e+02, threshold=3.008e+02, percent-clipped=1.0 +2024-09-16 14:42:03,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=19260.0, ans=0.10740000000000002 +2024-09-16 14:42:12,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=19260.0, ans=0.07 +2024-09-16 14:42:16,764 INFO [train.py:1198] (1/2) Epoch 2, batch 300, loss[loss=0.3841, ctc_loss=0.3598, cr_loss=0.4378, attn_decoder_loss=0.377, over 29560.00 frames. ], tot_loss[loss=0.3519, ctc_loss=0.3174, cr_loss=0.4414, attn_decoder_loss=0.346, over 4511086.67 frames. ], batch size: 92, lr: 4.05e-02, grad_scale: 16.0 +2024-09-16 14:42:23,324 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:42:24,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=19300.0, ans=0.125 +2024-09-16 14:42:24,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=19300.0, ans=0.025 +2024-09-16 14:42:32,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=19340.0, ans=0.1066 +2024-09-16 14:42:32,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=19340.0, ans=0.125 +2024-09-16 14:42:50,700 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:43:21,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=19460.0, ans=0.125 +2024-09-16 14:43:27,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=19460.0, ans=0.0 +2024-09-16 14:43:33,254 INFO [train.py:1198] (1/2) Epoch 2, batch 350, loss[loss=0.3327, ctc_loss=0.3009, cr_loss=0.4367, attn_decoder_loss=0.3265, over 29283.00 frames. ], tot_loss[loss=0.3513, ctc_loss=0.3163, cr_loss=0.4409, attn_decoder_loss=0.3454, over 4796093.25 frames. 
], batch size: 71, lr: 4.04e-02, grad_scale: 16.0 +2024-09-16 14:43:38,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=19500.0, ans=0.125 +2024-09-16 14:43:42,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=19500.0, ans=0.125 +2024-09-16 14:43:56,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=19540.0, ans=0.025 +2024-09-16 14:44:26,663 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.088e+02 1.410e+02 1.578e+02 1.828e+02 5.190e+02, threshold=3.157e+02, percent-clipped=4.0 +2024-09-16 14:44:33,876 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.89 vs. limit=22.215 +2024-09-16 14:44:39,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=19660.0, ans=0.10340000000000002 +2024-09-16 14:44:51,068 INFO [train.py:1198] (1/2) Epoch 2, batch 400, loss[loss=0.355, ctc_loss=0.3204, cr_loss=0.4304, attn_decoder_loss=0.3493, over 29712.00 frames. ], tot_loss[loss=0.3502, ctc_loss=0.3144, cr_loss=0.4406, attn_decoder_loss=0.3444, over 5024529.21 frames. ], batch size: 82, lr: 4.03e-02, grad_scale: 32.0 +2024-09-16 14:44:51,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=19700.0, ans=0.21050000000000002 +2024-09-16 14:44:58,322 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.66 vs. limit=5.955 +2024-09-16 14:45:03,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=19700.0, ans=0.125 +2024-09-16 14:45:03,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=19700.0, ans=0.006586956521739131 +2024-09-16 14:45:17,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=19740.0, ans=0.006578260869565217 +2024-09-16 14:45:25,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=19780.0, ans=0.49670000000000003 +2024-09-16 14:45:34,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.12 vs. limit=5.9670000000000005 +2024-09-16 14:45:37,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=19820.0, ans=0.1018 +2024-09-16 14:46:10,055 INFO [train.py:1198] (1/2) Epoch 2, batch 450, loss[loss=0.3631, ctc_loss=0.3226, cr_loss=0.4488, attn_decoder_loss=0.3576, over 29688.00 frames. ], tot_loss[loss=0.3496, ctc_loss=0.3139, cr_loss=0.4412, attn_decoder_loss=0.3438, over 5186045.64 frames. 
], batch size: 83, lr: 4.03e-02, grad_scale: 32.0 +2024-09-16 14:46:18,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=19900.0, ans=0.101 +2024-09-16 14:46:22,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=19900.0, ans=0.07 +2024-09-16 14:46:31,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=19940.0, ans=0.125 +2024-09-16 14:46:38,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=14.9775 +2024-09-16 14:46:40,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=19980.0, ans=0.125 +2024-09-16 14:47:02,431 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.051e+02 1.299e+02 1.486e+02 1.745e+02 5.446e+02, threshold=2.972e+02, percent-clipped=3.0 +2024-09-16 14:47:21,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten.whitening_limit, batch_count=20060.0, ans=15.0 +2024-09-16 14:47:26,734 INFO [train.py:1198] (1/2) Epoch 2, batch 500, loss[loss=0.3778, ctc_loss=0.3389, cr_loss=0.4826, attn_decoder_loss=0.3714, over 29465.00 frames. ], tot_loss[loss=0.3479, ctc_loss=0.3119, cr_loss=0.4398, attn_decoder_loss=0.3421, over 5328827.63 frames. ], batch size: 94, lr: 4.02e-02, grad_scale: 32.0 +2024-09-16 14:48:06,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=20180.0, ans=0.0 +2024-09-16 14:48:15,504 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.67 vs. limit=22.5 +2024-09-16 14:48:30,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=20260.0, ans=0.2 +2024-09-16 14:48:34,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=20260.0, ans=0.0 +2024-09-16 14:48:39,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=20260.0, ans=0.2 +2024-09-16 14:48:43,731 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 14:48:44,961 INFO [train.py:1198] (1/2) Epoch 2, batch 550, loss[loss=0.3651, ctc_loss=0.3238, cr_loss=0.4856, attn_decoder_loss=0.3589, over 28893.00 frames. ], tot_loss[loss=0.348, ctc_loss=0.3122, cr_loss=0.4403, attn_decoder_loss=0.3422, over 5423615.25 frames. 
], batch size: 104, lr: 4.02e-02, grad_scale: 16.0 +2024-09-16 14:49:11,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=20340.0, ans=0.125 +2024-09-16 14:49:17,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=20380.0, ans=10.0 +2024-09-16 14:49:24,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=20380.0, ans=0.0 +2024-09-16 14:49:26,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=20380.0, ans=0.1 +2024-09-16 14:49:32,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=20420.0, ans=0.0 +2024-09-16 14:49:38,286 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.068e+02 1.358e+02 1.600e+02 1.893e+02 5.686e+02, threshold=3.199e+02, percent-clipped=4.0 +2024-09-16 14:49:38,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=20420.0, ans=0.006430434782608695 +2024-09-16 14:49:56,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=20460.0, ans=0.125 +2024-09-16 14:50:03,473 INFO [train.py:1198] (1/2) Epoch 2, batch 600, loss[loss=0.3809, ctc_loss=0.3436, cr_loss=0.4808, attn_decoder_loss=0.3743, over 29223.00 frames. ], tot_loss[loss=0.3482, ctc_loss=0.3121, cr_loss=0.4404, attn_decoder_loss=0.3424, over 5510942.27 frames. ], batch size: 100, lr: 4.01e-02, grad_scale: 16.0 +2024-09-16 14:50:11,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=20500.0, ans=0.0 +2024-09-16 14:50:15,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=3.97 vs. limit=5.0 +2024-09-16 14:50:23,928 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.39 vs. limit=15.0 +2024-09-16 14:50:26,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=20540.0, ans=0.0 +2024-09-16 14:50:38,864 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.99 vs. limit=15.0 +2024-09-16 14:50:44,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=20580.0, ans=0.125 +2024-09-16 14:50:56,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=20620.0, ans=0.0 +2024-09-16 14:50:56,974 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.51 vs. 
limit=22.5 +2024-09-16 14:51:11,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=20660.0, ans=0.2 +2024-09-16 14:51:13,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=20660.0, ans=0.125 +2024-09-16 14:51:19,267 INFO [train.py:1198] (1/2) Epoch 2, batch 650, loss[loss=0.3464, ctc_loss=0.3001, cr_loss=0.4706, attn_decoder_loss=0.3411, over 29756.00 frames. ], tot_loss[loss=0.347, ctc_loss=0.3104, cr_loss=0.4404, attn_decoder_loss=0.3413, over 5587926.83 frames. ], batch size: 81, lr: 4.00e-02, grad_scale: 16.0 +2024-09-16 14:51:23,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.22 vs. limit=22.5 +2024-09-16 14:51:29,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.37 vs. limit=22.5 +2024-09-16 14:51:51,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=20780.0, ans=0.1 +2024-09-16 14:52:14,475 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.31 vs. limit=22.5 +2024-09-16 14:52:15,199 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.068e+02 1.306e+02 1.501e+02 1.738e+02 3.373e+02, threshold=3.002e+02, percent-clipped=2.0 +2024-09-16 14:52:26,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=20860.0, ans=0.125 +2024-09-16 14:52:29,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=20860.0, ans=0.125 +2024-09-16 14:52:35,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.57 vs. limit=15.0 +2024-09-16 14:52:37,544 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.10 vs. limit=15.0 +2024-09-16 14:52:38,091 INFO [train.py:1198] (1/2) Epoch 2, batch 700, loss[loss=0.3261, ctc_loss=0.2765, cr_loss=0.4373, attn_decoder_loss=0.3219, over 29537.00 frames. ], tot_loss[loss=0.3473, ctc_loss=0.3108, cr_loss=0.4417, attn_decoder_loss=0.3416, over 5638246.76 frames. ], batch size: 76, lr: 4.00e-02, grad_scale: 16.0 +2024-09-16 14:52:54,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=20940.0, ans=0.2 +2024-09-16 14:52:59,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=20940.0, ans=0.1 +2024-09-16 14:53:03,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=20940.0, ans=0.1 +2024-09-16 14:53:07,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=20980.0, ans=0.006308695652173913 +2024-09-16 14:53:19,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=16.23 vs. 
limit=15.0 +2024-09-16 14:53:19,824 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.47 vs. limit=15.0 +2024-09-16 14:53:36,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=21020.0, ans=0.2 +2024-09-16 14:53:56,473 INFO [train.py:1198] (1/2) Epoch 2, batch 750, loss[loss=0.3542, ctc_loss=0.3113, cr_loss=0.476, attn_decoder_loss=0.3484, over 29710.00 frames. ], tot_loss[loss=0.3463, ctc_loss=0.3097, cr_loss=0.4415, attn_decoder_loss=0.3406, over 5676318.46 frames. ], batch size: 82, lr: 3.99e-02, grad_scale: 16.0 +2024-09-16 14:54:07,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=21100.0, ans=0.125 +2024-09-16 14:54:31,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=21180.0, ans=0.2 +2024-09-16 14:54:48,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=21220.0, ans=0.006256521739130435 +2024-09-16 14:54:49,443 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.006e+02 1.356e+02 1.549e+02 1.774e+02 3.247e+02, threshold=3.098e+02, percent-clipped=2.0 +2024-09-16 14:54:52,297 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=16.87 vs. limit=22.5 +2024-09-16 14:55:10,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.02 vs. limit=15.0 +2024-09-16 14:55:12,221 INFO [train.py:1198] (1/2) Epoch 2, batch 800, loss[loss=0.318, ctc_loss=0.2819, cr_loss=0.3968, attn_decoder_loss=0.3132, over 29613.00 frames. ], tot_loss[loss=0.3459, ctc_loss=0.309, cr_loss=0.4403, attn_decoder_loss=0.3402, over 5706596.29 frames. ], batch size: 73, lr: 3.98e-02, grad_scale: 32.0 +2024-09-16 14:55:34,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=21340.0, ans=0.025 +2024-09-16 14:55:37,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=21340.0, ans=0.2 +2024-09-16 14:55:40,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.22 vs. limit=22.5 +2024-09-16 14:56:05,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=21420.0, ans=0.0 +2024-09-16 14:56:05,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=21420.0, ans=0.1 +2024-09-16 14:56:29,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=21500.0, ans=0.125 +2024-09-16 14:56:30,085 INFO [train.py:1198] (1/2) Epoch 2, batch 850, loss[loss=0.3659, ctc_loss=0.3261, cr_loss=0.4912, attn_decoder_loss=0.3594, over 29692.00 frames. ], tot_loss[loss=0.3448, ctc_loss=0.3075, cr_loss=0.4408, attn_decoder_loss=0.3391, over 5735337.33 frames. 
], batch size: 89, lr: 3.98e-02, grad_scale: 16.0 +2024-09-16 14:56:42,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=21500.0, ans=0.2 +2024-09-16 14:56:42,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=14.82 vs. limit=15.0 +2024-09-16 14:56:49,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=21540.0, ans=0.2 +2024-09-16 14:56:57,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=21540.0, ans=0.125 +2024-09-16 14:56:58,135 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.85 vs. limit=15.0 +2024-09-16 14:57:02,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=21580.0, ans=0.125 +2024-09-16 14:57:02,990 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=24.63 vs. limit=22.5 +2024-09-16 14:57:05,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=21580.0, ans=0.2 +2024-09-16 14:57:05,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=21580.0, ans=0.125 +2024-09-16 14:57:06,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=21580.0, ans=0.025 +2024-09-16 14:57:24,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=21620.0, ans=0.125 +2024-09-16 14:57:25,361 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.083e+02 1.316e+02 1.489e+02 1.639e+02 3.105e+02, threshold=2.978e+02, percent-clipped=1.0 +2024-09-16 14:57:27,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=21620.0, ans=0.1 +2024-09-16 14:57:46,616 INFO [train.py:1198] (1/2) Epoch 2, batch 900, loss[loss=0.3018, ctc_loss=0.263, cr_loss=0.3901, attn_decoder_loss=0.2974, over 29599.00 frames. ], tot_loss[loss=0.3447, ctc_loss=0.307, cr_loss=0.4404, attn_decoder_loss=0.3391, over 5740665.61 frames. ], batch size: 73, lr: 3.97e-02, grad_scale: 16.0 +2024-09-16 14:58:06,617 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.15 vs. limit=15.0 +2024-09-16 14:58:24,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=21780.0, ans=0.1 +2024-09-16 14:59:04,593 INFO [train.py:1198] (1/2) Epoch 2, batch 950, loss[loss=0.3093, ctc_loss=0.2615, cr_loss=0.3908, attn_decoder_loss=0.3059, over 29521.00 frames. ], tot_loss[loss=0.3447, ctc_loss=0.3069, cr_loss=0.4402, attn_decoder_loss=0.3392, over 5741737.12 frames. 
], batch size: 74, lr: 3.97e-02, grad_scale: 16.0 +2024-09-16 14:59:46,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=21980.0, ans=0.0 +2024-09-16 14:59:52,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=22020.0, ans=0.006082608695652174 +2024-09-16 15:00:01,565 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.124e+02 1.375e+02 1.582e+02 1.931e+02 4.850e+02, threshold=3.164e+02, percent-clipped=3.0 +2024-09-16 15:00:06,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=22060.0, ans=0.125 +2024-09-16 15:00:06,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=22060.0, ans=0.125 +2024-09-16 15:00:22,486 INFO [train.py:1198] (1/2) Epoch 2, batch 1000, loss[loss=0.3403, ctc_loss=0.2943, cr_loss=0.4548, attn_decoder_loss=0.3353, over 29508.00 frames. ], tot_loss[loss=0.3455, ctc_loss=0.3081, cr_loss=0.4411, attn_decoder_loss=0.3399, over 5736222.82 frames. ], batch size: 77, lr: 3.96e-02, grad_scale: 16.0 +2024-09-16 15:00:24,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=22100.0, ans=0.125 +2024-09-16 15:00:24,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=22100.0, ans=0.125 +2024-09-16 15:00:33,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=22100.0, ans=0.006065217391304348 +2024-09-16 15:00:33,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=22100.0, ans=0.2 +2024-09-16 15:00:37,232 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=6.72 vs. limit=15.0 +2024-09-16 15:00:47,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=22140.0, ans=0.0 +2024-09-16 15:01:02,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=22180.0, ans=0.125 +2024-09-16 15:01:08,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=22220.0, ans=0.125 +2024-09-16 15:01:18,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=22220.0, ans=0.0060391304347826085 +2024-09-16 15:01:25,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=22260.0, ans=0.006030434782608696 +2024-09-16 15:01:27,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=22260.0, ans=0.025 +2024-09-16 15:01:29,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=22260.0, ans=0.04949747468305833 +2024-09-16 15:01:38,483 INFO [train.py:1198] (1/2) Epoch 2, batch 1050, loss[loss=0.3496, ctc_loss=0.3063, cr_loss=0.4512, attn_decoder_loss=0.3444, over 29699.00 frames. 
], tot_loss[loss=0.3442, ctc_loss=0.3063, cr_loss=0.4399, attn_decoder_loss=0.3387, over 5744929.69 frames. ], batch size: 85, lr: 3.95e-02, grad_scale: 16.0 +2024-09-16 15:01:53,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=22300.0, ans=0.5 +2024-09-16 15:02:03,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.66 vs. limit=15.0 +2024-09-16 15:02:05,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=22340.0, ans=0.1 +2024-09-16 15:02:07,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.38 vs. limit=15.0 +2024-09-16 15:02:14,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=22380.0, ans=0.1 +2024-09-16 15:02:20,172 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.29 vs. limit=12.0 +2024-09-16 15:02:27,756 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.76 vs. limit=22.5 +2024-09-16 15:02:36,118 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.038e+02 1.353e+02 1.564e+02 1.813e+02 2.890e+02, threshold=3.129e+02, percent-clipped=0.0 +2024-09-16 15:02:36,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=22420.0, ans=0.125 +2024-09-16 15:02:57,507 INFO [train.py:1198] (1/2) Epoch 2, batch 1100, loss[loss=0.3307, ctc_loss=0.2855, cr_loss=0.4317, attn_decoder_loss=0.3262, over 29436.00 frames. ], tot_loss[loss=0.3439, ctc_loss=0.3059, cr_loss=0.4403, attn_decoder_loss=0.3383, over 5756191.57 frames. ], batch size: 78, lr: 3.95e-02, grad_scale: 16.0 +2024-09-16 15:03:05,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=22500.0, ans=0.0 +2024-09-16 15:03:17,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=22540.0, ans=0.025 +2024-09-16 15:03:19,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=22540.0, ans=0.1 +2024-09-16 15:03:23,999 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.48 vs. limit=15.0 +2024-09-16 15:03:36,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.45 vs. limit=6.0 +2024-09-16 15:03:45,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=22620.0, ans=0.125 +2024-09-16 15:04:15,683 INFO [train.py:1198] (1/2) Epoch 2, batch 1150, loss[loss=0.3362, ctc_loss=0.2883, cr_loss=0.4183, attn_decoder_loss=0.3322, over 29461.00 frames. ], tot_loss[loss=0.3435, ctc_loss=0.3057, cr_loss=0.44, attn_decoder_loss=0.338, over 5755340.58 frames. 
], batch size: 78, lr: 3.94e-02, grad_scale: 16.0 +2024-09-16 15:04:16,956 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=8.49 vs. limit=15.0 +2024-09-16 15:04:19,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=22700.0, ans=0.1 +2024-09-16 15:04:25,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=22700.0, ans=0.0 +2024-09-16 15:04:55,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=22780.0, ans=0.2 +2024-09-16 15:05:03,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=22820.0, ans=0.025 +2024-09-16 15:05:10,470 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.047e+02 1.307e+02 1.503e+02 1.816e+02 4.036e+02, threshold=3.005e+02, percent-clipped=3.0 +2024-09-16 15:05:10,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=22820.0, ans=0.125 +2024-09-16 15:05:14,512 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=24.54 vs. limit=22.5 +2024-09-16 15:05:18,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=22860.0, ans=0.07 +2024-09-16 15:05:31,660 INFO [train.py:1198] (1/2) Epoch 2, batch 1200, loss[loss=0.3543, ctc_loss=0.3166, cr_loss=0.4489, attn_decoder_loss=0.3486, over 29669.00 frames. ], tot_loss[loss=0.3445, ctc_loss=0.3063, cr_loss=0.4408, attn_decoder_loss=0.339, over 5747218.38 frames. ], batch size: 85, lr: 3.93e-02, grad_scale: 32.0 +2024-09-16 15:05:45,059 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:05:45,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=22900.0, ans=0.09899494936611666 +2024-09-16 15:05:46,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=22900.0, ans=0.2 +2024-09-16 15:05:54,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=3.11 vs. limit=15.0 +2024-09-16 15:05:58,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=22940.0, ans=0.1 +2024-09-16 15:06:06,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=22980.0, ans=0.0 +2024-09-16 15:06:17,798 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.78 vs. limit=22.5 +2024-09-16 15:06:17,874 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.99 vs. 
limit=15.0 +2024-09-16 15:06:18,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=23020.0, ans=0.125 +2024-09-16 15:06:18,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=23020.0, ans=10.0 +2024-09-16 15:06:20,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys.whitening_limit, batch_count=23020.0, ans=6.0 +2024-09-16 15:06:24,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=23020.0, ans=0.05 +2024-09-16 15:06:44,715 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=1.349e-02 +2024-09-16 15:06:44,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=23060.0, ans=0.125 +2024-09-16 15:06:49,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=23100.0, ans=0.125 +2024-09-16 15:06:50,267 INFO [train.py:1198] (1/2) Epoch 2, batch 1250, loss[loss=0.3597, ctc_loss=0.3196, cr_loss=0.4764, attn_decoder_loss=0.3535, over 29528.00 frames. ], tot_loss[loss=0.3451, ctc_loss=0.3067, cr_loss=0.4426, attn_decoder_loss=0.3396, over 5775085.82 frames. ], batch size: 92, lr: 3.93e-02, grad_scale: 16.0 +2024-09-16 15:06:58,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=23100.0, ans=10.0 +2024-09-16 15:07:08,139 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.61 vs. limit=15.0 +2024-09-16 15:07:14,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=23140.0, ans=0.1 +2024-09-16 15:07:35,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=23180.0, ans=0.2 +2024-09-16 15:07:37,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=23220.0, ans=0.125 +2024-09-16 15:07:47,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=23220.0, ans=0.1 +2024-09-16 15:07:49,027 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.075e+02 1.374e+02 1.508e+02 1.823e+02 4.800e+02, threshold=3.017e+02, percent-clipped=3.0 +2024-09-16 15:07:56,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.41 vs. limit=12.0 +2024-09-16 15:08:08,751 INFO [train.py:1198] (1/2) Epoch 2, batch 1300, loss[loss=0.3553, ctc_loss=0.3186, cr_loss=0.466, attn_decoder_loss=0.349, over 28454.00 frames. ], tot_loss[loss=0.3438, ctc_loss=0.3051, cr_loss=0.4423, attn_decoder_loss=0.3383, over 5780654.09 frames. ], batch size: 112, lr: 3.92e-02, grad_scale: 16.0 +2024-09-16 15:08:22,296 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.25 vs. 
limit=15.0 +2024-09-16 15:08:46,283 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.83 vs. limit=10.0 +2024-09-16 15:08:47,172 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:09:10,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=23460.0, ans=0.025 +2024-09-16 15:09:14,826 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=4.452e-02 +2024-09-16 15:09:20,841 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:09:25,048 INFO [train.py:1198] (1/2) Epoch 2, batch 1350, loss[loss=0.3524, ctc_loss=0.3135, cr_loss=0.4647, attn_decoder_loss=0.3464, over 29715.00 frames. ], tot_loss[loss=0.3422, ctc_loss=0.3026, cr_loss=0.4415, attn_decoder_loss=0.3367, over 5794960.07 frames. ], batch size: 81, lr: 3.91e-02, grad_scale: 16.0 +2024-09-16 15:09:28,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=23500.0, ans=0.125 +2024-09-16 15:09:36,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=23500.0, ans=0.125 +2024-09-16 15:09:40,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=23540.0, ans=0.125 +2024-09-16 15:09:59,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=23580.0, ans=0.0 +2024-09-16 15:10:23,027 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.062e+02 1.283e+02 1.428e+02 1.705e+02 2.892e+02, threshold=2.856e+02, percent-clipped=0.0 +2024-09-16 15:10:42,646 INFO [train.py:1198] (1/2) Epoch 2, batch 1400, loss[loss=0.2969, ctc_loss=0.2581, cr_loss=0.3772, attn_decoder_loss=0.2928, over 29599.00 frames. ], tot_loss[loss=0.3418, ctc_loss=0.302, cr_loss=0.4418, attn_decoder_loss=0.3364, over 5806361.29 frames. ], batch size: 69, lr: 3.91e-02, grad_scale: 16.0 +2024-09-16 15:10:55,692 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.85 vs. limit=6.0 +2024-09-16 15:10:56,921 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.28 vs. 
limit=15.0 +2024-09-16 15:10:58,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=23740.0, ans=0.0 +2024-09-16 15:11:00,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=23740.0, ans=0.0 +2024-09-16 15:11:13,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=23780.0, ans=0.125 +2024-09-16 15:11:46,797 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:11:49,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=23860.0, ans=0.125 +2024-09-16 15:12:00,334 INFO [train.py:1198] (1/2) Epoch 2, batch 1450, loss[loss=0.3701, ctc_loss=0.329, cr_loss=0.4833, attn_decoder_loss=0.364, over 29409.00 frames. ], tot_loss[loss=0.3423, ctc_loss=0.3026, cr_loss=0.4417, attn_decoder_loss=0.3369, over 5804801.74 frames. ], batch size: 94, lr: 3.90e-02, grad_scale: 16.0 +2024-09-16 15:12:11,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=23900.0, ans=0.0 +2024-09-16 15:12:20,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=23940.0, ans=10.0 +2024-09-16 15:12:27,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=23940.0, ans=0.0 +2024-09-16 15:12:36,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=23980.0, ans=0.125 +2024-09-16 15:12:40,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=23980.0, ans=0.125 +2024-09-16 15:12:56,334 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.115e+02 1.348e+02 1.492e+02 1.698e+02 3.722e+02, threshold=2.983e+02, percent-clipped=2.0 +2024-09-16 15:12:57,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=8.93 vs. limit=15.0 +2024-09-16 15:13:03,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.64 vs. limit=15.0 +2024-09-16 15:13:05,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=24060.0, ans=0.125 +2024-09-16 15:13:16,028 INFO [train.py:1198] (1/2) Epoch 2, batch 1500, loss[loss=0.3466, ctc_loss=0.293, cr_loss=0.4673, attn_decoder_loss=0.3421, over 29612.00 frames. ], tot_loss[loss=0.343, ctc_loss=0.3029, cr_loss=0.4427, attn_decoder_loss=0.3376, over 5806444.77 frames. 
], batch size: 86, lr: 3.90e-02, grad_scale: 16.0 +2024-09-16 15:13:16,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=24100.0, ans=0.005630434782608696 +2024-09-16 15:13:25,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=24100.0, ans=0.1 +2024-09-16 15:13:27,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=24100.0, ans=0.125 +2024-09-16 15:13:54,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=24180.0, ans=0.0 +2024-09-16 15:13:57,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.74 vs. limit=10.0 +2024-09-16 15:14:07,196 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.10 vs. limit=15.0 +2024-09-16 15:14:08,526 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.40 vs. limit=10.0 +2024-09-16 15:14:34,946 INFO [train.py:1198] (1/2) Epoch 2, batch 1550, loss[loss=0.3692, ctc_loss=0.3339, cr_loss=0.4723, attn_decoder_loss=0.3626, over 29464.00 frames. ], tot_loss[loss=0.3426, ctc_loss=0.3026, cr_loss=0.4424, attn_decoder_loss=0.3372, over 5782517.26 frames. ], batch size: 90, lr: 3.89e-02, grad_scale: 8.0 +2024-09-16 15:15:10,418 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.95 vs. limit=22.5 +2024-09-16 15:15:14,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=24380.0, ans=0.125 +2024-09-16 15:15:21,658 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=15.28 vs. limit=15.0 +2024-09-16 15:15:25,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=24420.0, ans=0.125 +2024-09-16 15:15:25,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=24420.0, ans=0.0 +2024-09-16 15:15:26,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=6.25 vs. limit=15.0 +2024-09-16 15:15:30,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=24420.0, ans=0.0 +2024-09-16 15:15:30,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=24420.0, ans=0.0055608695652173915 +2024-09-16 15:15:34,549 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.088e+02 1.379e+02 1.577e+02 1.948e+02 4.764e+02, threshold=3.154e+02, percent-clipped=9.0 +2024-09-16 15:15:44,310 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.33 vs. limit=6.0 +2024-09-16 15:15:44,578 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.38 vs. 
limit=15.0 +2024-09-16 15:15:46,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=24460.0, ans=0.0 +2024-09-16 15:15:52,712 INFO [train.py:1198] (1/2) Epoch 2, batch 1600, loss[loss=0.357, ctc_loss=0.3137, cr_loss=0.4844, attn_decoder_loss=0.3511, over 29674.00 frames. ], tot_loss[loss=0.3426, ctc_loss=0.303, cr_loss=0.4422, attn_decoder_loss=0.3372, over 5764056.06 frames. ], batch size: 85, lr: 3.88e-02, grad_scale: 16.0 +2024-09-16 15:16:06,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=24540.0, ans=0.025 +2024-09-16 15:16:14,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.27 vs. limit=10.0 +2024-09-16 15:16:46,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=24620.0, ans=0.125 +2024-09-16 15:16:46,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=24620.0, ans=0.0 +2024-09-16 15:17:08,435 INFO [train.py:1198] (1/2) Epoch 2, batch 1650, loss[loss=0.3632, ctc_loss=0.3245, cr_loss=0.4802, attn_decoder_loss=0.3568, over 29698.00 frames. ], tot_loss[loss=0.3419, ctc_loss=0.3021, cr_loss=0.4413, attn_decoder_loss=0.3366, over 5758119.53 frames. ], batch size: 89, lr: 3.88e-02, grad_scale: 16.0 +2024-09-16 15:17:16,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=24700.0, ans=0.125 +2024-09-16 15:17:44,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=24780.0, ans=0.0 +2024-09-16 15:17:47,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=24780.0, ans=0.125 +2024-09-16 15:17:57,462 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.08 vs. limit=12.0 +2024-09-16 15:18:08,691 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.020e+02 1.312e+02 1.453e+02 1.722e+02 6.388e+02, threshold=2.905e+02, percent-clipped=6.0 +2024-09-16 15:18:13,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=24860.0, ans=0.125 +2024-09-16 15:18:21,641 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=18.32 vs. limit=15.0 +2024-09-16 15:18:25,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=24900.0, ans=0.1 +2024-09-16 15:18:26,757 INFO [train.py:1198] (1/2) Epoch 2, batch 1700, loss[loss=0.3016, ctc_loss=0.2655, cr_loss=0.4395, attn_decoder_loss=0.2958, over 29593.00 frames. ], tot_loss[loss=0.3412, ctc_loss=0.3007, cr_loss=0.442, attn_decoder_loss=0.3359, over 5780217.95 frames. ], batch size: 69, lr: 3.87e-02, grad_scale: 16.0 +2024-09-16 15:18:33,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=24.53 vs. 
limit=22.5 +2024-09-16 15:18:40,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=24900.0, ans=6.0 +2024-09-16 15:18:41,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=24940.0, ans=0.025 +2024-09-16 15:18:47,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=24940.0, ans=0.0 +2024-09-16 15:18:51,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=24940.0, ans=0.125 +2024-09-16 15:19:11,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=24980.0, ans=0.2 +2024-09-16 15:19:11,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=24980.0, ans=0.1 +2024-09-16 15:19:22,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=25020.0, ans=0.125 +2024-09-16 15:19:35,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=25060.0, ans=10.0 +2024-09-16 15:19:40,956 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.63 vs. limit=22.5 +2024-09-16 15:19:44,699 INFO [train.py:1198] (1/2) Epoch 2, batch 1750, loss[loss=0.2897, ctc_loss=0.2351, cr_loss=0.3747, attn_decoder_loss=0.2875, over 29338.00 frames. ], tot_loss[loss=0.3397, ctc_loss=0.2984, cr_loss=0.441, attn_decoder_loss=0.3345, over 5787836.50 frames. ], batch size: 67, lr: 3.86e-02, grad_scale: 16.0 +2024-09-16 15:19:49,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=25100.0, ans=0.05 +2024-09-16 15:19:55,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=25100.0, ans=0.125 +2024-09-16 15:20:05,604 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.28 vs. limit=15.0 +2024-09-16 15:20:23,436 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.70 vs. limit=10.0 +2024-09-16 15:20:31,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.26 vs. limit=12.0 +2024-09-16 15:20:38,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=25220.0, ans=0.125 +2024-09-16 15:20:42,198 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.124e+02 1.354e+02 1.539e+02 1.820e+02 3.547e+02, threshold=3.078e+02, percent-clipped=3.0 +2024-09-16 15:20:44,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=25260.0, ans=0.125 +2024-09-16 15:21:00,330 INFO [train.py:1198] (1/2) Epoch 2, batch 1800, loss[loss=0.3526, ctc_loss=0.3084, cr_loss=0.4289, attn_decoder_loss=0.348, over 29704.00 frames. 
], tot_loss[loss=0.3398, ctc_loss=0.2986, cr_loss=0.4413, attn_decoder_loss=0.3346, over 5790527.06 frames. ], batch size: 83, lr: 3.86e-02, grad_scale: 16.0 +2024-09-16 15:21:08,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=25300.0, ans=0.0 +2024-09-16 15:21:31,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=25380.0, ans=0.125 +2024-09-16 15:21:32,545 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.77 vs. limit=15.0 +2024-09-16 15:22:08,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=25460.0, ans=0.125 +2024-09-16 15:22:14,642 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.30 vs. limit=22.5 +2024-09-16 15:22:18,553 INFO [train.py:1198] (1/2) Epoch 2, batch 1850, loss[loss=0.3531, ctc_loss=0.3011, cr_loss=0.45, attn_decoder_loss=0.3488, over 29625.00 frames. ], tot_loss[loss=0.3398, ctc_loss=0.2987, cr_loss=0.4424, attn_decoder_loss=0.3346, over 5797080.38 frames. ], batch size: 86, lr: 3.85e-02, grad_scale: 16.0 +2024-09-16 15:22:29,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=25500.0, ans=0.0 +2024-09-16 15:22:30,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=25500.0, ans=0.125 +2024-09-16 15:22:42,175 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=5.44 vs. limit=12.0 +2024-09-16 15:22:46,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=25540.0, ans=0.125 +2024-09-16 15:22:46,939 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.74 vs. limit=15.0 +2024-09-16 15:22:54,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=25580.0, ans=0.125 +2024-09-16 15:23:10,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=25620.0, ans=0.1 +2024-09-16 15:23:19,028 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.115e+02 1.341e+02 1.488e+02 1.704e+02 7.229e+02, threshold=2.976e+02, percent-clipped=2.0 +2024-09-16 15:23:25,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=25660.0, ans=0.125 +2024-09-16 15:23:37,145 INFO [train.py:1198] (1/2) Epoch 2, batch 1900, loss[loss=0.3702, ctc_loss=0.3376, cr_loss=0.4625, attn_decoder_loss=0.3635, over 29685.00 frames. ], tot_loss[loss=0.3405, ctc_loss=0.2991, cr_loss=0.444, attn_decoder_loss=0.3352, over 5805466.96 frames. 
], batch size: 89, lr: 3.85e-02, grad_scale: 16.0 +2024-09-16 15:23:47,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=25700.0, ans=0.125 +2024-09-16 15:23:52,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=25740.0, ans=0.2 +2024-09-16 15:23:57,811 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.12 vs. limit=12.0 +2024-09-16 15:24:04,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=25740.0, ans=0.1 +2024-09-16 15:24:38,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=25860.0, ans=0.1 +2024-09-16 15:24:44,967 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=24.06 vs. limit=22.5 +2024-09-16 15:24:53,303 INFO [train.py:1198] (1/2) Epoch 2, batch 1950, loss[loss=0.3279, ctc_loss=0.2854, cr_loss=0.4425, attn_decoder_loss=0.3228, over 29450.00 frames. ], tot_loss[loss=0.3413, ctc_loss=0.2992, cr_loss=0.4453, attn_decoder_loss=0.3361, over 5819778.55 frames. ], batch size: 78, lr: 3.84e-02, grad_scale: 16.0 +2024-09-16 15:25:07,532 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=2.547e-02 +2024-09-16 15:25:08,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=25940.0, ans=0.04949747468305833 +2024-09-16 15:25:15,560 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.79 vs. limit=15.0 +2024-09-16 15:25:44,757 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.01 vs. limit=15.0 +2024-09-16 15:25:50,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=26020.0, ans=0.125 +2024-09-16 15:25:52,895 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.113e+02 1.307e+02 1.485e+02 1.949e+02 3.051e+02, threshold=2.970e+02, percent-clipped=1.0 +2024-09-16 15:26:11,400 INFO [train.py:1198] (1/2) Epoch 2, batch 2000, loss[loss=0.3098, ctc_loss=0.2733, cr_loss=0.421, attn_decoder_loss=0.3046, over 29362.00 frames. ], tot_loss[loss=0.3419, ctc_loss=0.3004, cr_loss=0.4455, attn_decoder_loss=0.3367, over 5797014.82 frames. ], batch size: 67, lr: 3.83e-02, grad_scale: 32.0 +2024-09-16 15:26:17,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=26100.0, ans=0.125 +2024-09-16 15:26:18,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=28.77 vs. limit=22.5 +2024-09-16 15:26:18,733 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=7.68 vs. 
limit=15.0 +2024-09-16 15:26:30,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=26140.0, ans=0.025 +2024-09-16 15:26:33,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=26140.0, ans=0.005186956521739131 +2024-09-16 15:26:45,181 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.80 vs. limit=15.0 +2024-09-16 15:27:13,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=26260.0, ans=0.07 +2024-09-16 15:27:19,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=26260.0, ans=0.005160869565217391 +2024-09-16 15:27:21,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=26260.0, ans=0.125 +2024-09-16 15:27:25,195 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.70 vs. limit=6.0 +2024-09-16 15:27:30,011 INFO [train.py:1198] (1/2) Epoch 2, batch 2050, loss[loss=0.2972, ctc_loss=0.2482, cr_loss=0.391, attn_decoder_loss=0.294, over 29425.00 frames. ], tot_loss[loss=0.3408, ctc_loss=0.2995, cr_loss=0.4451, attn_decoder_loss=0.3355, over 5787504.34 frames. ], batch size: 70, lr: 3.83e-02, grad_scale: 16.0 +2024-09-16 15:27:34,130 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.86 vs. limit=15.0 +2024-09-16 15:27:50,345 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.91 vs. limit=10.0 +2024-09-16 15:28:29,485 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.089e+02 1.317e+02 1.483e+02 1.822e+02 5.194e+02, threshold=2.965e+02, percent-clipped=3.0 +2024-09-16 15:28:43,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=26460.0, ans=0.125 +2024-09-16 15:28:46,403 INFO [train.py:1198] (1/2) Epoch 2, batch 2100, loss[loss=0.3141, ctc_loss=0.2642, cr_loss=0.4244, attn_decoder_loss=0.3102, over 29763.00 frames. ], tot_loss[loss=0.339, ctc_loss=0.2972, cr_loss=0.4436, attn_decoder_loss=0.3338, over 5800539.29 frames. ], batch size: 81, lr: 3.82e-02, grad_scale: 16.0 +2024-09-16 15:28:57,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=26500.0, ans=0.1 +2024-09-16 15:29:01,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=26540.0, ans=0.125 +2024-09-16 15:30:05,024 INFO [train.py:1198] (1/2) Epoch 2, batch 2150, loss[loss=0.3387, ctc_loss=0.2958, cr_loss=0.4725, attn_decoder_loss=0.333, over 29453.00 frames. ], tot_loss[loss=0.3381, ctc_loss=0.296, cr_loss=0.4435, attn_decoder_loss=0.3329, over 5815708.75 frames. ], batch size: 78, lr: 3.81e-02, grad_scale: 16.0 +2024-09-16 15:30:15,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.39 vs. 
limit=22.5 +2024-09-16 15:30:26,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=26740.0, ans=0.125 +2024-09-16 15:30:28,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=26740.0, ans=0.0 +2024-09-16 15:30:32,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=26740.0, ans=0.125 +2024-09-16 15:30:36,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=22.66 vs. limit=22.5 +2024-09-16 15:30:39,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=26780.0, ans=0.035 +2024-09-16 15:30:52,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.67 vs. limit=15.0 +2024-09-16 15:31:06,842 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.157e+02 1.386e+02 1.602e+02 1.803e+02 8.431e+02, threshold=3.204e+02, percent-clipped=4.0 +2024-09-16 15:31:23,610 INFO [train.py:1198] (1/2) Epoch 2, batch 2200, loss[loss=0.3474, ctc_loss=0.2953, cr_loss=0.465, attn_decoder_loss=0.3429, over 29646.00 frames. ], tot_loss[loss=0.3381, ctc_loss=0.2959, cr_loss=0.4434, attn_decoder_loss=0.3329, over 5811692.25 frames. ], batch size: 86, lr: 3.81e-02, grad_scale: 16.0 +2024-09-16 15:31:28,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=26900.0, ans=0.005021739130434783 +2024-09-16 15:31:51,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=26940.0, ans=0.1 +2024-09-16 15:32:22,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=27020.0, ans=0.0 +2024-09-16 15:32:40,169 INFO [train.py:1198] (1/2) Epoch 2, batch 2250, loss[loss=0.3401, ctc_loss=0.2913, cr_loss=0.4682, attn_decoder_loss=0.3351, over 29713.00 frames. ], tot_loss[loss=0.3379, ctc_loss=0.2956, cr_loss=0.4435, attn_decoder_loss=0.3327, over 5810629.26 frames. 
], batch size: 82, lr: 3.80e-02, grad_scale: 16.0 +2024-09-16 15:32:43,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=27100.0, ans=0.1 +2024-09-16 15:32:46,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=27100.0, ans=0.0 +2024-09-16 15:32:50,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=27100.0, ans=0.125 +2024-09-16 15:32:50,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=27100.0, ans=0.0 +2024-09-16 15:33:04,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=27140.0, ans=0.1 +2024-09-16 15:33:13,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=27180.0, ans=0.1 +2024-09-16 15:33:41,483 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.069e+02 1.301e+02 1.512e+02 1.808e+02 4.415e+02, threshold=3.025e+02, percent-clipped=2.0 +2024-09-16 15:33:58,448 INFO [train.py:1198] (1/2) Epoch 2, batch 2300, loss[loss=0.3012, ctc_loss=0.2605, cr_loss=0.3988, attn_decoder_loss=0.2969, over 29296.00 frames. ], tot_loss[loss=0.3369, ctc_loss=0.2945, cr_loss=0.4416, attn_decoder_loss=0.3318, over 5797054.26 frames. ], batch size: 71, lr: 3.79e-02, grad_scale: 16.0 +2024-09-16 15:34:00,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=27300.0, ans=0.0049347826086956524 +2024-09-16 15:34:09,787 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.37 vs. limit=15.0 +2024-09-16 15:34:23,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.86 vs. limit=22.5 +2024-09-16 15:34:38,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=27380.0, ans=0.2 +2024-09-16 15:34:58,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=27420.0, ans=0.125 +2024-09-16 15:35:16,566 INFO [train.py:1198] (1/2) Epoch 2, batch 2350, loss[loss=0.3642, ctc_loss=0.3303, cr_loss=0.4714, attn_decoder_loss=0.3575, over 29710.00 frames. ], tot_loss[loss=0.3367, ctc_loss=0.2943, cr_loss=0.4421, attn_decoder_loss=0.3316, over 5803006.73 frames. ], batch size: 83, lr: 3.79e-02, grad_scale: 16.0 +2024-09-16 15:35:25,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=27500.0, ans=0.125 +2024-09-16 15:35:25,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=27500.0, ans=0.0 +2024-09-16 15:35:33,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.92 vs. limit=15.0 +2024-09-16 15:35:36,935 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=24.90 vs. 
limit=22.5 +2024-09-16 15:36:08,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=27620.0, ans=0.2 +2024-09-16 15:36:15,845 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.153e+02 1.428e+02 1.608e+02 2.014e+02 4.831e+02, threshold=3.217e+02, percent-clipped=8.0 +2024-09-16 15:36:25,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.95 vs. limit=22.5 +2024-09-16 15:36:32,927 INFO [train.py:1198] (1/2) Epoch 2, batch 2400, loss[loss=0.3117, ctc_loss=0.2642, cr_loss=0.4043, attn_decoder_loss=0.308, over 29538.00 frames. ], tot_loss[loss=0.3368, ctc_loss=0.294, cr_loss=0.4427, attn_decoder_loss=0.3317, over 5807568.61 frames. ], batch size: 76, lr: 3.78e-02, grad_scale: 32.0 +2024-09-16 15:36:33,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=27700.0, ans=0.2 +2024-09-16 15:36:35,182 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.65 vs. limit=15.0 +2024-09-16 15:36:42,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=27700.0, ans=0.125 +2024-09-16 15:36:43,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=27700.0, ans=0.04949747468305833 +2024-09-16 15:36:49,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=27740.0, ans=0.1 +2024-09-16 15:37:30,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=27820.0, ans=0.2 +2024-09-16 15:37:30,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=27820.0, ans=0.0 +2024-09-16 15:37:48,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=27860.0, ans=0.025 +2024-09-16 15:37:51,269 INFO [train.py:1198] (1/2) Epoch 2, batch 2450, loss[loss=0.3555, ctc_loss=0.3149, cr_loss=0.4929, attn_decoder_loss=0.3491, over 29719.00 frames. ], tot_loss[loss=0.3381, ctc_loss=0.2955, cr_loss=0.4438, attn_decoder_loss=0.333, over 5784724.57 frames. ], batch size: 82, lr: 3.78e-02, grad_scale: 16.0 +2024-09-16 15:37:58,159 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.78 vs. limit=15.0 +2024-09-16 15:38:14,801 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.40 vs. limit=15.0 +2024-09-16 15:38:30,204 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=1.047e-02 +2024-09-16 15:38:37,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=28020.0, ans=0.125 +2024-09-16 15:38:41,545 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.29 vs. 
limit=15.0 +2024-09-16 15:38:45,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=28020.0, ans=0.2 +2024-09-16 15:38:54,381 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.110e+02 1.358e+02 1.541e+02 1.889e+02 3.653e+02, threshold=3.082e+02, percent-clipped=2.0 +2024-09-16 15:38:56,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=28060.0, ans=0.025 +2024-09-16 15:39:02,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=6.03 vs. limit=6.0 +2024-09-16 15:39:09,701 INFO [train.py:1198] (1/2) Epoch 2, batch 2500, loss[loss=0.3598, ctc_loss=0.3171, cr_loss=0.5025, attn_decoder_loss=0.3534, over 29623.00 frames. ], tot_loss[loss=0.3382, ctc_loss=0.2955, cr_loss=0.4441, attn_decoder_loss=0.3331, over 5795177.84 frames. ], batch size: 86, lr: 3.77e-02, grad_scale: 16.0 +2024-09-16 15:39:12,412 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.90 vs. limit=10.0 +2024-09-16 15:39:18,283 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.37 vs. limit=15.0 +2024-09-16 15:39:46,811 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.65 vs. limit=6.0 +2024-09-16 15:40:20,612 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.87 vs. limit=15.0 +2024-09-16 15:40:24,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=28300.0, ans=0.2 +2024-09-16 15:40:25,986 INFO [train.py:1198] (1/2) Epoch 2, batch 2550, loss[loss=0.3017, ctc_loss=0.2653, cr_loss=0.4112, attn_decoder_loss=0.2966, over 29315.00 frames. ], tot_loss[loss=0.3379, ctc_loss=0.2949, cr_loss=0.4434, attn_decoder_loss=0.3328, over 5798670.23 frames. ], batch size: 67, lr: 3.76e-02, grad_scale: 16.0 +2024-09-16 15:40:26,331 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:40:27,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=28300.0, ans=0.1 +2024-09-16 15:40:38,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=28300.0, ans=0.1 +2024-09-16 15:40:54,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=28380.0, ans=0.125 +2024-09-16 15:41:28,631 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.048e+02 1.393e+02 1.535e+02 1.794e+02 3.607e+02, threshold=3.070e+02, percent-clipped=1.0 +2024-09-16 15:41:30,950 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.31 vs. 
limit=15.0 +2024-09-16 15:41:31,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=28460.0, ans=0.125 +2024-09-16 15:41:44,114 INFO [train.py:1198] (1/2) Epoch 2, batch 2600, loss[loss=0.3426, ctc_loss=0.3, cr_loss=0.46, attn_decoder_loss=0.3371, over 29453.00 frames. ], tot_loss[loss=0.3381, ctc_loss=0.2953, cr_loss=0.4443, attn_decoder_loss=0.333, over 5795514.73 frames. ], batch size: 78, lr: 3.76e-02, grad_scale: 16.0 +2024-09-16 15:42:57,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=28660.0, ans=0.125 +2024-09-16 15:43:01,958 INFO [train.py:1198] (1/2) Epoch 2, batch 2650, loss[loss=0.3531, ctc_loss=0.3063, cr_loss=0.5076, attn_decoder_loss=0.347, over 29282.00 frames. ], tot_loss[loss=0.3384, ctc_loss=0.2953, cr_loss=0.4455, attn_decoder_loss=0.3333, over 5801402.11 frames. ], batch size: 100, lr: 3.75e-02, grad_scale: 16.0 +2024-09-16 15:43:11,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=28700.0, ans=0.125 +2024-09-16 15:43:16,223 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=8.072e-03 +2024-09-16 15:43:22,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=28740.0, ans=0.125 +2024-09-16 15:43:25,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=28740.0, ans=0.1 +2024-09-16 15:43:45,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=28780.0, ans=0.004613043478260869 +2024-09-16 15:43:46,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=28820.0, ans=0.025 +2024-09-16 15:44:02,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=28860.0, ans=0.125 +2024-09-16 15:44:03,362 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.130e+02 1.330e+02 1.517e+02 1.799e+02 4.153e+02, threshold=3.035e+02, percent-clipped=1.0 +2024-09-16 15:44:05,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=28860.0, ans=0.1 +2024-09-16 15:44:08,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=28860.0, ans=0.125 +2024-09-16 15:44:18,675 INFO [train.py:1198] (1/2) Epoch 2, batch 2700, loss[loss=0.3335, ctc_loss=0.2786, cr_loss=0.4506, attn_decoder_loss=0.3296, over 29545.00 frames. ], tot_loss[loss=0.3385, ctc_loss=0.2952, cr_loss=0.4467, attn_decoder_loss=0.3334, over 5797991.89 frames. 
], batch size: 87, lr: 3.74e-02, grad_scale: 16.0 +2024-09-16 15:44:25,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=28900.0, ans=0.0 +2024-09-16 15:44:50,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=28980.0, ans=0.125 +2024-09-16 15:44:58,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=28980.0, ans=0.2 +2024-09-16 15:45:02,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=29020.0, ans=0.05 +2024-09-16 15:45:17,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=29020.0, ans=0.1 +2024-09-16 15:45:20,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=29060.0, ans=0.004552173913043479 +2024-09-16 15:45:23,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=29060.0, ans=0.09899494936611666 +2024-09-16 15:45:36,958 INFO [train.py:1198] (1/2) Epoch 2, batch 2750, loss[loss=0.3313, ctc_loss=0.2911, cr_loss=0.4571, attn_decoder_loss=0.3256, over 29509.00 frames. ], tot_loss[loss=0.3369, ctc_loss=0.2938, cr_loss=0.4447, attn_decoder_loss=0.3318, over 5796233.02 frames. ], batch size: 75, lr: 3.74e-02, grad_scale: 8.0 +2024-09-16 15:45:47,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=29100.0, ans=0.2 +2024-09-16 15:46:16,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=29180.0, ans=0.025 +2024-09-16 15:46:35,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=29220.0, ans=0.0 +2024-09-16 15:46:41,280 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.163e+02 1.322e+02 1.540e+02 1.938e+02 5.454e+02, threshold=3.080e+02, percent-clipped=6.0 +2024-09-16 15:46:43,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=29260.0, ans=0.125 +2024-09-16 15:46:47,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=29260.0, ans=0.125 +2024-09-16 15:46:55,043 INFO [train.py:1198] (1/2) Epoch 2, batch 2800, loss[loss=0.3833, ctc_loss=0.3765, cr_loss=0.4249, attn_decoder_loss=0.3747, over 20405.00 frames. ], tot_loss[loss=0.3373, ctc_loss=0.2943, cr_loss=0.4446, attn_decoder_loss=0.3322, over 5776594.06 frames. 
], batch size: 211, lr: 3.73e-02, grad_scale: 16.0 +2024-09-16 15:47:16,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=29340.0, ans=0.1 +2024-09-16 15:47:23,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=29380.0, ans=0.125 +2024-09-16 15:47:29,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=29380.0, ans=0.0 +2024-09-16 15:47:45,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=29420.0, ans=0.125 +2024-09-16 15:47:46,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=29420.0, ans=0.5 +2024-09-16 15:47:50,288 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.22 vs. limit=22.5 +2024-09-16 15:47:55,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=29460.0, ans=0.2 +2024-09-16 15:47:58,595 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:48:07,990 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.88 vs. limit=15.0 +2024-09-16 15:48:10,151 INFO [train.py:1198] (1/2) Epoch 2, batch 2850, loss[loss=0.3252, ctc_loss=0.2781, cr_loss=0.4278, attn_decoder_loss=0.3209, over 29519.00 frames. ], tot_loss[loss=0.3377, ctc_loss=0.2948, cr_loss=0.4449, attn_decoder_loss=0.3325, over 5761677.72 frames. ], batch size: 77, lr: 3.73e-02, grad_scale: 16.0 +2024-09-16 15:48:19,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=29500.0, ans=0.125 +2024-09-16 15:48:28,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=29540.0, ans=0.125 +2024-09-16 15:48:31,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=29540.0, ans=0.0 +2024-09-16 15:49:15,201 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.123e+02 1.408e+02 1.587e+02 1.885e+02 4.187e+02, threshold=3.175e+02, percent-clipped=5.0 +2024-09-16 15:49:24,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.77 vs. limit=15.0 +2024-09-16 15:49:28,799 INFO [train.py:1198] (1/2) Epoch 2, batch 2900, loss[loss=0.3145, ctc_loss=0.2486, cr_loss=0.4359, attn_decoder_loss=0.3121, over 29431.00 frames. ], tot_loss[loss=0.3382, ctc_loss=0.2944, cr_loss=0.4462, attn_decoder_loss=0.3332, over 5786227.13 frames. 
], batch size: 79, lr: 3.72e-02, grad_scale: 16.0 +2024-09-16 15:49:30,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=29700.0, ans=0.125 +2024-09-16 15:49:39,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=29700.0, ans=0.0 +2024-09-16 15:49:49,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=29740.0, ans=0.2 +2024-09-16 15:49:55,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=29740.0, ans=0.1 +2024-09-16 15:49:58,995 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.28 vs. limit=6.0 +2024-09-16 15:50:01,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=29780.0, ans=0.125 +2024-09-16 15:50:02,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=29780.0, ans=0.004395652173913044 +2024-09-16 15:50:10,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=29780.0, ans=0.125 +2024-09-16 15:50:26,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=29820.0, ans=0.125 +2024-09-16 15:50:36,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=29860.0, ans=0.125 +2024-09-16 15:50:46,435 INFO [train.py:1198] (1/2) Epoch 2, batch 2950, loss[loss=0.3269, ctc_loss=0.2779, cr_loss=0.456, attn_decoder_loss=0.3222, over 29527.00 frames. ], tot_loss[loss=0.3365, ctc_loss=0.2928, cr_loss=0.444, attn_decoder_loss=0.3315, over 5782292.48 frames. ], batch size: 75, lr: 3.71e-02, grad_scale: 16.0 +2024-09-16 15:51:04,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.62 vs. limit=15.0 +2024-09-16 15:51:10,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=29940.0, ans=0.125 +2024-09-16 15:51:18,888 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.07 vs. limit=15.0 +2024-09-16 15:51:27,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=29980.0, ans=10.0 +2024-09-16 15:51:48,469 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.139e+02 1.374e+02 1.533e+02 1.890e+02 8.560e+02, threshold=3.066e+02, percent-clipped=4.0 +2024-09-16 15:51:48,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=30060.0, ans=0.125 +2024-09-16 15:52:02,437 INFO [train.py:1198] (1/2) Epoch 2, batch 3000, loss[loss=0.3356, ctc_loss=0.2867, cr_loss=0.4265, attn_decoder_loss=0.3315, over 29765.00 frames. ], tot_loss[loss=0.336, ctc_loss=0.2924, cr_loss=0.4442, attn_decoder_loss=0.331, over 5783292.79 frames. 
], batch size: 81, lr: 3.71e-02, grad_scale: 16.0 +2024-09-16 15:52:02,438 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 15:52:20,638 INFO [train.py:1230] (1/2) Epoch 2, validation: loss=0.2432, ctc_loss=0.1092, cr_loss=4.796e-15, attn_decoder_loss=0.2581, over 944034.00 frames. +2024-09-16 15:52:20,638 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 15:52:22,644 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:52:47,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=8.28 vs. limit=15.0 +2024-09-16 15:53:27,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=30260.0, ans=0.1 +2024-09-16 15:53:34,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=30260.0, ans=0.025 +2024-09-16 15:53:42,120 INFO [train.py:1198] (1/2) Epoch 2, batch 3050, loss[loss=0.3155, ctc_loss=0.2735, cr_loss=0.4109, attn_decoder_loss=0.3111, over 29555.00 frames. ], tot_loss[loss=0.3373, ctc_loss=0.2938, cr_loss=0.4457, attn_decoder_loss=0.3322, over 5777076.91 frames. ], batch size: 76, lr: 3.70e-02, grad_scale: 16.0 +2024-09-16 15:53:43,131 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.71 vs. limit=15.0 +2024-09-16 15:53:44,596 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.42 vs. limit=15.0 +2024-09-16 15:54:03,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=30340.0, ans=0.0 +2024-09-16 15:54:09,615 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:54:25,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.43 vs. limit=15.0 +2024-09-16 15:54:41,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=30460.0, ans=0.2 +2024-09-16 15:54:44,410 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.133e+02 1.365e+02 1.556e+02 1.852e+02 9.980e+02, threshold=3.113e+02, percent-clipped=5.0 +2024-09-16 15:54:50,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=30460.0, ans=0.125 +2024-09-16 15:54:57,987 INFO [train.py:1198] (1/2) Epoch 2, batch 3100, loss[loss=0.3651, ctc_loss=0.3356, cr_loss=0.4895, attn_decoder_loss=0.3575, over 29250.00 frames. ], tot_loss[loss=0.3365, ctc_loss=0.2928, cr_loss=0.4451, attn_decoder_loss=0.3315, over 5776473.54 frames. ], batch size: 100, lr: 3.69e-02, grad_scale: 16.0 +2024-09-16 15:55:01,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.21 vs. 
limit=15.0 +2024-09-16 15:55:16,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=30540.0, ans=0.125 +2024-09-16 15:55:34,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=30580.0, ans=0.125 +2024-09-16 15:55:56,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=30620.0, ans=0.125 +2024-09-16 15:56:00,779 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:56:14,180 INFO [train.py:1198] (1/2) Epoch 2, batch 3150, loss[loss=0.3792, ctc_loss=0.3463, cr_loss=0.5109, attn_decoder_loss=0.3715, over 28914.00 frames. ], tot_loss[loss=0.3367, ctc_loss=0.2928, cr_loss=0.4461, attn_decoder_loss=0.3317, over 5782851.00 frames. ], batch size: 104, lr: 3.69e-02, grad_scale: 16.0 +2024-09-16 15:56:17,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=30700.0, ans=0.0 +2024-09-16 15:56:42,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=30740.0, ans=0.125 +2024-09-16 15:56:56,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=30780.0, ans=0.125 +2024-09-16 15:57:03,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.36 vs. limit=22.5 +2024-09-16 15:57:20,638 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.062e+02 1.339e+02 1.514e+02 1.730e+02 4.890e+02, threshold=3.027e+02, percent-clipped=3.0 +2024-09-16 15:57:34,169 INFO [train.py:1198] (1/2) Epoch 2, batch 3200, loss[loss=0.3152, ctc_loss=0.2584, cr_loss=0.4455, attn_decoder_loss=0.3116, over 29410.00 frames. ], tot_loss[loss=0.3352, ctc_loss=0.2907, cr_loss=0.445, attn_decoder_loss=0.3303, over 5793108.65 frames. ], batch size: 79, lr: 3.68e-02, grad_scale: 32.0 +2024-09-16 15:57:38,030 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.73 vs. limit=15.0 +2024-09-16 15:57:59,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.56 vs. limit=15.0 +2024-09-16 15:58:08,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=30980.0, ans=0.2 +2024-09-16 15:58:29,593 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.40 vs. limit=15.0 +2024-09-16 15:58:44,983 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.86 vs. limit=15.0 +2024-09-16 15:58:49,423 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.14 vs. limit=12.0 +2024-09-16 15:58:50,039 INFO [train.py:1198] (1/2) Epoch 2, batch 3250, loss[loss=0.3449, ctc_loss=0.3031, cr_loss=0.4328, attn_decoder_loss=0.3399, over 29713.00 frames. 
], tot_loss[loss=0.3355, ctc_loss=0.2908, cr_loss=0.4452, attn_decoder_loss=0.3305, over 5800257.94 frames. ], batch size: 84, lr: 3.68e-02, grad_scale: 16.0 +2024-09-16 15:58:50,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=31100.0, ans=0.125 +2024-09-16 15:58:57,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=31100.0, ans=0.125 +2024-09-16 15:58:58,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=31100.0, ans=0.125 +2024-09-16 15:59:01,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=31100.0, ans=0.025 +2024-09-16 15:59:21,144 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.76 vs. limit=22.5 +2024-09-16 15:59:21,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=31180.0, ans=0.1 +2024-09-16 15:59:23,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=31180.0, ans=0.125 +2024-09-16 15:59:24,989 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 15:59:47,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=31220.0, ans=0.125 +2024-09-16 15:59:47,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=31220.0, ans=0.125 +2024-09-16 15:59:53,724 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.050e+02 1.355e+02 1.599e+02 1.863e+02 1.090e+03, threshold=3.197e+02, percent-clipped=6.0 +2024-09-16 16:00:06,053 INFO [train.py:1198] (1/2) Epoch 2, batch 3300, loss[loss=0.3559, ctc_loss=0.3103, cr_loss=0.4832, attn_decoder_loss=0.3503, over 28218.00 frames. ], tot_loss[loss=0.3339, ctc_loss=0.2895, cr_loss=0.4435, attn_decoder_loss=0.3289, over 5797736.23 frames. ], batch size: 111, lr: 3.67e-02, grad_scale: 16.0 +2024-09-16 16:00:23,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=31340.0, ans=0.125 +2024-09-16 16:00:49,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=31380.0, ans=0.0 +2024-09-16 16:01:04,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=31420.0, ans=0.025 +2024-09-16 16:01:09,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=31460.0, ans=0.0 +2024-09-16 16:01:18,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=31460.0, ans=0.125 +2024-09-16 16:01:25,564 INFO [train.py:1198] (1/2) Epoch 2, batch 3350, loss[loss=0.3586, ctc_loss=0.3196, cr_loss=0.4848, attn_decoder_loss=0.3521, over 28968.00 frames. ], tot_loss[loss=0.3352, ctc_loss=0.2915, cr_loss=0.4445, attn_decoder_loss=0.3302, over 5774381.21 frames. 
], batch size: 104, lr: 3.66e-02, grad_scale: 16.0 +2024-09-16 16:01:27,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=31500.0, ans=0.2 +2024-09-16 16:01:37,071 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.16 vs. limit=22.5 +2024-09-16 16:01:51,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=31540.0, ans=0.025 +2024-09-16 16:01:54,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=31580.0, ans=0.07 +2024-09-16 16:02:18,837 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:02:27,142 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.79 vs. limit=12.0 +2024-09-16 16:02:28,106 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:02:29,253 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.156e+02 1.443e+02 1.604e+02 1.937e+02 5.792e+02, threshold=3.209e+02, percent-clipped=1.0 +2024-09-16 16:02:41,560 INFO [train.py:1198] (1/2) Epoch 2, batch 3400, loss[loss=0.3018, ctc_loss=0.2656, cr_loss=0.4082, attn_decoder_loss=0.2968, over 29386.00 frames. ], tot_loss[loss=0.3348, ctc_loss=0.2913, cr_loss=0.4451, attn_decoder_loss=0.3298, over 5766982.06 frames. ], batch size: 67, lr: 3.66e-02, grad_scale: 16.0 +2024-09-16 16:03:16,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=31780.0, ans=0.125 +2024-09-16 16:03:44,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=31860.0, ans=0.1 +2024-09-16 16:03:57,317 INFO [train.py:1198] (1/2) Epoch 2, batch 3450, loss[loss=0.359, ctc_loss=0.3109, cr_loss=0.4776, attn_decoder_loss=0.3537, over 28434.00 frames. ], tot_loss[loss=0.3347, ctc_loss=0.2907, cr_loss=0.4452, attn_decoder_loss=0.3297, over 5776026.85 frames. 
], batch size: 112, lr: 3.65e-02, grad_scale: 16.0 +2024-09-16 16:03:59,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=31900.0, ans=0.2 +2024-09-16 16:04:00,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=31900.0, ans=0.1 +2024-09-16 16:04:54,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=32020.0, ans=0.125 +2024-09-16 16:05:07,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=32060.0, ans=0.125 +2024-09-16 16:05:09,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=32060.0, ans=0.1 +2024-09-16 16:05:12,126 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.023e+02 1.335e+02 1.514e+02 1.734e+02 4.417e+02, threshold=3.028e+02, percent-clipped=1.0 +2024-09-16 16:05:12,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=32060.0, ans=0.025 +2024-09-16 16:05:20,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.24 vs. limit=22.5 +2024-09-16 16:05:24,249 INFO [train.py:1198] (1/2) Epoch 2, batch 3500, loss[loss=0.3093, ctc_loss=0.267, cr_loss=0.4373, attn_decoder_loss=0.3043, over 29735.00 frames. ], tot_loss[loss=0.3336, ctc_loss=0.2895, cr_loss=0.4444, attn_decoder_loss=0.3287, over 5777631.59 frames. ], batch size: 72, lr: 3.65e-02, grad_scale: 16.0 +2024-09-16 16:05:29,171 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:05:38,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=32140.0, ans=0.0 +2024-09-16 16:05:48,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=32140.0, ans=0.125 +2024-09-16 16:05:48,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.06 vs. limit=15.0 +2024-09-16 16:05:51,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=32140.0, ans=0.025 +2024-09-16 16:06:01,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=32180.0, ans=0.0038739130434782606 +2024-09-16 16:06:30,615 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=15.39 vs. limit=15.0 +2024-09-16 16:06:36,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=32260.0, ans=0.0 +2024-09-16 16:06:37,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=32300.0, ans=0.003847826086956522 +2024-09-16 16:06:38,936 INFO [train.py:1198] (1/2) Epoch 2, batch 3550, loss[loss=0.3425, ctc_loss=0.288, cr_loss=0.4475, attn_decoder_loss=0.3386, over 29725.00 frames. 
], tot_loss[loss=0.3332, ctc_loss=0.2888, cr_loss=0.4447, attn_decoder_loss=0.3282, over 5783918.12 frames. ], batch size: 89, lr: 3.64e-02, grad_scale: 16.0 +2024-09-16 16:06:39,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=32300.0, ans=0.125 +2024-09-16 16:06:59,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=32340.0, ans=0.0 +2024-09-16 16:07:02,387 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.59 vs. limit=10.0 +2024-09-16 16:07:13,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=32380.0, ans=0.0 +2024-09-16 16:07:15,207 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.94 vs. limit=6.0 +2024-09-16 16:07:35,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=32420.0, ans=0.125 +2024-09-16 16:07:41,330 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.102e+02 1.383e+02 1.528e+02 1.788e+02 3.393e+02, threshold=3.056e+02, percent-clipped=1.0 +2024-09-16 16:07:43,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=32460.0, ans=0.125 +2024-09-16 16:07:53,265 INFO [train.py:1198] (1/2) Epoch 2, batch 3600, loss[loss=0.314, ctc_loss=0.2544, cr_loss=0.4339, attn_decoder_loss=0.311, over 29494.00 frames. ], tot_loss[loss=0.3328, ctc_loss=0.2879, cr_loss=0.4445, attn_decoder_loss=0.3279, over 5793232.06 frames. ], batch size: 77, lr: 3.63e-02, grad_scale: 32.0 +2024-09-16 16:07:55,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=32500.0, ans=0.0 +2024-09-16 16:08:35,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.68 vs. limit=12.0 +2024-09-16 16:08:36,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=32620.0, ans=0.125 +2024-09-16 16:08:38,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=32620.0, ans=22.5 +2024-09-16 16:09:07,512 INFO [train.py:1198] (1/2) Epoch 2, batch 3650, loss[loss=0.3269, ctc_loss=0.2778, cr_loss=0.4263, attn_decoder_loss=0.3229, over 29489.00 frames. ], tot_loss[loss=0.3318, ctc_loss=0.2864, cr_loss=0.4436, attn_decoder_loss=0.3269, over 5794561.94 frames. 
], batch size: 90, lr: 3.63e-02, grad_scale: 16.0 +2024-09-16 16:09:13,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=32700.0, ans=0.003760869565217391 +2024-09-16 16:09:21,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=32740.0, ans=0.0 +2024-09-16 16:09:22,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=32740.0, ans=0.2 +2024-09-16 16:09:24,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=32740.0, ans=0.1 +2024-09-16 16:09:30,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.81 vs. limit=15.0 +2024-09-16 16:09:34,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=32740.0, ans=0.0 +2024-09-16 16:09:46,375 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.95 vs. limit=12.0 +2024-09-16 16:10:12,076 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.100e+02 1.348e+02 1.495e+02 1.801e+02 3.465e+02, threshold=2.990e+02, percent-clipped=2.0 +2024-09-16 16:10:21,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=32860.0, ans=0.0037260869565217385 +2024-09-16 16:10:22,891 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.44 vs. limit=22.5 +2024-09-16 16:10:24,624 INFO [train.py:1198] (1/2) Epoch 2, batch 3700, loss[loss=0.3244, ctc_loss=0.2798, cr_loss=0.4376, attn_decoder_loss=0.3196, over 29715.00 frames. ], tot_loss[loss=0.3318, ctc_loss=0.2863, cr_loss=0.4439, attn_decoder_loss=0.3269, over 5804064.36 frames. ], batch size: 84, lr: 3.62e-02, grad_scale: 16.0 +2024-09-16 16:10:32,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=32900.0, ans=0.125 +2024-09-16 16:10:32,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=32900.0, ans=0.125 +2024-09-16 16:10:37,685 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.96 vs. limit=22.5 +2024-09-16 16:10:46,709 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.56 vs. limit=15.0 +2024-09-16 16:10:52,930 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=7.07 vs. 
limit=15.0 +2024-09-16 16:10:55,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=32980.0, ans=0.125 +2024-09-16 16:11:06,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=32980.0, ans=0.125 +2024-09-16 16:11:06,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=32980.0, ans=0.0 +2024-09-16 16:11:11,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=33020.0, ans=0.0 +2024-09-16 16:11:27,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=33060.0, ans=0.0036826086956521734 +2024-09-16 16:11:38,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=33060.0, ans=0.125 +2024-09-16 16:11:41,226 INFO [train.py:1198] (1/2) Epoch 2, batch 3750, loss[loss=0.2932, ctc_loss=0.2459, cr_loss=0.4126, attn_decoder_loss=0.2892, over 29349.00 frames. ], tot_loss[loss=0.332, ctc_loss=0.2866, cr_loss=0.4443, attn_decoder_loss=0.3271, over 5808460.73 frames. ], batch size: 67, lr: 3.62e-02, grad_scale: 16.0 +2024-09-16 16:11:53,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=33100.0, ans=0.1 +2024-09-16 16:12:17,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=33180.0, ans=0.025 +2024-09-16 16:12:17,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=33180.0, ans=0.125 +2024-09-16 16:12:22,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=33180.0, ans=0.1 +2024-09-16 16:12:23,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=33180.0, ans=0.125 +2024-09-16 16:12:24,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=33220.0, ans=0.0 +2024-09-16 16:12:38,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=33220.0, ans=0.0 +2024-09-16 16:12:38,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=33220.0, ans=0.1 +2024-09-16 16:12:40,457 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.65 vs. limit=15.0 +2024-09-16 16:12:45,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.37 vs. 
limit=22.5 +2024-09-16 16:12:45,645 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.138e+02 1.433e+02 1.658e+02 2.025e+02 1.075e+03, threshold=3.317e+02, percent-clipped=10.0 +2024-09-16 16:12:47,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=33260.0, ans=0.0036391304347826083 +2024-09-16 16:12:55,953 INFO [train.py:1198] (1/2) Epoch 2, batch 3800, loss[loss=0.3508, ctc_loss=0.3026, cr_loss=0.4724, attn_decoder_loss=0.3457, over 29643.00 frames. ], tot_loss[loss=0.3314, ctc_loss=0.2861, cr_loss=0.4434, attn_decoder_loss=0.3265, over 5798032.24 frames. ], batch size: 86, lr: 3.61e-02, grad_scale: 16.0 +2024-09-16 16:13:00,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=33300.0, ans=0.125 +2024-09-16 16:13:08,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=33300.0, ans=0.125 +2024-09-16 16:13:26,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=33380.0, ans=0.125 +2024-09-16 16:13:45,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=33420.0, ans=0.1 +2024-09-16 16:13:46,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.66 vs. limit=22.5 +2024-09-16 16:13:50,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=33420.0, ans=0.0 +2024-09-16 16:13:51,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=33420.0, ans=0.0036043478260869566 +2024-09-16 16:13:52,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.22 vs. limit=22.5 +2024-09-16 16:13:56,718 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.71 vs. limit=15.0 +2024-09-16 16:14:04,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=33460.0, ans=0.125 +2024-09-16 16:14:10,550 INFO [train.py:1198] (1/2) Epoch 2, batch 3850, loss[loss=0.3526, ctc_loss=0.3082, cr_loss=0.4747, attn_decoder_loss=0.347, over 29294.00 frames. ], tot_loss[loss=0.3307, ctc_loss=0.2849, cr_loss=0.4432, attn_decoder_loss=0.3259, over 5811901.25 frames. ], batch size: 100, lr: 3.60e-02, grad_scale: 16.0 +2024-09-16 16:14:23,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.27 vs. limit=15.0 +2024-09-16 16:14:38,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=33580.0, ans=0.09899494936611666 +2024-09-16 16:14:40,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=33580.0, ans=0.125 +2024-09-16 16:14:42,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.87 vs. 
limit=15.0 +2024-09-16 16:14:46,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=33580.0, ans=0.125 +2024-09-16 16:14:48,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=33580.0, ans=0.1 +2024-09-16 16:15:01,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=33620.0, ans=0.1 +2024-09-16 16:15:14,725 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.118e+02 1.336e+02 1.556e+02 1.830e+02 4.264e+02, threshold=3.112e+02, percent-clipped=3.0 +2024-09-16 16:15:22,660 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=7.523e-02 +2024-09-16 16:15:25,320 INFO [train.py:1198] (1/2) Epoch 2, batch 3900, loss[loss=0.3386, ctc_loss=0.2965, cr_loss=0.4547, attn_decoder_loss=0.3332, over 29639.00 frames. ], tot_loss[loss=0.3318, ctc_loss=0.2857, cr_loss=0.4449, attn_decoder_loss=0.327, over 5815510.86 frames. ], batch size: 86, lr: 3.60e-02, grad_scale: 16.0 +2024-09-16 16:15:30,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.30 vs. limit=15.0 +2024-09-16 16:15:39,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=33740.0, ans=0.0035347826086956514 +2024-09-16 16:16:14,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=33820.0, ans=0.125 +2024-09-16 16:16:27,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=33860.0, ans=22.5 +2024-09-16 16:16:27,234 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.48 vs. limit=22.5 +2024-09-16 16:16:42,727 INFO [train.py:1198] (1/2) Epoch 2, batch 3950, loss[loss=0.3411, ctc_loss=0.2886, cr_loss=0.4189, attn_decoder_loss=0.3376, over 29547.00 frames. ], tot_loss[loss=0.3303, ctc_loss=0.2834, cr_loss=0.4436, attn_decoder_loss=0.3257, over 5835103.72 frames. ], batch size: 97, lr: 3.59e-02, grad_scale: 16.0 +2024-09-16 16:17:14,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=33980.0, ans=0.2 +2024-09-16 16:17:18,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=33980.0, ans=0.125 +2024-09-16 16:17:46,405 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.131e+02 1.360e+02 1.544e+02 1.951e+02 4.705e+02, threshold=3.088e+02, percent-clipped=4.0 +2024-09-16 16:17:53,561 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.69 vs. limit=12.0 +2024-09-16 16:17:57,003 INFO [train.py:1198] (1/2) Epoch 2, batch 4000, loss[loss=0.3169, ctc_loss=0.2697, cr_loss=0.4124, attn_decoder_loss=0.313, over 29512.00 frames. ], tot_loss[loss=0.3307, ctc_loss=0.2839, cr_loss=0.444, attn_decoder_loss=0.326, over 5812788.35 frames. 
], batch size: 74, lr: 3.59e-02, grad_scale: 32.0 +2024-09-16 16:18:06,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.24 vs. limit=10.0 +2024-09-16 16:18:11,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=34140.0, ans=0.1 +2024-09-16 16:18:11,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=34140.0, ans=0.125 +2024-09-16 16:18:31,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=34180.0, ans=0.2 +2024-09-16 16:18:41,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=34220.0, ans=0.125 +2024-09-16 16:18:44,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=34220.0, ans=0.003430434782608696 +2024-09-16 16:18:47,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=34220.0, ans=0.07 +2024-09-16 16:18:57,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=34260.0, ans=0.025 +2024-09-16 16:18:58,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=34260.0, ans=0.025 +2024-09-16 16:18:59,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=34260.0, ans=0.125 +2024-09-16 16:19:11,322 INFO [train.py:1198] (1/2) Epoch 2, batch 4050, loss[loss=0.3847, ctc_loss=0.3656, cr_loss=0.423, attn_decoder_loss=0.3774, over 19870.00 frames. ], tot_loss[loss=0.3312, ctc_loss=0.2846, cr_loss=0.444, attn_decoder_loss=0.3265, over 5795834.04 frames. ], batch size: 210, lr: 3.58e-02, grad_scale: 16.0 +2024-09-16 16:19:26,776 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=24.29 vs. limit=22.5 +2024-09-16 16:19:27,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=34340.0, ans=0.2 +2024-09-16 16:19:43,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=34380.0, ans=0.0 +2024-09-16 16:19:57,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=34420.0, ans=0.1 +2024-09-16 16:20:05,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=34420.0, ans=0.1 +2024-09-16 16:20:13,142 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.95 vs. limit=22.5 +2024-09-16 16:20:13,159 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=15.18 vs. 
limit=15.0 +2024-09-16 16:20:16,504 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.136e+02 1.473e+02 1.673e+02 1.934e+02 5.199e+02, threshold=3.345e+02, percent-clipped=3.0 +2024-09-16 16:20:22,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=34460.0, ans=0.0 +2024-09-16 16:20:22,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=34460.0, ans=0.05 +2024-09-16 16:20:26,773 INFO [train.py:1198] (1/2) Epoch 2, batch 4100, loss[loss=0.3489, ctc_loss=0.3027, cr_loss=0.4398, attn_decoder_loss=0.3443, over 29506.00 frames. ], tot_loss[loss=0.3318, ctc_loss=0.2856, cr_loss=0.4451, attn_decoder_loss=0.3271, over 5791596.98 frames. ], batch size: 90, lr: 3.57e-02, grad_scale: 16.0 +2024-09-16 16:20:30,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=34500.0, ans=0.2 +2024-09-16 16:20:49,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=34540.0, ans=0.0 +2024-09-16 16:21:25,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=34620.0, ans=0.125 +2024-09-16 16:21:42,419 INFO [train.py:1198] (1/2) Epoch 2, batch 4150, loss[loss=0.3116, ctc_loss=0.2603, cr_loss=0.4381, attn_decoder_loss=0.3076, over 29513.00 frames. ], tot_loss[loss=0.3308, ctc_loss=0.2842, cr_loss=0.4442, attn_decoder_loss=0.3262, over 5797641.26 frames. ], batch size: 77, lr: 3.57e-02, grad_scale: 8.0 +2024-09-16 16:21:50,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=34700.0, ans=0.125 +2024-09-16 16:21:51,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=34700.0, ans=0.125 +2024-09-16 16:22:16,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=34780.0, ans=0.0033086956521739133 +2024-09-16 16:22:17,537 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.89 vs. limit=15.0 +2024-09-16 16:22:25,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=34820.0, ans=0.125 +2024-09-16 16:22:32,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=34820.0, ans=0.125 +2024-09-16 16:22:43,763 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.16 vs. limit=15.0 +2024-09-16 16:22:48,754 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.150e+02 1.357e+02 1.525e+02 1.720e+02 3.077e+02, threshold=3.049e+02, percent-clipped=0.0 +2024-09-16 16:22:49,661 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=6.20 vs. limit=12.0 +2024-09-16 16:22:56,138 INFO [train.py:1198] (1/2) Epoch 2, batch 4200, loss[loss=0.3577, ctc_loss=0.3151, cr_loss=0.4818, attn_decoder_loss=0.3518, over 29499.00 frames. ], tot_loss[loss=0.331, ctc_loss=0.2843, cr_loss=0.4455, attn_decoder_loss=0.3263, over 5798440.52 frames. 
], batch size: 90, lr: 3.56e-02, grad_scale: 8.0 +2024-09-16 16:22:59,330 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:23:12,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=34940.0, ans=0.125 +2024-09-16 16:23:26,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=34980.0, ans=0.2 +2024-09-16 16:23:27,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.53 vs. limit=15.0 +2024-09-16 16:23:48,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=35020.0, ans=0.125 +2024-09-16 16:23:59,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=35060.0, ans=0.125 +2024-09-16 16:24:05,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=35060.0, ans=0.125 +2024-09-16 16:24:09,853 INFO [train.py:1198] (1/2) Epoch 2, batch 4250, loss[loss=0.2999, ctc_loss=0.2409, cr_loss=0.4033, attn_decoder_loss=0.2975, over 29507.00 frames. ], tot_loss[loss=0.331, ctc_loss=0.2841, cr_loss=0.4452, attn_decoder_loss=0.3263, over 5804806.25 frames. ], batch size: 74, lr: 3.56e-02, grad_scale: 8.0 +2024-09-16 16:24:11,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=35100.0, ans=0.003239130434782609 +2024-09-16 16:24:13,281 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.37 vs. limit=10.0 +2024-09-16 16:24:44,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=35180.0, ans=0.125 +2024-09-16 16:24:45,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_ff3.min_abs, batch_count=35180.0, ans=0.2 +2024-09-16 16:24:47,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=35180.0, ans=0.125 +2024-09-16 16:25:18,428 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.126e+02 1.431e+02 1.619e+02 1.873e+02 2.888e+02, threshold=3.237e+02, percent-clipped=0.0 +2024-09-16 16:25:25,687 INFO [train.py:1198] (1/2) Epoch 2, batch 4300, loss[loss=0.3503, ctc_loss=0.3008, cr_loss=0.4466, attn_decoder_loss=0.3459, over 29540.00 frames. ], tot_loss[loss=0.3312, ctc_loss=0.2841, cr_loss=0.4451, attn_decoder_loss=0.3266, over 5793735.29 frames. ], batch size: 87, lr: 3.55e-02, grad_scale: 8.0 +2024-09-16 16:25:44,954 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=8.48 vs. limit=15.0 +2024-09-16 16:26:06,970 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.58 vs. 
limit=15.0 +2024-09-16 16:26:10,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=35420.0, ans=0.125 +2024-09-16 16:26:33,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=35460.0, ans=0.125 +2024-09-16 16:26:40,293 INFO [train.py:1198] (1/2) Epoch 2, batch 4350, loss[loss=0.3503, ctc_loss=0.313, cr_loss=0.4738, attn_decoder_loss=0.3439, over 29501.00 frames. ], tot_loss[loss=0.335, ctc_loss=0.2875, cr_loss=0.4492, attn_decoder_loss=0.3303, over 5796610.69 frames. ], batch size: 97, lr: 3.54e-02, grad_scale: 8.0 +2024-09-16 16:27:15,168 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.74 vs. limit=22.5 +2024-09-16 16:27:39,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=35660.0, ans=0.025 +2024-09-16 16:27:40,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.29 vs. limit=15.0 +2024-09-16 16:27:45,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=35660.0, ans=0.0 +2024-09-16 16:27:48,087 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.185e+02 1.425e+02 1.627e+02 1.817e+02 2.716e+02, threshold=3.254e+02, percent-clipped=0.0 +2024-09-16 16:27:48,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=35660.0, ans=0.003117391304347826 +2024-09-16 16:27:53,358 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.54 vs. limit=22.5 +2024-09-16 16:27:55,405 INFO [train.py:1198] (1/2) Epoch 2, batch 4400, loss[loss=0.3593, ctc_loss=0.3163, cr_loss=0.4836, attn_decoder_loss=0.3533, over 27470.00 frames. ], tot_loss[loss=0.3376, ctc_loss=0.2905, cr_loss=0.4515, attn_decoder_loss=0.3328, over 5765353.49 frames. ], batch size: 125, lr: 3.54e-02, grad_scale: 16.0 +2024-09-16 16:28:00,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=35700.0, ans=0.125 +2024-09-16 16:28:03,152 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:28:08,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=35700.0, ans=15.0 +2024-09-16 16:28:18,434 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=14.23 vs. limit=15.0 +2024-09-16 16:28:28,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=35780.0, ans=0.125 +2024-09-16 16:28:48,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=35820.0, ans=0.1 +2024-09-16 16:29:09,910 INFO [train.py:1198] (1/2) Epoch 2, batch 4450, loss[loss=0.3594, ctc_loss=0.3379, cr_loss=0.4476, attn_decoder_loss=0.3519, over 19933.00 frames. ], tot_loss[loss=0.342, ctc_loss=0.2983, cr_loss=0.4539, attn_decoder_loss=0.3367, over 5569479.18 frames. 
], batch size: 210, lr: 3.53e-02, grad_scale: 16.0 +2024-09-16 16:29:13,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=35900.0, ans=0.125 +2024-09-16 16:29:13,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=35900.0, ans=0.125 +2024-09-16 16:29:16,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=35900.0, ans=0.125 +2024-09-16 16:29:19,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=35900.0, ans=0.0 +2024-09-16 16:29:31,197 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=24.01 vs. limit=22.5 +2024-09-16 16:29:42,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=35980.0, ans=0.04949747468305833 +2024-09-16 16:29:57,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=36020.0, ans=0.2 +2024-09-16 16:30:02,714 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=3.51 vs. limit=15.0 +2024-09-16 16:30:08,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=36020.0, ans=0.125 +2024-09-16 16:30:08,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=8.57 vs. limit=12.0 +2024-09-16 16:30:18,622 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.027e+02 1.305e+02 1.463e+02 1.734e+02 4.707e+02, threshold=2.926e+02, percent-clipped=2.0 +2024-09-16 16:30:25,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=36100.0, ans=0.0030217391304347826 +2024-09-16 16:30:26,377 INFO [train.py:1198] (1/2) Epoch 2, batch 4500, loss[loss=0.3582, ctc_loss=0.3385, cr_loss=0.4478, attn_decoder_loss=0.3505, over 20147.00 frames. ], tot_loss[loss=0.3463, ctc_loss=0.3076, cr_loss=0.4533, attn_decoder_loss=0.3405, over 5231069.68 frames. ], batch size: 210, lr: 3.53e-02, grad_scale: 16.0 +2024-09-16 16:30:28,501 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.19 vs. limit=15.0 +2024-09-16 16:30:28,799 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.72 vs. limit=15.0 +2024-09-16 16:30:31,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_ff2.min_abs, batch_count=36100.0, ans=0.1 +2024-09-16 16:30:34,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=36100.0, ans=0.1 +2024-09-16 16:30:37,546 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.02 vs. 
limit=15.0 +2024-09-16 16:30:52,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=36140.0, ans=0.1 +2024-09-16 16:30:53,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=36140.0, ans=0.125 +2024-09-16 16:31:33,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=36200.0, ans=0.125 +2024-09-16 16:31:58,162 INFO [train.py:1198] (1/2) Epoch 3, batch 0, loss[loss=0.3932, ctc_loss=0.2509, cr_loss=0.4093, attn_decoder_loss=0.4, over 29596.00 frames. ], tot_loss[loss=0.3932, ctc_loss=0.2509, cr_loss=0.4093, attn_decoder_loss=0.4, over 29596.00 frames. ], batch size: 73, lr: 3.35e-02, grad_scale: 8.0 +2024-09-16 16:31:58,163 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 16:32:16,468 INFO [train.py:1230] (1/2) Epoch 3, validation: loss=0.2699, ctc_loss=0.1122, cr_loss=5.059e-15, attn_decoder_loss=0.2874, over 944034.00 frames. +2024-09-16 16:32:16,468 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 16:32:17,921 WARNING [optim.py:503] (1/2) Scaling gradients by 0.08523014932870865, model_norm_threshold=292.6158752441406 +2024-09-16 16:32:18,132 WARNING [optim.py:575] (1/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.1.norm_self_attn.weight with proportion 0.29, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=3.447e+06, grad_sumsq=2.900e+09, orig_rms_sq=1.188e-03 +2024-09-16 16:32:20,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=36200.0, ans=0.2 +2024-09-16 16:32:25,521 WARNING [optim.py:503] (1/2) Scaling gradients by 0.08528286218643188, model_norm_threshold=292.6158752441406 +2024-09-16 16:32:25,724 WARNING [optim.py:575] (1/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.0.norm_self_attn.weight with proportion 0.56, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=6.615e+06, grad_sumsq=1.664e+09, orig_rms_sq=3.977e-03 +2024-09-16 16:32:27,307 WARNING [optim.py:503] (1/2) Scaling gradients by 0.07857576757669449, model_norm_threshold=292.6158752441406 +2024-09-16 16:32:27,512 WARNING [optim.py:575] (1/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.0.norm_self_attn.weight with proportion 0.54, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=7.424e+06, grad_sumsq=1.867e+09, orig_rms_sq=3.977e-03 +2024-09-16 16:32:52,027 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=17.52 vs. limit=15.0 +2024-09-16 16:32:57,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=36280.0, ans=0.2 +2024-09-16 16:32:57,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=15.50 vs. limit=15.0 +2024-09-16 16:33:11,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.45 vs. 
limit=15.0 +2024-09-16 16:33:21,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=36360.0, ans=0.125 +2024-09-16 16:33:23,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=36360.0, ans=0.035 +2024-09-16 16:33:35,002 INFO [train.py:1198] (1/2) Epoch 3, batch 50, loss[loss=0.3026, ctc_loss=0.2566, cr_loss=0.4291, attn_decoder_loss=0.2982, over 29442.00 frames. ], tot_loss[loss=0.346, ctc_loss=0.2958, cr_loss=0.4521, attn_decoder_loss=0.3415, over 1268466.52 frames. ], batch size: 70, lr: 3.34e-02, grad_scale: 8.0 +2024-09-16 16:34:07,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=36480.0, ans=0.2 +2024-09-16 16:34:08,223 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.045e+02 1.388e+02 1.745e+02 2.275e+02 3.724e+03, threshold=3.490e+02, percent-clipped=16.0 +2024-09-16 16:34:10,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=36480.0, ans=0.2 +2024-09-16 16:34:19,523 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.98 vs. limit=15.0 +2024-09-16 16:34:50,236 INFO [train.py:1198] (1/2) Epoch 3, batch 100, loss[loss=0.3142, ctc_loss=0.2694, cr_loss=0.4274, attn_decoder_loss=0.3097, over 29505.00 frames. ], tot_loss[loss=0.3417, ctc_loss=0.2936, cr_loss=0.4528, attn_decoder_loss=0.3369, over 2252823.06 frames. ], batch size: 76, lr: 3.34e-02, grad_scale: 8.0 +2024-09-16 16:35:47,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=36720.0, ans=0.125 +2024-09-16 16:35:55,795 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.30 vs. limit=15.0 +2024-09-16 16:36:04,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=36760.0, ans=0.125 +2024-09-16 16:36:07,251 INFO [train.py:1198] (1/2) Epoch 3, batch 150, loss[loss=0.306, ctc_loss=0.2685, cr_loss=0.431, attn_decoder_loss=0.3006, over 29435.00 frames. ], tot_loss[loss=0.3351, ctc_loss=0.2872, cr_loss=0.449, attn_decoder_loss=0.3304, over 3048253.59 frames. ], batch size: 70, lr: 3.33e-02, grad_scale: 8.0 +2024-09-16 16:36:13,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=36800.0, ans=0.1 +2024-09-16 16:36:17,497 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.20 vs. 
limit=15.0 +2024-09-16 16:36:22,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=36840.0, ans=0.125 +2024-09-16 16:36:28,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=36840.0, ans=0.125 +2024-09-16 16:36:35,362 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:36:38,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=36880.0, ans=0.2 +2024-09-16 16:36:42,364 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.061e+02 1.372e+02 1.536e+02 1.787e+02 3.735e+02, threshold=3.071e+02, percent-clipped=1.0 +2024-09-16 16:36:48,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=36880.0, ans=0.0 +2024-09-16 16:37:05,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=36920.0, ans=0.025 +2024-09-16 16:37:06,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=36920.0, ans=0.125 +2024-09-16 16:37:07,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=36960.0, ans=0.025 +2024-09-16 16:37:12,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=36960.0, ans=0.2 +2024-09-16 16:37:24,139 INFO [train.py:1198] (1/2) Epoch 3, batch 200, loss[loss=0.3552, ctc_loss=0.308, cr_loss=0.4616, attn_decoder_loss=0.3502, over 27573.00 frames. ], tot_loss[loss=0.3307, ctc_loss=0.2824, cr_loss=0.4452, attn_decoder_loss=0.3262, over 3659883.73 frames. ], batch size: 124, lr: 3.33e-02, grad_scale: 8.0 +2024-09-16 16:37:25,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=37000.0, ans=0.0 +2024-09-16 16:37:55,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.22 vs. limit=15.0 +2024-09-16 16:38:17,229 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:38:17,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=37120.0, ans=0.0028000000000000004 +2024-09-16 16:38:39,572 INFO [train.py:1198] (1/2) Epoch 3, batch 250, loss[loss=0.3593, ctc_loss=0.3148, cr_loss=0.4683, attn_decoder_loss=0.3538, over 29197.00 frames. ], tot_loss[loss=0.3292, ctc_loss=0.2803, cr_loss=0.4449, attn_decoder_loss=0.3248, over 4142166.10 frames. ], batch size: 100, lr: 3.32e-02, grad_scale: 8.0 +2024-09-16 16:38:52,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=37200.0, ans=0.125 +2024-09-16 16:39:05,531 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.01 vs. 
limit=15.0 +2024-09-16 16:39:15,191 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.095e+02 1.348e+02 1.507e+02 1.717e+02 3.533e+02, threshold=3.014e+02, percent-clipped=1.0 +2024-09-16 16:39:26,456 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.89 vs. limit=15.0 +2024-09-16 16:39:27,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=37320.0, ans=0.0 +2024-09-16 16:39:32,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=37320.0, ans=0.04949747468305833 +2024-09-16 16:39:40,254 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.91 vs. limit=6.0 +2024-09-16 16:39:48,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=37360.0, ans=0.125 +2024-09-16 16:39:57,443 INFO [train.py:1198] (1/2) Epoch 3, batch 300, loss[loss=0.3387, ctc_loss=0.2865, cr_loss=0.4496, attn_decoder_loss=0.3346, over 29488.00 frames. ], tot_loss[loss=0.3285, ctc_loss=0.2795, cr_loss=0.4442, attn_decoder_loss=0.3241, over 4509202.51 frames. ], batch size: 92, lr: 3.32e-02, grad_scale: 8.0 +2024-09-16 16:40:21,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=37440.0, ans=0.1 +2024-09-16 16:40:31,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=37480.0, ans=0.0 +2024-09-16 16:41:16,155 INFO [train.py:1198] (1/2) Epoch 3, batch 350, loss[loss=0.2968, ctc_loss=0.2547, cr_loss=0.4052, attn_decoder_loss=0.2925, over 29312.00 frames. ], tot_loss[loss=0.3279, ctc_loss=0.2786, cr_loss=0.4446, attn_decoder_loss=0.3235, over 4795309.19 frames. ], batch size: 71, lr: 3.31e-02, grad_scale: 8.0 +2024-09-16 16:41:42,205 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:41:42,591 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.26 vs. limit=15.0 +2024-09-16 16:41:42,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.86 vs. limit=15.0 +2024-09-16 16:41:49,290 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.089e+02 1.316e+02 1.494e+02 1.817e+02 5.633e+02, threshold=2.988e+02, percent-clipped=5.0 +2024-09-16 16:41:49,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=37680.0, ans=0.2 +2024-09-16 16:41:59,506 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.90 vs. 
limit=15.0 +2024-09-16 16:42:01,961 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:42:06,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=37720.0, ans=0.125 +2024-09-16 16:42:31,221 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.43 vs. limit=12.0 +2024-09-16 16:42:32,039 INFO [train.py:1198] (1/2) Epoch 3, batch 400, loss[loss=0.3226, ctc_loss=0.2712, cr_loss=0.4241, attn_decoder_loss=0.3188, over 29683.00 frames. ], tot_loss[loss=0.3266, ctc_loss=0.2768, cr_loss=0.444, attn_decoder_loss=0.3222, over 5025279.16 frames. ], batch size: 82, lr: 3.31e-02, grad_scale: 16.0 +2024-09-16 16:42:42,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=37800.0, ans=0.125 +2024-09-16 16:42:53,296 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.33 vs. limit=15.0 +2024-09-16 16:43:07,563 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=25.16 vs. limit=22.5 +2024-09-16 16:43:08,378 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:43:15,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=37880.0, ans=0.04949747468305833 +2024-09-16 16:43:19,641 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.46 vs. limit=15.0 +2024-09-16 16:43:24,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=37920.0, ans=0.1 +2024-09-16 16:43:34,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=37960.0, ans=0.2 +2024-09-16 16:43:35,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=37960.0, ans=0.125 +2024-09-16 16:43:44,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=37960.0, ans=0.125 +2024-09-16 16:43:45,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=37960.0, ans=0.125 +2024-09-16 16:43:50,132 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.76 vs. limit=15.0 +2024-09-16 16:43:50,672 INFO [train.py:1198] (1/2) Epoch 3, batch 450, loss[loss=0.3328, ctc_loss=0.2741, cr_loss=0.4321, attn_decoder_loss=0.3298, over 29695.00 frames. ], tot_loss[loss=0.3266, ctc_loss=0.2766, cr_loss=0.4442, attn_decoder_loss=0.3223, over 5188647.04 frames. 
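Each `train.py:1198` record above reports a per-batch loss and a running `tot_loss`, both broken into `ctc_loss`, `cr_loss` (consistency regularization), and `attn_decoder_loss` components. Throughout this log the total is consistent with a fixed weighting of about 0.1 * ctc + 0.9 * attention-decoder + 0.02 * cr: for the batch 450 record just above, 0.1 * 0.2766 + 0.9 * 0.3223 + 0.02 * 0.4442 = 0.3266, the logged total, and the same weights reproduce the Epoch 3 validation record, where `cr_loss` is numerically zero. These weights are inferred from the logged values; the flags actually passed to train.py are not recorded in this portion of the log.

```python
# Weights inferred by fitting the logged totals; treat them as a
# reverse-engineered consistency check, not the recipe's stated config.
CTC_WEIGHT, AED_WEIGHT, CR_WEIGHT = 0.1, 0.9, 0.02

def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float) -> float:
    return (CTC_WEIGHT * ctc_loss
            + AED_WEIGHT * attn_decoder_loss
            + CR_WEIGHT * cr_loss)

# Epoch 3, batch 450 tot_loss record above:
print(combined_loss(0.2766, 0.3223, 0.4442))     # ~0.3266 (logged: 0.3266)
# Epoch 3 validation record, where cr_loss vanishes:
print(combined_loss(0.1122, 0.2874, 5.059e-15))  # ~0.2699 (logged: 0.2699)
```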
], batch size: 83, lr: 3.30e-02, grad_scale: 8.0 +2024-09-16 16:43:57,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=38000.0, ans=0.2 +2024-09-16 16:44:02,176 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.70 vs. limit=15.0 +2024-09-16 16:44:12,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=38040.0, ans=0.0026 +2024-09-16 16:44:25,551 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.091e+02 1.316e+02 1.463e+02 1.797e+02 4.950e+02, threshold=2.926e+02, percent-clipped=3.0 +2024-09-16 16:44:27,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=38080.0, ans=0.1 +2024-09-16 16:44:28,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=38080.0, ans=0.05 +2024-09-16 16:44:35,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=38080.0, ans=0.125 +2024-09-16 16:44:45,879 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.77 vs. limit=15.0 +2024-09-16 16:44:52,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=38160.0, ans=0.07 +2024-09-16 16:44:58,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=38160.0, ans=0.125 +2024-09-16 16:45:01,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=38160.0, ans=0.0 +2024-09-16 16:45:06,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=38160.0, ans=0.2 +2024-09-16 16:45:09,106 INFO [train.py:1198] (1/2) Epoch 3, batch 500, loss[loss=0.3336, ctc_loss=0.2773, cr_loss=0.4691, attn_decoder_loss=0.3294, over 29459.00 frames. ], tot_loss[loss=0.325, ctc_loss=0.2745, cr_loss=0.4427, attn_decoder_loss=0.3208, over 5330429.97 frames. ], batch size: 94, lr: 3.30e-02, grad_scale: 8.0 +2024-09-16 16:45:10,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=38200.0, ans=0.2 +2024-09-16 16:45:15,964 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.46 vs. limit=12.0 +2024-09-16 16:45:20,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=38200.0, ans=0.125 +2024-09-16 16:45:37,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.70 vs. limit=15.0 +2024-09-16 16:45:51,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=38280.0, ans=0.125 +2024-09-16 16:46:07,707 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=13.33 vs. 
limit=15.0 +2024-09-16 16:46:25,200 INFO [train.py:1198] (1/2) Epoch 3, batch 550, loss[loss=0.3505, ctc_loss=0.307, cr_loss=0.4624, attn_decoder_loss=0.345, over 28722.00 frames. ], tot_loss[loss=0.325, ctc_loss=0.2745, cr_loss=0.4433, attn_decoder_loss=0.3208, over 5423782.10 frames. ], batch size: 104, lr: 3.29e-02, grad_scale: 8.0 +2024-09-16 16:46:34,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.99 vs. limit=15.0 +2024-09-16 16:46:39,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=38400.0, ans=0.07 +2024-09-16 16:47:02,145 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.886e+01 1.383e+02 1.615e+02 1.876e+02 3.927e+02, threshold=3.230e+02, percent-clipped=4.0 +2024-09-16 16:47:33,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=38560.0, ans=0.025 +2024-09-16 16:47:34,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=38560.0, ans=0.125 +2024-09-16 16:47:43,869 INFO [train.py:1198] (1/2) Epoch 3, batch 600, loss[loss=0.352, ctc_loss=0.3004, cr_loss=0.4896, attn_decoder_loss=0.3469, over 29275.00 frames. ], tot_loss[loss=0.3253, ctc_loss=0.2746, cr_loss=0.4444, attn_decoder_loss=0.321, over 5511214.39 frames. ], batch size: 100, lr: 3.28e-02, grad_scale: 8.0 +2024-09-16 16:47:56,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=38600.0, ans=0.0 +2024-09-16 16:47:59,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.69 vs. limit=15.0 +2024-09-16 16:48:00,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=38640.0, ans=0.0 +2024-09-16 16:48:24,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=38680.0, ans=0.125 +2024-09-16 16:48:38,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=38720.0, ans=0.0 +2024-09-16 16:49:02,379 INFO [train.py:1198] (1/2) Epoch 3, batch 650, loss[loss=0.3254, ctc_loss=0.2729, cr_loss=0.4637, attn_decoder_loss=0.321, over 29753.00 frames. ], tot_loss[loss=0.3233, ctc_loss=0.2722, cr_loss=0.4428, attn_decoder_loss=0.3191, over 5587936.34 frames. ], batch size: 81, lr: 3.28e-02, grad_scale: 8.0 +2024-09-16 16:49:11,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=38800.0, ans=0.2 +2024-09-16 16:49:27,593 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.08 vs. limit=15.0 +2024-09-16 16:49:37,337 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.937e+01 1.311e+02 1.474e+02 1.676e+02 3.343e+02, threshold=2.947e+02, percent-clipped=2.0 +2024-09-16 16:49:47,238 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.12 vs. 
limit=6.0 +2024-09-16 16:49:57,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=38920.0, ans=0.1 +2024-09-16 16:50:07,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=38960.0, ans=0.125 +2024-09-16 16:50:18,166 INFO [train.py:1198] (1/2) Epoch 3, batch 700, loss[loss=0.3084, ctc_loss=0.2504, cr_loss=0.4225, attn_decoder_loss=0.3054, over 29534.00 frames. ], tot_loss[loss=0.324, ctc_loss=0.2728, cr_loss=0.444, attn_decoder_loss=0.3198, over 5637638.84 frames. ], batch size: 76, lr: 3.27e-02, grad_scale: 8.0 +2024-09-16 16:50:31,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=39000.0, ans=0.125 +2024-09-16 16:50:46,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=39040.0, ans=0.07 +2024-09-16 16:51:35,492 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.31 vs. limit=15.0 +2024-09-16 16:51:36,713 INFO [train.py:1198] (1/2) Epoch 3, batch 750, loss[loss=0.3334, ctc_loss=0.2752, cr_loss=0.4415, attn_decoder_loss=0.33, over 29691.00 frames. ], tot_loss[loss=0.3233, ctc_loss=0.2719, cr_loss=0.4433, attn_decoder_loss=0.3191, over 5676305.26 frames. ], batch size: 82, lr: 3.27e-02, grad_scale: 8.0 +2024-09-16 16:51:48,093 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.04 vs. limit=15.0 +2024-09-16 16:52:02,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=39240.0, ans=0.2 +2024-09-16 16:52:11,484 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.529e+01 1.527e+02 1.781e+02 2.064e+02 4.131e+02, threshold=3.563e+02, percent-clipped=5.0 +2024-09-16 16:52:13,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=39280.0, ans=0.002330434782608696 +2024-09-16 16:52:32,447 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 16:52:40,537 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.93 vs. limit=22.5 +2024-09-16 16:52:51,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=39360.0, ans=22.5 +2024-09-16 16:52:54,785 INFO [train.py:1198] (1/2) Epoch 3, batch 800, loss[loss=0.2994, ctc_loss=0.243, cr_loss=0.4174, attn_decoder_loss=0.2964, over 29613.00 frames. ], tot_loss[loss=0.3235, ctc_loss=0.2724, cr_loss=0.4439, attn_decoder_loss=0.3194, over 5706456.91 frames. 
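In each `Clipping_scale=2.0, grad-norm quartiles ...` warning, the five numbers are the min/25%/median/75%/max of recently observed gradient norms, and the reported threshold equals the clipping scale times the median: for the 16:52:11 record above, 2.0 * 1.781e+02 = 3.563e+02. `percent-clipped` is then the share of recent batches whose norm exceeded that threshold. The sketch below implements that scheme over a sliding window; the real optimizer folds this into its update step, so treat the bookkeeping here as an approximation.

```python
from collections import deque

import torch

class QuartileClipper:
    """Clip gradient norms against clipping_scale * median of recent norms."""

    def __init__(self, clipping_scale: float = 2.0, window: int = 128):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)    # recent total grad norms
        self.clipped = deque(maxlen=window)  # whether each batch was clipped

    def clip_(self, parameters: list) -> None:
        grads = [p.grad for p in parameters if p.grad is not None]
        norm = torch.norm(torch.stack([g.norm() for g in grads])).item()
        self.norms.append(norm)
        qs = torch.quantile(torch.tensor(list(self.norms)),
                            torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0])).tolist()
        threshold = self.clipping_scale * qs[2]  # scale * median, as logged
        self.clipped.append(norm > threshold)
        if norm > threshold:
            for g in grads:
                g.mul_(threshold / norm)
        print("grad-norm quartiles "
              + " ".join(f"{q:.3e}" for q in qs)
              + f", threshold={threshold:.3e}, "
              + f"percent-clipped={100.0 * sum(self.clipped) / len(self.clipped):.1f}")
```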
], batch size: 73, lr: 3.26e-02, grad_scale: 16.0 +2024-09-16 16:52:58,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=39400.0, ans=0.0 +2024-09-16 16:53:39,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=39520.0, ans=0.2 +2024-09-16 16:53:43,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=39520.0, ans=0.125 +2024-09-16 16:54:10,344 INFO [train.py:1198] (1/2) Epoch 3, batch 850, loss[loss=0.3255, ctc_loss=0.2662, cr_loss=0.4421, attn_decoder_loss=0.3223, over 29681.00 frames. ], tot_loss[loss=0.3223, ctc_loss=0.271, cr_loss=0.4429, attn_decoder_loss=0.3182, over 5736188.93 frames. ], batch size: 89, lr: 3.26e-02, grad_scale: 8.0 +2024-09-16 16:54:30,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=39640.0, ans=0.0022521739130434773 +2024-09-16 16:54:48,810 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.091e+02 1.307e+02 1.480e+02 1.661e+02 7.090e+02, threshold=2.960e+02, percent-clipped=1.0 +2024-09-16 16:55:01,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=39720.0, ans=0.0 +2024-09-16 16:55:27,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=39800.0, ans=0.125 +2024-09-16 16:55:28,603 INFO [train.py:1198] (1/2) Epoch 3, batch 900, loss[loss=0.2831, ctc_loss=0.2352, cr_loss=0.3701, attn_decoder_loss=0.2802, over 29597.00 frames. ], tot_loss[loss=0.3227, ctc_loss=0.2714, cr_loss=0.4433, attn_decoder_loss=0.3186, over 5740228.44 frames. ], batch size: 73, lr: 3.25e-02, grad_scale: 8.0 +2024-09-16 16:55:40,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=39800.0, ans=0.0 +2024-09-16 16:55:54,061 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.83 vs. limit=22.5 +2024-09-16 16:55:54,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=39840.0, ans=0.1 +2024-09-16 16:56:46,822 INFO [train.py:1198] (1/2) Epoch 3, batch 950, loss[loss=0.3058, ctc_loss=0.2503, cr_loss=0.4542, attn_decoder_loss=0.3019, over 29506.00 frames. ], tot_loss[loss=0.3234, ctc_loss=0.2723, cr_loss=0.4442, attn_decoder_loss=0.3192, over 5742212.04 frames. ], batch size: 74, lr: 3.25e-02, grad_scale: 8.0 +2024-09-16 16:56:52,201 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.79 vs. limit=15.0 +2024-09-16 16:57:00,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=40040.0, ans=0.125 +2024-09-16 16:57:01,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.64 vs. 
limit=12.0 +2024-09-16 16:57:09,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=40040.0, ans=0.125 +2024-09-16 16:57:14,878 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.34 vs. limit=10.0 +2024-09-16 16:57:16,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.11 vs. limit=15.0 +2024-09-16 16:57:18,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=40080.0, ans=0.0 +2024-09-16 16:57:20,690 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.55 vs. limit=22.5 +2024-09-16 16:57:22,766 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.114e+02 1.433e+02 1.639e+02 1.993e+02 1.138e+03, threshold=3.278e+02, percent-clipped=4.0 +2024-09-16 16:57:31,068 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.13 vs. limit=15.0 +2024-09-16 16:57:37,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=40120.0, ans=0.2 +2024-09-16 16:57:58,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=40160.0, ans=0.125 +2024-09-16 16:58:01,736 INFO [train.py:1198] (1/2) Epoch 3, batch 1000, loss[loss=0.3073, ctc_loss=0.2508, cr_loss=0.4406, attn_decoder_loss=0.3038, over 29493.00 frames. ], tot_loss[loss=0.3245, ctc_loss=0.2735, cr_loss=0.4449, attn_decoder_loss=0.3203, over 5735973.45 frames. ], batch size: 77, lr: 3.24e-02, grad_scale: 8.0 +2024-09-16 16:58:27,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=40240.0, ans=0.0 +2024-09-16 16:58:36,706 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.10 vs. limit=22.5 +2024-09-16 16:58:40,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=40280.0, ans=0.125 +2024-09-16 16:58:46,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=40280.0, ans=0.0 +2024-09-16 16:58:51,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=40320.0, ans=0.125 +2024-09-16 16:59:14,537 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.66 vs. limit=22.5 +2024-09-16 16:59:19,696 INFO [train.py:1198] (1/2) Epoch 3, batch 1050, loss[loss=0.3322, ctc_loss=0.2679, cr_loss=0.4535, attn_decoder_loss=0.3293, over 29696.00 frames. ], tot_loss[loss=0.3234, ctc_loss=0.2721, cr_loss=0.4439, attn_decoder_loss=0.3193, over 5743433.39 frames. 
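The `Whitening` lines compare a per-module metric against a limit (e.g. the 16:57:20 record above: `metric=22.55 vs. limit=22.5` for a self-attention whitener); when the metric exceeds its limit, the `Whiten` module adds a gradient term that pushes the activations back toward a white, isotropic-covariance distribution. The metric is 1.0 when all eigenvalues of the grouped, centered feature covariance are equal and grows with their spread. The sketch below follows that definition; it mirrors icefall's `scaling.py` in spirit, but the exact implementation may differ.

```python
import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> float:
    """Return mean(C^2) / mean(diag(C))^2 for the grouped feature covariance C.

    Equals 1.0 when each group's covariance is a multiple of the identity
    (the features are 'white'); larger values mean a wider eigenvalue spread.
    Sketch after icefall's Whiten module; treat details as approximate.
    """
    x = x.reshape(-1, x.shape[-1]).to(torch.float32)
    num_frames, num_channels = x.shape
    cpg = num_channels // num_groups                  # channels per group
    x = x.reshape(num_frames, num_groups, cpg).transpose(0, 1)
    x = x - x.mean(dim=1, keepdim=True)               # use centered covariance
    covar = torch.matmul(x.transpose(1, 2), x)        # (num_groups, cpg, cpg)
    mean_diag = covar.diagonal(dim1=1, dim2=2).mean()
    mean_sq = (covar ** 2).sum() / (num_groups * cpg)
    return (mean_sq / (mean_diag ** 2 + 1e-20)).item()

# Already-white features score close to 1.0, far below the logged limits:
print(whitening_metric(torch.randn(1000, 512), num_groups=1))
```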
], batch size: 85, lr: 3.24e-02, grad_scale: 8.0 +2024-09-16 16:59:56,489 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.083e+02 1.303e+02 1.463e+02 1.706e+02 2.902e+02, threshold=2.927e+02, percent-clipped=0.0 +2024-09-16 16:59:58,369 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:00:09,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=40520.0, ans=0.002060869565217392 +2024-09-16 17:00:29,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=40560.0, ans=0.125 +2024-09-16 17:00:37,928 INFO [train.py:1198] (1/2) Epoch 3, batch 1100, loss[loss=0.3175, ctc_loss=0.2753, cr_loss=0.423, attn_decoder_loss=0.3128, over 29446.00 frames. ], tot_loss[loss=0.3223, ctc_loss=0.2707, cr_loss=0.4431, attn_decoder_loss=0.3182, over 5755751.91 frames. ], batch size: 78, lr: 3.23e-02, grad_scale: 8.0 +2024-09-16 17:00:56,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.57 vs. limit=12.0 +2024-09-16 17:01:04,527 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.95 vs. limit=15.0 +2024-09-16 17:01:09,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=40680.0, ans=0.125 +2024-09-16 17:01:24,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=40720.0, ans=0.125 +2024-09-16 17:01:33,087 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.79 vs. limit=6.0 +2024-09-16 17:01:34,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=40720.0, ans=0.125 +2024-09-16 17:01:37,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=40760.0, ans=0.125 +2024-09-16 17:01:40,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=40760.0, ans=0.2 +2024-09-16 17:01:53,971 INFO [train.py:1198] (1/2) Epoch 3, batch 1150, loss[loss=0.3366, ctc_loss=0.2844, cr_loss=0.4654, attn_decoder_loss=0.332, over 29452.00 frames. ], tot_loss[loss=0.3226, ctc_loss=0.2707, cr_loss=0.4436, attn_decoder_loss=0.3185, over 5754005.29 frames. ], batch size: 78, lr: 3.23e-02, grad_scale: 8.0 +2024-09-16 17:02:22,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=40840.0, ans=0.125 +2024-09-16 17:02:24,381 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.71 vs. 
limit=22.5 +2024-09-16 17:02:32,377 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.096e+02 1.405e+02 1.623e+02 1.892e+02 4.412e+02, threshold=3.246e+02, percent-clipped=6.0 +2024-09-16 17:02:59,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=40960.0, ans=0.0019652173913043483 +2024-09-16 17:03:01,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=40960.0, ans=0.125 +2024-09-16 17:03:11,755 INFO [train.py:1198] (1/2) Epoch 3, batch 1200, loss[loss=0.3265, ctc_loss=0.2649, cr_loss=0.4588, attn_decoder_loss=0.3231, over 29690.00 frames. ], tot_loss[loss=0.3236, ctc_loss=0.2716, cr_loss=0.444, attn_decoder_loss=0.3196, over 5746549.05 frames. ], batch size: 85, lr: 3.22e-02, grad_scale: 16.0 +2024-09-16 17:03:15,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=41000.0, ans=0.0 +2024-09-16 17:03:31,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=41040.0, ans=0.2 +2024-09-16 17:03:34,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=41040.0, ans=0.2 +2024-09-16 17:03:53,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.95 vs. limit=15.0 +2024-09-16 17:04:19,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=41160.0, ans=0.025 +2024-09-16 17:04:22,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=41160.0, ans=0.025 +2024-09-16 17:04:27,571 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.80 vs. limit=22.5 +2024-09-16 17:04:29,439 INFO [train.py:1198] (1/2) Epoch 3, batch 1250, loss[loss=0.3363, ctc_loss=0.2783, cr_loss=0.4749, attn_decoder_loss=0.3322, over 29532.00 frames. ], tot_loss[loss=0.3242, ctc_loss=0.2717, cr_loss=0.445, attn_decoder_loss=0.3202, over 5774893.78 frames. ], batch size: 92, lr: 3.22e-02, grad_scale: 8.0 +2024-09-16 17:04:44,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=41240.0, ans=0.125 +2024-09-16 17:04:48,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=41240.0, ans=0.0 +2024-09-16 17:04:58,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=41280.0, ans=0.0 +2024-09-16 17:05:07,515 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.013e+02 1.378e+02 1.544e+02 1.840e+02 6.927e+02, threshold=3.087e+02, percent-clipped=1.0 +2024-09-16 17:05:31,571 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.95 vs. 
limit=22.5 +2024-09-16 17:05:39,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=41360.0, ans=0.125 +2024-09-16 17:05:47,462 INFO [train.py:1198] (1/2) Epoch 3, batch 1300, loss[loss=0.3261, ctc_loss=0.2635, cr_loss=0.4544, attn_decoder_loss=0.3229, over 28290.00 frames. ], tot_loss[loss=0.3225, ctc_loss=0.2698, cr_loss=0.4433, attn_decoder_loss=0.3185, over 5781387.80 frames. ], batch size: 111, lr: 3.21e-02, grad_scale: 8.0 +2024-09-16 17:05:50,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=41400.0, ans=0.025 +2024-09-16 17:05:52,853 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.11 vs. limit=15.0 +2024-09-16 17:06:15,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=41440.0, ans=0.0 +2024-09-16 17:06:26,556 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.26 vs. limit=22.5 +2024-09-16 17:06:33,352 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:07:03,974 INFO [train.py:1198] (1/2) Epoch 3, batch 1350, loss[loss=0.3101, ctc_loss=0.2457, cr_loss=0.3999, attn_decoder_loss=0.3084, over 29765.00 frames. ], tot_loss[loss=0.3215, ctc_loss=0.2683, cr_loss=0.4429, attn_decoder_loss=0.3176, over 5798425.98 frames. ], batch size: 81, lr: 3.21e-02, grad_scale: 8.0 +2024-09-16 17:07:08,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=41600.0, ans=0.0 +2024-09-16 17:07:10,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=41600.0, ans=0.0018260869565217396 +2024-09-16 17:07:16,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=41600.0, ans=0.2 +2024-09-16 17:07:22,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=41640.0, ans=0.0 +2024-09-16 17:07:41,125 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.097e+02 1.317e+02 1.447e+02 1.601e+02 2.528e+02, threshold=2.895e+02, percent-clipped=1.0 +2024-09-16 17:07:41,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=41680.0, ans=0.0 +2024-09-16 17:07:47,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=41720.0, ans=0.0 +2024-09-16 17:07:56,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=41720.0, ans=0.0018000000000000013 +2024-09-16 17:08:09,427 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=14.15 vs. limit=15.0 +2024-09-16 17:08:20,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=41800.0, ans=0.0 +2024-09-16 17:08:21,361 INFO [train.py:1198] (1/2) Epoch 3, batch 1400, loss[loss=0.2762, ctc_loss=0.2227, cr_loss=0.3935, attn_decoder_loss=0.2734, over 29585.00 frames. 
], tot_loss[loss=0.3207, ctc_loss=0.2673, cr_loss=0.442, attn_decoder_loss=0.3169, over 5809071.72 frames. ], batch size: 69, lr: 3.20e-02, grad_scale: 8.0 +2024-09-16 17:09:04,819 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.21 vs. limit=22.5 +2024-09-16 17:09:19,677 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.98 vs. limit=15.0 +2024-09-16 17:09:26,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=41960.0, ans=0.2 +2024-09-16 17:09:37,196 INFO [train.py:1198] (1/2) Epoch 3, batch 1450, loss[loss=0.3463, ctc_loss=0.2928, cr_loss=0.4909, attn_decoder_loss=0.3413, over 29472.00 frames. ], tot_loss[loss=0.3217, ctc_loss=0.2682, cr_loss=0.4433, attn_decoder_loss=0.3178, over 5807215.45 frames. ], batch size: 94, lr: 3.20e-02, grad_scale: 8.0 +2024-09-16 17:10:03,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=42040.0, ans=0.125 +2024-09-16 17:10:12,129 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.59 vs. limit=22.5 +2024-09-16 17:10:17,159 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.984e+01 1.371e+02 1.551e+02 1.946e+02 4.633e+02, threshold=3.101e+02, percent-clipped=3.0 +2024-09-16 17:10:39,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=42160.0, ans=0.001704347826086956 +2024-09-16 17:10:44,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=42160.0, ans=0.125 +2024-09-16 17:10:55,034 INFO [train.py:1198] (1/2) Epoch 3, batch 1500, loss[loss=0.3271, ctc_loss=0.2794, cr_loss=0.4373, attn_decoder_loss=0.3227, over 29630.00 frames. ], tot_loss[loss=0.3221, ctc_loss=0.2686, cr_loss=0.4439, attn_decoder_loss=0.3182, over 5808090.62 frames. ], batch size: 86, lr: 3.19e-02, grad_scale: 8.0 +2024-09-16 17:11:14,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=42240.0, ans=0.125 +2024-09-16 17:11:41,986 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.97 vs. limit=15.0 +2024-09-16 17:11:45,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=42320.0, ans=0.0 +2024-09-16 17:11:47,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=42320.0, ans=0.1 +2024-09-16 17:12:13,824 INFO [train.py:1198] (1/2) Epoch 3, batch 1550, loss[loss=0.3561, ctc_loss=0.3082, cr_loss=0.4934, attn_decoder_loss=0.3504, over 29530.00 frames. ], tot_loss[loss=0.3229, ctc_loss=0.2704, cr_loss=0.4445, attn_decoder_loss=0.3189, over 5782877.81 frames. 
], batch size: 90, lr: 3.19e-02, grad_scale: 8.0 +2024-09-16 17:12:51,101 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.108e+02 1.385e+02 1.541e+02 1.743e+02 3.737e+02, threshold=3.082e+02, percent-clipped=1.0 +2024-09-16 17:12:55,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=42480.0, ans=0.0 +2024-09-16 17:13:10,330 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=10.88 vs. limit=15.0 +2024-09-16 17:13:18,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=42560.0, ans=0.125 +2024-09-16 17:13:27,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=24.44 vs. limit=22.5 +2024-09-16 17:13:28,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=42560.0, ans=0.125 +2024-09-16 17:13:30,977 INFO [train.py:1198] (1/2) Epoch 3, batch 1600, loss[loss=0.33, ctc_loss=0.2737, cr_loss=0.462, attn_decoder_loss=0.326, over 29675.00 frames. ], tot_loss[loss=0.3225, ctc_loss=0.2704, cr_loss=0.4438, attn_decoder_loss=0.3185, over 5763804.90 frames. ], batch size: 85, lr: 3.18e-02, grad_scale: 16.0 +2024-09-16 17:14:31,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=42760.0, ans=0.0 +2024-09-16 17:14:46,580 INFO [train.py:1198] (1/2) Epoch 3, batch 1650, loss[loss=0.3334, ctc_loss=0.2734, cr_loss=0.4496, attn_decoder_loss=0.3301, over 29700.00 frames. ], tot_loss[loss=0.322, ctc_loss=0.2694, cr_loss=0.4436, attn_decoder_loss=0.318, over 5756691.06 frames. ], batch size: 89, lr: 3.18e-02, grad_scale: 8.0 +2024-09-16 17:15:26,273 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.078e+02 1.391e+02 1.585e+02 1.858e+02 6.012e+02, threshold=3.169e+02, percent-clipped=6.0 +2024-09-16 17:15:29,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=42880.0, ans=0.1 +2024-09-16 17:15:30,497 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=13.34 vs. limit=15.0 +2024-09-16 17:15:41,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=42920.0, ans=0.0 +2024-09-16 17:16:04,590 INFO [train.py:1198] (1/2) Epoch 3, batch 1700, loss[loss=0.2668, ctc_loss=0.2037, cr_loss=0.4131, attn_decoder_loss=0.2646, over 29587.00 frames. ], tot_loss[loss=0.3215, ctc_loss=0.2684, cr_loss=0.4434, attn_decoder_loss=0.3175, over 5778485.02 frames. ], batch size: 69, lr: 3.17e-02, grad_scale: 8.0 +2024-09-16 17:16:07,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=43000.0, ans=0.035 +2024-09-16 17:16:12,456 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:16:22,201 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.63 vs. 
limit=15.0 +2024-09-16 17:16:29,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=43040.0, ans=0.0 +2024-09-16 17:16:35,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=43080.0, ans=0.0 +2024-09-16 17:16:56,000 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:17:00,581 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:17:22,491 INFO [train.py:1198] (1/2) Epoch 3, batch 1750, loss[loss=0.2902, ctc_loss=0.2438, cr_loss=0.4403, attn_decoder_loss=0.2855, over 29357.00 frames. ], tot_loss[loss=0.3208, ctc_loss=0.2677, cr_loss=0.4429, attn_decoder_loss=0.3168, over 5787083.81 frames. ], batch size: 67, lr: 3.17e-02, grad_scale: 8.0 +2024-09-16 17:17:33,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=43200.0, ans=0.125 +2024-09-16 17:17:57,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=43280.0, ans=0.125 +2024-09-16 17:18:01,939 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.065e+02 1.347e+02 1.520e+02 1.785e+02 2.603e+02, threshold=3.040e+02, percent-clipped=0.0 +2024-09-16 17:18:06,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=43320.0, ans=0.1 +2024-09-16 17:18:38,186 INFO [train.py:1198] (1/2) Epoch 3, batch 1800, loss[loss=0.3204, ctc_loss=0.2628, cr_loss=0.4513, attn_decoder_loss=0.3168, over 29685.00 frames. ], tot_loss[loss=0.3206, ctc_loss=0.2675, cr_loss=0.4428, attn_decoder_loss=0.3167, over 5789722.28 frames. ], batch size: 83, lr: 3.16e-02, grad_scale: 8.0 +2024-09-16 17:18:41,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=43400.0, ans=0.0 +2024-09-16 17:18:52,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=43440.0, ans=0.1 +2024-09-16 17:19:08,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.64 vs. limit=15.0 +2024-09-16 17:19:11,216 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.09 vs. limit=22.5 +2024-09-16 17:19:34,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=43520.0, ans=0.0 +2024-09-16 17:19:36,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=43520.0, ans=0.0014086956521739136 +2024-09-16 17:19:54,247 INFO [train.py:1198] (1/2) Epoch 3, batch 1850, loss[loss=0.3388, ctc_loss=0.2819, cr_loss=0.4575, attn_decoder_loss=0.335, over 29619.00 frames. ], tot_loss[loss=0.3204, ctc_loss=0.2671, cr_loss=0.443, attn_decoder_loss=0.3165, over 5796699.06 frames. 
], batch size: 86, lr: 3.16e-02, grad_scale: 8.0 +2024-09-16 17:20:04,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=43600.0, ans=0.05 +2024-09-16 17:20:07,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=43600.0, ans=0.125 +2024-09-16 17:20:17,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=43640.0, ans=0.125 +2024-09-16 17:20:35,358 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.038e+02 1.308e+02 1.428e+02 1.692e+02 5.194e+02, threshold=2.856e+02, percent-clipped=3.0 +2024-09-16 17:21:04,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=43760.0, ans=0.1 +2024-09-16 17:21:06,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=43760.0, ans=0.0 +2024-09-16 17:21:13,411 INFO [train.py:1198] (1/2) Epoch 3, batch 1900, loss[loss=0.3458, ctc_loss=0.2971, cr_loss=0.4724, attn_decoder_loss=0.3407, over 29696.00 frames. ], tot_loss[loss=0.3213, ctc_loss=0.2679, cr_loss=0.4444, attn_decoder_loss=0.3174, over 5804527.46 frames. ], batch size: 89, lr: 3.15e-02, grad_scale: 8.0 +2024-09-16 17:21:22,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=43800.0, ans=0.125 +2024-09-16 17:21:28,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=43840.0, ans=0.0 +2024-09-16 17:21:31,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=43840.0, ans=0.5 +2024-09-16 17:21:36,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=43840.0, ans=0.125 +2024-09-16 17:21:54,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=43880.0, ans=0.025 +2024-09-16 17:22:02,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=43920.0, ans=0.025 +2024-09-16 17:22:03,151 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.46 vs. limit=10.0 +2024-09-16 17:22:29,843 INFO [train.py:1198] (1/2) Epoch 3, batch 1950, loss[loss=0.2989, ctc_loss=0.2382, cr_loss=0.4346, attn_decoder_loss=0.296, over 29459.00 frames. ], tot_loss[loss=0.322, ctc_loss=0.2676, cr_loss=0.446, attn_decoder_loss=0.3181, over 5819636.34 frames. ], batch size: 78, lr: 3.15e-02, grad_scale: 8.0 +2024-09-16 17:22:33,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=44000.0, ans=0.125 +2024-09-16 17:22:45,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=44040.0, ans=0.125 +2024-09-16 17:22:49,056 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.49 vs. 
limit=15.0 +2024-09-16 17:23:09,312 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.072e+02 1.320e+02 1.491e+02 1.683e+02 2.702e+02, threshold=2.982e+02, percent-clipped=0.0 +2024-09-16 17:23:17,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=44120.0, ans=0.0 +2024-09-16 17:23:32,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=44160.0, ans=0.1 +2024-09-16 17:23:35,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_na.min_abs, batch_count=44160.0, ans=0.02 +2024-09-16 17:23:45,534 INFO [train.py:1198] (1/2) Epoch 3, batch 2000, loss[loss=0.2871, ctc_loss=0.2403, cr_loss=0.4093, attn_decoder_loss=0.2832, over 29367.00 frames. ], tot_loss[loss=0.3228, ctc_loss=0.2688, cr_loss=0.4467, attn_decoder_loss=0.3188, over 5796090.13 frames. ], batch size: 67, lr: 3.14e-02, grad_scale: 16.0 +2024-09-16 17:23:52,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=44200.0, ans=0.125 +2024-09-16 17:24:23,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=44280.0, ans=0.1 +2024-09-16 17:24:58,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=44360.0, ans=0.125 +2024-09-16 17:25:05,788 INFO [train.py:1198] (1/2) Epoch 3, batch 2050, loss[loss=0.2963, ctc_loss=0.2492, cr_loss=0.4155, attn_decoder_loss=0.2923, over 29445.00 frames. ], tot_loss[loss=0.3217, ctc_loss=0.2679, cr_loss=0.4453, attn_decoder_loss=0.3177, over 5788609.27 frames. ], batch size: 70, lr: 3.14e-02, grad_scale: 8.0 +2024-09-16 17:25:30,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=44440.0, ans=0.125 +2024-09-16 17:25:42,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=44480.0, ans=0.0 +2024-09-16 17:25:46,772 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.138e+02 1.399e+02 1.535e+02 1.932e+02 1.271e+03, threshold=3.069e+02, percent-clipped=4.0 +2024-09-16 17:25:54,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.17 vs. limit=15.0 +2024-09-16 17:26:21,532 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.36 vs. limit=15.0 +2024-09-16 17:26:21,853 INFO [train.py:1198] (1/2) Epoch 3, batch 2100, loss[loss=0.3276, ctc_loss=0.2685, cr_loss=0.4556, attn_decoder_loss=0.324, over 29765.00 frames. ], tot_loss[loss=0.3207, ctc_loss=0.2667, cr_loss=0.4451, attn_decoder_loss=0.3168, over 5800645.57 frames. 
], batch size: 81, lr: 3.13e-02, grad_scale: 8.0 +2024-09-16 17:26:37,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=44640.0, ans=0.05 +2024-09-16 17:26:41,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=44640.0, ans=0.125 +2024-09-16 17:26:50,840 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:26:53,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=44680.0, ans=0.125 +2024-09-16 17:26:56,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=44680.0, ans=0.0 +2024-09-16 17:26:56,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=44680.0, ans=0.125 +2024-09-16 17:26:58,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=44680.0, ans=0.07 +2024-09-16 17:26:59,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=44680.0, ans=0.1 +2024-09-16 17:27:04,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=44680.0, ans=0.125 +2024-09-16 17:27:22,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=44760.0, ans=0.125 +2024-09-16 17:27:38,162 INFO [train.py:1198] (1/2) Epoch 3, batch 2150, loss[loss=0.3055, ctc_loss=0.2544, cr_loss=0.3852, attn_decoder_loss=0.3026, over 29459.00 frames. ], tot_loss[loss=0.3191, ctc_loss=0.2648, cr_loss=0.4435, attn_decoder_loss=0.3153, over 5815540.95 frames. 
], batch size: 78, lr: 3.13e-02, grad_scale: 8.0 +2024-09-16 17:27:39,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.whiten.whitening_limit, batch_count=44800.0, ans=12.0 +2024-09-16 17:28:01,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=44840.0, ans=0.0 +2024-09-16 17:28:10,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=44880.0, ans=0.125 +2024-09-16 17:28:20,978 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.031e+02 1.285e+02 1.430e+02 1.712e+02 4.702e+02, threshold=2.859e+02, percent-clipped=3.0 +2024-09-16 17:28:27,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=44920.0, ans=0.0011043478260869578 +2024-09-16 17:28:33,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=44920.0, ans=15.0 +2024-09-16 17:28:39,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=44960.0, ans=0.125 +2024-09-16 17:28:55,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=44960.0, ans=0.05 +2024-09-16 17:28:56,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=45000.0, ans=0.1 +2024-09-16 17:28:57,951 INFO [train.py:1198] (1/2) Epoch 3, batch 2200, loss[loss=0.3243, ctc_loss=0.2679, cr_loss=0.4462, attn_decoder_loss=0.3206, over 29632.00 frames. ], tot_loss[loss=0.3192, ctc_loss=0.2648, cr_loss=0.4432, attn_decoder_loss=0.3153, over 5812741.71 frames. ], batch size: 86, lr: 3.12e-02, grad_scale: 8.0 +2024-09-16 17:29:33,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.67 vs. limit=22.5 +2024-09-16 17:29:34,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=45080.0, ans=0.125 +2024-09-16 17:29:38,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=45080.0, ans=0.125 +2024-09-16 17:30:13,754 INFO [train.py:1198] (1/2) Epoch 3, batch 2250, loss[loss=0.3292, ctc_loss=0.268, cr_loss=0.4485, attn_decoder_loss=0.326, over 29692.00 frames. ], tot_loss[loss=0.3188, ctc_loss=0.2641, cr_loss=0.4426, attn_decoder_loss=0.315, over 5810926.21 frames. 
], batch size: 82, lr: 3.12e-02, grad_scale: 8.0 +2024-09-16 17:30:24,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=45200.0, ans=0.1 +2024-09-16 17:30:54,403 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.156e+02 1.411e+02 1.554e+02 1.919e+02 3.789e+02, threshold=3.108e+02, percent-clipped=3.0 +2024-09-16 17:30:54,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=45280.0, ans=0.07 +2024-09-16 17:30:59,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=45320.0, ans=0.125 +2024-09-16 17:31:10,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.14 vs. limit=10.0 +2024-09-16 17:31:21,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=45360.0, ans=0.2 +2024-09-16 17:31:29,210 INFO [train.py:1198] (1/2) Epoch 3, batch 2300, loss[loss=0.288, ctc_loss=0.2288, cr_loss=0.418, attn_decoder_loss=0.2853, over 29344.00 frames. ], tot_loss[loss=0.3179, ctc_loss=0.2633, cr_loss=0.4417, attn_decoder_loss=0.3142, over 5799027.41 frames. ], batch size: 71, lr: 3.11e-02, grad_scale: 8.0 +2024-09-16 17:31:45,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=45440.0, ans=0.0 +2024-09-16 17:32:00,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=45480.0, ans=0.2 +2024-09-16 17:32:09,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=45480.0, ans=0.0 +2024-09-16 17:32:14,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=45480.0, ans=0.1 +2024-09-16 17:32:20,395 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:32:49,796 INFO [train.py:1198] (1/2) Epoch 3, batch 2350, loss[loss=0.3367, ctc_loss=0.2791, cr_loss=0.4699, attn_decoder_loss=0.3326, over 29704.00 frames. ], tot_loss[loss=0.3181, ctc_loss=0.2633, cr_loss=0.4423, attn_decoder_loss=0.3144, over 5805070.31 frames. ], batch size: 83, lr: 3.11e-02, grad_scale: 8.0 +2024-09-16 17:33:03,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=45640.0, ans=0.125 +2024-09-16 17:33:22,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=45680.0, ans=0.125 +2024-09-16 17:33:22,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=45680.0, ans=0.125 +2024-09-16 17:33:30,847 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.361e+02 1.552e+02 1.880e+02 4.928e+02, threshold=3.104e+02, percent-clipped=4.0 +2024-09-16 17:33:32,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=45680.0, ans=0.125 +2024-09-16 17:33:48,640 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.58 vs. 
limit=22.5 +2024-09-16 17:34:01,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=45760.0, ans=0.0009217391304347823 +2024-09-16 17:34:04,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=45800.0, ans=0.0 +2024-09-16 17:34:06,304 INFO [train.py:1198] (1/2) Epoch 3, batch 2400, loss[loss=0.314, ctc_loss=0.2658, cr_loss=0.4354, attn_decoder_loss=0.3096, over 29534.00 frames. ], tot_loss[loss=0.3186, ctc_loss=0.2638, cr_loss=0.4433, attn_decoder_loss=0.3149, over 5807918.63 frames. ], batch size: 76, lr: 3.10e-02, grad_scale: 16.0 +2024-09-16 17:34:09,611 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:34:11,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=45800.0, ans=0.0009130434782608689 +2024-09-16 17:34:37,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=45880.0, ans=0.0 +2024-09-16 17:34:38,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=45880.0, ans=0.025 +2024-09-16 17:34:46,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=45880.0, ans=0.1 +2024-09-16 17:34:47,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=45880.0, ans=0.125 +2024-09-16 17:34:49,851 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.56 vs. limit=15.0 +2024-09-16 17:34:58,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=45920.0, ans=0.125 +2024-09-16 17:34:58,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=45920.0, ans=0.0008869565217391306 +2024-09-16 17:35:01,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=45920.0, ans=0.0008869565217391306 +2024-09-16 17:35:07,558 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:35:12,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=45960.0, ans=0.0008782608695652172 +2024-09-16 17:35:22,219 INFO [train.py:1198] (1/2) Epoch 3, batch 2450, loss[loss=0.3241, ctc_loss=0.274, cr_loss=0.449, attn_decoder_loss=0.3197, over 29713.00 frames. ], tot_loss[loss=0.3201, ctc_loss=0.2656, cr_loss=0.4444, attn_decoder_loss=0.3163, over 5785692.78 frames. 
], batch size: 82, lr: 3.10e-02, grad_scale: 8.0 +2024-09-16 17:35:47,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=46040.0, ans=0.95 +2024-09-16 17:35:54,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=46080.0, ans=0.125 +2024-09-16 17:35:57,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=46080.0, ans=0.125 +2024-09-16 17:36:06,479 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.121e+02 1.425e+02 1.645e+02 1.863e+02 7.632e+02, threshold=3.291e+02, percent-clipped=3.0 +2024-09-16 17:36:24,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=46120.0, ans=0.04949747468305833 +2024-09-16 17:36:41,948 INFO [train.py:1198] (1/2) Epoch 3, batch 2500, loss[loss=0.3061, ctc_loss=0.2334, cr_loss=0.4246, attn_decoder_loss=0.3048, over 29609.00 frames. ], tot_loss[loss=0.3193, ctc_loss=0.2644, cr_loss=0.4439, attn_decoder_loss=0.3155, over 5795647.65 frames. ], batch size: 86, lr: 3.09e-02, grad_scale: 8.0 +2024-09-16 17:36:48,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=46200.0, ans=0.05 +2024-09-16 17:36:58,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=46240.0, ans=0.07 +2024-09-16 17:37:04,261 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.16 vs. limit=15.0 +2024-09-16 17:37:06,843 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:37:23,784 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.25 vs. limit=10.0 +2024-09-16 17:37:43,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=46360.0, ans=0.0 +2024-09-16 17:37:43,518 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.11 vs. limit=15.0 +2024-09-16 17:37:46,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=46360.0, ans=0.1 +2024-09-16 17:37:49,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=46360.0, ans=0.025 +2024-09-16 17:37:50,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=46360.0, ans=0.125 +2024-09-16 17:37:58,393 INFO [train.py:1198] (1/2) Epoch 3, batch 2550, loss[loss=0.2726, ctc_loss=0.2076, cr_loss=0.3915, attn_decoder_loss=0.2711, over 29391.00 frames. ], tot_loss[loss=0.3189, ctc_loss=0.2637, cr_loss=0.4442, attn_decoder_loss=0.3152, over 5797607.79 frames. 
], batch size: 67, lr: 3.09e-02, grad_scale: 8.0 +2024-09-16 17:38:13,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=46440.0, ans=0.0007739130434782603 +2024-09-16 17:38:40,772 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.116e+02 1.310e+02 1.464e+02 1.728e+02 3.657e+02, threshold=2.928e+02, percent-clipped=2.0 +2024-09-16 17:39:14,457 INFO [train.py:1198] (1/2) Epoch 3, batch 2600, loss[loss=0.2976, ctc_loss=0.2385, cr_loss=0.4222, attn_decoder_loss=0.2948, over 29450.00 frames. ], tot_loss[loss=0.3189, ctc_loss=0.2634, cr_loss=0.4442, attn_decoder_loss=0.3152, over 5794106.73 frames. ], batch size: 78, lr: 3.08e-02, grad_scale: 8.0 +2024-09-16 17:39:22,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=46600.0, ans=0.0 +2024-09-16 17:39:52,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=46680.0, ans=0.125 +2024-09-16 17:39:54,655 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.61 vs. limit=6.0 +2024-09-16 17:40:00,978 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.76 vs. limit=15.0 +2024-09-16 17:40:06,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=46720.0, ans=0.2 +2024-09-16 17:40:09,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=46720.0, ans=0.125 +2024-09-16 17:40:18,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=46760.0, ans=0.0 +2024-09-16 17:40:21,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=46760.0, ans=0.05 +2024-09-16 17:40:23,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=46760.0, ans=0.125 +2024-09-16 17:40:24,942 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:40:24,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=46760.0, ans=0.1 +2024-09-16 17:40:33,428 INFO [train.py:1198] (1/2) Epoch 3, batch 2650, loss[loss=0.3354, ctc_loss=0.2879, cr_loss=0.4586, attn_decoder_loss=0.3305, over 29258.00 frames. ], tot_loss[loss=0.3191, ctc_loss=0.2635, cr_loss=0.4446, attn_decoder_loss=0.3154, over 5799977.10 frames. 
], batch size: 100, lr: 3.08e-02, grad_scale: 8.0 +2024-09-16 17:40:33,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=46800.0, ans=0.125 +2024-09-16 17:40:57,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=46840.0, ans=0.125 +2024-09-16 17:40:58,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=46840.0, ans=10.0 +2024-09-16 17:41:08,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=46880.0, ans=0.05 +2024-09-16 17:41:15,665 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.110e+02 1.367e+02 1.536e+02 1.778e+02 3.177e+02, threshold=3.072e+02, percent-clipped=2.0 +2024-09-16 17:41:15,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=46880.0, ans=0.125 +2024-09-16 17:41:25,127 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:41:29,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=46920.0, ans=0.125 +2024-09-16 17:41:49,023 INFO [train.py:1198] (1/2) Epoch 3, batch 2700, loss[loss=0.343, ctc_loss=0.2885, cr_loss=0.5187, attn_decoder_loss=0.3375, over 29518.00 frames. ], tot_loss[loss=0.3198, ctc_loss=0.2644, cr_loss=0.4463, attn_decoder_loss=0.316, over 5794938.22 frames. ], batch size: 87, lr: 3.08e-02, grad_scale: 8.0 +2024-09-16 17:42:04,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=47040.0, ans=0.0006434782608695649 +2024-09-16 17:42:10,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=47040.0, ans=0.025 +2024-09-16 17:42:17,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=47080.0, ans=0.125 +2024-09-16 17:42:24,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=47080.0, ans=0.125 +2024-09-16 17:42:44,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=47120.0, ans=0.125 +2024-09-16 17:43:05,727 INFO [train.py:1198] (1/2) Epoch 3, batch 2750, loss[loss=0.3052, ctc_loss=0.2374, cr_loss=0.41, attn_decoder_loss=0.3036, over 29507.00 frames. ], tot_loss[loss=0.3186, ctc_loss=0.2633, cr_loss=0.4446, attn_decoder_loss=0.3148, over 5793763.80 frames. ], batch size: 75, lr: 3.07e-02, grad_scale: 8.0 +2024-09-16 17:43:06,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=47200.0, ans=0.04949747468305833 +2024-09-16 17:43:28,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.93 vs. 
limit=6.0 +2024-09-16 17:43:45,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=47280.0, ans=0.07 +2024-09-16 17:43:48,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=47280.0, ans=0.1 +2024-09-16 17:43:50,014 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.345e+02 1.531e+02 1.898e+02 4.354e+02, threshold=3.062e+02, percent-clipped=3.0 +2024-09-16 17:43:53,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=47320.0, ans=0.04949747468305833 +2024-09-16 17:44:23,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=19.56 vs. limit=15.0 +2024-09-16 17:44:25,560 INFO [train.py:1198] (1/2) Epoch 3, batch 2800, loss[loss=0.3676, ctc_loss=0.3493, cr_loss=0.4212, attn_decoder_loss=0.3603, over 20641.00 frames. ], tot_loss[loss=0.319, ctc_loss=0.2637, cr_loss=0.4442, attn_decoder_loss=0.3153, over 5775159.89 frames. ], batch size: 210, lr: 3.07e-02, grad_scale: 16.0 +2024-09-16 17:44:27,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=47400.0, ans=0.125 +2024-09-16 17:44:42,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=47440.0, ans=0.2 +2024-09-16 17:44:55,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=47480.0, ans=0.125 +2024-09-16 17:44:57,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=47480.0, ans=0.125 +2024-09-16 17:44:59,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=47480.0, ans=0.125 +2024-09-16 17:45:01,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=47480.0, ans=0.0 +2024-09-16 17:45:16,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=47520.0, ans=0.125 +2024-09-16 17:45:23,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=47520.0, ans=0.1 +2024-09-16 17:45:40,749 INFO [train.py:1198] (1/2) Epoch 3, batch 2850, loss[loss=0.3081, ctc_loss=0.2582, cr_loss=0.4759, attn_decoder_loss=0.3031, over 29499.00 frames. ], tot_loss[loss=0.3193, ctc_loss=0.264, cr_loss=0.4444, attn_decoder_loss=0.3156, over 5760975.16 frames. 
], batch size: 77, lr: 3.06e-02, grad_scale: 8.0 +2024-09-16 17:45:54,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=47640.0, ans=0.0 +2024-09-16 17:46:01,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=47640.0, ans=0.025 +2024-09-16 17:46:25,026 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.134e+02 1.383e+02 1.687e+02 2.154e+02 5.154e+02, threshold=3.374e+02, percent-clipped=7.0 +2024-09-16 17:46:45,552 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.29 vs. limit=15.0 +2024-09-16 17:46:51,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=47760.0, ans=0.125 +2024-09-16 17:46:55,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=47800.0, ans=0.125 +2024-09-16 17:46:56,749 INFO [train.py:1198] (1/2) Epoch 3, batch 2900, loss[loss=0.3192, ctc_loss=0.2645, cr_loss=0.4614, attn_decoder_loss=0.315, over 29416.00 frames. ], tot_loss[loss=0.3201, ctc_loss=0.2643, cr_loss=0.4461, attn_decoder_loss=0.3164, over 5786606.87 frames. ], batch size: 79, lr: 3.06e-02, grad_scale: 8.0 +2024-09-16 17:47:09,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=47800.0, ans=0.2 +2024-09-16 17:47:20,976 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=9.86 vs. limit=15.0 +2024-09-16 17:47:23,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=47840.0, ans=0.125 +2024-09-16 17:47:47,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=47920.0, ans=0.2 +2024-09-16 17:47:50,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=47920.0, ans=0.0004521739130434795 +2024-09-16 17:47:55,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.69 vs. limit=15.0 +2024-09-16 17:48:00,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=47960.0, ans=0.125 +2024-09-16 17:48:24,232 INFO [train.py:1198] (1/2) Epoch 3, batch 2950, loss[loss=0.3079, ctc_loss=0.2541, cr_loss=0.4352, attn_decoder_loss=0.3042, over 29518.00 frames. ], tot_loss[loss=0.3183, ctc_loss=0.2625, cr_loss=0.4445, attn_decoder_loss=0.3146, over 5781527.37 frames. ], batch size: 75, lr: 3.05e-02, grad_scale: 8.0 +2024-09-16 17:49:08,107 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.130e+02 1.336e+02 1.504e+02 1.810e+02 3.679e+02, threshold=3.009e+02, percent-clipped=1.0 +2024-09-16 17:49:14,010 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.55 vs. 
limit=5.0 +2024-09-16 17:49:19,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=48120.0, ans=0.05 +2024-09-16 17:49:19,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=48120.0, ans=0.0 +2024-09-16 17:49:26,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=48160.0, ans=0.5 +2024-09-16 17:49:37,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=48160.0, ans=0.125 +2024-09-16 17:49:40,395 INFO [train.py:1198] (1/2) Epoch 3, batch 3000, loss[loss=0.3198, ctc_loss=0.2614, cr_loss=0.4206, attn_decoder_loss=0.3169, over 29774.00 frames. ], tot_loss[loss=0.3181, ctc_loss=0.2625, cr_loss=0.4441, attn_decoder_loss=0.3144, over 5782323.57 frames. ], batch size: 81, lr: 3.05e-02, grad_scale: 8.0 +2024-09-16 17:49:40,396 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 17:49:58,749 INFO [train.py:1230] (1/2) Epoch 3, validation: loss=0.2335, ctc_loss=0.0936, cr_loss=4.436e-15, attn_decoder_loss=0.2491, over 944034.00 frames. +2024-09-16 17:49:58,749 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 17:50:13,373 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.97 vs. limit=22.5 +2024-09-16 17:50:31,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=24.89 vs. limit=22.5 +2024-09-16 17:50:41,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=48280.0, ans=0.125 +2024-09-16 17:50:44,053 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=13.34 vs. limit=15.0 +2024-09-16 17:51:16,887 INFO [train.py:1198] (1/2) Epoch 3, batch 3050, loss[loss=0.3029, ctc_loss=0.241, cr_loss=0.4453, attn_decoder_loss=0.2999, over 29540.00 frames. ], tot_loss[loss=0.319, ctc_loss=0.2635, cr_loss=0.4445, attn_decoder_loss=0.3153, over 5776014.81 frames. ], batch size: 76, lr: 3.04e-02, grad_scale: 4.0 +2024-09-16 17:51:41,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=48440.0, ans=0.025 +2024-09-16 17:51:52,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=48480.0, ans=0.0 +2024-09-16 17:51:55,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.58 vs. limit=15.0 +2024-09-16 17:52:04,205 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.201e+02 1.405e+02 1.578e+02 1.940e+02 5.924e+02, threshold=3.157e+02, percent-clipped=5.0 +2024-09-16 17:52:34,209 INFO [train.py:1198] (1/2) Epoch 3, batch 3100, loss[loss=0.3478, ctc_loss=0.2989, cr_loss=0.4771, attn_decoder_loss=0.3427, over 29255.00 frames. ], tot_loss[loss=0.3186, ctc_loss=0.2631, cr_loss=0.4443, attn_decoder_loss=0.3149, over 5776381.15 frames. 
], batch size: 100, lr: 3.04e-02, grad_scale: 8.0 +2024-09-16 17:52:40,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=48600.0, ans=0.1 +2024-09-16 17:52:51,382 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.88 vs. limit=15.0 +2024-09-16 17:52:54,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=48640.0, ans=0.00029565217391304237 +2024-09-16 17:52:58,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=48640.0, ans=0.025 +2024-09-16 17:53:48,582 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:53:49,338 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.94 vs. limit=15.0 +2024-09-16 17:53:50,241 INFO [train.py:1198] (1/2) Epoch 3, batch 3150, loss[loss=0.3336, ctc_loss=0.2756, cr_loss=0.4803, attn_decoder_loss=0.3294, over 28928.00 frames. ], tot_loss[loss=0.3186, ctc_loss=0.263, cr_loss=0.4446, attn_decoder_loss=0.3149, over 5783341.28 frames. ], batch size: 104, lr: 3.03e-02, grad_scale: 8.0 +2024-09-16 17:53:58,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=48800.0, ans=0.125 +2024-09-16 17:54:01,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=48800.0, ans=0.125 +2024-09-16 17:54:01,634 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.45 vs. limit=22.5 +2024-09-16 17:54:35,636 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.107e+02 1.334e+02 1.533e+02 1.776e+02 7.773e+02, threshold=3.065e+02, percent-clipped=4.0 +2024-09-16 17:54:38,836 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:54:50,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=48960.0, ans=0.035 +2024-09-16 17:54:57,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.43 vs. limit=15.0 +2024-09-16 17:55:07,980 INFO [train.py:1198] (1/2) Epoch 3, batch 3200, loss[loss=0.3063, ctc_loss=0.241, cr_loss=0.4233, attn_decoder_loss=0.3042, over 29411.00 frames. ], tot_loss[loss=0.3174, ctc_loss=0.2617, cr_loss=0.4442, attn_decoder_loss=0.3137, over 5793645.54 frames. ], batch size: 79, lr: 3.03e-02, grad_scale: 16.0 +2024-09-16 17:55:14,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=49000.0, ans=0.125 +2024-09-16 17:55:15,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=49000.0, ans=0.025 +2024-09-16 17:55:22,947 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.44 vs. 
limit=12.0 +2024-09-16 17:55:33,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=49040.0, ans=0.00020869565217391216 +2024-09-16 17:55:33,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=49040.0, ans=0.125 +2024-09-16 17:55:34,178 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.41 vs. limit=10.0 +2024-09-16 17:55:35,328 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.10 vs. limit=10.0 +2024-09-16 17:55:35,580 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.26 vs. limit=10.0 +2024-09-16 17:56:23,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=49160.0, ans=0.0001826086956521738 +2024-09-16 17:56:23,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=49160.0, ans=0.2 +2024-09-16 17:56:26,020 INFO [train.py:1198] (1/2) Epoch 3, batch 3250, loss[loss=0.3364, ctc_loss=0.2782, cr_loss=0.4672, attn_decoder_loss=0.3325, over 29698.00 frames. ], tot_loss[loss=0.3179, ctc_loss=0.2617, cr_loss=0.4452, attn_decoder_loss=0.3142, over 5800542.56 frames. ], batch size: 84, lr: 3.03e-02, grad_scale: 8.0 +2024-09-16 17:56:44,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=49240.0, ans=0.125 +2024-09-16 17:56:53,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=49240.0, ans=0.0 +2024-09-16 17:56:57,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=49280.0, ans=0.0 +2024-09-16 17:57:00,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=49280.0, ans=0.125 +2024-09-16 17:57:12,499 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.316e+02 1.449e+02 1.854e+02 6.916e+02, threshold=2.898e+02, percent-clipped=2.0 +2024-09-16 17:57:24,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=49360.0, ans=0.00013913043478260868 +2024-09-16 17:57:41,272 INFO [train.py:1198] (1/2) Epoch 3, batch 3300, loss[loss=0.3357, ctc_loss=0.2739, cr_loss=0.4701, attn_decoder_loss=0.3321, over 28279.00 frames. ], tot_loss[loss=0.3168, ctc_loss=0.2611, cr_loss=0.4442, attn_decoder_loss=0.3131, over 5797763.73 frames. ], batch size: 111, lr: 3.02e-02, grad_scale: 8.0 +2024-09-16 17:57:42,287 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.09 vs. limit=15.0 +2024-09-16 17:57:49,846 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.42 vs. 
limit=6.0 +2024-09-16 17:57:50,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=49400.0, ans=0.125 +2024-09-16 17:57:50,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=49400.0, ans=0.0 +2024-09-16 17:57:54,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=49400.0, ans=15.0 +2024-09-16 17:58:12,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=49480.0, ans=0.125 +2024-09-16 17:58:24,649 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.33 vs. limit=6.0 +2024-09-16 17:58:27,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=49520.0, ans=0.2 +2024-09-16 17:58:27,515 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.62 vs. limit=15.0 +2024-09-16 17:58:34,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=49520.0, ans=0.125 +2024-09-16 17:58:34,796 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 17:58:56,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=49560.0, ans=0.125 +2024-09-16 17:58:59,505 INFO [train.py:1198] (1/2) Epoch 3, batch 3350, loss[loss=0.3337, ctc_loss=0.278, cr_loss=0.4749, attn_decoder_loss=0.3294, over 28910.00 frames. ], tot_loss[loss=0.3184, ctc_loss=0.263, cr_loss=0.4457, attn_decoder_loss=0.3146, over 5775235.32 frames. 
], batch size: 104, lr: 3.02e-02, grad_scale: 8.0 +2024-09-16 17:59:01,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=49600.0, ans=0.125 +2024-09-16 17:59:04,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=49600.0, ans=0.1 +2024-09-16 17:59:15,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=49640.0, ans=0.0 +2024-09-16 17:59:17,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=49640.0, ans=0.125 +2024-09-16 17:59:48,720 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.019e+02 1.330e+02 1.460e+02 1.779e+02 4.186e+02, threshold=2.920e+02, percent-clipped=7.0 +2024-09-16 17:59:58,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=49720.0, ans=0.0 +2024-09-16 18:00:02,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=49760.0, ans=0.1 +2024-09-16 18:00:05,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=49760.0, ans=0.125 +2024-09-16 18:00:13,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=49760.0, ans=5.217391304347847e-05 +2024-09-16 18:00:17,591 INFO [train.py:1198] (1/2) Epoch 3, batch 3400, loss[loss=0.2961, ctc_loss=0.2484, cr_loss=0.4053, attn_decoder_loss=0.2924, over 29327.00 frames. ], tot_loss[loss=0.3184, ctc_loss=0.2631, cr_loss=0.4463, attn_decoder_loss=0.3146, over 5767634.56 frames. ], batch size: 67, lr: 3.01e-02, grad_scale: 8.0 +2024-09-16 18:00:22,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=49800.0, ans=0.125 +2024-09-16 18:00:28,484 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:00:30,936 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.15 vs. limit=22.5 +2024-09-16 18:00:38,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=49840.0, ans=0.1 +2024-09-16 18:01:13,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=49920.0, ans=0.0 +2024-09-16 18:01:19,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=49960.0, ans=0.0 +2024-09-16 18:01:33,048 INFO [train.py:1198] (1/2) Epoch 3, batch 3450, loss[loss=0.3242, ctc_loss=0.2602, cr_loss=0.4867, attn_decoder_loss=0.3205, over 28336.00 frames. ], tot_loss[loss=0.3184, ctc_loss=0.2626, cr_loss=0.4465, attn_decoder_loss=0.3147, over 5775497.59 frames. 
], batch size: 111, lr: 3.01e-02, grad_scale: 8.0 +2024-09-16 18:01:45,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=50000.0, ans=0.125 +2024-09-16 18:02:03,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=50080.0, ans=0.1 +2024-09-16 18:02:12,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.55 vs. limit=15.0 +2024-09-16 18:02:19,801 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.075e+02 1.389e+02 1.591e+02 1.812e+02 6.127e+02, threshold=3.183e+02, percent-clipped=1.0 +2024-09-16 18:02:26,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=50120.0, ans=0.125 +2024-09-16 18:02:27,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=50120.0, ans=0.1 +2024-09-16 18:02:38,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=50160.0, ans=0.2 +2024-09-16 18:02:46,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=50160.0, ans=0.0 +2024-09-16 18:02:47,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=50160.0, ans=0.125 +2024-09-16 18:02:50,652 INFO [train.py:1198] (1/2) Epoch 3, batch 3500, loss[loss=0.2851, ctc_loss=0.2424, cr_loss=0.4011, attn_decoder_loss=0.2809, over 29352.00 frames. ], tot_loss[loss=0.3175, ctc_loss=0.2618, cr_loss=0.4448, attn_decoder_loss=0.3139, over 5778110.86 frames. ], batch size: 71, lr: 3.00e-02, grad_scale: 8.0 +2024-09-16 18:03:08,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=50240.0, ans=0.0 +2024-09-16 18:03:21,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=50280.0, ans=0.1 +2024-09-16 18:03:46,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.00 vs. limit=6.0 +2024-09-16 18:03:46,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=50320.0, ans=0.125 +2024-09-16 18:03:51,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=50360.0, ans=0.125 +2024-09-16 18:04:07,560 INFO [train.py:1198] (1/2) Epoch 3, batch 3550, loss[loss=0.3247, ctc_loss=0.2529, cr_loss=0.4512, attn_decoder_loss=0.3227, over 29705.00 frames. ], tot_loss[loss=0.317, ctc_loss=0.2609, cr_loss=0.4451, attn_decoder_loss=0.3133, over 5784186.87 frames. ], batch size: 89, lr: 3.00e-02, grad_scale: 4.0 +2024-09-16 18:04:15,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=50400.0, ans=0.125 +2024-09-16 18:04:20,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.27 vs. 
limit=22.5 +2024-09-16 18:04:23,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=50440.0, ans=15.0 +2024-09-16 18:04:24,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=50440.0, ans=0.0 +2024-09-16 18:04:39,165 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:04:40,028 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.54 vs. limit=15.0 +2024-09-16 18:04:41,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.26 vs. limit=15.0 +2024-09-16 18:04:53,288 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.11 vs. limit=15.0 +2024-09-16 18:04:55,140 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.129e+02 1.391e+02 1.610e+02 2.091e+02 4.528e+02, threshold=3.220e+02, percent-clipped=5.0 +2024-09-16 18:05:21,616 INFO [train.py:1198] (1/2) Epoch 3, batch 3600, loss[loss=0.2966, ctc_loss=0.2365, cr_loss=0.3999, attn_decoder_loss=0.2944, over 29495.00 frames. ], tot_loss[loss=0.3166, ctc_loss=0.2601, cr_loss=0.4448, attn_decoder_loss=0.3129, over 5792642.09 frames. ], batch size: 77, lr: 2.99e-02, grad_scale: 8.0 +2024-09-16 18:06:30,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=50760.0, ans=0.1 +2024-09-16 18:06:35,930 INFO [train.py:1198] (1/2) Epoch 3, batch 3650, loss[loss=0.3209, ctc_loss=0.2511, cr_loss=0.4475, attn_decoder_loss=0.3188, over 29507.00 frames. ], tot_loss[loss=0.3153, ctc_loss=0.2581, cr_loss=0.4435, attn_decoder_loss=0.3118, over 5795458.80 frames. 
], batch size: 90, lr: 2.99e-02, grad_scale: 4.0 +2024-09-16 18:06:57,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten.whitening_limit, batch_count=50840.0, ans=15.0 +2024-09-16 18:07:00,290 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:07:07,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=50880.0, ans=0.0 +2024-09-16 18:07:11,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten.whitening_limit, batch_count=50880.0, ans=22.5 +2024-09-16 18:07:12,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=50880.0, ans=0.125 +2024-09-16 18:07:19,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=50920.0, ans=10.0 +2024-09-16 18:07:25,477 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.018e+02 1.262e+02 1.447e+02 1.690e+02 1.332e+03, threshold=2.894e+02, percent-clipped=3.0 +2024-09-16 18:07:27,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=50920.0, ans=0.125 +2024-09-16 18:07:27,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=50920.0, ans=0.125 +2024-09-16 18:07:27,892 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.25 vs. limit=15.0 +2024-09-16 18:07:40,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=50960.0, ans=0.1 +2024-09-16 18:07:50,876 INFO [train.py:1198] (1/2) Epoch 3, batch 3700, loss[loss=0.3205, ctc_loss=0.2572, cr_loss=0.4862, attn_decoder_loss=0.3167, over 29704.00 frames. ], tot_loss[loss=0.3152, ctc_loss=0.2578, cr_loss=0.4441, attn_decoder_loss=0.3118, over 5805305.67 frames. ], batch size: 84, lr: 2.99e-02, grad_scale: 8.0 +2024-09-16 18:07:52,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_abs, batch_count=51000.0, ans=0.5 +2024-09-16 18:07:54,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=51000.0, ans=0.125 +2024-09-16 18:08:09,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=51040.0, ans=0.125 +2024-09-16 18:08:41,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=51120.0, ans=0.2 +2024-09-16 18:08:56,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=51160.0, ans=0.125 +2024-09-16 18:09:09,217 INFO [train.py:1198] (1/2) Epoch 3, batch 3750, loss[loss=0.268, ctc_loss=0.2084, cr_loss=0.3814, attn_decoder_loss=0.2662, over 29291.00 frames. ], tot_loss[loss=0.3149, ctc_loss=0.2576, cr_loss=0.4442, attn_decoder_loss=0.3114, over 5808405.31 frames. 
], batch size: 67, lr: 2.98e-02, grad_scale: 8.0 +2024-09-16 18:09:18,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=51200.0, ans=0.05 +2024-09-16 18:09:18,808 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.36 vs. limit=15.0 +2024-09-16 18:09:31,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=51240.0, ans=0.0 +2024-09-16 18:09:32,636 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.76 vs. limit=15.0 +2024-09-16 18:09:58,527 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.044e+02 1.287e+02 1.522e+02 1.821e+02 1.090e+03, threshold=3.043e+02, percent-clipped=9.0 +2024-09-16 18:10:00,468 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:10:04,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=51320.0, ans=0.05 +2024-09-16 18:10:22,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=51400.0, ans=0.125 +2024-09-16 18:10:23,886 INFO [train.py:1198] (1/2) Epoch 3, batch 3800, loss[loss=0.3288, ctc_loss=0.2764, cr_loss=0.4594, attn_decoder_loss=0.3244, over 29640.00 frames. ], tot_loss[loss=0.3145, ctc_loss=0.2573, cr_loss=0.4432, attn_decoder_loss=0.311, over 5797680.16 frames. ], batch size: 86, lr: 2.98e-02, grad_scale: 8.0 +2024-09-16 18:10:27,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=51400.0, ans=0.0 +2024-09-16 18:10:33,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=51400.0, ans=0.125 +2024-09-16 18:10:33,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten.whitening_limit, batch_count=51400.0, ans=15.0 +2024-09-16 18:10:35,116 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.82 vs. limit=6.0 +2024-09-16 18:10:36,765 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.36 vs. limit=15.0 +2024-09-16 18:10:54,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.96 vs. limit=22.5 +2024-09-16 18:10:55,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=51480.0, ans=0.125 +2024-09-16 18:11:38,185 INFO [train.py:1198] (1/2) Epoch 3, batch 3850, loss[loss=0.3337, ctc_loss=0.2686, cr_loss=0.4422, attn_decoder_loss=0.3311, over 29299.00 frames. ], tot_loss[loss=0.3138, ctc_loss=0.2562, cr_loss=0.443, attn_decoder_loss=0.3104, over 5811833.46 frames. 
], batch size: 100, lr: 2.97e-02, grad_scale: 8.0 +2024-09-16 18:11:59,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=51640.0, ans=0.2 +2024-09-16 18:12:11,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=51680.0, ans=0.0 +2024-09-16 18:12:27,161 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.157e+02 1.321e+02 1.509e+02 1.752e+02 3.872e+02, threshold=3.018e+02, percent-clipped=1.0 +2024-09-16 18:12:39,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=51760.0, ans=0.125 +2024-09-16 18:12:43,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_ff3.min_abs, batch_count=51760.0, ans=0.2 +2024-09-16 18:12:52,648 INFO [train.py:1198] (1/2) Epoch 3, batch 3900, loss[loss=0.3292, ctc_loss=0.267, cr_loss=0.4658, attn_decoder_loss=0.3258, over 29619.00 frames. ], tot_loss[loss=0.3143, ctc_loss=0.2563, cr_loss=0.4436, attn_decoder_loss=0.3109, over 5816195.00 frames. ], batch size: 86, lr: 2.97e-02, grad_scale: 8.0 +2024-09-16 18:12:54,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=51800.0, ans=0.025 +2024-09-16 18:12:54,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=51800.0, ans=0.125 +2024-09-16 18:13:01,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.94 vs. limit=22.5 +2024-09-16 18:13:03,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=51800.0, ans=0.2 +2024-09-16 18:13:06,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=51840.0, ans=0.125 +2024-09-16 18:13:07,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=51840.0, ans=0.2 +2024-09-16 18:13:16,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=51840.0, ans=0.2 +2024-09-16 18:13:25,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=51880.0, ans=0.0 +2024-09-16 18:13:34,904 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.64 vs. limit=15.0 +2024-09-16 18:13:48,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=51920.0, ans=0.0 +2024-09-16 18:13:52,393 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.02 vs. limit=10.0 +2024-09-16 18:13:52,514 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.11 vs. 
limit=15.0 +2024-09-16 18:14:02,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=51960.0, ans=0.125 +2024-09-16 18:14:02,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=51960.0, ans=0.125 +2024-09-16 18:14:06,787 INFO [train.py:1198] (1/2) Epoch 3, batch 3950, loss[loss=0.3263, ctc_loss=0.2674, cr_loss=0.4445, attn_decoder_loss=0.3229, over 29502.00 frames. ], tot_loss[loss=0.3142, ctc_loss=0.2559, cr_loss=0.444, attn_decoder_loss=0.3108, over 5835628.84 frames. ], batch size: 97, lr: 2.96e-02, grad_scale: 8.0 +2024-09-16 18:14:11,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=52000.0, ans=0.125 +2024-09-16 18:14:18,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=52000.0, ans=0.0 +2024-09-16 18:14:42,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=52080.0, ans=0.125 +2024-09-16 18:14:48,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=52080.0, ans=0.1 +2024-09-16 18:14:58,188 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.019e+02 1.359e+02 1.491e+02 1.794e+02 3.719e+02, threshold=2.982e+02, percent-clipped=2.0 +2024-09-16 18:15:22,980 INFO [train.py:1198] (1/2) Epoch 3, batch 4000, loss[loss=0.2915, ctc_loss=0.2346, cr_loss=0.4404, attn_decoder_loss=0.288, over 29535.00 frames. ], tot_loss[loss=0.3152, ctc_loss=0.2572, cr_loss=0.445, attn_decoder_loss=0.3118, over 5813357.60 frames. ], batch size: 74, lr: 2.96e-02, grad_scale: 16.0 +2024-09-16 18:16:00,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=52280.0, ans=0.125 +2024-09-16 18:16:16,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=52320.0, ans=0.1 +2024-09-16 18:16:32,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=52360.0, ans=0.2 +2024-09-16 18:16:34,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=52360.0, ans=0.125 +2024-09-16 18:16:36,968 INFO [train.py:1198] (1/2) Epoch 3, batch 4050, loss[loss=0.3691, ctc_loss=0.3419, cr_loss=0.4795, attn_decoder_loss=0.3614, over 20307.00 frames. ], tot_loss[loss=0.3152, ctc_loss=0.2574, cr_loss=0.4448, attn_decoder_loss=0.3117, over 5798354.98 frames. 
], batch size: 209, lr: 2.96e-02, grad_scale: 4.0 +2024-09-16 18:16:44,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=52400.0, ans=0.1 +2024-09-16 18:17:06,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=52480.0, ans=0.2 +2024-09-16 18:17:06,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=52480.0, ans=0.125 +2024-09-16 18:17:12,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=52480.0, ans=0.0 +2024-09-16 18:17:18,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=52480.0, ans=0.05 +2024-09-16 18:17:28,049 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.077e+02 1.348e+02 1.567e+02 1.841e+02 9.373e+02, threshold=3.134e+02, percent-clipped=5.0 +2024-09-16 18:17:28,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=52520.0, ans=0.0 +2024-09-16 18:17:28,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=52520.0, ans=0.125 +2024-09-16 18:17:32,842 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:17:41,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=52560.0, ans=0.0 +2024-09-16 18:17:50,222 INFO [train.py:1198] (1/2) Epoch 3, batch 4100, loss[loss=0.3327, ctc_loss=0.2661, cr_loss=0.453, attn_decoder_loss=0.33, over 29514.00 frames. ], tot_loss[loss=0.3154, ctc_loss=0.2579, cr_loss=0.4454, attn_decoder_loss=0.3119, over 5793509.37 frames. ], batch size: 90, lr: 2.95e-02, grad_scale: 8.0 +2024-09-16 18:17:50,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=52600.0, ans=0.125 +2024-09-16 18:17:56,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=52600.0, ans=0.0 +2024-09-16 18:18:33,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=52720.0, ans=0.0 +2024-09-16 18:18:37,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=52720.0, ans=0.0 +2024-09-16 18:18:47,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=52720.0, ans=0.05 +2024-09-16 18:18:50,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=52760.0, ans=0.125 +2024-09-16 18:18:51,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=52760.0, ans=0.125 +2024-09-16 18:19:06,558 INFO [train.py:1198] (1/2) Epoch 3, batch 4150, loss[loss=0.3134, ctc_loss=0.2532, cr_loss=0.4826, attn_decoder_loss=0.3094, over 29512.00 frames. ], tot_loss[loss=0.3142, ctc_loss=0.2566, cr_loss=0.4438, attn_decoder_loss=0.3108, over 5798524.06 frames. 
], batch size: 77, lr: 2.95e-02, grad_scale: 4.0 +2024-09-16 18:19:14,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=52800.0, ans=0.125 +2024-09-16 18:19:18,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=52800.0, ans=0.0 +2024-09-16 18:19:21,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=52840.0, ans=0.0 +2024-09-16 18:19:21,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=52840.0, ans=0.125 +2024-09-16 18:19:26,454 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.33 vs. limit=12.0 +2024-09-16 18:19:37,556 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:19:54,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=52920.0, ans=0.125 +2024-09-16 18:19:59,721 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.110e+02 1.278e+02 1.455e+02 1.672e+02 3.435e+02, threshold=2.910e+02, percent-clipped=1.0 +2024-09-16 18:20:04,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=52960.0, ans=0.2 +2024-09-16 18:20:11,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=52960.0, ans=0.125 +2024-09-16 18:20:19,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=53000.0, ans=0.0 +2024-09-16 18:20:20,392 INFO [train.py:1198] (1/2) Epoch 3, batch 4200, loss[loss=0.3433, ctc_loss=0.2882, cr_loss=0.4838, attn_decoder_loss=0.3386, over 29538.00 frames. ], tot_loss[loss=0.3144, ctc_loss=0.2562, cr_loss=0.4447, attn_decoder_loss=0.3109, over 5800806.46 frames. ], batch size: 90, lr: 2.94e-02, grad_scale: 8.0 +2024-09-16 18:20:25,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=53000.0, ans=0.125 +2024-09-16 18:20:35,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=53040.0, ans=0.0 +2024-09-16 18:20:42,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=53040.0, ans=0.125 +2024-09-16 18:20:43,353 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.82 vs. limit=10.0 +2024-09-16 18:21:12,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_ff2.min_abs, batch_count=53120.0, ans=0.1 +2024-09-16 18:21:34,065 INFO [train.py:1198] (1/2) Epoch 3, batch 4250, loss[loss=0.2973, ctc_loss=0.2287, cr_loss=0.4458, attn_decoder_loss=0.295, over 29522.00 frames. ], tot_loss[loss=0.3147, ctc_loss=0.2564, cr_loss=0.4451, attn_decoder_loss=0.3113, over 5805922.07 frames. 
], batch size: 74, lr: 2.94e-02, grad_scale: 4.0 +2024-09-16 18:21:37,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=53200.0, ans=0.125 +2024-09-16 18:21:38,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=53200.0, ans=0.0 +2024-09-16 18:21:47,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=53240.0, ans=0.0 +2024-09-16 18:21:48,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=53240.0, ans=0.125 +2024-09-16 18:22:03,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=53280.0, ans=0.125 +2024-09-16 18:22:09,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=53280.0, ans=0.125 +2024-09-16 18:22:26,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=53320.0, ans=0.125 +2024-09-16 18:22:28,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=53320.0, ans=0.0 +2024-09-16 18:22:29,120 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.928e+01 1.354e+02 1.567e+02 1.958e+02 1.183e+03, threshold=3.135e+02, percent-clipped=4.0 +2024-09-16 18:22:32,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=53360.0, ans=0.07 +2024-09-16 18:22:36,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=53360.0, ans=0.125 +2024-09-16 18:22:47,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=53400.0, ans=0.125 +2024-09-16 18:22:49,052 INFO [train.py:1198] (1/2) Epoch 3, batch 4300, loss[loss=0.3363, ctc_loss=0.2681, cr_loss=0.4755, attn_decoder_loss=0.3333, over 29513.00 frames. ], tot_loss[loss=0.3147, ctc_loss=0.2562, cr_loss=0.4447, attn_decoder_loss=0.3113, over 5794611.23 frames. ], batch size: 87, lr: 2.93e-02, grad_scale: 8.0 +2024-09-16 18:22:58,906 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.08 vs. 
limit=22.5 +2024-09-16 18:23:10,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=53440.0, ans=0.125 +2024-09-16 18:23:23,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=53480.0, ans=0.125 +2024-09-16 18:23:39,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=53520.0, ans=0.125 +2024-09-16 18:23:41,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=53520.0, ans=0.0 +2024-09-16 18:23:50,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=53560.0, ans=0.125 +2024-09-16 18:24:03,892 INFO [train.py:1198] (1/2) Epoch 3, batch 4350, loss[loss=0.3251, ctc_loss=0.2707, cr_loss=0.4548, attn_decoder_loss=0.321, over 29451.00 frames. ], tot_loss[loss=0.3186, ctc_loss=0.26, cr_loss=0.4495, attn_decoder_loss=0.3151, over 5795894.44 frames. ], batch size: 97, lr: 2.93e-02, grad_scale: 4.0 +2024-09-16 18:24:44,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=53680.0, ans=0.125 +2024-09-16 18:24:45,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.67 vs. limit=22.5 +2024-09-16 18:24:47,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=53720.0, ans=0.125 +2024-09-16 18:24:49,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=53720.0, ans=0.125 +2024-09-16 18:24:59,247 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.065e+02 1.313e+02 1.497e+02 1.843e+02 5.151e+02, threshold=2.995e+02, percent-clipped=3.0 +2024-09-16 18:25:06,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=53760.0, ans=0.0 +2024-09-16 18:25:10,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=53760.0, ans=0.0 +2024-09-16 18:25:17,590 INFO [train.py:1198] (1/2) Epoch 3, batch 4400, loss[loss=0.3349, ctc_loss=0.2848, cr_loss=0.4495, attn_decoder_loss=0.3305, over 27358.00 frames. ], tot_loss[loss=0.3215, ctc_loss=0.2633, cr_loss=0.4518, attn_decoder_loss=0.318, over 5766961.94 frames. ], batch size: 124, lr: 2.93e-02, grad_scale: 8.0 +2024-09-16 18:25:19,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=53800.0, ans=0.0 +2024-09-16 18:25:23,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=53800.0, ans=0.0 +2024-09-16 18:25:28,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=53800.0, ans=0.125 +2024-09-16 18:25:28,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.91 vs. 
limit=15.0 +2024-09-16 18:25:29,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=53800.0, ans=0.1 +2024-09-16 18:25:32,459 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.69 vs. limit=10.0 +2024-09-16 18:25:38,225 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.73 vs. limit=22.5 +2024-09-16 18:25:56,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=53880.0, ans=10.0 +2024-09-16 18:25:56,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=53880.0, ans=0.125 +2024-09-16 18:26:18,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=53960.0, ans=0.125 +2024-09-16 18:26:30,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=54000.0, ans=0.1 +2024-09-16 18:26:31,923 INFO [train.py:1198] (1/2) Epoch 3, batch 4450, loss[loss=0.3462, ctc_loss=0.3142, cr_loss=0.4457, attn_decoder_loss=0.3399, over 20196.00 frames. ], tot_loss[loss=0.3258, ctc_loss=0.2707, cr_loss=0.4545, attn_decoder_loss=0.3219, over 5574534.42 frames. ], batch size: 209, lr: 2.92e-02, grad_scale: 8.0 +2024-09-16 18:26:43,160 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=11.23 vs. limit=10.0 +2024-09-16 18:26:58,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=54040.0, ans=0.1 +2024-09-16 18:26:59,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=54040.0, ans=0.125 +2024-09-16 18:27:01,644 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.70 vs. limit=15.0 +2024-09-16 18:27:01,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=14.91 vs. limit=15.0 +2024-09-16 18:27:09,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.76 vs. limit=15.0 +2024-09-16 18:27:14,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=54080.0, ans=0.0 +2024-09-16 18:27:28,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=54120.0, ans=0.125 +2024-09-16 18:27:29,242 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.938e+01 1.292e+02 1.431e+02 1.663e+02 2.911e+02, threshold=2.863e+02, percent-clipped=0.0 +2024-09-16 18:27:47,155 INFO [train.py:1198] (1/2) Epoch 3, batch 4500, loss[loss=0.3526, ctc_loss=0.3368, cr_loss=0.444, attn_decoder_loss=0.3445, over 20125.00 frames. ], tot_loss[loss=0.3307, ctc_loss=0.2806, cr_loss=0.4546, attn_decoder_loss=0.3262, over 5233880.25 frames. 
], batch size: 209, lr: 2.92e-02, grad_scale: 8.0 +2024-09-16 18:28:11,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=54240.0, ans=0.0 +2024-09-16 18:29:13,313 INFO [train.py:1198] (1/2) Epoch 4, batch 0, loss[loss=0.4052, ctc_loss=0.2539, cr_loss=0.4238, attn_decoder_loss=0.4126, over 29612.00 frames. ], tot_loss[loss=0.4052, ctc_loss=0.2539, cr_loss=0.4238, attn_decoder_loss=0.4126, over 29612.00 frames. ], batch size: 73, lr: 2.73e-02, grad_scale: 4.0 +2024-09-16 18:29:13,314 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 18:29:31,685 INFO [train.py:1230] (1/2) Epoch 4, validation: loss=0.259, ctc_loss=0.0933, cr_loss=4.939e-15, attn_decoder_loss=0.2774, over 944034.00 frames. +2024-09-16 18:29:31,685 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 18:29:47,067 WARNING [optim.py:503] (1/2) Scaling gradients by 0.06680610030889511, model_norm_threshold=286.2942810058594 +2024-09-16 18:29:47,278 WARNING [optim.py:575] (1/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.1.self_attn.linear_k.weight with proportion 0.28, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=5.084e+06, grad_sumsq=4.710e+06, orig_rms_sq=1.079e+00 +2024-09-16 18:29:56,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=54340.0, ans=0.125 +2024-09-16 18:30:07,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=54380.0, ans=0.125 +2024-09-16 18:30:19,169 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.28 vs. limit=22.5 +2024-09-16 18:30:42,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=54460.0, ans=0.1 +2024-09-16 18:30:44,943 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.88 vs. limit=15.0 +2024-09-16 18:30:51,636 INFO [train.py:1198] (1/2) Epoch 4, batch 50, loss[loss=0.2885, ctc_loss=0.2383, cr_loss=0.425, attn_decoder_loss=0.2847, over 29424.00 frames. ], tot_loss[loss=0.3248, ctc_loss=0.2645, cr_loss=0.4464, attn_decoder_loss=0.3216, over 1267681.34 frames. ], batch size: 70, lr: 2.72e-02, grad_scale: 2.0 +2024-09-16 18:30:56,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=54500.0, ans=0.125 +2024-09-16 18:31:05,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=54540.0, ans=0.125 +2024-09-16 18:31:07,632 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.96 vs. limit=6.0 +2024-09-16 18:31:12,000 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.76 vs. 
limit=22.5 +2024-09-16 18:31:15,933 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.005e+02 1.251e+02 1.386e+02 1.651e+02 4.285e+03, threshold=2.772e+02, percent-clipped=8.0 +2024-09-16 18:31:16,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=54540.0, ans=0.0 +2024-09-16 18:31:17,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=54540.0, ans=0.0 +2024-09-16 18:31:26,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=54580.0, ans=0.0 +2024-09-16 18:31:31,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=54580.0, ans=0.0 +2024-09-16 18:31:37,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=54620.0, ans=0.035 +2024-09-16 18:31:38,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=54620.0, ans=10.0 +2024-09-16 18:31:40,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.44 vs. limit=22.5 +2024-09-16 18:32:07,180 INFO [train.py:1198] (1/2) Epoch 4, batch 100, loss[loss=0.3149, ctc_loss=0.2645, cr_loss=0.4838, attn_decoder_loss=0.3097, over 29536.00 frames. ], tot_loss[loss=0.3223, ctc_loss=0.2628, cr_loss=0.4493, attn_decoder_loss=0.3189, over 2251133.36 frames. ], batch size: 76, lr: 2.72e-02, grad_scale: 4.0 +2024-09-16 18:32:23,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.64 vs. limit=15.0 +2024-09-16 18:32:30,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=54740.0, ans=0.2 +2024-09-16 18:32:33,892 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.19 vs. limit=15.0 +2024-09-16 18:32:40,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=54780.0, ans=0.125 +2024-09-16 18:32:52,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=54820.0, ans=0.125 +2024-09-16 18:32:57,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=54820.0, ans=0.0 +2024-09-16 18:33:11,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=54860.0, ans=0.125 +2024-09-16 18:33:23,762 INFO [train.py:1198] (1/2) Epoch 4, batch 150, loss[loss=0.2878, ctc_loss=0.2349, cr_loss=0.3971, attn_decoder_loss=0.2849, over 29452.00 frames. ], tot_loss[loss=0.3163, ctc_loss=0.2567, cr_loss=0.4451, attn_decoder_loss=0.313, over 3046515.03 frames. 
], batch size: 70, lr: 2.72e-02, grad_scale: 4.0 +2024-09-16 18:33:25,461 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:33:27,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=54900.0, ans=0.125 +2024-09-16 18:33:36,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=54900.0, ans=0.125 +2024-09-16 18:33:48,184 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.067e+02 1.258e+02 1.425e+02 1.595e+02 3.260e+02, threshold=2.849e+02, percent-clipped=3.0 +2024-09-16 18:34:00,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=54980.0, ans=0.1 +2024-09-16 18:34:06,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=54980.0, ans=0.125 +2024-09-16 18:34:38,961 INFO [train.py:1198] (1/2) Epoch 4, batch 200, loss[loss=0.3337, ctc_loss=0.2811, cr_loss=0.4558, attn_decoder_loss=0.3295, over 27494.00 frames. ], tot_loss[loss=0.3136, ctc_loss=0.2536, cr_loss=0.4427, attn_decoder_loss=0.3104, over 3658327.97 frames. ], batch size: 125, lr: 2.71e-02, grad_scale: 8.0 +2024-09-16 18:34:54,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.93 vs. limit=22.5 +2024-09-16 18:35:01,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten.whitening_limit, batch_count=55140.0, ans=15.0 +2024-09-16 18:35:05,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=55140.0, ans=0.125 +2024-09-16 18:35:10,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.27 vs. limit=15.0 +2024-09-16 18:35:14,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=55180.0, ans=0.0 +2024-09-16 18:35:22,528 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 18:35:25,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=55220.0, ans=0.1 +2024-09-16 18:35:34,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=55220.0, ans=0.2 +2024-09-16 18:35:43,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=55260.0, ans=0.0 +2024-09-16 18:35:43,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=55260.0, ans=0.0 +2024-09-16 18:35:49,004 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.80 vs. limit=6.0 +2024-09-16 18:35:56,984 INFO [train.py:1198] (1/2) Epoch 4, batch 250, loss[loss=0.3228, ctc_loss=0.2624, cr_loss=0.4708, attn_decoder_loss=0.319, over 29262.00 frames. ], tot_loss[loss=0.3122, ctc_loss=0.2514, cr_loss=0.4424, attn_decoder_loss=0.3091, over 4140471.40 frames. 
], batch size: 100, lr: 2.71e-02, grad_scale: 4.0 +2024-09-16 18:36:03,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=55300.0, ans=0.07 +2024-09-16 18:36:09,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.38 vs. limit=15.0 +2024-09-16 18:36:22,542 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.428e+01 1.364e+02 1.529e+02 1.729e+02 3.264e+02, threshold=3.057e+02, percent-clipped=1.0 +2024-09-16 18:36:35,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=55380.0, ans=0.025 +2024-09-16 18:36:35,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=55380.0, ans=0.125 +2024-09-16 18:36:59,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=55460.0, ans=0.125 +2024-09-16 18:37:12,133 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.84 vs. limit=15.0 +2024-09-16 18:37:14,449 INFO [train.py:1198] (1/2) Epoch 4, batch 300, loss[loss=0.3305, ctc_loss=0.2663, cr_loss=0.4438, attn_decoder_loss=0.3278, over 29494.00 frames. ], tot_loss[loss=0.3118, ctc_loss=0.2511, cr_loss=0.4419, attn_decoder_loss=0.3087, over 4509608.61 frames. ], batch size: 92, lr: 2.70e-02, grad_scale: 8.0 +2024-09-16 18:37:18,357 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.54 vs. limit=15.0 +2024-09-16 18:37:41,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=55540.0, ans=0.1 +2024-09-16 18:37:46,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=55580.0, ans=0.125 +2024-09-16 18:37:47,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=55580.0, ans=0.125 +2024-09-16 18:38:29,982 INFO [train.py:1198] (1/2) Epoch 4, batch 350, loss[loss=0.2648, ctc_loss=0.2034, cr_loss=0.3633, attn_decoder_loss=0.2635, over 29312.00 frames. ], tot_loss[loss=0.3117, ctc_loss=0.251, cr_loss=0.4422, attn_decoder_loss=0.3086, over 4794815.25 frames. ], batch size: 71, lr: 2.70e-02, grad_scale: 8.0 +2024-09-16 18:38:37,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=55700.0, ans=0.125 +2024-09-16 18:38:54,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.92 vs. limit=10.0 +2024-09-16 18:38:55,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=55740.0, ans=0.07 +2024-09-16 18:38:59,297 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.063e+02 1.338e+02 1.528e+02 1.849e+02 4.816e+02, threshold=3.056e+02, percent-clipped=1.0 +2024-09-16 18:39:09,349 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.54 vs. 
limit=15.0 +2024-09-16 18:39:22,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=55820.0, ans=0.1 +2024-09-16 18:39:30,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.51 vs. limit=15.0 +2024-09-16 18:39:31,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=55860.0, ans=0.09899494936611666 +2024-09-16 18:39:46,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=55900.0, ans=0.0 +2024-09-16 18:39:47,899 INFO [train.py:1198] (1/2) Epoch 4, batch 400, loss[loss=0.3152, ctc_loss=0.2561, cr_loss=0.4715, attn_decoder_loss=0.3113, over 29701.00 frames. ], tot_loss[loss=0.3106, ctc_loss=0.2494, cr_loss=0.4415, attn_decoder_loss=0.3076, over 5024418.47 frames. ], batch size: 82, lr: 2.70e-02, grad_scale: 8.0 +2024-09-16 18:39:49,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=55900.0, ans=0.2 +2024-09-16 18:39:54,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=55900.0, ans=0.125 +2024-09-16 18:40:10,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=55940.0, ans=0.1 +2024-09-16 18:40:13,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=55940.0, ans=0.125 +2024-09-16 18:40:27,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=55980.0, ans=0.125 +2024-09-16 18:40:48,004 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.51 vs. limit=12.0 +2024-09-16 18:41:05,939 INFO [train.py:1198] (1/2) Epoch 4, batch 450, loss[loss=0.309, ctc_loss=0.2412, cr_loss=0.432, attn_decoder_loss=0.3069, over 29684.00 frames. ], tot_loss[loss=0.3105, ctc_loss=0.2494, cr_loss=0.4418, attn_decoder_loss=0.3075, over 5187008.35 frames. ], batch size: 83, lr: 2.69e-02, grad_scale: 8.0 +2024-09-16 18:41:34,587 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.042e+02 1.288e+02 1.422e+02 1.644e+02 6.882e+02, threshold=2.845e+02, percent-clipped=3.0 +2024-09-16 18:41:53,995 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=14.42 vs. limit=15.0 +2024-09-16 18:42:03,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=56220.0, ans=0.2 +2024-09-16 18:42:15,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.65 vs. limit=12.0 +2024-09-16 18:42:15,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=56260.0, ans=0.125 +2024-09-16 18:42:21,532 INFO [train.py:1198] (1/2) Epoch 4, batch 500, loss[loss=0.3301, ctc_loss=0.2654, cr_loss=0.4564, attn_decoder_loss=0.3271, over 29452.00 frames. ], tot_loss[loss=0.3093, ctc_loss=0.2481, cr_loss=0.4407, attn_decoder_loss=0.3063, over 5328453.18 frames. 
], batch size: 94, lr: 2.69e-02, grad_scale: 8.0 +2024-09-16 18:42:33,202 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.72 vs. limit=15.0 +2024-09-16 18:42:38,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=56340.0, ans=0.1 +2024-09-16 18:42:45,521 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.98 vs. limit=10.0 +2024-09-16 18:42:48,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=56340.0, ans=0.0 +2024-09-16 18:42:52,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=56380.0, ans=0.5 +2024-09-16 18:42:58,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=56380.0, ans=0.5 +2024-09-16 18:43:12,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=56420.0, ans=0.1 +2024-09-16 18:43:33,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=56460.0, ans=0.1 +2024-09-16 18:43:37,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=56500.0, ans=0.125 +2024-09-16 18:43:37,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=56500.0, ans=0.125 +2024-09-16 18:43:38,983 INFO [train.py:1198] (1/2) Epoch 4, batch 550, loss[loss=0.3381, ctc_loss=0.2802, cr_loss=0.4876, attn_decoder_loss=0.3337, over 28815.00 frames. ], tot_loss[loss=0.3097, ctc_loss=0.2486, cr_loss=0.4401, attn_decoder_loss=0.3067, over 5421303.24 frames. ], batch size: 104, lr: 2.69e-02, grad_scale: 8.0 +2024-09-16 18:43:52,304 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.36 vs. limit=22.5 +2024-09-16 18:44:03,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=56540.0, ans=0.0 +2024-09-16 18:44:05,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=56540.0, ans=0.0 +2024-09-16 18:44:09,213 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.307e+02 1.429e+02 1.661e+02 4.927e+02, threshold=2.859e+02, percent-clipped=1.0 +2024-09-16 18:44:36,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=56620.0, ans=0.125 +2024-09-16 18:44:39,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=56660.0, ans=0.1 +2024-09-16 18:44:47,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.45 vs. 
limit=15.0 +2024-09-16 18:44:49,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=56660.0, ans=0.1 +2024-09-16 18:44:56,983 INFO [train.py:1198] (1/2) Epoch 4, batch 600, loss[loss=0.3324, ctc_loss=0.2746, cr_loss=0.4422, attn_decoder_loss=0.329, over 29236.00 frames. ], tot_loss[loss=0.3099, ctc_loss=0.2484, cr_loss=0.4414, attn_decoder_loss=0.3069, over 5508279.28 frames. ], batch size: 100, lr: 2.68e-02, grad_scale: 8.0 +2024-09-16 18:45:15,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=56740.0, ans=0.025 +2024-09-16 18:45:16,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=56740.0, ans=0.125 +2024-09-16 18:45:24,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=56740.0, ans=0.125 +2024-09-16 18:45:25,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=56780.0, ans=0.1 +2024-09-16 18:46:12,467 INFO [train.py:1198] (1/2) Epoch 4, batch 650, loss[loss=0.311, ctc_loss=0.249, cr_loss=0.4257, attn_decoder_loss=0.3085, over 29766.00 frames. ], tot_loss[loss=0.3089, ctc_loss=0.2471, cr_loss=0.44, attn_decoder_loss=0.306, over 5585372.46 frames. ], batch size: 81, lr: 2.68e-02, grad_scale: 4.0 +2024-09-16 18:46:21,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=56900.0, ans=0.125 +2024-09-16 18:46:29,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=56940.0, ans=0.95 +2024-09-16 18:46:41,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=56980.0, ans=0.125 +2024-09-16 18:46:46,223 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.050e+02 1.273e+02 1.380e+02 1.624e+02 3.709e+02, threshold=2.760e+02, percent-clipped=3.0 +2024-09-16 18:46:49,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=56980.0, ans=0.5 +2024-09-16 18:46:54,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=56980.0, ans=0.1 +2024-09-16 18:47:06,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.83 vs. limit=10.0 +2024-09-16 18:47:30,025 INFO [train.py:1198] (1/2) Epoch 4, batch 700, loss[loss=0.2906, ctc_loss=0.2285, cr_loss=0.4173, attn_decoder_loss=0.2883, over 29531.00 frames. ], tot_loss[loss=0.3095, ctc_loss=0.2477, cr_loss=0.4408, attn_decoder_loss=0.3066, over 5635399.26 frames. ], batch size: 76, lr: 2.67e-02, grad_scale: 8.0 +2024-09-16 18:47:38,589 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.33 vs. 
limit=6.0 +2024-09-16 18:47:46,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=57140.0, ans=0.125 +2024-09-16 18:47:47,277 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.00 vs. limit=10.0 +2024-09-16 18:47:54,933 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.11 vs. limit=15.0 +2024-09-16 18:47:57,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=57140.0, ans=0.0 +2024-09-16 18:48:02,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=57180.0, ans=0.125 +2024-09-16 18:48:08,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=57180.0, ans=0.2 +2024-09-16 18:48:16,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=57220.0, ans=0.1 +2024-09-16 18:48:20,990 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=6.23 vs. limit=12.0 +2024-09-16 18:48:37,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=57260.0, ans=0.0 +2024-09-16 18:48:40,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=57260.0, ans=0.1 +2024-09-16 18:48:43,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=57260.0, ans=15.0 +2024-09-16 18:48:46,078 INFO [train.py:1198] (1/2) Epoch 4, batch 750, loss[loss=0.3241, ctc_loss=0.2541, cr_loss=0.4501, attn_decoder_loss=0.3219, over 29717.00 frames. ], tot_loss[loss=0.3088, ctc_loss=0.2471, cr_loss=0.4402, attn_decoder_loss=0.3059, over 5674634.65 frames. ], batch size: 82, lr: 2.67e-02, grad_scale: 4.0 +2024-09-16 18:48:52,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=57300.0, ans=0.0 +2024-09-16 18:49:13,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=57340.0, ans=0.125 +2024-09-16 18:49:21,183 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.034e+02 1.371e+02 1.558e+02 1.817e+02 5.424e+02, threshold=3.116e+02, percent-clipped=2.0 +2024-09-16 18:49:23,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=57380.0, ans=0.125 +2024-09-16 18:49:41,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=57420.0, ans=0.0 +2024-09-16 18:49:54,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=57460.0, ans=0.125 +2024-09-16 18:50:03,607 INFO [train.py:1198] (1/2) Epoch 4, batch 800, loss[loss=0.2801, ctc_loss=0.22, cr_loss=0.3866, attn_decoder_loss=0.2782, over 29598.00 frames. ], tot_loss[loss=0.3083, ctc_loss=0.2466, cr_loss=0.4398, attn_decoder_loss=0.3054, over 5705113.11 frames. 
], batch size: 73, lr: 2.67e-02, grad_scale: 8.0 +2024-09-16 18:50:20,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=57540.0, ans=0.125 +2024-09-16 18:50:35,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=57580.0, ans=0.0 +2024-09-16 18:50:36,078 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.84 vs. limit=15.0 +2024-09-16 18:50:47,198 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.21 vs. limit=22.5 +2024-09-16 18:50:52,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=57620.0, ans=0.5 +2024-09-16 18:51:06,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=57660.0, ans=0.125 +2024-09-16 18:51:20,830 INFO [train.py:1198] (1/2) Epoch 4, batch 850, loss[loss=0.3179, ctc_loss=0.2498, cr_loss=0.4674, attn_decoder_loss=0.3151, over 29698.00 frames. ], tot_loss[loss=0.308, ctc_loss=0.246, cr_loss=0.4398, attn_decoder_loss=0.3051, over 5735864.04 frames. ], batch size: 89, lr: 2.66e-02, grad_scale: 4.0 +2024-09-16 18:51:24,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=57700.0, ans=0.125 +2024-09-16 18:51:32,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=57700.0, ans=0.125 +2024-09-16 18:51:52,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=57780.0, ans=0.0 +2024-09-16 18:51:55,364 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.098e+02 1.339e+02 1.546e+02 1.753e+02 3.025e+02, threshold=3.091e+02, percent-clipped=0.0 +2024-09-16 18:51:57,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=57780.0, ans=0.125 +2024-09-16 18:52:36,400 INFO [train.py:1198] (1/2) Epoch 4, batch 900, loss[loss=0.2811, ctc_loss=0.2172, cr_loss=0.437, attn_decoder_loss=0.2785, over 29581.00 frames. ], tot_loss[loss=0.3084, ctc_loss=0.2467, cr_loss=0.4408, attn_decoder_loss=0.3055, over 5740917.23 frames. ], batch size: 73, lr: 2.66e-02, grad_scale: 8.0 +2024-09-16 18:52:47,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=57900.0, ans=0.1 +2024-09-16 18:53:05,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=57940.0, ans=0.1 +2024-09-16 18:53:41,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=58060.0, ans=0.0 +2024-09-16 18:53:52,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=58100.0, ans=0.1 +2024-09-16 18:53:53,340 INFO [train.py:1198] (1/2) Epoch 4, batch 950, loss[loss=0.2802, ctc_loss=0.2192, cr_loss=0.3988, attn_decoder_loss=0.2781, over 29523.00 frames. ], tot_loss[loss=0.3091, ctc_loss=0.2476, cr_loss=0.4418, attn_decoder_loss=0.3061, over 5743885.03 frames. 
], batch size: 74, lr: 2.66e-02, grad_scale: 4.0 +2024-09-16 18:53:54,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=8.00 vs. limit=15.0 +2024-09-16 18:54:29,621 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.037e+02 1.318e+02 1.459e+02 1.683e+02 8.183e+02, threshold=2.918e+02, percent-clipped=3.0 +2024-09-16 18:55:08,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=58260.0, ans=0.0 +2024-09-16 18:55:10,833 INFO [train.py:1198] (1/2) Epoch 4, batch 1000, loss[loss=0.2834, ctc_loss=0.2138, cr_loss=0.4122, attn_decoder_loss=0.2819, over 29511.00 frames. ], tot_loss[loss=0.3095, ctc_loss=0.2478, cr_loss=0.4417, attn_decoder_loss=0.3065, over 5737047.40 frames. ], batch size: 77, lr: 2.65e-02, grad_scale: 8.0 +2024-09-16 18:55:24,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=58340.0, ans=0.0 +2024-09-16 18:55:42,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=58380.0, ans=0.0 +2024-09-16 18:55:46,422 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.93 vs. limit=6.0 +2024-09-16 18:55:49,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=58380.0, ans=0.2 +2024-09-16 18:55:52,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=58380.0, ans=0.0 +2024-09-16 18:55:52,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.29 vs. limit=15.0 +2024-09-16 18:55:53,034 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.04 vs. limit=10.0 +2024-09-16 18:56:28,231 INFO [train.py:1198] (1/2) Epoch 4, batch 1050, loss[loss=0.3264, ctc_loss=0.2681, cr_loss=0.454, attn_decoder_loss=0.3228, over 29683.00 frames. ], tot_loss[loss=0.3078, ctc_loss=0.2461, cr_loss=0.44, attn_decoder_loss=0.3049, over 5744033.92 frames. ], batch size: 85, lr: 2.65e-02, grad_scale: 4.0 +2024-09-16 18:56:54,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=58540.0, ans=0.125 +2024-09-16 18:56:55,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=58540.0, ans=0.125 +2024-09-16 18:56:55,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=58540.0, ans=0.0 +2024-09-16 18:57:06,149 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.039e+02 1.263e+02 1.458e+02 1.745e+02 4.654e+02, threshold=2.917e+02, percent-clipped=3.0 +2024-09-16 18:57:13,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=58620.0, ans=0.125 +2024-09-16 18:57:20,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.35 vs. 
limit=6.0 +2024-09-16 18:57:33,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=58660.0, ans=0.125 +2024-09-16 18:57:43,659 INFO [train.py:1198] (1/2) Epoch 4, batch 1100, loss[loss=0.3102, ctc_loss=0.2473, cr_loss=0.4598, attn_decoder_loss=0.307, over 29433.00 frames. ], tot_loss[loss=0.3072, ctc_loss=0.2453, cr_loss=0.4402, attn_decoder_loss=0.3043, over 5755581.84 frames. ], batch size: 78, lr: 2.65e-02, grad_scale: 8.0 +2024-09-16 18:58:01,140 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.91 vs. limit=22.5 +2024-09-16 18:58:29,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=58820.0, ans=0.125 +2024-09-16 18:59:01,079 INFO [train.py:1198] (1/2) Epoch 4, batch 1150, loss[loss=0.3074, ctc_loss=0.2486, cr_loss=0.453, attn_decoder_loss=0.3039, over 29453.00 frames. ], tot_loss[loss=0.3078, ctc_loss=0.246, cr_loss=0.4407, attn_decoder_loss=0.3049, over 5755397.91 frames. ], batch size: 78, lr: 2.64e-02, grad_scale: 4.0 +2024-09-16 18:59:04,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=58900.0, ans=0.125 +2024-09-16 18:59:15,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=58940.0, ans=0.125 +2024-09-16 18:59:23,179 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.75 vs. limit=15.0 +2024-09-16 18:59:25,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=58940.0, ans=0.125 +2024-09-16 18:59:30,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=58980.0, ans=0.09899494936611666 +2024-09-16 18:59:40,711 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.556e+01 1.271e+02 1.479e+02 1.697e+02 4.647e+02, threshold=2.959e+02, percent-clipped=3.0 +2024-09-16 18:59:50,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=59020.0, ans=0.0 +2024-09-16 18:59:56,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=59020.0, ans=0.0 +2024-09-16 19:00:18,989 INFO [train.py:1198] (1/2) Epoch 4, batch 1200, loss[loss=0.3082, ctc_loss=0.2321, cr_loss=0.4198, attn_decoder_loss=0.3074, over 29690.00 frames. ], tot_loss[loss=0.3087, ctc_loss=0.2467, cr_loss=0.4413, attn_decoder_loss=0.3057, over 5748009.67 frames. ], batch size: 85, lr: 2.64e-02, grad_scale: 8.0 +2024-09-16 19:01:02,619 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.87 vs. 
limit=12.0 +2024-09-16 19:01:03,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=59220.0, ans=0.1 +2024-09-16 19:01:09,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=59220.0, ans=0.1 +2024-09-16 19:01:25,069 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=13.01 vs. limit=15.0 +2024-09-16 19:01:34,922 INFO [train.py:1198] (1/2) Epoch 4, batch 1250, loss[loss=0.3189, ctc_loss=0.2482, cr_loss=0.4318, attn_decoder_loss=0.3171, over 29530.00 frames. ], tot_loss[loss=0.309, ctc_loss=0.2468, cr_loss=0.4425, attn_decoder_loss=0.3061, over 5775614.23 frames. ], batch size: 92, lr: 2.63e-02, grad_scale: 4.0 +2024-09-16 19:02:03,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=59380.0, ans=0.5 +2024-09-16 19:02:15,815 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.032e+02 1.296e+02 1.466e+02 1.683e+02 4.153e+02, threshold=2.932e+02, percent-clipped=2.0 +2024-09-16 19:02:17,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=59380.0, ans=0.125 +2024-09-16 19:02:39,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=59460.0, ans=0.1 +2024-09-16 19:02:52,639 INFO [train.py:1198] (1/2) Epoch 4, batch 1300, loss[loss=0.3163, ctc_loss=0.2595, cr_loss=0.4401, attn_decoder_loss=0.3129, over 28362.00 frames. ], tot_loss[loss=0.3076, ctc_loss=0.2453, cr_loss=0.4412, attn_decoder_loss=0.3047, over 5779063.11 frames. ], batch size: 111, lr: 2.63e-02, grad_scale: 8.0 +2024-09-16 19:03:00,674 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:03:23,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=59580.0, ans=0.0 +2024-09-16 19:03:27,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=59580.0, ans=0.125 +2024-09-16 19:04:08,493 INFO [train.py:1198] (1/2) Epoch 4, batch 1350, loss[loss=0.3093, ctc_loss=0.2538, cr_loss=0.4366, attn_decoder_loss=0.3058, over 29752.00 frames. ], tot_loss[loss=0.307, ctc_loss=0.2442, cr_loss=0.4407, attn_decoder_loss=0.3042, over 5797097.11 frames. ], batch size: 81, lr: 2.63e-02, grad_scale: 4.0 +2024-09-16 19:04:15,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=59700.0, ans=0.125 +2024-09-16 19:04:21,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=59700.0, ans=0.1 +2024-09-16 19:04:44,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=59780.0, ans=0.0 +2024-09-16 19:04:45,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=59780.0, ans=0.2 +2024-09-16 19:04:47,405 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.50 vs. 
limit=15.0 +2024-09-16 19:04:51,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=59780.0, ans=0.0 +2024-09-16 19:04:52,258 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.037e+02 1.260e+02 1.419e+02 1.691e+02 3.213e+02, threshold=2.838e+02, percent-clipped=1.0 +2024-09-16 19:05:02,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.09 vs. limit=22.5 +2024-09-16 19:05:04,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_ff2.min_abs, batch_count=59820.0, ans=0.1 +2024-09-16 19:05:24,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=59900.0, ans=0.0 +2024-09-16 19:05:25,807 INFO [train.py:1198] (1/2) Epoch 4, batch 1400, loss[loss=0.2645, ctc_loss=0.1959, cr_loss=0.3777, attn_decoder_loss=0.2637, over 29570.00 frames. ], tot_loss[loss=0.3067, ctc_loss=0.2436, cr_loss=0.4402, attn_decoder_loss=0.304, over 5808160.36 frames. ], batch size: 69, lr: 2.62e-02, grad_scale: 8.0 +2024-09-16 19:05:32,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=59900.0, ans=0.125 +2024-09-16 19:05:41,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=59940.0, ans=0.125 +2024-09-16 19:05:46,036 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.99 vs. limit=22.5 +2024-09-16 19:05:51,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=59940.0, ans=0.125 +2024-09-16 19:06:02,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=59980.0, ans=0.125 +2024-09-16 19:06:26,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=60060.0, ans=0.2 +2024-09-16 19:06:29,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=60060.0, ans=0.2 +2024-09-16 19:06:38,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.63 vs. limit=22.5 +2024-09-16 19:06:43,762 INFO [train.py:1198] (1/2) Epoch 4, batch 1450, loss[loss=0.3081, ctc_loss=0.2406, cr_loss=0.4614, attn_decoder_loss=0.3054, over 29437.00 frames. ], tot_loss[loss=0.3074, ctc_loss=0.2442, cr_loss=0.4411, attn_decoder_loss=0.3046, over 5804177.16 frames. 
], batch size: 94, lr: 2.62e-02, grad_scale: 4.0 +2024-09-16 19:07:01,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten.whitening_limit, batch_count=60140.0, ans=15.0 +2024-09-16 19:07:21,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=60180.0, ans=0.0 +2024-09-16 19:07:24,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=60180.0, ans=0.0 +2024-09-16 19:07:26,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=60180.0, ans=0.125 +2024-09-16 19:07:26,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=60180.0, ans=0.09899494936611666 +2024-09-16 19:07:27,554 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.059e+02 1.279e+02 1.464e+02 1.663e+02 3.366e+02, threshold=2.927e+02, percent-clipped=3.0 +2024-09-16 19:07:28,634 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.65 vs. limit=15.0 +2024-09-16 19:07:36,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=60220.0, ans=0.125 +2024-09-16 19:07:38,918 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.61 vs. limit=15.0 +2024-09-16 19:07:57,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=60300.0, ans=0.1 +2024-09-16 19:07:59,067 INFO [train.py:1198] (1/2) Epoch 4, batch 1500, loss[loss=0.3173, ctc_loss=0.2494, cr_loss=0.456, attn_decoder_loss=0.3147, over 29635.00 frames. ], tot_loss[loss=0.3074, ctc_loss=0.2439, cr_loss=0.4417, attn_decoder_loss=0.3046, over 5805261.19 frames. ], batch size: 86, lr: 2.62e-02, grad_scale: 8.0 +2024-09-16 19:08:25,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=60340.0, ans=0.125 +2024-09-16 19:08:27,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=60340.0, ans=0.125 +2024-09-16 19:08:48,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=60420.0, ans=0.0 +2024-09-16 19:08:53,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=60420.0, ans=0.1 +2024-09-16 19:09:00,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=60460.0, ans=0.125 +2024-09-16 19:09:14,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=60460.0, ans=0.125 +2024-09-16 19:09:14,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=60460.0, ans=0.125 +2024-09-16 19:09:16,976 INFO [train.py:1198] (1/2) Epoch 4, batch 1550, loss[loss=0.3228, ctc_loss=0.2648, cr_loss=0.4734, attn_decoder_loss=0.3187, over 29528.00 frames. 
], tot_loss[loss=0.3076, ctc_loss=0.2446, cr_loss=0.4411, attn_decoder_loss=0.3047, over 5780493.16 frames. ], batch size: 90, lr: 2.61e-02, grad_scale: 4.0 +2024-09-16 19:09:17,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=60500.0, ans=0.125 +2024-09-16 19:09:17,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_na.min_abs, batch_count=60500.0, ans=0.02 +2024-09-16 19:09:50,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=60580.0, ans=0.1 +2024-09-16 19:10:01,803 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.038e+02 1.301e+02 1.510e+02 1.822e+02 6.597e+02, threshold=3.020e+02, percent-clipped=6.0 +2024-09-16 19:10:26,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=60660.0, ans=0.125 +2024-09-16 19:10:29,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=60660.0, ans=0.125 +2024-09-16 19:10:34,137 INFO [train.py:1198] (1/2) Epoch 4, batch 1600, loss[loss=0.3053, ctc_loss=0.2332, cr_loss=0.4513, attn_decoder_loss=0.3033, over 29704.00 frames. ], tot_loss[loss=0.3077, ctc_loss=0.245, cr_loss=0.4416, attn_decoder_loss=0.3048, over 5763431.07 frames. ], batch size: 85, lr: 2.61e-02, grad_scale: 8.0 +2024-09-16 19:10:35,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=60700.0, ans=0.125 +2024-09-16 19:10:56,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=60740.0, ans=0.125 +2024-09-16 19:10:56,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=60740.0, ans=0.1 +2024-09-16 19:11:00,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=60740.0, ans=0.0 +2024-09-16 19:11:18,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=60820.0, ans=0.0 +2024-09-16 19:11:26,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=60820.0, ans=0.1 +2024-09-16 19:11:33,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=60860.0, ans=0.0 +2024-09-16 19:11:39,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=60860.0, ans=0.0 +2024-09-16 19:11:47,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=60860.0, ans=0.125 +2024-09-16 19:11:50,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=60900.0, ans=0.125 +2024-09-16 19:11:52,014 INFO [train.py:1198] (1/2) Epoch 4, batch 1650, loss[loss=0.3086, ctc_loss=0.2404, cr_loss=0.4363, attn_decoder_loss=0.3065, over 29703.00 frames. ], tot_loss[loss=0.3075, ctc_loss=0.2452, cr_loss=0.4414, attn_decoder_loss=0.3046, over 5757319.16 frames. 
], batch size: 89, lr: 2.61e-02, grad_scale: 4.0 +2024-09-16 19:11:57,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=60900.0, ans=0.0 +2024-09-16 19:11:58,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=60900.0, ans=0.0 +2024-09-16 19:12:02,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.03 vs. limit=15.0 +2024-09-16 19:12:05,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=60940.0, ans=0.0 +2024-09-16 19:12:11,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=60940.0, ans=0.2 +2024-09-16 19:12:23,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=60980.0, ans=0.125 +2024-09-16 19:12:27,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=60980.0, ans=0.125 +2024-09-16 19:12:38,885 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.008e+02 1.275e+02 1.417e+02 1.655e+02 4.421e+02, threshold=2.835e+02, percent-clipped=2.0 +2024-09-16 19:12:39,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=61020.0, ans=0.125 +2024-09-16 19:12:40,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=61020.0, ans=0.125 +2024-09-16 19:12:57,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=61060.0, ans=0.2 +2024-09-16 19:13:07,420 INFO [train.py:1198] (1/2) Epoch 4, batch 1700, loss[loss=0.2745, ctc_loss=0.2108, cr_loss=0.3969, attn_decoder_loss=0.2727, over 29582.00 frames. ], tot_loss[loss=0.3072, ctc_loss=0.2446, cr_loss=0.4408, attn_decoder_loss=0.3044, over 5778104.86 frames. ], batch size: 69, lr: 2.60e-02, grad_scale: 8.0 +2024-09-16 19:13:32,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.88 vs. limit=12.0 +2024-09-16 19:13:33,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=61140.0, ans=0.5 +2024-09-16 19:13:34,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=61140.0, ans=0.125 +2024-09-16 19:13:40,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=61180.0, ans=0.125 +2024-09-16 19:13:51,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=61220.0, ans=0.025 +2024-09-16 19:13:56,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=61220.0, ans=0.125 +2024-09-16 19:13:57,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=61220.0, ans=0.125 +2024-09-16 19:14:04,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=3.78 vs. 
limit=15.0 +2024-09-16 19:14:12,731 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:14:24,994 INFO [train.py:1198] (1/2) Epoch 4, batch 1750, loss[loss=0.2718, ctc_loss=0.2126, cr_loss=0.4106, attn_decoder_loss=0.2692, over 29334.00 frames. ], tot_loss[loss=0.3062, ctc_loss=0.243, cr_loss=0.4405, attn_decoder_loss=0.3034, over 5785249.16 frames. ], batch size: 67, lr: 2.60e-02, grad_scale: 8.0 +2024-09-16 19:14:31,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=61300.0, ans=0.2 +2024-09-16 19:14:49,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=61340.0, ans=0.0 +2024-09-16 19:15:07,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=61380.0, ans=0.125 +2024-09-16 19:15:11,788 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.273e+01 1.237e+02 1.382e+02 1.538e+02 2.452e+02, threshold=2.764e+02, percent-clipped=0.0 +2024-09-16 19:15:34,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=61460.0, ans=0.125 +2024-09-16 19:15:42,002 INFO [train.py:1198] (1/2) Epoch 4, batch 1800, loss[loss=0.2984, ctc_loss=0.23, cr_loss=0.4199, attn_decoder_loss=0.2966, over 29690.00 frames. ], tot_loss[loss=0.3064, ctc_loss=0.2434, cr_loss=0.4406, attn_decoder_loss=0.3036, over 5788963.87 frames. ], batch size: 83, lr: 2.60e-02, grad_scale: 8.0 +2024-09-16 19:16:07,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn2.whiten.whitening_limit, batch_count=61540.0, ans=22.5 +2024-09-16 19:16:57,521 INFO [train.py:1198] (1/2) Epoch 4, batch 1850, loss[loss=0.3143, ctc_loss=0.2429, cr_loss=0.4345, attn_decoder_loss=0.3126, over 29626.00 frames. ], tot_loss[loss=0.3062, ctc_loss=0.2428, cr_loss=0.4406, attn_decoder_loss=0.3035, over 5795526.93 frames. ], batch size: 86, lr: 2.59e-02, grad_scale: 4.0 +2024-09-16 19:17:00,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=61700.0, ans=0.0 +2024-09-16 19:17:29,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=61780.0, ans=0.1 +2024-09-16 19:17:32,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=61780.0, ans=0.0 +2024-09-16 19:17:44,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=61820.0, ans=0.2 +2024-09-16 19:17:46,911 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.035e+02 1.284e+02 1.452e+02 1.621e+02 3.527e+02, threshold=2.905e+02, percent-clipped=2.0 +2024-09-16 19:18:12,185 INFO [train.py:1198] (1/2) Epoch 4, batch 1900, loss[loss=0.3074, ctc_loss=0.2457, cr_loss=0.4309, attn_decoder_loss=0.3047, over 29713.00 frames. ], tot_loss[loss=0.3069, ctc_loss=0.2433, cr_loss=0.4414, attn_decoder_loss=0.3041, over 5804183.50 frames. 
], batch size: 89, lr: 2.59e-02, grad_scale: 8.0 +2024-09-16 19:18:12,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=61900.0, ans=0.0 +2024-09-16 19:18:29,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=61940.0, ans=0.125 +2024-09-16 19:18:36,359 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.38 vs. limit=15.0 +2024-09-16 19:18:37,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=61940.0, ans=0.2 +2024-09-16 19:18:49,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=61980.0, ans=0.025 +2024-09-16 19:19:00,241 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.89 vs. limit=12.0 +2024-09-16 19:19:13,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=62060.0, ans=0.0 +2024-09-16 19:19:22,599 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.04 vs. limit=22.5 +2024-09-16 19:19:23,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=62060.0, ans=0.125 +2024-09-16 19:19:31,268 INFO [train.py:1198] (1/2) Epoch 4, batch 1950, loss[loss=0.2962, ctc_loss=0.2276, cr_loss=0.4154, attn_decoder_loss=0.2946, over 29416.00 frames. ], tot_loss[loss=0.3084, ctc_loss=0.2446, cr_loss=0.4435, attn_decoder_loss=0.3056, over 5818808.96 frames. ], batch size: 78, lr: 2.59e-02, grad_scale: 4.0 +2024-09-16 19:19:43,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=62100.0, ans=0.1 +2024-09-16 19:19:45,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=62140.0, ans=0.125 +2024-09-16 19:19:48,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=62140.0, ans=0.1 +2024-09-16 19:19:52,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=62140.0, ans=0.1 +2024-09-16 19:19:53,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.08 vs. 
limit=10.0 +2024-09-16 19:20:06,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=62180.0, ans=0.0 +2024-09-16 19:20:16,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=62220.0, ans=0.125 +2024-09-16 19:20:22,228 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.025e+02 1.204e+02 1.396e+02 1.540e+02 6.321e+02, threshold=2.792e+02, percent-clipped=2.0 +2024-09-16 19:20:28,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=62220.0, ans=0.125 +2024-09-16 19:20:33,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=62260.0, ans=0.125 +2024-09-16 19:20:46,381 INFO [train.py:1198] (1/2) Epoch 4, batch 2000, loss[loss=0.2667, ctc_loss=0.2004, cr_loss=0.3754, attn_decoder_loss=0.2657, over 29343.00 frames. ], tot_loss[loss=0.3092, ctc_loss=0.2455, cr_loss=0.4443, attn_decoder_loss=0.3064, over 5798129.19 frames. ], batch size: 67, lr: 2.58e-02, grad_scale: 8.0 +2024-09-16 19:21:00,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=62340.0, ans=0.2 +2024-09-16 19:21:08,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=62340.0, ans=0.1 +2024-09-16 19:21:25,478 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.25 vs. limit=10.0 +2024-09-16 19:22:02,148 INFO [train.py:1198] (1/2) Epoch 4, batch 2050, loss[loss=0.2679, ctc_loss=0.2009, cr_loss=0.4105, attn_decoder_loss=0.2662, over 29440.00 frames. ], tot_loss[loss=0.3077, ctc_loss=0.2436, cr_loss=0.442, attn_decoder_loss=0.305, over 5791235.76 frames. ], batch size: 70, lr: 2.58e-02, grad_scale: 4.0 +2024-09-16 19:22:22,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=62540.0, ans=0.0 +2024-09-16 19:22:54,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=62620.0, ans=0.0 +2024-09-16 19:22:55,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=62620.0, ans=0.0 +2024-09-16 19:22:57,027 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.877e+01 1.306e+02 1.501e+02 1.885e+02 4.145e+02, threshold=3.002e+02, percent-clipped=3.0 +2024-09-16 19:22:59,608 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.86 vs. limit=15.0 +2024-09-16 19:23:13,244 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=7.34 vs. limit=12.0 +2024-09-16 19:23:14,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=62660.0, ans=15.0 +2024-09-16 19:23:21,639 INFO [train.py:1198] (1/2) Epoch 4, batch 2100, loss[loss=0.3166, ctc_loss=0.254, cr_loss=0.4655, attn_decoder_loss=0.3132, over 29753.00 frames. ], tot_loss[loss=0.3069, ctc_loss=0.2427, cr_loss=0.4409, attn_decoder_loss=0.3042, over 5802875.60 frames. 
], batch size: 81, lr: 2.58e-02, grad_scale: 8.0 +2024-09-16 19:23:40,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=62740.0, ans=0.025 +2024-09-16 19:23:41,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=62740.0, ans=0.05 +2024-09-16 19:23:42,169 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.88 vs. limit=10.0 +2024-09-16 19:24:09,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=62820.0, ans=0.0 +2024-09-16 19:24:26,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=62860.0, ans=0.125 +2024-09-16 19:24:36,628 INFO [train.py:1198] (1/2) Epoch 4, batch 2150, loss[loss=0.2843, ctc_loss=0.2102, cr_loss=0.428, attn_decoder_loss=0.283, over 29433.00 frames. ], tot_loss[loss=0.3058, ctc_loss=0.2411, cr_loss=0.4401, attn_decoder_loss=0.3032, over 5817465.91 frames. ], batch size: 78, lr: 2.57e-02, grad_scale: 4.0 +2024-09-16 19:24:45,287 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.66 vs. limit=6.0 +2024-09-16 19:25:24,129 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.54 vs. limit=22.5 +2024-09-16 19:25:31,031 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.004e+02 1.239e+02 1.413e+02 1.658e+02 2.671e+02, threshold=2.826e+02, percent-clipped=0.0 +2024-09-16 19:25:35,128 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.80 vs. limit=15.0 +2024-09-16 19:25:38,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=63060.0, ans=0.125 +2024-09-16 19:25:44,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=63060.0, ans=0.2 +2024-09-16 19:25:52,109 INFO [train.py:1198] (1/2) Epoch 4, batch 2200, loss[loss=0.3166, ctc_loss=0.2428, cr_loss=0.4194, attn_decoder_loss=0.3155, over 29619.00 frames. ], tot_loss[loss=0.3056, ctc_loss=0.2411, cr_loss=0.4399, attn_decoder_loss=0.303, over 5813998.16 frames. ], batch size: 86, lr: 2.57e-02, grad_scale: 8.0 +2024-09-16 19:26:04,844 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.20 vs. limit=15.0 +2024-09-16 19:26:23,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=63180.0, ans=0.0 +2024-09-16 19:26:37,328 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:27:09,942 INFO [train.py:1198] (1/2) Epoch 4, batch 2250, loss[loss=0.3077, ctc_loss=0.2335, cr_loss=0.415, attn_decoder_loss=0.3067, over 29689.00 frames. ], tot_loss[loss=0.3053, ctc_loss=0.2409, cr_loss=0.4393, attn_decoder_loss=0.3027, over 5813534.01 frames. 
], batch size: 82, lr: 2.57e-02, grad_scale: 4.0 +2024-09-16 19:27:31,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.66 vs. limit=22.5 +2024-09-16 19:27:40,098 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.01 vs. limit=22.5 +2024-09-16 19:27:51,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=63380.0, ans=0.035 +2024-09-16 19:28:03,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=63420.0, ans=0.0 +2024-09-16 19:28:07,574 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.060e+02 1.265e+02 1.418e+02 1.691e+02 4.004e+02, threshold=2.836e+02, percent-clipped=3.0 +2024-09-16 19:28:15,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=63460.0, ans=0.125 +2024-09-16 19:28:21,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=63460.0, ans=0.125 +2024-09-16 19:28:27,219 INFO [train.py:1198] (1/2) Epoch 4, batch 2300, loss[loss=0.2663, ctc_loss=0.1986, cr_loss=0.4094, attn_decoder_loss=0.2647, over 29735.00 frames. ], tot_loss[loss=0.3046, ctc_loss=0.2402, cr_loss=0.4383, attn_decoder_loss=0.302, over 5801430.33 frames. ], batch size: 72, lr: 2.56e-02, grad_scale: 8.0 +2024-09-16 19:28:45,300 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:28:47,711 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.69 vs. limit=15.0 +2024-09-16 19:28:49,131 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=16.29 vs. limit=15.0 +2024-09-16 19:28:49,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=63540.0, ans=0.125 +2024-09-16 19:28:50,385 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.46 vs. limit=6.0 +2024-09-16 19:28:55,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=63580.0, ans=0.025 +2024-09-16 19:29:30,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=63660.0, ans=0.04949747468305833 +2024-09-16 19:29:35,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=63660.0, ans=0.125 +2024-09-16 19:29:42,584 INFO [train.py:1198] (1/2) Epoch 4, batch 2350, loss[loss=0.3237, ctc_loss=0.2604, cr_loss=0.4898, attn_decoder_loss=0.3198, over 29690.00 frames. ], tot_loss[loss=0.3044, ctc_loss=0.2398, cr_loss=0.4379, attn_decoder_loss=0.3019, over 5807368.78 frames. ], batch size: 83, lr: 2.56e-02, grad_scale: 4.0 +2024-09-16 19:29:50,879 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.74 vs. 
limit=15.0 +2024-09-16 19:29:55,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=63740.0, ans=0.1 +2024-09-16 19:30:02,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=63740.0, ans=0.125 +2024-09-16 19:30:07,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=63740.0, ans=0.0 +2024-09-16 19:30:13,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=63780.0, ans=0.2 +2024-09-16 19:30:31,763 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.16 vs. limit=22.5 +2024-09-16 19:30:41,597 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.090e+02 1.383e+02 1.538e+02 1.780e+02 4.486e+02, threshold=3.076e+02, percent-clipped=4.0 +2024-09-16 19:30:49,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=63860.0, ans=0.0 +2024-09-16 19:30:57,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=63860.0, ans=0.125 +2024-09-16 19:30:59,750 INFO [train.py:1198] (1/2) Epoch 4, batch 2400, loss[loss=0.2871, ctc_loss=0.2163, cr_loss=0.4111, attn_decoder_loss=0.2858, over 29557.00 frames. ], tot_loss[loss=0.3048, ctc_loss=0.24, cr_loss=0.4386, attn_decoder_loss=0.3023, over 5810519.27 frames. ], batch size: 76, lr: 2.56e-02, grad_scale: 8.0 +2024-09-16 19:31:04,178 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=12.60 vs. limit=15.0 +2024-09-16 19:31:23,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=63940.0, ans=0.0 +2024-09-16 19:31:36,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=63980.0, ans=0.025 +2024-09-16 19:31:37,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=63980.0, ans=0.0 +2024-09-16 19:31:51,061 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.61 vs. limit=15.0 +2024-09-16 19:32:06,342 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.60 vs. limit=22.5 +2024-09-16 19:32:22,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=64060.0, ans=0.125 +2024-09-16 19:32:24,847 INFO [train.py:1198] (1/2) Epoch 4, batch 2450, loss[loss=0.3105, ctc_loss=0.2477, cr_loss=0.4673, attn_decoder_loss=0.307, over 29706.00 frames. ], tot_loss[loss=0.3058, ctc_loss=0.2412, cr_loss=0.4395, attn_decoder_loss=0.3032, over 5786703.73 frames. 
], batch size: 82, lr: 2.55e-02, grad_scale: 4.0 +2024-09-16 19:32:26,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=64100.0, ans=0.125 +2024-09-16 19:32:29,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=64100.0, ans=0.0 +2024-09-16 19:32:37,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=64100.0, ans=0.025 +2024-09-16 19:32:45,050 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=6.13 vs. limit=12.0 +2024-09-16 19:33:20,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=64220.0, ans=0.025 +2024-09-16 19:33:23,220 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.817e+01 1.239e+02 1.387e+02 1.580e+02 7.191e+02, threshold=2.774e+02, percent-clipped=3.0 +2024-09-16 19:33:35,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=64260.0, ans=0.0 +2024-09-16 19:33:39,947 INFO [train.py:1198] (1/2) Epoch 4, batch 2500, loss[loss=0.306, ctc_loss=0.2324, cr_loss=0.4239, attn_decoder_loss=0.3048, over 29640.00 frames. ], tot_loss[loss=0.3057, ctc_loss=0.241, cr_loss=0.4401, attn_decoder_loss=0.3031, over 5797120.07 frames. ], batch size: 86, lr: 2.55e-02, grad_scale: 8.0 +2024-09-16 19:33:53,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=64340.0, ans=0.125 +2024-09-16 19:34:09,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=64340.0, ans=0.0 +2024-09-16 19:34:27,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=64420.0, ans=0.07 +2024-09-16 19:34:35,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=64420.0, ans=0.125 +2024-09-16 19:34:50,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=64460.0, ans=0.125 +2024-09-16 19:34:51,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=64460.0, ans=0.0 +2024-09-16 19:34:59,439 INFO [train.py:1198] (1/2) Epoch 4, batch 2550, loss[loss=0.2674, ctc_loss=0.198, cr_loss=0.3979, attn_decoder_loss=0.2663, over 29358.00 frames. ], tot_loss[loss=0.3056, ctc_loss=0.2406, cr_loss=0.4403, attn_decoder_loss=0.303, over 5799378.92 frames. ], batch size: 67, lr: 2.55e-02, grad_scale: 4.0 +2024-09-16 19:35:06,277 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.67 vs. limit=6.0 +2024-09-16 19:35:12,906 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.41 vs. limit=6.0 +2024-09-16 19:35:32,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.11 vs. 
limit=10.0 +2024-09-16 19:35:37,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=64580.0, ans=0.125 +2024-09-16 19:35:46,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=64620.0, ans=10.0 +2024-09-16 19:36:00,133 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.618e+01 1.258e+02 1.410e+02 1.550e+02 4.677e+02, threshold=2.819e+02, percent-clipped=4.0 +2024-09-16 19:36:08,316 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.02 vs. limit=10.0 +2024-09-16 19:36:15,314 INFO [train.py:1198] (1/2) Epoch 4, batch 2600, loss[loss=0.2904, ctc_loss=0.2254, cr_loss=0.4119, attn_decoder_loss=0.2884, over 29442.00 frames. ], tot_loss[loss=0.306, ctc_loss=0.2409, cr_loss=0.4403, attn_decoder_loss=0.3034, over 5795990.55 frames. ], batch size: 78, lr: 2.54e-02, grad_scale: 8.0 +2024-09-16 19:36:23,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=64700.0, ans=0.025 +2024-09-16 19:37:27,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=64860.0, ans=0.125 +2024-09-16 19:37:29,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=64900.0, ans=0.0 +2024-09-16 19:37:30,536 INFO [train.py:1198] (1/2) Epoch 4, batch 2650, loss[loss=0.3255, ctc_loss=0.2578, cr_loss=0.4645, attn_decoder_loss=0.3227, over 29294.00 frames. ], tot_loss[loss=0.3061, ctc_loss=0.241, cr_loss=0.4408, attn_decoder_loss=0.3035, over 5801883.51 frames. ], batch size: 100, lr: 2.54e-02, grad_scale: 4.0 +2024-09-16 19:37:56,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=64940.0, ans=0.125 +2024-09-16 19:38:00,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=64940.0, ans=0.2 +2024-09-16 19:38:17,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=65020.0, ans=0.0 +2024-09-16 19:38:34,064 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.066e+02 1.250e+02 1.369e+02 1.564e+02 3.210e+02, threshold=2.738e+02, percent-clipped=1.0 +2024-09-16 19:38:34,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=65060.0, ans=0.125 +2024-09-16 19:38:49,703 INFO [train.py:1198] (1/2) Epoch 4, batch 2700, loss[loss=0.308, ctc_loss=0.2333, cr_loss=0.4738, attn_decoder_loss=0.3058, over 29511.00 frames. ], tot_loss[loss=0.3059, ctc_loss=0.2406, cr_loss=0.4406, attn_decoder_loss=0.3033, over 5797444.19 frames. 
], batch size: 87, lr: 2.54e-02, grad_scale: 8.0 +2024-09-16 19:39:30,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=65180.0, ans=0.5 +2024-09-16 19:39:30,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=65180.0, ans=0.125 +2024-09-16 19:39:33,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=65220.0, ans=0.2 +2024-09-16 19:39:47,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=65220.0, ans=0.125 +2024-09-16 19:39:53,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=65260.0, ans=0.05 +2024-09-16 19:39:54,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=65260.0, ans=0.0 +2024-09-16 19:39:59,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=65260.0, ans=0.1 +2024-09-16 19:40:05,370 INFO [train.py:1198] (1/2) Epoch 4, batch 2750, loss[loss=0.2958, ctc_loss=0.2275, cr_loss=0.4405, attn_decoder_loss=0.2936, over 29517.00 frames. ], tot_loss[loss=0.3045, ctc_loss=0.2396, cr_loss=0.4395, attn_decoder_loss=0.3019, over 5796483.28 frames. ], batch size: 75, lr: 2.53e-02, grad_scale: 4.0 +2024-09-16 19:40:05,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=65300.0, ans=0.0 +2024-09-16 19:40:10,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=65300.0, ans=0.0 +2024-09-16 19:40:39,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=65380.0, ans=0.2 +2024-09-16 19:40:48,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=65420.0, ans=0.125 +2024-09-16 19:40:50,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=65420.0, ans=0.125 +2024-09-16 19:41:08,343 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.842e+01 1.245e+02 1.440e+02 1.752e+02 4.612e+02, threshold=2.880e+02, percent-clipped=7.0 +2024-09-16 19:41:13,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=65460.0, ans=0.0 +2024-09-16 19:41:14,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.65 vs. limit=15.0 +2024-09-16 19:41:20,408 INFO [train.py:1198] (1/2) Epoch 4, batch 2800, loss[loss=0.3384, ctc_loss=0.3054, cr_loss=0.4945, attn_decoder_loss=0.331, over 20670.00 frames. ], tot_loss[loss=0.3044, ctc_loss=0.2398, cr_loss=0.4395, attn_decoder_loss=0.3018, over 5776842.12 frames. ], batch size: 211, lr: 2.53e-02, grad_scale: 8.0 +2024-09-16 19:41:25,872 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.66 vs. 
limit=15.0 +2024-09-16 19:41:38,064 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.33 vs. limit=15.0 +2024-09-16 19:41:44,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=65540.0, ans=0.125 +2024-09-16 19:41:51,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=65580.0, ans=0.1 +2024-09-16 19:41:52,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=65580.0, ans=0.125 +2024-09-16 19:42:33,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=65660.0, ans=0.0 +2024-09-16 19:42:40,277 INFO [train.py:1198] (1/2) Epoch 4, batch 2850, loss[loss=0.3142, ctc_loss=0.2574, cr_loss=0.4526, attn_decoder_loss=0.3105, over 29521.00 frames. ], tot_loss[loss=0.3055, ctc_loss=0.241, cr_loss=0.4398, attn_decoder_loss=0.3029, over 5761777.40 frames. ], batch size: 77, lr: 2.53e-02, grad_scale: 4.0 +2024-09-16 19:42:41,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=65700.0, ans=0.125 +2024-09-16 19:43:12,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=65780.0, ans=0.125 +2024-09-16 19:43:15,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=65780.0, ans=0.125 +2024-09-16 19:43:36,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=65820.0, ans=0.0 +2024-09-16 19:43:45,309 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.117e+02 1.371e+02 1.544e+02 1.863e+02 5.214e+02, threshold=3.089e+02, percent-clipped=4.0 +2024-09-16 19:43:55,804 INFO [train.py:1198] (1/2) Epoch 4, batch 2900, loss[loss=0.2994, ctc_loss=0.2359, cr_loss=0.4438, attn_decoder_loss=0.2966, over 29457.00 frames. ], tot_loss[loss=0.3062, ctc_loss=0.241, cr_loss=0.442, attn_decoder_loss=0.3037, over 5787691.53 frames. ], batch size: 79, lr: 2.52e-02, grad_scale: 8.0 +2024-09-16 19:43:57,912 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.61 vs. limit=10.0 +2024-09-16 19:44:04,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.96 vs. 
limit=22.5 +2024-09-16 19:44:09,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=65940.0, ans=0.1 +2024-09-16 19:44:09,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=65940.0, ans=0.125 +2024-09-16 19:44:11,072 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:44:27,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=65980.0, ans=0.1 +2024-09-16 19:44:37,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=65980.0, ans=0.2 +2024-09-16 19:45:10,935 INFO [train.py:1198] (1/2) Epoch 4, batch 2950, loss[loss=0.288, ctc_loss=0.2236, cr_loss=0.4098, attn_decoder_loss=0.286, over 29543.00 frames. ], tot_loss[loss=0.305, ctc_loss=0.2403, cr_loss=0.4405, attn_decoder_loss=0.3024, over 5781618.21 frames. ], batch size: 75, lr: 2.52e-02, grad_scale: 4.0 +2024-09-16 19:45:51,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=66180.0, ans=0.125 +2024-09-16 19:45:57,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=66220.0, ans=0.1 +2024-09-16 19:46:08,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=66220.0, ans=0.0 +2024-09-16 19:46:19,744 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.755e+01 1.228e+02 1.356e+02 1.566e+02 3.773e+02, threshold=2.713e+02, percent-clipped=2.0 +2024-09-16 19:46:22,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=66260.0, ans=0.0 +2024-09-16 19:46:22,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=66260.0, ans=0.125 +2024-09-16 19:46:30,793 INFO [train.py:1198] (1/2) Epoch 4, batch 3000, loss[loss=0.3019, ctc_loss=0.2362, cr_loss=0.427, attn_decoder_loss=0.2997, over 29751.00 frames. ], tot_loss[loss=0.3049, ctc_loss=0.24, cr_loss=0.4407, attn_decoder_loss=0.3023, over 5782830.24 frames. ], batch size: 81, lr: 2.52e-02, grad_scale: 8.0 +2024-09-16 19:46:30,794 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 19:46:49,051 INFO [train.py:1230] (1/2) Epoch 4, validation: loss=0.2264, ctc_loss=0.07857, cr_loss=4.376e-15, attn_decoder_loss=0.2428, over 944034.00 frames. +2024-09-16 19:46:49,052 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 19:47:05,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.29 vs. 
limit=10.0 +2024-09-16 19:47:06,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=66340.0, ans=0.125 +2024-09-16 19:47:09,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=66340.0, ans=0.125 +2024-09-16 19:47:10,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=66340.0, ans=0.0 +2024-09-16 19:47:15,824 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.97 vs. limit=15.0 +2024-09-16 19:47:29,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=66380.0, ans=0.0 +2024-09-16 19:47:30,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=66380.0, ans=0.125 +2024-09-16 19:47:41,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=66420.0, ans=0.125 +2024-09-16 19:48:05,600 INFO [train.py:1198] (1/2) Epoch 4, batch 3050, loss[loss=0.2881, ctc_loss=0.2268, cr_loss=0.4391, attn_decoder_loss=0.2852, over 29555.00 frames. ], tot_loss[loss=0.3057, ctc_loss=0.2406, cr_loss=0.4416, attn_decoder_loss=0.3032, over 5777211.78 frames. ], batch size: 76, lr: 2.51e-02, grad_scale: 4.0 +2024-09-16 19:48:27,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=66540.0, ans=0.0 +2024-09-16 19:48:38,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.65 vs. limit=15.0 +2024-09-16 19:48:46,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=66580.0, ans=0.2 +2024-09-16 19:49:13,368 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.917e+01 1.239e+02 1.360e+02 1.654e+02 2.744e+02, threshold=2.720e+02, percent-clipped=1.0 +2024-09-16 19:49:20,803 INFO [train.py:1198] (1/2) Epoch 4, batch 3100, loss[loss=0.3093, ctc_loss=0.2348, cr_loss=0.4379, attn_decoder_loss=0.3078, over 29281.00 frames. ], tot_loss[loss=0.3052, ctc_loss=0.24, cr_loss=0.4406, attn_decoder_loss=0.3027, over 5777615.45 frames. ], batch size: 100, lr: 2.51e-02, grad_scale: 8.0 +2024-09-16 19:49:22,651 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:49:39,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=66740.0, ans=0.1 +2024-09-16 19:49:43,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.98 vs. 
limit=15.0 +2024-09-16 19:49:45,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=66740.0, ans=0.125 +2024-09-16 19:49:57,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=66780.0, ans=0.1 +2024-09-16 19:49:58,431 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.68 vs. limit=12.0 +2024-09-16 19:50:07,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=66780.0, ans=0.125 +2024-09-16 19:50:08,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=66820.0, ans=0.0 +2024-09-16 19:50:16,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=66820.0, ans=0.95 +2024-09-16 19:50:28,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=66860.0, ans=0.2 +2024-09-16 19:50:40,202 INFO [train.py:1198] (1/2) Epoch 4, batch 3150, loss[loss=0.3248, ctc_loss=0.2571, cr_loss=0.4785, attn_decoder_loss=0.3216, over 28847.00 frames. ], tot_loss[loss=0.3046, ctc_loss=0.2391, cr_loss=0.4401, attn_decoder_loss=0.3021, over 5784045.07 frames. ], batch size: 104, lr: 2.51e-02, grad_scale: 4.0 +2024-09-16 19:50:48,482 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.59 vs. limit=12.0 +2024-09-16 19:51:05,254 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.79 vs. limit=15.0 +2024-09-16 19:51:18,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=66980.0, ans=0.125 +2024-09-16 19:51:18,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=66980.0, ans=0.1 +2024-09-16 19:51:39,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=67060.0, ans=0.1 +2024-09-16 19:51:46,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=67060.0, ans=0.125 +2024-09-16 19:51:49,431 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.339e+01 1.205e+02 1.438e+02 1.646e+02 4.024e+02, threshold=2.876e+02, percent-clipped=3.0 +2024-09-16 19:51:51,825 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=8.86 vs. limit=15.0 +2024-09-16 19:51:55,490 INFO [train.py:1198] (1/2) Epoch 4, batch 3200, loss[loss=0.3001, ctc_loss=0.2362, cr_loss=0.4646, attn_decoder_loss=0.2969, over 29420.00 frames. ], tot_loss[loss=0.3038, ctc_loss=0.2381, cr_loss=0.4394, attn_decoder_loss=0.3013, over 5794280.71 frames. 
], batch size: 79, lr: 2.51e-02, grad_scale: 8.0 +2024-09-16 19:51:55,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=67100.0, ans=0.1 +2024-09-16 19:52:27,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=67180.0, ans=0.125 +2024-09-16 19:52:32,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=67180.0, ans=0.1 +2024-09-16 19:52:34,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=67180.0, ans=0.125 +2024-09-16 19:52:41,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=67220.0, ans=0.125 +2024-09-16 19:53:11,543 INFO [train.py:1198] (1/2) Epoch 4, batch 3250, loss[loss=0.3191, ctc_loss=0.2497, cr_loss=0.4814, attn_decoder_loss=0.3161, over 29694.00 frames. ], tot_loss[loss=0.3045, ctc_loss=0.2387, cr_loss=0.4407, attn_decoder_loss=0.302, over 5800153.27 frames. ], batch size: 84, lr: 2.50e-02, grad_scale: 4.0 +2024-09-16 19:53:20,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.47 vs. limit=6.0 +2024-09-16 19:53:27,832 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.78 vs. limit=22.5 +2024-09-16 19:53:51,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=67380.0, ans=0.125 +2024-09-16 19:54:24,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=67460.0, ans=0.125 +2024-09-16 19:54:26,053 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.095e+02 1.284e+02 1.425e+02 1.663e+02 2.668e+02, threshold=2.850e+02, percent-clipped=0.0 +2024-09-16 19:54:29,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=67500.0, ans=0.015 +2024-09-16 19:54:30,766 INFO [train.py:1198] (1/2) Epoch 4, batch 3300, loss[loss=0.3099, ctc_loss=0.2383, cr_loss=0.4339, attn_decoder_loss=0.3082, over 28308.00 frames. ], tot_loss[loss=0.3036, ctc_loss=0.238, cr_loss=0.4399, attn_decoder_loss=0.3011, over 5797099.38 frames. ], batch size: 111, lr: 2.50e-02, grad_scale: 8.0 +2024-09-16 19:54:31,829 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.60 vs. limit=15.0 +2024-09-16 19:54:50,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=67540.0, ans=0.125 +2024-09-16 19:54:56,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=67540.0, ans=0.125 +2024-09-16 19:55:46,066 INFO [train.py:1198] (1/2) Epoch 4, batch 3350, loss[loss=0.3232, ctc_loss=0.2613, cr_loss=0.4433, attn_decoder_loss=0.3203, over 28777.00 frames. ], tot_loss[loss=0.304, ctc_loss=0.2384, cr_loss=0.4394, attn_decoder_loss=0.3015, over 5774410.03 frames. 
], batch size: 104, lr: 2.50e-02, grad_scale: 4.0 +2024-09-16 19:55:52,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=67700.0, ans=0.025 +2024-09-16 19:56:24,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=67780.0, ans=0.125 +2024-09-16 19:56:24,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=67780.0, ans=0.0 +2024-09-16 19:56:34,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=67820.0, ans=0.0 +2024-09-16 19:56:53,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=67860.0, ans=0.0 +2024-09-16 19:56:58,645 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.005e+02 1.186e+02 1.341e+02 1.622e+02 4.699e+02, threshold=2.682e+02, percent-clipped=3.0 +2024-09-16 19:57:01,666 INFO [train.py:1198] (1/2) Epoch 4, batch 3400, loss[loss=0.2743, ctc_loss=0.211, cr_loss=0.4039, attn_decoder_loss=0.2724, over 29330.00 frames. ], tot_loss[loss=0.3038, ctc_loss=0.2382, cr_loss=0.439, attn_decoder_loss=0.3014, over 5766563.79 frames. ], batch size: 67, lr: 2.49e-02, grad_scale: 8.0 +2024-09-16 19:57:11,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=67900.0, ans=0.2 +2024-09-16 19:57:14,775 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.55 vs. limit=22.5 +2024-09-16 19:57:52,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.44 vs. limit=6.0 +2024-09-16 19:58:14,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=68060.0, ans=0.125 +2024-09-16 19:58:17,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=68060.0, ans=0.125 +2024-09-16 19:58:19,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.55 vs. limit=15.0 +2024-09-16 19:58:21,354 INFO [train.py:1198] (1/2) Epoch 4, batch 3450, loss[loss=0.3237, ctc_loss=0.2579, cr_loss=0.4411, attn_decoder_loss=0.3212, over 28314.00 frames. ], tot_loss[loss=0.3041, ctc_loss=0.238, cr_loss=0.4388, attn_decoder_loss=0.3017, over 5773947.80 frames. ], batch size: 111, lr: 2.49e-02, grad_scale: 4.0 +2024-09-16 19:58:24,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=68100.0, ans=0.1 +2024-09-16 19:58:32,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=68100.0, ans=0.125 +2024-09-16 19:58:46,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.31 vs. 
limit=15.0 +2024-09-16 19:58:47,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=68140.0, ans=0.2 +2024-09-16 19:59:02,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=68180.0, ans=0.1 +2024-09-16 19:59:14,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.67 vs. limit=10.0 +2024-09-16 19:59:32,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=68260.0, ans=0.125 +2024-09-16 19:59:34,952 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.730e+01 1.161e+02 1.261e+02 1.469e+02 4.535e+02, threshold=2.521e+02, percent-clipped=3.0 +2024-09-16 19:59:36,486 INFO [train.py:1198] (1/2) Epoch 4, batch 3500, loss[loss=0.2655, ctc_loss=0.2049, cr_loss=0.3728, attn_decoder_loss=0.264, over 29325.00 frames. ], tot_loss[loss=0.3031, ctc_loss=0.2371, cr_loss=0.4379, attn_decoder_loss=0.3007, over 5776273.31 frames. ], batch size: 71, lr: 2.49e-02, grad_scale: 8.0 +2024-09-16 19:59:48,914 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 19:59:51,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=68340.0, ans=0.125 +2024-09-16 20:00:02,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=68340.0, ans=0.2 +2024-09-16 20:00:09,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=68380.0, ans=0.0 +2024-09-16 20:00:38,500 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.46 vs. limit=22.5 +2024-09-16 20:00:43,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=68460.0, ans=0.125 +2024-09-16 20:00:49,828 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.47 vs. limit=15.0 +2024-09-16 20:00:50,563 INFO [train.py:1198] (1/2) Epoch 4, batch 3550, loss[loss=0.323, ctc_loss=0.2624, cr_loss=0.4565, attn_decoder_loss=0.3196, over 29703.00 frames. ], tot_loss[loss=0.3032, ctc_loss=0.2372, cr_loss=0.438, attn_decoder_loss=0.3008, over 5781749.03 frames. 
], batch size: 89, lr: 2.48e-02, grad_scale: 4.0 +2024-09-16 20:00:50,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=68500.0, ans=0.1 +2024-09-16 20:01:05,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=68540.0, ans=0.0 +2024-09-16 20:01:08,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=68540.0, ans=0.0 +2024-09-16 20:01:29,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=68580.0, ans=0.1 +2024-09-16 20:01:32,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=68580.0, ans=0.2 +2024-09-16 20:01:36,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=68620.0, ans=0.05 +2024-09-16 20:01:38,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=68620.0, ans=0.2 +2024-09-16 20:01:38,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=68620.0, ans=0.2 +2024-09-16 20:02:04,415 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.002e+02 1.275e+02 1.372e+02 1.558e+02 6.376e+02, threshold=2.743e+02, percent-clipped=5.0 +2024-09-16 20:02:04,436 INFO [train.py:1198] (1/2) Epoch 4, batch 3600, loss[loss=0.2868, ctc_loss=0.216, cr_loss=0.4322, attn_decoder_loss=0.2851, over 29505.00 frames. ], tot_loss[loss=0.3031, ctc_loss=0.2369, cr_loss=0.4378, attn_decoder_loss=0.3007, over 5791576.37 frames. ], batch size: 77, lr: 2.48e-02, grad_scale: 8.0 +2024-09-16 20:02:27,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=68740.0, ans=0.1 +2024-09-16 20:02:29,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=68740.0, ans=0.125 +2024-09-16 20:02:36,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=68780.0, ans=0.125 +2024-09-16 20:02:38,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=68780.0, ans=0.2 +2024-09-16 20:03:08,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=68860.0, ans=0.1 +2024-09-16 20:03:17,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=68860.0, ans=0.0 +2024-09-16 20:03:23,382 INFO [train.py:1198] (1/2) Epoch 4, batch 3650, loss[loss=0.3099, ctc_loss=0.2408, cr_loss=0.4408, attn_decoder_loss=0.3078, over 29487.00 frames. ], tot_loss[loss=0.3022, ctc_loss=0.2361, cr_loss=0.4377, attn_decoder_loss=0.2999, over 5794090.99 frames. 
], batch size: 90, lr: 2.48e-02, grad_scale: 4.0 +2024-09-16 20:03:23,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=68900.0, ans=0.1 +2024-09-16 20:03:35,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=68900.0, ans=0.125 +2024-09-16 20:04:11,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=69020.0, ans=0.125 +2024-09-16 20:04:14,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=69020.0, ans=0.1 +2024-09-16 20:04:26,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=69060.0, ans=0.125 +2024-09-16 20:04:30,847 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.42 vs. limit=15.0 +2024-09-16 20:04:37,603 INFO [train.py:1198] (1/2) Epoch 4, batch 3700, loss[loss=0.321, ctc_loss=0.2546, cr_loss=0.4587, attn_decoder_loss=0.3182, over 29686.00 frames. ], tot_loss[loss=0.3024, ctc_loss=0.2361, cr_loss=0.4383, attn_decoder_loss=0.3001, over 5804863.08 frames. ], batch size: 84, lr: 2.47e-02, grad_scale: 8.0 +2024-09-16 20:04:39,096 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.031e+02 1.266e+02 1.378e+02 1.578e+02 2.388e+02, threshold=2.756e+02, percent-clipped=0.0 +2024-09-16 20:05:01,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=69140.0, ans=0.125 +2024-09-16 20:05:10,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=69180.0, ans=0.125 +2024-09-16 20:05:16,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=69180.0, ans=0.0 +2024-09-16 20:05:17,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=69180.0, ans=0.125 +2024-09-16 20:05:22,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=69220.0, ans=0.1 +2024-09-16 20:05:45,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.59 vs. limit=15.0 +2024-09-16 20:05:51,396 INFO [train.py:1198] (1/2) Epoch 4, batch 3750, loss[loss=0.2707, ctc_loss=0.2145, cr_loss=0.3927, attn_decoder_loss=0.2683, over 29328.00 frames. ], tot_loss[loss=0.3022, ctc_loss=0.2361, cr_loss=0.4388, attn_decoder_loss=0.2998, over 5807959.29 frames. ], batch size: 67, lr: 2.47e-02, grad_scale: 4.0 +2024-09-16 20:06:09,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=69340.0, ans=0.1 +2024-09-16 20:06:13,129 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.41 vs. 
limit=22.5 +2024-09-16 20:06:13,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=69340.0, ans=0.125 +2024-09-16 20:06:30,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=69380.0, ans=0.0 +2024-09-16 20:06:43,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=69420.0, ans=0.025 +2024-09-16 20:06:48,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=69420.0, ans=0.2 +2024-09-16 20:06:54,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=69460.0, ans=0.0 +2024-09-16 20:06:54,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.27 vs. limit=22.5 +2024-09-16 20:07:05,692 INFO [train.py:1198] (1/2) Epoch 4, batch 3800, loss[loss=0.309, ctc_loss=0.231, cr_loss=0.4503, attn_decoder_loss=0.3076, over 29626.00 frames. ], tot_loss[loss=0.3017, ctc_loss=0.2354, cr_loss=0.4373, attn_decoder_loss=0.2994, over 5799244.20 frames. ], batch size: 86, lr: 2.47e-02, grad_scale: 8.0 +2024-09-16 20:07:05,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=69500.0, ans=0.125 +2024-09-16 20:07:08,685 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.061e+02 1.301e+02 1.423e+02 1.744e+02 6.965e+02, threshold=2.846e+02, percent-clipped=5.0 +2024-09-16 20:07:25,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=69540.0, ans=0.125 +2024-09-16 20:07:28,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=69540.0, ans=0.04949747468305833 +2024-09-16 20:08:05,208 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.29 vs. limit=12.0 +2024-09-16 20:08:10,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=69660.0, ans=0.1 +2024-09-16 20:08:20,401 INFO [train.py:1198] (1/2) Epoch 4, batch 3850, loss[loss=0.3164, ctc_loss=0.2465, cr_loss=0.4404, attn_decoder_loss=0.3144, over 29267.00 frames. ], tot_loss[loss=0.302, ctc_loss=0.2353, cr_loss=0.4392, attn_decoder_loss=0.2996, over 5813614.84 frames. ], batch size: 100, lr: 2.47e-02, grad_scale: 4.0 +2024-09-16 20:08:20,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=69700.0, ans=0.125 +2024-09-16 20:08:25,648 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.08 vs. limit=22.5 +2024-09-16 20:08:41,774 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.54 vs. 
limit=15.0 +2024-09-16 20:08:42,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=69740.0, ans=0.1 +2024-09-16 20:08:42,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=69740.0, ans=0.125 +2024-09-16 20:09:06,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=69820.0, ans=0.125 +2024-09-16 20:09:17,873 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.47 vs. limit=12.0 +2024-09-16 20:09:25,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=69860.0, ans=0.0 +2024-09-16 20:09:28,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=69860.0, ans=0.0 +2024-09-16 20:09:36,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=69900.0, ans=0.125 +2024-09-16 20:09:36,864 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.66 vs. limit=15.0 +2024-09-16 20:09:37,507 INFO [train.py:1198] (1/2) Epoch 4, batch 3900, loss[loss=0.3104, ctc_loss=0.2464, cr_loss=0.4297, attn_decoder_loss=0.308, over 29616.00 frames. ], tot_loss[loss=0.3023, ctc_loss=0.2353, cr_loss=0.4393, attn_decoder_loss=0.3, over 5818111.86 frames. ], batch size: 86, lr: 2.46e-02, grad_scale: 8.0 +2024-09-16 20:09:40,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=69900.0, ans=0.0 +2024-09-16 20:09:41,919 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.044e+02 1.221e+02 1.343e+02 1.520e+02 2.719e+02, threshold=2.686e+02, percent-clipped=0.0 +2024-09-16 20:09:46,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=69900.0, ans=0.0 +2024-09-16 20:09:56,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=69940.0, ans=0.025 +2024-09-16 20:09:57,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=69940.0, ans=0.05 +2024-09-16 20:10:25,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=70020.0, ans=0.125 +2024-09-16 20:10:48,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=70060.0, ans=0.125 +2024-09-16 20:10:51,474 INFO [train.py:1198] (1/2) Epoch 4, batch 3950, loss[loss=0.316, ctc_loss=0.2469, cr_loss=0.4614, attn_decoder_loss=0.3134, over 29437.00 frames. ], tot_loss[loss=0.302, ctc_loss=0.2348, cr_loss=0.4394, attn_decoder_loss=0.2997, over 5837414.51 frames. 
], batch size: 97, lr: 2.46e-02, grad_scale: 4.0 +2024-09-16 20:11:02,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=70100.0, ans=0.125 +2024-09-16 20:11:27,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=70180.0, ans=0.07 +2024-09-16 20:11:37,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=70220.0, ans=0.95 +2024-09-16 20:11:58,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=70260.0, ans=0.125 +2024-09-16 20:12:05,016 INFO [train.py:1198] (1/2) Epoch 4, batch 4000, loss[loss=0.2832, ctc_loss=0.2012, cr_loss=0.3972, attn_decoder_loss=0.2835, over 29515.00 frames. ], tot_loss[loss=0.3025, ctc_loss=0.2358, cr_loss=0.4397, attn_decoder_loss=0.3002, over 5814149.78 frames. ], batch size: 74, lr: 2.46e-02, grad_scale: 8.0 +2024-09-16 20:12:12,301 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.084e+02 1.309e+02 1.435e+02 1.653e+02 3.484e+02, threshold=2.870e+02, percent-clipped=1.0 +2024-09-16 20:12:19,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=70340.0, ans=0.125 +2024-09-16 20:12:23,433 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.37 vs. limit=15.0 +2024-09-16 20:12:30,266 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:12:37,125 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.77 vs. limit=15.0 +2024-09-16 20:12:37,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=70380.0, ans=0.125 +2024-09-16 20:12:42,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=70380.0, ans=0.125 +2024-09-16 20:13:06,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=70460.0, ans=0.125 +2024-09-16 20:13:20,973 INFO [train.py:1198] (1/2) Epoch 4, batch 4050, loss[loss=0.3464, ctc_loss=0.3111, cr_loss=0.4424, attn_decoder_loss=0.3405, over 20579.00 frames. ], tot_loss[loss=0.3027, ctc_loss=0.236, cr_loss=0.4394, attn_decoder_loss=0.3003, over 5797748.21 frames. 
], batch size: 210, lr: 2.45e-02, grad_scale: 4.0 +2024-09-16 20:13:32,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=70500.0, ans=0.1 +2024-09-16 20:13:39,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=70540.0, ans=0.0 +2024-09-16 20:13:44,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=70540.0, ans=0.2 +2024-09-16 20:13:59,205 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:14:26,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=70660.0, ans=0.125 +2024-09-16 20:14:29,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=70660.0, ans=0.025 +2024-09-16 20:14:29,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=70660.0, ans=0.0 +2024-09-16 20:14:35,708 INFO [train.py:1198] (1/2) Epoch 4, batch 4100, loss[loss=0.3208, ctc_loss=0.248, cr_loss=0.4739, attn_decoder_loss=0.3184, over 29515.00 frames. ], tot_loss[loss=0.3031, ctc_loss=0.2365, cr_loss=0.4403, attn_decoder_loss=0.3007, over 5792828.96 frames. ], batch size: 90, lr: 2.45e-02, grad_scale: 8.0 +2024-09-16 20:14:42,855 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.753e+01 1.273e+02 1.617e+02 1.999e+02 3.514e+02, threshold=3.235e+02, percent-clipped=2.0 +2024-09-16 20:14:45,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.14 vs. limit=22.5 +2024-09-16 20:14:57,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=70740.0, ans=0.025 +2024-09-16 20:15:06,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=70780.0, ans=0.125 +2024-09-16 20:15:13,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=70780.0, ans=10.0 +2024-09-16 20:15:48,990 INFO [train.py:1198] (1/2) Epoch 4, batch 4150, loss[loss=0.2849, ctc_loss=0.2136, cr_loss=0.4022, attn_decoder_loss=0.2839, over 29517.00 frames. ], tot_loss[loss=0.3028, ctc_loss=0.236, cr_loss=0.4399, attn_decoder_loss=0.3004, over 5797596.93 frames. ], batch size: 77, lr: 2.45e-02, grad_scale: 4.0 +2024-09-16 20:16:00,285 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=16.64 vs. limit=15.0 +2024-09-16 20:16:16,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.31 vs. limit=15.0 +2024-09-16 20:16:17,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=70980.0, ans=0.0 +2024-09-16 20:17:02,272 INFO [train.py:1198] (1/2) Epoch 4, batch 4200, loss[loss=0.3162, ctc_loss=0.2451, cr_loss=0.4474, attn_decoder_loss=0.3141, over 29495.00 frames. ], tot_loss[loss=0.3029, ctc_loss=0.2358, cr_loss=0.44, attn_decoder_loss=0.3005, over 5799087.07 frames. 
], batch size: 90, lr: 2.44e-02, grad_scale: 8.0 +2024-09-16 20:17:11,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=71100.0, ans=0.025 +2024-09-16 20:17:12,670 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.067e+02 1.228e+02 1.369e+02 1.579e+02 3.524e+02, threshold=2.737e+02, percent-clipped=1.0 +2024-09-16 20:17:16,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=71140.0, ans=0.0 +2024-09-16 20:17:18,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=71140.0, ans=0.2 +2024-09-16 20:17:41,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=71180.0, ans=0.125 +2024-09-16 20:17:42,193 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.76 vs. limit=15.0 +2024-09-16 20:17:43,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=71180.0, ans=0.125 +2024-09-16 20:18:08,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=71260.0, ans=0.04949747468305833 +2024-09-16 20:18:17,843 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.80 vs. limit=15.0 +2024-09-16 20:18:18,317 INFO [train.py:1198] (1/2) Epoch 4, batch 4250, loss[loss=0.2805, ctc_loss=0.2164, cr_loss=0.4152, attn_decoder_loss=0.2784, over 29501.00 frames. ], tot_loss[loss=0.3026, ctc_loss=0.235, cr_loss=0.4394, attn_decoder_loss=0.3004, over 5804489.61 frames. ], batch size: 74, lr: 2.44e-02, grad_scale: 4.0 +2024-09-16 20:18:25,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=71300.0, ans=0.1 +2024-09-16 20:18:38,210 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.58 vs. limit=22.5 +2024-09-16 20:18:40,971 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=16.74 vs. limit=15.0 +2024-09-16 20:18:50,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=71380.0, ans=0.125 +2024-09-16 20:18:52,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=71380.0, ans=0.0 +2024-09-16 20:19:29,632 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.85 vs. limit=15.0 +2024-09-16 20:19:31,887 INFO [train.py:1198] (1/2) Epoch 4, batch 4300, loss[loss=0.317, ctc_loss=0.2481, cr_loss=0.4737, attn_decoder_loss=0.3141, over 29546.00 frames. ], tot_loss[loss=0.3029, ctc_loss=0.2351, cr_loss=0.4399, attn_decoder_loss=0.3007, over 5793750.90 frames. 
], batch size: 87, lr: 2.44e-02, grad_scale: 8.0 +2024-09-16 20:19:33,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=71500.0, ans=0.2 +2024-09-16 20:19:43,709 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.834e+01 1.271e+02 1.418e+02 1.620e+02 3.004e+02, threshold=2.836e+02, percent-clipped=2.0 +2024-09-16 20:19:48,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=71540.0, ans=0.1 +2024-09-16 20:19:49,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=71540.0, ans=0.07 +2024-09-16 20:20:25,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=71620.0, ans=0.1 +2024-09-16 20:20:47,040 INFO [train.py:1198] (1/2) Epoch 4, batch 4350, loss[loss=0.3317, ctc_loss=0.2681, cr_loss=0.4629, attn_decoder_loss=0.3285, over 29460.00 frames. ], tot_loss[loss=0.3067, ctc_loss=0.2387, cr_loss=0.4448, attn_decoder_loss=0.3044, over 5795868.27 frames. ], batch size: 97, lr: 2.44e-02, grad_scale: 4.0 +2024-09-16 20:20:59,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=71700.0, ans=0.1 +2024-09-16 20:21:12,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=71740.0, ans=0.125 +2024-09-16 20:21:24,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=71780.0, ans=0.1 +2024-09-16 20:21:25,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=71780.0, ans=0.1 +2024-09-16 20:21:30,961 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.83 vs. limit=12.0 +2024-09-16 20:21:35,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.00 vs. limit=15.0 +2024-09-16 20:22:00,939 INFO [train.py:1198] (1/2) Epoch 4, batch 4400, loss[loss=0.3199, ctc_loss=0.2468, cr_loss=0.4497, attn_decoder_loss=0.318, over 27179.00 frames. ], tot_loss[loss=0.3093, ctc_loss=0.2417, cr_loss=0.4471, attn_decoder_loss=0.3069, over 5766928.43 frames. ], batch size: 124, lr: 2.43e-02, grad_scale: 8.0 +2024-09-16 20:22:01,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=71900.0, ans=0.0 +2024-09-16 20:22:07,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.63 vs. limit=15.0 +2024-09-16 20:22:08,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=71900.0, ans=0.125 +2024-09-16 20:22:14,006 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.841e+01 1.227e+02 1.349e+02 1.608e+02 3.095e+02, threshold=2.698e+02, percent-clipped=2.0 +2024-09-16 20:22:22,225 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.98 vs. 
limit=15.0 +2024-09-16 20:22:27,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=71940.0, ans=0.125 +2024-09-16 20:22:36,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=71980.0, ans=0.125 +2024-09-16 20:22:41,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=71980.0, ans=10.0 +2024-09-16 20:22:41,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=71980.0, ans=0.125 +2024-09-16 20:23:16,016 INFO [train.py:1198] (1/2) Epoch 4, batch 4450, loss[loss=0.3421, ctc_loss=0.2993, cr_loss=0.4556, attn_decoder_loss=0.3367, over 20483.00 frames. ], tot_loss[loss=0.3132, ctc_loss=0.2484, cr_loss=0.4496, attn_decoder_loss=0.3104, over 5575591.55 frames. ], batch size: 209, lr: 2.43e-02, grad_scale: 4.0 +2024-09-16 20:23:16,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=72100.0, ans=0.125 +2024-09-16 20:23:40,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=72140.0, ans=0.0 +2024-09-16 20:23:49,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=72180.0, ans=0.125 +2024-09-16 20:24:02,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=72220.0, ans=0.125 +2024-09-16 20:24:29,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=72300.0, ans=0.0 +2024-09-16 20:24:31,032 INFO [train.py:1198] (1/2) Epoch 4, batch 4500, loss[loss=0.332, ctc_loss=0.2911, cr_loss=0.4231, attn_decoder_loss=0.3271, over 20184.00 frames. ], tot_loss[loss=0.3179, ctc_loss=0.2579, cr_loss=0.4503, attn_decoder_loss=0.3145, over 5237045.34 frames. ], batch size: 209, lr: 2.43e-02, grad_scale: 8.0 +2024-09-16 20:24:46,108 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.089e+02 1.245e+02 1.357e+02 1.541e+02 2.817e+02, threshold=2.714e+02, percent-clipped=1.0 +2024-09-16 20:25:04,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=72380.0, ans=0.1 +2024-09-16 20:26:05,473 INFO [train.py:1198] (1/2) Epoch 5, batch 0, loss[loss=0.3362, ctc_loss=0.2134, cr_loss=0.4003, attn_decoder_loss=0.341, over 29603.00 frames. ], tot_loss[loss=0.3362, ctc_loss=0.2134, cr_loss=0.4003, attn_decoder_loss=0.341, over 29603.00 frames. ], batch size: 73, lr: 2.26e-02, grad_scale: 4.0 +2024-09-16 20:26:05,473 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 20:26:24,506 INFO [train.py:1230] (1/2) Epoch 5, validation: loss=0.2407, ctc_loss=0.07934, cr_loss=4.486e-15, attn_decoder_loss=0.2587, over 944034.00 frames. 
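The validation entry above, together with the running tot_loss entries, pins down how the logged sub-losses combine into the total. A minimal sketch, assuming a fixed weighted sum; the scales 0.1 / 0.02 / 0.9 are inferred by fitting the logged numbers, not read from train.py:

```python
# Hypothetical reconstruction of the logged "loss" from its parts; the
# scales are fitted to the numbers in this log, not taken from train.py.
def combined_loss(ctc_loss: float, cr_loss: float, attn_decoder_loss: float,
                  ctc_scale: float = 0.1, cr_scale: float = 0.02,
                  attn_scale: float = 0.9) -> float:
    return (ctc_scale * ctc_loss
            + cr_scale * cr_loss
            + attn_scale * attn_decoder_loss)

# Epoch 4, batch 3250 tot_loss: loss=0.3045, ctc=0.2387, cr=0.4407, attn=0.302
print(round(combined_loss(0.2387, 0.4407, 0.302), 4))       # 0.3045
# Epoch 5 validation: ctc=0.07934, cr ~ 0, attn=0.2587; logged loss=0.2407
print(round(combined_loss(0.07934, 4.486e-15, 0.2587), 4))  # 0.2408 (rounding)
```

The near-zero cr_loss at validation time is consistent with the consistency-regularization term being computed only between augmented training copies, not on clean validation batches.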
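The optim.py warnings encode their own arithmetic. In each "grad-norm quartiles a b c d e, threshold=t" line, t equals Clipping_scale times the median c (for instance 2.0 × 1.357e+02 = 2.714e+02 in the warning just above), the "Scaling gradients by s, model_norm_threshold=m" message that follows is consistent with rescaling an oversized gradient by s = m / grad_norm, and the "Parameter dominating tot_sumsq" line factors its dominant term as grad_sumsq × orig_rms_sq (1.717e+06 × 2.546 ≈ 4.372e+06). A hedged sketch of that clipping rule, with illustrative names rather than optim.py's:

```python
# Sketch of median-based gradient clipping as suggested by the warnings in
# this log; the function is illustrative, not icefall's optim.py.
import torch

def grad_clip_scale(grad_norm: float, recent_norms: list[float],
                    clipping_scale: float = 2.0) -> float:
    """Factor to multiply the gradient by (1.0 means no clipping)."""
    qs = torch.quantile(torch.tensor(recent_norms),
                        torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * qs[2].item()  # clipping_scale * median
    return min(1.0, threshold / grad_norm)

# Quartile report from earlier in this log: median 1.425e+02 gives
# threshold 2.850e+02, matching the logged value.
norms = [109.5, 128.4, 142.5, 166.3, 266.8]
print(grad_clip_scale(3974.2, norms))            # ~0.0717, heavy downscaling
# "Scaling gradients by 0.06828..., model_norm_threshold=271.399..." implies
# a raw gradient norm of about 271.399 / 0.06828 = 3.974e+03, exactly the
# max reported by the next quartile summary.
print(271.39923095703125 / 0.06828752905130386)  # ~3974
```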
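Most of the INFO traffic reports ScheduledFloat values: hyperparameters such as dropout probabilities, skip rates and balancer targets that are functions of batch_count rather than constants. A minimal sketch of a piecewise-linear schedule consistent with those lines; the class and the breakpoints below are stand-ins, not icefall's scaling.ScheduledFloat:

```python
# Stand-in for a batch-count-keyed schedule like the ones logged above;
# the breakpoints (0 -> 0.3, 20000 -> 0.1) are hypothetical examples.
from bisect import bisect_right

class PiecewiseLinearSchedule:
    """Linearly interpolates between (batch_count, value) breakpoints."""

    def __init__(self, *points: tuple[float, float]) -> None:
        self.points = sorted(points)

    def __call__(self, batch_count: float) -> float:
        xs = [x for x, _ in self.points]
        if batch_count <= xs[0]:
            return self.points[0][1]
        if batch_count >= xs[-1]:
            return self.points[-1][1]
        i = bisect_right(xs, batch_count)
        (x0, y0), (x1, y1) = self.points[i - 1], self.points[i]
        return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)

dropout_p = PiecewiseLinearSchedule((0.0, 0.3), (20000.0, 0.1))
print(dropout_p(67100.0))  # 0.1, matching "... batch_count=67100.0, ans=0.1"
```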
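The Whitening lines compare a per-module statistic against a limit, and the adjustable *.whitening_limit entries elsewhere in the log suggest a penalty that activates only above that limit. A plausible reconstruction, assuming the metric measures how far the channel covariance is from a multiple of the identity (a sketch, not a quote of scaling.py):

```python
# Assumed whitening diagnostic: 1.0 when the channel covariance is c * I,
# rising toward num_channels as activations collapse onto one direction.
# Consistent in spirit with "metric=9.60 vs. limit=15.0", but not quoted
# from icefall's scaling.py.
import torch

def whitening_metric(x: torch.Tensor) -> float:
    """x: (num_frames, num_channels) activations."""
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.T @ x) / x.shape[0]              # channel covariance
    num_channels = cov.shape[0]
    mean_diag = cov.diagonal().mean()
    # Normalized squared Frobenius norm: exactly 1.0 when cov = c * I.
    return float((cov ** 2).sum() / (num_channels * mean_diag ** 2))

white = torch.randn(10_000, 512)
print(whitening_metric(white))                # just above 1.0
rank1 = torch.randn(10_000, 1) * torch.randn(1, 512)
print(whitening_metric(rank1))                # close to 512 (= num_channels)
```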
+2024-09-16 20:26:24,507 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 20:26:25,968 WARNING [optim.py:503] (1/2) Scaling gradients by 0.06828752905130386, model_norm_threshold=271.39923095703125 +2024-09-16 20:26:26,177 WARNING [optim.py:575] (1/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.embed.weight with proportion 0.28, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=4.372e+06, grad_sumsq=1.717e+06, orig_rms_sq=2.546e+00 +2024-09-16 20:26:38,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=72440.0, ans=0.025 +2024-09-16 20:26:43,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=72440.0, ans=0.1 +2024-09-16 20:26:55,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=72480.0, ans=0.125 +2024-09-16 20:27:00,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.67 vs. limit=15.0 +2024-09-16 20:27:35,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.58 vs. limit=15.0 +2024-09-16 20:27:36,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.67 vs. limit=15.0 +2024-09-16 20:27:40,506 INFO [train.py:1198] (1/2) Epoch 5, batch 50, loss[loss=0.2771, ctc_loss=0.213, cr_loss=0.3604, attn_decoder_loss=0.2762, over 29438.00 frames. ], tot_loss[loss=0.3081, ctc_loss=0.2405, cr_loss=0.4418, attn_decoder_loss=0.3058, over 1267212.28 frames. ], batch size: 70, lr: 2.26e-02, grad_scale: 4.0 +2024-09-16 20:27:44,236 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.51 vs. 
limit=15.0 +2024-09-16 20:27:46,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=72600.0, ans=0.0 +2024-09-16 20:27:48,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=72600.0, ans=0.125 +2024-09-16 20:27:49,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=72600.0, ans=0.0 +2024-09-16 20:27:58,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=72640.0, ans=0.2 +2024-09-16 20:28:05,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=72640.0, ans=0.2 +2024-09-16 20:28:05,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=72640.0, ans=0.0 +2024-09-16 20:28:26,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=72720.0, ans=0.125 +2024-09-16 20:28:37,066 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.048e+02 1.241e+02 1.473e+02 1.722e+02 3.974e+03, threshold=2.946e+02, percent-clipped=9.0 +2024-09-16 20:28:38,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=72720.0, ans=0.0 +2024-09-16 20:28:40,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=72720.0, ans=0.0 +2024-09-16 20:28:42,009 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:28:58,584 INFO [train.py:1198] (1/2) Epoch 5, batch 100, loss[loss=0.2898, ctc_loss=0.2207, cr_loss=0.4276, attn_decoder_loss=0.288, over 29527.00 frames. ], tot_loss[loss=0.3062, ctc_loss=0.2373, cr_loss=0.4408, attn_decoder_loss=0.3041, over 2251916.75 frames. ], batch size: 76, lr: 2.25e-02, grad_scale: 8.0 +2024-09-16 20:28:58,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=72800.0, ans=0.125 +2024-09-16 20:29:07,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=72800.0, ans=0.0 +2024-09-16 20:29:10,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=72800.0, ans=0.2 +2024-09-16 20:29:11,209 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.72 vs. limit=15.0 +2024-09-16 20:29:58,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=72960.0, ans=0.125 +2024-09-16 20:30:00,769 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.82 vs. limit=15.0 +2024-09-16 20:30:01,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=72960.0, ans=0.0 +2024-09-16 20:30:14,749 INFO [train.py:1198] (1/2) Epoch 5, batch 150, loss[loss=0.2609, ctc_loss=0.1913, cr_loss=0.3819, attn_decoder_loss=0.2601, over 29421.00 frames. ], tot_loss[loss=0.3027, ctc_loss=0.2341, cr_loss=0.4389, attn_decoder_loss=0.3005, over 3047127.16 frames. 
], batch size: 70, lr: 2.25e-02, grad_scale: 4.0 +2024-09-16 20:30:39,786 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.45 vs. limit=10.0 +2024-09-16 20:31:09,881 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.201e+01 1.170e+02 1.302e+02 1.516e+02 3.725e+02, threshold=2.604e+02, percent-clipped=3.0 +2024-09-16 20:31:16,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=73160.0, ans=0.125 +2024-09-16 20:31:29,493 INFO [train.py:1198] (1/2) Epoch 5, batch 200, loss[loss=0.3185, ctc_loss=0.2572, cr_loss=0.4506, attn_decoder_loss=0.3153, over 27649.00 frames. ], tot_loss[loss=0.3009, ctc_loss=0.2328, cr_loss=0.4389, attn_decoder_loss=0.2987, over 3659830.64 frames. ], batch size: 125, lr: 2.25e-02, grad_scale: 8.0 +2024-09-16 20:31:29,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=73200.0, ans=0.025 +2024-09-16 20:31:30,353 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=11.97 vs. limit=15.0 +2024-09-16 20:31:42,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=73200.0, ans=0.125 +2024-09-16 20:32:17,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=73320.0, ans=0.025 +2024-09-16 20:32:22,259 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.57 vs. limit=15.0 +2024-09-16 20:32:32,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=73360.0, ans=0.125 +2024-09-16 20:32:35,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=73360.0, ans=0.1 +2024-09-16 20:32:41,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=73360.0, ans=0.125 +2024-09-16 20:32:46,878 INFO [train.py:1198] (1/2) Epoch 5, batch 250, loss[loss=0.3226, ctc_loss=0.2636, cr_loss=0.4511, attn_decoder_loss=0.3191, over 29228.00 frames. ], tot_loss[loss=0.3003, ctc_loss=0.2318, cr_loss=0.4389, attn_decoder_loss=0.2982, over 4140844.65 frames. ], batch size: 100, lr: 2.25e-02, grad_scale: 4.0 +2024-09-16 20:33:21,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=73480.0, ans=0.0 +2024-09-16 20:33:22,403 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.41 vs. 
limit=22.5 +2024-09-16 20:33:23,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=73480.0, ans=0.1 +2024-09-16 20:33:28,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=73480.0, ans=15.0 +2024-09-16 20:33:44,203 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.445e+01 1.169e+02 1.336e+02 1.491e+02 2.357e+02, threshold=2.672e+02, percent-clipped=0.0 +2024-09-16 20:33:44,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=73520.0, ans=0.0 +2024-09-16 20:33:54,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=73560.0, ans=0.125 +2024-09-16 20:33:57,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.77 vs. limit=15.0 +2024-09-16 20:34:04,726 INFO [train.py:1198] (1/2) Epoch 5, batch 300, loss[loss=0.3256, ctc_loss=0.2637, cr_loss=0.4847, attn_decoder_loss=0.3217, over 29534.00 frames. ], tot_loss[loss=0.2997, ctc_loss=0.2313, cr_loss=0.4379, attn_decoder_loss=0.2976, over 4510347.54 frames. ], batch size: 92, lr: 2.24e-02, grad_scale: 8.0 +2024-09-16 20:34:12,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=73600.0, ans=0.125 +2024-09-16 20:34:17,742 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.03 vs. limit=10.0 +2024-09-16 20:34:20,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=73640.0, ans=0.1 +2024-09-16 20:34:32,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=73640.0, ans=0.0 +2024-09-16 20:34:35,618 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.34 vs. limit=15.0 +2024-09-16 20:34:48,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=73720.0, ans=0.1 +2024-09-16 20:35:01,017 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.46 vs. limit=22.5 +2024-09-16 20:35:06,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=73760.0, ans=0.09899494936611666 +2024-09-16 20:35:19,692 INFO [train.py:1198] (1/2) Epoch 5, batch 350, loss[loss=0.2614, ctc_loss=0.1858, cr_loss=0.3824, attn_decoder_loss=0.2613, over 29337.00 frames. ], tot_loss[loss=0.2996, ctc_loss=0.2302, cr_loss=0.4379, attn_decoder_loss=0.2976, over 4797181.79 frames. ], batch size: 71, lr: 2.24e-02, grad_scale: 4.0 +2024-09-16 20:35:20,544 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.48 vs. 
limit=10.0 +2024-09-16 20:35:37,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten.whitening_limit, batch_count=73840.0, ans=15.0 +2024-09-16 20:35:52,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=73880.0, ans=10.0 +2024-09-16 20:35:59,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=73880.0, ans=0.125 +2024-09-16 20:36:16,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=73920.0, ans=0.1 +2024-09-16 20:36:20,663 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.525e+01 1.174e+02 1.354e+02 1.521e+02 2.144e+02, threshold=2.708e+02, percent-clipped=0.0 +2024-09-16 20:36:20,925 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:36:37,083 INFO [train.py:1198] (1/2) Epoch 5, batch 400, loss[loss=0.2948, ctc_loss=0.2188, cr_loss=0.451, attn_decoder_loss=0.2932, over 29713.00 frames. ], tot_loss[loss=0.2993, ctc_loss=0.2299, cr_loss=0.4374, attn_decoder_loss=0.2973, over 5026078.87 frames. ], batch size: 82, lr: 2.24e-02, grad_scale: 8.0 +2024-09-16 20:36:42,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=74000.0, ans=0.07 +2024-09-16 20:36:43,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=74000.0, ans=0.125 +2024-09-16 20:37:00,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=74040.0, ans=0.0 +2024-09-16 20:37:09,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=74080.0, ans=0.125 +2024-09-16 20:37:46,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=74160.0, ans=0.0 +2024-09-16 20:37:55,366 INFO [train.py:1198] (1/2) Epoch 5, batch 450, loss[loss=0.304, ctc_loss=0.225, cr_loss=0.4358, attn_decoder_loss=0.3031, over 29686.00 frames. ], tot_loss[loss=0.2992, ctc_loss=0.2295, cr_loss=0.4377, attn_decoder_loss=0.2973, over 5189200.01 frames. ], batch size: 83, lr: 2.24e-02, grad_scale: 4.0 +2024-09-16 20:37:58,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=74200.0, ans=0.125 +2024-09-16 20:38:27,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=74280.0, ans=0.0 +2024-09-16 20:38:32,826 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.21 vs. 
limit=15.0 +2024-09-16 20:38:47,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=74320.0, ans=0.0 +2024-09-16 20:38:52,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=74320.0, ans=0.025 +2024-09-16 20:38:56,440 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.507e+01 1.148e+02 1.317e+02 1.480e+02 2.097e+02, threshold=2.634e+02, percent-clipped=0.0 +2024-09-16 20:38:58,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=74360.0, ans=0.0 +2024-09-16 20:38:59,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=74360.0, ans=0.125 +2024-09-16 20:39:11,820 INFO [train.py:1198] (1/2) Epoch 5, batch 500, loss[loss=0.3213, ctc_loss=0.2413, cr_loss=0.479, attn_decoder_loss=0.3196, over 29442.00 frames. ], tot_loss[loss=0.298, ctc_loss=0.2283, cr_loss=0.4371, attn_decoder_loss=0.296, over 5331754.84 frames. ], batch size: 94, lr: 2.23e-02, grad_scale: 8.0 +2024-09-16 20:39:21,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=74400.0, ans=0.0 +2024-09-16 20:39:39,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=74440.0, ans=0.125 +2024-09-16 20:39:41,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=74440.0, ans=0.0 +2024-09-16 20:39:44,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=74480.0, ans=0.125 +2024-09-16 20:39:54,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=74480.0, ans=0.125 +2024-09-16 20:40:00,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=74520.0, ans=0.125 +2024-09-16 20:40:00,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=74520.0, ans=0.0 +2024-09-16 20:40:29,198 INFO [train.py:1198] (1/2) Epoch 5, batch 550, loss[loss=0.3057, ctc_loss=0.2367, cr_loss=0.4495, attn_decoder_loss=0.3034, over 28849.00 frames. ], tot_loss[loss=0.2981, ctc_loss=0.2289, cr_loss=0.437, attn_decoder_loss=0.2961, over 5423945.32 frames. ], batch size: 104, lr: 2.23e-02, grad_scale: 2.0 +2024-09-16 20:40:39,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.04 vs. 
limit=15.0 +2024-09-16 20:40:46,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=74640.0, ans=0.125 +2024-09-16 20:40:49,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=74640.0, ans=0.125 +2024-09-16 20:41:32,712 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.970e+01 1.190e+02 1.363e+02 1.590e+02 5.102e+02, threshold=2.726e+02, percent-clipped=4.0 +2024-09-16 20:41:36,189 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:41:44,817 INFO [train.py:1198] (1/2) Epoch 5, batch 600, loss[loss=0.3156, ctc_loss=0.2471, cr_loss=0.4535, attn_decoder_loss=0.3131, over 29238.00 frames. ], tot_loss[loss=0.2981, ctc_loss=0.2284, cr_loss=0.4369, attn_decoder_loss=0.2961, over 5510764.29 frames. ], batch size: 100, lr: 2.23e-02, grad_scale: 4.0 +2024-09-16 20:41:50,928 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.87 vs. limit=15.0 +2024-09-16 20:41:51,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=74800.0, ans=0.0 +2024-09-16 20:42:06,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=74840.0, ans=0.05 +2024-09-16 20:42:11,362 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:42:17,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.84 vs. limit=15.0 +2024-09-16 20:42:23,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=74880.0, ans=0.125 +2024-09-16 20:42:51,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=74960.0, ans=0.2 +2024-09-16 20:42:57,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=74960.0, ans=0.125 +2024-09-16 20:43:00,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=75000.0, ans=0.125 +2024-09-16 20:43:02,179 INFO [train.py:1198] (1/2) Epoch 5, batch 650, loss[loss=0.2998, ctc_loss=0.2315, cr_loss=0.4467, attn_decoder_loss=0.2975, over 29762.00 frames. ], tot_loss[loss=0.2968, ctc_loss=0.2268, cr_loss=0.4347, attn_decoder_loss=0.2949, over 5588035.79 frames. 
], batch size: 81, lr: 2.23e-02, grad_scale: 4.0 +2024-09-16 20:43:02,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=75000.0, ans=0.125 +2024-09-16 20:43:06,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=75000.0, ans=0.025 +2024-09-16 20:43:19,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=75040.0, ans=0.125 +2024-09-16 20:43:20,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=75040.0, ans=0.125 +2024-09-16 20:43:31,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=75040.0, ans=0.0 +2024-09-16 20:43:33,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=75080.0, ans=0.07 +2024-09-16 20:43:33,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=75080.0, ans=0.0 +2024-09-16 20:43:45,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=75080.0, ans=0.025 +2024-09-16 20:43:50,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=75120.0, ans=10.0 +2024-09-16 20:44:07,668 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.399e+01 1.145e+02 1.260e+02 1.468e+02 2.396e+02, threshold=2.520e+02, percent-clipped=0.0 +2024-09-16 20:44:13,135 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.96 vs. limit=15.0 +2024-09-16 20:44:20,245 INFO [train.py:1198] (1/2) Epoch 5, batch 700, loss[loss=0.2795, ctc_loss=0.2083, cr_loss=0.4069, attn_decoder_loss=0.2784, over 29537.00 frames. ], tot_loss[loss=0.2973, ctc_loss=0.2271, cr_loss=0.4351, attn_decoder_loss=0.2954, over 5637889.38 frames. ], batch size: 76, lr: 2.22e-02, grad_scale: 8.0 +2024-09-16 20:44:40,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.73 vs. limit=22.5 +2024-09-16 20:44:44,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=75240.0, ans=0.1 +2024-09-16 20:44:49,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=75280.0, ans=0.0 +2024-09-16 20:44:54,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=75280.0, ans=0.125 +2024-09-16 20:45:01,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=75280.0, ans=0.2 +2024-09-16 20:45:12,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=75320.0, ans=0.1 +2024-09-16 20:45:12,422 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.56 vs. 
limit=22.5 +2024-09-16 20:45:21,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=75360.0, ans=0.025 +2024-09-16 20:45:26,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=75360.0, ans=0.125 +2024-09-16 20:45:35,701 INFO [train.py:1198] (1/2) Epoch 5, batch 750, loss[loss=0.3009, ctc_loss=0.2267, cr_loss=0.423, attn_decoder_loss=0.2998, over 29711.00 frames. ], tot_loss[loss=0.297, ctc_loss=0.2269, cr_loss=0.4352, attn_decoder_loss=0.2952, over 5676273.37 frames. ], batch size: 82, lr: 2.22e-02, grad_scale: 4.0 +2024-09-16 20:45:43,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=75400.0, ans=0.015 +2024-09-16 20:45:55,192 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=6.08 vs. limit=12.0 +2024-09-16 20:46:00,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=75440.0, ans=0.1 +2024-09-16 20:46:06,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=75480.0, ans=0.2 +2024-09-16 20:46:21,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=75520.0, ans=0.1 +2024-09-16 20:46:26,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=75520.0, ans=0.0 +2024-09-16 20:46:27,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=75520.0, ans=0.125 +2024-09-16 20:46:42,563 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.394e+01 1.181e+02 1.291e+02 1.489e+02 2.242e+02, threshold=2.582e+02, percent-clipped=0.0 +2024-09-16 20:46:42,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=75560.0, ans=0.0 +2024-09-16 20:46:44,608 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:46:53,299 INFO [train.py:1198] (1/2) Epoch 5, batch 800, loss[loss=0.2793, ctc_loss=0.2089, cr_loss=0.4036, attn_decoder_loss=0.2781, over 29577.00 frames. ], tot_loss[loss=0.2967, ctc_loss=0.2266, cr_loss=0.4353, attn_decoder_loss=0.2948, over 5707609.49 frames. ], batch size: 73, lr: 2.22e-02, grad_scale: 8.0 +2024-09-16 20:47:14,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=75640.0, ans=0.125 +2024-09-16 20:47:45,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=75720.0, ans=0.09899494936611666 +2024-09-16 20:48:10,225 INFO [train.py:1198] (1/2) Epoch 5, batch 850, loss[loss=0.3086, ctc_loss=0.2305, cr_loss=0.4548, attn_decoder_loss=0.3072, over 29716.00 frames. ], tot_loss[loss=0.2963, ctc_loss=0.2259, cr_loss=0.435, attn_decoder_loss=0.2944, over 5736531.19 frames. 
], batch size: 89, lr: 2.22e-02, grad_scale: 4.0 +2024-09-16 20:48:18,019 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:48:30,409 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=6.52 vs. limit=12.0 +2024-09-16 20:48:32,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=75840.0, ans=0.04949747468305833 +2024-09-16 20:48:35,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=75840.0, ans=0.0 +2024-09-16 20:48:54,679 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.22 vs. limit=15.0 +2024-09-16 20:49:07,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=75920.0, ans=0.2 +2024-09-16 20:49:12,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=75960.0, ans=0.025 +2024-09-16 20:49:16,706 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.020e+02 1.208e+02 1.339e+02 1.559e+02 5.118e+02, threshold=2.679e+02, percent-clipped=4.0 +2024-09-16 20:49:17,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.25 vs. limit=6.0 +2024-09-16 20:49:24,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=76000.0, ans=0.125 +2024-09-16 20:49:25,547 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.10 vs. limit=10.0 +2024-09-16 20:49:26,159 INFO [train.py:1198] (1/2) Epoch 5, batch 900, loss[loss=0.2688, ctc_loss=0.1969, cr_loss=0.4223, attn_decoder_loss=0.2674, over 29601.00 frames. ], tot_loss[loss=0.2968, ctc_loss=0.2266, cr_loss=0.4353, attn_decoder_loss=0.2949, over 5741510.24 frames. ], batch size: 73, lr: 2.21e-02, grad_scale: 8.0 +2024-09-16 20:49:52,619 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:50:04,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=76080.0, ans=0.0 +2024-09-16 20:50:26,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.54 vs. limit=15.0 +2024-09-16 20:50:31,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=76160.0, ans=0.125 +2024-09-16 20:50:33,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=76160.0, ans=0.1 +2024-09-16 20:50:40,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=76160.0, ans=0.0 +2024-09-16 20:50:43,185 INFO [train.py:1198] (1/2) Epoch 5, batch 950, loss[loss=0.2856, ctc_loss=0.2143, cr_loss=0.4221, attn_decoder_loss=0.2842, over 29497.00 frames. ], tot_loss[loss=0.2971, ctc_loss=0.227, cr_loss=0.4358, attn_decoder_loss=0.2952, over 5743342.75 frames. 
], batch size: 74, lr: 2.21e-02, grad_scale: 4.0 +2024-09-16 20:50:47,198 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.93 vs. limit=12.0 +2024-09-16 20:51:12,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=76240.0, ans=0.125 +2024-09-16 20:51:49,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.whiten.whitening_limit, batch_count=76360.0, ans=12.0 +2024-09-16 20:51:52,953 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.976e+01 1.191e+02 1.361e+02 1.638e+02 5.772e+02, threshold=2.722e+02, percent-clipped=5.0 +2024-09-16 20:51:54,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=76360.0, ans=0.2 +2024-09-16 20:51:56,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=76360.0, ans=0.0 +2024-09-16 20:51:57,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=76360.0, ans=0.125 +2024-09-16 20:51:59,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=76400.0, ans=0.125 +2024-09-16 20:52:00,419 INFO [train.py:1198] (1/2) Epoch 5, batch 1000, loss[loss=0.2903, ctc_loss=0.2187, cr_loss=0.4357, attn_decoder_loss=0.2885, over 29499.00 frames. ], tot_loss[loss=0.2981, ctc_loss=0.228, cr_loss=0.4367, attn_decoder_loss=0.2962, over 5738279.96 frames. ], batch size: 77, lr: 2.21e-02, grad_scale: 8.0 +2024-09-16 20:52:01,358 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.19 vs. limit=15.0 +2024-09-16 20:52:58,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=76520.0, ans=0.125 +2024-09-16 20:53:15,781 INFO [train.py:1198] (1/2) Epoch 5, batch 1050, loss[loss=0.3053, ctc_loss=0.2297, cr_loss=0.4418, attn_decoder_loss=0.3039, over 29658.00 frames. ], tot_loss[loss=0.2968, ctc_loss=0.2266, cr_loss=0.435, attn_decoder_loss=0.295, over 5745881.51 frames. ], batch size: 85, lr: 2.21e-02, grad_scale: 4.0 +2024-09-16 20:53:26,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=76600.0, ans=0.0 +2024-09-16 20:53:34,496 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.52 vs. limit=15.0 +2024-09-16 20:53:42,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=76640.0, ans=0.09899494936611666 +2024-09-16 20:53:57,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=76680.0, ans=0.125 +2024-09-16 20:54:21,794 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.61 vs. 
limit=15.0 +2024-09-16 20:54:27,137 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.620e+01 1.158e+02 1.276e+02 1.580e+02 2.597e+02, threshold=2.552e+02, percent-clipped=0.0 +2024-09-16 20:54:27,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=76760.0, ans=0.0 +2024-09-16 20:54:33,690 INFO [train.py:1198] (1/2) Epoch 5, batch 1100, loss[loss=0.288, ctc_loss=0.2203, cr_loss=0.4151, attn_decoder_loss=0.2863, over 29473.00 frames. ], tot_loss[loss=0.2972, ctc_loss=0.227, cr_loss=0.4356, attn_decoder_loss=0.2953, over 5758659.38 frames. ], batch size: 78, lr: 2.20e-02, grad_scale: 8.0 +2024-09-16 20:54:39,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=76800.0, ans=0.2 +2024-09-16 20:54:57,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=76840.0, ans=0.025 +2024-09-16 20:55:00,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=76840.0, ans=0.2 +2024-09-16 20:55:06,939 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=24.65 vs. limit=22.5 +2024-09-16 20:55:11,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=76880.0, ans=0.0 +2024-09-16 20:55:27,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=76920.0, ans=0.125 +2024-09-16 20:55:30,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=76920.0, ans=0.1 +2024-09-16 20:55:41,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=76960.0, ans=0.2 +2024-09-16 20:55:50,862 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.79 vs. limit=22.5 +2024-09-16 20:55:51,249 INFO [train.py:1198] (1/2) Epoch 5, batch 1150, loss[loss=0.2917, ctc_loss=0.2241, cr_loss=0.4525, attn_decoder_loss=0.2892, over 29464.00 frames. ], tot_loss[loss=0.2968, ctc_loss=0.2265, cr_loss=0.435, attn_decoder_loss=0.2949, over 5756476.11 frames. ], batch size: 78, lr: 2.20e-02, grad_scale: 4.0 +2024-09-16 20:56:12,698 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 20:56:21,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=77080.0, ans=0.125 +2024-09-16 20:56:22,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=77080.0, ans=0.0 +2024-09-16 20:56:29,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=77080.0, ans=0.1 +2024-09-16 20:56:45,172 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.76 vs. 
limit=22.5 +2024-09-16 20:56:50,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=77160.0, ans=0.07 +2024-09-16 20:57:02,552 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.643e+01 1.177e+02 1.305e+02 1.494e+02 2.713e+02, threshold=2.610e+02, percent-clipped=1.0 +2024-09-16 20:57:06,990 INFO [train.py:1198] (1/2) Epoch 5, batch 1200, loss[loss=0.2957, ctc_loss=0.2223, cr_loss=0.4383, attn_decoder_loss=0.2941, over 29651.00 frames. ], tot_loss[loss=0.2983, ctc_loss=0.2281, cr_loss=0.4364, attn_decoder_loss=0.2964, over 5749349.11 frames. ], batch size: 85, lr: 2.20e-02, grad_scale: 8.0 +2024-09-16 20:57:13,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=77200.0, ans=0.025 +2024-09-16 20:58:24,346 INFO [train.py:1198] (1/2) Epoch 5, batch 1250, loss[loss=0.3047, ctc_loss=0.2264, cr_loss=0.4517, attn_decoder_loss=0.3033, over 29531.00 frames. ], tot_loss[loss=0.2984, ctc_loss=0.2276, cr_loss=0.4372, attn_decoder_loss=0.2966, over 5776536.46 frames. ], batch size: 92, lr: 2.20e-02, grad_scale: 4.0 +2024-09-16 20:58:24,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=77400.0, ans=0.07 +2024-09-16 20:58:29,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=77400.0, ans=0.2 +2024-09-16 20:58:30,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=77400.0, ans=0.0 +2024-09-16 20:58:43,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=77440.0, ans=0.125 +2024-09-16 20:58:52,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=77440.0, ans=0.5 +2024-09-16 20:58:59,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=77480.0, ans=0.1 +2024-09-16 20:59:02,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=77480.0, ans=0.125 +2024-09-16 20:59:10,944 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.49 vs. limit=15.0 +2024-09-16 20:59:31,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=77560.0, ans=0.1 +2024-09-16 20:59:34,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=77560.0, ans=0.0 +2024-09-16 20:59:38,724 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.029e+02 1.175e+02 1.290e+02 1.503e+02 2.372e+02, threshold=2.579e+02, percent-clipped=0.0 +2024-09-16 20:59:42,115 INFO [train.py:1198] (1/2) Epoch 5, batch 1300, loss[loss=0.3097, ctc_loss=0.24, cr_loss=0.4231, attn_decoder_loss=0.308, over 28260.00 frames. ], tot_loss[loss=0.2972, ctc_loss=0.2262, cr_loss=0.4354, attn_decoder_loss=0.2954, over 5781267.43 frames. 
], batch size: 111, lr: 2.19e-02, grad_scale: 8.0 +2024-09-16 20:59:45,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer_ff3.min_abs, batch_count=77600.0, ans=0.2 +2024-09-16 20:59:47,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=77600.0, ans=0.0 +2024-09-16 20:59:52,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.34 vs. limit=15.0 +2024-09-16 21:00:07,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=77640.0, ans=0.125 +2024-09-16 21:00:27,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=77720.0, ans=0.125 +2024-09-16 21:00:27,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=77720.0, ans=0.0 +2024-09-16 21:00:36,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=77720.0, ans=0.2 +2024-09-16 21:00:36,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=77720.0, ans=0.125 +2024-09-16 21:00:39,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=77720.0, ans=0.1 +2024-09-16 21:00:53,332 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.24 vs. limit=15.0 +2024-09-16 21:00:57,173 INFO [train.py:1198] (1/2) Epoch 5, batch 1350, loss[loss=0.3049, ctc_loss=0.2266, cr_loss=0.463, attn_decoder_loss=0.3033, over 29764.00 frames. ], tot_loss[loss=0.2962, ctc_loss=0.2247, cr_loss=0.4343, attn_decoder_loss=0.2945, over 5796458.46 frames. ], batch size: 81, lr: 2.19e-02, grad_scale: 4.0 +2024-09-16 21:01:07,996 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:01:18,028 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.67 vs. limit=6.0 +2024-09-16 21:01:24,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=77840.0, ans=0.0 +2024-09-16 21:01:33,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.27 vs. limit=15.0 +2024-09-16 21:01:50,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=77920.0, ans=0.0 +2024-09-16 21:02:10,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=77960.0, ans=0.0 +2024-09-16 21:02:12,849 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.348e+01 1.157e+02 1.281e+02 1.429e+02 2.166e+02, threshold=2.563e+02, percent-clipped=0.0 +2024-09-16 21:02:14,435 INFO [train.py:1198] (1/2) Epoch 5, batch 1400, loss[loss=0.2703, ctc_loss=0.2072, cr_loss=0.424, attn_decoder_loss=0.2678, over 29593.00 frames. ], tot_loss[loss=0.2956, ctc_loss=0.2242, cr_loss=0.4337, attn_decoder_loss=0.2939, over 5807438.21 frames. 
], batch size: 69, lr: 2.19e-02, grad_scale: 8.0 +2024-09-16 21:02:14,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=78000.0, ans=0.2 +2024-09-16 21:02:34,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=78040.0, ans=0.09899494936611666 +2024-09-16 21:02:58,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=78080.0, ans=0.2 +2024-09-16 21:03:01,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=78120.0, ans=0.2 +2024-09-16 21:03:03,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=78120.0, ans=0.125 +2024-09-16 21:03:09,348 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:03:10,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=78120.0, ans=0.125 +2024-09-16 21:03:26,136 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:03:31,877 INFO [train.py:1198] (1/2) Epoch 5, batch 1450, loss[loss=0.3091, ctc_loss=0.2328, cr_loss=0.47, attn_decoder_loss=0.3071, over 29442.00 frames. ], tot_loss[loss=0.2964, ctc_loss=0.2249, cr_loss=0.4358, attn_decoder_loss=0.2947, over 5804563.19 frames. ], batch size: 94, lr: 2.19e-02, grad_scale: 4.0 +2024-09-16 21:03:34,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=16.43 vs. limit=15.0 +2024-09-16 21:03:41,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=78200.0, ans=0.0 +2024-09-16 21:03:42,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=78200.0, ans=0.09899494936611666 +2024-09-16 21:03:51,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=78240.0, ans=0.125 +2024-09-16 21:04:21,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=78320.0, ans=0.125 +2024-09-16 21:04:47,756 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.066e+02 1.240e+02 1.382e+02 1.635e+02 6.361e+02, threshold=2.763e+02, percent-clipped=6.0 +2024-09-16 21:04:47,777 INFO [train.py:1198] (1/2) Epoch 5, batch 1500, loss[loss=0.3056, ctc_loss=0.2337, cr_loss=0.4392, attn_decoder_loss=0.3039, over 29633.00 frames. ], tot_loss[loss=0.297, ctc_loss=0.2253, cr_loss=0.4368, attn_decoder_loss=0.2952, over 5805142.05 frames. 
], batch size: 86, lr: 2.18e-02, grad_scale: 8.0 +2024-09-16 21:04:48,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=78400.0, ans=0.125 +2024-09-16 21:04:51,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=78400.0, ans=0.125 +2024-09-16 21:05:35,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=78520.0, ans=0.125 +2024-09-16 21:05:35,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=78520.0, ans=0.125 +2024-09-16 21:05:36,453 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=15.00 vs. limit=15.0 +2024-09-16 21:05:39,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=9.78 vs. limit=15.0 +2024-09-16 21:05:41,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=78520.0, ans=0.125 +2024-09-16 21:06:05,488 INFO [train.py:1198] (1/2) Epoch 5, batch 1550, loss[loss=0.317, ctc_loss=0.2461, cr_loss=0.4816, attn_decoder_loss=0.3142, over 29498.00 frames. ], tot_loss[loss=0.2967, ctc_loss=0.2252, cr_loss=0.4367, attn_decoder_loss=0.295, over 5780551.28 frames. ], batch size: 90, lr: 2.18e-02, grad_scale: 4.0 +2024-09-16 21:06:11,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=78600.0, ans=0.0 +2024-09-16 21:06:37,463 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:06:52,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=78720.0, ans=0.125 +2024-09-16 21:07:00,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=78720.0, ans=0.2 +2024-09-16 21:07:08,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.54 vs. limit=22.5 +2024-09-16 21:07:16,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=78760.0, ans=0.025 +2024-09-16 21:07:18,883 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.54 vs. limit=22.5 +2024-09-16 21:07:22,409 INFO [train.py:1198] (1/2) Epoch 5, batch 1600, loss[loss=0.3047, ctc_loss=0.2264, cr_loss=0.4508, attn_decoder_loss=0.3034, over 29679.00 frames. ], tot_loss[loss=0.2967, ctc_loss=0.2254, cr_loss=0.4359, attn_decoder_loss=0.2949, over 5763835.71 frames. 
], batch size: 85, lr: 2.18e-02, grad_scale: 8.0 +2024-09-16 21:07:23,867 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.848e+01 1.266e+02 1.474e+02 1.762e+02 4.006e+02, threshold=2.948e+02, percent-clipped=2.0 +2024-09-16 21:07:30,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=78800.0, ans=0.0 +2024-09-16 21:07:34,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=78800.0, ans=0.125 +2024-09-16 21:07:38,147 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.66 vs. limit=15.0 +2024-09-16 21:07:46,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=78840.0, ans=0.025 +2024-09-16 21:07:47,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.00 vs. limit=22.5 +2024-09-16 21:07:51,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=78880.0, ans=0.125 +2024-09-16 21:07:51,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=78880.0, ans=0.0 +2024-09-16 21:08:21,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=78960.0, ans=0.0 +2024-09-16 21:08:37,889 INFO [train.py:1198] (1/2) Epoch 5, batch 1650, loss[loss=0.3272, ctc_loss=0.2533, cr_loss=0.4703, attn_decoder_loss=0.3249, over 29728.00 frames. ], tot_loss[loss=0.2966, ctc_loss=0.2258, cr_loss=0.4357, attn_decoder_loss=0.2948, over 5758359.28 frames. ], batch size: 89, lr: 2.18e-02, grad_scale: 4.0 +2024-09-16 21:08:38,544 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.94 vs. limit=15.0 +2024-09-16 21:08:51,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=79040.0, ans=0.125 +2024-09-16 21:08:59,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=79040.0, ans=0.035 +2024-09-16 21:09:03,064 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=3.88 vs. limit=15.0 +2024-09-16 21:09:21,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=79120.0, ans=0.1 +2024-09-16 21:09:36,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=79120.0, ans=0.125 +2024-09-16 21:09:55,746 INFO [train.py:1198] (1/2) Epoch 5, batch 1700, loss[loss=0.2633, ctc_loss=0.1992, cr_loss=0.3834, attn_decoder_loss=0.2619, over 29597.00 frames. ], tot_loss[loss=0.2961, ctc_loss=0.2247, cr_loss=0.4351, attn_decoder_loss=0.2943, over 5780138.55 frames. 
], batch size: 69, lr: 2.17e-02, grad_scale: 8.0 +2024-09-16 21:10:00,286 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.635e+01 1.159e+02 1.263e+02 1.450e+02 2.662e+02, threshold=2.527e+02, percent-clipped=0.0 +2024-09-16 21:10:11,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=79240.0, ans=0.1 +2024-09-16 21:10:14,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=79240.0, ans=0.035 +2024-09-16 21:10:14,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=79240.0, ans=0.125 +2024-09-16 21:10:25,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=79240.0, ans=0.125 +2024-09-16 21:10:44,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=79320.0, ans=0.0 +2024-09-16 21:11:07,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=79360.0, ans=0.125 +2024-09-16 21:11:12,986 INFO [train.py:1198] (1/2) Epoch 5, batch 1750, loss[loss=0.2665, ctc_loss=0.1991, cr_loss=0.398, attn_decoder_loss=0.2651, over 29354.00 frames. ], tot_loss[loss=0.2957, ctc_loss=0.2242, cr_loss=0.4349, attn_decoder_loss=0.294, over 5788561.03 frames. ], batch size: 67, lr: 2.17e-02, grad_scale: 4.0 +2024-09-16 21:11:23,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=79400.0, ans=0.1 +2024-09-16 21:11:42,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=79480.0, ans=0.1 +2024-09-16 21:11:42,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=79480.0, ans=0.125 +2024-09-16 21:11:48,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=79480.0, ans=0.0 +2024-09-16 21:11:51,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=79480.0, ans=0.125 +2024-09-16 21:11:51,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=79480.0, ans=0.125 +2024-09-16 21:12:02,354 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.77 vs. limit=15.0 +2024-09-16 21:12:05,361 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.88 vs. limit=22.5 +2024-09-16 21:12:13,030 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.13 vs. limit=10.0 +2024-09-16 21:12:16,108 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=7.01 vs. limit=15.0 +2024-09-16 21:12:28,476 INFO [train.py:1198] (1/2) Epoch 5, batch 1800, loss[loss=0.2979, ctc_loss=0.22, cr_loss=0.4189, attn_decoder_loss=0.2973, over 29690.00 frames. ], tot_loss[loss=0.2957, ctc_loss=0.224, cr_loss=0.4348, attn_decoder_loss=0.294, over 5790852.43 frames. 
], batch size: 83, lr: 2.17e-02, grad_scale: 8.0 +2024-09-16 21:12:34,617 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.637e+01 1.099e+02 1.224e+02 1.443e+02 2.616e+02, threshold=2.449e+02, percent-clipped=2.0 +2024-09-16 21:12:53,547 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.99 vs. limit=15.0 +2024-09-16 21:13:06,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=79680.0, ans=0.125 +2024-09-16 21:13:26,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=79720.0, ans=0.0 +2024-09-16 21:13:29,991 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=18.04 vs. limit=15.0 +2024-09-16 21:13:43,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=79760.0, ans=10.0 +2024-09-16 21:13:46,150 INFO [train.py:1198] (1/2) Epoch 5, batch 1850, loss[loss=0.2992, ctc_loss=0.2216, cr_loss=0.4125, attn_decoder_loss=0.2987, over 29631.00 frames. ], tot_loss[loss=0.2958, ctc_loss=0.2242, cr_loss=0.4355, attn_decoder_loss=0.2941, over 5794109.61 frames. ], batch size: 86, lr: 2.17e-02, grad_scale: 4.0 +2024-09-16 21:14:42,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=79920.0, ans=0.2 +2024-09-16 21:14:44,102 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.28 vs. limit=10.0 +2024-09-16 21:14:54,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=79960.0, ans=0.0 +2024-09-16 21:14:59,120 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.12 vs. limit=12.0 +2024-09-16 21:15:10,169 INFO [train.py:1198] (1/2) Epoch 5, batch 1900, loss[loss=0.3013, ctc_loss=0.2228, cr_loss=0.4309, attn_decoder_loss=0.3005, over 29732.00 frames. ], tot_loss[loss=0.2958, ctc_loss=0.2238, cr_loss=0.4351, attn_decoder_loss=0.2941, over 5803764.62 frames. ], batch size: 89, lr: 2.16e-02, grad_scale: 8.0 +2024-09-16 21:15:17,676 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.933e+01 1.145e+02 1.241e+02 1.387e+02 2.102e+02, threshold=2.481e+02, percent-clipped=0.0 +2024-09-16 21:15:17,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=80000.0, ans=0.125 +2024-09-16 21:15:27,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_ff2.min_abs, batch_count=80040.0, ans=0.1 +2024-09-16 21:15:39,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=80080.0, ans=0.125 +2024-09-16 21:15:58,360 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.85 vs. 
limit=15.0 +2024-09-16 21:16:03,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=80120.0, ans=0.0 +2024-09-16 21:16:26,173 INFO [train.py:1198] (1/2) Epoch 5, batch 1950, loss[loss=0.2956, ctc_loss=0.2204, cr_loss=0.4205, attn_decoder_loss=0.2946, over 29453.00 frames. ], tot_loss[loss=0.2974, ctc_loss=0.2252, cr_loss=0.4375, attn_decoder_loss=0.2957, over 5818778.34 frames. ], batch size: 78, lr: 2.16e-02, grad_scale: 4.0 +2024-09-16 21:16:31,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=80200.0, ans=0.125 +2024-09-16 21:16:40,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=80240.0, ans=0.125 +2024-09-16 21:16:56,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=80280.0, ans=0.1 +2024-09-16 21:17:04,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=80280.0, ans=0.125 +2024-09-16 21:17:16,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=80320.0, ans=0.125 +2024-09-16 21:17:27,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=80360.0, ans=0.025 +2024-09-16 21:17:28,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=80360.0, ans=0.0 +2024-09-16 21:17:38,482 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.39 vs. limit=6.0 +2024-09-16 21:17:40,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=80360.0, ans=0.0 +2024-09-16 21:17:42,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.83 vs. limit=22.5 +2024-09-16 21:17:43,615 INFO [train.py:1198] (1/2) Epoch 5, batch 2000, loss[loss=0.2621, ctc_loss=0.1984, cr_loss=0.4004, attn_decoder_loss=0.2603, over 29352.00 frames. ], tot_loss[loss=0.2981, ctc_loss=0.2263, cr_loss=0.438, attn_decoder_loss=0.2964, over 5797024.42 frames. 
], batch size: 67, lr: 2.16e-02, grad_scale: 8.0 +2024-09-16 21:17:52,704 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.796e+01 1.236e+02 1.402e+02 1.608e+02 2.421e+02, threshold=2.804e+02, percent-clipped=0.0 +2024-09-16 21:17:53,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=80400.0, ans=0.0 +2024-09-16 21:18:29,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=80520.0, ans=0.0 +2024-09-16 21:18:35,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=80520.0, ans=0.0 +2024-09-16 21:18:40,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=80520.0, ans=0.1 +2024-09-16 21:18:55,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=80560.0, ans=0.125 +2024-09-16 21:18:58,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=80560.0, ans=0.125 +2024-09-16 21:19:01,084 INFO [train.py:1198] (1/2) Epoch 5, batch 2050, loss[loss=0.2629, ctc_loss=0.1894, cr_loss=0.3951, attn_decoder_loss=0.2623, over 29406.00 frames. ], tot_loss[loss=0.2967, ctc_loss=0.225, cr_loss=0.4363, attn_decoder_loss=0.295, over 5789017.75 frames. ], batch size: 70, lr: 2.16e-02, grad_scale: 4.0 +2024-09-16 21:19:04,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=80600.0, ans=0.0 +2024-09-16 21:19:30,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=80680.0, ans=0.125 +2024-09-16 21:19:43,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=80680.0, ans=0.0 +2024-09-16 21:19:55,079 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.17 vs. limit=22.5 +2024-09-16 21:19:59,433 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.42 vs. limit=15.0 +2024-09-16 21:20:00,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=80760.0, ans=0.2 +2024-09-16 21:20:07,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=80760.0, ans=0.0 +2024-09-16 21:20:13,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=80760.0, ans=0.125 +2024-09-16 21:20:15,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=80800.0, ans=0.125 +2024-09-16 21:20:17,202 INFO [train.py:1198] (1/2) Epoch 5, batch 2100, loss[loss=0.2949, ctc_loss=0.2235, cr_loss=0.4564, attn_decoder_loss=0.2927, over 29758.00 frames. ], tot_loss[loss=0.2955, ctc_loss=0.2236, cr_loss=0.435, attn_decoder_loss=0.2938, over 5801450.23 frames. 
], batch size: 81, lr: 2.15e-02, grad_scale: 8.0 +2024-09-16 21:20:22,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=80800.0, ans=0.1 +2024-09-16 21:20:23,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=80800.0, ans=0.0 +2024-09-16 21:20:27,408 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.761e+01 1.220e+02 1.373e+02 1.548e+02 8.609e+02, threshold=2.746e+02, percent-clipped=3.0 +2024-09-16 21:20:55,688 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=6.12 vs. limit=12.0 +2024-09-16 21:21:14,925 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.99 vs. limit=22.5 +2024-09-16 21:21:31,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=80960.0, ans=0.125 +2024-09-16 21:21:34,120 INFO [train.py:1198] (1/2) Epoch 5, batch 2150, loss[loss=0.2808, ctc_loss=0.2137, cr_loss=0.4592, attn_decoder_loss=0.278, over 29432.00 frames. ], tot_loss[loss=0.2943, ctc_loss=0.2219, cr_loss=0.4343, attn_decoder_loss=0.2927, over 5816314.16 frames. ], batch size: 78, lr: 2.15e-02, grad_scale: 4.0 +2024-09-16 21:21:38,876 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:21:38,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=81000.0, ans=0.2 +2024-09-16 21:21:54,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=81040.0, ans=0.2 +2024-09-16 21:22:11,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=81080.0, ans=0.125 +2024-09-16 21:22:18,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=81080.0, ans=0.2 +2024-09-16 21:22:51,824 INFO [train.py:1198] (1/2) Epoch 5, batch 2200, loss[loss=0.2968, ctc_loss=0.2183, cr_loss=0.419, attn_decoder_loss=0.2962, over 29624.00 frames. ], tot_loss[loss=0.2948, ctc_loss=0.2227, cr_loss=0.4354, attn_decoder_loss=0.2931, over 5812410.71 frames. ], batch size: 86, lr: 2.15e-02, grad_scale: 8.0 +2024-09-16 21:23:02,276 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.674e+01 1.183e+02 1.300e+02 1.517e+02 2.352e+02, threshold=2.600e+02, percent-clipped=0.0 +2024-09-16 21:23:08,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=81240.0, ans=0.0 +2024-09-16 21:23:09,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=81240.0, ans=0.125 +2024-09-16 21:23:17,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=81240.0, ans=0.0 +2024-09-16 21:23:21,093 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.99 vs. 
limit=15.0 +2024-09-16 21:23:43,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=81320.0, ans=0.1 +2024-09-16 21:23:50,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=81360.0, ans=0.04949747468305833 +2024-09-16 21:24:07,113 INFO [train.py:1198] (1/2) Epoch 5, batch 2250, loss[loss=0.2997, ctc_loss=0.2249, cr_loss=0.4246, attn_decoder_loss=0.2986, over 29712.00 frames. ], tot_loss[loss=0.2946, ctc_loss=0.2225, cr_loss=0.4346, attn_decoder_loss=0.2929, over 5811740.83 frames. ], batch size: 82, lr: 2.15e-02, grad_scale: 4.0 +2024-09-16 21:24:11,141 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.42 vs. limit=15.0 +2024-09-16 21:24:15,568 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.50 vs. limit=22.5 +2024-09-16 21:24:31,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=81440.0, ans=0.1 +2024-09-16 21:24:32,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=81440.0, ans=0.2 +2024-09-16 21:25:02,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=81520.0, ans=0.125 +2024-09-16 21:25:18,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.30 vs. limit=15.0 +2024-09-16 21:25:19,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=81560.0, ans=0.125 +2024-09-16 21:25:23,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=81600.0, ans=0.125 +2024-09-16 21:25:25,069 INFO [train.py:1198] (1/2) Epoch 5, batch 2300, loss[loss=0.2634, ctc_loss=0.189, cr_loss=0.3993, attn_decoder_loss=0.2628, over 29335.00 frames. ], tot_loss[loss=0.294, ctc_loss=0.2224, cr_loss=0.4339, attn_decoder_loss=0.2923, over 5799226.88 frames. ], batch size: 71, lr: 2.15e-02, grad_scale: 8.0 +2024-09-16 21:25:38,307 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.849e+01 1.191e+02 1.337e+02 1.602e+02 2.823e+02, threshold=2.675e+02, percent-clipped=3.0 +2024-09-16 21:25:46,302 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:26:42,309 INFO [train.py:1198] (1/2) Epoch 5, batch 2350, loss[loss=0.3109, ctc_loss=0.2432, cr_loss=0.4497, attn_decoder_loss=0.3084, over 29692.00 frames. ], tot_loss[loss=0.2944, ctc_loss=0.2228, cr_loss=0.4349, attn_decoder_loss=0.2927, over 5804218.41 frames. 
], batch size: 83, lr: 2.14e-02, grad_scale: 4.0 +2024-09-16 21:26:55,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=81840.0, ans=0.0 +2024-09-16 21:27:01,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=81840.0, ans=0.0 +2024-09-16 21:27:06,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=81840.0, ans=0.025 +2024-09-16 21:27:21,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=81880.0, ans=0.07 +2024-09-16 21:27:44,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=81960.0, ans=0.125 +2024-09-16 21:27:47,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=81960.0, ans=0.0 +2024-09-16 21:27:52,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.79 vs. limit=15.0 +2024-09-16 21:27:58,015 INFO [train.py:1198] (1/2) Epoch 5, batch 2400, loss[loss=0.2844, ctc_loss=0.212, cr_loss=0.4188, attn_decoder_loss=0.2831, over 29539.00 frames. ], tot_loss[loss=0.2951, ctc_loss=0.2233, cr_loss=0.4352, attn_decoder_loss=0.2934, over 5807577.23 frames. ], batch size: 76, lr: 2.14e-02, grad_scale: 8.0 +2024-09-16 21:28:13,104 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.326e+01 1.225e+02 1.360e+02 1.581e+02 2.424e+02, threshold=2.721e+02, percent-clipped=0.0 +2024-09-16 21:28:13,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=82040.0, ans=0.035 +2024-09-16 21:28:14,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=82040.0, ans=0.125 +2024-09-16 21:28:33,733 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=14.29 vs. limit=15.0 +2024-09-16 21:28:39,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=82080.0, ans=0.0 +2024-09-16 21:29:07,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten.whitening_limit, batch_count=82160.0, ans=15.0 +2024-09-16 21:29:09,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=82160.0, ans=0.125 +2024-09-16 21:29:11,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=82160.0, ans=0.0 +2024-09-16 21:29:14,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=82200.0, ans=0.95 +2024-09-16 21:29:15,781 INFO [train.py:1198] (1/2) Epoch 5, batch 2450, loss[loss=0.3016, ctc_loss=0.2262, cr_loss=0.4549, attn_decoder_loss=0.2998, over 29719.00 frames. ], tot_loss[loss=0.2962, ctc_loss=0.2242, cr_loss=0.4356, attn_decoder_loss=0.2945, over 5784884.97 frames. 
], batch size: 82, lr: 2.14e-02, grad_scale: 4.0 +2024-09-16 21:29:17,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=82200.0, ans=0.0 +2024-09-16 21:29:45,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=82240.0, ans=0.125 +2024-09-16 21:29:49,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=7.15 vs. limit=15.0 +2024-09-16 21:29:52,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=82280.0, ans=0.0 +2024-09-16 21:30:33,938 INFO [train.py:1198] (1/2) Epoch 5, batch 2500, loss[loss=0.3037, ctc_loss=0.2261, cr_loss=0.4433, attn_decoder_loss=0.3025, over 29655.00 frames. ], tot_loss[loss=0.2958, ctc_loss=0.2238, cr_loss=0.436, attn_decoder_loss=0.2941, over 5795298.12 frames. ], batch size: 86, lr: 2.14e-02, grad_scale: 8.0 +2024-09-16 21:30:50,578 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.347e+01 1.183e+02 1.324e+02 1.493e+02 3.213e+02, threshold=2.647e+02, percent-clipped=2.0 +2024-09-16 21:30:59,238 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.04 vs. limit=22.5 +2024-09-16 21:31:20,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.59 vs. limit=15.0 +2024-09-16 21:31:49,664 INFO [train.py:1198] (1/2) Epoch 5, batch 2550, loss[loss=0.2695, ctc_loss=0.2029, cr_loss=0.3962, attn_decoder_loss=0.2681, over 29302.00 frames. ], tot_loss[loss=0.2957, ctc_loss=0.2235, cr_loss=0.436, attn_decoder_loss=0.294, over 5798909.23 frames. ], batch size: 67, lr: 2.13e-02, grad_scale: 4.0 +2024-09-16 21:32:04,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=82640.0, ans=0.2 +2024-09-16 21:32:33,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=82720.0, ans=0.1 +2024-09-16 21:32:36,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=82720.0, ans=0.125 +2024-09-16 21:32:44,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=82720.0, ans=0.0 +2024-09-16 21:32:44,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.19 vs. limit=15.0 +2024-09-16 21:32:46,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=82720.0, ans=0.1 +2024-09-16 21:33:04,970 INFO [train.py:1198] (1/2) Epoch 5, batch 2600, loss[loss=0.2849, ctc_loss=0.2042, cr_loss=0.4234, attn_decoder_loss=0.2845, over 29433.00 frames. ], tot_loss[loss=0.2961, ctc_loss=0.2238, cr_loss=0.4364, attn_decoder_loss=0.2944, over 5794567.13 frames. 
], batch size: 78, lr: 2.13e-02, grad_scale: 8.0 +2024-09-16 21:33:22,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=82840.0, ans=0.2 +2024-09-16 21:33:25,239 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.991e+01 1.177e+02 1.349e+02 1.549e+02 3.059e+02, threshold=2.698e+02, percent-clipped=1.0 +2024-09-16 21:33:25,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=82840.0, ans=0.1 +2024-09-16 21:33:31,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=82840.0, ans=0.1 +2024-09-16 21:33:37,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=82880.0, ans=0.0 +2024-09-16 21:33:47,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=82880.0, ans=0.125 +2024-09-16 21:33:53,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=82920.0, ans=0.5 +2024-09-16 21:34:03,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=82920.0, ans=0.1 +2024-09-16 21:34:05,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=82920.0, ans=0.1 +2024-09-16 21:34:14,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=82960.0, ans=0.1 +2024-09-16 21:34:24,540 INFO [train.py:1198] (1/2) Epoch 5, batch 2650, loss[loss=0.314, ctc_loss=0.2459, cr_loss=0.4779, attn_decoder_loss=0.311, over 29250.00 frames. ], tot_loss[loss=0.2957, ctc_loss=0.2233, cr_loss=0.4363, attn_decoder_loss=0.2941, over 5801238.42 frames. ], batch size: 100, lr: 2.13e-02, grad_scale: 4.0 +2024-09-16 21:34:24,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=83000.0, ans=0.125 +2024-09-16 21:34:34,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.83 vs. limit=15.0 +2024-09-16 21:35:10,267 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.35 vs. limit=6.0 +2024-09-16 21:35:33,359 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.28 vs. limit=22.5 +2024-09-16 21:35:38,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=83200.0, ans=0.125 +2024-09-16 21:35:40,217 INFO [train.py:1198] (1/2) Epoch 5, batch 2700, loss[loss=0.3109, ctc_loss=0.2317, cr_loss=0.4501, attn_decoder_loss=0.3098, over 29550.00 frames. ], tot_loss[loss=0.2964, ctc_loss=0.2238, cr_loss=0.4377, attn_decoder_loss=0.2947, over 5796712.50 frames. 
], batch size: 87, lr: 2.13e-02, grad_scale: 8.0 +2024-09-16 21:35:40,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=83200.0, ans=0.0 +2024-09-16 21:35:43,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=83200.0, ans=0.125 +2024-09-16 21:35:55,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=83240.0, ans=0.2 +2024-09-16 21:35:59,709 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.832e+01 1.218e+02 1.347e+02 1.527e+02 8.149e+02, threshold=2.695e+02, percent-clipped=3.0 +2024-09-16 21:36:41,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=83360.0, ans=0.2 +2024-09-16 21:36:45,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=83360.0, ans=0.0 +2024-09-16 21:36:56,079 INFO [train.py:1198] (1/2) Epoch 5, batch 2750, loss[loss=0.2851, ctc_loss=0.2179, cr_loss=0.4061, attn_decoder_loss=0.2836, over 29544.00 frames. ], tot_loss[loss=0.2947, ctc_loss=0.2224, cr_loss=0.4353, attn_decoder_loss=0.2931, over 5795131.23 frames. ], batch size: 75, lr: 2.12e-02, grad_scale: 4.0 +2024-09-16 21:37:24,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=83440.0, ans=0.035 +2024-09-16 21:37:34,038 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.69 vs. limit=15.0 +2024-09-16 21:37:48,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=83520.0, ans=0.125 +2024-09-16 21:37:50,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=83520.0, ans=0.0 +2024-09-16 21:37:53,545 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=13.01 vs. limit=15.0 +2024-09-16 21:37:57,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=83520.0, ans=0.125 +2024-09-16 21:38:02,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=83560.0, ans=0.2 +2024-09-16 21:38:03,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=83560.0, ans=0.0 +2024-09-16 21:38:15,623 INFO [train.py:1198] (1/2) Epoch 5, batch 2800, loss[loss=0.3413, ctc_loss=0.3019, cr_loss=0.4591, attn_decoder_loss=0.3355, over 20413.00 frames. ], tot_loss[loss=0.2951, ctc_loss=0.2228, cr_loss=0.4354, attn_decoder_loss=0.2934, over 5776059.39 frames. ], batch size: 210, lr: 2.12e-02, grad_scale: 8.0 +2024-09-16 21:38:16,737 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.34 vs. 
limit=15.0 +2024-09-16 21:38:26,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=83600.0, ans=0.125 +2024-09-16 21:38:30,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=83640.0, ans=0.0 +2024-09-16 21:38:36,673 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.006e+02 1.137e+02 1.290e+02 1.487e+02 2.968e+02, threshold=2.580e+02, percent-clipped=1.0 +2024-09-16 21:38:41,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=83640.0, ans=0.0 +2024-09-16 21:38:46,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=83680.0, ans=0.125 +2024-09-16 21:38:46,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=83680.0, ans=0.0 +2024-09-16 21:38:49,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=83680.0, ans=0.09899494936611666 +2024-09-16 21:38:53,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=83680.0, ans=0.125 +2024-09-16 21:38:59,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=83720.0, ans=0.0 +2024-09-16 21:39:02,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=83720.0, ans=0.0 +2024-09-16 21:39:22,053 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:39:30,816 INFO [train.py:1198] (1/2) Epoch 5, batch 2850, loss[loss=0.2909, ctc_loss=0.2178, cr_loss=0.448, attn_decoder_loss=0.289, over 29506.00 frames. ], tot_loss[loss=0.2957, ctc_loss=0.2232, cr_loss=0.4355, attn_decoder_loss=0.2941, over 5762380.63 frames. ], batch size: 77, lr: 2.12e-02, grad_scale: 4.0 +2024-09-16 21:39:37,909 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.79 vs. limit=15.0 +2024-09-16 21:39:55,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=83840.0, ans=0.025 +2024-09-16 21:40:20,191 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.57 vs. limit=6.0 +2024-09-16 21:40:22,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=83920.0, ans=0.125 +2024-09-16 21:40:22,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=83920.0, ans=0.5 +2024-09-16 21:40:23,284 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.29 vs. limit=15.0 +2024-09-16 21:40:40,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=83960.0, ans=0.1 +2024-09-16 21:40:47,090 INFO [train.py:1198] (1/2) Epoch 5, batch 2900, loss[loss=0.2957, ctc_loss=0.2264, cr_loss=0.4551, attn_decoder_loss=0.2933, over 29441.00 frames. 
], tot_loss[loss=0.2969, ctc_loss=0.2242, cr_loss=0.4369, attn_decoder_loss=0.2952, over 5788279.91 frames. ], batch size: 79, lr: 2.12e-02, grad_scale: 8.0 +2024-09-16 21:40:53,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=84000.0, ans=0.2 +2024-09-16 21:41:12,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=84040.0, ans=0.025 +2024-09-16 21:41:13,837 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.341e+01 1.106e+02 1.208e+02 1.366e+02 2.377e+02, threshold=2.415e+02, percent-clipped=0.0 +2024-09-16 21:41:14,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=84040.0, ans=0.05 +2024-09-16 21:41:20,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=84080.0, ans=0.125 +2024-09-16 21:41:44,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=84120.0, ans=0.2 +2024-09-16 21:41:47,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=84120.0, ans=0.0 +2024-09-16 21:41:49,285 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.03 vs. limit=10.0 +2024-09-16 21:41:50,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.93 vs. limit=15.0 +2024-09-16 21:41:57,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=84160.0, ans=0.95 +2024-09-16 21:42:06,519 INFO [train.py:1198] (1/2) Epoch 5, batch 2950, loss[loss=0.2773, ctc_loss=0.1999, cr_loss=0.4101, attn_decoder_loss=0.2768, over 29519.00 frames. ], tot_loss[loss=0.2952, ctc_loss=0.2226, cr_loss=0.4345, attn_decoder_loss=0.2937, over 5782344.26 frames. ], batch size: 75, lr: 2.12e-02, grad_scale: 4.0 +2024-09-16 21:42:11,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=84200.0, ans=0.1 +2024-09-16 21:42:40,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=84280.0, ans=0.2 +2024-09-16 21:42:45,093 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=15.79 vs. limit=15.0 +2024-09-16 21:42:47,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=84280.0, ans=0.125 +2024-09-16 21:42:51,592 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.17 vs. 
limit=22.5 +2024-09-16 21:42:52,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=84320.0, ans=0.025 +2024-09-16 21:42:59,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=84320.0, ans=0.125 +2024-09-16 21:43:02,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=84320.0, ans=0.125 +2024-09-16 21:43:22,231 INFO [train.py:1198] (1/2) Epoch 5, batch 3000, loss[loss=0.3006, ctc_loss=0.2313, cr_loss=0.4611, attn_decoder_loss=0.2981, over 29761.00 frames. ], tot_loss[loss=0.2948, ctc_loss=0.2221, cr_loss=0.4337, attn_decoder_loss=0.2932, over 5783036.95 frames. ], batch size: 81, lr: 2.11e-02, grad_scale: 8.0 +2024-09-16 21:43:22,231 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 21:43:33,918 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([2.6347, 3.7815, 2.7621, 2.9047, 2.3242, 3.2492, 3.8772, 3.6092], + device='cuda:1') +2024-09-16 21:43:40,543 INFO [train.py:1230] (1/2) Epoch 5, validation: loss=0.2221, ctc_loss=0.06863, cr_loss=4.342e-15, attn_decoder_loss=0.2392, over 944034.00 frames. +2024-09-16 21:43:40,544 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 21:43:49,162 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.32 vs. limit=10.0 +2024-09-16 21:43:56,410 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.45 vs. limit=6.0 +2024-09-16 21:44:03,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=84440.0, ans=0.0 +2024-09-16 21:44:04,653 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.812e+01 1.181e+02 1.340e+02 1.602e+02 4.120e+02, threshold=2.680e+02, percent-clipped=4.0 +2024-09-16 21:44:09,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=84480.0, ans=0.0 +2024-09-16 21:44:24,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=84520.0, ans=0.2 +2024-09-16 21:44:35,867 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.40 vs. limit=12.0 +2024-09-16 21:44:49,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=84560.0, ans=0.2 +2024-09-16 21:45:00,282 INFO [train.py:1198] (1/2) Epoch 5, batch 3050, loss[loss=0.2966, ctc_loss=0.2274, cr_loss=0.4636, attn_decoder_loss=0.294, over 29519.00 frames. ], tot_loss[loss=0.2949, ctc_loss=0.2221, cr_loss=0.4343, attn_decoder_loss=0.2934, over 5777056.19 frames. ], batch size: 76, lr: 2.11e-02, grad_scale: 4.0 +2024-09-16 21:45:05,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=84600.0, ans=0.2 +2024-09-16 21:45:07,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.80 vs. 
limit=12.0 +2024-09-16 21:45:20,790 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.14 vs. limit=15.0 +2024-09-16 21:45:23,488 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.80 vs. limit=10.0 +2024-09-16 21:45:26,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=84640.0, ans=0.125 +2024-09-16 21:45:35,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=84680.0, ans=0.125 +2024-09-16 21:45:45,189 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=23.36 vs. limit=22.5 +2024-09-16 21:45:47,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=84720.0, ans=0.125 +2024-09-16 21:45:56,895 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.53 vs. limit=15.0 +2024-09-16 21:46:01,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=84760.0, ans=0.0 +2024-09-16 21:46:02,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=84760.0, ans=0.0 +2024-09-16 21:46:11,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=84760.0, ans=0.05 +2024-09-16 21:46:16,180 INFO [train.py:1198] (1/2) Epoch 5, batch 3100, loss[loss=0.2985, ctc_loss=0.2212, cr_loss=0.4211, attn_decoder_loss=0.2977, over 29255.00 frames. ], tot_loss[loss=0.2942, ctc_loss=0.2211, cr_loss=0.4332, attn_decoder_loss=0.2927, over 5777239.34 frames. ], batch size: 100, lr: 2.11e-02, grad_scale: 8.0 +2024-09-16 21:46:35,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=84840.0, ans=0.1 +2024-09-16 21:46:41,655 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.874e+01 1.199e+02 1.306e+02 1.594e+02 3.534e+02, threshold=2.612e+02, percent-clipped=1.0 +2024-09-16 21:46:44,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=84880.0, ans=0.125 +2024-09-16 21:46:48,940 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.45 vs. limit=6.0 +2024-09-16 21:47:07,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=84920.0, ans=0.125 +2024-09-16 21:47:31,528 INFO [train.py:1198] (1/2) Epoch 5, batch 3150, loss[loss=0.3178, ctc_loss=0.2435, cr_loss=0.4778, attn_decoder_loss=0.3155, over 28841.00 frames. ], tot_loss[loss=0.294, ctc_loss=0.2206, cr_loss=0.4336, attn_decoder_loss=0.2925, over 5783936.66 frames. ], batch size: 104, lr: 2.11e-02, grad_scale: 4.0 +2024-09-16 21:47:34,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.57 vs. 
limit=15.0 +2024-09-16 21:47:34,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=85000.0, ans=0.0 +2024-09-16 21:47:51,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=85040.0, ans=0.0 +2024-09-16 21:47:57,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=85040.0, ans=0.2 +2024-09-16 21:48:13,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=85080.0, ans=0.0 +2024-09-16 21:48:13,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=85080.0, ans=0.2 +2024-09-16 21:48:18,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=85120.0, ans=0.0 +2024-09-16 21:48:24,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=85120.0, ans=0.0 +2024-09-16 21:48:28,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=85120.0, ans=0.125 +2024-09-16 21:48:45,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=85160.0, ans=0.95 +2024-09-16 21:48:50,891 INFO [train.py:1198] (1/2) Epoch 5, batch 3200, loss[loss=0.277, ctc_loss=0.1958, cr_loss=0.4129, attn_decoder_loss=0.2768, over 29782.00 frames. ], tot_loss[loss=0.2931, ctc_loss=0.2196, cr_loss=0.4329, attn_decoder_loss=0.2916, over 5794225.78 frames. ], batch size: 80, lr: 2.10e-02, grad_scale: 8.0 +2024-09-16 21:49:09,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=85240.0, ans=22.5 +2024-09-16 21:49:18,344 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.158e+01 1.087e+02 1.227e+02 1.343e+02 2.511e+02, threshold=2.453e+02, percent-clipped=0.0 +2024-09-16 21:49:18,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=85240.0, ans=0.125 +2024-09-16 21:49:20,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=85280.0, ans=0.0 +2024-09-16 21:49:27,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=85280.0, ans=0.2 +2024-09-16 21:49:53,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=85360.0, ans=0.125 +2024-09-16 21:50:07,123 INFO [train.py:1198] (1/2) Epoch 5, batch 3250, loss[loss=0.3012, ctc_loss=0.2351, cr_loss=0.4185, attn_decoder_loss=0.2993, over 29700.00 frames. ], tot_loss[loss=0.2937, ctc_loss=0.2199, cr_loss=0.434, attn_decoder_loss=0.2922, over 5800014.28 frames. ], batch size: 84, lr: 2.10e-02, grad_scale: 4.0 +2024-09-16 21:51:22,828 INFO [train.py:1198] (1/2) Epoch 5, batch 3300, loss[loss=0.3163, ctc_loss=0.2485, cr_loss=0.4695, attn_decoder_loss=0.3133, over 28276.00 frames. ], tot_loss[loss=0.2925, ctc_loss=0.2193, cr_loss=0.4328, attn_decoder_loss=0.2911, over 5798095.83 frames. 
], batch size: 111, lr: 2.10e-02, grad_scale: 8.0 +2024-09-16 21:51:33,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=85600.0, ans=0.04949747468305833 +2024-09-16 21:51:36,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=85640.0, ans=0.125 +2024-09-16 21:51:51,607 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.589e+01 1.170e+02 1.337e+02 1.496e+02 4.068e+02, threshold=2.673e+02, percent-clipped=4.0 +2024-09-16 21:51:54,469 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.19 vs. limit=8.0 +2024-09-16 21:52:17,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=85720.0, ans=0.125 +2024-09-16 21:52:40,403 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.49 vs. limit=15.0 +2024-09-16 21:52:42,494 INFO [train.py:1198] (1/2) Epoch 5, batch 3350, loss[loss=0.3172, ctc_loss=0.2472, cr_loss=0.476, attn_decoder_loss=0.3144, over 28768.00 frames. ], tot_loss[loss=0.2938, ctc_loss=0.2209, cr_loss=0.4345, attn_decoder_loss=0.2923, over 5774814.93 frames. ], batch size: 104, lr: 2.10e-02, grad_scale: 4.0 +2024-09-16 21:52:43,547 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.08 vs. limit=15.0 +2024-09-16 21:52:56,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=85840.0, ans=0.125 +2024-09-16 21:52:56,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=85840.0, ans=0.125 +2024-09-16 21:53:01,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=85840.0, ans=0.1 +2024-09-16 21:53:06,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=85840.0, ans=0.125 +2024-09-16 21:53:13,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=85880.0, ans=0.0 +2024-09-16 21:53:30,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=85920.0, ans=0.0 +2024-09-16 21:53:35,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_positive, batch_count=85920.0, ans=0.05 +2024-09-16 21:53:50,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=85960.0, ans=0.125 +2024-09-16 21:53:58,049 INFO [train.py:1198] (1/2) Epoch 5, batch 3400, loss[loss=0.2598, ctc_loss=0.191, cr_loss=0.3909, attn_decoder_loss=0.2588, over 29350.00 frames. ], tot_loss[loss=0.2936, ctc_loss=0.2209, cr_loss=0.4331, attn_decoder_loss=0.292, over 5767598.84 frames. 
], batch size: 67, lr: 2.10e-02, grad_scale: 4.0 +2024-09-16 21:53:58,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=86000.0, ans=0.125 +2024-09-16 21:54:13,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=86040.0, ans=0.2 +2024-09-16 21:54:16,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=86040.0, ans=0.125 +2024-09-16 21:54:28,081 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.899e+01 1.163e+02 1.316e+02 1.513e+02 4.040e+02, threshold=2.631e+02, percent-clipped=2.0 +2024-09-16 21:54:43,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=86120.0, ans=0.125 +2024-09-16 21:54:50,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.84 vs. limit=15.0 +2024-09-16 21:54:55,568 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:55:04,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=86160.0, ans=0.125 +2024-09-16 21:55:13,312 INFO [train.py:1198] (1/2) Epoch 5, batch 3450, loss[loss=0.3123, ctc_loss=0.2416, cr_loss=0.4461, attn_decoder_loss=0.3102, over 28215.00 frames. ], tot_loss[loss=0.2944, ctc_loss=0.2213, cr_loss=0.4348, attn_decoder_loss=0.2929, over 5775605.88 frames. ], batch size: 111, lr: 2.09e-02, grad_scale: 4.0 +2024-09-16 21:55:13,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=86200.0, ans=0.09899494936611666 +2024-09-16 21:55:25,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=86200.0, ans=0.0 +2024-09-16 21:55:28,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=86240.0, ans=0.025 +2024-09-16 21:55:40,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=86240.0, ans=0.0 +2024-09-16 21:55:45,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=86280.0, ans=0.125 +2024-09-16 21:56:00,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=86320.0, ans=0.0 +2024-09-16 21:56:10,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=86320.0, ans=0.1 +2024-09-16 21:56:19,059 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:56:28,591 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.39 vs. limit=22.5 +2024-09-16 21:56:33,259 INFO [train.py:1198] (1/2) Epoch 5, batch 3500, loss[loss=0.2594, ctc_loss=0.1913, cr_loss=0.3963, attn_decoder_loss=0.2582, over 29318.00 frames. ], tot_loss[loss=0.2938, ctc_loss=0.2208, cr_loss=0.4344, attn_decoder_loss=0.2922, over 5776725.97 frames. 
], batch size: 71, lr: 2.09e-02, grad_scale: 8.0 +2024-09-16 21:56:37,116 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.61 vs. limit=12.0 +2024-09-16 21:56:42,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=86400.0, ans=0.125 +2024-09-16 21:56:48,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=86440.0, ans=0.1 +2024-09-16 21:56:51,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=86440.0, ans=0.2 +2024-09-16 21:57:04,583 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.011e+02 1.247e+02 1.357e+02 1.561e+02 2.944e+02, threshold=2.714e+02, percent-clipped=1.0 +2024-09-16 21:57:21,433 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:57:27,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=86520.0, ans=0.07 +2024-09-16 21:57:30,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=86520.0, ans=0.035 +2024-09-16 21:57:40,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=86560.0, ans=0.125 +2024-09-16 21:57:47,998 INFO [train.py:1198] (1/2) Epoch 5, batch 3550, loss[loss=0.2987, ctc_loss=0.2188, cr_loss=0.4214, attn_decoder_loss=0.2982, over 29680.00 frames. ], tot_loss[loss=0.294, ctc_loss=0.221, cr_loss=0.4347, attn_decoder_loss=0.2924, over 5782768.49 frames. ], batch size: 89, lr: 2.09e-02, grad_scale: 4.0 +2024-09-16 21:57:48,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=86600.0, ans=0.2 +2024-09-16 21:58:04,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=86640.0, ans=0.2 +2024-09-16 21:58:13,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=86640.0, ans=0.125 +2024-09-16 21:58:23,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=86680.0, ans=0.1 +2024-09-16 21:58:48,363 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.70 vs. limit=15.0 +2024-09-16 21:59:02,138 INFO [train.py:1198] (1/2) Epoch 5, batch 3600, loss[loss=0.2845, ctc_loss=0.2073, cr_loss=0.3966, attn_decoder_loss=0.2842, over 29498.00 frames. ], tot_loss[loss=0.2937, ctc_loss=0.2204, cr_loss=0.4347, attn_decoder_loss=0.2922, over 5792168.95 frames. 
], batch size: 77, lr: 2.09e-02, grad_scale: 8.0 +2024-09-16 21:59:03,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=86800.0, ans=0.2 +2024-09-16 21:59:14,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=86800.0, ans=0.125 +2024-09-16 21:59:20,408 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 21:59:20,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.25 vs. limit=6.0 +2024-09-16 21:59:22,291 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.65 vs. limit=6.0 +2024-09-16 21:59:27,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=86840.0, ans=0.0 +2024-09-16 21:59:34,635 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.246e+01 1.105e+02 1.213e+02 1.386e+02 4.333e+02, threshold=2.426e+02, percent-clipped=4.0 +2024-09-16 21:59:39,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=86880.0, ans=0.0 +2024-09-16 21:59:55,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=86920.0, ans=0.125 +2024-09-16 22:00:10,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=86960.0, ans=0.125 +2024-09-16 22:00:16,127 INFO [train.py:1198] (1/2) Epoch 5, batch 3650, loss[loss=0.3187, ctc_loss=0.2546, cr_loss=0.4699, attn_decoder_loss=0.3154, over 29491.00 frames. ], tot_loss[loss=0.2927, ctc_loss=0.2194, cr_loss=0.4327, attn_decoder_loss=0.2913, over 5794262.29 frames. 
], batch size: 90, lr: 2.08e-02, grad_scale: 4.0 +2024-09-16 22:00:20,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=87000.0, ans=0.0 +2024-09-16 22:00:25,252 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:00:29,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=87040.0, ans=0.0 +2024-09-16 22:00:32,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=87040.0, ans=0.125 +2024-09-16 22:00:35,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=87040.0, ans=0.1 +2024-09-16 22:00:41,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=87040.0, ans=0.0 +2024-09-16 22:00:44,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=87080.0, ans=0.125 +2024-09-16 22:00:58,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=87080.0, ans=0.0 +2024-09-16 22:01:13,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=87120.0, ans=0.0 +2024-09-16 22:01:22,526 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.68 vs. limit=15.0 +2024-09-16 22:01:31,076 INFO [train.py:1198] (1/2) Epoch 5, batch 3700, loss[loss=0.3102, ctc_loss=0.2431, cr_loss=0.4677, attn_decoder_loss=0.3072, over 29714.00 frames. ], tot_loss[loss=0.2926, ctc_loss=0.2189, cr_loss=0.4326, attn_decoder_loss=0.2912, over 5804513.19 frames. ], batch size: 84, lr: 2.08e-02, grad_scale: 8.0 +2024-09-16 22:01:40,877 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.70 vs. limit=22.5 +2024-09-16 22:02:02,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=87280.0, ans=0.125 +2024-09-16 22:02:05,209 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.891e+01 1.136e+02 1.234e+02 1.353e+02 4.194e+02, threshold=2.467e+02, percent-clipped=4.0 +2024-09-16 22:02:08,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=87280.0, ans=0.125 +2024-09-16 22:02:08,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=15.46 vs. limit=15.0 +2024-09-16 22:02:25,254 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.24 vs. limit=15.0 +2024-09-16 22:02:32,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=87360.0, ans=0.0 +2024-09-16 22:02:47,086 INFO [train.py:1198] (1/2) Epoch 5, batch 3750, loss[loss=0.2518, ctc_loss=0.1792, cr_loss=0.3556, attn_decoder_loss=0.252, over 29358.00 frames. ], tot_loss[loss=0.2926, ctc_loss=0.2192, cr_loss=0.4327, attn_decoder_loss=0.2911, over 5807861.32 frames. 
], batch size: 67, lr: 2.08e-02, grad_scale: 4.0 +2024-09-16 22:02:47,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=87400.0, ans=0.125 +2024-09-16 22:02:57,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=87400.0, ans=0.125 +2024-09-16 22:03:02,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=87440.0, ans=0.125 +2024-09-16 22:03:24,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=87480.0, ans=0.0 +2024-09-16 22:03:41,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.58 vs. limit=10.0 +2024-09-16 22:03:42,939 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.15 vs. limit=22.5 +2024-09-16 22:04:01,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=87600.0, ans=0.125 +2024-09-16 22:04:02,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.82 vs. limit=6.0 +2024-09-16 22:04:02,943 INFO [train.py:1198] (1/2) Epoch 5, batch 3800, loss[loss=0.3144, ctc_loss=0.2359, cr_loss=0.4659, attn_decoder_loss=0.3128, over 29642.00 frames. ], tot_loss[loss=0.2924, ctc_loss=0.2192, cr_loss=0.4323, attn_decoder_loss=0.2909, over 5798241.71 frames. ], batch size: 86, lr: 2.08e-02, grad_scale: 4.0 +2024-09-16 22:04:12,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=87600.0, ans=0.1 +2024-09-16 22:04:28,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_abs, batch_count=87640.0, ans=0.5 +2024-09-16 22:04:33,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=87680.0, ans=0.125 +2024-09-16 22:04:37,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=87680.0, ans=0.1 +2024-09-16 22:04:38,691 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.009e+02 1.217e+02 1.354e+02 1.572e+02 4.220e+02, threshold=2.708e+02, percent-clipped=3.0 +2024-09-16 22:04:53,609 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:05:11,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=87760.0, ans=0.125 +2024-09-16 22:05:13,507 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.93 vs. limit=22.5 +2024-09-16 22:05:17,042 INFO [train.py:1198] (1/2) Epoch 5, batch 3850, loss[loss=0.3113, ctc_loss=0.2328, cr_loss=0.4569, attn_decoder_loss=0.3099, over 29246.00 frames. ], tot_loss[loss=0.2919, ctc_loss=0.2183, cr_loss=0.4316, attn_decoder_loss=0.2905, over 5812945.15 frames. 
], batch size: 100, lr: 2.08e-02, grad_scale: 4.0 +2024-09-16 22:05:17,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=87800.0, ans=0.025 +2024-09-16 22:05:38,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=87840.0, ans=0.125 +2024-09-16 22:05:49,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.00 vs. limit=22.5 +2024-09-16 22:06:07,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=87920.0, ans=0.125 +2024-09-16 22:06:31,870 INFO [train.py:1198] (1/2) Epoch 5, batch 3900, loss[loss=0.303, ctc_loss=0.2125, cr_loss=0.4363, attn_decoder_loss=0.3033, over 29643.00 frames. ], tot_loss[loss=0.2927, ctc_loss=0.2186, cr_loss=0.4327, attn_decoder_loss=0.2913, over 5816267.64 frames. ], batch size: 86, lr: 2.07e-02, grad_scale: 8.0 +2024-09-16 22:06:33,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=88000.0, ans=0.0 +2024-09-16 22:06:33,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=88000.0, ans=0.125 +2024-09-16 22:06:49,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=88040.0, ans=0.2 +2024-09-16 22:06:52,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=88040.0, ans=0.0 +2024-09-16 22:07:03,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=88080.0, ans=0.0 +2024-09-16 22:07:08,814 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.846e+01 1.141e+02 1.281e+02 1.435e+02 2.843e+02, threshold=2.562e+02, percent-clipped=1.0 +2024-09-16 22:07:19,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=88120.0, ans=0.2 +2024-09-16 22:07:30,094 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=22.12 vs. limit=22.5 +2024-09-16 22:07:31,660 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.43 vs. limit=15.0 +2024-09-16 22:07:35,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=88160.0, ans=0.125 +2024-09-16 22:07:47,174 INFO [train.py:1198] (1/2) Epoch 5, batch 3950, loss[loss=0.3013, ctc_loss=0.2224, cr_loss=0.4371, attn_decoder_loss=0.3003, over 29511.00 frames. ], tot_loss[loss=0.2923, ctc_loss=0.2176, cr_loss=0.4325, attn_decoder_loss=0.291, over 5835706.89 frames. 
], batch size: 97, lr: 2.07e-02, grad_scale: 4.0 +2024-09-16 22:07:56,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=88200.0, ans=0.125 +2024-09-16 22:08:06,690 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:08:12,731 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.34 vs. limit=15.0 +2024-09-16 22:08:25,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=88280.0, ans=0.125 +2024-09-16 22:08:28,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=88280.0, ans=0.0 +2024-09-16 22:08:29,113 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.77 vs. limit=15.0 +2024-09-16 22:08:40,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=88320.0, ans=0.025 +2024-09-16 22:08:47,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=88360.0, ans=0.2 +2024-09-16 22:09:02,280 INFO [train.py:1198] (1/2) Epoch 5, batch 4000, loss[loss=0.2737, ctc_loss=0.1969, cr_loss=0.4087, attn_decoder_loss=0.2732, over 29504.00 frames. ], tot_loss[loss=0.2928, ctc_loss=0.2186, cr_loss=0.4329, attn_decoder_loss=0.2914, over 5812322.81 frames. ], batch size: 74, lr: 2.07e-02, grad_scale: 8.0 +2024-09-16 22:09:22,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=88440.0, ans=0.125 +2024-09-16 22:09:24,399 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:09:40,467 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.385e+01 1.172e+02 1.271e+02 1.397e+02 4.120e+02, threshold=2.542e+02, percent-clipped=3.0 +2024-09-16 22:09:50,259 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.51 vs. limit=6.0 +2024-09-16 22:10:04,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=88560.0, ans=0.0 +2024-09-16 22:10:16,072 INFO [train.py:1198] (1/2) Epoch 5, batch 4050, loss[loss=0.3427, ctc_loss=0.3141, cr_loss=0.4686, attn_decoder_loss=0.3355, over 20403.00 frames. ], tot_loss[loss=0.2924, ctc_loss=0.2185, cr_loss=0.4324, attn_decoder_loss=0.291, over 5796004.76 frames. ], batch size: 209, lr: 2.07e-02, grad_scale: 4.0 +2024-09-16 22:10:16,735 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.88 vs. 
limit=6.0 +2024-09-16 22:10:26,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=88600.0, ans=0.125 +2024-09-16 22:10:49,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=88680.0, ans=0.125 +2024-09-16 22:11:09,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=88720.0, ans=0.125 +2024-09-16 22:11:17,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=88760.0, ans=0.1 +2024-09-16 22:11:19,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=88760.0, ans=0.04949747468305833 +2024-09-16 22:11:30,103 INFO [train.py:1198] (1/2) Epoch 5, batch 4100, loss[loss=0.3125, ctc_loss=0.2467, cr_loss=0.4484, attn_decoder_loss=0.3098, over 29528.00 frames. ], tot_loss[loss=0.2928, ctc_loss=0.219, cr_loss=0.4334, attn_decoder_loss=0.2914, over 5791712.97 frames. ], batch size: 90, lr: 2.07e-02, grad_scale: 8.0 +2024-09-16 22:11:30,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=88800.0, ans=0.125 +2024-09-16 22:11:41,162 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.43 vs. limit=15.0 +2024-09-16 22:12:11,095 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.506e+01 1.179e+02 1.301e+02 1.533e+02 3.400e+02, threshold=2.603e+02, percent-clipped=2.0 +2024-09-16 22:12:20,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=88920.0, ans=0.0 +2024-09-16 22:12:21,322 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.80 vs. limit=15.0 +2024-09-16 22:12:29,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=88960.0, ans=0.0 +2024-09-16 22:12:36,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=88960.0, ans=0.2 +2024-09-16 22:12:45,302 INFO [train.py:1198] (1/2) Epoch 5, batch 4150, loss[loss=0.2871, ctc_loss=0.2042, cr_loss=0.4154, attn_decoder_loss=0.2871, over 29518.00 frames. ], tot_loss[loss=0.2922, ctc_loss=0.2182, cr_loss=0.4326, attn_decoder_loss=0.2908, over 5797954.37 frames. ], batch size: 77, lr: 2.06e-02, grad_scale: 4.0 +2024-09-16 22:13:04,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=89040.0, ans=0.125 +2024-09-16 22:13:20,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=89080.0, ans=0.0 +2024-09-16 22:13:30,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=89120.0, ans=0.125 +2024-09-16 22:13:40,190 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.81 vs. 
limit=10.0 +2024-09-16 22:13:59,802 INFO [train.py:1198] (1/2) Epoch 5, batch 4200, loss[loss=0.3172, ctc_loss=0.2453, cr_loss=0.4635, attn_decoder_loss=0.3149, over 29537.00 frames. ], tot_loss[loss=0.2929, ctc_loss=0.2188, cr_loss=0.4331, attn_decoder_loss=0.2915, over 5798692.67 frames. ], batch size: 90, lr: 2.06e-02, grad_scale: 8.0 +2024-09-16 22:14:00,685 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.14 vs. limit=22.5 +2024-09-16 22:14:02,267 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.27 vs. limit=15.0 +2024-09-16 22:14:03,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=89200.0, ans=0.125 +2024-09-16 22:14:10,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=89200.0, ans=0.125 +2024-09-16 22:14:17,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=89240.0, ans=0.125 +2024-09-16 22:14:22,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.98 vs. limit=15.0 +2024-09-16 22:14:23,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=89240.0, ans=0.125 +2024-09-16 22:14:41,004 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.772e+01 1.118e+02 1.246e+02 1.404e+02 2.463e+02, threshold=2.492e+02, percent-clipped=0.0 +2024-09-16 22:14:41,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=89280.0, ans=0.0 +2024-09-16 22:14:42,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=89320.0, ans=0.1 +2024-09-16 22:14:46,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.74 vs. limit=15.0 +2024-09-16 22:15:12,898 INFO [train.py:1198] (1/2) Epoch 5, batch 4250, loss[loss=0.2734, ctc_loss=0.1983, cr_loss=0.3865, attn_decoder_loss=0.2731, over 29506.00 frames. ], tot_loss[loss=0.2933, ctc_loss=0.219, cr_loss=0.4327, attn_decoder_loss=0.2919, over 5805055.94 frames. ], batch size: 74, lr: 2.06e-02, grad_scale: 4.0 +2024-09-16 22:15:26,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=89440.0, ans=0.125 +2024-09-16 22:15:49,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=89480.0, ans=0.125 +2024-09-16 22:15:58,603 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.31 vs. limit=12.0 +2024-09-16 22:16:27,380 INFO [train.py:1198] (1/2) Epoch 5, batch 4300, loss[loss=0.3038, ctc_loss=0.22, cr_loss=0.4442, attn_decoder_loss=0.3033, over 29536.00 frames. ], tot_loss[loss=0.2935, ctc_loss=0.2191, cr_loss=0.4335, attn_decoder_loss=0.2921, over 5795323.37 frames. 
], batch size: 87, lr: 2.06e-02, grad_scale: 8.0 +2024-09-16 22:16:47,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=89640.0, ans=0.07 +2024-09-16 22:16:53,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=89640.0, ans=0.2 +2024-09-16 22:16:55,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=89640.0, ans=0.07 +2024-09-16 22:17:05,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=89680.0, ans=0.2 +2024-09-16 22:17:11,160 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.609e+01 1.163e+02 1.276e+02 1.524e+02 3.260e+02, threshold=2.552e+02, percent-clipped=3.0 +2024-09-16 22:17:33,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=89760.0, ans=0.125 +2024-09-16 22:17:42,346 INFO [train.py:1198] (1/2) Epoch 5, batch 4350, loss[loss=0.3089, ctc_loss=0.2372, cr_loss=0.4265, attn_decoder_loss=0.3074, over 29498.00 frames. ], tot_loss[loss=0.2971, ctc_loss=0.2222, cr_loss=0.4388, attn_decoder_loss=0.2957, over 5798219.70 frames. ], batch size: 97, lr: 2.06e-02, grad_scale: 4.0 +2024-09-16 22:17:43,342 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.15 vs. limit=15.0 +2024-09-16 22:17:44,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=89800.0, ans=0.025 +2024-09-16 22:18:04,175 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.02 vs. limit=15.0 +2024-09-16 22:18:13,630 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:18:16,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=89880.0, ans=0.125 +2024-09-16 22:18:19,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=89880.0, ans=0.0 +2024-09-16 22:18:23,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=89880.0, ans=0.0 +2024-09-16 22:18:42,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=89960.0, ans=0.125 +2024-09-16 22:18:52,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=89960.0, ans=0.0 +2024-09-16 22:18:56,339 INFO [train.py:1198] (1/2) Epoch 5, batch 4400, loss[loss=0.3149, ctc_loss=0.2476, cr_loss=0.4556, attn_decoder_loss=0.3122, over 27160.00 frames. ], tot_loss[loss=0.2997, ctc_loss=0.2248, cr_loss=0.4416, attn_decoder_loss=0.2982, over 5765677.09 frames. 
], batch size: 124, lr: 2.05e-02, grad_scale: 8.0 +2024-09-16 22:18:59,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=90000.0, ans=0.2 +2024-09-16 22:19:01,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=90000.0, ans=0.125 +2024-09-16 22:19:05,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=90000.0, ans=0.0 +2024-09-16 22:19:06,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=90000.0, ans=0.2 +2024-09-16 22:19:40,700 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.790e+01 1.097e+02 1.213e+02 1.417e+02 2.444e+02, threshold=2.426e+02, percent-clipped=0.0 +2024-09-16 22:20:10,746 INFO [train.py:1198] (1/2) Epoch 5, batch 4450, loss[loss=0.3188, ctc_loss=0.2662, cr_loss=0.4269, attn_decoder_loss=0.3151, over 20304.00 frames. ], tot_loss[loss=0.3033, ctc_loss=0.2311, cr_loss=0.4441, attn_decoder_loss=0.3015, over 5574302.14 frames. ], batch size: 210, lr: 2.05e-02, grad_scale: 4.0 +2024-09-16 22:20:13,184 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.62 vs. limit=15.0 +2024-09-16 22:20:17,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=90200.0, ans=0.0 +2024-09-16 22:20:23,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=90200.0, ans=0.2 +2024-09-16 22:20:26,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=90240.0, ans=0.2 +2024-09-16 22:20:37,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=90240.0, ans=0.07 +2024-09-16 22:20:48,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=90280.0, ans=0.0 +2024-09-16 22:21:13,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=90360.0, ans=0.125 +2024-09-16 22:21:26,516 INFO [train.py:1198] (1/2) Epoch 5, batch 4500, loss[loss=0.3203, ctc_loss=0.2616, cr_loss=0.4592, attn_decoder_loss=0.3166, over 20925.00 frames. ], tot_loss[loss=0.3078, ctc_loss=0.2402, cr_loss=0.4459, attn_decoder_loss=0.3054, over 5228282.10 frames. ], batch size: 211, lr: 2.05e-02, grad_scale: 8.0 +2024-09-16 22:21:39,403 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.67 vs. limit=15.0 +2024-09-16 22:22:53,646 INFO [train.py:1198] (1/2) Epoch 6, batch 0, loss[loss=0.3095, ctc_loss=0.1874, cr_loss=0.4096, attn_decoder_loss=0.314, over 29625.00 frames. ], tot_loss[loss=0.3095, ctc_loss=0.1874, cr_loss=0.4096, attn_decoder_loss=0.314, over 29625.00 frames. 
], batch size: 73, lr: 1.91e-02, grad_scale: 4.0 +2024-09-16 22:22:53,646 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 22:22:59,477 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.2.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([4.5805, 4.9230, 5.1142, 5.1016], device='cuda:1') +2024-09-16 22:23:11,940 INFO [train.py:1230] (1/2) Epoch 6, validation: loss=0.2379, ctc_loss=0.06988, cr_loss=4.72e-15, attn_decoder_loss=0.2566, over 944034.00 frames. +2024-09-16 22:23:11,941 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 22:23:13,422 WARNING [optim.py:503] (1/2) Scaling gradients by 0.0589279979467392, model_norm_threshold=242.58145141601562 +2024-09-16 22:23:13,624 WARNING [optim.py:575] (1/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.1.self_attn.linear_k.weight with proportion 0.26, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=4.469e+06, grad_sumsq=5.019e+05, orig_rms_sq=8.904e+00 +2024-09-16 22:23:21,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=90500.0, ans=0.125 +2024-09-16 22:23:22,770 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.027e+02 1.192e+02 1.351e+02 1.731e+02 4.117e+03, threshold=2.703e+02, percent-clipped=9.0 +2024-09-16 22:23:35,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=90540.0, ans=0.2 +2024-09-16 22:23:40,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=90540.0, ans=0.2 +2024-09-16 22:23:49,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=90580.0, ans=0.0 +2024-09-16 22:23:52,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=25.25 vs. limit=22.5 +2024-09-16 22:23:59,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=90620.0, ans=0.04949747468305833 +2024-09-16 22:24:04,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=3.95 vs. limit=12.0 +2024-09-16 22:24:17,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=90660.0, ans=0.2 +2024-09-16 22:24:22,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=90660.0, ans=0.125 +2024-09-16 22:24:28,167 INFO [train.py:1198] (1/2) Epoch 6, batch 50, loss[loss=0.257, ctc_loss=0.1846, cr_loss=0.396, attn_decoder_loss=0.2563, over 29400.00 frames. ], tot_loss[loss=0.2971, ctc_loss=0.224, cr_loss=0.4365, attn_decoder_loss=0.2955, over 1268462.96 frames. ], batch size: 70, lr: 1.91e-02, grad_scale: 4.0 +2024-09-16 22:24:28,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=90700.0, ans=0.04949747468305833 +2024-09-16 22:24:43,984 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=13.29 vs. 
limit=15.0 +2024-09-16 22:24:51,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=90740.0, ans=0.125 +2024-09-16 22:25:04,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.18 vs. limit=15.0 +2024-09-16 22:25:05,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=90780.0, ans=0.1 +2024-09-16 22:25:23,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=90820.0, ans=0.125 +2024-09-16 22:25:25,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=90820.0, ans=0.125 +2024-09-16 22:25:35,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=90860.0, ans=0.2 +2024-09-16 22:25:41,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=90860.0, ans=0.125 +2024-09-16 22:25:45,645 INFO [train.py:1198] (1/2) Epoch 6, batch 100, loss[loss=0.2808, ctc_loss=0.2038, cr_loss=0.4138, attn_decoder_loss=0.2801, over 29525.00 frames. ], tot_loss[loss=0.2967, ctc_loss=0.2226, cr_loss=0.4369, attn_decoder_loss=0.2952, over 2251598.36 frames. ], batch size: 76, lr: 1.91e-02, grad_scale: 8.0 +2024-09-16 22:25:57,471 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.583e+01 1.187e+02 1.367e+02 1.634e+02 6.216e+02, threshold=2.735e+02, percent-clipped=2.0 +2024-09-16 22:26:02,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=90940.0, ans=0.0 +2024-09-16 22:26:51,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=91060.0, ans=0.125 +2024-09-16 22:27:01,996 INFO [train.py:1198] (1/2) Epoch 6, batch 150, loss[loss=0.2535, ctc_loss=0.1789, cr_loss=0.4074, attn_decoder_loss=0.2527, over 29455.00 frames. ], tot_loss[loss=0.2929, ctc_loss=0.218, cr_loss=0.4337, attn_decoder_loss=0.2916, over 3046725.67 frames. ], batch size: 70, lr: 1.91e-02, grad_scale: 4.0 +2024-09-16 22:27:11,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=91100.0, ans=0.0 +2024-09-16 22:27:25,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=91140.0, ans=0.2 +2024-09-16 22:27:47,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=91220.0, ans=0.0 +2024-09-16 22:27:57,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=8.06 vs. limit=12.0 +2024-09-16 22:28:06,030 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:28:17,516 INFO [train.py:1198] (1/2) Epoch 6, batch 200, loss[loss=0.3014, ctc_loss=0.2304, cr_loss=0.4259, attn_decoder_loss=0.2998, over 27334.00 frames. ], tot_loss[loss=0.2917, ctc_loss=0.2168, cr_loss=0.4324, attn_decoder_loss=0.2905, over 3657647.70 frames. 
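The `WARNING [optim.py:487]` entries document how the optimizer picks its clipping threshold: it tracks recent gradient norms, prints their quartiles (min/25%/50%/75%/max), and uses `threshold = clipping_scale * median`. The numbers in this log check out: `2.0 * 1.367e+02 = 2.735e+02` just above, and `2.0 * 1.213e+02 = 2.426e+02` in the first warning of this excerpt. The rarer `optim.py:503` warning is the emergency path: when a batch's gradient norm exceeds `model_norm_threshold`, all gradients are rescaled by `threshold / norm` (here `242.58 / 0.0589 ≈ 4117`, which reappears as the max quartile of the next warning), and the parameter contributing most to the squared norm is named. A sketch of that logic under these assumptions, with hypothetical names rather than icefall's actual ScaledAdam API:

```python
# Hedged reconstruction of the quartile-based clipping behind the
# "optim.py:487" warnings: threshold = clipping_scale * median of recent
# gradient norms, plus the "Scaling gradients by ..." rescue path.
from collections import deque
import torch

class GradNormClipper:
    def __init__(self, clipping_scale: float = 2.0, window: int = 128):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)  # recent total grad norms
        self.num_clipped = 0
        self.num_steps = 0

    def clip_(self, params) -> float:
        grads = [p.grad for p in params if p.grad is not None]
        norm = torch.sqrt(sum((g ** 2).sum() for g in grads)).item()
        self.norms.append(norm)
        q = torch.quantile(
            torch.tensor(list(self.norms)),
            torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]),
        )
        threshold = self.clipping_scale * q[2].item()  # e.g. 2.0 * 1.367e+02
        self.num_steps += 1
        if norm > threshold:
            self.num_clipped += 1
            for g in grads:
                g.mul_(threshold / norm)  # same idea as "Scaling gradients by ..."
        # Simplified: cumulative percentage (the log reports per interval).
        print(f"grad-norm quartiles {q.tolist()}, threshold={threshold:.4g}, "
              f"percent-clipped={100.0 * self.num_clipped / self.num_steps:.1f}")
        return norm
```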
], batch size: 124, lr: 1.90e-02, grad_scale: 8.0 +2024-09-16 22:28:29,577 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.192e+01 1.064e+02 1.171e+02 1.354e+02 3.116e+02, threshold=2.342e+02, percent-clipped=1.0 +2024-09-16 22:28:41,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.97 vs. limit=15.0 +2024-09-16 22:28:49,350 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.99 vs. limit=15.0 +2024-09-16 22:28:57,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=91380.0, ans=0.125 +2024-09-16 22:29:15,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=91420.0, ans=0.0 +2024-09-16 22:29:29,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=91460.0, ans=0.125 +2024-09-16 22:29:31,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.49 vs. limit=15.0 +2024-09-16 22:29:35,030 INFO [train.py:1198] (1/2) Epoch 6, batch 250, loss[loss=0.3043, ctc_loss=0.2224, cr_loss=0.441, attn_decoder_loss=0.3036, over 29253.00 frames. ], tot_loss[loss=0.2912, ctc_loss=0.2159, cr_loss=0.4324, attn_decoder_loss=0.2899, over 4139965.73 frames. ], batch size: 100, lr: 1.90e-02, grad_scale: 4.0 +2024-09-16 22:29:59,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=91540.0, ans=10.0 +2024-09-16 22:30:17,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=91580.0, ans=0.125 +2024-09-16 22:30:52,396 INFO [train.py:1198] (1/2) Epoch 6, batch 300, loss[loss=0.2983, ctc_loss=0.2092, cr_loss=0.4222, attn_decoder_loss=0.2988, over 29531.00 frames. ], tot_loss[loss=0.29, ctc_loss=0.2139, cr_loss=0.4304, attn_decoder_loss=0.2889, over 4508737.31 frames. ], batch size: 92, lr: 1.90e-02, grad_scale: 8.0 +2024-09-16 22:31:07,397 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.038e+01 1.116e+02 1.244e+02 1.492e+02 2.099e+02, threshold=2.488e+02, percent-clipped=0.0 +2024-09-16 22:31:10,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=91740.0, ans=0.0 +2024-09-16 22:31:21,962 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.47 vs. limit=15.0 +2024-09-16 22:31:25,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=91780.0, ans=0.125 +2024-09-16 22:31:29,369 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.37 vs. limit=10.0 +2024-09-16 22:31:33,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.40 vs. 
limit=15.0 +2024-09-16 22:31:53,557 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.96 vs. limit=6.0 +2024-09-16 22:32:05,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=91860.0, ans=0.2 +2024-09-16 22:32:07,881 INFO [train.py:1198] (1/2) Epoch 6, batch 350, loss[loss=0.2583, ctc_loss=0.1922, cr_loss=0.3723, attn_decoder_loss=0.2574, over 29308.00 frames. ], tot_loss[loss=0.2901, ctc_loss=0.2138, cr_loss=0.4303, attn_decoder_loss=0.289, over 4794585.18 frames. ], batch size: 71, lr: 1.90e-02, grad_scale: 4.0 +2024-09-16 22:32:14,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=91900.0, ans=0.125 +2024-09-16 22:32:16,316 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.10 vs. limit=15.0 +2024-09-16 22:32:38,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=91980.0, ans=0.125 +2024-09-16 22:32:47,064 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.20 vs. limit=15.0 +2024-09-16 22:33:12,086 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.23 vs. limit=10.0 +2024-09-16 22:33:23,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=92060.0, ans=0.1 +2024-09-16 22:33:25,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=92100.0, ans=0.0 +2024-09-16 22:33:26,401 INFO [train.py:1198] (1/2) Epoch 6, batch 400, loss[loss=0.3063, ctc_loss=0.2266, cr_loss=0.48, attn_decoder_loss=0.3045, over 29688.00 frames. ], tot_loss[loss=0.2895, ctc_loss=0.2133, cr_loss=0.4301, attn_decoder_loss=0.2884, over 5024661.27 frames. ], batch size: 82, lr: 1.90e-02, grad_scale: 8.0 +2024-09-16 22:33:28,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=92100.0, ans=0.125 +2024-09-16 22:33:37,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=92100.0, ans=0.0 +2024-09-16 22:33:43,127 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.703e+01 1.115e+02 1.264e+02 1.415e+02 3.594e+02, threshold=2.527e+02, percent-clipped=2.0 +2024-09-16 22:33:43,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=92140.0, ans=0.125 +2024-09-16 22:33:49,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=92140.0, ans=0.125 +2024-09-16 22:33:51,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=92140.0, ans=0.2 +2024-09-16 22:34:45,087 INFO [train.py:1198] (1/2) Epoch 6, batch 450, loss[loss=0.3031, ctc_loss=0.224, cr_loss=0.4659, attn_decoder_loss=0.3015, over 29710.00 frames. 
], tot_loss[loss=0.2894, ctc_loss=0.2131, cr_loss=0.4299, attn_decoder_loss=0.2884, over 5184832.09 frames. ], batch size: 83, lr: 1.89e-02, grad_scale: 4.0 +2024-09-16 22:34:49,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=92300.0, ans=0.125 +2024-09-16 22:35:19,593 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.03 vs. limit=22.5 +2024-09-16 22:35:32,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer_ff3.min_abs, batch_count=92420.0, ans=0.2 +2024-09-16 22:35:47,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=92460.0, ans=0.125 +2024-09-16 22:35:55,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=92460.0, ans=0.0 +2024-09-16 22:36:01,411 INFO [train.py:1198] (1/2) Epoch 6, batch 500, loss[loss=0.3177, ctc_loss=0.2373, cr_loss=0.4646, attn_decoder_loss=0.3163, over 29462.00 frames. ], tot_loss[loss=0.2887, ctc_loss=0.2122, cr_loss=0.4295, attn_decoder_loss=0.2876, over 5328401.61 frames. ], batch size: 94, lr: 1.89e-02, grad_scale: 8.0 +2024-09-16 22:36:13,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.35 vs. limit=15.0 +2024-09-16 22:36:18,343 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.047e+01 1.094e+02 1.193e+02 1.318e+02 2.724e+02, threshold=2.387e+02, percent-clipped=2.0 +2024-09-16 22:36:43,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=92580.0, ans=0.0 +2024-09-16 22:36:44,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=92580.0, ans=0.0 +2024-09-16 22:37:20,299 INFO [train.py:1198] (1/2) Epoch 6, batch 550, loss[loss=0.2975, ctc_loss=0.2255, cr_loss=0.4341, attn_decoder_loss=0.2958, over 28798.00 frames. ], tot_loss[loss=0.2886, ctc_loss=0.2122, cr_loss=0.4292, attn_decoder_loss=0.2876, over 5421188.43 frames. ], batch size: 104, lr: 1.89e-02, grad_scale: 4.0 +2024-09-16 22:37:23,002 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.95 vs. limit=22.5 +2024-09-16 22:37:32,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=92700.0, ans=0.125 +2024-09-16 22:38:13,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=92820.0, ans=0.0 +2024-09-16 22:38:13,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=92820.0, ans=0.1 +2024-09-16 22:38:16,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=92820.0, ans=0.0 +2024-09-16 22:38:18,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=92820.0, ans=0.125 +2024-09-16 22:38:39,397 INFO [train.py:1198] (1/2) Epoch 6, batch 600, loss[loss=0.3176, ctc_loss=0.232, cr_loss=0.4621, attn_decoder_loss=0.3168, over 29251.00 frames. 
], tot_loss[loss=0.2885, ctc_loss=0.2116, cr_loss=0.4292, attn_decoder_loss=0.2875, over 5510122.98 frames. ], batch size: 100, lr: 1.89e-02, grad_scale: 8.0 +2024-09-16 22:38:42,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=92900.0, ans=0.125 +2024-09-16 22:38:47,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=92900.0, ans=0.125 +2024-09-16 22:38:50,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=92900.0, ans=0.1 +2024-09-16 22:38:59,012 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.669e+01 1.124e+02 1.276e+02 1.446e+02 7.170e+02, threshold=2.552e+02, percent-clipped=2.0 +2024-09-16 22:39:01,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.23 vs. limit=22.5 +2024-09-16 22:39:12,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=92980.0, ans=0.04949747468305833 +2024-09-16 22:39:13,303 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.20 vs. limit=15.0 +2024-09-16 22:39:33,235 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.35 vs. limit=15.0 +2024-09-16 22:39:37,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=93020.0, ans=0.0 +2024-09-16 22:39:54,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=93100.0, ans=0.0 +2024-09-16 22:39:55,417 INFO [train.py:1198] (1/2) Epoch 6, batch 650, loss[loss=0.2776, ctc_loss=0.2002, cr_loss=0.3967, attn_decoder_loss=0.2773, over 29763.00 frames. ], tot_loss[loss=0.2874, ctc_loss=0.2101, cr_loss=0.4278, attn_decoder_loss=0.2865, over 5587697.26 frames. ], batch size: 81, lr: 1.89e-02, grad_scale: 4.0 +2024-09-16 22:40:15,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=93140.0, ans=0.125 +2024-09-16 22:40:23,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=93140.0, ans=0.125 +2024-09-16 22:40:32,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=93180.0, ans=0.125 +2024-09-16 22:40:38,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=93180.0, ans=0.125 +2024-09-16 22:41:06,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=93260.0, ans=0.125 +2024-09-16 22:41:13,972 INFO [train.py:1198] (1/2) Epoch 6, batch 700, loss[loss=0.2821, ctc_loss=0.2121, cr_loss=0.4282, attn_decoder_loss=0.2804, over 29538.00 frames. ], tot_loss[loss=0.2878, ctc_loss=0.2106, cr_loss=0.4279, attn_decoder_loss=0.2869, over 5638739.61 frames. 
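The `Whitening:` entries report a non-whiteness metric for intermediate activations against a limit (e.g. `metric=22.23 vs. limit=22.5` above); the `Whiten` module in `scaling.py` applies a penalty gradient only when the metric exceeds its limit. A standard way to get such a metric, and a plausible (but assumed) reading of these logs, is the ratio mean(λ²)/mean(λ)² over the eigenvalues λ of the per-group feature covariance: exactly 1.0 when the covariance is a multiple of the identity (perfectly white), and growing as energy concentrates in a few directions. A sketch under that assumption:

```python
# Plausible reconstruction of the whiteness metric in the "Whitening:"
# entries -- an assumption; see scaling.py in icefall for the real code.
# metric = mean(eig(C)^2) / mean(eig(C))^2 for the feature covariance C,
# which equals 1.0 iff C is a multiple of the identity.
import torch

def whitening_metric(x: torch.Tensor, num_groups: int) -> float:
    # x: (..., num_channels); channels split into num_groups groups.
    num_channels = x.shape[-1]
    cpg = num_channels // num_groups  # channels per group
    x = x.reshape(-1, num_groups, cpg).transpose(0, 1)     # (groups, frames, cpg)
    cov = torch.matmul(x.transpose(1, 2), x) / x.shape[1]  # (groups, cpg, cpg)
    # mean(eig^2) = trace(C @ C) / cpg = sum of squared entries / cpg
    # (C is symmetric), so no eigendecomposition is needed:
    mean_eig_sq = (cov ** 2).sum(dim=(1, 2)) / cpg
    mean_eig = cov.diagonal(dim1=1, dim2=2).mean(dim=1)
    return (mean_eig_sq / mean_eig ** 2).mean().item()

x_white = torch.randn(10000, 512)
print(whitening_metric(x_white, num_groups=1))  # close to 1.0: near-white
print(whitening_metric(x_white @ torch.randn(512, 512), num_groups=1))
# noticeably larger (~2 here): correlated features are "less white"
```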
], batch size: 76, lr: 1.89e-02, grad_scale: 8.0 +2024-09-16 22:41:14,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=93300.0, ans=0.1 +2024-09-16 22:41:29,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=93340.0, ans=0.2 +2024-09-16 22:41:35,159 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.774e+01 1.081e+02 1.183e+02 1.296e+02 3.770e+02, threshold=2.365e+02, percent-clipped=2.0 +2024-09-16 22:41:39,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten.whitening_limit, batch_count=93340.0, ans=15.0 +2024-09-16 22:41:58,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=93420.0, ans=0.125 +2024-09-16 22:42:06,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=93420.0, ans=0.125 +2024-09-16 22:42:27,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=93460.0, ans=0.2 +2024-09-16 22:42:32,785 INFO [train.py:1198] (1/2) Epoch 6, batch 750, loss[loss=0.2917, ctc_loss=0.2086, cr_loss=0.4416, attn_decoder_loss=0.2911, over 29728.00 frames. ], tot_loss[loss=0.2878, ctc_loss=0.211, cr_loss=0.4287, attn_decoder_loss=0.2868, over 5674889.85 frames. ], batch size: 82, lr: 1.88e-02, grad_scale: 4.0 +2024-09-16 22:42:33,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=93500.0, ans=0.0 +2024-09-16 22:42:33,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=93500.0, ans=0.0 +2024-09-16 22:42:33,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=93500.0, ans=0.2 +2024-09-16 22:42:36,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=93500.0, ans=0.125 +2024-09-16 22:42:50,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=93540.0, ans=0.125 +2024-09-16 22:43:32,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.93 vs. limit=15.0 +2024-09-16 22:43:34,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=93660.0, ans=0.125 +2024-09-16 22:43:49,718 INFO [train.py:1198] (1/2) Epoch 6, batch 800, loss[loss=0.2682, ctc_loss=0.1982, cr_loss=0.407, attn_decoder_loss=0.2669, over 29593.00 frames. ], tot_loss[loss=0.2879, ctc_loss=0.2111, cr_loss=0.4288, attn_decoder_loss=0.2869, over 5705647.08 frames. 
], batch size: 73, lr: 1.88e-02, grad_scale: 8.0 +2024-09-16 22:43:53,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=93700.0, ans=0.2 +2024-09-16 22:44:11,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=93740.0, ans=0.1 +2024-09-16 22:44:12,711 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.997e+01 1.068e+02 1.156e+02 1.307e+02 3.410e+02, threshold=2.312e+02, percent-clipped=1.0 +2024-09-16 22:44:20,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=93780.0, ans=0.1 +2024-09-16 22:44:24,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.75 vs. limit=15.0 +2024-09-16 22:44:40,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.95 vs. limit=15.0 +2024-09-16 22:44:46,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=93820.0, ans=0.0 +2024-09-16 22:44:55,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=93860.0, ans=0.0 +2024-09-16 22:45:08,086 INFO [train.py:1198] (1/2) Epoch 6, batch 850, loss[loss=0.3109, ctc_loss=0.2378, cr_loss=0.4582, attn_decoder_loss=0.3088, over 29695.00 frames. ], tot_loss[loss=0.2874, ctc_loss=0.2103, cr_loss=0.4282, attn_decoder_loss=0.2864, over 5735377.66 frames. ], batch size: 89, lr: 1.88e-02, grad_scale: 4.0 +2024-09-16 22:45:34,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=93940.0, ans=0.0 +2024-09-16 22:45:49,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=93980.0, ans=0.125 +2024-09-16 22:45:53,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=94020.0, ans=0.2 +2024-09-16 22:45:57,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=94020.0, ans=0.035 +2024-09-16 22:45:59,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=94020.0, ans=0.0 +2024-09-16 22:46:08,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=94060.0, ans=0.0 +2024-09-16 22:46:16,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=94060.0, ans=0.125 +2024-09-16 22:46:19,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=94060.0, ans=0.1 +2024-09-16 22:46:24,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=94060.0, ans=0.0 +2024-09-16 22:46:27,097 INFO [train.py:1198] (1/2) Epoch 6, batch 900, loss[loss=0.253, ctc_loss=0.176, cr_loss=0.3723, attn_decoder_loss=0.2533, over 29623.00 frames. ], tot_loss[loss=0.2876, ctc_loss=0.2105, cr_loss=0.4281, attn_decoder_loss=0.2867, over 5741270.94 frames. 
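Each `train.py:1198` line reports two losses: `loss[...]` for the current batch (with that batch's frame count) and `tot_loss[...]`, a frames-weighted aggregate over many batches, which is why the per-batch number jumps around while `tot_loss` moves smoothly. A minimal sketch of that bookkeeping, assuming plain frame-weighted accumulation (the actual `MetricsTracker` in icefall does more, e.g. per-component normalization and periodic resets):

```python
# Assumed reconstruction of the frames-weighted tracking behind the
# "loss[... over N frames.]" / "tot_loss[... over M frames.]" entries;
# not the actual MetricsTracker from icefall.
class LossTracker:
    def __init__(self):
        self.sums = {}      # loss-name -> frames-weighted sum
        self.frames = 0.0   # total frames seen

    def update(self, losses: dict, num_frames: float) -> None:
        for name, value in losses.items():
            self.sums[name] = self.sums.get(name, 0.0) + value * num_frames
        self.frames += num_frames

    def __str__(self) -> str:
        body = ", ".join(f"{k}={v / self.frames:.4g}" for k, v in self.sums.items())
        return f"[{body}, over {self.frames:.2f} frames.]"

# Two batch-level losses taken from this log combine into one aggregate:
tot = LossTracker()
tot.update({"loss": 0.2909, "ctc_loss": 0.2038}, num_frames=29676.0)
tot.update({"loss": 0.3063, "ctc_loss": 0.2266}, num_frames=29688.0)
print("tot_loss" + str(tot))  # frames-weighted average over both batches
```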
], batch size: 73, lr: 1.88e-02, grad_scale: 8.0 +2024-09-16 22:46:31,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=94100.0, ans=0.1 +2024-09-16 22:46:33,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=94100.0, ans=0.0 +2024-09-16 22:46:34,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=94100.0, ans=0.1 +2024-09-16 22:46:49,681 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.705e+01 1.096e+02 1.207e+02 1.371e+02 3.827e+02, threshold=2.414e+02, percent-clipped=1.0 +2024-09-16 22:46:53,470 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.77 vs. limit=15.0 +2024-09-16 22:47:02,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.30 vs. limit=10.0 +2024-09-16 22:47:22,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=94220.0, ans=0.0 +2024-09-16 22:47:42,990 INFO [train.py:1198] (1/2) Epoch 6, batch 950, loss[loss=0.2772, ctc_loss=0.2026, cr_loss=0.431, attn_decoder_loss=0.2759, over 29495.00 frames. ], tot_loss[loss=0.2879, ctc_loss=0.2108, cr_loss=0.4283, attn_decoder_loss=0.287, over 5742576.34 frames. ], batch size: 74, lr: 1.88e-02, grad_scale: 4.0 +2024-09-16 22:47:53,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=94300.0, ans=0.125 +2024-09-16 22:48:04,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=94340.0, ans=0.09899494936611666 +2024-09-16 22:48:34,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=94420.0, ans=0.1 +2024-09-16 22:49:01,845 INFO [train.py:1198] (1/2) Epoch 6, batch 1000, loss[loss=0.2836, ctc_loss=0.202, cr_loss=0.4404, attn_decoder_loss=0.2828, over 29531.00 frames. ], tot_loss[loss=0.2892, ctc_loss=0.2123, cr_loss=0.4301, attn_decoder_loss=0.2881, over 5736387.07 frames. ], batch size: 77, lr: 1.87e-02, grad_scale: 8.0 +2024-09-16 22:49:10,596 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.72 vs. limit=15.0 +2024-09-16 22:49:12,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=94500.0, ans=0.0 +2024-09-16 22:49:17,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=94540.0, ans=0.1 +2024-09-16 22:49:26,355 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.001e+01 1.144e+02 1.278e+02 1.441e+02 2.268e+02, threshold=2.556e+02, percent-clipped=0.0 +2024-09-16 22:49:51,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=94620.0, ans=0.125 +2024-09-16 22:50:20,058 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.31 vs. 
limit=22.5 +2024-09-16 22:50:20,455 INFO [train.py:1198] (1/2) Epoch 6, batch 1050, loss[loss=0.3035, ctc_loss=0.2212, cr_loss=0.4716, attn_decoder_loss=0.3022, over 29684.00 frames. ], tot_loss[loss=0.2883, ctc_loss=0.2113, cr_loss=0.4291, attn_decoder_loss=0.2874, over 5745978.52 frames. ], batch size: 85, lr: 1.87e-02, grad_scale: 4.0 +2024-09-16 22:50:20,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=94700.0, ans=0.0 +2024-09-16 22:50:26,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=94700.0, ans=0.0 +2024-09-16 22:50:26,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=94700.0, ans=0.1 +2024-09-16 22:50:34,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=94740.0, ans=0.09899494936611666 +2024-09-16 22:50:42,625 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.16 vs. limit=15.0 +2024-09-16 22:51:08,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=94820.0, ans=0.125 +2024-09-16 22:51:09,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=94820.0, ans=0.1 +2024-09-16 22:51:30,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=94860.0, ans=0.2 +2024-09-16 22:51:36,709 INFO [train.py:1198] (1/2) Epoch 6, batch 1100, loss[loss=0.2799, ctc_loss=0.1993, cr_loss=0.4294, attn_decoder_loss=0.2793, over 29457.00 frames. ], tot_loss[loss=0.2881, ctc_loss=0.2111, cr_loss=0.4285, attn_decoder_loss=0.2871, over 5758876.69 frames. ], batch size: 78, lr: 1.87e-02, grad_scale: 8.0 +2024-09-16 22:51:39,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=94900.0, ans=0.0 +2024-09-16 22:51:42,114 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.30 vs. limit=15.0 +2024-09-16 22:52:03,769 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.796e+01 1.080e+02 1.185e+02 1.359e+02 3.091e+02, threshold=2.369e+02, percent-clipped=1.0 +2024-09-16 22:52:25,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=95020.0, ans=0.0 +2024-09-16 22:52:33,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=95020.0, ans=0.125 +2024-09-16 22:52:42,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=95060.0, ans=0.1 +2024-09-16 22:52:49,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=95060.0, ans=0.125 +2024-09-16 22:52:54,944 INFO [train.py:1198] (1/2) Epoch 6, batch 1150, loss[loss=0.2812, ctc_loss=0.2045, cr_loss=0.4462, attn_decoder_loss=0.2798, over 29455.00 frames. ], tot_loss[loss=0.2876, ctc_loss=0.2108, cr_loss=0.4284, attn_decoder_loss=0.2867, over 5755188.43 frames. 
], batch size: 78, lr: 1.87e-02, grad_scale: 4.0 +2024-09-16 22:52:58,415 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:52:59,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=95100.0, ans=0.5 +2024-09-16 22:53:12,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=95140.0, ans=0.2 +2024-09-16 22:53:45,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=1.99 vs. limit=15.0 +2024-09-16 22:53:58,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=95260.0, ans=0.125 +2024-09-16 22:54:01,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=95260.0, ans=0.0 +2024-09-16 22:54:01,851 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.05 vs. limit=15.0 +2024-09-16 22:54:03,133 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.26 vs. limit=15.0 +2024-09-16 22:54:14,543 INFO [train.py:1198] (1/2) Epoch 6, batch 1200, loss[loss=0.2909, ctc_loss=0.2038, cr_loss=0.4193, attn_decoder_loss=0.2913, over 29676.00 frames. ], tot_loss[loss=0.2888, ctc_loss=0.212, cr_loss=0.4295, attn_decoder_loss=0.2878, over 5746406.53 frames. ], batch size: 85, lr: 1.87e-02, grad_scale: 8.0 +2024-09-16 22:54:26,022 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.22 vs. limit=15.0 +2024-09-16 22:54:40,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=95340.0, ans=0.125 +2024-09-16 22:54:43,659 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.331e+01 1.110e+02 1.224e+02 1.490e+02 4.215e+02, threshold=2.447e+02, percent-clipped=3.0 +2024-09-16 22:55:13,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=95420.0, ans=0.0 +2024-09-16 22:55:18,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.45 vs. limit=22.5 +2024-09-16 22:55:31,298 INFO [train.py:1198] (1/2) Epoch 6, batch 1250, loss[loss=0.3084, ctc_loss=0.2333, cr_loss=0.4828, attn_decoder_loss=0.306, over 29524.00 frames. ], tot_loss[loss=0.289, ctc_loss=0.2118, cr_loss=0.4305, attn_decoder_loss=0.288, over 5774384.89 frames. ], batch size: 92, lr: 1.87e-02, grad_scale: 4.0 +2024-09-16 22:55:31,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=95500.0, ans=0.125 +2024-09-16 22:55:34,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=95500.0, ans=0.025 +2024-09-16 22:56:47,922 INFO [train.py:1198] (1/2) Epoch 6, batch 1300, loss[loss=0.2961, ctc_loss=0.218, cr_loss=0.4396, attn_decoder_loss=0.295, over 28300.00 frames. ], tot_loss[loss=0.288, ctc_loss=0.2105, cr_loss=0.429, attn_decoder_loss=0.2871, over 5779417.99 frames. 
], batch size: 111, lr: 1.86e-02, grad_scale: 8.0 +2024-09-16 22:57:20,352 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.424e+01 1.067e+02 1.141e+02 1.259e+02 1.965e+02, threshold=2.283e+02, percent-clipped=0.0 +2024-09-16 22:57:37,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=95820.0, ans=0.125 +2024-09-16 22:57:43,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=95820.0, ans=0.125 +2024-09-16 22:58:09,244 INFO [train.py:1198] (1/2) Epoch 6, batch 1350, loss[loss=0.2966, ctc_loss=0.2196, cr_loss=0.4362, attn_decoder_loss=0.2954, over 29776.00 frames. ], tot_loss[loss=0.2872, ctc_loss=0.2093, cr_loss=0.4286, attn_decoder_loss=0.2863, over 5797206.34 frames. ], batch size: 81, lr: 1.86e-02, grad_scale: 4.0 +2024-09-16 22:58:12,712 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 22:58:13,427 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.69 vs. limit=15.0 +2024-09-16 22:59:07,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=96020.0, ans=0.0 +2024-09-16 22:59:16,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.81 vs. limit=10.0 +2024-09-16 22:59:16,876 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.35 vs. limit=12.0 +2024-09-16 22:59:27,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=96060.0, ans=0.125 +2024-09-16 22:59:32,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=96100.0, ans=22.5 +2024-09-16 22:59:33,055 INFO [train.py:1198] (1/2) Epoch 6, batch 1400, loss[loss=0.2621, ctc_loss=0.1842, cr_loss=0.402, attn_decoder_loss=0.2618, over 29584.00 frames. ], tot_loss[loss=0.2873, ctc_loss=0.2095, cr_loss=0.4288, attn_decoder_loss=0.2864, over 5808040.67 frames. 
], batch size: 69, lr: 1.86e-02, grad_scale: 8.0 +2024-09-16 22:59:36,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=96100.0, ans=0.125 +2024-09-16 22:59:39,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=96100.0, ans=0.0 +2024-09-16 22:59:51,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=96140.0, ans=0.125 +2024-09-16 23:00:00,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=96140.0, ans=0.125 +2024-09-16 23:00:04,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=96180.0, ans=0.125 +2024-09-16 23:00:05,185 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.329e+01 1.115e+02 1.239e+02 1.357e+02 3.096e+02, threshold=2.478e+02, percent-clipped=1.0 +2024-09-16 23:00:05,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=96180.0, ans=0.0 +2024-09-16 23:00:22,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=96220.0, ans=0.125 +2024-09-16 23:00:49,733 INFO [train.py:1198] (1/2) Epoch 6, batch 1450, loss[loss=0.3161, ctc_loss=0.2361, cr_loss=0.4401, attn_decoder_loss=0.3152, over 29470.00 frames. ], tot_loss[loss=0.2883, ctc_loss=0.2105, cr_loss=0.4297, attn_decoder_loss=0.2874, over 5805285.20 frames. ], batch size: 94, lr: 1.86e-02, grad_scale: 4.0 +2024-09-16 23:00:50,051 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:00:53,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=96300.0, ans=0.0 +2024-09-16 23:00:57,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=96300.0, ans=0.0 +2024-09-16 23:00:59,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=96300.0, ans=0.1 +2024-09-16 23:01:06,083 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:01:27,869 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.53 vs. limit=15.0 +2024-09-16 23:01:42,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=96420.0, ans=0.1 +2024-09-16 23:01:42,768 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.42 vs. 
limit=15.0 +2024-09-16 23:01:49,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=96420.0, ans=0.125 +2024-09-16 23:02:02,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=96460.0, ans=0.125 +2024-09-16 23:02:10,310 INFO [train.py:1198] (1/2) Epoch 6, batch 1500, loss[loss=0.3054, ctc_loss=0.2283, cr_loss=0.4394, attn_decoder_loss=0.3042, over 29625.00 frames. ], tot_loss[loss=0.2881, ctc_loss=0.2101, cr_loss=0.4295, attn_decoder_loss=0.2872, over 5805155.18 frames. ], batch size: 86, lr: 1.86e-02, grad_scale: 8.0 +2024-09-16 23:02:15,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=96500.0, ans=0.125 +2024-09-16 23:02:28,441 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.93 vs. limit=12.0 +2024-09-16 23:02:44,680 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.157e+01 1.117e+02 1.199e+02 1.410e+02 2.285e+02, threshold=2.399e+02, percent-clipped=0.0 +2024-09-16 23:03:13,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=96660.0, ans=0.025 +2024-09-16 23:03:22,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=96660.0, ans=0.125 +2024-09-16 23:03:28,271 INFO [train.py:1198] (1/2) Epoch 6, batch 1550, loss[loss=0.3109, ctc_loss=0.2277, cr_loss=0.4585, attn_decoder_loss=0.31, over 29490.00 frames. ], tot_loss[loss=0.2885, ctc_loss=0.2109, cr_loss=0.4298, attn_decoder_loss=0.2875, over 5780556.12 frames. ], batch size: 90, lr: 1.85e-02, grad_scale: 4.0 +2024-09-16 23:03:28,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=96700.0, ans=0.0 +2024-09-16 23:03:34,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=96700.0, ans=0.125 +2024-09-16 23:04:17,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.38 vs. limit=12.0 +2024-09-16 23:04:23,537 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=8.46 vs. limit=12.0 +2024-09-16 23:04:27,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=96820.0, ans=0.125 +2024-09-16 23:04:42,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=96860.0, ans=0.125 +2024-09-16 23:04:45,349 INFO [train.py:1198] (1/2) Epoch 6, batch 1600, loss[loss=0.3053, ctc_loss=0.226, cr_loss=0.4552, attn_decoder_loss=0.304, over 29670.00 frames. ], tot_loss[loss=0.2883, ctc_loss=0.2111, cr_loss=0.4294, attn_decoder_loss=0.2874, over 5763618.93 frames. 
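Note the learning rate in these entries decaying smoothly from `2.05e-02` to `1.83e-02` as the batch and epoch counters advance: the Zipformer recipes use the Eden scheduler from icefall's `optim.py`, which multiplies the base LR by two inverse-fourth-root factors, one in the global batch count and one in the epoch. The `lr_batches`/`lr_epochs` constants are recipe flags that this log does not print, so the values below are assumptions; what *is* checkable is the shape of the decay, as the example shows.

```python
# Sketch of the Eden learning-rate schedule used by icefall's Zipformer
# recipes. lr_batches / lr_epochs are recipe flags not shown in this log;
# the defaults below are assumptions for illustration.
def eden_lr(base_lr: float, batch: float, epoch: float,
            lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor

# The *ratio* of the schedule is checkable against this log: going from
# (epoch 5, batch_count ~90000) to (epoch 6, batch_count ~100000) predicts
# a factor of ~0.89, matching the logged decay 2.05e-02 -> 1.83e-02.
print(eden_lr(1.0, 100000, 6) / eden_lr(1.0, 90000, 5))  # ~0.89
```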
], batch size: 85, lr: 1.85e-02, grad_scale: 8.0 +2024-09-16 23:05:03,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=96940.0, ans=0.125 +2024-09-16 23:05:03,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=96940.0, ans=0.1 +2024-09-16 23:05:13,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=96940.0, ans=0.125 +2024-09-16 23:05:22,799 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.461e+01 1.097e+02 1.251e+02 1.445e+02 2.140e+02, threshold=2.501e+02, percent-clipped=0.0 +2024-09-16 23:05:23,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.21 vs. limit=22.5 +2024-09-16 23:05:26,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=96980.0, ans=0.0 +2024-09-16 23:05:36,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=97020.0, ans=0.125 +2024-09-16 23:05:44,542 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.85 vs. limit=15.0 +2024-09-16 23:06:02,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=97060.0, ans=0.0 +2024-09-16 23:06:02,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=97060.0, ans=0.125 +2024-09-16 23:06:06,648 INFO [train.py:1198] (1/2) Epoch 6, batch 1650, loss[loss=0.2932, ctc_loss=0.2073, cr_loss=0.4235, attn_decoder_loss=0.2933, over 29668.00 frames. ], tot_loss[loss=0.2883, ctc_loss=0.2112, cr_loss=0.4292, attn_decoder_loss=0.2873, over 5759579.81 frames. ], batch size: 89, lr: 1.85e-02, grad_scale: 4.0 +2024-09-16 23:06:06,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=97100.0, ans=0.035 +2024-09-16 23:06:49,280 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.54 vs. limit=15.0 +2024-09-16 23:06:57,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=97220.0, ans=0.0 +2024-09-16 23:07:23,293 INFO [train.py:1198] (1/2) Epoch 6, batch 1700, loss[loss=0.2429, ctc_loss=0.1641, cr_loss=0.3598, attn_decoder_loss=0.2437, over 29588.00 frames. ], tot_loss[loss=0.2871, ctc_loss=0.2093, cr_loss=0.4273, attn_decoder_loss=0.2863, over 5781548.56 frames. 
], batch size: 69, lr: 1.85e-02, grad_scale: 8.0 +2024-09-16 23:07:31,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=97300.0, ans=0.125 +2024-09-16 23:07:45,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=97340.0, ans=0.125 +2024-09-16 23:07:51,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=97340.0, ans=0.2 +2024-09-16 23:08:00,040 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.175e+01 1.040e+02 1.164e+02 1.267e+02 1.903e+02, threshold=2.329e+02, percent-clipped=0.0 +2024-09-16 23:08:12,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=97420.0, ans=0.0 +2024-09-16 23:08:39,835 INFO [train.py:1198] (1/2) Epoch 6, batch 1750, loss[loss=0.2638, ctc_loss=0.1953, cr_loss=0.4301, attn_decoder_loss=0.2618, over 29344.00 frames. ], tot_loss[loss=0.2868, ctc_loss=0.2089, cr_loss=0.4274, attn_decoder_loss=0.2859, over 5788915.21 frames. ], batch size: 67, lr: 1.85e-02, grad_scale: 4.0 +2024-09-16 23:08:44,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=97500.0, ans=0.0 +2024-09-16 23:08:52,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=97500.0, ans=0.0 +2024-09-16 23:09:27,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=97620.0, ans=0.05 +2024-09-16 23:09:43,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=97620.0, ans=0.0 +2024-09-16 23:09:59,324 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.27 vs. limit=15.0 +2024-09-16 23:10:00,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=97700.0, ans=0.125 +2024-09-16 23:10:01,610 INFO [train.py:1198] (1/2) Epoch 6, batch 1800, loss[loss=0.2998, ctc_loss=0.2138, cr_loss=0.4424, attn_decoder_loss=0.2995, over 29685.00 frames. ], tot_loss[loss=0.2871, ctc_loss=0.2089, cr_loss=0.4273, attn_decoder_loss=0.2862, over 5791864.75 frames. ], batch size: 83, lr: 1.85e-02, grad_scale: 8.0 +2024-09-16 23:10:09,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=97700.0, ans=0.125 +2024-09-16 23:10:18,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=97740.0, ans=0.0 +2024-09-16 23:10:29,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=97740.0, ans=0.125 +2024-09-16 23:10:39,663 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.002e+01 1.085e+02 1.174e+02 1.306e+02 4.568e+02, threshold=2.348e+02, percent-clipped=1.0 +2024-09-16 23:11:00,429 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.76 vs. 
limit=10.0 +2024-09-16 23:11:18,207 INFO [train.py:1198] (1/2) Epoch 6, batch 1850, loss[loss=0.2971, ctc_loss=0.2253, cr_loss=0.4425, attn_decoder_loss=0.2953, over 29625.00 frames. ], tot_loss[loss=0.2867, ctc_loss=0.2085, cr_loss=0.4268, attn_decoder_loss=0.2859, over 5798475.34 frames. ], batch size: 86, lr: 1.84e-02, grad_scale: 4.0 +2024-09-16 23:11:25,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.03 vs. limit=15.0 +2024-09-16 23:11:26,795 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.43 vs. limit=6.0 +2024-09-16 23:11:36,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=97940.0, ans=0.0 +2024-09-16 23:11:44,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=97940.0, ans=0.125 +2024-09-16 23:12:03,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=98020.0, ans=0.125 +2024-09-16 23:12:34,396 INFO [train.py:1198] (1/2) Epoch 6, batch 1900, loss[loss=0.3004, ctc_loss=0.2249, cr_loss=0.4469, attn_decoder_loss=0.2988, over 29708.00 frames. ], tot_loss[loss=0.2878, ctc_loss=0.2095, cr_loss=0.4286, attn_decoder_loss=0.287, over 5806356.91 frames. ], batch size: 89, lr: 1.84e-02, grad_scale: 8.0 +2024-09-16 23:12:45,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=98100.0, ans=10.0 +2024-09-16 23:12:51,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=98140.0, ans=0.125 +2024-09-16 23:13:09,549 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.79 vs. limit=12.0 +2024-09-16 23:13:15,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=98180.0, ans=0.125 +2024-09-16 23:13:15,670 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.92 vs. limit=15.0 +2024-09-16 23:13:16,236 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.164e+01 1.098e+02 1.206e+02 1.393e+02 1.994e+02, threshold=2.412e+02, percent-clipped=0.0 +2024-09-16 23:13:55,789 INFO [train.py:1198] (1/2) Epoch 6, batch 1950, loss[loss=0.276, ctc_loss=0.196, cr_loss=0.4352, attn_decoder_loss=0.2752, over 29446.00 frames. ], tot_loss[loss=0.2887, ctc_loss=0.2098, cr_loss=0.43, attn_decoder_loss=0.2879, over 5820219.80 frames. 
], batch size: 78, lr: 1.84e-02, grad_scale: 4.0 +2024-09-16 23:14:04,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=98300.0, ans=0.035 +2024-09-16 23:14:24,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=98340.0, ans=0.1 +2024-09-16 23:14:25,737 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:14:31,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=98380.0, ans=0.125 +2024-09-16 23:14:31,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=98380.0, ans=0.125 +2024-09-16 23:14:49,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=98420.0, ans=0.035 +2024-09-16 23:14:49,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=98420.0, ans=0.025 +2024-09-16 23:15:06,821 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=23.72 vs. limit=22.5 +2024-09-16 23:15:07,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=98460.0, ans=15.0 +2024-09-16 23:15:13,566 INFO [train.py:1198] (1/2) Epoch 6, batch 2000, loss[loss=0.2591, ctc_loss=0.1893, cr_loss=0.4261, attn_decoder_loss=0.2574, over 29381.00 frames. ], tot_loss[loss=0.2897, ctc_loss=0.211, cr_loss=0.4315, attn_decoder_loss=0.2888, over 5796499.08 frames. ], batch size: 67, lr: 1.84e-02, grad_scale: 8.0 +2024-09-16 23:15:33,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=98540.0, ans=0.1 +2024-09-16 23:15:40,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.29 vs. limit=15.0 +2024-09-16 23:15:53,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=98580.0, ans=0.2 +2024-09-16 23:15:55,048 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.337e+01 1.180e+02 1.301e+02 1.522e+02 2.715e+02, threshold=2.602e+02, percent-clipped=3.0 +2024-09-16 23:15:59,463 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.04 vs. limit=8.0 +2024-09-16 23:16:02,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.30 vs. limit=15.0 +2024-09-16 23:16:02,363 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.01 vs. limit=6.0 +2024-09-16 23:16:30,540 INFO [train.py:1198] (1/2) Epoch 6, batch 2050, loss[loss=0.2695, ctc_loss=0.1939, cr_loss=0.4307, attn_decoder_loss=0.2683, over 29452.00 frames. ], tot_loss[loss=0.2885, ctc_loss=0.2102, cr_loss=0.4304, attn_decoder_loss=0.2877, over 5788696.00 frames. 
], batch size: 70, lr: 1.84e-02, grad_scale: 4.0 +2024-09-16 23:16:31,067 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:16:40,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=98700.0, ans=0.025 +2024-09-16 23:16:54,516 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.67 vs. limit=12.0 +2024-09-16 23:17:03,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=98780.0, ans=0.125 +2024-09-16 23:17:36,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=98860.0, ans=0.0 +2024-09-16 23:17:41,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=98860.0, ans=0.125 +2024-09-16 23:17:43,260 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:17:44,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=98860.0, ans=0.125 +2024-09-16 23:17:52,101 INFO [train.py:1198] (1/2) Epoch 6, batch 2100, loss[loss=0.2834, ctc_loss=0.2081, cr_loss=0.4141, attn_decoder_loss=0.2825, over 29761.00 frames. ], tot_loss[loss=0.2875, ctc_loss=0.2089, cr_loss=0.429, attn_decoder_loss=0.2868, over 5800824.44 frames. ], batch size: 81, lr: 1.84e-02, grad_scale: 8.0 +2024-09-16 23:17:59,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=98900.0, ans=0.125 +2024-09-16 23:18:04,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=98900.0, ans=0.125 +2024-09-16 23:18:18,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=98940.0, ans=0.0 +2024-09-16 23:18:30,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=98980.0, ans=0.125 +2024-09-16 23:18:33,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=98980.0, ans=0.0 +2024-09-16 23:18:34,557 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.717e+01 1.049e+02 1.121e+02 1.246e+02 2.037e+02, threshold=2.242e+02, percent-clipped=0.0 +2024-09-16 23:19:01,568 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.20 vs. limit=10.0 +2024-09-16 23:19:08,379 INFO [train.py:1198] (1/2) Epoch 6, batch 2150, loss[loss=0.279, ctc_loss=0.2016, cr_loss=0.4104, attn_decoder_loss=0.2785, over 29438.00 frames. ], tot_loss[loss=0.286, ctc_loss=0.2071, cr_loss=0.4269, attn_decoder_loss=0.2853, over 5815036.96 frames. ], batch size: 78, lr: 1.83e-02, grad_scale: 4.0 +2024-09-16 23:19:25,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=99140.0, ans=0.0 +2024-09-16 23:19:26,308 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.99 vs. 
limit=15.0 +2024-09-16 23:19:33,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=99140.0, ans=0.0 +2024-09-16 23:20:09,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=99260.0, ans=0.0 +2024-09-16 23:20:25,671 INFO [train.py:1198] (1/2) Epoch 6, batch 2200, loss[loss=0.3064, ctc_loss=0.2281, cr_loss=0.4522, attn_decoder_loss=0.305, over 29622.00 frames. ], tot_loss[loss=0.2866, ctc_loss=0.2078, cr_loss=0.4276, attn_decoder_loss=0.2858, over 5811094.70 frames. ], batch size: 86, lr: 1.83e-02, grad_scale: 8.0 +2024-09-16 23:20:37,576 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.76 vs. limit=22.5 +2024-09-16 23:20:44,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=99340.0, ans=0.125 +2024-09-16 23:21:08,374 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.12 vs. limit=15.0 +2024-09-16 23:21:12,196 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.295e+01 1.080e+02 1.191e+02 1.298e+02 2.659e+02, threshold=2.382e+02, percent-clipped=1.0 +2024-09-16 23:21:34,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=99460.0, ans=0.2 +2024-09-16 23:21:35,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=99460.0, ans=0.1 +2024-09-16 23:21:46,855 INFO [train.py:1198] (1/2) Epoch 6, batch 2250, loss[loss=0.2997, ctc_loss=0.2257, cr_loss=0.4595, attn_decoder_loss=0.2978, over 29683.00 frames. ], tot_loss[loss=0.2866, ctc_loss=0.2077, cr_loss=0.4282, attn_decoder_loss=0.2859, over 5810797.18 frames. ], batch size: 82, lr: 1.83e-02, grad_scale: 4.0 +2024-09-16 23:22:15,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=99580.0, ans=0.1 +2024-09-16 23:22:52,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=99660.0, ans=0.125 +2024-09-16 23:22:52,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=99660.0, ans=0.125 +2024-09-16 23:22:55,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=99660.0, ans=0.125 +2024-09-16 23:23:02,895 INFO [train.py:1198] (1/2) Epoch 6, batch 2300, loss[loss=0.2674, ctc_loss=0.1983, cr_loss=0.4136, attn_decoder_loss=0.2659, over 29735.00 frames. ], tot_loss[loss=0.2857, ctc_loss=0.2074, cr_loss=0.427, attn_decoder_loss=0.2849, over 5799572.29 frames. 
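The periodic `WARNING [optim.py:487]` lines summarize the recent distribution of gradient norms. In every instance in this log the reported threshold equals `Clipping_scale` (2.0) times the logged median, e.g. 2 x 1.191e+02 = 2.382e+02 just above, so a reasonable reading is that gradients are clipped against a multiple of a running median norm. The sketch below follows that reading; the window size and the exact meaning of `percent-clipped` are assumptions.

```python
import collections
import statistics

import torch

class MedianGradClipper:
    """Clip gradient norm to clipping_scale * running median of recent
    norms, consistent with the logged 'threshold = 2.0 * median'.
    The window size and cumulative percent-clipped are assumptions."""

    def __init__(self, clipping_scale: float = 2.0, window: int = 1000):
        self.clipping_scale = clipping_scale
        self.norms = collections.deque(maxlen=window)
        self.num_clipped = 0
        self.num_steps = 0

    def step(self, parameters) -> float:
        params = [p for p in parameters if p.grad is not None]
        norm = torch.sqrt(sum((p.grad ** 2).sum() for p in params)).item()
        self.norms.append(norm)
        self.num_steps += 1
        threshold = self.clipping_scale * statistics.median(self.norms)
        if norm > threshold:
            self.num_clipped += 1
            for p in params:
                p.grad.mul_(threshold / norm)
        return norm

    def report(self) -> str:
        # Call after a few steps; quantiles() needs >= 2 samples.
        q1, med, q3 = statistics.quantiles(self.norms, n=4)
        pct = 100.0 * self.num_clipped / max(1, self.num_steps)
        return (f"grad-norm quartiles {min(self.norms):.3e} {q1:.3e} "
                f"{med:.3e} {q3:.3e} {max(self.norms):.3e}, "
                f"threshold={self.clipping_scale * med:.3e}, "
                f"percent-clipped={pct:.1f}")
```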
], batch size: 72, lr: 1.83e-02, grad_scale: 8.0 +2024-09-16 23:23:13,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=99700.0, ans=0.0 +2024-09-16 23:23:49,099 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.439e+01 1.127e+02 1.220e+02 1.323e+02 2.863e+02, threshold=2.441e+02, percent-clipped=2.0 +2024-09-16 23:23:51,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=99820.0, ans=0.025 +2024-09-16 23:23:51,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=99820.0, ans=0.1 +2024-09-16 23:24:12,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=99860.0, ans=0.1 +2024-09-16 23:24:19,893 INFO [train.py:1198] (1/2) Epoch 6, batch 2350, loss[loss=0.2858, ctc_loss=0.2028, cr_loss=0.42, attn_decoder_loss=0.2857, over 29682.00 frames. ], tot_loss[loss=0.2858, ctc_loss=0.2075, cr_loss=0.4269, attn_decoder_loss=0.285, over 5804998.54 frames. ], batch size: 83, lr: 1.83e-02, grad_scale: 4.0 +2024-09-16 23:24:28,334 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.78 vs. limit=22.5 +2024-09-16 23:24:29,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=99900.0, ans=0.0 +2024-09-16 23:24:40,773 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.04 vs. limit=6.0 +2024-09-16 23:25:02,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=99980.0, ans=0.2 +2024-09-16 23:25:08,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=100020.0, ans=0.0 +2024-09-16 23:25:11,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=100020.0, ans=0.125 +2024-09-16 23:25:20,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=100020.0, ans=0.0 +2024-09-16 23:25:31,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=100060.0, ans=0.125 +2024-09-16 23:25:41,940 INFO [train.py:1198] (1/2) Epoch 6, batch 2400, loss[loss=0.2798, ctc_loss=0.2015, cr_loss=0.3889, attn_decoder_loss=0.2798, over 29512.00 frames. ], tot_loss[loss=0.2861, ctc_loss=0.2075, cr_loss=0.427, attn_decoder_loss=0.2854, over 5808025.55 frames. 
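The `Whitening` lines compare a per-module `metric` against a `limit`; the module presumably pushes feature covariance toward isotropy whenever the metric exceeds the limit. The icefall formula is not reproduced here; the sketch below uses one plausible anisotropy measure, the ratio mean(eig^2)/mean(eig)^2 over the covariance eigenvalues (1.0 for perfectly white features), and that choice is an assumption.

```python
import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> float:
    """Covariance anisotropy: mean(eig^2) / mean(eig)^2, which is 1.0
    for perfectly white features and grows when a few directions
    dominate. An illustrative stand-in for the logged 'metric'."""
    num_frames, num_channels = x.shape
    assert num_channels % num_groups == 0
    x = x.reshape(num_frames, num_groups, num_channels // num_groups)
    x = x - x.mean(dim=0, keepdim=True)
    metrics = []
    for g in range(num_groups):
        cov = x[:, g, :].T @ x[:, g, :] / num_frames
        eigs = torch.linalg.eigvalsh(cov)
        metrics.append(((eigs ** 2).mean() / eigs.mean() ** 2).item())
    return sum(metrics) / num_groups

feats = torch.randn(400, 256)
feats[:, 0] *= 8.0  # one dominant direction -> anisotropic covariance
print(f"metric={whitening_metric(feats):.2f} vs. limit=22.5")
```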
], batch size: 76, lr: 1.83e-02, grad_scale: 8.0 +2024-09-16 23:25:43,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=100100.0, ans=0.09899494936611666 +2024-09-16 23:26:04,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=100140.0, ans=0.07 +2024-09-16 23:26:24,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=100180.0, ans=0.125 +2024-09-16 23:26:29,679 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.345e+01 1.104e+02 1.208e+02 1.363e+02 5.197e+02, threshold=2.416e+02, percent-clipped=3.0 +2024-09-16 23:26:58,938 INFO [train.py:1198] (1/2) Epoch 6, batch 2450, loss[loss=0.2882, ctc_loss=0.2088, cr_loss=0.4333, attn_decoder_loss=0.2874, over 29734.00 frames. ], tot_loss[loss=0.2872, ctc_loss=0.2085, cr_loss=0.4281, attn_decoder_loss=0.2864, over 5784471.91 frames. ], batch size: 82, lr: 1.82e-02, grad_scale: 4.0 +2024-09-16 23:26:59,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=100300.0, ans=0.0 +2024-09-16 23:27:28,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=100380.0, ans=0.125 +2024-09-16 23:27:33,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=100380.0, ans=0.0 +2024-09-16 23:27:45,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=100420.0, ans=0.025 +2024-09-16 23:28:08,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=100460.0, ans=0.1 +2024-09-16 23:28:10,732 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.30 vs. limit=15.0 +2024-09-16 23:28:14,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=100500.0, ans=0.0 +2024-09-16 23:28:15,759 INFO [train.py:1198] (1/2) Epoch 6, batch 2500, loss[loss=0.2831, ctc_loss=0.1877, cr_loss=0.4032, attn_decoder_loss=0.2848, over 29629.00 frames. ], tot_loss[loss=0.287, ctc_loss=0.2083, cr_loss=0.4284, attn_decoder_loss=0.2863, over 5795429.64 frames. ], batch size: 86, lr: 1.82e-02, grad_scale: 8.0 +2024-09-16 23:28:25,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=100500.0, ans=0.125 +2024-09-16 23:28:34,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=100540.0, ans=0.025 +2024-09-16 23:28:38,450 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.48 vs. 
limit=12.0 +2024-09-16 23:28:53,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=100580.0, ans=0.04949747468305833 +2024-09-16 23:28:53,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=100580.0, ans=0.0 +2024-09-16 23:29:00,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=100620.0, ans=0.1 +2024-09-16 23:29:04,966 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.690e+01 1.098e+02 1.228e+02 1.415e+02 3.536e+02, threshold=2.457e+02, percent-clipped=1.0 +2024-09-16 23:29:10,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=100620.0, ans=0.125 +2024-09-16 23:29:36,830 INFO [train.py:1198] (1/2) Epoch 6, batch 2550, loss[loss=0.2439, ctc_loss=0.1673, cr_loss=0.3814, attn_decoder_loss=0.2439, over 29340.00 frames. ], tot_loss[loss=0.2867, ctc_loss=0.2077, cr_loss=0.4286, attn_decoder_loss=0.286, over 5797710.82 frames. ], batch size: 67, lr: 1.82e-02, grad_scale: 4.0 +2024-09-16 23:29:37,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=100700.0, ans=0.1 +2024-09-16 23:29:39,336 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.08 vs. limit=15.0 +2024-09-16 23:29:42,350 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=11.18 vs. limit=12.0 +2024-09-16 23:29:53,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=100740.0, ans=0.0 +2024-09-16 23:30:01,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=100740.0, ans=0.0 +2024-09-16 23:30:03,488 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.09 vs. limit=15.0 +2024-09-16 23:30:06,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=100780.0, ans=0.125 +2024-09-16 23:30:54,115 INFO [train.py:1198] (1/2) Epoch 6, batch 2600, loss[loss=0.2848, ctc_loss=0.205, cr_loss=0.4415, attn_decoder_loss=0.2839, over 29442.00 frames. ], tot_loss[loss=0.2871, ctc_loss=0.2083, cr_loss=0.4285, attn_decoder_loss=0.2864, over 5794472.45 frames. ], batch size: 78, lr: 1.82e-02, grad_scale: 8.0 +2024-09-16 23:31:19,161 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.87 vs. 
limit=15.0 +2024-09-16 23:31:20,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=100940.0, ans=0.125 +2024-09-16 23:31:34,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=100980.0, ans=0.1 +2024-09-16 23:31:38,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=101020.0, ans=0.09899494936611666 +2024-09-16 23:31:38,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=101020.0, ans=0.1 +2024-09-16 23:31:44,363 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.431e+01 1.047e+02 1.098e+02 1.263e+02 2.416e+02, threshold=2.197e+02, percent-clipped=0.0 +2024-09-16 23:32:10,135 INFO [train.py:1198] (1/2) Epoch 6, batch 2650, loss[loss=0.2996, ctc_loss=0.2243, cr_loss=0.4374, attn_decoder_loss=0.2983, over 29270.00 frames. ], tot_loss[loss=0.2874, ctc_loss=0.2085, cr_loss=0.4289, attn_decoder_loss=0.2867, over 5801466.75 frames. ], batch size: 100, lr: 1.82e-02, grad_scale: 4.0 +2024-09-16 23:32:34,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=101140.0, ans=0.125 +2024-09-16 23:32:36,839 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.08 vs. limit=15.0 +2024-09-16 23:32:40,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=101180.0, ans=0.1 +2024-09-16 23:32:55,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.62 vs. limit=15.0 +2024-09-16 23:32:59,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=101220.0, ans=0.1 +2024-09-16 23:33:23,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=101260.0, ans=0.1 +2024-09-16 23:33:31,359 INFO [train.py:1198] (1/2) Epoch 6, batch 2700, loss[loss=0.2913, ctc_loss=0.2153, cr_loss=0.4394, attn_decoder_loss=0.29, over 29521.00 frames. ], tot_loss[loss=0.2876, ctc_loss=0.2086, cr_loss=0.4295, attn_decoder_loss=0.2869, over 5795686.26 frames. ], batch size: 87, lr: 1.82e-02, grad_scale: 8.0 +2024-09-16 23:34:20,477 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=14.45 vs. limit=22.5 +2024-09-16 23:34:23,727 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.471e+01 1.102e+02 1.222e+02 1.380e+02 2.898e+02, threshold=2.443e+02, percent-clipped=1.0 +2024-09-16 23:34:28,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=101420.0, ans=0.2 +2024-09-16 23:34:41,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=101460.0, ans=0.1 +2024-09-16 23:34:48,560 INFO [train.py:1198] (1/2) Epoch 6, batch 2750, loss[loss=0.2692, ctc_loss=0.1853, cr_loss=0.4036, attn_decoder_loss=0.2695, over 29521.00 frames. ], tot_loss[loss=0.2862, ctc_loss=0.2074, cr_loss=0.4271, attn_decoder_loss=0.2855, over 5794624.22 frames. 
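Each training record carries a total `loss` plus three components. The logged totals are consistent, to the printed precision, with a fixed weighting of 0.1 for `ctc_loss`, 0.9 for `attn_decoder_loss`, and 0.02 for `cr_loss`: for batch 2750 just above, 0.1 x 0.1853 + 0.9 x 0.2695 + 0.02 x 0.4036 = 0.2692. Treat these weights as inferred from the numbers, not quoted from the training config.

```python
# Weights inferred from the log itself (see above); verify against any
# record, e.g. batch 2400: 0.1*0.2015 + 0.9*0.2798 + 0.02*0.3889.
CTC_WEIGHT, ATTN_WEIGHT, CR_WEIGHT = 0.1, 0.9, 0.02

def combined_loss(ctc_loss: float, attn_decoder_loss: float,
                  cr_loss: float) -> float:
    return (CTC_WEIGHT * ctc_loss
            + ATTN_WEIGHT * attn_decoder_loss
            + CR_WEIGHT * cr_loss)

print(round(combined_loss(0.2015, 0.2798, 0.3889), 4))  # -> 0.2797
```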
], batch size: 75, lr: 1.81e-02, grad_scale: 4.0 +2024-09-16 23:34:59,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff3.min_abs, batch_count=101500.0, ans=0.2 +2024-09-16 23:35:10,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=101540.0, ans=0.125 +2024-09-16 23:35:15,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=101540.0, ans=0.125 +2024-09-16 23:35:15,473 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.07 vs. limit=15.0 +2024-09-16 23:35:37,653 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.58 vs. limit=15.0 +2024-09-16 23:35:46,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=101620.0, ans=0.0 +2024-09-16 23:36:06,183 INFO [train.py:1198] (1/2) Epoch 6, batch 2800, loss[loss=0.3379, ctc_loss=0.3013, cr_loss=0.4519, attn_decoder_loss=0.3319, over 19808.00 frames. ], tot_loss[loss=0.2866, ctc_loss=0.2081, cr_loss=0.4273, attn_decoder_loss=0.2858, over 5774178.26 frames. ], batch size: 209, lr: 1.81e-02, grad_scale: 8.0 +2024-09-16 23:36:23,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=101740.0, ans=0.125 +2024-09-16 23:36:27,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=101740.0, ans=0.125 +2024-09-16 23:36:38,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=101780.0, ans=0.0 +2024-09-16 23:36:46,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=101780.0, ans=0.0 +2024-09-16 23:37:04,396 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.339e+01 1.139e+02 1.318e+02 1.529e+02 2.693e+02, threshold=2.635e+02, percent-clipped=4.0 +2024-09-16 23:37:17,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=101860.0, ans=0.125 +2024-09-16 23:37:18,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=101860.0, ans=0.125 +2024-09-16 23:37:27,408 INFO [train.py:1198] (1/2) Epoch 6, batch 2850, loss[loss=0.2797, ctc_loss=0.1979, cr_loss=0.4375, attn_decoder_loss=0.2791, over 29502.00 frames. ], tot_loss[loss=0.2878, ctc_loss=0.2096, cr_loss=0.4289, attn_decoder_loss=0.2869, over 5758190.27 frames. 
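`batch size` swings between 67 and 210 across these records while the per-batch frame totals stay in a much narrower band; the size-209 batch above covers only 19808 frames, so it must be packed with short cuts. That pattern is what duration-capped dynamic batching produces, as in lhotse's samplers. The sketch below shows the mechanism under that assumption, with a made-up `max_duration` cap.

```python
import random

def duration_batches(durations, max_duration=600.0):
    """Group utterances so each batch's total duration stays under a
    cap, the way lhotse-style dynamic samplers do; max_duration here is
    a made-up cap, not this run's setting."""
    batch, total = [], 0.0
    for dur in durations:
        if batch and total + dur > max_duration:
            yield batch
            batch, total = [], 0.0
        batch.append(dur)
        total += dur
    if batch:
        yield batch

# Sorting approximates duration bucketing: batches of short cuts come
# out large, batches of long cuts come out small.
durs = sorted(random.uniform(1.0, 20.0) for _ in range(2000))
sizes = [len(b) for b in duration_batches(durs)]
print(sizes[:3], "...", sizes[-3:])
```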
], batch size: 77, lr: 1.81e-02, grad_scale: 4.0 +2024-09-16 23:37:43,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=101940.0, ans=0.0 +2024-09-16 23:37:46,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=101940.0, ans=0.125 +2024-09-16 23:37:58,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=101980.0, ans=0.0 +2024-09-16 23:38:20,364 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.19 vs. limit=15.0 +2024-09-16 23:38:27,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_ff2.min_abs, batch_count=102060.0, ans=0.1 +2024-09-16 23:38:41,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=102060.0, ans=0.125 +2024-09-16 23:38:43,930 INFO [train.py:1198] (1/2) Epoch 6, batch 2900, loss[loss=0.2825, ctc_loss=0.1958, cr_loss=0.4154, attn_decoder_loss=0.2829, over 29428.00 frames. ], tot_loss[loss=0.2885, ctc_loss=0.2094, cr_loss=0.4299, attn_decoder_loss=0.2878, over 5785000.79 frames. ], batch size: 79, lr: 1.81e-02, grad_scale: 8.0 +2024-09-16 23:38:58,851 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.66 vs. limit=15.0 +2024-09-16 23:39:04,994 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.19 vs. limit=15.0 +2024-09-16 23:39:39,380 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.808e+01 1.155e+02 1.262e+02 1.445e+02 2.631e+02, threshold=2.524e+02, percent-clipped=0.0 +2024-09-16 23:39:54,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=102260.0, ans=0.2 +2024-09-16 23:39:57,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=102260.0, ans=0.0 +2024-09-16 23:40:00,667 INFO [train.py:1198] (1/2) Epoch 6, batch 2950, loss[loss=0.2749, ctc_loss=0.1919, cr_loss=0.3991, attn_decoder_loss=0.2753, over 29534.00 frames. ], tot_loss[loss=0.2867, ctc_loss=0.208, cr_loss=0.4277, attn_decoder_loss=0.2859, over 5780008.10 frames. ], batch size: 75, lr: 1.81e-02, grad_scale: 4.0 +2024-09-16 23:40:20,006 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.52 vs. limit=15.0 +2024-09-16 23:40:21,731 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.92 vs. 
limit=15.0 +2024-09-16 23:40:22,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=102340.0, ans=0.125 +2024-09-16 23:40:36,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=102380.0, ans=0.1 +2024-09-16 23:40:40,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=102380.0, ans=0.125 +2024-09-16 23:41:01,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=102420.0, ans=0.125 +2024-09-16 23:41:07,316 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.67 vs. limit=22.5 +2024-09-16 23:41:09,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=102460.0, ans=0.125 +2024-09-16 23:41:18,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.67 vs. limit=22.5 +2024-09-16 23:41:23,658 INFO [train.py:1198] (1/2) Epoch 6, batch 3000, loss[loss=0.2848, ctc_loss=0.2006, cr_loss=0.4269, attn_decoder_loss=0.2846, over 29750.00 frames. ], tot_loss[loss=0.2864, ctc_loss=0.2076, cr_loss=0.4277, attn_decoder_loss=0.2856, over 5780704.60 frames. ], batch size: 81, lr: 1.81e-02, grad_scale: 8.0 +2024-09-16 23:41:23,659 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-16 23:41:31,512 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.5.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.5027, 4.4299, 3.9014, 2.4920], device='cuda:1') +2024-09-16 23:41:42,100 INFO [train.py:1230] (1/2) Epoch 6, validation: loss=0.2192, ctc_loss=0.0625, cr_loss=4.383e-15, attn_decoder_loss=0.2366, over 944034.00 frames. +2024-09-16 23:41:42,101 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-16 23:41:44,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=102500.0, ans=0.2 +2024-09-16 23:42:04,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=102540.0, ans=0.1 +2024-09-16 23:42:14,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=102580.0, ans=0.0 +2024-09-16 23:42:38,834 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.071e+01 1.057e+02 1.164e+02 1.320e+02 2.426e+02, threshold=2.327e+02, percent-clipped=0.0 +2024-09-16 23:42:52,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.47 vs. limit=15.0 +2024-09-16 23:42:57,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=102700.0, ans=0.1 +2024-09-16 23:42:58,906 INFO [train.py:1198] (1/2) Epoch 6, batch 3050, loss[loss=0.2747, ctc_loss=0.1994, cr_loss=0.4098, attn_decoder_loss=0.2739, over 29536.00 frames. ], tot_loss[loss=0.2873, ctc_loss=0.2086, cr_loss=0.4285, attn_decoder_loss=0.2865, over 5775407.32 frames. 
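The validation block above also dumps `attn_weights_entropy` per self-attention module (`zipformer.py:1858`), a compact health check: near-zero entropy means attention has collapsed onto a single key, while the maximum is the log of the number of keys. A sketch of that statistic, assuming the logged tensor holds one mean entropy per head or head group:

```python
import torch

def attn_weights_entropy(attn: torch.Tensor) -> torch.Tensor:
    """Mean entropy (nats) of attention distributions, one value per
    head: attn has shape (num_heads, num_queries, num_keys) with rows
    summing to 1. Reading the logged tensor as one number per head
    (or head group) is our assumption."""
    p = attn.clamp(min=1e-20)
    ent = -(p * p.log()).sum(dim=-1)   # (num_heads, num_queries)
    return ent.mean(dim=-1)            # (num_heads,)

# Uniform attention over 512 keys would give log(512) ~= 6.24 nats, so
# the logged [4.5027, 4.4299, 3.9014, 2.4920] reads as moderately
# focused heads, none collapsed to a single key.
attn = torch.softmax(torch.randn(4, 100, 512), dim=-1)
print(attn_weights_entropy(attn))
```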
], batch size: 76, lr: 1.80e-02, grad_scale: 4.0 +2024-09-16 23:43:16,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=102740.0, ans=0.0 +2024-09-16 23:43:19,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.80 vs. limit=15.0 +2024-09-16 23:43:33,797 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.04 vs. limit=22.5 +2024-09-16 23:43:42,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=102780.0, ans=0.0 +2024-09-16 23:43:46,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=102820.0, ans=0.2 +2024-09-16 23:43:57,682 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.20 vs. limit=10.0 +2024-09-16 23:44:00,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=102860.0, ans=0.125 +2024-09-16 23:44:06,572 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-16 23:44:10,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=102860.0, ans=0.2 +2024-09-16 23:44:15,193 INFO [train.py:1198] (1/2) Epoch 6, batch 3100, loss[loss=0.3017, ctc_loss=0.2196, cr_loss=0.4502, attn_decoder_loss=0.3008, over 29261.00 frames. ], tot_loss[loss=0.2863, ctc_loss=0.2076, cr_loss=0.4276, attn_decoder_loss=0.2856, over 5774551.44 frames. ], batch size: 100, lr: 1.80e-02, grad_scale: 8.0 +2024-09-16 23:44:26,969 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.83 vs. limit=6.0 +2024-09-16 23:44:27,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=102900.0, ans=0.0 +2024-09-16 23:44:38,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.45 vs. limit=15.0 +2024-09-16 23:44:41,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=102940.0, ans=0.025 +2024-09-16 23:44:51,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=102980.0, ans=0.2 +2024-09-16 23:45:00,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=102980.0, ans=0.125 +2024-09-16 23:45:02,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=102980.0, ans=0.125 +2024-09-16 23:45:17,447 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.176e+01 1.082e+02 1.229e+02 1.361e+02 4.744e+02, threshold=2.458e+02, percent-clipped=3.0 +2024-09-16 23:45:19,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.01 vs. 
limit=15.0 +2024-09-16 23:45:23,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=103060.0, ans=0.125 +2024-09-16 23:45:35,736 INFO [train.py:1198] (1/2) Epoch 6, batch 3150, loss[loss=0.3026, ctc_loss=0.215, cr_loss=0.4568, attn_decoder_loss=0.3021, over 28738.00 frames. ], tot_loss[loss=0.2862, ctc_loss=0.2073, cr_loss=0.4277, attn_decoder_loss=0.2855, over 5781921.90 frames. ], batch size: 104, lr: 1.80e-02, grad_scale: 4.0 +2024-09-16 23:45:58,324 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.97 vs. limit=15.0 +2024-09-16 23:46:31,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=103220.0, ans=0.2 +2024-09-16 23:46:47,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=103260.0, ans=0.125 +2024-09-16 23:46:50,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=103260.0, ans=0.125 +2024-09-16 23:46:52,912 INFO [train.py:1198] (1/2) Epoch 6, batch 3200, loss[loss=0.2895, ctc_loss=0.2156, cr_loss=0.4676, attn_decoder_loss=0.2873, over 29440.00 frames. ], tot_loss[loss=0.2853, ctc_loss=0.2062, cr_loss=0.4265, attn_decoder_loss=0.2846, over 5793545.72 frames. ], batch size: 79, lr: 1.80e-02, grad_scale: 8.0 +2024-09-16 23:47:01,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.57 vs. limit=15.0 +2024-09-16 23:47:09,462 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.95 vs. limit=15.0 +2024-09-16 23:47:49,137 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.76 vs. limit=22.5 +2024-09-16 23:47:52,865 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.859e+01 1.056e+02 1.155e+02 1.311e+02 1.883e+02, threshold=2.309e+02, percent-clipped=0.0 +2024-09-16 23:48:01,558 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.53 vs. limit=15.0 +2024-09-16 23:48:07,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=103460.0, ans=0.125 +2024-09-16 23:48:09,767 INFO [train.py:1198] (1/2) Epoch 6, batch 3250, loss[loss=0.2828, ctc_loss=0.2016, cr_loss=0.4247, attn_decoder_loss=0.2824, over 29714.00 frames. ], tot_loss[loss=0.2863, ctc_loss=0.2073, cr_loss=0.4282, attn_decoder_loss=0.2856, over 5799800.31 frames. ], batch size: 84, lr: 1.80e-02, grad_scale: 4.0 +2024-09-16 23:48:13,757 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.83 vs. 
limit=15.0 +2024-09-16 23:48:22,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=103500.0, ans=0.0 +2024-09-16 23:48:44,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=103580.0, ans=0.125 +2024-09-16 23:49:02,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=103620.0, ans=0.0 +2024-09-16 23:49:18,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=103660.0, ans=0.125 +2024-09-16 23:49:30,808 INFO [train.py:1198] (1/2) Epoch 6, batch 3300, loss[loss=0.2984, ctc_loss=0.2132, cr_loss=0.4475, attn_decoder_loss=0.2979, over 28298.00 frames. ], tot_loss[loss=0.2848, ctc_loss=0.2059, cr_loss=0.4262, attn_decoder_loss=0.2841, over 5797466.80 frames. ], batch size: 111, lr: 1.80e-02, grad_scale: 8.0 +2024-09-16 23:49:40,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=103700.0, ans=0.025 +2024-09-16 23:50:02,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=103780.0, ans=0.025 +2024-09-16 23:50:08,595 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.49 vs. limit=15.0 +2024-09-16 23:50:21,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=103820.0, ans=0.125 +2024-09-16 23:50:21,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=103820.0, ans=0.125 +2024-09-16 23:50:25,358 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.71 vs. limit=6.0 +2024-09-16 23:50:32,022 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.684e+01 1.121e+02 1.244e+02 1.460e+02 3.755e+02, threshold=2.488e+02, percent-clipped=2.0 +2024-09-16 23:50:47,251 INFO [train.py:1198] (1/2) Epoch 6, batch 3350, loss[loss=0.296, ctc_loss=0.2226, cr_loss=0.4253, attn_decoder_loss=0.2947, over 28799.00 frames. ], tot_loss[loss=0.2857, ctc_loss=0.2068, cr_loss=0.4268, attn_decoder_loss=0.285, over 5773932.12 frames. ], batch size: 104, lr: 1.79e-02, grad_scale: 4.0 +2024-09-16 23:51:03,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.19 vs. limit=22.5 +2024-09-16 23:51:04,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=103940.0, ans=0.125 +2024-09-16 23:51:13,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=103940.0, ans=0.125 +2024-09-16 23:51:30,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=103980.0, ans=0.125 +2024-09-16 23:52:04,370 INFO [train.py:1198] (1/2) Epoch 6, batch 3400, loss[loss=0.2427, ctc_loss=0.1643, cr_loss=0.3684, attn_decoder_loss=0.2432, over 29346.00 frames. ], tot_loss[loss=0.2852, ctc_loss=0.2064, cr_loss=0.4256, attn_decoder_loss=0.2845, over 5766020.49 frames. 
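The `lr` column decays smoothly from 1.84e-02 near batch 98k to 1.79e-02 here, a drift consistent with the Eden-style schedule icefall uses, in which the rate shrinks as a -0.25 power in both batch count and epoch. The constants below (`lr_batches=7500`, `lr_epochs=3.5`) are typical defaults, not values read from this run.

```python
def eden_lr(base_lr: float, batch: int, epoch: float,
            lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
    # Eden-style decay, smooth in batch count and epoch; the constants
    # are assumed defaults, not this run's configuration.
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor

# The relative drift over this stretch of the log is reproduced
# independently of base_lr: ~0.96, matching 1.77e-02 / 1.84e-02.
print(eden_lr(1.0, 107000, 6) / eden_lr(1.0, 98300, 6))
```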
], batch size: 67, lr: 1.79e-02, grad_scale: 8.0 +2024-09-16 23:52:19,563 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.72 vs. limit=22.5 +2024-09-16 23:52:53,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=104180.0, ans=0.125 +2024-09-16 23:53:12,531 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.970e+01 1.077e+02 1.207e+02 1.405e+02 5.237e+02, threshold=2.415e+02, percent-clipped=2.0 +2024-09-16 23:53:26,208 INFO [train.py:1198] (1/2) Epoch 6, batch 3450, loss[loss=0.2931, ctc_loss=0.2177, cr_loss=0.4064, attn_decoder_loss=0.2925, over 28118.00 frames. ], tot_loss[loss=0.2858, ctc_loss=0.207, cr_loss=0.4265, attn_decoder_loss=0.2851, over 5774585.17 frames. ], batch size: 111, lr: 1.79e-02, grad_scale: 4.0 +2024-09-16 23:53:49,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=104340.0, ans=0.1 +2024-09-16 23:54:00,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=104380.0, ans=0.0 +2024-09-16 23:54:00,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=104380.0, ans=0.125 +2024-09-16 23:54:11,608 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.53 vs. limit=15.0 +2024-09-16 23:54:17,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=104420.0, ans=0.125 +2024-09-16 23:54:17,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.85 vs. limit=15.0 +2024-09-16 23:54:33,340 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.59 vs. limit=15.0 +2024-09-16 23:54:37,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=104460.0, ans=0.025 +2024-09-16 23:54:43,086 INFO [train.py:1198] (1/2) Epoch 6, batch 3500, loss[loss=0.2534, ctc_loss=0.18, cr_loss=0.37, attn_decoder_loss=0.2533, over 29315.00 frames. ], tot_loss[loss=0.2853, ctc_loss=0.2065, cr_loss=0.4269, attn_decoder_loss=0.2845, over 5777773.96 frames. ], batch size: 71, lr: 1.79e-02, grad_scale: 8.0 +2024-09-16 23:55:31,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=104620.0, ans=0.125 +2024-09-16 23:55:34,036 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.24 vs. 
limit=22.5 +2024-09-16 23:55:46,679 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.951e+01 1.039e+02 1.144e+02 1.274e+02 4.432e+02, threshold=2.289e+02, percent-clipped=1.0 +2024-09-16 23:55:48,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=104660.0, ans=0.1 +2024-09-16 23:55:50,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=104660.0, ans=0.125 +2024-09-16 23:55:58,734 INFO [train.py:1198] (1/2) Epoch 6, batch 3550, loss[loss=0.2976, ctc_loss=0.2111, cr_loss=0.4395, attn_decoder_loss=0.2974, over 29719.00 frames. ], tot_loss[loss=0.2852, ctc_loss=0.206, cr_loss=0.427, attn_decoder_loss=0.2845, over 5783166.74 frames. ], batch size: 89, lr: 1.79e-02, grad_scale: 4.0 +2024-09-16 23:56:16,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=104740.0, ans=0.1 +2024-09-16 23:56:57,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=104860.0, ans=0.125 +2024-09-16 23:57:16,976 INFO [train.py:1198] (1/2) Epoch 6, batch 3600, loss[loss=0.2805, ctc_loss=0.1958, cr_loss=0.4132, attn_decoder_loss=0.2807, over 29513.00 frames. ], tot_loss[loss=0.2853, ctc_loss=0.206, cr_loss=0.4276, attn_decoder_loss=0.2846, over 5792343.68 frames. ], batch size: 77, lr: 1.79e-02, grad_scale: 8.0 +2024-09-16 23:57:17,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=104900.0, ans=0.025 +2024-09-16 23:57:34,630 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.51 vs. limit=15.0 +2024-09-16 23:58:16,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=105020.0, ans=0.125 +2024-09-16 23:58:23,466 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.018e+01 1.117e+02 1.191e+02 1.328e+02 4.381e+02, threshold=2.382e+02, percent-clipped=2.0 +2024-09-16 23:58:24,574 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.84 vs. limit=12.0 +2024-09-16 23:58:26,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=105060.0, ans=0.0 +2024-09-16 23:58:34,009 INFO [train.py:1198] (1/2) Epoch 6, batch 3650, loss[loss=0.3095, ctc_loss=0.2336, cr_loss=0.4551, attn_decoder_loss=0.3078, over 29522.00 frames. ], tot_loss[loss=0.2843, ctc_loss=0.2048, cr_loss=0.4259, attn_decoder_loss=0.2837, over 5793973.76 frames. 
], batch size: 90, lr: 1.79e-02, grad_scale: 4.0 +2024-09-16 23:58:34,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=105100.0, ans=0.125 +2024-09-16 23:58:35,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=105100.0, ans=0.125 +2024-09-16 23:58:43,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=105100.0, ans=0.0 +2024-09-16 23:58:49,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=105140.0, ans=0.125 +2024-09-16 23:58:55,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=105140.0, ans=0.2 +2024-09-16 23:59:04,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=105180.0, ans=15.0 +2024-09-16 23:59:21,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=105220.0, ans=0.0 +2024-09-16 23:59:49,425 INFO [train.py:1198] (1/2) Epoch 6, batch 3700, loss[loss=0.2979, ctc_loss=0.2169, cr_loss=0.4451, attn_decoder_loss=0.297, over 29704.00 frames. ], tot_loss[loss=0.2847, ctc_loss=0.2048, cr_loss=0.4264, attn_decoder_loss=0.2841, over 5803452.83 frames. ], batch size: 84, lr: 1.78e-02, grad_scale: 8.0 +2024-09-17 00:00:11,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=105340.0, ans=0.125 +2024-09-17 00:00:24,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=105380.0, ans=0.125 +2024-09-17 00:00:41,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=105420.0, ans=0.025 +2024-09-17 00:00:45,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=105420.0, ans=0.025 +2024-09-17 00:00:48,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=105460.0, ans=0.2 +2024-09-17 00:00:55,740 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.176e+01 1.057e+02 1.159e+02 1.295e+02 2.172e+02, threshold=2.318e+02, percent-clipped=0.0 +2024-09-17 00:00:56,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=105460.0, ans=0.125 +2024-09-17 00:00:59,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=105460.0, ans=0.0 +2024-09-17 00:01:00,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=105460.0, ans=0.125 +2024-09-17 00:01:04,782 INFO [train.py:1198] (1/2) Epoch 6, batch 3750, loss[loss=0.2447, ctc_loss=0.1683, cr_loss=0.3781, attn_decoder_loss=0.2448, over 29316.00 frames. ], tot_loss[loss=0.2847, ctc_loss=0.2048, cr_loss=0.4267, attn_decoder_loss=0.2841, over 5806843.88 frames. 
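`grad_scale` toggling between 4.0 and 8.0 in these records is the signature of dynamic loss scaling for mixed-precision training: the scale is halved when a scaled gradient overflows and grown back when steps keep succeeding. A minimal sketch with PyTorch's stock `GradScaler` follows; the model, data, and `init_scale` are placeholders, not this run's setup.

```python
import torch
from torch.cuda.amp import GradScaler, autocast

# Toy stand-ins; only the scaler loop matters here, and init_scale=8.0
# is a placeholder chosen to echo the logged values.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(80, 500).to(device)
opt = torch.optim.Adam(model.parameters())
scaler = GradScaler(init_scale=8.0, enabled=(device == "cuda"))

for step in range(5):
    x = torch.randn(16, 80, device=device)
    opt.zero_grad()
    with autocast(enabled=(device == "cuda")):
        loss = model(x).pow(2).mean()
    scaler.scale(loss).backward()  # backprop through the scaled loss
    scaler.step(opt)               # unscales; skips the step on inf/nan
    scaler.update()                # halve on overflow, else slowly grow
    print(f"grad_scale: {scaler.get_scale()}")
```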
], batch size: 67, lr: 1.78e-02, grad_scale: 4.0 +2024-09-17 00:01:05,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=105500.0, ans=0.125 +2024-09-17 00:01:23,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=105540.0, ans=0.0 +2024-09-17 00:01:29,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=105540.0, ans=0.125 +2024-09-17 00:01:36,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=105580.0, ans=0.025 +2024-09-17 00:01:42,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=105580.0, ans=0.0 +2024-09-17 00:02:20,319 INFO [train.py:1198] (1/2) Epoch 6, batch 3800, loss[loss=0.285, ctc_loss=0.1969, cr_loss=0.4293, attn_decoder_loss=0.2853, over 29641.00 frames. ], tot_loss[loss=0.2846, ctc_loss=0.2047, cr_loss=0.4263, attn_decoder_loss=0.284, over 5797090.39 frames. ], batch size: 86, lr: 1.78e-02, grad_scale: 8.0 +2024-09-17 00:02:26,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=105700.0, ans=0.125 +2024-09-17 00:02:40,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=105740.0, ans=0.025 +2024-09-17 00:02:50,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=105780.0, ans=0.07 +2024-09-17 00:02:56,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=105780.0, ans=0.0 +2024-09-17 00:03:15,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=105820.0, ans=0.0 +2024-09-17 00:03:22,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=105860.0, ans=0.2 +2024-09-17 00:03:29,920 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.809e+01 1.097e+02 1.194e+02 1.336e+02 2.111e+02, threshold=2.388e+02, percent-clipped=0.0 +2024-09-17 00:03:31,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=105860.0, ans=0.2 +2024-09-17 00:03:36,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=105900.0, ans=0.1 +2024-09-17 00:03:37,377 INFO [train.py:1198] (1/2) Epoch 6, batch 3850, loss[loss=0.3062, ctc_loss=0.2225, cr_loss=0.4527, attn_decoder_loss=0.3054, over 29288.00 frames. ], tot_loss[loss=0.2844, ctc_loss=0.2044, cr_loss=0.4263, attn_decoder_loss=0.2838, over 5811175.92 frames. ], batch size: 100, lr: 1.78e-02, grad_scale: 4.0 +2024-09-17 00:03:39,706 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.65 vs. 
limit=15.0 +2024-09-17 00:03:46,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=105900.0, ans=0.0 +2024-09-17 00:03:55,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=105940.0, ans=0.1 +2024-09-17 00:04:25,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=106020.0, ans=0.125 +2024-09-17 00:04:42,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=106060.0, ans=0.0 +2024-09-17 00:04:48,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=106060.0, ans=0.125 +2024-09-17 00:04:54,336 INFO [train.py:1198] (1/2) Epoch 6, batch 3900, loss[loss=0.2832, ctc_loss=0.194, cr_loss=0.4142, attn_decoder_loss=0.284, over 29638.00 frames. ], tot_loss[loss=0.2854, ctc_loss=0.2054, cr_loss=0.4278, attn_decoder_loss=0.2848, over 5815855.43 frames. ], batch size: 86, lr: 1.78e-02, grad_scale: 8.0 +2024-09-17 00:05:01,211 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.18 vs. limit=6.0 +2024-09-17 00:05:05,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=106100.0, ans=0.125 +2024-09-17 00:05:13,441 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.76 vs. limit=22.5 +2024-09-17 00:05:40,546 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.55 vs. limit=6.0 +2024-09-17 00:05:45,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=106220.0, ans=0.0 +2024-09-17 00:05:51,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=106220.0, ans=0.125 +2024-09-17 00:05:58,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=106260.0, ans=0.035 +2024-09-17 00:06:03,095 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.054e+01 1.064e+02 1.152e+02 1.217e+02 1.852e+02, threshold=2.304e+02, percent-clipped=0.0 +2024-09-17 00:06:03,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=106260.0, ans=0.125 +2024-09-17 00:06:09,221 INFO [train.py:1198] (1/2) Epoch 6, batch 3950, loss[loss=0.3056, ctc_loss=0.2293, cr_loss=0.4591, attn_decoder_loss=0.3038, over 29464.00 frames. ], tot_loss[loss=0.2854, ctc_loss=0.2053, cr_loss=0.4284, attn_decoder_loss=0.2848, over 5835269.43 frames. ], batch size: 97, lr: 1.78e-02, grad_scale: 4.0 +2024-09-17 00:06:20,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=106300.0, ans=0.2 +2024-09-17 00:06:44,506 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.94 vs. 
limit=15.0 +2024-09-17 00:07:24,789 INFO [train.py:1198] (1/2) Epoch 6, batch 4000, loss[loss=0.2787, ctc_loss=0.2038, cr_loss=0.4253, attn_decoder_loss=0.2775, over 29487.00 frames. ], tot_loss[loss=0.2857, ctc_loss=0.206, cr_loss=0.4287, attn_decoder_loss=0.2851, over 5811032.02 frames. ], batch size: 74, lr: 1.77e-02, grad_scale: 8.0 +2024-09-17 00:07:41,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=106540.0, ans=0.05 +2024-09-17 00:07:41,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=106540.0, ans=0.125 +2024-09-17 00:07:58,397 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.87 vs. limit=22.5 +2024-09-17 00:07:59,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=106580.0, ans=0.125 +2024-09-17 00:08:01,398 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.30 vs. limit=15.0 +2024-09-17 00:08:36,818 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.741e+01 1.103e+02 1.193e+02 1.340e+02 9.903e+02, threshold=2.386e+02, percent-clipped=2.0 +2024-09-17 00:08:40,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=106700.0, ans=0.125 +2024-09-17 00:08:41,378 INFO [train.py:1198] (1/2) Epoch 6, batch 4050, loss[loss=0.3372, ctc_loss=0.2953, cr_loss=0.4654, attn_decoder_loss=0.3315, over 19315.00 frames. ], tot_loss[loss=0.2857, ctc_loss=0.2063, cr_loss=0.4285, attn_decoder_loss=0.285, over 5793706.14 frames. ], batch size: 209, lr: 1.77e-02, grad_scale: 4.0 +2024-09-17 00:08:59,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=106740.0, ans=0.1 +2024-09-17 00:09:33,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=106820.0, ans=0.2 +2024-09-17 00:09:56,855 INFO [train.py:1198] (1/2) Epoch 6, batch 4100, loss[loss=0.2904, ctc_loss=0.2099, cr_loss=0.4568, attn_decoder_loss=0.2891, over 29505.00 frames. ], tot_loss[loss=0.2859, ctc_loss=0.2064, cr_loss=0.4285, attn_decoder_loss=0.2852, over 5790025.12 frames. ], batch size: 90, lr: 1.77e-02, grad_scale: 8.0 +2024-09-17 00:10:00,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=106900.0, ans=0.0 +2024-09-17 00:10:12,582 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.71 vs. limit=22.5 +2024-09-17 00:10:13,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=106940.0, ans=0.125 +2024-09-17 00:10:19,205 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:10:27,610 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.12 vs. 
limit=15.0 +2024-09-17 00:10:46,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=107020.0, ans=0.05 +2024-09-17 00:10:53,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=107020.0, ans=0.09899494936611666 +2024-09-17 00:11:07,985 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.979e+01 1.121e+02 1.241e+02 1.471e+02 3.510e+02, threshold=2.481e+02, percent-clipped=3.0 +2024-09-17 00:11:11,062 INFO [train.py:1198] (1/2) Epoch 6, batch 4150, loss[loss=0.279, ctc_loss=0.1939, cr_loss=0.4102, attn_decoder_loss=0.2793, over 29513.00 frames. ], tot_loss[loss=0.2851, ctc_loss=0.2054, cr_loss=0.4281, attn_decoder_loss=0.2845, over 5796548.85 frames. ], batch size: 77, lr: 1.77e-02, grad_scale: 4.0 +2024-09-17 00:11:21,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=107100.0, ans=0.2 +2024-09-17 00:11:27,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=107140.0, ans=0.0 +2024-09-17 00:12:10,618 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.70 vs. limit=6.0 +2024-09-17 00:12:14,550 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:12:26,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=107300.0, ans=0.125 +2024-09-17 00:12:27,412 INFO [train.py:1198] (1/2) Epoch 6, batch 4200, loss[loss=0.3154, ctc_loss=0.2379, cr_loss=0.4979, attn_decoder_loss=0.3129, over 29524.00 frames. ], tot_loss[loss=0.2854, ctc_loss=0.2057, cr_loss=0.4287, attn_decoder_loss=0.2848, over 5797811.35 frames. ], batch size: 90, lr: 1.77e-02, grad_scale: 8.0 +2024-09-17 00:12:33,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=107300.0, ans=0.125 +2024-09-17 00:12:37,429 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.06 vs. limit=15.0 +2024-09-17 00:12:53,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=107340.0, ans=0.025 +2024-09-17 00:13:07,164 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.00 vs. 
limit=15.0 +2024-09-17 00:13:18,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=107420.0, ans=0.2 +2024-09-17 00:13:28,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=107460.0, ans=0.0 +2024-09-17 00:13:38,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=107460.0, ans=0.125 +2024-09-17 00:13:41,774 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.922e+01 1.074e+02 1.154e+02 1.259e+02 3.870e+02, threshold=2.307e+02, percent-clipped=1.0 +2024-09-17 00:13:43,276 INFO [train.py:1198] (1/2) Epoch 6, batch 4250, loss[loss=0.2715, ctc_loss=0.1915, cr_loss=0.3941, attn_decoder_loss=0.2716, over 29525.00 frames. ], tot_loss[loss=0.2852, ctc_loss=0.205, cr_loss=0.4277, attn_decoder_loss=0.2846, over 5803779.69 frames. ], batch size: 74, lr: 1.77e-02, grad_scale: 4.0 +2024-09-17 00:13:48,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.09 vs. limit=15.0 +2024-09-17 00:13:51,790 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.43 vs. limit=15.0 +2024-09-17 00:13:56,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=107540.0, ans=0.125 +2024-09-17 00:13:59,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=107540.0, ans=0.2 +2024-09-17 00:14:50,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=107660.0, ans=22.5 +2024-09-17 00:14:51,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=107660.0, ans=0.125 +2024-09-17 00:14:53,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=107660.0, ans=0.1 +2024-09-17 00:14:57,392 INFO [train.py:1198] (1/2) Epoch 6, batch 4300, loss[loss=0.2884, ctc_loss=0.1968, cr_loss=0.4207, attn_decoder_loss=0.2893, over 29511.00 frames. ], tot_loss[loss=0.2856, ctc_loss=0.2056, cr_loss=0.4288, attn_decoder_loss=0.285, over 5792445.16 frames. ], batch size: 87, lr: 1.77e-02, grad_scale: 8.0 +2024-09-17 00:14:59,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=107700.0, ans=0.1 +2024-09-17 00:15:17,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.73 vs. 
limit=22.5 +2024-09-17 00:15:20,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=107740.0, ans=0.125 +2024-09-17 00:15:43,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=107820.0, ans=0.025 +2024-09-17 00:16:09,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=107860.0, ans=0.125 +2024-09-17 00:16:10,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=107860.0, ans=0.125 +2024-09-17 00:16:13,582 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.817e+01 1.068e+02 1.179e+02 1.314e+02 6.167e+02, threshold=2.359e+02, percent-clipped=2.0 +2024-09-17 00:16:13,608 INFO [train.py:1198] (1/2) Epoch 6, batch 4350, loss[loss=0.3069, ctc_loss=0.2313, cr_loss=0.4436, attn_decoder_loss=0.3054, over 29477.00 frames. ], tot_loss[loss=0.2893, ctc_loss=0.2089, cr_loss=0.4334, attn_decoder_loss=0.2886, over 5795622.77 frames. ], batch size: 97, lr: 1.76e-02, grad_scale: 4.0 +2024-09-17 00:16:32,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=107940.0, ans=0.125 +2024-09-17 00:16:40,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=107940.0, ans=0.1 +2024-09-17 00:16:45,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=107980.0, ans=0.125 +2024-09-17 00:17:05,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=108020.0, ans=0.125 +2024-09-17 00:17:28,672 INFO [train.py:1198] (1/2) Epoch 6, batch 4400, loss[loss=0.3026, ctc_loss=0.2232, cr_loss=0.4766, attn_decoder_loss=0.3008, over 27257.00 frames. ], tot_loss[loss=0.2915, ctc_loss=0.2111, cr_loss=0.4358, attn_decoder_loss=0.2907, over 5766829.84 frames. ], batch size: 124, lr: 1.76e-02, grad_scale: 8.0 +2024-09-17 00:17:39,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=108100.0, ans=0.1 +2024-09-17 00:18:13,133 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.00 vs. limit=15.0 +2024-09-17 00:18:40,262 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.98 vs. limit=22.5 +2024-09-17 00:18:44,524 INFO [train.py:1198] (1/2) Epoch 6, batch 4450, loss[loss=0.3289, ctc_loss=0.2766, cr_loss=0.4318, attn_decoder_loss=0.3251, over 20173.00 frames. ], tot_loss[loss=0.2951, ctc_loss=0.2173, cr_loss=0.4386, attn_decoder_loss=0.294, over 5574788.60 frames. 
], batch size: 210, lr: 1.76e-02, grad_scale: 4.0 +2024-09-17 00:18:46,021 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.821e+01 1.110e+02 1.171e+02 1.331e+02 5.376e+02, threshold=2.342e+02, percent-clipped=1.0 +2024-09-17 00:18:52,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=108300.0, ans=0.0 +2024-09-17 00:18:58,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=108340.0, ans=0.1 +2024-09-17 00:19:10,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=108340.0, ans=0.07 +2024-09-17 00:19:38,918 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.05 vs. limit=22.5 +2024-09-17 00:19:40,436 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.87 vs. limit=22.5 +2024-09-17 00:19:49,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=108460.0, ans=0.2 +2024-09-17 00:19:56,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=108460.0, ans=0.0 +2024-09-17 00:20:00,688 INFO [train.py:1198] (1/2) Epoch 6, batch 4500, loss[loss=0.3141, ctc_loss=0.262, cr_loss=0.4567, attn_decoder_loss=0.3097, over 20392.00 frames. ], tot_loss[loss=0.2994, ctc_loss=0.2255, cr_loss=0.4405, attn_decoder_loss=0.2978, over 5230716.98 frames. ], batch size: 210, lr: 1.76e-02, grad_scale: 8.0 +2024-09-17 00:20:08,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=24.51 vs. limit=22.5 +2024-09-17 00:20:21,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=108540.0, ans=0.0 +2024-09-17 00:21:33,134 WARNING [optim.py:503] (1/2) Scaling gradients by 0.06814046949148178, model_norm_threshold=234.16368103027344 +2024-09-17 00:21:33,339 WARNING [optim.py:575] (1/2) Parameter dominating tot_sumsq module.attention_decoder.decoder.layers.0.norm_self_attn.weight with proportion 0.27, where dominant_sumsq=(grad_sumsq*orig_rms_sq)=3.188e+06, grad_sumsq=4.711e+10, orig_rms_sq=6.766e-05 +2024-09-17 00:21:33,370 INFO [train.py:1198] (1/2) Epoch 7, batch 0, loss[loss=0.3051, ctc_loss=0.1955, cr_loss=0.4476, attn_decoder_loss=0.3073, over 29621.00 frames. ], tot_loss[loss=0.3051, ctc_loss=0.1955, cr_loss=0.4476, attn_decoder_loss=0.3073, over 29621.00 frames. ], batch size: 73, lr: 1.65e-02, grad_scale: 8.0 +2024-09-17 00:21:33,371 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 00:21:51,797 INFO [train.py:1230] (1/2) Epoch 7, validation: loss=0.2253, ctc_loss=0.06341, cr_loss=4.598e-15, attn_decoder_loss=0.2433, over 944034.00 frames. 
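Note on the recurring messages above: the ScheduledFloat lines from scaling.py record hyperparameters (dropout rates, skip rates, balancer probabilities) whose values are scheduled as a function of batch_count, and the Whitening lines report an activation-whitening metric against its configured limit. The optim.py WARNING lines summarize gradient clipping: the five grad-norm quartiles are the min, 25%, median, 75% and max of recent gradient norms, and the logged threshold equals Clipping_scale times the median (for example, threshold=2.481e+02 is 2.0 * 1.241e+02). Consistently, the warning "Scaling gradients by 0.06814..., model_norm_threshold=234.16..." implies a gradient norm of roughly 234.16 / 0.06814, about 3.4e+03, which matches the 3.436e+03 maximum in the next quartile report. The sketch below is a minimal illustration of this clipping scheme; the class name, window size, and exact update rule are assumptions, not the actual optim.py implementation.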
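# Minimal, self-contained sketch of the clipping scheme suggested by the
# optim.py WARNING lines above. Hypothetical names and window size; this is
# NOT the actual icefall optimizer code.
import collections
import torch

class GradNormClipper:
    def __init__(self, clipping_scale=2.0, window=128):
        self.clipping_scale = clipping_scale           # the logged Clipping_scale=2.0
        self.norms = collections.deque(maxlen=window)  # recent total gradient norms

    def __call__(self, parameters):
        grads = [p.grad for p in parameters if p.grad is not None]
        if not grads:
            return None
        total_norm = torch.sqrt(sum((g.detach() ** 2).sum() for g in grads))
        self.norms.append(total_norm.item())
        # Quartiles of the recent norms, as printed in the log:
        # min, 25%, median, 75%, max.
        q = torch.quantile(torch.tensor(list(self.norms)),
                           torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = self.clipping_scale * q[2].item()  # e.g. 2.0 * 1.241e+02
        if total_norm.item() > threshold:
            scale = threshold / total_norm.item()      # e.g. the logged 0.0681
            for g in grads:
                g.mul_(scale)                          # scale gradients in place
        return total_norm.item(), threshold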
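In a training loop, such a clipper would sit between loss.backward() and optimizer.step(), e.g. clipper(model.parameters()), and the percent-clipped figure in the log would then correspond to the fraction of recent batches on which the scaling branch fired.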
+2024-09-17 00:21:51,797 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 00:21:53,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=108600.0, ans=0.125 +2024-09-17 00:22:04,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=108600.0, ans=0.0 +2024-09-17 00:22:11,336 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.18 vs. limit=22.5 +2024-09-17 00:22:36,024 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.794e+01 1.158e+02 1.328e+02 1.536e+02 3.436e+03, threshold=2.655e+02, percent-clipped=8.0 +2024-09-17 00:22:44,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=108720.0, ans=0.125 +2024-09-17 00:22:48,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=108720.0, ans=0.2 +2024-09-17 00:22:50,896 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.06 vs. limit=22.5 +2024-09-17 00:23:11,662 INFO [train.py:1198] (1/2) Epoch 7, batch 50, loss[loss=0.255, ctc_loss=0.1762, cr_loss=0.3672, attn_decoder_loss=0.2556, over 29459.00 frames. ], tot_loss[loss=0.2904, ctc_loss=0.2122, cr_loss=0.4334, attn_decoder_loss=0.2895, over 1266693.78 frames. ], batch size: 70, lr: 1.65e-02, grad_scale: 4.0 +2024-09-17 00:23:18,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=108800.0, ans=0.125 +2024-09-17 00:23:21,683 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.76 vs. limit=6.0 +2024-09-17 00:23:42,920 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.30 vs. limit=22.5 +2024-09-17 00:24:27,111 INFO [train.py:1198] (1/2) Epoch 7, batch 100, loss[loss=0.2757, ctc_loss=0.198, cr_loss=0.4182, attn_decoder_loss=0.275, over 29541.00 frames. ], tot_loss[loss=0.2904, ctc_loss=0.2114, cr_loss=0.4321, attn_decoder_loss=0.2896, over 2249266.25 frames. ], batch size: 76, lr: 1.65e-02, grad_scale: 8.0 +2024-09-17 00:25:04,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=109080.0, ans=0.1 +2024-09-17 00:25:10,521 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.921e+01 1.059e+02 1.169e+02 1.320e+02 2.276e+02, threshold=2.339e+02, percent-clipped=0.0 +2024-09-17 00:25:15,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=109120.0, ans=0.2 +2024-09-17 00:25:22,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=109120.0, ans=0.125 +2024-09-17 00:25:43,869 INFO [train.py:1198] (1/2) Epoch 7, batch 150, loss[loss=0.2547, ctc_loss=0.1725, cr_loss=0.4022, attn_decoder_loss=0.2549, over 29414.00 frames. ], tot_loss[loss=0.2862, ctc_loss=0.2062, cr_loss=0.4286, attn_decoder_loss=0.2856, over 3044683.85 frames. 
], batch size: 70, lr: 1.64e-02, grad_scale: 4.0 +2024-09-17 00:25:58,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.97 vs. limit=22.5 +2024-09-17 00:26:38,437 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:26:41,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=109320.0, ans=0.125 +2024-09-17 00:27:00,803 INFO [train.py:1198] (1/2) Epoch 7, batch 200, loss[loss=0.2985, ctc_loss=0.2231, cr_loss=0.4267, attn_decoder_loss=0.2974, over 27269.00 frames. ], tot_loss[loss=0.2844, ctc_loss=0.204, cr_loss=0.4275, attn_decoder_loss=0.2838, over 3656987.51 frames. ], batch size: 124, lr: 1.64e-02, grad_scale: 8.0 +2024-09-17 00:27:02,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=109400.0, ans=0.0 +2024-09-17 00:27:06,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=109400.0, ans=0.0 +2024-09-17 00:27:21,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=109440.0, ans=0.1 +2024-09-17 00:27:46,008 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.796e+01 1.025e+02 1.125e+02 1.234e+02 4.171e+02, threshold=2.251e+02, percent-clipped=1.0 +2024-09-17 00:27:54,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=109520.0, ans=0.125 +2024-09-17 00:27:57,662 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.98 vs. limit=22.5 +2024-09-17 00:28:17,026 INFO [train.py:1198] (1/2) Epoch 7, batch 250, loss[loss=0.2959, ctc_loss=0.2173, cr_loss=0.4523, attn_decoder_loss=0.2946, over 29194.00 frames. ], tot_loss[loss=0.2839, ctc_loss=0.2032, cr_loss=0.4272, attn_decoder_loss=0.2833, over 4140529.60 frames. ], batch size: 100, lr: 1.64e-02, grad_scale: 4.0 +2024-09-17 00:28:56,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=109680.0, ans=0.2 +2024-09-17 00:29:06,868 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.38 vs. limit=6.0 +2024-09-17 00:29:12,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=109720.0, ans=0.125 +2024-09-17 00:29:18,684 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.14 vs. limit=15.0 +2024-09-17 00:29:19,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=109760.0, ans=0.125 +2024-09-17 00:29:19,846 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:29:33,125 INFO [train.py:1198] (1/2) Epoch 7, batch 300, loss[loss=0.3033, ctc_loss=0.2187, cr_loss=0.4473, attn_decoder_loss=0.3027, over 29521.00 frames. ], tot_loss[loss=0.283, ctc_loss=0.2017, cr_loss=0.4259, attn_decoder_loss=0.2826, over 4508851.44 frames. 
], batch size: 92, lr: 1.64e-02, grad_scale: 8.0 +2024-09-17 00:29:34,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=109800.0, ans=0.2 +2024-09-17 00:29:39,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=109800.0, ans=0.025 +2024-09-17 00:29:56,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=109840.0, ans=0.0 +2024-09-17 00:29:56,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=109840.0, ans=0.1 +2024-09-17 00:29:58,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=109840.0, ans=0.0 +2024-09-17 00:30:09,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=109880.0, ans=0.125 +2024-09-17 00:30:09,560 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.06 vs. limit=6.0 +2024-09-17 00:30:15,588 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.81 vs. limit=6.0 +2024-09-17 00:30:19,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=109920.0, ans=0.0 +2024-09-17 00:30:25,690 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.782e+01 1.034e+02 1.140e+02 1.272e+02 2.553e+02, threshold=2.279e+02, percent-clipped=1.0 +2024-09-17 00:30:30,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=109920.0, ans=0.125 +2024-09-17 00:30:35,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=109920.0, ans=0.2 +2024-09-17 00:30:44,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=109960.0, ans=0.0 +2024-09-17 00:30:54,851 INFO [train.py:1198] (1/2) Epoch 7, batch 350, loss[loss=0.2515, ctc_loss=0.1684, cr_loss=0.3941, attn_decoder_loss=0.2519, over 29330.00 frames. ], tot_loss[loss=0.2835, ctc_loss=0.2019, cr_loss=0.4269, attn_decoder_loss=0.2831, over 4793855.45 frames. ], batch size: 71, lr: 1.64e-02, grad_scale: 4.0 +2024-09-17 00:31:08,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=110040.0, ans=0.1 +2024-09-17 00:31:25,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.96 vs. limit=15.0 +2024-09-17 00:31:34,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=110080.0, ans=0.125 +2024-09-17 00:31:50,044 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.42 vs. 
limit=6.0 +2024-09-17 00:32:03,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=110160.0, ans=0.0 +2024-09-17 00:32:10,454 INFO [train.py:1198] (1/2) Epoch 7, batch 400, loss[loss=0.2882, ctc_loss=0.1998, cr_loss=0.414, attn_decoder_loss=0.2888, over 29707.00 frames. ], tot_loss[loss=0.2829, ctc_loss=0.2008, cr_loss=0.4254, attn_decoder_loss=0.2825, over 5024103.71 frames. ], batch size: 82, lr: 1.64e-02, grad_scale: 8.0 +2024-09-17 00:32:10,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=110200.0, ans=0.025 +2024-09-17 00:32:12,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=110200.0, ans=0.1 +2024-09-17 00:32:13,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=110200.0, ans=0.0 +2024-09-17 00:32:34,212 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.33 vs. limit=22.5 +2024-09-17 00:32:46,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=110280.0, ans=0.0 +2024-09-17 00:32:59,552 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.594e+01 1.062e+02 1.160e+02 1.275e+02 1.904e+02, threshold=2.320e+02, percent-clipped=0.0 +2024-09-17 00:33:12,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=110360.0, ans=0.125 +2024-09-17 00:33:18,317 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:33:27,322 INFO [train.py:1198] (1/2) Epoch 7, batch 450, loss[loss=0.2926, ctc_loss=0.2141, cr_loss=0.4276, attn_decoder_loss=0.2919, over 29685.00 frames. ], tot_loss[loss=0.2828, ctc_loss=0.2009, cr_loss=0.4245, attn_decoder_loss=0.2824, over 5186932.93 frames. ], batch size: 83, lr: 1.64e-02, grad_scale: 4.0 +2024-09-17 00:33:27,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=110400.0, ans=0.125 +2024-09-17 00:33:37,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=110400.0, ans=0.125 +2024-09-17 00:34:29,460 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:34:48,895 INFO [train.py:1198] (1/2) Epoch 7, batch 500, loss[loss=0.2969, ctc_loss=0.2134, cr_loss=0.4371, attn_decoder_loss=0.2965, over 29413.00 frames. ], tot_loss[loss=0.2821, ctc_loss=0.2002, cr_loss=0.4249, attn_decoder_loss=0.2817, over 5328435.97 frames. ], batch size: 94, lr: 1.63e-02, grad_scale: 8.0 +2024-09-17 00:34:49,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=110600.0, ans=0.125 +2024-09-17 00:34:57,357 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=24.76 vs. 
limit=22.5 +2024-09-17 00:34:58,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=110600.0, ans=0.0 +2024-09-17 00:35:00,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=110600.0, ans=0.0 +2024-09-17 00:35:03,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=110640.0, ans=0.2 +2024-09-17 00:35:24,973 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.15 vs. limit=12.0 +2024-09-17 00:35:27,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=110680.0, ans=0.125 +2024-09-17 00:35:31,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=110680.0, ans=0.1 +2024-09-17 00:35:39,103 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.537e+01 1.048e+02 1.174e+02 1.330e+02 3.263e+02, threshold=2.347e+02, percent-clipped=4.0 +2024-09-17 00:35:41,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=110720.0, ans=0.125 +2024-09-17 00:35:44,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=110720.0, ans=0.0 +2024-09-17 00:35:49,102 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.76 vs. limit=22.5 +2024-09-17 00:35:51,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=110760.0, ans=0.0 +2024-09-17 00:35:58,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.16 vs. limit=15.0 +2024-09-17 00:35:59,846 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.50 vs. limit=10.0 +2024-09-17 00:36:04,946 INFO [train.py:1198] (1/2) Epoch 7, batch 550, loss[loss=0.2911, ctc_loss=0.2024, cr_loss=0.4331, attn_decoder_loss=0.2914, over 28783.00 frames. ], tot_loss[loss=0.282, ctc_loss=0.2003, cr_loss=0.4244, attn_decoder_loss=0.2817, over 5421699.28 frames. ], batch size: 104, lr: 1.63e-02, grad_scale: 4.0 +2024-09-17 00:36:06,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=110800.0, ans=0.0 +2024-09-17 00:36:09,086 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.31 vs. 
limit=15.0 +2024-09-17 00:36:25,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=110840.0, ans=0.0 +2024-09-17 00:36:32,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=110840.0, ans=0.125 +2024-09-17 00:36:32,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=110840.0, ans=0.1 +2024-09-17 00:36:49,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=110920.0, ans=0.2 +2024-09-17 00:36:59,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=110920.0, ans=0.1 +2024-09-17 00:37:05,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=110960.0, ans=0.1 +2024-09-17 00:37:12,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=110960.0, ans=0.1 +2024-09-17 00:37:14,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=110960.0, ans=0.95 +2024-09-17 00:37:15,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=110960.0, ans=0.1 +2024-09-17 00:37:21,444 INFO [train.py:1198] (1/2) Epoch 7, batch 600, loss[loss=0.2967, ctc_loss=0.212, cr_loss=0.4357, attn_decoder_loss=0.2964, over 29202.00 frames. ], tot_loss[loss=0.2818, ctc_loss=0.1997, cr_loss=0.4238, attn_decoder_loss=0.2815, over 5507842.29 frames. ], batch size: 100, lr: 1.63e-02, grad_scale: 8.0 +2024-09-17 00:37:37,362 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.80 vs. limit=6.0 +2024-09-17 00:37:49,410 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=15.0 +2024-09-17 00:37:50,816 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.65 vs. 
limit=6.0 +2024-09-17 00:37:58,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=111080.0, ans=0.2 +2024-09-17 00:38:08,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=111120.0, ans=0.2 +2024-09-17 00:38:14,464 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.423e+01 1.086e+02 1.156e+02 1.256e+02 2.672e+02, threshold=2.312e+02, percent-clipped=2.0 +2024-09-17 00:38:17,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=111120.0, ans=0.125 +2024-09-17 00:38:20,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=111120.0, ans=0.07 +2024-09-17 00:38:26,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=111160.0, ans=0.125 +2024-09-17 00:38:40,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=111200.0, ans=0.125 +2024-09-17 00:38:41,960 INFO [train.py:1198] (1/2) Epoch 7, batch 650, loss[loss=0.2735, ctc_loss=0.1822, cr_loss=0.4022, attn_decoder_loss=0.2746, over 29778.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1984, cr_loss=0.4229, attn_decoder_loss=0.2806, over 5585216.94 frames. ], batch size: 81, lr: 1.63e-02, grad_scale: 4.0 +2024-09-17 00:38:59,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=111240.0, ans=0.0 +2024-09-17 00:39:16,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=111280.0, ans=0.2 +2024-09-17 00:39:26,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_na.min_abs, batch_count=111320.0, ans=0.02 +2024-09-17 00:39:35,128 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.44 vs. limit=12.0 +2024-09-17 00:39:35,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=111320.0, ans=0.025 +2024-09-17 00:39:38,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=111320.0, ans=0.125 +2024-09-17 00:39:58,134 INFO [train.py:1198] (1/2) Epoch 7, batch 700, loss[loss=0.2688, ctc_loss=0.1869, cr_loss=0.4206, attn_decoder_loss=0.2686, over 29535.00 frames. ], tot_loss[loss=0.2819, ctc_loss=0.1993, cr_loss=0.4251, attn_decoder_loss=0.2817, over 5637072.48 frames. 
], batch size: 76, lr: 1.63e-02, grad_scale: 8.0 +2024-09-17 00:40:18,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=111440.0, ans=0.1 +2024-09-17 00:40:30,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=111480.0, ans=0.125 +2024-09-17 00:40:35,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=111480.0, ans=0.125 +2024-09-17 00:40:38,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=111480.0, ans=0.2 +2024-09-17 00:40:45,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=111520.0, ans=0.125 +2024-09-17 00:40:45,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=111520.0, ans=0.025 +2024-09-17 00:40:51,488 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.298e+01 1.046e+02 1.126e+02 1.229e+02 1.906e+02, threshold=2.253e+02, percent-clipped=0.0 +2024-09-17 00:40:56,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=111520.0, ans=0.1 +2024-09-17 00:41:02,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=111560.0, ans=0.125 +2024-09-17 00:41:05,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=111560.0, ans=0.07 +2024-09-17 00:41:10,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer_ff2.min_abs, batch_count=111560.0, ans=0.1 +2024-09-17 00:41:14,386 INFO [train.py:1198] (1/2) Epoch 7, batch 750, loss[loss=0.286, ctc_loss=0.1988, cr_loss=0.4353, attn_decoder_loss=0.286, over 29720.00 frames. ], tot_loss[loss=0.2811, ctc_loss=0.1986, cr_loss=0.4242, attn_decoder_loss=0.2809, over 5675617.36 frames. ], batch size: 82, lr: 1.63e-02, grad_scale: 4.0 +2024-09-17 00:41:15,159 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.04 vs. limit=15.0 +2024-09-17 00:41:31,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=111640.0, ans=0.125 +2024-09-17 00:41:36,376 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.98 vs. limit=15.0 +2024-09-17 00:41:42,362 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.69 vs. limit=15.0 +2024-09-17 00:41:43,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.49 vs. limit=22.5 +2024-09-17 00:41:50,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.40 vs. 
limit=22.5 +2024-09-17 00:42:08,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=111720.0, ans=0.2 +2024-09-17 00:42:15,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.11 vs. limit=15.0 +2024-09-17 00:42:23,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=111760.0, ans=0.1 +2024-09-17 00:42:35,180 INFO [train.py:1198] (1/2) Epoch 7, batch 800, loss[loss=0.252, ctc_loss=0.1696, cr_loss=0.385, attn_decoder_loss=0.2526, over 29604.00 frames. ], tot_loss[loss=0.2811, ctc_loss=0.1986, cr_loss=0.4244, attn_decoder_loss=0.2808, over 5707322.87 frames. ], batch size: 73, lr: 1.63e-02, grad_scale: 8.0 +2024-09-17 00:42:36,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=111800.0, ans=0.125 +2024-09-17 00:43:29,888 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.814e+01 1.067e+02 1.173e+02 1.326e+02 3.037e+02, threshold=2.345e+02, percent-clipped=2.0 +2024-09-17 00:43:36,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=111960.0, ans=0.125 +2024-09-17 00:43:58,047 INFO [train.py:1198] (1/2) Epoch 7, batch 850, loss[loss=0.2972, ctc_loss=0.2165, cr_loss=0.433, attn_decoder_loss=0.2966, over 29719.00 frames. ], tot_loss[loss=0.2806, ctc_loss=0.1982, cr_loss=0.4234, attn_decoder_loss=0.2803, over 5736392.31 frames. ], batch size: 89, lr: 1.62e-02, grad_scale: 4.0 +2024-09-17 00:44:08,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=112000.0, ans=15.0 +2024-09-17 00:44:11,366 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.20 vs. limit=10.0 +2024-09-17 00:44:30,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=112080.0, ans=0.125 +2024-09-17 00:44:42,661 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:45:14,256 INFO [train.py:1198] (1/2) Epoch 7, batch 900, loss[loss=0.2597, ctc_loss=0.1791, cr_loss=0.3961, attn_decoder_loss=0.2599, over 29607.00 frames. ], tot_loss[loss=0.2811, ctc_loss=0.1988, cr_loss=0.4236, attn_decoder_loss=0.2808, over 5740920.19 frames. 
], batch size: 73, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:45:31,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=112240.0, ans=0.0 +2024-09-17 00:45:37,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=112240.0, ans=0.2 +2024-09-17 00:45:50,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=112280.0, ans=0.125 +2024-09-17 00:46:02,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=112320.0, ans=10.0 +2024-09-17 00:46:08,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_na.min_abs, batch_count=112320.0, ans=0.02 +2024-09-17 00:46:12,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=112320.0, ans=0.125 +2024-09-17 00:46:14,981 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.034e+01 1.106e+02 1.225e+02 1.378e+02 5.810e+02, threshold=2.450e+02, percent-clipped=7.0 +2024-09-17 00:46:28,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=112360.0, ans=0.125 +2024-09-17 00:46:34,339 INFO [train.py:1198] (1/2) Epoch 7, batch 950, loss[loss=0.2672, ctc_loss=0.1894, cr_loss=0.4215, attn_decoder_loss=0.2665, over 29522.00 frames. ], tot_loss[loss=0.2811, ctc_loss=0.1988, cr_loss=0.4241, attn_decoder_loss=0.2808, over 5743870.70 frames. ], batch size: 74, lr: 1.62e-02, grad_scale: 4.0 +2024-09-17 00:46:57,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=112440.0, ans=0.125 +2024-09-17 00:47:09,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=112480.0, ans=0.0 +2024-09-17 00:47:18,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=112520.0, ans=0.025 +2024-09-17 00:47:50,428 INFO [train.py:1198] (1/2) Epoch 7, batch 1000, loss[loss=0.2575, ctc_loss=0.1687, cr_loss=0.3832, attn_decoder_loss=0.2588, over 29507.00 frames. ], tot_loss[loss=0.2817, ctc_loss=0.1993, cr_loss=0.4239, attn_decoder_loss=0.2814, over 5736914.09 frames. ], batch size: 77, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:47:59,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=112600.0, ans=0.1 +2024-09-17 00:48:02,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.89 vs. limit=22.5 +2024-09-17 00:48:12,458 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.99 vs. 
limit=10.0 +2024-09-17 00:48:30,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=112680.0, ans=0.0 +2024-09-17 00:48:39,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=112720.0, ans=0.025 +2024-09-17 00:48:41,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=112720.0, ans=0.125 +2024-09-17 00:48:46,758 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.415e+01 1.043e+02 1.127e+02 1.327e+02 3.931e+02, threshold=2.254e+02, percent-clipped=2.0 +2024-09-17 00:48:51,629 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:48:53,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=112760.0, ans=10.0 +2024-09-17 00:48:59,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=112760.0, ans=0.035 +2024-09-17 00:49:02,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=112760.0, ans=0.2 +2024-09-17 00:49:06,951 INFO [train.py:1198] (1/2) Epoch 7, batch 1050, loss[loss=0.2825, ctc_loss=0.1927, cr_loss=0.4227, attn_decoder_loss=0.2831, over 29687.00 frames. ], tot_loss[loss=0.281, ctc_loss=0.1986, cr_loss=0.4231, attn_decoder_loss=0.2808, over 5745449.14 frames. ], batch size: 85, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:49:16,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=112800.0, ans=0.05 +2024-09-17 00:49:18,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=112800.0, ans=0.125 +2024-09-17 00:49:54,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=112920.0, ans=0.0 +2024-09-17 00:50:07,613 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:50:09,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=112920.0, ans=0.0 +2024-09-17 00:50:09,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=112920.0, ans=0.1 +2024-09-17 00:50:28,827 INFO [train.py:1198] (1/2) Epoch 7, batch 1100, loss[loss=0.2753, ctc_loss=0.1969, cr_loss=0.431, attn_decoder_loss=0.2744, over 29460.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1981, cr_loss=0.4229, attn_decoder_loss=0.2805, over 5758411.73 frames. ], batch size: 78, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:50:37,147 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.85 vs. 
limit=15.0 +2024-09-17 00:50:39,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=113000.0, ans=0.125 +2024-09-17 00:50:56,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=113040.0, ans=0.125 +2024-09-17 00:51:26,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=113120.0, ans=0.0 +2024-09-17 00:51:28,966 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.665e+01 1.036e+02 1.119e+02 1.238e+02 1.913e+02, threshold=2.238e+02, percent-clipped=0.0 +2024-09-17 00:51:31,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=113160.0, ans=0.0 +2024-09-17 00:51:35,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=113160.0, ans=0.1 +2024-09-17 00:51:45,879 INFO [train.py:1198] (1/2) Epoch 7, batch 1150, loss[loss=0.2718, ctc_loss=0.1871, cr_loss=0.4005, attn_decoder_loss=0.2723, over 29436.00 frames. ], tot_loss[loss=0.281, ctc_loss=0.1989, cr_loss=0.4233, attn_decoder_loss=0.2807, over 5755726.54 frames. ], batch size: 78, lr: 1.62e-02, grad_scale: 4.0 +2024-09-17 00:51:47,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=113200.0, ans=0.125 +2024-09-17 00:51:54,858 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.05 vs. limit=15.0 +2024-09-17 00:52:24,139 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.74 vs. limit=10.0 +2024-09-17 00:52:25,519 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=8.56 vs. limit=15.0 +2024-09-17 00:52:30,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=113320.0, ans=0.0 +2024-09-17 00:52:32,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=113320.0, ans=0.0 +2024-09-17 00:52:51,127 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:52:52,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=113360.0, ans=0.95 +2024-09-17 00:53:02,764 INFO [train.py:1198] (1/2) Epoch 7, batch 1200, loss[loss=0.2659, ctc_loss=0.1706, cr_loss=0.3919, attn_decoder_loss=0.2678, over 29688.00 frames. ], tot_loss[loss=0.2819, ctc_loss=0.1998, cr_loss=0.4243, attn_decoder_loss=0.2816, over 5747494.48 frames. 
], batch size: 85, lr: 1.62e-02, grad_scale: 8.0 +2024-09-17 00:53:12,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=113400.0, ans=0.1 +2024-09-17 00:53:22,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=113440.0, ans=0.125 +2024-09-17 00:53:24,321 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:53:27,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=113440.0, ans=0.1 +2024-09-17 00:53:28,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=113440.0, ans=0.07 +2024-09-17 00:53:30,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=113440.0, ans=0.125 +2024-09-17 00:53:39,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=113480.0, ans=0.09899494936611666 +2024-09-17 00:54:08,731 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.382e+01 1.039e+02 1.128e+02 1.242e+02 2.195e+02, threshold=2.256e+02, percent-clipped=0.0 +2024-09-17 00:54:12,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=113560.0, ans=0.125 +2024-09-17 00:54:13,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=113560.0, ans=0.1 +2024-09-17 00:54:18,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=113560.0, ans=0.0 +2024-09-17 00:54:24,448 INFO [train.py:1198] (1/2) Epoch 7, batch 1250, loss[loss=0.2924, ctc_loss=0.1975, cr_loss=0.424, attn_decoder_loss=0.2935, over 29500.00 frames. ], tot_loss[loss=0.2823, ctc_loss=0.1999, cr_loss=0.4248, attn_decoder_loss=0.282, over 5775964.62 frames. ], batch size: 92, lr: 1.61e-02, grad_scale: 4.0 +2024-09-17 00:54:26,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=113600.0, ans=0.0 +2024-09-17 00:54:41,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=113640.0, ans=0.0 +2024-09-17 00:54:43,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=113640.0, ans=0.1 +2024-09-17 00:55:40,874 INFO [train.py:1198] (1/2) Epoch 7, batch 1300, loss[loss=0.2968, ctc_loss=0.217, cr_loss=0.4369, attn_decoder_loss=0.2959, over 28183.00 frames. ], tot_loss[loss=0.2817, ctc_loss=0.1993, cr_loss=0.4233, attn_decoder_loss=0.2815, over 5779154.72 frames. 
], batch size: 111, lr: 1.61e-02, grad_scale: 8.0 +2024-09-17 00:55:54,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=113840.0, ans=0.1 +2024-09-17 00:56:37,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=113920.0, ans=0.125 +2024-09-17 00:56:43,507 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.471e+01 1.026e+02 1.124e+02 1.254e+02 2.028e+02, threshold=2.249e+02, percent-clipped=0.0 +2024-09-17 00:56:55,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=114000.0, ans=0.05 +2024-09-17 00:56:57,146 INFO [train.py:1198] (1/2) Epoch 7, batch 1350, loss[loss=0.2832, ctc_loss=0.1986, cr_loss=0.4311, attn_decoder_loss=0.283, over 29756.00 frames. ], tot_loss[loss=0.2814, ctc_loss=0.1985, cr_loss=0.4236, attn_decoder_loss=0.2812, over 5796439.18 frames. ], batch size: 81, lr: 1.61e-02, grad_scale: 4.0 +2024-09-17 00:57:05,306 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.72 vs. limit=12.0 +2024-09-17 00:57:08,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=114000.0, ans=0.1 +2024-09-17 00:57:15,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=114040.0, ans=0.0 +2024-09-17 00:57:21,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=114040.0, ans=0.1 +2024-09-17 00:57:27,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=114080.0, ans=0.125 +2024-09-17 00:57:38,538 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.42 vs. limit=15.0 +2024-09-17 00:57:48,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=114120.0, ans=0.2 +2024-09-17 00:58:01,165 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 00:58:04,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=114160.0, ans=0.1 +2024-09-17 00:58:10,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=114160.0, ans=0.1 +2024-09-17 00:58:18,160 INFO [train.py:1198] (1/2) Epoch 7, batch 1400, loss[loss=0.2539, ctc_loss=0.1816, cr_loss=0.3871, attn_decoder_loss=0.2533, over 29595.00 frames. ], tot_loss[loss=0.2812, ctc_loss=0.1983, cr_loss=0.4236, attn_decoder_loss=0.281, over 5807321.96 frames. 
], batch size: 69, lr: 1.61e-02, grad_scale: 8.0 +2024-09-17 00:58:20,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=114200.0, ans=0.07 +2024-09-17 00:58:50,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=114280.0, ans=0.0 +2024-09-17 00:59:14,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=114320.0, ans=0.125 +2024-09-17 00:59:16,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=114320.0, ans=0.125 +2024-09-17 00:59:21,790 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.456e+01 9.951e+01 1.071e+02 1.173e+02 2.370e+02, threshold=2.143e+02, percent-clipped=1.0 +2024-09-17 00:59:31,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=114360.0, ans=0.07 +2024-09-17 00:59:34,533 INFO [train.py:1198] (1/2) Epoch 7, batch 1450, loss[loss=0.2955, ctc_loss=0.2043, cr_loss=0.4622, attn_decoder_loss=0.2953, over 29442.00 frames. ], tot_loss[loss=0.2815, ctc_loss=0.1985, cr_loss=0.4237, attn_decoder_loss=0.2813, over 5804540.86 frames. ], batch size: 94, lr: 1.61e-02, grad_scale: 4.0 +2024-09-17 00:59:34,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=114400.0, ans=0.025 +2024-09-17 00:59:34,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=114400.0, ans=0.125 +2024-09-17 00:59:48,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=114440.0, ans=0.125 +2024-09-17 01:00:00,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=114440.0, ans=0.125 +2024-09-17 01:00:06,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=114480.0, ans=0.04949747468305833 +2024-09-17 01:00:08,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=114480.0, ans=0.125 +2024-09-17 01:00:14,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.12 vs. limit=6.0 +2024-09-17 01:00:23,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.90 vs. limit=15.0 +2024-09-17 01:00:41,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=114560.0, ans=0.0 +2024-09-17 01:00:41,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=114560.0, ans=0.0 +2024-09-17 01:00:49,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=114600.0, ans=0.125 +2024-09-17 01:00:50,574 INFO [train.py:1198] (1/2) Epoch 7, batch 1500, loss[loss=0.2847, ctc_loss=0.1942, cr_loss=0.4076, attn_decoder_loss=0.2857, over 29647.00 frames. 
], tot_loss[loss=0.2815, ctc_loss=0.1982, cr_loss=0.4233, attn_decoder_loss=0.2814, over 5806535.69 frames. ], batch size: 86, lr: 1.61e-02, grad_scale: 8.0 +2024-09-17 01:01:19,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=114680.0, ans=0.0 +2024-09-17 01:01:29,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=114680.0, ans=0.0 +2024-09-17 01:01:58,848 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.036e+01 1.083e+02 1.173e+02 1.293e+02 2.517e+02, threshold=2.346e+02, percent-clipped=1.0 +2024-09-17 01:02:11,568 INFO [train.py:1198] (1/2) Epoch 7, batch 1550, loss[loss=0.3032, ctc_loss=0.2146, cr_loss=0.4651, attn_decoder_loss=0.3027, over 29497.00 frames. ], tot_loss[loss=0.2816, ctc_loss=0.1987, cr_loss=0.4241, attn_decoder_loss=0.2814, over 5781072.45 frames. ], batch size: 90, lr: 1.61e-02, grad_scale: 4.0 +2024-09-17 01:02:11,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=114800.0, ans=0.0 +2024-09-17 01:02:14,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=114800.0, ans=0.0 +2024-09-17 01:02:20,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=114800.0, ans=0.2 +2024-09-17 01:02:26,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=114840.0, ans=0.1 +2024-09-17 01:02:30,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=114840.0, ans=0.1 +2024-09-17 01:02:35,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten.whitening_limit, batch_count=114840.0, ans=15.0 +2024-09-17 01:02:37,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=114840.0, ans=0.0 +2024-09-17 01:02:43,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=114880.0, ans=0.125 +2024-09-17 01:02:54,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=114880.0, ans=0.0 +2024-09-17 01:03:12,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=114960.0, ans=0.0 +2024-09-17 01:03:19,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.95 vs. limit=15.0 +2024-09-17 01:03:27,529 INFO [train.py:1198] (1/2) Epoch 7, batch 1600, loss[loss=0.287, ctc_loss=0.1898, cr_loss=0.4299, attn_decoder_loss=0.2882, over 29690.00 frames. ], tot_loss[loss=0.2817, ctc_loss=0.1993, cr_loss=0.4244, attn_decoder_loss=0.2815, over 5762970.39 frames. ], batch size: 85, lr: 1.60e-02, grad_scale: 8.0 +2024-09-17 01:03:27,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=115000.0, ans=0.2 +2024-09-17 01:03:41,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.77 vs. 
limit=15.0 +2024-09-17 01:03:43,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=115040.0, ans=0.1 +2024-09-17 01:03:44,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=115040.0, ans=0.125 +2024-09-17 01:03:49,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=115040.0, ans=0.1 +2024-09-17 01:04:03,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=115080.0, ans=0.125 +2024-09-17 01:04:15,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=115120.0, ans=0.2 +2024-09-17 01:04:18,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=115120.0, ans=0.0 +2024-09-17 01:04:18,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=115120.0, ans=0.125 +2024-09-17 01:04:25,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=115120.0, ans=0.0 +2024-09-17 01:04:28,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=115160.0, ans=0.1 +2024-09-17 01:04:34,567 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.309e+01 1.047e+02 1.116e+02 1.262e+02 4.085e+02, threshold=2.232e+02, percent-clipped=3.0 +2024-09-17 01:04:43,949 INFO [train.py:1198] (1/2) Epoch 7, batch 1650, loss[loss=0.2839, ctc_loss=0.1958, cr_loss=0.4311, attn_decoder_loss=0.2841, over 29705.00 frames. ], tot_loss[loss=0.2813, ctc_loss=0.1987, cr_loss=0.4238, attn_decoder_loss=0.281, over 5756012.94 frames. ], batch size: 89, lr: 1.60e-02, grad_scale: 4.0 +2024-09-17 01:05:01,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=115240.0, ans=0.1 +2024-09-17 01:05:38,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=115320.0, ans=0.125 +2024-09-17 01:05:44,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=115320.0, ans=0.2 +2024-09-17 01:06:04,532 INFO [train.py:1198] (1/2) Epoch 7, batch 1700, loss[loss=0.2392, ctc_loss=0.161, cr_loss=0.3499, attn_decoder_loss=0.2401, over 29584.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1978, cr_loss=0.423, attn_decoder_loss=0.2806, over 5778600.67 frames. 
], batch size: 69, lr: 1.60e-02, grad_scale: 8.0 +2024-09-17 01:06:18,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=115440.0, ans=0.035 +2024-09-17 01:06:25,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=115440.0, ans=0.125 +2024-09-17 01:06:26,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=115440.0, ans=0.1 +2024-09-17 01:06:29,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=115440.0, ans=0.125 +2024-09-17 01:06:43,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=115480.0, ans=0.125 +2024-09-17 01:07:01,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=115520.0, ans=0.125 +2024-09-17 01:07:01,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=115520.0, ans=0.125 +2024-09-17 01:07:03,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=115520.0, ans=0.125 +2024-09-17 01:07:06,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=115560.0, ans=0.125 +2024-09-17 01:07:12,701 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:07:13,850 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.613e+01 9.967e+01 1.096e+02 1.177e+02 1.822e+02, threshold=2.192e+02, percent-clipped=0.0 +2024-09-17 01:07:20,494 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:07:21,577 INFO [train.py:1198] (1/2) Epoch 7, batch 1750, loss[loss=0.2488, ctc_loss=0.1701, cr_loss=0.392, attn_decoder_loss=0.2489, over 29396.00 frames. ], tot_loss[loss=0.2803, ctc_loss=0.1972, cr_loss=0.4227, attn_decoder_loss=0.2801, over 5787852.39 frames. ], batch size: 67, lr: 1.60e-02, grad_scale: 4.0 +2024-09-17 01:07:26,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=115600.0, ans=0.04949747468305833 +2024-09-17 01:07:51,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=115680.0, ans=0.125 +2024-09-17 01:08:05,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=115680.0, ans=0.125 +2024-09-17 01:08:16,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=5.22 vs. limit=12.0 +2024-09-17 01:08:35,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=115760.0, ans=0.07 +2024-09-17 01:08:38,051 INFO [train.py:1198] (1/2) Epoch 7, batch 1800, loss[loss=0.2943, ctc_loss=0.208, cr_loss=0.4366, attn_decoder_loss=0.2942, over 29689.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1976, cr_loss=0.4229, attn_decoder_loss=0.2805, over 5790610.99 frames. 
], batch size: 83, lr: 1.60e-02, grad_scale: 8.0 +2024-09-17 01:08:43,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.74 vs. limit=5.0 +2024-09-17 01:09:07,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=115880.0, ans=0.0 +2024-09-17 01:09:16,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=115880.0, ans=0.125 +2024-09-17 01:09:50,654 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.679e+01 1.033e+02 1.126e+02 1.316e+02 3.290e+02, threshold=2.253e+02, percent-clipped=1.0 +2024-09-17 01:09:57,306 INFO [train.py:1198] (1/2) Epoch 7, batch 1850, loss[loss=0.2884, ctc_loss=0.2013, cr_loss=0.4209, attn_decoder_loss=0.2887, over 29654.00 frames. ], tot_loss[loss=0.28, ctc_loss=0.1968, cr_loss=0.4222, attn_decoder_loss=0.2799, over 5795160.35 frames. ], batch size: 86, lr: 1.60e-02, grad_scale: 4.0 +2024-09-17 01:10:07,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=116000.0, ans=0.1 +2024-09-17 01:10:26,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=7.07 vs. limit=12.0 +2024-09-17 01:10:39,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=116080.0, ans=0.125 +2024-09-17 01:10:39,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=116080.0, ans=0.125 +2024-09-17 01:10:51,886 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.30 vs. limit=15.0 +2024-09-17 01:10:57,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=116120.0, ans=0.025 +2024-09-17 01:11:09,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=116160.0, ans=0.125 +2024-09-17 01:11:11,491 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.35 vs. limit=12.0 +2024-09-17 01:11:14,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.79 vs. limit=15.0 +2024-09-17 01:11:14,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.26 vs. limit=15.0 +2024-09-17 01:11:15,068 INFO [train.py:1198] (1/2) Epoch 7, batch 1900, loss[loss=0.2882, ctc_loss=0.1939, cr_loss=0.4034, attn_decoder_loss=0.2897, over 29688.00 frames. ], tot_loss[loss=0.2805, ctc_loss=0.1968, cr_loss=0.4227, attn_decoder_loss=0.2804, over 5802719.11 frames. ], batch size: 89, lr: 1.60e-02, grad_scale: 8.0 +2024-09-17 01:11:26,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=24.77 vs. 
limit=22.5 +2024-09-17 01:11:29,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=116240.0, ans=0.125 +2024-09-17 01:11:29,376 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.95 vs. limit=22.5 +2024-09-17 01:11:40,491 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.61 vs. limit=6.0 +2024-09-17 01:11:42,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=116240.0, ans=0.125 +2024-09-17 01:11:47,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_na.min_abs, batch_count=116280.0, ans=0.02 +2024-09-17 01:12:26,915 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.068e+01 1.016e+02 1.078e+02 1.162e+02 1.899e+02, threshold=2.156e+02, percent-clipped=0.0 +2024-09-17 01:12:31,489 INFO [train.py:1198] (1/2) Epoch 7, batch 1950, loss[loss=0.2819, ctc_loss=0.194, cr_loss=0.455, attn_decoder_loss=0.2816, over 29463.00 frames. ], tot_loss[loss=0.2811, ctc_loss=0.1968, cr_loss=0.4239, attn_decoder_loss=0.2811, over 5817850.65 frames. ], batch size: 78, lr: 1.60e-02, grad_scale: 4.0 +2024-09-17 01:12:34,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=116400.0, ans=0.125 +2024-09-17 01:12:35,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=116400.0, ans=0.0 +2024-09-17 01:12:41,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=116400.0, ans=0.125 +2024-09-17 01:12:42,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=116400.0, ans=0.125 +2024-09-17 01:12:53,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=116440.0, ans=0.125 +2024-09-17 01:12:54,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=116440.0, ans=0.125 +2024-09-17 01:12:57,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=116440.0, ans=0.125 +2024-09-17 01:13:22,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=116520.0, ans=0.0 +2024-09-17 01:13:23,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=116520.0, ans=0.0 +2024-09-17 01:13:38,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=116560.0, ans=0.0 +2024-09-17 01:13:50,434 INFO [train.py:1198] (1/2) Epoch 7, batch 2000, loss[loss=0.2518, ctc_loss=0.1714, cr_loss=0.3832, attn_decoder_loss=0.2522, over 29372.00 frames. ], tot_loss[loss=0.2819, ctc_loss=0.1981, cr_loss=0.4247, attn_decoder_loss=0.2818, over 5796724.25 frames. 
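The recurring optim.py WARNING lines summarize recent per-batch gradient norms as five quantiles and derive the clipping threshold from them: with Clipping_scale=2.0 the threshold is twice the running median (e.g. 2 x 1.078e+02 = 2.156e+02 in the entry above), and percent-clipped reports how often that threshold was exceeded. A rough sketch of that bookkeeping follows; the window length and reporting cadence are assumptions, not the optimizer's actual settings.

```python
# Rough sketch of median-based gradient clipping as suggested by the
# optim.py WARNING lines: threshold = clipping_scale * median of recent
# gradient norms (2 * 1.078e+02 = 2.156e+02 in the entry above).
# Window size is an illustrative assumption.
from collections import deque
import torch

class MedianGradClipper:
    def __init__(self, clipping_scale=2.0, window=128):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)
        self.clipped = deque(maxlen=window)

    def __call__(self, parameters) -> float:
        params = [p for p in parameters if p.grad is not None]
        if not params:
            return 0.0
        norm = torch.sqrt(sum((p.grad ** 2).sum() for p in params)).item()
        self.norms.append(norm)
        median = torch.tensor(list(self.norms)).median().item()
        threshold = self.clipping_scale * median
        self.clipped.append(norm > threshold)
        if norm > threshold:  # scale all gradients down to the threshold
            scale = threshold / norm
            for p in params:
                p.grad.mul_(scale)
        # Fraction of recent batches that were clipped, as a percentage.
        return 100.0 * sum(self.clipped) / len(self.clipped)
```

Deriving the threshold from the running median, rather than using a fixed clip norm, keeps clipping rare (the logged percent-clipped stays in the 0-4 range) while still suppressing outlier batches.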
], batch size: 67, lr: 1.59e-02, grad_scale: 8.0 +2024-09-17 01:13:57,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=116600.0, ans=0.125 +2024-09-17 01:14:54,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=116760.0, ans=0.1 +2024-09-17 01:15:02,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=116760.0, ans=0.025 +2024-09-17 01:15:06,318 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.446e+01 1.080e+02 1.203e+02 1.389e+02 2.597e+02, threshold=2.406e+02, percent-clipped=3.0 +2024-09-17 01:15:09,741 INFO [train.py:1198] (1/2) Epoch 7, batch 2050, loss[loss=0.2647, ctc_loss=0.191, cr_loss=0.4184, attn_decoder_loss=0.2636, over 29426.00 frames. ], tot_loss[loss=0.2813, ctc_loss=0.1982, cr_loss=0.424, attn_decoder_loss=0.2811, over 5787173.29 frames. ], batch size: 70, lr: 1.59e-02, grad_scale: 4.0 +2024-09-17 01:15:10,703 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.37 vs. limit=22.5 +2024-09-17 01:15:10,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.94 vs. limit=22.5 +2024-09-17 01:15:15,211 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.79 vs. limit=10.0 +2024-09-17 01:15:28,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=116840.0, ans=0.1 +2024-09-17 01:15:31,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=116840.0, ans=0.0 +2024-09-17 01:16:23,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=116960.0, ans=0.125 +2024-09-17 01:16:25,959 INFO [train.py:1198] (1/2) Epoch 7, batch 2100, loss[loss=0.2693, ctc_loss=0.184, cr_loss=0.4087, attn_decoder_loss=0.2697, over 29773.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1978, cr_loss=0.4235, attn_decoder_loss=0.2806, over 5798056.55 frames. ], batch size: 81, lr: 1.59e-02, grad_scale: 8.0 +2024-09-17 01:16:29,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=117000.0, ans=10.0 +2024-09-17 01:16:30,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=117000.0, ans=0.1 +2024-09-17 01:16:34,494 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.37 vs. limit=22.5 +2024-09-17 01:16:47,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=117040.0, ans=0.0 +2024-09-17 01:16:57,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=117080.0, ans=0.1 +2024-09-17 01:16:59,965 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.44 vs. 
limit=15.0 +2024-09-17 01:17:02,370 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:17:06,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=117080.0, ans=0.125 +2024-09-17 01:17:13,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=117120.0, ans=0.1 +2024-09-17 01:17:25,637 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.53 vs. limit=12.0 +2024-09-17 01:17:32,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=117160.0, ans=0.0 +2024-09-17 01:17:33,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=117160.0, ans=15.0 +2024-09-17 01:17:41,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=117160.0, ans=0.125 +2024-09-17 01:17:42,584 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.423e+01 1.035e+02 1.132e+02 1.239e+02 1.917e+02, threshold=2.264e+02, percent-clipped=0.0 +2024-09-17 01:17:44,163 INFO [train.py:1198] (1/2) Epoch 7, batch 2150, loss[loss=0.281, ctc_loss=0.1943, cr_loss=0.4185, attn_decoder_loss=0.2814, over 29472.00 frames. ], tot_loss[loss=0.2801, ctc_loss=0.1971, cr_loss=0.4236, attn_decoder_loss=0.2799, over 5814052.40 frames. ], batch size: 78, lr: 1.59e-02, grad_scale: 4.0 +2024-09-17 01:17:49,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=117200.0, ans=0.125 +2024-09-17 01:18:33,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.57 vs. limit=22.5 +2024-09-17 01:18:38,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=117320.0, ans=0.1 +2024-09-17 01:18:40,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=117320.0, ans=0.0 +2024-09-17 01:18:41,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=117320.0, ans=0.1 +2024-09-17 01:18:55,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.08 vs. limit=15.0 +2024-09-17 01:19:02,633 INFO [train.py:1198] (1/2) Epoch 7, batch 2200, loss[loss=0.2929, ctc_loss=0.2057, cr_loss=0.4157, attn_decoder_loss=0.2933, over 29642.00 frames. ], tot_loss[loss=0.28, ctc_loss=0.1972, cr_loss=0.423, attn_decoder_loss=0.2798, over 5810882.92 frames. 
], batch size: 86, lr: 1.59e-02, grad_scale: 8.0 +2024-09-17 01:19:04,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=117400.0, ans=0.1 +2024-09-17 01:19:04,675 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:19:13,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=117400.0, ans=0.04949747468305833 +2024-09-17 01:19:15,896 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.81 vs. limit=6.0 +2024-09-17 01:19:19,006 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.40 vs. limit=22.5 +2024-09-17 01:19:32,392 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.17 vs. limit=15.0 +2024-09-17 01:19:38,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.81 vs. limit=15.0 +2024-09-17 01:19:45,194 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.84 vs. limit=22.5 +2024-09-17 01:19:45,239 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.71 vs. limit=15.0 +2024-09-17 01:20:19,562 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.574e+01 1.056e+02 1.108e+02 1.251e+02 3.146e+02, threshold=2.216e+02, percent-clipped=2.0 +2024-09-17 01:20:19,589 INFO [train.py:1198] (1/2) Epoch 7, batch 2250, loss[loss=0.2737, ctc_loss=0.1844, cr_loss=0.4167, attn_decoder_loss=0.2744, over 29716.00 frames. ], tot_loss[loss=0.2796, ctc_loss=0.1966, cr_loss=0.4228, attn_decoder_loss=0.2794, over 5811219.87 frames. ], batch size: 82, lr: 1.59e-02, grad_scale: 4.0 +2024-09-17 01:20:24,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=117600.0, ans=0.0 +2024-09-17 01:20:59,709 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.22 vs. limit=22.5 +2024-09-17 01:21:04,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=117720.0, ans=0.125 +2024-09-17 01:21:07,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=117720.0, ans=0.0 +2024-09-17 01:21:11,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=117720.0, ans=0.125 +2024-09-17 01:21:37,947 INFO [train.py:1198] (1/2) Epoch 7, batch 2300, loss[loss=0.2509, ctc_loss=0.1701, cr_loss=0.3744, attn_decoder_loss=0.2516, over 29325.00 frames. ], tot_loss[loss=0.2785, ctc_loss=0.1958, cr_loss=0.4212, attn_decoder_loss=0.2783, over 5799226.26 frames. ], batch size: 71, lr: 1.59e-02, grad_scale: 8.0 +2024-09-17 01:21:39,191 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=13.27 vs. 
limit=22.5 +2024-09-17 01:21:48,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.01 vs. limit=15.0 +2024-09-17 01:21:56,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=117840.0, ans=0.0 +2024-09-17 01:22:21,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=117880.0, ans=0.1 +2024-09-17 01:22:39,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=117960.0, ans=0.125 +2024-09-17 01:22:52,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=117960.0, ans=0.125 +2024-09-17 01:22:56,188 INFO [train.py:1198] (1/2) Epoch 7, batch 2350, loss[loss=0.288, ctc_loss=0.2023, cr_loss=0.4531, attn_decoder_loss=0.2874, over 29684.00 frames. ], tot_loss[loss=0.279, ctc_loss=0.1962, cr_loss=0.4229, attn_decoder_loss=0.2788, over 5804630.54 frames. ], batch size: 83, lr: 1.59e-02, grad_scale: 4.0 +2024-09-17 01:22:56,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=118000.0, ans=0.025 +2024-09-17 01:22:57,669 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.765e+01 1.037e+02 1.131e+02 1.224e+02 2.356e+02, threshold=2.262e+02, percent-clipped=1.0 +2024-09-17 01:22:59,870 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=12.0 +2024-09-17 01:23:03,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=118000.0, ans=0.025 +2024-09-17 01:23:13,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=118040.0, ans=0.125 +2024-09-17 01:23:17,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=118040.0, ans=0.125 +2024-09-17 01:23:58,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=118160.0, ans=0.0 +2024-09-17 01:24:01,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=118160.0, ans=0.07 +2024-09-17 01:24:04,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=118160.0, ans=0.125 +2024-09-17 01:24:09,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=118160.0, ans=0.025 +2024-09-17 01:24:12,090 INFO [train.py:1198] (1/2) Epoch 7, batch 2400, loss[loss=0.286, ctc_loss=0.2183, cr_loss=0.4411, attn_decoder_loss=0.2837, over 29541.00 frames. ], tot_loss[loss=0.2798, ctc_loss=0.1968, cr_loss=0.4241, attn_decoder_loss=0.2795, over 5807959.52 frames. 
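Each train.py loss line above reports four quantities per batch: the total loss and its CTC, consistency-regularization (cr) and attention-decoder components. The sketch below shows how such a composite objective can be assembled and logged; the weight values are placeholders for illustration, not the recipe's actual settings.

```python
# Hedged sketch of a composite CTC + attention-decoder objective with a
# consistency-regularization (CR) term, matching the per-batch fields
# logged above (loss, ctc_loss, cr_loss, attn_decoder_loss). All three
# weights below are placeholder assumptions; the recipe's values may differ.
import torch

def combine_losses(ctc_loss: torch.Tensor,
                   cr_loss: torch.Tensor,
                   attn_decoder_loss: torch.Tensor,
                   ctc_weight: float = 0.1,    # assumption
                   cr_weight: float = 0.2,     # assumption
                   attn_weight: float = 1.0):  # assumption
    loss = (ctc_weight * ctc_loss
            + cr_weight * cr_loss
            + attn_weight * attn_decoder_loss)
    # The same four quantities that appear in each train.py log line.
    info = {"loss": loss.item(),
            "ctc_loss": ctc_loss.item(),
            "cr_loss": cr_loss.item(),
            "attn_decoder_loss": attn_decoder_loss.item()}
    return loss, info
```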
], batch size: 76, lr: 1.58e-02, grad_scale: 8.0 +2024-09-17 01:24:21,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=118200.0, ans=0.125 +2024-09-17 01:25:32,289 INFO [train.py:1198] (1/2) Epoch 7, batch 2450, loss[loss=0.2919, ctc_loss=0.2098, cr_loss=0.4507, attn_decoder_loss=0.291, over 29697.00 frames. ], tot_loss[loss=0.281, ctc_loss=0.198, cr_loss=0.4255, attn_decoder_loss=0.2808, over 5785995.13 frames. ], batch size: 82, lr: 1.58e-02, grad_scale: 4.0 +2024-09-17 01:25:35,217 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.912e+01 1.061e+02 1.128e+02 1.247e+02 1.833e+02, threshold=2.256e+02, percent-clipped=0.0 +2024-09-17 01:25:46,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.84 vs. limit=15.0 +2024-09-17 01:25:49,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.04 vs. limit=15.0 +2024-09-17 01:26:00,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=118440.0, ans=0.1 +2024-09-17 01:26:11,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=118480.0, ans=0.0 +2024-09-17 01:26:17,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=118480.0, ans=0.125 +2024-09-17 01:26:17,910 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.33 vs. limit=10.0 +2024-09-17 01:26:37,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=118560.0, ans=0.2 +2024-09-17 01:26:50,586 INFO [train.py:1198] (1/2) Epoch 7, batch 2500, loss[loss=0.287, ctc_loss=0.1965, cr_loss=0.4239, attn_decoder_loss=0.2876, over 29628.00 frames. ], tot_loss[loss=0.2808, ctc_loss=0.1976, cr_loss=0.4248, attn_decoder_loss=0.2806, over 5796312.07 frames. ], batch size: 86, lr: 1.58e-02, grad_scale: 8.0 +2024-09-17 01:27:23,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=118680.0, ans=0.125 +2024-09-17 01:27:23,772 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.93 vs. limit=15.0 +2024-09-17 01:27:25,209 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.04 vs. limit=15.0 +2024-09-17 01:28:07,523 INFO [train.py:1198] (1/2) Epoch 7, batch 2550, loss[loss=0.2615, ctc_loss=0.1859, cr_loss=0.4151, attn_decoder_loss=0.2607, over 29362.00 frames. ], tot_loss[loss=0.2807, ctc_loss=0.1975, cr_loss=0.4249, attn_decoder_loss=0.2805, over 5798979.43 frames. 
], batch size: 67, lr: 1.58e-02, grad_scale: 4.0 +2024-09-17 01:28:11,986 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.793e+01 1.007e+02 1.102e+02 1.293e+02 3.039e+02, threshold=2.204e+02, percent-clipped=2.0 +2024-09-17 01:28:19,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=118800.0, ans=0.1 +2024-09-17 01:28:35,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=118840.0, ans=0.0 +2024-09-17 01:28:50,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=118880.0, ans=0.0 +2024-09-17 01:29:07,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=118920.0, ans=0.2 +2024-09-17 01:29:14,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=118960.0, ans=0.2 +2024-09-17 01:29:25,800 INFO [train.py:1198] (1/2) Epoch 7, batch 2600, loss[loss=0.2542, ctc_loss=0.1685, cr_loss=0.3748, attn_decoder_loss=0.2554, over 29462.00 frames. ], tot_loss[loss=0.2809, ctc_loss=0.1973, cr_loss=0.4243, attn_decoder_loss=0.2807, over 5794033.73 frames. ], batch size: 78, lr: 1.58e-02, grad_scale: 8.0 +2024-09-17 01:29:29,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten.whitening_limit, batch_count=119000.0, ans=22.5 +2024-09-17 01:30:06,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=119080.0, ans=0.125 +2024-09-17 01:30:08,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=119080.0, ans=0.0 +2024-09-17 01:30:13,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=119120.0, ans=0.2 +2024-09-17 01:30:13,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.24 vs. limit=15.0 +2024-09-17 01:30:20,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=119120.0, ans=0.1 +2024-09-17 01:30:25,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=119120.0, ans=0.0 +2024-09-17 01:30:32,716 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:30:43,406 INFO [train.py:1198] (1/2) Epoch 7, batch 2650, loss[loss=0.3077, ctc_loss=0.2214, cr_loss=0.4628, attn_decoder_loss=0.307, over 29266.00 frames. ], tot_loss[loss=0.2815, ctc_loss=0.1979, cr_loss=0.4251, attn_decoder_loss=0.2814, over 5800069.77 frames. 
], batch size: 100, lr: 1.58e-02, grad_scale: 4.0 +2024-09-17 01:30:43,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=119200.0, ans=0.015 +2024-09-17 01:30:49,514 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.862e+01 1.038e+02 1.128e+02 1.278e+02 2.890e+02, threshold=2.256e+02, percent-clipped=2.0 +2024-09-17 01:31:24,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=119280.0, ans=0.125 +2024-09-17 01:31:28,246 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.32 vs. limit=12.0 +2024-09-17 01:31:29,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=119320.0, ans=0.1 +2024-09-17 01:31:30,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=119320.0, ans=0.125 +2024-09-17 01:31:32,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=119320.0, ans=0.125 +2024-09-17 01:31:41,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=119320.0, ans=0.125 +2024-09-17 01:31:52,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=119360.0, ans=0.025 +2024-09-17 01:31:59,334 INFO [train.py:1198] (1/2) Epoch 7, batch 2700, loss[loss=0.2899, ctc_loss=0.1933, cr_loss=0.4356, attn_decoder_loss=0.291, over 29529.00 frames. ], tot_loss[loss=0.2815, ctc_loss=0.1977, cr_loss=0.425, attn_decoder_loss=0.2813, over 5795770.09 frames. ], batch size: 87, lr: 1.58e-02, grad_scale: 8.0 +2024-09-17 01:32:19,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=119440.0, ans=0.1 +2024-09-17 01:32:43,921 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:32:56,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.14 vs. limit=12.0 +2024-09-17 01:33:03,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=119560.0, ans=0.125 +2024-09-17 01:33:05,743 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=13.83 vs. limit=15.0 +2024-09-17 01:33:18,443 INFO [train.py:1198] (1/2) Epoch 7, batch 2750, loss[loss=0.2549, ctc_loss=0.1689, cr_loss=0.395, attn_decoder_loss=0.2557, over 29527.00 frames. ], tot_loss[loss=0.2801, ctc_loss=0.1965, cr_loss=0.423, attn_decoder_loss=0.28, over 5794830.70 frames. ], batch size: 75, lr: 1.58e-02, grad_scale: 4.0 +2024-09-17 01:33:22,611 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.65 vs. 
limit=6.0 +2024-09-17 01:33:26,048 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.208e+01 9.926e+01 1.072e+02 1.182e+02 2.176e+02, threshold=2.145e+02, percent-clipped=0.0 +2024-09-17 01:33:34,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.66 vs. limit=15.0 +2024-09-17 01:33:58,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=119680.0, ans=0.0 +2024-09-17 01:34:17,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=119720.0, ans=0.1 +2024-09-17 01:34:37,085 INFO [train.py:1198] (1/2) Epoch 7, batch 2800, loss[loss=0.3237, ctc_loss=0.2752, cr_loss=0.4636, attn_decoder_loss=0.3188, over 20432.00 frames. ], tot_loss[loss=0.2802, ctc_loss=0.1965, cr_loss=0.4226, attn_decoder_loss=0.2801, over 5776448.88 frames. ], batch size: 210, lr: 1.57e-02, grad_scale: 8.0 +2024-09-17 01:34:40,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=119800.0, ans=0.1 +2024-09-17 01:34:41,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=119800.0, ans=0.125 +2024-09-17 01:34:48,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=119800.0, ans=0.125 +2024-09-17 01:34:52,775 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:34:54,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=119840.0, ans=0.0 +2024-09-17 01:35:13,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=119880.0, ans=0.1 +2024-09-17 01:35:24,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=119920.0, ans=0.125 +2024-09-17 01:35:26,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=119920.0, ans=0.0 +2024-09-17 01:35:28,001 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:35:29,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=119920.0, ans=0.125 +2024-09-17 01:35:31,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=119920.0, ans=0.0 +2024-09-17 01:35:31,681 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.72 vs. limit=6.0 +2024-09-17 01:35:42,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.10 vs. 
limit=6.0 +2024-09-17 01:35:50,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=119960.0, ans=0.0 +2024-09-17 01:35:53,908 INFO [train.py:1198] (1/2) Epoch 7, batch 2850, loss[loss=0.2748, ctc_loss=0.1916, cr_loss=0.4347, attn_decoder_loss=0.2744, over 29517.00 frames. ], tot_loss[loss=0.281, ctc_loss=0.1975, cr_loss=0.4239, attn_decoder_loss=0.2808, over 5760844.14 frames. ], batch size: 77, lr: 1.57e-02, grad_scale: 4.0 +2024-09-17 01:36:03,115 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.909e+01 1.092e+02 1.177e+02 1.435e+02 2.490e+02, threshold=2.355e+02, percent-clipped=3.0 +2024-09-17 01:36:06,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=120000.0, ans=0.125 +2024-09-17 01:36:08,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=120040.0, ans=0.95 +2024-09-17 01:36:14,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=120040.0, ans=0.0 +2024-09-17 01:36:24,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=120080.0, ans=0.1 +2024-09-17 01:36:47,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=120120.0, ans=0.0 +2024-09-17 01:36:48,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=120120.0, ans=0.0 +2024-09-17 01:37:01,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=120160.0, ans=0.07 +2024-09-17 01:37:02,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=120160.0, ans=0.07 +2024-09-17 01:37:04,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=120160.0, ans=0.0 +2024-09-17 01:37:13,106 INFO [train.py:1198] (1/2) Epoch 7, batch 2900, loss[loss=0.2757, ctc_loss=0.1903, cr_loss=0.4219, attn_decoder_loss=0.2758, over 29797.00 frames. ], tot_loss[loss=0.2821, ctc_loss=0.1979, cr_loss=0.4256, attn_decoder_loss=0.282, over 5787056.21 frames. ], batch size: 80, lr: 1.57e-02, grad_scale: 8.0 +2024-09-17 01:37:32,345 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.19 vs. limit=15.0 +2024-09-17 01:37:43,473 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=10.66 vs. limit=12.0 +2024-09-17 01:38:07,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=120320.0, ans=0.0 +2024-09-17 01:38:09,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=120320.0, ans=0.1 +2024-09-17 01:38:15,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=120360.0, ans=0.2 +2024-09-17 01:38:31,299 INFO [train.py:1198] (1/2) Epoch 7, batch 2950, loss[loss=0.2756, ctc_loss=0.1963, cr_loss=0.4423, attn_decoder_loss=0.2745, over 29535.00 frames. 
], tot_loss[loss=0.2805, ctc_loss=0.1964, cr_loss=0.4237, attn_decoder_loss=0.2804, over 5782191.44 frames. ], batch size: 75, lr: 1.57e-02, grad_scale: 4.0 +2024-09-17 01:38:41,940 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.289e+01 1.032e+02 1.125e+02 1.263e+02 2.681e+02, threshold=2.250e+02, percent-clipped=2.0 +2024-09-17 01:38:55,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=120440.0, ans=0.125 +2024-09-17 01:39:36,073 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.58 vs. limit=12.0 +2024-09-17 01:39:46,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=120600.0, ans=0.09899494936611666 +2024-09-17 01:39:47,833 INFO [train.py:1198] (1/2) Epoch 7, batch 3000, loss[loss=0.2824, ctc_loss=0.1896, cr_loss=0.4285, attn_decoder_loss=0.2832, over 29765.00 frames. ], tot_loss[loss=0.2805, ctc_loss=0.1965, cr_loss=0.4236, attn_decoder_loss=0.2804, over 5782805.36 frames. ], batch size: 81, lr: 1.57e-02, grad_scale: 8.0 +2024-09-17 01:39:47,834 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 01:40:06,238 INFO [train.py:1230] (1/2) Epoch 7, validation: loss=0.2168, ctc_loss=0.05873, cr_loss=4.524e-15, attn_decoder_loss=0.2344, over 944034.00 frames. +2024-09-17 01:40:06,238 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 01:40:15,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.95 vs. limit=12.0 +2024-09-17 01:40:30,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=120640.0, ans=0.2 +2024-09-17 01:40:50,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=120680.0, ans=0.0 +2024-09-17 01:40:55,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=120720.0, ans=0.1 +2024-09-17 01:40:58,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=120720.0, ans=0.0 +2024-09-17 01:41:01,640 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.06 vs. limit=15.0 +2024-09-17 01:41:02,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=120720.0, ans=0.0 +2024-09-17 01:41:10,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=120760.0, ans=10.0 +2024-09-17 01:41:12,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=120760.0, ans=0.0 +2024-09-17 01:41:19,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=120760.0, ans=0.125 +2024-09-17 01:41:25,956 INFO [train.py:1198] (1/2) Epoch 7, batch 3050, loss[loss=0.259, ctc_loss=0.1713, cr_loss=0.4013, attn_decoder_loss=0.2599, over 29542.00 frames. ], tot_loss[loss=0.2814, ctc_loss=0.1973, cr_loss=0.4246, attn_decoder_loss=0.2813, over 5776612.60 frames. 
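The validation entries above (train.py:1221/1230) show cr_loss collapsing to about 4.5e-15 while the CTC and attention-decoder terms stay meaningful; presumably, with the model in eval mode, the two views compared by the consistency term coincide, leaving only floating-point noise. A minimal sketch of such a validation pass follows, with compute_loss() as an assumed helper that mirrors the training-loop bookkeeping.

```python
# Minimal sketch of a validation pass consistent with the log entries
# above: model switched to eval mode, no gradients, per-frame losses
# averaged over the full validation set. compute_loss() is an assumed
# helper returning (loss, info) as in the training-loop sketch, with
# info["frames"] giving the batch's frame count.
import torch

def compute_validation_loss(model, valid_loader, compute_loss):
    model.eval()
    totals, frames = {}, 0.0
    with torch.no_grad():
        for batch in valid_loader:
            loss, info = compute_loss(model, batch)
            n = info.pop("frames", 1.0)
            frames += n
            for k, v in info.items():
                totals[k] = totals.get(k, 0.0) + v * n
    model.train()
    return {k: v / frames for k, v in totals.items()}  # per-frame averages
```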
], batch size: 76, lr: 1.57e-02, grad_scale: 4.0 +2024-09-17 01:41:37,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=120800.0, ans=0.0 +2024-09-17 01:41:40,279 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.612e+01 1.070e+02 1.194e+02 1.343e+02 6.918e+02, threshold=2.387e+02, percent-clipped=4.0 +2024-09-17 01:41:57,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=120880.0, ans=0.125 +2024-09-17 01:42:06,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=120880.0, ans=0.125 +2024-09-17 01:42:12,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=120920.0, ans=0.125 +2024-09-17 01:42:19,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.33 vs. limit=15.0 +2024-09-17 01:42:44,144 INFO [train.py:1198] (1/2) Epoch 7, batch 3100, loss[loss=0.2976, ctc_loss=0.2125, cr_loss=0.4645, attn_decoder_loss=0.2968, over 29285.00 frames. ], tot_loss[loss=0.2803, ctc_loss=0.1961, cr_loss=0.4231, attn_decoder_loss=0.2803, over 5776339.91 frames. ], batch size: 100, lr: 1.57e-02, grad_scale: 8.0 +2024-09-17 01:42:44,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=121000.0, ans=0.2 +2024-09-17 01:42:48,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=121000.0, ans=0.0 +2024-09-17 01:43:04,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=121040.0, ans=0.2 +2024-09-17 01:43:25,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=121080.0, ans=0.0 +2024-09-17 01:43:29,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=121120.0, ans=0.125 +2024-09-17 01:43:35,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.96 vs. limit=22.5 +2024-09-17 01:43:55,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=121160.0, ans=0.125 +2024-09-17 01:44:00,616 INFO [train.py:1198] (1/2) Epoch 7, batch 3150, loss[loss=0.298, ctc_loss=0.2133, cr_loss=0.4419, attn_decoder_loss=0.2976, over 28891.00 frames. ], tot_loss[loss=0.2801, ctc_loss=0.1955, cr_loss=0.4232, attn_decoder_loss=0.2801, over 5783172.68 frames. ], batch size: 104, lr: 1.57e-02, grad_scale: 4.0 +2024-09-17 01:44:07,542 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=10.36 vs. 
limit=15.0 +2024-09-17 01:44:08,527 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:44:14,322 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.541e+01 1.004e+02 1.097e+02 1.266e+02 2.300e+02, threshold=2.194e+02, percent-clipped=0.0 +2024-09-17 01:44:18,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=121240.0, ans=0.04949747468305833 +2024-09-17 01:44:46,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=121280.0, ans=0.0 +2024-09-17 01:44:59,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=121320.0, ans=0.0 +2024-09-17 01:45:10,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.35 vs. limit=22.5 +2024-09-17 01:45:16,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=121360.0, ans=0.0 +2024-09-17 01:45:19,455 INFO [train.py:1198] (1/2) Epoch 7, batch 3200, loss[loss=0.2713, ctc_loss=0.1843, cr_loss=0.4172, attn_decoder_loss=0.2716, over 29415.00 frames. ], tot_loss[loss=0.2789, ctc_loss=0.1942, cr_loss=0.4219, attn_decoder_loss=0.279, over 5794116.42 frames. ], batch size: 79, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:45:25,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=121400.0, ans=0.0 +2024-09-17 01:45:52,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=121480.0, ans=0.125 +2024-09-17 01:46:03,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=121480.0, ans=0.125 +2024-09-17 01:46:24,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=121560.0, ans=15.0 +2024-09-17 01:46:27,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=121560.0, ans=0.125 +2024-09-17 01:46:34,506 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.19 vs. limit=12.0 +2024-09-17 01:46:38,569 INFO [train.py:1198] (1/2) Epoch 7, batch 3250, loss[loss=0.2958, ctc_loss=0.2161, cr_loss=0.4341, attn_decoder_loss=0.295, over 29696.00 frames. ], tot_loss[loss=0.2798, ctc_loss=0.1949, cr_loss=0.423, attn_decoder_loss=0.2798, over 5801117.94 frames. 
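The scaling.py Whitening lines compare a per-module anisotropy metric of the activations' channel covariance against a whitening_limit: values near 1.0 indicate nearly isotropic (whitened) features, while entries such as metric=22.35 vs. limit=22.5 flag modules drifting toward the limit, where an auxiliary penalty pulls them back. Below is a sketch of one common form of that statistic, under the assumption that it is the ratio E[lambda^2] / (E[lambda])^2 over the covariance eigenvalues; the exact definition in scaling.py may differ.

```python
# Sketch of an anisotropy ("whitening") metric of the kind logged above,
# assumed to be E[lambda^2] / (E[lambda])^2 over the eigenvalues lambda of
# the channel covariance: exactly 1.0 when the covariance is a multiple of
# the identity, growing as channels become correlated or unevenly scaled.
import torch

def whitening_metric(x: torch.Tensor) -> float:
    # x: (num_frames, num_channels) activations
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.T @ x) / x.shape[0]             # (C, C) channel covariance
    n = cov.shape[0]
    mean_eig = torch.diagonal(cov).mean()    # E[lambda] = trace(C) / n
    mean_eig_sq = (cov * cov).sum() / n      # E[lambda^2] = trace(C @ C) / n
    return (mean_eig_sq / mean_eig.clamp(min=1e-20) ** 2).item()

# A well-whitened batch scores near 1.0:
print(whitening_metric(torch.randn(10000, 64)))  # ~1.0 plus sampling noise
```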
], batch size: 84, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:46:51,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=121600.0, ans=0.025 +2024-09-17 01:46:53,792 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.273e+01 1.020e+02 1.119e+02 1.210e+02 1.676e+02, threshold=2.238e+02, percent-clipped=0.0 +2024-09-17 01:46:57,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=121640.0, ans=0.125 +2024-09-17 01:47:12,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=121680.0, ans=0.0 +2024-09-17 01:47:29,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=121720.0, ans=0.125 +2024-09-17 01:47:35,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.09 vs. limit=15.0 +2024-09-17 01:47:45,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=121760.0, ans=0.125 +2024-09-17 01:47:54,988 INFO [train.py:1198] (1/2) Epoch 7, batch 3300, loss[loss=0.2806, ctc_loss=0.1849, cr_loss=0.3945, attn_decoder_loss=0.2825, over 28201.00 frames. ], tot_loss[loss=0.2783, ctc_loss=0.1939, cr_loss=0.4208, attn_decoder_loss=0.2783, over 5799271.27 frames. ], batch size: 111, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:48:28,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=121880.0, ans=0.0 +2024-09-17 01:48:30,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=121880.0, ans=0.125 +2024-09-17 01:49:03,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=121960.0, ans=0.2 +2024-09-17 01:49:05,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=121960.0, ans=0.125 +2024-09-17 01:49:09,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=121960.0, ans=0.125 +2024-09-17 01:49:13,993 INFO [train.py:1198] (1/2) Epoch 7, batch 3350, loss[loss=0.3008, ctc_loss=0.2145, cr_loss=0.4248, attn_decoder_loss=0.3009, over 28948.00 frames. ], tot_loss[loss=0.2795, ctc_loss=0.1953, cr_loss=0.4217, attn_decoder_loss=0.2794, over 5775717.70 frames. 
], batch size: 104, lr: 1.56e-02, grad_scale: 4.0 +2024-09-17 01:49:14,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=122000.0, ans=0.125 +2024-09-17 01:49:15,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=122000.0, ans=0.07 +2024-09-17 01:49:32,809 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.051e+01 1.075e+02 1.159e+02 1.381e+02 2.720e+02, threshold=2.319e+02, percent-clipped=3.0 +2024-09-17 01:49:49,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=122080.0, ans=0.125 +2024-09-17 01:50:32,397 INFO [train.py:1198] (1/2) Epoch 7, batch 3400, loss[loss=0.2401, ctc_loss=0.1627, cr_loss=0.3776, attn_decoder_loss=0.2404, over 29365.00 frames. ], tot_loss[loss=0.2791, ctc_loss=0.1953, cr_loss=0.4211, attn_decoder_loss=0.2791, over 5767258.68 frames. ], batch size: 67, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:50:32,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=122200.0, ans=0.125 +2024-09-17 01:50:41,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=122200.0, ans=0.125 +2024-09-17 01:50:58,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=122240.0, ans=0.2 +2024-09-17 01:51:15,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=122280.0, ans=0.125 +2024-09-17 01:51:26,727 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.65 vs. limit=15.0 +2024-09-17 01:51:28,031 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.08 vs. limit=15.0 +2024-09-17 01:51:32,008 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 01:51:40,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=122360.0, ans=0.0 +2024-09-17 01:51:44,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=122360.0, ans=0.0 +2024-09-17 01:51:48,684 INFO [train.py:1198] (1/2) Epoch 7, batch 3450, loss[loss=0.298, ctc_loss=0.2181, cr_loss=0.4223, attn_decoder_loss=0.2975, over 28068.00 frames. ], tot_loss[loss=0.2794, ctc_loss=0.1952, cr_loss=0.4212, attn_decoder_loss=0.2793, over 5776271.10 frames. 
], batch size: 111, lr: 1.56e-02, grad_scale: 4.0 +2024-09-17 01:52:09,037 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.695e+01 1.040e+02 1.098e+02 1.235e+02 2.393e+02, threshold=2.195e+02, percent-clipped=1.0 +2024-09-17 01:52:12,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=122440.0, ans=0.125 +2024-09-17 01:52:23,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=122480.0, ans=0.1 +2024-09-17 01:52:40,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=122520.0, ans=0.125 +2024-09-17 01:52:46,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=122520.0, ans=0.125 +2024-09-17 01:52:52,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=122560.0, ans=0.2 +2024-09-17 01:52:53,006 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.93 vs. limit=15.0 +2024-09-17 01:52:59,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=122560.0, ans=0.1 +2024-09-17 01:53:07,009 INFO [train.py:1198] (1/2) Epoch 7, batch 3500, loss[loss=0.2537, ctc_loss=0.1796, cr_loss=0.396, attn_decoder_loss=0.2531, over 29342.00 frames. ], tot_loss[loss=0.2789, ctc_loss=0.195, cr_loss=0.4217, attn_decoder_loss=0.2788, over 5778119.18 frames. ], batch size: 71, lr: 1.56e-02, grad_scale: 8.0 +2024-09-17 01:53:22,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=122640.0, ans=0.125 +2024-09-17 01:53:29,302 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.08 vs. limit=15.0 +2024-09-17 01:53:42,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=122680.0, ans=0.125 +2024-09-17 01:53:47,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=122680.0, ans=0.125 +2024-09-17 01:54:03,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=122720.0, ans=0.125 +2024-09-17 01:54:24,529 INFO [train.py:1198] (1/2) Epoch 7, batch 3550, loss[loss=0.298, ctc_loss=0.205, cr_loss=0.4414, attn_decoder_loss=0.2986, over 29705.00 frames. ], tot_loss[loss=0.2787, ctc_loss=0.1947, cr_loss=0.4218, attn_decoder_loss=0.2787, over 5784488.50 frames. 
], batch size: 89, lr: 1.56e-02, grad_scale: 4.0 +2024-09-17 01:54:41,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=122840.0, ans=0.0 +2024-09-17 01:54:43,848 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.552e+01 9.824e+01 1.101e+02 1.214e+02 1.774e+02, threshold=2.203e+02, percent-clipped=0.0 +2024-09-17 01:55:02,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=122880.0, ans=0.04949747468305833 +2024-09-17 01:55:17,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=122920.0, ans=0.09899494936611666 +2024-09-17 01:55:39,617 INFO [train.py:1198] (1/2) Epoch 7, batch 3600, loss[loss=0.2651, ctc_loss=0.1795, cr_loss=0.4098, attn_decoder_loss=0.2655, over 29495.00 frames. ], tot_loss[loss=0.2789, ctc_loss=0.1946, cr_loss=0.422, attn_decoder_loss=0.2789, over 5792699.34 frames. ], batch size: 77, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 01:55:41,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=123000.0, ans=0.0 +2024-09-17 01:55:43,366 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.57 vs. limit=15.0 +2024-09-17 01:56:00,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.95 vs. limit=15.0 +2024-09-17 01:56:08,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=123080.0, ans=0.2 +2024-09-17 01:56:23,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=123120.0, ans=0.125 +2024-09-17 01:56:32,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=123120.0, ans=0.1 +2024-09-17 01:56:34,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=123120.0, ans=0.025 +2024-09-17 01:56:37,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=123120.0, ans=0.05 +2024-09-17 01:56:37,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.49 vs. limit=15.0 +2024-09-17 01:56:55,560 INFO [train.py:1198] (1/2) Epoch 7, batch 3650, loss[loss=0.3025, ctc_loss=0.2099, cr_loss=0.4717, attn_decoder_loss=0.3023, over 29482.00 frames. ], tot_loss[loss=0.2782, ctc_loss=0.1937, cr_loss=0.4209, attn_decoder_loss=0.2782, over 5794196.46 frames. ], batch size: 90, lr: 1.55e-02, grad_scale: 4.0 +2024-09-17 01:57:05,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=123200.0, ans=0.125 +2024-09-17 01:57:08,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.95 vs. 
limit=22.5 +2024-09-17 01:57:12,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=123240.0, ans=0.1 +2024-09-17 01:57:16,631 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.280e+01 1.041e+02 1.137e+02 1.251e+02 2.329e+02, threshold=2.273e+02, percent-clipped=0.0 +2024-09-17 01:57:32,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=123280.0, ans=0.2 +2024-09-17 01:57:41,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=123320.0, ans=0.04949747468305833 +2024-09-17 01:57:55,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=123320.0, ans=0.125 +2024-09-17 01:58:13,150 INFO [train.py:1198] (1/2) Epoch 7, batch 3700, loss[loss=0.291, ctc_loss=0.2027, cr_loss=0.4275, attn_decoder_loss=0.2913, over 29715.00 frames. ], tot_loss[loss=0.2786, ctc_loss=0.1938, cr_loss=0.4214, attn_decoder_loss=0.2787, over 5803300.43 frames. ], batch size: 84, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 01:59:05,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=123520.0, ans=0.0 +2024-09-17 01:59:09,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.86 vs. limit=6.0 +2024-09-17 01:59:22,836 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.20 vs. limit=15.0 +2024-09-17 01:59:28,061 INFO [train.py:1198] (1/2) Epoch 7, batch 3750, loss[loss=0.2508, ctc_loss=0.1757, cr_loss=0.3727, attn_decoder_loss=0.2509, over 29361.00 frames. ], tot_loss[loss=0.2781, ctc_loss=0.1932, cr_loss=0.4201, attn_decoder_loss=0.2782, over 5806834.34 frames. ], batch size: 67, lr: 1.55e-02, grad_scale: 4.0 +2024-09-17 01:59:34,784 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.81 vs. limit=15.0 +2024-09-17 01:59:48,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=123640.0, ans=0.125 +2024-09-17 01:59:50,730 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.690e+01 1.049e+02 1.152e+02 1.342e+02 3.942e+02, threshold=2.304e+02, percent-clipped=2.0 +2024-09-17 01:59:57,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=123680.0, ans=0.125 +2024-09-17 02:00:01,776 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.59 vs. 
limit=15.0 +2024-09-17 02:00:13,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=123720.0, ans=0.125 +2024-09-17 02:00:24,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=123720.0, ans=0.2 +2024-09-17 02:00:35,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=123760.0, ans=0.125 +2024-09-17 02:00:42,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=123760.0, ans=0.0 +2024-09-17 02:00:43,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=123800.0, ans=0.0 +2024-09-17 02:00:45,085 INFO [train.py:1198] (1/2) Epoch 7, batch 3800, loss[loss=0.2882, ctc_loss=0.195, cr_loss=0.4342, attn_decoder_loss=0.2889, over 29608.00 frames. ], tot_loss[loss=0.2779, ctc_loss=0.1932, cr_loss=0.4199, attn_decoder_loss=0.278, over 5797053.83 frames. ], batch size: 86, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 02:01:34,154 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.58 vs. limit=15.0 +2024-09-17 02:01:43,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=123960.0, ans=0.09899494936611666 +2024-09-17 02:01:51,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=123960.0, ans=0.1 +2024-09-17 02:02:00,447 INFO [train.py:1198] (1/2) Epoch 7, batch 3850, loss[loss=0.2891, ctc_loss=0.1991, cr_loss=0.4342, attn_decoder_loss=0.2895, over 29273.00 frames. ], tot_loss[loss=0.2775, ctc_loss=0.1928, cr_loss=0.4198, attn_decoder_loss=0.2776, over 5810983.84 frames. ], batch size: 100, lr: 1.55e-02, grad_scale: 4.0 +2024-09-17 02:02:20,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=124040.0, ans=0.0 +2024-09-17 02:02:24,328 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.352e+01 1.024e+02 1.121e+02 1.176e+02 2.647e+02, threshold=2.243e+02, percent-clipped=2.0 +2024-09-17 02:02:35,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=124080.0, ans=0.0 +2024-09-17 02:02:38,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=124080.0, ans=0.2 +2024-09-17 02:02:44,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=124120.0, ans=0.125 +2024-09-17 02:02:47,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=124120.0, ans=0.025 +2024-09-17 02:03:15,228 INFO [train.py:1198] (1/2) Epoch 7, batch 3900, loss[loss=0.2953, ctc_loss=0.2118, cr_loss=0.4647, attn_decoder_loss=0.2942, over 29639.00 frames. ], tot_loss[loss=0.278, ctc_loss=0.1932, cr_loss=0.4205, attn_decoder_loss=0.2781, over 5815347.47 frames. 
], batch size: 86, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 02:03:21,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=124200.0, ans=0.125 +2024-09-17 02:03:36,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=124240.0, ans=0.0 +2024-09-17 02:03:38,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=124240.0, ans=0.125 +2024-09-17 02:03:55,468 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.47 vs. limit=15.0 +2024-09-17 02:04:03,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=124320.0, ans=0.015 +2024-09-17 02:04:04,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=124320.0, ans=0.2 +2024-09-17 02:04:26,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=124360.0, ans=0.125 +2024-09-17 02:04:31,859 INFO [train.py:1198] (1/2) Epoch 7, batch 3950, loss[loss=0.2895, ctc_loss=0.1999, cr_loss=0.4183, attn_decoder_loss=0.2902, over 29493.00 frames. ], tot_loss[loss=0.2779, ctc_loss=0.1927, cr_loss=0.4207, attn_decoder_loss=0.278, over 5835040.91 frames. ], batch size: 97, lr: 1.55e-02, grad_scale: 4.0 +2024-09-17 02:04:57,235 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.925e+01 1.017e+02 1.080e+02 1.236e+02 3.410e+02, threshold=2.160e+02, percent-clipped=1.0 +2024-09-17 02:05:12,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=124480.0, ans=0.0 +2024-09-17 02:05:24,808 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.16 vs. limit=10.0 +2024-09-17 02:05:47,552 INFO [train.py:1198] (1/2) Epoch 7, batch 4000, loss[loss=0.2526, ctc_loss=0.1678, cr_loss=0.3743, attn_decoder_loss=0.2537, over 29527.00 frames. ], tot_loss[loss=0.2784, ctc_loss=0.1934, cr_loss=0.4211, attn_decoder_loss=0.2784, over 5812745.59 frames. ], batch size: 74, lr: 1.55e-02, grad_scale: 8.0 +2024-09-17 02:05:59,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=124600.0, ans=0.0 +2024-09-17 02:06:11,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=124640.0, ans=0.0 +2024-09-17 02:06:22,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=124680.0, ans=0.025 +2024-09-17 02:06:48,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=124760.0, ans=0.0 +2024-09-17 02:06:50,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=124760.0, ans=0.1 +2024-09-17 02:06:52,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=124760.0, ans=0.025 +2024-09-17 02:07:02,873 INFO [train.py:1198] (1/2) Epoch 7, batch 4050, loss[loss=0.3212, ctc_loss=0.2601, cr_loss=0.4519, attn_decoder_loss=0.318, over 20625.00 frames. 
], tot_loss[loss=0.2783, ctc_loss=0.1935, cr_loss=0.4211, attn_decoder_loss=0.2784, over 5796673.21 frames. ], batch size: 209, lr: 1.54e-02, grad_scale: 4.0 +2024-09-17 02:07:09,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=124800.0, ans=0.2 +2024-09-17 02:07:26,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=124840.0, ans=0.1 +2024-09-17 02:07:29,486 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.830e+01 1.037e+02 1.133e+02 1.279e+02 3.685e+02, threshold=2.266e+02, percent-clipped=2.0 +2024-09-17 02:08:01,450 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=24.10 vs. limit=15.0 +2024-09-17 02:08:06,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=124960.0, ans=0.05 +2024-09-17 02:08:12,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=124960.0, ans=0.125 +2024-09-17 02:08:14,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=124960.0, ans=0.1 +2024-09-17 02:08:18,290 INFO [train.py:1198] (1/2) Epoch 7, batch 4100, loss[loss=0.2956, ctc_loss=0.2058, cr_loss=0.4392, attn_decoder_loss=0.2958, over 29495.00 frames. ], tot_loss[loss=0.2785, ctc_loss=0.1937, cr_loss=0.4212, attn_decoder_loss=0.2786, over 5791813.32 frames. ], batch size: 90, lr: 1.54e-02, grad_scale: 8.0 +2024-09-17 02:08:24,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=125000.0, ans=0.1 +2024-09-17 02:08:58,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=125080.0, ans=0.0 +2024-09-17 02:09:01,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=125120.0, ans=0.125 +2024-09-17 02:09:04,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=125120.0, ans=0.0 +2024-09-17 02:09:13,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=125120.0, ans=0.1 +2024-09-17 02:09:17,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=125160.0, ans=0.0 +2024-09-17 02:09:32,371 INFO [train.py:1198] (1/2) Epoch 7, batch 4150, loss[loss=0.2718, ctc_loss=0.1871, cr_loss=0.4043, attn_decoder_loss=0.2722, over 29520.00 frames. ], tot_loss[loss=0.2783, ctc_loss=0.1937, cr_loss=0.4218, attn_decoder_loss=0.2783, over 5797646.99 frames. 
], batch size: 77, lr: 1.54e-02, grad_scale: 4.0 +2024-09-17 02:10:01,822 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.306e+01 1.018e+02 1.090e+02 1.211e+02 2.746e+02, threshold=2.181e+02, percent-clipped=3.0 +2024-09-17 02:10:03,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=125280.0, ans=0.125 +2024-09-17 02:10:18,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=125320.0, ans=0.125 +2024-09-17 02:10:44,893 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:10:47,450 INFO [train.py:1198] (1/2) Epoch 7, batch 4200, loss[loss=0.2839, ctc_loss=0.1904, cr_loss=0.4338, attn_decoder_loss=0.2847, over 29506.00 frames. ], tot_loss[loss=0.2787, ctc_loss=0.194, cr_loss=0.4219, attn_decoder_loss=0.2787, over 5800242.21 frames. ], batch size: 90, lr: 1.54e-02, grad_scale: 8.0 +2024-09-17 02:10:56,052 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.17 vs. limit=15.0 +2024-09-17 02:10:59,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=125400.0, ans=0.125 +2024-09-17 02:11:24,922 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:12:02,718 INFO [train.py:1198] (1/2) Epoch 7, batch 4250, loss[loss=0.2639, ctc_loss=0.1808, cr_loss=0.4166, attn_decoder_loss=0.2639, over 29500.00 frames. ], tot_loss[loss=0.2788, ctc_loss=0.1937, cr_loss=0.4216, attn_decoder_loss=0.2788, over 5805624.18 frames. ], batch size: 74, lr: 1.54e-02, grad_scale: 4.0 +2024-09-17 02:12:02,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=125600.0, ans=0.125 +2024-09-17 02:12:05,160 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.91 vs. limit=15.0 +2024-09-17 02:12:05,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.13 vs. 
limit=22.5 +2024-09-17 02:12:06,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=125600.0, ans=0.125 +2024-09-17 02:12:24,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=125640.0, ans=0.125 +2024-09-17 02:12:31,954 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.603e+01 1.048e+02 1.150e+02 1.288e+02 2.522e+02, threshold=2.299e+02, percent-clipped=2.0 +2024-09-17 02:12:32,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=125680.0, ans=0.125 +2024-09-17 02:12:45,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=125720.0, ans=0.0 +2024-09-17 02:12:51,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=125720.0, ans=0.0 +2024-09-17 02:12:58,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.96 vs. limit=15.0 +2024-09-17 02:12:59,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=125720.0, ans=0.0 +2024-09-17 02:13:16,850 INFO [train.py:1198] (1/2) Epoch 7, batch 4300, loss[loss=0.2898, ctc_loss=0.1953, cr_loss=0.4354, attn_decoder_loss=0.2906, over 29528.00 frames. ], tot_loss[loss=0.2795, ctc_loss=0.1947, cr_loss=0.4226, attn_decoder_loss=0.2796, over 5796600.83 frames. ], batch size: 87, lr: 1.54e-02, grad_scale: 8.0 +2024-09-17 02:13:29,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=125800.0, ans=0.0 +2024-09-17 02:13:58,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=125880.0, ans=0.05 +2024-09-17 02:14:05,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.39 vs. limit=12.0 +2024-09-17 02:14:32,529 INFO [train.py:1198] (1/2) Epoch 7, batch 4350, loss[loss=0.3048, ctc_loss=0.2215, cr_loss=0.4672, attn_decoder_loss=0.3037, over 29521.00 frames. ], tot_loss[loss=0.2832, ctc_loss=0.1982, cr_loss=0.4275, attn_decoder_loss=0.2832, over 5798208.23 frames. 
], batch size: 97, lr: 1.54e-02, grad_scale: 4.0 +2024-09-17 02:14:40,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=126000.0, ans=0.0 +2024-09-17 02:14:50,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=126040.0, ans=0.1 +2024-09-17 02:15:04,446 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.068e+01 1.042e+02 1.125e+02 1.257e+02 6.277e+02, threshold=2.251e+02, percent-clipped=2.0 +2024-09-17 02:15:16,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=126120.0, ans=0.025 +2024-09-17 02:15:16,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=126120.0, ans=0.125 +2024-09-17 02:15:25,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=126120.0, ans=0.125 +2024-09-17 02:15:35,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=126160.0, ans=0.125 +2024-09-17 02:15:47,179 INFO [train.py:1198] (1/2) Epoch 7, batch 4400, loss[loss=0.303, ctc_loss=0.2237, cr_loss=0.4585, attn_decoder_loss=0.3016, over 27481.00 frames. ], tot_loss[loss=0.2862, ctc_loss=0.2011, cr_loss=0.4311, attn_decoder_loss=0.2861, over 5768137.65 frames. ], batch size: 124, lr: 1.54e-02, grad_scale: 8.0 +2024-09-17 02:15:48,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=126200.0, ans=0.125 +2024-09-17 02:15:48,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=126200.0, ans=0.125 +2024-09-17 02:15:51,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=126200.0, ans=0.125 +2024-09-17 02:15:53,972 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.50 vs. limit=6.0 +2024-09-17 02:15:56,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=126200.0, ans=0.125 +2024-09-17 02:16:00,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=126240.0, ans=0.1 +2024-09-17 02:16:02,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=126240.0, ans=0.025 +2024-09-17 02:16:07,031 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.87 vs. 
limit=15.0 +2024-09-17 02:16:32,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=126320.0, ans=0.0 +2024-09-17 02:16:32,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=126320.0, ans=0.025 +2024-09-17 02:16:43,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=126320.0, ans=0.1 +2024-09-17 02:17:03,040 INFO [train.py:1198] (1/2) Epoch 7, batch 4450, loss[loss=0.3133, ctc_loss=0.2594, cr_loss=0.4594, attn_decoder_loss=0.3091, over 20571.00 frames. ], tot_loss[loss=0.2897, ctc_loss=0.2071, cr_loss=0.4346, attn_decoder_loss=0.2892, over 5575737.76 frames. ], batch size: 209, lr: 1.53e-02, grad_scale: 4.0 +2024-09-17 02:17:05,409 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=16.81 vs. limit=15.0 +2024-09-17 02:17:11,332 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.81 vs. limit=15.0 +2024-09-17 02:17:36,227 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.504e+01 1.059e+02 1.182e+02 1.268e+02 2.368e+02, threshold=2.364e+02, percent-clipped=1.0 +2024-09-17 02:17:38,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=126480.0, ans=0.2 +2024-09-17 02:17:45,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=126480.0, ans=0.125 +2024-09-17 02:18:13,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=126560.0, ans=0.05 +2024-09-17 02:18:19,613 INFO [train.py:1198] (1/2) Epoch 7, batch 4500, loss[loss=0.3257, ctc_loss=0.2757, cr_loss=0.4568, attn_decoder_loss=0.3211, over 18836.00 frames. ], tot_loss[loss=0.2936, ctc_loss=0.2146, cr_loss=0.436, attn_decoder_loss=0.2927, over 5236827.78 frames. ], batch size: 210, lr: 1.53e-02, grad_scale: 8.0 +2024-09-17 02:18:36,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=126640.0, ans=0.125 +2024-09-17 02:18:36,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=126640.0, ans=0.0 +2024-09-17 02:18:44,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.65 vs. limit=15.0 +2024-09-17 02:18:54,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=126680.0, ans=0.125 +2024-09-17 02:19:46,525 INFO [train.py:1198] (1/2) Epoch 8, batch 0, loss[loss=0.2629, ctc_loss=0.1659, cr_loss=0.3745, attn_decoder_loss=0.2654, over 29586.00 frames. ], tot_loss[loss=0.2629, ctc_loss=0.1659, cr_loss=0.3745, attn_decoder_loss=0.2654, over 29586.00 frames. 
], batch size: 73, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:19:46,526 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 02:20:02,770 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.9871, 5.8845, 5.4621, 5.7261], device='cuda:1') +2024-09-17 02:20:03,679 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.1.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([4.7868, 4.2036, 4.6298, 4.6419], device='cuda:1') +2024-09-17 02:20:04,921 INFO [train.py:1230] (1/2) Epoch 8, validation: loss=0.2208, ctc_loss=0.05894, cr_loss=4.762e-15, attn_decoder_loss=0.2387, over 944034.00 frames. +2024-09-17 02:20:04,922 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 02:20:21,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=126740.0, ans=0.2 +2024-09-17 02:20:43,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=126780.0, ans=0.125 +2024-09-17 02:20:44,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=126780.0, ans=0.1 +2024-09-17 02:20:50,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=126820.0, ans=0.0 +2024-09-17 02:20:54,245 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.35 vs. limit=22.5 +2024-09-17 02:20:58,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=126820.0, ans=0.0 +2024-09-17 02:21:19,343 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.753e+01 1.155e+02 1.254e+02 1.387e+02 1.225e+03, threshold=2.508e+02, percent-clipped=2.0 +2024-09-17 02:21:20,940 INFO [train.py:1198] (1/2) Epoch 8, batch 50, loss[loss=0.2505, ctc_loss=0.1726, cr_loss=0.3855, attn_decoder_loss=0.2505, over 29449.00 frames. ], tot_loss[loss=0.281, ctc_loss=0.1986, cr_loss=0.4228, attn_decoder_loss=0.2808, over 1267887.86 frames. ], batch size: 70, lr: 1.44e-02, grad_scale: 4.0 +2024-09-17 02:21:28,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=126900.0, ans=0.125 +2024-09-17 02:21:50,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=126980.0, ans=0.2 +2024-09-17 02:22:15,997 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.63 vs. limit=6.0 +2024-09-17 02:22:41,946 INFO [train.py:1198] (1/2) Epoch 8, batch 100, loss[loss=0.2688, ctc_loss=0.1882, cr_loss=0.3716, attn_decoder_loss=0.2694, over 29534.00 frames. ], tot_loss[loss=0.2827, ctc_loss=0.199, cr_loss=0.4248, attn_decoder_loss=0.2826, over 2252297.10 frames. 
], batch size: 76, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:22:57,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=127140.0, ans=0.0 +2024-09-17 02:22:57,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=127140.0, ans=0.1 +2024-09-17 02:22:57,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=127140.0, ans=0.125 +2024-09-17 02:23:14,202 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.64 vs. limit=12.0 +2024-09-17 02:23:18,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=127180.0, ans=0.2 +2024-09-17 02:23:23,680 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.56 vs. limit=15.0 +2024-09-17 02:23:39,489 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:23:48,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=127260.0, ans=0.09899494936611666 +2024-09-17 02:23:51,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=127260.0, ans=0.125 +2024-09-17 02:23:56,929 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.517e+01 1.081e+02 1.202e+02 1.454e+02 2.807e+02, threshold=2.403e+02, percent-clipped=1.0 +2024-09-17 02:23:56,953 INFO [train.py:1198] (1/2) Epoch 8, batch 150, loss[loss=0.2532, ctc_loss=0.1759, cr_loss=0.4023, attn_decoder_loss=0.2529, over 29439.00 frames. ], tot_loss[loss=0.2795, ctc_loss=0.1951, cr_loss=0.4216, attn_decoder_loss=0.2795, over 3047840.90 frames. ], batch size: 70, lr: 1.44e-02, grad_scale: 4.0 +2024-09-17 02:24:00,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=127300.0, ans=0.0 +2024-09-17 02:24:24,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=127340.0, ans=0.0 +2024-09-17 02:24:45,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.87 vs. limit=15.0 +2024-09-17 02:24:54,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=127420.0, ans=0.125 +2024-09-17 02:24:57,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=127460.0, ans=0.0 +2024-09-17 02:25:00,314 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.06 vs. limit=15.0 +2024-09-17 02:25:12,587 INFO [train.py:1198] (1/2) Epoch 8, batch 200, loss[loss=0.2913, ctc_loss=0.2081, cr_loss=0.4185, attn_decoder_loss=0.2912, over 27152.00 frames. ], tot_loss[loss=0.2783, ctc_loss=0.1936, cr_loss=0.4217, attn_decoder_loss=0.2784, over 3659944.99 frames. 
], batch size: 124, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:25:13,635 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=8.23 vs. limit=12.0 +2024-09-17 02:25:26,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=127540.0, ans=0.125 +2024-09-17 02:25:27,558 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.08 vs. limit=15.0 +2024-09-17 02:25:38,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=127540.0, ans=0.0 +2024-09-17 02:25:50,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=127580.0, ans=0.0 +2024-09-17 02:25:54,491 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.24 vs. limit=15.0 +2024-09-17 02:26:17,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=127660.0, ans=0.125 +2024-09-17 02:26:33,475 INFO [train.py:1198] (1/2) Epoch 8, batch 250, loss[loss=0.295, ctc_loss=0.2048, cr_loss=0.4351, attn_decoder_loss=0.2953, over 29244.00 frames. ], tot_loss[loss=0.2777, ctc_loss=0.1923, cr_loss=0.421, attn_decoder_loss=0.2778, over 4140822.62 frames. ], batch size: 100, lr: 1.44e-02, grad_scale: 4.0 +2024-09-17 02:26:34,927 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.173e+01 9.771e+01 1.014e+02 1.103e+02 1.585e+02, threshold=2.028e+02, percent-clipped=0.0 +2024-09-17 02:26:47,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=127740.0, ans=0.1 +2024-09-17 02:27:00,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=127740.0, ans=0.125 +2024-09-17 02:27:03,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=127780.0, ans=0.125 +2024-09-17 02:27:17,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=127820.0, ans=0.0 +2024-09-17 02:27:19,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=127820.0, ans=0.125 +2024-09-17 02:27:25,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=127820.0, ans=0.125 +2024-09-17 02:27:48,967 INFO [train.py:1198] (1/2) Epoch 8, batch 300, loss[loss=0.2923, ctc_loss=0.1983, cr_loss=0.4432, attn_decoder_loss=0.2929, over 29526.00 frames. ], tot_loss[loss=0.2775, ctc_loss=0.192, cr_loss=0.421, attn_decoder_loss=0.2777, over 4509614.03 frames. 
], batch size: 92, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:27:52,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=127900.0, ans=0.125 +2024-09-17 02:27:52,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=127900.0, ans=0.125 +2024-09-17 02:27:53,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=127900.0, ans=0.125 +2024-09-17 02:27:59,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=127900.0, ans=0.1 +2024-09-17 02:28:20,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=127980.0, ans=0.0 +2024-09-17 02:28:37,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=127980.0, ans=0.125 +2024-09-17 02:28:54,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=128020.0, ans=0.09899494936611666 +2024-09-17 02:29:12,056 INFO [train.py:1198] (1/2) Epoch 8, batch 350, loss[loss=0.253, ctc_loss=0.1738, cr_loss=0.3976, attn_decoder_loss=0.2529, over 29347.00 frames. ], tot_loss[loss=0.2777, ctc_loss=0.192, cr_loss=0.4212, attn_decoder_loss=0.2779, over 4795465.45 frames. ], batch size: 71, lr: 1.44e-02, grad_scale: 4.0 +2024-09-17 02:29:13,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=128100.0, ans=0.125 +2024-09-17 02:29:14,914 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.409e+01 1.011e+02 1.095e+02 1.201e+02 2.476e+02, threshold=2.189e+02, percent-clipped=3.0 +2024-09-17 02:29:19,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=128100.0, ans=0.0 +2024-09-17 02:29:59,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=128220.0, ans=0.0 +2024-09-17 02:30:29,956 INFO [train.py:1198] (1/2) Epoch 8, batch 400, loss[loss=0.2956, ctc_loss=0.2035, cr_loss=0.4478, attn_decoder_loss=0.2959, over 29709.00 frames. ], tot_loss[loss=0.2773, ctc_loss=0.1913, cr_loss=0.4209, attn_decoder_loss=0.2775, over 5024735.56 frames. ], batch size: 82, lr: 1.44e-02, grad_scale: 8.0 +2024-09-17 02:30:39,751 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=8.10 vs. limit=15.0 +2024-09-17 02:30:41,164 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.10 vs. limit=15.0 +2024-09-17 02:30:49,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=128340.0, ans=0.125 +2024-09-17 02:30:58,082 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.66 vs. limit=15.0 +2024-09-17 02:30:59,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.42 vs. 
limit=15.0 +2024-09-17 02:31:26,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=128420.0, ans=0.1 +2024-09-17 02:31:40,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=128460.0, ans=0.0 +2024-09-17 02:31:48,781 INFO [train.py:1198] (1/2) Epoch 8, batch 450, loss[loss=0.2833, ctc_loss=0.1901, cr_loss=0.3982, attn_decoder_loss=0.2848, over 29690.00 frames. ], tot_loss[loss=0.2769, ctc_loss=0.1907, cr_loss=0.4203, attn_decoder_loss=0.2771, over 5187306.64 frames. ], batch size: 83, lr: 1.43e-02, grad_scale: 4.0 +2024-09-17 02:31:49,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=128500.0, ans=0.125 +2024-09-17 02:31:53,265 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.548e+01 1.003e+02 1.077e+02 1.187e+02 3.906e+02, threshold=2.154e+02, percent-clipped=1.0 +2024-09-17 02:32:06,579 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.74 vs. limit=10.0 +2024-09-17 02:32:13,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=128540.0, ans=0.0 +2024-09-17 02:32:16,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=128540.0, ans=0.0 +2024-09-17 02:32:42,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=128620.0, ans=0.2 +2024-09-17 02:32:44,491 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.30 vs. limit=15.0 +2024-09-17 02:33:03,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=128700.0, ans=0.1 +2024-09-17 02:33:05,019 INFO [train.py:1198] (1/2) Epoch 8, batch 500, loss[loss=0.2957, ctc_loss=0.2056, cr_loss=0.4559, attn_decoder_loss=0.2956, over 29406.00 frames. ], tot_loss[loss=0.2761, ctc_loss=0.1899, cr_loss=0.4193, attn_decoder_loss=0.2764, over 5329590.49 frames. ], batch size: 94, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:33:17,894 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:34:06,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=128860.0, ans=0.2 +2024-09-17 02:34:09,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=128860.0, ans=0.1 +2024-09-17 02:34:14,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=128860.0, ans=0.125 +2024-09-17 02:34:17,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=128860.0, ans=0.125 +2024-09-17 02:34:19,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=128860.0, ans=0.0 +2024-09-17 02:34:19,971 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.69 vs. 
limit=15.0 +2024-09-17 02:34:23,769 INFO [train.py:1198] (1/2) Epoch 8, batch 550, loss[loss=0.2857, ctc_loss=0.2023, cr_loss=0.4257, attn_decoder_loss=0.2855, over 28821.00 frames. ], tot_loss[loss=0.2763, ctc_loss=0.1903, cr_loss=0.4196, attn_decoder_loss=0.2766, over 5423202.59 frames. ], batch size: 104, lr: 1.43e-02, grad_scale: 4.0 +2024-09-17 02:34:31,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=128900.0, ans=0.025 +2024-09-17 02:34:32,981 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.612e+01 1.023e+02 1.117e+02 1.226e+02 1.997e+02, threshold=2.234e+02, percent-clipped=0.0 +2024-09-17 02:34:50,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=128940.0, ans=0.125 +2024-09-17 02:34:52,216 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=5.58 vs. limit=12.0 +2024-09-17 02:35:13,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=129020.0, ans=0.95 +2024-09-17 02:35:14,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=129020.0, ans=0.0 +2024-09-17 02:35:19,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=129020.0, ans=0.0 +2024-09-17 02:35:43,603 INFO [train.py:1198] (1/2) Epoch 8, batch 600, loss[loss=0.2936, ctc_loss=0.2042, cr_loss=0.4512, attn_decoder_loss=0.2935, over 29236.00 frames. ], tot_loss[loss=0.2766, ctc_loss=0.1903, cr_loss=0.4199, attn_decoder_loss=0.2769, over 5509411.54 frames. ], batch size: 100, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:35:47,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=129100.0, ans=0.125 +2024-09-17 02:35:59,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=129140.0, ans=0.125 +2024-09-17 02:36:02,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=129140.0, ans=0.125 +2024-09-17 02:36:06,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=129140.0, ans=0.1 +2024-09-17 02:36:13,361 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.48 vs. limit=15.0 +2024-09-17 02:36:38,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=129220.0, ans=0.035 +2024-09-17 02:36:55,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=129260.0, ans=0.2 +2024-09-17 02:36:55,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=129260.0, ans=0.2 +2024-09-17 02:36:59,411 INFO [train.py:1198] (1/2) Epoch 8, batch 650, loss[loss=0.2604, ctc_loss=0.1708, cr_loss=0.4052, attn_decoder_loss=0.2614, over 29770.00 frames. ], tot_loss[loss=0.2752, ctc_loss=0.1887, cr_loss=0.418, attn_decoder_loss=0.2756, over 5586276.17 frames. 
], batch size: 81, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:37:01,679 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.47 vs. limit=15.0 +2024-09-17 02:37:04,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=129300.0, ans=0.125 +2024-09-17 02:37:05,489 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.505e+01 9.950e+01 1.082e+02 1.181e+02 2.497e+02, threshold=2.164e+02, percent-clipped=2.0 +2024-09-17 02:37:27,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=129340.0, ans=0.025 +2024-09-17 02:37:54,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.73 vs. limit=22.5 +2024-09-17 02:38:15,953 INFO [train.py:1198] (1/2) Epoch 8, batch 700, loss[loss=0.2661, ctc_loss=0.1766, cr_loss=0.3781, attn_decoder_loss=0.2677, over 29541.00 frames. ], tot_loss[loss=0.276, ctc_loss=0.1891, cr_loss=0.4189, attn_decoder_loss=0.2763, over 5637833.77 frames. ], batch size: 76, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:38:20,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=129500.0, ans=0.1 +2024-09-17 02:38:46,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=129540.0, ans=0.0 +2024-09-17 02:38:59,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=129580.0, ans=0.125 +2024-09-17 02:39:18,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=129620.0, ans=0.0 +2024-09-17 02:39:32,180 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.08 vs. limit=22.5 +2024-09-17 02:39:37,334 INFO [train.py:1198] (1/2) Epoch 8, batch 750, loss[loss=0.286, ctc_loss=0.1941, cr_loss=0.4322, attn_decoder_loss=0.2866, over 29687.00 frames. ], tot_loss[loss=0.2754, ctc_loss=0.1887, cr_loss=0.4172, attn_decoder_loss=0.2758, over 5676344.66 frames. ], batch size: 82, lr: 1.43e-02, grad_scale: 4.0 +2024-09-17 02:39:37,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=129700.0, ans=0.125 +2024-09-17 02:39:46,301 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.613e+01 1.021e+02 1.093e+02 1.208e+02 3.929e+02, threshold=2.185e+02, percent-clipped=1.0 +2024-09-17 02:39:47,286 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.34 vs. limit=15.0 +2024-09-17 02:40:47,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=129860.0, ans=0.0 +2024-09-17 02:40:53,437 INFO [train.py:1198] (1/2) Epoch 8, batch 800, loss[loss=0.2472, ctc_loss=0.1647, cr_loss=0.3948, attn_decoder_loss=0.2476, over 29608.00 frames. ], tot_loss[loss=0.2751, ctc_loss=0.1885, cr_loss=0.4169, attn_decoder_loss=0.2755, over 5706845.59 frames. 
], batch size: 73, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:41:57,102 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.65 vs. limit=15.0 +2024-09-17 02:42:00,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=130060.0, ans=0.0 +2024-09-17 02:42:08,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=130100.0, ans=0.125 +2024-09-17 02:42:09,583 INFO [train.py:1198] (1/2) Epoch 8, batch 850, loss[loss=0.2808, ctc_loss=0.1845, cr_loss=0.4274, attn_decoder_loss=0.282, over 29711.00 frames. ], tot_loss[loss=0.2747, ctc_loss=0.1879, cr_loss=0.4166, attn_decoder_loss=0.2751, over 5735114.88 frames. ], batch size: 89, lr: 1.43e-02, grad_scale: 4.0 +2024-09-17 02:42:20,111 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.352e+01 1.021e+02 1.113e+02 1.293e+02 2.449e+02, threshold=2.226e+02, percent-clipped=1.0 +2024-09-17 02:42:20,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=130100.0, ans=0.1 +2024-09-17 02:42:32,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.24 vs. limit=22.5 +2024-09-17 02:42:39,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=130140.0, ans=0.0 +2024-09-17 02:42:41,954 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.69 vs. limit=22.5 +2024-09-17 02:43:01,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=130220.0, ans=0.04949747468305833 +2024-09-17 02:43:20,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=130260.0, ans=0.025 +2024-09-17 02:43:24,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=130260.0, ans=0.125 +2024-09-17 02:43:27,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=130260.0, ans=0.125 +2024-09-17 02:43:31,943 INFO [train.py:1198] (1/2) Epoch 8, batch 900, loss[loss=0.2455, ctc_loss=0.1558, cr_loss=0.3481, attn_decoder_loss=0.2478, over 29568.00 frames. ], tot_loss[loss=0.275, ctc_loss=0.1882, cr_loss=0.4171, attn_decoder_loss=0.2754, over 5740257.07 frames. 
], batch size: 73, lr: 1.43e-02, grad_scale: 8.0 +2024-09-17 02:43:41,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=130300.0, ans=0.0 +2024-09-17 02:43:55,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=130340.0, ans=0.125 +2024-09-17 02:44:04,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=130380.0, ans=0.0 +2024-09-17 02:44:07,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=130380.0, ans=0.0 +2024-09-17 02:44:29,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.89 vs. limit=22.5 +2024-09-17 02:44:42,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=130460.0, ans=0.125 +2024-09-17 02:44:48,522 INFO [train.py:1198] (1/2) Epoch 8, batch 950, loss[loss=0.2588, ctc_loss=0.1722, cr_loss=0.3939, attn_decoder_loss=0.2597, over 29520.00 frames. ], tot_loss[loss=0.2753, ctc_loss=0.1886, cr_loss=0.4171, attn_decoder_loss=0.2756, over 5743017.61 frames. ], batch size: 74, lr: 1.42e-02, grad_scale: 4.0 +2024-09-17 02:44:51,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=130500.0, ans=0.2 +2024-09-17 02:44:59,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=130500.0, ans=0.125 +2024-09-17 02:45:00,450 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.707e+01 1.021e+02 1.105e+02 1.238e+02 2.320e+02, threshold=2.209e+02, percent-clipped=1.0 +2024-09-17 02:45:00,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=130500.0, ans=0.05 +2024-09-17 02:45:13,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=130540.0, ans=0.1 +2024-09-17 02:45:34,022 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.91 vs. limit=12.0 +2024-09-17 02:45:44,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=130620.0, ans=0.0 +2024-09-17 02:45:53,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=130660.0, ans=0.0 +2024-09-17 02:46:04,908 INFO [train.py:1198] (1/2) Epoch 8, batch 1000, loss[loss=0.2627, ctc_loss=0.1703, cr_loss=0.3937, attn_decoder_loss=0.2642, over 29495.00 frames. ], tot_loss[loss=0.2763, ctc_loss=0.1896, cr_loss=0.4192, attn_decoder_loss=0.2766, over 5737309.70 frames. ], batch size: 77, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:46:23,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=130740.0, ans=0.125 +2024-09-17 02:46:41,191 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.09 vs. 
limit=15.0 +2024-09-17 02:46:42,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=130780.0, ans=0.125 +2024-09-17 02:46:44,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.69 vs. limit=15.0 +2024-09-17 02:46:54,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=130820.0, ans=0.0 +2024-09-17 02:47:00,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=130820.0, ans=10.0 +2024-09-17 02:47:02,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.89 vs. limit=22.5 +2024-09-17 02:47:04,318 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.36 vs. limit=15.0 +2024-09-17 02:47:14,619 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=15.16 vs. limit=15.0 +2024-09-17 02:47:19,031 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.92 vs. limit=15.0 +2024-09-17 02:47:26,002 INFO [train.py:1198] (1/2) Epoch 8, batch 1050, loss[loss=0.2763, ctc_loss=0.181, cr_loss=0.4284, attn_decoder_loss=0.2773, over 29698.00 frames. ], tot_loss[loss=0.2754, ctc_loss=0.1886, cr_loss=0.4178, attn_decoder_loss=0.2757, over 5745150.14 frames. ], batch size: 85, lr: 1.42e-02, grad_scale: 4.0 +2024-09-17 02:47:26,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=130900.0, ans=0.125 +2024-09-17 02:47:32,987 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.56 vs. limit=6.0 +2024-09-17 02:47:34,501 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.20 vs. limit=15.0 +2024-09-17 02:47:39,741 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.558e+01 1.020e+02 1.112e+02 1.252e+02 2.111e+02, threshold=2.224e+02, percent-clipped=0.0 +2024-09-17 02:48:01,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=130980.0, ans=0.0 +2024-09-17 02:48:08,681 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.13 vs. limit=22.5 +2024-09-17 02:48:18,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=131020.0, ans=0.125 +2024-09-17 02:48:38,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=131060.0, ans=0.0 +2024-09-17 02:48:42,653 INFO [train.py:1198] (1/2) Epoch 8, batch 1100, loss[loss=0.2767, ctc_loss=0.1937, cr_loss=0.4294, attn_decoder_loss=0.2764, over 29459.00 frames. ], tot_loss[loss=0.2752, ctc_loss=0.1885, cr_loss=0.4178, attn_decoder_loss=0.2756, over 5757376.92 frames. 
], batch size: 78, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:48:51,207 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.84 vs. limit=15.0 +2024-09-17 02:49:00,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=8.02 vs. limit=10.0 +2024-09-17 02:49:01,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=131140.0, ans=0.0 +2024-09-17 02:49:18,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=131180.0, ans=0.0 +2024-09-17 02:49:18,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=131180.0, ans=0.0 +2024-09-17 02:49:27,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=131220.0, ans=0.025 +2024-09-17 02:49:55,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.27 vs. limit=15.0 +2024-09-17 02:49:59,485 INFO [train.py:1198] (1/2) Epoch 8, batch 1150, loss[loss=0.2731, ctc_loss=0.1866, cr_loss=0.4253, attn_decoder_loss=0.2733, over 29456.00 frames. ], tot_loss[loss=0.2756, ctc_loss=0.1889, cr_loss=0.4184, attn_decoder_loss=0.276, over 5755616.40 frames. ], batch size: 78, lr: 1.42e-02, grad_scale: 4.0 +2024-09-17 02:50:16,922 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.392e+01 9.941e+01 1.085e+02 1.238e+02 2.659e+02, threshold=2.171e+02, percent-clipped=2.0 +2024-09-17 02:50:26,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=131340.0, ans=0.0 +2024-09-17 02:50:44,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=131380.0, ans=0.125 +2024-09-17 02:50:48,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=131420.0, ans=0.0 +2024-09-17 02:50:54,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=131420.0, ans=0.125 +2024-09-17 02:51:17,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=131460.0, ans=0.95 +2024-09-17 02:51:19,981 INFO [train.py:1198] (1/2) Epoch 8, batch 1200, loss[loss=0.2838, ctc_loss=0.1908, cr_loss=0.4328, attn_decoder_loss=0.2845, over 29668.00 frames. ], tot_loss[loss=0.2768, ctc_loss=0.1903, cr_loss=0.4204, attn_decoder_loss=0.277, over 5748099.61 frames. ], batch size: 85, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:51:35,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=131540.0, ans=0.125 +2024-09-17 02:51:43,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=131540.0, ans=0.0 +2024-09-17 02:51:48,192 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=20.31 vs. 
limit=22.5 +2024-09-17 02:51:57,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=131580.0, ans=0.0 +2024-09-17 02:52:18,720 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.30 vs. limit=12.0 +2024-09-17 02:52:36,141 INFO [train.py:1198] (1/2) Epoch 8, batch 1250, loss[loss=0.2898, ctc_loss=0.1998, cr_loss=0.4265, attn_decoder_loss=0.2903, over 29517.00 frames. ], tot_loss[loss=0.2772, ctc_loss=0.1902, cr_loss=0.4205, attn_decoder_loss=0.2775, over 5774459.38 frames. ], batch size: 92, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:52:50,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=131740.0, ans=10.0 +2024-09-17 02:52:52,513 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.50 vs. limit=22.5 +2024-09-17 02:52:52,810 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.535e+01 1.024e+02 1.090e+02 1.251e+02 7.392e+02, threshold=2.180e+02, percent-clipped=1.0 +2024-09-17 02:53:09,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=131780.0, ans=0.1 +2024-09-17 02:53:20,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=131820.0, ans=0.125 +2024-09-17 02:53:23,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=131820.0, ans=0.0 +2024-09-17 02:53:52,340 INFO [train.py:1198] (1/2) Epoch 8, batch 1300, loss[loss=0.288, ctc_loss=0.2048, cr_loss=0.4016, attn_decoder_loss=0.2883, over 28510.00 frames. ], tot_loss[loss=0.2761, ctc_loss=0.1891, cr_loss=0.4189, attn_decoder_loss=0.2764, over 5780427.21 frames. ], batch size: 112, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:54:03,684 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.67 vs. limit=22.5 +2024-09-17 02:54:13,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=131940.0, ans=0.125 +2024-09-17 02:54:29,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=131980.0, ans=0.125 +2024-09-17 02:54:43,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=132020.0, ans=0.0 +2024-09-17 02:55:12,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=132100.0, ans=0.125 +2024-09-17 02:55:13,328 INFO [train.py:1198] (1/2) Epoch 8, batch 1350, loss[loss=0.2752, ctc_loss=0.1805, cr_loss=0.422, attn_decoder_loss=0.2763, over 29762.00 frames. ], tot_loss[loss=0.2758, ctc_loss=0.1884, cr_loss=0.4182, attn_decoder_loss=0.2762, over 5796615.90 frames. 
], batch size: 81, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:55:15,108 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 02:55:16,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=132100.0, ans=0.125 +2024-09-17 02:55:29,762 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.512e+01 9.976e+01 1.075e+02 1.151e+02 1.437e+02, threshold=2.149e+02, percent-clipped=0.0 +2024-09-17 02:55:33,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.00 vs. limit=22.5 +2024-09-17 02:55:53,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=132180.0, ans=0.125 +2024-09-17 02:56:00,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=132220.0, ans=0.0 +2024-09-17 02:56:05,345 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=17.54 vs. limit=15.0 +2024-09-17 02:56:07,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=132220.0, ans=0.1 +2024-09-17 02:56:28,462 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.86 vs. limit=15.0 +2024-09-17 02:56:28,853 INFO [train.py:1198] (1/2) Epoch 8, batch 1400, loss[loss=0.238, ctc_loss=0.1558, cr_loss=0.3571, attn_decoder_loss=0.2392, over 29576.00 frames. ], tot_loss[loss=0.2755, ctc_loss=0.1882, cr_loss=0.4187, attn_decoder_loss=0.2759, over 5807244.20 frames. ], batch size: 69, lr: 1.42e-02, grad_scale: 8.0 +2024-09-17 02:56:30,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=132300.0, ans=0.125 +2024-09-17 02:56:33,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=132300.0, ans=0.125 +2024-09-17 02:56:36,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=132300.0, ans=0.125 +2024-09-17 02:57:00,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=132380.0, ans=0.125 +2024-09-17 02:57:02,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=132380.0, ans=0.2 +2024-09-17 02:57:03,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=132380.0, ans=0.125 +2024-09-17 02:57:28,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.13 vs. limit=15.0 +2024-09-17 02:57:35,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=132460.0, ans=0.125 +2024-09-17 02:57:44,592 INFO [train.py:1198] (1/2) Epoch 8, batch 1450, loss[loss=0.3018, ctc_loss=0.2121, cr_loss=0.4678, attn_decoder_loss=0.3014, over 29454.00 frames. 
], tot_loss[loss=0.2759, ctc_loss=0.1887, cr_loss=0.4188, attn_decoder_loss=0.2763, over 5804220.59 frames. ], batch size: 94, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 02:58:03,006 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.05 vs. limit=15.0 +2024-09-17 02:58:06,572 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.324e+01 1.032e+02 1.089e+02 1.206e+02 2.427e+02, threshold=2.178e+02, percent-clipped=3.0 +2024-09-17 02:58:26,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=132580.0, ans=0.09899494936611666 +2024-09-17 02:58:49,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=132660.0, ans=0.07 +2024-09-17 02:58:50,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=132660.0, ans=0.0 +2024-09-17 02:59:04,178 INFO [train.py:1198] (1/2) Epoch 8, batch 1500, loss[loss=0.2922, ctc_loss=0.2126, cr_loss=0.4688, attn_decoder_loss=0.2906, over 29627.00 frames. ], tot_loss[loss=0.2761, ctc_loss=0.1889, cr_loss=0.4193, attn_decoder_loss=0.2765, over 5804452.59 frames. ], batch size: 86, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 02:59:06,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=132700.0, ans=0.1 +2024-09-17 02:59:37,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=132780.0, ans=0.2 +2024-09-17 02:59:49,734 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.87 vs. limit=6.0 +2024-09-17 03:00:20,624 INFO [train.py:1198] (1/2) Epoch 8, batch 1550, loss[loss=0.2964, ctc_loss=0.2095, cr_loss=0.4612, attn_decoder_loss=0.2958, over 29517.00 frames. ], tot_loss[loss=0.2759, ctc_loss=0.189, cr_loss=0.419, attn_decoder_loss=0.2763, over 5777918.50 frames. ], batch size: 90, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 03:00:34,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=132940.0, ans=0.0 +2024-09-17 03:00:41,775 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.026e+01 9.829e+01 1.097e+02 1.218e+02 3.935e+02, threshold=2.194e+02, percent-clipped=3.0 +2024-09-17 03:00:51,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=132980.0, ans=0.125 +2024-09-17 03:00:52,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=132980.0, ans=0.125 +2024-09-17 03:00:57,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.09 vs. limit=12.0 +2024-09-17 03:01:08,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.49 vs. limit=15.0 +2024-09-17 03:01:35,977 INFO [train.py:1198] (1/2) Epoch 8, batch 1600, loss[loss=0.2886, ctc_loss=0.1956, cr_loss=0.4454, attn_decoder_loss=0.289, over 29672.00 frames. ], tot_loss[loss=0.2755, ctc_loss=0.1887, cr_loss=0.4186, attn_decoder_loss=0.2758, over 5761269.46 frames. 
], batch size: 85, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 03:01:44,791 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.65 vs. limit=15.0 +2024-09-17 03:01:52,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=133140.0, ans=0.125 +2024-09-17 03:01:53,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=133140.0, ans=0.025 +2024-09-17 03:01:54,178 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.51 vs. limit=22.5 +2024-09-17 03:02:26,981 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.54 vs. limit=6.0 +2024-09-17 03:02:50,903 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.25 vs. limit=15.0 +2024-09-17 03:02:55,930 INFO [train.py:1198] (1/2) Epoch 8, batch 1650, loss[loss=0.2833, ctc_loss=0.1902, cr_loss=0.4277, attn_decoder_loss=0.2841, over 29720.00 frames. ], tot_loss[loss=0.2754, ctc_loss=0.1888, cr_loss=0.4183, attn_decoder_loss=0.2758, over 5757548.66 frames. ], batch size: 89, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 03:02:58,209 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.40 vs. limit=22.5 +2024-09-17 03:03:06,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=133300.0, ans=0.0 +2024-09-17 03:03:14,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=133340.0, ans=0.05 +2024-09-17 03:03:18,406 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.840e+01 1.022e+02 1.128e+02 1.304e+02 4.033e+02, threshold=2.256e+02, percent-clipped=2.0 +2024-09-17 03:03:20,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=133340.0, ans=0.125 +2024-09-17 03:03:41,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=133420.0, ans=0.0 +2024-09-17 03:03:56,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=133460.0, ans=0.2 +2024-09-17 03:04:11,204 INFO [train.py:1198] (1/2) Epoch 8, batch 1700, loss[loss=0.243, ctc_loss=0.1641, cr_loss=0.3752, attn_decoder_loss=0.2435, over 29588.00 frames. ], tot_loss[loss=0.275, ctc_loss=0.1881, cr_loss=0.4182, attn_decoder_loss=0.2754, over 5779349.94 frames. 
], batch size: 69, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 03:04:32,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=133540.0, ans=0.125 +2024-09-17 03:04:43,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=133580.0, ans=0.2 +2024-09-17 03:04:52,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=133580.0, ans=0.0 +2024-09-17 03:04:54,694 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.33 vs. limit=15.0 +2024-09-17 03:05:26,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=133700.0, ans=0.2 +2024-09-17 03:05:27,782 INFO [train.py:1198] (1/2) Epoch 8, batch 1750, loss[loss=0.2556, ctc_loss=0.181, cr_loss=0.4072, attn_decoder_loss=0.2549, over 29281.00 frames. ], tot_loss[loss=0.2745, ctc_loss=0.1875, cr_loss=0.4171, attn_decoder_loss=0.2749, over 5789298.83 frames. ], batch size: 67, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 03:05:38,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=133700.0, ans=0.07 +2024-09-17 03:05:42,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=133700.0, ans=0.125 +2024-09-17 03:05:48,636 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.67 vs. limit=15.0 +2024-09-17 03:05:55,426 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.904e+01 9.818e+01 1.049e+02 1.183e+02 2.492e+02, threshold=2.098e+02, percent-clipped=1.0 +2024-09-17 03:06:13,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=133780.0, ans=0.1 +2024-09-17 03:06:17,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=133820.0, ans=0.125 +2024-09-17 03:06:48,891 INFO [train.py:1198] (1/2) Epoch 8, batch 1800, loss[loss=0.3003, ctc_loss=0.2112, cr_loss=0.4579, attn_decoder_loss=0.3001, over 29708.00 frames. ], tot_loss[loss=0.2748, ctc_loss=0.1879, cr_loss=0.4171, attn_decoder_loss=0.2752, over 5792337.51 frames. ], batch size: 83, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 03:06:49,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=133900.0, ans=0.125 +2024-09-17 03:06:49,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=133900.0, ans=0.125 +2024-09-17 03:07:04,898 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.26 vs. 
limit=15.0 +2024-09-17 03:07:25,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=133980.0, ans=0.025 +2024-09-17 03:07:31,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=133980.0, ans=0.0 +2024-09-17 03:07:37,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=134020.0, ans=0.125 +2024-09-17 03:07:45,716 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:07:54,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=134060.0, ans=0.2 +2024-09-17 03:08:05,024 INFO [train.py:1198] (1/2) Epoch 8, batch 1850, loss[loss=0.2713, ctc_loss=0.1843, cr_loss=0.4256, attn_decoder_loss=0.2715, over 29634.00 frames. ], tot_loss[loss=0.2741, ctc_loss=0.1869, cr_loss=0.4166, attn_decoder_loss=0.2745, over 5797800.12 frames. ], batch size: 86, lr: 1.41e-02, grad_scale: 4.0 +2024-09-17 03:08:17,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=134100.0, ans=0.2 +2024-09-17 03:08:17,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=134100.0, ans=0.2 +2024-09-17 03:08:30,751 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.292e+01 1.011e+02 1.086e+02 1.212e+02 2.686e+02, threshold=2.172e+02, percent-clipped=1.0 +2024-09-17 03:08:52,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=134220.0, ans=0.125 +2024-09-17 03:08:54,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=134220.0, ans=0.125 +2024-09-17 03:09:00,060 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:09:05,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=134260.0, ans=0.125 +2024-09-17 03:09:15,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=134260.0, ans=0.0 +2024-09-17 03:09:18,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=134260.0, ans=0.2 +2024-09-17 03:09:20,857 INFO [train.py:1198] (1/2) Epoch 8, batch 1900, loss[loss=0.289, ctc_loss=0.1982, cr_loss=0.4472, attn_decoder_loss=0.2892, over 29703.00 frames. ], tot_loss[loss=0.2751, ctc_loss=0.1877, cr_loss=0.4177, attn_decoder_loss=0.2755, over 5804775.35 frames. 
], batch size: 89, lr: 1.41e-02, grad_scale: 8.0 +2024-09-17 03:09:21,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=134300.0, ans=0.2 +2024-09-17 03:09:37,199 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:09:47,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=134340.0, ans=15.0 +2024-09-17 03:09:49,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=134340.0, ans=0.125 +2024-09-17 03:09:49,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=134340.0, ans=0.125 +2024-09-17 03:10:02,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=134380.0, ans=0.125 +2024-09-17 03:10:15,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.54 vs. limit=6.0 +2024-09-17 03:10:19,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=134420.0, ans=0.2 +2024-09-17 03:10:22,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=134420.0, ans=0.125 +2024-09-17 03:10:28,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=134460.0, ans=0.0 +2024-09-17 03:10:41,891 INFO [train.py:1198] (1/2) Epoch 8, batch 1950, loss[loss=0.2788, ctc_loss=0.1934, cr_loss=0.4331, attn_decoder_loss=0.2786, over 29464.00 frames. ], tot_loss[loss=0.2765, ctc_loss=0.1886, cr_loss=0.4196, attn_decoder_loss=0.2769, over 5819136.30 frames. ], batch size: 78, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:10:56,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=134540.0, ans=0.125 +2024-09-17 03:11:09,503 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.585e+01 1.007e+02 1.092e+02 1.214e+02 3.508e+02, threshold=2.184e+02, percent-clipped=3.0 +2024-09-17 03:11:13,469 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.30 vs. 
limit=15.0 +2024-09-17 03:11:19,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=134580.0, ans=0.125 +2024-09-17 03:11:28,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=134620.0, ans=0.2 +2024-09-17 03:11:32,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=134620.0, ans=0.125 +2024-09-17 03:11:42,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=134660.0, ans=0.125 +2024-09-17 03:11:47,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=134660.0, ans=0.125 +2024-09-17 03:11:49,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=14.84 vs. limit=15.0 +2024-09-17 03:11:57,952 INFO [train.py:1198] (1/2) Epoch 8, batch 2000, loss[loss=0.2343, ctc_loss=0.1563, cr_loss=0.3616, attn_decoder_loss=0.2349, over 29328.00 frames. ], tot_loss[loss=0.2772, ctc_loss=0.1895, cr_loss=0.4204, attn_decoder_loss=0.2776, over 5795308.39 frames. ], batch size: 67, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:12:15,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=134740.0, ans=0.125 +2024-09-17 03:12:16,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=134740.0, ans=0.2 +2024-09-17 03:12:21,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=134740.0, ans=0.125 +2024-09-17 03:12:31,320 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.26 vs. limit=15.0 +2024-09-17 03:12:38,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=134780.0, ans=0.125 +2024-09-17 03:12:51,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=134820.0, ans=0.0 +2024-09-17 03:13:05,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=134860.0, ans=0.0 +2024-09-17 03:13:08,086 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.54 vs. limit=6.0 +2024-09-17 03:13:14,079 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.32 vs. limit=6.0 +2024-09-17 03:13:14,528 INFO [train.py:1198] (1/2) Epoch 8, batch 2050, loss[loss=0.2392, ctc_loss=0.157, cr_loss=0.3678, attn_decoder_loss=0.2402, over 29465.00 frames. ], tot_loss[loss=0.2762, ctc_loss=0.1888, cr_loss=0.4194, attn_decoder_loss=0.2766, over 5788014.80 frames. 
], batch size: 70, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:13:33,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=134940.0, ans=0.125 +2024-09-17 03:13:37,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=134940.0, ans=0.05 +2024-09-17 03:13:38,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=134940.0, ans=0.125 +2024-09-17 03:13:39,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=134940.0, ans=15.0 +2024-09-17 03:13:39,131 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.32 vs. limit=22.5 +2024-09-17 03:13:45,829 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.747e+01 9.821e+01 1.060e+02 1.158e+02 2.378e+02, threshold=2.119e+02, percent-clipped=1.0 +2024-09-17 03:14:03,472 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.69 vs. limit=15.0 +2024-09-17 03:14:09,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=135020.0, ans=0.1 +2024-09-17 03:14:35,264 INFO [train.py:1198] (1/2) Epoch 8, batch 2100, loss[loss=0.2675, ctc_loss=0.1749, cr_loss=0.4057, attn_decoder_loss=0.2688, over 29736.00 frames. ], tot_loss[loss=0.2756, ctc_loss=0.188, cr_loss=0.4185, attn_decoder_loss=0.2761, over 5799174.19 frames. ], batch size: 81, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:14:35,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.whiten.whitening_limit, batch_count=135100.0, ans=12.0 +2024-09-17 03:15:18,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=135180.0, ans=0.1 +2024-09-17 03:15:51,532 INFO [train.py:1198] (1/2) Epoch 8, batch 2150, loss[loss=0.2748, ctc_loss=0.1885, cr_loss=0.4481, attn_decoder_loss=0.2744, over 29463.00 frames. ], tot_loss[loss=0.2746, ctc_loss=0.1867, cr_loss=0.4174, attn_decoder_loss=0.275, over 5814440.31 frames. ], batch size: 78, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:16:04,693 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.58 vs. limit=10.0 +2024-09-17 03:16:17,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=135340.0, ans=0.1 +2024-09-17 03:16:22,344 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.928e+01 9.784e+01 1.043e+02 1.111e+02 1.443e+02, threshold=2.086e+02, percent-clipped=0.0 +2024-09-17 03:16:24,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=135380.0, ans=0.125 +2024-09-17 03:16:48,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=135420.0, ans=0.0 +2024-09-17 03:16:49,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.19 vs. 
limit=15.0 +2024-09-17 03:17:02,617 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.27 vs. limit=12.0 +2024-09-17 03:17:07,883 INFO [train.py:1198] (1/2) Epoch 8, batch 2200, loss[loss=0.289, ctc_loss=0.1957, cr_loss=0.4383, attn_decoder_loss=0.2896, over 29624.00 frames. ], tot_loss[loss=0.2752, ctc_loss=0.1876, cr_loss=0.4184, attn_decoder_loss=0.2756, over 5811459.78 frames. ], batch size: 86, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:17:09,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=135500.0, ans=0.0 +2024-09-17 03:17:17,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=135500.0, ans=0.0 +2024-09-17 03:17:20,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=135500.0, ans=0.125 +2024-09-17 03:17:24,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=135540.0, ans=0.125 +2024-09-17 03:17:33,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=135540.0, ans=0.125 +2024-09-17 03:17:47,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=135580.0, ans=0.025 +2024-09-17 03:17:49,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=135580.0, ans=0.09899494936611666 +2024-09-17 03:17:54,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.31 vs. limit=10.0 +2024-09-17 03:18:28,891 INFO [train.py:1198] (1/2) Epoch 8, batch 2250, loss[loss=0.2808, ctc_loss=0.1978, cr_loss=0.427, attn_decoder_loss=0.2805, over 29706.00 frames. ], tot_loss[loss=0.2749, ctc_loss=0.1874, cr_loss=0.4183, attn_decoder_loss=0.2753, over 5810789.77 frames. 
], batch size: 82, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:18:30,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=135700.0, ans=0.125 +2024-09-17 03:18:33,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=135700.0, ans=0.0 +2024-09-17 03:18:54,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=135740.0, ans=0.125 +2024-09-17 03:19:00,614 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.680e+01 9.920e+01 1.107e+02 1.209e+02 3.496e+02, threshold=2.214e+02, percent-clipped=1.0 +2024-09-17 03:19:02,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=135780.0, ans=0.025 +2024-09-17 03:19:08,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=135780.0, ans=0.125 +2024-09-17 03:19:11,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=135780.0, ans=0.125 +2024-09-17 03:19:40,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=135860.0, ans=0.0 +2024-09-17 03:19:44,838 INFO [train.py:1198] (1/2) Epoch 8, batch 2300, loss[loss=0.2529, ctc_loss=0.1694, cr_loss=0.4124, attn_decoder_loss=0.253, over 29332.00 frames. ], tot_loss[loss=0.2742, ctc_loss=0.1869, cr_loss=0.4174, attn_decoder_loss=0.2746, over 5799392.31 frames. ], batch size: 71, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:19:48,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=135900.0, ans=0.5 +2024-09-17 03:19:49,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=135900.0, ans=0.0 +2024-09-17 03:19:58,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=135940.0, ans=10.0 +2024-09-17 03:20:07,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=135940.0, ans=0.025 +2024-09-17 03:20:21,852 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:20:32,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=136020.0, ans=0.125 +2024-09-17 03:20:34,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=136020.0, ans=0.2 +2024-09-17 03:20:47,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.92 vs. limit=15.0 +2024-09-17 03:20:53,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=136060.0, ans=0.0 +2024-09-17 03:20:57,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=136060.0, ans=0.0 +2024-09-17 03:21:01,310 INFO [train.py:1198] (1/2) Epoch 8, batch 2350, loss[loss=0.28, ctc_loss=0.1803, cr_loss=0.4345, attn_decoder_loss=0.2814, over 29704.00 frames. 
], tot_loss[loss=0.2744, ctc_loss=0.1872, cr_loss=0.4177, attn_decoder_loss=0.2748, over 5805738.91 frames. ], batch size: 83, lr: 1.40e-02, grad_scale: 4.0 +2024-09-17 03:21:29,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=136140.0, ans=0.07 +2024-09-17 03:21:34,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=136180.0, ans=0.125 +2024-09-17 03:21:37,144 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.392e+01 1.038e+02 1.165e+02 1.369e+02 2.325e+02, threshold=2.330e+02, percent-clipped=1.0 +2024-09-17 03:21:45,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=136180.0, ans=0.125 +2024-09-17 03:22:21,881 INFO [train.py:1198] (1/2) Epoch 8, batch 2400, loss[loss=0.2634, ctc_loss=0.168, cr_loss=0.3885, attn_decoder_loss=0.2654, over 29545.00 frames. ], tot_loss[loss=0.2749, ctc_loss=0.1874, cr_loss=0.4182, attn_decoder_loss=0.2753, over 5808933.29 frames. ], batch size: 76, lr: 1.40e-02, grad_scale: 8.0 +2024-09-17 03:22:26,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=136300.0, ans=0.0 +2024-09-17 03:22:26,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=136300.0, ans=0.125 +2024-09-17 03:22:29,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=136300.0, ans=0.125 +2024-09-17 03:22:41,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=136340.0, ans=0.125 +2024-09-17 03:23:25,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=136460.0, ans=0.125 +2024-09-17 03:23:33,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=136460.0, ans=0.1 +2024-09-17 03:23:37,600 INFO [train.py:1198] (1/2) Epoch 8, batch 2450, loss[loss=0.2795, ctc_loss=0.2007, cr_loss=0.4463, attn_decoder_loss=0.2783, over 29715.00 frames. ], tot_loss[loss=0.276, ctc_loss=0.1885, cr_loss=0.4199, attn_decoder_loss=0.2763, over 5786234.01 frames. 
], batch size: 82, lr: 1.39e-02, grad_scale: 4.0 +2024-09-17 03:23:48,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=136500.0, ans=0.125 +2024-09-17 03:23:52,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=136540.0, ans=0.125 +2024-09-17 03:24:11,924 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.397e+01 1.019e+02 1.082e+02 1.263e+02 3.288e+02, threshold=2.163e+02, percent-clipped=1.0 +2024-09-17 03:24:19,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=136580.0, ans=0.125 +2024-09-17 03:24:21,426 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:24:28,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=136620.0, ans=0.125 +2024-09-17 03:24:38,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=136660.0, ans=0.1 +2024-09-17 03:24:39,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=136660.0, ans=0.0 +2024-09-17 03:24:40,140 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.35 vs. limit=22.5 +2024-09-17 03:24:52,935 INFO [train.py:1198] (1/2) Epoch 8, batch 2500, loss[loss=0.2726, ctc_loss=0.1833, cr_loss=0.3967, attn_decoder_loss=0.2737, over 29619.00 frames. ], tot_loss[loss=0.2758, ctc_loss=0.1885, cr_loss=0.4195, attn_decoder_loss=0.2762, over 5796700.57 frames. ], batch size: 86, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:24:54,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=136700.0, ans=0.125 +2024-09-17 03:24:57,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=136700.0, ans=0.0 +2024-09-17 03:25:04,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=136700.0, ans=0.125 +2024-09-17 03:25:21,533 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.05 vs. limit=22.5 +2024-09-17 03:25:48,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=136820.0, ans=0.125 +2024-09-17 03:26:13,247 INFO [train.py:1198] (1/2) Epoch 8, batch 2550, loss[loss=0.2611, ctc_loss=0.1794, cr_loss=0.4319, attn_decoder_loss=0.2606, over 29369.00 frames. ], tot_loss[loss=0.2758, ctc_loss=0.1885, cr_loss=0.4198, attn_decoder_loss=0.2761, over 5798626.51 frames. 
], batch size: 67, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:26:16,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=136900.0, ans=0.2 +2024-09-17 03:26:34,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=136940.0, ans=0.0 +2024-09-17 03:26:49,202 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.639e+01 1.024e+02 1.084e+02 1.212e+02 4.526e+02, threshold=2.168e+02, percent-clipped=2.0 +2024-09-17 03:27:06,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=137020.0, ans=0.125 +2024-09-17 03:27:28,806 INFO [train.py:1198] (1/2) Epoch 8, batch 2600, loss[loss=0.2678, ctc_loss=0.1822, cr_loss=0.4353, attn_decoder_loss=0.2677, over 29464.00 frames. ], tot_loss[loss=0.2761, ctc_loss=0.1885, cr_loss=0.4201, attn_decoder_loss=0.2764, over 5795388.48 frames. ], batch size: 78, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:27:54,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=137140.0, ans=0.0 +2024-09-17 03:28:03,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=137180.0, ans=0.125 +2024-09-17 03:28:43,794 INFO [train.py:1198] (1/2) Epoch 8, batch 2650, loss[loss=0.2984, ctc_loss=0.2108, cr_loss=0.4395, attn_decoder_loss=0.2984, over 29243.00 frames. ], tot_loss[loss=0.2761, ctc_loss=0.1886, cr_loss=0.4204, attn_decoder_loss=0.2765, over 5801788.50 frames. ], batch size: 100, lr: 1.39e-02, grad_scale: 4.0 +2024-09-17 03:28:44,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=137300.0, ans=0.125 +2024-09-17 03:28:48,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=137300.0, ans=0.0 +2024-09-17 03:28:50,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=137300.0, ans=0.1 +2024-09-17 03:29:00,172 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=9.57 vs. limit=15.0 +2024-09-17 03:29:08,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=137340.0, ans=0.0 +2024-09-17 03:29:13,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=137340.0, ans=0.0 +2024-09-17 03:29:14,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=137380.0, ans=0.125 +2024-09-17 03:29:23,309 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.237e+01 1.027e+02 1.110e+02 1.218e+02 2.254e+02, threshold=2.220e+02, percent-clipped=2.0 +2024-09-17 03:29:49,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.94 vs. 
limit=15.0 +2024-09-17 03:29:53,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=137460.0, ans=0.0 +2024-09-17 03:30:02,789 INFO [train.py:1198] (1/2) Epoch 8, batch 2700, loss[loss=0.2782, ctc_loss=0.1848, cr_loss=0.4271, attn_decoder_loss=0.2791, over 29534.00 frames. ], tot_loss[loss=0.2762, ctc_loss=0.1885, cr_loss=0.4196, attn_decoder_loss=0.2766, over 5796478.68 frames. ], batch size: 87, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:30:09,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=137500.0, ans=0.125 +2024-09-17 03:30:19,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=137540.0, ans=0.04949747468305833 +2024-09-17 03:30:21,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=137540.0, ans=0.125 +2024-09-17 03:30:31,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=137580.0, ans=0.0 +2024-09-17 03:30:50,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=137620.0, ans=0.05 +2024-09-17 03:31:08,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=137660.0, ans=0.025 +2024-09-17 03:31:18,719 INFO [train.py:1198] (1/2) Epoch 8, batch 2750, loss[loss=0.2779, ctc_loss=0.1963, cr_loss=0.4317, attn_decoder_loss=0.2774, over 29502.00 frames. ], tot_loss[loss=0.2756, ctc_loss=0.1886, cr_loss=0.4195, attn_decoder_loss=0.2759, over 5795769.54 frames. ], batch size: 75, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:31:19,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=137700.0, ans=0.125 +2024-09-17 03:31:32,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=137740.0, ans=0.125 +2024-09-17 03:31:37,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=137740.0, ans=0.1 +2024-09-17 03:31:53,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=137780.0, ans=0.95 +2024-09-17 03:31:56,139 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.690e+01 1.009e+02 1.091e+02 1.195e+02 3.553e+02, threshold=2.183e+02, percent-clipped=1.0 +2024-09-17 03:32:15,404 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=13.27 vs. limit=15.0 +2024-09-17 03:32:16,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=137820.0, ans=0.125 +2024-09-17 03:32:20,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=137860.0, ans=0.2 +2024-09-17 03:32:34,230 INFO [train.py:1198] (1/2) Epoch 8, batch 2800, loss[loss=0.3067, ctc_loss=0.2525, cr_loss=0.454, attn_decoder_loss=0.3026, over 20093.00 frames. ], tot_loss[loss=0.2762, ctc_loss=0.1894, cr_loss=0.4194, attn_decoder_loss=0.2765, over 5776217.39 frames. 
], batch size: 209, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:32:34,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=137900.0, ans=0.0 +2024-09-17 03:32:40,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=137900.0, ans=15.0 +2024-09-17 03:33:15,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=137980.0, ans=0.0 +2024-09-17 03:33:24,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=138020.0, ans=0.125 +2024-09-17 03:33:32,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=138020.0, ans=0.125 +2024-09-17 03:33:41,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=138060.0, ans=0.09899494936611666 +2024-09-17 03:33:41,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=138060.0, ans=0.2 +2024-09-17 03:33:53,372 INFO [train.py:1198] (1/2) Epoch 8, batch 2850, loss[loss=0.268, ctc_loss=0.1828, cr_loss=0.4221, attn_decoder_loss=0.2681, over 29505.00 frames. ], tot_loss[loss=0.2768, ctc_loss=0.19, cr_loss=0.4203, attn_decoder_loss=0.2771, over 5762173.42 frames. ], batch size: 77, lr: 1.39e-02, grad_scale: 4.0 +2024-09-17 03:33:59,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=138100.0, ans=0.125 +2024-09-17 03:34:13,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=138140.0, ans=0.025 +2024-09-17 03:34:33,728 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.28 vs. limit=6.0 +2024-09-17 03:34:34,371 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.049e+01 1.050e+02 1.191e+02 1.407e+02 3.981e+02, threshold=2.382e+02, percent-clipped=5.0 +2024-09-17 03:34:45,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=138220.0, ans=0.125 +2024-09-17 03:35:09,031 INFO [train.py:1198] (1/2) Epoch 8, batch 2900, loss[loss=0.2703, ctc_loss=0.1753, cr_loss=0.4092, attn_decoder_loss=0.2718, over 29412.00 frames. ], tot_loss[loss=0.2773, ctc_loss=0.1897, cr_loss=0.4214, attn_decoder_loss=0.2777, over 5787780.21 frames. ], batch size: 79, lr: 1.39e-02, grad_scale: 8.0 +2024-09-17 03:35:24,961 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.72 vs. 
limit=15.0 +2024-09-17 03:35:35,041 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:35:45,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=138380.0, ans=0.125 +2024-09-17 03:35:56,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=138420.0, ans=0.07 +2024-09-17 03:36:00,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=138420.0, ans=0.09899494936611666 +2024-09-17 03:36:16,459 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.32 vs. limit=10.0 +2024-09-17 03:36:24,468 INFO [train.py:1198] (1/2) Epoch 8, batch 2950, loss[loss=0.2632, ctc_loss=0.1747, cr_loss=0.4253, attn_decoder_loss=0.2636, over 29511.00 frames. ], tot_loss[loss=0.2759, ctc_loss=0.1884, cr_loss=0.419, attn_decoder_loss=0.2763, over 5782724.01 frames. ], batch size: 75, lr: 1.38e-02, grad_scale: 4.0 +2024-09-17 03:37:04,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=138580.0, ans=0.0 +2024-09-17 03:37:07,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=138580.0, ans=0.125 +2024-09-17 03:37:08,773 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.343e+01 1.007e+02 1.102e+02 1.224e+02 2.215e+02, threshold=2.205e+02, percent-clipped=0.0 +2024-09-17 03:37:42,134 INFO [train.py:1198] (1/2) Epoch 8, batch 3000, loss[loss=0.2769, ctc_loss=0.1807, cr_loss=0.4122, attn_decoder_loss=0.2785, over 29754.00 frames. ], tot_loss[loss=0.2756, ctc_loss=0.1881, cr_loss=0.4188, attn_decoder_loss=0.2761, over 5782944.64 frames. ], batch size: 81, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:37:42,135 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 03:37:59,568 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.7366, 5.5894, 4.9604, 5.2875], device='cuda:1') +2024-09-17 03:38:01,065 INFO [train.py:1230] (1/2) Epoch 8, validation: loss=0.2156, ctc_loss=0.0545, cr_loss=4.305e-15, attn_decoder_loss=0.2335, over 944034.00 frames. +2024-09-17 03:38:01,066 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 03:38:31,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=138780.0, ans=0.0 +2024-09-17 03:38:31,595 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 03:39:04,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=138860.0, ans=0.125 +2024-09-17 03:39:16,479 INFO [train.py:1198] (1/2) Epoch 8, batch 3050, loss[loss=0.2755, ctc_loss=0.1932, cr_loss=0.4184, attn_decoder_loss=0.2754, over 29538.00 frames. ], tot_loss[loss=0.2765, ctc_loss=0.1888, cr_loss=0.4197, attn_decoder_loss=0.2769, over 5776436.92 frames. 
], batch size: 76, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:39:27,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=138900.0, ans=0.125 +2024-09-17 03:39:31,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=138940.0, ans=0.0 +2024-09-17 03:39:49,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=138980.0, ans=0.125 +2024-09-17 03:39:58,633 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.688e+01 1.026e+02 1.087e+02 1.186e+02 2.791e+02, threshold=2.173e+02, percent-clipped=1.0 +2024-09-17 03:39:59,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=138980.0, ans=0.125 +2024-09-17 03:40:14,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=139020.0, ans=0.125 +2024-09-17 03:40:20,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=139060.0, ans=0.2 +2024-09-17 03:40:24,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=139060.0, ans=0.2 +2024-09-17 03:40:27,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=139060.0, ans=0.0 +2024-09-17 03:40:33,614 INFO [train.py:1198] (1/2) Epoch 8, batch 3100, loss[loss=0.2903, ctc_loss=0.2085, cr_loss=0.4239, attn_decoder_loss=0.2899, over 29257.00 frames. ], tot_loss[loss=0.2757, ctc_loss=0.1883, cr_loss=0.4189, attn_decoder_loss=0.2761, over 5775585.15 frames. ], batch size: 100, lr: 1.38e-02, grad_scale: 8.0 +2024-09-17 03:40:42,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=139100.0, ans=0.0 +2024-09-17 03:40:45,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.49 vs. limit=15.0 +2024-09-17 03:40:50,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=139140.0, ans=0.125 +2024-09-17 03:40:50,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=139140.0, ans=0.0 +2024-09-17 03:41:18,710 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.10 vs. 
+2024-09-17 03:41:19,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=139220.0, ans=0.0
+2024-09-17 03:41:28,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=139220.0, ans=0.2
+2024-09-17 03:41:37,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=139260.0, ans=0.0
+2024-09-17 03:41:40,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=139260.0, ans=0.025
+2024-09-17 03:41:44,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=139260.0, ans=0.0
+2024-09-17 03:41:51,267 INFO [train.py:1198] (1/2) Epoch 8, batch 3150, loss[loss=0.2901, ctc_loss=0.2053, cr_loss=0.4606, attn_decoder_loss=0.2893, over 28855.00 frames. ], tot_loss[loss=0.2758, ctc_loss=0.1884, cr_loss=0.4201, attn_decoder_loss=0.2762, over 5781562.50 frames. ], batch size: 104, lr: 1.38e-02, grad_scale: 4.0
+2024-09-17 03:42:36,630 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.236e+01 1.018e+02 1.127e+02 1.309e+02 2.778e+02, threshold=2.254e+02, percent-clipped=1.0
+2024-09-17 03:42:57,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=139460.0, ans=0.0
+2024-09-17 03:43:06,660 INFO [train.py:1198] (1/2) Epoch 8, batch 3200, loss[loss=0.2752, ctc_loss=0.1864, cr_loss=0.4272, attn_decoder_loss=0.2756, over 29441.00 frames. ], tot_loss[loss=0.2752, ctc_loss=0.1876, cr_loss=0.4196, attn_decoder_loss=0.2756, over 5792958.64 frames. ], batch size: 79, lr: 1.38e-02, grad_scale: 8.0
+2024-09-17 03:43:08,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=139500.0, ans=0.125
+2024-09-17 03:43:18,322 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.15 vs. limit=15.0
+2024-09-17 03:43:52,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=139620.0, ans=0.0
+2024-09-17 03:44:11,677 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.42 vs. limit=22.5
+2024-09-17 03:44:17,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=139660.0, ans=0.0
+2024-09-17 03:44:24,506 INFO [train.py:1198] (1/2) Epoch 8, batch 3250, loss[loss=0.2722, ctc_loss=0.1832, cr_loss=0.3852, attn_decoder_loss=0.2736, over 29713.00 frames. ], tot_loss[loss=0.275, ctc_loss=0.1871, cr_loss=0.4189, attn_decoder_loss=0.2755, over 5799362.14 frames. ], batch size: 84, lr: 1.38e-02, grad_scale: 8.0
+2024-09-17 03:44:34,464 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.01 vs. limit=15.0
+2024-09-17 03:44:39,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=139740.0, ans=10.0
+2024-09-17 03:44:42,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=139740.0, ans=0.025
+2024-09-17 03:44:56,861 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.22 vs. limit=10.0
+2024-09-17 03:45:08,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=139820.0, ans=0.0
+2024-09-17 03:45:08,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=139820.0, ans=0.0
+2024-09-17 03:45:09,631 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.457e+01 9.664e+01 1.027e+02 1.100e+02 2.131e+02, threshold=2.054e+02, percent-clipped=0.0
+2024-09-17 03:45:23,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=139860.0, ans=0.0
+2024-09-17 03:45:32,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=139860.0, ans=0.95
+2024-09-17 03:45:41,706 INFO [train.py:1198] (1/2) Epoch 8, batch 3300, loss[loss=0.2765, ctc_loss=0.1843, cr_loss=0.4108, attn_decoder_loss=0.2776, over 28339.00 frames. ], tot_loss[loss=0.2735, ctc_loss=0.1857, cr_loss=0.4158, attn_decoder_loss=0.274, over 5796738.45 frames. ], batch size: 111, lr: 1.38e-02, grad_scale: 8.0
+2024-09-17 03:46:02,137 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=16.96 vs. limit=15.0
+2024-09-17 03:46:04,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=139940.0, ans=0.1
+2024-09-17 03:46:26,744 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.86 vs. limit=15.0
+2024-09-17 03:46:32,973 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.13 vs. limit=12.0
+2024-09-17 03:46:57,285 INFO [train.py:1198] (1/2) Epoch 8, batch 3350, loss[loss=0.2871, ctc_loss=0.202, cr_loss=0.4388, attn_decoder_loss=0.2868, over 28900.00 frames. ], tot_loss[loss=0.2742, ctc_loss=0.1865, cr_loss=0.4163, attn_decoder_loss=0.2747, over 5772162.58 frames. ], batch size: 104, lr: 1.38e-02, grad_scale: 4.0
+2024-09-17 03:47:13,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=140140.0, ans=15.0
+2024-09-17 03:47:26,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=140180.0, ans=0.0
+2024-09-17 03:47:29,710 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.91 vs. limit=22.5
+2024-09-17 03:47:47,549 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.416e+01 1.028e+02 1.095e+02 1.236e+02 5.561e+02, threshold=2.191e+02, percent-clipped=3.0
+2024-09-17 03:47:47,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=140220.0, ans=0.125
+2024-09-17 03:47:49,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=140220.0, ans=0.025
+2024-09-17 03:47:58,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=140260.0, ans=0.125
+2024-09-17 03:48:07,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=140260.0, ans=0.125
+2024-09-17 03:48:14,969 INFO [train.py:1198] (1/2) Epoch 8, batch 3400, loss[loss=0.2436, ctc_loss=0.164, cr_loss=0.3773, attn_decoder_loss=0.2441, over 29363.00 frames. ], tot_loss[loss=0.2748, ctc_loss=0.1873, cr_loss=0.4168, attn_decoder_loss=0.2753, over 5766105.31 frames. ], batch size: 67, lr: 1.38e-02, grad_scale: 8.0
+2024-09-17 03:49:08,764 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.54 vs. limit=15.0
+2024-09-17 03:49:18,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=140460.0, ans=0.125
+2024-09-17 03:49:27,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=140460.0, ans=0.0
+2024-09-17 03:49:31,834 INFO [train.py:1198] (1/2) Epoch 8, batch 3450, loss[loss=0.2845, ctc_loss=0.1931, cr_loss=0.3876, attn_decoder_loss=0.2861, over 28198.00 frames. ], tot_loss[loss=0.275, ctc_loss=0.1871, cr_loss=0.4167, attn_decoder_loss=0.2755, over 5775378.36 frames. ], batch size: 111, lr: 1.38e-02, grad_scale: 4.0
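Note on the scaling.py:214 lines: each one prints the current value ("ans") of a scheduled hyperparameter (dropout probabilities, skip rates, balancer limits, and so on) at a given batch_count. In icefall's scaling.py these are ScheduledFloat objects that interpolate piecewise-linearly between (batch_count, value) breakpoints. The sketch below reproduces that behavior under that assumption; the breakpoints in the usage line are made up for illustration and are not the recipe's actual schedules.

def scheduled_float(batch_count: float,
                    points: list[tuple[float, float]]) -> float:
    # piecewise-linear in batch_count, clamped at both ends;
    # `points` must be sorted by batch_count
    if batch_count <= points[0][0]:
        return points[0][1]
    if batch_count >= points[-1][0]:
        return points[-1][1]
    for (x0, y0), (x1, y1) in zip(points, points[1:]):
        if x0 <= batch_count <= x1:
            return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)
    raise ValueError("unreachable for sorted breakpoints")

# e.g. a dropout that decays from 0.3 to 0.1 over the first 20k batches
# and then stays flat, which is why many entries above print a constant ans:
assert scheduled_float(139860.0, [(0.0, 0.3), (20000.0, 0.1)]) == 0.1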
+2024-09-17 03:49:33,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=140500.0, ans=0.125
+2024-09-17 03:49:45,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=140540.0, ans=0.07
+2024-09-17 03:49:53,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=140540.0, ans=0.1
+2024-09-17 03:50:00,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=140580.0, ans=0.0
+2024-09-17 03:50:21,400 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.539e+01 1.005e+02 1.084e+02 1.145e+02 2.009e+02, threshold=2.168e+02, percent-clipped=0.0
+2024-09-17 03:50:21,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_na.min_abs, batch_count=140620.0, ans=0.02
+2024-09-17 03:50:24,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=140620.0, ans=0.125
+2024-09-17 03:50:24,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=140620.0, ans=0.125
+2024-09-17 03:50:28,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=140620.0, ans=0.125
+2024-09-17 03:50:30,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.08 vs. limit=10.0
+2024-09-17 03:50:47,161 INFO [train.py:1198] (1/2) Epoch 8, batch 3500, loss[loss=0.246, ctc_loss=0.1612, cr_loss=0.3642, attn_decoder_loss=0.2473, over 29349.00 frames. ], tot_loss[loss=0.2744, ctc_loss=0.1868, cr_loss=0.4165, attn_decoder_loss=0.2749, over 5777793.28 frames. ], batch size: 71, lr: 1.37e-02, grad_scale: 8.0
+2024-09-17 03:50:58,764 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.16 vs. limit=15.0
+2024-09-17 03:51:02,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=140740.0, ans=0.1
+2024-09-17 03:51:12,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=140740.0, ans=0.0
+2024-09-17 03:51:14,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=140740.0, ans=0.0
+2024-09-17 03:51:31,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=140820.0, ans=0.0
+2024-09-17 03:51:42,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=140820.0, ans=0.125
+2024-09-17 03:51:49,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.77 vs. limit=10.0
+2024-09-17 03:52:03,853 INFO [train.py:1198] (1/2) Epoch 8, batch 3550, loss[loss=0.282, ctc_loss=0.1904, cr_loss=0.4383, attn_decoder_loss=0.2824, over 29701.00 frames. ], tot_loss[loss=0.2745, ctc_loss=0.1868, cr_loss=0.4172, attn_decoder_loss=0.275, over 5784471.55 frames. ], batch size: 89, lr: 1.37e-02, grad_scale: 4.0
+2024-09-17 03:52:05,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=140900.0, ans=0.125
+2024-09-17 03:52:05,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=140900.0, ans=0.0
+2024-09-17 03:52:11,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=140900.0, ans=0.015
+2024-09-17 03:52:15,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=140900.0, ans=0.0
+2024-09-17 03:52:20,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=140940.0, ans=0.1
+2024-09-17 03:52:24,761 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 03:52:31,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.21 vs. limit=15.0
+2024-09-17 03:52:36,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=140980.0, ans=0.125
+2024-09-17 03:52:38,746 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.29 vs. limit=10.0
+2024-09-17 03:52:46,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=141020.0, ans=0.95
+2024-09-17 03:52:53,893 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.324e+01 1.017e+02 1.100e+02 1.203e+02 4.569e+02, threshold=2.200e+02, percent-clipped=1.0
+2024-09-17 03:52:57,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=141020.0, ans=0.0
+2024-09-17 03:53:06,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=141060.0, ans=0.05
+2024-09-17 03:53:12,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=141060.0, ans=0.125
+2024-09-17 03:53:17,656 INFO [train.py:1198] (1/2) Epoch 8, batch 3600, loss[loss=0.2661, ctc_loss=0.1774, cr_loss=0.4108, attn_decoder_loss=0.2668, over 29506.00 frames. ], tot_loss[loss=0.2747, ctc_loss=0.1869, cr_loss=0.4183, attn_decoder_loss=0.2751, over 5792760.23 frames. ], batch size: 77, lr: 1.37e-02, grad_scale: 8.0
+2024-09-17 03:53:18,485 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.78 vs. limit=15.0
+2024-09-17 03:53:32,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=141140.0, ans=0.2
+2024-09-17 03:53:44,732 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 03:54:13,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=141220.0, ans=0.1
+2024-09-17 03:54:32,136 INFO [train.py:1198] (1/2) Epoch 8, batch 3650, loss[loss=0.2905, ctc_loss=0.1987, cr_loss=0.4395, attn_decoder_loss=0.2909, over 29492.00 frames. ], tot_loss[loss=0.2737, ctc_loss=0.1858, cr_loss=0.4169, attn_decoder_loss=0.2742, over 5794742.18 frames. ], batch size: 90, lr: 1.37e-02, grad_scale: 4.0
+2024-09-17 03:54:48,978 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 03:55:26,039 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.265e+01 1.005e+02 1.060e+02 1.174e+02 2.245e+02, threshold=2.119e+02, percent-clipped=1.0
+2024-09-17 03:55:38,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=141460.0, ans=0.125
+2024-09-17 03:55:45,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=141460.0, ans=0.1
+2024-09-17 03:55:46,337 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.39 vs. limit=6.0
+2024-09-17 03:55:48,539 INFO [train.py:1198] (1/2) Epoch 8, batch 3700, loss[loss=0.2845, ctc_loss=0.1992, cr_loss=0.4608, attn_decoder_loss=0.2838, over 29716.00 frames. ], tot_loss[loss=0.2741, ctc_loss=0.186, cr_loss=0.4176, attn_decoder_loss=0.2746, over 5804304.62 frames. ], batch size: 84, lr: 1.37e-02, grad_scale: 8.0
+2024-09-17 03:55:59,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=141500.0, ans=0.125
+2024-09-17 03:56:12,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=141540.0, ans=0.0
+2024-09-17 03:56:16,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=141580.0, ans=0.0
+2024-09-17 03:56:28,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=141580.0, ans=0.125
+2024-09-17 03:56:47,241 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.05 vs. limit=6.0
+2024-09-17 03:56:52,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=141660.0, ans=0.0
+2024-09-17 03:57:02,879 INFO [train.py:1198] (1/2) Epoch 8, batch 3750, loss[loss=0.2419, ctc_loss=0.158, cr_loss=0.3621, attn_decoder_loss=0.2432, over 29358.00 frames. ], tot_loss[loss=0.2737, ctc_loss=0.1857, cr_loss=0.4173, attn_decoder_loss=0.2742, over 5807845.41 frames. ], batch size: 67, lr: 1.37e-02, grad_scale: 4.0
+2024-09-17 03:57:09,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.56 vs. limit=12.0
+2024-09-17 03:57:12,810 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.52 vs. limit=6.0
+2024-09-17 03:57:30,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=141740.0, ans=0.025
+2024-09-17 03:57:40,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=141780.0, ans=0.125
+2024-09-17 03:57:41,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=141780.0, ans=0.025
+2024-09-17 03:57:53,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=141820.0, ans=0.125
+2024-09-17 03:57:55,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=141820.0, ans=0.07
+2024-09-17 03:57:56,594 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.547e+01 9.777e+01 1.089e+02 1.271e+02 6.127e+02, threshold=2.178e+02, percent-clipped=4.0
+2024-09-17 03:58:11,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=141860.0, ans=0.1
+2024-09-17 03:58:18,901 INFO [train.py:1198] (1/2) Epoch 8, batch 3800, loss[loss=0.2904, ctc_loss=0.1963, cr_loss=0.4217, attn_decoder_loss=0.2914, over 29639.00 frames. ], tot_loss[loss=0.2735, ctc_loss=0.1858, cr_loss=0.4168, attn_decoder_loss=0.274, over 5798320.54 frames. ], batch size: 86, lr: 1.37e-02, grad_scale: 8.0
+2024-09-17 03:58:33,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=141940.0, ans=0.0
+2024-09-17 03:58:35,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=141940.0, ans=0.125
+2024-09-17 03:58:48,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.01 vs. limit=22.5
+2024-09-17 03:58:55,442 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.60 vs. limit=15.0
+2024-09-17 03:59:02,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=142020.0, ans=0.1
+2024-09-17 03:59:03,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=142020.0, ans=0.1
+2024-09-17 03:59:11,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=142020.0, ans=0.1
+2024-09-17 03:59:14,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=142020.0, ans=0.2
+2024-09-17 03:59:25,071 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.43 vs. limit=6.0
+2024-09-17 03:59:33,122 INFO [train.py:1198] (1/2) Epoch 8, batch 3850, loss[loss=0.2724, ctc_loss=0.1825, cr_loss=0.4039, attn_decoder_loss=0.2735, over 29245.00 frames. ], tot_loss[loss=0.2729, ctc_loss=0.1848, cr_loss=0.4154, attn_decoder_loss=0.2735, over 5812535.53 frames. ], batch size: 100, lr: 1.37e-02, grad_scale: 8.0
+2024-09-17 04:00:07,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=142180.0, ans=0.125
+2024-09-17 04:00:26,679 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.741e+01 9.765e+01 1.055e+02 1.135e+02 1.958e+02, threshold=2.110e+02, percent-clipped=1.0
+2024-09-17 04:00:41,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=142260.0, ans=0.0
+2024-09-17 04:00:48,852 INFO [train.py:1198] (1/2) Epoch 8, batch 3900, loss[loss=0.2766, ctc_loss=0.1762, cr_loss=0.404, attn_decoder_loss=0.2787, over 29597.00 frames. ], tot_loss[loss=0.2733, ctc_loss=0.1847, cr_loss=0.4157, attn_decoder_loss=0.2739, over 5816995.20 frames. ], batch size: 86, lr: 1.37e-02, grad_scale: 8.0
+2024-09-17 04:00:53,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=142300.0, ans=0.2
+2024-09-17 04:01:09,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=142340.0, ans=0.2
+2024-09-17 04:01:13,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=142340.0, ans=0.125
+2024-09-17 04:01:38,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.98 vs. limit=15.0
+2024-09-17 04:02:02,694 INFO [train.py:1198] (1/2) Epoch 8, batch 3950, loss[loss=0.2851, ctc_loss=0.1875, cr_loss=0.4285, attn_decoder_loss=0.2864, over 29454.00 frames. ], tot_loss[loss=0.273, ctc_loss=0.1837, cr_loss=0.4152, attn_decoder_loss=0.2737, over 5836277.97 frames. ], batch size: 97, lr: 1.37e-02, grad_scale: 4.0
+2024-09-17 04:02:07,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=142500.0, ans=0.0
+2024-09-17 04:02:09,407 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.18 vs. limit=6.0
+2024-09-17 04:02:14,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=142500.0, ans=0.025
+2024-09-17 04:02:42,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=142580.0, ans=0.125
+2024-09-17 04:02:54,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=142620.0, ans=0.0
+2024-09-17 04:02:55,185 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=14.03 vs. limit=22.5
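Note on the scaling.py:1024 lines: each Whitening message compares a statistic of a layer's output covariance ("metric") against a limit; the module only applies its corrective gradient while the metric exceeds the limit, which is why most entries report a metric comfortably below it. A rough Python sketch of such a statistic, assuming a ratio that equals 1.0 for perfectly "white" features; this is the general idea, not the exact grouped computation in scaling.py.

import torch

def whitening_metric(x: torch.Tensor) -> float:
    # E[lambda^2] / E[lambda]^2 over the eigenvalues of the feature covariance:
    # 1.0 when the covariance is proportional to the identity, and larger as
    # the spectrum becomes more lopsided
    x = x.reshape(-1, x.shape[-1]).float()
    cov = (x.T @ x) / x.shape[0]
    mean_eig = torch.diagonal(cov).mean()            # trace(cov) / num_channels
    mean_sq_eig = (cov ** 2).sum() / cov.shape[0]    # trace(cov^2) / num_channels
    return (mean_sq_eig / (mean_eig ** 2 + 1.0e-20)).item()

print(whitening_metric(torch.randn(10000, 384)))  # ~1.0, well under a limit like 10.0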
+2024-09-17 04:02:58,359 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.061e+01 9.742e+01 1.045e+02 1.185e+02 2.599e+02, threshold=2.090e+02, percent-clipped=1.0
+2024-09-17 04:03:11,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=142660.0, ans=0.0
+2024-09-17 04:03:16,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.26 vs. limit=15.0
+2024-09-17 04:03:17,015 INFO [train.py:1198] (1/2) Epoch 8, batch 4000, loss[loss=0.2566, ctc_loss=0.1626, cr_loss=0.3983, attn_decoder_loss=0.2582, over 29510.00 frames. ], tot_loss[loss=0.2731, ctc_loss=0.1839, cr_loss=0.4148, attn_decoder_loss=0.2738, over 5813337.81 frames. ], batch size: 74, lr: 1.36e-02, grad_scale: 8.0
+2024-09-17 04:03:46,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=142780.0, ans=0.125
+2024-09-17 04:03:51,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=142780.0, ans=0.125
+2024-09-17 04:04:29,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=142900.0, ans=0.0
+2024-09-17 04:04:30,884 INFO [train.py:1198] (1/2) Epoch 8, batch 4050, loss[loss=0.3166, ctc_loss=0.2599, cr_loss=0.4343, attn_decoder_loss=0.3132, over 20030.00 frames. ], tot_loss[loss=0.2728, ctc_loss=0.1838, cr_loss=0.4142, attn_decoder_loss=0.2735, over 5796711.69 frames. ], batch size: 209, lr: 1.36e-02, grad_scale: 4.0
+2024-09-17 04:04:45,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=142940.0, ans=0.125
+2024-09-17 04:04:48,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=142940.0, ans=0.125
+2024-09-17 04:05:12,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=142980.0, ans=0.0
+2024-09-17 04:05:18,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=143020.0, ans=0.125
+2024-09-17 04:05:29,421 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.245e+01 1.069e+02 1.234e+02 1.438e+02 3.012e+02, threshold=2.468e+02, percent-clipped=5.0
+2024-09-17 04:05:31,503 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.09 vs. limit=12.0
+2024-09-17 04:05:44,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=143100.0, ans=0.125
+2024-09-17 04:05:45,648 INFO [train.py:1198] (1/2) Epoch 8, batch 4100, loss[loss=0.2835, ctc_loss=0.1965, cr_loss=0.439, attn_decoder_loss=0.2835, over 29503.00 frames. ], tot_loss[loss=0.2737, ctc_loss=0.1851, cr_loss=0.4167, attn_decoder_loss=0.2743, over 5792364.59 frames. ], batch size: 90, lr: 1.36e-02, grad_scale: 8.0
+2024-09-17 04:05:50,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=143100.0, ans=0.125
+2024-09-17 04:06:00,472 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 04:06:04,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=143140.0, ans=0.035
+2024-09-17 04:06:06,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=143140.0, ans=0.0
+2024-09-17 04:06:07,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=143140.0, ans=0.0
+2024-09-17 04:06:09,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=143140.0, ans=0.125
+2024-09-17 04:06:40,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=143220.0, ans=0.0
+2024-09-17 04:06:50,940 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.61 vs. limit=15.0
+2024-09-17 04:06:59,220 INFO [train.py:1198] (1/2) Epoch 8, batch 4150, loss[loss=0.2672, ctc_loss=0.1757, cr_loss=0.4083, attn_decoder_loss=0.2682, over 29498.00 frames. ], tot_loss[loss=0.2733, ctc_loss=0.1849, cr_loss=0.4168, attn_decoder_loss=0.2739, over 5797754.60 frames. ], batch size: 77, lr: 1.36e-02, grad_scale: 4.0
+2024-09-17 04:07:12,013 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.86 vs. limit=6.0
+2024-09-17 04:07:25,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.63 vs. limit=12.0
+2024-09-17 04:07:32,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=143380.0, ans=0.025
+2024-09-17 04:07:38,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=143380.0, ans=0.125
+2024-09-17 04:07:56,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=143420.0, ans=0.0
+2024-09-17 04:07:58,960 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.371e+01 9.814e+01 1.059e+02 1.146e+02 1.859e+02, threshold=2.118e+02, percent-clipped=0.0
+2024-09-17 04:08:13,624 INFO [train.py:1198] (1/2) Epoch 8, batch 4200, loss[loss=0.2901, ctc_loss=0.2002, cr_loss=0.4272, attn_decoder_loss=0.2906, over 29504.00 frames. ], tot_loss[loss=0.2739, ctc_loss=0.1853, cr_loss=0.4176, attn_decoder_loss=0.2745, over 5799803.63 frames. ], batch size: 90, lr: 1.36e-02, grad_scale: 8.0
+2024-09-17 04:08:16,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.36 vs. limit=15.0
+2024-09-17 04:08:16,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=143500.0, ans=0.1
+2024-09-17 04:08:18,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=143500.0, ans=0.125
+2024-09-17 04:08:36,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=143540.0, ans=0.025
+2024-09-17 04:08:47,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=143580.0, ans=0.1
+2024-09-17 04:09:05,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=143620.0, ans=0.125
+2024-09-17 04:09:05,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.84 vs. limit=15.0
+2024-09-17 04:09:22,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=143660.0, ans=0.0
+2024-09-17 04:09:23,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=143660.0, ans=0.025
+2024-09-17 04:09:27,756 INFO [train.py:1198] (1/2) Epoch 8, batch 4250, loss[loss=0.2451, ctc_loss=0.1505, cr_loss=0.3604, attn_decoder_loss=0.2476, over 29533.00 frames. ], tot_loss[loss=0.2739, ctc_loss=0.1848, cr_loss=0.4169, attn_decoder_loss=0.2745, over 5805954.46 frames. ], batch size: 74, lr: 1.36e-02, grad_scale: 4.0
+2024-09-17 04:09:30,302 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.66 vs. limit=10.0
+2024-09-17 04:10:08,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=143780.0, ans=0.0
+2024-09-17 04:10:09,690 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=29.72 vs. limit=22.5
+2024-09-17 04:10:27,813 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.334e+01 1.014e+02 1.108e+02 1.214e+02 2.997e+02, threshold=2.217e+02, percent-clipped=4.0
+2024-09-17 04:10:28,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=143860.0, ans=0.2
+2024-09-17 04:10:39,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=143900.0, ans=0.0
+2024-09-17 04:10:41,043 INFO [train.py:1198] (1/2) Epoch 8, batch 4300, loss[loss=0.2957, ctc_loss=0.203, cr_loss=0.4649, attn_decoder_loss=0.2956, over 29512.00 frames. ], tot_loss[loss=0.274, ctc_loss=0.1852, cr_loss=0.4174, attn_decoder_loss=0.2746, over 5795688.51 frames. ], batch size: 87, lr: 1.36e-02, grad_scale: 8.0
+2024-09-17 04:10:47,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=143900.0, ans=0.0
+2024-09-17 04:10:50,243 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.50 vs. limit=15.0
+2024-09-17 04:10:58,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=143940.0, ans=0.125
+2024-09-17 04:11:02,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=143940.0, ans=0.125
+2024-09-17 04:11:56,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=144060.0, ans=0.1
+2024-09-17 04:12:02,537 INFO [train.py:1198] (1/2) Epoch 8, batch 4350, loss[loss=0.2914, ctc_loss=0.1982, cr_loss=0.4393, attn_decoder_loss=0.292, over 29451.00 frames. ], tot_loss[loss=0.2775, ctc_loss=0.188, cr_loss=0.4222, attn_decoder_loss=0.2781, over 5798409.50 frames. ], batch size: 97, lr: 1.36e-02, grad_scale: 8.0
+2024-09-17 04:12:08,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_ff2.min_abs, batch_count=144100.0, ans=0.1
+2024-09-17 04:12:08,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=144100.0, ans=0.1
+2024-09-17 04:12:16,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=144140.0, ans=0.1
+2024-09-17 04:12:16,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=144140.0, ans=0.2
+2024-09-17 04:12:22,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=144140.0, ans=0.125
+2024-09-17 04:12:43,627 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.84 vs. limit=6.0
+2024-09-17 04:12:54,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.21 vs. limit=15.0
+2024-09-17 04:12:55,261 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.66 vs. limit=12.0
+2024-09-17 04:13:03,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=144260.0, ans=0.125
+2024-09-17 04:13:04,678 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.658e+01 1.032e+02 1.110e+02 1.170e+02 3.272e+02, threshold=2.221e+02, percent-clipped=1.0
+2024-09-17 04:13:05,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=8.57 vs. limit=15.0
+2024-09-17 04:13:16,184 INFO [train.py:1198] (1/2) Epoch 8, batch 4400, loss[loss=0.2888, ctc_loss=0.2031, cr_loss=0.4504, attn_decoder_loss=0.2883, over 27298.00 frames. ], tot_loss[loss=0.2798, ctc_loss=0.1904, cr_loss=0.4245, attn_decoder_loss=0.2803, over 5768149.71 frames. ], batch size: 124, lr: 1.36e-02, grad_scale: 8.0
+2024-09-17 04:13:19,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_na.min_abs, batch_count=144300.0, ans=0.02
+2024-09-17 04:13:22,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=144300.0, ans=0.125
+2024-09-17 04:13:28,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=144300.0, ans=0.0
+2024-09-17 04:13:32,193 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.55 vs. limit=6.0
+2024-09-17 04:13:53,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=144380.0, ans=0.125
+2024-09-17 04:13:54,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=144380.0, ans=0.2
+2024-09-17 04:13:56,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=144380.0, ans=0.125
+2024-09-17 04:14:10,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=144420.0, ans=0.0
+2024-09-17 04:14:10,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=144420.0, ans=0.125
+2024-09-17 04:14:13,855 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.70 vs. limit=15.0
+2024-09-17 04:14:29,772 INFO [train.py:1198] (1/2) Epoch 8, batch 4450, loss[loss=0.3078, ctc_loss=0.2582, cr_loss=0.4239, attn_decoder_loss=0.3038, over 20039.00 frames. ], tot_loss[loss=0.2834, ctc_loss=0.1963, cr_loss=0.4279, attn_decoder_loss=0.2835, over 5572366.71 frames. ], batch size: 209, lr: 1.36e-02, grad_scale: 4.0
+2024-09-17 04:14:30,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=144500.0, ans=0.0
+2024-09-17 04:14:38,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=144500.0, ans=0.0
+2024-09-17 04:14:38,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=144500.0, ans=0.0
+2024-09-17 04:14:55,140 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.02 vs. limit=10.0
+2024-09-17 04:15:15,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=144620.0, ans=0.025
+2024-09-17 04:15:17,093 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 04:15:17,431 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=8.10 vs. limit=10.0
+2024-09-17 04:15:34,822 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.417e+01 1.090e+02 1.182e+02 1.322e+02 3.138e+02, threshold=2.364e+02, percent-clipped=1.0
+2024-09-17 04:15:38,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=144660.0, ans=0.125
+2024-09-17 04:15:43,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=144700.0, ans=0.2
+2024-09-17 04:15:45,080 INFO [train.py:1198] (1/2) Epoch 8, batch 4500, loss[loss=0.3081, ctc_loss=0.2408, cr_loss=0.4334, attn_decoder_loss=0.3059, over 20336.00 frames. ], tot_loss[loss=0.2875, ctc_loss=0.204, cr_loss=0.4298, attn_decoder_loss=0.2873, over 5232412.54 frames. ], batch size: 210, lr: 1.36e-02, grad_scale: 8.0
+2024-09-17 04:15:56,484 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.41 vs. limit=15.0
+2024-09-17 04:16:18,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=144780.0, ans=0.2
+2024-09-17 04:17:10,885 INFO [train.py:1198] (1/2) Epoch 9, batch 0, loss[loss=0.2623, ctc_loss=0.1571, cr_loss=0.3806, attn_decoder_loss=0.2655, over 29651.00 frames. ], tot_loss[loss=0.2623, ctc_loss=0.1571, cr_loss=0.3806, attn_decoder_loss=0.2655, over 29651.00 frames. ], batch size: 73, lr: 1.28e-02, grad_scale: 8.0
+2024-09-17 04:17:10,885 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-17 04:17:29,061 INFO [train.py:1230] (1/2) Epoch 9, validation: loss=0.2184, ctc_loss=0.05457, cr_loss=4.594e-15, attn_decoder_loss=0.2366, over 944034.00 frames.
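Note on the loss fields: throughout this log the headline "loss" is consistent with a fixed weighted sum of the three component losses, with weights 0.1 (ctc_loss), 0.9 (attn_decoder_loss) and 0.02 (cr_loss). These weights are inferred from the printed numbers, not read out of train.py: at Epoch 8, batch 4400 above, 0.1*0.2031 + 0.9*0.2883 + 0.02*0.4504 = 0.2888, matching the logged value. The validation entries report cr_loss around 1e-15, i.e. consistency regularization contributes nothing in eval mode, so the validation loss reduces to 0.1*ctc_loss + 0.9*attn_decoder_loss. A minimal Python sketch under that inference:

def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float,
                  ctc_weight: float = 0.1,
                  attn_decoder_weight: float = 0.9,
                  cr_weight: float = 0.02) -> float:
    # weights inferred by fitting the logged entries, not quoted from train.py
    return (ctc_weight * ctc_loss
            + attn_decoder_weight * attn_decoder_loss
            + cr_weight * cr_loss)

assert abs(combined_loss(0.2031, 0.2883, 0.4504) - 0.2888) < 5e-4     # Epoch 8, batch 4400
assert abs(combined_loss(0.05457, 0.2366, 4.594e-15) - 0.2184) < 5e-4  # Epoch 9 validation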
+2024-09-17 04:17:29,061 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-17 04:17:30,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=144800.0, ans=0.125
+2024-09-17 04:17:47,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=144840.0, ans=0.125
+2024-09-17 04:17:57,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=144840.0, ans=0.0
+2024-09-17 04:17:57,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=144840.0, ans=0.1
+2024-09-17 04:17:58,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=144840.0, ans=0.0
+2024-09-17 04:18:09,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=144880.0, ans=0.2
+2024-09-17 04:18:24,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=144920.0, ans=0.125
+2024-09-17 04:18:28,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=144920.0, ans=0.125
+2024-09-17 04:18:42,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=144960.0, ans=0.125
+2024-09-17 04:18:44,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=144960.0, ans=0.0
+2024-09-17 04:18:48,436 INFO [train.py:1198] (1/2) Epoch 9, batch 50, loss[loss=0.2575, ctc_loss=0.1799, cr_loss=0.4289, attn_decoder_loss=0.2565, over 29442.00 frames. ], tot_loss[loss=0.2781, ctc_loss=0.1915, cr_loss=0.4223, attn_decoder_loss=0.2783, over 1266815.79 frames. ], batch size: 70, lr: 1.28e-02, grad_scale: 4.0
+2024-09-17 04:19:18,665 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.315e+01 1.028e+02 1.122e+02 1.290e+02 1.269e+03, threshold=2.245e+02, percent-clipped=1.0
+2024-09-17 04:19:32,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=145120.0, ans=0.1
+2024-09-17 04:19:37,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=145120.0, ans=0.125
+2024-09-17 04:19:43,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=145120.0, ans=0.125
+2024-09-17 04:19:47,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=145160.0, ans=0.125
+2024-09-17 04:20:04,118 INFO [train.py:1198] (1/2) Epoch 9, batch 100, loss[loss=0.2769, ctc_loss=0.1957, cr_loss=0.4224, attn_decoder_loss=0.2766, over 29532.00 frames. ], tot_loss[loss=0.2796, ctc_loss=0.1923, cr_loss=0.4252, attn_decoder_loss=0.2799, over 2251715.09 frames. ], batch size: 76, lr: 1.28e-02, grad_scale: 8.0
+2024-09-17 04:20:07,561 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 04:20:18,202 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 04:20:42,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=145280.0, ans=0.2
+2024-09-17 04:20:55,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=145320.0, ans=0.125
+2024-09-17 04:21:12,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=145360.0, ans=0.1
+2024-09-17 04:21:19,404 INFO [train.py:1198] (1/2) Epoch 9, batch 150, loss[loss=0.2475, ctc_loss=0.1576, cr_loss=0.3967, attn_decoder_loss=0.2486, over 29436.00 frames. ], tot_loss[loss=0.276, ctc_loss=0.1871, cr_loss=0.4199, attn_decoder_loss=0.2765, over 3046311.12 frames. ], batch size: 70, lr: 1.28e-02, grad_scale: 4.0
+2024-09-17 04:21:19,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=145400.0, ans=0.0
+2024-09-17 04:21:20,504 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.02 vs. limit=6.0
+2024-09-17 04:21:45,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=145440.0, ans=0.0
+2024-09-17 04:21:55,705 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.472e+01 1.015e+02 1.087e+02 1.260e+02 1.994e+02, threshold=2.174e+02, percent-clipped=0.0
+2024-09-17 04:22:08,524 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.99 vs. limit=6.0
+2024-09-17 04:22:11,554 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.08 vs. limit=22.5
+2024-09-17 04:22:18,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=145520.0, ans=0.0
+2024-09-17 04:22:25,375 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.10 vs. limit=15.0
+2024-09-17 04:22:30,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=145560.0, ans=0.125
+2024-09-17 04:22:32,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=145560.0, ans=0.125
+2024-09-17 04:22:39,821 INFO [train.py:1198] (1/2) Epoch 9, batch 200, loss[loss=0.282, ctc_loss=0.2011, cr_loss=0.4278, attn_decoder_loss=0.2815, over 27638.00 frames. ], tot_loss[loss=0.2745, ctc_loss=0.1858, cr_loss=0.4189, attn_decoder_loss=0.275, over 3659240.49 frames. ], batch size: 125, lr: 1.28e-02, grad_scale: 8.0
+2024-09-17 04:22:47,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=145600.0, ans=0.125
+2024-09-17 04:23:07,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=145640.0, ans=0.025
+2024-09-17 04:23:07,893 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.61 vs. limit=6.0
+2024-09-17 04:23:14,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=145680.0, ans=0.0
+2024-09-17 04:23:19,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=145680.0, ans=0.95
+2024-09-17 04:23:36,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=145720.0, ans=0.125
+2024-09-17 04:23:40,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=145760.0, ans=0.0
+2024-09-17 04:23:55,897 INFO [train.py:1198] (1/2) Epoch 9, batch 250, loss[loss=0.2963, ctc_loss=0.2099, cr_loss=0.4516, attn_decoder_loss=0.2958, over 29193.00 frames. ], tot_loss[loss=0.2735, ctc_loss=0.1843, cr_loss=0.4173, attn_decoder_loss=0.2741, over 4141856.27 frames. ], batch size: 100, lr: 1.28e-02, grad_scale: 4.0
+2024-09-17 04:24:05,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=145800.0, ans=0.125
+2024-09-17 04:24:10,525 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.57 vs. limit=15.0
+2024-09-17 04:24:29,187 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.100e+01 9.608e+01 1.032e+02 1.129e+02 1.433e+02, threshold=2.064e+02, percent-clipped=0.0
+2024-09-17 04:24:29,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=145880.0, ans=0.0
+2024-09-17 04:25:11,661 INFO [train.py:1198] (1/2) Epoch 9, batch 300, loss[loss=0.2908, ctc_loss=0.1969, cr_loss=0.429, attn_decoder_loss=0.2917, over 29527.00 frames. ], tot_loss[loss=0.2726, ctc_loss=0.1833, cr_loss=0.4158, attn_decoder_loss=0.2733, over 4509365.65 frames. ], batch size: 92, lr: 1.28e-02, grad_scale: 8.0
+2024-09-17 04:25:51,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=146080.0, ans=0.125
+2024-09-17 04:26:00,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=146120.0, ans=0.0
+2024-09-17 04:26:03,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=146120.0, ans=0.125
+2024-09-17 04:26:16,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=146160.0, ans=0.2
+2024-09-17 04:26:32,376 INFO [train.py:1198] (1/2) Epoch 9, batch 350, loss[loss=0.2406, ctc_loss=0.1529, cr_loss=0.3693, attn_decoder_loss=0.2421, over 29313.00 frames. ], tot_loss[loss=0.2727, ctc_loss=0.1834, cr_loss=0.4162, attn_decoder_loss=0.2734, over 4795198.32 frames. ], batch size: 71, lr: 1.28e-02, grad_scale: 4.0
+2024-09-17 04:26:34,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=146200.0, ans=0.025
+2024-09-17 04:26:44,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=146200.0, ans=0.1
+2024-09-17 04:26:46,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=146240.0, ans=0.125
+2024-09-17 04:26:55,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=146240.0, ans=0.125
+2024-09-17 04:26:56,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=146240.0, ans=0.125
+2024-09-17 04:27:06,993 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.424e+01 9.475e+01 1.008e+02 1.084e+02 2.956e+02, threshold=2.017e+02, percent-clipped=2.0
+2024-09-17 04:27:27,790 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.28 vs. limit=10.0
+2024-09-17 04:27:33,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=146360.0, ans=0.125
+2024-09-17 04:27:41,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=146360.0, ans=0.125
+2024-09-17 04:27:48,641 INFO [train.py:1198] (1/2) Epoch 9, batch 400, loss[loss=0.2775, ctc_loss=0.1845, cr_loss=0.4334, attn_decoder_loss=0.2782, over 29705.00 frames. ], tot_loss[loss=0.2721, ctc_loss=0.1825, cr_loss=0.4155, attn_decoder_loss=0.2728, over 5024805.77 frames. ], batch size: 82, lr: 1.28e-02, grad_scale: 8.0
+2024-09-17 04:28:13,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=146440.0, ans=0.125
+2024-09-17 04:28:18,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=146480.0, ans=0.025
+2024-09-17 04:28:21,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=146480.0, ans=0.0
+2024-09-17 04:28:30,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=146480.0, ans=0.125
+2024-09-17 04:28:39,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=146520.0, ans=0.125
+2024-09-17 04:28:40,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=146520.0, ans=0.125
+2024-09-17 04:29:03,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=146600.0, ans=0.125
+2024-09-17 04:29:04,924 INFO [train.py:1198] (1/2) Epoch 9, batch 450, loss[loss=0.274, ctc_loss=0.1786, cr_loss=0.3924, attn_decoder_loss=0.2759, over 29706.00 frames. ], tot_loss[loss=0.272, ctc_loss=0.1825, cr_loss=0.4155, attn_decoder_loss=0.2727, over 5187751.78 frames. ], batch size: 83, lr: 1.28e-02, grad_scale: 4.0
+2024-09-17 04:29:31,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=146640.0, ans=0.125
+2024-09-17 04:29:46,240 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.248e+01 9.637e+01 1.024e+02 1.129e+02 3.219e+02, threshold=2.049e+02, percent-clipped=1.0
+2024-09-17 04:29:51,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=146680.0, ans=0.0
+2024-09-17 04:29:57,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=146720.0, ans=0.125
+2024-09-17 04:30:17,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=146760.0, ans=0.0
+2024-09-17 04:30:24,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=146800.0, ans=0.125
+2024-09-17 04:30:25,865 INFO [train.py:1198] (1/2) Epoch 9, batch 500, loss[loss=0.2742, ctc_loss=0.1808, cr_loss=0.4054, attn_decoder_loss=0.2756, over 29432.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1814, cr_loss=0.4138, attn_decoder_loss=0.2717, over 5329581.16 frames. ], batch size: 94, lr: 1.27e-02, grad_scale: 8.0
+2024-09-17 04:30:26,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=146800.0, ans=0.125
+2024-09-17 04:30:32,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=146800.0, ans=0.2
+2024-09-17 04:30:37,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=146800.0, ans=0.125
+2024-09-17 04:30:47,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=146840.0, ans=0.125
+2024-09-17 04:30:49,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=146840.0, ans=0.1
+2024-09-17 04:31:26,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=146960.0, ans=0.125
+2024-09-17 04:31:31,219 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.74 vs. limit=15.0
+2024-09-17 04:31:38,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=146960.0, ans=0.125
+2024-09-17 04:31:42,467 INFO [train.py:1198] (1/2) Epoch 9, batch 550, loss[loss=0.2813, ctc_loss=0.193, cr_loss=0.4282, attn_decoder_loss=0.2816, over 28752.00 frames. ], tot_loss[loss=0.2711, ctc_loss=0.1818, cr_loss=0.4141, attn_decoder_loss=0.2718, over 5422550.17 frames. ], batch size: 104, lr: 1.27e-02, grad_scale: 8.0
+2024-09-17 04:32:19,205 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.353e+01 9.413e+01 1.021e+02 1.124e+02 5.702e+02, threshold=2.041e+02, percent-clipped=1.0
+2024-09-17 04:32:31,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=147120.0, ans=0.0
+2024-09-17 04:32:37,081 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.67 vs. limit=15.0
+2024-09-17 04:32:59,457 INFO [train.py:1198] (1/2) Epoch 9, batch 600, loss[loss=0.2822, ctc_loss=0.1858, cr_loss=0.4078, attn_decoder_loss=0.2839, over 29302.00 frames. ], tot_loss[loss=0.2714, ctc_loss=0.1819, cr_loss=0.4143, attn_decoder_loss=0.2722, over 5509270.10 frames. ], batch size: 100, lr: 1.27e-02, grad_scale: 8.0
+2024-09-17 04:33:19,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=147240.0, ans=0.125
+2024-09-17 04:33:19,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=147240.0, ans=10.0
+2024-09-17 04:33:44,477 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.27 vs. limit=22.5
+2024-09-17 04:33:51,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.71 vs. limit=15.0
+2024-09-17 04:34:06,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=147360.0, ans=0.2
+2024-09-17 04:34:09,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=147360.0, ans=0.1
+2024-09-17 04:34:14,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=147360.0, ans=0.07
+2024-09-17 04:34:20,310 INFO [train.py:1198] (1/2) Epoch 9, batch 650, loss[loss=0.2657, ctc_loss=0.166, cr_loss=0.3901, attn_decoder_loss=0.2681, over 29745.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1806, cr_loss=0.4131, attn_decoder_loss=0.2714, over 5586025.90 frames. ], batch size: 81, lr: 1.27e-02, grad_scale: 4.0
+2024-09-17 04:34:20,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=147400.0, ans=0.125
+2024-09-17 04:34:32,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=147400.0, ans=0.1
+2024-09-17 04:34:35,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=147440.0, ans=0.07
+2024-09-17 04:34:40,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=147440.0, ans=0.125
+2024-09-17 04:34:54,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=147480.0, ans=0.125
+2024-09-17 04:34:58,647 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.437e+01 9.683e+01 1.026e+02 1.151e+02 1.521e+02, threshold=2.052e+02, percent-clipped=0.0
+2024-09-17 04:35:16,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.42 vs. limit=15.0
+2024-09-17 04:35:19,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.77 vs. limit=15.0
+2024-09-17 04:35:29,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=147560.0, ans=0.0
+2024-09-17 04:35:36,640 INFO [train.py:1198] (1/2) Epoch 9, batch 700, loss[loss=0.2691, ctc_loss=0.1824, cr_loss=0.4198, attn_decoder_loss=0.2695, over 29518.00 frames. ], tot_loss[loss=0.2714, ctc_loss=0.1816, cr_loss=0.415, attn_decoder_loss=0.2722, over 5636280.61 frames. ], batch size: 76, lr: 1.27e-02, grad_scale: 8.0
+2024-09-17 04:35:52,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=147640.0, ans=0.0
+2024-09-17 04:35:56,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=147640.0, ans=0.1
+2024-09-17 04:36:01,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=147640.0, ans=0.125
+2024-09-17 04:36:19,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=147680.0, ans=0.0
+2024-09-17 04:36:33,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=147720.0, ans=0.09899494936611666
+2024-09-17 04:36:33,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=147720.0, ans=0.0
+2024-09-17 04:36:52,804 INFO [train.py:1198] (1/2) Epoch 9, batch 750, loss[loss=0.2598, ctc_loss=0.1617, cr_loss=0.3821, attn_decoder_loss=0.2622, over 29716.00 frames. ], tot_loss[loss=0.2711, ctc_loss=0.1813, cr_loss=0.4146, attn_decoder_loss=0.2719, over 5674886.00 frames.
], batch size: 82, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:36:56,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=147800.0, ans=0.025 +2024-09-17 04:37:05,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=147800.0, ans=0.2 +2024-09-17 04:37:23,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=147880.0, ans=0.125 +2024-09-17 04:37:37,021 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.147e+01 9.690e+01 1.045e+02 1.120e+02 4.390e+02, threshold=2.090e+02, percent-clipped=1.0 +2024-09-17 04:37:38,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=147880.0, ans=0.025 +2024-09-17 04:37:38,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=147880.0, ans=0.125 +2024-09-17 04:38:13,960 INFO [train.py:1198] (1/2) Epoch 9, batch 800, loss[loss=0.2539, ctc_loss=0.1631, cr_loss=0.3909, attn_decoder_loss=0.2553, over 29606.00 frames. ], tot_loss[loss=0.2711, ctc_loss=0.1813, cr_loss=0.4146, attn_decoder_loss=0.2719, over 5705865.30 frames. ], batch size: 73, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:38:17,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=148000.0, ans=0.125 +2024-09-17 04:38:35,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=148040.0, ans=0.125 +2024-09-17 04:38:50,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=148080.0, ans=0.0 +2024-09-17 04:39:18,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=148160.0, ans=0.025 +2024-09-17 04:39:29,746 INFO [train.py:1198] (1/2) Epoch 9, batch 850, loss[loss=0.2838, ctc_loss=0.1899, cr_loss=0.4243, attn_decoder_loss=0.2848, over 29728.00 frames. ], tot_loss[loss=0.2711, ctc_loss=0.1813, cr_loss=0.4147, attn_decoder_loss=0.2719, over 5735268.33 frames. ], batch size: 89, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:39:45,484 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.39 vs. limit=15.0 +2024-09-17 04:39:50,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=148240.0, ans=0.125 +2024-09-17 04:40:03,837 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.79 vs. 
limit=6.0 +2024-09-17 04:40:04,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=148280.0, ans=0.125 +2024-09-17 04:40:09,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=148280.0, ans=0.05 +2024-09-17 04:40:10,332 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.251e+01 9.624e+01 1.050e+02 1.134e+02 2.702e+02, threshold=2.101e+02, percent-clipped=1.0 +2024-09-17 04:40:23,466 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.12 vs. limit=15.0 +2024-09-17 04:40:30,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=148360.0, ans=0.025 +2024-09-17 04:40:33,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=148360.0, ans=0.07 +2024-09-17 04:40:36,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=148360.0, ans=0.1 +2024-09-17 04:40:39,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=148360.0, ans=0.025 +2024-09-17 04:40:45,748 INFO [train.py:1198] (1/2) Epoch 9, batch 900, loss[loss=0.2433, ctc_loss=0.1571, cr_loss=0.3765, attn_decoder_loss=0.2445, over 29589.00 frames. ], tot_loss[loss=0.2712, ctc_loss=0.1814, cr_loss=0.4151, attn_decoder_loss=0.272, over 5740660.39 frames. ], batch size: 73, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:41:04,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=148440.0, ans=0.025 +2024-09-17 04:41:04,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=148440.0, ans=0.125 +2024-09-17 04:41:26,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=148480.0, ans=0.0 +2024-09-17 04:41:47,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=148520.0, ans=0.05 +2024-09-17 04:41:48,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=148520.0, ans=0.0 +2024-09-17 04:42:06,875 INFO [train.py:1198] (1/2) Epoch 9, batch 950, loss[loss=0.2546, ctc_loss=0.1641, cr_loss=0.3917, attn_decoder_loss=0.256, over 29493.00 frames. ], tot_loss[loss=0.2713, ctc_loss=0.181, cr_loss=0.4142, attn_decoder_loss=0.2722, over 5742983.50 frames. ], batch size: 74, lr: 1.27e-02, grad_scale: 4.0 +2024-09-17 04:42:30,708 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.23 vs. 
limit=15.0 +2024-09-17 04:42:34,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=148640.0, ans=0.0 +2024-09-17 04:42:43,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=148680.0, ans=0.125 +2024-09-17 04:42:49,651 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.631e+01 1.018e+02 1.126e+02 1.313e+02 4.383e+02, threshold=2.253e+02, percent-clipped=5.0 +2024-09-17 04:42:50,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=148680.0, ans=0.125 +2024-09-17 04:43:23,703 INFO [train.py:1198] (1/2) Epoch 9, batch 1000, loss[loss=0.26, ctc_loss=0.1727, cr_loss=0.4159, attn_decoder_loss=0.2604, over 29515.00 frames. ], tot_loss[loss=0.272, ctc_loss=0.182, cr_loss=0.4148, attn_decoder_loss=0.2728, over 5736410.92 frames. ], batch size: 77, lr: 1.27e-02, grad_scale: 8.0 +2024-09-17 04:43:25,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=148800.0, ans=0.125 +2024-09-17 04:43:40,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=148840.0, ans=0.125 +2024-09-17 04:44:39,600 INFO [train.py:1198] (1/2) Epoch 9, batch 1050, loss[loss=0.2772, ctc_loss=0.1869, cr_loss=0.4333, attn_decoder_loss=0.2776, over 29671.00 frames. ], tot_loss[loss=0.2712, ctc_loss=0.1811, cr_loss=0.4128, attn_decoder_loss=0.272, over 5743933.85 frames. ], batch size: 85, lr: 1.27e-02, grad_scale: 4.0 +2024-09-17 04:44:49,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=149000.0, ans=0.0 +2024-09-17 04:44:50,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=149000.0, ans=0.1 +2024-09-17 04:45:12,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.90 vs. limit=12.0 +2024-09-17 04:45:26,427 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.757e+01 9.706e+01 1.051e+02 1.142e+02 2.250e+02, threshold=2.101e+02, percent-clipped=0.0 +2024-09-17 04:46:00,304 INFO [train.py:1198] (1/2) Epoch 9, batch 1100, loss[loss=0.2677, ctc_loss=0.1795, cr_loss=0.4101, attn_decoder_loss=0.2684, over 29469.00 frames. ], tot_loss[loss=0.2707, ctc_loss=0.1807, cr_loss=0.4127, attn_decoder_loss=0.2715, over 5756952.76 frames. ], batch size: 78, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:46:06,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=149200.0, ans=0.125 +2024-09-17 04:46:59,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=149360.0, ans=0.025 +2024-09-17 04:47:07,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=149360.0, ans=0.125 +2024-09-17 04:47:11,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=149360.0, ans=0.125 +2024-09-17 04:47:13,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.87 vs. 
limit=15.0 +2024-09-17 04:47:16,064 INFO [train.py:1198] (1/2) Epoch 9, batch 1150, loss[loss=0.2614, ctc_loss=0.1759, cr_loss=0.4024, attn_decoder_loss=0.262, over 29408.00 frames. ], tot_loss[loss=0.2708, ctc_loss=0.1809, cr_loss=0.4132, attn_decoder_loss=0.2716, over 5754260.11 frames. ], batch size: 78, lr: 1.26e-02, grad_scale: 4.0 +2024-09-17 04:47:16,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=149400.0, ans=0.1 +2024-09-17 04:47:16,994 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.69 vs. limit=15.0 +2024-09-17 04:47:21,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=149400.0, ans=0.0 +2024-09-17 04:48:01,928 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.308e+01 9.807e+01 1.085e+02 1.342e+02 2.441e+02, threshold=2.171e+02, percent-clipped=4.0 +2024-09-17 04:48:05,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=149520.0, ans=0.125 +2024-09-17 04:48:19,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=149560.0, ans=0.125 +2024-09-17 04:48:33,335 INFO [train.py:1198] (1/2) Epoch 9, batch 1200, loss[loss=0.2772, ctc_loss=0.1855, cr_loss=0.4142, attn_decoder_loss=0.2782, over 29673.00 frames. ], tot_loss[loss=0.2717, ctc_loss=0.1817, cr_loss=0.4139, attn_decoder_loss=0.2725, over 5745955.91 frames. ], batch size: 85, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:48:35,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=149600.0, ans=0.025 +2024-09-17 04:48:54,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=149640.0, ans=10.0 +2024-09-17 04:48:57,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=149640.0, ans=0.125 +2024-09-17 04:48:57,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=149640.0, ans=0.0 +2024-09-17 04:49:27,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=149720.0, ans=0.125 +2024-09-17 04:49:30,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=149720.0, ans=0.125 +2024-09-17 04:49:50,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=149760.0, ans=0.0 +2024-09-17 04:49:52,905 INFO [train.py:1198] (1/2) Epoch 9, batch 1250, loss[loss=0.2866, ctc_loss=0.1962, cr_loss=0.4539, attn_decoder_loss=0.2866, over 29538.00 frames. ], tot_loss[loss=0.2723, ctc_loss=0.1817, cr_loss=0.4152, attn_decoder_loss=0.2731, over 5774492.94 frames. 
], batch size: 92, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:49:56,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=149800.0, ans=0.125 +2024-09-17 04:50:04,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=149800.0, ans=15.0 +2024-09-17 04:50:10,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=149840.0, ans=0.125 +2024-09-17 04:50:11,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=149840.0, ans=0.0 +2024-09-17 04:50:22,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=149880.0, ans=0.1 +2024-09-17 04:50:22,669 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.95 vs. limit=22.5 +2024-09-17 04:50:35,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=149880.0, ans=0.125 +2024-09-17 04:50:38,401 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.042e+01 9.591e+01 1.046e+02 1.160e+02 1.832e+02, threshold=2.092e+02, percent-clipped=0.0 +2024-09-17 04:50:41,897 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:50:49,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=149920.0, ans=0.0 +2024-09-17 04:51:00,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=149960.0, ans=0.125 +2024-09-17 04:51:08,746 INFO [train.py:1198] (1/2) Epoch 9, batch 1300, loss[loss=0.2793, ctc_loss=0.1873, cr_loss=0.4153, attn_decoder_loss=0.2803, over 28076.00 frames. ], tot_loss[loss=0.2715, ctc_loss=0.1811, cr_loss=0.4141, attn_decoder_loss=0.2723, over 5779760.84 frames. ], batch size: 111, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:51:24,588 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.83 vs. 
limit=6.0 +2024-09-17 04:51:33,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=150040.0, ans=0.0 +2024-09-17 04:51:45,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=150080.0, ans=0.125 +2024-09-17 04:51:51,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=150080.0, ans=0.125 +2024-09-17 04:52:05,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=150120.0, ans=0.125 +2024-09-17 04:52:15,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=150160.0, ans=0.125 +2024-09-17 04:52:21,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=150160.0, ans=0.05 +2024-09-17 04:52:24,370 INFO [train.py:1198] (1/2) Epoch 9, batch 1350, loss[loss=0.2673, ctc_loss=0.169, cr_loss=0.4103, attn_decoder_loss=0.2691, over 29755.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1804, cr_loss=0.4138, attn_decoder_loss=0.2718, over 5798251.29 frames. ], batch size: 81, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:52:30,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=150200.0, ans=0.0 +2024-09-17 04:52:36,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=150200.0, ans=0.125 +2024-09-17 04:53:01,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.89 vs. limit=22.5 +2024-09-17 04:53:11,517 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.083e+01 9.658e+01 1.049e+02 1.137e+02 1.500e+02, threshold=2.097e+02, percent-clipped=0.0 +2024-09-17 04:53:12,497 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.46 vs. limit=15.0 +2024-09-17 04:53:19,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=150320.0, ans=0.0 +2024-09-17 04:53:19,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=150320.0, ans=0.125 +2024-09-17 04:53:22,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=150320.0, ans=0.1 +2024-09-17 04:53:44,221 INFO [train.py:1198] (1/2) Epoch 9, batch 1400, loss[loss=0.2333, ctc_loss=0.1533, cr_loss=0.3759, attn_decoder_loss=0.2338, over 29574.00 frames. ], tot_loss[loss=0.2704, ctc_loss=0.1798, cr_loss=0.4131, attn_decoder_loss=0.2713, over 5809140.70 frames. 
], batch size: 69, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:54:12,932 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 04:54:18,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=150480.0, ans=0.0 +2024-09-17 04:54:46,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=150560.0, ans=0.0 +2024-09-17 04:54:59,194 INFO [train.py:1198] (1/2) Epoch 9, batch 1450, loss[loss=0.2836, ctc_loss=0.181, cr_loss=0.4167, attn_decoder_loss=0.2857, over 29392.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1801, cr_loss=0.4139, attn_decoder_loss=0.2719, over 5805107.72 frames. ], batch size: 94, lr: 1.26e-02, grad_scale: 4.0 +2024-09-17 04:54:59,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=150600.0, ans=0.125 +2024-09-17 04:55:13,484 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.99 vs. limit=15.0 +2024-09-17 04:55:16,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=150640.0, ans=0.125 +2024-09-17 04:55:32,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=150680.0, ans=0.125 +2024-09-17 04:55:45,590 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.347e+01 1.006e+02 1.117e+02 1.243e+02 2.760e+02, threshold=2.234e+02, percent-clipped=2.0 +2024-09-17 04:56:06,233 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.28 vs. limit=15.0 +2024-09-17 04:56:10,428 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.66 vs. limit=15.0 +2024-09-17 04:56:14,388 INFO [train.py:1198] (1/2) Epoch 9, batch 1500, loss[loss=0.2809, ctc_loss=0.1874, cr_loss=0.439, attn_decoder_loss=0.2816, over 29624.00 frames. ], tot_loss[loss=0.2715, ctc_loss=0.1805, cr_loss=0.4143, attn_decoder_loss=0.2724, over 5805090.56 frames. ], batch size: 86, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:56:20,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=150800.0, ans=0.125 +2024-09-17 04:56:28,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=150840.0, ans=0.025 +2024-09-17 04:56:42,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=150840.0, ans=0.125 +2024-09-17 04:56:43,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.55 vs. limit=10.0 +2024-09-17 04:56:47,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=150880.0, ans=0.125 +2024-09-17 04:57:03,057 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.57 vs. 
limit=10.0 +2024-09-17 04:57:06,050 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.35 vs. limit=15.0 +2024-09-17 04:57:09,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=150920.0, ans=0.125 +2024-09-17 04:57:27,787 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.58 vs. limit=15.0 +2024-09-17 04:57:34,242 INFO [train.py:1198] (1/2) Epoch 9, batch 1550, loss[loss=0.2838, ctc_loss=0.1886, cr_loss=0.4046, attn_decoder_loss=0.2854, over 29502.00 frames. ], tot_loss[loss=0.2714, ctc_loss=0.1807, cr_loss=0.4142, attn_decoder_loss=0.2723, over 5781370.29 frames. ], batch size: 90, lr: 1.26e-02, grad_scale: 4.0 +2024-09-17 04:57:45,825 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.80 vs. limit=22.5 +2024-09-17 04:57:58,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=151040.0, ans=0.125 +2024-09-17 04:58:16,967 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.47 vs. limit=15.0 +2024-09-17 04:58:19,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=151120.0, ans=0.0 +2024-09-17 04:58:22,181 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.049e+01 9.832e+01 1.106e+02 1.253e+02 2.763e+02, threshold=2.212e+02, percent-clipped=1.0 +2024-09-17 04:58:40,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=151160.0, ans=0.125 +2024-09-17 04:58:47,178 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.54 vs. limit=22.5 +2024-09-17 04:58:49,790 INFO [train.py:1198] (1/2) Epoch 9, batch 1600, loss[loss=0.2718, ctc_loss=0.1773, cr_loss=0.4217, attn_decoder_loss=0.2729, over 29676.00 frames. ], tot_loss[loss=0.2717, ctc_loss=0.181, cr_loss=0.4147, attn_decoder_loss=0.2725, over 5764414.90 frames. ], batch size: 85, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 04:58:50,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=151200.0, ans=0.125 +2024-09-17 04:58:57,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=151200.0, ans=0.0 +2024-09-17 04:59:02,662 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.62 vs. limit=15.0 +2024-09-17 04:59:08,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=151240.0, ans=0.125 +2024-09-17 04:59:18,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=151280.0, ans=0.1 +2024-09-17 04:59:40,253 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.20 vs. 
limit=15.0 +2024-09-17 04:59:53,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=151360.0, ans=0.125 +2024-09-17 05:00:00,156 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.27 vs. limit=22.5 +2024-09-17 05:00:00,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=151360.0, ans=0.0 +2024-09-17 05:00:05,104 INFO [train.py:1198] (1/2) Epoch 9, batch 1650, loss[loss=0.2741, ctc_loss=0.1829, cr_loss=0.4031, attn_decoder_loss=0.2753, over 29700.00 frames. ], tot_loss[loss=0.2713, ctc_loss=0.1807, cr_loss=0.414, attn_decoder_loss=0.2722, over 5759747.46 frames. ], batch size: 89, lr: 1.26e-02, grad_scale: 4.0 +2024-09-17 05:00:05,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=151400.0, ans=0.0 +2024-09-17 05:00:12,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=151400.0, ans=0.0 +2024-09-17 05:00:15,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=151400.0, ans=10.0 +2024-09-17 05:00:57,020 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.805e+01 9.589e+01 1.020e+02 1.089e+02 1.544e+02, threshold=2.040e+02, percent-clipped=0.0 +2024-09-17 05:01:15,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=151560.0, ans=0.125 +2024-09-17 05:01:16,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=151560.0, ans=0.125 +2024-09-17 05:01:16,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=151560.0, ans=0.0 +2024-09-17 05:01:23,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.61 vs. limit=10.0 +2024-09-17 05:01:24,480 INFO [train.py:1198] (1/2) Epoch 9, batch 1700, loss[loss=0.2429, ctc_loss=0.1514, cr_loss=0.3807, attn_decoder_loss=0.2446, over 29550.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1802, cr_loss=0.4142, attn_decoder_loss=0.2719, over 5780599.69 frames. ], batch size: 69, lr: 1.26e-02, grad_scale: 8.0 +2024-09-17 05:01:25,426 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.32 vs. 
limit=15.0 +2024-09-17 05:01:53,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=151680.0, ans=10.0 +2024-09-17 05:01:54,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=151680.0, ans=0.0 +2024-09-17 05:02:06,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=151680.0, ans=0.5 +2024-09-17 05:02:08,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=151720.0, ans=0.2 +2024-09-17 05:02:36,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=151760.0, ans=0.1 +2024-09-17 05:02:39,563 INFO [train.py:1198] (1/2) Epoch 9, batch 1750, loss[loss=0.2325, ctc_loss=0.1542, cr_loss=0.3765, attn_decoder_loss=0.2329, over 29360.00 frames. ], tot_loss[loss=0.2701, ctc_loss=0.1793, cr_loss=0.4126, attn_decoder_loss=0.271, over 5788504.33 frames. ], batch size: 67, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:02:44,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=151800.0, ans=0.125 +2024-09-17 05:03:11,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=151880.0, ans=0.125 +2024-09-17 05:03:27,151 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.90 vs. limit=15.0 +2024-09-17 05:03:30,571 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.963e+01 9.433e+01 1.015e+02 1.120e+02 2.449e+02, threshold=2.030e+02, percent-clipped=1.0 +2024-09-17 05:03:33,172 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.79 vs. limit=15.0 +2024-09-17 05:03:35,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.85 vs. limit=12.0 +2024-09-17 05:03:41,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=151960.0, ans=0.05 +2024-09-17 05:03:54,781 INFO [train.py:1198] (1/2) Epoch 9, batch 1800, loss[loss=0.2845, ctc_loss=0.1942, cr_loss=0.423, attn_decoder_loss=0.2851, over 29707.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.18, cr_loss=0.4131, attn_decoder_loss=0.2715, over 5790897.89 frames. ], batch size: 83, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:03:59,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=152000.0, ans=0.125 +2024-09-17 05:04:10,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=152040.0, ans=0.0 +2024-09-17 05:04:23,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=152080.0, ans=0.0 +2024-09-17 05:04:27,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.89 vs. 
limit=15.0 +2024-09-17 05:04:52,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=152120.0, ans=0.1 +2024-09-17 05:04:54,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=152120.0, ans=0.0 +2024-09-17 05:04:58,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=152160.0, ans=0.1 +2024-09-17 05:05:03,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=152160.0, ans=0.125 +2024-09-17 05:05:12,096 INFO [train.py:1198] (1/2) Epoch 9, batch 1850, loss[loss=0.2863, ctc_loss=0.1997, cr_loss=0.4287, attn_decoder_loss=0.2864, over 29619.00 frames. ], tot_loss[loss=0.2705, ctc_loss=0.1798, cr_loss=0.4131, attn_decoder_loss=0.2714, over 5796565.53 frames. ], batch size: 86, lr: 1.25e-02, grad_scale: 4.0 +2024-09-17 05:05:30,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=152240.0, ans=0.125 +2024-09-17 05:05:34,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.43 vs. limit=6.0 +2024-09-17 05:05:45,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=152280.0, ans=0.1 +2024-09-17 05:05:46,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=152280.0, ans=0.0 +2024-09-17 05:05:55,641 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.52 vs. limit=22.5 +2024-09-17 05:06:06,697 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.561e+01 1.000e+02 1.112e+02 1.269e+02 1.875e+02, threshold=2.225e+02, percent-clipped=0.0 +2024-09-17 05:06:29,014 INFO [train.py:1198] (1/2) Epoch 9, batch 1900, loss[loss=0.286, ctc_loss=0.1871, cr_loss=0.4318, attn_decoder_loss=0.2874, over 29722.00 frames. ], tot_loss[loss=0.2713, ctc_loss=0.1805, cr_loss=0.4145, attn_decoder_loss=0.2722, over 5804574.49 frames. ], batch size: 89, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:06:29,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=152400.0, ans=0.125 +2024-09-17 05:06:40,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.30 vs. 
limit=22.5 +2024-09-17 05:06:44,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=152440.0, ans=0.125 +2024-09-17 05:07:01,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=152480.0, ans=0.125 +2024-09-17 05:07:04,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=152480.0, ans=0.2 +2024-09-17 05:07:34,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=152560.0, ans=0.125 +2024-09-17 05:07:38,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=152560.0, ans=0.0 +2024-09-17 05:07:44,262 INFO [train.py:1198] (1/2) Epoch 9, batch 1950, loss[loss=0.2618, ctc_loss=0.1774, cr_loss=0.4065, attn_decoder_loss=0.2622, over 29482.00 frames. ], tot_loss[loss=0.2724, ctc_loss=0.1813, cr_loss=0.4154, attn_decoder_loss=0.2733, over 5818749.34 frames. ], batch size: 78, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:07:46,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=152600.0, ans=0.125 +2024-09-17 05:08:10,869 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.76 vs. limit=15.0 +2024-09-17 05:08:38,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.49 vs. limit=15.0 +2024-09-17 05:08:40,099 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.369e+01 9.742e+01 1.027e+02 1.111e+02 1.388e+02, threshold=2.054e+02, percent-clipped=0.0 +2024-09-17 05:08:49,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=152760.0, ans=0.125 +2024-09-17 05:08:57,947 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.48 vs. limit=15.0 +2024-09-17 05:09:01,813 INFO [train.py:1198] (1/2) Epoch 9, batch 2000, loss[loss=0.2393, ctc_loss=0.1553, cr_loss=0.4053, attn_decoder_loss=0.2396, over 29345.00 frames. ], tot_loss[loss=0.2729, ctc_loss=0.1819, cr_loss=0.4167, attn_decoder_loss=0.2738, over 5795500.62 frames. 
], batch size: 67, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:09:23,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=152840.0, ans=0.125 +2024-09-17 05:09:31,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=152840.0, ans=0.125 +2024-09-17 05:09:31,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=152840.0, ans=0.2 +2024-09-17 05:09:43,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=152880.0, ans=0.09899494936611666 +2024-09-17 05:10:04,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=152960.0, ans=0.0 +2024-09-17 05:10:11,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=152960.0, ans=0.2 +2024-09-17 05:10:19,048 INFO [train.py:1198] (1/2) Epoch 9, batch 2050, loss[loss=0.2381, ctc_loss=0.1538, cr_loss=0.3855, attn_decoder_loss=0.2389, over 29441.00 frames. ], tot_loss[loss=0.2717, ctc_loss=0.1808, cr_loss=0.4146, attn_decoder_loss=0.2726, over 5787993.73 frames. ], batch size: 70, lr: 1.25e-02, grad_scale: 4.0 +2024-09-17 05:10:20,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=153000.0, ans=0.125 +2024-09-17 05:10:29,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=153000.0, ans=0.1 +2024-09-17 05:10:51,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=153080.0, ans=0.125 +2024-09-17 05:10:51,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=153080.0, ans=0.5 +2024-09-17 05:11:14,184 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.89 vs. limit=15.0 +2024-09-17 05:11:15,050 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.385e+01 9.413e+01 1.004e+02 1.102e+02 4.512e+02, threshold=2.009e+02, percent-clipped=3.0 +2024-09-17 05:11:18,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=153160.0, ans=0.125 +2024-09-17 05:11:33,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=153200.0, ans=0.125 +2024-09-17 05:11:34,699 INFO [train.py:1198] (1/2) Epoch 9, batch 2100, loss[loss=0.2813, ctc_loss=0.1921, cr_loss=0.4389, attn_decoder_loss=0.2814, over 29747.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1801, cr_loss=0.4133, attn_decoder_loss=0.2719, over 5799087.36 frames. ], batch size: 81, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:11:46,142 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.56 vs. 
limit=10.0 +2024-09-17 05:11:55,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=153240.0, ans=0.125 +2024-09-17 05:12:02,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=153240.0, ans=0.0 +2024-09-17 05:12:09,508 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:12:39,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=153360.0, ans=0.125 +2024-09-17 05:12:39,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=153360.0, ans=0.0 +2024-09-17 05:12:39,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=153360.0, ans=0.125 +2024-09-17 05:12:48,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=153360.0, ans=0.2 +2024-09-17 05:12:51,521 INFO [train.py:1198] (1/2) Epoch 9, batch 2150, loss[loss=0.2638, ctc_loss=0.1751, cr_loss=0.4112, attn_decoder_loss=0.2645, over 29443.00 frames. ], tot_loss[loss=0.2703, ctc_loss=0.1793, cr_loss=0.4124, attn_decoder_loss=0.2712, over 5813603.10 frames. ], batch size: 78, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:13:17,445 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.18 vs. limit=15.0 +2024-09-17 05:13:21,719 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.25 vs. limit=15.0 +2024-09-17 05:13:38,197 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.95 vs. limit=22.5 +2024-09-17 05:13:51,014 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.919e+01 9.836e+01 1.055e+02 1.144e+02 2.218e+02, threshold=2.111e+02, percent-clipped=2.0 +2024-09-17 05:13:54,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=153560.0, ans=0.125 +2024-09-17 05:14:09,669 INFO [train.py:1198] (1/2) Epoch 9, batch 2200, loss[loss=0.2889, ctc_loss=0.1884, cr_loss=0.425, attn_decoder_loss=0.2906, over 29606.00 frames. ], tot_loss[loss=0.2705, ctc_loss=0.1794, cr_loss=0.4125, attn_decoder_loss=0.2715, over 5810278.91 frames. 
], batch size: 86, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:14:20,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=153600.0, ans=0.125 +2024-09-17 05:14:29,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=153640.0, ans=0.125 +2024-09-17 05:14:30,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=153640.0, ans=0.1 +2024-09-17 05:14:30,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=153640.0, ans=0.125 +2024-09-17 05:14:41,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=153680.0, ans=0.125 +2024-09-17 05:14:43,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=153680.0, ans=0.0 +2024-09-17 05:15:02,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=153720.0, ans=0.0 +2024-09-17 05:15:25,270 INFO [train.py:1198] (1/2) Epoch 9, batch 2250, loss[loss=0.2639, ctc_loss=0.1696, cr_loss=0.3806, attn_decoder_loss=0.2659, over 29700.00 frames. ], tot_loss[loss=0.2703, ctc_loss=0.1789, cr_loss=0.4123, attn_decoder_loss=0.2713, over 5809331.57 frames. ], batch size: 82, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:15:30,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=153800.0, ans=0.125 +2024-09-17 05:15:31,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=153800.0, ans=0.125 +2024-09-17 05:15:40,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=153840.0, ans=0.125 +2024-09-17 05:15:54,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.whiten.whitening_limit, batch_count=153880.0, ans=12.0 +2024-09-17 05:16:24,410 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.430e+01 9.555e+01 1.015e+02 1.096e+02 3.730e+02, threshold=2.031e+02, percent-clipped=3.0 +2024-09-17 05:16:26,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=153960.0, ans=0.1 +2024-09-17 05:16:30,014 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.05 vs. limit=15.0 +2024-09-17 05:16:39,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=153960.0, ans=0.125 +2024-09-17 05:16:42,434 INFO [train.py:1198] (1/2) Epoch 9, batch 2300, loss[loss=0.2432, ctc_loss=0.1636, cr_loss=0.4104, attn_decoder_loss=0.2429, over 29339.00 frames. ], tot_loss[loss=0.2694, ctc_loss=0.1783, cr_loss=0.4115, attn_decoder_loss=0.2704, over 5797439.34 frames. 
], batch size: 71, lr: 1.25e-02, grad_scale: 8.0 +2024-09-17 05:16:42,745 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:16:51,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=154000.0, ans=0.125 +2024-09-17 05:17:12,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=154040.0, ans=0.025 +2024-09-17 05:17:50,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=154160.0, ans=0.025 +2024-09-17 05:18:02,036 INFO [train.py:1198] (1/2) Epoch 9, batch 2350, loss[loss=0.2782, ctc_loss=0.1811, cr_loss=0.4441, attn_decoder_loss=0.2791, over 29691.00 frames. ], tot_loss[loss=0.2692, ctc_loss=0.1779, cr_loss=0.411, attn_decoder_loss=0.2702, over 5802564.28 frames. ], batch size: 83, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:18:11,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=154200.0, ans=0.125 +2024-09-17 05:18:12,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=154200.0, ans=0.125 +2024-09-17 05:18:20,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=154240.0, ans=0.125 +2024-09-17 05:18:26,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=154240.0, ans=0.025 +2024-09-17 05:18:31,018 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:18:40,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=154280.0, ans=0.125 +2024-09-17 05:18:55,399 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:18:59,585 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.337e+01 9.484e+01 1.020e+02 1.101e+02 1.845e+02, threshold=2.040e+02, percent-clipped=0.0 +2024-09-17 05:19:17,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.17 vs. limit=15.0 +2024-09-17 05:19:18,463 INFO [train.py:1198] (1/2) Epoch 9, batch 2400, loss[loss=0.259, ctc_loss=0.17, cr_loss=0.4139, attn_decoder_loss=0.2597, over 29535.00 frames. ], tot_loss[loss=0.2699, ctc_loss=0.1783, cr_loss=0.4119, attn_decoder_loss=0.271, over 5806691.18 frames. ], batch size: 76, lr: 1.24e-02, grad_scale: 16.0 +2024-09-17 05:19:24,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=154400.0, ans=0.125 +2024-09-17 05:19:52,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=154480.0, ans=0.5 +2024-09-17 05:20:36,508 INFO [train.py:1198] (1/2) Epoch 9, batch 2450, loss[loss=0.2766, ctc_loss=0.1961, cr_loss=0.4404, attn_decoder_loss=0.2758, over 29699.00 frames. ], tot_loss[loss=0.271, ctc_loss=0.1795, cr_loss=0.4122, attn_decoder_loss=0.272, over 5785022.21 frames. 
], batch size: 82, lr: 1.24e-02, grad_scale: 4.0 +2024-09-17 05:20:50,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=154640.0, ans=0.125 +2024-09-17 05:20:57,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=154640.0, ans=0.125 +2024-09-17 05:20:57,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=154640.0, ans=0.2 +2024-09-17 05:21:34,904 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=9.10 vs. limit=15.0 +2024-09-17 05:21:38,516 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.373e+01 9.786e+01 1.038e+02 1.229e+02 2.658e+02, threshold=2.076e+02, percent-clipped=2.0 +2024-09-17 05:21:41,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.25 vs. limit=12.0 +2024-09-17 05:21:49,946 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.97 vs. limit=22.5 +2024-09-17 05:21:50,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.50 vs. limit=15.0 +2024-09-17 05:21:53,783 INFO [train.py:1198] (1/2) Epoch 9, batch 2500, loss[loss=0.2711, ctc_loss=0.1724, cr_loss=0.399, attn_decoder_loss=0.2732, over 29636.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1793, cr_loss=0.4127, attn_decoder_loss=0.2716, over 5794998.65 frames. ], batch size: 86, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:21:57,780 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.78 vs. limit=10.0 +2024-09-17 05:22:00,990 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.37 vs. limit=12.0 +2024-09-17 05:22:30,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=154880.0, ans=0.2 +2024-09-17 05:22:47,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=154920.0, ans=0.2 +2024-09-17 05:23:09,598 INFO [train.py:1198] (1/2) Epoch 9, batch 2550, loss[loss=0.2477, ctc_loss=0.1591, cr_loss=0.3961, attn_decoder_loss=0.2488, over 29340.00 frames. ], tot_loss[loss=0.2705, ctc_loss=0.1792, cr_loss=0.4123, attn_decoder_loss=0.2715, over 5798202.33 frames. ], batch size: 67, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:23:12,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=155000.0, ans=0.125 +2024-09-17 05:23:15,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=155000.0, ans=0.1 +2024-09-17 05:23:40,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=155080.0, ans=0.0 +2024-09-17 05:23:43,642 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.68 vs. 
limit=12.0 +2024-09-17 05:24:09,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=155120.0, ans=0.125 +2024-09-17 05:24:12,471 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.204e+01 9.986e+01 1.053e+02 1.251e+02 2.083e+02, threshold=2.107e+02, percent-clipped=1.0 +2024-09-17 05:24:22,371 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.69 vs. limit=15.0 +2024-09-17 05:24:28,037 INFO [train.py:1198] (1/2) Epoch 9, batch 2600, loss[loss=0.2656, ctc_loss=0.1742, cr_loss=0.4019, attn_decoder_loss=0.2668, over 29411.00 frames. ], tot_loss[loss=0.2708, ctc_loss=0.1792, cr_loss=0.4128, attn_decoder_loss=0.2718, over 5793612.82 frames. ], batch size: 78, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:24:31,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=155200.0, ans=0.1 +2024-09-17 05:24:42,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=155240.0, ans=0.1 +2024-09-17 05:24:46,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=155240.0, ans=0.0 +2024-09-17 05:24:53,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=155240.0, ans=0.125 +2024-09-17 05:25:00,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=155280.0, ans=0.05 +2024-09-17 05:25:19,404 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.72 vs. limit=15.0 +2024-09-17 05:25:26,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=155320.0, ans=0.0 +2024-09-17 05:25:32,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=155360.0, ans=0.125 +2024-09-17 05:25:35,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=155360.0, ans=0.0 +2024-09-17 05:25:45,384 INFO [train.py:1198] (1/2) Epoch 9, batch 2650, loss[loss=0.2923, ctc_loss=0.197, cr_loss=0.4406, attn_decoder_loss=0.2931, over 29225.00 frames. ], tot_loss[loss=0.2713, ctc_loss=0.1795, cr_loss=0.413, attn_decoder_loss=0.2724, over 5800223.01 frames. ], batch size: 100, lr: 1.24e-02, grad_scale: 4.0 +2024-09-17 05:25:59,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=155440.0, ans=0.125 +2024-09-17 05:26:11,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=155440.0, ans=0.0 +2024-09-17 05:26:32,912 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.14 vs. 
limit=15.0 +2024-09-17 05:26:37,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=155520.0, ans=0.09899494936611666 +2024-09-17 05:26:47,529 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.070e+01 9.715e+01 1.022e+02 1.111e+02 3.079e+02, threshold=2.044e+02, percent-clipped=1.0 +2024-09-17 05:26:49,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=155560.0, ans=0.0 +2024-09-17 05:27:01,161 INFO [train.py:1198] (1/2) Epoch 9, batch 2700, loss[loss=0.2758, ctc_loss=0.1798, cr_loss=0.4073, attn_decoder_loss=0.2774, over 29538.00 frames. ], tot_loss[loss=0.2714, ctc_loss=0.1793, cr_loss=0.4131, attn_decoder_loss=0.2724, over 5796703.11 frames. ], batch size: 87, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:27:13,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=155600.0, ans=0.125 +2024-09-17 05:27:14,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=155640.0, ans=0.125 +2024-09-17 05:27:32,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys.whitening_limit, batch_count=155680.0, ans=6.0 +2024-09-17 05:27:49,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=155720.0, ans=0.125 +2024-09-17 05:27:50,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=155720.0, ans=0.2 +2024-09-17 05:28:14,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=155760.0, ans=0.2 +2024-09-17 05:28:15,770 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.19 vs. limit=15.0 +2024-09-17 05:28:19,242 INFO [train.py:1198] (1/2) Epoch 9, batch 2750, loss[loss=0.2538, ctc_loss=0.1698, cr_loss=0.404, attn_decoder_loss=0.2542, over 29503.00 frames. ], tot_loss[loss=0.2702, ctc_loss=0.1788, cr_loss=0.4123, attn_decoder_loss=0.2712, over 5795249.60 frames. ], batch size: 75, lr: 1.24e-02, grad_scale: 4.0 +2024-09-17 05:28:43,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=155840.0, ans=0.0 +2024-09-17 05:29:02,893 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.79 vs. 
limit=15.0 +2024-09-17 05:29:03,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=155880.0, ans=0.09899494936611666 +2024-09-17 05:29:05,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=155920.0, ans=0.07 +2024-09-17 05:29:08,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=155920.0, ans=0.1 +2024-09-17 05:29:25,256 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.872e+01 9.518e+01 1.047e+02 1.158e+02 3.298e+02, threshold=2.093e+02, percent-clipped=1.0 +2024-09-17 05:29:38,021 INFO [train.py:1198] (1/2) Epoch 9, batch 2800, loss[loss=0.3132, ctc_loss=0.2523, cr_loss=0.4335, attn_decoder_loss=0.3103, over 20293.00 frames. ], tot_loss[loss=0.2703, ctc_loss=0.1792, cr_loss=0.4127, attn_decoder_loss=0.2713, over 5776309.31 frames. ], batch size: 209, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:30:08,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=156080.0, ans=0.125 +2024-09-17 05:30:35,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=156120.0, ans=0.125 +2024-09-17 05:30:44,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=156160.0, ans=0.2 +2024-09-17 05:30:48,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=156160.0, ans=0.0 +2024-09-17 05:30:48,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=156160.0, ans=0.1 +2024-09-17 05:30:52,939 INFO [train.py:1198] (1/2) Epoch 9, batch 2850, loss[loss=0.2561, ctc_loss=0.1695, cr_loss=0.3884, attn_decoder_loss=0.2571, over 29471.00 frames. ], tot_loss[loss=0.2712, ctc_loss=0.1807, cr_loss=0.4146, attn_decoder_loss=0.2721, over 5762599.73 frames. ], batch size: 77, lr: 1.24e-02, grad_scale: 4.0 +2024-09-17 05:31:17,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=156240.0, ans=0.04949747468305833 +2024-09-17 05:31:18,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=156240.0, ans=0.0 +2024-09-17 05:31:25,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=156280.0, ans=0.0 +2024-09-17 05:32:00,095 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.550e+01 9.773e+01 1.033e+02 1.202e+02 1.627e+02, threshold=2.066e+02, percent-clipped=0.0 +2024-09-17 05:32:10,733 INFO [train.py:1198] (1/2) Epoch 9, batch 2900, loss[loss=0.2662, ctc_loss=0.17, cr_loss=0.4256, attn_decoder_loss=0.2675, over 29437.00 frames. ], tot_loss[loss=0.2721, ctc_loss=0.1809, cr_loss=0.4163, attn_decoder_loss=0.273, over 5787951.73 frames. ], batch size: 79, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:32:13,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.72 vs. 
limit=15.0 +2024-09-17 05:32:33,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.43 vs. limit=15.0 +2024-09-17 05:32:51,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=156480.0, ans=0.125 +2024-09-17 05:32:52,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=156480.0, ans=0.2 +2024-09-17 05:33:18,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=156560.0, ans=0.2 +2024-09-17 05:33:19,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=156560.0, ans=0.0 +2024-09-17 05:33:28,637 INFO [train.py:1198] (1/2) Epoch 9, batch 2950, loss[loss=0.2556, ctc_loss=0.1654, cr_loss=0.3892, attn_decoder_loss=0.257, over 29511.00 frames. ], tot_loss[loss=0.2708, ctc_loss=0.1801, cr_loss=0.4137, attn_decoder_loss=0.2717, over 5782118.60 frames. ], batch size: 75, lr: 1.24e-02, grad_scale: 8.0 +2024-09-17 05:33:53,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=156640.0, ans=0.2 +2024-09-17 05:34:09,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=156680.0, ans=0.0 +2024-09-17 05:34:28,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=156760.0, ans=0.125 +2024-09-17 05:34:33,740 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.085e+01 9.535e+01 1.020e+02 1.127e+02 2.521e+02, threshold=2.039e+02, percent-clipped=1.0 +2024-09-17 05:34:43,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=156800.0, ans=0.125 +2024-09-17 05:34:44,917 INFO [train.py:1198] (1/2) Epoch 9, batch 3000, loss[loss=0.2614, ctc_loss=0.1679, cr_loss=0.4178, attn_decoder_loss=0.2625, over 29739.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1797, cr_loss=0.4139, attn_decoder_loss=0.2715, over 5782508.88 frames. ], batch size: 81, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:34:44,917 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 05:35:03,247 INFO [train.py:1230] (1/2) Epoch 9, validation: loss=0.2139, ctc_loss=0.05057, cr_loss=4.328e-15, attn_decoder_loss=0.232, over 944034.00 frames. 
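The per-batch loss[...] and running tot_loss[...] entries above are frame-weighted: each loss component is averaged with the number of acoustic frames as its weight, which is why every entry carries an "over N frames" count (29739.00 frames for batch 3000, roughly 5.78M for the running total, 944034.00 for the validation pass). A minimal sketch of that bookkeeping in Python, reusing the batch-3000 numbers from this log; the class and method names are illustrative rather than icefall's exact API, and the real running total evidently down-weights old batches (its "over ... frames" count hovers around 5.8M instead of growing), which this sketch omits:

    from collections import defaultdict

    class FrameWeightedTracker:
        """Accumulate loss * frames so averages are weighted by frame count."""

        def __init__(self) -> None:
            self.sums = defaultdict(float)  # per-key sum of loss * num_frames
            self.frames = 0.0

        def update(self, losses: dict, num_frames: float) -> None:
            for key, value in losses.items():
                self.sums[key] += value * num_frames
            self.frames += num_frames

        def averages(self) -> dict:
            return {key: s / self.frames for key, s in self.sums.items()}

    # The batch 3000 entry from this log:
    tracker = FrameWeightedTracker()
    tracker.update(
        {"loss": 0.2614, "ctc_loss": 0.1679,
         "cr_loss": 0.4178, "attn_decoder_loss": 0.2625},
        num_frames=29739.0,
    )
    print(tracker.averages(), "over", tracker.frames, "frames")

Averaging a single batch just returns its own values; as more batches are folded in, the averages converge toward the tot_loss figures reported alongside each batch.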
+2024-09-17 05:35:03,247 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 05:35:15,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=156800.0, ans=0.0 +2024-09-17 05:35:36,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=156880.0, ans=0.125 +2024-09-17 05:35:40,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=156880.0, ans=0.09899494936611666 +2024-09-17 05:35:46,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=156880.0, ans=0.125 +2024-09-17 05:36:08,288 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:36:12,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=156960.0, ans=0.125 +2024-09-17 05:36:21,549 INFO [train.py:1198] (1/2) Epoch 9, batch 3050, loss[loss=0.2645, ctc_loss=0.1753, cr_loss=0.4479, attn_decoder_loss=0.2645, over 29522.00 frames. ], tot_loss[loss=0.2713, ctc_loss=0.1802, cr_loss=0.4148, attn_decoder_loss=0.2722, over 5776332.22 frames. ], batch size: 76, lr: 1.23e-02, grad_scale: 4.0 +2024-09-17 05:36:49,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=157040.0, ans=0.125 +2024-09-17 05:36:49,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=157040.0, ans=0.0 +2024-09-17 05:36:51,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=157040.0, ans=0.125 +2024-09-17 05:36:55,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=157080.0, ans=0.125 +2024-09-17 05:37:00,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=157080.0, ans=0.0 +2024-09-17 05:37:06,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=157080.0, ans=0.125 +2024-09-17 05:37:10,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=157120.0, ans=0.125 +2024-09-17 05:37:13,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=157120.0, ans=0.0 +2024-09-17 05:37:28,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=157160.0, ans=0.125 +2024-09-17 05:37:29,759 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.561e+01 1.002e+02 1.065e+02 1.234e+02 3.157e+02, threshold=2.130e+02, percent-clipped=3.0 +2024-09-17 05:37:38,248 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.09 vs. limit=15.0 +2024-09-17 05:37:38,808 INFO [train.py:1198] (1/2) Epoch 9, batch 3100, loss[loss=0.2927, ctc_loss=0.2036, cr_loss=0.4529, attn_decoder_loss=0.2925, over 29313.00 frames. ], tot_loss[loss=0.2708, ctc_loss=0.1799, cr_loss=0.4136, attn_decoder_loss=0.2717, over 5776307.37 frames. 
], batch size: 100, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:37:43,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=157200.0, ans=0.07 +2024-09-17 05:37:57,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=157240.0, ans=0.0 +2024-09-17 05:38:13,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=157280.0, ans=0.0 +2024-09-17 05:38:21,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=157280.0, ans=0.0 +2024-09-17 05:38:27,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=157320.0, ans=0.07 +2024-09-17 05:38:42,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=157360.0, ans=0.125 +2024-09-17 05:38:46,327 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.12 vs. limit=15.0 +2024-09-17 05:38:54,755 INFO [train.py:1198] (1/2) Epoch 9, batch 3150, loss[loss=0.2782, ctc_loss=0.1759, cr_loss=0.4189, attn_decoder_loss=0.2803, over 28799.00 frames. ], tot_loss[loss=0.2706, ctc_loss=0.1793, cr_loss=0.4131, attn_decoder_loss=0.2715, over 5782820.29 frames. ], batch size: 104, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:38:58,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=157400.0, ans=0.125 +2024-09-17 05:39:16,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=157440.0, ans=0.125 +2024-09-17 05:39:23,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=157440.0, ans=0.2 +2024-09-17 05:39:29,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=157480.0, ans=0.125 +2024-09-17 05:39:44,126 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:39:44,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=157520.0, ans=0.125 +2024-09-17 05:39:53,695 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.61 vs. limit=15.0 +2024-09-17 05:40:04,838 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.427e+01 1.013e+02 1.077e+02 1.205e+02 2.021e+02, threshold=2.154e+02, percent-clipped=0.0 +2024-09-17 05:40:12,973 INFO [train.py:1198] (1/2) Epoch 9, batch 3200, loss[loss=0.2629, ctc_loss=0.1697, cr_loss=0.4179, attn_decoder_loss=0.264, over 29399.00 frames. ], tot_loss[loss=0.2703, ctc_loss=0.1791, cr_loss=0.4133, attn_decoder_loss=0.2712, over 5792998.23 frames. 
], batch size: 79, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:40:27,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=157640.0, ans=0.125 +2024-09-17 05:40:58,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=157680.0, ans=0.0 +2024-09-17 05:41:15,784 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.46 vs. limit=6.0 +2024-09-17 05:41:25,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=157760.0, ans=0.125 +2024-09-17 05:41:25,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.23 vs. limit=15.0 +2024-09-17 05:41:31,249 INFO [train.py:1198] (1/2) Epoch 9, batch 3250, loss[loss=0.2817, ctc_loss=0.1869, cr_loss=0.4156, attn_decoder_loss=0.283, over 29728.00 frames. ], tot_loss[loss=0.2701, ctc_loss=0.1788, cr_loss=0.4132, attn_decoder_loss=0.2711, over 5800529.63 frames. ], batch size: 84, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:41:36,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=157800.0, ans=0.1 +2024-09-17 05:42:12,861 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.15 vs. limit=6.0 +2024-09-17 05:42:13,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=157880.0, ans=0.125 +2024-09-17 05:42:13,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=157880.0, ans=0.2 +2024-09-17 05:42:21,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=157920.0, ans=0.2 +2024-09-17 05:42:32,807 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.02 vs. limit=15.0 +2024-09-17 05:42:39,124 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.206e+01 9.558e+01 1.067e+02 1.153e+02 2.320e+02, threshold=2.135e+02, percent-clipped=2.0 +2024-09-17 05:42:45,765 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:42:46,851 INFO [train.py:1198] (1/2) Epoch 9, batch 3300, loss[loss=0.287, ctc_loss=0.1869, cr_loss=0.3941, attn_decoder_loss=0.2893, over 28306.00 frames. ], tot_loss[loss=0.269, ctc_loss=0.1781, cr_loss=0.4126, attn_decoder_loss=0.27, over 5796820.07 frames. ], batch size: 111, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:43:01,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=158000.0, ans=0.05 +2024-09-17 05:43:03,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=158040.0, ans=0.025 +2024-09-17 05:43:06,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=3.97 vs. 
limit=12.0 +2024-09-17 05:43:25,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=158080.0, ans=0.0 +2024-09-17 05:43:42,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=158120.0, ans=0.0 +2024-09-17 05:44:04,438 INFO [train.py:1198] (1/2) Epoch 9, batch 3350, loss[loss=0.2912, ctc_loss=0.1954, cr_loss=0.4306, attn_decoder_loss=0.2923, over 28831.00 frames. ], tot_loss[loss=0.27, ctc_loss=0.1792, cr_loss=0.4134, attn_decoder_loss=0.2709, over 5774259.92 frames. ], batch size: 104, lr: 1.23e-02, grad_scale: 4.0 +2024-09-17 05:44:29,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=158240.0, ans=0.035 +2024-09-17 05:44:30,405 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.02 vs. limit=22.5 +2024-09-17 05:44:38,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=158280.0, ans=0.1 +2024-09-17 05:44:46,995 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.51 vs. limit=10.0 +2024-09-17 05:44:47,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=158280.0, ans=0.125 +2024-09-17 05:45:02,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.96 vs. limit=22.5 +2024-09-17 05:45:16,186 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.103e+01 9.844e+01 1.079e+02 1.203e+02 3.746e+02, threshold=2.158e+02, percent-clipped=3.0 +2024-09-17 05:45:22,612 INFO [train.py:1198] (1/2) Epoch 9, batch 3400, loss[loss=0.2343, ctc_loss=0.139, cr_loss=0.3264, attn_decoder_loss=0.2377, over 29339.00 frames. ], tot_loss[loss=0.2702, ctc_loss=0.1794, cr_loss=0.4133, attn_decoder_loss=0.2711, over 5766551.40 frames. ], batch size: 67, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:45:30,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=158400.0, ans=0.125 +2024-09-17 05:45:33,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=158400.0, ans=0.05 +2024-09-17 05:45:36,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=158440.0, ans=0.125 +2024-09-17 05:45:53,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=158480.0, ans=0.1 +2024-09-17 05:46:11,762 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 05:46:26,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=158560.0, ans=0.0 +2024-09-17 05:46:33,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.29 vs. 
limit=15.0 +2024-09-17 05:46:38,411 INFO [train.py:1198] (1/2) Epoch 9, batch 3450, loss[loss=0.2821, ctc_loss=0.1876, cr_loss=0.4198, attn_decoder_loss=0.2833, over 28310.00 frames. ], tot_loss[loss=0.2702, ctc_loss=0.1792, cr_loss=0.4135, attn_decoder_loss=0.2712, over 5774876.43 frames. ], batch size: 111, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:46:48,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=158600.0, ans=0.025 +2024-09-17 05:46:53,595 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.24 vs. limit=15.0 +2024-09-17 05:47:18,926 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.29 vs. limit=22.5 +2024-09-17 05:47:19,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=158680.0, ans=0.0 +2024-09-17 05:47:33,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=158720.0, ans=0.0 +2024-09-17 05:47:39,069 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.15 vs. limit=10.0 +2024-09-17 05:47:39,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=158760.0, ans=0.07 +2024-09-17 05:47:50,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=158760.0, ans=0.125 +2024-09-17 05:47:50,855 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.43 vs. limit=15.0 +2024-09-17 05:47:51,397 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.572e+01 9.332e+01 9.969e+01 1.060e+02 1.614e+02, threshold=1.994e+02, percent-clipped=0.0 +2024-09-17 05:47:55,998 INFO [train.py:1198] (1/2) Epoch 9, batch 3500, loss[loss=0.245, ctc_loss=0.1604, cr_loss=0.3691, attn_decoder_loss=0.2461, over 29330.00 frames. ], tot_loss[loss=0.2698, ctc_loss=0.1784, cr_loss=0.4119, attn_decoder_loss=0.2708, over 5777532.32 frames. ], batch size: 71, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:48:11,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=158840.0, ans=0.0 +2024-09-17 05:48:25,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=158880.0, ans=0.125 +2024-09-17 05:48:26,984 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.79 vs. limit=15.0 +2024-09-17 05:48:37,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=158880.0, ans=0.0 +2024-09-17 05:48:37,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=158880.0, ans=0.04949747468305833 +2024-09-17 05:48:38,377 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.82 vs. 
limit=22.5 +2024-09-17 05:48:47,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=158920.0, ans=0.04949747468305833 +2024-09-17 05:48:53,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=158920.0, ans=0.125 +2024-09-17 05:48:58,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=158960.0, ans=0.2 +2024-09-17 05:48:59,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=158960.0, ans=0.0 +2024-09-17 05:49:08,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=158960.0, ans=0.125 +2024-09-17 05:49:10,520 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.11 vs. limit=10.0 +2024-09-17 05:49:12,737 INFO [train.py:1198] (1/2) Epoch 9, batch 3550, loss[loss=0.2831, ctc_loss=0.1888, cr_loss=0.4179, attn_decoder_loss=0.2843, over 29681.00 frames. ], tot_loss[loss=0.2697, ctc_loss=0.1782, cr_loss=0.4121, attn_decoder_loss=0.2707, over 5783305.50 frames. ], batch size: 89, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:49:18,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=159000.0, ans=0.125 +2024-09-17 05:49:29,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=159040.0, ans=0.125 +2024-09-17 05:49:35,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=159040.0, ans=0.0 +2024-09-17 05:49:36,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=159040.0, ans=0.125 +2024-09-17 05:49:42,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=159080.0, ans=0.95 +2024-09-17 05:49:44,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=159080.0, ans=0.125 +2024-09-17 05:49:57,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=159120.0, ans=0.125 +2024-09-17 05:50:23,962 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.321e+01 9.598e+01 1.057e+02 1.166e+02 3.699e+02, threshold=2.113e+02, percent-clipped=1.0 +2024-09-17 05:50:27,356 INFO [train.py:1198] (1/2) Epoch 9, batch 3600, loss[loss=0.277, ctc_loss=0.1958, cr_loss=0.4564, attn_decoder_loss=0.2759, over 29509.00 frames. ], tot_loss[loss=0.2695, ctc_loss=0.1779, cr_loss=0.4125, attn_decoder_loss=0.2705, over 5791969.85 frames. 
], batch size: 77, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:51:04,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=159280.0, ans=0.2 +2024-09-17 05:51:25,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=159360.0, ans=0.0 +2024-09-17 05:51:41,631 INFO [train.py:1198] (1/2) Epoch 9, batch 3650, loss[loss=0.2811, ctc_loss=0.1886, cr_loss=0.4403, attn_decoder_loss=0.2816, over 29501.00 frames. ], tot_loss[loss=0.2687, ctc_loss=0.1772, cr_loss=0.4113, attn_decoder_loss=0.2698, over 5794466.57 frames. ], batch size: 90, lr: 1.23e-02, grad_scale: 8.0 +2024-09-17 05:51:52,759 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=7.30 vs. limit=12.0 +2024-09-17 05:52:00,151 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.10 vs. limit=22.5 +2024-09-17 05:52:21,711 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.50 vs. limit=10.0 +2024-09-17 05:52:54,983 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.190e+01 9.468e+01 1.034e+02 1.121e+02 1.943e+02, threshold=2.068e+02, percent-clipped=0.0 +2024-09-17 05:52:57,917 INFO [train.py:1198] (1/2) Epoch 9, batch 3700, loss[loss=0.2863, ctc_loss=0.1945, cr_loss=0.437, attn_decoder_loss=0.2868, over 29711.00 frames. ], tot_loss[loss=0.269, ctc_loss=0.1774, cr_loss=0.4119, attn_decoder_loss=0.27, over 5803296.78 frames. ], batch size: 84, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:52:58,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=159600.0, ans=0.125 +2024-09-17 05:52:58,865 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.66 vs. limit=15.0 +2024-09-17 05:53:24,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=159640.0, ans=0.125 +2024-09-17 05:53:41,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=159720.0, ans=0.125 +2024-09-17 05:53:41,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=159720.0, ans=0.125 +2024-09-17 05:53:51,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=159720.0, ans=0.025 +2024-09-17 05:53:56,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=159760.0, ans=0.125 +2024-09-17 05:54:09,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=159760.0, ans=0.05 +2024-09-17 05:54:12,624 INFO [train.py:1198] (1/2) Epoch 9, batch 3750, loss[loss=0.2435, ctc_loss=0.1556, cr_loss=0.3634, attn_decoder_loss=0.2452, over 29369.00 frames. ], tot_loss[loss=0.2683, ctc_loss=0.1766, cr_loss=0.4105, attn_decoder_loss=0.2694, over 5808720.17 frames. 
], batch size: 67, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:54:55,368 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.60 vs. limit=15.0 +2024-09-17 05:55:05,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=159920.0, ans=0.125 +2024-09-17 05:55:21,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=159960.0, ans=0.0 +2024-09-17 05:55:25,523 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.200e+01 9.758e+01 1.061e+02 1.222e+02 3.852e+02, threshold=2.121e+02, percent-clipped=3.0 +2024-09-17 05:55:35,959 INFO [train.py:1198] (1/2) Epoch 9, batch 3800, loss[loss=0.2887, ctc_loss=0.1981, cr_loss=0.4636, attn_decoder_loss=0.2884, over 29628.00 frames. ], tot_loss[loss=0.2683, ctc_loss=0.1767, cr_loss=0.411, attn_decoder_loss=0.2693, over 5799315.21 frames. ], batch size: 86, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:55:39,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=160000.0, ans=0.2 +2024-09-17 05:55:40,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=160000.0, ans=0.1 +2024-09-17 05:55:46,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=160000.0, ans=0.025 +2024-09-17 05:55:53,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=160040.0, ans=0.2 +2024-09-17 05:56:50,300 INFO [train.py:1198] (1/2) Epoch 9, batch 3850, loss[loss=0.3002, ctc_loss=0.2137, cr_loss=0.4627, attn_decoder_loss=0.2996, over 29242.00 frames. ], tot_loss[loss=0.2686, ctc_loss=0.1772, cr_loss=0.4114, attn_decoder_loss=0.2696, over 5813350.66 frames. ], batch size: 100, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:57:08,731 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.88 vs. limit=15.0 +2024-09-17 05:57:32,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=160280.0, ans=0.125 +2024-09-17 05:57:44,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=160320.0, ans=0.2 +2024-09-17 05:57:44,542 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.23 vs. limit=22.5 +2024-09-17 05:57:59,606 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.15 vs. limit=10.0 +2024-09-17 05:58:03,424 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.356e+01 9.561e+01 1.016e+02 1.083e+02 1.844e+02, threshold=2.033e+02, percent-clipped=0.0 +2024-09-17 05:58:06,430 INFO [train.py:1198] (1/2) Epoch 9, batch 3900, loss[loss=0.2653, ctc_loss=0.1676, cr_loss=0.3952, attn_decoder_loss=0.2673, over 29624.00 frames. ], tot_loss[loss=0.2693, ctc_loss=0.1777, cr_loss=0.4128, attn_decoder_loss=0.2703, over 5817825.96 frames. 
], batch size: 86, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 05:58:12,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=160400.0, ans=0.125 +2024-09-17 05:58:21,028 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.40 vs. limit=15.0 +2024-09-17 05:58:24,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=160440.0, ans=0.2 +2024-09-17 05:58:52,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=160520.0, ans=0.125 +2024-09-17 05:58:52,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=160520.0, ans=0.125 +2024-09-17 05:58:55,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=160520.0, ans=0.125 +2024-09-17 05:59:20,918 INFO [train.py:1198] (1/2) Epoch 9, batch 3950, loss[loss=0.282, ctc_loss=0.1865, cr_loss=0.4413, attn_decoder_loss=0.2828, over 29451.00 frames. ], tot_loss[loss=0.2696, ctc_loss=0.1776, cr_loss=0.4131, attn_decoder_loss=0.2707, over 5836915.41 frames. ], batch size: 97, lr: 1.22e-02, grad_scale: 4.0 +2024-09-17 05:59:26,192 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.13 vs. limit=15.0 +2024-09-17 05:59:34,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=160640.0, ans=0.0 +2024-09-17 05:59:44,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=160640.0, ans=0.0 +2024-09-17 06:00:11,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=160720.0, ans=0.2 +2024-09-17 06:00:34,493 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.022e+01 9.640e+01 1.057e+02 1.201e+02 4.208e+02, threshold=2.114e+02, percent-clipped=4.0 +2024-09-17 06:00:36,380 INFO [train.py:1198] (1/2) Epoch 9, batch 4000, loss[loss=0.2425, ctc_loss=0.1437, cr_loss=0.3717, attn_decoder_loss=0.2452, over 29494.00 frames. ], tot_loss[loss=0.2691, ctc_loss=0.1773, cr_loss=0.412, attn_decoder_loss=0.2701, over 5813203.84 frames. 
], batch size: 74, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 06:00:38,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=160800.0, ans=0.0 +2024-09-17 06:01:15,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=160880.0, ans=0.0 +2024-09-17 06:01:42,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=160960.0, ans=0.125 +2024-09-17 06:01:43,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=160960.0, ans=0.125 +2024-09-17 06:01:49,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=161000.0, ans=0.0 +2024-09-17 06:01:50,827 INFO [train.py:1198] (1/2) Epoch 9, batch 4050, loss[loss=0.2954, ctc_loss=0.2212, cr_loss=0.4145, attn_decoder_loss=0.2945, over 19973.00 frames. ], tot_loss[loss=0.2691, ctc_loss=0.1775, cr_loss=0.412, attn_decoder_loss=0.2702, over 5797521.00 frames. ], batch size: 209, lr: 1.22e-02, grad_scale: 4.0 +2024-09-17 06:01:56,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=161000.0, ans=0.125 +2024-09-17 06:01:57,138 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:02:14,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=161040.0, ans=0.025 +2024-09-17 06:02:17,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=161040.0, ans=0.125 +2024-09-17 06:02:31,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=161080.0, ans=0.125 +2024-09-17 06:02:31,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=161080.0, ans=0.0 +2024-09-17 06:02:48,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.03 vs. limit=6.0 +2024-09-17 06:03:01,513 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.35 vs. limit=15.0 +2024-09-17 06:03:05,378 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.114e+01 9.617e+01 1.028e+02 1.240e+02 2.479e+02, threshold=2.055e+02, percent-clipped=2.0 +2024-09-17 06:03:05,401 INFO [train.py:1198] (1/2) Epoch 9, batch 4100, loss[loss=0.2846, ctc_loss=0.1823, cr_loss=0.4174, attn_decoder_loss=0.2867, over 29504.00 frames. ], tot_loss[loss=0.2696, ctc_loss=0.1782, cr_loss=0.4131, attn_decoder_loss=0.2706, over 5793886.04 frames. 
], batch size: 90, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 06:03:21,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=161240.0, ans=0.1 +2024-09-17 06:03:29,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=161240.0, ans=0.125 +2024-09-17 06:03:40,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=161280.0, ans=0.2 +2024-09-17 06:03:51,764 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.53 vs. limit=15.0 +2024-09-17 06:04:18,867 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.50 vs. limit=15.0 +2024-09-17 06:04:19,362 INFO [train.py:1198] (1/2) Epoch 9, batch 4150, loss[loss=0.2584, ctc_loss=0.1649, cr_loss=0.4021, attn_decoder_loss=0.2598, over 29526.00 frames. ], tot_loss[loss=0.2696, ctc_loss=0.1782, cr_loss=0.4132, attn_decoder_loss=0.2706, over 5798258.49 frames. ], batch size: 77, lr: 1.22e-02, grad_scale: 4.0 +2024-09-17 06:04:25,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=161400.0, ans=0.05 +2024-09-17 06:04:41,753 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:04:51,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1.whitening_limit, batch_count=161480.0, ans=10.0 +2024-09-17 06:04:54,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=161480.0, ans=0.125 +2024-09-17 06:04:58,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=161480.0, ans=0.1 +2024-09-17 06:04:58,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=161480.0, ans=0.0 +2024-09-17 06:05:02,258 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.22 vs. limit=15.0 +2024-09-17 06:05:06,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=161520.0, ans=0.125 +2024-09-17 06:05:18,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=161560.0, ans=0.0 +2024-09-17 06:05:19,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=161560.0, ans=0.0 +2024-09-17 06:05:34,475 INFO [train.py:1198] (1/2) Epoch 9, batch 4200, loss[loss=0.2858, ctc_loss=0.195, cr_loss=0.4501, attn_decoder_loss=0.2859, over 29497.00 frames. ], tot_loss[loss=0.2697, ctc_loss=0.1779, cr_loss=0.4129, attn_decoder_loss=0.2707, over 5799943.49 frames. 
], batch size: 90, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 06:05:35,869 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.845e+01 9.556e+01 1.027e+02 1.111e+02 2.120e+02, threshold=2.054e+02, percent-clipped=2.0 +2024-09-17 06:05:36,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=161600.0, ans=0.125 +2024-09-17 06:05:39,523 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.22 vs. limit=22.5 +2024-09-17 06:05:44,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.32 vs. limit=15.0 +2024-09-17 06:06:27,972 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.04 vs. limit=15.0 +2024-09-17 06:06:48,812 INFO [train.py:1198] (1/2) Epoch 9, batch 4250, loss[loss=0.2436, ctc_loss=0.1516, cr_loss=0.3747, attn_decoder_loss=0.2455, over 29520.00 frames. ], tot_loss[loss=0.2695, ctc_loss=0.1774, cr_loss=0.4123, attn_decoder_loss=0.2706, over 5805325.68 frames. ], batch size: 74, lr: 1.22e-02, grad_scale: 4.0 +2024-09-17 06:07:12,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=161840.0, ans=0.1 +2024-09-17 06:07:20,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.51 vs. limit=22.5 +2024-09-17 06:07:59,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=161960.0, ans=0.125 +2024-09-17 06:08:02,493 INFO [train.py:1198] (1/2) Epoch 9, batch 4300, loss[loss=0.2843, ctc_loss=0.1871, cr_loss=0.4432, attn_decoder_loss=0.2852, over 29525.00 frames. ], tot_loss[loss=0.2703, ctc_loss=0.1784, cr_loss=0.4137, attn_decoder_loss=0.2713, over 5795087.41 frames. ], batch size: 87, lr: 1.22e-02, grad_scale: 8.0 +2024-09-17 06:08:05,467 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.660e+01 9.959e+01 1.074e+02 1.170e+02 2.141e+02, threshold=2.147e+02, percent-clipped=1.0 +2024-09-17 06:08:16,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=162040.0, ans=0.0 +2024-09-17 06:08:46,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=162120.0, ans=0.0 +2024-09-17 06:09:04,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=162160.0, ans=0.1 +2024-09-17 06:09:17,152 INFO [train.py:1198] (1/2) Epoch 9, batch 4350, loss[loss=0.2927, ctc_loss=0.1947, cr_loss=0.4392, attn_decoder_loss=0.2938, over 29449.00 frames. ], tot_loss[loss=0.274, ctc_loss=0.1815, cr_loss=0.4191, attn_decoder_loss=0.2749, over 5797376.53 frames. 
], batch size: 97, lr: 1.21e-02, grad_scale: 8.0 +2024-09-17 06:09:55,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=162280.0, ans=0.0 +2024-09-17 06:09:55,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=162280.0, ans=0.125 +2024-09-17 06:09:58,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=162280.0, ans=0.025 +2024-09-17 06:10:07,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=162320.0, ans=0.1 +2024-09-17 06:10:23,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=162360.0, ans=0.09899494936611666 +2024-09-17 06:10:25,957 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.59 vs. limit=15.0 +2024-09-17 06:10:31,074 INFO [train.py:1198] (1/2) Epoch 9, batch 4400, loss[loss=0.2877, ctc_loss=0.2012, cr_loss=0.4359, attn_decoder_loss=0.2877, over 27496.00 frames. ], tot_loss[loss=0.2763, ctc_loss=0.1837, cr_loss=0.4218, attn_decoder_loss=0.2772, over 5765498.70 frames. ], batch size: 124, lr: 1.21e-02, grad_scale: 8.0 +2024-09-17 06:10:35,517 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.675e+01 9.860e+01 1.034e+02 1.169e+02 1.757e+02, threshold=2.069e+02, percent-clipped=0.0 +2024-09-17 06:10:43,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=162400.0, ans=0.1 +2024-09-17 06:10:43,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=162400.0, ans=0.125 +2024-09-17 06:10:50,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=162440.0, ans=0.05 +2024-09-17 06:10:53,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=162440.0, ans=0.0 +2024-09-17 06:11:05,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.52 vs. limit=10.0 +2024-09-17 06:11:20,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=162520.0, ans=0.125 +2024-09-17 06:11:46,092 INFO [train.py:1198] (1/2) Epoch 9, batch 4450, loss[loss=0.3124, ctc_loss=0.2492, cr_loss=0.4708, attn_decoder_loss=0.309, over 20072.00 frames. ], tot_loss[loss=0.2797, ctc_loss=0.1892, cr_loss=0.4256, attn_decoder_loss=0.2803, over 5570202.92 frames. ], batch size: 210, lr: 1.21e-02, grad_scale: 4.0 +2024-09-17 06:11:46,863 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.67 vs. 
limit=15.0 +2024-09-17 06:11:51,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=162600.0, ans=10.0 +2024-09-17 06:12:00,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=162640.0, ans=0.09899494936611666 +2024-09-17 06:12:07,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=162640.0, ans=0.125 +2024-09-17 06:12:12,370 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.08 vs. limit=15.0 +2024-09-17 06:12:17,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=162680.0, ans=0.0 +2024-09-17 06:12:28,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=162680.0, ans=0.1 +2024-09-17 06:12:37,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=162720.0, ans=0.0 +2024-09-17 06:13:01,685 INFO [train.py:1198] (1/2) Epoch 9, batch 4500, loss[loss=0.2971, ctc_loss=0.2238, cr_loss=0.4398, attn_decoder_loss=0.2955, over 20067.00 frames. ], tot_loss[loss=0.2836, ctc_loss=0.1968, cr_loss=0.4278, attn_decoder_loss=0.2838, over 5228674.98 frames. ], batch size: 209, lr: 1.21e-02, grad_scale: 8.0 +2024-09-17 06:13:07,509 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.470e+01 1.060e+02 1.171e+02 1.308e+02 2.646e+02, threshold=2.342e+02, percent-clipped=3.0 +2024-09-17 06:13:12,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=162800.0, ans=0.2 +2024-09-17 06:13:13,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=162800.0, ans=0.025 +2024-09-17 06:13:22,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=162840.0, ans=0.0 +2024-09-17 06:13:28,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=162840.0, ans=0.125 +2024-09-17 06:13:33,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=162880.0, ans=0.05 +2024-09-17 06:14:29,147 INFO [train.py:1198] (1/2) Epoch 10, batch 0, loss[loss=0.2581, ctc_loss=0.1605, cr_loss=0.3872, attn_decoder_loss=0.2603, over 29606.00 frames. ], tot_loss[loss=0.2581, ctc_loss=0.1605, cr_loss=0.3872, attn_decoder_loss=0.2603, over 29606.00 frames. ], batch size: 73, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:14:29,148 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 06:14:47,510 INFO [train.py:1230] (1/2) Epoch 10, validation: loss=0.2171, ctc_loss=0.05118, cr_loss=4.759e-15, attn_decoder_loss=0.2355, over 944034.00 frames. 
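Each WARNING from optim.py:487 above lists five order statistics of recent gradient norms (plausibly min, 25th, 50th, 75th percentile, and max) together with the clip threshold, and in every instance the threshold equals Clipping_scale times the middle value: the most recent warning has median 1.171e+02 and threshold 2.342e+02 with Clipping_scale=2.0, and earlier ones follow the same rule (e.g. median 1.020e+02 -> threshold 2.040e+02). "percent-clipped" is then the share of recent batches whose norm exceeded that threshold. A minimal sketch of such median-tracking clipping; the rolling-buffer length and the exact statistics window are assumptions, not the optimizer's actual internals:

    from collections import deque

    import torch

    class MedianGradClipper:
        """Clip the global grad norm at clipping_scale * median of recent norms."""

        def __init__(self, clipping_scale: float = 2.0, history: int = 1000) -> None:
            self.clipping_scale = clipping_scale
            self.norms: deque = deque(maxlen=history)  # buffer length is an assumption
            self.num_clipped = 0
            self.num_seen = 0

        def clip_(self, parameters) -> float:
            params = [p for p in parameters if p.grad is not None]
            total_norm = torch.norm(
                torch.stack([p.grad.detach().norm() for p in params])
            ).item()
            self.norms.append(total_norm)
            self.num_seen += 1
            stats = torch.quantile(
                torch.tensor(list(self.norms)),
                torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]),
            )
            threshold = self.clipping_scale * stats[2].item()  # scale * median
            if total_norm > threshold:
                self.num_clipped += 1  # feeds the "percent-clipped" statistic
                for p in params:
                    p.grad.mul_(threshold / total_norm)
            return threshold

Plugging the most recent quartiles above into this scheme gives threshold = 2.0 * 1.171e+02 = 2.342e+02, matching the logged value.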
+2024-09-17 06:14:47,511 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 06:14:50,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=162900.0, ans=0.125 +2024-09-17 06:15:02,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=162940.0, ans=0.125 +2024-09-17 06:15:43,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1.whitening_limit, batch_count=163020.0, ans=10.0 +2024-09-17 06:15:49,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=163060.0, ans=0.025 +2024-09-17 06:15:52,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=163060.0, ans=0.125 +2024-09-17 06:15:55,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=163060.0, ans=0.2 +2024-09-17 06:16:02,853 INFO [train.py:1198] (1/2) Epoch 10, batch 50, loss[loss=0.2324, ctc_loss=0.1473, cr_loss=0.3674, attn_decoder_loss=0.2337, over 29423.00 frames. ], tot_loss[loss=0.2717, ctc_loss=0.1817, cr_loss=0.4143, attn_decoder_loss=0.2725, over 1267008.14 frames. ], batch size: 70, lr: 1.15e-02, grad_scale: 4.0 +2024-09-17 06:16:07,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=163100.0, ans=0.1 +2024-09-17 06:16:19,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=163140.0, ans=0.125 +2024-09-17 06:16:25,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.48 vs. limit=6.0 +2024-09-17 06:16:52,270 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.150e+01 9.660e+01 1.078e+02 1.244e+02 7.750e+02, threshold=2.155e+02, percent-clipped=3.0 +2024-09-17 06:17:02,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=163220.0, ans=0.1 +2024-09-17 06:17:05,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=163220.0, ans=0.0 +2024-09-17 06:17:13,619 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.15 vs. limit=22.5 +2024-09-17 06:17:17,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=163260.0, ans=0.0 +2024-09-17 06:17:22,992 INFO [train.py:1198] (1/2) Epoch 10, batch 100, loss[loss=0.2542, ctc_loss=0.1576, cr_loss=0.3854, attn_decoder_loss=0.2564, over 29526.00 frames. ], tot_loss[loss=0.2732, ctc_loss=0.1813, cr_loss=0.4168, attn_decoder_loss=0.2742, over 2251338.93 frames. 
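Note how each `tot_loss[...]` entry is reported "over N frames" with N growing batch by batch: the running statistics are kept as frame-weighted sums, so longer batches contribute proportionally more. A minimal sketch of such a tracker (the class name and layout are illustrative, not necessarily identical to icefall's utilities):

```python
from collections import defaultdict

class MetricsTracker(defaultdict):
    """Accumulates metric sums keyed by name, plus a 'frames' count."""
    def __init__(self):
        super().__init__(float)

    def __add__(self, other: "MetricsTracker") -> "MetricsTracker":
        out = MetricsTracker()
        for k in set(self) | set(other):
            out[k] = self[k] + other[k]
        return out

    def norm(self) -> dict:
        # Divide every accumulated sum by the total frame count.
        frames = self["frames"]
        return {k: v / frames for k, v in self.items() if k != "frames"}

# Two hypothetical batches: (summed loss, frame count).
tot = MetricsTracker()
for batch_loss, batch_frames in [(0.2742 * 100, 100), (0.2696 * 80, 80)]:
    cur = MetricsTracker()
    cur["loss"], cur["frames"] = batch_loss, batch_frames
    tot = tot + cur
print(tot.norm())  # frame-weighted average loss so far
```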
], batch size: 76, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:17:27,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=163300.0, ans=0.0 +2024-09-17 06:17:50,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2.whitening_limit, batch_count=163340.0, ans=15.0 +2024-09-17 06:18:08,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=163420.0, ans=0.0 +2024-09-17 06:18:08,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=163420.0, ans=10.0 +2024-09-17 06:18:15,959 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.94 vs. limit=15.0 +2024-09-17 06:18:21,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=163460.0, ans=0.125 +2024-09-17 06:18:23,658 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.00 vs. limit=15.0 +2024-09-17 06:18:33,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=163460.0, ans=0.0 +2024-09-17 06:18:37,442 INFO [train.py:1198] (1/2) Epoch 10, batch 150, loss[loss=0.2493, ctc_loss=0.1673, cr_loss=0.3956, attn_decoder_loss=0.2496, over 29467.00 frames. ], tot_loss[loss=0.2702, ctc_loss=0.1777, cr_loss=0.412, attn_decoder_loss=0.2713, over 3047478.20 frames. ], batch size: 70, lr: 1.15e-02, grad_scale: 4.0 +2024-09-17 06:18:51,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=163540.0, ans=0.125 +2024-09-17 06:19:00,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=163540.0, ans=0.1 +2024-09-17 06:19:02,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=163540.0, ans=0.5 +2024-09-17 06:19:04,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=163540.0, ans=0.125 +2024-09-17 06:19:07,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.83 vs. limit=22.5 +2024-09-17 06:19:08,668 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.63 vs. limit=6.0 +2024-09-17 06:19:13,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=163580.0, ans=0.125 +2024-09-17 06:19:15,580 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.70 vs. limit=15.0 +2024-09-17 06:19:24,811 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.84 vs. 
limit=15.0 +2024-09-17 06:19:25,268 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 9.231e+01 9.712e+01 1.046e+02 1.496e+02, threshold=1.942e+02, percent-clipped=0.0 +2024-09-17 06:19:30,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=163620.0, ans=0.1 +2024-09-17 06:19:39,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=163660.0, ans=0.025 +2024-09-17 06:19:52,311 INFO [train.py:1198] (1/2) Epoch 10, batch 200, loss[loss=0.2921, ctc_loss=0.1955, cr_loss=0.4316, attn_decoder_loss=0.2933, over 27343.00 frames. ], tot_loss[loss=0.2684, ctc_loss=0.1761, cr_loss=0.4097, attn_decoder_loss=0.2696, over 3659428.76 frames. ], batch size: 125, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:20:00,453 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.51 vs. limit=22.5 +2024-09-17 06:20:03,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=163700.0, ans=0.0 +2024-09-17 06:20:15,080 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:20:21,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=163780.0, ans=0.09899494936611666 +2024-09-17 06:20:27,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.91 vs. limit=6.0 +2024-09-17 06:20:41,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=163820.0, ans=0.125 +2024-09-17 06:20:53,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=163820.0, ans=0.2 +2024-09-17 06:20:55,251 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=8.10 vs. limit=12.0 +2024-09-17 06:21:12,288 INFO [train.py:1198] (1/2) Epoch 10, batch 250, loss[loss=0.2812, ctc_loss=0.1893, cr_loss=0.4253, attn_decoder_loss=0.282, over 29289.00 frames. ], tot_loss[loss=0.2678, ctc_loss=0.1755, cr_loss=0.4096, attn_decoder_loss=0.269, over 4140730.77 frames. ], batch size: 100, lr: 1.15e-02, grad_scale: 4.0 +2024-09-17 06:21:17,608 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.17 vs. limit=22.5 +2024-09-17 06:21:45,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=163980.0, ans=0.1 +2024-09-17 06:21:52,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=163980.0, ans=0.09899494936611666 +2024-09-17 06:21:55,708 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.36 vs. limit=22.5 +2024-09-17 06:21:57,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.89 vs. 
limit=15.0 +2024-09-17 06:22:02,582 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.245e+01 9.493e+01 1.020e+02 1.129e+02 1.613e+02, threshold=2.040e+02, percent-clipped=0.0 +2024-09-17 06:22:23,109 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.49 vs. limit=15.0 +2024-09-17 06:22:28,435 INFO [train.py:1198] (1/2) Epoch 10, batch 300, loss[loss=0.2803, ctc_loss=0.1812, cr_loss=0.4367, attn_decoder_loss=0.2816, over 29529.00 frames. ], tot_loss[loss=0.2676, ctc_loss=0.175, cr_loss=0.4096, attn_decoder_loss=0.2688, over 4509830.03 frames. ], batch size: 92, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:22:41,478 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.47 vs. limit=15.0 +2024-09-17 06:22:51,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=164140.0, ans=0.125 +2024-09-17 06:22:57,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=164180.0, ans=0.125 +2024-09-17 06:23:05,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=164180.0, ans=0.1 +2024-09-17 06:23:28,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=164260.0, ans=0.0 +2024-09-17 06:23:28,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=164260.0, ans=0.1 +2024-09-17 06:23:30,515 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.10 vs. limit=10.0 +2024-09-17 06:23:34,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=164260.0, ans=0.025 +2024-09-17 06:23:43,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=164300.0, ans=0.125 +2024-09-17 06:23:44,699 INFO [train.py:1198] (1/2) Epoch 10, batch 350, loss[loss=0.2408, ctc_loss=0.153, cr_loss=0.3857, attn_decoder_loss=0.242, over 29328.00 frames. ], tot_loss[loss=0.2687, ctc_loss=0.1756, cr_loss=0.4103, attn_decoder_loss=0.2699, over 4795205.45 frames. 
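The dense `ScheduledFloat: name=..., batch_count=..., ans=...` lines track hyperparameters (skip rates, dropout probabilities, balancer targets) whose values are functions of training progress rather than constants. A minimal sketch of piecewise-linear scheduling over `batch_count`, which is the general idea behind scaling.py's richer implementation (the breakpoints below are made up for illustration):

```python
class ScheduledFloat:
    """A float that interpolates linearly between (batch_count, value) pairs."""
    def __init__(self, *points):
        self.points = list(points)  # assumed sorted by batch_count

    def value(self, batch_count: float) -> float:
        pts = self.points
        if batch_count <= pts[0][0]:
            return pts[0][1]
        if batch_count >= pts[-1][0]:
            return pts[-1][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if x0 <= batch_count <= x1:
                return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

# e.g. a skip rate that decays from 0.5 to 0.0 over the first 20k batches:
skip_rate = ScheduledFloat((0.0, 0.5), (20000.0, 0.0))
print(skip_rate.value(162280.0))  # -> 0.0, matching the late-training "ans=0.0" entries
```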
], batch size: 71, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:23:45,079 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:23:57,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=164300.0, ans=0.0 +2024-09-17 06:24:07,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=164340.0, ans=0.125 +2024-09-17 06:24:07,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=164340.0, ans=0.125 +2024-09-17 06:24:12,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=164340.0, ans=0.0 +2024-09-17 06:24:19,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=164380.0, ans=0.1 +2024-09-17 06:24:34,751 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.056e+01 9.667e+01 1.045e+02 1.260e+02 3.351e+02, threshold=2.090e+02, percent-clipped=2.0 +2024-09-17 06:24:36,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=164420.0, ans=0.1 +2024-09-17 06:25:05,712 INFO [train.py:1198] (1/2) Epoch 10, batch 400, loss[loss=0.2679, ctc_loss=0.1691, cr_loss=0.3869, attn_decoder_loss=0.2703, over 29711.00 frames. ], tot_loss[loss=0.2681, ctc_loss=0.1752, cr_loss=0.4101, attn_decoder_loss=0.2693, over 5024427.14 frames. ], batch size: 82, lr: 1.15e-02, grad_scale: 16.0 +2024-09-17 06:25:13,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=164500.0, ans=0.125 +2024-09-17 06:25:31,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=164540.0, ans=0.125 +2024-09-17 06:25:51,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=164620.0, ans=0.035 +2024-09-17 06:25:58,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=164620.0, ans=0.0 +2024-09-17 06:26:21,193 INFO [train.py:1198] (1/2) Epoch 10, batch 450, loss[loss=0.2831, ctc_loss=0.1904, cr_loss=0.4665, attn_decoder_loss=0.283, over 29707.00 frames. ], tot_loss[loss=0.2685, ctc_loss=0.1755, cr_loss=0.4105, attn_decoder_loss=0.2697, over 5185772.01 frames. ], batch size: 83, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:26:47,387 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.46 vs. limit=22.5 +2024-09-17 06:26:48,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=164740.0, ans=0.125 +2024-09-17 06:26:50,558 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.15 vs. 
limit=15.0 +2024-09-17 06:27:01,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=164780.0, ans=0.125 +2024-09-17 06:27:01,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=164780.0, ans=0.0 +2024-09-17 06:27:13,192 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.450e+01 9.295e+01 1.006e+02 1.063e+02 1.826e+02, threshold=2.013e+02, percent-clipped=0.0 +2024-09-17 06:27:16,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=164820.0, ans=0.025 +2024-09-17 06:27:29,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=164860.0, ans=0.1 +2024-09-17 06:27:37,053 INFO [train.py:1198] (1/2) Epoch 10, batch 500, loss[loss=0.2842, ctc_loss=0.1806, cr_loss=0.4118, attn_decoder_loss=0.2866, over 29438.00 frames. ], tot_loss[loss=0.2675, ctc_loss=0.1746, cr_loss=0.4096, attn_decoder_loss=0.2688, over 5328350.72 frames. ], batch size: 94, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:27:52,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=164940.0, ans=0.2 +2024-09-17 06:28:22,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=165020.0, ans=0.125 +2024-09-17 06:28:57,305 INFO [train.py:1198] (1/2) Epoch 10, batch 550, loss[loss=0.2837, ctc_loss=0.1922, cr_loss=0.4345, attn_decoder_loss=0.2842, over 28781.00 frames. ], tot_loss[loss=0.2675, ctc_loss=0.1745, cr_loss=0.4092, attn_decoder_loss=0.2687, over 5420333.13 frames. ], batch size: 104, lr: 1.15e-02, grad_scale: 8.0 +2024-09-17 06:29:00,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=165100.0, ans=0.5 +2024-09-17 06:29:02,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=165100.0, ans=0.2 +2024-09-17 06:29:05,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=165100.0, ans=0.2 +2024-09-17 06:29:15,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=165140.0, ans=0.2 +2024-09-17 06:29:46,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=165220.0, ans=0.0 +2024-09-17 06:29:51,813 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.131e+01 9.579e+01 1.029e+02 1.127e+02 2.367e+02, threshold=2.058e+02, percent-clipped=2.0 +2024-09-17 06:30:10,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=165260.0, ans=0.125 +2024-09-17 06:30:13,110 INFO [train.py:1198] (1/2) Epoch 10, batch 600, loss[loss=0.2901, ctc_loss=0.1909, cr_loss=0.4439, attn_decoder_loss=0.2912, over 29257.00 frames. ], tot_loss[loss=0.2675, ctc_loss=0.1742, cr_loss=0.4093, attn_decoder_loss=0.2688, over 5507077.33 frames. 
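The periodic optim.py WARNING lines summarize recently observed gradient norms as a five-number summary. Reading off the numbers, the threshold is consistently twice the median (e.g. threshold=2.013e+02 against a median of 1.006e+02 with Clipping_scale=2.0), which suggests median-relative clipping over a sliding window. A hedged sketch of that scheme (not the actual ScaledAdam optimizer code; the window size is an assumption):

```python
import torch

def clip_with_stats(params: list, recent_norms: list, clipping_scale: float = 2.0):
    # max_norm=inf makes clip_grad_norm_ a no-op that still returns the norm.
    norm = torch.nn.utils.clip_grad_norm_(params, max_norm=float("inf"))
    recent_norms.append(float(norm))
    window = torch.tensor(recent_norms[-1000:])
    # Five-number summary, as printed in the WARNING lines.
    q = torch.quantile(window, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * q[2].item()  # 2 x median matches the logged thresholds
    if float(norm) > threshold:
        for p in params:
            if p.grad is not None:
                p.grad.mul_(threshold / float(norm))
    pct = 100.0 * (window > threshold).float().mean().item()
    print(f"grad-norm quartiles {[f'{v:.3e}' for v in q.tolist()]}, "
          f"threshold={threshold:.3e}, percent-clipped={pct:.1f}")
    return norm
```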
], batch size: 100, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:30:26,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=165340.0, ans=0.125 +2024-09-17 06:30:31,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=165340.0, ans=0.0 +2024-09-17 06:30:35,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_ff2.min_abs, batch_count=165340.0, ans=0.1 +2024-09-17 06:30:45,583 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.22 vs. limit=12.0 +2024-09-17 06:30:55,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=165380.0, ans=0.125 +2024-09-17 06:30:58,772 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.72 vs. limit=12.0 +2024-09-17 06:31:19,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=165460.0, ans=10.0 +2024-09-17 06:31:27,706 INFO [train.py:1198] (1/2) Epoch 10, batch 650, loss[loss=0.2614, ctc_loss=0.1665, cr_loss=0.3955, attn_decoder_loss=0.2632, over 29758.00 frames. ], tot_loss[loss=0.2666, ctc_loss=0.1732, cr_loss=0.4078, attn_decoder_loss=0.2679, over 5584589.73 frames. ], batch size: 81, lr: 1.14e-02, grad_scale: 4.0 +2024-09-17 06:31:53,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_ff3.min_abs, batch_count=165540.0, ans=0.2 +2024-09-17 06:32:07,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=165580.0, ans=0.125 +2024-09-17 06:32:19,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=165620.0, ans=0.0 +2024-09-17 06:32:23,906 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.175e+01 9.258e+01 9.852e+01 1.047e+02 1.585e+02, threshold=1.970e+02, percent-clipped=0.0 +2024-09-17 06:32:24,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=165620.0, ans=0.2 +2024-09-17 06:32:25,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=165620.0, ans=0.2 +2024-09-17 06:32:26,506 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.87 vs. limit=12.0 +2024-09-17 06:32:47,994 INFO [train.py:1198] (1/2) Epoch 10, batch 700, loss[loss=0.2619, ctc_loss=0.1697, cr_loss=0.4002, attn_decoder_loss=0.2632, over 29542.00 frames. ], tot_loss[loss=0.2674, ctc_loss=0.1741, cr_loss=0.4094, attn_decoder_loss=0.2687, over 5636214.23 frames. ], batch size: 76, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:32:50,222 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.55 vs. 
limit=15.0 +2024-09-17 06:33:00,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=165700.0, ans=0.125 +2024-09-17 06:33:04,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=165740.0, ans=0.125 +2024-09-17 06:33:14,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.73 vs. limit=15.0 +2024-09-17 06:33:15,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=165740.0, ans=0.2 +2024-09-17 06:33:43,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=165820.0, ans=0.025 +2024-09-17 06:33:50,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=165860.0, ans=0.0 +2024-09-17 06:33:51,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=165860.0, ans=0.1 +2024-09-17 06:34:03,338 INFO [train.py:1198] (1/2) Epoch 10, batch 750, loss[loss=0.2682, ctc_loss=0.1672, cr_loss=0.4169, attn_decoder_loss=0.2701, over 29714.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1736, cr_loss=0.4089, attn_decoder_loss=0.2683, over 5674319.71 frames. ], batch size: 82, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:34:14,433 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.70 vs. limit=15.0 +2024-09-17 06:34:39,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=165980.0, ans=0.0 +2024-09-17 06:35:00,788 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 9.819e+01 1.062e+02 1.153e+02 3.541e+02, threshold=2.124e+02, percent-clipped=2.0 +2024-09-17 06:35:18,939 INFO [train.py:1198] (1/2) Epoch 10, batch 800, loss[loss=0.2377, ctc_loss=0.146, cr_loss=0.3668, attn_decoder_loss=0.2397, over 29599.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.1737, cr_loss=0.4097, attn_decoder_loss=0.2685, over 5705656.55 frames. ], batch size: 73, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:35:27,592 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.75 vs. limit=15.0 +2024-09-17 06:35:35,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=166140.0, ans=0.1 +2024-09-17 06:35:53,949 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:35:56,228 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.86 vs. limit=15.0 +2024-09-17 06:35:58,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=166180.0, ans=0.125 +2024-09-17 06:36:09,487 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.84 vs. 
limit=22.5 +2024-09-17 06:36:12,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=166220.0, ans=0.07 +2024-09-17 06:36:34,094 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.87 vs. limit=15.0 +2024-09-17 06:36:36,135 INFO [train.py:1198] (1/2) Epoch 10, batch 850, loss[loss=0.2831, ctc_loss=0.1873, cr_loss=0.4067, attn_decoder_loss=0.2847, over 29705.00 frames. ], tot_loss[loss=0.2667, ctc_loss=0.1733, cr_loss=0.4085, attn_decoder_loss=0.268, over 5736269.02 frames. ], batch size: 89, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:37:03,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=166340.0, ans=0.125 +2024-09-17 06:37:12,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=166380.0, ans=0.1 +2024-09-17 06:37:27,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=166420.0, ans=0.125 +2024-09-17 06:37:37,422 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.461e+01 9.790e+01 1.072e+02 1.196e+02 1.464e+02, threshold=2.145e+02, percent-clipped=0.0 +2024-09-17 06:37:37,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=166460.0, ans=0.125 +2024-09-17 06:37:41,398 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=17.12 vs. limit=15.0 +2024-09-17 06:37:43,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=166460.0, ans=0.0 +2024-09-17 06:37:48,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=166460.0, ans=0.125 +2024-09-17 06:37:50,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=166460.0, ans=0.0 +2024-09-17 06:37:54,164 INFO [train.py:1198] (1/2) Epoch 10, batch 900, loss[loss=0.2549, ctc_loss=0.1708, cr_loss=0.4023, attn_decoder_loss=0.2553, over 29599.00 frames. ], tot_loss[loss=0.2669, ctc_loss=0.1736, cr_loss=0.4092, attn_decoder_loss=0.2681, over 5742819.18 frames. ], batch size: 73, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:38:07,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=166540.0, ans=0.025 +2024-09-17 06:38:14,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.28 vs. 
limit=15.0 +2024-09-17 06:38:33,773 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:38:35,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=166580.0, ans=0.0 +2024-09-17 06:38:38,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=166620.0, ans=0.0 +2024-09-17 06:38:47,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=166620.0, ans=0.125 +2024-09-17 06:38:47,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=166620.0, ans=0.125 +2024-09-17 06:38:56,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=166660.0, ans=0.07 +2024-09-17 06:39:09,489 INFO [train.py:1198] (1/2) Epoch 10, batch 950, loss[loss=0.2504, ctc_loss=0.1658, cr_loss=0.3933, attn_decoder_loss=0.251, over 29492.00 frames. ], tot_loss[loss=0.2673, ctc_loss=0.174, cr_loss=0.4098, attn_decoder_loss=0.2686, over 5744469.84 frames. ], batch size: 74, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:39:11,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.50 vs. limit=15.0 +2024-09-17 06:39:18,203 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.19 vs. limit=10.0 +2024-09-17 06:39:18,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=166700.0, ans=0.0 +2024-09-17 06:39:40,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.66 vs. limit=15.0 +2024-09-17 06:39:48,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.52 vs. limit=15.0 +2024-09-17 06:39:59,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=166820.0, ans=0.2 +2024-09-17 06:40:06,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.30 vs. limit=15.0 +2024-09-17 06:40:09,833 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.298e+01 9.770e+01 1.085e+02 1.240e+02 2.634e+02, threshold=2.170e+02, percent-clipped=2.0 +2024-09-17 06:40:26,945 INFO [train.py:1198] (1/2) Epoch 10, batch 1000, loss[loss=0.2591, ctc_loss=0.1714, cr_loss=0.3871, attn_decoder_loss=0.2602, over 29508.00 frames. ], tot_loss[loss=0.2684, ctc_loss=0.1752, cr_loss=0.41, attn_decoder_loss=0.2696, over 5739304.80 frames. ], batch size: 77, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:40:59,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=166980.0, ans=0.125 +2024-09-17 06:41:10,908 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.45 vs. 
limit=15.0 +2024-09-17 06:41:17,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=167020.0, ans=0.1 +2024-09-17 06:41:26,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=167020.0, ans=0.2 +2024-09-17 06:41:34,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=167060.0, ans=0.0 +2024-09-17 06:41:35,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=167060.0, ans=0.125 +2024-09-17 06:41:38,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=167060.0, ans=0.1 +2024-09-17 06:41:40,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=167060.0, ans=0.1 +2024-09-17 06:41:44,651 INFO [train.py:1198] (1/2) Epoch 10, batch 1050, loss[loss=0.2678, ctc_loss=0.175, cr_loss=0.4168, attn_decoder_loss=0.2688, over 29669.00 frames. ], tot_loss[loss=0.2673, ctc_loss=0.174, cr_loss=0.4088, attn_decoder_loss=0.2686, over 5745736.36 frames. ], batch size: 85, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:42:16,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=167180.0, ans=0.1 +2024-09-17 06:42:18,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=167180.0, ans=0.0 +2024-09-17 06:42:28,835 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.27 vs. limit=15.0 +2024-09-17 06:42:45,686 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.318e+01 9.401e+01 9.855e+01 1.069e+02 2.033e+02, threshold=1.971e+02, percent-clipped=0.0 +2024-09-17 06:42:47,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=167260.0, ans=0.0 +2024-09-17 06:43:00,924 INFO [train.py:1198] (1/2) Epoch 10, batch 1100, loss[loss=0.2518, ctc_loss=0.1531, cr_loss=0.3733, attn_decoder_loss=0.2544, over 29457.00 frames. ], tot_loss[loss=0.2669, ctc_loss=0.1736, cr_loss=0.4082, attn_decoder_loss=0.2682, over 5758028.42 frames. ], batch size: 78, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:43:07,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=167300.0, ans=0.125 +2024-09-17 06:43:08,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=167300.0, ans=0.0 +2024-09-17 06:43:11,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=167300.0, ans=0.0 +2024-09-17 06:43:36,633 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.12 vs. limit=15.0 +2024-09-17 06:43:39,135 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.06 vs. 
limit=22.5 +2024-09-17 06:43:50,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=167420.0, ans=0.0 +2024-09-17 06:43:52,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=167420.0, ans=0.2 +2024-09-17 06:43:54,542 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.60 vs. limit=22.5 +2024-09-17 06:43:59,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=167460.0, ans=0.125 +2024-09-17 06:44:08,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=167460.0, ans=0.1 +2024-09-17 06:44:18,491 INFO [train.py:1198] (1/2) Epoch 10, batch 1150, loss[loss=0.2645, ctc_loss=0.174, cr_loss=0.3965, attn_decoder_loss=0.2657, over 29441.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1736, cr_loss=0.4085, attn_decoder_loss=0.2683, over 5755343.74 frames. ], batch size: 78, lr: 1.14e-02, grad_scale: 4.0 +2024-09-17 06:44:31,549 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.57 vs. limit=22.5 +2024-09-17 06:44:45,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=167540.0, ans=0.0 +2024-09-17 06:44:50,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=167580.0, ans=0.0 +2024-09-17 06:45:12,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=167620.0, ans=0.0 +2024-09-17 06:45:15,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=167620.0, ans=0.0 +2024-09-17 06:45:22,896 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.006e+01 9.612e+01 1.039e+02 1.179e+02 2.688e+02, threshold=2.078e+02, percent-clipped=2.0 +2024-09-17 06:45:33,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=167660.0, ans=0.125 +2024-09-17 06:45:36,427 INFO [train.py:1198] (1/2) Epoch 10, batch 1200, loss[loss=0.2676, ctc_loss=0.1612, cr_loss=0.4091, attn_decoder_loss=0.2703, over 29671.00 frames. ], tot_loss[loss=0.2677, ctc_loss=0.1743, cr_loss=0.4088, attn_decoder_loss=0.269, over 5748178.95 frames. ], batch size: 85, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:45:38,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=167700.0, ans=0.1 +2024-09-17 06:45:39,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=167700.0, ans=0.2 +2024-09-17 06:46:02,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=167740.0, ans=0.0 +2024-09-17 06:46:09,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.37 vs. 
limit=15.0 +2024-09-17 06:46:10,039 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:46:16,042 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:46:31,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=167820.0, ans=0.0 +2024-09-17 06:46:47,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=167860.0, ans=0.1 +2024-09-17 06:46:51,948 INFO [train.py:1198] (1/2) Epoch 10, batch 1250, loss[loss=0.2913, ctc_loss=0.1998, cr_loss=0.4382, attn_decoder_loss=0.2918, over 29544.00 frames. ], tot_loss[loss=0.268, ctc_loss=0.1743, cr_loss=0.4094, attn_decoder_loss=0.2693, over 5774584.13 frames. ], batch size: 92, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:46:58,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=167900.0, ans=0.125 +2024-09-17 06:47:11,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=167940.0, ans=0.125 +2024-09-17 06:47:23,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=167980.0, ans=0.125 +2024-09-17 06:47:48,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=168020.0, ans=0.1 +2024-09-17 06:47:49,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=168020.0, ans=0.125 +2024-09-17 06:47:56,413 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.293e+01 9.363e+01 1.028e+02 1.124e+02 2.251e+02, threshold=2.057e+02, percent-clipped=1.0 +2024-09-17 06:47:58,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=168060.0, ans=0.125 +2024-09-17 06:48:05,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=168060.0, ans=0.1 +2024-09-17 06:48:08,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=168100.0, ans=0.125 +2024-09-17 06:48:09,976 INFO [train.py:1198] (1/2) Epoch 10, batch 1300, loss[loss=0.2849, ctc_loss=0.1813, cr_loss=0.4279, attn_decoder_loss=0.2869, over 28411.00 frames. ], tot_loss[loss=0.2671, ctc_loss=0.1732, cr_loss=0.4089, attn_decoder_loss=0.2684, over 5779487.77 frames. 
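The `Whitening: ... metric=X vs. limit=Y` lines compare a whiteness statistic of a module's activations against a limit; a corrective loss is only applied when the metric exceeds the limit, nudging the feature covariance back toward a multiple of the identity. One plausible such statistic is sketched below, under the assumption that it is the mean squared eigenvalue of the covariance divided by the squared mean eigenvalue (exactly 1.0 for perfectly white features, larger when a few directions dominate):

```python
import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    """x: (num_frames, num_channels) activations."""
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.T @ x) / x.shape[0]
    eigs = torch.linalg.eigvalsh(cov)  # real eigenvalues, ascending
    return (eigs ** 2).mean() / (eigs.mean() ** 2 + 1e-20)

x = torch.randn(1000, 192)
print(whitening_metric(x))  # near 1 (sampling noise pushes it slightly above)
print(whitening_metric(x * torch.linspace(0.1, 3, 192)))  # anisotropic: much larger

# Assumed penalty rule: add max(0, metric - limit) times some weight to the loss.
```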
], batch size: 111, lr: 1.14e-02, grad_scale: 8.0 +2024-09-17 06:48:22,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=168100.0, ans=0.125 +2024-09-17 06:48:35,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=168140.0, ans=0.2 +2024-09-17 06:48:42,806 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:49:20,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=168260.0, ans=0.2 +2024-09-17 06:49:27,981 INFO [train.py:1198] (1/2) Epoch 10, batch 1350, loss[loss=0.2618, ctc_loss=0.1621, cr_loss=0.3744, attn_decoder_loss=0.2646, over 29752.00 frames. ], tot_loss[loss=0.2671, ctc_loss=0.1732, cr_loss=0.4087, attn_decoder_loss=0.2684, over 5795018.21 frames. ], batch size: 81, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:49:35,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.31 vs. limit=12.0 +2024-09-17 06:49:36,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.37 vs. limit=12.0 +2024-09-17 06:49:44,748 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:49:49,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=168340.0, ans=0.125 +2024-09-17 06:49:59,931 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.91 vs. limit=22.5 +2024-09-17 06:50:02,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=168380.0, ans=0.125 +2024-09-17 06:50:10,511 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.97 vs. limit=15.0 +2024-09-17 06:50:29,343 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.801e+01 9.605e+01 1.036e+02 1.132e+02 1.597e+02, threshold=2.072e+02, percent-clipped=0.0 +2024-09-17 06:50:42,759 INFO [train.py:1198] (1/2) Epoch 10, batch 1400, loss[loss=0.2304, ctc_loss=0.1448, cr_loss=0.3508, attn_decoder_loss=0.2321, over 29565.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1731, cr_loss=0.4087, attn_decoder_loss=0.2684, over 5806263.47 frames. ], batch size: 69, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:51:05,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=168540.0, ans=0.0 +2024-09-17 06:51:20,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=168580.0, ans=0.125 +2024-09-17 06:51:40,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=168620.0, ans=0.0 +2024-09-17 06:52:00,292 INFO [train.py:1198] (1/2) Epoch 10, batch 1450, loss[loss=0.2803, ctc_loss=0.1865, cr_loss=0.4393, attn_decoder_loss=0.2809, over 29461.00 frames. ], tot_loss[loss=0.2678, ctc_loss=0.1739, cr_loss=0.4099, attn_decoder_loss=0.2691, over 5803037.05 frames. 
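The `grad_scale` field in the batch lines (8.0, 4.0, 16.0, ...) behaves like a dynamic loss scale for mixed-precision training: it grows after long stretches of finite gradients and halves when an overflow is detected, so steps with inf/NaN gradients are skipped rather than applied. A minimal, self-contained example with PyTorch's stock GradScaler (a CUDA device is assumed; the tiny linear model is a placeholder, not the recipe's Zipformer):

```python
import torch

model = torch.nn.Linear(80, 500).cuda()
optimizer = torch.optim.AdamW(model.parameters(), lr=1.15e-2)
scaler = torch.cuda.amp.GradScaler(init_scale=8.0)

features = torch.randn(16, 80, device="cuda")
targets = torch.randint(0, 500, (16,), device="cuda")

with torch.cuda.amp.autocast():
    loss = torch.nn.functional.cross_entropy(model(features), targets)

scaler.scale(loss).backward()   # backward on the scaled loss
scaler.step(optimizer)          # unscales grads, skips the step on inf/NaN
scaler.update()                 # grows or shrinks the scale dynamically
print(scaler.get_scale())       # the value that would be logged as grad_scale
```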
], batch size: 94, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:52:03,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=168700.0, ans=0.0 +2024-09-17 06:52:12,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=168700.0, ans=0.125 +2024-09-17 06:52:26,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=168740.0, ans=0.125 +2024-09-17 06:52:26,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.32 vs. limit=22.5 +2024-09-17 06:52:27,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=168740.0, ans=0.2 +2024-09-17 06:52:52,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=168820.0, ans=0.125 +2024-09-17 06:52:56,588 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.11 vs. limit=6.0 +2024-09-17 06:53:02,189 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.31 vs. limit=6.0 +2024-09-17 06:53:06,096 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.081e+01 9.586e+01 1.053e+02 1.129e+02 3.740e+02, threshold=2.106e+02, percent-clipped=3.0 +2024-09-17 06:53:18,281 INFO [train.py:1198] (1/2) Epoch 10, batch 1500, loss[loss=0.2741, ctc_loss=0.1791, cr_loss=0.4094, attn_decoder_loss=0.2755, over 29630.00 frames. ], tot_loss[loss=0.2684, ctc_loss=0.1742, cr_loss=0.4102, attn_decoder_loss=0.2698, over 5804479.03 frames. ], batch size: 86, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:53:34,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.63 vs. limit=22.5 +2024-09-17 06:53:37,765 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.18 vs. limit=22.5 +2024-09-17 06:53:45,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=168940.0, ans=0.0 +2024-09-17 06:53:45,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=168940.0, ans=0.0 +2024-09-17 06:53:49,638 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.98 vs. limit=12.0 +2024-09-17 06:53:55,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=168980.0, ans=0.1 +2024-09-17 06:53:56,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=168980.0, ans=0.125 +2024-09-17 06:53:58,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=168980.0, ans=0.0 +2024-09-17 06:54:00,130 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.39 vs. 
limit=22.5 +2024-09-17 06:54:02,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=169020.0, ans=0.1 +2024-09-17 06:54:34,393 INFO [train.py:1198] (1/2) Epoch 10, batch 1550, loss[loss=0.2788, ctc_loss=0.1858, cr_loss=0.4233, attn_decoder_loss=0.2798, over 29521.00 frames. ], tot_loss[loss=0.2686, ctc_loss=0.1749, cr_loss=0.4106, attn_decoder_loss=0.2699, over 5780267.15 frames. ], batch size: 90, lr: 1.13e-02, grad_scale: 4.0 +2024-09-17 06:54:39,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=169100.0, ans=0.125 +2024-09-17 06:54:43,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=169100.0, ans=0.1 +2024-09-17 06:55:00,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=169140.0, ans=0.2 +2024-09-17 06:55:11,406 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.10 vs. limit=6.0 +2024-09-17 06:55:17,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.63 vs. limit=22.5 +2024-09-17 06:55:18,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=169220.0, ans=0.125 +2024-09-17 06:55:22,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=169220.0, ans=0.125 +2024-09-17 06:55:25,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=169220.0, ans=0.125 +2024-09-17 06:55:26,619 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.58 vs. limit=15.0 +2024-09-17 06:55:41,103 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.001e+01 9.536e+01 1.067e+02 1.173e+02 2.612e+02, threshold=2.133e+02, percent-clipped=1.0 +2024-09-17 06:55:44,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=169260.0, ans=0.2 +2024-09-17 06:55:49,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=169260.0, ans=0.07 +2024-09-17 06:55:51,703 INFO [train.py:1198] (1/2) Epoch 10, batch 1600, loss[loss=0.2663, ctc_loss=0.1693, cr_loss=0.403, attn_decoder_loss=0.2681, over 29666.00 frames. ], tot_loss[loss=0.2677, ctc_loss=0.1742, cr_loss=0.4089, attn_decoder_loss=0.269, over 5763615.60 frames. ], batch size: 85, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:55:53,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=169300.0, ans=0.0 +2024-09-17 06:55:55,888 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.86 vs. 
limit=12.0 +2024-09-17 06:56:01,079 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 06:56:02,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=169300.0, ans=0.125 +2024-09-17 06:56:05,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=169340.0, ans=0.04949747468305833 +2024-09-17 06:56:58,269 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.22 vs. limit=15.0 +2024-09-17 06:57:09,468 INFO [train.py:1198] (1/2) Epoch 10, batch 1650, loss[loss=0.2692, ctc_loss=0.1667, cr_loss=0.3886, attn_decoder_loss=0.272, over 29709.00 frames. ], tot_loss[loss=0.2673, ctc_loss=0.1737, cr_loss=0.4076, attn_decoder_loss=0.2687, over 5757712.77 frames. ], batch size: 89, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:57:13,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=169500.0, ans=0.04949747468305833 +2024-09-17 06:57:20,894 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.33 vs. limit=15.0 +2024-09-17 06:57:36,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=169540.0, ans=0.125 +2024-09-17 06:57:53,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=169620.0, ans=0.125 +2024-09-17 06:58:12,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=169660.0, ans=0.125 +2024-09-17 06:58:13,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=169660.0, ans=0.1 +2024-09-17 06:58:14,706 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.189e+01 9.368e+01 9.821e+01 1.048e+02 1.434e+02, threshold=1.964e+02, percent-clipped=0.0 +2024-09-17 06:58:25,137 INFO [train.py:1198] (1/2) Epoch 10, batch 1700, loss[loss=0.2348, ctc_loss=0.1403, cr_loss=0.3623, attn_decoder_loss=0.2373, over 29525.00 frames. ], tot_loss[loss=0.2667, ctc_loss=0.173, cr_loss=0.4073, attn_decoder_loss=0.2681, over 5778835.17 frames. 
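The logged lr falls from 1.15e-02 at the start of epoch 10 to 1.13e-02 by its later batches, i.e. it decays smoothly with both batch count and epoch rather than in discrete steps. A sketch of an Eden-style schedule of the kind Zipformer recipes use is below; the exact formula, `base_lr`, `lr_batches`, and `lr_epochs` are illustrative assumptions and will not reproduce this run's precise values:

```python
def eden_lr(base_lr: float, batch: int, epoch: int,
            lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
    # Both factors start near 1 and decay like an inverse fourth root.
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor

# Monotone decay across the batch/epoch points seen in this log region:
for epoch, batch in [(9, 162400), (10, 167000), (10, 171000)]:
    print(epoch, batch, f"{eden_lr(0.045, batch, epoch):.2e}")
```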
], batch size: 69, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 06:58:32,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=169700.0, ans=0.125 +2024-09-17 06:59:14,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=169820.0, ans=0.125 +2024-09-17 06:59:22,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=169820.0, ans=0.0 +2024-09-17 06:59:26,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=169860.0, ans=0.1 +2024-09-17 06:59:32,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=169860.0, ans=0.125 +2024-09-17 06:59:42,516 INFO [train.py:1198] (1/2) Epoch 10, batch 1750, loss[loss=0.2327, ctc_loss=0.1461, cr_loss=0.3728, attn_decoder_loss=0.2341, over 29320.00 frames. ], tot_loss[loss=0.2664, ctc_loss=0.1726, cr_loss=0.407, attn_decoder_loss=0.2677, over 5786972.94 frames. ], batch size: 67, lr: 1.13e-02, grad_scale: 4.0 +2024-09-17 07:00:04,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=169940.0, ans=0.95 +2024-09-17 07:00:08,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=169940.0, ans=0.2 +2024-09-17 07:00:10,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=169940.0, ans=0.125 +2024-09-17 07:00:16,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=169980.0, ans=0.0 +2024-09-17 07:00:25,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=169980.0, ans=0.0 +2024-09-17 07:00:42,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=170020.0, ans=0.125 +2024-09-17 07:00:43,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=170060.0, ans=0.1 +2024-09-17 07:00:51,161 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.948e+01 9.325e+01 9.999e+01 1.093e+02 1.950e+02, threshold=2.000e+02, percent-clipped=0.0 +2024-09-17 07:01:00,147 INFO [train.py:1198] (1/2) Epoch 10, batch 1800, loss[loss=0.2683, ctc_loss=0.1698, cr_loss=0.4222, attn_decoder_loss=0.2699, over 29694.00 frames. ], tot_loss[loss=0.2665, ctc_loss=0.1728, cr_loss=0.4072, attn_decoder_loss=0.2679, over 5791119.93 frames. ], batch size: 83, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:01:00,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=170100.0, ans=0.95 +2024-09-17 07:01:02,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=170100.0, ans=0.125 +2024-09-17 07:01:19,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.44 vs. 
limit=12.0 +2024-09-17 07:01:21,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=170140.0, ans=0.1 +2024-09-17 07:02:09,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=170260.0, ans=0.125 +2024-09-17 07:02:15,931 INFO [train.py:1198] (1/2) Epoch 10, batch 1850, loss[loss=0.2858, ctc_loss=0.1942, cr_loss=0.439, attn_decoder_loss=0.2862, over 29614.00 frames. ], tot_loss[loss=0.2661, ctc_loss=0.1723, cr_loss=0.4062, attn_decoder_loss=0.2675, over 5798564.45 frames. ], batch size: 86, lr: 1.13e-02, grad_scale: 4.0 +2024-09-17 07:02:31,462 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.29 vs. limit=6.0 +2024-09-17 07:02:40,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=170340.0, ans=0.125 +2024-09-17 07:02:43,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=170340.0, ans=0.1 +2024-09-17 07:03:25,450 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.88 vs. limit=10.0 +2024-09-17 07:03:26,157 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.871e+01 9.382e+01 1.051e+02 1.159e+02 3.606e+02, threshold=2.101e+02, percent-clipped=3.0 +2024-09-17 07:03:33,539 INFO [train.py:1198] (1/2) Epoch 10, batch 1900, loss[loss=0.2847, ctc_loss=0.1928, cr_loss=0.4402, attn_decoder_loss=0.2851, over 29712.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1729, cr_loss=0.4074, attn_decoder_loss=0.2684, over 5805886.54 frames. ], batch size: 89, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:03:39,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=170500.0, ans=0.1 +2024-09-17 07:03:41,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=170500.0, ans=0.025 +2024-09-17 07:03:45,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=170500.0, ans=0.025 +2024-09-17 07:03:45,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=170500.0, ans=0.125 +2024-09-17 07:03:51,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=170540.0, ans=0.125 +2024-09-17 07:04:15,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=170580.0, ans=0.125 +2024-09-17 07:04:15,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=170580.0, ans=0.025 +2024-09-17 07:04:16,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=170580.0, ans=0.0 +2024-09-17 07:04:19,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=170620.0, ans=0.125 +2024-09-17 07:04:20,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.42 vs. 
limit=15.0 +2024-09-17 07:04:38,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=170660.0, ans=0.125 +2024-09-17 07:04:41,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=170660.0, ans=0.1 +2024-09-17 07:04:49,240 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.93 vs. limit=15.0 +2024-09-17 07:04:51,552 INFO [train.py:1198] (1/2) Epoch 10, batch 1950, loss[loss=0.2549, ctc_loss=0.1658, cr_loss=0.4143, attn_decoder_loss=0.2556, over 29461.00 frames. ], tot_loss[loss=0.2684, ctc_loss=0.1736, cr_loss=0.4096, attn_decoder_loss=0.2698, over 5820919.56 frames. ], batch size: 78, lr: 1.13e-02, grad_scale: 4.0 +2024-09-17 07:04:59,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=170700.0, ans=0.1 +2024-09-17 07:05:14,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=170740.0, ans=0.0 +2024-09-17 07:05:41,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=170820.0, ans=0.125 +2024-09-17 07:05:44,940 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.15 vs. limit=15.0 +2024-09-17 07:05:46,462 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.09 vs. limit=15.0 +2024-09-17 07:05:50,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.15 vs. limit=6.0 +2024-09-17 07:06:00,805 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.428e+01 1.003e+02 1.077e+02 1.161e+02 3.833e+02, threshold=2.155e+02, percent-clipped=2.0 +2024-09-17 07:06:06,881 INFO [train.py:1198] (1/2) Epoch 10, batch 2000, loss[loss=0.2412, ctc_loss=0.1553, cr_loss=0.3638, attn_decoder_loss=0.2427, over 29332.00 frames. ], tot_loss[loss=0.2691, ctc_loss=0.1746, cr_loss=0.4105, attn_decoder_loss=0.2704, over 5797978.47 frames. ], batch size: 67, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:06:22,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=170940.0, ans=0.125 +2024-09-17 07:06:25,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=170940.0, ans=0.125 +2024-09-17 07:06:50,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=171020.0, ans=0.0 +2024-09-17 07:07:14,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=171060.0, ans=0.0 +2024-09-17 07:07:24,544 INFO [train.py:1198] (1/2) Epoch 10, batch 2050, loss[loss=0.2321, ctc_loss=0.1378, cr_loss=0.3595, attn_decoder_loss=0.2346, over 29416.00 frames. ], tot_loss[loss=0.2678, ctc_loss=0.1736, cr_loss=0.409, attn_decoder_loss=0.2691, over 5790717.10 frames. 
], batch size: 70, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:07:26,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=171100.0, ans=0.0 +2024-09-17 07:07:26,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=171100.0, ans=10.0 +2024-09-17 07:07:35,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=171100.0, ans=0.2 +2024-09-17 07:07:42,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=171140.0, ans=0.0 +2024-09-17 07:07:53,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=171180.0, ans=0.125 +2024-09-17 07:08:04,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=171180.0, ans=0.07 +2024-09-17 07:08:16,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=171220.0, ans=0.05 +2024-09-17 07:08:37,667 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.251e+01 9.516e+01 1.015e+02 1.092e+02 1.956e+02, threshold=2.031e+02, percent-clipped=0.0 +2024-09-17 07:08:42,375 INFO [train.py:1198] (1/2) Epoch 10, batch 2100, loss[loss=0.2699, ctc_loss=0.1696, cr_loss=0.4255, attn_decoder_loss=0.2716, over 29742.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.173, cr_loss=0.4091, attn_decoder_loss=0.2686, over 5801797.66 frames. ], batch size: 81, lr: 1.13e-02, grad_scale: 8.0 +2024-09-17 07:08:50,518 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.14 vs. limit=10.0 +2024-09-17 07:09:07,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=171340.0, ans=0.125 +2024-09-17 07:09:09,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=171340.0, ans=0.2 +2024-09-17 07:09:15,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=171380.0, ans=0.0 +2024-09-17 07:09:20,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=171380.0, ans=0.125 +2024-09-17 07:09:26,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=171420.0, ans=0.0 +2024-09-17 07:09:38,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=171420.0, ans=0.125 +2024-09-17 07:09:46,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.89 vs. limit=15.0 +2024-09-17 07:09:56,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=171500.0, ans=0.125 +2024-09-17 07:09:57,458 INFO [train.py:1198] (1/2) Epoch 10, batch 2150, loss[loss=0.2614, ctc_loss=0.1813, cr_loss=0.4212, attn_decoder_loss=0.261, over 29453.00 frames. ], tot_loss[loss=0.2663, ctc_loss=0.172, cr_loss=0.407, attn_decoder_loss=0.2677, over 5815709.88 frames. 
], batch size: 78, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:09:59,720 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.72 vs. limit=15.0 +2024-09-17 07:10:00,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=171500.0, ans=0.0 +2024-09-17 07:10:03,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=171500.0, ans=0.1 +2024-09-17 07:10:46,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=171620.0, ans=0.125 +2024-09-17 07:11:11,937 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.141e+01 9.514e+01 1.011e+02 1.070e+02 3.193e+02, threshold=2.022e+02, percent-clipped=1.0 +2024-09-17 07:11:15,098 INFO [train.py:1198] (1/2) Epoch 10, batch 2200, loss[loss=0.256, ctc_loss=0.1526, cr_loss=0.3716, attn_decoder_loss=0.2592, over 29641.00 frames. ], tot_loss[loss=0.2662, ctc_loss=0.172, cr_loss=0.4077, attn_decoder_loss=0.2676, over 5812581.93 frames. ], batch size: 86, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:11:33,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=171740.0, ans=0.125 +2024-09-17 07:11:36,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=171740.0, ans=0.125 +2024-09-17 07:11:48,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=171780.0, ans=0.025 +2024-09-17 07:12:01,359 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.33 vs. limit=22.5 +2024-09-17 07:12:08,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=171820.0, ans=0.1 +2024-09-17 07:12:31,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=171900.0, ans=0.025 +2024-09-17 07:12:32,671 INFO [train.py:1198] (1/2) Epoch 10, batch 2250, loss[loss=0.2848, ctc_loss=0.1871, cr_loss=0.4341, attn_decoder_loss=0.286, over 29720.00 frames. ], tot_loss[loss=0.2663, ctc_loss=0.1717, cr_loss=0.4074, attn_decoder_loss=0.2677, over 5811595.00 frames. ], batch size: 82, lr: 1.12e-02, grad_scale: 4.0 +2024-09-17 07:12:40,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=171900.0, ans=0.125 +2024-09-17 07:12:50,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=8.57 vs. 
limit=15.0 +2024-09-17 07:13:03,107 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 07:13:21,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=172020.0, ans=0.025 +2024-09-17 07:13:24,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=172020.0, ans=0.2 +2024-09-17 07:13:33,752 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 07:13:45,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=172060.0, ans=0.1 +2024-09-17 07:13:46,988 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.071e+01 9.786e+01 1.069e+02 1.181e+02 2.871e+02, threshold=2.139e+02, percent-clipped=1.0 +2024-09-17 07:13:48,472 INFO [train.py:1198] (1/2) Epoch 10, batch 2300, loss[loss=0.2506, ctc_loss=0.1583, cr_loss=0.3916, attn_decoder_loss=0.2521, over 29337.00 frames. ], tot_loss[loss=0.2651, ctc_loss=0.1709, cr_loss=0.4057, attn_decoder_loss=0.2666, over 5799718.72 frames. ], batch size: 71, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:13:59,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=172100.0, ans=0.0 +2024-09-17 07:14:18,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=172180.0, ans=0.125 +2024-09-17 07:15:01,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=172260.0, ans=0.125 +2024-09-17 07:15:05,738 INFO [train.py:1198] (1/2) Epoch 10, batch 2350, loss[loss=0.2767, ctc_loss=0.1782, cr_loss=0.4176, attn_decoder_loss=0.2784, over 29697.00 frames. ], tot_loss[loss=0.2654, ctc_loss=0.1713, cr_loss=0.4068, attn_decoder_loss=0.2669, over 5805206.50 frames. ], batch size: 83, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:15:14,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=172300.0, ans=0.0 +2024-09-17 07:15:28,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=172340.0, ans=0.0 +2024-09-17 07:16:00,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=172420.0, ans=0.1 +2024-09-17 07:16:04,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=172460.0, ans=0.0 +2024-09-17 07:16:07,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=172460.0, ans=0.125 +2024-09-17 07:16:21,796 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.951e+01 9.247e+01 1.012e+02 1.078e+02 1.616e+02, threshold=2.023e+02, percent-clipped=0.0 +2024-09-17 07:16:23,395 INFO [train.py:1198] (1/2) Epoch 10, batch 2400, loss[loss=0.2446, ctc_loss=0.1478, cr_loss=0.3746, attn_decoder_loss=0.2471, over 29539.00 frames. ], tot_loss[loss=0.2656, ctc_loss=0.1713, cr_loss=0.4073, attn_decoder_loss=0.2671, over 5808922.33 frames. 
], batch size: 76, lr: 1.12e-02, grad_scale: 16.0 +2024-09-17 07:16:46,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=172540.0, ans=0.125 +2024-09-17 07:16:51,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=172540.0, ans=0.125 +2024-09-17 07:16:55,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=172580.0, ans=0.125 +2024-09-17 07:17:10,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=172620.0, ans=0.1 +2024-09-17 07:17:18,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.whiten.whitening_limit, batch_count=172620.0, ans=12.0 +2024-09-17 07:17:35,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.16 vs. limit=15.0 +2024-09-17 07:17:39,182 INFO [train.py:1198] (1/2) Epoch 10, batch 2450, loss[loss=0.2767, ctc_loss=0.1812, cr_loss=0.4299, attn_decoder_loss=0.2778, over 29695.00 frames. ], tot_loss[loss=0.2666, ctc_loss=0.1724, cr_loss=0.4085, attn_decoder_loss=0.268, over 5787679.01 frames. ], batch size: 82, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:17:43,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=172700.0, ans=0.2 +2024-09-17 07:17:46,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=172700.0, ans=0.1 +2024-09-17 07:17:51,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=172700.0, ans=0.95 +2024-09-17 07:18:06,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=172740.0, ans=0.125 +2024-09-17 07:18:12,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=172780.0, ans=0.125 +2024-09-17 07:18:32,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=172820.0, ans=0.125 +2024-09-17 07:18:33,615 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.00 vs. limit=12.0 +2024-09-17 07:18:34,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=172820.0, ans=0.0 +2024-09-17 07:18:44,942 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=14.02 vs. limit=15.0 +2024-09-17 07:18:57,666 INFO [train.py:1198] (1/2) Epoch 10, batch 2500, loss[loss=0.2811, ctc_loss=0.1799, cr_loss=0.4183, attn_decoder_loss=0.2831, over 29643.00 frames. ], tot_loss[loss=0.2662, ctc_loss=0.1718, cr_loss=0.4082, attn_decoder_loss=0.2677, over 5796913.22 frames. 
], batch size: 86, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:18:59,189 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.164e+01 9.501e+01 9.966e+01 1.113e+02 2.388e+02, threshold=1.993e+02, percent-clipped=1.0 +2024-09-17 07:18:59,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=172900.0, ans=0.0 +2024-09-17 07:19:05,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=172900.0, ans=0.0 +2024-09-17 07:19:16,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=172940.0, ans=0.125 +2024-09-17 07:19:20,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=172940.0, ans=0.0 +2024-09-17 07:19:26,827 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.21 vs. limit=22.5 +2024-09-17 07:19:31,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=172980.0, ans=0.2 +2024-09-17 07:19:40,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=172980.0, ans=0.125 +2024-09-17 07:20:15,466 INFO [train.py:1198] (1/2) Epoch 10, batch 2550, loss[loss=0.235, ctc_loss=0.1428, cr_loss=0.3513, attn_decoder_loss=0.2375, over 29349.00 frames. ], tot_loss[loss=0.2661, ctc_loss=0.1717, cr_loss=0.4081, attn_decoder_loss=0.2676, over 5800012.72 frames. ], batch size: 67, lr: 1.12e-02, grad_scale: 4.0 +2024-09-17 07:20:24,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=173100.0, ans=10.0 +2024-09-17 07:20:36,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=173140.0, ans=0.1 +2024-09-17 07:20:42,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=173140.0, ans=0.125 +2024-09-17 07:20:56,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=173180.0, ans=0.125 +2024-09-17 07:21:14,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=173260.0, ans=0.125 +2024-09-17 07:21:20,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=173260.0, ans=0.2 +2024-09-17 07:21:24,317 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.43 vs. limit=10.0 +2024-09-17 07:21:27,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=173260.0, ans=0.0 +2024-09-17 07:21:30,616 INFO [train.py:1198] (1/2) Epoch 10, batch 2600, loss[loss=0.2563, ctc_loss=0.1608, cr_loss=0.3915, attn_decoder_loss=0.2582, over 29425.00 frames. ], tot_loss[loss=0.2667, ctc_loss=0.172, cr_loss=0.4085, attn_decoder_loss=0.2682, over 5796088.88 frames. 
], batch size: 78, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:21:33,520 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.476e+01 9.594e+01 1.032e+02 1.139e+02 3.672e+02, threshold=2.065e+02, percent-clipped=4.0 +2024-09-17 07:21:36,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=173300.0, ans=0.0 +2024-09-17 07:21:45,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=173340.0, ans=0.125 +2024-09-17 07:22:02,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=173380.0, ans=0.05 +2024-09-17 07:22:08,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=173380.0, ans=0.0 +2024-09-17 07:22:14,811 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.02 vs. limit=10.0 +2024-09-17 07:22:17,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=173420.0, ans=0.125 +2024-09-17 07:22:33,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=173460.0, ans=0.125 +2024-09-17 07:22:45,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=173460.0, ans=0.0 +2024-09-17 07:22:47,843 INFO [train.py:1198] (1/2) Epoch 10, batch 2650, loss[loss=0.2842, ctc_loss=0.1848, cr_loss=0.4075, attn_decoder_loss=0.2861, over 29328.00 frames. ], tot_loss[loss=0.2673, ctc_loss=0.1726, cr_loss=0.409, attn_decoder_loss=0.2687, over 5802258.91 frames. ], batch size: 100, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:22:51,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.02 vs. limit=15.0 +2024-09-17 07:22:52,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=173500.0, ans=0.125 +2024-09-17 07:22:58,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=173500.0, ans=0.125 +2024-09-17 07:22:58,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=173500.0, ans=0.025 +2024-09-17 07:23:00,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=173500.0, ans=0.2 +2024-09-17 07:23:00,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=173500.0, ans=0.0 +2024-09-17 07:23:06,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=173540.0, ans=0.125 +2024-09-17 07:23:11,545 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=13.49 vs. 
limit=15.0 +2024-09-17 07:23:28,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=173580.0, ans=0.2 +2024-09-17 07:24:03,243 INFO [train.py:1198] (1/2) Epoch 10, batch 2700, loss[loss=0.2721, ctc_loss=0.1707, cr_loss=0.4097, attn_decoder_loss=0.2743, over 29527.00 frames. ], tot_loss[loss=0.2671, ctc_loss=0.1726, cr_loss=0.4086, attn_decoder_loss=0.2685, over 5798586.31 frames. ], batch size: 87, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:24:08,375 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.951e+01 9.630e+01 1.023e+02 1.091e+02 1.557e+02, threshold=2.045e+02, percent-clipped=0.0 +2024-09-17 07:24:20,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=173740.0, ans=0.0 +2024-09-17 07:24:33,318 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.01 vs. limit=22.5 +2024-09-17 07:24:40,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=173780.0, ans=0.0 +2024-09-17 07:24:51,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=173820.0, ans=0.125 +2024-09-17 07:25:21,531 INFO [train.py:1198] (1/2) Epoch 10, batch 2750, loss[loss=0.2648, ctc_loss=0.1701, cr_loss=0.4362, attn_decoder_loss=0.2657, over 29519.00 frames. ], tot_loss[loss=0.266, ctc_loss=0.1717, cr_loss=0.4077, attn_decoder_loss=0.2674, over 5796601.82 frames. ], batch size: 75, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:25:26,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=173900.0, ans=0.125 +2024-09-17 07:25:28,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=173900.0, ans=0.07 +2024-09-17 07:25:37,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=173940.0, ans=0.1 +2024-09-17 07:25:40,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=173940.0, ans=0.5 +2024-09-17 07:25:47,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=173940.0, ans=0.09899494936611666 +2024-09-17 07:25:49,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=173940.0, ans=0.125 +2024-09-17 07:25:52,478 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.17 vs. 
limit=15.0 +2024-09-17 07:25:57,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=173980.0, ans=0.0 +2024-09-17 07:25:57,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=173980.0, ans=0.125 +2024-09-17 07:25:59,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=173980.0, ans=0.0 +2024-09-17 07:26:03,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=173980.0, ans=0.07 +2024-09-17 07:26:19,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=174020.0, ans=0.125 +2024-09-17 07:26:19,332 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.90 vs. limit=15.0 +2024-09-17 07:26:31,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=174060.0, ans=0.1 +2024-09-17 07:26:39,175 INFO [train.py:1198] (1/2) Epoch 10, batch 2800, loss[loss=0.308, ctc_loss=0.2337, cr_loss=0.4241, attn_decoder_loss=0.3068, over 20430.00 frames. ], tot_loss[loss=0.2664, ctc_loss=0.1723, cr_loss=0.4083, attn_decoder_loss=0.2678, over 5777796.80 frames. ], batch size: 211, lr: 1.12e-02, grad_scale: 16.0 +2024-09-17 07:26:42,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=174100.0, ans=0.2 +2024-09-17 07:26:43,471 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.595e+01 1.029e+02 1.148e+02 1.291e+02 2.335e+02, threshold=2.295e+02, percent-clipped=2.0 +2024-09-17 07:27:02,804 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.51 vs. limit=15.0 +2024-09-17 07:27:32,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=174220.0, ans=0.125 +2024-09-17 07:27:47,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=174260.0, ans=0.125 +2024-09-17 07:27:54,278 INFO [train.py:1198] (1/2) Epoch 10, batch 2850, loss[loss=0.2553, ctc_loss=0.173, cr_loss=0.3978, attn_decoder_loss=0.2556, over 29521.00 frames. ], tot_loss[loss=0.2672, ctc_loss=0.1733, cr_loss=0.4086, attn_decoder_loss=0.2686, over 5762432.17 frames. ], batch size: 77, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:27:58,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=174300.0, ans=0.125 +2024-09-17 07:28:17,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=174340.0, ans=0.125 +2024-09-17 07:28:23,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=174340.0, ans=0.1 +2024-09-17 07:28:24,517 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.50 vs. 
limit=22.5 +2024-09-17 07:28:33,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=174380.0, ans=10.0 +2024-09-17 07:29:03,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=174460.0, ans=0.1 +2024-09-17 07:29:12,373 INFO [train.py:1198] (1/2) Epoch 10, batch 2900, loss[loss=0.2511, ctc_loss=0.151, cr_loss=0.3818, attn_decoder_loss=0.2537, over 29429.00 frames. ], tot_loss[loss=0.2679, ctc_loss=0.1731, cr_loss=0.4097, attn_decoder_loss=0.2693, over 5788283.80 frames. ], batch size: 79, lr: 1.12e-02, grad_scale: 8.0 +2024-09-17 07:29:14,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=174500.0, ans=0.125 +2024-09-17 07:29:15,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=174500.0, ans=0.0 +2024-09-17 07:29:18,303 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.890e+01 9.394e+01 1.011e+02 1.079e+02 3.902e+02, threshold=2.022e+02, percent-clipped=2.0 +2024-09-17 07:29:31,636 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=30.26 vs. limit=22.5 +2024-09-17 07:29:33,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=174540.0, ans=0.2 +2024-09-17 07:29:40,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=174540.0, ans=0.025 +2024-09-17 07:29:43,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=174580.0, ans=0.0 +2024-09-17 07:29:44,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=174580.0, ans=0.1 +2024-09-17 07:30:19,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=174660.0, ans=0.1 +2024-09-17 07:30:21,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=174660.0, ans=0.1 +2024-09-17 07:30:30,072 INFO [train.py:1198] (1/2) Epoch 10, batch 2950, loss[loss=0.2522, ctc_loss=0.1587, cr_loss=0.3832, attn_decoder_loss=0.2541, over 29513.00 frames. ], tot_loss[loss=0.2665, ctc_loss=0.172, cr_loss=0.4075, attn_decoder_loss=0.268, over 5782025.69 frames. ], batch size: 75, lr: 1.11e-02, grad_scale: 4.0 +2024-09-17 07:30:31,045 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.37 vs. limit=15.0 +2024-09-17 07:30:52,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.20 vs. limit=10.0 +2024-09-17 07:31:19,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=174820.0, ans=0.025 +2024-09-17 07:31:44,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.36 vs. 
limit=15.0 +2024-09-17 07:31:46,170 INFO [train.py:1198] (1/2) Epoch 10, batch 3000, loss[loss=0.2674, ctc_loss=0.1736, cr_loss=0.4124, attn_decoder_loss=0.2687, over 29751.00 frames. ], tot_loss[loss=0.2665, ctc_loss=0.172, cr_loss=0.4076, attn_decoder_loss=0.268, over 5783398.69 frames. ], batch size: 81, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:31:46,171 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 07:32:05,337 INFO [train.py:1230] (1/2) Epoch 10, validation: loss=0.2137, ctc_loss=0.04855, cr_loss=4.713e-15, attn_decoder_loss=0.232, over 944034.00 frames. +2024-09-17 07:32:05,338 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 07:32:14,593 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.331e+01 9.561e+01 1.037e+02 1.121e+02 2.530e+02, threshold=2.075e+02, percent-clipped=2.0 +2024-09-17 07:32:31,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=174940.0, ans=0.0 +2024-09-17 07:32:38,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=174980.0, ans=0.125 +2024-09-17 07:32:51,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=175020.0, ans=0.0 +2024-09-17 07:32:54,642 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.49 vs. limit=12.0 +2024-09-17 07:32:58,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=175020.0, ans=0.0 +2024-09-17 07:33:07,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=175060.0, ans=0.0 +2024-09-17 07:33:11,974 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 07:33:20,909 INFO [train.py:1198] (1/2) Epoch 10, batch 3050, loss[loss=0.2583, ctc_loss=0.1677, cr_loss=0.4123, attn_decoder_loss=0.2592, over 29516.00 frames. ], tot_loss[loss=0.2675, ctc_loss=0.1732, cr_loss=0.4101, attn_decoder_loss=0.2688, over 5776939.34 frames. ], batch size: 76, lr: 1.11e-02, grad_scale: 4.0 +2024-09-17 07:33:21,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=175100.0, ans=0.2 +2024-09-17 07:33:35,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=175140.0, ans=0.1 +2024-09-17 07:33:58,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=175180.0, ans=0.04949747468305833 +2024-09-17 07:34:16,067 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.45 vs. 
limit=15.0 +2024-09-17 07:34:27,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=175260.0, ans=0.2 +2024-09-17 07:34:31,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=175260.0, ans=0.09899494936611666 +2024-09-17 07:34:36,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=175260.0, ans=0.07 +2024-09-17 07:34:39,151 INFO [train.py:1198] (1/2) Epoch 10, batch 3100, loss[loss=0.2922, ctc_loss=0.2042, cr_loss=0.4705, attn_decoder_loss=0.2915, over 29249.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1727, cr_loss=0.4091, attn_decoder_loss=0.2684, over 5776896.17 frames. ], batch size: 100, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:34:48,268 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.394e+01 9.542e+01 1.021e+02 1.174e+02 1.946e+02, threshold=2.041e+02, percent-clipped=0.0 +2024-09-17 07:34:51,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=175300.0, ans=0.1 +2024-09-17 07:34:52,110 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.14 vs. limit=15.0 +2024-09-17 07:35:44,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=175460.0, ans=0.2 +2024-09-17 07:35:52,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=175460.0, ans=0.125 +2024-09-17 07:35:53,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.89 vs. limit=10.0 +2024-09-17 07:35:57,191 INFO [train.py:1198] (1/2) Epoch 10, batch 3150, loss[loss=0.2865, ctc_loss=0.1849, cr_loss=0.4372, attn_decoder_loss=0.288, over 28886.00 frames. ], tot_loss[loss=0.2669, ctc_loss=0.1724, cr_loss=0.4084, attn_decoder_loss=0.2683, over 5782204.40 frames. ], batch size: 104, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:36:15,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=175540.0, ans=0.0 +2024-09-17 07:36:21,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=175540.0, ans=0.0 +2024-09-17 07:36:24,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer_ff2.min_abs, batch_count=175540.0, ans=0.1 +2024-09-17 07:36:29,759 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.24 vs. limit=15.0 +2024-09-17 07:36:41,704 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.36 vs. limit=15.0 +2024-09-17 07:37:06,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=175660.0, ans=0.125 +2024-09-17 07:37:12,407 INFO [train.py:1198] (1/2) Epoch 10, batch 3200, loss[loss=0.2718, ctc_loss=0.1811, cr_loss=0.4361, attn_decoder_loss=0.2722, over 29420.00 frames. 
], tot_loss[loss=0.2662, ctc_loss=0.1717, cr_loss=0.4079, attn_decoder_loss=0.2676, over 5791796.29 frames. ], batch size: 79, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:37:24,409 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.118e+01 9.421e+01 9.970e+01 1.120e+02 1.872e+02, threshold=1.994e+02, percent-clipped=0.0 +2024-09-17 07:37:37,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=175740.0, ans=0.025 +2024-09-17 07:37:43,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=175780.0, ans=0.0 +2024-09-17 07:37:52,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=175780.0, ans=10.0 +2024-09-17 07:37:54,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=175780.0, ans=0.125 +2024-09-17 07:37:55,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=175780.0, ans=0.0 +2024-09-17 07:38:17,285 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.54 vs. limit=22.5 +2024-09-17 07:38:30,165 INFO [train.py:1198] (1/2) Epoch 10, batch 3250, loss[loss=0.2771, ctc_loss=0.1855, cr_loss=0.4442, attn_decoder_loss=0.2774, over 29689.00 frames. ], tot_loss[loss=0.2668, ctc_loss=0.1721, cr_loss=0.4089, attn_decoder_loss=0.2682, over 5798572.66 frames. ], batch size: 84, lr: 1.11e-02, grad_scale: 4.0 +2024-09-17 07:38:33,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=175900.0, ans=0.2 +2024-09-17 07:38:39,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=175900.0, ans=0.125 +2024-09-17 07:38:48,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=175940.0, ans=0.125 +2024-09-17 07:38:50,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=175940.0, ans=0.0 +2024-09-17 07:38:57,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=175940.0, ans=0.1 +2024-09-17 07:39:48,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=176060.0, ans=0.0 +2024-09-17 07:39:54,684 INFO [train.py:1198] (1/2) Epoch 10, batch 3300, loss[loss=0.2743, ctc_loss=0.1732, cr_loss=0.4103, attn_decoder_loss=0.2764, over 28224.00 frames. ], tot_loss[loss=0.2652, ctc_loss=0.1707, cr_loss=0.4067, attn_decoder_loss=0.2666, over 5795791.41 frames. 
], batch size: 111, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:40:06,780 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.780e+01 9.364e+01 1.005e+02 1.120e+02 3.139e+02, threshold=2.009e+02, percent-clipped=4.0 +2024-09-17 07:40:26,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=176180.0, ans=0.125 +2024-09-17 07:40:29,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=176180.0, ans=0.125 +2024-09-17 07:40:53,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=176260.0, ans=0.1 +2024-09-17 07:41:04,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=176260.0, ans=0.95 +2024-09-17 07:41:09,879 INFO [train.py:1198] (1/2) Epoch 10, batch 3350, loss[loss=0.2804, ctc_loss=0.1829, cr_loss=0.4227, attn_decoder_loss=0.2819, over 28885.00 frames. ], tot_loss[loss=0.266, ctc_loss=0.1718, cr_loss=0.4078, attn_decoder_loss=0.2674, over 5772120.76 frames. ], batch size: 104, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:41:14,873 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 07:41:23,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=176340.0, ans=0.0 +2024-09-17 07:41:28,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=176340.0, ans=0.125 +2024-09-17 07:41:28,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=176340.0, ans=0.125 +2024-09-17 07:41:31,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=176340.0, ans=0.0 +2024-09-17 07:41:45,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=176380.0, ans=0.0 +2024-09-17 07:41:46,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=176380.0, ans=0.0 +2024-09-17 07:42:00,568 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.32 vs. limit=15.0 +2024-09-17 07:42:06,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=176420.0, ans=0.125 +2024-09-17 07:42:19,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=176460.0, ans=0.125 +2024-09-17 07:42:27,210 INFO [train.py:1198] (1/2) Epoch 10, batch 3400, loss[loss=0.2318, ctc_loss=0.1392, cr_loss=0.3345, attn_decoder_loss=0.2346, over 29337.00 frames. ], tot_loss[loss=0.2661, ctc_loss=0.1721, cr_loss=0.4083, attn_decoder_loss=0.2675, over 5764395.54 frames. 
], batch size: 67, lr: 1.11e-02, grad_scale: 8.0 +2024-09-17 07:42:30,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=176500.0, ans=0.07 +2024-09-17 07:42:33,947 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.89 vs. limit=10.0 +2024-09-17 07:42:34,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=176500.0, ans=0.0 +2024-09-17 07:42:39,316 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.353e+01 9.300e+01 1.006e+02 1.112e+02 2.316e+02, threshold=2.013e+02, percent-clipped=1.0 +2024-09-17 07:42:48,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=176540.0, ans=0.025 +2024-09-17 07:42:54,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=176540.0, ans=0.07 +2024-09-17 07:42:59,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=176580.0, ans=0.0 +2024-09-17 07:43:02,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=176580.0, ans=0.125 +2024-09-17 07:43:14,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=176620.0, ans=0.125 +2024-09-17 07:43:29,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=176660.0, ans=0.125 +2024-09-17 07:43:35,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=176660.0, ans=0.125 +2024-09-17 07:43:44,828 INFO [train.py:1198] (1/2) Epoch 10, batch 3450, loss[loss=0.2731, ctc_loss=0.1741, cr_loss=0.3789, attn_decoder_loss=0.2756, over 28332.00 frames. ], tot_loss[loss=0.2664, ctc_loss=0.1722, cr_loss=0.4084, attn_decoder_loss=0.2678, over 5773051.05 frames. 
], batch size: 111, lr: 1.11e-02, grad_scale: 8.0
+2024-09-17 07:43:51,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=176700.0, ans=0.025
+2024-09-17 07:43:51,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=176700.0, ans=0.0
+2024-09-17 07:44:04,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=176740.0, ans=0.025
+2024-09-17 07:44:07,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=176740.0, ans=0.125
+2024-09-17 07:44:32,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=176820.0, ans=0.125
+2024-09-17 07:44:40,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=176820.0, ans=15.0
+2024-09-17 07:44:49,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=176860.0, ans=0.025
+2024-09-17 07:45:00,784 INFO [train.py:1198] (1/2) Epoch 10, batch 3500, loss[loss=0.2388, ctc_loss=0.1473, cr_loss=0.3769, attn_decoder_loss=0.2406, over 29325.00 frames. ], tot_loss[loss=0.2659, ctc_loss=0.1719, cr_loss=0.4074, attn_decoder_loss=0.2673, over 5774958.14 frames. ], batch size: 71, lr: 1.11e-02, grad_scale: 8.0
+2024-09-17 07:45:07,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=176900.0, ans=0.125
+2024-09-17 07:45:11,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=176900.0, ans=0.09899494936611666
+2024-09-17 07:45:12,849 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.066e+01 9.560e+01 1.051e+02 1.170e+02 3.242e+02, threshold=2.102e+02, percent-clipped=4.0
+2024-09-17 07:45:23,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=176940.0, ans=0.025
+2024-09-17 07:45:26,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=176940.0, ans=0.025
+2024-09-17 07:45:29,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_positive, batch_count=176980.0, ans=0.05
+2024-09-17 07:45:30,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=176980.0, ans=0.1
+2024-09-17 07:45:32,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=176980.0, ans=0.125
+2024-09-17 07:46:08,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=177060.0, ans=0.1
+2024-09-17 07:46:17,481 INFO [train.py:1198] (1/2) Epoch 10, batch 3550, loss[loss=0.2724, ctc_loss=0.1705, cr_loss=0.4193, attn_decoder_loss=0.2744, over 29701.00 frames. ], tot_loss[loss=0.2654, ctc_loss=0.1711, cr_loss=0.4067, attn_decoder_loss=0.2669, over 5782597.60 frames. ], batch size: 89, lr: 1.11e-02, grad_scale: 8.0
+2024-09-17 07:46:24,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.28 vs. limit=10.0
+2024-09-17 07:46:26,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=177100.0, ans=0.125
+2024-09-17 07:46:31,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=177140.0, ans=0.125
+2024-09-17 07:46:39,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=177140.0, ans=0.1
+2024-09-17 07:46:44,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=177140.0, ans=0.125
+2024-09-17 07:47:08,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=177220.0, ans=0.125
+2024-09-17 07:47:23,532 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.93 vs. limit=15.0
+2024-09-17 07:47:31,489 INFO [train.py:1198] (1/2) Epoch 10, batch 3600, loss[loss=0.2544, ctc_loss=0.1602, cr_loss=0.3855, attn_decoder_loss=0.2563, over 29501.00 frames. ], tot_loss[loss=0.2657, ctc_loss=0.1714, cr_loss=0.4077, attn_decoder_loss=0.2671, over 5791413.26 frames. ], batch size: 77, lr: 1.11e-02, grad_scale: 16.0
+2024-09-17 07:47:31,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=177300.0, ans=0.125
+2024-09-17 07:47:38,328 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.24 vs. limit=15.0
+2024-09-17 07:47:44,996 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 9.296e+01 9.828e+01 1.086e+02 1.804e+02, threshold=1.966e+02, percent-clipped=0.0
+2024-09-17 07:47:53,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=177340.0, ans=0.125
+2024-09-17 07:47:54,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=177340.0, ans=0.0
+2024-09-17 07:47:56,663 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.56 vs. limit=15.0
+2024-09-17 07:48:01,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=177380.0, ans=0.0
+2024-09-17 07:48:31,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=177460.0, ans=0.125
+2024-09-17 07:48:37,319 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 07:48:41,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=177460.0, ans=0.125
+2024-09-17 07:48:41,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=177460.0, ans=0.0
+2024-09-17 07:48:45,317 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.85 vs. limit=6.0
+2024-09-17 07:48:45,924 INFO [train.py:1198] (1/2) Epoch 10, batch 3650, loss[loss=0.2825, ctc_loss=0.1866, cr_loss=0.436, attn_decoder_loss=0.2835, over 29519.00 frames. ], tot_loss[loss=0.2652, ctc_loss=0.1708, cr_loss=0.4069, attn_decoder_loss=0.2667, over 5794887.10 frames. ], batch size: 90, lr: 1.11e-02, grad_scale: 8.0
+2024-09-17 07:49:15,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=177540.0, ans=0.1
+2024-09-17 07:49:27,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=177580.0, ans=0.0
+2024-09-17 07:49:48,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=177660.0, ans=0.2
+2024-09-17 07:49:51,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=177660.0, ans=0.09899494936611666
+2024-09-17 07:50:03,437 INFO [train.py:1198] (1/2) Epoch 10, batch 3700, loss[loss=0.2774, ctc_loss=0.1794, cr_loss=0.4303, attn_decoder_loss=0.2788, over 29708.00 frames. ], tot_loss[loss=0.2657, ctc_loss=0.171, cr_loss=0.4076, attn_decoder_loss=0.2672, over 5805087.84 frames. ], batch size: 84, lr: 1.11e-02, grad_scale: 8.0
+2024-09-17 07:50:05,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=177700.0, ans=0.125
+2024-09-17 07:50:11,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=177700.0, ans=0.0
+2024-09-17 07:50:16,866 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.785e+01 9.275e+01 9.841e+01 1.076e+02 3.002e+02, threshold=1.968e+02, percent-clipped=1.0
+2024-09-17 07:50:18,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=177740.0, ans=0.2
+2024-09-17 07:50:36,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=177780.0, ans=0.0
+2024-09-17 07:50:37,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=177780.0, ans=0.125
+2024-09-17 07:50:41,322 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.95 vs. limit=15.0
+2024-09-17 07:50:45,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=177780.0, ans=0.2
+2024-09-17 07:50:49,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=177820.0, ans=0.125
+2024-09-17 07:50:52,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=177820.0, ans=0.0
+2024-09-17 07:51:17,406 INFO [train.py:1198] (1/2) Epoch 10, batch 3750, loss[loss=0.2383, ctc_loss=0.1473, cr_loss=0.3606, attn_decoder_loss=0.2404, over 29313.00 frames. ], tot_loss[loss=0.2655, ctc_loss=0.1705, cr_loss=0.4064, attn_decoder_loss=0.267, over 5808613.38 frames. ], batch size: 67, lr: 1.10e-02, grad_scale: 4.0
+2024-09-17 07:51:20,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=177900.0, ans=0.0
+2024-09-17 07:51:28,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=177900.0, ans=0.125
+2024-09-17 07:51:38,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=177940.0, ans=0.0
+2024-09-17 07:51:40,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=177940.0, ans=0.0
+2024-09-17 07:51:41,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=177940.0, ans=0.1
+2024-09-17 07:51:46,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=177980.0, ans=0.125
+2024-09-17 07:51:56,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=177980.0, ans=0.0
+2024-09-17 07:52:02,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=178020.0, ans=0.125
+2024-09-17 07:52:10,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=178020.0, ans=0.125
+2024-09-17 07:52:26,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=178060.0, ans=0.0
+2024-09-17 07:52:33,644 INFO [train.py:1198] (1/2) Epoch 10, batch 3800, loss[loss=0.2776, ctc_loss=0.1788, cr_loss=0.4378, attn_decoder_loss=0.2788, over 29618.00 frames. ], tot_loss[loss=0.2656, ctc_loss=0.1708, cr_loss=0.4069, attn_decoder_loss=0.267, over 5798302.90 frames. ], batch size: 86, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 07:52:48,523 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.369e+01 9.594e+01 1.015e+02 1.096e+02 4.461e+02, threshold=2.030e+02, percent-clipped=1.0
+2024-09-17 07:53:09,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=178180.0, ans=0.95
+2024-09-17 07:53:48,160 INFO [train.py:1198] (1/2) Epoch 10, batch 3850, loss[loss=0.2811, ctc_loss=0.1847, cr_loss=0.4204, attn_decoder_loss=0.2824, over 29267.00 frames. ], tot_loss[loss=0.2652, ctc_loss=0.1704, cr_loss=0.4071, attn_decoder_loss=0.2666, over 5812093.64 frames. ], batch size: 100, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 07:54:03,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=178340.0, ans=0.125
+2024-09-17 07:54:07,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=178340.0, ans=0.2
+2024-09-17 07:54:10,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=178340.0, ans=0.0
+2024-09-17 07:54:14,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.50 vs. limit=15.0
+2024-09-17 07:54:31,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=178420.0, ans=0.0
+2024-09-17 07:54:38,635 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.25 vs. limit=12.0
+2024-09-17 07:54:45,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.81 vs. limit=22.5
+2024-09-17 07:54:48,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=178460.0, ans=0.125
+2024-09-17 07:54:57,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=178460.0, ans=0.125
+2024-09-17 07:54:58,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=178460.0, ans=0.0
+2024-09-17 07:55:04,011 INFO [train.py:1198] (1/2) Epoch 10, batch 3900, loss[loss=0.2697, ctc_loss=0.171, cr_loss=0.4147, attn_decoder_loss=0.2715, over 29634.00 frames. ], tot_loss[loss=0.2656, ctc_loss=0.1706, cr_loss=0.4077, attn_decoder_loss=0.2671, over 5816280.21 frames. ], batch size: 86, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 07:55:07,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=178500.0, ans=0.125
+2024-09-17 07:55:13,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=178500.0, ans=0.0
+2024-09-17 07:55:17,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=178540.0, ans=0.125
+2024-09-17 07:55:20,367 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 9.621e+01 1.032e+02 1.104e+02 1.342e+02, threshold=2.064e+02, percent-clipped=0.0
+2024-09-17 07:55:25,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=178540.0, ans=0.07
+2024-09-17 07:55:28,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.21 vs. limit=15.0
+2024-09-17 07:55:46,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=178580.0, ans=0.125
+2024-09-17 07:56:18,294 INFO [train.py:1198] (1/2) Epoch 10, batch 3950, loss[loss=0.2841, ctc_loss=0.1821, cr_loss=0.4238, attn_decoder_loss=0.2861, over 29433.00 frames. ], tot_loss[loss=0.2653, ctc_loss=0.17, cr_loss=0.4067, attn_decoder_loss=0.2668, over 5836318.59 frames. ], batch size: 97, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 07:56:28,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=178700.0, ans=0.1
+2024-09-17 07:56:37,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=178740.0, ans=0.125
+2024-09-17 07:56:46,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=178780.0, ans=0.2
+2024-09-17 07:56:48,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=178780.0, ans=0.125
+2024-09-17 07:56:58,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=178780.0, ans=0.1
+2024-09-17 07:57:19,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.20 vs. limit=6.0
+2024-09-17 07:57:33,103 INFO [train.py:1198] (1/2) Epoch 10, batch 4000, loss[loss=0.2462, ctc_loss=0.1464, cr_loss=0.3637, attn_decoder_loss=0.2492, over 29522.00 frames. ], tot_loss[loss=0.2652, ctc_loss=0.1701, cr_loss=0.4062, attn_decoder_loss=0.2668, over 5813538.08 frames. ], batch size: 74, lr: 1.10e-02, grad_scale: 16.0
+2024-09-17 07:57:34,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=178900.0, ans=0.0
+2024-09-17 07:57:39,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=178900.0, ans=0.0
+2024-09-17 07:57:50,420 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 9.319e+01 1.030e+02 1.152e+02 2.635e+02, threshold=2.059e+02, percent-clipped=1.0
+2024-09-17 07:57:55,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=178940.0, ans=0.5
+2024-09-17 07:58:31,737 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.13 vs. limit=10.0
+2024-09-17 07:58:32,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=179060.0, ans=0.125
+2024-09-17 07:58:47,046 INFO [train.py:1198] (1/2) Epoch 10, batch 4050, loss[loss=0.3008, ctc_loss=0.2307, cr_loss=0.4624, attn_decoder_loss=0.2983, over 19903.00 frames. ], tot_loss[loss=0.2649, ctc_loss=0.1703, cr_loss=0.4058, attn_decoder_loss=0.2664, over 5796218.60 frames. ], batch size: 209, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 07:58:47,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=179100.0, ans=0.125
+2024-09-17 07:59:02,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=179140.0, ans=0.125
+2024-09-17 07:59:28,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=179180.0, ans=0.0
+2024-09-17 08:00:01,775 INFO [train.py:1198] (1/2) Epoch 10, batch 4100, loss[loss=0.288, ctc_loss=0.1859, cr_loss=0.4276, attn_decoder_loss=0.2898, over 29537.00 frames. ], tot_loss[loss=0.2651, ctc_loss=0.1706, cr_loss=0.4063, attn_decoder_loss=0.2665, over 5791019.28 frames. ], batch size: 90, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 08:00:20,761 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.192e+01 9.136e+01 9.895e+01 1.094e+02 2.839e+02, threshold=1.979e+02, percent-clipped=1.0
+2024-09-17 08:00:25,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=179340.0, ans=0.1
+2024-09-17 08:00:31,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=179380.0, ans=0.125
+2024-09-17 08:00:33,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=179380.0, ans=0.0
+2024-09-17 08:01:15,651 INFO [train.py:1198] (1/2) Epoch 10, batch 4150, loss[loss=0.2608, ctc_loss=0.173, cr_loss=0.4203, attn_decoder_loss=0.2612, over 29502.00 frames. ], tot_loss[loss=0.265, ctc_loss=0.1706, cr_loss=0.4072, attn_decoder_loss=0.2664, over 5796588.61 frames. ], batch size: 77, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 08:01:21,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=179500.0, ans=0.07
+2024-09-17 08:01:31,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=179540.0, ans=0.125
+2024-09-17 08:01:33,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=179540.0, ans=0.125
+2024-09-17 08:01:42,107 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 08:01:47,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=179580.0, ans=0.125
+2024-09-17 08:02:05,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=179620.0, ans=0.1
+2024-09-17 08:02:30,746 INFO [train.py:1198] (1/2) Epoch 10, batch 4200, loss[loss=0.2852, ctc_loss=0.1895, cr_loss=0.4308, attn_decoder_loss=0.2862, over 29501.00 frames. ], tot_loss[loss=0.2653, ctc_loss=0.1712, cr_loss=0.4078, attn_decoder_loss=0.2667, over 5798667.63 frames. ], batch size: 90, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 08:02:31,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=179700.0, ans=0.0
+2024-09-17 08:02:34,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=179700.0, ans=0.125
+2024-09-17 08:02:48,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.27 vs. limit=22.5
+2024-09-17 08:02:50,146 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.369e+01 9.480e+01 1.011e+02 1.105e+02 3.367e+02, threshold=2.021e+02, percent-clipped=4.0
+2024-09-17 08:02:54,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=179740.0, ans=0.05
+2024-09-17 08:03:35,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=179860.0, ans=0.1
+2024-09-17 08:03:37,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=179860.0, ans=0.0
+2024-09-17 08:03:44,285 INFO [train.py:1198] (1/2) Epoch 10, batch 4250, loss[loss=0.2449, ctc_loss=0.1505, cr_loss=0.3709, attn_decoder_loss=0.2471, over 29499.00 frames. ], tot_loss[loss=0.2653, ctc_loss=0.1707, cr_loss=0.4073, attn_decoder_loss=0.2668, over 5804903.83 frames. ], batch size: 74, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 08:03:53,205 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.28 vs. limit=15.0
+2024-09-17 08:04:14,243 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.54 vs. limit=22.5
+2024-09-17 08:04:34,610 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 08:04:34,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=180020.0, ans=0.0
+2024-09-17 08:04:34,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=180020.0, ans=0.0
+2024-09-17 08:04:36,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=180020.0, ans=0.025
+2024-09-17 08:04:39,655 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.65 vs. limit=15.0
+2024-09-17 08:04:43,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=180060.0, ans=0.125
+2024-09-17 08:04:43,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=180060.0, ans=0.0
+2024-09-17 08:04:57,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=180060.0, ans=0.125
+2024-09-17 08:05:00,431 INFO [train.py:1198] (1/2) Epoch 10, batch 4300, loss[loss=0.2835, ctc_loss=0.201, cr_loss=0.4693, attn_decoder_loss=0.2822, over 29520.00 frames. ], tot_loss[loss=0.2654, ctc_loss=0.171, cr_loss=0.4075, attn_decoder_loss=0.2669, over 5795377.84 frames. ], batch size: 87, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 08:05:19,787 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.912e+01 9.707e+01 1.038e+02 1.136e+02 2.980e+02, threshold=2.076e+02, percent-clipped=1.0
+2024-09-17 08:05:20,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=180140.0, ans=0.1
+2024-09-17 08:05:34,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=180180.0, ans=0.1
+2024-09-17 08:06:14,234 INFO [train.py:1198] (1/2) Epoch 10, batch 4350, loss[loss=0.2905, ctc_loss=0.1923, cr_loss=0.4586, attn_decoder_loss=0.2913, over 29495.00 frames. ], tot_loss[loss=0.269, ctc_loss=0.174, cr_loss=0.4131, attn_decoder_loss=0.2704, over 5797375.18 frames. ], batch size: 97, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 08:06:17,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=180300.0, ans=0.125
+2024-09-17 08:06:18,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=180300.0, ans=0.0
+2024-09-17 08:07:04,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=180420.0, ans=0.0
+2024-09-17 08:07:12,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.18 vs. limit=15.0
+2024-09-17 08:07:19,909 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.72 vs. limit=15.0
+2024-09-17 08:07:25,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=180460.0, ans=0.0
+2024-09-17 08:07:27,839 INFO [train.py:1198] (1/2) Epoch 10, batch 4400, loss[loss=0.2856, ctc_loss=0.1895, cr_loss=0.4459, attn_decoder_loss=0.2864, over 27368.00 frames. ], tot_loss[loss=0.2713, ctc_loss=0.1757, cr_loss=0.4154, attn_decoder_loss=0.2726, over 5766930.51 frames. ], batch size: 124, lr: 1.10e-02, grad_scale: 16.0
+2024-09-17 08:07:33,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=180500.0, ans=10.0
+2024-09-17 08:07:47,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=180540.0, ans=0.1
+2024-09-17 08:07:48,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.64 vs. limit=15.0
+2024-09-17 08:07:48,753 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.847e+01 9.762e+01 1.026e+02 1.096e+02 2.982e+02, threshold=2.053e+02, percent-clipped=1.0
+2024-09-17 08:07:59,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff3.min_abs, batch_count=180580.0, ans=0.2
+2024-09-17 08:08:08,458 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.78 vs. limit=22.5
+2024-09-17 08:08:13,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.44 vs. limit=22.5
+2024-09-17 08:08:20,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=180620.0, ans=0.2
+2024-09-17 08:08:23,710 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.01 vs. limit=15.0
+2024-09-17 08:08:37,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=180660.0, ans=0.125
+2024-09-17 08:08:41,979 INFO [train.py:1198] (1/2) Epoch 10, batch 4450, loss[loss=0.3014, ctc_loss=0.2331, cr_loss=0.4711, attn_decoder_loss=0.2985, over 20832.00 frames. ], tot_loss[loss=0.2746, ctc_loss=0.1812, cr_loss=0.4194, attn_decoder_loss=0.2756, over 5573756.38 frames. ], batch size: 212, lr: 1.10e-02, grad_scale: 4.0
+2024-09-17 08:08:49,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=180700.0, ans=0.0
+2024-09-17 08:08:52,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=180700.0, ans=0.0
+2024-09-17 08:08:59,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=180740.0, ans=0.0
+2024-09-17 08:09:05,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=180740.0, ans=0.0
+2024-09-17 08:09:10,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=180740.0, ans=0.0
+2024-09-17 08:09:16,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=180780.0, ans=0.0
+2024-09-17 08:09:52,003 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0
+2024-09-17 08:09:58,569 INFO [train.py:1198] (1/2) Epoch 10, batch 4500, loss[loss=0.3014, ctc_loss=0.2292, cr_loss=0.4513, attn_decoder_loss=0.2994, over 20157.00 frames. ], tot_loss[loss=0.2785, ctc_loss=0.1886, cr_loss=0.4221, attn_decoder_loss=0.2791, over 5233978.80 frames. ], batch size: 209, lr: 1.10e-02, grad_scale: 8.0
+2024-09-17 08:10:06,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=180900.0, ans=0.125
+2024-09-17 08:10:21,314 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.766e+01 1.077e+02 1.142e+02 1.231e+02 1.732e+02, threshold=2.283e+02, percent-clipped=0.0
+2024-09-17 08:10:27,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=180980.0, ans=0.125
+2024-09-17 08:11:32,458 INFO [train.py:1198] (1/2) Epoch 11, batch 0, loss[loss=0.2518, ctc_loss=0.1452, cr_loss=0.3697, attn_decoder_loss=0.2554, over 29602.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1452, cr_loss=0.3697, attn_decoder_loss=0.2554, over 29602.00 frames. ], batch size: 73, lr: 1.05e-02, grad_scale: 16.0
+2024-09-17 08:11:32,458 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-17 08:11:47,881 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.4.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.8253, 3.5256, 3.8017, 3.8032], device='cuda:1')
+2024-09-17 08:11:50,860 INFO [train.py:1230] (1/2) Epoch 11, validation: loss=0.2172, ctc_loss=0.0495, cr_loss=4.7e-15, attn_decoder_loss=0.2358, over 944034.00 frames.
+2024-09-17 08:11:50,861 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-17 08:12:28,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=181080.0, ans=0.125
+2024-09-17 08:12:47,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=181120.0, ans=0.2
+2024-09-17 08:12:47,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=181120.0, ans=0.2
+2024-09-17 08:12:52,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=181120.0, ans=0.125
+2024-09-17 08:13:10,335 INFO [train.py:1198] (1/2) Epoch 11, batch 50, loss[loss=0.243, ctc_loss=0.1542, cr_loss=0.3818, attn_decoder_loss=0.2444, over 29436.00 frames. ], tot_loss[loss=0.269, ctc_loss=0.1744, cr_loss=0.4118, attn_decoder_loss=0.2704, over 1268509.70 frames. ], batch size: 70, lr: 1.05e-02, grad_scale: 8.0
+2024-09-17 08:13:34,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=181240.0, ans=0.125
+2024-09-17 08:14:03,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=181320.0, ans=0.2
+2024-09-17 08:14:13,810 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.260e+01 9.758e+01 1.123e+02 1.302e+02 1.602e+03, threshold=2.247e+02, percent-clipped=5.0
+2024-09-17 08:14:19,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.53 vs. limit=15.0
+2024-09-17 08:14:25,851 INFO [train.py:1198] (1/2) Epoch 11, batch 100, loss[loss=0.2565, ctc_loss=0.1687, cr_loss=0.4076, attn_decoder_loss=0.2571, over 29519.00 frames. ], tot_loss[loss=0.2699, ctc_loss=0.1743, cr_loss=0.4128, attn_decoder_loss=0.2714, over 2252184.27 frames. ], batch size: 76, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:14:45,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=181440.0, ans=0.125
+2024-09-17 08:14:49,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=181440.0, ans=0.0
+2024-09-17 08:14:53,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=181440.0, ans=0.125
+2024-09-17 08:14:56,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=181480.0, ans=0.025
+2024-09-17 08:15:04,169 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.38 vs. limit=15.0
+2024-09-17 08:15:06,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=181480.0, ans=0.125
+2024-09-17 08:15:23,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.69 vs. limit=15.0
+2024-09-17 08:15:41,048 INFO [train.py:1198] (1/2) Epoch 11, batch 150, loss[loss=0.2363, ctc_loss=0.1475, cr_loss=0.3825, attn_decoder_loss=0.2377, over 29450.00 frames. ], tot_loss[loss=0.267, ctc_loss=0.1715, cr_loss=0.4092, attn_decoder_loss=0.2685, over 3047034.20 frames. ], batch size: 70, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:16:03,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=181640.0, ans=0.125
+2024-09-17 08:16:04,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.32 vs. limit=22.5
+2024-09-17 08:16:22,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=181680.0, ans=0.025
+2024-09-17 08:16:27,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=181680.0, ans=0.125
+2024-09-17 08:16:30,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=181720.0, ans=0.1
+2024-09-17 08:16:34,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=181720.0, ans=0.125
+2024-09-17 08:16:49,224 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.120e+01 9.120e+01 9.727e+01 1.024e+02 1.360e+02, threshold=1.945e+02, percent-clipped=0.0
+2024-09-17 08:16:52,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=181760.0, ans=0.0
+2024-09-17 08:16:52,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=181760.0, ans=0.125
+2024-09-17 08:17:01,150 INFO [train.py:1198] (1/2) Epoch 11, batch 200, loss[loss=0.2926, ctc_loss=0.1994, cr_loss=0.4648, attn_decoder_loss=0.2926, over 27139.00 frames. ], tot_loss[loss=0.2658, ctc_loss=0.1708, cr_loss=0.4089, attn_decoder_loss=0.2673, over 3657926.46 frames. ], batch size: 124, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:17:14,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=181840.0, ans=0.1
+2024-09-17 08:17:27,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.67 vs. limit=15.0
+2024-09-17 08:17:57,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=181920.0, ans=0.0
+2024-09-17 08:18:04,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=181960.0, ans=0.125
+2024-09-17 08:18:05,148 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.48 vs. limit=15.0
+2024-09-17 08:18:10,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=181960.0, ans=0.0
+2024-09-17 08:18:14,376 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.73 vs. limit=15.0
+2024-09-17 08:18:16,502 INFO [train.py:1198] (1/2) Epoch 11, batch 250, loss[loss=0.2837, ctc_loss=0.1916, cr_loss=0.4475, attn_decoder_loss=0.2839, over 29184.00 frames. ], tot_loss[loss=0.2651, ctc_loss=0.1697, cr_loss=0.4071, attn_decoder_loss=0.2667, over 4140286.32 frames. ], batch size: 100, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:18:21,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=182000.0, ans=0.2
+2024-09-17 08:18:27,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=182000.0, ans=0.125
+2024-09-17 08:18:36,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=182040.0, ans=0.1
+2024-09-17 08:18:41,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=182040.0, ans=0.0
+2024-09-17 08:18:44,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=182040.0, ans=0.0
+2024-09-17 08:18:50,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=182080.0, ans=0.125
+2024-09-17 08:19:11,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=182120.0, ans=0.125
+2024-09-17 08:19:20,162 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 9.171e+01 1.004e+02 1.090e+02 1.755e+02, threshold=2.009e+02, percent-clipped=0.0
+2024-09-17 08:19:25,641 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.49 vs. limit=12.0
+2024-09-17 08:19:32,194 INFO [train.py:1198] (1/2) Epoch 11, batch 300, loss[loss=0.2833, ctc_loss=0.1806, cr_loss=0.4213, attn_decoder_loss=0.2854, over 29544.00 frames. ], tot_loss[loss=0.2644, ctc_loss=0.1687, cr_loss=0.4059, attn_decoder_loss=0.266, over 4509720.39 frames. ], batch size: 92, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:19:45,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=182200.0, ans=0.2
+2024-09-17 08:20:32,195 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.55 vs. limit=22.5
+2024-09-17 08:20:38,289 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.68 vs. limit=22.5
+2024-09-17 08:20:52,576 INFO [train.py:1198] (1/2) Epoch 11, batch 350, loss[loss=0.2409, ctc_loss=0.1426, cr_loss=0.3626, attn_decoder_loss=0.2437, over 29348.00 frames. ], tot_loss[loss=0.265, ctc_loss=0.1693, cr_loss=0.4069, attn_decoder_loss=0.2666, over 4795498.04 frames. ], batch size: 71, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:21:03,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=182400.0, ans=0.0
+2024-09-17 08:21:08,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=182440.0, ans=0.125
+2024-09-17 08:21:12,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=182440.0, ans=0.125
+2024-09-17 08:21:20,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=182440.0, ans=0.0
+2024-09-17 08:21:21,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=182480.0, ans=0.125
+2024-09-17 08:21:29,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=182480.0, ans=0.5
+2024-09-17 08:21:38,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=182520.0, ans=0.125
+2024-09-17 08:21:41,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=182520.0, ans=0.125
+2024-09-17 08:21:55,726 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.030e+01 9.345e+01 9.958e+01 1.088e+02 1.726e+02, threshold=1.992e+02, percent-clipped=0.0
+2024-09-17 08:21:57,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=182560.0, ans=0.2
+2024-09-17 08:21:58,014 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.10 vs. limit=12.0
+2024-09-17 08:22:07,838 INFO [train.py:1198] (1/2) Epoch 11, batch 400, loss[loss=0.2744, ctc_loss=0.1739, cr_loss=0.4184, attn_decoder_loss=0.2762, over 29706.00 frames. ], tot_loss[loss=0.2646, ctc_loss=0.1689, cr_loss=0.4063, attn_decoder_loss=0.2662, over 5026337.10 frames. ], batch size: 82, lr: 1.04e-02, grad_scale: 16.0
+2024-09-17 08:22:14,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=182600.0, ans=0.025
+2024-09-17 08:22:27,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=182640.0, ans=0.0
+2024-09-17 08:22:45,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=182680.0, ans=0.125
+2024-09-17 08:23:03,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=182720.0, ans=0.025
+2024-09-17 08:23:05,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=182720.0, ans=0.1
+2024-09-17 08:23:21,077 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.18 vs. limit=15.0
+2024-09-17 08:23:23,311 INFO [train.py:1198] (1/2) Epoch 11, batch 450, loss[loss=0.277, ctc_loss=0.1679, cr_loss=0.4015, attn_decoder_loss=0.2801, over 29687.00 frames. ], tot_loss[loss=0.2647, ctc_loss=0.1691, cr_loss=0.4067, attn_decoder_loss=0.2663, over 5187334.12 frames. ], batch size: 83, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:23:28,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=182800.0, ans=0.125
+2024-09-17 08:23:40,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.04 vs. limit=10.0
+2024-09-17 08:24:00,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=182880.0, ans=0.0
+2024-09-17 08:24:06,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=182880.0, ans=0.1
+2024-09-17 08:24:11,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=182920.0, ans=0.125
+2024-09-17 08:24:27,919 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.19 vs. limit=10.0
+2024-09-17 08:24:29,221 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.30 vs. limit=12.0
+2024-09-17 08:24:32,891 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.364e+01 9.150e+01 9.879e+01 1.056e+02 3.994e+02, threshold=1.976e+02, percent-clipped=1.0
+2024-09-17 08:24:36,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=182960.0, ans=0.1
+2024-09-17 08:24:43,366 INFO [train.py:1198] (1/2) Epoch 11, batch 500, loss[loss=0.282, ctc_loss=0.1858, cr_loss=0.4298, attn_decoder_loss=0.2832, over 29488.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1679, cr_loss=0.4054, attn_decoder_loss=0.2652, over 5330994.26 frames. ], batch size: 94, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:25:01,300 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.06 vs. limit=6.0
+2024-09-17 08:25:24,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=183080.0, ans=0.125
+2024-09-17 08:25:43,559 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.58 vs. limit=15.0
+2024-09-17 08:25:57,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=183200.0, ans=0.05
+2024-09-17 08:25:59,363 INFO [train.py:1198] (1/2) Epoch 11, batch 550, loss[loss=0.2772, ctc_loss=0.1832, cr_loss=0.449, attn_decoder_loss=0.2777, over 28810.00 frames. ], tot_loss[loss=0.2637, ctc_loss=0.1683, cr_loss=0.4054, attn_decoder_loss=0.2653, over 5423991.37 frames. ], batch size: 104, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:26:02,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=183200.0, ans=0.2
+2024-09-17 08:26:22,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=183240.0, ans=0.0
+2024-09-17 08:26:34,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=183280.0, ans=0.2
+2024-09-17 08:26:39,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=183280.0, ans=0.025
+2024-09-17 08:26:43,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=183320.0, ans=0.125
+2024-09-17 08:26:48,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=183320.0, ans=0.125
+2024-09-17 08:27:04,997 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.807e+01 9.204e+01 9.712e+01 1.043e+02 1.936e+02, threshold=1.942e+02, percent-clipped=0.0
+2024-09-17 08:27:14,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=183400.0, ans=0.125
+2024-09-17 08:27:15,708 INFO [train.py:1198] (1/2) Epoch 11, batch 600, loss[loss=0.2828, ctc_loss=0.1801, cr_loss=0.4211, attn_decoder_loss=0.2848, over 29303.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1681, cr_loss=0.4053, attn_decoder_loss=0.2655, over 5511059.45 frames. ], batch size: 100, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:27:16,675 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.99 vs. limit=15.0
+2024-09-17 08:27:17,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=183400.0, ans=0.125
+2024-09-17 08:27:31,850 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 08:27:32,489 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.89 vs. limit=15.0
+2024-09-17 08:27:52,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=183480.0, ans=0.125
+2024-09-17 08:27:55,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=183480.0, ans=0.2
+2024-09-17 08:28:02,416 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.45 vs. limit=22.5
+2024-09-17 08:28:21,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=183560.0, ans=0.2
+2024-09-17 08:28:35,709 INFO [train.py:1198] (1/2) Epoch 11, batch 650, loss[loss=0.2645, ctc_loss=0.1683, cr_loss=0.3979, attn_decoder_loss=0.2663, over 29779.00 frames. ], tot_loss[loss=0.2635, ctc_loss=0.1674, cr_loss=0.4045, attn_decoder_loss=0.2652, over 5587763.67 frames. ], batch size: 81, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:28:58,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=183640.0, ans=0.0
+2024-09-17 08:29:15,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=183680.0, ans=0.0
+2024-09-17 08:29:23,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=183720.0, ans=0.125
+2024-09-17 08:29:32,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=183720.0, ans=0.125
+2024-09-17 08:29:36,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=183760.0, ans=0.95
+2024-09-17 08:29:42,306 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 9.127e+01 9.643e+01 1.047e+02 1.455e+02, threshold=1.929e+02, percent-clipped=0.0
+2024-09-17 08:29:51,516 INFO [train.py:1198] (1/2) Epoch 11, batch 700, loss[loss=0.2428, ctc_loss=0.1393, cr_loss=0.3888, attn_decoder_loss=0.2456, over 29529.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.1678, cr_loss=0.4054, attn_decoder_loss=0.2658, over 5638983.08 frames. ], batch size: 76, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:29:51,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=183800.0, ans=0.2
+2024-09-17 08:30:05,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=183840.0, ans=0.125
+2024-09-17 08:30:08,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=183840.0, ans=0.125
+2024-09-17 08:30:22,907 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.03 vs. limit=22.5
+2024-09-17 08:30:45,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=183920.0, ans=0.2
+2024-09-17 08:31:08,136 INFO [train.py:1198] (1/2) Epoch 11, batch 750, loss[loss=0.2605, ctc_loss=0.1589, cr_loss=0.3909, attn_decoder_loss=0.2631, over 29726.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.168, cr_loss=0.4051, attn_decoder_loss=0.2656, over 5678097.60 frames. ], batch size: 82, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:31:13,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.27 vs. limit=22.5
+2024-09-17 08:31:15,903 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 08:31:18,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=184000.0, ans=0.125
+2024-09-17 08:31:18,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=184000.0, ans=0.09899494936611666
+2024-09-17 08:31:20,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=184000.0, ans=0.0
+2024-09-17 08:31:24,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=184040.0, ans=0.2
+2024-09-17 08:31:50,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.81 vs. limit=22.5
+2024-09-17 08:31:55,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=184120.0, ans=0.0
+2024-09-17 08:31:56,381 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=9.58 vs. limit=12.0
+2024-09-17 08:32:16,691 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.265e+01 9.471e+01 1.047e+02 1.151e+02 2.834e+02, threshold=2.094e+02, percent-clipped=4.0
+2024-09-17 08:32:28,008 INFO [train.py:1198] (1/2) Epoch 11, batch 800, loss[loss=0.2284, ctc_loss=0.1386, cr_loss=0.368, attn_decoder_loss=0.2302, over 29609.00 frames. ], tot_loss[loss=0.2637, ctc_loss=0.168, cr_loss=0.4049, attn_decoder_loss=0.2653, over 5707625.00 frames. ], batch size: 73, lr: 1.04e-02, grad_scale: 16.0
+2024-09-17 08:32:40,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=184200.0, ans=0.0
+2024-09-17 08:32:43,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=184240.0, ans=0.07
+2024-09-17 08:32:46,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=184240.0, ans=0.125
+2024-09-17 08:33:09,354 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.86 vs. limit=22.5
+2024-09-17 08:33:22,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=184320.0, ans=0.025
+2024-09-17 08:33:23,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=184320.0, ans=0.0
+2024-09-17 08:33:38,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.32 vs. limit=15.0
+2024-09-17 08:33:39,336 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.09 vs. limit=15.0
+2024-09-17 08:33:42,951 INFO [train.py:1198] (1/2) Epoch 11, batch 850, loss[loss=0.2721, ctc_loss=0.1698, cr_loss=0.3964, attn_decoder_loss=0.2747, over 29695.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1676, cr_loss=0.4046, attn_decoder_loss=0.2649, over 5736129.36 frames. ], batch size: 89, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:34:01,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=184440.0, ans=0.125
+2024-09-17 08:34:13,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=184480.0, ans=0.125
+2024-09-17 08:34:40,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=184520.0, ans=0.125
+2024-09-17 08:34:45,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=184560.0, ans=0.125
+2024-09-17 08:34:45,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=184560.0, ans=0.1
+2024-09-17 08:34:50,947 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.310e+01 9.399e+01 9.999e+01 1.067e+02 1.963e+02, threshold=2.000e+02, percent-clipped=0.0
+2024-09-17 08:34:58,534 INFO [train.py:1198] (1/2) Epoch 11, batch 900, loss[loss=0.2379, ctc_loss=0.1428, cr_loss=0.3513, attn_decoder_loss=0.2407, over 29598.00 frames. ], tot_loss[loss=0.2637, ctc_loss=0.1678, cr_loss=0.4048, attn_decoder_loss=0.2653, over 5740857.17 frames. ], batch size: 73, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:35:02,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=184600.0, ans=0.1
+2024-09-17 08:35:07,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=184600.0, ans=0.125
+2024-09-17 08:35:12,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=184640.0, ans=0.1
+2024-09-17 08:35:52,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=184720.0, ans=0.1
+2024-09-17 08:35:52,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=184720.0, ans=0.125
+2024-09-17 08:35:55,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=184720.0, ans=0.1
+2024-09-17 08:35:58,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=184720.0, ans=0.2
+2024-09-17 08:36:04,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=184760.0, ans=0.125
+2024-09-17 08:36:16,597 INFO [train.py:1198] (1/2) Epoch 11, batch 950, loss[loss=0.2477, ctc_loss=0.155, cr_loss=0.3791, attn_decoder_loss=0.2496, over 29524.00 frames. ], tot_loss[loss=0.2645, ctc_loss=0.1687, cr_loss=0.4063, attn_decoder_loss=0.2661, over 5741322.70 frames. ], batch size: 74, lr: 1.04e-02, grad_scale: 8.0
+2024-09-17 08:37:05,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=184920.0, ans=0.025
+2024-09-17 08:37:18,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=184960.0, ans=0.0
+2024-09-17 08:37:26,876 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.438e+01 9.626e+01 1.055e+02 1.179e+02 5.157e+02, threshold=2.111e+02, percent-clipped=4.0
+2024-09-17 08:37:34,429 INFO [train.py:1198] (1/2) Epoch 11, batch 1000, loss[loss=0.2573, ctc_loss=0.1687, cr_loss=0.4266, attn_decoder_loss=0.2576, over 29507.00 frames. ], tot_loss[loss=0.2651, ctc_loss=0.1694, cr_loss=0.4073, attn_decoder_loss=0.2667, over 5734439.55 frames. ], batch size: 77, lr: 1.03e-02, grad_scale: 8.0
+2024-09-17 08:37:34,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer_ff3.min_abs, batch_count=185000.0, ans=0.2
+2024-09-17 08:38:03,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=185080.0, ans=0.125
+2024-09-17 08:38:05,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=185080.0, ans=0.125
+2024-09-17 08:38:06,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=185080.0, ans=0.025
+2024-09-17 08:38:12,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=185080.0, ans=0.1
+2024-09-17 08:38:18,899 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 08:38:45,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=185160.0, ans=0.2
+2024-09-17 08:38:50,011 INFO [train.py:1198] (1/2) Epoch 11, batch 1050, loss[loss=0.276, ctc_loss=0.1705, cr_loss=0.421, attn_decoder_loss=0.2784, over 29682.00 frames. ], tot_loss[loss=0.2644, ctc_loss=0.1689, cr_loss=0.4067, attn_decoder_loss=0.266, over 5744093.36 frames. ], batch size: 85, lr: 1.03e-02, grad_scale: 4.0
+2024-09-17 08:39:27,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=185280.0, ans=0.125
+2024-09-17 08:39:40,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=185320.0, ans=0.2
+2024-09-17 08:39:46,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=185320.0, ans=0.1
+2024-09-17 08:39:58,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=185360.0, ans=0.2
+2024-09-17 08:39:58,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=185360.0, ans=0.1
+2024-09-17 08:40:01,447 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 9.188e+01 9.776e+01 1.031e+02 2.876e+02, threshold=1.955e+02, percent-clipped=0.0
+2024-09-17 08:40:06,444 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 08:40:07,590 INFO [train.py:1198] (1/2) Epoch 11, batch 1100, loss[loss=0.2427, ctc_loss=0.1538, cr_loss=0.3891, attn_decoder_loss=0.2439, over 29453.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1681, cr_loss=0.4056, attn_decoder_loss=0.2652, over 5756130.37 frames. ], batch size: 78, lr: 1.03e-02, grad_scale: 8.0
+2024-09-17 08:40:09,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=185400.0, ans=0.125
+2024-09-17 08:40:14,728 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.98 vs. limit=15.0
+2024-09-17 08:40:26,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=185440.0, ans=0.1
+2024-09-17 08:40:28,881 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=8.40 vs. limit=15.0
+2024-09-17 08:40:34,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=185440.0, ans=0.125
+2024-09-17 08:40:38,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=185480.0, ans=0.0
+2024-09-17 08:40:40,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=185480.0, ans=0.2
+2024-09-17 08:40:46,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.29 vs. limit=15.0
+2024-09-17 08:41:09,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=185560.0, ans=0.0
+2024-09-17 08:41:25,822 INFO [train.py:1198] (1/2) Epoch 11, batch 1150, loss[loss=0.253, ctc_loss=0.1599, cr_loss=0.3815, attn_decoder_loss=0.2549, over 29420.00 frames. ], tot_loss[loss=0.2637, ctc_loss=0.1682, cr_loss=0.4053, attn_decoder_loss=0.2653, over 5753344.86 frames. ], batch size: 78, lr: 1.03e-02, grad_scale: 4.0
+2024-09-17 08:42:00,773 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.14 vs. limit=10.0
+2024-09-17 08:42:12,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=185720.0, ans=0.1
+2024-09-17 08:42:21,806 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.25 vs. limit=12.0
+2024-09-17 08:42:30,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=185760.0, ans=0.0
+2024-09-17 08:42:37,685 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.117e+01 9.543e+01 1.008e+02 1.096e+02 1.940e+02, threshold=2.016e+02, percent-clipped=1.0
+2024-09-17 08:42:40,316 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.61 vs. limit=22.5
+2024-09-17 08:42:42,178 INFO [train.py:1198] (1/2) Epoch 11, batch 1200, loss[loss=0.2759, ctc_loss=0.1699, cr_loss=0.4325, attn_decoder_loss=0.278, over 29688.00 frames. ], tot_loss[loss=0.2646, ctc_loss=0.1688, cr_loss=0.406, attn_decoder_loss=0.2662, over 5745455.05 frames. ], batch size: 85, lr: 1.03e-02, grad_scale: 8.0
+2024-09-17 08:42:44,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.74 vs. limit=6.0
+2024-09-17 08:43:09,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=185840.0, ans=0.0
+2024-09-17 08:43:18,987 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.17 vs. limit=15.0
+2024-09-17 08:43:21,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=185880.0, ans=0.125
+2024-09-17 08:43:22,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=185880.0, ans=0.125
+2024-09-17 08:44:00,293 INFO [train.py:1198] (1/2) Epoch 11, batch 1250, loss[loss=0.2915, ctc_loss=0.1988, cr_loss=0.4599, attn_decoder_loss=0.2916, over 29512.00 frames. ], tot_loss[loss=0.265, ctc_loss=0.169, cr_loss=0.4071, attn_decoder_loss=0.2667, over 5774264.30 frames.
], batch size: 92, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:44:03,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=186000.0, ans=0.125 +2024-09-17 08:44:18,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=186040.0, ans=0.2 +2024-09-17 08:45:04,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=186160.0, ans=0.0 +2024-09-17 08:45:06,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=186160.0, ans=0.125 +2024-09-17 08:45:10,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=186160.0, ans=0.07 +2024-09-17 08:45:13,658 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.030e+01 9.338e+01 9.972e+01 1.044e+02 2.073e+02, threshold=1.994e+02, percent-clipped=1.0 +2024-09-17 08:45:18,173 INFO [train.py:1198] (1/2) Epoch 11, batch 1300, loss[loss=0.2853, ctc_loss=0.1926, cr_loss=0.4464, attn_decoder_loss=0.2857, over 28272.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.1683, cr_loss=0.4056, attn_decoder_loss=0.2657, over 5778267.68 frames. ], batch size: 111, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:45:18,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=186200.0, ans=0.0 +2024-09-17 08:45:20,608 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.61 vs. limit=15.0 +2024-09-17 08:45:21,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=186200.0, ans=0.125 +2024-09-17 08:45:22,228 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.58 vs. limit=22.5 +2024-09-17 08:45:40,081 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.92 vs. limit=15.0 +2024-09-17 08:45:56,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=186280.0, ans=0.125 +2024-09-17 08:46:20,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=186360.0, ans=0.025 +2024-09-17 08:46:21,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=186360.0, ans=0.025 +2024-09-17 08:46:22,565 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.72 vs. limit=15.0 +2024-09-17 08:46:23,686 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.12 vs. limit=15.0 +2024-09-17 08:46:34,058 INFO [train.py:1198] (1/2) Epoch 11, batch 1350, loss[loss=0.2689, ctc_loss=0.1756, cr_loss=0.4326, attn_decoder_loss=0.2696, over 29780.00 frames. ], tot_loss[loss=0.264, ctc_loss=0.1681, cr_loss=0.4061, attn_decoder_loss=0.2657, over 5793990.71 frames. 
], batch size: 81, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:46:50,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=186440.0, ans=0.1 +2024-09-17 08:46:55,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=186440.0, ans=0.125 +2024-09-17 08:47:06,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=186480.0, ans=0.125 +2024-09-17 08:47:09,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=186480.0, ans=0.125 +2024-09-17 08:47:30,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=186520.0, ans=0.0 +2024-09-17 08:47:30,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=186520.0, ans=0.2 +2024-09-17 08:47:32,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=186520.0, ans=10.0 +2024-09-17 08:47:46,681 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.867e+01 9.722e+01 1.047e+02 1.106e+02 2.453e+02, threshold=2.093e+02, percent-clipped=1.0 +2024-09-17 08:47:51,379 INFO [train.py:1198] (1/2) Epoch 11, batch 1400, loss[loss=0.2278, ctc_loss=0.1386, cr_loss=0.3691, attn_decoder_loss=0.2295, over 29586.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.1683, cr_loss=0.4067, attn_decoder_loss=0.2657, over 5805801.75 frames. ], batch size: 69, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:48:12,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=186640.0, ans=0.1 +2024-09-17 08:48:17,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=186640.0, ans=0.125 +2024-09-17 08:48:24,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=186680.0, ans=0.09899494936611666 +2024-09-17 08:48:28,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=186680.0, ans=0.09899494936611666 +2024-09-17 08:48:30,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=186680.0, ans=0.0 +2024-09-17 08:48:30,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=186680.0, ans=0.125 +2024-09-17 08:48:43,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=186720.0, ans=0.0 +2024-09-17 08:48:46,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=186720.0, ans=0.125 +2024-09-17 08:48:51,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=186720.0, ans=0.0 +2024-09-17 08:48:54,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=186760.0, ans=0.125 +2024-09-17 08:48:55,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=186760.0, ans=0.125 +2024-09-17 
08:48:57,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=186760.0, ans=0.1 +2024-09-17 08:48:57,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=186760.0, ans=22.5 +2024-09-17 08:49:09,111 INFO [train.py:1198] (1/2) Epoch 11, batch 1450, loss[loss=0.2882, ctc_loss=0.1885, cr_loss=0.431, attn_decoder_loss=0.2897, over 29401.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.168, cr_loss=0.406, attn_decoder_loss=0.2658, over 5802898.27 frames. ], batch size: 94, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:49:54,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=186920.0, ans=0.0 +2024-09-17 08:50:00,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=186920.0, ans=0.2 +2024-09-17 08:50:18,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=186960.0, ans=0.125 +2024-09-17 08:50:21,154 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.530e+01 9.456e+01 1.033e+02 1.133e+02 2.904e+02, threshold=2.066e+02, percent-clipped=2.0 +2024-09-17 08:50:24,332 INFO [train.py:1198] (1/2) Epoch 11, batch 1500, loss[loss=0.2727, ctc_loss=0.1624, cr_loss=0.4107, attn_decoder_loss=0.2759, over 29623.00 frames. ], tot_loss[loss=0.2645, ctc_loss=0.1682, cr_loss=0.4067, attn_decoder_loss=0.2662, over 5803430.61 frames. ], batch size: 86, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:50:24,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=187000.0, ans=0.0 +2024-09-17 08:51:37,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=187160.0, ans=0.1 +2024-09-17 08:51:43,025 INFO [train.py:1198] (1/2) Epoch 11, batch 1550, loss[loss=0.2855, ctc_loss=0.182, cr_loss=0.4345, attn_decoder_loss=0.2873, over 29511.00 frames. ], tot_loss[loss=0.2647, ctc_loss=0.1687, cr_loss=0.4067, attn_decoder_loss=0.2664, over 5778779.43 frames. ], batch size: 90, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:51:43,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten.whitening_limit, batch_count=187200.0, ans=22.5 +2024-09-17 08:52:10,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=187240.0, ans=0.025 +2024-09-17 08:52:40,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=187320.0, ans=0.125 +2024-09-17 08:52:43,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=187320.0, ans=0.125 +2024-09-17 08:52:52,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=187360.0, ans=0.125 +2024-09-17 08:52:56,880 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.25 vs. 
limit=12.0 +2024-09-17 08:52:57,788 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.021e+01 9.413e+01 1.008e+02 1.137e+02 5.479e+02, threshold=2.016e+02, percent-clipped=2.0 +2024-09-17 08:53:00,774 INFO [train.py:1198] (1/2) Epoch 11, batch 1600, loss[loss=0.2695, ctc_loss=0.1608, cr_loss=0.3825, attn_decoder_loss=0.2731, over 29670.00 frames. ], tot_loss[loss=0.2645, ctc_loss=0.1684, cr_loss=0.4062, attn_decoder_loss=0.2662, over 5761721.74 frames. ], batch size: 85, lr: 1.03e-02, grad_scale: 16.0 +2024-09-17 08:53:05,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=187400.0, ans=0.0 +2024-09-17 08:53:19,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=187440.0, ans=0.1 +2024-09-17 08:53:23,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=187440.0, ans=0.125 +2024-09-17 08:53:28,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.71 vs. limit=15.0 +2024-09-17 08:53:33,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=187480.0, ans=0.125 +2024-09-17 08:53:40,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=187480.0, ans=0.1 +2024-09-17 08:53:43,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.90 vs. limit=15.0 +2024-09-17 08:53:50,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=187520.0, ans=0.0 +2024-09-17 08:53:53,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=187520.0, ans=0.2 +2024-09-17 08:53:57,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.49 vs. limit=15.0 +2024-09-17 08:54:16,158 INFO [train.py:1198] (1/2) Epoch 11, batch 1650, loss[loss=0.2823, ctc_loss=0.1864, cr_loss=0.4475, attn_decoder_loss=0.283, over 29695.00 frames. ], tot_loss[loss=0.2642, ctc_loss=0.1681, cr_loss=0.4055, attn_decoder_loss=0.2659, over 5757628.40 frames. ], batch size: 89, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:54:21,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=187600.0, ans=0.125 +2024-09-17 08:55:13,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=187720.0, ans=0.0 +2024-09-17 08:55:18,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.86 vs. 
limit=10.0 +2024-09-17 08:55:32,180 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.528e+01 9.133e+01 1.002e+02 1.058e+02 1.581e+02, threshold=2.003e+02, percent-clipped=0.0 +2024-09-17 08:55:32,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=187800.0, ans=0.125 +2024-09-17 08:55:33,699 INFO [train.py:1198] (1/2) Epoch 11, batch 1700, loss[loss=0.2356, ctc_loss=0.1452, cr_loss=0.3682, attn_decoder_loss=0.2374, over 29557.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1676, cr_loss=0.4051, attn_decoder_loss=0.2656, over 5778690.06 frames. ], batch size: 69, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:56:04,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=187880.0, ans=0.125 +2024-09-17 08:56:17,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.96 vs. limit=15.0 +2024-09-17 08:56:32,602 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.41 vs. limit=10.0 +2024-09-17 08:56:47,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=187960.0, ans=0.125 +2024-09-17 08:56:51,928 INFO [train.py:1198] (1/2) Epoch 11, batch 1750, loss[loss=0.2277, ctc_loss=0.1382, cr_loss=0.3388, attn_decoder_loss=0.2301, over 29300.00 frames. ], tot_loss[loss=0.2635, ctc_loss=0.1671, cr_loss=0.405, attn_decoder_loss=0.2652, over 5784870.29 frames. ], batch size: 67, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:57:05,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=188040.0, ans=0.1 +2024-09-17 08:57:22,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=188080.0, ans=0.07 +2024-09-17 08:57:42,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=188120.0, ans=0.0 +2024-09-17 08:57:45,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=188120.0, ans=0.125 +2024-09-17 08:57:48,749 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.85 vs. limit=6.0 +2024-09-17 08:58:04,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=188160.0, ans=0.0 +2024-09-17 08:58:05,829 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.041e+01 9.226e+01 9.775e+01 1.040e+02 1.595e+02, threshold=1.955e+02, percent-clipped=0.0 +2024-09-17 08:58:07,339 INFO [train.py:1198] (1/2) Epoch 11, batch 1800, loss[loss=0.2705, ctc_loss=0.1787, cr_loss=0.4382, attn_decoder_loss=0.271, over 29701.00 frames. ], tot_loss[loss=0.2634, ctc_loss=0.1671, cr_loss=0.4047, attn_decoder_loss=0.2651, over 5788729.85 frames. 
], batch size: 83, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 08:58:31,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=188240.0, ans=0.05 +2024-09-17 08:59:03,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=188320.0, ans=0.125 +2024-09-17 08:59:24,938 INFO [train.py:1198] (1/2) Epoch 11, batch 1850, loss[loss=0.2703, ctc_loss=0.1766, cr_loss=0.4035, attn_decoder_loss=0.2717, over 29634.00 frames. ], tot_loss[loss=0.2631, ctc_loss=0.1669, cr_loss=0.4049, attn_decoder_loss=0.2648, over 5794942.55 frames. ], batch size: 86, lr: 1.03e-02, grad_scale: 4.0 +2024-09-17 08:59:37,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=188400.0, ans=0.125 +2024-09-17 08:59:41,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=188440.0, ans=0.1 +2024-09-17 08:59:44,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=188440.0, ans=0.025 +2024-09-17 08:59:46,437 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:00:02,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=188480.0, ans=10.0 +2024-09-17 09:00:13,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=188520.0, ans=0.0 +2024-09-17 09:00:13,947 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:00:39,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=188560.0, ans=0.0 +2024-09-17 09:00:42,134 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.959e+01 9.168e+01 9.699e+01 1.053e+02 1.276e+02, threshold=1.940e+02, percent-clipped=0.0 +2024-09-17 09:00:42,160 INFO [train.py:1198] (1/2) Epoch 11, batch 1900, loss[loss=0.2776, ctc_loss=0.1752, cr_loss=0.4355, attn_decoder_loss=0.2794, over 29707.00 frames. ], tot_loss[loss=0.264, ctc_loss=0.1677, cr_loss=0.4058, attn_decoder_loss=0.2657, over 5802827.99 frames. ], batch size: 89, lr: 1.03e-02, grad_scale: 8.0 +2024-09-17 09:00:48,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=188600.0, ans=0.1 +2024-09-17 09:00:48,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=188600.0, ans=0.2 +2024-09-17 09:00:58,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=188640.0, ans=0.0 +2024-09-17 09:01:01,069 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.98 vs. 
limit=15.0 +2024-09-17 09:01:21,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=188680.0, ans=0.05 +2024-09-17 09:01:36,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=188720.0, ans=0.0 +2024-09-17 09:01:38,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=188720.0, ans=0.125 +2024-09-17 09:01:54,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=188760.0, ans=0.2 +2024-09-17 09:01:57,954 INFO [train.py:1198] (1/2) Epoch 11, batch 1950, loss[loss=0.2602, ctc_loss=0.1594, cr_loss=0.4041, attn_decoder_loss=0.2624, over 29448.00 frames. ], tot_loss[loss=0.2652, ctc_loss=0.1686, cr_loss=0.4074, attn_decoder_loss=0.2669, over 5818130.21 frames. ], batch size: 78, lr: 1.02e-02, grad_scale: 4.0 +2024-09-17 09:02:31,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=188880.0, ans=0.1 +2024-09-17 09:02:49,234 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=15.0 +2024-09-17 09:02:52,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=188920.0, ans=0.0 +2024-09-17 09:03:03,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=188960.0, ans=0.0 +2024-09-17 09:03:03,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=188960.0, ans=0.0 +2024-09-17 09:03:15,374 INFO [train.py:1198] (1/2) Epoch 11, batch 2000, loss[loss=0.2397, ctc_loss=0.1495, cr_loss=0.3848, attn_decoder_loss=0.2411, over 29360.00 frames. ], tot_loss[loss=0.2658, ctc_loss=0.1694, cr_loss=0.4086, attn_decoder_loss=0.2674, over 5796056.47 frames. ], batch size: 67, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:03:16,926 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.350e+01 9.444e+01 9.987e+01 1.091e+02 4.605e+02, threshold=1.997e+02, percent-clipped=2.0 +2024-09-17 09:03:35,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=189040.0, ans=0.07 +2024-09-17 09:03:41,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=189040.0, ans=0.025 +2024-09-17 09:04:07,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=189120.0, ans=0.125 +2024-09-17 09:04:12,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.28 vs. limit=10.0 +2024-09-17 09:04:33,453 INFO [train.py:1198] (1/2) Epoch 11, batch 2050, loss[loss=0.2467, ctc_loss=0.1603, cr_loss=0.3989, attn_decoder_loss=0.2474, over 29441.00 frames. ], tot_loss[loss=0.2647, ctc_loss=0.1688, cr_loss=0.4074, attn_decoder_loss=0.2663, over 5788653.97 frames. 
], batch size: 70, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:04:36,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=189200.0, ans=0.1 +2024-09-17 09:04:42,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=189200.0, ans=0.0 +2024-09-17 09:04:43,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.69 vs. limit=12.0 +2024-09-17 09:04:55,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.53 vs. limit=12.0 +2024-09-17 09:05:17,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=189320.0, ans=0.125 +2024-09-17 09:05:29,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=189320.0, ans=0.0 +2024-09-17 09:05:49,129 INFO [train.py:1198] (1/2) Epoch 11, batch 2100, loss[loss=0.2593, ctc_loss=0.163, cr_loss=0.3946, attn_decoder_loss=0.2612, over 29771.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.168, cr_loss=0.4062, attn_decoder_loss=0.2657, over 5800730.76 frames. ], batch size: 81, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:05:50,607 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.724e+01 8.970e+01 9.676e+01 1.062e+02 4.848e+02, threshold=1.935e+02, percent-clipped=1.0 +2024-09-17 09:06:19,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=189480.0, ans=0.025 +2024-09-17 09:06:23,661 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.32 vs. limit=22.5 +2024-09-17 09:06:36,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=189520.0, ans=0.125 +2024-09-17 09:06:51,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=189560.0, ans=0.125 +2024-09-17 09:06:54,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=189560.0, ans=0.125 +2024-09-17 09:07:03,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=189560.0, ans=0.125 +2024-09-17 09:07:03,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=189560.0, ans=0.0 +2024-09-17 09:07:06,746 INFO [train.py:1198] (1/2) Epoch 11, batch 2150, loss[loss=0.2559, ctc_loss=0.1615, cr_loss=0.4065, attn_decoder_loss=0.2574, over 29436.00 frames. ], tot_loss[loss=0.263, ctc_loss=0.1666, cr_loss=0.4044, attn_decoder_loss=0.2648, over 5815307.44 frames. 
], batch size: 78, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:07:36,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=189680.0, ans=0.1 +2024-09-17 09:07:48,280 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:07:49,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=189680.0, ans=0.125 +2024-09-17 09:07:54,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=189720.0, ans=0.09899494936611666 +2024-09-17 09:08:00,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=189720.0, ans=0.125 +2024-09-17 09:08:01,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=189720.0, ans=0.125 +2024-09-17 09:08:08,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=189760.0, ans=0.025 +2024-09-17 09:08:15,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=189760.0, ans=0.0 +2024-09-17 09:08:24,832 INFO [train.py:1198] (1/2) Epoch 11, batch 2200, loss[loss=0.2744, ctc_loss=0.1771, cr_loss=0.4234, attn_decoder_loss=0.2759, over 29614.00 frames. ], tot_loss[loss=0.2629, ctc_loss=0.1663, cr_loss=0.404, attn_decoder_loss=0.2647, over 5812071.58 frames. ], batch size: 86, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:08:26,326 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.251e+01 9.350e+01 9.957e+01 1.083e+02 2.059e+02, threshold=1.991e+02, percent-clipped=1.0 +2024-09-17 09:08:27,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=189800.0, ans=0.015 +2024-09-17 09:09:13,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=189920.0, ans=0.125 +2024-09-17 09:09:22,808 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:09:27,776 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.46 vs. limit=15.0 +2024-09-17 09:09:40,359 INFO [train.py:1198] (1/2) Epoch 11, batch 2250, loss[loss=0.2667, ctc_loss=0.164, cr_loss=0.4052, attn_decoder_loss=0.2691, over 29724.00 frames. ], tot_loss[loss=0.2628, ctc_loss=0.1661, cr_loss=0.4037, attn_decoder_loss=0.2646, over 5811500.67 frames. 
], batch size: 82, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:09:46,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=190000.0, ans=0.125 +2024-09-17 09:09:52,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=190000.0, ans=0.125 +2024-09-17 09:10:11,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=190080.0, ans=0.025 +2024-09-17 09:10:23,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=190080.0, ans=0.0 +2024-09-17 09:10:38,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=190120.0, ans=0.09899494936611666 +2024-09-17 09:10:46,851 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.53 vs. limit=15.0 +2024-09-17 09:10:47,495 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:10:51,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=190160.0, ans=0.125 +2024-09-17 09:10:51,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=190160.0, ans=0.0 +2024-09-17 09:10:55,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=190160.0, ans=0.0 +2024-09-17 09:10:57,790 INFO [train.py:1198] (1/2) Epoch 11, batch 2300, loss[loss=0.2343, ctc_loss=0.1395, cr_loss=0.343, attn_decoder_loss=0.2372, over 29303.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1653, cr_loss=0.4025, attn_decoder_loss=0.2635, over 5800076.63 frames. ], batch size: 71, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:10:59,281 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.762e+01 9.241e+01 9.973e+01 1.088e+02 2.493e+02, threshold=1.995e+02, percent-clipped=2.0 +2024-09-17 09:11:17,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=190240.0, ans=0.125 +2024-09-17 09:11:23,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=190240.0, ans=0.95 +2024-09-17 09:11:30,477 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.18 vs. 
limit=15.0 +2024-09-17 09:11:32,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=190280.0, ans=0.125 +2024-09-17 09:11:45,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=190320.0, ans=0.1 +2024-09-17 09:11:48,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=190320.0, ans=10.0 +2024-09-17 09:11:55,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=190320.0, ans=0.2 +2024-09-17 09:12:15,944 INFO [train.py:1198] (1/2) Epoch 11, batch 2350, loss[loss=0.2707, ctc_loss=0.1742, cr_loss=0.4289, attn_decoder_loss=0.2718, over 29677.00 frames. ], tot_loss[loss=0.2621, ctc_loss=0.1655, cr_loss=0.4033, attn_decoder_loss=0.2639, over 5805388.29 frames. ], batch size: 83, lr: 1.02e-02, grad_scale: 4.0 +2024-09-17 09:12:17,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=190400.0, ans=0.015 +2024-09-17 09:12:25,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=190400.0, ans=0.1 +2024-09-17 09:12:32,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=190440.0, ans=0.1 +2024-09-17 09:12:43,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=190440.0, ans=0.04949747468305833 +2024-09-17 09:13:03,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=190520.0, ans=0.125 +2024-09-17 09:13:31,664 INFO [train.py:1198] (1/2) Epoch 11, batch 2400, loss[loss=0.2569, ctc_loss=0.1599, cr_loss=0.3903, attn_decoder_loss=0.259, over 29544.00 frames. ], tot_loss[loss=0.2627, ctc_loss=0.166, cr_loss=0.4042, attn_decoder_loss=0.2644, over 5808397.85 frames. ], batch size: 76, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:13:34,606 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.758e+01 9.144e+01 9.902e+01 1.071e+02 1.818e+02, threshold=1.980e+02, percent-clipped=0.0 +2024-09-17 09:13:42,580 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:13:43,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=190600.0, ans=0.1 +2024-09-17 09:14:13,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=190680.0, ans=0.025 +2024-09-17 09:14:27,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=190720.0, ans=0.125 +2024-09-17 09:14:50,025 INFO [train.py:1198] (1/2) Epoch 11, batch 2450, loss[loss=0.2715, ctc_loss=0.17, cr_loss=0.4251, attn_decoder_loss=0.2733, over 29713.00 frames. ], tot_loss[loss=0.264, ctc_loss=0.1673, cr_loss=0.4057, attn_decoder_loss=0.2657, over 5785307.67 frames. 
], batch size: 82, lr: 1.02e-02, grad_scale: 4.0 +2024-09-17 09:14:56,329 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:14:59,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=190800.0, ans=0.07 +2024-09-17 09:15:17,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=190840.0, ans=0.125 +2024-09-17 09:15:36,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=190920.0, ans=0.0 +2024-09-17 09:15:38,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=190920.0, ans=0.025 +2024-09-17 09:15:41,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=190920.0, ans=0.125 +2024-09-17 09:15:50,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=190960.0, ans=0.0 +2024-09-17 09:15:50,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=190960.0, ans=0.2 +2024-09-17 09:16:03,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.34 vs. limit=10.0 +2024-09-17 09:16:07,526 INFO [train.py:1198] (1/2) Epoch 11, batch 2500, loss[loss=0.2843, ctc_loss=0.1936, cr_loss=0.4227, attn_decoder_loss=0.285, over 29611.00 frames. ], tot_loss[loss=0.264, ctc_loss=0.1675, cr_loss=0.4058, attn_decoder_loss=0.2657, over 5795457.66 frames. ], batch size: 86, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:16:10,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=191000.0, ans=0.0 +2024-09-17 09:16:12,136 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.899e+01 9.413e+01 9.956e+01 1.120e+02 1.816e+02, threshold=1.991e+02, percent-clipped=0.0 +2024-09-17 09:16:12,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=191000.0, ans=0.125 +2024-09-17 09:17:15,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.87 vs. limit=15.0 +2024-09-17 09:17:23,740 INFO [train.py:1198] (1/2) Epoch 11, batch 2550, loss[loss=0.2371, ctc_loss=0.149, cr_loss=0.3869, attn_decoder_loss=0.2383, over 29361.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.167, cr_loss=0.4051, attn_decoder_loss=0.2656, over 5799101.05 frames. 
], batch size: 67, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:17:28,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=191200.0, ans=0.0 +2024-09-17 09:17:36,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=191200.0, ans=0.125 +2024-09-17 09:18:00,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=191280.0, ans=0.125 +2024-09-17 09:18:02,565 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:18:12,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=191320.0, ans=0.0 +2024-09-17 09:18:17,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=191320.0, ans=0.0 +2024-09-17 09:18:37,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=191360.0, ans=0.125 +2024-09-17 09:18:41,956 INFO [train.py:1198] (1/2) Epoch 11, batch 2600, loss[loss=0.2589, ctc_loss=0.1629, cr_loss=0.4015, attn_decoder_loss=0.2606, over 29452.00 frames. ], tot_loss[loss=0.2641, ctc_loss=0.1669, cr_loss=0.4046, attn_decoder_loss=0.2659, over 5795392.93 frames. ], batch size: 78, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:18:46,519 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.056e+01 9.455e+01 1.019e+02 1.112e+02 3.211e+02, threshold=2.037e+02, percent-clipped=2.0 +2024-09-17 09:19:07,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=191440.0, ans=0.1 +2024-09-17 09:19:12,647 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=5.00 vs. limit=12.0 +2024-09-17 09:19:14,118 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.11 vs. limit=15.0 +2024-09-17 09:19:15,611 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.66 vs. limit=15.0 +2024-09-17 09:19:33,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=191520.0, ans=0.025 +2024-09-17 09:19:37,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=191520.0, ans=0.025 +2024-09-17 09:19:45,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=191560.0, ans=0.125 +2024-09-17 09:19:49,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=191560.0, ans=0.0 +2024-09-17 09:19:59,103 INFO [train.py:1198] (1/2) Epoch 11, batch 2650, loss[loss=0.2956, ctc_loss=0.2026, cr_loss=0.4549, attn_decoder_loss=0.2959, over 29274.00 frames. ], tot_loss[loss=0.2642, ctc_loss=0.167, cr_loss=0.4048, attn_decoder_loss=0.266, over 5802051.26 frames. 
], batch size: 100, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:20:07,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=191600.0, ans=0.0 +2024-09-17 09:20:53,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=191720.0, ans=0.1 +2024-09-17 09:21:01,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=191760.0, ans=0.0 +2024-09-17 09:21:14,609 INFO [train.py:1198] (1/2) Epoch 11, batch 2700, loss[loss=0.271, ctc_loss=0.1738, cr_loss=0.421, attn_decoder_loss=0.2725, over 29549.00 frames. ], tot_loss[loss=0.2644, ctc_loss=0.1673, cr_loss=0.4055, attn_decoder_loss=0.2662, over 5797549.66 frames. ], batch size: 87, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:21:20,544 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.625e+01 9.206e+01 9.835e+01 1.075e+02 2.605e+02, threshold=1.967e+02, percent-clipped=2.0 +2024-09-17 09:21:25,388 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:21:25,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=191800.0, ans=0.0 +2024-09-17 09:21:40,318 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=9.92 vs. limit=22.5 +2024-09-17 09:21:40,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=191840.0, ans=0.025 +2024-09-17 09:21:53,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=191880.0, ans=0.125 +2024-09-17 09:21:58,156 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.87 vs. limit=15.0 +2024-09-17 09:22:39,949 INFO [train.py:1198] (1/2) Epoch 11, batch 2750, loss[loss=0.2526, ctc_loss=0.1623, cr_loss=0.3879, attn_decoder_loss=0.254, over 29522.00 frames. ], tot_loss[loss=0.2631, ctc_loss=0.1663, cr_loss=0.403, attn_decoder_loss=0.2649, over 5794782.04 frames. ], batch size: 75, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:22:40,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=192000.0, ans=0.0 +2024-09-17 09:22:56,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=192040.0, ans=0.035 +2024-09-17 09:23:13,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=192080.0, ans=0.125 +2024-09-17 09:23:45,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=192160.0, ans=0.2 +2024-09-17 09:23:55,129 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:23:57,818 INFO [train.py:1198] (1/2) Epoch 11, batch 2800, loss[loss=0.2999, ctc_loss=0.2264, cr_loss=0.4107, attn_decoder_loss=0.299, over 20613.00 frames. ], tot_loss[loss=0.263, ctc_loss=0.1664, cr_loss=0.4027, attn_decoder_loss=0.2648, over 5775519.88 frames. 
], batch size: 211, lr: 1.02e-02, grad_scale: 16.0 +2024-09-17 09:24:00,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.96 vs. limit=15.0 +2024-09-17 09:24:05,063 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.696e+01 8.863e+01 9.648e+01 1.109e+02 4.510e+02, threshold=1.930e+02, percent-clipped=4.0 +2024-09-17 09:24:10,042 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:24:11,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=192240.0, ans=0.0 +2024-09-17 09:24:22,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.58 vs. limit=10.0 +2024-09-17 09:24:27,564 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=5.01 vs. limit=5.0 +2024-09-17 09:24:31,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=192280.0, ans=10.0 +2024-09-17 09:24:32,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=192280.0, ans=0.2 +2024-09-17 09:24:52,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=192320.0, ans=0.0 +2024-09-17 09:24:57,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.07 vs. limit=15.0 +2024-09-17 09:25:13,027 INFO [train.py:1198] (1/2) Epoch 11, batch 2850, loss[loss=0.2543, ctc_loss=0.1584, cr_loss=0.3918, attn_decoder_loss=0.2562, over 29518.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.167, cr_loss=0.4034, attn_decoder_loss=0.2654, over 5761729.59 frames. ], batch size: 77, lr: 1.02e-02, grad_scale: 8.0 +2024-09-17 09:25:30,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=192440.0, ans=0.125 +2024-09-17 09:25:31,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=192440.0, ans=0.025 +2024-09-17 09:25:34,223 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.48 vs. limit=6.0 +2024-09-17 09:25:46,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=19.83 vs. limit=22.5 +2024-09-17 09:26:00,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_ff3.min_abs, batch_count=192520.0, ans=0.2 +2024-09-17 09:26:08,995 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.10 vs. 
limit=15.0 +2024-09-17 09:26:12,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=192520.0, ans=0.0 +2024-09-17 09:26:28,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=192560.0, ans=0.125 +2024-09-17 09:26:29,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=192600.0, ans=0.1 +2024-09-17 09:26:30,890 INFO [train.py:1198] (1/2) Epoch 11, batch 2900, loss[loss=0.2607, ctc_loss=0.1684, cr_loss=0.4199, attn_decoder_loss=0.2616, over 29431.00 frames. ], tot_loss[loss=0.2648, ctc_loss=0.1679, cr_loss=0.4063, attn_decoder_loss=0.2665, over 5787030.66 frames. ], batch size: 79, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:26:37,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=192600.0, ans=0.125 +2024-09-17 09:26:38,269 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.930e+01 9.694e+01 1.018e+02 1.122e+02 2.522e+02, threshold=2.035e+02, percent-clipped=2.0 +2024-09-17 09:27:01,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=192680.0, ans=0.125 +2024-09-17 09:27:13,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=192680.0, ans=0.125 +2024-09-17 09:27:15,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=192720.0, ans=0.025 +2024-09-17 09:27:25,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=192720.0, ans=0.04949747468305833 +2024-09-17 09:27:49,119 INFO [train.py:1198] (1/2) Epoch 11, batch 2950, loss[loss=0.2532, ctc_loss=0.1569, cr_loss=0.3938, attn_decoder_loss=0.2551, over 29504.00 frames. ], tot_loss[loss=0.2631, ctc_loss=0.1664, cr_loss=0.4036, attn_decoder_loss=0.2648, over 5780919.04 frames. ], batch size: 75, lr: 1.01e-02, grad_scale: 4.0 +2024-09-17 09:28:03,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=192840.0, ans=0.0 +2024-09-17 09:28:21,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=192880.0, ans=0.125 +2024-09-17 09:28:41,418 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.81 vs. limit=15.0 +2024-09-17 09:28:44,637 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.98 vs. limit=10.0 +2024-09-17 09:29:04,911 INFO [train.py:1198] (1/2) Epoch 11, batch 3000, loss[loss=0.2661, ctc_loss=0.1634, cr_loss=0.408, attn_decoder_loss=0.2685, over 29736.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1664, cr_loss=0.4039, attn_decoder_loss=0.2649, over 5781505.62 frames. ], batch size: 81, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:29:04,911 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 09:29:24,075 INFO [train.py:1230] (1/2) Epoch 11, validation: loss=0.2124, ctc_loss=0.04636, cr_loss=4.851e-15, attn_decoder_loss=0.2308, over 944034.00 frames. 
+2024-09-17 09:29:24,076 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 09:29:33,324 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.860e+01 9.274e+01 9.995e+01 1.117e+02 3.922e+02, threshold=1.999e+02, percent-clipped=3.0 +2024-09-17 09:30:23,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=193160.0, ans=0.125 +2024-09-17 09:30:39,849 INFO [train.py:1198] (1/2) Epoch 11, batch 3050, loss[loss=0.2531, ctc_loss=0.1546, cr_loss=0.3968, attn_decoder_loss=0.2552, over 29537.00 frames. ], tot_loss[loss=0.2642, ctc_loss=0.1673, cr_loss=0.4054, attn_decoder_loss=0.2659, over 5774551.51 frames. ], batch size: 76, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:30:46,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=193200.0, ans=0.125 +2024-09-17 09:30:49,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=193200.0, ans=0.125 +2024-09-17 09:30:55,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=193240.0, ans=0.2 +2024-09-17 09:31:02,124 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.51 vs. limit=6.0 +2024-09-17 09:31:25,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=193320.0, ans=0.125 +2024-09-17 09:31:57,183 INFO [train.py:1198] (1/2) Epoch 11, batch 3100, loss[loss=0.2773, ctc_loss=0.1789, cr_loss=0.4188, attn_decoder_loss=0.2789, over 29276.00 frames. ], tot_loss[loss=0.2635, ctc_loss=0.1667, cr_loss=0.4038, attn_decoder_loss=0.2653, over 5774195.00 frames. ], batch size: 100, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:32:01,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=193400.0, ans=0.2 +2024-09-17 09:32:02,706 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.60 vs. limit=15.0 +2024-09-17 09:32:07,731 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.928e+01 9.888e+01 1.137e+02 1.275e+02 2.184e+02, threshold=2.273e+02, percent-clipped=1.0 +2024-09-17 09:32:57,267 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.54 vs. limit=15.0 +2024-09-17 09:32:57,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=193560.0, ans=0.2 +2024-09-17 09:33:15,293 INFO [train.py:1198] (1/2) Epoch 11, batch 3150, loss[loss=0.2693, ctc_loss=0.1677, cr_loss=0.4129, attn_decoder_loss=0.2714, over 28909.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1673, cr_loss=0.4053, attn_decoder_loss=0.2656, over 5781193.28 frames. 
], batch size: 104, lr: 1.01e-02, grad_scale: 4.0 +2024-09-17 09:33:17,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=193600.0, ans=0.1 +2024-09-17 09:33:18,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=193600.0, ans=0.125 +2024-09-17 09:33:24,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=193600.0, ans=0.0 +2024-09-17 09:33:31,707 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.35 vs. limit=10.0 +2024-09-17 09:33:34,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=193640.0, ans=0.0 +2024-09-17 09:33:59,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=193720.0, ans=0.125 +2024-09-17 09:34:00,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=193720.0, ans=0.125 +2024-09-17 09:34:06,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=193720.0, ans=0.0 +2024-09-17 09:34:06,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=193720.0, ans=0.125 +2024-09-17 09:34:30,663 INFO [train.py:1198] (1/2) Epoch 11, batch 3200, loss[loss=0.2565, ctc_loss=0.1538, cr_loss=0.3972, attn_decoder_loss=0.2591, over 29412.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1665, cr_loss=0.4048, attn_decoder_loss=0.265, over 5791758.89 frames. ], batch size: 79, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:34:30,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=193800.0, ans=0.125 +2024-09-17 09:34:35,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=193800.0, ans=0.125 +2024-09-17 09:34:42,603 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.896e+01 9.244e+01 9.694e+01 1.030e+02 2.478e+02, threshold=1.939e+02, percent-clipped=1.0 +2024-09-17 09:34:44,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=193840.0, ans=0.025 +2024-09-17 09:34:47,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=193840.0, ans=0.125 +2024-09-17 09:34:56,373 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.46 vs. limit=15.0 +2024-09-17 09:35:00,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=193880.0, ans=0.125 +2024-09-17 09:35:06,727 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.44 vs. 
limit=15.0
+2024-09-17 09:35:20,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=193920.0, ans=0.035
+2024-09-17 09:35:21,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=193920.0, ans=0.1
+2024-09-17 09:35:33,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=193960.0, ans=0.0
+2024-09-17 09:35:36,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=193960.0, ans=0.5
+2024-09-17 09:35:49,417 INFO [train.py:1198] (1/2) Epoch 11, batch 3250, loss[loss=0.2715, ctc_loss=0.1746, cr_loss=0.4169, attn_decoder_loss=0.273, over 29702.00 frames. ], tot_loss[loss=0.2637, ctc_loss=0.1667, cr_loss=0.4052, attn_decoder_loss=0.2654, over 5798178.31 frames. ], batch size: 84, lr: 1.01e-02, grad_scale: 8.0
+2024-09-17 09:35:57,962 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.39 vs. limit=6.0
+2024-09-17 09:35:58,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=194000.0, ans=0.0
+2024-09-17 09:36:07,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.39 vs. limit=15.0
+2024-09-17 09:36:10,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=194040.0, ans=0.0
+2024-09-17 09:36:42,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=194120.0, ans=0.125
+2024-09-17 09:36:45,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=194120.0, ans=0.1
+2024-09-17 09:37:07,223 INFO [train.py:1198] (1/2) Epoch 11, batch 3300, loss[loss=0.2915, ctc_loss=0.1973, cr_loss=0.4409, attn_decoder_loss=0.2922, over 28460.00 frames. ], tot_loss[loss=0.263, ctc_loss=0.1664, cr_loss=0.4044, attn_decoder_loss=0.2647, over 5796386.01 frames. ], batch size: 112, lr: 1.01e-02, grad_scale: 8.0
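The recurring `WARNING [optim.py:487] Clipping_scale=2.0, grad-norm quartiles ... threshold=..., percent-clipped=...` entries (the next log line below is one) summarize adaptive gradient clipping: the five numbers are the min/25%/median/75%/max of recent gradient norms, and the threshold tracks clipping_scale times the median (in the warning below, 2.0 × 1.035e+02 = 2.070e+02, exactly the logged threshold). A self-contained sketch of that bookkeeping, using a plain global-norm clipper rather than icefall's actual optimizer internals:

```python
import torch
from collections import deque

class QuartileClipper:
    """Clip gradients to clipping_scale x (median of recent grad norms),
    reporting quartiles like the optim.py warnings in this log.
    Illustrative sketch, not the repo's optimizer."""
    def __init__(self, clipping_scale: float = 2.0, history: int = 128):
        self.scale = clipping_scale
        self.norms = deque(maxlen=history)
        self.clipped = 0
        self.steps = 0

    def __call__(self, parameters) -> float:
        params = [p for p in parameters if p.grad is not None]
        norm = torch.linalg.vector_norm(
            torch.stack([torch.linalg.vector_norm(p.grad) for p in params])
        ).item()
        self.norms.append(norm)
        q = torch.quantile(torch.tensor(list(self.norms)),
                           torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = self.scale * q[2].item()  # 2.0 x median, as logged
        self.steps += 1
        if norm > threshold:
            self.clipped += 1  # feeds the 'percent-clipped' statistic
            for p in params:
                p.grad.mul_(threshold / norm)
        return norm

# usage sketch: loss.backward(); clipper(model.parameters()); optimizer.step()
```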
+2024-09-17 09:37:19,488 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.100e+01 9.489e+01 1.035e+02 1.154e+02 2.549e+02, threshold=2.070e+02, percent-clipped=1.0
+2024-09-17 09:37:28,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=194240.0, ans=0.025
+2024-09-17 09:37:33,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=194240.0, ans=0.1
+2024-09-17 09:37:36,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=194280.0, ans=0.125
+2024-09-17 09:37:40,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=194280.0, ans=0.125
+2024-09-17 09:37:46,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=194280.0, ans=0.0
+2024-09-17 09:37:52,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=194320.0, ans=0.0
+2024-09-17 09:38:09,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=194360.0, ans=0.125
+2024-09-17 09:38:21,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=194400.0, ans=0.125
+2024-09-17 09:38:22,983 INFO [train.py:1198] (1/2) Epoch 11, batch 3350, loss[loss=0.2799, ctc_loss=0.1824, cr_loss=0.4062, attn_decoder_loss=0.2817, over 28828.00 frames. ], tot_loss[loss=0.2638, ctc_loss=0.1673, cr_loss=0.4051, attn_decoder_loss=0.2655, over 5773236.96 frames. ], batch size: 104, lr: 1.01e-02, grad_scale: 8.0
+2024-09-17 09:39:32,309 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.83 vs. limit=15.0
+2024-09-17 09:39:40,730 INFO [train.py:1198] (1/2) Epoch 11, batch 3400, loss[loss=0.2318, ctc_loss=0.1336, cr_loss=0.3408, attn_decoder_loss=0.2351, over 29337.00 frames. ], tot_loss[loss=0.2635, ctc_loss=0.1669, cr_loss=0.404, attn_decoder_loss=0.2653, over 5766562.58 frames. ], batch size: 67, lr: 1.01e-02, grad_scale: 8.0
+2024-09-17 09:39:45,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=194600.0, ans=0.2
+2024-09-17 09:39:50,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=194600.0, ans=0.125
+2024-09-17 09:39:52,370 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.86 vs.
limit=15.0 +2024-09-17 09:39:52,871 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.042e+01 9.232e+01 1.004e+02 1.095e+02 3.484e+02, threshold=2.008e+02, percent-clipped=1.0 +2024-09-17 09:40:15,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=194680.0, ans=0.0 +2024-09-17 09:40:17,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=194680.0, ans=0.2 +2024-09-17 09:40:20,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=194680.0, ans=0.125 +2024-09-17 09:40:57,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=194800.0, ans=0.0 +2024-09-17 09:40:58,435 INFO [train.py:1198] (1/2) Epoch 11, batch 3450, loss[loss=0.2673, ctc_loss=0.1642, cr_loss=0.3864, attn_decoder_loss=0.2702, over 28179.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.1665, cr_loss=0.4038, attn_decoder_loss=0.2654, over 5775810.27 frames. ], batch size: 111, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:40:58,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=194800.0, ans=0.0 +2024-09-17 09:41:20,431 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.76 vs. limit=15.0 +2024-09-17 09:41:42,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=194920.0, ans=0.125 +2024-09-17 09:41:53,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=194920.0, ans=0.2 +2024-09-17 09:41:57,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=194960.0, ans=0.025 +2024-09-17 09:41:58,241 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.54 vs. limit=10.0 +2024-09-17 09:42:08,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=194960.0, ans=0.0 +2024-09-17 09:42:13,895 INFO [train.py:1198] (1/2) Epoch 11, batch 3500, loss[loss=0.2362, ctc_loss=0.1449, cr_loss=0.3744, attn_decoder_loss=0.238, over 29313.00 frames. ], tot_loss[loss=0.2629, ctc_loss=0.1662, cr_loss=0.4035, attn_decoder_loss=0.2647, over 5778897.29 frames. 
], batch size: 71, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:42:21,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=195000.0, ans=0.015 +2024-09-17 09:42:22,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=195000.0, ans=0.0 +2024-09-17 09:42:26,192 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.959e+01 9.210e+01 9.867e+01 1.123e+02 1.745e+02, threshold=1.973e+02, percent-clipped=0.0 +2024-09-17 09:42:38,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=195040.0, ans=0.1 +2024-09-17 09:42:41,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=195040.0, ans=0.0 +2024-09-17 09:42:47,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.12 vs. limit=15.0 +2024-09-17 09:43:23,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=195160.0, ans=0.125 +2024-09-17 09:43:29,167 INFO [train.py:1198] (1/2) Epoch 11, batch 3550, loss[loss=0.2673, ctc_loss=0.1635, cr_loss=0.4022, attn_decoder_loss=0.2699, over 29717.00 frames. ], tot_loss[loss=0.263, ctc_loss=0.1663, cr_loss=0.4037, attn_decoder_loss=0.2648, over 5784456.20 frames. ], batch size: 89, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:43:41,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=195200.0, ans=0.1 +2024-09-17 09:43:52,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=195240.0, ans=0.035 +2024-09-17 09:44:04,922 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.90 vs. limit=15.0 +2024-09-17 09:44:08,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=195280.0, ans=0.1 +2024-09-17 09:44:08,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=195280.0, ans=0.125 +2024-09-17 09:44:12,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=195280.0, ans=0.0 +2024-09-17 09:44:14,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=195320.0, ans=0.0 +2024-09-17 09:44:35,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=195360.0, ans=0.2 +2024-09-17 09:44:45,312 INFO [train.py:1198] (1/2) Epoch 11, batch 3600, loss[loss=0.2529, ctc_loss=0.159, cr_loss=0.3865, attn_decoder_loss=0.2547, over 29493.00 frames. ], tot_loss[loss=0.2629, ctc_loss=0.1659, cr_loss=0.4031, attn_decoder_loss=0.2647, over 5793146.43 frames. 
], batch size: 77, lr: 1.01e-02, grad_scale: 16.0 +2024-09-17 09:44:58,574 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.269e+01 9.046e+01 9.949e+01 1.066e+02 3.484e+02, threshold=1.990e+02, percent-clipped=1.0 +2024-09-17 09:45:09,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=195440.0, ans=0.125 +2024-09-17 09:45:19,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.14 vs. limit=10.0 +2024-09-17 09:45:34,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=195520.0, ans=0.04949747468305833 +2024-09-17 09:45:40,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=195520.0, ans=0.125 +2024-09-17 09:45:51,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.74 vs. limit=15.0 +2024-09-17 09:45:58,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=195560.0, ans=0.125 +2024-09-17 09:46:01,229 INFO [train.py:1198] (1/2) Epoch 11, batch 3650, loss[loss=0.2708, ctc_loss=0.1738, cr_loss=0.4263, attn_decoder_loss=0.2721, over 29492.00 frames. ], tot_loss[loss=0.2626, ctc_loss=0.1658, cr_loss=0.4034, attn_decoder_loss=0.2644, over 5795747.85 frames. ], batch size: 90, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:46:08,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=195600.0, ans=0.2 +2024-09-17 09:46:13,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=195600.0, ans=0.125 +2024-09-17 09:46:17,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=195640.0, ans=0.1 +2024-09-17 09:46:22,263 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:46:25,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=195640.0, ans=0.125 +2024-09-17 09:46:28,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=195640.0, ans=0.125 +2024-09-17 09:46:32,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=195680.0, ans=0.125 +2024-09-17 09:46:34,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=195680.0, ans=0.2 +2024-09-17 09:46:35,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=195680.0, ans=0.0 +2024-09-17 09:46:52,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.21 vs. limit=15.0 +2024-09-17 09:46:54,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.21 vs. 
limit=15.0 +2024-09-17 09:46:57,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=195720.0, ans=0.125 +2024-09-17 09:47:05,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=195760.0, ans=0.025 +2024-09-17 09:47:14,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.min_positive, batch_count=195800.0, ans=0.025 +2024-09-17 09:47:15,837 INFO [train.py:1198] (1/2) Epoch 11, batch 3700, loss[loss=0.27, ctc_loss=0.1652, cr_loss=0.3919, attn_decoder_loss=0.273, over 29700.00 frames. ], tot_loss[loss=0.2631, ctc_loss=0.1661, cr_loss=0.4039, attn_decoder_loss=0.2649, over 5806568.30 frames. ], batch size: 84, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:47:16,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=195800.0, ans=0.125 +2024-09-17 09:47:29,200 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.150e+01 9.197e+01 9.899e+01 1.076e+02 2.230e+02, threshold=1.980e+02, percent-clipped=1.0 +2024-09-17 09:47:39,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=195840.0, ans=0.125 +2024-09-17 09:47:47,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=195880.0, ans=0.0 +2024-09-17 09:47:55,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=195880.0, ans=0.125 +2024-09-17 09:48:30,220 INFO [train.py:1198] (1/2) Epoch 11, batch 3750, loss[loss=0.2281, ctc_loss=0.1386, cr_loss=0.348, attn_decoder_loss=0.2303, over 29379.00 frames. ], tot_loss[loss=0.2627, ctc_loss=0.1656, cr_loss=0.4033, attn_decoder_loss=0.2645, over 5810637.82 frames. ], batch size: 67, lr: 1.01e-02, grad_scale: 8.0 +2024-09-17 09:49:01,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=196080.0, ans=0.125 +2024-09-17 09:49:10,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=196080.0, ans=0.025 +2024-09-17 09:49:22,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=196120.0, ans=0.1 +2024-09-17 09:49:23,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=196120.0, ans=0.1 +2024-09-17 09:49:34,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=196160.0, ans=0.0 +2024-09-17 09:49:38,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=196160.0, ans=0.125 +2024-09-17 09:49:44,097 INFO [train.py:1198] (1/2) Epoch 11, batch 3800, loss[loss=0.265, ctc_loss=0.1616, cr_loss=0.4087, attn_decoder_loss=0.2674, over 29643.00 frames. ], tot_loss[loss=0.2624, ctc_loss=0.1653, cr_loss=0.4032, attn_decoder_loss=0.2642, over 5800265.13 frames. 
], batch size: 86, lr: 1.01e-02, grad_scale: 8.0
+2024-09-17 09:49:57,599 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.230e+01 9.992e+01 1.078e+02 1.190e+02 1.793e+02, threshold=2.156e+02, percent-clipped=0.0
+2024-09-17 09:49:57,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=196240.0, ans=0.2
+2024-09-17 09:50:03,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=196240.0, ans=0.025
+2024-09-17 09:50:11,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=196240.0, ans=0.07
+2024-09-17 09:50:33,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=196320.0, ans=0.125
+2024-09-17 09:50:47,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=196360.0, ans=0.125
+2024-09-17 09:50:48,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=196360.0, ans=0.1
+2024-09-17 09:51:00,331 INFO [train.py:1198] (1/2) Epoch 11, batch 3850, loss[loss=0.2869, ctc_loss=0.1862, cr_loss=0.4145, attn_decoder_loss=0.2889, over 29270.00 frames. ], tot_loss[loss=0.2624, ctc_loss=0.1653, cr_loss=0.4037, attn_decoder_loss=0.2642, over 5814533.93 frames. ], batch size: 100, lr: 1.01e-02, grad_scale: 8.0
+2024-09-17 09:51:29,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=196480.0, ans=15.0
+2024-09-17 09:51:34,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=196480.0, ans=0.0
+2024-09-17 09:51:45,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=196520.0, ans=0.04949747468305833
+2024-09-17 09:51:46,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=196520.0, ans=0.125
+2024-09-17 09:51:54,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=196520.0, ans=0.125
+2024-09-17 09:52:04,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=196560.0, ans=0.2
+2024-09-17 09:52:05,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=196560.0, ans=0.1
+2024-09-17 09:52:07,868 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.00 vs. limit=15.0
+2024-09-17 09:52:16,111 INFO [train.py:1198] (1/2) Epoch 11, batch 3900, loss[loss=0.2814, ctc_loss=0.1734, cr_loss=0.3995, attn_decoder_loss=0.2846, over 29619.00 frames. ], tot_loss[loss=0.2627, ctc_loss=0.1655, cr_loss=0.4045, attn_decoder_loss=0.2645, over 5818547.26 frames. ], batch size: 86, lr: 1.00e-02, grad_scale: 8.0
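The `Whitening: name=..., metric=... vs. limit=...` entries monitor how far each module's activation covariance is from isotropic ("white"); a corrective penalty kicks in only when the metric exceeds the module's whitening limit (the limits of 5.0–22.5 also appear above as `whitening_limit` ScheduledFloat values). One plausible way to compute such a metric, where mean(diag(C·C)) / mean(diag(C))² equals 1.0 for a perfectly white covariance C and grows as a few directions dominate; this is an illustrative reconstruction, since icefall's exact formula in scaling.py may differ:

```python
import torch

def whitening_metric(x: torch.Tensor, num_groups: int) -> float:
    """Anisotropy of the per-group covariance of x (frames x channels):
    1.0 when the covariance is a multiple of the identity, larger as it
    becomes lopsided. Illustrative stand-in for the quantity behind the
    'metric=... vs. limit=...' log lines above."""
    num_frames, num_channels = x.shape
    g = num_channels // num_groups
    x = x.reshape(num_frames, num_groups, g).transpose(0, 1)  # (G, N, g)
    x = x - x.mean(dim=1, keepdim=True)
    covar = x.transpose(1, 2) @ x / num_frames                # (G, g, g)
    diag_mean = covar.diagonal(dim1=1, dim2=2).mean()
    sq_diag_mean = (covar @ covar).diagonal(dim1=1, dim2=2).mean()
    return (sq_diag_mean / diag_mean.pow(2)).item()

# White noise scores near 1.0, far under the logged limits of 5.0-22.5:
print(whitening_metric(torch.randn(10000, 128), num_groups=1))
```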
+2024-09-17 09:52:23,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=196600.0, ans=0.0
+2024-09-17 09:52:25,763 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.63 vs. limit=22.5
+2024-09-17 09:52:29,225 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.740e+01 9.236e+01 9.717e+01 1.078e+02 1.405e+02, threshold=1.943e+02, percent-clipped=0.0
+2024-09-17 09:52:44,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=196680.0, ans=0.0
+2024-09-17 09:52:57,682 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-17 09:52:59,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=196720.0, ans=0.025
+2024-09-17 09:53:13,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=196760.0, ans=0.125
+2024-09-17 09:53:30,384 INFO [train.py:1198] (1/2) Epoch 11, batch 3950, loss[loss=0.2886, ctc_loss=0.1844, cr_loss=0.4488, attn_decoder_loss=0.2902, over 29453.00 frames. ], tot_loss[loss=0.2625, ctc_loss=0.1648, cr_loss=0.4032, attn_decoder_loss=0.2644, over 5837318.54 frames. ], batch size: 97, lr: 1.00e-02, grad_scale: 4.0
+2024-09-17 09:53:48,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=196840.0, ans=0.125
+2024-09-17 09:54:01,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=196880.0, ans=0.2
+2024-09-17 09:54:19,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=196920.0, ans=0.125
+2024-09-17 09:54:21,299 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.01 vs. limit=15.0
+2024-09-17 09:54:41,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=196960.0, ans=0.1
+2024-09-17 09:54:43,889 INFO [train.py:1198] (1/2) Epoch 11, batch 4000, loss[loss=0.2556, ctc_loss=0.1634, cr_loss=0.4015, attn_decoder_loss=0.2569, over 29479.00 frames. ], tot_loss[loss=0.2628, ctc_loss=0.1656, cr_loss=0.4041, attn_decoder_loss=0.2646, over 5815391.16 frames. ], batch size: 74, lr: 1.00e-02, grad_scale: 8.0
+2024-09-17 09:54:47,974 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.83 vs. limit=15.0
+2024-09-17 09:54:58,574 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.223e+01 9.139e+01 9.851e+01 1.070e+02 1.973e+02, threshold=1.970e+02, percent-clipped=1.0
+2024-09-17 09:55:04,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=197040.0, ans=0.025
+2024-09-17 09:55:06,786 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.40 vs.
limit=15.0 +2024-09-17 09:55:09,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=197040.0, ans=0.2 +2024-09-17 09:55:37,532 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:55:44,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=197160.0, ans=0.2 +2024-09-17 09:55:52,627 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.10 vs. limit=15.0 +2024-09-17 09:55:59,405 INFO [train.py:1198] (1/2) Epoch 11, batch 4050, loss[loss=0.2876, ctc_loss=0.2122, cr_loss=0.4092, attn_decoder_loss=0.2869, over 20358.00 frames. ], tot_loss[loss=0.2626, ctc_loss=0.1655, cr_loss=0.4036, attn_decoder_loss=0.2644, over 5799281.86 frames. ], batch size: 210, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:56:02,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=197200.0, ans=0.025 +2024-09-17 09:56:09,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=197200.0, ans=0.125 +2024-09-17 09:56:09,899 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 09:56:09,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=197200.0, ans=0.125 +2024-09-17 09:56:50,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=197320.0, ans=0.0 +2024-09-17 09:56:53,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=197320.0, ans=0.125 +2024-09-17 09:56:55,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=197320.0, ans=0.2 +2024-09-17 09:56:55,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.46 vs. limit=15.0 +2024-09-17 09:57:02,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=197360.0, ans=0.0 +2024-09-17 09:57:08,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=197360.0, ans=0.125 +2024-09-17 09:57:13,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=197400.0, ans=10.0 +2024-09-17 09:57:14,014 INFO [train.py:1198] (1/2) Epoch 11, batch 4100, loss[loss=0.2864, ctc_loss=0.1881, cr_loss=0.4368, attn_decoder_loss=0.2876, over 29489.00 frames. ], tot_loss[loss=0.2628, ctc_loss=0.1657, cr_loss=0.4044, attn_decoder_loss=0.2646, over 5794609.70 frames. ], batch size: 90, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:57:30,193 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.589e+01 9.399e+01 1.013e+02 1.118e+02 3.429e+02, threshold=2.026e+02, percent-clipped=2.0 +2024-09-17 09:58:28,185 INFO [train.py:1198] (1/2) Epoch 11, batch 4150, loss[loss=0.2555, ctc_loss=0.1623, cr_loss=0.3954, attn_decoder_loss=0.2571, over 29494.00 frames. 
], tot_loss[loss=0.2627, ctc_loss=0.166, cr_loss=0.4049, attn_decoder_loss=0.2645, over 5799893.98 frames. ], batch size: 77, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:58:34,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=197600.0, ans=0.1 +2024-09-17 09:58:55,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=197640.0, ans=0.125 +2024-09-17 09:59:03,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=197680.0, ans=0.125 +2024-09-17 09:59:30,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=197760.0, ans=0.0 +2024-09-17 09:59:33,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=197760.0, ans=0.0 +2024-09-17 09:59:37,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=19.15 vs. limit=22.5 +2024-09-17 09:59:42,926 INFO [train.py:1198] (1/2) Epoch 11, batch 4200, loss[loss=0.2758, ctc_loss=0.1824, cr_loss=0.4404, attn_decoder_loss=0.2764, over 29477.00 frames. ], tot_loss[loss=0.2627, ctc_loss=0.1657, cr_loss=0.4044, attn_decoder_loss=0.2645, over 5802005.13 frames. ], batch size: 90, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 09:59:45,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=197800.0, ans=0.0 +2024-09-17 09:59:59,217 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.965e+01 8.983e+01 9.678e+01 1.042e+02 2.526e+02, threshold=1.936e+02, percent-clipped=1.0 +2024-09-17 10:00:08,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=197840.0, ans=0.125 +2024-09-17 10:00:40,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=197920.0, ans=0.125 +2024-09-17 10:00:56,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=198000.0, ans=0.125 +2024-09-17 10:00:57,513 INFO [train.py:1198] (1/2) Epoch 11, batch 4250, loss[loss=0.2413, ctc_loss=0.1455, cr_loss=0.3937, attn_decoder_loss=0.2431, over 29504.00 frames. ], tot_loss[loss=0.2629, ctc_loss=0.1655, cr_loss=0.4043, attn_decoder_loss=0.2648, over 5808187.19 frames. ], batch size: 74, lr: 1.00e-02, grad_scale: 4.0 +2024-09-17 10:01:28,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=198080.0, ans=0.125 +2024-09-17 10:01:31,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=198080.0, ans=0.0 +2024-09-17 10:01:44,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=198120.0, ans=0.125 +2024-09-17 10:01:50,141 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.76 vs. limit=22.5 +2024-09-17 10:02:11,157 INFO [train.py:1198] (1/2) Epoch 11, batch 4300, loss[loss=0.2702, ctc_loss=0.1686, cr_loss=0.4211, attn_decoder_loss=0.2722, over 29527.00 frames. 
], tot_loss[loss=0.2627, ctc_loss=0.1652, cr_loss=0.403, attn_decoder_loss=0.2646, over 5797640.47 frames. ], batch size: 87, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 10:02:11,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=198200.0, ans=0.125 +2024-09-17 10:02:17,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=198200.0, ans=0.0 +2024-09-17 10:02:26,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=198240.0, ans=0.125 +2024-09-17 10:02:29,145 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.197e+01 9.375e+01 1.006e+02 1.083e+02 2.279e+02, threshold=2.011e+02, percent-clipped=1.0 +2024-09-17 10:02:40,434 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.95 vs. limit=15.0 +2024-09-17 10:03:04,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=198320.0, ans=0.07 +2024-09-17 10:03:17,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=198360.0, ans=0.5 +2024-09-17 10:03:27,242 INFO [train.py:1198] (1/2) Epoch 11, batch 4350, loss[loss=0.2869, ctc_loss=0.1832, cr_loss=0.4409, attn_decoder_loss=0.2887, over 29449.00 frames. ], tot_loss[loss=0.2663, ctc_loss=0.1686, cr_loss=0.4082, attn_decoder_loss=0.2681, over 5799604.64 frames. ], batch size: 97, lr: 1.00e-02, grad_scale: 8.0 +2024-09-17 10:03:41,291 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.99 vs. limit=15.0 +2024-09-17 10:03:41,467 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.90 vs. limit=12.0 +2024-09-17 10:03:42,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=198440.0, ans=0.125 +2024-09-17 10:04:40,214 INFO [train.py:1198] (1/2) Epoch 11, batch 4400, loss[loss=0.2822, ctc_loss=0.1905, cr_loss=0.4274, attn_decoder_loss=0.2829, over 27258.00 frames. ], tot_loss[loss=0.2685, ctc_loss=0.1707, cr_loss=0.4111, attn_decoder_loss=0.2703, over 5766483.59 frames. 
], batch size: 125, lr: 1.00e-02, grad_scale: 16.0
+2024-09-17 10:04:59,280 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.516e+01 9.761e+01 1.030e+02 1.162e+02 9.107e+02, threshold=2.060e+02, percent-clipped=3.0
+2024-09-17 10:05:01,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=198640.0, ans=0.125
+2024-09-17 10:05:03,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=198640.0, ans=0.1
+2024-09-17 10:05:18,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=198680.0, ans=0.0
+2024-09-17 10:05:21,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=198680.0, ans=0.0
+2024-09-17 10:05:22,059 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.76 vs. limit=15.0
+2024-09-17 10:05:24,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=198720.0, ans=0.125
+2024-09-17 10:05:25,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=198720.0, ans=0.2
+2024-09-17 10:05:55,214 INFO [train.py:1198] (1/2) Epoch 11, batch 4450, loss[loss=0.2988, ctc_loss=0.2244, cr_loss=0.4524, attn_decoder_loss=0.297, over 20058.00 frames. ], tot_loss[loss=0.2721, ctc_loss=0.1764, cr_loss=0.4154, attn_decoder_loss=0.2735, over 5575744.19 frames. ], batch size: 209, lr: 9.99e-03, grad_scale: 8.0
+2024-09-17 10:06:04,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=198800.0, ans=0.125
+2024-09-17 10:06:09,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=198840.0, ans=0.0
+2024-09-17 10:06:19,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=198840.0, ans=0.05
+2024-09-17 10:06:19,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=198840.0, ans=0.125
+2024-09-17 10:06:40,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=198920.0, ans=0.0
+2024-09-17 10:06:43,235 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=7.31 vs. limit=15.0
+2024-09-17 10:06:49,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=198920.0, ans=0.125
+2024-09-17 10:06:51,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=198920.0, ans=0.125
+2024-09-17 10:07:11,092 INFO [train.py:1198] (1/2) Epoch 11, batch 4500, loss[loss=0.2841, ctc_loss=0.2002, cr_loss=0.4225, attn_decoder_loss=0.284, over 21358.00 frames. ], tot_loss[loss=0.2755, ctc_loss=0.1826, cr_loss=0.4175, attn_decoder_loss=0.2766, over 5238002.99 frames. ], batch size: 209, lr: 9.99e-03, grad_scale: 8.0
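The `grad_scale` field in the training lines above moves among 16.0, 8.0 and 4.0. That pattern matches mixed-precision loss scaling, where the scale is halved whenever a step overflows in float16 and grown back after a run of clean steps. A generic, self-contained sketch using `torch.cuda.amp.GradScaler` (the repo's train.py may manage the scale differently):

```python
import torch

# Stand-in model and data; on CPU, AMP is simply disabled.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(16, 1).to(device)
opt = torch.optim.SGD(model.parameters(), lr=1e-2)
use_amp = device == "cuda"
scaler = torch.cuda.amp.GradScaler(init_scale=16.0, enabled=use_amp)

for step in range(10):
    x = torch.randn(8, 16, device=device)
    opt.zero_grad()
    with torch.cuda.amp.autocast(enabled=use_amp):
        loss = model(x).pow(2).mean()
    scaler.scale(loss).backward()    # backward on the scaled loss
    scaler.step(opt)                 # unscales grads; skips the step on inf/nan
    scaler.update()                  # halves the scale on overflow, else grows it
    print(step, scaler.get_scale())  # the quantity logged as grad_scale
```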
+2024-09-17 10:07:14,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=199000.0, ans=0.0
+2024-09-17 10:07:17,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=199000.0, ans=0.125
+2024-09-17 10:07:23,454 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.42 vs. limit=22.5
+2024-09-17 10:07:26,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=199040.0, ans=0.0
+2024-09-17 10:07:31,558 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.514e+01 1.082e+02 1.141e+02 1.239e+02 5.446e+02, threshold=2.282e+02, percent-clipped=2.0
+2024-09-17 10:08:38,953 INFO [train.py:1198] (1/2) Epoch 12, batch 0, loss[loss=0.2548, ctc_loss=0.161, cr_loss=0.3915, attn_decoder_loss=0.2565, over 29591.00 frames. ], tot_loss[loss=0.2548, ctc_loss=0.161, cr_loss=0.3915, attn_decoder_loss=0.2565, over 29591.00 frames. ], batch size: 73, lr: 9.56e-03, grad_scale: 16.0
+2024-09-17 10:08:38,954 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-17 10:08:46,525 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.2.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([4.0861, 3.9318, 3.6337, 3.4470], device='cuda:1')
+2024-09-17 10:08:57,356 INFO [train.py:1230] (1/2) Epoch 12, validation: loss=0.2149, ctc_loss=0.04611, cr_loss=4.481e-15, attn_decoder_loss=0.2337, over 944034.00 frames.
+2024-09-17 10:08:57,357 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-17 10:09:18,259 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.57 vs. limit=22.5
+2024-09-17 10:09:18,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=199140.0, ans=0.1
+2024-09-17 10:09:20,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=199140.0, ans=0.125
+2024-09-17 10:09:31,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=199180.0, ans=0.125
+2024-09-17 10:09:51,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=199220.0, ans=0.125
+2024-09-17 10:09:53,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=199220.0, ans=0.95
+2024-09-17 10:09:57,875 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=4.22 vs. limit=12.0
+2024-09-17 10:09:58,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=199260.0, ans=0.125
+2024-09-17 10:10:13,549 INFO [train.py:1198] (1/2) Epoch 12, batch 50, loss[loss=0.2305, ctc_loss=0.1379, cr_loss=0.3819, attn_decoder_loss=0.2323, over 29432.00 frames. ], tot_loss[loss=0.2645, ctc_loss=0.1681, cr_loss=0.4092, attn_decoder_loss=0.2661, over 1268096.16 frames.
], batch size: 70, lr: 9.56e-03, grad_scale: 8.0 +2024-09-17 10:10:15,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=199300.0, ans=0.0 +2024-09-17 10:10:35,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=199340.0, ans=0.125 +2024-09-17 10:10:44,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=199380.0, ans=0.125 +2024-09-17 10:11:03,288 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.75 vs. limit=15.0 +2024-09-17 10:11:04,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=199420.0, ans=0.025 +2024-09-17 10:11:16,126 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.442e+01 9.517e+01 1.012e+02 1.140e+02 5.609e+02, threshold=2.023e+02, percent-clipped=2.0 +2024-09-17 10:11:16,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=199460.0, ans=0.0 +2024-09-17 10:11:33,302 INFO [train.py:1198] (1/2) Epoch 12, batch 100, loss[loss=0.2601, ctc_loss=0.1561, cr_loss=0.39, attn_decoder_loss=0.263, over 29551.00 frames. ], tot_loss[loss=0.2665, ctc_loss=0.1692, cr_loss=0.4113, attn_decoder_loss=0.2682, over 2253680.23 frames. ], batch size: 76, lr: 9.56e-03, grad_scale: 8.0 +2024-09-17 10:11:57,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=199540.0, ans=0.0 +2024-09-17 10:12:47,661 INFO [train.py:1198] (1/2) Epoch 12, batch 150, loss[loss=0.236, ctc_loss=0.1335, cr_loss=0.3657, attn_decoder_loss=0.2393, over 29424.00 frames. ], tot_loss[loss=0.2636, ctc_loss=0.166, cr_loss=0.4052, attn_decoder_loss=0.2654, over 3049262.22 frames. ], batch size: 70, lr: 9.55e-03, grad_scale: 8.0 +2024-09-17 10:12:47,964 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:13:06,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=199740.0, ans=0.125 +2024-09-17 10:13:33,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=199820.0, ans=0.2 +2024-09-17 10:13:39,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=199820.0, ans=0.125 +2024-09-17 10:13:47,578 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.984e+01 9.054e+01 9.523e+01 1.007e+02 1.391e+02, threshold=1.905e+02, percent-clipped=0.0 +2024-09-17 10:13:52,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=199860.0, ans=0.09899494936611666 +2024-09-17 10:13:55,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=199860.0, ans=0.1 +2024-09-17 10:14:02,684 INFO [train.py:1198] (1/2) Epoch 12, batch 200, loss[loss=0.2702, ctc_loss=0.1738, cr_loss=0.4121, attn_decoder_loss=0.2718, over 27144.00 frames. ], tot_loss[loss=0.2617, ctc_loss=0.1642, cr_loss=0.4031, attn_decoder_loss=0.2635, over 3660327.54 frames. 
], batch size: 124, lr: 9.55e-03, grad_scale: 8.0 +2024-09-17 10:14:30,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=199940.0, ans=0.1 +2024-09-17 10:14:38,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=199980.0, ans=0.0 +2024-09-17 10:14:43,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=199980.0, ans=0.0 +2024-09-17 10:15:08,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.28 vs. limit=15.0 +2024-09-17 10:15:20,794 INFO [train.py:1198] (1/2) Epoch 12, batch 250, loss[loss=0.2671, ctc_loss=0.1625, cr_loss=0.378, attn_decoder_loss=0.2703, over 29289.00 frames. ], tot_loss[loss=0.2614, ctc_loss=0.1636, cr_loss=0.4022, attn_decoder_loss=0.2633, over 4142597.14 frames. ], batch size: 100, lr: 9.54e-03, grad_scale: 8.0 +2024-09-17 10:15:25,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=200100.0, ans=0.125 +2024-09-17 10:15:35,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=200100.0, ans=0.125 +2024-09-17 10:15:48,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=200140.0, ans=0.125 +2024-09-17 10:16:23,313 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.843e+01 9.074e+01 9.707e+01 1.061e+02 3.060e+02, threshold=1.941e+02, percent-clipped=1.0 +2024-09-17 10:16:26,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=200260.0, ans=0.025 +2024-09-17 10:16:38,682 INFO [train.py:1198] (1/2) Epoch 12, batch 300, loss[loss=0.2878, ctc_loss=0.1908, cr_loss=0.4295, attn_decoder_loss=0.2891, over 29572.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1633, cr_loss=0.402, attn_decoder_loss=0.2632, over 4512163.52 frames. ], batch size: 92, lr: 9.54e-03, grad_scale: 8.0 +2024-09-17 10:16:49,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=200300.0, ans=0.0 +2024-09-17 10:17:18,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=200380.0, ans=0.125 +2024-09-17 10:17:22,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=200420.0, ans=0.1 +2024-09-17 10:17:28,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=200420.0, ans=0.125 +2024-09-17 10:17:32,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.00 vs. limit=15.0 +2024-09-17 10:17:34,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=200420.0, ans=0.1 +2024-09-17 10:17:54,235 INFO [train.py:1198] (1/2) Epoch 12, batch 350, loss[loss=0.2389, ctc_loss=0.1444, cr_loss=0.3762, attn_decoder_loss=0.241, over 29333.00 frames. ], tot_loss[loss=0.2615, ctc_loss=0.1634, cr_loss=0.402, attn_decoder_loss=0.2635, over 4795911.89 frames. 
], batch size: 71, lr: 9.53e-03, grad_scale: 8.0 +2024-09-17 10:17:54,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=200500.0, ans=0.125 +2024-09-17 10:18:05,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=200500.0, ans=0.125 +2024-09-17 10:18:45,631 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.45 vs. limit=12.0 +2024-09-17 10:18:56,609 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.651e+01 9.289e+01 1.000e+02 1.114e+02 4.401e+02, threshold=2.000e+02, percent-clipped=4.0 +2024-09-17 10:19:04,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=8.32 vs. limit=15.0 +2024-09-17 10:19:11,721 INFO [train.py:1198] (1/2) Epoch 12, batch 400, loss[loss=0.2842, ctc_loss=0.1833, cr_loss=0.4478, attn_decoder_loss=0.2854, over 29703.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.1625, cr_loss=0.4015, attn_decoder_loss=0.2628, over 5025191.70 frames. ], batch size: 82, lr: 9.53e-03, grad_scale: 16.0 +2024-09-17 10:19:12,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=200700.0, ans=0.125 +2024-09-17 10:19:15,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=200700.0, ans=0.2 +2024-09-17 10:19:30,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=200740.0, ans=0.125 +2024-09-17 10:19:32,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=200740.0, ans=0.1 +2024-09-17 10:19:34,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.51 vs. limit=6.0 +2024-09-17 10:19:56,085 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.89 vs. limit=6.0 +2024-09-17 10:20:07,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=200820.0, ans=0.125 +2024-09-17 10:20:13,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=200860.0, ans=0.0 +2024-09-17 10:20:21,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=200860.0, ans=0.125 +2024-09-17 10:20:30,344 INFO [train.py:1198] (1/2) Epoch 12, batch 450, loss[loss=0.2766, ctc_loss=0.1787, cr_loss=0.4215, attn_decoder_loss=0.2781, over 29671.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1626, cr_loss=0.4017, attn_decoder_loss=0.2629, over 5186767.80 frames. 
], batch size: 83, lr: 9.52e-03, grad_scale: 8.0 +2024-09-17 10:20:44,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=200940.0, ans=0.125 +2024-09-17 10:20:55,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=200940.0, ans=0.125 +2024-09-17 10:21:16,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=201020.0, ans=0.0 +2024-09-17 10:21:30,719 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.25 vs. limit=22.5 +2024-09-17 10:21:32,801 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.842e+01 9.090e+01 9.686e+01 1.023e+02 4.799e+02, threshold=1.937e+02, percent-clipped=1.0 +2024-09-17 10:21:39,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=201060.0, ans=0.0 +2024-09-17 10:21:46,300 INFO [train.py:1198] (1/2) Epoch 12, batch 500, loss[loss=0.281, ctc_loss=0.1823, cr_loss=0.4641, attn_decoder_loss=0.2817, over 29404.00 frames. ], tot_loss[loss=0.2606, ctc_loss=0.1623, cr_loss=0.4016, attn_decoder_loss=0.2626, over 5329603.95 frames. ], batch size: 94, lr: 9.52e-03, grad_scale: 8.0 +2024-09-17 10:21:46,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=201100.0, ans=0.125 +2024-09-17 10:21:55,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=201100.0, ans=0.125 +2024-09-17 10:21:59,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.28 vs. limit=15.0 +2024-09-17 10:22:08,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=201140.0, ans=0.2 +2024-09-17 10:22:11,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.43 vs. limit=22.5 +2024-09-17 10:22:15,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=201140.0, ans=0.125 +2024-09-17 10:22:20,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=201180.0, ans=0.025 +2024-09-17 10:22:21,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=201180.0, ans=0.05 +2024-09-17 10:22:37,878 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.79 vs. 
limit=12.0 +2024-09-17 10:22:46,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=201220.0, ans=10.0 +2024-09-17 10:22:47,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=201260.0, ans=0.1 +2024-09-17 10:22:47,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=201260.0, ans=0.0 +2024-09-17 10:22:50,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=201260.0, ans=0.2 +2024-09-17 10:22:59,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=201260.0, ans=0.0 +2024-09-17 10:23:01,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=201260.0, ans=0.125 +2024-09-17 10:23:03,906 INFO [train.py:1198] (1/2) Epoch 12, batch 550, loss[loss=0.278, ctc_loss=0.1677, cr_loss=0.414, attn_decoder_loss=0.2811, over 28896.00 frames. ], tot_loss[loss=0.2606, ctc_loss=0.1624, cr_loss=0.4014, attn_decoder_loss=0.2626, over 5420818.32 frames. ], batch size: 104, lr: 9.51e-03, grad_scale: 4.0 +2024-09-17 10:23:06,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=23.16 vs. limit=15.0 +2024-09-17 10:23:39,075 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.23 vs. limit=6.0 +2024-09-17 10:23:54,550 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.65 vs. limit=12.0 +2024-09-17 10:24:01,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.06 vs. limit=15.0 +2024-09-17 10:24:04,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=201420.0, ans=0.0 +2024-09-17 10:24:06,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=201460.0, ans=0.0 +2024-09-17 10:24:10,194 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.800e+01 9.023e+01 9.660e+01 1.069e+02 2.891e+02, threshold=1.932e+02, percent-clipped=2.0 +2024-09-17 10:24:11,264 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.43 vs. limit=15.0 +2024-09-17 10:24:13,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=201460.0, ans=0.0 +2024-09-17 10:24:22,370 INFO [train.py:1198] (1/2) Epoch 12, batch 600, loss[loss=0.2753, ctc_loss=0.1718, cr_loss=0.4037, attn_decoder_loss=0.2779, over 29216.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.1629, cr_loss=0.4017, attn_decoder_loss=0.2631, over 5507473.02 frames. ], batch size: 100, lr: 9.51e-03, grad_scale: 8.0 +2024-09-17 10:24:27,610 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.65 vs. 
limit=15.0 +2024-09-17 10:24:35,122 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.09 vs. limit=15.0 +2024-09-17 10:24:42,632 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.57 vs. limit=15.0 +2024-09-17 10:24:58,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.27 vs. limit=22.5 +2024-09-17 10:25:08,696 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.27 vs. limit=22.5 +2024-09-17 10:25:20,908 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.39 vs. limit=22.5 +2024-09-17 10:25:21,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=201660.0, ans=0.0 +2024-09-17 10:25:23,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=201660.0, ans=0.125 +2024-09-17 10:25:38,287 INFO [train.py:1198] (1/2) Epoch 12, batch 650, loss[loss=0.2595, ctc_loss=0.1617, cr_loss=0.4141, attn_decoder_loss=0.2612, over 29760.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.1621, cr_loss=0.4008, attn_decoder_loss=0.2625, over 5585513.00 frames. ], batch size: 81, lr: 9.50e-03, grad_scale: 8.0 +2024-09-17 10:25:40,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.16 vs. limit=15.0 +2024-09-17 10:25:52,942 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.82 vs. limit=22.5 +2024-09-17 10:25:53,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=201740.0, ans=0.0 +2024-09-17 10:26:11,674 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.57 vs. limit=15.0 +2024-09-17 10:26:20,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=201780.0, ans=0.0 +2024-09-17 10:26:29,306 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:26:30,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=201820.0, ans=0.125 +2024-09-17 10:26:43,925 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.946e+01 9.154e+01 9.702e+01 1.037e+02 1.837e+02, threshold=1.940e+02, percent-clipped=0.0 +2024-09-17 10:26:56,071 INFO [train.py:1198] (1/2) Epoch 12, batch 700, loss[loss=0.2566, ctc_loss=0.1557, cr_loss=0.3864, attn_decoder_loss=0.2592, over 29550.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1624, cr_loss=0.4011, attn_decoder_loss=0.2629, over 5636445.29 frames. 
], batch size: 76, lr: 9.50e-03, grad_scale: 8.0 +2024-09-17 10:27:28,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=201980.0, ans=0.125 +2024-09-17 10:27:41,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=201980.0, ans=0.025 +2024-09-17 10:27:54,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=202020.0, ans=0.125 +2024-09-17 10:28:00,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=202060.0, ans=0.0 +2024-09-17 10:28:11,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=202060.0, ans=0.125 +2024-09-17 10:28:14,109 INFO [train.py:1198] (1/2) Epoch 12, batch 750, loss[loss=0.2693, ctc_loss=0.1631, cr_loss=0.4174, attn_decoder_loss=0.2718, over 29721.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1629, cr_loss=0.4012, attn_decoder_loss=0.2628, over 5675727.33 frames. ], batch size: 82, lr: 9.49e-03, grad_scale: 8.0 +2024-09-17 10:28:15,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.97 vs. limit=15.0 +2024-09-17 10:28:21,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=202100.0, ans=0.1 +2024-09-17 10:28:26,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=202100.0, ans=0.125 +2024-09-17 10:28:32,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=202140.0, ans=0.2 +2024-09-17 10:28:32,804 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.76 vs. limit=12.0 +2024-09-17 10:28:33,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=202140.0, ans=0.1 +2024-09-17 10:28:38,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=202140.0, ans=0.125 +2024-09-17 10:28:39,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=202140.0, ans=0.0 +2024-09-17 10:28:40,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.72 vs. limit=15.0 +2024-09-17 10:28:51,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=202180.0, ans=0.0 +2024-09-17 10:28:53,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=202180.0, ans=0.2 +2024-09-17 10:29:09,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten.whitening_limit, batch_count=202220.0, ans=22.5 +2024-09-17 10:29:17,538 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.082e+01 9.622e+01 1.037e+02 1.120e+02 2.104e+02, threshold=2.074e+02, percent-clipped=1.0 +2024-09-17 10:29:29,714 INFO [train.py:1198] (1/2) Epoch 12, batch 800, loss[loss=0.2444, ctc_loss=0.1457, cr_loss=0.3907, attn_decoder_loss=0.2467, over 29621.00 frames. 
], tot_loss[loss=0.2608, ctc_loss=0.1627, cr_loss=0.4009, attn_decoder_loss=0.2627, over 5707061.16 frames. ], batch size: 73, lr: 9.49e-03, grad_scale: 16.0 +2024-09-17 10:29:35,286 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.36 vs. limit=22.5 +2024-09-17 10:29:39,068 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:29:45,935 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.71 vs. limit=15.0 +2024-09-17 10:29:56,525 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:30:07,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=202380.0, ans=0.125 +2024-09-17 10:30:13,543 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:30:39,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=202460.0, ans=0.125 +2024-09-17 10:30:47,501 INFO [train.py:1198] (1/2) Epoch 12, batch 850, loss[loss=0.2738, ctc_loss=0.1626, cr_loss=0.4058, attn_decoder_loss=0.2771, over 29699.00 frames. ], tot_loss[loss=0.2604, ctc_loss=0.1624, cr_loss=0.4005, attn_decoder_loss=0.2624, over 5736412.38 frames. ], batch size: 89, lr: 9.49e-03, grad_scale: 4.0 +2024-09-17 10:31:08,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=202540.0, ans=0.1 +2024-09-17 10:31:36,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=202620.0, ans=0.025 +2024-09-17 10:31:41,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=202620.0, ans=0.1 +2024-09-17 10:31:55,955 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.132e+01 9.370e+01 1.044e+02 1.217e+02 3.517e+02, threshold=2.088e+02, percent-clipped=3.0 +2024-09-17 10:32:02,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=202660.0, ans=0.1 +2024-09-17 10:32:04,983 INFO [train.py:1198] (1/2) Epoch 12, batch 900, loss[loss=0.2409, ctc_loss=0.1492, cr_loss=0.3588, attn_decoder_loss=0.2431, over 29602.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1627, cr_loss=0.4014, attn_decoder_loss=0.2627, over 5740922.98 frames. ], batch size: 73, lr: 9.48e-03, grad_scale: 8.0 +2024-09-17 10:32:56,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=202820.0, ans=0.125 +2024-09-17 10:33:07,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.62 vs. limit=15.0 +2024-09-17 10:33:20,314 INFO [train.py:1198] (1/2) Epoch 12, batch 950, loss[loss=0.2502, ctc_loss=0.1515, cr_loss=0.3871, attn_decoder_loss=0.2525, over 29510.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.163, cr_loss=0.401, attn_decoder_loss=0.2629, over 5743601.38 frames. 
], batch size: 74, lr: 9.48e-03, grad_scale: 8.0 +2024-09-17 10:33:31,110 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:33:45,982 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=16.81 vs. limit=15.0 +2024-09-17 10:34:28,767 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.408e+01 9.367e+01 1.035e+02 1.151e+02 3.076e+02, threshold=2.071e+02, percent-clipped=5.0 +2024-09-17 10:34:29,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=203060.0, ans=0.125 +2024-09-17 10:34:34,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=203060.0, ans=0.5 +2024-09-17 10:34:37,627 INFO [train.py:1198] (1/2) Epoch 12, batch 1000, loss[loss=0.2586, ctc_loss=0.163, cr_loss=0.3967, attn_decoder_loss=0.2604, over 29511.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1639, cr_loss=0.4022, attn_decoder_loss=0.2638, over 5737465.06 frames. ], batch size: 77, lr: 9.47e-03, grad_scale: 8.0 +2024-09-17 10:35:03,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=203140.0, ans=0.0 +2024-09-17 10:35:03,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=203140.0, ans=0.125 +2024-09-17 10:35:03,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=203140.0, ans=0.125 +2024-09-17 10:35:16,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=203180.0, ans=0.125 +2024-09-17 10:35:22,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=203180.0, ans=0.0 +2024-09-17 10:35:43,124 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.02 vs. limit=15.0 +2024-09-17 10:35:55,926 INFO [train.py:1198] (1/2) Epoch 12, batch 1050, loss[loss=0.268, ctc_loss=0.1596, cr_loss=0.3935, attn_decoder_loss=0.2713, over 29678.00 frames. ], tot_loss[loss=0.2614, ctc_loss=0.1636, cr_loss=0.4023, attn_decoder_loss=0.2633, over 5746797.35 frames. 
], batch size: 85, lr: 9.47e-03, grad_scale: 4.0 +2024-09-17 10:36:02,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=203300.0, ans=0.1 +2024-09-17 10:36:22,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=203340.0, ans=0.125 +2024-09-17 10:36:41,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=203420.0, ans=0.125 +2024-09-17 10:36:55,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=203460.0, ans=0.0 +2024-09-17 10:37:04,274 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.982e+01 9.127e+01 9.739e+01 1.067e+02 1.550e+02, threshold=1.948e+02, percent-clipped=0.0 +2024-09-17 10:37:04,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=203460.0, ans=0.0 +2024-09-17 10:37:11,898 INFO [train.py:1198] (1/2) Epoch 12, batch 1100, loss[loss=0.2515, ctc_loss=0.1516, cr_loss=0.393, attn_decoder_loss=0.2538, over 29464.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.163, cr_loss=0.4019, attn_decoder_loss=0.263, over 5758190.90 frames. ], batch size: 78, lr: 9.46e-03, grad_scale: 8.0 +2024-09-17 10:38:02,164 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.56 vs. limit=22.5 +2024-09-17 10:38:06,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=203620.0, ans=0.125 +2024-09-17 10:38:30,364 INFO [train.py:1198] (1/2) Epoch 12, batch 1150, loss[loss=0.2494, ctc_loss=0.1543, cr_loss=0.3731, attn_decoder_loss=0.2516, over 29456.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1632, cr_loss=0.4021, attn_decoder_loss=0.2631, over 5756046.90 frames. ], batch size: 78, lr: 9.46e-03, grad_scale: 8.0 +2024-09-17 10:38:53,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=203740.0, ans=0.0 +2024-09-17 10:39:07,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=203780.0, ans=0.2 +2024-09-17 10:39:08,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=203780.0, ans=0.125 +2024-09-17 10:39:13,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=203780.0, ans=10.0 +2024-09-17 10:39:27,065 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.11 vs. limit=6.0 +2024-09-17 10:39:40,832 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.985e+01 9.529e+01 1.043e+02 1.150e+02 3.679e+02, threshold=2.085e+02, percent-clipped=2.0 +2024-09-17 10:39:47,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=203900.0, ans=0.125 +2024-09-17 10:39:48,421 INFO [train.py:1198] (1/2) Epoch 12, batch 1200, loss[loss=0.2786, ctc_loss=0.1673, cr_loss=0.4536, attn_decoder_loss=0.2809, over 29671.00 frames. ], tot_loss[loss=0.2621, ctc_loss=0.1641, cr_loss=0.4034, attn_decoder_loss=0.2641, over 5748132.60 frames. 
], batch size: 85, lr: 9.45e-03, grad_scale: 16.0 +2024-09-17 10:40:10,733 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.43 vs. limit=15.0 +2024-09-17 10:40:11,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=203940.0, ans=0.2 +2024-09-17 10:40:30,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=203980.0, ans=0.125 +2024-09-17 10:40:42,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=204020.0, ans=0.125 +2024-09-17 10:41:04,803 INFO [train.py:1198] (1/2) Epoch 12, batch 1250, loss[loss=0.2804, ctc_loss=0.1817, cr_loss=0.4238, attn_decoder_loss=0.282, over 29515.00 frames. ], tot_loss[loss=0.2625, ctc_loss=0.1643, cr_loss=0.4044, attn_decoder_loss=0.2644, over 5775295.28 frames. ], batch size: 92, lr: 9.45e-03, grad_scale: 8.0 +2024-09-17 10:41:06,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=204100.0, ans=0.125 +2024-09-17 10:41:24,444 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.38 vs. limit=6.0 +2024-09-17 10:41:37,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=204180.0, ans=0.125 +2024-09-17 10:42:04,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=204220.0, ans=0.125 +2024-09-17 10:42:08,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=204260.0, ans=0.07 +2024-09-17 10:42:14,823 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.69 vs. limit=15.0 +2024-09-17 10:42:16,783 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.687e+01 9.051e+01 9.485e+01 1.007e+02 2.061e+02, threshold=1.897e+02, percent-clipped=0.0 +2024-09-17 10:42:19,028 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.88 vs. limit=10.0 +2024-09-17 10:42:22,706 INFO [train.py:1198] (1/2) Epoch 12, batch 1300, loss[loss=0.2789, ctc_loss=0.1765, cr_loss=0.4154, attn_decoder_loss=0.281, over 28480.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1637, cr_loss=0.403, attn_decoder_loss=0.2638, over 5778725.34 frames. 
], batch size: 112, lr: 9.44e-03, grad_scale: 8.0 +2024-09-17 10:42:35,288 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:42:38,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=204340.0, ans=0.2 +2024-09-17 10:42:39,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=204340.0, ans=0.125 +2024-09-17 10:42:45,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=204340.0, ans=0.125 +2024-09-17 10:43:04,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.48 vs. limit=6.0 +2024-09-17 10:43:35,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=204460.0, ans=0.125 +2024-09-17 10:43:40,861 INFO [train.py:1198] (1/2) Epoch 12, batch 1350, loss[loss=0.2567, ctc_loss=0.1494, cr_loss=0.3641, attn_decoder_loss=0.2606, over 29755.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1627, cr_loss=0.4016, attn_decoder_loss=0.2632, over 5795926.83 frames. ], batch size: 81, lr: 9.44e-03, grad_scale: 8.0 +2024-09-17 10:43:50,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.25 vs. limit=15.0 +2024-09-17 10:44:25,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=204620.0, ans=0.2 +2024-09-17 10:44:49,480 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.879e+01 9.112e+01 9.581e+01 1.019e+02 1.292e+02, threshold=1.916e+02, percent-clipped=0.0 +2024-09-17 10:44:51,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=204660.0, ans=0.025 +2024-09-17 10:44:53,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.15 vs. limit=15.0 +2024-09-17 10:44:55,538 INFO [train.py:1198] (1/2) Epoch 12, batch 1400, loss[loss=0.2213, ctc_loss=0.1216, cr_loss=0.3218, attn_decoder_loss=0.2253, over 29556.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.1622, cr_loss=0.4007, attn_decoder_loss=0.2628, over 5806753.09 frames. ], batch size: 69, lr: 9.44e-03, grad_scale: 8.0 +2024-09-17 10:45:10,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=204740.0, ans=0.2 +2024-09-17 10:45:18,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=204740.0, ans=0.125 +2024-09-17 10:46:03,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=204860.0, ans=0.125 +2024-09-17 10:46:13,810 INFO [train.py:1198] (1/2) Epoch 12, batch 1450, loss[loss=0.2714, ctc_loss=0.1679, cr_loss=0.3995, attn_decoder_loss=0.274, over 29451.00 frames. ], tot_loss[loss=0.2616, ctc_loss=0.1629, cr_loss=0.402, attn_decoder_loss=0.2636, over 5804789.23 frames. 
], batch size: 94, lr: 9.43e-03, grad_scale: 4.0 +2024-09-17 10:46:16,427 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.02 vs. limit=22.5 +2024-09-17 10:46:44,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=204980.0, ans=0.0 +2024-09-17 10:46:53,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=204980.0, ans=0.07 +2024-09-17 10:47:04,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=205020.0, ans=0.125 +2024-09-17 10:47:05,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=205020.0, ans=0.125 +2024-09-17 10:47:26,848 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.748e+01 9.404e+01 1.003e+02 1.073e+02 8.206e+02, threshold=2.005e+02, percent-clipped=2.0 +2024-09-17 10:47:28,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=205060.0, ans=0.125 +2024-09-17 10:47:31,572 INFO [train.py:1198] (1/2) Epoch 12, batch 1500, loss[loss=0.2753, ctc_loss=0.1726, cr_loss=0.4141, attn_decoder_loss=0.2775, over 29626.00 frames. ], tot_loss[loss=0.2617, ctc_loss=0.1629, cr_loss=0.402, attn_decoder_loss=0.2638, over 5805563.93 frames. ], batch size: 86, lr: 9.43e-03, grad_scale: 8.0 +2024-09-17 10:47:38,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=205100.0, ans=0.0 +2024-09-17 10:48:06,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=205180.0, ans=0.1 +2024-09-17 10:48:41,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=205260.0, ans=0.125 +2024-09-17 10:48:42,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=205260.0, ans=0.125 +2024-09-17 10:48:47,594 INFO [train.py:1198] (1/2) Epoch 12, batch 1550, loss[loss=0.2731, ctc_loss=0.1691, cr_loss=0.4089, attn_decoder_loss=0.2755, over 29523.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1632, cr_loss=0.4017, attn_decoder_loss=0.2638, over 5782584.88 frames. 
], batch size: 90, lr: 9.42e-03, grad_scale: 8.0 +2024-09-17 10:49:02,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=205340.0, ans=15.0 +2024-09-17 10:49:03,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=205340.0, ans=0.0 +2024-09-17 10:49:35,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=205420.0, ans=0.2 +2024-09-17 10:49:51,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=205460.0, ans=0.125 +2024-09-17 10:49:54,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=205460.0, ans=0.125 +2024-09-17 10:50:01,908 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.192e+01 9.435e+01 1.085e+02 1.264e+02 1.596e+02, threshold=2.170e+02, percent-clipped=0.0 +2024-09-17 10:50:04,919 INFO [train.py:1198] (1/2) Epoch 12, batch 1600, loss[loss=0.2612, ctc_loss=0.1583, cr_loss=0.3726, attn_decoder_loss=0.2644, over 29677.00 frames. ], tot_loss[loss=0.2617, ctc_loss=0.1632, cr_loss=0.401, attn_decoder_loss=0.2637, over 5766074.05 frames. ], batch size: 85, lr: 9.42e-03, grad_scale: 8.0 +2024-09-17 10:50:12,819 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 10:50:44,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=205580.0, ans=0.025 +2024-09-17 10:51:03,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=205620.0, ans=0.1 +2024-09-17 10:51:08,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=205660.0, ans=0.0 +2024-09-17 10:51:23,146 INFO [train.py:1198] (1/2) Epoch 12, batch 1650, loss[loss=0.2697, ctc_loss=0.1629, cr_loss=0.4229, attn_decoder_loss=0.2722, over 29707.00 frames. ], tot_loss[loss=0.2616, ctc_loss=0.1631, cr_loss=0.4009, attn_decoder_loss=0.2636, over 5760189.51 frames. ], batch size: 89, lr: 9.41e-03, grad_scale: 4.0 +2024-09-17 10:51:25,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=205700.0, ans=0.125 +2024-09-17 10:51:29,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=205700.0, ans=0.0 +2024-09-17 10:51:34,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=205700.0, ans=0.0 +2024-09-17 10:51:39,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=205740.0, ans=0.0 +2024-09-17 10:51:44,714 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.50 vs. 
limit=15.0 +2024-09-17 10:51:48,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=205740.0, ans=0.025 +2024-09-17 10:52:01,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=205780.0, ans=10.0 +2024-09-17 10:52:07,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=205820.0, ans=0.1 +2024-09-17 10:52:14,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=205820.0, ans=0.0 +2024-09-17 10:52:17,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=205820.0, ans=0.125 +2024-09-17 10:52:20,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff3.min_abs, batch_count=205820.0, ans=0.2 +2024-09-17 10:52:36,818 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 9.373e+01 9.988e+01 1.108e+02 2.072e+02, threshold=1.998e+02, percent-clipped=0.0 +2024-09-17 10:52:38,331 INFO [train.py:1198] (1/2) Epoch 12, batch 1700, loss[loss=0.2239, ctc_loss=0.127, cr_loss=0.3608, attn_decoder_loss=0.2266, over 29559.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1626, cr_loss=0.401, attn_decoder_loss=0.2633, over 5781300.34 frames. ], batch size: 69, lr: 9.41e-03, grad_scale: 8.0 +2024-09-17 10:52:40,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=205900.0, ans=0.125 +2024-09-17 10:53:02,670 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.68 vs. limit=15.0 +2024-09-17 10:53:38,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=206020.0, ans=0.125 +2024-09-17 10:53:48,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=206060.0, ans=0.0 +2024-09-17 10:53:50,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=206060.0, ans=0.0 +2024-09-17 10:53:51,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=206060.0, ans=0.07 +2024-09-17 10:53:55,971 INFO [train.py:1198] (1/2) Epoch 12, batch 1750, loss[loss=0.2297, ctc_loss=0.1351, cr_loss=0.356, attn_decoder_loss=0.2323, over 29366.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1621, cr_loss=0.4004, attn_decoder_loss=0.2628, over 5789508.89 frames. ], batch size: 67, lr: 9.40e-03, grad_scale: 8.0 +2024-09-17 10:53:56,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=206100.0, ans=10.0 +2024-09-17 10:54:15,000 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.96 vs. 
limit=15.0 +2024-09-17 10:54:15,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=206140.0, ans=0.125 +2024-09-17 10:54:15,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=206140.0, ans=0.125 +2024-09-17 10:54:20,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=206140.0, ans=0.1 +2024-09-17 10:55:03,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=206260.0, ans=0.2 +2024-09-17 10:55:11,662 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.922e+01 9.583e+01 1.012e+02 1.403e+02, threshold=1.917e+02, percent-clipped=0.0 +2024-09-17 10:55:12,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=206300.0, ans=0.125 +2024-09-17 10:55:13,151 INFO [train.py:1198] (1/2) Epoch 12, batch 1800, loss[loss=0.2718, ctc_loss=0.1594, cr_loss=0.3955, attn_decoder_loss=0.2754, over 29686.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.1624, cr_loss=0.4006, attn_decoder_loss=0.2631, over 5791697.17 frames. ], batch size: 83, lr: 9.40e-03, grad_scale: 8.0 +2024-09-17 10:55:15,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=206300.0, ans=0.2 +2024-09-17 10:55:19,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=206300.0, ans=0.5 +2024-09-17 10:55:22,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=206300.0, ans=0.2 +2024-09-17 10:55:51,931 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.25 vs. limit=15.0 +2024-09-17 10:55:55,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=206380.0, ans=0.1 +2024-09-17 10:55:58,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.66 vs. limit=6.0 +2024-09-17 10:56:03,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=206420.0, ans=0.0 +2024-09-17 10:56:20,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=206460.0, ans=0.07 +2024-09-17 10:56:29,245 INFO [train.py:1198] (1/2) Epoch 12, batch 1850, loss[loss=0.2758, ctc_loss=0.1767, cr_loss=0.4083, attn_decoder_loss=0.2777, over 29656.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.1622, cr_loss=0.4006, attn_decoder_loss=0.2629, over 5797619.89 frames. ], batch size: 86, lr: 9.40e-03, grad_scale: 8.0 +2024-09-17 10:57:44,673 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.500e+01 9.093e+01 9.711e+01 1.054e+02 1.569e+02, threshold=1.942e+02, percent-clipped=0.0 +2024-09-17 10:57:46,171 INFO [train.py:1198] (1/2) Epoch 12, batch 1900, loss[loss=0.2731, ctc_loss=0.173, cr_loss=0.4119, attn_decoder_loss=0.275, over 29713.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.1623, cr_loss=0.401, attn_decoder_loss=0.2632, over 5805322.39 frames. 
], batch size: 89, lr: 9.39e-03, grad_scale: 8.0 +2024-09-17 10:58:24,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=206780.0, ans=0.1 +2024-09-17 10:58:49,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=206860.0, ans=0.125 +2024-09-17 10:58:53,393 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.75 vs. limit=15.0 +2024-09-17 10:59:01,599 INFO [train.py:1198] (1/2) Epoch 12, batch 1950, loss[loss=0.2496, ctc_loss=0.1524, cr_loss=0.3763, attn_decoder_loss=0.2521, over 29432.00 frames. ], tot_loss[loss=0.2623, ctc_loss=0.163, cr_loss=0.4031, attn_decoder_loss=0.2643, over 5819751.50 frames. ], batch size: 78, lr: 9.39e-03, grad_scale: 8.0 +2024-09-17 10:59:38,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=206980.0, ans=0.95 +2024-09-17 10:59:40,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=206980.0, ans=0.125 +2024-09-17 10:59:50,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=207020.0, ans=0.2 +2024-09-17 11:00:17,494 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.829e+01 9.308e+01 9.738e+01 1.045e+02 2.594e+02, threshold=1.948e+02, percent-clipped=1.0 +2024-09-17 11:00:19,010 INFO [train.py:1198] (1/2) Epoch 12, batch 2000, loss[loss=0.2311, ctc_loss=0.1437, cr_loss=0.3762, attn_decoder_loss=0.2325, over 29375.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1643, cr_loss=0.4041, attn_decoder_loss=0.2652, over 5796871.35 frames. ], batch size: 67, lr: 9.38e-03, grad_scale: 16.0 +2024-09-17 11:00:54,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=207180.0, ans=0.1 +2024-09-17 11:01:08,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=207220.0, ans=0.0 +2024-09-17 11:01:37,092 INFO [train.py:1198] (1/2) Epoch 12, batch 2050, loss[loss=0.2272, ctc_loss=0.1385, cr_loss=0.3601, attn_decoder_loss=0.2291, over 29458.00 frames. ], tot_loss[loss=0.262, ctc_loss=0.1631, cr_loss=0.4019, attn_decoder_loss=0.2641, over 5789017.47 frames. ], batch size: 70, lr: 9.38e-03, grad_scale: 4.0 +2024-09-17 11:01:40,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=207300.0, ans=0.125 +2024-09-17 11:01:42,627 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.28 vs. limit=15.0 +2024-09-17 11:02:00,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=207340.0, ans=0.0 +2024-09-17 11:02:18,815 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.87 vs. 
limit=15.0 +2024-09-17 11:02:19,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=207380.0, ans=0.07 +2024-09-17 11:02:32,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=207420.0, ans=0.125 +2024-09-17 11:02:48,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=207460.0, ans=0.125 +2024-09-17 11:02:52,880 INFO [train.py:1198] (1/2) Epoch 12, batch 2100, loss[loss=0.2552, ctc_loss=0.1545, cr_loss=0.3953, attn_decoder_loss=0.2576, over 29752.00 frames. ], tot_loss[loss=0.261, ctc_loss=0.1624, cr_loss=0.4009, attn_decoder_loss=0.2631, over 5799629.48 frames. ], batch size: 81, lr: 9.37e-03, grad_scale: 8.0 +2024-09-17 11:02:54,455 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.611e+01 9.176e+01 9.560e+01 1.030e+02 1.406e+02, threshold=1.912e+02, percent-clipped=0.0 +2024-09-17 11:03:08,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.59 vs. limit=15.0 +2024-09-17 11:03:30,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=207580.0, ans=0.025 +2024-09-17 11:03:34,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=207580.0, ans=0.0 +2024-09-17 11:03:39,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=207620.0, ans=0.125 +2024-09-17 11:03:46,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=207620.0, ans=0.0 +2024-09-17 11:03:49,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=207620.0, ans=0.2 +2024-09-17 11:03:51,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=207620.0, ans=0.0 +2024-09-17 11:04:00,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=207660.0, ans=0.125 +2024-09-17 11:04:01,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=207660.0, ans=0.125 +2024-09-17 11:04:09,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=207700.0, ans=0.0 +2024-09-17 11:04:10,888 INFO [train.py:1198] (1/2) Epoch 12, batch 2150, loss[loss=0.2611, ctc_loss=0.1673, cr_loss=0.4214, attn_decoder_loss=0.2622, over 29443.00 frames. ], tot_loss[loss=0.2602, ctc_loss=0.1617, cr_loss=0.4001, attn_decoder_loss=0.2623, over 5813742.10 frames. ], batch size: 78, lr: 9.37e-03, grad_scale: 4.0 +2024-09-17 11:04:29,051 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.36 vs. 
limit=12.0 +2024-09-17 11:04:37,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=207740.0, ans=0.0 +2024-09-17 11:04:39,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=207740.0, ans=0.1 +2024-09-17 11:04:54,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=207780.0, ans=0.125 +2024-09-17 11:04:57,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=207820.0, ans=0.125 +2024-09-17 11:05:00,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=207820.0, ans=0.04949747468305833 +2024-09-17 11:05:08,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=207820.0, ans=0.2 +2024-09-17 11:05:22,257 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.76 vs. limit=12.0 +2024-09-17 11:05:28,908 INFO [train.py:1198] (1/2) Epoch 12, batch 2200, loss[loss=0.2733, ctc_loss=0.1721, cr_loss=0.3965, attn_decoder_loss=0.2757, over 29617.00 frames. ], tot_loss[loss=0.2603, ctc_loss=0.1618, cr_loss=0.4006, attn_decoder_loss=0.2623, over 5809589.40 frames. ], batch size: 86, lr: 9.36e-03, grad_scale: 8.0 +2024-09-17 11:05:31,915 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 9.159e+01 9.816e+01 1.050e+02 6.382e+02, threshold=1.963e+02, percent-clipped=1.0 +2024-09-17 11:05:45,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=207940.0, ans=0.025 +2024-09-17 11:05:49,291 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.72 vs. limit=15.0 +2024-09-17 11:05:53,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=207940.0, ans=0.0 +2024-09-17 11:06:20,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=208020.0, ans=10.0 +2024-09-17 11:06:31,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=208020.0, ans=0.0 +2024-09-17 11:06:33,313 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.18 vs. limit=22.5 +2024-09-17 11:06:51,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=208100.0, ans=0.07 +2024-09-17 11:06:52,580 INFO [train.py:1198] (1/2) Epoch 12, batch 2250, loss[loss=0.2645, ctc_loss=0.1614, cr_loss=0.4033, attn_decoder_loss=0.267, over 29716.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.162, cr_loss=0.4007, attn_decoder_loss=0.2626, over 5810044.85 frames. ], batch size: 82, lr: 9.36e-03, grad_scale: 8.0 +2024-09-17 11:07:05,617 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.17 vs. 
limit=15.0 +2024-09-17 11:07:13,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=208140.0, ans=0.0 +2024-09-17 11:07:19,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=208140.0, ans=0.2 +2024-09-17 11:07:34,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=208180.0, ans=0.1 +2024-09-17 11:07:37,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=208180.0, ans=0.125 +2024-09-17 11:08:00,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=208260.0, ans=0.0 +2024-09-17 11:08:10,391 INFO [train.py:1198] (1/2) Epoch 12, batch 2300, loss[loss=0.2293, ctc_loss=0.1278, cr_loss=0.344, attn_decoder_loss=0.2329, over 29311.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.161, cr_loss=0.3988, attn_decoder_loss=0.2614, over 5799139.00 frames. ], batch size: 71, lr: 9.36e-03, grad_scale: 8.0 +2024-09-17 11:08:13,461 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.929e+01 9.033e+01 9.553e+01 1.076e+02 7.023e+02, threshold=1.911e+02, percent-clipped=3.0 +2024-09-17 11:08:18,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=208300.0, ans=0.025 +2024-09-17 11:08:23,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.76 vs. limit=22.5 +2024-09-17 11:08:44,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=208380.0, ans=0.2 +2024-09-17 11:08:58,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=208420.0, ans=0.125 +2024-09-17 11:09:01,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=208420.0, ans=0.125 +2024-09-17 11:09:09,355 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.25 vs. limit=15.0 +2024-09-17 11:09:24,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.08 vs. limit=15.0 +2024-09-17 11:09:28,437 INFO [train.py:1198] (1/2) Epoch 12, batch 2350, loss[loss=0.2691, ctc_loss=0.1688, cr_loss=0.428, attn_decoder_loss=0.2707, over 29699.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1613, cr_loss=0.3996, attn_decoder_loss=0.2618, over 5804995.70 frames. ], batch size: 83, lr: 9.35e-03, grad_scale: 8.0 +2024-09-17 11:09:40,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=208500.0, ans=0.0 +2024-09-17 11:09:51,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=208540.0, ans=0.125 +2024-09-17 11:10:38,519 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.90 vs. 
limit=15.0 +2024-09-17 11:10:43,832 INFO [train.py:1198] (1/2) Epoch 12, batch 2400, loss[loss=0.2525, ctc_loss=0.1489, cr_loss=0.3794, attn_decoder_loss=0.2556, over 29537.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.1617, cr_loss=0.4003, attn_decoder_loss=0.2622, over 5809148.52 frames. ], batch size: 76, lr: 9.35e-03, grad_scale: 16.0 +2024-09-17 11:10:49,769 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 9.246e+01 9.641e+01 1.033e+02 3.378e+02, threshold=1.928e+02, percent-clipped=1.0 +2024-09-17 11:11:11,391 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.07 vs. limit=15.0 +2024-09-17 11:11:21,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=208780.0, ans=0.1 +2024-09-17 11:11:28,707 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.59 vs. limit=10.0 +2024-09-17 11:11:41,981 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.84 vs. limit=6.0 +2024-09-17 11:11:56,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=208860.0, ans=0.125 +2024-09-17 11:12:02,489 INFO [train.py:1198] (1/2) Epoch 12, batch 2450, loss[loss=0.2594, ctc_loss=0.1653, cr_loss=0.4077, attn_decoder_loss=0.2608, over 29699.00 frames. ], tot_loss[loss=0.2611, ctc_loss=0.1629, cr_loss=0.4018, attn_decoder_loss=0.2631, over 5786143.13 frames. ], batch size: 82, lr: 9.34e-03, grad_scale: 4.0 +2024-09-17 11:12:11,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=208900.0, ans=0.2 +2024-09-17 11:12:23,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=208940.0, ans=0.125 +2024-09-17 11:12:28,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=208940.0, ans=0.125 +2024-09-17 11:12:39,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=208980.0, ans=0.125 +2024-09-17 11:12:40,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=208980.0, ans=0.1 +2024-09-17 11:13:14,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=209060.0, ans=0.125 +2024-09-17 11:13:19,886 INFO [train.py:1198] (1/2) Epoch 12, batch 2500, loss[loss=0.271, ctc_loss=0.1696, cr_loss=0.4023, attn_decoder_loss=0.2734, over 29627.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1624, cr_loss=0.4015, attn_decoder_loss=0.2629, over 5795654.03 frames. 
], batch size: 86, lr: 9.34e-03, grad_scale: 8.0 +2024-09-17 11:13:25,834 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.198e+01 9.207e+01 9.738e+01 1.065e+02 1.820e+02, threshold=1.948e+02, percent-clipped=0.0 +2024-09-17 11:13:29,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=209100.0, ans=0.05 +2024-09-17 11:13:37,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.01 vs. limit=15.0 +2024-09-17 11:13:38,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=209140.0, ans=0.125 +2024-09-17 11:13:42,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1.whitening_limit, batch_count=209140.0, ans=10.0 +2024-09-17 11:14:13,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=209220.0, ans=0.2 +2024-09-17 11:14:24,674 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.18 vs. limit=15.0 +2024-09-17 11:14:36,042 INFO [train.py:1198] (1/2) Epoch 12, batch 2550, loss[loss=0.2285, ctc_loss=0.1363, cr_loss=0.3618, attn_decoder_loss=0.2307, over 29345.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.1621, cr_loss=0.4012, attn_decoder_loss=0.2628, over 5798836.80 frames. ], batch size: 67, lr: 9.33e-03, grad_scale: 8.0 +2024-09-17 11:14:39,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=209300.0, ans=0.125 +2024-09-17 11:15:04,623 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.47 vs. limit=15.0 +2024-09-17 11:15:26,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=209420.0, ans=0.1 +2024-09-17 11:15:40,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=209460.0, ans=0.125 +2024-09-17 11:15:53,681 INFO [train.py:1198] (1/2) Epoch 12, batch 2600, loss[loss=0.2562, ctc_loss=0.1565, cr_loss=0.4053, attn_decoder_loss=0.2583, over 29457.00 frames. ], tot_loss[loss=0.2612, ctc_loss=0.1624, cr_loss=0.402, attn_decoder_loss=0.2633, over 5795261.60 frames. ], batch size: 78, lr: 9.33e-03, grad_scale: 8.0 +2024-09-17 11:16:00,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.42 vs. 
limit=6.0 +2024-09-17 11:16:01,132 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.249e+01 9.008e+01 9.501e+01 1.038e+02 1.745e+02, threshold=1.900e+02, percent-clipped=0.0 +2024-09-17 11:16:01,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=209500.0, ans=0.025 +2024-09-17 11:16:02,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=209500.0, ans=0.0 +2024-09-17 11:16:04,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=209500.0, ans=0.0 +2024-09-17 11:16:07,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=209540.0, ans=0.125 +2024-09-17 11:16:26,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=209580.0, ans=0.1 +2024-09-17 11:16:56,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=209660.0, ans=0.125 +2024-09-17 11:17:01,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=209660.0, ans=0.125 +2024-09-17 11:17:04,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=209660.0, ans=0.0 +2024-09-17 11:17:08,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=209660.0, ans=0.125 +2024-09-17 11:17:11,201 INFO [train.py:1198] (1/2) Epoch 12, batch 2650, loss[loss=0.2726, ctc_loss=0.171, cr_loss=0.4089, attn_decoder_loss=0.2748, over 29166.00 frames. ], tot_loss[loss=0.2615, ctc_loss=0.1623, cr_loss=0.4024, attn_decoder_loss=0.2636, over 5802144.00 frames. ], batch size: 100, lr: 9.32e-03, grad_scale: 8.0 +2024-09-17 11:17:32,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=209740.0, ans=0.0 +2024-09-17 11:17:34,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=209740.0, ans=0.125 +2024-09-17 11:17:48,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=209780.0, ans=0.1 +2024-09-17 11:18:19,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=209860.0, ans=0.09899494936611666 +2024-09-17 11:18:26,278 INFO [train.py:1198] (1/2) Epoch 12, batch 2700, loss[loss=0.2753, ctc_loss=0.1754, cr_loss=0.4322, attn_decoder_loss=0.2768, over 29522.00 frames. ], tot_loss[loss=0.2617, ctc_loss=0.1624, cr_loss=0.402, attn_decoder_loss=0.2638, over 5797399.10 frames. ], batch size: 87, lr: 9.32e-03, grad_scale: 8.0 +2024-09-17 11:18:35,137 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.890e+01 9.487e+01 1.014e+02 1.859e+02, threshold=1.897e+02, percent-clipped=0.0 +2024-09-17 11:19:18,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.41 vs. 
limit=15.0 +2024-09-17 11:19:18,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=210020.0, ans=0.125 +2024-09-17 11:19:18,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=210020.0, ans=0.0 +2024-09-17 11:19:41,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=210060.0, ans=0.04949747468305833 +2024-09-17 11:19:44,379 INFO [train.py:1198] (1/2) Epoch 12, batch 2750, loss[loss=0.2427, ctc_loss=0.1403, cr_loss=0.363, attn_decoder_loss=0.246, over 29521.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.1614, cr_loss=0.3996, attn_decoder_loss=0.2627, over 5796666.11 frames. ], batch size: 75, lr: 9.32e-03, grad_scale: 8.0 +2024-09-17 11:19:59,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=210140.0, ans=0.125 +2024-09-17 11:19:59,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=210140.0, ans=0.2 +2024-09-17 11:20:12,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.92 vs. limit=15.0 +2024-09-17 11:20:16,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=210180.0, ans=0.0 +2024-09-17 11:20:33,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=210220.0, ans=0.125 +2024-09-17 11:20:42,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=210220.0, ans=0.1 +2024-09-17 11:20:47,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=210260.0, ans=0.0 +2024-09-17 11:20:47,595 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.04 vs. limit=15.0 +2024-09-17 11:21:02,209 INFO [train.py:1198] (1/2) Epoch 12, batch 2800, loss[loss=0.2884, ctc_loss=0.21, cr_loss=0.4086, attn_decoder_loss=0.288, over 20433.00 frames. ], tot_loss[loss=0.2606, ctc_loss=0.1615, cr_loss=0.3997, attn_decoder_loss=0.2627, over 5777806.46 frames. ], batch size: 209, lr: 9.31e-03, grad_scale: 16.0 +2024-09-17 11:21:12,652 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.172e+01 9.480e+01 1.026e+02 1.256e+02 4.560e+02, threshold=2.052e+02, percent-clipped=3.0 +2024-09-17 11:21:48,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=210420.0, ans=0.2 +2024-09-17 11:22:06,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=210460.0, ans=0.125 +2024-09-17 11:22:09,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=210460.0, ans=0.125 +2024-09-17 11:22:12,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=210460.0, ans=0.2 +2024-09-17 11:22:18,254 INFO [train.py:1198] (1/2) Epoch 12, batch 2850, loss[loss=0.2612, ctc_loss=0.1648, cr_loss=0.3998, attn_decoder_loss=0.263, over 29503.00 frames. 
], tot_loss[loss=0.2611, ctc_loss=0.1621, cr_loss=0.3997, attn_decoder_loss=0.2633, over 5763635.36 frames. ], batch size: 77, lr: 9.31e-03, grad_scale: 4.0 +2024-09-17 11:22:30,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=210500.0, ans=0.0 +2024-09-17 11:22:48,458 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.77 vs. limit=15.0 +2024-09-17 11:23:01,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=210580.0, ans=0.025 +2024-09-17 11:23:27,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=210660.0, ans=0.125 +2024-09-17 11:23:27,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.04 vs. limit=15.0 +2024-09-17 11:23:36,113 INFO [train.py:1198] (1/2) Epoch 12, batch 2900, loss[loss=0.2595, ctc_loss=0.1615, cr_loss=0.4059, attn_decoder_loss=0.2613, over 29428.00 frames. ], tot_loss[loss=0.2622, ctc_loss=0.1625, cr_loss=0.4019, attn_decoder_loss=0.2644, over 5788588.29 frames. ], batch size: 79, lr: 9.30e-03, grad_scale: 8.0 +2024-09-17 11:23:48,032 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.952e+01 8.957e+01 9.627e+01 1.010e+02 3.114e+02, threshold=1.925e+02, percent-clipped=2.0 +2024-09-17 11:23:55,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=210740.0, ans=0.125 +2024-09-17 11:23:58,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=210740.0, ans=0.0 +2024-09-17 11:24:02,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=210740.0, ans=0.0 +2024-09-17 11:24:21,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=8.01 vs. limit=15.0 +2024-09-17 11:24:36,524 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.15 vs. limit=22.5 +2024-09-17 11:24:52,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=210900.0, ans=0.0 +2024-09-17 11:24:53,861 INFO [train.py:1198] (1/2) Epoch 12, batch 2950, loss[loss=0.2489, ctc_loss=0.1566, cr_loss=0.3621, attn_decoder_loss=0.2511, over 29522.00 frames. ], tot_loss[loss=0.2609, ctc_loss=0.1613, cr_loss=0.3996, attn_decoder_loss=0.2631, over 5781644.93 frames. ], batch size: 75, lr: 9.30e-03, grad_scale: 8.0 +2024-09-17 11:25:13,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=210940.0, ans=0.0 +2024-09-17 11:25:19,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=210940.0, ans=0.125 +2024-09-17 11:25:53,878 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.25 vs. 
limit=22.5 +2024-09-17 11:26:09,715 INFO [train.py:1198] (1/2) Epoch 12, batch 3000, loss[loss=0.2572, ctc_loss=0.1569, cr_loss=0.3814, attn_decoder_loss=0.2598, over 29742.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1613, cr_loss=0.3997, attn_decoder_loss=0.2629, over 5782120.12 frames. ], batch size: 81, lr: 9.29e-03, grad_scale: 8.0 +2024-09-17 11:26:09,715 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 11:26:17,178 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.1.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.5995, 4.2348, 4.0770, 3.9412], device='cuda:1') +2024-09-17 11:26:28,163 INFO [train.py:1230] (1/2) Epoch 12, validation: loss=0.2128, ctc_loss=0.04571, cr_loss=4.818e-15, attn_decoder_loss=0.2314, over 944034.00 frames. +2024-09-17 11:26:28,163 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 11:26:31,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=211100.0, ans=0.0 +2024-09-17 11:26:42,643 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 9.212e+01 9.963e+01 1.087e+02 2.371e+02, threshold=1.993e+02, percent-clipped=1.0 +2024-09-17 11:27:48,913 INFO [train.py:1198] (1/2) Epoch 12, batch 3050, loss[loss=0.2482, ctc_loss=0.1569, cr_loss=0.3884, attn_decoder_loss=0.2497, over 29541.00 frames. ], tot_loss[loss=0.2613, ctc_loss=0.1618, cr_loss=0.4005, attn_decoder_loss=0.2634, over 5776117.06 frames. ], batch size: 76, lr: 9.29e-03, grad_scale: 8.0 +2024-09-17 11:27:51,675 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.58 vs. limit=15.0 +2024-09-17 11:28:01,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=211300.0, ans=0.125 +2024-09-17 11:28:20,269 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.11 vs. limit=12.0 +2024-09-17 11:29:00,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=211460.0, ans=0.0 +2024-09-17 11:29:02,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.54 vs. limit=10.0 +2024-09-17 11:29:04,431 INFO [train.py:1198] (1/2) Epoch 12, batch 3100, loss[loss=0.2731, ctc_loss=0.17, cr_loss=0.3942, attn_decoder_loss=0.2758, over 29243.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1614, cr_loss=0.3997, attn_decoder_loss=0.2629, over 5776144.76 frames. ], batch size: 100, lr: 9.29e-03, grad_scale: 8.0 +2024-09-17 11:29:12,877 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.59 vs. 
limit=10.0 +2024-09-17 11:29:16,538 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.116e+01 9.262e+01 9.866e+01 1.070e+02 1.746e+02, threshold=1.973e+02, percent-clipped=0.0 +2024-09-17 11:29:19,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=211540.0, ans=0.0 +2024-09-17 11:29:21,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=211540.0, ans=10.0 +2024-09-17 11:29:44,196 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.62 vs. limit=12.0 +2024-09-17 11:30:02,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=211620.0, ans=0.125 +2024-09-17 11:30:11,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=211660.0, ans=0.2 +2024-09-17 11:30:19,854 INFO [train.py:1198] (1/2) Epoch 12, batch 3150, loss[loss=0.2863, ctc_loss=0.1785, cr_loss=0.4389, attn_decoder_loss=0.2885, over 28856.00 frames. ], tot_loss[loss=0.2606, ctc_loss=0.1611, cr_loss=0.3996, attn_decoder_loss=0.2627, over 5782612.67 frames. ], batch size: 104, lr: 9.28e-03, grad_scale: 8.0 +2024-09-17 11:30:54,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=211780.0, ans=0.0 +2024-09-17 11:31:39,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=211900.0, ans=0.1 +2024-09-17 11:31:40,397 INFO [train.py:1198] (1/2) Epoch 12, batch 3200, loss[loss=0.2481, ctc_loss=0.1451, cr_loss=0.3674, attn_decoder_loss=0.2514, over 29763.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.1608, cr_loss=0.3992, attn_decoder_loss=0.2623, over 5792330.03 frames. 
], batch size: 80, lr: 9.28e-03, grad_scale: 16.0 +2024-09-17 11:31:46,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=211900.0, ans=0.1 +2024-09-17 11:31:53,886 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.727e+01 9.023e+01 9.600e+01 1.061e+02 2.809e+02, threshold=1.920e+02, percent-clipped=1.0 +2024-09-17 11:31:57,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=211940.0, ans=0.1 +2024-09-17 11:32:03,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=211940.0, ans=0.125 +2024-09-17 11:32:14,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=211980.0, ans=0.2 +2024-09-17 11:32:35,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=212020.0, ans=0.0 +2024-09-17 11:32:37,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=212020.0, ans=0.2 +2024-09-17 11:32:41,794 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:32:44,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=212060.0, ans=0.05 +2024-09-17 11:32:56,862 INFO [train.py:1198] (1/2) Epoch 12, batch 3250, loss[loss=0.2758, ctc_loss=0.1716, cr_loss=0.4231, attn_decoder_loss=0.278, over 29713.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.1614, cr_loss=0.4004, attn_decoder_loss=0.263, over 5799306.85 frames. ], batch size: 84, lr: 9.27e-03, grad_scale: 8.0 +2024-09-17 11:33:37,922 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:33:40,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=212220.0, ans=0.07 +2024-09-17 11:33:46,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=212220.0, ans=0.025 +2024-09-17 11:33:51,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=212220.0, ans=0.125 +2024-09-17 11:34:11,996 INFO [train.py:1198] (1/2) Epoch 12, batch 3300, loss[loss=0.2714, ctc_loss=0.1656, cr_loss=0.3911, attn_decoder_loss=0.2745, over 28288.00 frames. ], tot_loss[loss=0.2595, ctc_loss=0.16, cr_loss=0.3985, attn_decoder_loss=0.2617, over 5797049.23 frames. 
], batch size: 111, lr: 9.27e-03, grad_scale: 8.0 +2024-09-17 11:34:20,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=212300.0, ans=0.0 +2024-09-17 11:34:27,365 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.743e+01 9.323e+01 1.013e+02 1.133e+02 3.364e+02, threshold=2.026e+02, percent-clipped=1.0 +2024-09-17 11:35:04,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=212420.0, ans=0.125 +2024-09-17 11:35:17,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=212460.0, ans=0.125 +2024-09-17 11:35:32,394 INFO [train.py:1198] (1/2) Epoch 12, batch 3350, loss[loss=0.2691, ctc_loss=0.1703, cr_loss=0.4182, attn_decoder_loss=0.2708, over 28795.00 frames. ], tot_loss[loss=0.2606, ctc_loss=0.1613, cr_loss=0.4004, attn_decoder_loss=0.2627, over 5772453.92 frames. ], batch size: 104, lr: 9.26e-03, grad_scale: 8.0 +2024-09-17 11:35:44,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=212500.0, ans=0.09899494936611666 +2024-09-17 11:36:13,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=212580.0, ans=0.0 +2024-09-17 11:36:16,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=212620.0, ans=0.125 +2024-09-17 11:36:18,314 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:36:28,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=212620.0, ans=0.125 +2024-09-17 11:36:36,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=212660.0, ans=0.125 +2024-09-17 11:36:40,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=212660.0, ans=0.125 +2024-09-17 11:36:42,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=212660.0, ans=0.125 +2024-09-17 11:36:48,155 INFO [train.py:1198] (1/2) Epoch 12, batch 3400, loss[loss=0.2337, ctc_loss=0.1425, cr_loss=0.3967, attn_decoder_loss=0.235, over 29365.00 frames. ], tot_loss[loss=0.2604, ctc_loss=0.1614, cr_loss=0.4002, attn_decoder_loss=0.2626, over 5765405.97 frames. ], batch size: 67, lr: 9.26e-03, grad_scale: 8.0 +2024-09-17 11:37:03,324 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.571e+01 9.423e+01 1.002e+02 1.091e+02 2.670e+02, threshold=2.004e+02, percent-clipped=1.0 +2024-09-17 11:37:34,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=212820.0, ans=0.1 +2024-09-17 11:37:45,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=212820.0, ans=0.2 +2024-09-17 11:38:04,056 INFO [train.py:1198] (1/2) Epoch 12, batch 3450, loss[loss=0.2628, ctc_loss=0.162, cr_loss=0.3858, attn_decoder_loss=0.2654, over 28190.00 frames. ], tot_loss[loss=0.2607, ctc_loss=0.1616, cr_loss=0.4005, attn_decoder_loss=0.2628, over 5773187.69 frames. 
], batch size: 111, lr: 9.26e-03, grad_scale: 8.0 +2024-09-17 11:38:14,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=212900.0, ans=0.125 +2024-09-17 11:38:38,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=212980.0, ans=0.125 +2024-09-17 11:39:23,610 INFO [train.py:1198] (1/2) Epoch 12, batch 3500, loss[loss=0.2333, ctc_loss=0.1435, cr_loss=0.3823, attn_decoder_loss=0.2348, over 29326.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1607, cr_loss=0.3998, attn_decoder_loss=0.2618, over 5775698.62 frames. ], batch size: 71, lr: 9.25e-03, grad_scale: 8.0 +2024-09-17 11:39:25,391 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:39:28,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=213100.0, ans=0.125 +2024-09-17 11:39:40,416 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.771e+01 8.906e+01 9.633e+01 1.043e+02 3.728e+02, threshold=1.927e+02, percent-clipped=3.0 +2024-09-17 11:39:44,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.81 vs. limit=15.0 +2024-09-17 11:39:45,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.75 vs. limit=22.5 +2024-09-17 11:40:10,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=213220.0, ans=0.025 +2024-09-17 11:40:10,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=213220.0, ans=0.125 +2024-09-17 11:40:22,918 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.44 vs. limit=15.0 +2024-09-17 11:40:38,548 INFO [train.py:1198] (1/2) Epoch 12, batch 3550, loss[loss=0.2733, ctc_loss=0.1637, cr_loss=0.4232, attn_decoder_loss=0.2761, over 29707.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1604, cr_loss=0.3991, attn_decoder_loss=0.2619, over 5781422.21 frames. ], batch size: 89, lr: 9.25e-03, grad_scale: 8.0 +2024-09-17 11:40:54,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=213340.0, ans=0.0 +2024-09-17 11:40:56,802 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.92 vs. limit=12.0 +2024-09-17 11:41:01,704 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.39 vs. 
limit=15.0 +2024-09-17 11:41:03,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=213340.0, ans=0.0 +2024-09-17 11:41:14,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=213380.0, ans=0.1 +2024-09-17 11:41:17,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=213380.0, ans=0.125 +2024-09-17 11:41:26,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=213420.0, ans=0.1 +2024-09-17 11:41:37,245 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.00 vs. limit=15.0 +2024-09-17 11:41:52,788 INFO [train.py:1198] (1/2) Epoch 12, batch 3600, loss[loss=0.254, ctc_loss=0.1629, cr_loss=0.4137, attn_decoder_loss=0.255, over 29520.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.1607, cr_loss=0.4003, attn_decoder_loss=0.2622, over 5790662.93 frames. ], batch size: 77, lr: 9.24e-03, grad_scale: 16.0 +2024-09-17 11:42:03,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=213500.0, ans=0.2 +2024-09-17 11:42:05,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=213500.0, ans=0.125 +2024-09-17 11:42:10,827 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.945e+01 9.059e+01 9.779e+01 1.035e+02 3.079e+02, threshold=1.956e+02, percent-clipped=1.0 +2024-09-17 11:42:15,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=213540.0, ans=0.1 +2024-09-17 11:42:27,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=213580.0, ans=0.125 +2024-09-17 11:42:47,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=213620.0, ans=0.0 +2024-09-17 11:43:02,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.06 vs. limit=10.0 +2024-09-17 11:43:08,008 INFO [train.py:1198] (1/2) Epoch 12, batch 3650, loss[loss=0.2626, ctc_loss=0.159, cr_loss=0.4118, attn_decoder_loss=0.2649, over 29486.00 frames. ], tot_loss[loss=0.2594, ctc_loss=0.1601, cr_loss=0.3991, attn_decoder_loss=0.2616, over 5793843.80 frames. 
], batch size: 90, lr: 9.24e-03, grad_scale: 8.0 +2024-09-17 11:43:30,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=213740.0, ans=0.0 +2024-09-17 11:43:50,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=213780.0, ans=0.0 +2024-09-17 11:43:54,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=213820.0, ans=0.1 +2024-09-17 11:44:23,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=213900.0, ans=0.035 +2024-09-17 11:44:24,706 INFO [train.py:1198] (1/2) Epoch 12, batch 3700, loss[loss=0.2672, ctc_loss=0.1574, cr_loss=0.3951, attn_decoder_loss=0.2706, over 29708.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.1598, cr_loss=0.3993, attn_decoder_loss=0.2615, over 5803692.55 frames. ], batch size: 84, lr: 9.23e-03, grad_scale: 8.0 +2024-09-17 11:44:33,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=213900.0, ans=0.125 +2024-09-17 11:44:42,608 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.674e+01 9.238e+01 9.737e+01 1.052e+02 3.934e+02, threshold=1.947e+02, percent-clipped=3.0 +2024-09-17 11:44:50,959 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.51 vs. limit=22.5 +2024-09-17 11:44:55,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.09 vs. limit=15.0 +2024-09-17 11:45:05,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.50 vs. limit=15.0 +2024-09-17 11:45:09,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=214020.0, ans=0.125 +2024-09-17 11:45:12,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=214020.0, ans=0.125 +2024-09-17 11:45:40,789 INFO [train.py:1198] (1/2) Epoch 12, batch 3750, loss[loss=0.2332, ctc_loss=0.1449, cr_loss=0.3689, attn_decoder_loss=0.2348, over 29340.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.1598, cr_loss=0.3987, attn_decoder_loss=0.2612, over 5807747.07 frames. ], batch size: 67, lr: 9.23e-03, grad_scale: 8.0 +2024-09-17 11:45:44,129 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:45:57,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=214140.0, ans=0.025 +2024-09-17 11:45:59,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=214140.0, ans=0.125 +2024-09-17 11:46:04,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=214140.0, ans=0.125 +2024-09-17 11:46:09,562 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:46:55,356 INFO [train.py:1198] (1/2) Epoch 12, batch 3800, loss[loss=0.2722, ctc_loss=0.1624, cr_loss=0.4171, attn_decoder_loss=0.2751, over 29611.00 frames. 
], tot_loss[loss=0.2591, ctc_loss=0.1602, cr_loss=0.399, attn_decoder_loss=0.2612, over 5798421.24 frames. ], batch size: 86, lr: 9.23e-03, grad_scale: 8.0 +2024-09-17 11:47:08,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=214340.0, ans=0.125 +2024-09-17 11:47:11,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=214340.0, ans=0.125 +2024-09-17 11:47:13,097 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.095e+01 9.449e+01 1.046e+02 1.140e+02 2.045e+02, threshold=2.093e+02, percent-clipped=1.0 +2024-09-17 11:47:37,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=214380.0, ans=0.125 +2024-09-17 11:47:38,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=214420.0, ans=0.0 +2024-09-17 11:47:39,556 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.33 vs. limit=22.5 +2024-09-17 11:47:46,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=214420.0, ans=0.125 +2024-09-17 11:47:57,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=214460.0, ans=0.1 +2024-09-17 11:48:06,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.57 vs. limit=15.0 +2024-09-17 11:48:09,978 INFO [train.py:1198] (1/2) Epoch 12, batch 3850, loss[loss=0.2794, ctc_loss=0.1724, cr_loss=0.4175, attn_decoder_loss=0.282, over 29256.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1597, cr_loss=0.3988, attn_decoder_loss=0.261, over 5812622.27 frames. ], batch size: 100, lr: 9.22e-03, grad_scale: 8.0 +2024-09-17 11:48:14,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=214500.0, ans=0.07 +2024-09-17 11:48:19,130 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:48:20,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=214500.0, ans=0.025 +2024-09-17 11:48:21,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=214500.0, ans=0.0 +2024-09-17 11:48:27,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=214540.0, ans=0.1 +2024-09-17 11:48:36,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=214540.0, ans=0.1 +2024-09-17 11:48:40,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=214580.0, ans=0.2 +2024-09-17 11:48:49,365 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.94 vs. 
limit=15.0 +2024-09-17 11:49:06,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=214620.0, ans=0.125 +2024-09-17 11:49:09,052 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.19 vs. limit=22.5 +2024-09-17 11:49:09,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=214660.0, ans=0.125 +2024-09-17 11:49:20,885 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.96 vs. limit=12.0 +2024-09-17 11:49:24,513 INFO [train.py:1198] (1/2) Epoch 12, batch 3900, loss[loss=0.2616, ctc_loss=0.153, cr_loss=0.3862, attn_decoder_loss=0.2651, over 29613.00 frames. ], tot_loss[loss=0.2596, ctc_loss=0.1603, cr_loss=0.3998, attn_decoder_loss=0.2617, over 5816171.90 frames. ], batch size: 86, lr: 9.22e-03, grad_scale: 8.0 +2024-09-17 11:49:42,146 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.696e+01 9.064e+01 9.520e+01 1.003e+02 3.590e+02, threshold=1.904e+02, percent-clipped=1.0 +2024-09-17 11:49:46,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=214740.0, ans=0.125 +2024-09-17 11:50:12,685 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.17 vs. limit=22.5 +2024-09-17 11:50:13,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=214820.0, ans=0.025 +2024-09-17 11:50:14,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=214820.0, ans=0.125 +2024-09-17 11:50:41,386 INFO [train.py:1198] (1/2) Epoch 12, batch 3950, loss[loss=0.2694, ctc_loss=0.166, cr_loss=0.4255, attn_decoder_loss=0.2715, over 29494.00 frames. ], tot_loss[loss=0.2594, ctc_loss=0.1598, cr_loss=0.3991, attn_decoder_loss=0.2617, over 5835654.27 frames. ], batch size: 97, lr: 9.21e-03, grad_scale: 8.0 +2024-09-17 11:51:20,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=214980.0, ans=0.0 +2024-09-17 11:51:33,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=215020.0, ans=0.05 +2024-09-17 11:51:36,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=215020.0, ans=0.2 +2024-09-17 11:51:46,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=215060.0, ans=0.2 +2024-09-17 11:51:54,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff3.min_abs, batch_count=215100.0, ans=0.2 +2024-09-17 11:51:55,372 INFO [train.py:1198] (1/2) Epoch 12, batch 4000, loss[loss=0.2355, ctc_loss=0.1398, cr_loss=0.3703, attn_decoder_loss=0.2379, over 29524.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1602, cr_loss=0.3992, attn_decoder_loss=0.2618, over 5812530.41 frames. 
], batch size: 74, lr: 9.21e-03, grad_scale: 16.0 +2024-09-17 11:52:14,272 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.967e+01 9.592e+01 1.062e+02 2.028e+02, threshold=1.918e+02, percent-clipped=1.0 +2024-09-17 11:52:18,267 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.91 vs. limit=15.0 +2024-09-17 11:52:36,175 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.82 vs. limit=15.0 +2024-09-17 11:52:59,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=215260.0, ans=0.125 +2024-09-17 11:53:10,007 INFO [train.py:1198] (1/2) Epoch 12, batch 4050, loss[loss=0.2956, ctc_loss=0.2162, cr_loss=0.4173, attn_decoder_loss=0.2951, over 19480.00 frames. ], tot_loss[loss=0.26, ctc_loss=0.1609, cr_loss=0.3998, attn_decoder_loss=0.2621, over 5796020.99 frames. ], batch size: 209, lr: 9.21e-03, grad_scale: 8.0 +2024-09-17 11:53:54,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=215420.0, ans=0.025 +2024-09-17 11:54:06,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=215420.0, ans=0.125 +2024-09-17 11:54:12,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=215460.0, ans=0.125 +2024-09-17 11:54:19,887 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.93 vs. limit=15.0 +2024-09-17 11:54:22,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=215500.0, ans=0.125 +2024-09-17 11:54:23,756 INFO [train.py:1198] (1/2) Epoch 12, batch 4100, loss[loss=0.2677, ctc_loss=0.1634, cr_loss=0.4113, attn_decoder_loss=0.2702, over 29507.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1607, cr_loss=0.3995, attn_decoder_loss=0.2618, over 5791253.91 frames. ], batch size: 90, lr: 9.20e-03, grad_scale: 8.0 +2024-09-17 11:54:27,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=6.32 vs. limit=12.0 +2024-09-17 11:54:39,998 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:54:43,943 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.986e+01 9.324e+01 9.990e+01 1.134e+02 3.141e+02, threshold=1.998e+02, percent-clipped=1.0 +2024-09-17 11:54:49,134 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.50 vs. limit=22.5 +2024-09-17 11:55:02,143 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.87 vs. 
limit=22.5 +2024-09-17 11:55:05,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=215580.0, ans=0.0 +2024-09-17 11:55:08,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=215620.0, ans=0.035 +2024-09-17 11:55:24,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=215660.0, ans=0.125 +2024-09-17 11:55:29,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=215660.0, ans=0.025 +2024-09-17 11:55:39,595 INFO [train.py:1198] (1/2) Epoch 12, batch 4150, loss[loss=0.2541, ctc_loss=0.1503, cr_loss=0.3845, attn_decoder_loss=0.2571, over 29495.00 frames. ], tot_loss[loss=0.2594, ctc_loss=0.1603, cr_loss=0.3993, attn_decoder_loss=0.2615, over 5796936.90 frames. ], batch size: 77, lr: 9.20e-03, grad_scale: 8.0 +2024-09-17 11:55:42,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=215700.0, ans=0.05 +2024-09-17 11:55:48,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=215700.0, ans=0.125 +2024-09-17 11:56:00,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=215740.0, ans=0.0 +2024-09-17 11:56:00,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=215740.0, ans=0.0 +2024-09-17 11:56:09,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=215780.0, ans=0.125 +2024-09-17 11:56:10,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=215780.0, ans=0.125 +2024-09-17 11:56:11,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=215780.0, ans=6.0 +2024-09-17 11:56:53,538 INFO [train.py:1198] (1/2) Epoch 12, batch 4200, loss[loss=0.289, ctc_loss=0.1906, cr_loss=0.4692, attn_decoder_loss=0.2895, over 29495.00 frames. ], tot_loss[loss=0.2598, ctc_loss=0.1605, cr_loss=0.4, attn_decoder_loss=0.2619, over 5799446.96 frames. ], batch size: 90, lr: 9.19e-03, grad_scale: 8.0 +2024-09-17 11:56:55,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=215900.0, ans=0.0 +2024-09-17 11:57:12,887 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.094e+01 9.529e+01 1.014e+02 1.072e+02 1.789e+02, threshold=2.028e+02, percent-clipped=0.0 +2024-09-17 11:57:31,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.30 vs. limit=15.0 +2024-09-17 11:57:31,974 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.71 vs. limit=15.0 +2024-09-17 11:58:07,691 INFO [train.py:1198] (1/2) Epoch 12, batch 4250, loss[loss=0.2407, ctc_loss=0.1335, cr_loss=0.3538, attn_decoder_loss=0.2448, over 29507.00 frames. ], tot_loss[loss=0.2598, ctc_loss=0.1604, cr_loss=0.3998, attn_decoder_loss=0.262, over 5805042.59 frames. 
], batch size: 74, lr: 9.19e-03, grad_scale: 8.0 +2024-09-17 11:58:09,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=216100.0, ans=0.0 +2024-09-17 11:58:12,888 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.99 vs. limit=10.0 +2024-09-17 11:58:16,575 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 11:58:20,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.65 vs. limit=22.5 +2024-09-17 11:58:25,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=216140.0, ans=0.125 +2024-09-17 11:58:32,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=216140.0, ans=0.1 +2024-09-17 11:58:36,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=216140.0, ans=0.125 +2024-09-17 11:58:37,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=216180.0, ans=0.125 +2024-09-17 11:58:43,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=216180.0, ans=0.1 +2024-09-17 11:58:56,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=216220.0, ans=0.07 +2024-09-17 11:59:17,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=216260.0, ans=0.125 +2024-09-17 11:59:23,261 INFO [train.py:1198] (1/2) Epoch 12, batch 4300, loss[loss=0.2718, ctc_loss=0.1624, cr_loss=0.401, attn_decoder_loss=0.275, over 29509.00 frames. ], tot_loss[loss=0.2601, ctc_loss=0.1604, cr_loss=0.4002, attn_decoder_loss=0.2623, over 5794235.36 frames. ], batch size: 87, lr: 9.18e-03, grad_scale: 8.0 +2024-09-17 11:59:35,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=216300.0, ans=0.125 +2024-09-17 11:59:41,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=216340.0, ans=0.025 +2024-09-17 11:59:44,352 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.076e+01 9.375e+01 1.010e+02 1.083e+02 2.799e+02, threshold=2.019e+02, percent-clipped=3.0 +2024-09-17 11:59:44,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=216340.0, ans=0.04949747468305833 +2024-09-17 12:00:03,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=216380.0, ans=0.0 +2024-09-17 12:00:10,186 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.79 vs. 
limit=10.0 +2024-09-17 12:00:30,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=216460.0, ans=0.2 +2024-09-17 12:00:33,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=216460.0, ans=0.2 +2024-09-17 12:00:36,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=216500.0, ans=0.125 +2024-09-17 12:00:37,270 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.06 vs. limit=10.0 +2024-09-17 12:00:37,750 INFO [train.py:1198] (1/2) Epoch 12, batch 4350, loss[loss=0.2771, ctc_loss=0.1867, cr_loss=0.4129, attn_decoder_loss=0.2779, over 29493.00 frames. ], tot_loss[loss=0.2638, ctc_loss=0.1636, cr_loss=0.4051, attn_decoder_loss=0.2659, over 5797085.60 frames. ], batch size: 97, lr: 9.18e-03, grad_scale: 4.0 +2024-09-17 12:00:54,907 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.55 vs. limit=12.0 +2024-09-17 12:00:59,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=216540.0, ans=0.0 +2024-09-17 12:00:59,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=216540.0, ans=0.07 +2024-09-17 12:01:07,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=216580.0, ans=0.95 +2024-09-17 12:01:30,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=216620.0, ans=0.0 +2024-09-17 12:01:36,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=216660.0, ans=0.125 +2024-09-17 12:01:42,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=216660.0, ans=0.125 +2024-09-17 12:01:45,769 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.75 vs. limit=22.5 +2024-09-17 12:01:48,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=216660.0, ans=0.1 +2024-09-17 12:01:49,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=216660.0, ans=0.0 +2024-09-17 12:01:51,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=216700.0, ans=0.5 +2024-09-17 12:01:52,273 INFO [train.py:1198] (1/2) Epoch 12, batch 4400, loss[loss=0.2747, ctc_loss=0.1786, cr_loss=0.4298, attn_decoder_loss=0.2759, over 27373.00 frames. ], tot_loss[loss=0.2665, ctc_loss=0.1659, cr_loss=0.4079, attn_decoder_loss=0.2686, over 5766849.24 frames. 
], batch size: 124, lr: 9.18e-03, grad_scale: 8.0 +2024-09-17 12:02:12,849 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.351e+01 9.429e+01 9.897e+01 1.056e+02 1.811e+02, threshold=1.979e+02, percent-clipped=0.0 +2024-09-17 12:02:20,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=216780.0, ans=0.0 +2024-09-17 12:02:53,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=216860.0, ans=0.125 +2024-09-17 12:03:06,659 INFO [train.py:1198] (1/2) Epoch 12, batch 4450, loss[loss=0.2897, ctc_loss=0.2179, cr_loss=0.4256, attn_decoder_loss=0.2882, over 19842.00 frames. ], tot_loss[loss=0.2695, ctc_loss=0.1711, cr_loss=0.4117, attn_decoder_loss=0.2712, over 5574045.14 frames. ], batch size: 209, lr: 9.17e-03, grad_scale: 8.0 +2024-09-17 12:03:36,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=216940.0, ans=10.0 +2024-09-17 12:03:47,556 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:03:54,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=217020.0, ans=0.1 +2024-09-17 12:04:05,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=217020.0, ans=0.125 +2024-09-17 12:04:14,446 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:04:20,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=217060.0, ans=0.125 +2024-09-17 12:04:23,109 INFO [train.py:1198] (1/2) Epoch 12, batch 4500, loss[loss=0.275, ctc_loss=0.1942, cr_loss=0.4173, attn_decoder_loss=0.2748, over 19774.00 frames. ], tot_loss[loss=0.2727, ctc_loss=0.1774, cr_loss=0.4146, attn_decoder_loss=0.2741, over 5232449.48 frames. ], batch size: 209, lr: 9.17e-03, grad_scale: 8.0 +2024-09-17 12:04:28,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=217100.0, ans=0.0 +2024-09-17 12:04:45,819 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.368e+01 1.035e+02 1.137e+02 1.264e+02 3.702e+02, threshold=2.273e+02, percent-clipped=1.0 +2024-09-17 12:04:52,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=217180.0, ans=0.2 +2024-09-17 12:05:52,067 INFO [train.py:1198] (1/2) Epoch 13, batch 0, loss[loss=0.2327, ctc_loss=0.1355, cr_loss=0.3654, attn_decoder_loss=0.2354, over 29606.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1355, cr_loss=0.3654, attn_decoder_loss=0.2354, over 29606.00 frames. ], batch size: 73, lr: 8.81e-03, grad_scale: 16.0 +2024-09-17 12:05:52,067 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 12:06:10,486 INFO [train.py:1230] (1/2) Epoch 13, validation: loss=0.214, ctc_loss=0.04435, cr_loss=4.652e-15, attn_decoder_loss=0.2329, over 944034.00 frames. 
+2024-09-17 12:06:10,487 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 12:06:10,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=217200.0, ans=0.1 +2024-09-17 12:06:10,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=217200.0, ans=0.05 +2024-09-17 12:06:13,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=217200.0, ans=0.0 +2024-09-17 12:06:33,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=217240.0, ans=0.0 +2024-09-17 12:06:40,024 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.66 vs. limit=15.0 +2024-09-17 12:06:40,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.69 vs. limit=15.0 +2024-09-17 12:06:42,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=217280.0, ans=0.07 +2024-09-17 12:06:50,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=217280.0, ans=0.0 +2024-09-17 12:07:18,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=217360.0, ans=0.125 +2024-09-17 12:07:28,725 INFO [train.py:1198] (1/2) Epoch 13, batch 50, loss[loss=0.2308, ctc_loss=0.1392, cr_loss=0.3509, attn_decoder_loss=0.2331, over 29430.00 frames. ], tot_loss[loss=0.2619, ctc_loss=0.1626, cr_loss=0.402, attn_decoder_loss=0.264, over 1269455.44 frames. 
], batch size: 70, lr: 8.80e-03, grad_scale: 8.0 +2024-09-17 12:07:45,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=217440.0, ans=0.0 +2024-09-17 12:07:56,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=217440.0, ans=0.035 +2024-09-17 12:07:56,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=217440.0, ans=0.125 +2024-09-17 12:07:59,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=217480.0, ans=0.0 +2024-09-17 12:08:05,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=217480.0, ans=0.0 +2024-09-17 12:08:08,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=217480.0, ans=0.125 +2024-09-17 12:08:08,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=217480.0, ans=0.1 +2024-09-17 12:08:19,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=217520.0, ans=0.0 +2024-09-17 12:08:25,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=217520.0, ans=0.125 +2024-09-17 12:08:25,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=217520.0, ans=0.125 +2024-09-17 12:08:28,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=217560.0, ans=0.07 +2024-09-17 12:08:30,967 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.263e+01 9.622e+01 1.023e+02 1.146e+02 3.788e+02, threshold=2.046e+02, percent-clipped=2.0 +2024-09-17 12:08:31,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=217560.0, ans=0.125 +2024-09-17 12:08:32,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=217560.0, ans=0.125 +2024-09-17 12:08:36,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=217560.0, ans=0.125 +2024-09-17 12:08:40,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=217560.0, ans=0.125 +2024-09-17 12:08:45,176 INFO [train.py:1198] (1/2) Epoch 13, batch 100, loss[loss=0.2544, ctc_loss=0.1614, cr_loss=0.4145, attn_decoder_loss=0.2555, over 29537.00 frames. ], tot_loss[loss=0.2639, ctc_loss=0.1644, cr_loss=0.4056, attn_decoder_loss=0.2659, over 2253512.68 frames. ], batch size: 76, lr: 8.80e-03, grad_scale: 8.0 +2024-09-17 12:08:54,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.77 vs. 
limit=15.0 +2024-09-17 12:09:04,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=217640.0, ans=0.0 +2024-09-17 12:09:20,929 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.02 vs. limit=15.0 +2024-09-17 12:09:38,873 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.23 vs. limit=15.0 +2024-09-17 12:09:39,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=217720.0, ans=0.125 +2024-09-17 12:09:41,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=217720.0, ans=0.2 +2024-09-17 12:09:51,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=217760.0, ans=0.1 +2024-09-17 12:09:53,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=217760.0, ans=0.1 +2024-09-17 12:10:01,870 INFO [train.py:1198] (1/2) Epoch 13, batch 150, loss[loss=0.2296, ctc_loss=0.1376, cr_loss=0.3644, attn_decoder_loss=0.2317, over 29447.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.1607, cr_loss=0.3998, attn_decoder_loss=0.2627, over 3048473.81 frames. ], batch size: 70, lr: 8.80e-03, grad_scale: 8.0 +2024-09-17 12:10:28,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=217840.0, ans=0.0 +2024-09-17 12:10:32,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=217880.0, ans=0.5 +2024-09-17 12:10:44,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=217880.0, ans=0.125 +2024-09-17 12:10:57,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=217920.0, ans=0.07 +2024-09-17 12:11:06,256 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.881e+01 9.835e+01 1.094e+02 1.657e+02, threshold=1.967e+02, percent-clipped=0.0 +2024-09-17 12:11:06,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=217960.0, ans=0.05 +2024-09-17 12:11:10,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.66 vs. limit=15.0 +2024-09-17 12:11:15,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=217960.0, ans=0.09899494936611666 +2024-09-17 12:11:20,034 INFO [train.py:1198] (1/2) Epoch 13, batch 200, loss[loss=0.2732, ctc_loss=0.1784, cr_loss=0.4234, attn_decoder_loss=0.2743, over 27241.00 frames. ], tot_loss[loss=0.2594, ctc_loss=0.1599, cr_loss=0.3986, attn_decoder_loss=0.2616, over 3660599.48 frames. ], batch size: 124, lr: 8.79e-03, grad_scale: 8.0 +2024-09-17 12:12:01,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.03 vs. 
limit=15.0 +2024-09-17 12:12:03,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_ff2.min_abs, batch_count=218120.0, ans=0.1 +2024-09-17 12:12:07,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=218120.0, ans=0.125 +2024-09-17 12:12:22,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=218160.0, ans=0.0 +2024-09-17 12:12:28,480 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:12:35,455 INFO [train.py:1198] (1/2) Epoch 13, batch 250, loss[loss=0.2713, ctc_loss=0.1723, cr_loss=0.4219, attn_decoder_loss=0.2729, over 29276.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1591, cr_loss=0.3976, attn_decoder_loss=0.2612, over 4142991.08 frames. ], batch size: 100, lr: 8.79e-03, grad_scale: 8.0 +2024-09-17 12:12:40,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=218200.0, ans=0.0 +2024-09-17 12:12:44,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=218200.0, ans=0.025 +2024-09-17 12:12:52,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=218240.0, ans=0.125 +2024-09-17 12:13:03,129 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.90 vs. limit=10.0 +2024-09-17 12:13:11,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=218280.0, ans=0.1 +2024-09-17 12:13:17,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=218280.0, ans=0.125 +2024-09-17 12:13:22,367 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:13:35,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=218320.0, ans=0.125 +2024-09-17 12:13:39,919 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.631e+01 9.019e+01 9.647e+01 1.091e+02 1.389e+02, threshold=1.929e+02, percent-clipped=0.0 +2024-09-17 12:13:50,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=218360.0, ans=0.025 +2024-09-17 12:13:53,952 INFO [train.py:1198] (1/2) Epoch 13, batch 300, loss[loss=0.272, ctc_loss=0.1708, cr_loss=0.4241, attn_decoder_loss=0.2739, over 29533.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1583, cr_loss=0.3965, attn_decoder_loss=0.2607, over 4511141.91 frames. ], batch size: 92, lr: 8.78e-03, grad_scale: 8.0 +2024-09-17 12:14:34,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=218480.0, ans=0.125 +2024-09-17 12:14:54,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=218520.0, ans=0.0 +2024-09-17 12:15:11,632 INFO [train.py:1198] (1/2) Epoch 13, batch 350, loss[loss=0.2352, ctc_loss=0.1468, cr_loss=0.355, attn_decoder_loss=0.2371, over 29727.00 frames. 
], tot_loss[loss=0.2585, ctc_loss=0.158, cr_loss=0.3962, attn_decoder_loss=0.2609, over 4796830.83 frames. ], batch size: 72, lr: 8.78e-03, grad_scale: 8.0 +2024-09-17 12:15:25,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=218640.0, ans=0.0 +2024-09-17 12:15:34,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=218640.0, ans=0.0 +2024-09-17 12:15:43,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=218680.0, ans=0.125 +2024-09-17 12:16:13,452 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 9.251e+01 9.818e+01 1.107e+02 7.103e+02, threshold=1.964e+02, percent-clipped=3.0 +2024-09-17 12:16:27,028 INFO [train.py:1198] (1/2) Epoch 13, batch 400, loss[loss=0.2684, ctc_loss=0.1727, cr_loss=0.4217, attn_decoder_loss=0.2696, over 29705.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.158, cr_loss=0.3964, attn_decoder_loss=0.2608, over 5025993.88 frames. ], batch size: 82, lr: 8.78e-03, grad_scale: 16.0 +2024-09-17 12:16:42,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=218840.0, ans=0.025 +2024-09-17 12:16:50,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=218840.0, ans=0.1 +2024-09-17 12:17:00,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=218880.0, ans=0.0 +2024-09-17 12:17:05,017 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.40 vs. limit=12.0 +2024-09-17 12:17:07,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=218880.0, ans=0.125 +2024-09-17 12:17:12,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=218880.0, ans=0.2 +2024-09-17 12:17:22,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=218920.0, ans=0.025 +2024-09-17 12:17:35,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=218960.0, ans=0.125 +2024-09-17 12:17:45,472 INFO [train.py:1198] (1/2) Epoch 13, batch 450, loss[loss=0.2558, ctc_loss=0.1464, cr_loss=0.3739, attn_decoder_loss=0.2597, over 29694.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1581, cr_loss=0.3966, attn_decoder_loss=0.261, over 5188179.16 frames. ], batch size: 83, lr: 8.77e-03, grad_scale: 8.0 +2024-09-17 12:17:51,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=219000.0, ans=0.0 +2024-09-17 12:18:09,317 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.58 vs. 
limit=15.0 +2024-09-17 12:18:11,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=219040.0, ans=0.125 +2024-09-17 12:18:11,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=219040.0, ans=0.1 +2024-09-17 12:18:17,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=219080.0, ans=0.0 +2024-09-17 12:18:28,769 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.81 vs. limit=15.0 +2024-09-17 12:18:40,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=219120.0, ans=0.125 +2024-09-17 12:18:51,587 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.792e+01 8.793e+01 9.370e+01 9.843e+01 2.913e+02, threshold=1.874e+02, percent-clipped=1.0 +2024-09-17 12:18:55,373 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.88 vs. limit=12.0 +2024-09-17 12:19:00,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=219160.0, ans=0.2 +2024-09-17 12:19:04,118 INFO [train.py:1198] (1/2) Epoch 13, batch 500, loss[loss=0.2789, ctc_loss=0.1741, cr_loss=0.4294, attn_decoder_loss=0.281, over 29420.00 frames. ], tot_loss[loss=0.2581, ctc_loss=0.1579, cr_loss=0.3968, attn_decoder_loss=0.2605, over 5330793.16 frames. ], batch size: 94, lr: 8.77e-03, grad_scale: 8.0 +2024-09-17 12:19:17,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.35 vs. limit=15.0 +2024-09-17 12:19:38,368 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.97 vs. limit=15.0 +2024-09-17 12:19:40,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=219280.0, ans=0.2 +2024-09-17 12:19:51,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=219320.0, ans=0.2 +2024-09-17 12:19:52,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=219320.0, ans=0.0 +2024-09-17 12:20:06,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=219360.0, ans=0.1 +2024-09-17 12:20:10,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=219360.0, ans=0.125 +2024-09-17 12:20:19,690 INFO [train.py:1198] (1/2) Epoch 13, batch 550, loss[loss=0.2763, ctc_loss=0.1742, cr_loss=0.4368, attn_decoder_loss=0.2779, over 28715.00 frames. ], tot_loss[loss=0.258, ctc_loss=0.1578, cr_loss=0.3961, attn_decoder_loss=0.2604, over 5421569.77 frames. 
], batch size: 104, lr: 8.76e-03, grad_scale: 8.0 +2024-09-17 12:20:27,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=219400.0, ans=0.0 +2024-09-17 12:20:33,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=219440.0, ans=0.125 +2024-09-17 12:20:41,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=219440.0, ans=0.125 +2024-09-17 12:21:03,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=219480.0, ans=0.0 +2024-09-17 12:21:15,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.77 vs. limit=15.0 +2024-09-17 12:21:26,148 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.042e+01 9.312e+01 1.008e+02 1.110e+02 1.901e+02, threshold=2.017e+02, percent-clipped=1.0 +2024-09-17 12:21:29,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=219560.0, ans=0.125 +2024-09-17 12:21:38,302 INFO [train.py:1198] (1/2) Epoch 13, batch 600, loss[loss=0.2726, ctc_loss=0.1699, cr_loss=0.4163, attn_decoder_loss=0.2748, over 29270.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1582, cr_loss=0.3966, attn_decoder_loss=0.2609, over 5509688.10 frames. ], batch size: 100, lr: 8.76e-03, grad_scale: 8.0 +2024-09-17 12:21:40,538 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=19.34 vs. limit=22.5 +2024-09-17 12:21:41,696 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:21:56,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=219640.0, ans=0.0 +2024-09-17 12:22:14,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=219680.0, ans=0.125 +2024-09-17 12:22:27,190 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.72 vs. limit=22.5 +2024-09-17 12:22:50,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=219760.0, ans=0.125 +2024-09-17 12:22:52,055 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.58 vs. limit=15.0 +2024-09-17 12:22:55,880 INFO [train.py:1198] (1/2) Epoch 13, batch 650, loss[loss=0.2651, ctc_loss=0.1598, cr_loss=0.4057, attn_decoder_loss=0.2678, over 29756.00 frames. ], tot_loss[loss=0.2578, ctc_loss=0.1574, cr_loss=0.3958, attn_decoder_loss=0.2602, over 5586851.08 frames. ], batch size: 81, lr: 8.76e-03, grad_scale: 8.0 +2024-09-17 12:23:08,639 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.46 vs. 
limit=15.0 +2024-09-17 12:23:09,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=219840.0, ans=0.1 +2024-09-17 12:23:35,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=219880.0, ans=0.0 +2024-09-17 12:23:44,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=219920.0, ans=0.0 +2024-09-17 12:23:47,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=219920.0, ans=0.125 +2024-09-17 12:23:49,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=219920.0, ans=0.0 +2024-09-17 12:23:50,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=219920.0, ans=0.125 +2024-09-17 12:23:59,538 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 9.144e+01 9.957e+01 1.061e+02 1.597e+02, threshold=1.991e+02, percent-clipped=0.0 +2024-09-17 12:24:12,257 INFO [train.py:1198] (1/2) Epoch 13, batch 700, loss[loss=0.2372, ctc_loss=0.1323, cr_loss=0.3502, attn_decoder_loss=0.2411, over 29522.00 frames. ], tot_loss[loss=0.2582, ctc_loss=0.1575, cr_loss=0.3962, attn_decoder_loss=0.2606, over 5636808.14 frames. ], batch size: 76, lr: 8.75e-03, grad_scale: 8.0 +2024-09-17 12:24:18,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=220000.0, ans=0.025 +2024-09-17 12:24:18,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=220000.0, ans=0.07 +2024-09-17 12:24:24,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=220000.0, ans=0.1 +2024-09-17 12:24:26,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=220040.0, ans=0.025 +2024-09-17 12:24:46,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=220080.0, ans=0.125 +2024-09-17 12:25:13,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=220160.0, ans=0.125 +2024-09-17 12:25:16,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=220160.0, ans=0.1 +2024-09-17 12:25:19,065 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.08 vs. limit=22.5 +2024-09-17 12:25:30,052 INFO [train.py:1198] (1/2) Epoch 13, batch 750, loss[loss=0.2643, ctc_loss=0.16, cr_loss=0.4089, attn_decoder_loss=0.2668, over 29691.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.1569, cr_loss=0.3954, attn_decoder_loss=0.2601, over 5676926.70 frames. 
], batch size: 82, lr: 8.75e-03, grad_scale: 8.0 +2024-09-17 12:25:34,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=220200.0, ans=0.125 +2024-09-17 12:25:57,179 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:26:01,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=220280.0, ans=0.125 +2024-09-17 12:26:06,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=220280.0, ans=0.125 +2024-09-17 12:26:10,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=220280.0, ans=0.1 +2024-09-17 12:26:20,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=220320.0, ans=0.025 +2024-09-17 12:26:33,425 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.682e+01 9.372e+01 1.007e+02 1.108e+02 5.289e+02, threshold=2.013e+02, percent-clipped=1.0 +2024-09-17 12:26:41,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=220360.0, ans=0.1 +2024-09-17 12:26:45,670 INFO [train.py:1198] (1/2) Epoch 13, batch 800, loss[loss=0.2442, ctc_loss=0.1458, cr_loss=0.3811, attn_decoder_loss=0.2467, over 29594.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.157, cr_loss=0.3953, attn_decoder_loss=0.2601, over 5707998.15 frames. ], batch size: 73, lr: 8.74e-03, grad_scale: 16.0 +2024-09-17 12:26:55,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=220400.0, ans=0.2 +2024-09-17 12:27:08,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=220440.0, ans=0.0 +2024-09-17 12:27:11,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=220440.0, ans=0.2 +2024-09-17 12:27:35,888 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.65 vs. limit=15.0 +2024-09-17 12:28:03,607 INFO [train.py:1198] (1/2) Epoch 13, batch 850, loss[loss=0.2691, ctc_loss=0.171, cr_loss=0.407, attn_decoder_loss=0.2709, over 29700.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1567, cr_loss=0.3952, attn_decoder_loss=0.2598, over 5736145.88 frames. ], batch size: 89, lr: 8.74e-03, grad_scale: 8.0 +2024-09-17 12:28:14,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=220600.0, ans=0.0 +2024-09-17 12:28:22,444 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.35 vs. 
limit=12.0 +2024-09-17 12:28:35,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=220680.0, ans=10.0 +2024-09-17 12:29:07,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=220760.0, ans=0.0 +2024-09-17 12:29:11,982 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.727e+01 8.923e+01 9.332e+01 1.023e+02 2.147e+02, threshold=1.866e+02, percent-clipped=2.0 +2024-09-17 12:29:12,801 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.49 vs. limit=22.5 +2024-09-17 12:29:19,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=220760.0, ans=0.0 +2024-09-17 12:29:23,103 INFO [train.py:1198] (1/2) Epoch 13, batch 900, loss[loss=0.2386, ctc_loss=0.1353, cr_loss=0.3601, attn_decoder_loss=0.2421, over 29595.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.1568, cr_loss=0.3957, attn_decoder_loss=0.26, over 5739537.39 frames. ], batch size: 73, lr: 8.74e-03, grad_scale: 8.0 +2024-09-17 12:29:26,690 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.47 vs. limit=15.0 +2024-09-17 12:29:44,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=220840.0, ans=0.125 +2024-09-17 12:30:01,930 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.67 vs. limit=6.0 +2024-09-17 12:30:04,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=220880.0, ans=0.125 +2024-09-17 12:30:21,363 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.56 vs. limit=22.5 +2024-09-17 12:30:26,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=220960.0, ans=0.0 +2024-09-17 12:30:37,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=221000.0, ans=0.125 +2024-09-17 12:30:38,532 INFO [train.py:1198] (1/2) Epoch 13, batch 950, loss[loss=0.2437, ctc_loss=0.1462, cr_loss=0.3814, attn_decoder_loss=0.2461, over 29515.00 frames. ], tot_loss[loss=0.258, ctc_loss=0.1575, cr_loss=0.3964, attn_decoder_loss=0.2604, over 5741610.33 frames. ], batch size: 74, lr: 8.73e-03, grad_scale: 8.0 +2024-09-17 12:31:02,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=221040.0, ans=0.0 +2024-09-17 12:31:25,482 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:31:46,091 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.726e+01 9.884e+01 1.087e+02 1.225e+02 3.377e+02, threshold=2.174e+02, percent-clipped=3.0 +2024-09-17 12:31:56,561 INFO [train.py:1198] (1/2) Epoch 13, batch 1000, loss[loss=0.2548, ctc_loss=0.155, cr_loss=0.3895, attn_decoder_loss=0.2572, over 29500.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1586, cr_loss=0.3976, attn_decoder_loss=0.2612, over 5734926.34 frames. 
], batch size: 77, lr: 8.73e-03, grad_scale: 8.0 +2024-09-17 12:32:05,308 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.36 vs. limit=15.0 +2024-09-17 12:32:09,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=221200.0, ans=0.125 +2024-09-17 12:32:12,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.31 vs. limit=15.0 +2024-09-17 12:33:00,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=221360.0, ans=0.1 +2024-09-17 12:33:01,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=221360.0, ans=0.125 +2024-09-17 12:33:10,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=221360.0, ans=0.07 +2024-09-17 12:33:14,412 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.63 vs. limit=22.5 +2024-09-17 12:33:15,015 INFO [train.py:1198] (1/2) Epoch 13, batch 1050, loss[loss=0.2667, ctc_loss=0.1655, cr_loss=0.4259, attn_decoder_loss=0.2685, over 29671.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1582, cr_loss=0.3974, attn_decoder_loss=0.2607, over 5743740.80 frames. ], batch size: 85, lr: 8.73e-03, grad_scale: 8.0 +2024-09-17 12:33:21,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=221400.0, ans=22.5 +2024-09-17 12:33:28,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.54 vs. limit=6.0 +2024-09-17 12:33:30,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=221440.0, ans=0.125 +2024-09-17 12:34:01,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=221520.0, ans=0.125 +2024-09-17 12:34:02,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=221520.0, ans=15.0 +2024-09-17 12:34:20,744 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.817e+01 9.337e+01 1.034e+02 1.952e+02, threshold=1.867e+02, percent-clipped=0.0 +2024-09-17 12:34:22,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=221560.0, ans=0.0 +2024-09-17 12:34:32,053 INFO [train.py:1198] (1/2) Epoch 13, batch 1100, loss[loss=0.2646, ctc_loss=0.165, cr_loss=0.4026, attn_decoder_loss=0.2667, over 29449.00 frames. ], tot_loss[loss=0.2582, ctc_loss=0.1578, cr_loss=0.3967, attn_decoder_loss=0.2606, over 5755028.74 frames. 
], batch size: 78, lr: 8.72e-03, grad_scale: 8.0 +2024-09-17 12:34:32,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=221600.0, ans=0.125 +2024-09-17 12:34:49,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=221640.0, ans=0.025 +2024-09-17 12:35:23,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=221720.0, ans=0.1 +2024-09-17 12:35:41,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=221760.0, ans=0.0 +2024-09-17 12:35:49,959 INFO [train.py:1198] (1/2) Epoch 13, batch 1150, loss[loss=0.2523, ctc_loss=0.1604, cr_loss=0.4048, attn_decoder_loss=0.2535, over 29456.00 frames. ], tot_loss[loss=0.2582, ctc_loss=0.1577, cr_loss=0.3966, attn_decoder_loss=0.2605, over 5753570.47 frames. ], batch size: 78, lr: 8.72e-03, grad_scale: 8.0 +2024-09-17 12:36:18,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.01 vs. limit=6.0 +2024-09-17 12:36:22,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=221880.0, ans=0.0 +2024-09-17 12:36:31,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.65 vs. limit=12.0 +2024-09-17 12:36:32,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=221880.0, ans=0.125 +2024-09-17 12:36:38,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=221920.0, ans=0.025 +2024-09-17 12:36:39,719 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:36:51,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=221960.0, ans=0.1 +2024-09-17 12:36:57,520 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.374e+01 9.019e+01 9.917e+01 1.067e+02 1.578e+02, threshold=1.983e+02, percent-clipped=0.0 +2024-09-17 12:37:07,992 INFO [train.py:1198] (1/2) Epoch 13, batch 1200, loss[loss=0.2639, ctc_loss=0.1541, cr_loss=0.3871, attn_decoder_loss=0.2675, over 29694.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.1586, cr_loss=0.3981, attn_decoder_loss=0.2613, over 5746772.65 frames. 
], batch size: 85, lr: 8.71e-03, grad_scale: 16.0 +2024-09-17 12:37:11,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=222000.0, ans=0.125 +2024-09-17 12:37:12,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=222000.0, ans=0.0 +2024-09-17 12:37:37,495 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:37:39,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=222080.0, ans=0.125 +2024-09-17 12:37:43,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=222080.0, ans=0.1 +2024-09-17 12:37:49,917 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:37:50,445 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.87 vs. limit=6.0 +2024-09-17 12:37:57,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=222120.0, ans=0.025 +2024-09-17 12:38:01,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.96 vs. limit=15.0 +2024-09-17 12:38:03,470 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:38:08,707 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.46 vs. limit=22.5 +2024-09-17 12:38:09,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=222160.0, ans=0.2 +2024-09-17 12:38:24,362 INFO [train.py:1198] (1/2) Epoch 13, batch 1250, loss[loss=0.2716, ctc_loss=0.1616, cr_loss=0.3988, attn_decoder_loss=0.2749, over 29503.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.1585, cr_loss=0.3988, attn_decoder_loss=0.2617, over 5773811.38 frames. ], batch size: 92, lr: 8.71e-03, grad_scale: 8.0 +2024-09-17 12:38:29,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=222200.0, ans=0.125 +2024-09-17 12:38:32,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=222200.0, ans=0.2 +2024-09-17 12:38:35,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=222200.0, ans=0.125 +2024-09-17 12:38:59,442 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.37 vs. 
limit=10.0 +2024-09-17 12:39:12,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=222320.0, ans=0.125 +2024-09-17 12:39:33,606 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.458e+01 9.226e+01 9.923e+01 1.052e+02 2.205e+02, threshold=1.985e+02, percent-clipped=1.0 +2024-09-17 12:39:42,944 INFO [train.py:1198] (1/2) Epoch 13, batch 1300, loss[loss=0.2685, ctc_loss=0.1608, cr_loss=0.4077, attn_decoder_loss=0.2714, over 28155.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1581, cr_loss=0.3981, attn_decoder_loss=0.2612, over 5779325.95 frames. ], batch size: 111, lr: 8.71e-03, grad_scale: 8.0 +2024-09-17 12:39:46,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.18 vs. limit=12.0 +2024-09-17 12:39:56,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=222440.0, ans=0.125 +2024-09-17 12:40:16,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=222480.0, ans=0.0 +2024-09-17 12:40:46,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.75 vs. limit=15.0 +2024-09-17 12:41:00,772 INFO [train.py:1198] (1/2) Epoch 13, batch 1350, loss[loss=0.2656, ctc_loss=0.1621, cr_loss=0.423, attn_decoder_loss=0.2677, over 29741.00 frames. ], tot_loss[loss=0.2585, ctc_loss=0.1577, cr_loss=0.3979, attn_decoder_loss=0.2609, over 5795917.90 frames. ], batch size: 81, lr: 8.70e-03, grad_scale: 8.0 +2024-09-17 12:41:10,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=222600.0, ans=0.125 +2024-09-17 12:41:17,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=222640.0, ans=0.0 +2024-09-17 12:41:50,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=222720.0, ans=0.125 +2024-09-17 12:41:54,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=222720.0, ans=0.0 +2024-09-17 12:42:06,531 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.825e+01 9.390e+01 1.007e+02 1.307e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-17 12:42:15,651 INFO [train.py:1198] (1/2) Epoch 13, batch 1400, loss[loss=0.2199, ctc_loss=0.1257, cr_loss=0.3415, attn_decoder_loss=0.2227, over 29568.00 frames. ], tot_loss[loss=0.2585, ctc_loss=0.1577, cr_loss=0.3978, attn_decoder_loss=0.2609, over 5807026.61 frames. ], batch size: 69, lr: 8.70e-03, grad_scale: 8.0 +2024-09-17 12:42:30,086 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.75 vs. limit=15.0 +2024-09-17 12:43:02,456 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.06 vs. 
limit=15.0 +2024-09-17 12:43:06,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=222920.0, ans=0.07 +2024-09-17 12:43:24,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=222960.0, ans=0.0 +2024-09-17 12:43:33,422 INFO [train.py:1198] (1/2) Epoch 13, batch 1450, loss[loss=0.2685, ctc_loss=0.161, cr_loss=0.4115, attn_decoder_loss=0.2713, over 29462.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.158, cr_loss=0.398, attn_decoder_loss=0.2614, over 5803951.73 frames. ], batch size: 94, lr: 8.69e-03, grad_scale: 8.0 +2024-09-17 12:43:48,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.34 vs. limit=10.0 +2024-09-17 12:44:04,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=223080.0, ans=0.125 +2024-09-17 12:44:04,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.56 vs. limit=22.5 +2024-09-17 12:44:05,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=223080.0, ans=0.0 +2024-09-17 12:44:06,277 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=12.0 +2024-09-17 12:44:10,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=223080.0, ans=0.125 +2024-09-17 12:44:20,379 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.36 vs. limit=15.0 +2024-09-17 12:44:22,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=223120.0, ans=0.125 +2024-09-17 12:44:38,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=223160.0, ans=0.1 +2024-09-17 12:44:42,125 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.708e+01 9.179e+01 9.900e+01 1.065e+02 2.201e+02, threshold=1.980e+02, percent-clipped=1.0 +2024-09-17 12:44:51,610 INFO [train.py:1198] (1/2) Epoch 13, batch 1500, loss[loss=0.2593, ctc_loss=0.152, cr_loss=0.3818, attn_decoder_loss=0.2628, over 29631.00 frames. ], tot_loss[loss=0.2591, ctc_loss=0.158, cr_loss=0.3981, attn_decoder_loss=0.2615, over 5805466.86 frames. 
], batch size: 86, lr: 8.69e-03, grad_scale: 8.0 +2024-09-17 12:45:02,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=223200.0, ans=0.025 +2024-09-17 12:45:05,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=223240.0, ans=0.1 +2024-09-17 12:45:13,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=223240.0, ans=0.2 +2024-09-17 12:45:14,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=223240.0, ans=0.125 +2024-09-17 12:45:19,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=223240.0, ans=0.0 +2024-09-17 12:45:50,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=223320.0, ans=0.2 +2024-09-17 12:45:50,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=223320.0, ans=0.125 +2024-09-17 12:46:08,022 INFO [train.py:1198] (1/2) Epoch 13, batch 1550, loss[loss=0.2759, ctc_loss=0.1747, cr_loss=0.4344, attn_decoder_loss=0.2775, over 29523.00 frames. ], tot_loss[loss=0.2592, ctc_loss=0.1585, cr_loss=0.3981, attn_decoder_loss=0.2616, over 5781103.71 frames. ], batch size: 90, lr: 8.69e-03, grad_scale: 8.0 +2024-09-17 12:46:23,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=223440.0, ans=0.07 +2024-09-17 12:47:07,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=223520.0, ans=0.125 +2024-09-17 12:47:16,425 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.020e+01 9.851e+01 1.181e+02 1.437e+02 2.605e+02, threshold=2.361e+02, percent-clipped=3.0 +2024-09-17 12:47:25,506 INFO [train.py:1198] (1/2) Epoch 13, batch 1600, loss[loss=0.266, ctc_loss=0.1669, cr_loss=0.4064, attn_decoder_loss=0.268, over 29673.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.1586, cr_loss=0.3975, attn_decoder_loss=0.2613, over 5763713.45 frames. ], batch size: 85, lr: 8.68e-03, grad_scale: 16.0 +2024-09-17 12:47:36,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=223600.0, ans=0.125 +2024-09-17 12:47:40,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=223640.0, ans=0.1 +2024-09-17 12:47:42,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.98 vs. 
limit=15.0 +2024-09-17 12:48:07,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=223680.0, ans=0.1 +2024-09-17 12:48:15,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=223720.0, ans=0.0 +2024-09-17 12:48:19,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=223720.0, ans=0.125 +2024-09-17 12:48:24,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=7.08 vs. limit=15.0 +2024-09-17 12:48:31,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=223760.0, ans=0.05 +2024-09-17 12:48:43,155 INFO [train.py:1198] (1/2) Epoch 13, batch 1650, loss[loss=0.2702, ctc_loss=0.1654, cr_loss=0.4052, attn_decoder_loss=0.2728, over 29700.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.1586, cr_loss=0.3975, attn_decoder_loss=0.2613, over 5758682.43 frames. ], batch size: 89, lr: 8.68e-03, grad_scale: 8.0 +2024-09-17 12:49:06,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=223840.0, ans=0.125 +2024-09-17 12:49:47,794 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.46 vs. limit=12.0 +2024-09-17 12:49:51,350 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.673e+01 9.178e+01 9.964e+01 1.088e+02 2.882e+02, threshold=1.993e+02, percent-clipped=2.0 +2024-09-17 12:49:53,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=223960.0, ans=0.125 +2024-09-17 12:49:55,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.26 vs. limit=15.0 +2024-09-17 12:50:06,240 INFO [train.py:1198] (1/2) Epoch 13, batch 1700, loss[loss=0.2273, ctc_loss=0.127, cr_loss=0.3452, attn_decoder_loss=0.2307, over 29579.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.1582, cr_loss=0.3978, attn_decoder_loss=0.2613, over 5779695.44 frames. ], batch size: 69, lr: 8.68e-03, grad_scale: 8.0 +2024-09-17 12:50:08,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=224000.0, ans=0.125 +2024-09-17 12:50:14,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=224000.0, ans=0.025 +2024-09-17 12:50:32,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=224040.0, ans=0.025 +2024-09-17 12:51:00,268 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.54 vs. 
limit=15.0 +2024-09-17 12:51:01,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=224120.0, ans=0.125 +2024-09-17 12:51:06,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=224120.0, ans=0.2 +2024-09-17 12:51:24,022 INFO [train.py:1198] (1/2) Epoch 13, batch 1750, loss[loss=0.2257, ctc_loss=0.1274, cr_loss=0.3593, attn_decoder_loss=0.2287, over 29386.00 frames. ], tot_loss[loss=0.2582, ctc_loss=0.1574, cr_loss=0.3972, attn_decoder_loss=0.2605, over 5788003.00 frames. ], batch size: 67, lr: 8.67e-03, grad_scale: 8.0 +2024-09-17 12:51:24,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=224200.0, ans=0.125 +2024-09-17 12:51:27,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=224200.0, ans=0.2 +2024-09-17 12:51:51,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=224240.0, ans=0.2 +2024-09-17 12:52:00,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=224280.0, ans=0.09899494936611666 +2024-09-17 12:52:28,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=224360.0, ans=0.125 +2024-09-17 12:52:29,892 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 12:52:33,954 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.807e+01 8.908e+01 9.548e+01 1.035e+02 2.424e+02, threshold=1.910e+02, percent-clipped=1.0 +2024-09-17 12:52:41,366 INFO [train.py:1198] (1/2) Epoch 13, batch 1800, loss[loss=0.2646, ctc_loss=0.1599, cr_loss=0.4176, attn_decoder_loss=0.2669, over 29704.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1579, cr_loss=0.3976, attn_decoder_loss=0.2607, over 5790992.05 frames. 
], batch size: 83, lr: 8.67e-03, grad_scale: 8.0 +2024-09-17 12:52:44,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=224400.0, ans=0.125 +2024-09-17 12:52:46,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=224400.0, ans=0.2 +2024-09-17 12:52:53,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=224400.0, ans=0.0 +2024-09-17 12:53:01,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=224440.0, ans=0.1 +2024-09-17 12:53:18,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=224480.0, ans=0.2 +2024-09-17 12:53:22,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=224480.0, ans=0.125 +2024-09-17 12:53:41,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=224560.0, ans=0.0 +2024-09-17 12:53:47,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=224560.0, ans=0.1 +2024-09-17 12:53:56,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=224600.0, ans=0.125 +2024-09-17 12:53:57,458 INFO [train.py:1198] (1/2) Epoch 13, batch 1850, loss[loss=0.2721, ctc_loss=0.162, cr_loss=0.3828, attn_decoder_loss=0.2758, over 29638.00 frames. ], tot_loss[loss=0.2582, ctc_loss=0.1575, cr_loss=0.3967, attn_decoder_loss=0.2606, over 5796179.56 frames. ], batch size: 86, lr: 8.66e-03, grad_scale: 8.0 +2024-09-17 12:54:26,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=224680.0, ans=0.125 +2024-09-17 12:54:39,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=224680.0, ans=0.0 +2024-09-17 12:54:40,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=224680.0, ans=0.0 +2024-09-17 12:54:51,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=224720.0, ans=0.125 +2024-09-17 12:55:03,830 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.97 vs. limit=15.0 +2024-09-17 12:55:06,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=224760.0, ans=0.125 +2024-09-17 12:55:07,361 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.145e+01 8.966e+01 9.738e+01 1.037e+02 3.444e+02, threshold=1.948e+02, percent-clipped=2.0 +2024-09-17 12:55:15,194 INFO [train.py:1198] (1/2) Epoch 13, batch 1900, loss[loss=0.2627, ctc_loss=0.1533, cr_loss=0.4052, attn_decoder_loss=0.2658, over 29706.00 frames. ], tot_loss[loss=0.2585, ctc_loss=0.1575, cr_loss=0.397, attn_decoder_loss=0.2609, over 5804219.42 frames. 
], batch size: 89, lr: 8.66e-03, grad_scale: 8.0 +2024-09-17 12:55:23,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=224800.0, ans=0.05 +2024-09-17 12:55:38,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=224840.0, ans=0.125 +2024-09-17 12:56:03,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=224920.0, ans=0.0 +2024-09-17 12:56:10,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=224920.0, ans=0.125 +2024-09-17 12:56:13,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=224920.0, ans=0.2 +2024-09-17 12:56:16,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=224960.0, ans=0.125 +2024-09-17 12:56:18,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=224960.0, ans=0.0 +2024-09-17 12:56:22,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=224960.0, ans=0.125 +2024-09-17 12:56:22,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=224960.0, ans=0.125 +2024-09-17 12:56:24,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=224960.0, ans=0.125 +2024-09-17 12:56:27,881 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.15 vs. limit=15.0 +2024-09-17 12:56:30,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=224960.0, ans=0.125 +2024-09-17 12:56:33,260 INFO [train.py:1198] (1/2) Epoch 13, batch 1950, loss[loss=0.2456, ctc_loss=0.1381, cr_loss=0.3834, attn_decoder_loss=0.249, over 29447.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.1579, cr_loss=0.398, attn_decoder_loss=0.2618, over 5819304.13 frames. ], batch size: 78, lr: 8.66e-03, grad_scale: 8.0 +2024-09-17 12:57:28,337 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.29 vs. limit=22.5 +2024-09-17 12:57:40,939 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.542e+01 9.122e+01 9.575e+01 1.024e+02 4.346e+02, threshold=1.915e+02, percent-clipped=1.0 +2024-09-17 12:57:44,863 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=22.38 vs. limit=22.5 +2024-09-17 12:57:48,517 INFO [train.py:1198] (1/2) Epoch 13, batch 2000, loss[loss=0.2306, ctc_loss=0.1423, cr_loss=0.379, attn_decoder_loss=0.2319, over 29316.00 frames. ], tot_loss[loss=0.26, ctc_loss=0.1587, cr_loss=0.3987, attn_decoder_loss=0.2624, over 5797313.04 frames. ], batch size: 67, lr: 8.65e-03, grad_scale: 16.0 +2024-09-17 12:57:57,236 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.31 vs. 
limit=15.0 +2024-09-17 12:58:06,382 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.67 vs. limit=15.0 +2024-09-17 12:58:08,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=225240.0, ans=0.1 +2024-09-17 12:58:10,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=225240.0, ans=0.125 +2024-09-17 12:58:35,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=225320.0, ans=0.07 +2024-09-17 12:58:37,160 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.24 vs. limit=12.0 +2024-09-17 12:59:06,706 INFO [train.py:1198] (1/2) Epoch 13, batch 2050, loss[loss=0.2222, ctc_loss=0.1263, cr_loss=0.3566, attn_decoder_loss=0.225, over 29418.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1581, cr_loss=0.3973, attn_decoder_loss=0.2613, over 5788292.74 frames. ], batch size: 70, lr: 8.65e-03, grad_scale: 8.0 +2024-09-17 12:59:07,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=225400.0, ans=0.125 +2024-09-17 12:59:20,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=225440.0, ans=0.0 +2024-09-17 12:59:25,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=225440.0, ans=0.125 +2024-09-17 12:59:32,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=225440.0, ans=0.125 +2024-09-17 12:59:54,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=225520.0, ans=0.125 +2024-09-17 13:00:08,764 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.95 vs. limit=15.0 +2024-09-17 13:00:18,379 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.810e+01 8.858e+01 9.418e+01 1.005e+02 1.765e+02, threshold=1.884e+02, percent-clipped=0.0 +2024-09-17 13:00:24,866 INFO [train.py:1198] (1/2) Epoch 13, batch 2100, loss[loss=0.2552, ctc_loss=0.1526, cr_loss=0.3895, attn_decoder_loss=0.2579, over 29759.00 frames. ], tot_loss[loss=0.2583, ctc_loss=0.1573, cr_loss=0.3963, attn_decoder_loss=0.2608, over 5799132.78 frames. ], batch size: 81, lr: 8.65e-03, grad_scale: 8.0 +2024-09-17 13:00:38,736 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:00:43,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=225640.0, ans=0.125 +2024-09-17 13:00:54,245 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.23 vs. limit=10.0 +2024-09-17 13:01:18,142 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.71 vs. 
limit=15.0 +2024-09-17 13:01:40,012 INFO [train.py:1198] (1/2) Epoch 13, batch 2150, loss[loss=0.2649, ctc_loss=0.1624, cr_loss=0.4221, attn_decoder_loss=0.2669, over 29439.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1565, cr_loss=0.3956, attn_decoder_loss=0.2599, over 5813543.95 frames. ], batch size: 78, lr: 8.64e-03, grad_scale: 8.0 +2024-09-17 13:01:40,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=225800.0, ans=0.125 +2024-09-17 13:01:59,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=225840.0, ans=0.0 +2024-09-17 13:02:04,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=225840.0, ans=0.0 +2024-09-17 13:02:04,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=225840.0, ans=0.1 +2024-09-17 13:02:16,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=225880.0, ans=0.125 +2024-09-17 13:02:19,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=225880.0, ans=0.125 +2024-09-17 13:02:51,961 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.903e+01 9.039e+01 9.591e+01 1.017e+02 1.428e+02, threshold=1.918e+02, percent-clipped=0.0 +2024-09-17 13:02:55,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=225960.0, ans=0.0 +2024-09-17 13:02:58,186 INFO [train.py:1198] (1/2) Epoch 13, batch 2200, loss[loss=0.2731, ctc_loss=0.161, cr_loss=0.3996, attn_decoder_loss=0.2766, over 29622.00 frames. ], tot_loss[loss=0.258, ctc_loss=0.1571, cr_loss=0.3972, attn_decoder_loss=0.2604, over 5810366.39 frames. ], batch size: 86, lr: 8.64e-03, grad_scale: 8.0 +2024-09-17 13:03:30,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=226080.0, ans=0.2 +2024-09-17 13:03:37,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=226080.0, ans=0.0 +2024-09-17 13:03:43,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=226080.0, ans=0.125 +2024-09-17 13:03:49,378 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.69 vs. limit=15.0 +2024-09-17 13:03:53,055 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.31 vs. limit=15.0 +2024-09-17 13:04:16,309 INFO [train.py:1198] (1/2) Epoch 13, batch 2250, loss[loss=0.2555, ctc_loss=0.1452, cr_loss=0.3915, attn_decoder_loss=0.2591, over 29690.00 frames. ], tot_loss[loss=0.2575, ctc_loss=0.1565, cr_loss=0.3961, attn_decoder_loss=0.2599, over 5810566.82 frames. 
], batch size: 82, lr: 8.63e-03, grad_scale: 4.0 +2024-09-17 13:04:32,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=226240.0, ans=0.125 +2024-09-17 13:05:03,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=226320.0, ans=0.0 +2024-09-17 13:05:05,818 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.04 vs. limit=15.0 +2024-09-17 13:05:08,740 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.96 vs. limit=12.0 +2024-09-17 13:05:27,515 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.643e+01 9.102e+01 9.665e+01 1.015e+02 1.637e+02, threshold=1.933e+02, percent-clipped=0.0 +2024-09-17 13:05:32,603 INFO [train.py:1198] (1/2) Epoch 13, batch 2300, loss[loss=0.2418, ctc_loss=0.14, cr_loss=0.3822, attn_decoder_loss=0.2446, over 29315.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1555, cr_loss=0.3937, attn_decoder_loss=0.2589, over 5798540.28 frames. ], batch size: 71, lr: 8.63e-03, grad_scale: 8.0 +2024-09-17 13:05:37,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=226400.0, ans=0.1 +2024-09-17 13:05:45,755 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.84 vs. limit=15.0 +2024-09-17 13:05:51,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.27 vs. limit=6.0 +2024-09-17 13:06:06,765 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.19 vs. limit=15.0 +2024-09-17 13:06:21,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=226520.0, ans=0.125 +2024-09-17 13:06:50,457 INFO [train.py:1198] (1/2) Epoch 13, batch 2350, loss[loss=0.2668, ctc_loss=0.1582, cr_loss=0.4092, attn_decoder_loss=0.2698, over 29676.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.1559, cr_loss=0.3949, attn_decoder_loss=0.2592, over 5804260.51 frames. ], batch size: 83, lr: 8.63e-03, grad_scale: 8.0 +2024-09-17 13:07:01,298 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:07:04,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=226640.0, ans=0.1 +2024-09-17 13:07:22,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=226680.0, ans=0.025 +2024-09-17 13:07:23,162 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.36 vs. 
limit=22.5 +2024-09-17 13:07:31,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=226680.0, ans=0.125 +2024-09-17 13:07:37,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn1.whiten.whitening_limit, batch_count=226720.0, ans=22.5 +2024-09-17 13:07:47,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=226720.0, ans=0.09899494936611666 +2024-09-17 13:07:48,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.54 vs. limit=15.0 +2024-09-17 13:07:57,210 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.82 vs. limit=12.0 +2024-09-17 13:07:59,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=226760.0, ans=0.1 +2024-09-17 13:08:05,465 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.113e+01 9.470e+01 1.028e+02 1.156e+02 2.779e+02, threshold=2.056e+02, percent-clipped=1.0 +2024-09-17 13:08:08,453 INFO [train.py:1198] (1/2) Epoch 13, batch 2400, loss[loss=0.2542, ctc_loss=0.1546, cr_loss=0.4114, attn_decoder_loss=0.2561, over 29519.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1563, cr_loss=0.3954, attn_decoder_loss=0.2597, over 5807708.35 frames. ], batch size: 76, lr: 8.62e-03, grad_scale: 8.0 +2024-09-17 13:08:13,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=226800.0, ans=0.2 +2024-09-17 13:08:13,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=226800.0, ans=0.2 +2024-09-17 13:08:22,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=226840.0, ans=0.0 +2024-09-17 13:09:03,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=226920.0, ans=0.125 +2024-09-17 13:09:24,020 INFO [train.py:1198] (1/2) Epoch 13, batch 2450, loss[loss=0.2572, ctc_loss=0.1531, cr_loss=0.4071, attn_decoder_loss=0.2597, over 29722.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1572, cr_loss=0.3965, attn_decoder_loss=0.2608, over 5786042.02 frames. ], batch size: 82, lr: 8.62e-03, grad_scale: 8.0 +2024-09-17 13:09:24,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=227000.0, ans=0.1 +2024-09-17 13:09:33,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=227000.0, ans=0.1 +2024-09-17 13:09:50,673 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.77 vs. 
limit=15.0 +2024-09-17 13:10:00,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=227080.0, ans=0.0 +2024-09-17 13:10:28,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=227160.0, ans=0.125 +2024-09-17 13:10:35,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=227160.0, ans=0.0 +2024-09-17 13:10:38,635 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.760e+01 9.238e+01 9.776e+01 1.100e+02 2.445e+02, threshold=1.955e+02, percent-clipped=1.0 +2024-09-17 13:10:42,075 INFO [train.py:1198] (1/2) Epoch 13, batch 2500, loss[loss=0.2744, ctc_loss=0.1698, cr_loss=0.4093, attn_decoder_loss=0.2769, over 29637.00 frames. ], tot_loss[loss=0.2583, ctc_loss=0.1571, cr_loss=0.3965, attn_decoder_loss=0.2608, over 5797056.45 frames. ], batch size: 86, lr: 8.62e-03, grad_scale: 8.0 +2024-09-17 13:11:11,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=227280.0, ans=22.5 +2024-09-17 13:11:14,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=227280.0, ans=0.2 +2024-09-17 13:11:15,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=227280.0, ans=0.125 +2024-09-17 13:11:20,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=227280.0, ans=0.0 +2024-09-17 13:11:21,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=227280.0, ans=0.1 +2024-09-17 13:11:27,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.15 vs. limit=15.0 +2024-09-17 13:11:40,299 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.24 vs. limit=15.0 +2024-09-17 13:11:47,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=227360.0, ans=0.2 +2024-09-17 13:11:50,449 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.28 vs. limit=15.0 +2024-09-17 13:12:00,454 INFO [train.py:1198] (1/2) Epoch 13, batch 2550, loss[loss=0.2309, ctc_loss=0.14, cr_loss=0.3715, attn_decoder_loss=0.2328, over 29358.00 frames. ], tot_loss[loss=0.2581, ctc_loss=0.1569, cr_loss=0.3965, attn_decoder_loss=0.2605, over 5799954.30 frames. 
], batch size: 67, lr: 8.61e-03, grad_scale: 8.0 +2024-09-17 13:12:05,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=227400.0, ans=0.125 +2024-09-17 13:12:17,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=227440.0, ans=0.125 +2024-09-17 13:12:20,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=227440.0, ans=0.125 +2024-09-17 13:12:29,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.64 vs. limit=22.5 +2024-09-17 13:12:32,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=227480.0, ans=0.0 +2024-09-17 13:13:02,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=227560.0, ans=0.2 +2024-09-17 13:13:08,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=227560.0, ans=0.0 +2024-09-17 13:13:13,208 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.947e+01 9.236e+01 9.928e+01 1.060e+02 5.337e+02, threshold=1.986e+02, percent-clipped=3.0 +2024-09-17 13:13:16,294 INFO [train.py:1198] (1/2) Epoch 13, batch 2600, loss[loss=0.2476, ctc_loss=0.1391, cr_loss=0.374, attn_decoder_loss=0.2514, over 29462.00 frames. ], tot_loss[loss=0.2587, ctc_loss=0.1576, cr_loss=0.3977, attn_decoder_loss=0.2611, over 5796253.99 frames. ], batch size: 78, lr: 8.61e-03, grad_scale: 8.0 +2024-09-17 13:13:18,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=227600.0, ans=0.1 +2024-09-17 13:13:24,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=227600.0, ans=0.0 +2024-09-17 13:14:23,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=227760.0, ans=0.125 +2024-09-17 13:14:25,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=227760.0, ans=0.1 +2024-09-17 13:14:34,056 INFO [train.py:1198] (1/2) Epoch 13, batch 2650, loss[loss=0.2785, ctc_loss=0.177, cr_loss=0.4346, attn_decoder_loss=0.2801, over 29254.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.158, cr_loss=0.3984, attn_decoder_loss=0.2614, over 5801977.72 frames. ], batch size: 100, lr: 8.60e-03, grad_scale: 8.0 +2024-09-17 13:14:37,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=227800.0, ans=0.1 +2024-09-17 13:14:41,145 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.53 vs. 
limit=15.0 +2024-09-17 13:14:57,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=227840.0, ans=0.125 +2024-09-17 13:15:12,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=227880.0, ans=0.2 +2024-09-17 13:15:21,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=227920.0, ans=0.04949747468305833 +2024-09-17 13:15:48,539 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.882e+01 9.244e+01 9.728e+01 1.060e+02 3.050e+02, threshold=1.946e+02, percent-clipped=2.0 +2024-09-17 13:15:52,088 INFO [train.py:1198] (1/2) Epoch 13, batch 2700, loss[loss=0.2702, ctc_loss=0.1657, cr_loss=0.4181, attn_decoder_loss=0.2725, over 29554.00 frames. ], tot_loss[loss=0.2597, ctc_loss=0.1588, cr_loss=0.3998, attn_decoder_loss=0.262, over 5797921.46 frames. ], batch size: 87, lr: 8.60e-03, grad_scale: 8.0 +2024-09-17 13:16:07,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=228040.0, ans=0.125 +2024-09-17 13:16:33,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=228080.0, ans=0.0 +2024-09-17 13:16:36,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=228120.0, ans=0.2 +2024-09-17 13:16:37,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=228120.0, ans=0.2 +2024-09-17 13:17:07,972 INFO [train.py:1198] (1/2) Epoch 13, batch 2750, loss[loss=0.2481, ctc_loss=0.1513, cr_loss=0.3972, attn_decoder_loss=0.25, over 29508.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1581, cr_loss=0.3986, attn_decoder_loss=0.2609, over 5796265.64 frames. ], batch size: 75, lr: 8.60e-03, grad_scale: 8.0 +2024-09-17 13:17:14,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=228200.0, ans=0.125 +2024-09-17 13:18:18,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=228360.0, ans=0.125 +2024-09-17 13:18:23,265 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.445e+01 9.348e+01 1.004e+02 1.120e+02 2.904e+02, threshold=2.008e+02, percent-clipped=2.0 +2024-09-17 13:18:26,299 INFO [train.py:1198] (1/2) Epoch 13, batch 2800, loss[loss=0.2959, ctc_loss=0.2215, cr_loss=0.4484, attn_decoder_loss=0.2942, over 20531.00 frames. ], tot_loss[loss=0.2591, ctc_loss=0.1589, cr_loss=0.3996, attn_decoder_loss=0.2613, over 5776423.32 frames. ], batch size: 209, lr: 8.59e-03, grad_scale: 16.0 +2024-09-17 13:18:28,663 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=16.58 vs. limit=15.0 +2024-09-17 13:18:41,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.84 vs. 
limit=10.0 +2024-09-17 13:18:43,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=228440.0, ans=0.125 +2024-09-17 13:19:14,802 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.21 vs. limit=6.0 +2024-09-17 13:19:21,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=228520.0, ans=0.0 +2024-09-17 13:19:40,204 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.68 vs. limit=22.5 +2024-09-17 13:19:44,131 INFO [train.py:1198] (1/2) Epoch 13, batch 2850, loss[loss=0.2531, ctc_loss=0.1505, cr_loss=0.3772, attn_decoder_loss=0.2561, over 29517.00 frames. ], tot_loss[loss=0.2592, ctc_loss=0.1591, cr_loss=0.3996, attn_decoder_loss=0.2615, over 5762735.04 frames. ], batch size: 77, lr: 8.59e-03, grad_scale: 8.0 +2024-09-17 13:19:50,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=228600.0, ans=0.05 +2024-09-17 13:19:51,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=228600.0, ans=0.04949747468305833 +2024-09-17 13:20:11,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=228640.0, ans=0.125 +2024-09-17 13:20:18,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=228680.0, ans=0.1 +2024-09-17 13:20:27,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=228680.0, ans=0.025 +2024-09-17 13:20:30,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=228720.0, ans=0.07 +2024-09-17 13:20:33,407 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.12 vs. limit=22.5 +2024-09-17 13:20:39,457 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.41 vs. limit=15.0 +2024-09-17 13:21:00,199 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.673e+01 9.259e+01 1.060e+02 1.394e+02 3.143e+02, threshold=2.120e+02, percent-clipped=6.0 +2024-09-17 13:21:00,229 INFO [train.py:1198] (1/2) Epoch 13, batch 2900, loss[loss=0.2497, ctc_loss=0.1547, cr_loss=0.3689, attn_decoder_loss=0.252, over 29428.00 frames. ], tot_loss[loss=0.2599, ctc_loss=0.1592, cr_loss=0.4007, attn_decoder_loss=0.2622, over 5787302.45 frames. ], batch size: 79, lr: 8.59e-03, grad_scale: 8.0 +2024-09-17 13:21:02,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=228800.0, ans=0.1 +2024-09-17 13:21:26,976 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.39 vs. 
limit=15.0 +2024-09-17 13:21:40,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=228880.0, ans=0.125 +2024-09-17 13:21:46,283 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:21:56,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.94 vs. limit=15.0 +2024-09-17 13:21:56,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=228920.0, ans=0.2 +2024-09-17 13:22:07,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=228960.0, ans=0.125 +2024-09-17 13:22:09,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=228960.0, ans=0.2 +2024-09-17 13:22:18,345 INFO [train.py:1198] (1/2) Epoch 13, batch 2950, loss[loss=0.2451, ctc_loss=0.1546, cr_loss=0.4071, attn_decoder_loss=0.2461, over 29519.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1579, cr_loss=0.3986, attn_decoder_loss=0.2608, over 5781821.85 frames. ], batch size: 75, lr: 8.58e-03, grad_scale: 4.0 +2024-09-17 13:22:36,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=229040.0, ans=0.0 +2024-09-17 13:22:38,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=229040.0, ans=0.125 +2024-09-17 13:22:41,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=229040.0, ans=0.0 +2024-09-17 13:22:47,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=229080.0, ans=0.125 +2024-09-17 13:23:25,304 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.68 vs. limit=22.5 +2024-09-17 13:23:36,402 INFO [train.py:1198] (1/2) Epoch 13, batch 3000, loss[loss=0.2528, ctc_loss=0.1598, cr_loss=0.4145, attn_decoder_loss=0.2539, over 29770.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1581, cr_loss=0.3988, attn_decoder_loss=0.2609, over 5783585.91 frames. ], batch size: 81, lr: 8.58e-03, grad_scale: 8.0 +2024-09-17 13:23:36,402 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 13:23:54,825 INFO [train.py:1230] (1/2) Epoch 13, validation: loss=0.212, ctc_loss=0.04384, cr_loss=4.97e-15, attn_decoder_loss=0.2307, over 944034.00 frames. +2024-09-17 13:23:54,825 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 13:23:55,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.44 vs. 
limit=10.0 +2024-09-17 13:23:56,317 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.701e+01 8.967e+01 9.683e+01 1.075e+02 2.883e+02, threshold=1.937e+02, percent-clipped=1.0 +2024-09-17 13:24:07,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=229200.0, ans=0.2 +2024-09-17 13:24:13,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=229240.0, ans=0.05 +2024-09-17 13:24:16,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=229240.0, ans=0.125 +2024-09-17 13:24:16,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=229240.0, ans=0.0 +2024-09-17 13:24:30,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=229280.0, ans=0.125 +2024-09-17 13:24:32,163 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.15 vs. limit=15.0 +2024-09-17 13:24:39,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=229320.0, ans=0.0 +2024-09-17 13:25:04,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=229360.0, ans=0.025 +2024-09-17 13:25:10,619 INFO [train.py:1198] (1/2) Epoch 13, batch 3050, loss[loss=0.2543, ctc_loss=0.1617, cr_loss=0.4142, attn_decoder_loss=0.2554, over 29538.00 frames. ], tot_loss[loss=0.2592, ctc_loss=0.1584, cr_loss=0.3997, attn_decoder_loss=0.2615, over 5776780.50 frames. ], batch size: 76, lr: 8.57e-03, grad_scale: 8.0 +2024-09-17 13:25:30,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=229440.0, ans=0.1 +2024-09-17 13:25:39,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=229480.0, ans=0.1 +2024-09-17 13:25:47,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=229480.0, ans=0.1 +2024-09-17 13:25:52,589 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.78 vs. limit=15.0 +2024-09-17 13:26:20,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=229560.0, ans=0.1 +2024-09-17 13:26:29,230 INFO [train.py:1198] (1/2) Epoch 13, batch 3100, loss[loss=0.277, ctc_loss=0.177, cr_loss=0.4143, attn_decoder_loss=0.2789, over 29291.00 frames. ], tot_loss[loss=0.2588, ctc_loss=0.1582, cr_loss=0.3991, attn_decoder_loss=0.2612, over 5775979.73 frames. ], batch size: 100, lr: 8.57e-03, grad_scale: 8.0 +2024-09-17 13:26:32,979 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.799e+01 9.409e+01 1.035e+02 1.210e+02 2.103e+02, threshold=2.070e+02, percent-clipped=1.0 +2024-09-17 13:26:36,825 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.89 vs. 
limit=15.0 +2024-09-17 13:26:52,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=229640.0, ans=0.125 +2024-09-17 13:27:15,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=229720.0, ans=0.125 +2024-09-17 13:27:36,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=229760.0, ans=0.025 +2024-09-17 13:27:45,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=229800.0, ans=0.125 +2024-09-17 13:27:47,052 INFO [train.py:1198] (1/2) Epoch 13, batch 3150, loss[loss=0.2732, ctc_loss=0.1623, cr_loss=0.4188, attn_decoder_loss=0.2762, over 28793.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1578, cr_loss=0.3986, attn_decoder_loss=0.2609, over 5783162.58 frames. ], batch size: 104, lr: 8.57e-03, grad_scale: 8.0 +2024-09-17 13:27:48,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=229800.0, ans=0.0 +2024-09-17 13:27:54,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=229800.0, ans=0.0 +2024-09-17 13:28:11,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=229840.0, ans=0.1 +2024-09-17 13:28:11,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=229840.0, ans=0.0 +2024-09-17 13:28:17,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=229880.0, ans=0.1 +2024-09-17 13:28:27,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=229880.0, ans=0.125 +2024-09-17 13:28:50,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=229960.0, ans=0.0 +2024-09-17 13:28:53,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=229960.0, ans=0.125 +2024-09-17 13:28:56,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=229960.0, ans=0.125 +2024-09-17 13:29:02,206 INFO [train.py:1198] (1/2) Epoch 13, batch 3200, loss[loss=0.2509, ctc_loss=0.144, cr_loss=0.3818, attn_decoder_loss=0.2543, over 29779.00 frames. ], tot_loss[loss=0.2578, ctc_loss=0.1571, cr_loss=0.3972, attn_decoder_loss=0.2602, over 5794413.76 frames. ], batch size: 80, lr: 8.56e-03, grad_scale: 16.0 +2024-09-17 13:29:05,035 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.957e+01 9.025e+01 9.709e+01 1.089e+02 2.819e+02, threshold=1.942e+02, percent-clipped=2.0 +2024-09-17 13:29:08,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=230000.0, ans=0.125 +2024-09-17 13:29:18,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.84 vs. 
limit=22.5 +2024-09-17 13:29:37,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=230080.0, ans=0.125 +2024-09-17 13:29:48,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=230120.0, ans=0.125 +2024-09-17 13:30:20,633 INFO [train.py:1198] (1/2) Epoch 13, batch 3250, loss[loss=0.2632, ctc_loss=0.1568, cr_loss=0.3959, attn_decoder_loss=0.2663, over 29702.00 frames. ], tot_loss[loss=0.2581, ctc_loss=0.1573, cr_loss=0.398, attn_decoder_loss=0.2605, over 5801264.27 frames. ], batch size: 84, lr: 8.56e-03, grad_scale: 8.0 +2024-09-17 13:30:22,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=230200.0, ans=0.125 +2024-09-17 13:30:49,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=230240.0, ans=0.0 +2024-09-17 13:30:56,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=230280.0, ans=0.125 +2024-09-17 13:31:09,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=230320.0, ans=0.0 +2024-09-17 13:31:21,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=230360.0, ans=0.1 +2024-09-17 13:31:38,594 INFO [train.py:1198] (1/2) Epoch 13, batch 3300, loss[loss=0.2616, ctc_loss=0.1534, cr_loss=0.3779, attn_decoder_loss=0.2652, over 28584.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.1562, cr_loss=0.3962, attn_decoder_loss=0.2592, over 5799288.28 frames. ], batch size: 112, lr: 8.56e-03, grad_scale: 8.0 +2024-09-17 13:31:41,817 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.455e+01 8.916e+01 9.519e+01 1.032e+02 2.087e+02, threshold=1.904e+02, percent-clipped=1.0 +2024-09-17 13:31:42,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=230400.0, ans=0.125 +2024-09-17 13:31:46,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=230400.0, ans=0.0 +2024-09-17 13:31:48,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=230400.0, ans=10.0 +2024-09-17 13:31:55,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=230440.0, ans=0.125 +2024-09-17 13:32:39,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=230560.0, ans=0.1 +2024-09-17 13:32:43,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=230560.0, ans=0.125 +2024-09-17 13:32:53,823 INFO [train.py:1198] (1/2) Epoch 13, batch 3350, loss[loss=0.2665, ctc_loss=0.157, cr_loss=0.3962, attn_decoder_loss=0.2698, over 28777.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.1569, cr_loss=0.3967, attn_decoder_loss=0.2599, over 5776814.24 frames. 
], batch size: 104, lr: 8.55e-03, grad_scale: 8.0 +2024-09-17 13:32:58,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=230600.0, ans=0.125 +2024-09-17 13:33:12,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=230640.0, ans=0.2 +2024-09-17 13:33:33,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=230680.0, ans=0.125 +2024-09-17 13:33:36,025 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.57 vs. limit=10.0 +2024-09-17 13:33:57,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=230760.0, ans=0.04949747468305833 +2024-09-17 13:34:14,314 INFO [train.py:1198] (1/2) Epoch 13, batch 3400, loss[loss=0.2253, ctc_loss=0.1331, cr_loss=0.3592, attn_decoder_loss=0.2275, over 29318.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.1566, cr_loss=0.3966, attn_decoder_loss=0.26, over 5769617.81 frames. ], batch size: 67, lr: 8.55e-03, grad_scale: 8.0 +2024-09-17 13:34:16,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=230800.0, ans=0.125 +2024-09-17 13:34:17,289 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.853e+01 8.984e+01 9.781e+01 1.096e+02 3.563e+02, threshold=1.956e+02, percent-clipped=2.0 +2024-09-17 13:34:20,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=230800.0, ans=0.125 +2024-09-17 13:34:34,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=230840.0, ans=0.025 +2024-09-17 13:34:38,697 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:34:49,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=230880.0, ans=0.125 +2024-09-17 13:34:58,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=230920.0, ans=0.125 +2024-09-17 13:35:07,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=230920.0, ans=0.125 +2024-09-17 13:35:21,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=230960.0, ans=0.125 +2024-09-17 13:35:30,139 INFO [train.py:1198] (1/2) Epoch 13, batch 3450, loss[loss=0.2743, ctc_loss=0.1711, cr_loss=0.4289, attn_decoder_loss=0.2762, over 28261.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.1565, cr_loss=0.3974, attn_decoder_loss=0.2601, over 5777210.60 frames. 
], batch size: 111, lr: 8.55e-03, grad_scale: 8.0 +2024-09-17 13:35:39,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=231000.0, ans=0.125 +2024-09-17 13:35:42,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=231000.0, ans=0.2 +2024-09-17 13:35:42,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=231000.0, ans=0.125 +2024-09-17 13:35:54,633 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:36:09,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=231080.0, ans=0.0 +2024-09-17 13:36:11,721 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.95 vs. limit=15.0 +2024-09-17 13:36:12,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=231080.0, ans=0.0 +2024-09-17 13:36:12,598 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:36:12,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=231080.0, ans=0.5 +2024-09-17 13:36:29,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=231160.0, ans=0.125 +2024-09-17 13:36:37,558 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.36 vs. limit=15.0 +2024-09-17 13:36:45,949 INFO [train.py:1198] (1/2) Epoch 13, batch 3500, loss[loss=0.2466, ctc_loss=0.1541, cr_loss=0.4033, attn_decoder_loss=0.2479, over 29285.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1561, cr_loss=0.3961, attn_decoder_loss=0.2594, over 5777911.46 frames. ], batch size: 71, lr: 8.54e-03, grad_scale: 8.0 +2024-09-17 13:36:46,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=231200.0, ans=0.0 +2024-09-17 13:36:49,013 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.194e+01 9.091e+01 9.756e+01 1.067e+02 1.863e+02, threshold=1.951e+02, percent-clipped=0.0 +2024-09-17 13:36:50,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=231200.0, ans=0.125 +2024-09-17 13:36:54,868 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.07 vs. limit=10.0 +2024-09-17 13:37:22,905 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.97 vs. 
limit=15.0 +2024-09-17 13:37:28,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=231280.0, ans=0.05 +2024-09-17 13:37:41,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=231320.0, ans=0.125 +2024-09-17 13:37:58,522 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.77 vs. limit=15.0 +2024-09-17 13:37:59,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=231400.0, ans=0.0 +2024-09-17 13:38:00,813 INFO [train.py:1198] (1/2) Epoch 13, batch 3550, loss[loss=0.2689, ctc_loss=0.1649, cr_loss=0.4161, attn_decoder_loss=0.2712, over 29684.00 frames. ], tot_loss[loss=0.2569, ctc_loss=0.1559, cr_loss=0.3952, attn_decoder_loss=0.2594, over 5782942.72 frames. ], batch size: 89, lr: 8.54e-03, grad_scale: 8.0 +2024-09-17 13:38:01,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=231400.0, ans=0.1 +2024-09-17 13:38:12,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=231400.0, ans=0.125 +2024-09-17 13:38:42,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=231480.0, ans=0.1 +2024-09-17 13:38:57,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=231520.0, ans=0.025 +2024-09-17 13:39:19,465 INFO [train.py:1198] (1/2) Epoch 13, batch 3600, loss[loss=0.2426, ctc_loss=0.1351, cr_loss=0.3918, attn_decoder_loss=0.2458, over 29494.00 frames. ], tot_loss[loss=0.2571, ctc_loss=0.1558, cr_loss=0.3954, attn_decoder_loss=0.2596, over 5791881.76 frames. ], batch size: 77, lr: 8.53e-03, grad_scale: 16.0 +2024-09-17 13:39:24,011 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.860e+01 8.866e+01 9.672e+01 1.060e+02 2.375e+02, threshold=1.934e+02, percent-clipped=1.0 +2024-09-17 13:39:25,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=231600.0, ans=0.0 +2024-09-17 13:39:36,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=231640.0, ans=0.125 +2024-09-17 13:39:45,827 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.85 vs. limit=15.0 +2024-09-17 13:40:13,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=231720.0, ans=0.125 +2024-09-17 13:40:15,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=231720.0, ans=0.125 +2024-09-17 13:40:15,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=231720.0, ans=0.125 +2024-09-17 13:40:17,645 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.89 vs. 
limit=22.5 +2024-09-17 13:40:34,312 INFO [train.py:1198] (1/2) Epoch 13, batch 3650, loss[loss=0.2853, ctc_loss=0.1759, cr_loss=0.4362, attn_decoder_loss=0.2878, over 29488.00 frames. ], tot_loss[loss=0.2566, ctc_loss=0.1554, cr_loss=0.3948, attn_decoder_loss=0.2591, over 5794427.62 frames. ], batch size: 90, lr: 8.53e-03, grad_scale: 8.0 +2024-09-17 13:40:41,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=231800.0, ans=0.2 +2024-09-17 13:40:46,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=231800.0, ans=0.015 +2024-09-17 13:40:50,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=231840.0, ans=0.0 +2024-09-17 13:40:52,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=231840.0, ans=0.125 +2024-09-17 13:40:52,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=231840.0, ans=0.1 +2024-09-17 13:41:02,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=231880.0, ans=0.025 +2024-09-17 13:41:07,944 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.39 vs. limit=10.0 +2024-09-17 13:41:11,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=231880.0, ans=0.125 +2024-09-17 13:41:22,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=231920.0, ans=0.125 +2024-09-17 13:41:30,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=231920.0, ans=0.125 +2024-09-17 13:41:38,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=231960.0, ans=0.125 +2024-09-17 13:41:48,930 INFO [train.py:1198] (1/2) Epoch 13, batch 3700, loss[loss=0.2513, ctc_loss=0.1473, cr_loss=0.3715, attn_decoder_loss=0.2546, over 29705.00 frames. ], tot_loss[loss=0.2569, ctc_loss=0.1555, cr_loss=0.3956, attn_decoder_loss=0.2594, over 5804158.65 frames. ], batch size: 84, lr: 8.53e-03, grad_scale: 8.0 +2024-09-17 13:41:53,428 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.974e+01 9.152e+01 9.843e+01 1.065e+02 3.437e+02, threshold=1.969e+02, percent-clipped=3.0 +2024-09-17 13:41:54,209 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.60 vs. limit=22.5 +2024-09-17 13:42:02,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=232040.0, ans=0.0 +2024-09-17 13:42:14,993 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.30 vs. limit=22.5 +2024-09-17 13:42:19,350 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.13 vs. 
limit=15.0 +2024-09-17 13:42:27,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.95 vs. limit=15.0 +2024-09-17 13:42:29,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=232080.0, ans=0.125 +2024-09-17 13:42:37,392 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.37 vs. limit=15.0 +2024-09-17 13:42:40,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=232120.0, ans=0.015 +2024-09-17 13:42:42,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=232120.0, ans=0.125 +2024-09-17 13:42:45,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=232120.0, ans=0.0 +2024-09-17 13:42:47,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.05 vs. limit=10.0 +2024-09-17 13:42:52,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=232160.0, ans=0.125 +2024-09-17 13:42:55,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=232160.0, ans=0.1 +2024-09-17 13:43:03,015 INFO [train.py:1198] (1/2) Epoch 13, batch 3750, loss[loss=0.2239, ctc_loss=0.1375, cr_loss=0.3467, attn_decoder_loss=0.2258, over 29375.00 frames. ], tot_loss[loss=0.2565, ctc_loss=0.1553, cr_loss=0.3949, attn_decoder_loss=0.259, over 5807208.90 frames. ], batch size: 67, lr: 8.52e-03, grad_scale: 4.0 +2024-09-17 13:43:03,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=232200.0, ans=0.125 +2024-09-17 13:43:26,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=232240.0, ans=0.125 +2024-09-17 13:43:28,986 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=12.0 +2024-09-17 13:43:32,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=232280.0, ans=0.1 +2024-09-17 13:43:38,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=232280.0, ans=0.125 +2024-09-17 13:43:58,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=232320.0, ans=0.0 +2024-09-17 13:44:03,612 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.55 vs. limit=10.0 +2024-09-17 13:44:17,437 INFO [train.py:1198] (1/2) Epoch 13, batch 3800, loss[loss=0.2567, ctc_loss=0.1447, cr_loss=0.3688, attn_decoder_loss=0.261, over 29638.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1551, cr_loss=0.3945, attn_decoder_loss=0.2587, over 5799024.71 frames. 
], batch size: 86, lr: 8.52e-03, grad_scale: 8.0 +2024-09-17 13:44:17,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=232400.0, ans=0.0 +2024-09-17 13:44:23,390 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.990e+01 9.154e+01 9.685e+01 1.039e+02 2.233e+02, threshold=1.937e+02, percent-clipped=1.0 +2024-09-17 13:44:25,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=232400.0, ans=0.0 +2024-09-17 13:45:12,519 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.26 vs. limit=10.0 +2024-09-17 13:45:14,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=232520.0, ans=0.125 +2024-09-17 13:45:35,422 INFO [train.py:1198] (1/2) Epoch 13, batch 3850, loss[loss=0.2731, ctc_loss=0.1709, cr_loss=0.4175, attn_decoder_loss=0.2751, over 29264.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1551, cr_loss=0.3946, attn_decoder_loss=0.2587, over 5813917.37 frames. ], batch size: 100, lr: 8.52e-03, grad_scale: 4.0 +2024-09-17 13:45:40,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=232600.0, ans=0.0 +2024-09-17 13:46:08,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=232680.0, ans=0.125 +2024-09-17 13:46:19,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.08 vs. limit=15.0 +2024-09-17 13:46:35,446 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 13:46:44,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=232760.0, ans=0.0 +2024-09-17 13:46:45,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=232760.0, ans=0.125 +2024-09-17 13:46:50,400 INFO [train.py:1198] (1/2) Epoch 13, batch 3900, loss[loss=0.2712, ctc_loss=0.1652, cr_loss=0.4077, attn_decoder_loss=0.2739, over 29651.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1558, cr_loss=0.3958, attn_decoder_loss=0.2595, over 5817632.56 frames. ], batch size: 86, lr: 8.51e-03, grad_scale: 8.0 +2024-09-17 13:46:57,792 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.883e+01 8.946e+01 9.477e+01 1.034e+02 1.292e+02, threshold=1.895e+02, percent-clipped=0.0 +2024-09-17 13:47:11,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=232840.0, ans=0.0 +2024-09-17 13:47:26,647 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.97 vs. limit=15.0 +2024-09-17 13:47:30,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=232880.0, ans=0.0 +2024-09-17 13:47:34,444 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.60 vs. 
limit=15.0 +2024-09-17 13:47:36,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=232920.0, ans=0.025 +2024-09-17 13:47:36,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=232920.0, ans=0.125 +2024-09-17 13:47:42,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=232920.0, ans=0.2 +2024-09-17 13:47:56,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.44 vs. limit=15.0 +2024-09-17 13:48:04,563 INFO [train.py:1198] (1/2) Epoch 13, batch 3950, loss[loss=0.2697, ctc_loss=0.1632, cr_loss=0.4068, attn_decoder_loss=0.2724, over 29490.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.155, cr_loss=0.3949, attn_decoder_loss=0.2593, over 5836878.46 frames. ], batch size: 97, lr: 8.51e-03, grad_scale: 8.0 +2024-09-17 13:48:04,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=233000.0, ans=0.125 +2024-09-17 13:48:31,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff2.min_abs, batch_count=233040.0, ans=0.1 +2024-09-17 13:48:36,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.13 vs. limit=22.5 +2024-09-17 13:48:40,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=233080.0, ans=0.125 +2024-09-17 13:49:03,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=233160.0, ans=10.0 +2024-09-17 13:49:12,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=233160.0, ans=0.125 +2024-09-17 13:49:15,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=233160.0, ans=0.125 +2024-09-17 13:49:18,346 INFO [train.py:1198] (1/2) Epoch 13, batch 4000, loss[loss=0.2494, ctc_loss=0.1425, cr_loss=0.3638, attn_decoder_loss=0.2532, over 29511.00 frames. ], tot_loss[loss=0.2569, ctc_loss=0.1553, cr_loss=0.3947, attn_decoder_loss=0.2594, over 5814717.45 frames. ], batch size: 74, lr: 8.51e-03, grad_scale: 16.0 +2024-09-17 13:49:25,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=233200.0, ans=0.2 +2024-09-17 13:49:26,358 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.65 vs. 
limit=15.0 +2024-09-17 13:49:27,095 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.753e+01 9.222e+01 9.816e+01 1.053e+02 2.750e+02, threshold=1.963e+02, percent-clipped=1.0 +2024-09-17 13:49:34,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=233240.0, ans=0.2 +2024-09-17 13:49:38,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=233240.0, ans=0.0 +2024-09-17 13:49:43,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=233240.0, ans=0.1 +2024-09-17 13:49:45,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=233240.0, ans=0.125 +2024-09-17 13:49:48,565 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.59 vs. limit=6.0 +2024-09-17 13:49:49,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=233280.0, ans=0.125 +2024-09-17 13:50:10,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=233320.0, ans=0.125 +2024-09-17 13:50:21,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=233360.0, ans=0.0 +2024-09-17 13:50:35,262 INFO [train.py:1198] (1/2) Epoch 13, batch 4050, loss[loss=0.292, ctc_loss=0.2017, cr_loss=0.4208, attn_decoder_loss=0.2926, over 20917.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1555, cr_loss=0.3944, attn_decoder_loss=0.2595, over 5798183.91 frames. ], batch size: 209, lr: 8.50e-03, grad_scale: 8.0 +2024-09-17 13:50:35,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=233400.0, ans=0.125 +2024-09-17 13:50:37,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=233400.0, ans=0.05 +2024-09-17 13:50:40,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=233400.0, ans=0.2 +2024-09-17 13:50:58,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=233440.0, ans=0.025 +2024-09-17 13:51:12,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=233480.0, ans=0.0 +2024-09-17 13:51:14,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.01 vs. limit=15.0 +2024-09-17 13:51:16,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=233480.0, ans=0.125 +2024-09-17 13:51:21,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=233520.0, ans=0.1 +2024-09-17 13:51:49,529 INFO [train.py:1198] (1/2) Epoch 13, batch 4100, loss[loss=0.2768, ctc_loss=0.1707, cr_loss=0.4321, attn_decoder_loss=0.279, over 29481.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1559, cr_loss=0.3954, attn_decoder_loss=0.2598, over 5792884.52 frames. 
], batch size: 90, lr: 8.50e-03, grad_scale: 8.0 +2024-09-17 13:51:49,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=233600.0, ans=0.025 +2024-09-17 13:51:51,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=233600.0, ans=0.125 +2024-09-17 13:51:55,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=233600.0, ans=0.125 +2024-09-17 13:51:59,186 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.30 vs. limit=15.0 +2024-09-17 13:51:59,586 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.947e+01 9.234e+01 9.794e+01 1.124e+02 2.298e+02, threshold=1.959e+02, percent-clipped=3.0 +2024-09-17 13:52:04,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=233640.0, ans=0.0 +2024-09-17 13:52:12,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=233640.0, ans=0.125 +2024-09-17 13:52:16,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.56 vs. limit=22.5 +2024-09-17 13:52:32,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=233720.0, ans=0.125 +2024-09-17 13:53:02,943 INFO [train.py:1198] (1/2) Epoch 13, batch 4150, loss[loss=0.2451, ctc_loss=0.1432, cr_loss=0.3796, attn_decoder_loss=0.2479, over 29498.00 frames. ], tot_loss[loss=0.2567, ctc_loss=0.1557, cr_loss=0.3946, attn_decoder_loss=0.2592, over 5798252.46 frames. ], batch size: 77, lr: 8.49e-03, grad_scale: 8.0 +2024-09-17 13:53:04,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=233800.0, ans=0.125 +2024-09-17 13:53:24,573 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.51 vs. limit=15.0 +2024-09-17 13:53:33,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=233880.0, ans=0.125 +2024-09-17 13:53:36,217 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.71 vs. limit=15.0 +2024-09-17 13:54:18,795 INFO [train.py:1198] (1/2) Epoch 13, batch 4200, loss[loss=0.2799, ctc_loss=0.179, cr_loss=0.4499, attn_decoder_loss=0.2811, over 29527.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1558, cr_loss=0.3952, attn_decoder_loss=0.2598, over 5799943.19 frames. 
], batch size: 90, lr: 8.49e-03, grad_scale: 8.0 +2024-09-17 13:54:25,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=234000.0, ans=0.1 +2024-09-17 13:54:30,792 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.689e+01 8.618e+01 9.139e+01 9.691e+01 3.040e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-17 13:55:01,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=234120.0, ans=0.125 +2024-09-17 13:55:08,193 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.63 vs. limit=15.0 +2024-09-17 13:55:12,736 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.53 vs. limit=6.0 +2024-09-17 13:55:13,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=234120.0, ans=0.125 +2024-09-17 13:55:19,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=234160.0, ans=0.2 +2024-09-17 13:55:22,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=234160.0, ans=0.025 +2024-09-17 13:55:22,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=234160.0, ans=0.125 +2024-09-17 13:55:32,367 INFO [train.py:1198] (1/2) Epoch 13, batch 4250, loss[loss=0.2451, ctc_loss=0.1457, cr_loss=0.3833, attn_decoder_loss=0.2477, over 29497.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.1556, cr_loss=0.3947, attn_decoder_loss=0.2602, over 5805455.50 frames. ], batch size: 74, lr: 8.49e-03, grad_scale: 8.0 +2024-09-17 13:55:53,895 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.72 vs. limit=22.5 +2024-09-17 13:55:57,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=234240.0, ans=0.0 +2024-09-17 13:56:01,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=234280.0, ans=0.125 +2024-09-17 13:56:02,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=234280.0, ans=0.1 +2024-09-17 13:56:19,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=234320.0, ans=0.0 +2024-09-17 13:56:32,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=234360.0, ans=0.0 +2024-09-17 13:56:35,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=234360.0, ans=0.125 +2024-09-17 13:56:46,320 INFO [train.py:1198] (1/2) Epoch 13, batch 4300, loss[loss=0.2754, ctc_loss=0.1689, cr_loss=0.423, attn_decoder_loss=0.2778, over 29530.00 frames. ], tot_loss[loss=0.2581, ctc_loss=0.156, cr_loss=0.3951, attn_decoder_loss=0.2606, over 5793931.08 frames. 
], batch size: 87, lr: 8.48e-03, grad_scale: 8.0 +2024-09-17 13:56:46,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=234400.0, ans=0.1 +2024-09-17 13:56:47,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.66 vs. limit=6.0 +2024-09-17 13:56:58,270 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.092e+01 9.409e+01 9.956e+01 1.092e+02 6.321e+02, threshold=1.991e+02, percent-clipped=4.0 +2024-09-17 13:57:35,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=234520.0, ans=0.125 +2024-09-17 13:57:55,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=234560.0, ans=0.0 +2024-09-17 13:58:02,350 INFO [train.py:1198] (1/2) Epoch 13, batch 4350, loss[loss=0.2697, ctc_loss=0.1585, cr_loss=0.3909, attn_decoder_loss=0.2734, over 29450.00 frames. ], tot_loss[loss=0.2618, ctc_loss=0.1591, cr_loss=0.4011, attn_decoder_loss=0.2643, over 5796384.45 frames. ], batch size: 97, lr: 8.48e-03, grad_scale: 8.0 +2024-09-17 13:58:04,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=234600.0, ans=0.025 +2024-09-17 13:58:14,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=234600.0, ans=0.1 +2024-09-17 13:58:26,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=234640.0, ans=0.125 +2024-09-17 13:58:42,612 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.51 vs. limit=15.0 +2024-09-17 13:58:43,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=234680.0, ans=0.025 +2024-09-17 13:58:49,844 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.87 vs. limit=10.0 +2024-09-17 13:58:56,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=234720.0, ans=0.125 +2024-09-17 13:58:56,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=234720.0, ans=0.1 +2024-09-17 13:58:56,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=234720.0, ans=0.2 +2024-09-17 13:59:15,467 INFO [train.py:1198] (1/2) Epoch 13, batch 4400, loss[loss=0.2755, ctc_loss=0.1799, cr_loss=0.4252, attn_decoder_loss=0.2767, over 27251.00 frames. ], tot_loss[loss=0.2644, ctc_loss=0.1614, cr_loss=0.4049, attn_decoder_loss=0.2668, over 5765108.21 frames. 
], batch size: 124, lr: 8.48e-03, grad_scale: 16.0 +2024-09-17 13:59:15,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=234800.0, ans=0.125 +2024-09-17 13:59:24,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=234800.0, ans=0.125 +2024-09-17 13:59:28,466 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.453e+01 9.581e+01 9.987e+01 1.106e+02 2.626e+02, threshold=1.997e+02, percent-clipped=1.0 +2024-09-17 13:59:56,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=234880.0, ans=0.125 +2024-09-17 14:00:01,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=234920.0, ans=0.125 +2024-09-17 14:00:18,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=234960.0, ans=0.125 +2024-09-17 14:00:23,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=234960.0, ans=0.125 +2024-09-17 14:00:26,621 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.79 vs. limit=15.0 +2024-09-17 14:00:30,054 INFO [train.py:1198] (1/2) Epoch 13, batch 4450, loss[loss=0.2811, ctc_loss=0.2031, cr_loss=0.4236, attn_decoder_loss=0.2804, over 20540.00 frames. ], tot_loss[loss=0.2675, ctc_loss=0.1667, cr_loss=0.4093, attn_decoder_loss=0.2696, over 5572204.63 frames. ], batch size: 209, lr: 8.47e-03, grad_scale: 8.0 +2024-09-17 14:00:36,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=235000.0, ans=0.0 +2024-09-17 14:00:37,004 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.55 vs. limit=15.0 +2024-09-17 14:00:46,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=235040.0, ans=0.0 +2024-09-17 14:01:00,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=235080.0, ans=15.0 +2024-09-17 14:01:35,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=235160.0, ans=0.125 +2024-09-17 14:01:40,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=235160.0, ans=0.0 +2024-09-17 14:01:46,500 INFO [train.py:1198] (1/2) Epoch 13, batch 4500, loss[loss=0.2753, ctc_loss=0.1838, cr_loss=0.3928, attn_decoder_loss=0.2767, over 19724.00 frames. ], tot_loss[loss=0.2705, ctc_loss=0.1725, cr_loss=0.4107, attn_decoder_loss=0.2722, over 5232207.86 frames. ], batch size: 209, lr: 8.47e-03, grad_scale: 8.0 +2024-09-17 14:01:48,794 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.28 vs. 
limit=22.5 +2024-09-17 14:02:00,041 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.936e+01 1.022e+02 1.119e+02 1.227e+02 3.439e+02, threshold=2.238e+02, percent-clipped=3.0 +2024-09-17 14:02:07,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=235240.0, ans=0.125 +2024-09-17 14:02:16,964 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.77 vs. limit=15.0 +2024-09-17 14:02:18,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=235280.0, ans=0.125 +2024-09-17 14:03:16,453 INFO [train.py:1198] (1/2) Epoch 14, batch 0, loss[loss=0.2354, ctc_loss=0.1311, cr_loss=0.3475, attn_decoder_loss=0.2392, over 29611.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1311, cr_loss=0.3475, attn_decoder_loss=0.2392, over 29611.00 frames. ], batch size: 73, lr: 8.16e-03, grad_scale: 16.0 +2024-09-17 14:03:16,454 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 14:03:34,823 INFO [train.py:1230] (1/2) Epoch 14, validation: loss=0.2137, ctc_loss=0.04354, cr_loss=5.325e-15, attn_decoder_loss=0.2326, over 944034.00 frames. +2024-09-17 14:03:34,823 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 14:03:40,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.45 vs. limit=15.0 +2024-09-17 14:03:42,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=235300.0, ans=0.0 +2024-09-17 14:03:42,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=235300.0, ans=0.0 +2024-09-17 14:03:44,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.68 vs. limit=15.0 +2024-09-17 14:03:47,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=235300.0, ans=0.0 +2024-09-17 14:03:59,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=235340.0, ans=0.0 +2024-09-17 14:04:19,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=235380.0, ans=0.125 +2024-09-17 14:04:26,280 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.77 vs. limit=6.0 +2024-09-17 14:04:27,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=235420.0, ans=0.1 +2024-09-17 14:04:41,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.39 vs. limit=12.0 +2024-09-17 14:04:46,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.99 vs. 
limit=15.0 +2024-09-17 14:04:50,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=235460.0, ans=0.0 +2024-09-17 14:04:52,730 INFO [train.py:1198] (1/2) Epoch 14, batch 50, loss[loss=0.2249, ctc_loss=0.1302, cr_loss=0.3356, attn_decoder_loss=0.2279, over 29416.00 frames. ], tot_loss[loss=0.2586, ctc_loss=0.1584, cr_loss=0.398, attn_decoder_loss=0.2609, over 1267789.34 frames. ], batch size: 70, lr: 8.16e-03, grad_scale: 8.0 +2024-09-17 14:05:05,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=235500.0, ans=0.125 +2024-09-17 14:05:09,776 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:05:17,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=235540.0, ans=0.125 +2024-09-17 14:05:27,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=235580.0, ans=0.025 +2024-09-17 14:05:40,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=235620.0, ans=0.0 +2024-09-17 14:05:45,792 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.961e+01 9.193e+01 1.002e+02 1.099e+02 2.018e+02, threshold=2.003e+02, percent-clipped=0.0 +2024-09-17 14:05:50,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=235620.0, ans=0.0 +2024-09-17 14:05:56,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=235660.0, ans=0.125 +2024-09-17 14:05:57,583 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.74 vs. limit=15.0 +2024-09-17 14:06:04,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=235660.0, ans=0.125 +2024-09-17 14:06:04,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=235660.0, ans=0.125 +2024-09-17 14:06:08,489 INFO [train.py:1198] (1/2) Epoch 14, batch 100, loss[loss=0.2587, ctc_loss=0.1601, cr_loss=0.43, attn_decoder_loss=0.2601, over 29531.00 frames. ], tot_loss[loss=0.2608, ctc_loss=0.1597, cr_loss=0.4002, attn_decoder_loss=0.2631, over 2252593.29 frames. ], batch size: 76, lr: 8.15e-03, grad_scale: 8.0 +2024-09-17 14:06:29,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.12 vs. 
limit=15.0 +2024-09-17 14:06:36,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=235740.0, ans=0.0 +2024-09-17 14:06:42,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=235780.0, ans=0.125 +2024-09-17 14:06:45,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=235780.0, ans=0.07 +2024-09-17 14:06:50,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=235780.0, ans=0.2 +2024-09-17 14:06:57,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=235820.0, ans=0.0 +2024-09-17 14:07:04,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=235820.0, ans=0.125 +2024-09-17 14:07:07,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=235820.0, ans=0.125 +2024-09-17 14:07:25,372 INFO [train.py:1198] (1/2) Epoch 14, batch 150, loss[loss=0.2375, ctc_loss=0.1443, cr_loss=0.3841, attn_decoder_loss=0.2393, over 29426.00 frames. ], tot_loss[loss=0.2584, ctc_loss=0.1569, cr_loss=0.3974, attn_decoder_loss=0.2608, over 3046777.01 frames. ], batch size: 70, lr: 8.15e-03, grad_scale: 8.0 +2024-09-17 14:07:33,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=235900.0, ans=0.125 +2024-09-17 14:08:05,147 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.37 vs. limit=15.0 +2024-09-17 14:08:09,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.40 vs. limit=15.0 +2024-09-17 14:08:20,476 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 9.181e+01 9.587e+01 1.009e+02 1.798e+02, threshold=1.917e+02, percent-clipped=0.0 +2024-09-17 14:08:26,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=236060.0, ans=0.0 +2024-09-17 14:08:32,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=236060.0, ans=0.125 +2024-09-17 14:08:32,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=236060.0, ans=0.125 +2024-09-17 14:08:37,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=236060.0, ans=0.04949747468305833 +2024-09-17 14:08:43,261 INFO [train.py:1198] (1/2) Epoch 14, batch 200, loss[loss=0.2762, ctc_loss=0.1709, cr_loss=0.4294, attn_decoder_loss=0.2784, over 27289.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1559, cr_loss=0.397, attn_decoder_loss=0.2599, over 3658736.72 frames. 
], batch size: 124, lr: 8.15e-03, grad_scale: 8.0 +2024-09-17 14:09:12,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=236180.0, ans=0.125 +2024-09-17 14:09:16,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=236180.0, ans=0.0 +2024-09-17 14:09:58,994 INFO [train.py:1198] (1/2) Epoch 14, batch 250, loss[loss=0.2733, ctc_loss=0.1692, cr_loss=0.4231, attn_decoder_loss=0.2755, over 29255.00 frames. ], tot_loss[loss=0.2573, ctc_loss=0.1556, cr_loss=0.3959, attn_decoder_loss=0.2598, over 4139859.98 frames. ], batch size: 100, lr: 8.14e-03, grad_scale: 8.0 +2024-09-17 14:10:00,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=236300.0, ans=0.2 +2024-09-17 14:10:20,672 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:10:41,894 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.50 vs. limit=15.0 +2024-09-17 14:10:54,471 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.415e+01 8.995e+01 9.389e+01 1.000e+02 1.684e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-17 14:11:13,227 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.48 vs. limit=6.0 +2024-09-17 14:11:17,011 INFO [train.py:1198] (1/2) Epoch 14, batch 300, loss[loss=0.2795, ctc_loss=0.1734, cr_loss=0.4128, attn_decoder_loss=0.2821, over 29515.00 frames. ], tot_loss[loss=0.2569, ctc_loss=0.1548, cr_loss=0.3952, attn_decoder_loss=0.2594, over 4508262.32 frames. ], batch size: 92, lr: 8.14e-03, grad_scale: 8.0 +2024-09-17 14:11:18,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=236500.0, ans=0.125 +2024-09-17 14:11:26,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=236500.0, ans=0.125 +2024-09-17 14:12:05,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=236620.0, ans=0.0 +2024-09-17 14:12:25,546 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.34 vs. limit=15.0 +2024-09-17 14:12:33,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=236700.0, ans=0.125 +2024-09-17 14:12:35,072 INFO [train.py:1198] (1/2) Epoch 14, batch 350, loss[loss=0.227, ctc_loss=0.1252, cr_loss=0.3476, attn_decoder_loss=0.2306, over 29309.00 frames. ], tot_loss[loss=0.2576, ctc_loss=0.1555, cr_loss=0.3969, attn_decoder_loss=0.2601, over 4794081.55 frames. 
], batch size: 71, lr: 8.14e-03, grad_scale: 8.0 +2024-09-17 14:12:44,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=236700.0, ans=0.125 +2024-09-17 14:12:47,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=236700.0, ans=0.0 +2024-09-17 14:13:08,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=236780.0, ans=0.125 +2024-09-17 14:13:08,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=236780.0, ans=0.025 +2024-09-17 14:13:28,291 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.514e+01 8.698e+01 9.344e+01 1.025e+02 1.871e+02, threshold=1.869e+02, percent-clipped=0.0 +2024-09-17 14:13:40,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=236860.0, ans=0.95 +2024-09-17 14:13:40,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=236860.0, ans=0.0 +2024-09-17 14:13:48,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.46 vs. limit=22.5 +2024-09-17 14:13:49,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=236900.0, ans=0.0 +2024-09-17 14:13:49,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=236900.0, ans=0.125 +2024-09-17 14:13:50,804 INFO [train.py:1198] (1/2) Epoch 14, batch 400, loss[loss=0.2606, ctc_loss=0.1537, cr_loss=0.3935, attn_decoder_loss=0.2637, over 29700.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1553, cr_loss=0.3961, attn_decoder_loss=0.2597, over 5024802.06 frames. ], batch size: 82, lr: 8.13e-03, grad_scale: 16.0 +2024-09-17 14:13:54,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=236900.0, ans=0.0 +2024-09-17 14:14:01,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=236900.0, ans=0.025 +2024-09-17 14:14:03,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_abs, batch_count=236900.0, ans=0.5 +2024-09-17 14:14:15,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=236940.0, ans=0.0 +2024-09-17 14:14:44,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=237020.0, ans=0.125 +2024-09-17 14:15:08,974 INFO [train.py:1198] (1/2) Epoch 14, batch 450, loss[loss=0.2657, ctc_loss=0.1569, cr_loss=0.3967, attn_decoder_loss=0.2689, over 29672.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1549, cr_loss=0.3956, attn_decoder_loss=0.2595, over 5185929.56 frames. 
], batch size: 83, lr: 8.13e-03, grad_scale: 8.0 +2024-09-17 14:15:24,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=237140.0, ans=0.125 +2024-09-17 14:15:27,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=237140.0, ans=0.5 +2024-09-17 14:15:33,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=237140.0, ans=0.125 +2024-09-17 14:15:55,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=237220.0, ans=0.1 +2024-09-17 14:15:58,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=237220.0, ans=0.2 +2024-09-17 14:16:04,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=237220.0, ans=0.125 +2024-09-17 14:16:05,940 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.900e+01 9.763e+01 1.081e+02 1.650e+02, threshold=1.953e+02, percent-clipped=0.0 +2024-09-17 14:16:18,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=237260.0, ans=0.0 +2024-09-17 14:16:27,064 INFO [train.py:1198] (1/2) Epoch 14, batch 500, loss[loss=0.2695, ctc_loss=0.1618, cr_loss=0.3953, attn_decoder_loss=0.2727, over 29372.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1543, cr_loss=0.3954, attn_decoder_loss=0.2588, over 5328985.44 frames. ], batch size: 94, lr: 8.13e-03, grad_scale: 8.0 +2024-09-17 14:16:31,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=237300.0, ans=0.0 +2024-09-17 14:16:35,731 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.85 vs. limit=15.0 +2024-09-17 14:16:36,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=237300.0, ans=0.1 +2024-09-17 14:16:39,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=237300.0, ans=0.0 +2024-09-17 14:16:48,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=237340.0, ans=0.0 +2024-09-17 14:17:06,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=237380.0, ans=0.0 +2024-09-17 14:17:42,705 INFO [train.py:1198] (1/2) Epoch 14, batch 550, loss[loss=0.2554, ctc_loss=0.1402, cr_loss=0.3644, attn_decoder_loss=0.2601, over 28877.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1546, cr_loss=0.3954, attn_decoder_loss=0.2589, over 5421250.49 frames. 
], batch size: 104, lr: 8.12e-03, grad_scale: 8.0 +2024-09-17 14:17:47,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=237500.0, ans=0.125 +2024-09-17 14:18:13,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=237580.0, ans=0.0 +2024-09-17 14:18:40,101 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.566e+01 8.963e+01 9.623e+01 1.012e+02 2.800e+02, threshold=1.925e+02, percent-clipped=3.0 +2024-09-17 14:18:40,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=237620.0, ans=0.0 +2024-09-17 14:18:57,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=237660.0, ans=0.125 +2024-09-17 14:18:58,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=237660.0, ans=0.1 +2024-09-17 14:19:00,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=237700.0, ans=0.125 +2024-09-17 14:19:01,495 INFO [train.py:1198] (1/2) Epoch 14, batch 600, loss[loss=0.2778, ctc_loss=0.1772, cr_loss=0.4299, attn_decoder_loss=0.2795, over 29246.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.1547, cr_loss=0.3954, attn_decoder_loss=0.2593, over 5507649.85 frames. ], batch size: 100, lr: 8.12e-03, grad_scale: 8.0 +2024-09-17 14:19:19,841 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:19:22,141 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.13 vs. limit=22.5 +2024-09-17 14:19:32,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.07 vs. limit=15.0 +2024-09-17 14:19:34,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=237780.0, ans=0.04949747468305833 +2024-09-17 14:19:40,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=237780.0, ans=0.1 +2024-09-17 14:19:40,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=237780.0, ans=0.125 +2024-09-17 14:19:57,212 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.61 vs. limit=12.0 +2024-09-17 14:20:06,051 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.06 vs. limit=15.0 +2024-09-17 14:20:19,157 INFO [train.py:1198] (1/2) Epoch 14, batch 650, loss[loss=0.2565, ctc_loss=0.1584, cr_loss=0.3895, attn_decoder_loss=0.2587, over 29771.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1537, cr_loss=0.3938, attn_decoder_loss=0.2586, over 5585572.59 frames. 
], batch size: 81, lr: 8.12e-03, grad_scale: 8.0 +2024-09-17 14:20:51,324 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:20:57,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=237980.0, ans=0.07 +2024-09-17 14:21:11,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=238020.0, ans=0.0 +2024-09-17 14:21:13,715 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 8.771e+01 9.255e+01 1.013e+02 1.766e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-17 14:21:24,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=238060.0, ans=0.05 +2024-09-17 14:21:34,763 INFO [train.py:1198] (1/2) Epoch 14, batch 700, loss[loss=0.2504, ctc_loss=0.1567, cr_loss=0.4038, attn_decoder_loss=0.2518, over 29519.00 frames. ], tot_loss[loss=0.2565, ctc_loss=0.154, cr_loss=0.3943, attn_decoder_loss=0.2591, over 5637000.16 frames. ], batch size: 76, lr: 8.11e-03, grad_scale: 8.0 +2024-09-17 14:21:44,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=238100.0, ans=0.125 +2024-09-17 14:22:08,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=238180.0, ans=0.0 +2024-09-17 14:22:09,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=238180.0, ans=0.1 +2024-09-17 14:22:13,538 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.76 vs. limit=15.0 +2024-09-17 14:22:37,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=238260.0, ans=0.125 +2024-09-17 14:22:52,567 INFO [train.py:1198] (1/2) Epoch 14, batch 750, loss[loss=0.2664, ctc_loss=0.1619, cr_loss=0.4176, attn_decoder_loss=0.2687, over 29723.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.154, cr_loss=0.3944, attn_decoder_loss=0.2587, over 5677255.98 frames. 
], batch size: 82, lr: 8.11e-03, grad_scale: 8.0 +2024-09-17 14:23:07,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=238340.0, ans=0.2 +2024-09-17 14:23:12,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=238340.0, ans=0.1 +2024-09-17 14:23:24,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=238380.0, ans=0.1 +2024-09-17 14:23:32,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=238380.0, ans=0.125 +2024-09-17 14:23:35,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=238380.0, ans=0.125 +2024-09-17 14:23:40,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=238420.0, ans=0.0 +2024-09-17 14:23:42,173 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.12 vs. limit=6.0 +2024-09-17 14:23:47,555 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.84 vs. limit=15.0 +2024-09-17 14:23:48,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=238420.0, ans=0.1 +2024-09-17 14:23:49,573 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.719e+01 9.200e+01 9.849e+01 1.104e+02 2.206e+02, threshold=1.970e+02, percent-clipped=2.0 +2024-09-17 14:24:07,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=11.33 vs. limit=15.0 +2024-09-17 14:24:10,897 INFO [train.py:1198] (1/2) Epoch 14, batch 800, loss[loss=0.2295, ctc_loss=0.1296, cr_loss=0.3453, attn_decoder_loss=0.2329, over 29589.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1543, cr_loss=0.395, attn_decoder_loss=0.2588, over 5708390.29 frames. ], batch size: 73, lr: 8.11e-03, grad_scale: 16.0 +2024-09-17 14:24:42,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=238580.0, ans=0.1 +2024-09-17 14:24:50,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=238580.0, ans=0.125 +2024-09-17 14:25:07,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=238620.0, ans=0.125 +2024-09-17 14:25:13,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=238660.0, ans=0.1 +2024-09-17 14:25:20,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=238660.0, ans=0.125 +2024-09-17 14:25:23,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff3.min_abs, batch_count=238660.0, ans=0.2 +2024-09-17 14:25:26,162 INFO [train.py:1198] (1/2) Epoch 14, batch 850, loss[loss=0.2781, ctc_loss=0.1821, cr_loss=0.437, attn_decoder_loss=0.2791, over 29715.00 frames. 
], tot_loss[loss=0.2557, ctc_loss=0.1536, cr_loss=0.3938, attn_decoder_loss=0.2583, over 5737663.69 frames. ], batch size: 89, lr: 8.10e-03, grad_scale: 8.0 +2024-09-17 14:25:41,515 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:25:42,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=238740.0, ans=0.1 +2024-09-17 14:25:42,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=238740.0, ans=0.125 +2024-09-17 14:25:50,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=238740.0, ans=0.1 +2024-09-17 14:25:58,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=238780.0, ans=0.07 +2024-09-17 14:26:01,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=238780.0, ans=10.0 +2024-09-17 14:26:02,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=238780.0, ans=0.125 +2024-09-17 14:26:20,083 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=15.10 vs. limit=15.0 +2024-09-17 14:26:22,025 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.470e+01 9.039e+01 9.635e+01 1.057e+02 1.739e+02, threshold=1.927e+02, percent-clipped=0.0 +2024-09-17 14:26:22,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=238820.0, ans=0.125 +2024-09-17 14:26:27,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=238860.0, ans=0.1 +2024-09-17 14:26:43,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=238900.0, ans=0.0 +2024-09-17 14:26:44,217 INFO [train.py:1198] (1/2) Epoch 14, batch 900, loss[loss=0.2371, ctc_loss=0.1405, cr_loss=0.3659, attn_decoder_loss=0.2397, over 29629.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1545, cr_loss=0.395, attn_decoder_loss=0.259, over 5741993.99 frames. ], batch size: 73, lr: 8.10e-03, grad_scale: 8.0 +2024-09-17 14:26:44,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=238900.0, ans=0.125 +2024-09-17 14:26:52,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=238900.0, ans=0.04949747468305833 +2024-09-17 14:26:53,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=238900.0, ans=0.025 +2024-09-17 14:27:05,746 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:27:08,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=238940.0, ans=0.125 +2024-09-17 14:27:09,072 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.88 vs. 
limit=22.5 +2024-09-17 14:27:21,768 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.10 vs. limit=6.0 +2024-09-17 14:27:28,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=239020.0, ans=0.125 +2024-09-17 14:27:59,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=239060.0, ans=0.1 +2024-09-17 14:28:01,790 INFO [train.py:1198] (1/2) Epoch 14, batch 950, loss[loss=0.2358, ctc_loss=0.1403, cr_loss=0.3648, attn_decoder_loss=0.2382, over 29493.00 frames. ], tot_loss[loss=0.2565, ctc_loss=0.1543, cr_loss=0.3948, attn_decoder_loss=0.2591, over 5742391.10 frames. ], batch size: 74, lr: 8.10e-03, grad_scale: 8.0 +2024-09-17 14:28:23,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=239140.0, ans=0.025 +2024-09-17 14:28:26,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=239140.0, ans=0.2 +2024-09-17 14:28:35,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=239180.0, ans=0.125 +2024-09-17 14:28:57,638 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.51 vs. limit=15.0 +2024-09-17 14:28:58,290 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.677e+01 9.217e+01 9.958e+01 1.123e+02 9.034e+02, threshold=1.992e+02, percent-clipped=2.0 +2024-09-17 14:29:17,707 INFO [train.py:1198] (1/2) Epoch 14, batch 1000, loss[loss=0.262, ctc_loss=0.1625, cr_loss=0.4037, attn_decoder_loss=0.2641, over 29493.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1552, cr_loss=0.3961, attn_decoder_loss=0.2597, over 5737531.41 frames. ], batch size: 77, lr: 8.09e-03, grad_scale: 8.0 +2024-09-17 14:29:19,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=239300.0, ans=0.125 +2024-09-17 14:29:24,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=239300.0, ans=0.125 +2024-09-17 14:29:26,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=239300.0, ans=0.125 +2024-09-17 14:29:29,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=239300.0, ans=0.125 +2024-09-17 14:29:55,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=239380.0, ans=0.125 +2024-09-17 14:30:03,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=239420.0, ans=0.0 +2024-09-17 14:30:18,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=239460.0, ans=0.125 +2024-09-17 14:30:35,801 INFO [train.py:1198] (1/2) Epoch 14, batch 1050, loss[loss=0.2641, ctc_loss=0.1585, cr_loss=0.4161, attn_decoder_loss=0.2666, over 29668.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1546, cr_loss=0.3948, attn_decoder_loss=0.2589, over 5745785.83 frames. 
], batch size: 85, lr: 8.09e-03, grad_scale: 8.0 +2024-09-17 14:30:42,734 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.02 vs. limit=15.0 +2024-09-17 14:30:53,520 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.76 vs. limit=22.5 +2024-09-17 14:30:55,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=239540.0, ans=0.0 +2024-09-17 14:31:19,584 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.53 vs. limit=15.0 +2024-09-17 14:31:20,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=239620.0, ans=0.125 +2024-09-17 14:31:24,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=239620.0, ans=0.1 +2024-09-17 14:31:26,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=239620.0, ans=0.0 +2024-09-17 14:31:34,119 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.452e+01 8.789e+01 9.469e+01 1.013e+02 1.494e+02, threshold=1.894e+02, percent-clipped=0.0 +2024-09-17 14:31:53,884 INFO [train.py:1198] (1/2) Epoch 14, batch 1100, loss[loss=0.2533, ctc_loss=0.1528, cr_loss=0.3952, attn_decoder_loss=0.2557, over 29457.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1542, cr_loss=0.3943, attn_decoder_loss=0.2585, over 5756991.47 frames. ], batch size: 78, lr: 8.09e-03, grad_scale: 8.0 +2024-09-17 14:32:07,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=239740.0, ans=0.2 +2024-09-17 14:33:09,740 INFO [train.py:1198] (1/2) Epoch 14, batch 1150, loss[loss=0.2486, ctc_loss=0.1431, cr_loss=0.3781, attn_decoder_loss=0.2519, over 29455.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1543, cr_loss=0.3945, attn_decoder_loss=0.2586, over 5754838.57 frames. ], batch size: 78, lr: 8.08e-03, grad_scale: 8.0 +2024-09-17 14:33:42,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=239980.0, ans=0.125 +2024-09-17 14:34:13,793 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.548e+01 9.029e+01 9.820e+01 1.050e+02 2.109e+02, threshold=1.964e+02, percent-clipped=1.0 +2024-09-17 14:34:23,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=240060.0, ans=0.125 +2024-09-17 14:34:26,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=240060.0, ans=0.0 +2024-09-17 14:34:32,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten.whitening_limit, batch_count=240060.0, ans=15.0 +2024-09-17 14:34:36,162 INFO [train.py:1198] (1/2) Epoch 14, batch 1200, loss[loss=0.2573, ctc_loss=0.1473, cr_loss=0.3868, attn_decoder_loss=0.261, over 29671.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1551, cr_loss=0.3955, attn_decoder_loss=0.2595, over 5745099.65 frames. 
], batch size: 85, lr: 8.08e-03, grad_scale: 16.0 +2024-09-17 14:35:18,277 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.06 vs. limit=15.0 +2024-09-17 14:35:30,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=240220.0, ans=0.2 +2024-09-17 14:35:39,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=240260.0, ans=0.125 +2024-09-17 14:35:41,067 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:35:41,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=240260.0, ans=0.0 +2024-09-17 14:35:54,248 INFO [train.py:1198] (1/2) Epoch 14, batch 1250, loss[loss=0.2693, ctc_loss=0.1698, cr_loss=0.4292, attn_decoder_loss=0.2708, over 29562.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1552, cr_loss=0.3961, attn_decoder_loss=0.26, over 5773554.34 frames. ], batch size: 92, lr: 8.08e-03, grad_scale: 8.0 +2024-09-17 14:36:11,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=240340.0, ans=0.0 +2024-09-17 14:36:27,496 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=4.66 vs. limit=15.0 +2024-09-17 14:36:40,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=240420.0, ans=0.1 +2024-09-17 14:36:52,178 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.786e+01 9.275e+01 9.951e+01 3.249e+02, threshold=1.855e+02, percent-clipped=3.0 +2024-09-17 14:37:04,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=240460.0, ans=0.125 +2024-09-17 14:37:09,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=240500.0, ans=0.95 +2024-09-17 14:37:10,334 INFO [train.py:1198] (1/2) Epoch 14, batch 1300, loss[loss=0.266, ctc_loss=0.1651, cr_loss=0.3872, attn_decoder_loss=0.2686, over 28210.00 frames. ], tot_loss[loss=0.2566, ctc_loss=0.1544, cr_loss=0.3951, attn_decoder_loss=0.2592, over 5777094.82 frames. ], batch size: 111, lr: 8.07e-03, grad_scale: 8.0 +2024-09-17 14:37:27,278 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:37:54,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=240620.0, ans=0.09899494936611666 +2024-09-17 14:38:04,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=240620.0, ans=0.1 +2024-09-17 14:38:24,437 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:38:25,776 INFO [train.py:1198] (1/2) Epoch 14, batch 1350, loss[loss=0.2595, ctc_loss=0.1573, cr_loss=0.4095, attn_decoder_loss=0.2617, over 29762.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1538, cr_loss=0.3951, attn_decoder_loss=0.2589, over 5794203.67 frames. 
], batch size: 81, lr: 8.07e-03, grad_scale: 8.0 +2024-09-17 14:38:28,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=240700.0, ans=0.5 +2024-09-17 14:39:09,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=240780.0, ans=0.125 +2024-09-17 14:39:17,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.29 vs. limit=22.5 +2024-09-17 14:39:27,570 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.403e+01 8.834e+01 9.288e+01 9.876e+01 1.389e+02, threshold=1.858e+02, percent-clipped=0.0 +2024-09-17 14:39:33,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=240860.0, ans=0.0 +2024-09-17 14:39:34,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=240860.0, ans=0.0 +2024-09-17 14:39:45,916 INFO [train.py:1198] (1/2) Epoch 14, batch 1400, loss[loss=0.2255, ctc_loss=0.134, cr_loss=0.3677, attn_decoder_loss=0.2275, over 29576.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1538, cr_loss=0.3946, attn_decoder_loss=0.2587, over 5805692.97 frames. ], batch size: 69, lr: 8.07e-03, grad_scale: 8.0 +2024-09-17 14:40:18,421 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.09 vs. limit=22.5 +2024-09-17 14:40:34,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=241020.0, ans=0.0 +2024-09-17 14:40:57,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=241060.0, ans=0.125 +2024-09-17 14:41:00,963 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.78 vs. limit=12.0 +2024-09-17 14:41:01,383 INFO [train.py:1198] (1/2) Epoch 14, batch 1450, loss[loss=0.2767, ctc_loss=0.172, cr_loss=0.4248, attn_decoder_loss=0.2789, over 29438.00 frames. ], tot_loss[loss=0.2567, ctc_loss=0.1543, cr_loss=0.3959, attn_decoder_loss=0.2593, over 5803338.15 frames. ], batch size: 94, lr: 8.06e-03, grad_scale: 8.0 +2024-09-17 14:41:06,750 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.55 vs. 
limit=15.0 +2024-09-17 14:41:43,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=241180.0, ans=0.0 +2024-09-17 14:41:58,395 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.760e+01 9.187e+01 9.748e+01 1.026e+02 3.155e+02, threshold=1.950e+02, percent-clipped=2.0 +2024-09-17 14:42:03,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=241260.0, ans=0.125 +2024-09-17 14:42:12,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=241260.0, ans=0.2 +2024-09-17 14:42:12,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=241260.0, ans=0.125 +2024-09-17 14:42:16,801 INFO [train.py:1198] (1/2) Epoch 14, batch 1500, loss[loss=0.2671, ctc_loss=0.1578, cr_loss=0.4009, attn_decoder_loss=0.2703, over 29656.00 frames. ], tot_loss[loss=0.2569, ctc_loss=0.1542, cr_loss=0.3955, attn_decoder_loss=0.2595, over 5803771.76 frames. ], batch size: 86, lr: 8.06e-03, grad_scale: 8.0 +2024-09-17 14:42:39,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=241340.0, ans=0.0 +2024-09-17 14:42:41,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.05 vs. limit=22.5 +2024-09-17 14:42:42,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=241340.0, ans=0.0 +2024-09-17 14:42:43,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=241340.0, ans=0.125 +2024-09-17 14:42:48,122 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:42:56,426 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.90 vs. limit=15.0 +2024-09-17 14:43:00,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=241380.0, ans=0.125 +2024-09-17 14:43:07,331 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=13.65 vs. limit=15.0 +2024-09-17 14:43:11,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=241420.0, ans=0.125 +2024-09-17 14:43:13,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=241420.0, ans=0.125 +2024-09-17 14:43:37,274 INFO [train.py:1198] (1/2) Epoch 14, batch 1550, loss[loss=0.2698, ctc_loss=0.1628, cr_loss=0.426, attn_decoder_loss=0.2722, over 29499.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.1543, cr_loss=0.3951, attn_decoder_loss=0.2594, over 5781158.44 frames. ], batch size: 90, lr: 8.06e-03, grad_scale: 8.0 +2024-09-17 14:44:11,393 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.88 vs. 
limit=15.0 +2024-09-17 14:44:24,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=241620.0, ans=0.125 +2024-09-17 14:44:34,866 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.788e+01 9.004e+01 9.910e+01 1.078e+02 4.071e+02, threshold=1.982e+02, percent-clipped=2.0 +2024-09-17 14:44:53,195 INFO [train.py:1198] (1/2) Epoch 14, batch 1600, loss[loss=0.2654, ctc_loss=0.1569, cr_loss=0.3874, attn_decoder_loss=0.2689, over 29667.00 frames. ], tot_loss[loss=0.2567, ctc_loss=0.1548, cr_loss=0.3953, attn_decoder_loss=0.2593, over 5764821.75 frames. ], batch size: 85, lr: 8.05e-03, grad_scale: 16.0 +2024-09-17 14:45:07,946 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.60 vs. limit=22.5 +2024-09-17 14:45:23,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=241780.0, ans=0.125 +2024-09-17 14:45:29,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=241780.0, ans=0.025 +2024-09-17 14:45:31,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=241780.0, ans=0.2 +2024-09-17 14:45:33,059 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:45:40,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=241820.0, ans=0.125 +2024-09-17 14:45:59,447 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.69 vs. limit=15.0 +2024-09-17 14:46:07,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=241900.0, ans=0.025 +2024-09-17 14:46:08,676 INFO [train.py:1198] (1/2) Epoch 14, batch 1650, loss[loss=0.2798, ctc_loss=0.1706, cr_loss=0.4113, attn_decoder_loss=0.2828, over 29732.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.1548, cr_loss=0.395, attn_decoder_loss=0.2593, over 5758585.08 frames. ], batch size: 89, lr: 8.05e-03, grad_scale: 8.0 +2024-09-17 14:46:53,036 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.43 vs. limit=15.0 +2024-09-17 14:47:12,402 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.670e+01 8.773e+01 9.391e+01 1.036e+02 1.444e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-17 14:47:17,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=242060.0, ans=0.125 +2024-09-17 14:47:26,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_na.min_abs, batch_count=242060.0, ans=0.02 +2024-09-17 14:47:27,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=242100.0, ans=0.125 +2024-09-17 14:47:28,929 INFO [train.py:1198] (1/2) Epoch 14, batch 1700, loss[loss=0.2284, ctc_loss=0.1259, cr_loss=0.3505, attn_decoder_loss=0.232, over 29610.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1541, cr_loss=0.3938, attn_decoder_loss=0.2589, over 5780581.18 frames. 
], batch size: 69, lr: 8.05e-03, grad_scale: 8.0 +2024-09-17 14:47:58,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=242180.0, ans=0.125 +2024-09-17 14:48:02,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=242180.0, ans=0.04949747468305833 +2024-09-17 14:48:13,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=242220.0, ans=0.125 +2024-09-17 14:48:26,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=242220.0, ans=0.125 +2024-09-17 14:48:29,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=242260.0, ans=0.125 +2024-09-17 14:48:32,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=242260.0, ans=0.125 +2024-09-17 14:48:43,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=242300.0, ans=0.0 +2024-09-17 14:48:44,855 INFO [train.py:1198] (1/2) Epoch 14, batch 1750, loss[loss=0.2299, ctc_loss=0.1304, cr_loss=0.3569, attn_decoder_loss=0.2331, over 29388.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1538, cr_loss=0.394, attn_decoder_loss=0.2586, over 5789017.42 frames. ], batch size: 67, lr: 8.04e-03, grad_scale: 8.0 +2024-09-17 14:48:51,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=242300.0, ans=0.125 +2024-09-17 14:49:00,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=242340.0, ans=0.125 +2024-09-17 14:49:03,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=242340.0, ans=0.0 +2024-09-17 14:49:19,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=242380.0, ans=0.0 +2024-09-17 14:49:44,145 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.412e+01 8.812e+01 9.337e+01 1.025e+02 2.569e+02, threshold=1.867e+02, percent-clipped=1.0 +2024-09-17 14:49:45,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=242460.0, ans=0.1 +2024-09-17 14:49:48,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=14.41 vs. limit=15.0 +2024-09-17 14:49:54,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=242460.0, ans=0.0 +2024-09-17 14:50:00,703 INFO [train.py:1198] (1/2) Epoch 14, batch 1800, loss[loss=0.262, ctc_loss=0.1524, cr_loss=0.4132, attn_decoder_loss=0.2649, over 29675.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1539, cr_loss=0.3945, attn_decoder_loss=0.2587, over 5790796.23 frames. 
], batch size: 83, lr: 8.04e-03, grad_scale: 8.0 +2024-09-17 14:50:07,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=242500.0, ans=10.0 +2024-09-17 14:50:33,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=242580.0, ans=0.125 +2024-09-17 14:50:45,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=242580.0, ans=0.125 +2024-09-17 14:50:51,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=242620.0, ans=0.025 +2024-09-17 14:50:59,371 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.17 vs. limit=15.0 +2024-09-17 14:51:20,990 INFO [train.py:1198] (1/2) Epoch 14, batch 1850, loss[loss=0.2589, ctc_loss=0.1481, cr_loss=0.3885, attn_decoder_loss=0.2626, over 29654.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1534, cr_loss=0.3939, attn_decoder_loss=0.2585, over 5797314.36 frames. ], batch size: 86, lr: 8.04e-03, grad_scale: 8.0 +2024-09-17 14:51:26,060 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:51:27,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=242700.0, ans=0.0 +2024-09-17 14:51:36,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=242740.0, ans=0.05 +2024-09-17 14:52:01,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=242780.0, ans=10.0 +2024-09-17 14:52:19,865 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.993e+01 9.601e+01 1.027e+02 2.401e+02, threshold=1.920e+02, percent-clipped=1.0 +2024-09-17 14:52:29,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=242860.0, ans=0.05 +2024-09-17 14:52:32,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=242860.0, ans=0.0 +2024-09-17 14:52:36,268 INFO [train.py:1198] (1/2) Epoch 14, batch 1900, loss[loss=0.2794, ctc_loss=0.167, cr_loss=0.4393, attn_decoder_loss=0.2821, over 29701.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1537, cr_loss=0.3948, attn_decoder_loss=0.259, over 5805717.65 frames. 
], batch size: 89, lr: 8.03e-03, grad_scale: 8.0 +2024-09-17 14:52:51,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=242940.0, ans=0.125 +2024-09-17 14:53:08,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=242980.0, ans=0.1 +2024-09-17 14:53:08,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=242980.0, ans=0.025 +2024-09-17 14:53:14,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=242980.0, ans=0.025 +2024-09-17 14:53:35,262 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.44 vs. limit=15.0 +2024-09-17 14:53:36,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=243060.0, ans=0.125 +2024-09-17 14:53:46,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.46 vs. limit=15.0 +2024-09-17 14:53:49,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=243060.0, ans=0.025 +2024-09-17 14:53:51,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=243100.0, ans=0.125 +2024-09-17 14:53:52,713 INFO [train.py:1198] (1/2) Epoch 14, batch 1950, loss[loss=0.2494, ctc_loss=0.1491, cr_loss=0.3832, attn_decoder_loss=0.252, over 29452.00 frames. ], tot_loss[loss=0.2577, ctc_loss=0.1545, cr_loss=0.397, attn_decoder_loss=0.2604, over 5819876.15 frames. ], batch size: 78, lr: 8.03e-03, grad_scale: 8.0 +2024-09-17 14:53:53,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=243100.0, ans=15.0 +2024-09-17 14:54:08,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=243140.0, ans=0.125 +2024-09-17 14:54:12,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=243140.0, ans=0.07 +2024-09-17 14:54:57,845 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.855e+01 9.181e+01 9.574e+01 1.007e+02 1.903e+02, threshold=1.915e+02, percent-clipped=0.0 +2024-09-17 14:55:02,101 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.34 vs. limit=15.0 +2024-09-17 14:55:13,060 INFO [train.py:1198] (1/2) Epoch 14, batch 2000, loss[loss=0.2361, ctc_loss=0.1465, cr_loss=0.408, attn_decoder_loss=0.237, over 29354.00 frames. ], tot_loss[loss=0.2579, ctc_loss=0.1549, cr_loss=0.3976, attn_decoder_loss=0.2605, over 5796931.29 frames. 
], batch size: 67, lr: 8.03e-03, grad_scale: 8.0 +2024-09-17 14:55:30,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=243340.0, ans=10.0 +2024-09-17 14:55:58,952 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 14:56:07,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=243420.0, ans=0.125 +2024-09-17 14:56:29,005 INFO [train.py:1198] (1/2) Epoch 14, batch 2050, loss[loss=0.2312, ctc_loss=0.1285, cr_loss=0.3479, attn_decoder_loss=0.2349, over 29419.00 frames. ], tot_loss[loss=0.2572, ctc_loss=0.1547, cr_loss=0.3966, attn_decoder_loss=0.2598, over 5788485.26 frames. ], batch size: 70, lr: 8.02e-03, grad_scale: 8.0 +2024-09-17 14:56:36,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=243500.0, ans=0.125 +2024-09-17 14:56:54,112 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.02 vs. limit=15.0 +2024-09-17 14:57:04,977 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.45 vs. limit=15.0 +2024-09-17 14:57:12,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.47 vs. limit=22.5 +2024-09-17 14:57:19,959 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.14 vs. limit=10.0 +2024-09-17 14:57:25,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=243620.0, ans=0.125 +2024-09-17 14:57:29,372 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.444e+01 8.883e+01 9.401e+01 1.013e+02 1.488e+02, threshold=1.880e+02, percent-clipped=0.0 +2024-09-17 14:57:44,649 INFO [train.py:1198] (1/2) Epoch 14, batch 2100, loss[loss=0.26, ctc_loss=0.1554, cr_loss=0.413, attn_decoder_loss=0.2625, over 29784.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1537, cr_loss=0.3949, attn_decoder_loss=0.2585, over 5801368.28 frames. ], batch size: 81, lr: 8.02e-03, grad_scale: 8.0 +2024-09-17 14:57:44,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=243700.0, ans=0.125 +2024-09-17 14:57:55,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=243700.0, ans=0.125 +2024-09-17 14:58:03,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=243740.0, ans=10.0 +2024-09-17 14:58:36,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=243820.0, ans=0.125 +2024-09-17 14:58:46,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=243820.0, ans=0.05 +2024-09-17 14:58:59,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.29 vs. 
limit=22.5 +2024-09-17 14:59:00,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=243860.0, ans=0.0 +2024-09-17 14:59:04,626 INFO [train.py:1198] (1/2) Epoch 14, batch 2150, loss[loss=0.2495, ctc_loss=0.1452, cr_loss=0.3914, attn_decoder_loss=0.2524, over 29436.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1525, cr_loss=0.3931, attn_decoder_loss=0.2577, over 5816845.44 frames. ], batch size: 78, lr: 8.02e-03, grad_scale: 8.0 +2024-09-17 14:59:08,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=243900.0, ans=0.1 +2024-09-17 14:59:17,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=243900.0, ans=0.125 +2024-09-17 14:59:43,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=243980.0, ans=0.125 +2024-09-17 14:59:51,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.19 vs. limit=15.0 +2024-09-17 15:00:05,496 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.957e+01 9.631e+01 1.031e+02 4.379e+02, threshold=1.926e+02, percent-clipped=1.0 +2024-09-17 15:00:20,630 INFO [train.py:1198] (1/2) Epoch 14, batch 2200, loss[loss=0.2639, ctc_loss=0.1571, cr_loss=0.4135, attn_decoder_loss=0.2666, over 29621.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1524, cr_loss=0.3928, attn_decoder_loss=0.2578, over 5812772.20 frames. ], batch size: 86, lr: 8.01e-03, grad_scale: 8.0 +2024-09-17 15:00:23,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=244100.0, ans=22.5 +2024-09-17 15:00:27,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=244100.0, ans=0.2 +2024-09-17 15:01:03,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=244180.0, ans=0.125 +2024-09-17 15:01:12,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.93 vs. limit=15.0 +2024-09-17 15:01:13,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.whiten.whitening_limit, batch_count=244220.0, ans=12.0 +2024-09-17 15:01:24,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=244260.0, ans=0.0 +2024-09-17 15:01:36,358 INFO [train.py:1198] (1/2) Epoch 14, batch 2250, loss[loss=0.2642, ctc_loss=0.1523, cr_loss=0.3995, attn_decoder_loss=0.2677, over 29715.00 frames. ], tot_loss[loss=0.2554, ctc_loss=0.1526, cr_loss=0.3928, attn_decoder_loss=0.2581, over 5812857.08 frames. ], batch size: 82, lr: 8.01e-03, grad_scale: 8.0 +2024-09-17 15:01:37,209 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=9.96 vs. limit=12.0 +2024-09-17 15:01:38,856 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.37 vs. 
limit=15.0 +2024-09-17 15:01:39,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=244300.0, ans=0.125 +2024-09-17 15:01:44,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=244300.0, ans=0.0 +2024-09-17 15:01:48,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=244300.0, ans=0.125 +2024-09-17 15:01:51,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=244340.0, ans=0.125 +2024-09-17 15:01:54,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=244340.0, ans=0.025 +2024-09-17 15:01:55,518 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.63 vs. limit=22.5 +2024-09-17 15:02:09,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=244380.0, ans=0.125 +2024-09-17 15:02:13,141 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.92 vs. limit=12.0 +2024-09-17 15:02:31,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=244420.0, ans=0.125 +2024-09-17 15:02:38,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=244420.0, ans=0.0 +2024-09-17 15:02:40,757 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.786e+01 8.724e+01 9.348e+01 1.021e+02 5.677e+02, threshold=1.870e+02, percent-clipped=2.0 +2024-09-17 15:02:56,094 INFO [train.py:1198] (1/2) Epoch 14, batch 2300, loss[loss=0.2356, ctc_loss=0.1421, cr_loss=0.3679, attn_decoder_loss=0.2378, over 29350.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1521, cr_loss=0.3918, attn_decoder_loss=0.2572, over 5799164.61 frames. ], batch size: 71, lr: 8.01e-03, grad_scale: 8.0 +2024-09-17 15:03:05,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=244500.0, ans=0.0 +2024-09-17 15:03:13,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.74 vs. limit=12.0 +2024-09-17 15:03:27,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=244580.0, ans=0.1 +2024-09-17 15:03:46,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=244620.0, ans=0.125 +2024-09-17 15:03:47,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=244620.0, ans=0.5 +2024-09-17 15:03:59,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=244660.0, ans=0.125 +2024-09-17 15:04:00,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.05 vs. 
limit=10.0 +2024-09-17 15:04:11,745 INFO [train.py:1198] (1/2) Epoch 14, batch 2350, loss[loss=0.2635, ctc_loss=0.1668, cr_loss=0.4215, attn_decoder_loss=0.2648, over 29685.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1527, cr_loss=0.3935, attn_decoder_loss=0.2575, over 5805381.94 frames. ], batch size: 83, lr: 8.00e-03, grad_scale: 8.0 +2024-09-17 15:04:16,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=244700.0, ans=0.0 +2024-09-17 15:04:19,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=244700.0, ans=0.1 +2024-09-17 15:04:19,825 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.71 vs. limit=15.0 +2024-09-17 15:04:35,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=244740.0, ans=0.0 +2024-09-17 15:04:36,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=244740.0, ans=0.1 +2024-09-17 15:04:40,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=244780.0, ans=0.125 +2024-09-17 15:04:43,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=244780.0, ans=0.07 +2024-09-17 15:04:50,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=244780.0, ans=0.1 +2024-09-17 15:04:57,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=244820.0, ans=0.035 +2024-09-17 15:05:05,523 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.42 vs. limit=15.0 +2024-09-17 15:05:12,275 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.793e+01 8.941e+01 9.524e+01 1.022e+02 1.702e+02, threshold=1.905e+02, percent-clipped=0.0 +2024-09-17 15:05:27,597 INFO [train.py:1198] (1/2) Epoch 14, batch 2400, loss[loss=0.2381, ctc_loss=0.1487, cr_loss=0.383, attn_decoder_loss=0.2395, over 29538.00 frames. ], tot_loss[loss=0.2552, ctc_loss=0.153, cr_loss=0.3942, attn_decoder_loss=0.2578, over 5809382.44 frames. ], batch size: 76, lr: 8.00e-03, grad_scale: 16.0 +2024-09-17 15:05:42,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=244940.0, ans=0.0 +2024-09-17 15:05:43,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=244940.0, ans=0.025 +2024-09-17 15:06:14,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=245020.0, ans=0.125 +2024-09-17 15:06:45,772 INFO [train.py:1198] (1/2) Epoch 14, batch 2450, loss[loss=0.257, ctc_loss=0.1513, cr_loss=0.384, attn_decoder_loss=0.2602, over 29727.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.154, cr_loss=0.3949, attn_decoder_loss=0.2588, over 5785889.42 frames. 
], batch size: 82, lr: 8.00e-03, grad_scale: 4.0 +2024-09-17 15:06:46,721 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.73 vs. limit=22.5 +2024-09-17 15:06:47,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=245100.0, ans=0.125 +2024-09-17 15:07:18,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.76 vs. limit=15.0 +2024-09-17 15:07:49,610 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.704e+01 8.888e+01 9.584e+01 1.028e+02 5.136e+02, threshold=1.917e+02, percent-clipped=2.0 +2024-09-17 15:08:01,733 INFO [train.py:1198] (1/2) Epoch 14, batch 2500, loss[loss=0.271, ctc_loss=0.1658, cr_loss=0.4228, attn_decoder_loss=0.2733, over 29663.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1542, cr_loss=0.3957, attn_decoder_loss=0.2589, over 5795428.61 frames. ], batch size: 86, lr: 7.99e-03, grad_scale: 8.0 +2024-09-17 15:08:33,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=245380.0, ans=0.1 +2024-09-17 15:08:59,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=245420.0, ans=0.125 +2024-09-17 15:09:18,073 INFO [train.py:1198] (1/2) Epoch 14, batch 2550, loss[loss=0.2334, ctc_loss=0.137, cr_loss=0.3653, attn_decoder_loss=0.236, over 29349.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.1538, cr_loss=0.395, attn_decoder_loss=0.2588, over 5798068.04 frames. ], batch size: 67, lr: 7.99e-03, grad_scale: 8.0 +2024-09-17 15:09:18,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=245500.0, ans=0.0 +2024-09-17 15:09:40,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.18 vs. limit=15.0 +2024-09-17 15:10:00,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=245580.0, ans=0.125 +2024-09-17 15:10:06,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=245620.0, ans=0.125 +2024-09-17 15:10:27,971 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.823e+01 9.211e+01 1.016e+02 2.509e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-17 15:10:35,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=245660.0, ans=0.0 +2024-09-17 15:10:38,540 INFO [train.py:1198] (1/2) Epoch 14, batch 2600, loss[loss=0.2492, ctc_loss=0.1447, cr_loss=0.3808, attn_decoder_loss=0.2523, over 29446.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1539, cr_loss=0.3952, attn_decoder_loss=0.2591, over 5794084.86 frames. 
], batch size: 78, lr: 7.99e-03, grad_scale: 8.0 +2024-09-17 15:10:47,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=245700.0, ans=0.125 +2024-09-17 15:10:50,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=245700.0, ans=0.125 +2024-09-17 15:10:51,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.89 vs. limit=15.0 +2024-09-17 15:11:01,948 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.43 vs. limit=15.0 +2024-09-17 15:11:07,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=245780.0, ans=0.125 +2024-09-17 15:11:11,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=245780.0, ans=0.0 +2024-09-17 15:11:11,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=245780.0, ans=0.0 +2024-09-17 15:11:16,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=245780.0, ans=0.1 +2024-09-17 15:11:24,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=245820.0, ans=0.125 +2024-09-17 15:11:51,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=245860.0, ans=0.0 +2024-09-17 15:11:54,250 INFO [train.py:1198] (1/2) Epoch 14, batch 2650, loss[loss=0.2638, ctc_loss=0.1608, cr_loss=0.3981, attn_decoder_loss=0.2664, over 29280.00 frames. ], tot_loss[loss=0.2567, ctc_loss=0.1538, cr_loss=0.3956, attn_decoder_loss=0.2593, over 5801624.64 frames. ], batch size: 100, lr: 7.98e-03, grad_scale: 8.0 +2024-09-17 15:12:01,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.99 vs. limit=15.0 +2024-09-17 15:12:14,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=245940.0, ans=0.95 +2024-09-17 15:12:15,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=245940.0, ans=10.0 +2024-09-17 15:12:21,171 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.16 vs. limit=22.5 +2024-09-17 15:12:27,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer_ff3.min_abs, batch_count=245980.0, ans=0.2 +2024-09-17 15:12:59,468 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.821e+01 9.405e+01 9.920e+01 1.834e+02, threshold=1.881e+02, percent-clipped=0.0 +2024-09-17 15:13:10,174 INFO [train.py:1198] (1/2) Epoch 14, batch 2700, loss[loss=0.2673, ctc_loss=0.1683, cr_loss=0.4212, attn_decoder_loss=0.2689, over 29547.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.154, cr_loss=0.3957, attn_decoder_loss=0.2595, over 5797499.20 frames. 
], batch size: 87, lr: 7.98e-03, grad_scale: 8.0 +2024-09-17 15:13:16,370 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:13:20,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=246100.0, ans=0.125 +2024-09-17 15:13:30,036 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:13:34,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=246140.0, ans=0.04949747468305833 +2024-09-17 15:13:51,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=246180.0, ans=0.125 +2024-09-17 15:13:52,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=246180.0, ans=0.125 +2024-09-17 15:14:08,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=246220.0, ans=0.04949747468305833 +2024-09-17 15:14:10,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=246220.0, ans=0.0 +2024-09-17 15:14:18,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=246260.0, ans=0.125 +2024-09-17 15:14:27,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=246260.0, ans=0.125 +2024-09-17 15:14:30,686 INFO [train.py:1198] (1/2) Epoch 14, batch 2750, loss[loss=0.2499, ctc_loss=0.1432, cr_loss=0.3919, attn_decoder_loss=0.2531, over 29497.00 frames. ], tot_loss[loss=0.2558, ctc_loss=0.1531, cr_loss=0.3942, attn_decoder_loss=0.2585, over 5796110.93 frames. ], batch size: 75, lr: 7.98e-03, grad_scale: 8.0 +2024-09-17 15:14:31,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=246300.0, ans=0.0 +2024-09-17 15:14:43,641 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.15 vs. limit=15.0 +2024-09-17 15:15:01,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=246380.0, ans=0.0 +2024-09-17 15:15:04,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=246380.0, ans=0.2 +2024-09-17 15:15:06,168 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.64 vs. 
limit=15.0 +2024-09-17 15:15:19,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=246420.0, ans=0.125 +2024-09-17 15:15:28,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=246420.0, ans=0.125 +2024-09-17 15:15:36,354 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.348e+01 8.822e+01 9.426e+01 1.011e+02 2.167e+02, threshold=1.885e+02, percent-clipped=1.0 +2024-09-17 15:15:47,142 INFO [train.py:1198] (1/2) Epoch 14, batch 2800, loss[loss=0.2877, ctc_loss=0.203, cr_loss=0.4451, attn_decoder_loss=0.2872, over 20013.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1535, cr_loss=0.3941, attn_decoder_loss=0.2585, over 5777997.30 frames. ], batch size: 210, lr: 7.97e-03, grad_scale: 16.0 +2024-09-17 15:15:48,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=246500.0, ans=0.125 +2024-09-17 15:15:51,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=246500.0, ans=0.09899494936611666 +2024-09-17 15:17:02,336 INFO [train.py:1198] (1/2) Epoch 14, batch 2850, loss[loss=0.2554, ctc_loss=0.151, cr_loss=0.3863, attn_decoder_loss=0.2584, over 29519.00 frames. ], tot_loss[loss=0.2564, ctc_loss=0.1538, cr_loss=0.394, attn_decoder_loss=0.2591, over 5762706.05 frames. ], batch size: 77, lr: 7.97e-03, grad_scale: 8.0 +2024-09-17 15:17:08,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=246700.0, ans=0.125 +2024-09-17 15:17:17,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=246740.0, ans=0.125 +2024-09-17 15:17:28,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=246740.0, ans=0.1 +2024-09-17 15:17:28,997 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.96 vs. limit=15.0 +2024-09-17 15:17:49,404 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.69 vs. limit=10.0 +2024-09-17 15:17:49,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.60 vs. limit=6.0 +2024-09-17 15:17:54,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=246820.0, ans=0.0 +2024-09-17 15:18:12,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.83 vs. limit=22.5 +2024-09-17 15:18:13,427 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.023e+01 9.108e+01 9.602e+01 1.037e+02 1.624e+02, threshold=1.920e+02, percent-clipped=0.0 +2024-09-17 15:18:21,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=246900.0, ans=0.125 +2024-09-17 15:18:22,633 INFO [train.py:1198] (1/2) Epoch 14, batch 2900, loss[loss=0.2504, ctc_loss=0.1522, cr_loss=0.4052, attn_decoder_loss=0.2524, over 29440.00 frames. 
], tot_loss[loss=0.2572, ctc_loss=0.1542, cr_loss=0.3947, attn_decoder_loss=0.2598, over 5788110.99 frames. ], batch size: 79, lr: 7.97e-03, grad_scale: 8.0 +2024-09-17 15:18:58,604 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.91 vs. limit=15.0 +2024-09-17 15:19:20,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=247020.0, ans=0.125 +2024-09-17 15:19:29,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=247060.0, ans=0.0 +2024-09-17 15:19:38,653 INFO [train.py:1198] (1/2) Epoch 14, batch 2950, loss[loss=0.2502, ctc_loss=0.1481, cr_loss=0.3945, attn_decoder_loss=0.2528, over 29519.00 frames. ], tot_loss[loss=0.2558, ctc_loss=0.1531, cr_loss=0.3927, attn_decoder_loss=0.2585, over 5781320.48 frames. ], batch size: 75, lr: 7.97e-03, grad_scale: 8.0 +2024-09-17 15:19:45,485 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=10.27 vs. limit=15.0 +2024-09-17 15:19:57,338 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:19:57,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=247140.0, ans=0.025 +2024-09-17 15:20:15,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=247180.0, ans=0.125 +2024-09-17 15:20:15,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=247180.0, ans=0.2 +2024-09-17 15:20:17,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=247180.0, ans=0.125 +2024-09-17 15:20:31,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=247220.0, ans=0.125 +2024-09-17 15:20:46,134 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.197e+01 8.963e+01 9.607e+01 1.034e+02 4.390e+02, threshold=1.921e+02, percent-clipped=3.0 +2024-09-17 15:20:55,396 INFO [train.py:1198] (1/2) Epoch 14, batch 3000, loss[loss=0.2543, ctc_loss=0.144, cr_loss=0.3597, attn_decoder_loss=0.2585, over 29754.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1531, cr_loss=0.3935, attn_decoder_loss=0.2585, over 5782601.35 frames. ], batch size: 81, lr: 7.96e-03, grad_scale: 8.0 +2024-09-17 15:20:55,396 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 15:21:13,887 INFO [train.py:1230] (1/2) Epoch 14, validation: loss=0.212, ctc_loss=0.04343, cr_loss=5.03e-15, attn_decoder_loss=0.2308, over 944034.00 frames. +2024-09-17 15:21:13,887 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 15:21:28,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=247340.0, ans=0.0 +2024-09-17 15:21:28,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=247340.0, ans=0.125 +2024-09-17 15:21:40,045 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.85 vs. 
limit=6.0 +2024-09-17 15:22:34,537 INFO [train.py:1198] (1/2) Epoch 14, batch 3050, loss[loss=0.2463, ctc_loss=0.1425, cr_loss=0.3789, attn_decoder_loss=0.2494, over 29542.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.154, cr_loss=0.3955, attn_decoder_loss=0.2595, over 5776991.26 frames. ], batch size: 76, lr: 7.96e-03, grad_scale: 8.0 +2024-09-17 15:22:55,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=247540.0, ans=0.1 +2024-09-17 15:22:56,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=247540.0, ans=0.125 +2024-09-17 15:23:08,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.74 vs. limit=15.0 +2024-09-17 15:23:12,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=247580.0, ans=0.0 +2024-09-17 15:23:40,607 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.297e+01 9.179e+01 9.620e+01 1.029e+02 1.592e+02, threshold=1.924e+02, percent-clipped=0.0 +2024-09-17 15:23:49,652 INFO [train.py:1198] (1/2) Epoch 14, batch 3100, loss[loss=0.2735, ctc_loss=0.1657, cr_loss=0.404, attn_decoder_loss=0.2765, over 29298.00 frames. ], tot_loss[loss=0.2565, ctc_loss=0.1541, cr_loss=0.3949, attn_decoder_loss=0.2592, over 5777127.31 frames. ], batch size: 100, lr: 7.96e-03, grad_scale: 8.0 +2024-09-17 15:23:53,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.55 vs. limit=15.0 +2024-09-17 15:24:03,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=247740.0, ans=0.1 +2024-09-17 15:24:15,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=247740.0, ans=0.125 +2024-09-17 15:24:20,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=247780.0, ans=10.0 +2024-09-17 15:24:41,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=247820.0, ans=0.2 +2024-09-17 15:24:49,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=247860.0, ans=0.1 +2024-09-17 15:24:55,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=247860.0, ans=0.0 +2024-09-17 15:25:01,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=247860.0, ans=0.07 +2024-09-17 15:25:05,570 INFO [train.py:1198] (1/2) Epoch 14, batch 3150, loss[loss=0.2663, ctc_loss=0.1605, cr_loss=0.4141, attn_decoder_loss=0.2688, over 28830.00 frames. ], tot_loss[loss=0.2563, ctc_loss=0.1535, cr_loss=0.3942, attn_decoder_loss=0.259, over 5783249.21 frames. 
], batch size: 104, lr: 7.95e-03, grad_scale: 4.0 +2024-09-17 15:25:35,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=247940.0, ans=0.025 +2024-09-17 15:26:17,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=248060.0, ans=0.125 +2024-09-17 15:26:18,269 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.880e+01 9.083e+01 9.655e+01 1.039e+02 2.253e+02, threshold=1.931e+02, percent-clipped=1.0 +2024-09-17 15:26:25,935 INFO [train.py:1198] (1/2) Epoch 14, batch 3200, loss[loss=0.2494, ctc_loss=0.1466, cr_loss=0.3945, attn_decoder_loss=0.252, over 29425.00 frames. ], tot_loss[loss=0.2556, ctc_loss=0.1529, cr_loss=0.3929, attn_decoder_loss=0.2583, over 5794016.81 frames. ], batch size: 79, lr: 7.95e-03, grad_scale: 8.0 +2024-09-17 15:26:30,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=248100.0, ans=0.125 +2024-09-17 15:26:47,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=248140.0, ans=0.2 +2024-09-17 15:26:56,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=248180.0, ans=0.0 +2024-09-17 15:27:22,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=248220.0, ans=0.0 +2024-09-17 15:27:42,196 INFO [train.py:1198] (1/2) Epoch 14, batch 3250, loss[loss=0.2784, ctc_loss=0.1702, cr_loss=0.4278, attn_decoder_loss=0.2809, over 29697.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1531, cr_loss=0.3938, attn_decoder_loss=0.2588, over 5800128.76 frames. ], batch size: 84, lr: 7.95e-03, grad_scale: 8.0 +2024-09-17 15:28:31,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=248420.0, ans=10.0 +2024-09-17 15:28:49,497 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.916e+01 9.075e+01 9.653e+01 1.031e+02 3.050e+02, threshold=1.931e+02, percent-clipped=1.0 +2024-09-17 15:28:57,462 INFO [train.py:1198] (1/2) Epoch 14, batch 3300, loss[loss=0.2674, ctc_loss=0.1645, cr_loss=0.4142, attn_decoder_loss=0.2696, over 28503.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.1528, cr_loss=0.3936, attn_decoder_loss=0.258, over 5798173.19 frames. ], batch size: 112, lr: 7.94e-03, grad_scale: 8.0 +2024-09-17 15:29:23,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.79 vs. limit=15.0 +2024-09-17 15:29:26,831 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.31 vs. limit=15.0 +2024-09-17 15:29:31,157 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.69 vs. 
limit=15.0 +2024-09-17 15:29:33,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=248580.0, ans=0.125 +2024-09-17 15:29:39,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=248580.0, ans=0.125 +2024-09-17 15:29:39,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=248580.0, ans=0.1 +2024-09-17 15:30:03,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=248660.0, ans=0.2 +2024-09-17 15:30:09,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=248660.0, ans=0.125 +2024-09-17 15:30:16,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=248700.0, ans=0.125 +2024-09-17 15:30:17,822 INFO [train.py:1198] (1/2) Epoch 14, batch 3350, loss[loss=0.2643, ctc_loss=0.1494, cr_loss=0.3591, attn_decoder_loss=0.2691, over 28736.00 frames. ], tot_loss[loss=0.2559, ctc_loss=0.1533, cr_loss=0.3936, attn_decoder_loss=0.2586, over 5774849.12 frames. ], batch size: 104, lr: 7.94e-03, grad_scale: 8.0 +2024-09-17 15:30:41,228 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.17 vs. limit=22.5 +2024-09-17 15:30:41,273 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.16 vs. limit=15.0 +2024-09-17 15:30:51,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=248780.0, ans=0.125 +2024-09-17 15:30:58,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=248780.0, ans=0.5 +2024-09-17 15:31:00,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=248780.0, ans=0.0 +2024-09-17 15:31:02,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=248820.0, ans=0.0 +2024-09-17 15:31:04,594 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.72 vs. limit=6.0 +2024-09-17 15:31:05,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=248820.0, ans=0.2 +2024-09-17 15:31:11,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=248820.0, ans=0.2 +2024-09-17 15:31:13,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=248820.0, ans=0.2 +2024-09-17 15:31:26,420 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.815e+01 9.128e+01 9.625e+01 1.037e+02 1.571e+02, threshold=1.925e+02, percent-clipped=0.0 +2024-09-17 15:31:34,158 INFO [train.py:1198] (1/2) Epoch 14, batch 3400, loss[loss=0.2333, ctc_loss=0.1319, cr_loss=0.3728, attn_decoder_loss=0.2363, over 29369.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1534, cr_loss=0.3933, attn_decoder_loss=0.2587, over 5768234.91 frames. 
], batch size: 67, lr: 7.94e-03, grad_scale: 8.0 +2024-09-17 15:31:34,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=248900.0, ans=0.125 +2024-09-17 15:31:34,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=248900.0, ans=0.125 +2024-09-17 15:32:25,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=249020.0, ans=0.125 +2024-09-17 15:32:26,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=249020.0, ans=0.125 +2024-09-17 15:32:29,235 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:32:30,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=249020.0, ans=0.0 +2024-09-17 15:32:32,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=249020.0, ans=0.125 +2024-09-17 15:32:39,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=249060.0, ans=0.125 +2024-09-17 15:32:41,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=249060.0, ans=0.0 +2024-09-17 15:32:42,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=249060.0, ans=0.0 +2024-09-17 15:32:47,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=249060.0, ans=0.125 +2024-09-17 15:32:50,243 INFO [train.py:1198] (1/2) Epoch 14, batch 3450, loss[loss=0.2667, ctc_loss=0.1581, cr_loss=0.4019, attn_decoder_loss=0.2699, over 28409.00 frames. ], tot_loss[loss=0.2565, ctc_loss=0.1536, cr_loss=0.394, attn_decoder_loss=0.2592, over 5775614.99 frames. ], batch size: 111, lr: 7.93e-03, grad_scale: 8.0 +2024-09-17 15:33:05,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.94 vs. limit=22.5 +2024-09-17 15:33:12,864 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.10 vs. 
limit=15.0 +2024-09-17 15:33:29,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=249180.0, ans=0.125 +2024-09-17 15:33:38,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=249220.0, ans=0.125 +2024-09-17 15:33:38,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=249220.0, ans=0.2 +2024-09-17 15:34:03,111 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.438e+01 8.913e+01 9.467e+01 9.956e+01 4.435e+02, threshold=1.893e+02, percent-clipped=2.0 +2024-09-17 15:34:06,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=249260.0, ans=0.0 +2024-09-17 15:34:08,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=249260.0, ans=0.2 +2024-09-17 15:34:10,848 INFO [train.py:1198] (1/2) Epoch 14, batch 3500, loss[loss=0.2282, ctc_loss=0.129, cr_loss=0.3532, attn_decoder_loss=0.2314, over 29316.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1534, cr_loss=0.3941, attn_decoder_loss=0.2586, over 5777243.24 frames. ], batch size: 71, lr: 7.93e-03, grad_scale: 8.0 +2024-09-17 15:34:11,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.49 vs. limit=6.0 +2024-09-17 15:34:23,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=249300.0, ans=0.0 +2024-09-17 15:34:42,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=249380.0, ans=0.1 +2024-09-17 15:34:47,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_ff2.min_abs, batch_count=249380.0, ans=0.1 +2024-09-17 15:34:47,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=249380.0, ans=0.2 +2024-09-17 15:34:56,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=249420.0, ans=0.1 +2024-09-17 15:34:57,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=249420.0, ans=0.2 +2024-09-17 15:35:12,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=249460.0, ans=0.125 +2024-09-17 15:35:16,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=249460.0, ans=0.1 +2024-09-17 15:35:21,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=249460.0, ans=0.125 +2024-09-17 15:35:24,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=249500.0, ans=0.125 +2024-09-17 15:35:25,499 INFO [train.py:1198] (1/2) Epoch 14, batch 3550, loss[loss=0.268, ctc_loss=0.1577, cr_loss=0.4012, attn_decoder_loss=0.2714, over 29704.00 frames. ], tot_loss[loss=0.2557, ctc_loss=0.1528, cr_loss=0.3934, attn_decoder_loss=0.2584, over 5782868.08 frames. 
], batch size: 89, lr: 7.93e-03, grad_scale: 8.0 +2024-09-17 15:35:27,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=249500.0, ans=0.125 +2024-09-17 15:36:20,293 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.15 vs. limit=22.5 +2024-09-17 15:36:24,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=249660.0, ans=0.1 +2024-09-17 15:36:27,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=249660.0, ans=0.0 +2024-09-17 15:36:33,057 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.058e+01 8.867e+01 9.428e+01 1.003e+02 3.029e+02, threshold=1.886e+02, percent-clipped=1.0 +2024-09-17 15:36:39,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=249700.0, ans=0.125 +2024-09-17 15:36:40,442 INFO [train.py:1198] (1/2) Epoch 14, batch 3600, loss[loss=0.2429, ctc_loss=0.1454, cr_loss=0.3692, attn_decoder_loss=0.2455, over 29521.00 frames. ], tot_loss[loss=0.2558, ctc_loss=0.1531, cr_loss=0.3937, attn_decoder_loss=0.2584, over 5793121.65 frames. ], batch size: 77, lr: 7.92e-03, grad_scale: 16.0 +2024-09-17 15:36:43,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=249700.0, ans=0.035 +2024-09-17 15:36:48,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=249700.0, ans=0.0 +2024-09-17 15:37:01,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=249740.0, ans=0.125 +2024-09-17 15:37:21,948 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.29 vs. limit=10.0 +2024-09-17 15:37:27,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=249820.0, ans=0.07 +2024-09-17 15:37:28,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=249820.0, ans=0.125 +2024-09-17 15:37:46,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=249860.0, ans=0.125 +2024-09-17 15:37:53,067 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.74 vs. limit=6.0 +2024-09-17 15:37:55,263 INFO [train.py:1198] (1/2) Epoch 14, batch 3650, loss[loss=0.2655, ctc_loss=0.1656, cr_loss=0.3917, attn_decoder_loss=0.2679, over 29530.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.152, cr_loss=0.3925, attn_decoder_loss=0.2577, over 5795341.63 frames. 
], batch size: 90, lr: 7.92e-03, grad_scale: 8.0 +2024-09-17 15:37:56,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=249900.0, ans=0.0 +2024-09-17 15:38:05,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=249900.0, ans=0.025 +2024-09-17 15:38:13,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=249940.0, ans=0.2 +2024-09-17 15:38:16,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=249940.0, ans=0.125 +2024-09-17 15:38:39,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=249980.0, ans=0.95 +2024-09-17 15:38:46,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=250020.0, ans=0.1 +2024-09-17 15:38:50,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.63 vs. limit=6.0 +2024-09-17 15:38:58,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=250060.0, ans=0.125 +2024-09-17 15:39:01,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=250060.0, ans=0.2 +2024-09-17 15:39:01,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=250060.0, ans=0.0 +2024-09-17 15:39:05,888 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.411e+01 8.885e+01 9.531e+01 1.024e+02 1.907e+02, threshold=1.906e+02, percent-clipped=1.0 +2024-09-17 15:39:09,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=250060.0, ans=0.125 +2024-09-17 15:39:11,991 INFO [train.py:1198] (1/2) Epoch 14, batch 3700, loss[loss=0.2693, ctc_loss=0.1652, cr_loss=0.3945, attn_decoder_loss=0.272, over 29708.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1519, cr_loss=0.3921, attn_decoder_loss=0.2576, over 5805218.93 frames. ], batch size: 84, lr: 7.92e-03, grad_scale: 8.0 +2024-09-17 15:39:16,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=250100.0, ans=0.0 +2024-09-17 15:39:42,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=250180.0, ans=0.125 +2024-09-17 15:40:03,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=250220.0, ans=0.125 +2024-09-17 15:40:16,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=250260.0, ans=0.2 +2024-09-17 15:40:17,143 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.43 vs. limit=15.0 +2024-09-17 15:40:28,253 INFO [train.py:1198] (1/2) Epoch 14, batch 3750, loss[loss=0.2229, ctc_loss=0.127, cr_loss=0.338, attn_decoder_loss=0.2261, over 29341.00 frames. 
], tot_loss[loss=0.2549, ctc_loss=0.152, cr_loss=0.3923, attn_decoder_loss=0.2577, over 5809359.21 frames. ], batch size: 67, lr: 7.92e-03, grad_scale: 8.0 +2024-09-17 15:40:35,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=250300.0, ans=0.2 +2024-09-17 15:40:37,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=250300.0, ans=0.125 +2024-09-17 15:40:43,786 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=19.63 vs. limit=22.5 +2024-09-17 15:41:02,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=250380.0, ans=0.025 +2024-09-17 15:41:06,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=250380.0, ans=0.1 +2024-09-17 15:41:06,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=250380.0, ans=0.0 +2024-09-17 15:41:09,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=250380.0, ans=0.125 +2024-09-17 15:41:15,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=250420.0, ans=0.125 +2024-09-17 15:41:25,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.70 vs. limit=15.0 +2024-09-17 15:41:37,294 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.832e+01 9.041e+01 9.799e+01 1.080e+02 3.062e+02, threshold=1.960e+02, percent-clipped=2.0 +2024-09-17 15:41:43,359 INFO [train.py:1198] (1/2) Epoch 14, batch 3800, loss[loss=0.2578, ctc_loss=0.1528, cr_loss=0.393, attn_decoder_loss=0.2607, over 29635.00 frames. ], tot_loss[loss=0.2547, ctc_loss=0.1519, cr_loss=0.3917, attn_decoder_loss=0.2575, over 5799053.55 frames. ], batch size: 86, lr: 7.91e-03, grad_scale: 8.0 +2024-09-17 15:42:10,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=250540.0, ans=0.1 +2024-09-17 15:42:19,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=250580.0, ans=0.0 +2024-09-17 15:42:55,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=250660.0, ans=0.125 +2024-09-17 15:42:57,825 INFO [train.py:1198] (1/2) Epoch 14, batch 3850, loss[loss=0.2677, ctc_loss=0.1547, cr_loss=0.3975, attn_decoder_loss=0.2714, over 29338.00 frames. ], tot_loss[loss=0.2546, ctc_loss=0.1513, cr_loss=0.3911, attn_decoder_loss=0.2574, over 5811447.40 frames. ], batch size: 100, lr: 7.91e-03, grad_scale: 8.0 +2024-09-17 15:43:11,967 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.82 vs. 
limit=15.0 +2024-09-17 15:43:20,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=250740.0, ans=0.125 +2024-09-17 15:44:06,765 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.174e+01 8.879e+01 9.449e+01 1.016e+02 1.639e+02, threshold=1.890e+02, percent-clipped=0.0 +2024-09-17 15:44:14,350 INFO [train.py:1198] (1/2) Epoch 14, batch 3900, loss[loss=0.2582, ctc_loss=0.1484, cr_loss=0.3882, attn_decoder_loss=0.2617, over 29619.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.1518, cr_loss=0.3922, attn_decoder_loss=0.2581, over 5816368.22 frames. ], batch size: 86, lr: 7.91e-03, grad_scale: 8.0 +2024-09-17 15:44:15,193 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.55 vs. limit=6.0 +2024-09-17 15:44:22,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=250900.0, ans=0.1 +2024-09-17 15:44:46,757 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.87 vs. limit=15.0 +2024-09-17 15:44:49,174 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.35 vs. limit=15.0 +2024-09-17 15:44:53,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=250980.0, ans=0.0 +2024-09-17 15:45:02,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=251020.0, ans=0.0 +2024-09-17 15:45:27,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=251100.0, ans=0.125 +2024-09-17 15:45:28,471 INFO [train.py:1198] (1/2) Epoch 14, batch 3950, loss[loss=0.2596, ctc_loss=0.1554, cr_loss=0.3885, attn_decoder_loss=0.2626, over 29487.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1512, cr_loss=0.3919, attn_decoder_loss=0.2578, over 5835835.01 frames. ], batch size: 97, lr: 7.90e-03, grad_scale: 8.0 +2024-09-17 15:45:40,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=251100.0, ans=0.0 +2024-09-17 15:45:40,809 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:45:50,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.88 vs. limit=22.5 +2024-09-17 15:45:58,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=251180.0, ans=0.1 +2024-09-17 15:46:02,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=251180.0, ans=0.025 +2024-09-17 15:46:38,427 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.473e+01 8.707e+01 9.297e+01 1.013e+02 1.953e+02, threshold=1.859e+02, percent-clipped=1.0 +2024-09-17 15:46:39,245 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.21 vs. 
limit=15.0 +2024-09-17 15:46:44,340 INFO [train.py:1198] (1/2) Epoch 14, batch 4000, loss[loss=0.2377, ctc_loss=0.1411, cr_loss=0.3561, attn_decoder_loss=0.2406, over 29512.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.1518, cr_loss=0.3921, attn_decoder_loss=0.2581, over 5814096.12 frames. ], batch size: 74, lr: 7.90e-03, grad_scale: 16.0 +2024-09-17 15:47:00,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer_ff3.min_abs, batch_count=251340.0, ans=0.2 +2024-09-17 15:47:04,253 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.46 vs. limit=10.0 +2024-09-17 15:47:12,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=251380.0, ans=0.125 +2024-09-17 15:47:17,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=251380.0, ans=0.2 +2024-09-17 15:47:33,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=251420.0, ans=0.125 +2024-09-17 15:47:42,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=251460.0, ans=0.0 +2024-09-17 15:47:54,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=251460.0, ans=0.125 +2024-09-17 15:47:57,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=251500.0, ans=0.0 +2024-09-17 15:47:57,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=251500.0, ans=0.0 +2024-09-17 15:47:58,628 INFO [train.py:1198] (1/2) Epoch 14, batch 4050, loss[loss=0.2845, ctc_loss=0.2078, cr_loss=0.4404, attn_decoder_loss=0.2832, over 20062.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1522, cr_loss=0.3927, attn_decoder_loss=0.2583, over 5796555.82 frames. ], batch size: 210, lr: 7.90e-03, grad_scale: 8.0 +2024-09-17 15:48:04,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=251500.0, ans=0.125 +2024-09-17 15:48:09,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=251500.0, ans=0.125 +2024-09-17 15:48:12,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.42 vs. 
limit=15.0 +2024-09-17 15:48:26,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=251580.0, ans=0.125 +2024-09-17 15:48:34,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=251580.0, ans=10.0 +2024-09-17 15:48:51,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=251620.0, ans=0.0 +2024-09-17 15:49:06,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=251660.0, ans=0.125 +2024-09-17 15:49:08,784 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.922e+01 9.172e+01 9.805e+01 1.134e+02 3.956e+02, threshold=1.961e+02, percent-clipped=2.0 +2024-09-17 15:49:13,321 INFO [train.py:1198] (1/2) Epoch 14, batch 4100, loss[loss=0.283, ctc_loss=0.1824, cr_loss=0.4385, attn_decoder_loss=0.2844, over 29493.00 frames. ], tot_loss[loss=0.256, ctc_loss=0.1528, cr_loss=0.3938, attn_decoder_loss=0.2588, over 5792167.55 frames. ], batch size: 90, lr: 7.89e-03, grad_scale: 8.0 +2024-09-17 15:49:15,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=251700.0, ans=0.125 +2024-09-17 15:49:16,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=251700.0, ans=0.1 +2024-09-17 15:49:56,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=251820.0, ans=0.125 +2024-09-17 15:50:05,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=251820.0, ans=0.125 +2024-09-17 15:50:10,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=251820.0, ans=0.125 +2024-09-17 15:50:24,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=251860.0, ans=0.125 +2024-09-17 15:50:28,375 INFO [train.py:1198] (1/2) Epoch 14, batch 4150, loss[loss=0.2417, ctc_loss=0.1345, cr_loss=0.374, attn_decoder_loss=0.2453, over 29527.00 frames. ], tot_loss[loss=0.2554, ctc_loss=0.1523, cr_loss=0.3926, attn_decoder_loss=0.2581, over 5797673.04 frames. ], batch size: 77, lr: 7.89e-03, grad_scale: 8.0 +2024-09-17 15:50:52,744 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.67 vs. limit=22.5 +2024-09-17 15:50:54,134 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=16.14 vs. limit=15.0 +2024-09-17 15:51:04,527 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.20 vs. 
limit=15.0 +2024-09-17 15:51:11,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=252020.0, ans=0.2 +2024-09-17 15:51:37,954 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.595e+01 8.795e+01 9.407e+01 9.935e+01 3.892e+02, threshold=1.881e+02, percent-clipped=3.0 +2024-09-17 15:51:42,412 INFO [train.py:1198] (1/2) Epoch 14, batch 4200, loss[loss=0.2759, ctc_loss=0.1762, cr_loss=0.4425, attn_decoder_loss=0.2772, over 29490.00 frames. ], tot_loss[loss=0.2556, ctc_loss=0.1527, cr_loss=0.3936, attn_decoder_loss=0.2583, over 5799196.79 frames. ], batch size: 90, lr: 7.89e-03, grad_scale: 8.0 +2024-09-17 15:51:47,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=252100.0, ans=0.125 +2024-09-17 15:51:48,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=252100.0, ans=0.125 +2024-09-17 15:52:10,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=252180.0, ans=0.0 +2024-09-17 15:52:20,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=252180.0, ans=0.125 +2024-09-17 15:52:42,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=252260.0, ans=0.125 +2024-09-17 15:52:56,847 INFO [train.py:1198] (1/2) Epoch 14, batch 4250, loss[loss=0.2395, ctc_loss=0.1373, cr_loss=0.366, attn_decoder_loss=0.2428, over 29502.00 frames. ], tot_loss[loss=0.2554, ctc_loss=0.152, cr_loss=0.3925, attn_decoder_loss=0.2582, over 5804568.01 frames. ], batch size: 74, lr: 7.88e-03, grad_scale: 8.0 +2024-09-17 15:53:06,391 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.44 vs. limit=12.0 +2024-09-17 15:53:19,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=252340.0, ans=0.0 +2024-09-17 15:53:31,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=252380.0, ans=0.0 +2024-09-17 15:53:35,430 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:53:57,055 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:54:06,785 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.779e+01 8.931e+01 9.548e+01 1.031e+02 6.441e+02, threshold=1.910e+02, percent-clipped=3.0 +2024-09-17 15:54:11,269 INFO [train.py:1198] (1/2) Epoch 14, batch 4300, loss[loss=0.2535, ctc_loss=0.147, cr_loss=0.4072, attn_decoder_loss=0.2563, over 29517.00 frames. ], tot_loss[loss=0.2557, ctc_loss=0.1524, cr_loss=0.3923, attn_decoder_loss=0.2584, over 5793844.48 frames. ], batch size: 87, lr: 7.88e-03, grad_scale: 8.0 +2024-09-17 15:54:20,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.61 vs. limit=22.5 +2024-09-17 15:54:30,185 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.07 vs. 
limit=15.0 +2024-09-17 15:54:54,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=252620.0, ans=0.125 +2024-09-17 15:54:56,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=252620.0, ans=0.125 +2024-09-17 15:55:13,490 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.09 vs. limit=22.5 +2024-09-17 15:55:25,800 INFO [train.py:1198] (1/2) Epoch 14, batch 4350, loss[loss=0.2696, ctc_loss=0.1577, cr_loss=0.404, attn_decoder_loss=0.273, over 29505.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.1555, cr_loss=0.3985, attn_decoder_loss=0.262, over 5796952.08 frames. ], batch size: 97, lr: 7.88e-03, grad_scale: 8.0 +2024-09-17 15:55:33,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=252700.0, ans=0.1 +2024-09-17 15:56:05,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=252780.0, ans=0.1 +2024-09-17 15:56:21,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=252820.0, ans=0.0 +2024-09-17 15:56:34,333 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 15:56:35,453 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.459e+01 9.298e+01 9.711e+01 1.038e+02 2.895e+02, threshold=1.942e+02, percent-clipped=1.0 +2024-09-17 15:56:39,912 INFO [train.py:1198] (1/2) Epoch 14, batch 4400, loss[loss=0.2748, ctc_loss=0.1763, cr_loss=0.4342, attn_decoder_loss=0.2761, over 27244.00 frames. ], tot_loss[loss=0.2615, ctc_loss=0.1573, cr_loss=0.4015, attn_decoder_loss=0.2641, over 5766382.29 frames. ], batch size: 124, lr: 7.87e-03, grad_scale: 16.0 +2024-09-17 15:56:52,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=252900.0, ans=0.125 +2024-09-17 15:56:53,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=252940.0, ans=0.1 +2024-09-17 15:56:53,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=252940.0, ans=0.0 +2024-09-17 15:57:02,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_positive, batch_count=252940.0, ans=0.05 +2024-09-17 15:57:02,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=252940.0, ans=0.125 +2024-09-17 15:57:04,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=252940.0, ans=0.125 +2024-09-17 15:57:50,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=253060.0, ans=0.05 +2024-09-17 15:57:55,063 INFO [train.py:1198] (1/2) Epoch 14, batch 4450, loss[loss=0.2806, ctc_loss=0.1916, cr_loss=0.4312, attn_decoder_loss=0.2809, over 20402.00 frames. ], tot_loss[loss=0.2648, ctc_loss=0.1628, cr_loss=0.4065, attn_decoder_loss=0.2671, over 5576675.08 frames. 
], batch size: 211, lr: 7.87e-03, grad_scale: 8.0 +2024-09-17 15:58:22,714 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.37 vs. limit=15.0 +2024-09-17 15:59:08,778 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.421e+01 1.004e+02 1.135e+02 1.248e+02 2.199e+02, threshold=2.271e+02, percent-clipped=1.0 +2024-09-17 15:59:10,360 INFO [train.py:1198] (1/2) Epoch 14, batch 4500, loss[loss=0.2791, ctc_loss=0.1893, cr_loss=0.4018, attn_decoder_loss=0.2801, over 20752.00 frames. ], tot_loss[loss=0.2681, ctc_loss=0.1687, cr_loss=0.4087, attn_decoder_loss=0.2701, over 5237993.98 frames. ], batch size: 209, lr: 7.87e-03, grad_scale: 8.0 +2024-09-17 16:00:38,577 INFO [train.py:1198] (1/2) Epoch 15, batch 0, loss[loss=0.2446, ctc_loss=0.1369, cr_loss=0.3715, attn_decoder_loss=0.2483, over 29589.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1369, cr_loss=0.3715, attn_decoder_loss=0.2483, over 29589.00 frames. ], batch size: 73, lr: 7.60e-03, grad_scale: 16.0 +2024-09-17 16:00:38,577 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 16:00:56,922 INFO [train.py:1230] (1/2) Epoch 15, validation: loss=0.2128, ctc_loss=0.04201, cr_loss=5.567e-15, attn_decoder_loss=0.2317, over 944034.00 frames. +2024-09-17 16:00:56,922 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 16:00:58,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=253400.0, ans=0.125 +2024-09-17 16:01:02,296 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.66 vs. limit=22.5 +2024-09-17 16:01:19,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=253440.0, ans=0.125 +2024-09-17 16:01:41,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=253520.0, ans=0.2 +2024-09-17 16:01:44,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=253520.0, ans=0.0 +2024-09-17 16:01:48,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=253520.0, ans=0.125 +2024-09-17 16:02:15,219 INFO [train.py:1198] (1/2) Epoch 15, batch 50, loss[loss=0.2359, ctc_loss=0.1389, cr_loss=0.3804, attn_decoder_loss=0.2383, over 29381.00 frames. ], tot_loss[loss=0.2589, ctc_loss=0.1567, cr_loss=0.4048, attn_decoder_loss=0.2613, over 1268339.51 frames. ], batch size: 70, lr: 7.60e-03, grad_scale: 8.0 +2024-09-17 16:02:20,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.17 vs. 
limit=22.5 +2024-09-17 16:02:21,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=253600.0, ans=0.125 +2024-09-17 16:02:26,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=253600.0, ans=0.0 +2024-09-17 16:02:42,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=253640.0, ans=0.07 +2024-09-17 16:02:51,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=253680.0, ans=0.0 +2024-09-17 16:02:51,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=253680.0, ans=0.2 +2024-09-17 16:02:53,176 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.523e+01 9.844e+01 1.055e+02 1.171e+02 3.873e+02, threshold=2.109e+02, percent-clipped=1.0 +2024-09-17 16:03:07,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=253720.0, ans=0.0 +2024-09-17 16:03:12,262 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.90 vs. limit=22.5 +2024-09-17 16:03:29,546 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.55 vs. limit=15.0 +2024-09-17 16:03:33,153 INFO [train.py:1198] (1/2) Epoch 15, batch 100, loss[loss=0.2471, ctc_loss=0.1368, cr_loss=0.384, attn_decoder_loss=0.2509, over 29540.00 frames. ], tot_loss[loss=0.2592, ctc_loss=0.156, cr_loss=0.4015, attn_decoder_loss=0.2617, over 2250973.94 frames. ], batch size: 76, lr: 7.59e-03, grad_scale: 8.0 +2024-09-17 16:03:35,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=253800.0, ans=0.1 +2024-09-17 16:03:46,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=253840.0, ans=0.2 +2024-09-17 16:04:01,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=253880.0, ans=0.2 +2024-09-17 16:04:23,234 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=14.49 vs. limit=15.0 +2024-09-17 16:04:40,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=253960.0, ans=0.2 +2024-09-17 16:04:40,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=253960.0, ans=0.1 +2024-09-17 16:04:47,890 INFO [train.py:1198] (1/2) Epoch 15, batch 150, loss[loss=0.2224, ctc_loss=0.1101, cr_loss=0.3166, attn_decoder_loss=0.2278, over 29446.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.153, cr_loss=0.3966, attn_decoder_loss=0.2595, over 3045158.45 frames. ], batch size: 70, lr: 7.59e-03, grad_scale: 8.0 +2024-09-17 16:04:48,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.74 vs. 
limit=22.5 +2024-09-17 16:05:21,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=254080.0, ans=0.025 +2024-09-17 16:05:25,781 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.856e+01 8.837e+01 9.448e+01 1.022e+02 1.353e+02, threshold=1.890e+02, percent-clipped=0.0 +2024-09-17 16:05:35,753 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.96 vs. limit=15.0 +2024-09-17 16:05:43,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.23 vs. limit=22.5 +2024-09-17 16:05:48,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=254160.0, ans=0.125 +2024-09-17 16:06:03,417 INFO [train.py:1198] (1/2) Epoch 15, batch 200, loss[loss=0.2714, ctc_loss=0.1704, cr_loss=0.4239, attn_decoder_loss=0.2732, over 27024.00 frames. ], tot_loss[loss=0.2556, ctc_loss=0.1519, cr_loss=0.3947, attn_decoder_loss=0.2584, over 3657014.15 frames. ], batch size: 124, lr: 7.59e-03, grad_scale: 8.0 +2024-09-17 16:06:33,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=254240.0, ans=0.025 +2024-09-17 16:07:19,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=254360.0, ans=10.0 +2024-09-17 16:07:24,603 INFO [train.py:1198] (1/2) Epoch 15, batch 250, loss[loss=0.2765, ctc_loss=0.1637, cr_loss=0.4339, attn_decoder_loss=0.2794, over 29213.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.1516, cr_loss=0.3941, attn_decoder_loss=0.2581, over 4137760.04 frames. ], batch size: 100, lr: 7.58e-03, grad_scale: 8.0 +2024-09-17 16:07:25,325 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.52 vs. limit=22.5 +2024-09-17 16:07:26,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=254400.0, ans=10.0 +2024-09-17 16:07:27,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=254400.0, ans=0.125 +2024-09-17 16:07:49,269 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.15 vs. limit=15.0 +2024-09-17 16:07:53,431 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 16:07:59,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=254480.0, ans=0.2 +2024-09-17 16:08:02,186 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.896e+01 9.283e+01 1.022e+02 2.095e+02, threshold=1.857e+02, percent-clipped=1.0 +2024-09-17 16:08:24,297 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.33 vs. limit=15.0 +2024-09-17 16:08:40,123 INFO [train.py:1198] (1/2) Epoch 15, batch 300, loss[loss=0.2738, ctc_loss=0.1591, cr_loss=0.4185, attn_decoder_loss=0.2772, over 29546.00 frames. 
], tot_loss[loss=0.2542, ctc_loss=0.1504, cr_loss=0.3914, attn_decoder_loss=0.257, over 4507794.15 frames. ], batch size: 92, lr: 7.58e-03, grad_scale: 8.0 +2024-09-17 16:08:53,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=254640.0, ans=0.125 +2024-09-17 16:09:09,670 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.16 vs. limit=15.0 +2024-09-17 16:09:39,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=254760.0, ans=0.125 +2024-09-17 16:09:44,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=254760.0, ans=0.125 +2024-09-17 16:09:48,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=254760.0, ans=0.125 +2024-09-17 16:09:51,168 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=13.20 vs. limit=22.5 +2024-09-17 16:09:56,011 INFO [train.py:1198] (1/2) Epoch 15, batch 350, loss[loss=0.2308, ctc_loss=0.126, cr_loss=0.3433, attn_decoder_loss=0.2348, over 29329.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1506, cr_loss=0.3922, attn_decoder_loss=0.2573, over 4793021.07 frames. ], batch size: 71, lr: 7.58e-03, grad_scale: 8.0 +2024-09-17 16:09:56,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=254800.0, ans=0.2 +2024-09-17 16:09:56,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=254800.0, ans=0.125 +2024-09-17 16:09:56,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.39 vs. limit=15.0 +2024-09-17 16:10:04,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.99 vs. limit=22.5 +2024-09-17 16:10:16,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=254840.0, ans=0.025 +2024-09-17 16:10:18,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=254840.0, ans=0.2 +2024-09-17 16:10:19,032 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.19 vs. limit=22.5 +2024-09-17 16:10:31,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=254880.0, ans=0.0 +2024-09-17 16:10:35,868 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.547e+01 8.706e+01 9.394e+01 1.041e+02 2.813e+02, threshold=1.879e+02, percent-clipped=1.0 +2024-09-17 16:10:59,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=254960.0, ans=0.1 +2024-09-17 16:11:16,017 INFO [train.py:1198] (1/2) Epoch 15, batch 400, loss[loss=0.2572, ctc_loss=0.1455, cr_loss=0.3901, attn_decoder_loss=0.261, over 29711.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1509, cr_loss=0.3917, attn_decoder_loss=0.2573, over 5022670.65 frames. 
], batch size: 82, lr: 7.58e-03, grad_scale: 16.0 +2024-09-17 16:11:17,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=255000.0, ans=0.2 +2024-09-17 16:11:22,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=255000.0, ans=0.125 +2024-09-17 16:11:39,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=255040.0, ans=0.0 +2024-09-17 16:11:43,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=255040.0, ans=0.0 +2024-09-17 16:12:14,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=255120.0, ans=0.0 +2024-09-17 16:12:23,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=255160.0, ans=0.0 +2024-09-17 16:12:25,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.66 vs. limit=15.0 +2024-09-17 16:12:32,459 INFO [train.py:1198] (1/2) Epoch 15, batch 450, loss[loss=0.2592, ctc_loss=0.1513, cr_loss=0.3954, attn_decoder_loss=0.2624, over 29689.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1509, cr_loss=0.3917, attn_decoder_loss=0.2573, over 5186122.55 frames. ], batch size: 83, lr: 7.57e-03, grad_scale: 8.0 +2024-09-17 16:12:40,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=255200.0, ans=0.125 +2024-09-17 16:12:41,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=255200.0, ans=0.025 +2024-09-17 16:13:11,919 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.489e+01 8.843e+01 9.415e+01 1.015e+02 2.907e+02, threshold=1.883e+02, percent-clipped=1.0 +2024-09-17 16:13:12,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=255280.0, ans=0.125 +2024-09-17 16:13:15,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=255280.0, ans=0.125 +2024-09-17 16:13:21,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=255320.0, ans=0.07 +2024-09-17 16:13:21,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=255320.0, ans=0.125 +2024-09-17 16:13:24,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=255320.0, ans=0.025 +2024-09-17 16:13:30,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=255320.0, ans=0.0 +2024-09-17 16:13:32,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=255360.0, ans=0.0 +2024-09-17 16:13:36,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=255360.0, ans=0.125 +2024-09-17 16:13:42,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, 
batch_count=255360.0, ans=0.025 +2024-09-17 16:13:48,521 INFO [train.py:1198] (1/2) Epoch 15, batch 500, loss[loss=0.2791, ctc_loss=0.1735, cr_loss=0.4351, attn_decoder_loss=0.2812, over 29480.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1503, cr_loss=0.3911, attn_decoder_loss=0.2568, over 5329229.41 frames. ], batch size: 94, lr: 7.57e-03, grad_scale: 8.0 +2024-09-17 16:13:51,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=255400.0, ans=0.95 +2024-09-17 16:13:56,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=255400.0, ans=0.1 +2024-09-17 16:14:13,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=255440.0, ans=0.1 +2024-09-17 16:14:16,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=255440.0, ans=0.125 +2024-09-17 16:14:27,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=255480.0, ans=0.0 +2024-09-17 16:14:29,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=255480.0, ans=0.0 +2024-09-17 16:14:50,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=255560.0, ans=0.025 +2024-09-17 16:14:51,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=255560.0, ans=0.04949747468305833 +2024-09-17 16:15:08,804 INFO [train.py:1198] (1/2) Epoch 15, batch 550, loss[loss=0.2701, ctc_loss=0.157, cr_loss=0.4097, attn_decoder_loss=0.2736, over 28804.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1505, cr_loss=0.3908, attn_decoder_loss=0.2568, over 5421395.06 frames. ], batch size: 104, lr: 7.57e-03, grad_scale: 8.0 +2024-09-17 16:15:13,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=255600.0, ans=0.125 +2024-09-17 16:15:26,473 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.64 vs. limit=12.0 +2024-09-17 16:15:34,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=255640.0, ans=0.125 +2024-09-17 16:15:42,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=255680.0, ans=0.0 +2024-09-17 16:15:48,136 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.773e+01 8.993e+01 9.917e+01 1.076e+02 7.641e+02, threshold=1.983e+02, percent-clipped=4.0 +2024-09-17 16:16:08,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=255760.0, ans=0.125 +2024-09-17 16:16:24,501 INFO [train.py:1198] (1/2) Epoch 15, batch 600, loss[loss=0.269, ctc_loss=0.1601, cr_loss=0.4161, attn_decoder_loss=0.2719, over 29280.00 frames. ], tot_loss[loss=0.2544, ctc_loss=0.1509, cr_loss=0.3922, attn_decoder_loss=0.2572, over 5507696.85 frames. 
], batch size: 100, lr: 7.56e-03, grad_scale: 8.0 +2024-09-17 16:16:31,772 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.23 vs. limit=15.0 +2024-09-17 16:16:35,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.96 vs. limit=15.0 +2024-09-17 16:16:35,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=255800.0, ans=0.125 +2024-09-17 16:16:42,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.89 vs. limit=15.0 +2024-09-17 16:16:53,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=255880.0, ans=0.2 +2024-09-17 16:17:21,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=255920.0, ans=0.125 +2024-09-17 16:17:23,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=255960.0, ans=0.125 +2024-09-17 16:17:28,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.22 vs. limit=15.0 +2024-09-17 16:17:47,270 INFO [train.py:1198] (1/2) Epoch 15, batch 650, loss[loss=0.2567, ctc_loss=0.1516, cr_loss=0.4045, attn_decoder_loss=0.2594, over 29750.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.15, cr_loss=0.3912, attn_decoder_loss=0.2566, over 5584347.18 frames. ], batch size: 81, lr: 7.56e-03, grad_scale: 8.0 +2024-09-17 16:17:47,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=256000.0, ans=0.0 +2024-09-17 16:17:59,010 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.43 vs. limit=15.0 +2024-09-17 16:18:04,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=256040.0, ans=0.125 +2024-09-17 16:18:26,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=256080.0, ans=0.125 +2024-09-17 16:18:28,869 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.348e+01 8.547e+01 9.070e+01 9.577e+01 1.264e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-17 16:18:30,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=256080.0, ans=0.0 +2024-09-17 16:18:31,442 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.41 vs. limit=15.0 +2024-09-17 16:18:34,409 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.27 vs. limit=10.0 +2024-09-17 16:18:34,530 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.59 vs. 
limit=15.0 +2024-09-17 16:18:36,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.12 vs. limit=6.0 +2024-09-17 16:18:48,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=256160.0, ans=0.0 +2024-09-17 16:18:56,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=256160.0, ans=0.1 +2024-09-17 16:18:59,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=256160.0, ans=0.04949747468305833 +2024-09-17 16:19:07,282 INFO [train.py:1198] (1/2) Epoch 15, batch 700, loss[loss=0.2394, ctc_loss=0.1419, cr_loss=0.381, attn_decoder_loss=0.2418, over 29540.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1502, cr_loss=0.3917, attn_decoder_loss=0.257, over 5635143.59 frames. ], batch size: 76, lr: 7.56e-03, grad_scale: 8.0 +2024-09-17 16:19:41,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=256280.0, ans=0.95 +2024-09-17 16:19:47,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=256280.0, ans=0.125 +2024-09-17 16:20:04,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=256320.0, ans=0.125 +2024-09-17 16:20:14,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=256360.0, ans=0.1 +2024-09-17 16:20:23,532 INFO [train.py:1198] (1/2) Epoch 15, batch 750, loss[loss=0.2553, ctc_loss=0.1525, cr_loss=0.4208, attn_decoder_loss=0.2574, over 29688.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.1499, cr_loss=0.3909, attn_decoder_loss=0.2568, over 5674362.80 frames. ], batch size: 82, lr: 7.55e-03, grad_scale: 8.0 +2024-09-17 16:20:26,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=256400.0, ans=0.125 +2024-09-17 16:20:32,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=256400.0, ans=0.1 +2024-09-17 16:20:38,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=256440.0, ans=0.125 +2024-09-17 16:20:59,384 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.41 vs. limit=22.5 +2024-09-17 16:21:02,920 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.520e+01 8.739e+01 9.295e+01 9.820e+01 3.813e+02, threshold=1.859e+02, percent-clipped=2.0 +2024-09-17 16:21:06,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=256480.0, ans=0.1 +2024-09-17 16:21:09,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=256520.0, ans=0.2 +2024-09-17 16:21:12,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=256520.0, ans=0.125 +2024-09-17 16:21:18,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.25 vs. 
limit=22.5 +2024-09-17 16:21:39,091 INFO [train.py:1198] (1/2) Epoch 15, batch 800, loss[loss=0.236, ctc_loss=0.1324, cr_loss=0.3702, attn_decoder_loss=0.2393, over 29584.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.15, cr_loss=0.3917, attn_decoder_loss=0.2567, over 5705241.91 frames. ], batch size: 73, lr: 7.55e-03, grad_scale: 16.0 +2024-09-17 16:22:04,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=256640.0, ans=0.0 +2024-09-17 16:22:08,524 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.67 vs. limit=15.0 +2024-09-17 16:22:26,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=256720.0, ans=0.0 +2024-09-17 16:22:32,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=256720.0, ans=0.2 +2024-09-17 16:22:40,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=256760.0, ans=0.2 +2024-09-17 16:22:43,868 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.39 vs. limit=15.0 +2024-09-17 16:22:49,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=256760.0, ans=0.0 +2024-09-17 16:22:49,430 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 16:22:49,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=256760.0, ans=0.125 +2024-09-17 16:22:56,871 INFO [train.py:1198] (1/2) Epoch 15, batch 850, loss[loss=0.2719, ctc_loss=0.1687, cr_loss=0.4223, attn_decoder_loss=0.274, over 29695.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.15, cr_loss=0.3916, attn_decoder_loss=0.2568, over 5735395.85 frames. ], batch size: 89, lr: 7.55e-03, grad_scale: 8.0 +2024-09-17 16:23:08,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=256800.0, ans=0.0 +2024-09-17 16:23:11,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=256800.0, ans=0.125 +2024-09-17 16:23:38,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=256880.0, ans=0.125 +2024-09-17 16:23:39,537 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.676e+01 9.240e+01 9.818e+01 3.041e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-17 16:23:42,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=256920.0, ans=0.125 +2024-09-17 16:23:50,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=256920.0, ans=0.025 +2024-09-17 16:23:51,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.56 vs. limit=6.0 +2024-09-17 16:24:14,677 INFO [train.py:1198] (1/2) Epoch 15, batch 900, loss[loss=0.2355, ctc_loss=0.1342, cr_loss=0.3762, attn_decoder_loss=0.2384, over 29601.00 frames. 
], tot_loss[loss=0.2544, ctc_loss=0.1506, cr_loss=0.3928, attn_decoder_loss=0.2572, over 5740678.67 frames. ], batch size: 73, lr: 7.55e-03, grad_scale: 8.0 +2024-09-17 16:24:42,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=257040.0, ans=0.125 +2024-09-17 16:25:30,435 INFO [train.py:1198] (1/2) Epoch 15, batch 950, loss[loss=0.2432, ctc_loss=0.1401, cr_loss=0.3687, attn_decoder_loss=0.2464, over 29509.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1506, cr_loss=0.3924, attn_decoder_loss=0.257, over 5742685.78 frames. ], batch size: 74, lr: 7.54e-03, grad_scale: 8.0 +2024-09-17 16:25:36,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=257200.0, ans=0.0 +2024-09-17 16:25:47,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=257240.0, ans=0.1 +2024-09-17 16:25:59,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=257280.0, ans=0.0 +2024-09-17 16:26:08,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=11.80 vs. limit=15.0 +2024-09-17 16:26:13,742 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.071e+01 9.347e+01 1.011e+02 1.116e+02 3.125e+02, threshold=2.021e+02, percent-clipped=4.0 +2024-09-17 16:26:14,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=257280.0, ans=0.2 +2024-09-17 16:26:27,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=257320.0, ans=0.0 +2024-09-17 16:26:27,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=257320.0, ans=0.125 +2024-09-17 16:26:34,442 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.15 vs. limit=22.5 +2024-09-17 16:26:50,492 INFO [train.py:1198] (1/2) Epoch 15, batch 1000, loss[loss=0.2467, ctc_loss=0.1489, cr_loss=0.3967, attn_decoder_loss=0.2488, over 29485.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1516, cr_loss=0.3935, attn_decoder_loss=0.2577, over 5735805.51 frames. ], batch size: 77, lr: 7.54e-03, grad_scale: 8.0 +2024-09-17 16:26:52,879 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.13 vs. 
limit=15.0 +2024-09-17 16:27:09,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=257440.0, ans=0.0 +2024-09-17 16:27:10,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=257440.0, ans=0.125 +2024-09-17 16:27:27,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=257480.0, ans=0.0 +2024-09-17 16:27:28,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=257480.0, ans=0.5 +2024-09-17 16:27:59,353 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 16:28:06,970 INFO [train.py:1198] (1/2) Epoch 15, batch 1050, loss[loss=0.2624, ctc_loss=0.1579, cr_loss=0.3966, attn_decoder_loss=0.2652, over 29688.00 frames. ], tot_loss[loss=0.2544, ctc_loss=0.1513, cr_loss=0.3932, attn_decoder_loss=0.2571, over 5743659.68 frames. ], batch size: 85, lr: 7.54e-03, grad_scale: 8.0 +2024-09-17 16:28:16,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=257600.0, ans=0.0 +2024-09-17 16:28:29,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.51 vs. limit=22.5 +2024-09-17 16:28:33,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=257640.0, ans=0.95 +2024-09-17 16:28:34,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=257640.0, ans=0.1 +2024-09-17 16:28:34,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=257640.0, ans=0.1 +2024-09-17 16:28:48,455 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.880e+01 9.520e+01 1.043e+02 1.808e+02, threshold=1.904e+02, percent-clipped=0.0 +2024-09-17 16:29:19,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=257760.0, ans=0.125 +2024-09-17 16:29:21,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.37 vs. limit=22.5 +2024-09-17 16:29:23,481 INFO [train.py:1198] (1/2) Epoch 15, batch 1100, loss[loss=0.26, ctc_loss=0.1568, cr_loss=0.3931, attn_decoder_loss=0.2628, over 29428.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1511, cr_loss=0.3928, attn_decoder_loss=0.2569, over 5756164.03 frames. 
], batch size: 78, lr: 7.53e-03, grad_scale: 8.0 +2024-09-17 16:29:32,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=257800.0, ans=0.1 +2024-09-17 16:29:40,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=257840.0, ans=0.07 +2024-09-17 16:29:57,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=257880.0, ans=0.125 +2024-09-17 16:30:10,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=257920.0, ans=0.1 +2024-09-17 16:30:10,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=257920.0, ans=0.125 +2024-09-17 16:30:15,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=257920.0, ans=0.1 +2024-09-17 16:30:22,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=257920.0, ans=0.0 +2024-09-17 16:30:26,034 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.09 vs. limit=10.0 +2024-09-17 16:30:43,779 INFO [train.py:1198] (1/2) Epoch 15, batch 1150, loss[loss=0.2506, ctc_loss=0.1456, cr_loss=0.3905, attn_decoder_loss=0.2536, over 29445.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1511, cr_loss=0.3922, attn_decoder_loss=0.2569, over 5755078.94 frames. ], batch size: 78, lr: 7.53e-03, grad_scale: 8.0 +2024-09-17 16:30:57,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=258040.0, ans=0.2 +2024-09-17 16:31:02,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=258040.0, ans=0.125 +2024-09-17 16:31:08,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=258040.0, ans=0.1 +2024-09-17 16:31:14,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=258080.0, ans=0.1 +2024-09-17 16:31:20,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=258080.0, ans=0.1 +2024-09-17 16:31:25,059 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.854e+01 8.950e+01 9.429e+01 1.052e+02 4.091e+02, threshold=1.886e+02, percent-clipped=2.0 +2024-09-17 16:31:26,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=258080.0, ans=0.0 +2024-09-17 16:31:38,156 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.23 vs. 
limit=10.0 +2024-09-17 16:31:43,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=258160.0, ans=0.125 +2024-09-17 16:31:43,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=258160.0, ans=0.1 +2024-09-17 16:31:56,446 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.36 vs. limit=15.0 +2024-09-17 16:32:00,032 INFO [train.py:1198] (1/2) Epoch 15, batch 1200, loss[loss=0.2648, ctc_loss=0.1483, cr_loss=0.3723, attn_decoder_loss=0.2695, over 29675.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1517, cr_loss=0.3927, attn_decoder_loss=0.2577, over 5747601.48 frames. ], batch size: 85, lr: 7.53e-03, grad_scale: 16.0 +2024-09-17 16:32:01,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=258200.0, ans=0.125 +2024-09-17 16:32:15,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=258240.0, ans=0.1 +2024-09-17 16:32:23,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=258240.0, ans=0.0 +2024-09-17 16:32:31,690 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.87 vs. limit=12.0 +2024-09-17 16:32:46,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=258320.0, ans=0.0 +2024-09-17 16:32:52,717 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.66 vs. limit=15.0 +2024-09-17 16:33:01,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=258360.0, ans=0.125 +2024-09-17 16:33:06,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.02 vs. limit=6.0 +2024-09-17 16:33:11,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.06 vs. limit=10.0 +2024-09-17 16:33:15,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=258400.0, ans=0.125 +2024-09-17 16:33:16,673 INFO [train.py:1198] (1/2) Epoch 15, batch 1250, loss[loss=0.2663, ctc_loss=0.1531, cr_loss=0.4107, attn_decoder_loss=0.2698, over 29548.00 frames. ], tot_loss[loss=0.2554, ctc_loss=0.1518, cr_loss=0.3933, attn_decoder_loss=0.2582, over 5774906.07 frames. ], batch size: 92, lr: 7.53e-03, grad_scale: 8.0 +2024-09-17 16:33:32,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=258440.0, ans=0.0 +2024-09-17 16:33:39,126 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.43 vs. 
limit=15.0 +2024-09-17 16:33:59,434 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.854e+01 8.868e+01 9.518e+01 1.036e+02 1.703e+02, threshold=1.904e+02, percent-clipped=0.0 +2024-09-17 16:34:21,181 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.54 vs. limit=6.0 +2024-09-17 16:34:23,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=258560.0, ans=0.1 +2024-09-17 16:34:29,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=258560.0, ans=0.2 +2024-09-17 16:34:37,194 INFO [train.py:1198] (1/2) Epoch 15, batch 1300, loss[loss=0.2619, ctc_loss=0.1445, cr_loss=0.3688, attn_decoder_loss=0.2668, over 28302.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1515, cr_loss=0.3931, attn_decoder_loss=0.2577, over 5778505.06 frames. ], batch size: 111, lr: 7.52e-03, grad_scale: 8.0 +2024-09-17 16:35:06,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=258680.0, ans=0.125 +2024-09-17 16:35:24,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=258720.0, ans=0.04949747468305833 +2024-09-17 16:35:35,512 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.96 vs. limit=22.5 +2024-09-17 16:35:39,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=258760.0, ans=0.2 +2024-09-17 16:35:53,272 INFO [train.py:1198] (1/2) Epoch 15, batch 1350, loss[loss=0.2584, ctc_loss=0.1548, cr_loss=0.4105, attn_decoder_loss=0.2608, over 29736.00 frames. ], tot_loss[loss=0.2544, ctc_loss=0.1508, cr_loss=0.3921, attn_decoder_loss=0.2572, over 5795907.42 frames. ], batch size: 81, lr: 7.52e-03, grad_scale: 8.0 +2024-09-17 16:36:01,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=258800.0, ans=0.0 +2024-09-17 16:36:07,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=258840.0, ans=0.0 +2024-09-17 16:36:35,391 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.506e+01 8.925e+01 9.317e+01 1.009e+02 1.483e+02, threshold=1.863e+02, percent-clipped=0.0 +2024-09-17 16:36:44,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=258920.0, ans=0.0 +2024-09-17 16:36:50,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=258920.0, ans=0.2 +2024-09-17 16:37:06,425 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.88 vs. limit=15.0 +2024-09-17 16:37:08,670 INFO [train.py:1198] (1/2) Epoch 15, batch 1400, loss[loss=0.221, ctc_loss=0.1233, cr_loss=0.3429, attn_decoder_loss=0.2242, over 29596.00 frames. ], tot_loss[loss=0.2541, ctc_loss=0.1505, cr_loss=0.3921, attn_decoder_loss=0.2569, over 5806813.58 frames. 
], batch size: 69, lr: 7.52e-03, grad_scale: 8.0 +2024-09-17 16:37:09,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.39 vs. limit=15.0 +2024-09-17 16:37:22,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=259040.0, ans=0.125 +2024-09-17 16:37:33,888 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.18 vs. limit=15.0 +2024-09-17 16:37:36,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=259040.0, ans=0.125 +2024-09-17 16:37:39,415 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 16:37:48,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=259080.0, ans=0.125 +2024-09-17 16:37:54,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=259120.0, ans=15.0 +2024-09-17 16:37:56,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=20.94 vs. limit=15.0 +2024-09-17 16:38:10,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=259160.0, ans=0.125 +2024-09-17 16:38:19,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.14 vs. limit=15.0 +2024-09-17 16:38:23,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=259160.0, ans=0.1 +2024-09-17 16:38:27,009 INFO [train.py:1198] (1/2) Epoch 15, batch 1450, loss[loss=0.2686, ctc_loss=0.1594, cr_loss=0.4128, attn_decoder_loss=0.2715, over 29451.00 frames. ], tot_loss[loss=0.2547, ctc_loss=0.1508, cr_loss=0.3931, attn_decoder_loss=0.2575, over 5804189.42 frames. ], batch size: 94, lr: 7.51e-03, grad_scale: 8.0 +2024-09-17 16:38:27,839 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.70 vs. 
limit=6.0 +2024-09-17 16:38:39,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=259200.0, ans=0.0 +2024-09-17 16:38:43,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=259240.0, ans=0.07 +2024-09-17 16:38:46,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=259240.0, ans=0.125 +2024-09-17 16:38:54,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=259240.0, ans=0.0 +2024-09-17 16:39:01,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=259280.0, ans=0.125 +2024-09-17 16:39:07,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=259280.0, ans=0.0 +2024-09-17 16:39:11,108 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.490e+01 8.827e+01 9.578e+01 1.049e+02 2.248e+02, threshold=1.916e+02, percent-clipped=2.0 +2024-09-17 16:39:25,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=259320.0, ans=0.125 +2024-09-17 16:39:28,902 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.51 vs. limit=22.5 +2024-09-17 16:39:34,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=259360.0, ans=0.0 +2024-09-17 16:39:44,413 INFO [train.py:1198] (1/2) Epoch 15, batch 1500, loss[loss=0.2711, ctc_loss=0.1645, cr_loss=0.4245, attn_decoder_loss=0.2735, over 29646.00 frames. ], tot_loss[loss=0.2552, ctc_loss=0.1513, cr_loss=0.3937, attn_decoder_loss=0.258, over 5805645.87 frames. ], batch size: 86, lr: 7.51e-03, grad_scale: 8.0 +2024-09-17 16:39:47,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=259400.0, ans=0.2 +2024-09-17 16:40:38,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=259520.0, ans=0.0 +2024-09-17 16:40:39,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=259520.0, ans=0.5 +2024-09-17 16:40:47,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=259560.0, ans=0.0 +2024-09-17 16:40:53,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=259560.0, ans=0.125 +2024-09-17 16:40:58,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=259560.0, ans=0.0 +2024-09-17 16:40:59,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=259600.0, ans=0.125 +2024-09-17 16:41:00,891 INFO [train.py:1198] (1/2) Epoch 15, batch 1550, loss[loss=0.2861, ctc_loss=0.1821, cr_loss=0.4619, attn_decoder_loss=0.2874, over 29505.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1512, cr_loss=0.3927, attn_decoder_loss=0.2578, over 5782753.85 frames. 
], batch size: 90, lr: 7.51e-03, grad_scale: 8.0 +2024-09-17 16:41:04,794 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.29 vs. limit=15.0 +2024-09-17 16:41:14,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=259640.0, ans=0.0 +2024-09-17 16:41:19,765 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.60 vs. limit=22.5 +2024-09-17 16:41:20,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=259640.0, ans=0.125 +2024-09-17 16:41:42,749 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.565e+01 8.774e+01 9.466e+01 1.042e+02 2.668e+02, threshold=1.893e+02, percent-clipped=3.0 +2024-09-17 16:42:02,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=259760.0, ans=0.125 +2024-09-17 16:42:08,780 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.46 vs. limit=6.0 +2024-09-17 16:42:17,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=259760.0, ans=0.125 +2024-09-17 16:42:20,457 INFO [train.py:1198] (1/2) Epoch 15, batch 1600, loss[loss=0.2727, ctc_loss=0.1616, cr_loss=0.4061, attn_decoder_loss=0.276, over 29665.00 frames. ], tot_loss[loss=0.2547, ctc_loss=0.1511, cr_loss=0.3922, attn_decoder_loss=0.2575, over 5764622.31 frames. ], batch size: 85, lr: 7.51e-03, grad_scale: 16.0 +2024-09-17 16:42:28,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=259800.0, ans=0.2 +2024-09-17 16:42:28,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=259800.0, ans=0.2 +2024-09-17 16:42:40,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=259840.0, ans=0.1 +2024-09-17 16:42:45,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=259840.0, ans=0.125 +2024-09-17 16:42:46,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=259840.0, ans=0.125 +2024-09-17 16:42:55,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=259880.0, ans=0.09899494936611666 +2024-09-17 16:42:59,439 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.00 vs. limit=15.0 +2024-09-17 16:43:04,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=259920.0, ans=0.2 +2024-09-17 16:43:05,329 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.33 vs. 
limit=22.5 +2024-09-17 16:43:13,906 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 16:43:36,452 INFO [train.py:1198] (1/2) Epoch 15, batch 1650, loss[loss=0.2744, ctc_loss=0.1632, cr_loss=0.4142, attn_decoder_loss=0.2776, over 29713.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1509, cr_loss=0.392, attn_decoder_loss=0.2571, over 5758140.47 frames. ], batch size: 89, lr: 7.50e-03, grad_scale: 8.0 +2024-09-17 16:43:59,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=260040.0, ans=10.0 +2024-09-17 16:43:59,605 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.89 vs. limit=10.0 +2024-09-17 16:44:03,045 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.30 vs. limit=22.5 +2024-09-17 16:44:20,238 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.336e+01 8.810e+01 9.523e+01 1.053e+02 2.945e+02, threshold=1.905e+02, percent-clipped=2.0 +2024-09-17 16:44:48,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=260160.0, ans=0.125 +2024-09-17 16:44:51,638 INFO [train.py:1198] (1/2) Epoch 15, batch 1700, loss[loss=0.2277, ctc_loss=0.1302, cr_loss=0.3586, attn_decoder_loss=0.2306, over 29566.00 frames. ], tot_loss[loss=0.2541, ctc_loss=0.1505, cr_loss=0.392, attn_decoder_loss=0.2569, over 5778004.33 frames. ], batch size: 69, lr: 7.50e-03, grad_scale: 8.0 +2024-09-17 16:44:57,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=260200.0, ans=0.125 +2024-09-17 16:44:57,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=260200.0, ans=0.0 +2024-09-17 16:44:59,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=260200.0, ans=0.125 +2024-09-17 16:45:10,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=260240.0, ans=0.125 +2024-09-17 16:45:15,010 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.09 vs. limit=15.0 +2024-09-17 16:45:17,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=260240.0, ans=0.025 +2024-09-17 16:45:19,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=260240.0, ans=0.2 +2024-09-17 16:45:19,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=260240.0, ans=0.2 +2024-09-17 16:45:26,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=260280.0, ans=0.025 +2024-09-17 16:45:34,580 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.11 vs. 
limit=15.0 +2024-09-17 16:46:04,268 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.96 vs. limit=15.0 +2024-09-17 16:46:11,458 INFO [train.py:1198] (1/2) Epoch 15, batch 1750, loss[loss=0.2191, ctc_loss=0.1226, cr_loss=0.3584, attn_decoder_loss=0.2218, over 29368.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1501, cr_loss=0.3916, attn_decoder_loss=0.2565, over 5787528.80 frames. ], batch size: 67, lr: 7.50e-03, grad_scale: 8.0 +2024-09-17 16:46:32,125 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.02 vs. limit=15.0 +2024-09-17 16:46:55,373 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.411e+01 8.854e+01 9.659e+01 1.042e+02 2.660e+02, threshold=1.932e+02, percent-clipped=4.0 +2024-09-17 16:46:57,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=260520.0, ans=0.0 +2024-09-17 16:47:09,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=260520.0, ans=0.125 +2024-09-17 16:47:15,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=260560.0, ans=0.125 +2024-09-17 16:47:26,571 INFO [train.py:1198] (1/2) Epoch 15, batch 1800, loss[loss=0.2656, ctc_loss=0.1649, cr_loss=0.4224, attn_decoder_loss=0.2674, over 29680.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.15, cr_loss=0.391, attn_decoder_loss=0.2566, over 5791253.65 frames. ], batch size: 83, lr: 7.49e-03, grad_scale: 8.0 +2024-09-17 16:47:33,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=260600.0, ans=0.125 +2024-09-17 16:47:53,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.09 vs. limit=22.5 +2024-09-17 16:48:11,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=260720.0, ans=0.0 +2024-09-17 16:48:11,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=260720.0, ans=0.125 +2024-09-17 16:48:32,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=260760.0, ans=0.0 +2024-09-17 16:48:38,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=260760.0, ans=0.125 +2024-09-17 16:48:43,404 INFO [train.py:1198] (1/2) Epoch 15, batch 1850, loss[loss=0.2741, ctc_loss=0.1671, cr_loss=0.423, attn_decoder_loss=0.2766, over 29632.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1495, cr_loss=0.3903, attn_decoder_loss=0.2562, over 5797819.22 frames. ], batch size: 86, lr: 7.49e-03, grad_scale: 8.0 +2024-09-17 16:48:47,329 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.03 vs. 
limit=15.0 +2024-09-17 16:48:55,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=260800.0, ans=0.07 +2024-09-17 16:49:27,359 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.276e+01 8.760e+01 9.313e+01 1.001e+02 1.511e+02, threshold=1.863e+02, percent-clipped=0.0 +2024-09-17 16:49:42,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=260960.0, ans=0.1 +2024-09-17 16:49:50,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=260960.0, ans=0.0 +2024-09-17 16:50:01,055 INFO [train.py:1198] (1/2) Epoch 15, batch 1900, loss[loss=0.2634, ctc_loss=0.1506, cr_loss=0.376, attn_decoder_loss=0.2676, over 29716.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1495, cr_loss=0.3902, attn_decoder_loss=0.2566, over 5805797.70 frames. ], batch size: 89, lr: 7.49e-03, grad_scale: 8.0 +2024-09-17 16:50:12,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=261000.0, ans=0.025 +2024-09-17 16:50:16,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=261040.0, ans=0.125 +2024-09-17 16:50:33,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=261080.0, ans=0.125 +2024-09-17 16:50:48,464 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.51 vs. limit=22.5 +2024-09-17 16:50:58,434 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.89 vs. limit=15.0 +2024-09-17 16:51:12,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.78 vs. limit=15.0 +2024-09-17 16:51:18,384 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.18 vs. limit=15.0 +2024-09-17 16:51:18,999 INFO [train.py:1198] (1/2) Epoch 15, batch 1950, loss[loss=0.2502, ctc_loss=0.1324, cr_loss=0.3552, attn_decoder_loss=0.2554, over 29455.00 frames. ], tot_loss[loss=0.2547, ctc_loss=0.1498, cr_loss=0.3916, attn_decoder_loss=0.2577, over 5819790.17 frames. ], batch size: 78, lr: 7.49e-03, grad_scale: 8.0 +2024-09-17 16:51:33,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=261240.0, ans=0.125 +2024-09-17 16:51:38,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=261240.0, ans=0.0 +2024-09-17 16:51:39,630 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.40 vs. 
limit=15.0 +2024-09-17 16:51:53,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=261280.0, ans=0.025 +2024-09-17 16:52:02,588 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.191e+01 8.962e+01 9.463e+01 1.031e+02 5.545e+02, threshold=1.893e+02, percent-clipped=1.0 +2024-09-17 16:52:02,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=261320.0, ans=0.125 +2024-09-17 16:52:34,220 INFO [train.py:1198] (1/2) Epoch 15, batch 2000, loss[loss=0.2266, ctc_loss=0.1333, cr_loss=0.365, attn_decoder_loss=0.2289, over 29334.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1508, cr_loss=0.3927, attn_decoder_loss=0.2584, over 5798037.35 frames. ], batch size: 67, lr: 7.48e-03, grad_scale: 16.0 +2024-09-17 16:52:37,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=261400.0, ans=0.0 +2024-09-17 16:52:43,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.68 vs. limit=15.0 +2024-09-17 16:52:47,561 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.91 vs. limit=15.0 +2024-09-17 16:52:57,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=261440.0, ans=0.125 +2024-09-17 16:53:03,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=261480.0, ans=0.0 +2024-09-17 16:53:53,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=261600.0, ans=0.125 +2024-09-17 16:53:54,014 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.08 vs. limit=12.0 +2024-09-17 16:53:55,005 INFO [train.py:1198] (1/2) Epoch 15, batch 2050, loss[loss=0.2303, ctc_loss=0.1325, cr_loss=0.358, attn_decoder_loss=0.2332, over 29424.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1505, cr_loss=0.3919, attn_decoder_loss=0.2578, over 5790431.09 frames. ], batch size: 70, lr: 7.48e-03, grad_scale: 8.0 +2024-09-17 16:54:16,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=261640.0, ans=0.5 +2024-09-17 16:54:24,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=261680.0, ans=0.125 +2024-09-17 16:54:28,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=261680.0, ans=0.0 +2024-09-17 16:54:30,834 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.52 vs. 
limit=15.0 +2024-09-17 16:54:38,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=261680.0, ans=0.125 +2024-09-17 16:54:40,625 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.670e+01 9.073e+01 9.648e+01 1.067e+02 2.180e+02, threshold=1.930e+02, percent-clipped=1.0 +2024-09-17 16:54:51,855 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.06 vs. limit=15.0 +2024-09-17 16:55:03,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=261760.0, ans=0.125 +2024-09-17 16:55:10,751 INFO [train.py:1198] (1/2) Epoch 15, batch 2100, loss[loss=0.2545, ctc_loss=0.1491, cr_loss=0.3944, attn_decoder_loss=0.2574, over 29770.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1501, cr_loss=0.3917, attn_decoder_loss=0.2572, over 5801205.33 frames. ], batch size: 81, lr: 7.48e-03, grad_scale: 8.0 +2024-09-17 16:55:15,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=261800.0, ans=0.2 +2024-09-17 16:55:44,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=261880.0, ans=0.125 +2024-09-17 16:55:57,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=261920.0, ans=0.04949747468305833 +2024-09-17 16:55:59,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=261920.0, ans=0.125 +2024-09-17 16:56:23,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=261960.0, ans=0.0 +2024-09-17 16:56:26,349 INFO [train.py:1198] (1/2) Epoch 15, batch 2150, loss[loss=0.2445, ctc_loss=0.142, cr_loss=0.393, attn_decoder_loss=0.2472, over 29444.00 frames. ], tot_loss[loss=0.2535, ctc_loss=0.1494, cr_loss=0.3908, attn_decoder_loss=0.2564, over 5815567.00 frames. ], batch size: 78, lr: 7.47e-03, grad_scale: 8.0 +2024-09-17 16:56:40,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=262040.0, ans=0.0 +2024-09-17 16:57:11,851 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.582e+01 8.623e+01 9.076e+01 9.705e+01 5.465e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-17 16:57:17,027 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.17 vs. limit=15.0 +2024-09-17 16:57:41,945 INFO [train.py:1198] (1/2) Epoch 15, batch 2200, loss[loss=0.2631, ctc_loss=0.1536, cr_loss=0.4136, attn_decoder_loss=0.2661, over 29607.00 frames. ], tot_loss[loss=0.2535, ctc_loss=0.1494, cr_loss=0.3909, attn_decoder_loss=0.2564, over 5811862.51 frames. ], batch size: 86, lr: 7.47e-03, grad_scale: 8.0 +2024-09-17 16:57:48,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=262200.0, ans=0.2 +2024-09-17 16:57:56,433 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.63 vs. 
limit=15.0 +2024-09-17 16:58:13,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=262240.0, ans=0.125 +2024-09-17 16:58:29,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=262280.0, ans=0.0 +2024-09-17 16:58:35,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=262320.0, ans=0.035 +2024-09-17 16:58:55,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=262360.0, ans=0.1 +2024-09-17 16:59:02,696 INFO [train.py:1198] (1/2) Epoch 15, batch 2250, loss[loss=0.2599, ctc_loss=0.152, cr_loss=0.3981, attn_decoder_loss=0.2631, over 29689.00 frames. ], tot_loss[loss=0.2532, ctc_loss=0.1489, cr_loss=0.3897, attn_decoder_loss=0.2561, over 5811510.54 frames. ], batch size: 82, lr: 7.47e-03, grad_scale: 8.0 +2024-09-17 16:59:34,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=262480.0, ans=0.2 +2024-09-17 16:59:42,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=262480.0, ans=0.125 +2024-09-17 16:59:47,795 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.637e+01 9.303e+01 1.004e+02 1.390e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-17 16:59:54,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=262520.0, ans=0.125 +2024-09-17 17:00:00,798 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.22 vs. limit=6.0 +2024-09-17 17:00:04,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=262560.0, ans=0.05 +2024-09-17 17:00:09,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=262560.0, ans=0.0 +2024-09-17 17:00:18,402 INFO [train.py:1198] (1/2) Epoch 15, batch 2300, loss[loss=0.2343, ctc_loss=0.1338, cr_loss=0.3509, attn_decoder_loss=0.2377, over 29328.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1482, cr_loss=0.3884, attn_decoder_loss=0.2553, over 5798120.71 frames. 
], batch size: 71, lr: 7.47e-03, grad_scale: 8.0 +2024-09-17 17:00:23,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=262600.0, ans=0.2 +2024-09-17 17:00:29,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=262600.0, ans=0.2 +2024-09-17 17:00:34,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=262640.0, ans=0.125 +2024-09-17 17:00:36,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=262640.0, ans=0.1 +2024-09-17 17:00:50,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=262680.0, ans=0.0 +2024-09-17 17:00:56,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=262680.0, ans=0.0 +2024-09-17 17:01:07,819 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.94 vs. limit=15.0 +2024-09-17 17:01:19,789 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.17 vs. limit=15.0 +2024-09-17 17:01:20,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=262760.0, ans=0.1 +2024-09-17 17:01:22,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=262760.0, ans=0.1 +2024-09-17 17:01:28,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.62 vs. limit=15.0 +2024-09-17 17:01:32,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=262800.0, ans=0.125 +2024-09-17 17:01:34,192 INFO [train.py:1198] (1/2) Epoch 15, batch 2350, loss[loss=0.2699, ctc_loss=0.1646, cr_loss=0.4066, attn_decoder_loss=0.2726, over 29697.00 frames. ], tot_loss[loss=0.2526, ctc_loss=0.1485, cr_loss=0.3886, attn_decoder_loss=0.2555, over 5803871.74 frames. ], batch size: 83, lr: 7.46e-03, grad_scale: 8.0 +2024-09-17 17:02:12,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=262880.0, ans=0.2 +2024-09-17 17:02:14,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=262880.0, ans=0.125 +2024-09-17 17:02:17,379 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.39 vs. limit=15.0 +2024-09-17 17:02:24,199 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.558e+01 8.797e+01 9.531e+01 1.053e+02 3.289e+02, threshold=1.906e+02, percent-clipped=2.0 +2024-09-17 17:02:49,872 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.07 vs. limit=15.0 +2024-09-17 17:02:54,792 INFO [train.py:1198] (1/2) Epoch 15, batch 2400, loss[loss=0.2366, ctc_loss=0.1354, cr_loss=0.3664, attn_decoder_loss=0.2398, over 29536.00 frames. 
], tot_loss[loss=0.2534, ctc_loss=0.1493, cr_loss=0.3904, attn_decoder_loss=0.2563, over 5808899.22 frames. ], batch size: 76, lr: 7.46e-03, grad_scale: 16.0 +2024-09-17 17:03:05,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=263000.0, ans=0.025 +2024-09-17 17:03:08,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=263040.0, ans=0.125 +2024-09-17 17:03:14,659 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:03:46,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=263120.0, ans=0.1 +2024-09-17 17:04:05,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=263160.0, ans=0.0 +2024-09-17 17:04:11,101 INFO [train.py:1198] (1/2) Epoch 15, batch 2450, loss[loss=0.2625, ctc_loss=0.1553, cr_loss=0.3977, attn_decoder_loss=0.2656, over 29711.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.15, cr_loss=0.391, attn_decoder_loss=0.2574, over 5785786.05 frames. ], batch size: 82, lr: 7.46e-03, grad_scale: 8.0 +2024-09-17 17:04:17,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=263200.0, ans=0.125 +2024-09-17 17:04:21,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=263200.0, ans=0.125 +2024-09-17 17:04:35,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=263240.0, ans=0.125 +2024-09-17 17:04:46,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=263280.0, ans=0.125 +2024-09-17 17:04:46,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=263280.0, ans=0.125 +2024-09-17 17:04:49,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=263280.0, ans=0.125 +2024-09-17 17:04:57,814 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.779e+01 9.083e+01 9.931e+01 1.099e+02 3.144e+02, threshold=1.986e+02, percent-clipped=3.0 +2024-09-17 17:04:59,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=263320.0, ans=0.125 +2024-09-17 17:05:26,722 INFO [train.py:1198] (1/2) Epoch 15, batch 2500, loss[loss=0.2606, ctc_loss=0.1504, cr_loss=0.3914, attn_decoder_loss=0.2642, over 29640.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1501, cr_loss=0.3909, attn_decoder_loss=0.2574, over 5795177.13 frames. ], batch size: 86, lr: 7.46e-03, grad_scale: 8.0 +2024-09-17 17:05:39,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=263400.0, ans=0.0 +2024-09-17 17:05:58,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=263440.0, ans=0.2 +2024-09-17 17:06:18,061 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.34 vs. 
limit=22.5 +2024-09-17 17:06:20,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=263520.0, ans=0.125 +2024-09-17 17:06:31,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=263560.0, ans=10.0 +2024-09-17 17:06:32,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=263560.0, ans=0.0 +2024-09-17 17:06:44,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=263560.0, ans=0.125 +2024-09-17 17:06:47,416 INFO [train.py:1198] (1/2) Epoch 15, batch 2550, loss[loss=0.2283, ctc_loss=0.1296, cr_loss=0.3717, attn_decoder_loss=0.231, over 29362.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1499, cr_loss=0.3911, attn_decoder_loss=0.2571, over 5797897.26 frames. ], batch size: 67, lr: 7.45e-03, grad_scale: 8.0 +2024-09-17 17:06:53,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=263600.0, ans=0.025 +2024-09-17 17:07:11,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=263640.0, ans=0.125 +2024-09-17 17:07:22,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=263680.0, ans=0.125 +2024-09-17 17:07:34,222 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.398e+01 8.892e+01 9.357e+01 1.015e+02 2.489e+02, threshold=1.871e+02, percent-clipped=2.0 +2024-09-17 17:07:34,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=263720.0, ans=0.125 +2024-09-17 17:07:42,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=263720.0, ans=0.125 +2024-09-17 17:07:43,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=263720.0, ans=0.125 +2024-09-17 17:08:02,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=263800.0, ans=0.125 +2024-09-17 17:08:03,390 INFO [train.py:1198] (1/2) Epoch 15, batch 2600, loss[loss=0.2528, ctc_loss=0.1533, cr_loss=0.3976, attn_decoder_loss=0.255, over 29431.00 frames. ], tot_loss[loss=0.2545, ctc_loss=0.1501, cr_loss=0.3914, attn_decoder_loss=0.2574, over 5794458.39 frames. ], batch size: 78, lr: 7.45e-03, grad_scale: 8.0 +2024-09-17 17:08:06,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=263800.0, ans=0.0 +2024-09-17 17:08:06,836 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:08:12,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.47 vs. 
limit=10.0 +2024-09-17 17:08:20,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=263840.0, ans=0.1 +2024-09-17 17:08:22,520 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.30 vs. limit=10.0 +2024-09-17 17:08:28,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=263840.0, ans=0.0 +2024-09-17 17:08:50,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=263920.0, ans=0.0 +2024-09-17 17:08:50,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=263920.0, ans=0.2 +2024-09-17 17:08:57,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=263920.0, ans=0.125 +2024-09-17 17:09:15,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=263960.0, ans=0.0 +2024-09-17 17:09:18,683 INFO [train.py:1198] (1/2) Epoch 15, batch 2650, loss[loss=0.2677, ctc_loss=0.1613, cr_loss=0.4111, attn_decoder_loss=0.2704, over 29258.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1506, cr_loss=0.3924, attn_decoder_loss=0.2578, over 5801027.78 frames. ], batch size: 100, lr: 7.45e-03, grad_scale: 8.0 +2024-09-17 17:09:19,143 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:09:33,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=264000.0, ans=0.125 +2024-09-17 17:09:51,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=264080.0, ans=0.125 +2024-09-17 17:10:02,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=264080.0, ans=0.0 +2024-09-17 17:10:06,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=264120.0, ans=0.2 +2024-09-17 17:10:09,779 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.262e+01 9.042e+01 9.386e+01 1.019e+02 2.005e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-17 17:10:22,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=264160.0, ans=0.125 +2024-09-17 17:10:32,025 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.56 vs. limit=15.0 +2024-09-17 17:10:36,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=264160.0, ans=0.1 +2024-09-17 17:10:38,717 INFO [train.py:1198] (1/2) Epoch 15, batch 2700, loss[loss=0.2459, ctc_loss=0.132, cr_loss=0.3645, attn_decoder_loss=0.2505, over 29515.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1508, cr_loss=0.392, attn_decoder_loss=0.258, over 5797103.84 frames. 
], batch size: 87, lr: 7.44e-03, grad_scale: 8.0 +2024-09-17 17:10:39,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=264200.0, ans=0.1 +2024-09-17 17:10:49,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=264200.0, ans=0.0 +2024-09-17 17:10:52,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=264240.0, ans=0.0 +2024-09-17 17:11:06,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=264240.0, ans=0.0 +2024-09-17 17:11:15,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=264280.0, ans=0.125 +2024-09-17 17:11:20,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=264280.0, ans=0.125 +2024-09-17 17:11:23,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=264320.0, ans=0.025 +2024-09-17 17:11:30,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=264320.0, ans=0.2 +2024-09-17 17:11:39,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=264360.0, ans=0.1 +2024-09-17 17:11:55,010 INFO [train.py:1198] (1/2) Epoch 15, batch 2750, loss[loss=0.2483, ctc_loss=0.144, cr_loss=0.368, attn_decoder_loss=0.2517, over 29524.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1496, cr_loss=0.3897, attn_decoder_loss=0.2566, over 5794608.65 frames. ], batch size: 75, lr: 7.44e-03, grad_scale: 8.0 +2024-09-17 17:12:07,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=264400.0, ans=0.125 +2024-09-17 17:12:21,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=264440.0, ans=0.035 +2024-09-17 17:12:24,129 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:12:33,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=264480.0, ans=0.0 +2024-09-17 17:12:41,847 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.170e+01 8.996e+01 9.846e+01 1.075e+02 1.941e+02, threshold=1.969e+02, percent-clipped=1.0 +2024-09-17 17:13:13,288 INFO [train.py:1198] (1/2) Epoch 15, batch 2800, loss[loss=0.2827, ctc_loss=0.2015, cr_loss=0.411, attn_decoder_loss=0.2826, over 20405.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1504, cr_loss=0.3908, attn_decoder_loss=0.2572, over 5776481.99 frames. ], batch size: 210, lr: 7.44e-03, grad_scale: 16.0 +2024-09-17 17:13:42,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.73 vs. limit=22.5 +2024-09-17 17:13:45,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=264680.0, ans=0.05 +2024-09-17 17:14:31,447 INFO [train.py:1198] (1/2) Epoch 15, batch 2850, loss[loss=0.2379, ctc_loss=0.1409, cr_loss=0.401, attn_decoder_loss=0.2398, over 29519.00 frames. 
], tot_loss[loss=0.2549, ctc_loss=0.1512, cr_loss=0.3918, attn_decoder_loss=0.2577, over 5762308.63 frames. ], batch size: 77, lr: 7.44e-03, grad_scale: 8.0 +2024-09-17 17:14:51,483 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:15:06,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=264880.0, ans=0.0 +2024-09-17 17:15:09,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=264880.0, ans=0.1 +2024-09-17 17:15:20,057 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.176e+01 9.061e+01 9.943e+01 1.094e+02 2.532e+02, threshold=1.989e+02, percent-clipped=2.0 +2024-09-17 17:15:23,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=264920.0, ans=0.1 +2024-09-17 17:15:24,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=264920.0, ans=0.125 +2024-09-17 17:15:40,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=264960.0, ans=0.125 +2024-09-17 17:15:47,577 INFO [train.py:1198] (1/2) Epoch 15, batch 2900, loss[loss=0.2564, ctc_loss=0.1417, cr_loss=0.3821, attn_decoder_loss=0.2607, over 29447.00 frames. ], tot_loss[loss=0.2562, ctc_loss=0.152, cr_loss=0.3942, attn_decoder_loss=0.259, over 5787670.87 frames. ], batch size: 79, lr: 7.43e-03, grad_scale: 8.0 +2024-09-17 17:15:53,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=265000.0, ans=0.025 +2024-09-17 17:16:25,343 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.36 vs. limit=15.0 +2024-09-17 17:16:35,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=265120.0, ans=0.2 +2024-09-17 17:17:05,973 INFO [train.py:1198] (1/2) Epoch 15, batch 2950, loss[loss=0.2378, ctc_loss=0.1398, cr_loss=0.3854, attn_decoder_loss=0.2401, over 29538.00 frames. ], tot_loss[loss=0.255, ctc_loss=0.1512, cr_loss=0.3924, attn_decoder_loss=0.2578, over 5783079.06 frames. ], batch size: 75, lr: 7.43e-03, grad_scale: 8.0 +2024-09-17 17:17:21,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.40 vs. limit=15.0 +2024-09-17 17:17:25,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=265240.0, ans=0.1 +2024-09-17 17:17:35,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.68 vs. 
limit=10.0 +2024-09-17 17:17:41,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=265280.0, ans=0.0 +2024-09-17 17:17:47,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=265280.0, ans=0.2 +2024-09-17 17:17:56,690 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.763e+01 8.981e+01 9.731e+01 1.093e+02 3.344e+02, threshold=1.946e+02, percent-clipped=1.0 +2024-09-17 17:18:12,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=265360.0, ans=0.125 +2024-09-17 17:18:24,069 INFO [train.py:1198] (1/2) Epoch 15, batch 3000, loss[loss=0.2522, ctc_loss=0.1478, cr_loss=0.4039, attn_decoder_loss=0.2548, over 29796.00 frames. ], tot_loss[loss=0.2546, ctc_loss=0.1507, cr_loss=0.3916, attn_decoder_loss=0.2574, over 5784782.27 frames. ], batch size: 81, lr: 7.43e-03, grad_scale: 8.0 +2024-09-17 17:18:24,069 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 17:18:31,763 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.4.self_attn_weights, attn_weights_entropy = tensor([3.2878, 2.8578, 2.1803, 2.8235, 2.6970, 1.6750, 2.1686, 2.6388], + device='cuda:1') +2024-09-17 17:18:41,439 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.5.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.6429, 4.6855, 4.1347, 2.4540], device='cuda:1') +2024-09-17 17:18:42,410 INFO [train.py:1230] (1/2) Epoch 15, validation: loss=0.2111, ctc_loss=0.04175, cr_loss=4.872e-15, attn_decoder_loss=0.23, over 944034.00 frames. +2024-09-17 17:18:42,410 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 17:18:44,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=265400.0, ans=0.0 +2024-09-17 17:19:10,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=265440.0, ans=0.2 +2024-09-17 17:19:22,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=265480.0, ans=0.1 +2024-09-17 17:19:48,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=265560.0, ans=0.125 +2024-09-17 17:19:58,991 INFO [train.py:1198] (1/2) Epoch 15, batch 3050, loss[loss=0.2466, ctc_loss=0.1421, cr_loss=0.3894, attn_decoder_loss=0.2496, over 29536.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1514, cr_loss=0.393, attn_decoder_loss=0.2583, over 5778017.30 frames. 
], batch size: 76, lr: 7.42e-03, grad_scale: 8.0 +2024-09-17 17:20:17,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=265640.0, ans=0.125 +2024-09-17 17:20:49,578 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.921e+01 9.345e+01 1.016e+02 1.110e+02 2.723e+02, threshold=2.032e+02, percent-clipped=2.0 +2024-09-17 17:20:49,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=265720.0, ans=0.1 +2024-09-17 17:21:09,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=265760.0, ans=0.0 +2024-09-17 17:21:16,562 INFO [train.py:1198] (1/2) Epoch 15, batch 3100, loss[loss=0.2633, ctc_loss=0.1529, cr_loss=0.3978, attn_decoder_loss=0.2668, over 29280.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1507, cr_loss=0.3919, attn_decoder_loss=0.258, over 5778092.26 frames. ], batch size: 100, lr: 7.42e-03, grad_scale: 8.0 +2024-09-17 17:21:16,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=265800.0, ans=0.125 +2024-09-17 17:21:35,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=265840.0, ans=0.0 +2024-09-17 17:21:54,011 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.32 vs. limit=15.0 +2024-09-17 17:22:02,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=265920.0, ans=0.125 +2024-09-17 17:22:13,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=265920.0, ans=0.0 +2024-09-17 17:22:18,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=265960.0, ans=0.05 +2024-09-17 17:22:19,724 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:22:25,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=265960.0, ans=0.125 +2024-09-17 17:22:30,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=265960.0, ans=0.125 +2024-09-17 17:22:32,385 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.51 vs. limit=15.0 +2024-09-17 17:22:34,711 INFO [train.py:1198] (1/2) Epoch 15, batch 3150, loss[loss=0.2711, ctc_loss=0.1664, cr_loss=0.4112, attn_decoder_loss=0.2735, over 28882.00 frames. ], tot_loss[loss=0.2549, ctc_loss=0.1507, cr_loss=0.3922, attn_decoder_loss=0.2578, over 5785386.82 frames. ], batch size: 104, lr: 7.42e-03, grad_scale: 8.0 +2024-09-17 17:22:38,762 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.90 vs. 
limit=22.5 +2024-09-17 17:22:53,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=266040.0, ans=0.05 +2024-09-17 17:23:11,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=266080.0, ans=0.125 +2024-09-17 17:23:16,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=266080.0, ans=0.95 +2024-09-17 17:23:23,256 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.617e+01 8.876e+01 9.219e+01 9.735e+01 3.011e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-17 17:23:50,661 INFO [train.py:1198] (1/2) Epoch 15, batch 3200, loss[loss=0.2519, ctc_loss=0.14, cr_loss=0.3765, attn_decoder_loss=0.2559, over 29401.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.1501, cr_loss=0.3915, attn_decoder_loss=0.257, over 5796023.00 frames. ], batch size: 79, lr: 7.42e-03, grad_scale: 16.0 +2024-09-17 17:23:57,442 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.42 vs. limit=15.0 +2024-09-17 17:24:14,406 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.08 vs. limit=22.5 +2024-09-17 17:24:15,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=266240.0, ans=0.125 +2024-09-17 17:24:26,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=266280.0, ans=0.2 +2024-09-17 17:24:27,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=266280.0, ans=0.125 +2024-09-17 17:24:31,137 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.60 vs. limit=12.0 +2024-09-17 17:24:43,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.81 vs. limit=15.0 +2024-09-17 17:24:46,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=266320.0, ans=0.1 +2024-09-17 17:24:48,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=266320.0, ans=0.125 +2024-09-17 17:25:07,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=266400.0, ans=0.025 +2024-09-17 17:25:09,282 INFO [train.py:1198] (1/2) Epoch 15, batch 3250, loss[loss=0.2576, ctc_loss=0.1463, cr_loss=0.3967, attn_decoder_loss=0.2612, over 29706.00 frames. ], tot_loss[loss=0.2541, ctc_loss=0.1497, cr_loss=0.3907, attn_decoder_loss=0.257, over 5803327.60 frames. 
], batch size: 84, lr: 7.41e-03, grad_scale: 8.0 +2024-09-17 17:25:15,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=266400.0, ans=0.125 +2024-09-17 17:25:21,697 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:25:24,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=266440.0, ans=0.025 +2024-09-17 17:25:29,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.61 vs. limit=6.0 +2024-09-17 17:25:32,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=266440.0, ans=0.0 +2024-09-17 17:26:01,023 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.078e+01 8.753e+01 9.181e+01 1.001e+02 1.564e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-17 17:26:04,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_positive, batch_count=266520.0, ans=0.05 +2024-09-17 17:26:04,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=266520.0, ans=0.0 +2024-09-17 17:26:26,790 INFO [train.py:1198] (1/2) Epoch 15, batch 3300, loss[loss=0.2723, ctc_loss=0.1675, cr_loss=0.4181, attn_decoder_loss=0.2746, over 28357.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1489, cr_loss=0.3899, attn_decoder_loss=0.256, over 5799894.85 frames. ], batch size: 111, lr: 7.41e-03, grad_scale: 8.0 +2024-09-17 17:26:36,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=266600.0, ans=0.1 +2024-09-17 17:27:17,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=266720.0, ans=0.2 +2024-09-17 17:27:18,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=266720.0, ans=0.125 +2024-09-17 17:27:26,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=266760.0, ans=0.07 +2024-09-17 17:27:35,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=266760.0, ans=0.1 +2024-09-17 17:27:42,411 INFO [train.py:1198] (1/2) Epoch 15, batch 3350, loss[loss=0.2631, ctc_loss=0.1529, cr_loss=0.388, attn_decoder_loss=0.2667, over 28826.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.1498, cr_loss=0.3912, attn_decoder_loss=0.2567, over 5774638.66 frames. ], batch size: 104, lr: 7.41e-03, grad_scale: 8.0 +2024-09-17 17:27:44,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=266800.0, ans=0.0 +2024-09-17 17:27:57,027 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.50 vs. 
limit=22.5 +2024-09-17 17:28:14,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=266880.0, ans=0.125 +2024-09-17 17:28:25,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=266880.0, ans=0.125 +2024-09-17 17:28:27,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=266880.0, ans=0.2 +2024-09-17 17:28:28,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=266920.0, ans=0.125 +2024-09-17 17:28:31,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.78 vs. limit=15.0 +2024-09-17 17:28:34,699 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.002e+01 8.982e+01 9.740e+01 1.080e+02 2.374e+02, threshold=1.948e+02, percent-clipped=1.0 +2024-09-17 17:28:41,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=266920.0, ans=0.125 +2024-09-17 17:28:59,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=267000.0, ans=0.0 +2024-09-17 17:29:00,483 INFO [train.py:1198] (1/2) Epoch 15, batch 3400, loss[loss=0.2293, ctc_loss=0.1345, cr_loss=0.3499, attn_decoder_loss=0.232, over 29345.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1502, cr_loss=0.3919, attn_decoder_loss=0.2568, over 5768003.61 frames. ], batch size: 67, lr: 7.41e-03, grad_scale: 8.0 +2024-09-17 17:29:03,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=267000.0, ans=0.125 +2024-09-17 17:29:05,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=267000.0, ans=0.0 +2024-09-17 17:29:05,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=267000.0, ans=0.0 +2024-09-17 17:29:08,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=267000.0, ans=0.125 +2024-09-17 17:29:19,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=267040.0, ans=0.025 +2024-09-17 17:29:27,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=267040.0, ans=0.125 +2024-09-17 17:29:27,837 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.54 vs. limit=15.0 +2024-09-17 17:29:33,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=267080.0, ans=0.125 +2024-09-17 17:29:33,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.76 vs. 
limit=15.0 +2024-09-17 17:29:45,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=267080.0, ans=0.125 +2024-09-17 17:30:18,824 INFO [train.py:1198] (1/2) Epoch 15, batch 3450, loss[loss=0.2651, ctc_loss=0.1472, cr_loss=0.3852, attn_decoder_loss=0.2697, over 28208.00 frames. ], tot_loss[loss=0.2544, ctc_loss=0.1502, cr_loss=0.3923, attn_decoder_loss=0.2572, over 5775577.85 frames. ], batch size: 111, lr: 7.40e-03, grad_scale: 8.0 +2024-09-17 17:30:25,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=267200.0, ans=0.2 +2024-09-17 17:30:37,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=267240.0, ans=0.125 +2024-09-17 17:30:40,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=267240.0, ans=0.04949747468305833 +2024-09-17 17:30:58,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=267280.0, ans=0.125 +2024-09-17 17:31:00,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.63 vs. limit=15.0 +2024-09-17 17:31:00,594 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.60 vs. limit=22.5 +2024-09-17 17:31:08,471 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.414e+01 8.765e+01 9.280e+01 9.883e+01 2.461e+02, threshold=1.856e+02, percent-clipped=1.0 +2024-09-17 17:31:16,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=267320.0, ans=0.04949747468305833 +2024-09-17 17:31:34,402 INFO [train.py:1198] (1/2) Epoch 15, batch 3500, loss[loss=0.2301, ctc_loss=0.1268, cr_loss=0.3495, attn_decoder_loss=0.2338, over 29319.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1495, cr_loss=0.3916, attn_decoder_loss=0.2566, over 5777873.78 frames. ], batch size: 71, lr: 7.40e-03, grad_scale: 8.0 +2024-09-17 17:32:28,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=267520.0, ans=0.0 +2024-09-17 17:32:42,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=267560.0, ans=0.025 +2024-09-17 17:32:51,207 INFO [train.py:1198] (1/2) Epoch 15, batch 3550, loss[loss=0.2649, ctc_loss=0.1521, cr_loss=0.3994, attn_decoder_loss=0.2686, over 29698.00 frames. ], tot_loss[loss=0.2536, ctc_loss=0.1491, cr_loss=0.3911, attn_decoder_loss=0.2565, over 5784130.05 frames. 
], batch size: 89, lr: 7.40e-03, grad_scale: 8.0 +2024-09-17 17:32:51,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=267600.0, ans=0.125 +2024-09-17 17:32:55,833 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:33:17,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=267640.0, ans=0.0 +2024-09-17 17:33:22,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=267680.0, ans=0.125 +2024-09-17 17:33:25,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten.whitening_limit, batch_count=267680.0, ans=15.0 +2024-09-17 17:33:39,992 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.842e+01 9.279e+01 9.951e+01 4.838e+02, threshold=1.856e+02, percent-clipped=2.0 +2024-09-17 17:33:46,735 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.74 vs. limit=15.0 +2024-09-17 17:34:00,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=267760.0, ans=0.0 +2024-09-17 17:34:05,140 INFO [train.py:1198] (1/2) Epoch 15, batch 3600, loss[loss=0.249, ctc_loss=0.1496, cr_loss=0.4014, attn_decoder_loss=0.2512, over 29512.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1493, cr_loss=0.3913, attn_decoder_loss=0.2566, over 5792870.17 frames. ], batch size: 77, lr: 7.39e-03, grad_scale: 16.0 +2024-09-17 17:34:05,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=267800.0, ans=0.125 +2024-09-17 17:34:05,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=267800.0, ans=0.125 +2024-09-17 17:34:19,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=267800.0, ans=0.125 +2024-09-17 17:34:27,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=267840.0, ans=0.0 +2024-09-17 17:34:31,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=267840.0, ans=0.0 +2024-09-17 17:34:33,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=267840.0, ans=0.125 +2024-09-17 17:34:51,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=267920.0, ans=0.125 +2024-09-17 17:34:52,713 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:34:55,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=267920.0, ans=0.1 +2024-09-17 17:34:56,381 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=8.77 vs. 
limit=15.0 +2024-09-17 17:34:59,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.83 vs. limit=15.0 +2024-09-17 17:34:59,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=267920.0, ans=0.125 +2024-09-17 17:35:04,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=267920.0, ans=0.025 +2024-09-17 17:35:06,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=267960.0, ans=0.1 +2024-09-17 17:35:19,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.32 vs. limit=15.0 +2024-09-17 17:35:22,564 INFO [train.py:1198] (1/2) Epoch 15, batch 3650, loss[loss=0.2691, ctc_loss=0.1665, cr_loss=0.4098, attn_decoder_loss=0.2714, over 29537.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1483, cr_loss=0.3897, attn_decoder_loss=0.2558, over 5793449.42 frames. ], batch size: 90, lr: 7.39e-03, grad_scale: 8.0 +2024-09-17 17:35:41,461 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.38 vs. limit=22.5 +2024-09-17 17:35:44,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=268040.0, ans=6.0 +2024-09-17 17:35:45,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=268040.0, ans=0.0 +2024-09-17 17:35:47,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.87 vs. limit=15.0 +2024-09-17 17:35:49,811 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:35:51,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=268080.0, ans=0.0 +2024-09-17 17:35:51,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=268080.0, ans=0.1 +2024-09-17 17:36:13,524 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.685e+01 9.367e+01 9.867e+01 1.459e+02, threshold=1.873e+02, percent-clipped=0.0 +2024-09-17 17:36:37,447 INFO [train.py:1198] (1/2) Epoch 15, batch 3700, loss[loss=0.2612, ctc_loss=0.1534, cr_loss=0.4045, attn_decoder_loss=0.2642, over 29709.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1482, cr_loss=0.3895, attn_decoder_loss=0.2558, over 5802924.34 frames. 
], batch size: 84, lr: 7.39e-03, grad_scale: 8.0 +2024-09-17 17:36:45,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=268200.0, ans=0.125 +2024-09-17 17:36:46,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=268200.0, ans=0.1 +2024-09-17 17:36:48,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=268200.0, ans=0.2 +2024-09-17 17:36:50,458 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.38 vs. limit=15.0 +2024-09-17 17:36:56,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=268240.0, ans=0.125 +2024-09-17 17:37:51,817 INFO [train.py:1198] (1/2) Epoch 15, batch 3750, loss[loss=0.2221, ctc_loss=0.1286, cr_loss=0.3575, attn_decoder_loss=0.2245, over 29345.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.148, cr_loss=0.3891, attn_decoder_loss=0.2557, over 5807485.15 frames. ], batch size: 67, lr: 7.39e-03, grad_scale: 8.0 +2024-09-17 17:37:57,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.82 vs. limit=15.0 +2024-09-17 17:38:00,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=268400.0, ans=0.0 +2024-09-17 17:38:20,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=268480.0, ans=0.0 +2024-09-17 17:38:21,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=268480.0, ans=0.125 +2024-09-17 17:38:24,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=268480.0, ans=0.1 +2024-09-17 17:38:42,237 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 8.742e+01 9.217e+01 9.952e+01 4.415e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-17 17:38:48,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=268520.0, ans=0.025 +2024-09-17 17:39:05,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=268560.0, ans=0.125 +2024-09-17 17:39:07,745 INFO [train.py:1198] (1/2) Epoch 15, batch 3800, loss[loss=0.2717, ctc_loss=0.1601, cr_loss=0.4358, attn_decoder_loss=0.2745, over 29638.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1475, cr_loss=0.3883, attn_decoder_loss=0.2553, over 5797180.02 frames. 
], batch size: 86, lr: 7.38e-03, grad_scale: 8.0 +2024-09-17 17:39:10,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=268600.0, ans=0.025 +2024-09-17 17:39:24,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=268640.0, ans=0.125 +2024-09-17 17:39:32,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=268640.0, ans=0.0 +2024-09-17 17:39:32,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.29 vs. limit=15.0 +2024-09-17 17:39:40,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.18 vs. limit=22.5 +2024-09-17 17:39:50,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=268680.0, ans=0.0 +2024-09-17 17:40:24,549 INFO [train.py:1198] (1/2) Epoch 15, batch 3850, loss[loss=0.2713, ctc_loss=0.1514, cr_loss=0.3796, attn_decoder_loss=0.2762, over 29258.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1475, cr_loss=0.3883, attn_decoder_loss=0.2553, over 5812226.03 frames. ], batch size: 100, lr: 7.38e-03, grad_scale: 8.0 +2024-09-17 17:40:28,090 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.04 vs. limit=22.5 +2024-09-17 17:40:30,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=268800.0, ans=0.125 +2024-09-17 17:40:51,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=268840.0, ans=0.1 +2024-09-17 17:40:59,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=268880.0, ans=0.07 +2024-09-17 17:40:59,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=268880.0, ans=0.025 +2024-09-17 17:41:16,760 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 9.057e+01 9.751e+01 1.063e+02 2.027e+02, threshold=1.950e+02, percent-clipped=1.0 +2024-09-17 17:41:20,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=268920.0, ans=0.0 +2024-09-17 17:41:21,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=268920.0, ans=0.0 +2024-09-17 17:41:22,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=268960.0, ans=0.125 +2024-09-17 17:41:36,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=268960.0, ans=0.025 +2024-09-17 17:41:38,870 INFO [train.py:1198] (1/2) Epoch 15, batch 3900, loss[loss=0.2643, ctc_loss=0.1572, cr_loss=0.4002, attn_decoder_loss=0.2673, over 29618.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1482, cr_loss=0.3899, attn_decoder_loss=0.2563, over 5816934.54 frames. 
], batch size: 86, lr: 7.38e-03, grad_scale: 8.0 +2024-09-17 17:41:52,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=269040.0, ans=0.1 +2024-09-17 17:41:53,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=269040.0, ans=0.125 +2024-09-17 17:42:11,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=269080.0, ans=0.0 +2024-09-17 17:42:16,385 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.88 vs. limit=15.0 +2024-09-17 17:42:52,609 INFO [train.py:1198] (1/2) Epoch 15, batch 3950, loss[loss=0.2699, ctc_loss=0.1636, cr_loss=0.4106, attn_decoder_loss=0.2726, over 29505.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1481, cr_loss=0.3897, attn_decoder_loss=0.2561, over 5836333.29 frames. ], batch size: 97, lr: 7.38e-03, grad_scale: 8.0 +2024-09-17 17:42:54,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=269200.0, ans=0.0 +2024-09-17 17:43:05,442 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.75 vs. limit=15.0 +2024-09-17 17:43:09,763 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.04 vs. limit=10.0 +2024-09-17 17:43:32,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=269280.0, ans=0.125 +2024-09-17 17:43:34,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=269280.0, ans=0.1 +2024-09-17 17:43:44,311 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.673e+01 9.088e+01 9.576e+01 1.054e+02 2.878e+02, threshold=1.915e+02, percent-clipped=1.0 +2024-09-17 17:43:45,022 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.81 vs. limit=12.0 +2024-09-17 17:43:47,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=269320.0, ans=0.0 +2024-09-17 17:43:53,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=269360.0, ans=0.5 +2024-09-17 17:44:07,777 INFO [train.py:1198] (1/2) Epoch 15, batch 4000, loss[loss=0.2317, ctc_loss=0.1313, cr_loss=0.3675, attn_decoder_loss=0.2347, over 29521.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1482, cr_loss=0.389, attn_decoder_loss=0.2558, over 5813111.55 frames. ], batch size: 74, lr: 7.37e-03, grad_scale: 16.0 +2024-09-17 17:44:21,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=269440.0, ans=0.125 +2024-09-17 17:44:23,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.52 vs. 
limit=15.0 +2024-09-17 17:45:13,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=269560.0, ans=0.125 +2024-09-17 17:45:22,408 INFO [train.py:1198] (1/2) Epoch 15, batch 4050, loss[loss=0.3008, ctc_loss=0.2226, cr_loss=0.4265, attn_decoder_loss=0.3, over 19355.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1485, cr_loss=0.3889, attn_decoder_loss=0.256, over 5796234.00 frames. ], batch size: 209, lr: 7.37e-03, grad_scale: 8.0 +2024-09-17 17:45:59,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=269680.0, ans=0.125 +2024-09-17 17:46:16,371 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.785e+01 8.913e+01 9.474e+01 1.030e+02 4.406e+02, threshold=1.895e+02, percent-clipped=2.0 +2024-09-17 17:46:25,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=269760.0, ans=0.1 +2024-09-17 17:46:35,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=269800.0, ans=0.2 +2024-09-17 17:46:37,086 INFO [train.py:1198] (1/2) Epoch 15, batch 4100, loss[loss=0.2712, ctc_loss=0.1641, cr_loss=0.4312, attn_decoder_loss=0.2735, over 29495.00 frames. ], tot_loss[loss=0.2534, ctc_loss=0.1491, cr_loss=0.3896, attn_decoder_loss=0.2563, over 5791378.60 frames. ], batch size: 90, lr: 7.37e-03, grad_scale: 8.0 +2024-09-17 17:46:40,683 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.59 vs. limit=22.5 +2024-09-17 17:47:02,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=269840.0, ans=0.125 +2024-09-17 17:47:08,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=269880.0, ans=0.125 +2024-09-17 17:47:14,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=15.0 +2024-09-17 17:47:15,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=269880.0, ans=0.125 +2024-09-17 17:47:15,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=269880.0, ans=0.125 +2024-09-17 17:47:23,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=269920.0, ans=0.125 +2024-09-17 17:47:45,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.50 vs. limit=15.0 +2024-09-17 17:47:50,741 INFO [train.py:1198] (1/2) Epoch 15, batch 4150, loss[loss=0.2573, ctc_loss=0.1541, cr_loss=0.4002, attn_decoder_loss=0.2599, over 29477.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1488, cr_loss=0.3893, attn_decoder_loss=0.2561, over 5796669.16 frames. 
], batch size: 77, lr: 7.36e-03, grad_scale: 8.0 +2024-09-17 17:48:05,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=270040.0, ans=0.0 +2024-09-17 17:48:21,151 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.46 vs. limit=22.5 +2024-09-17 17:48:35,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=270120.0, ans=0.0 +2024-09-17 17:48:42,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=270120.0, ans=0.1 +2024-09-17 17:48:45,085 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.781e+01 9.267e+01 9.931e+01 2.534e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-17 17:48:46,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=270120.0, ans=0.2 +2024-09-17 17:48:58,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=270160.0, ans=0.125 +2024-09-17 17:49:06,149 INFO [train.py:1198] (1/2) Epoch 15, batch 4200, loss[loss=0.27, ctc_loss=0.1646, cr_loss=0.4262, attn_decoder_loss=0.2722, over 29486.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1488, cr_loss=0.3894, attn_decoder_loss=0.2562, over 5797682.93 frames. ], batch size: 90, lr: 7.36e-03, grad_scale: 8.0 +2024-09-17 17:49:15,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=270200.0, ans=0.2 +2024-09-17 17:49:15,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=270200.0, ans=0.0 +2024-09-17 17:49:22,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=270240.0, ans=0.125 +2024-09-17 17:49:38,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=270280.0, ans=0.2 +2024-09-17 17:49:55,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=270320.0, ans=0.025 +2024-09-17 17:50:06,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=270360.0, ans=0.025 +2024-09-17 17:50:21,302 INFO [train.py:1198] (1/2) Epoch 15, batch 4250, loss[loss=0.2344, ctc_loss=0.1311, cr_loss=0.3534, attn_decoder_loss=0.238, over 29514.00 frames. ], tot_loss[loss=0.2534, ctc_loss=0.1484, cr_loss=0.3889, attn_decoder_loss=0.2564, over 5803571.93 frames. 
], batch size: 74, lr: 7.36e-03, grad_scale: 8.0 +2024-09-17 17:50:24,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=270400.0, ans=0.125 +2024-09-17 17:50:31,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=270400.0, ans=0.2 +2024-09-17 17:50:46,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=270440.0, ans=0.04949747468305833 +2024-09-17 17:50:55,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=270480.0, ans=0.125 +2024-09-17 17:50:59,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=270480.0, ans=0.125 +2024-09-17 17:51:11,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=270520.0, ans=0.125 +2024-09-17 17:51:14,266 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.533e+01 8.903e+01 9.469e+01 1.020e+02 2.237e+02, threshold=1.894e+02, percent-clipped=2.0 +2024-09-17 17:51:19,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=270560.0, ans=0.0 +2024-09-17 17:51:35,161 INFO [train.py:1198] (1/2) Epoch 15, batch 4300, loss[loss=0.2573, ctc_loss=0.1443, cr_loss=0.3775, attn_decoder_loss=0.2615, over 29549.00 frames. ], tot_loss[loss=0.2535, ctc_loss=0.1486, cr_loss=0.3888, attn_decoder_loss=0.2566, over 5793885.01 frames. ], batch size: 87, lr: 7.36e-03, grad_scale: 8.0 +2024-09-17 17:51:55,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=270640.0, ans=0.125 +2024-09-17 17:52:03,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=270640.0, ans=0.125 +2024-09-17 17:52:27,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=270720.0, ans=0.125 +2024-09-17 17:52:32,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=270720.0, ans=0.1 +2024-09-17 17:52:40,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.42 vs. limit=22.5 +2024-09-17 17:52:50,156 INFO [train.py:1198] (1/2) Epoch 15, batch 4350, loss[loss=0.2618, ctc_loss=0.1615, cr_loss=0.3952, attn_decoder_loss=0.2642, over 29471.00 frames. ], tot_loss[loss=0.257, ctc_loss=0.1516, cr_loss=0.3938, attn_decoder_loss=0.2599, over 5796545.41 frames. 
], batch size: 97, lr: 7.35e-03, grad_scale: 8.0 +2024-09-17 17:53:01,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=270800.0, ans=0.04949747468305833 +2024-09-17 17:53:06,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=270840.0, ans=0.025 +2024-09-17 17:53:10,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=270840.0, ans=0.125 +2024-09-17 17:53:16,695 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.05 vs. limit=15.0 +2024-09-17 17:53:18,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=270880.0, ans=0.0 +2024-09-17 17:53:26,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=270880.0, ans=0.125 +2024-09-17 17:53:27,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=270880.0, ans=0.125 +2024-09-17 17:53:43,655 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.462e+01 9.074e+01 9.575e+01 1.004e+02 1.676e+02, threshold=1.915e+02, percent-clipped=0.0 +2024-09-17 17:54:03,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=271000.0, ans=0.025 +2024-09-17 17:54:04,374 INFO [train.py:1198] (1/2) Epoch 15, batch 4400, loss[loss=0.2705, ctc_loss=0.1694, cr_loss=0.4379, attn_decoder_loss=0.272, over 27182.00 frames. ], tot_loss[loss=0.2593, ctc_loss=0.1535, cr_loss=0.3967, attn_decoder_loss=0.2623, over 5767137.01 frames. ], batch size: 124, lr: 7.35e-03, grad_scale: 16.0 +2024-09-17 17:55:18,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=271200.0, ans=0.125 +2024-09-17 17:55:19,873 INFO [train.py:1198] (1/2) Epoch 15, batch 4450, loss[loss=0.2816, ctc_loss=0.1984, cr_loss=0.4188, attn_decoder_loss=0.2816, over 19305.00 frames. ], tot_loss[loss=0.2622, ctc_loss=0.1585, cr_loss=0.4017, attn_decoder_loss=0.2648, over 5570144.65 frames. ], batch size: 209, lr: 7.35e-03, grad_scale: 8.0 +2024-09-17 17:55:20,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=271200.0, ans=0.2 +2024-09-17 17:55:25,671 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.73 vs. limit=22.5 +2024-09-17 17:55:30,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=271200.0, ans=0.125 +2024-09-17 17:55:45,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=271240.0, ans=0.125 +2024-09-17 17:55:51,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.47 vs. 
limit=15.0 +2024-09-17 17:55:53,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=271280.0, ans=0.2 +2024-09-17 17:56:09,285 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.61 vs. limit=15.0 +2024-09-17 17:56:17,153 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.262e+01 9.506e+01 1.069e+02 1.178e+02 1.981e+02, threshold=2.138e+02, percent-clipped=1.0 +2024-09-17 17:56:19,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.40 vs. limit=10.0 +2024-09-17 17:56:30,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=271360.0, ans=0.125 +2024-09-17 17:56:35,250 INFO [train.py:1198] (1/2) Epoch 15, batch 4500, loss[loss=0.2832, ctc_loss=0.1905, cr_loss=0.4282, attn_decoder_loss=0.284, over 20377.00 frames. ], tot_loss[loss=0.2654, ctc_loss=0.1643, cr_loss=0.4043, attn_decoder_loss=0.2677, over 5229556.26 frames. ], batch size: 209, lr: 7.35e-03, grad_scale: 8.0 +2024-09-17 17:56:38,645 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:56:42,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=271400.0, ans=0.125 +2024-09-17 17:56:43,618 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.90 vs. limit=15.0 +2024-09-17 17:56:46,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=271400.0, ans=0.2 +2024-09-17 17:58:05,196 INFO [train.py:1198] (1/2) Epoch 16, batch 0, loss[loss=0.2358, ctc_loss=0.136, cr_loss=0.3633, attn_decoder_loss=0.2388, over 29605.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.136, cr_loss=0.3633, attn_decoder_loss=0.2388, over 29605.00 frames. ], batch size: 73, lr: 7.11e-03, grad_scale: 16.0 +2024-09-17 17:58:05,196 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 17:58:11,303 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.4.self_attn_weights, attn_weights_entropy = tensor([4.6454, 4.1344, 3.7429, 4.4286, 3.5211, 3.6254, 3.7390, 3.7935], + device='cuda:1') +2024-09-17 17:58:23,630 INFO [train.py:1230] (1/2) Epoch 16, validation: loss=0.2124, ctc_loss=0.04089, cr_loss=4.638e-15, attn_decoder_loss=0.2315, over 944034.00 frames. +2024-09-17 17:58:23,630 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 17:58:29,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=271500.0, ans=0.125 +2024-09-17 17:58:29,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=271500.0, ans=0.125 +2024-09-17 17:58:34,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=7.48 vs. 
limit=12.0 +2024-09-17 17:59:08,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=271620.0, ans=0.125 +2024-09-17 17:59:13,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=271620.0, ans=0.025 +2024-09-17 17:59:16,653 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 17:59:19,711 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.32 vs. limit=15.0 +2024-09-17 17:59:19,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.22 vs. limit=22.5 +2024-09-17 17:59:40,416 INFO [train.py:1198] (1/2) Epoch 16, batch 50, loss[loss=0.2203, ctc_loss=0.1201, cr_loss=0.3373, attn_decoder_loss=0.2239, over 29449.00 frames. ], tot_loss[loss=0.2552, ctc_loss=0.1521, cr_loss=0.3965, attn_decoder_loss=0.2578, over 1267704.93 frames. ], batch size: 70, lr: 7.11e-03, grad_scale: 8.0 +2024-09-17 17:59:54,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=271740.0, ans=0.125 +2024-09-17 18:00:01,933 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.262e+01 1.005e+02 1.104e+02 1.206e+02 4.510e+02, threshold=2.208e+02, percent-clipped=2.0 +2024-09-17 18:00:19,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=271780.0, ans=15.0 +2024-09-17 18:00:50,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=271860.0, ans=0.125 +2024-09-17 18:00:56,280 INFO [train.py:1198] (1/2) Epoch 16, batch 100, loss[loss=0.2452, ctc_loss=0.1436, cr_loss=0.3803, attn_decoder_loss=0.2481, over 29542.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1527, cr_loss=0.3964, attn_decoder_loss=0.2603, over 2252277.18 frames. ], batch size: 76, lr: 7.10e-03, grad_scale: 8.0 +2024-09-17 18:01:13,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.81 vs. limit=22.5 +2024-09-17 18:01:14,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=271940.0, ans=0.0 +2024-09-17 18:01:26,498 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.26 vs. 
limit=15.0 +2024-09-17 18:01:33,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=271980.0, ans=0.125 +2024-09-17 18:01:57,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=272020.0, ans=0.0 +2024-09-17 18:02:02,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=272020.0, ans=0.1 +2024-09-17 18:02:18,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=272060.0, ans=0.0 +2024-09-17 18:02:21,066 INFO [train.py:1198] (1/2) Epoch 16, batch 150, loss[loss=0.2316, ctc_loss=0.1353, cr_loss=0.3637, attn_decoder_loss=0.2343, over 29432.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1494, cr_loss=0.3926, attn_decoder_loss=0.2569, over 3048296.68 frames. ], batch size: 70, lr: 7.10e-03, grad_scale: 8.0 +2024-09-17 18:02:23,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.81 vs. limit=6.0 +2024-09-17 18:02:35,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=272140.0, ans=0.125 +2024-09-17 18:02:40,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.23 vs. limit=15.0 +2024-09-17 18:02:42,307 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.742e+01 8.615e+01 9.462e+01 1.007e+02 3.571e+02, threshold=1.892e+02, percent-clipped=1.0 +2024-09-17 18:02:43,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.84 vs. limit=15.0 +2024-09-17 18:02:52,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=272180.0, ans=0.125 +2024-09-17 18:02:56,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=272180.0, ans=0.125 +2024-09-17 18:03:17,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=272220.0, ans=0.125 +2024-09-17 18:03:32,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=272260.0, ans=0.025 +2024-09-17 18:03:38,603 INFO [train.py:1198] (1/2) Epoch 16, batch 200, loss[loss=0.271, ctc_loss=0.1639, cr_loss=0.4228, attn_decoder_loss=0.2735, over 27236.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1485, cr_loss=0.3906, attn_decoder_loss=0.2556, over 3658982.98 frames. ], batch size: 124, lr: 7.10e-03, grad_scale: 8.0 +2024-09-17 18:04:54,434 INFO [train.py:1198] (1/2) Epoch 16, batch 250, loss[loss=0.2749, ctc_loss=0.164, cr_loss=0.4137, attn_decoder_loss=0.278, over 29206.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1485, cr_loss=0.3912, attn_decoder_loss=0.2562, over 4141418.96 frames. 
], batch size: 100, lr: 7.10e-03, grad_scale: 8.0 +2024-09-17 18:05:15,431 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.756e+01 8.691e+01 9.311e+01 9.688e+01 2.016e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-17 18:05:15,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=272540.0, ans=0.125 +2024-09-17 18:05:18,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=272540.0, ans=0.2 +2024-09-17 18:05:27,487 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.58 vs. limit=15.0 +2024-09-17 18:05:31,890 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.03 vs. limit=15.0 +2024-09-17 18:05:43,018 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=14.79 vs. limit=15.0 +2024-09-17 18:05:54,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=272620.0, ans=0.125 +2024-09-17 18:05:59,163 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.35 vs. limit=15.0 +2024-09-17 18:06:12,071 INFO [train.py:1198] (1/2) Epoch 16, batch 300, loss[loss=0.2719, ctc_loss=0.1555, cr_loss=0.4101, attn_decoder_loss=0.2757, over 29539.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1481, cr_loss=0.3908, attn_decoder_loss=0.256, over 4510126.14 frames. ], batch size: 92, lr: 7.09e-03, grad_scale: 8.0 +2024-09-17 18:06:12,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=272700.0, ans=0.025 +2024-09-17 18:06:35,636 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.61 vs. limit=6.0 +2024-09-17 18:06:52,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=272780.0, ans=0.2 +2024-09-17 18:07:12,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=272820.0, ans=0.125 +2024-09-17 18:07:30,517 INFO [train.py:1198] (1/2) Epoch 16, batch 350, loss[loss=0.2283, ctc_loss=0.1268, cr_loss=0.3353, attn_decoder_loss=0.2321, over 29310.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1482, cr_loss=0.3908, attn_decoder_loss=0.2563, over 4796195.94 frames. 
], batch size: 71, lr: 7.09e-03, grad_scale: 8.0 +2024-09-17 18:07:33,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=272900.0, ans=0.2 +2024-09-17 18:07:51,716 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.443e+01 8.832e+01 9.583e+01 1.052e+02 2.461e+02, threshold=1.917e+02, percent-clipped=3.0 +2024-09-17 18:07:59,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=272980.0, ans=0.125 +2024-09-17 18:08:31,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=273060.0, ans=0.2 +2024-09-17 18:08:31,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=273060.0, ans=0.0 +2024-09-17 18:08:32,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=273060.0, ans=0.125 +2024-09-17 18:08:45,894 INFO [train.py:1198] (1/2) Epoch 16, batch 400, loss[loss=0.2667, ctc_loss=0.1578, cr_loss=0.407, attn_decoder_loss=0.2697, over 29721.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1478, cr_loss=0.39, attn_decoder_loss=0.2559, over 5025881.51 frames. ], batch size: 82, lr: 7.09e-03, grad_scale: 16.0 +2024-09-17 18:08:47,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=273100.0, ans=0.125 +2024-09-17 18:08:53,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=273100.0, ans=0.2 +2024-09-17 18:08:56,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=273100.0, ans=0.0 +2024-09-17 18:09:00,249 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.32 vs. limit=22.5 +2024-09-17 18:09:21,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=273180.0, ans=0.125 +2024-09-17 18:09:25,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=273180.0, ans=0.125 +2024-09-17 18:09:35,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=273220.0, ans=0.125 +2024-09-17 18:09:38,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=273220.0, ans=0.125 +2024-09-17 18:09:41,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=273220.0, ans=0.125 +2024-09-17 18:09:57,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=273260.0, ans=0.0 +2024-09-17 18:10:00,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=273260.0, ans=0.125 +2024-09-17 18:10:04,243 INFO [train.py:1198] (1/2) Epoch 16, batch 450, loss[loss=0.261, ctc_loss=0.1526, cr_loss=0.4124, attn_decoder_loss=0.2639, over 29707.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.148, cr_loss=0.3905, attn_decoder_loss=0.256, over 5187344.06 frames. 
], batch size: 83, lr: 7.09e-03, grad_scale: 8.0 +2024-09-17 18:10:16,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=273300.0, ans=0.125 +2024-09-17 18:10:25,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=273340.0, ans=0.0 +2024-09-17 18:10:26,872 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.756e+01 9.412e+01 1.001e+02 2.554e+02, threshold=1.882e+02, percent-clipped=1.0 +2024-09-17 18:11:00,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=273420.0, ans=0.2 +2024-09-17 18:11:16,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=273460.0, ans=0.025 +2024-09-17 18:11:22,627 INFO [train.py:1198] (1/2) Epoch 16, batch 500, loss[loss=0.2733, ctc_loss=0.165, cr_loss=0.4468, attn_decoder_loss=0.2754, over 29362.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1472, cr_loss=0.3894, attn_decoder_loss=0.255, over 5329993.98 frames. ], batch size: 94, lr: 7.08e-03, grad_scale: 8.0 +2024-09-17 18:11:23,784 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.46 vs. limit=15.0 +2024-09-17 18:11:24,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=273500.0, ans=0.125 +2024-09-17 18:11:26,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=273500.0, ans=0.1 +2024-09-17 18:11:35,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=273500.0, ans=0.125 +2024-09-17 18:11:38,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=273540.0, ans=0.125 +2024-09-17 18:11:41,765 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.46 vs. limit=6.0 +2024-09-17 18:12:30,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.14 vs. limit=15.0 +2024-09-17 18:12:39,140 INFO [train.py:1198] (1/2) Epoch 16, batch 550, loss[loss=0.2708, ctc_loss=0.1583, cr_loss=0.405, attn_decoder_loss=0.2743, over 28834.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1471, cr_loss=0.3895, attn_decoder_loss=0.255, over 5421959.40 frames. ], batch size: 104, lr: 7.08e-03, grad_scale: 8.0 +2024-09-17 18:12:44,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=273700.0, ans=0.2 +2024-09-17 18:12:46,256 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.24 vs. 
limit=6.0 +2024-09-17 18:13:01,896 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.696e+01 8.801e+01 9.400e+01 1.011e+02 1.613e+02, threshold=1.880e+02, percent-clipped=0.0 +2024-09-17 18:13:27,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=273820.0, ans=0.125 +2024-09-17 18:13:28,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=273820.0, ans=0.125 +2024-09-17 18:13:32,379 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.81 vs. limit=15.0 +2024-09-17 18:13:33,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=273820.0, ans=0.0 +2024-09-17 18:13:57,168 INFO [train.py:1198] (1/2) Epoch 16, batch 600, loss[loss=0.2582, ctc_loss=0.1462, cr_loss=0.3898, attn_decoder_loss=0.262, over 29286.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1468, cr_loss=0.3888, attn_decoder_loss=0.2548, over 5507978.34 frames. ], batch size: 100, lr: 7.08e-03, grad_scale: 8.0 +2024-09-17 18:14:32,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=273980.0, ans=0.125 +2024-09-17 18:14:54,718 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=12.0 +2024-09-17 18:15:02,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.17 vs. limit=15.0 +2024-09-17 18:15:04,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=274060.0, ans=0.1 +2024-09-17 18:15:15,059 INFO [train.py:1198] (1/2) Epoch 16, batch 650, loss[loss=0.2572, ctc_loss=0.1436, cr_loss=0.3726, attn_decoder_loss=0.2616, over 29738.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1463, cr_loss=0.3876, attn_decoder_loss=0.2543, over 5585326.45 frames. ], batch size: 81, lr: 7.08e-03, grad_scale: 8.0 +2024-09-17 18:15:33,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=274140.0, ans=0.0 +2024-09-17 18:15:37,737 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.963e+01 8.981e+01 9.378e+01 1.004e+02 1.703e+02, threshold=1.876e+02, percent-clipped=0.0 +2024-09-17 18:16:00,361 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.75 vs. limit=15.0 +2024-09-17 18:16:06,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=274220.0, ans=0.0 +2024-09-17 18:16:13,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=274220.0, ans=0.0 +2024-09-17 18:16:22,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=274260.0, ans=0.1 +2024-09-17 18:16:25,599 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.00 vs. 
limit=22.5 +2024-09-17 18:16:30,940 INFO [train.py:1198] (1/2) Epoch 16, batch 700, loss[loss=0.234, ctc_loss=0.1327, cr_loss=0.3697, attn_decoder_loss=0.237, over 29516.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.147, cr_loss=0.3885, attn_decoder_loss=0.2551, over 5635656.08 frames. ], batch size: 76, lr: 7.07e-03, grad_scale: 8.0 +2024-09-17 18:16:38,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=274300.0, ans=0.0 +2024-09-17 18:16:46,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=274340.0, ans=0.1 +2024-09-17 18:16:46,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=274340.0, ans=0.125 +2024-09-17 18:16:46,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=274340.0, ans=0.0 +2024-09-17 18:16:47,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=274340.0, ans=0.125 +2024-09-17 18:16:51,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=274340.0, ans=0.025 +2024-09-17 18:16:56,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=274340.0, ans=0.2 +2024-09-17 18:16:59,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=274380.0, ans=0.0 +2024-09-17 18:17:03,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.24 vs. limit=10.0 +2024-09-17 18:17:18,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=274420.0, ans=0.125 +2024-09-17 18:17:23,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=274420.0, ans=0.0 +2024-09-17 18:17:35,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=274460.0, ans=0.125 +2024-09-17 18:17:49,225 INFO [train.py:1198] (1/2) Epoch 16, batch 750, loss[loss=0.2643, ctc_loss=0.1552, cr_loss=0.4181, attn_decoder_loss=0.2671, over 29700.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1471, cr_loss=0.3882, attn_decoder_loss=0.2549, over 5674925.14 frames. 
], batch size: 82, lr: 7.07e-03, grad_scale: 8.0 +2024-09-17 18:17:50,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=274500.0, ans=0.2 +2024-09-17 18:18:11,564 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.869e+01 8.564e+01 9.225e+01 9.974e+01 3.199e+02, threshold=1.845e+02, percent-clipped=1.0 +2024-09-17 18:18:17,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=274580.0, ans=0.1 +2024-09-17 18:18:30,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=274580.0, ans=0.1 +2024-09-17 18:18:40,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=274620.0, ans=0.05 +2024-09-17 18:18:50,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=274660.0, ans=0.0 +2024-09-17 18:19:06,929 INFO [train.py:1198] (1/2) Epoch 16, batch 800, loss[loss=0.2239, ctc_loss=0.1218, cr_loss=0.357, attn_decoder_loss=0.2273, over 29596.00 frames. ], tot_loss[loss=0.2519, ctc_loss=0.147, cr_loss=0.3884, attn_decoder_loss=0.2549, over 5705979.64 frames. ], batch size: 73, lr: 7.07e-03, grad_scale: 16.0 +2024-09-17 18:19:07,756 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.31 vs. limit=15.0 +2024-09-17 18:19:08,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=274700.0, ans=0.125 +2024-09-17 18:19:13,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=274700.0, ans=0.0 +2024-09-17 18:19:14,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=274700.0, ans=0.0 +2024-09-17 18:19:16,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=274700.0, ans=0.125 +2024-09-17 18:19:19,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=274700.0, ans=0.025 +2024-09-17 18:19:25,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=274740.0, ans=0.125 +2024-09-17 18:19:34,734 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.24 vs. limit=15.0 +2024-09-17 18:20:03,291 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.88 vs. limit=15.0 +2024-09-17 18:20:04,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=274820.0, ans=0.0 +2024-09-17 18:20:16,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=274860.0, ans=0.125 +2024-09-17 18:20:22,023 INFO [train.py:1198] (1/2) Epoch 16, batch 850, loss[loss=0.2617, ctc_loss=0.1451, cr_loss=0.3881, attn_decoder_loss=0.2661, over 29702.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1466, cr_loss=0.3878, attn_decoder_loss=0.2547, over 5735757.01 frames. 
], batch size: 89, lr: 7.07e-03, grad_scale: 8.0 +2024-09-17 18:20:45,874 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.953e+01 9.515e+01 1.010e+02 2.580e+02, threshold=1.903e+02, percent-clipped=2.0 +2024-09-17 18:20:47,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=274940.0, ans=0.1 +2024-09-17 18:20:50,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=274980.0, ans=0.0 +2024-09-17 18:20:53,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=274980.0, ans=0.2 +2024-09-17 18:20:58,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=274980.0, ans=0.1 +2024-09-17 18:21:09,769 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=7.90 vs. limit=15.0 +2024-09-17 18:21:28,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=275060.0, ans=0.125 +2024-09-17 18:21:36,102 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.26 vs. limit=22.5 +2024-09-17 18:21:38,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=275100.0, ans=0.0 +2024-09-17 18:21:39,955 INFO [train.py:1198] (1/2) Epoch 16, batch 900, loss[loss=0.2281, ctc_loss=0.1263, cr_loss=0.3492, attn_decoder_loss=0.2316, over 29620.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1469, cr_loss=0.388, attn_decoder_loss=0.2551, over 5741851.29 frames. ], batch size: 73, lr: 7.06e-03, grad_scale: 8.0 +2024-09-17 18:21:43,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.61 vs. limit=15.0 +2024-09-17 18:21:47,777 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:21:53,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=275140.0, ans=0.125 +2024-09-17 18:21:55,719 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.86 vs. limit=10.0 +2024-09-17 18:22:01,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=275140.0, ans=0.025 +2024-09-17 18:22:18,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=275180.0, ans=0.125 +2024-09-17 18:22:41,082 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.62 vs. limit=15.0 +2024-09-17 18:22:57,939 INFO [train.py:1198] (1/2) Epoch 16, batch 950, loss[loss=0.2246, ctc_loss=0.1183, cr_loss=0.3251, attn_decoder_loss=0.2292, over 29496.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1472, cr_loss=0.3884, attn_decoder_loss=0.2554, over 5743120.89 frames. 
], batch size: 74, lr: 7.06e-03, grad_scale: 8.0 +2024-09-17 18:23:13,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=275340.0, ans=0.0 +2024-09-17 18:23:19,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=275340.0, ans=0.0 +2024-09-17 18:23:21,952 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.870e+01 9.132e+01 9.762e+01 1.082e+02 2.725e+02, threshold=1.952e+02, percent-clipped=3.0 +2024-09-17 18:23:28,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=275380.0, ans=0.125 +2024-09-17 18:23:34,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=275380.0, ans=0.0 +2024-09-17 18:24:01,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=275460.0, ans=0.125 +2024-09-17 18:24:04,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=275460.0, ans=0.0 +2024-09-17 18:24:13,030 INFO [train.py:1198] (1/2) Epoch 16, batch 1000, loss[loss=0.2503, ctc_loss=0.1399, cr_loss=0.3793, attn_decoder_loss=0.2541, over 29490.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1483, cr_loss=0.39, attn_decoder_loss=0.2563, over 5736693.36 frames. ], batch size: 77, lr: 7.06e-03, grad_scale: 8.0 +2024-09-17 18:24:36,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=275540.0, ans=0.125 +2024-09-17 18:25:01,105 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.27 vs. limit=15.0 +2024-09-17 18:25:07,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff3.min_abs, batch_count=275620.0, ans=0.2 +2024-09-17 18:25:18,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=275660.0, ans=0.0 +2024-09-17 18:25:30,805 INFO [train.py:1198] (1/2) Epoch 16, batch 1050, loss[loss=0.2636, ctc_loss=0.1517, cr_loss=0.4101, attn_decoder_loss=0.2669, over 29678.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1477, cr_loss=0.3893, attn_decoder_loss=0.2557, over 5743916.50 frames. ], batch size: 85, lr: 7.06e-03, grad_scale: 8.0 +2024-09-17 18:25:55,380 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.705e+01 8.680e+01 9.093e+01 1.030e+02 1.882e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-17 18:26:01,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=275780.0, ans=0.125 +2024-09-17 18:26:30,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.30 vs. limit=22.5 +2024-09-17 18:26:49,382 INFO [train.py:1198] (1/2) Epoch 16, batch 1100, loss[loss=0.2403, ctc_loss=0.1449, cr_loss=0.3855, attn_decoder_loss=0.2423, over 29471.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1473, cr_loss=0.389, attn_decoder_loss=0.2554, over 5755359.28 frames. 
], batch size: 78, lr: 7.05e-03, grad_scale: 8.0 +2024-09-17 18:27:13,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=275940.0, ans=0.2 +2024-09-17 18:27:35,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=276020.0, ans=0.1 +2024-09-17 18:27:38,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=276020.0, ans=0.0 +2024-09-17 18:27:41,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=276020.0, ans=0.05 +2024-09-17 18:28:05,488 INFO [train.py:1198] (1/2) Epoch 16, batch 1150, loss[loss=0.2458, ctc_loss=0.1395, cr_loss=0.3777, attn_decoder_loss=0.2492, over 29472.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1476, cr_loss=0.3892, attn_decoder_loss=0.2554, over 5754070.15 frames. ], batch size: 78, lr: 7.05e-03, grad_scale: 8.0 +2024-09-17 18:28:26,219 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.97 vs. limit=15.0 +2024-09-17 18:28:29,844 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.723e+01 8.744e+01 9.236e+01 1.006e+02 2.528e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-17 18:28:33,434 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:28:44,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=276180.0, ans=0.125 +2024-09-17 18:28:54,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=276220.0, ans=0.125 +2024-09-17 18:28:57,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=276220.0, ans=0.015 +2024-09-17 18:29:14,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=276260.0, ans=0.125 +2024-09-17 18:29:20,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=276260.0, ans=0.2 +2024-09-17 18:29:23,622 INFO [train.py:1198] (1/2) Epoch 16, batch 1200, loss[loss=0.2682, ctc_loss=0.1665, cr_loss=0.3976, attn_decoder_loss=0.2706, over 29681.00 frames. ], tot_loss[loss=0.2531, ctc_loss=0.1483, cr_loss=0.39, attn_decoder_loss=0.2561, over 5745707.65 frames. ], batch size: 85, lr: 7.05e-03, grad_scale: 16.0 +2024-09-17 18:29:24,574 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.59 vs. limit=12.0 +2024-09-17 18:29:49,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=276340.0, ans=0.0 +2024-09-17 18:30:00,201 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.06 vs. 
limit=15.0 +2024-09-17 18:30:19,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=276420.0, ans=0.125 +2024-09-17 18:30:19,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=276420.0, ans=0.1 +2024-09-17 18:30:23,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=276420.0, ans=0.125 +2024-09-17 18:30:41,712 INFO [train.py:1198] (1/2) Epoch 16, batch 1250, loss[loss=0.264, ctc_loss=0.155, cr_loss=0.4191, attn_decoder_loss=0.2668, over 29523.00 frames. ], tot_loss[loss=0.2534, ctc_loss=0.1481, cr_loss=0.39, attn_decoder_loss=0.2565, over 5773844.06 frames. ], batch size: 92, lr: 7.05e-03, grad_scale: 8.0 +2024-09-17 18:30:52,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=276500.0, ans=0.125 +2024-09-17 18:31:07,629 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.409e+01 8.801e+01 9.250e+01 9.945e+01 2.307e+02, threshold=1.850e+02, percent-clipped=1.0 +2024-09-17 18:31:12,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=276580.0, ans=0.125 +2024-09-17 18:31:24,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=276580.0, ans=0.125 +2024-09-17 18:31:27,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=276620.0, ans=0.2 +2024-09-17 18:31:52,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=276660.0, ans=0.0 +2024-09-17 18:31:57,699 INFO [train.py:1198] (1/2) Epoch 16, batch 1300, loss[loss=0.263, ctc_loss=0.1512, cr_loss=0.3926, attn_decoder_loss=0.2667, over 28178.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1479, cr_loss=0.3897, attn_decoder_loss=0.2561, over 5779237.34 frames. ], batch size: 111, lr: 7.04e-03, grad_scale: 8.0 +2024-09-17 18:31:58,044 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:32:01,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=276700.0, ans=0.0 +2024-09-17 18:32:31,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=276780.0, ans=0.0 +2024-09-17 18:32:33,512 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.67 vs. limit=22.5 +2024-09-17 18:32:50,129 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:32:50,702 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.84 vs. 
limit=15.0 +2024-09-17 18:33:05,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=276860.0, ans=0.125 +2024-09-17 18:33:14,155 INFO [train.py:1198] (1/2) Epoch 16, batch 1350, loss[loss=0.2529, ctc_loss=0.1514, cr_loss=0.3895, attn_decoder_loss=0.2555, over 29767.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1469, cr_loss=0.3887, attn_decoder_loss=0.2554, over 5796497.26 frames. ], batch size: 81, lr: 7.04e-03, grad_scale: 8.0 +2024-09-17 18:33:15,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=276900.0, ans=0.0 +2024-09-17 18:33:41,945 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.411e+01 8.515e+01 9.086e+01 9.689e+01 1.239e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-17 18:33:45,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=276980.0, ans=0.125 +2024-09-17 18:33:48,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=276980.0, ans=0.125 +2024-09-17 18:33:53,350 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.07 vs. limit=15.0 +2024-09-17 18:34:08,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=277020.0, ans=0.125 +2024-09-17 18:34:22,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=277060.0, ans=0.1 +2024-09-17 18:34:23,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=277060.0, ans=0.125 +2024-09-17 18:34:24,543 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.97 vs. limit=15.0 +2024-09-17 18:34:34,352 INFO [train.py:1198] (1/2) Epoch 16, batch 1400, loss[loss=0.2185, ctc_loss=0.1193, cr_loss=0.3512, attn_decoder_loss=0.2217, over 29622.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1466, cr_loss=0.3886, attn_decoder_loss=0.2552, over 5807424.80 frames. ], batch size: 69, lr: 7.04e-03, grad_scale: 8.0 +2024-09-17 18:34:34,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=277100.0, ans=0.1 +2024-09-17 18:34:44,125 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.37 vs. limit=6.0 +2024-09-17 18:35:18,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=277220.0, ans=0.0 +2024-09-17 18:35:31,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=277220.0, ans=0.125 +2024-09-17 18:35:36,921 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.74 vs. limit=12.0 +2024-09-17 18:35:49,911 INFO [train.py:1198] (1/2) Epoch 16, batch 1450, loss[loss=0.2671, ctc_loss=0.16, cr_loss=0.4134, attn_decoder_loss=0.2698, over 29439.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1471, cr_loss=0.3894, attn_decoder_loss=0.2559, over 5804962.48 frames. 
], batch size: 94, lr: 7.04e-03, grad_scale: 8.0 +2024-09-17 18:35:51,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=277300.0, ans=0.125 +2024-09-17 18:35:54,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=277300.0, ans=0.1 +2024-09-17 18:35:58,440 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.18 vs. limit=12.0 +2024-09-17 18:36:01,550 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.88 vs. limit=10.0 +2024-09-17 18:36:13,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=277340.0, ans=0.1 +2024-09-17 18:36:15,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.64 vs. limit=15.0 +2024-09-17 18:36:15,714 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.712e+01 8.888e+01 9.569e+01 1.025e+02 2.533e+02, threshold=1.914e+02, percent-clipped=1.0 +2024-09-17 18:36:21,459 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.79 vs. limit=15.0 +2024-09-17 18:36:28,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=277380.0, ans=0.125 +2024-09-17 18:36:29,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=277380.0, ans=22.5 +2024-09-17 18:36:30,558 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.98 vs. limit=22.5 +2024-09-17 18:36:34,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=277420.0, ans=0.0 +2024-09-17 18:37:05,607 INFO [train.py:1198] (1/2) Epoch 16, batch 1500, loss[loss=0.2679, ctc_loss=0.1665, cr_loss=0.405, attn_decoder_loss=0.2702, over 29620.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1471, cr_loss=0.39, attn_decoder_loss=0.256, over 5806325.86 frames. 
], batch size: 86, lr: 7.03e-03, grad_scale: 8.0 +2024-09-17 18:37:18,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=277500.0, ans=0.2 +2024-09-17 18:37:23,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=277540.0, ans=0.0 +2024-09-17 18:37:50,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=277580.0, ans=0.04949747468305833 +2024-09-17 18:37:56,658 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:38:13,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=277660.0, ans=0.0 +2024-09-17 18:38:13,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=277660.0, ans=0.1 +2024-09-17 18:38:18,623 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.08 vs. limit=22.5 +2024-09-17 18:38:26,892 INFO [train.py:1198] (1/2) Epoch 16, batch 1550, loss[loss=0.2769, ctc_loss=0.1685, cr_loss=0.4355, attn_decoder_loss=0.2793, over 29518.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.148, cr_loss=0.3906, attn_decoder_loss=0.2564, over 5781554.05 frames. ], batch size: 90, lr: 7.03e-03, grad_scale: 8.0 +2024-09-17 18:38:31,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=277700.0, ans=0.125 +2024-09-17 18:38:34,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=277700.0, ans=0.125 +2024-09-17 18:38:48,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.89 vs. limit=22.5 +2024-09-17 18:38:49,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=277740.0, ans=0.1 +2024-09-17 18:38:52,448 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.047e+01 8.973e+01 9.587e+01 1.017e+02 1.956e+02, threshold=1.917e+02, percent-clipped=1.0 +2024-09-17 18:39:03,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=277780.0, ans=0.125 +2024-09-17 18:39:15,405 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:39:21,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=277820.0, ans=0.0 +2024-09-17 18:39:23,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=277820.0, ans=0.1 +2024-09-17 18:39:29,005 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:39:42,132 INFO [train.py:1198] (1/2) Epoch 16, batch 1600, loss[loss=0.2601, ctc_loss=0.1492, cr_loss=0.3731, attn_decoder_loss=0.2641, over 29675.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1479, cr_loss=0.3894, attn_decoder_loss=0.256, over 5765326.46 frames. 
], batch size: 85, lr: 7.03e-03, grad_scale: 16.0 +2024-09-17 18:39:49,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=277900.0, ans=0.125 +2024-09-17 18:40:05,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=277940.0, ans=0.2 +2024-09-17 18:40:11,248 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:40:16,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.26 vs. limit=15.0 +2024-09-17 18:40:26,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.61 vs. limit=6.0 +2024-09-17 18:40:30,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=278020.0, ans=0.0 +2024-09-17 18:40:36,880 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:40:40,630 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.90 vs. limit=15.0 +2024-09-17 18:40:57,568 INFO [train.py:1198] (1/2) Epoch 16, batch 1650, loss[loss=0.2627, ctc_loss=0.1479, cr_loss=0.3908, attn_decoder_loss=0.2667, over 29737.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.1478, cr_loss=0.3889, attn_decoder_loss=0.2559, over 5759843.85 frames. ], batch size: 89, lr: 7.02e-03, grad_scale: 8.0 +2024-09-17 18:41:06,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=278100.0, ans=0.125 +2024-09-17 18:41:14,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.21 vs. limit=6.0 +2024-09-17 18:41:19,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=278140.0, ans=0.0 +2024-09-17 18:41:26,853 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.637e+01 9.438e+01 1.013e+02 1.642e+02, threshold=1.888e+02, percent-clipped=0.0 +2024-09-17 18:41:46,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=278220.0, ans=0.125 +2024-09-17 18:41:50,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=278220.0, ans=0.2 +2024-09-17 18:42:01,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=278260.0, ans=0.1 +2024-09-17 18:42:13,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=278260.0, ans=0.0 +2024-09-17 18:42:14,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.26 vs. limit=15.0 +2024-09-17 18:42:17,546 INFO [train.py:1198] (1/2) Epoch 16, batch 1700, loss[loss=0.2176, ctc_loss=0.1197, cr_loss=0.3311, attn_decoder_loss=0.2212, over 29556.00 frames. 
], tot_loss[loss=0.2526, ctc_loss=0.1473, cr_loss=0.3882, attn_decoder_loss=0.2556, over 5781333.77 frames. ], batch size: 69, lr: 7.02e-03, grad_scale: 8.0 +2024-09-17 18:42:26,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=278300.0, ans=0.125 +2024-09-17 18:43:10,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=278420.0, ans=0.125 +2024-09-17 18:43:24,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=278460.0, ans=0.025 +2024-09-17 18:43:29,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=278460.0, ans=0.0 +2024-09-17 18:43:32,996 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.25 vs. limit=15.0 +2024-09-17 18:43:33,568 INFO [train.py:1198] (1/2) Epoch 16, batch 1750, loss[loss=0.2258, ctc_loss=0.1288, cr_loss=0.3537, attn_decoder_loss=0.2287, over 29292.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1466, cr_loss=0.3874, attn_decoder_loss=0.2551, over 5787408.81 frames. ], batch size: 67, lr: 7.02e-03, grad_scale: 8.0 +2024-09-17 18:44:00,899 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.179e+01 8.367e+01 8.955e+01 9.624e+01 1.381e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-17 18:44:29,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=278620.0, ans=0.0 +2024-09-17 18:44:49,085 INFO [train.py:1198] (1/2) Epoch 16, batch 1800, loss[loss=0.265, ctc_loss=0.1535, cr_loss=0.4126, attn_decoder_loss=0.2682, over 29674.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1472, cr_loss=0.3882, attn_decoder_loss=0.2555, over 5789023.18 frames. ], batch size: 83, lr: 7.02e-03, grad_scale: 8.0 +2024-09-17 18:44:53,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=278700.0, ans=0.125 +2024-09-17 18:45:30,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=278780.0, ans=0.125 +2024-09-17 18:45:44,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=278820.0, ans=0.125 +2024-09-17 18:46:07,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=278860.0, ans=0.0 +2024-09-17 18:46:09,877 INFO [train.py:1198] (1/2) Epoch 16, batch 1850, loss[loss=0.2629, ctc_loss=0.1487, cr_loss=0.4038, attn_decoder_loss=0.2666, over 29647.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1471, cr_loss=0.3883, attn_decoder_loss=0.2553, over 5795882.61 frames. ], batch size: 86, lr: 7.02e-03, grad_scale: 8.0 +2024-09-17 18:46:12,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.83 vs. limit=5.0 +2024-09-17 18:46:20,858 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.36 vs. 
limit=15.0 +2024-09-17 18:46:36,825 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.695e+01 8.719e+01 9.438e+01 1.018e+02 2.897e+02, threshold=1.888e+02, percent-clipped=1.0 +2024-09-17 18:46:40,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=278980.0, ans=0.125 +2024-09-17 18:46:41,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=278980.0, ans=0.125 +2024-09-17 18:47:08,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=279060.0, ans=0.125 +2024-09-17 18:47:09,310 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.16 vs. limit=22.5 +2024-09-17 18:47:11,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=279060.0, ans=0.0 +2024-09-17 18:47:17,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=279060.0, ans=0.125 +2024-09-17 18:47:24,895 INFO [train.py:1198] (1/2) Epoch 16, batch 1900, loss[loss=0.2596, ctc_loss=0.1512, cr_loss=0.396, attn_decoder_loss=0.2628, over 29712.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1473, cr_loss=0.3887, attn_decoder_loss=0.2557, over 5803711.86 frames. ], batch size: 89, lr: 7.01e-03, grad_scale: 8.0 +2024-09-17 18:47:38,964 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:47:44,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=279140.0, ans=0.025 +2024-09-17 18:47:51,695 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.00 vs. limit=22.5 +2024-09-17 18:47:52,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=279140.0, ans=0.125 +2024-09-17 18:47:54,435 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:48:01,168 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.35 vs. limit=15.0 +2024-09-17 18:48:08,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=279180.0, ans=0.125 +2024-09-17 18:48:10,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=279220.0, ans=0.125 +2024-09-17 18:48:27,243 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.17 vs. limit=15.0 +2024-09-17 18:48:41,359 INFO [train.py:1198] (1/2) Epoch 16, batch 1950, loss[loss=0.2397, ctc_loss=0.1322, cr_loss=0.3604, attn_decoder_loss=0.2436, over 29418.00 frames. ], tot_loss[loss=0.2536, ctc_loss=0.1475, cr_loss=0.3895, attn_decoder_loss=0.2567, over 5818614.95 frames. 
], batch size: 78, lr: 7.01e-03, grad_scale: 8.0 +2024-09-17 18:48:55,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=279340.0, ans=0.2 +2024-09-17 18:49:08,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=279340.0, ans=0.125 +2024-09-17 18:49:11,013 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.737e+01 8.858e+01 9.399e+01 1.005e+02 1.788e+02, threshold=1.880e+02, percent-clipped=1.0 +2024-09-17 18:49:27,878 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.47 vs. limit=15.0 +2024-09-17 18:49:30,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=279420.0, ans=0.125 +2024-09-17 18:49:32,376 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.32 vs. limit=15.0 +2024-09-17 18:49:37,821 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:49:51,946 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.70 vs. limit=22.5 +2024-09-17 18:49:54,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=279460.0, ans=0.05 +2024-09-17 18:50:00,612 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:50:01,812 INFO [train.py:1198] (1/2) Epoch 16, batch 2000, loss[loss=0.2349, ctc_loss=0.1407, cr_loss=0.3738, attn_decoder_loss=0.237, over 29336.00 frames. ], tot_loss[loss=0.2543, ctc_loss=0.1485, cr_loss=0.3906, attn_decoder_loss=0.2573, over 5795109.36 frames. ], batch size: 67, lr: 7.01e-03, grad_scale: 16.0 +2024-09-17 18:50:20,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=279540.0, ans=0.125 +2024-09-17 18:50:36,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.40 vs. limit=15.0 +2024-09-17 18:50:37,894 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.75 vs. limit=15.0 +2024-09-17 18:50:52,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=279620.0, ans=0.125 +2024-09-17 18:51:17,833 INFO [train.py:1198] (1/2) Epoch 16, batch 2050, loss[loss=0.2193, ctc_loss=0.1221, cr_loss=0.3362, attn_decoder_loss=0.2227, over 29413.00 frames. ], tot_loss[loss=0.2532, ctc_loss=0.1477, cr_loss=0.3892, attn_decoder_loss=0.2562, over 5787449.11 frames. 
], batch size: 70, lr: 7.01e-03, grad_scale: 8.0 +2024-09-17 18:51:19,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=279700.0, ans=0.1 +2024-09-17 18:51:21,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=279700.0, ans=0.125 +2024-09-17 18:51:27,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=279700.0, ans=0.125 +2024-09-17 18:51:46,760 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.240e+01 9.024e+01 1.001e+02 1.116e+02 1.891e+02, threshold=2.001e+02, percent-clipped=1.0 +2024-09-17 18:51:50,299 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:52:03,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=279820.0, ans=0.125 +2024-09-17 18:52:20,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.90 vs. limit=22.5 +2024-09-17 18:52:20,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.22 vs. limit=15.0 +2024-09-17 18:52:33,743 INFO [train.py:1198] (1/2) Epoch 16, batch 2100, loss[loss=0.2451, ctc_loss=0.132, cr_loss=0.3543, attn_decoder_loss=0.2498, over 29723.00 frames. ], tot_loss[loss=0.253, ctc_loss=0.1476, cr_loss=0.3893, attn_decoder_loss=0.256, over 5799744.69 frames. ], batch size: 81, lr: 7.00e-03, grad_scale: 8.0 +2024-09-17 18:52:37,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=279900.0, ans=0.125 +2024-09-17 18:52:42,317 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.44 vs. limit=15.0 +2024-09-17 18:52:46,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=279900.0, ans=0.125 +2024-09-17 18:53:47,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=280060.0, ans=0.0 +2024-09-17 18:53:48,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=280060.0, ans=0.125 +2024-09-17 18:53:53,808 INFO [train.py:1198] (1/2) Epoch 16, batch 2150, loss[loss=0.2547, ctc_loss=0.1531, cr_loss=0.4027, attn_decoder_loss=0.257, over 29456.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1468, cr_loss=0.3883, attn_decoder_loss=0.2553, over 5814794.54 frames. ], batch size: 78, lr: 7.00e-03, grad_scale: 8.0 +2024-09-17 18:53:55,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=280100.0, ans=0.125 +2024-09-17 18:54:08,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.41 vs. 
limit=6.0 +2024-09-17 18:54:12,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=280140.0, ans=0.125 +2024-09-17 18:54:22,950 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.825e+01 8.753e+01 9.321e+01 9.810e+01 1.786e+02, threshold=1.864e+02, percent-clipped=0.0 +2024-09-17 18:54:23,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=280180.0, ans=0.0 +2024-09-17 18:54:23,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=280180.0, ans=0.125 +2024-09-17 18:54:30,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.75 vs. limit=15.0 +2024-09-17 18:54:44,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=280220.0, ans=0.125 +2024-09-17 18:54:52,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=280220.0, ans=0.1 +2024-09-17 18:54:53,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=280260.0, ans=0.035 +2024-09-17 18:55:00,436 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.54 vs. limit=22.5 +2024-09-17 18:55:10,169 INFO [train.py:1198] (1/2) Epoch 16, batch 2200, loss[loss=0.2692, ctc_loss=0.1551, cr_loss=0.412, attn_decoder_loss=0.2728, over 29659.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1469, cr_loss=0.389, attn_decoder_loss=0.2553, over 5811188.11 frames. ], batch size: 86, lr: 7.00e-03, grad_scale: 8.0 +2024-09-17 18:55:47,434 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.07 vs. limit=6.0 +2024-09-17 18:55:49,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=280380.0, ans=0.125 +2024-09-17 18:56:11,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=280460.0, ans=0.125 +2024-09-17 18:56:16,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=280460.0, ans=0.0 +2024-09-17 18:56:24,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=280500.0, ans=0.125 +2024-09-17 18:56:25,748 INFO [train.py:1198] (1/2) Epoch 16, batch 2250, loss[loss=0.2626, ctc_loss=0.1524, cr_loss=0.3786, attn_decoder_loss=0.2665, over 29693.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1471, cr_loss=0.3892, attn_decoder_loss=0.2555, over 5812007.59 frames. 
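The periodic optim.py WARNING lines print quartiles (min/25%/50%/75%/max) of recent gradient norms alongside a clipping threshold; throughout this log the threshold equals Clipping_scale (2.0) times the logged median, e.g. 2.0 * 9.321e+01 ~= 1.864e+02 just above. A sketch of clipping against such a running-median threshold, assuming a sliding window of recent norms; the window size and function name are illustrative, not icefall's actual optimizer code:

import torch

def clip_by_running_median(params, recent_norms, clipping_scale=2.0):
    # Total gradient norm across all parameters.
    sq = [p.grad.norm() ** 2 for p in params if p.grad is not None]
    norm = torch.sqrt(torch.stack(sq).sum())
    recent_norms.append(norm.item())
    del recent_norms[:-1000]  # sliding window of recent batches
    window = torch.tensor(recent_norms)
    # The quartiles printed in the WARNING lines:
    quartiles = torch.quantile(window, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * quartiles[2].item()  # 2.0 * median
    if norm.item() > threshold:
        for p in params:
            if p.grad is not None:
                p.grad.mul_(threshold / norm)  # scale gradient down to threshold
    return quartiles, threshold

Under this reading, "percent-clipped" is simply the share of recent batches on which the scale-down branch fired.
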
], batch size: 82, lr: 7.00e-03, grad_scale: 8.0 +2024-09-17 18:56:29,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=280500.0, ans=0.1 +2024-09-17 18:56:29,151 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 18:56:38,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=280500.0, ans=0.0 +2024-09-17 18:56:54,264 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.834e+01 9.325e+01 1.002e+02 2.125e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-17 18:57:41,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=280660.0, ans=0.125 +2024-09-17 18:57:45,349 INFO [train.py:1198] (1/2) Epoch 16, batch 2300, loss[loss=0.2209, ctc_loss=0.1238, cr_loss=0.3512, attn_decoder_loss=0.2239, over 29349.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1462, cr_loss=0.3873, attn_decoder_loss=0.2542, over 5800407.76 frames. ], batch size: 71, lr: 6.99e-03, grad_scale: 8.0 +2024-09-17 18:57:51,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=280700.0, ans=0.025 +2024-09-17 18:58:03,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=280740.0, ans=0.125 +2024-09-17 18:58:06,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=280740.0, ans=0.1 +2024-09-17 18:58:32,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=280820.0, ans=0.125 +2024-09-17 18:58:48,691 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.76 vs. limit=15.0 +2024-09-17 18:58:52,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=280860.0, ans=0.0 +2024-09-17 18:58:58,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=280860.0, ans=0.0 +2024-09-17 18:59:01,560 INFO [train.py:1198] (1/2) Epoch 16, batch 2350, loss[loss=0.2717, ctc_loss=0.1646, cr_loss=0.3891, attn_decoder_loss=0.275, over 29697.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1466, cr_loss=0.3879, attn_decoder_loss=0.2546, over 5805837.84 frames. ], batch size: 83, lr: 6.99e-03, grad_scale: 8.0 +2024-09-17 18:59:30,268 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.817e+01 8.837e+01 9.442e+01 1.004e+02 6.270e+02, threshold=1.888e+02, percent-clipped=1.0 +2024-09-17 19:00:17,721 INFO [train.py:1198] (1/2) Epoch 16, batch 2400, loss[loss=0.2427, ctc_loss=0.1376, cr_loss=0.3628, attn_decoder_loss=0.2464, over 29521.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.1463, cr_loss=0.388, attn_decoder_loss=0.2548, over 5808351.14 frames. ], batch size: 76, lr: 6.99e-03, grad_scale: 16.0 +2024-09-17 19:00:44,478 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=10.88 vs. 
limit=15.0 +2024-09-17 19:01:16,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=281220.0, ans=0.035 +2024-09-17 19:01:28,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=281260.0, ans=0.025 +2024-09-17 19:01:30,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=281260.0, ans=0.0 +2024-09-17 19:01:33,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.26 vs. limit=15.0 +2024-09-17 19:01:36,118 INFO [train.py:1198] (1/2) Epoch 16, batch 2450, loss[loss=0.2755, ctc_loss=0.1693, cr_loss=0.4428, attn_decoder_loss=0.2774, over 29743.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1472, cr_loss=0.3891, attn_decoder_loss=0.2559, over 5786707.30 frames. ], batch size: 82, lr: 6.99e-03, grad_scale: 8.0 +2024-09-17 19:01:37,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=281300.0, ans=0.1 +2024-09-17 19:01:45,533 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:01:52,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=281340.0, ans=0.125 +2024-09-17 19:02:02,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=281340.0, ans=0.0 +2024-09-17 19:02:06,408 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 9.367e+01 1.015e+02 1.200e+02 3.423e+02, threshold=2.029e+02, percent-clipped=2.0 +2024-09-17 19:02:15,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=281380.0, ans=0.0 +2024-09-17 19:02:28,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=281420.0, ans=0.2 +2024-09-17 19:02:35,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=281460.0, ans=0.0 +2024-09-17 19:02:50,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=281500.0, ans=0.1 +2024-09-17 19:02:51,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.60 vs. limit=22.5 +2024-09-17 19:02:51,879 INFO [train.py:1198] (1/2) Epoch 16, batch 2500, loss[loss=0.2538, ctc_loss=0.1397, cr_loss=0.3781, attn_decoder_loss=0.258, over 29626.00 frames. ], tot_loss[loss=0.2527, ctc_loss=0.1469, cr_loss=0.3887, attn_decoder_loss=0.2559, over 5796636.57 frames. 
], batch size: 86, lr: 6.98e-03, grad_scale: 8.0 +2024-09-17 19:02:58,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=281500.0, ans=0.1 +2024-09-17 19:03:13,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=281540.0, ans=0.0 +2024-09-17 19:03:24,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=281580.0, ans=0.1 +2024-09-17 19:03:24,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=281580.0, ans=0.0 +2024-09-17 19:03:33,978 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.89 vs. limit=12.0 +2024-09-17 19:03:45,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=281620.0, ans=0.125 +2024-09-17 19:03:54,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=281660.0, ans=0.125 +2024-09-17 19:03:56,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=281660.0, ans=0.125 +2024-09-17 19:04:08,345 INFO [train.py:1198] (1/2) Epoch 16, batch 2550, loss[loss=0.2141, ctc_loss=0.109, cr_loss=0.3275, attn_decoder_loss=0.2185, over 29316.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.1467, cr_loss=0.3882, attn_decoder_loss=0.2556, over 5798965.11 frames. ], batch size: 67, lr: 6.98e-03, grad_scale: 8.0 +2024-09-17 19:04:37,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=281780.0, ans=0.0 +2024-09-17 19:04:38,194 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.18 vs. limit=5.0 +2024-09-17 19:04:38,401 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.661e+01 9.348e+01 1.013e+02 3.774e+02, threshold=1.870e+02, percent-clipped=2.0 +2024-09-17 19:05:02,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=281820.0, ans=0.0 +2024-09-17 19:05:07,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=281820.0, ans=0.125 +2024-09-17 19:05:13,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=281860.0, ans=0.0 +2024-09-17 19:05:28,338 INFO [train.py:1198] (1/2) Epoch 16, batch 2600, loss[loss=0.2565, ctc_loss=0.149, cr_loss=0.4052, attn_decoder_loss=0.2594, over 29437.00 frames. ], tot_loss[loss=0.2529, ctc_loss=0.147, cr_loss=0.3888, attn_decoder_loss=0.256, over 5795439.51 frames. 
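The "Whitening: name=..., metric=X vs. limit=Y" lines track how far each module's feature covariance is from a scaled identity; the limit itself is scheduled, and a corrective gradient is applied only when the metric exceeds it. A sketch of one statistic with that behaviour, assuming zero-mean features; this is a stand-in for the metric computed in scaling.py, chosen so that it equals 1.0 exactly when the covariance is a multiple of the identity (the num_groups field in the log indicates channels are first split into groups; the single-group case is shown):

import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    # x: (num_frames, num_channels), assumed zero-mean.
    num_channels = x.shape[-1]
    cov = (x.t() @ x) / x.shape[0]  # (C, C) feature covariance
    # Mean squared covariance entry, normalized by its value for cov = c*I.
    # By Cauchy-Schwarz this ratio is >= 1.0, with equality iff cov = c*I,
    # so larger values mean "less white" features.
    return (cov ** 2).mean() / (torch.diagonal(cov).mean() ** 2 / num_channels)

# Perfectly white features give a metric near 1.0, well under limit=15.0:
x = torch.randn(10000, 512)
print(whitening_metric(x))  # ~1.05 for random (near-white) features
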
], batch size: 78, lr: 6.98e-03, grad_scale: 8.0 +2024-09-17 19:05:38,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=281900.0, ans=0.0 +2024-09-17 19:05:46,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=281940.0, ans=0.0 +2024-09-17 19:05:55,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=281940.0, ans=0.1 +2024-09-17 19:06:00,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=281980.0, ans=0.125 +2024-09-17 19:06:22,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=282020.0, ans=0.0 +2024-09-17 19:06:25,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=282020.0, ans=0.0 +2024-09-17 19:06:43,657 INFO [train.py:1198] (1/2) Epoch 16, batch 2650, loss[loss=0.282, ctc_loss=0.175, cr_loss=0.4406, attn_decoder_loss=0.2841, over 29319.00 frames. ], tot_loss[loss=0.2534, ctc_loss=0.1475, cr_loss=0.3898, attn_decoder_loss=0.2566, over 5802060.40 frames. ], batch size: 100, lr: 6.98e-03, grad_scale: 8.0 +2024-09-17 19:06:43,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=282100.0, ans=0.1 +2024-09-17 19:07:06,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=282140.0, ans=0.0 +2024-09-17 19:07:13,814 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.875e+01 8.834e+01 9.287e+01 9.746e+01 1.582e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-17 19:07:18,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=282180.0, ans=0.5 +2024-09-17 19:07:21,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=282180.0, ans=0.1 +2024-09-17 19:07:59,126 INFO [train.py:1198] (1/2) Epoch 16, batch 2700, loss[loss=0.2669, ctc_loss=0.1485, cr_loss=0.4093, attn_decoder_loss=0.2709, over 29534.00 frames. ], tot_loss[loss=0.2536, ctc_loss=0.1477, cr_loss=0.3903, attn_decoder_loss=0.2567, over 5798618.99 frames. ], batch size: 87, lr: 6.97e-03, grad_scale: 8.0 +2024-09-17 19:08:02,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=282300.0, ans=0.1 +2024-09-17 19:08:07,131 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:08:10,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=282300.0, ans=0.025 +2024-09-17 19:08:17,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=282340.0, ans=0.125 +2024-09-17 19:08:50,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=282420.0, ans=0.125 +2024-09-17 19:09:19,244 INFO [train.py:1198] (1/2) Epoch 16, batch 2750, loss[loss=0.2401, ctc_loss=0.1438, cr_loss=0.3812, attn_decoder_loss=0.2423, over 29513.00 frames. 
], tot_loss[loss=0.2527, ctc_loss=0.1469, cr_loss=0.3888, attn_decoder_loss=0.2558, over 5796951.03 frames. ], batch size: 75, lr: 6.97e-03, grad_scale: 8.0 +2024-09-17 19:09:44,819 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.94 vs. limit=15.0 +2024-09-17 19:09:49,726 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.861e+01 8.782e+01 9.545e+01 1.036e+02 3.066e+02, threshold=1.909e+02, percent-clipped=3.0 +2024-09-17 19:09:51,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=282580.0, ans=0.0 +2024-09-17 19:09:53,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=282580.0, ans=0.2 +2024-09-17 19:09:57,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=282580.0, ans=0.2 +2024-09-17 19:10:03,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=282620.0, ans=0.125 +2024-09-17 19:10:12,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=282620.0, ans=0.07 +2024-09-17 19:10:27,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=282660.0, ans=0.0 +2024-09-17 19:10:34,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=282700.0, ans=0.025 +2024-09-17 19:10:34,653 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.95 vs. limit=22.5 +2024-09-17 19:10:35,383 INFO [train.py:1198] (1/2) Epoch 16, batch 2800, loss[loss=0.2839, ctc_loss=0.1881, cr_loss=0.4131, attn_decoder_loss=0.2854, over 20176.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1472, cr_loss=0.3893, attn_decoder_loss=0.2559, over 5777417.17 frames. ], batch size: 209, lr: 6.97e-03, grad_scale: 16.0 +2024-09-17 19:11:02,137 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=13.32 vs. limit=22.5 +2024-09-17 19:11:04,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.74 vs. limit=15.0 +2024-09-17 19:11:08,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=282780.0, ans=0.0 +2024-09-17 19:11:16,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=282780.0, ans=0.0 +2024-09-17 19:11:34,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=282860.0, ans=0.125 +2024-09-17 19:11:49,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=282900.0, ans=0.2 +2024-09-17 19:11:50,921 INFO [train.py:1198] (1/2) Epoch 16, batch 2850, loss[loss=0.2373, ctc_loss=0.1273, cr_loss=0.3596, attn_decoder_loss=0.2415, over 29534.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1479, cr_loss=0.3903, attn_decoder_loss=0.2563, over 5760900.57 frames. 
], batch size: 77, lr: 6.97e-03, grad_scale: 8.0 +2024-09-17 19:11:53,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.34 vs. limit=15.0 +2024-09-17 19:12:03,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=282900.0, ans=0.1 +2024-09-17 19:12:14,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=282940.0, ans=0.125 +2024-09-17 19:12:24,810 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.778e+01 8.830e+01 9.442e+01 1.037e+02 2.855e+02, threshold=1.888e+02, percent-clipped=3.0 +2024-09-17 19:12:44,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=283020.0, ans=0.09899494936611666 +2024-09-17 19:12:44,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=283020.0, ans=0.125 +2024-09-17 19:13:10,861 INFO [train.py:1198] (1/2) Epoch 16, batch 2900, loss[loss=0.2542, ctc_loss=0.1536, cr_loss=0.396, attn_decoder_loss=0.2565, over 29412.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1481, cr_loss=0.3916, attn_decoder_loss=0.257, over 5787422.05 frames. ], batch size: 79, lr: 6.96e-03, grad_scale: 8.0 +2024-09-17 19:13:33,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=283140.0, ans=0.125 +2024-09-17 19:13:45,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=283180.0, ans=0.0 +2024-09-17 19:13:54,324 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.81 vs. limit=10.0 +2024-09-17 19:14:15,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=283260.0, ans=0.125 +2024-09-17 19:14:18,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=283260.0, ans=0.125 +2024-09-17 19:14:27,127 INFO [train.py:1198] (1/2) Epoch 16, batch 2950, loss[loss=0.2392, ctc_loss=0.1353, cr_loss=0.3813, attn_decoder_loss=0.2422, over 29510.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.147, cr_loss=0.3894, attn_decoder_loss=0.2556, over 5781908.60 frames. 
], batch size: 75, lr: 6.96e-03, grad_scale: 8.0 +2024-09-17 19:14:27,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=283300.0, ans=0.125 +2024-09-17 19:14:44,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=283340.0, ans=0.025 +2024-09-17 19:14:56,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=283380.0, ans=0.125 +2024-09-17 19:14:58,919 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.449e+01 8.668e+01 9.077e+01 9.673e+01 1.448e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-17 19:15:00,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=283380.0, ans=0.125 +2024-09-17 19:15:11,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=283420.0, ans=0.125 +2024-09-17 19:15:15,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=283420.0, ans=0.1 +2024-09-17 19:15:20,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=283420.0, ans=0.0 +2024-09-17 19:15:23,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=283420.0, ans=0.1 +2024-09-17 19:15:35,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=283460.0, ans=0.125 +2024-09-17 19:15:42,948 INFO [train.py:1198] (1/2) Epoch 16, batch 3000, loss[loss=0.2457, ctc_loss=0.1393, cr_loss=0.3775, attn_decoder_loss=0.2491, over 29729.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1467, cr_loss=0.3889, attn_decoder_loss=0.2552, over 5782273.08 frames. ], batch size: 81, lr: 6.96e-03, grad_scale: 8.0 +2024-09-17 19:15:42,949 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 19:15:54,548 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.1.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.3261, 4.9228, 4.6825, 4.6293], device='cuda:1') +2024-09-17 19:16:01,439 INFO [train.py:1230] (1/2) Epoch 16, validation: loss=0.2115, ctc_loss=0.04131, cr_loss=4.919e-15, attn_decoder_loss=0.2304, over 944034.00 frames. +2024-09-17 19:16:01,439 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 19:16:24,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=283540.0, ans=0.1 +2024-09-17 19:16:53,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.50 vs. limit=12.0 +2024-09-17 19:17:10,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=283660.0, ans=0.125 +2024-09-17 19:17:22,015 INFO [train.py:1198] (1/2) Epoch 16, batch 3050, loss[loss=0.2464, ctc_loss=0.1441, cr_loss=0.3928, attn_decoder_loss=0.249, over 29550.00 frames. ], tot_loss[loss=0.2533, ctc_loss=0.1477, cr_loss=0.3901, attn_decoder_loss=0.2564, over 5776812.89 frames. 
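The four logged losses are not independent: across this log the total is consistent with a fixed linear combination of the three components. The validation line above (loss=0.2115, ctc_loss=0.04131, cr_loss~=0, attn_decoder_loss=0.2304) satisfies loss ~= 0.1 * ctc_loss + 0.9 * attn_decoder_loss + 0.02 * cr_loss, and the per-batch lines fit the same weights. The weights (0.1, 0.9, 0.02) are inferred from these numbers rather than read from the training config, so treat them as assumptions:

def combined_loss(ctc_loss, attn_decoder_loss, cr_loss,
                  ctc_weight=0.1, attn_weight=0.9, cr_weight=0.02):
    # Weights inferred from the logged values, not taken from train.py.
    return ctc_weight * ctc_loss + attn_weight * attn_decoder_loss + cr_weight * cr_loss

# validation line: loss=0.2115 from ctc=0.04131, attn=0.2304, cr~=0
assert abs(combined_loss(0.04131, 0.2304, 0.0) - 0.2115) < 5e-4
# batch 3050 line: loss=0.2464 from ctc=0.1441, attn=0.249, cr=0.3928
assert abs(combined_loss(0.1441, 0.249, 0.3928) - 0.2464) < 5e-4
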
], batch size: 76, lr: 6.96e-03, grad_scale: 8.0 +2024-09-17 19:17:26,187 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.46 vs. limit=15.0 +2024-09-17 19:17:33,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=283700.0, ans=0.125 +2024-09-17 19:17:46,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=283740.0, ans=0.2 +2024-09-17 19:17:50,171 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.99 vs. limit=22.5 +2024-09-17 19:17:53,964 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.977e+01 8.926e+01 9.487e+01 1.019e+02 3.855e+02, threshold=1.897e+02, percent-clipped=1.0 +2024-09-17 19:18:09,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=283820.0, ans=0.125 +2024-09-17 19:18:27,832 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.34 vs. limit=15.0 +2024-09-17 19:18:34,249 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.52 vs. limit=15.0 +2024-09-17 19:18:36,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=283900.0, ans=0.0 +2024-09-17 19:18:37,845 INFO [train.py:1198] (1/2) Epoch 16, batch 3100, loss[loss=0.2675, ctc_loss=0.154, cr_loss=0.4224, attn_decoder_loss=0.2708, over 29275.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.1469, cr_loss=0.3885, attn_decoder_loss=0.2556, over 5777310.91 frames. ], batch size: 100, lr: 6.95e-03, grad_scale: 8.0 +2024-09-17 19:18:41,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=283900.0, ans=0.2 +2024-09-17 19:19:24,176 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:19:54,452 INFO [train.py:1198] (1/2) Epoch 16, batch 3150, loss[loss=0.2626, ctc_loss=0.1486, cr_loss=0.3934, attn_decoder_loss=0.2666, over 28977.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.147, cr_loss=0.3891, attn_decoder_loss=0.2556, over 5783122.29 frames. ], batch size: 104, lr: 6.95e-03, grad_scale: 8.0 +2024-09-17 19:20:22,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.78 vs. 
limit=15.0 +2024-09-17 19:20:28,022 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.755e+01 8.635e+01 9.420e+01 9.793e+01 2.697e+02, threshold=1.884e+02, percent-clipped=2.0 +2024-09-17 19:20:45,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=284220.0, ans=0.0 +2024-09-17 19:21:00,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=284260.0, ans=0.125 +2024-09-17 19:21:12,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=284300.0, ans=0.125 +2024-09-17 19:21:13,977 INFO [train.py:1198] (1/2) Epoch 16, batch 3200, loss[loss=0.2433, ctc_loss=0.1451, cr_loss=0.3811, attn_decoder_loss=0.2457, over 29406.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1461, cr_loss=0.3875, attn_decoder_loss=0.2549, over 5794093.13 frames. ], batch size: 79, lr: 6.95e-03, grad_scale: 16.0 +2024-09-17 19:21:23,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=284300.0, ans=0.125 +2024-09-17 19:21:37,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=284340.0, ans=0.125 +2024-09-17 19:21:47,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=284380.0, ans=0.1 +2024-09-17 19:21:59,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=284420.0, ans=0.025 +2024-09-17 19:22:09,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.10 vs. limit=22.5 +2024-09-17 19:22:12,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=284420.0, ans=0.2 +2024-09-17 19:22:13,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=284460.0, ans=0.125 +2024-09-17 19:22:29,952 INFO [train.py:1198] (1/2) Epoch 16, batch 3250, loss[loss=0.2552, ctc_loss=0.142, cr_loss=0.4068, attn_decoder_loss=0.2587, over 29699.00 frames. ], tot_loss[loss=0.2526, ctc_loss=0.1465, cr_loss=0.3886, attn_decoder_loss=0.2558, over 5800873.24 frames. ], batch size: 84, lr: 6.95e-03, grad_scale: 8.0 +2024-09-17 19:22:39,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=284500.0, ans=0.0 +2024-09-17 19:22:41,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.71 vs. limit=15.0 +2024-09-17 19:22:45,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=284540.0, ans=0.125 +2024-09-17 19:22:53,707 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.44 vs. 
limit=15.0 +2024-09-17 19:23:03,115 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.613e+01 8.619e+01 9.155e+01 9.687e+01 2.235e+02, threshold=1.831e+02, percent-clipped=1.0 +2024-09-17 19:23:06,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=284580.0, ans=0.2 +2024-09-17 19:23:07,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=284580.0, ans=0.125 +2024-09-17 19:23:08,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=284580.0, ans=0.2 +2024-09-17 19:23:26,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=284620.0, ans=0.1 +2024-09-17 19:23:35,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=284660.0, ans=0.1 +2024-09-17 19:23:45,582 INFO [train.py:1198] (1/2) Epoch 16, batch 3300, loss[loss=0.2763, ctc_loss=0.1675, cr_loss=0.4091, attn_decoder_loss=0.2793, over 28402.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1455, cr_loss=0.3865, attn_decoder_loss=0.2544, over 5797010.09 frames. ], batch size: 111, lr: 6.94e-03, grad_scale: 8.0 +2024-09-17 19:24:18,997 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.55 vs. limit=6.0 +2024-09-17 19:24:23,806 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.10 vs. limit=15.0 +2024-09-17 19:24:57,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.min_positive, batch_count=284860.0, ans=0.025 +2024-09-17 19:24:58,928 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:25:02,584 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.52 vs. limit=15.0 +2024-09-17 19:25:06,085 INFO [train.py:1198] (1/2) Epoch 16, batch 3350, loss[loss=0.2636, ctc_loss=0.1527, cr_loss=0.386, attn_decoder_loss=0.2673, over 28772.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1464, cr_loss=0.3877, attn_decoder_loss=0.2552, over 5772794.66 frames. ], batch size: 104, lr: 6.94e-03, grad_scale: 8.0 +2024-09-17 19:25:39,361 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.447e+01 8.948e+01 9.628e+01 1.043e+02 1.952e+02, threshold=1.926e+02, percent-clipped=2.0 +2024-09-17 19:25:59,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=285020.0, ans=0.125 +2024-09-17 19:26:21,750 INFO [train.py:1198] (1/2) Epoch 16, batch 3400, loss[loss=0.216, ctc_loss=0.1119, cr_loss=0.329, attn_decoder_loss=0.2202, over 29353.00 frames. ], tot_loss[loss=0.2523, ctc_loss=0.1469, cr_loss=0.3889, attn_decoder_loss=0.2554, over 5765814.42 frames. ], batch size: 67, lr: 6.94e-03, grad_scale: 8.0 +2024-09-17 19:26:22,604 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.31 vs. 
limit=12.0 +2024-09-17 19:26:26,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=285100.0, ans=0.0 +2024-09-17 19:26:58,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=285180.0, ans=0.0 +2024-09-17 19:27:10,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=285220.0, ans=0.035 +2024-09-17 19:27:28,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=285260.0, ans=0.125 +2024-09-17 19:27:31,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=285260.0, ans=0.0 +2024-09-17 19:27:37,292 INFO [train.py:1198] (1/2) Epoch 16, batch 3450, loss[loss=0.2523, ctc_loss=0.1347, cr_loss=0.3529, attn_decoder_loss=0.2576, over 28507.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1467, cr_loss=0.3888, attn_decoder_loss=0.2555, over 5774602.19 frames. ], batch size: 111, lr: 6.94e-03, grad_scale: 8.0 +2024-09-17 19:27:41,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=285300.0, ans=0.2 +2024-09-17 19:27:45,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=285300.0, ans=0.2 +2024-09-17 19:27:47,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=285300.0, ans=0.2 +2024-09-17 19:27:56,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff3.min_abs, batch_count=285340.0, ans=0.2 +2024-09-17 19:28:07,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.05 vs. limit=10.0 +2024-09-17 19:28:12,413 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.860e+01 9.055e+01 9.633e+01 1.034e+02 1.561e+02, threshold=1.927e+02, percent-clipped=0.0 +2024-09-17 19:28:23,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=285380.0, ans=0.0 +2024-09-17 19:28:57,133 INFO [train.py:1198] (1/2) Epoch 16, batch 3500, loss[loss=0.2229, ctc_loss=0.1235, cr_loss=0.3267, attn_decoder_loss=0.2266, over 29350.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1465, cr_loss=0.3883, attn_decoder_loss=0.2551, over 5775752.19 frames. ], batch size: 71, lr: 6.93e-03, grad_scale: 8.0 +2024-09-17 19:29:06,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=285500.0, ans=0.0 +2024-09-17 19:29:23,937 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.06 vs. limit=15.0 +2024-09-17 19:29:53,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=285620.0, ans=0.1 +2024-09-17 19:29:59,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=285660.0, ans=0.07 +2024-09-17 19:30:12,380 INFO [train.py:1198] (1/2) Epoch 16, batch 3550, loss[loss=0.2649, ctc_loss=0.1544, cr_loss=0.4097, attn_decoder_loss=0.268, over 29693.00 frames. 
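Each train.py line pairs a single-batch loss ("over N frames", N around 3e4) with a tot_loss ("over M frames", M around 5.8e6), so tot_loss behaves like a frame-weighted smoothed average over recent batches rather than a plain epoch mean. A sketch of that bookkeeping, assuming an exponential decay on the accumulated sums; the decay constant is illustrative only:

class RunningLoss:
    """Frame-weighted, exponentially decayed running average of a loss."""
    def __init__(self, decay: float = 0.995):
        self.decay = decay
        self.weighted_sum = 0.0   # decayed sum of loss * frames
        self.frames = 0.0         # decayed sum of frames

    def update(self, batch_loss: float, batch_frames: float) -> float:
        self.weighted_sum = self.decay * self.weighted_sum + batch_loss * batch_frames
        self.frames = self.decay * self.frames + batch_frames
        return self.weighted_sum / self.frames  # reported as tot_loss

# With ~3e4 frames per batch and decay 0.995, the effective window is roughly
# 3e4 / (1 - 0.995) = 6e6 frames, matching the logged "over ~5.8e6 frames".
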
], tot_loss[loss=0.252, ctc_loss=0.1463, cr_loss=0.3881, attn_decoder_loss=0.2551, over 5783020.73 frames. ], batch size: 89, lr: 6.93e-03, grad_scale: 8.0 +2024-09-17 19:30:30,471 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:30:41,517 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.95 vs. limit=15.0 +2024-09-17 19:30:42,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=285780.0, ans=0.1 +2024-09-17 19:30:45,351 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.292e+01 8.552e+01 9.135e+01 9.623e+01 1.565e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-17 19:30:50,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=285780.0, ans=0.025 +2024-09-17 19:31:06,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=285820.0, ans=0.0 +2024-09-17 19:31:22,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=285860.0, ans=0.0 +2024-09-17 19:31:26,862 INFO [train.py:1198] (1/2) Epoch 16, batch 3600, loss[loss=0.2312, ctc_loss=0.1265, cr_loss=0.3481, attn_decoder_loss=0.2351, over 29491.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.1461, cr_loss=0.3878, attn_decoder_loss=0.2551, over 5792029.02 frames. ], batch size: 77, lr: 6.93e-03, grad_scale: 16.0 +2024-09-17 19:32:02,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=285980.0, ans=0.04949747468305833 +2024-09-17 19:32:25,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=286060.0, ans=0.1 +2024-09-17 19:32:32,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=286060.0, ans=0.125 +2024-09-17 19:32:41,300 INFO [train.py:1198] (1/2) Epoch 16, batch 3650, loss[loss=0.2705, ctc_loss=0.1618, cr_loss=0.4199, attn_decoder_loss=0.2733, over 29500.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1459, cr_loss=0.387, attn_decoder_loss=0.2547, over 5794237.48 frames. ], batch size: 90, lr: 6.93e-03, grad_scale: 8.0 +2024-09-17 19:32:50,870 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.04 vs. 
limit=15.0 +2024-09-17 19:32:57,801 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:32:59,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=286140.0, ans=0.1 +2024-09-17 19:33:17,579 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.371e+01 8.668e+01 9.269e+01 9.880e+01 1.402e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-17 19:33:19,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=286180.0, ans=0.125 +2024-09-17 19:33:32,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=286220.0, ans=0.07 +2024-09-17 19:33:34,766 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.73 vs. limit=15.0 +2024-09-17 19:33:42,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.14 vs. limit=6.0 +2024-09-17 19:33:44,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=286260.0, ans=0.0 +2024-09-17 19:33:57,802 INFO [train.py:1198] (1/2) Epoch 16, batch 3700, loss[loss=0.2562, ctc_loss=0.1378, cr_loss=0.3678, attn_decoder_loss=0.2611, over 29703.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.1459, cr_loss=0.3874, attn_decoder_loss=0.2548, over 5802877.04 frames. ], batch size: 84, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:34:04,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=286300.0, ans=0.125 +2024-09-17 19:34:07,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=286300.0, ans=0.1 +2024-09-17 19:34:59,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=286460.0, ans=0.125 +2024-09-17 19:35:14,227 INFO [train.py:1198] (1/2) Epoch 16, batch 3750, loss[loss=0.2285, ctc_loss=0.1331, cr_loss=0.3592, attn_decoder_loss=0.2311, over 29339.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1456, cr_loss=0.3872, attn_decoder_loss=0.2545, over 5808048.77 frames. ], batch size: 67, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:35:30,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=286540.0, ans=0.125 +2024-09-17 19:35:40,434 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.93 vs. limit=15.0 +2024-09-17 19:35:48,294 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.804e+01 8.849e+01 9.329e+01 1.007e+02 6.454e+02, threshold=1.866e+02, percent-clipped=5.0 +2024-09-17 19:36:11,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=286620.0, ans=0.1 +2024-09-17 19:36:28,744 INFO [train.py:1198] (1/2) Epoch 16, batch 3800, loss[loss=0.2777, ctc_loss=0.1671, cr_loss=0.4423, attn_decoder_loss=0.2802, over 29636.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1454, cr_loss=0.3871, attn_decoder_loss=0.2544, over 5798247.87 frames. 
], batch size: 86, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:36:41,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.29 vs. limit=12.0 +2024-09-17 19:36:55,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=286740.0, ans=0.07 +2024-09-17 19:37:03,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=286780.0, ans=0.0 +2024-09-17 19:37:11,394 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.95 vs. limit=15.0 +2024-09-17 19:37:35,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=286860.0, ans=0.1 +2024-09-17 19:37:42,781 INFO [train.py:1198] (1/2) Epoch 16, batch 3850, loss[loss=0.272, ctc_loss=0.1632, cr_loss=0.4281, attn_decoder_loss=0.2746, over 29224.00 frames. ], tot_loss[loss=0.251, ctc_loss=0.1449, cr_loss=0.386, attn_decoder_loss=0.2542, over 5812678.44 frames. ], batch size: 100, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:37:44,018 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=10.18 vs. limit=10.0 +2024-09-17 19:37:44,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=286900.0, ans=0.2 +2024-09-17 19:38:11,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=286980.0, ans=0.1 +2024-09-17 19:38:16,698 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.678e+01 9.163e+01 9.754e+01 1.076e+02 2.177e+02, threshold=1.951e+02, percent-clipped=1.0 +2024-09-17 19:38:18,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=286980.0, ans=0.125 +2024-09-17 19:38:18,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=286980.0, ans=0.1 +2024-09-17 19:38:18,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=286980.0, ans=0.1 +2024-09-17 19:38:24,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=286980.0, ans=0.0 +2024-09-17 19:38:24,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=286980.0, ans=0.0 +2024-09-17 19:38:39,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=287020.0, ans=0.1 +2024-09-17 19:38:58,596 INFO [train.py:1198] (1/2) Epoch 16, batch 3900, loss[loss=0.267, ctc_loss=0.1548, cr_loss=0.4228, attn_decoder_loss=0.27, over 29617.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.145, cr_loss=0.3863, attn_decoder_loss=0.2545, over 5817036.57 frames. 
], batch size: 86, lr: 6.92e-03, grad_scale: 8.0 +2024-09-17 19:38:58,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=287100.0, ans=0.125 +2024-09-17 19:39:04,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=287100.0, ans=0.1 +2024-09-17 19:39:06,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=287100.0, ans=0.125 +2024-09-17 19:39:08,704 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.39 vs. limit=10.0 +2024-09-17 19:39:09,868 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.72 vs. limit=6.0 +2024-09-17 19:39:12,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=287140.0, ans=0.0 +2024-09-17 19:39:19,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=287140.0, ans=0.05 +2024-09-17 19:39:32,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=287180.0, ans=0.1 +2024-09-17 19:39:37,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=287180.0, ans=0.1 +2024-09-17 19:39:43,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=287220.0, ans=0.125 +2024-09-17 19:40:14,822 INFO [train.py:1198] (1/2) Epoch 16, batch 3950, loss[loss=0.2784, ctc_loss=0.1735, cr_loss=0.4445, attn_decoder_loss=0.2802, over 29486.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1451, cr_loss=0.3868, attn_decoder_loss=0.2546, over 5836259.24 frames. ], batch size: 97, lr: 6.91e-03, grad_scale: 8.0 +2024-09-17 19:40:16,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=287300.0, ans=0.125 +2024-09-17 19:40:40,528 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.81 vs. limit=12.0 +2024-09-17 19:40:48,604 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.423e+01 8.783e+01 9.413e+01 1.005e+02 2.800e+02, threshold=1.883e+02, percent-clipped=1.0 +2024-09-17 19:41:15,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=287460.0, ans=0.025 +2024-09-17 19:41:28,446 INFO [train.py:1198] (1/2) Epoch 16, batch 4000, loss[loss=0.2329, ctc_loss=0.1212, cr_loss=0.3256, attn_decoder_loss=0.238, over 29479.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1455, cr_loss=0.3872, attn_decoder_loss=0.2548, over 5813629.47 frames. ], batch size: 74, lr: 6.91e-03, grad_scale: 16.0 +2024-09-17 19:41:33,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.48 vs. 
limit=15.0 +2024-09-17 19:42:09,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=287580.0, ans=0.125 +2024-09-17 19:42:14,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=287620.0, ans=0.125 +2024-09-17 19:42:20,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=287620.0, ans=0.05 +2024-09-17 19:42:30,195 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.60 vs. limit=15.0 +2024-09-17 19:42:31,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=287660.0, ans=0.1 +2024-09-17 19:42:42,630 INFO [train.py:1198] (1/2) Epoch 16, batch 4050, loss[loss=0.2799, ctc_loss=0.1817, cr_loss=0.4126, attn_decoder_loss=0.2816, over 20272.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1458, cr_loss=0.3873, attn_decoder_loss=0.2547, over 5797151.93 frames. ], batch size: 210, lr: 6.91e-03, grad_scale: 8.0 +2024-09-17 19:42:48,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=287700.0, ans=0.2 +2024-09-17 19:42:55,212 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.20 vs. limit=22.5 +2024-09-17 19:43:01,198 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.86 vs. limit=15.0 +2024-09-17 19:43:07,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=287740.0, ans=0.025 +2024-09-17 19:43:17,956 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.851e+01 8.954e+01 9.709e+01 1.044e+02 2.247e+02, threshold=1.942e+02, percent-clipped=1.0 +2024-09-17 19:43:57,685 INFO [train.py:1198] (1/2) Epoch 16, batch 4100, loss[loss=0.2717, ctc_loss=0.1623, cr_loss=0.4202, attn_decoder_loss=0.2745, over 29509.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1457, cr_loss=0.3869, attn_decoder_loss=0.2546, over 5791353.09 frames. ], batch size: 90, lr: 6.91e-03, grad_scale: 8.0 +2024-09-17 19:44:02,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=287900.0, ans=0.2 +2024-09-17 19:44:03,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=287900.0, ans=0.0 +2024-09-17 19:45:16,517 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.02 vs. limit=6.0 +2024-09-17 19:45:20,176 INFO [train.py:1198] (1/2) Epoch 16, batch 4150, loss[loss=0.2413, ctc_loss=0.1338, cr_loss=0.3681, attn_decoder_loss=0.2451, over 29495.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1456, cr_loss=0.3866, attn_decoder_loss=0.2544, over 5797443.28 frames. 
], batch size: 77, lr: 6.90e-03, grad_scale: 8.0 +2024-09-17 19:45:21,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=288100.0, ans=0.125 +2024-09-17 19:45:43,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=288140.0, ans=0.0 +2024-09-17 19:45:45,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=288140.0, ans=10.0 +2024-09-17 19:45:46,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=288140.0, ans=0.0 +2024-09-17 19:45:52,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=288180.0, ans=0.0 +2024-09-17 19:45:54,965 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.454e+01 9.164e+01 9.745e+01 4.465e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-17 19:45:55,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.05 vs. limit=12.0 +2024-09-17 19:46:01,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=288180.0, ans=0.0 +2024-09-17 19:46:01,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.70 vs. limit=12.0 +2024-09-17 19:46:04,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=288220.0, ans=0.1 +2024-09-17 19:46:05,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=288220.0, ans=0.015 +2024-09-17 19:46:07,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=288220.0, ans=0.0 +2024-09-17 19:46:16,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=288220.0, ans=0.125 +2024-09-17 19:46:17,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=288260.0, ans=0.0 +2024-09-17 19:46:20,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=288260.0, ans=0.025 +2024-09-17 19:46:33,310 INFO [train.py:1198] (1/2) Epoch 16, batch 4200, loss[loss=0.2727, ctc_loss=0.1703, cr_loss=0.4171, attn_decoder_loss=0.2749, over 29524.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.146, cr_loss=0.387, attn_decoder_loss=0.2549, over 5799422.13 frames. 
], batch size: 90, lr: 6.90e-03, grad_scale: 8.0 +2024-09-17 19:46:37,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=288300.0, ans=0.0 +2024-09-17 19:46:55,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=288340.0, ans=0.125 +2024-09-17 19:47:11,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=288380.0, ans=0.05 +2024-09-17 19:47:14,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=288380.0, ans=0.125 +2024-09-17 19:47:29,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=288420.0, ans=0.0 +2024-09-17 19:47:40,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=288460.0, ans=0.035 +2024-09-17 19:47:45,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=288460.0, ans=0.125 +2024-09-17 19:47:47,733 INFO [train.py:1198] (1/2) Epoch 16, batch 4250, loss[loss=0.2267, ctc_loss=0.1263, cr_loss=0.3493, attn_decoder_loss=0.2301, over 29521.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1458, cr_loss=0.3866, attn_decoder_loss=0.255, over 5805195.58 frames. ], batch size: 74, lr: 6.90e-03, grad_scale: 4.0 +2024-09-17 19:47:47,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=288500.0, ans=10.0 +2024-09-17 19:47:50,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=288500.0, ans=0.125 +2024-09-17 19:47:53,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=288500.0, ans=0.125 +2024-09-17 19:48:01,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=288540.0, ans=0.07 +2024-09-17 19:48:06,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=288540.0, ans=0.125 +2024-09-17 19:48:09,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=288540.0, ans=0.125 +2024-09-17 19:48:10,186 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.83 vs. 
limit=6.0 +2024-09-17 19:48:10,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=288540.0, ans=0.1 +2024-09-17 19:48:17,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=288580.0, ans=0.125 +2024-09-17 19:48:18,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=288580.0, ans=0.125 +2024-09-17 19:48:24,158 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.118e+01 8.844e+01 9.399e+01 1.005e+02 1.682e+02, threshold=1.880e+02, percent-clipped=0.0 +2024-09-17 19:48:25,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=288580.0, ans=0.125 +2024-09-17 19:48:37,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=288620.0, ans=0.125 +2024-09-17 19:48:37,883 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.95 vs. limit=15.0 +2024-09-17 19:48:41,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=288620.0, ans=0.025 +2024-09-17 19:48:44,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=288620.0, ans=0.0 +2024-09-17 19:48:50,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=288660.0, ans=0.1 +2024-09-17 19:49:01,838 INFO [train.py:1198] (1/2) Epoch 16, batch 4300, loss[loss=0.2671, ctc_loss=0.1608, cr_loss=0.4118, attn_decoder_loss=0.2698, over 29536.00 frames. ], tot_loss[loss=0.252, ctc_loss=0.146, cr_loss=0.3872, attn_decoder_loss=0.2551, over 5794928.20 frames. ], batch size: 87, lr: 6.90e-03, grad_scale: 8.0 +2024-09-17 19:49:11,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=288700.0, ans=0.125 +2024-09-17 19:49:18,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=288740.0, ans=0.125 +2024-09-17 19:49:23,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=288740.0, ans=10.0 +2024-09-17 19:49:24,616 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:49:26,798 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.60 vs. 
limit=12.0 +2024-09-17 19:49:30,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=288780.0, ans=0.1 +2024-09-17 19:49:30,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=288780.0, ans=0.1 +2024-09-17 19:49:36,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=288780.0, ans=0.0 +2024-09-17 19:49:37,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=288780.0, ans=0.1 +2024-09-17 19:50:03,801 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.29 vs. limit=6.0 +2024-09-17 19:50:13,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=288860.0, ans=0.0 +2024-09-17 19:50:16,551 INFO [train.py:1198] (1/2) Epoch 16, batch 4350, loss[loss=0.2661, ctc_loss=0.1625, cr_loss=0.4031, attn_decoder_loss=0.2686, over 29560.00 frames. ], tot_loss[loss=0.2553, ctc_loss=0.1489, cr_loss=0.3927, attn_decoder_loss=0.2584, over 5796644.58 frames. ], batch size: 97, lr: 6.89e-03, grad_scale: 8.0 +2024-09-17 19:50:33,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=288940.0, ans=0.125 +2024-09-17 19:50:41,217 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:50:42,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=288940.0, ans=0.0 +2024-09-17 19:50:53,800 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.345e+01 8.913e+01 9.427e+01 9.937e+01 2.646e+02, threshold=1.885e+02, percent-clipped=2.0 +2024-09-17 19:50:54,743 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.13 vs. limit=15.0 +2024-09-17 19:51:24,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=289060.0, ans=0.0 +2024-09-17 19:51:31,269 INFO [train.py:1198] (1/2) Epoch 16, batch 4400, loss[loss=0.2564, ctc_loss=0.1521, cr_loss=0.4102, attn_decoder_loss=0.2588, over 27262.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1504, cr_loss=0.3947, attn_decoder_loss=0.2605, over 5765276.24 frames. ], batch size: 124, lr: 6.89e-03, grad_scale: 16.0 +2024-09-17 19:51:56,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=289140.0, ans=0.0 +2024-09-17 19:52:02,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=289180.0, ans=0.125 +2024-09-17 19:52:36,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=289260.0, ans=0.0 +2024-09-17 19:52:44,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=289300.0, ans=0.0 +2024-09-17 19:52:44,962 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=23.31 vs. 
limit=22.5 +2024-09-17 19:52:45,502 INFO [train.py:1198] (1/2) Epoch 16, batch 4450, loss[loss=0.2793, ctc_loss=0.1918, cr_loss=0.4516, attn_decoder_loss=0.279, over 20795.00 frames. ], tot_loss[loss=0.2605, ctc_loss=0.1552, cr_loss=0.3999, attn_decoder_loss=0.2633, over 5571692.71 frames. ], batch size: 210, lr: 6.89e-03, grad_scale: 4.0 +2024-09-17 19:53:13,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=289340.0, ans=0.05 +2024-09-17 19:53:15,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.67 vs. limit=15.0 +2024-09-17 19:53:19,531 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:53:26,477 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.172e+01 9.461e+01 1.058e+02 1.169e+02 3.185e+02, threshold=2.116e+02, percent-clipped=2.0 +2024-09-17 19:53:49,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=289460.0, ans=0.0 +2024-09-17 19:53:54,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.53 vs. limit=6.0 +2024-09-17 19:53:57,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=289460.0, ans=0.125 +2024-09-17 19:54:01,245 INFO [train.py:1198] (1/2) Epoch 16, batch 4500, loss[loss=0.2576, ctc_loss=0.1634, cr_loss=0.37, attn_decoder_loss=0.2598, over 19871.00 frames. ], tot_loss[loss=0.2632, ctc_loss=0.1601, cr_loss=0.4019, attn_decoder_loss=0.2657, over 5227858.82 frames. ], batch size: 210, lr: 6.89e-03, grad_scale: 8.0 +2024-09-17 19:54:35,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=289580.0, ans=0.1 +2024-09-17 19:55:24,183 INFO [train.py:1198] (1/2) Epoch 17, batch 0, loss[loss=0.2269, ctc_loss=0.1176, cr_loss=0.3275, attn_decoder_loss=0.2318, over 29590.00 frames. ], tot_loss[loss=0.2269, ctc_loss=0.1176, cr_loss=0.3275, attn_decoder_loss=0.2318, over 29590.00 frames. ], batch size: 73, lr: 6.68e-03, grad_scale: 16.0 +2024-09-17 19:55:24,183 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 19:55:42,766 INFO [train.py:1230] (1/2) Epoch 17, validation: loss=0.2133, ctc_loss=0.04137, cr_loss=4.881e-15, attn_decoder_loss=0.2324, over 944034.00 frames. 
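Each train.py entry in this log reports a combined loss together with its ctc_loss, cr_loss, and attn_decoder_loss components, plus a frames-weighted running tot_loss. The sketch below is illustrative only, not the icefall implementation: the weights 0.1 / 0.02 / 0.9 are inferred from the printed values themselves (e.g. 0.1 * 0.1176 + 0.02 * 0.3275 + 0.9 * 0.2318 = 0.2269 for the "Epoch 17, batch 0" entry above), not read from any config, and the helper names are hypothetical.

```python
# Illustrative sketch -- not the icefall code. It reproduces the per-batch
# "loss=" field and the frames-weighted bookkeeping behind
# "tot_loss=... over N frames" from the three components each entry reports.

CTC_WEIGHT = 0.1   # inferred from the logged numbers; an assumption
CR_WEIGHT = 0.02   # inferred; an assumption
AED_WEIGHT = 0.9   # inferred; an assumption


def combined_loss(ctc_loss: float, cr_loss: float, attn_decoder_loss: float) -> float:
    """Weighted sum matching the 'loss=' field of a log entry."""
    return CTC_WEIGHT * ctc_loss + CR_WEIGHT * cr_loss + AED_WEIGHT * attn_decoder_loss


class RunningLoss:
    """Frames-weighted running average ('tot_loss=... over N frames')."""

    def __init__(self) -> None:
        self.weighted_sum = 0.0
        self.frames = 0.0

    def update(self, batch_loss: float, batch_frames: float) -> None:
        # Accumulate loss weighted by how many frames the batch contained.
        self.weighted_sum += batch_loss * batch_frames
        self.frames += batch_frames

    @property
    def avg(self) -> float:
        return self.weighted_sum / max(self.frames, 1.0)


# Check against the "Epoch 17, batch 0" entry above:
loss = combined_loss(ctc_loss=0.1176, cr_loss=0.3275, attn_decoder_loss=0.2318)
print(f"loss={loss:.4f}")  # -> loss=0.2269, as logged

tracker = RunningLoss()
tracker.update(loss, 29590.0)
print(f"tot_loss={tracker.avg:.4f} over {tracker.frames:.2f} frames")
# -> tot_loss=0.2269 over 29590.00 frames (on the first batch of an epoch,
#    tot_loss equals the batch loss, as in the entry above)
```

The later entries suggest the real tracker also down-weights older batches (the tot_loss frame count plateaus near 5.8 million rather than growing without bound), a decay this sketch deliberately omits.

The ScheduledFloat lines, which dominate the log, print the current value (ans=...) of hyperparameters that vary with batch_count. A minimal sketch of such a schedule, assuming simple piecewise-linear interpolation with clamping at the ends; the actual class in icefall's scaling.py may differ, and the breakpoints in the example are made up for illustration:

```python
from bisect import bisect_right


def scheduled_float(points: list[tuple[float, float]], batch_count: float) -> float:
    """Piecewise-linear schedule over batch_count; clamps outside the range.

    points is a sorted list of (batch_count, value) breakpoints.
    """
    xs = [x for x, _ in points]
    if batch_count <= xs[0]:
        return points[0][1]
    if batch_count >= xs[-1]:
        return points[-1][1]
    i = bisect_right(xs, batch_count)
    (x0, y0), (x1, y1) = points[i - 1], points[i]
    return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)


# E.g. a dropout_p annealed from 0.3 to 0.1 over the first 20k batches
# (hypothetical breakpoints) would long since have reached its final value
# at the batch counts seen in this log:
print(scheduled_float([(0.0, 0.3), (20000.0, 0.1)], 286140.0))  # -> 0.1
```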
+2024-09-17 19:55:42,766 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 19:55:50,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=289600.0, ans=0.125 +2024-09-17 19:55:53,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=289600.0, ans=0.05 +2024-09-17 19:55:59,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=289640.0, ans=0.0 +2024-09-17 19:56:01,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=289640.0, ans=0.0 +2024-09-17 19:56:02,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=289640.0, ans=0.125 +2024-09-17 19:56:03,042 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.18 vs. limit=15.0 +2024-09-17 19:56:31,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=289720.0, ans=0.2 +2024-09-17 19:56:48,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=289760.0, ans=0.0 +2024-09-17 19:56:54,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=289760.0, ans=0.125 +2024-09-17 19:56:59,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=289800.0, ans=0.125 +2024-09-17 19:57:00,372 INFO [train.py:1198] (1/2) Epoch 17, batch 50, loss[loss=0.2324, ctc_loss=0.1302, cr_loss=0.3618, attn_decoder_loss=0.2357, over 29411.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1498, cr_loss=0.392, attn_decoder_loss=0.2569, over 1269219.05 frames. ], batch size: 70, lr: 6.68e-03, grad_scale: 8.0 +2024-09-17 19:57:05,049 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.027e+01 9.620e+01 1.078e+02 1.162e+02 4.794e+02, threshold=2.156e+02, percent-clipped=2.0 +2024-09-17 19:57:07,383 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.46 vs. limit=10.0 +2024-09-17 19:57:08,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=289800.0, ans=0.1 +2024-09-17 19:57:14,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=289840.0, ans=0.0 +2024-09-17 19:57:15,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.70 vs. limit=15.0 +2024-09-17 19:57:15,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=289840.0, ans=0.07 +2024-09-17 19:57:32,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=289880.0, ans=0.09899494936611666 +2024-09-17 19:57:44,075 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.14 vs. 
limit=15.0 +2024-09-17 19:57:55,465 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 19:58:18,295 INFO [train.py:1198] (1/2) Epoch 17, batch 100, loss[loss=0.2419, ctc_loss=0.1431, cr_loss=0.3873, attn_decoder_loss=0.2443, over 29520.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.1495, cr_loss=0.3928, attn_decoder_loss=0.2581, over 2253151.27 frames. ], batch size: 76, lr: 6.67e-03, grad_scale: 8.0 +2024-09-17 19:58:20,429 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.32 vs. limit=22.5 +2024-09-17 19:58:53,544 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.21 vs. limit=22.5 +2024-09-17 19:58:59,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=290080.0, ans=0.0 +2024-09-17 19:59:04,321 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.94 vs. limit=15.0 +2024-09-17 19:59:04,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=290120.0, ans=0.125 +2024-09-17 19:59:08,445 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.49 vs. limit=15.0 +2024-09-17 19:59:15,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=290120.0, ans=0.125 +2024-09-17 19:59:19,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=290160.0, ans=0.5 +2024-09-17 19:59:32,015 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.59 vs. limit=15.0 +2024-09-17 19:59:32,862 INFO [train.py:1198] (1/2) Epoch 17, batch 150, loss[loss=0.2333, ctc_loss=0.1338, cr_loss=0.3776, attn_decoder_loss=0.2359, over 29449.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.1468, cr_loss=0.3892, attn_decoder_loss=0.2559, over 3048412.45 frames. ], batch size: 70, lr: 6.67e-03, grad_scale: 8.0 +2024-09-17 19:59:37,320 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.030e+01 8.871e+01 9.281e+01 1.009e+02 2.332e+02, threshold=1.856e+02, percent-clipped=1.0 +2024-09-17 19:59:53,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.23 vs. limit=22.5 +2024-09-17 20:00:49,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=290400.0, ans=0.1 +2024-09-17 20:00:50,783 INFO [train.py:1198] (1/2) Epoch 17, batch 200, loss[loss=0.2669, ctc_loss=0.1562, cr_loss=0.4, attn_decoder_loss=0.2703, over 27365.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1455, cr_loss=0.3876, attn_decoder_loss=0.2545, over 3660367.05 frames. 
], batch size: 124, lr: 6.67e-03, grad_scale: 8.0 +2024-09-17 20:01:01,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=290400.0, ans=0.025 +2024-09-17 20:01:08,682 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.37 vs. limit=6.0 +2024-09-17 20:01:24,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_ff3.min_abs, batch_count=290480.0, ans=0.2 +2024-09-17 20:01:28,601 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.25 vs. limit=6.0 +2024-09-17 20:01:44,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=290520.0, ans=0.1 +2024-09-17 20:02:00,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=290560.0, ans=0.1 +2024-09-17 20:02:09,186 INFO [train.py:1198] (1/2) Epoch 17, batch 250, loss[loss=0.276, ctc_loss=0.163, cr_loss=0.4119, attn_decoder_loss=0.2794, over 29241.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1453, cr_loss=0.3871, attn_decoder_loss=0.2544, over 4142206.22 frames. ], batch size: 100, lr: 6.67e-03, grad_scale: 8.0 +2024-09-17 20:02:13,827 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.681e+01 8.517e+01 9.040e+01 9.817e+01 1.381e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-17 20:02:29,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=290640.0, ans=0.95 +2024-09-17 20:02:29,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=290640.0, ans=0.0 +2024-09-17 20:02:29,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=290640.0, ans=0.2 +2024-09-17 20:02:41,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=290680.0, ans=0.125 +2024-09-17 20:02:46,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.88 vs. limit=15.0 +2024-09-17 20:03:13,329 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.63 vs. limit=15.0 +2024-09-17 20:03:24,668 INFO [train.py:1198] (1/2) Epoch 17, batch 300, loss[loss=0.269, ctc_loss=0.1643, cr_loss=0.448, attn_decoder_loss=0.2707, over 29504.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1448, cr_loss=0.3869, attn_decoder_loss=0.254, over 4511384.75 frames. ], batch size: 92, lr: 6.66e-03, grad_scale: 8.0 +2024-09-17 20:04:15,285 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.34 vs. limit=15.0 +2024-09-17 20:04:37,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.83 vs. limit=15.0 +2024-09-17 20:04:42,401 INFO [train.py:1198] (1/2) Epoch 17, batch 350, loss[loss=0.2375, ctc_loss=0.1372, cr_loss=0.3688, attn_decoder_loss=0.2404, over 29314.00 frames. 
], tot_loss[loss=0.2515, ctc_loss=0.1449, cr_loss=0.3864, attn_decoder_loss=0.2547, over 4796703.87 frames. ], batch size: 71, lr: 6.66e-03, grad_scale: 8.0 +2024-09-17 20:04:46,787 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.070e+01 8.690e+01 9.264e+01 9.789e+01 1.817e+02, threshold=1.853e+02, percent-clipped=1.0 +2024-09-17 20:05:00,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=291040.0, ans=0.0 +2024-09-17 20:05:18,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=291080.0, ans=0.0 +2024-09-17 20:05:21,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=291080.0, ans=0.0 +2024-09-17 20:05:36,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=291120.0, ans=0.125 +2024-09-17 20:06:00,101 INFO [train.py:1198] (1/2) Epoch 17, batch 400, loss[loss=0.2532, ctc_loss=0.1331, cr_loss=0.3822, attn_decoder_loss=0.2581, over 29677.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.1444, cr_loss=0.3858, attn_decoder_loss=0.2544, over 5024890.44 frames. ], batch size: 82, lr: 6.66e-03, grad_scale: 16.0 +2024-09-17 20:06:03,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=291200.0, ans=0.125 +2024-09-17 20:06:38,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=291280.0, ans=0.125 +2024-09-17 20:06:41,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=291280.0, ans=0.125 +2024-09-17 20:06:50,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=291320.0, ans=0.1 +2024-09-17 20:06:53,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=291320.0, ans=0.125 +2024-09-17 20:07:15,629 INFO [train.py:1198] (1/2) Epoch 17, batch 450, loss[loss=0.2569, ctc_loss=0.1469, cr_loss=0.3891, attn_decoder_loss=0.2605, over 29703.00 frames. ], tot_loss[loss=0.251, ctc_loss=0.1439, cr_loss=0.386, attn_decoder_loss=0.2544, over 5188030.81 frames. ], batch size: 83, lr: 6.66e-03, grad_scale: 8.0 +2024-09-17 20:07:21,595 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.042e+01 8.659e+01 9.188e+01 9.784e+01 2.602e+02, threshold=1.838e+02, percent-clipped=1.0 +2024-09-17 20:07:23,892 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.77 vs. limit=15.0 +2024-09-17 20:07:25,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=291400.0, ans=0.05 +2024-09-17 20:07:49,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.60 vs. 
limit=22.5 +2024-09-17 20:08:23,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten.whitening_limit, batch_count=291560.0, ans=15.0 +2024-09-17 20:08:33,706 INFO [train.py:1198] (1/2) Epoch 17, batch 500, loss[loss=0.2697, ctc_loss=0.1599, cr_loss=0.4195, attn_decoder_loss=0.2725, over 29395.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1431, cr_loss=0.385, attn_decoder_loss=0.2534, over 5330803.42 frames. ], batch size: 94, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:08:47,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.66 vs. limit=22.5 +2024-09-17 20:09:19,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=291720.0, ans=0.0 +2024-09-17 20:09:20,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=291720.0, ans=0.0 +2024-09-17 20:09:45,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=291760.0, ans=0.0 +2024-09-17 20:09:49,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=291760.0, ans=0.125 +2024-09-17 20:09:49,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.93 vs. limit=15.0 +2024-09-17 20:09:51,607 INFO [train.py:1198] (1/2) Epoch 17, batch 550, loss[loss=0.2587, ctc_loss=0.1406, cr_loss=0.3771, attn_decoder_loss=0.2635, over 28813.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1438, cr_loss=0.386, attn_decoder_loss=0.2539, over 5423508.99 frames. ], batch size: 104, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:09:53,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=291800.0, ans=0.0 +2024-09-17 20:09:57,716 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.623e+01 9.075e+01 9.597e+01 1.052e+02 1.735e+02, threshold=1.919e+02, percent-clipped=0.0 +2024-09-17 20:10:01,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=291800.0, ans=0.125 +2024-09-17 20:10:04,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=291800.0, ans=0.1 +2024-09-17 20:10:45,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=291920.0, ans=0.125 +2024-09-17 20:10:51,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=291960.0, ans=0.025 +2024-09-17 20:10:54,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=291960.0, ans=0.125 +2024-09-17 20:11:03,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=291960.0, ans=0.125 +2024-09-17 20:11:08,249 INFO [train.py:1198] (1/2) Epoch 17, batch 600, loss[loss=0.2746, ctc_loss=0.1674, cr_loss=0.4239, attn_decoder_loss=0.2771, over 29226.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.144, cr_loss=0.3854, attn_decoder_loss=0.2542, over 5508427.99 frames. 
], batch size: 100, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:11:19,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=292000.0, ans=0.2 +2024-09-17 20:11:28,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.06 vs. limit=12.0 +2024-09-17 20:11:35,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=292040.0, ans=0.125 +2024-09-17 20:11:37,844 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=8.36 vs. limit=15.0 +2024-09-17 20:11:43,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=292080.0, ans=0.2 +2024-09-17 20:11:45,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=292080.0, ans=0.125 +2024-09-17 20:12:23,219 INFO [train.py:1198] (1/2) Epoch 17, batch 650, loss[loss=0.2505, ctc_loss=0.1444, cr_loss=0.3804, attn_decoder_loss=0.2539, over 29728.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.143, cr_loss=0.3842, attn_decoder_loss=0.2533, over 5585510.98 frames. ], batch size: 81, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:12:29,212 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.544e+01 8.569e+01 9.101e+01 9.967e+01 2.303e+02, threshold=1.820e+02, percent-clipped=2.0 +2024-09-17 20:12:29,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=292200.0, ans=0.0 +2024-09-17 20:12:40,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=292240.0, ans=0.1 +2024-09-17 20:12:40,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=292240.0, ans=0.5 +2024-09-17 20:12:56,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=292280.0, ans=0.125 +2024-09-17 20:13:00,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=292280.0, ans=0.125 +2024-09-17 20:13:13,583 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.46 vs. limit=12.0 +2024-09-17 20:13:23,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=292320.0, ans=0.0 +2024-09-17 20:13:34,574 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.18 vs. limit=15.0 +2024-09-17 20:13:43,855 INFO [train.py:1198] (1/2) Epoch 17, batch 700, loss[loss=0.241, ctc_loss=0.1383, cr_loss=0.3986, attn_decoder_loss=0.2436, over 29512.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1435, cr_loss=0.3857, attn_decoder_loss=0.254, over 5635172.20 frames. 
], batch size: 76, lr: 6.65e-03, grad_scale: 8.0 +2024-09-17 20:14:08,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=292440.0, ans=0.1 +2024-09-17 20:14:10,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=9.40 vs. limit=15.0 +2024-09-17 20:14:32,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=292520.0, ans=0.125 +2024-09-17 20:14:39,432 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.32 vs. limit=15.0 +2024-09-17 20:14:59,474 INFO [train.py:1198] (1/2) Epoch 17, batch 750, loss[loss=0.254, ctc_loss=0.1428, cr_loss=0.3912, attn_decoder_loss=0.2576, over 29710.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.143, cr_loss=0.385, attn_decoder_loss=0.2533, over 5674538.88 frames. ], batch size: 82, lr: 6.64e-03, grad_scale: 8.0 +2024-09-17 20:15:05,323 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.641e+01 8.913e+01 9.464e+01 1.024e+02 2.439e+02, threshold=1.893e+02, percent-clipped=2.0 +2024-09-17 20:15:16,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=292640.0, ans=0.025 +2024-09-17 20:15:26,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=292640.0, ans=0.0 +2024-09-17 20:15:26,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=292640.0, ans=0.0 +2024-09-17 20:15:51,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=292720.0, ans=0.125 +2024-09-17 20:16:05,469 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.84 vs. limit=15.0 +2024-09-17 20:16:06,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=292760.0, ans=0.025 +2024-09-17 20:16:15,488 INFO [train.py:1198] (1/2) Epoch 17, batch 800, loss[loss=0.2342, ctc_loss=0.1319, cr_loss=0.3583, attn_decoder_loss=0.2376, over 29621.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1432, cr_loss=0.3847, attn_decoder_loss=0.2533, over 5703738.45 frames. ], batch size: 73, lr: 6.64e-03, grad_scale: 16.0 +2024-09-17 20:16:15,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=292800.0, ans=0.125 +2024-09-17 20:16:31,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.13 vs. limit=15.0 +2024-09-17 20:16:52,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=292880.0, ans=0.125 +2024-09-17 20:17:27,978 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.25 vs. limit=15.0 +2024-09-17 20:17:33,058 INFO [train.py:1198] (1/2) Epoch 17, batch 850, loss[loss=0.2584, ctc_loss=0.1579, cr_loss=0.3855, attn_decoder_loss=0.261, over 29710.00 frames. 
], tot_loss[loss=0.25, ctc_loss=0.1434, cr_loss=0.385, attn_decoder_loss=0.2533, over 5733189.97 frames. ], batch size: 89, lr: 6.64e-03, grad_scale: 8.0 +2024-09-17 20:17:38,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=293000.0, ans=0.0 +2024-09-17 20:17:42,755 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.757e+01 8.745e+01 9.386e+01 1.018e+02 1.977e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-17 20:17:47,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=293000.0, ans=0.125 +2024-09-17 20:18:25,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=293120.0, ans=0.1 +2024-09-17 20:18:51,106 INFO [train.py:1198] (1/2) Epoch 17, batch 900, loss[loss=0.2297, ctc_loss=0.1245, cr_loss=0.3449, attn_decoder_loss=0.2338, over 29614.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1436, cr_loss=0.3852, attn_decoder_loss=0.2535, over 5738760.40 frames. ], batch size: 73, lr: 6.64e-03, grad_scale: 8.0 +2024-09-17 20:18:52,111 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.81 vs. limit=15.0 +2024-09-17 20:19:24,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.06 vs. limit=15.0 +2024-09-17 20:19:26,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=293280.0, ans=15.0 +2024-09-17 20:19:53,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=293360.0, ans=0.025 +2024-09-17 20:19:56,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=293360.0, ans=0.0 +2024-09-17 20:19:59,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=293360.0, ans=0.125 +2024-09-17 20:20:06,848 INFO [train.py:1198] (1/2) Epoch 17, batch 950, loss[loss=0.227, ctc_loss=0.1196, cr_loss=0.3479, attn_decoder_loss=0.2312, over 29515.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1437, cr_loss=0.3855, attn_decoder_loss=0.2536, over 5741744.29 frames. ], batch size: 74, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:20:14,253 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.889e+01 9.768e+01 1.117e+02 1.855e+02, threshold=1.954e+02, percent-clipped=0.0 +2024-09-17 20:20:19,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=293400.0, ans=0.1 +2024-09-17 20:20:33,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=293440.0, ans=0.125 +2024-09-17 20:20:41,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=293480.0, ans=0.0 +2024-09-17 20:20:46,128 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.14 vs. 
limit=15.0 +2024-09-17 20:20:54,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=293520.0, ans=0.025 +2024-09-17 20:20:54,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=293520.0, ans=0.125 +2024-09-17 20:20:57,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=293520.0, ans=0.0 +2024-09-17 20:21:06,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=293520.0, ans=0.0 +2024-09-17 20:21:13,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.11 vs. limit=22.5 +2024-09-17 20:21:25,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=293600.0, ans=0.125 +2024-09-17 20:21:26,828 INFO [train.py:1198] (1/2) Epoch 17, batch 1000, loss[loss=0.2415, ctc_loss=0.1332, cr_loss=0.3764, attn_decoder_loss=0.2451, over 29536.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1441, cr_loss=0.3856, attn_decoder_loss=0.254, over 5735231.65 frames. ], batch size: 77, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:21:46,268 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.86 vs. limit=6.0 +2024-09-17 20:22:16,785 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.84 vs. limit=12.0 +2024-09-17 20:22:17,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=293720.0, ans=0.125 +2024-09-17 20:22:23,940 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.91 vs. limit=10.0 +2024-09-17 20:22:29,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=293760.0, ans=0.125 +2024-09-17 20:22:29,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=293760.0, ans=0.0 +2024-09-17 20:22:41,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=293800.0, ans=0.0 +2024-09-17 20:22:42,677 INFO [train.py:1198] (1/2) Epoch 17, batch 1050, loss[loss=0.2608, ctc_loss=0.1483, cr_loss=0.4005, attn_decoder_loss=0.2644, over 29687.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1437, cr_loss=0.3852, attn_decoder_loss=0.2532, over 5743943.39 frames. 
], batch size: 85, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:22:43,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=293800.0, ans=0.1 +2024-09-17 20:22:50,129 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.535e+01 8.852e+01 9.385e+01 1.035e+02 1.958e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-17 20:22:53,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=293800.0, ans=0.0 +2024-09-17 20:22:53,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=293800.0, ans=0.1 +2024-09-17 20:22:58,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1.whitening_limit, batch_count=293840.0, ans=10.0 +2024-09-17 20:23:07,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=293840.0, ans=0.1 +2024-09-17 20:23:10,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=293840.0, ans=0.125 +2024-09-17 20:23:11,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=293880.0, ans=0.0 +2024-09-17 20:23:28,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=293920.0, ans=0.1 +2024-09-17 20:23:33,836 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.66 vs. limit=6.0 +2024-09-17 20:23:34,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=293920.0, ans=0.0 +2024-09-17 20:23:36,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=293920.0, ans=0.125 +2024-09-17 20:23:37,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=293920.0, ans=0.2 +2024-09-17 20:23:43,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=293960.0, ans=0.5 +2024-09-17 20:23:48,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=293960.0, ans=0.1 +2024-09-17 20:23:58,416 INFO [train.py:1198] (1/2) Epoch 17, batch 1100, loss[loss=0.2497, ctc_loss=0.15, cr_loss=0.4219, attn_decoder_loss=0.2514, over 29446.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1437, cr_loss=0.3858, attn_decoder_loss=0.2531, over 5756246.83 frames. 
], batch size: 78, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:24:07,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=294000.0, ans=0.125 +2024-09-17 20:24:19,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=294040.0, ans=0.125 +2024-09-17 20:24:19,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=294040.0, ans=0.0 +2024-09-17 20:24:27,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=294080.0, ans=0.0 +2024-09-17 20:25:04,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=294160.0, ans=0.2 +2024-09-17 20:25:12,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=294160.0, ans=0.125 +2024-09-17 20:25:18,670 INFO [train.py:1198] (1/2) Epoch 17, batch 1150, loss[loss=0.2508, ctc_loss=0.1521, cr_loss=0.4033, attn_decoder_loss=0.2528, over 29474.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1444, cr_loss=0.3862, attn_decoder_loss=0.2533, over 5756503.17 frames. ], batch size: 78, lr: 6.63e-03, grad_scale: 8.0 +2024-09-17 20:25:19,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=294200.0, ans=0.125 +2024-09-17 20:25:26,292 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.857e+01 8.746e+01 9.258e+01 9.833e+01 4.199e+02, threshold=1.852e+02, percent-clipped=3.0 +2024-09-17 20:25:27,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.11 vs. limit=22.5 +2024-09-17 20:25:38,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=294240.0, ans=0.125 +2024-09-17 20:25:45,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=294240.0, ans=0.0 +2024-09-17 20:26:00,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=294280.0, ans=0.125 +2024-09-17 20:26:17,324 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.14 vs. limit=6.0 +2024-09-17 20:26:23,163 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.52 vs. limit=15.0 +2024-09-17 20:26:24,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=294360.0, ans=0.125 +2024-09-17 20:26:24,604 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.82 vs. limit=12.0 +2024-09-17 20:26:34,873 INFO [train.py:1198] (1/2) Epoch 17, batch 1200, loss[loss=0.2708, ctc_loss=0.1593, cr_loss=0.4077, attn_decoder_loss=0.2741, over 29663.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1451, cr_loss=0.3876, attn_decoder_loss=0.2544, over 5748074.47 frames. 
], batch size: 85, lr: 6.62e-03, grad_scale: 16.0 +2024-09-17 20:26:52,067 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:26:53,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=294440.0, ans=0.0 +2024-09-17 20:26:56,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=294440.0, ans=10.0 +2024-09-17 20:27:33,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=294520.0, ans=0.07 +2024-09-17 20:27:50,823 INFO [train.py:1198] (1/2) Epoch 17, batch 1250, loss[loss=0.2711, ctc_loss=0.1661, cr_loss=0.4275, attn_decoder_loss=0.2732, over 29559.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1457, cr_loss=0.3895, attn_decoder_loss=0.2549, over 5775011.99 frames. ], batch size: 92, lr: 6.62e-03, grad_scale: 8.0 +2024-09-17 20:27:55,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=294600.0, ans=0.125 +2024-09-17 20:27:56,100 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.91 vs. limit=15.0 +2024-09-17 20:27:59,842 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.670e+01 8.886e+01 9.388e+01 9.868e+01 1.541e+02, threshold=1.878e+02, percent-clipped=0.0 +2024-09-17 20:28:00,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=294600.0, ans=0.1 +2024-09-17 20:28:03,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=294600.0, ans=0.025 +2024-09-17 20:28:11,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=294640.0, ans=6.0 +2024-09-17 20:28:29,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=294680.0, ans=0.125 +2024-09-17 20:28:34,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=294680.0, ans=0.125 +2024-09-17 20:29:09,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=294800.0, ans=0.1 +2024-09-17 20:29:10,543 INFO [train.py:1198] (1/2) Epoch 17, batch 1300, loss[loss=0.2519, ctc_loss=0.1384, cr_loss=0.3643, attn_decoder_loss=0.2564, over 28363.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.145, cr_loss=0.388, attn_decoder_loss=0.2542, over 5781302.40 frames. 
], batch size: 112, lr: 6.62e-03, grad_scale: 8.0 +2024-09-17 20:29:18,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=294800.0, ans=0.1 +2024-09-17 20:29:19,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=294800.0, ans=0.1 +2024-09-17 20:29:21,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=294800.0, ans=0.09899494936611666 +2024-09-17 20:29:38,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=294840.0, ans=0.1 +2024-09-17 20:30:17,439 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:30:26,392 INFO [train.py:1198] (1/2) Epoch 17, batch 1350, loss[loss=0.242, ctc_loss=0.1357, cr_loss=0.3505, attn_decoder_loss=0.2461, over 29761.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1447, cr_loss=0.3877, attn_decoder_loss=0.2541, over 5796191.80 frames. ], batch size: 81, lr: 6.62e-03, grad_scale: 8.0 +2024-09-17 20:30:31,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=295000.0, ans=0.125 +2024-09-17 20:30:35,309 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.389e+01 8.707e+01 9.188e+01 9.676e+01 1.559e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-17 20:30:46,609 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.89 vs. limit=15.0 +2024-09-17 20:30:48,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=295040.0, ans=0.125 +2024-09-17 20:30:57,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=295080.0, ans=0.125 +2024-09-17 20:31:08,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=295080.0, ans=0.125 +2024-09-17 20:31:21,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=295120.0, ans=0.2 +2024-09-17 20:31:35,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=295160.0, ans=0.07 +2024-09-17 20:31:37,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=295160.0, ans=0.125 +2024-09-17 20:31:40,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=295200.0, ans=0.125 +2024-09-17 20:31:41,836 INFO [train.py:1198] (1/2) Epoch 17, batch 1400, loss[loss=0.2159, ctc_loss=0.1139, cr_loss=0.3259, attn_decoder_loss=0.22, over 29585.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1445, cr_loss=0.3878, attn_decoder_loss=0.2538, over 5807140.12 frames. 
], batch size: 69, lr: 6.61e-03, grad_scale: 8.0 +2024-09-17 20:31:55,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=295240.0, ans=0.125 +2024-09-17 20:31:58,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=295240.0, ans=0.125 +2024-09-17 20:32:14,317 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.04 vs. limit=15.0 +2024-09-17 20:32:28,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=295320.0, ans=0.125 +2024-09-17 20:32:29,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=295320.0, ans=0.1 +2024-09-17 20:32:32,120 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=13.59 vs. limit=15.0 +2024-09-17 20:32:41,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=295320.0, ans=0.125 +2024-09-17 20:32:46,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=295360.0, ans=0.025 +2024-09-17 20:33:01,974 INFO [train.py:1198] (1/2) Epoch 17, batch 1450, loss[loss=0.2592, ctc_loss=0.1486, cr_loss=0.3831, attn_decoder_loss=0.263, over 29448.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1441, cr_loss=0.3871, attn_decoder_loss=0.254, over 5803787.58 frames. ], batch size: 94, lr: 6.61e-03, grad_scale: 8.0 +2024-09-17 20:33:03,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=295400.0, ans=10.0 +2024-09-17 20:33:10,933 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.631e+01 9.209e+01 9.989e+01 1.746e+02, threshold=1.842e+02, percent-clipped=0.0 +2024-09-17 20:33:21,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=295440.0, ans=0.0 +2024-09-17 20:34:02,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=295560.0, ans=0.125 +2024-09-17 20:34:16,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=295600.0, ans=0.2 +2024-09-17 20:34:17,471 INFO [train.py:1198] (1/2) Epoch 17, batch 1500, loss[loss=0.2645, ctc_loss=0.1486, cr_loss=0.3835, attn_decoder_loss=0.2688, over 29627.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1441, cr_loss=0.387, attn_decoder_loss=0.2545, over 5806449.30 frames. ], batch size: 86, lr: 6.61e-03, grad_scale: 8.0 +2024-09-17 20:34:36,561 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.30 vs. limit=15.0 +2024-09-17 20:34:55,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=295680.0, ans=0.125 +2024-09-17 20:34:57,852 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.66 vs. 
limit=15.0 +2024-09-17 20:35:04,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=295720.0, ans=0.125 +2024-09-17 20:35:17,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=295760.0, ans=0.125 +2024-09-17 20:35:29,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=295760.0, ans=0.125 +2024-09-17 20:35:33,486 INFO [train.py:1198] (1/2) Epoch 17, batch 1550, loss[loss=0.2755, ctc_loss=0.1642, cr_loss=0.4302, attn_decoder_loss=0.2783, over 29521.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1449, cr_loss=0.3873, attn_decoder_loss=0.2547, over 5782761.53 frames. ], batch size: 90, lr: 6.61e-03, grad_scale: 8.0 +2024-09-17 20:35:33,970 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 20:35:42,528 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.733e+01 9.019e+01 9.707e+01 1.076e+02 7.268e+02, threshold=1.941e+02, percent-clipped=2.0 +2024-09-17 20:35:49,345 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.55 vs. limit=22.5 +2024-09-17 20:36:06,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=295880.0, ans=0.0 +2024-09-17 20:36:08,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=295880.0, ans=0.07 +2024-09-17 20:36:30,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=295920.0, ans=0.125 +2024-09-17 20:36:33,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=295920.0, ans=0.0 +2024-09-17 20:36:50,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=295960.0, ans=0.125 +2024-09-17 20:36:53,526 INFO [train.py:1198] (1/2) Epoch 17, batch 1600, loss[loss=0.2581, ctc_loss=0.1477, cr_loss=0.395, attn_decoder_loss=0.2616, over 29681.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1451, cr_loss=0.3872, attn_decoder_loss=0.2546, over 5765606.35 frames. ], batch size: 85, lr: 6.61e-03, grad_scale: 16.0 +2024-09-17 20:37:31,052 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.63 vs. limit=22.5 +2024-09-17 20:37:43,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=296120.0, ans=0.125 +2024-09-17 20:37:51,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=296120.0, ans=0.125 +2024-09-17 20:38:03,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.71 vs. limit=15.0 +2024-09-17 20:38:08,986 INFO [train.py:1198] (1/2) Epoch 17, batch 1650, loss[loss=0.2677, ctc_loss=0.1505, cr_loss=0.3901, attn_decoder_loss=0.2721, over 29703.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1448, cr_loss=0.3863, attn_decoder_loss=0.2545, over 5761835.03 frames. 
], batch size: 89, lr: 6.60e-03, grad_scale: 8.0 +2024-09-17 20:38:10,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=296200.0, ans=0.125 +2024-09-17 20:38:17,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=296200.0, ans=0.125 +2024-09-17 20:38:18,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=296200.0, ans=0.2 +2024-09-17 20:38:19,708 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.329e+01 8.617e+01 9.352e+01 1.025e+02 5.265e+02, threshold=1.870e+02, percent-clipped=3.0 +2024-09-17 20:38:21,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff3.min_abs, batch_count=296200.0, ans=0.2 +2024-09-17 20:38:28,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.90 vs. limit=10.0 +2024-09-17 20:38:41,789 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.77 vs. limit=15.0 +2024-09-17 20:39:24,831 INFO [train.py:1198] (1/2) Epoch 17, batch 1700, loss[loss=0.2157, ctc_loss=0.1232, cr_loss=0.3483, attn_decoder_loss=0.2182, over 29564.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1443, cr_loss=0.3866, attn_decoder_loss=0.2541, over 5782256.54 frames. ], batch size: 69, lr: 6.60e-03, grad_scale: 8.0 +2024-09-17 20:39:31,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=296400.0, ans=0.2 +2024-09-17 20:39:41,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=296440.0, ans=0.125 +2024-09-17 20:39:53,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=296480.0, ans=0.0 +2024-09-17 20:40:04,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=296480.0, ans=0.125 +2024-09-17 20:40:06,028 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.10 vs. limit=12.0 +2024-09-17 20:40:07,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=296480.0, ans=0.07 +2024-09-17 20:40:24,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=296520.0, ans=0.125 +2024-09-17 20:40:26,759 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.49 vs. limit=15.0 +2024-09-17 20:40:31,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=296560.0, ans=0.09899494936611666 +2024-09-17 20:40:44,382 INFO [train.py:1198] (1/2) Epoch 17, batch 1750, loss[loss=0.2218, ctc_loss=0.1186, cr_loss=0.3389, attn_decoder_loss=0.2257, over 29387.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.144, cr_loss=0.3862, attn_decoder_loss=0.2538, over 5790634.97 frames. 
], batch size: 67, lr: 6.60e-03, grad_scale: 8.0 +2024-09-17 20:40:45,565 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.67 vs. limit=15.0 +2024-09-17 20:40:46,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=296600.0, ans=0.1 +2024-09-17 20:40:47,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=296600.0, ans=0.1 +2024-09-17 20:40:54,992 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.408e+01 8.563e+01 9.059e+01 9.719e+01 2.142e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-17 20:41:14,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=296680.0, ans=0.1 +2024-09-17 20:41:34,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=296720.0, ans=0.1 +2024-09-17 20:41:34,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=296720.0, ans=0.2 +2024-09-17 20:41:46,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=296760.0, ans=0.125 +2024-09-17 20:41:47,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=296760.0, ans=0.0 +2024-09-17 20:42:00,004 INFO [train.py:1198] (1/2) Epoch 17, batch 1800, loss[loss=0.2509, ctc_loss=0.1445, cr_loss=0.3899, attn_decoder_loss=0.2541, over 29693.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1442, cr_loss=0.3865, attn_decoder_loss=0.2542, over 5793146.56 frames. ], batch size: 83, lr: 6.60e-03, grad_scale: 8.0 +2024-09-17 20:42:01,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=296800.0, ans=0.125 +2024-09-17 20:42:03,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=296800.0, ans=0.0 +2024-09-17 20:42:06,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=296800.0, ans=0.0 +2024-09-17 20:42:27,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=296840.0, ans=0.125 +2024-09-17 20:42:29,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=296880.0, ans=0.125 +2024-09-17 20:42:58,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=296920.0, ans=0.125 +2024-09-17 20:43:07,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=296960.0, ans=0.125 +2024-09-17 20:43:08,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=296960.0, ans=0.1 +2024-09-17 20:43:16,000 INFO [train.py:1198] (1/2) Epoch 17, batch 1850, loss[loss=0.2678, ctc_loss=0.1511, cr_loss=0.4075, attn_decoder_loss=0.2717, over 29643.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.144, cr_loss=0.3863, attn_decoder_loss=0.2541, over 5799165.19 frames. 
], batch size: 86, lr: 6.59e-03, grad_scale: 8.0 +2024-09-17 20:43:26,330 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.783e+01 8.992e+01 9.506e+01 1.016e+02 2.077e+02, threshold=1.901e+02, percent-clipped=1.0 +2024-09-17 20:43:40,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=297040.0, ans=0.125 +2024-09-17 20:43:45,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.04 vs. limit=22.5 +2024-09-17 20:43:54,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=297080.0, ans=0.1 +2024-09-17 20:43:54,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.50 vs. limit=15.0 +2024-09-17 20:43:57,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=297080.0, ans=0.125 +2024-09-17 20:44:12,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=297120.0, ans=0.125 +2024-09-17 20:44:12,983 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=9.73 vs. limit=15.0 +2024-09-17 20:44:24,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=297160.0, ans=0.125 +2024-09-17 20:44:26,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=297160.0, ans=0.0 +2024-09-17 20:44:26,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=297160.0, ans=0.0 +2024-09-17 20:44:35,805 INFO [train.py:1198] (1/2) Epoch 17, batch 1900, loss[loss=0.2557, ctc_loss=0.1383, cr_loss=0.3853, attn_decoder_loss=0.2602, over 29688.00 frames. ], tot_loss[loss=0.251, ctc_loss=0.1442, cr_loss=0.3868, attn_decoder_loss=0.2543, over 5806205.11 frames. ], batch size: 89, lr: 6.59e-03, grad_scale: 8.0 +2024-09-17 20:44:38,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.90 vs. limit=6.0 +2024-09-17 20:45:02,836 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.15 vs. limit=6.0 +2024-09-17 20:45:09,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=8.57 vs. 
limit=15.0 +2024-09-17 20:45:18,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=297280.0, ans=0.025 +2024-09-17 20:45:41,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.min_positive, batch_count=297360.0, ans=0.025 +2024-09-17 20:45:44,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=297360.0, ans=0.0 +2024-09-17 20:45:46,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=297360.0, ans=0.025 +2024-09-17 20:45:49,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=297360.0, ans=0.125 +2024-09-17 20:45:52,073 INFO [train.py:1198] (1/2) Epoch 17, batch 1950, loss[loss=0.2521, ctc_loss=0.14, cr_loss=0.3732, attn_decoder_loss=0.2563, over 29447.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1448, cr_loss=0.3874, attn_decoder_loss=0.2554, over 5820150.49 frames. ], batch size: 78, lr: 6.59e-03, grad_scale: 8.0 +2024-09-17 20:46:00,530 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.90 vs. limit=15.0 +2024-09-17 20:46:02,790 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 8.875e+01 9.464e+01 9.894e+01 2.247e+02, threshold=1.893e+02, percent-clipped=1.0 +2024-09-17 20:46:04,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=297400.0, ans=0.125 +2024-09-17 20:46:06,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=297440.0, ans=0.125 +2024-09-17 20:46:11,530 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.07 vs. limit=15.0 +2024-09-17 20:46:12,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=297440.0, ans=0.125 +2024-09-17 20:46:19,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=297440.0, ans=0.0 +2024-09-17 20:46:28,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=297480.0, ans=0.125 +2024-09-17 20:46:36,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=297520.0, ans=0.125 +2024-09-17 20:46:40,137 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.67 vs. limit=15.0 +2024-09-17 20:46:43,397 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.31 vs. limit=22.5 +2024-09-17 20:47:08,297 INFO [train.py:1198] (1/2) Epoch 17, batch 2000, loss[loss=0.2177, ctc_loss=0.1171, cr_loss=0.336, attn_decoder_loss=0.2214, over 29334.00 frames. ], tot_loss[loss=0.2526, ctc_loss=0.1454, cr_loss=0.3882, attn_decoder_loss=0.2559, over 5797091.44 frames. 
], batch size: 67, lr: 6.59e-03, grad_scale: 16.0 +2024-09-17 20:47:16,896 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.87 vs. limit=22.5 +2024-09-17 20:47:20,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=297600.0, ans=0.125 +2024-09-17 20:47:28,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=297640.0, ans=0.0 +2024-09-17 20:47:37,872 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.32 vs. limit=15.0 +2024-09-17 20:47:43,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=297680.0, ans=0.2 +2024-09-17 20:48:26,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff3.min_abs, batch_count=297800.0, ans=0.2 +2024-09-17 20:48:27,878 INFO [train.py:1198] (1/2) Epoch 17, batch 2050, loss[loss=0.2316, ctc_loss=0.1259, cr_loss=0.3635, attn_decoder_loss=0.2353, over 29409.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1449, cr_loss=0.3872, attn_decoder_loss=0.2548, over 5789545.18 frames. ], batch size: 70, lr: 6.59e-03, grad_scale: 8.0 +2024-09-17 20:48:40,029 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.290e+01 8.707e+01 9.110e+01 9.757e+01 1.726e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-17 20:48:59,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=297880.0, ans=0.0 +2024-09-17 20:49:07,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=297880.0, ans=0.125 +2024-09-17 20:49:08,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=297880.0, ans=0.1 +2024-09-17 20:49:09,420 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.34 vs. limit=6.0 +2024-09-17 20:49:17,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=297920.0, ans=0.125 +2024-09-17 20:49:31,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=297960.0, ans=0.025 +2024-09-17 20:49:38,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=297960.0, ans=0.0 +2024-09-17 20:49:38,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=297960.0, ans=0.1 +2024-09-17 20:49:39,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.94 vs. limit=6.0 +2024-09-17 20:49:41,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=298000.0, ans=0.04949747468305833 +2024-09-17 20:49:43,061 INFO [train.py:1198] (1/2) Epoch 17, batch 2100, loss[loss=0.2344, ctc_loss=0.1304, cr_loss=0.3573, attn_decoder_loss=0.2381, over 29785.00 frames. 
], tot_loss[loss=0.2503, ctc_loss=0.1436, cr_loss=0.385, attn_decoder_loss=0.2536, over 5802375.64 frames. ], batch size: 81, lr: 6.58e-03, grad_scale: 8.0 +2024-09-17 20:49:44,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=298000.0, ans=0.0 +2024-09-17 20:49:50,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=298000.0, ans=0.04949747468305833 +2024-09-17 20:49:58,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=298040.0, ans=15.0 +2024-09-17 20:50:29,062 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.99 vs. limit=12.0 +2024-09-17 20:50:46,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=298160.0, ans=0.125 +2024-09-17 20:50:47,085 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.14 vs. limit=12.0 +2024-09-17 20:50:58,163 INFO [train.py:1198] (1/2) Epoch 17, batch 2150, loss[loss=0.2541, ctc_loss=0.1492, cr_loss=0.4085, attn_decoder_loss=0.2567, over 29479.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1431, cr_loss=0.3845, attn_decoder_loss=0.2531, over 5817133.15 frames. ], batch size: 78, lr: 6.58e-03, grad_scale: 8.0 +2024-09-17 20:51:01,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=298200.0, ans=0.125 +2024-09-17 20:51:10,382 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.066e+01 8.718e+01 9.185e+01 9.940e+01 1.615e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-17 20:51:42,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=298320.0, ans=0.025 +2024-09-17 20:51:49,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=298320.0, ans=0.125 +2024-09-17 20:51:55,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=298320.0, ans=0.2 +2024-09-17 20:51:57,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=298360.0, ans=0.125 +2024-09-17 20:52:03,881 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.92 vs. limit=22.5 +2024-09-17 20:52:13,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_ff2.min_abs, batch_count=298360.0, ans=0.1 +2024-09-17 20:52:17,829 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.73 vs. limit=22.5 +2024-09-17 20:52:18,711 INFO [train.py:1198] (1/2) Epoch 17, batch 2200, loss[loss=0.2542, ctc_loss=0.1366, cr_loss=0.3647, attn_decoder_loss=0.2591, over 29641.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1431, cr_loss=0.3848, attn_decoder_loss=0.2535, over 5812660.70 frames. 
], batch size: 86, lr: 6.58e-03, grad_scale: 8.0 +2024-09-17 20:53:11,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=298520.0, ans=0.125 +2024-09-17 20:53:14,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=298520.0, ans=0.1 +2024-09-17 20:53:20,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=298560.0, ans=0.125 +2024-09-17 20:53:24,801 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.69 vs. limit=15.0 +2024-09-17 20:53:25,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=298560.0, ans=0.125 +2024-09-17 20:53:34,402 INFO [train.py:1198] (1/2) Epoch 17, batch 2250, loss[loss=0.2573, ctc_loss=0.1478, cr_loss=0.4012, attn_decoder_loss=0.2605, over 29680.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1433, cr_loss=0.3846, attn_decoder_loss=0.2536, over 5811318.95 frames. ], batch size: 82, lr: 6.58e-03, grad_scale: 8.0 +2024-09-17 20:53:37,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.54 vs. limit=5.0 +2024-09-17 20:53:46,690 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.540e+01 9.223e+01 9.820e+01 2.780e+02, threshold=1.845e+02, percent-clipped=3.0 +2024-09-17 20:53:55,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=298640.0, ans=0.0 +2024-09-17 20:54:01,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=298640.0, ans=0.125 +2024-09-17 20:54:26,822 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.03 vs. limit=22.5 +2024-09-17 20:54:30,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=298720.0, ans=0.125 +2024-09-17 20:54:33,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=298760.0, ans=0.125 +2024-09-17 20:54:39,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=298760.0, ans=0.0 +2024-09-17 20:54:43,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.06 vs. limit=22.5 +2024-09-17 20:54:50,222 INFO [train.py:1198] (1/2) Epoch 17, batch 2300, loss[loss=0.228, ctc_loss=0.1261, cr_loss=0.363, attn_decoder_loss=0.2313, over 29361.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1429, cr_loss=0.384, attn_decoder_loss=0.2528, over 5797241.33 frames. 
], batch size: 71, lr: 6.57e-03, grad_scale: 8.0 +2024-09-17 20:54:56,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=298800.0, ans=0.1 +2024-09-17 20:55:04,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=298840.0, ans=0.1 +2024-09-17 20:55:10,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=298840.0, ans=0.125 +2024-09-17 20:55:39,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=298920.0, ans=0.1 +2024-09-17 20:55:39,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=298920.0, ans=0.0 +2024-09-17 20:55:48,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=298920.0, ans=0.0 +2024-09-17 20:55:55,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=298960.0, ans=0.05 +2024-09-17 20:55:59,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.03 vs. limit=15.0 +2024-09-17 20:56:00,687 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.46 vs. limit=15.0 +2024-09-17 20:56:07,994 INFO [train.py:1198] (1/2) Epoch 17, batch 2350, loss[loss=0.2635, ctc_loss=0.1509, cr_loss=0.3915, attn_decoder_loss=0.2673, over 29680.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1432, cr_loss=0.3848, attn_decoder_loss=0.2533, over 5801847.80 frames. ], batch size: 83, lr: 6.57e-03, grad_scale: 8.0 +2024-09-17 20:56:21,978 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.665e+01 8.873e+01 9.644e+01 1.055e+02 1.144e+03, threshold=1.929e+02, percent-clipped=2.0 +2024-09-17 20:57:03,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=299120.0, ans=0.125 +2024-09-17 20:57:26,176 INFO [train.py:1198] (1/2) Epoch 17, batch 2400, loss[loss=0.2335, ctc_loss=0.1327, cr_loss=0.3736, attn_decoder_loss=0.2364, over 29520.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1438, cr_loss=0.3857, attn_decoder_loss=0.2537, over 5806576.09 frames. 
], batch size: 76, lr: 6.57e-03, grad_scale: 16.0 +2024-09-17 20:57:35,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=299200.0, ans=0.125 +2024-09-17 20:57:38,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=299200.0, ans=0.09899494936611666 +2024-09-17 20:57:50,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=299240.0, ans=0.0 +2024-09-17 20:57:52,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=299240.0, ans=0.1 +2024-09-17 20:57:55,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=299280.0, ans=0.125 +2024-09-17 20:58:10,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=299320.0, ans=0.1 +2024-09-17 20:58:23,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=299320.0, ans=0.1 +2024-09-17 20:58:24,525 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.09 vs. limit=15.0 +2024-09-17 20:58:38,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.23 vs. limit=15.0 +2024-09-17 20:58:39,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=299360.0, ans=0.0 +2024-09-17 20:58:41,907 INFO [train.py:1198] (1/2) Epoch 17, batch 2450, loss[loss=0.2463, ctc_loss=0.1304, cr_loss=0.3649, attn_decoder_loss=0.2511, over 29697.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1443, cr_loss=0.3868, attn_decoder_loss=0.2548, over 5784365.42 frames. ], batch size: 82, lr: 6.57e-03, grad_scale: 8.0 +2024-09-17 20:58:55,497 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 9.066e+01 9.720e+01 1.171e+02 1.991e+02, threshold=1.944e+02, percent-clipped=1.0 +2024-09-17 20:58:55,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=299440.0, ans=0.1 +2024-09-17 20:59:19,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=299480.0, ans=0.125 +2024-09-17 20:59:26,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=299520.0, ans=0.125 +2024-09-17 20:59:27,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=299520.0, ans=0.125 +2024-09-17 20:59:42,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=299560.0, ans=0.125 +2024-09-17 20:59:54,558 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.51 vs. limit=22.5 +2024-09-17 20:59:59,621 INFO [train.py:1198] (1/2) Epoch 17, batch 2500, loss[loss=0.2448, ctc_loss=0.1281, cr_loss=0.366, attn_decoder_loss=0.2496, over 29641.00 frames. 
], tot_loss[loss=0.2516, ctc_loss=0.1445, cr_loss=0.3872, attn_decoder_loss=0.2549, over 5794824.45 frames. ], batch size: 86, lr: 6.57e-03, grad_scale: 8.0 +2024-09-17 21:00:19,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=299640.0, ans=0.1 +2024-09-17 21:00:20,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=299640.0, ans=0.125 +2024-09-17 21:00:36,203 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.06 vs. limit=10.0 +2024-09-17 21:00:50,522 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.26 vs. limit=15.0 +2024-09-17 21:01:18,017 INFO [train.py:1198] (1/2) Epoch 17, batch 2550, loss[loss=0.2229, ctc_loss=0.1246, cr_loss=0.3465, attn_decoder_loss=0.2261, over 29357.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1443, cr_loss=0.387, attn_decoder_loss=0.2548, over 5797727.52 frames. ], batch size: 67, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:01:31,610 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.868e+01 8.659e+01 9.126e+01 9.764e+01 1.342e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-17 21:01:36,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=299840.0, ans=0.0 +2024-09-17 21:01:49,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=299880.0, ans=0.125 +2024-09-17 21:01:51,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=299880.0, ans=0.125 +2024-09-17 21:02:08,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=299920.0, ans=0.0 +2024-09-17 21:02:09,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=299920.0, ans=0.2 +2024-09-17 21:02:18,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=299960.0, ans=0.125 +2024-09-17 21:02:23,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=299960.0, ans=0.1 +2024-09-17 21:02:34,196 INFO [train.py:1198] (1/2) Epoch 17, batch 2600, loss[loss=0.249, ctc_loss=0.144, cr_loss=0.3847, attn_decoder_loss=0.2522, over 29451.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.1445, cr_loss=0.3875, attn_decoder_loss=0.255, over 5794294.29 frames. ], batch size: 78, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:02:39,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=300000.0, ans=0.0 +2024-09-17 21:02:48,381 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.96 vs. 
limit=22.5 +2024-09-17 21:02:53,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=300040.0, ans=0.2 +2024-09-17 21:03:02,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=300080.0, ans=0.125 +2024-09-17 21:03:38,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=300160.0, ans=0.0 +2024-09-17 21:03:51,168 INFO [train.py:1198] (1/2) Epoch 17, batch 2650, loss[loss=0.259, ctc_loss=0.1456, cr_loss=0.4127, attn_decoder_loss=0.2624, over 29254.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1448, cr_loss=0.388, attn_decoder_loss=0.2554, over 5799683.73 frames. ], batch size: 100, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:03:54,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=300200.0, ans=0.125 +2024-09-17 21:03:56,623 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.87 vs. limit=6.0 +2024-09-17 21:04:06,991 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 8.955e+01 9.384e+01 9.945e+01 2.228e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-17 21:04:27,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=300280.0, ans=0.07 +2024-09-17 21:04:49,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=300320.0, ans=0.2 +2024-09-17 21:05:09,151 INFO [train.py:1198] (1/2) Epoch 17, batch 2700, loss[loss=0.2575, ctc_loss=0.1457, cr_loss=0.4069, attn_decoder_loss=0.2609, over 29541.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1446, cr_loss=0.388, attn_decoder_loss=0.2555, over 5794745.25 frames. ], batch size: 87, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:05:28,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=300440.0, ans=0.125 +2024-09-17 21:05:47,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=300480.0, ans=0.0 +2024-09-17 21:05:50,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=300480.0, ans=0.0 +2024-09-17 21:05:53,590 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.92 vs. limit=6.0 +2024-09-17 21:05:59,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=300520.0, ans=0.5 +2024-09-17 21:06:00,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=300520.0, ans=0.1 +2024-09-17 21:06:16,501 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.34 vs. limit=12.0 +2024-09-17 21:06:24,706 INFO [train.py:1198] (1/2) Epoch 17, batch 2750, loss[loss=0.2289, ctc_loss=0.1308, cr_loss=0.3706, attn_decoder_loss=0.2315, over 29526.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1437, cr_loss=0.3863, attn_decoder_loss=0.2541, over 5795235.05 frames. 
], batch size: 75, lr: 6.56e-03, grad_scale: 8.0 +2024-09-17 21:06:38,339 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 8.681e+01 9.439e+01 1.052e+02 4.745e+02, threshold=1.888e+02, percent-clipped=3.0 +2024-09-17 21:06:52,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=300640.0, ans=0.125 +2024-09-17 21:07:10,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=300720.0, ans=0.125 +2024-09-17 21:07:29,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=300760.0, ans=0.025 +2024-09-17 21:07:35,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=300760.0, ans=0.1 +2024-09-17 21:07:43,693 INFO [train.py:1198] (1/2) Epoch 17, batch 2800, loss[loss=0.2793, ctc_loss=0.1869, cr_loss=0.4242, attn_decoder_loss=0.2802, over 20448.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1436, cr_loss=0.3859, attn_decoder_loss=0.2541, over 5776567.61 frames. ], batch size: 209, lr: 6.55e-03, grad_scale: 16.0 +2024-09-17 21:07:59,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=300840.0, ans=0.0 +2024-09-17 21:08:05,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=300840.0, ans=0.125 +2024-09-17 21:08:07,965 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.91 vs. limit=6.0 +2024-09-17 21:08:10,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=300840.0, ans=0.09899494936611666 +2024-09-17 21:08:13,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=300840.0, ans=0.125 +2024-09-17 21:08:17,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=300880.0, ans=0.0 +2024-09-17 21:08:40,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=300920.0, ans=0.0 +2024-09-17 21:08:43,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=300920.0, ans=0.125 +2024-09-17 21:08:52,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=300960.0, ans=0.0 +2024-09-17 21:08:56,236 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.65 vs. limit=15.0 +2024-09-17 21:09:01,395 INFO [train.py:1198] (1/2) Epoch 17, batch 2850, loss[loss=0.2507, ctc_loss=0.141, cr_loss=0.3737, attn_decoder_loss=0.2546, over 29492.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1443, cr_loss=0.3864, attn_decoder_loss=0.2547, over 5763011.02 frames. ], batch size: 77, lr: 6.55e-03, grad_scale: 8.0 +2024-09-17 21:09:09,850 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.01 vs. 
limit=15.0 +2024-09-17 21:09:15,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=301040.0, ans=0.1 +2024-09-17 21:09:16,420 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.088e+01 8.947e+01 9.466e+01 1.049e+02 1.883e+02, threshold=1.893e+02, percent-clipped=0.0 +2024-09-17 21:09:45,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=301120.0, ans=0.95 +2024-09-17 21:09:55,370 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.05 vs. limit=6.0 +2024-09-17 21:10:05,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=301160.0, ans=0.0 +2024-09-17 21:10:06,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=301160.0, ans=0.125 +2024-09-17 21:10:06,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=301160.0, ans=0.125 +2024-09-17 21:10:11,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=301160.0, ans=0.025 +2024-09-17 21:10:17,118 INFO [train.py:1198] (1/2) Epoch 17, batch 2900, loss[loss=0.2463, ctc_loss=0.1383, cr_loss=0.3771, attn_decoder_loss=0.2499, over 29797.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1448, cr_loss=0.388, attn_decoder_loss=0.2558, over 5788203.50 frames. ], batch size: 80, lr: 6.55e-03, grad_scale: 8.0 +2024-09-17 21:10:21,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=301200.0, ans=0.125 +2024-09-17 21:10:21,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=301200.0, ans=0.125 +2024-09-17 21:10:24,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=301200.0, ans=0.0 +2024-09-17 21:10:28,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=301200.0, ans=0.1 +2024-09-17 21:10:38,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=301240.0, ans=0.125 +2024-09-17 21:10:42,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.18 vs. 
limit=15.0 +2024-09-17 21:10:46,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=301280.0, ans=0.125 +2024-09-17 21:10:52,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=301280.0, ans=0.125 +2024-09-17 21:10:52,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=301280.0, ans=0.125 +2024-09-17 21:11:01,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=301320.0, ans=0.125 +2024-09-17 21:11:35,008 INFO [train.py:1198] (1/2) Epoch 17, batch 2950, loss[loss=0.2381, ctc_loss=0.1235, cr_loss=0.3487, attn_decoder_loss=0.2431, over 29519.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.144, cr_loss=0.3862, attn_decoder_loss=0.2545, over 5782269.75 frames. ], batch size: 75, lr: 6.55e-03, grad_scale: 8.0 +2024-09-17 21:11:42,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=301400.0, ans=0.125 +2024-09-17 21:11:42,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=301400.0, ans=0.125 +2024-09-17 21:11:52,336 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.744e+01 8.656e+01 9.103e+01 9.738e+01 1.377e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-17 21:12:07,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=301480.0, ans=10.0 +2024-09-17 21:12:21,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=301520.0, ans=0.025 +2024-09-17 21:12:21,932 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.16 vs. limit=22.5 +2024-09-17 21:12:27,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=301520.0, ans=0.0 +2024-09-17 21:12:45,266 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:12:48,902 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.74 vs. limit=15.0 +2024-09-17 21:12:51,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=301600.0, ans=0.0 +2024-09-17 21:12:52,856 INFO [train.py:1198] (1/2) Epoch 17, batch 3000, loss[loss=0.2506, ctc_loss=0.1386, cr_loss=0.3829, attn_decoder_loss=0.2546, over 29758.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1439, cr_loss=0.3864, attn_decoder_loss=0.2541, over 5782437.83 frames. ], batch size: 81, lr: 6.54e-03, grad_scale: 8.0 +2024-09-17 21:12:52,856 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 21:12:58,622 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([6.7307, 6.6486, 6.1153, 6.1799], device='cuda:1') +2024-09-17 21:13:11,356 INFO [train.py:1230] (1/2) Epoch 17, validation: loss=0.2115, ctc_loss=0.04066, cr_loss=4.995e-15, attn_decoder_loss=0.2305, over 944034.00 frames. 
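[Editor's aside on reading the recurring "WARNING [optim.py:487] Clipping_scale=2.0, grad-norm quartiles ..." entries above: the five numbers after "grad-norm quartiles" are order statistics of recent per-batch gradient norms (plausibly min, 25%, median, 75%, max), and in every warning in this log the printed threshold equals Clipping_scale times the middle value, e.g. 2.0 * 9.103e+01 = 1.821e+02 in the warning just above; percent-clipped is then the share of recent batches whose norm exceeded that threshold. The following is a minimal Python sketch of that bookkeeping, not the actual icefall optimizer code; summarize_and_clip and grad_norms are illustrative names, and the exact quantile points and clipping window are assumptions.

import torch

def summarize_and_clip(grad_norms: torch.Tensor, clipping_scale: float = 2.0):
    # Five order statistics of the recent per-batch gradient norms --
    # matching the five numbers printed after "grad-norm quartiles".
    q = torch.quantile(grad_norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    # In each warning above, threshold == Clipping_scale * median,
    # e.g. 2.0 * 9.103e+01 = 1.821e+02.
    threshold = clipping_scale * q[2]
    # "percent-clipped": share of batches whose grad norm exceeded the
    # threshold (the log presumably counts batches since the last warning).
    percent_clipped = 100.0 * (grad_norms > threshold).float().mean()
    return q, threshold, percent_clipped

# Numbers of the same magnitude as the logged quartiles:
norms = torch.tensor([77.4, 86.6, 91.0, 97.4, 137.7])
q, thr, pct = summarize_and_clip(norms)
print(q.tolist(), float(thr), float(pct))

The ScheduledFloat entries that dominate the log read similarly: each named float (a dropout_p, skip_rate, whitening_limit, ...) is re-evaluated as a function of the global batch_count, and the ans field is the value currently in effect. The train.py:1198 entries report the per-batch loss with its ctc_loss, cr_loss, and attn_decoder_loss components, while tot_loss is apparently an aggregate over the frame count shown; the weighting of the components is not recorded in this log. End of aside.]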
+2024-09-17 21:13:11,357 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 21:13:20,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=301600.0, ans=0.125 +2024-09-17 21:13:34,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=301640.0, ans=0.2 +2024-09-17 21:13:47,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=301680.0, ans=0.0 +2024-09-17 21:14:04,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=301720.0, ans=0.125 +2024-09-17 21:14:27,356 INFO [train.py:1198] (1/2) Epoch 17, batch 3050, loss[loss=0.2425, ctc_loss=0.1385, cr_loss=0.39, attn_decoder_loss=0.2454, over 29525.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1448, cr_loss=0.3877, attn_decoder_loss=0.2551, over 5776740.95 frames. ], batch size: 76, lr: 6.54e-03, grad_scale: 8.0 +2024-09-17 21:14:30,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=301800.0, ans=0.04949747468305833 +2024-09-17 21:14:39,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=301800.0, ans=0.125 +2024-09-17 21:14:42,349 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.786e+01 9.363e+01 1.016e+02 1.140e+02 2.796e+02, threshold=2.033e+02, percent-clipped=4.0 +2024-09-17 21:14:56,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=301880.0, ans=0.125 +2024-09-17 21:14:59,594 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.62 vs. limit=15.0 +2024-09-17 21:15:36,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=301960.0, ans=0.0 +2024-09-17 21:15:43,072 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.75 vs. limit=15.0 +2024-09-17 21:15:46,820 INFO [train.py:1198] (1/2) Epoch 17, batch 3100, loss[loss=0.2712, ctc_loss=0.1551, cr_loss=0.4218, attn_decoder_loss=0.2747, over 29321.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1445, cr_loss=0.387, attn_decoder_loss=0.2547, over 5777170.20 frames. ], batch size: 100, lr: 6.54e-03, grad_scale: 8.0 +2024-09-17 21:16:21,671 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:17:02,401 INFO [train.py:1198] (1/2) Epoch 17, batch 3150, loss[loss=0.2595, ctc_loss=0.1506, cr_loss=0.3995, attn_decoder_loss=0.2627, over 28847.00 frames. ], tot_loss[loss=0.2515, ctc_loss=0.1444, cr_loss=0.3868, attn_decoder_loss=0.2548, over 5783813.29 frames. 
], batch size: 104, lr: 6.54e-03, grad_scale: 8.0 +2024-09-17 21:17:04,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=302200.0, ans=0.0 +2024-09-17 21:17:07,422 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:17:17,552 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 8.912e+01 9.257e+01 9.921e+01 1.761e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-17 21:17:35,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=302280.0, ans=0.1 +2024-09-17 21:17:55,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=302320.0, ans=0.0 +2024-09-17 21:18:00,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=302320.0, ans=0.2 +2024-09-17 21:18:03,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=302360.0, ans=0.0 +2024-09-17 21:18:18,471 INFO [train.py:1198] (1/2) Epoch 17, batch 3200, loss[loss=0.2556, ctc_loss=0.1512, cr_loss=0.4037, attn_decoder_loss=0.2582, over 29415.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1436, cr_loss=0.3853, attn_decoder_loss=0.2539, over 5793515.37 frames. ], batch size: 79, lr: 6.54e-03, grad_scale: 16.0 +2024-09-17 21:18:27,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=302400.0, ans=0.0 +2024-09-17 21:18:32,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=302440.0, ans=0.125 +2024-09-17 21:18:46,733 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.68 vs. limit=15.0 +2024-09-17 21:19:09,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=302520.0, ans=0.025 +2024-09-17 21:19:19,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=302560.0, ans=0.125 +2024-09-17 21:19:38,256 INFO [train.py:1198] (1/2) Epoch 17, batch 3250, loss[loss=0.2581, ctc_loss=0.16, cr_loss=0.414, attn_decoder_loss=0.2598, over 29705.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1438, cr_loss=0.3862, attn_decoder_loss=0.2543, over 5801104.28 frames. ], batch size: 84, lr: 6.53e-03, grad_scale: 8.0 +2024-09-17 21:19:44,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=302600.0, ans=0.125 +2024-09-17 21:19:54,947 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.314e+01 8.527e+01 9.036e+01 9.665e+01 1.223e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-17 21:19:55,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=302640.0, ans=0.125 +2024-09-17 21:19:58,768 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.21 vs. 
limit=15.0 +2024-09-17 21:20:17,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=302680.0, ans=0.09899494936611666 +2024-09-17 21:20:51,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.64 vs. limit=15.0 +2024-09-17 21:20:53,908 INFO [train.py:1198] (1/2) Epoch 17, batch 3300, loss[loss=0.2536, ctc_loss=0.1389, cr_loss=0.3865, attn_decoder_loss=0.2578, over 28170.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1431, cr_loss=0.385, attn_decoder_loss=0.2532, over 5798973.56 frames. ], batch size: 111, lr: 6.53e-03, grad_scale: 8.0 +2024-09-17 21:21:03,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=302800.0, ans=0.125 +2024-09-17 21:21:17,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=302840.0, ans=0.1 +2024-09-17 21:21:35,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=302880.0, ans=0.0 +2024-09-17 21:21:36,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=302880.0, ans=0.0 +2024-09-17 21:21:38,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=302920.0, ans=0.05 +2024-09-17 21:21:41,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=302920.0, ans=0.0 +2024-09-17 21:21:41,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=302920.0, ans=0.125 +2024-09-17 21:21:53,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=302960.0, ans=0.125 +2024-09-17 21:22:09,780 INFO [train.py:1198] (1/2) Epoch 17, batch 3350, loss[loss=0.2601, ctc_loss=0.1523, cr_loss=0.3961, attn_decoder_loss=0.2632, over 28758.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1438, cr_loss=0.386, attn_decoder_loss=0.2541, over 5775278.52 frames. ], batch size: 104, lr: 6.53e-03, grad_scale: 4.0 +2024-09-17 21:22:11,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=303000.0, ans=0.0 +2024-09-17 21:22:28,072 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.973e+01 8.919e+01 9.576e+01 1.043e+02 2.558e+02, threshold=1.915e+02, percent-clipped=2.0 +2024-09-17 21:22:40,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=303080.0, ans=0.2 +2024-09-17 21:23:29,947 INFO [train.py:1198] (1/2) Epoch 17, batch 3400, loss[loss=0.2277, ctc_loss=0.1271, cr_loss=0.3516, attn_decoder_loss=0.2311, over 29354.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.1442, cr_loss=0.3864, attn_decoder_loss=0.2544, over 5767419.01 frames. 
], batch size: 67, lr: 6.53e-03, grad_scale: 8.0 +2024-09-17 21:24:23,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=303320.0, ans=0.125 +2024-09-17 21:24:40,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=303360.0, ans=0.2 +2024-09-17 21:24:45,978 INFO [train.py:1198] (1/2) Epoch 17, batch 3450, loss[loss=0.256, ctc_loss=0.1406, cr_loss=0.3923, attn_decoder_loss=0.2601, over 28324.00 frames. ], tot_loss[loss=0.2511, ctc_loss=0.1438, cr_loss=0.3861, attn_decoder_loss=0.2545, over 5774327.29 frames. ], batch size: 111, lr: 6.53e-03, grad_scale: 8.0 +2024-09-17 21:24:55,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=303400.0, ans=0.0 +2024-09-17 21:25:00,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=303440.0, ans=0.125 +2024-09-17 21:25:00,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=303440.0, ans=0.125 +2024-09-17 21:25:04,358 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.501e+01 9.059e+01 9.380e+01 1.001e+02 2.094e+02, threshold=1.876e+02, percent-clipped=1.0 +2024-09-17 21:25:13,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=303440.0, ans=0.07 +2024-09-17 21:25:15,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=303480.0, ans=0.0 +2024-09-17 21:25:16,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=303480.0, ans=0.125 +2024-09-17 21:25:19,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=303480.0, ans=0.1 +2024-09-17 21:25:24,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=303480.0, ans=0.125 +2024-09-17 21:25:38,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=303520.0, ans=0.125 +2024-09-17 21:26:01,983 INFO [train.py:1198] (1/2) Epoch 17, batch 3500, loss[loss=0.2194, ctc_loss=0.1136, cr_loss=0.337, attn_decoder_loss=0.2237, over 29302.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1432, cr_loss=0.3851, attn_decoder_loss=0.2537, over 5775872.86 frames. ], batch size: 71, lr: 6.52e-03, grad_scale: 8.0 +2024-09-17 21:26:11,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=303600.0, ans=0.1 +2024-09-17 21:26:14,774 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.85 vs. limit=10.0 +2024-09-17 21:26:32,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=303680.0, ans=0.1 +2024-09-17 21:26:35,709 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.27 vs. 
limit=15.0 +2024-09-17 21:26:42,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=303680.0, ans=0.125 +2024-09-17 21:26:58,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=303720.0, ans=0.025 +2024-09-17 21:27:01,865 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.73 vs. limit=12.0 +2024-09-17 21:27:02,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=303760.0, ans=0.125 +2024-09-17 21:27:18,819 INFO [train.py:1198] (1/2) Epoch 17, batch 3550, loss[loss=0.2577, ctc_loss=0.1349, cr_loss=0.3564, attn_decoder_loss=0.2634, over 29705.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1433, cr_loss=0.3857, attn_decoder_loss=0.2537, over 5781493.67 frames. ], batch size: 89, lr: 6.52e-03, grad_scale: 8.0 +2024-09-17 21:27:36,512 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.165e+01 8.716e+01 9.254e+01 9.841e+01 2.209e+02, threshold=1.851e+02, percent-clipped=2.0 +2024-09-17 21:27:39,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=303840.0, ans=0.125 +2024-09-17 21:27:57,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=303880.0, ans=0.025 +2024-09-17 21:28:00,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=303880.0, ans=0.125 +2024-09-17 21:28:18,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.20 vs. limit=15.0 +2024-09-17 21:28:43,014 INFO [train.py:1198] (1/2) Epoch 17, batch 3600, loss[loss=0.2273, ctc_loss=0.1189, cr_loss=0.3231, attn_decoder_loss=0.2322, over 29502.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1437, cr_loss=0.3862, attn_decoder_loss=0.254, over 5791569.09 frames. ], batch size: 77, lr: 6.52e-03, grad_scale: 16.0 +2024-09-17 21:28:43,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=304000.0, ans=0.035 +2024-09-17 21:28:43,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=304000.0, ans=0.125 +2024-09-17 21:29:01,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=304040.0, ans=0.1 +2024-09-17 21:29:18,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.32 vs. 
limit=15.0 +2024-09-17 21:29:25,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=304080.0, ans=0.0 +2024-09-17 21:29:26,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=304120.0, ans=0.125 +2024-09-17 21:29:37,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=304120.0, ans=0.2 +2024-09-17 21:29:57,763 INFO [train.py:1198] (1/2) Epoch 17, batch 3650, loss[loss=0.2597, ctc_loss=0.1519, cr_loss=0.4026, attn_decoder_loss=0.2627, over 29521.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1429, cr_loss=0.3845, attn_decoder_loss=0.2532, over 5794204.23 frames. ], batch size: 90, lr: 6.52e-03, grad_scale: 8.0 +2024-09-17 21:30:11,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=304240.0, ans=0.07 +2024-09-17 21:30:17,004 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.700e+01 8.843e+01 9.212e+01 9.798e+01 3.342e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-17 21:30:46,406 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.92 vs. limit=15.0 +2024-09-17 21:31:12,236 INFO [train.py:1198] (1/2) Epoch 17, batch 3700, loss[loss=0.2551, ctc_loss=0.1444, cr_loss=0.3868, attn_decoder_loss=0.2588, over 29703.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1428, cr_loss=0.3843, attn_decoder_loss=0.2533, over 5804124.89 frames. ], batch size: 84, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:31:18,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=304400.0, ans=0.0 +2024-09-17 21:31:20,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=304400.0, ans=0.025 +2024-09-17 21:31:21,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=304400.0, ans=0.1 +2024-09-17 21:31:24,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=304400.0, ans=0.95 +2024-09-17 21:31:27,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=304440.0, ans=0.1 +2024-09-17 21:31:36,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=304440.0, ans=0.125 +2024-09-17 21:31:36,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=304440.0, ans=0.2 +2024-09-17 21:31:50,158 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.56 vs. limit=15.0 +2024-09-17 21:31:53,145 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.46 vs. 
limit=15.0 +2024-09-17 21:31:59,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=304520.0, ans=0.0 +2024-09-17 21:32:02,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.40 vs. limit=15.0 +2024-09-17 21:32:06,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=304520.0, ans=15.0 +2024-09-17 21:32:06,583 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.12 vs. limit=15.0 +2024-09-17 21:32:15,208 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.64 vs. limit=10.0 +2024-09-17 21:32:26,330 INFO [train.py:1198] (1/2) Epoch 17, batch 3750, loss[loss=0.2158, ctc_loss=0.1149, cr_loss=0.3274, attn_decoder_loss=0.2197, over 29322.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1429, cr_loss=0.3838, attn_decoder_loss=0.2532, over 5807798.41 frames. ], batch size: 67, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:32:38,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=304600.0, ans=0.125 +2024-09-17 21:32:45,808 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 8.729e+01 9.186e+01 9.795e+01 2.542e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-17 21:32:48,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.21 vs. limit=12.0 +2024-09-17 21:32:53,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=304640.0, ans=0.125 +2024-09-17 21:33:17,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=304720.0, ans=0.125 +2024-09-17 21:33:20,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=304720.0, ans=0.125 +2024-09-17 21:33:31,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=304760.0, ans=0.025 +2024-09-17 21:33:43,105 INFO [train.py:1198] (1/2) Epoch 17, batch 3800, loss[loss=0.2596, ctc_loss=0.1454, cr_loss=0.3975, attn_decoder_loss=0.2634, over 29626.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1427, cr_loss=0.3838, attn_decoder_loss=0.2532, over 5799061.58 frames. ], batch size: 86, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:33:49,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=304800.0, ans=0.1 +2024-09-17 21:33:50,150 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.22 vs. 
limit=15.0 +2024-09-17 21:33:59,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=304840.0, ans=0.2 +2024-09-17 21:34:17,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=304880.0, ans=0.1 +2024-09-17 21:34:20,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=304880.0, ans=0.025 +2024-09-17 21:34:50,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=304960.0, ans=0.125 +2024-09-17 21:34:51,259 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.34 vs. limit=15.0 +2024-09-17 21:34:59,151 INFO [train.py:1198] (1/2) Epoch 17, batch 3850, loss[loss=0.264, ctc_loss=0.1512, cr_loss=0.4122, attn_decoder_loss=0.2674, over 29224.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1428, cr_loss=0.3846, attn_decoder_loss=0.2533, over 5813045.93 frames. ], batch size: 100, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:35:00,069 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.35 vs. limit=15.0 +2024-09-17 21:35:00,097 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.54 vs. limit=12.0 +2024-09-17 21:35:16,150 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.58 vs. limit=15.0 +2024-09-17 21:35:18,452 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.679e+01 8.722e+01 9.215e+01 9.828e+01 1.401e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-17 21:35:20,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=305040.0, ans=0.125 +2024-09-17 21:35:36,061 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.35 vs. limit=15.0 +2024-09-17 21:35:48,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=305120.0, ans=0.025 +2024-09-17 21:35:54,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=305120.0, ans=0.0 +2024-09-17 21:36:04,146 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.14 vs. limit=15.0 +2024-09-17 21:36:12,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=305200.0, ans=0.0 +2024-09-17 21:36:13,794 INFO [train.py:1198] (1/2) Epoch 17, batch 3900, loss[loss=0.2668, ctc_loss=0.152, cr_loss=0.4096, attn_decoder_loss=0.2705, over 29631.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1429, cr_loss=0.3846, attn_decoder_loss=0.2537, over 5817121.57 frames. 
], batch size: 86, lr: 6.51e-03, grad_scale: 8.0 +2024-09-17 21:36:48,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=305280.0, ans=0.125 +2024-09-17 21:36:52,717 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:36:58,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=305320.0, ans=0.025 +2024-09-17 21:37:05,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=305320.0, ans=0.0 +2024-09-17 21:37:07,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=305320.0, ans=0.0 +2024-09-17 21:37:19,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=305360.0, ans=0.0 +2024-09-17 21:37:28,055 INFO [train.py:1198] (1/2) Epoch 17, batch 3950, loss[loss=0.2714, ctc_loss=0.1637, cr_loss=0.4438, attn_decoder_loss=0.2735, over 29483.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1426, cr_loss=0.385, attn_decoder_loss=0.2536, over 5836425.41 frames. ], batch size: 97, lr: 6.50e-03, grad_scale: 8.0 +2024-09-17 21:37:32,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=305400.0, ans=0.0 +2024-09-17 21:37:34,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=305400.0, ans=0.125 +2024-09-17 21:37:47,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.06 vs. limit=15.0 +2024-09-17 21:37:47,421 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.275e+01 8.762e+01 9.164e+01 9.964e+01 1.868e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-17 21:37:50,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=305440.0, ans=0.2 +2024-09-17 21:37:56,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=305480.0, ans=0.125 +2024-09-17 21:37:56,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=305480.0, ans=0.07 +2024-09-17 21:37:59,103 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.11 vs. limit=22.5 +2024-09-17 21:38:06,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=305480.0, ans=0.1 +2024-09-17 21:38:09,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=305480.0, ans=0.125 +2024-09-17 21:38:09,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=305480.0, ans=0.125 +2024-09-17 21:38:40,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.81 vs. 
limit=15.0 +2024-09-17 21:38:44,016 INFO [train.py:1198] (1/2) Epoch 17, batch 4000, loss[loss=0.2347, ctc_loss=0.1289, cr_loss=0.3485, attn_decoder_loss=0.2387, over 29506.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1432, cr_loss=0.3853, attn_decoder_loss=0.2537, over 5812671.69 frames. ], batch size: 74, lr: 6.50e-03, grad_scale: 16.0 +2024-09-17 21:39:04,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=305640.0, ans=0.125 +2024-09-17 21:39:09,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=305640.0, ans=0.09899494936611666 +2024-09-17 21:39:18,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=305680.0, ans=0.0 +2024-09-17 21:39:27,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=305720.0, ans=0.0 +2024-09-17 21:39:38,254 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.48 vs. limit=15.0 +2024-09-17 21:39:55,861 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.07 vs. limit=12.0 +2024-09-17 21:39:59,434 INFO [train.py:1198] (1/2) Epoch 17, batch 4050, loss[loss=0.2865, ctc_loss=0.1909, cr_loss=0.4292, attn_decoder_loss=0.2876, over 20173.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1432, cr_loss=0.3855, attn_decoder_loss=0.2538, over 5796427.29 frames. ], batch size: 210, lr: 6.50e-03, grad_scale: 8.0 +2024-09-17 21:40:01,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=305800.0, ans=0.1 +2024-09-17 21:40:19,859 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.516e+01 8.726e+01 9.314e+01 1.066e+02 2.595e+02, threshold=1.863e+02, percent-clipped=1.0 +2024-09-17 21:40:22,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.64 vs. limit=15.0 +2024-09-17 21:40:27,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=305880.0, ans=0.125 +2024-09-17 21:40:46,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=305920.0, ans=0.125 +2024-09-17 21:41:06,467 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.44 vs. limit=15.0 +2024-09-17 21:41:07,237 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:41:08,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=305960.0, ans=0.0 +2024-09-17 21:41:11,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=306000.0, ans=0.1 +2024-09-17 21:41:12,870 INFO [train.py:1198] (1/2) Epoch 17, batch 4100, loss[loss=0.2661, ctc_loss=0.1571, cr_loss=0.4374, attn_decoder_loss=0.2685, over 29506.00 frames. ], tot_loss[loss=0.2508, ctc_loss=0.1437, cr_loss=0.3861, attn_decoder_loss=0.2541, over 5792194.11 frames. 
], batch size: 90, lr: 6.50e-03, grad_scale: 8.0 +2024-09-17 21:41:29,925 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.47 vs. limit=22.5 +2024-09-17 21:41:48,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=306080.0, ans=0.0 +2024-09-17 21:41:54,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=306080.0, ans=0.1 +2024-09-17 21:41:57,959 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.21 vs. limit=6.0 +2024-09-17 21:42:26,513 INFO [train.py:1198] (1/2) Epoch 17, batch 4150, loss[loss=0.2478, ctc_loss=0.1438, cr_loss=0.391, attn_decoder_loss=0.2506, over 29488.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1435, cr_loss=0.3854, attn_decoder_loss=0.2535, over 5797381.48 frames. ], batch size: 77, lr: 6.50e-03, grad_scale: 8.0 +2024-09-17 21:42:28,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=306200.0, ans=0.125 +2024-09-17 21:42:42,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=306240.0, ans=0.2 +2024-09-17 21:42:47,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=306240.0, ans=0.0 +2024-09-17 21:42:48,267 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.408e+01 8.896e+01 9.299e+01 9.873e+01 2.442e+02, threshold=1.860e+02, percent-clipped=1.0 +2024-09-17 21:43:00,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=306280.0, ans=0.125 +2024-09-17 21:43:00,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=306280.0, ans=0.07 +2024-09-17 21:43:03,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=306280.0, ans=0.125 +2024-09-17 21:43:12,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=306320.0, ans=0.0 +2024-09-17 21:43:29,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=306360.0, ans=0.125 +2024-09-17 21:43:30,201 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.45 vs. limit=12.0 +2024-09-17 21:43:41,494 INFO [train.py:1198] (1/2) Epoch 17, batch 4200, loss[loss=0.2684, ctc_loss=0.1702, cr_loss=0.4485, attn_decoder_loss=0.2694, over 29498.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1435, cr_loss=0.3861, attn_decoder_loss=0.2536, over 5799648.90 frames. ], batch size: 90, lr: 6.49e-03, grad_scale: 8.0 +2024-09-17 21:43:52,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.75 vs. 
limit=15.0 +2024-09-17 21:44:02,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=306440.0, ans=0.1 +2024-09-17 21:44:17,194 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.38 vs. limit=22.5 +2024-09-17 21:44:24,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=306480.0, ans=0.0 +2024-09-17 21:44:26,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=306520.0, ans=0.0 +2024-09-17 21:44:27,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=306520.0, ans=0.1 +2024-09-17 21:44:56,160 INFO [train.py:1198] (1/2) Epoch 17, batch 4250, loss[loss=0.2353, ctc_loss=0.1293, cr_loss=0.3602, attn_decoder_loss=0.2391, over 29501.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.143, cr_loss=0.3851, attn_decoder_loss=0.2535, over 5805525.99 frames. ], batch size: 74, lr: 6.49e-03, grad_scale: 8.0 +2024-09-17 21:44:57,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=306600.0, ans=0.125 +2024-09-17 21:45:08,426 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=11.66 vs. limit=15.0 +2024-09-17 21:45:16,392 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.648e+01 8.736e+01 9.267e+01 9.996e+01 5.774e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-17 21:45:23,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=306680.0, ans=0.0 +2024-09-17 21:45:25,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=306680.0, ans=0.125 +2024-09-17 21:45:39,370 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.98 vs. limit=15.0 +2024-09-17 21:45:55,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=306760.0, ans=0.125 +2024-09-17 21:46:02,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=306760.0, ans=0.0 +2024-09-17 21:46:02,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=306760.0, ans=0.2 +2024-09-17 21:46:05,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=306760.0, ans=0.2 +2024-09-17 21:46:09,565 INFO [train.py:1198] (1/2) Epoch 17, batch 4300, loss[loss=0.2659, ctc_loss=0.1528, cr_loss=0.3939, attn_decoder_loss=0.2697, over 29524.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1434, cr_loss=0.3852, attn_decoder_loss=0.254, over 5796078.35 frames. 
], batch size: 87, lr: 6.49e-03, grad_scale: 8.0 +2024-09-17 21:46:10,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=306800.0, ans=0.125 +2024-09-17 21:46:31,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=306840.0, ans=0.05 +2024-09-17 21:46:45,430 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.93 vs. limit=15.0 +2024-09-17 21:46:47,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.94 vs. limit=22.5 +2024-09-17 21:46:55,207 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:47:08,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=306960.0, ans=0.0 +2024-09-17 21:47:16,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=306960.0, ans=0.125 +2024-09-17 21:47:18,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=306960.0, ans=0.1 +2024-09-17 21:47:25,584 INFO [train.py:1198] (1/2) Epoch 17, batch 4350, loss[loss=0.2602, ctc_loss=0.1515, cr_loss=0.4, attn_decoder_loss=0.2634, over 29486.00 frames. ], tot_loss[loss=0.2537, ctc_loss=0.1458, cr_loss=0.39, attn_decoder_loss=0.2571, over 5798655.14 frames. ], batch size: 97, lr: 6.49e-03, grad_scale: 8.0 +2024-09-17 21:47:25,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=307000.0, ans=0.125 +2024-09-17 21:47:42,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=307040.0, ans=0.025 +2024-09-17 21:47:43,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=307040.0, ans=0.125 +2024-09-17 21:47:45,642 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.76 vs. limit=15.0 +2024-09-17 21:47:46,025 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.975e+01 9.056e+01 9.451e+01 1.005e+02 2.709e+02, threshold=1.890e+02, percent-clipped=3.0 +2024-09-17 21:47:52,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=307040.0, ans=0.2 +2024-09-17 21:48:24,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=307160.0, ans=0.2 +2024-09-17 21:48:30,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=307160.0, ans=0.125 +2024-09-17 21:48:34,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=307160.0, ans=0.125 +2024-09-17 21:48:39,024 INFO [train.py:1198] (1/2) Epoch 17, batch 4400, loss[loss=0.2599, ctc_loss=0.1501, cr_loss=0.3658, attn_decoder_loss=0.264, over 27120.00 frames. ], tot_loss[loss=0.2561, ctc_loss=0.1477, cr_loss=0.3934, attn_decoder_loss=0.2594, over 5766936.15 frames. 
], batch size: 124, lr: 6.49e-03, grad_scale: 16.0 +2024-09-17 21:48:42,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=307200.0, ans=0.125 +2024-09-17 21:48:57,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=307240.0, ans=0.1 +2024-09-17 21:49:03,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=307240.0, ans=0.2 +2024-09-17 21:49:06,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten.whitening_limit, batch_count=307240.0, ans=22.5 +2024-09-17 21:49:26,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=307320.0, ans=0.0 +2024-09-17 21:49:37,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=307360.0, ans=0.0 +2024-09-17 21:49:43,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=307360.0, ans=0.125 +2024-09-17 21:49:54,141 INFO [train.py:1198] (1/2) Epoch 17, batch 4450, loss[loss=0.2719, ctc_loss=0.1688, cr_loss=0.4145, attn_decoder_loss=0.2741, over 20321.00 frames. ], tot_loss[loss=0.259, ctc_loss=0.1524, cr_loss=0.3986, attn_decoder_loss=0.262, over 5574321.89 frames. ], batch size: 210, lr: 6.48e-03, grad_scale: 8.0 +2024-09-17 21:49:58,332 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.05 vs. limit=22.5 +2024-09-17 21:50:16,945 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.924e+01 9.297e+01 9.769e+01 1.205e+02 1.699e+02, threshold=1.954e+02, percent-clipped=0.0 +2024-09-17 21:50:29,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=307480.0, ans=0.025 +2024-09-17 21:50:34,637 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.63 vs. limit=6.0 +2024-09-17 21:50:38,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=307520.0, ans=0.125 +2024-09-17 21:51:10,065 INFO [train.py:1198] (1/2) Epoch 17, batch 4500, loss[loss=0.2873, ctc_loss=0.1965, cr_loss=0.4461, attn_decoder_loss=0.2875, over 19754.00 frames. ], tot_loss[loss=0.2624, ctc_loss=0.1583, cr_loss=0.4011, attn_decoder_loss=0.265, over 5235070.29 frames. ], batch size: 210, lr: 6.48e-03, grad_scale: 8.0 +2024-09-17 21:51:14,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=307600.0, ans=0.125 +2024-09-17 21:51:19,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=307600.0, ans=0.125 +2024-09-17 21:51:24,528 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.75 vs. 
limit=15.0 +2024-09-17 21:51:28,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=307640.0, ans=0.5 +2024-09-17 21:51:33,314 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 21:51:39,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=307680.0, ans=0.125 +2024-09-17 21:51:45,673 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.51 vs. limit=15.0 +2024-09-17 21:52:34,419 INFO [train.py:1198] (1/2) Epoch 18, batch 0, loss[loss=0.2279, ctc_loss=0.1244, cr_loss=0.3611, attn_decoder_loss=0.2313, over 29621.00 frames. ], tot_loss[loss=0.2279, ctc_loss=0.1244, cr_loss=0.3611, attn_decoder_loss=0.2313, over 29621.00 frames. ], batch size: 73, lr: 6.29e-03, grad_scale: 16.0 +2024-09-17 21:52:34,419 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 21:52:52,855 INFO [train.py:1230] (1/2) Epoch 18, validation: loss=0.2122, ctc_loss=0.03991, cr_loss=4.926e-15, attn_decoder_loss=0.2314, over 944034.00 frames. +2024-09-17 21:52:52,856 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 21:52:57,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=307700.0, ans=0.125 +2024-09-17 21:53:12,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=307740.0, ans=0.125 +2024-09-17 21:53:37,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=307780.0, ans=0.1 +2024-09-17 21:53:56,818 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.012e+01 9.686e+01 1.126e+02 1.212e+02 3.801e+02, threshold=2.253e+02, percent-clipped=2.0 +2024-09-17 21:54:10,448 INFO [train.py:1198] (1/2) Epoch 18, batch 50, loss[loss=0.2191, ctc_loss=0.1146, cr_loss=0.3401, attn_decoder_loss=0.2232, over 29473.00 frames. ], tot_loss[loss=0.2517, ctc_loss=0.1459, cr_loss=0.3888, attn_decoder_loss=0.2548, over 1268906.64 frames. ], batch size: 70, lr: 6.29e-03, grad_scale: 8.0 +2024-09-17 21:54:11,331 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.02 vs. limit=12.0 +2024-09-17 21:54:13,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=307900.0, ans=0.125 +2024-09-17 21:54:16,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=307900.0, ans=0.0 +2024-09-17 21:54:27,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=307940.0, ans=0.0 +2024-09-17 21:54:55,702 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.60 vs. limit=15.0 +2024-09-17 21:54:57,303 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.75 vs. 
limit=15.0 +2024-09-17 21:55:02,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=308020.0, ans=0.2 +2024-09-17 21:55:25,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=308100.0, ans=0.125 +2024-09-17 21:55:26,559 INFO [train.py:1198] (1/2) Epoch 18, batch 100, loss[loss=0.2445, ctc_loss=0.1411, cr_loss=0.3801, attn_decoder_loss=0.2476, over 29546.00 frames. ], tot_loss[loss=0.2542, ctc_loss=0.147, cr_loss=0.3925, attn_decoder_loss=0.2574, over 2252356.58 frames. ], batch size: 76, lr: 6.29e-03, grad_scale: 8.0 +2024-09-17 21:55:52,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=308140.0, ans=0.04949747468305833 +2024-09-17 21:55:52,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=308140.0, ans=0.2 +2024-09-17 21:56:01,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=308180.0, ans=0.07 +2024-09-17 21:56:06,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=308180.0, ans=0.125 +2024-09-17 21:56:30,070 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.573e+01 8.618e+01 9.118e+01 9.635e+01 1.582e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-17 21:56:30,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=308260.0, ans=0.125 +2024-09-17 21:56:43,557 INFO [train.py:1198] (1/2) Epoch 18, batch 150, loss[loss=0.2153, ctc_loss=0.1159, cr_loss=0.3412, attn_decoder_loss=0.2188, over 29407.00 frames. ], tot_loss[loss=0.2509, ctc_loss=0.1436, cr_loss=0.3861, attn_decoder_loss=0.2543, over 3048509.20 frames. ], batch size: 70, lr: 6.29e-03, grad_scale: 8.0 +2024-09-17 21:56:43,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=308300.0, ans=0.05 +2024-09-17 21:57:28,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=308380.0, ans=0.04949747468305833 +2024-09-17 21:57:40,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=308420.0, ans=0.125 +2024-09-17 21:57:59,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=308500.0, ans=0.125 +2024-09-17 21:58:01,110 INFO [train.py:1198] (1/2) Epoch 18, batch 200, loss[loss=0.2625, ctc_loss=0.1638, cr_loss=0.4227, attn_decoder_loss=0.264, over 27203.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1433, cr_loss=0.3865, attn_decoder_loss=0.2537, over 3660338.02 frames. ], batch size: 124, lr: 6.29e-03, grad_scale: 8.0 +2024-09-17 21:58:16,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=308540.0, ans=0.125 +2024-09-17 21:58:35,496 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.50 vs. 
limit=22.5 +2024-09-17 21:58:55,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=308620.0, ans=0.1 +2024-09-17 21:59:03,043 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.405e+01 8.718e+01 9.535e+01 1.012e+02 1.370e+02, threshold=1.907e+02, percent-clipped=0.0 +2024-09-17 21:59:09,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=308660.0, ans=0.125 +2024-09-17 21:59:16,548 INFO [train.py:1198] (1/2) Epoch 18, batch 250, loss[loss=0.2616, ctc_loss=0.1481, cr_loss=0.3946, attn_decoder_loss=0.2654, over 29252.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.143, cr_loss=0.3863, attn_decoder_loss=0.2537, over 4142604.43 frames. ], batch size: 100, lr: 6.28e-03, grad_scale: 8.0 +2024-09-17 21:59:24,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=308700.0, ans=0.125 +2024-09-17 21:59:26,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=308700.0, ans=0.2 +2024-09-17 21:59:33,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=308740.0, ans=0.125 +2024-09-17 21:59:33,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=308740.0, ans=0.125 +2024-09-17 22:00:31,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=308860.0, ans=0.09899494936611666 +2024-09-17 22:00:35,431 INFO [train.py:1198] (1/2) Epoch 18, batch 300, loss[loss=0.2553, ctc_loss=0.1451, cr_loss=0.3944, attn_decoder_loss=0.2588, over 29542.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1428, cr_loss=0.3859, attn_decoder_loss=0.2534, over 4510851.58 frames. ], batch size: 92, lr: 6.28e-03, grad_scale: 8.0 +2024-09-17 22:00:42,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten.whitening_limit, batch_count=308900.0, ans=15.0 +2024-09-17 22:01:18,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=308980.0, ans=0.0 +2024-09-17 22:01:25,323 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.61 vs. limit=22.5 +2024-09-17 22:01:39,753 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.552e+01 9.008e+01 9.448e+01 1.517e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-17 22:01:53,397 INFO [train.py:1198] (1/2) Epoch 18, batch 350, loss[loss=0.2208, ctc_loss=0.1153, cr_loss=0.3421, attn_decoder_loss=0.2249, over 29314.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1427, cr_loss=0.3864, attn_decoder_loss=0.2537, over 4796341.63 frames. 
], batch size: 71, lr: 6.28e-03, grad_scale: 8.0 +2024-09-17 22:01:53,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=309100.0, ans=0.05 +2024-09-17 22:02:04,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=309100.0, ans=0.2 +2024-09-17 22:02:17,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=309140.0, ans=0.0 +2024-09-17 22:02:23,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=309180.0, ans=0.1 +2024-09-17 22:02:44,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=309220.0, ans=0.0 +2024-09-17 22:02:49,992 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.29 vs. limit=15.0 +2024-09-17 22:02:50,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=309220.0, ans=0.0 +2024-09-17 22:02:53,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=309260.0, ans=0.1 +2024-09-17 22:03:08,711 INFO [train.py:1198] (1/2) Epoch 18, batch 400, loss[loss=0.2489, ctc_loss=0.1334, cr_loss=0.3733, attn_decoder_loss=0.2535, over 29735.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1423, cr_loss=0.386, attn_decoder_loss=0.2532, over 5025927.94 frames. ], batch size: 82, lr: 6.28e-03, grad_scale: 16.0 +2024-09-17 22:03:09,850 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.97 vs. limit=22.5 +2024-09-17 22:03:10,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=309300.0, ans=0.125 +2024-09-17 22:03:15,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=309300.0, ans=0.125 +2024-09-17 22:03:30,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=309340.0, ans=0.5 +2024-09-17 22:03:39,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=309380.0, ans=0.125 +2024-09-17 22:03:41,991 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.95 vs. 
limit=15.0 +2024-09-17 22:04:00,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=309420.0, ans=0.2 +2024-09-17 22:04:14,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=309460.0, ans=0.1 +2024-09-17 22:04:15,317 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.825e+01 9.596e+01 1.056e+02 3.642e+02, threshold=1.919e+02, percent-clipped=2.0 +2024-09-17 22:04:18,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=309460.0, ans=0.05 +2024-09-17 22:04:20,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=309460.0, ans=0.125 +2024-09-17 22:04:27,498 INFO [train.py:1198] (1/2) Epoch 18, batch 450, loss[loss=0.2502, ctc_loss=0.1358, cr_loss=0.3674, attn_decoder_loss=0.2548, over 29695.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1422, cr_loss=0.385, attn_decoder_loss=0.2531, over 5188270.06 frames. ], batch size: 83, lr: 6.28e-03, grad_scale: 8.0 +2024-09-17 22:04:29,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=309500.0, ans=0.125 +2024-09-17 22:04:44,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=309540.0, ans=0.0 +2024-09-17 22:04:51,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=309540.0, ans=0.125 +2024-09-17 22:04:59,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=309580.0, ans=0.125 +2024-09-17 22:05:11,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=309580.0, ans=0.1 +2024-09-17 22:05:13,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=309580.0, ans=0.125 +2024-09-17 22:05:16,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=309620.0, ans=0.025 +2024-09-17 22:05:27,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=309620.0, ans=0.125 +2024-09-17 22:05:33,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=309660.0, ans=0.125 +2024-09-17 22:05:42,826 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.51 vs. limit=12.0 +2024-09-17 22:05:45,822 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.60 vs. limit=15.0 +2024-09-17 22:05:46,475 INFO [train.py:1198] (1/2) Epoch 18, batch 500, loss[loss=0.2626, ctc_loss=0.1439, cr_loss=0.4062, attn_decoder_loss=0.2668, over 29441.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1416, cr_loss=0.3847, attn_decoder_loss=0.2525, over 5331125.46 frames. 
], batch size: 94, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:05:57,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=309700.0, ans=0.025 +2024-09-17 22:06:13,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.58 vs. limit=15.0 +2024-09-17 22:06:19,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=309780.0, ans=0.125 +2024-09-17 22:06:19,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=309780.0, ans=0.1 +2024-09-17 22:06:29,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=309780.0, ans=0.1 +2024-09-17 22:06:33,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=309820.0, ans=0.2 +2024-09-17 22:06:35,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.29 vs. limit=22.5 +2024-09-17 22:06:44,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=309820.0, ans=0.125 +2024-09-17 22:06:48,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=309860.0, ans=0.125 +2024-09-17 22:06:50,089 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.535e+01 8.497e+01 9.185e+01 1.006e+02 4.777e+02, threshold=1.837e+02, percent-clipped=3.0 +2024-09-17 22:07:01,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=309900.0, ans=0.1 +2024-09-17 22:07:02,204 INFO [train.py:1198] (1/2) Epoch 18, batch 550, loss[loss=0.2633, ctc_loss=0.1524, cr_loss=0.4074, attn_decoder_loss=0.2666, over 28853.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1413, cr_loss=0.3838, attn_decoder_loss=0.2525, over 5423934.93 frames. 
], batch size: 104, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:07:04,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=309900.0, ans=0.125 +2024-09-17 22:07:07,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=309900.0, ans=0.125 +2024-09-17 22:07:08,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=309900.0, ans=0.1 +2024-09-17 22:07:35,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=309980.0, ans=0.0 +2024-09-17 22:08:12,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.whiten.whitening_limit, batch_count=310060.0, ans=12.0 +2024-09-17 22:08:13,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=310060.0, ans=0.035 +2024-09-17 22:08:17,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=310060.0, ans=0.125 +2024-09-17 22:08:19,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=310100.0, ans=0.0 +2024-09-17 22:08:20,548 INFO [train.py:1198] (1/2) Epoch 18, batch 600, loss[loss=0.2622, ctc_loss=0.1523, cr_loss=0.3967, attn_decoder_loss=0.2656, over 29218.00 frames. ], tot_loss[loss=0.2494, ctc_loss=0.1414, cr_loss=0.3841, attn_decoder_loss=0.2529, over 5511555.18 frames. ], batch size: 100, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:08:28,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=310100.0, ans=0.0 +2024-09-17 22:08:29,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=310100.0, ans=0.2 +2024-09-17 22:08:38,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=310140.0, ans=0.0 +2024-09-17 22:08:40,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=310140.0, ans=0.125 +2024-09-17 22:09:02,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=310180.0, ans=0.05 +2024-09-17 22:09:25,967 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.219e+01 8.500e+01 9.114e+01 9.640e+01 1.427e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-17 22:09:38,130 INFO [train.py:1198] (1/2) Epoch 18, batch 650, loss[loss=0.2465, ctc_loss=0.1388, cr_loss=0.387, attn_decoder_loss=0.2498, over 29734.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1407, cr_loss=0.3829, attn_decoder_loss=0.252, over 5588592.75 frames. ], batch size: 81, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:09:56,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=310340.0, ans=0.1 +2024-09-17 22:09:58,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=310340.0, ans=0.1 +2024-09-17 22:10:15,638 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.46 vs. 
limit=15.0 +2024-09-17 22:10:32,341 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.04 vs. limit=22.5 +2024-09-17 22:10:36,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=310420.0, ans=0.0 +2024-09-17 22:10:54,162 INFO [train.py:1198] (1/2) Epoch 18, batch 700, loss[loss=0.2357, ctc_loss=0.1332, cr_loss=0.3745, attn_decoder_loss=0.2388, over 29540.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1414, cr_loss=0.384, attn_decoder_loss=0.2527, over 5639186.27 frames. ], batch size: 76, lr: 6.27e-03, grad_scale: 8.0 +2024-09-17 22:11:09,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=310540.0, ans=0.07 +2024-09-17 22:11:16,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=310540.0, ans=0.125 +2024-09-17 22:11:39,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=310620.0, ans=0.125 +2024-09-17 22:11:57,450 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.839e+01 8.847e+01 9.240e+01 9.883e+01 4.255e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-17 22:12:00,122 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.78 vs. limit=15.0 +2024-09-17 22:12:11,909 INFO [train.py:1198] (1/2) Epoch 18, batch 750, loss[loss=0.2609, ctc_loss=0.1509, cr_loss=0.3984, attn_decoder_loss=0.2643, over 29720.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1414, cr_loss=0.3836, attn_decoder_loss=0.2526, over 5678215.49 frames. ], batch size: 82, lr: 6.26e-03, grad_scale: 8.0 +2024-09-17 22:12:31,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=310740.0, ans=0.0 +2024-09-17 22:13:08,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=310820.0, ans=0.015 +2024-09-17 22:13:11,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=310820.0, ans=0.125 +2024-09-17 22:13:19,964 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.23 vs. limit=15.0 +2024-09-17 22:13:23,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=310860.0, ans=0.125 +2024-09-17 22:13:29,310 INFO [train.py:1198] (1/2) Epoch 18, batch 800, loss[loss=0.2173, ctc_loss=0.1131, cr_loss=0.3287, attn_decoder_loss=0.2216, over 29598.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1412, cr_loss=0.3834, attn_decoder_loss=0.2522, over 5706743.57 frames. ], batch size: 73, lr: 6.26e-03, grad_scale: 16.0 +2024-09-17 22:14:34,573 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.324e+01 8.772e+01 9.230e+01 9.952e+01 3.129e+02, threshold=1.846e+02, percent-clipped=1.0 +2024-09-17 22:14:44,960 INFO [train.py:1198] (1/2) Epoch 18, batch 850, loss[loss=0.2666, ctc_loss=0.1523, cr_loss=0.4101, attn_decoder_loss=0.2702, over 29703.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1406, cr_loss=0.3823, attn_decoder_loss=0.2518, over 5735837.18 frames. 
], batch size: 89, lr: 6.26e-03, grad_scale: 8.0 +2024-09-17 22:14:58,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=311140.0, ans=0.1 +2024-09-17 22:15:13,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=311180.0, ans=0.04949747468305833 +2024-09-17 22:15:16,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=311180.0, ans=0.125 +2024-09-17 22:15:55,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=311260.0, ans=0.125 +2024-09-17 22:16:03,544 INFO [train.py:1198] (1/2) Epoch 18, batch 900, loss[loss=0.2306, ctc_loss=0.1252, cr_loss=0.3539, attn_decoder_loss=0.2345, over 29599.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1411, cr_loss=0.3833, attn_decoder_loss=0.2522, over 5740743.28 frames. ], batch size: 73, lr: 6.26e-03, grad_scale: 8.0 +2024-09-17 22:16:15,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=311300.0, ans=0.125 +2024-09-17 22:16:18,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=311340.0, ans=0.1 +2024-09-17 22:16:23,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=311340.0, ans=0.2 +2024-09-17 22:16:29,557 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.23 vs. limit=15.0 +2024-09-17 22:16:41,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=311380.0, ans=0.125 +2024-09-17 22:17:10,605 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.085e+01 8.926e+01 9.503e+01 1.103e+02 2.746e+02, threshold=1.901e+02, percent-clipped=1.0 +2024-09-17 22:17:21,151 INFO [train.py:1198] (1/2) Epoch 18, batch 950, loss[loss=0.2364, ctc_loss=0.1351, cr_loss=0.3757, attn_decoder_loss=0.2393, over 29509.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1413, cr_loss=0.3837, attn_decoder_loss=0.2524, over 5744103.78 frames. ], batch size: 74, lr: 6.26e-03, grad_scale: 8.0 +2024-09-17 22:17:29,566 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.62 vs. limit=15.0 +2024-09-17 22:17:30,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=311500.0, ans=0.0 +2024-09-17 22:17:32,779 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.70 vs. limit=10.0 +2024-09-17 22:17:45,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=311540.0, ans=0.2 +2024-09-17 22:18:14,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=311620.0, ans=0.0 +2024-09-17 22:18:17,818 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.99 vs. 
limit=15.0 +2024-09-17 22:18:21,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=311660.0, ans=0.0 +2024-09-17 22:18:21,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=311660.0, ans=0.0 +2024-09-17 22:18:26,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=311660.0, ans=0.035 +2024-09-17 22:18:36,550 INFO [train.py:1198] (1/2) Epoch 18, batch 1000, loss[loss=0.2398, ctc_loss=0.1379, cr_loss=0.3698, attn_decoder_loss=0.2429, over 29512.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1425, cr_loss=0.3854, attn_decoder_loss=0.2534, over 5737884.16 frames. ], batch size: 77, lr: 6.25e-03, grad_scale: 8.0 +2024-09-17 22:18:47,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=311700.0, ans=0.0 +2024-09-17 22:18:48,962 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:18:51,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=311740.0, ans=0.0 +2024-09-17 22:18:52,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=311740.0, ans=0.025 +2024-09-17 22:18:59,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=311740.0, ans=0.1 +2024-09-17 22:19:26,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=311820.0, ans=0.125 +2024-09-17 22:19:37,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=311860.0, ans=0.0 +2024-09-17 22:19:41,471 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.285e+01 8.748e+01 9.283e+01 1.021e+02 2.281e+02, threshold=1.857e+02, percent-clipped=1.0 +2024-09-17 22:19:49,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=311860.0, ans=0.04949747468305833 +2024-09-17 22:19:51,944 INFO [train.py:1198] (1/2) Epoch 18, batch 1050, loss[loss=0.2467, ctc_loss=0.1376, cr_loss=0.3853, attn_decoder_loss=0.2503, over 29659.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1418, cr_loss=0.3844, attn_decoder_loss=0.2525, over 5744902.10 frames. ], batch size: 85, lr: 6.25e-03, grad_scale: 8.0 +2024-09-17 22:20:08,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=311940.0, ans=0.125 +2024-09-17 22:20:14,994 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.02 vs. limit=15.0 +2024-09-17 22:20:16,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=311940.0, ans=0.125 +2024-09-17 22:20:51,077 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.49 vs. 
limit=15.0 +2024-09-17 22:20:56,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=312060.0, ans=0.125 +2024-09-17 22:21:08,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=312060.0, ans=0.5 +2024-09-17 22:21:13,197 INFO [train.py:1198] (1/2) Epoch 18, batch 1100, loss[loss=0.2688, ctc_loss=0.1682, cr_loss=0.4193, attn_decoder_loss=0.2706, over 29457.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1414, cr_loss=0.3836, attn_decoder_loss=0.2521, over 5756834.93 frames. ], batch size: 78, lr: 6.25e-03, grad_scale: 8.0 +2024-09-17 22:21:13,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_na.min_abs, batch_count=312100.0, ans=0.02 +2024-09-17 22:21:20,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=312100.0, ans=0.0 +2024-09-17 22:21:22,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=312100.0, ans=0.0 +2024-09-17 22:21:27,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=312140.0, ans=0.125 +2024-09-17 22:21:30,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=312140.0, ans=0.2 +2024-09-17 22:22:03,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=312220.0, ans=0.125 +2024-09-17 22:22:03,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=312220.0, ans=0.1 +2024-09-17 22:22:18,320 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.460e+01 8.510e+01 9.386e+01 9.841e+01 2.672e+02, threshold=1.877e+02, percent-clipped=2.0 +2024-09-17 22:22:28,971 INFO [train.py:1198] (1/2) Epoch 18, batch 1150, loss[loss=0.2335, ctc_loss=0.1315, cr_loss=0.3597, attn_decoder_loss=0.2369, over 29433.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1418, cr_loss=0.384, attn_decoder_loss=0.2526, over 5755834.76 frames. ], batch size: 78, lr: 6.25e-03, grad_scale: 8.0 +2024-09-17 22:22:32,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=312300.0, ans=0.125 +2024-09-17 22:22:36,163 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.67 vs. limit=10.0 +2024-09-17 22:22:47,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=312340.0, ans=0.125 +2024-09-17 22:22:58,563 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.95 vs. limit=22.5 +2024-09-17 22:22:59,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=312380.0, ans=0.04949747468305833 +2024-09-17 22:23:12,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=312380.0, ans=0.0 +2024-09-17 22:23:44,699 INFO [train.py:1198] (1/2) Epoch 18, batch 1200, loss[loss=0.2487, ctc_loss=0.1319, cr_loss=0.359, attn_decoder_loss=0.2537, over 29681.00 frames. 
], tot_loss[loss=0.25, ctc_loss=0.1426, cr_loss=0.3849, attn_decoder_loss=0.2534, over 5746765.61 frames. ], batch size: 85, lr: 6.25e-03, grad_scale: 16.0 +2024-09-17 22:23:46,883 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.83 vs. limit=6.0 +2024-09-17 22:24:18,795 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.34 vs. limit=15.0 +2024-09-17 22:24:21,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=312580.0, ans=0.125 +2024-09-17 22:24:24,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.46 vs. limit=22.5 +2024-09-17 22:24:26,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_ff3.min_abs, batch_count=312580.0, ans=0.2 +2024-09-17 22:24:31,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=312580.0, ans=0.2 +2024-09-17 22:24:39,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=312620.0, ans=0.1 +2024-09-17 22:24:44,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=312620.0, ans=0.07 +2024-09-17 22:24:44,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.49 vs. limit=15.0 +2024-09-17 22:24:44,764 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.80 vs. limit=15.0 +2024-09-17 22:24:55,872 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.397e+01 9.002e+01 9.543e+01 1.051e+02 1.930e+02, threshold=1.909e+02, percent-clipped=1.0 +2024-09-17 22:25:04,860 INFO [train.py:1198] (1/2) Epoch 18, batch 1250, loss[loss=0.2596, ctc_loss=0.149, cr_loss=0.3783, attn_decoder_loss=0.2635, over 29519.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.143, cr_loss=0.3867, attn_decoder_loss=0.2539, over 5774830.40 frames. ], batch size: 92, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:25:48,797 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.80 vs. limit=15.0 +2024-09-17 22:25:54,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=312820.0, ans=0.125 +2024-09-17 22:25:57,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=312820.0, ans=0.1 +2024-09-17 22:26:20,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=312900.0, ans=0.0 +2024-09-17 22:26:21,430 INFO [train.py:1198] (1/2) Epoch 18, batch 1300, loss[loss=0.2516, ctc_loss=0.1397, cr_loss=0.3813, attn_decoder_loss=0.2555, over 28637.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1421, cr_loss=0.3849, attn_decoder_loss=0.253, over 5780381.28 frames. 
], batch size: 112, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:26:26,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.45 vs. limit=15.0 +2024-09-17 22:27:04,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=312980.0, ans=0.1 +2024-09-17 22:27:07,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=313020.0, ans=0.125 +2024-09-17 22:27:25,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=313060.0, ans=0.2 +2024-09-17 22:27:28,522 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.616e+01 9.113e+01 9.632e+01 1.228e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-17 22:27:37,670 INFO [train.py:1198] (1/2) Epoch 18, batch 1350, loss[loss=0.2447, ctc_loss=0.1366, cr_loss=0.3753, attn_decoder_loss=0.2484, over 29747.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1421, cr_loss=0.3856, attn_decoder_loss=0.253, over 5795291.99 frames. ], batch size: 81, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:27:50,221 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:28:20,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=313180.0, ans=0.0 +2024-09-17 22:28:25,181 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.37 vs. limit=22.5 +2024-09-17 22:28:57,962 INFO [train.py:1198] (1/2) Epoch 18, batch 1400, loss[loss=0.2203, ctc_loss=0.1203, cr_loss=0.3453, attn_decoder_loss=0.2238, over 29583.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1416, cr_loss=0.3846, attn_decoder_loss=0.2525, over 5806550.19 frames. ], batch size: 69, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:29:29,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=313380.0, ans=0.0 +2024-09-17 22:29:31,936 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.69 vs. 
limit=15.0 +2024-09-17 22:29:33,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=313380.0, ans=0.125 +2024-09-17 22:29:34,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=313380.0, ans=0.0 +2024-09-17 22:29:34,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=313380.0, ans=0.0 +2024-09-17 22:29:45,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=313420.0, ans=0.125 +2024-09-17 22:30:04,782 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.307e+01 8.593e+01 9.088e+01 9.649e+01 1.870e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-17 22:30:05,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=313460.0, ans=0.04949747468305833 +2024-09-17 22:30:13,973 INFO [train.py:1198] (1/2) Epoch 18, batch 1450, loss[loss=0.2637, ctc_loss=0.1492, cr_loss=0.3949, attn_decoder_loss=0.2677, over 29464.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1419, cr_loss=0.3852, attn_decoder_loss=0.253, over 5801877.81 frames. ], batch size: 94, lr: 6.24e-03, grad_scale: 8.0 +2024-09-17 22:30:15,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=313500.0, ans=0.125 +2024-09-17 22:30:18,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=313500.0, ans=0.05 +2024-09-17 22:30:22,488 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.96 vs. limit=6.0 +2024-09-17 22:30:27,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=313540.0, ans=0.025 +2024-09-17 22:30:31,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=313540.0, ans=0.0 +2024-09-17 22:30:35,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=313540.0, ans=0.0 +2024-09-17 22:30:38,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=313540.0, ans=0.125 +2024-09-17 22:30:38,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=313540.0, ans=0.0 +2024-09-17 22:30:48,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys.whitening_limit, batch_count=313580.0, ans=6.0 +2024-09-17 22:31:07,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=313620.0, ans=0.0 +2024-09-17 22:31:16,790 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:31:30,413 INFO [train.py:1198] (1/2) Epoch 18, batch 1500, loss[loss=0.272, ctc_loss=0.1606, cr_loss=0.4254, attn_decoder_loss=0.2749, over 29634.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1423, cr_loss=0.3853, attn_decoder_loss=0.2535, over 5804347.19 frames. 
], batch size: 86, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:31:44,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=313740.0, ans=0.125 +2024-09-17 22:32:08,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=313780.0, ans=0.0 +2024-09-17 22:32:22,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=313820.0, ans=0.125 +2024-09-17 22:32:42,104 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.418e+01 8.582e+01 9.289e+01 9.829e+01 2.134e+02, threshold=1.858e+02, percent-clipped=2.0 +2024-09-17 22:32:42,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=313860.0, ans=0.125 +2024-09-17 22:32:51,077 INFO [train.py:1198] (1/2) Epoch 18, batch 1550, loss[loss=0.2513, ctc_loss=0.1442, cr_loss=0.3917, attn_decoder_loss=0.2545, over 29536.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1426, cr_loss=0.3859, attn_decoder_loss=0.2536, over 5780934.11 frames. ], batch size: 90, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:32:56,601 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.14 vs. limit=15.0 +2024-09-17 22:33:00,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=313900.0, ans=0.0 +2024-09-17 22:33:29,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=313980.0, ans=0.1 +2024-09-17 22:33:38,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=314020.0, ans=0.1 +2024-09-17 22:33:59,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=314060.0, ans=0.0 +2024-09-17 22:34:06,769 INFO [train.py:1198] (1/2) Epoch 18, batch 1600, loss[loss=0.2541, ctc_loss=0.1405, cr_loss=0.3709, attn_decoder_loss=0.2585, over 29664.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.143, cr_loss=0.3861, attn_decoder_loss=0.2536, over 5763002.25 frames. ], batch size: 85, lr: 6.23e-03, grad_scale: 16.0 +2024-09-17 22:34:09,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=314100.0, ans=0.035 +2024-09-17 22:34:11,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=314100.0, ans=0.1 +2024-09-17 22:34:31,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=314140.0, ans=0.1 +2024-09-17 22:34:31,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=314140.0, ans=0.125 +2024-09-17 22:34:32,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=314140.0, ans=0.125 +2024-09-17 22:35:00,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.19 vs. 
limit=22.5 +2024-09-17 22:35:14,216 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.89 vs. limit=15.0 +2024-09-17 22:35:14,481 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.19 vs. limit=6.0 +2024-09-17 22:35:14,876 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.294e+01 8.911e+01 9.926e+01 1.138e+02 4.601e+02, threshold=1.985e+02, percent-clipped=5.0 +2024-09-17 22:35:22,532 INFO [train.py:1198] (1/2) Epoch 18, batch 1650, loss[loss=0.2673, ctc_loss=0.1499, cr_loss=0.4073, attn_decoder_loss=0.2713, over 29737.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.1429, cr_loss=0.3861, attn_decoder_loss=0.2536, over 5759318.20 frames. ], batch size: 89, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:35:36,036 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.70 vs. limit=15.0 +2024-09-17 22:35:45,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=314340.0, ans=0.0 +2024-09-17 22:35:50,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=314340.0, ans=0.125 +2024-09-17 22:35:55,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=314380.0, ans=0.0 +2024-09-17 22:35:58,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=314380.0, ans=0.125 +2024-09-17 22:36:05,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=314380.0, ans=0.125 +2024-09-17 22:36:06,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=314380.0, ans=0.0 +2024-09-17 22:36:38,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=314460.0, ans=0.0 +2024-09-17 22:36:40,862 INFO [train.py:1198] (1/2) Epoch 18, batch 1700, loss[loss=0.2164, ctc_loss=0.116, cr_loss=0.3265, attn_decoder_loss=0.2203, over 29553.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1422, cr_loss=0.3851, attn_decoder_loss=0.2531, over 5780144.60 frames. 
], batch size: 69, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:36:48,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=314500.0, ans=0.125 +2024-09-17 22:36:51,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=314500.0, ans=0.1 +2024-09-17 22:37:05,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=314540.0, ans=0.05 +2024-09-17 22:37:13,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=314580.0, ans=0.125 +2024-09-17 22:37:38,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=314620.0, ans=0.125 +2024-09-17 22:37:49,121 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.791e+01 8.699e+01 9.289e+01 9.898e+01 1.574e+02, threshold=1.858e+02, percent-clipped=0.0 +2024-09-17 22:37:49,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=314660.0, ans=0.125 +2024-09-17 22:37:56,799 INFO [train.py:1198] (1/2) Epoch 18, batch 1750, loss[loss=0.2137, ctc_loss=0.1092, cr_loss=0.3274, attn_decoder_loss=0.218, over 29374.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1417, cr_loss=0.3844, attn_decoder_loss=0.2527, over 5788774.12 frames. ], batch size: 67, lr: 6.23e-03, grad_scale: 8.0 +2024-09-17 22:38:07,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=314700.0, ans=0.1 +2024-09-17 22:38:07,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=314700.0, ans=0.0 +2024-09-17 22:38:13,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=314740.0, ans=0.025 +2024-09-17 22:38:19,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=314740.0, ans=0.125 +2024-09-17 22:38:36,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=314780.0, ans=0.125 +2024-09-17 22:38:39,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=314780.0, ans=0.125 +2024-09-17 22:38:44,132 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:38:59,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=314860.0, ans=0.1 +2024-09-17 22:39:10,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=314900.0, ans=0.125 +2024-09-17 22:39:12,152 INFO [train.py:1198] (1/2) Epoch 18, batch 1800, loss[loss=0.2599, ctc_loss=0.1468, cr_loss=0.3896, attn_decoder_loss=0.2638, over 29684.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1422, cr_loss=0.3857, attn_decoder_loss=0.2532, over 5790736.65 frames. 
], batch size: 83, lr: 6.22e-03, grad_scale: 8.0 +2024-09-17 22:39:12,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=314900.0, ans=0.125 +2024-09-17 22:39:54,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=314980.0, ans=0.1 +2024-09-17 22:40:11,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=315020.0, ans=0.0 +2024-09-17 22:40:17,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=315060.0, ans=0.1 +2024-09-17 22:40:24,496 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.543e+01 8.622e+01 9.178e+01 9.904e+01 1.304e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-17 22:40:27,093 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.20 vs. limit=15.0 +2024-09-17 22:40:28,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.59 vs. limit=15.0 +2024-09-17 22:40:30,221 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.67 vs. limit=22.5 +2024-09-17 22:40:32,251 INFO [train.py:1198] (1/2) Epoch 18, batch 1850, loss[loss=0.2659, ctc_loss=0.1562, cr_loss=0.4293, attn_decoder_loss=0.2685, over 29630.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1418, cr_loss=0.3855, attn_decoder_loss=0.2529, over 5796032.73 frames. ], batch size: 86, lr: 6.22e-03, grad_scale: 8.0 +2024-09-17 22:40:37,033 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:40:44,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.76 vs. limit=15.0 +2024-09-17 22:40:53,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=315140.0, ans=0.125 +2024-09-17 22:41:02,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=315180.0, ans=0.125 +2024-09-17 22:41:28,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=315220.0, ans=0.0 +2024-09-17 22:41:28,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=315220.0, ans=0.125 +2024-09-17 22:41:48,285 INFO [train.py:1198] (1/2) Epoch 18, batch 1900, loss[loss=0.2614, ctc_loss=0.1536, cr_loss=0.4035, attn_decoder_loss=0.2644, over 29693.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.1427, cr_loss=0.3865, attn_decoder_loss=0.2539, over 5803499.03 frames. 
], batch size: 89, lr: 6.22e-03, grad_scale: 8.0 +2024-09-17 22:42:43,240 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:42:56,523 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.577e+01 8.839e+01 9.337e+01 9.876e+01 1.224e+02, threshold=1.867e+02, percent-clipped=0.0 +2024-09-17 22:43:04,086 INFO [train.py:1198] (1/2) Epoch 18, batch 1950, loss[loss=0.2506, ctc_loss=0.1432, cr_loss=0.3958, attn_decoder_loss=0.2537, over 29427.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1432, cr_loss=0.3881, attn_decoder_loss=0.2548, over 5818515.02 frames. ], batch size: 78, lr: 6.22e-03, grad_scale: 8.0 +2024-09-17 22:43:13,651 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:43:20,094 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.20 vs. limit=10.0 +2024-09-17 22:43:41,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=315580.0, ans=0.0 +2024-09-17 22:44:03,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=315620.0, ans=0.125 +2024-09-17 22:44:23,104 INFO [train.py:1198] (1/2) Epoch 18, batch 2000, loss[loss=0.2206, ctc_loss=0.1229, cr_loss=0.3404, attn_decoder_loss=0.2238, over 29366.00 frames. ], tot_loss[loss=0.2516, ctc_loss=0.1436, cr_loss=0.3882, attn_decoder_loss=0.255, over 5796999.11 frames. ], batch size: 67, lr: 6.22e-03, grad_scale: 16.0 +2024-09-17 22:44:23,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys.whitening_limit, batch_count=315700.0, ans=6.0 +2024-09-17 22:44:31,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=315700.0, ans=0.025 +2024-09-17 22:44:32,703 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:45:00,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=315780.0, ans=0.025 +2024-09-17 22:45:03,933 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.86 vs. limit=15.0 +2024-09-17 22:45:05,003 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.72 vs. limit=15.0 +2024-09-17 22:45:11,325 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.85 vs. limit=15.0 +2024-09-17 22:45:27,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=315860.0, ans=0.025 +2024-09-17 22:45:31,338 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.333e+01 8.725e+01 9.284e+01 1.004e+02 4.329e+02, threshold=1.857e+02, percent-clipped=1.0 +2024-09-17 22:45:38,903 INFO [train.py:1198] (1/2) Epoch 18, batch 2050, loss[loss=0.2265, ctc_loss=0.124, cr_loss=0.3415, attn_decoder_loss=0.2303, over 29470.00 frames. ], tot_loss[loss=0.2505, ctc_loss=0.1429, cr_loss=0.3864, attn_decoder_loss=0.2539, over 5788999.40 frames. 
], batch size: 70, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:46:23,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=316020.0, ans=0.125 +2024-09-17 22:46:23,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=316020.0, ans=0.2 +2024-09-17 22:46:37,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=316020.0, ans=0.2 +2024-09-17 22:46:43,846 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.59 vs. limit=10.0 +2024-09-17 22:46:54,760 INFO [train.py:1198] (1/2) Epoch 18, batch 2100, loss[loss=0.2568, ctc_loss=0.1467, cr_loss=0.4019, attn_decoder_loss=0.2601, over 29770.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1422, cr_loss=0.3852, attn_decoder_loss=0.2533, over 5800811.32 frames. ], batch size: 81, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:47:16,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=316140.0, ans=0.2 +2024-09-17 22:47:28,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=316180.0, ans=0.1 +2024-09-17 22:48:08,069 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.905e+01 8.595e+01 9.065e+01 9.620e+01 1.690e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-17 22:48:14,162 INFO [train.py:1198] (1/2) Epoch 18, batch 2150, loss[loss=0.256, ctc_loss=0.1528, cr_loss=0.4027, attn_decoder_loss=0.2586, over 29436.00 frames. ], tot_loss[loss=0.249, ctc_loss=0.1415, cr_loss=0.3839, attn_decoder_loss=0.2524, over 5816389.60 frames. ], batch size: 78, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:48:25,669 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.13 vs. limit=15.0 +2024-09-17 22:48:55,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=316380.0, ans=0.125 +2024-09-17 22:49:02,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=316420.0, ans=0.0 +2024-09-17 22:49:04,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=316420.0, ans=0.0 +2024-09-17 22:49:07,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=316420.0, ans=0.0 +2024-09-17 22:49:19,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=316460.0, ans=0.04949747468305833 +2024-09-17 22:49:21,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=316460.0, ans=0.125 +2024-09-17 22:49:29,883 INFO [train.py:1198] (1/2) Epoch 18, batch 2200, loss[loss=0.2542, ctc_loss=0.135, cr_loss=0.3709, attn_decoder_loss=0.2593, over 29607.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1416, cr_loss=0.384, attn_decoder_loss=0.2527, over 5811698.57 frames. 
], batch size: 86, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:50:03,999 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.82 vs. limit=15.0 +2024-09-17 22:50:09,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=316580.0, ans=0.125 +2024-09-17 22:50:18,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=316620.0, ans=0.2 +2024-09-17 22:50:18,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=316620.0, ans=0.07 +2024-09-17 22:50:39,196 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.790e+01 9.365e+01 1.003e+02 3.289e+02, threshold=1.873e+02, percent-clipped=2.0 +2024-09-17 22:50:45,435 INFO [train.py:1198] (1/2) Epoch 18, batch 2250, loss[loss=0.2552, ctc_loss=0.1436, cr_loss=0.3979, attn_decoder_loss=0.2587, over 29712.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1416, cr_loss=0.3841, attn_decoder_loss=0.2527, over 5810344.63 frames. ], batch size: 82, lr: 6.21e-03, grad_scale: 8.0 +2024-09-17 22:50:56,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=316700.0, ans=0.0 +2024-09-17 22:50:57,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=316700.0, ans=0.125 +2024-09-17 22:51:03,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=316740.0, ans=0.125 +2024-09-17 22:51:47,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=316820.0, ans=0.125 +2024-09-17 22:52:02,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=316860.0, ans=0.1 +2024-09-17 22:52:05,589 INFO [train.py:1198] (1/2) Epoch 18, batch 2300, loss[loss=0.2228, ctc_loss=0.1142, cr_loss=0.3404, attn_decoder_loss=0.2274, over 29337.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1406, cr_loss=0.3821, attn_decoder_loss=0.2516, over 5798754.99 frames. ], batch size: 71, lr: 6.20e-03, grad_scale: 8.0 +2024-09-17 22:52:08,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=316900.0, ans=0.125 +2024-09-17 22:52:16,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=316900.0, ans=0.1 +2024-09-17 22:52:25,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=316940.0, ans=0.025 +2024-09-17 22:52:30,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.23 vs. limit=15.0 +2024-09-17 22:52:41,495 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.63 vs. limit=15.0 +2024-09-17 22:53:05,625 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.42 vs. 
limit=22.5 +2024-09-17 22:53:15,287 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.054e+01 8.549e+01 8.919e+01 9.975e+01 1.965e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-17 22:53:21,367 INFO [train.py:1198] (1/2) Epoch 18, batch 2350, loss[loss=0.2508, ctc_loss=0.1362, cr_loss=0.3727, attn_decoder_loss=0.2553, over 29698.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1406, cr_loss=0.3821, attn_decoder_loss=0.2517, over 5803470.23 frames. ], batch size: 83, lr: 6.20e-03, grad_scale: 8.0 +2024-09-17 22:53:23,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=317100.0, ans=0.1 +2024-09-17 22:53:35,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=317140.0, ans=0.0 +2024-09-17 22:53:38,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=317140.0, ans=0.125 +2024-09-17 22:53:54,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=317180.0, ans=0.125 +2024-09-17 22:53:56,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=317180.0, ans=0.09899494936611666 +2024-09-17 22:53:57,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=35.40 vs. limit=22.5 +2024-09-17 22:53:57,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=317180.0, ans=0.125 +2024-09-17 22:54:12,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=317220.0, ans=0.07 +2024-09-17 22:54:37,095 INFO [train.py:1198] (1/2) Epoch 18, batch 2400, loss[loss=0.2488, ctc_loss=0.1443, cr_loss=0.3953, attn_decoder_loss=0.2516, over 29510.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1412, cr_loss=0.383, attn_decoder_loss=0.2523, over 5807236.49 frames. ], batch size: 76, lr: 6.20e-03, grad_scale: 16.0 +2024-09-17 22:54:39,495 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.67 vs. 
limit=15.0 +2024-09-17 22:54:43,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=317300.0, ans=0.125 +2024-09-17 22:54:55,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=317340.0, ans=0.0 +2024-09-17 22:54:58,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=317340.0, ans=0.1 +2024-09-17 22:54:58,653 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 22:55:01,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=317340.0, ans=0.125 +2024-09-17 22:55:52,493 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.296e+01 8.608e+01 9.226e+01 9.922e+01 2.120e+02, threshold=1.845e+02, percent-clipped=2.0 +2024-09-17 22:55:57,160 INFO [train.py:1198] (1/2) Epoch 18, batch 2450, loss[loss=0.2672, ctc_loss=0.1648, cr_loss=0.4324, attn_decoder_loss=0.269, over 29703.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1417, cr_loss=0.3838, attn_decoder_loss=0.2532, over 5784168.36 frames. ], batch size: 82, lr: 6.20e-03, grad_scale: 8.0 +2024-09-17 22:56:01,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=317500.0, ans=0.125 +2024-09-17 22:56:06,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=317500.0, ans=0.125 +2024-09-17 22:56:27,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=317580.0, ans=0.125 +2024-09-17 22:56:47,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=317620.0, ans=0.0 +2024-09-17 22:56:58,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=317660.0, ans=0.0 +2024-09-17 22:57:07,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=317660.0, ans=0.0 +2024-09-17 22:57:13,354 INFO [train.py:1198] (1/2) Epoch 18, batch 2500, loss[loss=0.2606, ctc_loss=0.1449, cr_loss=0.3792, attn_decoder_loss=0.265, over 29613.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1419, cr_loss=0.3841, attn_decoder_loss=0.2531, over 5794590.06 frames. 
], batch size: 86, lr: 6.20e-03, grad_scale: 8.0 +2024-09-17 22:57:54,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=317780.0, ans=0.125 +2024-09-17 22:57:59,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=317820.0, ans=0.125 +2024-09-17 22:58:08,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=317820.0, ans=0.125 +2024-09-17 22:58:14,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=317860.0, ans=0.1 +2024-09-17 22:58:21,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=317860.0, ans=0.0 +2024-09-17 22:58:24,506 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.912e+01 8.741e+01 9.201e+01 9.902e+01 1.726e+02, threshold=1.840e+02, percent-clipped=0.0 +2024-09-17 22:58:28,411 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.28 vs. limit=15.0 +2024-09-17 22:58:29,147 INFO [train.py:1198] (1/2) Epoch 18, batch 2550, loss[loss=0.225, ctc_loss=0.127, cr_loss=0.3528, attn_decoder_loss=0.2281, over 29338.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1417, cr_loss=0.3843, attn_decoder_loss=0.2532, over 5798041.64 frames. ], batch size: 67, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 22:58:29,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=317900.0, ans=0.0 +2024-09-17 22:58:45,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=317940.0, ans=0.125 +2024-09-17 22:59:06,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=317980.0, ans=0.2 +2024-09-17 22:59:09,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=317980.0, ans=0.1 +2024-09-17 22:59:14,572 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.66 vs. limit=22.5 +2024-09-17 22:59:33,137 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.08 vs. limit=15.0 +2024-09-17 22:59:49,434 INFO [train.py:1198] (1/2) Epoch 18, batch 2600, loss[loss=0.2484, ctc_loss=0.1353, cr_loss=0.3549, attn_decoder_loss=0.253, over 29420.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1422, cr_loss=0.3855, attn_decoder_loss=0.2537, over 5795298.07 frames. ], batch size: 78, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 22:59:50,353 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.08 vs. limit=12.0 +2024-09-17 22:59:52,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.25 vs. 
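limit=8.0

The ubiquitous `INFO [scaling.py:214] ScheduledFloat` entries record named float hyperparameters of the encoder modules (dropout probabilities, skip rates, balancer probabilities, bypass scale bounds) whose current value (`ans`) is looked up as a function of the training progress counter `batch_count`. A minimal sketch of such a schedule is below, assuming piecewise-linear interpolation between `(batch_count, value)` breakpoints; the class and the example breakpoints are illustrative, not taken from `scaling.py`.

```python
# Minimal sketch of a batch-count-indexed float schedule, assuming
# piecewise-linear interpolation between breakpoints. Illustrative only;
# not the actual ScheduledFloat from scaling.py.
class ScheduledFloatSketch:
    def __init__(self, *points):
        # points: (batch_count, value) pairs defining the schedule
        self.points = sorted(points)

    def value(self, batch_count: float) -> float:
        pts = self.points
        if batch_count <= pts[0][0]:
            return pts[0][1]
        if batch_count >= pts[-1][0]:
            return pts[-1][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if x0 <= batch_count <= x1:
                t = (batch_count - x0) / (x1 - x0)
                return y0 + t * (y1 - y0)

# Hypothetical example: a skip rate that decays from 0.1 to 0.0 over the
# first 20k batches, then stays at 0.0, consistent with the zeros logged
# for several skip rates at these large batch counts.
skip_rate = ScheduledFloatSketch((0.0, 0.1), (20000.0, 0.0))
print(skip_rate.value(318220.0))  # -> 0.0, far past the last breakpoint
```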
+2024-09-17 23:00:33,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=318220.0, ans=0.0 +2024-09-17 23:00:33,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=318220.0, ans=0.1 +2024-09-17 23:00:38,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=318220.0, ans=0.0 +2024-09-17 23:00:45,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=318220.0, ans=0.125 +2024-09-17 23:00:57,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=318260.0, ans=0.125 +2024-09-17 23:01:00,205 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.565e+01 8.599e+01 9.133e+01 9.930e+01 1.773e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-17 23:01:04,598 INFO [train.py:1198] (1/2) Epoch 18, batch 2650, loss[loss=0.2625, ctc_loss=0.151, cr_loss=0.3897, attn_decoder_loss=0.2662, over 29291.00 frames. ], tot_loss[loss=0.2504, ctc_loss=0.1422, cr_loss=0.3851, attn_decoder_loss=0.2539, over 5800742.64 frames. ], batch size: 100, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 23:01:47,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=318380.0, ans=0.0 +2024-09-17 23:01:49,320 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.43 vs. limit=15.0 +2024-09-17 23:02:04,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=318460.0, ans=0.0 +2024-09-17 23:02:05,947 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.71 vs. limit=6.0 +2024-09-17 23:02:19,080 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:02:20,280 INFO [train.py:1198] (1/2) Epoch 18, batch 2700, loss[loss=0.2511, ctc_loss=0.1376, cr_loss=0.3915, attn_decoder_loss=0.255, over 29539.00 frames. ], tot_loss[loss=0.2506, ctc_loss=0.1424, cr_loss=0.3853, attn_decoder_loss=0.2541, over 5796436.19 frames. ], batch size: 87, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 23:02:37,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=318540.0, ans=0.125 +2024-09-17 23:02:42,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten.whitening_limit, batch_count=318540.0, ans=22.5 +2024-09-17 23:02:46,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=318540.0, ans=0.125 +2024-09-17 23:02:55,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=318580.0, ans=0.125 +2024-09-17 23:03:00,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.64 vs.
limit=15.0 +2024-09-17 23:03:16,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=318620.0, ans=0.125 +2024-09-17 23:03:28,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=318660.0, ans=0.0 +2024-09-17 23:03:36,482 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.684e+01 8.599e+01 9.139e+01 9.802e+01 1.659e+02, threshold=1.828e+02, percent-clipped=0.0 +2024-09-17 23:03:41,065 INFO [train.py:1198] (1/2) Epoch 18, batch 2750, loss[loss=0.2379, ctc_loss=0.1333, cr_loss=0.3793, attn_decoder_loss=0.2411, over 29536.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1413, cr_loss=0.3827, attn_decoder_loss=0.2527, over 5795386.03 frames. ], batch size: 75, lr: 6.19e-03, grad_scale: 8.0 +2024-09-17 23:03:43,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=318700.0, ans=0.025 +2024-09-17 23:03:50,996 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=7.13 vs. limit=12.0 +2024-09-17 23:03:56,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=318740.0, ans=0.0 +2024-09-17 23:04:02,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=318740.0, ans=0.04949747468305833 +2024-09-17 23:04:11,849 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.22 vs. limit=10.0 +2024-09-17 23:04:40,977 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.69 vs. limit=15.0 +2024-09-17 23:04:51,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=318860.0, ans=0.125 +2024-09-17 23:04:57,092 INFO [train.py:1198] (1/2) Epoch 18, batch 2800, loss[loss=0.2726, ctc_loss=0.173, cr_loss=0.3852, attn_decoder_loss=0.2751, over 20366.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1416, cr_loss=0.3831, attn_decoder_loss=0.253, over 5775981.26 frames. ], batch size: 209, lr: 6.18e-03, grad_scale: 16.0 +2024-09-17 23:04:58,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=318900.0, ans=0.125 +2024-09-17 23:05:04,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=318900.0, ans=0.0 +2024-09-17 23:05:58,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=319060.0, ans=0.0 +2024-09-17 23:06:02,243 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=14.14 vs. 
limit=15.0 +2024-09-17 23:06:02,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=319060.0, ans=0.125 +2024-09-17 23:06:04,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=319060.0, ans=0.025 +2024-09-17 23:06:10,010 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.902e+01 8.963e+01 9.729e+01 1.060e+02 3.606e+02, threshold=1.946e+02, percent-clipped=3.0 +2024-09-17 23:06:13,107 INFO [train.py:1198] (1/2) Epoch 18, batch 2850, loss[loss=0.2539, ctc_loss=0.1433, cr_loss=0.4071, attn_decoder_loss=0.2571, over 29476.00 frames. ], tot_loss[loss=0.25, ctc_loss=0.1421, cr_loss=0.3842, attn_decoder_loss=0.2534, over 5761301.44 frames. ], batch size: 77, lr: 6.18e-03, grad_scale: 8.0 +2024-09-17 23:06:16,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=319100.0, ans=0.0 +2024-09-17 23:06:24,751 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.01 vs. limit=15.0 +2024-09-17 23:06:32,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.26 vs. limit=22.5 +2024-09-17 23:06:33,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=319140.0, ans=0.2 +2024-09-17 23:06:40,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=319140.0, ans=0.0 +2024-09-17 23:07:12,052 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:07:33,501 INFO [train.py:1198] (1/2) Epoch 18, batch 2900, loss[loss=0.2435, ctc_loss=0.1343, cr_loss=0.3835, attn_decoder_loss=0.2471, over 29437.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1428, cr_loss=0.386, attn_decoder_loss=0.2547, over 5786888.91 frames. ], batch size: 79, lr: 6.18e-03, grad_scale: 8.0 +2024-09-17 23:07:44,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=319300.0, ans=0.0 +2024-09-17 23:07:48,408 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.63 vs. limit=22.5 +2024-09-17 23:07:53,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=319340.0, ans=0.0 +2024-09-17 23:08:02,939 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:08:13,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=319380.0, ans=0.1 +2024-09-17 23:08:18,302 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.08 vs. limit=12.0 +2024-09-17 23:08:23,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.50 vs. 
limit=10.0 +2024-09-17 23:08:46,530 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.577e+01 9.174e+01 9.696e+01 1.530e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-17 23:08:49,581 INFO [train.py:1198] (1/2) Epoch 18, batch 2950, loss[loss=0.2396, ctc_loss=0.1392, cr_loss=0.3909, attn_decoder_loss=0.242, over 29522.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1417, cr_loss=0.3837, attn_decoder_loss=0.2532, over 5780572.99 frames. ], batch size: 75, lr: 6.18e-03, grad_scale: 8.0 +2024-09-17 23:08:56,649 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.87 vs. limit=15.0 +2024-09-17 23:08:58,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=319500.0, ans=0.125 +2024-09-17 23:09:13,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.93 vs. limit=15.0 +2024-09-17 23:09:41,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=319620.0, ans=0.125 +2024-09-17 23:09:45,046 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.22 vs. limit=6.0 +2024-09-17 23:09:46,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=319620.0, ans=0.125 +2024-09-17 23:09:55,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=319660.0, ans=0.1 +2024-09-17 23:10:04,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=319700.0, ans=0.1 +2024-09-17 23:10:05,406 INFO [train.py:1198] (1/2) Epoch 18, batch 3000, loss[loss=0.2416, ctc_loss=0.1308, cr_loss=0.3675, attn_decoder_loss=0.2457, over 29756.00 frames. ], tot_loss[loss=0.2494, ctc_loss=0.1415, cr_loss=0.3834, attn_decoder_loss=0.2529, over 5782820.89 frames. ], batch size: 81, lr: 6.18e-03, grad_scale: 8.0 +2024-09-17 23:10:05,406 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 23:10:24,020 INFO [train.py:1230] (1/2) Epoch 18, validation: loss=0.211, ctc_loss=0.04071, cr_loss=4.994e-15, attn_decoder_loss=0.23, over 944034.00 frames. +2024-09-17 23:10:24,021 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 23:10:40,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=319740.0, ans=0.0 +2024-09-17 23:10:55,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=319780.0, ans=0.2 +2024-09-17 23:11:05,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=319780.0, ans=0.2 +2024-09-17 23:11:20,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=319820.0, ans=0.025 +2024-09-17 23:11:30,264 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.03 vs. 
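limit=15.0

Each `INFO [train.py:1198]` entry prints three component losses (`ctc_loss`, `cr_loss`, `attn_decoder_loss`) alongside a combined `loss`. The combined values throughout this log are consistent with a fixed weighted sum of roughly 0.1 × ctc + 0.02 × cr + 0.9 × attn_decoder; for the validation entry above, 0.1 × 0.04071 + 0.9 × 0.23 ≈ 0.211, matching the logged loss=0.211. These weights are inferred from the printed numbers, not read from `train.py`. The snippet below checks the fit on a few entries:

```python
# Check the inferred (hypothetical) loss weights against logged values.
# Weights 0.1 / 0.02 / 0.9 are a fit to the printed numbers, not train.py.
entries = [
    # (logged_loss, ctc_loss, cr_loss, attn_decoder_loss)
    (0.2492, 0.1416, 0.3841, 0.2527),    # batch 2250, tot_loss
    (0.2228, 0.1142, 0.3404, 0.2274),    # batch 2300, loss
    (0.211, 0.04071, 4.994e-15, 0.23),   # epoch 18 validation loss
]
for logged, ctc, cr, attn in entries:
    fit = 0.1 * ctc + 0.02 * cr + 0.9 * attn
    print(f"logged={logged:.4f}  weighted sum={fit:.4f}")  # agree to ~1e-4
```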
+2024-09-17 23:11:33,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.28 vs. limit=15.0 +2024-09-17 23:11:41,318 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.342e+01 9.075e+01 9.591e+01 1.002e+02 5.340e+02, threshold=1.918e+02, percent-clipped=4.0 +2024-09-17 23:11:44,493 INFO [train.py:1198] (1/2) Epoch 18, batch 3050, loss[loss=0.2359, ctc_loss=0.1217, cr_loss=0.3426, attn_decoder_loss=0.241, over 29537.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.142, cr_loss=0.3837, attn_decoder_loss=0.2536, over 5776498.69 frames. ], batch size: 76, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:12:02,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=319940.0, ans=0.0 +2024-09-17 23:12:04,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=319940.0, ans=0.1 +2024-09-17 23:12:08,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=319940.0, ans=0.125 +2024-09-17 23:12:15,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=319980.0, ans=0.0 +2024-09-17 23:12:57,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=320060.0, ans=0.1 +2024-09-17 23:13:07,354 INFO [train.py:1198] (1/2) Epoch 18, batch 3100, loss[loss=0.2615, ctc_loss=0.1447, cr_loss=0.3753, attn_decoder_loss=0.2661, over 29308.00 frames. ], tot_loss[loss=0.2497, ctc_loss=0.1418, cr_loss=0.3836, attn_decoder_loss=0.2532, over 5777190.37 frames. ], batch size: 100, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:13:12,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=320100.0, ans=0.125 +2024-09-17 23:13:21,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=320140.0, ans=0.025 +2024-09-17 23:13:25,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=320140.0, ans=0.2 +2024-09-17 23:13:55,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=320220.0, ans=10.0 +2024-09-17 23:14:21,537 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.202e+01 8.516e+01 8.988e+01 9.440e+01 2.574e+02, threshold=1.798e+02, percent-clipped=3.0 +2024-09-17 23:14:25,207 INFO [train.py:1198] (1/2) Epoch 18, batch 3150, loss[loss=0.258, ctc_loss=0.1501, cr_loss=0.414, attn_decoder_loss=0.2607, over 28934.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1414, cr_loss=0.3837, attn_decoder_loss=0.253, over 5782951.44 frames.
], batch size: 104, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:15:29,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=320460.0, ans=0.5 +2024-09-17 23:15:31,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=320460.0, ans=0.125 +2024-09-17 23:15:34,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=320460.0, ans=0.0 +2024-09-17 23:15:40,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=320460.0, ans=0.0 +2024-09-17 23:15:41,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=320500.0, ans=0.125 +2024-09-17 23:15:43,028 INFO [train.py:1198] (1/2) Epoch 18, batch 3200, loss[loss=0.2523, ctc_loss=0.1411, cr_loss=0.41, attn_decoder_loss=0.2556, over 29423.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1407, cr_loss=0.3825, attn_decoder_loss=0.2522, over 5793601.15 frames. ], batch size: 79, lr: 6.17e-03, grad_scale: 16.0 +2024-09-17 23:15:50,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=320500.0, ans=0.2 +2024-09-17 23:15:56,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=320540.0, ans=0.0 +2024-09-17 23:16:14,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.94 vs. limit=15.0 +2024-09-17 23:16:22,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=320580.0, ans=0.05 +2024-09-17 23:16:45,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=320660.0, ans=0.1 +2024-09-17 23:16:52,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.49 vs. limit=15.0 +2024-09-17 23:16:58,817 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.899e+01 8.773e+01 9.216e+01 9.587e+01 1.476e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-17 23:16:58,843 INFO [train.py:1198] (1/2) Epoch 18, batch 3250, loss[loss=0.248, ctc_loss=0.1329, cr_loss=0.3573, attn_decoder_loss=0.2528, over 29684.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.141, cr_loss=0.383, attn_decoder_loss=0.2528, over 5800607.63 frames. 
], batch size: 84, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:17:08,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=320700.0, ans=0.125 +2024-09-17 23:17:16,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=320740.0, ans=0.0 +2024-09-17 23:17:24,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=320740.0, ans=0.1 +2024-09-17 23:17:27,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=320780.0, ans=0.09899494936611666 +2024-09-17 23:17:32,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=320780.0, ans=0.125 +2024-09-17 23:17:44,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=320820.0, ans=0.09899494936611666 +2024-09-17 23:17:47,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=320820.0, ans=0.125 +2024-09-17 23:18:10,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=320860.0, ans=0.05 +2024-09-17 23:18:13,781 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:18:14,951 INFO [train.py:1198] (1/2) Epoch 18, batch 3300, loss[loss=0.2586, ctc_loss=0.145, cr_loss=0.3946, attn_decoder_loss=0.2625, over 28217.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1403, cr_loss=0.3816, attn_decoder_loss=0.2518, over 5797593.44 frames. ], batch size: 111, lr: 6.17e-03, grad_scale: 8.0 +2024-09-17 23:18:54,680 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.40 vs. limit=15.0 +2024-09-17 23:19:17,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=321020.0, ans=0.125 +2024-09-17 23:19:35,429 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 8.646e+01 9.242e+01 9.946e+01 1.965e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-17 23:19:35,450 INFO [train.py:1198] (1/2) Epoch 18, batch 3350, loss[loss=0.2632, ctc_loss=0.1483, cr_loss=0.4147, attn_decoder_loss=0.2668, over 28890.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1412, cr_loss=0.3831, attn_decoder_loss=0.2528, over 5774036.44 frames. 
], batch size: 104, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:19:46,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=321100.0, ans=0.1 +2024-09-17 23:20:17,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=321180.0, ans=0.0 +2024-09-17 23:20:18,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=321180.0, ans=0.125 +2024-09-17 23:20:21,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=321220.0, ans=0.025 +2024-09-17 23:20:22,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.47 vs. limit=6.0 +2024-09-17 23:20:23,640 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.07 vs. limit=15.0 +2024-09-17 23:20:50,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=321300.0, ans=0.125 +2024-09-17 23:20:51,647 INFO [train.py:1198] (1/2) Epoch 18, batch 3400, loss[loss=0.2183, ctc_loss=0.1191, cr_loss=0.3276, attn_decoder_loss=0.2221, over 29352.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1417, cr_loss=0.3833, attn_decoder_loss=0.2529, over 5766874.59 frames. ], batch size: 67, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:20:55,419 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.95 vs. limit=15.0 +2024-09-17 23:21:14,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=321340.0, ans=0.0 +2024-09-17 23:21:18,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten.whitening_limit, batch_count=321340.0, ans=15.0 +2024-09-17 23:21:19,550 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.61 vs. limit=15.0 +2024-09-17 23:21:28,947 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.12 vs. limit=15.0 +2024-09-17 23:21:31,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=321380.0, ans=0.025 +2024-09-17 23:21:36,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=321420.0, ans=0.125 +2024-09-17 23:21:47,131 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.16 vs. limit=22.5 +2024-09-17 23:22:09,248 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.32 vs. 
limit=15.0 +2024-09-17 23:22:09,825 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 8.690e+01 9.213e+01 9.933e+01 2.095e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-17 23:22:09,848 INFO [train.py:1198] (1/2) Epoch 18, batch 3450, loss[loss=0.2657, ctc_loss=0.1534, cr_loss=0.4109, attn_decoder_loss=0.269, over 28192.00 frames. ], tot_loss[loss=0.2498, ctc_loss=0.1419, cr_loss=0.3841, attn_decoder_loss=0.2533, over 5774627.38 frames. ], batch size: 111, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:22:29,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=321540.0, ans=0.125 +2024-09-17 23:22:34,985 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.95 vs. limit=15.0 +2024-09-17 23:22:43,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=321580.0, ans=0.125 +2024-09-17 23:22:43,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.50 vs. limit=15.0 +2024-09-17 23:22:57,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=321620.0, ans=0.025 +2024-09-17 23:22:58,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=321620.0, ans=0.1 +2024-09-17 23:23:10,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=321620.0, ans=0.125 +2024-09-17 23:23:10,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=321620.0, ans=0.125 +2024-09-17 23:23:14,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=321660.0, ans=0.0 +2024-09-17 23:23:25,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=321660.0, ans=0.125 +2024-09-17 23:23:28,089 INFO [train.py:1198] (1/2) Epoch 18, batch 3500, loss[loss=0.2144, ctc_loss=0.1154, cr_loss=0.3299, attn_decoder_loss=0.2181, over 29337.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1414, cr_loss=0.3837, attn_decoder_loss=0.2525, over 5775331.27 frames. 
], batch size: 71, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:23:36,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=321700.0, ans=0.125 +2024-09-17 23:23:40,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=321700.0, ans=0.0 +2024-09-17 23:23:45,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=321740.0, ans=0.1 +2024-09-17 23:23:45,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=321740.0, ans=0.125 +2024-09-17 23:23:58,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=321780.0, ans=0.125 +2024-09-17 23:24:10,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=321780.0, ans=0.035 +2024-09-17 23:24:31,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=321860.0, ans=0.2 +2024-09-17 23:24:35,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=321860.0, ans=0.0 +2024-09-17 23:24:42,810 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.524e+01 9.219e+01 9.966e+01 2.449e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-17 23:24:42,831 INFO [train.py:1198] (1/2) Epoch 18, batch 3550, loss[loss=0.2534, ctc_loss=0.1331, cr_loss=0.3615, attn_decoder_loss=0.2587, over 29714.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1412, cr_loss=0.3831, attn_decoder_loss=0.2526, over 5782403.15 frames. ], batch size: 89, lr: 6.16e-03, grad_scale: 8.0 +2024-09-17 23:24:43,444 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.79 vs. limit=15.0 +2024-09-17 23:25:11,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=321980.0, ans=0.1 +2024-09-17 23:25:28,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=322020.0, ans=22.5 +2024-09-17 23:25:29,528 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.80 vs. limit=6.0 +2024-09-17 23:25:33,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=322020.0, ans=10.0 +2024-09-17 23:25:57,198 INFO [train.py:1198] (1/2) Epoch 18, batch 3600, loss[loss=0.2446, ctc_loss=0.1271, cr_loss=0.3596, attn_decoder_loss=0.2497, over 29489.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1413, cr_loss=0.3837, attn_decoder_loss=0.253, over 5791700.76 frames. ], batch size: 77, lr: 6.15e-03, grad_scale: 16.0 +2024-09-17 23:26:16,213 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.20 vs. 
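limit=15.0

The `INFO [scaling.py:1024] Whitening` entries compare a per-module statistic (`metric`) against a scheduled limit (the `whitening_limit` values of 15.0 or 22.5 that also appear among the ScheduledFloat entries). The metric behaves like a measure of how far the module's output covariance is from isotropic, staying near 1 for well-whitened activations and growing as the eigenvalue spectrum becomes lopsided. One hypothetical way to compute such a statistic is sketched below; the exact formula in `scaling.py` may differ.

```python
# Hypothetical whitening metric: eigenvalue non-uniformity of the feature
# covariance, equal to 1.0 for perfectly white (isotropic) features.
# The exact formula in scaling.py may differ; this only illustrates the idea.
import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> float:
    n, c = x.shape                                    # (num_frames, num_channels)
    d = c // num_groups
    x = x.reshape(n, num_groups, d).transpose(0, 1)   # (groups, frames, d)
    x = x - x.mean(dim=1, keepdim=True)
    cov = x.transpose(1, 2) @ x / n                   # per-group covariance
    num = (cov ** 2).sum(dim=(1, 2))                  # = sum of eigenvalues^2
    trace = torch.diagonal(cov, dim1=1, dim2=2).sum(dim=1)
    den = trace ** 2 / d                              # equals num iff eigvals equal
    return (num / den).mean().item()

print(whitening_metric(torch.randn(1000, 512)))  # ~1.0 for i.i.d. features
```

By Cauchy-Schwarz this ratio is at least 1, so a logged value such as metric=35.40 vs. limit=22.5 (earlier in this log) flags a module whose output covariance has become strongly anisotropic.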
+2024-09-17 23:26:49,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=322220.0, ans=0.125 +2024-09-17 23:26:54,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=322220.0, ans=0.0 +2024-09-17 23:27:11,822 INFO [train.py:1198] (1/2) Epoch 18, batch 3650, loss[loss=0.2639, ctc_loss=0.1519, cr_loss=0.4094, attn_decoder_loss=0.2673, over 29475.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1407, cr_loss=0.3829, attn_decoder_loss=0.2524, over 5793055.85 frames. ], batch size: 90, lr: 6.15e-03, grad_scale: 8.0 +2024-09-17 23:27:13,207 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.531e+01 8.529e+01 9.051e+01 9.513e+01 1.639e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-17 23:27:27,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=322340.0, ans=0.0 +2024-09-17 23:27:27,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=322340.0, ans=0.125 +2024-09-17 23:27:28,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=322340.0, ans=0.1 +2024-09-17 23:27:34,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=322340.0, ans=0.1 +2024-09-17 23:27:34,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=322340.0, ans=0.09899494936611666 +2024-09-17 23:27:46,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=322380.0, ans=0.125 +2024-09-17 23:28:01,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=322420.0, ans=0.0 +2024-09-17 23:28:05,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=322420.0, ans=0.07 +2024-09-17 23:28:29,087 INFO [train.py:1198] (1/2) Epoch 18, batch 3700, loss[loss=0.2679, ctc_loss=0.1524, cr_loss=0.4168, attn_decoder_loss=0.2715, over 29712.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1408, cr_loss=0.3835, attn_decoder_loss=0.2526, over 5803210.26 frames. ], batch size: 84, lr: 6.15e-03, grad_scale: 8.0 +2024-09-17 23:28:44,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=322540.0, ans=0.125 +2024-09-17 23:28:51,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=322540.0, ans=0.1 +2024-09-17 23:29:04,343 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.25 vs. limit=15.0 +2024-09-17 23:29:24,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=322620.0, ans=0.125 +2024-09-17 23:29:44,674 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.60 vs.
limit=15.0 +2024-09-17 23:29:45,361 INFO [train.py:1198] (1/2) Epoch 18, batch 3750, loss[loss=0.2218, ctc_loss=0.1219, cr_loss=0.3412, attn_decoder_loss=0.2253, over 29373.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1412, cr_loss=0.3845, attn_decoder_loss=0.2525, over 5807066.96 frames. ], batch size: 67, lr: 6.15e-03, grad_scale: 8.0 +2024-09-17 23:29:46,817 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.942e+01 8.687e+01 9.263e+01 1.001e+02 2.346e+02, threshold=1.853e+02, percent-clipped=1.0 +2024-09-17 23:29:48,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=13.83 vs. limit=22.5 +2024-09-17 23:29:56,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=322700.0, ans=0.125 +2024-09-17 23:30:24,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=322780.0, ans=0.2 +2024-09-17 23:30:31,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=322820.0, ans=0.0 +2024-09-17 23:30:40,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=322820.0, ans=0.2 +2024-09-17 23:30:43,186 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.76 vs. limit=10.0 +2024-09-17 23:30:56,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=322860.0, ans=0.125 +2024-09-17 23:30:58,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=322900.0, ans=0.015 +2024-09-17 23:31:00,152 INFO [train.py:1198] (1/2) Epoch 18, batch 3800, loss[loss=0.2569, ctc_loss=0.1382, cr_loss=0.3848, attn_decoder_loss=0.2616, over 29643.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1409, cr_loss=0.3844, attn_decoder_loss=0.252, over 5797128.56 frames. ], batch size: 86, lr: 6.15e-03, grad_scale: 8.0 +2024-09-17 23:31:31,221 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.26 vs. limit=12.0 +2024-09-17 23:31:40,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=322980.0, ans=0.5 +2024-09-17 23:31:48,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=323020.0, ans=0.125 +2024-09-17 23:31:48,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=323020.0, ans=0.05 +2024-09-17 23:31:49,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=323020.0, ans=0.0 +2024-09-17 23:32:06,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=323060.0, ans=0.1 +2024-09-17 23:32:08,240 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.76 vs. 
limit=6.0 +2024-09-17 23:32:14,631 INFO [train.py:1198] (1/2) Epoch 18, batch 3850, loss[loss=0.2638, ctc_loss=0.1483, cr_loss=0.4157, attn_decoder_loss=0.2674, over 29182.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1404, cr_loss=0.3833, attn_decoder_loss=0.2519, over 5811274.43 frames. ], batch size: 100, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:32:16,117 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.747e+01 8.799e+01 9.187e+01 9.877e+01 1.493e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-17 23:32:22,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=323100.0, ans=0.0 +2024-09-17 23:32:26,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=323100.0, ans=0.0 +2024-09-17 23:32:26,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=323100.0, ans=0.125 +2024-09-17 23:32:36,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=323140.0, ans=0.125 +2024-09-17 23:32:40,685 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.58 vs. limit=15.0 +2024-09-17 23:32:55,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=323180.0, ans=0.0 +2024-09-17 23:33:01,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=323220.0, ans=0.125 +2024-09-17 23:33:06,918 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.39 vs. limit=15.0 +2024-09-17 23:33:31,048 INFO [train.py:1198] (1/2) Epoch 18, batch 3900, loss[loss=0.2622, ctc_loss=0.1493, cr_loss=0.403, attn_decoder_loss=0.2658, over 29645.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1411, cr_loss=0.3846, attn_decoder_loss=0.2525, over 5815352.59 frames. ], batch size: 86, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:33:34,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=323300.0, ans=0.125 +2024-09-17 23:34:10,424 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.34 vs. limit=22.5 +2024-09-17 23:34:24,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=323420.0, ans=0.2 +2024-09-17 23:34:44,931 INFO [train.py:1198] (1/2) Epoch 18, batch 3950, loss[loss=0.2584, ctc_loss=0.1486, cr_loss=0.3954, attn_decoder_loss=0.2618, over 29495.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1408, cr_loss=0.3844, attn_decoder_loss=0.2526, over 5834893.91 frames. ], batch size: 97, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:34:46,429 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.658e+01 8.710e+01 9.175e+01 9.677e+01 1.510e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-17 23:34:54,633 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.60 vs. 
limit=12.0 +2024-09-17 23:35:00,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=323540.0, ans=0.125 +2024-09-17 23:35:01,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=323540.0, ans=0.2 +2024-09-17 23:35:06,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.76 vs. limit=12.0 +2024-09-17 23:35:18,453 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.36 vs. limit=15.0 +2024-09-17 23:35:27,161 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.05 vs. limit=15.0 +2024-09-17 23:35:40,633 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.86 vs. limit=12.0 +2024-09-17 23:35:41,950 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.11 vs. limit=22.5 +2024-09-17 23:35:53,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=323660.0, ans=0.125 +2024-09-17 23:36:00,068 INFO [train.py:1198] (1/2) Epoch 18, batch 4000, loss[loss=0.2282, ctc_loss=0.1208, cr_loss=0.3412, attn_decoder_loss=0.2325, over 29513.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1406, cr_loss=0.3834, attn_decoder_loss=0.2523, over 5812730.21 frames. ], batch size: 74, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:36:00,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=323700.0, ans=10.0 +2024-09-17 23:36:07,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=323700.0, ans=0.125 +2024-09-17 23:36:16,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=323740.0, ans=0.035 +2024-09-17 23:36:31,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=323780.0, ans=0.0 +2024-09-17 23:36:37,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=323780.0, ans=0.1 +2024-09-17 23:36:41,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=323780.0, ans=0.0 +2024-09-17 23:36:52,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=323820.0, ans=0.0 +2024-09-17 23:37:14,031 INFO [train.py:1198] (1/2) Epoch 18, batch 4050, loss[loss=0.2873, ctc_loss=0.2079, cr_loss=0.434, attn_decoder_loss=0.2865, over 20369.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1404, cr_loss=0.3828, attn_decoder_loss=0.2522, over 5797034.08 frames. ], batch size: 210, lr: 6.14e-03, grad_scale: 8.0 +2024-09-17 23:37:16,868 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.693e+01 8.741e+01 9.284e+01 9.840e+01 3.533e+02, threshold=1.857e+02, percent-clipped=2.0 +2024-09-17 23:37:26,278 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.71 vs. 
limit=22.5 +2024-09-17 23:37:28,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=323940.0, ans=0.125 +2024-09-17 23:37:30,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=323940.0, ans=0.07 +2024-09-17 23:37:38,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=323940.0, ans=0.125 +2024-09-17 23:37:41,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=323980.0, ans=0.125 +2024-09-17 23:37:44,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=323980.0, ans=0.5 +2024-09-17 23:37:46,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=323980.0, ans=0.125 +2024-09-17 23:37:58,771 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.32 vs. limit=15.0 +2024-09-17 23:37:59,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=324020.0, ans=0.2 +2024-09-17 23:38:12,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=324060.0, ans=0.125 +2024-09-17 23:38:28,779 INFO [train.py:1198] (1/2) Epoch 18, batch 4100, loss[loss=0.2594, ctc_loss=0.1454, cr_loss=0.4015, attn_decoder_loss=0.2631, over 29487.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.141, cr_loss=0.3838, attn_decoder_loss=0.2527, over 5792281.30 frames. ], batch size: 90, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:38:33,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=324100.0, ans=0.125 +2024-09-17 23:38:39,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=324100.0, ans=0.0 +2024-09-17 23:39:07,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=324180.0, ans=0.04949747468305833 +2024-09-17 23:39:30,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=324260.0, ans=0.125 +2024-09-17 23:39:36,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=324260.0, ans=0.125 +2024-09-17 23:39:43,551 INFO [train.py:1198] (1/2) Epoch 18, batch 4150, loss[loss=0.2334, ctc_loss=0.1308, cr_loss=0.3468, attn_decoder_loss=0.2371, over 29514.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1403, cr_loss=0.3831, attn_decoder_loss=0.2521, over 5798072.65 frames. 
], batch size: 77, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:39:45,386 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:39:46,526 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.404e+01 8.386e+01 9.045e+01 9.725e+01 1.428e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-17 23:40:04,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=324340.0, ans=0.125 +2024-09-17 23:40:19,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=324380.0, ans=0.0 +2024-09-17 23:40:19,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=324380.0, ans=0.025 +2024-09-17 23:40:39,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=324420.0, ans=0.025 +2024-09-17 23:40:42,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=324460.0, ans=0.125 +2024-09-17 23:40:44,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=324460.0, ans=0.2 +2024-09-17 23:40:44,472 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:40:54,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=324460.0, ans=0.1 +2024-09-17 23:40:57,213 INFO [train.py:1198] (1/2) Epoch 18, batch 4200, loss[loss=0.2693, ctc_loss=0.1609, cr_loss=0.4277, attn_decoder_loss=0.2719, over 29513.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1411, cr_loss=0.3843, attn_decoder_loss=0.2527, over 5800138.33 frames. ], batch size: 90, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:41:14,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=324540.0, ans=0.2 +2024-09-17 23:41:26,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.41 vs. limit=15.0 +2024-09-17 23:41:31,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=324580.0, ans=0.125 +2024-09-17 23:41:35,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.37 vs. limit=15.0 +2024-09-17 23:41:56,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=324660.0, ans=0.0 +2024-09-17 23:42:03,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=324660.0, ans=0.125 +2024-09-17 23:42:10,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=324700.0, ans=0.125 +2024-09-17 23:42:11,842 INFO [train.py:1198] (1/2) Epoch 18, batch 4250, loss[loss=0.2238, ctc_loss=0.1224, cr_loss=0.3604, attn_decoder_loss=0.2271, over 29528.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1407, cr_loss=0.384, attn_decoder_loss=0.2526, over 5804736.91 frames. 
], batch size: 74, lr: 6.13e-03, grad_scale: 4.0 +2024-09-17 23:42:13,484 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:42:16,131 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.452e+01 8.827e+01 9.431e+01 1.016e+02 4.056e+02, threshold=1.886e+02, percent-clipped=2.0 +2024-09-17 23:42:19,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=324700.0, ans=0.0 +2024-09-17 23:42:22,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=324700.0, ans=0.1 +2024-09-17 23:42:45,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=324780.0, ans=0.1 +2024-09-17 23:42:52,582 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.27 vs. limit=15.0 +2024-09-17 23:43:08,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=324820.0, ans=0.125 +2024-09-17 23:43:11,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.02 vs. limit=22.5 +2024-09-17 23:43:24,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=324860.0, ans=0.04949747468305833 +2024-09-17 23:43:27,292 INFO [train.py:1198] (1/2) Epoch 18, batch 4300, loss[loss=0.2618, ctc_loss=0.146, cr_loss=0.3947, attn_decoder_loss=0.2659, over 29546.00 frames. ], tot_loss[loss=0.2494, ctc_loss=0.1409, cr_loss=0.3838, attn_decoder_loss=0.2529, over 5796093.48 frames. ], batch size: 87, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:43:27,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=324900.0, ans=0.1 +2024-09-17 23:44:17,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=325020.0, ans=10.0 +2024-09-17 23:44:22,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=325020.0, ans=0.2 +2024-09-17 23:44:25,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=325060.0, ans=0.125 +2024-09-17 23:44:38,478 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:44:41,170 INFO [train.py:1198] (1/2) Epoch 18, batch 4350, loss[loss=0.2659, ctc_loss=0.1595, cr_loss=0.4163, attn_decoder_loss=0.2684, over 29461.00 frames. ], tot_loss[loss=0.2525, ctc_loss=0.1433, cr_loss=0.3889, attn_decoder_loss=0.256, over 5798464.91 frames. ], batch size: 97, lr: 6.13e-03, grad_scale: 8.0 +2024-09-17 23:44:46,378 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.876e+01 8.831e+01 9.306e+01 9.822e+01 6.484e+02, threshold=1.861e+02, percent-clipped=2.0 +2024-09-17 23:44:51,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.48 vs. 
limit=15.0 +2024-09-17 23:45:04,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=325140.0, ans=0.0 +2024-09-17 23:45:37,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=325220.0, ans=0.125 +2024-09-17 23:45:38,021 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.04 vs. limit=22.5 +2024-09-17 23:45:43,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=325260.0, ans=0.1 +2024-09-17 23:45:54,954 INFO [train.py:1198] (1/2) Epoch 18, batch 4400, loss[loss=0.2628, ctc_loss=0.1526, cr_loss=0.3982, attn_decoder_loss=0.2661, over 27156.00 frames. ], tot_loss[loss=0.2548, ctc_loss=0.1452, cr_loss=0.3915, attn_decoder_loss=0.2583, over 5768104.73 frames. ], batch size: 124, lr: 6.12e-03, grad_scale: 16.0 +2024-09-17 23:46:46,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=325420.0, ans=0.1 +2024-09-17 23:46:53,334 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.36 vs. limit=22.5 +2024-09-17 23:47:10,214 INFO [train.py:1198] (1/2) Epoch 18, batch 4450, loss[loss=0.2707, ctc_loss=0.1762, cr_loss=0.4152, attn_decoder_loss=0.272, over 20154.00 frames. ], tot_loss[loss=0.2574, ctc_loss=0.1493, cr_loss=0.3959, attn_decoder_loss=0.2606, over 5578451.33 frames. ], batch size: 211, lr: 6.12e-03, grad_scale: 8.0 +2024-09-17 23:47:16,218 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.181e+01 9.154e+01 9.637e+01 1.052e+02 1.489e+02, threshold=1.927e+02, percent-clipped=0.0 +2024-09-17 23:47:24,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=325540.0, ans=0.2 +2024-09-17 23:47:42,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=325580.0, ans=0.125 +2024-09-17 23:47:57,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=325620.0, ans=0.125 +2024-09-17 23:48:06,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=325620.0, ans=0.125 +2024-09-17 23:48:09,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=325660.0, ans=0.1 +2024-09-17 23:48:14,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=325660.0, ans=0.04949747468305833 +2024-09-17 23:48:17,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=325660.0, ans=0.0 +2024-09-17 23:48:20,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=325660.0, ans=0.1 +2024-09-17 23:48:22,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=325660.0, ans=0.2 +2024-09-17 23:48:25,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=325700.0, ans=0.125 +2024-09-17 23:48:26,297 INFO [train.py:1198] 
(1/2) Epoch 18, batch 4500, loss[loss=0.2792, ctc_loss=0.1908, cr_loss=0.4286, attn_decoder_loss=0.2795, over 19478.00 frames. ], tot_loss[loss=0.2603, ctc_loss=0.1545, cr_loss=0.3987, attn_decoder_loss=0.2632, over 5235106.79 frames. ], batch size: 209, lr: 6.12e-03, grad_scale: 8.0 +2024-09-17 23:48:56,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=325780.0, ans=0.125 +2024-09-17 23:48:56,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=325780.0, ans=0.2 +2024-09-17 23:49:55,593 INFO [train.py:1198] (1/2) Epoch 19, batch 0, loss[loss=0.2332, ctc_loss=0.1278, cr_loss=0.374, attn_decoder_loss=0.2366, over 29596.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1278, cr_loss=0.374, attn_decoder_loss=0.2366, over 29596.00 frames. ], batch size: 73, lr: 5.95e-03, grad_scale: 16.0 +2024-09-17 23:49:55,593 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-17 23:50:13,877 INFO [train.py:1230] (1/2) Epoch 19, validation: loss=0.2122, ctc_loss=0.03932, cr_loss=5e-15, attn_decoder_loss=0.2315, over 944034.00 frames. +2024-09-17 23:50:13,877 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-17 23:50:14,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=325800.0, ans=0.0 +2024-09-17 23:50:23,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=325800.0, ans=0.0 +2024-09-17 23:50:45,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=325880.0, ans=0.125 +2024-09-17 23:50:48,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=325880.0, ans=0.1 +2024-09-17 23:50:59,123 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.957e+01 1.057e+02 1.132e+02 1.239e+02 3.685e+02, threshold=2.265e+02, percent-clipped=3.0 +2024-09-17 23:51:12,554 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.31 vs. limit=8.0 +2024-09-17 23:51:17,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=325960.0, ans=0.0 +2024-09-17 23:51:28,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=326000.0, ans=0.125 +2024-09-17 23:51:29,515 INFO [train.py:1198] (1/2) Epoch 19, batch 50, loss[loss=0.2169, ctc_loss=0.1111, cr_loss=0.3196, attn_decoder_loss=0.2215, over 29408.00 frames. ], tot_loss[loss=0.2512, ctc_loss=0.1446, cr_loss=0.3886, attn_decoder_loss=0.2544, over 1267457.46 frames. 
], batch size: 70, lr: 5.95e-03, grad_scale: 8.0 +2024-09-17 23:51:33,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=326000.0, ans=0.025 +2024-09-17 23:51:34,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=326000.0, ans=0.125 +2024-09-17 23:51:41,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=326000.0, ans=0.0 +2024-09-17 23:52:01,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=326080.0, ans=0.1 +2024-09-17 23:52:03,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=326080.0, ans=0.125 +2024-09-17 23:52:13,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=326080.0, ans=0.125 +2024-09-17 23:52:15,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=326080.0, ans=0.025 +2024-09-17 23:52:49,548 INFO [train.py:1198] (1/2) Epoch 19, batch 100, loss[loss=0.2454, ctc_loss=0.1382, cr_loss=0.3723, attn_decoder_loss=0.2491, over 29549.00 frames. ], tot_loss[loss=0.2528, ctc_loss=0.145, cr_loss=0.3912, attn_decoder_loss=0.256, over 2251143.70 frames. ], batch size: 76, lr: 5.95e-03, grad_scale: 8.0 +2024-09-17 23:52:51,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=326200.0, ans=0.0 +2024-09-17 23:53:03,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.60 vs. 
limit=22.5 +2024-09-17 23:53:13,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=326240.0, ans=0.125 +2024-09-17 23:53:16,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=326240.0, ans=0.04949747468305833 +2024-09-17 23:53:18,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=326280.0, ans=0.1 +2024-09-17 23:53:24,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=326280.0, ans=0.125 +2024-09-17 23:53:24,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=326280.0, ans=0.125 +2024-09-17 23:53:27,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=326280.0, ans=0.125 +2024-09-17 23:53:31,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=326280.0, ans=0.1 +2024-09-17 23:53:34,404 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.770e+01 8.614e+01 9.117e+01 9.815e+01 1.763e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-17 23:53:39,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=326320.0, ans=0.125 +2024-09-17 23:54:03,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=326400.0, ans=0.5 +2024-09-17 23:54:04,645 INFO [train.py:1198] (1/2) Epoch 19, batch 150, loss[loss=0.2144, ctc_loss=0.1103, cr_loss=0.3241, attn_decoder_loss=0.2188, over 29435.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1414, cr_loss=0.3844, attn_decoder_loss=0.2531, over 3045490.00 frames. ], batch size: 70, lr: 5.95e-03, grad_scale: 8.0 +2024-09-17 23:54:06,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=326400.0, ans=0.0 +2024-09-17 23:54:18,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=326440.0, ans=0.125 +2024-09-17 23:54:24,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=326440.0, ans=0.0 +2024-09-17 23:55:06,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=326560.0, ans=0.125 +2024-09-17 23:55:17,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=326560.0, ans=0.0 +2024-09-17 23:55:20,174 INFO [train.py:1198] (1/2) Epoch 19, batch 200, loss[loss=0.2646, ctc_loss=0.1574, cr_loss=0.4028, attn_decoder_loss=0.2675, over 27313.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1406, cr_loss=0.3838, attn_decoder_loss=0.2523, over 3657907.19 frames. 
], batch size: 124, lr: 5.95e-03, grad_scale: 8.0 +2024-09-17 23:55:32,047 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:55:43,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=326640.0, ans=0.0 +2024-09-17 23:55:44,614 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.80 vs. limit=6.0 +2024-09-17 23:55:54,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=326680.0, ans=0.125 +2024-09-17 23:56:00,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=326680.0, ans=0.125 +2024-09-17 23:56:10,482 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.639e+01 8.598e+01 9.185e+01 9.838e+01 1.653e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-17 23:56:14,229 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.23 vs. limit=22.5 +2024-09-17 23:56:18,991 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.80 vs. limit=10.0 +2024-09-17 23:56:30,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=326760.0, ans=0.025 +2024-09-17 23:56:38,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=326760.0, ans=0.2 +2024-09-17 23:56:40,801 INFO [train.py:1198] (1/2) Epoch 19, batch 250, loss[loss=0.2592, ctc_loss=0.1546, cr_loss=0.4087, attn_decoder_loss=0.2618, over 29272.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1401, cr_loss=0.3837, attn_decoder_loss=0.2522, over 4139321.62 frames. ], batch size: 100, lr: 5.94e-03, grad_scale: 8.0 +2024-09-17 23:56:45,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=326800.0, ans=0.2 +2024-09-17 23:56:49,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.44 vs. limit=5.0 +2024-09-17 23:57:02,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=326840.0, ans=0.025 +2024-09-17 23:57:22,160 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-17 23:57:32,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=326920.0, ans=0.1 +2024-09-17 23:57:38,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=326920.0, ans=0.1 +2024-09-17 23:57:40,866 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.70 vs. 
limit=10.0 +2024-09-17 23:57:43,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=326960.0, ans=0.1 +2024-09-17 23:57:46,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=326960.0, ans=0.0 +2024-09-17 23:57:56,483 INFO [train.py:1198] (1/2) Epoch 19, batch 300, loss[loss=0.2717, ctc_loss=0.1632, cr_loss=0.4242, attn_decoder_loss=0.2743, over 29546.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1395, cr_loss=0.3836, attn_decoder_loss=0.2519, over 4508869.40 frames. ], batch size: 92, lr: 5.94e-03, grad_scale: 8.0 +2024-09-17 23:58:03,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=327000.0, ans=0.1 +2024-09-17 23:58:12,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=327040.0, ans=0.0 +2024-09-17 23:58:19,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=327040.0, ans=0.2 +2024-09-17 23:58:21,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=327040.0, ans=0.125 +2024-09-17 23:58:23,303 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.53 vs. limit=15.0 +2024-09-17 23:58:31,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=327080.0, ans=0.0 +2024-09-17 23:58:37,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=327080.0, ans=0.0 +2024-09-17 23:58:41,749 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.383e+01 8.487e+01 9.041e+01 9.802e+01 3.671e+02, threshold=1.808e+02, percent-clipped=2.0 +2024-09-17 23:58:48,923 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.33 vs. limit=15.0 +2024-09-17 23:59:06,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=327160.0, ans=0.0 +2024-09-17 23:59:12,436 INFO [train.py:1198] (1/2) Epoch 19, batch 350, loss[loss=0.2146, ctc_loss=0.1147, cr_loss=0.3323, attn_decoder_loss=0.2183, over 29302.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1397, cr_loss=0.3833, attn_decoder_loss=0.2522, over 4796071.55 frames. ], batch size: 71, lr: 5.94e-03, grad_scale: 8.0 +2024-09-17 23:59:18,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=327200.0, ans=0.0 +2024-09-17 23:59:30,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=327240.0, ans=0.0 +2024-09-17 23:59:30,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=327240.0, ans=0.1 +2024-09-17 23:59:33,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=327240.0, ans=0.125 +2024-09-17 23:59:47,405 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.35 vs. 
limit=22.5 +2024-09-17 23:59:58,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.25 vs. limit=15.0 +2024-09-18 00:00:07,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=327320.0, ans=0.2 +2024-09-18 00:00:09,306 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.53 vs. limit=15.0 +2024-09-18 00:00:11,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=327320.0, ans=0.0 +2024-09-18 00:00:17,167 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.02 vs. limit=15.0 +2024-09-18 00:00:21,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=327360.0, ans=10.0 +2024-09-18 00:00:27,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=327360.0, ans=0.125 +2024-09-18 00:00:30,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=327360.0, ans=0.04949747468305833 +2024-09-18 00:00:32,747 INFO [train.py:1198] (1/2) Epoch 19, batch 400, loss[loss=0.2652, ctc_loss=0.1537, cr_loss=0.4121, attn_decoder_loss=0.2684, over 29719.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1393, cr_loss=0.3818, attn_decoder_loss=0.2518, over 5025017.96 frames. ], batch size: 82, lr: 5.94e-03, grad_scale: 16.0 +2024-09-18 00:00:41,378 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.21 vs. limit=15.0 +2024-09-18 00:01:05,641 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.39 vs. limit=15.0 +2024-09-18 00:01:05,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.48 vs. limit=15.0 +2024-09-18 00:01:12,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=327480.0, ans=0.0 +2024-09-18 00:01:19,971 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.098e+01 8.676e+01 9.493e+01 1.045e+02 1.663e+02, threshold=1.899e+02, percent-clipped=0.0 +2024-09-18 00:01:34,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=327560.0, ans=0.0 +2024-09-18 00:01:34,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=327560.0, ans=0.125 +2024-09-18 00:01:46,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=327560.0, ans=0.0 +2024-09-18 00:01:48,886 INFO [train.py:1198] (1/2) Epoch 19, batch 450, loss[loss=0.2724, ctc_loss=0.1613, cr_loss=0.4146, attn_decoder_loss=0.2756, over 29699.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.14, cr_loss=0.3833, attn_decoder_loss=0.2522, over 5187986.71 frames. 
], batch size: 83, lr: 5.94e-03, grad_scale: 8.0 +2024-09-18 00:02:01,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=327600.0, ans=0.0 +2024-09-18 00:02:10,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=327640.0, ans=22.5 +2024-09-18 00:02:12,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=14.70 vs. limit=15.0 +2024-09-18 00:02:42,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=327720.0, ans=0.05 +2024-09-18 00:02:51,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=327760.0, ans=0.1 +2024-09-18 00:03:04,353 INFO [train.py:1198] (1/2) Epoch 19, batch 500, loss[loss=0.2599, ctc_loss=0.151, cr_loss=0.4188, attn_decoder_loss=0.2627, over 29452.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.1396, cr_loss=0.3827, attn_decoder_loss=0.2515, over 5330962.93 frames. ], batch size: 94, lr: 5.94e-03, grad_scale: 8.0 +2024-09-18 00:03:39,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=327880.0, ans=0.125 +2024-09-18 00:03:56,186 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.669e+01 8.703e+01 9.333e+01 1.015e+02 2.225e+02, threshold=1.867e+02, percent-clipped=2.0 +2024-09-18 00:04:04,875 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.34 vs. limit=22.5 +2024-09-18 00:04:25,792 INFO [train.py:1198] (1/2) Epoch 19, batch 550, loss[loss=0.2636, ctc_loss=0.1521, cr_loss=0.4005, attn_decoder_loss=0.2671, over 28823.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1396, cr_loss=0.3821, attn_decoder_loss=0.2516, over 5423682.67 frames. ], batch size: 104, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:05:10,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.33 vs. limit=15.0 +2024-09-18 00:05:19,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=328120.0, ans=0.1 +2024-09-18 00:05:21,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=328120.0, ans=0.1 +2024-09-18 00:05:24,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=328160.0, ans=0.125 +2024-09-18 00:05:37,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=328160.0, ans=0.1 +2024-09-18 00:05:37,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=328160.0, ans=0.125 +2024-09-18 00:05:37,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=328160.0, ans=0.2 +2024-09-18 00:05:41,273 INFO [train.py:1198] (1/2) Epoch 19, batch 600, loss[loss=0.2645, ctc_loss=0.1523, cr_loss=0.4105, attn_decoder_loss=0.2679, over 29258.00 frames. 
], tot_loss[loss=0.2485, ctc_loss=0.1398, cr_loss=0.3828, attn_decoder_loss=0.2521, over 5509657.44 frames. ], batch size: 100, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:06:14,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=328280.0, ans=0.025 +2024-09-18 00:06:27,692 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.610e+01 8.945e+01 9.378e+01 9.831e+01 2.043e+02, threshold=1.876e+02, percent-clipped=1.0 +2024-09-18 00:06:33,472 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.36 vs. limit=10.0 +2024-09-18 00:06:35,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.30 vs. limit=15.0 +2024-09-18 00:06:52,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=328360.0, ans=0.125 +2024-09-18 00:06:56,864 INFO [train.py:1198] (1/2) Epoch 19, batch 650, loss[loss=0.2513, ctc_loss=0.1457, cr_loss=0.4087, attn_decoder_loss=0.2539, over 29766.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1392, cr_loss=0.3817, attn_decoder_loss=0.2513, over 5586805.26 frames. ], batch size: 81, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:07:01,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=328400.0, ans=0.015 +2024-09-18 00:07:06,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=328400.0, ans=0.125 +2024-09-18 00:07:15,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=328440.0, ans=0.09899494936611666 +2024-09-18 00:07:24,916 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.46 vs. limit=15.0 +2024-09-18 00:07:37,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=328480.0, ans=0.125 +2024-09-18 00:07:45,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=328520.0, ans=0.2 +2024-09-18 00:08:10,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=328560.0, ans=0.2 +2024-09-18 00:08:10,970 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.19 vs. limit=15.0 +2024-09-18 00:08:17,453 INFO [train.py:1198] (1/2) Epoch 19, batch 700, loss[loss=0.2372, ctc_loss=0.1302, cr_loss=0.3796, attn_decoder_loss=0.2407, over 29535.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.1393, cr_loss=0.3824, attn_decoder_loss=0.2515, over 5637551.26 frames. ], batch size: 76, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:08:25,802 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.59 vs. 
limit=12.0 +2024-09-18 00:08:29,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=328600.0, ans=0.125 +2024-09-18 00:08:31,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=328640.0, ans=0.125 +2024-09-18 00:08:33,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.53 vs. limit=6.0 +2024-09-18 00:08:53,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=328680.0, ans=0.125 +2024-09-18 00:09:04,108 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.262e+01 8.484e+01 8.986e+01 9.600e+01 2.397e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 00:09:19,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=328760.0, ans=0.025 +2024-09-18 00:09:33,283 INFO [train.py:1198] (1/2) Epoch 19, batch 750, loss[loss=0.2545, ctc_loss=0.1514, cr_loss=0.4021, attn_decoder_loss=0.257, over 29706.00 frames. ], tot_loss[loss=0.2474, ctc_loss=0.1386, cr_loss=0.3811, attn_decoder_loss=0.251, over 5677616.79 frames. ], batch size: 82, lr: 5.93e-03, grad_scale: 8.0 +2024-09-18 00:09:41,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=328800.0, ans=0.125 +2024-09-18 00:09:54,844 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.61 vs. limit=6.0 +2024-09-18 00:10:12,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=328880.0, ans=0.125 +2024-09-18 00:10:20,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=328920.0, ans=0.0 +2024-09-18 00:10:44,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=328960.0, ans=0.125 +2024-09-18 00:10:45,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=328960.0, ans=0.1 +2024-09-18 00:10:47,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=329000.0, ans=0.125 +2024-09-18 00:10:48,708 INFO [train.py:1198] (1/2) Epoch 19, batch 800, loss[loss=0.2306, ctc_loss=0.1305, cr_loss=0.3633, attn_decoder_loss=0.2337, over 29644.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.139, cr_loss=0.3816, attn_decoder_loss=0.251, over 5707397.38 frames. ], batch size: 73, lr: 5.92e-03, grad_scale: 16.0 +2024-09-18 00:10:49,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=329000.0, ans=0.09899494936611666 +2024-09-18 00:10:50,601 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:10:52,967 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.75 vs. 
limit=15.0 +2024-09-18 00:11:21,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=329080.0, ans=0.0 +2024-09-18 00:11:25,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=329080.0, ans=0.125 +2024-09-18 00:11:33,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=329080.0, ans=0.07 +2024-09-18 00:11:33,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=329080.0, ans=0.2 +2024-09-18 00:11:36,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=329120.0, ans=0.0 +2024-09-18 00:11:39,696 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.487e+01 8.734e+01 9.110e+01 9.840e+01 2.381e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 00:12:09,147 INFO [train.py:1198] (1/2) Epoch 19, batch 850, loss[loss=0.2698, ctc_loss=0.1629, cr_loss=0.4353, attn_decoder_loss=0.272, over 29728.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1392, cr_loss=0.3824, attn_decoder_loss=0.2512, over 5736758.15 frames. ], batch size: 89, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:12:13,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=329200.0, ans=0.0 +2024-09-18 00:12:57,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=329320.0, ans=0.95 +2024-09-18 00:13:24,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=329400.0, ans=0.025 +2024-09-18 00:13:25,353 INFO [train.py:1198] (1/2) Epoch 19, batch 900, loss[loss=0.2214, ctc_loss=0.1157, cr_loss=0.3189, attn_decoder_loss=0.2261, over 29602.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1399, cr_loss=0.3833, attn_decoder_loss=0.2517, over 5741101.68 frames. ], batch size: 73, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:13:39,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=329440.0, ans=0.1 +2024-09-18 00:13:45,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=329440.0, ans=0.0 +2024-09-18 00:13:50,391 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.64 vs. limit=10.0 +2024-09-18 00:14:14,238 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.000e+01 8.696e+01 9.115e+01 9.955e+01 6.704e+02, threshold=1.823e+02, percent-clipped=4.0 +2024-09-18 00:14:17,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=329520.0, ans=0.125 +2024-09-18 00:14:30,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=329560.0, ans=0.0 +2024-09-18 00:14:41,597 INFO [train.py:1198] (1/2) Epoch 19, batch 950, loss[loss=0.228, ctc_loss=0.1148, cr_loss=0.3249, attn_decoder_loss=0.2333, over 29494.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1398, cr_loss=0.3828, attn_decoder_loss=0.2517, over 5741427.93 frames. 
], batch size: 74, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:14:52,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=329600.0, ans=0.1 +2024-09-18 00:15:03,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=329640.0, ans=0.05 +2024-09-18 00:15:03,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=329640.0, ans=0.0 +2024-09-18 00:15:06,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=329640.0, ans=10.0 +2024-09-18 00:15:10,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=329680.0, ans=0.2 +2024-09-18 00:15:14,606 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.13 vs. limit=6.0 +2024-09-18 00:15:17,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.37 vs. limit=6.0 +2024-09-18 00:15:23,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=329680.0, ans=0.125 +2024-09-18 00:15:26,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=329680.0, ans=0.125 +2024-09-18 00:15:38,268 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:15:59,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=329760.0, ans=0.0 +2024-09-18 00:16:01,894 INFO [train.py:1198] (1/2) Epoch 19, batch 1000, loss[loss=0.242, ctc_loss=0.1318, cr_loss=0.3637, attn_decoder_loss=0.2461, over 29509.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1408, cr_loss=0.3848, attn_decoder_loss=0.2526, over 5736592.52 frames. ], batch size: 77, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:16:08,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=329800.0, ans=0.0 +2024-09-18 00:16:08,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.45 vs. 
limit=15.0 +2024-09-18 00:16:50,540 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.634e+01 8.872e+01 9.584e+01 1.048e+02 1.890e+02, threshold=1.917e+02, percent-clipped=1.0 +2024-09-18 00:16:52,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=329920.0, ans=0.0 +2024-09-18 00:16:56,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=329920.0, ans=0.125 +2024-09-18 00:17:07,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=329960.0, ans=0.0 +2024-09-18 00:17:14,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=329960.0, ans=0.0 +2024-09-18 00:17:17,662 INFO [train.py:1198] (1/2) Epoch 19, batch 1050, loss[loss=0.2534, ctc_loss=0.138, cr_loss=0.3977, attn_decoder_loss=0.2574, over 29692.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1398, cr_loss=0.3834, attn_decoder_loss=0.2517, over 5742447.52 frames. ], batch size: 85, lr: 5.92e-03, grad_scale: 8.0 +2024-09-18 00:17:30,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=330000.0, ans=0.1 +2024-09-18 00:17:32,403 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.14 vs. limit=10.0 +2024-09-18 00:18:01,371 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:18:06,286 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.53 vs. limit=15.0 +2024-09-18 00:18:25,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=330160.0, ans=0.1 +2024-09-18 00:18:27,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=330160.0, ans=0.0 +2024-09-18 00:18:28,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=330160.0, ans=0.125 +2024-09-18 00:18:34,357 INFO [train.py:1198] (1/2) Epoch 19, batch 1100, loss[loss=0.2498, ctc_loss=0.148, cr_loss=0.4084, attn_decoder_loss=0.252, over 29431.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1393, cr_loss=0.3824, attn_decoder_loss=0.2512, over 5754868.47 frames. 
], batch size: 78, lr: 5.91e-03, grad_scale: 8.0 +2024-09-18 00:18:46,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=330200.0, ans=0.125 +2024-09-18 00:18:48,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=330240.0, ans=0.125 +2024-09-18 00:18:48,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=330240.0, ans=0.125 +2024-09-18 00:19:12,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=330280.0, ans=0.1 +2024-09-18 00:19:20,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=330320.0, ans=0.125 +2024-09-18 00:19:25,406 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.301e+01 8.385e+01 8.690e+01 9.252e+01 1.167e+02, threshold=1.738e+02, percent-clipped=0.0 +2024-09-18 00:19:28,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=330320.0, ans=0.125 +2024-09-18 00:19:30,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=330320.0, ans=0.125 +2024-09-18 00:19:36,258 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.67 vs. limit=22.5 +2024-09-18 00:19:55,681 INFO [train.py:1198] (1/2) Epoch 19, batch 1150, loss[loss=0.2489, ctc_loss=0.1422, cr_loss=0.3989, attn_decoder_loss=0.2519, over 29444.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1394, cr_loss=0.3828, attn_decoder_loss=0.2512, over 5753819.08 frames. ], batch size: 78, lr: 5.91e-03, grad_scale: 8.0 +2024-09-18 00:20:09,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=330440.0, ans=0.125 +2024-09-18 00:20:15,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=330440.0, ans=0.2 +2024-09-18 00:20:17,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=330440.0, ans=0.0 +2024-09-18 00:20:18,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=330440.0, ans=0.0 +2024-09-18 00:20:32,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=330480.0, ans=0.0 +2024-09-18 00:20:46,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=330520.0, ans=0.025 +2024-09-18 00:20:50,302 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.76 vs. limit=15.0 +2024-09-18 00:21:11,966 INFO [train.py:1198] (1/2) Epoch 19, batch 1200, loss[loss=0.2574, ctc_loss=0.1382, cr_loss=0.3696, attn_decoder_loss=0.2625, over 29707.00 frames. ], tot_loss[loss=0.2486, ctc_loss=0.1401, cr_loss=0.3839, attn_decoder_loss=0.2521, over 5747631.79 frames. 
], batch size: 85, lr: 5.91e-03, grad_scale: 16.0 +2024-09-18 00:21:13,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=330600.0, ans=0.0 +2024-09-18 00:21:14,251 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.67 vs. limit=15.0 +2024-09-18 00:21:18,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=330600.0, ans=0.0 +2024-09-18 00:21:19,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=330600.0, ans=0.025 +2024-09-18 00:21:24,712 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:22:02,443 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.547e+01 8.778e+01 9.349e+01 9.833e+01 1.592e+02, threshold=1.870e+02, percent-clipped=0.0 +2024-09-18 00:22:02,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=330720.0, ans=0.0 +2024-09-18 00:22:12,214 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.34 vs. limit=15.0 +2024-09-18 00:22:24,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=330760.0, ans=0.125 +2024-09-18 00:22:24,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=330760.0, ans=0.2 +2024-09-18 00:22:28,392 INFO [train.py:1198] (1/2) Epoch 19, batch 1250, loss[loss=0.2642, ctc_loss=0.1479, cr_loss=0.3922, attn_decoder_loss=0.2684, over 29489.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1402, cr_loss=0.3839, attn_decoder_loss=0.2524, over 5774168.78 frames. ], batch size: 92, lr: 5.91e-03, grad_scale: 8.0 +2024-09-18 00:23:00,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=330880.0, ans=0.2 +2024-09-18 00:23:30,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=330920.0, ans=0.0 +2024-09-18 00:23:30,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=330920.0, ans=0.125 +2024-09-18 00:23:48,831 INFO [train.py:1198] (1/2) Epoch 19, batch 1300, loss[loss=0.2549, ctc_loss=0.1444, cr_loss=0.3909, attn_decoder_loss=0.2585, over 28588.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1401, cr_loss=0.3835, attn_decoder_loss=0.2521, over 5778782.67 frames. 
], batch size: 112, lr: 5.91e-03, grad_scale: 8.0 +2024-09-18 00:23:52,295 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:23:53,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=331000.0, ans=0.125 +2024-09-18 00:24:05,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=331040.0, ans=0.0 +2024-09-18 00:24:07,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=331040.0, ans=0.125 +2024-09-18 00:24:18,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=331080.0, ans=0.2 +2024-09-18 00:24:39,237 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.389e+01 8.628e+01 9.058e+01 9.767e+01 1.420e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 00:25:05,554 INFO [train.py:1198] (1/2) Epoch 19, batch 1350, loss[loss=0.2473, ctc_loss=0.1431, cr_loss=0.3742, attn_decoder_loss=0.2506, over 29774.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1395, cr_loss=0.3827, attn_decoder_loss=0.2518, over 5795682.77 frames. ], batch size: 81, lr: 5.90e-03, grad_scale: 8.0 +2024-09-18 00:25:10,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=331200.0, ans=0.125 +2024-09-18 00:25:15,785 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.76 vs. limit=15.0 +2024-09-18 00:25:19,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=331240.0, ans=0.125 +2024-09-18 00:25:19,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=331240.0, ans=0.2 +2024-09-18 00:25:25,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=331240.0, ans=0.125 +2024-09-18 00:25:41,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=331280.0, ans=0.09899494936611666 +2024-09-18 00:25:43,566 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 00:25:46,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=331280.0, ans=0.025 +2024-09-18 00:25:54,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=331320.0, ans=0.04949747468305833 +2024-09-18 00:26:21,744 INFO [train.py:1198] (1/2) Epoch 19, batch 1400, loss[loss=0.2176, ctc_loss=0.1124, cr_loss=0.3161, attn_decoder_loss=0.2223, over 29568.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1394, cr_loss=0.3822, attn_decoder_loss=0.2518, over 5807442.11 frames. ], batch size: 69, lr: 5.90e-03, grad_scale: 8.0 +2024-09-18 00:26:51,454 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=6.22 vs. 
limit=12.0
+2024-09-18 00:26:52,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=331480.0, ans=0.125
+2024-09-18 00:26:55,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=331480.0, ans=0.1
+2024-09-18 00:26:58,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=331480.0, ans=0.125
+2024-09-18 00:27:11,761 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.236e+01 8.640e+01 9.143e+01 9.808e+01 1.570e+02, threshold=1.829e+02, percent-clipped=0.0
+2024-09-18 00:27:17,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=331520.0, ans=0.125
+2024-09-18 00:27:17,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=331520.0, ans=0.0
+2024-09-18 00:27:27,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=331560.0, ans=0.0
+2024-09-18 00:27:33,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=331560.0, ans=0.125
+2024-09-18 00:27:38,484 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.61 vs. limit=15.0
+2024-09-18 00:27:42,247 INFO [train.py:1198] (1/2) Epoch 19, batch 1450, loss[loss=0.2785, ctc_loss=0.1646, cr_loss=0.4487, attn_decoder_loss=0.2812, over 29400.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1398, cr_loss=0.383, attn_decoder_loss=0.2524, over 5803552.81 frames. ], batch size: 94, lr: 5.90e-03, grad_scale: 8.0
+2024-09-18 00:27:45,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=331600.0, ans=0.125
+2024-09-18 00:28:09,687 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 00:28:27,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=331720.0, ans=0.0
+2024-09-18 00:28:29,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=331720.0, ans=0.1
+2024-09-18 00:28:57,908 INFO [train.py:1198] (1/2) Epoch 19, batch 1500, loss[loss=0.2544, ctc_loss=0.1333, cr_loss=0.3767, attn_decoder_loss=0.2595, over 29618.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1397, cr_loss=0.3829, attn_decoder_loss=0.2526, over 5803813.66 frames. ], batch size: 86, lr: 5.90e-03, grad_scale: 8.0
+2024-09-18 00:29:19,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=331840.0, ans=0.125
+2024-09-18 00:29:21,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=331840.0, ans=0.0
+2024-09-18 00:29:21,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=331840.0, ans=0.5
+2024-09-18 00:29:41,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=331880.0, ans=0.0
+2024-09-18 00:29:48,929 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.295e+01 8.706e+01 9.242e+01 9.878e+01 2.158e+02, threshold=1.848e+02, percent-clipped=2.0
+2024-09-18 00:29:52,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=331920.0, ans=0.0
+2024-09-18 00:29:54,052 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 00:30:15,119 INFO [train.py:1198] (1/2) Epoch 19, batch 1550, loss[loss=0.2599, ctc_loss=0.1457, cr_loss=0.4047, attn_decoder_loss=0.2636, over 29468.00 frames. ], tot_loss[loss=0.249, ctc_loss=0.1398, cr_loss=0.3826, attn_decoder_loss=0.2526, over 5780860.20 frames. ], batch size: 90, lr: 5.90e-03, grad_scale: 8.0
+2024-09-18 00:30:38,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=332040.0, ans=0.0
+2024-09-18 00:31:10,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=332120.0, ans=0.1
+2024-09-18 00:31:10,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=332120.0, ans=0.125
+2024-09-18 00:31:32,706 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=12.0
+2024-09-18 00:31:35,033 INFO [train.py:1198] (1/2) Epoch 19, batch 1600, loss[loss=0.2633, ctc_loss=0.1408, cr_loss=0.3826, attn_decoder_loss=0.2684, over 29670.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1395, cr_loss=0.3805, attn_decoder_loss=0.2521, over 5764562.80 frames. ], batch size: 85, lr: 5.90e-03, grad_scale: 16.0
+2024-09-18 00:31:41,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=332200.0, ans=0.125
+2024-09-18 00:31:44,779 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.10 vs. limit=10.0
+2024-09-18 00:31:56,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=332240.0, ans=0.125
+2024-09-18 00:32:16,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=332280.0, ans=0.025
+2024-09-18 00:32:26,528 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.753e+01 8.856e+01 9.608e+01 1.051e+02 2.791e+02, threshold=1.922e+02, percent-clipped=1.0
+2024-09-18 00:32:29,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=332320.0, ans=0.125
+2024-09-18 00:32:35,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=332360.0, ans=0.125
+2024-09-18 00:32:50,578 INFO [train.py:1198] (1/2) Epoch 19, batch 1650, loss[loss=0.2538, ctc_loss=0.1417, cr_loss=0.3936, attn_decoder_loss=0.2575, over 29718.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1395, cr_loss=0.381, attn_decoder_loss=0.2521, over 5758218.97 frames. ], batch size: 89, lr: 5.89e-03, grad_scale: 8.0
+2024-09-18 00:32:54,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.43 vs. limit=15.0
+2024-09-18 00:33:07,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=332440.0, ans=0.0
+2024-09-18 00:33:16,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=332440.0, ans=0.125
+2024-09-18 00:33:18,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=332440.0, ans=0.2
+2024-09-18 00:33:21,603 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.71 vs. limit=15.0
+2024-09-18 00:33:44,849 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=6.34 vs. limit=15.0
+2024-09-18 00:34:06,071 INFO [train.py:1198] (1/2) Epoch 19, batch 1700, loss[loss=0.2146, ctc_loss=0.1129, cr_loss=0.3286, attn_decoder_loss=0.2186, over 29601.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1394, cr_loss=0.382, attn_decoder_loss=0.252, over 5779724.36 frames. ], batch size: 69, lr: 5.89e-03, grad_scale: 8.0
+2024-09-18 00:34:09,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=332600.0, ans=0.0
+2024-09-18 00:34:17,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=332600.0, ans=0.125
+2024-09-18 00:34:21,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=332640.0, ans=0.125
+2024-09-18 00:34:45,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=332680.0, ans=0.125
+2024-09-18 00:34:51,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=332720.0, ans=0.1
+2024-09-18 00:34:59,434 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.383e+01 8.557e+01 9.059e+01 9.709e+01 1.358e+02, threshold=1.812e+02, percent-clipped=0.0
+2024-09-18 00:35:01,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=332720.0, ans=0.125
+2024-09-18 00:35:04,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=332720.0, ans=0.2
+2024-09-18 00:35:07,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=332760.0, ans=0.025
+2024-09-18 00:35:09,621 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.92 vs. limit=15.0
+2024-09-18 00:35:26,339 INFO [train.py:1198] (1/2) Epoch 19, batch 1750, loss[loss=0.2227, ctc_loss=0.1189, cr_loss=0.3438, attn_decoder_loss=0.2266, over 29296.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1391, cr_loss=0.381, attn_decoder_loss=0.2518, over 5788339.36 frames. ], batch size: 67, lr: 5.89e-03, grad_scale: 8.0
+2024-09-18 00:35:53,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.85 vs. limit=15.0
+2024-09-18 00:36:03,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=332880.0, ans=0.025
+2024-09-18 00:36:12,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=332920.0, ans=0.0
+2024-09-18 00:36:41,681 INFO [train.py:1198] (1/2) Epoch 19, batch 1800, loss[loss=0.2504, ctc_loss=0.138, cr_loss=0.3875, attn_decoder_loss=0.2543, over 29699.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1388, cr_loss=0.381, attn_decoder_loss=0.2515, over 5792120.17 frames. ], batch size: 83, lr: 5.89e-03, grad_scale: 8.0
+2024-09-18 00:37:31,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=333120.0, ans=0.025
+2024-09-18 00:37:33,207 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.520e+01 8.534e+01 9.002e+01 9.561e+01 2.098e+02, threshold=1.800e+02, percent-clipped=1.0
+2024-09-18 00:37:57,823 INFO [train.py:1198] (1/2) Epoch 19, batch 1850, loss[loss=0.26, ctc_loss=0.1424, cr_loss=0.3824, attn_decoder_loss=0.2645, over 29633.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1388, cr_loss=0.3818, attn_decoder_loss=0.2513, over 5798065.09 frames. ], batch size: 86, lr: 5.89e-03, grad_scale: 8.0
+2024-09-18 00:38:31,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=333280.0, ans=0.0
+2024-09-18 00:38:36,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=333280.0, ans=0.125
+2024-09-18 00:38:36,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=333280.0, ans=0.1
+2024-09-18 00:38:43,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=333320.0, ans=0.125
+2024-09-18 00:39:14,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=333400.0, ans=0.0
+2024-09-18 00:39:15,881 INFO [train.py:1198] (1/2) Epoch 19, batch 1900, loss[loss=0.2562, ctc_loss=0.1376, cr_loss=0.3759, attn_decoder_loss=0.2611, over 29721.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1389, cr_loss=0.3828, attn_decoder_loss=0.2515, over 5805742.20 frames. ], batch size: 89, lr: 5.89e-03, grad_scale: 8.0
+2024-09-18 00:39:49,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=333480.0, ans=0.0
+2024-09-18 00:39:52,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=333480.0, ans=0.0
+2024-09-18 00:40:01,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=333480.0, ans=0.1
+2024-09-18 00:40:06,424 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.09 vs. limit=22.5
+2024-09-18 00:40:10,193 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.673e+01 8.878e+01 9.424e+01 1.001e+02 2.862e+02, threshold=1.885e+02, percent-clipped=2.0
+2024-09-18 00:40:25,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=333560.0, ans=0.0
+2024-09-18 00:40:34,854 INFO [train.py:1198] (1/2) Epoch 19, batch 1950, loss[loss=0.2473, ctc_loss=0.1403, cr_loss=0.4016, attn_decoder_loss=0.2502, over 29432.00 frames. ], tot_loss[loss=0.2491, ctc_loss=0.1401, cr_loss=0.385, attn_decoder_loss=0.2527, over 5820214.39 frames. ], batch size: 78, lr: 5.88e-03, grad_scale: 8.0
+2024-09-18 00:41:38,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=333760.0, ans=0.125
+2024-09-18 00:41:50,436 INFO [train.py:1198] (1/2) Epoch 19, batch 2000, loss[loss=0.2223, ctc_loss=0.1187, cr_loss=0.3514, attn_decoder_loss=0.226, over 29383.00 frames. ], tot_loss[loss=0.2496, ctc_loss=0.1408, cr_loss=0.3855, attn_decoder_loss=0.2531, over 5796936.45 frames. ], batch size: 67, lr: 5.88e-03, grad_scale: 16.0
+2024-09-18 00:41:50,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=333800.0, ans=0.125
+2024-09-18 00:41:55,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=333800.0, ans=0.0
+2024-09-18 00:41:58,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=333800.0, ans=0.125
+2024-09-18 00:42:00,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=333800.0, ans=0.125
+2024-09-18 00:42:17,197 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=10.20 vs. limit=15.0
+2024-09-18 00:42:20,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.80 vs. limit=15.0
+2024-09-18 00:42:24,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=333880.0, ans=0.125
+2024-09-18 00:42:34,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=333920.0, ans=0.125
+2024-09-18 00:42:41,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=333920.0, ans=0.125
+2024-09-18 00:42:44,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=333920.0, ans=0.125
+2024-09-18 00:42:45,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.whiten.whitening_limit, batch_count=333920.0, ans=12.0
+2024-09-18 00:42:46,024 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.622e+01 8.666e+01 9.128e+01 9.687e+01 2.181e+02, threshold=1.826e+02, percent-clipped=3.0
+2024-09-18 00:42:55,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=333960.0, ans=0.025
+2024-09-18 00:43:08,929 INFO [train.py:1198] (1/2) Epoch 19, batch 2050, loss[loss=0.2146, ctc_loss=0.1127, cr_loss=0.3275, attn_decoder_loss=0.2187, over 29419.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1404, cr_loss=0.3847, attn_decoder_loss=0.2524, over 5788542.30 frames. ], batch size: 70, lr: 5.88e-03, grad_scale: 8.0
+2024-09-18 00:43:20,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=334000.0, ans=0.025
+2024-09-18 00:43:45,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=334080.0, ans=0.07
+2024-09-18 00:43:55,906 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.32 vs. limit=22.5
+2024-09-18 00:44:01,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=334120.0, ans=0.125
+2024-09-18 00:44:03,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=334120.0, ans=0.125
+2024-09-18 00:44:27,291 INFO [train.py:1198] (1/2) Epoch 19, batch 2100, loss[loss=0.2544, ctc_loss=0.1412, cr_loss=0.3806, attn_decoder_loss=0.2585, over 29756.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1397, cr_loss=0.3833, attn_decoder_loss=0.252, over 5799168.52 frames. ], batch size: 81, lr: 5.88e-03, grad_scale: 8.0
+2024-09-18 00:44:34,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.46 vs. limit=12.0
+2024-09-18 00:44:49,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.99 vs. limit=10.0
+2024-09-18 00:45:08,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=334280.0, ans=0.0
+2024-09-18 00:45:09,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.whiten.whitening_limit, batch_count=334280.0, ans=15.0
+2024-09-18 00:45:13,913 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.14 vs. limit=15.0
+2024-09-18 00:45:17,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=334320.0, ans=0.0
+2024-09-18 00:45:18,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.52 vs. limit=6.0
+2024-09-18 00:45:20,467 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.479e+01 8.379e+01 9.013e+01 9.583e+01 3.257e+02, threshold=1.803e+02, percent-clipped=1.0
+2024-09-18 00:45:26,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=334360.0, ans=0.2
+2024-09-18 00:45:39,730 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.46 vs. limit=15.0
+2024-09-18 00:45:44,015 INFO [train.py:1198] (1/2) Epoch 19, batch 2150, loss[loss=0.2303, ctc_loss=0.1243, cr_loss=0.3633, attn_decoder_loss=0.234, over 29445.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1389, cr_loss=0.3818, attn_decoder_loss=0.2514, over 5813896.09 frames. ], batch size: 78, lr: 5.88e-03, grad_scale: 8.0
+2024-09-18 00:45:45,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=334400.0, ans=0.125
+2024-09-18 00:45:58,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=334440.0, ans=0.0
+2024-09-18 00:46:01,530 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.64 vs. limit=15.0
+2024-09-18 00:46:14,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=334480.0, ans=0.2
+2024-09-18 00:46:42,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=334520.0, ans=0.1
+2024-09-18 00:46:53,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=334560.0, ans=0.0
+2024-09-18 00:47:02,265 INFO [train.py:1198] (1/2) Epoch 19, batch 2200, loss[loss=0.2509, ctc_loss=0.1433, cr_loss=0.3772, attn_decoder_loss=0.2545, over 29604.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1389, cr_loss=0.3821, attn_decoder_loss=0.2513, over 5811439.74 frames. ], batch size: 86, lr: 5.87e-03, grad_scale: 8.0
+2024-09-18 00:47:40,303 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.02 vs. limit=15.0
+2024-09-18 00:47:41,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=334680.0, ans=0.0
+2024-09-18 00:47:41,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.34 vs. limit=12.0
+2024-09-18 00:47:57,758 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.316e+01 8.512e+01 9.076e+01 9.778e+01 1.780e+02, threshold=1.815e+02, percent-clipped=0.0
+2024-09-18 00:48:14,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=334760.0, ans=0.125
+2024-09-18 00:48:15,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1.whitening_limit, batch_count=334760.0, ans=10.0
+2024-09-18 00:48:20,690 INFO [train.py:1198] (1/2) Epoch 19, batch 2250, loss[loss=0.2393, ctc_loss=0.1279, cr_loss=0.3539, attn_decoder_loss=0.2438, over 29705.00 frames. ], tot_loss[loss=0.2474, ctc_loss=0.1385, cr_loss=0.3815, attn_decoder_loss=0.2511, over 5810661.66 frames. ], batch size: 82, lr: 5.87e-03, grad_scale: 8.0
+2024-09-18 00:48:37,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=334840.0, ans=0.0
+2024-09-18 00:48:43,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=334840.0, ans=0.125
+2024-09-18 00:48:54,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=334880.0, ans=0.0
+2024-09-18 00:49:22,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=334960.0, ans=0.2
+2024-09-18 00:49:22,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=334960.0, ans=0.2
+2024-09-18 00:49:28,008 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.95 vs. limit=15.0
+2024-09-18 00:49:36,424 INFO [train.py:1198] (1/2) Epoch 19, batch 2300, loss[loss=0.2318, ctc_loss=0.1284, cr_loss=0.3744, attn_decoder_loss=0.2349, over 29335.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1379, cr_loss=0.3803, attn_decoder_loss=0.2503, over 5797957.27 frames. ], batch size: 71, lr: 5.87e-03, grad_scale: 8.0
+2024-09-18 00:49:38,937 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.50 vs. limit=15.0
+2024-09-18 00:49:44,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=335000.0, ans=0.0
+2024-09-18 00:49:50,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=335040.0, ans=0.025
+2024-09-18 00:50:03,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=335040.0, ans=0.125
+2024-09-18 00:50:09,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=14.31 vs. limit=15.0
+2024-09-18 00:50:22,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=335120.0, ans=0.125
+2024-09-18 00:50:28,533 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 00:50:29,724 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.467e+01 8.590e+01 9.155e+01 9.781e+01 6.273e+02, threshold=1.831e+02, percent-clipped=2.0
+2024-09-18 00:50:31,045 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.72 vs. limit=15.0
+2024-09-18 00:50:37,729 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.63 vs. limit=22.5
+2024-09-18 00:50:43,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=335160.0, ans=0.035
+2024-09-18 00:50:50,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=335160.0, ans=0.125
+2024-09-18 00:50:55,509 INFO [train.py:1198] (1/2) Epoch 19, batch 2350, loss[loss=0.2642, ctc_loss=0.1546, cr_loss=0.4255, attn_decoder_loss=0.2669, over 29698.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1379, cr_loss=0.38, attn_decoder_loss=0.2506, over 5803381.22 frames. ], batch size: 83, lr: 5.87e-03, grad_scale: 8.0
+2024-09-18 00:50:55,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=335200.0, ans=0.1
+2024-09-18 00:50:58,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=335200.0, ans=0.125
+2024-09-18 00:51:04,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=335200.0, ans=0.025
+2024-09-18 00:52:13,763 INFO [train.py:1198] (1/2) Epoch 19, batch 2400, loss[loss=0.2429, ctc_loss=0.1437, cr_loss=0.3791, attn_decoder_loss=0.2455, over 29523.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1382, cr_loss=0.3802, attn_decoder_loss=0.2509, over 5807499.92 frames. ], batch size: 76, lr: 5.87e-03, grad_scale: 16.0
+2024-09-18 00:52:21,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=335400.0, ans=0.1
+2024-09-18 00:52:41,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=335440.0, ans=0.1
+2024-09-18 00:53:08,371 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.399e+01 8.603e+01 9.064e+01 9.775e+01 3.534e+02, threshold=1.813e+02, percent-clipped=1.0
+2024-09-18 00:53:20,947 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 00:53:29,715 INFO [train.py:1198] (1/2) Epoch 19, batch 2450, loss[loss=0.2388, ctc_loss=0.1294, cr_loss=0.3713, attn_decoder_loss=0.2427, over 29714.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1389, cr_loss=0.3814, attn_decoder_loss=0.2517, over 5785628.96 frames. ], batch size: 82, lr: 5.87e-03, grad_scale: 8.0
+2024-09-18 00:53:46,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=335640.0, ans=0.0
+2024-09-18 00:53:55,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=335640.0, ans=0.025
+2024-09-18 00:53:58,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=335680.0, ans=10.0
+2024-09-18 00:54:15,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=335720.0, ans=0.1
+2024-09-18 00:54:35,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=335760.0, ans=0.09899494936611666
+2024-09-18 00:54:41,153 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.24 vs. limit=10.0
+2024-09-18 00:54:47,583 INFO [train.py:1198] (1/2) Epoch 19, batch 2500, loss[loss=0.2587, ctc_loss=0.1435, cr_loss=0.3862, attn_decoder_loss=0.2629, over 29622.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1387, cr_loss=0.3813, attn_decoder_loss=0.2515, over 5796025.71 frames. ], batch size: 86, lr: 5.86e-03, grad_scale: 8.0
+2024-09-18 00:55:01,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=335840.0, ans=0.07
+2024-09-18 00:55:25,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=335880.0, ans=0.1
+2024-09-18 00:55:32,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=335880.0, ans=0.2
+2024-09-18 00:55:40,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=335920.0, ans=0.1
+2024-09-18 00:55:40,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=335920.0, ans=0.125
+2024-09-18 00:55:44,877 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.526e+01 9.010e+01 9.846e+01 5.892e+02, threshold=1.802e+02, percent-clipped=2.0
+2024-09-18 00:55:55,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.73 vs. limit=10.0
+2024-09-18 00:56:03,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=335960.0, ans=0.0
+2024-09-18 00:56:13,788 INFO [train.py:1198] (1/2) Epoch 19, batch 2550, loss[loss=0.2254, ctc_loss=0.1156, cr_loss=0.334, attn_decoder_loss=0.2302, over 29329.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1392, cr_loss=0.382, attn_decoder_loss=0.252, over 5798069.40 frames. ], batch size: 67, lr: 5.86e-03, grad_scale: 8.0
+2024-09-18 00:56:38,884 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.51 vs. limit=15.0
+2024-09-18 00:56:39,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=336040.0, ans=0.125
+2024-09-18 00:56:44,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=336080.0, ans=0.015
+2024-09-18 00:56:45,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=336080.0, ans=0.2
+2024-09-18 00:56:53,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=336080.0, ans=0.0
+2024-09-18 00:57:29,676 INFO [train.py:1198] (1/2) Epoch 19, batch 2600, loss[loss=0.2308, ctc_loss=0.1171, cr_loss=0.3405, attn_decoder_loss=0.2359, over 29449.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1394, cr_loss=0.3825, attn_decoder_loss=0.2525, over 5794928.97 frames. ], batch size: 78, lr: 5.86e-03, grad_scale: 8.0
+2024-09-18 00:57:49,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=336240.0, ans=0.125
+2024-09-18 00:57:53,144 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.86 vs. limit=15.0
+2024-09-18 00:58:26,745 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.488e+01 8.566e+01 8.963e+01 9.636e+01 1.354e+02, threshold=1.793e+02, percent-clipped=0.0
+2024-09-18 00:58:47,578 INFO [train.py:1198] (1/2) Epoch 19, batch 2650, loss[loss=0.2722, ctc_loss=0.1641, cr_loss=0.4342, attn_decoder_loss=0.2745, over 29259.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1394, cr_loss=0.3829, attn_decoder_loss=0.2526, over 5801340.32 frames. ], batch size: 100, lr: 5.86e-03, grad_scale: 8.0
+2024-09-18 00:59:10,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=336440.0, ans=0.2
+2024-09-18 00:59:17,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=336440.0, ans=0.1
+2024-09-18 00:59:23,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=336480.0, ans=0.125
+2024-09-18 00:59:37,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=336520.0, ans=0.125
+2024-09-18 00:59:46,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=336520.0, ans=0.09899494936611666
+2024-09-18 00:59:49,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=336560.0, ans=0.125
+2024-09-18 01:00:03,554 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.30 vs. limit=15.0
+2024-09-18 01:00:05,874 INFO [train.py:1198] (1/2) Epoch 19, batch 2700, loss[loss=0.2473, ctc_loss=0.1297, cr_loss=0.3535, attn_decoder_loss=0.2526, over 29501.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1396, cr_loss=0.3831, attn_decoder_loss=0.2529, over 5797444.72 frames. ], batch size: 87, lr: 5.86e-03, grad_scale: 8.0
+2024-09-18 01:00:25,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=336640.0, ans=0.2
+2024-09-18 01:00:32,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.whiten.whitening_limit, batch_count=336640.0, ans=12.0
+2024-09-18 01:00:36,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=336680.0, ans=0.0
+2024-09-18 01:00:50,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.whiten.whitening_limit, batch_count=336720.0, ans=12.0
+2024-09-18 01:00:56,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=336720.0, ans=0.95
+2024-09-18 01:01:00,336 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.452e+01 8.475e+01 9.059e+01 9.583e+01 2.142e+02, threshold=1.812e+02, percent-clipped=1.0
+2024-09-18 01:01:00,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=336720.0, ans=0.025
+2024-09-18 01:01:22,318 INFO [train.py:1198] (1/2) Epoch 19, batch 2750, loss[loss=0.2368, ctc_loss=0.1326, cr_loss=0.3679, attn_decoder_loss=0.2402, over 29493.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1388, cr_loss=0.3815, attn_decoder_loss=0.2517, over 5794936.44 frames. ], batch size: 75, lr: 5.86e-03, grad_scale: 8.0
+2024-09-18 01:01:22,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=336800.0, ans=0.1
+2024-09-18 01:01:37,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=336840.0, ans=0.125
+2024-09-18 01:01:45,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=336840.0, ans=0.125
+2024-09-18 01:01:54,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.21 vs. limit=22.5
+2024-09-18 01:02:04,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=336880.0, ans=0.125
+2024-09-18 01:02:19,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=336920.0, ans=0.125
+2024-09-18 01:02:28,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=336960.0, ans=0.0
+2024-09-18 01:02:39,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=337000.0, ans=0.0
+2024-09-18 01:02:40,759 INFO [train.py:1198] (1/2) Epoch 19, batch 2800, loss[loss=0.2718, ctc_loss=0.1712, cr_loss=0.3897, attn_decoder_loss=0.2744, over 20130.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1391, cr_loss=0.3817, attn_decoder_loss=0.2519, over 5775057.38 frames. ], batch size: 209, lr: 5.85e-03, grad_scale: 16.0
+2024-09-18 01:02:45,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=337000.0, ans=0.0
+2024-09-18 01:03:01,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=337040.0, ans=15.0
+2024-09-18 01:03:21,763 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.86 vs. limit=15.0
+2024-09-18 01:03:33,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.55 vs. limit=15.0
+2024-09-18 01:03:38,940 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.828e+01 9.014e+01 9.335e+01 1.020e+02 1.618e+02, threshold=1.867e+02, percent-clipped=0.0
+2024-09-18 01:03:47,544 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.24 vs. limit=22.5
+2024-09-18 01:03:58,621 INFO [train.py:1198] (1/2) Epoch 19, batch 2850, loss[loss=0.2419, ctc_loss=0.1344, cr_loss=0.3889, attn_decoder_loss=0.2452, over 29513.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1397, cr_loss=0.3826, attn_decoder_loss=0.2523, over 5761153.06 frames. ], batch size: 77, lr: 5.85e-03, grad_scale: 8.0
+2024-09-18 01:04:05,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=337200.0, ans=0.125
+2024-09-18 01:04:08,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=337200.0, ans=0.0
+2024-09-18 01:04:09,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=337200.0, ans=0.2
+2024-09-18 01:04:28,613 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.79 vs. limit=22.5
+2024-09-18 01:04:35,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.21 vs. limit=15.0
+2024-09-18 01:04:44,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=337320.0, ans=0.0
+2024-09-18 01:04:54,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_abs, batch_count=337320.0, ans=0.5
+2024-09-18 01:04:57,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.95 vs. limit=15.0
+2024-09-18 01:05:15,150 INFO [train.py:1198] (1/2) Epoch 19, batch 2900, loss[loss=0.2387, ctc_loss=0.1416, cr_loss=0.3771, attn_decoder_loss=0.2411, over 29417.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1407, cr_loss=0.3851, attn_decoder_loss=0.2535, over 5787167.01 frames. ], batch size: 79, lr: 5.85e-03, grad_scale: 8.0
+2024-09-18 01:05:17,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.55 vs. limit=15.0
+2024-09-18 01:05:24,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=337400.0, ans=0.0
+2024-09-18 01:05:41,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=337440.0, ans=0.125
+2024-09-18 01:06:06,428 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.97 vs. limit=22.5
+2024-09-18 01:06:13,397 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.580e+01 8.650e+01 9.061e+01 9.798e+01 5.022e+02, threshold=1.812e+02, percent-clipped=2.0
+2024-09-18 01:06:14,284 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.50 vs. limit=15.0
+2024-09-18 01:06:33,629 INFO [train.py:1198] (1/2) Epoch 19, batch 2950, loss[loss=0.2388, ctc_loss=0.1339, cr_loss=0.3906, attn_decoder_loss=0.2418, over 29521.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1399, cr_loss=0.3835, attn_decoder_loss=0.2523, over 5780823.44 frames. ], batch size: 75, lr: 5.85e-03, grad_scale: 8.0
+2024-09-18 01:06:51,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.53 vs. limit=12.0
+2024-09-18 01:07:09,103 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.89 vs. limit=22.5
+2024-09-18 01:07:17,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=337680.0, ans=0.0
+2024-09-18 01:07:20,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=337720.0, ans=0.125
+2024-09-18 01:07:26,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=337720.0, ans=0.0
+2024-09-18 01:07:52,296 INFO [train.py:1198] (1/2) Epoch 19, batch 3000, loss[loss=0.2484, ctc_loss=0.1338, cr_loss=0.3785, attn_decoder_loss=0.2527, over 29731.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1397, cr_loss=0.3828, attn_decoder_loss=0.252, over 5782818.58 frames. ], batch size: 81, lr: 5.85e-03, grad_scale: 8.0
+2024-09-18 01:07:52,297 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-18 01:08:10,717 INFO [train.py:1230] (1/2) Epoch 19, validation: loss=0.2115, ctc_loss=0.0393, cr_loss=5.039e-15, attn_decoder_loss=0.2306, over 944034.00 frames.
+2024-09-18 01:08:10,717 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-18 01:08:14,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=337800.0, ans=0.1
+2024-09-18 01:08:16,033 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 01:08:20,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=337800.0, ans=0.04949747468305833
+2024-09-18 01:08:21,194 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=15.45 vs. limit=15.0
+2024-09-18 01:08:22,756 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.81 vs. limit=15.0
+2024-09-18 01:08:42,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=337880.0, ans=0.2
+2024-09-18 01:09:07,057 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.545e+01 8.664e+01 9.190e+01 9.808e+01 2.398e+02, threshold=1.838e+02, percent-clipped=1.0
+2024-09-18 01:09:26,832 INFO [train.py:1198] (1/2) Epoch 19, batch 3050, loss[loss=0.2215, ctc_loss=0.1115, cr_loss=0.3124, attn_decoder_loss=0.2267, over 29533.00 frames. ], tot_loss[loss=0.2493, ctc_loss=0.1406, cr_loss=0.3842, attn_decoder_loss=0.2528, over 5775729.66 frames. ], batch size: 76, lr: 5.85e-03, grad_scale: 8.0
+2024-09-18 01:09:39,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=338000.0, ans=0.0
+2024-09-18 01:09:39,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=338000.0, ans=0.0
+2024-09-18 01:09:41,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=338040.0, ans=0.95
+2024-09-18 01:09:47,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=338040.0, ans=0.2
+2024-09-18 01:09:57,574 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.41 vs. limit=15.0
+2024-09-18 01:10:16,036 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.01 vs. limit=6.0
+2024-09-18 01:10:20,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=338120.0, ans=0.2
+2024-09-18 01:10:31,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=338160.0, ans=0.0
+2024-09-18 01:10:44,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=338200.0, ans=0.125
+2024-09-18 01:10:45,194 INFO [train.py:1198] (1/2) Epoch 19, batch 3100, loss[loss=0.2683, ctc_loss=0.147, cr_loss=0.4035, attn_decoder_loss=0.2728, over 29222.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1399, cr_loss=0.3832, attn_decoder_loss=0.2524, over 5775825.39 frames. ], batch size: 100, lr: 5.84e-03, grad_scale: 8.0
+2024-09-18 01:10:45,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=338200.0, ans=0.125
+2024-09-18 01:10:55,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=338200.0, ans=0.2
+2024-09-18 01:11:07,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=338240.0, ans=0.1
+2024-09-18 01:11:24,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=338280.0, ans=0.1
+2024-09-18 01:11:39,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=338320.0, ans=0.0
+2024-09-18 01:11:42,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=338320.0, ans=0.0
+2024-09-18 01:11:43,976 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.533e+01 9.118e+01 9.870e+01 1.992e+02, threshold=1.824e+02, percent-clipped=1.0
+2024-09-18 01:12:04,279 INFO [train.py:1198] (1/2) Epoch 19, batch 3150, loss[loss=0.2636, ctc_loss=0.1458, cr_loss=0.4057, attn_decoder_loss=0.2676, over 28798.00 frames. ], tot_loss[loss=0.2488, ctc_loss=0.1397, cr_loss=0.3828, attn_decoder_loss=0.2524, over 5783642.04 frames. ], batch size: 104, lr: 5.84e-03, grad_scale: 8.0
+2024-09-18 01:12:22,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=338440.0, ans=0.2
+2024-09-18 01:12:50,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=338520.0, ans=0.0
+2024-09-18 01:13:10,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.81 vs. limit=15.0
+2024-09-18 01:13:20,347 INFO [train.py:1198] (1/2) Epoch 19, batch 3200, loss[loss=0.2388, ctc_loss=0.1257, cr_loss=0.3546, attn_decoder_loss=0.2435, over 29408.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.139, cr_loss=0.3815, attn_decoder_loss=0.2517, over 5794209.70 frames. ], batch size: 79, lr: 5.84e-03, grad_scale: 16.0
+2024-09-18 01:13:23,584 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 01:13:43,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.34 vs. limit=22.5
+2024-09-18 01:13:50,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=338640.0, ans=0.125
+2024-09-18 01:13:50,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=338640.0, ans=0.2
+2024-09-18 01:14:02,891 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.90 vs. limit=15.0
+2024-09-18 01:14:20,071 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.580e+01 9.076e+01 9.687e+01 2.351e+02, threshold=1.815e+02, percent-clipped=1.0
+2024-09-18 01:14:20,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_ff2.min_abs, batch_count=338720.0, ans=0.1
+2024-09-18 01:14:34,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=338760.0, ans=0.0
+2024-09-18 01:14:35,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=338760.0, ans=0.125
+2024-09-18 01:14:38,536 INFO [train.py:1198] (1/2) Epoch 19, batch 3250, loss[loss=0.2639, ctc_loss=0.1492, cr_loss=0.3951, attn_decoder_loss=0.2678, over 29716.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1394, cr_loss=0.382, attn_decoder_loss=0.2522, over 5800170.48 frames. ], batch size: 84, lr: 5.84e-03, grad_scale: 8.0
+2024-09-18 01:14:56,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=338840.0, ans=0.125
+2024-09-18 01:14:56,109 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 01:15:14,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=338880.0, ans=0.125
+2024-09-18 01:15:19,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=338880.0, ans=0.125
+2024-09-18 01:15:23,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=338880.0, ans=22.5
+2024-09-18 01:15:31,207 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.27 vs. limit=15.0
+2024-09-18 01:15:38,612 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.36 vs. limit=15.0
+2024-09-18 01:15:56,362 INFO [train.py:1198] (1/2) Epoch 19, batch 3300, loss[loss=0.2551, ctc_loss=0.1284, cr_loss=0.3701, attn_decoder_loss=0.261, over 28434.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.138, cr_loss=0.3802, attn_decoder_loss=0.2508, over 5797271.16 frames. ], batch size: 111, lr: 5.84e-03, grad_scale: 8.0
+2024-09-18 01:16:12,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=339040.0, ans=0.125
+2024-09-18 01:16:18,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=339040.0, ans=0.125
+2024-09-18 01:16:22,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=339040.0, ans=0.2
+2024-09-18 01:16:27,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=339080.0, ans=0.2
+2024-09-18 01:16:27,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=339080.0, ans=0.0
+2024-09-18 01:16:43,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=339120.0, ans=0.0
+2024-09-18 01:16:53,878 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 8.663e+01 9.126e+01 9.763e+01 2.623e+02, threshold=1.825e+02, percent-clipped=1.0
+2024-09-18 01:16:57,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=339160.0, ans=0.0
+2024-09-18 01:17:12,575 INFO [train.py:1198] (1/2) Epoch 19, batch 3350, loss[loss=0.259, ctc_loss=0.1512, cr_loss=0.4029, attn_decoder_loss=0.262, over 28818.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1386, cr_loss=0.3806, attn_decoder_loss=0.2514, over 5775446.06 frames. ], batch size: 104, lr: 5.84e-03, grad_scale: 8.0
+2024-09-18 01:17:20,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=339200.0, ans=0.125
+2024-09-18 01:17:53,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=339280.0, ans=0.2
+2024-09-18 01:18:11,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=339320.0, ans=0.2
+2024-09-18 01:18:18,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=339360.0, ans=0.125
+2024-09-18 01:18:30,834 INFO [train.py:1198] (1/2) Epoch 19, batch 3400, loss[loss=0.2297, ctc_loss=0.1345, cr_loss=0.3683, attn_decoder_loss=0.2321, over 29310.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1394, cr_loss=0.382, attn_decoder_loss=0.2517, over 5768198.97 frames. ], batch size: 67, lr: 5.83e-03, grad_scale: 8.0
+2024-09-18 01:18:34,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=339400.0, ans=0.09899494936611666
+2024-09-18 01:18:34,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=339400.0, ans=0.0
+2024-09-18 01:19:22,269 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.86 vs. limit=22.5
+2024-09-18 01:19:30,514 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.395e+01 8.511e+01 9.195e+01 9.878e+01 2.681e+02, threshold=1.839e+02, percent-clipped=1.0
+2024-09-18 01:19:35,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=339560.0, ans=0.125
+2024-09-18 01:19:37,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=339560.0, ans=0.125
+2024-09-18 01:19:42,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=339560.0, ans=0.2
+2024-09-18 01:19:48,773 INFO [train.py:1198] (1/2) Epoch 19, batch 3450, loss[loss=0.2535, ctc_loss=0.1373, cr_loss=0.3825, attn_decoder_loss=0.2579, over 28342.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1394, cr_loss=0.3827, attn_decoder_loss=0.2519, over 5775602.69 frames. ], batch size: 111, lr: 5.83e-03, grad_scale: 8.0
+2024-09-18 01:19:49,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=339600.0, ans=0.125
+2024-09-18 01:20:26,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=339680.0, ans=0.2
+2024-09-18 01:20:30,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=339680.0, ans=0.025
+2024-09-18 01:20:39,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=339720.0, ans=0.95
+2024-09-18 01:20:45,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=339720.0, ans=0.1
+2024-09-18 01:21:04,604 INFO [train.py:1198] (1/2) Epoch 19, batch 3500, loss[loss=0.2259, ctc_loss=0.1243, cr_loss=0.3384, attn_decoder_loss=0.2296, over 29309.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.1395, cr_loss=0.3822, attn_decoder_loss=0.2516, over 5777124.38 frames. ], batch size: 71, lr: 5.83e-03, grad_scale: 8.0
+2024-09-18 01:21:34,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=339840.0, ans=0.0
+2024-09-18 01:21:48,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.42 vs. limit=22.5
+2024-09-18 01:21:52,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=339920.0, ans=0.025
+2024-09-18 01:21:59,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=339920.0, ans=0.125
+2024-09-18 01:22:04,058 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.228e+01 8.478e+01 8.934e+01 9.584e+01 2.565e+02, threshold=1.787e+02, percent-clipped=1.0
+2024-09-18 01:22:10,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=339960.0, ans=0.07
+2024-09-18 01:22:13,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=339960.0, ans=0.125
+2024-09-18 01:22:13,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=339960.0, ans=0.05
+2024-09-18 01:22:22,258 INFO [train.py:1198] (1/2) Epoch 19, batch 3550, loss[loss=0.2492, ctc_loss=0.1269, cr_loss=0.371, attn_decoder_loss=0.2546, over 29737.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.1393, cr_loss=0.3823, attn_decoder_loss=0.2516, over 5783482.58 frames. ], batch size: 89, lr: 5.83e-03, grad_scale: 8.0
+2024-09-18 01:22:27,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=340000.0, ans=0.125
+2024-09-18 01:22:28,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=340000.0, ans=0.125
+2024-09-18 01:22:52,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=340080.0, ans=0.0
+2024-09-18 01:23:16,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=340120.0, ans=0.125
+2024-09-18 01:23:16,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=340120.0, ans=0.0
+2024-09-18 01:23:19,344 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 01:23:29,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=340160.0, ans=0.125
+2024-09-18 01:23:34,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=340160.0, ans=0.125
+2024-09-18 01:23:37,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=340200.0, ans=0.125
+2024-09-18 01:23:38,727 INFO [train.py:1198] (1/2) Epoch 19, batch 3600, loss[loss=0.2378, ctc_loss=0.1299, cr_loss=0.378, attn_decoder_loss=0.2413, over 29525.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1393, cr_loss=0.3828, attn_decoder_loss=0.2517, over 5792308.23 frames. ], batch size: 77, lr: 5.83e-03, grad_scale: 16.0
+2024-09-18 01:23:42,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=340200.0, ans=0.1
+2024-09-18 01:23:59,364 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.36 vs. limit=15.0
+2024-09-18 01:24:00,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=340240.0, ans=0.125
+2024-09-18 01:24:21,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=340280.0, ans=0.0
+2024-09-18 01:24:37,093 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.610e+01 9.225e+01 9.925e+01 8.683e+02, threshold=1.845e+02, percent-clipped=1.0
+2024-09-18 01:24:53,597 INFO [train.py:1198] (1/2) Epoch 19, batch 3650, loss[loss=0.2554, ctc_loss=0.1373, cr_loss=0.3723, attn_decoder_loss=0.2602, over 29502.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1386, cr_loss=0.3818, attn_decoder_loss=0.2511, over 5795045.14 frames. ], batch size: 90, lr: 5.83e-03, grad_scale: 8.0
+2024-09-18 01:25:26,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=340480.0, ans=0.125
+2024-09-18 01:25:27,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.33 vs. limit=15.0
+2024-09-18 01:25:49,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=340520.0, ans=0.0
+2024-09-18 01:25:55,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=340560.0, ans=0.025
+2024-09-18 01:25:56,240 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.71 vs. limit=15.0
+2024-09-18 01:25:57,534 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=16.60 vs. limit=15.0
+2024-09-18 01:26:03,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=340560.0, ans=0.125
+2024-09-18 01:26:03,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=340560.0, ans=10.0
+2024-09-18 01:26:08,874 INFO [train.py:1198] (1/2) Epoch 19, batch 3700, loss[loss=0.2625, ctc_loss=0.1442, cr_loss=0.3835, attn_decoder_loss=0.2671, over 29687.00 frames. ], tot_loss[loss=0.2475, ctc_loss=0.1385, cr_loss=0.3822, attn_decoder_loss=0.2511, over 5804802.43 frames. ], batch size: 84, lr: 5.82e-03, grad_scale: 8.0
+2024-09-18 01:27:03,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=340720.0, ans=0.125
+2024-09-18 01:27:07,335 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.588e+01 9.238e+01 9.671e+01 4.711e+02, threshold=1.848e+02, percent-clipped=1.0
+2024-09-18 01:27:15,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=340760.0, ans=10.0
+2024-09-18 01:27:24,446 INFO [train.py:1198] (1/2) Epoch 19, batch 3750, loss[loss=0.2166, ctc_loss=0.1188, cr_loss=0.3339, attn_decoder_loss=0.22, over 29319.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1382, cr_loss=0.3812, attn_decoder_loss=0.251, over 5808480.56 frames. ], batch size: 67, lr: 5.82e-03, grad_scale: 8.0
+2024-09-18 01:27:38,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=340800.0, ans=0.0
+2024-09-18 01:27:52,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=340840.0, ans=0.125
+2024-09-18 01:27:55,997 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.66 vs. limit=22.5
+2024-09-18 01:28:23,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=340920.0, ans=0.125
+2024-09-18 01:28:39,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=341000.0, ans=0.0
+2024-09-18 01:28:41,212 INFO [train.py:1198] (1/2) Epoch 19, batch 3800, loss[loss=0.2571, ctc_loss=0.137, cr_loss=0.3705, attn_decoder_loss=0.2622, over 29649.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1382, cr_loss=0.3807, attn_decoder_loss=0.2507, over 5798958.90 frames. ], batch size: 86, lr: 5.82e-03, grad_scale: 8.0
+2024-09-18 01:28:52,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=341000.0, ans=0.0
+2024-09-18 01:28:53,699 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 01:28:54,273 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.04 vs. limit=15.0
+2024-09-18 01:29:02,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=341040.0, ans=0.2
+2024-09-18 01:29:35,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=341120.0, ans=0.125
+2024-09-18 01:29:36,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=341120.0, ans=0.125
+2024-09-18 01:29:39,592 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 8.913e+01 9.389e+01 9.954e+01 1.370e+02, threshold=1.878e+02, percent-clipped=0.0
+2024-09-18 01:29:56,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=341200.0, ans=0.0
+2024-09-18 01:29:57,763 INFO [train.py:1198] (1/2) Epoch 19, batch 3850, loss[loss=0.2558, ctc_loss=0.1431, cr_loss=0.3881, attn_decoder_loss=0.2597, over 29276.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1382, cr_loss=0.3808, attn_decoder_loss=0.2507, over 5812688.75 frames. ], batch size: 100, lr: 5.82e-03, grad_scale: 8.0
+2024-09-18 01:30:01,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=341200.0, ans=0.1
+2024-09-18 01:30:26,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=341280.0, ans=0.0
+2024-09-18 01:31:08,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=341360.0, ans=0.125
+2024-09-18 01:31:12,348 INFO [train.py:1198] (1/2) Epoch 19, batch 3900, loss[loss=0.2537, ctc_loss=0.1343, cr_loss=0.3714, attn_decoder_loss=0.2587, over 29644.00 frames.
], tot_loss[loss=0.2473, ctc_loss=0.138, cr_loss=0.3808, attn_decoder_loss=0.251, over 5817151.67 frames. ], batch size: 86, lr: 5.82e-03, grad_scale: 8.0 +2024-09-18 01:31:17,898 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.36 vs. limit=15.0 +2024-09-18 01:31:18,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.08 vs. limit=12.0 +2024-09-18 01:31:30,876 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.74 vs. limit=12.0 +2024-09-18 01:31:31,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=341440.0, ans=0.0 +2024-09-18 01:31:45,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=341480.0, ans=0.0 +2024-09-18 01:31:48,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.65 vs. limit=22.5 +2024-09-18 01:31:58,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=341520.0, ans=0.0 +2024-09-18 01:32:02,004 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.32 vs. limit=15.0 +2024-09-18 01:32:04,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=341520.0, ans=0.125 +2024-09-18 01:32:10,245 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.279e+01 8.574e+01 8.925e+01 9.348e+01 1.659e+02, threshold=1.785e+02, percent-clipped=0.0 +2024-09-18 01:32:22,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=341560.0, ans=0.1 +2024-09-18 01:32:27,242 INFO [train.py:1198] (1/2) Epoch 19, batch 3950, loss[loss=0.2771, ctc_loss=0.1696, cr_loss=0.4257, attn_decoder_loss=0.2796, over 29486.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1382, cr_loss=0.3814, attn_decoder_loss=0.2514, over 5836576.58 frames. ], batch size: 97, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:32:42,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=341640.0, ans=0.95 +2024-09-18 01:32:45,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=341640.0, ans=0.125 +2024-09-18 01:33:21,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=341720.0, ans=0.0 +2024-09-18 01:33:30,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=341760.0, ans=0.125 +2024-09-18 01:33:32,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=341760.0, ans=0.0 +2024-09-18 01:33:42,796 INFO [train.py:1198] (1/2) Epoch 19, batch 4000, loss[loss=0.2234, ctc_loss=0.1145, cr_loss=0.3299, attn_decoder_loss=0.2282, over 29523.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1387, cr_loss=0.3814, attn_decoder_loss=0.2515, over 5814087.24 frames. 
], batch size: 74, lr: 5.81e-03, grad_scale: 16.0 +2024-09-18 01:33:42,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=341800.0, ans=0.125 +2024-09-18 01:33:53,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=341800.0, ans=0.125 +2024-09-18 01:34:08,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.52 vs. limit=15.0 +2024-09-18 01:34:33,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=341920.0, ans=0.125 +2024-09-18 01:34:37,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=341920.0, ans=0.1 +2024-09-18 01:34:41,832 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.874e+01 9.386e+01 1.032e+02 2.674e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-18 01:34:57,881 INFO [train.py:1198] (1/2) Epoch 19, batch 4050, loss[loss=0.2753, ctc_loss=0.1862, cr_loss=0.3955, attn_decoder_loss=0.2764, over 19136.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.139, cr_loss=0.3815, attn_decoder_loss=0.2514, over 5795870.85 frames. ], batch size: 209, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:34:59,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=342000.0, ans=0.035 +2024-09-18 01:35:07,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=342000.0, ans=0.125 +2024-09-18 01:35:37,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=342080.0, ans=0.0 +2024-09-18 01:36:02,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=342160.0, ans=0.125 +2024-09-18 01:36:11,637 INFO [train.py:1198] (1/2) Epoch 19, batch 4100, loss[loss=0.2695, ctc_loss=0.1603, cr_loss=0.4138, attn_decoder_loss=0.2724, over 29521.00 frames. ], tot_loss[loss=0.248, ctc_loss=0.1393, cr_loss=0.3821, attn_decoder_loss=0.2516, over 5791968.96 frames. ], batch size: 90, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:36:35,947 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.23 vs. limit=15.0 +2024-09-18 01:36:55,128 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.87 vs. limit=10.0 +2024-09-18 01:37:11,583 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.528e+01 8.625e+01 9.215e+01 9.767e+01 2.484e+02, threshold=1.843e+02, percent-clipped=3.0 +2024-09-18 01:37:16,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=342360.0, ans=0.0 +2024-09-18 01:37:27,168 INFO [train.py:1198] (1/2) Epoch 19, batch 4150, loss[loss=0.2478, ctc_loss=0.1504, cr_loss=0.3944, attn_decoder_loss=0.2499, over 29474.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1392, cr_loss=0.3817, attn_decoder_loss=0.2514, over 5797309.39 frames. 
], batch size: 77, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:37:34,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=342400.0, ans=0.125 +2024-09-18 01:37:49,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=342440.0, ans=0.1 +2024-09-18 01:38:18,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.95 vs. limit=15.0 +2024-09-18 01:38:21,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=342520.0, ans=0.0 +2024-09-18 01:38:32,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=342560.0, ans=0.125 +2024-09-18 01:38:40,956 INFO [train.py:1198] (1/2) Epoch 19, batch 4200, loss[loss=0.2681, ctc_loss=0.1609, cr_loss=0.4158, attn_decoder_loss=0.2708, over 29518.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1392, cr_loss=0.3818, attn_decoder_loss=0.2518, over 5799788.25 frames. ], batch size: 90, lr: 5.81e-03, grad_scale: 8.0 +2024-09-18 01:38:41,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=342600.0, ans=0.2 +2024-09-18 01:39:06,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=342640.0, ans=0.125 +2024-09-18 01:39:12,710 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.63 vs. limit=6.0 +2024-09-18 01:39:16,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.10 vs. limit=15.0 +2024-09-18 01:39:35,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=342720.0, ans=0.04949747468305833 +2024-09-18 01:39:41,096 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.293e+01 8.502e+01 9.115e+01 9.695e+01 2.005e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-18 01:39:55,935 INFO [train.py:1198] (1/2) Epoch 19, batch 4250, loss[loss=0.2266, ctc_loss=0.1199, cr_loss=0.3393, attn_decoder_loss=0.2309, over 29513.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1392, cr_loss=0.3815, attn_decoder_loss=0.2519, over 5805227.55 frames. ], batch size: 74, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:39:59,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=342800.0, ans=0.0 +2024-09-18 01:39:59,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=342800.0, ans=0.0 +2024-09-18 01:40:08,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=342800.0, ans=0.1 +2024-09-18 01:40:13,051 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.13 vs. limit=6.0 +2024-09-18 01:40:41,486 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.87 vs. 
limit=10.0 +2024-09-18 01:40:42,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=342920.0, ans=0.125 +2024-09-18 01:40:45,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=342920.0, ans=0.125 +2024-09-18 01:41:02,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=342960.0, ans=0.125 +2024-09-18 01:41:09,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=343000.0, ans=0.125 +2024-09-18 01:41:11,123 INFO [train.py:1198] (1/2) Epoch 19, batch 4300, loss[loss=0.26, ctc_loss=0.1423, cr_loss=0.385, attn_decoder_loss=0.2645, over 29515.00 frames. ], tot_loss[loss=0.2483, ctc_loss=0.1388, cr_loss=0.3808, attn_decoder_loss=0.252, over 5794162.45 frames. ], batch size: 87, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:41:14,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=343000.0, ans=0.2 +2024-09-18 01:41:28,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.98 vs. limit=15.0 +2024-09-18 01:41:32,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=343040.0, ans=0.125 +2024-09-18 01:41:44,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=343080.0, ans=0.0 +2024-09-18 01:41:45,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=343080.0, ans=0.2 +2024-09-18 01:41:47,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=343080.0, ans=0.0 +2024-09-18 01:41:59,087 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:42:02,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=343120.0, ans=0.125 +2024-09-18 01:42:10,633 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.597e+01 8.890e+01 9.360e+01 1.027e+02 1.828e+02, threshold=1.872e+02, percent-clipped=1.0 +2024-09-18 01:42:11,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.61 vs. limit=10.0 +2024-09-18 01:42:15,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=343160.0, ans=0.125 +2024-09-18 01:42:20,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=343160.0, ans=0.1 +2024-09-18 01:42:27,033 INFO [train.py:1198] (1/2) Epoch 19, batch 4350, loss[loss=0.2622, ctc_loss=0.1494, cr_loss=0.3903, attn_decoder_loss=0.2661, over 29502.00 frames. ], tot_loss[loss=0.2518, ctc_loss=0.1417, cr_loss=0.3866, attn_decoder_loss=0.2555, over 5796318.74 frames. 
], batch size: 97, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:42:39,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=343200.0, ans=0.0 +2024-09-18 01:43:09,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=343320.0, ans=0.025 +2024-09-18 01:43:17,869 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.18 vs. limit=15.0 +2024-09-18 01:43:26,786 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.80 vs. limit=15.0 +2024-09-18 01:43:30,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=343360.0, ans=0.125 +2024-09-18 01:43:41,028 INFO [train.py:1198] (1/2) Epoch 19, batch 4400, loss[loss=0.275, ctc_loss=0.1632, cr_loss=0.4437, attn_decoder_loss=0.2776, over 27302.00 frames. ], tot_loss[loss=0.2539, ctc_loss=0.1433, cr_loss=0.3895, attn_decoder_loss=0.2576, over 5765858.35 frames. ], batch size: 125, lr: 5.80e-03, grad_scale: 16.0 +2024-09-18 01:43:54,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=343440.0, ans=0.125 +2024-09-18 01:44:20,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=343480.0, ans=0.125 +2024-09-18 01:44:21,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=6.71 vs. limit=12.0 +2024-09-18 01:44:26,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=343520.0, ans=0.025 +2024-09-18 01:44:40,313 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:44:41,226 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.011e+01 9.170e+01 9.647e+01 1.019e+02 1.899e+02, threshold=1.929e+02, percent-clipped=1.0 +2024-09-18 01:44:52,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=343560.0, ans=0.125 +2024-09-18 01:44:54,692 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.71 vs. limit=15.0 +2024-09-18 01:44:55,327 INFO [train.py:1198] (1/2) Epoch 19, batch 4450, loss[loss=0.2793, ctc_loss=0.1866, cr_loss=0.4167, attn_decoder_loss=0.2803, over 20113.00 frames. ], tot_loss[loss=0.2566, ctc_loss=0.1476, cr_loss=0.3944, attn_decoder_loss=0.26, over 5573102.29 frames. ], batch size: 210, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:44:55,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=343600.0, ans=0.2 +2024-09-18 01:44:59,260 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=14.40 vs. 
limit=15.0 +2024-09-18 01:45:47,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=343720.0, ans=0.0 +2024-09-18 01:45:49,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=343720.0, ans=0.125 +2024-09-18 01:46:07,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=343760.0, ans=0.0 +2024-09-18 01:46:11,293 INFO [train.py:1198] (1/2) Epoch 19, batch 4500, loss[loss=0.2662, ctc_loss=0.1598, cr_loss=0.4003, attn_decoder_loss=0.2691, over 21170.00 frames. ], tot_loss[loss=0.2595, ctc_loss=0.1527, cr_loss=0.3973, attn_decoder_loss=0.2625, over 5234558.80 frames. ], batch size: 209, lr: 5.80e-03, grad_scale: 8.0 +2024-09-18 01:46:13,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=343800.0, ans=0.125 +2024-09-18 01:46:36,398 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.68 vs. limit=6.0 +2024-09-18 01:46:41,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=343880.0, ans=0.0 +2024-09-18 01:47:35,799 INFO [train.py:1198] (1/2) Epoch 20, batch 0, loss[loss=0.2281, ctc_loss=0.123, cr_loss=0.3448, attn_decoder_loss=0.2321, over 29589.00 frames. ], tot_loss[loss=0.2281, ctc_loss=0.123, cr_loss=0.3448, attn_decoder_loss=0.2321, over 29589.00 frames. ], batch size: 73, lr: 5.65e-03, grad_scale: 16.0 +2024-09-18 01:47:35,800 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 01:47:41,226 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.5.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.7596, 3.6028, 3.3981, 3.7046], device='cuda:1') +2024-09-18 01:47:53,061 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.1.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.8772, 4.5716, 4.4670, 4.3070], device='cuda:1') +2024-09-18 01:47:54,256 INFO [train.py:1230] (1/2) Epoch 20, validation: loss=0.2118, ctc_loss=0.0395, cr_loss=4.878e-15, attn_decoder_loss=0.2309, over 944034.00 frames. 
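Note on reading the loss fields in these records: each line reports the component losses separately (ctc_loss, cr_loss, attn_decoder_loss) plus a combined loss. The combination weights are not printed anywhere in the log, but the logged numbers are consistent with loss = 0.1 * ctc_loss + 0.9 * attn_decoder_loss + 0.02 * cr_loss; these weights are inferred from the numbers here, not read from the recipe (see the icefall PR linked in the README for the actual configuration). A quick check against the Epoch 20 validation record above:

```python
# Weights below are inferred from the logged values, not taken from the
# training recipe -- treat them as a reading aid, not ground truth.
ctc, cr, attn = 0.0395, 4.878e-15, 0.2309  # Epoch 20 validation record above
loss = 0.1 * ctc + 0.9 * attn + 0.02 * cr
print(f"{loss:.4f}")  # 0.2118, matching the logged loss=0.2118
```

The same weights reproduce the training-side records as well, e.g. 0.1*0.1294 + 0.9*0.2456 + 0.02*0.343 = 0.2408 for the Epoch 20 batch 100 loss further below.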
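The recurring WARNING lines from optim.py summarize gradient clipping: the five numbers after "grad-norm quartiles" are the min/25%/50%/75%/max of recent per-batch gradient norms, the threshold is Clipping_scale times the median (e.g. 2.0 * 9.647e+01 = 1.929e+02 in the batch 4400 warning above), and percent-clipped is the share of recent batches whose norm exceeded the threshold. Below is a minimal sketch of that bookkeeping, assuming a fixed window of recent norms; the class name GradNormClipper, the window size, and the print-on-every-clip cadence are illustrative, not icefall's actual optim.py API:

```python
import torch

class GradNormClipper:
    """Illustrative helper (not icefall's optim.py): clip when the global
    gradient norm exceeds clipping_scale times the median of recent norms."""

    def __init__(self, clipping_scale=2.0, window=128):
        self.clipping_scale = clipping_scale
        self.window = window   # how many recent batches to remember (assumed)
        self.norms = []        # global grad norm of recent batches
        self.clipped = []      # whether each recent batch was clipped

    def __call__(self, params):
        params = [p for p in params if p.grad is not None]
        # Global 2-norm over all parameter gradients.
        norm = torch.norm(torch.stack([p.grad.norm() for p in params])).item()
        self.norms = (self.norms + [norm])[-self.window:]
        median = float(torch.tensor(self.norms).median())
        threshold = self.clipping_scale * median
        was_clipped = norm > threshold
        self.clipped = (self.clipped + [was_clipped])[-self.window:]
        if was_clipped:
            for p in params:
                p.grad.mul_(threshold / norm)  # shrink to the threshold norm
            # Mimic the log format: min/25%/50%/75%/max of recent norms.
            q = torch.quantile(torch.tensor(self.norms),
                               torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
            pct = 100.0 * sum(self.clipped) / len(self.clipped)
            print(f"Clipping_scale={self.clipping_scale}, grad-norm quartiles "
                  + " ".join(f"{v:.3e}" for v in q.tolist())
                  + f", threshold={threshold:.3e}, percent-clipped={pct:.1f}")
```

Scaling the whole gradient down to the threshold norm (rather than zeroing it) keeps the update direction while bounding its size, which is why percent-clipped values of 1-3% are harmless here.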
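Most of the INFO lines from scaling.py:214 print the current value ("ans=...") of a named ScheduledFloat: a hyperparameter (dropout-like skip rates, balancer probabilities, bypass scales, and so on) that varies with batch_count. Conceptually each schedule is a piecewise-linear function of batch_count; by this point in training (batch_count around 344000) the values are past their final breakpoint, which is why the logged ans values are constant. A small sketch of the idea, where the class below and its breakpoints are illustrative assumptions, not scaling.py's actual implementation:

```python
import bisect

class PiecewiseLinearFloat:
    """Illustrative stand-in for a ScheduledFloat-style schedule."""

    def __init__(self, *points):
        # points: (batch_count, value) pairs defining the schedule.
        self.points = sorted(points)

    def value(self, batch_count):
        xs = [x for x, _ in self.points]
        if batch_count <= xs[0]:
            return self.points[0][1]
        if batch_count >= xs[-1]:
            return self.points[-1][1]
        # Linear interpolation between the two surrounding breakpoints.
        i = bisect.bisect_right(xs, batch_count)
        (x0, y0), (x1, y1) = self.points[i - 1], self.points[i]
        return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)

# Hypothetical schedule: a skip rate decaying from 0.3 to 0.125 over the
# first 20000 batches, then held constant (breakpoints made up for the demo).
skip_rate = PiecewiseLinearFloat((0.0, 0.3), (20000.0, 0.125))
print(skip_rate.value(344000.0))  # 0.125 -- cf. the constant ans=0.125 above
```
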
+2024-09-18 01:47:54,256 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 01:48:08,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=343900.0, ans=0.125 +2024-09-18 01:48:23,223 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.011e+01 1.094e+02 1.165e+02 1.257e+02 3.397e+02, threshold=2.331e+02, percent-clipped=2.0 +2024-09-18 01:48:31,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=343980.0, ans=0.125 +2024-09-18 01:48:36,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=343980.0, ans=0.09899494936611666 +2024-09-18 01:48:40,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=344020.0, ans=0.2 +2024-09-18 01:48:55,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=344060.0, ans=0.0 +2024-09-18 01:48:59,085 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 01:49:12,438 INFO [train.py:1198] (1/2) Epoch 20, batch 50, loss[loss=0.2262, ctc_loss=0.1192, cr_loss=0.3518, attn_decoder_loss=0.2302, over 29457.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1399, cr_loss=0.3814, attn_decoder_loss=0.252, over 1268105.74 frames. ], batch size: 70, lr: 5.64e-03, grad_scale: 4.0 +2024-09-18 01:49:37,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=344140.0, ans=0.0 +2024-09-18 01:49:47,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=344180.0, ans=0.1 +2024-09-18 01:49:57,128 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.33 vs. limit=15.0 +2024-09-18 01:50:12,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=344260.0, ans=0.5 +2024-09-18 01:50:17,013 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.77 vs. limit=15.0 +2024-09-18 01:50:22,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=344260.0, ans=0.2 +2024-09-18 01:50:23,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.60 vs. limit=15.0 +2024-09-18 01:50:28,288 INFO [train.py:1198] (1/2) Epoch 20, batch 100, loss[loss=0.2408, ctc_loss=0.1294, cr_loss=0.343, attn_decoder_loss=0.2456, over 29533.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1405, cr_loss=0.3839, attn_decoder_loss=0.2535, over 2252628.55 frames. 
], batch size: 76, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:50:28,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=344300.0, ans=0.125 +2024-09-18 01:50:46,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=344340.0, ans=0.125 +2024-09-18 01:50:48,528 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.70 vs. limit=10.0 +2024-09-18 01:50:51,843 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.74 vs. limit=15.0 +2024-09-18 01:50:55,263 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.781e+01 9.298e+01 1.012e+02 1.493e+02, threshold=1.860e+02, percent-clipped=0.0 +2024-09-18 01:51:29,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=344460.0, ans=0.2 +2024-09-18 01:51:39,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=344460.0, ans=0.125 +2024-09-18 01:51:45,540 INFO [train.py:1198] (1/2) Epoch 20, batch 150, loss[loss=0.2269, ctc_loss=0.1208, cr_loss=0.3633, attn_decoder_loss=0.2306, over 29459.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1381, cr_loss=0.3809, attn_decoder_loss=0.2514, over 3047922.64 frames. ], batch size: 70, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:51:47,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=344500.0, ans=0.2 +2024-09-18 01:51:52,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=344500.0, ans=0.0 +2024-09-18 01:52:04,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=344540.0, ans=0.125 +2024-09-18 01:52:15,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=344540.0, ans=0.2 +2024-09-18 01:52:33,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=344620.0, ans=0.05 +2024-09-18 01:52:36,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=344620.0, ans=0.125 +2024-09-18 01:52:43,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=344620.0, ans=0.025 +2024-09-18 01:53:03,322 INFO [train.py:1198] (1/2) Epoch 20, batch 200, loss[loss=0.2731, ctc_loss=0.1667, cr_loss=0.45, attn_decoder_loss=0.275, over 27516.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1378, cr_loss=0.381, attn_decoder_loss=0.2508, over 3659966.63 frames. 
], batch size: 124, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:53:18,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=344740.0, ans=0.0 +2024-09-18 01:53:30,640 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.380e+01 8.894e+01 9.610e+01 1.111e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-18 01:53:40,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=344780.0, ans=0.125 +2024-09-18 01:53:55,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=344820.0, ans=0.09899494936611666 +2024-09-18 01:54:01,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=344820.0, ans=0.0 +2024-09-18 01:54:18,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=344900.0, ans=0.125 +2024-09-18 01:54:19,421 INFO [train.py:1198] (1/2) Epoch 20, batch 250, loss[loss=0.2556, ctc_loss=0.1426, cr_loss=0.3784, attn_decoder_loss=0.2597, over 29300.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1375, cr_loss=0.3811, attn_decoder_loss=0.2508, over 4142432.13 frames. ], batch size: 100, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:54:24,869 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.74 vs. limit=22.5 +2024-09-18 01:54:54,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=344980.0, ans=0.125 +2024-09-18 01:54:59,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=344980.0, ans=0.125 +2024-09-18 01:54:59,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=344980.0, ans=0.125 +2024-09-18 01:54:59,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=344980.0, ans=10.0 +2024-09-18 01:55:12,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=345020.0, ans=0.05 +2024-09-18 01:55:37,647 INFO [train.py:1198] (1/2) Epoch 20, batch 300, loss[loss=0.26, ctc_loss=0.1525, cr_loss=0.4205, attn_decoder_loss=0.2626, over 29553.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1374, cr_loss=0.3811, attn_decoder_loss=0.2507, over 4510618.38 frames. ], batch size: 92, lr: 5.64e-03, grad_scale: 8.0 +2024-09-18 01:55:51,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=345100.0, ans=0.125 +2024-09-18 01:55:51,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=345100.0, ans=0.125 +2024-09-18 01:55:52,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=345100.0, ans=0.1 +2024-09-18 01:56:02,210 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.09 vs. 
limit=10.0 +2024-09-18 01:56:07,385 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.480e+01 8.946e+01 9.469e+01 2.628e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-18 01:56:08,269 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.16 vs. limit=15.0 +2024-09-18 01:56:20,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1.whitening_limit, batch_count=345180.0, ans=10.0 +2024-09-18 01:56:56,027 INFO [train.py:1198] (1/2) Epoch 20, batch 350, loss[loss=0.2222, ctc_loss=0.1142, cr_loss=0.3328, attn_decoder_loss=0.2268, over 29351.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1383, cr_loss=0.382, attn_decoder_loss=0.2518, over 4796018.64 frames. ], batch size: 71, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 01:56:59,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=345300.0, ans=0.125 +2024-09-18 01:57:06,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=345300.0, ans=0.025 +2024-09-18 01:57:29,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=345380.0, ans=0.0 +2024-09-18 01:57:57,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=345460.0, ans=0.125 +2024-09-18 01:58:05,904 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.77 vs. limit=15.0 +2024-09-18 01:58:11,280 INFO [train.py:1198] (1/2) Epoch 20, batch 400, loss[loss=0.2555, ctc_loss=0.1436, cr_loss=0.3783, attn_decoder_loss=0.2596, over 29724.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1376, cr_loss=0.3814, attn_decoder_loss=0.2513, over 5024753.16 frames. ], batch size: 82, lr: 5.63e-03, grad_scale: 16.0 +2024-09-18 01:58:14,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=345500.0, ans=0.125 +2024-09-18 01:58:31,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=345540.0, ans=0.125 +2024-09-18 01:58:40,351 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.527e+01 8.703e+01 9.237e+01 1.010e+02 2.283e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-18 01:58:55,310 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.38 vs. limit=15.0 +2024-09-18 01:59:19,241 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.82 vs. limit=15.0 +2024-09-18 01:59:29,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=345700.0, ans=0.1 +2024-09-18 01:59:30,505 INFO [train.py:1198] (1/2) Epoch 20, batch 450, loss[loss=0.2447, ctc_loss=0.1298, cr_loss=0.3492, attn_decoder_loss=0.2497, over 29688.00 frames. ], tot_loss[loss=0.2474, ctc_loss=0.1374, cr_loss=0.3803, attn_decoder_loss=0.2511, over 5187738.53 frames. 
], batch size: 83, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 01:59:35,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=345700.0, ans=0.0 +2024-09-18 02:00:07,114 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.02 vs. limit=15.0 +2024-09-18 02:00:11,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=345780.0, ans=0.025 +2024-09-18 02:00:11,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=345780.0, ans=0.1 +2024-09-18 02:00:11,974 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.78 vs. limit=10.0 +2024-09-18 02:00:29,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=345820.0, ans=0.125 +2024-09-18 02:00:35,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=345860.0, ans=0.0 +2024-09-18 02:00:40,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=345860.0, ans=0.125 +2024-09-18 02:00:40,919 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.10 vs. limit=15.0 +2024-09-18 02:00:48,890 INFO [train.py:1198] (1/2) Epoch 20, batch 500, loss[loss=0.2567, ctc_loss=0.143, cr_loss=0.3818, attn_decoder_loss=0.2609, over 29440.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1365, cr_loss=0.3789, attn_decoder_loss=0.2498, over 5331501.60 frames. ], batch size: 94, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:01:01,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=345900.0, ans=0.0 +2024-09-18 02:01:13,736 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:01:17,859 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.440e+01 8.932e+01 9.633e+01 1.955e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-18 02:01:21,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=345980.0, ans=0.0 +2024-09-18 02:01:39,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=346020.0, ans=0.125 +2024-09-18 02:01:44,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=346020.0, ans=0.0 +2024-09-18 02:01:52,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.76 vs. limit=15.0 +2024-09-18 02:02:05,126 INFO [train.py:1198] (1/2) Epoch 20, batch 550, loss[loss=0.2736, ctc_loss=0.1667, cr_loss=0.4235, attn_decoder_loss=0.276, over 28817.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1365, cr_loss=0.3784, attn_decoder_loss=0.2499, over 5424702.06 frames. 
], batch size: 104, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:02:23,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=346140.0, ans=0.125 +2024-09-18 02:02:29,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=346140.0, ans=0.125 +2024-09-18 02:02:35,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=346180.0, ans=10.0 +2024-09-18 02:02:41,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=346180.0, ans=0.0 +2024-09-18 02:02:57,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=346220.0, ans=0.125 +2024-09-18 02:03:23,209 INFO [train.py:1198] (1/2) Epoch 20, batch 600, loss[loss=0.2564, ctc_loss=0.1392, cr_loss=0.3874, attn_decoder_loss=0.2609, over 29236.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1363, cr_loss=0.378, attn_decoder_loss=0.2497, over 5512239.07 frames. ], batch size: 100, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:03:23,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=346300.0, ans=0.125 +2024-09-18 02:03:54,166 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.587e+01 8.611e+01 9.331e+01 1.005e+02 2.865e+02, threshold=1.866e+02, percent-clipped=3.0 +2024-09-18 02:04:10,235 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:04:19,512 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.19 vs. limit=12.0 +2024-09-18 02:04:34,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=346460.0, ans=0.125 +2024-09-18 02:04:37,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=346460.0, ans=0.0 +2024-09-18 02:04:41,266 INFO [train.py:1198] (1/2) Epoch 20, batch 650, loss[loss=0.2418, ctc_loss=0.1247, cr_loss=0.3625, attn_decoder_loss=0.2468, over 29775.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1359, cr_loss=0.3779, attn_decoder_loss=0.2492, over 5589416.02 frames. 
], batch size: 81, lr: 5.63e-03, grad_scale: 8.0 +2024-09-18 02:04:52,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=346500.0, ans=0.125 +2024-09-18 02:04:56,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=346540.0, ans=0.0 +2024-09-18 02:04:56,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=346540.0, ans=0.125 +2024-09-18 02:05:10,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten.whitening_limit, batch_count=346580.0, ans=15.0 +2024-09-18 02:05:13,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=346580.0, ans=0.125 +2024-09-18 02:05:13,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=346580.0, ans=0.125 +2024-09-18 02:05:26,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=346620.0, ans=0.125 +2024-09-18 02:05:54,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_na.min_abs, batch_count=346660.0, ans=0.02 +2024-09-18 02:05:56,825 INFO [train.py:1198] (1/2) Epoch 20, batch 700, loss[loss=0.2311, ctc_loss=0.121, cr_loss=0.3516, attn_decoder_loss=0.2355, over 29511.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1363, cr_loss=0.3787, attn_decoder_loss=0.2499, over 5639580.97 frames. ], batch size: 76, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:05:58,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=346700.0, ans=0.125 +2024-09-18 02:06:25,602 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.807e+01 8.542e+01 8.952e+01 9.567e+01 1.859e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-18 02:06:27,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=346780.0, ans=0.0 +2024-09-18 02:06:54,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=346820.0, ans=0.1 +2024-09-18 02:06:59,551 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.09 vs. limit=15.0 +2024-09-18 02:07:14,603 INFO [train.py:1198] (1/2) Epoch 20, batch 750, loss[loss=0.2418, ctc_loss=0.1269, cr_loss=0.3652, attn_decoder_loss=0.2464, over 29716.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.136, cr_loss=0.3782, attn_decoder_loss=0.2497, over 5679340.01 frames. ], batch size: 82, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:08:14,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=347020.0, ans=0.0 +2024-09-18 02:08:29,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=347060.0, ans=0.2 +2024-09-18 02:08:32,360 INFO [train.py:1198] (1/2) Epoch 20, batch 800, loss[loss=0.2325, ctc_loss=0.1291, cr_loss=0.3737, attn_decoder_loss=0.2356, over 29575.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1368, cr_loss=0.3793, attn_decoder_loss=0.2499, over 5710034.17 frames. 
], batch size: 73, lr: 5.62e-03, grad_scale: 16.0 +2024-09-18 02:08:34,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=347100.0, ans=0.0 +2024-09-18 02:09:01,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=347180.0, ans=0.125 +2024-09-18 02:09:01,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.43 vs. limit=15.0 +2024-09-18 02:09:02,545 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.412e+01 8.904e+01 9.473e+01 1.507e+02, threshold=1.781e+02, percent-clipped=0.0 +2024-09-18 02:09:08,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=347180.0, ans=0.0 +2024-09-18 02:09:33,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=347260.0, ans=0.0 +2024-09-18 02:09:46,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=347300.0, ans=0.025 +2024-09-18 02:09:48,230 INFO [train.py:1198] (1/2) Epoch 20, batch 850, loss[loss=0.2523, ctc_loss=0.1388, cr_loss=0.3837, attn_decoder_loss=0.2564, over 29701.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1363, cr_loss=0.3784, attn_decoder_loss=0.2497, over 5739507.61 frames. ], batch size: 89, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:10:13,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=347340.0, ans=0.95 +2024-09-18 02:10:19,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=347380.0, ans=0.125 +2024-09-18 02:10:42,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=347420.0, ans=0.2 +2024-09-18 02:11:03,657 INFO [train.py:1198] (1/2) Epoch 20, batch 900, loss[loss=0.2319, ctc_loss=0.1214, cr_loss=0.3683, attn_decoder_loss=0.2359, over 29613.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1364, cr_loss=0.3784, attn_decoder_loss=0.2498, over 5744653.01 frames. ], batch size: 73, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:11:12,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=347500.0, ans=0.1 +2024-09-18 02:11:16,037 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.21 vs. 
limit=15.0 +2024-09-18 02:11:22,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=347540.0, ans=0.2 +2024-09-18 02:11:38,370 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.746e+01 8.646e+01 9.308e+01 1.001e+02 2.040e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-18 02:11:44,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=347580.0, ans=0.0 +2024-09-18 02:11:47,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=347580.0, ans=0.125 +2024-09-18 02:11:49,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=347580.0, ans=0.125 +2024-09-18 02:12:08,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=347660.0, ans=0.025 +2024-09-18 02:12:12,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.33 vs. limit=15.0 +2024-09-18 02:12:14,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=347660.0, ans=0.1 +2024-09-18 02:12:15,059 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:12:23,495 INFO [train.py:1198] (1/2) Epoch 20, batch 950, loss[loss=0.2209, ctc_loss=0.1102, cr_loss=0.3145, attn_decoder_loss=0.2263, over 29512.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1363, cr_loss=0.3782, attn_decoder_loss=0.2498, over 5745727.72 frames. ], batch size: 74, lr: 5.62e-03, grad_scale: 8.0 +2024-09-18 02:13:07,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=347820.0, ans=0.025 +2024-09-18 02:13:13,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=347820.0, ans=0.1 +2024-09-18 02:13:15,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=347820.0, ans=0.2 +2024-09-18 02:13:18,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=347820.0, ans=0.1 +2024-09-18 02:13:25,795 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:13:25,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=347860.0, ans=0.1 +2024-09-18 02:13:33,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=347860.0, ans=0.125 +2024-09-18 02:13:39,003 INFO [train.py:1198] (1/2) Epoch 20, batch 1000, loss[loss=0.2414, ctc_loss=0.1347, cr_loss=0.3934, attn_decoder_loss=0.2445, over 29522.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1375, cr_loss=0.3801, attn_decoder_loss=0.2507, over 5737924.71 frames. 
], batch size: 77, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:13:51,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=347900.0, ans=0.0 +2024-09-18 02:14:09,208 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.603e+01 8.704e+01 9.397e+01 1.040e+02 1.771e+02, threshold=1.879e+02, percent-clipped=0.0 +2024-09-18 02:14:50,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=348060.0, ans=0.125 +2024-09-18 02:14:53,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=348100.0, ans=0.2 +2024-09-18 02:14:54,852 INFO [train.py:1198] (1/2) Epoch 20, batch 1050, loss[loss=0.2648, ctc_loss=0.152, cr_loss=0.4204, attn_decoder_loss=0.268, over 29673.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1368, cr_loss=0.3792, attn_decoder_loss=0.2501, over 5746085.39 frames. ], batch size: 85, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:15:03,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=348100.0, ans=0.2 +2024-09-18 02:15:09,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=348100.0, ans=0.025 +2024-09-18 02:15:30,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=348180.0, ans=0.5 +2024-09-18 02:15:34,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=348180.0, ans=0.125 +2024-09-18 02:15:42,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.99 vs. limit=12.0 +2024-09-18 02:15:49,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=348220.0, ans=0.2 +2024-09-18 02:16:04,921 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:16:08,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=348260.0, ans=0.125 +2024-09-18 02:16:15,293 INFO [train.py:1198] (1/2) Epoch 20, batch 1100, loss[loss=0.2458, ctc_loss=0.1418, cr_loss=0.3828, attn_decoder_loss=0.2488, over 29466.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1368, cr_loss=0.3786, attn_decoder_loss=0.25, over 5758569.23 frames. ], batch size: 78, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:16:17,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=348300.0, ans=0.125 +2024-09-18 02:16:22,348 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.51 vs. limit=15.0 +2024-09-18 02:16:25,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.21 vs. 
limit=15.0 +2024-09-18 02:16:45,740 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.272e+01 8.537e+01 9.169e+01 9.929e+01 2.148e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-18 02:16:58,949 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.77 vs. limit=6.0 +2024-09-18 02:17:11,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.04 vs. limit=15.0 +2024-09-18 02:17:11,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=348420.0, ans=0.125 +2024-09-18 02:17:21,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=348460.0, ans=0.1 +2024-09-18 02:17:21,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=348460.0, ans=0.125 +2024-09-18 02:17:30,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=348500.0, ans=0.09899494936611666 +2024-09-18 02:17:31,305 INFO [train.py:1198] (1/2) Epoch 20, batch 1150, loss[loss=0.2426, ctc_loss=0.1287, cr_loss=0.3816, attn_decoder_loss=0.2468, over 29452.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1366, cr_loss=0.3787, attn_decoder_loss=0.25, over 5757378.89 frames. ], batch size: 78, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:17:33,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=348500.0, ans=0.125 +2024-09-18 02:17:34,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=348500.0, ans=0.125 +2024-09-18 02:17:54,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=348540.0, ans=0.125 +2024-09-18 02:18:00,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=348580.0, ans=0.1 +2024-09-18 02:18:20,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=348620.0, ans=0.1 +2024-09-18 02:18:34,576 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.71 vs. limit=15.0 +2024-09-18 02:18:45,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=348700.0, ans=0.125 +2024-09-18 02:18:46,987 INFO [train.py:1198] (1/2) Epoch 20, batch 1200, loss[loss=0.262, ctc_loss=0.1525, cr_loss=0.4103, attn_decoder_loss=0.265, over 29660.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1373, cr_loss=0.3798, attn_decoder_loss=0.251, over 5749377.30 frames. 
], batch size: 85, lr: 5.61e-03, grad_scale: 16.0 +2024-09-18 02:18:57,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=348700.0, ans=0.125 +2024-09-18 02:19:06,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=348740.0, ans=0.125 +2024-09-18 02:19:14,878 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.75 vs. limit=6.0 +2024-09-18 02:19:22,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.48 vs. limit=6.0 +2024-09-18 02:19:23,033 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.725e+01 9.303e+01 1.008e+02 1.601e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-18 02:19:37,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=348820.0, ans=0.1 +2024-09-18 02:19:51,887 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.47 vs. limit=6.0 +2024-09-18 02:19:53,800 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.71 vs. limit=15.0 +2024-09-18 02:19:55,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=348860.0, ans=0.0 +2024-09-18 02:20:07,686 INFO [train.py:1198] (1/2) Epoch 20, batch 1250, loss[loss=0.2735, ctc_loss=0.1585, cr_loss=0.4385, attn_decoder_loss=0.2766, over 29522.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1379, cr_loss=0.3822, attn_decoder_loss=0.2516, over 5776082.44 frames. ], batch size: 92, lr: 5.61e-03, grad_scale: 8.0 +2024-09-18 02:20:26,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=348940.0, ans=0.0 +2024-09-18 02:21:01,334 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.41 vs. limit=10.0 +2024-09-18 02:21:03,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=349020.0, ans=0.0 +2024-09-18 02:21:05,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=349020.0, ans=0.025 +2024-09-18 02:21:23,263 INFO [train.py:1198] (1/2) Epoch 20, batch 1300, loss[loss=0.2546, ctc_loss=0.1377, cr_loss=0.3577, attn_decoder_loss=0.2596, over 28632.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1373, cr_loss=0.3806, attn_decoder_loss=0.2508, over 5781676.92 frames. ], batch size: 112, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:21:28,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=349100.0, ans=10.0 +2024-09-18 02:21:30,449 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.83 vs. 
limit=15.0 +2024-09-18 02:21:55,239 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.474e+01 8.542e+01 9.047e+01 9.656e+01 1.934e+02, threshold=1.809e+02, percent-clipped=1.0 +2024-09-18 02:22:25,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=349260.0, ans=0.1 +2024-09-18 02:22:28,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=349260.0, ans=0.125 +2024-09-18 02:22:38,978 INFO [train.py:1198] (1/2) Epoch 20, batch 1350, loss[loss=0.2482, ctc_loss=0.1308, cr_loss=0.3554, attn_decoder_loss=0.2534, over 29750.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1364, cr_loss=0.3794, attn_decoder_loss=0.2503, over 5798812.38 frames. ], batch size: 81, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:22:39,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=349300.0, ans=0.125 +2024-09-18 02:23:56,658 INFO [train.py:1198] (1/2) Epoch 20, batch 1400, loss[loss=0.2244, ctc_loss=0.1229, cr_loss=0.3357, attn_decoder_loss=0.2282, over 29562.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1365, cr_loss=0.3792, attn_decoder_loss=0.2503, over 5809771.02 frames. ], batch size: 69, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:23:58,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=349500.0, ans=0.125 +2024-09-18 02:23:59,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.73 vs. limit=15.0 +2024-09-18 02:24:09,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.92 vs. limit=12.0 +2024-09-18 02:24:28,082 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.145e+01 8.400e+01 8.906e+01 9.445e+01 1.188e+02, threshold=1.781e+02, percent-clipped=0.0 +2024-09-18 02:24:46,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=349620.0, ans=0.0 +2024-09-18 02:25:12,146 INFO [train.py:1198] (1/2) Epoch 20, batch 1450, loss[loss=0.2645, ctc_loss=0.1479, cr_loss=0.4225, attn_decoder_loss=0.268, over 29444.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1368, cr_loss=0.3799, attn_decoder_loss=0.2509, over 5806426.87 frames. ], batch size: 94, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:25:15,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=349700.0, ans=0.0 +2024-09-18 02:25:33,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=349740.0, ans=0.125 +2024-09-18 02:25:38,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=349740.0, ans=0.0 +2024-09-18 02:25:56,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=349820.0, ans=0.125 +2024-09-18 02:26:27,567 INFO [train.py:1198] (1/2) Epoch 20, batch 1500, loss[loss=0.2521, ctc_loss=0.1359, cr_loss=0.3615, attn_decoder_loss=0.257, over 29646.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1368, cr_loss=0.3795, attn_decoder_loss=0.251, over 5807438.14 frames. 
], batch size: 86, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:27:04,146 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.814e+01 9.450e+01 1.000e+02 1.461e+02, threshold=1.890e+02, percent-clipped=0.0 +2024-09-18 02:27:10,011 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.98 vs. limit=10.0 +2024-09-18 02:27:16,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=350020.0, ans=0.0 +2024-09-18 02:27:17,621 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.60 vs. limit=10.0 +2024-09-18 02:27:17,788 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=5.66 vs. limit=12.0 +2024-09-18 02:27:30,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=350020.0, ans=0.125 +2024-09-18 02:27:33,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=350060.0, ans=0.0 +2024-09-18 02:27:48,317 INFO [train.py:1198] (1/2) Epoch 20, batch 1550, loss[loss=0.2621, ctc_loss=0.1522, cr_loss=0.4233, attn_decoder_loss=0.2649, over 29520.00 frames. ], tot_loss[loss=0.2474, ctc_loss=0.1374, cr_loss=0.3797, attn_decoder_loss=0.2511, over 5783641.47 frames. ], batch size: 90, lr: 5.60e-03, grad_scale: 8.0 +2024-09-18 02:27:59,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=350100.0, ans=0.1 +2024-09-18 02:28:09,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=350140.0, ans=0.0 +2024-09-18 02:28:12,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=350140.0, ans=0.125 +2024-09-18 02:29:03,919 INFO [train.py:1198] (1/2) Epoch 20, batch 1600, loss[loss=0.2598, ctc_loss=0.1405, cr_loss=0.3956, attn_decoder_loss=0.2642, over 29671.00 frames. ], tot_loss[loss=0.2474, ctc_loss=0.1376, cr_loss=0.3802, attn_decoder_loss=0.2511, over 5765256.06 frames. ], batch size: 85, lr: 5.59e-03, grad_scale: 16.0 +2024-09-18 02:29:37,442 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.469e+01 8.748e+01 9.299e+01 1.007e+02 2.517e+02, threshold=1.860e+02, percent-clipped=3.0 +2024-09-18 02:29:39,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=350380.0, ans=0.125 +2024-09-18 02:29:57,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=350420.0, ans=0.125 +2024-09-18 02:30:19,929 INFO [train.py:1198] (1/2) Epoch 20, batch 1650, loss[loss=0.2581, ctc_loss=0.1442, cr_loss=0.3889, attn_decoder_loss=0.2621, over 29705.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1373, cr_loss=0.3798, attn_decoder_loss=0.2508, over 5759594.68 frames. ], batch size: 89, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:30:25,157 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.92 vs. 
limit=15.0 +2024-09-18 02:30:33,164 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:30:40,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=350540.0, ans=0.2 +2024-09-18 02:30:57,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=350580.0, ans=0.1 +2024-09-18 02:31:26,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=350660.0, ans=0.0 +2024-09-18 02:31:32,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=350660.0, ans=0.125 +2024-09-18 02:31:32,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=350660.0, ans=0.2 +2024-09-18 02:31:39,886 INFO [train.py:1198] (1/2) Epoch 20, batch 1700, loss[loss=0.2117, ctc_loss=0.1106, cr_loss=0.3384, attn_decoder_loss=0.2154, over 29578.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.137, cr_loss=0.3791, attn_decoder_loss=0.2506, over 5781901.64 frames. ], batch size: 69, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:31:40,162 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:31:44,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=350700.0, ans=0.125 +2024-09-18 02:31:56,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=350740.0, ans=0.125 +2024-09-18 02:32:04,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=350740.0, ans=0.2 +2024-09-18 02:32:05,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=350740.0, ans=0.125 +2024-09-18 02:32:07,961 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=15.0 +2024-09-18 02:32:13,140 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.259e+01 8.537e+01 9.114e+01 9.746e+01 1.208e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-18 02:32:14,102 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.10 vs. limit=15.0 +2024-09-18 02:32:34,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=350820.0, ans=0.2 +2024-09-18 02:32:50,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=350860.0, ans=0.0 +2024-09-18 02:32:52,324 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.33 vs. limit=15.0 +2024-09-18 02:32:55,906 INFO [train.py:1198] (1/2) Epoch 20, batch 1750, loss[loss=0.2239, ctc_loss=0.1345, cr_loss=0.3655, attn_decoder_loss=0.2258, over 29343.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.137, cr_loss=0.3793, attn_decoder_loss=0.2504, over 5788897.36 frames. 
], batch size: 67, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:32:59,818 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.69 vs. limit=12.0 +2024-09-18 02:33:15,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=350940.0, ans=0.1 +2024-09-18 02:33:21,262 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.86 vs. limit=15.0 +2024-09-18 02:33:32,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=350980.0, ans=0.125 +2024-09-18 02:33:36,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=350980.0, ans=0.025 +2024-09-18 02:33:44,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=351020.0, ans=0.0 +2024-09-18 02:33:44,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=351020.0, ans=0.0 +2024-09-18 02:33:55,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=351060.0, ans=0.125 +2024-09-18 02:33:56,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=351060.0, ans=0.0 +2024-09-18 02:34:11,446 INFO [train.py:1198] (1/2) Epoch 20, batch 1800, loss[loss=0.2614, ctc_loss=0.1502, cr_loss=0.4014, attn_decoder_loss=0.2648, over 29687.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1369, cr_loss=0.3794, attn_decoder_loss=0.2507, over 5791489.57 frames. 
], batch size: 83, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:34:20,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=351100.0, ans=0.125 +2024-09-18 02:34:32,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=351140.0, ans=15.0 +2024-09-18 02:34:36,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=351140.0, ans=0.125 +2024-09-18 02:34:36,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=351140.0, ans=0.0 +2024-09-18 02:34:41,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=351140.0, ans=0.125 +2024-09-18 02:34:41,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=351140.0, ans=0.125 +2024-09-18 02:34:48,914 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.074e+01 8.564e+01 9.228e+01 9.746e+01 1.564e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-18 02:35:00,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=351220.0, ans=0.125 +2024-09-18 02:35:01,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=351220.0, ans=0.125 +2024-09-18 02:35:04,040 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.81 vs. limit=15.0 +2024-09-18 02:35:04,196 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.06 vs. limit=15.0 +2024-09-18 02:35:32,099 INFO [train.py:1198] (1/2) Epoch 20, batch 1850, loss[loss=0.2563, ctc_loss=0.1394, cr_loss=0.3631, attn_decoder_loss=0.2613, over 29616.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1372, cr_loss=0.3803, attn_decoder_loss=0.2508, over 5797368.98 frames. ], batch size: 86, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:35:32,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=351300.0, ans=0.1 +2024-09-18 02:35:47,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=351340.0, ans=0.125 +2024-09-18 02:35:53,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=351340.0, ans=0.1 +2024-09-18 02:35:55,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.47 vs. 
limit=10.0 +2024-09-18 02:36:14,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=351380.0, ans=0.1 +2024-09-18 02:36:14,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=351380.0, ans=0.2 +2024-09-18 02:36:14,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=351380.0, ans=0.125 +2024-09-18 02:36:23,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.61 vs. limit=15.0 +2024-09-18 02:36:25,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=351420.0, ans=0.125 +2024-09-18 02:36:32,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=351460.0, ans=0.05 +2024-09-18 02:36:47,370 INFO [train.py:1198] (1/2) Epoch 20, batch 1900, loss[loss=0.2588, ctc_loss=0.1469, cr_loss=0.4083, attn_decoder_loss=0.2622, over 29698.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1376, cr_loss=0.3815, attn_decoder_loss=0.2514, over 5803793.64 frames. ], batch size: 89, lr: 5.59e-03, grad_scale: 8.0 +2024-09-18 02:36:52,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=351500.0, ans=0.2 +2024-09-18 02:36:56,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=351500.0, ans=0.0 +2024-09-18 02:37:13,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=351540.0, ans=0.125 +2024-09-18 02:37:20,728 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.814e+01 8.754e+01 9.062e+01 9.837e+01 1.384e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 02:37:22,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=351580.0, ans=0.125 +2024-09-18 02:37:28,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=351580.0, ans=0.125 +2024-09-18 02:37:37,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=351620.0, ans=0.07 +2024-09-18 02:37:42,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=351620.0, ans=0.125 +2024-09-18 02:37:46,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=351660.0, ans=0.0 +2024-09-18 02:37:46,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=351660.0, ans=0.2 +2024-09-18 02:37:46,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=351660.0, ans=0.125 +2024-09-18 02:37:54,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=351660.0, ans=0.125 +2024-09-18 02:38:03,042 INFO [train.py:1198] (1/2) Epoch 20, batch 1950, loss[loss=0.2482, ctc_loss=0.1415, cr_loss=0.3797, 
attn_decoder_loss=0.2516, over 29439.00 frames. ], tot_loss[loss=0.2487, ctc_loss=0.1387, cr_loss=0.3832, attn_decoder_loss=0.2524, over 5818497.01 frames. ], batch size: 78, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:38:19,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=351740.0, ans=0.125 +2024-09-18 02:38:24,587 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.51 vs. limit=15.0 +2024-09-18 02:38:44,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=351780.0, ans=0.0 +2024-09-18 02:38:48,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=351780.0, ans=0.125 +2024-09-18 02:39:23,347 INFO [train.py:1198] (1/2) Epoch 20, batch 2000, loss[loss=0.2137, ctc_loss=0.114, cr_loss=0.3378, attn_decoder_loss=0.2172, over 29374.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.139, cr_loss=0.3833, attn_decoder_loss=0.2526, over 5796671.71 frames. ], batch size: 67, lr: 5.58e-03, grad_scale: 16.0 +2024-09-18 02:39:31,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=351900.0, ans=0.0 +2024-09-18 02:39:42,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=351940.0, ans=0.125 +2024-09-18 02:39:46,585 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:39:51,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=351940.0, ans=0.2 +2024-09-18 02:39:56,880 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.619e+01 9.159e+01 9.729e+01 7.125e+02, threshold=1.832e+02, percent-clipped=2.0 +2024-09-18 02:40:00,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=351980.0, ans=0.1 +2024-09-18 02:40:00,840 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.59 vs. limit=15.0 +2024-09-18 02:40:17,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=352020.0, ans=0.125 +2024-09-18 02:40:19,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=352020.0, ans=0.0 +2024-09-18 02:40:31,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=352060.0, ans=0.125 +2024-09-18 02:40:32,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.36 vs. 
limit=15.0 +2024-09-18 02:40:34,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=352060.0, ans=0.125 +2024-09-18 02:40:36,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=352060.0, ans=0.2 +2024-09-18 02:40:46,373 INFO [train.py:1198] (1/2) Epoch 20, batch 2050, loss[loss=0.2246, ctc_loss=0.1195, cr_loss=0.3269, attn_decoder_loss=0.229, over 29467.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1386, cr_loss=0.3825, attn_decoder_loss=0.2518, over 5787932.39 frames. ], batch size: 70, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:41:09,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=352140.0, ans=0.125 +2024-09-18 02:41:10,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=352140.0, ans=0.125 +2024-09-18 02:41:12,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=352140.0, ans=0.125 +2024-09-18 02:41:12,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=352140.0, ans=0.125 +2024-09-18 02:41:35,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=352220.0, ans=0.125 +2024-09-18 02:41:59,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=352260.0, ans=0.0 +2024-09-18 02:42:01,886 INFO [train.py:1198] (1/2) Epoch 20, batch 2100, loss[loss=0.2466, ctc_loss=0.1327, cr_loss=0.3826, attn_decoder_loss=0.2507, over 29746.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1375, cr_loss=0.3809, attn_decoder_loss=0.2509, over 5800476.55 frames. ], batch size: 81, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:42:38,586 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.126e+01 8.424e+01 8.970e+01 9.709e+01 1.410e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 02:42:40,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=352380.0, ans=0.2 +2024-09-18 02:42:41,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=352380.0, ans=0.125 +2024-09-18 02:43:00,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=352420.0, ans=0.2 +2024-09-18 02:43:21,526 INFO [train.py:1198] (1/2) Epoch 20, batch 2150, loss[loss=0.2608, ctc_loss=0.1529, cr_loss=0.3998, attn_decoder_loss=0.2639, over 29439.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1369, cr_loss=0.3797, attn_decoder_loss=0.2505, over 5815752.05 frames. ], batch size: 78, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:43:41,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=352540.0, ans=0.0 +2024-09-18 02:44:06,668 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.32 vs. 
limit=10.0 +2024-09-18 02:44:07,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=352620.0, ans=0.1 +2024-09-18 02:44:11,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=352620.0, ans=0.1 +2024-09-18 02:44:30,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=352660.0, ans=0.1 +2024-09-18 02:44:31,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=352660.0, ans=0.125 +2024-09-18 02:44:37,538 INFO [train.py:1198] (1/2) Epoch 20, batch 2200, loss[loss=0.2667, ctc_loss=0.1526, cr_loss=0.4162, attn_decoder_loss=0.2701, over 29648.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1367, cr_loss=0.3793, attn_decoder_loss=0.2503, over 5811441.60 frames. ], batch size: 86, lr: 5.58e-03, grad_scale: 8.0 +2024-09-18 02:44:47,163 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:44:53,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.78 vs. limit=15.0 +2024-09-18 02:44:57,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=352740.0, ans=0.0 +2024-09-18 02:45:08,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=352780.0, ans=0.0 +2024-09-18 02:45:12,309 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.647e+01 9.174e+01 9.915e+01 1.896e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-18 02:45:26,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=352820.0, ans=0.0 +2024-09-18 02:45:32,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=352820.0, ans=0.1 +2024-09-18 02:45:45,432 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.48 vs. limit=15.0 +2024-09-18 02:45:47,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=352860.0, ans=0.125 +2024-09-18 02:45:53,590 INFO [train.py:1198] (1/2) Epoch 20, batch 2250, loss[loss=0.2435, ctc_loss=0.133, cr_loss=0.3806, attn_decoder_loss=0.2473, over 29726.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1364, cr_loss=0.3787, attn_decoder_loss=0.2499, over 5811260.55 frames. 
], batch size: 82, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:45:56,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=352900.0, ans=0.125 +2024-09-18 02:45:59,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=352900.0, ans=0.125 +2024-09-18 02:46:01,420 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:46:04,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=352900.0, ans=0.125 +2024-09-18 02:46:07,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=352940.0, ans=0.1 +2024-09-18 02:46:34,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.27 vs. limit=15.0 +2024-09-18 02:46:38,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=352980.0, ans=0.0 +2024-09-18 02:46:42,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=353020.0, ans=0.1 +2024-09-18 02:47:13,751 INFO [train.py:1198] (1/2) Epoch 20, batch 2300, loss[loss=0.2275, ctc_loss=0.1267, cr_loss=0.3484, attn_decoder_loss=0.2309, over 29729.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.136, cr_loss=0.3775, attn_decoder_loss=0.2494, over 5798177.43 frames. ], batch size: 72, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:47:21,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=353100.0, ans=0.0 +2024-09-18 02:47:48,564 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.567e+01 8.594e+01 9.374e+01 1.007e+02 2.489e+02, threshold=1.875e+02, percent-clipped=2.0 +2024-09-18 02:47:53,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=353180.0, ans=0.05 +2024-09-18 02:48:29,442 INFO [train.py:1198] (1/2) Epoch 20, batch 2350, loss[loss=0.2481, ctc_loss=0.1373, cr_loss=0.3856, attn_decoder_loss=0.2518, over 29699.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1363, cr_loss=0.3783, attn_decoder_loss=0.2496, over 5803956.34 frames. 
], batch size: 83, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:48:50,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=353340.0, ans=0.1 +2024-09-18 02:49:16,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=353420.0, ans=0.125 +2024-09-18 02:49:19,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=353420.0, ans=0.1 +2024-09-18 02:49:24,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=353420.0, ans=0.125 +2024-09-18 02:49:31,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=353460.0, ans=0.125 +2024-09-18 02:49:34,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=353460.0, ans=0.0 +2024-09-18 02:49:45,306 INFO [train.py:1198] (1/2) Epoch 20, batch 2400, loss[loss=0.2401, ctc_loss=0.1281, cr_loss=0.365, attn_decoder_loss=0.2445, over 29517.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1366, cr_loss=0.379, attn_decoder_loss=0.2501, over 5808323.03 frames. ], batch size: 76, lr: 5.57e-03, grad_scale: 16.0 +2024-09-18 02:49:59,658 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.87 vs. limit=22.5 +2024-09-18 02:50:15,441 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.79 vs. limit=15.0 +2024-09-18 02:50:23,695 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.283e+01 8.660e+01 9.243e+01 9.853e+01 2.252e+02, threshold=1.849e+02, percent-clipped=1.0 +2024-09-18 02:50:50,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=353660.0, ans=0.125 +2024-09-18 02:50:52,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=353660.0, ans=0.2 +2024-09-18 02:50:55,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=353660.0, ans=0.125 +2024-09-18 02:50:55,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=353660.0, ans=0.125 +2024-09-18 02:51:01,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=353660.0, ans=0.1 +2024-09-18 02:51:05,907 INFO [train.py:1198] (1/2) Epoch 20, batch 2450, loss[loss=0.2604, ctc_loss=0.1445, cr_loss=0.3996, attn_decoder_loss=0.2644, over 29708.00 frames. ], tot_loss[loss=0.2474, ctc_loss=0.1376, cr_loss=0.3807, attn_decoder_loss=0.2511, over 5786727.46 frames. ], batch size: 82, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:51:22,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=353740.0, ans=0.125 +2024-09-18 02:51:29,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.25 vs. 
limit=15.0 +2024-09-18 02:51:38,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=353780.0, ans=0.0 +2024-09-18 02:52:08,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=353860.0, ans=0.2 +2024-09-18 02:52:12,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=353860.0, ans=0.125 +2024-09-18 02:52:21,948 INFO [train.py:1198] (1/2) Epoch 20, batch 2500, loss[loss=0.2581, ctc_loss=0.1429, cr_loss=0.3931, attn_decoder_loss=0.2622, over 29622.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1374, cr_loss=0.3807, attn_decoder_loss=0.2511, over 5797202.06 frames. ], batch size: 86, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:52:23,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=353900.0, ans=0.1 +2024-09-18 02:52:34,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=353900.0, ans=0.1 +2024-09-18 02:52:43,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=353940.0, ans=0.09899494936611666 +2024-09-18 02:52:43,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=353940.0, ans=0.0 +2024-09-18 02:52:58,484 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.450e+01 8.592e+01 8.974e+01 9.558e+01 1.231e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-18 02:53:17,165 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 02:53:29,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=354060.0, ans=0.0 +2024-09-18 02:53:38,060 INFO [train.py:1198] (1/2) Epoch 20, batch 2550, loss[loss=0.2224, ctc_loss=0.1169, cr_loss=0.345, attn_decoder_loss=0.2265, over 29333.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1369, cr_loss=0.3793, attn_decoder_loss=0.2508, over 5801080.95 frames. ], batch size: 67, lr: 5.57e-03, grad_scale: 8.0 +2024-09-18 02:53:42,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=354100.0, ans=0.0 +2024-09-18 02:53:44,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=354100.0, ans=0.125 +2024-09-18 02:54:33,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=354220.0, ans=0.125 +2024-09-18 02:54:58,070 INFO [train.py:1198] (1/2) Epoch 20, batch 2600, loss[loss=0.2387, ctc_loss=0.125, cr_loss=0.3632, attn_decoder_loss=0.2433, over 29474.00 frames. ], tot_loss[loss=0.2474, ctc_loss=0.1372, cr_loss=0.3804, attn_decoder_loss=0.2512, over 5797035.57 frames. 
], batch size: 78, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 02:54:58,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=354300.0, ans=0.2 +2024-09-18 02:55:12,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.whiten.whitening_limit, batch_count=354340.0, ans=12.0 +2024-09-18 02:55:13,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=354340.0, ans=0.0 +2024-09-18 02:55:34,144 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.411e+01 8.665e+01 9.316e+01 9.977e+01 1.565e+02, threshold=1.863e+02, percent-clipped=0.0 +2024-09-18 02:55:48,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=354420.0, ans=0.0 +2024-09-18 02:56:13,762 INFO [train.py:1198] (1/2) Epoch 20, batch 2650, loss[loss=0.2748, ctc_loss=0.1576, cr_loss=0.4257, attn_decoder_loss=0.2784, over 29171.00 frames. ], tot_loss[loss=0.2478, ctc_loss=0.1375, cr_loss=0.3811, attn_decoder_loss=0.2515, over 5803077.04 frames. ], batch size: 100, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 02:56:18,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=354500.0, ans=0.0 +2024-09-18 02:56:33,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=354540.0, ans=0.125 +2024-09-18 02:56:55,467 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.84 vs. limit=15.0 +2024-09-18 02:57:02,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=354620.0, ans=0.0 +2024-09-18 02:57:29,344 INFO [train.py:1198] (1/2) Epoch 20, batch 2700, loss[loss=0.259, ctc_loss=0.1438, cr_loss=0.4132, attn_decoder_loss=0.2626, over 29533.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.138, cr_loss=0.3818, attn_decoder_loss=0.2523, over 5798098.77 frames. ], batch size: 87, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 02:57:33,440 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.53 vs. limit=22.5 +2024-09-18 02:57:37,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=354700.0, ans=0.125 +2024-09-18 02:57:52,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=354740.0, ans=0.125 +2024-09-18 02:58:07,622 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.392e+01 8.506e+01 9.049e+01 9.472e+01 1.287e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-18 02:58:12,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=354780.0, ans=0.0 +2024-09-18 02:58:41,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=354860.0, ans=0.125 +2024-09-18 02:58:49,462 INFO [train.py:1198] (1/2) Epoch 20, batch 2750, loss[loss=0.2433, ctc_loss=0.1292, cr_loss=0.3709, attn_decoder_loss=0.2477, over 29520.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1371, cr_loss=0.3806, attn_decoder_loss=0.251, over 5797070.49 frames. 
], batch size: 75, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 02:58:51,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=354900.0, ans=0.125 +2024-09-18 02:58:52,112 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.97 vs. limit=12.0 +2024-09-18 02:58:54,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=354900.0, ans=0.0 +2024-09-18 02:58:58,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=354900.0, ans=0.1 +2024-09-18 02:59:00,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=354900.0, ans=0.09899494936611666 +2024-09-18 02:59:03,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=354940.0, ans=0.0 +2024-09-18 02:59:10,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=354940.0, ans=0.125 +2024-09-18 02:59:48,176 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.92 vs. limit=15.0 +2024-09-18 02:59:50,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=355060.0, ans=0.125 +2024-09-18 02:59:58,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=355060.0, ans=0.0 +2024-09-18 03:00:02,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=355060.0, ans=0.125 +2024-09-18 03:00:05,925 INFO [train.py:1198] (1/2) Epoch 20, batch 2800, loss[loss=0.2794, ctc_loss=0.1934, cr_loss=0.4274, attn_decoder_loss=0.2795, over 19736.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.1375, cr_loss=0.3809, attn_decoder_loss=0.2513, over 5777227.56 frames. ], batch size: 210, lr: 5.56e-03, grad_scale: 16.0 +2024-09-18 03:00:19,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=355140.0, ans=0.125 +2024-09-18 03:00:27,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=355140.0, ans=0.125 +2024-09-18 03:00:44,102 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.403e+01 8.732e+01 9.172e+01 1.024e+02 2.809e+02, threshold=1.834e+02, percent-clipped=3.0 +2024-09-18 03:00:53,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=355220.0, ans=0.09899494936611666 +2024-09-18 03:00:59,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=355220.0, ans=0.125 +2024-09-18 03:01:08,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=355260.0, ans=0.125 +2024-09-18 03:01:10,706 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.92 vs. 
limit=10.0 +2024-09-18 03:01:12,045 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.62 vs. limit=15.0 +2024-09-18 03:01:21,675 INFO [train.py:1198] (1/2) Epoch 20, batch 2850, loss[loss=0.2412, ctc_loss=0.1302, cr_loss=0.3672, attn_decoder_loss=0.2454, over 29525.00 frames. ], tot_loss[loss=0.2479, ctc_loss=0.1378, cr_loss=0.3813, attn_decoder_loss=0.2517, over 5761810.48 frames. ], batch size: 77, lr: 5.56e-03, grad_scale: 8.0 +2024-09-18 03:01:23,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=355300.0, ans=0.0 +2024-09-18 03:02:09,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=355420.0, ans=0.125 +2024-09-18 03:02:41,742 INFO [train.py:1198] (1/2) Epoch 20, batch 2900, loss[loss=0.2492, ctc_loss=0.1455, cr_loss=0.3983, attn_decoder_loss=0.2519, over 29415.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1385, cr_loss=0.3826, attn_decoder_loss=0.2526, over 5787797.95 frames. ], batch size: 79, lr: 5.55e-03, grad_scale: 8.0 +2024-09-18 03:03:01,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=355540.0, ans=0.2 +2024-09-18 03:03:12,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=355580.0, ans=0.025 +2024-09-18 03:03:12,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=355580.0, ans=0.0 +2024-09-18 03:03:18,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=355580.0, ans=0.0 +2024-09-18 03:03:19,775 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.493e+01 9.196e+01 9.952e+01 2.490e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-18 03:03:24,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=355580.0, ans=0.0 +2024-09-18 03:03:52,388 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.19 vs. limit=22.5 +2024-09-18 03:03:57,573 INFO [train.py:1198] (1/2) Epoch 20, batch 2950, loss[loss=0.237, ctc_loss=0.1334, cr_loss=0.3918, attn_decoder_loss=0.2398, over 29520.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1375, cr_loss=0.3808, attn_decoder_loss=0.2514, over 5782486.98 frames. ], batch size: 75, lr: 5.55e-03, grad_scale: 8.0 +2024-09-18 03:03:57,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=355700.0, ans=0.125 +2024-09-18 03:04:05,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=355700.0, ans=0.025 +2024-09-18 03:04:26,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.18 vs. 
limit=10.0
+2024-09-18 03:04:31,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=355780.0, ans=0.0
+2024-09-18 03:04:41,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=355820.0, ans=0.0
+2024-09-18 03:04:44,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=355820.0, ans=0.125
+2024-09-18 03:04:46,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=355820.0, ans=0.09899494936611666
+2024-09-18 03:04:55,503 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=16.02 vs. limit=15.0
+2024-09-18 03:05:01,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=355860.0, ans=0.025
+2024-09-18 03:05:04,257 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:05:05,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=355860.0, ans=0.125
+2024-09-18 03:05:08,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=355860.0, ans=0.125
+2024-09-18 03:05:13,039 INFO [train.py:1198] (1/2) Epoch 20, batch 3000, loss[loss=0.2363, ctc_loss=0.1265, cr_loss=0.3564, attn_decoder_loss=0.2406, over 29736.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1368, cr_loss=0.3791, attn_decoder_loss=0.251, over 5783844.62 frames. ], batch size: 81, lr: 5.55e-03, grad_scale: 8.0
+2024-09-18 03:05:13,040 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-18 03:05:32,383 INFO [train.py:1230] (1/2) Epoch 20, validation: loss=0.2111, ctc_loss=0.03914, cr_loss=5.228e-15, attn_decoder_loss=0.2302, over 944034.00 frames.
+2024-09-18 03:05:32,383 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-18 03:05:40,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=355900.0, ans=0.125
+2024-09-18 03:06:04,888 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.03 vs. limit=12.0
+2024-09-18 03:06:10,670 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.213e+01 8.598e+01 9.158e+01 9.918e+01 2.557e+02, threshold=1.832e+02, percent-clipped=1.0
+2024-09-18 03:06:12,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=355980.0, ans=0.0
+2024-09-18 03:06:23,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=356020.0, ans=0.07
+2024-09-18 03:06:25,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=356020.0, ans=0.05
+2024-09-18 03:06:25,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=356020.0, ans=0.125
+2024-09-18 03:06:35,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=356060.0, ans=0.5
+2024-09-18 03:06:37,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=356060.0, ans=0.125
+2024-09-18 03:06:50,689 INFO [train.py:1198] (1/2) Epoch 20, batch 3050, loss[loss=0.2342, ctc_loss=0.1272, cr_loss=0.3596, attn_decoder_loss=0.2381, over 29499.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1374, cr_loss=0.38, attn_decoder_loss=0.2515, over 5777809.96 frames. ], batch size: 76, lr: 5.55e-03, grad_scale: 8.0
+2024-09-18 03:07:09,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=356140.0, ans=0.0
+2024-09-18 03:07:20,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.36 vs. limit=10.0
+2024-09-18 03:07:34,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=356220.0, ans=0.125
+2024-09-18 03:07:55,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=356260.0, ans=0.1
+2024-09-18 03:07:55,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=356260.0, ans=0.025
+2024-09-18 03:08:05,860 INFO [train.py:1198] (1/2) Epoch 20, batch 3100, loss[loss=0.2687, ctc_loss=0.1546, cr_loss=0.4146, attn_decoder_loss=0.2721, over 29281.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1372, cr_loss=0.3791, attn_decoder_loss=0.2509, over 5777820.40 frames. ], batch size: 100, lr: 5.55e-03, grad_scale: 8.0
+2024-09-18 03:08:25,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=356340.0, ans=10.0
+2024-09-18 03:08:41,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=356380.0, ans=0.0
+2024-09-18 03:08:43,842 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.464e+01 9.160e+01 9.747e+01 2.632e+02, threshold=1.832e+02, percent-clipped=3.0
+2024-09-18 03:09:15,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=356460.0, ans=0.0
+2024-09-18 03:09:16,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=356460.0, ans=0.07
+2024-09-18 03:09:18,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=356460.0, ans=0.2
+2024-09-18 03:09:24,108 INFO [train.py:1198] (1/2) Epoch 20, batch 3150, loss[loss=0.2683, ctc_loss=0.157, cr_loss=0.413, attn_decoder_loss=0.2714, over 28913.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1372, cr_loss=0.3797, attn_decoder_loss=0.2509, over 5784629.84 frames. ], batch size: 104, lr: 5.55e-03, grad_scale: 8.0
+2024-09-18 03:09:25,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=356500.0, ans=0.125
+2024-09-18 03:09:34,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=356500.0, ans=0.1
+2024-09-18 03:09:44,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=356540.0, ans=0.0
+2024-09-18 03:09:48,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=356540.0, ans=0.1
+2024-09-18 03:10:07,485 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.46 vs. limit=15.0
+2024-09-18 03:10:11,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=356620.0, ans=0.0
+2024-09-18 03:10:36,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=356660.0, ans=0.125
+2024-09-18 03:10:42,207 INFO [train.py:1198] (1/2) Epoch 20, batch 3200, loss[loss=0.2382, ctc_loss=0.1234, cr_loss=0.3537, attn_decoder_loss=0.2431, over 29389.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1366, cr_loss=0.3788, attn_decoder_loss=0.2506, over 5794702.30 frames. ], batch size: 79, lr: 5.54e-03, grad_scale: 16.0
+2024-09-18 03:10:42,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=356700.0, ans=0.2
+2024-09-18 03:10:45,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=356700.0, ans=0.125
+2024-09-18 03:10:46,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=356700.0, ans=0.0
+2024-09-18 03:11:02,177 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:11:06,009 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.10 vs. limit=15.0
+2024-09-18 03:11:21,928 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.707e+01 8.428e+01 9.069e+01 9.579e+01 2.573e+02, threshold=1.814e+02, percent-clipped=1.0
+2024-09-18 03:11:40,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=356820.0, ans=0.025
+2024-09-18 03:11:47,137 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.41 vs. limit=15.0
+2024-09-18 03:11:52,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=356860.0, ans=0.0
+2024-09-18 03:11:58,536 INFO [train.py:1198] (1/2) Epoch 20, batch 3250, loss[loss=0.2635, ctc_loss=0.1435, cr_loss=0.3741, attn_decoder_loss=0.2685, over 29687.00 frames. ], tot_loss[loss=0.2472, ctc_loss=0.1368, cr_loss=0.3795, attn_decoder_loss=0.2511, over 5801366.72 frames. ], batch size: 84, lr: 5.54e-03, grad_scale: 8.0
+2024-09-18 03:12:02,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=356900.0, ans=0.1
+2024-09-18 03:12:24,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=356940.0, ans=0.025
+2024-09-18 03:12:27,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.57 vs. limit=12.0
+2024-09-18 03:12:33,954 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.94 vs. limit=15.0
+2024-09-18 03:12:37,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=356980.0, ans=0.125
+2024-09-18 03:12:37,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=356980.0, ans=0.1
+2024-09-18 03:12:52,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=357020.0, ans=0.125
+2024-09-18 03:12:54,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=357020.0, ans=0.125
+2024-09-18 03:12:54,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=357020.0, ans=0.125
+2024-09-18 03:12:58,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.79 vs. limit=6.0
+2024-09-18 03:13:01,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=357060.0, ans=0.2
+2024-09-18 03:13:10,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=357060.0, ans=0.125
+2024-09-18 03:13:11,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=357060.0, ans=0.07
+2024-09-18 03:13:15,869 INFO [train.py:1198] (1/2) Epoch 20, batch 3300, loss[loss=0.2529, ctc_loss=0.133, cr_loss=0.3531, attn_decoder_loss=0.2584, over 28461.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1362, cr_loss=0.3783, attn_decoder_loss=0.2499, over 5799208.25 frames. ], batch size: 111, lr: 5.54e-03, grad_scale: 8.0
+2024-09-18 03:13:26,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=357100.0, ans=0.1
+2024-09-18 03:13:34,942 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.45 vs. limit=12.0
+2024-09-18 03:13:46,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=357180.0, ans=0.1
+2024-09-18 03:13:55,058 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.345e+01 8.624e+01 9.196e+01 9.884e+01 4.402e+02, threshold=1.839e+02, percent-clipped=2.0
+2024-09-18 03:13:59,887 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:14:15,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=357220.0, ans=0.125
+2024-09-18 03:14:20,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=357260.0, ans=0.05
+2024-09-18 03:14:28,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=357260.0, ans=0.125
+2024-09-18 03:14:33,202 INFO [train.py:1198] (1/2) Epoch 20, batch 3350, loss[loss=0.2569, ctc_loss=0.1487, cr_loss=0.416, attn_decoder_loss=0.2596, over 28724.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1373, cr_loss=0.3801, attn_decoder_loss=0.2508, over 5775883.71 frames. ], batch size: 104, lr: 5.54e-03, grad_scale: 8.0
+2024-09-18 03:14:38,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=357300.0, ans=0.025
+2024-09-18 03:14:38,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=357300.0, ans=0.0
+2024-09-18 03:14:58,308 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.00 vs. limit=22.5
+2024-09-18 03:15:25,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=357420.0, ans=0.0
+2024-09-18 03:15:29,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=357420.0, ans=0.125
+2024-09-18 03:15:41,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=357460.0, ans=0.1
+2024-09-18 03:15:48,967 INFO [train.py:1198] (1/2) Epoch 20, batch 3400, loss[loss=0.2212, ctc_loss=0.1146, cr_loss=0.3336, attn_decoder_loss=0.2256, over 29390.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1375, cr_loss=0.3801, attn_decoder_loss=0.2511, over 5766928.59 frames. ], batch size: 67, lr: 5.54e-03, grad_scale: 8.0
+2024-09-18 03:15:50,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=357500.0, ans=0.125
+2024-09-18 03:16:18,710 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.83 vs. limit=15.0
+2024-09-18 03:16:28,756 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.571e+01 8.602e+01 9.311e+01 9.873e+01 3.083e+02, threshold=1.862e+02, percent-clipped=1.0
+2024-09-18 03:16:30,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=357580.0, ans=0.0
+2024-09-18 03:16:50,123 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.81 vs. limit=22.5
+2024-09-18 03:17:07,403 INFO [train.py:1198] (1/2) Epoch 20, batch 3450, loss[loss=0.2643, ctc_loss=0.1465, cr_loss=0.381, attn_decoder_loss=0.269, over 28244.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1378, cr_loss=0.3805, attn_decoder_loss=0.2515, over 5774525.20 frames. ], batch size: 111, lr: 5.54e-03, grad_scale: 8.0
+2024-09-18 03:17:22,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=357740.0, ans=0.125
+2024-09-18 03:17:22,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=357740.0, ans=0.125
+2024-09-18 03:17:33,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=357740.0, ans=0.125
+2024-09-18 03:17:38,570 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.25 vs. limit=15.0
+2024-09-18 03:17:40,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=357780.0, ans=0.09899494936611666
+2024-09-18 03:17:43,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=357780.0, ans=0.125
+2024-09-18 03:18:02,570 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.35 vs. limit=15.0
+2024-09-18 03:18:16,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=357860.0, ans=0.0
+2024-09-18 03:18:22,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=357860.0, ans=0.125
+2024-09-18 03:18:25,109 INFO [train.py:1198] (1/2) Epoch 20, batch 3500, loss[loss=0.2169, ctc_loss=0.1167, cr_loss=0.3383, attn_decoder_loss=0.2205, over 29326.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1376, cr_loss=0.3806, attn_decoder_loss=0.251, over 5775483.23 frames. ], batch size: 71, lr: 5.54e-03, grad_scale: 8.0
+2024-09-18 03:18:29,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=357900.0, ans=0.125
+2024-09-18 03:19:01,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=357980.0, ans=0.09899494936611666
+2024-09-18 03:19:03,918 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.402e+01 8.576e+01 9.185e+01 9.795e+01 1.651e+02, threshold=1.837e+02, percent-clipped=0.0
+2024-09-18 03:19:13,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=358020.0, ans=0.1
+2024-09-18 03:19:28,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=358060.0, ans=0.125
+2024-09-18 03:19:39,918 INFO [train.py:1198] (1/2) Epoch 20, batch 3550, loss[loss=0.2553, ctc_loss=0.1427, cr_loss=0.3878, attn_decoder_loss=0.2591, over 29734.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.137, cr_loss=0.38, attn_decoder_loss=0.2507, over 5783363.08 frames. ], batch size: 89, lr: 5.53e-03, grad_scale: 8.0
+2024-09-18 03:19:48,296 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.38 vs. limit=15.0
+2024-09-18 03:19:53,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=358140.0, ans=0.125
+2024-09-18 03:19:54,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=358140.0, ans=0.0
+2024-09-18 03:20:11,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=358180.0, ans=0.0
+2024-09-18 03:20:14,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=358180.0, ans=0.025
+2024-09-18 03:20:23,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.77 vs. limit=12.0
+2024-09-18 03:20:26,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=358220.0, ans=0.125
+2024-09-18 03:20:27,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=358220.0, ans=0.125
+2024-09-18 03:20:29,693 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.81 vs. limit=15.0
+2024-09-18 03:20:31,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=358220.0, ans=0.125
+2024-09-18 03:20:32,000 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:20:33,837 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.17 vs. limit=15.0
+2024-09-18 03:20:34,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=358220.0, ans=0.0
+2024-09-18 03:20:36,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=358220.0, ans=0.125
+2024-09-18 03:20:48,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=358260.0, ans=0.125
+2024-09-18 03:20:50,361 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.65 vs. limit=10.0
+2024-09-18 03:20:53,743 INFO [train.py:1198] (1/2) Epoch 20, batch 3600, loss[loss=0.243, ctc_loss=0.133, cr_loss=0.3635, attn_decoder_loss=0.2472, over 29497.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1371, cr_loss=0.3797, attn_decoder_loss=0.2508, over 5791796.26 frames. ], batch size: 77, lr: 5.53e-03, grad_scale: 16.0
+2024-09-18 03:20:54,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=358300.0, ans=0.125
+2024-09-18 03:21:01,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=358300.0, ans=0.0
+2024-09-18 03:21:06,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=358300.0, ans=0.0
+2024-09-18 03:21:06,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=358300.0, ans=0.1
+2024-09-18 03:21:07,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=358340.0, ans=0.2
+2024-09-18 03:21:08,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=358340.0, ans=0.125
+2024-09-18 03:21:18,863 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.66 vs. limit=15.0
+2024-09-18 03:21:32,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.46 vs. limit=15.0
+2024-09-18 03:21:33,254 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.423e+01 8.608e+01 9.165e+01 9.950e+01 3.634e+02, threshold=1.833e+02, percent-clipped=2.0
+2024-09-18 03:22:01,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=358460.0, ans=0.0
+2024-09-18 03:22:10,961 INFO [train.py:1198] (1/2) Epoch 20, batch 3650, loss[loss=0.2582, ctc_loss=0.1449, cr_loss=0.3983, attn_decoder_loss=0.2619, over 29513.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1365, cr_loss=0.3788, attn_decoder_loss=0.2501, over 5792458.18 frames. ], batch size: 90, lr: 5.53e-03, grad_scale: 8.0
+2024-09-18 03:22:35,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=358540.0, ans=0.2
+2024-09-18 03:22:57,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=358620.0, ans=0.0
+2024-09-18 03:22:58,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=358620.0, ans=0.0
+2024-09-18 03:23:24,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=358700.0, ans=0.2
+2024-09-18 03:23:25,634 INFO [train.py:1198] (1/2) Epoch 20, batch 3700, loss[loss=0.2491, ctc_loss=0.1414, cr_loss=0.3907, attn_decoder_loss=0.2524, over 29706.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.1366, cr_loss=0.3798, attn_decoder_loss=0.2505, over 5802558.80 frames. ], batch size: 84, lr: 5.53e-03, grad_scale: 8.0
+2024-09-18 03:23:29,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=358700.0, ans=0.125
+2024-09-18 03:23:31,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=358700.0, ans=0.125
+2024-09-18 03:23:34,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=358700.0, ans=0.125
+2024-09-18 03:23:54,804 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.79 vs. limit=15.0
+2024-09-18 03:23:55,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=358780.0, ans=0.125
+2024-09-18 03:24:05,520 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.342e+01 8.568e+01 9.154e+01 9.793e+01 1.686e+02, threshold=1.831e+02, percent-clipped=0.0
+2024-09-18 03:24:31,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=358860.0, ans=0.1
+2024-09-18 03:24:35,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=358860.0, ans=0.125
+2024-09-18 03:24:41,704 INFO [train.py:1198] (1/2) Epoch 20, batch 3750, loss[loss=0.2158, ctc_loss=0.1159, cr_loss=0.3414, attn_decoder_loss=0.2193, over 29365.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1364, cr_loss=0.3796, attn_decoder_loss=0.2504, over 5806783.48 frames. ], batch size: 67, lr: 5.53e-03, grad_scale: 8.0
+2024-09-18 03:24:54,632 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.01 vs. limit=15.0
+2024-09-18 03:24:59,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=358940.0, ans=0.125
+2024-09-18 03:25:19,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=358980.0, ans=0.125
+2024-09-18 03:25:20,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=358980.0, ans=0.125
+2024-09-18 03:25:26,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=359020.0, ans=0.1
+2024-09-18 03:25:44,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=359060.0, ans=0.0
+2024-09-18 03:25:56,008 INFO [train.py:1198] (1/2) Epoch 20, batch 3800, loss[loss=0.2692, ctc_loss=0.1537, cr_loss=0.4258, attn_decoder_loss=0.2726, over 29608.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1363, cr_loss=0.3792, attn_decoder_loss=0.25, over 5796153.51 frames. ], batch size: 86, lr: 5.53e-03, grad_scale: 8.0
+2024-09-18 03:26:14,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.75 vs. limit=15.0
+2024-09-18 03:26:36,572 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.575e+01 9.018e+01 9.556e+01 1.555e+02, threshold=1.804e+02, percent-clipped=0.0
+2024-09-18 03:26:54,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=359260.0, ans=0.125
+2024-09-18 03:26:56,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=359260.0, ans=0.0
+2024-09-18 03:27:03,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=359260.0, ans=0.025
+2024-09-18 03:27:06,923 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.00 vs. limit=15.0
+2024-09-18 03:27:10,604 INFO [train.py:1198] (1/2) Epoch 20, batch 3850, loss[loss=0.266, ctc_loss=0.1537, cr_loss=0.4222, attn_decoder_loss=0.2691, over 29288.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1358, cr_loss=0.3786, attn_decoder_loss=0.2497, over 5810988.60 frames. ], batch size: 100, lr: 5.52e-03, grad_scale: 8.0
+2024-09-18 03:27:21,951 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.83 vs. limit=15.0
+2024-09-18 03:27:24,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=359340.0, ans=0.1
+2024-09-18 03:27:32,244 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.44 vs. limit=10.0
+2024-09-18 03:27:48,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=359380.0, ans=0.1
+2024-09-18 03:27:54,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=359380.0, ans=0.125
+2024-09-18 03:27:57,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=359420.0, ans=0.1
+2024-09-18 03:28:01,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=359420.0, ans=0.125
+2024-09-18 03:28:03,699 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.68 vs. limit=15.0
+2024-09-18 03:28:04,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=359420.0, ans=0.0
+2024-09-18 03:28:26,444 INFO [train.py:1198] (1/2) Epoch 20, batch 3900, loss[loss=0.2537, ctc_loss=0.1377, cr_loss=0.3959, attn_decoder_loss=0.2578, over 29636.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1361, cr_loss=0.3792, attn_decoder_loss=0.25, over 5815319.35 frames. ], batch size: 86, lr: 5.52e-03, grad_scale: 8.0
+2024-09-18 03:28:31,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=359500.0, ans=0.125
+2024-09-18 03:28:34,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=359500.0, ans=0.125
+2024-09-18 03:28:37,156 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:29:06,326 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.505e+01 9.019e+01 9.664e+01 2.565e+02, threshold=1.804e+02, percent-clipped=1.0
+2024-09-18 03:29:06,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=359580.0, ans=0.0
+2024-09-18 03:29:11,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=359620.0, ans=0.125
+2024-09-18 03:29:11,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.18 vs. limit=15.0
+2024-09-18 03:29:22,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=359620.0, ans=0.0
+2024-09-18 03:29:34,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=359660.0, ans=0.025
+2024-09-18 03:29:40,553 INFO [train.py:1198] (1/2) Epoch 20, batch 3950, loss[loss=0.2605, ctc_loss=0.1547, cr_loss=0.4219, attn_decoder_loss=0.2628, over 29475.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.136, cr_loss=0.3799, attn_decoder_loss=0.2501, over 5834870.71 frames. ], batch size: 97, lr: 5.52e-03, grad_scale: 8.0
+2024-09-18 03:30:15,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten.whitening_limit, batch_count=359780.0, ans=15.0
+2024-09-18 03:30:35,039 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.88 vs. limit=15.0
+2024-09-18 03:30:35,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=359820.0, ans=0.125
+2024-09-18 03:30:38,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=359820.0, ans=0.0
+2024-09-18 03:30:56,092 INFO [train.py:1198] (1/2) Epoch 20, batch 4000, loss[loss=0.2369, ctc_loss=0.127, cr_loss=0.3695, attn_decoder_loss=0.2409, over 29498.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1363, cr_loss=0.3798, attn_decoder_loss=0.25, over 5811658.90 frames. ], batch size: 74, lr: 5.52e-03, grad_scale: 16.0
+2024-09-18 03:30:56,437 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:31:09,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=359940.0, ans=0.2
+2024-09-18 03:31:10,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.76 vs. limit=22.5
+2024-09-18 03:31:28,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=359980.0, ans=0.125
+2024-09-18 03:31:38,095 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.709e+01 8.706e+01 9.188e+01 9.943e+01 2.259e+02, threshold=1.838e+02, percent-clipped=3.0
+2024-09-18 03:31:49,392 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.98 vs. limit=12.0
+2024-09-18 03:32:09,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=360100.0, ans=0.04949747468305833
+2024-09-18 03:32:10,754 INFO [train.py:1198] (1/2) Epoch 20, batch 4050, loss[loss=0.2841, ctc_loss=0.1873, cr_loss=0.4312, attn_decoder_loss=0.2853, over 20286.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1366, cr_loss=0.3799, attn_decoder_loss=0.25, over 5796573.27 frames. ], batch size: 210, lr: 5.52e-03, grad_scale: 8.0
+2024-09-18 03:32:17,612 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.09 vs. limit=10.0
+2024-09-18 03:32:35,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=360140.0, ans=0.125
+2024-09-18 03:32:48,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=360180.0, ans=0.125
+2024-09-18 03:32:49,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=360180.0, ans=0.125
+2024-09-18 03:33:25,675 INFO [train.py:1198] (1/2) Epoch 20, batch 4100, loss[loss=0.2635, ctc_loss=0.1535, cr_loss=0.4275, attn_decoder_loss=0.2662, over 29532.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1368, cr_loss=0.3798, attn_decoder_loss=0.2503, over 5791828.86 frames. ], batch size: 90, lr: 5.52e-03, grad_scale: 8.0
+2024-09-18 03:33:34,145 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.45 vs. limit=5.0
+2024-09-18 03:33:58,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=360380.0, ans=0.125
+2024-09-18 03:34:06,719 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.535e+01 8.554e+01 9.204e+01 1.015e+02 1.958e+02, threshold=1.841e+02, percent-clipped=1.0
+2024-09-18 03:34:17,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=360420.0, ans=0.125
+2024-09-18 03:34:24,863 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.21 vs. limit=6.0
+2024-09-18 03:34:33,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=360460.0, ans=0.125
+2024-09-18 03:34:40,410 INFO [train.py:1198] (1/2) Epoch 20, batch 4150, loss[loss=0.2425, ctc_loss=0.1373, cr_loss=0.3778, attn_decoder_loss=0.2458, over 29509.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1369, cr_loss=0.3804, attn_decoder_loss=0.2502, over 5798470.96 frames. ], batch size: 77, lr: 5.52e-03, grad_scale: 8.0
+2024-09-18 03:34:48,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=360500.0, ans=0.125
+2024-09-18 03:34:55,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=360540.0, ans=0.025
+2024-09-18 03:35:10,316 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.50 vs. limit=15.0
+2024-09-18 03:35:15,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=360580.0, ans=0.2
+2024-09-18 03:35:32,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=360620.0, ans=0.0
+2024-09-18 03:35:44,116 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.67 vs. limit=15.0
+2024-09-18 03:35:47,306 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=7.08 vs. limit=15.0
+2024-09-18 03:35:53,871 INFO [train.py:1198] (1/2) Epoch 20, batch 4200, loss[loss=0.2654, ctc_loss=0.154, cr_loss=0.4039, attn_decoder_loss=0.2688, over 29507.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1363, cr_loss=0.379, attn_decoder_loss=0.2503, over 5799802.27 frames. ], batch size: 90, lr: 5.51e-03, grad_scale: 8.0
+2024-09-18 03:35:54,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=360700.0, ans=0.125
+2024-09-18 03:36:25,484 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.25 vs. limit=22.5
+2024-09-18 03:36:36,270 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.598e+01 9.049e+01 1.004e+02 1.437e+02, threshold=1.810e+02, percent-clipped=0.0
+2024-09-18 03:36:36,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.76 vs. limit=12.0
+2024-09-18 03:36:41,114 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:36:41,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=360820.0, ans=0.09899494936611666
+2024-09-18 03:36:53,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=360860.0, ans=0.125
+2024-09-18 03:36:57,787 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.62 vs. limit=12.0
+2024-09-18 03:36:58,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=360860.0, ans=0.125
+2024-09-18 03:37:09,126 INFO [train.py:1198] (1/2) Epoch 20, batch 4250, loss[loss=0.2282, ctc_loss=0.1197, cr_loss=0.3443, attn_decoder_loss=0.2326, over 29493.00 frames. ], tot_loss[loss=0.2466, ctc_loss=0.1362, cr_loss=0.3792, attn_decoder_loss=0.2505, over 5805985.17 frames. ], batch size: 74, lr: 5.51e-03, grad_scale: 8.0
+2024-09-18 03:37:09,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=360900.0, ans=0.125
+2024-09-18 03:37:29,061 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.77 vs. limit=22.5
+2024-09-18 03:37:46,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=360980.0, ans=0.125
+2024-09-18 03:37:58,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=361020.0, ans=0.125
+2024-09-18 03:37:59,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=361020.0, ans=0.125
+2024-09-18 03:38:23,852 INFO [train.py:1198] (1/2) Epoch 20, batch 4300, loss[loss=0.2531, ctc_loss=0.1371, cr_loss=0.3614, attn_decoder_loss=0.258, over 29562.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1361, cr_loss=0.3789, attn_decoder_loss=0.2507, over 5795120.06 frames. ], batch size: 87, lr: 5.51e-03, grad_scale: 8.0
+2024-09-18 03:38:24,469 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.73 vs. limit=12.0
+2024-09-18 03:38:28,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=361100.0, ans=0.0
+2024-09-18 03:38:44,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=361140.0, ans=0.2
+2024-09-18 03:39:05,354 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.631e+01 8.736e+01 9.238e+01 9.877e+01 2.557e+02, threshold=1.848e+02, percent-clipped=2.0
+2024-09-18 03:39:10,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=361220.0, ans=0.0
+2024-09-18 03:39:38,011 INFO [train.py:1198] (1/2) Epoch 20, batch 4350, loss[loss=0.2705, ctc_loss=0.1479, cr_loss=0.4012, attn_decoder_loss=0.2753, over 29522.00 frames. ], tot_loss[loss=0.2502, ctc_loss=0.139, cr_loss=0.3842, attn_decoder_loss=0.254, over 5797338.81 frames. ], batch size: 97, lr: 5.51e-03, grad_scale: 8.0
+2024-09-18 03:39:44,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=361300.0, ans=0.025
+2024-09-18 03:39:47,054 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.11 vs. limit=15.0
+2024-09-18 03:39:58,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=361340.0, ans=0.125
+2024-09-18 03:40:00,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=361340.0, ans=0.125
+2024-09-18 03:40:06,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=361380.0, ans=0.125
+2024-09-18 03:40:21,991 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.09 vs. limit=15.0
+2024-09-18 03:40:29,205 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.63 vs. limit=15.0
+2024-09-18 03:40:36,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.69 vs. limit=15.0
+2024-09-18 03:40:50,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=361500.0, ans=0.0
+2024-09-18 03:40:51,530 INFO [train.py:1198] (1/2) Epoch 20, batch 4400, loss[loss=0.2586, ctc_loss=0.153, cr_loss=0.3917, attn_decoder_loss=0.2616, over 27409.00 frames. ], tot_loss[loss=0.2522, ctc_loss=0.1406, cr_loss=0.387, attn_decoder_loss=0.256, over 5766900.26 frames. ], batch size: 125, lr: 5.51e-03, grad_scale: 16.0
+2024-09-18 03:41:28,431 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.35 vs. limit=15.0
+2024-09-18 03:41:34,934 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.117e+01 8.833e+01 9.166e+01 9.784e+01 1.631e+02, threshold=1.833e+02, percent-clipped=0.0
+2024-09-18 03:41:50,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=10.59 vs. limit=12.0
+2024-09-18 03:42:01,746 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.30 vs. limit=15.0
+2024-09-18 03:42:02,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=361660.0, ans=0.125
+2024-09-18 03:42:05,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=361700.0, ans=0.2
+2024-09-18 03:42:06,988 INFO [train.py:1198] (1/2) Epoch 20, batch 4450, loss[loss=0.2761, ctc_loss=0.1758, cr_loss=0.4038, attn_decoder_loss=0.2782, over 20234.00 frames. ], tot_loss[loss=0.2551, ctc_loss=0.145, cr_loss=0.3919, attn_decoder_loss=0.2586, over 5573449.65 frames. ], batch size: 210, lr: 5.51e-03, grad_scale: 8.0
+2024-09-18 03:42:16,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=361700.0, ans=0.125
+2024-09-18 03:42:28,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=361740.0, ans=0.125
+2024-09-18 03:42:39,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=361780.0, ans=0.125
+2024-09-18 03:42:48,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=361780.0, ans=0.1
+2024-09-18 03:43:11,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=361860.0, ans=0.0
+2024-09-18 03:43:22,862 INFO [train.py:1198] (1/2) Epoch 20, batch 4500, loss[loss=0.2658, ctc_loss=0.1689, cr_loss=0.3874, attn_decoder_loss=0.2679, over 20263.00 frames. ], tot_loss[loss=0.2579, ctc_loss=0.1498, cr_loss=0.3948, attn_decoder_loss=0.2611, over 5234671.82 frames. ], batch size: 210, lr: 5.51e-03, grad_scale: 8.0
+2024-09-18 03:43:33,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=361900.0, ans=0.1
+2024-09-18 03:43:34,515 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.02 vs. limit=6.0
+2024-09-18 03:43:50,637 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=14.20 vs. limit=15.0
+2024-09-18 03:44:52,331 INFO [train.py:1198] (1/2) Epoch 21, batch 0, loss[loss=0.2261, ctc_loss=0.1132, cr_loss=0.3427, attn_decoder_loss=0.2311, over 29620.00 frames. ], tot_loss[loss=0.2261, ctc_loss=0.1132, cr_loss=0.3427, attn_decoder_loss=0.2311, over 29620.00 frames. ], batch size: 73, lr: 5.37e-03, grad_scale: 16.0
+2024-09-18 03:44:52,331 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-18 03:45:10,775 INFO [train.py:1230] (1/2) Epoch 21, validation: loss=0.2126, ctc_loss=0.0391, cr_loss=5.275e-15, attn_decoder_loss=0.2319, over 944034.00 frames.
+2024-09-18 03:45:10,775 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-18 03:45:19,728 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.930e+01 1.076e+02 1.145e+02 1.241e+02 1.705e+02, threshold=2.291e+02, percent-clipped=0.0
+2024-09-18 03:45:42,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=362080.0, ans=0.125
+2024-09-18 03:46:01,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=362120.0, ans=0.1
+2024-09-18 03:46:11,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=362160.0, ans=0.125
+2024-09-18 03:46:19,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=362160.0, ans=0.0
+2024-09-18 03:46:24,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=362160.0, ans=0.0
+2024-09-18 03:46:28,338 INFO [train.py:1198] (1/2) Epoch 21, batch 50, loss[loss=0.2173, ctc_loss=0.1103, cr_loss=0.3455, attn_decoder_loss=0.2215, over 29446.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1389, cr_loss=0.3837, attn_decoder_loss=0.2518, over 1267937.69 frames. ], batch size: 70, lr: 5.37e-03, grad_scale: 8.0
+2024-09-18 03:46:30,196 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:46:30,789 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.44 vs. limit=15.0
+2024-09-18 03:47:10,737 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.16 vs. limit=15.0
+2024-09-18 03:47:26,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=362320.0, ans=0.0
+2024-09-18 03:47:38,139 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.66 vs. limit=12.0
+2024-09-18 03:47:44,304 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.06 vs. limit=10.0
+2024-09-18 03:47:46,710 INFO [train.py:1198] (1/2) Epoch 21, batch 100, loss[loss=0.2458, ctc_loss=0.1333, cr_loss=0.3756, attn_decoder_loss=0.2499, over 29540.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1401, cr_loss=0.3863, attn_decoder_loss=0.2537, over 2251903.63 frames. ], batch size: 76, lr: 5.37e-03, grad_scale: 8.0
+2024-09-18 03:47:55,559 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.793e+01 9.358e+01 9.884e+01 2.727e+02, threshold=1.872e+02, percent-clipped=1.0
+2024-09-18 03:47:56,239 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=10.05 vs. limit=15.0
+2024-09-18 03:48:00,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=362440.0, ans=0.125
+2024-09-18 03:48:16,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=362480.0, ans=0.0
+2024-09-18 03:48:17,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=362480.0, ans=0.1
+2024-09-18 03:48:31,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=362520.0, ans=0.07
+2024-09-18 03:48:32,345 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.92 vs. limit=15.0
+2024-09-18 03:48:48,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=362560.0, ans=0.05
+2024-09-18 03:49:01,017 INFO [train.py:1198] (1/2) Epoch 21, batch 150, loss[loss=0.2139, ctc_loss=0.1204, cr_loss=0.3497, attn_decoder_loss=0.2165, over 29420.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1363, cr_loss=0.3787, attn_decoder_loss=0.2504, over 3046567.10 frames. ], batch size: 70, lr: 5.36e-03, grad_scale: 8.0
+2024-09-18 03:49:13,660 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:49:15,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=362640.0, ans=0.1
+2024-09-18 03:49:24,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=362640.0, ans=0.125
+2024-09-18 03:49:40,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=362680.0, ans=0.0
+2024-09-18 03:49:55,409 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:50:09,105 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.40 vs. limit=15.0
+2024-09-18 03:50:18,545 INFO [train.py:1198] (1/2) Epoch 21, batch 200, loss[loss=0.2672, ctc_loss=0.1573, cr_loss=0.4149, attn_decoder_loss=0.2702, over 27157.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1359, cr_loss=0.3789, attn_decoder_loss=0.2498, over 3658875.53 frames. ], batch size: 124, lr: 5.36e-03, grad_scale: 8.0
+2024-09-18 03:50:26,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=362800.0, ans=0.125
+2024-09-18 03:50:27,601 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.138e+01 8.461e+01 9.001e+01 9.601e+01 1.394e+02, threshold=1.800e+02, percent-clipped=0.0
+2024-09-18 03:50:43,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=362840.0, ans=0.0
+2024-09-18 03:50:43,879 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.06 vs. limit=22.5
+2024-09-18 03:50:52,245 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.85 vs. limit=22.5
+2024-09-18 03:50:57,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=362880.0, ans=0.125
+2024-09-18 03:51:04,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=362880.0, ans=0.125
+2024-09-18 03:51:05,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=362920.0, ans=0.09899494936611666
+2024-09-18 03:51:07,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=362920.0, ans=0.0
+2024-09-18 03:51:09,494 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.76 vs. limit=15.0
+2024-09-18 03:51:37,224 INFO [train.py:1198] (1/2) Epoch 21, batch 250, loss[loss=0.2634, ctc_loss=0.1498, cr_loss=0.4251, attn_decoder_loss=0.2666, over 29165.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1357, cr_loss=0.3793, attn_decoder_loss=0.2497, over 4140081.45 frames. ], batch size: 100, lr: 5.36e-03, grad_scale: 8.0
+2024-09-18 03:51:45,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=363000.0, ans=0.2
+2024-09-18 03:52:05,065 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.88 vs. limit=15.0
+2024-09-18 03:52:12,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=363080.0, ans=0.035
+2024-09-18 03:52:36,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=363160.0, ans=0.1
+2024-09-18 03:52:53,505 INFO [train.py:1198] (1/2) Epoch 21, batch 300, loss[loss=0.2653, ctc_loss=0.159, cr_loss=0.4155, attn_decoder_loss=0.2679, over 29493.00 frames. ], tot_loss[loss=0.2457, ctc_loss=0.1354, cr_loss=0.379, attn_decoder_loss=0.2496, over 4510453.95 frames. ], batch size: 92, lr: 5.36e-03, grad_scale: 8.0
+2024-09-18 03:52:54,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=363200.0, ans=15.0
+2024-09-18 03:52:58,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=363200.0, ans=0.125
+2024-09-18 03:53:02,574 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.788e+01 8.424e+01 9.085e+01 9.553e+01 2.134e+02, threshold=1.817e+02, percent-clipped=1.0
+2024-09-18 03:53:13,765 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:53:22,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=363280.0, ans=0.1
+2024-09-18 03:53:22,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=363280.0, ans=0.2
+2024-09-18 03:53:31,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=363280.0, ans=0.125
+2024-09-18 03:53:44,309 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.37 vs. limit=10.0
+2024-09-18 03:54:03,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=363360.0, ans=0.2
+2024-09-18 03:54:11,653 INFO [train.py:1198] (1/2) Epoch 21, batch 350, loss[loss=0.23, ctc_loss=0.125, cr_loss=0.3528, attn_decoder_loss=0.2338, over 29345.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1358, cr_loss=0.3795, attn_decoder_loss=0.2499, over 4796980.01 frames. ], batch size: 71, lr: 5.36e-03, grad_scale: 8.0
+2024-09-18 03:54:30,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=363440.0, ans=0.1
+2024-09-18 03:54:37,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=363440.0, ans=0.125
+2024-09-18 03:55:29,552 INFO [train.py:1198] (1/2) Epoch 21, batch 400, loss[loss=0.2536, ctc_loss=0.1467, cr_loss=0.3966, attn_decoder_loss=0.2567, over 29713.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1355, cr_loss=0.3785, attn_decoder_loss=0.2497, over 5023479.03 frames. ], batch size: 82, lr: 5.36e-03, grad_scale: 16.0
+2024-09-18 03:55:38,686 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.579e+01 8.497e+01 9.045e+01 9.813e+01 2.448e+02, threshold=1.809e+02, percent-clipped=2.0
+2024-09-18 03:55:45,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=363640.0, ans=0.035
+2024-09-18 03:56:12,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=363680.0, ans=0.0
+2024-09-18 03:56:39,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=363760.0, ans=0.0
+2024-09-18 03:56:41,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=363760.0, ans=0.0
+2024-09-18 03:56:45,199 INFO [train.py:1198] (1/2) Epoch 21, batch 450, loss[loss=0.2454, ctc_loss=0.1282, cr_loss=0.3506, attn_decoder_loss=0.2506, over 29687.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1358, cr_loss=0.3789, attn_decoder_loss=0.25, over 5185122.53 frames. ], batch size: 83, lr: 5.36e-03, grad_scale: 8.0
+2024-09-18 03:56:54,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=363800.0, ans=0.0
+2024-09-18 03:56:57,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=363800.0, ans=0.125
+2024-09-18 03:57:06,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=363840.0, ans=0.1
+2024-09-18 03:57:08,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=363840.0, ans=0.0
+2024-09-18 03:57:22,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.30 vs. limit=15.0
+2024-09-18 03:57:52,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=363960.0, ans=0.0
+2024-09-18 03:57:54,827 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.55 vs. limit=15.0
+2024-09-18 03:58:01,714 INFO [train.py:1198] (1/2) Epoch 21, batch 500, loss[loss=0.2636, ctc_loss=0.1493, cr_loss=0.408, attn_decoder_loss=0.2672, over 29404.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.135, cr_loss=0.3782, attn_decoder_loss=0.2494, over 5329033.05 frames. ], batch size: 94, lr: 5.35e-03, grad_scale: 8.0
+2024-09-18 03:58:05,628 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.84 vs. limit=12.0
+2024-09-18 03:58:14,709 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.527e+01 8.455e+01 8.968e+01 9.588e+01 2.224e+02, threshold=1.794e+02, percent-clipped=1.0
+2024-09-18 03:58:18,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=364040.0, ans=0.125
+2024-09-18 03:58:31,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=364040.0, ans=0.0
+2024-09-18 03:58:32,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.77 vs. limit=15.0
+2024-09-18 03:58:47,732 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 03:59:16,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=364160.0, ans=0.125
+2024-09-18 03:59:22,165 INFO [train.py:1198] (1/2) Epoch 21, batch 550, loss[loss=0.2635, ctc_loss=0.1403, cr_loss=0.3815, attn_decoder_loss=0.2687, over 28901.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1351, cr_loss=0.3777, attn_decoder_loss=0.2497, over 5420949.85 frames. ], batch size: 104, lr: 5.35e-03, grad_scale: 8.0
+2024-09-18 03:59:31,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=364200.0, ans=0.125
+2024-09-18 03:59:40,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=364240.0, ans=0.0
+2024-09-18 03:59:45,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=364240.0, ans=6.0
+2024-09-18 04:00:09,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=364320.0, ans=0.2
+2024-09-18 04:00:23,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=364360.0, ans=10.0
+2024-09-18 04:00:38,401 INFO [train.py:1198] (1/2) Epoch 21, batch 600, loss[loss=0.2624, ctc_loss=0.1427, cr_loss=0.4014, attn_decoder_loss=0.2668, over 29305.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1354, cr_loss=0.3785, attn_decoder_loss=0.2499, over 5507914.81 frames. ], batch size: 100, lr: 5.35e-03, grad_scale: 8.0
+2024-09-18 04:00:39,378 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.51 vs. limit=15.0
+2024-09-18 04:00:43,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=364400.0, ans=0.05
+2024-09-18 04:00:48,929 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.580e+01 8.547e+01 9.115e+01 9.764e+01 2.691e+02, threshold=1.823e+02, percent-clipped=3.0
+2024-09-18 04:00:56,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=364440.0, ans=0.1
+2024-09-18 04:01:08,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_ff2.min_abs, batch_count=364480.0, ans=0.1
+2024-09-18 04:01:23,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=364520.0, ans=0.125
+2024-09-18 04:01:42,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=364560.0, ans=0.0
+2024-09-18 04:01:49,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=364560.0, ans=0.1
+2024-09-18 04:01:53,841 INFO [train.py:1198] (1/2) Epoch 21, batch 650, loss[loss=0.2358, ctc_loss=0.1206, cr_loss=0.3486, attn_decoder_loss=0.2409, over 29768.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1342, cr_loss=0.3767, attn_decoder_loss=0.2488, over 5585798.05 frames. ], batch size: 81, lr: 5.35e-03, grad_scale: 8.0
+2024-09-18 04:02:01,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=364600.0, ans=0.125
+2024-09-18 04:02:23,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=364640.0, ans=0.0
+2024-09-18 04:02:27,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=364680.0, ans=0.025
+2024-09-18 04:03:14,772 INFO [train.py:1198] (1/2) Epoch 21, batch 700, loss[loss=0.2344, ctc_loss=0.1263, cr_loss=0.3554, attn_decoder_loss=0.2385, over 29540.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1347, cr_loss=0.3773, attn_decoder_loss=0.2495, over 5636347.20 frames. ], batch size: 76, lr: 5.35e-03, grad_scale: 8.0
+2024-09-18 04:03:21,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=364800.0, ans=0.025
+2024-09-18 04:03:24,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=364800.0, ans=0.1
+2024-09-18 04:03:25,121 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.583e+01 8.553e+01 9.088e+01 9.665e+01 1.426e+02, threshold=1.818e+02, percent-clipped=0.0
+2024-09-18 04:04:22,457 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.90 vs. limit=15.0
+2024-09-18 04:04:30,487 INFO [train.py:1198] (1/2) Epoch 21, batch 750, loss[loss=0.2595, ctc_loss=0.1446, cr_loss=0.4154, attn_decoder_loss=0.2631, over 29710.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1349, cr_loss=0.3773, attn_decoder_loss=0.2492, over 5673251.65 frames.
], batch size: 82, lr: 5.35e-03, grad_scale: 8.0 +2024-09-18 04:04:38,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=365000.0, ans=0.0 +2024-09-18 04:04:39,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=365000.0, ans=0.125 +2024-09-18 04:04:45,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=365040.0, ans=0.125 +2024-09-18 04:04:51,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=365040.0, ans=0.0 +2024-09-18 04:04:52,221 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.80 vs. limit=15.0 +2024-09-18 04:04:56,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=365040.0, ans=0.0 +2024-09-18 04:05:28,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=365120.0, ans=0.125 +2024-09-18 04:05:31,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=365160.0, ans=0.125 +2024-09-18 04:05:45,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=365200.0, ans=0.2 +2024-09-18 04:05:46,211 INFO [train.py:1198] (1/2) Epoch 21, batch 800, loss[loss=0.2238, ctc_loss=0.1122, cr_loss=0.3268, attn_decoder_loss=0.2289, over 29596.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.135, cr_loss=0.3772, attn_decoder_loss=0.2492, over 5704299.55 frames. ], batch size: 73, lr: 5.35e-03, grad_scale: 16.0 +2024-09-18 04:05:46,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=365200.0, ans=0.125 +2024-09-18 04:05:56,666 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 8.575e+01 9.275e+01 9.797e+01 6.839e+02, threshold=1.855e+02, percent-clipped=2.0 +2024-09-18 04:06:18,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=365280.0, ans=0.125 +2024-09-18 04:06:33,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=365280.0, ans=0.95 +2024-09-18 04:06:41,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=365320.0, ans=0.0 +2024-09-18 04:06:44,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=365320.0, ans=0.09899494936611666 +2024-09-18 04:07:06,320 INFO [train.py:1198] (1/2) Epoch 21, batch 850, loss[loss=0.2463, ctc_loss=0.1361, cr_loss=0.3769, attn_decoder_loss=0.2502, over 29729.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1348, cr_loss=0.3771, attn_decoder_loss=0.249, over 5733455.53 frames. 
], batch size: 89, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:07:15,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=365400.0, ans=0.2 +2024-09-18 04:07:26,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=365440.0, ans=0.125 +2024-09-18 04:07:26,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=365440.0, ans=0.0 +2024-09-18 04:07:51,010 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.98 vs. limit=12.0 +2024-09-18 04:08:22,627 INFO [train.py:1198] (1/2) Epoch 21, batch 900, loss[loss=0.2237, ctc_loss=0.1178, cr_loss=0.3435, attn_decoder_loss=0.2278, over 29607.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.135, cr_loss=0.3771, attn_decoder_loss=0.2494, over 5738002.31 frames. ], batch size: 73, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:08:28,054 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.08 vs. limit=22.5 +2024-09-18 04:08:32,517 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.61 vs. limit=12.0 +2024-09-18 04:08:34,646 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.737e+01 8.573e+01 9.119e+01 9.639e+01 3.066e+02, threshold=1.824e+02, percent-clipped=3.0 +2024-09-18 04:08:36,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=365640.0, ans=0.025 +2024-09-18 04:08:55,490 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.49 vs. limit=15.0 +2024-09-18 04:09:22,764 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.01 vs. limit=15.0 +2024-09-18 04:09:31,379 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.42 vs. limit=6.0 +2024-09-18 04:09:38,095 INFO [train.py:1198] (1/2) Epoch 21, batch 950, loss[loss=0.2303, ctc_loss=0.1237, cr_loss=0.3555, attn_decoder_loss=0.2343, over 29528.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1351, cr_loss=0.3767, attn_decoder_loss=0.2494, over 5741431.98 frames. ], batch size: 74, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:09:38,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=365800.0, ans=0.1 +2024-09-18 04:09:59,810 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.46 vs. 
limit=10.0 +2024-09-18 04:10:13,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=365880.0, ans=0.125 +2024-09-18 04:10:20,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=365880.0, ans=0.09899494936611666 +2024-09-18 04:10:32,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=365920.0, ans=0.1 +2024-09-18 04:10:41,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=365960.0, ans=0.125 +2024-09-18 04:10:44,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.34 vs. limit=15.0 +2024-09-18 04:10:48,424 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.90 vs. limit=15.0 +2024-09-18 04:10:58,230 INFO [train.py:1198] (1/2) Epoch 21, batch 1000, loss[loss=0.2257, ctc_loss=0.1152, cr_loss=0.341, attn_decoder_loss=0.2304, over 29509.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1359, cr_loss=0.3783, attn_decoder_loss=0.2502, over 5736789.28 frames. ], batch size: 77, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:11:10,224 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.656e+01 8.708e+01 9.150e+01 9.911e+01 2.107e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-18 04:11:15,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.07 vs. limit=22.5 +2024-09-18 04:11:16,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=366040.0, ans=0.125 +2024-09-18 04:11:29,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=366080.0, ans=0.2 +2024-09-18 04:11:38,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.26 vs. limit=10.0 +2024-09-18 04:12:03,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=366160.0, ans=0.125 +2024-09-18 04:12:05,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=366160.0, ans=0.125 +2024-09-18 04:12:13,877 INFO [train.py:1198] (1/2) Epoch 21, batch 1050, loss[loss=0.2465, ctc_loss=0.1279, cr_loss=0.3704, attn_decoder_loss=0.2515, over 29677.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1355, cr_loss=0.3774, attn_decoder_loss=0.2495, over 5744721.41 frames. ], batch size: 85, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:12:20,545 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.78 vs. limit=22.5 +2024-09-18 04:12:23,890 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=8.06 vs. 
limit=15.0 +2024-09-18 04:12:39,301 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.78 vs. limit=6.0 +2024-09-18 04:13:04,902 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.90 vs. limit=15.0 +2024-09-18 04:13:07,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=366320.0, ans=0.125 +2024-09-18 04:13:21,554 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.01 vs. limit=15.0 +2024-09-18 04:13:30,129 INFO [train.py:1198] (1/2) Epoch 21, batch 1100, loss[loss=0.2481, ctc_loss=0.1337, cr_loss=0.3776, attn_decoder_loss=0.2524, over 29461.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1352, cr_loss=0.3768, attn_decoder_loss=0.2493, over 5756203.77 frames. ], batch size: 78, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:13:35,639 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.83 vs. limit=22.5 +2024-09-18 04:13:42,197 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.168e+01 8.489e+01 9.148e+01 9.741e+01 7.755e+02, threshold=1.830e+02, percent-clipped=3.0 +2024-09-18 04:13:50,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.26 vs. limit=15.0 +2024-09-18 04:14:33,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=366560.0, ans=0.125 +2024-09-18 04:14:50,298 INFO [train.py:1198] (1/2) Epoch 21, batch 1150, loss[loss=0.2444, ctc_loss=0.1399, cr_loss=0.4013, attn_decoder_loss=0.2471, over 29443.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1352, cr_loss=0.3777, attn_decoder_loss=0.2493, over 5754278.46 frames. ], batch size: 78, lr: 5.34e-03, grad_scale: 8.0 +2024-09-18 04:15:05,021 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.86 vs. limit=22.5 +2024-09-18 04:15:07,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=366640.0, ans=0.125 +2024-09-18 04:15:20,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=366680.0, ans=0.125 +2024-09-18 04:15:22,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=366680.0, ans=0.1 +2024-09-18 04:15:38,191 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.60 vs. limit=15.0 +2024-09-18 04:15:39,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=366720.0, ans=0.05 +2024-09-18 04:16:05,930 INFO [train.py:1198] (1/2) Epoch 21, batch 1200, loss[loss=0.2482, ctc_loss=0.1244, cr_loss=0.3533, attn_decoder_loss=0.2541, over 29680.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1356, cr_loss=0.3783, attn_decoder_loss=0.25, over 5745884.24 frames. 
], batch size: 85, lr: 5.33e-03, grad_scale: 16.0 +2024-09-18 04:16:18,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=366800.0, ans=0.125 +2024-09-18 04:16:19,653 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.533e+01 8.603e+01 9.203e+01 9.910e+01 1.694e+02, threshold=1.841e+02, percent-clipped=0.0 +2024-09-18 04:16:27,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=366840.0, ans=0.125 +2024-09-18 04:16:33,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=366840.0, ans=0.025 +2024-09-18 04:16:35,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=366880.0, ans=0.0 +2024-09-18 04:16:38,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.19 vs. limit=15.0 +2024-09-18 04:16:39,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=366880.0, ans=0.125 +2024-09-18 04:16:41,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=366880.0, ans=0.1 +2024-09-18 04:16:41,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=366880.0, ans=0.07 +2024-09-18 04:16:41,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=366880.0, ans=0.025 +2024-09-18 04:16:41,770 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.01 vs. limit=22.5 +2024-09-18 04:16:45,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=366880.0, ans=0.025 +2024-09-18 04:17:03,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.73 vs. limit=15.0 +2024-09-18 04:17:07,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=366960.0, ans=0.1 +2024-09-18 04:17:20,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=367000.0, ans=0.0 +2024-09-18 04:17:22,064 INFO [train.py:1198] (1/2) Epoch 21, batch 1250, loss[loss=0.2687, ctc_loss=0.1524, cr_loss=0.4125, attn_decoder_loss=0.2725, over 29519.00 frames. ], tot_loss[loss=0.247, ctc_loss=0.1362, cr_loss=0.3799, attn_decoder_loss=0.2509, over 5773772.44 frames. 
], batch size: 92, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:17:27,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=367000.0, ans=0.07 +2024-09-18 04:17:29,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=367000.0, ans=0.0 +2024-09-18 04:18:19,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=367120.0, ans=0.2 +2024-09-18 04:18:41,102 INFO [train.py:1198] (1/2) Epoch 21, batch 1300, loss[loss=0.2556, ctc_loss=0.1349, cr_loss=0.3812, attn_decoder_loss=0.2606, over 28552.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1356, cr_loss=0.3792, attn_decoder_loss=0.2501, over 5780485.29 frames. ], batch size: 112, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:18:54,774 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.511e+01 8.475e+01 9.131e+01 9.688e+01 1.292e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-18 04:19:10,761 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.02 vs. limit=15.0 +2024-09-18 04:19:10,816 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.98 vs. limit=10.0 +2024-09-18 04:19:16,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=367280.0, ans=0.025 +2024-09-18 04:19:33,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=367320.0, ans=0.2 +2024-09-18 04:19:40,128 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.29 vs. limit=15.0 +2024-09-18 04:19:55,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=367400.0, ans=0.125 +2024-09-18 04:19:57,012 INFO [train.py:1198] (1/2) Epoch 21, batch 1350, loss[loss=0.244, ctc_loss=0.1299, cr_loss=0.3742, attn_decoder_loss=0.2483, over 29794.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1351, cr_loss=0.3784, attn_decoder_loss=0.25, over 5796255.89 frames. ], batch size: 81, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:20:09,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=367400.0, ans=0.125 +2024-09-18 04:20:20,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=367440.0, ans=0.125 +2024-09-18 04:20:42,498 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 04:21:02,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=367560.0, ans=0.0 +2024-09-18 04:21:12,476 INFO [train.py:1198] (1/2) Epoch 21, batch 1400, loss[loss=0.2111, ctc_loss=0.1134, cr_loss=0.3311, attn_decoder_loss=0.2146, over 29589.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1351, cr_loss=0.3784, attn_decoder_loss=0.2498, over 5807647.88 frames. 
], batch size: 69, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:21:25,901 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.911e+01 8.438e+01 9.001e+01 9.853e+01 2.309e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-18 04:21:32,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=367640.0, ans=0.1 +2024-09-18 04:21:50,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=367680.0, ans=0.125 +2024-09-18 04:22:08,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=367720.0, ans=0.0 +2024-09-18 04:22:17,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=367760.0, ans=10.0 +2024-09-18 04:22:22,532 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.90 vs. limit=15.0 +2024-09-18 04:22:25,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.26 vs. limit=15.0 +2024-09-18 04:22:26,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=367760.0, ans=0.125 +2024-09-18 04:22:32,117 INFO [train.py:1198] (1/2) Epoch 21, batch 1450, loss[loss=0.2606, ctc_loss=0.146, cr_loss=0.3955, attn_decoder_loss=0.2646, over 29416.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1351, cr_loss=0.3788, attn_decoder_loss=0.2501, over 5804794.23 frames. ], batch size: 94, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:22:44,451 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 04:22:44,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=367800.0, ans=0.05 +2024-09-18 04:22:46,117 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 04:22:55,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=367840.0, ans=0.125 +2024-09-18 04:23:19,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=367920.0, ans=0.0 +2024-09-18 04:23:23,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=367920.0, ans=0.125 +2024-09-18 04:23:31,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=367960.0, ans=0.0 +2024-09-18 04:23:43,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_abs, batch_count=367960.0, ans=0.5 +2024-09-18 04:23:55,037 INFO [train.py:1198] (1/2) Epoch 21, batch 1500, loss[loss=0.2553, ctc_loss=0.1397, cr_loss=0.3864, attn_decoder_loss=0.2596, over 29618.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1351, cr_loss=0.3792, attn_decoder_loss=0.2502, over 5807549.13 frames. 
], batch size: 86, lr: 5.33e-03, grad_scale: 8.0 +2024-09-18 04:24:08,785 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.505e+01 8.602e+01 9.157e+01 9.632e+01 2.068e+02, threshold=1.831e+02, percent-clipped=2.0 +2024-09-18 04:24:36,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=368080.0, ans=0.125 +2024-09-18 04:24:44,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=368120.0, ans=0.04949747468305833 +2024-09-18 04:24:46,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.06 vs. limit=22.5 +2024-09-18 04:24:47,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=368120.0, ans=0.025 +2024-09-18 04:24:53,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=368120.0, ans=0.0 +2024-09-18 04:24:53,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=368120.0, ans=0.2 +2024-09-18 04:25:11,429 INFO [train.py:1198] (1/2) Epoch 21, batch 1550, loss[loss=0.2699, ctc_loss=0.1599, cr_loss=0.4197, attn_decoder_loss=0.2728, over 29518.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1357, cr_loss=0.3793, attn_decoder_loss=0.2503, over 5782133.84 frames. ], batch size: 90, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:25:20,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=368200.0, ans=0.0 +2024-09-18 04:25:41,018 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.65 vs. limit=6.0 +2024-09-18 04:25:54,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=368280.0, ans=0.125 +2024-09-18 04:26:09,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=368320.0, ans=0.0 +2024-09-18 04:26:13,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=368320.0, ans=0.0 +2024-09-18 04:26:23,521 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.49 vs. limit=15.0 +2024-09-18 04:26:31,487 INFO [train.py:1198] (1/2) Epoch 21, batch 1600, loss[loss=0.2572, ctc_loss=0.1339, cr_loss=0.3699, attn_decoder_loss=0.2627, over 29691.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1356, cr_loss=0.3783, attn_decoder_loss=0.2502, over 5764362.24 frames. ], batch size: 85, lr: 5.32e-03, grad_scale: 16.0 +2024-09-18 04:26:39,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.61 vs. limit=15.0 +2024-09-18 04:26:46,819 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.333e+01 8.519e+01 9.030e+01 9.960e+01 2.636e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-18 04:26:58,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=18.28 vs. 
limit=22.5 +2024-09-18 04:27:14,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=368480.0, ans=0.125 +2024-09-18 04:27:32,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=368560.0, ans=0.125 +2024-09-18 04:27:41,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=368560.0, ans=0.025 +2024-09-18 04:27:47,311 INFO [train.py:1198] (1/2) Epoch 21, batch 1650, loss[loss=0.254, ctc_loss=0.1403, cr_loss=0.3952, attn_decoder_loss=0.2579, over 29696.00 frames. ], tot_loss[loss=0.2461, ctc_loss=0.1354, cr_loss=0.3781, attn_decoder_loss=0.25, over 5758006.91 frames. ], batch size: 89, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:28:02,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=368640.0, ans=0.125 +2024-09-18 04:28:06,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=368640.0, ans=0.125 +2024-09-18 04:28:06,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=368640.0, ans=0.125 +2024-09-18 04:28:22,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=368680.0, ans=0.125 +2024-09-18 04:28:48,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=368760.0, ans=0.125 +2024-09-18 04:29:03,551 INFO [train.py:1198] (1/2) Epoch 21, batch 1700, loss[loss=0.2158, ctc_loss=0.1141, cr_loss=0.3262, attn_decoder_loss=0.2199, over 29594.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1351, cr_loss=0.3777, attn_decoder_loss=0.2499, over 5780203.61 frames. ], batch size: 69, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:29:05,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=368800.0, ans=0.0 +2024-09-18 04:29:07,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=368800.0, ans=0.2 +2024-09-18 04:29:18,919 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.057e+01 8.456e+01 9.072e+01 9.555e+01 1.411e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-18 04:29:20,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=368840.0, ans=0.0 +2024-09-18 04:29:48,798 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.63 vs. limit=22.5 +2024-09-18 04:29:53,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=368920.0, ans=0.0 +2024-09-18 04:29:57,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=368920.0, ans=0.125 +2024-09-18 04:30:14,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=368960.0, ans=0.0 +2024-09-18 04:30:23,625 INFO [train.py:1198] (1/2) Epoch 21, batch 1750, loss[loss=0.2226, ctc_loss=0.119, cr_loss=0.342, attn_decoder_loss=0.2265, over 29372.00 frames. 
], tot_loss[loss=0.2458, ctc_loss=0.1349, cr_loss=0.3778, attn_decoder_loss=0.2497, over 5787707.11 frames. ], batch size: 67, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:30:46,879 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.18 vs. limit=15.0 +2024-09-18 04:30:52,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=369080.0, ans=0.125 +2024-09-18 04:31:04,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=369080.0, ans=0.025 +2024-09-18 04:31:19,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=369120.0, ans=0.0 +2024-09-18 04:31:30,682 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.99 vs. limit=15.0 +2024-09-18 04:31:35,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.97 vs. limit=15.0 +2024-09-18 04:31:38,789 INFO [train.py:1198] (1/2) Epoch 21, batch 1800, loss[loss=0.257, ctc_loss=0.1434, cr_loss=0.3991, attn_decoder_loss=0.2608, over 29702.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.135, cr_loss=0.3783, attn_decoder_loss=0.2499, over 5789593.01 frames. ], batch size: 83, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:31:54,056 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.817e+01 8.570e+01 9.201e+01 9.986e+01 1.467e+02, threshold=1.840e+02, percent-clipped=0.0 +2024-09-18 04:31:55,022 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.56 vs. limit=12.0 +2024-09-18 04:32:18,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=369280.0, ans=0.2 +2024-09-18 04:32:27,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=369320.0, ans=0.2 +2024-09-18 04:32:54,990 INFO [train.py:1198] (1/2) Epoch 21, batch 1850, loss[loss=0.2594, ctc_loss=0.1423, cr_loss=0.3806, attn_decoder_loss=0.264, over 29607.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1348, cr_loss=0.3778, attn_decoder_loss=0.2496, over 5797149.22 frames. ], batch size: 86, lr: 5.32e-03, grad_scale: 8.0 +2024-09-18 04:33:11,086 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.69 vs. limit=15.0 +2024-09-18 04:33:33,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=369480.0, ans=0.125 +2024-09-18 04:33:48,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=369520.0, ans=0.125 +2024-09-18 04:33:57,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=369560.0, ans=0.2 +2024-09-18 04:34:11,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.47 vs. 
limit=15.0 +2024-09-18 04:34:15,224 INFO [train.py:1198] (1/2) Epoch 21, batch 1900, loss[loss=0.259, ctc_loss=0.149, cr_loss=0.4261, attn_decoder_loss=0.2618, over 29711.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1352, cr_loss=0.3788, attn_decoder_loss=0.2502, over 5803873.49 frames. ], batch size: 89, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:34:30,312 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.394e+01 8.618e+01 9.006e+01 9.728e+01 3.211e+02, threshold=1.801e+02, percent-clipped=2.0 +2024-09-18 04:34:44,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=369680.0, ans=0.1 +2024-09-18 04:34:52,699 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.72 vs. limit=15.0 +2024-09-18 04:35:13,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=369720.0, ans=0.125 +2024-09-18 04:35:16,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.67 vs. limit=22.5 +2024-09-18 04:35:31,127 INFO [train.py:1198] (1/2) Epoch 21, batch 1950, loss[loss=0.2463, ctc_loss=0.1342, cr_loss=0.4065, attn_decoder_loss=0.2497, over 29456.00 frames. ], tot_loss[loss=0.2471, ctc_loss=0.1356, cr_loss=0.38, attn_decoder_loss=0.251, over 5818220.92 frames. ], batch size: 78, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:35:55,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=369840.0, ans=0.125 +2024-09-18 04:35:55,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=369840.0, ans=0.025 +2024-09-18 04:35:57,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=369840.0, ans=10.0 +2024-09-18 04:36:10,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=369880.0, ans=0.125 +2024-09-18 04:36:31,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=369960.0, ans=0.025 +2024-09-18 04:36:46,468 INFO [train.py:1198] (1/2) Epoch 21, batch 2000, loss[loss=0.2211, ctc_loss=0.1226, cr_loss=0.3533, attn_decoder_loss=0.2242, over 29337.00 frames. ], tot_loss[loss=0.2474, ctc_loss=0.1361, cr_loss=0.3805, attn_decoder_loss=0.2513, over 5796869.98 frames. ], batch size: 67, lr: 5.31e-03, grad_scale: 16.0 +2024-09-18 04:37:01,560 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.001e+01 8.831e+01 9.227e+01 9.765e+01 5.439e+02, threshold=1.845e+02, percent-clipped=1.0 +2024-09-18 04:37:04,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=370040.0, ans=0.125 +2024-09-18 04:37:10,269 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.38 vs. 
limit=8.0 +2024-09-18 04:37:16,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=370080.0, ans=0.2 +2024-09-18 04:37:24,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=370080.0, ans=0.125 +2024-09-18 04:37:38,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=370120.0, ans=0.07 +2024-09-18 04:37:41,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=370120.0, ans=0.125 +2024-09-18 04:37:59,449 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.53 vs. limit=12.0 +2024-09-18 04:38:05,837 INFO [train.py:1198] (1/2) Epoch 21, batch 2050, loss[loss=0.2218, ctc_loss=0.1198, cr_loss=0.3641, attn_decoder_loss=0.2251, over 29421.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1355, cr_loss=0.379, attn_decoder_loss=0.2503, over 5787488.15 frames. ], batch size: 70, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:38:11,520 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.71 vs. limit=12.0 +2024-09-18 04:38:15,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=370200.0, ans=0.125 +2024-09-18 04:39:11,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=370360.0, ans=0.125 +2024-09-18 04:39:12,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=370360.0, ans=0.0 +2024-09-18 04:39:12,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=370360.0, ans=0.125 +2024-09-18 04:39:21,716 INFO [train.py:1198] (1/2) Epoch 21, batch 2100, loss[loss=0.2311, ctc_loss=0.1207, cr_loss=0.3444, attn_decoder_loss=0.2357, over 29750.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1344, cr_loss=0.3767, attn_decoder_loss=0.2495, over 5798917.51 frames. ], batch size: 81, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:39:21,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=370400.0, ans=0.1 +2024-09-18 04:39:38,260 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.389e+01 8.297e+01 8.835e+01 9.326e+01 1.551e+02, threshold=1.767e+02, percent-clipped=0.0 +2024-09-18 04:40:07,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=370520.0, ans=0.2 +2024-09-18 04:40:24,173 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.84 vs. 
limit=15.0 +2024-09-18 04:40:26,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=370560.0, ans=0.0 +2024-09-18 04:40:32,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=370560.0, ans=0.125 +2024-09-18 04:40:37,250 INFO [train.py:1198] (1/2) Epoch 21, batch 2150, loss[loss=0.24, ctc_loss=0.1388, cr_loss=0.3791, attn_decoder_loss=0.2429, over 29439.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1336, cr_loss=0.3755, attn_decoder_loss=0.2487, over 5814391.34 frames. ], batch size: 78, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:40:39,498 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.52 vs. limit=15.0 +2024-09-18 04:40:45,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=370600.0, ans=0.0 +2024-09-18 04:40:57,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=370640.0, ans=0.2 +2024-09-18 04:41:00,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=370640.0, ans=15.0 +2024-09-18 04:41:04,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=370640.0, ans=0.125 +2024-09-18 04:41:25,362 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.16 vs. limit=22.5 +2024-09-18 04:41:33,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=370720.0, ans=0.125 +2024-09-18 04:41:36,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=370720.0, ans=0.0 +2024-09-18 04:41:54,697 INFO [train.py:1198] (1/2) Epoch 21, batch 2200, loss[loss=0.255, ctc_loss=0.1403, cr_loss=0.3886, attn_decoder_loss=0.2591, over 29643.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1341, cr_loss=0.3766, attn_decoder_loss=0.2489, over 5811591.28 frames. ], batch size: 86, lr: 5.31e-03, grad_scale: 8.0 +2024-09-18 04:42:06,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=370800.0, ans=0.1 +2024-09-18 04:42:13,472 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.585e+01 9.031e+01 9.683e+01 2.928e+02, threshold=1.806e+02, percent-clipped=3.0 +2024-09-18 04:42:30,765 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.00 vs. limit=22.5 +2024-09-18 04:43:12,511 INFO [train.py:1198] (1/2) Epoch 21, batch 2250, loss[loss=0.2469, ctc_loss=0.1351, cr_loss=0.3857, attn_decoder_loss=0.2507, over 29695.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1341, cr_loss=0.3765, attn_decoder_loss=0.2488, over 5811091.91 frames. 
], batch size: 82, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:43:24,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=371000.0, ans=0.2 +2024-09-18 04:43:56,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=371120.0, ans=0.1 +2024-09-18 04:44:11,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=371160.0, ans=0.0 +2024-09-18 04:44:28,717 INFO [train.py:1198] (1/2) Epoch 21, batch 2300, loss[loss=0.2257, ctc_loss=0.1152, cr_loss=0.339, attn_decoder_loss=0.2305, over 29331.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1336, cr_loss=0.3755, attn_decoder_loss=0.248, over 5798789.98 frames. ], batch size: 71, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:44:45,171 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.457e+01 8.450e+01 8.935e+01 9.776e+01 2.210e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 04:45:21,511 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.42 vs. limit=12.0 +2024-09-18 04:45:33,735 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.66 vs. limit=15.0 +2024-09-18 04:45:46,558 INFO [train.py:1198] (1/2) Epoch 21, batch 2350, loss[loss=0.2476, ctc_loss=0.1376, cr_loss=0.3853, attn_decoder_loss=0.2513, over 29682.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1336, cr_loss=0.3755, attn_decoder_loss=0.2481, over 5805019.18 frames. ], batch size: 83, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:45:49,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=371400.0, ans=0.1 +2024-09-18 04:46:34,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=371520.0, ans=0.125 +2024-09-18 04:46:43,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=371520.0, ans=0.125 +2024-09-18 04:46:54,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=371560.0, ans=0.07 +2024-09-18 04:46:57,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=371560.0, ans=0.0 +2024-09-18 04:47:00,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=371560.0, ans=0.125 +2024-09-18 04:47:04,808 INFO [train.py:1198] (1/2) Epoch 21, batch 2400, loss[loss=0.2448, ctc_loss=0.1437, cr_loss=0.3792, attn_decoder_loss=0.2476, over 29510.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1343, cr_loss=0.3768, attn_decoder_loss=0.2488, over 5808497.48 frames. 
], batch size: 76, lr: 5.30e-03, grad_scale: 16.0 +2024-09-18 04:47:11,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=371600.0, ans=0.125 +2024-09-18 04:47:12,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=371600.0, ans=0.0 +2024-09-18 04:47:21,501 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.073e+01 8.473e+01 9.186e+01 9.665e+01 3.026e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-18 04:47:27,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=371640.0, ans=0.025 +2024-09-18 04:47:35,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=371680.0, ans=0.125 +2024-09-18 04:47:44,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=371680.0, ans=0.025 +2024-09-18 04:48:04,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=371760.0, ans=0.125 +2024-09-18 04:48:13,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=371760.0, ans=0.125 +2024-09-18 04:48:16,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=371760.0, ans=0.0 +2024-09-18 04:48:20,894 INFO [train.py:1198] (1/2) Epoch 21, batch 2450, loss[loss=0.2495, ctc_loss=0.1388, cr_loss=0.3901, attn_decoder_loss=0.2532, over 29711.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1344, cr_loss=0.377, attn_decoder_loss=0.2495, over 5784591.76 frames. ], batch size: 82, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:48:25,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=371800.0, ans=0.2 +2024-09-18 04:48:40,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=371840.0, ans=0.125 +2024-09-18 04:49:15,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=371920.0, ans=0.1 +2024-09-18 04:49:20,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=371920.0, ans=0.1 +2024-09-18 04:49:29,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=371960.0, ans=0.125 +2024-09-18 04:49:30,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.60 vs. limit=15.0 +2024-09-18 04:49:34,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=371960.0, ans=0.125 +2024-09-18 04:49:38,820 INFO [train.py:1198] (1/2) Epoch 21, batch 2500, loss[loss=0.2571, ctc_loss=0.1363, cr_loss=0.387, attn_decoder_loss=0.262, over 29621.00 frames. ], tot_loss[loss=0.2457, ctc_loss=0.1348, cr_loss=0.3782, attn_decoder_loss=0.2496, over 5795598.05 frames. 
], batch size: 86, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:49:59,137 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.020e+01 8.495e+01 9.101e+01 9.738e+01 1.875e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-18 04:50:26,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=372120.0, ans=0.07 +2024-09-18 04:50:35,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=372120.0, ans=0.0 +2024-09-18 04:50:57,260 INFO [train.py:1198] (1/2) Epoch 21, batch 2550, loss[loss=0.2201, ctc_loss=0.1151, cr_loss=0.349, attn_decoder_loss=0.224, over 29349.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1347, cr_loss=0.3785, attn_decoder_loss=0.2495, over 5798809.55 frames. ], batch size: 67, lr: 5.30e-03, grad_scale: 8.0 +2024-09-18 04:50:59,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=372200.0, ans=0.125 +2024-09-18 04:51:14,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=372240.0, ans=0.0 +2024-09-18 04:51:30,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=372280.0, ans=0.125 +2024-09-18 04:51:35,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=372280.0, ans=0.025 +2024-09-18 04:51:39,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=372280.0, ans=0.2 +2024-09-18 04:51:42,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=372320.0, ans=0.1 +2024-09-18 04:51:52,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=372320.0, ans=0.125 +2024-09-18 04:52:02,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=372360.0, ans=0.125 +2024-09-18 04:52:08,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=372360.0, ans=0.2 +2024-09-18 04:52:13,129 INFO [train.py:1198] (1/2) Epoch 21, batch 2600, loss[loss=0.239, ctc_loss=0.1336, cr_loss=0.3751, attn_decoder_loss=0.2424, over 29431.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1352, cr_loss=0.3791, attn_decoder_loss=0.2499, over 5794218.82 frames. 
], batch size: 78, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:52:19,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=372400.0, ans=0.1 +2024-09-18 04:52:31,048 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.711e+01 8.657e+01 9.187e+01 9.794e+01 2.069e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-18 04:52:40,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=372440.0, ans=0.125 +2024-09-18 04:52:51,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=372480.0, ans=0.1 +2024-09-18 04:53:00,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=372520.0, ans=0.0 +2024-09-18 04:53:14,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=372560.0, ans=0.125 +2024-09-18 04:53:17,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.15 vs. limit=10.0 +2024-09-18 04:53:17,943 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.37 vs. limit=15.0 +2024-09-18 04:53:18,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.53 vs. limit=10.0 +2024-09-18 04:53:30,475 INFO [train.py:1198] (1/2) Epoch 21, batch 2650, loss[loss=0.2641, ctc_loss=0.1451, cr_loss=0.3923, attn_decoder_loss=0.2686, over 29270.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1352, cr_loss=0.3795, attn_decoder_loss=0.2503, over 5800535.41 frames. ], batch size: 100, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:53:33,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=372600.0, ans=0.0 +2024-09-18 04:53:49,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=372640.0, ans=0.125 +2024-09-18 04:53:58,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=372640.0, ans=0.125 +2024-09-18 04:54:22,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=372720.0, ans=0.0 +2024-09-18 04:54:48,381 INFO [train.py:1198] (1/2) Epoch 21, batch 2700, loss[loss=0.2506, ctc_loss=0.1389, cr_loss=0.3822, attn_decoder_loss=0.2545, over 29531.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1358, cr_loss=0.3803, attn_decoder_loss=0.2508, over 5797580.06 frames. 
], batch size: 87, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:54:56,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=372800.0, ans=0.1 +2024-09-18 04:55:06,489 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.257e+01 8.585e+01 9.069e+01 9.661e+01 1.375e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-18 04:55:32,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=372920.0, ans=0.04949747468305833 +2024-09-18 04:55:57,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=372960.0, ans=0.125 +2024-09-18 04:56:04,538 INFO [train.py:1198] (1/2) Epoch 21, batch 2750, loss[loss=0.2281, ctc_loss=0.1293, cr_loss=0.3789, attn_decoder_loss=0.2307, over 29513.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1352, cr_loss=0.3794, attn_decoder_loss=0.2498, over 5795317.94 frames. ], batch size: 75, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:56:06,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=373000.0, ans=0.1 +2024-09-18 04:56:15,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=373000.0, ans=0.1 +2024-09-18 04:56:35,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=373080.0, ans=0.125 +2024-09-18 04:56:35,913 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.18 vs. limit=6.0 +2024-09-18 04:56:46,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=373080.0, ans=0.125 +2024-09-18 04:56:55,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.73 vs. limit=15.0 +2024-09-18 04:57:21,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=373200.0, ans=0.125 +2024-09-18 04:57:21,587 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.06 vs. limit=15.0 +2024-09-18 04:57:22,200 INFO [train.py:1198] (1/2) Epoch 21, batch 2800, loss[loss=0.2641, ctc_loss=0.164, cr_loss=0.3937, attn_decoder_loss=0.2665, over 20587.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1355, cr_loss=0.3797, attn_decoder_loss=0.2502, over 5777742.02 frames. 
], batch size: 209, lr: 5.29e-03, grad_scale: 16.0 +2024-09-18 04:57:24,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=373200.0, ans=0.07 +2024-09-18 04:57:30,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=373200.0, ans=0.0 +2024-09-18 04:57:43,972 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.572e+01 8.642e+01 9.187e+01 1.013e+02 2.371e+02, threshold=1.837e+02, percent-clipped=3.0 +2024-09-18 04:57:47,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=373240.0, ans=0.125 +2024-09-18 04:57:59,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=373280.0, ans=0.125 +2024-09-18 04:58:01,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=373280.0, ans=0.125 +2024-09-18 04:58:04,856 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=15.48 vs. limit=15.0 +2024-09-18 04:58:20,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=373320.0, ans=0.125 +2024-09-18 04:58:35,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=373360.0, ans=0.125 +2024-09-18 04:58:40,011 INFO [train.py:1198] (1/2) Epoch 21, batch 2850, loss[loss=0.2415, ctc_loss=0.1321, cr_loss=0.3922, attn_decoder_loss=0.245, over 29518.00 frames. ], tot_loss[loss=0.2467, ctc_loss=0.136, cr_loss=0.3802, attn_decoder_loss=0.2506, over 5762212.64 frames. ], batch size: 77, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 04:58:41,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=373400.0, ans=0.125 +2024-09-18 04:58:53,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=373440.0, ans=0.1 +2024-09-18 04:58:56,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=373440.0, ans=0.0 +2024-09-18 04:59:09,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=373480.0, ans=0.125 +2024-09-18 04:59:10,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=373480.0, ans=0.125 +2024-09-18 04:59:34,883 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 04:59:37,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=373520.0, ans=0.125 +2024-09-18 04:59:39,923 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.11 vs. 
limit=15.0 +2024-09-18 04:59:42,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=373560.0, ans=0.07 +2024-09-18 04:59:42,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=373560.0, ans=0.07 +2024-09-18 04:59:47,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=373560.0, ans=0.125 +2024-09-18 04:59:54,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=373600.0, ans=0.125 +2024-09-18 04:59:56,350 INFO [train.py:1198] (1/2) Epoch 21, batch 2900, loss[loss=0.2288, ctc_loss=0.1155, cr_loss=0.3531, attn_decoder_loss=0.2336, over 29427.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1363, cr_loss=0.3817, attn_decoder_loss=0.2516, over 5787421.58 frames. ], batch size: 79, lr: 5.29e-03, grad_scale: 8.0 +2024-09-18 05:00:15,829 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.274e+01 8.588e+01 9.125e+01 9.672e+01 3.101e+02, threshold=1.825e+02, percent-clipped=2.0 +2024-09-18 05:00:17,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=373640.0, ans=0.125 +2024-09-18 05:01:12,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=373800.0, ans=0.125 +2024-09-18 05:01:13,983 INFO [train.py:1198] (1/2) Epoch 21, batch 2950, loss[loss=0.2345, ctc_loss=0.1239, cr_loss=0.361, attn_decoder_loss=0.2388, over 29525.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1351, cr_loss=0.3787, attn_decoder_loss=0.2501, over 5782514.73 frames. 
], batch size: 75, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:01:18,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=373800.0, ans=0.125 +2024-09-18 05:01:21,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=373800.0, ans=0.125 +2024-09-18 05:01:21,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=373800.0, ans=0.09899494936611666 +2024-09-18 05:01:42,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=373840.0, ans=0.125 +2024-09-18 05:01:43,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=373840.0, ans=0.0 +2024-09-18 05:01:49,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=373880.0, ans=0.125 +2024-09-18 05:02:10,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=373920.0, ans=0.0 +2024-09-18 05:02:15,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=373960.0, ans=0.125 +2024-09-18 05:02:18,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=373960.0, ans=0.125 +2024-09-18 05:02:21,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=373960.0, ans=0.2 +2024-09-18 05:02:27,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=373960.0, ans=0.0 +2024-09-18 05:02:31,642 INFO [train.py:1198] (1/2) Epoch 21, batch 3000, loss[loss=0.2508, ctc_loss=0.143, cr_loss=0.3898, attn_decoder_loss=0.2542, over 29777.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1347, cr_loss=0.3777, attn_decoder_loss=0.2497, over 5783262.87 frames. ], batch size: 81, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:02:31,642 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 05:02:39,437 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([2.3577, 3.0348, 2.4504, 2.7416, 2.2709, 2.8968, 2.8559, 3.0956], + device='cuda:1') +2024-09-18 05:02:50,162 INFO [train.py:1230] (1/2) Epoch 21, validation: loss=0.2116, ctc_loss=0.03952, cr_loss=5.001e-15, attn_decoder_loss=0.2307, over 944034.00 frames. +2024-09-18 05:02:50,162 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 05:02:53,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=374000.0, ans=10.0 +2024-09-18 05:02:57,412 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.81 vs. 
limit=15.0 +2024-09-18 05:03:10,258 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.360e+01 8.620e+01 9.343e+01 9.937e+01 2.049e+02, threshold=1.869e+02, percent-clipped=2.0 +2024-09-18 05:03:12,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_ff2.min_abs, batch_count=374040.0, ans=0.1 +2024-09-18 05:03:33,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=374080.0, ans=0.2 +2024-09-18 05:03:44,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=374120.0, ans=0.2 +2024-09-18 05:04:06,324 INFO [train.py:1198] (1/2) Epoch 21, batch 3050, loss[loss=0.2256, ctc_loss=0.1272, cr_loss=0.359, attn_decoder_loss=0.2286, over 29546.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1355, cr_loss=0.3792, attn_decoder_loss=0.2504, over 5776934.68 frames. ], batch size: 76, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:04:18,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten.whitening_limit, batch_count=374200.0, ans=22.5 +2024-09-18 05:04:19,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=374200.0, ans=0.125 +2024-09-18 05:04:37,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=374280.0, ans=0.125 +2024-09-18 05:04:46,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=374280.0, ans=0.1 +2024-09-18 05:04:50,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=374280.0, ans=0.125 +2024-09-18 05:04:57,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=374320.0, ans=0.1 +2024-09-18 05:05:01,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=374320.0, ans=0.125 +2024-09-18 05:05:12,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=374360.0, ans=0.125 +2024-09-18 05:05:26,565 INFO [train.py:1198] (1/2) Epoch 21, batch 3100, loss[loss=0.2618, ctc_loss=0.153, cr_loss=0.4146, attn_decoder_loss=0.2646, over 29232.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1353, cr_loss=0.3792, attn_decoder_loss=0.2502, over 5777603.04 frames. 
], batch size: 100, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:05:26,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=374400.0, ans=0.2 +2024-09-18 05:05:31,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=374400.0, ans=0.0 +2024-09-18 05:05:35,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=374400.0, ans=0.0 +2024-09-18 05:05:44,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=374440.0, ans=0.125 +2024-09-18 05:05:45,911 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.679e+01 8.504e+01 9.125e+01 9.577e+01 2.431e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-18 05:05:52,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=374440.0, ans=0.0 +2024-09-18 05:05:58,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=374480.0, ans=0.125 +2024-09-18 05:06:28,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=374560.0, ans=0.125 +2024-09-18 05:06:41,991 INFO [train.py:1198] (1/2) Epoch 21, batch 3150, loss[loss=0.2601, ctc_loss=0.1497, cr_loss=0.4078, attn_decoder_loss=0.2633, over 28738.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1353, cr_loss=0.3792, attn_decoder_loss=0.2501, over 5783110.60 frames. ], batch size: 104, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:06:55,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=374640.0, ans=0.025 +2024-09-18 05:06:59,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=374640.0, ans=0.2 +2024-09-18 05:07:05,523 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.74 vs. limit=22.5 +2024-09-18 05:07:17,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=374680.0, ans=0.0 +2024-09-18 05:07:17,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=374680.0, ans=0.0 +2024-09-18 05:07:30,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=374720.0, ans=0.1 +2024-09-18 05:07:30,834 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:07:47,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=374760.0, ans=0.025 +2024-09-18 05:07:57,648 INFO [train.py:1198] (1/2) Epoch 21, batch 3200, loss[loss=0.2447, ctc_loss=0.1257, cr_loss=0.3761, attn_decoder_loss=0.2495, over 29407.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.135, cr_loss=0.3784, attn_decoder_loss=0.2497, over 5792974.03 frames. 
], batch size: 79, lr: 5.28e-03, grad_scale: 16.0 +2024-09-18 05:08:03,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=374800.0, ans=0.5 +2024-09-18 05:08:20,843 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.806e+01 8.671e+01 9.297e+01 1.015e+02 2.448e+02, threshold=1.859e+02, percent-clipped=1.0 +2024-09-18 05:08:30,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=374880.0, ans=0.0 +2024-09-18 05:08:48,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=374920.0, ans=0.125 +2024-09-18 05:08:56,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.80 vs. limit=15.0 +2024-09-18 05:09:15,148 INFO [train.py:1198] (1/2) Epoch 21, batch 3250, loss[loss=0.2542, ctc_loss=0.1348, cr_loss=0.3867, attn_decoder_loss=0.2589, over 29695.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1348, cr_loss=0.3787, attn_decoder_loss=0.2498, over 5799822.87 frames. ], batch size: 84, lr: 5.28e-03, grad_scale: 8.0 +2024-09-18 05:09:15,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=375000.0, ans=0.125 +2024-09-18 05:10:10,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=375120.0, ans=0.125 +2024-09-18 05:10:13,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=375120.0, ans=0.1 +2024-09-18 05:10:18,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=375160.0, ans=0.125 +2024-09-18 05:10:28,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=375160.0, ans=0.1 +2024-09-18 05:10:33,523 INFO [train.py:1198] (1/2) Epoch 21, batch 3300, loss[loss=0.2559, ctc_loss=0.1387, cr_loss=0.3716, attn_decoder_loss=0.2607, over 28272.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1345, cr_loss=0.3776, attn_decoder_loss=0.2488, over 5797975.68 frames. ], batch size: 111, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:10:43,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=375200.0, ans=0.1 +2024-09-18 05:10:54,884 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.354e+01 8.586e+01 9.172e+01 9.727e+01 2.274e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-18 05:11:07,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=375280.0, ans=0.0 +2024-09-18 05:11:32,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=375360.0, ans=0.125 +2024-09-18 05:11:34,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=375360.0, ans=0.125 +2024-09-18 05:11:48,828 INFO [train.py:1198] (1/2) Epoch 21, batch 3350, loss[loss=0.2603, ctc_loss=0.1508, cr_loss=0.426, attn_decoder_loss=0.263, over 28745.00 frames. 
], tot_loss[loss=0.2458, ctc_loss=0.1353, cr_loss=0.379, attn_decoder_loss=0.2497, over 5775029.86 frames. ], batch size: 104, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:11:54,077 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.96 vs. limit=15.0 +2024-09-18 05:12:44,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=375520.0, ans=10.0 +2024-09-18 05:13:06,608 INFO [train.py:1198] (1/2) Epoch 21, batch 3400, loss[loss=0.2261, ctc_loss=0.1228, cr_loss=0.3826, attn_decoder_loss=0.2291, over 29360.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1352, cr_loss=0.3785, attn_decoder_loss=0.2495, over 5769023.20 frames. ], batch size: 67, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:13:29,671 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.372e+01 8.485e+01 9.062e+01 9.587e+01 1.561e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 05:13:36,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=375640.0, ans=0.0 +2024-09-18 05:13:42,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=375680.0, ans=0.0 +2024-09-18 05:14:01,100 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.95 vs. limit=15.0 +2024-09-18 05:14:02,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=375720.0, ans=0.125 +2024-09-18 05:14:03,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=375720.0, ans=0.125 +2024-09-18 05:14:05,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=375720.0, ans=0.1 +2024-09-18 05:14:24,502 INFO [train.py:1198] (1/2) Epoch 21, batch 3450, loss[loss=0.262, ctc_loss=0.1499, cr_loss=0.3882, attn_decoder_loss=0.2658, over 28445.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1358, cr_loss=0.38, attn_decoder_loss=0.2502, over 5778211.85 frames. ], batch size: 112, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:14:51,108 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.13 vs. limit=22.5 +2024-09-18 05:15:40,552 INFO [train.py:1198] (1/2) Epoch 21, batch 3500, loss[loss=0.2316, ctc_loss=0.1305, cr_loss=0.4024, attn_decoder_loss=0.2339, over 29303.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1359, cr_loss=0.3798, attn_decoder_loss=0.2498, over 5779782.10 frames. 
], batch size: 71, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:15:40,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=376000.0, ans=0.0 +2024-09-18 05:15:41,015 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:16:02,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=376040.0, ans=0.0 +2024-09-18 05:16:03,719 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.357e+01 8.729e+01 9.303e+01 9.808e+01 4.681e+02, threshold=1.861e+02, percent-clipped=2.0 +2024-09-18 05:16:11,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=376080.0, ans=0.125 +2024-09-18 05:16:16,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=376080.0, ans=0.125 +2024-09-18 05:16:38,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=376120.0, ans=0.125 +2024-09-18 05:16:57,298 INFO [train.py:1198] (1/2) Epoch 21, batch 3550, loss[loss=0.2571, ctc_loss=0.137, cr_loss=0.378, attn_decoder_loss=0.2621, over 29713.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1353, cr_loss=0.3792, attn_decoder_loss=0.2497, over 5785999.16 frames. ], batch size: 89, lr: 5.27e-03, grad_scale: 8.0 +2024-09-18 05:17:05,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=376200.0, ans=0.125 +2024-09-18 05:17:33,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=376280.0, ans=0.0 +2024-09-18 05:17:59,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=376360.0, ans=0.125 +2024-09-18 05:18:13,900 INFO [train.py:1198] (1/2) Epoch 21, batch 3600, loss[loss=0.2406, ctc_loss=0.132, cr_loss=0.3715, attn_decoder_loss=0.2444, over 29502.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1353, cr_loss=0.3792, attn_decoder_loss=0.2497, over 5794715.92 frames. ], batch size: 77, lr: 5.27e-03, grad_scale: 16.0 +2024-09-18 05:18:17,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=376400.0, ans=0.125 +2024-09-18 05:18:23,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.07 vs. limit=10.0 +2024-09-18 05:18:34,851 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.502e+01 8.337e+01 8.787e+01 9.364e+01 1.302e+02, threshold=1.757e+02, percent-clipped=0.0 +2024-09-18 05:19:18,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=376560.0, ans=0.2 +2024-09-18 05:19:25,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=376560.0, ans=0.0 +2024-09-18 05:19:28,189 INFO [train.py:1198] (1/2) Epoch 21, batch 3650, loss[loss=0.2661, ctc_loss=0.151, cr_loss=0.3925, attn_decoder_loss=0.2702, over 29477.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1349, cr_loss=0.3782, attn_decoder_loss=0.2494, over 5796536.31 frames. 
], batch size: 90, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:19:32,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=376600.0, ans=0.125 +2024-09-18 05:19:57,520 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.50 vs. limit=22.5 +2024-09-18 05:20:08,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=376680.0, ans=0.1 +2024-09-18 05:20:17,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=376720.0, ans=0.5 +2024-09-18 05:20:20,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=376720.0, ans=0.125 +2024-09-18 05:20:23,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=376720.0, ans=0.0 +2024-09-18 05:20:43,490 INFO [train.py:1198] (1/2) Epoch 21, batch 3700, loss[loss=0.2518, ctc_loss=0.1338, cr_loss=0.3753, attn_decoder_loss=0.2565, over 29699.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1348, cr_loss=0.3778, attn_decoder_loss=0.2495, over 5805551.79 frames. ], batch size: 84, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:20:54,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=376800.0, ans=0.04949747468305833 +2024-09-18 05:21:04,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=376840.0, ans=0.09899494936611666 +2024-09-18 05:21:05,912 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.170e+01 8.517e+01 9.022e+01 9.849e+01 1.949e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-18 05:21:12,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=376880.0, ans=0.2 +2024-09-18 05:21:25,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=376880.0, ans=0.0 +2024-09-18 05:21:29,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=376920.0, ans=0.125 +2024-09-18 05:21:44,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=376960.0, ans=0.2 +2024-09-18 05:21:46,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=376960.0, ans=0.125 +2024-09-18 05:21:49,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=376960.0, ans=0.025 +2024-09-18 05:21:57,756 INFO [train.py:1198] (1/2) Epoch 21, batch 3750, loss[loss=0.2317, ctc_loss=0.1338, cr_loss=0.3786, attn_decoder_loss=0.2342, over 29334.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1347, cr_loss=0.3777, attn_decoder_loss=0.2491, over 5808225.74 frames. 
], batch size: 67, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:22:04,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=377000.0, ans=0.1 +2024-09-18 05:22:07,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=377000.0, ans=15.0 +2024-09-18 05:22:34,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=377080.0, ans=0.0 +2024-09-18 05:22:38,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=377080.0, ans=0.1 +2024-09-18 05:22:43,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=377120.0, ans=6.0 +2024-09-18 05:22:48,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=377120.0, ans=0.1 +2024-09-18 05:22:52,179 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:22:59,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=377160.0, ans=0.125 +2024-09-18 05:23:14,085 INFO [train.py:1198] (1/2) Epoch 21, batch 3800, loss[loss=0.2556, ctc_loss=0.1406, cr_loss=0.3897, attn_decoder_loss=0.2597, over 29615.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1341, cr_loss=0.3764, attn_decoder_loss=0.2487, over 5798592.88 frames. ], batch size: 86, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:23:36,537 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.482e+01 8.558e+01 9.240e+01 9.922e+01 2.766e+02, threshold=1.848e+02, percent-clipped=2.0 +2024-09-18 05:23:38,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=377240.0, ans=0.2 +2024-09-18 05:24:30,308 INFO [train.py:1198] (1/2) Epoch 21, batch 3850, loss[loss=0.2617, ctc_loss=0.1492, cr_loss=0.4039, attn_decoder_loss=0.2652, over 29242.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1333, cr_loss=0.3754, attn_decoder_loss=0.2482, over 5812698.78 frames. ], batch size: 100, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:24:40,239 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.85 vs. limit=15.0 +2024-09-18 05:25:32,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.10 vs. limit=22.5 +2024-09-18 05:25:39,623 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=7.17 vs. limit=15.0 +2024-09-18 05:25:45,167 INFO [train.py:1198] (1/2) Epoch 21, batch 3900, loss[loss=0.259, ctc_loss=0.1375, cr_loss=0.3946, attn_decoder_loss=0.2637, over 29641.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1337, cr_loss=0.3763, attn_decoder_loss=0.2486, over 5817052.13 frames. 
], batch size: 86, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:25:57,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=377600.0, ans=0.125 +2024-09-18 05:26:00,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=377640.0, ans=0.0 +2024-09-18 05:26:02,167 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.42 vs. limit=15.0 +2024-09-18 05:26:07,257 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.452e+01 8.671e+01 9.111e+01 9.603e+01 1.300e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-18 05:26:19,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=377680.0, ans=0.1 +2024-09-18 05:26:43,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=377760.0, ans=0.0 +2024-09-18 05:26:48,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=377760.0, ans=0.0 +2024-09-18 05:26:59,564 INFO [train.py:1198] (1/2) Epoch 21, batch 3950, loss[loss=0.2579, ctc_loss=0.1448, cr_loss=0.4011, attn_decoder_loss=0.2616, over 29500.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1337, cr_loss=0.3767, attn_decoder_loss=0.2487, over 5836167.40 frames. ], batch size: 97, lr: 5.26e-03, grad_scale: 8.0 +2024-09-18 05:27:15,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=377840.0, ans=0.04949747468305833 +2024-09-18 05:27:35,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=377880.0, ans=0.0 +2024-09-18 05:27:47,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=377920.0, ans=0.0 +2024-09-18 05:28:07,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=377960.0, ans=0.125 +2024-09-18 05:28:09,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=377960.0, ans=0.1 +2024-09-18 05:28:14,620 INFO [train.py:1198] (1/2) Epoch 21, batch 4000, loss[loss=0.2382, ctc_loss=0.131, cr_loss=0.3774, attn_decoder_loss=0.2417, over 29501.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1344, cr_loss=0.378, attn_decoder_loss=0.2492, over 5811938.34 frames. ], batch size: 74, lr: 5.26e-03, grad_scale: 16.0 +2024-09-18 05:28:15,312 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.29 vs. 
limit=22.5 +2024-09-18 05:28:35,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=378040.0, ans=0.125 +2024-09-18 05:28:38,253 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.613e+01 8.637e+01 9.105e+01 9.736e+01 3.809e+02, threshold=1.821e+02, percent-clipped=2.0 +2024-09-18 05:28:38,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=378040.0, ans=0.125 +2024-09-18 05:28:41,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=378040.0, ans=0.025 +2024-09-18 05:28:43,658 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.80 vs. limit=6.0 +2024-09-18 05:28:47,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=378080.0, ans=0.025 +2024-09-18 05:28:50,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=378080.0, ans=0.125 +2024-09-18 05:29:00,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=378120.0, ans=0.125 +2024-09-18 05:29:06,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=378120.0, ans=0.1 +2024-09-18 05:29:09,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=378120.0, ans=0.125 +2024-09-18 05:29:17,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=378160.0, ans=0.125 +2024-09-18 05:29:22,091 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.94 vs. limit=15.0 +2024-09-18 05:29:30,129 INFO [train.py:1198] (1/2) Epoch 21, batch 4050, loss[loss=0.2785, ctc_loss=0.1873, cr_loss=0.4407, attn_decoder_loss=0.2789, over 20379.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.135, cr_loss=0.379, attn_decoder_loss=0.2494, over 5796041.75 frames. 
], batch size: 210, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:29:39,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=378200.0, ans=0.0 +2024-09-18 05:29:46,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=378240.0, ans=0.1 +2024-09-18 05:29:49,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=378240.0, ans=0.0 +2024-09-18 05:30:24,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=378320.0, ans=0.0 +2024-09-18 05:30:27,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=378360.0, ans=0.1 +2024-09-18 05:30:28,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=378360.0, ans=0.025 +2024-09-18 05:30:39,969 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.47 vs. limit=6.0 +2024-09-18 05:30:44,002 INFO [train.py:1198] (1/2) Epoch 21, batch 4100, loss[loss=0.2666, ctc_loss=0.156, cr_loss=0.4157, attn_decoder_loss=0.2697, over 29490.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1346, cr_loss=0.3784, attn_decoder_loss=0.2493, over 5791785.49 frames. ], batch size: 90, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:31:07,489 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.357e+01 8.642e+01 9.337e+01 1.033e+02 5.468e+02, threshold=1.867e+02, percent-clipped=3.0 +2024-09-18 05:31:08,510 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.67 vs. limit=15.0 +2024-09-18 05:31:19,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=378480.0, ans=0.025 +2024-09-18 05:31:22,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=378480.0, ans=0.125 +2024-09-18 05:31:27,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=378520.0, ans=10.0 +2024-09-18 05:31:55,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.74 vs. limit=15.0 +2024-09-18 05:31:57,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=378600.0, ans=0.125 +2024-09-18 05:31:58,984 INFO [train.py:1198] (1/2) Epoch 21, batch 4150, loss[loss=0.2277, ctc_loss=0.1145, cr_loss=0.3318, attn_decoder_loss=0.2329, over 29478.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1343, cr_loss=0.3779, attn_decoder_loss=0.2491, over 5797866.69 frames. ], batch size: 77, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:32:05,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=378600.0, ans=0.125 +2024-09-18 05:32:11,555 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.36 vs. 
limit=12.0 +2024-09-18 05:32:24,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=378640.0, ans=0.1 +2024-09-18 05:32:44,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=378720.0, ans=0.0 +2024-09-18 05:32:52,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=378720.0, ans=0.0 +2024-09-18 05:33:12,767 INFO [train.py:1198] (1/2) Epoch 21, batch 4200, loss[loss=0.2561, ctc_loss=0.1412, cr_loss=0.4022, attn_decoder_loss=0.2599, over 29498.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1343, cr_loss=0.3781, attn_decoder_loss=0.2494, over 5799717.69 frames. ], batch size: 90, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:33:20,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=378800.0, ans=0.2 +2024-09-18 05:33:37,367 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.551e+01 8.402e+01 9.063e+01 9.513e+01 1.420e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-18 05:33:49,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=378880.0, ans=0.1 +2024-09-18 05:34:17,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=378960.0, ans=0.2 +2024-09-18 05:34:24,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=378960.0, ans=0.025 +2024-09-18 05:34:25,323 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.34 vs. limit=15.0 +2024-09-18 05:34:26,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=379000.0, ans=0.1 +2024-09-18 05:34:27,305 INFO [train.py:1198] (1/2) Epoch 21, batch 4250, loss[loss=0.2266, ctc_loss=0.1214, cr_loss=0.3467, attn_decoder_loss=0.2306, over 29525.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.134, cr_loss=0.3771, attn_decoder_loss=0.2493, over 5806606.33 frames. ], batch size: 74, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:34:39,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.43 vs. limit=15.0 +2024-09-18 05:34:45,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=379040.0, ans=0.125 +2024-09-18 05:34:48,199 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:34:56,201 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.50 vs. limit=15.0 +2024-09-18 05:35:10,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=379120.0, ans=0.125 +2024-09-18 05:35:16,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=379120.0, ans=0.1 +2024-09-18 05:35:34,148 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.09 vs. 
limit=15.0 +2024-09-18 05:35:42,512 INFO [train.py:1198] (1/2) Epoch 21, batch 4300, loss[loss=0.252, ctc_loss=0.1308, cr_loss=0.3732, attn_decoder_loss=0.2572, over 29520.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.134, cr_loss=0.3768, attn_decoder_loss=0.2496, over 5795400.99 frames. ], batch size: 87, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:35:44,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=379200.0, ans=0.0 +2024-09-18 05:36:06,490 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.491e+01 8.631e+01 9.482e+01 1.010e+02 4.284e+02, threshold=1.896e+02, percent-clipped=4.0 +2024-09-18 05:36:27,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=379320.0, ans=0.125 +2024-09-18 05:36:36,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=379320.0, ans=0.125 +2024-09-18 05:36:45,903 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.39 vs. limit=6.0 +2024-09-18 05:36:46,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=379360.0, ans=0.125 +2024-09-18 05:36:57,602 INFO [train.py:1198] (1/2) Epoch 21, batch 4350, loss[loss=0.2699, ctc_loss=0.1566, cr_loss=0.4203, attn_decoder_loss=0.2731, over 29532.00 frames. ], tot_loss[loss=0.2492, ctc_loss=0.1371, cr_loss=0.3829, attn_decoder_loss=0.2531, over 5796844.75 frames. ], batch size: 97, lr: 5.25e-03, grad_scale: 8.0 +2024-09-18 05:37:27,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=379480.0, ans=0.125 +2024-09-18 05:37:42,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=379520.0, ans=0.125 +2024-09-18 05:37:46,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=379520.0, ans=0.025 +2024-09-18 05:38:10,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=379600.0, ans=0.125 +2024-09-18 05:38:11,715 INFO [train.py:1198] (1/2) Epoch 21, batch 4400, loss[loss=0.2558, ctc_loss=0.1521, cr_loss=0.4056, attn_decoder_loss=0.2583, over 27010.00 frames. ], tot_loss[loss=0.2513, ctc_loss=0.1385, cr_loss=0.3855, attn_decoder_loss=0.2553, over 5767148.35 frames. 
], batch size: 124, lr: 5.24e-03, grad_scale: 16.0 +2024-09-18 05:38:12,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=379600.0, ans=0.0 +2024-09-18 05:38:23,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=379600.0, ans=0.0 +2024-09-18 05:38:25,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=379640.0, ans=0.2 +2024-09-18 05:38:26,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=379640.0, ans=0.0 +2024-09-18 05:38:34,951 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.049e+01 8.987e+01 9.326e+01 1.008e+02 3.021e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-18 05:38:37,073 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.23 vs. limit=22.5 +2024-09-18 05:38:43,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=379680.0, ans=0.1 +2024-09-18 05:38:49,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=379680.0, ans=0.0 +2024-09-18 05:39:08,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=379760.0, ans=0.0 +2024-09-18 05:39:15,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=379760.0, ans=0.125 +2024-09-18 05:39:25,852 INFO [train.py:1198] (1/2) Epoch 21, batch 4450, loss[loss=0.2643, ctc_loss=0.1582, cr_loss=0.3813, attn_decoder_loss=0.2676, over 20038.00 frames. ], tot_loss[loss=0.254, ctc_loss=0.1426, cr_loss=0.3894, attn_decoder_loss=0.2578, over 5572942.05 frames. ], batch size: 209, lr: 5.24e-03, grad_scale: 8.0 +2024-09-18 05:39:48,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=379840.0, ans=0.2 +2024-09-18 05:39:56,707 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 05:40:01,977 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.54 vs. limit=15.0 +2024-09-18 05:40:30,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=379960.0, ans=0.0 +2024-09-18 05:40:33,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=379960.0, ans=0.0 +2024-09-18 05:40:41,893 INFO [train.py:1198] (1/2) Epoch 21, batch 4500, loss[loss=0.2682, ctc_loss=0.1653, cr_loss=0.4087, attn_decoder_loss=0.2705, over 20179.00 frames. ], tot_loss[loss=0.2568, ctc_loss=0.1473, cr_loss=0.3921, attn_decoder_loss=0.2602, over 5233102.91 frames. ], batch size: 209, lr: 5.24e-03, grad_scale: 8.0 +2024-09-18 05:40:56,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=380040.0, ans=0.125 +2024-09-18 05:40:57,422 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.66 vs. 
limit=15.0 +2024-09-18 05:41:04,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=380040.0, ans=0.0 +2024-09-18 05:41:07,264 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.842e+01 1.023e+02 1.116e+02 1.184e+02 1.723e+02, threshold=2.233e+02, percent-clipped=0.0 +2024-09-18 05:42:06,204 INFO [train.py:1198] (1/2) Epoch 22, batch 0, loss[loss=0.2341, ctc_loss=0.1262, cr_loss=0.3582, attn_decoder_loss=0.2382, over 29582.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1262, cr_loss=0.3582, attn_decoder_loss=0.2382, over 29582.00 frames. ], batch size: 73, lr: 5.12e-03, grad_scale: 16.0 +2024-09-18 05:42:06,205 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 05:42:13,737 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.2.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.0576, 3.5965, 3.9289, 3.5301], device='cuda:1') +2024-09-18 05:42:24,647 INFO [train.py:1230] (1/2) Epoch 22, validation: loss=0.212, ctc_loss=0.0382, cr_loss=5.087e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-18 05:42:24,647 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 05:42:26,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=380100.0, ans=0.0 +2024-09-18 05:42:46,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=380140.0, ans=0.05 +2024-09-18 05:42:57,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=380180.0, ans=0.0 +2024-09-18 05:43:09,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=380180.0, ans=0.125 +2024-09-18 05:43:19,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=380220.0, ans=0.125 +2024-09-18 05:43:23,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=380220.0, ans=0.025 +2024-09-18 05:43:42,237 INFO [train.py:1198] (1/2) Epoch 22, batch 50, loss[loss=0.219, ctc_loss=0.1199, cr_loss=0.3504, attn_decoder_loss=0.2222, over 29429.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1362, cr_loss=0.3786, attn_decoder_loss=0.2501, over 1269808.36 frames. ], batch size: 70, lr: 5.12e-03, grad_scale: 8.0 +2024-09-18 05:43:48,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=380300.0, ans=0.125 +2024-09-18 05:44:14,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=380380.0, ans=0.07 +2024-09-18 05:44:16,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=380380.0, ans=0.07 +2024-09-18 05:44:17,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=380380.0, ans=0.1 +2024-09-18 05:44:25,807 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=14.25 vs. 
limit=15.0 +2024-09-18 05:44:29,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=380420.0, ans=0.125 +2024-09-18 05:44:44,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=380460.0, ans=0.1 +2024-09-18 05:44:44,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=380460.0, ans=0.125 +2024-09-18 05:44:47,550 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.106e+01 8.759e+01 9.355e+01 1.030e+02 2.527e+02, threshold=1.871e+02, percent-clipped=1.0 +2024-09-18 05:44:50,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten.whitening_limit, batch_count=380460.0, ans=15.0 +2024-09-18 05:44:57,986 INFO [train.py:1198] (1/2) Epoch 22, batch 100, loss[loss=0.231, ctc_loss=0.1194, cr_loss=0.3596, attn_decoder_loss=0.2354, over 29553.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1373, cr_loss=0.3811, attn_decoder_loss=0.2521, over 2253464.09 frames. ], batch size: 76, lr: 5.12e-03, grad_scale: 8.0 +2024-09-18 05:45:29,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=380580.0, ans=0.125 +2024-09-18 05:45:40,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=380580.0, ans=0.0 +2024-09-18 05:45:43,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=380620.0, ans=0.2 +2024-09-18 05:45:53,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.77 vs. limit=15.0 +2024-09-18 05:46:06,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=380660.0, ans=0.1 +2024-09-18 05:46:16,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=380700.0, ans=0.125 +2024-09-18 05:46:17,497 INFO [train.py:1198] (1/2) Epoch 22, batch 150, loss[loss=0.2241, ctc_loss=0.1176, cr_loss=0.3439, attn_decoder_loss=0.2283, over 29426.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1351, cr_loss=0.3775, attn_decoder_loss=0.2502, over 3048147.53 frames. ], batch size: 70, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:46:26,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=380700.0, ans=0.125 +2024-09-18 05:46:36,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=380740.0, ans=0.04949747468305833 +2024-09-18 05:46:42,638 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.16 vs. 
limit=12.0 +2024-09-18 05:46:47,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=380780.0, ans=0.0 +2024-09-18 05:46:53,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=380780.0, ans=0.125 +2024-09-18 05:47:03,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=380820.0, ans=0.0 +2024-09-18 05:47:22,575 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.751e+01 8.602e+01 9.163e+01 9.915e+01 1.341e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-18 05:47:31,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=380900.0, ans=0.0 +2024-09-18 05:47:33,201 INFO [train.py:1198] (1/2) Epoch 22, batch 200, loss[loss=0.2632, ctc_loss=0.1565, cr_loss=0.4289, attn_decoder_loss=0.2655, over 27468.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1347, cr_loss=0.3773, attn_decoder_loss=0.2493, over 3660283.60 frames. ], batch size: 124, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:47:34,161 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.84 vs. limit=10.0 +2024-09-18 05:47:41,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=380900.0, ans=0.0 +2024-09-18 05:47:48,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=380940.0, ans=0.2 +2024-09-18 05:47:55,113 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.91 vs. limit=15.0 +2024-09-18 05:48:01,264 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.01 vs. limit=15.0 +2024-09-18 05:48:08,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=380980.0, ans=0.125 +2024-09-18 05:48:12,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=380980.0, ans=0.1 +2024-09-18 05:48:14,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=380980.0, ans=0.0 +2024-09-18 05:48:23,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=381020.0, ans=0.015 +2024-09-18 05:48:27,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=381020.0, ans=0.0 +2024-09-18 05:48:27,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=381020.0, ans=0.125 +2024-09-18 05:48:35,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=381060.0, ans=0.125 +2024-09-18 05:48:41,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=381060.0, ans=0.035 +2024-09-18 05:48:48,673 INFO [train.py:1198] (1/2) Epoch 22, batch 250, loss[loss=0.2627, ctc_loss=0.144, cr_loss=0.3888, attn_decoder_loss=0.2673, over 29177.00 frames. 
], tot_loss[loss=0.2452, ctc_loss=0.1339, cr_loss=0.3776, attn_decoder_loss=0.2492, over 4143018.46 frames. ], batch size: 100, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:48:56,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=381100.0, ans=0.125 +2024-09-18 05:49:02,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=381140.0, ans=0.125 +2024-09-18 05:49:10,932 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.07 vs. limit=15.0 +2024-09-18 05:49:17,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=381180.0, ans=0.2 +2024-09-18 05:49:19,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=381180.0, ans=0.0 +2024-09-18 05:49:32,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=381220.0, ans=0.125 +2024-09-18 05:49:50,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=381260.0, ans=0.04949747468305833 +2024-09-18 05:49:51,366 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.92 vs. limit=15.0 +2024-09-18 05:49:53,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=381260.0, ans=0.0 +2024-09-18 05:49:56,365 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.584e+01 8.533e+01 8.896e+01 9.505e+01 2.232e+02, threshold=1.779e+02, percent-clipped=1.0 +2024-09-18 05:50:06,901 INFO [train.py:1198] (1/2) Epoch 22, batch 300, loss[loss=0.2621, ctc_loss=0.1486, cr_loss=0.4126, attn_decoder_loss=0.2656, over 29535.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.134, cr_loss=0.378, attn_decoder_loss=0.2489, over 4510180.26 frames. ], batch size: 92, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:50:15,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=381300.0, ans=0.2 +2024-09-18 05:50:16,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.89 vs. limit=15.0 +2024-09-18 05:50:53,746 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=7.19 vs. limit=15.0 +2024-09-18 05:50:56,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=381420.0, ans=0.125 +2024-09-18 05:51:24,709 INFO [train.py:1198] (1/2) Epoch 22, batch 350, loss[loss=0.2209, ctc_loss=0.1085, cr_loss=0.3431, attn_decoder_loss=0.2258, over 29329.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1339, cr_loss=0.3776, attn_decoder_loss=0.2491, over 4794950.00 frames. 
], batch size: 71, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:51:26,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=381500.0, ans=0.125 +2024-09-18 05:51:50,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=381540.0, ans=0.1 +2024-09-18 05:51:56,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=381580.0, ans=0.125 +2024-09-18 05:52:29,883 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.189e+01 8.396e+01 8.841e+01 9.371e+01 8.849e+02, threshold=1.768e+02, percent-clipped=1.0 +2024-09-18 05:52:40,320 INFO [train.py:1198] (1/2) Epoch 22, batch 400, loss[loss=0.2437, ctc_loss=0.1279, cr_loss=0.3447, attn_decoder_loss=0.2489, over 29716.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1335, cr_loss=0.3765, attn_decoder_loss=0.2488, over 5025049.85 frames. ], batch size: 82, lr: 5.11e-03, grad_scale: 16.0 +2024-09-18 05:53:09,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=381780.0, ans=0.125 +2024-09-18 05:53:14,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=381780.0, ans=0.07 +2024-09-18 05:53:14,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=381780.0, ans=0.0 +2024-09-18 05:53:17,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=381780.0, ans=0.1 +2024-09-18 05:53:19,280 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.19 vs. limit=6.0 +2024-09-18 05:53:23,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=381780.0, ans=0.125 +2024-09-18 05:53:28,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=381820.0, ans=0.0 +2024-09-18 05:53:59,078 INFO [train.py:1198] (1/2) Epoch 22, batch 450, loss[loss=0.2509, ctc_loss=0.1374, cr_loss=0.3882, attn_decoder_loss=0.2549, over 29699.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1332, cr_loss=0.3755, attn_decoder_loss=0.2489, over 5186773.74 frames. ], batch size: 83, lr: 5.11e-03, grad_scale: 8.0 +2024-09-18 05:54:28,384 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.32 vs. limit=10.0 +2024-09-18 05:54:30,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=381980.0, ans=0.125 +2024-09-18 05:54:33,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=381980.0, ans=10.0 +2024-09-18 05:55:08,440 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.444e+01 8.472e+01 8.899e+01 9.397e+01 1.729e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-18 05:55:17,496 INFO [train.py:1198] (1/2) Epoch 22, batch 500, loss[loss=0.2545, ctc_loss=0.1399, cr_loss=0.4054, attn_decoder_loss=0.2582, over 29426.00 frames. 
], tot_loss[loss=0.2442, ctc_loss=0.1328, cr_loss=0.3749, attn_decoder_loss=0.2483, over 5330349.01 frames. ], batch size: 94, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 05:55:33,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=382140.0, ans=0.0 +2024-09-18 05:55:42,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=382140.0, ans=0.025 +2024-09-18 05:55:54,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=382180.0, ans=0.1 +2024-09-18 05:55:55,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=382180.0, ans=0.125 +2024-09-18 05:56:18,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=382260.0, ans=0.0 +2024-09-18 05:56:33,384 INFO [train.py:1198] (1/2) Epoch 22, batch 550, loss[loss=0.2598, ctc_loss=0.1432, cr_loss=0.399, attn_decoder_loss=0.2639, over 28810.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1327, cr_loss=0.3746, attn_decoder_loss=0.2482, over 5421082.22 frames. ], batch size: 104, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 05:56:38,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=382300.0, ans=0.125 +2024-09-18 05:57:01,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=382340.0, ans=0.2 +2024-09-18 05:57:02,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=382380.0, ans=0.125 +2024-09-18 05:57:15,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=382380.0, ans=0.1 +2024-09-18 05:57:28,368 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.81 vs. limit=6.0 +2024-09-18 05:57:40,963 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.068e+01 8.705e+01 9.082e+01 9.823e+01 4.645e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-18 05:57:47,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=382460.0, ans=0.125 +2024-09-18 05:57:52,557 INFO [train.py:1198] (1/2) Epoch 22, batch 600, loss[loss=0.2579, ctc_loss=0.1473, cr_loss=0.4079, attn_decoder_loss=0.2611, over 29248.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1329, cr_loss=0.3757, attn_decoder_loss=0.2486, over 5508382.85 frames. ], batch size: 100, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 05:58:30,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=382580.0, ans=0.0 +2024-09-18 05:58:44,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=382620.0, ans=0.0 +2024-09-18 05:58:50,948 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.49 vs. 
limit=10.0 +2024-09-18 05:58:53,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=382660.0, ans=0.05 +2024-09-18 05:59:08,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=382700.0, ans=0.125 +2024-09-18 05:59:09,507 INFO [train.py:1198] (1/2) Epoch 22, batch 650, loss[loss=0.2427, ctc_loss=0.1285, cr_loss=0.3613, attn_decoder_loss=0.2473, over 29755.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1321, cr_loss=0.374, attn_decoder_loss=0.2479, over 5585368.09 frames. ], batch size: 81, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 05:59:09,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=382700.0, ans=0.0 +2024-09-18 05:59:17,631 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.30 vs. limit=12.0 +2024-09-18 05:59:20,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=382700.0, ans=0.0 +2024-09-18 05:59:31,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=382740.0, ans=0.0 +2024-09-18 06:00:01,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=382820.0, ans=0.125 +2024-09-18 06:00:04,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=382820.0, ans=0.1 +2024-09-18 06:00:06,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=382820.0, ans=0.125 +2024-09-18 06:00:14,902 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.96 vs. limit=22.5 +2024-09-18 06:00:15,692 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.721e+01 8.434e+01 8.895e+01 9.353e+01 1.142e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-18 06:00:24,703 INFO [train.py:1198] (1/2) Epoch 22, batch 700, loss[loss=0.2386, ctc_loss=0.1261, cr_loss=0.3698, attn_decoder_loss=0.2429, over 29524.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1326, cr_loss=0.3752, attn_decoder_loss=0.2486, over 5636653.22 frames. ], batch size: 76, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 06:00:27,120 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.14 vs. limit=15.0 +2024-09-18 06:00:31,847 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.30 vs. limit=15.0 +2024-09-18 06:00:37,707 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.72 vs. limit=10.0 +2024-09-18 06:00:38,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=382940.0, ans=0.125 +2024-09-18 06:00:52,495 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.41 vs. 
limit=15.0 +2024-09-18 06:00:52,931 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.68 vs. limit=15.0 +2024-09-18 06:00:55,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=382980.0, ans=0.05 +2024-09-18 06:01:02,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.71 vs. limit=12.0 +2024-09-18 06:01:02,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=382980.0, ans=0.0 +2024-09-18 06:01:21,780 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.70 vs. limit=22.5 +2024-09-18 06:01:24,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=383060.0, ans=0.1 +2024-09-18 06:01:40,822 INFO [train.py:1198] (1/2) Epoch 22, batch 750, loss[loss=0.2486, ctc_loss=0.1336, cr_loss=0.3816, attn_decoder_loss=0.2529, over 29700.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1324, cr_loss=0.3754, attn_decoder_loss=0.2483, over 5675627.05 frames. ], batch size: 82, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 06:01:49,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=383100.0, ans=0.0 +2024-09-18 06:02:04,361 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.68 vs. limit=15.0 +2024-09-18 06:02:06,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=383140.0, ans=0.2 +2024-09-18 06:02:20,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=383180.0, ans=0.0 +2024-09-18 06:02:25,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=383180.0, ans=0.125 +2024-09-18 06:02:29,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.69 vs. limit=15.0 +2024-09-18 06:02:43,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=383220.0, ans=0.125 +2024-09-18 06:02:52,140 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.347e+01 8.642e+01 9.168e+01 9.743e+01 1.816e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-18 06:02:52,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=383260.0, ans=0.0 +2024-09-18 06:02:59,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=383300.0, ans=0.0 +2024-09-18 06:03:01,135 INFO [train.py:1198] (1/2) Epoch 22, batch 800, loss[loss=0.218, ctc_loss=0.1048, cr_loss=0.3133, attn_decoder_loss=0.2236, over 29598.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1329, cr_loss=0.376, attn_decoder_loss=0.2484, over 5705249.37 frames. 
], batch size: 73, lr: 5.10e-03, grad_scale: 16.0 +2024-09-18 06:03:22,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=383340.0, ans=0.0 +2024-09-18 06:03:24,586 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.42 vs. limit=15.0 +2024-09-18 06:03:38,481 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=7.84 vs. limit=22.5 +2024-09-18 06:03:51,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=383420.0, ans=0.0 +2024-09-18 06:04:16,265 INFO [train.py:1198] (1/2) Epoch 22, batch 850, loss[loss=0.2586, ctc_loss=0.1367, cr_loss=0.3847, attn_decoder_loss=0.2636, over 29694.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1326, cr_loss=0.3755, attn_decoder_loss=0.2481, over 5732620.67 frames. ], batch size: 89, lr: 5.10e-03, grad_scale: 8.0 +2024-09-18 06:04:16,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=383500.0, ans=0.09899494936611666 +2024-09-18 06:04:37,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=383540.0, ans=0.125 +2024-09-18 06:04:40,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=383540.0, ans=0.125 +2024-09-18 06:05:20,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=383660.0, ans=0.025 +2024-09-18 06:05:24,337 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.172e+01 8.714e+01 9.138e+01 9.767e+01 2.023e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-18 06:05:32,026 INFO [train.py:1198] (1/2) Epoch 22, batch 900, loss[loss=0.2295, ctc_loss=0.125, cr_loss=0.3625, attn_decoder_loss=0.233, over 29606.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1328, cr_loss=0.3751, attn_decoder_loss=0.2482, over 5739075.17 frames. ], batch size: 73, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:05:34,470 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.74 vs. limit=15.0 +2024-09-18 06:06:00,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=383740.0, ans=0.125 +2024-09-18 06:06:04,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.91 vs. limit=15.0 +2024-09-18 06:06:28,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=383820.0, ans=0.025 +2024-09-18 06:06:52,156 INFO [train.py:1198] (1/2) Epoch 22, batch 950, loss[loss=0.2269, ctc_loss=0.1181, cr_loss=0.35, attn_decoder_loss=0.2313, over 29504.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1329, cr_loss=0.375, attn_decoder_loss=0.2485, over 5741211.07 frames. 
], batch size: 74, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:07:12,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=383940.0, ans=0.07 +2024-09-18 06:07:58,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=384060.0, ans=0.125 +2024-09-18 06:07:59,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=384060.0, ans=0.025 +2024-09-18 06:08:06,681 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.106e+01 8.991e+01 9.492e+01 1.022e+02 3.198e+02, threshold=1.898e+02, percent-clipped=2.0 +2024-09-18 06:08:06,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=384060.0, ans=0.025 +2024-09-18 06:08:11,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=384060.0, ans=0.125 +2024-09-18 06:08:13,998 INFO [train.py:1198] (1/2) Epoch 22, batch 1000, loss[loss=0.2262, ctc_loss=0.1183, cr_loss=0.3519, attn_decoder_loss=0.2303, over 29524.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1337, cr_loss=0.3764, attn_decoder_loss=0.2491, over 5735806.93 frames. ], batch size: 77, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:08:17,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=384100.0, ans=0.0 +2024-09-18 06:08:35,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=384140.0, ans=0.2 +2024-09-18 06:08:58,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=384220.0, ans=0.0 +2024-09-18 06:09:29,786 INFO [train.py:1198] (1/2) Epoch 22, batch 1050, loss[loss=0.2588, ctc_loss=0.1432, cr_loss=0.3881, attn_decoder_loss=0.2631, over 29700.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1331, cr_loss=0.3753, attn_decoder_loss=0.2484, over 5742554.22 frames. ], batch size: 85, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:09:44,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=384300.0, ans=0.125 +2024-09-18 06:09:44,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=384300.0, ans=0.2 +2024-09-18 06:09:56,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=384340.0, ans=0.125 +2024-09-18 06:10:19,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=384420.0, ans=0.2 +2024-09-18 06:10:39,318 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.94 vs. limit=10.0 +2024-09-18 06:10:42,978 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.462e+01 8.419e+01 8.971e+01 9.530e+01 1.277e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 06:10:50,698 INFO [train.py:1198] (1/2) Epoch 22, batch 1100, loss[loss=0.2348, ctc_loss=0.1144, cr_loss=0.3477, attn_decoder_loss=0.2405, over 29435.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1327, cr_loss=0.3746, attn_decoder_loss=0.2483, over 5755583.13 frames. 
], batch size: 78, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:11:09,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=384540.0, ans=10.0 +2024-09-18 06:11:10,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=384540.0, ans=0.125 +2024-09-18 06:11:54,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=384660.0, ans=0.125 +2024-09-18 06:12:02,788 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.14 vs. limit=15.0 +2024-09-18 06:12:06,669 INFO [train.py:1198] (1/2) Epoch 22, batch 1150, loss[loss=0.2481, ctc_loss=0.1432, cr_loss=0.3945, attn_decoder_loss=0.2509, over 29424.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1329, cr_loss=0.375, attn_decoder_loss=0.2483, over 5753584.50 frames. ], batch size: 78, lr: 5.09e-03, grad_scale: 8.0 +2024-09-18 06:12:11,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=384700.0, ans=0.2 +2024-09-18 06:12:42,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=384780.0, ans=0.025 +2024-09-18 06:12:43,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=384780.0, ans=0.0 +2024-09-18 06:12:47,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=384780.0, ans=10.0 +2024-09-18 06:12:58,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.37 vs. limit=15.0 +2024-09-18 06:13:01,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=384820.0, ans=0.125 +2024-09-18 06:13:11,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=384860.0, ans=0.025 +2024-09-18 06:13:15,269 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.605e+01 9.127e+01 9.575e+01 1.863e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-18 06:13:22,816 INFO [train.py:1198] (1/2) Epoch 22, batch 1200, loss[loss=0.2719, ctc_loss=0.1578, cr_loss=0.4245, attn_decoder_loss=0.2752, over 29684.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1334, cr_loss=0.3756, attn_decoder_loss=0.2493, over 5745990.05 frames. ], batch size: 85, lr: 5.09e-03, grad_scale: 16.0 +2024-09-18 06:13:37,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=384900.0, ans=0.0 +2024-09-18 06:14:42,674 INFO [train.py:1198] (1/2) Epoch 22, batch 1250, loss[loss=0.2569, ctc_loss=0.1397, cr_loss=0.3967, attn_decoder_loss=0.2611, over 29547.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1333, cr_loss=0.3764, attn_decoder_loss=0.2494, over 5772847.97 frames. 
], batch size: 92, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:14:47,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=385100.0, ans=0.2 +2024-09-18 06:14:50,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=385100.0, ans=0.025 +2024-09-18 06:14:52,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=385100.0, ans=0.125 +2024-09-18 06:15:10,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=385140.0, ans=0.04949747468305833 +2024-09-18 06:15:15,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=385180.0, ans=0.1 +2024-09-18 06:15:17,309 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.32 vs. limit=22.5 +2024-09-18 06:15:39,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=385220.0, ans=0.0 +2024-09-18 06:15:43,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=385260.0, ans=0.1 +2024-09-18 06:15:50,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=385260.0, ans=0.125 +2024-09-18 06:15:52,592 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.078e+01 8.234e+01 8.912e+01 9.418e+01 2.045e+02, threshold=1.782e+02, percent-clipped=1.0 +2024-09-18 06:15:55,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=385260.0, ans=0.025 +2024-09-18 06:15:58,560 INFO [train.py:1198] (1/2) Epoch 22, batch 1300, loss[loss=0.2644, ctc_loss=0.1561, cr_loss=0.4036, attn_decoder_loss=0.2675, over 28299.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1326, cr_loss=0.3748, attn_decoder_loss=0.2486, over 5778958.67 frames. ], batch size: 111, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:16:09,908 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.96 vs. limit=15.0 +2024-09-18 06:16:23,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=385340.0, ans=0.125 +2024-09-18 06:16:44,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=385420.0, ans=0.1 +2024-09-18 06:16:51,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=385420.0, ans=0.1 +2024-09-18 06:17:08,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=385460.0, ans=0.0 +2024-09-18 06:17:14,123 INFO [train.py:1198] (1/2) Epoch 22, batch 1350, loss[loss=0.2479, ctc_loss=0.1301, cr_loss=0.3752, attn_decoder_loss=0.2527, over 29763.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1327, cr_loss=0.3749, attn_decoder_loss=0.2483, over 5795678.58 frames. 
], batch size: 81, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:17:43,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=385540.0, ans=0.2 +2024-09-18 06:17:46,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=385580.0, ans=0.125 +2024-09-18 06:17:46,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=385580.0, ans=0.2 +2024-09-18 06:18:15,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=385620.0, ans=0.2 +2024-09-18 06:18:27,476 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.549e+01 8.459e+01 9.043e+01 9.728e+01 1.319e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-18 06:18:33,609 INFO [train.py:1198] (1/2) Epoch 22, batch 1400, loss[loss=0.2153, ctc_loss=0.1015, cr_loss=0.3039, attn_decoder_loss=0.2212, over 29582.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1324, cr_loss=0.3744, attn_decoder_loss=0.2481, over 5806506.85 frames. ], batch size: 69, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:18:35,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=385700.0, ans=0.0 +2024-09-18 06:18:46,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=385700.0, ans=0.0 +2024-09-18 06:19:10,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=385780.0, ans=0.125 +2024-09-18 06:19:14,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=385780.0, ans=0.125 +2024-09-18 06:19:17,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=385820.0, ans=0.2 +2024-09-18 06:19:32,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=385860.0, ans=0.2 +2024-09-18 06:19:36,413 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.33 vs. limit=15.0 +2024-09-18 06:19:43,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=385860.0, ans=0.1 +2024-09-18 06:19:49,087 INFO [train.py:1198] (1/2) Epoch 22, batch 1450, loss[loss=0.269, ctc_loss=0.1552, cr_loss=0.4297, attn_decoder_loss=0.2721, over 29503.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1332, cr_loss=0.3765, attn_decoder_loss=0.249, over 5803270.17 frames. ], batch size: 94, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:19:58,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=385900.0, ans=0.125 +2024-09-18 06:20:17,232 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.06 vs. 
limit=10.0 +2024-09-18 06:20:38,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=386020.0, ans=0.2 +2024-09-18 06:20:55,601 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.32 vs. limit=15.0 +2024-09-18 06:20:58,985 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.476e+01 9.077e+01 9.872e+01 2.572e+02, threshold=1.815e+02, percent-clipped=2.0 +2024-09-18 06:21:05,105 INFO [train.py:1198] (1/2) Epoch 22, batch 1500, loss[loss=0.2578, ctc_loss=0.1418, cr_loss=0.4098, attn_decoder_loss=0.2615, over 29623.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1333, cr_loss=0.3765, attn_decoder_loss=0.2493, over 5804309.84 frames. ], batch size: 86, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:21:14,895 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.65 vs. limit=15.0 +2024-09-18 06:21:26,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=386140.0, ans=0.0 +2024-09-18 06:21:42,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=386180.0, ans=0.125 +2024-09-18 06:21:42,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=386180.0, ans=0.125 +2024-09-18 06:21:46,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=386180.0, ans=10.0 +2024-09-18 06:21:54,753 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.63 vs. limit=6.0 +2024-09-18 06:22:25,767 INFO [train.py:1198] (1/2) Epoch 22, batch 1550, loss[loss=0.2713, ctc_loss=0.1632, cr_loss=0.441, attn_decoder_loss=0.2735, over 29507.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1336, cr_loss=0.3765, attn_decoder_loss=0.2491, over 5781161.26 frames. ], batch size: 90, lr: 5.08e-03, grad_scale: 8.0 +2024-09-18 06:22:43,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.11 vs. limit=10.0 +2024-09-18 06:23:01,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=386380.0, ans=0.1 +2024-09-18 06:23:06,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=386380.0, ans=0.125 +2024-09-18 06:23:10,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=386420.0, ans=0.025 +2024-09-18 06:23:11,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.24 vs. limit=15.0 +2024-09-18 06:23:12,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.63 vs. 
limit=12.0 +2024-09-18 06:23:13,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=386420.0, ans=0.0 +2024-09-18 06:23:31,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=386460.0, ans=0.125 +2024-09-18 06:23:34,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=386460.0, ans=0.1 +2024-09-18 06:23:35,902 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.093e+01 8.667e+01 9.294e+01 9.875e+01 4.781e+02, threshold=1.859e+02, percent-clipped=2.0 +2024-09-18 06:23:39,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=386460.0, ans=0.1 +2024-09-18 06:23:41,604 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.95 vs. limit=15.0 +2024-09-18 06:23:41,956 INFO [train.py:1198] (1/2) Epoch 22, batch 1600, loss[loss=0.2414, ctc_loss=0.1178, cr_loss=0.3325, attn_decoder_loss=0.2478, over 29679.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1337, cr_loss=0.3766, attn_decoder_loss=0.249, over 5763986.17 frames. ], batch size: 85, lr: 5.08e-03, grad_scale: 16.0 +2024-09-18 06:23:49,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=386500.0, ans=0.125 +2024-09-18 06:24:15,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=386580.0, ans=0.0 +2024-09-18 06:24:20,763 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.45 vs. limit=22.5 +2024-09-18 06:24:23,232 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:24:38,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=386620.0, ans=0.0 +2024-09-18 06:24:39,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=386620.0, ans=0.0 +2024-09-18 06:24:41,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=386660.0, ans=0.125 +2024-09-18 06:24:50,695 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:24:57,609 INFO [train.py:1198] (1/2) Epoch 22, batch 1650, loss[loss=0.2606, ctc_loss=0.1443, cr_loss=0.3934, attn_decoder_loss=0.2647, over 29690.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1336, cr_loss=0.376, attn_decoder_loss=0.249, over 5758124.26 frames. ], batch size: 89, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:24:59,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=386700.0, ans=0.125 +2024-09-18 06:25:13,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=386740.0, ans=0.125 +2024-09-18 06:25:55,408 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.79 vs. 
limit=15.0 +2024-09-18 06:25:55,874 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.53 vs. limit=15.0 +2024-09-18 06:25:56,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=386820.0, ans=0.125 +2024-09-18 06:26:12,695 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 8.418e+01 9.168e+01 9.653e+01 1.530e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-18 06:26:14,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=386860.0, ans=0.125 +2024-09-18 06:26:17,113 INFO [train.py:1198] (1/2) Epoch 22, batch 1700, loss[loss=0.216, ctc_loss=0.1076, cr_loss=0.3385, attn_decoder_loss=0.2206, over 29568.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1332, cr_loss=0.3758, attn_decoder_loss=0.2489, over 5780844.96 frames. ], batch size: 69, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:26:45,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=386980.0, ans=0.1 +2024-09-18 06:26:52,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=386980.0, ans=0.2 +2024-09-18 06:27:09,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.80 vs. limit=15.0 +2024-09-18 06:27:21,178 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.68 vs. limit=22.5 +2024-09-18 06:27:32,815 INFO [train.py:1198] (1/2) Epoch 22, batch 1750, loss[loss=0.2209, ctc_loss=0.1192, cr_loss=0.3545, attn_decoder_loss=0.2243, over 29306.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1327, cr_loss=0.3753, attn_decoder_loss=0.2484, over 5788969.78 frames. ], batch size: 67, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:27:40,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=387100.0, ans=0.125 +2024-09-18 06:27:49,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=387140.0, ans=0.2 +2024-09-18 06:28:24,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=387220.0, ans=0.025 +2024-09-18 06:28:38,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=387260.0, ans=0.2 +2024-09-18 06:28:39,448 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.54 vs. limit=15.0 +2024-09-18 06:28:44,542 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.338e+01 8.803e+01 9.481e+01 6.567e+02, threshold=1.761e+02, percent-clipped=1.0 +2024-09-18 06:28:44,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=387260.0, ans=0.0 +2024-09-18 06:28:49,111 INFO [train.py:1198] (1/2) Epoch 22, batch 1800, loss[loss=0.2573, ctc_loss=0.1491, cr_loss=0.4204, attn_decoder_loss=0.26, over 29681.00 frames. 
], tot_loss[loss=0.2443, ctc_loss=0.1328, cr_loss=0.3754, attn_decoder_loss=0.2483, over 5791110.01 frames. ], batch size: 83, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:28:54,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=387300.0, ans=0.125 +2024-09-18 06:29:05,872 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0 +2024-09-18 06:29:11,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=387340.0, ans=0.1 +2024-09-18 06:29:48,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=387420.0, ans=0.125 +2024-09-18 06:29:59,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=387460.0, ans=0.125 +2024-09-18 06:29:59,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=387460.0, ans=0.125 +2024-09-18 06:30:09,423 INFO [train.py:1198] (1/2) Epoch 22, batch 1850, loss[loss=0.2608, ctc_loss=0.1432, cr_loss=0.3911, attn_decoder_loss=0.2652, over 29615.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1324, cr_loss=0.3753, attn_decoder_loss=0.248, over 5797576.76 frames. ], batch size: 86, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:30:14,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=387500.0, ans=0.1 +2024-09-18 06:30:17,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=387500.0, ans=0.0 +2024-09-18 06:30:17,920 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.16 vs. limit=12.0 +2024-09-18 06:30:23,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=387540.0, ans=0.125 +2024-09-18 06:30:30,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=387540.0, ans=0.025 +2024-09-18 06:30:55,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=387620.0, ans=0.125 +2024-09-18 06:31:20,352 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.630e+01 9.053e+01 9.518e+01 1.576e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-18 06:31:24,767 INFO [train.py:1198] (1/2) Epoch 22, batch 1900, loss[loss=0.2595, ctc_loss=0.139, cr_loss=0.3997, attn_decoder_loss=0.264, over 29717.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1326, cr_loss=0.3755, attn_decoder_loss=0.2484, over 5805200.15 frames. ], batch size: 89, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:31:40,515 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.58 vs. 
limit=15.0 +2024-09-18 06:31:55,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=387780.0, ans=0.05 +2024-09-18 06:32:00,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=387780.0, ans=0.125 +2024-09-18 06:32:15,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=387820.0, ans=0.0 +2024-09-18 06:32:32,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=387860.0, ans=0.125 +2024-09-18 06:32:32,512 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.03 vs. limit=22.5 +2024-09-18 06:32:40,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.53 vs. limit=10.0 +2024-09-18 06:32:40,935 INFO [train.py:1198] (1/2) Epoch 22, batch 1950, loss[loss=0.2332, ctc_loss=0.1267, cr_loss=0.3614, attn_decoder_loss=0.237, over 29422.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1333, cr_loss=0.3773, attn_decoder_loss=0.2497, over 5819548.96 frames. ], batch size: 78, lr: 5.07e-03, grad_scale: 8.0 +2024-09-18 06:32:57,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=387940.0, ans=0.125 +2024-09-18 06:33:03,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=387940.0, ans=0.0 +2024-09-18 06:33:13,733 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:33:21,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=387980.0, ans=0.2 +2024-09-18 06:33:29,800 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.40 vs. limit=22.5 +2024-09-18 06:33:47,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=388060.0, ans=0.125 +2024-09-18 06:33:55,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=388060.0, ans=0.1 +2024-09-18 06:33:56,801 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.872e+01 8.668e+01 9.187e+01 9.705e+01 3.737e+02, threshold=1.837e+02, percent-clipped=2.0 +2024-09-18 06:34:01,472 INFO [train.py:1198] (1/2) Epoch 22, batch 2000, loss[loss=0.2218, ctc_loss=0.1142, cr_loss=0.3468, attn_decoder_loss=0.2261, over 29352.00 frames. ], tot_loss[loss=0.2463, ctc_loss=0.1339, cr_loss=0.3783, attn_decoder_loss=0.2503, over 5798445.45 frames. 
], batch size: 67, lr: 5.07e-03, grad_scale: 16.0 +2024-09-18 06:34:15,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=388140.0, ans=0.1 +2024-09-18 06:34:26,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=388140.0, ans=0.0 +2024-09-18 06:34:28,141 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.81 vs. limit=15.0 +2024-09-18 06:34:33,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=388180.0, ans=0.0 +2024-09-18 06:34:36,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=388180.0, ans=0.125 +2024-09-18 06:34:41,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=388180.0, ans=0.09899494936611666 +2024-09-18 06:34:42,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=388180.0, ans=0.025 +2024-09-18 06:34:54,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=388220.0, ans=0.125 +2024-09-18 06:34:56,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=388220.0, ans=0.125 +2024-09-18 06:35:03,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=388260.0, ans=0.1 +2024-09-18 06:35:17,272 INFO [train.py:1198] (1/2) Epoch 22, batch 2050, loss[loss=0.2282, ctc_loss=0.1252, cr_loss=0.3588, attn_decoder_loss=0.2317, over 29440.00 frames. ], tot_loss[loss=0.2459, ctc_loss=0.1342, cr_loss=0.3788, attn_decoder_loss=0.2499, over 5789924.53 frames. ], batch size: 70, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:35:19,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=388300.0, ans=0.07 +2024-09-18 06:35:23,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=388300.0, ans=0.1 +2024-09-18 06:35:44,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=388340.0, ans=0.125 +2024-09-18 06:35:51,432 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.07 vs. 
limit=15.0 +2024-09-18 06:35:54,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=388380.0, ans=0.1 +2024-09-18 06:35:54,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=388380.0, ans=0.125 +2024-09-18 06:36:03,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=388420.0, ans=0.07 +2024-09-18 06:36:24,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=388460.0, ans=0.125 +2024-09-18 06:36:30,007 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.598e+01 9.133e+01 9.835e+01 1.696e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-18 06:36:33,165 INFO [train.py:1198] (1/2) Epoch 22, batch 2100, loss[loss=0.2436, ctc_loss=0.1252, cr_loss=0.3594, attn_decoder_loss=0.2488, over 29792.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1338, cr_loss=0.3781, attn_decoder_loss=0.2494, over 5800972.43 frames. ], batch size: 81, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:36:33,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=388500.0, ans=0.1 +2024-09-18 06:36:44,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=388500.0, ans=0.125 +2024-09-18 06:37:11,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=388580.0, ans=0.2 +2024-09-18 06:37:11,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=388580.0, ans=0.0 +2024-09-18 06:37:25,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=388620.0, ans=0.0 +2024-09-18 06:37:42,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=388660.0, ans=0.0 +2024-09-18 06:37:52,731 INFO [train.py:1198] (1/2) Epoch 22, batch 2150, loss[loss=0.2476, ctc_loss=0.1356, cr_loss=0.3895, attn_decoder_loss=0.2514, over 29425.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1331, cr_loss=0.3768, attn_decoder_loss=0.2487, over 5816076.29 frames. ], batch size: 78, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:38:10,130 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.70 vs. limit=15.0 +2024-09-18 06:38:31,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=388780.0, ans=0.2 +2024-09-18 06:38:51,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten.whitening_limit, batch_count=388820.0, ans=22.5 +2024-09-18 06:38:56,403 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.52 vs. 
limit=5.0 +2024-09-18 06:39:05,566 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.464e+01 8.588e+01 8.944e+01 9.592e+01 1.412e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-18 06:39:06,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=388860.0, ans=0.125 +2024-09-18 06:39:08,643 INFO [train.py:1198] (1/2) Epoch 22, batch 2200, loss[loss=0.2443, ctc_loss=0.1321, cr_loss=0.3765, attn_decoder_loss=0.2484, over 29633.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1334, cr_loss=0.3779, attn_decoder_loss=0.249, over 5813317.25 frames. ], batch size: 86, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:39:15,744 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.98 vs. limit=15.0 +2024-09-18 06:39:50,337 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.18 vs. limit=15.0 +2024-09-18 06:39:54,662 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.63 vs. limit=6.0 +2024-09-18 06:39:56,406 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.68 vs. limit=15.0 +2024-09-18 06:40:23,972 INFO [train.py:1198] (1/2) Epoch 22, batch 2250, loss[loss=0.2401, ctc_loss=0.1252, cr_loss=0.3538, attn_decoder_loss=0.245, over 29702.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1331, cr_loss=0.3769, attn_decoder_loss=0.2488, over 5813055.48 frames. ], batch size: 82, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:40:59,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=389180.0, ans=0.125 +2024-09-18 06:41:08,578 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:41:22,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=389220.0, ans=0.2 +2024-09-18 06:41:26,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=389260.0, ans=0.125 +2024-09-18 06:41:41,016 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.612e+01 9.109e+01 9.746e+01 4.316e+02, threshold=1.822e+02, percent-clipped=5.0 +2024-09-18 06:41:44,067 INFO [train.py:1198] (1/2) Epoch 22, batch 2300, loss[loss=0.2198, ctc_loss=0.1147, cr_loss=0.3345, attn_decoder_loss=0.224, over 29280.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1326, cr_loss=0.3757, attn_decoder_loss=0.2476, over 5801002.46 frames. 
], batch size: 71, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:41:44,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=389300.0, ans=0.0 +2024-09-18 06:41:53,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=389300.0, ans=0.125 +2024-09-18 06:42:05,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=389340.0, ans=0.0 +2024-09-18 06:42:20,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=389380.0, ans=0.125 +2024-09-18 06:42:31,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=389420.0, ans=0.0 +2024-09-18 06:42:59,650 INFO [train.py:1198] (1/2) Epoch 22, batch 2350, loss[loss=0.2597, ctc_loss=0.151, cr_loss=0.4132, attn_decoder_loss=0.2626, over 29697.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1328, cr_loss=0.3762, attn_decoder_loss=0.2479, over 5806194.95 frames. ], batch size: 83, lr: 5.06e-03, grad_scale: 8.0 +2024-09-18 06:43:19,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=389540.0, ans=0.0 +2024-09-18 06:43:38,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=389580.0, ans=6.0 +2024-09-18 06:43:47,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=389620.0, ans=10.0 +2024-09-18 06:44:01,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.07 vs. limit=15.0 +2024-09-18 06:44:13,260 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.909e+01 8.749e+01 9.346e+01 1.024e+02 1.570e+02, threshold=1.869e+02, percent-clipped=0.0 +2024-09-18 06:44:16,222 INFO [train.py:1198] (1/2) Epoch 22, batch 2400, loss[loss=0.2351, ctc_loss=0.1328, cr_loss=0.3825, attn_decoder_loss=0.238, over 29515.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1334, cr_loss=0.3766, attn_decoder_loss=0.2485, over 5809495.90 frames. 
], batch size: 76, lr: 5.05e-03, grad_scale: 16.0 +2024-09-18 06:44:23,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=389700.0, ans=0.2 +2024-09-18 06:44:28,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=389700.0, ans=0.125 +2024-09-18 06:44:28,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=389700.0, ans=0.125 +2024-09-18 06:44:28,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=389700.0, ans=0.025 +2024-09-18 06:44:42,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=389740.0, ans=0.125 +2024-09-18 06:44:44,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=389740.0, ans=0.125 +2024-09-18 06:44:46,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.38 vs. limit=15.0 +2024-09-18 06:44:52,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=389780.0, ans=0.125 +2024-09-18 06:45:08,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=389820.0, ans=0.09899494936611666 +2024-09-18 06:45:25,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=389860.0, ans=0.125 +2024-09-18 06:45:36,563 INFO [train.py:1198] (1/2) Epoch 22, batch 2450, loss[loss=0.2549, ctc_loss=0.1282, cr_loss=0.3695, attn_decoder_loss=0.2608, over 29709.00 frames. ], tot_loss[loss=0.2454, ctc_loss=0.1339, cr_loss=0.3771, attn_decoder_loss=0.2494, over 5786357.26 frames. ], batch size: 82, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:45:36,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=389900.0, ans=0.0 +2024-09-18 06:45:40,146 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.91 vs. limit=6.0 +2024-09-18 06:46:02,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=389940.0, ans=0.025 +2024-09-18 06:46:19,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.44 vs. limit=6.0 +2024-09-18 06:46:22,507 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.71 vs. limit=15.0 +2024-09-18 06:46:25,336 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.90 vs. 
limit=15.0 +2024-09-18 06:46:39,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=390060.0, ans=0.125 +2024-09-18 06:46:41,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=390060.0, ans=0.125 +2024-09-18 06:46:45,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=390060.0, ans=0.125 +2024-09-18 06:46:47,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=390060.0, ans=0.125 +2024-09-18 06:46:50,168 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.482e+01 8.749e+01 9.206e+01 9.779e+01 5.372e+02, threshold=1.841e+02, percent-clipped=2.0 +2024-09-18 06:46:51,746 INFO [train.py:1198] (1/2) Epoch 22, batch 2500, loss[loss=0.2558, ctc_loss=0.1423, cr_loss=0.4017, attn_decoder_loss=0.2595, over 29629.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.134, cr_loss=0.377, attn_decoder_loss=0.2492, over 5795715.74 frames. ], batch size: 86, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:47:11,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=390140.0, ans=0.125 +2024-09-18 06:47:23,283 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.05 vs. limit=6.0 +2024-09-18 06:47:33,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=390180.0, ans=0.0 +2024-09-18 06:48:01,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=390260.0, ans=0.125 +2024-09-18 06:48:07,750 INFO [train.py:1198] (1/2) Epoch 22, batch 2550, loss[loss=0.2108, ctc_loss=0.1073, cr_loss=0.3296, attn_decoder_loss=0.215, over 29372.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1337, cr_loss=0.3774, attn_decoder_loss=0.2493, over 5797403.84 frames. ], batch size: 67, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:48:35,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=390340.0, ans=0.09899494936611666 +2024-09-18 06:48:44,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=390380.0, ans=0.2 +2024-09-18 06:48:54,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=390420.0, ans=0.125 +2024-09-18 06:48:58,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=390420.0, ans=0.035 +2024-09-18 06:49:12,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.min_positive, batch_count=390460.0, ans=0.025 +2024-09-18 06:49:22,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=390460.0, ans=0.1 +2024-09-18 06:49:24,704 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.271e+01 8.515e+01 9.035e+01 9.623e+01 2.254e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-18 06:49:26,243 INFO [train.py:1198] (1/2) Epoch 22, batch 2600, loss[loss=0.2365, ctc_loss=0.1245, cr_loss=0.3764, attn_decoder_loss=0.2406, over 29437.00 frames. 
], tot_loss[loss=0.2457, ctc_loss=0.1338, cr_loss=0.3778, attn_decoder_loss=0.2497, over 5793797.95 frames. ], batch size: 78, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:49:26,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=390500.0, ans=0.125 +2024-09-18 06:49:37,189 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.94 vs. limit=10.0 +2024-09-18 06:49:57,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=390580.0, ans=0.1 +2024-09-18 06:50:06,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=390580.0, ans=0.125 +2024-09-18 06:50:10,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.58 vs. limit=6.0 +2024-09-18 06:50:15,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=390620.0, ans=0.025 +2024-09-18 06:50:26,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=390620.0, ans=0.125 +2024-09-18 06:50:43,876 INFO [train.py:1198] (1/2) Epoch 22, batch 2650, loss[loss=0.2607, ctc_loss=0.1462, cr_loss=0.4111, attn_decoder_loss=0.2643, over 29226.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1339, cr_loss=0.3785, attn_decoder_loss=0.2498, over 5800889.80 frames. ], batch size: 100, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:50:50,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=390700.0, ans=0.1 +2024-09-18 06:51:02,470 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:51:08,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=6.09 vs. limit=12.0 +2024-09-18 06:51:18,156 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.33 vs. 
limit=22.5 +2024-09-18 06:51:23,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=390780.0, ans=0.0 +2024-09-18 06:51:40,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=390820.0, ans=0.025 +2024-09-18 06:51:42,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=390860.0, ans=0.2 +2024-09-18 06:51:51,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=390860.0, ans=0.0 +2024-09-18 06:51:53,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=390860.0, ans=0.1 +2024-09-18 06:51:56,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=390860.0, ans=0.07 +2024-09-18 06:51:57,485 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.636e+01 8.432e+01 9.013e+01 9.580e+01 2.667e+02, threshold=1.803e+02, percent-clipped=2.0 +2024-09-18 06:51:59,082 INFO [train.py:1198] (1/2) Epoch 22, batch 2700, loss[loss=0.2514, ctc_loss=0.1376, cr_loss=0.381, attn_decoder_loss=0.2556, over 29525.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1338, cr_loss=0.3784, attn_decoder_loss=0.2499, over 5796779.39 frames. ], batch size: 87, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:52:02,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=390900.0, ans=0.0 +2024-09-18 06:52:42,930 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.08 vs. limit=22.5 +2024-09-18 06:52:45,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=391020.0, ans=0.0 +2024-09-18 06:52:47,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.84 vs. limit=15.0 +2024-09-18 06:53:15,006 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.14 vs. limit=6.0 +2024-09-18 06:53:17,091 INFO [train.py:1198] (1/2) Epoch 22, batch 2750, loss[loss=0.2361, ctc_loss=0.1312, cr_loss=0.3716, attn_decoder_loss=0.2395, over 29500.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1327, cr_loss=0.3757, attn_decoder_loss=0.2485, over 5795780.10 frames. ], batch size: 75, lr: 5.05e-03, grad_scale: 8.0 +2024-09-18 06:53:17,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.30 vs. 
limit=22.5 +2024-09-18 06:53:28,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=391100.0, ans=0.0 +2024-09-18 06:53:46,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=391140.0, ans=0.125 +2024-09-18 06:53:49,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=391180.0, ans=0.125 +2024-09-18 06:53:54,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=391180.0, ans=0.2 +2024-09-18 06:53:57,457 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:54:04,609 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.30 vs. limit=22.5 +2024-09-18 06:54:06,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=391220.0, ans=0.1 +2024-09-18 06:54:15,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=391220.0, ans=0.125 +2024-09-18 06:54:23,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=391260.0, ans=0.0 +2024-09-18 06:54:26,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=391260.0, ans=0.1 +2024-09-18 06:54:34,213 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.791e+01 9.407e+01 1.009e+02 2.763e+02, threshold=1.881e+02, percent-clipped=2.0 +2024-09-18 06:54:35,691 INFO [train.py:1198] (1/2) Epoch 22, batch 2800, loss[loss=0.2609, ctc_loss=0.1637, cr_loss=0.3954, attn_decoder_loss=0.2629, over 20425.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1332, cr_loss=0.3766, attn_decoder_loss=0.2487, over 5776564.56 frames. ], batch size: 209, lr: 5.04e-03, grad_scale: 16.0 +2024-09-18 06:54:45,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.04 vs. limit=10.0 +2024-09-18 06:54:46,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=391300.0, ans=0.1 +2024-09-18 06:54:47,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=391300.0, ans=0.1 +2024-09-18 06:54:56,116 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.25 vs. 
limit=15.0 +2024-09-18 06:54:57,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=391340.0, ans=0.07 +2024-09-18 06:54:58,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=391340.0, ans=0.125 +2024-09-18 06:55:03,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=391340.0, ans=0.1 +2024-09-18 06:55:08,549 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.12 vs. limit=6.0 +2024-09-18 06:55:36,891 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 06:55:38,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=391460.0, ans=0.125 +2024-09-18 06:55:50,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.68 vs. limit=12.0 +2024-09-18 06:55:51,518 INFO [train.py:1198] (1/2) Epoch 22, batch 2850, loss[loss=0.2358, ctc_loss=0.1352, cr_loss=0.3771, attn_decoder_loss=0.2386, over 29515.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1337, cr_loss=0.3771, attn_decoder_loss=0.249, over 5761324.26 frames. ], batch size: 77, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 06:55:59,489 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.93 vs. limit=22.5 +2024-09-18 06:56:09,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=391540.0, ans=0.0 +2024-09-18 06:56:16,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=391540.0, ans=0.125 +2024-09-18 06:56:23,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=391580.0, ans=0.125 +2024-09-18 06:56:24,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=391580.0, ans=0.125 +2024-09-18 06:56:33,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=391580.0, ans=0.035 +2024-09-18 06:56:37,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.54 vs. limit=22.5 +2024-09-18 06:56:55,222 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.96 vs. limit=22.5 +2024-09-18 06:57:03,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=391660.0, ans=0.07 +2024-09-18 06:57:06,933 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.54 vs. 
limit=15.0 +2024-09-18 06:57:09,048 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.734e+01 8.934e+01 9.738e+01 1.096e+02 2.741e+02, threshold=1.948e+02, percent-clipped=1.0 +2024-09-18 06:57:09,074 INFO [train.py:1198] (1/2) Epoch 22, batch 2900, loss[loss=0.2389, ctc_loss=0.1237, cr_loss=0.3624, attn_decoder_loss=0.2437, over 29424.00 frames. ], tot_loss[loss=0.2464, ctc_loss=0.1344, cr_loss=0.3792, attn_decoder_loss=0.2504, over 5786963.23 frames. ], batch size: 79, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 06:57:19,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=391700.0, ans=0.1 +2024-09-18 06:57:52,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=391780.0, ans=0.5 +2024-09-18 06:57:58,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=391820.0, ans=0.025 +2024-09-18 06:58:17,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.88 vs. limit=15.0 +2024-09-18 06:58:19,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=391860.0, ans=0.025 +2024-09-18 06:58:27,098 INFO [train.py:1198] (1/2) Epoch 22, batch 2950, loss[loss=0.2372, ctc_loss=0.1179, cr_loss=0.3609, attn_decoder_loss=0.2424, over 29516.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1333, cr_loss=0.3766, attn_decoder_loss=0.2492, over 5780900.42 frames. ], batch size: 75, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 06:58:35,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=391900.0, ans=0.125 +2024-09-18 06:58:51,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=391940.0, ans=0.0 +2024-09-18 06:59:36,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=392060.0, ans=0.1 +2024-09-18 06:59:36,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=392060.0, ans=0.125 +2024-09-18 06:59:43,577 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.515e+01 8.512e+01 8.926e+01 9.722e+01 3.359e+02, threshold=1.785e+02, percent-clipped=2.0 +2024-09-18 06:59:43,614 INFO [train.py:1198] (1/2) Epoch 22, batch 3000, loss[loss=0.2431, ctc_loss=0.1288, cr_loss=0.3765, attn_decoder_loss=0.2475, over 29767.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1332, cr_loss=0.376, attn_decoder_loss=0.2491, over 5780984.07 frames. ], batch size: 81, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 06:59:43,614 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 07:00:03,085 INFO [train.py:1230] (1/2) Epoch 22, validation: loss=0.2118, ctc_loss=0.03901, cr_loss=5.241e-15, attn_decoder_loss=0.231, over 944034.00 frames. 
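Note on the loss values reported by train.py in this log: each `tot_loss[...]` and validation entry is a weighted combination of the three logged components. The numbers above are consistent with weights of roughly 0.1 for ctc_loss, 0.9 for attn_decoder_loss, and 0.02 for cr_loss; for the validation entry just above, 0.1 * 0.03901 + 0.9 * 0.231 + 0.02 * 5.241e-15 ≈ 0.2118. The sketch below is illustrative only: the function name and default scales are assumptions inferred from the logged totals, not the recipe's actual train.py code (see the PR linked in the README for the real implementation).

def combine_losses(
    ctc_loss: float,
    attn_decoder_loss: float,
    cr_loss: float,
    ctc_scale: float = 0.1,           # assumed: inferred from the logged totals
    attn_decoder_scale: float = 0.9,  # assumed: inferred from the logged totals
    cr_scale: float = 0.02,           # assumed: inferred from the logged totals
) -> float:
    # Weighted sum that reproduces the tot_loss[...] entries in this log.
    return (
        ctc_scale * ctc_loss
        + attn_decoder_scale * attn_decoder_loss
        + cr_scale * cr_loss
    )

# Check against a logged training entry (Epoch 22, batch 2000):
#   tot_loss[loss=0.2463, ctc_loss=0.1339, cr_loss=0.3783, attn_decoder_loss=0.2503]
assert abs(combine_losses(0.1339, 0.2503, 0.3783) - 0.2463) < 1e-3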
+2024-09-18 07:00:03,086 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 07:00:15,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=392100.0, ans=0.125 +2024-09-18 07:00:27,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=392140.0, ans=0.125 +2024-09-18 07:00:30,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=392140.0, ans=10.0 +2024-09-18 07:00:58,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=392220.0, ans=0.2 +2024-09-18 07:01:15,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=392260.0, ans=0.2 +2024-09-18 07:01:21,518 INFO [train.py:1198] (1/2) Epoch 22, batch 3050, loss[loss=0.2405, ctc_loss=0.1349, cr_loss=0.3677, attn_decoder_loss=0.2441, over 29520.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1343, cr_loss=0.3776, attn_decoder_loss=0.2503, over 5776076.39 frames. ], batch size: 76, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 07:01:40,321 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:01:43,736 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.03 vs. limit=22.5 +2024-09-18 07:01:44,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=392340.0, ans=0.125 +2024-09-18 07:01:45,203 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.68 vs. limit=15.0 +2024-09-18 07:01:46,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=392340.0, ans=0.0 +2024-09-18 07:02:11,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=392420.0, ans=0.2 +2024-09-18 07:02:11,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=392420.0, ans=0.025 +2024-09-18 07:02:20,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=392460.0, ans=0.0 +2024-09-18 07:02:22,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=392460.0, ans=0.125 +2024-09-18 07:02:22,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=392460.0, ans=0.125 +2024-09-18 07:02:27,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.71 vs. 
limit=6.0 +2024-09-18 07:02:34,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=392460.0, ans=0.125 +2024-09-18 07:02:37,019 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.586e+01 8.738e+01 9.227e+01 9.918e+01 5.288e+02, threshold=1.845e+02, percent-clipped=2.0 +2024-09-18 07:02:37,041 INFO [train.py:1198] (1/2) Epoch 22, batch 3100, loss[loss=0.261, ctc_loss=0.152, cr_loss=0.3844, attn_decoder_loss=0.2645, over 29256.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.1338, cr_loss=0.377, attn_decoder_loss=0.2496, over 5776269.56 frames. ], batch size: 100, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 07:02:38,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=392500.0, ans=0.1 +2024-09-18 07:02:41,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=392500.0, ans=0.125 +2024-09-18 07:02:48,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=392500.0, ans=0.2 +2024-09-18 07:02:52,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=392540.0, ans=0.2 +2024-09-18 07:03:09,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=392580.0, ans=0.0 +2024-09-18 07:03:16,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=392580.0, ans=0.0 +2024-09-18 07:03:49,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=392660.0, ans=0.125 +2024-09-18 07:03:51,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=392660.0, ans=0.125 +2024-09-18 07:03:55,332 INFO [train.py:1198] (1/2) Epoch 22, batch 3150, loss[loss=0.2538, ctc_loss=0.1399, cr_loss=0.3768, attn_decoder_loss=0.2581, over 28854.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1336, cr_loss=0.3767, attn_decoder_loss=0.2494, over 5783172.61 frames. ], batch size: 104, lr: 5.04e-03, grad_scale: 8.0 +2024-09-18 07:04:18,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.67 vs. limit=6.0 +2024-09-18 07:05:10,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=392860.0, ans=0.125 +2024-09-18 07:05:13,336 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.207e+01 8.635e+01 9.167e+01 9.821e+01 1.751e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-18 07:05:13,357 INFO [train.py:1198] (1/2) Epoch 22, batch 3200, loss[loss=0.256, ctc_loss=0.1488, cr_loss=0.4239, attn_decoder_loss=0.2585, over 29406.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1329, cr_loss=0.3758, attn_decoder_loss=0.2487, over 5793169.16 frames. 
], batch size: 79, lr: 5.03e-03, grad_scale: 16.0 +2024-09-18 07:05:27,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=392940.0, ans=0.125 +2024-09-18 07:05:36,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=392940.0, ans=0.125 +2024-09-18 07:05:47,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=392980.0, ans=0.125 +2024-09-18 07:05:49,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.33 vs. limit=10.0 +2024-09-18 07:06:11,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.05 vs. limit=12.0 +2024-09-18 07:06:20,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=393060.0, ans=0.125 +2024-09-18 07:06:26,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=393060.0, ans=0.125 +2024-09-18 07:06:29,130 INFO [train.py:1198] (1/2) Epoch 22, batch 3250, loss[loss=0.248, ctc_loss=0.1331, cr_loss=0.3853, attn_decoder_loss=0.2522, over 29684.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1327, cr_loss=0.3753, attn_decoder_loss=0.2489, over 5799410.67 frames. ], batch size: 84, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:06:37,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=393100.0, ans=0.125 +2024-09-18 07:06:59,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=393180.0, ans=0.0 +2024-09-18 07:07:09,512 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.51 vs. limit=6.0 +2024-09-18 07:07:13,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=393220.0, ans=0.0 +2024-09-18 07:07:14,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=393220.0, ans=0.1 +2024-09-18 07:07:25,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=393220.0, ans=0.125 +2024-09-18 07:07:47,124 INFO [train.py:1198] (1/2) Epoch 22, batch 3300, loss[loss=0.2563, ctc_loss=0.1401, cr_loss=0.3784, attn_decoder_loss=0.2608, over 28574.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1317, cr_loss=0.373, attn_decoder_loss=0.2477, over 5796953.86 frames. 
], batch size: 112, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:07:48,689 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.338e+01 8.576e+01 9.104e+01 9.607e+01 2.025e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-18 07:08:05,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=393340.0, ans=0.125 +2024-09-18 07:08:11,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=393340.0, ans=0.125 +2024-09-18 07:08:29,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=393380.0, ans=0.07 +2024-09-18 07:08:40,470 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.85 vs. limit=12.0 +2024-09-18 07:08:47,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=393460.0, ans=0.0 +2024-09-18 07:09:04,458 INFO [train.py:1198] (1/2) Epoch 22, batch 3350, loss[loss=0.2539, ctc_loss=0.1459, cr_loss=0.3864, attn_decoder_loss=0.2573, over 28753.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1327, cr_loss=0.3748, attn_decoder_loss=0.2488, over 5774218.60 frames. ], batch size: 104, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:09:05,293 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.72 vs. limit=15.0 +2024-09-18 07:09:24,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=393540.0, ans=0.125 +2024-09-18 07:09:36,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=393580.0, ans=0.125 +2024-09-18 07:09:46,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=393580.0, ans=0.125 +2024-09-18 07:10:09,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=393660.0, ans=0.125 +2024-09-18 07:10:20,920 INFO [train.py:1198] (1/2) Epoch 22, batch 3400, loss[loss=0.2146, ctc_loss=0.1187, cr_loss=0.3437, attn_decoder_loss=0.2176, over 29314.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1333, cr_loss=0.3759, attn_decoder_loss=0.2488, over 5765965.26 frames. 
], batch size: 67, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:10:22,302 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.700e+01 8.673e+01 9.256e+01 9.754e+01 2.312e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-18 07:10:24,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=393700.0, ans=0.125 +2024-09-18 07:10:31,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=393700.0, ans=0.125 +2024-09-18 07:10:31,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=393700.0, ans=0.1 +2024-09-18 07:10:33,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=393700.0, ans=0.125 +2024-09-18 07:11:02,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.07 vs. limit=22.5 +2024-09-18 07:11:10,450 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.66 vs. limit=22.5 +2024-09-18 07:11:13,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=393820.0, ans=0.2 +2024-09-18 07:11:18,366 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.79 vs. limit=15.0 +2024-09-18 07:11:18,623 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.66 vs. limit=15.0 +2024-09-18 07:11:31,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=393860.0, ans=0.04949747468305833 +2024-09-18 07:11:38,233 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.84 vs. limit=15.0 +2024-09-18 07:11:38,636 INFO [train.py:1198] (1/2) Epoch 22, batch 3450, loss[loss=0.2538, ctc_loss=0.1348, cr_loss=0.3925, attn_decoder_loss=0.2583, over 28327.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1333, cr_loss=0.3764, attn_decoder_loss=0.2491, over 5773570.80 frames. ], batch size: 111, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:11:51,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=393900.0, ans=0.0 +2024-09-18 07:12:03,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=393940.0, ans=0.025 +2024-09-18 07:12:07,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=393980.0, ans=0.1 +2024-09-18 07:12:17,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=393980.0, ans=0.2 +2024-09-18 07:12:40,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=394060.0, ans=0.125 +2024-09-18 07:12:56,515 INFO [train.py:1198] (1/2) Epoch 22, batch 3500, loss[loss=0.2334, ctc_loss=0.1275, cr_loss=0.3564, attn_decoder_loss=0.2373, over 29777.00 frames. 
], tot_loss[loss=0.2448, ctc_loss=0.1333, cr_loss=0.3767, attn_decoder_loss=0.2488, over 5776370.20 frames. ], batch size: 72, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:12:56,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=394100.0, ans=0.025 +2024-09-18 07:12:58,048 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.251e+01 8.509e+01 8.992e+01 9.710e+01 6.035e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-18 07:13:07,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer_ff3.min_abs, batch_count=394100.0, ans=0.2 +2024-09-18 07:13:20,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=394140.0, ans=0.95 +2024-09-18 07:13:25,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=394180.0, ans=0.1 +2024-09-18 07:13:25,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.64 vs. limit=15.0 +2024-09-18 07:13:31,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=394180.0, ans=0.125 +2024-09-18 07:13:52,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=394220.0, ans=0.125 +2024-09-18 07:13:53,819 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:14:01,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=394260.0, ans=0.125 +2024-09-18 07:14:11,196 INFO [train.py:1198] (1/2) Epoch 22, batch 3550, loss[loss=0.2562, ctc_loss=0.1301, cr_loss=0.3685, attn_decoder_loss=0.262, over 29713.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1329, cr_loss=0.3756, attn_decoder_loss=0.2487, over 5783901.56 frames. ], batch size: 89, lr: 5.03e-03, grad_scale: 8.0 +2024-09-18 07:14:36,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=394340.0, ans=0.025 +2024-09-18 07:14:44,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=394380.0, ans=0.025 +2024-09-18 07:14:52,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn2.whiten.whitening_limit, batch_count=394380.0, ans=22.5 +2024-09-18 07:15:11,875 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.57 vs. limit=15.0 +2024-09-18 07:15:17,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=394460.0, ans=0.0 +2024-09-18 07:15:18,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=394460.0, ans=0.125 +2024-09-18 07:15:20,945 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.55 vs. 
limit=15.0 +2024-09-18 07:15:25,911 INFO [train.py:1198] (1/2) Epoch 22, batch 3600, loss[loss=0.2458, ctc_loss=0.1289, cr_loss=0.3711, attn_decoder_loss=0.2505, over 29514.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1329, cr_loss=0.3756, attn_decoder_loss=0.2488, over 5793487.71 frames. ], batch size: 77, lr: 5.02e-03, grad_scale: 16.0 +2024-09-18 07:15:27,409 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.441e+01 8.945e+01 9.412e+01 1.487e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-18 07:15:51,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=394540.0, ans=0.025 +2024-09-18 07:16:17,352 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.47 vs. limit=12.0 +2024-09-18 07:16:40,408 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.80 vs. limit=12.0 +2024-09-18 07:16:41,173 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:16:42,258 INFO [train.py:1198] (1/2) Epoch 22, batch 3650, loss[loss=0.2683, ctc_loss=0.1535, cr_loss=0.4216, attn_decoder_loss=0.2717, over 29508.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1323, cr_loss=0.3742, attn_decoder_loss=0.2482, over 5795058.31 frames. ], batch size: 90, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:16:49,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=394700.0, ans=0.1 +2024-09-18 07:16:52,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=394700.0, ans=0.125 +2024-09-18 07:16:55,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=394740.0, ans=0.1 +2024-09-18 07:17:19,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=394780.0, ans=0.0 +2024-09-18 07:17:40,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=394860.0, ans=0.2 +2024-09-18 07:17:40,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=394860.0, ans=0.0 +2024-09-18 07:17:41,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=394860.0, ans=0.0 +2024-09-18 07:17:56,473 INFO [train.py:1198] (1/2) Epoch 22, batch 3700, loss[loss=0.2538, ctc_loss=0.1463, cr_loss=0.3996, attn_decoder_loss=0.2569, over 29721.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1317, cr_loss=0.3737, attn_decoder_loss=0.248, over 5805251.08 frames. 
], batch size: 84, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:17:59,513 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.466e+01 8.986e+01 9.824e+01 1.367e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-18 07:18:41,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=395020.0, ans=0.125 +2024-09-18 07:18:42,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=395020.0, ans=0.0 +2024-09-18 07:18:53,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=395020.0, ans=0.125 +2024-09-18 07:18:59,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=395060.0, ans=0.125 +2024-09-18 07:19:12,624 INFO [train.py:1198] (1/2) Epoch 22, batch 3750, loss[loss=0.2157, ctc_loss=0.1145, cr_loss=0.3394, attn_decoder_loss=0.2194, over 29333.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.132, cr_loss=0.3742, attn_decoder_loss=0.2479, over 5808703.48 frames. ], batch size: 67, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:19:12,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=395100.0, ans=0.125 +2024-09-18 07:19:15,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=395100.0, ans=0.125 +2024-09-18 07:19:23,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=395100.0, ans=0.0 +2024-09-18 07:20:04,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=395220.0, ans=0.0 +2024-09-18 07:20:16,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=395260.0, ans=0.0 +2024-09-18 07:20:23,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=395260.0, ans=0.025 +2024-09-18 07:20:27,725 INFO [train.py:1198] (1/2) Epoch 22, batch 3800, loss[loss=0.2566, ctc_loss=0.1457, cr_loss=0.3957, attn_decoder_loss=0.2602, over 29635.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.132, cr_loss=0.3742, attn_decoder_loss=0.2477, over 5799227.12 frames. ], batch size: 86, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:20:30,688 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.290e+01 8.441e+01 9.008e+01 9.541e+01 1.561e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-18 07:20:36,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=395300.0, ans=0.125 +2024-09-18 07:20:47,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=395340.0, ans=0.0 +2024-09-18 07:20:49,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.31 vs. 
limit=15.0 +2024-09-18 07:21:36,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=395460.0, ans=0.025 +2024-09-18 07:21:41,876 INFO [train.py:1198] (1/2) Epoch 22, batch 3850, loss[loss=0.2606, ctc_loss=0.147, cr_loss=0.4147, attn_decoder_loss=0.264, over 29205.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.132, cr_loss=0.3743, attn_decoder_loss=0.2478, over 5813790.43 frames. ], batch size: 100, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:21:45,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=395500.0, ans=0.0 +2024-09-18 07:21:49,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=395500.0, ans=0.0 +2024-09-18 07:21:52,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=395500.0, ans=0.0 +2024-09-18 07:21:52,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=395500.0, ans=0.125 +2024-09-18 07:21:55,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=395540.0, ans=0.1 +2024-09-18 07:22:33,414 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.64 vs. limit=15.0 +2024-09-18 07:22:37,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=395620.0, ans=0.125 +2024-09-18 07:22:38,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.85 vs. limit=15.0 +2024-09-18 07:22:41,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=395660.0, ans=0.1 +2024-09-18 07:22:50,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.29 vs. limit=15.0 +2024-09-18 07:22:57,687 INFO [train.py:1198] (1/2) Epoch 22, batch 3900, loss[loss=0.2631, ctc_loss=0.1486, cr_loss=0.4157, attn_decoder_loss=0.2665, over 29616.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1322, cr_loss=0.3753, attn_decoder_loss=0.2484, over 5818118.53 frames. ], batch size: 86, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:23:00,757 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.375e+01 8.669e+01 9.089e+01 9.620e+01 1.531e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-18 07:23:06,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=395700.0, ans=0.04949747468305833 +2024-09-18 07:23:09,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=395700.0, ans=0.035 +2024-09-18 07:23:17,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=395740.0, ans=0.125 +2024-09-18 07:23:20,639 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.40 vs. 
limit=15.0 +2024-09-18 07:23:26,385 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.86 vs. limit=10.0 +2024-09-18 07:23:37,097 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.12 vs. limit=12.0 +2024-09-18 07:23:52,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=395820.0, ans=0.125 +2024-09-18 07:24:01,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=395860.0, ans=0.125 +2024-09-18 07:24:11,538 INFO [train.py:1198] (1/2) Epoch 22, batch 3950, loss[loss=0.255, ctc_loss=0.1349, cr_loss=0.3993, attn_decoder_loss=0.2595, over 29476.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1316, cr_loss=0.3743, attn_decoder_loss=0.248, over 5837018.28 frames. ], batch size: 97, lr: 5.02e-03, grad_scale: 8.0 +2024-09-18 07:24:14,077 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.49 vs. limit=15.0 +2024-09-18 07:24:17,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=395900.0, ans=0.125 +2024-09-18 07:24:17,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=395900.0, ans=0.2 +2024-09-18 07:25:01,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=396020.0, ans=0.0 +2024-09-18 07:25:05,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=396020.0, ans=0.125 +2024-09-18 07:25:07,627 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.05 vs. limit=15.0 +2024-09-18 07:25:12,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=396060.0, ans=0.125 +2024-09-18 07:25:14,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer_na.min_abs, batch_count=396060.0, ans=0.02 +2024-09-18 07:25:21,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=396060.0, ans=0.125 +2024-09-18 07:25:27,270 INFO [train.py:1198] (1/2) Epoch 22, batch 4000, loss[loss=0.2325, ctc_loss=0.1282, cr_loss=0.3698, attn_decoder_loss=0.2359, over 29538.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1321, cr_loss=0.3745, attn_decoder_loss=0.2483, over 5813094.81 frames. ], batch size: 74, lr: 5.01e-03, grad_scale: 16.0 +2024-09-18 07:25:29,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.57 vs. 
limit=15.0 +2024-09-18 07:25:30,134 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.472e+01 8.530e+01 8.952e+01 9.583e+01 2.635e+02, threshold=1.790e+02, percent-clipped=1.0 +2024-09-18 07:25:34,901 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:25:35,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=396100.0, ans=0.125 +2024-09-18 07:25:42,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=396140.0, ans=0.125 +2024-09-18 07:25:55,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=396180.0, ans=0.125 +2024-09-18 07:25:57,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=396180.0, ans=0.2 +2024-09-18 07:26:37,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=396260.0, ans=0.0 +2024-09-18 07:26:37,809 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.68 vs. limit=15.0 +2024-09-18 07:26:41,543 INFO [train.py:1198] (1/2) Epoch 22, batch 4050, loss[loss=0.2713, ctc_loss=0.1732, cr_loss=0.4303, attn_decoder_loss=0.2726, over 19996.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1325, cr_loss=0.3748, attn_decoder_loss=0.2483, over 5796227.53 frames. ], batch size: 209, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:26:46,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=396300.0, ans=0.0 +2024-09-18 07:27:31,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=396420.0, ans=0.0 +2024-09-18 07:27:32,021 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=10.32 vs. limit=22.5 +2024-09-18 07:27:38,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=396420.0, ans=0.125 +2024-09-18 07:27:55,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=396500.0, ans=0.125 +2024-09-18 07:27:55,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=396500.0, ans=0.125 +2024-09-18 07:27:56,313 INFO [train.py:1198] (1/2) Epoch 22, batch 4100, loss[loss=0.2636, ctc_loss=0.1499, cr_loss=0.4122, attn_decoder_loss=0.2671, over 29499.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.133, cr_loss=0.3753, attn_decoder_loss=0.2483, over 5792124.05 frames. ], batch size: 90, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:27:58,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=396500.0, ans=0.0 +2024-09-18 07:28:00,300 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.04 vs. 
limit=22.5 +2024-09-18 07:28:00,767 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.483e+01 8.697e+01 9.214e+01 1.008e+02 3.653e+02, threshold=1.843e+02, percent-clipped=2.0 +2024-09-18 07:28:09,158 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.07 vs. limit=15.0 +2024-09-18 07:28:14,243 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:28:22,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=396540.0, ans=0.125 +2024-09-18 07:28:31,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=396580.0, ans=0.0 +2024-09-18 07:28:36,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=396580.0, ans=0.125 +2024-09-18 07:28:40,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=396620.0, ans=0.0 +2024-09-18 07:28:51,675 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.96 vs. limit=15.0 +2024-09-18 07:28:55,523 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:29:09,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=396700.0, ans=0.125 +2024-09-18 07:29:10,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.87 vs. limit=15.0 +2024-09-18 07:29:11,101 INFO [train.py:1198] (1/2) Epoch 22, batch 4150, loss[loss=0.2414, ctc_loss=0.1327, cr_loss=0.3786, attn_decoder_loss=0.2451, over 29515.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1323, cr_loss=0.3742, attn_decoder_loss=0.248, over 5798320.55 frames. ], batch size: 77, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:29:16,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.05 vs. limit=15.0 +2024-09-18 07:29:17,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=396700.0, ans=0.0 +2024-09-18 07:29:53,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=396780.0, ans=0.0 +2024-09-18 07:29:59,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=396820.0, ans=0.5 +2024-09-18 07:30:16,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=396860.0, ans=0.125 +2024-09-18 07:30:19,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=396860.0, ans=0.0 +2024-09-18 07:30:22,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=396860.0, ans=0.1 +2024-09-18 07:30:25,224 INFO [train.py:1198] (1/2) Epoch 22, batch 4200, loss[loss=0.2516, ctc_loss=0.1364, cr_loss=0.3694, attn_decoder_loss=0.2562, over 29523.00 frames. 
], tot_loss[loss=0.2445, ctc_loss=0.1328, cr_loss=0.375, attn_decoder_loss=0.2486, over 5800382.54 frames. ], batch size: 90, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:30:29,585 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.447e+01 9.085e+01 9.593e+01 1.747e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-18 07:30:49,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=396940.0, ans=0.125 +2024-09-18 07:30:50,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=396940.0, ans=0.0 +2024-09-18 07:31:07,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=396980.0, ans=0.0 +2024-09-18 07:31:22,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=397020.0, ans=0.2 +2024-09-18 07:31:37,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=397060.0, ans=0.125 +2024-09-18 07:31:38,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=397100.0, ans=0.0 +2024-09-18 07:31:39,885 INFO [train.py:1198] (1/2) Epoch 22, batch 4250, loss[loss=0.2285, ctc_loss=0.121, cr_loss=0.3704, attn_decoder_loss=0.2322, over 29496.00 frames. ], tot_loss[loss=0.2447, ctc_loss=0.1328, cr_loss=0.3753, attn_decoder_loss=0.2488, over 5806265.40 frames. ], batch size: 74, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:31:47,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=397100.0, ans=0.125 +2024-09-18 07:32:03,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=397140.0, ans=0.125 +2024-09-18 07:32:07,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=397180.0, ans=0.125 +2024-09-18 07:32:16,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=397180.0, ans=0.025 +2024-09-18 07:32:35,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=397220.0, ans=0.0 +2024-09-18 07:32:53,985 INFO [train.py:1198] (1/2) Epoch 22, batch 4300, loss[loss=0.266, ctc_loss=0.1445, cr_loss=0.3959, attn_decoder_loss=0.2707, over 29557.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.133, cr_loss=0.3755, attn_decoder_loss=0.2492, over 5795094.61 frames. 
], batch size: 87, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:32:58,453 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.737e+01 9.479e+01 1.036e+02 1.602e+02, threshold=1.896e+02, percent-clipped=0.0 +2024-09-18 07:33:12,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=397340.0, ans=0.0 +2024-09-18 07:33:17,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=397340.0, ans=0.125 +2024-09-18 07:33:19,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=397340.0, ans=0.0 +2024-09-18 07:33:24,636 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.78 vs. limit=15.0 +2024-09-18 07:33:30,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=397380.0, ans=0.0 +2024-09-18 07:34:00,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.51 vs. limit=6.0 +2024-09-18 07:34:08,148 INFO [train.py:1198] (1/2) Epoch 22, batch 4350, loss[loss=0.2601, ctc_loss=0.1395, cr_loss=0.3849, attn_decoder_loss=0.2649, over 29443.00 frames. ], tot_loss[loss=0.2482, ctc_loss=0.1355, cr_loss=0.3804, attn_decoder_loss=0.2522, over 5796898.29 frames. ], batch size: 97, lr: 5.01e-03, grad_scale: 8.0 +2024-09-18 07:34:18,059 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.86 vs. limit=15.0 +2024-09-18 07:34:21,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=397540.0, ans=0.1 +2024-09-18 07:34:31,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten.whitening_limit, batch_count=397540.0, ans=15.0 +2024-09-18 07:34:34,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=397540.0, ans=0.125 +2024-09-18 07:34:38,266 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.23 vs. limit=12.0 +2024-09-18 07:34:39,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=397580.0, ans=0.125 +2024-09-18 07:34:51,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=397620.0, ans=0.07 +2024-09-18 07:35:08,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=397660.0, ans=0.1 +2024-09-18 07:35:12,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=397660.0, ans=0.0 +2024-09-18 07:35:21,409 INFO [train.py:1198] (1/2) Epoch 22, batch 4400, loss[loss=0.2611, ctc_loss=0.1502, cr_loss=0.3878, attn_decoder_loss=0.2648, over 27157.00 frames. ], tot_loss[loss=0.2503, ctc_loss=0.1371, cr_loss=0.3831, attn_decoder_loss=0.2544, over 5764635.91 frames. 
], batch size: 124, lr: 5.00e-03, grad_scale: 16.0 +2024-09-18 07:35:25,704 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.295e+01 9.019e+01 9.432e+01 1.021e+02 4.096e+02, threshold=1.886e+02, percent-clipped=2.0 +2024-09-18 07:35:35,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=397740.0, ans=0.1 +2024-09-18 07:35:41,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=397740.0, ans=0.0 +2024-09-18 07:35:48,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=397740.0, ans=0.035 +2024-09-18 07:35:58,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=397780.0, ans=0.125 +2024-09-18 07:36:36,332 INFO [train.py:1198] (1/2) Epoch 22, batch 4450, loss[loss=0.2842, ctc_loss=0.1903, cr_loss=0.4164, attn_decoder_loss=0.2854, over 20292.00 frames. ], tot_loss[loss=0.2532, ctc_loss=0.1414, cr_loss=0.3878, attn_decoder_loss=0.257, over 5572305.52 frames. ], batch size: 209, lr: 5.00e-03, grad_scale: 8.0 +2024-09-18 07:36:47,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=397900.0, ans=0.125 +2024-09-18 07:36:51,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=397940.0, ans=0.0 +2024-09-18 07:36:54,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=397940.0, ans=0.0 +2024-09-18 07:37:03,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=397940.0, ans=0.125 +2024-09-18 07:37:09,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=397980.0, ans=0.125 +2024-09-18 07:37:14,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=397980.0, ans=0.025 +2024-09-18 07:37:22,737 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=9.62 vs. limit=10.0 +2024-09-18 07:37:37,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=398060.0, ans=0.125 +2024-09-18 07:37:42,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.03 vs. limit=15.0 +2024-09-18 07:37:42,192 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=10.98 vs. limit=12.0 +2024-09-18 07:37:50,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=398100.0, ans=0.1 +2024-09-18 07:37:51,773 INFO [train.py:1198] (1/2) Epoch 22, batch 4500, loss[loss=0.2672, ctc_loss=0.1715, cr_loss=0.4114, attn_decoder_loss=0.2686, over 20307.00 frames. ], tot_loss[loss=0.2555, ctc_loss=0.1457, cr_loss=0.3899, attn_decoder_loss=0.259, over 5230004.09 frames. 
], batch size: 210, lr: 5.00e-03, grad_scale: 8.0 +2024-09-18 07:37:57,651 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.580e+01 1.014e+02 1.103e+02 1.223e+02 2.065e+02, threshold=2.205e+02, percent-clipped=1.0 +2024-09-18 07:38:03,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=398100.0, ans=0.2 +2024-09-18 07:38:23,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=398180.0, ans=0.125 +2024-09-18 07:38:23,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=398180.0, ans=0.125 +2024-09-18 07:38:24,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=398180.0, ans=0.025 +2024-09-18 07:39:14,511 INFO [train.py:1198] (1/2) Epoch 23, batch 0, loss[loss=0.2205, ctc_loss=0.1125, cr_loss=0.3312, attn_decoder_loss=0.2251, over 29612.00 frames. ], tot_loss[loss=0.2205, ctc_loss=0.1125, cr_loss=0.3312, attn_decoder_loss=0.2251, over 29612.00 frames. ], batch size: 73, lr: 4.89e-03, grad_scale: 16.0 +2024-09-18 07:39:14,511 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 07:39:33,043 INFO [train.py:1230] (1/2) Epoch 23, validation: loss=0.212, ctc_loss=0.03823, cr_loss=5.578e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-18 07:39:33,043 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 07:39:58,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=398240.0, ans=0.2 +2024-09-18 07:40:21,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=398320.0, ans=0.125 +2024-09-18 07:40:27,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=398320.0, ans=0.0 +2024-09-18 07:40:37,574 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.02 vs. limit=15.0 +2024-09-18 07:40:41,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=398360.0, ans=0.125 +2024-09-18 07:40:49,045 INFO [train.py:1198] (1/2) Epoch 23, batch 50, loss[loss=0.2245, ctc_loss=0.1199, cr_loss=0.354, attn_decoder_loss=0.2282, over 29413.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1335, cr_loss=0.378, attn_decoder_loss=0.2483, over 1268255.72 frames. ], batch size: 70, lr: 4.89e-03, grad_scale: 8.0 +2024-09-18 07:40:58,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.71 vs. 
limit=15.0 +2024-09-18 07:41:02,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=398400.0, ans=0.05 +2024-09-18 07:41:03,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=398400.0, ans=0.125 +2024-09-18 07:41:14,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=398440.0, ans=0.025 +2024-09-18 07:41:38,768 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.725e+01 8.809e+01 9.782e+01 1.101e+02 2.337e+02, threshold=1.956e+02, percent-clipped=1.0 +2024-09-18 07:41:54,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=398560.0, ans=0.125 +2024-09-18 07:41:56,590 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.29 vs. limit=15.0 +2024-09-18 07:41:58,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=398560.0, ans=0.125 +2024-09-18 07:42:08,981 INFO [train.py:1198] (1/2) Epoch 23, batch 100, loss[loss=0.2351, ctc_loss=0.1282, cr_loss=0.3609, attn_decoder_loss=0.239, over 29549.00 frames. ], tot_loss[loss=0.2476, ctc_loss=0.136, cr_loss=0.3816, attn_decoder_loss=0.2515, over 2252520.95 frames. ], batch size: 76, lr: 4.89e-03, grad_scale: 8.0 +2024-09-18 07:42:36,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=398640.0, ans=0.1 +2024-09-18 07:42:36,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=398640.0, ans=0.0 +2024-09-18 07:43:10,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=398760.0, ans=0.125 +2024-09-18 07:43:22,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=398800.0, ans=0.0 +2024-09-18 07:43:23,639 INFO [train.py:1198] (1/2) Epoch 23, batch 150, loss[loss=0.2264, ctc_loss=0.1197, cr_loss=0.3491, attn_decoder_loss=0.2305, over 29416.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1334, cr_loss=0.3763, attn_decoder_loss=0.2492, over 3046649.39 frames. ], batch size: 70, lr: 4.89e-03, grad_scale: 8.0 +2024-09-18 07:43:23,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=398800.0, ans=0.2 +2024-09-18 07:43:29,225 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.95 vs. 
limit=15.0 +2024-09-18 07:43:31,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=398800.0, ans=0.125 +2024-09-18 07:43:45,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=398840.0, ans=0.125 +2024-09-18 07:43:57,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=398880.0, ans=0.125 +2024-09-18 07:44:08,888 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.565e+01 8.443e+01 9.031e+01 9.523e+01 1.308e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-18 07:44:10,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=398920.0, ans=0.125 +2024-09-18 07:44:12,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=398920.0, ans=0.2 +2024-09-18 07:44:13,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=398920.0, ans=0.09899494936611666 +2024-09-18 07:44:16,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=398920.0, ans=0.2 +2024-09-18 07:44:19,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=398920.0, ans=0.125 +2024-09-18 07:44:33,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=398960.0, ans=0.07 +2024-09-18 07:44:38,852 INFO [train.py:1198] (1/2) Epoch 23, batch 200, loss[loss=0.2535, ctc_loss=0.133, cr_loss=0.3894, attn_decoder_loss=0.2582, over 27457.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1323, cr_loss=0.3753, attn_decoder_loss=0.2481, over 3658611.50 frames. ], batch size: 125, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:44:39,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=399000.0, ans=0.2 +2024-09-18 07:45:04,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=399040.0, ans=10.0 +2024-09-18 07:45:05,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=399040.0, ans=0.0 +2024-09-18 07:45:26,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=399080.0, ans=0.04949747468305833 +2024-09-18 07:45:27,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=399120.0, ans=0.125 +2024-09-18 07:45:48,157 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.58 vs. limit=12.0 +2024-09-18 07:45:59,855 INFO [train.py:1198] (1/2) Epoch 23, batch 250, loss[loss=0.2622, ctc_loss=0.1468, cr_loss=0.3911, attn_decoder_loss=0.2663, over 29262.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1324, cr_loss=0.3752, attn_decoder_loss=0.2483, over 4141753.81 frames. 
], batch size: 100, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:46:00,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=399200.0, ans=0.2 +2024-09-18 07:46:03,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=399200.0, ans=0.0 +2024-09-18 07:46:06,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=399200.0, ans=0.07 +2024-09-18 07:46:18,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=399240.0, ans=0.5 +2024-09-18 07:46:22,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=399240.0, ans=0.1 +2024-09-18 07:46:33,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=399280.0, ans=0.0 +2024-09-18 07:46:35,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=399280.0, ans=0.0 +2024-09-18 07:46:45,298 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.537e+01 9.009e+01 9.547e+01 2.225e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-18 07:46:59,956 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.30 vs. limit=22.5 +2024-09-18 07:47:03,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=399360.0, ans=0.1 +2024-09-18 07:47:11,876 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.53 vs. limit=10.0 +2024-09-18 07:47:15,479 INFO [train.py:1198] (1/2) Epoch 23, batch 300, loss[loss=0.2721, ctc_loss=0.15, cr_loss=0.4222, attn_decoder_loss=0.2763, over 29515.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1324, cr_loss=0.3758, attn_decoder_loss=0.2481, over 4510162.84 frames. ], batch size: 92, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:47:20,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.80 vs. limit=15.0 +2024-09-18 07:47:44,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=399480.0, ans=0.2 +2024-09-18 07:48:31,065 INFO [train.py:1198] (1/2) Epoch 23, batch 350, loss[loss=0.222, ctc_loss=0.116, cr_loss=0.3431, attn_decoder_loss=0.2262, over 29324.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1321, cr_loss=0.3753, attn_decoder_loss=0.2481, over 4795619.12 frames. 
], batch size: 71, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:48:54,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=399640.0, ans=0.1 +2024-09-18 07:49:09,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=399680.0, ans=0.125 +2024-09-18 07:49:13,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=399680.0, ans=0.0 +2024-09-18 07:49:13,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=399680.0, ans=0.0 +2024-09-18 07:49:16,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=399680.0, ans=0.125 +2024-09-18 07:49:16,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=399680.0, ans=0.0 +2024-09-18 07:49:20,804 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.416e+01 8.727e+01 9.232e+01 2.116e+02, threshold=1.745e+02, percent-clipped=2.0 +2024-09-18 07:49:25,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=399720.0, ans=0.125 +2024-09-18 07:49:49,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=399800.0, ans=0.0 +2024-09-18 07:49:50,850 INFO [train.py:1198] (1/2) Epoch 23, batch 400, loss[loss=0.2485, ctc_loss=0.1361, cr_loss=0.4079, attn_decoder_loss=0.2519, over 29719.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.131, cr_loss=0.3732, attn_decoder_loss=0.2476, over 5025175.46 frames. ], batch size: 82, lr: 4.88e-03, grad_scale: 16.0 +2024-09-18 07:50:00,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=399800.0, ans=0.125 +2024-09-18 07:50:29,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.19 vs. limit=22.5 +2024-09-18 07:50:36,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=399920.0, ans=0.125 +2024-09-18 07:50:46,284 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.53 vs. limit=15.0 +2024-09-18 07:51:14,610 INFO [train.py:1198] (1/2) Epoch 23, batch 450, loss[loss=0.2524, ctc_loss=0.1414, cr_loss=0.39, attn_decoder_loss=0.256, over 29699.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1307, cr_loss=0.3726, attn_decoder_loss=0.2477, over 5187582.08 frames. 
], batch size: 83, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:51:14,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=400000.0, ans=0.1 +2024-09-18 07:51:23,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=400000.0, ans=0.1 +2024-09-18 07:51:51,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=400080.0, ans=0.1 +2024-09-18 07:51:51,305 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 07:52:01,477 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.566e+01 8.450e+01 8.997e+01 9.501e+01 2.678e+02, threshold=1.799e+02, percent-clipped=1.0 +2024-09-18 07:52:18,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=400160.0, ans=0.09899494936611666 +2024-09-18 07:52:30,188 INFO [train.py:1198] (1/2) Epoch 23, batch 500, loss[loss=0.257, ctc_loss=0.1422, cr_loss=0.3934, attn_decoder_loss=0.261, over 29441.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1302, cr_loss=0.3725, attn_decoder_loss=0.247, over 5331203.76 frames. ], batch size: 94, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:52:59,458 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.22 vs. limit=15.0 +2024-09-18 07:53:00,468 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.58 vs. limit=15.0 +2024-09-18 07:53:04,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=400280.0, ans=0.0 +2024-09-18 07:53:08,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=400280.0, ans=0.125 +2024-09-18 07:53:11,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=400280.0, ans=0.0 +2024-09-18 07:53:21,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=400320.0, ans=0.125 +2024-09-18 07:53:21,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=400320.0, ans=0.125 +2024-09-18 07:53:30,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.75 vs. limit=15.0 +2024-09-18 07:53:49,615 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.80 vs. limit=15.0 +2024-09-18 07:53:49,674 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.74 vs. limit=15.0 +2024-09-18 07:53:50,364 INFO [train.py:1198] (1/2) Epoch 23, batch 550, loss[loss=0.2522, ctc_loss=0.1292, cr_loss=0.3756, attn_decoder_loss=0.2575, over 28886.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1306, cr_loss=0.3723, attn_decoder_loss=0.2473, over 5425302.59 frames. 
], batch size: 104, lr: 4.88e-03, grad_scale: 8.0 +2024-09-18 07:53:55,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=400400.0, ans=0.1 +2024-09-18 07:53:59,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=400400.0, ans=0.1 +2024-09-18 07:54:01,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=400400.0, ans=0.125 +2024-09-18 07:54:10,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=400440.0, ans=0.125 +2024-09-18 07:54:23,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=400480.0, ans=0.125 +2024-09-18 07:54:35,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=400520.0, ans=0.125 +2024-09-18 07:54:37,027 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.508e+01 8.525e+01 9.043e+01 9.907e+01 2.945e+02, threshold=1.809e+02, percent-clipped=3.0 +2024-09-18 07:54:53,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=400560.0, ans=10.0 +2024-09-18 07:55:01,996 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.46 vs. limit=10.0 +2024-09-18 07:55:05,790 INFO [train.py:1198] (1/2) Epoch 23, batch 600, loss[loss=0.2658, ctc_loss=0.1496, cr_loss=0.4015, attn_decoder_loss=0.2698, over 29307.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1307, cr_loss=0.3733, attn_decoder_loss=0.2475, over 5511223.10 frames. ], batch size: 100, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 07:55:24,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=400640.0, ans=0.0 +2024-09-18 07:55:25,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=400640.0, ans=0.1 +2024-09-18 07:55:27,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten.whitening_limit, batch_count=400640.0, ans=22.5 +2024-09-18 07:55:28,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=400640.0, ans=0.1 +2024-09-18 07:55:30,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=400640.0, ans=0.125 +2024-09-18 07:55:33,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=400640.0, ans=0.125 +2024-09-18 07:56:14,647 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.14 vs. limit=10.0 +2024-09-18 07:56:21,495 INFO [train.py:1198] (1/2) Epoch 23, batch 650, loss[loss=0.2467, ctc_loss=0.1321, cr_loss=0.3969, attn_decoder_loss=0.2506, over 29763.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1304, cr_loss=0.3726, attn_decoder_loss=0.247, over 5588437.08 frames. 
], batch size: 81, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 07:56:26,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=400800.0, ans=0.125 +2024-09-18 07:56:53,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=400880.0, ans=0.07 +2024-09-18 07:57:12,931 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.280e+01 8.571e+01 9.065e+01 9.710e+01 2.691e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-18 07:57:13,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=400920.0, ans=0.125 +2024-09-18 07:57:25,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=400960.0, ans=0.125 +2024-09-18 07:57:25,756 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.79 vs. limit=22.5 +2024-09-18 07:57:28,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=400960.0, ans=0.125 +2024-09-18 07:57:35,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=400960.0, ans=0.1 +2024-09-18 07:57:41,626 INFO [train.py:1198] (1/2) Epoch 23, batch 700, loss[loss=0.245, ctc_loss=0.1366, cr_loss=0.3808, attn_decoder_loss=0.2485, over 29522.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1308, cr_loss=0.3731, attn_decoder_loss=0.2478, over 5638571.22 frames. ], batch size: 76, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 07:57:49,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=401000.0, ans=0.025 +2024-09-18 07:57:57,422 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.83 vs. limit=10.0 +2024-09-18 07:58:01,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=401040.0, ans=0.125 +2024-09-18 07:58:21,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=401080.0, ans=0.125 +2024-09-18 07:58:27,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=401120.0, ans=0.2 +2024-09-18 07:58:35,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=401120.0, ans=0.025 +2024-09-18 07:58:36,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=401120.0, ans=0.125 +2024-09-18 07:58:41,588 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.29 vs. limit=12.0 +2024-09-18 07:58:57,482 INFO [train.py:1198] (1/2) Epoch 23, batch 750, loss[loss=0.2478, ctc_loss=0.1275, cr_loss=0.357, attn_decoder_loss=0.2532, over 29709.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.131, cr_loss=0.3734, attn_decoder_loss=0.2477, over 5677598.51 frames. 
], batch size: 82, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 07:58:57,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=401200.0, ans=0.04949747468305833 +2024-09-18 07:58:59,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=401200.0, ans=0.125 +2024-09-18 07:59:15,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=401240.0, ans=0.025 +2024-09-18 07:59:15,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=401240.0, ans=0.125 +2024-09-18 07:59:29,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=401280.0, ans=0.125 +2024-09-18 07:59:35,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=401280.0, ans=0.1 +2024-09-18 07:59:44,086 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.383e+01 8.454e+01 8.911e+01 9.640e+01 3.418e+02, threshold=1.782e+02, percent-clipped=1.0 +2024-09-18 07:59:51,366 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.74 vs. limit=15.0 +2024-09-18 08:00:12,907 INFO [train.py:1198] (1/2) Epoch 23, batch 800, loss[loss=0.2213, ctc_loss=0.1106, cr_loss=0.3267, attn_decoder_loss=0.2264, over 29606.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1307, cr_loss=0.3732, attn_decoder_loss=0.2475, over 5707130.50 frames. ], batch size: 73, lr: 4.87e-03, grad_scale: 16.0 +2024-09-18 08:00:20,728 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:00:35,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=401440.0, ans=0.05 +2024-09-18 08:00:50,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=401480.0, ans=0.125 +2024-09-18 08:01:02,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=401520.0, ans=0.09899494936611666 +2024-09-18 08:01:07,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.39 vs. limit=12.0 +2024-09-18 08:01:21,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=401560.0, ans=0.0 +2024-09-18 08:01:26,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=401560.0, ans=0.2 +2024-09-18 08:01:30,977 INFO [train.py:1198] (1/2) Epoch 23, batch 850, loss[loss=0.2586, ctc_loss=0.1442, cr_loss=0.3949, attn_decoder_loss=0.2625, over 29707.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1304, cr_loss=0.373, attn_decoder_loss=0.2471, over 5736013.59 frames. 
], batch size: 89, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 08:01:31,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=401600.0, ans=0.0 +2024-09-18 08:01:32,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=401600.0, ans=0.125 +2024-09-18 08:01:47,509 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:02:18,870 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.183e+01 8.350e+01 8.947e+01 9.398e+01 1.136e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-18 08:02:22,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=401720.0, ans=0.125 +2024-09-18 08:02:26,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=401720.0, ans=0.125 +2024-09-18 08:02:30,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=401760.0, ans=0.0 +2024-09-18 08:02:31,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=401760.0, ans=0.0 +2024-09-18 08:02:39,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.52 vs. limit=6.0 +2024-09-18 08:02:46,367 INFO [train.py:1198] (1/2) Epoch 23, batch 900, loss[loss=0.2171, ctc_loss=0.1061, cr_loss=0.3153, attn_decoder_loss=0.2224, over 29598.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1306, cr_loss=0.3727, attn_decoder_loss=0.2472, over 5740795.32 frames. ], batch size: 73, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 08:02:48,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=401800.0, ans=0.125 +2024-09-18 08:03:09,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=401840.0, ans=0.0 +2024-09-18 08:03:11,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=401840.0, ans=0.2 +2024-09-18 08:03:17,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=401880.0, ans=0.125 +2024-09-18 08:03:21,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=401880.0, ans=0.0 +2024-09-18 08:03:24,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=401880.0, ans=0.125 +2024-09-18 08:03:28,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=401880.0, ans=0.0 +2024-09-18 08:03:33,653 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.94 vs. 
limit=15.0 +2024-09-18 08:03:37,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=401920.0, ans=0.125 +2024-09-18 08:03:37,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=401920.0, ans=0.125 +2024-09-18 08:03:52,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=401960.0, ans=0.2 +2024-09-18 08:04:01,328 INFO [train.py:1198] (1/2) Epoch 23, batch 950, loss[loss=0.2328, ctc_loss=0.1266, cr_loss=0.3504, attn_decoder_loss=0.2369, over 29525.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.131, cr_loss=0.3736, attn_decoder_loss=0.2473, over 5743768.49 frames. ], batch size: 74, lr: 4.87e-03, grad_scale: 8.0 +2024-09-18 08:04:10,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=402000.0, ans=0.2 +2024-09-18 08:04:15,834 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=9.82 vs. limit=15.0 +2024-09-18 08:04:21,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=402040.0, ans=0.0 +2024-09-18 08:04:28,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=402040.0, ans=0.125 +2024-09-18 08:04:28,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=402040.0, ans=0.125 +2024-09-18 08:04:41,050 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.08 vs. limit=10.0 +2024-09-18 08:04:41,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=402080.0, ans=0.0 +2024-09-18 08:04:49,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=402120.0, ans=0.125 +2024-09-18 08:04:50,558 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.78 vs. limit=15.0 +2024-09-18 08:04:54,185 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.348e+01 8.814e+01 9.447e+01 1.062e+02 2.466e+02, threshold=1.889e+02, percent-clipped=1.0 +2024-09-18 08:05:02,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=402120.0, ans=0.5 +2024-09-18 08:05:11,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.95 vs. limit=22.5 +2024-09-18 08:05:21,294 INFO [train.py:1198] (1/2) Epoch 23, batch 1000, loss[loss=0.2382, ctc_loss=0.1261, cr_loss=0.3789, attn_decoder_loss=0.2423, over 29522.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1315, cr_loss=0.3741, attn_decoder_loss=0.2478, over 5737752.55 frames. 
], batch size: 77, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:05:26,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=402200.0, ans=0.2 +2024-09-18 08:05:28,209 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.79 vs. limit=15.0 +2024-09-18 08:05:33,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=402200.0, ans=0.125 +2024-09-18 08:05:35,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=402240.0, ans=0.0 +2024-09-18 08:05:39,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.24 vs. limit=15.0 +2024-09-18 08:05:56,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=402280.0, ans=0.125 +2024-09-18 08:06:05,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=402320.0, ans=0.0 +2024-09-18 08:06:11,038 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.16 vs. limit=22.5 +2024-09-18 08:06:20,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_ff3.min_abs, batch_count=402360.0, ans=0.2 +2024-09-18 08:06:22,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=402360.0, ans=0.1 +2024-09-18 08:06:28,886 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.50 vs. limit=15.0 +2024-09-18 08:06:37,692 INFO [train.py:1198] (1/2) Epoch 23, batch 1050, loss[loss=0.2517, ctc_loss=0.1396, cr_loss=0.3903, attn_decoder_loss=0.2555, over 29657.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.131, cr_loss=0.3733, attn_decoder_loss=0.2471, over 5745761.73 frames. ], batch size: 85, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:06:41,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=402400.0, ans=0.125 +2024-09-18 08:06:45,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=402400.0, ans=0.0 +2024-09-18 08:06:54,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=402440.0, ans=0.2 +2024-09-18 08:06:54,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=402440.0, ans=0.125 +2024-09-18 08:07:10,891 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.02 vs. 
limit=15.0 +2024-09-18 08:07:19,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=402480.0, ans=0.0 +2024-09-18 08:07:19,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=402480.0, ans=0.09899494936611666 +2024-09-18 08:07:26,531 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.610e+01 8.303e+01 8.731e+01 9.470e+01 1.420e+02, threshold=1.746e+02, percent-clipped=0.0 +2024-09-18 08:07:31,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=402520.0, ans=0.125 +2024-09-18 08:07:33,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.43 vs. limit=22.5 +2024-09-18 08:07:54,164 INFO [train.py:1198] (1/2) Epoch 23, batch 1100, loss[loss=0.2453, ctc_loss=0.1408, cr_loss=0.3914, attn_decoder_loss=0.2482, over 29431.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1309, cr_loss=0.3732, attn_decoder_loss=0.2472, over 5756729.68 frames. ], batch size: 78, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:08:12,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=402640.0, ans=0.125 +2024-09-18 08:08:20,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=402640.0, ans=0.125 +2024-09-18 08:08:39,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=402680.0, ans=0.125 +2024-09-18 08:08:53,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=402720.0, ans=0.125 +2024-09-18 08:08:58,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=402760.0, ans=0.125 +2024-09-18 08:09:12,102 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.88 vs. limit=22.5 +2024-09-18 08:09:13,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=402800.0, ans=0.125 +2024-09-18 08:09:14,481 INFO [train.py:1198] (1/2) Epoch 23, batch 1150, loss[loss=0.2415, ctc_loss=0.1363, cr_loss=0.4074, attn_decoder_loss=0.2441, over 29435.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1312, cr_loss=0.374, attn_decoder_loss=0.2474, over 5753713.33 frames. 
], batch size: 78, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:09:16,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=402800.0, ans=0.1 +2024-09-18 08:09:25,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=402800.0, ans=0.0 +2024-09-18 08:09:25,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=402800.0, ans=0.2 +2024-09-18 08:09:27,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=402800.0, ans=0.025 +2024-09-18 08:10:03,213 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.173e+01 8.564e+01 9.109e+01 9.682e+01 1.953e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 08:10:04,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=402920.0, ans=0.025 +2024-09-18 08:10:05,967 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=8.21 vs. limit=15.0 +2024-09-18 08:10:06,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=402920.0, ans=0.0 +2024-09-18 08:10:21,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=402960.0, ans=0.0 +2024-09-18 08:10:29,781 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.58 vs. limit=15.0 +2024-09-18 08:10:30,399 INFO [train.py:1198] (1/2) Epoch 23, batch 1200, loss[loss=0.2505, ctc_loss=0.1362, cr_loss=0.3901, attn_decoder_loss=0.2545, over 29682.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1316, cr_loss=0.3748, attn_decoder_loss=0.2483, over 5745758.89 frames. ], batch size: 85, lr: 4.86e-03, grad_scale: 16.0 +2024-09-18 08:10:53,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=403040.0, ans=0.0 +2024-09-18 08:11:01,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=403080.0, ans=0.0 +2024-09-18 08:11:05,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=403080.0, ans=0.1 +2024-09-18 08:11:13,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=403080.0, ans=0.0 +2024-09-18 08:11:28,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=403120.0, ans=0.0 +2024-09-18 08:11:36,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.32 vs. limit=15.0 +2024-09-18 08:11:37,944 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.52 vs. limit=15.0 +2024-09-18 08:11:46,529 INFO [train.py:1198] (1/2) Epoch 23, batch 1250, loss[loss=0.2564, ctc_loss=0.1413, cr_loss=0.3975, attn_decoder_loss=0.2604, over 29526.00 frames. ], tot_loss[loss=0.2444, ctc_loss=0.1318, cr_loss=0.3754, attn_decoder_loss=0.2486, over 5773666.10 frames. 
], batch size: 92, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:12:34,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=403320.0, ans=0.0 +2024-09-18 08:12:34,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=403320.0, ans=0.0 +2024-09-18 08:12:39,076 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.079e+01 8.243e+01 8.772e+01 9.696e+01 1.858e+02, threshold=1.754e+02, percent-clipped=1.0 +2024-09-18 08:12:44,478 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.11 vs. limit=15.0 +2024-09-18 08:12:47,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=403320.0, ans=0.1 +2024-09-18 08:13:04,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=403360.0, ans=0.2 +2024-09-18 08:13:04,985 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.49 vs. limit=15.0 +2024-09-18 08:13:06,947 INFO [train.py:1198] (1/2) Epoch 23, batch 1300, loss[loss=0.2548, ctc_loss=0.1241, cr_loss=0.3611, attn_decoder_loss=0.2613, over 28241.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1313, cr_loss=0.3747, attn_decoder_loss=0.2481, over 5778322.18 frames. ], batch size: 111, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:13:07,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=403400.0, ans=0.07 +2024-09-18 08:13:19,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=403400.0, ans=0.125 +2024-09-18 08:13:27,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=403440.0, ans=0.025 +2024-09-18 08:13:34,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=403440.0, ans=0.125 +2024-09-18 08:14:22,794 INFO [train.py:1198] (1/2) Epoch 23, batch 1350, loss[loss=0.24, ctc_loss=0.128, cr_loss=0.3665, attn_decoder_loss=0.2443, over 29745.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1309, cr_loss=0.3736, attn_decoder_loss=0.2476, over 5796573.62 frames. ], batch size: 81, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:14:26,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=403600.0, ans=0.125 +2024-09-18 08:14:28,330 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.29 vs. limit=15.0 +2024-09-18 08:14:34,031 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.32 vs. 
limit=15.0 +2024-09-18 08:14:55,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=403680.0, ans=0.025 +2024-09-18 08:15:12,099 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.310e+01 8.370e+01 8.788e+01 9.254e+01 1.206e+02, threshold=1.758e+02, percent-clipped=0.0 +2024-09-18 08:15:26,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=403760.0, ans=0.0 +2024-09-18 08:15:37,899 INFO [train.py:1198] (1/2) Epoch 23, batch 1400, loss[loss=0.2179, ctc_loss=0.1189, cr_loss=0.3417, attn_decoder_loss=0.2213, over 29541.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.131, cr_loss=0.3741, attn_decoder_loss=0.2477, over 5807581.31 frames. ], batch size: 69, lr: 4.86e-03, grad_scale: 8.0 +2024-09-18 08:15:42,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=403800.0, ans=0.125 +2024-09-18 08:15:47,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.30 vs. limit=22.5 +2024-09-18 08:15:48,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=403800.0, ans=0.1 +2024-09-18 08:16:03,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=403840.0, ans=0.0 +2024-09-18 08:16:12,024 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.77 vs. limit=15.0 +2024-09-18 08:16:22,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=403880.0, ans=0.07 +2024-09-18 08:16:25,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=403920.0, ans=0.1 +2024-09-18 08:16:33,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=403920.0, ans=0.1 +2024-09-18 08:16:44,189 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.05 vs. limit=22.5 +2024-09-18 08:16:58,235 INFO [train.py:1198] (1/2) Epoch 23, batch 1450, loss[loss=0.2526, ctc_loss=0.1353, cr_loss=0.3863, attn_decoder_loss=0.2571, over 29446.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1313, cr_loss=0.3748, attn_decoder_loss=0.2483, over 5805456.39 frames. ], batch size: 94, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:17:23,284 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.53 vs. 
limit=22.5 +2024-09-18 08:17:46,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=404120.0, ans=0.125 +2024-09-18 08:17:47,799 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.505e+01 8.721e+01 9.213e+01 9.736e+01 2.438e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-18 08:17:55,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=404120.0, ans=0.125 +2024-09-18 08:17:55,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=404120.0, ans=0.125 +2024-09-18 08:18:01,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=404160.0, ans=0.09899494936611666 +2024-09-18 08:18:13,662 INFO [train.py:1198] (1/2) Epoch 23, batch 1500, loss[loss=0.2582, ctc_loss=0.1391, cr_loss=0.3917, attn_decoder_loss=0.2627, over 29649.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1318, cr_loss=0.3752, attn_decoder_loss=0.2488, over 5806293.25 frames. ], batch size: 86, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:18:26,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=404200.0, ans=0.125 +2024-09-18 08:18:30,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=404240.0, ans=0.125 +2024-09-18 08:18:44,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=404280.0, ans=0.125 +2024-09-18 08:19:07,449 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:19:08,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=404320.0, ans=0.125 +2024-09-18 08:19:18,719 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.85 vs. limit=15.0 +2024-09-18 08:19:24,533 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.06 vs. limit=6.0 +2024-09-18 08:19:29,873 INFO [train.py:1198] (1/2) Epoch 23, batch 1550, loss[loss=0.2643, ctc_loss=0.156, cr_loss=0.437, attn_decoder_loss=0.2666, over 29485.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1326, cr_loss=0.3767, attn_decoder_loss=0.249, over 5781024.97 frames. ], batch size: 90, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:19:53,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=404440.0, ans=0.125 +2024-09-18 08:20:14,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=404480.0, ans=0.125 +2024-09-18 08:20:17,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=404520.0, ans=0.1 +2024-09-18 08:20:21,466 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.91 vs. 
limit=22.5 +2024-09-18 08:20:22,179 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.739e+01 8.646e+01 9.169e+01 9.938e+01 3.341e+02, threshold=1.834e+02, percent-clipped=2.0 +2024-09-18 08:20:41,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=404560.0, ans=0.0 +2024-09-18 08:20:50,183 INFO [train.py:1198] (1/2) Epoch 23, batch 1600, loss[loss=0.2579, ctc_loss=0.1396, cr_loss=0.3932, attn_decoder_loss=0.2623, over 29682.00 frames. ], tot_loss[loss=0.2446, ctc_loss=0.1326, cr_loss=0.3764, attn_decoder_loss=0.2487, over 5764184.22 frames. ], batch size: 85, lr: 4.85e-03, grad_scale: 16.0 +2024-09-18 08:20:51,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.26 vs. limit=6.0 +2024-09-18 08:21:03,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=404640.0, ans=0.0 +2024-09-18 08:21:11,527 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:21:31,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=404680.0, ans=0.2 +2024-09-18 08:21:35,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=404720.0, ans=0.2 +2024-09-18 08:21:57,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.77 vs. limit=15.0 +2024-09-18 08:22:06,228 INFO [train.py:1198] (1/2) Epoch 23, batch 1650, loss[loss=0.2479, ctc_loss=0.1275, cr_loss=0.3805, attn_decoder_loss=0.2529, over 29691.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1319, cr_loss=0.3753, attn_decoder_loss=0.2483, over 5758280.20 frames. ], batch size: 89, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:22:08,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=404800.0, ans=0.0 +2024-09-18 08:22:20,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=404840.0, ans=0.0 +2024-09-18 08:22:34,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=404840.0, ans=0.125 +2024-09-18 08:22:58,240 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.577e+01 9.100e+01 9.886e+01 2.579e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-18 08:23:15,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=404960.0, ans=0.125 +2024-09-18 08:23:17,044 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.56 vs. limit=10.0 +2024-09-18 08:23:22,332 INFO [train.py:1198] (1/2) Epoch 23, batch 1700, loss[loss=0.2157, ctc_loss=0.1052, cr_loss=0.3225, attn_decoder_loss=0.2208, over 29527.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1314, cr_loss=0.3748, attn_decoder_loss=0.2479, over 5779587.89 frames. ], batch size: 69, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:23:22,932 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.74 vs. 
limit=12.0 +2024-09-18 08:23:30,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=405000.0, ans=0.0 +2024-09-18 08:23:41,912 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.66 vs. limit=5.0 +2024-09-18 08:23:45,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=405040.0, ans=0.125 +2024-09-18 08:24:01,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=405080.0, ans=0.025 +2024-09-18 08:24:06,560 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.73 vs. limit=22.5 +2024-09-18 08:24:07,926 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.81 vs. limit=12.0 +2024-09-18 08:24:42,501 INFO [train.py:1198] (1/2) Epoch 23, batch 1750, loss[loss=0.2139, ctc_loss=0.1138, cr_loss=0.3486, attn_decoder_loss=0.2173, over 29330.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1305, cr_loss=0.3728, attn_decoder_loss=0.2472, over 5787786.28 frames. ], batch size: 67, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:24:59,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=405240.0, ans=0.125 +2024-09-18 08:25:01,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=405240.0, ans=0.1 +2024-09-18 08:25:10,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=405240.0, ans=0.2 +2024-09-18 08:25:11,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=405280.0, ans=0.0 +2024-09-18 08:25:13,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=405280.0, ans=0.125 +2024-09-18 08:25:18,303 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.36 vs. limit=15.0 +2024-09-18 08:25:33,641 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.000e+01 8.508e+01 9.190e+01 9.615e+01 2.377e+02, threshold=1.838e+02, percent-clipped=1.0 +2024-09-18 08:25:40,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=405320.0, ans=0.125 +2024-09-18 08:25:54,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=405360.0, ans=0.125 +2024-09-18 08:25:57,527 INFO [train.py:1198] (1/2) Epoch 23, batch 1800, loss[loss=0.2565, ctc_loss=0.1375, cr_loss=0.3899, attn_decoder_loss=0.261, over 29689.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1309, cr_loss=0.3737, attn_decoder_loss=0.2476, over 5791248.52 frames. ], batch size: 83, lr: 4.85e-03, grad_scale: 8.0 +2024-09-18 08:26:02,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=405400.0, ans=0.2 +2024-09-18 08:26:21,513 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.30 vs. 
limit=10.0 +2024-09-18 08:26:27,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.58 vs. limit=12.0 +2024-09-18 08:26:58,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=405560.0, ans=0.2 +2024-09-18 08:27:13,786 INFO [train.py:1198] (1/2) Epoch 23, batch 1850, loss[loss=0.2533, ctc_loss=0.1345, cr_loss=0.3789, attn_decoder_loss=0.258, over 29635.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1311, cr_loss=0.3744, attn_decoder_loss=0.2477, over 5797312.37 frames. ], batch size: 86, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:27:41,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=405640.0, ans=0.125 +2024-09-18 08:27:42,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=405680.0, ans=0.125 +2024-09-18 08:27:49,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=405680.0, ans=0.0 +2024-09-18 08:28:07,768 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.439e+01 8.499e+01 9.020e+01 9.564e+01 1.401e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-18 08:28:17,920 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.43 vs. limit=15.0 +2024-09-18 08:28:31,644 INFO [train.py:1198] (1/2) Epoch 23, batch 1900, loss[loss=0.2441, ctc_loss=0.1257, cr_loss=0.3518, attn_decoder_loss=0.2494, over 29706.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1316, cr_loss=0.3754, attn_decoder_loss=0.2482, over 5804990.08 frames. ], batch size: 89, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:28:46,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=405800.0, ans=0.125 +2024-09-18 08:28:58,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=405840.0, ans=0.025 +2024-09-18 08:29:08,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=405880.0, ans=0.125 +2024-09-18 08:29:17,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=405880.0, ans=0.125 +2024-09-18 08:29:27,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=405920.0, ans=0.2 +2024-09-18 08:29:35,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=405960.0, ans=0.1 +2024-09-18 08:29:41,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=405960.0, ans=0.0 +2024-09-18 08:29:50,195 INFO [train.py:1198] (1/2) Epoch 23, batch 1950, loss[loss=0.2425, ctc_loss=0.1328, cr_loss=0.3805, attn_decoder_loss=0.2463, over 29456.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1328, cr_loss=0.378, attn_decoder_loss=0.2496, over 5819533.60 frames. ], batch size: 78, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:29:52,526 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.16 vs. 
limit=22.5 +2024-09-18 08:29:56,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=406000.0, ans=0.0 +2024-09-18 08:30:05,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=406040.0, ans=0.125 +2024-09-18 08:30:14,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=406040.0, ans=0.125 +2024-09-18 08:30:41,519 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.546e+01 8.634e+01 9.173e+01 9.833e+01 1.215e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-18 08:30:46,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=406120.0, ans=10.0 +2024-09-18 08:31:01,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=406160.0, ans=0.125 +2024-09-18 08:31:05,553 INFO [train.py:1198] (1/2) Epoch 23, batch 2000, loss[loss=0.2216, ctc_loss=0.1116, cr_loss=0.3403, attn_decoder_loss=0.2263, over 29348.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1331, cr_loss=0.378, attn_decoder_loss=0.25, over 5797773.57 frames. ], batch size: 67, lr: 4.84e-03, grad_scale: 16.0 +2024-09-18 08:31:07,512 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:31:18,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.89 vs. limit=15.0 +2024-09-18 08:31:36,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.76 vs. limit=15.0 +2024-09-18 08:31:41,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=406280.0, ans=0.125 +2024-09-18 08:31:51,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=406320.0, ans=0.0 +2024-09-18 08:31:51,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=406320.0, ans=0.125 +2024-09-18 08:31:58,436 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.55 vs. limit=22.5 +2024-09-18 08:32:22,986 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=10.33 vs. limit=15.0 +2024-09-18 08:32:23,713 INFO [train.py:1198] (1/2) Epoch 23, batch 2050, loss[loss=0.2202, ctc_loss=0.1175, cr_loss=0.3499, attn_decoder_loss=0.2239, over 29456.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1323, cr_loss=0.3753, attn_decoder_loss=0.2487, over 5789289.86 frames. ], batch size: 70, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:33:05,958 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.71 vs. 
limit=22.5 +2024-09-18 08:33:07,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=406480.0, ans=0.0 +2024-09-18 08:33:07,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.56 vs. limit=15.0 +2024-09-18 08:33:16,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=406520.0, ans=0.025 +2024-09-18 08:33:18,754 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.624e+01 8.483e+01 9.027e+01 9.590e+01 1.679e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-18 08:33:25,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.73 vs. limit=15.0 +2024-09-18 08:33:30,130 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.72 vs. limit=15.0 +2024-09-18 08:33:34,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=406560.0, ans=0.125 +2024-09-18 08:33:41,484 INFO [train.py:1198] (1/2) Epoch 23, batch 2100, loss[loss=0.2415, ctc_loss=0.1282, cr_loss=0.3701, attn_decoder_loss=0.2458, over 29758.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1315, cr_loss=0.3743, attn_decoder_loss=0.2481, over 5799938.31 frames. ], batch size: 81, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:34:04,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=406640.0, ans=0.1 +2024-09-18 08:34:13,501 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.55 vs. limit=22.5 +2024-09-18 08:34:21,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=406680.0, ans=0.0 +2024-09-18 08:34:25,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=406720.0, ans=0.125 +2024-09-18 08:34:31,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=406720.0, ans=0.025 +2024-09-18 08:34:46,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=406760.0, ans=0.125 +2024-09-18 08:34:56,605 INFO [train.py:1198] (1/2) Epoch 23, batch 2150, loss[loss=0.2402, ctc_loss=0.1286, cr_loss=0.3725, attn_decoder_loss=0.2443, over 29443.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.131, cr_loss=0.3745, attn_decoder_loss=0.2477, over 5814850.91 frames. 
], batch size: 78, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:35:42,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=406920.0, ans=0.125 +2024-09-18 08:35:51,675 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.371e+01 8.476e+01 8.832e+01 9.481e+01 1.697e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-18 08:35:56,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=406920.0, ans=0.025 +2024-09-18 08:36:14,436 INFO [train.py:1198] (1/2) Epoch 23, batch 2200, loss[loss=0.2643, ctc_loss=0.1395, cr_loss=0.3926, attn_decoder_loss=0.2695, over 29643.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1312, cr_loss=0.3742, attn_decoder_loss=0.2479, over 5811756.58 frames. ], batch size: 86, lr: 4.84e-03, grad_scale: 8.0 +2024-09-18 08:36:28,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=407040.0, ans=0.125 +2024-09-18 08:36:36,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=407040.0, ans=0.0 +2024-09-18 08:36:50,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=407080.0, ans=0.125 +2024-09-18 08:36:53,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.min_positive, batch_count=407080.0, ans=0.025 +2024-09-18 08:37:06,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=407120.0, ans=0.1 +2024-09-18 08:37:19,094 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:37:19,551 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.73 vs. limit=15.0 +2024-09-18 08:37:23,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=407160.0, ans=0.1 +2024-09-18 08:37:32,955 INFO [train.py:1198] (1/2) Epoch 23, batch 2250, loss[loss=0.2479, ctc_loss=0.1269, cr_loss=0.3696, attn_decoder_loss=0.2532, over 29700.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.131, cr_loss=0.3736, attn_decoder_loss=0.2476, over 5811472.41 frames. ], batch size: 82, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:37:46,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=407240.0, ans=0.0 +2024-09-18 08:37:51,899 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.14 vs. 
limit=6.0 +2024-09-18 08:38:06,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=407280.0, ans=0.025 +2024-09-18 08:38:25,776 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.661e+01 8.566e+01 9.041e+01 9.811e+01 1.660e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-18 08:38:36,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=407360.0, ans=0.125 +2024-09-18 08:38:44,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=407360.0, ans=0.0 +2024-09-18 08:38:48,507 INFO [train.py:1198] (1/2) Epoch 23, batch 2300, loss[loss=0.2149, ctc_loss=0.1101, cr_loss=0.3314, attn_decoder_loss=0.2192, over 29731.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1301, cr_loss=0.3717, attn_decoder_loss=0.2465, over 5799651.58 frames. ], batch size: 72, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:39:02,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=407440.0, ans=0.1 +2024-09-18 08:39:03,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=407440.0, ans=0.0 +2024-09-18 08:39:26,392 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:39:30,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=407480.0, ans=0.2 +2024-09-18 08:39:50,239 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.80 vs. limit=15.0 +2024-09-18 08:40:06,181 INFO [train.py:1198] (1/2) Epoch 23, batch 2350, loss[loss=0.2663, ctc_loss=0.1469, cr_loss=0.4119, attn_decoder_loss=0.2704, over 29699.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1303, cr_loss=0.3721, attn_decoder_loss=0.2467, over 5805143.51 frames. ], batch size: 83, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:40:23,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.24 vs. limit=22.5 +2024-09-18 08:40:30,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=407640.0, ans=0.125 +2024-09-18 08:40:57,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=407720.0, ans=0.035 +2024-09-18 08:40:57,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=407720.0, ans=0.125 +2024-09-18 08:41:01,410 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.649e+01 9.243e+01 9.923e+01 8.680e+02, threshold=1.849e+02, percent-clipped=2.0 +2024-09-18 08:41:12,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=407760.0, ans=0.0 +2024-09-18 08:41:23,896 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.44 vs. limit=12.0 +2024-09-18 08:41:24,566 INFO [train.py:1198] (1/2) Epoch 23, batch 2400, loss[loss=0.2351, ctc_loss=0.1225, cr_loss=0.35, attn_decoder_loss=0.2398, over 29534.00 frames. 
], tot_loss[loss=0.2428, ctc_loss=0.1302, cr_loss=0.3719, attn_decoder_loss=0.247, over 5807838.22 frames. ], batch size: 76, lr: 4.83e-03, grad_scale: 16.0 +2024-09-18 08:41:27,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=9.90 vs. limit=15.0 +2024-09-18 08:41:33,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=407800.0, ans=0.125 +2024-09-18 08:41:36,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=407800.0, ans=0.2 +2024-09-18 08:41:54,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=407880.0, ans=0.125 +2024-09-18 08:42:31,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=407960.0, ans=0.2 +2024-09-18 08:42:31,995 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.96 vs. limit=22.5 +2024-09-18 08:42:40,653 INFO [train.py:1198] (1/2) Epoch 23, batch 2450, loss[loss=0.2438, ctc_loss=0.1303, cr_loss=0.3649, attn_decoder_loss=0.2483, over 29729.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1309, cr_loss=0.3729, attn_decoder_loss=0.2479, over 5784720.56 frames. ], batch size: 82, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:42:55,208 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.42 vs. limit=15.0 +2024-09-18 08:43:33,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.38 vs. limit=15.0 +2024-09-18 08:43:37,140 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.992e+01 9.709e+01 1.062e+02 3.982e+02, threshold=1.942e+02, percent-clipped=1.0 +2024-09-18 08:43:53,329 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.69 vs. limit=15.0 +2024-09-18 08:43:58,466 INFO [train.py:1198] (1/2) Epoch 23, batch 2500, loss[loss=0.2624, ctc_loss=0.1407, cr_loss=0.4168, attn_decoder_loss=0.2667, over 29609.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1306, cr_loss=0.3729, attn_decoder_loss=0.2477, over 5796263.33 frames. ], batch size: 86, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:44:07,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=408200.0, ans=0.125 +2024-09-18 08:44:22,178 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.83 vs. limit=15.0 +2024-09-18 08:44:24,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=408240.0, ans=0.025 +2024-09-18 08:45:04,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=408360.0, ans=0.2 +2024-09-18 08:45:08,731 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.72 vs. 
limit=10.0 +2024-09-18 08:45:16,694 INFO [train.py:1198] (1/2) Epoch 23, batch 2550, loss[loss=0.2196, ctc_loss=0.1188, cr_loss=0.3627, attn_decoder_loss=0.2228, over 29354.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.131, cr_loss=0.3738, attn_decoder_loss=0.2478, over 5798919.59 frames. ], batch size: 67, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:45:19,040 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.63 vs. limit=15.0 +2024-09-18 08:45:41,673 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.57 vs. limit=15.0 +2024-09-18 08:45:47,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=408480.0, ans=0.0 +2024-09-18 08:46:02,567 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.62 vs. limit=12.0 +2024-09-18 08:46:05,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=408520.0, ans=0.125 +2024-09-18 08:46:11,113 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.224e+01 8.424e+01 8.872e+01 9.650e+01 4.846e+02, threshold=1.774e+02, percent-clipped=2.0 +2024-09-18 08:46:11,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=408520.0, ans=0.025 +2024-09-18 08:46:14,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=408520.0, ans=0.125 +2024-09-18 08:46:14,898 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.15 vs. limit=6.0 +2024-09-18 08:46:25,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=408560.0, ans=0.125 +2024-09-18 08:46:31,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=408600.0, ans=0.2 +2024-09-18 08:46:32,466 INFO [train.py:1198] (1/2) Epoch 23, batch 2600, loss[loss=0.2429, ctc_loss=0.1397, cr_loss=0.3856, attn_decoder_loss=0.2458, over 29445.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1315, cr_loss=0.3744, attn_decoder_loss=0.2481, over 5795250.18 frames. 
], batch size: 78, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:46:41,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=408600.0, ans=10.0 +2024-09-18 08:46:43,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=408600.0, ans=0.125 +2024-09-18 08:47:07,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_positive, batch_count=408680.0, ans=0.05 +2024-09-18 08:47:23,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=408720.0, ans=0.125 +2024-09-18 08:47:23,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=408720.0, ans=0.125 +2024-09-18 08:47:27,591 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:47:42,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=408760.0, ans=0.025 +2024-09-18 08:47:46,207 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.28 vs. limit=15.0 +2024-09-18 08:47:50,072 INFO [train.py:1198] (1/2) Epoch 23, batch 2650, loss[loss=0.2557, ctc_loss=0.1391, cr_loss=0.3814, attn_decoder_loss=0.2602, over 29264.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1313, cr_loss=0.3744, attn_decoder_loss=0.2483, over 5801781.60 frames. ], batch size: 100, lr: 4.83e-03, grad_scale: 8.0 +2024-09-18 08:47:51,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=408800.0, ans=0.025 +2024-09-18 08:47:53,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=408800.0, ans=0.2 +2024-09-18 08:48:23,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=408880.0, ans=0.2 +2024-09-18 08:48:33,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=408880.0, ans=0.125 +2024-09-18 08:48:43,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=408920.0, ans=0.025 +2024-09-18 08:48:46,443 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.322e+01 8.846e+01 9.392e+01 1.397e+02, threshold=1.769e+02, percent-clipped=0.0 +2024-09-18 08:48:47,142 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.57 vs. limit=15.0 +2024-09-18 08:48:54,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=408960.0, ans=0.0 +2024-09-18 08:48:59,238 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.52 vs. 
limit=22.5 +2024-09-18 08:49:01,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=408960.0, ans=0.125 +2024-09-18 08:49:07,644 INFO [train.py:1198] (1/2) Epoch 23, batch 2700, loss[loss=0.2503, ctc_loss=0.1251, cr_loss=0.3547, attn_decoder_loss=0.2563, over 29542.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1314, cr_loss=0.3745, attn_decoder_loss=0.2485, over 5795156.11 frames. ], batch size: 87, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:49:16,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=409000.0, ans=0.0 +2024-09-18 08:49:17,336 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.12 vs. limit=22.5 +2024-09-18 08:49:21,930 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.58 vs. limit=6.0 +2024-09-18 08:49:38,509 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.53 vs. limit=15.0 +2024-09-18 08:49:39,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=409080.0, ans=0.2 +2024-09-18 08:50:00,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=409120.0, ans=0.0 +2024-09-18 08:50:23,533 INFO [train.py:1198] (1/2) Epoch 23, batch 2750, loss[loss=0.2324, ctc_loss=0.1188, cr_loss=0.3378, attn_decoder_loss=0.2375, over 29510.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1303, cr_loss=0.3721, attn_decoder_loss=0.2473, over 5793699.69 frames. ], batch size: 75, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:50:40,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=409240.0, ans=0.5 +2024-09-18 08:50:58,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=409280.0, ans=0.0 +2024-09-18 08:51:08,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=409280.0, ans=0.125 +2024-09-18 08:51:20,301 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.385e+01 8.421e+01 8.872e+01 9.349e+01 6.581e+02, threshold=1.774e+02, percent-clipped=3.0 +2024-09-18 08:51:41,683 INFO [train.py:1198] (1/2) Epoch 23, batch 2800, loss[loss=0.2655, ctc_loss=0.1673, cr_loss=0.4009, attn_decoder_loss=0.2676, over 19345.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1308, cr_loss=0.3722, attn_decoder_loss=0.2475, over 5774242.90 frames. 
], batch size: 210, lr: 4.82e-03, grad_scale: 16.0 +2024-09-18 08:51:49,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=409400.0, ans=10.0 +2024-09-18 08:51:49,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=409400.0, ans=0.0 +2024-09-18 08:52:06,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=409440.0, ans=0.0 +2024-09-18 08:52:10,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=409480.0, ans=0.125 +2024-09-18 08:52:13,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=409480.0, ans=0.1 +2024-09-18 08:52:27,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=409520.0, ans=0.125 +2024-09-18 08:52:35,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=409520.0, ans=0.2 +2024-09-18 08:52:51,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=409560.0, ans=0.125 +2024-09-18 08:52:59,484 INFO [train.py:1198] (1/2) Epoch 23, batch 2850, loss[loss=0.2445, ctc_loss=0.1285, cr_loss=0.3894, attn_decoder_loss=0.2487, over 29501.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1316, cr_loss=0.3736, attn_decoder_loss=0.2481, over 5761207.15 frames. ], batch size: 77, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:53:18,131 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 08:53:21,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=409640.0, ans=0.125 +2024-09-18 08:53:55,293 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.299e+01 8.592e+01 9.017e+01 9.666e+01 1.557e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-18 08:54:15,009 INFO [train.py:1198] (1/2) Epoch 23, batch 2900, loss[loss=0.2497, ctc_loss=0.1391, cr_loss=0.3943, attn_decoder_loss=0.2533, over 29429.00 frames. ], tot_loss[loss=0.245, ctc_loss=0.1322, cr_loss=0.3757, attn_decoder_loss=0.2491, over 5786642.11 frames. ], batch size: 79, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:54:27,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=409800.0, ans=0.2 +2024-09-18 08:54:30,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=409840.0, ans=0.05 +2024-09-18 08:54:36,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=409840.0, ans=0.2 +2024-09-18 08:54:48,567 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.35 vs. limit=15.0 +2024-09-18 08:55:03,623 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.92 vs. 
limit=15.0 +2024-09-18 08:55:20,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=409960.0, ans=0.125 +2024-09-18 08:55:33,240 INFO [train.py:1198] (1/2) Epoch 23, batch 2950, loss[loss=0.2287, ctc_loss=0.1186, cr_loss=0.3543, attn_decoder_loss=0.233, over 29522.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1318, cr_loss=0.3744, attn_decoder_loss=0.2483, over 5781914.27 frames. ], batch size: 75, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:55:36,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=410000.0, ans=0.0 +2024-09-18 08:55:50,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=410040.0, ans=0.125 +2024-09-18 08:56:00,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=410040.0, ans=0.125 +2024-09-18 08:56:06,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=410080.0, ans=0.2 +2024-09-18 08:56:08,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_na.min_abs, batch_count=410080.0, ans=0.02 +2024-09-18 08:56:29,685 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=11.78 vs. limit=15.0 +2024-09-18 08:56:30,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=410120.0, ans=0.1 +2024-09-18 08:56:31,675 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.552e+01 9.299e+01 9.927e+01 2.795e+02, threshold=1.860e+02, percent-clipped=1.0 +2024-09-18 08:56:51,630 INFO [train.py:1198] (1/2) Epoch 23, batch 3000, loss[loss=0.248, ctc_loss=0.1336, cr_loss=0.3793, attn_decoder_loss=0.2522, over 29764.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1317, cr_loss=0.3745, attn_decoder_loss=0.248, over 5783600.97 frames. ], batch size: 81, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:56:51,631 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 08:57:10,060 INFO [train.py:1230] (1/2) Epoch 23, validation: loss=0.2116, ctc_loss=0.03932, cr_loss=5.516e-15, attn_decoder_loss=0.2308, over 944034.00 frames. 
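The tot_loss[...] entries in this log carry the running training curve: the combined loss plus its ctc_loss, cr_loss, and attn_decoder_loss components, averaged over the frame count given at the end of each summary. Below is a minimal sketch (not part of icefall) for pulling those fields out of a log shaped like this one; the regex assumes the exact "Epoch N, batch M ... tot_loss[...]" layout emitted by train.py in this run, and the log path and helper name are illustrative placeholders.

    # Sketch: extract per-batch tot_loss fields from icefall-style train logs.
    # Assumes the field layout shown in this file; adjust the regex if the
    # train.py logging format differs.
    import re

    # Matches e.g. "Epoch 23, batch 2000, ... tot_loss[loss=0.2458,
    # ctc_loss=0.1331, cr_loss=0.378, attn_decoder_loss=0.25, over ... frames."
    TOT_LOSS_RE = re.compile(
        r"Epoch (?P<epoch>\d+), batch (?P<batch>\d+).*?"
        r"tot_loss\[loss=(?P<loss>[\d.]+), ctc_loss=(?P<ctc>[\d.]+), "
        r"cr_loss=(?P<cr>[\d.]+), attn_decoder_loss=(?P<attn>[\d.]+)"
    )

    def tot_losses(path):
        """Yield (epoch, batch, loss, ctc_loss, cr_loss, attn_decoder_loss)."""
        with open(path, errors="replace") as f:
            for line in f:
                m = TOT_LOSS_RE.search(line)
                if m:
                    yield (
                        int(m["epoch"]), int(m["batch"]),
                        float(m["loss"]), float(m["ctc"]),
                        float(m["cr"]), float(m["attn"]),
                    )

    if __name__ == "__main__":
        # "log-train.txt" is a placeholder path for a log file like this one.
        for row in tot_losses("log-train.txt"):
            print(row)

Each yielded tuple corresponds to one "Epoch N, batch M" training summary; the per-sample loss[...] diagnostics, validation summaries, and the ScheduledFloat/Whitening lines use different layouts and are skipped by the regex.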
+2024-09-18 08:57:10,060 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 08:57:10,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=410200.0, ans=0.07 +2024-09-18 08:57:24,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=410240.0, ans=0.025 +2024-09-18 08:57:29,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=410240.0, ans=0.025 +2024-09-18 08:57:30,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=410240.0, ans=0.125 +2024-09-18 08:57:39,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten.whitening_limit, batch_count=410280.0, ans=15.0 +2024-09-18 08:57:40,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=410280.0, ans=0.0 +2024-09-18 08:58:26,565 INFO [train.py:1198] (1/2) Epoch 23, batch 3050, loss[loss=0.2308, ctc_loss=0.1221, cr_loss=0.3631, attn_decoder_loss=0.2349, over 29540.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1323, cr_loss=0.3755, attn_decoder_loss=0.2489, over 5776450.77 frames. ], batch size: 76, lr: 4.82e-03, grad_scale: 8.0 +2024-09-18 08:58:49,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=410440.0, ans=0.0 +2024-09-18 08:58:58,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=410480.0, ans=0.125 +2024-09-18 08:59:02,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=410480.0, ans=0.0 +2024-09-18 08:59:08,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=410480.0, ans=0.0 +2024-09-18 08:59:20,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=410520.0, ans=0.125 +2024-09-18 08:59:24,739 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.520e+01 8.808e+01 9.332e+01 1.013e+02 4.220e+02, threshold=1.866e+02, percent-clipped=2.0 +2024-09-18 08:59:33,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten.whitening_limit, batch_count=410560.0, ans=15.0 +2024-09-18 08:59:38,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=410560.0, ans=0.125 +2024-09-18 08:59:44,268 INFO [train.py:1198] (1/2) Epoch 23, batch 3100, loss[loss=0.2586, ctc_loss=0.1367, cr_loss=0.3715, attn_decoder_loss=0.2639, over 29250.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1317, cr_loss=0.3741, attn_decoder_loss=0.2484, over 5776925.08 frames. 
], batch size: 100, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 08:59:47,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=410600.0, ans=0.0 +2024-09-18 08:59:59,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=410640.0, ans=0.025 +2024-09-18 09:00:01,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=410640.0, ans=0.1 +2024-09-18 09:00:09,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=410640.0, ans=0.125 +2024-09-18 09:00:51,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=410760.0, ans=0.1 +2024-09-18 09:01:02,038 INFO [train.py:1198] (1/2) Epoch 23, batch 3150, loss[loss=0.2551, ctc_loss=0.1359, cr_loss=0.3831, attn_decoder_loss=0.2599, over 28898.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1317, cr_loss=0.3745, attn_decoder_loss=0.2484, over 5783974.54 frames. ], batch size: 104, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:01:19,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.43 vs. limit=15.0 +2024-09-18 09:01:22,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=410840.0, ans=0.5 +2024-09-18 09:01:31,909 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.49 vs. limit=15.0 +2024-09-18 09:01:50,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=410920.0, ans=0.0 +2024-09-18 09:01:56,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=410920.0, ans=0.125 +2024-09-18 09:01:57,777 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.407e+01 8.637e+01 9.168e+01 9.786e+01 2.272e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-18 09:02:10,965 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.96 vs. limit=12.0 +2024-09-18 09:02:13,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=410960.0, ans=0.1 +2024-09-18 09:02:17,390 INFO [train.py:1198] (1/2) Epoch 23, batch 3200, loss[loss=0.2593, ctc_loss=0.1467, cr_loss=0.4305, attn_decoder_loss=0.2622, over 29422.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1313, cr_loss=0.3743, attn_decoder_loss=0.248, over 5793671.28 frames. ], batch size: 79, lr: 4.81e-03, grad_scale: 16.0 +2024-09-18 09:02:33,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.74 vs. limit=22.5 +2024-09-18 09:02:54,085 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.48 vs. 
limit=15.0 +2024-09-18 09:03:08,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=411120.0, ans=0.125 +2024-09-18 09:03:20,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=411160.0, ans=0.125 +2024-09-18 09:03:20,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=411160.0, ans=0.2 +2024-09-18 09:03:22,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=411160.0, ans=0.2 +2024-09-18 09:03:35,718 INFO [train.py:1198] (1/2) Epoch 23, batch 3250, loss[loss=0.2538, ctc_loss=0.1364, cr_loss=0.4052, attn_decoder_loss=0.2578, over 29696.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1318, cr_loss=0.3756, attn_decoder_loss=0.2485, over 5801720.39 frames. ], batch size: 84, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:03:57,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=411240.0, ans=0.0 +2024-09-18 09:04:26,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=411320.0, ans=0.1 +2024-09-18 09:04:34,986 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.655e+01 9.272e+01 9.823e+01 1.322e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-18 09:04:35,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=411320.0, ans=0.125 +2024-09-18 09:04:53,307 INFO [train.py:1198] (1/2) Epoch 23, batch 3300, loss[loss=0.2495, ctc_loss=0.1352, cr_loss=0.3721, attn_decoder_loss=0.2539, over 28110.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1314, cr_loss=0.3745, attn_decoder_loss=0.2474, over 5799028.38 frames. ], batch size: 111, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:04:58,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.71 vs. limit=6.0 +2024-09-18 09:05:26,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=411480.0, ans=0.0 +2024-09-18 09:05:26,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=411480.0, ans=0.125 +2024-09-18 09:05:55,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=411560.0, ans=0.0 +2024-09-18 09:06:05,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.99 vs. limit=22.5 +2024-09-18 09:06:08,660 INFO [train.py:1198] (1/2) Epoch 23, batch 3350, loss[loss=0.2531, ctc_loss=0.1345, cr_loss=0.3879, attn_decoder_loss=0.2576, over 28794.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1322, cr_loss=0.376, attn_decoder_loss=0.2482, over 5775318.31 frames. ], batch size: 104, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:06:25,713 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.50 vs. 
limit=15.0 +2024-09-18 09:06:39,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.41 vs. limit=15.0 +2024-09-18 09:06:40,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=411680.0, ans=0.0 +2024-09-18 09:06:43,692 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.54 vs. limit=15.0 +2024-09-18 09:06:46,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=411680.0, ans=0.0 +2024-09-18 09:06:46,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=411680.0, ans=0.0 +2024-09-18 09:06:47,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=411680.0, ans=0.09899494936611666 +2024-09-18 09:06:58,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=411720.0, ans=0.125 +2024-09-18 09:07:02,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=411720.0, ans=0.125 +2024-09-18 09:07:03,532 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=6.03 vs. limit=15.0 +2024-09-18 09:07:08,459 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.442e+01 8.663e+01 9.206e+01 9.789e+01 2.075e+02, threshold=1.841e+02, percent-clipped=1.0 +2024-09-18 09:07:11,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=411760.0, ans=0.05 +2024-09-18 09:07:13,604 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.24 vs. limit=6.0 +2024-09-18 09:07:26,459 INFO [train.py:1198] (1/2) Epoch 23, batch 3400, loss[loss=0.2124, ctc_loss=0.1082, cr_loss=0.3285, attn_decoder_loss=0.2167, over 29346.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.132, cr_loss=0.3752, attn_decoder_loss=0.248, over 5766876.25 frames. 
], batch size: 67, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:07:31,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=411800.0, ans=0.0 +2024-09-18 09:08:12,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=411920.0, ans=0.0 +2024-09-18 09:08:16,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=411920.0, ans=0.1 +2024-09-18 09:08:25,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=411920.0, ans=0.035 +2024-09-18 09:08:32,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=411960.0, ans=10.0 +2024-09-18 09:08:35,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=411960.0, ans=0.5 +2024-09-18 09:08:37,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=411960.0, ans=0.125 +2024-09-18 09:08:41,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=411960.0, ans=0.025 +2024-09-18 09:08:44,749 INFO [train.py:1198] (1/2) Epoch 23, batch 3450, loss[loss=0.2486, ctc_loss=0.1299, cr_loss=0.3834, attn_decoder_loss=0.2533, over 28185.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1321, cr_loss=0.3758, attn_decoder_loss=0.2484, over 5775213.16 frames. ], batch size: 111, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:08:45,421 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.12 vs. limit=15.0 +2024-09-18 09:08:47,142 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.90 vs. limit=10.0 +2024-09-18 09:08:49,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=412000.0, ans=0.0 +2024-09-18 09:08:55,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=412000.0, ans=0.0 +2024-09-18 09:09:00,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=412040.0, ans=0.125 +2024-09-18 09:09:01,771 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:09:01,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=412040.0, ans=0.125 +2024-09-18 09:09:03,865 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.21 vs. limit=10.0 +2024-09-18 09:09:05,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.03 vs. 
limit=10.0 +2024-09-18 09:09:28,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=412120.0, ans=0.2 +2024-09-18 09:09:32,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.18 vs. limit=22.5 +2024-09-18 09:09:33,558 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:09:36,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=412120.0, ans=0.2 +2024-09-18 09:09:42,462 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.259e+01 8.670e+01 9.056e+01 9.530e+01 1.937e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-18 09:09:48,019 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.24 vs. limit=6.0 +2024-09-18 09:09:54,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=412160.0, ans=0.1 +2024-09-18 09:10:02,665 INFO [train.py:1198] (1/2) Epoch 23, batch 3500, loss[loss=0.2218, ctc_loss=0.1135, cr_loss=0.3277, attn_decoder_loss=0.2265, over 29320.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1314, cr_loss=0.3742, attn_decoder_loss=0.2476, over 5778079.98 frames. ], batch size: 71, lr: 4.81e-03, grad_scale: 8.0 +2024-09-18 09:10:07,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=412200.0, ans=0.0 +2024-09-18 09:10:15,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=412200.0, ans=0.2 +2024-09-18 09:10:16,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=412240.0, ans=0.1 +2024-09-18 09:10:21,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=412240.0, ans=0.125 +2024-09-18 09:10:29,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=412240.0, ans=0.1 +2024-09-18 09:10:37,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=412280.0, ans=0.0 +2024-09-18 09:10:40,755 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.02 vs. limit=12.0 +2024-09-18 09:10:55,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=412320.0, ans=0.2 +2024-09-18 09:11:09,277 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=3.94 vs. limit=15.0 +2024-09-18 09:11:17,262 INFO [train.py:1198] (1/2) Epoch 23, batch 3550, loss[loss=0.2463, ctc_loss=0.1224, cr_loss=0.3501, attn_decoder_loss=0.2523, over 29691.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1311, cr_loss=0.374, attn_decoder_loss=0.2476, over 5786707.33 frames. ], batch size: 89, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:11:25,545 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.93 vs. 
limit=12.0 +2024-09-18 09:11:29,849 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.87 vs. limit=22.5 +2024-09-18 09:11:35,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=412440.0, ans=0.2 +2024-09-18 09:11:53,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=412480.0, ans=10.0 +2024-09-18 09:11:54,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=412480.0, ans=0.125 +2024-09-18 09:12:11,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=412520.0, ans=0.1 +2024-09-18 09:12:14,057 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.214e+01 8.498e+01 8.967e+01 9.754e+01 1.546e+02, threshold=1.793e+02, percent-clipped=1.0 +2024-09-18 09:12:18,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=412560.0, ans=0.2 +2024-09-18 09:12:20,327 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:12:26,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=412560.0, ans=0.025 +2024-09-18 09:12:31,834 INFO [train.py:1198] (1/2) Epoch 23, batch 3600, loss[loss=0.2338, ctc_loss=0.124, cr_loss=0.3775, attn_decoder_loss=0.2376, over 29528.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1308, cr_loss=0.373, attn_decoder_loss=0.2475, over 5794801.49 frames. ], batch size: 77, lr: 4.80e-03, grad_scale: 16.0 +2024-09-18 09:12:59,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=412640.0, ans=0.1 +2024-09-18 09:13:01,620 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.98 vs. 
limit=22.5 +2024-09-18 09:13:02,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=412680.0, ans=0.125 +2024-09-18 09:13:09,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=412680.0, ans=0.125 +2024-09-18 09:13:18,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=412720.0, ans=0.125 +2024-09-18 09:13:21,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=412720.0, ans=0.125 +2024-09-18 09:13:21,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=412720.0, ans=0.125 +2024-09-18 09:13:35,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=412760.0, ans=0.2 +2024-09-18 09:13:42,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=412760.0, ans=0.5 +2024-09-18 09:13:44,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=412760.0, ans=0.125 +2024-09-18 09:13:47,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=412800.0, ans=0.0 +2024-09-18 09:13:48,737 INFO [train.py:1198] (1/2) Epoch 23, batch 3650, loss[loss=0.2654, ctc_loss=0.1466, cr_loss=0.4131, attn_decoder_loss=0.2694, over 29519.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1302, cr_loss=0.3718, attn_decoder_loss=0.2468, over 5796426.38 frames. ], batch size: 90, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:14:01,733 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.22 vs. limit=22.5 +2024-09-18 09:14:10,406 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.50 vs. limit=15.0 +2024-09-18 09:14:16,040 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.50 vs. limit=15.0 +2024-09-18 09:14:26,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=412880.0, ans=0.125 +2024-09-18 09:14:46,460 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.590e+01 8.375e+01 8.989e+01 9.606e+01 2.045e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-18 09:14:48,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=412960.0, ans=0.125 +2024-09-18 09:14:57,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=412960.0, ans=0.0 +2024-09-18 09:15:00,865 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.49 vs. limit=15.0 +2024-09-18 09:15:02,937 INFO [train.py:1198] (1/2) Epoch 23, batch 3700, loss[loss=0.2466, ctc_loss=0.1378, cr_loss=0.3814, attn_decoder_loss=0.2502, over 29708.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1302, cr_loss=0.3722, attn_decoder_loss=0.2469, over 5805968.35 frames. 
], batch size: 84, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:15:33,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=413080.0, ans=0.125 +2024-09-18 09:15:50,065 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.31 vs. limit=15.0 +2024-09-18 09:16:19,396 INFO [train.py:1198] (1/2) Epoch 23, batch 3750, loss[loss=0.2159, ctc_loss=0.1121, cr_loss=0.3434, attn_decoder_loss=0.2198, over 29281.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.13, cr_loss=0.3716, attn_decoder_loss=0.2466, over 5809616.55 frames. ], batch size: 67, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:16:43,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=413240.0, ans=0.125 +2024-09-18 09:16:55,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.58 vs. limit=15.0 +2024-09-18 09:17:03,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.19 vs. limit=22.5 +2024-09-18 09:17:17,038 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.618e+01 9.156e+01 9.859e+01 5.134e+02, threshold=1.831e+02, percent-clipped=3.0 +2024-09-18 09:17:23,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=413360.0, ans=0.05 +2024-09-18 09:17:30,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=413360.0, ans=0.2 +2024-09-18 09:17:32,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=413400.0, ans=0.2 +2024-09-18 09:17:33,576 INFO [train.py:1198] (1/2) Epoch 23, batch 3800, loss[loss=0.2545, ctc_loss=0.138, cr_loss=0.3806, attn_decoder_loss=0.259, over 29608.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1301, cr_loss=0.3717, attn_decoder_loss=0.2466, over 5799251.96 frames. ], batch size: 86, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:17:35,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=413400.0, ans=0.0 +2024-09-18 09:17:45,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=413400.0, ans=0.0 +2024-09-18 09:18:11,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=413480.0, ans=0.0 +2024-09-18 09:18:26,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=413520.0, ans=0.125 +2024-09-18 09:18:33,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=413560.0, ans=0.1 +2024-09-18 09:18:35,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=413560.0, ans=0.125 +2024-09-18 09:18:48,739 INFO [train.py:1198] (1/2) Epoch 23, batch 3850, loss[loss=0.27, ctc_loss=0.1575, cr_loss=0.4341, attn_decoder_loss=0.2729, over 29333.00 frames. 
], tot_loss[loss=0.2427, ctc_loss=0.1304, cr_loss=0.3726, attn_decoder_loss=0.2469, over 5813826.96 frames. ], batch size: 100, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:19:02,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=413600.0, ans=0.0 +2024-09-18 09:19:12,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=413640.0, ans=0.125 +2024-09-18 09:19:15,133 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.01 vs. limit=15.0 +2024-09-18 09:19:25,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.23 vs. limit=22.5 +2024-09-18 09:19:46,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=413720.0, ans=0.1 +2024-09-18 09:19:46,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=413720.0, ans=15.0 +2024-09-18 09:19:48,639 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.512e+01 9.090e+01 9.629e+01 1.233e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-18 09:20:05,015 INFO [train.py:1198] (1/2) Epoch 23, batch 3900, loss[loss=0.2598, ctc_loss=0.1377, cr_loss=0.3842, attn_decoder_loss=0.2648, over 29625.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1309, cr_loss=0.3735, attn_decoder_loss=0.2475, over 5817715.17 frames. ], batch size: 86, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:20:09,818 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:20:23,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=413840.0, ans=0.125 +2024-09-18 09:20:24,957 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.09 vs. limit=15.0 +2024-09-18 09:20:37,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=413880.0, ans=0.0 +2024-09-18 09:20:52,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=413920.0, ans=0.1 +2024-09-18 09:20:59,339 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.74 vs. 
limit=22.5 +2024-09-18 09:21:03,158 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:21:08,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=413960.0, ans=0.125 +2024-09-18 09:21:10,456 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:21:11,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=413960.0, ans=0.025 +2024-09-18 09:21:19,075 INFO [train.py:1198] (1/2) Epoch 23, batch 3950, loss[loss=0.254, ctc_loss=0.1373, cr_loss=0.4042, attn_decoder_loss=0.258, over 29432.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1305, cr_loss=0.3737, attn_decoder_loss=0.2476, over 5836790.54 frames. ], batch size: 97, lr: 4.80e-03, grad_scale: 8.0 +2024-09-18 09:21:28,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=414000.0, ans=0.1 +2024-09-18 09:21:29,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=414000.0, ans=0.125 +2024-09-18 09:21:41,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=414040.0, ans=0.04949747468305833 +2024-09-18 09:21:53,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=414080.0, ans=0.1 +2024-09-18 09:21:55,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.55 vs. limit=15.0 +2024-09-18 09:21:58,459 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.05 vs. limit=22.5 +2024-09-18 09:22:03,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=414120.0, ans=0.125 +2024-09-18 09:22:09,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=414120.0, ans=0.2 +2024-09-18 09:22:18,242 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.449e+01 8.566e+01 9.101e+01 9.931e+01 2.734e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-18 09:22:28,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=414160.0, ans=0.1 +2024-09-18 09:22:34,490 INFO [train.py:1198] (1/2) Epoch 23, batch 4000, loss[loss=0.2329, ctc_loss=0.1215, cr_loss=0.3526, attn_decoder_loss=0.2375, over 29526.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.131, cr_loss=0.3744, attn_decoder_loss=0.248, over 5813299.79 frames. ], batch size: 74, lr: 4.79e-03, grad_scale: 16.0 +2024-09-18 09:22:52,795 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.77 vs. limit=12.0 +2024-09-18 09:22:57,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.18 vs. 
limit=15.0 +2024-09-18 09:23:17,881 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.86 vs. limit=10.0 +2024-09-18 09:23:21,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=414320.0, ans=0.125 +2024-09-18 09:23:48,951 INFO [train.py:1198] (1/2) Epoch 23, batch 4050, loss[loss=0.2737, ctc_loss=0.171, cr_loss=0.4251, attn_decoder_loss=0.2756, over 21030.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1309, cr_loss=0.3734, attn_decoder_loss=0.2478, over 5796910.98 frames. ], batch size: 210, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:23:56,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=414400.0, ans=0.0 +2024-09-18 09:24:03,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=414440.0, ans=0.125 +2024-09-18 09:24:08,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=414440.0, ans=0.1 +2024-09-18 09:24:49,140 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.508e+01 8.682e+01 9.236e+01 9.757e+01 1.586e+02, threshold=1.847e+02, percent-clipped=0.0 +2024-09-18 09:25:03,925 INFO [train.py:1198] (1/2) Epoch 23, batch 4100, loss[loss=0.2399, ctc_loss=0.1139, cr_loss=0.3376, attn_decoder_loss=0.2464, over 29478.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1306, cr_loss=0.373, attn_decoder_loss=0.2475, over 5792170.81 frames. ], batch size: 90, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:25:08,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=414600.0, ans=0.0 +2024-09-18 09:25:14,974 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.74 vs. limit=15.0 +2024-09-18 09:26:09,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=414760.0, ans=0.0 +2024-09-18 09:26:10,526 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.30 vs. limit=10.0 +2024-09-18 09:26:18,790 INFO [train.py:1198] (1/2) Epoch 23, batch 4150, loss[loss=0.2474, ctc_loss=0.1367, cr_loss=0.3866, attn_decoder_loss=0.2512, over 29523.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1308, cr_loss=0.3738, attn_decoder_loss=0.2474, over 5798836.71 frames. 
], batch size: 77, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:26:23,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=414800.0, ans=0.2 +2024-09-18 09:26:29,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=414800.0, ans=0.0 +2024-09-18 09:26:41,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=414840.0, ans=0.125 +2024-09-18 09:27:07,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=414920.0, ans=0.2 +2024-09-18 09:27:17,890 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.679e+01 8.495e+01 8.961e+01 9.619e+01 1.585e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-18 09:27:32,606 INFO [train.py:1198] (1/2) Epoch 23, batch 4200, loss[loss=0.2564, ctc_loss=0.137, cr_loss=0.3915, attn_decoder_loss=0.261, over 29508.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1314, cr_loss=0.3748, attn_decoder_loss=0.2481, over 5801278.38 frames. ], batch size: 90, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:27:50,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=415040.0, ans=0.125 +2024-09-18 09:27:55,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=415040.0, ans=0.07 +2024-09-18 09:28:12,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=415080.0, ans=0.0 +2024-09-18 09:28:16,991 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:28:21,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=415120.0, ans=0.0 +2024-09-18 09:28:27,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=415120.0, ans=0.125 +2024-09-18 09:28:33,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=415160.0, ans=0.0 +2024-09-18 09:28:37,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=415160.0, ans=0.0 +2024-09-18 09:28:48,192 INFO [train.py:1198] (1/2) Epoch 23, batch 4250, loss[loss=0.2291, ctc_loss=0.1163, cr_loss=0.3406, attn_decoder_loss=0.234, over 29500.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.131, cr_loss=0.3741, attn_decoder_loss=0.2481, over 5806020.64 frames. 
], batch size: 74, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:28:59,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2.whitening_limit, batch_count=415200.0, ans=15.0 +2024-09-18 09:29:17,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=415280.0, ans=0.125 +2024-09-18 09:29:45,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=415320.0, ans=0.0 +2024-09-18 09:29:47,863 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.786e+01 8.728e+01 9.274e+01 9.904e+01 2.860e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-18 09:29:50,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=415360.0, ans=0.0 +2024-09-18 09:29:52,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=415360.0, ans=0.1 +2024-09-18 09:30:00,087 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:30:00,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=415360.0, ans=0.1 +2024-09-18 09:30:02,722 INFO [train.py:1198] (1/2) Epoch 23, batch 4300, loss[loss=0.2473, ctc_loss=0.1265, cr_loss=0.379, attn_decoder_loss=0.2523, over 29545.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1307, cr_loss=0.3737, attn_decoder_loss=0.2482, over 5794157.95 frames. ], batch size: 87, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:30:54,157 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.15 vs. limit=12.0 +2024-09-18 09:31:09,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=415560.0, ans=0.025 +2024-09-18 09:31:16,767 INFO [train.py:1198] (1/2) Epoch 23, batch 4350, loss[loss=0.2611, ctc_loss=0.1455, cr_loss=0.4132, attn_decoder_loss=0.2648, over 29491.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1333, cr_loss=0.3788, attn_decoder_loss=0.2515, over 5797165.84 frames. ], batch size: 97, lr: 4.79e-03, grad_scale: 8.0 +2024-09-18 09:31:22,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=415600.0, ans=0.0 +2024-09-18 09:31:40,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.04 vs. limit=6.0 +2024-09-18 09:31:54,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=415680.0, ans=0.0 +2024-09-18 09:32:16,359 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.737e+01 8.901e+01 9.212e+01 9.767e+01 1.363e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-18 09:32:29,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=415800.0, ans=0.125 +2024-09-18 09:32:31,047 INFO [train.py:1198] (1/2) Epoch 23, batch 4400, loss[loss=0.2661, ctc_loss=0.1575, cr_loss=0.4321, attn_decoder_loss=0.2686, over 27383.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.135, cr_loss=0.3817, attn_decoder_loss=0.2537, over 5766260.22 frames. 
], batch size: 124, lr: 4.78e-03, grad_scale: 16.0 +2024-09-18 09:32:37,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=415800.0, ans=0.125 +2024-09-18 09:32:40,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=415800.0, ans=0.0 +2024-09-18 09:32:50,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=415840.0, ans=0.125 +2024-09-18 09:33:16,991 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.81 vs. limit=22.5 +2024-09-18 09:33:37,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=415960.0, ans=0.0 +2024-09-18 09:33:53,117 INFO [train.py:1198] (1/2) Epoch 23, batch 4450, loss[loss=0.2602, ctc_loss=0.1614, cr_loss=0.398, attn_decoder_loss=0.2624, over 19469.00 frames. ], tot_loss[loss=0.2521, ctc_loss=0.1392, cr_loss=0.3875, attn_decoder_loss=0.2561, over 5575429.89 frames. ], batch size: 209, lr: 4.78e-03, grad_scale: 16.0 +2024-09-18 09:34:16,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=416040.0, ans=0.125 +2024-09-18 09:34:19,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=416040.0, ans=0.125 +2024-09-18 09:34:30,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=416080.0, ans=0.1 +2024-09-18 09:34:35,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=416080.0, ans=0.125 +2024-09-18 09:34:41,114 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=6.66 vs. limit=10.0 +2024-09-18 09:34:43,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=416120.0, ans=0.025 +2024-09-18 09:34:45,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=416120.0, ans=0.1 +2024-09-18 09:34:49,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=416120.0, ans=0.125 +2024-09-18 09:34:55,166 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.365e+01 9.450e+01 1.070e+02 1.179e+02 4.631e+02, threshold=2.141e+02, percent-clipped=3.0 +2024-09-18 09:34:58,753 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:35:09,057 INFO [train.py:1198] (1/2) Epoch 23, batch 4500, loss[loss=0.2627, ctc_loss=0.1641, cr_loss=0.3864, attn_decoder_loss=0.2651, over 19636.00 frames. ], tot_loss[loss=0.2546, ctc_loss=0.1433, cr_loss=0.3899, attn_decoder_loss=0.2583, over 5237968.89 frames. 
], batch size: 209, lr: 4.78e-03, grad_scale: 8.0 +2024-09-18 09:35:15,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=416200.0, ans=0.0 +2024-09-18 09:35:21,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=416200.0, ans=0.125 +2024-09-18 09:35:21,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=416200.0, ans=0.125 +2024-09-18 09:36:38,046 INFO [train.py:1198] (1/2) Epoch 24, batch 0, loss[loss=0.2208, ctc_loss=0.1088, cr_loss=0.3344, attn_decoder_loss=0.2259, over 29605.00 frames. ], tot_loss[loss=0.2208, ctc_loss=0.1088, cr_loss=0.3344, attn_decoder_loss=0.2259, over 29605.00 frames. ], batch size: 73, lr: 4.68e-03, grad_scale: 16.0 +2024-09-18 09:36:38,047 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 09:36:58,732 INFO [train.py:1230] (1/2) Epoch 24, validation: loss=0.2127, ctc_loss=0.03777, cr_loss=4.976e-15, attn_decoder_loss=0.2321, over 944034.00 frames. +2024-09-18 09:36:58,732 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 09:37:33,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=416380.0, ans=0.0 +2024-09-18 09:37:35,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=416380.0, ans=0.125 +2024-09-18 09:37:40,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=416380.0, ans=0.1 +2024-09-18 09:38:14,655 INFO [train.py:1198] (1/2) Epoch 24, batch 50, loss[loss=0.2263, ctc_loss=0.118, cr_loss=0.3601, attn_decoder_loss=0.2303, over 29447.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.1327, cr_loss=0.3803, attn_decoder_loss=0.2489, over 1269680.50 frames. ], batch size: 70, lr: 4.68e-03, grad_scale: 8.0 +2024-09-18 09:38:40,701 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.542e+01 8.838e+01 9.694e+01 1.103e+02 3.363e+02, threshold=1.939e+02, percent-clipped=1.0 +2024-09-18 09:38:42,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=416540.0, ans=0.0 +2024-09-18 09:39:10,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=416620.0, ans=0.05 +2024-09-18 09:39:30,898 INFO [train.py:1198] (1/2) Epoch 24, batch 100, loss[loss=0.222, ctc_loss=0.1115, cr_loss=0.3482, attn_decoder_loss=0.2265, over 29520.00 frames. ], tot_loss[loss=0.2468, ctc_loss=0.1336, cr_loss=0.38, attn_decoder_loss=0.251, over 2253055.54 frames. ], batch size: 76, lr: 4.68e-03, grad_scale: 8.0 +2024-09-18 09:39:45,288 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.75 vs. limit=22.5 +2024-09-18 09:39:52,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=416740.0, ans=0.2 +2024-09-18 09:40:13,365 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.32 vs. 
limit=15.0 +2024-09-18 09:40:18,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=416820.0, ans=0.125 +2024-09-18 09:40:20,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=416820.0, ans=0.07 +2024-09-18 09:40:21,042 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.18 vs. limit=22.5 +2024-09-18 09:40:24,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=416820.0, ans=0.0 +2024-09-18 09:40:24,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=416820.0, ans=0.0 +2024-09-18 09:40:50,687 INFO [train.py:1198] (1/2) Epoch 24, batch 150, loss[loss=0.2127, ctc_loss=0.1042, cr_loss=0.3237, attn_decoder_loss=0.2175, over 29436.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1308, cr_loss=0.376, attn_decoder_loss=0.2482, over 3046595.96 frames. ], batch size: 70, lr: 4.68e-03, grad_scale: 8.0 +2024-09-18 09:41:16,575 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.498e+01 9.006e+01 9.810e+01 1.466e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-18 09:41:25,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=416980.0, ans=0.0 +2024-09-18 09:41:37,313 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.58 vs. limit=15.0 +2024-09-18 09:42:06,293 INFO [train.py:1198] (1/2) Epoch 24, batch 200, loss[loss=0.2603, ctc_loss=0.1455, cr_loss=0.3956, attn_decoder_loss=0.2643, over 27140.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1307, cr_loss=0.3757, attn_decoder_loss=0.2477, over 3657806.87 frames. ], batch size: 125, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:42:09,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=417100.0, ans=0.125 +2024-09-18 09:42:12,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=417100.0, ans=0.1 +2024-09-18 09:42:30,091 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.76 vs. limit=15.0 +2024-09-18 09:42:36,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.97 vs. 
limit=15.0 +2024-09-18 09:43:04,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=417220.0, ans=0.2 +2024-09-18 09:43:08,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=417260.0, ans=0.125 +2024-09-18 09:43:13,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=417260.0, ans=0.125 +2024-09-18 09:43:17,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=417260.0, ans=0.0 +2024-09-18 09:43:22,130 INFO [train.py:1198] (1/2) Epoch 24, batch 250, loss[loss=0.2541, ctc_loss=0.1375, cr_loss=0.3851, attn_decoder_loss=0.2585, over 29243.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1298, cr_loss=0.3737, attn_decoder_loss=0.2473, over 4139780.76 frames. ], batch size: 100, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:43:22,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=417300.0, ans=0.07 +2024-09-18 09:43:23,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=417300.0, ans=0.025 +2024-09-18 09:43:47,872 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 8.833e+01 9.396e+01 1.002e+02 2.195e+02, threshold=1.879e+02, percent-clipped=2.0 +2024-09-18 09:44:11,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=417420.0, ans=0.2 +2024-09-18 09:44:16,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=417420.0, ans=0.125 +2024-09-18 09:44:20,207 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.47 vs. limit=22.5 +2024-09-18 09:44:26,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=417460.0, ans=0.0 +2024-09-18 09:44:36,140 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.17 vs. limit=15.0 +2024-09-18 09:44:42,567 INFO [train.py:1198] (1/2) Epoch 24, batch 300, loss[loss=0.2523, ctc_loss=0.1369, cr_loss=0.3738, attn_decoder_loss=0.2568, over 29499.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1299, cr_loss=0.3736, attn_decoder_loss=0.2472, over 4507168.44 frames. ], batch size: 92, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:45:22,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=417580.0, ans=0.1 +2024-09-18 09:45:24,884 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.07 vs. 
limit=15.0 +2024-09-18 09:45:31,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=417620.0, ans=0.125 +2024-09-18 09:45:39,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=417620.0, ans=0.0 +2024-09-18 09:45:43,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=417660.0, ans=0.0 +2024-09-18 09:45:51,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=417660.0, ans=0.025 +2024-09-18 09:45:54,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=417660.0, ans=0.2 +2024-09-18 09:45:58,774 INFO [train.py:1198] (1/2) Epoch 24, batch 350, loss[loss=0.2229, ctc_loss=0.1164, cr_loss=0.3445, attn_decoder_loss=0.2271, over 29313.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1308, cr_loss=0.3757, attn_decoder_loss=0.2482, over 4793702.78 frames. ], batch size: 71, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:46:05,093 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:46:13,510 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.94 vs. limit=15.0 +2024-09-18 09:46:25,834 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.494e+01 8.951e+01 9.745e+01 1.329e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-18 09:47:14,291 INFO [train.py:1198] (1/2) Epoch 24, batch 400, loss[loss=0.2455, ctc_loss=0.1291, cr_loss=0.3797, attn_decoder_loss=0.25, over 29693.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1306, cr_loss=0.375, attn_decoder_loss=0.2481, over 5023870.83 frames. ], batch size: 82, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:47:14,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=417900.0, ans=0.09899494936611666 +2024-09-18 09:47:22,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=417900.0, ans=0.0 +2024-09-18 09:47:28,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=417940.0, ans=0.125 +2024-09-18 09:47:36,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=417940.0, ans=0.2 +2024-09-18 09:48:03,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=418020.0, ans=0.2 +2024-09-18 09:48:05,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.57 vs. limit=15.0 +2024-09-18 09:48:31,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=418060.0, ans=0.0 +2024-09-18 09:48:35,414 INFO [train.py:1198] (1/2) Epoch 24, batch 450, loss[loss=0.2516, ctc_loss=0.1374, cr_loss=0.384, attn_decoder_loss=0.2558, over 29694.00 frames. ], tot_loss[loss=0.244, ctc_loss=0.1306, cr_loss=0.3746, attn_decoder_loss=0.2482, over 5186864.25 frames. 
], batch size: 83, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:48:35,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=418100.0, ans=0.0 +2024-09-18 09:48:37,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=418100.0, ans=0.125 +2024-09-18 09:49:02,619 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.182e+01 8.530e+01 9.135e+01 9.796e+01 4.658e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-18 09:49:30,902 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=11.65 vs. limit=15.0 +2024-09-18 09:49:42,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=418260.0, ans=0.125 +2024-09-18 09:49:50,882 INFO [train.py:1198] (1/2) Epoch 24, batch 500, loss[loss=0.2596, ctc_loss=0.1426, cr_loss=0.403, attn_decoder_loss=0.2636, over 29448.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1298, cr_loss=0.3729, attn_decoder_loss=0.2472, over 5329753.03 frames. ], batch size: 94, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:49:54,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=418300.0, ans=0.2 +2024-09-18 09:50:08,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=418340.0, ans=0.125 +2024-09-18 09:50:18,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=418340.0, ans=0.125 +2024-09-18 09:50:23,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=418380.0, ans=0.125 +2024-09-18 09:50:31,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=418380.0, ans=0.125 +2024-09-18 09:50:40,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=418420.0, ans=0.125 +2024-09-18 09:50:47,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=418420.0, ans=0.0 +2024-09-18 09:50:49,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=418420.0, ans=0.1 +2024-09-18 09:50:55,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=418460.0, ans=0.125 +2024-09-18 09:51:07,113 INFO [train.py:1198] (1/2) Epoch 24, batch 550, loss[loss=0.258, ctc_loss=0.1439, cr_loss=0.411, attn_decoder_loss=0.2615, over 28810.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.13, cr_loss=0.3735, attn_decoder_loss=0.2475, over 5422793.82 frames. ], batch size: 104, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:51:07,807 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.66 vs. 
limit=15.0 +2024-09-18 09:51:18,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=418500.0, ans=0.0 +2024-09-18 09:51:19,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=418500.0, ans=0.1 +2024-09-18 09:51:25,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=418540.0, ans=0.2 +2024-09-18 09:51:34,482 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.569e+01 9.031e+01 9.630e+01 1.358e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-18 09:51:34,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=418540.0, ans=0.125 +2024-09-18 09:51:39,999 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.52 vs. limit=12.0 +2024-09-18 09:51:48,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=418580.0, ans=0.125 +2024-09-18 09:52:27,729 INFO [train.py:1198] (1/2) Epoch 24, batch 600, loss[loss=0.2527, ctc_loss=0.1295, cr_loss=0.3756, attn_decoder_loss=0.2581, over 29267.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1301, cr_loss=0.3738, attn_decoder_loss=0.2476, over 5509527.79 frames. ], batch size: 100, lr: 4.67e-03, grad_scale: 8.0 +2024-09-18 09:52:39,083 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.34 vs. limit=15.0 +2024-09-18 09:52:58,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=418780.0, ans=0.07 +2024-09-18 09:53:08,958 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.81 vs. limit=15.0 +2024-09-18 09:53:24,095 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.72 vs. limit=15.0 +2024-09-18 09:53:26,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=418860.0, ans=0.0 +2024-09-18 09:53:40,352 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.58 vs. limit=6.0 +2024-09-18 09:53:42,745 INFO [train.py:1198] (1/2) Epoch 24, batch 650, loss[loss=0.2361, ctc_loss=0.1151, cr_loss=0.3482, attn_decoder_loss=0.2418, over 29776.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1287, cr_loss=0.3714, attn_decoder_loss=0.2465, over 5586691.52 frames. ], batch size: 81, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 09:53:52,544 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.12 vs. 
limit=10.0 +2024-09-18 09:54:10,118 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.400e+01 8.656e+01 8.941e+01 9.589e+01 2.067e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 09:54:19,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=418980.0, ans=0.125 +2024-09-18 09:54:30,671 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.17 vs. limit=15.0 +2024-09-18 09:54:37,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=419020.0, ans=0.1 +2024-09-18 09:54:52,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=419060.0, ans=0.5 +2024-09-18 09:54:54,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=419060.0, ans=0.125 +2024-09-18 09:54:58,689 INFO [train.py:1198] (1/2) Epoch 24, batch 700, loss[loss=0.2224, ctc_loss=0.1076, cr_loss=0.3348, attn_decoder_loss=0.2277, over 29527.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1294, cr_loss=0.3729, attn_decoder_loss=0.2471, over 5636472.79 frames. ], batch size: 76, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 09:55:32,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=419180.0, ans=0.125 +2024-09-18 09:55:33,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=419180.0, ans=0.0 +2024-09-18 09:55:59,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=419260.0, ans=0.125 +2024-09-18 09:56:17,371 INFO [train.py:1198] (1/2) Epoch 24, batch 750, loss[loss=0.2385, ctc_loss=0.1174, cr_loss=0.3444, attn_decoder_loss=0.2443, over 29688.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1296, cr_loss=0.3723, attn_decoder_loss=0.2469, over 5674250.63 frames. ], batch size: 82, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 09:56:19,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=419300.0, ans=0.125 +2024-09-18 09:56:46,705 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.739e+01 8.693e+01 9.112e+01 9.779e+01 2.514e+02, threshold=1.822e+02, percent-clipped=3.0 +2024-09-18 09:56:57,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=419380.0, ans=0.125 +2024-09-18 09:57:04,065 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:57:19,284 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 09:57:36,006 INFO [train.py:1198] (1/2) Epoch 24, batch 800, loss[loss=0.219, ctc_loss=0.1139, cr_loss=0.342, attn_decoder_loss=0.223, over 29600.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1291, cr_loss=0.3718, attn_decoder_loss=0.2467, over 5705834.25 frames. 
], batch size: 73, lr: 4.66e-03, grad_scale: 16.0 +2024-09-18 09:57:45,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=419500.0, ans=0.2 +2024-09-18 09:57:49,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=419540.0, ans=0.125 +2024-09-18 09:58:25,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=419620.0, ans=0.125 +2024-09-18 09:58:52,065 INFO [train.py:1198] (1/2) Epoch 24, batch 850, loss[loss=0.2511, ctc_loss=0.1286, cr_loss=0.363, attn_decoder_loss=0.2566, over 29694.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.129, cr_loss=0.3711, attn_decoder_loss=0.2463, over 5734649.73 frames. ], batch size: 89, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 09:58:58,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=419700.0, ans=0.125 +2024-09-18 09:59:21,100 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.453e+01 8.516e+01 9.029e+01 9.587e+01 2.043e+02, threshold=1.806e+02, percent-clipped=2.0 +2024-09-18 09:59:51,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=419820.0, ans=0.125 +2024-09-18 09:59:54,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=419860.0, ans=0.07 +2024-09-18 10:00:01,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=419860.0, ans=0.2 +2024-09-18 10:00:09,278 INFO [train.py:1198] (1/2) Epoch 24, batch 900, loss[loss=0.2253, ctc_loss=0.1118, cr_loss=0.3447, attn_decoder_loss=0.2302, over 29578.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1293, cr_loss=0.3723, attn_decoder_loss=0.2466, over 5739197.66 frames. ], batch size: 73, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 10:00:24,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=419900.0, ans=0.2 +2024-09-18 10:00:30,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=419940.0, ans=0.125 +2024-09-18 10:00:41,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=419940.0, ans=0.125 +2024-09-18 10:00:44,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=419980.0, ans=0.0 +2024-09-18 10:01:15,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=420060.0, ans=0.125 +2024-09-18 10:01:17,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.84 vs. limit=15.0 +2024-09-18 10:01:31,751 INFO [train.py:1198] (1/2) Epoch 24, batch 950, loss[loss=0.2296, ctc_loss=0.1194, cr_loss=0.3624, attn_decoder_loss=0.2338, over 29504.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1293, cr_loss=0.3722, attn_decoder_loss=0.2469, over 5740883.39 frames. 
], batch size: 74, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 10:01:51,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=420140.0, ans=0.05 +2024-09-18 10:02:00,914 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.680e+01 8.541e+01 9.200e+01 9.747e+01 2.326e+02, threshold=1.840e+02, percent-clipped=1.0 +2024-09-18 10:02:01,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=420180.0, ans=0.0 +2024-09-18 10:02:17,142 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.05 vs. limit=15.0 +2024-09-18 10:02:19,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=420220.0, ans=0.125 +2024-09-18 10:02:24,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=420220.0, ans=0.1 +2024-09-18 10:02:26,433 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.27 vs. limit=15.0 +2024-09-18 10:02:27,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=420220.0, ans=0.1 +2024-09-18 10:02:37,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=420260.0, ans=0.1 +2024-09-18 10:02:38,560 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.09 vs. limit=15.0 +2024-09-18 10:02:47,897 INFO [train.py:1198] (1/2) Epoch 24, batch 1000, loss[loss=0.2351, ctc_loss=0.1286, cr_loss=0.3804, attn_decoder_loss=0.2385, over 29521.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1302, cr_loss=0.3733, attn_decoder_loss=0.2474, over 5734405.69 frames. ], batch size: 77, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 10:02:57,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=420300.0, ans=0.125 +2024-09-18 10:03:03,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=420340.0, ans=0.0 +2024-09-18 10:03:03,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=420340.0, ans=0.1 +2024-09-18 10:03:20,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=420380.0, ans=0.1 +2024-09-18 10:03:21,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=420380.0, ans=0.0 +2024-09-18 10:03:40,748 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.93 vs. limit=10.0 +2024-09-18 10:04:03,950 INFO [train.py:1198] (1/2) Epoch 24, batch 1050, loss[loss=0.2622, ctc_loss=0.1453, cr_loss=0.3923, attn_decoder_loss=0.2665, over 29696.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1302, cr_loss=0.3737, attn_decoder_loss=0.247, over 5743784.42 frames. 
], batch size: 85, lr: 4.66e-03, grad_scale: 8.0 +2024-09-18 10:04:18,896 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:04:19,567 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.42 vs. limit=10.0 +2024-09-18 10:04:23,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=420540.0, ans=0.95 +2024-09-18 10:04:35,588 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.029e+01 8.418e+01 8.849e+01 9.632e+01 1.961e+02, threshold=1.770e+02, percent-clipped=1.0 +2024-09-18 10:04:37,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=420580.0, ans=0.125 +2024-09-18 10:04:44,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=420580.0, ans=0.125 +2024-09-18 10:05:01,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=420620.0, ans=0.1 +2024-09-18 10:05:22,853 INFO [train.py:1198] (1/2) Epoch 24, batch 1100, loss[loss=0.2368, ctc_loss=0.1206, cr_loss=0.3539, attn_decoder_loss=0.2418, over 29470.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1297, cr_loss=0.3726, attn_decoder_loss=0.2467, over 5756175.46 frames. ], batch size: 78, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:05:27,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.90 vs. limit=15.0 +2024-09-18 10:05:41,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=420740.0, ans=0.0 +2024-09-18 10:06:04,069 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.01 vs. limit=22.5 +2024-09-18 10:06:08,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.06 vs. limit=6.0 +2024-09-18 10:06:19,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.63 vs. limit=15.0 +2024-09-18 10:06:39,960 INFO [train.py:1198] (1/2) Epoch 24, batch 1150, loss[loss=0.2335, ctc_loss=0.1207, cr_loss=0.3622, attn_decoder_loss=0.2379, over 29450.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1299, cr_loss=0.3735, attn_decoder_loss=0.2467, over 5754086.85 frames. 
], batch size: 78, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:06:41,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=420900.0, ans=0.2 +2024-09-18 10:06:54,190 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:06:58,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=420940.0, ans=0.125 +2024-09-18 10:07:09,196 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.376e+01 8.363e+01 8.865e+01 9.557e+01 3.982e+02, threshold=1.773e+02, percent-clipped=2.0 +2024-09-18 10:07:24,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=421020.0, ans=0.125 +2024-09-18 10:07:26,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=421020.0, ans=0.125 +2024-09-18 10:07:44,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=421060.0, ans=0.125 +2024-09-18 10:07:58,074 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.00 vs. limit=12.0 +2024-09-18 10:07:58,434 INFO [train.py:1198] (1/2) Epoch 24, batch 1200, loss[loss=0.2518, ctc_loss=0.1317, cr_loss=0.3683, attn_decoder_loss=0.2569, over 29678.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1303, cr_loss=0.3733, attn_decoder_loss=0.2476, over 5746721.04 frames. ], batch size: 85, lr: 4.65e-03, grad_scale: 16.0 +2024-09-18 10:08:01,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=421100.0, ans=10.0 +2024-09-18 10:08:20,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=421140.0, ans=0.0 +2024-09-18 10:08:33,295 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.62 vs. limit=15.0 +2024-09-18 10:08:52,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=421220.0, ans=0.2 +2024-09-18 10:09:01,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=421260.0, ans=0.125 +2024-09-18 10:09:03,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=421260.0, ans=0.125 +2024-09-18 10:09:16,338 INFO [train.py:1198] (1/2) Epoch 24, batch 1250, loss[loss=0.2678, ctc_loss=0.143, cr_loss=0.3954, attn_decoder_loss=0.2729, over 29563.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1304, cr_loss=0.374, attn_decoder_loss=0.2479, over 5774101.85 frames. 
], batch size: 92, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:09:21,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=421300.0, ans=0.125 +2024-09-18 10:09:25,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=421300.0, ans=0.5 +2024-09-18 10:09:33,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=421340.0, ans=0.1 +2024-09-18 10:09:46,769 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.304e+01 8.716e+01 9.105e+01 9.689e+01 1.606e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-18 10:09:56,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.26 vs. limit=15.0 +2024-09-18 10:10:20,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=421460.0, ans=0.2 +2024-09-18 10:10:22,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=421460.0, ans=0.5 +2024-09-18 10:10:25,902 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.05 vs. limit=22.5 +2024-09-18 10:10:32,438 INFO [train.py:1198] (1/2) Epoch 24, batch 1300, loss[loss=0.2478, ctc_loss=0.1309, cr_loss=0.3743, attn_decoder_loss=0.2524, over 28364.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1298, cr_loss=0.3726, attn_decoder_loss=0.2469, over 5780140.03 frames. ], batch size: 112, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:11:00,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=421540.0, ans=0.035 +2024-09-18 10:11:11,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=421580.0, ans=0.1 +2024-09-18 10:11:26,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=421620.0, ans=0.125 +2024-09-18 10:11:34,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=421660.0, ans=0.0 +2024-09-18 10:11:49,275 INFO [train.py:1198] (1/2) Epoch 24, batch 1350, loss[loss=0.2451, ctc_loss=0.1237, cr_loss=0.3611, attn_decoder_loss=0.2506, over 29763.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1293, cr_loss=0.3725, attn_decoder_loss=0.2468, over 5798220.99 frames. 
], batch size: 81, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:11:54,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=421700.0, ans=0.125 +2024-09-18 10:12:02,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=421700.0, ans=0.2 +2024-09-18 10:12:24,087 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.522e+01 8.854e+01 9.285e+01 9.935e+01 1.189e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-18 10:12:27,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=421780.0, ans=0.0 +2024-09-18 10:12:45,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=421820.0, ans=0.125 +2024-09-18 10:12:51,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=421820.0, ans=0.0 +2024-09-18 10:13:02,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=421860.0, ans=0.1 +2024-09-18 10:13:09,384 INFO [train.py:1198] (1/2) Epoch 24, batch 1400, loss[loss=0.2162, ctc_loss=0.1119, cr_loss=0.3179, attn_decoder_loss=0.2208, over 29559.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1289, cr_loss=0.3714, attn_decoder_loss=0.2466, over 5808672.95 frames. ], batch size: 69, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:13:15,008 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.10 vs. limit=15.0 +2024-09-18 10:13:15,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=421900.0, ans=0.1 +2024-09-18 10:13:35,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=421940.0, ans=0.125 +2024-09-18 10:14:14,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=422060.0, ans=0.125 +2024-09-18 10:14:17,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=422060.0, ans=0.125 +2024-09-18 10:14:17,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=422060.0, ans=0.025 +2024-09-18 10:14:24,943 INFO [train.py:1198] (1/2) Epoch 24, batch 1450, loss[loss=0.2607, ctc_loss=0.1497, cr_loss=0.4058, attn_decoder_loss=0.264, over 29436.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1292, cr_loss=0.3721, attn_decoder_loss=0.2472, over 5804611.02 frames. 
], batch size: 94, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:14:42,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=422140.0, ans=0.125 +2024-09-18 10:14:55,267 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.177e+01 8.531e+01 9.051e+01 9.633e+01 1.306e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-18 10:15:23,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=422260.0, ans=0.2 +2024-09-18 10:15:40,534 INFO [train.py:1198] (1/2) Epoch 24, batch 1500, loss[loss=0.2477, ctc_loss=0.1305, cr_loss=0.3611, attn_decoder_loss=0.2527, over 29639.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1298, cr_loss=0.3734, attn_decoder_loss=0.2478, over 5805094.36 frames. ], batch size: 86, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:15:50,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=422300.0, ans=0.2 +2024-09-18 10:15:55,803 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.14 vs. limit=12.0 +2024-09-18 10:16:13,554 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.97 vs. limit=6.0 +2024-09-18 10:16:17,615 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:16:20,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=422380.0, ans=0.0 +2024-09-18 10:16:24,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=422380.0, ans=0.125 +2024-09-18 10:17:01,792 INFO [train.py:1198] (1/2) Epoch 24, batch 1550, loss[loss=0.268, ctc_loss=0.152, cr_loss=0.4249, attn_decoder_loss=0.2714, over 29516.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1305, cr_loss=0.3741, attn_decoder_loss=0.2479, over 5781098.13 frames. ], batch size: 90, lr: 4.65e-03, grad_scale: 8.0 +2024-09-18 10:17:15,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=422540.0, ans=0.1 +2024-09-18 10:17:32,003 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 8.697e+01 9.200e+01 9.648e+01 4.928e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-18 10:17:41,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=422580.0, ans=0.125 +2024-09-18 10:17:45,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=422620.0, ans=0.125 +2024-09-18 10:17:58,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=422620.0, ans=0.0 +2024-09-18 10:18:06,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=422660.0, ans=0.2 +2024-09-18 10:18:17,231 INFO [train.py:1198] (1/2) Epoch 24, batch 1600, loss[loss=0.2461, ctc_loss=0.1241, cr_loss=0.3498, attn_decoder_loss=0.2518, over 29698.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1308, cr_loss=0.3742, attn_decoder_loss=0.2479, over 5762841.10 frames. 
], batch size: 85, lr: 4.64e-03, grad_scale: 16.0 +2024-09-18 10:18:35,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=422740.0, ans=0.125 +2024-09-18 10:19:15,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=422820.0, ans=0.1 +2024-09-18 10:19:25,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=422860.0, ans=0.2 +2024-09-18 10:19:25,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=422860.0, ans=0.2 +2024-09-18 10:19:35,112 INFO [train.py:1198] (1/2) Epoch 24, batch 1650, loss[loss=0.255, ctc_loss=0.1373, cr_loss=0.4148, attn_decoder_loss=0.2588, over 29698.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1307, cr_loss=0.3742, attn_decoder_loss=0.2477, over 5756359.49 frames. ], batch size: 89, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:19:35,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=422900.0, ans=0.125 +2024-09-18 10:19:51,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=422940.0, ans=0.1 +2024-09-18 10:20:03,929 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.88 vs. limit=15.0 +2024-09-18 10:20:08,930 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.940e+01 8.438e+01 9.287e+01 9.952e+01 1.595e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-18 10:20:23,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.15 vs. limit=10.0 +2024-09-18 10:20:30,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=423020.0, ans=0.0 +2024-09-18 10:20:32,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=423020.0, ans=0.0 +2024-09-18 10:20:39,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=423060.0, ans=0.015 +2024-09-18 10:20:45,971 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.71 vs. limit=22.5 +2024-09-18 10:20:52,755 INFO [train.py:1198] (1/2) Epoch 24, batch 1700, loss[loss=0.2113, ctc_loss=0.1053, cr_loss=0.3333, attn_decoder_loss=0.2156, over 29546.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1302, cr_loss=0.3736, attn_decoder_loss=0.2471, over 5779302.32 frames. ], batch size: 69, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:20:56,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=423100.0, ans=0.0 +2024-09-18 10:20:59,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.26 vs. 
limit=6.0 +2024-09-18 10:21:03,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=423100.0, ans=0.0 +2024-09-18 10:21:35,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=423180.0, ans=10.0 +2024-09-18 10:21:35,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=423180.0, ans=0.1 +2024-09-18 10:21:46,932 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.38 vs. limit=15.0 +2024-09-18 10:21:49,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.32 vs. limit=15.0 +2024-09-18 10:21:50,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=423220.0, ans=0.125 +2024-09-18 10:22:07,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=423300.0, ans=0.0 +2024-09-18 10:22:08,370 INFO [train.py:1198] (1/2) Epoch 24, batch 1750, loss[loss=0.2071, ctc_loss=0.1033, cr_loss=0.3146, attn_decoder_loss=0.2117, over 29373.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1297, cr_loss=0.3725, attn_decoder_loss=0.2466, over 5788099.21 frames. ], batch size: 67, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:22:13,919 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.55 vs. limit=12.0 +2024-09-18 10:22:33,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=423340.0, ans=0.0 +2024-09-18 10:22:40,204 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.086e+01 8.547e+01 8.974e+01 9.351e+01 1.739e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-18 10:22:45,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=423380.0, ans=0.125 +2024-09-18 10:23:12,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=423460.0, ans=0.0 +2024-09-18 10:23:25,962 INFO [train.py:1198] (1/2) Epoch 24, batch 1800, loss[loss=0.255, ctc_loss=0.1365, cr_loss=0.3751, attn_decoder_loss=0.2598, over 29716.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1296, cr_loss=0.3722, attn_decoder_loss=0.2468, over 5791797.05 frames. ], batch size: 83, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:23:27,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=423500.0, ans=0.125 +2024-09-18 10:24:05,284 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.44 vs. limit=10.0 +2024-09-18 10:24:43,915 INFO [train.py:1198] (1/2) Epoch 24, batch 1850, loss[loss=0.2484, ctc_loss=0.1313, cr_loss=0.3625, attn_decoder_loss=0.2533, over 29634.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1297, cr_loss=0.3725, attn_decoder_loss=0.2468, over 5798014.97 frames. 
], batch size: 86, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:24:50,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=423700.0, ans=0.025 +2024-09-18 10:24:56,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=423700.0, ans=0.05 +2024-09-18 10:25:15,525 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.229e+01 8.631e+01 9.392e+01 8.263e+02, threshold=1.726e+02, percent-clipped=1.0 +2024-09-18 10:25:20,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=423780.0, ans=0.125 +2024-09-18 10:25:24,072 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.43 vs. limit=12.0 +2024-09-18 10:25:28,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=423820.0, ans=0.125 +2024-09-18 10:25:43,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=423860.0, ans=0.125 +2024-09-18 10:25:43,797 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.12 vs. limit=15.0 +2024-09-18 10:25:59,346 INFO [train.py:1198] (1/2) Epoch 24, batch 1900, loss[loss=0.2613, ctc_loss=0.1434, cr_loss=0.3924, attn_decoder_loss=0.2657, over 29727.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1301, cr_loss=0.3732, attn_decoder_loss=0.2475, over 5805191.85 frames. ], batch size: 89, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:25:59,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=423900.0, ans=0.2 +2024-09-18 10:26:02,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=423900.0, ans=0.0 +2024-09-18 10:26:15,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=423940.0, ans=0.125 +2024-09-18 10:26:27,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=423940.0, ans=0.0 +2024-09-18 10:26:39,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=423980.0, ans=0.125 +2024-09-18 10:26:41,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=423980.0, ans=0.125 +2024-09-18 10:27:01,287 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.62 vs. limit=15.0 +2024-09-18 10:27:15,265 INFO [train.py:1198] (1/2) Epoch 24, batch 1950, loss[loss=0.2405, ctc_loss=0.1225, cr_loss=0.3649, attn_decoder_loss=0.2455, over 29417.00 frames. ], tot_loss[loss=0.2443, ctc_loss=0.1307, cr_loss=0.3745, attn_decoder_loss=0.2486, over 5819917.94 frames. 
], batch size: 78, lr: 4.64e-03, grad_scale: 8.0 +2024-09-18 10:27:31,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=424140.0, ans=0.125 +2024-09-18 10:27:37,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=424140.0, ans=0.125 +2024-09-18 10:27:37,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=424140.0, ans=0.0 +2024-09-18 10:27:48,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=424180.0, ans=0.0 +2024-09-18 10:27:49,217 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.703e+01 9.158e+01 9.577e+01 1.650e+02, threshold=1.832e+02, percent-clipped=0.0 +2024-09-18 10:28:20,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=424260.0, ans=0.04949747468305833 +2024-09-18 10:28:35,221 INFO [train.py:1198] (1/2) Epoch 24, batch 2000, loss[loss=0.2224, ctc_loss=0.1135, cr_loss=0.3322, attn_decoder_loss=0.2271, over 29337.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.131, cr_loss=0.375, attn_decoder_loss=0.2488, over 5795293.81 frames. ], batch size: 67, lr: 4.64e-03, grad_scale: 16.0 +2024-09-18 10:29:18,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=424380.0, ans=0.125 +2024-09-18 10:29:43,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.14 vs. limit=6.0 +2024-09-18 10:29:47,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=424460.0, ans=0.09899494936611666 +2024-09-18 10:29:51,157 INFO [train.py:1198] (1/2) Epoch 24, batch 2050, loss[loss=0.2196, ctc_loss=0.1146, cr_loss=0.3615, attn_decoder_loss=0.2232, over 29457.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.13, cr_loss=0.3734, attn_decoder_loss=0.2476, over 5787631.79 frames. ], batch size: 70, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:29:57,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=424500.0, ans=0.2 +2024-09-18 10:30:06,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=424540.0, ans=0.0 +2024-09-18 10:30:11,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=424540.0, ans=0.025 +2024-09-18 10:30:24,690 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.145e+01 8.469e+01 9.021e+01 9.794e+01 2.013e+02, threshold=1.804e+02, percent-clipped=1.0 +2024-09-18 10:30:36,394 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.34 vs. limit=15.0 +2024-09-18 10:30:45,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.44 vs. 
limit=15.0 +2024-09-18 10:31:07,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=424700.0, ans=0.07 +2024-09-18 10:31:08,882 INFO [train.py:1198] (1/2) Epoch 24, batch 2100, loss[loss=0.2405, ctc_loss=0.1259, cr_loss=0.358, attn_decoder_loss=0.2453, over 29770.00 frames. ], tot_loss[loss=0.2429, ctc_loss=0.1296, cr_loss=0.3726, attn_decoder_loss=0.2472, over 5800025.72 frames. ], batch size: 81, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:31:16,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=424700.0, ans=0.125 +2024-09-18 10:31:42,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=424780.0, ans=0.125 +2024-09-18 10:31:55,098 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.74 vs. limit=15.0 +2024-09-18 10:32:07,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=424820.0, ans=0.125 +2024-09-18 10:32:07,649 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.12 vs. limit=15.0 +2024-09-18 10:32:24,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=424860.0, ans=0.125 +2024-09-18 10:32:26,897 INFO [train.py:1198] (1/2) Epoch 24, batch 2150, loss[loss=0.2454, ctc_loss=0.1329, cr_loss=0.375, attn_decoder_loss=0.2495, over 29444.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1287, cr_loss=0.371, attn_decoder_loss=0.2466, over 5815404.79 frames. ], batch size: 78, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:32:28,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=424900.0, ans=0.1 +2024-09-18 10:32:42,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=424940.0, ans=0.125 +2024-09-18 10:33:00,319 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.319e+01 8.375e+01 8.762e+01 9.510e+01 1.706e+02, threshold=1.752e+02, percent-clipped=0.0 +2024-09-18 10:33:10,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.41 vs. limit=15.0 +2024-09-18 10:33:13,334 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.18 vs. limit=15.0 +2024-09-18 10:33:35,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=425060.0, ans=0.0 +2024-09-18 10:33:35,990 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.78 vs. limit=15.0 +2024-09-18 10:33:42,619 INFO [train.py:1198] (1/2) Epoch 24, batch 2200, loss[loss=0.2424, ctc_loss=0.1291, cr_loss=0.3726, attn_decoder_loss=0.2467, over 29626.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1289, cr_loss=0.3714, attn_decoder_loss=0.2467, over 5810433.60 frames. 
], batch size: 86, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:33:49,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=425100.0, ans=0.125 +2024-09-18 10:33:50,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=425100.0, ans=0.05 +2024-09-18 10:34:19,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=425180.0, ans=0.125 +2024-09-18 10:34:23,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=425180.0, ans=0.125 +2024-09-18 10:34:28,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=425220.0, ans=0.0 +2024-09-18 10:34:44,312 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=10.00 vs. limit=12.0 +2024-09-18 10:34:45,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=425260.0, ans=0.0 +2024-09-18 10:34:50,313 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.22 vs. limit=12.0 +2024-09-18 10:34:58,661 INFO [train.py:1198] (1/2) Epoch 24, batch 2250, loss[loss=0.2442, ctc_loss=0.1286, cr_loss=0.3822, attn_decoder_loss=0.2485, over 29714.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.129, cr_loss=0.3719, attn_decoder_loss=0.2469, over 5810122.47 frames. ], batch size: 82, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:35:17,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=425340.0, ans=0.2 +2024-09-18 10:35:31,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=425380.0, ans=0.125 +2024-09-18 10:35:33,888 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.375e+01 8.517e+01 9.003e+01 9.651e+01 2.176e+02, threshold=1.801e+02, percent-clipped=2.0 +2024-09-18 10:35:47,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=425420.0, ans=0.0 +2024-09-18 10:35:51,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=425420.0, ans=0.0 +2024-09-18 10:36:18,253 INFO [train.py:1198] (1/2) Epoch 24, batch 2300, loss[loss=0.2231, ctc_loss=0.1151, cr_loss=0.3465, attn_decoder_loss=0.2274, over 29329.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1287, cr_loss=0.3704, attn_decoder_loss=0.246, over 5798521.93 frames. 
], batch size: 71, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:36:18,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=425500.0, ans=0.2 +2024-09-18 10:37:06,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=425620.0, ans=0.09899494936611666 +2024-09-18 10:37:09,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=425620.0, ans=0.0 +2024-09-18 10:37:12,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.59 vs. limit=15.0 +2024-09-18 10:37:34,648 INFO [train.py:1198] (1/2) Epoch 24, batch 2350, loss[loss=0.2447, ctc_loss=0.128, cr_loss=0.3704, attn_decoder_loss=0.2494, over 29688.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1289, cr_loss=0.371, attn_decoder_loss=0.2463, over 5804547.36 frames. ], batch size: 83, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:38:07,919 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.350e+01 8.476e+01 9.011e+01 9.684e+01 2.166e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-18 10:38:09,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=425780.0, ans=0.2 +2024-09-18 10:38:26,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=425820.0, ans=0.125 +2024-09-18 10:38:35,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=425860.0, ans=0.025 +2024-09-18 10:38:50,535 INFO [train.py:1198] (1/2) Epoch 24, batch 2400, loss[loss=0.2289, ctc_loss=0.1172, cr_loss=0.3458, attn_decoder_loss=0.2336, over 29540.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1291, cr_loss=0.3715, attn_decoder_loss=0.2466, over 5807727.24 frames. ], batch size: 76, lr: 4.63e-03, grad_scale: 16.0 +2024-09-18 10:38:57,827 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.63 vs. limit=15.0 +2024-09-18 10:39:07,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=425940.0, ans=0.125 +2024-09-18 10:39:15,721 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:40:09,946 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.70 vs. limit=15.0 +2024-09-18 10:40:10,568 INFO [train.py:1198] (1/2) Epoch 24, batch 2450, loss[loss=0.2428, ctc_loss=0.1269, cr_loss=0.3869, attn_decoder_loss=0.2471, over 29730.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.13, cr_loss=0.3729, attn_decoder_loss=0.2476, over 5784836.66 frames. 
], batch size: 82, lr: 4.63e-03, grad_scale: 8.0 +2024-09-18 10:40:12,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=426100.0, ans=0.2 +2024-09-18 10:40:33,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=426140.0, ans=0.95 +2024-09-18 10:40:35,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=426140.0, ans=0.125 +2024-09-18 10:40:37,196 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.22 vs. limit=12.0 +2024-09-18 10:40:45,374 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.473e+01 8.992e+01 9.865e+01 1.103e+02 3.120e+02, threshold=1.973e+02, percent-clipped=1.0 +2024-09-18 10:40:59,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=426220.0, ans=0.1 +2024-09-18 10:41:02,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=426220.0, ans=0.1 +2024-09-18 10:41:26,504 INFO [train.py:1198] (1/2) Epoch 24, batch 2500, loss[loss=0.2492, ctc_loss=0.1346, cr_loss=0.3784, attn_decoder_loss=0.2535, over 29645.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1298, cr_loss=0.3729, attn_decoder_loss=0.2474, over 5795113.19 frames. ], batch size: 86, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:41:46,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=426340.0, ans=0.1 +2024-09-18 10:41:54,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=426340.0, ans=0.0 +2024-09-18 10:41:58,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=426380.0, ans=0.125 +2024-09-18 10:42:11,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=426420.0, ans=0.0 +2024-09-18 10:42:35,785 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:42:45,155 INFO [train.py:1198] (1/2) Epoch 24, batch 2550, loss[loss=0.2286, ctc_loss=0.1221, cr_loss=0.3697, attn_decoder_loss=0.2322, over 29334.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1297, cr_loss=0.3729, attn_decoder_loss=0.2474, over 5799323.89 frames. ], batch size: 67, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:43:06,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=426540.0, ans=0.125 +2024-09-18 10:43:19,600 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.669e+01 9.245e+01 9.655e+01 1.436e+02, threshold=1.849e+02, percent-clipped=0.0 +2024-09-18 10:43:26,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=426580.0, ans=0.125 +2024-09-18 10:44:03,063 INFO [train.py:1198] (1/2) Epoch 24, batch 2600, loss[loss=0.2345, ctc_loss=0.1272, cr_loss=0.3633, attn_decoder_loss=0.2384, over 29432.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1301, cr_loss=0.374, attn_decoder_loss=0.2479, over 5795871.30 frames. 
], batch size: 78, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:44:06,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.55 vs. limit=22.5 +2024-09-18 10:44:12,672 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.80 vs. limit=15.0 +2024-09-18 10:44:21,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=426740.0, ans=0.125 +2024-09-18 10:44:21,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=426740.0, ans=0.125 +2024-09-18 10:44:27,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=426740.0, ans=0.125 +2024-09-18 10:44:31,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=426780.0, ans=0.025 +2024-09-18 10:44:37,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=426780.0, ans=0.125 +2024-09-18 10:45:11,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=426860.0, ans=15.0 +2024-09-18 10:45:18,306 INFO [train.py:1198] (1/2) Epoch 24, batch 2650, loss[loss=0.2607, ctc_loss=0.1411, cr_loss=0.4122, attn_decoder_loss=0.2648, over 29301.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1297, cr_loss=0.3736, attn_decoder_loss=0.2479, over 5801789.58 frames. ], batch size: 100, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:45:53,134 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.692e+01 8.422e+01 8.884e+01 9.489e+01 2.051e+02, threshold=1.777e+02, percent-clipped=1.0 +2024-09-18 10:46:05,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=427020.0, ans=0.125 +2024-09-18 10:46:23,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=427060.0, ans=0.125 +2024-09-18 10:46:35,896 INFO [train.py:1198] (1/2) Epoch 24, batch 2700, loss[loss=0.2424, ctc_loss=0.1243, cr_loss=0.3477, attn_decoder_loss=0.2478, over 29504.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1302, cr_loss=0.374, attn_decoder_loss=0.2479, over 5796766.64 frames. ], batch size: 87, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:46:45,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.44 vs. 
limit=15.0 +2024-09-18 10:46:46,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=427100.0, ans=0.125 +2024-09-18 10:46:48,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=427100.0, ans=0.125 +2024-09-18 10:46:58,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=427140.0, ans=0.0 +2024-09-18 10:47:07,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=427180.0, ans=0.0 +2024-09-18 10:47:07,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=427180.0, ans=0.125 +2024-09-18 10:47:41,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=427260.0, ans=0.0 +2024-09-18 10:47:48,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=427260.0, ans=0.2 +2024-09-18 10:47:54,350 INFO [train.py:1198] (1/2) Epoch 24, batch 2750, loss[loss=0.2367, ctc_loss=0.1305, cr_loss=0.3819, attn_decoder_loss=0.24, over 29527.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1298, cr_loss=0.373, attn_decoder_loss=0.247, over 5796191.12 frames. ], batch size: 75, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:48:08,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=427340.0, ans=0.1 +2024-09-18 10:48:23,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=427380.0, ans=0.025 +2024-09-18 10:48:28,941 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.406e+01 8.612e+01 9.140e+01 9.786e+01 3.109e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-18 10:48:38,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=427420.0, ans=0.05 +2024-09-18 10:48:44,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=427420.0, ans=0.95 +2024-09-18 10:48:55,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=427460.0, ans=0.2 +2024-09-18 10:49:00,504 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.94 vs. limit=12.0 +2024-09-18 10:49:10,153 INFO [train.py:1198] (1/2) Epoch 24, batch 2800, loss[loss=0.2711, ctc_loss=0.1654, cr_loss=0.4066, attn_decoder_loss=0.2738, over 20653.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1303, cr_loss=0.3742, attn_decoder_loss=0.2474, over 5778468.14 frames. 
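
The WARNING [optim.py:487] records report a five-number summary (apparently min, 25th percentile, median, 75th percentile, max) of recent gradient norms. In every record above, threshold equals Clipping_scale times the logged median (e.g. 2.0 x 9.407e+01 = 1.881e+02), so the clipping threshold appears to track the median gradient norm over a recent window, and percent-clipped appears to be the percentage of recent batches whose norm exceeded it. A sketch of that scheme, inferred from the logged numbers rather than quoted from ScaledAdam:

```python
import torch


def clip_grad_by_median(params, recent_norms, clipping_scale=2.0, window=200):
    """Clip the global grad norm to clipping_scale x median of recent norms."""
    grads = [p.grad for p in params if p.grad is not None]
    norm = torch.norm(torch.stack([g.norm() for g in grads])).item()
    recent_norms.append(norm)
    del recent_norms[:-window]  # keep a sliding window of recent norms

    s = sorted(recent_norms)
    quartiles = [s[int(q * (len(s) - 1))] for q in (0.0, 0.25, 0.5, 0.75, 1.0)]
    threshold = clipping_scale * quartiles[2]  # scale x median, as logged

    clipped = norm > threshold
    if clipped:
        for g in grads:
            g.mul_(threshold / norm)
    return quartiles, threshold, clipped
```
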
], batch size: 210, lr: 4.62e-03, grad_scale: 16.0 +2024-09-18 10:49:11,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=427500.0, ans=0.04949747468305833 +2024-09-18 10:49:39,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=427580.0, ans=0.025 +2024-09-18 10:49:50,228 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.68 vs. limit=15.0 +2024-09-18 10:49:54,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=427620.0, ans=0.07 +2024-09-18 10:50:27,671 INFO [train.py:1198] (1/2) Epoch 24, batch 2850, loss[loss=0.2306, ctc_loss=0.12, cr_loss=0.3408, attn_decoder_loss=0.2353, over 29515.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1302, cr_loss=0.3736, attn_decoder_loss=0.2474, over 5761380.07 frames. ], batch size: 77, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:50:29,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=427700.0, ans=0.125 +2024-09-18 10:50:32,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=427700.0, ans=0.0 +2024-09-18 10:50:43,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=427740.0, ans=0.2 +2024-09-18 10:50:48,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.64 vs. limit=15.0 +2024-09-18 10:51:04,123 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.609e+01 8.759e+01 9.407e+01 9.943e+01 3.710e+02, threshold=1.881e+02, percent-clipped=1.0 +2024-09-18 10:51:10,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=427780.0, ans=0.1 +2024-09-18 10:51:19,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=427820.0, ans=0.0 +2024-09-18 10:51:23,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=427820.0, ans=0.0 +2024-09-18 10:51:33,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.97 vs. limit=12.0 +2024-09-18 10:51:44,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=427900.0, ans=0.0 +2024-09-18 10:51:45,762 INFO [train.py:1198] (1/2) Epoch 24, batch 2900, loss[loss=0.2457, ctc_loss=0.134, cr_loss=0.395, attn_decoder_loss=0.2493, over 29441.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.1314, cr_loss=0.3766, attn_decoder_loss=0.249, over 5787365.47 frames. 
], batch size: 79, lr: 4.62e-03, grad_scale: 8.0 +2024-09-18 10:52:00,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=427940.0, ans=0.0 +2024-09-18 10:52:07,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=427940.0, ans=0.1 +2024-09-18 10:52:31,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=428020.0, ans=0.125 +2024-09-18 10:52:36,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.27 vs. limit=15.0 +2024-09-18 10:52:51,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.37 vs. limit=15.0 +2024-09-18 10:52:51,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=428060.0, ans=0.025 +2024-09-18 10:53:02,270 INFO [train.py:1198] (1/2) Epoch 24, batch 2950, loss[loss=0.2274, ctc_loss=0.115, cr_loss=0.3599, attn_decoder_loss=0.2319, over 29525.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1305, cr_loss=0.3744, attn_decoder_loss=0.2475, over 5781612.25 frames. ], batch size: 75, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:53:28,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=428140.0, ans=0.0 +2024-09-18 10:53:38,592 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.384e+01 8.398e+01 8.942e+01 9.654e+01 3.446e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 10:54:06,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=428260.0, ans=0.125 +2024-09-18 10:54:08,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=428260.0, ans=0.125 +2024-09-18 10:54:20,523 INFO [train.py:1198] (1/2) Epoch 24, batch 3000, loss[loss=0.2414, ctc_loss=0.127, cr_loss=0.3707, attn_decoder_loss=0.2459, over 29757.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1302, cr_loss=0.3736, attn_decoder_loss=0.2473, over 5782653.75 frames. ], batch size: 81, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:54:20,524 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 10:54:23,362 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.4.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.1916, 3.5015, 3.5165, 3.6745], device='cuda:1') +2024-09-18 10:54:24,927 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.4.self_attn_weights, attn_weights_entropy = tensor([2.6869, 2.2879, 1.9646, 2.2364, 2.2940, 1.4754, 1.9805, 2.0655], + device='cuda:1') +2024-09-18 10:54:38,998 INFO [train.py:1230] (1/2) Epoch 24, validation: loss=0.2118, ctc_loss=0.03891, cr_loss=5.525e-15, attn_decoder_loss=0.231, over 944034.00 frames. +2024-09-18 10:54:38,998 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 10:54:55,297 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.71 vs. 
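
The "Computing validation loss" block above runs a held-out pass mid-epoch. Note that cr_loss collapses to ~5.5e-15 there; this is consistent with the consistency-regularization term comparing two differently-augmented views of each utterance, which agree (up to float noise) once training-time augmentation is disabled in eval mode. A hedged sketch of such a validation pass; the compute_loss helper and batch layout are assumptions, not icefall's exact API:

```python
import torch


def validate(model, valid_loader, compute_loss, device):
    model.eval()
    tot_loss, tot_frames = 0.0, 0.0
    with torch.no_grad():  # no gradients kept during validation
        for batch in valid_loader:
            loss, num_frames = compute_loss(model, batch, device)
            tot_loss += loss.item() * num_frames
            tot_frames += num_frames
    model.train()  # back to training mode afterwards
    return tot_loss / tot_frames  # frame-weighted validation loss
```
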
limit=22.5 +2024-09-18 10:54:58,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=428340.0, ans=0.125 +2024-09-18 10:54:58,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_na.min_abs, batch_count=428340.0, ans=0.02 +2024-09-18 10:55:02,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=428340.0, ans=0.1 +2024-09-18 10:55:09,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=428380.0, ans=0.125 +2024-09-18 10:55:20,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=428380.0, ans=0.125 +2024-09-18 10:55:34,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=428420.0, ans=0.0 +2024-09-18 10:55:49,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=428460.0, ans=0.2 +2024-09-18 10:55:57,315 INFO [train.py:1198] (1/2) Epoch 24, batch 3050, loss[loss=0.2301, ctc_loss=0.116, cr_loss=0.3515, attn_decoder_loss=0.235, over 29510.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1303, cr_loss=0.3736, attn_decoder_loss=0.2477, over 5776421.50 frames. ], batch size: 76, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:56:11,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=428540.0, ans=0.0 +2024-09-18 10:56:15,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=428540.0, ans=0.125 +2024-09-18 10:56:22,663 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.15 vs. limit=15.0 +2024-09-18 10:56:30,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=428580.0, ans=0.2 +2024-09-18 10:56:32,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=428580.0, ans=0.1 +2024-09-18 10:56:33,589 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.271e+01 8.657e+01 9.220e+01 9.690e+01 1.587e+02, threshold=1.844e+02, percent-clipped=0.0 +2024-09-18 10:56:46,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=428620.0, ans=0.125 +2024-09-18 10:56:47,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=428620.0, ans=0.125 +2024-09-18 10:56:53,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=428620.0, ans=0.0 +2024-09-18 10:57:05,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=428660.0, ans=0.0 +2024-09-18 10:57:10,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=428660.0, ans=0.125 +2024-09-18 10:57:13,016 INFO [train.py:1198] (1/2) Epoch 24, batch 3100, loss[loss=0.2546, ctc_loss=0.1382, cr_loss=0.3933, attn_decoder_loss=0.2588, over 29251.00 frames. 
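
Each loss[...] record decomposes the objective into CTC, consistency-regularization (cr) and attention-decoder terms. The logged totals are consistent with a fixed weighted sum: for the batch-3100 record above, 0.1 x 0.1382 + 0.02 x 0.3933 + 0.9 x 0.2588 = 0.2546. The weights below are inferred from the printed numbers, not quoted from train.py:

```python
def combined_loss(ctc_loss, cr_loss, attn_decoder_loss,
                  ctc_weight=0.1, cr_weight=0.02, attn_weight=0.9):
    # Weights inferred from the logged values; treat them as assumptions.
    return (ctc_weight * ctc_loss
            + cr_weight * cr_loss
            + attn_weight * attn_decoder_loss)


# Reproduces the batch-3100 record above to within rounding:
assert abs(combined_loss(0.1382, 0.3933, 0.2588) - 0.2546) < 5e-4
```
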
], tot_loss[loss=0.243, ctc_loss=0.1296, cr_loss=0.3723, attn_decoder_loss=0.2473, over 5775578.72 frames. ], batch size: 100, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:57:14,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=428700.0, ans=0.125 +2024-09-18 10:57:42,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=428780.0, ans=0.125 +2024-09-18 10:58:22,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=428860.0, ans=0.025 +2024-09-18 10:58:31,274 INFO [train.py:1198] (1/2) Epoch 24, batch 3150, loss[loss=0.2594, ctc_loss=0.141, cr_loss=0.4038, attn_decoder_loss=0.2636, over 28756.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1295, cr_loss=0.3717, attn_decoder_loss=0.2474, over 5781780.04 frames. ], batch size: 104, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 10:58:31,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=428900.0, ans=10.0 +2024-09-18 10:58:36,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=428900.0, ans=0.2 +2024-09-18 10:58:36,324 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 10:59:00,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=428980.0, ans=0.2 +2024-09-18 10:59:07,547 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.567e+01 8.611e+01 9.043e+01 9.612e+01 2.237e+02, threshold=1.809e+02, percent-clipped=2.0 +2024-09-18 10:59:07,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=428980.0, ans=0.125 +2024-09-18 10:59:18,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=429020.0, ans=0.125 +2024-09-18 10:59:36,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.60 vs. limit=22.5 +2024-09-18 10:59:49,146 INFO [train.py:1198] (1/2) Epoch 24, batch 3200, loss[loss=0.2431, ctc_loss=0.1299, cr_loss=0.3688, attn_decoder_loss=0.2475, over 29435.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1294, cr_loss=0.3712, attn_decoder_loss=0.2469, over 5791831.69 frames. 
], batch size: 79, lr: 4.61e-03, grad_scale: 16.0 +2024-09-18 10:59:55,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=429100.0, ans=0.125 +2024-09-18 11:00:01,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=429100.0, ans=0.125 +2024-09-18 11:00:04,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=429140.0, ans=0.125 +2024-09-18 11:00:18,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=429180.0, ans=0.0 +2024-09-18 11:00:27,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=429180.0, ans=0.125 +2024-09-18 11:00:28,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=429180.0, ans=0.04949747468305833 +2024-09-18 11:00:30,986 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.92 vs. limit=15.0 +2024-09-18 11:00:31,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=429180.0, ans=0.025 +2024-09-18 11:00:36,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=429220.0, ans=0.0 +2024-09-18 11:00:44,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=429220.0, ans=0.125 +2024-09-18 11:00:44,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.02 vs. limit=10.0 +2024-09-18 11:00:56,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=429260.0, ans=0.025 +2024-09-18 11:00:59,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=429260.0, ans=0.0 +2024-09-18 11:01:01,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.24 vs. limit=22.5 +2024-09-18 11:01:04,859 INFO [train.py:1198] (1/2) Epoch 24, batch 3250, loss[loss=0.2552, ctc_loss=0.1434, cr_loss=0.4049, attn_decoder_loss=0.2586, over 29715.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1301, cr_loss=0.3727, attn_decoder_loss=0.2476, over 5797951.18 frames. ], batch size: 84, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 11:01:26,880 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.73 vs. limit=15.0 +2024-09-18 11:01:42,457 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.459e+01 8.528e+01 8.996e+01 9.575e+01 1.279e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-18 11:01:45,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=429380.0, ans=0.0 +2024-09-18 11:01:56,534 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.45 vs. 
limit=15.0 +2024-09-18 11:02:22,368 INFO [train.py:1198] (1/2) Epoch 24, batch 3300, loss[loss=0.2439, ctc_loss=0.1261, cr_loss=0.372, attn_decoder_loss=0.2487, over 28413.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1294, cr_loss=0.3716, attn_decoder_loss=0.2465, over 5795114.10 frames. ], batch size: 111, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 11:02:29,553 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.36 vs. limit=6.0 +2024-09-18 11:02:46,104 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.19 vs. limit=15.0 +2024-09-18 11:02:50,876 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.18 vs. limit=6.0 +2024-09-18 11:02:53,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=429580.0, ans=0.025 +2024-09-18 11:03:02,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=429580.0, ans=0.025 +2024-09-18 11:03:03,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=429580.0, ans=0.1 +2024-09-18 11:03:04,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=429580.0, ans=0.125 +2024-09-18 11:03:22,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=429620.0, ans=0.2 +2024-09-18 11:03:27,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=429660.0, ans=0.125 +2024-09-18 11:03:40,494 INFO [train.py:1198] (1/2) Epoch 24, batch 3350, loss[loss=0.2654, ctc_loss=0.1415, cr_loss=0.3975, attn_decoder_loss=0.2703, over 28816.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1304, cr_loss=0.374, attn_decoder_loss=0.2476, over 5773207.39 frames. ], batch size: 104, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 11:04:18,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=429780.0, ans=6.0 +2024-09-18 11:04:18,810 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.747e+01 8.492e+01 9.207e+01 9.979e+01 1.773e+02, threshold=1.841e+02, percent-clipped=0.0 +2024-09-18 11:04:19,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=429780.0, ans=0.125 +2024-09-18 11:04:23,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=429780.0, ans=0.1 +2024-09-18 11:04:37,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=429820.0, ans=0.0 +2024-09-18 11:04:43,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=429860.0, ans=0.125 +2024-09-18 11:04:56,630 INFO [train.py:1198] (1/2) Epoch 24, batch 3400, loss[loss=0.2237, ctc_loss=0.118, cr_loss=0.3483, attn_decoder_loss=0.2277, over 29373.00 frames. 
], tot_loss[loss=0.2433, ctc_loss=0.1305, cr_loss=0.3741, attn_decoder_loss=0.2475, over 5765806.24 frames. ], batch size: 67, lr: 4.61e-03, grad_scale: 8.0 +2024-09-18 11:04:56,949 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:05:04,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=429900.0, ans=0.0 +2024-09-18 11:05:10,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=429940.0, ans=0.125 +2024-09-18 11:05:19,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=429940.0, ans=0.1 +2024-09-18 11:05:23,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.99 vs. limit=15.0 +2024-09-18 11:05:28,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=429980.0, ans=0.2 +2024-09-18 11:05:35,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=429980.0, ans=0.125 +2024-09-18 11:05:46,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=430020.0, ans=0.125 +2024-09-18 11:05:46,611 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.52 vs. limit=15.0 +2024-09-18 11:06:01,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=430060.0, ans=0.1 +2024-09-18 11:06:04,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=430060.0, ans=0.09899494936611666 +2024-09-18 11:06:14,374 INFO [train.py:1198] (1/2) Epoch 24, batch 3450, loss[loss=0.2492, ctc_loss=0.1246, cr_loss=0.3765, attn_decoder_loss=0.2547, over 28225.00 frames. ], tot_loss[loss=0.2436, ctc_loss=0.1305, cr_loss=0.3748, attn_decoder_loss=0.2478, over 5773975.42 frames. ], batch size: 111, lr: 4.60e-03, grad_scale: 8.0 +2024-09-18 11:06:31,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=430140.0, ans=0.2 +2024-09-18 11:06:43,945 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.49 vs. limit=22.5 +2024-09-18 11:06:51,985 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.937e+01 8.449e+01 8.954e+01 9.468e+01 1.386e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-18 11:07:05,485 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.26 vs. limit=6.0 +2024-09-18 11:07:32,350 INFO [train.py:1198] (1/2) Epoch 24, batch 3500, loss[loss=0.219, ctc_loss=0.1138, cr_loss=0.3422, attn_decoder_loss=0.2231, over 29318.00 frames. ], tot_loss[loss=0.2431, ctc_loss=0.1304, cr_loss=0.374, attn_decoder_loss=0.2473, over 5776149.67 frames. 
], batch size: 71, lr: 4.60e-03, grad_scale: 8.0 +2024-09-18 11:07:35,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=430300.0, ans=0.125 +2024-09-18 11:07:37,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=430300.0, ans=0.125 +2024-09-18 11:07:43,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=430300.0, ans=0.2 +2024-09-18 11:07:51,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.21 vs. limit=22.5 +2024-09-18 11:07:52,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=430340.0, ans=0.125 +2024-09-18 11:08:02,947 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.69 vs. limit=6.0 +2024-09-18 11:08:07,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.28 vs. limit=22.5 +2024-09-18 11:08:19,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=430420.0, ans=0.025 +2024-09-18 11:08:26,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=430420.0, ans=0.125 +2024-09-18 11:08:37,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=430460.0, ans=0.125 +2024-09-18 11:08:47,394 INFO [train.py:1198] (1/2) Epoch 24, batch 3550, loss[loss=0.2531, ctc_loss=0.1275, cr_loss=0.3726, attn_decoder_loss=0.2588, over 29698.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1297, cr_loss=0.3726, attn_decoder_loss=0.247, over 5781032.38 frames. ], batch size: 89, lr: 4.60e-03, grad_scale: 8.0 +2024-09-18 11:09:03,092 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.89 vs. limit=15.0 +2024-09-18 11:09:06,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=430540.0, ans=0.0 +2024-09-18 11:09:24,141 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.484e+01 9.073e+01 9.801e+01 1.561e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-18 11:09:29,388 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.63 vs. limit=15.0 +2024-09-18 11:09:45,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=430660.0, ans=0.0 +2024-09-18 11:10:01,177 INFO [train.py:1198] (1/2) Epoch 24, batch 3600, loss[loss=0.2302, ctc_loss=0.1179, cr_loss=0.3473, attn_decoder_loss=0.235, over 29466.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1293, cr_loss=0.3716, attn_decoder_loss=0.2471, over 5790921.72 frames. 
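
The Whitening records compare a per-module statistic against a scheduled limit (e.g. "metric=20.21 vs. limit=22.5" above). One metric with the right properties, and a plausible reading of these logs (an assumption, not the verbatim scaling.py formula), is num_channels * tr(C^2) / tr(C)^2 for the channel covariance C: it equals exactly 1 for perfectly "white" activations (covariance proportional to the identity) and grows as channels become correlated or unbalanced:

```python
import torch


def whitening_metric(x: torch.Tensor) -> float:
    """x: (num_frames, num_channels); returns num_channels*tr(C^2)/tr(C)^2."""
    x = x - x.mean(dim=0)
    cov = (x.T @ x) / x.shape[0]  # channel covariance C
    n = cov.shape[0]
    return (n * (cov @ cov).trace() / cov.trace() ** 2).item()


print(whitening_metric(torch.randn(1000, 512)))  # ~1.0 for white features
print(whitening_metric(torch.randn(1000, 512) @ torch.randn(512, 512)))  # large
```
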
], batch size: 77, lr: 4.60e-03, grad_scale: 16.0 +2024-09-18 11:10:01,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=430700.0, ans=0.05 +2024-09-18 11:10:10,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=430700.0, ans=0.125 +2024-09-18 11:10:23,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=430740.0, ans=0.0 +2024-09-18 11:10:24,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=430740.0, ans=22.5 +2024-09-18 11:10:33,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_ff3.min_abs, batch_count=430780.0, ans=0.2 +2024-09-18 11:10:43,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=430780.0, ans=0.025 +2024-09-18 11:10:47,604 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.50 vs. limit=5.0 +2024-09-18 11:10:52,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=430820.0, ans=0.2 +2024-09-18 11:10:54,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=430820.0, ans=0.125 +2024-09-18 11:10:55,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=430820.0, ans=0.125 +2024-09-18 11:11:01,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=430860.0, ans=0.125 +2024-09-18 11:11:07,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=430860.0, ans=0.0 +2024-09-18 11:11:11,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=430860.0, ans=0.0 +2024-09-18 11:11:17,342 INFO [train.py:1198] (1/2) Epoch 24, batch 3650, loss[loss=0.2628, ctc_loss=0.1522, cr_loss=0.4154, attn_decoder_loss=0.2659, over 29480.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1288, cr_loss=0.3705, attn_decoder_loss=0.2465, over 5793097.12 frames. ], batch size: 90, lr: 4.60e-03, grad_scale: 16.0 +2024-09-18 11:11:22,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.11 vs. limit=22.5 +2024-09-18 11:11:26,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=430900.0, ans=0.125 +2024-09-18 11:11:43,335 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.15 vs. 
limit=15.0 +2024-09-18 11:11:49,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=430980.0, ans=0.1 +2024-09-18 11:11:56,435 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.267e+01 8.409e+01 9.046e+01 9.842e+01 1.750e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-18 11:11:58,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=430980.0, ans=0.125 +2024-09-18 11:12:32,072 INFO [train.py:1198] (1/2) Epoch 24, batch 3700, loss[loss=0.2459, ctc_loss=0.127, cr_loss=0.3674, attn_decoder_loss=0.251, over 29695.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1288, cr_loss=0.371, attn_decoder_loss=0.2466, over 5802942.00 frames. ], batch size: 84, lr: 4.60e-03, grad_scale: 8.0 +2024-09-18 11:12:32,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=431100.0, ans=0.025 +2024-09-18 11:13:07,148 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.55 vs. limit=15.0 +2024-09-18 11:13:44,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.whiten.whitening_limit, batch_count=431260.0, ans=12.0 +2024-09-18 11:13:48,724 INFO [train.py:1198] (1/2) Epoch 24, batch 3750, loss[loss=0.2196, ctc_loss=0.1141, cr_loss=0.3517, attn_decoder_loss=0.2235, over 29376.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1288, cr_loss=0.3712, attn_decoder_loss=0.2465, over 5806875.66 frames. ], batch size: 67, lr: 4.60e-03, grad_scale: 8.0 +2024-09-18 11:13:56,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=431300.0, ans=0.0 +2024-09-18 11:14:03,270 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=7.97 vs. limit=22.5 +2024-09-18 11:14:09,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=431340.0, ans=0.125 +2024-09-18 11:14:09,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=431340.0, ans=0.125 +2024-09-18 11:14:24,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=431380.0, ans=0.95 +2024-09-18 11:14:27,338 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.117e+01 8.348e+01 8.770e+01 9.473e+01 2.105e+02, threshold=1.754e+02, percent-clipped=1.0 +2024-09-18 11:14:29,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=431380.0, ans=0.0 +2024-09-18 11:14:51,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=431460.0, ans=0.125 +2024-09-18 11:15:03,202 INFO [train.py:1198] (1/2) Epoch 24, batch 3800, loss[loss=0.256, ctc_loss=0.1385, cr_loss=0.3968, attn_decoder_loss=0.2602, over 29625.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1284, cr_loss=0.3702, attn_decoder_loss=0.2461, over 5798659.79 frames. 
], batch size: 86, lr: 4.60e-03, grad_scale: 8.0 +2024-09-18 11:15:26,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=431540.0, ans=0.125 +2024-09-18 11:15:39,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=431580.0, ans=0.025 +2024-09-18 11:15:39,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=431580.0, ans=0.0 +2024-09-18 11:16:07,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=431660.0, ans=0.125 +2024-09-18 11:16:19,239 INFO [train.py:1198] (1/2) Epoch 24, batch 3850, loss[loss=0.253, ctc_loss=0.1344, cr_loss=0.39, attn_decoder_loss=0.2575, over 29300.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1278, cr_loss=0.3696, attn_decoder_loss=0.2457, over 5811703.34 frames. ], batch size: 100, lr: 4.60e-03, grad_scale: 8.0 +2024-09-18 11:16:20,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=431700.0, ans=0.5 +2024-09-18 11:16:23,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=431700.0, ans=0.125 +2024-09-18 11:16:30,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0 +2024-09-18 11:16:31,190 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:16:50,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=431780.0, ans=0.1 +2024-09-18 11:16:57,866 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.248e+01 8.427e+01 9.024e+01 9.626e+01 1.408e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-18 11:16:58,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=431780.0, ans=0.09899494936611666 +2024-09-18 11:17:18,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=431860.0, ans=0.125 +2024-09-18 11:17:21,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=431860.0, ans=0.2 +2024-09-18 11:17:33,515 INFO [train.py:1198] (1/2) Epoch 24, batch 3900, loss[loss=0.2595, ctc_loss=0.138, cr_loss=0.3959, attn_decoder_loss=0.2642, over 29626.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1282, cr_loss=0.3707, attn_decoder_loss=0.2461, over 5816358.76 frames. 
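
The tot_loss[...] fields hold a running, frame-weighted average rather than single-batch values: individual batches carry roughly 29k frames while tot_loss is reported "over" about 5.8 million. That is consistent with an exponential decay of 1 - 1/200 per batch (steady state about 29000 x 200 = 5.8M frames, and the early-epoch counts of ~1.27M at batch 50 and ~2.25M at batch 100 below match the same decay); the decay constant is an inference from the logs, not a quoted default. A sketch:

```python
class RunningLoss:
    """Exponentially decayed, frame-weighted running average of the loss."""

    def __init__(self, reset_interval: int = 200):  # inferred, see above
        self.decay = 1.0 - 1.0 / reset_interval
        self.loss_sum = 0.0  # decayed, frame-weighted sum of batch losses
        self.frames = 0.0    # decayed frame count, printed as "over N frames"

    def update(self, batch_loss: float, batch_frames: float) -> float:
        self.loss_sum = self.loss_sum * self.decay + batch_loss * batch_frames
        self.frames = self.frames * self.decay + batch_frames
        return self.loss_sum / self.frames  # printed as tot_loss[loss=...]
```
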
], batch size: 86, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:17:45,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=431900.0, ans=0.0 +2024-09-18 11:17:48,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=431940.0, ans=0.125 +2024-09-18 11:17:54,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=431940.0, ans=0.125 +2024-09-18 11:17:57,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=431940.0, ans=0.2 +2024-09-18 11:18:03,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=431980.0, ans=0.125 +2024-09-18 11:18:43,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=432060.0, ans=0.0 +2024-09-18 11:18:55,357 INFO [train.py:1198] (1/2) Epoch 24, batch 3950, loss[loss=0.2511, ctc_loss=0.1317, cr_loss=0.384, attn_decoder_loss=0.2558, over 29449.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1275, cr_loss=0.3695, attn_decoder_loss=0.246, over 5835801.72 frames. ], batch size: 97, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:18:56,098 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.69 vs. limit=15.0 +2024-09-18 11:18:57,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=432100.0, ans=0.125 +2024-09-18 11:19:00,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=432100.0, ans=0.2 +2024-09-18 11:19:06,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=432100.0, ans=0.1 +2024-09-18 11:19:30,679 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.43 vs. limit=15.0 +2024-09-18 11:19:35,350 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.178e+01 8.348e+01 8.902e+01 9.353e+01 3.258e+02, threshold=1.780e+02, percent-clipped=1.0 +2024-09-18 11:19:37,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=432180.0, ans=0.125 +2024-09-18 11:19:38,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=432180.0, ans=0.0 +2024-09-18 11:19:47,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=432220.0, ans=0.2 +2024-09-18 11:20:00,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=432260.0, ans=0.125 +2024-09-18 11:20:09,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=432300.0, ans=0.0 +2024-09-18 11:20:10,532 INFO [train.py:1198] (1/2) Epoch 24, batch 4000, loss[loss=0.22, ctc_loss=0.1086, cr_loss=0.3315, attn_decoder_loss=0.225, over 29520.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1278, cr_loss=0.37, attn_decoder_loss=0.2461, over 5813650.97 frames. 
], batch size: 74, lr: 4.59e-03, grad_scale: 16.0 +2024-09-18 11:20:28,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=432340.0, ans=0.1 +2024-09-18 11:20:34,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=432340.0, ans=0.125 +2024-09-18 11:20:43,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=432380.0, ans=0.2 +2024-09-18 11:20:43,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=432380.0, ans=0.125 +2024-09-18 11:21:05,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=432420.0, ans=0.5 +2024-09-18 11:21:10,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=432460.0, ans=0.125 +2024-09-18 11:21:25,731 INFO [train.py:1198] (1/2) Epoch 24, batch 4050, loss[loss=0.265, ctc_loss=0.1635, cr_loss=0.3888, attn_decoder_loss=0.2676, over 19825.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1279, cr_loss=0.3693, attn_decoder_loss=0.2462, over 5797268.41 frames. ], batch size: 210, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:21:26,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=432500.0, ans=0.0 +2024-09-18 11:21:26,866 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=9.04 vs. limit=15.0 +2024-09-18 11:21:38,514 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.27 vs. limit=15.0 +2024-09-18 11:21:56,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=432580.0, ans=0.2 +2024-09-18 11:22:05,480 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.397e+01 8.379e+01 9.029e+01 9.565e+01 1.787e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-18 11:22:26,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=432660.0, ans=0.1 +2024-09-18 11:22:39,426 INFO [train.py:1198] (1/2) Epoch 24, batch 4100, loss[loss=0.2612, ctc_loss=0.1463, cr_loss=0.4071, attn_decoder_loss=0.2649, over 29515.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1285, cr_loss=0.3703, attn_decoder_loss=0.2465, over 5792973.60 frames. 
], batch size: 90, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:22:44,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=432700.0, ans=0.1 +2024-09-18 11:22:45,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=432700.0, ans=0.125 +2024-09-18 11:22:58,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=432740.0, ans=0.125 +2024-09-18 11:23:10,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=432780.0, ans=0.125 +2024-09-18 11:23:38,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=432860.0, ans=0.1 +2024-09-18 11:23:54,710 INFO [train.py:1198] (1/2) Epoch 24, batch 4150, loss[loss=0.2363, ctc_loss=0.1317, cr_loss=0.3711, attn_decoder_loss=0.2397, over 29508.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1286, cr_loss=0.3703, attn_decoder_loss=0.2463, over 5798770.20 frames. ], batch size: 77, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:24:28,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=432980.0, ans=0.1 +2024-09-18 11:24:34,390 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.403e+01 8.706e+01 9.310e+01 1.000e+02 1.548e+02, threshold=1.862e+02, percent-clipped=0.0 +2024-09-18 11:24:37,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=433020.0, ans=0.0 +2024-09-18 11:24:40,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=433020.0, ans=0.0 +2024-09-18 11:24:58,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=433060.0, ans=0.125 +2024-09-18 11:24:59,057 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.72 vs. limit=15.0 +2024-09-18 11:25:05,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=433060.0, ans=0.0 +2024-09-18 11:25:08,588 INFO [train.py:1198] (1/2) Epoch 24, batch 4200, loss[loss=0.2627, ctc_loss=0.1491, cr_loss=0.4055, attn_decoder_loss=0.2663, over 29537.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1289, cr_loss=0.3711, attn_decoder_loss=0.2468, over 5800216.01 frames. 
], batch size: 90, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:25:10,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=433100.0, ans=0.0 +2024-09-18 11:25:32,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=433140.0, ans=0.125 +2024-09-18 11:25:54,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=433220.0, ans=0.125 +2024-09-18 11:26:10,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=433260.0, ans=0.125 +2024-09-18 11:26:15,359 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.34 vs. limit=12.0 +2024-09-18 11:26:19,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=433260.0, ans=0.125 +2024-09-18 11:26:23,507 INFO [train.py:1198] (1/2) Epoch 24, batch 4250, loss[loss=0.2194, ctc_loss=0.1102, cr_loss=0.3378, attn_decoder_loss=0.224, over 29525.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1284, cr_loss=0.3706, attn_decoder_loss=0.2467, over 5806648.16 frames. ], batch size: 74, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:26:45,000 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.72 vs. limit=6.0 +2024-09-18 11:27:03,540 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.615e+01 9.136e+01 9.637e+01 1.647e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-18 11:27:05,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=433380.0, ans=0.0 +2024-09-18 11:27:15,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=433420.0, ans=0.0 +2024-09-18 11:27:18,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=433420.0, ans=0.125 +2024-09-18 11:27:27,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=433460.0, ans=0.125 +2024-09-18 11:27:34,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=433460.0, ans=0.125 +2024-09-18 11:27:38,617 INFO [train.py:1198] (1/2) Epoch 24, batch 4300, loss[loss=0.2484, ctc_loss=0.1294, cr_loss=0.3641, attn_decoder_loss=0.2535, over 29521.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1282, cr_loss=0.3704, attn_decoder_loss=0.2469, over 5796486.45 frames. ], batch size: 87, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:27:47,851 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:27:58,881 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.33 vs. 
limit=15.0 +2024-09-18 11:28:01,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=433540.0, ans=0.0 +2024-09-18 11:28:10,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=433580.0, ans=0.025 +2024-09-18 11:28:10,744 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.55 vs. limit=15.0 +2024-09-18 11:28:17,376 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.50 vs. limit=15.0 +2024-09-18 11:28:21,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=433580.0, ans=0.125 +2024-09-18 11:28:22,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=433620.0, ans=0.125 +2024-09-18 11:28:25,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=433620.0, ans=10.0 +2024-09-18 11:28:54,031 INFO [train.py:1198] (1/2) Epoch 24, batch 4350, loss[loss=0.2428, ctc_loss=0.1137, cr_loss=0.3334, attn_decoder_loss=0.2498, over 29469.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1313, cr_loss=0.3766, attn_decoder_loss=0.2502, over 5798407.31 frames. ], batch size: 97, lr: 4.59e-03, grad_scale: 8.0 +2024-09-18 11:29:09,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=433740.0, ans=0.125 +2024-09-18 11:29:17,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=433740.0, ans=0.125 +2024-09-18 11:29:29,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.33 vs. limit=15.0 +2024-09-18 11:29:33,324 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.959e+01 8.974e+01 9.434e+01 1.011e+02 1.996e+02, threshold=1.887e+02, percent-clipped=1.0 +2024-09-18 11:30:01,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=433860.0, ans=0.2 +2024-09-18 11:30:07,013 INFO [train.py:1198] (1/2) Epoch 24, batch 4400, loss[loss=0.2492, ctc_loss=0.1327, cr_loss=0.3959, attn_decoder_loss=0.2534, over 27559.00 frames. ], tot_loss[loss=0.2481, ctc_loss=0.1328, cr_loss=0.38, attn_decoder_loss=0.2525, over 5768554.08 frames. 
], batch size: 124, lr: 4.58e-03, grad_scale: 16.0 +2024-09-18 11:30:13,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=433900.0, ans=0.025 +2024-09-18 11:30:16,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=433900.0, ans=0.125 +2024-09-18 11:30:32,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=433940.0, ans=0.2 +2024-09-18 11:30:43,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=433980.0, ans=0.125 +2024-09-18 11:30:43,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=433980.0, ans=0.0 +2024-09-18 11:30:53,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=434020.0, ans=0.95 +2024-09-18 11:30:59,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=434020.0, ans=0.1 +2024-09-18 11:31:21,664 INFO [train.py:1198] (1/2) Epoch 24, batch 4450, loss[loss=0.263, ctc_loss=0.1554, cr_loss=0.3955, attn_decoder_loss=0.2661, over 20132.00 frames. ], tot_loss[loss=0.251, ctc_loss=0.1373, cr_loss=0.3848, attn_decoder_loss=0.2551, over 5574638.46 frames. ], batch size: 210, lr: 4.58e-03, grad_scale: 8.0 +2024-09-18 11:31:23,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=434100.0, ans=0.125 +2024-09-18 11:31:35,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=434140.0, ans=0.1 +2024-09-18 11:31:46,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=434140.0, ans=0.025 +2024-09-18 11:31:46,706 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.94 vs. limit=22.5 +2024-09-18 11:31:51,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=434180.0, ans=0.125 +2024-09-18 11:31:52,392 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.62 vs. limit=6.0 +2024-09-18 11:31:53,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=434180.0, ans=0.1 +2024-09-18 11:32:04,067 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 9.021e+01 9.778e+01 1.211e+02 1.854e+02, threshold=1.956e+02, percent-clipped=0.0 +2024-09-18 11:32:04,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=434180.0, ans=0.0 +2024-09-18 11:32:12,420 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.34 vs. 
limit=15.0 +2024-09-18 11:32:20,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=434220.0, ans=0.125 +2024-09-18 11:32:25,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=434260.0, ans=0.125 +2024-09-18 11:32:34,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=434260.0, ans=0.125 +2024-09-18 11:32:37,512 INFO [train.py:1198] (1/2) Epoch 24, batch 4500, loss[loss=0.2738, ctc_loss=0.1765, cr_loss=0.4277, attn_decoder_loss=0.2751, over 20221.00 frames. ], tot_loss[loss=0.2535, ctc_loss=0.1416, cr_loss=0.387, attn_decoder_loss=0.2573, over 5236157.26 frames. ], batch size: 209, lr: 4.58e-03, grad_scale: 8.0 +2024-09-18 11:32:45,099 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:33:07,972 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=10.10 vs. limit=12.0 +2024-09-18 11:34:00,538 INFO [train.py:1198] (1/2) Epoch 25, batch 0, loss[loss=0.2239, ctc_loss=0.1064, cr_loss=0.3412, attn_decoder_loss=0.2293, over 29615.00 frames. ], tot_loss[loss=0.2239, ctc_loss=0.1064, cr_loss=0.3412, attn_decoder_loss=0.2293, over 29615.00 frames. ], batch size: 73, lr: 4.49e-03, grad_scale: 16.0 +2024-09-18 11:34:00,538 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 11:34:18,957 INFO [train.py:1230] (1/2) Epoch 25, validation: loss=0.2119, ctc_loss=0.03765, cr_loss=5.538e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-18 11:34:18,958 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 11:34:53,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=434480.0, ans=0.1 +2024-09-18 11:34:57,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=434480.0, ans=0.125 +2024-09-18 11:35:36,617 INFO [train.py:1198] (1/2) Epoch 25, batch 50, loss[loss=0.2203, ctc_loss=0.1107, cr_loss=0.332, attn_decoder_loss=0.2251, over 29407.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1304, cr_loss=0.3766, attn_decoder_loss=0.2477, over 1269232.14 frames. ], batch size: 70, lr: 4.49e-03, grad_scale: 8.0 +2024-09-18 11:35:42,736 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.115e+01 8.952e+01 1.043e+02 1.177e+02 2.373e+02, threshold=2.086e+02, percent-clipped=2.0 +2024-09-18 11:35:42,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=434600.0, ans=0.2 +2024-09-18 11:36:02,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=434640.0, ans=0.125 +2024-09-18 11:36:19,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=434680.0, ans=0.125 +2024-09-18 11:36:53,504 INFO [train.py:1198] (1/2) Epoch 25, batch 100, loss[loss=0.2325, ctc_loss=0.124, cr_loss=0.36, attn_decoder_loss=0.2365, over 29543.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1319, cr_loss=0.3785, attn_decoder_loss=0.2501, over 2254033.48 frames. 
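
The "Maximum memory allocated so far is 52672MB" records correspond to PyTorch's CUDA peak-memory counter; a minimal equivalent of how such a line can be produced:

```python
import torch

if torch.cuda.is_available():
    mb = torch.cuda.max_memory_allocated(device=0) // (1024 * 1024)
    print(f"Maximum memory allocated so far is {mb}MB")
```
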
], batch size: 76, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:37:16,806 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.30 vs. limit=15.0 +2024-09-18 11:37:23,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=434880.0, ans=0.0 +2024-09-18 11:37:32,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=434880.0, ans=0.125 +2024-09-18 11:37:43,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.10 vs. limit=22.5 +2024-09-18 11:38:04,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=434960.0, ans=0.0 +2024-09-18 11:38:05,798 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.51 vs. limit=15.0 +2024-09-18 11:38:08,148 INFO [train.py:1198] (1/2) Epoch 25, batch 150, loss[loss=0.2123, ctc_loss=0.1123, cr_loss=0.3391, attn_decoder_loss=0.2159, over 29420.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1297, cr_loss=0.3739, attn_decoder_loss=0.2473, over 3048048.12 frames. ], batch size: 70, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:38:08,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=435000.0, ans=0.125 +2024-09-18 11:38:10,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=435000.0, ans=0.125 +2024-09-18 11:38:11,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=435000.0, ans=0.0 +2024-09-18 11:38:14,095 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.274e+01 8.648e+01 9.269e+01 9.917e+01 1.697e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-18 11:38:20,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=435000.0, ans=0.125 +2024-09-18 11:38:23,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=435040.0, ans=0.2 +2024-09-18 11:38:35,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=435040.0, ans=0.125 +2024-09-18 11:38:43,776 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.86 vs. limit=15.0 +2024-09-18 11:38:47,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=435080.0, ans=0.0 +2024-09-18 11:38:48,416 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.57 vs. 
limit=15.0 +2024-09-18 11:39:07,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=435160.0, ans=0.125 +2024-09-18 11:39:22,454 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:39:22,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=435200.0, ans=0.125 +2024-09-18 11:39:24,077 INFO [train.py:1198] (1/2) Epoch 25, batch 200, loss[loss=0.2609, ctc_loss=0.145, cr_loss=0.3935, attn_decoder_loss=0.2651, over 27369.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1287, cr_loss=0.372, attn_decoder_loss=0.2461, over 3658365.28 frames. ], batch size: 124, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:39:24,840 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.72 vs. limit=15.0 +2024-09-18 11:39:31,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=435200.0, ans=0.125 +2024-09-18 11:39:34,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=435200.0, ans=0.0 +2024-09-18 11:40:03,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=435280.0, ans=0.1 +2024-09-18 11:40:04,701 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.40 vs. limit=6.0 +2024-09-18 11:40:07,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.25 vs. limit=10.0 +2024-09-18 11:40:14,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=435320.0, ans=0.2 +2024-09-18 11:40:25,551 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.69 vs. limit=15.0 +2024-09-18 11:40:38,973 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:40:44,515 INFO [train.py:1198] (1/2) Epoch 25, batch 250, loss[loss=0.2588, ctc_loss=0.1406, cr_loss=0.3836, attn_decoder_loss=0.2634, over 29166.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1282, cr_loss=0.3718, attn_decoder_loss=0.2463, over 4141460.07 frames. ], batch size: 100, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:40:50,552 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.660e+01 8.459e+01 8.857e+01 9.365e+01 1.077e+02, threshold=1.771e+02, percent-clipped=0.0 +2024-09-18 11:40:55,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=435400.0, ans=0.0 +2024-09-18 11:41:18,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=435480.0, ans=0.0 +2024-09-18 11:41:26,398 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.40 vs. 
limit=22.5 +2024-09-18 11:41:30,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=435520.0, ans=0.125 +2024-09-18 11:41:51,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=435560.0, ans=0.2 +2024-09-18 11:41:52,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=435560.0, ans=0.125 +2024-09-18 11:41:59,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=435600.0, ans=0.125 +2024-09-18 11:42:00,691 INFO [train.py:1198] (1/2) Epoch 25, batch 300, loss[loss=0.252, ctc_loss=0.133, cr_loss=0.3814, attn_decoder_loss=0.2567, over 29514.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.128, cr_loss=0.3717, attn_decoder_loss=0.2464, over 4507734.67 frames. ], batch size: 92, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:42:18,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.27 vs. limit=10.0 +2024-09-18 11:42:28,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=435640.0, ans=0.0 +2024-09-18 11:42:40,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=435680.0, ans=0.125 +2024-09-18 11:43:04,663 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.79 vs. limit=22.5 +2024-09-18 11:43:15,963 INFO [train.py:1198] (1/2) Epoch 25, batch 350, loss[loss=0.2194, ctc_loss=0.1093, cr_loss=0.3332, attn_decoder_loss=0.2242, over 29320.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1286, cr_loss=0.3727, attn_decoder_loss=0.247, over 4793791.52 frames. ], batch size: 71, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:43:18,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.89 vs. limit=22.5 +2024-09-18 11:43:21,919 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.327e+01 8.434e+01 8.932e+01 9.530e+01 2.745e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-18 11:43:22,189 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:43:33,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=435840.0, ans=0.2 +2024-09-18 11:43:53,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.54 vs. 
limit=12.0 +2024-09-18 11:44:03,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=435880.0, ans=0.1 +2024-09-18 11:44:06,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=435920.0, ans=0.125 +2024-09-18 11:44:16,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=435920.0, ans=0.2 +2024-09-18 11:44:18,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=435920.0, ans=0.125 +2024-09-18 11:44:36,423 INFO [train.py:1198] (1/2) Epoch 25, batch 400, loss[loss=0.2445, ctc_loss=0.1279, cr_loss=0.3747, attn_decoder_loss=0.2491, over 29733.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1279, cr_loss=0.3713, attn_decoder_loss=0.2464, over 5023274.57 frames. ], batch size: 82, lr: 4.48e-03, grad_scale: 16.0 +2024-09-18 11:45:02,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=436040.0, ans=0.2 +2024-09-18 11:45:31,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=436120.0, ans=0.0 +2024-09-18 11:45:40,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=436160.0, ans=0.2 +2024-09-18 11:45:43,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=436160.0, ans=0.125 +2024-09-18 11:45:52,029 INFO [train.py:1198] (1/2) Epoch 25, batch 450, loss[loss=0.254, ctc_loss=0.1386, cr_loss=0.3975, attn_decoder_loss=0.258, over 29691.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.128, cr_loss=0.3704, attn_decoder_loss=0.2465, over 5185224.35 frames. ], batch size: 83, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:45:52,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=436200.0, ans=0.2 +2024-09-18 11:45:59,433 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.181e+01 8.625e+01 9.050e+01 9.660e+01 1.722e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-18 11:46:02,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=436200.0, ans=0.1 +2024-09-18 11:46:47,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=436320.0, ans=0.2 +2024-09-18 11:46:51,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=436360.0, ans=0.125 +2024-09-18 11:47:04,052 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.85 vs. limit=15.0 +2024-09-18 11:47:07,834 INFO [train.py:1198] (1/2) Epoch 25, batch 500, loss[loss=0.2615, ctc_loss=0.1459, cr_loss=0.415, attn_decoder_loss=0.2651, over 29439.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1276, cr_loss=0.3697, attn_decoder_loss=0.2457, over 5328690.67 frames. 
], batch size: 94, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:47:08,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=436400.0, ans=0.125 +2024-09-18 11:47:39,092 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.54 vs. limit=15.0 +2024-09-18 11:47:40,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=436440.0, ans=0.125 +2024-09-18 11:47:41,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=436480.0, ans=0.2 +2024-09-18 11:47:49,770 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.36 vs. limit=12.0 +2024-09-18 11:48:07,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=436520.0, ans=0.125 +2024-09-18 11:48:08,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=436520.0, ans=0.2 +2024-09-18 11:48:19,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=436560.0, ans=0.125 +2024-09-18 11:48:23,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=436560.0, ans=0.1 +2024-09-18 11:48:28,244 INFO [train.py:1198] (1/2) Epoch 25, batch 550, loss[loss=0.2478, ctc_loss=0.1327, cr_loss=0.3779, attn_decoder_loss=0.2522, over 28894.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1273, cr_loss=0.369, attn_decoder_loss=0.2456, over 5421714.56 frames. ], batch size: 104, lr: 4.48e-03, grad_scale: 8.0 +2024-09-18 11:48:35,875 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.562e+01 9.108e+01 9.510e+01 4.336e+02, threshold=1.822e+02, percent-clipped=3.0 +2024-09-18 11:48:39,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=436600.0, ans=0.0 +2024-09-18 11:48:54,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=436640.0, ans=0.2 +2024-09-18 11:49:06,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=436680.0, ans=0.125 +2024-09-18 11:49:06,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=436680.0, ans=0.025 +2024-09-18 11:49:14,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=436720.0, ans=0.125 +2024-09-18 11:49:26,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=436720.0, ans=0.0 +2024-09-18 11:49:26,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=436720.0, ans=0.0 +2024-09-18 11:49:45,354 INFO [train.py:1198] (1/2) Epoch 25, batch 600, loss[loss=0.2616, ctc_loss=0.1476, cr_loss=0.403, attn_decoder_loss=0.2653, over 29247.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1275, cr_loss=0.3702, attn_decoder_loss=0.2461, over 5509417.54 frames. 
], batch size: 100, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 11:49:48,630 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:49:54,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=436800.0, ans=0.1 +2024-09-18 11:49:57,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=436800.0, ans=0.2 +2024-09-18 11:49:59,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=436840.0, ans=0.0 +2024-09-18 11:50:16,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=436880.0, ans=0.125 +2024-09-18 11:50:18,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=436880.0, ans=0.1 +2024-09-18 11:50:21,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=436880.0, ans=0.2 +2024-09-18 11:50:36,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=436920.0, ans=0.05 +2024-09-18 11:50:37,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=436920.0, ans=0.125 +2024-09-18 11:50:45,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=436960.0, ans=0.1 +2024-09-18 11:50:48,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=436960.0, ans=0.025 +2024-09-18 11:50:56,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=436960.0, ans=0.1 +2024-09-18 11:51:00,506 INFO [train.py:1198] (1/2) Epoch 25, batch 650, loss[loss=0.2439, ctc_loss=0.1325, cr_loss=0.3875, attn_decoder_loss=0.2477, over 29772.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1271, cr_loss=0.3696, attn_decoder_loss=0.2455, over 5586628.85 frames. ], batch size: 81, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 11:51:08,135 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 8.416e+01 8.904e+01 9.509e+01 2.097e+02, threshold=1.781e+02, percent-clipped=1.0 +2024-09-18 11:51:09,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.29 vs. limit=12.0 +2024-09-18 11:51:09,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=437000.0, ans=0.125 +2024-09-18 11:51:20,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=437040.0, ans=0.125 +2024-09-18 11:51:45,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=437080.0, ans=0.1 +2024-09-18 11:52:21,117 INFO [train.py:1198] (1/2) Epoch 25, batch 700, loss[loss=0.232, ctc_loss=0.1263, cr_loss=0.3659, attn_decoder_loss=0.2356, over 29532.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1277, cr_loss=0.3704, attn_decoder_loss=0.2459, over 5638237.31 frames. 
], batch size: 76, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 11:52:21,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=437200.0, ans=0.0 +2024-09-18 11:52:27,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=437200.0, ans=0.0 +2024-09-18 11:53:00,354 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.34 vs. limit=15.0 +2024-09-18 11:53:21,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.71 vs. limit=15.0 +2024-09-18 11:53:23,161 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.80 vs. limit=15.0 +2024-09-18 11:53:28,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=437360.0, ans=0.125 +2024-09-18 11:53:36,276 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:53:37,366 INFO [train.py:1198] (1/2) Epoch 25, batch 750, loss[loss=0.2497, ctc_loss=0.1337, cr_loss=0.3833, attn_decoder_loss=0.2541, over 29717.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1277, cr_loss=0.3703, attn_decoder_loss=0.2456, over 5676319.17 frames. ], batch size: 82, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 11:53:37,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=437400.0, ans=0.125 +2024-09-18 11:53:44,706 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.480e+01 8.436e+01 8.901e+01 9.527e+01 2.571e+02, threshold=1.780e+02, percent-clipped=1.0 +2024-09-18 11:54:06,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=437480.0, ans=0.125 +2024-09-18 11:54:09,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=437480.0, ans=0.125 +2024-09-18 11:54:13,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=437480.0, ans=0.125 +2024-09-18 11:54:17,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=437480.0, ans=0.125 +2024-09-18 11:54:31,071 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.76 vs. limit=15.0 +2024-09-18 11:54:45,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=437560.0, ans=0.125 +2024-09-18 11:54:53,469 INFO [train.py:1198] (1/2) Epoch 25, batch 800, loss[loss=0.2175, ctc_loss=0.1042, cr_loss=0.3152, attn_decoder_loss=0.2231, over 29584.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1278, cr_loss=0.3705, attn_decoder_loss=0.2456, over 5707140.16 frames. 
], batch size: 73, lr: 4.47e-03, grad_scale: 16.0 +2024-09-18 11:54:53,902 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:55:10,531 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:55:11,935 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:55:15,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=437640.0, ans=0.2 +2024-09-18 11:55:16,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=437640.0, ans=0.2 +2024-09-18 11:55:21,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=437640.0, ans=0.125 +2024-09-18 11:55:25,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=437680.0, ans=0.0 +2024-09-18 11:55:47,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=437720.0, ans=0.125 +2024-09-18 11:55:48,614 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 11:56:06,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=437760.0, ans=0.0 +2024-09-18 11:56:13,698 INFO [train.py:1198] (1/2) Epoch 25, batch 850, loss[loss=0.256, ctc_loss=0.1321, cr_loss=0.3847, attn_decoder_loss=0.2612, over 29726.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1273, cr_loss=0.3701, attn_decoder_loss=0.2455, over 5736012.44 frames. ], batch size: 89, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 11:56:17,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=437800.0, ans=0.025 +2024-09-18 11:56:22,488 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.420e+01 8.934e+01 9.567e+01 3.952e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 11:56:25,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=437800.0, ans=0.0 +2024-09-18 11:56:39,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=437840.0, ans=0.5 +2024-09-18 11:56:59,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=437920.0, ans=0.125 +2024-09-18 11:57:08,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=437920.0, ans=0.0 +2024-09-18 11:57:20,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=437960.0, ans=0.0 +2024-09-18 11:57:23,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=437960.0, ans=0.1 +2024-09-18 11:57:29,253 INFO [train.py:1198] (1/2) Epoch 25, batch 900, loss[loss=0.2141, ctc_loss=0.1036, cr_loss=0.3285, attn_decoder_loss=0.2191, over 29610.00 frames. 
], tot_loss[loss=0.2414, ctc_loss=0.1277, cr_loss=0.3707, attn_decoder_loss=0.2458, over 5739192.88 frames. ], batch size: 73, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 11:57:41,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=438000.0, ans=0.125 +2024-09-18 11:57:53,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=438040.0, ans=0.125 +2024-09-18 11:58:10,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=438080.0, ans=0.125 +2024-09-18 11:58:17,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=438120.0, ans=0.2 +2024-09-18 11:58:21,425 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.94 vs. limit=12.0 +2024-09-18 11:58:38,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=438160.0, ans=0.125 +2024-09-18 11:58:40,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=438160.0, ans=0.025 +2024-09-18 11:58:44,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.52 vs. limit=15.0 +2024-09-18 11:58:44,558 INFO [train.py:1198] (1/2) Epoch 25, batch 950, loss[loss=0.2283, ctc_loss=0.1148, cr_loss=0.3464, attn_decoder_loss=0.2333, over 29500.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1278, cr_loss=0.37, attn_decoder_loss=0.246, over 5740247.79 frames. ], batch size: 74, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 11:58:53,516 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.515e+01 8.540e+01 9.168e+01 9.959e+01 1.680e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-18 11:59:13,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=438280.0, ans=0.07 +2024-09-18 11:59:18,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=438280.0, ans=0.2 +2024-09-18 11:59:59,507 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.63 vs. limit=15.0 +2024-09-18 12:00:04,896 INFO [train.py:1198] (1/2) Epoch 25, batch 1000, loss[loss=0.2447, ctc_loss=0.1345, cr_loss=0.3837, attn_decoder_loss=0.2484, over 29499.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1289, cr_loss=0.372, attn_decoder_loss=0.2469, over 5733262.52 frames. 
], batch size: 77, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 12:00:08,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=438400.0, ans=0.09899494936611666 +2024-09-18 12:00:18,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=438440.0, ans=0.125 +2024-09-18 12:00:37,240 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:00:52,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=438520.0, ans=0.0 +2024-09-18 12:00:56,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=438520.0, ans=0.125 +2024-09-18 12:01:02,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=438520.0, ans=0.0 +2024-09-18 12:01:05,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=438560.0, ans=0.0 +2024-09-18 12:01:07,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=438560.0, ans=0.125 +2024-09-18 12:01:20,734 INFO [train.py:1198] (1/2) Epoch 25, batch 1050, loss[loss=0.2534, ctc_loss=0.1381, cr_loss=0.4028, attn_decoder_loss=0.2573, over 29684.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1283, cr_loss=0.3712, attn_decoder_loss=0.2461, over 5742235.47 frames. ], batch size: 85, lr: 4.47e-03, grad_scale: 8.0 +2024-09-18 12:01:24,843 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.30 vs. limit=6.0 +2024-09-18 12:01:25,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=438600.0, ans=0.125 +2024-09-18 12:01:29,762 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.101e+01 8.550e+01 9.112e+01 9.812e+01 2.455e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 12:01:37,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=438640.0, ans=0.035 +2024-09-18 12:02:14,630 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.55 vs. limit=15.0 +2024-09-18 12:02:15,707 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:02:21,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=438760.0, ans=0.125 +2024-09-18 12:02:24,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=438760.0, ans=0.025 +2024-09-18 12:02:24,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=438760.0, ans=0.125 +2024-09-18 12:02:36,545 INFO [train.py:1198] (1/2) Epoch 25, batch 1100, loss[loss=0.2428, ctc_loss=0.1197, cr_loss=0.3684, attn_decoder_loss=0.2483, over 29452.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1276, cr_loss=0.3696, attn_decoder_loss=0.2455, over 5755622.99 frames. 
], batch size: 78, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:02:51,180 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.42 vs. limit=10.0 +2024-09-18 12:02:53,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=438840.0, ans=0.0 +2024-09-18 12:02:57,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=438840.0, ans=0.04949747468305833 +2024-09-18 12:02:59,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=438840.0, ans=0.125 +2024-09-18 12:03:02,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=438840.0, ans=0.125 +2024-09-18 12:03:04,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=438840.0, ans=0.125 +2024-09-18 12:03:15,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=438880.0, ans=0.0 +2024-09-18 12:03:19,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=438880.0, ans=0.125 +2024-09-18 12:03:44,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=438960.0, ans=0.125 +2024-09-18 12:03:48,509 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.89 vs. limit=10.0 +2024-09-18 12:03:55,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=439000.0, ans=0.125 +2024-09-18 12:03:56,687 INFO [train.py:1198] (1/2) Epoch 25, batch 1150, loss[loss=0.2349, ctc_loss=0.1204, cr_loss=0.3499, attn_decoder_loss=0.2398, over 29457.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1278, cr_loss=0.3697, attn_decoder_loss=0.2458, over 5754249.69 frames. ], batch size: 78, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:04:04,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=439000.0, ans=0.125 +2024-09-18 12:04:05,928 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.230e+01 8.574e+01 9.064e+01 9.855e+01 2.778e+02, threshold=1.813e+02, percent-clipped=2.0 +2024-09-18 12:04:33,909 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:04:33,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=439080.0, ans=0.2 +2024-09-18 12:05:13,552 INFO [train.py:1198] (1/2) Epoch 25, batch 1200, loss[loss=0.244, ctc_loss=0.1219, cr_loss=0.3481, attn_decoder_loss=0.2498, over 29691.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1288, cr_loss=0.3716, attn_decoder_loss=0.2466, over 5747590.72 frames. 
], batch size: 85, lr: 4.46e-03, grad_scale: 16.0 +2024-09-18 12:05:15,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=439200.0, ans=0.125 +2024-09-18 12:05:22,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=439200.0, ans=0.0 +2024-09-18 12:05:36,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=439240.0, ans=0.0 +2024-09-18 12:06:16,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=439360.0, ans=0.125 +2024-09-18 12:06:17,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=439360.0, ans=0.125 +2024-09-18 12:06:25,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=439360.0, ans=0.125 +2024-09-18 12:06:29,680 INFO [train.py:1198] (1/2) Epoch 25, batch 1250, loss[loss=0.2531, ctc_loss=0.1322, cr_loss=0.3846, attn_decoder_loss=0.258, over 29533.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1289, cr_loss=0.3723, attn_decoder_loss=0.2471, over 5775062.60 frames. ], batch size: 92, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:06:33,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=439400.0, ans=0.2 +2024-09-18 12:06:40,446 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.589e+01 8.697e+01 9.266e+01 9.820e+01 4.128e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-18 12:06:45,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=439440.0, ans=0.125 +2024-09-18 12:07:04,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.97 vs. limit=15.0 +2024-09-18 12:07:29,900 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.59 vs. limit=15.0 +2024-09-18 12:07:49,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=439600.0, ans=0.2 +2024-09-18 12:07:50,379 INFO [train.py:1198] (1/2) Epoch 25, batch 1300, loss[loss=0.2485, ctc_loss=0.1319, cr_loss=0.3636, attn_decoder_loss=0.2534, over 28075.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1279, cr_loss=0.3702, attn_decoder_loss=0.2462, over 5778324.42 frames. ], batch size: 111, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:08:23,204 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.97 vs. 
limit=10.0 +2024-09-18 12:08:29,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=439680.0, ans=0.125 +2024-09-18 12:08:39,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=439720.0, ans=0.125 +2024-09-18 12:08:40,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=439720.0, ans=0.125 +2024-09-18 12:09:03,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=439760.0, ans=0.2 +2024-09-18 12:09:06,245 INFO [train.py:1198] (1/2) Epoch 25, batch 1350, loss[loss=0.2457, ctc_loss=0.1374, cr_loss=0.3771, attn_decoder_loss=0.2493, over 29746.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1274, cr_loss=0.3695, attn_decoder_loss=0.2456, over 5796859.96 frames. ], batch size: 81, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:09:08,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=439800.0, ans=0.2 +2024-09-18 12:09:16,806 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.561e+01 9.293e+01 1.003e+02 2.081e+02, threshold=1.859e+02, percent-clipped=1.0 +2024-09-18 12:09:28,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=439840.0, ans=0.125 +2024-09-18 12:09:48,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=439880.0, ans=0.1 +2024-09-18 12:10:04,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=439960.0, ans=0.2 +2024-09-18 12:10:21,578 INFO [train.py:1198] (1/2) Epoch 25, batch 1400, loss[loss=0.2092, ctc_loss=0.09743, cr_loss=0.2977, attn_decoder_loss=0.215, over 29612.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1274, cr_loss=0.3695, attn_decoder_loss=0.2455, over 5807620.00 frames. ], batch size: 69, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:10:28,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=440000.0, ans=0.025 +2024-09-18 12:10:47,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=440040.0, ans=0.0 +2024-09-18 12:10:53,062 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.08 vs. limit=12.0 +2024-09-18 12:10:56,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=440080.0, ans=0.125 +2024-09-18 12:11:00,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=440080.0, ans=0.125 +2024-09-18 12:11:15,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=440120.0, ans=0.0 +2024-09-18 12:11:41,713 INFO [train.py:1198] (1/2) Epoch 25, batch 1450, loss[loss=0.2669, ctc_loss=0.1448, cr_loss=0.4193, attn_decoder_loss=0.2711, over 29429.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1279, cr_loss=0.3705, attn_decoder_loss=0.2462, over 5805167.73 frames. 
], batch size: 94, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:11:51,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=440200.0, ans=0.0 +2024-09-18 12:11:52,219 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.790e+01 8.701e+01 9.305e+01 9.884e+01 1.753e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-18 12:11:53,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=440200.0, ans=0.0 +2024-09-18 12:12:11,253 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.62 vs. limit=22.5 +2024-09-18 12:12:21,903 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.92 vs. limit=12.0 +2024-09-18 12:12:46,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=440360.0, ans=0.5 +2024-09-18 12:12:57,332 INFO [train.py:1198] (1/2) Epoch 25, batch 1500, loss[loss=0.2509, ctc_loss=0.1402, cr_loss=0.4052, attn_decoder_loss=0.2542, over 29628.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1284, cr_loss=0.3714, attn_decoder_loss=0.2467, over 5806811.32 frames. ], batch size: 86, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:13:17,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=440440.0, ans=0.125 +2024-09-18 12:13:24,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=440440.0, ans=0.2 +2024-09-18 12:13:27,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=440480.0, ans=0.2 +2024-09-18 12:13:32,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=440480.0, ans=0.025 +2024-09-18 12:13:33,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=440480.0, ans=0.125 +2024-09-18 12:13:37,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=440480.0, ans=0.0 +2024-09-18 12:13:47,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=440520.0, ans=0.2 +2024-09-18 12:13:52,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=440520.0, ans=0.025 +2024-09-18 12:13:53,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=440520.0, ans=0.1 +2024-09-18 12:13:54,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.15 vs. limit=22.5 +2024-09-18 12:13:55,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=440520.0, ans=0.0 +2024-09-18 12:14:13,209 INFO [train.py:1198] (1/2) Epoch 25, batch 1550, loss[loss=0.2608, ctc_loss=0.1427, cr_loss=0.4113, attn_decoder_loss=0.2647, over 29499.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1287, cr_loss=0.3718, attn_decoder_loss=0.2467, over 5783024.13 frames. 
], batch size: 90, lr: 4.46e-03, grad_scale: 8.0 +2024-09-18 12:14:14,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=440600.0, ans=0.1 +2024-09-18 12:14:23,709 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.442e+01 8.497e+01 9.186e+01 9.794e+01 2.835e+02, threshold=1.837e+02, percent-clipped=2.0 +2024-09-18 12:14:29,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.53 vs. limit=15.0 +2024-09-18 12:14:36,069 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:14:47,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=440680.0, ans=0.2 +2024-09-18 12:15:03,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=440720.0, ans=0.0 +2024-09-18 12:15:25,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=440760.0, ans=0.125 +2024-09-18 12:15:33,644 INFO [train.py:1198] (1/2) Epoch 25, batch 1600, loss[loss=0.2372, ctc_loss=0.1173, cr_loss=0.3529, attn_decoder_loss=0.2427, over 29661.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1285, cr_loss=0.3711, attn_decoder_loss=0.2465, over 5764793.43 frames. ], batch size: 85, lr: 4.45e-03, grad_scale: 16.0 +2024-09-18 12:15:45,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=440800.0, ans=0.0 +2024-09-18 12:15:56,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=440840.0, ans=0.2 +2024-09-18 12:16:02,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=440880.0, ans=0.07 +2024-09-18 12:16:17,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=440920.0, ans=0.1 +2024-09-18 12:16:20,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=440920.0, ans=0.0 +2024-09-18 12:16:28,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=440920.0, ans=0.0 +2024-09-18 12:16:28,574 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:16:31,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=440920.0, ans=0.0 +2024-09-18 12:16:49,306 INFO [train.py:1198] (1/2) Epoch 25, batch 1650, loss[loss=0.2556, ctc_loss=0.134, cr_loss=0.402, attn_decoder_loss=0.2602, over 29699.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1285, cr_loss=0.3706, attn_decoder_loss=0.2465, over 5759199.50 frames. 
], batch size: 89, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:17:01,297 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.666e+01 8.636e+01 9.380e+01 1.005e+02 4.034e+02, threshold=1.876e+02, percent-clipped=3.0 +2024-09-18 12:17:22,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=441080.0, ans=0.0 +2024-09-18 12:17:30,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=441080.0, ans=0.0 +2024-09-18 12:17:33,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=441120.0, ans=0.125 +2024-09-18 12:17:49,321 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.89 vs. limit=15.0 +2024-09-18 12:18:04,966 INFO [train.py:1198] (1/2) Epoch 25, batch 1700, loss[loss=0.2224, ctc_loss=0.1154, cr_loss=0.3456, attn_decoder_loss=0.2266, over 29582.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1282, cr_loss=0.3701, attn_decoder_loss=0.2464, over 5781448.75 frames. ], batch size: 69, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:18:25,615 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.86 vs. limit=15.0 +2024-09-18 12:18:29,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=441240.0, ans=0.09899494936611666 +2024-09-18 12:18:42,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=441280.0, ans=0.125 +2024-09-18 12:18:45,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=441280.0, ans=0.05 +2024-09-18 12:18:51,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=441320.0, ans=0.0 +2024-09-18 12:19:14,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=441360.0, ans=0.125 +2024-09-18 12:19:25,317 INFO [train.py:1198] (1/2) Epoch 25, batch 1750, loss[loss=0.2182, ctc_loss=0.1095, cr_loss=0.3307, attn_decoder_loss=0.2229, over 29301.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1276, cr_loss=0.3691, attn_decoder_loss=0.2458, over 5790219.41 frames. ], batch size: 67, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:19:37,503 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.136e+01 8.435e+01 8.870e+01 9.715e+01 1.342e+02, threshold=1.774e+02, percent-clipped=0.0 +2024-09-18 12:19:41,349 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.96 vs. limit=6.0 +2024-09-18 12:19:42,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=441440.0, ans=0.125 +2024-09-18 12:20:38,770 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.77 vs. limit=15.0 +2024-09-18 12:20:41,486 INFO [train.py:1198] (1/2) Epoch 25, batch 1800, loss[loss=0.2548, ctc_loss=0.1408, cr_loss=0.3832, attn_decoder_loss=0.2589, over 29693.00 frames. 
], tot_loss[loss=0.2414, ctc_loss=0.1277, cr_loss=0.3694, attn_decoder_loss=0.2459, over 5792026.91 frames. ], batch size: 83, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:20:53,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=441600.0, ans=0.1 +2024-09-18 12:21:07,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=441640.0, ans=0.0 +2024-09-18 12:21:29,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=441720.0, ans=0.04949747468305833 +2024-09-18 12:21:32,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=441720.0, ans=0.2 +2024-09-18 12:21:57,722 INFO [train.py:1198] (1/2) Epoch 25, batch 1850, loss[loss=0.2509, ctc_loss=0.1243, cr_loss=0.3524, attn_decoder_loss=0.2572, over 29608.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1275, cr_loss=0.3695, attn_decoder_loss=0.2458, over 5796513.90 frames. ], batch size: 86, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:22:05,656 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:22:09,697 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.990e+01 8.488e+01 8.939e+01 9.551e+01 1.184e+02, threshold=1.788e+02, percent-clipped=0.0 +2024-09-18 12:22:15,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=441840.0, ans=0.125 +2024-09-18 12:22:19,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=441840.0, ans=0.1 +2024-09-18 12:22:19,160 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:23:11,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=441960.0, ans=0.125 +2024-09-18 12:23:15,214 INFO [train.py:1198] (1/2) Epoch 25, batch 1900, loss[loss=0.2625, ctc_loss=0.1466, cr_loss=0.4076, attn_decoder_loss=0.2663, over 29710.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1282, cr_loss=0.3714, attn_decoder_loss=0.2466, over 5804424.74 frames. ], batch size: 89, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:23:28,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=442000.0, ans=0.1 +2024-09-18 12:23:36,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=442040.0, ans=0.125 +2024-09-18 12:23:43,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=442040.0, ans=0.0 +2024-09-18 12:23:48,776 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.13 vs. limit=15.0 +2024-09-18 12:23:59,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=442080.0, ans=0.1 +2024-09-18 12:24:24,114 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.09 vs. 
limit=15.0 +2024-09-18 12:24:24,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=442160.0, ans=0.2 +2024-09-18 12:24:33,475 INFO [train.py:1198] (1/2) Epoch 25, batch 1950, loss[loss=0.249, ctc_loss=0.1391, cr_loss=0.4224, attn_decoder_loss=0.2518, over 29436.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1291, cr_loss=0.3737, attn_decoder_loss=0.2478, over 5819268.93 frames. ], batch size: 78, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:24:45,598 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.674e+01 8.609e+01 9.254e+01 9.710e+01 4.424e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-18 12:24:45,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=442200.0, ans=0.0 +2024-09-18 12:24:47,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=442240.0, ans=0.05 +2024-09-18 12:24:47,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=442240.0, ans=0.125 +2024-09-18 12:24:58,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=442240.0, ans=0.0 +2024-09-18 12:24:59,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=442240.0, ans=0.125 +2024-09-18 12:25:39,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=442360.0, ans=0.04949747468305833 +2024-09-18 12:25:49,608 INFO [train.py:1198] (1/2) Epoch 25, batch 2000, loss[loss=0.2204, ctc_loss=0.1153, cr_loss=0.3403, attn_decoder_loss=0.2246, over 29359.00 frames. ], tot_loss[loss=0.2439, ctc_loss=0.1295, cr_loss=0.3741, attn_decoder_loss=0.2483, over 5798153.46 frames. ], batch size: 67, lr: 4.45e-03, grad_scale: 16.0 +2024-09-18 12:25:49,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=442400.0, ans=0.05 +2024-09-18 12:25:58,303 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.51 vs. limit=12.0 +2024-09-18 12:26:00,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=442400.0, ans=0.1 +2024-09-18 12:26:12,288 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.24 vs. limit=15.0 +2024-09-18 12:26:34,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=442480.0, ans=0.0 +2024-09-18 12:26:37,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=442520.0, ans=0.125 +2024-09-18 12:27:03,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=442560.0, ans=0.125 +2024-09-18 12:27:07,724 INFO [train.py:1198] (1/2) Epoch 25, batch 2050, loss[loss=0.2199, ctc_loss=0.1175, cr_loss=0.3413, attn_decoder_loss=0.2237, over 29439.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1289, cr_loss=0.3726, attn_decoder_loss=0.2471, over 5788729.66 frames. 
], batch size: 70, lr: 4.45e-03, grad_scale: 8.0 +2024-09-18 12:27:17,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.77 vs. limit=15.0 +2024-09-18 12:27:23,554 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.313e+01 8.436e+01 8.905e+01 9.396e+01 1.982e+02, threshold=1.781e+02, percent-clipped=1.0 +2024-09-18 12:27:23,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=442640.0, ans=0.0 +2024-09-18 12:27:25,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=442640.0, ans=0.125 +2024-09-18 12:27:34,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=442640.0, ans=0.0 +2024-09-18 12:27:57,631 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.19 vs. limit=15.0 +2024-09-18 12:28:24,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=442800.0, ans=0.2 +2024-09-18 12:28:25,821 INFO [train.py:1198] (1/2) Epoch 25, batch 2100, loss[loss=0.2338, ctc_loss=0.1136, cr_loss=0.3459, attn_decoder_loss=0.2395, over 29750.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1281, cr_loss=0.3709, attn_decoder_loss=0.2464, over 5800945.29 frames. ], batch size: 81, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:28:39,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=442840.0, ans=0.1 +2024-09-18 12:28:56,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=442880.0, ans=0.025 +2024-09-18 12:29:05,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=442880.0, ans=0.125 +2024-09-18 12:29:12,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=442920.0, ans=0.0 +2024-09-18 12:29:17,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=442920.0, ans=0.0 +2024-09-18 12:29:27,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=442960.0, ans=0.0 +2024-09-18 12:29:41,142 INFO [train.py:1198] (1/2) Epoch 25, batch 2150, loss[loss=0.2337, ctc_loss=0.1195, cr_loss=0.3757, attn_decoder_loss=0.2381, over 29443.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1274, cr_loss=0.3697, attn_decoder_loss=0.2459, over 5815827.38 frames. 
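The periodic `WARNING [optim.py:487]` lines print five numbers — min/25%/median/75%/max of recent gradient norms — and in every instance above the printed `threshold` equals `Clipping_scale` (2.0) times the median, with `percent-clipped` counting how often a batch exceeded it. A hedged sketch of that rule; the helper and its bookkeeping are hypothetical, not icefall's actual optimizer code:

```python
import torch

def clip_by_quartiles(params, recent_norms, clipping_scale=2.0):
    """Clip the global grad norm to clipping_scale * median of recent norms.

    recent_norms: gradient norms from the last few hundred batches
    (hypothetical bookkeeping; the real optimizer keeps this internally).
    """
    norms = torch.tensor(recent_norms, dtype=torch.float32)
    # The five numbers printed in the WARNING line: min/25%/median/75%/max.
    quartiles = torch.quantile(norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = float(clipping_scale * quartiles[2])   # 2.0 * median
    total_norm = torch.nn.utils.clip_grad_norm_(params, max_norm=threshold)
    was_clipped = total_norm.item() > threshold        # feeds "percent-clipped"
    return quartiles.tolist(), threshold, was_clipped
```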
], batch size: 78, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:29:42,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=443000.0, ans=0.04949747468305833 +2024-09-18 12:29:52,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=443000.0, ans=0.0 +2024-09-18 12:29:54,895 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.193e+01 8.437e+01 8.935e+01 9.622e+01 1.303e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-18 12:29:55,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=443040.0, ans=0.125 +2024-09-18 12:30:10,862 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.65 vs. limit=15.0 +2024-09-18 12:30:21,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=443080.0, ans=0.2 +2024-09-18 12:30:22,006 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.86 vs. limit=15.0 +2024-09-18 12:30:22,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=443080.0, ans=0.125 +2024-09-18 12:30:30,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=443120.0, ans=0.1 +2024-09-18 12:30:37,296 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.39 vs. limit=22.5 +2024-09-18 12:30:59,553 INFO [train.py:1198] (1/2) Epoch 25, batch 2200, loss[loss=0.2385, ctc_loss=0.1272, cr_loss=0.3605, attn_decoder_loss=0.2428, over 29624.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1274, cr_loss=0.3699, attn_decoder_loss=0.2457, over 5813657.21 frames. ], batch size: 86, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:31:10,971 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.27 vs. limit=15.0 +2024-09-18 12:31:11,148 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.09 vs. limit=15.0 +2024-09-18 12:31:15,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=443240.0, ans=0.05 +2024-09-18 12:31:20,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=443240.0, ans=0.0 +2024-09-18 12:32:05,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=443360.0, ans=0.0 +2024-09-18 12:32:07,252 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:32:11,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=443360.0, ans=0.2 +2024-09-18 12:32:17,477 INFO [train.py:1198] (1/2) Epoch 25, batch 2250, loss[loss=0.2493, ctc_loss=0.1282, cr_loss=0.3874, attn_decoder_loss=0.2541, over 29715.00 frames. 
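The `grad_scale` field bouncing between 8.0 and 16.0 in the entries above is the dynamic loss scale of mixed-precision training: the scaler doubles the scale after a run of overflow-free steps and halves it when a step produces inf/nan gradients. A sketch using PyTorch's stock `GradScaler`; that this recipe uses these exact growth settings is an assumption:

```python
import torch

# init_scale chosen to match the logged value; growth/backoff left at
# PyTorch defaults (x2 growth, x0.5 backoff) as an assumption.
scaler = torch.cuda.amp.GradScaler(init_scale=8.0)

def train_step(model, optimizer, batch, loss_fn):
    optimizer.zero_grad(set_to_none=True)
    with torch.cuda.amp.autocast(dtype=torch.float16):
        loss = loss_fn(model(batch))
    scaler.scale(loss).backward()
    scaler.step(optimizer)   # step is skipped (and scale halved) on inf/nan grads
    scaler.update()          # scale doubled after enough consecutive clean steps
    # scaler.get_scale() is the "grad_scale: 8.0 / 16.0" value in the log.
    return loss.detach(), scaler.get_scale()
```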
], tot_loss[loss=0.2412, ctc_loss=0.1272, cr_loss=0.3695, attn_decoder_loss=0.2456, over 5812335.81 frames. ], batch size: 82, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:32:25,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=443400.0, ans=0.0 +2024-09-18 12:32:31,058 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.266e+01 8.316e+01 8.883e+01 9.424e+01 4.658e+02, threshold=1.777e+02, percent-clipped=2.0 +2024-09-18 12:32:46,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.67 vs. limit=15.0 +2024-09-18 12:33:01,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=443520.0, ans=0.025 +2024-09-18 12:33:22,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=443560.0, ans=0.125 +2024-09-18 12:33:24,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=443560.0, ans=0.125 +2024-09-18 12:33:33,023 INFO [train.py:1198] (1/2) Epoch 25, batch 2300, loss[loss=0.2235, ctc_loss=0.1155, cr_loss=0.3487, attn_decoder_loss=0.2278, over 29307.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1266, cr_loss=0.3681, attn_decoder_loss=0.2447, over 5800365.13 frames. ], batch size: 71, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:33:38,808 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.20 vs. limit=12.0 +2024-09-18 12:33:40,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=443600.0, ans=0.125 +2024-09-18 12:33:48,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=443640.0, ans=0.125 +2024-09-18 12:34:08,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=443680.0, ans=0.125 +2024-09-18 12:34:13,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=443680.0, ans=0.07 +2024-09-18 12:34:50,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=443800.0, ans=0.125 +2024-09-18 12:34:51,416 INFO [train.py:1198] (1/2) Epoch 25, batch 2350, loss[loss=0.2533, ctc_loss=0.131, cr_loss=0.3636, attn_decoder_loss=0.2588, over 29708.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1271, cr_loss=0.3693, attn_decoder_loss=0.245, over 5804680.41 frames. 
], batch size: 83, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:35:04,930 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.160e+01 8.571e+01 9.088e+01 9.554e+01 1.522e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-18 12:35:05,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=443840.0, ans=0.125 +2024-09-18 12:35:09,841 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:35:38,246 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:35:44,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=443920.0, ans=0.125 +2024-09-18 12:35:51,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=443920.0, ans=0.0 +2024-09-18 12:36:10,420 INFO [train.py:1198] (1/2) Epoch 25, batch 2400, loss[loss=0.2308, ctc_loss=0.1185, cr_loss=0.3588, attn_decoder_loss=0.2353, over 29552.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1278, cr_loss=0.3705, attn_decoder_loss=0.2456, over 5808458.00 frames. ], batch size: 76, lr: 4.44e-03, grad_scale: 16.0 +2024-09-18 12:36:15,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=444000.0, ans=0.2 +2024-09-18 12:36:15,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=444000.0, ans=0.125 +2024-09-18 12:36:21,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=444000.0, ans=0.125 +2024-09-18 12:36:47,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=444080.0, ans=0.0 +2024-09-18 12:37:06,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=444120.0, ans=0.125 +2024-09-18 12:37:09,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=444160.0, ans=0.2 +2024-09-18 12:37:26,063 INFO [train.py:1198] (1/2) Epoch 25, batch 2450, loss[loss=0.2493, ctc_loss=0.1383, cr_loss=0.3895, attn_decoder_loss=0.2529, over 29733.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1283, cr_loss=0.3712, attn_decoder_loss=0.2464, over 5783697.38 frames. ], batch size: 82, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:37:38,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=444200.0, ans=0.1 +2024-09-18 12:37:40,935 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.533e+01 8.902e+01 9.594e+01 1.053e+02 2.320e+02, threshold=1.919e+02, percent-clipped=3.0 +2024-09-18 12:37:51,030 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.12 vs. 
limit=22.5 +2024-09-18 12:38:10,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=444280.0, ans=0.125 +2024-09-18 12:38:41,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=444360.0, ans=0.2 +2024-09-18 12:38:43,740 INFO [train.py:1198] (1/2) Epoch 25, batch 2500, loss[loss=0.2481, ctc_loss=0.1295, cr_loss=0.3685, attn_decoder_loss=0.2531, over 29626.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1282, cr_loss=0.3714, attn_decoder_loss=0.2463, over 5794113.86 frames. ], batch size: 86, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:39:00,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=444440.0, ans=0.025 +2024-09-18 12:39:30,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=444520.0, ans=0.1 +2024-09-18 12:39:31,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=444520.0, ans=0.2 +2024-09-18 12:39:35,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=444520.0, ans=0.125 +2024-09-18 12:39:39,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=444520.0, ans=0.125 +2024-09-18 12:39:41,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.27 vs. limit=22.5 +2024-09-18 12:39:44,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=444520.0, ans=0.125 +2024-09-18 12:40:02,043 INFO [train.py:1198] (1/2) Epoch 25, batch 2550, loss[loss=0.2259, ctc_loss=0.1211, cr_loss=0.3713, attn_decoder_loss=0.2293, over 29335.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1284, cr_loss=0.372, attn_decoder_loss=0.2466, over 5796708.31 frames. ], batch size: 67, lr: 4.44e-03, grad_scale: 8.0 +2024-09-18 12:40:03,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=444600.0, ans=0.05 +2024-09-18 12:40:17,176 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.300e+01 8.248e+01 8.745e+01 9.244e+01 1.627e+02, threshold=1.749e+02, percent-clipped=0.0 +2024-09-18 12:40:34,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=444680.0, ans=0.125 +2024-09-18 12:40:40,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=444680.0, ans=0.5 +2024-09-18 12:40:55,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=444720.0, ans=0.1 +2024-09-18 12:41:18,555 INFO [train.py:1198] (1/2) Epoch 25, batch 2600, loss[loss=0.2336, ctc_loss=0.1204, cr_loss=0.3532, attn_decoder_loss=0.2383, over 29449.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1284, cr_loss=0.3719, attn_decoder_loss=0.2468, over 5792551.67 frames. 
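The `Whitening: ... metric=M vs. limit=L` lines track how far a layer's feature covariance is from a multiple of the identity; a corrective penalty applies only while the metric exceeds the limit, which is why only the occasional over-limit measurement is logged. One plausible form of such a metric, equal to 1.0 for perfectly white features and growing with anisotropy; the exact normalization and grouping in `scaling.py` may differ:

```python
import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    """x: (num_frames, num_channels) activations from one layer.

    Returns E[lambda^2] / E[lambda]^2 over the covariance eigenvalues:
    1.0 when the covariance is isotropic, larger as it becomes skewed.
    """
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.t() @ x) / x.shape[0]                 # (C, C) covariance
    mean_diag = cov.diagonal().mean()              # mean eigenvalue
    mean_sq = (cov ** 2).sum() / cov.shape[0]      # mean squared eigenvalue
    return mean_sq / (mean_diag ** 2 + 1e-20)

x_white = torch.randn(10000, 256)
x_skewed = x_white * torch.linspace(0.1, 3.0, 256)
print(whitening_metric(x_white))   # close to 1 (up to sampling noise)
print(whitening_metric(x_skewed))  # noticeably > 1, would trip a low limit
```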
], batch size: 78, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:41:28,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=444800.0, ans=0.1 +2024-09-18 12:41:44,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=444840.0, ans=0.0 +2024-09-18 12:41:49,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.26 vs. limit=15.0 +2024-09-18 12:42:24,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=444960.0, ans=0.1 +2024-09-18 12:42:36,073 INFO [train.py:1198] (1/2) Epoch 25, batch 2650, loss[loss=0.2542, ctc_loss=0.1376, cr_loss=0.373, attn_decoder_loss=0.2589, over 29227.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1286, cr_loss=0.3724, attn_decoder_loss=0.2471, over 5799304.18 frames. ], batch size: 100, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:42:51,109 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.400e+01 8.486e+01 8.913e+01 9.474e+01 1.768e+02, threshold=1.783e+02, percent-clipped=1.0 +2024-09-18 12:43:02,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=445040.0, ans=0.0 +2024-09-18 12:43:20,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=445080.0, ans=0.125 +2024-09-18 12:43:30,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=445120.0, ans=0.2 +2024-09-18 12:43:49,137 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:43:53,473 INFO [train.py:1198] (1/2) Epoch 25, batch 2700, loss[loss=0.245, ctc_loss=0.1315, cr_loss=0.3797, attn_decoder_loss=0.2491, over 29519.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1293, cr_loss=0.3732, attn_decoder_loss=0.2475, over 5793995.42 frames. ], batch size: 87, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:44:01,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=445200.0, ans=0.0 +2024-09-18 12:44:14,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=445240.0, ans=0.0 +2024-09-18 12:44:16,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=445240.0, ans=0.0 +2024-09-18 12:44:28,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=445280.0, ans=0.025 +2024-09-18 12:44:34,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=445280.0, ans=0.025 +2024-09-18 12:44:39,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=445320.0, ans=0.125 +2024-09-18 12:44:51,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.64 vs. 
limit=15.0 +2024-09-18 12:45:09,582 INFO [train.py:1198] (1/2) Epoch 25, batch 2750, loss[loss=0.2398, ctc_loss=0.1343, cr_loss=0.3793, attn_decoder_loss=0.2431, over 29519.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1285, cr_loss=0.3715, attn_decoder_loss=0.2462, over 5794106.93 frames. ], batch size: 75, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:45:16,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=445400.0, ans=0.0 +2024-09-18 12:45:24,757 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.540e+01 9.041e+01 9.626e+01 3.086e+02, threshold=1.808e+02, percent-clipped=2.0 +2024-09-18 12:45:29,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=445440.0, ans=0.125 +2024-09-18 12:45:48,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=445480.0, ans=0.025 +2024-09-18 12:45:57,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=445520.0, ans=0.1 +2024-09-18 12:46:28,560 INFO [train.py:1198] (1/2) Epoch 25, batch 2800, loss[loss=0.2712, ctc_loss=0.1676, cr_loss=0.3983, attn_decoder_loss=0.2738, over 20165.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1288, cr_loss=0.3723, attn_decoder_loss=0.2466, over 5774040.58 frames. ], batch size: 210, lr: 4.43e-03, grad_scale: 16.0 +2024-09-18 12:46:47,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=445640.0, ans=0.125 +2024-09-18 12:47:43,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=445760.0, ans=0.025 +2024-09-18 12:47:46,146 INFO [train.py:1198] (1/2) Epoch 25, batch 2850, loss[loss=0.232, ctc_loss=0.1162, cr_loss=0.3462, attn_decoder_loss=0.2372, over 29521.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1292, cr_loss=0.3732, attn_decoder_loss=0.2468, over 5759592.33 frames. ], batch size: 77, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:47:58,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=445800.0, ans=0.125 +2024-09-18 12:48:02,746 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.484e+01 8.566e+01 9.294e+01 9.797e+01 4.897e+02, threshold=1.859e+02, percent-clipped=4.0 +2024-09-18 12:49:01,842 INFO [train.py:1198] (1/2) Epoch 25, batch 2900, loss[loss=0.2305, ctc_loss=0.1163, cr_loss=0.3486, attn_decoder_loss=0.2354, over 29421.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1297, cr_loss=0.3748, attn_decoder_loss=0.2477, over 5784697.43 frames. ], batch size: 79, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:49:02,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=446000.0, ans=0.0 +2024-09-18 12:49:09,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=446000.0, ans=0.0 +2024-09-18 12:49:11,552 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.47 vs. 
limit=22.5 +2024-09-18 12:49:30,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=446040.0, ans=0.2 +2024-09-18 12:49:36,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=446080.0, ans=0.125 +2024-09-18 12:49:49,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=446120.0, ans=0.0 +2024-09-18 12:49:51,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=446120.0, ans=0.07 +2024-09-18 12:50:01,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=446120.0, ans=0.125 +2024-09-18 12:50:09,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=446160.0, ans=0.1 +2024-09-18 12:50:19,428 INFO [train.py:1198] (1/2) Epoch 25, batch 2950, loss[loss=0.2357, ctc_loss=0.1218, cr_loss=0.3566, attn_decoder_loss=0.2405, over 29497.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1285, cr_loss=0.3724, attn_decoder_loss=0.2466, over 5780630.42 frames. ], batch size: 75, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:50:25,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=446200.0, ans=0.2 +2024-09-18 12:50:28,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=446200.0, ans=0.125 +2024-09-18 12:50:28,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=446200.0, ans=0.125 +2024-09-18 12:50:36,056 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.338e+01 8.376e+01 8.898e+01 9.637e+01 1.288e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-18 12:50:40,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=446240.0, ans=0.125 +2024-09-18 12:50:45,923 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.66 vs. limit=10.0 +2024-09-18 12:50:51,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=446280.0, ans=0.125 +2024-09-18 12:50:57,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.43 vs. limit=15.0 +2024-09-18 12:51:01,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=446280.0, ans=0.0 +2024-09-18 12:51:10,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=446320.0, ans=0.1 +2024-09-18 12:51:13,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=446320.0, ans=0.2 +2024-09-18 12:51:38,071 INFO [train.py:1198] (1/2) Epoch 25, batch 3000, loss[loss=0.2362, ctc_loss=0.1248, cr_loss=0.3586, attn_decoder_loss=0.2406, over 29761.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1281, cr_loss=0.3716, attn_decoder_loss=0.2461, over 5780669.11 frames. 
], batch size: 81, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:51:38,072 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 12:51:56,649 INFO [train.py:1230] (1/2) Epoch 25, validation: loss=0.2113, ctc_loss=0.03809, cr_loss=5.582e-15, attn_decoder_loss=0.2305, over 944034.00 frames. +2024-09-18 12:51:56,649 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 12:52:04,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=446400.0, ans=0.125 +2024-09-18 12:52:05,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.47 vs. limit=12.0 +2024-09-18 12:52:13,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=446440.0, ans=0.125 +2024-09-18 12:52:15,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=446440.0, ans=0.025 +2024-09-18 12:52:19,114 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.97 vs. limit=15.0 +2024-09-18 12:52:30,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=446480.0, ans=0.2 +2024-09-18 12:52:35,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=446480.0, ans=0.1 +2024-09-18 12:52:41,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=446520.0, ans=0.0 +2024-09-18 12:53:11,417 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:53:12,534 INFO [train.py:1198] (1/2) Epoch 25, batch 3050, loss[loss=0.24, ctc_loss=0.1317, cr_loss=0.391, attn_decoder_loss=0.2433, over 29545.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1284, cr_loss=0.3717, attn_decoder_loss=0.2469, over 5775150.35 frames. ], batch size: 76, lr: 4.43e-03, grad_scale: 8.0 +2024-09-18 12:53:26,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=446600.0, ans=0.0 +2024-09-18 12:53:31,784 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 8.617e+01 9.221e+01 9.973e+01 3.035e+02, threshold=1.844e+02, percent-clipped=2.0 +2024-09-18 12:53:33,595 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:53:48,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=446680.0, ans=0.1 +2024-09-18 12:54:01,310 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.66 vs. limit=15.0 +2024-09-18 12:54:11,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.65 vs. 
limit=6.0 +2024-09-18 12:54:12,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=446720.0, ans=0.1 +2024-09-18 12:54:12,984 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.40 vs. limit=15.0 +2024-09-18 12:54:15,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=446760.0, ans=0.0 +2024-09-18 12:54:17,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=446760.0, ans=0.125 +2024-09-18 12:54:30,186 INFO [train.py:1198] (1/2) Epoch 25, batch 3100, loss[loss=0.251, ctc_loss=0.1349, cr_loss=0.3932, attn_decoder_loss=0.2551, over 29239.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1282, cr_loss=0.3709, attn_decoder_loss=0.2467, over 5775844.60 frames. ], batch size: 100, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 12:54:32,812 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.33 vs. limit=15.0 +2024-09-18 12:55:04,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=446880.0, ans=0.1 +2024-09-18 12:55:04,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=446880.0, ans=0.1 +2024-09-18 12:55:24,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=446920.0, ans=0.125 +2024-09-18 12:55:28,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=446920.0, ans=0.1 +2024-09-18 12:55:29,606 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.71 vs. limit=22.5 +2024-09-18 12:55:31,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=446960.0, ans=0.0 +2024-09-18 12:55:32,202 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.38 vs. limit=15.0 +2024-09-18 12:55:36,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=446960.0, ans=0.125 +2024-09-18 12:55:48,326 INFO [train.py:1198] (1/2) Epoch 25, batch 3150, loss[loss=0.2537, ctc_loss=0.1275, cr_loss=0.3817, attn_decoder_loss=0.2593, over 28859.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.128, cr_loss=0.3706, attn_decoder_loss=0.2466, over 5781162.40 frames. 
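At batch 3000 above, the loop pauses to compute validation loss over 944034 frames and reports peak GPU memory (52672MB). Note that `cr_loss` is numerically zero at validation (5.582e-15), consistent with the consistency-regularization term applying only to the paired augmented training passes. A sketch of that bookkeeping; `compute_losses` is a hypothetical helper standing in for the recipe's loss computation:

```python
import torch

@torch.no_grad()
def compute_validation_loss(model, valid_loader, device):
    model.eval()
    tot = {"loss": 0.0, "ctc_loss": 0.0, "attn_decoder_loss": 0.0}
    frames = 0
    for batch in valid_loader:
        losses, num_frames = model.compute_losses(batch)  # hypothetical helper
        for k in tot:
            tot[k] += losses[k].item() * num_frames
        frames += num_frames
    model.train()
    stats = {k: v / frames for k, v in tot.items()}       # frames-weighted
    peak_mb = torch.cuda.max_memory_allocated(device) // (1024 * 1024)
    return stats, peak_mb  # "Maximum memory allocated so far is ...MB"
```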
], batch size: 104, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 12:56:05,069 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.834e+01 8.618e+01 9.043e+01 9.824e+01 1.542e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-18 12:56:05,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=447040.0, ans=0.025 +2024-09-18 12:56:11,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=447040.0, ans=0.1 +2024-09-18 12:56:21,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=447080.0, ans=0.025 +2024-09-18 12:56:53,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=447160.0, ans=0.04949747468305833 +2024-09-18 12:56:56,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=447160.0, ans=0.0 +2024-09-18 12:57:01,524 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.12 vs. limit=15.0 +2024-09-18 12:57:02,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=447200.0, ans=0.125 +2024-09-18 12:57:04,420 INFO [train.py:1198] (1/2) Epoch 25, batch 3200, loss[loss=0.2409, ctc_loss=0.1257, cr_loss=0.3542, attn_decoder_loss=0.2458, over 29428.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1275, cr_loss=0.3696, attn_decoder_loss=0.2462, over 5791470.18 frames. ], batch size: 79, lr: 4.42e-03, grad_scale: 16.0 +2024-09-18 12:57:14,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=447200.0, ans=0.125 +2024-09-18 12:57:34,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=447240.0, ans=0.0 +2024-09-18 12:57:44,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=447280.0, ans=0.125 +2024-09-18 12:58:08,344 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.52 vs. limit=12.0 +2024-09-18 12:58:22,733 INFO [train.py:1198] (1/2) Epoch 25, batch 3250, loss[loss=0.2526, ctc_loss=0.138, cr_loss=0.391, attn_decoder_loss=0.2566, over 29707.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1277, cr_loss=0.3704, attn_decoder_loss=0.2464, over 5799256.55 frames. 
], batch size: 84, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 12:58:27,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=447400.0, ans=0.2 +2024-09-18 12:58:40,939 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.276e+01 8.602e+01 9.212e+01 9.778e+01 1.600e+02, threshold=1.842e+02, percent-clipped=0.0 +2024-09-18 12:58:42,805 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 12:59:13,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=447520.0, ans=0.1 +2024-09-18 12:59:15,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=447520.0, ans=0.0 +2024-09-18 12:59:27,804 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.01 vs. limit=15.0 +2024-09-18 12:59:32,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_na.min_abs, batch_count=447560.0, ans=0.02 +2024-09-18 12:59:38,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=447560.0, ans=0.0 +2024-09-18 12:59:40,960 INFO [train.py:1198] (1/2) Epoch 25, batch 3300, loss[loss=0.2572, ctc_loss=0.1298, cr_loss=0.3601, attn_decoder_loss=0.2633, over 28252.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.127, cr_loss=0.3689, attn_decoder_loss=0.2454, over 5797135.82 frames. ], batch size: 111, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 12:59:41,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=447600.0, ans=0.0 +2024-09-18 12:59:49,580 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.35 vs. limit=15.0 +2024-09-18 12:59:50,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=447600.0, ans=0.025 +2024-09-18 12:59:53,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=447600.0, ans=0.0 +2024-09-18 12:59:57,165 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.53 vs. limit=22.5 +2024-09-18 13:00:07,253 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:00:10,597 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.41 vs. limit=12.0 +2024-09-18 13:00:19,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=447680.0, ans=0.125 +2024-09-18 13:00:19,703 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.82 vs. 
limit=15.0 +2024-09-18 13:00:46,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=447760.0, ans=0.1 +2024-09-18 13:00:58,871 INFO [train.py:1198] (1/2) Epoch 25, batch 3350, loss[loss=0.2573, ctc_loss=0.1414, cr_loss=0.3891, attn_decoder_loss=0.2616, over 28896.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1277, cr_loss=0.3703, attn_decoder_loss=0.2461, over 5773554.05 frames. ], batch size: 104, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 13:01:00,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=447800.0, ans=0.025 +2024-09-18 13:01:17,274 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.442e+01 8.840e+01 9.298e+01 1.002e+02 3.178e+02, threshold=1.860e+02, percent-clipped=4.0 +2024-09-18 13:01:18,256 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.11 vs. limit=15.0 +2024-09-18 13:01:22,929 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.72 vs. limit=15.0 +2024-09-18 13:01:25,557 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.08 vs. limit=15.0 +2024-09-18 13:01:25,761 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.37 vs. limit=15.0 +2024-09-18 13:01:26,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=447840.0, ans=0.125 +2024-09-18 13:01:40,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=447880.0, ans=0.09899494936611666 +2024-09-18 13:01:44,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=447920.0, ans=0.07 +2024-09-18 13:01:53,413 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.05 vs. limit=10.0 +2024-09-18 13:02:07,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=447960.0, ans=0.125 +2024-09-18 13:02:12,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=447960.0, ans=0.2 +2024-09-18 13:02:22,690 INFO [train.py:1198] (1/2) Epoch 25, batch 3400, loss[loss=0.2161, ctc_loss=0.1218, cr_loss=0.3638, attn_decoder_loss=0.2185, over 29358.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1283, cr_loss=0.3719, attn_decoder_loss=0.2466, over 5766616.52 frames. ], batch size: 67, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 13:02:46,885 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.13 vs. 
limit=6.0 +2024-09-18 13:02:49,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=448040.0, ans=0.125 +2024-09-18 13:02:55,596 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:03:13,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=448120.0, ans=0.125 +2024-09-18 13:03:21,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=448120.0, ans=0.05 +2024-09-18 13:03:38,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=448160.0, ans=0.125 +2024-09-18 13:03:40,800 INFO [train.py:1198] (1/2) Epoch 25, batch 3450, loss[loss=0.2508, ctc_loss=0.1291, cr_loss=0.3668, attn_decoder_loss=0.2561, over 28346.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1282, cr_loss=0.3717, attn_decoder_loss=0.2466, over 5775259.49 frames. ], batch size: 111, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 13:03:49,423 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.10 vs. limit=10.0 +2024-09-18 13:03:58,830 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.900e+01 8.450e+01 9.075e+01 9.587e+01 1.383e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-18 13:04:12,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=448280.0, ans=0.125 +2024-09-18 13:04:17,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=448280.0, ans=0.07 +2024-09-18 13:04:40,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_na.min_abs, batch_count=448360.0, ans=0.02 +2024-09-18 13:04:58,610 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.61 vs. limit=22.5 +2024-09-18 13:04:58,914 INFO [train.py:1198] (1/2) Epoch 25, batch 3500, loss[loss=0.2197, ctc_loss=0.1084, cr_loss=0.3317, attn_decoder_loss=0.2247, over 29309.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1283, cr_loss=0.3719, attn_decoder_loss=0.2463, over 5777337.69 frames. 
], batch size: 71, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 13:05:00,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=448400.0, ans=0.125 +2024-09-18 13:05:08,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=448400.0, ans=0.125 +2024-09-18 13:05:23,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=448440.0, ans=0.125 +2024-09-18 13:05:25,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=448440.0, ans=0.07 +2024-09-18 13:05:32,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=448480.0, ans=0.1 +2024-09-18 13:05:40,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=448480.0, ans=0.025 +2024-09-18 13:05:41,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=448480.0, ans=0.125 +2024-09-18 13:05:57,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=448560.0, ans=0.2 +2024-09-18 13:06:00,057 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.45 vs. limit=15.0 +2024-09-18 13:06:05,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=448560.0, ans=0.2 +2024-09-18 13:06:13,959 INFO [train.py:1198] (1/2) Epoch 25, batch 3550, loss[loss=0.2607, ctc_loss=0.14, cr_loss=0.4004, attn_decoder_loss=0.2652, over 29707.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1278, cr_loss=0.371, attn_decoder_loss=0.2461, over 5783590.36 frames. ], batch size: 89, lr: 4.42e-03, grad_scale: 8.0 +2024-09-18 13:06:17,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=448600.0, ans=0.5 +2024-09-18 13:06:27,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.59 vs. limit=12.0 +2024-09-18 13:06:31,448 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.716e+01 8.657e+01 9.167e+01 9.744e+01 2.782e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-18 13:06:33,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=448640.0, ans=0.2 +2024-09-18 13:06:46,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=448680.0, ans=0.025 +2024-09-18 13:06:48,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=448680.0, ans=0.125 +2024-09-18 13:06:48,768 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.62 vs. limit=15.0 +2024-09-18 13:07:28,556 INFO [train.py:1198] (1/2) Epoch 25, batch 3600, loss[loss=0.2393, ctc_loss=0.1295, cr_loss=0.3715, attn_decoder_loss=0.2433, over 29499.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1279, cr_loss=0.3714, attn_decoder_loss=0.2462, over 5792523.34 frames. 
], batch size: 77, lr: 4.41e-03, grad_scale: 16.0 +2024-09-18 13:07:54,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=448840.0, ans=0.05 +2024-09-18 13:08:14,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=448920.0, ans=0.125 +2024-09-18 13:08:17,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=448920.0, ans=0.0 +2024-09-18 13:08:18,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=448920.0, ans=0.125 +2024-09-18 13:08:44,785 INFO [train.py:1198] (1/2) Epoch 25, batch 3650, loss[loss=0.2543, ctc_loss=0.1353, cr_loss=0.4022, attn_decoder_loss=0.2585, over 29489.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1275, cr_loss=0.3709, attn_decoder_loss=0.2459, over 5794062.23 frames. ], batch size: 90, lr: 4.41e-03, grad_scale: 16.0 +2024-09-18 13:08:46,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=449000.0, ans=0.2 +2024-09-18 13:08:52,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=449000.0, ans=0.2 +2024-09-18 13:08:58,730 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.68 vs. limit=15.0 +2024-09-18 13:09:02,519 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.394e+01 8.539e+01 8.955e+01 9.424e+01 1.447e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-18 13:09:02,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=449040.0, ans=0.07 +2024-09-18 13:09:04,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=449040.0, ans=0.2 +2024-09-18 13:09:06,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.14 vs. limit=6.0 +2024-09-18 13:09:11,092 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.07 vs. limit=15.0 +2024-09-18 13:09:21,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=449080.0, ans=0.0 +2024-09-18 13:09:26,424 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.62 vs. limit=8.0 +2024-09-18 13:09:58,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=449200.0, ans=0.025 +2024-09-18 13:09:59,591 INFO [train.py:1198] (1/2) Epoch 25, batch 3700, loss[loss=0.248, ctc_loss=0.1395, cr_loss=0.3928, attn_decoder_loss=0.2513, over 29697.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1279, cr_loss=0.3719, attn_decoder_loss=0.2464, over 5804408.29 frames. 
], batch size: 84, lr: 4.41e-03, grad_scale: 8.0 +2024-09-18 13:09:59,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=449200.0, ans=0.125 +2024-09-18 13:10:06,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=449200.0, ans=0.1 +2024-09-18 13:10:13,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=449240.0, ans=0.125 +2024-09-18 13:10:31,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=449280.0, ans=0.2 +2024-09-18 13:10:31,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=449280.0, ans=0.125 +2024-09-18 13:10:32,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=449280.0, ans=0.1 +2024-09-18 13:10:32,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=449280.0, ans=0.0 +2024-09-18 13:10:35,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=449280.0, ans=0.1 +2024-09-18 13:10:47,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=449320.0, ans=0.0 +2024-09-18 13:10:59,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=449360.0, ans=0.0 +2024-09-18 13:11:01,618 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.54 vs. limit=12.0 +2024-09-18 13:11:01,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.64 vs. limit=15.0 +2024-09-18 13:11:07,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=449360.0, ans=0.1 +2024-09-18 13:11:13,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=449360.0, ans=0.2 +2024-09-18 13:11:16,099 INFO [train.py:1198] (1/2) Epoch 25, batch 3750, loss[loss=0.2189, ctc_loss=0.1145, cr_loss=0.3291, attn_decoder_loss=0.2232, over 29373.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1277, cr_loss=0.3711, attn_decoder_loss=0.2459, over 5808015.06 frames. ], batch size: 67, lr: 4.41e-03, grad_scale: 8.0 +2024-09-18 13:11:19,566 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:11:31,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=449440.0, ans=0.125 +2024-09-18 13:11:35,579 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.392e+01 8.983e+01 9.467e+01 5.174e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 13:11:36,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=10.78 vs. 
limit=15.0 +2024-09-18 13:11:40,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=449440.0, ans=0.125 +2024-09-18 13:11:41,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=449440.0, ans=0.0 +2024-09-18 13:12:29,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=449600.0, ans=0.125 +2024-09-18 13:12:31,194 INFO [train.py:1198] (1/2) Epoch 25, batch 3800, loss[loss=0.2569, ctc_loss=0.1364, cr_loss=0.3645, attn_decoder_loss=0.2621, over 29650.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1274, cr_loss=0.3704, attn_decoder_loss=0.2455, over 5797762.81 frames. ], batch size: 86, lr: 4.41e-03, grad_scale: 8.0 +2024-09-18 13:12:37,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=449600.0, ans=0.125 +2024-09-18 13:12:44,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=449640.0, ans=0.0 +2024-09-18 13:12:53,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=449640.0, ans=0.125 +2024-09-18 13:13:05,155 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.43 vs. limit=15.0 +2024-09-18 13:13:47,360 INFO [train.py:1198] (1/2) Epoch 25, batch 3850, loss[loss=0.2581, ctc_loss=0.1351, cr_loss=0.388, attn_decoder_loss=0.2631, over 29226.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1274, cr_loss=0.3704, attn_decoder_loss=0.2455, over 5811411.39 frames. ], batch size: 100, lr: 4.41e-03, grad_scale: 8.0 +2024-09-18 13:14:06,410 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.121e+01 8.692e+01 9.184e+01 9.971e+01 1.957e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-18 13:14:25,663 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.96 vs. limit=15.0 +2024-09-18 13:14:42,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=449920.0, ans=0.1 +2024-09-18 13:14:48,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=449960.0, ans=0.1 +2024-09-18 13:15:02,133 INFO [train.py:1198] (1/2) Epoch 25, batch 3900, loss[loss=0.2481, ctc_loss=0.1239, cr_loss=0.366, attn_decoder_loss=0.2537, over 29654.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1276, cr_loss=0.3702, attn_decoder_loss=0.2457, over 5815723.19 frames. ], batch size: 86, lr: 4.41e-03, grad_scale: 8.0 +2024-09-18 13:15:28,219 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.65 vs. limit=15.0 +2024-09-18 13:15:41,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.73 vs. 
limit=6.0 +2024-09-18 13:15:43,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=450080.0, ans=0.0 +2024-09-18 13:15:47,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=450120.0, ans=0.125 +2024-09-18 13:16:09,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=450160.0, ans=0.125 +2024-09-18 13:16:14,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.80 vs. limit=10.0 +2024-09-18 13:16:16,560 INFO [train.py:1198] (1/2) Epoch 25, batch 3950, loss[loss=0.2566, ctc_loss=0.1407, cr_loss=0.3944, attn_decoder_loss=0.2608, over 29542.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1273, cr_loss=0.3702, attn_decoder_loss=0.2458, over 5835357.47 frames. ], batch size: 97, lr: 4.41e-03, grad_scale: 8.0 +2024-09-18 13:16:20,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=450200.0, ans=0.0 +2024-09-18 13:16:37,517 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.531e+01 8.544e+01 9.055e+01 9.627e+01 1.387e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-18 13:17:01,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=450320.0, ans=0.0 +2024-09-18 13:17:06,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.96 vs. limit=22.5 +2024-09-18 13:17:11,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=450320.0, ans=0.125 +2024-09-18 13:17:11,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=450320.0, ans=0.2 +2024-09-18 13:17:23,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=450360.0, ans=0.125 +2024-09-18 13:17:26,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=450360.0, ans=0.0 +2024-09-18 13:17:32,502 INFO [train.py:1198] (1/2) Epoch 25, batch 4000, loss[loss=0.2278, ctc_loss=0.1197, cr_loss=0.3491, attn_decoder_loss=0.232, over 29488.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1278, cr_loss=0.3705, attn_decoder_loss=0.2458, over 5812345.89 frames. ], batch size: 74, lr: 4.41e-03, grad_scale: 16.0 +2024-09-18 13:17:59,828 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.21 vs. 
limit=15.0 +2024-09-18 13:18:18,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=450520.0, ans=0.025 +2024-09-18 13:18:33,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=450560.0, ans=0.2 +2024-09-18 13:18:45,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=450560.0, ans=0.2 +2024-09-18 13:18:48,056 INFO [train.py:1198] (1/2) Epoch 25, batch 4050, loss[loss=0.2645, ctc_loss=0.1609, cr_loss=0.3943, attn_decoder_loss=0.2672, over 19985.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1276, cr_loss=0.3701, attn_decoder_loss=0.2458, over 5796157.98 frames. ], batch size: 209, lr: 4.41e-03, grad_scale: 8.0 +2024-09-18 13:19:08,364 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.463e+01 8.851e+01 9.697e+01 1.095e+02 3.076e+02, threshold=1.939e+02, percent-clipped=2.0 +2024-09-18 13:19:22,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.31 vs. limit=12.0 +2024-09-18 13:19:37,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=450720.0, ans=0.025 +2024-09-18 13:19:49,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=450760.0, ans=0.125 +2024-09-18 13:19:49,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=450760.0, ans=0.0 +2024-09-18 13:19:55,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=450760.0, ans=0.0 +2024-09-18 13:19:55,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=450760.0, ans=0.125 +2024-09-18 13:20:00,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.44 vs. limit=15.0 +2024-09-18 13:20:01,363 INFO [train.py:1198] (1/2) Epoch 25, batch 4100, loss[loss=0.2498, ctc_loss=0.1383, cr_loss=0.3961, attn_decoder_loss=0.2534, over 29536.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1276, cr_loss=0.3702, attn_decoder_loss=0.2459, over 5791896.57 frames. ], batch size: 90, lr: 4.40e-03, grad_scale: 8.0 +2024-09-18 13:20:13,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=450800.0, ans=0.1 +2024-09-18 13:20:14,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=450840.0, ans=0.125 +2024-09-18 13:20:19,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=450840.0, ans=0.125 +2024-09-18 13:20:22,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=450840.0, ans=0.0 +2024-09-18 13:20:28,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.17 vs. 
limit=15.0 +2024-09-18 13:20:33,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=450880.0, ans=0.025 +2024-09-18 13:20:37,268 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.10 vs. limit=6.0 +2024-09-18 13:20:38,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=450880.0, ans=0.125 +2024-09-18 13:20:51,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=450920.0, ans=0.125 +2024-09-18 13:20:51,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten.whitening_limit, batch_count=450920.0, ans=22.5 +2024-09-18 13:21:16,046 INFO [train.py:1198] (1/2) Epoch 25, batch 4150, loss[loss=0.2302, ctc_loss=0.1189, cr_loss=0.354, attn_decoder_loss=0.2347, over 29537.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1274, cr_loss=0.3698, attn_decoder_loss=0.2457, over 5798040.81 frames. ], batch size: 77, lr: 4.40e-03, grad_scale: 8.0 +2024-09-18 13:21:19,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=451000.0, ans=0.125 +2024-09-18 13:21:23,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=451000.0, ans=0.0 +2024-09-18 13:21:32,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=451040.0, ans=0.125 +2024-09-18 13:21:36,739 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.849e+01 8.352e+01 8.963e+01 9.819e+01 3.617e+02, threshold=1.793e+02, percent-clipped=2.0 +2024-09-18 13:21:41,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=451040.0, ans=0.2 +2024-09-18 13:22:03,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=451120.0, ans=0.0 +2024-09-18 13:22:07,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=451120.0, ans=0.0 +2024-09-18 13:22:18,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=451160.0, ans=0.125 +2024-09-18 13:22:30,266 INFO [train.py:1198] (1/2) Epoch 25, batch 4200, loss[loss=0.263, ctc_loss=0.1458, cr_loss=0.3952, attn_decoder_loss=0.2672, over 29470.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1278, cr_loss=0.3709, attn_decoder_loss=0.2461, over 5799972.56 frames. ], batch size: 90, lr: 4.40e-03, grad_scale: 8.0 +2024-09-18 13:22:53,265 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.52 vs. 
limit=15.0 +2024-09-18 13:23:22,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=451320.0, ans=0.125 +2024-09-18 13:23:41,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=451360.0, ans=0.1 +2024-09-18 13:23:42,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=451360.0, ans=0.125 +2024-09-18 13:23:45,271 INFO [train.py:1198] (1/2) Epoch 25, batch 4250, loss[loss=0.231, ctc_loss=0.1168, cr_loss=0.3481, attn_decoder_loss=0.236, over 29489.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1276, cr_loss=0.3704, attn_decoder_loss=0.2464, over 5806422.65 frames. ], batch size: 74, lr: 4.40e-03, grad_scale: 8.0 +2024-09-18 13:23:47,891 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.90 vs. limit=15.0 +2024-09-18 13:24:05,762 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.479e+01 8.410e+01 8.848e+01 9.485e+01 3.555e+02, threshold=1.770e+02, percent-clipped=1.0 +2024-09-18 13:24:18,519 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.34 vs. limit=15.0 +2024-09-18 13:24:39,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=451520.0, ans=0.0 +2024-09-18 13:24:47,522 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.54 vs. limit=22.5 +2024-09-18 13:24:48,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=451560.0, ans=0.125 +2024-09-18 13:24:51,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=451560.0, ans=0.125 +2024-09-18 13:24:59,840 INFO [train.py:1198] (1/2) Epoch 25, batch 4300, loss[loss=0.261, ctc_loss=0.1421, cr_loss=0.4006, attn_decoder_loss=0.2653, over 29516.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1271, cr_loss=0.3691, attn_decoder_loss=0.2465, over 5795780.32 frames. ], batch size: 87, lr: 4.40e-03, grad_scale: 8.0 +2024-09-18 13:25:28,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=451680.0, ans=0.0 +2024-09-18 13:25:30,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.28 vs. limit=15.0 +2024-09-18 13:26:05,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=451760.0, ans=0.0 +2024-09-18 13:26:07,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=451760.0, ans=0.0 +2024-09-18 13:26:14,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=451800.0, ans=0.0 +2024-09-18 13:26:15,307 INFO [train.py:1198] (1/2) Epoch 25, batch 4350, loss[loss=0.2542, ctc_loss=0.1339, cr_loss=0.3924, attn_decoder_loss=0.2588, over 29476.00 frames. ], tot_loss[loss=0.2453, ctc_loss=0.1299, cr_loss=0.3754, attn_decoder_loss=0.2498, over 5797801.06 frames. 
], batch size: 97, lr: 4.40e-03, grad_scale: 8.0 +2024-09-18 13:26:34,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.38 vs. limit=15.0 +2024-09-18 13:26:36,114 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.060e+01 8.772e+01 9.206e+01 9.719e+01 3.076e+02, threshold=1.841e+02, percent-clipped=2.0 +2024-09-18 13:26:41,143 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.14 vs. limit=15.0 +2024-09-18 13:26:42,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=451840.0, ans=0.125 +2024-09-18 13:26:46,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=451880.0, ans=0.025 +2024-09-18 13:26:51,662 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.53 vs. limit=6.0 +2024-09-18 13:27:05,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=451920.0, ans=0.1 +2024-09-18 13:27:07,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=451920.0, ans=0.0 +2024-09-18 13:27:08,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=451920.0, ans=0.2 +2024-09-18 13:27:08,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=451920.0, ans=0.125 +2024-09-18 13:27:16,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=451960.0, ans=0.0 +2024-09-18 13:27:23,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=451960.0, ans=0.1 +2024-09-18 13:27:24,181 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.75 vs. limit=6.0 +2024-09-18 13:27:29,899 INFO [train.py:1198] (1/2) Epoch 25, batch 4400, loss[loss=0.2488, ctc_loss=0.134, cr_loss=0.3881, attn_decoder_loss=0.2529, over 27346.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1314, cr_loss=0.3778, attn_decoder_loss=0.2518, over 5765789.45 frames. ], batch size: 124, lr: 4.40e-03, grad_scale: 16.0 +2024-09-18 13:27:56,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=452040.0, ans=0.2 +2024-09-18 13:28:00,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=452080.0, ans=0.1 +2024-09-18 13:28:12,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=452120.0, ans=10.0 +2024-09-18 13:28:18,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=452120.0, ans=0.2 +2024-09-18 13:28:22,025 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.31 vs. 
limit=15.0 +2024-09-18 13:28:24,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=452120.0, ans=0.2 +2024-09-18 13:28:30,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=452160.0, ans=0.125 +2024-09-18 13:28:44,345 INFO [train.py:1198] (1/2) Epoch 25, batch 4450, loss[loss=0.2657, ctc_loss=0.1607, cr_loss=0.4128, attn_decoder_loss=0.2682, over 20996.00 frames. ], tot_loss[loss=0.2499, ctc_loss=0.1356, cr_loss=0.3832, attn_decoder_loss=0.2541, over 5572957.68 frames. ], batch size: 209, lr: 4.40e-03, grad_scale: 8.0 +2024-09-18 13:28:53,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=452200.0, ans=0.0 +2024-09-18 13:28:58,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=452240.0, ans=0.1 +2024-09-18 13:29:07,250 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.138e+01 9.104e+01 9.870e+01 1.187e+02 3.111e+02, threshold=1.974e+02, percent-clipped=3.0 +2024-09-18 13:29:23,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.83 vs. limit=8.0 +2024-09-18 13:29:25,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=452280.0, ans=0.1 +2024-09-18 13:29:28,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=452320.0, ans=0.0 +2024-09-18 13:29:33,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=452320.0, ans=0.125 +2024-09-18 13:29:46,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=452360.0, ans=0.125 +2024-09-18 13:30:00,169 INFO [train.py:1198] (1/2) Epoch 25, batch 4500, loss[loss=0.2606, ctc_loss=0.1562, cr_loss=0.3974, attn_decoder_loss=0.2634, over 19792.00 frames. ], tot_loss[loss=0.2524, ctc_loss=0.1396, cr_loss=0.3863, attn_decoder_loss=0.2564, over 5231045.83 frames. ], batch size: 210, lr: 4.40e-03, grad_scale: 8.0 +2024-09-18 13:30:01,340 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.90 vs. limit=12.0 +2024-09-18 13:30:18,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=452440.0, ans=0.025 +2024-09-18 13:30:23,923 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.19 vs. limit=6.0 +2024-09-18 13:31:31,900 INFO [train.py:1198] (1/2) Epoch 26, batch 0, loss[loss=0.2164, ctc_loss=0.1106, cr_loss=0.3274, attn_decoder_loss=0.2209, over 29632.00 frames. ], tot_loss[loss=0.2164, ctc_loss=0.1106, cr_loss=0.3274, attn_decoder_loss=0.2209, over 29632.00 frames. ], batch size: 73, lr: 4.31e-03, grad_scale: 16.0 +2024-09-18 13:31:31,901 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 13:31:52,390 INFO [train.py:1230] (1/2) Epoch 26, validation: loss=0.2126, ctc_loss=0.03779, cr_loss=5.994e-15, attn_decoder_loss=0.232, over 944034.00 frames. 
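
The ScheduledFloat entries that dominate this log print the current value (the "ans=" field) of hyperparameters such as dropout probabilities, skip rates, and balancer probs, each scheduled against the global batch_count. As a rough sketch of the mechanism only (a minimal illustration assuming a piecewise-linear schedule; the class name, breakpoints, and values below are illustrative and are not icefall's actual ScheduledFloat API from scaling.py):

import bisect

class PiecewiseLinearFloat:
    """A float hyperparameter interpolated piecewise-linearly in batch_count."""

    def __init__(self, *points):
        # points: (batch_count, value) pairs, e.g. (0, 0.3), (8000, 0.125)
        # to anneal a balancer prob from 0.3 down to 0.125 over 8k batches.
        self.points = sorted(points)

    def __call__(self, batch_count: float) -> float:
        xs = [x for x, _ in self.points]
        i = bisect.bisect_right(xs, batch_count)
        if i == 0:                  # before the first breakpoint
            return self.points[0][1]
        if i == len(self.points):   # past the last breakpoint: hold the final value
            return self.points[-1][1]
        (x0, y0), (x1, y1) = self.points[i - 1], self.points[i]
        t = (batch_count - x0) / (x1 - x0)
        return y0 + t * (y1 - y0)

prob = PiecewiseLinearFloat((0.0, 0.3), (8000.0, 0.125))
print(prob(449200.0))  # -> 0.125, the fully annealed value logged as "ans=0.125"

By batch_count ~449k every such schedule would long since be flat, which is consistent with the same ans= value repeating for each parameter name throughout this portion of the log. Relatedly, the WARNING lines from optim.py report five quantiles (apparently min, 25%, 50%, 75%, max) of recently observed gradient norms; in each warning the printed threshold equals Clipping_scale times the median, e.g. 2.0 * 8.983e+01 = 1.797e+02 in the first warning of this excerpt, and percent-clipped presumably gives the percentage of recent batches whose gradient norm exceeded that threshold.
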
+2024-09-18 13:31:52,391 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 13:31:58,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=452500.0, ans=0.0 +2024-09-18 13:32:06,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=452540.0, ans=0.1 +2024-09-18 13:32:40,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=452620.0, ans=0.0 +2024-09-18 13:32:40,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=452620.0, ans=0.1 +2024-09-18 13:32:51,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=452660.0, ans=0.0 +2024-09-18 13:32:52,740 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.560e+01 9.299e+01 1.068e+02 1.174e+02 2.339e+02, threshold=2.135e+02, percent-clipped=1.0 +2024-09-18 13:32:53,692 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.48 vs. limit=15.0 +2024-09-18 13:33:07,811 INFO [train.py:1198] (1/2) Epoch 26, batch 50, loss[loss=0.2145, ctc_loss=0.1051, cr_loss=0.3168, attn_decoder_loss=0.2196, over 29442.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1305, cr_loss=0.3752, attn_decoder_loss=0.2474, over 1267583.64 frames. ], batch size: 70, lr: 4.31e-03, grad_scale: 16.0 +2024-09-18 13:33:11,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=452700.0, ans=0.0 +2024-09-18 13:33:21,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=452740.0, ans=0.125 +2024-09-18 13:33:26,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=452740.0, ans=0.0 +2024-09-18 13:33:52,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=452820.0, ans=0.0 +2024-09-18 13:34:03,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.57 vs. limit=15.0 +2024-09-18 13:34:07,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=452860.0, ans=0.125 +2024-09-18 13:34:21,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=452860.0, ans=0.0 +2024-09-18 13:34:24,116 INFO [train.py:1198] (1/2) Epoch 26, batch 100, loss[loss=0.2258, ctc_loss=0.1187, cr_loss=0.3465, attn_decoder_loss=0.23, over 29524.00 frames. ], tot_loss[loss=0.2456, ctc_loss=0.132, cr_loss=0.3794, attn_decoder_loss=0.2498, over 2252577.39 frames. 
], batch size: 76, lr: 4.31e-03, grad_scale: 8.0 +2024-09-18 13:34:27,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=452900.0, ans=0.1 +2024-09-18 13:34:48,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=452940.0, ans=0.125 +2024-09-18 13:34:55,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=452980.0, ans=0.2 +2024-09-18 13:34:58,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=452980.0, ans=0.2 +2024-09-18 13:35:00,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=452980.0, ans=0.125 +2024-09-18 13:35:01,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=452980.0, ans=0.1 +2024-09-18 13:35:25,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=453060.0, ans=0.2 +2024-09-18 13:35:27,636 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.345e+01 8.544e+01 8.982e+01 9.348e+01 1.241e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-18 13:35:36,948 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.62 vs. limit=15.0 +2024-09-18 13:35:40,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=453060.0, ans=0.125 +2024-09-18 13:35:43,441 INFO [train.py:1198] (1/2) Epoch 26, batch 150, loss[loss=0.2075, ctc_loss=0.09893, cr_loss=0.3165, attn_decoder_loss=0.2125, over 29456.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1296, cr_loss=0.376, attn_decoder_loss=0.2474, over 3046642.94 frames. ], batch size: 70, lr: 4.31e-03, grad_scale: 8.0 +2024-09-18 13:35:44,571 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.56 vs. limit=10.0 +2024-09-18 13:35:57,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=453140.0, ans=0.125 +2024-09-18 13:36:29,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=453220.0, ans=0.0 +2024-09-18 13:36:47,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=453260.0, ans=0.125 +2024-09-18 13:36:58,963 INFO [train.py:1198] (1/2) Epoch 26, batch 200, loss[loss=0.2513, ctc_loss=0.1345, cr_loss=0.3688, attn_decoder_loss=0.256, over 27384.00 frames. ], tot_loss[loss=0.2423, ctc_loss=0.1292, cr_loss=0.3752, attn_decoder_loss=0.2465, over 3657986.46 frames. ], batch size: 125, lr: 4.31e-03, grad_scale: 8.0 +2024-09-18 13:37:16,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.29 vs. 
limit=15.0 +2024-09-18 13:37:27,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=453380.0, ans=0.0 +2024-09-18 13:37:30,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=453380.0, ans=0.07 +2024-09-18 13:37:42,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=453420.0, ans=0.125 +2024-09-18 13:38:00,826 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.772e+01 8.385e+01 8.934e+01 9.482e+01 1.708e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-18 13:38:02,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_positive, batch_count=453460.0, ans=0.05 +2024-09-18 13:38:14,316 INFO [train.py:1198] (1/2) Epoch 26, batch 250, loss[loss=0.2558, ctc_loss=0.1321, cr_loss=0.379, attn_decoder_loss=0.2611, over 29287.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1282, cr_loss=0.3733, attn_decoder_loss=0.2462, over 4140979.26 frames. ], batch size: 100, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:38:47,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=453580.0, ans=0.1 +2024-09-18 13:38:57,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=453580.0, ans=0.125 +2024-09-18 13:39:05,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=453620.0, ans=0.125 +2024-09-18 13:39:09,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=453620.0, ans=0.09899494936611666 +2024-09-18 13:39:18,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=453660.0, ans=0.025 +2024-09-18 13:39:34,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=453700.0, ans=0.125 +2024-09-18 13:39:35,290 INFO [train.py:1198] (1/2) Epoch 26, batch 300, loss[loss=0.2509, ctc_loss=0.1299, cr_loss=0.3904, attn_decoder_loss=0.2556, over 29587.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1277, cr_loss=0.3719, attn_decoder_loss=0.246, over 4509074.61 frames. ], batch size: 92, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:40:28,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=453820.0, ans=0.025 +2024-09-18 13:40:37,318 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.478e+01 8.450e+01 8.925e+01 9.500e+01 1.325e+02, threshold=1.785e+02, percent-clipped=0.0 +2024-09-18 13:40:50,742 INFO [train.py:1198] (1/2) Epoch 26, batch 350, loss[loss=0.212, ctc_loss=0.1126, cr_loss=0.3531, attn_decoder_loss=0.2152, over 29327.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1278, cr_loss=0.3716, attn_decoder_loss=0.2463, over 4793470.11 frames. ], batch size: 71, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:42:05,714 INFO [train.py:1198] (1/2) Epoch 26, batch 400, loss[loss=0.2484, ctc_loss=0.1341, cr_loss=0.3927, attn_decoder_loss=0.2524, over 29718.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.127, cr_loss=0.3706, attn_decoder_loss=0.2459, over 5023879.84 frames. 
], batch size: 82, lr: 4.30e-03, grad_scale: 16.0 +2024-09-18 13:42:19,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=454140.0, ans=0.025 +2024-09-18 13:42:53,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=454220.0, ans=0.125 +2024-09-18 13:43:03,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=454220.0, ans=0.1 +2024-09-18 13:43:07,925 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.062e+01 8.396e+01 8.968e+01 9.786e+01 1.327e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 13:43:16,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=454260.0, ans=0.125 +2024-09-18 13:43:26,144 INFO [train.py:1198] (1/2) Epoch 26, batch 450, loss[loss=0.2394, ctc_loss=0.1263, cr_loss=0.3765, attn_decoder_loss=0.2436, over 29685.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1271, cr_loss=0.3701, attn_decoder_loss=0.2458, over 5185330.31 frames. ], batch size: 83, lr: 4.30e-03, grad_scale: 16.0 +2024-09-18 13:43:27,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=454300.0, ans=0.025 +2024-09-18 13:43:38,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=454300.0, ans=0.0 +2024-09-18 13:44:13,357 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.47 vs. limit=5.0 +2024-09-18 13:44:14,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=454420.0, ans=0.0 +2024-09-18 13:44:15,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=454420.0, ans=0.125 +2024-09-18 13:44:24,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=454420.0, ans=0.125 +2024-09-18 13:44:27,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=454460.0, ans=0.125 +2024-09-18 13:44:35,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=454460.0, ans=0.125 +2024-09-18 13:44:35,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=454460.0, ans=0.125 +2024-09-18 13:44:42,488 INFO [train.py:1198] (1/2) Epoch 26, batch 500, loss[loss=0.2564, ctc_loss=0.1434, cr_loss=0.4162, attn_decoder_loss=0.2597, over 29478.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1264, cr_loss=0.3693, attn_decoder_loss=0.2449, over 5328940.65 frames. 
], batch size: 94, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:44:44,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=454500.0, ans=0.1 +2024-09-18 13:44:50,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=454500.0, ans=0.1 +2024-09-18 13:44:59,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=454540.0, ans=0.125 +2024-09-18 13:45:21,776 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.68 vs. limit=15.0 +2024-09-18 13:45:26,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.77 vs. limit=15.0 +2024-09-18 13:45:28,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=454620.0, ans=0.09899494936611666 +2024-09-18 13:45:35,261 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.48 vs. limit=15.0 +2024-09-18 13:45:45,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=454660.0, ans=0.0 +2024-09-18 13:45:46,385 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.542e+01 8.427e+01 8.869e+01 9.503e+01 2.659e+02, threshold=1.774e+02, percent-clipped=2.0 +2024-09-18 13:45:52,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=454660.0, ans=0.025 +2024-09-18 13:45:58,558 INFO [train.py:1198] (1/2) Epoch 26, batch 550, loss[loss=0.2525, ctc_loss=0.1381, cr_loss=0.3899, attn_decoder_loss=0.2565, over 28714.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1263, cr_loss=0.3691, attn_decoder_loss=0.245, over 5420840.42 frames. ], batch size: 104, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:46:01,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=454700.0, ans=0.2 +2024-09-18 13:46:14,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=454740.0, ans=0.125 +2024-09-18 13:46:31,283 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.65 vs. limit=15.0 +2024-09-18 13:46:34,553 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.00 vs. limit=15.0 +2024-09-18 13:47:16,785 INFO [train.py:1198] (1/2) Epoch 26, batch 600, loss[loss=0.2541, ctc_loss=0.1347, cr_loss=0.3979, attn_decoder_loss=0.2585, over 29286.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1263, cr_loss=0.3691, attn_decoder_loss=0.2451, over 5508757.64 frames. 
], batch size: 100, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:47:44,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=454940.0, ans=0.2 +2024-09-18 13:48:22,372 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.100e+01 8.526e+01 8.982e+01 9.575e+01 5.252e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-18 13:48:31,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=455060.0, ans=0.125 +2024-09-18 13:48:34,596 INFO [train.py:1198] (1/2) Epoch 26, batch 650, loss[loss=0.244, ctc_loss=0.1291, cr_loss=0.3713, attn_decoder_loss=0.2485, over 29777.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1257, cr_loss=0.3676, attn_decoder_loss=0.2445, over 5585849.47 frames. ], batch size: 81, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:48:35,568 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.20 vs. limit=15.0 +2024-09-18 13:48:37,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=455100.0, ans=0.1 +2024-09-18 13:48:39,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=455100.0, ans=0.0 +2024-09-18 13:48:59,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=455140.0, ans=0.0 +2024-09-18 13:49:03,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=455180.0, ans=0.2 +2024-09-18 13:49:37,626 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:49:46,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=455260.0, ans=0.2 +2024-09-18 13:49:49,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=455300.0, ans=0.125 +2024-09-18 13:49:50,941 INFO [train.py:1198] (1/2) Epoch 26, batch 700, loss[loss=0.229, ctc_loss=0.1145, cr_loss=0.3331, attn_decoder_loss=0.2343, over 29565.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1263, cr_loss=0.3687, attn_decoder_loss=0.2452, over 5634597.71 frames. ], batch size: 76, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:49:56,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.77 vs. limit=10.0 +2024-09-18 13:50:01,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=455300.0, ans=0.125 +2024-09-18 13:50:09,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=455340.0, ans=0.125 +2024-09-18 13:50:15,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=455340.0, ans=0.125 +2024-09-18 13:50:18,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=455340.0, ans=0.0 +2024-09-18 13:50:31,425 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.92 vs. 
limit=6.0 +2024-09-18 13:50:36,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=455420.0, ans=0.025 +2024-09-18 13:50:40,373 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.46 vs. limit=15.0 +2024-09-18 13:50:47,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=455420.0, ans=0.125 +2024-09-18 13:50:53,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=455460.0, ans=0.125 +2024-09-18 13:50:54,634 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.101e+01 8.356e+01 8.785e+01 9.330e+01 1.328e+02, threshold=1.757e+02, percent-clipped=0.0 +2024-09-18 13:50:55,085 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:51:00,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=455460.0, ans=0.1 +2024-09-18 13:51:04,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=455460.0, ans=0.0 +2024-09-18 13:51:06,765 INFO [train.py:1198] (1/2) Epoch 26, batch 750, loss[loss=0.2427, ctc_loss=0.1251, cr_loss=0.3597, attn_decoder_loss=0.2478, over 29725.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.126, cr_loss=0.3685, attn_decoder_loss=0.2449, over 5674455.42 frames. ], batch size: 82, lr: 4.30e-03, grad_scale: 8.0 +2024-09-18 13:51:07,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=455500.0, ans=0.125 +2024-09-18 13:51:39,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=455580.0, ans=0.1 +2024-09-18 13:52:00,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=455620.0, ans=0.0 +2024-09-18 13:52:14,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=455660.0, ans=0.1 +2024-09-18 13:52:25,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=455700.0, ans=0.0 +2024-09-18 13:52:26,854 INFO [train.py:1198] (1/2) Epoch 26, batch 800, loss[loss=0.2197, ctc_loss=0.1177, cr_loss=0.3493, attn_decoder_loss=0.2233, over 29589.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1261, cr_loss=0.3687, attn_decoder_loss=0.2449, over 5705936.46 frames. 
], batch size: 73, lr: 4.29e-03, grad_scale: 16.0 +2024-09-18 13:52:30,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=455700.0, ans=0.2 +2024-09-18 13:52:31,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=455700.0, ans=0.0 +2024-09-18 13:53:12,422 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:53:24,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=455820.0, ans=0.125 +2024-09-18 13:53:30,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=455860.0, ans=0.0 +2024-09-18 13:53:31,731 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.399e+01 8.438e+01 9.008e+01 9.520e+01 4.430e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-18 13:53:42,296 INFO [train.py:1198] (1/2) Epoch 26, batch 850, loss[loss=0.2559, ctc_loss=0.1311, cr_loss=0.3675, attn_decoder_loss=0.2616, over 29732.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1262, cr_loss=0.3687, attn_decoder_loss=0.2449, over 5736299.98 frames. ], batch size: 89, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:54:31,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=456020.0, ans=0.0 +2024-09-18 13:54:39,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=456020.0, ans=0.125 +2024-09-18 13:54:55,939 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:54:58,545 INFO [train.py:1198] (1/2) Epoch 26, batch 900, loss[loss=0.2314, ctc_loss=0.1166, cr_loss=0.3672, attn_decoder_loss=0.236, over 29638.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1265, cr_loss=0.3692, attn_decoder_loss=0.2451, over 5739524.24 frames. ], batch size: 73, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:55:03,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=456100.0, ans=0.125 +2024-09-18 13:55:47,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=456220.0, ans=0.125 +2024-09-18 13:55:53,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=456220.0, ans=0.2 +2024-09-18 13:55:56,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=456220.0, ans=0.0 +2024-09-18 13:56:05,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=456260.0, ans=0.0 +2024-09-18 13:56:07,881 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.077e+01 8.650e+01 9.071e+01 9.568e+01 1.657e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-18 13:56:11,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=456260.0, ans=0.0 +2024-09-18 13:56:18,480 INFO [train.py:1198] (1/2) Epoch 26, batch 950, loss[loss=0.2232, ctc_loss=0.1091, cr_loss=0.3182, attn_decoder_loss=0.2288, over 29486.00 frames. 
], tot_loss[loss=0.2406, ctc_loss=0.1264, cr_loss=0.3685, attn_decoder_loss=0.2451, over 5741178.81 frames. ], batch size: 74, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:56:42,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=456340.0, ans=0.125 +2024-09-18 13:56:55,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.70 vs. limit=10.0 +2024-09-18 13:57:00,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=456380.0, ans=0.125 +2024-09-18 13:57:02,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=456420.0, ans=0.07 +2024-09-18 13:57:06,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=456420.0, ans=0.125 +2024-09-18 13:57:33,408 INFO [train.py:1198] (1/2) Epoch 26, batch 1000, loss[loss=0.2234, ctc_loss=0.1072, cr_loss=0.326, attn_decoder_loss=0.2291, over 29502.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1274, cr_loss=0.3705, attn_decoder_loss=0.2459, over 5735840.49 frames. ], batch size: 77, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:57:35,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=456500.0, ans=0.125 +2024-09-18 13:57:44,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=456500.0, ans=0.2 +2024-09-18 13:58:05,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=456580.0, ans=0.0 +2024-09-18 13:58:18,604 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.24 vs. limit=15.0 +2024-09-18 13:58:38,768 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.192e+01 8.379e+01 9.044e+01 9.595e+01 2.964e+02, threshold=1.809e+02, percent-clipped=3.0 +2024-09-18 13:58:40,602 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 13:58:43,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=456660.0, ans=0.125 +2024-09-18 13:58:49,367 INFO [train.py:1198] (1/2) Epoch 26, batch 1050, loss[loss=0.2557, ctc_loss=0.1292, cr_loss=0.3653, attn_decoder_loss=0.2616, over 29704.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1268, cr_loss=0.3696, attn_decoder_loss=0.2453, over 5744590.59 frames. ], batch size: 85, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 13:58:49,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=456700.0, ans=0.09899494936611666 +2024-09-18 13:58:55,592 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.11 vs. 
limit=15.0 +2024-09-18 13:58:56,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=456700.0, ans=0.125 +2024-09-18 13:58:56,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=456700.0, ans=0.125 +2024-09-18 13:58:59,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=456700.0, ans=0.04949747468305833 +2024-09-18 13:59:10,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=456740.0, ans=0.125 +2024-09-18 13:59:56,085 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.68 vs. limit=10.0 +2024-09-18 14:00:10,565 INFO [train.py:1198] (1/2) Epoch 26, batch 1100, loss[loss=0.2329, ctc_loss=0.1286, cr_loss=0.3874, attn_decoder_loss=0.2359, over 29457.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1267, cr_loss=0.3691, attn_decoder_loss=0.2453, over 5756399.94 frames. ], batch size: 78, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 14:00:32,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=456940.0, ans=0.125 +2024-09-18 14:00:38,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=456940.0, ans=0.0 +2024-09-18 14:00:45,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=456980.0, ans=0.125 +2024-09-18 14:00:56,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=457020.0, ans=0.125 +2024-09-18 14:01:07,553 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.50 vs. limit=15.0 +2024-09-18 14:01:15,842 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.069e+01 8.599e+01 9.010e+01 9.619e+01 1.920e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-18 14:01:26,680 INFO [train.py:1198] (1/2) Epoch 26, batch 1150, loss[loss=0.2272, ctc_loss=0.1157, cr_loss=0.3536, attn_decoder_loss=0.2318, over 29449.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1268, cr_loss=0.3698, attn_decoder_loss=0.2454, over 5756156.56 frames. ], batch size: 78, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 14:01:28,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=457100.0, ans=0.05 +2024-09-18 14:01:41,447 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.67 vs. limit=15.0 +2024-09-18 14:01:53,439 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.19 vs. limit=22.5 +2024-09-18 14:01:54,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=457140.0, ans=0.025 +2024-09-18 14:02:07,496 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.76 vs. 
limit=22.5 +2024-09-18 14:02:11,982 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.15 vs. limit=22.5 +2024-09-18 14:02:28,134 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.26 vs. limit=15.0 +2024-09-18 14:02:44,782 INFO [train.py:1198] (1/2) Epoch 26, batch 1200, loss[loss=0.246, ctc_loss=0.123, cr_loss=0.353, attn_decoder_loss=0.2519, over 29667.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1274, cr_loss=0.37, attn_decoder_loss=0.2464, over 5747629.28 frames. ], batch size: 85, lr: 4.29e-03, grad_scale: 16.0 +2024-09-18 14:03:01,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=457340.0, ans=0.05 +2024-09-18 14:03:10,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=457340.0, ans=0.125 +2024-09-18 14:03:19,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=457380.0, ans=0.1 +2024-09-18 14:03:20,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.10 vs. limit=15.0 +2024-09-18 14:03:40,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=457420.0, ans=0.0 +2024-09-18 14:03:43,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=457420.0, ans=0.125 +2024-09-18 14:03:44,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=457420.0, ans=0.125 +2024-09-18 14:03:46,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=457460.0, ans=0.125 +2024-09-18 14:03:53,663 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.217e+01 8.704e+01 9.142e+01 9.758e+01 1.993e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-18 14:04:02,760 INFO [train.py:1198] (1/2) Epoch 26, batch 1250, loss[loss=0.2574, ctc_loss=0.1422, cr_loss=0.411, attn_decoder_loss=0.2611, over 29547.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.128, cr_loss=0.3711, attn_decoder_loss=0.2468, over 5773961.71 frames. ], batch size: 92, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 14:04:03,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=457500.0, ans=0.125 +2024-09-18 14:04:16,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=457540.0, ans=0.95 +2024-09-18 14:04:35,718 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.79 vs. 
limit=10.0 +2024-09-18 14:04:39,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=457580.0, ans=0.1 +2024-09-18 14:04:41,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=457580.0, ans=0.1 +2024-09-18 14:05:19,421 INFO [train.py:1198] (1/2) Epoch 26, batch 1300, loss[loss=0.2549, ctc_loss=0.144, cr_loss=0.4133, attn_decoder_loss=0.258, over 28331.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1277, cr_loss=0.371, attn_decoder_loss=0.2462, over 5777037.24 frames. ], batch size: 111, lr: 4.29e-03, grad_scale: 8.0 +2024-09-18 14:05:19,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=457700.0, ans=0.125 +2024-09-18 14:05:30,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=457700.0, ans=0.0 +2024-09-18 14:05:31,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=457700.0, ans=0.0 +2024-09-18 14:05:46,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_abs, batch_count=457740.0, ans=0.5 +2024-09-18 14:05:48,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=457780.0, ans=0.2 +2024-09-18 14:05:54,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=457780.0, ans=0.125 +2024-09-18 14:06:08,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.86 vs. limit=15.0 +2024-09-18 14:06:17,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=457820.0, ans=0.025 +2024-09-18 14:06:25,985 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.546e+01 8.454e+01 9.061e+01 9.465e+01 1.475e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 14:06:33,360 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.98 vs. limit=15.0 +2024-09-18 14:06:35,249 INFO [train.py:1198] (1/2) Epoch 26, batch 1350, loss[loss=0.2351, ctc_loss=0.1184, cr_loss=0.3407, attn_decoder_loss=0.2405, over 29749.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.127, cr_loss=0.3698, attn_decoder_loss=0.2458, over 5793844.55 frames. ], batch size: 81, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:06:47,377 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.10 vs. 
limit=15.0 +2024-09-18 14:06:54,091 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:07:03,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=457940.0, ans=0.2 +2024-09-18 14:07:17,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=457980.0, ans=0.125 +2024-09-18 14:07:23,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.75 vs. limit=15.0 +2024-09-18 14:07:28,607 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=8.07 vs. limit=12.0 +2024-09-18 14:07:31,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.48 vs. limit=15.0 +2024-09-18 14:07:44,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=458060.0, ans=0.0 +2024-09-18 14:07:55,051 INFO [train.py:1198] (1/2) Epoch 26, batch 1400, loss[loss=0.2222, ctc_loss=0.1113, cr_loss=0.331, attn_decoder_loss=0.2271, over 29580.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1271, cr_loss=0.3703, attn_decoder_loss=0.2459, over 5805656.10 frames. ], batch size: 69, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:07:55,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=458100.0, ans=0.125 +2024-09-18 14:08:04,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=458100.0, ans=0.0 +2024-09-18 14:08:25,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.04 vs. limit=15.0 +2024-09-18 14:08:55,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=458260.0, ans=0.1 +2024-09-18 14:09:01,515 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.058e+01 8.404e+01 8.959e+01 9.350e+01 1.926e+02, threshold=1.792e+02, percent-clipped=1.0 +2024-09-18 14:09:01,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=458260.0, ans=0.0 +2024-09-18 14:09:10,658 INFO [train.py:1198] (1/2) Epoch 26, batch 1450, loss[loss=0.2536, ctc_loss=0.1365, cr_loss=0.3882, attn_decoder_loss=0.258, over 29418.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1276, cr_loss=0.3712, attn_decoder_loss=0.2464, over 5802225.00 frames. ], batch size: 94, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:09:30,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=458340.0, ans=0.125 +2024-09-18 14:09:36,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=458340.0, ans=0.0 +2024-09-18 14:09:55,997 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.70 vs. 
limit=22.5 +2024-09-18 14:10:09,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=458420.0, ans=0.95 +2024-09-18 14:10:12,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=458460.0, ans=0.125 +2024-09-18 14:10:27,093 INFO [train.py:1198] (1/2) Epoch 26, batch 1500, loss[loss=0.2514, ctc_loss=0.1335, cr_loss=0.4025, attn_decoder_loss=0.2556, over 29612.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1275, cr_loss=0.3712, attn_decoder_loss=0.2466, over 5804009.26 frames. ], batch size: 86, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:10:37,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=458500.0, ans=0.035 +2024-09-18 14:10:46,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=458540.0, ans=0.1 +2024-09-18 14:10:54,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=458540.0, ans=0.125 +2024-09-18 14:10:55,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=458540.0, ans=0.0 +2024-09-18 14:11:01,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=458580.0, ans=0.2 +2024-09-18 14:11:05,709 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.41 vs. limit=22.5 +2024-09-18 14:11:38,954 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.523e+01 8.611e+01 9.025e+01 9.915e+01 2.823e+02, threshold=1.805e+02, percent-clipped=2.0 +2024-09-18 14:11:48,169 INFO [train.py:1198] (1/2) Epoch 26, batch 1550, loss[loss=0.2573, ctc_loss=0.1411, cr_loss=0.4051, attn_decoder_loss=0.2612, over 29533.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1276, cr_loss=0.371, attn_decoder_loss=0.2465, over 5780472.20 frames. ], batch size: 90, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:12:00,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=458700.0, ans=0.05 +2024-09-18 14:12:38,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=458820.0, ans=0.1 +2024-09-18 14:12:49,514 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.23 vs. limit=15.0 +2024-09-18 14:13:01,523 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.45 vs. limit=15.0 +2024-09-18 14:13:03,840 INFO [train.py:1198] (1/2) Epoch 26, batch 1600, loss[loss=0.256, ctc_loss=0.1278, cr_loss=0.3636, attn_decoder_loss=0.2622, over 29687.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1277, cr_loss=0.3709, attn_decoder_loss=0.2462, over 5762824.57 frames. 
], batch size: 85, lr: 4.28e-03, grad_scale: 16.0 +2024-09-18 14:13:16,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=458900.0, ans=0.025 +2024-09-18 14:13:28,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=458940.0, ans=0.125 +2024-09-18 14:13:49,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=459020.0, ans=0.05 +2024-09-18 14:13:54,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=459020.0, ans=0.2 +2024-09-18 14:13:54,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=459020.0, ans=0.125 +2024-09-18 14:13:58,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=459020.0, ans=0.035 +2024-09-18 14:13:58,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=459020.0, ans=0.0 +2024-09-18 14:14:03,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=459060.0, ans=0.0 +2024-09-18 14:14:04,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=459060.0, ans=0.2 +2024-09-18 14:14:04,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=459060.0, ans=0.0 +2024-09-18 14:14:09,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_abs, batch_count=459060.0, ans=0.5 +2024-09-18 14:14:09,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=459060.0, ans=0.0 +2024-09-18 14:14:12,106 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.510e+01 8.327e+01 8.927e+01 9.503e+01 2.372e+02, threshold=1.785e+02, percent-clipped=2.0 +2024-09-18 14:14:21,599 INFO [train.py:1198] (1/2) Epoch 26, batch 1650, loss[loss=0.2489, ctc_loss=0.132, cr_loss=0.3761, attn_decoder_loss=0.2535, over 29714.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1275, cr_loss=0.3705, attn_decoder_loss=0.2461, over 5756886.16 frames. ], batch size: 89, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:14:23,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=459100.0, ans=0.125 +2024-09-18 14:14:45,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.33 vs. limit=6.0 +2024-09-18 14:15:34,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=459260.0, ans=0.125 +2024-09-18 14:15:37,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=459260.0, ans=0.0 +2024-09-18 14:15:39,933 INFO [train.py:1198] (1/2) Epoch 26, batch 1700, loss[loss=0.2127, ctc_loss=0.1042, cr_loss=0.3153, attn_decoder_loss=0.2177, over 29597.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1269, cr_loss=0.3698, attn_decoder_loss=0.2458, over 5779022.16 frames. ], batch size: 69, lr: 4.28e-03, grad_scale: 8.0
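
A note on reading the train.py entries above: each one reports the batch's component losses (ctc_loss, attn_decoder_loss, and the consistency-regularization term cr_loss) next to their weighted combination, and tot_loss appears to be the same quantity averaged over the frame count printed after "over". The recipe's actual loss scales are command-line flags that are not part of this log, but the logged totals are consistent with a fixed mix of roughly 0.1 * ctc_loss + 0.9 * attn_decoder_loss + 0.02 * cr_loss. A minimal sketch of that combination (the scale values are inferred from the printed numbers, not read from the recipe's flags):

```python
# Hypothetical reconstruction of the logged "loss" from its parts; the
# scales are inferred from the log lines, not taken from the training flags.
def combined_loss(ctc_loss: float, attn_decoder_loss: float, cr_loss: float,
                  ctc_scale: float = 0.1,
                  attn_scale: float = 0.9,
                  cr_scale: float = 0.02) -> float:
    return ctc_scale * ctc_loss + attn_scale * attn_decoder_loss + cr_scale * cr_loss

# Check against the batch 1700 tot_loss entry above:
# loss=0.2413, ctc_loss=0.1269, cr_loss=0.3698, attn_decoder_loss=0.2458
assert abs(combined_loss(0.1269, 0.2458, 0.3698) - 0.2413) < 5e-4
```
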
+2024-09-18 14:15:40,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=459300.0, ans=0.125 +2024-09-18 14:15:41,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=459300.0, ans=0.1 +2024-09-18 14:15:56,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=459340.0, ans=0.07 +2024-09-18 14:16:01,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=459340.0, ans=0.04949747468305833 +2024-09-18 14:16:08,972 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:16:10,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=459380.0, ans=0.125 +2024-09-18 14:16:14,981 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:16:20,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=459380.0, ans=0.0 +2024-09-18 14:16:40,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=459460.0, ans=0.125 +2024-09-18 14:16:45,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=459460.0, ans=0.125 +2024-09-18 14:16:48,019 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.462e+01 8.426e+01 8.833e+01 9.428e+01 1.268e+02, threshold=1.767e+02, percent-clipped=0.0 +2024-09-18 14:16:55,730 INFO [train.py:1198] (1/2) Epoch 26, batch 1750, loss[loss=0.222, ctc_loss=0.1129, cr_loss=0.3385, attn_decoder_loss=0.2266, over 29329.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1264, cr_loss=0.3686, attn_decoder_loss=0.2453, over 5788553.66 frames. ], batch size: 67, lr: 4.28e-03, grad_scale: 8.0
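
The WARNING lines from optim.py:487 summarize the optimizer's gradient-norm clipping. The five numbers after "grad-norm quartiles" read as the minimum, 25th, 50th, and 75th percentiles and the maximum of recently observed gradient norms; the reported threshold matches Clipping_scale times the median (in the entry just above, 2.0 * 8.833e+01 = 1.767e+02), and percent-clipped is the share of recent steps whose norm actually exceeded it. A rough sketch of such a scheme, with illustrative class, parameter names, and window size rather than icefall's actual API:

```python
import torch

# Illustrative quartile-based clipper: track recent gradient norms, report
# their quantiles, and clip to clipping_scale * median. All names and the
# window size here are assumptions, not icefall's implementation.
class QuartileClipper:
    def __init__(self, clipping_scale: float = 2.0, window: int = 400):
        self.clipping_scale = clipping_scale
        self.window = window
        self.norms: list[float] = []  # recent total gradient norms

    def clip_(self, parameters) -> float:
        grads = [p.grad for p in parameters if p.grad is not None]
        total_norm = torch.norm(torch.stack([g.norm() for g in grads]))
        self.norms = (self.norms + [total_norm.item()])[-self.window:]
        q = torch.tensor(self.norms).quantile(
            torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = self.clipping_scale * q[2].item()  # 2.0 x median, as logged
        if total_norm > threshold:  # scale all gradients down in place
            for g in grads:
                g.mul_(threshold / total_norm)
        return threshold
```
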
+2024-09-18 14:17:03,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=459500.0, ans=0.0 +2024-09-18 14:17:13,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=459540.0, ans=0.125 +2024-09-18 14:17:14,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=459540.0, ans=0.025 +2024-09-18 14:17:20,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=459540.0, ans=0.125 +2024-09-18 14:17:23,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=459540.0, ans=0.125 +2024-09-18 14:17:25,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=459580.0, ans=0.125 +2024-09-18 14:18:01,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=459660.0, ans=0.125 +2024-09-18 14:18:11,489 INFO [train.py:1198] (1/2) Epoch 26, batch 1800, loss[loss=0.2443, ctc_loss=0.1288, cr_loss=0.3744, attn_decoder_loss=0.2488, over 29694.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1263, cr_loss=0.3687, attn_decoder_loss=0.2456, over 5791346.14 frames. ], batch size: 83, lr: 4.28e-03, grad_scale: 8.0 +2024-09-18 14:18:45,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=459780.0, ans=0.125 +2024-09-18 14:18:57,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=459820.0, ans=0.1 +2024-09-18 14:19:13,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=459820.0, ans=0.125 +2024-09-18 14:19:15,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=459860.0, ans=0.125 +2024-09-18 14:19:23,997 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.402e+01 8.569e+01 8.931e+01 9.347e+01 1.247e+02, threshold=1.786e+02, percent-clipped=0.0 +2024-09-18 14:19:31,609 INFO [train.py:1198] (1/2) Epoch 26, batch 1850, loss[loss=0.2517, ctc_loss=0.1355, cr_loss=0.413, attn_decoder_loss=0.2554, over 29627.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1264, cr_loss=0.369, attn_decoder_loss=0.2455, over 5798372.87 frames. ], batch size: 86, lr: 4.27e-03, grad_scale: 8.0
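
The scaling.py:214 entries track ScheduledFloat values: module hyper-parameters such as dropout probabilities, skip rates, and balancer bounds that are functions of the training step rather than constants, with ans giving the value in effect at the logged batch_count. A minimal sketch of a piecewise-linear schedule in that spirit, with made-up breakpoints (the real schedules are defined per module in the model code):

```python
# Illustrative batch-count schedule; the breakpoints below are placeholders,
# not values from this recipe.
def scheduled_float(batch_count: float,
                    points=((0.0, 0.3), (20000.0, 0.1))) -> float:
    """Piecewise-linear in batch_count, clamped outside the breakpoints."""
    if batch_count <= points[0][0]:
        return points[0][1]
    if batch_count >= points[-1][0]:
        return points[-1][1]
    for (xa, ya), (xb, yb) in zip(points, points[1:]):
        if xa <= batch_count <= xb:
            t = (batch_count - xa) / (xb - xa)
            return ya + t * (yb - ya)

print(scheduled_float(459820.0))  # well past the last breakpoint -> 0.1
```
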
+2024-09-18 14:19:49,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=459940.0, ans=0.1 +2024-09-18 14:19:51,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=459940.0, ans=0.125 +2024-09-18 14:19:51,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=459940.0, ans=0.0 +2024-09-18 14:19:54,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=459940.0, ans=0.125 +2024-09-18 14:20:21,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=460020.0, ans=15.0 +2024-09-18 14:20:47,475 INFO [train.py:1198] (1/2) Epoch 26, batch 1900, loss[loss=0.2562, ctc_loss=0.1306, cr_loss=0.3824, attn_decoder_loss=0.2616, over 29714.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1265, cr_loss=0.3695, attn_decoder_loss=0.246, over 5805388.00 frames. ], batch size: 89, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:20:52,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=460100.0, ans=0.2 +2024-09-18 14:20:59,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=460100.0, ans=0.2 +2024-09-18 14:21:32,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=460220.0, ans=0.125 +2024-09-18 14:21:38,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=460220.0, ans=0.125 +2024-09-18 14:21:54,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.80 vs. limit=22.5 +2024-09-18 14:21:56,075 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.154e+01 8.718e+01 9.273e+01 9.664e+01 1.625e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-18 14:22:00,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=460260.0, ans=0.09899494936611666 +2024-09-18 14:22:03,709 INFO [train.py:1198] (1/2) Epoch 26, batch 1950, loss[loss=0.2502, ctc_loss=0.1364, cr_loss=0.3999, attn_decoder_loss=0.254, over 29438.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1273, cr_loss=0.3713, attn_decoder_loss=0.2471, over 5820002.42 frames. ], batch size: 78, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:22:42,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=2.92 vs. limit=15.0 +2024-09-18 14:22:49,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=460420.0, ans=0.025 +2024-09-18 14:23:03,046 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.71 vs. limit=15.0
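
The Whitening lines from scaling.py:1024 are diagnostics for modules that softly push activations toward a decorrelated ("white") distribution: metric measures how far the channel covariance is from isotropic, and the module intervenes, via a gradient penalty, only once the metric exceeds the logged limit. Note that the limit itself can be scheduled; the ScheduledFloat entry for out_whiten.whitening_limit above (ans=15.0) is exactly such a value. A sketch of one plausible metric, illustrative rather than icefall's exact formula (1.0 means perfectly white; larger means more anisotropic):

```python
import torch

# Illustrative whitening measure: ratio of the covariance spectrum's second
# moment to its squared mean. Equals 1.0 when all eigenvalues are equal
# (fully decorrelated, equal-variance channels) and grows as the spectrum
# spreads out. This is an assumption, not icefall's exact computation.
def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> float:
    n, c = x.shape                        # (num_frames, num_channels)
    x = x.reshape(n, num_groups, c // num_groups)
    metrics = []
    for g in range(num_groups):
        xg = x[:, g, :]
        cov = (xg.T @ xg) / n             # per-group channel covariance
        eig = torch.linalg.eigvalsh(cov)  # covariance is symmetric
        metrics.append((eig.pow(2).mean() / eig.mean().pow(2)).item())
    return sum(metrics) / num_groups
```

Under this reading, an entry like metric=2.92 vs. limit=15.0 is comfortably inside the allowed range, while values near the limit flag layers whose activations are drifting toward strong correlation.
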
+2024-09-18 14:23:05,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=460420.0, ans=0.0 +2024-09-18 14:23:23,259 INFO [train.py:1198] (1/2) Epoch 26, batch 2000, loss[loss=0.2173, ctc_loss=0.1073, cr_loss=0.3262, attn_decoder_loss=0.2223, over 29340.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1277, cr_loss=0.3718, attn_decoder_loss=0.2475, over 5796740.22 frames. ], batch size: 67, lr: 4.27e-03, grad_scale: 16.0 +2024-09-18 14:23:25,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=460500.0, ans=0.1 +2024-09-18 14:23:38,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=460540.0, ans=0.0 +2024-09-18 14:23:48,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.90 vs. limit=15.0 +2024-09-18 14:23:49,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=460540.0, ans=0.025 +2024-09-18 14:24:04,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=460580.0, ans=0.125 +2024-09-18 14:24:21,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=460620.0, ans=0.1 +2024-09-18 14:24:33,237 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.635e+01 8.701e+01 9.104e+01 9.478e+01 2.564e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-18 14:24:39,264 INFO [train.py:1198] (1/2) Epoch 26, batch 2050, loss[loss=0.2182, ctc_loss=0.1089, cr_loss=0.3312, attn_decoder_loss=0.223, over 29411.00 frames. ], tot_loss[loss=0.2421, ctc_loss=0.1274, cr_loss=0.3708, attn_decoder_loss=0.2466, over 5787789.93 frames. ], batch size: 70, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:24:44,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=460700.0, ans=0.125 +2024-09-18 14:25:06,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=460740.0, ans=0.025 +2024-09-18 14:25:37,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=460820.0, ans=0.0 +2024-09-18 14:25:55,178 INFO [train.py:1198] (1/2) Epoch 26, batch 2100, loss[loss=0.2436, ctc_loss=0.1215, cr_loss=0.3521, attn_decoder_loss=0.2494, over 29758.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1266, cr_loss=0.3694, attn_decoder_loss=0.2458, over 5800291.75 frames. ], batch size: 81, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:25:59,852 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.69 vs. 
limit=15.0 +2024-09-18 14:26:29,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=460980.0, ans=0.0 +2024-09-18 14:26:47,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=461020.0, ans=0.125 +2024-09-18 14:26:49,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=461020.0, ans=0.0 +2024-09-18 14:26:54,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=461020.0, ans=0.125 +2024-09-18 14:27:08,387 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.265e+01 8.897e+01 9.459e+01 1.093e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-18 14:27:08,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=461060.0, ans=0.1 +2024-09-18 14:27:14,496 INFO [train.py:1198] (1/2) Epoch 26, batch 2150, loss[loss=0.2474, ctc_loss=0.1353, cr_loss=0.3873, attn_decoder_loss=0.2512, over 29430.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1258, cr_loss=0.3682, attn_decoder_loss=0.2452, over 5815491.42 frames. ], batch size: 78, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:27:22,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=461100.0, ans=0.125 +2024-09-18 14:27:24,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=461100.0, ans=0.1 +2024-09-18 14:27:52,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=461180.0, ans=0.125 +2024-09-18 14:28:03,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=461220.0, ans=0.125 +2024-09-18 14:28:13,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=461220.0, ans=22.5 +2024-09-18 14:28:15,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=461260.0, ans=0.125 +2024-09-18 14:28:19,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.13 vs. limit=15.0 +2024-09-18 14:28:26,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=461260.0, ans=0.0 +2024-09-18 14:28:29,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=461300.0, ans=0.125 +2024-09-18 14:28:30,683 INFO [train.py:1198] (1/2) Epoch 26, batch 2200, loss[loss=0.2513, ctc_loss=0.1347, cr_loss=0.4038, attn_decoder_loss=0.2553, over 29633.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.126, cr_loss=0.3682, attn_decoder_loss=0.2452, over 5812762.09 frames. ], batch size: 86, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:28:48,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.74 vs. 
limit=15.0 +2024-09-18 14:28:50,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=461340.0, ans=0.0 +2024-09-18 14:28:55,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=461340.0, ans=0.2 +2024-09-18 14:29:32,228 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=10.19 vs. limit=12.0 +2024-09-18 14:29:40,320 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.263e+01 8.743e+01 9.086e+01 9.862e+01 3.457e+02, threshold=1.817e+02, percent-clipped=3.0 +2024-09-18 14:29:43,847 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:29:45,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=461500.0, ans=0.0 +2024-09-18 14:29:46,510 INFO [train.py:1198] (1/2) Epoch 26, batch 2250, loss[loss=0.2402, ctc_loss=0.1229, cr_loss=0.3584, attn_decoder_loss=0.2453, over 29686.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1257, cr_loss=0.3674, attn_decoder_loss=0.2451, over 5811632.87 frames. ], batch size: 82, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:29:48,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=461500.0, ans=0.1 +2024-09-18 14:29:50,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys.whitening_limit, batch_count=461500.0, ans=6.0 +2024-09-18 14:30:11,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=461540.0, ans=0.0 +2024-09-18 14:30:11,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=461540.0, ans=0.125 +2024-09-18 14:30:19,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=461580.0, ans=0.125 +2024-09-18 14:30:37,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=461620.0, ans=0.2 +2024-09-18 14:30:42,962 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.08 vs. limit=12.0 +2024-09-18 14:30:54,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.15 vs. limit=22.5 +2024-09-18 14:31:06,202 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.19 vs. limit=22.5 +2024-09-18 14:31:07,027 INFO [train.py:1198] (1/2) Epoch 26, batch 2300, loss[loss=0.214, ctc_loss=0.1036, cr_loss=0.3153, attn_decoder_loss=0.2192, over 29334.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1251, cr_loss=0.3658, attn_decoder_loss=0.2442, over 5800864.24 frames. 
], batch size: 71, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:31:08,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=461700.0, ans=0.125 +2024-09-18 14:31:11,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=461700.0, ans=0.0 +2024-09-18 14:31:12,623 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.63 vs. limit=15.0 +2024-09-18 14:31:13,761 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.79 vs. limit=15.0 +2024-09-18 14:31:17,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=461700.0, ans=0.1 +2024-09-18 14:31:22,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=461740.0, ans=0.125 +2024-09-18 14:31:50,379 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.09 vs. limit=10.0 +2024-09-18 14:31:51,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=461820.0, ans=0.0 +2024-09-18 14:32:14,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=461860.0, ans=0.1 +2024-09-18 14:32:16,744 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.100e+01 8.397e+01 8.981e+01 9.856e+01 3.624e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-18 14:32:20,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.17 vs. limit=15.0 +2024-09-18 14:32:22,730 INFO [train.py:1198] (1/2) Epoch 26, batch 2350, loss[loss=0.255, ctc_loss=0.1327, cr_loss=0.3833, attn_decoder_loss=0.2601, over 29702.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1253, cr_loss=0.3662, attn_decoder_loss=0.2447, over 5805401.43 frames. ], batch size: 83, lr: 4.27e-03, grad_scale: 8.0 +2024-09-18 14:32:22,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=461900.0, ans=0.125 +2024-09-18 14:32:28,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=461900.0, ans=0.125 +2024-09-18 14:32:36,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=461940.0, ans=0.125 +2024-09-18 14:32:58,335 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.80 vs. limit=6.0 +2024-09-18 14:33:00,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=461980.0, ans=0.125 +2024-09-18 14:33:14,895 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.62 vs. 
limit=15.0 +2024-09-18 14:33:27,384 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.97 vs. limit=15.0 +2024-09-18 14:33:28,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=462060.0, ans=0.04949747468305833 +2024-09-18 14:33:29,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=462060.0, ans=0.0 +2024-09-18 14:33:38,605 INFO [train.py:1198] (1/2) Epoch 26, batch 2400, loss[loss=0.2357, ctc_loss=0.1159, cr_loss=0.3401, attn_decoder_loss=0.2414, over 29539.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1262, cr_loss=0.368, attn_decoder_loss=0.2452, over 5808333.32 frames. ], batch size: 76, lr: 4.26e-03, grad_scale: 16.0 +2024-09-18 14:34:07,373 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.86 vs. limit=15.0 +2024-09-18 14:34:28,560 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.66 vs. limit=15.0 +2024-09-18 14:34:29,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=462220.0, ans=0.125 +2024-09-18 14:34:36,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=462220.0, ans=0.1 +2024-09-18 14:34:36,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=462220.0, ans=0.0 +2024-09-18 14:34:51,672 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.393e+01 8.579e+01 9.212e+01 9.914e+01 2.760e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-18 14:34:58,422 INFO [train.py:1198] (1/2) Epoch 26, batch 2450, loss[loss=0.2438, ctc_loss=0.1274, cr_loss=0.3741, attn_decoder_loss=0.2484, over 29730.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1268, cr_loss=0.3693, attn_decoder_loss=0.2461, over 5784553.62 frames. ], batch size: 82, lr: 4.26e-03, grad_scale: 8.0 +2024-09-18 14:35:10,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=462300.0, ans=0.0 +2024-09-18 14:35:10,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=462300.0, ans=0.125 +2024-09-18 14:35:13,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=462340.0, ans=0.125 +2024-09-18 14:36:10,772 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.54 vs. limit=6.0 +2024-09-18 14:36:13,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=462500.0, ans=0.0 +2024-09-18 14:36:14,449 INFO [train.py:1198] (1/2) Epoch 26, batch 2500, loss[loss=0.243, ctc_loss=0.1254, cr_loss=0.3712, attn_decoder_loss=0.2478, over 29629.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1268, cr_loss=0.3693, attn_decoder_loss=0.2461, over 5794491.21 frames. 
], batch size: 86, lr: 4.26e-03, grad_scale: 8.0 +2024-09-18 14:36:20,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=462500.0, ans=0.125 +2024-09-18 14:36:25,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=462500.0, ans=0.125 +2024-09-18 14:36:25,994 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.18 vs. limit=22.5 +2024-09-18 14:36:31,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=462540.0, ans=0.125 +2024-09-18 14:37:06,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=462620.0, ans=0.125 +2024-09-18 14:37:20,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=462660.0, ans=0.125 +2024-09-18 14:37:25,679 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.473e+01 8.987e+01 9.500e+01 1.769e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 14:37:29,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.47 vs. limit=22.5 +2024-09-18 14:37:30,373 INFO [train.py:1198] (1/2) Epoch 26, batch 2550, loss[loss=0.2168, ctc_loss=0.114, cr_loss=0.3536, attn_decoder_loss=0.2204, over 29364.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1265, cr_loss=0.369, attn_decoder_loss=0.2458, over 5797716.86 frames. ], batch size: 67, lr: 4.26e-03, grad_scale: 8.0 +2024-09-18 14:38:08,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=462780.0, ans=0.125 +2024-09-18 14:38:27,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=462820.0, ans=0.125 +2024-09-18 14:38:45,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=462860.0, ans=0.125 +2024-09-18 14:38:48,007 INFO [train.py:1198] (1/2) Epoch 26, batch 2600, loss[loss=0.2415, ctc_loss=0.1231, cr_loss=0.3625, attn_decoder_loss=0.2466, over 29425.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1267, cr_loss=0.3694, attn_decoder_loss=0.2462, over 5795234.69 frames. ], batch size: 78, lr: 4.26e-03, grad_scale: 8.0 +2024-09-18 14:38:55,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=462900.0, ans=0.125 +2024-09-18 14:39:01,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.68 vs. 
limit=22.5 +2024-09-18 14:39:13,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=462940.0, ans=0.09899494936611666 +2024-09-18 14:39:21,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=462980.0, ans=0.125 +2024-09-18 14:39:21,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=462980.0, ans=0.125 +2024-09-18 14:39:32,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=462980.0, ans=0.2 +2024-09-18 14:39:40,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=463020.0, ans=0.125 +2024-09-18 14:39:52,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=463060.0, ans=0.0 +2024-09-18 14:39:59,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=463060.0, ans=0.2 +2024-09-18 14:40:01,220 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.375e+01 8.942e+01 9.564e+01 2.475e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 14:40:05,704 INFO [train.py:1198] (1/2) Epoch 26, batch 2650, loss[loss=0.2529, ctc_loss=0.1425, cr_loss=0.4007, attn_decoder_loss=0.2563, over 29199.00 frames. ], tot_loss[loss=0.2418, ctc_loss=0.1267, cr_loss=0.3695, attn_decoder_loss=0.2464, over 5801936.54 frames. ], batch size: 100, lr: 4.26e-03, grad_scale: 8.0 +2024-09-18 14:40:34,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=463180.0, ans=0.125 +2024-09-18 14:40:44,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=463180.0, ans=0.1 +2024-09-18 14:41:00,891 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:41:15,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.47 vs. limit=6.0 +2024-09-18 14:41:19,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=463260.0, ans=0.2 +2024-09-18 14:41:23,922 INFO [train.py:1198] (1/2) Epoch 26, batch 2700, loss[loss=0.2458, ctc_loss=0.1243, cr_loss=0.3777, attn_decoder_loss=0.2509, over 29525.00 frames. ], tot_loss[loss=0.242, ctc_loss=0.1269, cr_loss=0.3703, attn_decoder_loss=0.2466, over 5795864.43 frames. 
], batch size: 87, lr: 4.26e-03, grad_scale: 8.0 +2024-09-18 14:41:27,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=463300.0, ans=0.0 +2024-09-18 14:41:49,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=463340.0, ans=0.125 +2024-09-18 14:42:08,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=463420.0, ans=0.0 +2024-09-18 14:42:26,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=463460.0, ans=0.09899494936611666 +2024-09-18 14:42:35,432 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.497e+01 8.933e+01 9.409e+01 1.999e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 14:42:40,227 INFO [train.py:1198] (1/2) Epoch 26, batch 2750, loss[loss=0.2447, ctc_loss=0.1305, cr_loss=0.3787, attn_decoder_loss=0.2489, over 29514.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1262, cr_loss=0.3688, attn_decoder_loss=0.2454, over 5795203.18 frames. ], batch size: 75, lr: 4.26e-03, grad_scale: 8.0 +2024-09-18 14:43:32,714 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 14:43:40,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=463620.0, ans=0.0 +2024-09-18 14:43:42,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=463660.0, ans=0.125 +2024-09-18 14:43:43,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=463660.0, ans=15.0 +2024-09-18 14:43:57,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=463700.0, ans=0.1 +2024-09-18 14:43:58,322 INFO [train.py:1198] (1/2) Epoch 26, batch 2800, loss[loss=0.2731, ctc_loss=0.1689, cr_loss=0.4204, attn_decoder_loss=0.2754, over 19580.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1266, cr_loss=0.369, attn_decoder_loss=0.2456, over 5776537.86 frames. ], batch size: 210, lr: 4.26e-03, grad_scale: 16.0 +2024-09-18 14:44:16,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=463740.0, ans=0.2 +2024-09-18 14:44:40,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=463780.0, ans=0.125 +2024-09-18 14:44:48,570 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.04 vs. 
limit=6.0 +2024-09-18 14:44:52,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=463820.0, ans=0.125 +2024-09-18 14:45:03,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=463860.0, ans=0.2 +2024-09-18 14:45:10,995 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.701e+01 9.139e+01 9.864e+01 2.017e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-18 14:45:12,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=463860.0, ans=0.0 +2024-09-18 14:45:15,526 INFO [train.py:1198] (1/2) Epoch 26, batch 2850, loss[loss=0.2405, ctc_loss=0.1235, cr_loss=0.3734, attn_decoder_loss=0.2452, over 29510.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1271, cr_loss=0.37, attn_decoder_loss=0.2462, over 5761716.33 frames. ], batch size: 77, lr: 4.26e-03, grad_scale: 16.0 +2024-09-18 14:45:40,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=463940.0, ans=0.125 +2024-09-18 14:45:40,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=463940.0, ans=0.125 +2024-09-18 14:45:49,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=463980.0, ans=0.125 +2024-09-18 14:45:50,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=463980.0, ans=0.125 +2024-09-18 14:46:11,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=464020.0, ans=0.125 +2024-09-18 14:46:11,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=464020.0, ans=0.125 +2024-09-18 14:46:16,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.14 vs. limit=15.0 +2024-09-18 14:46:40,975 INFO [train.py:1198] (1/2) Epoch 26, batch 2900, loss[loss=0.2466, ctc_loss=0.1368, cr_loss=0.4046, attn_decoder_loss=0.2498, over 29428.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1279, cr_loss=0.3721, attn_decoder_loss=0.2473, over 5787684.10 frames. 
], batch size: 79, lr: 4.26e-03, grad_scale: 8.0 +2024-09-18 14:46:42,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=464100.0, ans=0.125 +2024-09-18 14:47:05,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=464140.0, ans=0.05 +2024-09-18 14:47:11,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=464180.0, ans=0.0 +2024-09-18 14:47:40,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=464260.0, ans=0.0 +2024-09-18 14:47:53,451 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.170e+01 8.512e+01 9.090e+01 9.867e+01 2.207e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-18 14:47:56,513 INFO [train.py:1198] (1/2) Epoch 26, batch 2950, loss[loss=0.2421, ctc_loss=0.1291, cr_loss=0.3936, attn_decoder_loss=0.2459, over 29502.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1267, cr_loss=0.3693, attn_decoder_loss=0.2459, over 5782938.68 frames. ], batch size: 75, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:47:58,893 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.53 vs. limit=15.0 +2024-09-18 14:48:22,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=464340.0, ans=0.0 +2024-09-18 14:48:58,423 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.36 vs. limit=15.0 +2024-09-18 14:49:15,212 INFO [train.py:1198] (1/2) Epoch 26, batch 3000, loss[loss=0.233, ctc_loss=0.1145, cr_loss=0.3339, attn_decoder_loss=0.2387, over 29740.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1264, cr_loss=0.3691, attn_decoder_loss=0.2454, over 5782548.70 frames. ], batch size: 81, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:49:15,213 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 14:49:33,742 INFO [train.py:1230] (1/2) Epoch 26, validation: loss=0.2113, ctc_loss=0.03775, cr_loss=5.571e-15, attn_decoder_loss=0.2305, over 944034.00 frames. +2024-09-18 14:49:33,743 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 14:50:04,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=464580.0, ans=0.125 +2024-09-18 14:50:21,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=464620.0, ans=0.2 +2024-09-18 14:50:24,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=464620.0, ans=0.035 +2024-09-18 14:50:35,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=464660.0, ans=0.0 +2024-09-18 14:50:40,958 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.16 vs. limit=15.0 +2024-09-18 14:50:45,297 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.30 vs. 
limit=15.0 +2024-09-18 14:50:49,037 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.963e+01 8.504e+01 9.014e+01 9.631e+01 1.549e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-18 14:50:52,202 INFO [train.py:1198] (1/2) Epoch 26, batch 3050, loss[loss=0.2249, ctc_loss=0.1125, cr_loss=0.3476, attn_decoder_loss=0.2297, over 29526.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1266, cr_loss=0.3701, attn_decoder_loss=0.246, over 5776357.31 frames. ], batch size: 76, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:51:03,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=464700.0, ans=0.125 +2024-09-18 14:51:35,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=464780.0, ans=0.0 +2024-09-18 14:51:37,370 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.41 vs. limit=15.0 +2024-09-18 14:51:41,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=464820.0, ans=0.125 +2024-09-18 14:52:08,318 INFO [train.py:1198] (1/2) Epoch 26, batch 3100, loss[loss=0.2667, ctc_loss=0.1513, cr_loss=0.4183, attn_decoder_loss=0.2702, over 29281.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1265, cr_loss=0.3693, attn_decoder_loss=0.2458, over 5776528.83 frames. ], batch size: 100, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:52:22,912 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.58 vs. limit=22.5 +2024-09-18 14:52:45,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=464980.0, ans=0.125 +2024-09-18 14:52:47,169 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.70 vs. limit=22.5 +2024-09-18 14:52:49,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=464980.0, ans=0.125 +2024-09-18 14:53:10,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=465060.0, ans=0.125 +2024-09-18 14:53:17,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=465060.0, ans=0.2 +2024-09-18 14:53:23,452 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.570e+01 9.069e+01 9.533e+01 2.948e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-18 14:53:26,521 INFO [train.py:1198] (1/2) Epoch 26, batch 3150, loss[loss=0.2645, ctc_loss=0.1485, cr_loss=0.4046, attn_decoder_loss=0.2684, over 28875.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1265, cr_loss=0.3687, attn_decoder_loss=0.2459, over 5782187.12 frames. ], batch size: 104, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:54:38,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=465260.0, ans=0.125 +2024-09-18 14:54:44,437 INFO [train.py:1198] (1/2) Epoch 26, batch 3200, loss[loss=0.2405, ctc_loss=0.1249, cr_loss=0.3684, attn_decoder_loss=0.2451, over 29420.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1263, cr_loss=0.3681, attn_decoder_loss=0.2454, over 5791930.80 frames. 
], batch size: 79, lr: 4.25e-03, grad_scale: 16.0 +2024-09-18 14:55:06,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=465340.0, ans=0.125 +2024-09-18 14:55:10,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.45 vs. limit=22.5 +2024-09-18 14:55:26,332 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.35 vs. limit=15.0 +2024-09-18 14:55:38,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=465420.0, ans=0.05 +2024-09-18 14:55:41,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=465420.0, ans=0.125 +2024-09-18 14:55:41,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=465420.0, ans=0.0 +2024-09-18 14:55:41,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=465420.0, ans=0.1 +2024-09-18 14:55:51,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=465460.0, ans=0.1 +2024-09-18 14:55:59,029 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.879e+01 8.413e+01 8.869e+01 9.551e+01 1.271e+02, threshold=1.774e+02, percent-clipped=0.0 +2024-09-18 14:56:00,550 INFO [train.py:1198] (1/2) Epoch 26, batch 3250, loss[loss=0.2562, ctc_loss=0.1412, cr_loss=0.4019, attn_decoder_loss=0.2601, over 29696.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1267, cr_loss=0.3692, attn_decoder_loss=0.2459, over 5798570.07 frames. ], batch size: 84, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:56:04,795 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.44 vs. limit=12.0 +2024-09-18 14:56:11,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=465500.0, ans=0.025 +2024-09-18 14:56:13,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=465500.0, ans=0.1 +2024-09-18 14:56:14,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=465540.0, ans=0.125 +2024-09-18 14:56:20,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=465540.0, ans=0.125 +2024-09-18 14:56:50,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=465620.0, ans=0.5 +2024-09-18 14:56:53,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=465620.0, ans=0.125 +2024-09-18 14:57:18,688 INFO [train.py:1198] (1/2) Epoch 26, batch 3300, loss[loss=0.2509, ctc_loss=0.1258, cr_loss=0.3711, attn_decoder_loss=0.2565, over 28308.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1262, cr_loss=0.3679, attn_decoder_loss=0.2449, over 5796095.28 frames. 
], batch size: 111, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:57:25,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=465700.0, ans=0.0 +2024-09-18 14:57:28,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=465700.0, ans=0.2 +2024-09-18 14:57:46,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=465740.0, ans=0.0 +2024-09-18 14:57:46,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.10 vs. limit=10.0 +2024-09-18 14:57:49,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=465780.0, ans=0.07 +2024-09-18 14:57:56,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=465780.0, ans=0.125 +2024-09-18 14:58:08,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=465820.0, ans=0.2 +2024-09-18 14:58:13,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=465820.0, ans=0.125 +2024-09-18 14:58:13,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=465820.0, ans=0.5 +2024-09-18 14:58:14,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=465820.0, ans=0.0 +2024-09-18 14:58:15,687 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.37 vs. limit=6.0 +2024-09-18 14:58:34,537 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.093e+01 8.655e+01 9.163e+01 9.654e+01 2.275e+02, threshold=1.833e+02, percent-clipped=2.0 +2024-09-18 14:58:36,149 INFO [train.py:1198] (1/2) Epoch 26, batch 3350, loss[loss=0.2546, ctc_loss=0.1353, cr_loss=0.3886, attn_decoder_loss=0.2592, over 28830.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1268, cr_loss=0.369, attn_decoder_loss=0.2457, over 5772760.35 frames. ], batch size: 104, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:58:58,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.81 vs. limit=22.5 +2024-09-18 14:59:23,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=466020.0, ans=0.125 +2024-09-18 14:59:33,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=466020.0, ans=0.2 +2024-09-18 14:59:51,875 INFO [train.py:1198] (1/2) Epoch 26, batch 3400, loss[loss=0.2212, ctc_loss=0.1112, cr_loss=0.349, attn_decoder_loss=0.2257, over 29373.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1269, cr_loss=0.3697, attn_decoder_loss=0.2457, over 5765872.94 frames. 
], batch size: 67, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 14:59:52,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=466100.0, ans=0.0 +2024-09-18 15:00:08,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=466140.0, ans=0.125 +2024-09-18 15:00:20,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=466180.0, ans=0.05 +2024-09-18 15:00:28,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=466180.0, ans=0.0 +2024-09-18 15:00:47,858 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.25 vs. limit=15.0 +2024-09-18 15:01:08,279 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.379e+01 8.854e+01 9.422e+01 2.123e+02, threshold=1.771e+02, percent-clipped=1.0 +2024-09-18 15:01:08,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=466300.0, ans=0.125 +2024-09-18 15:01:08,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=466300.0, ans=0.2 +2024-09-18 15:01:09,878 INFO [train.py:1198] (1/2) Epoch 26, batch 3450, loss[loss=0.2485, ctc_loss=0.1282, cr_loss=0.3732, attn_decoder_loss=0.2535, over 28194.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1272, cr_loss=0.3708, attn_decoder_loss=0.2461, over 5773841.97 frames. ], batch size: 111, lr: 4.25e-03, grad_scale: 8.0 +2024-09-18 15:01:37,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=466340.0, ans=0.125 +2024-09-18 15:02:24,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.36 vs. limit=15.0 +2024-09-18 15:02:28,038 INFO [train.py:1198] (1/2) Epoch 26, batch 3500, loss[loss=0.2146, ctc_loss=0.102, cr_loss=0.3253, attn_decoder_loss=0.2199, over 29352.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1268, cr_loss=0.37, attn_decoder_loss=0.2455, over 5775682.97 frames. ], batch size: 71, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:02:32,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=466500.0, ans=0.1 +2024-09-18 15:03:35,770 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.80 vs. limit=15.0 +2024-09-18 15:03:36,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=466660.0, ans=0.0 +2024-09-18 15:03:36,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=466660.0, ans=0.125 +2024-09-18 15:03:40,927 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.124e+01 8.579e+01 9.256e+01 9.884e+01 2.781e+02, threshold=1.851e+02, percent-clipped=2.0 +2024-09-18 15:03:42,426 INFO [train.py:1198] (1/2) Epoch 26, batch 3550, loss[loss=0.2423, ctc_loss=0.1206, cr_loss=0.3583, attn_decoder_loss=0.2479, over 29714.00 frames. 
], tot_loss[loss=0.2409, ctc_loss=0.1262, cr_loss=0.3689, attn_decoder_loss=0.2454, over 5782854.34 frames. ], batch size: 89, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:03:58,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=466740.0, ans=0.125 +2024-09-18 15:04:10,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=466780.0, ans=0.0 +2024-09-18 15:04:19,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=466780.0, ans=0.1 +2024-09-18 15:04:27,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=466820.0, ans=0.125 +2024-09-18 15:04:29,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=466820.0, ans=0.0 +2024-09-18 15:04:36,751 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.98 vs. limit=22.5 +2024-09-18 15:04:37,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=466820.0, ans=0.0 +2024-09-18 15:04:49,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=466860.0, ans=0.125 +2024-09-18 15:04:52,456 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:04:55,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=466900.0, ans=0.125 +2024-09-18 15:04:56,570 INFO [train.py:1198] (1/2) Epoch 26, batch 3600, loss[loss=0.23, ctc_loss=0.1167, cr_loss=0.3606, attn_decoder_loss=0.2346, over 29460.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.126, cr_loss=0.3687, attn_decoder_loss=0.2455, over 5791798.57 frames. ], batch size: 77, lr: 4.24e-03, grad_scale: 16.0 +2024-09-18 15:04:58,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=466900.0, ans=0.1 +2024-09-18 15:05:11,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.63 vs. limit=12.0 +2024-09-18 15:05:19,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=466940.0, ans=0.125 +2024-09-18 15:05:29,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=466980.0, ans=0.125 +2024-09-18 15:05:51,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=467020.0, ans=0.125 +2024-09-18 15:05:58,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=467060.0, ans=0.125 +2024-09-18 15:06:07,761 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.81 vs. 
limit=15.0 +2024-09-18 15:06:12,879 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.659e+01 8.525e+01 9.113e+01 9.643e+01 7.477e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-18 15:06:12,900 INFO [train.py:1198] (1/2) Epoch 26, batch 3650, loss[loss=0.2566, ctc_loss=0.1414, cr_loss=0.3943, attn_decoder_loss=0.2607, over 29510.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.126, cr_loss=0.3685, attn_decoder_loss=0.2451, over 5794734.22 frames. ], batch size: 90, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:06:21,025 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.86 vs. limit=22.5 +2024-09-18 15:06:35,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=467140.0, ans=0.0 +2024-09-18 15:06:45,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=467180.0, ans=0.0 +2024-09-18 15:06:58,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=467220.0, ans=0.125 +2024-09-18 15:06:59,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=467220.0, ans=0.0 +2024-09-18 15:07:05,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=467220.0, ans=0.5 +2024-09-18 15:07:07,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.45 vs. limit=15.0 +2024-09-18 15:07:27,485 INFO [train.py:1198] (1/2) Epoch 26, batch 3700, loss[loss=0.2447, ctc_loss=0.1299, cr_loss=0.3591, attn_decoder_loss=0.2495, over 29712.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1259, cr_loss=0.3683, attn_decoder_loss=0.2452, over 5805408.99 frames. ], batch size: 84, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:07:30,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=467300.0, ans=0.025 +2024-09-18 15:08:02,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=467380.0, ans=0.125 +2024-09-18 15:08:03,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=467380.0, ans=0.0 +2024-09-18 15:08:30,860 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.56 vs. limit=22.5 +2024-09-18 15:08:38,743 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.24 vs. limit=15.0 +2024-09-18 15:08:43,721 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.467e+01 8.301e+01 8.766e+01 9.365e+01 1.565e+02, threshold=1.753e+02, percent-clipped=0.0 +2024-09-18 15:08:43,743 INFO [train.py:1198] (1/2) Epoch 26, batch 3750, loss[loss=0.2182, ctc_loss=0.116, cr_loss=0.3582, attn_decoder_loss=0.2216, over 29364.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1259, cr_loss=0.3686, attn_decoder_loss=0.2448, over 5808658.04 frames. 
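The `optim.py` WARNING lines report the quartiles (min / 25% / median / 75% / max) of recently observed gradient norms, and in every entry the threshold equals `Clipping_scale` times the logged median (e.g. 2.0 × 9.113e+01 = 1.823e+02 just above). A minimal sketch of clipping against a scaled running median, assuming a fixed-size history window:

```python
# Clip the global gradient norm at clipping_scale * median(recent norms),
# matching the relationship visible in the WARNING lines. A sketch only;
# icefall's optimizer integrates this into its update step.

from collections import deque

import torch

class MedianGradClipper:
    def __init__(self, clipping_scale: float = 2.0, window: int = 200):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)  # recent global grad norms

    def clip_(self, parameters) -> float:
        params = [p for p in parameters if p.grad is not None]
        norm = torch.linalg.vector_norm(
            torch.stack([torch.linalg.vector_norm(p.grad) for p in params])
        ).item()
        self.norms.append(norm)
        median = sorted(self.norms)[len(self.norms) // 2]
        threshold = self.clipping_scale * median
        if norm > threshold:  # scale all gradients down onto the threshold
            for p in params:
                p.grad.mul_(threshold / norm)
        return norm
```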
], batch size: 67, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:08:52,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=467500.0, ans=0.07 +2024-09-18 15:08:54,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.17 vs. limit=15.0 +2024-09-18 15:08:58,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=467540.0, ans=0.125 +2024-09-18 15:09:00,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=467540.0, ans=0.2 +2024-09-18 15:09:00,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=467540.0, ans=0.1 +2024-09-18 15:09:03,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=467540.0, ans=0.5 +2024-09-18 15:09:10,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=467540.0, ans=0.125 +2024-09-18 15:09:29,027 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.61 vs. limit=15.0 +2024-09-18 15:09:58,345 INFO [train.py:1198] (1/2) Epoch 26, batch 3800, loss[loss=0.2562, ctc_loss=0.1351, cr_loss=0.3971, attn_decoder_loss=0.2609, over 29650.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1259, cr_loss=0.3683, attn_decoder_loss=0.2447, over 5799422.46 frames. ], batch size: 86, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:10:22,471 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:10:26,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=467780.0, ans=0.125 +2024-09-18 15:10:37,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=467780.0, ans=0.125 +2024-09-18 15:10:47,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=467820.0, ans=0.0 +2024-09-18 15:10:56,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=467860.0, ans=0.0 +2024-09-18 15:11:01,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=467860.0, ans=0.0 +2024-09-18 15:11:10,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.whiten.whitening_limit, batch_count=467860.0, ans=12.0 +2024-09-18 15:11:12,681 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.531e+01 8.723e+01 9.262e+01 9.820e+01 3.411e+02, threshold=1.852e+02, percent-clipped=3.0 +2024-09-18 15:11:12,704 INFO [train.py:1198] (1/2) Epoch 26, batch 3850, loss[loss=0.26, ctc_loss=0.1372, cr_loss=0.3884, attn_decoder_loss=0.265, over 29237.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1256, cr_loss=0.3676, attn_decoder_loss=0.2446, over 5813243.99 frames. 
], batch size: 100, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:11:25,673 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.63 vs. limit=15.0 +2024-09-18 15:11:44,742 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.31 vs. limit=12.0 +2024-09-18 15:11:53,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=467980.0, ans=0.0 +2024-09-18 15:11:58,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=468020.0, ans=0.125 +2024-09-18 15:12:29,120 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.53 vs. limit=22.5 +2024-09-18 15:12:29,506 INFO [train.py:1198] (1/2) Epoch 26, batch 3900, loss[loss=0.2536, ctc_loss=0.1317, cr_loss=0.385, attn_decoder_loss=0.2585, over 29611.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1265, cr_loss=0.3697, attn_decoder_loss=0.2455, over 5818334.34 frames. ], batch size: 86, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:12:37,779 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.44 vs. limit=15.0 +2024-09-18 15:12:40,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=468100.0, ans=0.125 +2024-09-18 15:12:46,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=468140.0, ans=0.0 +2024-09-18 15:13:06,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=468180.0, ans=0.09899494936611666 +2024-09-18 15:13:08,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=468180.0, ans=0.0 +2024-09-18 15:13:36,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=468260.0, ans=0.1 +2024-09-18 15:13:37,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=468260.0, ans=0.2 +2024-09-18 15:13:43,697 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.582e+01 9.076e+01 9.520e+01 1.404e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-18 15:13:43,724 INFO [train.py:1198] (1/2) Epoch 26, batch 3950, loss[loss=0.2691, ctc_loss=0.1544, cr_loss=0.4464, attn_decoder_loss=0.2719, over 29478.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1263, cr_loss=0.3691, attn_decoder_loss=0.2455, over 5837710.77 frames. ], batch size: 97, lr: 4.24e-03, grad_scale: 8.0 +2024-09-18 15:13:52,533 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.28 vs. limit=6.0 +2024-09-18 15:13:53,668 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.39 vs. 
limit=22.5 +2024-09-18 15:14:10,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=468340.0, ans=0.5 +2024-09-18 15:14:29,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=468420.0, ans=0.125 +2024-09-18 15:14:47,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=468460.0, ans=0.0 +2024-09-18 15:14:57,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=468500.0, ans=0.125 +2024-09-18 15:14:58,756 INFO [train.py:1198] (1/2) Epoch 26, batch 4000, loss[loss=0.2248, ctc_loss=0.1082, cr_loss=0.3386, attn_decoder_loss=0.2302, over 29505.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1268, cr_loss=0.3693, attn_decoder_loss=0.2457, over 5813419.49 frames. ], batch size: 74, lr: 4.24e-03, grad_scale: 16.0 +2024-09-18 15:15:38,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=468580.0, ans=0.07 +2024-09-18 15:15:40,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=468580.0, ans=0.125 +2024-09-18 15:15:44,067 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.49 vs. limit=15.0 +2024-09-18 15:15:58,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=468660.0, ans=0.1 +2024-09-18 15:15:59,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=468660.0, ans=10.0 +2024-09-18 15:16:07,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=468660.0, ans=0.1 +2024-09-18 15:16:10,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=468660.0, ans=0.0 +2024-09-18 15:16:14,196 INFO [train.py:1198] (1/2) Epoch 26, batch 4050, loss[loss=0.2716, ctc_loss=0.1593, cr_loss=0.397, attn_decoder_loss=0.2752, over 20247.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.127, cr_loss=0.3698, attn_decoder_loss=0.2457, over 5796588.85 frames. ], batch size: 209, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:16:15,586 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.012e+01 8.606e+01 9.122e+01 9.849e+01 6.037e+02, threshold=1.824e+02, percent-clipped=3.0 +2024-09-18 15:16:23,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=468700.0, ans=0.125 +2024-09-18 15:16:29,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=468740.0, ans=0.1 +2024-09-18 15:16:46,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=468780.0, ans=0.2 +2024-09-18 15:16:53,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=468780.0, ans=0.125 +2024-09-18 15:17:19,045 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.82 vs. 
limit=5.0 +2024-09-18 15:17:24,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=468860.0, ans=0.125 +2024-09-18 15:17:28,157 INFO [train.py:1198] (1/2) Epoch 26, batch 4100, loss[loss=0.2517, ctc_loss=0.1401, cr_loss=0.3925, attn_decoder_loss=0.2554, over 29491.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1271, cr_loss=0.3699, attn_decoder_loss=0.2458, over 5791149.29 frames. ], batch size: 90, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:17:51,998 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:17:59,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.28 vs. limit=15.0 +2024-09-18 15:18:18,311 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:18:41,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=469100.0, ans=0.0 +2024-09-18 15:18:42,763 INFO [train.py:1198] (1/2) Epoch 26, batch 4150, loss[loss=0.2444, ctc_loss=0.1329, cr_loss=0.399, attn_decoder_loss=0.2479, over 29478.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1267, cr_loss=0.3698, attn_decoder_loss=0.2455, over 5797504.78 frames. ], batch size: 77, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:18:44,181 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.354e+01 8.459e+01 8.973e+01 9.469e+01 6.878e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-18 15:18:47,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=469100.0, ans=0.125 +2024-09-18 15:19:10,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=469180.0, ans=0.0 +2024-09-18 15:19:13,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=469180.0, ans=0.125 +2024-09-18 15:19:21,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=469180.0, ans=0.035 +2024-09-18 15:19:21,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=469180.0, ans=10.0 +2024-09-18 15:19:30,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=469220.0, ans=0.125 +2024-09-18 15:19:44,169 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.24 vs. limit=22.5 +2024-09-18 15:19:44,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=469260.0, ans=0.125 +2024-09-18 15:19:52,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=469260.0, ans=0.5 +2024-09-18 15:19:56,319 INFO [train.py:1198] (1/2) Epoch 26, batch 4200, loss[loss=0.2527, ctc_loss=0.1395, cr_loss=0.3832, attn_decoder_loss=0.2568, over 29529.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1267, cr_loss=0.3695, attn_decoder_loss=0.2455, over 5800276.75 frames. 
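The `Whitening` lines track, per named sub-module, a statistic of the feature covariance against a limit: the metric sits near 1 when activations are "white" (covariance proportional to the identity) and grows toward `num_channels` as the covariance collapses onto few directions. One natural metric with that behaviour is num_channels · tr(C²) / tr(C)²; this is a sketch, and icefall's `_whitening_metric` in `scaling.py` may differ in grouping and normalization details:

```python
# A whitening statistic that is ~1 for white features and up to num_channels
# for rank-1 features. Sketch only; the exact icefall formula may differ.

import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    """x: (num_frames, num_channels) activations."""
    num_channels = x.shape[-1]
    cov = (x.T @ x) / x.shape[0]      # (C, C) covariance estimate
    trace = torch.diagonal(cov).sum()
    trace_sq = (cov * cov).sum()      # tr(C @ C) for symmetric C
    return num_channels * trace_sq / (trace * trace)

white = torch.randn(10000, 512)
print(whitening_metric(white))        # close to 1 for white noise
```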
], batch size: 90, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:20:02,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=469300.0, ans=0.0 +2024-09-18 15:20:07,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=469300.0, ans=0.125 +2024-09-18 15:20:31,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=469380.0, ans=0.0 +2024-09-18 15:21:08,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=469460.0, ans=0.2 +2024-09-18 15:21:10,804 INFO [train.py:1198] (1/2) Epoch 26, batch 4250, loss[loss=0.2264, ctc_loss=0.102, cr_loss=0.3162, attn_decoder_loss=0.2332, over 29531.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.126, cr_loss=0.3682, attn_decoder_loss=0.2454, over 5806122.32 frames. ], batch size: 74, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:21:12,219 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.377e+01 8.717e+01 9.053e+01 9.730e+01 2.394e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-18 15:21:13,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=469500.0, ans=0.125 +2024-09-18 15:21:16,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=469500.0, ans=0.0 +2024-09-18 15:21:32,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=469540.0, ans=0.125 +2024-09-18 15:21:33,412 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.77 vs. limit=22.5 +2024-09-18 15:21:43,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=469580.0, ans=0.0 +2024-09-18 15:21:55,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=469620.0, ans=0.0 +2024-09-18 15:22:11,158 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.61 vs. limit=10.0 +2024-09-18 15:22:20,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=469660.0, ans=0.1 +2024-09-18 15:22:22,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=469660.0, ans=0.0 +2024-09-18 15:22:25,794 INFO [train.py:1198] (1/2) Epoch 26, batch 4300, loss[loss=0.266, ctc_loss=0.1431, cr_loss=0.3987, attn_decoder_loss=0.2708, over 29515.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1257, cr_loss=0.3677, attn_decoder_loss=0.2454, over 5796184.25 frames. 
], batch size: 87, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:22:52,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=469740.0, ans=0.0 +2024-09-18 15:23:07,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=469780.0, ans=0.2 +2024-09-18 15:23:16,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=469820.0, ans=0.2 +2024-09-18 15:23:19,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=469820.0, ans=0.125 +2024-09-18 15:23:20,885 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:23:27,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=469860.0, ans=0.1 +2024-09-18 15:23:39,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=469900.0, ans=0.1 +2024-09-18 15:23:40,784 INFO [train.py:1198] (1/2) Epoch 26, batch 4350, loss[loss=0.252, ctc_loss=0.1309, cr_loss=0.3882, attn_decoder_loss=0.2568, over 29499.00 frames. ], tot_loss[loss=0.2442, ctc_loss=0.1285, cr_loss=0.373, attn_decoder_loss=0.2487, over 5798187.90 frames. ], batch size: 97, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:23:42,287 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.612e+01 9.127e+01 9.671e+01 1.308e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-18 15:23:42,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=469900.0, ans=0.125 +2024-09-18 15:24:25,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=470020.0, ans=0.0 +2024-09-18 15:24:26,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=470020.0, ans=0.2 +2024-09-18 15:24:26,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=470020.0, ans=0.0 +2024-09-18 15:24:38,734 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.62 vs. limit=15.0 +2024-09-18 15:24:42,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=470060.0, ans=0.125 +2024-09-18 15:24:47,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=470060.0, ans=0.125 +2024-09-18 15:24:54,002 INFO [train.py:1198] (1/2) Epoch 26, batch 4400, loss[loss=0.2556, ctc_loss=0.1429, cr_loss=0.3992, attn_decoder_loss=0.2593, over 27257.00 frames. ], tot_loss[loss=0.2465, ctc_loss=0.1302, cr_loss=0.3762, attn_decoder_loss=0.251, over 5765612.81 frames. ], batch size: 124, lr: 4.23e-03, grad_scale: 16.0 +2024-09-18 15:24:59,634 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.72 vs. 
limit=15.0 +2024-09-18 15:25:17,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=470140.0, ans=0.0 +2024-09-18 15:25:43,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=470220.0, ans=0.125 +2024-09-18 15:26:09,414 INFO [train.py:1198] (1/2) Epoch 26, batch 4450, loss[loss=0.2687, ctc_loss=0.1717, cr_loss=0.4097, attn_decoder_loss=0.2704, over 20329.00 frames. ], tot_loss[loss=0.2489, ctc_loss=0.1344, cr_loss=0.3817, attn_decoder_loss=0.2532, over 5572524.93 frames. ], batch size: 209, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:26:12,368 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.301e+01 9.167e+01 9.608e+01 1.048e+02 2.652e+02, threshold=1.922e+02, percent-clipped=1.0 +2024-09-18 15:26:23,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=470340.0, ans=0.125 +2024-09-18 15:26:30,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=470340.0, ans=0.025 +2024-09-18 15:26:30,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=470340.0, ans=0.125 +2024-09-18 15:26:31,349 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.85 vs. limit=15.0 +2024-09-18 15:26:33,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=470340.0, ans=0.0 +2024-09-18 15:26:41,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=470380.0, ans=0.2 +2024-09-18 15:26:52,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.92 vs. limit=15.0 +2024-09-18 15:26:57,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=470420.0, ans=0.1 +2024-09-18 15:27:25,442 INFO [train.py:1198] (1/2) Epoch 26, batch 4500, loss[loss=0.2689, ctc_loss=0.1668, cr_loss=0.4233, attn_decoder_loss=0.2708, over 20306.00 frames. ], tot_loss[loss=0.2514, ctc_loss=0.1384, cr_loss=0.3842, attn_decoder_loss=0.2554, over 5233377.17 frames. ], batch size: 210, lr: 4.23e-03, grad_scale: 8.0 +2024-09-18 15:27:35,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=470500.0, ans=0.125 +2024-09-18 15:27:49,879 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.82 vs. limit=10.0 +2024-09-18 15:28:54,200 INFO [train.py:1198] (1/2) Epoch 27, batch 0, loss[loss=0.2161, ctc_loss=0.1028, cr_loss=0.3285, attn_decoder_loss=0.2214, over 29598.00 frames. ], tot_loss[loss=0.2161, ctc_loss=0.1028, cr_loss=0.3285, attn_decoder_loss=0.2214, over 29598.00 frames. 
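Between the last batches of epoch 26 (lr: 4.23e-03) and the first batch of epoch 27 (lr: 4.15e-03) the learning rate steps down, so the schedule depends on the epoch counter as well as the batch counter. Assuming the recipe uses icefall's Eden scheduler from `optim.py`, which this log does not state explicitly, the rate would follow the formula below; `lr_batches` and `lr_epochs` are common defaults, not values read from this run:

```python
# Eden-style schedule: decays smoothly in both the batch and epoch counters.
# The constants are illustrative defaults, not this run's configuration.

def eden_lr(base_lr: float, batch: int, epoch: float,
            lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor

# Crossing an epoch boundary lowers the epoch factor, giving the step change
# in lr seen between the epoch 26 and epoch 27 entries:
print(eden_lr(0.045, 470600, 26) > eden_lr(0.045, 470600, 27))  # True
```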
], batch size: 73, lr: 4.15e-03, grad_scale: 16.0 +2024-09-18 15:28:54,201 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 15:29:04,698 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.5.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([4.6813, 4.6660, 4.0043, 4.4543], device='cuda:1') +2024-09-18 15:29:12,733 INFO [train.py:1230] (1/2) Epoch 27, validation: loss=0.2127, ctc_loss=0.03797, cr_loss=5.907e-15, attn_decoder_loss=0.2322, over 944034.00 frames. +2024-09-18 15:29:12,733 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 15:29:14,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=470600.0, ans=0.07 +2024-09-18 15:29:22,486 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.81 vs. limit=22.5 +2024-09-18 15:29:35,865 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.92 vs. limit=15.0 +2024-09-18 15:29:36,398 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.14 vs. limit=15.0 +2024-09-18 15:29:37,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=470640.0, ans=0.0 +2024-09-18 15:29:41,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=470680.0, ans=0.1 +2024-09-18 15:29:44,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=470680.0, ans=0.125 +2024-09-18 15:29:47,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=470680.0, ans=0.125 +2024-09-18 15:29:50,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=470680.0, ans=0.125 +2024-09-18 15:29:53,215 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.260e+01 1.034e+02 1.128e+02 1.240e+02 3.218e+02, threshold=2.256e+02, percent-clipped=3.0 +2024-09-18 15:30:01,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=470720.0, ans=0.1 +2024-09-18 15:30:03,222 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.95 vs. limit=15.0 +2024-09-18 15:30:05,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=470720.0, ans=0.2 +2024-09-18 15:30:14,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=470760.0, ans=0.125 +2024-09-18 15:30:14,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=470760.0, ans=0.125 +2024-09-18 15:30:28,340 INFO [train.py:1198] (1/2) Epoch 27, batch 50, loss[loss=0.2181, ctc_loss=0.1155, cr_loss=0.3472, attn_decoder_loss=0.2217, over 29436.00 frames. ], tot_loss[loss=0.2422, ctc_loss=0.1281, cr_loss=0.3714, attn_decoder_loss=0.2466, over 1267229.45 frames. 
], batch size: 70, lr: 4.14e-03, grad_scale: 16.0 +2024-09-18 15:30:52,983 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.23 vs. limit=15.0 +2024-09-18 15:30:55,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=470840.0, ans=0.125 +2024-09-18 15:30:56,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=470840.0, ans=0.0 +2024-09-18 15:31:37,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=470960.0, ans=0.2 +2024-09-18 15:31:42,565 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.48 vs. limit=15.0 +2024-09-18 15:31:47,304 INFO [train.py:1198] (1/2) Epoch 27, batch 100, loss[loss=0.2316, ctc_loss=0.1205, cr_loss=0.3611, attn_decoder_loss=0.2359, over 29529.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1288, cr_loss=0.3737, attn_decoder_loss=0.2483, over 2251682.52 frames. ], batch size: 76, lr: 4.14e-03, grad_scale: 16.0 +2024-09-18 15:31:58,318 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 15:31:59,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=471000.0, ans=0.125 +2024-09-18 15:32:03,104 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.61 vs. limit=15.0 +2024-09-18 15:32:04,579 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.69 vs. limit=15.0 +2024-09-18 15:32:08,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=471040.0, ans=0.0 +2024-09-18 15:32:14,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=471040.0, ans=0.125 +2024-09-18 15:32:28,928 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.524e+01 9.170e+01 9.614e+01 1.417e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-18 15:32:45,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=471160.0, ans=0.0 +2024-09-18 15:32:53,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=471160.0, ans=0.04949747468305833 +2024-09-18 15:33:02,259 INFO [train.py:1198] (1/2) Epoch 27, batch 150, loss[loss=0.2073, ctc_loss=0.1031, cr_loss=0.3248, attn_decoder_loss=0.2117, over 29426.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1266, cr_loss=0.3706, attn_decoder_loss=0.2459, over 3046329.12 frames. 
], batch size: 70, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:33:29,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=471240.0, ans=0.2 +2024-09-18 15:33:47,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=471320.0, ans=0.0 +2024-09-18 15:33:50,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=471320.0, ans=0.125 +2024-09-18 15:33:58,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=471320.0, ans=0.0 +2024-09-18 15:34:01,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=471360.0, ans=0.2 +2024-09-18 15:34:08,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=471360.0, ans=0.1 +2024-09-18 15:34:17,454 INFO [train.py:1198] (1/2) Epoch 27, batch 200, loss[loss=0.2523, ctc_loss=0.1383, cr_loss=0.391, attn_decoder_loss=0.2563, over 27527.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1266, cr_loss=0.3709, attn_decoder_loss=0.2453, over 3658330.40 frames. ], batch size: 124, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:34:34,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=471440.0, ans=0.1 +2024-09-18 15:34:52,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=471480.0, ans=0.0 +2024-09-18 15:34:54,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=471480.0, ans=0.07 +2024-09-18 15:35:01,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=471480.0, ans=0.125 +2024-09-18 15:35:03,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=471480.0, ans=0.1 +2024-09-18 15:35:04,163 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.694e+01 8.473e+01 8.928e+01 9.557e+01 1.148e+02, threshold=1.786e+02, percent-clipped=0.0 +2024-09-18 15:35:05,053 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.64 vs. 
limit=15.0 +2024-09-18 15:35:15,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=471520.0, ans=0.125 +2024-09-18 15:35:19,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=471520.0, ans=0.0 +2024-09-18 15:35:22,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=471560.0, ans=0.125 +2024-09-18 15:35:24,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=471560.0, ans=0.0 +2024-09-18 15:35:28,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=471560.0, ans=0.2 +2024-09-18 15:35:28,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=471560.0, ans=0.125 +2024-09-18 15:35:33,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=471560.0, ans=0.2 +2024-09-18 15:35:37,445 INFO [train.py:1198] (1/2) Epoch 27, batch 250, loss[loss=0.257, ctc_loss=0.1308, cr_loss=0.3916, attn_decoder_loss=0.2623, over 29224.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1267, cr_loss=0.3706, attn_decoder_loss=0.2452, over 4141619.37 frames. ], batch size: 100, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:35:39,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=471600.0, ans=0.125 +2024-09-18 15:35:45,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=471600.0, ans=0.1 +2024-09-18 15:35:54,887 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.95 vs. limit=6.0 +2024-09-18 15:36:00,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=471640.0, ans=0.125 +2024-09-18 15:36:07,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=471680.0, ans=0.025 +2024-09-18 15:36:11,797 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.92 vs. limit=10.0 +2024-09-18 15:36:18,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=471680.0, ans=0.0 +2024-09-18 15:36:18,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=471680.0, ans=0.125 +2024-09-18 15:36:53,037 INFO [train.py:1198] (1/2) Epoch 27, batch 300, loss[loss=0.2571, ctc_loss=0.1432, cr_loss=0.4369, attn_decoder_loss=0.26, over 29518.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.126, cr_loss=0.3693, attn_decoder_loss=0.2451, over 4509049.93 frames. 
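Throughout these entries `grad_scale` alternates between 8.0 and 16.0. That pattern is characteristic of dynamic loss scaling for mixed-precision training: the scale is grown after a long run of overflow-free steps and cut back when a non-finite gradient is detected. `torch.cuda.amp.GradScaler` implements the real thing; a minimal sketch of the policy, with assumed constants:

```python
# Dynamic loss-scale policy for mixed-precision training. Sketch only;
# all constants are assumptions, not values from this run.

class DynamicGradScale:
    def __init__(self, init_scale: float = 16.0,
                 growth_factor: float = 2.0, backoff_factor: float = 0.5,
                 growth_interval: int = 2000):
        self.scale = init_scale
        self.growth_factor = growth_factor
        self.backoff_factor = backoff_factor
        self.growth_interval = growth_interval
        self._good_steps = 0

    def update(self, found_inf: bool) -> None:
        if found_inf:                  # overflow: shrink scale, skip the step
            self.scale *= self.backoff_factor
            self._good_steps = 0
        else:                          # grow after a run of clean steps
            self._good_steps += 1
            if self._good_steps >= self.growth_interval:
                self.scale *= self.growth_factor
                self._good_steps = 0
```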
], batch size: 92, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:36:56,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=471800.0, ans=0.125 +2024-09-18 15:37:01,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=471800.0, ans=0.125 +2024-09-18 15:37:22,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=471880.0, ans=0.0 +2024-09-18 15:37:34,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=471880.0, ans=0.125 +2024-09-18 15:37:35,277 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.584e+01 8.118e+01 8.847e+01 9.359e+01 3.678e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-18 15:37:53,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=471960.0, ans=0.125 +2024-09-18 15:37:53,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=471960.0, ans=0.2 +2024-09-18 15:38:03,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.69 vs. limit=15.0 +2024-09-18 15:38:09,245 INFO [train.py:1198] (1/2) Epoch 27, batch 350, loss[loss=0.2182, ctc_loss=0.1071, cr_loss=0.3236, attn_decoder_loss=0.2233, over 29333.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1264, cr_loss=0.3705, attn_decoder_loss=0.2458, over 4795061.13 frames. ], batch size: 71, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:38:12,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=472000.0, ans=0.0 +2024-09-18 15:38:32,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=472040.0, ans=0.125 +2024-09-18 15:38:34,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.88 vs. limit=15.0 +2024-09-18 15:39:10,781 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.06 vs. limit=15.0 +2024-09-18 15:39:29,365 INFO [train.py:1198] (1/2) Epoch 27, batch 400, loss[loss=0.2398, ctc_loss=0.1228, cr_loss=0.3575, attn_decoder_loss=0.2449, over 29720.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1258, cr_loss=0.3693, attn_decoder_loss=0.2453, over 5024362.16 frames. 
], batch size: 82, lr: 4.14e-03, grad_scale: 16.0 +2024-09-18 15:39:37,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=472200.0, ans=0.125 +2024-09-18 15:39:38,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=472200.0, ans=0.0 +2024-09-18 15:40:04,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=472280.0, ans=0.125 +2024-09-18 15:40:11,991 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.649e+01 9.100e+01 9.719e+01 1.502e+02, threshold=1.820e+02, percent-clipped=0.0 +2024-09-18 15:40:33,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=472360.0, ans=0.125 +2024-09-18 15:40:33,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=472360.0, ans=0.0 +2024-09-18 15:40:45,444 INFO [train.py:1198] (1/2) Epoch 27, batch 450, loss[loss=0.2484, ctc_loss=0.1256, cr_loss=0.372, attn_decoder_loss=0.2538, over 29713.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1257, cr_loss=0.369, attn_decoder_loss=0.2453, over 5186280.42 frames. ], batch size: 83, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:40:58,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=472400.0, ans=0.0 +2024-09-18 15:42:02,026 INFO [train.py:1198] (1/2) Epoch 27, batch 500, loss[loss=0.2622, ctc_loss=0.1437, cr_loss=0.4049, attn_decoder_loss=0.2664, over 29451.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1254, cr_loss=0.3687, attn_decoder_loss=0.2446, over 5329294.61 frames. ], batch size: 94, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:42:14,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=472600.0, ans=0.05 +2024-09-18 15:42:25,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.45 vs. 
limit=15.0 +2024-09-18 15:42:41,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=472680.0, ans=0.2 +2024-09-18 15:42:49,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=472680.0, ans=0.125 +2024-09-18 15:42:50,904 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.521e+01 8.545e+01 8.912e+01 9.466e+01 2.661e+02, threshold=1.782e+02, percent-clipped=0.0 +2024-09-18 15:42:55,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=472720.0, ans=0.025 +2024-09-18 15:43:06,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=472760.0, ans=0.0 +2024-09-18 15:43:07,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=472760.0, ans=0.0 +2024-09-18 15:43:12,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=472760.0, ans=0.125 +2024-09-18 15:43:23,226 INFO [train.py:1198] (1/2) Epoch 27, batch 550, loss[loss=0.2489, ctc_loss=0.1273, cr_loss=0.3724, attn_decoder_loss=0.2541, over 28932.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1259, cr_loss=0.3692, attn_decoder_loss=0.245, over 5422818.18 frames. ], batch size: 104, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:43:25,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=472800.0, ans=0.0 +2024-09-18 15:43:32,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=472800.0, ans=0.025 +2024-09-18 15:43:34,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=472800.0, ans=0.125 +2024-09-18 15:43:38,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=472840.0, ans=0.0 +2024-09-18 15:43:52,910 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.55 vs. limit=15.0 +2024-09-18 15:44:09,630 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.58 vs. limit=15.0 +2024-09-18 15:44:15,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.06 vs. limit=15.0 +2024-09-18 15:44:18,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=472920.0, ans=0.2 +2024-09-18 15:44:24,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=472960.0, ans=0.1 +2024-09-18 15:44:39,413 INFO [train.py:1198] (1/2) Epoch 27, batch 600, loss[loss=0.2597, ctc_loss=0.1334, cr_loss=0.3748, attn_decoder_loss=0.2654, over 29217.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1262, cr_loss=0.3703, attn_decoder_loss=0.2455, over 5508837.98 frames. 
], batch size: 100, lr: 4.14e-03, grad_scale: 8.0 +2024-09-18 15:44:39,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=473000.0, ans=0.125 +2024-09-18 15:44:46,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten.whitening_limit, batch_count=473000.0, ans=15.0 +2024-09-18 15:44:48,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=473000.0, ans=0.125 +2024-09-18 15:44:50,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=473000.0, ans=0.1 +2024-09-18 15:44:51,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=473000.0, ans=0.125 +2024-09-18 15:45:11,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=473080.0, ans=0.125 +2024-09-18 15:45:22,886 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.064e+01 8.416e+01 8.737e+01 9.314e+01 1.829e+02, threshold=1.747e+02, percent-clipped=2.0 +2024-09-18 15:45:35,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=473120.0, ans=0.125 +2024-09-18 15:45:43,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=473160.0, ans=0.125 +2024-09-18 15:45:54,997 INFO [train.py:1198] (1/2) Epoch 27, batch 650, loss[loss=0.2371, ctc_loss=0.1218, cr_loss=0.3659, attn_decoder_loss=0.2418, over 29782.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1253, cr_loss=0.3682, attn_decoder_loss=0.2447, over 5585992.97 frames. ], batch size: 81, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:46:05,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=473200.0, ans=0.1 +2024-09-18 15:46:18,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=473240.0, ans=0.025 +2024-09-18 15:46:22,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=473240.0, ans=0.125 +2024-09-18 15:46:28,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.85 vs. limit=15.0 +2024-09-18 15:46:30,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=473280.0, ans=0.025 +2024-09-18 15:46:41,093 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.52 vs. limit=15.0 +2024-09-18 15:47:15,995 INFO [train.py:1198] (1/2) Epoch 27, batch 700, loss[loss=0.2288, ctc_loss=0.1158, cr_loss=0.344, attn_decoder_loss=0.2337, over 29556.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1252, cr_loss=0.368, attn_decoder_loss=0.245, over 5636312.40 frames. 
], batch size: 76, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:47:23,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=473400.0, ans=0.1 +2024-09-18 15:47:34,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=473440.0, ans=0.05 +2024-09-18 15:47:48,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=473480.0, ans=0.1 +2024-09-18 15:47:51,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=473480.0, ans=10.0 +2024-09-18 15:47:55,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=473480.0, ans=0.125 +2024-09-18 15:48:00,171 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.368e+01 8.450e+01 9.020e+01 9.619e+01 3.078e+02, threshold=1.804e+02, percent-clipped=1.0 +2024-09-18 15:48:06,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=473520.0, ans=0.1 +2024-09-18 15:48:32,689 INFO [train.py:1198] (1/2) Epoch 27, batch 750, loss[loss=0.2435, ctc_loss=0.1278, cr_loss=0.3685, attn_decoder_loss=0.2482, over 29714.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1251, cr_loss=0.3675, attn_decoder_loss=0.2446, over 5675850.71 frames. ], batch size: 82, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:48:53,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.33 vs. limit=15.0 +2024-09-18 15:49:02,316 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.50 vs. limit=15.0 +2024-09-18 15:49:23,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=473720.0, ans=0.1 +2024-09-18 15:49:34,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=473760.0, ans=0.125 +2024-09-18 15:49:48,907 INFO [train.py:1198] (1/2) Epoch 27, batch 800, loss[loss=0.2254, ctc_loss=0.1141, cr_loss=0.3441, attn_decoder_loss=0.2302, over 29594.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.125, cr_loss=0.3676, attn_decoder_loss=0.2444, over 5705364.10 frames. 
], batch size: 73, lr: 4.13e-03, grad_scale: 16.0 +2024-09-18 15:49:53,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=473800.0, ans=0.0 +2024-09-18 15:50:04,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=473840.0, ans=0.2 +2024-09-18 15:50:19,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=473880.0, ans=0.0 +2024-09-18 15:50:25,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=473880.0, ans=0.125 +2024-09-18 15:50:35,327 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.276e+01 8.485e+01 9.104e+01 9.795e+01 7.519e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-18 15:50:40,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=473920.0, ans=0.0 +2024-09-18 15:50:41,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=473920.0, ans=0.09899494936611666 +2024-09-18 15:50:43,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=473920.0, ans=0.0 +2024-09-18 15:50:56,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=473960.0, ans=0.1 +2024-09-18 15:51:09,199 INFO [train.py:1198] (1/2) Epoch 27, batch 850, loss[loss=0.2542, ctc_loss=0.1326, cr_loss=0.3663, attn_decoder_loss=0.2596, over 29716.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1248, cr_loss=0.3673, attn_decoder_loss=0.244, over 5735441.65 frames. ], batch size: 89, lr: 4.13e-03, grad_scale: 16.0 +2024-09-18 15:51:11,398 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.38 vs. limit=6.0 +2024-09-18 15:51:15,877 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.97 vs. limit=22.5 +2024-09-18 15:51:19,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=474000.0, ans=0.125 +2024-09-18 15:51:31,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=474040.0, ans=0.1 +2024-09-18 15:51:46,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.06 vs. limit=22.5 +2024-09-18 15:51:47,466 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.40 vs. limit=15.0 +2024-09-18 15:52:00,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=474120.0, ans=0.125 +2024-09-18 15:52:03,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=474120.0, ans=0.125 +2024-09-18 15:52:24,936 INFO [train.py:1198] (1/2) Epoch 27, batch 900, loss[loss=0.2208, ctc_loss=0.1158, cr_loss=0.3524, attn_decoder_loss=0.2246, over 29607.00 frames. 
], tot_loss[loss=0.24, ctc_loss=0.1254, cr_loss=0.368, attn_decoder_loss=0.2446, over 5740667.02 frames. ], batch size: 73, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:53:06,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=474280.0, ans=0.125 +2024-09-18 15:53:10,287 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.261e+01 8.501e+01 8.938e+01 9.467e+01 2.355e+02, threshold=1.788e+02, percent-clipped=2.0 +2024-09-18 15:53:17,274 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.48 vs. limit=10.0 +2024-09-18 15:53:31,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=474360.0, ans=0.0 +2024-09-18 15:53:41,178 INFO [train.py:1198] (1/2) Epoch 27, batch 950, loss[loss=0.2317, ctc_loss=0.1167, cr_loss=0.3572, attn_decoder_loss=0.2365, over 29500.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1255, cr_loss=0.3684, attn_decoder_loss=0.2449, over 5743591.80 frames. ], batch size: 74, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:54:38,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=474520.0, ans=0.2 +2024-09-18 15:55:01,615 INFO [train.py:1198] (1/2) Epoch 27, batch 1000, loss[loss=0.2436, ctc_loss=0.1322, cr_loss=0.3834, attn_decoder_loss=0.2475, over 29505.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1264, cr_loss=0.3705, attn_decoder_loss=0.2458, over 5736826.30 frames. ], batch size: 77, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:55:09,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=474600.0, ans=0.125 +2024-09-18 15:55:41,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=474680.0, ans=0.025 +2024-09-18 15:55:44,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=474680.0, ans=0.0 +2024-09-18 15:55:47,614 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.698e+01 8.547e+01 9.112e+01 9.993e+01 2.254e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 15:56:17,918 INFO [train.py:1198] (1/2) Epoch 27, batch 1050, loss[loss=0.2518, ctc_loss=0.129, cr_loss=0.3895, attn_decoder_loss=0.2568, over 29682.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.126, cr_loss=0.369, attn_decoder_loss=0.2453, over 5745954.48 frames. 
], batch size: 85, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:56:21,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=474800.0, ans=0.1 +2024-09-18 15:56:24,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=474800.0, ans=0.0 +2024-09-18 15:56:31,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=474840.0, ans=0.125 +2024-09-18 15:56:56,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=474880.0, ans=0.025 +2024-09-18 15:57:07,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=474920.0, ans=0.1 +2024-09-18 15:57:16,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=474920.0, ans=0.125 +2024-09-18 15:57:27,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=474960.0, ans=0.125 +2024-09-18 15:57:34,517 INFO [train.py:1198] (1/2) Epoch 27, batch 1100, loss[loss=0.2305, ctc_loss=0.1261, cr_loss=0.3723, attn_decoder_loss=0.2338, over 29441.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1261, cr_loss=0.3688, attn_decoder_loss=0.245, over 5757380.28 frames. ], batch size: 78, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:57:41,391 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.03 vs. limit=22.5 +2024-09-18 15:57:47,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=475000.0, ans=0.09899494936611666 +2024-09-18 15:57:59,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.17 vs. 
limit=15.0 +2024-09-18 15:58:02,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=475040.0, ans=0.0 +2024-09-18 15:58:18,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=475080.0, ans=0.125 +2024-09-18 15:58:18,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=475080.0, ans=0.125 +2024-09-18 15:58:22,631 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.448e+01 9.006e+01 9.632e+01 1.338e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-18 15:58:25,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=475120.0, ans=0.025 +2024-09-18 15:58:36,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=475160.0, ans=0.125 +2024-09-18 15:58:39,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=475160.0, ans=0.0 +2024-09-18 15:58:54,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=475200.0, ans=0.0 +2024-09-18 15:58:55,772 INFO [train.py:1198] (1/2) Epoch 27, batch 1150, loss[loss=0.2327, ctc_loss=0.1177, cr_loss=0.3564, attn_decoder_loss=0.2376, over 29457.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1258, cr_loss=0.3683, attn_decoder_loss=0.2449, over 5755672.12 frames. ], batch size: 78, lr: 4.13e-03, grad_scale: 8.0 +2024-09-18 15:59:20,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=475240.0, ans=0.05 +2024-09-18 15:59:28,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=475280.0, ans=0.04949747468305833 +2024-09-18 15:59:29,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=475280.0, ans=0.125 +2024-09-18 15:59:56,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=475360.0, ans=0.125 +2024-09-18 16:00:11,932 INFO [train.py:1198] (1/2) Epoch 27, batch 1200, loss[loss=0.2474, ctc_loss=0.1304, cr_loss=0.3789, attn_decoder_loss=0.2519, over 29659.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1262, cr_loss=0.3689, attn_decoder_loss=0.2454, over 5747835.19 frames. ], batch size: 85, lr: 4.12e-03, grad_scale: 16.0 +2024-09-18 16:00:18,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=475400.0, ans=0.025 +2024-09-18 16:00:32,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=475440.0, ans=0.2 +2024-09-18 16:00:39,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=475440.0, ans=0.0 +2024-09-18 16:00:48,102 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.01 vs. limit=15.0 +2024-09-18 16:00:55,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.96 vs. 
limit=15.0 +2024-09-18 16:00:59,251 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.652e+01 9.107e+01 9.727e+01 1.637e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-18 16:01:09,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.60 vs. limit=15.0 +2024-09-18 16:01:19,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=475560.0, ans=0.2 +2024-09-18 16:01:27,920 INFO [train.py:1198] (1/2) Epoch 27, batch 1250, loss[loss=0.2568, ctc_loss=0.1412, cr_loss=0.3903, attn_decoder_loss=0.261, over 29527.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1262, cr_loss=0.3694, attn_decoder_loss=0.2458, over 5774800.39 frames. ], batch size: 92, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:01:32,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=475600.0, ans=0.125 +2024-09-18 16:01:35,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=475600.0, ans=0.0 +2024-09-18 16:01:56,517 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.85 vs. limit=15.0 +2024-09-18 16:02:06,571 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.37 vs. limit=22.5 +2024-09-18 16:02:44,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=475760.0, ans=0.125 +2024-09-18 16:02:48,706 INFO [train.py:1198] (1/2) Epoch 27, batch 1300, loss[loss=0.2472, ctc_loss=0.1275, cr_loss=0.3627, attn_decoder_loss=0.2524, over 28244.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1258, cr_loss=0.3683, attn_decoder_loss=0.2451, over 5778878.71 frames. ], batch size: 111, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:03:12,424 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.92 vs. limit=22.5 +2024-09-18 16:03:22,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=475880.0, ans=0.125 +2024-09-18 16:03:35,952 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.290e+01 8.380e+01 8.992e+01 9.418e+01 1.555e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-18 16:03:37,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=475920.0, ans=0.09899494936611666 +2024-09-18 16:03:42,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=475920.0, ans=0.125 +2024-09-18 16:03:54,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=475960.0, ans=0.0 +2024-09-18 16:04:05,245 INFO [train.py:1198] (1/2) Epoch 27, batch 1350, loss[loss=0.2371, ctc_loss=0.1236, cr_loss=0.3735, attn_decoder_loss=0.2414, over 29765.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.125, cr_loss=0.3673, attn_decoder_loss=0.2447, over 5796055.22 frames. 
], batch size: 81, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:04:44,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=476080.0, ans=0.125 +2024-09-18 16:04:47,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=476080.0, ans=0.125 +2024-09-18 16:04:53,598 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:04:59,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=476120.0, ans=0.2 +2024-09-18 16:05:02,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=476120.0, ans=0.125 +2024-09-18 16:05:19,586 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:05:20,670 INFO [train.py:1198] (1/2) Epoch 27, batch 1400, loss[loss=0.2016, ctc_loss=0.09796, cr_loss=0.2896, attn_decoder_loss=0.2067, over 29593.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1246, cr_loss=0.3665, attn_decoder_loss=0.2443, over 5807347.44 frames. ], batch size: 69, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:05:31,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=476200.0, ans=0.2 +2024-09-18 16:05:54,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=476280.0, ans=0.0 +2024-09-18 16:06:09,969 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.340e+01 8.342e+01 8.774e+01 9.500e+01 1.505e+02, threshold=1.755e+02, percent-clipped=0.0 +2024-09-18 16:06:38,574 INFO [train.py:1198] (1/2) Epoch 27, batch 1450, loss[loss=0.2582, ctc_loss=0.1351, cr_loss=0.3779, attn_decoder_loss=0.2635, over 29443.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1249, cr_loss=0.3673, attn_decoder_loss=0.2447, over 5804388.85 frames. ], batch size: 94, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:06:43,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.25 vs. limit=15.0 +2024-09-18 16:07:19,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=476480.0, ans=0.125 +2024-09-18 16:07:20,840 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:07:25,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=476520.0, ans=0.125 +2024-09-18 16:07:41,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=476560.0, ans=0.125 +2024-09-18 16:07:43,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=476560.0, ans=0.125 +2024-09-18 16:07:56,828 INFO [train.py:1198] (1/2) Epoch 27, batch 1500, loss[loss=0.2515, ctc_loss=0.1348, cr_loss=0.3851, attn_decoder_loss=0.2559, over 29621.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1251, cr_loss=0.3676, attn_decoder_loss=0.2451, over 5804455.49 frames. 
], batch size: 86, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:08:00,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=476600.0, ans=0.125 +2024-09-18 16:08:31,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=476680.0, ans=0.0 +2024-09-18 16:08:43,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=476720.0, ans=0.125 +2024-09-18 16:08:44,409 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.602e+01 8.578e+01 9.265e+01 1.012e+02 4.469e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-18 16:08:53,105 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.04 vs. limit=15.0 +2024-09-18 16:08:58,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer_na.min_abs, batch_count=476760.0, ans=0.02 +2024-09-18 16:09:13,770 INFO [train.py:1198] (1/2) Epoch 27, batch 1550, loss[loss=0.2594, ctc_loss=0.1415, cr_loss=0.4161, attn_decoder_loss=0.2633, over 29525.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1256, cr_loss=0.3685, attn_decoder_loss=0.2453, over 5780870.76 frames. ], batch size: 90, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:09:18,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=476800.0, ans=0.1 +2024-09-18 16:09:20,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=476800.0, ans=0.125 +2024-09-18 16:09:20,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.87 vs. limit=15.0 +2024-09-18 16:09:34,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=476840.0, ans=10.0 +2024-09-18 16:09:38,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=476840.0, ans=0.1 +2024-09-18 16:10:01,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=476920.0, ans=0.07 +2024-09-18 16:10:04,039 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.82 vs. limit=6.0 +2024-09-18 16:10:31,969 INFO [train.py:1198] (1/2) Epoch 27, batch 1600, loss[loss=0.2466, ctc_loss=0.1224, cr_loss=0.3724, attn_decoder_loss=0.2521, over 29665.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1257, cr_loss=0.3681, attn_decoder_loss=0.2453, over 5762906.09 frames. 
], batch size: 85, lr: 4.12e-03, grad_scale: 16.0 +2024-09-18 16:10:32,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=477000.0, ans=0.025 +2024-09-18 16:10:44,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=477000.0, ans=0.0 +2024-09-18 16:10:48,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=477040.0, ans=0.025 +2024-09-18 16:11:04,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=477080.0, ans=0.125 +2024-09-18 16:11:11,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=477080.0, ans=0.125 +2024-09-18 16:11:11,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=477080.0, ans=0.0 +2024-09-18 16:11:11,563 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.92 vs. limit=10.0 +2024-09-18 16:11:23,076 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.401e+01 8.546e+01 9.000e+01 9.569e+01 2.285e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-18 16:11:35,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=477160.0, ans=0.125 +2024-09-18 16:11:40,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=477160.0, ans=0.5 +2024-09-18 16:11:41,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=477160.0, ans=0.0 +2024-09-18 16:11:50,241 INFO [train.py:1198] (1/2) Epoch 27, batch 1650, loss[loss=0.2477, ctc_loss=0.1207, cr_loss=0.3631, attn_decoder_loss=0.2538, over 29698.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1254, cr_loss=0.3679, attn_decoder_loss=0.2452, over 5758617.80 frames. ], batch size: 89, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:11:50,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=477200.0, ans=0.0 +2024-09-18 16:12:22,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=477280.0, ans=0.1 +2024-09-18 16:12:37,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=477320.0, ans=0.125 +2024-09-18 16:12:42,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=477320.0, ans=0.125 +2024-09-18 16:12:51,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=477360.0, ans=0.125 +2024-09-18 16:13:05,982 INFO [train.py:1198] (1/2) Epoch 27, batch 1700, loss[loss=0.2155, ctc_loss=0.1111, cr_loss=0.3541, attn_decoder_loss=0.2192, over 29573.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1253, cr_loss=0.3678, attn_decoder_loss=0.245, over 5781040.25 frames. 
], batch size: 69, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:13:17,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=477400.0, ans=0.0 +2024-09-18 16:13:54,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=477520.0, ans=0.125 +2024-09-18 16:13:54,260 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:13:55,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=477520.0, ans=6.0 +2024-09-18 16:13:56,895 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.259e+01 8.516e+01 9.095e+01 9.729e+01 1.325e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-18 16:14:04,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=477520.0, ans=0.125 +2024-09-18 16:14:18,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=477560.0, ans=0.125 +2024-09-18 16:14:19,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=477560.0, ans=0.025 +2024-09-18 16:14:24,683 INFO [train.py:1198] (1/2) Epoch 27, batch 1750, loss[loss=0.2096, ctc_loss=0.1047, cr_loss=0.3449, attn_decoder_loss=0.2136, over 29355.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1251, cr_loss=0.3674, attn_decoder_loss=0.2446, over 5788557.74 frames. ], batch size: 67, lr: 4.12e-03, grad_scale: 8.0 +2024-09-18 16:14:36,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.69 vs. limit=15.0 +2024-09-18 16:14:38,198 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.58 vs. limit=22.5 +2024-09-18 16:14:50,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=477640.0, ans=0.0 +2024-09-18 16:15:00,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=477680.0, ans=0.0 +2024-09-18 16:15:20,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=477720.0, ans=0.05 +2024-09-18 16:15:22,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.68 vs. limit=15.0 +2024-09-18 16:15:23,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=477720.0, ans=0.0 +2024-09-18 16:15:29,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=477760.0, ans=0.2 +2024-09-18 16:15:30,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=477760.0, ans=0.025 +2024-09-18 16:15:42,639 INFO [train.py:1198] (1/2) Epoch 27, batch 1800, loss[loss=0.2515, ctc_loss=0.1292, cr_loss=0.3581, attn_decoder_loss=0.2571, over 29705.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1256, cr_loss=0.3682, attn_decoder_loss=0.2449, over 5792434.05 frames. 
], batch size: 83, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:16:02,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=477840.0, ans=0.125 +2024-09-18 16:16:08,223 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.27 vs. limit=22.5 +2024-09-18 16:16:30,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=477920.0, ans=0.125 +2024-09-18 16:16:31,638 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.468e+01 8.306e+01 8.965e+01 9.478e+01 1.194e+02, threshold=1.793e+02, percent-clipped=0.0 +2024-09-18 16:16:59,480 INFO [train.py:1198] (1/2) Epoch 27, batch 1850, loss[loss=0.2472, ctc_loss=0.1387, cr_loss=0.3929, attn_decoder_loss=0.2505, over 29638.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1258, cr_loss=0.3689, attn_decoder_loss=0.245, over 5797158.60 frames. ], batch size: 86, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:17:22,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=478040.0, ans=0.125 +2024-09-18 16:17:30,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=478080.0, ans=0.1 +2024-09-18 16:17:42,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.96 vs. limit=10.0 +2024-09-18 16:17:43,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=478080.0, ans=0.1 +2024-09-18 16:18:03,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.01 vs. limit=10.0 +2024-09-18 16:18:17,768 INFO [train.py:1198] (1/2) Epoch 27, batch 1900, loss[loss=0.2509, ctc_loss=0.123, cr_loss=0.3819, attn_decoder_loss=0.2567, over 29717.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1257, cr_loss=0.3688, attn_decoder_loss=0.2452, over 5805758.40 frames. ], batch size: 89, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:18:32,309 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.61 vs. limit=15.0 +2024-09-18 16:19:08,922 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.433e+01 8.594e+01 9.103e+01 9.777e+01 2.715e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-18 16:19:10,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=478320.0, ans=0.125 +2024-09-18 16:19:15,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=478320.0, ans=0.1 +2024-09-18 16:19:21,922 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.06 vs. 
limit=12.0 +2024-09-18 16:19:22,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=478360.0, ans=0.125 +2024-09-18 16:19:25,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=478360.0, ans=0.2 +2024-09-18 16:19:36,745 INFO [train.py:1198] (1/2) Epoch 27, batch 1950, loss[loss=0.2367, ctc_loss=0.1281, cr_loss=0.3866, attn_decoder_loss=0.2402, over 29457.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1261, cr_loss=0.3701, attn_decoder_loss=0.2462, over 5820875.41 frames. ], batch size: 78, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:19:49,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=478400.0, ans=0.07 +2024-09-18 16:20:10,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=478480.0, ans=0.0 +2024-09-18 16:20:27,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=478520.0, ans=0.1 +2024-09-18 16:20:37,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=478560.0, ans=0.1 +2024-09-18 16:20:38,198 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.00 vs. limit=12.0 +2024-09-18 16:20:40,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=478560.0, ans=0.0 +2024-09-18 16:20:40,754 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:20:43,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=478560.0, ans=0.0 +2024-09-18 16:20:46,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=478560.0, ans=0.125 +2024-09-18 16:20:52,626 INFO [train.py:1198] (1/2) Epoch 27, batch 2000, loss[loss=0.2212, ctc_loss=0.116, cr_loss=0.3631, attn_decoder_loss=0.2248, over 29327.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1263, cr_loss=0.3704, attn_decoder_loss=0.2465, over 5797698.36 frames. ], batch size: 67, lr: 4.11e-03, grad_scale: 16.0 +2024-09-18 16:21:05,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=478600.0, ans=0.125 +2024-09-18 16:21:15,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=478640.0, ans=0.0 +2024-09-18 16:21:18,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=478640.0, ans=0.0 +2024-09-18 16:21:45,120 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.111e+01 8.586e+01 9.013e+01 9.702e+01 5.300e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-18 16:21:50,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=478720.0, ans=0.0 +2024-09-18 16:22:01,421 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.44 vs. 
limit=15.0 +2024-09-18 16:22:10,911 INFO [train.py:1198] (1/2) Epoch 27, batch 2050, loss[loss=0.2152, ctc_loss=0.1063, cr_loss=0.3415, attn_decoder_loss=0.2198, over 29424.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1257, cr_loss=0.369, attn_decoder_loss=0.2455, over 5789286.56 frames. ], batch size: 70, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:22:24,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=478840.0, ans=0.0 +2024-09-18 16:22:29,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=478840.0, ans=0.0 +2024-09-18 16:23:07,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=478920.0, ans=0.0 +2024-09-18 16:23:28,901 INFO [train.py:1198] (1/2) Epoch 27, batch 2100, loss[loss=0.2325, ctc_loss=0.1125, cr_loss=0.3312, attn_decoder_loss=0.2385, over 29749.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1251, cr_loss=0.3677, attn_decoder_loss=0.245, over 5801425.48 frames. ], batch size: 81, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:23:33,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=479000.0, ans=0.0 +2024-09-18 16:23:41,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=479000.0, ans=0.025 +2024-09-18 16:23:49,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=479040.0, ans=0.1 +2024-09-18 16:24:04,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=479080.0, ans=0.025 +2024-09-18 16:24:09,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=479080.0, ans=0.0 +2024-09-18 16:24:11,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=479080.0, ans=0.125 +2024-09-18 16:24:11,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=479080.0, ans=0.125 +2024-09-18 16:24:17,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=479120.0, ans=0.125 +2024-09-18 16:24:18,656 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.428e+01 8.253e+01 8.787e+01 9.429e+01 1.232e+02, threshold=1.757e+02, percent-clipped=0.0 +2024-09-18 16:24:24,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=479120.0, ans=0.1 +2024-09-18 16:24:45,029 INFO [train.py:1198] (1/2) Epoch 27, batch 2150, loss[loss=0.2396, ctc_loss=0.129, cr_loss=0.3592, attn_decoder_loss=0.2439, over 29458.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1244, cr_loss=0.3667, attn_decoder_loss=0.2443, over 5816658.47 frames. ], batch size: 78, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:24:52,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=479200.0, ans=0.1 +2024-09-18 16:24:53,669 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.14 vs. 
limit=15.0 +2024-09-18 16:25:02,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.36 vs. limit=6.0 +2024-09-18 16:25:14,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=479280.0, ans=0.0 +2024-09-18 16:25:15,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=479280.0, ans=0.125 +2024-09-18 16:25:20,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=479280.0, ans=0.0 +2024-09-18 16:25:28,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=479280.0, ans=0.0 +2024-09-18 16:25:33,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=479320.0, ans=0.125 +2024-09-18 16:25:38,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=479320.0, ans=0.125 +2024-09-18 16:26:03,691 INFO [train.py:1198] (1/2) Epoch 27, batch 2200, loss[loss=0.2449, ctc_loss=0.1268, cr_loss=0.3869, attn_decoder_loss=0.2494, over 29642.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1247, cr_loss=0.3675, attn_decoder_loss=0.2445, over 5814103.22 frames. ], batch size: 86, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:26:05,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=479400.0, ans=0.0 +2024-09-18 16:26:08,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=479400.0, ans=0.125 +2024-09-18 16:26:12,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.18 vs. limit=15.0 +2024-09-18 16:26:17,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=479440.0, ans=0.0 +2024-09-18 16:26:20,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=479440.0, ans=0.0 +2024-09-18 16:26:47,792 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.23 vs. limit=15.0 +2024-09-18 16:26:55,790 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.516e+01 8.471e+01 9.024e+01 9.757e+01 3.508e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-18 16:27:07,766 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.69 vs. limit=10.0 +2024-09-18 16:27:12,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=479560.0, ans=0.0 +2024-09-18 16:27:21,639 INFO [train.py:1198] (1/2) Epoch 27, batch 2250, loss[loss=0.2383, ctc_loss=0.1199, cr_loss=0.3535, attn_decoder_loss=0.2436, over 29716.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1243, cr_loss=0.3664, attn_decoder_loss=0.2442, over 5811035.29 frames. 
], batch size: 82, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:27:32,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=479600.0, ans=0.0 +2024-09-18 16:27:52,861 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.75 vs. limit=15.0 +2024-09-18 16:27:52,890 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.90 vs. limit=22.5 +2024-09-18 16:27:58,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=479680.0, ans=0.125 +2024-09-18 16:28:07,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=479720.0, ans=0.09899494936611666 +2024-09-18 16:28:36,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=479800.0, ans=0.125 +2024-09-18 16:28:37,668 INFO [train.py:1198] (1/2) Epoch 27, batch 2300, loss[loss=0.2064, ctc_loss=0.09569, cr_loss=0.2951, attn_decoder_loss=0.2122, over 29313.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1238, cr_loss=0.365, attn_decoder_loss=0.2434, over 5798199.23 frames. ], batch size: 71, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:28:40,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=479800.0, ans=0.125 +2024-09-18 16:28:52,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=479840.0, ans=0.0 +2024-09-18 16:28:54,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=479840.0, ans=0.1 +2024-09-18 16:29:09,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=479880.0, ans=0.1 +2024-09-18 16:29:29,679 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.039e+01 8.211e+01 8.889e+01 9.358e+01 1.563e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-18 16:30:02,926 INFO [train.py:1198] (1/2) Epoch 27, batch 2350, loss[loss=0.2496, ctc_loss=0.1287, cr_loss=0.3779, attn_decoder_loss=0.2547, over 29695.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1238, cr_loss=0.3652, attn_decoder_loss=0.2436, over 5805045.60 frames. 
], batch size: 83, lr: 4.11e-03, grad_scale: 8.0 +2024-09-18 16:30:39,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=480080.0, ans=0.025 +2024-09-18 16:30:59,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=480120.0, ans=0.0 +2024-09-18 16:31:04,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=480160.0, ans=0.2 +2024-09-18 16:31:07,322 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:31:10,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=480160.0, ans=0.125 +2024-09-18 16:31:18,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=480160.0, ans=0.125 +2024-09-18 16:31:20,883 INFO [train.py:1198] (1/2) Epoch 27, batch 2400, loss[loss=0.2256, ctc_loss=0.1051, cr_loss=0.3161, attn_decoder_loss=0.2319, over 29522.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1242, cr_loss=0.3657, attn_decoder_loss=0.2442, over 5808337.44 frames. ], batch size: 76, lr: 4.10e-03, grad_scale: 16.0 +2024-09-18 16:31:27,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=480200.0, ans=0.1 +2024-09-18 16:31:30,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=480200.0, ans=0.0 +2024-09-18 16:31:33,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=480200.0, ans=10.0 +2024-09-18 16:31:37,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=480240.0, ans=0.1 +2024-09-18 16:31:48,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=480240.0, ans=0.1 +2024-09-18 16:31:51,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=480280.0, ans=0.125 +2024-09-18 16:32:08,314 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:32:09,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=480320.0, ans=0.125 +2024-09-18 16:32:12,453 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.316e+01 8.717e+01 9.101e+01 9.636e+01 2.464e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-18 16:32:17,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=480320.0, ans=0.125 +2024-09-18 16:32:18,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=480320.0, ans=0.1 +2024-09-18 16:32:36,817 INFO [train.py:1198] (1/2) Epoch 27, batch 2450, loss[loss=0.2415, ctc_loss=0.1216, cr_loss=0.3558, attn_decoder_loss=0.2469, over 29707.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.125, cr_loss=0.3676, attn_decoder_loss=0.2451, over 5784891.60 frames. 
], batch size: 82, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:32:38,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=480400.0, ans=0.0 +2024-09-18 16:32:40,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=480400.0, ans=0.0 +2024-09-18 16:33:07,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=480480.0, ans=0.125 +2024-09-18 16:33:15,823 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:33:17,417 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=10.64 vs. limit=15.0 +2024-09-18 16:33:18,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=480480.0, ans=0.0 +2024-09-18 16:33:20,742 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.66 vs. limit=15.0 +2024-09-18 16:33:24,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=480520.0, ans=0.1 +2024-09-18 16:33:54,968 INFO [train.py:1198] (1/2) Epoch 27, batch 2500, loss[loss=0.2486, ctc_loss=0.1224, cr_loss=0.3585, attn_decoder_loss=0.2547, over 29612.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1253, cr_loss=0.368, attn_decoder_loss=0.2453, over 5795223.34 frames. ], batch size: 86, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:33:59,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=480600.0, ans=0.125 +2024-09-18 16:34:10,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=480640.0, ans=0.125 +2024-09-18 16:34:12,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=480640.0, ans=0.125 +2024-09-18 16:34:16,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=480640.0, ans=0.0 +2024-09-18 16:34:20,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=480640.0, ans=10.0 +2024-09-18 16:34:27,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=480680.0, ans=0.025 +2024-09-18 16:34:36,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=480680.0, ans=0.125 +2024-09-18 16:34:39,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=480720.0, ans=0.1 +2024-09-18 16:34:49,374 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.100e+01 8.457e+01 8.825e+01 9.370e+01 1.600e+02, threshold=1.765e+02, percent-clipped=0.0 +2024-09-18 16:34:49,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=480720.0, ans=0.0 +2024-09-18 16:35:14,344 INFO [train.py:1198] (1/2) Epoch 27, batch 2550, loss[loss=0.2113, ctc_loss=0.1051, 
cr_loss=0.338, attn_decoder_loss=0.2156, over 29328.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1249, cr_loss=0.3673, attn_decoder_loss=0.2452, over 5796272.37 frames. ], batch size: 67, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:35:16,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=480800.0, ans=0.0 +2024-09-18 16:35:38,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=480840.0, ans=0.09899494936611666 +2024-09-18 16:35:52,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=480880.0, ans=0.125 +2024-09-18 16:35:58,858 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.29 vs. limit=15.0 +2024-09-18 16:36:04,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=480920.0, ans=0.0 +2024-09-18 16:36:15,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=480960.0, ans=0.1 +2024-09-18 16:36:16,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=480960.0, ans=0.125 +2024-09-18 16:36:30,364 INFO [train.py:1198] (1/2) Epoch 27, batch 2600, loss[loss=0.2282, ctc_loss=0.1086, cr_loss=0.3415, attn_decoder_loss=0.2339, over 29455.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1256, cr_loss=0.3688, attn_decoder_loss=0.2457, over 5792770.20 frames. ], batch size: 78, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:36:45,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=481040.0, ans=0.125 +2024-09-18 16:37:13,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.14 vs. limit=15.0 +2024-09-18 16:37:16,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=481080.0, ans=0.0 +2024-09-18 16:37:16,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=481080.0, ans=0.125 +2024-09-18 16:37:19,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=481120.0, ans=0.125 +2024-09-18 16:37:24,591 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.268e+01 8.374e+01 8.989e+01 9.651e+01 1.905e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-18 16:37:48,735 INFO [train.py:1198] (1/2) Epoch 27, batch 2650, loss[loss=0.2491, ctc_loss=0.1344, cr_loss=0.3934, attn_decoder_loss=0.2531, over 29232.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1253, cr_loss=0.3686, attn_decoder_loss=0.2457, over 5799802.02 frames. ], batch size: 100, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:38:01,698 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.65 vs. 
limit=15.0 +2024-09-18 16:38:14,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=481240.0, ans=0.0 +2024-09-18 16:38:19,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=481280.0, ans=0.0 +2024-09-18 16:38:19,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=481280.0, ans=0.125 +2024-09-18 16:38:28,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=481280.0, ans=0.0 +2024-09-18 16:38:37,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=481320.0, ans=0.125 +2024-09-18 16:38:59,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=481360.0, ans=0.0 +2024-09-18 16:39:06,863 INFO [train.py:1198] (1/2) Epoch 27, batch 2700, loss[loss=0.2528, ctc_loss=0.1338, cr_loss=0.3861, attn_decoder_loss=0.2575, over 29557.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1257, cr_loss=0.3696, attn_decoder_loss=0.2461, over 5796489.76 frames. ], batch size: 87, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:39:25,414 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:39:26,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=481440.0, ans=0.125 +2024-09-18 16:39:43,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.95 vs. limit=15.0 +2024-09-18 16:39:51,297 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.64 vs. limit=12.0 +2024-09-18 16:39:54,613 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.45 vs. limit=6.0 +2024-09-18 16:39:55,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=481520.0, ans=0.125 +2024-09-18 16:39:58,202 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.481e+01 8.531e+01 8.958e+01 9.495e+01 1.703e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-18 16:40:09,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=481560.0, ans=0.0 +2024-09-18 16:40:23,155 INFO [train.py:1198] (1/2) Epoch 27, batch 2750, loss[loss=0.2317, ctc_loss=0.1197, cr_loss=0.364, attn_decoder_loss=0.2361, over 29525.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.125, cr_loss=0.368, attn_decoder_loss=0.2451, over 5795040.48 frames. ], batch size: 75, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:41:15,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=481720.0, ans=0.125 +2024-09-18 16:41:17,940 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.74 vs. 
limit=6.0 +2024-09-18 16:41:26,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=481760.0, ans=0.125 +2024-09-18 16:41:41,656 INFO [train.py:1198] (1/2) Epoch 27, batch 2800, loss[loss=0.2671, ctc_loss=0.1607, cr_loss=0.3726, attn_decoder_loss=0.2707, over 20255.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1257, cr_loss=0.3694, attn_decoder_loss=0.2453, over 5776227.21 frames. ], batch size: 210, lr: 4.10e-03, grad_scale: 16.0 +2024-09-18 16:41:44,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=481800.0, ans=0.125 +2024-09-18 16:42:09,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=481840.0, ans=0.1 +2024-09-18 16:42:09,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=481840.0, ans=0.0 +2024-09-18 16:42:15,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=481880.0, ans=0.0 +2024-09-18 16:42:21,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=481880.0, ans=0.2 +2024-09-18 16:42:22,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=481880.0, ans=0.0 +2024-09-18 16:42:35,141 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.963e+01 8.581e+01 9.268e+01 9.879e+01 2.017e+02, threshold=1.854e+02, percent-clipped=1.0 +2024-09-18 16:42:43,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=481960.0, ans=0.125 +2024-09-18 16:42:47,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=481960.0, ans=0.125 +2024-09-18 16:42:56,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=481960.0, ans=0.0 +2024-09-18 16:42:59,623 INFO [train.py:1198] (1/2) Epoch 27, batch 2850, loss[loss=0.2263, ctc_loss=0.1146, cr_loss=0.3374, attn_decoder_loss=0.2312, over 29502.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1262, cr_loss=0.3704, attn_decoder_loss=0.2459, over 5760515.06 frames. ], batch size: 77, lr: 4.10e-03, grad_scale: 16.0 +2024-09-18 16:43:10,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=482000.0, ans=0.125 +2024-09-18 16:43:14,234 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.91 vs. limit=10.0 +2024-09-18 16:43:53,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=482120.0, ans=0.125 +2024-09-18 16:43:55,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=482120.0, ans=0.125 +2024-09-18 16:44:15,379 INFO [train.py:1198] (1/2) Epoch 27, batch 2900, loss[loss=0.2341, ctc_loss=0.1175, cr_loss=0.3614, attn_decoder_loss=0.2391, over 29396.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1269, cr_loss=0.3721, attn_decoder_loss=0.2471, over 5786207.06 frames. 
], batch size: 79, lr: 4.10e-03, grad_scale: 8.0 +2024-09-18 16:44:21,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=482200.0, ans=0.125 +2024-09-18 16:44:32,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=482240.0, ans=0.0 +2024-09-18 16:44:35,964 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.70 vs. limit=15.0 +2024-09-18 16:45:12,315 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.581e+01 8.546e+01 8.987e+01 9.686e+01 7.083e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 16:45:30,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=482360.0, ans=0.0 +2024-09-18 16:45:33,848 INFO [train.py:1198] (1/2) Epoch 27, batch 2950, loss[loss=0.2451, ctc_loss=0.136, cr_loss=0.3817, attn_decoder_loss=0.2488, over 29526.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1259, cr_loss=0.37, attn_decoder_loss=0.2457, over 5779987.02 frames. ], batch size: 75, lr: 4.09e-03, grad_scale: 4.0 +2024-09-18 16:45:35,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=482400.0, ans=0.125 +2024-09-18 16:45:48,338 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.57 vs. limit=22.5 +2024-09-18 16:45:49,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=482440.0, ans=0.125 +2024-09-18 16:46:00,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=482440.0, ans=0.0 +2024-09-18 16:46:07,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=482480.0, ans=0.0 +2024-09-18 16:46:52,466 INFO [train.py:1198] (1/2) Epoch 27, batch 3000, loss[loss=0.2464, ctc_loss=0.1314, cr_loss=0.3779, attn_decoder_loss=0.2508, over 29753.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.126, cr_loss=0.3701, attn_decoder_loss=0.2457, over 5781573.22 frames. ], batch size: 81, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:46:52,466 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 16:47:10,903 INFO [train.py:1230] (1/2) Epoch 27, validation: loss=0.212, ctc_loss=0.03868, cr_loss=6.15e-15, attn_decoder_loss=0.2313, over 944034.00 frames. 
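Two mechanisms recur throughout this log. First, the ScheduledFloat entries from scaling.py print the current value (ans=...) of a hyperparameter that varies with batch_count. A minimal sketch of such a piecewise-linear schedule, with made-up breakpoints (the real schedules live in icefall's scaling.py), is:

```python
import bisect

class PiecewiseLinearSchedule:
    """Hypothetical stand-in for the ScheduledFloat values logged above:
    a float that is piecewise-linear in batch_count. The breakpoints here
    are illustrative only."""

    def __init__(self, *points):
        # points: (batch_count, value) pairs in increasing batch_count order
        self.xs = [float(x) for x, _ in points]
        self.ys = [float(y) for _, y in points]

    def __call__(self, batch_count: float) -> float:
        # Clamp outside the defined range, interpolate linearly inside it.
        if batch_count <= self.xs[0]:
            return self.ys[0]
        if batch_count >= self.xs[-1]:
            return self.ys[-1]
        i = bisect.bisect_right(self.xs, batch_count)
        x0, x1 = self.xs[i - 1], self.xs[i]
        y0, y1 = self.ys[i - 1], self.ys[i]
        return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

# A skip-rate decaying from 0.5 to 0.0 over the first 20k batches would
# print ans=0.0 at batch_count=480400.0, as in the entries above.
ff2_skip_rate = PiecewiseLinearSchedule((0.0, 0.5), (20000.0, 0.0))
print(ff2_skip_rate(480400.0))  # -> 0.0
```

Second, the WARNING entries from optim.py report gradient-norm quartiles (min, 25%, median, 75%, max) together with a clipping threshold, and the threshold is consistently Clipping_scale times the median quartile (e.g. 2.0 * 9.161e+01 = 1.832e+02 in the entry just above). A hypothetical helper reproducing that relationship, assuming a standard PyTorch training loop; this only illustrates the behaviour visible in the log, not the actual code in icefall's optim.py:

```python
import torch

def clip_like_logged(params, recent_grad_norms, clipping_scale=2.0):
    """Hypothetical helper: clip the global grad norm at
    clipping_scale * median(recent gradient norms), matching the
    threshold values printed in the WARNING lines above."""
    norms = torch.tensor(recent_grad_norms, dtype=torch.float32)
    # Quartiles in the order logged: min, 25%, median, 75%, max.
    q = torch.quantile(norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * q[2].item()  # e.g. 2.0 * 9.161e+01 = 1.832e+02
    total_norm = torch.nn.utils.clip_grad_norm_(params, max_norm=threshold)
    # percent-clipped in the log would then be the fraction of recent
    # batches whose total norm exceeded the threshold.
    return q, threshold, float(total_norm) > threshold
```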
+2024-09-18 16:47:10,903 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 16:47:12,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=482600.0, ans=0.0 +2024-09-18 16:47:20,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=482600.0, ans=0.1 +2024-09-18 16:47:23,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=482600.0, ans=0.2 +2024-09-18 16:47:26,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=482640.0, ans=0.125 +2024-09-18 16:47:59,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=482720.0, ans=0.125 +2024-09-18 16:48:05,508 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.731e+01 8.646e+01 9.161e+01 1.019e+02 2.247e+02, threshold=1.832e+02, percent-clipped=1.0 +2024-09-18 16:48:16,984 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.04 vs. limit=22.5 +2024-09-18 16:48:26,847 INFO [train.py:1198] (1/2) Epoch 27, batch 3050, loss[loss=0.2287, ctc_loss=0.1164, cr_loss=0.3488, attn_decoder_loss=0.2334, over 29563.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1262, cr_loss=0.3705, attn_decoder_loss=0.246, over 5775753.16 frames. ], batch size: 76, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:48:40,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=482800.0, ans=0.05 +2024-09-18 16:48:40,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.38 vs. limit=6.0 +2024-09-18 16:48:45,549 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.07 vs. limit=15.0 +2024-09-18 16:49:14,169 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.29 vs. limit=22.5 +2024-09-18 16:49:44,764 INFO [train.py:1198] (1/2) Epoch 27, batch 3100, loss[loss=0.2613, ctc_loss=0.1365, cr_loss=0.3948, attn_decoder_loss=0.2664, over 29188.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1259, cr_loss=0.3697, attn_decoder_loss=0.2457, over 5775941.98 frames. 
], batch size: 100, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:49:47,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=483000.0, ans=0.0 +2024-09-18 16:49:51,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=483000.0, ans=0.1 +2024-09-18 16:49:52,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=483000.0, ans=0.125 +2024-09-18 16:50:03,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=483040.0, ans=0.1 +2024-09-18 16:50:09,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=483040.0, ans=0.07 +2024-09-18 16:50:31,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=483120.0, ans=0.125 +2024-09-18 16:50:41,515 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.455e+01 8.612e+01 9.047e+01 9.758e+01 3.006e+02, threshold=1.809e+02, percent-clipped=2.0 +2024-09-18 16:51:03,296 INFO [train.py:1198] (1/2) Epoch 27, batch 3150, loss[loss=0.2528, ctc_loss=0.1315, cr_loss=0.3715, attn_decoder_loss=0.258, over 28933.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1258, cr_loss=0.3691, attn_decoder_loss=0.2457, over 5783033.53 frames. ], batch size: 104, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:51:06,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=483200.0, ans=0.0 +2024-09-18 16:51:19,047 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:51:33,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=483280.0, ans=0.0 +2024-09-18 16:51:35,948 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.09 vs. limit=22.5 +2024-09-18 16:52:13,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=483360.0, ans=0.125 +2024-09-18 16:52:18,912 INFO [train.py:1198] (1/2) Epoch 27, batch 3200, loss[loss=0.2394, ctc_loss=0.129, cr_loss=0.3739, attn_decoder_loss=0.2434, over 29406.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1249, cr_loss=0.3676, attn_decoder_loss=0.2448, over 5792672.68 frames. 
], batch size: 79, lr: 4.09e-03, grad_scale: 16.0 +2024-09-18 16:52:20,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=483400.0, ans=0.2 +2024-09-18 16:52:36,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=483440.0, ans=0.125 +2024-09-18 16:52:56,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=483480.0, ans=0.1 +2024-09-18 16:53:16,079 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.193e+01 8.478e+01 8.969e+01 9.595e+01 1.807e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 16:53:17,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=483520.0, ans=0.125 +2024-09-18 16:53:37,328 INFO [train.py:1198] (1/2) Epoch 27, batch 3250, loss[loss=0.2521, ctc_loss=0.1407, cr_loss=0.3994, attn_decoder_loss=0.2556, over 29717.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1253, cr_loss=0.3687, attn_decoder_loss=0.2454, over 5800067.13 frames. ], batch size: 84, lr: 4.09e-03, grad_scale: 16.0 +2024-09-18 16:53:37,706 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 16:53:45,183 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=483600.0, ans=0.035 +2024-09-18 16:54:03,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=483640.0, ans=0.125 +2024-09-18 16:54:05,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=483680.0, ans=0.125 +2024-09-18 16:54:06,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=483680.0, ans=0.125 +2024-09-18 16:54:16,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=483680.0, ans=0.1 +2024-09-18 16:54:27,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=483720.0, ans=0.125 +2024-09-18 16:54:54,878 INFO [train.py:1198] (1/2) Epoch 27, batch 3300, loss[loss=0.2528, ctc_loss=0.133, cr_loss=0.3975, attn_decoder_loss=0.2572, over 28320.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1246, cr_loss=0.3675, attn_decoder_loss=0.2444, over 5797612.27 frames. ], batch size: 111, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:54:58,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=483800.0, ans=0.1 +2024-09-18 16:55:29,036 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.47 vs. limit=15.0 +2024-09-18 16:55:41,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=483920.0, ans=0.125 +2024-09-18 16:55:46,791 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.28 vs. 
limit=6.0 +2024-09-18 16:55:48,403 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.05 vs. limit=15.0 +2024-09-18 16:55:50,530 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.603e+01 8.485e+01 9.035e+01 9.621e+01 1.592e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-18 16:55:51,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=483920.0, ans=0.2 +2024-09-18 16:56:00,307 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.99 vs. limit=10.0 +2024-09-18 16:56:05,162 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.10 vs. limit=15.0 +2024-09-18 16:56:10,661 INFO [train.py:1198] (1/2) Epoch 27, batch 3350, loss[loss=0.2531, ctc_loss=0.1339, cr_loss=0.3954, attn_decoder_loss=0.2575, over 28849.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1253, cr_loss=0.3687, attn_decoder_loss=0.2451, over 5774269.46 frames. ], batch size: 104, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:56:28,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=484040.0, ans=0.0 +2024-09-18 16:56:28,864 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.48 vs. limit=15.0 +2024-09-18 16:56:32,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=484040.0, ans=0.1 +2024-09-18 16:56:34,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=484040.0, ans=0.1 +2024-09-18 16:56:36,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.97 vs. limit=15.0 +2024-09-18 16:56:41,265 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=8.37 vs. limit=15.0 +2024-09-18 16:56:48,834 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.63 vs. limit=15.0 +2024-09-18 16:56:52,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.whiten.whitening_limit, batch_count=484080.0, ans=12.0 +2024-09-18 16:56:57,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=484120.0, ans=0.125 +2024-09-18 16:57:09,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=484120.0, ans=0.125 +2024-09-18 16:57:18,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=484160.0, ans=0.025 +2024-09-18 16:57:22,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=484160.0, ans=0.1 +2024-09-18 16:57:24,864 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.37 vs. 
limit=15.0 +2024-09-18 16:57:28,520 INFO [train.py:1198] (1/2) Epoch 27, batch 3400, loss[loss=0.2157, ctc_loss=0.1073, cr_loss=0.3257, attn_decoder_loss=0.2205, over 29346.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1257, cr_loss=0.3691, attn_decoder_loss=0.2451, over 5767530.63 frames. ], batch size: 67, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:57:28,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=484200.0, ans=0.125 +2024-09-18 16:57:32,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.44 vs. limit=22.5 +2024-09-18 16:57:42,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=484240.0, ans=0.0 +2024-09-18 16:58:10,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=484280.0, ans=0.1 +2024-09-18 16:58:12,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=484280.0, ans=0.0 +2024-09-18 16:58:19,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=484320.0, ans=0.125 +2024-09-18 16:58:24,446 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.06 vs. limit=15.0 +2024-09-18 16:58:26,754 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.482e+01 8.585e+01 9.028e+01 9.662e+01 1.590e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-18 16:58:44,074 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.92 vs. limit=15.0 +2024-09-18 16:58:46,440 INFO [train.py:1198] (1/2) Epoch 27, batch 3450, loss[loss=0.2446, ctc_loss=0.1253, cr_loss=0.3676, attn_decoder_loss=0.2497, over 28381.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1259, cr_loss=0.369, attn_decoder_loss=0.2455, over 5775558.26 frames. ], batch size: 111, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 16:59:27,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=484480.0, ans=0.2 +2024-09-18 16:59:38,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=484520.0, ans=0.1 +2024-09-18 16:59:56,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=484560.0, ans=0.1 +2024-09-18 17:00:04,105 INFO [train.py:1198] (1/2) Epoch 27, batch 3500, loss[loss=0.2178, ctc_loss=0.1106, cr_loss=0.3461, attn_decoder_loss=0.222, over 29329.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1254, cr_loss=0.3679, attn_decoder_loss=0.2449, over 5776342.14 frames. 
], batch size: 71, lr: 4.09e-03, grad_scale: 8.0 +2024-09-18 17:00:59,675 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.562e+01 8.977e+01 9.669e+01 2.220e+02, threshold=1.795e+02, percent-clipped=2.0 +2024-09-18 17:01:03,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=484760.0, ans=0.125 +2024-09-18 17:01:19,573 INFO [train.py:1198] (1/2) Epoch 27, batch 3550, loss[loss=0.2363, ctc_loss=0.115, cr_loss=0.3448, attn_decoder_loss=0.2421, over 29721.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1253, cr_loss=0.3675, attn_decoder_loss=0.2448, over 5782457.47 frames. ], batch size: 89, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:01:19,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=484800.0, ans=0.125 +2024-09-18 17:01:54,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=484880.0, ans=0.125 +2024-09-18 17:02:01,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=484880.0, ans=0.1 +2024-09-18 17:02:33,745 INFO [train.py:1198] (1/2) Epoch 27, batch 3600, loss[loss=0.2266, ctc_loss=0.1145, cr_loss=0.3368, attn_decoder_loss=0.2316, over 29525.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1252, cr_loss=0.3671, attn_decoder_loss=0.2448, over 5791483.40 frames. ], batch size: 77, lr: 4.08e-03, grad_scale: 16.0 +2024-09-18 17:02:41,770 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.62 vs. limit=15.0 +2024-09-18 17:02:44,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=485000.0, ans=0.125 +2024-09-18 17:02:58,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=485040.0, ans=0.2 +2024-09-18 17:03:26,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=485120.0, ans=0.0 +2024-09-18 17:03:28,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=485120.0, ans=0.125 +2024-09-18 17:03:30,862 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.309e+01 8.400e+01 9.013e+01 9.523e+01 1.334e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-18 17:03:35,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=485160.0, ans=0.0 +2024-09-18 17:03:48,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=485200.0, ans=0.125 +2024-09-18 17:03:50,134 INFO [train.py:1198] (1/2) Epoch 27, batch 3650, loss[loss=0.2567, ctc_loss=0.1367, cr_loss=0.3907, attn_decoder_loss=0.2614, over 29491.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1244, cr_loss=0.3658, attn_decoder_loss=0.2441, over 5793614.67 frames. 
], batch size: 90, lr: 4.08e-03, grad_scale: 16.0 +2024-09-18 17:03:51,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=485200.0, ans=0.025 +2024-09-18 17:03:53,619 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.49 vs. limit=15.0 +2024-09-18 17:03:56,455 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:03:57,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=485200.0, ans=0.0 +2024-09-18 17:03:58,419 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.52 vs. limit=15.0 +2024-09-18 17:04:08,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=485240.0, ans=0.0 +2024-09-18 17:04:08,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=485240.0, ans=0.2 +2024-09-18 17:04:45,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=485320.0, ans=0.05 +2024-09-18 17:04:47,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=485320.0, ans=0.035 +2024-09-18 17:04:49,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.82 vs. limit=10.0 +2024-09-18 17:04:54,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=485360.0, ans=0.125 +2024-09-18 17:05:04,962 INFO [train.py:1198] (1/2) Epoch 27, batch 3700, loss[loss=0.2454, ctc_loss=0.1173, cr_loss=0.3548, attn_decoder_loss=0.2518, over 29726.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1244, cr_loss=0.3659, attn_decoder_loss=0.2442, over 5804283.70 frames. ], batch size: 84, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:05:09,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=485400.0, ans=0.125 +2024-09-18 17:05:12,055 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.63 vs. 
limit=10.0 +2024-09-18 17:05:30,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=485440.0, ans=0.1 +2024-09-18 17:05:57,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=485520.0, ans=0.125 +2024-09-18 17:06:01,399 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.031e+01 8.546e+01 8.927e+01 9.450e+01 1.781e+02, threshold=1.785e+02, percent-clipped=0.0 +2024-09-18 17:06:04,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=485560.0, ans=0.0 +2024-09-18 17:06:04,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=485560.0, ans=0.1 +2024-09-18 17:06:12,866 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.99 vs. limit=22.5 +2024-09-18 17:06:21,379 INFO [train.py:1198] (1/2) Epoch 27, batch 3750, loss[loss=0.2114, ctc_loss=0.1035, cr_loss=0.3175, attn_decoder_loss=0.2163, over 29401.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1243, cr_loss=0.3658, attn_decoder_loss=0.2442, over 5807937.38 frames. ], batch size: 67, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:06:21,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=485600.0, ans=0.125 +2024-09-18 17:06:35,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=485640.0, ans=0.0 +2024-09-18 17:06:46,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=485640.0, ans=0.125 +2024-09-18 17:06:48,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=485640.0, ans=0.0 +2024-09-18 17:06:58,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=485680.0, ans=0.0 +2024-09-18 17:07:24,679 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.58 vs. limit=15.0 +2024-09-18 17:07:30,818 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.77 vs. limit=10.0 +2024-09-18 17:07:33,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=485760.0, ans=0.0 +2024-09-18 17:07:35,761 INFO [train.py:1198] (1/2) Epoch 27, batch 3800, loss[loss=0.2508, ctc_loss=0.1326, cr_loss=0.3712, attn_decoder_loss=0.2557, over 29626.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1243, cr_loss=0.3659, attn_decoder_loss=0.244, over 5798653.12 frames. ], batch size: 86, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:07:41,132 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.60 vs. 
limit=15.0 +2024-09-18 17:07:41,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=485800.0, ans=0.125 +2024-09-18 17:07:51,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=485840.0, ans=0.125 +2024-09-18 17:07:52,809 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.62 vs. limit=15.0 +2024-09-18 17:07:54,531 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.74 vs. limit=22.5 +2024-09-18 17:08:07,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=485880.0, ans=0.125 +2024-09-18 17:08:08,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=485880.0, ans=0.0 +2024-09-18 17:08:09,613 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.62 vs. limit=22.5 +2024-09-18 17:08:15,663 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.27 vs. limit=15.0 +2024-09-18 17:08:25,874 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.39 vs. limit=15.0 +2024-09-18 17:08:32,466 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.128e+01 8.550e+01 9.227e+01 9.705e+01 1.468e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-18 17:08:50,202 INFO [train.py:1198] (1/2) Epoch 27, batch 3850, loss[loss=0.2601, ctc_loss=0.1474, cr_loss=0.4142, attn_decoder_loss=0.2634, over 29234.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1246, cr_loss=0.3669, attn_decoder_loss=0.2442, over 5813055.44 frames. ], batch size: 100, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:08:50,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=486000.0, ans=0.125 +2024-09-18 17:08:51,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=486000.0, ans=0.125 +2024-09-18 17:09:03,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=486040.0, ans=0.0 +2024-09-18 17:09:12,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=486040.0, ans=0.125 +2024-09-18 17:09:13,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.39 vs. limit=12.0 +2024-09-18 17:09:26,584 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.42 vs. 
limit=22.5 +2024-09-18 17:09:42,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=486120.0, ans=0.0 +2024-09-18 17:09:45,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=486120.0, ans=0.125 +2024-09-18 17:10:06,039 INFO [train.py:1198] (1/2) Epoch 27, batch 3900, loss[loss=0.254, ctc_loss=0.149, cr_loss=0.3904, attn_decoder_loss=0.257, over 29639.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1251, cr_loss=0.3676, attn_decoder_loss=0.2447, over 5817375.15 frames. ], batch size: 86, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:10:06,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=486200.0, ans=0.125 +2024-09-18 17:10:09,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=486200.0, ans=0.125 +2024-09-18 17:10:24,718 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.61 vs. limit=15.0 +2024-09-18 17:10:29,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.77 vs. limit=22.5 +2024-09-18 17:10:43,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=486280.0, ans=0.125 +2024-09-18 17:10:50,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=486320.0, ans=0.125 +2024-09-18 17:10:52,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=486320.0, ans=0.125 +2024-09-18 17:10:59,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=486320.0, ans=0.125 +2024-09-18 17:11:02,351 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.307e+01 8.580e+01 9.073e+01 9.587e+01 1.534e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-18 17:11:07,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=486360.0, ans=0.1 +2024-09-18 17:11:20,657 INFO [train.py:1198] (1/2) Epoch 27, batch 3950, loss[loss=0.2493, ctc_loss=0.1283, cr_loss=0.3799, attn_decoder_loss=0.2543, over 29470.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1245, cr_loss=0.367, attn_decoder_loss=0.2445, over 5836493.97 frames. ], batch size: 97, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:11:22,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=486400.0, ans=0.1 +2024-09-18 17:12:35,436 INFO [train.py:1198] (1/2) Epoch 27, batch 4000, loss[loss=0.2216, ctc_loss=0.1076, cr_loss=0.3257, attn_decoder_loss=0.2271, over 29498.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1243, cr_loss=0.3661, attn_decoder_loss=0.2443, over 5814672.95 frames. 
], batch size: 74, lr: 4.08e-03, grad_scale: 16.0 +2024-09-18 17:13:08,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=486680.0, ans=0.025 +2024-09-18 17:13:20,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=486720.0, ans=0.125 +2024-09-18 17:13:20,873 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.85 vs. limit=15.0 +2024-09-18 17:13:29,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.43 vs. limit=6.0 +2024-09-18 17:13:33,393 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.629e+01 8.740e+01 9.217e+01 9.696e+01 1.612e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-18 17:13:49,523 INFO [train.py:1198] (1/2) Epoch 27, batch 4050, loss[loss=0.2639, ctc_loss=0.1583, cr_loss=0.3983, attn_decoder_loss=0.2668, over 20601.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1247, cr_loss=0.3672, attn_decoder_loss=0.2443, over 5798167.30 frames. ], batch size: 209, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:14:08,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=486840.0, ans=0.125 +2024-09-18 17:14:21,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=486880.0, ans=0.125 +2024-09-18 17:14:32,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=486880.0, ans=0.07 +2024-09-18 17:14:39,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=486920.0, ans=0.125 +2024-09-18 17:15:02,352 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.14 vs. limit=15.0 +2024-09-18 17:15:04,284 INFO [train.py:1198] (1/2) Epoch 27, batch 4100, loss[loss=0.2579, ctc_loss=0.146, cr_loss=0.4174, attn_decoder_loss=0.2611, over 29519.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1248, cr_loss=0.3671, attn_decoder_loss=0.2446, over 5793858.33 frames. ], batch size: 90, lr: 4.08e-03, grad_scale: 8.0 +2024-09-18 17:15:31,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=487040.0, ans=0.1 +2024-09-18 17:15:43,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.04 vs. limit=15.0 +2024-09-18 17:15:54,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=487120.0, ans=0.0 +2024-09-18 17:15:56,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=487120.0, ans=0.1 +2024-09-18 17:16:03,236 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.595e+01 8.410e+01 8.915e+01 9.592e+01 1.452e+02, threshold=1.783e+02, percent-clipped=0.0 +2024-09-18 17:16:19,986 INFO [train.py:1198] (1/2) Epoch 27, batch 4150, loss[loss=0.2342, ctc_loss=0.1148, cr_loss=0.336, attn_decoder_loss=0.24, over 29489.00 frames. 
], tot_loss[loss=0.2397, ctc_loss=0.1245, cr_loss=0.3666, attn_decoder_loss=0.2444, over 5799344.74 frames. ], batch size: 77, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:16:27,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=487200.0, ans=0.2 +2024-09-18 17:16:32,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=487200.0, ans=0.125 +2024-09-18 17:16:39,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=487240.0, ans=0.0 +2024-09-18 17:16:49,711 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 17:17:13,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=487320.0, ans=0.125 +2024-09-18 17:17:20,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=487360.0, ans=10.0 +2024-09-18 17:17:21,417 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.14 vs. limit=15.0 +2024-09-18 17:17:25,615 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.28 vs. limit=15.0 +2024-09-18 17:17:33,693 INFO [train.py:1198] (1/2) Epoch 27, batch 4200, loss[loss=0.2598, ctc_loss=0.1402, cr_loss=0.3986, attn_decoder_loss=0.2643, over 29492.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1246, cr_loss=0.3669, attn_decoder_loss=0.2447, over 5800722.82 frames. ], batch size: 90, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:17:39,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=487400.0, ans=0.025 +2024-09-18 17:17:47,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=487440.0, ans=0.5 +2024-09-18 17:18:01,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=487480.0, ans=0.025 +2024-09-18 17:18:22,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=487520.0, ans=0.2 +2024-09-18 17:18:32,352 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.317e+01 8.514e+01 8.963e+01 9.288e+01 3.975e+02, threshold=1.793e+02, percent-clipped=1.0 +2024-09-18 17:18:39,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=487560.0, ans=0.0 +2024-09-18 17:18:48,509 INFO [train.py:1198] (1/2) Epoch 27, batch 4250, loss[loss=0.2259, ctc_loss=0.1127, cr_loss=0.3523, attn_decoder_loss=0.2306, over 29526.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1248, cr_loss=0.3675, attn_decoder_loss=0.245, over 5806534.73 frames. 
], batch size: 74, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:18:53,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=487600.0, ans=0.2 +2024-09-18 17:19:07,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=487640.0, ans=10.0 +2024-09-18 17:19:22,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=487680.0, ans=0.125 +2024-09-18 17:19:54,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.56 vs. limit=15.0 +2024-09-18 17:20:02,919 INFO [train.py:1198] (1/2) Epoch 27, batch 4300, loss[loss=0.249, ctc_loss=0.1312, cr_loss=0.3815, attn_decoder_loss=0.2536, over 29558.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1246, cr_loss=0.3671, attn_decoder_loss=0.2453, over 5795568.59 frames. ], batch size: 87, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:20:05,408 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.13 vs. limit=15.0 +2024-09-18 17:20:25,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=487840.0, ans=0.0 +2024-09-18 17:20:38,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=487880.0, ans=0.1 +2024-09-18 17:20:40,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=487880.0, ans=0.125 +2024-09-18 17:21:00,646 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.078e+01 8.751e+01 9.154e+01 9.778e+01 2.419e+02, threshold=1.831e+02, percent-clipped=1.0 +2024-09-18 17:21:17,484 INFO [train.py:1198] (1/2) Epoch 27, batch 4350, loss[loss=0.2564, ctc_loss=0.1407, cr_loss=0.411, attn_decoder_loss=0.2601, over 29532.00 frames. ], tot_loss[loss=0.2438, ctc_loss=0.1276, cr_loss=0.3733, attn_decoder_loss=0.2484, over 5798007.27 frames. ], batch size: 97, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:21:19,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=488000.0, ans=0.1 +2024-09-18 17:21:31,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=488040.0, ans=0.0 +2024-09-18 17:22:03,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=488120.0, ans=0.125 +2024-09-18 17:22:06,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=488120.0, ans=0.09899494936611666 +2024-09-18 17:22:32,283 INFO [train.py:1198] (1/2) Epoch 27, batch 4400, loss[loss=0.2586, ctc_loss=0.1447, cr_loss=0.4201, attn_decoder_loss=0.2619, over 27306.00 frames. ], tot_loss[loss=0.2458, ctc_loss=0.1288, cr_loss=0.3756, attn_decoder_loss=0.2505, over 5769497.88 frames. 
], batch size: 124, lr: 4.07e-03, grad_scale: 16.0 +2024-09-18 17:22:32,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=488200.0, ans=0.0 +2024-09-18 17:22:37,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=488200.0, ans=0.07 +2024-09-18 17:23:00,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=488280.0, ans=0.0 +2024-09-18 17:23:06,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=488280.0, ans=0.0 +2024-09-18 17:23:19,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=488320.0, ans=0.125 +2024-09-18 17:23:24,430 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.17 vs. limit=15.0 +2024-09-18 17:23:29,593 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.043e+01 8.897e+01 9.375e+01 9.833e+01 4.108e+02, threshold=1.875e+02, percent-clipped=1.0 +2024-09-18 17:23:46,266 INFO [train.py:1198] (1/2) Epoch 27, batch 4450, loss[loss=0.2534, ctc_loss=0.152, cr_loss=0.3836, attn_decoder_loss=0.2562, over 19340.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1329, cr_loss=0.3813, attn_decoder_loss=0.2527, over 5574693.70 frames. ], batch size: 209, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:23:59,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=488400.0, ans=0.1 +2024-09-18 17:24:10,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=488440.0, ans=0.125 +2024-09-18 17:24:13,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=488440.0, ans=0.0 +2024-09-18 17:24:13,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=488440.0, ans=0.0 +2024-09-18 17:24:24,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=488480.0, ans=0.2 +2024-09-18 17:24:46,497 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.65 vs. limit=15.0 +2024-09-18 17:25:02,147 INFO [train.py:1198] (1/2) Epoch 27, batch 4500, loss[loss=0.2536, ctc_loss=0.1458, cr_loss=0.3727, attn_decoder_loss=0.2573, over 20008.00 frames. ], tot_loss[loss=0.2507, ctc_loss=0.1367, cr_loss=0.3836, attn_decoder_loss=0.2548, over 5238559.51 frames. ], batch size: 209, lr: 4.07e-03, grad_scale: 8.0 +2024-09-18 17:25:10,840 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=13.09 vs. limit=15.0 +2024-09-18 17:26:25,150 INFO [train.py:1198] (1/2) Epoch 28, batch 0, loss[loss=0.2079, ctc_loss=0.0912, cr_loss=0.2905, attn_decoder_loss=0.2144, over 29629.00 frames. ], tot_loss[loss=0.2079, ctc_loss=0.0912, cr_loss=0.2905, attn_decoder_loss=0.2144, over 29629.00 frames. 
], batch size: 73, lr: 3.99e-03, grad_scale: 16.0
+2024-09-18 17:26:25,151 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-18 17:26:45,477 INFO [train.py:1230] (1/2) Epoch 28, validation: loss=0.2131, ctc_loss=0.0377, cr_loss=5.605e-15, attn_decoder_loss=0.2326, over 944034.00 frames.
+2024-09-18 17:26:45,478 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-18 17:27:09,718 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.684e+01 1.052e+02 1.136e+02 1.230e+02 3.342e+02, threshold=2.271e+02, percent-clipped=3.0
+2024-09-18 17:27:14,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=488780.0, ans=0.0
+2024-09-18 17:27:25,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=488780.0, ans=0.0
+2024-09-18 17:27:36,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=488820.0, ans=0.125
+2024-09-18 17:27:50,046 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.29 vs. limit=12.0
+2024-09-18 17:27:55,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=488860.0, ans=0.0
+2024-09-18 17:27:59,802 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.02 vs. limit=15.0
+2024-09-18 17:28:01,694 INFO [train.py:1198] (1/2) Epoch 28, batch 50, loss[loss=0.2212, ctc_loss=0.1178, cr_loss=0.34, attn_decoder_loss=0.2251, over 29423.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.1264, cr_loss=0.3705, attn_decoder_loss=0.2462, over 1269186.12 frames. ], batch size: 70, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:28:16,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.18 vs. limit=22.5
+2024-09-18 17:28:23,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=488940.0, ans=0.2
+2024-09-18 17:28:24,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=488940.0, ans=0.0
+2024-09-18 17:28:25,548 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.63 vs. limit=22.5
+2024-09-18 17:28:32,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=488980.0, ans=0.125
+2024-09-18 17:28:53,905 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.37 vs. limit=15.0
+2024-09-18 17:28:55,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=489020.0, ans=0.125
+2024-09-18 17:28:56,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=489020.0, ans=0.2
+2024-09-18 17:29:01,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=489060.0, ans=0.0
+2024-09-18 17:29:02,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=489060.0, ans=0.125
+2024-09-18 17:29:16,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=489100.0, ans=0.0
+2024-09-18 17:29:17,677 INFO [train.py:1198] (1/2) Epoch 28, batch 100, loss[loss=0.229, ctc_loss=0.1195, cr_loss=0.3537, attn_decoder_loss=0.2334, over 29546.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1276, cr_loss=0.3724, attn_decoder_loss=0.2478, over 2253009.81 frames. ], batch size: 76, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:29:41,538 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.772e+01 8.514e+01 8.987e+01 9.639e+01 1.687e+02, threshold=1.797e+02, percent-clipped=0.0
+2024-09-18 17:29:45,526 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.63 vs. limit=15.0
+2024-09-18 17:29:59,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=489180.0, ans=0.0
+2024-09-18 17:30:17,671 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:30:22,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=489260.0, ans=0.2
+2024-09-18 17:30:36,905 INFO [train.py:1198] (1/2) Epoch 28, batch 150, loss[loss=0.214, ctc_loss=0.1047, cr_loss=0.3399, attn_decoder_loss=0.2186, over 29429.00 frames. ], tot_loss[loss=0.2414, ctc_loss=0.1259, cr_loss=0.3698, attn_decoder_loss=0.246, over 3047772.47 frames. ], batch size: 70, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:31:08,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=489380.0, ans=0.07
+2024-09-18 17:31:17,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=489380.0, ans=0.125
+2024-09-18 17:31:20,034 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.17 vs. limit=15.0
+2024-09-18 17:31:38,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=489460.0, ans=0.125
+2024-09-18 17:31:47,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=489460.0, ans=0.07
+2024-09-18 17:31:52,057 INFO [train.py:1198] (1/2) Epoch 28, batch 200, loss[loss=0.2589, ctc_loss=0.1458, cr_loss=0.3986, attn_decoder_loss=0.2626, over 27298.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1245, cr_loss=0.3669, attn_decoder_loss=0.2446, over 3660224.47 frames. ], batch size: 124, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:31:56,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=489500.0, ans=0.0
+2024-09-18 17:32:01,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=489500.0, ans=0.05
+2024-09-18 17:32:03,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=489500.0, ans=0.125
+2024-09-18 17:32:09,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=489540.0, ans=0.125
+2024-09-18 17:32:16,582 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.284e+01 8.292e+01 9.011e+01 9.460e+01 1.346e+02, threshold=1.802e+02, percent-clipped=0.0
+2024-09-18 17:32:35,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=489580.0, ans=0.2
+2024-09-18 17:33:01,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=489660.0, ans=0.025
+2024-09-18 17:33:01,741 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.66 vs. limit=15.0
+2024-09-18 17:33:08,527 INFO [train.py:1198] (1/2) Epoch 28, batch 250, loss[loss=0.2545, ctc_loss=0.1319, cr_loss=0.3922, attn_decoder_loss=0.2595, over 29229.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.125, cr_loss=0.3684, attn_decoder_loss=0.2448, over 4141771.30 frames. ], batch size: 100, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:33:09,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=9.98 vs. limit=12.0
+2024-09-18 17:33:24,769 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.38 vs. limit=22.5
+2024-09-18 17:33:27,740 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.41 vs. limit=6.0
+2024-09-18 17:33:33,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=489740.0, ans=0.04949747468305833
+2024-09-18 17:33:40,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=489780.0, ans=0.09899494936611666
+2024-09-18 17:33:54,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=489820.0, ans=0.125
+2024-09-18 17:33:55,179 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.22 vs. limit=15.0
+2024-09-18 17:34:24,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=489860.0, ans=0.2
+2024-09-18 17:34:26,611 INFO [train.py:1198] (1/2) Epoch 28, batch 300, loss[loss=0.2547, ctc_loss=0.1357, cr_loss=0.3971, attn_decoder_loss=0.2591, over 29531.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1245, cr_loss=0.3677, attn_decoder_loss=0.2447, over 4510720.49 frames. ], batch size: 92, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:34:28,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=489900.0, ans=0.125
+2024-09-18 17:34:29,179 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.66 vs. limit=10.0
+2024-09-18 17:34:44,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=489940.0, ans=0.125
+2024-09-18 17:34:45,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=489940.0, ans=0.0
+2024-09-18 17:34:53,037 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.323e+01 8.453e+01 8.832e+01 9.524e+01 1.905e+02, threshold=1.766e+02, percent-clipped=1.0
+2024-09-18 17:34:55,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=489940.0, ans=0.1
+2024-09-18 17:34:58,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=489980.0, ans=0.1
+2024-09-18 17:35:14,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=490020.0, ans=0.0
+2024-09-18 17:35:25,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=490020.0, ans=0.125
+2024-09-18 17:35:37,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=490060.0, ans=0.0
+2024-09-18 17:35:44,880 INFO [train.py:1198] (1/2) Epoch 28, batch 350, loss[loss=0.2181, ctc_loss=0.108, cr_loss=0.325, attn_decoder_loss=0.2231, over 29312.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1249, cr_loss=0.3682, attn_decoder_loss=0.2451, over 4795275.60 frames. ], batch size: 71, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:36:12,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=490140.0, ans=0.1
+2024-09-18 17:36:18,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.86 vs. limit=22.5
+2024-09-18 17:36:45,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=490260.0, ans=0.05
+2024-09-18 17:36:59,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=490300.0, ans=0.125
+2024-09-18 17:37:00,257 INFO [train.py:1198] (1/2) Epoch 28, batch 400, loss[loss=0.2494, ctc_loss=0.1284, cr_loss=0.3819, attn_decoder_loss=0.2544, over 29686.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1244, cr_loss=0.3676, attn_decoder_loss=0.2446, over 5023762.82 frames. ], batch size: 82, lr: 3.99e-03, grad_scale: 16.0
+2024-09-18 17:37:08,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=490300.0, ans=0.1
+2024-09-18 17:37:26,415 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.544e+01 8.632e+01 9.035e+01 9.717e+01 2.941e+02, threshold=1.807e+02, percent-clipped=3.0
+2024-09-18 17:37:35,214 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.28 vs. limit=15.0
+2024-09-18 17:37:39,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=490380.0, ans=0.125
+2024-09-18 17:37:47,313 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.50 vs. limit=15.0
+2024-09-18 17:37:49,125 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.16 vs. limit=15.0
+2024-09-18 17:37:57,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=490420.0, ans=0.1
+2024-09-18 17:38:16,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=490460.0, ans=0.0
+2024-09-18 17:38:19,618 INFO [train.py:1198] (1/2) Epoch 28, batch 450, loss[loss=0.2457, ctc_loss=0.1162, cr_loss=0.3501, attn_decoder_loss=0.2523, over 29696.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1245, cr_loss=0.368, attn_decoder_loss=0.2448, over 5186573.05 frames. ], batch size: 83, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:38:27,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=490500.0, ans=0.125
+2024-09-18 17:38:29,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=490500.0, ans=0.125
+2024-09-18 17:38:35,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.66 vs. limit=15.0
+2024-09-18 17:38:45,428 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:38:46,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=490540.0, ans=0.0
+2024-09-18 17:39:02,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=490580.0, ans=0.0
+2024-09-18 17:39:09,095 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.01 vs. limit=12.0
+2024-09-18 17:39:11,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=490620.0, ans=0.125
+2024-09-18 17:39:32,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=490660.0, ans=0.125
+2024-09-18 17:39:38,407 INFO [train.py:1198] (1/2) Epoch 28, batch 500, loss[loss=0.2533, ctc_loss=0.1368, cr_loss=0.4122, attn_decoder_loss=0.2571, over 29455.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1243, cr_loss=0.3679, attn_decoder_loss=0.2443, over 5328520.11 frames. ], batch size: 94, lr: 3.99e-03, grad_scale: 8.0
+2024-09-18 17:39:44,123 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.56 vs. limit=22.5
+2024-09-18 17:39:54,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.63 vs. limit=15.0
+2024-09-18 17:39:55,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=490740.0, ans=0.0
+2024-09-18 17:40:04,212 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.021e+01 8.478e+01 8.864e+01 9.440e+01 1.535e+02, threshold=1.773e+02, percent-clipped=0.0
+2024-09-18 17:40:08,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=490780.0, ans=0.2
+2024-09-18 17:40:23,546 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.17 vs. limit=12.0
+2024-09-18 17:40:47,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=490860.0, ans=0.125
+2024-09-18 17:40:48,141 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.22 vs. limit=15.0
+2024-09-18 17:40:53,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=490900.0, ans=0.125
+2024-09-18 17:40:54,416 INFO [train.py:1198] (1/2) Epoch 28, batch 550, loss[loss=0.249, ctc_loss=0.1337, cr_loss=0.3969, attn_decoder_loss=0.253, over 28820.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1244, cr_loss=0.3678, attn_decoder_loss=0.2441, over 5421725.20 frames. ], batch size: 104, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:41:20,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=490940.0, ans=0.07
+2024-09-18 17:41:37,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=490980.0, ans=0.125
+2024-09-18 17:41:46,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.72 vs. limit=15.0
+2024-09-18 17:42:12,518 INFO [train.py:1198] (1/2) Epoch 28, batch 600, loss[loss=0.2519, ctc_loss=0.1323, cr_loss=0.3673, attn_decoder_loss=0.257, over 29150.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1246, cr_loss=0.3684, attn_decoder_loss=0.2443, over 5508713.94 frames. ], batch size: 100, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:42:24,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=491100.0, ans=0.125
+2024-09-18 17:42:40,162 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.281e+01 8.877e+01 9.486e+01 1.809e+02, threshold=1.775e+02, percent-clipped=1.0
+2024-09-18 17:42:40,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=491140.0, ans=0.125
+2024-09-18 17:43:00,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=491220.0, ans=0.2
+2024-09-18 17:43:07,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=491220.0, ans=0.0
+2024-09-18 17:43:22,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=491260.0, ans=0.0
+2024-09-18 17:43:29,932 INFO [train.py:1198] (1/2) Epoch 28, batch 650, loss[loss=0.2403, ctc_loss=0.1224, cr_loss=0.3806, attn_decoder_loss=0.245, over 29760.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1235, cr_loss=0.3665, attn_decoder_loss=0.2435, over 5585993.74 frames. ], batch size: 81, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:43:40,057 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.15 vs. limit=15.0
+2024-09-18 17:43:48,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=491340.0, ans=0.125
+2024-09-18 17:43:48,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=491340.0, ans=0.125
+2024-09-18 17:44:22,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=491420.0, ans=0.125
+2024-09-18 17:44:24,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=491420.0, ans=0.0
+2024-09-18 17:44:37,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=491460.0, ans=0.125
+2024-09-18 17:44:43,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=491460.0, ans=10.0
+2024-09-18 17:44:46,046 INFO [train.py:1198] (1/2) Epoch 28, batch 700, loss[loss=0.2304, ctc_loss=0.125, cr_loss=0.384, attn_decoder_loss=0.2336, over 29550.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1239, cr_loss=0.3673, attn_decoder_loss=0.244, over 5637340.24 frames. ], batch size: 76, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:45:01,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=491540.0, ans=0.125
+2024-09-18 17:45:01,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=491540.0, ans=0.0
+2024-09-18 17:45:11,730 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.262e+01 8.777e+01 9.267e+01 2.724e+02, threshold=1.755e+02, percent-clipped=1.0
+2024-09-18 17:45:12,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.98 vs. limit=15.0
+2024-09-18 17:45:27,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=491580.0, ans=0.125
+2024-09-18 17:45:27,923 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.56 vs. limit=15.0
+2024-09-18 17:45:39,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=491620.0, ans=0.125
+2024-09-18 17:46:01,803 INFO [train.py:1198] (1/2) Epoch 28, batch 750, loss[loss=0.2395, ctc_loss=0.1159, cr_loss=0.3564, attn_decoder_loss=0.2454, over 29713.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1233, cr_loss=0.3658, attn_decoder_loss=0.2436, over 5676222.58 frames. ], batch size: 82, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:46:16,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=491700.0, ans=0.1
+2024-09-18 17:46:23,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=491740.0, ans=0.125
+2024-09-18 17:47:20,365 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:47:21,496 INFO [train.py:1198] (1/2) Epoch 28, batch 800, loss[loss=0.229, ctc_loss=0.1184, cr_loss=0.3488, attn_decoder_loss=0.2335, over 29585.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1236, cr_loss=0.3661, attn_decoder_loss=0.2436, over 5705857.23 frames. ], batch size: 73, lr: 3.98e-03, grad_scale: 16.0
+2024-09-18 17:47:21,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=491900.0, ans=0.0
+2024-09-18 17:47:36,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=491940.0, ans=0.0
+2024-09-18 17:47:47,342 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.515e+01 8.491e+01 9.037e+01 9.523e+01 1.873e+02, threshold=1.807e+02, percent-clipped=1.0
+2024-09-18 17:47:53,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=491980.0, ans=0.025
+2024-09-18 17:48:00,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=491980.0, ans=0.1
+2024-09-18 17:48:01,751 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:48:32,930 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:48:35,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=492100.0, ans=0.125
+2024-09-18 17:48:37,061 INFO [train.py:1198] (1/2) Epoch 28, batch 850, loss[loss=0.2446, ctc_loss=0.1267, cr_loss=0.3665, attn_decoder_loss=0.2495, over 29694.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1232, cr_loss=0.3654, attn_decoder_loss=0.2432, over 5734472.33 frames. ], batch size: 89, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:48:41,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=492100.0, ans=0.2
+2024-09-18 17:48:44,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=492100.0, ans=0.125
+2024-09-18 17:49:01,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=492140.0, ans=0.125
+2024-09-18 17:49:10,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=492180.0, ans=0.07
+2024-09-18 17:49:11,267 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.51 vs. limit=15.0
+2024-09-18 17:49:21,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=492220.0, ans=0.05
+2024-09-18 17:49:40,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=492260.0, ans=0.125
+2024-09-18 17:49:52,768 INFO [train.py:1198] (1/2) Epoch 28, batch 900, loss[loss=0.2253, ctc_loss=0.1086, cr_loss=0.3485, attn_decoder_loss=0.2305, over 29635.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1243, cr_loss=0.367, attn_decoder_loss=0.2439, over 5739622.64 frames. ], batch size: 73, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:49:54,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=492300.0, ans=0.1
+2024-09-18 17:50:04,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=492300.0, ans=0.0
+2024-09-18 17:50:11,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=492340.0, ans=0.0
+2024-09-18 17:50:21,990 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.372e+01 8.505e+01 9.006e+01 9.829e+01 2.830e+02, threshold=1.801e+02, percent-clipped=3.0
+2024-09-18 17:50:22,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=492340.0, ans=0.125
+2024-09-18 17:50:26,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=492380.0, ans=0.0
+2024-09-18 17:50:31,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=492380.0, ans=0.1
+2024-09-18 17:50:49,062 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:50:53,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=492420.0, ans=0.0
+2024-09-18 17:51:12,925 INFO [train.py:1198] (1/2) Epoch 28, batch 950, loss[loss=0.2211, ctc_loss=0.1092, cr_loss=0.3477, attn_decoder_loss=0.2258, over 29491.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1241, cr_loss=0.3665, attn_decoder_loss=0.244, over 5742979.59 frames. ], batch size: 74, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:51:16,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=492500.0, ans=0.1
+2024-09-18 17:51:32,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=492540.0, ans=0.125
+2024-09-18 17:51:48,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=492580.0, ans=0.125
+2024-09-18 17:52:00,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=492620.0, ans=0.04949747468305833
+2024-09-18 17:52:08,419 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.42 vs. limit=15.0
+2024-09-18 17:52:13,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=492660.0, ans=0.1
+2024-09-18 17:52:28,280 INFO [train.py:1198] (1/2) Epoch 28, batch 1000, loss[loss=0.2345, ctc_loss=0.1224, cr_loss=0.364, attn_decoder_loss=0.2388, over 29508.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1249, cr_loss=0.3678, attn_decoder_loss=0.2446, over 5736631.94 frames. ], batch size: 77, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:52:34,568 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:52:55,758 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.508e+01 8.563e+01 9.173e+01 1.012e+02 1.591e+02, threshold=1.835e+02, percent-clipped=0.0
+2024-09-18 17:52:59,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=492780.0, ans=0.0
+2024-09-18 17:53:00,021 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.34 vs. limit=22.5
+2024-09-18 17:53:13,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=492820.0, ans=0.125
+2024-09-18 17:53:46,490 INFO [train.py:1198] (1/2) Epoch 28, batch 1050, loss[loss=0.2415, ctc_loss=0.1213, cr_loss=0.3617, attn_decoder_loss=0.2468, over 29673.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1247, cr_loss=0.3674, attn_decoder_loss=0.2443, over 5742484.27 frames. ], batch size: 85, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:54:09,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=492940.0, ans=0.125
+2024-09-18 17:54:11,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=492940.0, ans=0.125
+2024-09-18 17:54:18,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=492980.0, ans=0.125
+2024-09-18 17:54:34,719 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:54:35,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.27 vs. limit=10.0
+2024-09-18 17:54:43,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=493020.0, ans=0.1
+2024-09-18 17:54:53,155 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.20 vs. limit=22.5
+2024-09-18 17:54:57,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=493060.0, ans=0.125
+2024-09-18 17:55:01,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=493060.0, ans=0.0
+2024-09-18 17:55:01,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=493060.0, ans=0.0
+2024-09-18 17:55:04,302 INFO [train.py:1198] (1/2) Epoch 28, batch 1100, loss[loss=0.2447, ctc_loss=0.1299, cr_loss=0.3748, attn_decoder_loss=0.2492, over 29467.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1244, cr_loss=0.3668, attn_decoder_loss=0.244, over 5755702.29 frames. ], batch size: 78, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:55:12,930 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.07 vs. limit=15.0
+2024-09-18 17:55:31,740 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.956e+01 8.310e+01 8.930e+01 9.558e+01 2.939e+02, threshold=1.786e+02, percent-clipped=1.0
+2024-09-18 17:55:35,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=493180.0, ans=0.0
+2024-09-18 17:55:51,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=493220.0, ans=0.125
+2024-09-18 17:55:56,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=493220.0, ans=0.2
+2024-09-18 17:56:04,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=493260.0, ans=0.0
+2024-09-18 17:56:10,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=493260.0, ans=0.2
+2024-09-18 17:56:20,581 INFO [train.py:1198] (1/2) Epoch 28, batch 1150, loss[loss=0.2304, ctc_loss=0.1152, cr_loss=0.3329, attn_decoder_loss=0.2358, over 29466.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1243, cr_loss=0.3667, attn_decoder_loss=0.2441, over 5754142.05 frames. ], batch size: 78, lr: 3.98e-03, grad_scale: 8.0
+2024-09-18 17:56:22,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=493300.0, ans=0.0
+2024-09-18 17:56:48,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.93 vs. limit=22.5
+2024-09-18 17:56:59,784 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.07 vs. limit=22.5
+2024-09-18 17:57:13,100 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.51 vs. limit=15.0
+2024-09-18 17:57:23,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.99 vs. limit=15.0
+2024-09-18 17:57:36,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=9.18 vs. limit=12.0
+2024-09-18 17:57:38,535 INFO [train.py:1198] (1/2) Epoch 28, batch 1200, loss[loss=0.2468, ctc_loss=0.1262, cr_loss=0.377, attn_decoder_loss=0.2518, over 29699.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1248, cr_loss=0.3676, attn_decoder_loss=0.2449, over 5747663.92 frames. ], batch size: 85, lr: 3.97e-03, grad_scale: 16.0
+2024-09-18 17:57:39,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.90 vs. limit=12.0
+2024-09-18 17:57:41,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=493500.0, ans=10.0
+2024-09-18 17:57:46,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=493500.0, ans=0.0
+2024-09-18 17:58:07,203 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.372e+01 8.554e+01 9.030e+01 9.625e+01 2.213e+02, threshold=1.806e+02, percent-clipped=2.0
+2024-09-18 17:58:13,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=493580.0, ans=0.125
+2024-09-18 17:58:15,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=493580.0, ans=0.0
+2024-09-18 17:58:28,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=493620.0, ans=0.0
+2024-09-18 17:58:28,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=493620.0, ans=0.125
+2024-09-18 17:58:31,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=493620.0, ans=0.0
+2024-09-18 17:58:46,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=493660.0, ans=0.0
+2024-09-18 17:58:52,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.76 vs. limit=22.5
+2024-09-18 17:58:56,907 INFO [train.py:1198] (1/2) Epoch 28, batch 1250, loss[loss=0.2634, ctc_loss=0.157, cr_loss=0.4291, attn_decoder_loss=0.2657, over 29522.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1251, cr_loss=0.3682, attn_decoder_loss=0.2453, over 5775671.88 frames. ], batch size: 92, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 17:58:57,227 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 17:59:00,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=493700.0, ans=0.0
+2024-09-18 17:59:36,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=493780.0, ans=0.125
+2024-09-18 17:59:45,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=493820.0, ans=0.1
+2024-09-18 17:59:49,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=493820.0, ans=0.2
+2024-09-18 17:59:50,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.19 vs. limit=22.5
+2024-09-18 17:59:52,497 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.79 vs. limit=15.0
+2024-09-18 17:59:55,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=493820.0, ans=0.0
+2024-09-18 18:00:01,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=493860.0, ans=0.07
+2024-09-18 18:00:13,086 INFO [train.py:1198] (1/2) Epoch 28, batch 1300, loss[loss=0.2464, ctc_loss=0.1224, cr_loss=0.3698, attn_decoder_loss=0.252, over 28233.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1246, cr_loss=0.3668, attn_decoder_loss=0.2446, over 5779068.15 frames. ], batch size: 111, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:00:18,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=493900.0, ans=0.1
+2024-09-18 18:00:41,219 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.02 vs. limit=15.0
+2024-09-18 18:00:41,987 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.448e+01 8.590e+01 9.154e+01 9.575e+01 1.829e+02, threshold=1.831e+02, percent-clipped=1.0
+2024-09-18 18:00:43,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=493980.0, ans=0.1
+2024-09-18 18:00:51,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=493980.0, ans=0.05
+2024-09-18 18:01:03,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=494020.0, ans=0.07
+2024-09-18 18:01:04,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=494020.0, ans=0.0
+2024-09-18 18:01:20,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=494060.0, ans=0.5
+2024-09-18 18:01:23,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=494060.0, ans=0.1
+2024-09-18 18:01:26,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=494060.0, ans=0.125
+2024-09-18 18:01:29,055 INFO [train.py:1198] (1/2) Epoch 28, batch 1350, loss[loss=0.2369, ctc_loss=0.1155, cr_loss=0.3503, attn_decoder_loss=0.2426, over 29764.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1238, cr_loss=0.3652, attn_decoder_loss=0.244, over 5797705.98 frames. ], batch size: 81, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:01:49,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=494140.0, ans=0.125
+2024-09-18 18:01:51,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=494140.0, ans=0.1
+2024-09-18 18:01:54,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=494140.0, ans=0.025
+2024-09-18 18:02:08,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=494180.0, ans=0.125
+2024-09-18 18:02:10,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=494180.0, ans=0.125
+2024-09-18 18:02:16,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=494220.0, ans=0.025
+2024-09-18 18:02:22,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=494220.0, ans=0.125
+2024-09-18 18:02:22,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=494220.0, ans=0.125
+2024-09-18 18:02:24,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=494220.0, ans=0.0
+2024-09-18 18:02:25,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=494220.0, ans=0.1
+2024-09-18 18:02:29,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=494260.0, ans=0.125
+2024-09-18 18:02:39,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=494260.0, ans=0.125
+2024-09-18 18:02:48,564 INFO [train.py:1198] (1/2) Epoch 28, batch 1400, loss[loss=0.2165, ctc_loss=0.1074, cr_loss=0.3405, attn_decoder_loss=0.2211, over 29575.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1233, cr_loss=0.3644, attn_decoder_loss=0.2438, over 5808779.94 frames. ], batch size: 69, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:03:02,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=494340.0, ans=0.125
+2024-09-18 18:03:17,503 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.696e+01 8.548e+01 9.065e+01 9.786e+01 1.272e+02, threshold=1.813e+02, percent-clipped=0.0
+2024-09-18 18:03:39,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=494420.0, ans=0.1
+2024-09-18 18:04:04,980 INFO [train.py:1198] (1/2) Epoch 28, batch 1450, loss[loss=0.2458, ctc_loss=0.1251, cr_loss=0.3866, attn_decoder_loss=0.2506, over 29451.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1236, cr_loss=0.365, attn_decoder_loss=0.2445, over 5805400.88 frames. ], batch size: 94, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:04:22,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.min_positive, batch_count=494540.0, ans=0.025
+2024-09-18 18:04:31,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=494540.0, ans=0.125
+2024-09-18 18:04:35,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=494580.0, ans=0.0
+2024-09-18 18:05:20,934 INFO [train.py:1198] (1/2) Epoch 28, batch 1500, loss[loss=0.2466, ctc_loss=0.1211, cr_loss=0.3791, attn_decoder_loss=0.2521, over 29635.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1239, cr_loss=0.3661, attn_decoder_loss=0.2449, over 5806076.65 frames. ], batch size: 86, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:05:28,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=494700.0, ans=0.0
+2024-09-18 18:05:31,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=494700.0, ans=0.2
+2024-09-18 18:05:35,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=494700.0, ans=0.125
+2024-09-18 18:05:45,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.34 vs. limit=15.0
+2024-09-18 18:05:50,232 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.08 vs. limit=15.0
+2024-09-18 18:05:52,388 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.464e+01 8.636e+01 9.142e+01 9.701e+01 7.436e+02, threshold=1.828e+02, percent-clipped=2.0
+2024-09-18 18:06:12,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=494820.0, ans=0.2
+2024-09-18 18:06:13,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=494820.0, ans=15.0
+2024-09-18 18:06:26,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=494860.0, ans=0.125
+2024-09-18 18:06:31,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=494860.0, ans=0.0
+2024-09-18 18:06:41,531 INFO [train.py:1198] (1/2) Epoch 28, batch 1550, loss[loss=0.2544, ctc_loss=0.1359, cr_loss=0.3965, attn_decoder_loss=0.2588, over 29498.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1241, cr_loss=0.3664, attn_decoder_loss=0.245, over 5783306.24 frames. ], batch size: 90, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:06:45,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=494900.0, ans=0.125
+2024-09-18 18:06:47,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.09 vs. limit=15.0
+2024-09-18 18:07:04,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=494940.0, ans=0.0
+2024-09-18 18:07:45,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=495060.0, ans=0.125
+2024-09-18 18:07:57,364 INFO [train.py:1198] (1/2) Epoch 28, batch 1600, loss[loss=0.2451, ctc_loss=0.1248, cr_loss=0.3584, attn_decoder_loss=0.2505, over 29696.00 frames. ], tot_loss[loss=0.2405, ctc_loss=0.1245, cr_loss=0.3672, attn_decoder_loss=0.2452, over 5766661.01 frames. ], batch size: 85, lr: 3.97e-03, grad_scale: 16.0
+2024-09-18 18:08:18,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=495140.0, ans=0.1
+2024-09-18 18:08:27,526 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.529e+01 9.034e+01 9.836e+01 1.943e+02, threshold=1.807e+02, percent-clipped=1.0
+2024-09-18 18:08:43,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=495220.0, ans=0.1
+2024-09-18 18:09:05,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=495260.0, ans=0.0
+2024-09-18 18:09:09,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=495260.0, ans=0.05
+2024-09-18 18:09:12,765 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 18:09:14,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=495300.0, ans=0.125
+2024-09-18 18:09:14,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=495300.0, ans=0.07
+2024-09-18 18:09:15,393 INFO [train.py:1198] (1/2) Epoch 28, batch 1650, loss[loss=0.2527, ctc_loss=0.1338, cr_loss=0.3903, attn_decoder_loss=0.2573, over 29720.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1244, cr_loss=0.3668, attn_decoder_loss=0.2449, over 5759866.33 frames. ], batch size: 89, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:09:18,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=495300.0, ans=0.025
+2024-09-18 18:09:29,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=495340.0, ans=0.035
+2024-09-18 18:09:32,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=495340.0, ans=0.125
+2024-09-18 18:09:32,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=495340.0, ans=0.125
+2024-09-18 18:09:41,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=495340.0, ans=0.0
+2024-09-18 18:10:12,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=495420.0, ans=0.2
+2024-09-18 18:10:33,326 INFO [train.py:1198] (1/2) Epoch 28, batch 1700, loss[loss=0.2078, ctc_loss=0.09849, cr_loss=0.3024, attn_decoder_loss=0.2132, over 29580.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1239, cr_loss=0.3661, attn_decoder_loss=0.2444, over 5781027.76 frames. ], batch size: 69, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:10:47,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=495540.0, ans=0.125
+2024-09-18 18:10:52,345 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.56 vs. limit=15.0
+2024-09-18 18:11:03,300 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.621e+01 8.597e+01 9.283e+01 9.916e+01 1.626e+02, threshold=1.857e+02, percent-clipped=0.0
+2024-09-18 18:11:11,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=495580.0, ans=0.015
+2024-09-18 18:11:14,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=495580.0, ans=0.0
+2024-09-18 18:11:19,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=495620.0, ans=15.0
+2024-09-18 18:11:31,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=495620.0, ans=0.5
+2024-09-18 18:11:40,837 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.15 vs. limit=15.0
+2024-09-18 18:11:45,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=495660.0, ans=0.1
+2024-09-18 18:11:49,159 INFO [train.py:1198] (1/2) Epoch 28, batch 1750, loss[loss=0.2178, ctc_loss=0.1093, cr_loss=0.3272, attn_decoder_loss=0.2226, over 29324.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1236, cr_loss=0.3658, attn_decoder_loss=0.244, over 5788304.45 frames. ], batch size: 67, lr: 3.97e-03, grad_scale: 8.0
+2024-09-18 18:11:57,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=495700.0, ans=0.0
+2024-09-18 18:12:09,475 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 18:12:21,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=495780.0, ans=0.025
+2024-09-18 18:12:43,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.50 vs. limit=15.0
+2024-09-18 18:12:45,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=495820.0, ans=0.025
+2024-09-18 18:12:50,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=495860.0, ans=0.125
+2024-09-18 18:12:53,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=495860.0, ans=0.125
+2024-09-18 18:13:07,141 INFO [train.py:1198] (1/2) Epoch 28, batch 1800, loss[loss=0.2519, ctc_loss=0.137, cr_loss=0.389, attn_decoder_loss=0.256, over 29696.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1237, cr_loss=0.366, attn_decoder_loss=0.2441, over 5792081.88 frames. ], batch size: 83, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:13:10,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=495900.0, ans=0.1
+2024-09-18 18:13:37,655 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.001e+01 8.359e+01 8.858e+01 9.396e+01 1.273e+02, threshold=1.772e+02, percent-clipped=0.0
+2024-09-18 18:13:39,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=495980.0, ans=0.1
+2024-09-18 18:14:11,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=496020.0, ans=0.0
+2024-09-18 18:14:15,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=496060.0, ans=0.0
+2024-09-18 18:14:23,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=496060.0, ans=0.0
+2024-09-18 18:14:30,923 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.64 vs. limit=6.0
+2024-09-18 18:14:32,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=496100.0, ans=15.0
+2024-09-18 18:14:32,844 INFO [train.py:1198] (1/2) Epoch 28, batch 1850, loss[loss=0.2448, ctc_loss=0.1247, cr_loss=0.364, attn_decoder_loss=0.2501, over 29616.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1237, cr_loss=0.366, attn_decoder_loss=0.244, over 5796005.54 frames. ], batch size: 86, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:14:52,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=496140.0, ans=0.125
+2024-09-18 18:14:56,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=496140.0, ans=0.1
+2024-09-18 18:14:57,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=496140.0, ans=0.1
+2024-09-18 18:15:09,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=496180.0, ans=0.125
+2024-09-18 18:15:47,678 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.16 vs. limit=22.5
+2024-09-18 18:15:48,300 INFO [train.py:1198] (1/2) Epoch 28, batch 1900, loss[loss=0.2454, ctc_loss=0.1199, cr_loss=0.362, attn_decoder_loss=0.2512, over 29714.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1238, cr_loss=0.3667, attn_decoder_loss=0.2444, over 5803831.89 frames. ], batch size: 89, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:15:57,677 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 18:16:08,744 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.74 vs. limit=15.0
+2024-09-18 18:16:10,467 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.24 vs. limit=15.0
+2024-09-18 18:16:18,826 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.546e+01 8.544e+01 9.072e+01 9.391e+01 1.587e+02, threshold=1.814e+02, percent-clipped=0.0
+2024-09-18 18:16:23,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=496380.0, ans=0.125
+2024-09-18 18:16:36,505 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.77 vs. limit=6.0
+2024-09-18 18:16:49,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=496460.0, ans=0.125
+2024-09-18 18:17:06,240 INFO [train.py:1198] (1/2) Epoch 28, batch 1950, loss[loss=0.239, ctc_loss=0.1264, cr_loss=0.3774, attn_decoder_loss=0.2431, over 29428.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1243, cr_loss=0.3674, attn_decoder_loss=0.2454, over 5818419.89 frames. ], batch size: 78, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:17:06,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=496500.0, ans=0.125
+2024-09-18 18:17:34,140 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.46 vs. limit=15.0
+2024-09-18 18:17:47,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=496580.0, ans=0.1
+2024-09-18 18:17:48,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=496580.0, ans=0.125
+2024-09-18 18:18:03,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=496620.0, ans=0.125
+2024-09-18 18:18:06,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=496660.0, ans=0.0
+2024-09-18 18:18:24,149 INFO [train.py:1198] (1/2) Epoch 28, batch 2000, loss[loss=0.2111, ctc_loss=0.09839, cr_loss=0.3154, attn_decoder_loss=0.2166, over 29319.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1248, cr_loss=0.3683, attn_decoder_loss=0.2458, over 5795025.13 frames. ], batch size: 67, lr: 3.96e-03, grad_scale: 16.0
+2024-09-18 18:18:51,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=496740.0, ans=0.0
+2024-09-18 18:18:55,933 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.651e+01 8.591e+01 9.006e+01 9.471e+01 1.475e+02, threshold=1.801e+02, percent-clipped=0.0
+2024-09-18 18:18:57,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=496780.0, ans=0.125
+2024-09-18 18:19:04,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=496780.0, ans=0.1
+2024-09-18 18:19:08,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=496820.0, ans=0.125
+2024-09-18 18:19:13,922 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.39 vs. limit=22.5
+2024-09-18 18:19:14,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=496820.0, ans=0.0
+2024-09-18 18:19:23,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=496860.0, ans=0.125
+2024-09-18 18:19:35,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=496860.0, ans=0.1
+2024-09-18 18:19:39,996 INFO [train.py:1198] (1/2) Epoch 28, batch 2050, loss[loss=0.2179, ctc_loss=0.1112, cr_loss=0.3489, attn_decoder_loss=0.222, over 29413.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1243, cr_loss=0.3671, attn_decoder_loss=0.2448, over 5787032.22 frames. ], batch size: 70, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:19:40,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=496900.0, ans=0.1
+2024-09-18 18:19:44,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.36 vs. limit=22.5
+2024-09-18 18:19:55,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.97 vs. limit=15.0
+2024-09-18 18:19:56,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=496940.0, ans=0.125
+2024-09-18 18:20:01,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=496940.0, ans=0.0
+2024-09-18 18:20:12,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=496980.0, ans=0.2
+2024-09-18 18:20:36,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=497020.0, ans=0.125
+2024-09-18 18:20:43,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.75 vs. limit=12.0
+2024-09-18 18:20:44,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=497060.0, ans=0.0
+2024-09-18 18:20:58,295 INFO [train.py:1198] (1/2) Epoch 28, batch 2100, loss[loss=0.2352, ctc_loss=0.1153, cr_loss=0.3585, attn_decoder_loss=0.2406, over 29777.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1235, cr_loss=0.366, attn_decoder_loss=0.2443, over 5799557.41 frames. ], batch size: 81, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:21:20,348 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.55 vs. limit=15.0
+2024-09-18 18:21:26,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.36 vs. limit=15.0
+2024-09-18 18:21:29,766 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.088e+01 8.428e+01 8.818e+01 9.232e+01 1.075e+02, threshold=1.764e+02, percent-clipped=0.0
+2024-09-18 18:21:39,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=497180.0, ans=0.0
+2024-09-18 18:21:45,094 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 18:21:48,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=497220.0, ans=0.125
+2024-09-18 18:21:58,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=497260.0, ans=0.2
+2024-09-18 18:21:58,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=497260.0, ans=10.0
+2024-09-18 18:22:01,907 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 18:22:13,507 INFO [train.py:1198] (1/2) Epoch 28, batch 2150, loss[loss=0.2338, ctc_loss=0.1287, cr_loss=0.3727, attn_decoder_loss=0.2372, over 29454.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1234, cr_loss=0.366, attn_decoder_loss=0.2438, over 5815110.88 frames. ], batch size: 78, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:22:15,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=497300.0, ans=0.125
+2024-09-18 18:22:23,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=497300.0, ans=0.125
+2024-09-18 18:22:33,457 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.07 vs. limit=22.5
+2024-09-18 18:22:55,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=497380.0, ans=0.0
+2024-09-18 18:23:31,648 INFO [train.py:1198] (1/2) Epoch 28, batch 2200, loss[loss=0.2452, ctc_loss=0.1246, cr_loss=0.3676, attn_decoder_loss=0.2504, over 29630.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1239, cr_loss=0.3668, attn_decoder_loss=0.2441, over 5812322.45 frames. ], batch size: 86, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:23:39,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=497500.0, ans=0.125
+2024-09-18 18:24:03,362 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.572e+01 8.974e+01 9.491e+01 1.804e+02, threshold=1.795e+02, percent-clipped=1.0
+2024-09-18 18:24:03,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=497580.0, ans=0.125
+2024-09-18 18:24:17,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=497620.0, ans=0.1
+2024-09-18 18:24:28,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=497620.0, ans=0.0
+2024-09-18 18:24:43,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=497660.0, ans=0.125
+2024-09-18 18:24:47,970 INFO [train.py:1198] (1/2) Epoch 28, batch 2250, loss[loss=0.238, ctc_loss=0.119, cr_loss=0.3683, attn_decoder_loss=0.243, over 29689.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1238, cr_loss=0.3668, attn_decoder_loss=0.2441, over 5811907.94 frames. ], batch size: 82, lr: 3.96e-03, grad_scale: 8.0
+2024-09-18 18:24:52,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=497700.0, ans=0.125
+2024-09-18 18:24:53,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=497700.0, ans=0.0
+2024-09-18 18:25:11,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.27 vs. limit=15.0
+2024-09-18 18:25:26,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=497780.0, ans=0.025
+2024-09-18 18:25:31,785 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.68 vs. limit=6.0
+2024-09-18 18:25:52,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=497860.0, ans=0.125
+2024-09-18 18:26:04,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=497900.0, ans=0.0
+2024-09-18 18:26:04,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=497900.0, ans=0.125
+2024-09-18 18:26:05,876 INFO [train.py:1198] (1/2) Epoch 28, batch 2300, loss[loss=0.2246, ctc_loss=0.1168, cr_loss=0.3539, attn_decoder_loss=0.2288, over 29367.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.123, cr_loss=0.3651, attn_decoder_loss=0.2431, over 5797830.95 frames.
], batch size: 71, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:26:30,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=497940.0, ans=0.125 +2024-09-18 18:26:30,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=497940.0, ans=0.125 +2024-09-18 18:26:39,375 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.522e+01 8.383e+01 8.665e+01 9.441e+01 6.698e+02, threshold=1.733e+02, percent-clipped=3.0 +2024-09-18 18:26:41,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=497980.0, ans=0.2 +2024-09-18 18:26:47,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=497980.0, ans=0.2 +2024-09-18 18:26:51,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=497980.0, ans=15.0 +2024-09-18 18:26:59,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=498020.0, ans=0.09899494936611666 +2024-09-18 18:27:23,866 INFO [train.py:1198] (1/2) Epoch 28, batch 2350, loss[loss=0.2505, ctc_loss=0.1286, cr_loss=0.3686, attn_decoder_loss=0.2558, over 29689.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1236, cr_loss=0.3663, attn_decoder_loss=0.2437, over 5803769.13 frames. ], batch size: 83, lr: 3.96e-03, grad_scale: 8.0 +2024-09-18 18:27:37,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=498140.0, ans=10.0 +2024-09-18 18:28:15,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=498220.0, ans=0.025 +2024-09-18 18:28:17,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=498220.0, ans=0.125 +2024-09-18 18:28:27,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=498260.0, ans=0.1 +2024-09-18 18:28:39,737 INFO [train.py:1198] (1/2) Epoch 28, batch 2400, loss[loss=0.2398, ctc_loss=0.1326, cr_loss=0.4071, attn_decoder_loss=0.2426, over 29549.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1241, cr_loss=0.3673, attn_decoder_loss=0.2439, over 5808055.09 frames. ], batch size: 76, lr: 3.96e-03, grad_scale: 16.0 +2024-09-18 18:28:46,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=498300.0, ans=0.0 +2024-09-18 18:28:48,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=498300.0, ans=0.0 +2024-09-18 18:28:51,542 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.56 vs. 
limit=15.0 +2024-09-18 18:28:55,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=498340.0, ans=0.0 +2024-09-18 18:29:08,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=498340.0, ans=0.125 +2024-09-18 18:29:15,195 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.035e+01 8.714e+01 9.180e+01 9.673e+01 2.821e+02, threshold=1.836e+02, percent-clipped=1.0 +2024-09-18 18:29:23,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=498380.0, ans=0.125 +2024-09-18 18:29:40,969 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.96 vs. limit=15.0 +2024-09-18 18:29:47,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=498460.0, ans=0.1 +2024-09-18 18:29:57,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=498500.0, ans=0.0 +2024-09-18 18:29:58,168 INFO [train.py:1198] (1/2) Epoch 28, batch 2450, loss[loss=0.2342, ctc_loss=0.1121, cr_loss=0.3482, attn_decoder_loss=0.24, over 29690.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1247, cr_loss=0.3684, attn_decoder_loss=0.245, over 5785716.44 frames. ], batch size: 82, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:30:10,811 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.41 vs. limit=10.0 +2024-09-18 18:30:11,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=498540.0, ans=0.025 +2024-09-18 18:30:36,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=498580.0, ans=0.0 +2024-09-18 18:30:50,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=498620.0, ans=0.0 +2024-09-18 18:31:09,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=498660.0, ans=0.05 +2024-09-18 18:31:13,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=498660.0, ans=0.2 +2024-09-18 18:31:13,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=498660.0, ans=0.125 +2024-09-18 18:31:16,314 INFO [train.py:1198] (1/2) Epoch 28, batch 2500, loss[loss=0.2499, ctc_loss=0.1318, cr_loss=0.3893, attn_decoder_loss=0.2544, over 29648.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.125, cr_loss=0.3693, attn_decoder_loss=0.2454, over 5795987.86 frames. ], batch size: 86, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:31:22,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=498700.0, ans=0.0 +2024-09-18 18:31:33,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.64 vs. 
limit=15.0 +2024-09-18 18:31:40,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=498740.0, ans=0.125 +2024-09-18 18:31:49,773 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.126e+01 8.525e+01 9.051e+01 9.521e+01 3.075e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-18 18:31:51,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=498780.0, ans=0.125 +2024-09-18 18:31:57,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=498780.0, ans=0.125 +2024-09-18 18:32:05,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=498820.0, ans=0.0 +2024-09-18 18:32:05,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=15.0 +2024-09-18 18:32:09,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=498820.0, ans=0.0 +2024-09-18 18:32:11,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=498820.0, ans=0.09899494936611666 +2024-09-18 18:32:27,341 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.90 vs. limit=10.0 +2024-09-18 18:32:32,442 INFO [train.py:1198] (1/2) Epoch 28, batch 2550, loss[loss=0.2229, ctc_loss=0.1147, cr_loss=0.3584, attn_decoder_loss=0.227, over 29361.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1245, cr_loss=0.3686, attn_decoder_loss=0.2453, over 5799672.95 frames. ], batch size: 67, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:32:46,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=498900.0, ans=15.0 +2024-09-18 18:32:48,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=498940.0, ans=0.1 +2024-09-18 18:32:49,083 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.57 vs. limit=15.0 +2024-09-18 18:32:54,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=498940.0, ans=0.1 +2024-09-18 18:33:01,125 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.32 vs. limit=15.0 +2024-09-18 18:33:06,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=498980.0, ans=0.125 +2024-09-18 18:33:12,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=498980.0, ans=0.1 +2024-09-18 18:33:33,181 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.35 vs. 
limit=15.0 +2024-09-18 18:33:35,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=499060.0, ans=0.125 +2024-09-18 18:33:40,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=499060.0, ans=0.125 +2024-09-18 18:33:50,491 INFO [train.py:1198] (1/2) Epoch 28, batch 2600, loss[loss=0.2341, ctc_loss=0.117, cr_loss=0.3657, attn_decoder_loss=0.239, over 29460.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1248, cr_loss=0.3695, attn_decoder_loss=0.2456, over 5796640.35 frames. ], batch size: 78, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:34:05,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=499140.0, ans=0.025 +2024-09-18 18:34:14,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=499140.0, ans=0.125 +2024-09-18 18:34:25,551 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.747e+01 8.719e+01 9.111e+01 9.618e+01 2.208e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-18 18:35:01,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.whiten.whitening_limit, batch_count=499260.0, ans=12.0 +2024-09-18 18:35:03,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=499260.0, ans=0.1 +2024-09-18 18:35:07,774 INFO [train.py:1198] (1/2) Epoch 28, batch 2650, loss[loss=0.2497, ctc_loss=0.1302, cr_loss=0.3948, attn_decoder_loss=0.2542, over 29221.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1246, cr_loss=0.3691, attn_decoder_loss=0.2454, over 5802825.22 frames. ], batch size: 100, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:35:09,913 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.65 vs. limit=15.0 +2024-09-18 18:35:18,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=499300.0, ans=0.1 +2024-09-18 18:35:45,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=499380.0, ans=0.5 +2024-09-18 18:35:57,278 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=5.20 vs. limit=5.0 +2024-09-18 18:36:11,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=499460.0, ans=0.125 +2024-09-18 18:36:20,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=499460.0, ans=0.95 +2024-09-18 18:36:25,506 INFO [train.py:1198] (1/2) Epoch 28, batch 2700, loss[loss=0.2433, ctc_loss=0.1248, cr_loss=0.378, attn_decoder_loss=0.2481, over 29532.00 frames. ], tot_loss[loss=0.241, ctc_loss=0.1249, cr_loss=0.3688, attn_decoder_loss=0.2457, over 5797877.95 frames. ], batch size: 87, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:36:28,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.84 vs. 
limit=22.5 +2024-09-18 18:36:33,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=499500.0, ans=0.5 +2024-09-18 18:36:35,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=499500.0, ans=0.0 +2024-09-18 18:36:58,799 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.370e+01 8.414e+01 8.942e+01 9.601e+01 1.842e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 18:37:07,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=499580.0, ans=0.125 +2024-09-18 18:37:08,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.72 vs. limit=15.0 +2024-09-18 18:37:30,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=499660.0, ans=0.125 +2024-09-18 18:37:34,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.whiten.whitening_limit, batch_count=499660.0, ans=12.0 +2024-09-18 18:37:38,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=499660.0, ans=0.0 +2024-09-18 18:37:41,523 INFO [train.py:1198] (1/2) Epoch 28, batch 2750, loss[loss=0.2243, ctc_loss=0.1129, cr_loss=0.3571, attn_decoder_loss=0.2288, over 29511.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1238, cr_loss=0.3665, attn_decoder_loss=0.2443, over 5795126.33 frames. ], batch size: 75, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:37:53,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=499700.0, ans=0.025 +2024-09-18 18:37:56,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=499740.0, ans=0.125 +2024-09-18 18:38:09,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=499740.0, ans=0.0 +2024-09-18 18:38:21,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=499780.0, ans=0.125 +2024-09-18 18:38:27,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=499820.0, ans=0.0 +2024-09-18 18:38:27,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=499820.0, ans=0.1 +2024-09-18 18:38:38,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=499820.0, ans=0.0 +2024-09-18 18:38:55,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=499860.0, ans=0.2 +2024-09-18 18:38:59,698 INFO [train.py:1198] (1/2) Epoch 28, batch 2800, loss[loss=0.2594, ctc_loss=0.1523, cr_loss=0.3722, attn_decoder_loss=0.263, over 20837.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1242, cr_loss=0.367, attn_decoder_loss=0.2446, over 5778017.09 frames. 
], batch size: 210, lr: 3.95e-03, grad_scale: 16.0 +2024-09-18 18:39:13,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=499940.0, ans=0.1 +2024-09-18 18:39:22,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=499940.0, ans=0.04949747468305833 +2024-09-18 18:39:34,535 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.662e+01 9.200e+01 9.823e+01 1.916e+02, threshold=1.840e+02, percent-clipped=1.0 +2024-09-18 18:39:38,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=499980.0, ans=0.125 +2024-09-18 18:39:47,630 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:39:56,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=500020.0, ans=0.0 +2024-09-18 18:40:18,057 INFO [train.py:1198] (1/2) Epoch 28, batch 2850, loss[loss=0.2362, ctc_loss=0.1221, cr_loss=0.3577, attn_decoder_loss=0.2409, over 29509.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1247, cr_loss=0.3683, attn_decoder_loss=0.245, over 5762991.16 frames. ], batch size: 77, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:40:19,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=500100.0, ans=0.0 +2024-09-18 18:40:19,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=500100.0, ans=0.0 +2024-09-18 18:40:25,113 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.71 vs. limit=15.0 +2024-09-18 18:40:34,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=500140.0, ans=0.0 +2024-09-18 18:40:39,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=500140.0, ans=0.125 +2024-09-18 18:40:41,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=500140.0, ans=0.0 +2024-09-18 18:40:51,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=500180.0, ans=0.125 +2024-09-18 18:40:57,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=500180.0, ans=0.1 +2024-09-18 18:41:19,302 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:41:25,280 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:41:31,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=500260.0, ans=0.125 +2024-09-18 18:41:34,004 INFO [train.py:1198] (1/2) Epoch 28, batch 2900, loss[loss=0.2276, ctc_loss=0.1064, cr_loss=0.3227, attn_decoder_loss=0.2338, over 29410.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.125, cr_loss=0.369, attn_decoder_loss=0.2458, over 5788043.91 frames. 
], batch size: 79, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:41:50,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=500340.0, ans=0.0 +2024-09-18 18:42:10,949 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.093e+01 8.571e+01 8.982e+01 9.611e+01 1.691e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-18 18:42:34,674 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.46 vs. limit=22.5 +2024-09-18 18:42:43,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=500460.0, ans=0.0 +2024-09-18 18:42:51,877 INFO [train.py:1198] (1/2) Epoch 28, batch 2950, loss[loss=0.2283, ctc_loss=0.1149, cr_loss=0.3628, attn_decoder_loss=0.2329, over 29521.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1241, cr_loss=0.3669, attn_decoder_loss=0.2442, over 5783268.06 frames. ], batch size: 75, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:43:09,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.60 vs. limit=10.0 +2024-09-18 18:43:18,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=500540.0, ans=0.025 +2024-09-18 18:43:34,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=500580.0, ans=0.125 +2024-09-18 18:43:39,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=500620.0, ans=0.07 +2024-09-18 18:44:01,348 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=6.01 vs. limit=12.0 +2024-09-18 18:44:02,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=500660.0, ans=0.125 +2024-09-18 18:44:10,170 INFO [train.py:1198] (1/2) Epoch 28, batch 3000, loss[loss=0.2362, ctc_loss=0.1179, cr_loss=0.3651, attn_decoder_loss=0.2413, over 29766.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1245, cr_loss=0.3675, attn_decoder_loss=0.2445, over 5783513.05 frames. ], batch size: 81, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:44:10,171 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 18:44:28,709 INFO [train.py:1230] (1/2) Epoch 28, validation: loss=0.2115, ctc_loss=0.03821, cr_loss=5.852e-15, attn_decoder_loss=0.2307, over 944034.00 frames. +2024-09-18 18:44:28,709 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 18:44:32,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten.whitening_limit, batch_count=500700.0, ans=15.0 +2024-09-18 18:44:35,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=500700.0, ans=0.0 +2024-09-18 18:44:55,455 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.47 vs. 
limit=15.0 +2024-09-18 18:44:56,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=500740.0, ans=0.0 +2024-09-18 18:45:03,652 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.528e+01 8.580e+01 9.034e+01 9.618e+01 2.130e+02, threshold=1.807e+02, percent-clipped=2.0 +2024-09-18 18:45:03,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=500780.0, ans=0.2 +2024-09-18 18:45:05,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=500780.0, ans=0.035 +2024-09-18 18:45:05,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=500780.0, ans=0.125 +2024-09-18 18:45:13,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=500820.0, ans=0.125 +2024-09-18 18:45:37,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=500860.0, ans=0.125 +2024-09-18 18:45:45,085 INFO [train.py:1198] (1/2) Epoch 28, batch 3050, loss[loss=0.2278, ctc_loss=0.12, cr_loss=0.3598, attn_decoder_loss=0.2318, over 29515.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1251, cr_loss=0.3686, attn_decoder_loss=0.2452, over 5776117.17 frames. ], batch size: 76, lr: 3.95e-03, grad_scale: 8.0 +2024-09-18 18:45:50,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=500900.0, ans=0.125 +2024-09-18 18:46:01,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=500940.0, ans=0.1 +2024-09-18 18:46:05,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer_na.min_abs, batch_count=500940.0, ans=0.02 +2024-09-18 18:46:22,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=500980.0, ans=0.0 +2024-09-18 18:46:34,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=501020.0, ans=0.125 +2024-09-18 18:46:41,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.51 vs. limit=6.0 +2024-09-18 18:46:48,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=501060.0, ans=0.125 +2024-09-18 18:46:49,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=501060.0, ans=0.125 +2024-09-18 18:46:52,976 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:47:02,937 INFO [train.py:1198] (1/2) Epoch 28, batch 3100, loss[loss=0.2504, ctc_loss=0.1322, cr_loss=0.3974, attn_decoder_loss=0.2547, over 29265.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1249, cr_loss=0.368, attn_decoder_loss=0.2449, over 5776181.88 frames. 
], batch size: 100, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:47:03,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=501100.0, ans=0.2 +2024-09-18 18:47:19,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=501140.0, ans=0.125 +2024-09-18 18:47:25,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=501140.0, ans=0.0 +2024-09-18 18:47:37,590 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.256e+01 8.481e+01 8.983e+01 9.463e+01 1.324e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-18 18:48:16,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=501260.0, ans=0.0 +2024-09-18 18:48:20,795 INFO [train.py:1198] (1/2) Epoch 28, batch 3150, loss[loss=0.2527, ctc_loss=0.1306, cr_loss=0.3752, attn_decoder_loss=0.2579, over 28812.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1248, cr_loss=0.3676, attn_decoder_loss=0.2451, over 5782972.90 frames. ], batch size: 104, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:48:33,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=501300.0, ans=0.125 +2024-09-18 18:48:42,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=501340.0, ans=0.0 +2024-09-18 18:48:52,229 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.44 vs. limit=15.0 +2024-09-18 18:49:15,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.87 vs. limit=15.0 +2024-09-18 18:49:16,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=501420.0, ans=0.0 +2024-09-18 18:49:20,526 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.63 vs. limit=22.5 +2024-09-18 18:49:30,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=501460.0, ans=0.125 +2024-09-18 18:49:36,029 INFO [train.py:1198] (1/2) Epoch 28, batch 3200, loss[loss=0.2332, ctc_loss=0.1152, cr_loss=0.358, attn_decoder_loss=0.2384, over 29405.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1239, cr_loss=0.3662, attn_decoder_loss=0.2443, over 5794577.09 frames. 
], batch size: 79, lr: 3.94e-03, grad_scale: 16.0 +2024-09-18 18:49:40,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=501500.0, ans=0.125 +2024-09-18 18:49:43,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=501500.0, ans=0.2 +2024-09-18 18:49:49,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=501500.0, ans=0.125 +2024-09-18 18:50:03,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=501540.0, ans=0.125 +2024-09-18 18:50:10,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=501580.0, ans=0.5 +2024-09-18 18:50:13,237 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.202e+01 8.510e+01 8.995e+01 9.300e+01 1.777e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-18 18:50:15,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=501580.0, ans=0.2 +2024-09-18 18:50:21,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=501580.0, ans=0.1 +2024-09-18 18:50:29,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=501620.0, ans=0.125 +2024-09-18 18:50:46,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.60 vs. limit=5.0 +2024-09-18 18:50:54,450 INFO [train.py:1198] (1/2) Epoch 28, batch 3250, loss[loss=0.2468, ctc_loss=0.1303, cr_loss=0.3751, attn_decoder_loss=0.2514, over 29696.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1241, cr_loss=0.3667, attn_decoder_loss=0.2447, over 5799617.10 frames. ], batch size: 84, lr: 3.94e-03, grad_scale: 16.0 +2024-09-18 18:50:59,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=501700.0, ans=0.125 +2024-09-18 18:51:05,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=501700.0, ans=0.025 +2024-09-18 18:51:19,150 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.63 vs. 
limit=15.0 +2024-09-18 18:51:33,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=501780.0, ans=0.125 +2024-09-18 18:51:36,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=501780.0, ans=10.0 +2024-09-18 18:51:38,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=501820.0, ans=0.0 +2024-09-18 18:51:50,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=501820.0, ans=0.125 +2024-09-18 18:51:52,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=501820.0, ans=0.125 +2024-09-18 18:52:02,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=501860.0, ans=0.035 +2024-09-18 18:52:08,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=501860.0, ans=0.0 +2024-09-18 18:52:10,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=501900.0, ans=0.125 +2024-09-18 18:52:10,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=501900.0, ans=0.04949747468305833 +2024-09-18 18:52:11,769 INFO [train.py:1198] (1/2) Epoch 28, batch 3300, loss[loss=0.25, ctc_loss=0.1257, cr_loss=0.3617, attn_decoder_loss=0.2558, over 28134.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1234, cr_loss=0.3648, attn_decoder_loss=0.2438, over 5795912.93 frames. ], batch size: 111, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:52:19,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=501900.0, ans=0.125 +2024-09-18 18:52:48,162 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.375e+01 8.472e+01 9.021e+01 9.788e+01 2.409e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-18 18:53:21,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=502060.0, ans=0.05 +2024-09-18 18:53:27,376 INFO [train.py:1198] (1/2) Epoch 28, batch 3350, loss[loss=0.2515, ctc_loss=0.1286, cr_loss=0.3686, attn_decoder_loss=0.257, over 28867.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.124, cr_loss=0.3658, attn_decoder_loss=0.2446, over 5772854.75 frames. 
], batch size: 104, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:53:30,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=502100.0, ans=0.125 +2024-09-18 18:53:30,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=502100.0, ans=10.0 +2024-09-18 18:53:35,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=502100.0, ans=0.125 +2024-09-18 18:53:55,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=502140.0, ans=0.0 +2024-09-18 18:54:03,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=502180.0, ans=0.0 +2024-09-18 18:54:13,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=502220.0, ans=0.125 +2024-09-18 18:54:35,747 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.20 vs. limit=15.0 +2024-09-18 18:54:38,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=502260.0, ans=0.125 +2024-09-18 18:54:45,497 INFO [train.py:1198] (1/2) Epoch 28, batch 3400, loss[loss=0.2174, ctc_loss=0.1074, cr_loss=0.3431, attn_decoder_loss=0.2219, over 29320.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1244, cr_loss=0.3664, attn_decoder_loss=0.2446, over 5766049.52 frames. ], batch size: 67, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:54:48,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=502300.0, ans=0.0 +2024-09-18 18:55:11,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=502340.0, ans=0.0 +2024-09-18 18:55:18,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=502380.0, ans=0.2 +2024-09-18 18:55:21,607 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.432e+01 8.459e+01 8.977e+01 9.782e+01 2.197e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-18 18:55:56,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=502460.0, ans=0.0 +2024-09-18 18:56:03,376 INFO [train.py:1198] (1/2) Epoch 28, batch 3450, loss[loss=0.2444, ctc_loss=0.1236, cr_loss=0.3519, attn_decoder_loss=0.25, over 28351.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1244, cr_loss=0.3664, attn_decoder_loss=0.2448, over 5773875.15 frames. ], batch size: 111, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:56:11,946 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.49 vs. limit=15.0 +2024-09-18 18:56:21,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=502540.0, ans=0.05 +2024-09-18 18:56:25,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.11 vs. 
limit=15.0 +2024-09-18 18:56:30,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=502540.0, ans=0.125 +2024-09-18 18:56:51,740 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 18:57:17,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=502700.0, ans=0.0 +2024-09-18 18:57:18,867 INFO [train.py:1198] (1/2) Epoch 28, batch 3500, loss[loss=0.2195, ctc_loss=0.1116, cr_loss=0.3483, attn_decoder_loss=0.2238, over 29308.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1242, cr_loss=0.3664, attn_decoder_loss=0.2443, over 5774866.36 frames. ], batch size: 71, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:57:19,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=502700.0, ans=0.2 +2024-09-18 18:57:20,007 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.21 vs. limit=15.0 +2024-09-18 18:57:23,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=502700.0, ans=0.125 +2024-09-18 18:57:51,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=502780.0, ans=0.1 +2024-09-18 18:57:57,152 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.602e+01 9.014e+01 9.488e+01 1.440e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-18 18:57:58,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=502780.0, ans=0.1 +2024-09-18 18:58:01,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=502780.0, ans=0.0 +2024-09-18 18:58:24,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=502860.0, ans=0.025 +2024-09-18 18:58:27,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=502860.0, ans=0.125 +2024-09-18 18:58:35,925 INFO [train.py:1198] (1/2) Epoch 28, batch 3550, loss[loss=0.2545, ctc_loss=0.1263, cr_loss=0.3725, attn_decoder_loss=0.2605, over 29705.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1238, cr_loss=0.3656, attn_decoder_loss=0.2442, over 5780797.96 frames. 
], batch size: 89, lr: 3.94e-03, grad_scale: 8.0 +2024-09-18 18:58:37,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=502900.0, ans=0.1 +2024-09-18 18:58:43,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=502900.0, ans=0.125 +2024-09-18 18:58:53,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=502940.0, ans=0.0 +2024-09-18 18:59:13,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=502980.0, ans=0.125 +2024-09-18 18:59:14,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=502980.0, ans=0.125 +2024-09-18 18:59:37,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=503060.0, ans=0.125 +2024-09-18 18:59:50,237 INFO [train.py:1198] (1/2) Epoch 28, batch 3600, loss[loss=0.2289, ctc_loss=0.1235, cr_loss=0.3634, attn_decoder_loss=0.2326, over 29496.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.124, cr_loss=0.3664, attn_decoder_loss=0.2445, over 5790468.06 frames. ], batch size: 77, lr: 3.94e-03, grad_scale: 16.0 +2024-09-18 18:59:50,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=503100.0, ans=0.1 +2024-09-18 18:59:53,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=503100.0, ans=0.0 +2024-09-18 19:00:07,389 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.22 vs. limit=15.0 +2024-09-18 19:00:20,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=503180.0, ans=0.125 +2024-09-18 19:00:24,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=503180.0, ans=0.2 +2024-09-18 19:00:26,000 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.400e+01 8.358e+01 8.868e+01 9.352e+01 4.010e+02, threshold=1.774e+02, percent-clipped=1.0 +2024-09-18 19:00:35,589 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.37 vs. limit=15.0 +2024-09-18 19:00:38,228 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.18 vs. limit=22.5 +2024-09-18 19:00:39,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys.whitening_limit, batch_count=503220.0, ans=6.0 +2024-09-18 19:00:51,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=503260.0, ans=0.125 +2024-09-18 19:01:05,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=503300.0, ans=0.0 +2024-09-18 19:01:07,134 INFO [train.py:1198] (1/2) Epoch 28, batch 3650, loss[loss=0.2584, ctc_loss=0.1446, cr_loss=0.4188, attn_decoder_loss=0.2618, over 29531.00 frames. 
], tot_loss[loss=0.2389, ctc_loss=0.1232, cr_loss=0.3646, attn_decoder_loss=0.2436, over 5793261.80 frames. ], batch size: 90, lr: 3.94e-03, grad_scale: 16.0 +2024-09-18 19:01:25,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=503340.0, ans=0.125 +2024-09-18 19:01:37,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=503380.0, ans=0.1 +2024-09-18 19:01:43,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=503380.0, ans=0.125 +2024-09-18 19:01:44,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=503380.0, ans=0.125 +2024-09-18 19:01:49,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=503380.0, ans=0.2 +2024-09-18 19:01:50,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=503420.0, ans=0.125 +2024-09-18 19:01:53,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=503420.0, ans=0.125 +2024-09-18 19:02:01,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=503420.0, ans=0.1 +2024-09-18 19:02:04,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.78 vs. limit=22.5 +2024-09-18 19:02:13,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=503460.0, ans=0.025 +2024-09-18 19:02:14,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=503460.0, ans=0.125 +2024-09-18 19:02:15,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=503460.0, ans=0.2 +2024-09-18 19:02:21,715 INFO [train.py:1198] (1/2) Epoch 28, batch 3700, loss[loss=0.2541, ctc_loss=0.1333, cr_loss=0.3971, attn_decoder_loss=0.2587, over 29706.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1234, cr_loss=0.3656, attn_decoder_loss=0.2438, over 5804097.45 frames. 
], batch size: 84, lr: 3.93e-03, grad_scale: 16.0 +2024-09-18 19:02:53,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=503580.0, ans=0.0 +2024-09-18 19:02:57,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=503580.0, ans=0.125 +2024-09-18 19:02:58,673 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.285e+01 8.604e+01 9.187e+01 9.989e+01 2.860e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-18 19:03:00,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=503580.0, ans=0.1 +2024-09-18 19:03:06,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=503620.0, ans=0.0 +2024-09-18 19:03:14,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.40 vs. limit=22.5 +2024-09-18 19:03:22,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=503660.0, ans=0.125 +2024-09-18 19:03:27,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=503660.0, ans=0.125 +2024-09-18 19:03:35,360 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.40 vs. limit=15.0 +2024-09-18 19:03:36,039 INFO [train.py:1198] (1/2) Epoch 28, batch 3750, loss[loss=0.2134, ctc_loss=0.1057, cr_loss=0.3278, attn_decoder_loss=0.218, over 29314.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1235, cr_loss=0.3664, attn_decoder_loss=0.2437, over 5807841.41 frames. ], batch size: 67, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:03:45,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=503700.0, ans=0.125 +2024-09-18 19:04:52,049 INFO [train.py:1198] (1/2) Epoch 28, batch 3800, loss[loss=0.2479, ctc_loss=0.1271, cr_loss=0.3849, attn_decoder_loss=0.2527, over 29633.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.123, cr_loss=0.3647, attn_decoder_loss=0.2431, over 5799284.49 frames. ], batch size: 86, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:04:53,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=503900.0, ans=0.125 +2024-09-18 19:05:14,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=503940.0, ans=0.0 +2024-09-18 19:05:28,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=503980.0, ans=0.125 +2024-09-18 19:05:29,966 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.108e+01 8.413e+01 8.933e+01 9.626e+01 3.409e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 19:05:30,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=503980.0, ans=0.125 +2024-09-18 19:05:39,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.37 vs. 
limit=22.5 +2024-09-18 19:05:39,786 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.88 vs. limit=22.5 +2024-09-18 19:05:45,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=504020.0, ans=0.1 +2024-09-18 19:05:45,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.86 vs. limit=12.0 +2024-09-18 19:05:51,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.21 vs. limit=10.0 +2024-09-18 19:06:07,076 INFO [train.py:1198] (1/2) Epoch 28, batch 3850, loss[loss=0.2586, ctc_loss=0.1447, cr_loss=0.4031, attn_decoder_loss=0.2623, over 29223.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.123, cr_loss=0.3645, attn_decoder_loss=0.2433, over 5813708.20 frames. ], batch size: 100, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:06:07,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=504100.0, ans=0.125 +2024-09-18 19:06:27,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=504140.0, ans=0.0 +2024-09-18 19:06:31,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=504140.0, ans=0.2 +2024-09-18 19:07:09,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=504260.0, ans=0.0 +2024-09-18 19:07:15,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=504260.0, ans=0.1 +2024-09-18 19:07:23,008 INFO [train.py:1198] (1/2) Epoch 28, batch 3900, loss[loss=0.2464, ctc_loss=0.1307, cr_loss=0.369, attn_decoder_loss=0.2511, over 29637.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1232, cr_loss=0.3651, attn_decoder_loss=0.2438, over 5817987.83 frames. ], batch size: 86, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:07:58,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=504380.0, ans=0.0 +2024-09-18 19:07:59,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=504380.0, ans=0.125 +2024-09-18 19:08:00,190 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.164e+01 8.446e+01 8.921e+01 9.410e+01 1.233e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-18 19:08:20,195 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=8.81 vs. limit=15.0 +2024-09-18 19:08:32,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=504460.0, ans=0.125 +2024-09-18 19:08:36,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=504500.0, ans=0.125 +2024-09-18 19:08:37,160 INFO [train.py:1198] (1/2) Epoch 28, batch 3950, loss[loss=0.2624, ctc_loss=0.1376, cr_loss=0.3905, attn_decoder_loss=0.2676, over 29438.00 frames. 
], tot_loss[loss=0.2387, ctc_loss=0.1226, cr_loss=0.3645, attn_decoder_loss=0.2435, over 5837027.72 frames. ], batch size: 97, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:08:43,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=504500.0, ans=0.025 +2024-09-18 19:09:11,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=504580.0, ans=0.125 +2024-09-18 19:09:52,201 INFO [train.py:1198] (1/2) Epoch 28, batch 4000, loss[loss=0.2223, ctc_loss=0.1123, cr_loss=0.3413, attn_decoder_loss=0.2269, over 29510.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1227, cr_loss=0.3641, attn_decoder_loss=0.2435, over 5812478.16 frames. ], batch size: 74, lr: 3.93e-03, grad_scale: 16.0 +2024-09-18 19:10:09,117 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.38 vs. limit=10.0 +2024-09-18 19:10:23,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=504780.0, ans=0.0 +2024-09-18 19:10:24,957 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:10:29,483 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.096e+01 8.633e+01 9.036e+01 9.608e+01 3.784e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-18 19:10:29,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=504780.0, ans=0.125 +2024-09-18 19:10:38,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=504820.0, ans=0.125 +2024-09-18 19:11:08,050 INFO [train.py:1198] (1/2) Epoch 28, batch 4050, loss[loss=0.2563, ctc_loss=0.1441, cr_loss=0.3738, attn_decoder_loss=0.2605, over 20036.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1224, cr_loss=0.363, attn_decoder_loss=0.2432, over 5796491.62 frames. ], batch size: 209, lr: 3.93e-03, grad_scale: 16.0 +2024-09-18 19:11:08,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=504900.0, ans=0.035 +2024-09-18 19:11:10,225 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.44 vs. limit=12.0 +2024-09-18 19:11:14,944 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.08 vs. limit=10.0 +2024-09-18 19:11:23,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=504940.0, ans=0.95 +2024-09-18 19:11:46,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=504980.0, ans=0.2 +2024-09-18 19:11:51,525 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.96 vs. 
limit=22.5 +2024-09-18 19:11:56,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=505020.0, ans=0.0 +2024-09-18 19:12:02,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=505020.0, ans=0.125 +2024-09-18 19:12:19,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=505060.0, ans=0.5 +2024-09-18 19:12:21,989 INFO [train.py:1198] (1/2) Epoch 28, batch 4100, loss[loss=0.2519, ctc_loss=0.1323, cr_loss=0.3864, attn_decoder_loss=0.2566, over 29504.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1229, cr_loss=0.3638, attn_decoder_loss=0.2434, over 5790646.35 frames. ], batch size: 90, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:12:41,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=505140.0, ans=0.2 +2024-09-18 19:12:42,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=505140.0, ans=0.07 +2024-09-18 19:12:50,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=505180.0, ans=22.5 +2024-09-18 19:13:00,182 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.579e+01 8.509e+01 9.171e+01 9.842e+01 2.303e+02, threshold=1.834e+02, percent-clipped=2.0 +2024-09-18 19:13:24,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=505260.0, ans=0.125 +2024-09-18 19:13:34,720 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:13:35,877 INFO [train.py:1198] (1/2) Epoch 28, batch 4150, loss[loss=0.2385, ctc_loss=0.1383, cr_loss=0.3911, attn_decoder_loss=0.241, over 29502.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.123, cr_loss=0.3638, attn_decoder_loss=0.2431, over 5796199.78 frames. ], batch size: 77, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:13:46,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=505300.0, ans=0.1 +2024-09-18 19:14:04,376 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.02 vs. limit=15.0 +2024-09-18 19:14:12,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=505380.0, ans=0.0 +2024-09-18 19:14:17,556 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=20.95 vs. limit=22.5 +2024-09-18 19:14:26,374 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.78 vs. 
limit=15.0 +2024-09-18 19:14:27,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=505420.0, ans=0.125 +2024-09-18 19:14:33,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=505420.0, ans=0.0 +2024-09-18 19:14:34,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=505460.0, ans=0.2 +2024-09-18 19:14:39,768 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.14 vs. limit=15.0 +2024-09-18 19:14:47,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.29 vs. limit=22.5 +2024-09-18 19:14:49,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=505500.0, ans=0.125 +2024-09-18 19:14:50,776 INFO [train.py:1198] (1/2) Epoch 28, batch 4200, loss[loss=0.2422, ctc_loss=0.1224, cr_loss=0.3449, attn_decoder_loss=0.2479, over 29541.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1232, cr_loss=0.3644, attn_decoder_loss=0.2437, over 5798770.80 frames. ], batch size: 90, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:14:55,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=505500.0, ans=0.125 +2024-09-18 19:15:30,608 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.561e+01 9.045e+01 9.717e+01 1.244e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-18 19:15:32,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=505580.0, ans=0.125 +2024-09-18 19:15:33,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=505580.0, ans=0.0 +2024-09-18 19:15:36,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=505620.0, ans=0.1 +2024-09-18 19:15:47,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=505620.0, ans=0.1 +2024-09-18 19:15:51,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=505660.0, ans=0.125 +2024-09-18 19:15:55,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=505660.0, ans=0.04949747468305833 +2024-09-18 19:16:06,008 INFO [train.py:1198] (1/2) Epoch 28, batch 4250, loss[loss=0.2155, ctc_loss=0.1003, cr_loss=0.3096, attn_decoder_loss=0.2214, over 29523.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1231, cr_loss=0.3642, attn_decoder_loss=0.2438, over 5805381.32 frames. 
], batch size: 74, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:16:12,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=505700.0, ans=0.125 +2024-09-18 19:16:15,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=505700.0, ans=0.125 +2024-09-18 19:17:03,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=505860.0, ans=0.125 +2024-09-18 19:17:03,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=505860.0, ans=0.0 +2024-09-18 19:17:19,606 INFO [train.py:1198] (1/2) Epoch 28, batch 4300, loss[loss=0.2406, ctc_loss=0.1207, cr_loss=0.347, attn_decoder_loss=0.2463, over 29556.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1231, cr_loss=0.3642, attn_decoder_loss=0.244, over 5794703.08 frames. ], batch size: 87, lr: 3.93e-03, grad_scale: 8.0 +2024-09-18 19:17:44,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=505940.0, ans=0.125 +2024-09-18 19:17:55,472 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.13 vs. limit=15.0 +2024-09-18 19:17:57,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=505980.0, ans=0.2 +2024-09-18 19:17:58,958 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.536e+01 8.600e+01 9.054e+01 9.453e+01 1.609e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-18 19:18:08,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=506020.0, ans=0.0 +2024-09-18 19:18:22,380 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:18:29,828 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.28 vs. limit=15.0 +2024-09-18 19:18:35,187 INFO [train.py:1198] (1/2) Epoch 28, batch 4350, loss[loss=0.2556, ctc_loss=0.133, cr_loss=0.3794, attn_decoder_loss=0.2607, over 29492.00 frames. ], tot_loss[loss=0.2428, ctc_loss=0.1262, cr_loss=0.3706, attn_decoder_loss=0.2475, over 5797219.96 frames. ], batch size: 97, lr: 3.92e-03, grad_scale: 8.0 +2024-09-18 19:18:41,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=506100.0, ans=0.1 +2024-09-18 19:18:59,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=506140.0, ans=0.125 +2024-09-18 19:19:07,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=506180.0, ans=0.1 +2024-09-18 19:19:18,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=506220.0, ans=0.125 +2024-09-18 19:19:18,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.72 vs. 
limit=15.0 +2024-09-18 19:19:21,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=506220.0, ans=0.1 +2024-09-18 19:19:25,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=506220.0, ans=0.0 +2024-09-18 19:19:27,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=506220.0, ans=0.1 +2024-09-18 19:19:38,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=506260.0, ans=0.125 +2024-09-18 19:19:40,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=506260.0, ans=0.125 +2024-09-18 19:19:48,761 INFO [train.py:1198] (1/2) Epoch 28, batch 4400, loss[loss=0.2571, ctc_loss=0.1418, cr_loss=0.4118, attn_decoder_loss=0.2607, over 27428.00 frames. ], tot_loss[loss=0.2451, ctc_loss=0.1276, cr_loss=0.373, attn_decoder_loss=0.2498, over 5768052.90 frames. ], batch size: 124, lr: 3.92e-03, grad_scale: 16.0 +2024-09-18 19:20:20,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=506380.0, ans=0.125 +2024-09-18 19:20:28,788 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.311e+01 8.874e+01 9.241e+01 9.772e+01 1.532e+02, threshold=1.848e+02, percent-clipped=0.0 +2024-09-18 19:20:50,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=506460.0, ans=0.125 +2024-09-18 19:21:03,588 INFO [train.py:1198] (1/2) Epoch 28, batch 4450, loss[loss=0.2583, ctc_loss=0.1515, cr_loss=0.3881, attn_decoder_loss=0.2616, over 19700.00 frames. ], tot_loss[loss=0.2477, ctc_loss=0.1316, cr_loss=0.3783, attn_decoder_loss=0.2522, over 5577560.91 frames. ], batch size: 209, lr: 3.92e-03, grad_scale: 8.0 +2024-09-18 19:21:25,120 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:21:45,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=506580.0, ans=0.1 +2024-09-18 19:21:58,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=506620.0, ans=0.2 +2024-09-18 19:22:18,777 INFO [train.py:1198] (1/2) Epoch 28, batch 4500, loss[loss=0.2595, ctc_loss=0.1529, cr_loss=0.3724, attn_decoder_loss=0.2631, over 20547.00 frames. ], tot_loss[loss=0.2501, ctc_loss=0.1357, cr_loss=0.3813, attn_decoder_loss=0.2544, over 5238401.56 frames. ], batch size: 210, lr: 3.92e-03, grad_scale: 8.0 +2024-09-18 19:22:19,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=506700.0, ans=0.125 +2024-09-18 19:22:23,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=506700.0, ans=0.0 +2024-09-18 19:22:27,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=506700.0, ans=0.0 +2024-09-18 19:22:37,286 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.95 vs. 
limit=22.5 +2024-09-18 19:22:38,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=506740.0, ans=0.0 +2024-09-18 19:23:47,632 INFO [train.py:1198] (1/2) Epoch 29, batch 0, loss[loss=0.2216, ctc_loss=0.1073, cr_loss=0.3398, attn_decoder_loss=0.2268, over 29599.00 frames. ], tot_loss[loss=0.2216, ctc_loss=0.1073, cr_loss=0.3398, attn_decoder_loss=0.2268, over 29599.00 frames. ], batch size: 73, lr: 3.85e-03, grad_scale: 16.0 +2024-09-18 19:23:47,633 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 19:23:52,463 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.6832, 4.5922, 4.4039, 4.1298], device='cuda:1') +2024-09-18 19:24:06,127 INFO [train.py:1230] (1/2) Epoch 29, validation: loss=0.2126, ctc_loss=0.03746, cr_loss=5.58e-15, attn_decoder_loss=0.2321, over 944034.00 frames. +2024-09-18 19:24:06,128 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 19:24:09,037 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.797e+01 1.050e+02 1.169e+02 1.299e+02 2.763e+02, threshold=2.337e+02, percent-clipped=3.0 +2024-09-18 19:24:15,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=506800.0, ans=0.125 +2024-09-18 19:24:54,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=506920.0, ans=0.5 +2024-09-18 19:25:03,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=506920.0, ans=0.1 +2024-09-18 19:25:08,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=506960.0, ans=0.0 +2024-09-18 19:25:12,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=506960.0, ans=0.1 +2024-09-18 19:25:16,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=506960.0, ans=0.125 +2024-09-18 19:25:21,689 INFO [train.py:1198] (1/2) Epoch 29, batch 50, loss[loss=0.2115, ctc_loss=0.1051, cr_loss=0.3289, attn_decoder_loss=0.216, over 29455.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1272, cr_loss=0.3745, attn_decoder_loss=0.2463, over 1267295.76 frames. ], batch size: 70, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:25:28,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=507000.0, ans=0.09899494936611666 +2024-09-18 19:25:34,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=507000.0, ans=0.1 +2024-09-18 19:26:02,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=507080.0, ans=0.125 +2024-09-18 19:26:29,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=507160.0, ans=0.125 +2024-09-18 19:26:41,678 INFO [train.py:1198] (1/2) Epoch 29, batch 100, loss[loss=0.2417, ctc_loss=0.1347, cr_loss=0.4107, attn_decoder_loss=0.2445, over 29540.00 frames. ], tot_loss[loss=0.2433, ctc_loss=0.1275, cr_loss=0.3741, attn_decoder_loss=0.2478, over 2252168.36 frames. 
], batch size: 76, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:26:46,192 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.735e+01 9.318e+01 1.000e+02 1.586e+02, threshold=1.864e+02, percent-clipped=0.0 +2024-09-18 19:26:46,555 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:27:06,826 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.20 vs. limit=15.0 +2024-09-18 19:27:12,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=507280.0, ans=0.125 +2024-09-18 19:27:46,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=507360.0, ans=0.125 +2024-09-18 19:27:56,473 INFO [train.py:1198] (1/2) Epoch 29, batch 150, loss[loss=0.211, ctc_loss=0.1042, cr_loss=0.3284, attn_decoder_loss=0.2156, over 29424.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.125, cr_loss=0.37, attn_decoder_loss=0.2455, over 3045972.17 frames. ], batch size: 70, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:27:56,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=507400.0, ans=0.1 +2024-09-18 19:27:59,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=507400.0, ans=0.125 +2024-09-18 19:28:25,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=507480.0, ans=0.2 +2024-09-18 19:28:25,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=507480.0, ans=10.0 +2024-09-18 19:28:28,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=507480.0, ans=0.125 +2024-09-18 19:28:32,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=507480.0, ans=0.2 +2024-09-18 19:28:44,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=507520.0, ans=0.0 +2024-09-18 19:28:59,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=507560.0, ans=0.1 +2024-09-18 19:29:11,259 INFO [train.py:1198] (1/2) Epoch 29, batch 200, loss[loss=0.2424, ctc_loss=0.1174, cr_loss=0.344, attn_decoder_loss=0.2486, over 27605.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1245, cr_loss=0.3686, attn_decoder_loss=0.2447, over 3658529.29 frames. 
], batch size: 125, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:29:11,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=507600.0, ans=0.0 +2024-09-18 19:29:11,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=507600.0, ans=0.125 +2024-09-18 19:29:14,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=507600.0, ans=0.09899494936611666 +2024-09-18 19:29:15,708 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.478e+01 8.328e+01 8.818e+01 9.310e+01 1.091e+02, threshold=1.764e+02, percent-clipped=0.0 +2024-09-18 19:29:37,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.54 vs. limit=22.5 +2024-09-18 19:29:50,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=507680.0, ans=0.2 +2024-09-18 19:29:54,777 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:30:01,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=507720.0, ans=0.0 +2024-09-18 19:30:05,395 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.40 vs. limit=10.0 +2024-09-18 19:30:10,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=507720.0, ans=0.0 +2024-09-18 19:30:14,239 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.78 vs. limit=15.0 +2024-09-18 19:30:25,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.51 vs. limit=6.0 +2024-09-18 19:30:29,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=507760.0, ans=0.125 +2024-09-18 19:30:31,824 INFO [train.py:1198] (1/2) Epoch 29, batch 250, loss[loss=0.2517, ctc_loss=0.141, cr_loss=0.3938, attn_decoder_loss=0.2552, over 29187.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1239, cr_loss=0.3672, attn_decoder_loss=0.2443, over 4141477.92 frames. ], batch size: 100, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:30:53,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=507840.0, ans=0.125 +2024-09-18 19:31:12,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=507880.0, ans=0.0 +2024-09-18 19:31:12,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=507880.0, ans=0.125 +2024-09-18 19:31:47,694 INFO [train.py:1198] (1/2) Epoch 29, batch 300, loss[loss=0.2568, ctc_loss=0.1307, cr_loss=0.374, attn_decoder_loss=0.2625, over 29514.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1231, cr_loss=0.3654, attn_decoder_loss=0.2437, over 4510823.58 frames. 
], batch size: 92, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:31:50,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=508000.0, ans=0.0 +2024-09-18 19:31:52,194 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.405e+01 8.844e+01 9.472e+01 2.622e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-18 19:31:56,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.89 vs. limit=22.5 +2024-09-18 19:32:26,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=508080.0, ans=0.0 +2024-09-18 19:32:31,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=508120.0, ans=0.125 +2024-09-18 19:33:00,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=508160.0, ans=0.125 +2024-09-18 19:33:03,340 INFO [train.py:1198] (1/2) Epoch 29, batch 350, loss[loss=0.2227, ctc_loss=0.1168, cr_loss=0.3476, attn_decoder_loss=0.2267, over 29305.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1231, cr_loss=0.3654, attn_decoder_loss=0.244, over 4794848.54 frames. ], batch size: 71, lr: 3.85e-03, grad_scale: 8.0 +2024-09-18 19:33:23,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=508240.0, ans=0.0 +2024-09-18 19:33:28,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=508240.0, ans=0.2 +2024-09-18 19:33:37,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=508280.0, ans=0.025 +2024-09-18 19:33:47,156 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.92 vs. limit=15.0 +2024-09-18 19:34:00,796 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:34:22,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=508400.0, ans=0.125 +2024-09-18 19:34:23,115 INFO [train.py:1198] (1/2) Epoch 29, batch 400, loss[loss=0.2422, ctc_loss=0.1215, cr_loss=0.3721, attn_decoder_loss=0.2473, over 29722.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1225, cr_loss=0.3639, attn_decoder_loss=0.2434, over 5023338.67 frames. ], batch size: 82, lr: 3.85e-03, grad_scale: 16.0 +2024-09-18 19:34:27,753 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.140e+01 8.478e+01 8.916e+01 9.451e+01 2.866e+02, threshold=1.783e+02, percent-clipped=2.0 +2024-09-18 19:34:31,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=508400.0, ans=0.0 +2024-09-18 19:34:43,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.74 vs. 
limit=15.0 +2024-09-18 19:34:56,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=508480.0, ans=0.125 +2024-09-18 19:35:04,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=508480.0, ans=0.0 +2024-09-18 19:35:39,007 INFO [train.py:1198] (1/2) Epoch 29, batch 450, loss[loss=0.2459, ctc_loss=0.1291, cr_loss=0.3781, attn_decoder_loss=0.2505, over 29699.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1226, cr_loss=0.3644, attn_decoder_loss=0.2437, over 5186334.77 frames. ], batch size: 83, lr: 3.85e-03, grad_scale: 16.0 +2024-09-18 19:35:45,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=508600.0, ans=0.0 +2024-09-18 19:36:00,810 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:36:08,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.68 vs. limit=22.5 +2024-09-18 19:36:24,594 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.56 vs. limit=6.0 +2024-09-18 19:36:26,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=508720.0, ans=0.0 +2024-09-18 19:36:32,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=508720.0, ans=0.0 +2024-09-18 19:36:52,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=508760.0, ans=0.0 +2024-09-18 19:36:55,957 INFO [train.py:1198] (1/2) Epoch 29, batch 500, loss[loss=0.2486, ctc_loss=0.1282, cr_loss=0.3794, attn_decoder_loss=0.2536, over 29485.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1222, cr_loss=0.3637, attn_decoder_loss=0.243, over 5329516.04 frames. ], batch size: 94, lr: 3.84e-03, grad_scale: 16.0 +2024-09-18 19:37:00,504 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.214e+01 8.526e+01 8.926e+01 9.589e+01 3.622e+02, threshold=1.785e+02, percent-clipped=3.0 +2024-09-18 19:37:15,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=508840.0, ans=0.1 +2024-09-18 19:37:19,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=508840.0, ans=0.1 +2024-09-18 19:37:24,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=508840.0, ans=0.125 +2024-09-18 19:37:24,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=508840.0, ans=0.125 +2024-09-18 19:38:05,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=508960.0, ans=0.125 +2024-09-18 19:38:15,952 INFO [train.py:1198] (1/2) Epoch 29, batch 550, loss[loss=0.2477, ctc_loss=0.1283, cr_loss=0.3798, attn_decoder_loss=0.2525, over 28865.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1219, cr_loss=0.363, attn_decoder_loss=0.2431, over 5422124.06 frames. 
], batch size: 104, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:38:16,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=509000.0, ans=0.2 +2024-09-18 19:38:44,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=509080.0, ans=0.0 +2024-09-18 19:38:47,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=509080.0, ans=0.125 +2024-09-18 19:39:31,424 INFO [train.py:1198] (1/2) Epoch 29, batch 600, loss[loss=0.2584, ctc_loss=0.1385, cr_loss=0.41, attn_decoder_loss=0.2626, over 29256.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1221, cr_loss=0.3635, attn_decoder_loss=0.2434, over 5509825.95 frames. ], batch size: 100, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:39:37,601 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.349e+01 8.468e+01 8.932e+01 9.529e+01 2.879e+02, threshold=1.786e+02, percent-clipped=3.0 +2024-09-18 19:39:43,156 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.08 vs. limit=15.0 +2024-09-18 19:40:06,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=509280.0, ans=0.125 +2024-09-18 19:40:07,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=509280.0, ans=0.125 +2024-09-18 19:40:09,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=509280.0, ans=0.0 +2024-09-18 19:40:13,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=509280.0, ans=0.125 +2024-09-18 19:40:14,431 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.36 vs. limit=15.0 +2024-09-18 19:40:16,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=509320.0, ans=0.07 +2024-09-18 19:40:29,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.37 vs. limit=15.0 +2024-09-18 19:40:37,796 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:40:45,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=509400.0, ans=0.0 +2024-09-18 19:40:46,514 INFO [train.py:1198] (1/2) Epoch 29, batch 650, loss[loss=0.2417, ctc_loss=0.1299, cr_loss=0.3924, attn_decoder_loss=0.2454, over 29754.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1219, cr_loss=0.3633, attn_decoder_loss=0.2431, over 5587455.01 frames. 
], batch size: 81, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:41:01,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=509440.0, ans=0.125 +2024-09-18 19:41:13,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=509440.0, ans=0.125 +2024-09-18 19:41:34,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=509520.0, ans=0.0 +2024-09-18 19:41:35,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=509520.0, ans=0.125 +2024-09-18 19:41:37,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=509520.0, ans=0.2 +2024-09-18 19:41:37,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=509520.0, ans=0.125 +2024-09-18 19:41:43,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=509520.0, ans=0.2 +2024-09-18 19:41:47,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=509560.0, ans=0.125 +2024-09-18 19:41:56,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=509560.0, ans=0.0 +2024-09-18 19:42:05,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.95 vs. limit=22.5 +2024-09-18 19:42:06,941 INFO [train.py:1198] (1/2) Epoch 29, batch 700, loss[loss=0.2362, ctc_loss=0.1264, cr_loss=0.3866, attn_decoder_loss=0.2399, over 29539.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1226, cr_loss=0.365, attn_decoder_loss=0.2438, over 5637733.69 frames. ], batch size: 76, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:42:11,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=509600.0, ans=0.0 +2024-09-18 19:42:12,943 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.515e+01 8.488e+01 8.956e+01 9.496e+01 1.572e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-18 19:42:13,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=509600.0, ans=0.125 +2024-09-18 19:42:13,746 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.32 vs. 
limit=15.0 +2024-09-18 19:42:26,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=509640.0, ans=0.125 +2024-09-18 19:42:42,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=509680.0, ans=0.1 +2024-09-18 19:42:43,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=509680.0, ans=0.125 +2024-09-18 19:42:48,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=509680.0, ans=0.125 +2024-09-18 19:42:49,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=509680.0, ans=0.1 +2024-09-18 19:42:59,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=509720.0, ans=0.5 +2024-09-18 19:43:14,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=509760.0, ans=0.125 +2024-09-18 19:43:23,117 INFO [train.py:1198] (1/2) Epoch 29, batch 750, loss[loss=0.2493, ctc_loss=0.1337, cr_loss=0.384, attn_decoder_loss=0.2536, over 29697.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.122, cr_loss=0.3639, attn_decoder_loss=0.2434, over 5676958.21 frames. ], batch size: 82, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:43:32,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.61 vs. limit=15.0 +2024-09-18 19:43:44,826 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.29 vs. limit=22.5 +2024-09-18 19:44:01,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=509880.0, ans=0.125 +2024-09-18 19:44:07,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=509920.0, ans=0.0 +2024-09-18 19:44:28,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=509960.0, ans=0.125 +2024-09-18 19:44:32,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=509960.0, ans=0.125 +2024-09-18 19:44:38,406 INFO [train.py:1198] (1/2) Epoch 29, batch 800, loss[loss=0.2258, ctc_loss=0.1124, cr_loss=0.3557, attn_decoder_loss=0.2305, over 29603.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1218, cr_loss=0.3636, attn_decoder_loss=0.2433, over 5707962.83 frames. 
], batch size: 73, lr: 3.84e-03, grad_scale: 16.0 +2024-09-18 19:44:44,456 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.358e+01 8.377e+01 8.861e+01 9.386e+01 4.532e+02, threshold=1.772e+02, percent-clipped=1.0 +2024-09-18 19:45:14,037 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:45:19,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=510080.0, ans=0.125 +2024-09-18 19:45:55,652 INFO [train.py:1198] (1/2) Epoch 29, batch 850, loss[loss=0.2458, ctc_loss=0.1228, cr_loss=0.3648, attn_decoder_loss=0.2514, over 29734.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1217, cr_loss=0.3636, attn_decoder_loss=0.2431, over 5736666.42 frames. ], batch size: 89, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:46:07,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=510200.0, ans=0.0 +2024-09-18 19:46:13,037 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:46:43,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=510320.0, ans=0.2 +2024-09-18 19:46:50,281 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.64 vs. limit=15.0 +2024-09-18 19:46:57,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=510360.0, ans=0.125 +2024-09-18 19:47:13,949 INFO [train.py:1198] (1/2) Epoch 29, batch 900, loss[loss=0.2227, ctc_loss=0.1148, cr_loss=0.3378, attn_decoder_loss=0.2272, over 29593.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1221, cr_loss=0.3641, attn_decoder_loss=0.2433, over 5741270.45 frames. ], batch size: 73, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:47:21,305 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.562e+01 8.540e+01 9.030e+01 9.336e+01 1.932e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-18 19:48:02,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=510520.0, ans=0.1 +2024-09-18 19:48:07,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=510520.0, ans=0.1 +2024-09-18 19:48:29,447 INFO [train.py:1198] (1/2) Epoch 29, batch 950, loss[loss=0.2305, ctc_loss=0.1128, cr_loss=0.3611, attn_decoder_loss=0.2355, over 29523.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1226, cr_loss=0.365, attn_decoder_loss=0.2439, over 5742088.56 frames. 
], batch size: 74, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:48:29,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=510600.0, ans=0.125 +2024-09-18 19:48:29,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=510600.0, ans=0.07 +2024-09-18 19:48:32,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=510600.0, ans=0.0 +2024-09-18 19:48:43,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=510640.0, ans=0.5 +2024-09-18 19:48:59,970 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.70 vs. limit=6.0 +2024-09-18 19:49:03,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=510680.0, ans=0.125 +2024-09-18 19:49:18,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=510720.0, ans=0.04949747468305833 +2024-09-18 19:49:36,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=510760.0, ans=0.0 +2024-09-18 19:49:39,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=510760.0, ans=0.125 +2024-09-18 19:49:46,873 INFO [train.py:1198] (1/2) Epoch 29, batch 1000, loss[loss=0.2273, ctc_loss=0.1161, cr_loss=0.3533, attn_decoder_loss=0.2318, over 29475.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1233, cr_loss=0.3661, attn_decoder_loss=0.2444, over 5734608.49 frames. ], batch size: 77, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:49:56,635 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.108e+01 8.627e+01 9.386e+01 1.009e+02 2.634e+02, threshold=1.877e+02, percent-clipped=2.0 +2024-09-18 19:50:15,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=510840.0, ans=0.2 +2024-09-18 19:50:33,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=510920.0, ans=0.125 +2024-09-18 19:50:55,154 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.58 vs. limit=22.5 +2024-09-18 19:51:03,865 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=20.20 vs. limit=22.5 +2024-09-18 19:51:04,704 INFO [train.py:1198] (1/2) Epoch 29, batch 1050, loss[loss=0.2406, ctc_loss=0.1161, cr_loss=0.3624, attn_decoder_loss=0.2464, over 29674.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1225, cr_loss=0.3648, attn_decoder_loss=0.2435, over 5742866.70 frames. ], batch size: 85, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:51:17,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=511000.0, ans=0.1 +2024-09-18 19:51:17,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.46 vs. 
limit=15.0 +2024-09-18 19:51:34,655 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.01 vs. limit=15.0 +2024-09-18 19:51:39,341 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.77 vs. limit=15.0 +2024-09-18 19:51:58,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=511120.0, ans=0.1 +2024-09-18 19:51:59,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=511120.0, ans=0.125 +2024-09-18 19:52:19,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=511200.0, ans=0.0 +2024-09-18 19:52:21,229 INFO [train.py:1198] (1/2) Epoch 29, batch 1100, loss[loss=0.2391, ctc_loss=0.129, cr_loss=0.379, attn_decoder_loss=0.2429, over 29436.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1225, cr_loss=0.3651, attn_decoder_loss=0.2433, over 5755460.49 frames. ], batch size: 78, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:52:28,711 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.758e+01 8.572e+01 8.922e+01 9.420e+01 4.206e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-18 19:53:13,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=511320.0, ans=0.1 +2024-09-18 19:53:37,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=511400.0, ans=0.125 +2024-09-18 19:53:38,727 INFO [train.py:1198] (1/2) Epoch 29, batch 1150, loss[loss=0.2374, ctc_loss=0.1258, cr_loss=0.3697, attn_decoder_loss=0.2416, over 29467.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1227, cr_loss=0.3655, attn_decoder_loss=0.2431, over 5755279.31 frames. ], batch size: 78, lr: 3.84e-03, grad_scale: 8.0 +2024-09-18 19:53:39,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=511400.0, ans=0.0 +2024-09-18 19:53:48,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=511400.0, ans=0.125 +2024-09-18 19:53:52,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=511400.0, ans=0.0 +2024-09-18 19:53:56,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=511440.0, ans=0.0 +2024-09-18 19:53:56,673 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 19:53:59,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=511440.0, ans=0.125 +2024-09-18 19:54:42,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=511560.0, ans=0.2 +2024-09-18 19:54:50,410 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.39 vs. 
limit=22.5 +2024-09-18 19:54:55,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=511600.0, ans=0.0 +2024-09-18 19:54:56,976 INFO [train.py:1198] (1/2) Epoch 29, batch 1200, loss[loss=0.2381, ctc_loss=0.1113, cr_loss=0.3379, attn_decoder_loss=0.2447, over 29685.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1234, cr_loss=0.3669, attn_decoder_loss=0.244, over 5748348.98 frames. ], batch size: 85, lr: 3.83e-03, grad_scale: 16.0 +2024-09-18 19:55:04,482 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.596e+01 8.543e+01 9.016e+01 9.683e+01 2.653e+02, threshold=1.803e+02, percent-clipped=3.0 +2024-09-18 19:55:12,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=511640.0, ans=0.1 +2024-09-18 19:55:26,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=511680.0, ans=0.125 +2024-09-18 19:55:36,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=511680.0, ans=0.125 +2024-09-18 19:55:47,836 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.39 vs. limit=15.0 +2024-09-18 19:56:04,491 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.27 vs. limit=12.0 +2024-09-18 19:56:12,579 INFO [train.py:1198] (1/2) Epoch 29, batch 1250, loss[loss=0.2497, ctc_loss=0.1365, cr_loss=0.3929, attn_decoder_loss=0.2536, over 29522.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1235, cr_loss=0.3666, attn_decoder_loss=0.2443, over 5774568.03 frames. ], batch size: 92, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 19:56:51,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff2.min_abs, batch_count=511880.0, ans=0.1 +2024-09-18 19:57:07,453 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.11 vs. limit=15.0 +2024-09-18 19:57:08,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=511920.0, ans=0.0 +2024-09-18 19:57:38,302 INFO [train.py:1198] (1/2) Epoch 29, batch 1300, loss[loss=0.2442, ctc_loss=0.1259, cr_loss=0.3665, attn_decoder_loss=0.2492, over 28327.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1229, cr_loss=0.3645, attn_decoder_loss=0.2435, over 5780100.07 frames. ], batch size: 111, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 19:57:38,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=512000.0, ans=0.125 +2024-09-18 19:57:42,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.66 vs. 
limit=22.5 +2024-09-18 19:57:47,484 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.526e+01 8.940e+01 9.401e+01 4.173e+02, threshold=1.788e+02, percent-clipped=2.0 +2024-09-18 19:57:53,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=512040.0, ans=0.0 +2024-09-18 19:58:02,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=512040.0, ans=0.1 +2024-09-18 19:58:03,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=512040.0, ans=0.125 +2024-09-18 19:58:09,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=512080.0, ans=0.0 +2024-09-18 19:58:24,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=512120.0, ans=0.125 +2024-09-18 19:58:37,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=512120.0, ans=0.025 +2024-09-18 19:58:49,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=512160.0, ans=0.125 +2024-09-18 19:58:49,901 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.56 vs. limit=22.5 +2024-09-18 19:58:56,559 INFO [train.py:1198] (1/2) Epoch 29, batch 1350, loss[loss=0.2366, ctc_loss=0.1215, cr_loss=0.3568, attn_decoder_loss=0.2414, over 29749.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1222, cr_loss=0.3634, attn_decoder_loss=0.243, over 5796200.89 frames. ], batch size: 81, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 19:58:56,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=512200.0, ans=0.125 +2024-09-18 19:59:01,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=512200.0, ans=0.125 +2024-09-18 19:59:04,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=512200.0, ans=0.125 +2024-09-18 19:59:10,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=512240.0, ans=0.025 +2024-09-18 19:59:16,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=512240.0, ans=0.0 +2024-09-18 19:59:22,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=512240.0, ans=0.025 +2024-09-18 19:59:23,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.92 vs. limit=22.5 +2024-09-18 19:59:31,463 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.44 vs. 
+2024-09-18 19:59:35,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=512280.0, ans=0.125 +2024-09-18 19:59:41,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=512320.0, ans=0.125 +2024-09-18 19:59:55,918 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.58 vs. limit=12.0 +2024-09-18 20:00:07,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=512360.0, ans=0.125 +2024-09-18 20:00:11,671 INFO [train.py:1198] (1/2) Epoch 29, batch 1400, loss[loss=0.2099, ctc_loss=0.1022, cr_loss=0.3144, attn_decoder_loss=0.2149, over 29576.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1223, cr_loss=0.364, attn_decoder_loss=0.2432, over 5807230.39 frames. ], batch size: 69, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:00:19,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=512400.0, ans=0.0 +2024-09-18 20:00:20,750 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.135e+01 8.361e+01 8.836e+01 9.387e+01 1.190e+02, threshold=1.767e+02, percent-clipped=0.0 +2024-09-18 20:00:25,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=512440.0, ans=0.125 +2024-09-18 20:00:27,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=512440.0, ans=0.2 +2024-09-18 20:00:41,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=512440.0, ans=0.0 +2024-09-18 20:01:02,529 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:01:06,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=512520.0, ans=0.0 +2024-09-18 20:01:24,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=512560.0, ans=0.125 +2024-09-18 20:01:29,200 INFO [train.py:1198] (1/2) Epoch 29, batch 1450, loss[loss=0.2538, ctc_loss=0.1364, cr_loss=0.4007, attn_decoder_loss=0.2579, over 29465.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1225, cr_loss=0.3644, attn_decoder_loss=0.2437, over 5803945.00 frames.
], batch size: 94, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:01:40,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=512600.0, ans=0.2 +2024-09-18 20:02:09,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=512680.0, ans=0.125 +2024-09-18 20:02:10,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=512680.0, ans=0.0 +2024-09-18 20:02:17,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=512720.0, ans=0.125 +2024-09-18 20:02:31,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=512760.0, ans=15.0 +2024-09-18 20:02:38,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=512760.0, ans=0.125 +2024-09-18 20:02:47,668 INFO [train.py:1198] (1/2) Epoch 29, batch 1500, loss[loss=0.243, ctc_loss=0.1213, cr_loss=0.372, attn_decoder_loss=0.2483, over 29618.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1227, cr_loss=0.3649, attn_decoder_loss=0.244, over 5806395.89 frames. ], batch size: 86, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:02:54,543 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.19 vs. limit=10.0 +2024-09-18 20:02:58,363 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.696e+01 9.136e+01 9.651e+01 1.564e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-18 20:03:03,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.75 vs. limit=15.0 +2024-09-18 20:03:06,414 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:03:08,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=512840.0, ans=0.0 +2024-09-18 20:03:20,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=512880.0, ans=0.125 +2024-09-18 20:03:21,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=512880.0, ans=0.2 +2024-09-18 20:03:26,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=512880.0, ans=0.025 +2024-09-18 20:03:31,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=512880.0, ans=0.125 +2024-09-18 20:03:34,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.52 vs. limit=15.0 +2024-09-18 20:03:58,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=512960.0, ans=0.025 +2024-09-18 20:04:03,291 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.09 vs. 
limit=6.0 +2024-09-18 20:04:03,792 INFO [train.py:1198] (1/2) Epoch 29, batch 1550, loss[loss=0.2663, ctc_loss=0.1468, cr_loss=0.4307, attn_decoder_loss=0.27, over 29494.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1232, cr_loss=0.366, attn_decoder_loss=0.2443, over 5783638.65 frames. ], batch size: 90, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:04:03,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=513000.0, ans=0.015 +2024-09-18 20:04:08,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=513000.0, ans=0.0 +2024-09-18 20:04:33,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=513040.0, ans=0.125 +2024-09-18 20:04:40,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=513080.0, ans=0.125 +2024-09-18 20:04:42,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=513080.0, ans=0.125 +2024-09-18 20:05:21,274 INFO [train.py:1198] (1/2) Epoch 29, batch 1600, loss[loss=0.2496, ctc_loss=0.1282, cr_loss=0.3836, attn_decoder_loss=0.2545, over 29666.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1232, cr_loss=0.3651, attn_decoder_loss=0.2441, over 5766603.69 frames. ], batch size: 85, lr: 3.83e-03, grad_scale: 16.0 +2024-09-18 20:05:21,625 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:05:24,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=513200.0, ans=0.125 +2024-09-18 20:05:25,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=513200.0, ans=0.0 +2024-09-18 20:05:31,637 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.318e+01 8.586e+01 9.089e+01 9.783e+01 2.042e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-18 20:05:59,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.99 vs. limit=15.0 +2024-09-18 20:06:00,625 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.66 vs. limit=15.0 +2024-09-18 20:06:01,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=513280.0, ans=0.125 +2024-09-18 20:06:21,943 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.44 vs. limit=10.0 +2024-09-18 20:06:39,180 INFO [train.py:1198] (1/2) Epoch 29, batch 1650, loss[loss=0.2444, ctc_loss=0.1247, cr_loss=0.3708, attn_decoder_loss=0.2495, over 29687.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1228, cr_loss=0.3641, attn_decoder_loss=0.2437, over 5761817.36 frames. ], batch size: 89, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:07:01,228 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.94 vs. 
limit=22.5 +2024-09-18 20:07:11,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=513480.0, ans=0.0 +2024-09-18 20:07:14,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=513480.0, ans=10.0 +2024-09-18 20:07:20,975 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.36 vs. limit=12.0 +2024-09-18 20:07:26,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=513520.0, ans=0.0 +2024-09-18 20:07:28,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=513520.0, ans=0.2 +2024-09-18 20:07:28,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=513520.0, ans=0.125 +2024-09-18 20:07:40,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=513560.0, ans=0.1 +2024-09-18 20:07:55,037 INFO [train.py:1198] (1/2) Epoch 29, batch 1700, loss[loss=0.2142, ctc_loss=0.1003, cr_loss=0.3069, attn_decoder_loss=0.22, over 29560.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1224, cr_loss=0.3636, attn_decoder_loss=0.2436, over 5781851.35 frames. ], batch size: 69, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:07:59,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=513600.0, ans=0.1 +2024-09-18 20:08:07,210 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.650e+01 8.371e+01 8.901e+01 9.499e+01 1.304e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-18 20:08:12,685 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.28 vs. limit=15.0 +2024-09-18 20:08:34,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=513680.0, ans=0.125 +2024-09-18 20:08:45,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=513720.0, ans=0.1 +2024-09-18 20:08:47,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=513720.0, ans=0.125 +2024-09-18 20:09:12,833 INFO [train.py:1198] (1/2) Epoch 29, batch 1750, loss[loss=0.2099, ctc_loss=0.09885, cr_loss=0.3092, attn_decoder_loss=0.2154, over 29405.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.122, cr_loss=0.3627, attn_decoder_loss=0.2433, over 5789966.00 frames. 
], batch size: 67, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:09:26,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=513840.0, ans=0.1 +2024-09-18 20:10:00,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=513920.0, ans=0.1 +2024-09-18 20:10:20,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=513960.0, ans=0.125 +2024-09-18 20:10:30,207 INFO [train.py:1198] (1/2) Epoch 29, batch 1800, loss[loss=0.2439, ctc_loss=0.1249, cr_loss=0.3564, attn_decoder_loss=0.2492, over 29690.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1226, cr_loss=0.3643, attn_decoder_loss=0.2437, over 5793627.95 frames. ], batch size: 83, lr: 3.83e-03, grad_scale: 8.0 +2024-09-18 20:10:31,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=514000.0, ans=0.0 +2024-09-18 20:10:42,243 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.472e+01 8.834e+01 9.561e+01 3.303e+02, threshold=1.767e+02, percent-clipped=1.0 +2024-09-18 20:10:48,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=514040.0, ans=0.125 +2024-09-18 20:10:53,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=514040.0, ans=0.025 +2024-09-18 20:11:00,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=514080.0, ans=0.125 +2024-09-18 20:11:14,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=514120.0, ans=0.125 +2024-09-18 20:11:19,365 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.63 vs. limit=12.0 +2024-09-18 20:11:43,747 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=7.16 vs. limit=15.0 +2024-09-18 20:11:46,032 INFO [train.py:1198] (1/2) Epoch 29, batch 1850, loss[loss=0.2496, ctc_loss=0.129, cr_loss=0.3815, attn_decoder_loss=0.2545, over 29623.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1225, cr_loss=0.3647, attn_decoder_loss=0.2434, over 5798305.50 frames. ], batch size: 86, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:11:46,769 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.22 vs. 
limit=6.0 +2024-09-18 20:11:49,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=514200.0, ans=0.125 +2024-09-18 20:12:24,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=514280.0, ans=0.0 +2024-09-18 20:12:29,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=514280.0, ans=0.2 +2024-09-18 20:12:33,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=514320.0, ans=0.2 +2024-09-18 20:12:35,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=514320.0, ans=0.125 +2024-09-18 20:12:51,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=514360.0, ans=0.125 +2024-09-18 20:12:54,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=514360.0, ans=0.2 +2024-09-18 20:12:55,671 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.30 vs. limit=15.0 +2024-09-18 20:13:03,715 INFO [train.py:1198] (1/2) Epoch 29, batch 1900, loss[loss=0.2513, ctc_loss=0.1269, cr_loss=0.3808, attn_decoder_loss=0.2566, over 29688.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1227, cr_loss=0.3652, attn_decoder_loss=0.2438, over 5805299.76 frames. ], batch size: 89, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:13:08,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=514400.0, ans=0.0 +2024-09-18 20:13:15,864 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.747e+01 8.630e+01 9.084e+01 9.711e+01 2.750e+02, threshold=1.817e+02, percent-clipped=3.0 +2024-09-18 20:14:01,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=514520.0, ans=0.125 +2024-09-18 20:14:14,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=514560.0, ans=0.2 +2024-09-18 20:14:22,092 INFO [train.py:1198] (1/2) Epoch 29, batch 1950, loss[loss=0.2386, ctc_loss=0.1265, cr_loss=0.3854, attn_decoder_loss=0.2425, over 29448.00 frames. ], tot_loss[loss=0.24, ctc_loss=0.1231, cr_loss=0.3662, attn_decoder_loss=0.2449, over 5820303.01 frames. ], batch size: 78, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:14:25,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=514600.0, ans=0.025 +2024-09-18 20:14:40,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.63 vs. limit=15.0 +2024-09-18 20:14:59,425 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.35 vs. limit=10.0 +2024-09-18 20:15:20,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=514760.0, ans=0.0 +2024-09-18 20:15:37,425 INFO [train.py:1198] (1/2) Epoch 29, batch 2000, loss[loss=0.2155, ctc_loss=0.1117, cr_loss=0.3365, attn_decoder_loss=0.2196, over 29350.00 frames. 
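], tot_loss[loss=0.2402, ctc_loss=0.1234, cr_loss=0.3667, attn_decoder_loss=0.245, over 5797385.27 frames. ], batch size: 67, lr: 3.82e-03, grad_scale: 16.0

Each train.py line reports a per-batch loss[...] and a running tot_loss[...], each split into CTC, consistency-regularization (cr) and attention-decoder components. The logged totals are consistent with a fixed weighting of about 0.1 * ctc_loss + 0.9 * attn_decoder_loss + 0.02 * cr_loss (for the batch-2000 running total just above: 0.1*0.1234 + 0.9*0.245 + 0.02*0.3667 = 0.2402). A sketch of such a combination, with scale names inferred from the log rather than taken from train.py:

```python
# Sketch: weighted total loss consistent with the logged numbers.
# The 0.1 / 0.9 / 0.02 weights are inferred by fitting the logged
# tot_loss values; the names are illustrative, not necessarily train.py's.
CTC_SCALE = 0.1
ATTN_DECODER_SCALE = 0.9
CR_SCALE = 0.02

def combined_loss(ctc_loss, attn_decoder_loss, cr_loss):
    return (CTC_SCALE * ctc_loss
            + ATTN_DECODER_SCALE * attn_decoder_loss
            + CR_SCALE * cr_loss)

# Reproduces the running total above: loss=0.2402 from
# ctc_loss=0.1234, attn_decoder_loss=0.245, cr_loss=0.3667.
print(round(combined_loss(0.1234, 0.245, 0.3667), 4))  # 0.2402
```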
+2024-09-18 20:15:46,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=514800.0, ans=0.0 +2024-09-18 20:15:49,646 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.501e+01 8.639e+01 9.197e+01 9.637e+01 2.415e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-18 20:15:53,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=514840.0, ans=0.125 +2024-09-18 20:16:11,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=514880.0, ans=0.0 +2024-09-18 20:16:22,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=514880.0, ans=0.125 +2024-09-18 20:16:35,092 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.02 vs. limit=15.0 +2024-09-18 20:16:46,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=514960.0, ans=0.0 +2024-09-18 20:16:48,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=514960.0, ans=0.2 +2024-09-18 20:16:55,174 INFO [train.py:1198] (1/2) Epoch 29, batch 2050, loss[loss=0.2172, ctc_loss=0.1095, cr_loss=0.3341, attn_decoder_loss=0.2217, over 29411.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1233, cr_loss=0.3664, attn_decoder_loss=0.2445, over 5788684.50 frames. ], batch size: 70, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:17:00,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=515000.0, ans=0.035 +2024-09-18 20:17:00,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=515000.0, ans=0.2 +2024-09-18 20:17:04,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=515000.0, ans=0.0 +2024-09-18 20:17:23,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.70 vs.
limit=6.0 +2024-09-18 20:17:36,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=515080.0, ans=0.125 +2024-09-18 20:17:37,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer_ff3.min_abs, batch_count=515080.0, ans=0.2 +2024-09-18 20:17:46,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=515120.0, ans=0.1 +2024-09-18 20:17:47,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=515120.0, ans=0.125 +2024-09-18 20:17:49,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=515120.0, ans=0.025 +2024-09-18 20:17:56,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=515160.0, ans=0.125 +2024-09-18 20:17:58,899 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.54 vs. limit=22.5 +2024-09-18 20:18:13,431 INFO [train.py:1198] (1/2) Epoch 29, batch 2100, loss[loss=0.235, ctc_loss=0.1169, cr_loss=0.3567, attn_decoder_loss=0.2402, over 29763.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1224, cr_loss=0.3643, attn_decoder_loss=0.2435, over 5801386.76 frames. ], batch size: 81, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:18:25,547 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.305e+01 8.420e+01 8.993e+01 9.361e+01 1.152e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-18 20:18:25,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=515200.0, ans=0.0 +2024-09-18 20:18:28,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=515240.0, ans=0.0 +2024-09-18 20:18:39,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=515240.0, ans=0.0 +2024-09-18 20:18:43,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=515280.0, ans=0.0 +2024-09-18 20:18:52,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=515280.0, ans=0.2 +2024-09-18 20:19:00,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=515320.0, ans=0.09899494936611666 +2024-09-18 20:19:28,179 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.69 vs. limit=15.0 +2024-09-18 20:19:28,672 INFO [train.py:1198] (1/2) Epoch 29, batch 2150, loss[loss=0.2381, ctc_loss=0.1201, cr_loss=0.3761, attn_decoder_loss=0.2429, over 29429.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1217, cr_loss=0.3628, attn_decoder_loss=0.2429, over 5815279.40 frames. 
], batch size: 78, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:19:36,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=515400.0, ans=0.125 +2024-09-18 20:19:36,866 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:19:36,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=515400.0, ans=0.125 +2024-09-18 20:19:41,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=515400.0, ans=0.1 +2024-09-18 20:19:50,803 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.36 vs. limit=15.0 +2024-09-18 20:20:06,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.75 vs. limit=6.0 +2024-09-18 20:20:35,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.82 vs. limit=10.0 +2024-09-18 20:20:46,532 INFO [train.py:1198] (1/2) Epoch 29, batch 2200, loss[loss=0.2452, ctc_loss=0.1233, cr_loss=0.3677, attn_decoder_loss=0.2506, over 29621.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1225, cr_loss=0.3636, attn_decoder_loss=0.2434, over 5811791.41 frames. ], batch size: 86, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:20:52,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=515600.0, ans=0.125 +2024-09-18 20:20:55,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=515600.0, ans=0.2 +2024-09-18 20:20:58,446 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.069e+01 8.349e+01 8.970e+01 9.403e+01 1.511e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 20:21:17,728 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.73 vs. limit=8.0 +2024-09-18 20:21:25,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=515680.0, ans=0.125 +2024-09-18 20:21:33,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=515720.0, ans=0.125 +2024-09-18 20:21:41,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=515720.0, ans=0.0 +2024-09-18 20:22:02,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=515800.0, ans=0.0 +2024-09-18 20:22:04,183 INFO [train.py:1198] (1/2) Epoch 29, batch 2250, loss[loss=0.2512, ctc_loss=0.1267, cr_loss=0.3766, attn_decoder_loss=0.2567, over 29716.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1224, cr_loss=0.3641, attn_decoder_loss=0.2435, over 5810438.88 frames. 
], batch size: 82, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:22:26,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=515840.0, ans=0.09899494936611666 +2024-09-18 20:22:29,135 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.73 vs. limit=15.0 +2024-09-18 20:22:31,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=515840.0, ans=0.125 +2024-09-18 20:23:19,799 INFO [train.py:1198] (1/2) Epoch 29, batch 2300, loss[loss=0.2114, ctc_loss=0.09794, cr_loss=0.3234, attn_decoder_loss=0.2168, over 29335.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1217, cr_loss=0.3629, attn_decoder_loss=0.2424, over 5798571.72 frames. ], batch size: 71, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:23:30,544 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:23:31,707 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.422e+01 8.460e+01 8.964e+01 9.608e+01 5.700e+02, threshold=1.793e+02, percent-clipped=2.0 +2024-09-18 20:24:04,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer_ff3.min_abs, batch_count=516080.0, ans=0.2 +2024-09-18 20:24:08,171 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.09 vs. limit=15.0 +2024-09-18 20:24:37,597 INFO [train.py:1198] (1/2) Epoch 29, batch 2350, loss[loss=0.2511, ctc_loss=0.1309, cr_loss=0.3785, attn_decoder_loss=0.256, over 29688.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.122, cr_loss=0.3637, attn_decoder_loss=0.2427, over 5803745.84 frames. ], batch size: 83, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:25:09,375 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:25:12,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=516280.0, ans=0.2 +2024-09-18 20:25:23,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=516320.0, ans=0.125 +2024-09-18 20:25:32,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=516320.0, ans=0.125 +2024-09-18 20:25:55,371 INFO [train.py:1198] (1/2) Epoch 29, batch 2400, loss[loss=0.2286, ctc_loss=0.1199, cr_loss=0.3775, attn_decoder_loss=0.2323, over 29545.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1226, cr_loss=0.3647, attn_decoder_loss=0.2434, over 5807093.14 frames. 
], batch size: 76, lr: 3.82e-03, grad_scale: 16.0 +2024-09-18 20:26:08,939 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.530e+01 8.500e+01 8.937e+01 9.634e+01 2.540e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-18 20:26:15,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=516440.0, ans=0.0 +2024-09-18 20:26:16,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=516440.0, ans=0.1 +2024-09-18 20:26:43,159 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.32 vs. limit=15.0 +2024-09-18 20:26:45,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=516520.0, ans=0.95 +2024-09-18 20:27:11,181 INFO [train.py:1198] (1/2) Epoch 29, batch 2450, loss[loss=0.2412, ctc_loss=0.1204, cr_loss=0.3598, attn_decoder_loss=0.2467, over 29701.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1235, cr_loss=0.3662, attn_decoder_loss=0.2444, over 5784843.04 frames. ], batch size: 82, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:27:13,419 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.03 vs. limit=22.5 +2024-09-18 20:27:20,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=516600.0, ans=0.1 +2024-09-18 20:27:20,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=516600.0, ans=0.0 +2024-09-18 20:27:24,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=516640.0, ans=0.2 +2024-09-18 20:27:36,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=516640.0, ans=0.1 +2024-09-18 20:27:38,040 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.23 vs. limit=22.5 +2024-09-18 20:27:49,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=516680.0, ans=0.025 +2024-09-18 20:27:49,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=516680.0, ans=0.2 +2024-09-18 20:27:50,396 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.38 vs. limit=6.0 +2024-09-18 20:27:57,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=516720.0, ans=0.0 +2024-09-18 20:27:57,807 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.07 vs. 
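limit=6.0

The periodic optim.py warnings summarize recent gradient norms as five quantiles (apparently min, 25%, median, 75%, max) plus a clipping threshold; in every warning in this log the threshold tracks Clipping_scale (2.0) times the logged median (e.g. threshold=1.774e+02 = 2.0 * 8.869e+01 a few entries below). A rough illustration of median-tracked gradient clipping, under that assumption and with hypothetical names (not the actual optim.py code):

```python
# Illustrative median-tracked gradient clipping. Assumption: the clip
# threshold is clipping_scale times the median of recent grad norms,
# which matches the quartiles/threshold relationship in the warnings.
from collections import deque
import torch

class GradNormClipper:
    def __init__(self, clipping_scale=2.0, history=1024):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=history)  # recent total grad norms

    def clip_(self, parameters):
        params = [p for p in parameters if p.grad is not None]
        norm = torch.norm(torch.stack([p.grad.norm() for p in params])).item()
        self.norms.append(norm)
        s = sorted(self.norms)
        q = lambda f: s[int(f * (len(s) - 1))]  # crude quantile lookup
        threshold = self.clipping_scale * q(0.5)
        if norm > threshold:  # scale all gradients down to the threshold
            for p in params:
                p.grad.mul_(threshold / norm)
        # the WARNING lines report: min / 25% / 50% / 75% / max, threshold
        return norm, (q(0.0), q(0.25), q(0.5), q(0.75), q(1.0)), threshold
```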
+2024-09-18 20:28:09,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=516720.0, ans=0.2 +2024-09-18 20:28:26,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=516760.0, ans=0.1 +2024-09-18 20:28:29,440 INFO [train.py:1198] (1/2) Epoch 29, batch 2500, loss[loss=0.2423, ctc_loss=0.1212, cr_loss=0.3692, attn_decoder_loss=0.2475, over 29643.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1234, cr_loss=0.3663, attn_decoder_loss=0.2444, over 5794783.58 frames. ], batch size: 86, lr: 3.82e-03, grad_scale: 8.0 +2024-09-18 20:28:44,586 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.409e+01 8.372e+01 8.869e+01 9.573e+01 2.936e+02, threshold=1.774e+02, percent-clipped=2.0 +2024-09-18 20:29:00,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.60 vs. limit=15.0 +2024-09-18 20:29:20,179 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.05 vs. limit=15.0 +2024-09-18 20:29:21,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=516920.0, ans=0.125 +2024-09-18 20:29:47,333 INFO [train.py:1198] (1/2) Epoch 29, batch 2550, loss[loss=0.2115, ctc_loss=0.107, cr_loss=0.3342, attn_decoder_loss=0.2157, over 29380.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1234, cr_loss=0.3669, attn_decoder_loss=0.2443, over 5798385.67 frames. ], batch size: 67, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:30:02,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=517040.0, ans=0.0 +2024-09-18 20:30:04,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=517040.0, ans=0.1 +2024-09-18 20:30:07,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=517040.0, ans=0.125 +2024-09-18 20:30:11,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=517040.0, ans=0.125 +2024-09-18 20:30:49,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=517160.0, ans=0.125 +2024-09-18 20:31:02,915 INFO [train.py:1198] (1/2) Epoch 29, batch 2600, loss[loss=0.242, ctc_loss=0.1224, cr_loss=0.3697, attn_decoder_loss=0.2471, over 29453.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1235, cr_loss=0.3667, attn_decoder_loss=0.2446, over 5793768.66 frames.
], batch size: 78, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:31:17,751 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.549e+01 8.951e+01 9.409e+01 2.372e+02, threshold=1.790e+02, percent-clipped=2.0 +2024-09-18 20:31:18,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=517240.0, ans=0.0 +2024-09-18 20:31:33,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=517280.0, ans=0.0 +2024-09-18 20:31:36,866 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 20:31:47,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=517280.0, ans=0.0 +2024-09-18 20:31:48,221 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.77 vs. limit=12.0 +2024-09-18 20:32:07,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=517360.0, ans=0.0 +2024-09-18 20:32:20,505 INFO [train.py:1198] (1/2) Epoch 29, batch 2650, loss[loss=0.2453, ctc_loss=0.1286, cr_loss=0.3735, attn_decoder_loss=0.2499, over 29210.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1234, cr_loss=0.3664, attn_decoder_loss=0.2447, over 5800481.06 frames. ], batch size: 100, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:32:35,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=517440.0, ans=0.125 +2024-09-18 20:32:37,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.12 vs. limit=15.0 +2024-09-18 20:32:55,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=517480.0, ans=0.125 +2024-09-18 20:33:01,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=517480.0, ans=0.05 +2024-09-18 20:33:09,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=517520.0, ans=0.2 +2024-09-18 20:33:15,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=517520.0, ans=0.125 +2024-09-18 20:33:15,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=517520.0, ans=0.125 +2024-09-18 20:33:38,534 INFO [train.py:1198] (1/2) Epoch 29, batch 2700, loss[loss=0.235, ctc_loss=0.1085, cr_loss=0.3277, attn_decoder_loss=0.2417, over 29534.00 frames. ], tot_loss[loss=0.2401, ctc_loss=0.1237, cr_loss=0.3669, attn_decoder_loss=0.2448, over 5796305.23 frames. 
], batch size: 87, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:33:41,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=517600.0, ans=0.125 +2024-09-18 20:33:47,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=517600.0, ans=0.125 +2024-09-18 20:33:53,537 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.043e+01 8.653e+01 9.179e+01 9.808e+01 2.021e+02, threshold=1.836e+02, percent-clipped=2.0 +2024-09-18 20:34:21,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=517680.0, ans=0.125 +2024-09-18 20:34:23,404 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.87 vs. limit=22.5 +2024-09-18 20:34:33,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=517720.0, ans=0.125 +2024-09-18 20:34:54,588 INFO [train.py:1198] (1/2) Epoch 29, batch 2750, loss[loss=0.2164, ctc_loss=0.1056, cr_loss=0.3426, attn_decoder_loss=0.2211, over 29510.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1227, cr_loss=0.365, attn_decoder_loss=0.2435, over 5795661.01 frames. ], batch size: 75, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:35:13,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=517840.0, ans=0.125 +2024-09-18 20:35:24,678 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.75 vs. limit=22.5 +2024-09-18 20:35:27,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=517880.0, ans=0.125 +2024-09-18 20:35:28,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=517880.0, ans=0.1 +2024-09-18 20:35:29,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.20 vs. limit=15.0 +2024-09-18 20:35:43,948 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.93 vs. limit=15.0 +2024-09-18 20:36:08,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=517960.0, ans=0.125 +2024-09-18 20:36:11,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=518000.0, ans=0.125 +2024-09-18 20:36:12,343 INFO [train.py:1198] (1/2) Epoch 29, batch 2800, loss[loss=0.2482, ctc_loss=0.1429, cr_loss=0.3638, attn_decoder_loss=0.2519, over 20029.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1228, cr_loss=0.3658, attn_decoder_loss=0.2435, over 5776812.59 frames. ], batch size: 210, lr: 3.81e-03, grad_scale: 16.0 +2024-09-18 20:36:28,867 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.249e+01 8.371e+01 8.942e+01 9.579e+01 2.215e+02, threshold=1.788e+02, percent-clipped=1.0 +2024-09-18 20:36:37,130 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.46 vs. 
limit=22.5 +2024-09-18 20:37:05,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=518120.0, ans=0.125 +2024-09-18 20:37:05,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=518120.0, ans=0.025 +2024-09-18 20:37:10,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=518120.0, ans=0.05 +2024-09-18 20:37:30,067 INFO [train.py:1198] (1/2) Epoch 29, batch 2850, loss[loss=0.221, ctc_loss=0.1073, cr_loss=0.3351, attn_decoder_loss=0.2262, over 29515.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1231, cr_loss=0.3658, attn_decoder_loss=0.2438, over 5761879.30 frames. ], batch size: 77, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:37:51,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=518240.0, ans=0.125 +2024-09-18 20:37:56,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=518240.0, ans=0.125 +2024-09-18 20:38:23,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=518320.0, ans=0.5 +2024-09-18 20:38:32,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=518360.0, ans=0.0 +2024-09-18 20:38:35,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=518360.0, ans=0.5 +2024-09-18 20:38:46,510 INFO [train.py:1198] (1/2) Epoch 29, batch 2900, loss[loss=0.2386, ctc_loss=0.1213, cr_loss=0.3573, attn_decoder_loss=0.2437, over 29404.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1237, cr_loss=0.3676, attn_decoder_loss=0.245, over 5787573.80 frames. ], batch size: 79, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:38:55,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=518400.0, ans=0.2 +2024-09-18 20:39:05,308 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.605e+01 8.499e+01 8.947e+01 9.458e+01 2.522e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-18 20:39:37,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=518520.0, ans=0.125 +2024-09-18 20:40:04,382 INFO [train.py:1198] (1/2) Epoch 29, batch 2950, loss[loss=0.23, ctc_loss=0.1162, cr_loss=0.3622, attn_decoder_loss=0.2346, over 29528.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1229, cr_loss=0.3658, attn_decoder_loss=0.2437, over 5783949.43 frames. ], batch size: 75, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:40:16,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=518600.0, ans=0.0 +2024-09-18 20:40:22,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=518640.0, ans=0.125 +2024-09-18 20:40:29,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.49 vs. 
limit=15.0 +2024-09-18 20:40:32,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=518640.0, ans=0.125 +2024-09-18 20:40:45,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=518680.0, ans=0.125 +2024-09-18 20:40:47,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=518680.0, ans=0.2 +2024-09-18 20:40:59,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.82 vs. limit=22.5 +2024-09-18 20:41:01,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=518720.0, ans=0.1 +2024-09-18 20:41:19,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=518800.0, ans=0.125 +2024-09-18 20:41:22,997 INFO [train.py:1198] (1/2) Epoch 29, batch 3000, loss[loss=0.2419, ctc_loss=0.1248, cr_loss=0.3759, attn_decoder_loss=0.2465, over 29769.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1225, cr_loss=0.3645, attn_decoder_loss=0.2436, over 5782300.45 frames. ], batch size: 81, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:41:22,997 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 20:41:34,884 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([2.6141, 3.6614, 3.4227, 3.1690, 3.7522, 3.1724, 3.7708, 2.8082], + device='cuda:1') +2024-09-18 20:41:41,476 INFO [train.py:1230] (1/2) Epoch 29, validation: loss=0.2115, ctc_loss=0.03752, cr_loss=5.604e-15, attn_decoder_loss=0.2309, over 944034.00 frames. +2024-09-18 20:41:41,476 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 20:41:58,310 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.278e+01 8.694e+01 9.323e+01 9.820e+01 2.000e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-18 20:41:59,265 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.72 vs. limit=15.0 +2024-09-18 20:42:09,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=518840.0, ans=0.125 +2024-09-18 20:42:13,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=518880.0, ans=0.125 +2024-09-18 20:42:24,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=518880.0, ans=0.125 +2024-09-18 20:42:37,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=518920.0, ans=0.125 +2024-09-18 20:42:59,583 INFO [train.py:1198] (1/2) Epoch 29, batch 3050, loss[loss=0.2288, ctc_loss=0.1174, cr_loss=0.364, attn_decoder_loss=0.2331, over 29503.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1231, cr_loss=0.3659, attn_decoder_loss=0.2443, over 5777107.52 frames. 
], batch size: 76, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:43:28,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=519080.0, ans=0.125 +2024-09-18 20:43:34,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=519080.0, ans=0.125 +2024-09-18 20:43:57,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.25 vs. limit=12.0 +2024-09-18 20:43:58,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=519160.0, ans=0.0 +2024-09-18 20:44:02,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.62 vs. limit=12.0 +2024-09-18 20:44:03,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=519160.0, ans=0.025 +2024-09-18 20:44:09,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=519160.0, ans=0.125 +2024-09-18 20:44:15,289 INFO [train.py:1198] (1/2) Epoch 29, batch 3100, loss[loss=0.2548, ctc_loss=0.1325, cr_loss=0.3755, attn_decoder_loss=0.2601, over 29220.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1231, cr_loss=0.3658, attn_decoder_loss=0.244, over 5776497.20 frames. ], batch size: 100, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:44:26,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=519200.0, ans=0.125 +2024-09-18 20:44:31,780 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.551e+01 8.578e+01 9.222e+01 9.783e+01 2.939e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-18 20:45:02,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=519320.0, ans=0.125 +2024-09-18 20:45:29,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=519360.0, ans=0.125 +2024-09-18 20:45:33,295 INFO [train.py:1198] (1/2) Epoch 29, batch 3150, loss[loss=0.2471, ctc_loss=0.1285, cr_loss=0.3759, attn_decoder_loss=0.252, over 28852.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1227, cr_loss=0.3653, attn_decoder_loss=0.2437, over 5781857.37 frames. ], batch size: 104, lr: 3.81e-03, grad_scale: 8.0 +2024-09-18 20:45:44,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.98 vs. 
limit=10.0
+2024-09-18 20:45:56,381 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 20:46:00,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=519440.0, ans=0.1
+2024-09-18 20:46:24,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=519520.0, ans=0.2
+2024-09-18 20:46:32,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=519560.0, ans=0.1
+2024-09-18 20:46:44,118 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.57 vs. limit=12.0
+2024-09-18 20:46:50,882 INFO [train.py:1198] (1/2) Epoch 29, batch 3200, loss[loss=0.2367, ctc_loss=0.1191, cr_loss=0.3546, attn_decoder_loss=0.2418, over 29399.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1225, cr_loss=0.3649, attn_decoder_loss=0.2432, over 5791811.39 frames. ], batch size: 79, lr: 3.80e-03, grad_scale: 16.0
+2024-09-18 20:47:07,588 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.168e+01 8.418e+01 8.919e+01 9.479e+01 2.582e+02, threshold=1.784e+02, percent-clipped=1.0
+2024-09-18 20:47:25,584 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.09 vs. limit=15.0
+2024-09-18 20:48:07,105 INFO [train.py:1198] (1/2) Epoch 29, batch 3250, loss[loss=0.2459, ctc_loss=0.126, cr_loss=0.3699, attn_decoder_loss=0.251, over 29702.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1224, cr_loss=0.3651, attn_decoder_loss=0.2436, over 5798110.04 frames. ], batch size: 84, lr: 3.80e-03, grad_scale: 16.0
+2024-09-18 20:48:57,983 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.49 vs. limit=15.0
+2024-09-18 20:49:25,273 INFO [train.py:1198] (1/2) Epoch 29, batch 3300, loss[loss=0.2513, ctc_loss=0.1279, cr_loss=0.3716, attn_decoder_loss=0.2568, over 28331.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1218, cr_loss=0.3639, attn_decoder_loss=0.2427, over 5796286.30 frames. ], batch size: 111, lr: 3.80e-03, grad_scale: 16.0
+2024-09-18 20:49:34,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=520000.0, ans=0.125
+2024-09-18 20:49:42,287 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.574e+01 8.577e+01 8.993e+01 9.559e+01 2.414e+02, threshold=1.799e+02, percent-clipped=3.0
+2024-09-18 20:49:59,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=520080.0, ans=0.0
+2024-09-18 20:50:02,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=520080.0, ans=0.125
+2024-09-18 20:50:03,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=520080.0, ans=0.025
+2024-09-18 20:50:15,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=520120.0, ans=0.0
+2024-09-18 20:50:21,749 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 20:50:34,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=520160.0, ans=0.025
+2024-09-18 20:50:43,269 INFO [train.py:1198] (1/2) Epoch 29, batch 3350, loss[loss=0.2495, ctc_loss=0.127, cr_loss=0.3745, attn_decoder_loss=0.2548, over 28872.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1227, cr_loss=0.3651, attn_decoder_loss=0.2434, over 5773273.16 frames. ], batch size: 104, lr: 3.80e-03, grad_scale: 16.0
+2024-09-18 20:50:51,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=520200.0, ans=0.125
+2024-09-18 20:51:01,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=520240.0, ans=0.0
+2024-09-18 20:51:05,404 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.94 vs. limit=15.0
+2024-09-18 20:51:26,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=520280.0, ans=0.125
+2024-09-18 20:51:27,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=520320.0, ans=0.0
+2024-09-18 20:51:35,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=520320.0, ans=0.125
+2024-09-18 20:51:38,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=520320.0, ans=0.125
+2024-09-18 20:51:44,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.74 vs. limit=6.0
+2024-09-18 20:51:45,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=520360.0, ans=0.125
+2024-09-18 20:51:50,808 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.99 vs. limit=6.0
+2024-09-18 20:51:51,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=520360.0, ans=0.0
+2024-09-18 20:51:54,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=520360.0, ans=0.125
+2024-09-18 20:51:59,172 INFO [train.py:1198] (1/2) Epoch 29, batch 3400, loss[loss=0.2139, ctc_loss=0.1053, cr_loss=0.3304, attn_decoder_loss=0.2186, over 29342.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1231, cr_loss=0.366, attn_decoder_loss=0.2438, over 5766801.83 frames. ], batch size: 67, lr: 3.80e-03, grad_scale: 16.0
+2024-09-18 20:52:00,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=520400.0, ans=0.125
+2024-09-18 20:52:08,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=520400.0, ans=0.1
+2024-09-18 20:52:15,975 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.276e+01 8.635e+01 9.188e+01 1.005e+02 1.629e+02, threshold=1.838e+02, percent-clipped=0.0
+2024-09-18 20:52:25,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=520440.0, ans=0.0
+2024-09-18 20:52:54,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=520520.0, ans=0.125
+2024-09-18 20:52:57,432 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.39 vs. limit=15.0
+2024-09-18 20:53:08,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=520560.0, ans=0.125
+2024-09-18 20:53:16,960 INFO [train.py:1198] (1/2) Epoch 29, batch 3450, loss[loss=0.2514, ctc_loss=0.1266, cr_loss=0.3824, attn_decoder_loss=0.2568, over 28192.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1231, cr_loss=0.3669, attn_decoder_loss=0.2441, over 5774304.51 frames. ], batch size: 111, lr: 3.80e-03, grad_scale: 8.0
+2024-09-18 20:53:34,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=520640.0, ans=0.1
+2024-09-18 20:53:50,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=520680.0, ans=0.125
+2024-09-18 20:53:58,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=520680.0, ans=0.0
+2024-09-18 20:54:01,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=520720.0, ans=0.125
+2024-09-18 20:54:05,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=520720.0, ans=0.0
+2024-09-18 20:54:10,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=520720.0, ans=0.125
+2024-09-18 20:54:35,153 INFO [train.py:1198] (1/2) Epoch 29, batch 3500, loss[loss=0.2149, ctc_loss=0.1095, cr_loss=0.3385, attn_decoder_loss=0.2191, over 29334.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1227, cr_loss=0.3656, attn_decoder_loss=0.2436, over 5777093.72 frames. ], batch size: 71, lr: 3.80e-03, grad_scale: 8.0
+2024-09-18 20:54:35,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=520800.0, ans=0.025
+2024-09-18 20:54:35,808 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.54 vs. limit=15.0
+2024-09-18 20:54:39,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=520800.0, ans=0.0
+2024-09-18 20:54:53,387 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.549e+01 8.236e+01 8.769e+01 9.566e+01 1.320e+02, threshold=1.754e+02, percent-clipped=0.0
+2024-09-18 20:54:54,196 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.51 vs. limit=15.0
+2024-09-18 20:54:58,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=520840.0, ans=0.025
+2024-09-18 20:55:01,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=520840.0, ans=0.1
+2024-09-18 20:55:07,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=520880.0, ans=0.125
+2024-09-18 20:55:24,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=520920.0, ans=0.125
+2024-09-18 20:55:26,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=520920.0, ans=0.125
+2024-09-18 20:55:31,849 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.20 vs. limit=15.0
+2024-09-18 20:55:34,035 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 20:55:50,176 INFO [train.py:1198] (1/2) Epoch 29, batch 3550, loss[loss=0.2398, ctc_loss=0.1186, cr_loss=0.3426, attn_decoder_loss=0.2457, over 29690.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1227, cr_loss=0.3654, attn_decoder_loss=0.2436, over 5784555.59 frames. ], batch size: 89, lr: 3.80e-03, grad_scale: 8.0
+2024-09-18 20:55:53,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=521000.0, ans=0.125
+2024-09-18 20:55:54,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=521000.0, ans=0.1
+2024-09-18 20:55:58,431 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.55 vs. limit=22.5
+2024-09-18 20:56:27,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=521080.0, ans=0.125
+2024-09-18 20:56:32,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=521080.0, ans=0.125
+2024-09-18 20:56:35,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=521120.0, ans=0.1
+2024-09-18 20:56:45,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.04 vs. limit=15.0
+2024-09-18 20:56:54,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=521160.0, ans=0.125
+2024-09-18 20:57:00,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.23 vs. limit=15.0
+2024-09-18 20:57:04,190 INFO [train.py:1198] (1/2) Epoch 29, batch 3600, loss[loss=0.2274, ctc_loss=0.1099, cr_loss=0.3308, attn_decoder_loss=0.2331, over 29498.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1226, cr_loss=0.3653, attn_decoder_loss=0.2436, over 5792569.74 frames. ], batch size: 77, lr: 3.80e-03, grad_scale: 16.0
+2024-09-18 20:57:06,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=521200.0, ans=0.025
+2024-09-18 20:57:08,310 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.49 vs. limit=10.0
+2024-09-18 20:57:12,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=521200.0, ans=0.0
+2024-09-18 20:57:19,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=521240.0, ans=0.0
+2024-09-18 20:57:22,269 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.410e+01 8.552e+01 8.984e+01 9.474e+01 4.897e+02, threshold=1.797e+02, percent-clipped=1.0
+2024-09-18 20:57:35,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=521280.0, ans=0.125
+2024-09-18 20:57:36,501 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.95 vs. limit=6.0
+2024-09-18 20:57:40,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=521280.0, ans=0.125
+2024-09-18 20:57:47,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=521320.0, ans=0.0
+2024-09-18 20:57:47,830 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 20:57:48,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=521320.0, ans=0.125
+2024-09-18 20:58:05,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=521360.0, ans=0.0
+2024-09-18 20:58:07,574 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.69 vs. limit=15.0
+2024-09-18 20:58:07,809 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.49 vs. limit=15.0
+2024-09-18 20:58:18,781 INFO [train.py:1198] (1/2) Epoch 29, batch 3650, loss[loss=0.2501, ctc_loss=0.1369, cr_loss=0.4036, attn_decoder_loss=0.2537, over 29498.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1222, cr_loss=0.3643, attn_decoder_loss=0.2432, over 5793761.72 frames. ], batch size: 90, lr: 3.80e-03, grad_scale: 16.0
+2024-09-18 20:58:23,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=521400.0, ans=0.125
+2024-09-18 20:58:25,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=521400.0, ans=0.0
+2024-09-18 20:58:25,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=521400.0, ans=0.0
+2024-09-18 20:58:25,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.67 vs. limit=15.0
+2024-09-18 20:58:51,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=521480.0, ans=0.125
+2024-09-18 20:59:03,561 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.55 vs. limit=22.5
+2024-09-18 20:59:05,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=521520.0, ans=0.0
+2024-09-18 20:59:10,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=521520.0, ans=0.125
+2024-09-18 20:59:36,072 INFO [train.py:1198] (1/2) Epoch 29, batch 3700, loss[loss=0.2372, ctc_loss=0.1128, cr_loss=0.351, attn_decoder_loss=0.2432, over 29692.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.122, cr_loss=0.3644, attn_decoder_loss=0.2433, over 5803990.18 frames. ], batch size: 84, lr: 3.80e-03, grad_scale: 8.0
+2024-09-18 20:59:48,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.57 vs. limit=15.0
+2024-09-18 20:59:55,283 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.566e+01 8.397e+01 8.875e+01 9.405e+01 1.712e+02, threshold=1.775e+02, percent-clipped=0.0
+2024-09-18 21:00:22,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=521720.0, ans=0.0
+2024-09-18 21:00:29,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=521720.0, ans=0.125
+2024-09-18 21:00:37,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=521760.0, ans=0.1
+2024-09-18 21:00:38,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=521760.0, ans=0.125
+2024-09-18 21:00:43,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=521760.0, ans=0.2
+2024-09-18 21:00:46,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=521760.0, ans=0.1
+2024-09-18 21:00:49,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=521760.0, ans=0.125
+2024-09-18 21:00:50,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=521800.0, ans=0.0
+2024-09-18 21:00:51,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.10 vs. limit=22.5
+2024-09-18 21:00:52,041 INFO [train.py:1198] (1/2) Epoch 29, batch 3750, loss[loss=0.2119, ctc_loss=0.1011, cr_loss=0.3215, attn_decoder_loss=0.217, over 29318.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1222, cr_loss=0.3649, attn_decoder_loss=0.2433, over 5807842.76 frames. ], batch size: 67, lr: 3.80e-03, grad_scale: 8.0
+2024-09-18 21:01:08,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=521840.0, ans=0.0
+2024-09-18 21:01:37,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=521920.0, ans=0.125
+2024-09-18 21:01:49,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=521920.0, ans=0.125
+2024-09-18 21:01:58,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=521960.0, ans=0.125
+2024-09-18 21:02:05,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=522000.0, ans=0.125
+2024-09-18 21:02:06,642 INFO [train.py:1198] (1/2) Epoch 29, batch 3800, loss[loss=0.2499, ctc_loss=0.1242, cr_loss=0.3657, attn_decoder_loss=0.2557, over 29651.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1219, cr_loss=0.3641, attn_decoder_loss=0.2429, over 5799208.46 frames. ], batch size: 86, lr: 3.80e-03, grad_scale: 8.0
+2024-09-18 21:02:14,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=522000.0, ans=0.025
+2024-09-18 21:02:18,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=522000.0, ans=0.125
+2024-09-18 21:02:25,918 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.282e+01 8.472e+01 8.859e+01 9.703e+01 1.383e+02, threshold=1.772e+02, percent-clipped=0.0
+2024-09-18 21:02:33,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=522040.0, ans=0.1
+2024-09-18 21:02:40,232 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.93 vs. limit=12.0
+2024-09-18 21:02:45,754 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 21:02:50,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=522120.0, ans=0.2
+2024-09-18 21:02:54,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=522120.0, ans=0.1
+2024-09-18 21:03:01,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=522120.0, ans=0.2
+2024-09-18 21:03:20,793 INFO [train.py:1198] (1/2) Epoch 29, batch 3850, loss[loss=0.2517, ctc_loss=0.1322, cr_loss=0.3854, attn_decoder_loss=0.2565, over 29249.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1219, cr_loss=0.3642, attn_decoder_loss=0.2428, over 5812424.21 frames. ], batch size: 100, lr: 3.80e-03, grad_scale: 8.0
+2024-09-18 21:03:52,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=522280.0, ans=0.0
+2024-09-18 21:03:53,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=522280.0, ans=0.0
+2024-09-18 21:03:56,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.69 vs. limit=15.0
+2024-09-18 21:04:19,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=522360.0, ans=0.1
+2024-09-18 21:04:37,290 INFO [train.py:1198] (1/2) Epoch 29, batch 3900, loss[loss=0.2449, ctc_loss=0.1179, cr_loss=0.3476, attn_decoder_loss=0.2513, over 29625.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1223, cr_loss=0.3645, attn_decoder_loss=0.2434, over 5816619.43 frames. ], batch size: 86, lr: 3.79e-03, grad_scale: 8.0
+2024-09-18 21:04:42,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=522400.0, ans=0.2
+2024-09-18 21:04:50,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=522440.0, ans=0.07
+2024-09-18 21:04:56,262 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.564e+01 8.663e+01 9.050e+01 9.576e+01 3.697e+02, threshold=1.810e+02, percent-clipped=2.0
+2024-09-18 21:04:56,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=522440.0, ans=0.125
+2024-09-18 21:04:58,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=522440.0, ans=0.0
+2024-09-18 21:05:04,733 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.21 vs. limit=15.0
+2024-09-18 21:05:06,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=522480.0, ans=0.0
+2024-09-18 21:05:18,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=522480.0, ans=0.2
+2024-09-18 21:05:42,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=522560.0, ans=0.2
+2024-09-18 21:05:52,782 INFO [train.py:1198] (1/2) Epoch 29, batch 3950, loss[loss=0.2587, ctc_loss=0.1389, cr_loss=0.398, attn_decoder_loss=0.2631, over 29491.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1227, cr_loss=0.3658, attn_decoder_loss=0.2438, over 5836097.34 frames. ], batch size: 97, lr: 3.79e-03, grad_scale: 8.0
+2024-09-18 21:06:02,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=522600.0, ans=0.125
+2024-09-18 21:06:04,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=522600.0, ans=0.125
+2024-09-18 21:06:07,427 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.31 vs. limit=8.0
+2024-09-18 21:06:07,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=522640.0, ans=0.0
+2024-09-18 21:06:15,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=522640.0, ans=0.125
+2024-09-18 21:06:19,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=522640.0, ans=0.125
+2024-09-18 21:06:30,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.54 vs. limit=22.5
+2024-09-18 21:06:33,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.32 vs. limit=6.0
+2024-09-18 21:06:37,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=522720.0, ans=0.1
+2024-09-18 21:06:41,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=522720.0, ans=0.5
+2024-09-18 21:07:00,307 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.40 vs. limit=22.5
+2024-09-18 21:07:06,729 INFO [train.py:1198] (1/2) Epoch 29, batch 4000, loss[loss=0.2171, ctc_loss=0.0954, cr_loss=0.3056, attn_decoder_loss=0.2238, over 29513.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1229, cr_loss=0.3658, attn_decoder_loss=0.244, over 5813547.02 frames. ], batch size: 74, lr: 3.79e-03, grad_scale: 16.0
+2024-09-18 21:07:11,877 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.50 vs. limit=15.0
+2024-09-18 21:07:14,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=522800.0, ans=0.125
+2024-09-18 21:07:25,858 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.254e+01 8.544e+01 9.038e+01 9.843e+01 4.905e+02, threshold=1.808e+02, percent-clipped=2.0
+2024-09-18 21:07:34,257 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.90 vs. limit=15.0
+2024-09-18 21:07:59,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=522920.0, ans=0.0
+2024-09-18 21:08:05,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=522960.0, ans=0.125
+2024-09-18 21:08:10,069 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.00 vs. limit=6.0
+2024-09-18 21:08:16,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=522960.0, ans=0.0
+2024-09-18 21:08:21,176 INFO [train.py:1198] (1/2) Epoch 29, batch 4050, loss[loss=0.2641, ctc_loss=0.1535, cr_loss=0.3875, attn_decoder_loss=0.2677, over 20721.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1229, cr_loss=0.3657, attn_decoder_loss=0.2438, over 5796716.83 frames. ], batch size: 209, lr: 3.79e-03, grad_scale: 16.0
+2024-09-18 21:08:21,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=523000.0, ans=0.1
+2024-09-18 21:08:21,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=523000.0, ans=0.125
+2024-09-18 21:08:43,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=523040.0, ans=0.1
+2024-09-18 21:08:48,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=523080.0, ans=0.125
+2024-09-18 21:08:50,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=523080.0, ans=0.125
+2024-09-18 21:08:50,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=523080.0, ans=0.07
+2024-09-18 21:09:05,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=523120.0, ans=0.125
+2024-09-18 21:09:18,481 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 21:09:27,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=523160.0, ans=0.95
+2024-09-18 21:09:30,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=523160.0, ans=0.125
+2024-09-18 21:09:36,357 INFO [train.py:1198] (1/2) Epoch 29, batch 4100, loss[loss=0.2576, ctc_loss=0.1415, cr_loss=0.4181, attn_decoder_loss=0.2613, over 29520.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1233, cr_loss=0.3661, attn_decoder_loss=0.2441, over 5791037.28 frames. ], batch size: 90, lr: 3.79e-03, grad_scale: 16.0
+2024-09-18 21:09:45,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=523200.0, ans=0.125
+2024-09-18 21:09:47,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=523200.0, ans=0.0
+2024-09-18 21:09:56,914 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.375e+01 8.631e+01 9.111e+01 9.616e+01 2.001e+02, threshold=1.822e+02, percent-clipped=1.0
+2024-09-18 21:10:00,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=523240.0, ans=0.0
+2024-09-18 21:10:01,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=523240.0, ans=0.1
+2024-09-18 21:10:21,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.20 vs. limit=6.0
+2024-09-18 21:10:21,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=523320.0, ans=0.025
+2024-09-18 21:10:31,371 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.33 vs. limit=15.0
+2024-09-18 21:10:43,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=523360.0, ans=0.125
+2024-09-18 21:10:51,014 INFO [train.py:1198] (1/2) Epoch 29, batch 4150, loss[loss=0.2332, ctc_loss=0.1188, cr_loss=0.36, attn_decoder_loss=0.238, over 29504.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1229, cr_loss=0.3656, attn_decoder_loss=0.2435, over 5797260.69 frames. ], batch size: 77, lr: 3.79e-03, grad_scale: 8.0
+2024-09-18 21:11:06,884 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.35 vs. limit=15.0
+2024-09-18 21:11:19,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.21 vs. limit=15.0
+2024-09-18 21:11:31,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=523480.0, ans=0.1
+2024-09-18 21:11:33,163 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.46 vs. limit=12.0
+2024-09-18 21:11:44,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.88 vs. limit=15.0
+2024-09-18 21:11:48,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=523560.0, ans=0.07
+2024-09-18 21:11:50,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=523560.0, ans=0.0
+2024-09-18 21:11:59,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=523560.0, ans=0.2
+2024-09-18 21:12:04,658 INFO [train.py:1198] (1/2) Epoch 29, batch 4200, loss[loss=0.2502, ctc_loss=0.1336, cr_loss=0.4008, attn_decoder_loss=0.2543, over 29490.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1232, cr_loss=0.3668, attn_decoder_loss=0.244, over 5799243.91 frames. ], batch size: 90, lr: 3.79e-03, grad_scale: 8.0
+2024-09-18 21:12:04,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=523600.0, ans=0.125
+2024-09-18 21:12:09,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=523600.0, ans=0.09899494936611666
+2024-09-18 21:12:25,444 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.320e+01 8.488e+01 8.959e+01 9.406e+01 1.586e+02, threshold=1.792e+02, percent-clipped=0.0
+2024-09-18 21:12:40,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=523680.0, ans=0.125
+2024-09-18 21:13:03,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=523760.0, ans=0.1
+2024-09-18 21:13:19,316 INFO [train.py:1198] (1/2) Epoch 29, batch 4250, loss[loss=0.2184, ctc_loss=0.1024, cr_loss=0.3201, attn_decoder_loss=0.2242, over 29526.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1228, cr_loss=0.3657, attn_decoder_loss=0.244, over 5805434.25 frames. ], batch size: 74, lr: 3.79e-03, grad_scale: 8.0
+2024-09-18 21:13:25,933 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.55 vs. limit=10.0
+2024-09-18 21:13:34,489 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.48 vs. limit=15.0
+2024-09-18 21:13:38,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.37 vs. limit=22.5
+2024-09-18 21:13:54,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=523880.0, ans=0.125
+2024-09-18 21:14:06,148 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-18 21:14:12,514 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.23 vs. limit=15.0
+2024-09-18 21:14:23,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=523960.0, ans=0.0
+2024-09-18 21:14:33,932 INFO [train.py:1198] (1/2) Epoch 29, batch 4300, loss[loss=0.237, ctc_loss=0.1177, cr_loss=0.3507, attn_decoder_loss=0.2424, over 29518.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1224, cr_loss=0.365, attn_decoder_loss=0.244, over 5794558.48 frames. ], batch size: 87, lr: 3.79e-03, grad_scale: 8.0
+2024-09-18 21:14:49,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=524040.0, ans=0.125
+2024-09-18 21:14:54,741 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.701e+01 8.566e+01 9.093e+01 9.563e+01 1.622e+02, threshold=1.819e+02, percent-clipped=0.0
+2024-09-18 21:15:00,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=524040.0, ans=0.1
+2024-09-18 21:15:09,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=524080.0, ans=0.125
+2024-09-18 21:15:11,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=524080.0, ans=0.0
+2024-09-18 21:15:20,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=524120.0, ans=0.2
+2024-09-18 21:15:23,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=524120.0, ans=0.125
+2024-09-18 21:15:26,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=524120.0, ans=0.0
+2024-09-18 21:15:48,232 INFO [train.py:1198] (1/2) Epoch 29, batch 4350, loss[loss=0.254, ctc_loss=0.1308, cr_loss=0.3824, attn_decoder_loss=0.2591, over 29446.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1253, cr_loss=0.3706, attn_decoder_loss=0.2472, over 5796462.84 frames. ], batch size: 97, lr: 3.79e-03, grad_scale: 8.0
+2024-09-18 21:15:53,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=524200.0, ans=0.125
+2024-09-18 21:16:13,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=524240.0, ans=0.125
+2024-09-18 21:16:32,332 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.05 vs. limit=6.0
+2024-09-18 21:16:46,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=524360.0, ans=0.0
+2024-09-18 21:16:52,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=524360.0, ans=0.2
+2024-09-18 21:17:02,836 INFO [train.py:1198] (1/2) Epoch 29, batch 4400, loss[loss=0.2588, ctc_loss=0.1443, cr_loss=0.3954, attn_decoder_loss=0.2627, over 27285.00 frames. ], tot_loss[loss=0.2448, ctc_loss=0.127, cr_loss=0.3742, attn_decoder_loss=0.2496, over 5767331.29 frames. ], batch size: 124, lr: 3.79e-03, grad_scale: 16.0
+2024-09-18 21:17:06,685 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.74 vs. limit=22.5
+2024-09-18 21:17:14,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=524400.0, ans=0.0
+2024-09-18 21:17:23,260 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.178e+01 8.977e+01 9.367e+01 9.862e+01 3.705e+02, threshold=1.873e+02, percent-clipped=1.0
+2024-09-18 21:17:31,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=524480.0, ans=0.125
+2024-09-18 21:17:59,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=524520.0, ans=0.2
+2024-09-18 21:17:59,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=524520.0, ans=0.1
+2024-09-18 21:18:12,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=524560.0, ans=0.1
+2024-09-18 21:18:16,792 INFO [train.py:1198] (1/2) Epoch 29, batch 4450, loss[loss=0.2625, ctc_loss=0.1587, cr_loss=0.4126, attn_decoder_loss=0.2649, over 20399.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1307, cr_loss=0.3786, attn_decoder_loss=0.2519, over 5574456.43 frames. ], batch size: 209, lr: 3.79e-03, grad_scale: 16.0
+2024-09-18 21:18:18,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=524600.0, ans=0.025
+2024-09-18 21:18:20,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=524600.0, ans=0.125
+2024-09-18 21:18:30,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=524600.0, ans=0.125
+2024-09-18 21:18:31,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=524640.0, ans=0.1
+2024-09-18 21:18:47,511 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.32 vs. limit=22.5
+2024-09-18 21:19:10,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=524720.0, ans=0.07
+2024-09-18 21:19:12,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=524720.0, ans=0.125
+2024-09-18 21:19:18,866 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.28 vs. limit=15.0
+2024-09-18 21:19:28,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=524760.0, ans=0.125
+2024-09-18 21:19:33,077 INFO [train.py:1198] (1/2) Epoch 29, batch 4500, loss[loss=0.2547, ctc_loss=0.1384, cr_loss=0.3638, attn_decoder_loss=0.2595, over 20395.00 frames. ], tot_loss[loss=0.2495, ctc_loss=0.1342, cr_loss=0.3812, attn_decoder_loss=0.2539, over 5235346.35 frames. ], batch size: 209, lr: 3.79e-03, grad_scale: 8.0
+2024-09-18 21:19:55,853 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.478e+01 1.036e+02 1.116e+02 1.208e+02 3.141e+02, threshold=2.233e+02, percent-clipped=1.0
+2024-09-18 21:21:05,422 INFO [train.py:1198] (1/2) Epoch 30, batch 0, loss[loss=0.2251, ctc_loss=0.1143, cr_loss=0.352, attn_decoder_loss=0.2296, over 29627.00 frames. ], tot_loss[loss=0.2251, ctc_loss=0.1143, cr_loss=0.352, attn_decoder_loss=0.2296, over 29627.00 frames. ], batch size: 73, lr: 3.72e-03, grad_scale: 16.0
+2024-09-18 21:21:05,422 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-18 21:21:23,762 INFO [train.py:1230] (1/2) Epoch 30, validation: loss=0.2119, ctc_loss=0.03754, cr_loss=5.775e-15, attn_decoder_loss=0.2313, over 944034.00 frames.
+2024-09-18 21:21:23,762 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-18 21:21:27,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=524900.0, ans=0.2
+2024-09-18 21:21:53,235 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=12.49 vs. limit=15.0
+2024-09-18 21:21:55,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=524980.0, ans=0.05
+2024-09-18 21:22:10,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=525020.0, ans=0.0
+2024-09-18 21:22:12,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=525020.0, ans=0.125
+2024-09-18 21:22:17,039 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=8.90 vs. limit=15.0
+2024-09-18 21:22:37,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=525060.0, ans=0.125
+2024-09-18 21:22:40,141 INFO [train.py:1198] (1/2) Epoch 30, batch 50, loss[loss=0.2195, ctc_loss=0.1123, cr_loss=0.3279, attn_decoder_loss=0.2241, over 29436.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1246, cr_loss=0.3698, attn_decoder_loss=0.2453, over 1267680.18 frames. ], batch size: 70, lr: 3.72e-03, grad_scale: 16.0
+2024-09-18 21:22:42,432 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.44 vs. limit=15.0
+2024-09-18 21:22:42,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.94 vs. limit=15.0
+2024-09-18 21:22:49,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=525100.0, ans=0.125
+2024-09-18 21:22:57,933 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.60 vs. limit=22.5
+2024-09-18 21:23:36,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=525220.0, ans=0.0
+2024-09-18 21:23:39,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=525260.0, ans=0.125
+2024-09-18 21:23:42,503 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.716e+01 8.848e+01 9.545e+01 1.010e+02 1.497e+02, threshold=1.909e+02, percent-clipped=0.0
+2024-09-18 21:23:54,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=525300.0, ans=0.0
+2024-09-18 21:23:56,174 INFO [train.py:1198] (1/2) Epoch 30, batch 100, loss[loss=0.2284, ctc_loss=0.1143, cr_loss=0.348, attn_decoder_loss=0.2334, over 29516.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.125, cr_loss=0.3688, attn_decoder_loss=0.2463, over 2251336.72 frames. ], batch size: 76, lr: 3.72e-03, grad_scale: 8.0
+2024-09-18 21:24:09,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=525340.0, ans=0.125
+2024-09-18 21:24:14,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=525340.0, ans=0.07
+2024-09-18 21:24:21,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=525340.0, ans=0.125
+2024-09-18 21:24:29,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=525380.0, ans=0.0
+2024-09-18 21:24:42,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=525420.0, ans=0.125
+2024-09-18 21:24:42,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=525420.0, ans=0.09899494936611666
+2024-09-18 21:24:44,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.61 vs. limit=15.0
+2024-09-18 21:24:49,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=525420.0, ans=0.125
+2024-09-18 21:24:49,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=525420.0, ans=0.0
+2024-09-18 21:24:51,544 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.89 vs. limit=22.5
+2024-09-18 21:25:13,043 INFO [train.py:1198] (1/2) Epoch 30, batch 150, loss[loss=0.2232, ctc_loss=0.1089, cr_loss=0.3545, attn_decoder_loss=0.228, over 29424.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.122, cr_loss=0.3635, attn_decoder_loss=0.2437, over 3046267.96 frames. ], batch size: 70, lr: 3.72e-03, grad_scale: 8.0
+2024-09-18 21:25:22,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=525500.0, ans=0.0
+2024-09-18 21:26:02,905 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.63 vs. limit=6.0
+2024-09-18 21:26:06,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.62 vs. limit=15.0
+2024-09-18 21:26:10,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=525620.0, ans=0.95
+2024-09-18 21:26:17,348 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.316e+01 8.425e+01 8.976e+01 9.725e+01 1.408e+02, threshold=1.795e+02, percent-clipped=0.0
+2024-09-18 21:26:23,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=525660.0, ans=0.1
+2024-09-18 21:26:28,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=525660.0, ans=0.95
+2024-09-18 21:26:28,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.67 vs. limit=15.0
+2024-09-18 21:26:30,927 INFO [train.py:1198] (1/2) Epoch 30, batch 200, loss[loss=0.2491, ctc_loss=0.1255, cr_loss=0.3661, attn_decoder_loss=0.2547, over 27491.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1213, cr_loss=0.3628, attn_decoder_loss=0.243, over 3657753.43 frames. ], batch size: 125, lr: 3.72e-03, grad_scale: 8.0
+2024-09-18 21:26:31,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=525700.0, ans=0.025
+2024-09-18 21:26:58,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=525740.0, ans=0.0
+2024-09-18 21:27:01,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=525780.0, ans=0.2
+2024-09-18 21:27:07,920 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.81 vs. limit=15.0
+2024-09-18 21:27:16,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=525820.0, ans=0.125
+2024-09-18 21:27:19,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=525820.0, ans=0.2
+2024-09-18 21:27:41,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=525860.0, ans=0.125
+2024-09-18 21:27:46,550 INFO [train.py:1198] (1/2) Epoch 30, batch 250, loss[loss=0.2518, ctc_loss=0.1305, cr_loss=0.3897, attn_decoder_loss=0.2566, over 29233.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1212, cr_loss=0.363, attn_decoder_loss=0.2428, over 4138179.76 frames. ], batch size: 100, lr: 3.72e-03, grad_scale: 8.0
+2024-09-18 21:27:54,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=525900.0, ans=0.1
+2024-09-18 21:28:02,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.36 vs. limit=15.0
+2024-09-18 21:28:16,050 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.45 vs. limit=10.0
+2024-09-18 21:28:17,526 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=12.74 vs. limit=15.0
+2024-09-18 21:28:24,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=525980.0, ans=0.0
+2024-09-18 21:28:29,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=525980.0, ans=0.0
+2024-09-18 21:28:50,799 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.169e+01 8.439e+01 8.914e+01 9.350e+01 1.362e+02, threshold=1.783e+02, percent-clipped=0.0
+2024-09-18 21:28:57,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=526060.0, ans=0.125
+2024-09-18 21:29:04,750 INFO [train.py:1198] (1/2) Epoch 30, batch 300, loss[loss=0.2538, ctc_loss=0.1315, cr_loss=0.3778, attn_decoder_loss=0.259, over 29553.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1211, cr_loss=0.3628, attn_decoder_loss=0.2425, over 4507189.15 frames. ], batch size: 92, lr: 3.72e-03, grad_scale: 8.0
+2024-09-18 21:29:24,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=526140.0, ans=0.125
+2024-09-18 21:29:39,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=526180.0, ans=0.0
+2024-09-18 21:29:54,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=526220.0, ans=0.1
+2024-09-18 21:30:00,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=526220.0, ans=0.125
+2024-09-18 21:30:15,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=526260.0, ans=0.125
+2024-09-18 21:30:22,667 INFO [train.py:1198] (1/2) Epoch 30, batch 350, loss[loss=0.2175, ctc_loss=0.1158, cr_loss=0.3606, attn_decoder_loss=0.2208, over 29349.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1217, cr_loss=0.3643, attn_decoder_loss=0.2432, over 4794330.41 frames. ], batch size: 71, lr: 3.72e-03, grad_scale: 8.0
+2024-09-18 21:30:58,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.51 vs. limit=15.0
+2024-09-18 21:30:59,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=526380.0, ans=0.125
+2024-09-18 21:31:07,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=526420.0, ans=0.125
+2024-09-18 21:31:07,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=526420.0, ans=0.125
+2024-09-18 21:31:19,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=526420.0, ans=0.125
+2024-09-18 21:31:24,895 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.499e+01 8.639e+01 9.253e+01 9.920e+01 3.039e+02, threshold=1.851e+02, percent-clipped=1.0
+2024-09-18 21:31:38,412 INFO [train.py:1198] (1/2) Epoch 30, batch 400, loss[loss=0.2489, ctc_loss=0.127, cr_loss=0.3836, attn_decoder_loss=0.2539, over 29702.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1215, cr_loss=0.3632, attn_decoder_loss=0.243, over 5024184.53 frames. ], batch size: 82, lr: 3.72e-03, grad_scale: 16.0
+2024-09-18 21:31:54,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=526540.0, ans=0.0
+2024-09-18 21:31:57,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.49 vs. limit=15.0
+2024-09-18 21:32:33,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=526620.0, ans=0.0
+2024-09-18 21:32:39,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=526660.0, ans=0.025
+2024-09-18 21:32:42,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=526660.0, ans=0.5
+2024-09-18 21:32:56,035 INFO [train.py:1198] (1/2) Epoch 30, batch 450, loss[loss=0.2463, ctc_loss=0.1277, cr_loss=0.3638, attn_decoder_loss=0.2514, over 29697.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1213, cr_loss=0.363, attn_decoder_loss=0.2429, over 5184654.23 frames. ], batch size: 83, lr: 3.71e-03, grad_scale: 8.0
+2024-09-18 21:33:00,169 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.33 vs. limit=22.5
+2024-09-18 21:33:13,607 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.17 vs. limit=10.0
+2024-09-18 21:33:35,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=526780.0, ans=0.125
+2024-09-18 21:33:48,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=526820.0, ans=0.0
+2024-09-18 21:33:49,345 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.51 vs. limit=15.0
+2024-09-18 21:33:54,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=526820.0, ans=0.125
+2024-09-18 21:33:57,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=526860.0, ans=0.125
+2024-09-18 21:34:01,944 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.512e+01 8.936e+01 9.488e+01 1.864e+02, threshold=1.787e+02, percent-clipped=1.0
+2024-09-18 21:34:13,945 INFO [train.py:1198] (1/2) Epoch 30, batch 500, loss[loss=0.2664, ctc_loss=0.1386, cr_loss=0.4079, attn_decoder_loss=0.2715, over 29431.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1209, cr_loss=0.3619, attn_decoder_loss=0.2423, over 5329203.37 frames. ], batch size: 94, lr: 3.71e-03, grad_scale: 8.0
+2024-09-18 21:34:44,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=526980.0, ans=0.0
+2024-09-18 21:34:46,463 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.75 vs. limit=22.5
+2024-09-18 21:34:54,087 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.79 vs. limit=22.5
+2024-09-18 21:34:54,605 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.31 vs. limit=15.0
+2024-09-18 21:34:59,110 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.86 vs. limit=22.5
+2024-09-18 21:35:05,267 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.84 vs. limit=15.0
+2024-09-18 21:35:07,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.05 vs. limit=15.0
+2024-09-18 21:35:30,013 INFO [train.py:1198] (1/2) Epoch 30, batch 550, loss[loss=0.2537, ctc_loss=0.132, cr_loss=0.3811, attn_decoder_loss=0.2588, over 28805.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1211, cr_loss=0.3619, attn_decoder_loss=0.2426, over 5421609.90 frames. ], batch size: 104, lr: 3.71e-03, grad_scale: 8.0
+2024-09-18 21:35:38,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=527100.0, ans=0.025
+2024-09-18 21:35:41,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=527100.0, ans=0.0
+2024-09-18 21:35:42,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=527100.0, ans=0.025
+2024-09-18 21:36:10,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=527180.0, ans=0.0
+2024-09-18 21:36:23,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=527220.0, ans=0.2
+2024-09-18 21:36:31,165 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.98 vs. limit=22.5
+2024-09-18 21:36:33,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=527260.0, ans=0.1
+2024-09-18 21:36:35,935 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=23.20 vs. limit=22.5
+2024-09-18 21:36:36,365 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.634e+01 8.972e+01 9.427e+01 2.186e+02, threshold=1.794e+02, percent-clipped=1.0
+2024-09-18 21:36:48,692 INFO [train.py:1198] (1/2) Epoch 30, batch 600, loss[loss=0.2538, ctc_loss=0.1251, cr_loss=0.377, attn_decoder_loss=0.2597, over 29286.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1215, cr_loss=0.3631, attn_decoder_loss=0.2429, over 5509463.65 frames. ], batch size: 100, lr: 3.71e-03, grad_scale: 8.0
+2024-09-18 21:36:58,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=527300.0, ans=0.2
+2024-09-18 21:36:59,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=527300.0, ans=0.125
+2024-09-18 21:37:03,280 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.49 vs. limit=15.0
+2024-09-18 21:37:14,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=527340.0, ans=0.125
+2024-09-18 21:37:19,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=527380.0, ans=0.1
+2024-09-18 21:37:20,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=527380.0, ans=0.125
+2024-09-18 21:37:40,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=527420.0, ans=0.2
+2024-09-18 21:37:42,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.24 vs. limit=15.0
+2024-09-18 21:38:00,077 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.22 vs. limit=8.0
+2024-09-18 21:38:06,366 INFO [train.py:1198] (1/2) Epoch 30, batch 650, loss[loss=0.2438, ctc_loss=0.1333, cr_loss=0.3849, attn_decoder_loss=0.2475, over 29767.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1209, cr_loss=0.3616, attn_decoder_loss=0.2423, over 5586540.67 frames. ], batch size: 81, lr: 3.71e-03, grad_scale: 8.0
+2024-09-18 21:38:13,488 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.77 vs. limit=12.0
+2024-09-18 21:38:34,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=527540.0, ans=10.0
+2024-09-18 21:39:09,635 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.386e+01 8.897e+01 9.302e+01 1.225e+02, threshold=1.779e+02, percent-clipped=0.0
+2024-09-18 21:39:21,804 INFO [train.py:1198] (1/2) Epoch 30, batch 700, loss[loss=0.2333, ctc_loss=0.1191, cr_loss=0.3591, attn_decoder_loss=0.238, over 29530.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1216, cr_loss=0.3629, attn_decoder_loss=0.243, over 5638247.31 frames. ], batch size: 76, lr: 3.71e-03, grad_scale: 8.0
+2024-09-18 21:39:36,344 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=12.0
+2024-09-18 21:39:41,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=527740.0, ans=0.125
+2024-09-18 21:39:55,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=527780.0, ans=0.04949747468305833
+2024-09-18 21:40:04,987 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.06 vs. limit=15.0
+2024-09-18 21:40:22,591 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.14 vs. limit=10.0
+2024-09-18 21:40:26,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=527860.0, ans=0.125
+2024-09-18 21:40:39,931 INFO [train.py:1198] (1/2) Epoch 30, batch 750, loss[loss=0.2421, ctc_loss=0.1255, cr_loss=0.382, attn_decoder_loss=0.2466, over 29732.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1218, cr_loss=0.3638, attn_decoder_loss=0.2429, over 5676724.93 frames. ], batch size: 82, lr: 3.71e-03, grad_scale: 8.0
+2024-09-18 21:40:52,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=527900.0, ans=0.2
+2024-09-18 21:40:53,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=527940.0, ans=0.2
+2024-09-18 21:41:05,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=527940.0, ans=0.0
+2024-09-18 21:41:47,174 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=528020.0, ans=0.125
+2024-09-18 21:41:52,808 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.182e+01 8.561e+01 8.909e+01 9.515e+01 3.316e+02, threshold=1.782e+02, percent-clipped=2.0
+2024-09-18 21:42:04,963 INFO [train.py:1198] (1/2) Epoch 30, batch 800, loss[loss=0.2237, ctc_loss=0.1106, cr_loss=0.3514, attn_decoder_loss=0.2285, over 29595.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1214, cr_loss=0.3633, attn_decoder_loss=0.2428, over 5706231.41 frames. ], batch size: 73, lr: 3.71e-03, grad_scale: 16.0
+2024-09-18 21:42:14,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=528100.0, ans=0.0
+2024-09-18 21:42:15,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=528100.0, ans=0.125
+2024-09-18 21:42:29,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=528140.0, ans=0.05
+2024-09-18 21:42:31,411 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.03 vs. limit=10.0
+2024-09-18 21:42:32,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=528140.0, ans=0.125
+2024-09-18 21:42:35,676 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.57 vs. limit=15.0
+2024-09-18 21:42:39,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=528180.0, ans=0.1
+2024-09-18 21:42:52,537 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.87 vs. limit=10.0
+2024-09-18 21:42:57,353 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.19 vs. limit=12.0
+2024-09-18 21:43:06,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=528260.0, ans=0.125
+2024-09-18 21:43:20,043 INFO [train.py:1198] (1/2) Epoch 30, batch 850, loss[loss=0.252, ctc_loss=0.1352, cr_loss=0.4062, attn_decoder_loss=0.256, over 29699.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1212, cr_loss=0.3629, attn_decoder_loss=0.2425, over 5735070.66 frames. 
], batch size: 89, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:43:20,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=528300.0, ans=0.125 +2024-09-18 21:43:25,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.60 vs. limit=10.0 +2024-09-18 21:43:27,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.57 vs. limit=15.0 +2024-09-18 21:43:29,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=528300.0, ans=0.125 +2024-09-18 21:43:39,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=528340.0, ans=0.0 +2024-09-18 21:44:04,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=528420.0, ans=0.125 +2024-09-18 21:44:06,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.45 vs. limit=15.0 +2024-09-18 21:44:14,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=528420.0, ans=0.125 +2024-09-18 21:44:27,503 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.440e+01 8.960e+01 9.629e+01 1.513e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-18 21:44:32,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=528460.0, ans=0.2 +2024-09-18 21:44:33,001 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.70 vs. limit=6.0 +2024-09-18 21:44:37,925 INFO [train.py:1198] (1/2) Epoch 30, batch 900, loss[loss=0.218, ctc_loss=0.09342, cr_loss=0.3088, attn_decoder_loss=0.225, over 29613.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1213, cr_loss=0.3633, attn_decoder_loss=0.2427, over 5740393.95 frames. ], batch size: 73, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:44:40,246 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.38 vs. limit=15.0 +2024-09-18 21:44:44,097 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:44:44,853 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.37 vs. limit=15.0 +2024-09-18 21:44:45,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=528500.0, ans=0.125 +2024-09-18 21:44:59,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=528540.0, ans=0.025 +2024-09-18 21:45:09,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.89 vs. 
limit=15.0 +2024-09-18 21:45:12,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=528580.0, ans=0.125 +2024-09-18 21:45:14,483 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:45:16,564 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.06 vs. limit=22.5 +2024-09-18 21:45:29,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=528620.0, ans=0.125 +2024-09-18 21:45:29,949 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=12.0 +2024-09-18 21:45:32,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=528620.0, ans=0.05 +2024-09-18 21:45:34,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=528620.0, ans=0.125 +2024-09-18 21:45:42,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=528660.0, ans=0.0 +2024-09-18 21:45:55,576 INFO [train.py:1198] (1/2) Epoch 30, batch 950, loss[loss=0.2227, ctc_loss=0.1041, cr_loss=0.3317, attn_decoder_loss=0.2285, over 29511.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1211, cr_loss=0.3626, attn_decoder_loss=0.2428, over 5742400.31 frames. ], batch size: 74, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:46:05,117 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.59 vs. limit=12.0 +2024-09-18 21:46:29,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=528780.0, ans=0.125 +2024-09-18 21:46:38,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=528780.0, ans=0.125 +2024-09-18 21:46:42,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=528820.0, ans=0.125 +2024-09-18 21:47:00,630 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.476e+01 9.054e+01 9.594e+01 4.825e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-18 21:47:07,534 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.00 vs. limit=15.0 +2024-09-18 21:47:08,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=528860.0, ans=0.1 +2024-09-18 21:47:08,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=528860.0, ans=0.125 +2024-09-18 21:47:09,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=528900.0, ans=0.125 +2024-09-18 21:47:11,027 INFO [train.py:1198] (1/2) Epoch 30, batch 1000, loss[loss=0.234, ctc_loss=0.117, cr_loss=0.3595, attn_decoder_loss=0.239, over 29508.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1223, cr_loss=0.3645, attn_decoder_loss=0.2438, over 5736566.78 frames. 
], batch size: 77, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:47:11,965 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.69 vs. limit=15.0 +2024-09-18 21:47:15,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=528900.0, ans=0.0 +2024-09-18 21:47:29,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=528940.0, ans=0.125 +2024-09-18 21:47:44,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=528980.0, ans=0.125 +2024-09-18 21:48:09,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=529020.0, ans=0.0 +2024-09-18 21:48:28,828 INFO [train.py:1198] (1/2) Epoch 30, batch 1050, loss[loss=0.2509, ctc_loss=0.1225, cr_loss=0.3755, attn_decoder_loss=0.2568, over 29673.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1219, cr_loss=0.3642, attn_decoder_loss=0.243, over 5745340.70 frames. ], batch size: 85, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:48:43,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.22 vs. limit=10.0 +2024-09-18 21:49:11,039 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.37 vs. limit=15.0 +2024-09-18 21:49:20,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=529220.0, ans=0.1 +2024-09-18 21:49:25,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=529220.0, ans=0.125 +2024-09-18 21:49:29,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=529260.0, ans=0.125 +2024-09-18 21:49:36,319 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.145e+01 8.411e+01 8.824e+01 9.446e+01 1.337e+02, threshold=1.765e+02, percent-clipped=0.0 +2024-09-18 21:49:46,959 INFO [train.py:1198] (1/2) Epoch 30, batch 1100, loss[loss=0.2425, ctc_loss=0.1232, cr_loss=0.3848, attn_decoder_loss=0.2473, over 29447.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1214, cr_loss=0.3635, attn_decoder_loss=0.2429, over 5757235.76 frames. ], batch size: 78, lr: 3.71e-03, grad_scale: 8.0 +2024-09-18 21:49:48,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=529300.0, ans=0.2 +2024-09-18 21:50:00,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=529340.0, ans=0.1 +2024-09-18 21:50:07,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=529340.0, ans=0.0 +2024-09-18 21:50:14,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.17 vs. 
limit=15.0 +2024-09-18 21:50:15,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=529380.0, ans=0.125 +2024-09-18 21:51:02,521 INFO [train.py:1198] (1/2) Epoch 30, batch 1150, loss[loss=0.2241, ctc_loss=0.1063, cr_loss=0.3191, attn_decoder_loss=0.2301, over 29436.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1216, cr_loss=0.364, attn_decoder_loss=0.243, over 5755091.72 frames. ], batch size: 78, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 21:51:07,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=529500.0, ans=0.0 +2024-09-18 21:51:21,676 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.93 vs. limit=15.0 +2024-09-18 21:51:33,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=529580.0, ans=0.0 +2024-09-18 21:51:44,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=529580.0, ans=0.125 +2024-09-18 21:51:55,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.22 vs. limit=22.5 +2024-09-18 21:52:06,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=529660.0, ans=0.09899494936611666 +2024-09-18 21:52:10,739 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.168e+01 8.491e+01 9.020e+01 1.005e+02 1.994e+02, threshold=1.804e+02, percent-clipped=1.0 +2024-09-18 21:52:21,428 INFO [train.py:1198] (1/2) Epoch 30, batch 1200, loss[loss=0.2446, ctc_loss=0.1167, cr_loss=0.3614, attn_decoder_loss=0.2508, over 29670.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1221, cr_loss=0.3648, attn_decoder_loss=0.2438, over 5747435.67 frames. ], batch size: 85, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 21:52:28,251 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.53 vs. limit=15.0 +2024-09-18 21:52:38,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=529740.0, ans=0.125 +2024-09-18 21:52:52,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=529780.0, ans=0.2 +2024-09-18 21:52:55,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=529780.0, ans=0.1 +2024-09-18 21:52:59,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=529780.0, ans=0.0 +2024-09-18 21:53:09,721 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.09 vs. 
limit=15.0 +2024-09-18 21:53:15,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=529820.0, ans=0.125 +2024-09-18 21:53:15,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=529820.0, ans=10.0 +2024-09-18 21:53:33,509 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.24 vs. limit=15.0 +2024-09-18 21:53:37,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=529860.0, ans=0.125 +2024-09-18 21:53:40,008 INFO [train.py:1198] (1/2) Epoch 30, batch 1250, loss[loss=0.266, ctc_loss=0.1551, cr_loss=0.4598, attn_decoder_loss=0.2681, over 29528.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1227, cr_loss=0.3666, attn_decoder_loss=0.2444, over 5775556.79 frames. ], batch size: 92, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 21:53:51,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=529900.0, ans=0.2 +2024-09-18 21:54:21,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=529980.0, ans=0.2 +2024-09-18 21:54:21,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=529980.0, ans=0.1 +2024-09-18 21:54:36,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=530020.0, ans=0.125 +2024-09-18 21:54:41,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=530060.0, ans=0.0 +2024-09-18 21:54:45,401 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.969e+01 8.456e+01 8.891e+01 9.459e+01 2.793e+02, threshold=1.778e+02, percent-clipped=2.0 +2024-09-18 21:54:55,964 INFO [train.py:1198] (1/2) Epoch 30, batch 1300, loss[loss=0.2538, ctc_loss=0.1328, cr_loss=0.3903, attn_decoder_loss=0.2585, over 28425.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1222, cr_loss=0.3646, attn_decoder_loss=0.2438, over 5779795.74 frames. ], batch size: 112, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 21:54:58,375 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.00 vs. limit=15.0 +2024-09-18 21:55:05,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=530100.0, ans=0.025 +2024-09-18 21:55:31,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=530180.0, ans=0.125 +2024-09-18 21:56:14,323 INFO [train.py:1198] (1/2) Epoch 30, batch 1350, loss[loss=0.2355, ctc_loss=0.1209, cr_loss=0.3638, attn_decoder_loss=0.2401, over 29772.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1219, cr_loss=0.3646, attn_decoder_loss=0.2434, over 5795482.34 frames. 
], batch size: 81, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 21:56:27,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=530340.0, ans=0.125 +2024-09-18 21:57:04,664 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:57:04,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=530420.0, ans=0.2 +2024-09-18 21:57:04,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=530420.0, ans=0.1 +2024-09-18 21:57:07,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=530420.0, ans=0.125 +2024-09-18 21:57:10,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=530420.0, ans=0.2 +2024-09-18 21:57:20,872 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.324e+01 8.385e+01 8.847e+01 9.362e+01 1.529e+02, threshold=1.769e+02, percent-clipped=0.0 +2024-09-18 21:57:21,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=530460.0, ans=0.125 +2024-09-18 21:57:30,075 INFO [train.py:1198] (1/2) Epoch 30, batch 1400, loss[loss=0.2138, ctc_loss=0.1015, cr_loss=0.3271, attn_decoder_loss=0.219, over 29572.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1217, cr_loss=0.3641, attn_decoder_loss=0.243, over 5806774.11 frames. ], batch size: 69, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 21:57:49,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=530540.0, ans=0.125 +2024-09-18 21:57:58,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=530540.0, ans=0.125 +2024-09-18 21:58:07,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=530580.0, ans=0.125 +2024-09-18 21:58:21,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=530620.0, ans=0.125 +2024-09-18 21:58:33,406 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.65 vs. limit=6.0 +2024-09-18 21:58:47,601 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.42 vs. limit=22.5 +2024-09-18 21:58:48,079 INFO [train.py:1198] (1/2) Epoch 30, batch 1450, loss[loss=0.2545, ctc_loss=0.1317, cr_loss=0.3793, attn_decoder_loss=0.2598, over 29401.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1218, cr_loss=0.3639, attn_decoder_loss=0.2437, over 5803281.60 frames. 
], batch size: 94, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 21:58:52,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=530700.0, ans=0.1 +2024-09-18 21:59:12,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=530740.0, ans=0.125 +2024-09-18 21:59:14,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.93 vs. limit=15.0 +2024-09-18 21:59:24,816 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 21:59:28,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.68 vs. limit=15.0 +2024-09-18 21:59:50,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=530860.0, ans=0.025 +2024-09-18 21:59:56,665 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.738e+01 8.746e+01 9.204e+01 9.882e+01 6.648e+02, threshold=1.841e+02, percent-clipped=1.0 +2024-09-18 21:59:59,260 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.36 vs. limit=15.0 +2024-09-18 22:00:04,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=530900.0, ans=0.0 +2024-09-18 22:00:05,887 INFO [train.py:1198] (1/2) Epoch 30, batch 1500, loss[loss=0.2498, ctc_loss=0.1294, cr_loss=0.3764, attn_decoder_loss=0.2548, over 29624.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1217, cr_loss=0.3635, attn_decoder_loss=0.2436, over 5805593.86 frames. ], batch size: 86, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 22:00:25,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=530940.0, ans=0.125 +2024-09-18 22:00:39,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=530980.0, ans=0.125 +2024-09-18 22:00:47,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=530980.0, ans=0.125 +2024-09-18 22:00:55,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=531020.0, ans=0.125 +2024-09-18 22:01:01,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=531020.0, ans=0.0 +2024-09-18 22:01:05,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=531060.0, ans=0.0 +2024-09-18 22:01:22,006 INFO [train.py:1198] (1/2) Epoch 30, batch 1550, loss[loss=0.2485, ctc_loss=0.1286, cr_loss=0.3832, attn_decoder_loss=0.2533, over 29493.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.122, cr_loss=0.3639, attn_decoder_loss=0.2436, over 5782344.87 frames. 
], batch size: 90, lr: 3.70e-03, grad_scale: 8.0 +2024-09-18 22:01:32,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=531100.0, ans=0.1 +2024-09-18 22:01:50,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=531140.0, ans=0.025 +2024-09-18 22:01:55,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.84 vs. limit=15.0 +2024-09-18 22:02:21,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=531220.0, ans=0.1 +2024-09-18 22:02:23,253 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.27 vs. limit=6.0 +2024-09-18 22:02:31,375 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.245e+01 8.587e+01 9.165e+01 1.006e+02 3.566e+02, threshold=1.833e+02, percent-clipped=2.0 +2024-09-18 22:02:34,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=531260.0, ans=0.1 +2024-09-18 22:02:40,591 INFO [train.py:1198] (1/2) Epoch 30, batch 1600, loss[loss=0.2591, ctc_loss=0.1353, cr_loss=0.4062, attn_decoder_loss=0.2638, over 29671.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1219, cr_loss=0.3639, attn_decoder_loss=0.2435, over 5766618.97 frames. ], batch size: 85, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:02:45,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=531300.0, ans=0.2 +2024-09-18 22:02:46,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=531300.0, ans=0.0 +2024-09-18 22:02:57,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=531340.0, ans=0.2 +2024-09-18 22:03:05,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=531340.0, ans=10.0 +2024-09-18 22:03:08,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=531340.0, ans=0.2 +2024-09-18 22:03:40,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=531420.0, ans=0.125 +2024-09-18 22:03:52,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=531460.0, ans=0.1 +2024-09-18 22:03:58,211 INFO [train.py:1198] (1/2) Epoch 30, batch 1650, loss[loss=0.2412, ctc_loss=0.1129, cr_loss=0.3526, attn_decoder_loss=0.2476, over 29691.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1217, cr_loss=0.3637, attn_decoder_loss=0.243, over 5761113.83 frames. 
], batch size: 89, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:04:39,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=531580.0, ans=0.0 +2024-09-18 22:04:52,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=531620.0, ans=0.1 +2024-09-18 22:05:03,959 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.05 vs. limit=15.0 +2024-09-18 22:05:04,522 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.948e+01 8.547e+01 8.983e+01 9.697e+01 1.906e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-18 22:05:10,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=531660.0, ans=0.125 +2024-09-18 22:05:13,434 INFO [train.py:1198] (1/2) Epoch 30, batch 1700, loss[loss=0.2111, ctc_loss=0.1091, cr_loss=0.3306, attn_decoder_loss=0.2151, over 29576.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1213, cr_loss=0.3631, attn_decoder_loss=0.2428, over 5782331.41 frames. ], batch size: 69, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:05:18,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=531700.0, ans=0.0 +2024-09-18 22:05:47,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=531780.0, ans=0.0 +2024-09-18 22:06:04,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=531820.0, ans=0.125 +2024-09-18 22:06:04,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=531820.0, ans=0.125 +2024-09-18 22:06:11,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=531820.0, ans=10.0 +2024-09-18 22:06:31,430 INFO [train.py:1198] (1/2) Epoch 30, batch 1750, loss[loss=0.2153, ctc_loss=0.1039, cr_loss=0.3368, attn_decoder_loss=0.2202, over 29317.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.121, cr_loss=0.3624, attn_decoder_loss=0.2425, over 5790470.31 frames. 
], batch size: 67, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:07:06,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=531980.0, ans=0.125 +2024-09-18 22:07:08,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=531980.0, ans=0.125 +2024-09-18 22:07:10,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=531980.0, ans=0.0 +2024-09-18 22:07:20,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=532020.0, ans=0.1 +2024-09-18 22:07:40,548 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.985e+01 8.324e+01 8.730e+01 9.634e+01 1.252e+02, threshold=1.746e+02, percent-clipped=0.0 +2024-09-18 22:07:48,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=532100.0, ans=0.1 +2024-09-18 22:07:49,663 INFO [train.py:1198] (1/2) Epoch 30, batch 1800, loss[loss=0.2387, ctc_loss=0.117, cr_loss=0.3583, attn_decoder_loss=0.2442, over 29688.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1213, cr_loss=0.363, attn_decoder_loss=0.2427, over 5792159.86 frames. ], batch size: 83, lr: 3.70e-03, grad_scale: 16.0 +2024-09-18 22:08:34,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=532220.0, ans=0.07 +2024-09-18 22:08:45,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.99 vs. limit=10.0 +2024-09-18 22:09:05,417 INFO [train.py:1198] (1/2) Epoch 30, batch 1850, loss[loss=0.2478, ctc_loss=0.1275, cr_loss=0.3671, attn_decoder_loss=0.253, over 29649.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1214, cr_loss=0.3635, attn_decoder_loss=0.2428, over 5796806.41 frames. ], batch size: 86, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:09:05,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=532300.0, ans=0.0 +2024-09-18 22:10:15,375 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.201e+01 8.313e+01 8.937e+01 9.417e+01 1.433e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-18 22:10:22,900 INFO [train.py:1198] (1/2) Epoch 30, batch 1900, loss[loss=0.2452, ctc_loss=0.1355, cr_loss=0.3755, attn_decoder_loss=0.2491, over 29708.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1218, cr_loss=0.3642, attn_decoder_loss=0.2435, over 5804445.42 frames. ], batch size: 89, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:10:35,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=532500.0, ans=0.125 +2024-09-18 22:11:18,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=532620.0, ans=0.0 +2024-09-18 22:11:41,044 INFO [train.py:1198] (1/2) Epoch 30, batch 1950, loss[loss=0.2437, ctc_loss=0.1274, cr_loss=0.3859, attn_decoder_loss=0.248, over 29447.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1221, cr_loss=0.3657, attn_decoder_loss=0.2445, over 5819606.95 frames. 
], batch size: 78, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:11:54,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=532740.0, ans=0.125 +2024-09-18 22:12:10,708 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.61 vs. limit=15.0 +2024-09-18 22:12:13,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.29 vs. limit=6.0 +2024-09-18 22:12:15,236 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.00 vs. limit=15.0 +2024-09-18 22:12:17,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.19 vs. limit=12.0 +2024-09-18 22:12:35,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=532820.0, ans=0.0 +2024-09-18 22:12:43,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=532860.0, ans=0.125 +2024-09-18 22:12:49,466 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.636e+01 8.712e+01 9.082e+01 9.590e+01 8.305e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-18 22:12:56,993 INFO [train.py:1198] (1/2) Epoch 30, batch 2000, loss[loss=0.2242, ctc_loss=0.1156, cr_loss=0.3686, attn_decoder_loss=0.2281, over 29386.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1229, cr_loss=0.367, attn_decoder_loss=0.2451, over 5796756.19 frames. ], batch size: 67, lr: 3.69e-03, grad_scale: 16.0 +2024-09-18 22:13:06,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=532900.0, ans=0.125 +2024-09-18 22:13:20,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=532940.0, ans=0.0 +2024-09-18 22:13:28,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=532980.0, ans=0.05 +2024-09-18 22:13:35,234 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.35 vs. limit=15.0 +2024-09-18 22:14:01,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=533060.0, ans=0.2 +2024-09-18 22:14:03,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=533060.0, ans=0.125 +2024-09-18 22:14:08,449 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.30 vs. limit=15.0 +2024-09-18 22:14:15,044 INFO [train.py:1198] (1/2) Epoch 30, batch 2050, loss[loss=0.2304, ctc_loss=0.1246, cr_loss=0.38, attn_decoder_loss=0.2337, over 29408.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1226, cr_loss=0.366, attn_decoder_loss=0.2443, over 5789237.65 frames. ], batch size: 70, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:14:37,109 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.60 vs. 
limit=15.0 +2024-09-18 22:14:47,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=533180.0, ans=0.0 +2024-09-18 22:14:48,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=533180.0, ans=0.0 +2024-09-18 22:14:48,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=533180.0, ans=0.125 +2024-09-18 22:14:51,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=533180.0, ans=0.125 +2024-09-18 22:14:53,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=533180.0, ans=0.2 +2024-09-18 22:14:54,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=533180.0, ans=0.025 +2024-09-18 22:14:56,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=533180.0, ans=0.2 +2024-09-18 22:15:19,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=533260.0, ans=15.0 +2024-09-18 22:15:21,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=533260.0, ans=0.5 +2024-09-18 22:15:24,161 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:15:24,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=533260.0, ans=0.0 +2024-09-18 22:15:26,566 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.57 vs. limit=15.0 +2024-09-18 22:15:26,828 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.796e+01 8.326e+01 8.954e+01 9.784e+01 1.550e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-18 22:15:33,011 INFO [train.py:1198] (1/2) Epoch 30, batch 2100, loss[loss=0.2441, ctc_loss=0.1262, cr_loss=0.3802, attn_decoder_loss=0.2487, over 29767.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1221, cr_loss=0.3652, attn_decoder_loss=0.2438, over 5800496.97 frames. 
], batch size: 81, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:15:42,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=533300.0, ans=0.1 +2024-09-18 22:15:55,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=533340.0, ans=0.0 +2024-09-18 22:15:57,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=533340.0, ans=0.0 +2024-09-18 22:16:14,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys.whitening_limit, batch_count=533380.0, ans=6.0 +2024-09-18 22:16:16,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=533420.0, ans=0.1 +2024-09-18 22:16:41,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=533460.0, ans=0.07 +2024-09-18 22:16:48,304 INFO [train.py:1198] (1/2) Epoch 30, batch 2150, loss[loss=0.2293, ctc_loss=0.1213, cr_loss=0.3603, attn_decoder_loss=0.2333, over 29461.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1214, cr_loss=0.3638, attn_decoder_loss=0.243, over 5815555.51 frames. ], batch size: 78, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:17:04,478 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.42 vs. limit=12.0 +2024-09-18 22:17:39,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=533620.0, ans=0.1 +2024-09-18 22:18:00,806 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 8.351e+01 8.919e+01 9.432e+01 1.434e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-18 22:18:02,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=533660.0, ans=0.125 +2024-09-18 22:18:04,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=533660.0, ans=0.125 +2024-09-18 22:18:06,124 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.51 vs. limit=6.0 +2024-09-18 22:18:06,998 INFO [train.py:1198] (1/2) Epoch 30, batch 2200, loss[loss=0.2474, ctc_loss=0.1251, cr_loss=0.3723, attn_decoder_loss=0.2527, over 29653.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.122, cr_loss=0.3655, attn_decoder_loss=0.2434, over 5812978.18 frames. 
], batch size: 86, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:18:17,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=533700.0, ans=0.2 +2024-09-18 22:18:28,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=533740.0, ans=0.125 +2024-09-18 22:18:40,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=533780.0, ans=0.2 +2024-09-18 22:18:43,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=533780.0, ans=0.125 +2024-09-18 22:18:48,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=533780.0, ans=0.125 +2024-09-18 22:18:52,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=533820.0, ans=0.125 +2024-09-18 22:19:03,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=533820.0, ans=0.1 +2024-09-18 22:19:12,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.38 vs. limit=22.5 +2024-09-18 22:19:22,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=533860.0, ans=0.2 +2024-09-18 22:19:24,811 INFO [train.py:1198] (1/2) Epoch 30, batch 2250, loss[loss=0.235, ctc_loss=0.1216, cr_loss=0.3612, attn_decoder_loss=0.2395, over 29704.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1217, cr_loss=0.3644, attn_decoder_loss=0.2431, over 5812267.39 frames. ], batch size: 82, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:19:33,853 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.50 vs. 
limit=15.0 +2024-09-18 22:19:43,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=533940.0, ans=0.125 +2024-09-18 22:19:44,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=533940.0, ans=0.1 +2024-09-18 22:19:44,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=533940.0, ans=0.025 +2024-09-18 22:19:46,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=533940.0, ans=0.125 +2024-09-18 22:19:49,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=533940.0, ans=0.125 +2024-09-18 22:19:52,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=533940.0, ans=0.0 +2024-09-18 22:19:54,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.whiten.whitening_limit, batch_count=533980.0, ans=12.0 +2024-09-18 22:20:34,699 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.594e+01 8.443e+01 9.095e+01 9.654e+01 4.299e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-18 22:20:40,904 INFO [train.py:1198] (1/2) Epoch 30, batch 2300, loss[loss=0.2125, ctc_loss=0.09919, cr_loss=0.3084, attn_decoder_loss=0.2183, over 29335.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1208, cr_loss=0.3624, attn_decoder_loss=0.242, over 5797828.61 frames. ], batch size: 71, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:20:44,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=534100.0, ans=0.1 +2024-09-18 22:21:14,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=534180.0, ans=0.2 +2024-09-18 22:21:27,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=534220.0, ans=0.2 +2024-09-18 22:21:39,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=534220.0, ans=0.2 +2024-09-18 22:21:56,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.39 vs. limit=6.0 +2024-09-18 22:21:58,859 INFO [train.py:1198] (1/2) Epoch 30, batch 2350, loss[loss=0.2466, ctc_loss=0.1237, cr_loss=0.3723, attn_decoder_loss=0.252, over 29699.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.121, cr_loss=0.363, attn_decoder_loss=0.2423, over 5804272.62 frames. 
], batch size: 83, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:22:26,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=534340.0, ans=0.1 +2024-09-18 22:22:42,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=534380.0, ans=0.1 +2024-09-18 22:22:45,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=534420.0, ans=0.125 +2024-09-18 22:22:53,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=534420.0, ans=0.2 +2024-09-18 22:22:57,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=534420.0, ans=0.125 +2024-09-18 22:23:05,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=534460.0, ans=0.125 +2024-09-18 22:23:05,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=534460.0, ans=0.125 +2024-09-18 22:23:11,015 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.566e+01 9.166e+01 9.835e+01 1.994e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-18 22:23:17,293 INFO [train.py:1198] (1/2) Epoch 30, batch 2400, loss[loss=0.2231, ctc_loss=0.1052, cr_loss=0.3228, attn_decoder_loss=0.229, over 29536.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1217, cr_loss=0.3642, attn_decoder_loss=0.243, over 5808405.61 frames. ], batch size: 76, lr: 3.69e-03, grad_scale: 16.0 +2024-09-18 22:23:23,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=534500.0, ans=0.0 +2024-09-18 22:23:49,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=534580.0, ans=0.2 +2024-09-18 22:23:55,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=534580.0, ans=0.1 +2024-09-18 22:24:07,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=534620.0, ans=0.0 +2024-09-18 22:24:07,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=534620.0, ans=0.1 +2024-09-18 22:24:09,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=534620.0, ans=0.0 +2024-09-18 22:24:20,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=534660.0, ans=0.0 +2024-09-18 22:24:23,996 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.65 vs. limit=10.0 +2024-09-18 22:24:33,285 INFO [train.py:1198] (1/2) Epoch 30, batch 2450, loss[loss=0.239, ctc_loss=0.1175, cr_loss=0.3587, attn_decoder_loss=0.2445, over 29711.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1224, cr_loss=0.3659, attn_decoder_loss=0.244, over 5786619.30 frames. 
], batch size: 82, lr: 3.69e-03, grad_scale: 16.0 +2024-09-18 22:24:45,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=534700.0, ans=0.2 +2024-09-18 22:24:57,791 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:25:19,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.47 vs. limit=15.0 +2024-09-18 22:25:25,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=534820.0, ans=0.1 +2024-09-18 22:25:25,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=534820.0, ans=0.125 +2024-09-18 22:25:28,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=534820.0, ans=0.125 +2024-09-18 22:25:28,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=534820.0, ans=0.05 +2024-09-18 22:25:31,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=534820.0, ans=0.0 +2024-09-18 22:25:44,674 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.314e+01 8.707e+01 9.216e+01 9.749e+01 1.884e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-18 22:25:50,698 INFO [train.py:1198] (1/2) Epoch 30, batch 2500, loss[loss=0.2406, ctc_loss=0.118, cr_loss=0.3584, attn_decoder_loss=0.2463, over 29612.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1224, cr_loss=0.3658, attn_decoder_loss=0.2439, over 5796240.02 frames. ], batch size: 86, lr: 3.69e-03, grad_scale: 16.0 +2024-09-18 22:26:12,320 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:26:24,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=534980.0, ans=0.5 +2024-09-18 22:26:27,880 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.20 vs. limit=15.0 +2024-09-18 22:26:39,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=535020.0, ans=0.0 +2024-09-18 22:26:41,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=535020.0, ans=0.125 +2024-09-18 22:26:41,799 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.49 vs. 
limit=12.0 +2024-09-18 22:26:42,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=535020.0, ans=0.125 +2024-09-18 22:26:45,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=535020.0, ans=0.125 +2024-09-18 22:26:50,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=535060.0, ans=0.125 +2024-09-18 22:27:01,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=535060.0, ans=0.125 +2024-09-18 22:27:03,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=535060.0, ans=0.125 +2024-09-18 22:27:09,174 INFO [train.py:1198] (1/2) Epoch 30, batch 2550, loss[loss=0.204, ctc_loss=0.09782, cr_loss=0.314, attn_decoder_loss=0.2089, over 29315.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1224, cr_loss=0.3665, attn_decoder_loss=0.2438, over 5798400.05 frames. ], batch size: 67, lr: 3.69e-03, grad_scale: 8.0 +2024-09-18 22:27:09,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=535100.0, ans=0.125 +2024-09-18 22:27:16,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=535100.0, ans=0.2 +2024-09-18 22:27:47,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=535180.0, ans=0.125 +2024-09-18 22:27:49,779 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.97 vs. limit=15.0 +2024-09-18 22:27:53,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=535220.0, ans=0.125 +2024-09-18 22:28:04,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=535220.0, ans=0.125 +2024-09-18 22:28:05,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=535220.0, ans=0.07 +2024-09-18 22:28:20,583 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.046e+01 8.542e+01 9.079e+01 9.680e+01 2.807e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-18 22:28:22,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=535260.0, ans=0.0 +2024-09-18 22:28:25,107 INFO [train.py:1198] (1/2) Epoch 30, batch 2600, loss[loss=0.2274, ctc_loss=0.1125, cr_loss=0.3364, attn_decoder_loss=0.2327, over 29434.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1222, cr_loss=0.3662, attn_decoder_loss=0.2441, over 5794399.47 frames. 
], batch size: 78, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:28:26,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=535300.0, ans=0.1 +2024-09-18 22:28:28,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=535300.0, ans=0.0 +2024-09-18 22:28:35,112 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.34 vs. limit=15.0 +2024-09-18 22:29:18,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=535420.0, ans=0.1 +2024-09-18 22:29:42,490 INFO [train.py:1198] (1/2) Epoch 30, batch 2650, loss[loss=0.2438, ctc_loss=0.1241, cr_loss=0.3693, attn_decoder_loss=0.2489, over 29202.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1221, cr_loss=0.3664, attn_decoder_loss=0.2442, over 5800986.30 frames. ], batch size: 100, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:29:49,249 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.26 vs. limit=22.5 +2024-09-18 22:29:50,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=535500.0, ans=0.0 +2024-09-18 22:30:20,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.69 vs. limit=15.0 +2024-09-18 22:30:33,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=535620.0, ans=0.035 +2024-09-18 22:30:33,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=535620.0, ans=0.0 +2024-09-18 22:30:55,368 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.645e+01 8.677e+01 9.089e+01 9.646e+01 4.909e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-18 22:30:56,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.42 vs. limit=15.0 +2024-09-18 22:31:00,026 INFO [train.py:1198] (1/2) Epoch 30, batch 2700, loss[loss=0.2457, ctc_loss=0.1274, cr_loss=0.3695, attn_decoder_loss=0.2507, over 29520.00 frames. ], tot_loss[loss=0.2398, ctc_loss=0.1227, cr_loss=0.367, attn_decoder_loss=0.2447, over 5796968.31 frames. ], batch size: 87, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:31:22,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=535740.0, ans=0.05 +2024-09-18 22:31:30,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=535780.0, ans=0.0 +2024-09-18 22:31:56,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=535820.0, ans=0.035 +2024-09-18 22:32:11,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=535860.0, ans=0.0 +2024-09-18 22:32:15,740 INFO [train.py:1198] (1/2) Epoch 30, batch 2750, loss[loss=0.2367, ctc_loss=0.1223, cr_loss=0.3821, attn_decoder_loss=0.2409, over 29535.00 frames. 
], tot_loss[loss=0.2389, ctc_loss=0.1224, cr_loss=0.366, attn_decoder_loss=0.2437, over 5795525.22 frames. ], batch size: 75, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:32:55,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=535980.0, ans=0.125 +2024-09-18 22:33:13,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=536020.0, ans=0.2 +2024-09-18 22:33:26,280 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=12.41 vs. limit=15.0 +2024-09-18 22:33:26,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=536060.0, ans=0.025 +2024-09-18 22:33:27,447 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.20 vs. limit=6.0 +2024-09-18 22:33:28,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=536060.0, ans=0.09899494936611666 +2024-09-18 22:33:29,618 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.131e+01 8.488e+01 8.995e+01 9.694e+01 2.537e+02, threshold=1.799e+02, percent-clipped=1.0 +2024-09-18 22:33:34,345 INFO [train.py:1198] (1/2) Epoch 30, batch 2800, loss[loss=0.2551, ctc_loss=0.1466, cr_loss=0.3848, attn_decoder_loss=0.2586, over 20385.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1227, cr_loss=0.3658, attn_decoder_loss=0.2438, over 5776660.07 frames. ], batch size: 209, lr: 3.68e-03, grad_scale: 16.0 +2024-09-18 22:34:12,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=536180.0, ans=0.2 +2024-09-18 22:34:38,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=536260.0, ans=0.125 +2024-09-18 22:34:39,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=536260.0, ans=0.125 +2024-09-18 22:34:50,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=536300.0, ans=0.0 +2024-09-18 22:34:51,692 INFO [train.py:1198] (1/2) Epoch 30, batch 2850, loss[loss=0.2389, ctc_loss=0.1295, cr_loss=0.3843, attn_decoder_loss=0.2425, over 29501.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1229, cr_loss=0.366, attn_decoder_loss=0.2443, over 5762226.68 frames. ], batch size: 77, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:35:24,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=536380.0, ans=0.125 +2024-09-18 22:35:34,019 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.22 vs. 
limit=15.0 +2024-09-18 22:35:34,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=536380.0, ans=0.5 +2024-09-18 22:36:04,434 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.865e+01 8.533e+01 9.000e+01 9.896e+01 2.723e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-18 22:36:05,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.06 vs. limit=22.5 +2024-09-18 22:36:07,494 INFO [train.py:1198] (1/2) Epoch 30, batch 2900, loss[loss=0.2352, ctc_loss=0.1225, cr_loss=0.3758, attn_decoder_loss=0.2394, over 29435.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1233, cr_loss=0.3671, attn_decoder_loss=0.2451, over 5787133.55 frames. ], batch size: 79, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:36:12,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=536500.0, ans=0.0 +2024-09-18 22:36:18,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=536500.0, ans=15.0 +2024-09-18 22:36:29,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=536540.0, ans=0.125 +2024-09-18 22:36:42,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=536580.0, ans=0.0 +2024-09-18 22:36:47,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_na.min_abs, batch_count=536580.0, ans=0.02 +2024-09-18 22:36:50,812 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.18 vs. limit=15.0 +2024-09-18 22:37:13,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=536660.0, ans=0.2 +2024-09-18 22:37:25,636 INFO [train.py:1198] (1/2) Epoch 30, batch 2950, loss[loss=0.2211, ctc_loss=0.1102, cr_loss=0.3463, attn_decoder_loss=0.2257, over 29526.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1221, cr_loss=0.3644, attn_decoder_loss=0.2437, over 5780479.17 frames. ], batch size: 75, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:37:52,413 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.82 vs. limit=22.5 +2024-09-18 22:37:55,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=536780.0, ans=0.125 +2024-09-18 22:38:01,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=536780.0, ans=0.0 +2024-09-18 22:38:18,639 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.40 vs. limit=15.0 +2024-09-18 22:38:41,266 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.475e+01 9.079e+01 9.790e+01 2.714e+02, threshold=1.816e+02, percent-clipped=3.0 +2024-09-18 22:38:44,470 INFO [train.py:1198] (1/2) Epoch 30, batch 3000, loss[loss=0.2336, ctc_loss=0.1144, cr_loss=0.3409, attn_decoder_loss=0.2393, over 29756.00 frames. 
], tot_loss[loss=0.2385, ctc_loss=0.1217, cr_loss=0.3636, attn_decoder_loss=0.2434, over 5781098.17 frames. ], batch size: 81, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:38:44,470 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 22:39:01,304 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([2.1718, 2.6885, 2.6134, 2.7114, 2.6339, 2.6832, 2.0530, 2.8223], + device='cuda:1') +2024-09-18 22:39:02,887 INFO [train.py:1230] (1/2) Epoch 30, validation: loss=0.2118, ctc_loss=0.03796, cr_loss=5.626e-15, attn_decoder_loss=0.2311, over 944034.00 frames. +2024-09-18 22:39:02,888 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 22:39:12,886 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=14.71 vs. limit=15.0 +2024-09-18 22:39:38,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=536980.0, ans=0.125 +2024-09-18 22:39:52,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=537020.0, ans=0.125 +2024-09-18 22:39:56,686 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:40:10,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=537060.0, ans=0.1 +2024-09-18 22:40:11,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=537060.0, ans=0.125 +2024-09-18 22:40:13,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=537060.0, ans=0.125 +2024-09-18 22:40:19,083 INFO [train.py:1198] (1/2) Epoch 30, batch 3050, loss[loss=0.2333, ctc_loss=0.1212, cr_loss=0.3649, attn_decoder_loss=0.2377, over 29552.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1226, cr_loss=0.3657, attn_decoder_loss=0.2443, over 5774679.06 frames. ], batch size: 76, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:41:13,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.92 vs. limit=12.0 +2024-09-18 22:41:16,676 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.21 vs. limit=6.0 +2024-09-18 22:41:20,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=537260.0, ans=0.0 +2024-09-18 22:41:33,833 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.707e+01 8.461e+01 8.902e+01 9.446e+01 1.923e+02, threshold=1.780e+02, percent-clipped=1.0 +2024-09-18 22:41:36,772 INFO [train.py:1198] (1/2) Epoch 30, batch 3100, loss[loss=0.2539, ctc_loss=0.1285, cr_loss=0.3719, attn_decoder_loss=0.2596, over 29243.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1226, cr_loss=0.366, attn_decoder_loss=0.2441, over 5775711.42 frames. 
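Each training line reports a combined `loss` plus three components: CTC, consistency-regularization (`cr_loss`), and attention-decoder losses. The logged totals are consistent with a fixed linear weighting of roughly 0.1 / 0.02 / 0.9; note these weights are inferred from the numbers in this log, not read from the recipe's flags, so treat them as an assumption:

```python
# Weights inferred by fitting the logged values; e.g. for the batch-3000 line:
#   0.1 * 0.1144 + 0.02 * 0.3409 + 0.9 * 0.2393 ~= 0.2336  (the logged `loss`)
def total_loss(ctc_loss: float, cr_loss: float, attn_decoder_loss: float,
               ctc_w: float = 0.1, cr_w: float = 0.02, attn_w: float = 0.9) -> float:
    return ctc_w * ctc_loss + cr_w * cr_loss + attn_w * attn_decoder_loss

assert abs(total_loss(0.1144, 0.3409, 0.2393) - 0.2336) < 5e-4
```

On the validation lines the CR term is effectively zero (`cr_loss=5.626e-15`), so validation loss is driven almost entirely by the CTC and attention-decoder parts.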
], batch size: 100, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:41:38,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=537300.0, ans=0.0 +2024-09-18 22:41:50,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=537340.0, ans=0.0 +2024-09-18 22:41:52,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=537340.0, ans=0.2 +2024-09-18 22:41:52,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=537340.0, ans=0.05 +2024-09-18 22:42:02,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=537340.0, ans=0.025 +2024-09-18 22:42:17,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=537380.0, ans=0.125 +2024-09-18 22:42:54,839 INFO [train.py:1198] (1/2) Epoch 30, batch 3150, loss[loss=0.2413, ctc_loss=0.1112, cr_loss=0.3476, attn_decoder_loss=0.248, over 28853.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1225, cr_loss=0.3655, attn_decoder_loss=0.2442, over 5781878.29 frames. ], batch size: 104, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:43:27,671 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.73 vs. limit=15.0 +2024-09-18 22:43:58,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=537660.0, ans=0.0 +2024-09-18 22:44:06,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=537660.0, ans=0.125 +2024-09-18 22:44:07,776 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.272e+01 8.378e+01 8.875e+01 9.441e+01 1.254e+02, threshold=1.775e+02, percent-clipped=0.0 +2024-09-18 22:44:10,855 INFO [train.py:1198] (1/2) Epoch 30, batch 3200, loss[loss=0.2349, ctc_loss=0.1175, cr_loss=0.3597, attn_decoder_loss=0.24, over 29407.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1213, cr_loss=0.3631, attn_decoder_loss=0.2432, over 5791882.94 frames. ], batch size: 79, lr: 3.68e-03, grad_scale: 16.0 +2024-09-18 22:44:15,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=537700.0, ans=0.1 +2024-09-18 22:44:27,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=537740.0, ans=0.0 +2024-09-18 22:44:32,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=537740.0, ans=0.125 +2024-09-18 22:44:34,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=537740.0, ans=0.0 +2024-09-18 22:44:52,260 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.44 vs. 
limit=15.0 +2024-09-18 22:44:56,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=537780.0, ans=0.125 +2024-09-18 22:45:07,383 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.57 vs. limit=15.0 +2024-09-18 22:45:26,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=537860.0, ans=0.125 +2024-09-18 22:45:29,322 INFO [train.py:1198] (1/2) Epoch 30, batch 3250, loss[loss=0.2348, ctc_loss=0.1149, cr_loss=0.3409, attn_decoder_loss=0.2405, over 29686.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1214, cr_loss=0.3638, attn_decoder_loss=0.2435, over 5798736.97 frames. ], batch size: 84, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:45:35,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=537900.0, ans=0.07 +2024-09-18 22:45:53,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=537940.0, ans=0.1 +2024-09-18 22:46:03,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=537980.0, ans=0.0 +2024-09-18 22:46:07,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=537980.0, ans=0.125 +2024-09-18 22:46:41,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=538060.0, ans=0.125 +2024-09-18 22:46:45,608 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.587e+01 8.918e+01 9.595e+01 3.976e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-18 22:46:47,133 INFO [train.py:1198] (1/2) Epoch 30, batch 3300, loss[loss=0.2462, ctc_loss=0.1227, cr_loss=0.3638, attn_decoder_loss=0.2518, over 28217.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1206, cr_loss=0.3619, attn_decoder_loss=0.2423, over 5795567.30 frames. ], batch size: 111, lr: 3.68e-03, grad_scale: 8.0 +2024-09-18 22:47:02,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=538140.0, ans=0.0 +2024-09-18 22:47:13,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=538140.0, ans=0.0 +2024-09-18 22:47:14,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=538140.0, ans=0.035 +2024-09-18 22:47:22,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=538180.0, ans=0.125 +2024-09-18 22:47:35,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=538220.0, ans=10.0 +2024-09-18 22:47:46,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=538260.0, ans=0.2 +2024-09-18 22:47:51,534 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.91 vs. 
limit=15.0 +2024-09-18 22:47:52,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=538260.0, ans=0.125 +2024-09-18 22:48:02,468 INFO [train.py:1198] (1/2) Epoch 30, batch 3350, loss[loss=0.2581, ctc_loss=0.1383, cr_loss=0.3948, attn_decoder_loss=0.2626, over 28901.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1214, cr_loss=0.3626, attn_decoder_loss=0.2431, over 5772684.60 frames. ], batch size: 104, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:48:08,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=538300.0, ans=0.125 +2024-09-18 22:48:19,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=538340.0, ans=0.0 +2024-09-18 22:48:29,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=538340.0, ans=0.125 +2024-09-18 22:48:34,599 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:48:56,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=538420.0, ans=0.125 +2024-09-18 22:49:17,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten.whitening_limit, batch_count=538460.0, ans=15.0 +2024-09-18 22:49:19,303 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.502e+01 8.711e+01 9.247e+01 9.714e+01 4.351e+02, threshold=1.849e+02, percent-clipped=3.0 +2024-09-18 22:49:19,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=538500.0, ans=0.125 +2024-09-18 22:49:20,832 INFO [train.py:1198] (1/2) Epoch 30, batch 3400, loss[loss=0.2118, ctc_loss=0.1016, cr_loss=0.3325, attn_decoder_loss=0.2167, over 29402.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1215, cr_loss=0.3632, attn_decoder_loss=0.243, over 5766879.40 frames. ], batch size: 67, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:49:49,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=538540.0, ans=0.125 +2024-09-18 22:49:49,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=538540.0, ans=0.025 +2024-09-18 22:49:51,354 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.03 vs. limit=15.0 +2024-09-18 22:49:58,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=538580.0, ans=0.04949747468305833 +2024-09-18 22:49:58,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=538580.0, ans=0.09899494936611666 +2024-09-18 22:49:58,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.44 vs. limit=15.0 +2024-09-18 22:49:59,069 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.80 vs. 
limit=15.0 +2024-09-18 22:50:16,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=538620.0, ans=0.125 +2024-09-18 22:50:25,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=538660.0, ans=0.04949747468305833 +2024-09-18 22:50:38,778 INFO [train.py:1198] (1/2) Epoch 30, batch 3450, loss[loss=0.2388, ctc_loss=0.1128, cr_loss=0.3436, attn_decoder_loss=0.2451, over 28274.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1211, cr_loss=0.3625, attn_decoder_loss=0.2431, over 5774918.33 frames. ], batch size: 111, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:50:39,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_ff3.min_abs, batch_count=538700.0, ans=0.2 +2024-09-18 22:50:40,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=538700.0, ans=0.2 +2024-09-18 22:51:13,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=538780.0, ans=0.125 +2024-09-18 22:51:15,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.34 vs. limit=6.0 +2024-09-18 22:51:22,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=538820.0, ans=0.1 +2024-09-18 22:51:52,847 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.290e+01 8.505e+01 9.176e+01 9.588e+01 2.343e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-18 22:51:54,375 INFO [train.py:1198] (1/2) Epoch 30, batch 3500, loss[loss=0.2203, ctc_loss=0.1127, cr_loss=0.353, attn_decoder_loss=0.2244, over 29313.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1211, cr_loss=0.3626, attn_decoder_loss=0.2427, over 5777214.86 frames. ], batch size: 71, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:51:58,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.29 vs. limit=15.0 +2024-09-18 22:52:16,039 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 22:52:50,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=539020.0, ans=0.0 +2024-09-18 22:52:54,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=539020.0, ans=0.125 +2024-09-18 22:53:04,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=539060.0, ans=0.125 +2024-09-18 22:53:11,558 INFO [train.py:1198] (1/2) Epoch 30, batch 3550, loss[loss=0.2376, ctc_loss=0.1107, cr_loss=0.3513, attn_decoder_loss=0.2439, over 29719.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1211, cr_loss=0.3624, attn_decoder_loss=0.2426, over 5783830.64 frames. ], batch size: 89, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:53:23,996 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.63 vs. 
limit=15.0 +2024-09-18 22:53:54,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.82 vs. limit=15.0 +2024-09-18 22:53:56,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=539220.0, ans=0.0 +2024-09-18 22:54:03,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=539220.0, ans=0.125 +2024-09-18 22:54:10,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=539260.0, ans=0.125 +2024-09-18 22:54:17,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=539260.0, ans=0.1 +2024-09-18 22:54:20,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=539260.0, ans=0.125 +2024-09-18 22:54:23,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=539260.0, ans=0.0 +2024-09-18 22:54:26,535 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.244e+01 8.421e+01 8.886e+01 9.459e+01 1.383e+02, threshold=1.777e+02, percent-clipped=0.0 +2024-09-18 22:54:28,094 INFO [train.py:1198] (1/2) Epoch 30, batch 3600, loss[loss=0.2381, ctc_loss=0.1251, cr_loss=0.3721, attn_decoder_loss=0.2424, over 29519.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1212, cr_loss=0.3627, attn_decoder_loss=0.243, over 5791824.04 frames. ], batch size: 77, lr: 3.67e-03, grad_scale: 16.0 +2024-09-18 22:54:41,146 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.95 vs. limit=15.0 +2024-09-18 22:54:43,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=539340.0, ans=0.5 +2024-09-18 22:54:43,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=539340.0, ans=0.125 +2024-09-18 22:54:46,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=539340.0, ans=0.025 +2024-09-18 22:55:00,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=539380.0, ans=0.1 +2024-09-18 22:55:24,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=539420.0, ans=0.0 +2024-09-18 22:55:39,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=539460.0, ans=0.125 +2024-09-18 22:55:42,274 INFO [train.py:1198] (1/2) Epoch 30, batch 3650, loss[loss=0.2599, ctc_loss=0.1396, cr_loss=0.3979, attn_decoder_loss=0.2645, over 29493.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1207, cr_loss=0.3618, attn_decoder_loss=0.2425, over 5794752.86 frames. 
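The `grad_scale` field that alternates between 8.0 and 16.0 across the batch lines is characteristic of dynamic loss scaling in mixed-precision training: the scale doubles after a run of overflow-free steps and halves when gradients overflow. A minimal sketch using standard PyTorch AMP — the recipe's actual scaler settings and training-loop names here are assumptions:

```python
# Sketch of the dynamic loss scaling implied by the grad_scale readings.
import torch

scaler = torch.cuda.amp.GradScaler(init_scale=8.0, growth_interval=2000)

def train_step(model, optimizer, batch, criterion):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = criterion(model(batch["inputs"]), batch["targets"])
    scaler.scale(loss).backward()   # gradients carry the current scale
    scaler.step(optimizer)          # unscales, skips the step on inf/nan
    scaler.update()                 # grows/shrinks the scale -> logged grad_scale
    return loss.detach()
```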
], batch size: 90, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:55:58,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=539540.0, ans=0.0 +2024-09-18 22:56:06,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=539540.0, ans=0.0 +2024-09-18 22:56:13,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=539580.0, ans=0.0 +2024-09-18 22:56:18,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=539580.0, ans=0.0 +2024-09-18 22:56:56,902 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.499e+01 8.540e+01 9.082e+01 9.609e+01 1.779e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-18 22:56:56,924 INFO [train.py:1198] (1/2) Epoch 30, batch 3700, loss[loss=0.2459, ctc_loss=0.1247, cr_loss=0.3624, attn_decoder_loss=0.2513, over 29706.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1206, cr_loss=0.3621, attn_decoder_loss=0.2427, over 5804721.57 frames. ], batch size: 84, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:56:57,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=539700.0, ans=0.0 +2024-09-18 22:57:06,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=539700.0, ans=0.0 +2024-09-18 22:57:06,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=539700.0, ans=0.125 +2024-09-18 22:57:09,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=539700.0, ans=0.125 +2024-09-18 22:57:18,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=539740.0, ans=0.125 +2024-09-18 22:57:21,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=539740.0, ans=0.1 +2024-09-18 22:57:24,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=539740.0, ans=0.125 +2024-09-18 22:57:45,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=539820.0, ans=0.125 +2024-09-18 22:58:11,649 INFO [train.py:1198] (1/2) Epoch 30, batch 3750, loss[loss=0.2071, ctc_loss=0.1048, cr_loss=0.3223, attn_decoder_loss=0.2113, over 29367.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1203, cr_loss=0.3611, attn_decoder_loss=0.2422, over 5808378.70 frames. 
], batch size: 67, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:58:29,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=539940.0, ans=0.0 +2024-09-18 22:58:37,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=539940.0, ans=0.09899494936611666 +2024-09-18 22:58:57,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=540020.0, ans=0.5 +2024-09-18 22:59:08,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=540020.0, ans=0.2 +2024-09-18 22:59:16,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=540060.0, ans=0.2 +2024-09-18 22:59:23,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=540060.0, ans=0.125 +2024-09-18 22:59:28,011 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.475e+01 8.896e+01 9.603e+01 2.511e+02, threshold=1.779e+02, percent-clipped=2.0 +2024-09-18 22:59:28,038 INFO [train.py:1198] (1/2) Epoch 30, batch 3800, loss[loss=0.2365, ctc_loss=0.1156, cr_loss=0.345, attn_decoder_loss=0.2422, over 29634.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1202, cr_loss=0.361, attn_decoder_loss=0.242, over 5799031.44 frames. ], batch size: 86, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 22:59:34,802 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=21.14 vs. limit=22.5 +2024-09-18 22:59:46,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=540140.0, ans=0.1 +2024-09-18 22:59:47,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=540140.0, ans=0.1 +2024-09-18 23:00:07,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=540180.0, ans=0.125 +2024-09-18 23:00:11,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=540220.0, ans=0.0 +2024-09-18 23:00:34,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=540260.0, ans=0.0 +2024-09-18 23:00:35,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=540260.0, ans=0.125 +2024-09-18 23:00:36,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=540260.0, ans=0.125 +2024-09-18 23:00:44,095 INFO [train.py:1198] (1/2) Epoch 30, batch 3850, loss[loss=0.26, ctc_loss=0.1424, cr_loss=0.4166, attn_decoder_loss=0.2638, over 29246.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1205, cr_loss=0.3613, attn_decoder_loss=0.2421, over 5812286.43 frames. 
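The `WARNING [optim.py:487]` lines summarize gradient clipping: the five numbers after "grad-norm quartiles" are apparently min/Q1/median/Q3/max of recent gradient norms, and in every such line above `threshold` equals `Clipping_scale` (2.0) times the median, e.g. 2.0 × 8.896e+01 = 1.779e+02. A hedged sketch of that behavior; the real logic lives inside icefall's optimizer, and the history length below is an assumption:

```python
# Illustrative quartile-based clipper matching the logged WARNING fields.
import torch
from collections import deque

class QuartileClipper:
    def __init__(self, clipping_scale: float = 2.0, history: int = 128):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=history)  # recent total grad norms

    def clip_(self, params) -> None:
        norm = torch.norm(torch.stack(
            [p.grad.norm() for p in params if p.grad is not None]))
        self.norms.append(norm.item())
        q = torch.quantile(torch.tensor(list(self.norms)),
                           torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = self.clipping_scale * q[2].item()  # 2.0 x median, as logged
        if norm.item() > threshold:   # steps like this feed percent-clipped
            for p in params:
                if p.grad is not None:
                    p.grad.mul_(threshold / norm.item())
```

`percent-clipped` is then presumably the running fraction of recent steps whose norm exceeded the threshold.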
], batch size: 100, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 23:00:51,871 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:01:02,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=540340.0, ans=0.125 +2024-09-18 23:01:03,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=540340.0, ans=0.1 +2024-09-18 23:01:15,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=540380.0, ans=0.125 +2024-09-18 23:01:55,032 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.01 vs. limit=15.0 +2024-09-18 23:01:58,766 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.853e+01 8.547e+01 9.033e+01 9.737e+01 1.184e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-18 23:01:58,803 INFO [train.py:1198] (1/2) Epoch 30, batch 3900, loss[loss=0.2468, ctc_loss=0.1195, cr_loss=0.3558, attn_decoder_loss=0.253, over 29618.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1213, cr_loss=0.3631, attn_decoder_loss=0.2429, over 5816216.72 frames. ], batch size: 86, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 23:02:01,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.76 vs. limit=12.0 +2024-09-18 23:02:38,403 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.07 vs. limit=22.5 +2024-09-18 23:02:49,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=540620.0, ans=0.125 +2024-09-18 23:02:58,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=540660.0, ans=0.035 +2024-09-18 23:03:00,404 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.45 vs. limit=15.0 +2024-09-18 23:03:11,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=540700.0, ans=0.2 +2024-09-18 23:03:13,094 INFO [train.py:1198] (1/2) Epoch 30, batch 3950, loss[loss=0.2572, ctc_loss=0.1392, cr_loss=0.3869, attn_decoder_loss=0.2618, over 29502.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1213, cr_loss=0.3636, attn_decoder_loss=0.2429, over 5835671.59 frames. ], batch size: 97, lr: 3.67e-03, grad_scale: 8.0 +2024-09-18 23:03:16,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=540700.0, ans=0.0 +2024-09-18 23:04:10,698 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.44 vs. 
limit=15.0 +2024-09-18 23:04:21,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=540860.0, ans=0.0 +2024-09-18 23:04:23,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=540860.0, ans=0.0 +2024-09-18 23:04:27,383 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.361e+01 8.475e+01 8.885e+01 9.495e+01 1.627e+02, threshold=1.777e+02, percent-clipped=0.0 +2024-09-18 23:04:27,405 INFO [train.py:1198] (1/2) Epoch 30, batch 4000, loss[loss=0.2248, ctc_loss=0.1077, cr_loss=0.3457, attn_decoder_loss=0.2301, over 29502.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1213, cr_loss=0.3635, attn_decoder_loss=0.2429, over 5812032.08 frames. ], batch size: 74, lr: 3.67e-03, grad_scale: 16.0 +2024-09-18 23:04:43,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=540940.0, ans=0.0 +2024-09-18 23:05:05,469 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.96 vs. limit=15.0 +2024-09-18 23:05:29,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.59 vs. limit=15.0 +2024-09-18 23:05:44,350 INFO [train.py:1198] (1/2) Epoch 30, batch 4050, loss[loss=0.2605, ctc_loss=0.1558, cr_loss=0.3904, attn_decoder_loss=0.2635, over 20547.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1212, cr_loss=0.3628, attn_decoder_loss=0.2427, over 5796473.75 frames. ], batch size: 209, lr: 3.66e-03, grad_scale: 16.0 +2024-09-18 23:05:47,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=541100.0, ans=0.1 +2024-09-18 23:05:52,872 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.59 vs. limit=22.5 +2024-09-18 23:06:14,477 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.31 vs. limit=15.0 +2024-09-18 23:06:38,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=541220.0, ans=0.125 +2024-09-18 23:06:49,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=541260.0, ans=0.125 +2024-09-18 23:06:57,913 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.018e+01 8.748e+01 9.261e+01 9.930e+01 1.570e+02, threshold=1.852e+02, percent-clipped=0.0 +2024-09-18 23:06:57,938 INFO [train.py:1198] (1/2) Epoch 30, batch 4100, loss[loss=0.2621, ctc_loss=0.1483, cr_loss=0.4434, attn_decoder_loss=0.2648, over 29490.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1217, cr_loss=0.3639, attn_decoder_loss=0.2434, over 5791995.30 frames. 
], batch size: 90, lr: 3.66e-03, grad_scale: 16.0 +2024-09-18 23:07:04,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=541300.0, ans=0.125 +2024-09-18 23:07:20,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=541340.0, ans=0.0 +2024-09-18 23:07:25,567 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.44 vs. limit=15.0 +2024-09-18 23:07:32,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=541380.0, ans=0.2 +2024-09-18 23:07:38,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=541380.0, ans=0.125 +2024-09-18 23:07:44,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=541420.0, ans=0.2 +2024-09-18 23:08:01,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=541460.0, ans=0.125 +2024-09-18 23:08:02,602 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.93 vs. limit=10.0 +2024-09-18 23:08:11,984 INFO [train.py:1198] (1/2) Epoch 30, batch 4150, loss[loss=0.2305, ctc_loss=0.123, cr_loss=0.369, attn_decoder_loss=0.2342, over 29497.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1214, cr_loss=0.3636, attn_decoder_loss=0.243, over 5797565.11 frames. ], batch size: 77, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:08:21,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=541500.0, ans=0.125 +2024-09-18 23:08:50,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=541580.0, ans=0.125 +2024-09-18 23:09:06,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=541620.0, ans=0.125 +2024-09-18 23:09:11,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=541660.0, ans=0.125 +2024-09-18 23:09:24,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=541660.0, ans=0.125 +2024-09-18 23:09:27,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=541700.0, ans=0.0 +2024-09-18 23:09:28,238 INFO [train.py:1198] (1/2) Epoch 30, batch 4200, loss[loss=0.2622, ctc_loss=0.1383, cr_loss=0.3928, attn_decoder_loss=0.2673, over 29529.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1215, cr_loss=0.3642, attn_decoder_loss=0.2435, over 5799501.27 frames. 
], batch size: 90, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:09:29,683 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.353e+01 8.390e+01 9.004e+01 9.409e+01 1.747e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-18 23:09:37,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys.whitening_limit, batch_count=541700.0, ans=6.0 +2024-09-18 23:09:44,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=541740.0, ans=0.125 +2024-09-18 23:09:52,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=541740.0, ans=0.125 +2024-09-18 23:09:55,696 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.08 vs. limit=15.0 +2024-09-18 23:10:20,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=541820.0, ans=0.125 +2024-09-18 23:10:33,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=541860.0, ans=0.125 +2024-09-18 23:10:34,757 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:10:41,862 INFO [train.py:1198] (1/2) Epoch 30, batch 4250, loss[loss=0.2157, ctc_loss=0.1016, cr_loss=0.3146, attn_decoder_loss=0.2213, over 29519.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1213, cr_loss=0.3639, attn_decoder_loss=0.2435, over 5806079.31 frames. ], batch size: 74, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:10:45,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=541900.0, ans=0.125 +2024-09-18 23:10:48,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=541900.0, ans=0.1 +2024-09-18 23:10:49,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=541900.0, ans=0.125 +2024-09-18 23:11:02,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=541940.0, ans=0.125 +2024-09-18 23:11:14,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=541980.0, ans=0.0 +2024-09-18 23:11:31,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=542020.0, ans=0.025 +2024-09-18 23:11:55,753 INFO [train.py:1198] (1/2) Epoch 30, batch 4300, loss[loss=0.2443, ctc_loss=0.1192, cr_loss=0.3481, attn_decoder_loss=0.2504, over 29516.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1212, cr_loss=0.3631, attn_decoder_loss=0.2435, over 5796045.31 frames. ], batch size: 87, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:11:57,300 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.934e+01 8.627e+01 9.132e+01 9.730e+01 6.693e+02, threshold=1.826e+02, percent-clipped=2.0 +2024-09-18 23:12:59,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.37 vs. 
limit=15.0 +2024-09-18 23:13:11,894 INFO [train.py:1198] (1/2) Epoch 30, batch 4350, loss[loss=0.2588, ctc_loss=0.1393, cr_loss=0.4031, attn_decoder_loss=0.2631, over 29499.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1237, cr_loss=0.3686, attn_decoder_loss=0.2466, over 5799138.04 frames. ], batch size: 97, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:13:19,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=542300.0, ans=0.125 +2024-09-18 23:13:21,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=542300.0, ans=10.0 +2024-09-18 23:13:30,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.55 vs. limit=15.0 +2024-09-18 23:13:32,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=542340.0, ans=0.1 +2024-09-18 23:13:51,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=542380.0, ans=0.07 +2024-09-18 23:13:55,569 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.31 vs. limit=15.0 +2024-09-18 23:13:57,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=542420.0, ans=0.125 +2024-09-18 23:14:25,117 INFO [train.py:1198] (1/2) Epoch 30, batch 4400, loss[loss=0.2605, ctc_loss=0.1444, cr_loss=0.4148, attn_decoder_loss=0.2642, over 27566.00 frames. ], tot_loss[loss=0.2437, ctc_loss=0.1252, cr_loss=0.3715, attn_decoder_loss=0.2486, over 5768847.41 frames. ], batch size: 125, lr: 3.66e-03, grad_scale: 16.0 +2024-09-18 23:14:26,524 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.984e+01 8.805e+01 9.147e+01 9.646e+01 3.836e+02, threshold=1.829e+02, percent-clipped=2.0 +2024-09-18 23:14:26,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=542500.0, ans=10.0 +2024-09-18 23:14:39,358 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.10 vs. 
limit=15.0 +2024-09-18 23:14:40,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=542540.0, ans=0.0 +2024-09-18 23:14:41,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=542540.0, ans=0.125 +2024-09-18 23:14:47,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=542540.0, ans=0.125 +2024-09-18 23:14:47,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=542540.0, ans=0.0 +2024-09-18 23:15:29,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=542660.0, ans=0.125 +2024-09-18 23:15:34,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=542660.0, ans=0.2 +2024-09-18 23:15:37,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=542660.0, ans=0.125 +2024-09-18 23:15:38,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=542700.0, ans=0.0 +2024-09-18 23:15:39,909 INFO [train.py:1198] (1/2) Epoch 30, batch 4450, loss[loss=0.267, ctc_loss=0.1577, cr_loss=0.3925, attn_decoder_loss=0.2705, over 20783.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.1288, cr_loss=0.3759, attn_decoder_loss=0.2507, over 5574678.60 frames. ], batch size: 210, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:15:49,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=542700.0, ans=0.1 +2024-09-18 23:16:13,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=542780.0, ans=0.125 +2024-09-18 23:16:25,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=542820.0, ans=0.0 +2024-09-18 23:16:25,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=542820.0, ans=0.125 +2024-09-18 23:16:53,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=542860.0, ans=0.125 +2024-09-18 23:16:55,893 INFO [train.py:1198] (1/2) Epoch 30, batch 4500, loss[loss=0.2647, ctc_loss=0.1558, cr_loss=0.3846, attn_decoder_loss=0.2682, over 19400.00 frames. ], tot_loss[loss=0.2485, ctc_loss=0.1328, cr_loss=0.3787, attn_decoder_loss=0.253, over 5232821.14 frames. 
], batch size: 209, lr: 3.66e-03, grad_scale: 8.0 +2024-09-18 23:16:58,808 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.613e+01 9.634e+01 1.118e+02 1.226e+02 1.647e+02, threshold=2.235e+02, percent-clipped=0.0 +2024-09-18 23:16:59,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=542900.0, ans=0.0 +2024-09-18 23:17:09,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=542940.0, ans=0.2 +2024-09-18 23:17:24,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=542980.0, ans=0.0 +2024-09-18 23:17:27,856 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.24 vs. limit=15.0 +2024-09-18 23:18:19,541 INFO [train.py:1198] (1/2) Epoch 31, batch 0, loss[loss=0.2199, ctc_loss=0.1122, cr_loss=0.3369, attn_decoder_loss=0.2244, over 29596.00 frames. ], tot_loss[loss=0.2199, ctc_loss=0.1122, cr_loss=0.3369, attn_decoder_loss=0.2244, over 29596.00 frames. ], batch size: 73, lr: 3.60e-03, grad_scale: 16.0 +2024-09-18 23:18:19,541 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-18 23:18:37,942 INFO [train.py:1230] (1/2) Epoch 31, validation: loss=0.2119, ctc_loss=0.03668, cr_loss=5.946e-15, attn_decoder_loss=0.2314, over 944034.00 frames. +2024-09-18 23:18:37,942 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-18 23:18:39,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=543000.0, ans=0.0 +2024-09-18 23:19:01,563 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.53 vs. limit=22.5 +2024-09-18 23:19:08,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=543080.0, ans=0.0 +2024-09-18 23:19:09,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.24 vs. limit=15.0 +2024-09-18 23:19:25,218 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:19:30,277 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.29 vs. limit=15.0 +2024-09-18 23:19:55,995 INFO [train.py:1198] (1/2) Epoch 31, batch 50, loss[loss=0.2077, ctc_loss=0.09466, cr_loss=0.3162, attn_decoder_loss=0.2132, over 29436.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1209, cr_loss=0.359, attn_decoder_loss=0.2425, over 1267607.03 frames. 
], batch size: 70, lr: 3.60e-03, grad_scale: 8.0 +2024-09-18 23:19:56,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=543200.0, ans=0.0 +2024-09-18 23:20:10,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=543240.0, ans=0.125 +2024-09-18 23:20:38,778 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.848e+01 9.634e+01 1.110e+02 1.417e+02, threshold=1.927e+02, percent-clipped=0.0 +2024-09-18 23:20:54,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=543320.0, ans=0.125 +2024-09-18 23:21:02,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=543360.0, ans=0.125 +2024-09-18 23:21:14,572 INFO [train.py:1198] (1/2) Epoch 31, batch 100, loss[loss=0.2406, ctc_loss=0.1217, cr_loss=0.3649, attn_decoder_loss=0.2457, over 29511.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1226, cr_loss=0.3641, attn_decoder_loss=0.2448, over 2251260.62 frames. ], batch size: 76, lr: 3.60e-03, grad_scale: 8.0 +2024-09-18 23:21:34,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=543440.0, ans=0.125 +2024-09-18 23:21:45,274 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.04 vs. limit=22.5 +2024-09-18 23:21:47,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=543480.0, ans=0.1 +2024-09-18 23:21:49,666 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.87 vs. limit=15.0 +2024-09-18 23:21:52,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=543480.0, ans=0.1 +2024-09-18 23:22:01,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=543520.0, ans=0.1 +2024-09-18 23:22:17,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=543560.0, ans=0.125 +2024-09-18 23:22:29,482 INFO [train.py:1198] (1/2) Epoch 31, batch 150, loss[loss=0.2035, ctc_loss=0.09444, cr_loss=0.2961, attn_decoder_loss=0.209, over 29435.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1203, cr_loss=0.3602, attn_decoder_loss=0.2427, over 3046153.14 frames. ], batch size: 70, lr: 3.60e-03, grad_scale: 8.0 +2024-09-18 23:22:45,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=543640.0, ans=0.1 +2024-09-18 23:23:11,521 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.390e+01 8.450e+01 8.920e+01 9.351e+01 1.507e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-18 23:23:47,251 INFO [train.py:1198] (1/2) Epoch 31, batch 200, loss[loss=0.2486, ctc_loss=0.1277, cr_loss=0.3911, attn_decoder_loss=0.2534, over 27307.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1206, cr_loss=0.3622, attn_decoder_loss=0.2426, over 3658832.86 frames. 
], batch size: 124, lr: 3.60e-03, grad_scale: 8.0 +2024-09-18 23:23:58,196 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:23:58,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=543800.0, ans=15.0 +2024-09-18 23:24:05,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=543840.0, ans=0.1 +2024-09-18 23:24:07,591 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.19 vs. limit=12.0 +2024-09-18 23:24:18,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.04 vs. limit=15.0 +2024-09-18 23:24:23,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=543880.0, ans=0.125 +2024-09-18 23:24:24,576 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.10 vs. limit=15.0 +2024-09-18 23:24:29,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=543880.0, ans=0.025 +2024-09-18 23:24:54,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=543960.0, ans=0.07 +2024-09-18 23:25:02,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=543960.0, ans=0.125 +2024-09-18 23:25:13,221 INFO [train.py:1198] (1/2) Epoch 31, batch 250, loss[loss=0.2537, ctc_loss=0.1268, cr_loss=0.3657, attn_decoder_loss=0.2596, over 29164.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1198, cr_loss=0.3608, attn_decoder_loss=0.2424, over 4141136.80 frames. ], batch size: 100, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:25:55,010 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.24 vs. limit=6.0 +2024-09-18 23:25:55,611 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.468e+01 8.439e+01 8.894e+01 9.430e+01 6.449e+02, threshold=1.779e+02, percent-clipped=1.0 +2024-09-18 23:26:00,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=544120.0, ans=0.2 +2024-09-18 23:26:07,268 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.38 vs. limit=22.5 +2024-09-18 23:26:12,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=544160.0, ans=0.0 +2024-09-18 23:26:25,130 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.10 vs. limit=10.0 +2024-09-18 23:26:28,632 INFO [train.py:1198] (1/2) Epoch 31, batch 300, loss[loss=0.2504, ctc_loss=0.1354, cr_loss=0.3969, attn_decoder_loss=0.2543, over 29528.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1202, cr_loss=0.3615, attn_decoder_loss=0.2425, over 4510455.63 frames. 
], batch size: 92, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:26:43,078 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.30 vs. limit=15.0 +2024-09-18 23:26:50,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=544240.0, ans=0.125 +2024-09-18 23:27:02,499 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:27:34,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=544360.0, ans=0.07 +2024-09-18 23:27:35,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=544360.0, ans=0.125 +2024-09-18 23:27:40,335 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.06 vs. limit=10.0 +2024-09-18 23:27:46,727 INFO [train.py:1198] (1/2) Epoch 31, batch 350, loss[loss=0.2212, ctc_loss=0.1115, cr_loss=0.3434, attn_decoder_loss=0.2258, over 29343.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1208, cr_loss=0.3627, attn_decoder_loss=0.2432, over 4796482.41 frames. ], batch size: 71, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:28:06,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=544440.0, ans=0.125 +2024-09-18 23:28:12,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=544440.0, ans=0.05 +2024-09-18 23:28:24,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=544480.0, ans=0.125 +2024-09-18 23:28:28,465 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.589e+01 8.389e+01 8.860e+01 9.607e+01 2.348e+02, threshold=1.772e+02, percent-clipped=3.0 +2024-09-18 23:28:31,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=544520.0, ans=0.125 +2024-09-18 23:28:33,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=544520.0, ans=0.125 +2024-09-18 23:28:37,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=544520.0, ans=0.125 +2024-09-18 23:29:01,566 INFO [train.py:1198] (1/2) Epoch 31, batch 400, loss[loss=0.2391, ctc_loss=0.1168, cr_loss=0.3782, attn_decoder_loss=0.2443, over 29703.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1204, cr_loss=0.3622, attn_decoder_loss=0.2429, over 5024366.91 frames. 
], batch size: 82, lr: 3.59e-03, grad_scale: 16.0 +2024-09-18 23:29:10,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=544600.0, ans=0.125 +2024-09-18 23:29:17,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=544640.0, ans=0.125 +2024-09-18 23:29:23,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=544640.0, ans=0.125 +2024-09-18 23:29:27,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=544640.0, ans=0.025 +2024-09-18 23:30:03,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.71 vs. limit=6.0 +2024-09-18 23:30:20,319 INFO [train.py:1198] (1/2) Epoch 31, batch 450, loss[loss=0.2502, ctc_loss=0.1356, cr_loss=0.4, attn_decoder_loss=0.254, over 29685.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1204, cr_loss=0.3621, attn_decoder_loss=0.2428, over 5187453.06 frames. ], batch size: 83, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:30:20,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=544800.0, ans=0.2 +2024-09-18 23:30:20,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=544800.0, ans=0.125 +2024-09-18 23:30:31,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=544800.0, ans=0.0 +2024-09-18 23:30:49,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=544880.0, ans=0.025 +2024-09-18 23:30:54,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=544880.0, ans=0.2 +2024-09-18 23:31:03,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=544880.0, ans=0.1 +2024-09-18 23:31:04,391 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.382e+01 8.525e+01 8.811e+01 9.438e+01 1.510e+02, threshold=1.762e+02, percent-clipped=0.0 +2024-09-18 23:31:09,582 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:31:12,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=544920.0, ans=0.0 +2024-09-18 23:31:13,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=544920.0, ans=0.125 +2024-09-18 23:31:24,852 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.63 vs. limit=15.0 +2024-09-18 23:31:29,557 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.44 vs. 
limit=22.5 +2024-09-18 23:31:37,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=545000.0, ans=0.07 +2024-09-18 23:31:38,404 INFO [train.py:1198] (1/2) Epoch 31, batch 500, loss[loss=0.2553, ctc_loss=0.1268, cr_loss=0.3777, attn_decoder_loss=0.2612, over 29440.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1204, cr_loss=0.3619, attn_decoder_loss=0.2422, over 5330420.76 frames. ], batch size: 94, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:31:40,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=545000.0, ans=0.0 +2024-09-18 23:31:44,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=545000.0, ans=0.125 +2024-09-18 23:31:48,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=545000.0, ans=0.07 +2024-09-18 23:31:52,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=545040.0, ans=0.025 +2024-09-18 23:32:13,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=545080.0, ans=0.0 +2024-09-18 23:32:28,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=545120.0, ans=0.1 +2024-09-18 23:32:36,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=545120.0, ans=0.0 +2024-09-18 23:32:39,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=545160.0, ans=0.025 +2024-09-18 23:32:54,163 INFO [train.py:1198] (1/2) Epoch 31, batch 550, loss[loss=0.2455, ctc_loss=0.1175, cr_loss=0.3564, attn_decoder_loss=0.2519, over 28793.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1203, cr_loss=0.3615, attn_decoder_loss=0.2421, over 5421577.81 frames. ], batch size: 104, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:32:59,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=545200.0, ans=0.025 +2024-09-18 23:32:59,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=545200.0, ans=0.125 +2024-09-18 23:33:20,154 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.33 vs. limit=15.0 +2024-09-18 23:33:40,331 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 8.523e+01 8.948e+01 9.609e+01 1.463e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-18 23:34:12,434 INFO [train.py:1198] (1/2) Epoch 31, batch 600, loss[loss=0.2407, ctc_loss=0.122, cr_loss=0.3666, attn_decoder_loss=0.2458, over 29264.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1206, cr_loss=0.3621, attn_decoder_loss=0.2425, over 5509534.27 frames. ], batch size: 100, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:34:14,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=545400.0, ans=0.0 +2024-09-18 23:34:23,860 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.59 vs. 
limit=15.0 +2024-09-18 23:34:41,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=545480.0, ans=0.125 +2024-09-18 23:34:41,783 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.09 vs. limit=22.5 +2024-09-18 23:34:49,360 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.61 vs. limit=10.0 +2024-09-18 23:35:03,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=545520.0, ans=0.125 +2024-09-18 23:35:08,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=545520.0, ans=0.0 +2024-09-18 23:35:08,802 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.36 vs. limit=22.5 +2024-09-18 23:35:09,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=545520.0, ans=0.0 +2024-09-18 23:35:30,102 INFO [train.py:1198] (1/2) Epoch 31, batch 650, loss[loss=0.2403, ctc_loss=0.1193, cr_loss=0.3699, attn_decoder_loss=0.2455, over 29766.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1197, cr_loss=0.36, attn_decoder_loss=0.2418, over 5586392.53 frames. ], batch size: 81, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:35:31,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=545600.0, ans=0.0 +2024-09-18 23:35:39,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=545600.0, ans=0.125 +2024-09-18 23:35:49,173 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.51 vs. limit=12.0 +2024-09-18 23:35:56,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=545640.0, ans=0.0 +2024-09-18 23:35:57,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=545640.0, ans=0.125 +2024-09-18 23:35:59,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.79 vs. limit=22.5 +2024-09-18 23:36:14,176 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.345e+01 8.324e+01 8.831e+01 9.249e+01 1.386e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-18 23:36:17,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=545720.0, ans=0.125 +2024-09-18 23:36:31,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=545760.0, ans=0.125 +2024-09-18 23:36:38,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=545760.0, ans=0.2 +2024-09-18 23:36:46,083 INFO [train.py:1198] (1/2) Epoch 31, batch 700, loss[loss=0.2191, ctc_loss=0.1132, cr_loss=0.3502, attn_decoder_loss=0.2231, over 29530.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1199, cr_loss=0.3607, attn_decoder_loss=0.2423, over 5637119.85 frames. 
], batch size: 76, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:36:50,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=545800.0, ans=0.125 +2024-09-18 23:37:10,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=545840.0, ans=0.125 +2024-09-18 23:37:21,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=545880.0, ans=0.125 +2024-09-18 23:37:38,038 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.97 vs. limit=12.0 +2024-09-18 23:37:40,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=545920.0, ans=0.1 +2024-09-18 23:37:47,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=545960.0, ans=0.125 +2024-09-18 23:37:52,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=545960.0, ans=0.1 +2024-09-18 23:37:55,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=545960.0, ans=0.125 +2024-09-18 23:38:00,079 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:38:04,175 INFO [train.py:1198] (1/2) Epoch 31, batch 750, loss[loss=0.2357, ctc_loss=0.1166, cr_loss=0.3697, attn_decoder_loss=0.2407, over 29726.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1197, cr_loss=0.3606, attn_decoder_loss=0.2418, over 5675903.51 frames. ], batch size: 82, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:38:12,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=546000.0, ans=0.0 +2024-09-18 23:38:30,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=546040.0, ans=0.125 +2024-09-18 23:38:47,925 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.065e+01 8.604e+01 9.058e+01 9.496e+01 1.707e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-18 23:38:51,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.12 vs. limit=15.0 +2024-09-18 23:39:19,499 INFO [train.py:1198] (1/2) Epoch 31, batch 800, loss[loss=0.2153, ctc_loss=0.1062, cr_loss=0.334, attn_decoder_loss=0.22, over 29638.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1201, cr_loss=0.3612, attn_decoder_loss=0.2419, over 5706404.70 frames. ], batch size: 73, lr: 3.59e-03, grad_scale: 16.0 +2024-09-18 23:39:58,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=546280.0, ans=0.2 +2024-09-18 23:39:58,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=546280.0, ans=0.125 +2024-09-18 23:40:17,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.92 vs. 
limit=10.0 +2024-09-18 23:40:33,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=546360.0, ans=0.125 +2024-09-18 23:40:34,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=546360.0, ans=0.0 +2024-09-18 23:40:37,683 INFO [train.py:1198] (1/2) Epoch 31, batch 850, loss[loss=0.2521, ctc_loss=0.127, cr_loss=0.398, attn_decoder_loss=0.2571, over 29703.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1203, cr_loss=0.3622, attn_decoder_loss=0.242, over 5734810.02 frames. ], batch size: 89, lr: 3.59e-03, grad_scale: 16.0 +2024-09-18 23:40:51,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=546440.0, ans=0.125 +2024-09-18 23:40:54,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=546440.0, ans=0.2 +2024-09-18 23:41:19,510 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:41:20,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=546480.0, ans=0.125 +2024-09-18 23:41:20,948 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:41:23,564 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.223e+01 8.645e+01 9.090e+01 9.691e+01 3.180e+02, threshold=1.818e+02, percent-clipped=2.0 +2024-09-18 23:41:46,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=546560.0, ans=0.0 +2024-09-18 23:41:51,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=546560.0, ans=0.125 +2024-09-18 23:41:54,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=546600.0, ans=0.125 +2024-09-18 23:41:55,485 INFO [train.py:1198] (1/2) Epoch 31, batch 900, loss[loss=0.219, ctc_loss=0.1065, cr_loss=0.3387, attn_decoder_loss=0.2239, over 29632.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1204, cr_loss=0.3623, attn_decoder_loss=0.2421, over 5740566.76 frames. ], batch size: 73, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:42:09,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=546640.0, ans=0.1 +2024-09-18 23:42:16,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=546640.0, ans=0.0 +2024-09-18 23:42:30,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=546680.0, ans=0.125 +2024-09-18 23:42:46,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=546720.0, ans=0.025 +2024-09-18 23:42:52,022 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.39 vs. 
limit=6.0 +2024-09-18 23:43:03,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=546760.0, ans=0.125 +2024-09-18 23:43:06,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=546760.0, ans=0.125 +2024-09-18 23:43:10,543 INFO [train.py:1198] (1/2) Epoch 31, batch 950, loss[loss=0.2222, ctc_loss=0.1047, cr_loss=0.3477, attn_decoder_loss=0.2276, over 29519.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1203, cr_loss=0.3622, attn_decoder_loss=0.2423, over 5740904.85 frames. ], batch size: 74, lr: 3.59e-03, grad_scale: 8.0 +2024-09-18 23:43:23,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=546800.0, ans=0.125 +2024-09-18 23:43:28,788 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.29 vs. limit=6.0 +2024-09-18 23:43:31,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=546840.0, ans=0.0 +2024-09-18 23:43:58,298 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.439e+01 8.530e+01 9.181e+01 9.954e+01 1.509e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-18 23:44:06,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=546920.0, ans=0.125 +2024-09-18 23:44:08,408 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.79 vs. limit=15.0 +2024-09-18 23:44:28,410 INFO [train.py:1198] (1/2) Epoch 31, batch 1000, loss[loss=0.2159, ctc_loss=0.0976, cr_loss=0.3223, attn_decoder_loss=0.2219, over 29462.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1209, cr_loss=0.3634, attn_decoder_loss=0.243, over 5735285.74 frames. ], batch size: 77, lr: 3.58e-03, grad_scale: 8.0 +2024-09-18 23:44:47,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=547040.0, ans=0.125 +2024-09-18 23:44:54,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=547040.0, ans=0.125 +2024-09-18 23:44:54,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=547040.0, ans=0.125 +2024-09-18 23:45:06,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=547080.0, ans=0.95 +2024-09-18 23:45:33,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=547160.0, ans=0.125 +2024-09-18 23:45:38,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=547160.0, ans=0.0 +2024-09-18 23:45:47,376 INFO [train.py:1198] (1/2) Epoch 31, batch 1050, loss[loss=0.2515, ctc_loss=0.1276, cr_loss=0.3841, attn_decoder_loss=0.2567, over 29677.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1205, cr_loss=0.3623, attn_decoder_loss=0.2422, over 5742621.31 frames. 
], batch size: 85, lr: 3.58e-03, grad_scale: 8.0 +2024-09-18 23:46:01,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=547240.0, ans=0.125 +2024-09-18 23:46:10,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=547240.0, ans=0.2 +2024-09-18 23:46:21,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=547280.0, ans=10.0 +2024-09-18 23:46:27,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=547280.0, ans=0.125 +2024-09-18 23:46:33,063 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.494e+01 8.412e+01 9.049e+01 9.703e+01 1.961e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-18 23:46:58,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=547360.0, ans=0.125 +2024-09-18 23:47:03,203 INFO [train.py:1198] (1/2) Epoch 31, batch 1100, loss[loss=0.2246, ctc_loss=0.1026, cr_loss=0.3225, attn_decoder_loss=0.231, over 29434.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1207, cr_loss=0.363, attn_decoder_loss=0.2424, over 5755402.92 frames. ], batch size: 78, lr: 3.58e-03, grad_scale: 8.0 +2024-09-18 23:47:32,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=547440.0, ans=0.0 +2024-09-18 23:47:33,505 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.11 vs. limit=15.0 +2024-09-18 23:47:52,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=547520.0, ans=0.125 +2024-09-18 23:48:01,191 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.95 vs. limit=22.5 +2024-09-18 23:48:03,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=547520.0, ans=0.125 +2024-09-18 23:48:13,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=547560.0, ans=0.0 +2024-09-18 23:48:21,593 INFO [train.py:1198] (1/2) Epoch 31, batch 1150, loss[loss=0.2401, ctc_loss=0.128, cr_loss=0.3827, attn_decoder_loss=0.244, over 29459.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1209, cr_loss=0.3637, attn_decoder_loss=0.2427, over 5752489.98 frames. ], batch size: 78, lr: 3.58e-03, grad_scale: 8.0 +2024-09-18 23:48:37,728 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.58 vs. 
limit=15.0 +2024-09-18 23:48:38,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=547640.0, ans=0.025 +2024-09-18 23:48:39,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=547640.0, ans=0.0 +2024-09-18 23:48:46,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=547640.0, ans=15.0 +2024-09-18 23:49:00,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=547680.0, ans=0.0 +2024-09-18 23:49:00,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=547680.0, ans=0.125 +2024-09-18 23:49:02,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=547680.0, ans=0.0 +2024-09-18 23:49:09,382 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.218e+01 8.554e+01 9.013e+01 9.585e+01 3.112e+02, threshold=1.803e+02, percent-clipped=2.0 +2024-09-18 23:49:39,561 INFO [train.py:1198] (1/2) Epoch 31, batch 1200, loss[loss=0.2459, ctc_loss=0.1267, cr_loss=0.3544, attn_decoder_loss=0.2513, over 29662.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1211, cr_loss=0.3632, attn_decoder_loss=0.243, over 5745801.09 frames. ], batch size: 85, lr: 3.58e-03, grad_scale: 16.0 +2024-09-18 23:50:05,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=547840.0, ans=0.125 +2024-09-18 23:50:58,373 INFO [train.py:1198] (1/2) Epoch 31, batch 1250, loss[loss=0.2573, ctc_loss=0.1399, cr_loss=0.4022, attn_decoder_loss=0.2614, over 29524.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1217, cr_loss=0.3649, attn_decoder_loss=0.2438, over 5773911.71 frames. ], batch size: 92, lr: 3.58e-03, grad_scale: 16.0 +2024-09-18 23:51:07,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=548000.0, ans=0.125 +2024-09-18 23:51:08,440 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.34 vs. limit=6.0 +2024-09-18 23:51:16,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=548040.0, ans=0.1 +2024-09-18 23:51:43,986 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.542e+01 8.406e+01 8.800e+01 9.095e+01 1.339e+02, threshold=1.760e+02, percent-clipped=0.0 +2024-09-18 23:52:02,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=548160.0, ans=0.025 +2024-09-18 23:52:03,307 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.61 vs. limit=15.0 +2024-09-18 23:52:07,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=548160.0, ans=0.125 +2024-09-18 23:52:14,291 INFO [train.py:1198] (1/2) Epoch 31, batch 1300, loss[loss=0.2493, ctc_loss=0.1268, cr_loss=0.3728, attn_decoder_loss=0.2546, over 28593.00 frames. 
], tot_loss[loss=0.2382, ctc_loss=0.1213, cr_loss=0.3643, attn_decoder_loss=0.2431, over 5779222.08 frames. ], batch size: 112, lr: 3.58e-03, grad_scale: 16.0 +2024-09-18 23:52:37,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=548240.0, ans=0.125 +2024-09-18 23:52:39,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=548240.0, ans=0.125 +2024-09-18 23:53:03,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=548320.0, ans=0.125 +2024-09-18 23:53:03,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=548320.0, ans=0.025 +2024-09-18 23:53:13,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=548320.0, ans=0.04949747468305833 +2024-09-18 23:53:26,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=548360.0, ans=0.125 +2024-09-18 23:53:32,537 INFO [train.py:1198] (1/2) Epoch 31, batch 1350, loss[loss=0.2478, ctc_loss=0.1282, cr_loss=0.3697, attn_decoder_loss=0.2529, over 29757.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.121, cr_loss=0.3637, attn_decoder_loss=0.2428, over 5798074.50 frames. ], batch size: 81, lr: 3.58e-03, grad_scale: 16.0 +2024-09-18 23:53:54,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.04 vs. limit=6.0 +2024-09-18 23:54:01,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=548480.0, ans=0.0 +2024-09-18 23:54:08,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=548480.0, ans=0.0 +2024-09-18 23:54:17,460 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.430e+01 8.480e+01 8.970e+01 9.556e+01 1.739e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-18 23:54:26,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=548520.0, ans=0.125 +2024-09-18 23:54:35,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=548560.0, ans=0.125 +2024-09-18 23:54:41,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=548560.0, ans=0.2 +2024-09-18 23:54:47,704 INFO [train.py:1198] (1/2) Epoch 31, batch 1400, loss[loss=0.2059, ctc_loss=0.09306, cr_loss=0.3047, attn_decoder_loss=0.2117, over 29531.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1204, cr_loss=0.3626, attn_decoder_loss=0.2425, over 5808726.72 frames. ], batch size: 69, lr: 3.58e-03, grad_scale: 16.0 +2024-09-18 23:54:49,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=548600.0, ans=0.125 +2024-09-18 23:55:00,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=548600.0, ans=0.07 +2024-09-18 23:55:25,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.14 vs. 
limit=15.0 +2024-09-18 23:55:33,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=548720.0, ans=0.1 +2024-09-18 23:55:35,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=548720.0, ans=0.1 +2024-09-18 23:55:42,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=548720.0, ans=0.0 +2024-09-18 23:56:05,888 INFO [train.py:1198] (1/2) Epoch 31, batch 1450, loss[loss=0.2481, ctc_loss=0.1263, cr_loss=0.3771, attn_decoder_loss=0.2532, over 29425.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1207, cr_loss=0.3634, attn_decoder_loss=0.2432, over 5804639.35 frames. ], batch size: 94, lr: 3.58e-03, grad_scale: 8.0 +2024-09-18 23:56:09,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=5.43 vs. limit=12.0 +2024-09-18 23:56:22,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=548840.0, ans=0.1 +2024-09-18 23:56:51,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=548920.0, ans=0.0 +2024-09-18 23:56:52,503 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.058e+01 8.514e+01 9.000e+01 9.465e+01 1.182e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-18 23:56:54,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=548920.0, ans=0.125 +2024-09-18 23:56:59,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=548920.0, ans=0.0 +2024-09-18 23:57:03,989 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-18 23:57:12,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=548960.0, ans=0.2 +2024-09-18 23:57:12,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=548960.0, ans=0.125 +2024-09-18 23:57:23,233 INFO [train.py:1198] (1/2) Epoch 31, batch 1500, loss[loss=0.2464, ctc_loss=0.1213, cr_loss=0.3772, attn_decoder_loss=0.2519, over 29654.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1209, cr_loss=0.3636, attn_decoder_loss=0.2434, over 5806351.89 frames. 
], batch size: 86, lr: 3.58e-03, grad_scale: 8.0 +2024-09-18 23:57:32,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=549000.0, ans=0.125 +2024-09-18 23:57:50,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=549040.0, ans=0.125 +2024-09-18 23:57:53,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=549080.0, ans=0.125 +2024-09-18 23:58:27,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=549160.0, ans=0.125 +2024-09-18 23:58:33,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=549160.0, ans=0.1 +2024-09-18 23:58:41,412 INFO [train.py:1198] (1/2) Epoch 31, batch 1550, loss[loss=0.2434, ctc_loss=0.1204, cr_loss=0.3786, attn_decoder_loss=0.2487, over 29510.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.121, cr_loss=0.3634, attn_decoder_loss=0.2432, over 5781988.83 frames. ], batch size: 90, lr: 3.58e-03, grad_scale: 8.0 +2024-09-18 23:58:50,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=549200.0, ans=0.2 +2024-09-18 23:58:55,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=549240.0, ans=0.025 +2024-09-18 23:59:13,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=549280.0, ans=0.125 +2024-09-18 23:59:23,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=549280.0, ans=0.025 +2024-09-18 23:59:25,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=549320.0, ans=0.0 +2024-09-18 23:59:27,894 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.398e+01 8.813e+01 9.564e+01 2.152e+02, threshold=1.763e+02, percent-clipped=1.0 +2024-09-18 23:59:28,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=549320.0, ans=0.125 +2024-09-18 23:59:36,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=549320.0, ans=0.125 +2024-09-18 23:59:39,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=549320.0, ans=0.125 +2024-09-18 23:59:54,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=549360.0, ans=0.125 +2024-09-18 23:59:56,779 INFO [train.py:1198] (1/2) Epoch 31, batch 1600, loss[loss=0.2444, ctc_loss=0.1211, cr_loss=0.3529, attn_decoder_loss=0.2502, over 29685.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1212, cr_loss=0.363, attn_decoder_loss=0.243, over 5763204.96 frames. 
], batch size: 85, lr: 3.58e-03, grad_scale: 16.0 +2024-09-19 00:00:04,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=549400.0, ans=0.0 +2024-09-19 00:00:27,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=549480.0, ans=0.125 +2024-09-19 00:00:33,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=549480.0, ans=0.125 +2024-09-19 00:01:14,871 INFO [train.py:1198] (1/2) Epoch 31, batch 1650, loss[loss=0.2462, ctc_loss=0.1232, cr_loss=0.3522, attn_decoder_loss=0.2521, over 29683.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1208, cr_loss=0.3625, attn_decoder_loss=0.2429, over 5757998.76 frames. ], batch size: 89, lr: 3.58e-03, grad_scale: 8.0 +2024-09-19 00:01:16,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=549600.0, ans=0.0 +2024-09-19 00:01:19,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=549600.0, ans=0.0 +2024-09-19 00:01:20,391 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.50 vs. limit=6.0 +2024-09-19 00:01:25,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=549600.0, ans=0.1 +2024-09-19 00:02:03,353 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.469e+01 8.401e+01 8.981e+01 9.648e+01 1.683e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-19 00:02:32,504 INFO [train.py:1198] (1/2) Epoch 31, batch 1700, loss[loss=0.2119, ctc_loss=0.1006, cr_loss=0.3267, attn_decoder_loss=0.217, over 29557.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1204, cr_loss=0.3618, attn_decoder_loss=0.2427, over 5778542.75 frames. ], batch size: 69, lr: 3.58e-03, grad_scale: 8.0 +2024-09-19 00:02:32,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=549800.0, ans=0.0 +2024-09-19 00:02:35,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=549800.0, ans=0.2 +2024-09-19 00:02:43,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=549800.0, ans=0.025 +2024-09-19 00:02:57,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=549840.0, ans=0.04949747468305833 +2024-09-19 00:03:01,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=549880.0, ans=0.1 +2024-09-19 00:03:05,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=549880.0, ans=0.1 +2024-09-19 00:03:05,379 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.75 vs. 
limit=15.0 +2024-09-19 00:03:18,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=549920.0, ans=0.125 +2024-09-19 00:03:48,475 INFO [train.py:1198] (1/2) Epoch 31, batch 1750, loss[loss=0.2113, ctc_loss=0.105, cr_loss=0.3264, attn_decoder_loss=0.2159, over 29297.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1203, cr_loss=0.3617, attn_decoder_loss=0.2425, over 5786861.90 frames. ], batch size: 67, lr: 3.57e-03, grad_scale: 8.0 +2024-09-19 00:04:36,768 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.248e+01 8.450e+01 9.086e+01 9.663e+01 1.697e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-19 00:04:44,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=550120.0, ans=0.0 +2024-09-19 00:04:45,127 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.91 vs. limit=15.0 +2024-09-19 00:05:05,971 INFO [train.py:1198] (1/2) Epoch 31, batch 1800, loss[loss=0.2465, ctc_loss=0.1285, cr_loss=0.381, attn_decoder_loss=0.2511, over 29693.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1205, cr_loss=0.3625, attn_decoder_loss=0.2429, over 5790043.50 frames. ], batch size: 83, lr: 3.57e-03, grad_scale: 8.0 +2024-09-19 00:05:12,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=550200.0, ans=0.1 +2024-09-19 00:05:12,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=550200.0, ans=0.125 +2024-09-19 00:05:27,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=550240.0, ans=0.125 +2024-09-19 00:05:38,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.40 vs. limit=15.0 +2024-09-19 00:06:23,827 INFO [train.py:1198] (1/2) Epoch 31, batch 1850, loss[loss=0.2523, ctc_loss=0.1295, cr_loss=0.3857, attn_decoder_loss=0.2573, over 29648.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1205, cr_loss=0.3625, attn_decoder_loss=0.2427, over 5796830.99 frames. ], batch size: 86, lr: 3.57e-03, grad_scale: 4.0 +2024-09-19 00:06:27,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=550400.0, ans=0.125 +2024-09-19 00:06:29,450 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.40 vs. 
limit=15.0 +2024-09-19 00:06:45,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=550440.0, ans=0.1 +2024-09-19 00:07:14,094 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.739e+01 8.638e+01 9.110e+01 9.627e+01 2.703e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-19 00:07:14,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=550520.0, ans=0.1 +2024-09-19 00:07:23,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=550560.0, ans=0.0 +2024-09-19 00:07:31,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=550560.0, ans=0.0 +2024-09-19 00:07:37,628 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.58 vs. limit=22.5 +2024-09-19 00:07:39,697 INFO [train.py:1198] (1/2) Epoch 31, batch 1900, loss[loss=0.2388, ctc_loss=0.1092, cr_loss=0.3406, attn_decoder_loss=0.2456, over 29709.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1206, cr_loss=0.3634, attn_decoder_loss=0.2433, over 5803972.65 frames. ], batch size: 89, lr: 3.57e-03, grad_scale: 8.0 +2024-09-19 00:07:53,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=550640.0, ans=0.1 +2024-09-19 00:08:13,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=550680.0, ans=0.125 +2024-09-19 00:08:39,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=550760.0, ans=0.0 +2024-09-19 00:08:51,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=550760.0, ans=0.125 +2024-09-19 00:08:57,912 INFO [train.py:1198] (1/2) Epoch 31, batch 1950, loss[loss=0.2362, ctc_loss=0.1201, cr_loss=0.3676, attn_decoder_loss=0.2409, over 29472.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1211, cr_loss=0.365, attn_decoder_loss=0.2443, over 5818511.47 frames. ], batch size: 78, lr: 3.57e-03, grad_scale: 8.0 +2024-09-19 00:08:59,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=550800.0, ans=0.125 +2024-09-19 00:09:08,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=550800.0, ans=0.0 +2024-09-19 00:09:47,673 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.604e+01 9.078e+01 9.873e+01 2.917e+02, threshold=1.816e+02, percent-clipped=2.0 +2024-09-19 00:09:50,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.97 vs. limit=15.0 +2024-09-19 00:10:15,517 INFO [train.py:1198] (1/2) Epoch 31, batch 2000, loss[loss=0.2199, ctc_loss=0.1112, cr_loss=0.3599, attn_decoder_loss=0.224, over 29342.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1217, cr_loss=0.3656, attn_decoder_loss=0.2446, over 5797133.96 frames. 
], batch size: 67, lr: 3.57e-03, grad_scale: 16.0 +2024-09-19 00:10:28,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=551000.0, ans=0.07 +2024-09-19 00:10:28,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=551000.0, ans=0.1 +2024-09-19 00:10:32,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=551040.0, ans=0.125 +2024-09-19 00:10:50,786 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:10:59,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=551120.0, ans=0.0 +2024-09-19 00:11:08,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=551120.0, ans=0.0 +2024-09-19 00:11:19,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=551160.0, ans=0.1 +2024-09-19 00:11:20,821 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:11:31,484 INFO [train.py:1198] (1/2) Epoch 31, batch 2050, loss[loss=0.2126, ctc_loss=0.1044, cr_loss=0.3235, attn_decoder_loss=0.2174, over 29408.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.121, cr_loss=0.3635, attn_decoder_loss=0.2434, over 5788128.30 frames. ], batch size: 70, lr: 3.57e-03, grad_scale: 16.0 +2024-09-19 00:11:43,202 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.61 vs. limit=6.0 +2024-09-19 00:11:44,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.68 vs. limit=22.5 +2024-09-19 00:11:50,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=551240.0, ans=0.125 +2024-09-19 00:12:00,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=551280.0, ans=0.05 +2024-09-19 00:12:04,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.93 vs. 
limit=12.0 +2024-09-19 00:12:14,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=551280.0, ans=0.0 +2024-09-19 00:12:18,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=551320.0, ans=10.0 +2024-09-19 00:12:18,695 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:12:21,280 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.718e+01 8.532e+01 9.006e+01 9.562e+01 1.976e+02, threshold=1.801e+02, percent-clipped=1.0 +2024-09-19 00:12:33,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=551360.0, ans=0.125 +2024-09-19 00:12:33,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=551360.0, ans=10.0 +2024-09-19 00:12:37,267 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.45 vs. limit=15.0 +2024-09-19 00:12:49,617 INFO [train.py:1198] (1/2) Epoch 31, batch 2100, loss[loss=0.2306, ctc_loss=0.1084, cr_loss=0.3333, attn_decoder_loss=0.2367, over 29775.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1205, cr_loss=0.3626, attn_decoder_loss=0.2427, over 5800837.14 frames. ], batch size: 81, lr: 3.57e-03, grad_scale: 16.0 +2024-09-19 00:12:49,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=551400.0, ans=0.025 +2024-09-19 00:12:55,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=551400.0, ans=0.125 +2024-09-19 00:13:01,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=551400.0, ans=0.2 +2024-09-19 00:13:32,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.02 vs. limit=22.5 +2024-09-19 00:13:33,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=551520.0, ans=0.2 +2024-09-19 00:13:47,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.55 vs. limit=15.0 +2024-09-19 00:13:52,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=551560.0, ans=6.0 +2024-09-19 00:14:07,015 INFO [train.py:1198] (1/2) Epoch 31, batch 2150, loss[loss=0.2332, ctc_loss=0.1154, cr_loss=0.3604, attn_decoder_loss=0.2383, over 29455.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1196, cr_loss=0.3613, attn_decoder_loss=0.2421, over 5815915.80 frames. 
], batch size: 78, lr: 3.57e-03, grad_scale: 16.0 +2024-09-19 00:14:16,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=551600.0, ans=0.125 +2024-09-19 00:14:21,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=551640.0, ans=0.125 +2024-09-19 00:14:40,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=551680.0, ans=0.125 +2024-09-19 00:14:43,287 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.11 vs. limit=15.0 +2024-09-19 00:14:49,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=551680.0, ans=0.2 +2024-09-19 00:14:57,054 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.507e+01 8.560e+01 8.969e+01 9.441e+01 3.216e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-19 00:15:23,015 INFO [train.py:1198] (1/2) Epoch 31, batch 2200, loss[loss=0.2526, ctc_loss=0.1283, cr_loss=0.3794, attn_decoder_loss=0.2579, over 29655.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.12, cr_loss=0.362, attn_decoder_loss=0.2424, over 5812101.83 frames. ], batch size: 86, lr: 3.57e-03, grad_scale: 16.0 +2024-09-19 00:15:35,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=551800.0, ans=0.125 +2024-09-19 00:15:44,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=551840.0, ans=0.125 +2024-09-19 00:15:49,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_ff2.min_abs, batch_count=551840.0, ans=0.1 +2024-09-19 00:16:10,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=551920.0, ans=0.0 +2024-09-19 00:16:29,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=551960.0, ans=0.1 +2024-09-19 00:16:32,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=551960.0, ans=0.125 +2024-09-19 00:16:34,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=551960.0, ans=0.125 +2024-09-19 00:16:39,383 INFO [train.py:1198] (1/2) Epoch 31, batch 2250, loss[loss=0.2406, ctc_loss=0.1274, cr_loss=0.3787, attn_decoder_loss=0.2447, over 29684.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1202, cr_loss=0.3623, attn_decoder_loss=0.2424, over 5810615.67 frames. 
], batch size: 82, lr: 3.57e-03, grad_scale: 16.0 +2024-09-19 00:16:58,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=552040.0, ans=0.125 +2024-09-19 00:17:09,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=552040.0, ans=0.2 +2024-09-19 00:17:12,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=552080.0, ans=0.0 +2024-09-19 00:17:23,487 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.05 vs. limit=15.0 +2024-09-19 00:17:32,969 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.331e+01 8.496e+01 8.948e+01 9.461e+01 2.809e+02, threshold=1.790e+02, percent-clipped=1.0 +2024-09-19 00:17:36,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=552120.0, ans=0.025 +2024-09-19 00:17:57,278 INFO [train.py:1198] (1/2) Epoch 31, batch 2300, loss[loss=0.2186, ctc_loss=0.1052, cr_loss=0.3342, attn_decoder_loss=0.2238, over 29324.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1199, cr_loss=0.362, attn_decoder_loss=0.2417, over 5799237.52 frames. ], batch size: 71, lr: 3.57e-03, grad_scale: 8.0 +2024-09-19 00:18:17,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=552240.0, ans=0.125 +2024-09-19 00:18:23,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=552240.0, ans=0.125 +2024-09-19 00:18:37,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=552280.0, ans=0.125 +2024-09-19 00:18:57,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=552320.0, ans=0.2 +2024-09-19 00:19:04,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=552360.0, ans=0.125 +2024-09-19 00:19:15,033 INFO [train.py:1198] (1/2) Epoch 31, batch 2350, loss[loss=0.2483, ctc_loss=0.1264, cr_loss=0.377, attn_decoder_loss=0.2534, over 29706.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.12, cr_loss=0.3627, attn_decoder_loss=0.2419, over 5804697.48 frames. ], batch size: 83, lr: 3.57e-03, grad_scale: 8.0 +2024-09-19 00:19:19,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=552400.0, ans=0.0 +2024-09-19 00:19:35,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=552440.0, ans=0.0 +2024-09-19 00:19:36,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=552440.0, ans=0.125 +2024-09-19 00:19:38,680 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.36 vs. 
limit=6.0 +2024-09-19 00:19:42,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=552440.0, ans=0.025 +2024-09-19 00:19:50,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.43 vs. limit=15.0 +2024-09-19 00:19:51,784 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:20:03,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=552520.0, ans=0.1 +2024-09-19 00:20:06,504 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.333e+01 8.603e+01 9.093e+01 9.793e+01 1.880e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-19 00:20:31,122 INFO [train.py:1198] (1/2) Epoch 31, batch 2400, loss[loss=0.2307, ctc_loss=0.1149, cr_loss=0.3482, attn_decoder_loss=0.2358, over 29521.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1205, cr_loss=0.3635, attn_decoder_loss=0.2423, over 5808774.61 frames. ], batch size: 76, lr: 3.57e-03, grad_scale: 16.0 +2024-09-19 00:20:41,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=552600.0, ans=0.125 +2024-09-19 00:21:06,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=552680.0, ans=0.0 +2024-09-19 00:21:11,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=552680.0, ans=0.1 +2024-09-19 00:21:35,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=552760.0, ans=0.2 +2024-09-19 00:21:36,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=552760.0, ans=0.1 +2024-09-19 00:21:42,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=552760.0, ans=0.0 +2024-09-19 00:21:43,676 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.02 vs. limit=15.0 +2024-09-19 00:21:51,232 INFO [train.py:1198] (1/2) Epoch 31, batch 2450, loss[loss=0.2487, ctc_loss=0.1283, cr_loss=0.3902, attn_decoder_loss=0.2535, over 29696.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.121, cr_loss=0.3643, attn_decoder_loss=0.2429, over 5785748.62 frames. ], batch size: 82, lr: 3.57e-03, grad_scale: 8.0 +2024-09-19 00:22:13,073 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.76 vs. 
limit=22.5 +2024-09-19 00:22:16,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=552840.0, ans=0.035 +2024-09-19 00:22:21,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=552880.0, ans=0.125 +2024-09-19 00:22:26,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=552880.0, ans=0.025 +2024-09-19 00:22:44,295 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.732e+01 9.075e+01 9.673e+01 2.868e+02, threshold=1.815e+02, percent-clipped=2.0 +2024-09-19 00:22:46,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=552920.0, ans=0.125 +2024-09-19 00:22:51,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=552960.0, ans=0.125 +2024-09-19 00:22:56,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=552960.0, ans=0.1 +2024-09-19 00:22:56,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=552960.0, ans=0.5 +2024-09-19 00:22:59,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=552960.0, ans=0.0 +2024-09-19 00:22:59,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=552960.0, ans=0.125 +2024-09-19 00:23:05,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=553000.0, ans=0.0 +2024-09-19 00:23:06,987 INFO [train.py:1198] (1/2) Epoch 31, batch 2500, loss[loss=0.2464, ctc_loss=0.1229, cr_loss=0.3549, attn_decoder_loss=0.2522, over 29634.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.121, cr_loss=0.3639, attn_decoder_loss=0.243, over 5795138.86 frames. ], batch size: 86, lr: 3.57e-03, grad_scale: 8.0 +2024-09-19 00:23:34,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=553040.0, ans=0.125 +2024-09-19 00:23:43,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=553080.0, ans=0.125 +2024-09-19 00:23:57,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=553120.0, ans=0.125 +2024-09-19 00:24:01,002 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.24 vs. 
limit=15.0 +2024-09-19 00:24:11,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=553160.0, ans=0.125 +2024-09-19 00:24:15,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=553160.0, ans=0.125 +2024-09-19 00:24:20,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=553160.0, ans=0.025 +2024-09-19 00:24:22,890 INFO [train.py:1198] (1/2) Epoch 31, batch 2550, loss[loss=0.2171, ctc_loss=0.1135, cr_loss=0.355, attn_decoder_loss=0.2207, over 29363.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1209, cr_loss=0.3641, attn_decoder_loss=0.243, over 5799705.75 frames. ], batch size: 67, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:24:46,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=553240.0, ans=0.1 +2024-09-19 00:24:47,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=553240.0, ans=0.0 +2024-09-19 00:24:56,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=553280.0, ans=0.125 +2024-09-19 00:25:18,171 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.379e+01 8.309e+01 8.709e+01 9.331e+01 1.370e+02, threshold=1.742e+02, percent-clipped=0.0 +2024-09-19 00:25:27,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=553360.0, ans=0.125 +2024-09-19 00:25:43,188 INFO [train.py:1198] (1/2) Epoch 31, batch 2600, loss[loss=0.2442, ctc_loss=0.1277, cr_loss=0.3882, attn_decoder_loss=0.2485, over 29452.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1211, cr_loss=0.3645, attn_decoder_loss=0.2435, over 5796404.26 frames. ], batch size: 78, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:25:45,557 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.92 vs. 
limit=12.0 +2024-09-19 00:25:58,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=553440.0, ans=0.0 +2024-09-19 00:26:04,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=553440.0, ans=0.09899494936611666 +2024-09-19 00:26:25,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=553480.0, ans=0.1 +2024-09-19 00:26:25,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=553480.0, ans=0.125 +2024-09-19 00:26:25,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=553480.0, ans=0.1 +2024-09-19 00:26:33,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=553520.0, ans=0.125 +2024-09-19 00:26:34,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=553520.0, ans=10.0 +2024-09-19 00:26:46,955 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:26:49,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=553560.0, ans=0.125 +2024-09-19 00:26:55,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=553560.0, ans=0.125 +2024-09-19 00:26:58,909 INFO [train.py:1198] (1/2) Epoch 31, batch 2650, loss[loss=0.2535, ctc_loss=0.1294, cr_loss=0.383, attn_decoder_loss=0.2588, over 29229.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.121, cr_loss=0.3642, attn_decoder_loss=0.2438, over 5802079.82 frames. 
], batch size: 100, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:27:05,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=553600.0, ans=0.5 +2024-09-19 00:27:12,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=553640.0, ans=0.1 +2024-09-19 00:27:14,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=553640.0, ans=10.0 +2024-09-19 00:27:21,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=553640.0, ans=0.2 +2024-09-19 00:27:26,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=553640.0, ans=0.125 +2024-09-19 00:27:42,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=553720.0, ans=0.125 +2024-09-19 00:27:45,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=553720.0, ans=0.0 +2024-09-19 00:27:51,190 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.524e+01 8.581e+01 9.000e+01 9.310e+01 1.740e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-19 00:27:56,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=553720.0, ans=0.1 +2024-09-19 00:28:03,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=553760.0, ans=0.1 +2024-09-19 00:28:14,274 INFO [train.py:1198] (1/2) Epoch 31, batch 2700, loss[loss=0.2372, ctc_loss=0.1156, cr_loss=0.3488, attn_decoder_loss=0.243, over 29554.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.121, cr_loss=0.3639, attn_decoder_loss=0.2439, over 5797192.43 frames. ], batch size: 87, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:28:25,790 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.98 vs. limit=15.0 +2024-09-19 00:28:29,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=553840.0, ans=0.125 +2024-09-19 00:28:41,244 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.36 vs. 
limit=15.0 +2024-09-19 00:28:54,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=553880.0, ans=0.025 +2024-09-19 00:29:03,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=553920.0, ans=0.05 +2024-09-19 00:29:08,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=553920.0, ans=0.2 +2024-09-19 00:29:15,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=553960.0, ans=0.025 +2024-09-19 00:29:15,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=553960.0, ans=0.5 +2024-09-19 00:29:20,821 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.05 vs. limit=15.0 +2024-09-19 00:29:23,907 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.03 vs. limit=10.0 +2024-09-19 00:29:24,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=553960.0, ans=0.125 +2024-09-19 00:29:28,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=553960.0, ans=0.125 +2024-09-19 00:29:34,469 INFO [train.py:1198] (1/2) Epoch 31, batch 2750, loss[loss=0.2328, ctc_loss=0.1226, cr_loss=0.3863, attn_decoder_loss=0.2365, over 29509.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1207, cr_loss=0.3628, attn_decoder_loss=0.2427, over 5795212.28 frames. ], batch size: 75, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:29:39,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=554000.0, ans=0.0 +2024-09-19 00:29:48,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=554040.0, ans=0.125 +2024-09-19 00:29:50,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=554040.0, ans=0.125 +2024-09-19 00:29:52,522 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.96 vs. limit=15.0 +2024-09-19 00:29:57,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=554040.0, ans=0.125 +2024-09-19 00:30:27,592 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.479e+01 8.490e+01 8.925e+01 9.454e+01 1.870e+02, threshold=1.785e+02, percent-clipped=1.0 +2024-09-19 00:30:32,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=554120.0, ans=0.125 +2024-09-19 00:30:50,894 INFO [train.py:1198] (1/2) Epoch 31, batch 2800, loss[loss=0.2507, ctc_loss=0.1397, cr_loss=0.3644, attn_decoder_loss=0.255, over 20100.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.121, cr_loss=0.3632, attn_decoder_loss=0.2431, over 5776741.82 frames. 
], batch size: 210, lr: 3.56e-03, grad_scale: 16.0 +2024-09-19 00:31:08,235 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.28 vs. limit=15.0 +2024-09-19 00:31:11,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.20 vs. limit=15.0 +2024-09-19 00:31:20,440 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.42 vs. limit=22.5 +2024-09-19 00:31:21,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=554280.0, ans=0.125 +2024-09-19 00:31:28,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=554280.0, ans=0.125 +2024-09-19 00:31:34,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=554320.0, ans=0.125 +2024-09-19 00:31:42,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=554320.0, ans=0.125 +2024-09-19 00:32:00,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=554360.0, ans=0.125 +2024-09-19 00:32:06,642 INFO [train.py:1198] (1/2) Epoch 31, batch 2850, loss[loss=0.2369, ctc_loss=0.1273, cr_loss=0.4052, attn_decoder_loss=0.2401, over 29524.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1218, cr_loss=0.3645, attn_decoder_loss=0.2434, over 5762930.82 frames. ], batch size: 77, lr: 3.56e-03, grad_scale: 16.0 +2024-09-19 00:32:46,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=554480.0, ans=0.125 +2024-09-19 00:32:56,888 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.78 vs. limit=22.5 +2024-09-19 00:33:03,235 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.488e+01 8.652e+01 9.121e+01 9.681e+01 2.307e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 00:33:05,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=554520.0, ans=0.1 +2024-09-19 00:33:06,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=554520.0, ans=0.2 +2024-09-19 00:33:11,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=554560.0, ans=0.125 +2024-09-19 00:33:26,764 INFO [train.py:1198] (1/2) Epoch 31, batch 2900, loss[loss=0.2241, ctc_loss=0.1108, cr_loss=0.3459, attn_decoder_loss=0.229, over 29457.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1222, cr_loss=0.3662, attn_decoder_loss=0.2445, over 5788574.44 frames. 
], batch size: 79, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:33:27,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=554600.0, ans=0.125 +2024-09-19 00:33:28,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=554600.0, ans=0.0 +2024-09-19 00:34:06,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=554680.0, ans=10.0 +2024-09-19 00:34:06,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=554680.0, ans=0.0 +2024-09-19 00:34:18,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=554720.0, ans=0.0 +2024-09-19 00:34:30,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=554760.0, ans=0.125 +2024-09-19 00:34:42,840 INFO [train.py:1198] (1/2) Epoch 31, batch 2950, loss[loss=0.2255, ctc_loss=0.1098, cr_loss=0.358, attn_decoder_loss=0.2304, over 29530.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1212, cr_loss=0.3639, attn_decoder_loss=0.2432, over 5782601.73 frames. ], batch size: 75, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:35:28,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=554920.0, ans=0.0 +2024-09-19 00:35:33,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=554920.0, ans=0.0 +2024-09-19 00:35:33,899 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.42 vs. limit=10.0 +2024-09-19 00:35:37,422 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.875e+01 8.492e+01 9.114e+01 9.567e+01 2.273e+02, threshold=1.823e+02, percent-clipped=2.0 +2024-09-19 00:35:42,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=554960.0, ans=0.0 +2024-09-19 00:35:45,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=554960.0, ans=6.0 +2024-09-19 00:35:56,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=554960.0, ans=0.125 +2024-09-19 00:35:56,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.46 vs. limit=10.0 +2024-09-19 00:35:59,046 INFO [train.py:1198] (1/2) Epoch 31, batch 3000, loss[loss=0.2373, ctc_loss=0.1256, cr_loss=0.378, attn_decoder_loss=0.2414, over 29755.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.121, cr_loss=0.3632, attn_decoder_loss=0.2431, over 5783491.37 frames. 
], batch size: 81, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:35:59,047 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 00:36:15,055 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([2.9543, 3.8489, 3.9176, 3.2570, 3.4097, 3.6364, 3.7075, 4.0390], + device='cuda:1') +2024-09-19 00:36:19,672 INFO [train.py:1230] (1/2) Epoch 31, validation: loss=0.2117, ctc_loss=0.03748, cr_loss=5.925e-15, attn_decoder_loss=0.2311, over 944034.00 frames. +2024-09-19 00:36:19,673 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 00:37:07,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=555120.0, ans=0.0 +2024-09-19 00:37:15,622 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:37:24,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=555160.0, ans=0.1 +2024-09-19 00:37:37,205 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.87 vs. limit=15.0 +2024-09-19 00:37:38,275 INFO [train.py:1198] (1/2) Epoch 31, batch 3050, loss[loss=0.2285, ctc_loss=0.1147, cr_loss=0.3592, attn_decoder_loss=0.2332, over 29518.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1213, cr_loss=0.3637, attn_decoder_loss=0.2436, over 5777797.33 frames. ], batch size: 76, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:37:49,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=555200.0, ans=0.125 +2024-09-19 00:37:52,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=555240.0, ans=0.1 +2024-09-19 00:38:05,163 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.80 vs. limit=15.0 +2024-09-19 00:38:23,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=555320.0, ans=6.0 +2024-09-19 00:38:30,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=555320.0, ans=0.0 +2024-09-19 00:38:32,840 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.264e+01 8.564e+01 9.261e+01 9.873e+01 2.101e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-19 00:38:34,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=555320.0, ans=0.125 +2024-09-19 00:38:34,838 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:38:52,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=555400.0, ans=0.125 +2024-09-19 00:38:53,997 INFO [train.py:1198] (1/2) Epoch 31, batch 3100, loss[loss=0.252, ctc_loss=0.127, cr_loss=0.3687, attn_decoder_loss=0.2576, over 29231.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1212, cr_loss=0.3637, attn_decoder_loss=0.2432, over 5777004.07 frames. 
], batch size: 100, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:39:15,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=555440.0, ans=0.125 +2024-09-19 00:39:20,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=555440.0, ans=0.5 +2024-09-19 00:39:24,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=555480.0, ans=0.125 +2024-09-19 00:39:26,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=555480.0, ans=0.1 +2024-09-19 00:39:30,113 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.47 vs. limit=15.0 +2024-09-19 00:40:10,307 INFO [train.py:1198] (1/2) Epoch 31, batch 3150, loss[loss=0.25, ctc_loss=0.128, cr_loss=0.3713, attn_decoder_loss=0.2553, over 28858.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1212, cr_loss=0.3639, attn_decoder_loss=0.2433, over 5783324.51 frames. ], batch size: 104, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:40:13,095 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.44 vs. limit=22.5 +2024-09-19 00:40:26,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=555640.0, ans=0.0 +2024-09-19 00:40:57,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=555680.0, ans=0.2 +2024-09-19 00:41:09,174 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.063e+01 8.684e+01 9.147e+01 9.580e+01 2.256e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-19 00:41:21,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=555760.0, ans=0.125 +2024-09-19 00:41:24,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=555760.0, ans=0.125 +2024-09-19 00:41:30,413 INFO [train.py:1198] (1/2) Epoch 31, batch 3200, loss[loss=0.2293, ctc_loss=0.1064, cr_loss=0.3258, attn_decoder_loss=0.2358, over 29426.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1202, cr_loss=0.3618, attn_decoder_loss=0.2422, over 5793653.55 frames. ], batch size: 79, lr: 3.56e-03, grad_scale: 16.0 +2024-09-19 00:41:33,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=555800.0, ans=0.0 +2024-09-19 00:41:42,103 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.84 vs. limit=15.0 +2024-09-19 00:41:43,564 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.15 vs. 
limit=22.5 +2024-09-19 00:42:12,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=555880.0, ans=0.0 +2024-09-19 00:42:28,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=555920.0, ans=0.025 +2024-09-19 00:42:43,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.30 vs. limit=15.0 +2024-09-19 00:42:46,903 INFO [train.py:1198] (1/2) Epoch 31, batch 3250, loss[loss=0.2393, ctc_loss=0.124, cr_loss=0.3959, attn_decoder_loss=0.2433, over 29702.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1203, cr_loss=0.3629, attn_decoder_loss=0.2427, over 5800740.32 frames. ], batch size: 84, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:43:27,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=556080.0, ans=0.025 +2024-09-19 00:43:28,291 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.09 vs. limit=22.5 +2024-09-19 00:43:30,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=556120.0, ans=0.5 +2024-09-19 00:43:35,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=556120.0, ans=0.125 +2024-09-19 00:43:42,506 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.526e+01 8.541e+01 8.932e+01 9.487e+01 3.275e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-19 00:43:53,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=556160.0, ans=0.1 +2024-09-19 00:44:02,476 INFO [train.py:1198] (1/2) Epoch 31, batch 3300, loss[loss=0.247, ctc_loss=0.1253, cr_loss=0.3734, attn_decoder_loss=0.2522, over 28158.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.12, cr_loss=0.3615, attn_decoder_loss=0.2419, over 5798002.30 frames. ], batch size: 111, lr: 3.56e-03, grad_scale: 8.0 +2024-09-19 00:44:06,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=556200.0, ans=0.0 +2024-09-19 00:44:14,116 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 00:44:44,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=556280.0, ans=0.0 +2024-09-19 00:45:02,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=556320.0, ans=0.125 +2024-09-19 00:45:10,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.14 vs. limit=15.0 +2024-09-19 00:45:11,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=556360.0, ans=0.125 +2024-09-19 00:45:21,977 INFO [train.py:1198] (1/2) Epoch 31, batch 3350, loss[loss=0.2551, ctc_loss=0.128, cr_loss=0.3779, attn_decoder_loss=0.2608, over 28898.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1202, cr_loss=0.3618, attn_decoder_loss=0.2423, over 5773586.36 frames. 
], batch size: 104, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:45:34,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=556400.0, ans=0.0 +2024-09-19 00:45:50,648 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.72 vs. limit=10.0 +2024-09-19 00:46:09,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=556520.0, ans=0.2 +2024-09-19 00:46:12,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=556520.0, ans=0.025 +2024-09-19 00:46:18,240 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.574e+01 8.469e+01 9.027e+01 9.591e+01 1.739e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 00:46:23,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.23 vs. limit=15.0 +2024-09-19 00:46:26,524 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.67 vs. limit=12.0 +2024-09-19 00:46:29,687 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.91 vs. limit=22.5 +2024-09-19 00:46:30,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=556560.0, ans=0.125 +2024-09-19 00:46:38,052 INFO [train.py:1198] (1/2) Epoch 31, batch 3400, loss[loss=0.211, ctc_loss=0.09763, cr_loss=0.3095, attn_decoder_loss=0.2167, over 29302.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1207, cr_loss=0.3625, attn_decoder_loss=0.2424, over 5766738.28 frames. ], batch size: 67, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:46:48,296 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.89 vs. limit=22.5 +2024-09-19 00:46:52,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=556640.0, ans=0.125 +2024-09-19 00:46:54,126 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=11.76 vs. limit=15.0 +2024-09-19 00:46:55,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=556640.0, ans=0.125 +2024-09-19 00:47:05,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=556640.0, ans=0.1 +2024-09-19 00:47:06,110 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.59 vs. limit=15.0 +2024-09-19 00:47:45,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=556760.0, ans=0.05 +2024-09-19 00:47:48,763 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.42 vs. 
limit=6.0 +2024-09-19 00:47:54,290 INFO [train.py:1198] (1/2) Epoch 31, batch 3450, loss[loss=0.255, ctc_loss=0.1324, cr_loss=0.3826, attn_decoder_loss=0.2601, over 28401.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1208, cr_loss=0.3629, attn_decoder_loss=0.2429, over 5775296.89 frames. ], batch size: 111, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:47:58,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=556800.0, ans=15.0 +2024-09-19 00:48:05,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=556800.0, ans=0.125 +2024-09-19 00:48:06,789 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.88 vs. limit=22.5 +2024-09-19 00:48:09,630 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.89 vs. limit=15.0 +2024-09-19 00:48:27,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=556880.0, ans=0.1 +2024-09-19 00:48:45,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=556920.0, ans=0.025 +2024-09-19 00:48:54,517 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.500e+01 8.684e+01 9.223e+01 9.839e+01 1.576e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-19 00:49:13,825 INFO [train.py:1198] (1/2) Epoch 31, batch 3500, loss[loss=0.2153, ctc_loss=0.1081, cr_loss=0.3398, attn_decoder_loss=0.2197, over 29318.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1202, cr_loss=0.362, attn_decoder_loss=0.2423, over 5776099.53 frames. ], batch size: 71, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:49:24,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=557000.0, ans=0.125 +2024-09-19 00:49:26,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=557000.0, ans=0.125 +2024-09-19 00:49:30,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=557040.0, ans=0.2 +2024-09-19 00:49:43,154 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.35 vs. limit=10.0 +2024-09-19 00:49:45,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=557080.0, ans=0.025 +2024-09-19 00:49:53,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.95 vs. 
limit=15.0 +2024-09-19 00:50:06,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer_ff2.min_abs, batch_count=557120.0, ans=0.1 +2024-09-19 00:50:19,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=557160.0, ans=0.125 +2024-09-19 00:50:27,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=557200.0, ans=0.125 +2024-09-19 00:50:28,513 INFO [train.py:1198] (1/2) Epoch 31, batch 3550, loss[loss=0.2462, ctc_loss=0.1176, cr_loss=0.3599, attn_decoder_loss=0.2525, over 29733.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.12, cr_loss=0.3619, attn_decoder_loss=0.2423, over 5783385.81 frames. ], batch size: 89, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:50:37,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=557200.0, ans=0.0 +2024-09-19 00:50:55,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=557240.0, ans=0.0 +2024-09-19 00:51:00,801 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.56 vs. limit=10.0 +2024-09-19 00:51:10,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=557280.0, ans=0.125 +2024-09-19 00:51:15,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.84 vs. limit=22.5 +2024-09-19 00:51:23,621 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.439e+01 8.575e+01 9.105e+01 9.496e+01 5.708e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-19 00:51:43,113 INFO [train.py:1198] (1/2) Epoch 31, batch 3600, loss[loss=0.2346, ctc_loss=0.12, cr_loss=0.377, attn_decoder_loss=0.2389, over 29534.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1202, cr_loss=0.3623, attn_decoder_loss=0.2426, over 5790902.96 frames. ], batch size: 77, lr: 3.55e-03, grad_scale: 16.0 +2024-09-19 00:51:55,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=557400.0, ans=0.1 +2024-09-19 00:52:06,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=557440.0, ans=0.125 +2024-09-19 00:52:09,719 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.46 vs. limit=15.0 +2024-09-19 00:52:15,788 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.68 vs. limit=12.0 +2024-09-19 00:52:47,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=557560.0, ans=0.125 +2024-09-19 00:52:58,317 INFO [train.py:1198] (1/2) Epoch 31, batch 3650, loss[loss=0.2578, ctc_loss=0.1316, cr_loss=0.3982, attn_decoder_loss=0.263, over 29476.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1198, cr_loss=0.3614, attn_decoder_loss=0.2421, over 5793492.75 frames. 
], batch size: 90, lr: 3.55e-03, grad_scale: 16.0 +2024-09-19 00:52:58,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=557600.0, ans=0.035 +2024-09-19 00:53:08,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=557600.0, ans=0.025 +2024-09-19 00:53:19,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=557640.0, ans=0.125 +2024-09-19 00:53:36,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=557680.0, ans=0.1 +2024-09-19 00:53:51,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=557720.0, ans=0.04949747468305833 +2024-09-19 00:53:55,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.94 vs. limit=22.5 +2024-09-19 00:53:55,291 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.468e+01 9.016e+01 9.512e+01 1.613e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-19 00:54:16,695 INFO [train.py:1198] (1/2) Epoch 31, batch 3700, loss[loss=0.2396, ctc_loss=0.122, cr_loss=0.3661, attn_decoder_loss=0.2446, over 29716.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1199, cr_loss=0.3621, attn_decoder_loss=0.2424, over 5802756.73 frames. ], batch size: 84, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:54:37,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=557840.0, ans=0.125 +2024-09-19 00:54:40,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=557840.0, ans=0.0 +2024-09-19 00:54:48,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=557880.0, ans=0.2 +2024-09-19 00:54:57,551 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.76 vs. limit=15.0 +2024-09-19 00:55:16,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=557960.0, ans=0.0 +2024-09-19 00:55:30,750 INFO [train.py:1198] (1/2) Epoch 31, batch 3750, loss[loss=0.2151, ctc_loss=0.1035, cr_loss=0.3047, attn_decoder_loss=0.2207, over 29366.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1202, cr_loss=0.3624, attn_decoder_loss=0.2422, over 5807317.47 frames. ], batch size: 67, lr: 3.55e-03, grad_scale: 8.0 +2024-09-19 00:56:10,433 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.33 vs. 
limit=15.0
+2024-09-19 00:56:26,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=558120.0, ans=0.125
+2024-09-19 00:56:27,501 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.596e+01 8.551e+01 9.139e+01 9.962e+01 3.532e+02, threshold=1.828e+02, percent-clipped=2.0
+2024-09-19 00:56:30,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=558160.0, ans=0.0
+2024-09-19 00:56:41,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=558160.0, ans=0.1
+2024-09-19 00:56:41,314 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 00:56:45,311 INFO [train.py:1198] (1/2) Epoch 31, batch 3800, loss[loss=0.241, ctc_loss=0.1145, cr_loss=0.344, attn_decoder_loss=0.2474, over 29641.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.12, cr_loss=0.3622, attn_decoder_loss=0.2419, over 5798016.22 frames. ], batch size: 86, lr: 3.55e-03, grad_scale: 8.0
+2024-09-19 00:56:52,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.71 vs. limit=22.5
+2024-09-19 00:57:23,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=558280.0, ans=0.2
+2024-09-19 00:57:30,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=558320.0, ans=10.0
+2024-09-19 00:57:45,904 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.31 vs. limit=10.0
+2024-09-19 00:57:46,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=558360.0, ans=0.0
+2024-09-19 00:57:46,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=558360.0, ans=0.95
+2024-09-19 00:57:54,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=558360.0, ans=0.0
+2024-09-19 00:57:59,982 INFO [train.py:1198] (1/2) Epoch 31, batch 3850, loss[loss=0.2492, ctc_loss=0.1241, cr_loss=0.3775, attn_decoder_loss=0.2548, over 29321.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1203, cr_loss=0.3627, attn_decoder_loss=0.242, over 5811445.49 frames. ], batch size: 100, lr: 3.55e-03, grad_scale: 8.0
+2024-09-19 00:58:44,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=558520.0, ans=0.0
+2024-09-19 00:58:53,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=558520.0, ans=0.125
+2024-09-19 00:58:55,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=558520.0, ans=0.025
+2024-09-19 00:58:56,236 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.624e+01 8.602e+01 9.005e+01 9.471e+01 1.448e+02, threshold=1.801e+02, percent-clipped=0.0
+2024-09-19 00:59:06,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=558560.0, ans=0.5
+2024-09-19 00:59:08,904 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.01 vs. limit=22.5
+2024-09-19 00:59:14,164 INFO [train.py:1198] (1/2) Epoch 31, batch 3900, loss[loss=0.2472, ctc_loss=0.1179, cr_loss=0.3598, attn_decoder_loss=0.2535, over 29623.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1209, cr_loss=0.364, attn_decoder_loss=0.2427, over 5815640.84 frames. ], batch size: 86, lr: 3.55e-03, grad_scale: 8.0
+2024-09-19 00:59:32,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=558640.0, ans=0.0
+2024-09-19 01:00:14,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=558760.0, ans=0.0
+2024-09-19 01:00:30,737 INFO [train.py:1198] (1/2) Epoch 31, batch 3950, loss[loss=0.2497, ctc_loss=0.1264, cr_loss=0.3742, attn_decoder_loss=0.2551, over 29468.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1203, cr_loss=0.3628, attn_decoder_loss=0.2428, over 5835209.89 frames. ], batch size: 97, lr: 3.55e-03, grad_scale: 8.0
+2024-09-19 01:00:34,773 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.97 vs. limit=22.5
+2024-09-19 01:01:03,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=558880.0, ans=0.1
+2024-09-19 01:01:15,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=558920.0, ans=0.125
+2024-09-19 01:01:18,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=558920.0, ans=0.125
+2024-09-19 01:01:28,225 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.387e+01 8.418e+01 8.953e+01 9.483e+01 1.231e+02, threshold=1.791e+02, percent-clipped=0.0
+2024-09-19 01:01:29,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=558960.0, ans=0.0
+2024-09-19 01:01:37,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=558960.0, ans=0.025
+2024-09-19 01:01:44,268 INFO [train.py:1198] (1/2) Epoch 31, batch 4000, loss[loss=0.2178, ctc_loss=0.1078, cr_loss=0.3483, attn_decoder_loss=0.2223, over 29525.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.12, cr_loss=0.3619, attn_decoder_loss=0.2424, over 5812707.43 frames. ], batch size: 74, lr: 3.55e-03, grad_scale: 8.0
+2024-09-19 01:01:44,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=559000.0, ans=0.0
+2024-09-19 01:01:47,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=559000.0, ans=0.0
+2024-09-19 01:01:49,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=559000.0, ans=0.125
+2024-09-19 01:01:58,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.89 vs. limit=22.5
+2024-09-19 01:02:03,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=559040.0, ans=0.125
+2024-09-19 01:02:06,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.39 vs. limit=15.0
+2024-09-19 01:02:12,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=559080.0, ans=0.0
+2024-09-19 01:02:23,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=559080.0, ans=0.025
+2024-09-19 01:02:38,662 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.52 vs. limit=15.0
+2024-09-19 01:02:41,731 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.50 vs. limit=15.0
+2024-09-19 01:02:43,301 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.88 vs. limit=15.0
+2024-09-19 01:02:48,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=559160.0, ans=0.125
+2024-09-19 01:02:56,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=559160.0, ans=0.125
+2024-09-19 01:02:59,113 INFO [train.py:1198] (1/2) Epoch 31, batch 4050, loss[loss=0.2625, ctc_loss=0.1618, cr_loss=0.3864, attn_decoder_loss=0.2651, over 20564.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1199, cr_loss=0.3615, attn_decoder_loss=0.242, over 5797052.13 frames. ], batch size: 209, lr: 3.55e-03, grad_scale: 8.0
+2024-09-19 01:03:05,578 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.40 vs. limit=15.0
+2024-09-19 01:03:18,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=559240.0, ans=0.1
+2024-09-19 01:03:50,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=559320.0, ans=0.2
+2024-09-19 01:03:56,622 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.819e+01 8.784e+01 9.351e+01 1.021e+02 4.182e+02, threshold=1.870e+02, percent-clipped=0.0
+2024-09-19 01:03:57,042 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 01:03:59,031 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.64 vs. limit=15.0
+2024-09-19 01:04:14,230 INFO [train.py:1198] (1/2) Epoch 31, batch 4100, loss[loss=0.2496, ctc_loss=0.1376, cr_loss=0.4048, attn_decoder_loss=0.2531, over 29499.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1205, cr_loss=0.3622, attn_decoder_loss=0.2424, over 5793051.86 frames. ], batch size: 90, lr: 3.54e-03, grad_scale: 8.0
+2024-09-19 01:04:30,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=559440.0, ans=0.5
+2024-09-19 01:04:37,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=559440.0, ans=0.1
+2024-09-19 01:05:02,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=559520.0, ans=0.1
+2024-09-19 01:05:03,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=559520.0, ans=0.125
+2024-09-19 01:05:09,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=559520.0, ans=0.025
+2024-09-19 01:05:24,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=559560.0, ans=0.0
+2024-09-19 01:05:28,645 INFO [train.py:1198] (1/2) Epoch 31, batch 4150, loss[loss=0.2303, ctc_loss=0.1166, cr_loss=0.356, attn_decoder_loss=0.235, over 29490.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1206, cr_loss=0.3625, attn_decoder_loss=0.2424, over 5798932.95 frames. ], batch size: 77, lr: 3.54e-03, grad_scale: 8.0
+2024-09-19 01:05:42,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=559640.0, ans=0.125
+2024-09-19 01:05:45,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=559640.0, ans=0.025
+2024-09-19 01:06:26,030 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.331e+01 8.844e+01 9.480e+01 1.340e+02, threshold=1.769e+02, percent-clipped=1.0
+2024-09-19 01:06:29,385 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 01:06:42,216 INFO [train.py:1198] (1/2) Epoch 31, batch 4200, loss[loss=0.2539, ctc_loss=0.1346, cr_loss=0.399, attn_decoder_loss=0.2583, over 29499.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1206, cr_loss=0.3629, attn_decoder_loss=0.2425, over 5801081.41 frames. ], batch size: 90, lr: 3.54e-03, grad_scale: 8.0
+2024-09-19 01:07:08,068 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.87 vs. limit=15.0
+2024-09-19 01:07:13,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=559880.0, ans=0.0
+2024-09-19 01:08:04,342 INFO [train.py:1198] (1/2) Epoch 31, batch 4250, loss[loss=0.2192, ctc_loss=0.1085, cr_loss=0.3337, attn_decoder_loss=0.2241, over 29506.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1205, cr_loss=0.3627, attn_decoder_loss=0.2428, over 5806568.61 frames. ], batch size: 74, lr: 3.54e-03, grad_scale: 8.0
+2024-09-19 01:08:37,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=560080.0, ans=0.125
+2024-09-19 01:08:49,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=560120.0, ans=0.025
+2024-09-19 01:09:03,973 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.505e+01 8.697e+01 9.428e+01 9.992e+01 2.936e+02, threshold=1.886e+02, percent-clipped=1.0
+2024-09-19 01:09:04,800 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.19 vs. limit=12.0
+2024-09-19 01:09:08,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=560160.0, ans=0.125
+2024-09-19 01:09:14,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=560160.0, ans=0.0
+2024-09-19 01:09:20,361 INFO [train.py:1198] (1/2) Epoch 31, batch 4300, loss[loss=0.2435, ctc_loss=0.1222, cr_loss=0.3794, attn_decoder_loss=0.2486, over 29517.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1202, cr_loss=0.3624, attn_decoder_loss=0.243, over 5797183.43 frames. ], batch size: 87, lr: 3.54e-03, grad_scale: 8.0
+2024-09-19 01:09:23,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=560200.0, ans=0.125
+2024-09-19 01:09:26,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=560200.0, ans=0.0
+2024-09-19 01:09:30,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.88 vs. limit=15.0
+2024-09-19 01:09:53,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=560280.0, ans=0.125
+2024-09-19 01:09:59,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=560280.0, ans=0.0
+2024-09-19 01:10:06,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=560320.0, ans=0.1
+2024-09-19 01:10:06,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=560320.0, ans=0.125
+2024-09-19 01:10:21,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=560360.0, ans=0.125
+2024-09-19 01:10:34,929 INFO [train.py:1198] (1/2) Epoch 31, batch 4350, loss[loss=0.2433, ctc_loss=0.1125, cr_loss=0.345, attn_decoder_loss=0.2502, over 29502.00 frames. ], tot_loss[loss=0.2412, ctc_loss=0.1229, cr_loss=0.3679, attn_decoder_loss=0.2461, over 5798658.32 frames. ], batch size: 97, lr: 3.54e-03, grad_scale: 8.0
+2024-09-19 01:10:38,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=560400.0, ans=0.0
+2024-09-19 01:11:05,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.84 vs. limit=22.5
+2024-09-19 01:11:14,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=560480.0, ans=0.0
+2024-09-19 01:11:18,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=560520.0, ans=0.0
+2024-09-19 01:11:33,997 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.066e+01 8.755e+01 9.230e+01 9.613e+01 3.743e+02, threshold=1.846e+02, percent-clipped=1.0
+2024-09-19 01:11:50,367 INFO [train.py:1198] (1/2) Epoch 31, batch 4400, loss[loss=0.2555, ctc_loss=0.1313, cr_loss=0.3872, attn_decoder_loss=0.2607, over 27534.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1247, cr_loss=0.3714, attn_decoder_loss=0.2485, over 5769354.64 frames. ], batch size: 125, lr: 3.54e-03, grad_scale: 16.0
+2024-09-19 01:11:50,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=560600.0, ans=0.1
+2024-09-19 01:12:01,117 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.79 vs. limit=22.5
+2024-09-19 01:12:18,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=560680.0, ans=0.0
+2024-09-19 01:12:47,357 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.30 vs. limit=22.5
+2024-09-19 01:12:51,691 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.18 vs. limit=22.5
+2024-09-19 01:13:03,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=560800.0, ans=0.125
+2024-09-19 01:13:05,140 INFO [train.py:1198] (1/2) Epoch 31, batch 4450, loss[loss=0.2638, ctc_loss=0.1615, cr_loss=0.4109, attn_decoder_loss=0.2661, over 19488.00 frames. ], tot_loss[loss=0.2462, ctc_loss=0.1287, cr_loss=0.3768, attn_decoder_loss=0.2508, over 5571923.75 frames. ], batch size: 209, lr: 3.54e-03, grad_scale: 8.0
+2024-09-19 01:13:24,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=560840.0, ans=15.0
+2024-09-19 01:13:29,834 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.53 vs. limit=22.5
+2024-09-19 01:13:35,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=560880.0, ans=0.0
+2024-09-19 01:13:38,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=560880.0, ans=0.07
+2024-09-19 01:13:48,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=560880.0, ans=0.125
+2024-09-19 01:13:48,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=7.47 vs. limit=12.0
+2024-09-19 01:13:53,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=560920.0, ans=0.1
+2024-09-19 01:14:03,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=560920.0, ans=0.0
+2024-09-19 01:14:06,069 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.287e+01 9.493e+01 1.082e+02 1.219e+02 3.408e+02, threshold=2.163e+02, percent-clipped=1.0
+2024-09-19 01:14:09,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=560960.0, ans=0.125
+2024-09-19 01:14:21,379 INFO [train.py:1198] (1/2) Epoch 31, batch 4500, loss[loss=0.2567, ctc_loss=0.1467, cr_loss=0.4029, attn_decoder_loss=0.2599, over 20243.00 frames. ], tot_loss[loss=0.2484, ctc_loss=0.1323, cr_loss=0.379, attn_decoder_loss=0.2528, over 5226104.06 frames. ], batch size: 209, lr: 3.54e-03, grad_scale: 8.0
+2024-09-19 01:14:23,792 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.42 vs. limit=6.0
+2024-09-19 01:14:42,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=561040.0, ans=0.2
+2024-09-19 01:15:44,694 INFO [train.py:1198] (1/2) Epoch 32, batch 0, loss[loss=0.2177, ctc_loss=0.1069, cr_loss=0.3304, attn_decoder_loss=0.2226, over 29622.00 frames. ], tot_loss[loss=0.2177, ctc_loss=0.1069, cr_loss=0.3304, attn_decoder_loss=0.2226, over 29622.00 frames. ], batch size: 73, lr: 3.48e-03, grad_scale: 16.0
+2024-09-19 01:15:44,695 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-19 01:16:03,114 INFO [train.py:1230] (1/2) Epoch 32, validation: loss=0.2127, ctc_loss=0.03714, cr_loss=6.101e-15, attn_decoder_loss=0.2322, over 944034.00 frames.
+2024-09-19 01:16:03,114 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-19 01:16:12,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=561100.0, ans=0.1
+2024-09-19 01:16:26,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=561140.0, ans=0.1
+2024-09-19 01:16:27,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=561140.0, ans=0.125
+2024-09-19 01:16:44,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=561180.0, ans=0.125
+2024-09-19 01:16:53,894 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.57 vs. limit=10.0
+2024-09-19 01:16:59,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=561220.0, ans=0.125
+2024-09-19 01:17:06,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=561260.0, ans=0.125
+2024-09-19 01:17:18,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_positive, batch_count=561260.0, ans=0.05
+2024-09-19 01:17:19,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=561300.0, ans=0.0
+2024-09-19 01:17:20,693 INFO [train.py:1198] (1/2) Epoch 32, batch 50, loss[loss=0.2211, ctc_loss=0.1071, cr_loss=0.3411, attn_decoder_loss=0.2262, over 29427.00 frames. ], tot_loss[loss=0.2393, ctc_loss=0.1233, cr_loss=0.3663, attn_decoder_loss=0.2441, over 1268232.14 frames. ], batch size: 70, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:17:45,065 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.792e+01 8.848e+01 9.833e+01 1.147e+02 1.812e+02, threshold=1.967e+02, percent-clipped=0.0
+2024-09-19 01:18:02,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=561380.0, ans=0.125
+2024-09-19 01:18:12,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=561420.0, ans=0.1
+2024-09-19 01:18:17,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=561420.0, ans=0.1
+2024-09-19 01:18:23,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=561460.0, ans=0.1
+2024-09-19 01:18:36,351 INFO [train.py:1198] (1/2) Epoch 32, batch 100, loss[loss=0.2275, ctc_loss=0.1199, cr_loss=0.364, attn_decoder_loss=0.2313, over 29525.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1236, cr_loss=0.3691, attn_decoder_loss=0.2456, over 2251558.89 frames. ], batch size: 76, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:18:48,390 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.55 vs. limit=15.0
+2024-09-19 01:19:09,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.37 vs. limit=15.0
+2024-09-19 01:19:14,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=561580.0, ans=0.0
+2024-09-19 01:19:29,118 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.83 vs. limit=15.0
+2024-09-19 01:19:32,296 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.80 vs. limit=15.0
+2024-09-19 01:19:36,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=561620.0, ans=0.09899494936611666
+2024-09-19 01:19:36,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=561620.0, ans=0.1
+2024-09-19 01:19:51,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten.whitening_limit, batch_count=561660.0, ans=15.0
+2024-09-19 01:19:53,957 INFO [train.py:1198] (1/2) Epoch 32, batch 150, loss[loss=0.2086, ctc_loss=0.09687, cr_loss=0.3205, attn_decoder_loss=0.2139, over 29439.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1205, cr_loss=0.3641, attn_decoder_loss=0.2434, over 3047836.54 frames. ], batch size: 70, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:19:57,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=561700.0, ans=0.0
+2024-09-19 01:19:57,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=561700.0, ans=0.125
+2024-09-19 01:19:57,620 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.01 vs. limit=15.0
+2024-09-19 01:19:58,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=561700.0, ans=0.035
+2024-09-19 01:20:18,127 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.224e+01 8.368e+01 8.757e+01 9.262e+01 1.493e+02, threshold=1.751e+02, percent-clipped=0.0
+2024-09-19 01:20:38,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=561820.0, ans=0.0
+2024-09-19 01:21:04,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=561860.0, ans=0.125
+2024-09-19 01:21:11,621 INFO [train.py:1198] (1/2) Epoch 32, batch 200, loss[loss=0.2471, ctc_loss=0.1276, cr_loss=0.3825, attn_decoder_loss=0.2519, over 27409.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.12, cr_loss=0.3633, attn_decoder_loss=0.2429, over 3659966.13 frames. ], batch size: 124, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:21:26,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.86 vs. limit=5.0
+2024-09-19 01:21:36,513 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.20 vs. limit=22.5
+2024-09-19 01:21:48,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=561980.0, ans=0.1
+2024-09-19 01:22:24,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.43 vs. limit=12.0
+2024-09-19 01:22:27,308 INFO [train.py:1198] (1/2) Epoch 32, batch 250, loss[loss=0.2479, ctc_loss=0.1272, cr_loss=0.3866, attn_decoder_loss=0.2528, over 29279.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1196, cr_loss=0.3625, attn_decoder_loss=0.2425, over 4142435.04 frames. ], batch size: 100, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:22:49,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.max_abs, batch_count=562140.0, ans=10.0
+2024-09-19 01:22:53,928 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.223e+01 8.466e+01 9.044e+01 9.662e+01 1.743e+02, threshold=1.809e+02, percent-clipped=0.0
+2024-09-19 01:23:03,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=562180.0, ans=0.125
+2024-09-19 01:23:35,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.51 vs. limit=15.0
+2024-09-19 01:23:45,499 INFO [train.py:1198] (1/2) Epoch 32, batch 300, loss[loss=0.2535, ctc_loss=0.1341, cr_loss=0.386, attn_decoder_loss=0.2582, over 29553.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1195, cr_loss=0.3624, attn_decoder_loss=0.2423, over 4511285.76 frames. ], batch size: 92, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:24:27,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=562380.0, ans=0.0
+2024-09-19 01:24:27,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=562380.0, ans=0.0
+2024-09-19 01:24:27,861 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.46 vs. limit=15.0
+2024-09-19 01:24:42,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=562420.0, ans=22.5
+2024-09-19 01:25:04,396 INFO [train.py:1198] (1/2) Epoch 32, batch 350, loss[loss=0.2183, ctc_loss=0.105, cr_loss=0.3237, attn_decoder_loss=0.2237, over 29299.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.12, cr_loss=0.3632, attn_decoder_loss=0.2426, over 4796318.03 frames. ], batch size: 71, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:25:28,324 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.269e+01 8.336e+01 8.922e+01 9.619e+01 6.149e+02, threshold=1.784e+02, percent-clipped=1.0
+2024-09-19 01:25:42,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=562580.0, ans=0.125
+2024-09-19 01:26:03,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=562660.0, ans=0.125
+2024-09-19 01:26:05,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=562660.0, ans=0.0
+2024-09-19 01:26:11,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=562660.0, ans=0.125
+2024-09-19 01:26:11,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=562660.0, ans=0.125
+2024-09-19 01:26:15,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=562660.0, ans=0.125
+2024-09-19 01:26:19,651 INFO [train.py:1198] (1/2) Epoch 32, batch 400, loss[loss=0.2467, ctc_loss=0.1267, cr_loss=0.3814, attn_decoder_loss=0.2516, over 29696.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1197, cr_loss=0.363, attn_decoder_loss=0.2424, over 5024609.59 frames. ], batch size: 82, lr: 3.48e-03, grad_scale: 16.0
+2024-09-19 01:26:31,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.91 vs. limit=15.0
+2024-09-19 01:26:45,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=562740.0, ans=0.0
+2024-09-19 01:26:51,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=562780.0, ans=0.0
+2024-09-19 01:26:55,081 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.78 vs. limit=15.0
+2024-09-19 01:26:57,982 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.48 vs. limit=15.0
+2024-09-19 01:27:38,243 INFO [train.py:1198] (1/2) Epoch 32, batch 450, loss[loss=0.2435, ctc_loss=0.1228, cr_loss=0.3571, attn_decoder_loss=0.249, over 29700.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1199, cr_loss=0.363, attn_decoder_loss=0.2425, over 5186077.29 frames. ], batch size: 83, lr: 3.48e-03, grad_scale: 16.0
+2024-09-19 01:27:49,809 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.69 vs. limit=22.5
+2024-09-19 01:27:57,387 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=16.71 vs. limit=15.0
+2024-09-19 01:28:02,737 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.307e+01 8.493e+01 8.894e+01 9.370e+01 1.465e+02, threshold=1.779e+02, percent-clipped=0.0
+2024-09-19 01:28:06,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=562940.0, ans=0.125
+2024-09-19 01:28:07,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=562980.0, ans=0.125
+2024-09-19 01:28:28,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=563020.0, ans=0.125
+2024-09-19 01:28:36,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=563020.0, ans=0.035
+2024-09-19 01:28:39,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=563060.0, ans=0.05
+2024-09-19 01:28:40,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=563060.0, ans=0.0
+2024-09-19 01:28:56,474 INFO [train.py:1198] (1/2) Epoch 32, batch 500, loss[loss=0.2514, ctc_loss=0.1275, cr_loss=0.3715, attn_decoder_loss=0.2569, over 29444.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1195, cr_loss=0.3621, attn_decoder_loss=0.2419, over 5329552.85 frames. ], batch size: 94, lr: 3.48e-03, grad_scale: 16.0
+2024-09-19 01:29:16,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=563140.0, ans=0.125
+2024-09-19 01:29:47,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=563220.0, ans=0.125
+2024-09-19 01:30:07,496 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.93 vs. limit=15.0
+2024-09-19 01:30:08,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=563260.0, ans=0.04949747468305833
+2024-09-19 01:30:09,231 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.22 vs. limit=15.0
+2024-09-19 01:30:10,444 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.39 vs. limit=15.0
+2024-09-19 01:30:12,542 INFO [train.py:1198] (1/2) Epoch 32, batch 550, loss[loss=0.2555, ctc_loss=0.1307, cr_loss=0.3769, attn_decoder_loss=0.261, over 28925.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1199, cr_loss=0.3626, attn_decoder_loss=0.2421, over 5422748.35 frames. ], batch size: 104, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:30:29,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=563340.0, ans=0.2
+2024-09-19 01:30:36,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=563340.0, ans=0.1
+2024-09-19 01:30:40,429 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.146e+01 8.423e+01 9.076e+01 9.566e+01 2.311e+02, threshold=1.815e+02, percent-clipped=1.0
+2024-09-19 01:30:48,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=563380.0, ans=0.0
+2024-09-19 01:31:18,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=563460.0, ans=0.035
+2024-09-19 01:31:30,748 INFO [train.py:1198] (1/2) Epoch 32, batch 600, loss[loss=0.2503, ctc_loss=0.1303, cr_loss=0.3827, attn_decoder_loss=0.2551, over 29250.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1196, cr_loss=0.3622, attn_decoder_loss=0.2422, over 5507978.67 frames. ], batch size: 100, lr: 3.48e-03, grad_scale: 8.0
+2024-09-19 01:31:35,010 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.71 vs. limit=10.0
+2024-09-19 01:31:44,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=563540.0, ans=0.0
+2024-09-19 01:31:47,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=563540.0, ans=0.0
+2024-09-19 01:31:58,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.52 vs. limit=15.0
+2024-09-19 01:32:04,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=563580.0, ans=0.0
+2024-09-19 01:32:04,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=563580.0, ans=0.5
+2024-09-19 01:32:05,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=563580.0, ans=0.0
+2024-09-19 01:32:23,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=563620.0, ans=0.125
+2024-09-19 01:32:25,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=563620.0, ans=0.125
+2024-09-19 01:32:28,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=563620.0, ans=0.125
+2024-09-19 01:32:29,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=563660.0, ans=0.125
+2024-09-19 01:32:33,488 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.49 vs. limit=15.0
+2024-09-19 01:32:35,230 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.99 vs. limit=22.5
+2024-09-19 01:32:40,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=563660.0, ans=0.2
+2024-09-19 01:32:45,913 INFO [train.py:1198] (1/2) Epoch 32, batch 650, loss[loss=0.2411, ctc_loss=0.1182, cr_loss=0.3545, attn_decoder_loss=0.2468, over 29759.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1189, cr_loss=0.3604, attn_decoder_loss=0.2415, over 5584808.02 frames. ], batch size: 81, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:33:02,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=563740.0, ans=0.0
+2024-09-19 01:33:02,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=563740.0, ans=0.2
+2024-09-19 01:33:05,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=563740.0, ans=0.125
+2024-09-19 01:33:14,107 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.233e+01 8.421e+01 8.815e+01 9.543e+01 5.182e+02, threshold=1.763e+02, percent-clipped=1.0
+2024-09-19 01:33:14,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=563740.0, ans=0.125
+2024-09-19 01:33:14,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=563740.0, ans=0.125
+2024-09-19 01:33:40,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=563820.0, ans=0.125
+2024-09-19 01:33:49,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=563860.0, ans=0.0
+2024-09-19 01:33:53,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=563860.0, ans=0.2
+2024-09-19 01:34:04,049 INFO [train.py:1198] (1/2) Epoch 32, batch 700, loss[loss=0.2295, ctc_loss=0.1169, cr_loss=0.3487, attn_decoder_loss=0.2343, over 29512.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1196, cr_loss=0.3617, attn_decoder_loss=0.2421, over 5635022.05 frames. ], batch size: 76, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:34:19,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=563940.0, ans=0.025
+2024-09-19 01:34:19,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=563940.0, ans=0.0
+2024-09-19 01:34:19,862 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.18 vs. limit=6.0
+2024-09-19 01:34:25,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=563940.0, ans=0.0
+2024-09-19 01:34:30,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=563940.0, ans=0.1
+2024-09-19 01:34:40,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=563980.0, ans=0.0
+2024-09-19 01:34:57,878 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.06 vs. limit=15.0
+2024-09-19 01:35:01,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=564020.0, ans=0.1
+2024-09-19 01:35:22,669 INFO [train.py:1198] (1/2) Epoch 32, batch 750, loss[loss=0.2374, ctc_loss=0.1128, cr_loss=0.3246, attn_decoder_loss=0.2441, over 29696.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1191, cr_loss=0.3602, attn_decoder_loss=0.2416, over 5673396.83 frames. ], batch size: 82, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:35:24,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=564100.0, ans=0.0
+2024-09-19 01:35:42,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=564140.0, ans=0.07
+2024-09-19 01:35:43,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=564140.0, ans=0.0
+2024-09-19 01:35:48,088 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 8.447e+01 8.933e+01 9.518e+01 3.479e+02, threshold=1.787e+02, percent-clipped=1.0
+2024-09-19 01:35:57,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=564180.0, ans=0.1
+2024-09-19 01:36:06,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=564220.0, ans=0.09899494936611666
+2024-09-19 01:36:07,396 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.90 vs. limit=15.0
+2024-09-19 01:36:38,475 INFO [train.py:1198] (1/2) Epoch 32, batch 800, loss[loss=0.2222, ctc_loss=0.1056, cr_loss=0.3371, attn_decoder_loss=0.2277, over 29606.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1189, cr_loss=0.3598, attn_decoder_loss=0.2415, over 5704904.62 frames. ], batch size: 73, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:36:55,573 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.17 vs. limit=10.0
+2024-09-19 01:37:29,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=564420.0, ans=0.125
+2024-09-19 01:37:42,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=564460.0, ans=0.0
+2024-09-19 01:37:56,055 INFO [train.py:1198] (1/2) Epoch 32, batch 850, loss[loss=0.2436, ctc_loss=0.1123, cr_loss=0.3292, attn_decoder_loss=0.2509, over 29693.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1186, cr_loss=0.3589, attn_decoder_loss=0.2411, over 5733824.02 frames. ], batch size: 89, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:37:59,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=564500.0, ans=0.1
+2024-09-19 01:38:12,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=564540.0, ans=0.125
+2024-09-19 01:38:15,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=564540.0, ans=0.1
+2024-09-19 01:38:21,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=564540.0, ans=0.125
+2024-09-19 01:38:23,001 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.469e+01 8.587e+01 9.050e+01 9.701e+01 1.930e+02, threshold=1.810e+02, percent-clipped=1.0
+2024-09-19 01:38:30,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=564580.0, ans=0.125
+2024-09-19 01:38:33,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=564580.0, ans=0.2
+2024-09-19 01:38:34,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=564580.0, ans=0.0
+2024-09-19 01:39:13,904 INFO [train.py:1198] (1/2) Epoch 32, batch 900, loss[loss=0.2225, ctc_loss=0.1074, cr_loss=0.3309, attn_decoder_loss=0.228, over 29570.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1192, cr_loss=0.3601, attn_decoder_loss=0.2417, over 5739613.87 frames. ], batch size: 73, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:39:18,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=564700.0, ans=0.125
+2024-09-19 01:39:21,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=564700.0, ans=0.07
+2024-09-19 01:39:23,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=564700.0, ans=0.125
+2024-09-19 01:39:26,619 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.96 vs. limit=15.0
+2024-09-19 01:39:26,682 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.34 vs. limit=22.5
+2024-09-19 01:39:30,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=564740.0, ans=0.05
+2024-09-19 01:39:32,939 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.00 vs. limit=15.0
+2024-09-19 01:39:34,570 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.22 vs. limit=15.0
+2024-09-19 01:39:58,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=564820.0, ans=0.125
+2024-09-19 01:40:27,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=564860.0, ans=0.0
+2024-09-19 01:40:28,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=564900.0, ans=0.025
+2024-09-19 01:40:29,655 INFO [train.py:1198] (1/2) Epoch 32, batch 950, loss[loss=0.2154, ctc_loss=0.1033, cr_loss=0.3099, attn_decoder_loss=0.221, over 29494.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1197, cr_loss=0.3608, attn_decoder_loss=0.2419, over 5742741.67 frames. ], batch size: 74, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:40:40,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=564900.0, ans=0.0
+2024-09-19 01:40:57,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=564940.0, ans=0.2
+2024-09-19 01:40:59,193 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.638e+01 8.678e+01 9.053e+01 9.826e+01 2.124e+02, threshold=1.811e+02, percent-clipped=3.0
+2024-09-19 01:41:07,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=564980.0, ans=0.025
+2024-09-19 01:41:07,498 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.06 vs. limit=15.0
+2024-09-19 01:41:20,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=565020.0, ans=0.125
+2024-09-19 01:41:22,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=565020.0, ans=0.0
+2024-09-19 01:41:32,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=565060.0, ans=0.0
+2024-09-19 01:41:40,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=565060.0, ans=0.0
+2024-09-19 01:41:47,539 INFO [train.py:1198] (1/2) Epoch 32, batch 1000, loss[loss=0.2248, ctc_loss=0.1127, cr_loss=0.3493, attn_decoder_loss=0.2295, over 29524.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1201, cr_loss=0.3612, attn_decoder_loss=0.2423, over 5737400.70 frames. ], batch size: 77, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:41:49,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=565100.0, ans=0.0
+2024-09-19 01:41:50,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=565100.0, ans=0.125
+2024-09-19 01:42:19,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.90 vs. limit=15.0
+2024-09-19 01:42:23,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=565180.0, ans=0.125
+2024-09-19 01:42:43,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=565220.0, ans=0.0
+2024-09-19 01:42:45,281 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=13.26 vs. limit=15.0
+2024-09-19 01:42:46,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=565220.0, ans=10.0
+2024-09-19 01:42:55,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=565260.0, ans=0.1
+2024-09-19 01:43:02,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=565260.0, ans=0.0
+2024-09-19 01:43:02,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=565260.0, ans=0.125
+2024-09-19 01:43:05,367 INFO [train.py:1198] (1/2) Epoch 32, batch 1050, loss[loss=0.2395, ctc_loss=0.1177, cr_loss=0.3417, attn_decoder_loss=0.2455, over 29674.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.12, cr_loss=0.3612, attn_decoder_loss=0.2418, over 5745808.82 frames. ], batch size: 85, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:43:10,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=565300.0, ans=0.07
+2024-09-19 01:43:23,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=565340.0, ans=0.125
+2024-09-19 01:43:32,745 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.148e+01 8.597e+01 9.014e+01 9.453e+01 2.467e+02, threshold=1.803e+02, percent-clipped=1.0
+2024-09-19 01:43:44,377 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.93 vs. limit=12.0
+2024-09-19 01:44:21,449 INFO [train.py:1198] (1/2) Epoch 32, batch 1100, loss[loss=0.2424, ctc_loss=0.1246, cr_loss=0.384, attn_decoder_loss=0.2469, over 29461.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.12, cr_loss=0.3614, attn_decoder_loss=0.2419, over 5758217.20 frames. ], batch size: 78, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:44:33,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.35 vs. limit=15.0
+2024-09-19 01:44:45,241 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.61 vs. limit=12.0
+2024-09-19 01:44:47,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=565540.0, ans=0.125
+2024-09-19 01:45:24,763 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.89 vs. limit=10.0
+2024-09-19 01:45:40,260 INFO [train.py:1198] (1/2) Epoch 32, batch 1150, loss[loss=0.2406, ctc_loss=0.1257, cr_loss=0.3965, attn_decoder_loss=0.2446, over 29474.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1201, cr_loss=0.3614, attn_decoder_loss=0.2421, over 5755811.72 frames. ], batch size: 78, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:45:58,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=565740.0, ans=0.125
+2024-09-19 01:46:08,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=565740.0, ans=0.2
+2024-09-19 01:46:10,194 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.327e+01 8.460e+01 8.830e+01 9.335e+01 1.572e+02, threshold=1.766e+02, percent-clipped=0.0
+2024-09-19 01:46:36,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=565820.0, ans=0.2
+2024-09-19 01:46:39,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=565820.0, ans=0.125
+2024-09-19 01:46:48,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=565860.0, ans=0.1
+2024-09-19 01:46:57,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=565900.0, ans=0.1
+2024-09-19 01:46:58,700 INFO [train.py:1198] (1/2) Epoch 32, batch 1200, loss[loss=0.2387, ctc_loss=0.1182, cr_loss=0.3575, attn_decoder_loss=0.2442, over 29689.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1204, cr_loss=0.3622, attn_decoder_loss=0.2426, over 5748055.40 frames. ], batch size: 85, lr: 3.47e-03, grad_scale: 16.0
+2024-09-19 01:47:17,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.95 vs. limit=6.0
+2024-09-19 01:47:46,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=566020.0, ans=0.125
+2024-09-19 01:48:14,673 INFO [train.py:1198] (1/2) Epoch 32, batch 1250, loss[loss=0.2524, ctc_loss=0.1357, cr_loss=0.3849, attn_decoder_loss=0.2568, over 29538.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1207, cr_loss=0.3634, attn_decoder_loss=0.2431, over 5775316.87 frames. ], batch size: 92, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:48:24,568 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.56 vs. limit=15.0
+2024-09-19 01:48:28,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=566140.0, ans=0.125
+2024-09-19 01:48:39,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=566140.0, ans=0.035
+2024-09-19 01:48:43,430 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.280e+01 8.626e+01 9.127e+01 9.598e+01 1.741e+02, threshold=1.825e+02, percent-clipped=0.0
+2024-09-19 01:49:22,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=566260.0, ans=0.125
+2024-09-19 01:49:22,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=566260.0, ans=0.0
+2024-09-19 01:49:32,739 INFO [train.py:1198] (1/2) Epoch 32, batch 1300, loss[loss=0.2434, ctc_loss=0.1178, cr_loss=0.3582, attn_decoder_loss=0.2494, over 28607.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.12, cr_loss=0.3613, attn_decoder_loss=0.2422, over 5778606.81 frames. ], batch size: 112, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:49:48,423 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 01:50:36,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=566460.0, ans=0.125
+2024-09-19 01:50:44,742 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.12 vs. limit=15.0
+2024-09-19 01:50:51,304 INFO [train.py:1198] (1/2) Epoch 32, batch 1350, loss[loss=0.2367, ctc_loss=0.1201, cr_loss=0.3608, attn_decoder_loss=0.2417, over 29768.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1195, cr_loss=0.3612, attn_decoder_loss=0.2418, over 5796842.38 frames. ], batch size: 81, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:50:53,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=566500.0, ans=0.125
+2024-09-19 01:51:06,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=566540.0, ans=0.125
+2024-09-19 01:51:19,498 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.271e+01 8.117e+01 8.702e+01 9.337e+01 1.229e+02, threshold=1.740e+02, percent-clipped=0.0
+2024-09-19 01:51:40,256 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=6.12 vs. limit=12.0
+2024-09-19 01:51:58,426 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.69 vs. limit=10.0
+2024-09-19 01:52:02,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=566660.0, ans=0.125
+2024-09-19 01:52:04,668 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.00 vs. limit=15.0
+2024-09-19 01:52:06,533 INFO [train.py:1198] (1/2) Epoch 32, batch 1400, loss[loss=0.2058, ctc_loss=0.0915, cr_loss=0.2935, attn_decoder_loss=0.212, over 29560.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1193, cr_loss=0.3607, attn_decoder_loss=0.2417, over 5807902.58 frames. ], batch size: 69, lr: 3.47e-03, grad_scale: 8.0
+2024-09-19 01:52:26,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.86 vs. limit=12.0
+2024-09-19 01:52:46,744 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.83 vs. limit=15.0
+2024-09-19 01:52:47,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=566780.0, ans=0.1
+2024-09-19 01:52:55,304 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.32 vs. limit=6.0
+2024-09-19 01:53:08,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=566860.0, ans=0.1
+2024-09-19 01:53:12,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=566860.0, ans=0.1
+2024-09-19 01:53:14,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=566860.0, ans=0.0
+2024-09-19 01:53:24,541 INFO [train.py:1198] (1/2) Epoch 32, batch 1450, loss[loss=0.2552, ctc_loss=0.1349, cr_loss=0.4089, attn_decoder_loss=0.2595, over 29432.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1193, cr_loss=0.3609, attn_decoder_loss=0.242, over 5804066.30 frames. ], batch size: 94, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 01:53:38,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=566940.0, ans=0.0
+2024-09-19 01:53:41,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=566940.0, ans=0.1
+2024-09-19 01:53:47,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=566940.0, ans=0.1
+2024-09-19 01:53:53,180 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.579e+01 8.580e+01 8.959e+01 9.480e+01 1.633e+02, threshold=1.792e+02, percent-clipped=0.0
+2024-09-19 01:54:10,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=567020.0, ans=0.125
+2024-09-19 01:54:27,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=567060.0, ans=0.0
+2024-09-19 01:54:28,918 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 01:54:31,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=567060.0, ans=0.125
+2024-09-19 01:54:42,377 INFO [train.py:1198] (1/2) Epoch 32, batch 1500, loss[loss=0.2416, ctc_loss=0.1199, cr_loss=0.3541, attn_decoder_loss=0.2472, over 29642.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1194, cr_loss=0.3612, attn_decoder_loss=0.2423, over 5804806.62 frames. ], batch size: 86, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 01:54:46,262 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.62 vs. limit=15.0
+2024-09-19 01:54:48,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=567100.0, ans=0.0
+2024-09-19 01:55:02,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=567140.0, ans=0.2
+2024-09-19 01:55:38,673 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.94 vs. limit=15.0
+2024-09-19 01:55:49,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=567260.0, ans=0.125
+2024-09-19 01:55:52,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=567260.0, ans=0.125
+2024-09-19 01:55:52,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=567260.0, ans=0.125
+2024-09-19 01:55:57,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.05 vs. limit=22.5
+2024-09-19 01:55:58,510 INFO [train.py:1198] (1/2) Epoch 32, batch 1550, loss[loss=0.2547, ctc_loss=0.1322, cr_loss=0.3922, attn_decoder_loss=0.2597, over 29528.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1196, cr_loss=0.361, attn_decoder_loss=0.242, over 5779497.81 frames. ], batch size: 90, lr: 3.46e-03, grad_scale: 8.0
+2024-09-19 01:55:58,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=567300.0, ans=0.2
+2024-09-19 01:56:11,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=567300.0, ans=0.125
+2024-09-19 01:56:13,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=567340.0, ans=0.0
+2024-09-19 01:56:17,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=567340.0, ans=0.125
+2024-09-19 01:56:18,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=567340.0, ans=0.0
+2024-09-19 01:56:27,113 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.568e+01 8.583e+01 9.090e+01 9.539e+01 2.299e+02, threshold=1.818e+02, percent-clipped=1.0
+2024-09-19 01:56:39,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=567380.0, ans=0.125
+2024-09-19 01:57:00,518 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.31 vs. limit=15.0
+2024-09-19 01:57:16,197 INFO [train.py:1198] (1/2) Epoch 32, batch 1600, loss[loss=0.2394, ctc_loss=0.1142, cr_loss=0.3444, attn_decoder_loss=0.2457, over 29714.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1194, cr_loss=0.3606, attn_decoder_loss=0.2419, over 5760809.96 frames.
], batch size: 85, lr: 3.46e-03, grad_scale: 16.0 +2024-09-19 01:57:25,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=567500.0, ans=0.025 +2024-09-19 01:57:34,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=567540.0, ans=0.125 +2024-09-19 01:57:49,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=567580.0, ans=0.05 +2024-09-19 01:57:55,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=567580.0, ans=0.0 +2024-09-19 01:58:25,939 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.69 vs. limit=15.0 +2024-09-19 01:58:28,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=567660.0, ans=0.0 +2024-09-19 01:58:28,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=567660.0, ans=0.0 +2024-09-19 01:58:34,175 INFO [train.py:1198] (1/2) Epoch 32, batch 1650, loss[loss=0.2503, ctc_loss=0.1224, cr_loss=0.3683, attn_decoder_loss=0.2564, over 29726.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1193, cr_loss=0.3606, attn_decoder_loss=0.2419, over 5756351.97 frames. ], batch size: 89, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 01:58:42,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=567700.0, ans=0.125 +2024-09-19 01:58:47,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=567740.0, ans=0.125 +2024-09-19 01:59:04,229 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.210e+01 8.351e+01 8.988e+01 9.892e+01 1.504e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-19 01:59:19,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=567820.0, ans=0.0 +2024-09-19 01:59:30,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=567820.0, ans=0.1 +2024-09-19 01:59:33,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=567860.0, ans=0.0 +2024-09-19 01:59:36,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=567860.0, ans=0.125 +2024-09-19 01:59:49,521 INFO [train.py:1198] (1/2) Epoch 32, batch 1700, loss[loss=0.2067, ctc_loss=0.09634, cr_loss=0.3241, attn_decoder_loss=0.2118, over 29566.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1191, cr_loss=0.3607, attn_decoder_loss=0.2418, over 5778192.71 frames. ], batch size: 69, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:00:00,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=567900.0, ans=0.125 +2024-09-19 02:00:10,201 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.38 vs. 
limit=15.0 +2024-09-19 02:00:15,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=567940.0, ans=0.0 +2024-09-19 02:00:19,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=567980.0, ans=0.125 +2024-09-19 02:01:05,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=568060.0, ans=0.125 +2024-09-19 02:01:07,013 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:01:08,136 INFO [train.py:1198] (1/2) Epoch 32, batch 1750, loss[loss=0.2128, ctc_loss=0.1049, cr_loss=0.3156, attn_decoder_loss=0.2178, over 29342.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1187, cr_loss=0.3597, attn_decoder_loss=0.2414, over 5786781.04 frames. ], batch size: 67, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:01:08,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=568100.0, ans=0.0 +2024-09-19 02:01:31,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=568140.0, ans=0.125 +2024-09-19 02:01:38,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=568140.0, ans=0.5 +2024-09-19 02:01:38,530 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.59 vs. limit=22.5 +2024-09-19 02:01:40,653 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.109e+01 8.621e+01 8.991e+01 9.586e+01 2.043e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-19 02:01:42,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=568180.0, ans=0.0 +2024-09-19 02:01:48,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=568180.0, ans=0.025 +2024-09-19 02:01:54,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=568220.0, ans=0.125 +2024-09-19 02:02:10,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=568260.0, ans=0.025 +2024-09-19 02:02:25,097 INFO [train.py:1198] (1/2) Epoch 32, batch 1800, loss[loss=0.2428, ctc_loss=0.1209, cr_loss=0.3542, attn_decoder_loss=0.2485, over 29703.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.119, cr_loss=0.3601, attn_decoder_loss=0.2417, over 5789430.45 frames. ], batch size: 83, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:03:41,086 INFO [train.py:1198] (1/2) Epoch 32, batch 1850, loss[loss=0.2474, ctc_loss=0.1265, cr_loss=0.3531, attn_decoder_loss=0.253, over 29607.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.119, cr_loss=0.3597, attn_decoder_loss=0.2413, over 5796580.05 frames. 
], batch size: 86, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:04:00,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=568540.0, ans=0.2 +2024-09-19 02:04:11,110 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.765e+01 8.550e+01 9.044e+01 9.477e+01 1.404e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 02:04:16,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=568580.0, ans=0.125 +2024-09-19 02:04:19,481 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.21 vs. limit=15.0 +2024-09-19 02:04:32,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=568620.0, ans=0.0 +2024-09-19 02:04:35,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=568620.0, ans=0.0 +2024-09-19 02:04:58,431 INFO [train.py:1198] (1/2) Epoch 32, batch 1900, loss[loss=0.2465, ctc_loss=0.1175, cr_loss=0.3559, attn_decoder_loss=0.253, over 29690.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1197, cr_loss=0.3613, attn_decoder_loss=0.2423, over 5804582.92 frames. ], batch size: 89, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:05:17,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.93 vs. limit=15.0 +2024-09-19 02:05:18,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=568740.0, ans=0.0 +2024-09-19 02:05:39,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=568780.0, ans=0.2 +2024-09-19 02:05:46,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.36 vs. limit=15.0 +2024-09-19 02:05:53,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=568820.0, ans=0.0 +2024-09-19 02:06:04,208 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.30 vs. limit=12.0 +2024-09-19 02:06:08,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=568860.0, ans=0.0 +2024-09-19 02:06:09,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=568860.0, ans=0.2 +2024-09-19 02:06:16,861 INFO [train.py:1198] (1/2) Epoch 32, batch 1950, loss[loss=0.231, ctc_loss=0.113, cr_loss=0.3555, attn_decoder_loss=0.2362, over 29435.00 frames. ], tot_loss[loss=0.2386, ctc_loss=0.1204, cr_loss=0.3635, attn_decoder_loss=0.2436, over 5819255.24 frames. 
], batch size: 78, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:06:23,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=568900.0, ans=0.0 +2024-09-19 02:06:47,104 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.678e+01 9.081e+01 9.709e+01 1.589e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 02:06:52,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=568980.0, ans=0.0 +2024-09-19 02:06:59,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=568980.0, ans=0.1 +2024-09-19 02:07:11,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=569020.0, ans=0.0 +2024-09-19 02:07:14,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=569020.0, ans=0.1 +2024-09-19 02:07:23,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=569060.0, ans=0.125 +2024-09-19 02:07:32,375 INFO [train.py:1198] (1/2) Epoch 32, batch 2000, loss[loss=0.2143, ctc_loss=0.1028, cr_loss=0.3357, attn_decoder_loss=0.2192, over 29352.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1208, cr_loss=0.3647, attn_decoder_loss=0.244, over 5795424.58 frames. ], batch size: 67, lr: 3.46e-03, grad_scale: 16.0 +2024-09-19 02:07:32,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=569100.0, ans=0.125 +2024-09-19 02:08:13,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=569180.0, ans=0.025 +2024-09-19 02:08:24,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=569220.0, ans=0.1 +2024-09-19 02:08:50,444 INFO [train.py:1198] (1/2) Epoch 32, batch 2050, loss[loss=0.2145, ctc_loss=0.1033, cr_loss=0.3354, attn_decoder_loss=0.2194, over 29418.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1203, cr_loss=0.3638, attn_decoder_loss=0.243, over 5788467.14 frames. ], batch size: 70, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:08:50,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=569300.0, ans=0.0 +2024-09-19 02:08:58,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=569300.0, ans=0.1 +2024-09-19 02:09:04,973 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.27 vs. 
limit=15.0 +2024-09-19 02:09:24,445 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.363e+01 8.440e+01 8.893e+01 9.652e+01 5.207e+02, threshold=1.779e+02, percent-clipped=1.0 +2024-09-19 02:09:45,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=569420.0, ans=0.1 +2024-09-19 02:09:50,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=569420.0, ans=0.0 +2024-09-19 02:09:59,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=569460.0, ans=0.125 +2024-09-19 02:10:00,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=569460.0, ans=0.0 +2024-09-19 02:10:08,278 INFO [train.py:1198] (1/2) Epoch 32, batch 2100, loss[loss=0.2304, ctc_loss=0.1111, cr_loss=0.3432, attn_decoder_loss=0.2361, over 29767.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1197, cr_loss=0.3626, attn_decoder_loss=0.2424, over 5799187.24 frames. ], batch size: 81, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:10:42,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.53 vs. limit=15.0 +2024-09-19 02:10:47,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=569580.0, ans=0.1 +2024-09-19 02:10:52,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=569620.0, ans=0.125 +2024-09-19 02:10:55,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=569620.0, ans=0.125 +2024-09-19 02:10:56,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=569620.0, ans=0.125 +2024-09-19 02:11:02,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=569620.0, ans=0.1 +2024-09-19 02:11:08,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=569660.0, ans=0.125 +2024-09-19 02:11:11,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=569660.0, ans=0.0 +2024-09-19 02:11:17,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=569660.0, ans=0.0 +2024-09-19 02:11:19,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=569660.0, ans=0.125 +2024-09-19 02:11:23,547 INFO [train.py:1198] (1/2) Epoch 32, batch 2150, loss[loss=0.2366, ctc_loss=0.1221, cr_loss=0.383, attn_decoder_loss=0.2408, over 29413.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1193, cr_loss=0.3619, attn_decoder_loss=0.2419, over 5813551.73 frames. 
], batch size: 78, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:11:25,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=569700.0, ans=0.125 +2024-09-19 02:11:25,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=569700.0, ans=0.0 +2024-09-19 02:11:36,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=569700.0, ans=0.125 +2024-09-19 02:11:42,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.72 vs. limit=10.0 +2024-09-19 02:11:48,218 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:11:55,487 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.521e+01 8.476e+01 8.874e+01 9.335e+01 1.569e+02, threshold=1.775e+02, percent-clipped=0.0 +2024-09-19 02:11:58,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=569780.0, ans=0.125 +2024-09-19 02:12:12,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=569820.0, ans=0.0 +2024-09-19 02:12:41,307 INFO [train.py:1198] (1/2) Epoch 32, batch 2200, loss[loss=0.2444, ctc_loss=0.1175, cr_loss=0.3385, attn_decoder_loss=0.251, over 29631.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1196, cr_loss=0.363, attn_decoder_loss=0.2423, over 5810036.18 frames. ], batch size: 86, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:12:42,185 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.93 vs. limit=15.0 +2024-09-19 02:12:45,213 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.45 vs. limit=22.5 +2024-09-19 02:13:05,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=569940.0, ans=0.125 +2024-09-19 02:13:08,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=569940.0, ans=0.0 +2024-09-19 02:13:45,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=12.14 vs. limit=12.0 +2024-09-19 02:13:46,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=570060.0, ans=0.025 +2024-09-19 02:13:50,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=570060.0, ans=0.5 +2024-09-19 02:13:51,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=570060.0, ans=0.125 +2024-09-19 02:13:55,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten.whitening_limit, batch_count=570060.0, ans=15.0 +2024-09-19 02:13:59,411 INFO [train.py:1198] (1/2) Epoch 32, batch 2250, loss[loss=0.2492, ctc_loss=0.1238, cr_loss=0.3727, attn_decoder_loss=0.2548, over 29703.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1194, cr_loss=0.3624, attn_decoder_loss=0.2421, over 5808327.24 frames. 
], batch size: 82, lr: 3.46e-03, grad_scale: 8.0 +2024-09-19 02:13:59,716 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:14:31,248 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.789e+01 8.483e+01 9.181e+01 9.844e+01 2.065e+02, threshold=1.836e+02, percent-clipped=2.0 +2024-09-19 02:14:31,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=570180.0, ans=0.0 +2024-09-19 02:14:44,273 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.60 vs. limit=15.0 +2024-09-19 02:14:49,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=570220.0, ans=0.0 +2024-09-19 02:14:59,139 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.37 vs. limit=22.5 +2024-09-19 02:15:08,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=570260.0, ans=0.125 +2024-09-19 02:15:11,702 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.23 vs. limit=22.5 +2024-09-19 02:15:12,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=570260.0, ans=0.0 +2024-09-19 02:15:15,151 INFO [train.py:1198] (1/2) Epoch 32, batch 2300, loss[loss=0.2126, ctc_loss=0.1079, cr_loss=0.3394, attn_decoder_loss=0.2167, over 29316.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1186, cr_loss=0.3604, attn_decoder_loss=0.2409, over 5798343.45 frames. ], batch size: 71, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:15:17,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=570300.0, ans=0.025 +2024-09-19 02:15:17,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=570300.0, ans=0.1 +2024-09-19 02:15:30,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=570340.0, ans=0.1 +2024-09-19 02:15:41,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=570340.0, ans=0.09899494936611666 +2024-09-19 02:15:59,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=570420.0, ans=0.125 +2024-09-19 02:16:05,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=570420.0, ans=0.0 +2024-09-19 02:16:10,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.41 vs. limit=6.0 +2024-09-19 02:16:31,331 INFO [train.py:1198] (1/2) Epoch 32, batch 2350, loss[loss=0.2414, ctc_loss=0.1201, cr_loss=0.3595, attn_decoder_loss=0.2469, over 29705.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1188, cr_loss=0.3608, attn_decoder_loss=0.2413, over 5805316.17 frames. 
], batch size: 83, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:16:47,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=570540.0, ans=0.125 +2024-09-19 02:17:07,393 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.359e+01 8.588e+01 9.289e+01 9.851e+01 1.770e+02, threshold=1.858e+02, percent-clipped=0.0 +2024-09-19 02:17:21,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=570620.0, ans=0.1 +2024-09-19 02:17:22,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=570620.0, ans=0.2 +2024-09-19 02:17:24,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=570620.0, ans=0.07 +2024-09-19 02:17:27,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=570620.0, ans=0.125 +2024-09-19 02:17:34,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=570660.0, ans=0.05 +2024-09-19 02:17:50,393 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.86 vs. limit=15.0 +2024-09-19 02:17:51,211 INFO [train.py:1198] (1/2) Epoch 32, batch 2400, loss[loss=0.2365, ctc_loss=0.1248, cr_loss=0.3762, attn_decoder_loss=0.2405, over 29550.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1195, cr_loss=0.3627, attn_decoder_loss=0.2421, over 5808896.73 frames. ], batch size: 76, lr: 3.45e-03, grad_scale: 16.0 +2024-09-19 02:19:07,315 INFO [train.py:1198] (1/2) Epoch 32, batch 2450, loss[loss=0.2428, ctc_loss=0.1244, cr_loss=0.3658, attn_decoder_loss=0.2478, over 29740.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1203, cr_loss=0.3637, attn_decoder_loss=0.2431, over 5784935.90 frames. ], batch size: 82, lr: 3.45e-03, grad_scale: 16.0 +2024-09-19 02:19:17,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.96 vs. limit=15.0 +2024-09-19 02:19:24,903 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.42 vs. limit=15.0 +2024-09-19 02:19:25,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=570940.0, ans=0.0 +2024-09-19 02:19:26,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=9.26 vs. limit=15.0 +2024-09-19 02:19:30,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=570940.0, ans=0.125 +2024-09-19 02:19:33,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=570940.0, ans=0.125 +2024-09-19 02:19:39,211 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.233e+01 8.392e+01 8.947e+01 9.639e+01 2.320e+02, threshold=1.789e+02, percent-clipped=2.0 +2024-09-19 02:19:39,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.02 vs. 
limit=15.0 +2024-09-19 02:19:44,597 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.43 vs. limit=15.0 +2024-09-19 02:19:48,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=570980.0, ans=0.1 +2024-09-19 02:19:56,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=571020.0, ans=0.2 +2024-09-19 02:20:06,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=571060.0, ans=0.0 +2024-09-19 02:20:20,781 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:20:23,375 INFO [train.py:1198] (1/2) Epoch 32, batch 2500, loss[loss=0.2498, ctc_loss=0.1282, cr_loss=0.3881, attn_decoder_loss=0.2547, over 29629.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1204, cr_loss=0.3641, attn_decoder_loss=0.2429, over 5796255.92 frames. ], batch size: 86, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:20:37,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.76 vs. limit=15.0 +2024-09-19 02:20:42,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=571140.0, ans=0.1 +2024-09-19 02:20:44,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=571140.0, ans=0.2 +2024-09-19 02:20:46,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.52 vs. limit=22.5 +2024-09-19 02:20:58,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=571180.0, ans=0.125 +2024-09-19 02:21:11,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=571180.0, ans=0.1 +2024-09-19 02:21:17,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=571220.0, ans=0.125 +2024-09-19 02:21:27,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_abs, batch_count=571260.0, ans=0.5 +2024-09-19 02:21:32,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=571260.0, ans=0.2 +2024-09-19 02:21:38,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=571260.0, ans=0.09899494936611666 +2024-09-19 02:21:43,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=571300.0, ans=0.0 +2024-09-19 02:21:44,430 INFO [train.py:1198] (1/2) Epoch 32, batch 2550, loss[loss=0.2122, ctc_loss=0.1056, cr_loss=0.3379, attn_decoder_loss=0.2165, over 29373.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1199, cr_loss=0.3631, attn_decoder_loss=0.2427, over 5798459.11 frames. 
], batch size: 67, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:21:44,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=571300.0, ans=0.125 +2024-09-19 02:21:49,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=571300.0, ans=0.1 +2024-09-19 02:21:55,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.59 vs. limit=12.0 +2024-09-19 02:22:17,610 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.182e+01 8.677e+01 9.042e+01 9.632e+01 1.838e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 02:22:19,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=571380.0, ans=0.125 +2024-09-19 02:22:40,157 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.47 vs. limit=15.0 +2024-09-19 02:23:00,326 INFO [train.py:1198] (1/2) Epoch 32, batch 2600, loss[loss=0.2246, ctc_loss=0.1035, cr_loss=0.3373, attn_decoder_loss=0.2305, over 29448.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1199, cr_loss=0.363, attn_decoder_loss=0.2429, over 5795152.38 frames. ], batch size: 78, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:23:01,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.24 vs. limit=15.0 +2024-09-19 02:23:04,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.87 vs. limit=15.0 +2024-09-19 02:23:18,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=571540.0, ans=0.125 +2024-09-19 02:23:19,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=571540.0, ans=0.2 +2024-09-19 02:23:21,856 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.13 vs. limit=15.0 +2024-09-19 02:23:39,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=571580.0, ans=0.1 +2024-09-19 02:23:45,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=571620.0, ans=0.0 +2024-09-19 02:24:15,302 INFO [train.py:1198] (1/2) Epoch 32, batch 2650, loss[loss=0.2461, ctc_loss=0.1267, cr_loss=0.3712, attn_decoder_loss=0.2511, over 29238.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1197, cr_loss=0.3623, attn_decoder_loss=0.243, over 5800936.48 frames. ], batch size: 100, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:24:23,616 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.57 vs. 
limit=22.5 +2024-09-19 02:24:32,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=571740.0, ans=0.1 +2024-09-19 02:24:34,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=571740.0, ans=0.5 +2024-09-19 02:24:52,691 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.029e+01 8.436e+01 8.918e+01 9.348e+01 1.627e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-19 02:24:57,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=571780.0, ans=0.125 +2024-09-19 02:25:07,487 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.49 vs. limit=8.0 +2024-09-19 02:25:10,383 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.23 vs. limit=15.0 +2024-09-19 02:25:34,874 INFO [train.py:1198] (1/2) Epoch 32, batch 2700, loss[loss=0.2463, ctc_loss=0.1181, cr_loss=0.3562, attn_decoder_loss=0.2526, over 29539.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1199, cr_loss=0.3629, attn_decoder_loss=0.2432, over 5796303.19 frames. ], batch size: 87, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:25:38,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=571900.0, ans=0.2 +2024-09-19 02:25:44,245 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:25:53,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=571940.0, ans=0.125 +2024-09-19 02:25:59,734 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.43 vs. limit=10.0 +2024-09-19 02:26:18,425 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.45 vs. limit=15.0 +2024-09-19 02:26:30,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=572020.0, ans=0.0 +2024-09-19 02:26:51,238 INFO [train.py:1198] (1/2) Epoch 32, batch 2750, loss[loss=0.2288, ctc_loss=0.1088, cr_loss=0.333, attn_decoder_loss=0.2348, over 29499.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1198, cr_loss=0.3623, attn_decoder_loss=0.2425, over 5795174.42 frames. 
], batch size: 75, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:27:00,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=572100.0, ans=0.025 +2024-09-19 02:27:04,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=572140.0, ans=0.125 +2024-09-19 02:27:20,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=572180.0, ans=0.04949747468305833 +2024-09-19 02:27:24,113 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.561e+01 8.589e+01 9.060e+01 9.796e+01 2.270e+02, threshold=1.812e+02, percent-clipped=2.0 +2024-09-19 02:27:30,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=572180.0, ans=0.025 +2024-09-19 02:27:41,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=572220.0, ans=0.125 +2024-09-19 02:27:48,752 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:27:50,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=572260.0, ans=15.0 +2024-09-19 02:28:07,093 INFO [train.py:1198] (1/2) Epoch 32, batch 2800, loss[loss=0.2688, ctc_loss=0.159, cr_loss=0.4005, attn_decoder_loss=0.2721, over 20354.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1204, cr_loss=0.3628, attn_decoder_loss=0.2428, over 5776632.83 frames. ], batch size: 210, lr: 3.45e-03, grad_scale: 16.0 +2024-09-19 02:28:24,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=572340.0, ans=0.1 +2024-09-19 02:28:24,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=572340.0, ans=0.125 +2024-09-19 02:28:45,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=572380.0, ans=0.1 +2024-09-19 02:28:46,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=572380.0, ans=0.1 +2024-09-19 02:29:01,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=572420.0, ans=15.0 +2024-09-19 02:29:16,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=572460.0, ans=0.125 +2024-09-19 02:29:18,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=572460.0, ans=0.125 +2024-09-19 02:29:18,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=572460.0, ans=0.0 +2024-09-19 02:29:26,879 INFO [train.py:1198] (1/2) Epoch 32, batch 2850, loss[loss=0.2346, ctc_loss=0.1169, cr_loss=0.3705, attn_decoder_loss=0.2394, over 29513.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1208, cr_loss=0.3636, attn_decoder_loss=0.2432, over 5762638.59 frames. 
], batch size: 77, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:29:37,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=572500.0, ans=0.07 +2024-09-19 02:29:42,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=572540.0, ans=0.2 +2024-09-19 02:30:01,860 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.632e+01 8.715e+01 9.222e+01 9.934e+01 2.539e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-19 02:30:33,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=572660.0, ans=0.1 +2024-09-19 02:30:36,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=572660.0, ans=0.125 +2024-09-19 02:30:42,439 INFO [train.py:1198] (1/2) Epoch 32, batch 2900, loss[loss=0.2279, ctc_loss=0.1103, cr_loss=0.3538, attn_decoder_loss=0.2331, over 29423.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.121, cr_loss=0.3647, attn_decoder_loss=0.2439, over 5788095.47 frames. ], batch size: 79, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:30:47,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=572700.0, ans=0.1 +2024-09-19 02:30:58,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=572740.0, ans=0.125 +2024-09-19 02:31:01,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=572740.0, ans=0.1 +2024-09-19 02:31:01,599 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.31 vs. limit=10.0 +2024-09-19 02:31:21,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=572780.0, ans=0.125 +2024-09-19 02:31:32,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.09 vs. limit=22.5 +2024-09-19 02:31:41,412 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.79 vs. limit=15.0 +2024-09-19 02:31:43,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=572860.0, ans=0.125 +2024-09-19 02:31:48,895 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.38 vs. limit=10.0 +2024-09-19 02:31:51,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=572860.0, ans=0.125 +2024-09-19 02:31:54,658 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.78 vs. limit=22.5 +2024-09-19 02:31:58,401 INFO [train.py:1198] (1/2) Epoch 32, batch 2950, loss[loss=0.2282, ctc_loss=0.1055, cr_loss=0.3398, attn_decoder_loss=0.2343, over 29496.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1201, cr_loss=0.3626, attn_decoder_loss=0.2428, over 5783252.10 frames. 
], batch size: 75, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:32:09,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=572900.0, ans=0.2 +2024-09-19 02:32:10,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.99 vs. limit=22.5 +2024-09-19 02:32:37,197 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.68 vs. limit=15.0 +2024-09-19 02:32:37,784 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.544e+01 8.997e+01 9.588e+01 2.155e+02, threshold=1.799e+02, percent-clipped=1.0 +2024-09-19 02:32:50,926 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.17 vs. limit=15.0 +2024-09-19 02:33:18,465 INFO [train.py:1198] (1/2) Epoch 32, batch 3000, loss[loss=0.236, ctc_loss=0.1229, cr_loss=0.386, attn_decoder_loss=0.24, over 29751.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1199, cr_loss=0.3625, attn_decoder_loss=0.2427, over 5784397.95 frames. ], batch size: 81, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:33:18,466 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 02:33:36,935 INFO [train.py:1230] (1/2) Epoch 32, validation: loss=0.2117, ctc_loss=0.0367, cr_loss=5.626e-15, attn_decoder_loss=0.2311, over 944034.00 frames. +2024-09-19 02:33:36,936 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 02:33:40,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=573100.0, ans=0.125 +2024-09-19 02:33:42,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=573100.0, ans=0.0 +2024-09-19 02:33:46,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=573100.0, ans=0.025 +2024-09-19 02:34:04,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=573140.0, ans=0.0 +2024-09-19 02:34:10,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=573180.0, ans=0.0 +2024-09-19 02:34:12,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=573180.0, ans=0.125 +2024-09-19 02:34:24,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=573220.0, ans=0.0 +2024-09-19 02:34:46,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=573260.0, ans=0.125 +2024-09-19 02:34:52,854 INFO [train.py:1198] (1/2) Epoch 32, batch 3050, loss[loss=0.2339, ctc_loss=0.1211, cr_loss=0.3697, attn_decoder_loss=0.2382, over 29506.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1206, cr_loss=0.3642, attn_decoder_loss=0.2435, over 5777657.56 frames. 
], batch size: 76, lr: 3.45e-03, grad_scale: 8.0 +2024-09-19 02:35:14,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=573340.0, ans=0.07 +2024-09-19 02:35:19,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten.whitening_limit, batch_count=573340.0, ans=22.5 +2024-09-19 02:35:27,632 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.361e+01 8.755e+01 9.253e+01 9.957e+01 1.667e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-19 02:35:57,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=573460.0, ans=0.125 +2024-09-19 02:36:12,281 INFO [train.py:1198] (1/2) Epoch 32, batch 3100, loss[loss=0.2593, ctc_loss=0.1339, cr_loss=0.3858, attn_decoder_loss=0.2647, over 29238.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1203, cr_loss=0.3635, attn_decoder_loss=0.2431, over 5777900.20 frames. ], batch size: 100, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:36:42,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=573580.0, ans=0.0 +2024-09-19 02:36:48,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=573580.0, ans=0.0 +2024-09-19 02:36:55,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=573580.0, ans=10.0 +2024-09-19 02:37:03,387 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.60 vs. limit=22.5 +2024-09-19 02:37:12,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=573660.0, ans=0.1 +2024-09-19 02:37:18,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=573660.0, ans=0.2 +2024-09-19 02:37:25,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=573660.0, ans=0.125 +2024-09-19 02:37:27,671 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.20 vs. limit=15.0 +2024-09-19 02:37:28,501 INFO [train.py:1198] (1/2) Epoch 32, batch 3150, loss[loss=0.2576, ctc_loss=0.1327, cr_loss=0.4066, attn_decoder_loss=0.2624, over 28766.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1202, cr_loss=0.3632, attn_decoder_loss=0.2432, over 5784085.46 frames. 
], batch size: 104, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:37:31,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=573700.0, ans=0.0 +2024-09-19 02:37:33,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=573700.0, ans=0.1 +2024-09-19 02:37:53,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=573740.0, ans=0.125 +2024-09-19 02:37:59,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=573780.0, ans=0.0 +2024-09-19 02:37:59,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=573780.0, ans=0.125 +2024-09-19 02:38:03,501 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.454e+01 8.918e+01 9.492e+01 5.119e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-19 02:38:43,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=573900.0, ans=0.0 +2024-09-19 02:38:44,610 INFO [train.py:1198] (1/2) Epoch 32, batch 3200, loss[loss=0.2323, ctc_loss=0.1112, cr_loss=0.3302, attn_decoder_loss=0.2384, over 29794.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.12, cr_loss=0.3627, attn_decoder_loss=0.2426, over 5794932.88 frames. ], batch size: 80, lr: 3.44e-03, grad_scale: 16.0 +2024-09-19 02:39:07,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=573940.0, ans=0.0 +2024-09-19 02:39:40,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=574020.0, ans=0.125 +2024-09-19 02:40:00,527 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.19 vs. limit=6.0 +2024-09-19 02:40:01,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=574100.0, ans=0.2 +2024-09-19 02:40:04,581 INFO [train.py:1198] (1/2) Epoch 32, batch 3250, loss[loss=0.2391, ctc_loss=0.1234, cr_loss=0.3573, attn_decoder_loss=0.244, over 29686.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1202, cr_loss=0.3631, attn_decoder_loss=0.2429, over 5801767.25 frames. 
], batch size: 84, lr: 3.44e-03, grad_scale: 16.0 +2024-09-19 02:40:16,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=574100.0, ans=0.0 +2024-09-19 02:40:19,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=574140.0, ans=0.125 +2024-09-19 02:40:40,323 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.274e+01 8.537e+01 9.027e+01 9.508e+01 1.850e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-19 02:40:52,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=574220.0, ans=0.2 +2024-09-19 02:41:00,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=574220.0, ans=0.125 +2024-09-19 02:41:19,691 INFO [train.py:1198] (1/2) Epoch 32, batch 3300, loss[loss=0.2409, ctc_loss=0.1147, cr_loss=0.3546, attn_decoder_loss=0.2471, over 28497.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1196, cr_loss=0.3614, attn_decoder_loss=0.2417, over 5798945.00 frames. ], batch size: 112, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:41:23,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=574300.0, ans=0.125 +2024-09-19 02:41:42,727 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:41:54,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=574380.0, ans=0.125 +2024-09-19 02:42:04,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=574420.0, ans=0.125 +2024-09-19 02:42:07,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=574420.0, ans=0.125 +2024-09-19 02:42:15,027 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.91 vs. limit=15.0 +2024-09-19 02:42:23,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=574460.0, ans=0.125 +2024-09-19 02:42:35,355 INFO [train.py:1198] (1/2) Epoch 32, batch 3350, loss[loss=0.2505, ctc_loss=0.1264, cr_loss=0.372, attn_decoder_loss=0.256, over 28896.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1201, cr_loss=0.3619, attn_decoder_loss=0.2423, over 5775821.77 frames. ], batch size: 104, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:42:47,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=574500.0, ans=0.125 +2024-09-19 02:42:53,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.10 vs. 
limit=6.0 +2024-09-19 02:43:11,945 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.423e+01 8.605e+01 8.988e+01 9.712e+01 2.177e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-19 02:43:37,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=574660.0, ans=0.0 +2024-09-19 02:43:37,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.71 vs. limit=15.0 +2024-09-19 02:43:55,671 INFO [train.py:1198] (1/2) Epoch 32, batch 3400, loss[loss=0.2079, ctc_loss=0.1045, cr_loss=0.3348, attn_decoder_loss=0.2119, over 29335.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1203, cr_loss=0.3624, attn_decoder_loss=0.2423, over 5768110.21 frames. ], batch size: 67, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:44:32,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=574780.0, ans=0.125 +2024-09-19 02:44:42,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=574820.0, ans=0.0 +2024-09-19 02:44:56,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=574860.0, ans=0.125 +2024-09-19 02:44:56,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=574860.0, ans=0.0 +2024-09-19 02:45:10,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=574900.0, ans=0.0 +2024-09-19 02:45:11,592 INFO [train.py:1198] (1/2) Epoch 32, batch 3450, loss[loss=0.2575, ctc_loss=0.1416, cr_loss=0.3936, attn_decoder_loss=0.2617, over 28198.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1201, cr_loss=0.3624, attn_decoder_loss=0.2428, over 5775461.25 frames. ], batch size: 111, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:45:36,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=574940.0, ans=0.125 +2024-09-19 02:45:40,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=574980.0, ans=0.125 +2024-09-19 02:45:43,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=574980.0, ans=0.025 +2024-09-19 02:45:43,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=574980.0, ans=0.2 +2024-09-19 02:45:47,951 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.235e+01 8.523e+01 9.077e+01 9.652e+01 1.976e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-19 02:45:48,874 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.96 vs. 
limit=15.0 +2024-09-19 02:45:51,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=574980.0, ans=0.0 +2024-09-19 02:45:55,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=575020.0, ans=0.2 +2024-09-19 02:45:55,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=575020.0, ans=0.1 +2024-09-19 02:45:58,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=575020.0, ans=0.125 +2024-09-19 02:46:00,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=575020.0, ans=0.2 +2024-09-19 02:46:04,955 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:46:07,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=575020.0, ans=0.125 +2024-09-19 02:46:27,033 INFO [train.py:1198] (1/2) Epoch 32, batch 3500, loss[loss=0.2174, ctc_loss=0.1113, cr_loss=0.3448, attn_decoder_loss=0.2215, over 29345.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1198, cr_loss=0.3616, attn_decoder_loss=0.2421, over 5776240.91 frames. ], batch size: 71, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:46:33,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=575100.0, ans=0.2 +2024-09-19 02:46:49,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.06 vs. limit=15.0 +2024-09-19 02:46:58,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=575180.0, ans=10.0 +2024-09-19 02:47:03,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=575180.0, ans=0.125 +2024-09-19 02:47:15,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=575220.0, ans=0.125 +2024-09-19 02:47:34,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=575260.0, ans=0.2 +2024-09-19 02:47:42,061 INFO [train.py:1198] (1/2) Epoch 32, batch 3550, loss[loss=0.247, ctc_loss=0.1204, cr_loss=0.37, attn_decoder_loss=0.2528, over 29713.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1194, cr_loss=0.3612, attn_decoder_loss=0.242, over 5782130.66 frames. 
], batch size: 89, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:47:48,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten.whitening_limit, batch_count=575300.0, ans=22.5 +2024-09-19 02:48:12,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=575380.0, ans=0.125 +2024-09-19 02:48:19,430 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.300e+01 8.288e+01 8.961e+01 9.598e+01 1.614e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-19 02:48:19,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=575380.0, ans=0.0 +2024-09-19 02:48:30,248 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:48:40,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1.whitening_limit, batch_count=575420.0, ans=10.0 +2024-09-19 02:48:42,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=575420.0, ans=0.125 +2024-09-19 02:48:48,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=575460.0, ans=0.125 +2024-09-19 02:48:58,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=575500.0, ans=0.0 +2024-09-19 02:49:00,121 INFO [train.py:1198] (1/2) Epoch 32, batch 3600, loss[loss=0.2405, ctc_loss=0.1323, cr_loss=0.3838, attn_decoder_loss=0.244, over 29518.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1193, cr_loss=0.3612, attn_decoder_loss=0.2421, over 5791136.87 frames. ], batch size: 77, lr: 3.44e-03, grad_scale: 16.0 +2024-09-19 02:49:22,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=575540.0, ans=0.0 +2024-09-19 02:49:24,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.96 vs. limit=22.5 +2024-09-19 02:49:31,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=575580.0, ans=0.1 +2024-09-19 02:49:48,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=575620.0, ans=0.0 +2024-09-19 02:49:55,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=575620.0, ans=0.125 +2024-09-19 02:50:11,917 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:50:14,517 INFO [train.py:1198] (1/2) Epoch 32, batch 3650, loss[loss=0.2472, ctc_loss=0.1269, cr_loss=0.3707, attn_decoder_loss=0.2524, over 29508.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1185, cr_loss=0.3597, attn_decoder_loss=0.2413, over 5792489.84 frames. 
], batch size: 90, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:50:17,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=575700.0, ans=0.125 +2024-09-19 02:50:17,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=575700.0, ans=0.2 +2024-09-19 02:50:37,432 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.70 vs. limit=15.0 +2024-09-19 02:50:40,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=15.0 +2024-09-19 02:50:51,847 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.010e+01 8.427e+01 8.894e+01 9.403e+01 1.898e+02, threshold=1.779e+02, percent-clipped=1.0 +2024-09-19 02:51:01,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=575820.0, ans=0.125 +2024-09-19 02:51:06,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=575820.0, ans=0.0 +2024-09-19 02:51:18,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=575860.0, ans=0.1 +2024-09-19 02:51:27,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=575900.0, ans=0.125 +2024-09-19 02:51:29,100 INFO [train.py:1198] (1/2) Epoch 32, batch 3700, loss[loss=0.241, ctc_loss=0.1155, cr_loss=0.3478, attn_decoder_loss=0.2472, over 29726.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1184, cr_loss=0.3595, attn_decoder_loss=0.2416, over 5802902.32 frames. ], batch size: 84, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:51:37,348 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.24 vs. limit=15.0 +2024-09-19 02:51:50,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=575940.0, ans=0.025 +2024-09-19 02:52:31,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=576020.0, ans=0.04949747468305833 +2024-09-19 02:52:51,297 INFO [train.py:1198] (1/2) Epoch 32, batch 3750, loss[loss=0.2079, ctc_loss=0.09606, cr_loss=0.2971, attn_decoder_loss=0.2137, over 29328.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1185, cr_loss=0.3598, attn_decoder_loss=0.2414, over 5805987.73 frames. ], batch size: 67, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:52:58,017 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.78 vs. 
limit=22.5 +2024-09-19 02:53:09,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=576140.0, ans=0.1 +2024-09-19 02:53:15,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=576140.0, ans=0.2 +2024-09-19 02:53:22,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=576180.0, ans=0.1 +2024-09-19 02:53:28,123 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.053e+01 8.581e+01 9.002e+01 9.610e+01 1.544e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-19 02:53:42,038 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 02:53:53,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.20 vs. limit=15.0 +2024-09-19 02:54:01,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=576260.0, ans=0.0 +2024-09-19 02:54:05,647 INFO [train.py:1198] (1/2) Epoch 32, batch 3800, loss[loss=0.2458, ctc_loss=0.1212, cr_loss=0.38, attn_decoder_loss=0.2511, over 29618.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1186, cr_loss=0.3596, attn_decoder_loss=0.2413, over 5798146.61 frames. ], batch size: 86, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:54:20,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=576340.0, ans=0.125 +2024-09-19 02:54:25,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=576340.0, ans=0.1 +2024-09-19 02:54:38,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=576380.0, ans=0.125 +2024-09-19 02:54:58,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=576420.0, ans=0.125 +2024-09-19 02:55:08,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=576460.0, ans=0.2 +2024-09-19 02:55:13,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=576460.0, ans=0.0 +2024-09-19 02:55:19,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=576460.0, ans=0.05 +2024-09-19 02:55:19,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=576460.0, ans=0.125 +2024-09-19 02:55:23,188 INFO [train.py:1198] (1/2) Epoch 32, batch 3850, loss[loss=0.2501, ctc_loss=0.1311, cr_loss=0.3795, attn_decoder_loss=0.2549, over 29322.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1186, cr_loss=0.3597, attn_decoder_loss=0.2413, over 5812429.52 frames. 
], batch size: 100, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:55:41,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=576540.0, ans=0.0 +2024-09-19 02:55:53,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=576580.0, ans=0.125 +2024-09-19 02:56:00,149 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.855e+01 8.481e+01 8.994e+01 9.437e+01 1.418e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-19 02:56:01,122 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.12 vs. limit=15.0 +2024-09-19 02:56:30,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=576660.0, ans=0.125 +2024-09-19 02:56:34,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=576660.0, ans=0.025 +2024-09-19 02:56:37,440 INFO [train.py:1198] (1/2) Epoch 32, batch 3900, loss[loss=0.2483, ctc_loss=0.126, cr_loss=0.3776, attn_decoder_loss=0.2535, over 29628.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1189, cr_loss=0.3604, attn_decoder_loss=0.2418, over 5816952.15 frames. ], batch size: 86, lr: 3.44e-03, grad_scale: 8.0 +2024-09-19 02:57:05,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=576780.0, ans=0.125 +2024-09-19 02:57:15,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=576780.0, ans=0.0 +2024-09-19 02:57:30,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=576820.0, ans=10.0 +2024-09-19 02:57:42,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=576860.0, ans=0.125 +2024-09-19 02:57:52,055 INFO [train.py:1198] (1/2) Epoch 32, batch 3950, loss[loss=0.2512, ctc_loss=0.1293, cr_loss=0.3935, attn_decoder_loss=0.256, over 29462.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1187, cr_loss=0.3596, attn_decoder_loss=0.242, over 5836193.83 frames. ], batch size: 97, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 02:58:19,274 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.16 vs. limit=15.0 +2024-09-19 02:58:26,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=576980.0, ans=0.125 +2024-09-19 02:58:28,774 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.521e+01 9.029e+01 9.542e+01 2.820e+02, threshold=1.806e+02, percent-clipped=2.0 +2024-09-19 02:58:39,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=577020.0, ans=0.125 +2024-09-19 02:58:39,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=577020.0, ans=0.2 +2024-09-19 02:59:05,617 INFO [train.py:1198] (1/2) Epoch 32, batch 4000, loss[loss=0.2268, ctc_loss=0.1116, cr_loss=0.3399, attn_decoder_loss=0.232, over 29513.00 frames. 
], tot_loss[loss=0.2368, ctc_loss=0.1189, cr_loss=0.3594, attn_decoder_loss=0.2419, over 5812916.01 frames. ], batch size: 74, lr: 3.43e-03, grad_scale: 16.0 +2024-09-19 02:59:08,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=577100.0, ans=0.125 +2024-09-19 02:59:19,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.90 vs. limit=22.5 +2024-09-19 02:59:26,748 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.15 vs. limit=15.0 +2024-09-19 02:59:29,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=577140.0, ans=0.0 +2024-09-19 03:00:05,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=577220.0, ans=0.125 +2024-09-19 03:00:09,649 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:00:13,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=577260.0, ans=0.2 +2024-09-19 03:00:22,474 INFO [train.py:1198] (1/2) Epoch 32, batch 4050, loss[loss=0.2561, ctc_loss=0.1495, cr_loss=0.3869, attn_decoder_loss=0.2594, over 20518.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.119, cr_loss=0.3595, attn_decoder_loss=0.2422, over 5796751.46 frames. ], batch size: 213, lr: 3.43e-03, grad_scale: 16.0 +2024-09-19 03:00:28,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=577300.0, ans=0.0 +2024-09-19 03:00:30,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=577300.0, ans=0.125 +2024-09-19 03:00:33,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.36 vs. limit=15.0 +2024-09-19 03:00:59,176 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.569e+01 8.593e+01 9.238e+01 9.964e+01 1.548e+02, threshold=1.848e+02, percent-clipped=0.0 +2024-09-19 03:01:25,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=577460.0, ans=0.025 +2024-09-19 03:01:28,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.25 vs. limit=15.0 +2024-09-19 03:01:36,060 INFO [train.py:1198] (1/2) Epoch 32, batch 4100, loss[loss=0.2557, ctc_loss=0.132, cr_loss=0.383, attn_decoder_loss=0.2609, over 29506.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1192, cr_loss=0.3601, attn_decoder_loss=0.2423, over 5792163.52 frames. ], batch size: 90, lr: 3.43e-03, grad_scale: 16.0 +2024-09-19 03:01:36,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=577500.0, ans=0.0 +2024-09-19 03:01:38,134 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.35 vs. 
limit=15.0 +2024-09-19 03:01:56,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=577540.0, ans=0.1 +2024-09-19 03:01:59,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=577540.0, ans=0.125 +2024-09-19 03:02:35,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=577660.0, ans=0.025 +2024-09-19 03:02:39,337 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.52 vs. limit=22.5 +2024-09-19 03:02:50,335 INFO [train.py:1198] (1/2) Epoch 32, batch 4150, loss[loss=0.2321, ctc_loss=0.1145, cr_loss=0.3647, attn_decoder_loss=0.2371, over 29505.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.119, cr_loss=0.3596, attn_decoder_loss=0.2419, over 5797756.07 frames. ], batch size: 77, lr: 3.43e-03, grad_scale: 16.0 +2024-09-19 03:02:54,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=577700.0, ans=0.09899494936611666 +2024-09-19 03:03:02,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=577700.0, ans=0.0 +2024-09-19 03:03:05,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=577740.0, ans=0.1 +2024-09-19 03:03:26,923 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.373e+01 8.412e+01 8.911e+01 9.455e+01 1.648e+02, threshold=1.782e+02, percent-clipped=0.0 +2024-09-19 03:03:48,302 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.94 vs. limit=12.0 +2024-09-19 03:03:50,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=577860.0, ans=0.5 +2024-09-19 03:03:53,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=577860.0, ans=0.125 +2024-09-19 03:04:04,951 INFO [train.py:1198] (1/2) Epoch 32, batch 4200, loss[loss=0.2464, ctc_loss=0.1241, cr_loss=0.3817, attn_decoder_loss=0.2515, over 29510.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1192, cr_loss=0.3599, attn_decoder_loss=0.242, over 5799916.52 frames. ], batch size: 90, lr: 3.43e-03, grad_scale: 16.0 +2024-09-19 03:04:31,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=577940.0, ans=0.2 +2024-09-19 03:04:41,949 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.53 vs. limit=22.5 +2024-09-19 03:04:48,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=578020.0, ans=0.0 +2024-09-19 03:05:06,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=578060.0, ans=0.125 +2024-09-19 03:05:11,384 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.48 vs. 
limit=15.0 +2024-09-19 03:05:15,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=578060.0, ans=0.125 +2024-09-19 03:05:19,406 INFO [train.py:1198] (1/2) Epoch 32, batch 4250, loss[loss=0.2212, ctc_loss=0.1022, cr_loss=0.3159, attn_decoder_loss=0.2274, over 29513.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1194, cr_loss=0.3603, attn_decoder_loss=0.2425, over 5804922.34 frames. ], batch size: 74, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:05:51,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=578180.0, ans=0.1 +2024-09-19 03:05:57,460 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.485e+01 9.060e+01 9.670e+01 1.862e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 03:06:33,353 INFO [train.py:1198] (1/2) Epoch 32, batch 4300, loss[loss=0.2447, ctc_loss=0.1151, cr_loss=0.3596, attn_decoder_loss=0.2511, over 29551.00 frames. ], tot_loss[loss=0.2377, ctc_loss=0.1196, cr_loss=0.3611, attn_decoder_loss=0.2428, over 5793936.63 frames. ], batch size: 87, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:06:35,759 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.89 vs. limit=15.0 +2024-09-19 03:06:36,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=578300.0, ans=0.1 +2024-09-19 03:06:38,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=578300.0, ans=0.125 +2024-09-19 03:06:40,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.43 vs. limit=15.0 +2024-09-19 03:06:54,100 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:07:04,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=578380.0, ans=0.125 +2024-09-19 03:07:21,701 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=3.98 vs. limit=12.0 +2024-09-19 03:07:25,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=578420.0, ans=0.125 +2024-09-19 03:07:38,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=578460.0, ans=0.125 +2024-09-19 03:07:40,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=578460.0, ans=0.125 +2024-09-19 03:07:41,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=578460.0, ans=0.07 +2024-09-19 03:07:45,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=578460.0, ans=0.125 +2024-09-19 03:07:50,080 INFO [train.py:1198] (1/2) Epoch 32, batch 4350, loss[loss=0.2423, ctc_loss=0.1217, cr_loss=0.3675, attn_decoder_loss=0.2475, over 29459.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1221, cr_loss=0.3664, attn_decoder_loss=0.2459, over 5797182.97 frames. 
], batch size: 97, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:07:58,327 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.06 vs. limit=10.0 +2024-09-19 03:08:03,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=578540.0, ans=0.2 +2024-09-19 03:08:25,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=578580.0, ans=0.125 +2024-09-19 03:08:28,237 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.091e+01 8.949e+01 9.418e+01 9.976e+01 1.682e+02, threshold=1.884e+02, percent-clipped=0.0 +2024-09-19 03:08:28,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=578580.0, ans=0.125 +2024-09-19 03:08:31,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=578580.0, ans=0.025 +2024-09-19 03:08:50,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=578660.0, ans=0.125 +2024-09-19 03:08:52,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=578660.0, ans=0.125 +2024-09-19 03:08:59,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=578660.0, ans=0.125 +2024-09-19 03:09:03,611 INFO [train.py:1198] (1/2) Epoch 32, batch 4400, loss[loss=0.2477, ctc_loss=0.1308, cr_loss=0.3755, attn_decoder_loss=0.2523, over 27288.00 frames. ], tot_loss[loss=0.2427, ctc_loss=0.1235, cr_loss=0.3693, attn_decoder_loss=0.2477, over 5768670.57 frames. ], batch size: 124, lr: 3.43e-03, grad_scale: 16.0 +2024-09-19 03:09:17,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=578740.0, ans=0.125 +2024-09-19 03:09:56,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.08 vs. limit=10.0 +2024-09-19 03:10:02,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=578860.0, ans=0.0 +2024-09-19 03:10:14,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=578860.0, ans=0.125 +2024-09-19 03:10:15,944 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:10:18,545 INFO [train.py:1198] (1/2) Epoch 32, batch 4450, loss[loss=0.2586, ctc_loss=0.151, cr_loss=0.4019, attn_decoder_loss=0.2616, over 19631.00 frames. ], tot_loss[loss=0.2452, ctc_loss=0.1275, cr_loss=0.3748, attn_decoder_loss=0.25, over 5573098.13 frames. 
], batch size: 209, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:10:23,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=578900.0, ans=0.2 +2024-09-19 03:10:32,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=578940.0, ans=0.0 +2024-09-19 03:10:56,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=578980.0, ans=0.0 +2024-09-19 03:10:58,946 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.256e+01 9.217e+01 9.990e+01 1.147e+02 3.633e+02, threshold=1.998e+02, percent-clipped=4.0 +2024-09-19 03:11:19,927 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=10.17 vs. limit=12.0 +2024-09-19 03:11:29,359 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.26 vs. limit=15.0 +2024-09-19 03:11:34,138 INFO [train.py:1198] (1/2) Epoch 32, batch 4500, loss[loss=0.2594, ctc_loss=0.1541, cr_loss=0.3806, attn_decoder_loss=0.2627, over 19317.00 frames. ], tot_loss[loss=0.2473, ctc_loss=0.1308, cr_loss=0.3776, attn_decoder_loss=0.2519, over 5232297.40 frames. ], batch size: 209, lr: 3.43e-03, grad_scale: 8.0 +2024-09-19 03:11:37,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=579100.0, ans=0.0 +2024-09-19 03:11:43,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=579100.0, ans=0.2 +2024-09-19 03:13:03,884 INFO [train.py:1198] (1/2) Epoch 33, batch 0, loss[loss=0.2149, ctc_loss=0.09693, cr_loss=0.3242, attn_decoder_loss=0.2208, over 29598.00 frames. ], tot_loss[loss=0.2149, ctc_loss=0.09693, cr_loss=0.3242, attn_decoder_loss=0.2208, over 29598.00 frames. ], batch size: 73, lr: 3.37e-03, grad_scale: 16.0 +2024-09-19 03:13:03,885 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 03:13:20,655 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.2.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([3.8306, 3.4001, 3.6408, 3.2934], device='cuda:1') +2024-09-19 03:13:22,384 INFO [train.py:1230] (1/2) Epoch 33, validation: loss=0.2131, ctc_loss=0.03625, cr_loss=6.2e-15, attn_decoder_loss=0.2327, over 944034.00 frames. +2024-09-19 03:13:22,385 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 03:13:24,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=579200.0, ans=0.125 +2024-09-19 03:13:26,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=579200.0, ans=0.0 +2024-09-19 03:13:43,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=579240.0, ans=0.125 +2024-09-19 03:14:06,364 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.54 vs. 
limit=15.0 +2024-09-19 03:14:16,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=579320.0, ans=0.0 +2024-09-19 03:14:30,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.16 vs. limit=15.0 +2024-09-19 03:14:31,539 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:14:38,181 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.66 vs. limit=12.0 +2024-09-19 03:14:38,706 INFO [train.py:1198] (1/2) Epoch 33, batch 50, loss[loss=0.2122, ctc_loss=0.1034, cr_loss=0.3276, attn_decoder_loss=0.2171, over 29444.00 frames. ], tot_loss[loss=0.2397, ctc_loss=0.1235, cr_loss=0.371, attn_decoder_loss=0.2444, over 1269184.28 frames. ], batch size: 70, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:14:43,364 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.810e+01 9.277e+01 1.031e+02 1.119e+02 2.001e+02, threshold=2.062e+02, percent-clipped=1.0 +2024-09-19 03:14:46,110 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.66 vs. limit=22.5 +2024-09-19 03:15:05,872 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.65 vs. limit=15.0 +2024-09-19 03:15:24,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=579520.0, ans=0.0 +2024-09-19 03:15:26,899 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.46 vs. limit=15.0 +2024-09-19 03:15:38,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=579560.0, ans=0.2 +2024-09-19 03:15:54,966 INFO [train.py:1198] (1/2) Epoch 33, batch 100, loss[loss=0.2324, ctc_loss=0.1117, cr_loss=0.3458, attn_decoder_loss=0.2382, over 29540.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1236, cr_loss=0.3706, attn_decoder_loss=0.2458, over 2253565.37 frames. ], batch size: 76, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:15:55,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=579600.0, ans=22.5 +2024-09-19 03:15:59,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=579600.0, ans=0.125 +2024-09-19 03:16:03,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.94 vs. 
limit=6.0 +2024-09-19 03:16:22,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=579640.0, ans=0.0 +2024-09-19 03:16:24,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=579640.0, ans=0.5 +2024-09-19 03:16:27,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=579680.0, ans=0.125 +2024-09-19 03:16:28,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=579680.0, ans=0.125 +2024-09-19 03:16:31,025 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.49 vs. limit=15.0 +2024-09-19 03:16:46,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=579720.0, ans=0.015 +2024-09-19 03:16:58,013 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.62 vs. limit=15.0 +2024-09-19 03:17:07,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=579760.0, ans=0.1 +2024-09-19 03:17:10,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=579800.0, ans=0.125 +2024-09-19 03:17:11,875 INFO [train.py:1198] (1/2) Epoch 33, batch 150, loss[loss=0.2111, ctc_loss=0.09349, cr_loss=0.3069, attn_decoder_loss=0.2174, over 29431.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1214, cr_loss=0.3652, attn_decoder_loss=0.2437, over 3047867.68 frames. ], batch size: 70, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:17:12,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=579800.0, ans=0.125 +2024-09-19 03:17:16,301 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.420e+01 8.477e+01 8.945e+01 9.593e+01 9.750e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-19 03:17:22,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=579800.0, ans=0.1 +2024-09-19 03:17:28,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=579840.0, ans=0.125 +2024-09-19 03:17:47,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=579880.0, ans=10.0 +2024-09-19 03:18:23,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=579960.0, ans=0.125 +2024-09-19 03:18:25,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=579960.0, ans=0.025 +2024-09-19 03:18:28,278 INFO [train.py:1198] (1/2) Epoch 33, batch 200, loss[loss=0.2557, ctc_loss=0.139, cr_loss=0.4011, attn_decoder_loss=0.2597, over 27339.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1201, cr_loss=0.3623, attn_decoder_loss=0.2424, over 3659146.54 frames. 
], batch size: 124, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:18:52,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=580040.0, ans=0.2 +2024-09-19 03:18:57,987 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.01 vs. limit=6.0 +2024-09-19 03:18:59,361 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.27 vs. limit=15.0 +2024-09-19 03:19:13,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=580120.0, ans=0.125 +2024-09-19 03:19:30,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=580160.0, ans=0.025 +2024-09-19 03:19:38,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=580160.0, ans=0.025 +2024-09-19 03:19:43,961 INFO [train.py:1198] (1/2) Epoch 33, batch 250, loss[loss=0.2611, ctc_loss=0.1305, cr_loss=0.3892, attn_decoder_loss=0.267, over 29214.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1197, cr_loss=0.3618, attn_decoder_loss=0.2423, over 4140597.47 frames. ], batch size: 100, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:19:48,590 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.257e+01 8.698e+01 9.269e+01 2.011e+02, threshold=1.740e+02, percent-clipped=1.0 +2024-09-19 03:19:58,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=580240.0, ans=0.0 +2024-09-19 03:20:00,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.69 vs. limit=12.0 +2024-09-19 03:20:16,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.min_positive, batch_count=580280.0, ans=0.025 +2024-09-19 03:20:17,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=580280.0, ans=0.025 +2024-09-19 03:20:44,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=580320.0, ans=0.025 +2024-09-19 03:20:44,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=580320.0, ans=0.0 +2024-09-19 03:20:47,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=580360.0, ans=0.0 +2024-09-19 03:21:02,339 INFO [train.py:1198] (1/2) Epoch 33, batch 300, loss[loss=0.2559, ctc_loss=0.1421, cr_loss=0.4226, attn_decoder_loss=0.2592, over 29543.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1188, cr_loss=0.3601, attn_decoder_loss=0.2418, over 4509419.61 frames. 
], batch size: 92, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:21:17,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=580440.0, ans=0.0 +2024-09-19 03:21:22,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=580440.0, ans=0.0 +2024-09-19 03:21:22,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=580440.0, ans=0.2 +2024-09-19 03:21:26,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=580440.0, ans=0.2 +2024-09-19 03:21:30,349 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.20 vs. limit=15.0 +2024-09-19 03:21:36,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=580480.0, ans=0.125 +2024-09-19 03:21:36,513 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:21:37,190 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.79 vs. limit=22.5 +2024-09-19 03:21:37,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=580480.0, ans=0.0 +2024-09-19 03:21:42,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=580480.0, ans=0.0 +2024-09-19 03:22:12,159 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.75 vs. limit=15.0 +2024-09-19 03:22:20,296 INFO [train.py:1198] (1/2) Epoch 33, batch 350, loss[loss=0.2136, ctc_loss=0.09791, cr_loss=0.3235, attn_decoder_loss=0.2192, over 29352.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1185, cr_loss=0.3595, attn_decoder_loss=0.2419, over 4794280.08 frames. ], batch size: 71, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:22:24,712 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.206e+01 8.463e+01 8.888e+01 9.398e+01 1.588e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 03:22:29,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=580600.0, ans=0.125 +2024-09-19 03:22:40,954 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.50 vs. limit=15.0 +2024-09-19 03:23:24,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=580760.0, ans=0.0 +2024-09-19 03:23:28,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=580760.0, ans=0.025 +2024-09-19 03:23:36,283 INFO [train.py:1198] (1/2) Epoch 33, batch 400, loss[loss=0.2433, ctc_loss=0.1243, cr_loss=0.3752, attn_decoder_loss=0.2482, over 29699.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1182, cr_loss=0.3593, attn_decoder_loss=0.2417, over 5024396.91 frames. 
], batch size: 82, lr: 3.37e-03, grad_scale: 16.0 +2024-09-19 03:23:41,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.65 vs. limit=15.0 +2024-09-19 03:23:44,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=580800.0, ans=0.025 +2024-09-19 03:23:50,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=580840.0, ans=0.2 +2024-09-19 03:24:06,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=580880.0, ans=0.1 +2024-09-19 03:24:30,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=580920.0, ans=0.125 +2024-09-19 03:24:37,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.39 vs. limit=15.0 +2024-09-19 03:24:54,730 INFO [train.py:1198] (1/2) Epoch 33, batch 450, loss[loss=0.2407, ctc_loss=0.1226, cr_loss=0.3685, attn_decoder_loss=0.2456, over 29700.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1187, cr_loss=0.3604, attn_decoder_loss=0.242, over 5186917.37 frames. ], batch size: 83, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:25:00,690 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.321e+01 8.447e+01 9.007e+01 9.616e+01 1.601e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-19 03:25:04,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=581000.0, ans=0.025 +2024-09-19 03:25:05,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=581000.0, ans=0.0 +2024-09-19 03:25:25,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=581080.0, ans=0.2 +2024-09-19 03:25:27,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.55 vs. limit=15.0 +2024-09-19 03:25:49,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=581120.0, ans=0.125 +2024-09-19 03:26:12,851 INFO [train.py:1198] (1/2) Epoch 33, batch 500, loss[loss=0.251, ctc_loss=0.1247, cr_loss=0.3725, attn_decoder_loss=0.2567, over 29446.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1184, cr_loss=0.3597, attn_decoder_loss=0.2414, over 5329791.36 frames. ], batch size: 94, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:26:13,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=581200.0, ans=0.125 +2024-09-19 03:26:17,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=581200.0, ans=0.125 +2024-09-19 03:26:37,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=581240.0, ans=0.0 +2024-09-19 03:26:54,984 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.59 vs. 
limit=15.0 +2024-09-19 03:26:57,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=581320.0, ans=0.125 +2024-09-19 03:27:19,686 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:27:25,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=581360.0, ans=0.025 +2024-09-19 03:27:28,657 INFO [train.py:1198] (1/2) Epoch 33, batch 550, loss[loss=0.2518, ctc_loss=0.1303, cr_loss=0.3871, attn_decoder_loss=0.2567, over 28820.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1187, cr_loss=0.3601, attn_decoder_loss=0.2417, over 5422255.15 frames. ], batch size: 104, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:27:34,829 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.333e+01 9.017e+01 9.436e+01 4.024e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-19 03:27:41,154 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:27:54,067 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.37 vs. limit=12.0 +2024-09-19 03:28:02,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=581480.0, ans=0.125 +2024-09-19 03:28:03,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=581480.0, ans=0.125 +2024-09-19 03:28:10,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=581480.0, ans=0.0 +2024-09-19 03:28:27,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=581520.0, ans=0.125 +2024-09-19 03:28:41,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=581560.0, ans=0.1 +2024-09-19 03:28:47,663 INFO [train.py:1198] (1/2) Epoch 33, batch 600, loss[loss=0.2467, ctc_loss=0.1229, cr_loss=0.3601, attn_decoder_loss=0.2524, over 29236.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1188, cr_loss=0.3605, attn_decoder_loss=0.2419, over 5508141.45 frames. ], batch size: 100, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:28:49,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=581600.0, ans=0.2 +2024-09-19 03:29:10,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=581640.0, ans=0.125 +2024-09-19 03:29:28,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.82 vs. limit=15.0 +2024-09-19 03:29:45,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=581720.0, ans=0.125 +2024-09-19 03:29:56,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=581760.0, ans=0.0 +2024-09-19 03:30:05,157 INFO [train.py:1198] (1/2) Epoch 33, batch 650, loss[loss=0.2369, ctc_loss=0.1189, cr_loss=0.3673, attn_decoder_loss=0.2419, over 29758.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.118, cr_loss=0.3591, attn_decoder_loss=0.2412, over 5586103.12 frames. 
], batch size: 81, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:30:11,217 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.439e+01 8.577e+01 8.986e+01 9.488e+01 1.360e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-19 03:30:37,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=581880.0, ans=0.0 +2024-09-19 03:31:21,279 INFO [train.py:1198] (1/2) Epoch 33, batch 700, loss[loss=0.2284, ctc_loss=0.1127, cr_loss=0.3519, attn_decoder_loss=0.2335, over 29567.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1183, cr_loss=0.3602, attn_decoder_loss=0.2416, over 5636971.74 frames. ], batch size: 76, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:32:22,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=582160.0, ans=0.0 +2024-09-19 03:32:30,981 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.12 vs. limit=10.0 +2024-09-19 03:32:39,391 INFO [train.py:1198] (1/2) Epoch 33, batch 750, loss[loss=0.2439, ctc_loss=0.121, cr_loss=0.3616, attn_decoder_loss=0.2495, over 29720.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1183, cr_loss=0.36, attn_decoder_loss=0.2415, over 5677162.30 frames. ], batch size: 82, lr: 3.37e-03, grad_scale: 8.0 +2024-09-19 03:32:42,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=582200.0, ans=0.0 +2024-09-19 03:32:44,033 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:32:46,693 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.188e+01 8.541e+01 8.897e+01 9.394e+01 1.704e+02, threshold=1.779e+02, percent-clipped=0.0 +2024-09-19 03:32:50,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=582200.0, ans=0.1 +2024-09-19 03:32:56,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=582240.0, ans=0.125 +2024-09-19 03:33:03,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=582240.0, ans=0.025 +2024-09-19 03:33:06,497 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:33:09,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=582280.0, ans=0.2 +2024-09-19 03:33:26,545 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.34 vs. limit=10.0 +2024-09-19 03:33:47,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=582360.0, ans=0.125 +2024-09-19 03:33:48,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=582360.0, ans=0.125 +2024-09-19 03:33:50,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=582360.0, ans=0.125 +2024-09-19 03:33:57,742 INFO [train.py:1198] (1/2) Epoch 33, batch 800, loss[loss=0.2155, ctc_loss=0.09827, cr_loss=0.3034, attn_decoder_loss=0.2218, over 29612.00 frames. 
], tot_loss[loss=0.2366, ctc_loss=0.1186, cr_loss=0.3606, attn_decoder_loss=0.2417, over 5707187.33 frames. ], batch size: 73, lr: 3.37e-03, grad_scale: 16.0 +2024-09-19 03:34:13,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=582440.0, ans=0.125 +2024-09-19 03:34:20,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=582440.0, ans=0.125 +2024-09-19 03:34:41,360 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.71 vs. limit=22.5 +2024-09-19 03:34:42,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=582520.0, ans=0.1 +2024-09-19 03:34:57,612 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=12.0 +2024-09-19 03:34:58,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=582560.0, ans=0.0 +2024-09-19 03:35:00,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=582560.0, ans=0.1 +2024-09-19 03:35:13,258 INFO [train.py:1198] (1/2) Epoch 33, batch 850, loss[loss=0.2407, ctc_loss=0.1174, cr_loss=0.3703, attn_decoder_loss=0.2461, over 29706.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1181, cr_loss=0.3596, attn_decoder_loss=0.2411, over 5736417.53 frames. ], batch size: 89, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:35:17,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=582600.0, ans=0.025 +2024-09-19 03:35:20,678 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.308e+01 8.452e+01 8.956e+01 9.635e+01 2.624e+02, threshold=1.791e+02, percent-clipped=1.0 +2024-09-19 03:35:21,095 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:35:25,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=582600.0, ans=10.0 +2024-09-19 03:35:36,453 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.85 vs. limit=12.0 +2024-09-19 03:35:49,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=582680.0, ans=0.125 +2024-09-19 03:35:49,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=582680.0, ans=0.125 +2024-09-19 03:36:01,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.61 vs. limit=22.5 +2024-09-19 03:36:05,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=582720.0, ans=0.125 +2024-09-19 03:36:27,163 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:36:31,364 INFO [train.py:1198] (1/2) Epoch 33, batch 900, loss[loss=0.2278, ctc_loss=0.1089, cr_loss=0.3543, attn_decoder_loss=0.2331, over 29573.00 frames. 
], tot_loss[loss=0.2363, ctc_loss=0.1183, cr_loss=0.3596, attn_decoder_loss=0.2414, over 5741989.94 frames. ], batch size: 73, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:36:31,687 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:36:42,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=582800.0, ans=0.125 +2024-09-19 03:36:52,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=582840.0, ans=0.1 +2024-09-19 03:36:57,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=582840.0, ans=0.125 +2024-09-19 03:36:59,410 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.70 vs. limit=15.0 +2024-09-19 03:37:01,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=582880.0, ans=0.0 +2024-09-19 03:37:06,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=582880.0, ans=0.125 +2024-09-19 03:37:19,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.62 vs. limit=15.0 +2024-09-19 03:37:22,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=582920.0, ans=0.125 +2024-09-19 03:37:26,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=582920.0, ans=0.5 +2024-09-19 03:37:48,790 INFO [train.py:1198] (1/2) Epoch 33, batch 950, loss[loss=0.2234, ctc_loss=0.1137, cr_loss=0.3533, attn_decoder_loss=0.2278, over 29517.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.118, cr_loss=0.359, attn_decoder_loss=0.2413, over 5743358.79 frames. ], batch size: 74, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:37:52,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=583000.0, ans=0.1 +2024-09-19 03:37:54,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=583000.0, ans=0.025 +2024-09-19 03:37:56,287 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 8.465e+01 9.060e+01 1.004e+02 2.208e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 03:38:20,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=583080.0, ans=0.125 +2024-09-19 03:39:02,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=12.0 +2024-09-19 03:39:04,616 INFO [train.py:1198] (1/2) Epoch 33, batch 1000, loss[loss=0.2411, ctc_loss=0.1179, cr_loss=0.3611, attn_decoder_loss=0.2468, over 29508.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1184, cr_loss=0.3599, attn_decoder_loss=0.2419, over 5737188.52 frames. 
], batch size: 77, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:39:07,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=583200.0, ans=0.2 +2024-09-19 03:39:12,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=583200.0, ans=0.05 +2024-09-19 03:39:23,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=583240.0, ans=0.125 +2024-09-19 03:39:23,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=583240.0, ans=0.0 +2024-09-19 03:39:26,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=583240.0, ans=0.1 +2024-09-19 03:39:29,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=583240.0, ans=0.125 +2024-09-19 03:39:32,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=583240.0, ans=0.0 +2024-09-19 03:39:41,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=583280.0, ans=0.125 +2024-09-19 03:40:04,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=583360.0, ans=0.0 +2024-09-19 03:40:05,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=583360.0, ans=0.125 +2024-09-19 03:40:05,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=583360.0, ans=0.07 +2024-09-19 03:40:15,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=583360.0, ans=0.125 +2024-09-19 03:40:22,696 INFO [train.py:1198] (1/2) Epoch 33, batch 1050, loss[loss=0.2419, ctc_loss=0.1177, cr_loss=0.3643, attn_decoder_loss=0.2476, over 29676.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1183, cr_loss=0.3602, attn_decoder_loss=0.2414, over 5744706.77 frames. ], batch size: 85, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:40:32,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=583400.0, ans=0.0 +2024-09-19 03:40:33,196 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.180e+01 8.604e+01 9.076e+01 9.577e+01 3.537e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-19 03:40:42,027 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.10 vs. limit=6.0 +2024-09-19 03:40:43,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.72 vs. limit=22.5 +2024-09-19 03:40:54,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=583480.0, ans=0.125 +2024-09-19 03:40:54,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=583480.0, ans=0.1 +2024-09-19 03:41:32,510 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.95 vs. 
limit=15.0 +2024-09-19 03:41:40,572 INFO [train.py:1198] (1/2) Epoch 33, batch 1100, loss[loss=0.2375, ctc_loss=0.1219, cr_loss=0.3793, attn_decoder_loss=0.242, over 29458.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1182, cr_loss=0.3594, attn_decoder_loss=0.2411, over 5756686.69 frames. ], batch size: 78, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:41:54,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=583640.0, ans=0.025 +2024-09-19 03:41:54,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=583640.0, ans=0.125 +2024-09-19 03:42:03,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=583640.0, ans=0.5 +2024-09-19 03:42:08,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=583640.0, ans=0.1 +2024-09-19 03:42:23,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.94 vs. limit=22.5 +2024-09-19 03:42:29,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=583720.0, ans=0.125 +2024-09-19 03:42:38,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=583720.0, ans=0.0 +2024-09-19 03:42:52,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=583760.0, ans=0.07 +2024-09-19 03:42:55,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=583800.0, ans=0.1 +2024-09-19 03:42:56,359 INFO [train.py:1198] (1/2) Epoch 33, batch 1150, loss[loss=0.2363, ctc_loss=0.1126, cr_loss=0.3451, attn_decoder_loss=0.2423, over 29448.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1183, cr_loss=0.3596, attn_decoder_loss=0.2413, over 5754090.82 frames. ], batch size: 78, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:43:06,973 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.330e+01 8.417e+01 8.891e+01 9.458e+01 2.719e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 03:43:10,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=583840.0, ans=0.2 +2024-09-19 03:43:33,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=583880.0, ans=0.0 +2024-09-19 03:43:43,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=583920.0, ans=0.07 +2024-09-19 03:43:47,317 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.53 vs. limit=22.5 +2024-09-19 03:44:13,008 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:44:14,585 INFO [train.py:1198] (1/2) Epoch 33, batch 1200, loss[loss=0.2515, ctc_loss=0.1275, cr_loss=0.3982, attn_decoder_loss=0.2564, over 29678.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1189, cr_loss=0.3607, attn_decoder_loss=0.242, over 5745627.05 frames. 
], batch size: 85, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:44:27,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=584000.0, ans=0.1 +2024-09-19 03:45:04,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=584120.0, ans=0.125 +2024-09-19 03:45:05,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=584120.0, ans=0.1 +2024-09-19 03:45:28,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=584160.0, ans=0.0 +2024-09-19 03:45:32,641 INFO [train.py:1198] (1/2) Epoch 33, batch 1250, loss[loss=0.2432, ctc_loss=0.1245, cr_loss=0.3578, attn_decoder_loss=0.2485, over 29551.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1189, cr_loss=0.3608, attn_decoder_loss=0.2424, over 5773684.37 frames. ], batch size: 92, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:45:37,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=584200.0, ans=0.125 +2024-09-19 03:45:43,420 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.734e+01 8.568e+01 9.117e+01 9.876e+01 2.169e+02, threshold=1.823e+02, percent-clipped=3.0 +2024-09-19 03:45:43,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=584200.0, ans=0.125 +2024-09-19 03:45:51,880 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.11 vs. limit=15.0 +2024-09-19 03:46:40,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=584360.0, ans=0.125 +2024-09-19 03:46:45,277 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.55 vs. limit=10.0 +2024-09-19 03:46:48,666 INFO [train.py:1198] (1/2) Epoch 33, batch 1300, loss[loss=0.2372, ctc_loss=0.1099, cr_loss=0.3165, attn_decoder_loss=0.2444, over 28349.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1188, cr_loss=0.3604, attn_decoder_loss=0.2418, over 5779039.59 frames. ], batch size: 111, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:47:01,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=584400.0, ans=0.125 +2024-09-19 03:47:04,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=584440.0, ans=0.125 +2024-09-19 03:47:27,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=584480.0, ans=0.1 +2024-09-19 03:47:54,835 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.63 vs. limit=15.0 +2024-09-19 03:48:03,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=584600.0, ans=0.95 +2024-09-19 03:48:04,741 INFO [train.py:1198] (1/2) Epoch 33, batch 1350, loss[loss=0.2269, ctc_loss=0.1076, cr_loss=0.3323, attn_decoder_loss=0.2327, over 29754.00 frames. 
], tot_loss[loss=0.2365, ctc_loss=0.1183, cr_loss=0.3595, attn_decoder_loss=0.2416, over 5795690.74 frames. ], batch size: 81, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:48:17,533 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.543e+01 8.274e+01 8.749e+01 9.320e+01 1.394e+02, threshold=1.750e+02, percent-clipped=0.0 +2024-09-19 03:48:35,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=584680.0, ans=0.125 +2024-09-19 03:48:35,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=584680.0, ans=0.125 +2024-09-19 03:48:50,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=584720.0, ans=0.125 +2024-09-19 03:49:03,313 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:49:07,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=584760.0, ans=0.125 +2024-09-19 03:49:24,971 INFO [train.py:1198] (1/2) Epoch 33, batch 1400, loss[loss=0.2152, ctc_loss=0.1061, cr_loss=0.3207, attn_decoder_loss=0.2201, over 29564.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1182, cr_loss=0.3592, attn_decoder_loss=0.2415, over 5807023.37 frames. ], batch size: 69, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:49:29,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=584800.0, ans=0.125 +2024-09-19 03:49:46,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=584840.0, ans=0.0 +2024-09-19 03:49:50,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=584840.0, ans=0.2 +2024-09-19 03:49:59,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=584880.0, ans=0.125 +2024-09-19 03:50:13,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=584920.0, ans=0.1 +2024-09-19 03:50:22,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=584920.0, ans=0.04949747468305833 +2024-09-19 03:50:40,324 INFO [train.py:1198] (1/2) Epoch 33, batch 1450, loss[loss=0.2566, ctc_loss=0.1416, cr_loss=0.4125, attn_decoder_loss=0.2602, over 29446.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1182, cr_loss=0.3591, attn_decoder_loss=0.2417, over 5803881.22 frames. ], batch size: 94, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:50:41,151 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.92 vs. limit=22.5 +2024-09-19 03:50:50,830 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.352e+01 8.392e+01 8.954e+01 9.384e+01 1.541e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 03:50:53,212 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.47 vs. limit=15.0 +2024-09-19 03:50:55,087 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.88 vs. 
limit=12.0 +2024-09-19 03:51:03,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=585040.0, ans=0.0 +2024-09-19 03:51:12,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=585080.0, ans=0.0 +2024-09-19 03:51:22,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=585080.0, ans=0.125 +2024-09-19 03:51:29,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=585120.0, ans=0.125 +2024-09-19 03:51:48,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=585160.0, ans=0.0 +2024-09-19 03:51:51,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=585160.0, ans=0.2 +2024-09-19 03:51:52,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=585160.0, ans=0.0 +2024-09-19 03:51:55,821 INFO [train.py:1198] (1/2) Epoch 33, batch 1500, loss[loss=0.2453, ctc_loss=0.1242, cr_loss=0.3541, attn_decoder_loss=0.2508, over 29637.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1184, cr_loss=0.3594, attn_decoder_loss=0.242, over 5805098.72 frames. ], batch size: 86, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:52:02,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=585200.0, ans=0.0 +2024-09-19 03:52:19,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=585240.0, ans=0.125 +2024-09-19 03:52:33,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.85 vs. limit=15.0 +2024-09-19 03:52:54,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=585320.0, ans=0.2 +2024-09-19 03:52:54,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=585320.0, ans=0.2 +2024-09-19 03:53:06,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=585360.0, ans=0.125 +2024-09-19 03:53:15,903 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.16 vs. limit=15.0 +2024-09-19 03:53:16,547 INFO [train.py:1198] (1/2) Epoch 33, batch 1550, loss[loss=0.2597, ctc_loss=0.1411, cr_loss=0.4137, attn_decoder_loss=0.2636, over 29535.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.119, cr_loss=0.3604, attn_decoder_loss=0.2421, over 5780775.08 frames. 
], batch size: 90, lr: 3.36e-03, grad_scale: 8.0 +2024-09-19 03:53:27,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=585400.0, ans=0.1 +2024-09-19 03:53:28,682 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.266e+01 8.624e+01 9.001e+01 9.537e+01 4.675e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-19 03:53:32,059 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:53:53,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=585480.0, ans=0.125 +2024-09-19 03:54:05,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=585520.0, ans=0.125 +2024-09-19 03:54:10,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=585520.0, ans=0.125 +2024-09-19 03:54:12,482 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.54 vs. limit=10.0 +2024-09-19 03:54:30,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=585600.0, ans=0.0 +2024-09-19 03:54:32,528 INFO [train.py:1198] (1/2) Epoch 33, batch 1600, loss[loss=0.2473, ctc_loss=0.1219, cr_loss=0.3764, attn_decoder_loss=0.2528, over 29671.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1192, cr_loss=0.3605, attn_decoder_loss=0.2422, over 5764620.16 frames. ], batch size: 85, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:54:36,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.49 vs. limit=10.0 +2024-09-19 03:54:37,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=585600.0, ans=0.0 +2024-09-19 03:54:38,231 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.38 vs. limit=15.0 +2024-09-19 03:54:55,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=585640.0, ans=0.0 +2024-09-19 03:55:06,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=585680.0, ans=0.125 +2024-09-19 03:55:21,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=585720.0, ans=0.0 +2024-09-19 03:55:35,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=585760.0, ans=0.125 +2024-09-19 03:55:48,309 INFO [train.py:1198] (1/2) Epoch 33, batch 1650, loss[loss=0.2494, ctc_loss=0.1255, cr_loss=0.3929, attn_decoder_loss=0.2544, over 29682.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1192, cr_loss=0.3611, attn_decoder_loss=0.2422, over 5759240.85 frames. 
], batch size: 89, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:56:02,791 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.482e+01 8.668e+01 9.020e+01 9.711e+01 1.996e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-19 03:56:03,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=585800.0, ans=0.125 +2024-09-19 03:56:12,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=585840.0, ans=0.025 +2024-09-19 03:56:57,262 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.19 vs. limit=6.0 +2024-09-19 03:56:57,547 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.83 vs. limit=6.0 +2024-09-19 03:56:59,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=585960.0, ans=0.125 +2024-09-19 03:57:08,345 INFO [train.py:1198] (1/2) Epoch 33, batch 1700, loss[loss=0.2102, ctc_loss=0.09916, cr_loss=0.3241, attn_decoder_loss=0.2154, over 29560.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1186, cr_loss=0.3599, attn_decoder_loss=0.2419, over 5780665.42 frames. ], batch size: 69, lr: 3.36e-03, grad_scale: 16.0 +2024-09-19 03:57:16,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=586000.0, ans=0.125 +2024-09-19 03:57:46,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=586080.0, ans=0.1 +2024-09-19 03:57:56,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=586120.0, ans=0.125 +2024-09-19 03:58:23,847 INFO [train.py:1198] (1/2) Epoch 33, batch 1750, loss[loss=0.2098, ctc_loss=0.09804, cr_loss=0.3183, attn_decoder_loss=0.2152, over 29321.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1185, cr_loss=0.36, attn_decoder_loss=0.2415, over 5789751.53 frames. ], batch size: 67, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 03:58:37,583 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.205e+01 8.452e+01 8.998e+01 9.448e+01 1.573e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-19 03:58:56,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=586280.0, ans=0.0 +2024-09-19 03:59:02,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=586280.0, ans=0.125 +2024-09-19 03:59:23,804 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:59:31,380 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 03:59:34,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=586360.0, ans=0.125 +2024-09-19 03:59:40,295 INFO [train.py:1198] (1/2) Epoch 33, batch 1800, loss[loss=0.2462, ctc_loss=0.1233, cr_loss=0.386, attn_decoder_loss=0.2513, over 29706.00 frames. 
], tot_loss[loss=0.2367, ctc_loss=0.1186, cr_loss=0.3607, attn_decoder_loss=0.2418, over 5791937.49 frames. ], batch size: 83, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 03:59:45,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=586400.0, ans=0.0 +2024-09-19 04:00:00,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=586440.0, ans=0.125 +2024-09-19 04:00:11,916 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:00:26,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=586520.0, ans=0.125 +2024-09-19 04:00:39,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=586520.0, ans=0.125 +2024-09-19 04:01:00,448 INFO [train.py:1198] (1/2) Epoch 33, batch 1850, loss[loss=0.2537, ctc_loss=0.1255, cr_loss=0.3782, attn_decoder_loss=0.2595, over 29657.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.119, cr_loss=0.3612, attn_decoder_loss=0.2419, over 5795500.11 frames. ], batch size: 86, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:01:00,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff2.min_abs, batch_count=586600.0, ans=0.1 +2024-09-19 04:01:12,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=586600.0, ans=0.2 +2024-09-19 04:01:13,844 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.226e+01 8.429e+01 8.891e+01 9.502e+01 1.976e+02, threshold=1.778e+02, percent-clipped=1.0 +2024-09-19 04:01:30,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=586680.0, ans=0.0 +2024-09-19 04:01:48,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=586720.0, ans=0.125 +2024-09-19 04:01:51,009 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.65 vs. limit=15.0 +2024-09-19 04:02:11,635 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:02:13,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=586760.0, ans=0.0 +2024-09-19 04:02:13,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=586760.0, ans=0.125 +2024-09-19 04:02:15,710 INFO [train.py:1198] (1/2) Epoch 33, batch 1900, loss[loss=0.2498, ctc_loss=0.1303, cr_loss=0.3985, attn_decoder_loss=0.2542, over 29685.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1189, cr_loss=0.3613, attn_decoder_loss=0.2421, over 5802932.21 frames. 
], batch size: 89, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:02:19,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=586800.0, ans=0.1 +2024-09-19 04:02:44,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=586880.0, ans=0.125 +2024-09-19 04:03:04,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=586920.0, ans=0.125 +2024-09-19 04:03:20,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.93 vs. limit=15.0 +2024-09-19 04:03:31,173 INFO [train.py:1198] (1/2) Epoch 33, batch 1950, loss[loss=0.2398, ctc_loss=0.1337, cr_loss=0.3891, attn_decoder_loss=0.2429, over 29435.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1194, cr_loss=0.3625, attn_decoder_loss=0.2433, over 5817845.72 frames. ], batch size: 78, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:03:39,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=587000.0, ans=0.025 +2024-09-19 04:03:44,771 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.530e+01 9.165e+01 9.739e+01 1.607e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-19 04:03:46,050 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.68 vs. limit=15.0 +2024-09-19 04:04:03,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=587080.0, ans=0.5 +2024-09-19 04:04:14,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=587080.0, ans=0.0 +2024-09-19 04:04:41,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=587160.0, ans=0.125 +2024-09-19 04:04:42,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=587160.0, ans=0.125 +2024-09-19 04:04:43,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=587160.0, ans=0.0 +2024-09-19 04:04:51,439 INFO [train.py:1198] (1/2) Epoch 33, batch 2000, loss[loss=0.2089, ctc_loss=0.09566, cr_loss=0.31, attn_decoder_loss=0.2146, over 29370.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1197, cr_loss=0.3631, attn_decoder_loss=0.2435, over 5796192.22 frames. ], batch size: 67, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:05:06,344 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.59 vs. limit=15.0 +2024-09-19 04:05:08,147 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.32 vs. limit=22.5 +2024-09-19 04:05:14,013 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.74 vs. 
limit=15.0 +2024-09-19 04:05:18,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=587240.0, ans=0.125 +2024-09-19 04:05:37,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=587320.0, ans=0.125 +2024-09-19 04:05:57,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=587360.0, ans=0.0 +2024-09-19 04:06:07,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.40 vs. limit=10.0 +2024-09-19 04:06:07,787 INFO [train.py:1198] (1/2) Epoch 33, batch 2050, loss[loss=0.2144, ctc_loss=0.1048, cr_loss=0.327, attn_decoder_loss=0.2193, over 29441.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1194, cr_loss=0.3621, attn_decoder_loss=0.2426, over 5789336.00 frames. ], batch size: 70, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:06:11,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=587400.0, ans=0.0 +2024-09-19 04:06:21,349 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.514e+01 8.725e+01 9.262e+01 9.868e+01 2.043e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-19 04:06:29,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=587440.0, ans=0.125 +2024-09-19 04:06:30,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=587440.0, ans=0.125 +2024-09-19 04:07:05,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=587520.0, ans=0.0 +2024-09-19 04:07:19,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=587560.0, ans=0.025 +2024-09-19 04:07:23,428 INFO [train.py:1198] (1/2) Epoch 33, batch 2100, loss[loss=0.2385, ctc_loss=0.1258, cr_loss=0.3816, attn_decoder_loss=0.2426, over 29746.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1188, cr_loss=0.3605, attn_decoder_loss=0.242, over 5800144.84 frames. ], batch size: 81, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:07:34,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=587600.0, ans=0.1 +2024-09-19 04:07:39,408 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.02 vs. limit=15.0 +2024-09-19 04:07:41,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=587640.0, ans=0.125 +2024-09-19 04:07:43,837 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.96 vs. 
limit=12.0 +2024-09-19 04:07:46,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=587640.0, ans=0.0 +2024-09-19 04:07:53,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=587680.0, ans=0.125 +2024-09-19 04:07:53,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=587680.0, ans=0.125 +2024-09-19 04:08:02,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=587680.0, ans=0.0 +2024-09-19 04:08:03,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=587680.0, ans=0.125 +2024-09-19 04:08:09,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=587720.0, ans=0.125 +2024-09-19 04:08:28,270 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:08:42,922 INFO [train.py:1198] (1/2) Epoch 33, batch 2150, loss[loss=0.2352, ctc_loss=0.1213, cr_loss=0.3526, attn_decoder_loss=0.24, over 29483.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1181, cr_loss=0.3597, attn_decoder_loss=0.2416, over 5814842.63 frames. ], batch size: 78, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:08:47,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=587800.0, ans=0.125 +2024-09-19 04:08:55,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=587800.0, ans=0.1 +2024-09-19 04:08:56,496 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.296e+01 8.476e+01 8.968e+01 9.482e+01 1.071e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 04:08:57,283 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.16 vs. limit=6.0 +2024-09-19 04:09:40,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=587920.0, ans=0.1 +2024-09-19 04:09:58,817 INFO [train.py:1198] (1/2) Epoch 33, batch 2200, loss[loss=0.2518, ctc_loss=0.1339, cr_loss=0.3923, attn_decoder_loss=0.2562, over 29627.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1181, cr_loss=0.3595, attn_decoder_loss=0.2417, over 5811640.34 frames. ], batch size: 86, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:09:59,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=588000.0, ans=0.1 +2024-09-19 04:10:00,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=588000.0, ans=0.1 +2024-09-19 04:10:17,552 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.65 vs. 
limit=15.0 +2024-09-19 04:10:23,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=588040.0, ans=0.2 +2024-09-19 04:10:27,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=588080.0, ans=0.1 +2024-09-19 04:10:31,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=588080.0, ans=10.0 +2024-09-19 04:10:42,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=588120.0, ans=0.1 +2024-09-19 04:10:48,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=588120.0, ans=0.2 +2024-09-19 04:11:03,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=588160.0, ans=0.0 +2024-09-19 04:11:05,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=588160.0, ans=0.1 +2024-09-19 04:11:14,406 INFO [train.py:1198] (1/2) Epoch 33, batch 2250, loss[loss=0.2383, ctc_loss=0.116, cr_loss=0.3571, attn_decoder_loss=0.244, over 29692.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1178, cr_loss=0.3591, attn_decoder_loss=0.2414, over 5812258.62 frames. ], batch size: 82, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:11:23,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=588200.0, ans=0.025 +2024-09-19 04:11:29,580 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.468e+01 9.055e+01 9.587e+01 2.332e+02, threshold=1.811e+02, percent-clipped=1.0 +2024-09-19 04:12:09,925 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.80 vs. limit=15.0 +2024-09-19 04:12:16,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=588320.0, ans=0.95 +2024-09-19 04:12:24,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=588360.0, ans=0.125 +2024-09-19 04:12:30,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=588360.0, ans=0.1 +2024-09-19 04:12:34,490 INFO [train.py:1198] (1/2) Epoch 33, batch 2300, loss[loss=0.2116, ctc_loss=0.102, cr_loss=0.3312, attn_decoder_loss=0.2164, over 29319.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.117, cr_loss=0.3572, attn_decoder_loss=0.2402, over 5799791.46 frames. 
], batch size: 71, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:12:48,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=588440.0, ans=0.0 +2024-09-19 04:12:48,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=588440.0, ans=0.125 +2024-09-19 04:12:54,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=588440.0, ans=0.025 +2024-09-19 04:13:04,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=588480.0, ans=0.125 +2024-09-19 04:13:09,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=588480.0, ans=0.1 +2024-09-19 04:13:45,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=588560.0, ans=0.09899494936611666 +2024-09-19 04:13:49,846 INFO [train.py:1198] (1/2) Epoch 33, batch 2350, loss[loss=0.2519, ctc_loss=0.1293, cr_loss=0.3951, attn_decoder_loss=0.2567, over 29691.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1171, cr_loss=0.3576, attn_decoder_loss=0.2404, over 5804482.66 frames. ], batch size: 83, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:13:51,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=588600.0, ans=0.0 +2024-09-19 04:13:55,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=588600.0, ans=0.0 +2024-09-19 04:14:01,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=588600.0, ans=0.05 +2024-09-19 04:14:04,759 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.984e+01 8.296e+01 8.859e+01 9.524e+01 1.352e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 04:14:12,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=588640.0, ans=0.0 +2024-09-19 04:14:18,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=588680.0, ans=0.2 +2024-09-19 04:14:25,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.55 vs. limit=15.0 +2024-09-19 04:14:32,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=588680.0, ans=0.125 +2024-09-19 04:14:38,834 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.40 vs. 
limit=15.0 +2024-09-19 04:14:45,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=588720.0, ans=0.2 +2024-09-19 04:15:03,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=588760.0, ans=0.0 +2024-09-19 04:15:04,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=588800.0, ans=0.125 +2024-09-19 04:15:06,278 INFO [train.py:1198] (1/2) Epoch 33, batch 2400, loss[loss=0.2253, ctc_loss=0.1106, cr_loss=0.3572, attn_decoder_loss=0.2301, over 29520.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1174, cr_loss=0.3585, attn_decoder_loss=0.2411, over 5808886.09 frames. ], batch size: 76, lr: 3.35e-03, grad_scale: 16.0 +2024-09-19 04:15:10,213 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.97 vs. limit=22.5 +2024-09-19 04:15:15,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=588800.0, ans=0.125 +2024-09-19 04:15:16,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=588800.0, ans=0.125 +2024-09-19 04:15:17,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=588800.0, ans=0.0 +2024-09-19 04:15:46,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=588880.0, ans=0.125 +2024-09-19 04:15:50,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=588920.0, ans=0.0 +2024-09-19 04:15:58,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=588920.0, ans=0.1 +2024-09-19 04:16:10,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=588960.0, ans=0.1 +2024-09-19 04:16:23,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=589000.0, ans=0.125 +2024-09-19 04:16:24,261 INFO [train.py:1198] (1/2) Epoch 33, batch 2450, loss[loss=0.2454, ctc_loss=0.1254, cr_loss=0.3852, attn_decoder_loss=0.2502, over 29712.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1185, cr_loss=0.3603, attn_decoder_loss=0.242, over 5785274.10 frames. ], batch size: 82, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:16:36,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=589000.0, ans=0.07 +2024-09-19 04:16:37,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.45 vs. 
limit=6.0 +2024-09-19 04:16:39,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=589040.0, ans=0.1 +2024-09-19 04:16:40,789 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.743e+01 8.663e+01 9.079e+01 9.765e+01 4.096e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 04:16:41,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=589040.0, ans=0.0 +2024-09-19 04:16:47,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=589040.0, ans=0.5 +2024-09-19 04:17:12,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=589120.0, ans=0.125 +2024-09-19 04:17:14,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=589120.0, ans=0.125 +2024-09-19 04:17:22,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=589120.0, ans=0.0 +2024-09-19 04:17:36,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.78 vs. limit=15.0 +2024-09-19 04:17:40,051 INFO [train.py:1198] (1/2) Epoch 33, batch 2500, loss[loss=0.2479, ctc_loss=0.1226, cr_loss=0.3573, attn_decoder_loss=0.2539, over 29630.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1188, cr_loss=0.361, attn_decoder_loss=0.2421, over 5795747.23 frames. ], batch size: 86, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:18:00,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=589240.0, ans=0.09899494936611666 +2024-09-19 04:18:15,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=589280.0, ans=0.125 +2024-09-19 04:18:37,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=589320.0, ans=0.2 +2024-09-19 04:18:56,403 INFO [train.py:1198] (1/2) Epoch 33, batch 2550, loss[loss=0.2142, ctc_loss=0.09711, cr_loss=0.3315, attn_decoder_loss=0.2199, over 29378.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1184, cr_loss=0.3604, attn_decoder_loss=0.242, over 5798979.64 frames. ], batch size: 67, lr: 3.35e-03, grad_scale: 8.0 +2024-09-19 04:19:03,432 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.22 vs. limit=15.0 +2024-09-19 04:19:07,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=589400.0, ans=0.1 +2024-09-19 04:19:12,964 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.839e+01 8.542e+01 8.992e+01 9.541e+01 1.643e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-19 04:19:31,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=589480.0, ans=0.125 +2024-09-19 04:19:32,091 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.41 vs. 
limit=15.0 +2024-09-19 04:19:37,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=589480.0, ans=0.1 +2024-09-19 04:19:43,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=589520.0, ans=0.1 +2024-09-19 04:19:56,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=589520.0, ans=0.0 +2024-09-19 04:20:02,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=589560.0, ans=0.125 +2024-09-19 04:20:10,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=589560.0, ans=0.05 +2024-09-19 04:20:16,548 INFO [train.py:1198] (1/2) Epoch 33, batch 2600, loss[loss=0.2382, ctc_loss=0.1253, cr_loss=0.385, attn_decoder_loss=0.2422, over 29455.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1191, cr_loss=0.3615, attn_decoder_loss=0.2427, over 5794605.69 frames. ], batch size: 78, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:20:21,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=589600.0, ans=0.0 +2024-09-19 04:20:33,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=589640.0, ans=0.07 +2024-09-19 04:20:34,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=589640.0, ans=0.125 +2024-09-19 04:21:29,870 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.74 vs. limit=15.0 +2024-09-19 04:21:30,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=589800.0, ans=0.1 +2024-09-19 04:21:31,665 INFO [train.py:1198] (1/2) Epoch 33, batch 2650, loss[loss=0.2571, ctc_loss=0.1383, cr_loss=0.381, attn_decoder_loss=0.2619, over 29248.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1193, cr_loss=0.362, attn_decoder_loss=0.2432, over 5800943.80 frames. ], batch size: 100, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:21:48,459 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.311e+01 8.528e+01 8.946e+01 9.384e+01 1.299e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-19 04:21:57,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=589840.0, ans=0.09899494936611666 +2024-09-19 04:22:27,472 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.71 vs. limit=12.0 +2024-09-19 04:22:47,777 INFO [train.py:1198] (1/2) Epoch 33, batch 2700, loss[loss=0.2419, ctc_loss=0.119, cr_loss=0.3761, attn_decoder_loss=0.2472, over 29523.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.1195, cr_loss=0.3621, attn_decoder_loss=0.2432, over 5797099.48 frames. 
], batch size: 87, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:22:56,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=590000.0, ans=0.125 +2024-09-19 04:23:04,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=590040.0, ans=0.125 +2024-09-19 04:23:09,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=590040.0, ans=0.2 +2024-09-19 04:23:44,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=590120.0, ans=0.125 +2024-09-19 04:24:06,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=590200.0, ans=0.1 +2024-09-19 04:24:07,844 INFO [train.py:1198] (1/2) Epoch 33, batch 2750, loss[loss=0.2342, ctc_loss=0.1162, cr_loss=0.3682, attn_decoder_loss=0.2391, over 29535.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1188, cr_loss=0.3611, attn_decoder_loss=0.2422, over 5795181.66 frames. ], batch size: 75, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:24:11,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=590200.0, ans=0.125 +2024-09-19 04:24:12,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=590200.0, ans=0.125 +2024-09-19 04:24:13,525 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.79 vs. limit=12.0 +2024-09-19 04:24:24,587 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.200e+01 8.542e+01 8.884e+01 9.570e+01 2.810e+02, threshold=1.777e+02, percent-clipped=3.0 +2024-09-19 04:24:30,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=590240.0, ans=0.125 +2024-09-19 04:24:35,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=590240.0, ans=0.0 +2024-09-19 04:25:02,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=590320.0, ans=0.125 +2024-09-19 04:25:02,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=590320.0, ans=0.125 +2024-09-19 04:25:05,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=590320.0, ans=0.125 +2024-09-19 04:25:14,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=590360.0, ans=0.125 +2024-09-19 04:25:24,059 INFO [train.py:1198] (1/2) Epoch 33, batch 2800, loss[loss=0.2549, ctc_loss=0.1445, cr_loss=0.3709, attn_decoder_loss=0.2589, over 20063.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1194, cr_loss=0.3619, attn_decoder_loss=0.2425, over 5775276.59 frames. 
], batch size: 209, lr: 3.34e-03, grad_scale: 16.0 +2024-09-19 04:25:33,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=590400.0, ans=0.125 +2024-09-19 04:25:38,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=590440.0, ans=0.125 +2024-09-19 04:25:43,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=590440.0, ans=0.125 +2024-09-19 04:25:46,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=590440.0, ans=0.125 +2024-09-19 04:25:59,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=590480.0, ans=0.2 +2024-09-19 04:26:13,318 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.40 vs. limit=15.0 +2024-09-19 04:26:39,477 INFO [train.py:1198] (1/2) Epoch 33, batch 2850, loss[loss=0.2357, ctc_loss=0.1134, cr_loss=0.351, attn_decoder_loss=0.2414, over 29474.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1199, cr_loss=0.363, attn_decoder_loss=0.2429, over 5761367.24 frames. ], batch size: 77, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:26:57,771 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.590e+01 8.701e+01 9.298e+01 9.945e+01 2.152e+02, threshold=1.860e+02, percent-clipped=1.0 +2024-09-19 04:27:33,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=590720.0, ans=0.2 +2024-09-19 04:27:35,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=590720.0, ans=0.0 +2024-09-19 04:27:36,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=590720.0, ans=0.025 +2024-09-19 04:27:40,653 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.50 vs. limit=15.0 +2024-09-19 04:27:45,768 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.59 vs. limit=6.0 +2024-09-19 04:27:50,162 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.21 vs. limit=15.0 +2024-09-19 04:27:57,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=590760.0, ans=0.1 +2024-09-19 04:27:59,741 INFO [train.py:1198] (1/2) Epoch 33, batch 2900, loss[loss=0.2358, ctc_loss=0.1158, cr_loss=0.3533, attn_decoder_loss=0.2413, over 29425.00 frames. ], tot_loss[loss=0.2389, ctc_loss=0.1205, cr_loss=0.3647, attn_decoder_loss=0.244, over 5786526.58 frames. ], batch size: 79, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:28:04,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=590800.0, ans=0.1 +2024-09-19 04:28:12,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.67 vs. 
limit=15.0 +2024-09-19 04:28:26,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=590840.0, ans=0.125 +2024-09-19 04:28:38,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=590880.0, ans=0.2 +2024-09-19 04:29:13,015 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.58 vs. limit=12.0 +2024-09-19 04:29:15,409 INFO [train.py:1198] (1/2) Epoch 33, batch 2950, loss[loss=0.229, ctc_loss=0.1134, cr_loss=0.3457, attn_decoder_loss=0.2341, over 29480.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1189, cr_loss=0.3614, attn_decoder_loss=0.242, over 5781424.51 frames. ], batch size: 75, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:29:33,865 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.422e+01 8.881e+01 9.248e+01 1.525e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-19 04:29:41,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=591040.0, ans=0.0 +2024-09-19 04:29:55,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=591080.0, ans=0.05 +2024-09-19 04:30:32,266 INFO [train.py:1198] (1/2) Epoch 33, batch 3000, loss[loss=0.2368, ctc_loss=0.1149, cr_loss=0.3485, attn_decoder_loss=0.2426, over 29749.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1186, cr_loss=0.3611, attn_decoder_loss=0.2419, over 5782991.02 frames. ], batch size: 81, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:30:32,267 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 04:30:50,729 INFO [train.py:1230] (1/2) Epoch 33, validation: loss=0.2119, ctc_loss=0.03704, cr_loss=5.931e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-19 04:30:50,730 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 04:30:51,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=591200.0, ans=0.0 +2024-09-19 04:31:03,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=591200.0, ans=0.125 +2024-09-19 04:31:12,225 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.72 vs. 
limit=15.0 +2024-09-19 04:31:16,442 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:31:26,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=591280.0, ans=0.5 +2024-09-19 04:31:39,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=591320.0, ans=0.0 +2024-09-19 04:31:56,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=591360.0, ans=0.125 +2024-09-19 04:31:56,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=591360.0, ans=0.0 +2024-09-19 04:32:03,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=591360.0, ans=0.125 +2024-09-19 04:32:11,228 INFO [train.py:1198] (1/2) Epoch 33, batch 3050, loss[loss=0.2243, ctc_loss=0.1076, cr_loss=0.3469, attn_decoder_loss=0.2296, over 29536.00 frames. ], tot_loss[loss=0.2375, ctc_loss=0.1191, cr_loss=0.3619, attn_decoder_loss=0.2426, over 5777459.36 frames. ], batch size: 76, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:32:14,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=591400.0, ans=0.0 +2024-09-19 04:32:29,479 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.592e+01 9.144e+01 9.827e+01 2.461e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-19 04:32:41,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=591480.0, ans=0.0 +2024-09-19 04:32:51,270 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=13.07 vs. limit=15.0 +2024-09-19 04:33:07,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=591520.0, ans=0.125 +2024-09-19 04:33:13,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.07 vs. limit=15.0 +2024-09-19 04:33:18,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=12.17 vs. limit=15.0 +2024-09-19 04:33:18,773 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.04 vs. limit=10.0 +2024-09-19 04:33:21,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=591560.0, ans=0.025 +2024-09-19 04:33:26,780 INFO [train.py:1198] (1/2) Epoch 33, batch 3100, loss[loss=0.2418, ctc_loss=0.1221, cr_loss=0.3514, attn_decoder_loss=0.2472, over 29279.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1189, cr_loss=0.361, attn_decoder_loss=0.2423, over 5777552.90 frames. 
], batch size: 100, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:33:40,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=591640.0, ans=0.0 +2024-09-19 04:33:41,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=591640.0, ans=0.1 +2024-09-19 04:34:10,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.43 vs. limit=22.5 +2024-09-19 04:34:18,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=591720.0, ans=0.0 +2024-09-19 04:34:42,673 INFO [train.py:1198] (1/2) Epoch 33, batch 3150, loss[loss=0.2515, ctc_loss=0.1268, cr_loss=0.3772, attn_decoder_loss=0.2569, over 28865.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1191, cr_loss=0.3617, attn_decoder_loss=0.2424, over 5784472.13 frames. ], batch size: 104, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:34:50,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=591800.0, ans=0.2 +2024-09-19 04:35:03,070 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.384e+01 8.559e+01 9.035e+01 9.509e+01 1.493e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-19 04:35:34,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=591920.0, ans=0.2 +2024-09-19 04:35:34,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=591920.0, ans=0.2 +2024-09-19 04:35:43,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=591920.0, ans=0.025 +2024-09-19 04:35:59,175 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.69 vs. limit=15.0 +2024-09-19 04:36:10,888 INFO [train.py:1198] (1/2) Epoch 33, batch 3200, loss[loss=0.2389, ctc_loss=0.1174, cr_loss=0.3552, attn_decoder_loss=0.2445, over 29405.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1182, cr_loss=0.3599, attn_decoder_loss=0.2416, over 5794521.48 frames. ], batch size: 79, lr: 3.34e-03, grad_scale: 16.0 +2024-09-19 04:36:11,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=592000.0, ans=0.0 +2024-09-19 04:36:23,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=592000.0, ans=0.125 +2024-09-19 04:37:04,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=592120.0, ans=0.0 +2024-09-19 04:37:06,844 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.38 vs. limit=15.0 +2024-09-19 04:37:08,547 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.72 vs. 
limit=22.5 +2024-09-19 04:37:10,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=592160.0, ans=0.125 +2024-09-19 04:37:15,898 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.85 vs. limit=12.0 +2024-09-19 04:37:18,228 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:37:26,888 INFO [train.py:1198] (1/2) Epoch 33, batch 3250, loss[loss=0.2416, ctc_loss=0.1123, cr_loss=0.3493, attn_decoder_loss=0.2482, over 29710.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1185, cr_loss=0.3602, attn_decoder_loss=0.242, over 5800956.65 frames. ], batch size: 84, lr: 3.34e-03, grad_scale: 16.0 +2024-09-19 04:37:31,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=592200.0, ans=0.1 +2024-09-19 04:37:40,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=592240.0, ans=0.125 +2024-09-19 04:37:44,970 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.600e+01 8.640e+01 9.097e+01 9.766e+01 4.487e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-19 04:37:57,983 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.48 vs. limit=6.0 +2024-09-19 04:38:09,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=592280.0, ans=0.025 +2024-09-19 04:38:30,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=592360.0, ans=0.2 +2024-09-19 04:38:38,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=592360.0, ans=0.0 +2024-09-19 04:38:39,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=592360.0, ans=0.09899494936611666 +2024-09-19 04:38:42,510 INFO [train.py:1198] (1/2) Epoch 33, batch 3300, loss[loss=0.2498, ctc_loss=0.1247, cr_loss=0.364, attn_decoder_loss=0.2556, over 28156.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1179, cr_loss=0.3587, attn_decoder_loss=0.241, over 5797368.87 frames. ], batch size: 111, lr: 3.34e-03, grad_scale: 16.0 +2024-09-19 04:39:04,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=592440.0, ans=0.1 +2024-09-19 04:39:17,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=592480.0, ans=0.1 +2024-09-19 04:39:17,124 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:39:29,180 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.69 vs. limit=15.0 +2024-09-19 04:39:31,812 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.55 vs. limit=10.0 +2024-09-19 04:40:02,678 INFO [train.py:1198] (1/2) Epoch 33, batch 3350, loss[loss=0.2474, ctc_loss=0.119, cr_loss=0.3675, attn_decoder_loss=0.2535, over 28916.00 frames. 
], tot_loss[loss=0.2368, ctc_loss=0.1188, cr_loss=0.3603, attn_decoder_loss=0.2419, over 5774490.39 frames. ], batch size: 104, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:40:10,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=592600.0, ans=0.0 +2024-09-19 04:40:22,488 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.734e+01 8.878e+01 9.274e+01 9.993e+01 2.283e+02, threshold=1.855e+02, percent-clipped=2.0 +2024-09-19 04:40:25,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=592640.0, ans=0.125 +2024-09-19 04:40:43,420 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.55 vs. limit=22.5 +2024-09-19 04:40:47,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=592720.0, ans=0.125 +2024-09-19 04:40:50,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=592720.0, ans=0.125 +2024-09-19 04:40:55,191 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.84 vs. limit=15.0 +2024-09-19 04:41:08,518 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:41:11,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=592760.0, ans=0.125 +2024-09-19 04:41:17,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=592800.0, ans=0.1 +2024-09-19 04:41:19,056 INFO [train.py:1198] (1/2) Epoch 33, batch 3400, loss[loss=0.2064, ctc_loss=0.1017, cr_loss=0.308, attn_decoder_loss=0.2111, over 29395.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1193, cr_loss=0.3613, attn_decoder_loss=0.242, over 5766394.65 frames. ], batch size: 67, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:41:23,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=592800.0, ans=0.0 +2024-09-19 04:41:36,821 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.98 vs. limit=22.5 +2024-09-19 04:41:44,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.63 vs. limit=15.0 +2024-09-19 04:41:51,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=592880.0, ans=0.125 +2024-09-19 04:41:54,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=592880.0, ans=0.0 +2024-09-19 04:42:04,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=592920.0, ans=0.125 +2024-09-19 04:42:11,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.70 vs. 
limit=15.0 +2024-09-19 04:42:14,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=592920.0, ans=0.125 +2024-09-19 04:42:34,747 INFO [train.py:1198] (1/2) Epoch 33, batch 3450, loss[loss=0.2426, ctc_loss=0.1153, cr_loss=0.3383, attn_decoder_loss=0.2492, over 28373.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1192, cr_loss=0.3617, attn_decoder_loss=0.2422, over 5775433.81 frames. ], batch size: 111, lr: 3.34e-03, grad_scale: 8.0 +2024-09-19 04:42:35,412 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0 +2024-09-19 04:42:36,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=593000.0, ans=0.1 +2024-09-19 04:42:41,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=593000.0, ans=0.125 +2024-09-19 04:42:49,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=593000.0, ans=0.125 +2024-09-19 04:42:52,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=593040.0, ans=0.0 +2024-09-19 04:42:56,753 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.423e+01 8.686e+01 9.141e+01 9.790e+01 2.387e+02, threshold=1.828e+02, percent-clipped=2.0 +2024-09-19 04:43:06,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=593080.0, ans=0.025 +2024-09-19 04:43:11,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=593080.0, ans=0.1 +2024-09-19 04:43:32,065 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.88 vs. limit=6.0 +2024-09-19 04:43:55,197 INFO [train.py:1198] (1/2) Epoch 33, batch 3500, loss[loss=0.2134, ctc_loss=0.1043, cr_loss=0.3291, attn_decoder_loss=0.2182, over 29317.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1189, cr_loss=0.361, attn_decoder_loss=0.2417, over 5776904.07 frames. 
], batch size: 71, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 04:44:07,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_abs, batch_count=593200.0, ans=0.5 +2024-09-19 04:44:10,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=593240.0, ans=0.125 +2024-09-19 04:44:16,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=593240.0, ans=0.07 +2024-09-19 04:44:35,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=593280.0, ans=0.125 +2024-09-19 04:44:38,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=593320.0, ans=0.0 +2024-09-19 04:44:40,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=593320.0, ans=0.125 +2024-09-19 04:44:44,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=593320.0, ans=0.025 +2024-09-19 04:45:09,797 INFO [train.py:1198] (1/2) Epoch 33, batch 3550, loss[loss=0.2409, ctc_loss=0.1152, cr_loss=0.3337, attn_decoder_loss=0.2475, over 29704.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1187, cr_loss=0.3604, attn_decoder_loss=0.2418, over 5784389.38 frames. ], batch size: 89, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 04:45:12,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=593400.0, ans=0.125 +2024-09-19 04:45:28,727 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.090e+01 8.555e+01 9.089e+01 9.583e+01 3.040e+02, threshold=1.818e+02, percent-clipped=2.0 +2024-09-19 04:45:29,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=593440.0, ans=0.95 +2024-09-19 04:45:31,046 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.36 vs. limit=15.0 +2024-09-19 04:45:57,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=593520.0, ans=0.125 +2024-09-19 04:46:04,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=593520.0, ans=0.07 +2024-09-19 04:46:24,255 INFO [train.py:1198] (1/2) Epoch 33, batch 3600, loss[loss=0.228, ctc_loss=0.1104, cr_loss=0.3533, attn_decoder_loss=0.2332, over 29487.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1182, cr_loss=0.3599, attn_decoder_loss=0.2418, over 5793735.34 frames. 
], batch size: 77, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:46:35,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=593600.0, ans=0.125 +2024-09-19 04:47:09,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=593720.0, ans=0.125 +2024-09-19 04:47:18,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=593720.0, ans=0.0 +2024-09-19 04:47:27,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=593760.0, ans=0.07 +2024-09-19 04:47:29,588 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.58 vs. limit=15.0 +2024-09-19 04:47:36,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=593760.0, ans=0.125 +2024-09-19 04:47:38,881 INFO [train.py:1198] (1/2) Epoch 33, batch 3650, loss[loss=0.2503, ctc_loss=0.1279, cr_loss=0.3627, attn_decoder_loss=0.2558, over 29543.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1176, cr_loss=0.3587, attn_decoder_loss=0.2412, over 5796047.45 frames. ], batch size: 90, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:47:46,867 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=12.06 vs. limit=15.0 +2024-09-19 04:47:58,206 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.325e+01 8.858e+01 9.502e+01 1.563e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 04:48:33,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=593920.0, ans=0.125 +2024-09-19 04:48:33,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=593920.0, ans=0.1 +2024-09-19 04:48:35,486 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.43 vs. limit=15.0 +2024-09-19 04:48:36,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=593920.0, ans=0.125 +2024-09-19 04:48:52,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=593960.0, ans=0.2 +2024-09-19 04:48:55,548 INFO [train.py:1198] (1/2) Epoch 33, batch 3700, loss[loss=0.2386, ctc_loss=0.1145, cr_loss=0.3498, attn_decoder_loss=0.2447, over 29701.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1174, cr_loss=0.3587, attn_decoder_loss=0.2411, over 5806163.56 frames. 
], batch size: 84, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:49:01,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=594000.0, ans=0.0 +2024-09-19 04:49:10,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=594040.0, ans=0.0 +2024-09-19 04:49:27,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=594080.0, ans=0.2 +2024-09-19 04:49:37,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.75 vs. limit=15.0 +2024-09-19 04:49:40,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=594120.0, ans=0.125 +2024-09-19 04:49:41,469 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.11 vs. limit=6.0 +2024-09-19 04:49:51,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=594120.0, ans=0.0 +2024-09-19 04:50:11,530 INFO [train.py:1198] (1/2) Epoch 33, batch 3750, loss[loss=0.2132, ctc_loss=0.1062, cr_loss=0.3311, attn_decoder_loss=0.2178, over 29335.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1177, cr_loss=0.359, attn_decoder_loss=0.241, over 5808759.11 frames. ], batch size: 67, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:50:30,937 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.235e+01 8.561e+01 9.006e+01 9.475e+01 6.465e+02, threshold=1.801e+02, percent-clipped=2.0 +2024-09-19 04:50:44,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=594280.0, ans=0.1 +2024-09-19 04:50:59,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=594320.0, ans=0.125 +2024-09-19 04:51:20,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=594360.0, ans=0.1 +2024-09-19 04:51:26,168 INFO [train.py:1198] (1/2) Epoch 33, batch 3800, loss[loss=0.2486, ctc_loss=0.1225, cr_loss=0.3763, attn_decoder_loss=0.2542, over 29629.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1178, cr_loss=0.359, attn_decoder_loss=0.241, over 5798906.93 frames. ], batch size: 86, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:51:39,040 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.40 vs. limit=22.5 +2024-09-19 04:52:06,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=594480.0, ans=0.0 +2024-09-19 04:52:08,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=594480.0, ans=0.125 +2024-09-19 04:52:16,019 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.89 vs. limit=15.0 +2024-09-19 04:52:34,034 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.06 vs. 
limit=22.5 +2024-09-19 04:52:40,416 INFO [train.py:1198] (1/2) Epoch 33, batch 3850, loss[loss=0.2404, ctc_loss=0.1132, cr_loss=0.352, attn_decoder_loss=0.2467, over 29263.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1176, cr_loss=0.3584, attn_decoder_loss=0.241, over 5812991.38 frames. ], batch size: 100, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:52:59,670 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.228e+01 8.527e+01 9.047e+01 9.575e+01 1.638e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 04:53:08,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=594680.0, ans=0.1 +2024-09-19 04:53:13,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=594680.0, ans=0.025 +2024-09-19 04:53:38,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=594760.0, ans=0.125 +2024-09-19 04:53:41,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=594760.0, ans=0.0 +2024-09-19 04:53:46,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=594760.0, ans=0.2 +2024-09-19 04:53:56,277 INFO [train.py:1198] (1/2) Epoch 33, batch 3900, loss[loss=0.2458, ctc_loss=0.116, cr_loss=0.3316, attn_decoder_loss=0.2528, over 29630.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1182, cr_loss=0.3595, attn_decoder_loss=0.2415, over 5816673.90 frames. ], batch size: 86, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 04:53:58,097 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:54:12,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=594840.0, ans=0.05 +2024-09-19 04:54:24,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=594880.0, ans=0.125 +2024-09-19 04:54:30,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=594880.0, ans=0.0 +2024-09-19 04:54:39,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.85 vs. limit=12.0 +2024-09-19 04:54:40,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=594920.0, ans=0.0 +2024-09-19 04:54:53,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=594960.0, ans=0.025 +2024-09-19 04:55:04,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=594960.0, ans=0.125 +2024-09-19 04:55:11,404 INFO [train.py:1198] (1/2) Epoch 33, batch 3950, loss[loss=0.2504, ctc_loss=0.1255, cr_loss=0.3787, attn_decoder_loss=0.2558, over 29399.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1177, cr_loss=0.3591, attn_decoder_loss=0.2415, over 5836346.21 frames. 
], batch size: 97, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 04:55:14,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=595000.0, ans=0.025 +2024-09-19 04:55:19,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=595000.0, ans=0.0 +2024-09-19 04:55:22,565 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.22 vs. limit=6.0 +2024-09-19 04:55:32,033 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.582e+01 8.607e+01 9.033e+01 9.637e+01 1.585e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-19 04:55:53,711 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.89 vs. limit=15.0 +2024-09-19 04:56:03,925 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.80 vs. limit=22.5 +2024-09-19 04:56:12,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=595160.0, ans=0.125 +2024-09-19 04:56:15,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=595160.0, ans=0.0 +2024-09-19 04:56:15,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=595160.0, ans=0.125 +2024-09-19 04:56:25,734 INFO [train.py:1198] (1/2) Epoch 33, batch 4000, loss[loss=0.2174, ctc_loss=0.09819, cr_loss=0.3036, attn_decoder_loss=0.2239, over 29480.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1181, cr_loss=0.3596, attn_decoder_loss=0.2416, over 5813041.27 frames. ], batch size: 74, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:56:36,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=595200.0, ans=0.0 +2024-09-19 04:56:44,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=595240.0, ans=0.125 +2024-09-19 04:56:46,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=595240.0, ans=0.1 +2024-09-19 04:56:47,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=595240.0, ans=0.035 +2024-09-19 04:57:14,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=595320.0, ans=0.125 +2024-09-19 04:57:39,812 INFO [train.py:1198] (1/2) Epoch 33, batch 4050, loss[loss=0.2538, ctc_loss=0.1439, cr_loss=0.3885, attn_decoder_loss=0.2573, over 20212.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1178, cr_loss=0.3589, attn_decoder_loss=0.2413, over 5795729.76 frames. ], batch size: 210, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:57:44,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=595400.0, ans=0.0 +2024-09-19 04:57:52,609 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.53 vs. 
limit=15.0 +2024-09-19 04:57:56,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=595440.0, ans=0.2 +2024-09-19 04:58:00,217 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.406e+01 8.573e+01 9.185e+01 9.893e+01 2.518e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-19 04:58:09,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=595480.0, ans=0.125 +2024-09-19 04:58:10,094 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.93 vs. limit=22.5 +2024-09-19 04:58:14,464 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.99 vs. limit=15.0 +2024-09-19 04:58:19,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=595480.0, ans=0.125 +2024-09-19 04:58:34,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=595520.0, ans=0.0 +2024-09-19 04:58:34,832 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.40 vs. limit=22.5 +2024-09-19 04:58:51,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.82 vs. limit=6.0 +2024-09-19 04:58:55,015 INFO [train.py:1198] (1/2) Epoch 33, batch 4100, loss[loss=0.2609, ctc_loss=0.1384, cr_loss=0.4011, attn_decoder_loss=0.2656, over 29513.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1181, cr_loss=0.359, attn_decoder_loss=0.2416, over 5791149.60 frames. ], batch size: 90, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 04:58:59,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=595600.0, ans=0.125 +2024-09-19 04:59:06,207 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.62 vs. limit=12.0 +2024-09-19 04:59:14,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=595640.0, ans=0.0 +2024-09-19 04:59:17,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=595640.0, ans=0.125 +2024-09-19 04:59:21,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=595640.0, ans=0.0 +2024-09-19 04:59:44,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=595720.0, ans=0.125 +2024-09-19 04:59:55,536 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 04:59:59,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=595760.0, ans=0.125 +2024-09-19 05:00:09,892 INFO [train.py:1198] (1/2) Epoch 33, batch 4150, loss[loss=0.2344, ctc_loss=0.1162, cr_loss=0.3635, attn_decoder_loss=0.2394, over 29482.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1181, cr_loss=0.3597, attn_decoder_loss=0.2415, over 5796258.18 frames. 
], batch size: 77, lr: 3.33e-03, grad_scale: 16.0 +2024-09-19 05:00:15,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=595800.0, ans=0.09899494936611666 +2024-09-19 05:00:25,340 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.00 vs. limit=22.5 +2024-09-19 05:00:31,921 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.198e+01 8.400e+01 8.837e+01 9.482e+01 1.626e+02, threshold=1.767e+02, percent-clipped=0.0 +2024-09-19 05:00:35,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=595840.0, ans=0.09899494936611666 +2024-09-19 05:00:51,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=595880.0, ans=0.125 +2024-09-19 05:01:23,826 INFO [train.py:1198] (1/2) Epoch 33, batch 4200, loss[loss=0.2513, ctc_loss=0.1308, cr_loss=0.3661, attn_decoder_loss=0.2565, over 29501.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.118, cr_loss=0.3589, attn_decoder_loss=0.2418, over 5797695.93 frames. ], batch size: 90, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 05:01:30,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=596000.0, ans=0.0 +2024-09-19 05:01:30,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=596000.0, ans=0.1 +2024-09-19 05:01:31,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=596000.0, ans=0.125 +2024-09-19 05:01:34,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=596000.0, ans=0.125 +2024-09-19 05:01:49,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=596040.0, ans=0.125 +2024-09-19 05:02:01,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=596080.0, ans=0.0 +2024-09-19 05:02:06,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=596120.0, ans=0.125 +2024-09-19 05:02:26,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=596160.0, ans=0.125 +2024-09-19 05:02:38,335 INFO [train.py:1198] (1/2) Epoch 33, batch 4250, loss[loss=0.2129, ctc_loss=0.09583, cr_loss=0.3014, attn_decoder_loss=0.2192, over 29523.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1183, cr_loss=0.3592, attn_decoder_loss=0.242, over 5803103.05 frames. ], batch size: 74, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 05:02:59,962 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.204e+01 8.601e+01 9.024e+01 9.699e+01 1.912e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-19 05:03:52,547 INFO [train.py:1198] (1/2) Epoch 33, batch 4300, loss[loss=0.2478, ctc_loss=0.1269, cr_loss=0.3935, attn_decoder_loss=0.2525, over 29548.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1182, cr_loss=0.359, attn_decoder_loss=0.2422, over 5792414.10 frames. 
], batch size: 87, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 05:04:09,205 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:04:10,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=596440.0, ans=0.07 +2024-09-19 05:04:28,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=596480.0, ans=0.025 +2024-09-19 05:04:32,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.21 vs. limit=15.0 +2024-09-19 05:04:34,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=596480.0, ans=0.07 +2024-09-19 05:04:46,890 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.37 vs. limit=15.0 +2024-09-19 05:04:55,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=596560.0, ans=0.1 +2024-09-19 05:05:02,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=596560.0, ans=0.1 +2024-09-19 05:05:07,035 INFO [train.py:1198] (1/2) Epoch 33, batch 4350, loss[loss=0.2515, ctc_loss=0.1279, cr_loss=0.3784, attn_decoder_loss=0.2569, over 29478.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1206, cr_loss=0.3641, attn_decoder_loss=0.2454, over 5795534.67 frames. ], batch size: 97, lr: 3.33e-03, grad_scale: 8.0 +2024-09-19 05:05:07,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=596600.0, ans=0.0 +2024-09-19 05:05:25,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=596640.0, ans=0.05 +2024-09-19 05:05:29,990 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.368e+01 8.801e+01 9.131e+01 9.765e+01 2.028e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-19 05:05:34,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=596640.0, ans=0.1 +2024-09-19 05:06:22,392 INFO [train.py:1198] (1/2) Epoch 33, batch 4400, loss[loss=0.2598, ctc_loss=0.1455, cr_loss=0.418, attn_decoder_loss=0.2632, over 27331.00 frames. ], tot_loss[loss=0.2424, ctc_loss=0.1222, cr_loss=0.3673, attn_decoder_loss=0.2476, over 5764176.27 frames. ], batch size: 124, lr: 3.32e-03, grad_scale: 16.0 +2024-09-19 05:06:48,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=596840.0, ans=0.125 +2024-09-19 05:07:00,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=596880.0, ans=0.0 +2024-09-19 05:07:18,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.93 vs. 
limit=15.0 +2024-09-19 05:07:35,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=597000.0, ans=0.125 +2024-09-19 05:07:36,275 INFO [train.py:1198] (1/2) Epoch 33, batch 4450, loss[loss=0.2595, ctc_loss=0.1499, cr_loss=0.398, attn_decoder_loss=0.2628, over 20191.00 frames. ], tot_loss[loss=0.2449, ctc_loss=0.126, cr_loss=0.3732, attn_decoder_loss=0.2498, over 5574010.63 frames. ], batch size: 210, lr: 3.32e-03, grad_scale: 8.0 +2024-09-19 05:07:47,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=597000.0, ans=0.1 +2024-09-19 05:08:00,436 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.114e+01 9.208e+01 9.597e+01 1.124e+02 1.638e+02, threshold=1.919e+02, percent-clipped=0.0 +2024-09-19 05:08:06,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.52 vs. limit=15.0 +2024-09-19 05:08:10,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=597080.0, ans=0.025 +2024-09-19 05:08:52,055 INFO [train.py:1198] (1/2) Epoch 33, batch 4500, loss[loss=0.2562, ctc_loss=0.1461, cr_loss=0.3927, attn_decoder_loss=0.2597, over 20685.00 frames. ], tot_loss[loss=0.2469, ctc_loss=0.1293, cr_loss=0.3756, attn_decoder_loss=0.2516, over 5232767.99 frames. ], batch size: 209, lr: 3.32e-03, grad_scale: 8.0 +2024-09-19 05:09:02,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=597200.0, ans=0.0 +2024-09-19 05:09:08,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=597240.0, ans=0.1 +2024-09-19 05:09:13,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=597240.0, ans=0.1 +2024-09-19 05:09:25,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=597280.0, ans=0.1 +2024-09-19 05:10:21,347 INFO [train.py:1198] (1/2) Epoch 34, batch 0, loss[loss=0.22, ctc_loss=0.1063, cr_loss=0.3171, attn_decoder_loss=0.2256, over 29602.00 frames. ], tot_loss[loss=0.22, ctc_loss=0.1063, cr_loss=0.3171, attn_decoder_loss=0.2256, over 29602.00 frames. ], batch size: 73, lr: 3.27e-03, grad_scale: 16.0 +2024-09-19 05:10:21,348 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 05:10:26,105 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.6615, 4.5626, 4.3889, 4.1253], device='cuda:1') +2024-09-19 05:10:39,720 INFO [train.py:1230] (1/2) Epoch 34, validation: loss=0.2115, ctc_loss=0.03706, cr_loss=5.889e-15, attn_decoder_loss=0.2309, over 944034.00 frames. +2024-09-19 05:10:39,721 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 05:10:41,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=597300.0, ans=0.125 +2024-09-19 05:10:52,601 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.39 vs. 
limit=15.0 +2024-09-19 05:11:10,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=597380.0, ans=0.1 +2024-09-19 05:11:16,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=597380.0, ans=0.125 +2024-09-19 05:11:28,634 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:11:28,975 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.67 vs. limit=22.5 +2024-09-19 05:11:44,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=597460.0, ans=0.125 +2024-09-19 05:11:44,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=597460.0, ans=0.125 +2024-09-19 05:11:45,332 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.942e+01 9.532e+01 1.086e+02 1.158e+02 1.194e+03, threshold=2.172e+02, percent-clipped=2.0 +2024-09-19 05:11:50,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=597460.0, ans=0.125 +2024-09-19 05:11:56,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=597500.0, ans=0.0 +2024-09-19 05:11:56,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=597500.0, ans=0.0 +2024-09-19 05:11:57,343 INFO [train.py:1198] (1/2) Epoch 34, batch 50, loss[loss=0.2148, ctc_loss=0.09824, cr_loss=0.3137, attn_decoder_loss=0.2208, over 29423.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1203, cr_loss=0.3657, attn_decoder_loss=0.242, over 1268305.87 frames. ], batch size: 70, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:12:02,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=597500.0, ans=0.2 +2024-09-19 05:12:05,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=597500.0, ans=0.2 +2024-09-19 05:12:11,725 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.21 vs. limit=15.0 +2024-09-19 05:12:23,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=597540.0, ans=0.2 +2024-09-19 05:12:25,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=597540.0, ans=0.0 +2024-09-19 05:12:39,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=597580.0, ans=0.125 +2024-09-19 05:12:47,665 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.14 vs. 
limit=22.5 +2024-09-19 05:13:04,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=597660.0, ans=0.09899494936611666 +2024-09-19 05:13:04,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=597660.0, ans=0.125 +2024-09-19 05:13:10,876 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.88 vs. limit=15.0 +2024-09-19 05:13:16,054 INFO [train.py:1198] (1/2) Epoch 34, batch 100, loss[loss=0.2311, ctc_loss=0.1241, cr_loss=0.3598, attn_decoder_loss=0.235, over 29529.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1217, cr_loss=0.3681, attn_decoder_loss=0.2443, over 2251362.13 frames. ], batch size: 76, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:14:16,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=597860.0, ans=0.125 +2024-09-19 05:14:18,838 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.662e+01 8.574e+01 9.028e+01 9.395e+01 1.381e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-19 05:14:20,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=597860.0, ans=0.07 +2024-09-19 05:14:29,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=597900.0, ans=0.125 +2024-09-19 05:14:30,761 INFO [train.py:1198] (1/2) Epoch 34, batch 150, loss[loss=0.2179, ctc_loss=0.1057, cr_loss=0.3351, attn_decoder_loss=0.2229, over 29440.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1189, cr_loss=0.3613, attn_decoder_loss=0.2421, over 3046804.74 frames. ], batch size: 70, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:14:32,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=597900.0, ans=0.125 +2024-09-19 05:14:33,320 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.68 vs. limit=15.0 +2024-09-19 05:14:53,148 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.02 vs. limit=10.0 +2024-09-19 05:14:58,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=597940.0, ans=0.09899494936611666 +2024-09-19 05:15:03,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.94 vs. limit=15.0 +2024-09-19 05:15:04,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=597980.0, ans=0.2 +2024-09-19 05:15:20,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=598020.0, ans=0.125 +2024-09-19 05:15:48,461 INFO [train.py:1198] (1/2) Epoch 34, batch 200, loss[loss=0.259, ctc_loss=0.1473, cr_loss=0.3996, attn_decoder_loss=0.2625, over 27286.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1186, cr_loss=0.361, attn_decoder_loss=0.2416, over 3658258.91 frames. 
], batch size: 124, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:16:03,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=598140.0, ans=0.125 +2024-09-19 05:16:12,116 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.68 vs. limit=6.0 +2024-09-19 05:16:15,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=598140.0, ans=0.0 +2024-09-19 05:16:21,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.45 vs. limit=10.0 +2024-09-19 05:16:26,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=598180.0, ans=0.09899494936611666 +2024-09-19 05:16:32,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=598220.0, ans=0.0 +2024-09-19 05:16:54,091 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.406e+01 8.433e+01 8.957e+01 9.594e+01 1.517e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 05:17:02,730 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.01 vs. limit=15.0 +2024-09-19 05:17:06,375 INFO [train.py:1198] (1/2) Epoch 34, batch 250, loss[loss=0.2593, ctc_loss=0.1338, cr_loss=0.3869, attn_decoder_loss=0.2646, over 29156.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1182, cr_loss=0.3599, attn_decoder_loss=0.2417, over 4141007.04 frames. ], batch size: 100, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:17:27,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=598340.0, ans=0.125 +2024-09-19 05:17:29,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=598340.0, ans=6.0 +2024-09-19 05:17:47,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=598380.0, ans=0.0 +2024-09-19 05:17:58,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=598420.0, ans=0.125 +2024-09-19 05:18:10,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=598460.0, ans=0.0 +2024-09-19 05:18:12,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=598460.0, ans=0.1 +2024-09-19 05:18:22,533 INFO [train.py:1198] (1/2) Epoch 34, batch 300, loss[loss=0.2514, ctc_loss=0.132, cr_loss=0.3909, attn_decoder_loss=0.256, over 29544.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1176, cr_loss=0.3595, attn_decoder_loss=0.2413, over 4509506.54 frames. 
], batch size: 92, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:18:40,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=598540.0, ans=0.0 +2024-09-19 05:18:51,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=598580.0, ans=0.1 +2024-09-19 05:19:02,441 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.25 vs. limit=15.0 +2024-09-19 05:19:08,364 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.08 vs. limit=15.0 +2024-09-19 05:19:16,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.16 vs. limit=6.0 +2024-09-19 05:19:20,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=598620.0, ans=0.125 +2024-09-19 05:19:26,027 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.105e+01 8.376e+01 8.844e+01 9.262e+01 3.831e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-19 05:19:40,608 INFO [train.py:1198] (1/2) Epoch 34, batch 350, loss[loss=0.2177, ctc_loss=0.1007, cr_loss=0.3216, attn_decoder_loss=0.2235, over 29302.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1181, cr_loss=0.3596, attn_decoder_loss=0.242, over 4792984.62 frames. ], batch size: 71, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:19:48,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=598700.0, ans=0.125 +2024-09-19 05:19:50,373 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.43 vs. limit=15.0 +2024-09-19 05:20:07,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=598740.0, ans=0.125 +2024-09-19 05:20:13,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=598780.0, ans=0.0 +2024-09-19 05:20:26,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=598820.0, ans=0.0 +2024-09-19 05:20:27,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=598820.0, ans=0.1 +2024-09-19 05:20:35,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=598820.0, ans=0.2 +2024-09-19 05:20:37,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.26 vs. limit=15.0 +2024-09-19 05:20:38,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=598820.0, ans=0.2 +2024-09-19 05:20:58,169 INFO [train.py:1198] (1/2) Epoch 34, batch 400, loss[loss=0.2445, ctc_loss=0.1288, cr_loss=0.376, attn_decoder_loss=0.2491, over 29674.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1177, cr_loss=0.359, attn_decoder_loss=0.2415, over 5022933.67 frames. 
], batch size: 82, lr: 3.27e-03, grad_scale: 16.0 +2024-09-19 05:21:01,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=598900.0, ans=0.025 +2024-09-19 05:21:09,858 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.71 vs. limit=15.0 +2024-09-19 05:21:22,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=598940.0, ans=0.04949747468305833 +2024-09-19 05:21:35,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=598980.0, ans=0.1 +2024-09-19 05:21:36,791 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:21:38,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=598980.0, ans=0.0 +2024-09-19 05:21:52,345 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.82 vs. limit=6.0 +2024-09-19 05:21:58,519 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.43 vs. limit=15.0 +2024-09-19 05:22:02,034 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.466e+01 8.485e+01 9.014e+01 9.585e+01 2.227e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-19 05:22:14,034 INFO [train.py:1198] (1/2) Epoch 34, batch 450, loss[loss=0.2454, ctc_loss=0.1242, cr_loss=0.3717, attn_decoder_loss=0.2507, over 29695.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1183, cr_loss=0.36, attn_decoder_loss=0.2418, over 5184977.31 frames. ], batch size: 83, lr: 3.27e-03, grad_scale: 16.0 +2024-09-19 05:22:35,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=599140.0, ans=0.05 +2024-09-19 05:22:37,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.25 vs. limit=6.0 +2024-09-19 05:22:41,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=599140.0, ans=0.125 +2024-09-19 05:22:43,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=599180.0, ans=0.1 +2024-09-19 05:23:30,233 INFO [train.py:1198] (1/2) Epoch 34, batch 500, loss[loss=0.2526, ctc_loss=0.13, cr_loss=0.3879, attn_decoder_loss=0.2577, over 29458.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1178, cr_loss=0.3589, attn_decoder_loss=0.2411, over 5327549.95 frames. 
], batch size: 94, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:23:32,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=599300.0, ans=0.2 +2024-09-19 05:23:46,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=599340.0, ans=0.5 +2024-09-19 05:23:57,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=599340.0, ans=0.1 +2024-09-19 05:23:58,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=599340.0, ans=0.025 +2024-09-19 05:24:01,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=599380.0, ans=0.125 +2024-09-19 05:24:09,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=599380.0, ans=0.125 +2024-09-19 05:24:21,476 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:24:25,062 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.17 vs. limit=12.0 +2024-09-19 05:24:25,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=599420.0, ans=0.125 +2024-09-19 05:24:33,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=599460.0, ans=0.09899494936611666 +2024-09-19 05:24:37,604 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.414e+01 8.516e+01 9.011e+01 9.672e+01 1.492e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 05:24:50,588 INFO [train.py:1198] (1/2) Epoch 34, batch 550, loss[loss=0.2533, ctc_loss=0.1287, cr_loss=0.3732, attn_decoder_loss=0.2588, over 28877.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1179, cr_loss=0.3591, attn_decoder_loss=0.2412, over 5420914.14 frames. ], batch size: 104, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:24:51,628 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.00 vs. limit=15.0 +2024-09-19 05:25:04,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=599540.0, ans=0.0 +2024-09-19 05:25:34,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=599620.0, ans=0.125 +2024-09-19 05:25:52,108 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.67 vs. limit=15.0 +2024-09-19 05:25:54,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=599660.0, ans=0.0 +2024-09-19 05:26:05,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=599700.0, ans=0.0 +2024-09-19 05:26:06,403 INFO [train.py:1198] (1/2) Epoch 34, batch 600, loss[loss=0.2519, ctc_loss=0.1243, cr_loss=0.3604, attn_decoder_loss=0.2581, over 29209.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.118, cr_loss=0.3598, attn_decoder_loss=0.2415, over 5507686.39 frames. 
], batch size: 100, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:26:06,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=599700.0, ans=0.0 +2024-09-19 05:26:09,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=599700.0, ans=0.125 +2024-09-19 05:26:16,396 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.36 vs. limit=15.0 +2024-09-19 05:26:31,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=599740.0, ans=10.0 +2024-09-19 05:26:32,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=599740.0, ans=0.05 +2024-09-19 05:26:38,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=599780.0, ans=0.0 +2024-09-19 05:26:49,634 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.28 vs. limit=22.5 +2024-09-19 05:27:11,322 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.757e+01 8.437e+01 8.830e+01 9.420e+01 2.114e+02, threshold=1.766e+02, percent-clipped=1.0 +2024-09-19 05:27:14,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=599860.0, ans=0.125 +2024-09-19 05:27:16,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=599860.0, ans=0.125 +2024-09-19 05:27:21,949 INFO [train.py:1198] (1/2) Epoch 34, batch 650, loss[loss=0.2424, ctc_loss=0.1193, cr_loss=0.3432, attn_decoder_loss=0.2485, over 29766.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1172, cr_loss=0.3583, attn_decoder_loss=0.2409, over 5585656.74 frames. ], batch size: 81, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:27:23,045 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.91 vs. limit=15.0 +2024-09-19 05:27:34,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=599900.0, ans=0.0 +2024-09-19 05:27:34,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=599900.0, ans=0.125 +2024-09-19 05:27:48,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=599940.0, ans=0.0 +2024-09-19 05:27:50,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=599940.0, ans=0.0 +2024-09-19 05:27:59,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=599980.0, ans=0.0 +2024-09-19 05:28:17,133 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.41 vs. 
limit=15.0 +2024-09-19 05:28:27,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=600060.0, ans=0.1 +2024-09-19 05:28:41,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=600100.0, ans=0.0 +2024-09-19 05:28:42,767 INFO [train.py:1198] (1/2) Epoch 34, batch 700, loss[loss=0.2257, ctc_loss=0.1092, cr_loss=0.3465, attn_decoder_loss=0.2309, over 29534.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1179, cr_loss=0.3598, attn_decoder_loss=0.2415, over 5636378.91 frames. ], batch size: 76, lr: 3.27e-03, grad_scale: 8.0 +2024-09-19 05:29:39,593 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=600220.0, ans=0.2 +2024-09-19 05:29:44,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=600260.0, ans=0.125 +2024-09-19 05:29:48,399 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 8.364e+01 8.809e+01 9.436e+01 2.463e+02, threshold=1.762e+02, percent-clipped=1.0 +2024-09-19 05:29:48,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=600260.0, ans=0.2 +2024-09-19 05:29:53,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=600260.0, ans=0.125 +2024-09-19 05:29:59,005 INFO [train.py:1198] (1/2) Epoch 34, batch 750, loss[loss=0.2449, ctc_loss=0.122, cr_loss=0.3677, attn_decoder_loss=0.2504, over 29701.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1178, cr_loss=0.3596, attn_decoder_loss=0.2411, over 5676268.68 frames. ], batch size: 82, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:31:03,390 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.87 vs. limit=15.0 +2024-09-19 05:31:05,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=600460.0, ans=0.0 +2024-09-19 05:31:10,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=600460.0, ans=0.2 +2024-09-19 05:31:14,730 INFO [train.py:1198] (1/2) Epoch 34, batch 800, loss[loss=0.2097, ctc_loss=0.1016, cr_loss=0.3299, attn_decoder_loss=0.2144, over 29612.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1177, cr_loss=0.3593, attn_decoder_loss=0.241, over 5707030.17 frames. ], batch size: 73, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:31:15,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=600500.0, ans=0.0 +2024-09-19 05:31:26,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.11 vs. 
limit=15.0 +2024-09-19 05:31:37,683 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 05:31:56,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=600580.0, ans=0.125 +2024-09-19 05:32:21,718 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.053e+01 8.379e+01 9.063e+01 9.651e+01 1.795e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-19 05:32:23,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=600660.0, ans=0.0 +2024-09-19 05:32:32,193 INFO [train.py:1198] (1/2) Epoch 34, batch 850, loss[loss=0.2381, ctc_loss=0.1112, cr_loss=0.3344, attn_decoder_loss=0.2448, over 29730.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1173, cr_loss=0.3581, attn_decoder_loss=0.2407, over 5735695.95 frames. ], batch size: 89, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:32:48,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=600740.0, ans=0.1 +2024-09-19 05:32:58,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=600740.0, ans=0.125 +2024-09-19 05:33:10,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=600780.0, ans=0.125 +2024-09-19 05:33:13,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=600780.0, ans=0.95 +2024-09-19 05:33:34,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=600860.0, ans=0.05 +2024-09-19 05:33:50,508 INFO [train.py:1198] (1/2) Epoch 34, batch 900, loss[loss=0.2147, ctc_loss=0.1066, cr_loss=0.3374, attn_decoder_loss=0.2192, over 29622.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1175, cr_loss=0.3585, attn_decoder_loss=0.2408, over 5741967.86 frames. ], batch size: 73, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:34:04,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=600940.0, ans=0.0 +2024-09-19 05:34:19,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=600980.0, ans=0.125 +2024-09-19 05:34:31,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=600980.0, ans=0.0 +2024-09-19 05:34:32,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=600980.0, ans=0.125 +2024-09-19 05:34:56,767 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.371e+01 8.479e+01 9.154e+01 9.598e+01 2.436e+02, threshold=1.831e+02, percent-clipped=2.0 +2024-09-19 05:35:05,825 INFO [train.py:1198] (1/2) Epoch 34, batch 950, loss[loss=0.2155, ctc_loss=0.09974, cr_loss=0.3311, attn_decoder_loss=0.221, over 29499.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1177, cr_loss=0.3589, attn_decoder_loss=0.2411, over 5744373.69 frames. 
], batch size: 74, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:35:21,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=601140.0, ans=0.125 +2024-09-19 05:35:26,655 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.64 vs. limit=15.0 +2024-09-19 05:35:40,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=601180.0, ans=0.05 +2024-09-19 05:35:45,918 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.85 vs. limit=15.0 +2024-09-19 05:35:48,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys.whitening_limit, batch_count=601180.0, ans=6.0 +2024-09-19 05:36:01,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=601220.0, ans=0.1 +2024-09-19 05:36:04,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=601220.0, ans=0.1 +2024-09-19 05:36:24,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=601300.0, ans=0.125 +2024-09-19 05:36:26,135 INFO [train.py:1198] (1/2) Epoch 34, batch 1000, loss[loss=0.2243, ctc_loss=0.1051, cr_loss=0.3403, attn_decoder_loss=0.2299, over 29509.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1182, cr_loss=0.3601, attn_decoder_loss=0.2418, over 5738458.29 frames. ], batch size: 77, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:36:32,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=601300.0, ans=0.0 +2024-09-19 05:36:41,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=601340.0, ans=0.0 +2024-09-19 05:36:42,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=601340.0, ans=0.125 +2024-09-19 05:37:04,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=601380.0, ans=0.125 +2024-09-19 05:37:08,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=601380.0, ans=0.125 +2024-09-19 05:37:13,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=601420.0, ans=0.09899494936611666 +2024-09-19 05:37:14,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=601420.0, ans=0.0 +2024-09-19 05:37:17,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=601420.0, ans=0.125 +2024-09-19 05:37:26,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=601460.0, ans=0.125 +2024-09-19 05:37:32,489 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.521e+01 9.169e+01 9.649e+01 1.531e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-19 05:37:35,983 INFO [scaling.py:214] (1/2) ScheduledFloat: 
name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=601460.0, ans=0.125 +2024-09-19 05:37:41,620 INFO [train.py:1198] (1/2) Epoch 34, batch 1050, loss[loss=0.2433, ctc_loss=0.1185, cr_loss=0.3637, attn_decoder_loss=0.249, over 29672.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1181, cr_loss=0.3602, attn_decoder_loss=0.241, over 5746146.65 frames. ], batch size: 85, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:38:10,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=601580.0, ans=0.1 +2024-09-19 05:38:20,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=601580.0, ans=0.125 +2024-09-19 05:38:35,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=601620.0, ans=0.05 +2024-09-19 05:38:51,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.54 vs. limit=10.0 +2024-09-19 05:38:58,092 INFO [train.py:1198] (1/2) Epoch 34, batch 1100, loss[loss=0.2355, ctc_loss=0.1167, cr_loss=0.3485, attn_decoder_loss=0.2409, over 29482.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1181, cr_loss=0.3598, attn_decoder_loss=0.241, over 5757897.53 frames. ], batch size: 78, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:39:07,956 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.57 vs. limit=22.5 +2024-09-19 05:39:11,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=601740.0, ans=0.1 +2024-09-19 05:39:21,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=601740.0, ans=0.125 +2024-09-19 05:39:24,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=601740.0, ans=0.07 +2024-09-19 05:39:36,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=601780.0, ans=0.125 +2024-09-19 05:39:39,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=601780.0, ans=0.0 +2024-09-19 05:39:43,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=601780.0, ans=0.025 +2024-09-19 05:40:04,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=601860.0, ans=0.125 +2024-09-19 05:40:06,785 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.181e+01 8.458e+01 9.005e+01 9.723e+01 2.492e+02, threshold=1.801e+02, percent-clipped=1.0 +2024-09-19 05:40:18,194 INFO [train.py:1198] (1/2) Epoch 34, batch 1150, loss[loss=0.2359, ctc_loss=0.119, cr_loss=0.373, attn_decoder_loss=0.2406, over 29452.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1181, cr_loss=0.3598, attn_decoder_loss=0.2412, over 5756226.25 frames. ], batch size: 78, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:40:22,158 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.26 vs. 
limit=22.5 +2024-09-19 05:40:36,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=601940.0, ans=0.2 +2024-09-19 05:40:53,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=601980.0, ans=0.125 +2024-09-19 05:40:59,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=601980.0, ans=0.125 +2024-09-19 05:41:08,853 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.65 vs. limit=22.5 +2024-09-19 05:41:21,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=602060.0, ans=0.125 +2024-09-19 05:41:33,777 INFO [train.py:1198] (1/2) Epoch 34, batch 1200, loss[loss=0.2495, ctc_loss=0.1279, cr_loss=0.3911, attn_decoder_loss=0.2543, over 29685.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1185, cr_loss=0.3612, attn_decoder_loss=0.2416, over 5748633.20 frames. ], batch size: 85, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:41:44,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=602100.0, ans=0.07 +2024-09-19 05:42:09,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=602180.0, ans=0.125 +2024-09-19 05:42:12,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=602180.0, ans=0.1 +2024-09-19 05:42:12,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=602180.0, ans=0.125 +2024-09-19 05:42:41,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=602260.0, ans=0.0 +2024-09-19 05:42:42,361 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.436e+01 8.575e+01 9.202e+01 9.867e+01 2.398e+02, threshold=1.840e+02, percent-clipped=1.0 +2024-09-19 05:42:49,890 INFO [train.py:1198] (1/2) Epoch 34, batch 1250, loss[loss=0.2523, ctc_loss=0.1318, cr_loss=0.4001, attn_decoder_loss=0.2568, over 29532.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1188, cr_loss=0.3623, attn_decoder_loss=0.2422, over 5775813.13 frames. ], batch size: 92, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:43:43,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=602420.0, ans=0.1 +2024-09-19 05:44:10,634 INFO [train.py:1198] (1/2) Epoch 34, batch 1300, loss[loss=0.2369, ctc_loss=0.1102, cr_loss=0.3309, attn_decoder_loss=0.2436, over 28228.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1183, cr_loss=0.3609, attn_decoder_loss=0.2418, over 5779305.66 frames. ], batch size: 111, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:44:10,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=602500.0, ans=0.125 +2024-09-19 05:44:17,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.62 vs. 
limit=22.5 +2024-09-19 05:44:18,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=602500.0, ans=0.0 +2024-09-19 05:44:24,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=602540.0, ans=0.0 +2024-09-19 05:44:26,632 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.27 vs. limit=6.0 +2024-09-19 05:44:47,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=602580.0, ans=0.07 +2024-09-19 05:44:57,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=602620.0, ans=0.125 +2024-09-19 05:45:01,543 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.72 vs. limit=15.0 +2024-09-19 05:45:09,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=602660.0, ans=0.125 +2024-09-19 05:45:11,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=602660.0, ans=0.0 +2024-09-19 05:45:18,893 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.383e+01 8.885e+01 9.572e+01 2.098e+02, threshold=1.777e+02, percent-clipped=1.0 +2024-09-19 05:45:19,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=602660.0, ans=0.125 +2024-09-19 05:45:19,807 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=4.10 vs. limit=15.0 +2024-09-19 05:45:26,507 INFO [train.py:1198] (1/2) Epoch 34, batch 1350, loss[loss=0.2312, ctc_loss=0.1096, cr_loss=0.34, attn_decoder_loss=0.2372, over 29740.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1176, cr_loss=0.3596, attn_decoder_loss=0.2414, over 5795621.37 frames. ], batch size: 81, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:45:26,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=602700.0, ans=0.125 +2024-09-19 05:45:32,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=602700.0, ans=0.125 +2024-09-19 05:45:38,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=602700.0, ans=0.125 +2024-09-19 05:45:43,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=602740.0, ans=0.1 +2024-09-19 05:45:47,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=602740.0, ans=0.125 +2024-09-19 05:45:58,924 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.93 vs. 
limit=22.5 +2024-09-19 05:46:04,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=602780.0, ans=0.1 +2024-09-19 05:46:26,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=602860.0, ans=0.1 +2024-09-19 05:46:37,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=602860.0, ans=0.0 +2024-09-19 05:46:41,836 INFO [train.py:1198] (1/2) Epoch 34, batch 1400, loss[loss=0.2171, ctc_loss=0.1063, cr_loss=0.3388, attn_decoder_loss=0.2219, over 29579.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1179, cr_loss=0.3604, attn_decoder_loss=0.2415, over 5806934.97 frames. ], batch size: 69, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:46:46,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=602900.0, ans=0.0 +2024-09-19 05:47:00,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=602940.0, ans=0.0 +2024-09-19 05:47:00,572 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.83 vs. limit=15.0 +2024-09-19 05:47:35,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=603020.0, ans=0.0 +2024-09-19 05:47:49,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=603060.0, ans=0.0 +2024-09-19 05:47:51,984 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.415e+01 9.038e+01 9.472e+01 1.467e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-19 05:47:59,661 INFO [train.py:1198] (1/2) Epoch 34, batch 1450, loss[loss=0.2576, ctc_loss=0.1324, cr_loss=0.3831, attn_decoder_loss=0.263, over 29441.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1188, cr_loss=0.3617, attn_decoder_loss=0.2423, over 5804909.11 frames. ], batch size: 94, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:48:11,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=603100.0, ans=0.125 +2024-09-19 05:48:17,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=603140.0, ans=0.04949747468305833 +2024-09-19 05:48:17,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=603140.0, ans=0.0 +2024-09-19 05:48:31,436 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.41 vs. limit=6.0 +2024-09-19 05:48:35,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=603180.0, ans=0.2 +2024-09-19 05:48:40,909 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.42 vs. limit=15.0 +2024-09-19 05:48:42,416 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.10 vs. 
limit=12.0 +2024-09-19 05:48:43,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=603180.0, ans=0.0 +2024-09-19 05:48:46,687 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.92 vs. limit=15.0 +2024-09-19 05:48:50,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=603220.0, ans=0.125 +2024-09-19 05:48:58,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.43 vs. limit=15.0 +2024-09-19 05:49:03,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.58 vs. limit=6.0 +2024-09-19 05:49:04,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=603260.0, ans=0.04949747468305833 +2024-09-19 05:49:13,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=603260.0, ans=0.2 +2024-09-19 05:49:17,714 INFO [train.py:1198] (1/2) Epoch 34, batch 1500, loss[loss=0.2546, ctc_loss=0.1284, cr_loss=0.3906, attn_decoder_loss=0.2599, over 29628.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.1185, cr_loss=0.3615, attn_decoder_loss=0.2424, over 5805345.54 frames. ], batch size: 86, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:49:27,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=603300.0, ans=0.125 +2024-09-19 05:49:50,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=603380.0, ans=0.1 +2024-09-19 05:49:59,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=603380.0, ans=0.05 +2024-09-19 05:50:23,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=603460.0, ans=0.07 +2024-09-19 05:50:26,557 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.633e+01 8.541e+01 9.102e+01 9.733e+01 3.230e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-19 05:50:34,071 INFO [train.py:1198] (1/2) Epoch 34, batch 1550, loss[loss=0.2566, ctc_loss=0.1401, cr_loss=0.413, attn_decoder_loss=0.2603, over 29514.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.119, cr_loss=0.362, attn_decoder_loss=0.2426, over 5781548.46 frames. ], batch size: 90, lr: 3.26e-03, grad_scale: 8.0 +2024-09-19 05:51:12,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=603580.0, ans=0.0 +2024-09-19 05:51:30,041 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=13.03 vs. limit=15.0 +2024-09-19 05:51:37,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.07 vs. 
limit=15.0 +2024-09-19 05:51:39,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=603660.0, ans=0.0 +2024-09-19 05:51:53,947 INFO [train.py:1198] (1/2) Epoch 34, batch 1600, loss[loss=0.2469, ctc_loss=0.1222, cr_loss=0.3653, attn_decoder_loss=0.2527, over 29675.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1186, cr_loss=0.3609, attn_decoder_loss=0.242, over 5763839.07 frames. ], batch size: 85, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:51:54,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=603700.0, ans=0.5 +2024-09-19 05:52:04,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=603700.0, ans=0.125 +2024-09-19 05:53:01,945 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.513e+01 8.541e+01 8.929e+01 9.524e+01 1.976e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-19 05:53:09,385 INFO [train.py:1198] (1/2) Epoch 34, batch 1650, loss[loss=0.2363, ctc_loss=0.1126, cr_loss=0.3552, attn_decoder_loss=0.2422, over 29691.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.118, cr_loss=0.3599, attn_decoder_loss=0.2415, over 5759327.51 frames. ], batch size: 89, lr: 3.26e-03, grad_scale: 16.0 +2024-09-19 05:53:19,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=603900.0, ans=0.125 +2024-09-19 05:53:21,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=603900.0, ans=0.125 +2024-09-19 05:53:22,644 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.23 vs. limit=15.0 +2024-09-19 05:53:36,158 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.37 vs. limit=12.0 +2024-09-19 05:53:42,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.94 vs. 
limit=22.5 +2024-09-19 05:53:53,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=603980.0, ans=0.09899494936611666 +2024-09-19 05:53:53,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=603980.0, ans=0.125 +2024-09-19 05:54:00,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=604020.0, ans=0.1 +2024-09-19 05:54:03,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=604020.0, ans=0.125 +2024-09-19 05:54:14,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=604060.0, ans=0.125 +2024-09-19 05:54:20,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=604060.0, ans=0.2 +2024-09-19 05:54:22,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=604060.0, ans=0.125 +2024-09-19 05:54:25,724 INFO [train.py:1198] (1/2) Epoch 34, batch 1700, loss[loss=0.2117, ctc_loss=0.09915, cr_loss=0.3268, attn_decoder_loss=0.2169, over 29566.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1175, cr_loss=0.359, attn_decoder_loss=0.2412, over 5781261.16 frames. ], batch size: 69, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:54:27,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=604100.0, ans=0.0 +2024-09-19 05:55:35,943 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.641e+01 8.515e+01 9.078e+01 9.556e+01 1.170e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 05:55:42,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=604260.0, ans=0.1 +2024-09-19 05:55:45,691 INFO [train.py:1198] (1/2) Epoch 34, batch 1750, loss[loss=0.2022, ctc_loss=0.0873, cr_loss=0.2756, attn_decoder_loss=0.2088, over 29357.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1175, cr_loss=0.3591, attn_decoder_loss=0.2411, over 5788194.93 frames. ], batch size: 67, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:55:50,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=604300.0, ans=0.125 +2024-09-19 05:55:54,163 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.63 vs. limit=15.0 +2024-09-19 05:55:54,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.16 vs. limit=10.0 +2024-09-19 05:55:58,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=604300.0, ans=0.125 +2024-09-19 05:56:11,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=604340.0, ans=0.0 +2024-09-19 05:56:51,347 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.94 vs. 
limit=12.0 +2024-09-19 05:57:00,873 INFO [train.py:1198] (1/2) Epoch 34, batch 1800, loss[loss=0.2429, ctc_loss=0.1268, cr_loss=0.3829, attn_decoder_loss=0.2473, over 29705.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1176, cr_loss=0.359, attn_decoder_loss=0.2412, over 5790577.32 frames. ], batch size: 83, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:57:16,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=604540.0, ans=0.07 +2024-09-19 05:57:20,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=604540.0, ans=0.025 +2024-09-19 05:57:45,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=604620.0, ans=0.2 +2024-09-19 05:57:48,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=604620.0, ans=0.2 +2024-09-19 05:57:50,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=604620.0, ans=0.1 +2024-09-19 05:57:52,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=10.01 vs. limit=10.0 +2024-09-19 05:57:58,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.04 vs. limit=15.0 +2024-09-19 05:58:03,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=604660.0, ans=0.0 +2024-09-19 05:58:09,165 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.032e+01 8.453e+01 8.879e+01 9.546e+01 1.316e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-19 05:58:12,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=604660.0, ans=0.1 +2024-09-19 05:58:15,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=604700.0, ans=0.1 +2024-09-19 05:58:16,923 INFO [train.py:1198] (1/2) Epoch 34, batch 1850, loss[loss=0.2354, ctc_loss=0.1119, cr_loss=0.3493, attn_decoder_loss=0.2413, over 29621.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1171, cr_loss=0.3582, attn_decoder_loss=0.2407, over 5797406.55 frames. ], batch size: 86, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:58:35,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=604740.0, ans=0.125 +2024-09-19 05:58:41,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=604740.0, ans=0.125 +2024-09-19 05:58:53,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=604780.0, ans=0.025 +2024-09-19 05:59:13,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=604820.0, ans=0.0 +2024-09-19 05:59:24,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=604860.0, ans=0.1 +2024-09-19 05:59:36,986 INFO [train.py:1198] (1/2) Epoch 34, batch 1900, loss[loss=0.2385, ctc_loss=0.1118, cr_loss=0.3353, attn_decoder_loss=0.2451, over 29710.00 frames. 
], tot_loss[loss=0.2362, ctc_loss=0.1172, cr_loss=0.3586, attn_decoder_loss=0.2415, over 5805278.36 frames. ], batch size: 89, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 05:59:42,462 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.02 vs. limit=22.5 +2024-09-19 05:59:58,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=604940.0, ans=0.125 +2024-09-19 06:00:03,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.22 vs. limit=15.0 +2024-09-19 06:00:04,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=604940.0, ans=0.0 +2024-09-19 06:00:24,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=605020.0, ans=0.125 +2024-09-19 06:00:24,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=605020.0, ans=0.1 +2024-09-19 06:00:24,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=605020.0, ans=0.125 +2024-09-19 06:00:26,461 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.19 vs. limit=22.5 +2024-09-19 06:00:33,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=605020.0, ans=0.1 +2024-09-19 06:00:42,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=605060.0, ans=0.125 +2024-09-19 06:00:44,600 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=7.35 vs. limit=15.0 +2024-09-19 06:00:46,819 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.799e+01 9.191e+01 9.672e+01 1.531e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-19 06:00:48,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=605060.0, ans=0.125 +2024-09-19 06:00:52,900 INFO [train.py:1198] (1/2) Epoch 34, batch 1950, loss[loss=0.2294, ctc_loss=0.1098, cr_loss=0.339, attn_decoder_loss=0.2351, over 29443.00 frames. ], tot_loss[loss=0.2373, ctc_loss=0.118, cr_loss=0.3601, attn_decoder_loss=0.2425, over 5819860.62 frames. ], batch size: 78, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:01:02,486 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:01:09,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=605140.0, ans=0.0 +2024-09-19 06:01:20,603 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.78 vs. 
limit=15.0 +2024-09-19 06:01:46,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=605220.0, ans=0.125 +2024-09-19 06:01:48,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=605220.0, ans=0.125 +2024-09-19 06:02:08,568 INFO [train.py:1198] (1/2) Epoch 34, batch 2000, loss[loss=0.2149, ctc_loss=0.1006, cr_loss=0.3282, attn_decoder_loss=0.2203, over 29345.00 frames. ], tot_loss[loss=0.2379, ctc_loss=0.1186, cr_loss=0.3617, attn_decoder_loss=0.2431, over 5798558.73 frames. ], batch size: 67, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:02:24,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=605340.0, ans=0.1 +2024-09-19 06:02:24,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=605340.0, ans=0.125 +2024-09-19 06:02:51,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=605380.0, ans=0.0 +2024-09-19 06:02:59,919 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.43 vs. limit=22.5 +2024-09-19 06:03:16,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=605460.0, ans=0.0 +2024-09-19 06:03:17,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=605460.0, ans=10.0 +2024-09-19 06:03:22,869 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.556e+01 8.535e+01 9.098e+01 9.559e+01 2.375e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-19 06:03:28,940 INFO [train.py:1198] (1/2) Epoch 34, batch 2050, loss[loss=0.2126, ctc_loss=0.0959, cr_loss=0.3217, attn_decoder_loss=0.2185, over 29412.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1177, cr_loss=0.3598, attn_decoder_loss=0.2419, over 5790231.70 frames. ], batch size: 70, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:03:39,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=605500.0, ans=0.1 +2024-09-19 06:03:42,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=605540.0, ans=0.125 +2024-09-19 06:03:55,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.95 vs. limit=15.0 +2024-09-19 06:03:58,505 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.88 vs. limit=22.5 +2024-09-19 06:04:00,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=605580.0, ans=0.125 +2024-09-19 06:04:02,751 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.64 vs. 
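The recurring WARNING [optim.py:487] lines above summarize the optimizer's gradient-norm history: the five values after "grad-norm quartiles" read as min / 25% / median / 75% / max of recently observed norms, threshold is the cutoff above which gradients are rescaled, and percent-clipped says how often that fired. In every excerpt above the threshold is exactly twice the middle value, consistent with a clipping_scale * median rule. Below is a minimal sketch of such a diagnostic, not icefall's actual ScaledAdam implementation; the sliding-window size and the exact threshold rule are assumptions.

from collections import deque
import torch

class GradNormMonitor:
    """Sketch of a quartile-based gradient-clipping diagnostic.

    Assumptions: a sliding window of recent norms and a threshold of
    clipping_scale * median; icefall's ScaledAdam may differ in both.
    """

    def __init__(self, clipping_scale: float = 2.0, window: int = 128):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)
        self.num_steps = 0
        self.num_clipped = 0

    def step(self, params) -> None:
        grads = [p.grad for p in params if p.grad is not None]
        norm = float(torch.linalg.vector_norm(
            torch.stack([torch.linalg.vector_norm(g) for g in grads])))
        self.norms.append(norm)
        self.num_steps += 1

        ranked = sorted(self.norms)
        quartiles = [ranked[int(f * (len(ranked) - 1))]
                     for f in (0.0, 0.25, 0.5, 0.75, 1.0)]
        threshold = self.clipping_scale * quartiles[2]  # scale * median

        if norm > threshold:  # rescale outlier gradients in place
            self.num_clipped += 1
            for g in grads:
                g.mul_(threshold / norm)

        print(f"Clipping_scale={self.clipping_scale}, grad-norm quartiles "
              + " ".join(f"{q:.3e}" for q in quartiles)
              + f", threshold={threshold:.3e}, percent-clipped="
              + f"{100.0 * self.num_clipped / self.num_steps:.1f}")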
limit=15.0 +2024-09-19 06:04:37,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=605660.0, ans=0.0 +2024-09-19 06:04:44,661 INFO [train.py:1198] (1/2) Epoch 34, batch 2100, loss[loss=0.2248, ctc_loss=0.1038, cr_loss=0.3289, attn_decoder_loss=0.2309, over 29759.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1174, cr_loss=0.3595, attn_decoder_loss=0.2413, over 5802078.71 frames. ], batch size: 81, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:04:45,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten.whitening_limit, batch_count=605700.0, ans=15.0 +2024-09-19 06:04:49,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=605700.0, ans=0.025 +2024-09-19 06:04:52,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=605700.0, ans=0.125 +2024-09-19 06:05:07,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=605740.0, ans=0.0 +2024-09-19 06:05:07,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=605740.0, ans=0.2 +2024-09-19 06:05:20,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=605780.0, ans=0.1 +2024-09-19 06:05:53,724 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.782e+01 8.705e+01 9.050e+01 9.610e+01 1.138e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 06:05:59,956 INFO [train.py:1198] (1/2) Epoch 34, batch 2150, loss[loss=0.2355, ctc_loss=0.1168, cr_loss=0.3579, attn_decoder_loss=0.2408, over 29452.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1171, cr_loss=0.3588, attn_decoder_loss=0.241, over 5817073.37 frames. ], batch size: 78, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:06:09,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=605900.0, ans=0.0 +2024-09-19 06:06:38,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=605980.0, ans=0.125 +2024-09-19 06:06:39,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=605980.0, ans=0.125 +2024-09-19 06:06:46,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.whiten.whitening_limit, batch_count=606020.0, ans=12.0 +2024-09-19 06:07:14,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=606100.0, ans=0.125 +2024-09-19 06:07:17,779 INFO [train.py:1198] (1/2) Epoch 34, batch 2200, loss[loss=0.2487, ctc_loss=0.1275, cr_loss=0.3864, attn_decoder_loss=0.2536, over 29622.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1172, cr_loss=0.3588, attn_decoder_loss=0.2411, over 5811927.04 frames. ], batch size: 86, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:07:24,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=606100.0, ans=0.0 +2024-09-19 06:07:27,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.68 vs. 
limit=15.0 +2024-09-19 06:07:46,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=606140.0, ans=0.125 +2024-09-19 06:08:11,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=606220.0, ans=0.0 +2024-09-19 06:08:22,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=606260.0, ans=0.125 +2024-09-19 06:08:31,027 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.748e+01 9.126e+01 9.549e+01 2.332e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-19 06:08:31,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=606260.0, ans=0.125 +2024-09-19 06:08:31,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=606260.0, ans=0.125 +2024-09-19 06:08:32,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=606260.0, ans=0.0 +2024-09-19 06:08:35,725 INFO [train.py:1198] (1/2) Epoch 34, batch 2250, loss[loss=0.2238, ctc_loss=0.102, cr_loss=0.2986, attn_decoder_loss=0.2306, over 29696.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1173, cr_loss=0.3587, attn_decoder_loss=0.2412, over 5811276.55 frames. ], batch size: 82, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:08:43,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=606300.0, ans=0.0 +2024-09-19 06:08:52,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=606340.0, ans=0.125 +2024-09-19 06:08:57,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=606340.0, ans=0.125 +2024-09-19 06:08:58,866 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:09:00,603 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.94 vs. limit=15.0 +2024-09-19 06:09:20,504 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.71 vs. limit=22.5 +2024-09-19 06:09:30,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=606420.0, ans=0.2 +2024-09-19 06:09:36,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=606460.0, ans=0.125 +2024-09-19 06:09:51,729 INFO [train.py:1198] (1/2) Epoch 34, batch 2300, loss[loss=0.2126, ctc_loss=0.1013, cr_loss=0.319, attn_decoder_loss=0.2179, over 29723.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.117, cr_loss=0.3582, attn_decoder_loss=0.2405, over 5800237.04 frames. ], batch size: 72, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:09:52,620 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.38 vs. 
limit=12.0 +2024-09-19 06:09:59,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=606500.0, ans=0.0 +2024-09-19 06:10:04,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=606500.0, ans=0.2 +2024-09-19 06:10:08,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=606540.0, ans=0.125 +2024-09-19 06:10:11,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=606540.0, ans=0.125 +2024-09-19 06:10:25,554 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.46 vs. limit=15.0 +2024-09-19 06:10:43,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=606620.0, ans=0.0 +2024-09-19 06:10:52,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=606660.0, ans=0.95 +2024-09-19 06:11:00,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=606660.0, ans=0.0 +2024-09-19 06:11:02,987 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.476e+01 8.516e+01 9.072e+01 9.584e+01 2.753e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-19 06:11:07,544 INFO [train.py:1198] (1/2) Epoch 34, batch 2350, loss[loss=0.25, ctc_loss=0.1258, cr_loss=0.3879, attn_decoder_loss=0.2551, over 29684.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1173, cr_loss=0.3589, attn_decoder_loss=0.2407, over 5805801.03 frames. ], batch size: 83, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:11:09,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=606700.0, ans=0.025 +2024-09-19 06:11:31,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=606740.0, ans=0.1 +2024-09-19 06:12:11,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.min_abs, batch_count=606860.0, ans=0.5 +2024-09-19 06:12:26,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=606900.0, ans=0.0 +2024-09-19 06:12:27,793 INFO [train.py:1198] (1/2) Epoch 34, batch 2400, loss[loss=0.2339, ctc_loss=0.1245, cr_loss=0.3849, attn_decoder_loss=0.2375, over 29539.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1176, cr_loss=0.3591, attn_decoder_loss=0.241, over 5809341.42 frames. ], batch size: 76, lr: 3.25e-03, grad_scale: 16.0 +2024-09-19 06:12:28,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=606900.0, ans=0.125 +2024-09-19 06:12:35,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=606900.0, ans=0.2 +2024-09-19 06:12:39,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.69 vs. 
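Nearly every scaling.py:214 line above reports a ScheduledFloat: a hyperparameter (dropout_p, skip_rate, balancer prob, scale_min, ...) whose current value ans is looked up from the global batch_count instead of being a constant, which is why fixed-looking values such as ans=0.1 or ans=0.125 are still being re-evaluated this late in training. The sketch below shows the underlying idea as piecewise-linear interpolation between (batch_count, value) breakpoints; the breakpoints themselves are invented for illustration.

class ScheduledFloatSketch:
    """Piecewise-linear schedule over batch_count (illustrative sketch)."""

    def __init__(self, *points):
        # points: (batch_count, value) breakpoints, e.g. (0.0, 0.3), (20000.0, 0.1)
        self.points = sorted(points)

    def value(self, batch_count: float) -> float:
        pts = self.points
        if batch_count <= pts[0][0]:
            return pts[0][1]
        if batch_count >= pts[-1][0]:
            return pts[-1][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if x0 <= batch_count <= x1:
                t = (batch_count - x0) / (x1 - x0)
                return y0 + t * (y1 - y0)
        raise AssertionError("unreachable for sorted breakpoints")

# Hypothetical dropout schedule: 0.3 early, annealed to 0.1 by batch 20000,
# constant afterwards -- so by batch_count=605740.0 it reads 0.1, like the
# dropout_p entries in the log.
dropout_p = ScheduledFloatSketch((0.0, 0.3), (20000.0, 0.1))
assert abs(dropout_p.value(10000.0) - 0.2) < 1e-12
assert dropout_p.value(605740.0) == 0.1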
limit=15.0 +2024-09-19 06:12:44,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=606940.0, ans=0.0 +2024-09-19 06:12:47,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=606940.0, ans=10.0 +2024-09-19 06:13:08,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=606980.0, ans=0.125 +2024-09-19 06:13:29,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=607060.0, ans=0.125 +2024-09-19 06:13:32,004 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.16 vs. limit=15.0 +2024-09-19 06:13:40,175 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.418e+01 8.501e+01 8.985e+01 9.485e+01 2.487e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 06:13:43,270 INFO [train.py:1198] (1/2) Epoch 34, batch 2450, loss[loss=0.2517, ctc_loss=0.1303, cr_loss=0.3927, attn_decoder_loss=0.2564, over 29673.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1183, cr_loss=0.3608, attn_decoder_loss=0.2421, over 5785150.45 frames. ], batch size: 82, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:13:46,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=607100.0, ans=0.1 +2024-09-19 06:13:51,648 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.99 vs. limit=12.0 +2024-09-19 06:14:04,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=607140.0, ans=0.0 +2024-09-19 06:14:07,065 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.86 vs. limit=15.0 +2024-09-19 06:14:07,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=607140.0, ans=0.1 +2024-09-19 06:14:10,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=607140.0, ans=0.05 +2024-09-19 06:14:20,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.97 vs. limit=15.0 +2024-09-19 06:14:32,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=607220.0, ans=0.025 +2024-09-19 06:14:59,118 INFO [train.py:1198] (1/2) Epoch 34, batch 2500, loss[loss=0.2452, ctc_loss=0.1172, cr_loss=0.3602, attn_decoder_loss=0.2514, over 29636.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1182, cr_loss=0.3609, attn_decoder_loss=0.2419, over 5796011.38 frames. ], batch size: 86, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:15:04,024 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.04 vs. 
limit=10.0 +2024-09-19 06:15:15,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=607300.0, ans=0.1 +2024-09-19 06:15:21,387 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.90 vs. limit=12.0 +2024-09-19 06:15:21,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=607340.0, ans=0.1 +2024-09-19 06:15:28,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=607340.0, ans=0.0 +2024-09-19 06:15:47,284 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.65 vs. limit=6.0 +2024-09-19 06:15:54,137 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:16:01,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=607420.0, ans=0.125 +2024-09-19 06:16:16,475 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.480e+01 8.449e+01 8.900e+01 9.375e+01 2.079e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-19 06:16:19,648 INFO [train.py:1198] (1/2) Epoch 34, batch 2550, loss[loss=0.212, ctc_loss=0.1018, cr_loss=0.3312, attn_decoder_loss=0.2169, over 29350.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1181, cr_loss=0.3608, attn_decoder_loss=0.2419, over 5798785.02 frames. ], batch size: 67, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:16:20,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys.whitening_limit, batch_count=607500.0, ans=6.0 +2024-09-19 06:16:24,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=607500.0, ans=0.125 +2024-09-19 06:16:34,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=607540.0, ans=0.125 +2024-09-19 06:17:14,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=607620.0, ans=0.125 +2024-09-19 06:17:25,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.53 vs. limit=15.0 +2024-09-19 06:17:35,395 INFO [train.py:1198] (1/2) Epoch 34, batch 2600, loss[loss=0.2286, ctc_loss=0.1144, cr_loss=0.3477, attn_decoder_loss=0.2335, over 29453.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.118, cr_loss=0.3605, attn_decoder_loss=0.2421, over 5794666.82 frames. ], batch size: 78, lr: 3.25e-03, grad_scale: 8.0 +2024-09-19 06:17:39,398 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.40 vs. limit=22.5 +2024-09-19 06:17:42,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.69 vs. 
limit=10.0 +2024-09-19 06:18:20,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=607820.0, ans=0.125 +2024-09-19 06:18:31,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=607820.0, ans=0.1 +2024-09-19 06:18:47,315 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.373e+01 8.717e+01 9.275e+01 9.753e+01 1.560e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 06:18:50,257 INFO [train.py:1198] (1/2) Epoch 34, batch 2650, loss[loss=0.2515, ctc_loss=0.1287, cr_loss=0.3951, attn_decoder_loss=0.2563, over 29229.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1182, cr_loss=0.3607, attn_decoder_loss=0.2423, over 5801812.20 frames. ], batch size: 100, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:18:54,709 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.85 vs. limit=22.5 +2024-09-19 06:19:28,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=607980.0, ans=0.125 +2024-09-19 06:19:40,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=607980.0, ans=0.5 +2024-09-19 06:19:41,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.75 vs. limit=22.5 +2024-09-19 06:19:41,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=607980.0, ans=0.125 +2024-09-19 06:19:46,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=608020.0, ans=0.2 +2024-09-19 06:19:46,895 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.76 vs. limit=12.0 +2024-09-19 06:20:06,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.59 vs. limit=15.0 +2024-09-19 06:20:17,683 INFO [train.py:1198] (1/2) Epoch 34, batch 2700, loss[loss=0.2498, ctc_loss=0.1305, cr_loss=0.392, attn_decoder_loss=0.2543, over 29526.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1183, cr_loss=0.3607, attn_decoder_loss=0.2424, over 5797873.74 frames. ], batch size: 87, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:20:25,567 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:20:26,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.72 vs. limit=22.5 +2024-09-19 06:20:44,454 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.14 vs. limit=15.0 +2024-09-19 06:21:10,371 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.22 vs. 
limit=15.0 +2024-09-19 06:21:30,495 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.496e+01 8.546e+01 9.039e+01 9.900e+01 1.946e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 06:21:33,579 INFO [train.py:1198] (1/2) Epoch 34, batch 2750, loss[loss=0.2201, ctc_loss=0.1038, cr_loss=0.3311, attn_decoder_loss=0.2257, over 29525.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1177, cr_loss=0.3598, attn_decoder_loss=0.2413, over 5795434.65 frames. ], batch size: 75, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:21:40,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=608300.0, ans=0.07 +2024-09-19 06:21:41,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=608300.0, ans=0.125 +2024-09-19 06:21:53,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=608340.0, ans=0.1 +2024-09-19 06:22:20,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=608420.0, ans=0.1 +2024-09-19 06:22:40,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=608460.0, ans=0.025 +2024-09-19 06:22:40,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=608460.0, ans=0.0 +2024-09-19 06:22:42,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=608460.0, ans=0.0 +2024-09-19 06:22:51,651 INFO [train.py:1198] (1/2) Epoch 34, batch 2800, loss[loss=0.2465, ctc_loss=0.1421, cr_loss=0.3779, attn_decoder_loss=0.2498, over 20202.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1178, cr_loss=0.3599, attn_decoder_loss=0.2411, over 5775026.47 frames. ], batch size: 213, lr: 3.24e-03, grad_scale: 16.0 +2024-09-19 06:22:56,730 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.27 vs. limit=22.5 +2024-09-19 06:23:00,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=608500.0, ans=0.0 +2024-09-19 06:23:10,919 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.50 vs. limit=15.0 +2024-09-19 06:23:11,414 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.10 vs. 
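The "Whitening: name=..., metric=M vs. limit=L" lines track how far a module's activations are from a "white" (isotropic) covariance; the limit is itself scheduled, and the auxiliary whitening loss only engages when the metric exceeds it. One plausible whiteness measure, used here purely as an assumption for illustration, is d * tr(C^2) / tr(C)^2 for a d-channel covariance C: it equals 1 when all eigenvalues are equal and approaches d when a single direction dominates, which matches the scale of the logged metrics (a few units against limits of 6 to 22.5).

import torch

def whitening_metric_sketch(x: torch.Tensor, num_groups: int = 1) -> float:
    """Illustrative whiteness measure for activations x of shape (N, C).

    Splits channels into num_groups groups, forms each group's d x d
    covariance C, and averages d * tr(C @ C) / tr(C)**2 across groups.
    (Sketch only; the exact metric in icefall's scaling.py may differ.)
    """
    n, c = x.shape
    d = c // num_groups
    metrics = []
    for g in range(num_groups):
        xg = x[:, g * d:(g + 1) * d]
        xg = xg - xg.mean(dim=0, keepdim=True)  # center per channel
        cov = (xg.T @ xg) / n                   # d x d covariance
        tr = torch.diagonal(cov).sum()
        tr_c2 = (cov * cov).sum()               # tr(C @ C) for symmetric C
        metrics.append(d * tr_c2 / (tr * tr + 1e-20))
    return float(torch.stack(metrics).mean())

x = torch.randn(4096, 64)
print(whitening_metric_sketch(x))                       # ~1.0: near-white
print(whitening_metric_sketch(x[:, :1].expand(-1, 64)))  # ~64: rank-1 features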
limit=15.0 +2024-09-19 06:23:13,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=608540.0, ans=0.1 +2024-09-19 06:23:16,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=608540.0, ans=0.125 +2024-09-19 06:23:16,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=608540.0, ans=0.125 +2024-09-19 06:23:17,932 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:23:29,082 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.50 vs. limit=22.5 +2024-09-19 06:23:45,504 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.50 vs. limit=15.0 +2024-09-19 06:23:46,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=608620.0, ans=0.125 +2024-09-19 06:23:52,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=608660.0, ans=0.125 +2024-09-19 06:24:07,383 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.476e+01 9.039e+01 9.642e+01 3.312e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 06:24:08,985 INFO [train.py:1198] (1/2) Epoch 34, batch 2850, loss[loss=0.2292, ctc_loss=0.1207, cr_loss=0.3719, attn_decoder_loss=0.233, over 29503.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1185, cr_loss=0.3608, attn_decoder_loss=0.2417, over 5761110.73 frames. ], batch size: 77, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:24:19,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=608700.0, ans=0.125 +2024-09-19 06:24:44,945 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.30 vs. limit=22.5 +2024-09-19 06:24:47,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=608780.0, ans=0.1 +2024-09-19 06:24:48,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.88 vs. limit=15.0 +2024-09-19 06:24:53,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=608820.0, ans=0.2 +2024-09-19 06:25:00,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=608820.0, ans=0.0 +2024-09-19 06:25:01,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=608820.0, ans=0.1 +2024-09-19 06:25:10,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=608860.0, ans=0.0 +2024-09-19 06:25:25,011 INFO [train.py:1198] (1/2) Epoch 34, batch 2900, loss[loss=0.2308, ctc_loss=0.1163, cr_loss=0.3621, attn_decoder_loss=0.2355, over 29438.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1189, cr_loss=0.362, attn_decoder_loss=0.2427, over 5786906.07 frames. 
], batch size: 79, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:25:37,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=608900.0, ans=0.125 +2024-09-19 06:25:43,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=608940.0, ans=0.125 +2024-09-19 06:26:26,830 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.29 vs. limit=15.0 +2024-09-19 06:26:41,352 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.489e+01 8.332e+01 8.811e+01 9.212e+01 1.381e+02, threshold=1.762e+02, percent-clipped=0.0 +2024-09-19 06:26:42,921 INFO [train.py:1198] (1/2) Epoch 34, batch 2950, loss[loss=0.2214, ctc_loss=0.1147, cr_loss=0.3523, attn_decoder_loss=0.2255, over 29509.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1179, cr_loss=0.3598, attn_decoder_loss=0.2414, over 5781159.45 frames. ], batch size: 75, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:27:05,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.84 vs. limit=15.0 +2024-09-19 06:27:20,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=609180.0, ans=0.1 +2024-09-19 06:27:49,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=609260.0, ans=0.0 +2024-09-19 06:27:54,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=609260.0, ans=0.125 +2024-09-19 06:27:56,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=609260.0, ans=0.0 +2024-09-19 06:28:00,848 INFO [train.py:1198] (1/2) Epoch 34, batch 3000, loss[loss=0.2371, ctc_loss=0.1175, cr_loss=0.3604, attn_decoder_loss=0.2424, over 29790.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1174, cr_loss=0.3585, attn_decoder_loss=0.241, over 5782931.43 frames. ], batch size: 81, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:28:00,848 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 06:28:13,099 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.5.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.9958, 5.1169, 4.7967, 2.7444], device='cuda:1') +2024-09-19 06:28:19,437 INFO [train.py:1230] (1/2) Epoch 34, validation: loss=0.2118, ctc_loss=0.03645, cr_loss=6.088e-15, attn_decoder_loss=0.2313, over 944034.00 frames. +2024-09-19 06:28:19,437 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 06:28:25,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=609300.0, ans=0.1 +2024-09-19 06:29:06,007 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.38 vs. 
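Every loss[...] / tot_loss[...] entry above carries one composite objective plus its three parts: the CTC branch (ctc_loss), a consistency-regularization term (cr_loss), and the attention-decoder branch (attn_decoder_loss); this is the CR-CTC plus attention-decoder recipe referenced by the pull request in the README. The near-zero cr_loss=6.088e-15 in the validation block above also suggests the consistency term is effectively inactive in eval mode. The composite is a weighted combination of the parts; the weights below are placeholder assumptions, not values recovered from this log.

# Placeholder weights: the real values live in the recipe's training script.
CTC_WEIGHT = 0.1
CR_WEIGHT = 0.1
ATTN_DECODER_WEIGHT = 0.8

def combine_losses(ctc_loss: float, cr_loss: float,
                   attn_decoder_loss: float) -> float:
    """Weighted sum of the three logged components (illustrative)."""
    return (CTC_WEIGHT * ctc_loss
            + CR_WEIGHT * cr_loss
            + ATTN_DECODER_WEIGHT * attn_decoder_loss)

# One batch from the log above (Epoch 34, batch 3000):
# loss=0.2371, ctc_loss=0.1175, cr_loss=0.3604, attn_decoder_loss=0.2424.
print(combine_losses(0.1175, 0.3604, 0.2424))  # 0.2417: same ballpark as the
                                               # logged 0.2371; exact agreement
                                               # needs the recipe's true weights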
limit=6.0 +2024-09-19 06:29:18,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=609460.0, ans=0.2 +2024-09-19 06:29:30,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=609460.0, ans=0.0 +2024-09-19 06:29:33,593 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.251e+01 8.609e+01 9.134e+01 9.597e+01 3.076e+02, threshold=1.827e+02, percent-clipped=2.0 +2024-09-19 06:29:35,187 INFO [train.py:1198] (1/2) Epoch 34, batch 3050, loss[loss=0.222, ctc_loss=0.1109, cr_loss=0.3474, attn_decoder_loss=0.2267, over 29503.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1178, cr_loss=0.3596, attn_decoder_loss=0.2417, over 5776221.85 frames. ], batch size: 76, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:29:41,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=609500.0, ans=0.125 +2024-09-19 06:29:43,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=609500.0, ans=0.125 +2024-09-19 06:29:56,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=609540.0, ans=0.1 +2024-09-19 06:29:56,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=609540.0, ans=0.1 +2024-09-19 06:29:57,062 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.11 vs. limit=15.0 +2024-09-19 06:30:05,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=609580.0, ans=0.125 +2024-09-19 06:30:11,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=609580.0, ans=0.1 +2024-09-19 06:30:15,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=609580.0, ans=0.125 +2024-09-19 06:30:24,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=609620.0, ans=0.2 +2024-09-19 06:30:54,915 INFO [train.py:1198] (1/2) Epoch 34, batch 3100, loss[loss=0.2542, ctc_loss=0.13, cr_loss=0.4039, attn_decoder_loss=0.2591, over 29266.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1174, cr_loss=0.359, attn_decoder_loss=0.2414, over 5775448.83 frames. ], batch size: 100, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:31:08,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=609740.0, ans=0.0 +2024-09-19 06:31:26,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=609780.0, ans=0.125 +2024-09-19 06:31:40,959 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.94 vs. 
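During the validation pass the trainer also dumps attn_weights_entropy for selected self-attention modules (the zipformer.py:1858 line above, apparently one value per attention head, e.g. tensor([4.9958, 5.1169, 4.7967, 2.7444])). Entropy is a quick health check for attention: a uniform distribution over T keys scores log(T), while a collapsed, near-one-hot pattern scores close to 0, so values around 5.0 are consistent with soft attention spread over a few hundred frames. Below is a sketch of that computation; the (heads, batch, query, key) layout and the mean reduction are assumptions.

import torch

def attn_weights_entropy(attn_weights: torch.Tensor) -> torch.Tensor:
    """Mean entropy per head of attention distributions.

    attn_weights: (num_heads, batch, query_len, key_len), each key row
    summing to 1. Returns a (num_heads,) tensor like the logged value.
    """
    p = attn_weights.clamp(min=1e-20)       # avoid log(0)
    ent = -(p * p.log()).sum(dim=-1)        # entropy of each distribution
    return ent.mean(dim=(1, 2))             # average over batch and queries

heads, batch, q, k = 4, 2, 150, 150
w = torch.softmax(torch.randn(heads, batch, q, k), dim=-1)
print(attn_weights_entropy(w))  # bounded above by log(150) ~= 5.01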
limit=15.0 +2024-09-19 06:31:49,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=609820.0, ans=0.125 +2024-09-19 06:32:03,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=609860.0, ans=0.125 +2024-09-19 06:32:09,148 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.390e+01 8.517e+01 8.968e+01 9.546e+01 1.931e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-19 06:32:10,677 INFO [train.py:1198] (1/2) Epoch 34, batch 3150, loss[loss=0.2494, ctc_loss=0.1261, cr_loss=0.3813, attn_decoder_loss=0.2546, over 28814.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1177, cr_loss=0.3595, attn_decoder_loss=0.2416, over 5782628.97 frames. ], batch size: 104, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:32:22,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=609900.0, ans=0.125 +2024-09-19 06:32:30,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=609940.0, ans=0.1 +2024-09-19 06:32:36,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=609940.0, ans=0.0 +2024-09-19 06:32:36,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=609940.0, ans=0.125 +2024-09-19 06:33:07,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=610020.0, ans=0.2 +2024-09-19 06:33:25,797 INFO [train.py:1198] (1/2) Epoch 34, batch 3200, loss[loss=0.2352, ctc_loss=0.1215, cr_loss=0.3704, attn_decoder_loss=0.2396, over 29413.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1171, cr_loss=0.3583, attn_decoder_loss=0.241, over 5793451.65 frames. 
], batch size: 79, lr: 3.24e-03, grad_scale: 16.0 +2024-09-19 06:33:26,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=610100.0, ans=0.2 +2024-09-19 06:33:32,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=610100.0, ans=0.125 +2024-09-19 06:33:35,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=610100.0, ans=0.0 +2024-09-19 06:33:55,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=610180.0, ans=0.125 +2024-09-19 06:33:56,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=610180.0, ans=0.125 +2024-09-19 06:34:02,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=610180.0, ans=0.125 +2024-09-19 06:34:08,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=610180.0, ans=0.1 +2024-09-19 06:34:27,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=610260.0, ans=0.125 +2024-09-19 06:34:32,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=610260.0, ans=0.0 +2024-09-19 06:34:42,402 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.785e+01 8.499e+01 9.052e+01 9.605e+01 1.287e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 06:34:43,878 INFO [train.py:1198] (1/2) Epoch 34, batch 3250, loss[loss=0.2373, ctc_loss=0.1134, cr_loss=0.3365, attn_decoder_loss=0.2436, over 29705.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.117, cr_loss=0.3586, attn_decoder_loss=0.2414, over 5799825.92 frames. ], batch size: 84, lr: 3.24e-03, grad_scale: 16.0 +2024-09-19 06:34:44,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=610300.0, ans=0.125 +2024-09-19 06:34:47,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=610300.0, ans=0.0 +2024-09-19 06:35:08,932 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:36:01,924 INFO [train.py:1198] (1/2) Epoch 34, batch 3300, loss[loss=0.2428, ctc_loss=0.1179, cr_loss=0.3708, attn_decoder_loss=0.2484, over 28270.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1162, cr_loss=0.3566, attn_decoder_loss=0.2402, over 5797149.92 frames. 
], batch size: 111, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:36:11,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=610500.0, ans=0.0 +2024-09-19 06:36:23,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=610540.0, ans=0.125 +2024-09-19 06:36:26,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=610540.0, ans=0.2 +2024-09-19 06:36:32,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=610580.0, ans=0.125 +2024-09-19 06:36:49,930 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.46 vs. limit=15.0 +2024-09-19 06:36:56,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=610620.0, ans=0.0 +2024-09-19 06:37:04,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=610660.0, ans=0.0 +2024-09-19 06:37:17,122 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 8.592e+01 9.077e+01 9.630e+01 2.771e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-19 06:37:17,148 INFO [train.py:1198] (1/2) Epoch 34, batch 3350, loss[loss=0.2519, ctc_loss=0.128, cr_loss=0.3801, attn_decoder_loss=0.2573, over 28900.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1172, cr_loss=0.358, attn_decoder_loss=0.2411, over 5774504.25 frames. ], batch size: 104, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:37:35,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=610740.0, ans=0.0 +2024-09-19 06:37:43,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=610740.0, ans=0.125 +2024-09-19 06:37:47,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=610780.0, ans=0.0 +2024-09-19 06:37:59,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=610780.0, ans=0.125 +2024-09-19 06:38:06,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=610820.0, ans=10.0 +2024-09-19 06:38:24,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=610860.0, ans=0.125 +2024-09-19 06:38:35,424 INFO [train.py:1198] (1/2) Epoch 34, batch 3400, loss[loss=0.2069, ctc_loss=0.1068, cr_loss=0.3257, attn_decoder_loss=0.2107, over 29318.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1172, cr_loss=0.3579, attn_decoder_loss=0.241, over 5767750.84 frames. 
], batch size: 67, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:38:54,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=610940.0, ans=0.125 +2024-09-19 06:39:31,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=611020.0, ans=0.125 +2024-09-19 06:39:53,418 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.710e+01 9.261e+01 9.751e+01 2.657e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-19 06:39:53,441 INFO [train.py:1198] (1/2) Epoch 34, batch 3450, loss[loss=0.2446, ctc_loss=0.1203, cr_loss=0.3509, attn_decoder_loss=0.2506, over 28369.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1179, cr_loss=0.359, attn_decoder_loss=0.2418, over 5775900.58 frames. ], batch size: 111, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:39:55,683 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=14.98 vs. limit=22.5 +2024-09-19 06:40:04,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=611100.0, ans=0.95 +2024-09-19 06:40:19,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=611140.0, ans=0.125 +2024-09-19 06:40:54,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=611260.0, ans=0.035 +2024-09-19 06:40:57,238 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.12 vs. limit=15.0 +2024-09-19 06:41:09,776 INFO [train.py:1198] (1/2) Epoch 34, batch 3500, loss[loss=0.2155, ctc_loss=0.1011, cr_loss=0.3245, attn_decoder_loss=0.221, over 29319.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1174, cr_loss=0.3579, attn_decoder_loss=0.2412, over 5776607.34 frames. ], batch size: 71, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:41:10,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=611300.0, ans=0.0 +2024-09-19 06:41:11,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=611300.0, ans=0.2 +2024-09-19 06:41:19,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=611300.0, ans=0.125 +2024-09-19 06:41:22,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=611300.0, ans=0.125 +2024-09-19 06:41:24,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.36 vs. limit=15.0 +2024-09-19 06:41:33,430 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=5.70 vs. limit=8.0 +2024-09-19 06:41:35,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=611340.0, ans=0.125 +2024-09-19 06:41:37,581 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.35 vs. 
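The tot_loss[..., over N frames.] figures above are not the current batch's loss but a running summary in which each batch contributes in proportion to its frame count, with "over N frames" reporting the total weight currently behind the average; that is why those values drift slowly while the per-batch loss[...] numbers jump around. A minimal sketch of frame-weighted tracking follows; the plain cumulative form is an assumption (a decayed moving average, which would also let the frame total shrink, behaves similarly).

class FrameWeightedLoss:
    """Running frame-weighted loss average (illustrative sketch)."""

    def __init__(self) -> None:
        self.weighted_sum = 0.0
        self.frames = 0.0

    def update(self, batch_loss: float, batch_frames: float) -> None:
        self.weighted_sum += batch_loss * batch_frames
        self.frames += batch_frames

    @property
    def value(self) -> float:
        return self.weighted_sum / max(self.frames, 1.0)

tracker = FrameWeightedLoss()
tracker.update(0.2352, 29413.0)  # per-batch figures taken from the log above
tracker.update(0.2373, 29705.0)
print(f"tot_loss={tracker.value:.4f} over {tracker.frames:.0f} frames")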
limit=15.0 +2024-09-19 06:41:48,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=611380.0, ans=0.2 +2024-09-19 06:42:09,391 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.23 vs. limit=15.0 +2024-09-19 06:42:10,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=611460.0, ans=0.035 +2024-09-19 06:42:19,119 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:42:26,136 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.418e+01 8.638e+01 9.255e+01 9.995e+01 3.984e+02, threshold=1.851e+02, percent-clipped=2.0 +2024-09-19 06:42:26,158 INFO [train.py:1198] (1/2) Epoch 34, batch 3550, loss[loss=0.2414, ctc_loss=0.1159, cr_loss=0.3526, attn_decoder_loss=0.2475, over 29709.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1175, cr_loss=0.3586, attn_decoder_loss=0.2412, over 5784205.34 frames. ], batch size: 89, lr: 3.24e-03, grad_scale: 8.0 +2024-09-19 06:42:27,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=611500.0, ans=0.0 +2024-09-19 06:43:34,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=611660.0, ans=0.125 +2024-09-19 06:43:37,002 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.16 vs. limit=12.0 +2024-09-19 06:43:42,114 INFO [train.py:1198] (1/2) Epoch 34, batch 3600, loss[loss=0.2377, ctc_loss=0.1223, cr_loss=0.3681, attn_decoder_loss=0.2423, over 29483.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1175, cr_loss=0.3586, attn_decoder_loss=0.2413, over 5793283.39 frames. ], batch size: 77, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 06:43:54,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=611700.0, ans=0.0 +2024-09-19 06:44:12,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.51 vs. limit=15.0 +2024-09-19 06:44:22,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=611780.0, ans=0.0 +2024-09-19 06:44:27,792 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.19 vs. limit=22.5 +2024-09-19 06:44:39,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.59 vs. limit=15.0 +2024-09-19 06:44:56,828 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.640e+01 9.081e+01 9.603e+01 2.325e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 06:44:56,854 INFO [train.py:1198] (1/2) Epoch 34, batch 3650, loss[loss=0.259, ctc_loss=0.1402, cr_loss=0.4264, attn_decoder_loss=0.2627, over 29541.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1172, cr_loss=0.358, attn_decoder_loss=0.2409, over 5794886.38 frames. 
], batch size: 90, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 06:44:57,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=611900.0, ans=0.0 +2024-09-19 06:45:02,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=611900.0, ans=0.0 +2024-09-19 06:45:24,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=611940.0, ans=0.125 +2024-09-19 06:45:28,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=611980.0, ans=0.0 +2024-09-19 06:45:54,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=612020.0, ans=0.125 +2024-09-19 06:45:57,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=612060.0, ans=0.0 +2024-09-19 06:46:11,765 INFO [train.py:1198] (1/2) Epoch 34, batch 3700, loss[loss=0.2476, ctc_loss=0.1178, cr_loss=0.3565, attn_decoder_loss=0.2541, over 29697.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1171, cr_loss=0.3579, attn_decoder_loss=0.2409, over 5804724.69 frames. ], batch size: 84, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:46:22,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=612100.0, ans=0.125 +2024-09-19 06:46:27,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=612140.0, ans=0.0 +2024-09-19 06:46:33,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=612140.0, ans=0.125 +2024-09-19 06:46:42,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=612180.0, ans=0.0 +2024-09-19 06:46:50,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=612180.0, ans=0.025 +2024-09-19 06:46:54,123 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.36 vs. limit=15.0 +2024-09-19 06:46:55,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.29 vs. limit=15.0 +2024-09-19 06:47:01,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=612220.0, ans=0.0 +2024-09-19 06:47:14,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=612260.0, ans=0.0 +2024-09-19 06:47:17,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=612260.0, ans=0.1 +2024-09-19 06:47:26,239 INFO [train.py:1198] (1/2) Epoch 34, batch 3750, loss[loss=0.2103, ctc_loss=0.09817, cr_loss=0.3268, attn_decoder_loss=0.2155, over 29345.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.117, cr_loss=0.3579, attn_decoder_loss=0.2406, over 5808242.47 frames. 
], batch size: 67, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:47:27,708 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.425e+01 8.454e+01 8.933e+01 9.373e+01 1.602e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-19 06:47:28,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=612300.0, ans=0.1 +2024-09-19 06:47:41,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=612340.0, ans=0.125 +2024-09-19 06:47:46,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=612340.0, ans=0.1 +2024-09-19 06:48:08,292 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:48:12,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=612420.0, ans=0.125 +2024-09-19 06:48:19,593 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.41 vs. limit=15.0 +2024-09-19 06:48:21,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=612420.0, ans=0.09899494936611666 +2024-09-19 06:48:24,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=612420.0, ans=0.025 +2024-09-19 06:48:29,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=612460.0, ans=0.125 +2024-09-19 06:48:32,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=612460.0, ans=0.125 +2024-09-19 06:48:35,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=612460.0, ans=0.0 +2024-09-19 06:48:42,285 INFO [train.py:1198] (1/2) Epoch 34, batch 3800, loss[loss=0.2447, ctc_loss=0.118, cr_loss=0.3612, attn_decoder_loss=0.2507, over 29629.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1168, cr_loss=0.3573, attn_decoder_loss=0.2405, over 5798649.47 frames. ], batch size: 86, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:48:57,995 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.44 vs. limit=15.0 +2024-09-19 06:49:05,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=612540.0, ans=0.1 +2024-09-19 06:49:30,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=612620.0, ans=0.125 +2024-09-19 06:49:31,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=612620.0, ans=0.025 +2024-09-19 06:49:37,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=612620.0, ans=0.0 +2024-09-19 06:49:39,801 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.92 vs. 
limit=15.0 +2024-09-19 06:49:47,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.19 vs. limit=6.0 +2024-09-19 06:49:52,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=612660.0, ans=0.125 +2024-09-19 06:49:58,136 INFO [train.py:1198] (1/2) Epoch 34, batch 3850, loss[loss=0.2568, ctc_loss=0.1291, cr_loss=0.3824, attn_decoder_loss=0.2625, over 29287.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1164, cr_loss=0.3568, attn_decoder_loss=0.2404, over 5811878.83 frames. ], batch size: 100, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:49:59,606 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.333e+01 8.389e+01 8.951e+01 9.412e+01 1.497e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-19 06:50:08,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=612700.0, ans=0.125 +2024-09-19 06:50:26,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=612780.0, ans=0.1 +2024-09-19 06:51:07,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=612860.0, ans=0.05 +2024-09-19 06:51:12,887 INFO [train.py:1198] (1/2) Epoch 34, batch 3900, loss[loss=0.2493, ctc_loss=0.1252, cr_loss=0.3761, attn_decoder_loss=0.2547, over 29626.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1167, cr_loss=0.3578, attn_decoder_loss=0.2409, over 5815795.66 frames. ], batch size: 86, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:51:24,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.83 vs. limit=15.0 +2024-09-19 06:51:44,862 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.60 vs. limit=22.5 +2024-09-19 06:52:03,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=613020.0, ans=0.125 +2024-09-19 06:52:12,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=613060.0, ans=0.0 +2024-09-19 06:52:22,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=613060.0, ans=0.025 +2024-09-19 06:52:26,853 INFO [train.py:1198] (1/2) Epoch 34, batch 3950, loss[loss=0.246, ctc_loss=0.1254, cr_loss=0.3877, attn_decoder_loss=0.2508, over 29491.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1167, cr_loss=0.3577, attn_decoder_loss=0.241, over 5835244.75 frames. ], batch size: 97, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:52:28,322 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 8.556e+01 9.009e+01 9.395e+01 1.816e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-19 06:52:55,266 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:53:42,101 INFO [train.py:1198] (1/2) Epoch 34, batch 4000, loss[loss=0.2202, ctc_loss=0.1029, cr_loss=0.3159, attn_decoder_loss=0.2262, over 29511.00 frames. 
], tot_loss[loss=0.2359, ctc_loss=0.1171, cr_loss=0.3585, attn_decoder_loss=0.2411, over 5812201.77 frames. ], batch size: 74, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 06:53:53,240 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.81 vs. limit=6.0 +2024-09-19 06:54:06,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=613340.0, ans=0.1 +2024-09-19 06:54:15,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=613380.0, ans=0.125 +2024-09-19 06:54:27,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=613420.0, ans=0.0 +2024-09-19 06:54:33,321 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.64 vs. limit=6.0 +2024-09-19 06:54:52,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=613460.0, ans=0.125 +2024-09-19 06:54:57,755 INFO [train.py:1198] (1/2) Epoch 34, batch 4050, loss[loss=0.2591, ctc_loss=0.1428, cr_loss=0.3871, attn_decoder_loss=0.2634, over 20176.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1171, cr_loss=0.3584, attn_decoder_loss=0.2411, over 5796656.19 frames. ], batch size: 209, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:55:00,718 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.471e+01 9.121e+01 9.639e+01 2.999e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 06:55:14,871 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.24 vs. limit=10.0 +2024-09-19 06:55:42,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=613620.0, ans=0.125 +2024-09-19 06:55:46,189 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.92 vs. limit=15.0 +2024-09-19 06:55:46,806 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:55:50,589 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.84 vs. limit=15.0 +2024-09-19 06:55:57,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=613660.0, ans=0.2 +2024-09-19 06:56:11,835 INFO [train.py:1198] (1/2) Epoch 34, batch 4100, loss[loss=0.2479, ctc_loss=0.1225, cr_loss=0.3595, attn_decoder_loss=0.2538, over 29523.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1174, cr_loss=0.3588, attn_decoder_loss=0.2413, over 5791668.56 frames. 
], batch size: 90, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:56:28,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=613740.0, ans=0.125 +2024-09-19 06:56:41,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=613780.0, ans=0.0 +2024-09-19 06:56:43,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=613780.0, ans=0.07 +2024-09-19 06:56:50,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=613780.0, ans=0.125 +2024-09-19 06:56:52,196 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.96 vs. limit=22.5 +2024-09-19 06:56:54,861 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:56:56,832 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.46 vs. limit=15.0 +2024-09-19 06:57:02,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=613820.0, ans=0.125 +2024-09-19 06:57:05,308 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 06:57:09,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=613860.0, ans=0.2 +2024-09-19 06:57:09,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=613860.0, ans=0.125 +2024-09-19 06:57:25,789 INFO [train.py:1198] (1/2) Epoch 34, batch 4150, loss[loss=0.2314, ctc_loss=0.1182, cr_loss=0.3706, attn_decoder_loss=0.2358, over 29489.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1172, cr_loss=0.3589, attn_decoder_loss=0.2409, over 5796762.33 frames. ], batch size: 77, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:57:26,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=613900.0, ans=0.2 +2024-09-19 06:57:28,815 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.811e+01 8.506e+01 8.901e+01 9.635e+01 1.346e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-19 06:57:45,790 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.98 vs. 
limit=15.0 +2024-09-19 06:57:49,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=613940.0, ans=0.0 +2024-09-19 06:57:55,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=613980.0, ans=0.0 +2024-09-19 06:57:55,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=613980.0, ans=0.125 +2024-09-19 06:57:56,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=613980.0, ans=0.0 +2024-09-19 06:57:58,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=613980.0, ans=0.2 +2024-09-19 06:58:39,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=614100.0, ans=0.0 +2024-09-19 06:58:40,889 INFO [train.py:1198] (1/2) Epoch 34, batch 4200, loss[loss=0.2626, ctc_loss=0.1366, cr_loss=0.4162, attn_decoder_loss=0.2674, over 29527.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1173, cr_loss=0.3594, attn_decoder_loss=0.2412, over 5798360.44 frames. ], batch size: 90, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:58:49,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.96 vs. limit=15.0 +2024-09-19 06:59:10,919 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.57 vs. limit=12.0 +2024-09-19 06:59:17,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=614180.0, ans=0.125 +2024-09-19 06:59:20,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=614180.0, ans=0.125 +2024-09-19 06:59:20,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=614180.0, ans=0.0 +2024-09-19 06:59:38,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=614220.0, ans=0.125 +2024-09-19 06:59:41,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=614260.0, ans=0.05 +2024-09-19 06:59:49,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.10 vs. limit=15.0 +2024-09-19 06:59:55,724 INFO [train.py:1198] (1/2) Epoch 34, batch 4250, loss[loss=0.2095, ctc_loss=0.0865, cr_loss=0.2876, attn_decoder_loss=0.2167, over 29527.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.117, cr_loss=0.3587, attn_decoder_loss=0.2413, over 5804416.47 frames. ], batch size: 74, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 06:59:58,625 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.615e+01 8.496e+01 8.853e+01 9.381e+01 2.444e+02, threshold=1.771e+02, percent-clipped=1.0 +2024-09-19 07:00:13,881 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.66 vs. 
limit=15.0 +2024-09-19 07:00:19,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=614340.0, ans=0.0 +2024-09-19 07:00:24,338 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.48 vs. limit=15.0 +2024-09-19 07:00:26,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=614380.0, ans=0.1 +2024-09-19 07:00:32,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=614380.0, ans=0.125 +2024-09-19 07:00:35,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=614380.0, ans=0.0 +2024-09-19 07:00:37,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=614380.0, ans=0.125 +2024-09-19 07:00:38,949 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:00:46,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=614420.0, ans=0.2 +2024-09-19 07:00:46,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=614420.0, ans=0.5 +2024-09-19 07:00:49,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=614420.0, ans=0.0 +2024-09-19 07:00:53,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=614460.0, ans=0.125 +2024-09-19 07:01:09,459 INFO [train.py:1198] (1/2) Epoch 34, batch 4300, loss[loss=0.2471, ctc_loss=0.1177, cr_loss=0.3672, attn_decoder_loss=0.2533, over 29535.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.117, cr_loss=0.3585, attn_decoder_loss=0.2415, over 5794345.87 frames. ], batch size: 87, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 07:01:29,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=614540.0, ans=0.015 +2024-09-19 07:01:41,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.90 vs. limit=22.5 +2024-09-19 07:01:44,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=614580.0, ans=0.2 +2024-09-19 07:02:00,227 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.75 vs. limit=15.0 +2024-09-19 07:02:04,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.52 vs. limit=22.5 +2024-09-19 07:02:16,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=614660.0, ans=0.0 +2024-09-19 07:02:20,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.37 vs. limit=10.0 +2024-09-19 07:02:21,951 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.60 vs. 
limit=22.5 +2024-09-19 07:02:22,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=614660.0, ans=0.0 +2024-09-19 07:02:25,305 INFO [train.py:1198] (1/2) Epoch 34, batch 4350, loss[loss=0.2412, ctc_loss=0.1203, cr_loss=0.369, attn_decoder_loss=0.2464, over 29489.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1199, cr_loss=0.3642, attn_decoder_loss=0.2448, over 5797183.89 frames. ], batch size: 97, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 07:02:28,307 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.836e+01 9.274e+01 9.839e+01 5.976e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 07:03:12,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=614820.0, ans=0.5 +2024-09-19 07:03:13,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=614820.0, ans=0.0 +2024-09-19 07:03:16,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=614820.0, ans=0.125 +2024-09-19 07:03:38,634 INFO [train.py:1198] (1/2) Epoch 34, batch 4400, loss[loss=0.249, ctc_loss=0.1305, cr_loss=0.3834, attn_decoder_loss=0.2536, over 27434.00 frames. ], tot_loss[loss=0.2417, ctc_loss=0.1212, cr_loss=0.3668, attn_decoder_loss=0.2469, over 5767514.03 frames. ], batch size: 124, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 07:03:41,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=614900.0, ans=0.125 +2024-09-19 07:04:16,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=614980.0, ans=0.125 +2024-09-19 07:04:29,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=615020.0, ans=0.1 +2024-09-19 07:04:37,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=615060.0, ans=0.125 +2024-09-19 07:04:37,531 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.76 vs. limit=10.0 +2024-09-19 07:04:38,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=615060.0, ans=0.125 +2024-09-19 07:04:53,549 INFO [train.py:1198] (1/2) Epoch 34, batch 4450, loss[loss=0.265, ctc_loss=0.1527, cr_loss=0.3974, attn_decoder_loss=0.2686, over 20329.00 frames. ], tot_loss[loss=0.2441, ctc_loss=0.1249, cr_loss=0.3719, attn_decoder_loss=0.2491, over 5574717.09 frames. 
], batch size: 210, lr: 3.23e-03, grad_scale: 16.0 +2024-09-19 07:04:56,493 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.888e+01 9.045e+01 9.501e+01 1.052e+02 3.870e+02, threshold=1.900e+02, percent-clipped=1.0 +2024-09-19 07:05:13,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=615140.0, ans=0.0 +2024-09-19 07:05:13,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=615140.0, ans=0.125 +2024-09-19 07:05:24,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=615180.0, ans=0.07 +2024-09-19 07:05:35,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=615180.0, ans=0.2 +2024-09-19 07:05:54,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=615260.0, ans=0.2 +2024-09-19 07:06:09,428 INFO [train.py:1198] (1/2) Epoch 34, batch 4500, loss[loss=0.2607, ctc_loss=0.1516, cr_loss=0.4022, attn_decoder_loss=0.2638, over 20084.00 frames. ], tot_loss[loss=0.246, ctc_loss=0.128, cr_loss=0.3742, attn_decoder_loss=0.2508, over 5236885.40 frames. ], batch size: 209, lr: 3.23e-03, grad_scale: 8.0 +2024-09-19 07:06:32,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=615340.0, ans=0.04949747468305833 +2024-09-19 07:06:34,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=615340.0, ans=0.025 +2024-09-19 07:06:40,139 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:07:33,731 INFO [train.py:1198] (1/2) Epoch 35, batch 0, loss[loss=0.2151, ctc_loss=0.09534, cr_loss=0.3194, attn_decoder_loss=0.2214, over 29556.00 frames. ], tot_loss[loss=0.2151, ctc_loss=0.09534, cr_loss=0.3194, attn_decoder_loss=0.2214, over 29556.00 frames. ], batch size: 73, lr: 3.18e-03, grad_scale: 16.0 +2024-09-19 07:07:33,731 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 07:07:52,109 INFO [train.py:1230] (1/2) Epoch 35, validation: loss=0.2125, ctc_loss=0.03615, cr_loss=6.293e-15, attn_decoder_loss=0.232, over 944034.00 frames. +2024-09-19 07:07:52,109 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 07:07:53,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.78 vs. limit=10.0 +2024-09-19 07:08:24,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=615480.0, ans=0.0 +2024-09-19 07:08:36,043 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.990e+01 1.018e+02 1.116e+02 1.176e+02 2.643e+02, threshold=2.232e+02, percent-clipped=1.0 +2024-09-19 07:09:09,392 INFO [train.py:1198] (1/2) Epoch 35, batch 50, loss[loss=0.2081, ctc_loss=0.09678, cr_loss=0.3043, attn_decoder_loss=0.2137, over 29430.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.121, cr_loss=0.3672, attn_decoder_loss=0.2431, over 1266867.30 frames. 
], batch size: 70, lr: 3.18e-03, grad_scale: 8.0 +2024-09-19 07:09:27,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=615640.0, ans=0.125 +2024-09-19 07:09:32,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=615640.0, ans=0.0 +2024-09-19 07:09:38,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=615680.0, ans=0.025 +2024-09-19 07:10:15,003 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:10:25,305 INFO [train.py:1198] (1/2) Epoch 35, batch 100, loss[loss=0.2199, ctc_loss=0.1081, cr_loss=0.33, attn_decoder_loss=0.225, over 29542.00 frames. ], tot_loss[loss=0.2395, ctc_loss=0.1218, cr_loss=0.3684, attn_decoder_loss=0.2444, over 2251631.28 frames. ], batch size: 76, lr: 3.18e-03, grad_scale: 8.0 +2024-09-19 07:10:40,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=615840.0, ans=0.025 +2024-09-19 07:10:42,737 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.62 vs. limit=15.0 +2024-09-19 07:10:43,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=615840.0, ans=0.2 +2024-09-19 07:10:51,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=615840.0, ans=0.125 +2024-09-19 07:11:11,189 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.567e+01 8.556e+01 9.012e+01 9.778e+01 2.155e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 07:11:11,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=615920.0, ans=0.0 +2024-09-19 07:11:15,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=615920.0, ans=0.125 +2024-09-19 07:11:15,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=615920.0, ans=0.125 +2024-09-19 07:11:23,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=615920.0, ans=0.0 +2024-09-19 07:11:23,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=615920.0, ans=0.04949747468305833 +2024-09-19 07:11:27,085 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.79 vs. limit=10.0 +2024-09-19 07:11:42,843 INFO [train.py:1198] (1/2) Epoch 35, batch 150, loss[loss=0.2146, ctc_loss=0.09813, cr_loss=0.312, attn_decoder_loss=0.2206, over 29434.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1183, cr_loss=0.3611, attn_decoder_loss=0.2417, over 3045776.08 frames. 
], batch size: 70, lr: 3.18e-03, grad_scale: 8.0 +2024-09-19 07:11:57,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=616000.0, ans=0.125 +2024-09-19 07:11:59,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=616040.0, ans=0.1 +2024-09-19 07:12:24,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=616080.0, ans=0.1 +2024-09-19 07:12:30,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=616120.0, ans=0.125 +2024-09-19 07:12:36,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=616120.0, ans=0.125 +2024-09-19 07:12:58,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.08 vs. limit=15.0 +2024-09-19 07:13:00,664 INFO [train.py:1198] (1/2) Epoch 35, batch 200, loss[loss=0.2462, ctc_loss=0.1278, cr_loss=0.3855, attn_decoder_loss=0.2508, over 27519.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1176, cr_loss=0.36, attn_decoder_loss=0.2412, over 3658757.30 frames. ], batch size: 125, lr: 3.18e-03, grad_scale: 8.0 +2024-09-19 07:13:10,967 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.66 vs. limit=6.0 +2024-09-19 07:13:32,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=616280.0, ans=0.025 +2024-09-19 07:13:40,712 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.58 vs. limit=22.5 +2024-09-19 07:13:44,290 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.474e+01 8.322e+01 8.803e+01 9.325e+01 1.291e+02, threshold=1.761e+02, percent-clipped=0.0 +2024-09-19 07:13:46,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=616320.0, ans=0.125 +2024-09-19 07:14:10,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.53 vs. limit=10.0 +2024-09-19 07:14:11,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=616360.0, ans=0.1 +2024-09-19 07:14:15,851 INFO [train.py:1198] (1/2) Epoch 35, batch 250, loss[loss=0.2545, ctc_loss=0.1375, cr_loss=0.3921, attn_decoder_loss=0.2588, over 29273.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1169, cr_loss=0.3587, attn_decoder_loss=0.2407, over 4142255.03 frames. ], batch size: 100, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:14:16,681 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.69 vs. 
limit=22.5 +2024-09-19 07:14:29,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=616440.0, ans=0.125 +2024-09-19 07:14:32,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=616440.0, ans=0.125 +2024-09-19 07:14:38,037 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.00 vs. limit=15.0 +2024-09-19 07:14:39,583 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.72 vs. limit=22.5 +2024-09-19 07:14:44,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=616480.0, ans=0.125 +2024-09-19 07:15:22,511 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.26 vs. limit=15.0 +2024-09-19 07:15:22,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.79 vs. limit=15.0 +2024-09-19 07:15:31,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=616560.0, ans=0.0 +2024-09-19 07:15:34,116 INFO [train.py:1198] (1/2) Epoch 35, batch 300, loss[loss=0.2634, ctc_loss=0.1422, cr_loss=0.4052, attn_decoder_loss=0.2679, over 29494.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1164, cr_loss=0.3578, attn_decoder_loss=0.2405, over 4510431.00 frames. ], batch size: 92, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:15:50,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=616640.0, ans=0.5 +2024-09-19 07:16:20,345 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.277e+01 8.384e+01 8.991e+01 9.743e+01 6.934e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-19 07:16:31,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=616720.0, ans=0.1 +2024-09-19 07:16:52,556 INFO [train.py:1198] (1/2) Epoch 35, batch 350, loss[loss=0.2112, ctc_loss=0.09738, cr_loss=0.3028, attn_decoder_loss=0.2171, over 29324.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1165, cr_loss=0.358, attn_decoder_loss=0.2408, over 4796270.75 frames. ], batch size: 71, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:17:06,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=616840.0, ans=0.0 +2024-09-19 07:17:09,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=616840.0, ans=0.025 +2024-09-19 07:17:16,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=616840.0, ans=0.125 +2024-09-19 07:17:30,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=616880.0, ans=0.2 +2024-09-19 07:17:34,277 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.16 vs. 
limit=6.0 +2024-09-19 07:17:45,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=616920.0, ans=0.0 +2024-09-19 07:17:45,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.62 vs. limit=15.0 +2024-09-19 07:18:06,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=617000.0, ans=0.125 +2024-09-19 07:18:07,763 INFO [train.py:1198] (1/2) Epoch 35, batch 400, loss[loss=0.2406, ctc_loss=0.1237, cr_loss=0.3728, attn_decoder_loss=0.2453, over 29721.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.116, cr_loss=0.357, attn_decoder_loss=0.2404, over 5025970.85 frames. ], batch size: 82, lr: 3.17e-03, grad_scale: 16.0 +2024-09-19 07:18:08,765 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.28 vs. limit=22.5 +2024-09-19 07:18:14,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=617000.0, ans=0.125 +2024-09-19 07:18:31,912 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.96 vs. limit=15.0 +2024-09-19 07:18:49,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=617080.0, ans=0.125 +2024-09-19 07:18:54,451 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.777e+01 8.637e+01 9.137e+01 9.905e+01 1.373e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 07:18:59,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=617120.0, ans=0.0 +2024-09-19 07:19:13,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=617160.0, ans=0.025 +2024-09-19 07:19:17,992 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.49 vs. limit=15.0 +2024-09-19 07:19:22,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=617160.0, ans=0.07 +2024-09-19 07:19:25,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=617200.0, ans=0.125 +2024-09-19 07:19:26,438 INFO [train.py:1198] (1/2) Epoch 35, batch 450, loss[loss=0.2463, ctc_loss=0.1201, cr_loss=0.3589, attn_decoder_loss=0.2523, over 29701.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1162, cr_loss=0.3573, attn_decoder_loss=0.2406, over 5189734.36 frames. ], batch size: 83, lr: 3.17e-03, grad_scale: 16.0 +2024-09-19 07:19:33,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.77 vs. limit=6.0 +2024-09-19 07:19:35,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=617200.0, ans=0.0 +2024-09-19 07:19:39,018 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.34 vs. 
limit=15.0 +2024-09-19 07:19:49,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.13 vs. limit=10.0 +2024-09-19 07:19:56,864 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.15 vs. limit=15.0 +2024-09-19 07:19:57,046 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.13 vs. limit=10.0 +2024-09-19 07:20:01,714 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.16 vs. limit=6.0 +2024-09-19 07:20:14,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=617320.0, ans=0.0 +2024-09-19 07:20:16,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=617320.0, ans=0.1 +2024-09-19 07:20:17,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=617320.0, ans=0.0 +2024-09-19 07:20:26,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=617320.0, ans=0.025 +2024-09-19 07:20:40,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=617360.0, ans=0.125 +2024-09-19 07:20:44,376 INFO [train.py:1198] (1/2) Epoch 35, batch 500, loss[loss=0.2538, ctc_loss=0.1234, cr_loss=0.3733, attn_decoder_loss=0.26, over 29465.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1161, cr_loss=0.3568, attn_decoder_loss=0.2403, over 5331872.08 frames. ], batch size: 94, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:20:56,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=617400.0, ans=0.1 +2024-09-19 07:20:57,411 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.68 vs. limit=12.0 +2024-09-19 07:21:00,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.whiten.whitening_limit, batch_count=617440.0, ans=15.0 +2024-09-19 07:21:11,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=617440.0, ans=0.125 +2024-09-19 07:21:24,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=617480.0, ans=0.125 +2024-09-19 07:21:24,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.38 vs. limit=15.0 +2024-09-19 07:21:29,906 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.448e+01 8.461e+01 8.901e+01 9.576e+01 2.460e+02, threshold=1.780e+02, percent-clipped=1.0 +2024-09-19 07:21:42,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.76 vs. 
limit=15.0 +2024-09-19 07:22:00,515 INFO [train.py:1198] (1/2) Epoch 35, batch 550, loss[loss=0.2519, ctc_loss=0.1242, cr_loss=0.3751, attn_decoder_loss=0.2578, over 28841.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1162, cr_loss=0.3571, attn_decoder_loss=0.2405, over 5425156.25 frames. ], batch size: 104, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:22:47,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=617720.0, ans=0.0 +2024-09-19 07:22:55,287 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.16 vs. limit=15.0 +2024-09-19 07:23:00,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=617720.0, ans=0.125 +2024-09-19 07:23:07,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=617760.0, ans=0.2 +2024-09-19 07:23:13,607 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.72 vs. limit=12.0 +2024-09-19 07:23:14,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=617760.0, ans=0.0 +2024-09-19 07:23:18,916 INFO [train.py:1198] (1/2) Epoch 35, batch 600, loss[loss=0.2485, ctc_loss=0.131, cr_loss=0.3748, attn_decoder_loss=0.2533, over 29255.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1164, cr_loss=0.3573, attn_decoder_loss=0.2408, over 5512334.70 frames. ], batch size: 100, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:23:25,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.66 vs. limit=15.0 +2024-09-19 07:23:36,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=617840.0, ans=0.125 +2024-09-19 07:23:52,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=617880.0, ans=0.07 +2024-09-19 07:24:06,189 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.466e+01 8.823e+01 9.402e+01 3.791e+02, threshold=1.765e+02, percent-clipped=1.0 +2024-09-19 07:24:08,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.18 vs. limit=15.0 +2024-09-19 07:24:12,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=617920.0, ans=0.0 +2024-09-19 07:24:29,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=617960.0, ans=0.0 +2024-09-19 07:24:35,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=618000.0, ans=0.1 +2024-09-19 07:24:36,200 INFO [train.py:1198] (1/2) Epoch 35, batch 650, loss[loss=0.2331, ctc_loss=0.1108, cr_loss=0.3586, attn_decoder_loss=0.2387, over 29759.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1154, cr_loss=0.3553, attn_decoder_loss=0.2399, over 5588743.29 frames. ], batch size: 81, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:24:44,733 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.90 vs. 
limit=22.5 +2024-09-19 07:24:47,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=618000.0, ans=0.2 +2024-09-19 07:25:07,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=618080.0, ans=0.125 +2024-09-19 07:25:46,375 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:25:52,021 INFO [train.py:1198] (1/2) Epoch 35, batch 700, loss[loss=0.2269, ctc_loss=0.112, cr_loss=0.3519, attn_decoder_loss=0.2318, over 29533.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1163, cr_loss=0.3573, attn_decoder_loss=0.2408, over 5638789.33 frames. ], batch size: 76, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:26:22,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.33 vs. limit=22.5 +2024-09-19 07:26:33,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=618280.0, ans=0.1 +2024-09-19 07:26:34,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=618280.0, ans=0.125 +2024-09-19 07:26:37,261 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.181e+01 8.406e+01 8.899e+01 9.421e+01 1.331e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-19 07:26:40,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=618320.0, ans=0.0 +2024-09-19 07:26:45,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=618320.0, ans=0.0 +2024-09-19 07:26:51,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=618360.0, ans=0.0 +2024-09-19 07:26:53,694 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.98 vs. limit=6.0 +2024-09-19 07:27:10,382 INFO [train.py:1198] (1/2) Epoch 35, batch 750, loss[loss=0.2449, ctc_loss=0.1171, cr_loss=0.3777, attn_decoder_loss=0.2507, over 29721.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1162, cr_loss=0.3573, attn_decoder_loss=0.2406, over 5678206.58 frames. ], batch size: 82, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:27:15,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=618400.0, ans=0.0 +2024-09-19 07:27:18,933 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.41 vs. 
limit=6.0 +2024-09-19 07:27:35,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=618440.0, ans=0.125 +2024-09-19 07:27:38,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=618440.0, ans=0.0 +2024-09-19 07:27:49,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=618480.0, ans=0.2 +2024-09-19 07:27:52,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=618480.0, ans=0.1 +2024-09-19 07:28:09,348 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:28:16,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=618560.0, ans=0.2 +2024-09-19 07:28:28,640 INFO [train.py:1198] (1/2) Epoch 35, batch 800, loss[loss=0.2143, ctc_loss=0.101, cr_loss=0.3195, attn_decoder_loss=0.2198, over 29563.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1159, cr_loss=0.3567, attn_decoder_loss=0.2404, over 5709174.48 frames. ], batch size: 73, lr: 3.17e-03, grad_scale: 16.0 +2024-09-19 07:29:00,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=618680.0, ans=0.1 +2024-09-19 07:29:00,852 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.60 vs. limit=15.0 +2024-09-19 07:29:15,160 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.859e+01 8.586e+01 8.985e+01 9.600e+01 2.003e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 07:29:23,824 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.15 vs. limit=15.0 +2024-09-19 07:29:28,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=618760.0, ans=0.0 +2024-09-19 07:29:29,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=618760.0, ans=10.0 +2024-09-19 07:29:40,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=618760.0, ans=0.125 +2024-09-19 07:29:42,406 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:29:43,495 INFO [train.py:1198] (1/2) Epoch 35, batch 850, loss[loss=0.2463, ctc_loss=0.1187, cr_loss=0.354, attn_decoder_loss=0.2526, over 29724.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1158, cr_loss=0.3567, attn_decoder_loss=0.2402, over 5737349.15 frames. 
], batch size: 89, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:29:48,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=618800.0, ans=0.125 +2024-09-19 07:29:51,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=618800.0, ans=0.0 +2024-09-19 07:30:09,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=618840.0, ans=6.0 +2024-09-19 07:30:13,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=618880.0, ans=0.125 +2024-09-19 07:30:14,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.24 vs. limit=15.0 +2024-09-19 07:30:30,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=618920.0, ans=0.125 +2024-09-19 07:30:50,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=618960.0, ans=0.1 +2024-09-19 07:30:53,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=618960.0, ans=0.125 +2024-09-19 07:31:01,540 INFO [train.py:1198] (1/2) Epoch 35, batch 900, loss[loss=0.2222, ctc_loss=0.1108, cr_loss=0.3557, attn_decoder_loss=0.2266, over 29629.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.116, cr_loss=0.357, attn_decoder_loss=0.2404, over 5742416.40 frames. ], batch size: 73, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:31:07,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=619000.0, ans=0.05 +2024-09-19 07:31:29,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=619040.0, ans=0.125 +2024-09-19 07:31:34,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=619080.0, ans=0.1 +2024-09-19 07:31:34,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=619080.0, ans=0.125 +2024-09-19 07:31:44,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=619080.0, ans=0.125 +2024-09-19 07:31:48,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=619120.0, ans=0.2 +2024-09-19 07:31:50,718 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.352e+01 8.514e+01 9.190e+01 1.005e+02 2.448e+02, threshold=1.838e+02, percent-clipped=2.0 +2024-09-19 07:31:54,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=619120.0, ans=0.0 +2024-09-19 07:32:12,740 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.80 vs. limit=12.0 +2024-09-19 07:32:19,720 INFO [train.py:1198] (1/2) Epoch 35, batch 950, loss[loss=0.2197, ctc_loss=0.09761, cr_loss=0.3115, attn_decoder_loss=0.2263, over 29520.00 frames. 
], tot_loss[loss=0.2352, ctc_loss=0.1162, cr_loss=0.3571, attn_decoder_loss=0.2405, over 5743551.77 frames. ], batch size: 74, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:32:24,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=619200.0, ans=0.2 +2024-09-19 07:32:33,741 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:32:35,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=619240.0, ans=0.125 +2024-09-19 07:32:43,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=619240.0, ans=0.125 +2024-09-19 07:32:50,013 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.45 vs. limit=15.0 +2024-09-19 07:32:56,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=619280.0, ans=0.5 +2024-09-19 07:33:00,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=619280.0, ans=0.2 +2024-09-19 07:33:01,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=619280.0, ans=0.125 +2024-09-19 07:33:04,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.43 vs. limit=15.0 +2024-09-19 07:33:22,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=619360.0, ans=0.0 +2024-09-19 07:33:26,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=619360.0, ans=0.0 +2024-09-19 07:33:34,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=619400.0, ans=0.125 +2024-09-19 07:33:35,499 INFO [train.py:1198] (1/2) Epoch 35, batch 1000, loss[loss=0.2332, ctc_loss=0.1211, cr_loss=0.3546, attn_decoder_loss=0.2378, over 29486.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.117, cr_loss=0.3582, attn_decoder_loss=0.2414, over 5738215.57 frames. 
], batch size: 77, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:33:38,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=619400.0, ans=0.0 +2024-09-19 07:33:41,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=619400.0, ans=0.0 +2024-09-19 07:33:49,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=619440.0, ans=0.125 +2024-09-19 07:33:58,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=619440.0, ans=0.1 +2024-09-19 07:34:09,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=619480.0, ans=0.125 +2024-09-19 07:34:20,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=619520.0, ans=10.0 +2024-09-19 07:34:22,841 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.419e+01 8.400e+01 8.920e+01 9.804e+01 1.524e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-19 07:34:26,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.49 vs. limit=15.0 +2024-09-19 07:34:42,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=619560.0, ans=0.125 +2024-09-19 07:34:53,684 INFO [train.py:1198] (1/2) Epoch 35, batch 1050, loss[loss=0.2449, ctc_loss=0.1225, cr_loss=0.3706, attn_decoder_loss=0.2503, over 29696.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1164, cr_loss=0.3569, attn_decoder_loss=0.2406, over 5744757.45 frames. ], batch size: 85, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:34:53,981 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:35:04,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=619600.0, ans=0.125 +2024-09-19 07:35:23,732 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:35:45,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=619720.0, ans=0.07 +2024-09-19 07:35:50,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=619720.0, ans=0.2 +2024-09-19 07:35:58,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=619760.0, ans=0.0 +2024-09-19 07:36:07,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=619760.0, ans=0.05 +2024-09-19 07:36:11,766 INFO [train.py:1198] (1/2) Epoch 35, batch 1100, loss[loss=0.2312, ctc_loss=0.115, cr_loss=0.3482, attn_decoder_loss=0.2364, over 29439.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1163, cr_loss=0.3568, attn_decoder_loss=0.2405, over 5757465.56 frames. 
], batch size: 78, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:36:27,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=619840.0, ans=0.125 +2024-09-19 07:36:33,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=619840.0, ans=0.0 +2024-09-19 07:36:36,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=619840.0, ans=0.2 +2024-09-19 07:36:47,999 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.93 vs. limit=12.0 +2024-09-19 07:36:56,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=619920.0, ans=0.125 +2024-09-19 07:36:58,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=619920.0, ans=0.2 +2024-09-19 07:36:59,138 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.311e+01 8.442e+01 8.888e+01 9.490e+01 5.357e+02, threshold=1.778e+02, percent-clipped=1.0 +2024-09-19 07:37:28,429 INFO [train.py:1198] (1/2) Epoch 35, batch 1150, loss[loss=0.2357, ctc_loss=0.1132, cr_loss=0.3413, attn_decoder_loss=0.2417, over 29472.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1165, cr_loss=0.3574, attn_decoder_loss=0.2405, over 5754123.82 frames. ], batch size: 78, lr: 3.17e-03, grad_scale: 8.0 +2024-09-19 07:37:42,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=620040.0, ans=0.125 +2024-09-19 07:37:46,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.74 vs. limit=15.0 +2024-09-19 07:37:51,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=620040.0, ans=0.05 +2024-09-19 07:38:46,890 INFO [train.py:1198] (1/2) Epoch 35, batch 1200, loss[loss=0.2347, ctc_loss=0.1118, cr_loss=0.3294, attn_decoder_loss=0.2411, over 29669.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1167, cr_loss=0.3578, attn_decoder_loss=0.2412, over 5745658.13 frames. 
], batch size: 85, lr: 3.17e-03, grad_scale: 16.0 +2024-09-19 07:38:57,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=620200.0, ans=0.125 +2024-09-19 07:39:08,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=620240.0, ans=0.125 +2024-09-19 07:39:12,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=620240.0, ans=0.0 +2024-09-19 07:39:35,881 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.035e+01 8.532e+01 9.165e+01 9.750e+01 1.443e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-19 07:39:39,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=620320.0, ans=0.125 +2024-09-19 07:39:47,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten.whitening_limit, batch_count=620320.0, ans=22.5 +2024-09-19 07:39:58,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=620360.0, ans=0.125 +2024-09-19 07:40:01,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=620360.0, ans=0.125 +2024-09-19 07:40:03,849 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.05 vs. limit=15.0 +2024-09-19 07:40:04,437 INFO [train.py:1198] (1/2) Epoch 35, batch 1250, loss[loss=0.2492, ctc_loss=0.1251, cr_loss=0.3851, attn_decoder_loss=0.2544, over 29541.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1169, cr_loss=0.3588, attn_decoder_loss=0.2415, over 5773341.53 frames. ], batch size: 92, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:40:04,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=620400.0, ans=0.0 +2024-09-19 07:40:26,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=620440.0, ans=0.1 +2024-09-19 07:40:36,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=620480.0, ans=0.125 +2024-09-19 07:40:50,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=620520.0, ans=0.125 +2024-09-19 07:40:57,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=620520.0, ans=0.1 +2024-09-19 07:41:05,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=620560.0, ans=0.05 +2024-09-19 07:41:20,150 INFO [train.py:1198] (1/2) Epoch 35, batch 1300, loss[loss=0.2423, ctc_loss=0.1146, cr_loss=0.3504, attn_decoder_loss=0.2487, over 28317.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1163, cr_loss=0.3578, attn_decoder_loss=0.2408, over 5777949.52 frames. 
], batch size: 111, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:41:26,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=620600.0, ans=0.125 +2024-09-19 07:41:26,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=620600.0, ans=0.125 +2024-09-19 07:41:35,769 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:41:37,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=620640.0, ans=0.025 +2024-09-19 07:41:41,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=620640.0, ans=0.125 +2024-09-19 07:41:47,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=620640.0, ans=0.125 +2024-09-19 07:41:47,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=620640.0, ans=0.125 +2024-09-19 07:42:08,927 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.370e+01 8.419e+01 8.887e+01 9.525e+01 1.443e+02, threshold=1.777e+02, percent-clipped=0.0 +2024-09-19 07:42:16,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=620720.0, ans=0.2 +2024-09-19 07:42:36,591 INFO [train.py:1198] (1/2) Epoch 35, batch 1350, loss[loss=0.2413, ctc_loss=0.1212, cr_loss=0.3742, attn_decoder_loss=0.2463, over 29735.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.116, cr_loss=0.3573, attn_decoder_loss=0.2404, over 5794957.72 frames. ], batch size: 81, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:42:36,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=620800.0, ans=0.125 +2024-09-19 07:42:41,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=620800.0, ans=0.125 +2024-09-19 07:42:44,444 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 07:43:07,429 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.55 vs. limit=22.5 +2024-09-19 07:43:09,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=620880.0, ans=0.0 +2024-09-19 07:43:09,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=620880.0, ans=0.1 +2024-09-19 07:43:14,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=620880.0, ans=0.125 +2024-09-19 07:43:21,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=620880.0, ans=0.0 +2024-09-19 07:43:39,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=620960.0, ans=0.125 +2024-09-19 07:43:56,462 INFO [train.py:1198] (1/2) Epoch 35, batch 1400, loss[loss=0.2078, ctc_loss=0.09964, cr_loss=0.3178, attn_decoder_loss=0.2127, over 29570.00 frames. 
], tot_loss[loss=0.2349, ctc_loss=0.1161, cr_loss=0.3576, attn_decoder_loss=0.2401, over 5807186.84 frames. ], batch size: 69, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:44:04,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=621000.0, ans=0.125 +2024-09-19 07:44:44,723 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.569e+01 8.443e+01 9.009e+01 9.628e+01 2.334e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 07:44:49,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=621120.0, ans=0.125 +2024-09-19 07:44:58,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=621160.0, ans=0.0 +2024-09-19 07:45:09,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=621160.0, ans=0.05 +2024-09-19 07:45:11,967 INFO [train.py:1198] (1/2) Epoch 35, batch 1450, loss[loss=0.248, ctc_loss=0.1182, cr_loss=0.3568, attn_decoder_loss=0.2545, over 29423.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1162, cr_loss=0.3577, attn_decoder_loss=0.2405, over 5803639.55 frames. ], batch size: 94, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:45:36,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=621240.0, ans=0.125 +2024-09-19 07:45:59,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=621320.0, ans=0.95 +2024-09-19 07:46:09,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=621320.0, ans=0.0 +2024-09-19 07:46:27,759 INFO [train.py:1198] (1/2) Epoch 35, batch 1500, loss[loss=0.2498, ctc_loss=0.1174, cr_loss=0.376, attn_decoder_loss=0.2562, over 29618.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1161, cr_loss=0.3573, attn_decoder_loss=0.2408, over 5805131.34 frames. 
], batch size: 86, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:46:38,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=621400.0, ans=0.1 +2024-09-19 07:47:01,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=621480.0, ans=0.125 +2024-09-19 07:47:04,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=621480.0, ans=6.0 +2024-09-19 07:47:07,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=621480.0, ans=0.1 +2024-09-19 07:47:20,907 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.458e+01 9.148e+01 9.758e+01 1.676e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-19 07:47:27,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=621520.0, ans=0.125 +2024-09-19 07:47:34,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=621560.0, ans=0.125 +2024-09-19 07:47:44,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=621560.0, ans=10.0 +2024-09-19 07:47:48,562 INFO [train.py:1198] (1/2) Epoch 35, batch 1550, loss[loss=0.2636, ctc_loss=0.1559, cr_loss=0.4459, attn_decoder_loss=0.2657, over 29530.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1167, cr_loss=0.3581, attn_decoder_loss=0.2408, over 5780716.56 frames. ], batch size: 90, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:48:17,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=621680.0, ans=0.125 +2024-09-19 07:48:31,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=621680.0, ans=0.0 +2024-09-19 07:48:35,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=621720.0, ans=0.2 +2024-09-19 07:48:38,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=621720.0, ans=0.125 +2024-09-19 07:49:04,124 INFO [train.py:1198] (1/2) Epoch 35, batch 1600, loss[loss=0.2373, ctc_loss=0.111, cr_loss=0.3535, attn_decoder_loss=0.2435, over 29682.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.117, cr_loss=0.3585, attn_decoder_loss=0.2408, over 5765048.70 frames. ], batch size: 85, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:49:30,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=621840.0, ans=0.0 +2024-09-19 07:49:34,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=621880.0, ans=0.125 +2024-09-19 07:49:52,917 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.384e+01 8.463e+01 9.203e+01 9.882e+01 2.471e+02, threshold=1.841e+02, percent-clipped=1.0 +2024-09-19 07:50:15,419 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.09 vs. 
limit=12.0 +2024-09-19 07:50:20,050 INFO [train.py:1198] (1/2) Epoch 35, batch 1650, loss[loss=0.2482, ctc_loss=0.1214, cr_loss=0.3605, attn_decoder_loss=0.2542, over 29715.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1163, cr_loss=0.3572, attn_decoder_loss=0.2403, over 5761454.98 frames. ], batch size: 89, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:50:20,922 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.30 vs. limit=15.0 +2024-09-19 07:50:59,686 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.23 vs. limit=15.0 +2024-09-19 07:51:01,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=15.0 +2024-09-19 07:51:08,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=622120.0, ans=0.125 +2024-09-19 07:51:31,344 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.82 vs. limit=15.0 +2024-09-19 07:51:39,323 INFO [train.py:1198] (1/2) Epoch 35, batch 1700, loss[loss=0.2024, ctc_loss=0.09548, cr_loss=0.3052, attn_decoder_loss=0.2075, over 29577.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.116, cr_loss=0.3566, attn_decoder_loss=0.2402, over 5780986.35 frames. ], batch size: 69, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:51:47,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=622200.0, ans=0.2 +2024-09-19 07:51:48,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=622200.0, ans=0.125 +2024-09-19 07:51:54,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=622240.0, ans=0.1 +2024-09-19 07:52:02,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=622240.0, ans=0.125 +2024-09-19 07:52:07,816 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.82 vs. limit=10.0 +2024-09-19 07:52:15,168 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.05 vs. limit=10.0 +2024-09-19 07:52:27,941 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.208e+01 8.456e+01 8.908e+01 9.428e+01 1.294e+02, threshold=1.782e+02, percent-clipped=0.0 +2024-09-19 07:52:48,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.13 vs. limit=12.0 +2024-09-19 07:52:55,731 INFO [train.py:1198] (1/2) Epoch 35, batch 1750, loss[loss=0.2138, ctc_loss=0.105, cr_loss=0.3461, attn_decoder_loss=0.2182, over 29345.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1158, cr_loss=0.3561, attn_decoder_loss=0.2399, over 5788041.35 frames. 
], batch size: 67, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:53:02,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=622400.0, ans=0.1 +2024-09-19 07:53:08,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=622400.0, ans=0.125 +2024-09-19 07:53:26,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=622480.0, ans=0.0 +2024-09-19 07:53:46,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=622520.0, ans=10.0 +2024-09-19 07:53:47,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=622520.0, ans=0.1 +2024-09-19 07:54:02,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=622560.0, ans=0.125 +2024-09-19 07:54:04,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.41 vs. limit=15.0 +2024-09-19 07:54:11,180 INFO [train.py:1198] (1/2) Epoch 35, batch 1800, loss[loss=0.253, ctc_loss=0.1351, cr_loss=0.3945, attn_decoder_loss=0.2573, over 29680.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1158, cr_loss=0.3563, attn_decoder_loss=0.2401, over 5791206.22 frames. ], batch size: 83, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:54:14,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=622600.0, ans=0.0 +2024-09-19 07:55:05,588 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.428e+01 8.311e+01 8.892e+01 9.552e+01 1.638e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 07:55:08,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=622720.0, ans=0.0 +2024-09-19 07:55:10,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=622720.0, ans=0.1 +2024-09-19 07:55:11,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=622720.0, ans=0.025 +2024-09-19 07:55:14,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=622760.0, ans=0.125 +2024-09-19 07:55:31,273 INFO [train.py:1198] (1/2) Epoch 35, batch 1850, loss[loss=0.2481, ctc_loss=0.1252, cr_loss=0.3822, attn_decoder_loss=0.2533, over 29644.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1156, cr_loss=0.3559, attn_decoder_loss=0.2399, over 5795477.24 frames. 
], batch size: 86, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:55:52,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=622840.0, ans=0.05 +2024-09-19 07:56:15,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=622920.0, ans=0.1 +2024-09-19 07:56:18,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=622920.0, ans=0.125 +2024-09-19 07:56:18,971 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.49 vs. limit=15.0 +2024-09-19 07:56:19,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=622920.0, ans=0.2 +2024-09-19 07:56:30,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=622960.0, ans=0.125 +2024-09-19 07:56:33,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=622960.0, ans=0.125 +2024-09-19 07:56:46,597 INFO [train.py:1198] (1/2) Epoch 35, batch 1900, loss[loss=0.2414, ctc_loss=0.1252, cr_loss=0.3745, attn_decoder_loss=0.246, over 29697.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1164, cr_loss=0.3572, attn_decoder_loss=0.2408, over 5803015.37 frames. ], batch size: 89, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:56:54,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=623000.0, ans=0.0 +2024-09-19 07:57:08,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=623040.0, ans=0.125 +2024-09-19 07:57:14,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=623040.0, ans=0.2 +2024-09-19 07:57:36,455 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.358e+01 8.574e+01 9.165e+01 9.579e+01 2.044e+02, threshold=1.833e+02, percent-clipped=2.0 +2024-09-19 07:57:39,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=623120.0, ans=0.0 +2024-09-19 07:57:44,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=623120.0, ans=0.125 +2024-09-19 07:57:57,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=623160.0, ans=0.125 +2024-09-19 07:57:59,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=623160.0, ans=0.0 +2024-09-19 07:58:02,542 INFO [train.py:1198] (1/2) Epoch 35, batch 1950, loss[loss=0.2269, ctc_loss=0.1103, cr_loss=0.3395, attn_decoder_loss=0.2323, over 29459.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1173, cr_loss=0.3593, attn_decoder_loss=0.242, over 5818065.90 frames. ], batch size: 78, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 07:58:35,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=623280.0, ans=0.2 +2024-09-19 07:58:43,886 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.98 vs. 
limit=15.0 +2024-09-19 07:59:07,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=623360.0, ans=0.125 +2024-09-19 07:59:20,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=623400.0, ans=0.125 +2024-09-19 07:59:22,128 INFO [train.py:1198] (1/2) Epoch 35, batch 2000, loss[loss=0.2094, ctc_loss=0.1044, cr_loss=0.3507, attn_decoder_loss=0.2133, over 29352.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1175, cr_loss=0.3603, attn_decoder_loss=0.2422, over 5798060.81 frames. ], batch size: 67, lr: 3.16e-03, grad_scale: 16.0 +2024-09-19 07:59:26,132 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.22 vs. limit=15.0 +2024-09-19 07:59:54,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=623480.0, ans=0.0 +2024-09-19 08:00:09,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=623520.0, ans=0.0 +2024-09-19 08:00:13,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.71 vs. limit=15.0 +2024-09-19 08:00:13,529 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.210e+01 8.517e+01 9.042e+01 9.652e+01 2.863e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 08:00:33,334 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:00:37,513 INFO [train.py:1198] (1/2) Epoch 35, batch 2050, loss[loss=0.2113, ctc_loss=0.1014, cr_loss=0.3261, attn_decoder_loss=0.2163, over 29458.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.117, cr_loss=0.3585, attn_decoder_loss=0.2414, over 5789228.31 frames. ], batch size: 70, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 08:01:04,396 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.37 vs. limit=10.0 +2024-09-19 08:01:10,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=623680.0, ans=0.125 +2024-09-19 08:01:15,030 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.96 vs. limit=15.0 +2024-09-19 08:01:17,714 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.10 vs. limit=15.0 +2024-09-19 08:01:47,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=623760.0, ans=0.125 +2024-09-19 08:01:50,703 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:01:53,588 INFO [train.py:1198] (1/2) Epoch 35, batch 2100, loss[loss=0.2303, ctc_loss=0.1098, cr_loss=0.3484, attn_decoder_loss=0.2359, over 29732.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1168, cr_loss=0.3581, attn_decoder_loss=0.2411, over 5800736.21 frames. 
], batch size: 81, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 08:01:55,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=623800.0, ans=0.025 +2024-09-19 08:02:43,774 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:02:49,244 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.573e+01 8.509e+01 9.008e+01 9.603e+01 1.299e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 08:03:20,895 INFO [train.py:1198] (1/2) Epoch 35, batch 2150, loss[loss=0.2306, ctc_loss=0.1172, cr_loss=0.3548, attn_decoder_loss=0.2354, over 29435.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.116, cr_loss=0.3568, attn_decoder_loss=0.2403, over 5815795.14 frames. ], batch size: 78, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 08:03:22,904 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:03:25,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=624000.0, ans=0.1 +2024-09-19 08:03:37,940 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:03:42,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=624040.0, ans=0.125 +2024-09-19 08:03:42,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=624040.0, ans=0.5 +2024-09-19 08:03:53,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=624080.0, ans=0.1 +2024-09-19 08:04:13,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=624120.0, ans=0.1 +2024-09-19 08:04:32,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=624160.0, ans=0.125 +2024-09-19 08:04:36,500 INFO [train.py:1198] (1/2) Epoch 35, batch 2200, loss[loss=0.2588, ctc_loss=0.1266, cr_loss=0.3864, attn_decoder_loss=0.2649, over 29636.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1163, cr_loss=0.3569, attn_decoder_loss=0.2405, over 5811490.01 frames. ], batch size: 86, lr: 3.16e-03, grad_scale: 8.0 +2024-09-19 08:04:41,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=624200.0, ans=0.0 +2024-09-19 08:04:51,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=624240.0, ans=0.0 +2024-09-19 08:05:05,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=624280.0, ans=0.0 +2024-09-19 08:05:14,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=624280.0, ans=0.0 +2024-09-19 08:05:23,986 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=22.85 vs. 
limit=22.5 +2024-09-19 08:05:27,602 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.240e+01 8.597e+01 9.109e+01 9.743e+01 2.251e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-19 08:05:28,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=624320.0, ans=0.125 +2024-09-19 08:05:30,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=624320.0, ans=0.1 +2024-09-19 08:05:32,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=624320.0, ans=0.125 +2024-09-19 08:05:37,765 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.75 vs. limit=15.0 +2024-09-19 08:05:47,564 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:05:51,839 INFO [train.py:1198] (1/2) Epoch 35, batch 2250, loss[loss=0.2464, ctc_loss=0.1271, cr_loss=0.382, attn_decoder_loss=0.2511, over 29708.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1164, cr_loss=0.3568, attn_decoder_loss=0.2406, over 5810561.73 frames. ], batch size: 82, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:06:02,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=624400.0, ans=0.0 +2024-09-19 08:06:02,732 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:06:09,466 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.14 vs. limit=15.0 +2024-09-19 08:06:21,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=624440.0, ans=0.0 +2024-09-19 08:06:39,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=624520.0, ans=0.125 +2024-09-19 08:06:44,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=624520.0, ans=0.07 +2024-09-19 08:06:49,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=624520.0, ans=0.125 +2024-09-19 08:06:53,552 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:07:10,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=624600.0, ans=0.125 +2024-09-19 08:07:11,531 INFO [train.py:1198] (1/2) Epoch 35, batch 2300, loss[loss=0.1965, ctc_loss=0.08313, cr_loss=0.2772, attn_decoder_loss=0.2029, over 29293.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1155, cr_loss=0.3551, attn_decoder_loss=0.2396, over 5798754.98 frames. 
], batch size: 71, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:07:23,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=624600.0, ans=0.07 +2024-09-19 08:07:25,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=624640.0, ans=0.025 +2024-09-19 08:07:45,635 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.68 vs. limit=15.0 +2024-09-19 08:07:47,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=5.95 vs. limit=15.0 +2024-09-19 08:08:02,739 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.224e+01 8.578e+01 9.084e+01 9.791e+01 1.309e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-19 08:08:03,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=624720.0, ans=0.0 +2024-09-19 08:08:27,457 INFO [train.py:1198] (1/2) Epoch 35, batch 2350, loss[loss=0.2643, ctc_loss=0.142, cr_loss=0.4126, attn_decoder_loss=0.2687, over 29689.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1159, cr_loss=0.3561, attn_decoder_loss=0.2398, over 5804272.75 frames. ], batch size: 83, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:08:40,117 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.72 vs. limit=10.0 +2024-09-19 08:09:02,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=624880.0, ans=0.0 +2024-09-19 08:09:14,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=624920.0, ans=0.1 +2024-09-19 08:09:21,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=624920.0, ans=0.125 +2024-09-19 08:09:21,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=624920.0, ans=0.125 +2024-09-19 08:09:40,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=624960.0, ans=0.0 +2024-09-19 08:09:42,847 INFO [train.py:1198] (1/2) Epoch 35, batch 2400, loss[loss=0.2262, ctc_loss=0.1135, cr_loss=0.351, attn_decoder_loss=0.2309, over 29546.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1165, cr_loss=0.3573, attn_decoder_loss=0.2404, over 5808529.11 frames. ], batch size: 76, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:09:44,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=625000.0, ans=0.0 +2024-09-19 08:09:47,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=625000.0, ans=0.2 +2024-09-19 08:09:49,907 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.09 vs. 
limit=6.0 +2024-09-19 08:09:58,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=625040.0, ans=0.125 +2024-09-19 08:10:00,216 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.79 vs. limit=10.0 +2024-09-19 08:10:15,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=625080.0, ans=0.0 +2024-09-19 08:10:23,795 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.25 vs. limit=22.5 +2024-09-19 08:10:35,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=625120.0, ans=0.0 +2024-09-19 08:10:38,550 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.738e+01 8.636e+01 9.212e+01 9.895e+01 1.857e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-19 08:10:56,228 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.11 vs. limit=6.0 +2024-09-19 08:11:02,873 INFO [train.py:1198] (1/2) Epoch 35, batch 2450, loss[loss=0.2394, ctc_loss=0.1175, cr_loss=0.3579, attn_decoder_loss=0.245, over 29722.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1173, cr_loss=0.3589, attn_decoder_loss=0.2415, over 5783918.45 frames. ], batch size: 82, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:11:18,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=625240.0, ans=0.125 +2024-09-19 08:11:18,684 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.09 vs. limit=22.5 +2024-09-19 08:11:33,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=625280.0, ans=0.05 +2024-09-19 08:11:33,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=625280.0, ans=0.0 +2024-09-19 08:11:48,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=625320.0, ans=0.0 +2024-09-19 08:12:00,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.09 vs. limit=10.0 +2024-09-19 08:12:01,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=625320.0, ans=0.125 +2024-09-19 08:12:18,951 INFO [train.py:1198] (1/2) Epoch 35, batch 2500, loss[loss=0.2383, ctc_loss=0.1181, cr_loss=0.3599, attn_decoder_loss=0.2436, over 29635.00 frames. ], tot_loss[loss=0.2364, ctc_loss=0.1174, cr_loss=0.3598, attn_decoder_loss=0.2416, over 5794904.87 frames. ], batch size: 86, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:12:41,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.39 vs. 
limit=10.0 +2024-09-19 08:13:10,580 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.197e+01 8.524e+01 8.980e+01 9.425e+01 1.614e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-19 08:13:24,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=625560.0, ans=0.2 +2024-09-19 08:13:26,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.51 vs. limit=12.0 +2024-09-19 08:13:29,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=625560.0, ans=0.2 +2024-09-19 08:13:34,334 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.37 vs. limit=6.0 +2024-09-19 08:13:35,396 INFO [train.py:1198] (1/2) Epoch 35, batch 2550, loss[loss=0.211, ctc_loss=0.101, cr_loss=0.3399, attn_decoder_loss=0.2157, over 29313.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1172, cr_loss=0.3595, attn_decoder_loss=0.2415, over 5798242.18 frames. ], batch size: 67, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:13:43,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=625600.0, ans=0.125 +2024-09-19 08:13:54,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=625640.0, ans=0.0 +2024-09-19 08:13:55,172 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.31 vs. limit=15.0 +2024-09-19 08:14:20,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=625680.0, ans=0.125 +2024-09-19 08:14:20,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=625680.0, ans=0.0 +2024-09-19 08:14:23,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=625720.0, ans=0.125 +2024-09-19 08:14:33,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=625720.0, ans=0.125 +2024-09-19 08:14:55,516 INFO [train.py:1198] (1/2) Epoch 35, batch 2600, loss[loss=0.2323, ctc_loss=0.1151, cr_loss=0.3489, attn_decoder_loss=0.2376, over 29466.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1174, cr_loss=0.36, attn_decoder_loss=0.2418, over 5794818.30 frames. ], batch size: 78, lr: 3.15e-03, grad_scale: 16.0 +2024-09-19 08:15:16,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=625840.0, ans=0.0 +2024-09-19 08:15:18,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=625840.0, ans=0.125 +2024-09-19 08:15:20,363 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.19 vs. 
limit=15.0 +2024-09-19 08:15:27,392 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 08:15:30,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=625880.0, ans=0.1 +2024-09-19 08:15:31,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=625880.0, ans=0.025 +2024-09-19 08:15:45,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=625920.0, ans=0.125 +2024-09-19 08:15:46,693 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.822e+01 8.594e+01 9.058e+01 9.611e+01 1.555e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-19 08:15:57,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=625960.0, ans=0.125 +2024-09-19 08:15:57,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=625960.0, ans=0.1 +2024-09-19 08:16:10,541 INFO [train.py:1198] (1/2) Epoch 35, batch 2650, loss[loss=0.2522, ctc_loss=0.1257, cr_loss=0.3699, attn_decoder_loss=0.2581, over 29331.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1174, cr_loss=0.3602, attn_decoder_loss=0.242, over 5801969.27 frames. ], batch size: 100, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:16:10,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=626000.0, ans=0.0 +2024-09-19 08:16:12,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=626000.0, ans=0.0 +2024-09-19 08:16:34,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=626040.0, ans=0.125 +2024-09-19 08:16:56,212 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.52 vs. limit=22.5 +2024-09-19 08:17:00,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=626120.0, ans=0.125 +2024-09-19 08:17:14,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.39 vs. limit=22.5 +2024-09-19 08:17:25,667 INFO [train.py:1198] (1/2) Epoch 35, batch 2700, loss[loss=0.2521, ctc_loss=0.126, cr_loss=0.3799, attn_decoder_loss=0.2577, over 29523.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1179, cr_loss=0.3609, attn_decoder_loss=0.2425, over 5797851.12 frames. ], batch size: 87, lr: 3.15e-03, grad_scale: 8.0 +2024-09-19 08:17:26,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=626200.0, ans=0.0 +2024-09-19 08:17:35,644 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.78 vs. 
limit=10.0
+2024-09-19 08:17:46,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=626240.0, ans=0.125
+2024-09-19 08:17:46,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=626240.0, ans=0.025
+2024-09-19 08:17:46,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=626240.0, ans=0.0
+2024-09-19 08:17:58,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=626280.0, ans=0.2
+2024-09-19 08:17:59,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=626280.0, ans=0.125
+2024-09-19 08:18:20,758 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.500e+01 8.428e+01 9.037e+01 9.618e+01 3.244e+02, threshold=1.807e+02, percent-clipped=1.0
+2024-09-19 08:18:34,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=626360.0, ans=0.2
+2024-09-19 08:18:46,431 INFO [train.py:1198] (1/2) Epoch 35, batch 2750, loss[loss=0.2247, ctc_loss=0.1088, cr_loss=0.3406, attn_decoder_loss=0.23, over 29520.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1171, cr_loss=0.3588, attn_decoder_loss=0.2415, over 5797527.68 frames. ], batch size: 75, lr: 3.15e-03, grad_scale: 8.0
+2024-09-19 08:18:49,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=626400.0, ans=0.07
+2024-09-19 08:18:51,443 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:18:51,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=626400.0, ans=0.05
+2024-09-19 08:18:54,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=626400.0, ans=0.125
+2024-09-19 08:19:15,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=626480.0, ans=0.0
+2024-09-19 08:19:25,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=626480.0, ans=0.04949747468305833
+2024-09-19 08:19:35,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=626520.0, ans=0.125
+2024-09-19 08:20:02,181 INFO [train.py:1198] (1/2) Epoch 35, batch 2800, loss[loss=0.2529, ctc_loss=0.1432, cr_loss=0.3907, attn_decoder_loss=0.2564, over 19864.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.117, cr_loss=0.3586, attn_decoder_loss=0.2413, over 5777963.21 frames. ], batch size: 209, lr: 3.15e-03, grad_scale: 16.0
+2024-09-19 08:20:52,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=626720.0, ans=0.0
+2024-09-19 08:20:54,997 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.727e+01 8.599e+01 9.222e+01 9.663e+01 2.009e+02, threshold=1.844e+02, percent-clipped=1.0
+2024-09-19 08:21:17,437 INFO [train.py:1198] (1/2) Epoch 35, batch 2850, loss[loss=0.2261, ctc_loss=0.1106, cr_loss=0.345, attn_decoder_loss=0.2312, over 29499.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1175, cr_loss=0.3591, attn_decoder_loss=0.2415, over 5763607.33 frames. ], batch size: 77, lr: 3.15e-03, grad_scale: 8.0
+2024-09-19 08:21:17,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=626800.0, ans=0.2
+2024-09-19 08:21:38,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=626840.0, ans=0.09899494936611666
+2024-09-19 08:22:00,150 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.62 vs. limit=15.0
+2024-09-19 08:22:03,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn1.whiten.whitening_limit, batch_count=626880.0, ans=22.5
+2024-09-19 08:22:25,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.75 vs. limit=22.5
+2024-09-19 08:22:28,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=626960.0, ans=0.1
+2024-09-19 08:22:37,754 INFO [train.py:1198] (1/2) Epoch 35, batch 2900, loss[loss=0.2405, ctc_loss=0.1229, cr_loss=0.3753, attn_decoder_loss=0.2452, over 29419.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.118, cr_loss=0.3609, attn_decoder_loss=0.2424, over 5788619.68 frames. ], batch size: 79, lr: 3.15e-03, grad_scale: 8.0
+2024-09-19 08:23:02,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=627040.0, ans=0.2
+2024-09-19 08:23:27,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=627120.0, ans=0.0
+2024-09-19 08:23:32,019 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.291e+01 8.643e+01 9.038e+01 9.732e+01 2.249e+02, threshold=1.808e+02, percent-clipped=2.0
+2024-09-19 08:23:43,349 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.86 vs. limit=15.0
+2024-09-19 08:23:45,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=627160.0, ans=0.125
+2024-09-19 08:23:50,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=627160.0, ans=0.0
+2024-09-19 08:23:50,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=627160.0, ans=0.125
+2024-09-19 08:23:53,515 INFO [train.py:1198] (1/2) Epoch 35, batch 2950, loss[loss=0.2182, ctc_loss=0.1048, cr_loss=0.3349, attn_decoder_loss=0.2233, over 29515.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1173, cr_loss=0.3593, attn_decoder_loss=0.2412, over 5783539.03 frames. ], batch size: 75, lr: 3.15e-03, grad_scale: 8.0
+2024-09-19 08:24:25,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=627280.0, ans=0.125
+2024-09-19 08:24:33,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=627280.0, ans=0.125
+2024-09-19 08:25:09,411 INFO [train.py:1198] (1/2) Epoch 35, batch 3000, loss[loss=0.2413, ctc_loss=0.1235, cr_loss=0.3749, attn_decoder_loss=0.246, over 29739.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1171, cr_loss=0.3591, attn_decoder_loss=0.2411, over 5782780.28 frames. ], batch size: 81, lr: 3.15e-03, grad_scale: 8.0
+2024-09-19 08:25:09,412 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-19 08:25:28,762 INFO [train.py:1230] (1/2) Epoch 35, validation: loss=0.2119, ctc_loss=0.03685, cr_loss=6.108e-15, attn_decoder_loss=0.2313, over 944034.00 frames.
+2024-09-19 08:25:28,762 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-19 08:25:38,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=627400.0, ans=0.125
+2024-09-19 08:25:46,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=627440.0, ans=0.1
+2024-09-19 08:26:01,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=627480.0, ans=0.125
+2024-09-19 08:26:25,876 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 8.691e+01 9.210e+01 9.887e+01 4.457e+02, threshold=1.842e+02, percent-clipped=1.0
+2024-09-19 08:26:36,940 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:26:47,038 INFO [train.py:1198] (1/2) Epoch 35, batch 3050, loss[loss=0.2302, ctc_loss=0.116, cr_loss=0.3756, attn_decoder_loss=0.2345, over 29537.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1172, cr_loss=0.3597, attn_decoder_loss=0.2416, over 5776273.88 frames. ], batch size: 76, lr: 3.15e-03, grad_scale: 8.0
+2024-09-19 08:26:55,441 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.32 vs. limit=15.0
+2024-09-19 08:27:07,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=627640.0, ans=0.1
+2024-09-19 08:27:15,190 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.71 vs. limit=22.5
+2024-09-19 08:27:15,308 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.33 vs. limit=15.0
+2024-09-19 08:27:16,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=627680.0, ans=0.0
+2024-09-19 08:27:23,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=627680.0, ans=0.125
+2024-09-19 08:27:49,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=627760.0, ans=0.125
+2024-09-19 08:28:02,355 INFO [train.py:1198] (1/2) Epoch 35, batch 3100, loss[loss=0.2418, ctc_loss=0.117, cr_loss=0.3511, attn_decoder_loss=0.2478, over 29261.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.117, cr_loss=0.3595, attn_decoder_loss=0.2413, over 5776268.04 frames. ], batch size: 100, lr: 3.15e-03, grad_scale: 8.0
+2024-09-19 08:28:16,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=627840.0, ans=0.95
+2024-09-19 08:28:45,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=627880.0, ans=0.125
+2024-09-19 08:28:57,339 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.618e+01 9.080e+01 9.751e+01 2.675e+02, threshold=1.816e+02, percent-clipped=2.0
+2024-09-19 08:28:57,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=627920.0, ans=0.125
+2024-09-19 08:29:03,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=627960.0, ans=0.125
+2024-09-19 08:29:16,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=627960.0, ans=0.125
+2024-09-19 08:29:21,433 INFO [train.py:1198] (1/2) Epoch 35, batch 3150, loss[loss=0.2575, ctc_loss=0.1381, cr_loss=0.4043, attn_decoder_loss=0.2618, over 28741.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1171, cr_loss=0.3596, attn_decoder_loss=0.2413, over 5783403.49 frames. ], batch size: 104, lr: 3.15e-03, grad_scale: 8.0
+2024-09-19 08:29:23,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=628000.0, ans=0.125
+2024-09-19 08:29:48,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=628040.0, ans=0.125
+2024-09-19 08:30:01,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=628080.0, ans=0.0
+2024-09-19 08:30:18,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=628120.0, ans=0.125
+2024-09-19 08:30:36,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=628160.0, ans=0.2
+2024-09-19 08:30:38,989 INFO [train.py:1198] (1/2) Epoch 35, batch 3200, loss[loss=0.2255, ctc_loss=0.1017, cr_loss=0.3313, attn_decoder_loss=0.2318, over 29774.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1169, cr_loss=0.359, attn_decoder_loss=0.2411, over 5794326.20 frames. ], batch size: 80, lr: 3.15e-03, grad_scale: 16.0
+2024-09-19 08:30:50,341 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.99 vs. limit=6.0
+2024-09-19 08:30:51,451 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:30:55,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=628240.0, ans=0.0
+2024-09-19 08:31:34,894 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.308e+01 8.608e+01 9.276e+01 9.756e+01 1.910e+02, threshold=1.855e+02, percent-clipped=1.0
+2024-09-19 08:31:50,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=628360.0, ans=0.0
+2024-09-19 08:31:54,892 INFO [train.py:1198] (1/2) Epoch 35, batch 3250, loss[loss=0.2453, ctc_loss=0.1271, cr_loss=0.3836, attn_decoder_loss=0.2499, over 29700.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1168, cr_loss=0.3588, attn_decoder_loss=0.2413, over 5800968.31 frames. ], batch size: 84, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:32:04,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=628400.0, ans=0.1
+2024-09-19 08:32:07,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=628400.0, ans=0.0
+2024-09-19 08:32:10,291 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:32:57,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=628560.0, ans=0.2
+2024-09-19 08:33:04,804 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.60 vs. limit=10.0
+2024-09-19 08:33:12,770 INFO [train.py:1198] (1/2) Epoch 35, batch 3300, loss[loss=0.2451, ctc_loss=0.1255, cr_loss=0.3623, attn_decoder_loss=0.2503, over 28431.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.116, cr_loss=0.3569, attn_decoder_loss=0.2401, over 5797504.56 frames. ], batch size: 112, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:33:26,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=628640.0, ans=0.125
+2024-09-19 08:33:31,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=628640.0, ans=0.04949747468305833
+2024-09-19 08:34:10,345 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.666e+01 8.715e+01 9.290e+01 9.754e+01 2.928e+02, threshold=1.858e+02, percent-clipped=2.0
+2024-09-19 08:34:13,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=628760.0, ans=0.09899494936611666
+2024-09-19 08:34:13,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=628760.0, ans=0.2
+2024-09-19 08:34:30,126 INFO [train.py:1198] (1/2) Epoch 35, batch 3350, loss[loss=0.2506, ctc_loss=0.1282, cr_loss=0.3672, attn_decoder_loss=0.256, over 28812.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.117, cr_loss=0.3588, attn_decoder_loss=0.241, over 5774378.97 frames. ], batch size: 104, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:34:53,935 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.53 vs. limit=12.0
+2024-09-19 08:34:56,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=628840.0, ans=0.125
+2024-09-19 08:35:11,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=628880.0, ans=0.2
+2024-09-19 08:35:28,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=628920.0, ans=0.04949747468305833
+2024-09-19 08:35:28,931 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.48 vs. limit=12.0
+2024-09-19 08:35:33,175 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.57 vs. limit=15.0
+2024-09-19 08:35:34,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=628960.0, ans=0.125
+2024-09-19 08:35:43,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=628960.0, ans=0.2
+2024-09-19 08:35:46,089 INFO [train.py:1198] (1/2) Epoch 35, batch 3400, loss[loss=0.2062, ctc_loss=0.1008, cr_loss=0.3295, attn_decoder_loss=0.2106, over 29325.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1173, cr_loss=0.3593, attn_decoder_loss=0.241, over 5767431.26 frames. ], batch size: 67, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:35:46,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=629000.0, ans=0.2
+2024-09-19 08:35:51,797 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.98 vs. limit=8.0
+2024-09-19 08:35:52,523 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:35:56,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=629000.0, ans=0.125
+2024-09-19 08:36:05,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=629040.0, ans=0.2
+2024-09-19 08:36:30,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=629120.0, ans=0.0
+2024-09-19 08:36:44,225 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.920e+01 8.517e+01 9.055e+01 9.651e+01 2.142e+02, threshold=1.811e+02, percent-clipped=1.0
+2024-09-19 08:36:45,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=629120.0, ans=0.125
+2024-09-19 08:36:49,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.19 vs. limit=15.0
+2024-09-19 08:36:55,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.81 vs. limit=15.0
+2024-09-19 08:37:03,842 INFO [train.py:1198] (1/2) Epoch 35, batch 3450, loss[loss=0.2406, ctc_loss=0.1151, cr_loss=0.344, attn_decoder_loss=0.2469, over 28299.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.117, cr_loss=0.3589, attn_decoder_loss=0.2413, over 5775150.62 frames. ], batch size: 111, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:37:10,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=629200.0, ans=0.125
+2024-09-19 08:37:10,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=629200.0, ans=0.125
+2024-09-19 08:37:11,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.76 vs. limit=15.0
+2024-09-19 08:37:48,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=629320.0, ans=0.0
+2024-09-19 08:38:08,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=629360.0, ans=0.0
+2024-09-19 08:38:13,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=629360.0, ans=0.125
+2024-09-19 08:38:21,939 INFO [train.py:1198] (1/2) Epoch 35, batch 3500, loss[loss=0.2118, ctc_loss=0.09745, cr_loss=0.3107, attn_decoder_loss=0.2176, over 29290.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.117, cr_loss=0.3589, attn_decoder_loss=0.241, over 5776448.94 frames. ], batch size: 71, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:38:56,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=629480.0, ans=0.025
+2024-09-19 08:38:59,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=629480.0, ans=0.07
+2024-09-19 08:39:04,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=629480.0, ans=0.0
+2024-09-19 08:39:11,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.19 vs. limit=15.0
+2024-09-19 08:39:17,037 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.511e+01 8.529e+01 8.957e+01 9.484e+01 1.276e+02, threshold=1.791e+02, percent-clipped=0.0
+2024-09-19 08:39:32,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=629560.0, ans=10.0
+2024-09-19 08:39:36,788 INFO [train.py:1198] (1/2) Epoch 35, batch 3550, loss[loss=0.2408, ctc_loss=0.1107, cr_loss=0.3493, attn_decoder_loss=0.2475, over 29715.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1169, cr_loss=0.359, attn_decoder_loss=0.2411, over 5782315.37 frames. ], batch size: 89, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:39:37,812 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.81 vs. limit=10.0
+2024-09-19 08:39:51,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=629640.0, ans=0.0
+2024-09-19 08:40:50,972 INFO [train.py:1198] (1/2) Epoch 35, batch 3600, loss[loss=0.2345, ctc_loss=0.1125, cr_loss=0.3567, attn_decoder_loss=0.2402, over 29478.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.117, cr_loss=0.3592, attn_decoder_loss=0.2411, over 5791274.26 frames. ], batch size: 77, lr: 3.14e-03, grad_scale: 16.0
+2024-09-19 08:40:54,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=629800.0, ans=0.125
+2024-09-19 08:41:10,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=629840.0, ans=0.1
+2024-09-19 08:41:12,313 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:41:16,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=629840.0, ans=0.2
+2024-09-19 08:41:16,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=629840.0, ans=0.0
+2024-09-19 08:41:36,240 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.10 vs. limit=15.0
+2024-09-19 08:41:47,405 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.602e+01 8.545e+01 9.030e+01 9.736e+01 4.485e+02, threshold=1.806e+02, percent-clipped=2.0
+2024-09-19 08:41:49,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=629960.0, ans=0.125
+2024-09-19 08:41:54,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=629960.0, ans=0.125
+2024-09-19 08:41:58,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=629960.0, ans=0.2
+2024-09-19 08:42:01,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=629960.0, ans=0.125
+2024-09-19 08:42:07,119 INFO [train.py:1198] (1/2) Epoch 35, batch 3650, loss[loss=0.2552, ctc_loss=0.1335, cr_loss=0.3979, attn_decoder_loss=0.2599, over 29497.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1163, cr_loss=0.3575, attn_decoder_loss=0.2404, over 5794515.25 frames. ], batch size: 90, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:42:22,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=630040.0, ans=0.025
+2024-09-19 08:42:29,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=630040.0, ans=0.125
+2024-09-19 08:42:40,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=630080.0, ans=0.125
+2024-09-19 08:43:03,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=630120.0, ans=0.2
+2024-09-19 08:43:16,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=630160.0, ans=0.0
+2024-09-19 08:43:21,875 INFO [train.py:1198] (1/2) Epoch 35, batch 3700, loss[loss=0.2494, ctc_loss=0.1279, cr_loss=0.3847, attn_decoder_loss=0.2543, over 29700.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1161, cr_loss=0.3574, attn_decoder_loss=0.2404, over 5804084.17 frames. ], batch size: 84, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:43:22,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=630200.0, ans=0.1
+2024-09-19 08:43:50,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=630280.0, ans=0.1
+2024-09-19 08:44:02,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=630280.0, ans=0.125
+2024-09-19 08:44:05,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=630280.0, ans=0.1
+2024-09-19 08:44:08,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=630320.0, ans=0.0
+2024-09-19 08:44:08,830 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.85 vs. limit=15.0
+2024-09-19 08:44:11,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=630320.0, ans=0.125
+2024-09-19 08:44:15,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=630320.0, ans=0.125
+2024-09-19 08:44:19,842 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.560e+01 8.511e+01 9.010e+01 9.557e+01 1.443e+02, threshold=1.802e+02, percent-clipped=0.0
+2024-09-19 08:44:20,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=630320.0, ans=0.0
+2024-09-19 08:44:20,548 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.55 vs. limit=15.0
+2024-09-19 08:44:25,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.24 vs. limit=10.0
+2024-09-19 08:44:36,822 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.71 vs. limit=6.0
+2024-09-19 08:44:38,050 INFO [train.py:1198] (1/2) Epoch 35, batch 3750, loss[loss=0.2035, ctc_loss=0.09671, cr_loss=0.3113, attn_decoder_loss=0.2084, over 29340.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.116, cr_loss=0.3571, attn_decoder_loss=0.2402, over 5807587.06 frames. ], batch size: 67, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:45:00,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=630440.0, ans=0.125
+2024-09-19 08:45:03,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=630440.0, ans=0.0
+2024-09-19 08:45:13,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=630480.0, ans=0.125
+2024-09-19 08:45:23,102 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:45:24,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=630520.0, ans=0.5
+2024-09-19 08:45:47,516 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.73 vs. limit=22.5
+2024-09-19 08:45:52,361 INFO [train.py:1198] (1/2) Epoch 35, batch 3800, loss[loss=0.2322, ctc_loss=0.106, cr_loss=0.3431, attn_decoder_loss=0.2386, over 29653.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1165, cr_loss=0.3579, attn_decoder_loss=0.2401, over 5798620.58 frames. ], batch size: 86, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:45:58,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=630600.0, ans=0.125
+2024-09-19 08:46:05,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.82 vs. limit=10.0
+2024-09-19 08:46:05,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=630640.0, ans=0.1
+2024-09-19 08:46:11,262 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.91 vs. limit=15.0
+2024-09-19 08:46:20,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.53 vs. limit=15.0
+2024-09-19 08:46:22,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=630680.0, ans=0.125
+2024-09-19 08:46:48,666 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.417e+01 9.020e+01 9.508e+01 1.354e+02, threshold=1.804e+02, percent-clipped=0.0
+2024-09-19 08:46:57,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=630760.0, ans=0.05
+2024-09-19 08:46:59,783 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.20 vs. limit=12.0
+2024-09-19 08:47:06,573 INFO [train.py:1198] (1/2) Epoch 35, batch 3850, loss[loss=0.2479, ctc_loss=0.1189, cr_loss=0.3707, attn_decoder_loss=0.254, over 29222.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1165, cr_loss=0.3582, attn_decoder_loss=0.2402, over 5812839.33 frames. ], batch size: 100, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:47:11,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=630800.0, ans=0.1
+2024-09-19 08:47:26,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=630840.0, ans=0.025
+2024-09-19 08:47:32,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=630840.0, ans=0.125
+2024-09-19 08:47:33,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=630840.0, ans=0.1
+2024-09-19 08:47:33,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=630840.0, ans=0.0
+2024-09-19 08:47:47,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=630880.0, ans=0.2
+2024-09-19 08:47:48,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=630880.0, ans=0.125
+2024-09-19 08:47:50,230 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:47:57,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=630920.0, ans=0.0
+2024-09-19 08:48:00,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=630920.0, ans=0.2
+2024-09-19 08:48:17,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=630960.0, ans=15.0
+2024-09-19 08:48:18,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=630960.0, ans=0.04949747468305833
+2024-09-19 08:48:21,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=631000.0, ans=0.125
+2024-09-19 08:48:22,473 INFO [train.py:1198] (1/2) Epoch 35, batch 3900, loss[loss=0.2393, ctc_loss=0.1134, cr_loss=0.3619, attn_decoder_loss=0.2452, over 29618.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1167, cr_loss=0.3589, attn_decoder_loss=0.2407, over 5816856.53 frames. ], batch size: 86, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:48:48,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=631040.0, ans=0.125
+2024-09-19 08:48:55,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=631080.0, ans=0.05
+2024-09-19 08:48:57,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=631080.0, ans=0.125
+2024-09-19 08:49:08,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=631120.0, ans=0.125
+2024-09-19 08:49:15,774 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.02 vs. limit=10.0
+2024-09-19 08:49:17,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=631120.0, ans=0.1
+2024-09-19 08:49:18,877 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.482e+01 8.961e+01 9.353e+01 1.224e+02, threshold=1.792e+02, percent-clipped=0.0
+2024-09-19 08:49:19,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=631120.0, ans=0.125
+2024-09-19 08:49:30,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=631160.0, ans=0.125
+2024-09-19 08:49:38,529 INFO [train.py:1198] (1/2) Epoch 35, batch 3950, loss[loss=0.2472, ctc_loss=0.1254, cr_loss=0.3702, attn_decoder_loss=0.2525, over 29454.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1165, cr_loss=0.3583, attn_decoder_loss=0.2409, over 5836104.35 frames. ], batch size: 97, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:49:40,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=631200.0, ans=0.125
+2024-09-19 08:50:02,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=631240.0, ans=0.125
+2024-09-19 08:50:02,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=631240.0, ans=0.0
+2024-09-19 08:50:18,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=631280.0, ans=0.125
+2024-09-19 08:50:36,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.19 vs. limit=6.0
+2024-09-19 08:50:45,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=631360.0, ans=0.07
+2024-09-19 08:50:45,369 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:50:52,168 INFO [train.py:1198] (1/2) Epoch 35, batch 4000, loss[loss=0.2179, ctc_loss=0.1011, cr_loss=0.337, attn_decoder_loss=0.2234, over 29548.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1168, cr_loss=0.3589, attn_decoder_loss=0.2409, over 5814083.82 frames. ], batch size: 74, lr: 3.14e-03, grad_scale: 16.0
+2024-09-19 08:50:58,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=631400.0, ans=0.2
+2024-09-19 08:51:25,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=631480.0, ans=0.0
+2024-09-19 08:51:38,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=631520.0, ans=0.125
+2024-09-19 08:51:50,542 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.935e+01 8.609e+01 9.049e+01 9.611e+01 2.994e+02, threshold=1.810e+02, percent-clipped=1.0
+2024-09-19 08:52:06,768 INFO [train.py:1198] (1/2) Epoch 35, batch 4050, loss[loss=0.2563, ctc_loss=0.1465, cr_loss=0.3966, attn_decoder_loss=0.2597, over 20498.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1163, cr_loss=0.3578, attn_decoder_loss=0.2405, over 5798511.82 frames. ], batch size: 209, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:52:40,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=631680.0, ans=0.0
+2024-09-19 08:52:54,887 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:53:03,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=631720.0, ans=0.125
+2024-09-19 08:53:08,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=631760.0, ans=0.125
+2024-09-19 08:53:21,185 INFO [train.py:1198] (1/2) Epoch 35, batch 4100, loss[loss=0.2525, ctc_loss=0.1324, cr_loss=0.393, attn_decoder_loss=0.2571, over 29510.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1168, cr_loss=0.3586, attn_decoder_loss=0.2408, over 5793533.21 frames. ], batch size: 90, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:53:28,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=631800.0, ans=0.125
+2024-09-19 08:54:00,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=631880.0, ans=0.125
+2024-09-19 08:54:02,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=631880.0, ans=0.0
+2024-09-19 08:54:03,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=631920.0, ans=0.5
+2024-09-19 08:54:13,111 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=9.87 vs. limit=15.0
+2024-09-19 08:54:19,449 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.260e+01 8.556e+01 9.136e+01 9.776e+01 2.394e+02, threshold=1.827e+02, percent-clipped=3.0
+2024-09-19 08:54:19,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=631960.0, ans=0.05
+2024-09-19 08:54:21,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=631960.0, ans=0.1
+2024-09-19 08:54:26,100 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.89 vs. limit=22.5
+2024-09-19 08:54:34,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=632000.0, ans=0.2
+2024-09-19 08:54:36,190 INFO [train.py:1198] (1/2) Epoch 35, batch 4150, loss[loss=0.2327, ctc_loss=0.1186, cr_loss=0.3704, attn_decoder_loss=0.2371, over 29516.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1166, cr_loss=0.3577, attn_decoder_loss=0.2405, over 5800535.74 frames. ], batch size: 77, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:54:42,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=632000.0, ans=0.1
+2024-09-19 08:55:08,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=632080.0, ans=0.125
+2024-09-19 08:55:11,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=632080.0, ans=0.0
+2024-09-19 08:55:26,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=632120.0, ans=0.2
+2024-09-19 08:55:30,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=632120.0, ans=0.025
+2024-09-19 08:55:49,859 INFO [train.py:1198] (1/2) Epoch 35, batch 4200, loss[loss=0.2477, ctc_loss=0.1394, cr_loss=0.398, attn_decoder_loss=0.2508, over 29513.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1166, cr_loss=0.3579, attn_decoder_loss=0.2408, over 5802238.38 frames. ], batch size: 90, lr: 3.14e-03, grad_scale: 8.0
+2024-09-19 08:56:12,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=632240.0, ans=0.125
+2024-09-19 08:56:36,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=632320.0, ans=0.1
+2024-09-19 08:56:39,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=632320.0, ans=0.025
+2024-09-19 08:56:42,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=632320.0, ans=0.125
+2024-09-19 08:56:48,205 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.470e+01 8.972e+01 9.495e+01 2.308e+02, threshold=1.794e+02, percent-clipped=1.0
+2024-09-19 08:56:51,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=632360.0, ans=0.0
+2024-09-19 08:56:51,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=632360.0, ans=0.0
+2024-09-19 08:57:04,334 INFO [train.py:1198] (1/2) Epoch 35, batch 4250, loss[loss=0.2181, ctc_loss=0.101, cr_loss=0.3227, attn_decoder_loss=0.224, over 29512.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1162, cr_loss=0.357, attn_decoder_loss=0.2407, over 5806676.66 frames. ], batch size: 74, lr: 3.13e-03, grad_scale: 8.0
+2024-09-19 08:57:14,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=632400.0, ans=0.5
+2024-09-19 08:57:16,907 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.07 vs. limit=6.0
+2024-09-19 08:57:19,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=632440.0, ans=0.0
+2024-09-19 08:57:25,637 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.54 vs. limit=22.5
+2024-09-19 08:58:13,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=632560.0, ans=0.125
+2024-09-19 08:58:17,922 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 08:58:17,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=632600.0, ans=0.125
+2024-09-19 08:58:19,080 INFO [train.py:1198] (1/2) Epoch 35, batch 4300, loss[loss=0.2388, ctc_loss=0.1114, cr_loss=0.3514, attn_decoder_loss=0.2452, over 29503.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1159, cr_loss=0.3567, attn_decoder_loss=0.2407, over 5795939.72 frames. ], batch size: 87, lr: 3.13e-03, grad_scale: 8.0
+2024-09-19 08:58:20,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=632600.0, ans=0.0
+2024-09-19 08:58:52,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=632680.0, ans=10.0
+2024-09-19 08:59:01,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=632680.0, ans=0.125
+2024-09-19 08:59:02,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=632720.0, ans=0.0
+2024-09-19 08:59:13,509 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.09 vs. limit=15.0
+2024-09-19 08:59:17,003 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.393e+01 8.841e+01 9.230e+01 9.936e+01 2.115e+02, threshold=1.846e+02, percent-clipped=2.0
+2024-09-19 08:59:17,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=632760.0, ans=0.1
+2024-09-19 08:59:34,556 INFO [train.py:1198] (1/2) Epoch 35, batch 4350, loss[loss=0.2423, ctc_loss=0.1216, cr_loss=0.3655, attn_decoder_loss=0.2475, over 29465.00 frames. ], tot_loss[loss=0.2387, ctc_loss=0.1186, cr_loss=0.362, attn_decoder_loss=0.244, over 5798759.88 frames. ], batch size: 97, lr: 3.13e-03, grad_scale: 8.0
+2024-09-19 08:59:40,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=632800.0, ans=0.125
+2024-09-19 08:59:45,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=632800.0, ans=0.1
+2024-09-19 08:59:53,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=632840.0, ans=0.025
+2024-09-19 09:00:17,660 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.18 vs. limit=15.0
+2024-09-19 09:00:27,885 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.31 vs. limit=22.5
+2024-09-19 09:00:47,707 INFO [train.py:1198] (1/2) Epoch 35, batch 4400, loss[loss=0.2465, ctc_loss=0.1242, cr_loss=0.3801, attn_decoder_loss=0.2517, over 27331.00 frames. ], tot_loss[loss=0.2411, ctc_loss=0.1201, cr_loss=0.3651, attn_decoder_loss=0.2464, over 5767745.18 frames. ], batch size: 124, lr: 3.13e-03, grad_scale: 16.0
+2024-09-19 09:00:48,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=633000.0, ans=0.125
+2024-09-19 09:00:53,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=633000.0, ans=0.025
+2024-09-19 09:00:56,551 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.43 vs. limit=10.0
+2024-09-19 09:01:03,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=633040.0, ans=0.1
+2024-09-19 09:01:10,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=633040.0, ans=0.1
+2024-09-19 09:01:45,575 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.031e+01 8.931e+01 9.450e+01 9.933e+01 1.920e+02, threshold=1.890e+02, percent-clipped=1.0
+2024-09-19 09:02:02,863 INFO [train.py:1198] (1/2) Epoch 35, batch 4450, loss[loss=0.2521, ctc_loss=0.1409, cr_loss=0.3679, attn_decoder_loss=0.2563, over 20280.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1235, cr_loss=0.3699, attn_decoder_loss=0.2485, over 5577439.41 frames. ], batch size: 209, lr: 3.13e-03, grad_scale: 16.0
+2024-09-19 09:02:18,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=633240.0, ans=0.025
+2024-09-19 09:02:46,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=633320.0, ans=0.1
+2024-09-19 09:02:56,358 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=7.66 vs. limit=10.0
+2024-09-19 09:03:15,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=633360.0, ans=0.2
+2024-09-19 09:03:17,988 INFO [train.py:1198] (1/2) Epoch 35, batch 4500, loss[loss=0.2556, ctc_loss=0.1406, cr_loss=0.3761, attn_decoder_loss=0.26, over 20381.00 frames. ], tot_loss[loss=0.2455, ctc_loss=0.1269, cr_loss=0.3724, attn_decoder_loss=0.2505, over 5235461.09 frames. ], batch size: 209, lr: 3.13e-03, grad_scale: 8.0
+2024-09-19 09:03:19,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=633400.0, ans=0.125
+2024-09-19 09:03:31,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=633440.0, ans=0.125
+2024-09-19 09:04:27,601 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 09:04:41,123 INFO [train.py:1198] (1/2) Epoch 36, batch 0, loss[loss=0.2228, ctc_loss=0.1012, cr_loss=0.3353, attn_decoder_loss=0.2288, over 29594.00 frames. ], tot_loss[loss=0.2228, ctc_loss=0.1012, cr_loss=0.3353, attn_decoder_loss=0.2288, over 29594.00 frames. ], batch size: 73, lr: 3.09e-03, grad_scale: 16.0
+2024-09-19 09:04:41,123 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-19 09:04:49,391 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.2.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.2378, 3.8471, 4.1154, 3.7352], device='cuda:1')
+2024-09-19 09:04:56,230 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.2.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.1307, 4.3156, 4.5002, 4.7744], device='cuda:1')
+2024-09-19 09:04:59,473 INFO [train.py:1230] (1/2) Epoch 36, validation: loss=0.2129, ctc_loss=0.03662, cr_loss=5.743e-15, attn_decoder_loss=0.2325, over 944034.00 frames.
+2024-09-19 09:04:59,473 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-19 09:05:02,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=633500.0, ans=0.0
+2024-09-19 09:05:08,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=633500.0, ans=0.0
+2024-09-19 09:05:08,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=633500.0, ans=0.2
+2024-09-19 09:05:11,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=633500.0, ans=0.2
+2024-09-19 09:05:17,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=633540.0, ans=0.1
+2024-09-19 09:05:18,211 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.68 vs. limit=15.0
+2024-09-19 09:05:22,046 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.883e+01 1.073e+02 1.144e+02 1.210e+02 8.768e+02, threshold=2.289e+02, percent-clipped=4.0
+2024-09-19 09:05:28,704 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.46 vs. limit=22.5
+2024-09-19 09:05:59,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=633660.0, ans=0.1
+2024-09-19 09:06:00,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=633660.0, ans=0.125
+2024-09-19 09:06:06,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=633660.0, ans=0.1
+2024-09-19 09:06:06,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=633660.0, ans=0.05
+2024-09-19 09:06:08,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.90 vs. limit=12.0
+2024-09-19 09:06:12,187 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.92 vs. limit=6.0
+2024-09-19 09:06:15,569 INFO [train.py:1198] (1/2) Epoch 36, batch 50, loss[loss=0.2122, ctc_loss=0.1022, cr_loss=0.3304, attn_decoder_loss=0.2171, over 29454.00 frames. ], tot_loss[loss=0.2374, ctc_loss=0.1193, cr_loss=0.3647, attn_decoder_loss=0.2424, over 1267863.35 frames. ], batch size: 70, lr: 3.09e-03, grad_scale: 16.0
+2024-09-19 09:06:49,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=633780.0, ans=0.125
+2024-09-19 09:06:52,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=633780.0, ans=0.0
+2024-09-19 09:06:53,722 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.18 vs. limit=15.0
+2024-09-19 09:07:22,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=633860.0, ans=0.0
+2024-09-19 09:07:32,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=633860.0, ans=0.125
+2024-09-19 09:07:35,511 INFO [train.py:1198] (1/2) Epoch 36, batch 100, loss[loss=0.2192, ctc_loss=0.1102, cr_loss=0.346, attn_decoder_loss=0.2236, over 29547.00 frames. ], tot_loss[loss=0.2383, ctc_loss=0.1191, cr_loss=0.3634, attn_decoder_loss=0.2435, over 2253480.06 frames. ], batch size: 76, lr: 3.09e-03, grad_scale: 16.0
+2024-09-19 09:07:47,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=633900.0, ans=0.1
+2024-09-19 09:07:47,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=633900.0, ans=0.0
+2024-09-19 09:07:57,926 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.147e+01 8.630e+01 9.046e+01 9.825e+01 1.723e+02, threshold=1.809e+02, percent-clipped=0.0
+2024-09-19 09:08:18,133 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.27 vs. limit=10.0
+2024-09-19 09:08:19,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=634020.0, ans=0.125
+2024-09-19 09:08:42,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=634060.0, ans=0.125
+2024-09-19 09:08:50,183 INFO [train.py:1198] (1/2) Epoch 36, batch 150, loss[loss=0.2141, ctc_loss=0.1044, cr_loss=0.3443, attn_decoder_loss=0.2186, over 29448.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1176, cr_loss=0.3604, attn_decoder_loss=0.2418, over 3048518.25 frames. ], batch size: 70, lr: 3.09e-03, grad_scale: 16.0
+2024-09-19 09:08:52,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=634100.0, ans=0.125
+2024-09-19 09:09:05,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=634140.0, ans=0.0
+2024-09-19 09:09:22,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=634180.0, ans=0.1
+2024-09-19 09:10:04,912 INFO [train.py:1198] (1/2) Epoch 36, batch 200, loss[loss=0.2543, ctc_loss=0.1304, cr_loss=0.3941, attn_decoder_loss=0.2593, over 27623.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1167, cr_loss=0.3583, attn_decoder_loss=0.2407, over 3661735.45 frames. ], batch size: 125, lr: 3.09e-03, grad_scale: 16.0
+2024-09-19 09:10:08,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=634300.0, ans=0.125
+2024-09-19 09:10:26,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=634340.0, ans=0.1
+2024-09-19 09:10:29,662 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.434e+01 8.423e+01 8.790e+01 9.226e+01 1.100e+02, threshold=1.758e+02, percent-clipped=0.0
+2024-09-19 09:10:36,721 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.48 vs. limit=15.0
+2024-09-19 09:11:10,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=634460.0, ans=0.1
+2024-09-19 09:11:23,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=634460.0, ans=0.0
+2024-09-19 09:11:25,661 INFO [train.py:1198] (1/2) Epoch 36, batch 250, loss[loss=0.2492, ctc_loss=0.1317, cr_loss=0.3926, attn_decoder_loss=0.2535, over 29266.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1172, cr_loss=0.3597, attn_decoder_loss=0.2411, over 4142316.01 frames. ], batch size: 100, lr: 3.08e-03, grad_scale: 16.0
+2024-09-19 09:11:49,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=634540.0, ans=0.0
+2024-09-19 09:11:52,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=634540.0, ans=0.125
+2024-09-19 09:12:06,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=634580.0, ans=0.1
+2024-09-19 09:12:08,559 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.49 vs. limit=15.0
+2024-09-19 09:12:40,877 INFO [train.py:1198] (1/2) Epoch 36, batch 300, loss[loss=0.2437, ctc_loss=0.1179, cr_loss=0.3748, attn_decoder_loss=0.2494, over 29530.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1167, cr_loss=0.359, attn_decoder_loss=0.2404, over 4510055.94 frames. ], batch size: 92, lr: 3.08e-03, grad_scale: 8.0
+2024-09-19 09:12:51,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=634700.0, ans=10.0
+2024-09-19 09:12:54,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=634740.0, ans=0.0
+2024-09-19 09:12:59,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=634740.0, ans=10.0
+2024-09-19 09:13:04,718 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.387e+01 8.698e+01 9.076e+01 9.667e+01 1.639e+02, threshold=1.815e+02, percent-clipped=0.0
+2024-09-19 09:13:12,543 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 09:13:12,702 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 09:13:12,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=634780.0, ans=0.1
+2024-09-19 09:13:21,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=634780.0, ans=0.0
+2024-09-19 09:13:50,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=634860.0, ans=0.125
+2024-09-19 09:13:56,338 INFO [train.py:1198] (1/2) Epoch 36, batch 350, loss[loss=0.2, ctc_loss=0.09005, cr_loss=0.2937, attn_decoder_loss=0.2057, over 29327.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1172, cr_loss=0.3601, attn_decoder_loss=0.2412, over 4795753.29 frames. ], batch size: 71, lr: 3.08e-03, grad_scale: 8.0
+2024-09-19 09:14:06,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=634900.0, ans=0.125
+2024-09-19 09:14:17,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=634940.0, ans=0.1
+2024-09-19 09:14:21,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=634940.0, ans=0.1
+2024-09-19 09:14:26,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=634940.0, ans=0.125
+2024-09-19 09:14:38,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=634980.0, ans=0.5
+2024-09-19 09:15:04,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=635060.0, ans=0.125
+2024-09-19 09:15:13,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=635060.0, ans=0.125
+2024-09-19 09:15:16,546 INFO [train.py:1198] (1/2) Epoch 36, batch 400, loss[loss=0.2415, ctc_loss=0.1109, cr_loss=0.3499, attn_decoder_loss=0.2482, over 29723.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1171, cr_loss=0.3604, attn_decoder_loss=0.2411, over 5025991.75 frames. ], batch size: 82, lr: 3.08e-03, grad_scale: 16.0
+2024-09-19 09:15:18,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=635100.0, ans=0.125
+2024-09-19 09:15:23,005 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 09:15:40,918 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.564e+01 9.194e+01 9.781e+01 3.536e+02, threshold=1.839e+02, percent-clipped=4.0
+2024-09-19 09:15:51,931 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 09:16:19,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=635260.0, ans=0.0
+2024-09-19 09:16:21,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=635260.0, ans=0.2
+2024-09-19 09:16:25,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=635260.0, ans=0.125
+2024-09-19 09:16:33,031 INFO [train.py:1198] (1/2) Epoch 36, batch 450, loss[loss=0.2465, ctc_loss=0.1172, cr_loss=0.3597, attn_decoder_loss=0.2528, over 29700.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1167, cr_loss=0.3591, attn_decoder_loss=0.2411, over 5187723.64 frames. ], batch size: 83, lr: 3.08e-03, grad_scale: 16.0
+2024-09-19 09:17:15,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=635380.0, ans=0.05
+2024-09-19 09:17:20,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=635420.0, ans=0.125
+2024-09-19 09:17:34,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.73 vs. limit=15.0
+2024-09-19 09:17:48,989 INFO [train.py:1198] (1/2) Epoch 36, batch 500, loss[loss=0.252, ctc_loss=0.1379, cr_loss=0.4027, attn_decoder_loss=0.2558, over 29412.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1163, cr_loss=0.3577, attn_decoder_loss=0.2401, over 5330905.18 frames. ], batch size: 94, lr: 3.08e-03, grad_scale: 16.0
+2024-09-19 09:17:50,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=635500.0, ans=0.07
+2024-09-19 09:17:50,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=635500.0, ans=0.125
+2024-09-19 09:17:54,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.25 vs. limit=15.0
+2024-09-19 09:17:59,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.62 vs. limit=15.0
+2024-09-19 09:18:13,071 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.195e+01 8.310e+01 8.819e+01 9.519e+01 1.597e+02, threshold=1.764e+02, percent-clipped=0.0
+2024-09-19 09:18:20,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=635580.0, ans=0.125
+2024-09-19 09:18:23,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=635580.0, ans=0.125
+2024-09-19 09:18:24,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=635580.0, ans=0.0
+2024-09-19 09:18:40,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=635620.0, ans=0.0
+2024-09-19 09:18:52,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=635660.0, ans=0.125
+2024-09-19 09:18:54,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=635660.0, ans=0.125
+2024-09-19 09:19:01,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.16 vs. limit=15.0
+2024-09-19 09:19:05,891 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.47 vs. limit=15.0
+2024-09-19 09:19:08,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=635700.0, ans=0.1
+2024-09-19 09:19:09,304 INFO [train.py:1198] (1/2) Epoch 36, batch 550, loss[loss=0.2549, ctc_loss=0.1349, cr_loss=0.396, attn_decoder_loss=0.2594, over 28755.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1165, cr_loss=0.3582, attn_decoder_loss=0.2402, over 5424239.34 frames. ], batch size: 104, lr: 3.08e-03, grad_scale: 16.0
+2024-09-19 09:19:20,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=635700.0, ans=0.125
+2024-09-19 09:19:36,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=635740.0, ans=0.0
+2024-09-19 09:20:01,823 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.11 vs. limit=15.0
+2024-09-19 09:20:24,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.48 vs. limit=22.5
+2024-09-19 09:20:25,473 INFO [train.py:1198] (1/2) Epoch 36, batch 600, loss[loss=0.2425, ctc_loss=0.1181, cr_loss=0.3738, attn_decoder_loss=0.248, over 29249.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1167, cr_loss=0.3593, attn_decoder_loss=0.2407, over 5510754.33 frames. ], batch size: 100, lr: 3.08e-03, grad_scale: 8.0
+2024-09-19 09:20:27,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=635900.0, ans=0.0
+2024-09-19 09:20:36,713 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.53 vs.
limit=15.0 +2024-09-19 09:20:37,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=635900.0, ans=0.07 +2024-09-19 09:20:41,203 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.13 vs. limit=10.0 +2024-09-19 09:20:45,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=635940.0, ans=0.1 +2024-09-19 09:20:49,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=635940.0, ans=0.025 +2024-09-19 09:20:50,745 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.519e+01 9.044e+01 9.582e+01 1.949e+02, threshold=1.809e+02, percent-clipped=1.0 +2024-09-19 09:21:08,525 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.25 vs. limit=15.0 +2024-09-19 09:21:12,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=636020.0, ans=0.2 +2024-09-19 09:21:14,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=636020.0, ans=0.125 +2024-09-19 09:21:18,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=636020.0, ans=0.025 +2024-09-19 09:21:33,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=636060.0, ans=0.2 +2024-09-19 09:21:40,525 INFO [train.py:1198] (1/2) Epoch 36, batch 650, loss[loss=0.2288, ctc_loss=0.1025, cr_loss=0.3264, attn_decoder_loss=0.2356, over 29758.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1157, cr_loss=0.357, attn_decoder_loss=0.2398, over 5587358.68 frames. ], batch size: 81, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:21:47,417 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.62 vs. limit=15.0 +2024-09-19 09:21:49,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=636100.0, ans=0.125 +2024-09-19 09:21:51,870 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.97 vs. limit=10.0 +2024-09-19 09:22:02,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=636140.0, ans=0.0 +2024-09-19 09:22:06,608 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:22:10,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=636180.0, ans=0.1 +2024-09-19 09:22:25,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=636180.0, ans=0.125 +2024-09-19 09:22:45,449 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.53 vs. limit=6.0 +2024-09-19 09:22:55,422 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.21 vs. 
limit=22.5 +2024-09-19 09:23:00,742 INFO [train.py:1198] (1/2) Epoch 36, batch 700, loss[loss=0.2332, ctc_loss=0.1117, cr_loss=0.3615, attn_decoder_loss=0.2387, over 29544.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1162, cr_loss=0.3582, attn_decoder_loss=0.2404, over 5637703.30 frames. ], batch size: 76, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:23:05,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=636300.0, ans=0.1 +2024-09-19 09:23:20,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.17 vs. limit=10.0 +2024-09-19 09:23:26,423 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.289e+01 8.520e+01 8.919e+01 9.430e+01 1.206e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-19 09:23:31,898 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=14.53 vs. limit=22.5 +2024-09-19 09:23:45,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=636420.0, ans=0.125 +2024-09-19 09:23:47,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=636420.0, ans=0.0 +2024-09-19 09:24:00,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=636460.0, ans=0.125 +2024-09-19 09:24:03,884 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.95 vs. limit=15.0 +2024-09-19 09:24:16,393 INFO [train.py:1198] (1/2) Epoch 36, batch 750, loss[loss=0.2436, ctc_loss=0.117, cr_loss=0.3682, attn_decoder_loss=0.2495, over 29700.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.116, cr_loss=0.3575, attn_decoder_loss=0.24, over 5676424.56 frames. ], batch size: 82, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:24:33,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=636540.0, ans=0.1 +2024-09-19 09:24:36,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=636540.0, ans=0.025 +2024-09-19 09:24:42,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=636540.0, ans=0.0 +2024-09-19 09:24:50,534 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.46 vs. limit=15.0 +2024-09-19 09:25:12,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=636620.0, ans=0.025 +2024-09-19 09:25:29,678 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.57 vs. limit=15.0 +2024-09-19 09:25:31,852 INFO [train.py:1198] (1/2) Epoch 36, batch 800, loss[loss=0.2111, ctc_loss=0.1034, cr_loss=0.3393, attn_decoder_loss=0.2155, over 29622.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1161, cr_loss=0.358, attn_decoder_loss=0.2401, over 5708062.76 frames. 
], batch size: 73, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:25:38,799 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.41 vs. limit=15.0 +2024-09-19 09:25:56,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=636740.0, ans=0.125 +2024-09-19 09:25:57,540 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.525e+01 8.447e+01 8.844e+01 9.388e+01 5.453e+02, threshold=1.769e+02, percent-clipped=1.0 +2024-09-19 09:26:25,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=636820.0, ans=0.2 +2024-09-19 09:26:38,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=636860.0, ans=0.0 +2024-09-19 09:26:41,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=636860.0, ans=0.125 +2024-09-19 09:26:52,610 INFO [train.py:1198] (1/2) Epoch 36, batch 850, loss[loss=0.2459, ctc_loss=0.1247, cr_loss=0.3976, attn_decoder_loss=0.2505, over 29711.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1158, cr_loss=0.3576, attn_decoder_loss=0.2398, over 5737016.40 frames. ], batch size: 89, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:27:01,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=636900.0, ans=0.125 +2024-09-19 09:27:17,487 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.28 vs. limit=12.0 +2024-09-19 09:27:35,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=636980.0, ans=0.125 +2024-09-19 09:27:46,465 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.79 vs. limit=10.0 +2024-09-19 09:27:48,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=637020.0, ans=0.125 +2024-09-19 09:28:02,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=637060.0, ans=0.125 +2024-09-19 09:28:02,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=637060.0, ans=0.0 +2024-09-19 09:28:08,137 INFO [train.py:1198] (1/2) Epoch 36, batch 900, loss[loss=0.2144, ctc_loss=0.1018, cr_loss=0.3259, attn_decoder_loss=0.2196, over 29604.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1157, cr_loss=0.357, attn_decoder_loss=0.2402, over 5741795.34 frames. ], batch size: 73, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:28:18,328 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.40 vs. limit=22.5 +2024-09-19 09:28:34,501 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.83 vs. 
limit=10.0 +2024-09-19 09:28:35,110 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.474e+01 8.598e+01 8.959e+01 9.567e+01 2.745e+02, threshold=1.792e+02, percent-clipped=2.0 +2024-09-19 09:28:44,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=637180.0, ans=0.09899494936611666 +2024-09-19 09:28:55,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=637220.0, ans=0.125 +2024-09-19 09:28:56,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=637220.0, ans=0.125 +2024-09-19 09:28:58,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=637220.0, ans=0.125 +2024-09-19 09:29:00,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=637220.0, ans=0.125 +2024-09-19 09:29:04,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=637220.0, ans=0.2 +2024-09-19 09:29:23,684 INFO [train.py:1198] (1/2) Epoch 36, batch 950, loss[loss=0.2235, ctc_loss=0.1029, cr_loss=0.3257, attn_decoder_loss=0.2297, over 29488.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1155, cr_loss=0.3565, attn_decoder_loss=0.2403, over 5744737.44 frames. ], batch size: 74, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:29:41,519 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.00 vs. limit=15.0 +2024-09-19 09:29:42,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=637340.0, ans=0.1 +2024-09-19 09:29:50,423 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.96 vs. limit=10.0 +2024-09-19 09:30:13,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=637420.0, ans=0.125 +2024-09-19 09:30:28,085 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=7.39 vs. limit=12.0 +2024-09-19 09:30:36,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=637460.0, ans=0.125 +2024-09-19 09:30:43,636 INFO [train.py:1198] (1/2) Epoch 36, batch 1000, loss[loss=0.236, ctc_loss=0.1219, cr_loss=0.3722, attn_decoder_loss=0.2404, over 29539.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1165, cr_loss=0.3582, attn_decoder_loss=0.241, over 5737221.78 frames. 
], batch size: 77, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:30:43,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=637500.0, ans=0.125 +2024-09-19 09:31:03,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=637540.0, ans=0.125 +2024-09-19 09:31:11,034 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.580e+01 9.134e+01 9.845e+01 2.020e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-19 09:31:50,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=637660.0, ans=0.125 +2024-09-19 09:31:54,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=637660.0, ans=0.0 +2024-09-19 09:31:59,734 INFO [train.py:1198] (1/2) Epoch 36, batch 1050, loss[loss=0.2337, ctc_loss=0.1117, cr_loss=0.3559, attn_decoder_loss=0.2393, over 29670.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1159, cr_loss=0.3573, attn_decoder_loss=0.2401, over 5746955.02 frames. ], batch size: 85, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:32:32,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.18 vs. limit=6.0 +2024-09-19 09:32:59,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=637860.0, ans=0.125 +2024-09-19 09:33:07,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=637860.0, ans=0.0 +2024-09-19 09:33:15,958 INFO [train.py:1198] (1/2) Epoch 36, batch 1100, loss[loss=0.2318, ctc_loss=0.1154, cr_loss=0.3314, attn_decoder_loss=0.2373, over 29460.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1154, cr_loss=0.3559, attn_decoder_loss=0.2397, over 5758668.12 frames. ], batch size: 78, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:33:40,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=637940.0, ans=0.2 +2024-09-19 09:33:43,101 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.551e+01 8.368e+01 8.851e+01 9.380e+01 2.140e+02, threshold=1.770e+02, percent-clipped=1.0 +2024-09-19 09:33:48,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=637980.0, ans=0.125 +2024-09-19 09:33:55,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=637980.0, ans=0.125 +2024-09-19 09:34:06,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=638020.0, ans=10.0 +2024-09-19 09:34:35,865 INFO [train.py:1198] (1/2) Epoch 36, batch 1150, loss[loss=0.234, ctc_loss=0.1132, cr_loss=0.3429, attn_decoder_loss=0.2398, over 29459.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1159, cr_loss=0.3568, attn_decoder_loss=0.2401, over 5757345.14 frames. 
], batch size: 78, lr: 3.08e-03, grad_scale: 8.0 +2024-09-19 09:34:37,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=638100.0, ans=0.125 +2024-09-19 09:34:42,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=638100.0, ans=0.125 +2024-09-19 09:34:45,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=638100.0, ans=0.2 +2024-09-19 09:34:46,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=638100.0, ans=0.2 +2024-09-19 09:34:51,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=638140.0, ans=0.1 +2024-09-19 09:34:54,963 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.39 vs. limit=15.0 +2024-09-19 09:35:00,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=638140.0, ans=0.125 +2024-09-19 09:35:00,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=638140.0, ans=0.1 +2024-09-19 09:35:10,523 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.42 vs. limit=15.0 +2024-09-19 09:35:11,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=638180.0, ans=0.1 +2024-09-19 09:35:18,254 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.77 vs. limit=12.0 +2024-09-19 09:35:38,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=638260.0, ans=0.125 +2024-09-19 09:35:46,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=638260.0, ans=0.0 +2024-09-19 09:35:51,885 INFO [train.py:1198] (1/2) Epoch 36, batch 1200, loss[loss=0.2515, ctc_loss=0.1244, cr_loss=0.3686, attn_decoder_loss=0.2574, over 29696.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1165, cr_loss=0.3575, attn_decoder_loss=0.241, over 5749290.30 frames. ], batch size: 85, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:36:10,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=638340.0, ans=0.125 +2024-09-19 09:36:19,076 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.768e+01 8.630e+01 9.163e+01 9.879e+01 2.531e+02, threshold=1.833e+02, percent-clipped=3.0 +2024-09-19 09:36:25,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=638380.0, ans=0.0 +2024-09-19 09:36:32,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=638380.0, ans=0.1 +2024-09-19 09:36:40,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.45 vs. 
limit=15.0 +2024-09-19 09:37:08,426 INFO [train.py:1198] (1/2) Epoch 36, batch 1250, loss[loss=0.2472, ctc_loss=0.1302, cr_loss=0.3954, attn_decoder_loss=0.2514, over 29510.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.117, cr_loss=0.3589, attn_decoder_loss=0.2416, over 5776197.72 frames. ], batch size: 92, lr: 3.08e-03, grad_scale: 16.0 +2024-09-19 09:37:11,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=638500.0, ans=0.0 +2024-09-19 09:37:37,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=638580.0, ans=0.125 +2024-09-19 09:37:50,687 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.47 vs. limit=5.0 +2024-09-19 09:37:55,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=638620.0, ans=0.125 +2024-09-19 09:38:10,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.99 vs. limit=12.0 +2024-09-19 09:38:14,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=638660.0, ans=0.125 +2024-09-19 09:38:26,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=638660.0, ans=0.125 +2024-09-19 09:38:29,053 INFO [train.py:1198] (1/2) Epoch 36, batch 1300, loss[loss=0.2455, ctc_loss=0.12, cr_loss=0.3574, attn_decoder_loss=0.2515, over 28172.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1166, cr_loss=0.3583, attn_decoder_loss=0.241, over 5779619.06 frames. ], batch size: 111, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:38:35,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=638700.0, ans=0.95 +2024-09-19 09:38:52,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=638740.0, ans=0.2 +2024-09-19 09:38:56,431 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.514e+01 8.178e+01 8.817e+01 9.661e+01 1.409e+02, threshold=1.763e+02, percent-clipped=0.0 +2024-09-19 09:39:07,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=638780.0, ans=0.025 +2024-09-19 09:39:17,117 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.18 vs. limit=22.5 +2024-09-19 09:39:30,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=638860.0, ans=0.1 +2024-09-19 09:39:31,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=638860.0, ans=0.025 +2024-09-19 09:39:45,480 INFO [train.py:1198] (1/2) Epoch 36, batch 1350, loss[loss=0.2369, ctc_loss=0.1087, cr_loss=0.3499, attn_decoder_loss=0.2434, over 29750.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1161, cr_loss=0.3572, attn_decoder_loss=0.2407, over 5795258.95 frames. 
], batch size: 81, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:39:50,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=638900.0, ans=0.1 +2024-09-19 09:39:53,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=638900.0, ans=0.125 +2024-09-19 09:40:14,647 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.22 vs. limit=15.0 +2024-09-19 09:40:17,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=638980.0, ans=0.125 +2024-09-19 09:40:29,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=639020.0, ans=0.125 +2024-09-19 09:40:29,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=639020.0, ans=0.125 +2024-09-19 09:40:32,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=639020.0, ans=0.0 +2024-09-19 09:40:33,021 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.24 vs. limit=15.0 +2024-09-19 09:40:35,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=639020.0, ans=0.0 +2024-09-19 09:40:46,827 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.74 vs. limit=15.0 +2024-09-19 09:40:53,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=639060.0, ans=0.1 +2024-09-19 09:41:00,602 INFO [train.py:1198] (1/2) Epoch 36, batch 1400, loss[loss=0.2064, ctc_loss=0.09397, cr_loss=0.3096, attn_decoder_loss=0.2121, over 29585.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.116, cr_loss=0.3573, attn_decoder_loss=0.2403, over 5806735.86 frames. ], batch size: 69, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:41:15,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=639140.0, ans=0.125 +2024-09-19 09:41:18,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=639140.0, ans=0.1 +2024-09-19 09:41:27,784 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.347e+01 8.417e+01 9.024e+01 9.500e+01 1.848e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-19 09:41:35,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=639180.0, ans=0.125 +2024-09-19 09:42:02,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=639260.0, ans=0.025 +2024-09-19 09:42:18,900 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.33 vs. limit=12.0 +2024-09-19 09:42:20,993 INFO [train.py:1198] (1/2) Epoch 36, batch 1450, loss[loss=0.2461, ctc_loss=0.1249, cr_loss=0.3582, attn_decoder_loss=0.2516, over 29457.00 frames. 
], tot_loss[loss=0.2354, ctc_loss=0.1161, cr_loss=0.357, attn_decoder_loss=0.2407, over 5804129.38 frames. ], batch size: 94, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:42:37,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=639340.0, ans=0.125 +2024-09-19 09:42:46,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=639340.0, ans=0.0 +2024-09-19 09:42:47,642 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.24 vs. limit=12.0 +2024-09-19 09:43:21,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=639460.0, ans=0.2 +2024-09-19 09:43:24,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=639460.0, ans=0.0 +2024-09-19 09:43:36,445 INFO [train.py:1198] (1/2) Epoch 36, batch 1500, loss[loss=0.2364, ctc_loss=0.1125, cr_loss=0.3574, attn_decoder_loss=0.2422, over 29645.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1161, cr_loss=0.357, attn_decoder_loss=0.2409, over 5805335.60 frames. ], batch size: 86, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:43:57,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=639540.0, ans=0.0 +2024-09-19 09:44:03,553 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.409e+01 8.635e+01 9.112e+01 9.549e+01 2.206e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-19 09:44:11,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=7.05 vs. limit=15.0 +2024-09-19 09:44:16,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=639580.0, ans=0.0 +2024-09-19 09:44:26,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=639620.0, ans=0.2 +2024-09-19 09:44:32,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=639620.0, ans=0.125 +2024-09-19 09:44:34,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=639620.0, ans=0.125 +2024-09-19 09:44:40,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=639660.0, ans=0.125 +2024-09-19 09:44:44,240 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.52 vs. limit=15.0 +2024-09-19 09:44:52,262 INFO [train.py:1198] (1/2) Epoch 36, batch 1550, loss[loss=0.2593, ctc_loss=0.1366, cr_loss=0.4165, attn_decoder_loss=0.2637, over 29524.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1166, cr_loss=0.3584, attn_decoder_loss=0.2412, over 5781225.00 frames. 
], batch size: 90, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:44:54,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=639700.0, ans=0.035 +2024-09-19 09:44:54,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=639700.0, ans=0.125 +2024-09-19 09:45:01,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=639700.0, ans=0.125 +2024-09-19 09:45:03,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=639700.0, ans=0.0 +2024-09-19 09:45:13,731 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:45:46,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.14 vs. limit=22.5 +2024-09-19 09:45:50,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=639820.0, ans=0.025 +2024-09-19 09:46:00,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=639860.0, ans=0.04949747468305833 +2024-09-19 09:46:11,768 INFO [train.py:1198] (1/2) Epoch 36, batch 1600, loss[loss=0.2441, ctc_loss=0.1206, cr_loss=0.3732, attn_decoder_loss=0.2495, over 29658.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1164, cr_loss=0.3579, attn_decoder_loss=0.241, over 5764159.46 frames. ], batch size: 85, lr: 3.07e-03, grad_scale: 16.0 +2024-09-19 09:46:18,489 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.48 vs. limit=15.0 +2024-09-19 09:46:42,000 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.502e+01 8.623e+01 9.307e+01 9.759e+01 1.491e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-19 09:46:43,154 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.28 vs. limit=15.0 +2024-09-19 09:46:59,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.31 vs. limit=15.0 +2024-09-19 09:47:01,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=639980.0, ans=0.125 +2024-09-19 09:47:11,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=640020.0, ans=0.0 +2024-09-19 09:47:34,966 INFO [train.py:1198] (1/2) Epoch 36, batch 1650, loss[loss=0.247, ctc_loss=0.1214, cr_loss=0.3694, attn_decoder_loss=0.2528, over 29735.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1164, cr_loss=0.3578, attn_decoder_loss=0.2409, over 5758348.96 frames. 
], batch size: 89, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:47:41,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=640100.0, ans=0.125 +2024-09-19 09:47:42,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=640100.0, ans=0.0 +2024-09-19 09:47:42,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=640100.0, ans=0.125 +2024-09-19 09:47:54,748 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:47:59,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=640140.0, ans=0.0 +2024-09-19 09:48:16,097 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:48:22,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=640220.0, ans=0.0 +2024-09-19 09:48:44,474 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 09:48:50,206 INFO [train.py:1198] (1/2) Epoch 36, batch 1700, loss[loss=0.211, ctc_loss=0.09741, cr_loss=0.3116, attn_decoder_loss=0.2167, over 29575.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1163, cr_loss=0.3576, attn_decoder_loss=0.2407, over 5780386.92 frames. ], batch size: 69, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:48:59,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=640300.0, ans=0.125 +2024-09-19 09:49:01,906 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.90 vs. limit=15.0 +2024-09-19 09:49:13,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=640340.0, ans=0.95 +2024-09-19 09:49:20,274 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.508e+01 8.971e+01 9.480e+01 1.290e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 09:49:42,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=640420.0, ans=0.125 +2024-09-19 09:49:53,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=640460.0, ans=0.2 +2024-09-19 09:50:03,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=640460.0, ans=0.125 +2024-09-19 09:50:03,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=640460.0, ans=0.125 +2024-09-19 09:50:10,193 INFO [train.py:1198] (1/2) Epoch 36, batch 1750, loss[loss=0.2025, ctc_loss=0.09049, cr_loss=0.3031, attn_decoder_loss=0.2082, over 29342.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.116, cr_loss=0.357, attn_decoder_loss=0.2406, over 5787776.84 frames. 
], batch size: 67, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:50:11,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=640500.0, ans=0.1 +2024-09-19 09:50:11,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=640500.0, ans=0.05 +2024-09-19 09:50:20,429 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.65 vs. limit=22.5 +2024-09-19 09:50:26,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.76 vs. limit=22.5 +2024-09-19 09:50:44,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=640580.0, ans=0.025 +2024-09-19 09:51:03,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=640620.0, ans=0.0 +2024-09-19 09:51:03,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=640620.0, ans=0.0 +2024-09-19 09:51:11,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=640660.0, ans=0.0 +2024-09-19 09:51:13,428 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.23 vs. limit=6.0 +2024-09-19 09:51:26,090 INFO [train.py:1198] (1/2) Epoch 36, batch 1800, loss[loss=0.2527, ctc_loss=0.1287, cr_loss=0.3888, attn_decoder_loss=0.2578, over 29677.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1163, cr_loss=0.3581, attn_decoder_loss=0.2409, over 5790785.20 frames. ], batch size: 83, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:51:45,105 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.55 vs. limit=6.0 +2024-09-19 09:51:47,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=640740.0, ans=0.125 +2024-09-19 09:51:49,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=640740.0, ans=0.125 +2024-09-19 09:51:56,469 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.495e+01 8.489e+01 9.081e+01 9.519e+01 1.920e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 09:51:56,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=640780.0, ans=0.125 +2024-09-19 09:52:02,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.13 vs. limit=15.0 +2024-09-19 09:52:05,673 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.81 vs. 
limit=10.0 +2024-09-19 09:52:18,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=640820.0, ans=0.125 +2024-09-19 09:52:19,161 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.48 vs. limit=22.5 +2024-09-19 09:52:20,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=640820.0, ans=0.125 +2024-09-19 09:52:42,685 INFO [train.py:1198] (1/2) Epoch 36, batch 1850, loss[loss=0.2469, ctc_loss=0.1201, cr_loss=0.3763, attn_decoder_loss=0.2527, over 29623.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1159, cr_loss=0.3572, attn_decoder_loss=0.2406, over 5795070.81 frames. ], batch size: 86, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:52:48,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=640900.0, ans=0.1 +2024-09-19 09:53:14,255 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.86 vs. limit=5.0 +2024-09-19 09:53:19,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=640980.0, ans=0.125 +2024-09-19 09:53:22,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=640980.0, ans=0.2 +2024-09-19 09:53:42,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.max_positive, batch_count=641020.0, ans=0.95 +2024-09-19 09:53:44,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=641060.0, ans=0.125 +2024-09-19 09:53:51,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=641060.0, ans=0.125 +2024-09-19 09:53:53,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=641060.0, ans=0.125 +2024-09-19 09:53:53,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=641060.0, ans=0.0 +2024-09-19 09:54:00,521 INFO [train.py:1198] (1/2) Epoch 36, batch 1900, loss[loss=0.2494, ctc_loss=0.1181, cr_loss=0.3793, attn_decoder_loss=0.2556, over 29696.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1156, cr_loss=0.3566, attn_decoder_loss=0.2407, over 5802953.49 frames. ], batch size: 89, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:54:13,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=641100.0, ans=0.125 +2024-09-19 09:54:15,608 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.73 vs. 
limit=15.0 +2024-09-19 09:54:30,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=641140.0, ans=0.2 +2024-09-19 09:54:31,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=641180.0, ans=0.125 +2024-09-19 09:54:33,086 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.812e+01 8.610e+01 8.955e+01 9.499e+01 1.383e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 09:54:44,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=641180.0, ans=0.07 +2024-09-19 09:54:48,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=641220.0, ans=0.025 +2024-09-19 09:54:51,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=641220.0, ans=0.125 +2024-09-19 09:55:18,518 INFO [train.py:1198] (1/2) Epoch 36, batch 1950, loss[loss=0.2448, ctc_loss=0.124, cr_loss=0.3826, attn_decoder_loss=0.2497, over 29429.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1159, cr_loss=0.3575, attn_decoder_loss=0.2415, over 5818225.56 frames. ], batch size: 78, lr: 3.07e-03, grad_scale: 8.0 +2024-09-19 09:55:43,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=10.97 vs. limit=15.0 +2024-09-19 09:55:57,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=641380.0, ans=0.125 +2024-09-19 09:56:12,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=641420.0, ans=0.1 +2024-09-19 09:56:26,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=641460.0, ans=0.125 +2024-09-19 09:56:29,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=641460.0, ans=0.2 +2024-09-19 09:56:33,539 INFO [train.py:1198] (1/2) Epoch 36, batch 2000, loss[loss=0.2223, ctc_loss=0.108, cr_loss=0.3446, attn_decoder_loss=0.2274, over 29358.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1166, cr_loss=0.3592, attn_decoder_loss=0.242, over 5795870.77 frames. 
], batch size: 67, lr: 3.07e-03, grad_scale: 16.0
+2024-09-19 09:56:36,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=641500.0, ans=0.2
+2024-09-19 09:56:46,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=641500.0, ans=0.1
+2024-09-19 09:57:04,113 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.346e+01 8.610e+01 8.991e+01 9.571e+01 3.322e+02, threshold=1.798e+02, percent-clipped=1.0
+2024-09-19 09:57:06,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=641580.0, ans=0.025
+2024-09-19 09:57:38,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=641660.0, ans=0.0
+2024-09-19 09:57:40,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=641660.0, ans=0.125
+2024-09-19 09:57:48,071 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 09:57:52,157 INFO [train.py:1198] (1/2) Epoch 36, batch 2050, loss[loss=0.2124, ctc_loss=0.09843, cr_loss=0.3315, attn_decoder_loss=0.2177, over 29441.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1162, cr_loss=0.3579, attn_decoder_loss=0.241, over 5786809.95 frames. ], batch size: 70, lr: 3.07e-03, grad_scale: 16.0
+2024-09-19 09:58:02,628 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.18 vs. limit=12.0
+2024-09-19 09:58:11,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=641740.0, ans=0.1
+2024-09-19 09:58:29,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=641780.0, ans=0.0
+2024-09-19 09:59:09,451 INFO [train.py:1198] (1/2) Epoch 36, batch 2100, loss[loss=0.2286, ctc_loss=0.111, cr_loss=0.3406, attn_decoder_loss=0.2341, over 29781.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.116, cr_loss=0.3577, attn_decoder_loss=0.2407, over 5798977.81 frames. ], batch size: 81, lr: 3.07e-03, grad_scale: 16.0
+2024-09-19 09:59:15,110 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.64 vs. limit=15.0
+2024-09-19 09:59:20,258 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 09:59:21,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=641900.0, ans=0.0
+2024-09-19 09:59:39,341 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.566e+01 8.432e+01 8.828e+01 9.578e+01 1.169e+02, threshold=1.766e+02, percent-clipped=0.0
+2024-09-19 10:00:01,258 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.95 vs. limit=12.0
+2024-09-19 10:00:12,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=642060.0, ans=0.125
+2024-09-19 10:00:17,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=642060.0, ans=0.2
+2024-09-19 10:00:24,374 INFO [train.py:1198] (1/2) Epoch 36, batch 2150, loss[loss=0.229, ctc_loss=0.1158, cr_loss=0.3607, attn_decoder_loss=0.2336, over 29448.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1152, cr_loss=0.3557, attn_decoder_loss=0.2398, over 5814500.80 frames. ], batch size: 78, lr: 3.07e-03, grad_scale: 16.0
+2024-09-19 10:00:47,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=642140.0, ans=0.125
+2024-09-19 10:00:55,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=642180.0, ans=0.125
+2024-09-19 10:01:14,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=642220.0, ans=0.1
+2024-09-19 10:01:25,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=642260.0, ans=0.025
+2024-09-19 10:01:30,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=642260.0, ans=0.0
+2024-09-19 10:01:42,330 INFO [train.py:1198] (1/2) Epoch 36, batch 2200, loss[loss=0.2494, ctc_loss=0.1286, cr_loss=0.3976, attn_decoder_loss=0.254, over 29625.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1155, cr_loss=0.3562, attn_decoder_loss=0.2401, over 5810580.53 frames. ], batch size: 86, lr: 3.07e-03, grad_scale: 16.0
+2024-09-19 10:01:44,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=642300.0, ans=0.125
+2024-09-19 10:01:50,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=642300.0, ans=10.0
+2024-09-19 10:01:57,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=642340.0, ans=0.125
+2024-09-19 10:02:07,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=642340.0, ans=0.1
+2024-09-19 10:02:14,558 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.653e+01 8.688e+01 9.104e+01 9.664e+01 2.107e+02, threshold=1.821e+02, percent-clipped=1.0
+2024-09-19 10:02:23,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.04 vs. limit=6.0
+2024-09-19 10:02:33,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=642420.0, ans=0.125
+2024-09-19 10:02:34,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=642420.0, ans=0.125
+2024-09-19 10:02:45,659 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:02:50,374 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.64 vs. limit=15.0
+2024-09-19 10:02:54,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=642460.0, ans=0.125
+2024-09-19 10:03:00,207 INFO [train.py:1198] (1/2) Epoch 36, batch 2250, loss[loss=0.2423, ctc_loss=0.1188, cr_loss=0.3652, attn_decoder_loss=0.2479, over 29721.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1148, cr_loss=0.3553, attn_decoder_loss=0.2399, over 5809735.09 frames. ], batch size: 82, lr: 3.07e-03, grad_scale: 16.0
+2024-09-19 10:03:14,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=642540.0, ans=0.025
+2024-09-19 10:03:16,392 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.25 vs. limit=15.0
+2024-09-19 10:03:20,899 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.72 vs. limit=15.0
+2024-09-19 10:03:21,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=642540.0, ans=0.1
+2024-09-19 10:04:15,306 INFO [train.py:1198] (1/2) Epoch 36, batch 2300, loss[loss=0.2063, ctc_loss=0.09486, cr_loss=0.3102, attn_decoder_loss=0.2118, over 29323.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1146, cr_loss=0.3544, attn_decoder_loss=0.2391, over 5797742.25 frames. ], batch size: 71, lr: 3.07e-03, grad_scale: 16.0
+2024-09-19 10:04:26,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=642700.0, ans=0.035
+2024-09-19 10:04:46,894 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.402e+01 9.082e+01 9.464e+01 1.800e+02, threshold=1.816e+02, percent-clipped=0.0
+2024-09-19 10:05:33,288 INFO [train.py:1198] (1/2) Epoch 36, batch 2350, loss[loss=0.2385, ctc_loss=0.1167, cr_loss=0.3602, attn_decoder_loss=0.244, over 29695.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1148, cr_loss=0.3556, attn_decoder_loss=0.2394, over 5803615.58 frames. ], batch size: 83, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:05:35,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=642900.0, ans=0.125
+2024-09-19 10:05:42,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=642900.0, ans=0.125
+2024-09-19 10:06:22,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=643020.0, ans=0.125
+2024-09-19 10:06:34,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=643060.0, ans=0.2
+2024-09-19 10:06:38,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=643060.0, ans=0.0
+2024-09-19 10:06:46,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=643060.0, ans=0.2
+2024-09-19 10:06:50,474 INFO [train.py:1198] (1/2) Epoch 36, batch 2400, loss[loss=0.2267, ctc_loss=0.1118, cr_loss=0.3489, attn_decoder_loss=0.2317, over 29547.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1157, cr_loss=0.3576, attn_decoder_loss=0.24, over 5807181.07 frames. ], batch size: 76, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:06:51,254 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.14 vs. limit=6.0
+2024-09-19 10:06:57,579 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.28 vs. limit=22.5
+2024-09-19 10:07:10,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=643140.0, ans=0.0
+2024-09-19 10:07:15,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=643140.0, ans=0.125
+2024-09-19 10:07:22,299 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.644e+01 9.234e+01 9.836e+01 2.155e+02, threshold=1.847e+02, percent-clipped=1.0
+2024-09-19 10:07:24,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=643180.0, ans=0.125
+2024-09-19 10:07:25,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=643180.0, ans=0.1
+2024-09-19 10:07:26,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten.whitening_limit, batch_count=643180.0, ans=15.0
+2024-09-19 10:07:30,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=643180.0, ans=0.2
+2024-09-19 10:07:30,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=643180.0, ans=0.125
+2024-09-19 10:07:35,242 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:07:36,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=643220.0, ans=0.0
+2024-09-19 10:07:40,426 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.59 vs. limit=15.0
+2024-09-19 10:07:50,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=643260.0, ans=0.2
+2024-09-19 10:08:01,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=643260.0, ans=0.125
+2024-09-19 10:08:07,033 INFO [train.py:1198] (1/2) Epoch 36, batch 2450, loss[loss=0.2394, ctc_loss=0.1198, cr_loss=0.3678, attn_decoder_loss=0.2445, over 29686.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1163, cr_loss=0.3583, attn_decoder_loss=0.241, over 5782390.73 frames. ], batch size: 82, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:08:07,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=643300.0, ans=0.2
+2024-09-19 10:08:08,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=643300.0, ans=0.0
+2024-09-19 10:08:22,442 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:08:39,969 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.77 vs. limit=5.0
+2024-09-19 10:08:46,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=643380.0, ans=0.0
+2024-09-19 10:09:00,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=643420.0, ans=0.125
+2024-09-19 10:09:05,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=643420.0, ans=0.0
+2024-09-19 10:09:06,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=643420.0, ans=0.125
+2024-09-19 10:09:22,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.96 vs. limit=15.0
+2024-09-19 10:09:22,481 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=19.05 vs. limit=22.5
+2024-09-19 10:09:24,637 INFO [train.py:1198] (1/2) Epoch 36, batch 2500, loss[loss=0.2432, ctc_loss=0.1063, cr_loss=0.3243, attn_decoder_loss=0.2512, over 29621.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1161, cr_loss=0.358, attn_decoder_loss=0.2407, over 5793148.31 frames. ], batch size: 86, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:09:26,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=643500.0, ans=0.1
+2024-09-19 10:09:29,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=643500.0, ans=0.125
+2024-09-19 10:09:50,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=643540.0, ans=0.125
+2024-09-19 10:09:58,826 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.618e+01 8.994e+01 9.637e+01 2.222e+02, threshold=1.799e+02, percent-clipped=1.0
+2024-09-19 10:10:05,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=643580.0, ans=0.125
+2024-09-19 10:10:17,327 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:10:24,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=643620.0, ans=0.0
+2024-09-19 10:10:42,727 INFO [train.py:1198] (1/2) Epoch 36, batch 2550, loss[loss=0.2075, ctc_loss=0.0944, cr_loss=0.3296, attn_decoder_loss=0.2127, over 29341.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.116, cr_loss=0.3575, attn_decoder_loss=0.2407, over 5796165.40 frames. ], batch size: 67, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:10:43,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=643700.0, ans=0.1
+2024-09-19 10:10:47,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=643700.0, ans=0.125
+2024-09-19 10:10:48,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=643700.0, ans=0.0
+2024-09-19 10:10:59,935 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.89 vs. limit=15.0
+2024-09-19 10:11:16,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.95 vs. limit=15.0
+2024-09-19 10:11:24,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=643780.0, ans=0.125
+2024-09-19 10:11:58,150 INFO [train.py:1198] (1/2) Epoch 36, batch 2600, loss[loss=0.2313, ctc_loss=0.1044, cr_loss=0.3425, attn_decoder_loss=0.2378, over 29439.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1164, cr_loss=0.3585, attn_decoder_loss=0.2411, over 5794061.82 frames. ], batch size: 78, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:12:03,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=643900.0, ans=0.125
+2024-09-19 10:12:24,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=643940.0, ans=0.125
+2024-09-19 10:12:31,417 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.615e+01 9.147e+01 9.711e+01 1.347e+02, threshold=1.829e+02, percent-clipped=0.0
+2024-09-19 10:12:35,106 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.60 vs. limit=12.0
+2024-09-19 10:12:53,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=644020.0, ans=0.0
+2024-09-19 10:12:58,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=644020.0, ans=0.0
+2024-09-19 10:13:06,212 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.29 vs. limit=15.0
+2024-09-19 10:13:08,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=644060.0, ans=0.0
+2024-09-19 10:13:16,049 INFO [train.py:1198] (1/2) Epoch 36, batch 2650, loss[loss=0.2534, ctc_loss=0.1312, cr_loss=0.3937, attn_decoder_loss=0.2582, over 29282.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1163, cr_loss=0.3584, attn_decoder_loss=0.2413, over 5801736.41 frames. ], batch size: 100, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:13:22,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=644100.0, ans=0.125
+2024-09-19 10:13:22,981 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.46 vs. limit=15.0
+2024-09-19 10:13:23,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=644100.0, ans=0.125
+2024-09-19 10:13:28,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=644100.0, ans=0.125
+2024-09-19 10:13:54,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=644180.0, ans=0.125
+2024-09-19 10:14:06,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=644220.0, ans=0.0
+2024-09-19 10:14:06,720 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.22 vs. limit=22.5
+2024-09-19 10:14:07,188 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.10 vs. limit=15.0
+2024-09-19 10:14:12,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=644220.0, ans=0.05
+2024-09-19 10:14:21,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=644260.0, ans=0.125
+2024-09-19 10:14:33,360 INFO [train.py:1198] (1/2) Epoch 36, batch 2700, loss[loss=0.2326, ctc_loss=0.1048, cr_loss=0.3231, attn_decoder_loss=0.2397, over 29519.00 frames. ], tot_loss[loss=0.2361, ctc_loss=0.1161, cr_loss=0.3582, attn_decoder_loss=0.2414, over 5796020.75 frames. ], batch size: 87, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:14:49,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=644340.0, ans=0.125
+2024-09-19 10:15:06,280 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.180e+01 8.558e+01 9.078e+01 9.683e+01 1.491e+02, threshold=1.816e+02, percent-clipped=0.0
+2024-09-19 10:15:22,756 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.95 vs. limit=15.0
+2024-09-19 10:15:29,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=644420.0, ans=0.0
+2024-09-19 10:15:30,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.84 vs. limit=15.0
+2024-09-19 10:15:33,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=644460.0, ans=0.0
+2024-09-19 10:15:38,898 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.62 vs. limit=15.0
+2024-09-19 10:15:48,818 INFO [train.py:1198] (1/2) Epoch 36, batch 2750, loss[loss=0.2349, ctc_loss=0.1233, cr_loss=0.3877, attn_decoder_loss=0.2387, over 29506.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1157, cr_loss=0.357, attn_decoder_loss=0.2404, over 5793674.58 frames. ], batch size: 75, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:16:04,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=644540.0, ans=10.0
+2024-09-19 10:16:53,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.43 vs. limit=15.0
+2024-09-19 10:17:06,642 INFO [train.py:1198] (1/2) Epoch 36, batch 2800, loss[loss=0.255, ctc_loss=0.1485, cr_loss=0.4033, attn_decoder_loss=0.2579, over 20326.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.116, cr_loss=0.3571, attn_decoder_loss=0.2406, over 5774572.62 frames. ], batch size: 210, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:17:18,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=644700.0, ans=0.0
+2024-09-19 10:17:28,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=644740.0, ans=0.125
+2024-09-19 10:17:38,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=644780.0, ans=0.0
+2024-09-19 10:17:43,407 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.665e+01 8.452e+01 9.019e+01 9.554e+01 2.850e+02, threshold=1.804e+02, percent-clipped=2.0
+2024-09-19 10:17:47,867 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.35 vs. limit=15.0
+2024-09-19 10:18:08,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=644860.0, ans=0.1
+2024-09-19 10:18:11,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=644860.0, ans=0.125
+2024-09-19 10:18:13,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.59 vs. limit=22.5
+2024-09-19 10:18:22,256 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.23 vs. limit=15.0
+2024-09-19 10:18:24,434 INFO [train.py:1198] (1/2) Epoch 36, batch 2850, loss[loss=0.2279, ctc_loss=0.1106, cr_loss=0.3346, attn_decoder_loss=0.2334, over 29485.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1162, cr_loss=0.3567, attn_decoder_loss=0.2408, over 5760315.83 frames. ], batch size: 77, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:18:29,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=644900.0, ans=0.125
+2024-09-19 10:18:30,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=644900.0, ans=0.0
+2024-09-19 10:18:35,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=644900.0, ans=0.0
+2024-09-19 10:18:38,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=644940.0, ans=0.0
+2024-09-19 10:19:14,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=645020.0, ans=0.2
+2024-09-19 10:19:14,997 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.45 vs. limit=15.0
+2024-09-19 10:19:23,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=645060.0, ans=0.1
+2024-09-19 10:19:23,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=645060.0, ans=0.2
+2024-09-19 10:19:32,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=645060.0, ans=0.95
+2024-09-19 10:19:32,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=645060.0, ans=0.125
+2024-09-19 10:19:39,851 INFO [train.py:1198] (1/2) Epoch 36, batch 2900, loss[loss=0.238, ctc_loss=0.1227, cr_loss=0.3722, attn_decoder_loss=0.2425, over 29431.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1168, cr_loss=0.3583, attn_decoder_loss=0.2419, over 5786444.66 frames. ], batch size: 79, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:19:59,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=645140.0, ans=0.0
+2024-09-19 10:20:14,869 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.477e+01 8.534e+01 8.969e+01 9.435e+01 1.794e+02, threshold=1.794e+02, percent-clipped=0.0
+2024-09-19 10:20:24,984 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:20:41,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=645260.0, ans=0.1
+2024-09-19 10:20:57,804 INFO [train.py:1198] (1/2) Epoch 36, batch 2950, loss[loss=0.2218, ctc_loss=0.1053, cr_loss=0.3454, attn_decoder_loss=0.2271, over 29509.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1156, cr_loss=0.356, attn_decoder_loss=0.2405, over 5782576.26 frames. ], batch size: 75, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:21:04,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=645300.0, ans=0.2
+2024-09-19 10:21:06,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.73 vs. limit=12.0
+2024-09-19 10:21:11,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=645340.0, ans=0.1
+2024-09-19 10:21:12,565 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.20 vs. limit=15.0
+2024-09-19 10:22:15,282 INFO [train.py:1198] (1/2) Epoch 36, batch 3000, loss[loss=0.2379, ctc_loss=0.1099, cr_loss=0.3549, attn_decoder_loss=0.2442, over 29752.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1157, cr_loss=0.3562, attn_decoder_loss=0.2403, over 5783198.54 frames. ], batch size: 81, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:22:15,282 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-19 10:22:33,133 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.4.self_attn_weights, attn_weights_entropy = tensor([3.7443, 3.2396, 2.7017, 3.4968, 3.0432, 2.3886, 2.7009, 3.0437],
+       device='cuda:1')
+2024-09-19 10:22:33,842 INFO [train.py:1230] (1/2) Epoch 36, validation: loss=0.212, ctc_loss=0.03671, cr_loss=5.93e-15, attn_decoder_loss=0.2315, over 944034.00 frames.
+2024-09-19 10:22:33,842 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-19 10:22:38,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=645500.0, ans=0.0
+2024-09-19 10:22:40,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=645500.0, ans=0.09899494936611666
+2024-09-19 10:22:43,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=645500.0, ans=0.2
+2024-09-19 10:22:54,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=645540.0, ans=15.0
+2024-09-19 10:23:07,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=645580.0, ans=0.125
+2024-09-19 10:23:08,441 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.539e+01 8.660e+01 9.002e+01 9.609e+01 4.841e+02, threshold=1.800e+02, percent-clipped=1.0
+2024-09-19 10:23:15,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=645580.0, ans=0.025
+2024-09-19 10:23:24,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=645620.0, ans=0.0
+2024-09-19 10:23:28,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=645620.0, ans=0.125
+2024-09-19 10:23:48,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=645700.0, ans=0.0
+2024-09-19 10:23:49,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.59 vs. limit=15.0
+2024-09-19 10:23:50,006 INFO [train.py:1198] (1/2) Epoch 36, batch 3050, loss[loss=0.2208, ctc_loss=0.1023, cr_loss=0.3352, attn_decoder_loss=0.2265, over 29543.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1164, cr_loss=0.3578, attn_decoder_loss=0.2413, over 5777031.19 frames. ], batch size: 76, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:23:52,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.36 vs. limit=15.0
+2024-09-19 10:24:01,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=645700.0, ans=0.025
+2024-09-19 10:24:10,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=645740.0, ans=0.125
+2024-09-19 10:24:24,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=645780.0, ans=0.0
+2024-09-19 10:24:26,165 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.16 vs. limit=15.0
+2024-09-19 10:24:37,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=645820.0, ans=0.125
+2024-09-19 10:24:38,614 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.46 vs. limit=22.5
+2024-09-19 10:24:52,331 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.96 vs. limit=22.5
+2024-09-19 10:24:59,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=645860.0, ans=0.125
+2024-09-19 10:25:07,755 INFO [train.py:1198] (1/2) Epoch 36, batch 3100, loss[loss=0.2616, ctc_loss=0.1431, cr_loss=0.4093, attn_decoder_loss=0.2657, over 29262.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1164, cr_loss=0.3574, attn_decoder_loss=0.241, over 5777979.18 frames. ], batch size: 100, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:25:24,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=645940.0, ans=0.125
+2024-09-19 10:25:33,248 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.90 vs. limit=15.0
+2024-09-19 10:25:34,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=645940.0, ans=0.125
+2024-09-19 10:25:40,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=645980.0, ans=0.0
+2024-09-19 10:25:44,450 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.450e+01 9.039e+01 9.711e+01 1.761e+02, threshold=1.808e+02, percent-clipped=0.0
+2024-09-19 10:25:45,317 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.97 vs. limit=15.0
+2024-09-19 10:26:16,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=646060.0, ans=0.125
+2024-09-19 10:26:17,111 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.10 vs. limit=12.0
+2024-09-19 10:26:25,345 INFO [train.py:1198] (1/2) Epoch 36, batch 3150, loss[loss=0.2513, ctc_loss=0.1237, cr_loss=0.3657, attn_decoder_loss=0.2574, over 28793.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1164, cr_loss=0.3575, attn_decoder_loss=0.2411, over 5783674.00 frames. ], batch size: 104, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:26:54,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=646180.0, ans=0.125
+2024-09-19 10:27:07,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=646180.0, ans=0.125
+2024-09-19 10:27:22,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=646220.0, ans=0.5
+2024-09-19 10:27:36,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=646260.0, ans=0.125
+2024-09-19 10:27:39,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=646300.0, ans=0.1
+2024-09-19 10:27:40,721 INFO [train.py:1198] (1/2) Epoch 36, batch 3200, loss[loss=0.2345, ctc_loss=0.1157, cr_loss=0.3382, attn_decoder_loss=0.2402, over 29401.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1161, cr_loss=0.3566, attn_decoder_loss=0.2406, over 5793608.00 frames. ], batch size: 79, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:27:55,365 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.88 vs. limit=6.0
+2024-09-19 10:27:59,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=646340.0, ans=0.125
+2024-09-19 10:28:06,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=646340.0, ans=0.0
+2024-09-19 10:28:09,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=646340.0, ans=0.1
+2024-09-19 10:28:17,946 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.484e+01 8.478e+01 9.056e+01 9.805e+01 1.899e+02, threshold=1.811e+02, percent-clipped=1.0
+2024-09-19 10:28:29,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=646420.0, ans=0.125
+2024-09-19 10:28:29,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=646420.0, ans=0.125
+2024-09-19 10:28:45,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=646460.0, ans=0.0
+2024-09-19 10:28:59,139 INFO [train.py:1198] (1/2) Epoch 36, batch 3250, loss[loss=0.2422, ctc_loss=0.1285, cr_loss=0.3912, attn_decoder_loss=0.2461, over 29697.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1162, cr_loss=0.3572, attn_decoder_loss=0.2409, over 5801179.52 frames. ], batch size: 84, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:29:00,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=646500.0, ans=0.125
+2024-09-19 10:29:27,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=646540.0, ans=0.5
+2024-09-19 10:29:29,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.97 vs. limit=22.5
+2024-09-19 10:29:37,010 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.89 vs. limit=15.0
+2024-09-19 10:29:46,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=646620.0, ans=0.125
+2024-09-19 10:30:00,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.33 vs. limit=6.0
+2024-09-19 10:30:15,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=646700.0, ans=0.0
+2024-09-19 10:30:16,478 INFO [train.py:1198] (1/2) Epoch 36, batch 3300, loss[loss=0.2392, ctc_loss=0.1094, cr_loss=0.3285, attn_decoder_loss=0.2463, over 28393.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1155, cr_loss=0.3556, attn_decoder_loss=0.2399, over 5798327.26 frames. ], batch size: 111, lr: 3.06e-03, grad_scale: 16.0
+2024-09-19 10:30:22,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=646700.0, ans=0.0
+2024-09-19 10:30:25,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=646700.0, ans=0.125
+2024-09-19 10:30:52,747 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.457e+01 8.565e+01 9.043e+01 9.746e+01 1.474e+02, threshold=1.809e+02, percent-clipped=0.0
+2024-09-19 10:30:56,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=646780.0, ans=0.07
+2024-09-19 10:30:56,653 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.48 vs. limit=15.0
+2024-09-19 10:31:14,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=646820.0, ans=0.04949747468305833
+2024-09-19 10:31:15,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=646860.0, ans=0.1
+2024-09-19 10:31:18,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=646860.0, ans=0.2
+2024-09-19 10:31:30,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=646900.0, ans=0.0
+2024-09-19 10:31:31,685 INFO [train.py:1198] (1/2) Epoch 36, batch 3350, loss[loss=0.2395, ctc_loss=0.1079, cr_loss=0.3551, attn_decoder_loss=0.2462, over 28891.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.116, cr_loss=0.3561, attn_decoder_loss=0.2404, over 5775610.03 frames. ], batch size: 104, lr: 3.06e-03, grad_scale: 8.0
+2024-09-19 10:31:36,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=646900.0, ans=0.0
+2024-09-19 10:31:44,986 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.99 vs. limit=15.0
+2024-09-19 10:31:52,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=646940.0, ans=0.1
+2024-09-19 10:32:20,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=647020.0, ans=0.125
+2024-09-19 10:32:36,516 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.91 vs. limit=15.0
+2024-09-19 10:32:37,913 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.98 vs. limit=22.5
+2024-09-19 10:32:47,064 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.78 vs. limit=10.0
+2024-09-19 10:32:49,153 INFO [train.py:1198] (1/2) Epoch 36, batch 3400, loss[loss=0.2023, ctc_loss=0.09535, cr_loss=0.3231, attn_decoder_loss=0.207, over 29351.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1161, cr_loss=0.3563, attn_decoder_loss=0.2404, over 5767635.33 frames. ], batch size: 67, lr: 3.05e-03, grad_scale: 8.0
+2024-09-19 10:33:23,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=647180.0, ans=0.0
+2024-09-19 10:33:27,779 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.490e+01 8.557e+01 9.096e+01 9.972e+01 2.860e+02, threshold=1.819e+02, percent-clipped=2.0
+2024-09-19 10:33:34,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=647180.0, ans=0.0
+2024-09-19 10:33:39,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=647220.0, ans=0.5
+2024-09-19 10:34:07,340 INFO [train.py:1198] (1/2) Epoch 36, batch 3450, loss[loss=0.2461, ctc_loss=0.1161, cr_loss=0.3584, attn_decoder_loss=0.2526, over 28204.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1162, cr_loss=0.3572, attn_decoder_loss=0.2408, over 5775207.79 frames. ], batch size: 111, lr: 3.05e-03, grad_scale: 8.0
+2024-09-19 10:34:51,711 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.84 vs. limit=22.5
+2024-09-19 10:34:54,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=647420.0, ans=0.0
+2024-09-19 10:35:00,930 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.54 vs. limit=15.0
+2024-09-19 10:35:06,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=647460.0, ans=0.125
+2024-09-19 10:35:12,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=647460.0, ans=0.125
+2024-09-19 10:35:22,385 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.21 vs. limit=15.0
+2024-09-19 10:35:22,926 INFO [train.py:1198] (1/2) Epoch 36, batch 3500, loss[loss=0.2065, ctc_loss=0.08676, cr_loss=0.28, attn_decoder_loss=0.2135, over 29345.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1157, cr_loss=0.356, attn_decoder_loss=0.2403, over 5778032.53 frames. ], batch size: 71, lr: 3.05e-03, grad_scale: 8.0
+2024-09-19 10:35:56,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=647580.0, ans=0.125
+2024-09-19 10:36:00,831 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.526e+01 8.443e+01 8.960e+01 9.445e+01 1.390e+02, threshold=1.792e+02, percent-clipped=0.0
+2024-09-19 10:36:07,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=647580.0, ans=0.025
+2024-09-19 10:36:08,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=647620.0, ans=0.125
+2024-09-19 10:36:33,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=647660.0, ans=0.125
+2024-09-19 10:36:36,850 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:36:39,502 INFO [train.py:1198] (1/2) Epoch 36, batch 3550, loss[loss=0.2459, ctc_loss=0.1139, cr_loss=0.3542, attn_decoder_loss=0.2527, over 29685.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1157, cr_loss=0.3561, attn_decoder_loss=0.2405, over 5783970.10 frames. ], batch size: 89, lr: 3.05e-03, grad_scale: 8.0
+2024-09-19 10:36:58,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=647740.0, ans=0.125
+2024-09-19 10:37:03,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=647740.0, ans=0.125
+2024-09-19 10:37:12,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=647780.0, ans=0.1
+2024-09-19 10:37:21,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=647780.0, ans=0.1
+2024-09-19 10:37:43,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=647860.0, ans=0.1
+2024-09-19 10:37:53,727 INFO [train.py:1198] (1/2) Epoch 36, batch 3600, loss[loss=0.2265, ctc_loss=0.1067, cr_loss=0.3512, attn_decoder_loss=0.232, over 29492.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1158, cr_loss=0.3565, attn_decoder_loss=0.2407, over 5792863.55 frames. ], batch size: 77, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:37:55,673 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:38:04,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=647900.0, ans=0.125
+2024-09-19 10:38:10,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=647940.0, ans=0.1
+2024-09-19 10:38:31,860 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.487e+01 8.382e+01 8.950e+01 9.458e+01 2.043e+02, threshold=1.790e+02, percent-clipped=2.0
+2024-09-19 10:38:35,632 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:38:51,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=648020.0, ans=0.5
+2024-09-19 10:38:56,751 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.63 vs. limit=15.0
+2024-09-19 10:38:59,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=648060.0, ans=0.0
+2024-09-19 10:39:10,853 INFO [train.py:1198] (1/2) Epoch 36, batch 3650, loss[loss=0.2512, ctc_loss=0.1267, cr_loss=0.3919, attn_decoder_loss=0.2563, over 29492.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1153, cr_loss=0.3558, attn_decoder_loss=0.2399, over 5794728.01 frames. ], batch size: 90, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:39:29,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=648140.0, ans=0.0
+2024-09-19 10:39:58,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=648220.0, ans=0.035
+2024-09-19 10:40:18,818 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.24 vs. limit=6.0
+2024-09-19 10:40:18,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.00 vs. limit=15.0
+2024-09-19 10:40:22,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=648260.0, ans=0.2
+2024-09-19 10:40:25,596 INFO [train.py:1198] (1/2) Epoch 36, batch 3700, loss[loss=0.2479, ctc_loss=0.118, cr_loss=0.3593, attn_decoder_loss=0.2543, over 29725.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1152, cr_loss=0.3555, attn_decoder_loss=0.2401, over 5803836.08 frames. ], batch size: 84, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:40:26,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.26 vs. limit=15.0
+2024-09-19 10:40:49,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=648340.0, ans=0.1
+2024-09-19 10:40:52,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=648340.0, ans=0.1
+2024-09-19 10:40:55,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=648380.0, ans=0.2
+2024-09-19 10:41:01,469 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.727e+01 8.595e+01 9.070e+01 9.562e+01 1.267e+02, threshold=1.814e+02, percent-clipped=0.0
+2024-09-19 10:41:10,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=648420.0, ans=0.125
+2024-09-19 10:41:13,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=648420.0, ans=0.125
+2024-09-19 10:41:19,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=648420.0, ans=0.2
+2024-09-19 10:41:30,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=648460.0, ans=0.0
+2024-09-19 10:41:39,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=648500.0, ans=0.125
+2024-09-19 10:41:40,460 INFO [train.py:1198] (1/2) Epoch 36, batch 3750, loss[loss=0.211, ctc_loss=0.09514, cr_loss=0.3044, attn_decoder_loss=0.2171, over 29349.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.115, cr_loss=0.355, attn_decoder_loss=0.2399, over 5807834.32 frames. ], batch size: 67, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:41:42,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=648500.0, ans=0.0
+2024-09-19 10:41:52,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=648500.0, ans=0.1
+2024-09-19 10:41:57,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=648540.0, ans=0.125
+2024-09-19 10:42:13,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=648580.0, ans=0.125
+2024-09-19 10:42:15,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=648580.0, ans=0.125
+2024-09-19 10:42:35,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=648620.0, ans=0.1
+2024-09-19 10:42:35,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=648620.0, ans=0.125
+2024-09-19 10:42:44,077 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.47 vs. limit=15.0
+2024-09-19 10:42:56,340 INFO [train.py:1198] (1/2) Epoch 36, batch 3800, loss[loss=0.2325, ctc_loss=0.1018, cr_loss=0.3287, attn_decoder_loss=0.2398, over 29622.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1149, cr_loss=0.3547, attn_decoder_loss=0.2398, over 5798550.24 frames. ], batch size: 86, lr: 3.05e-03, grad_scale: 8.0
+2024-09-19 10:42:59,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=648700.0, ans=0.125
+2024-09-19 10:43:26,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=648780.0, ans=0.0
+2024-09-19 10:43:34,111 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.510e+01 8.360e+01 8.859e+01 9.442e+01 1.706e+02, threshold=1.772e+02, percent-clipped=0.0
+2024-09-19 10:43:46,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=648820.0, ans=0.1
+2024-09-19 10:43:48,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=648820.0, ans=0.125
+2024-09-19 10:43:58,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=648860.0, ans=0.04949747468305833
+2024-09-19 10:44:00,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.70 vs. limit=15.0
+2024-09-19 10:44:01,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=648860.0, ans=0.1
+2024-09-19 10:44:01,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=648860.0, ans=0.125
+2024-09-19 10:44:06,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=648860.0, ans=0.1
+2024-09-19 10:44:11,224 INFO [train.py:1198] (1/2) Epoch 36, batch 3850, loss[loss=0.2498, ctc_loss=0.1248, cr_loss=0.3775, attn_decoder_loss=0.2553, over 29244.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1149, cr_loss=0.3553, attn_decoder_loss=0.2397, over 5812883.31 frames. ], batch size: 100, lr: 3.05e-03, grad_scale: 8.0
+2024-09-19 10:44:33,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=648940.0, ans=0.025
+2024-09-19 10:44:53,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=648980.0, ans=0.0
+2024-09-19 10:45:06,484 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:45:08,249 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.86 vs. limit=6.0
+2024-09-19 10:45:26,941 INFO [train.py:1198] (1/2) Epoch 36, batch 3900, loss[loss=0.2492, ctc_loss=0.1175, cr_loss=0.3774, attn_decoder_loss=0.2554, over 29625.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1153, cr_loss=0.3563, attn_decoder_loss=0.2404, over 5817748.57 frames. ], batch size: 86, lr: 3.05e-03, grad_scale: 8.0
+2024-09-19 10:45:33,069 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:45:36,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=649100.0, ans=0.5
+2024-09-19 10:45:41,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=649140.0, ans=0.1
+2024-09-19 10:45:46,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=649140.0, ans=0.125
+2024-09-19 10:45:46,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=649140.0, ans=0.125
+2024-09-19 10:46:03,828 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.759e+01 8.587e+01 8.995e+01 9.649e+01 1.195e+02, threshold=1.799e+02, percent-clipped=0.0
+2024-09-19 10:46:06,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=649180.0, ans=0.0
+2024-09-19 10:46:21,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=649220.0, ans=0.0
+2024-09-19 10:46:27,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=649260.0, ans=0.1
+2024-09-19 10:46:32,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=649260.0, ans=0.09899494936611666
+2024-09-19 10:46:40,851 INFO [train.py:1198] (1/2) Epoch 36, batch 3950, loss[loss=0.252, ctc_loss=0.1281, cr_loss=0.4011, attn_decoder_loss=0.2568, over 29499.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1153, cr_loss=0.3568, attn_decoder_loss=0.2405, over 5837045.18 frames. ], batch size: 97, lr: 3.05e-03, grad_scale: 8.0
+2024-09-19 10:46:41,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=649300.0, ans=0.125
+2024-09-19 10:47:03,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=649340.0, ans=0.1
+2024-09-19 10:47:10,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=649380.0, ans=0.125
+2024-09-19 10:47:18,634 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.33 vs. limit=10.0
+2024-09-19 10:47:21,628 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.25 vs. limit=15.0
+2024-09-19 10:47:47,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=649460.0, ans=0.125
+2024-09-19 10:47:51,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=649460.0, ans=0.125
+2024-09-19 10:47:56,081 INFO [train.py:1198] (1/2) Epoch 36, batch 4000, loss[loss=0.2166, ctc_loss=0.09488, cr_loss=0.3188, attn_decoder_loss=0.223, over 29521.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1156, cr_loss=0.3573, attn_decoder_loss=0.2405, over 5814855.73 frames. ], batch size: 74, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:47:56,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.23 vs. limit=22.5
+2024-09-19 10:47:57,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=649500.0, ans=0.1
+2024-09-19 10:48:05,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=649500.0, ans=0.0
+2024-09-19 10:48:33,452 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.599e+01 9.136e+01 9.707e+01 2.354e+02, threshold=1.827e+02, percent-clipped=2.0
+2024-09-19 10:48:58,514 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=10.24 vs. limit=12.0
+2024-09-19 10:49:06,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=649660.0, ans=0.035
+2024-09-19 10:49:10,625 INFO [train.py:1198] (1/2) Epoch 36, batch 4050, loss[loss=0.2587, ctc_loss=0.1473, cr_loss=0.3877, attn_decoder_loss=0.2625, over 20619.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1155, cr_loss=0.3563, attn_decoder_loss=0.2403, over 5798085.84 frames. ], batch size: 210, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:49:20,177 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.68 vs. limit=22.5
+2024-09-19 10:49:31,740 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.14 vs. limit=15.0
+2024-09-19 10:49:51,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=649780.0, ans=0.125
+2024-09-19 10:49:53,668 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.91 vs. limit=22.5
+2024-09-19 10:50:10,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=649860.0, ans=0.125
+2024-09-19 10:50:11,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.35 vs. limit=15.0
+2024-09-19 10:50:19,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=649860.0, ans=0.0
+2024-09-19 10:50:21,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=649860.0, ans=0.125
+2024-09-19 10:50:25,522 INFO [train.py:1198] (1/2) Epoch 36, batch 4100, loss[loss=0.2464, ctc_loss=0.1315, cr_loss=0.3905, attn_decoder_loss=0.2505, over 29479.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1162, cr_loss=0.3575, attn_decoder_loss=0.2409, over 5793010.71 frames. ], batch size: 90, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:50:26,335 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.25 vs. limit=22.5
+2024-09-19 10:50:35,086 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.02 vs. limit=6.0
+2024-09-19 10:50:40,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=649940.0, ans=0.1
+2024-09-19 10:50:40,328 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:50:41,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=649940.0, ans=0.125
+2024-09-19 10:50:51,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=649940.0, ans=0.1
+2024-09-19 10:50:59,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=649980.0, ans=0.025
+2024-09-19 10:51:01,957 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.541e+01 9.319e+01 9.811e+01 6.662e+02, threshold=1.864e+02, percent-clipped=1.0
+2024-09-19 10:51:14,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.18 vs. limit=10.0
+2024-09-19 10:51:21,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=650020.0, ans=0.0
+2024-09-19 10:51:31,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.28 vs. limit=12.0
+2024-09-19 10:51:39,870 INFO [train.py:1198] (1/2) Epoch 36, batch 4150, loss[loss=0.2385, ctc_loss=0.1229, cr_loss=0.3785, attn_decoder_loss=0.2429, over 29504.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1162, cr_loss=0.3576, attn_decoder_loss=0.2407, over 5799042.50 frames. ], batch size: 77, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:51:44,690 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 10:51:52,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=650100.0, ans=0.125
+2024-09-19 10:51:52,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys.whitening_limit, batch_count=650100.0, ans=6.0
+2024-09-19 10:51:59,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=650140.0, ans=0.125
+2024-09-19 10:51:59,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=650140.0, ans=0.2
+2024-09-19 10:52:00,192 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.99 vs. limit=22.5
+2024-09-19 10:52:14,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=650180.0, ans=0.0
+2024-09-19 10:52:23,950 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.12 vs. limit=22.5
+2024-09-19 10:52:27,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer_na.min_abs, batch_count=650220.0, ans=0.02
+2024-09-19 10:52:51,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=650260.0, ans=0.025
+2024-09-19 10:52:53,759 INFO [train.py:1198] (1/2) Epoch 36, batch 4200, loss[loss=0.2622, ctc_loss=0.1413, cr_loss=0.4081, attn_decoder_loss=0.2665, over 29491.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1162, cr_loss=0.3578, attn_decoder_loss=0.2407, over 5800174.91 frames. ], batch size: 90, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:53:01,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=650300.0, ans=0.125
+2024-09-19 10:53:01,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=650300.0, ans=0.125
+2024-09-19 10:53:01,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=650300.0, ans=0.1
+2024-09-19 10:53:22,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=650380.0, ans=0.125
+2024-09-19 10:53:31,781 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.354e+01 8.662e+01 9.257e+01 9.687e+01 2.927e+02, threshold=1.851e+02, percent-clipped=1.0
+2024-09-19 10:54:01,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=650460.0, ans=0.0
+2024-09-19 10:54:04,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=650460.0, ans=0.1
+2024-09-19 10:54:04,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=650460.0, ans=0.1
+2024-09-19 10:54:05,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=650460.0, ans=0.025
+2024-09-19 10:54:08,386 INFO [train.py:1198] (1/2) Epoch 36, batch 4250, loss[loss=0.2206, ctc_loss=0.1022, cr_loss=0.3299, attn_decoder_loss=0.2264, over 29509.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1158, cr_loss=0.3565, attn_decoder_loss=0.2407, over 5805853.95 frames. ], batch size: 74, lr: 3.05e-03, grad_scale: 16.0
+2024-09-19 10:54:26,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=650540.0, ans=0.0
+2024-09-19 10:54:36,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=650580.0, ans=0.125
+2024-09-19 10:54:52,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=650620.0, ans=0.2
+2024-09-19 10:55:09,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=650660.0, ans=0.0
+2024-09-19 10:55:22,678 INFO [train.py:1198] (1/2) Epoch 36, batch 4300, loss[loss=0.2545, ctc_loss=0.1305, cr_loss=0.3909, attn_decoder_loss=0.2596, over 29539.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1157, cr_loss=0.3561, attn_decoder_loss=0.241, over 5795129.25 frames.
], batch size: 87, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:55:43,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=650740.0, ans=0.125 +2024-09-19 10:55:58,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=650780.0, ans=0.125 +2024-09-19 10:55:58,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=650780.0, ans=0.125 +2024-09-19 10:56:01,046 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.493e+01 8.651e+01 9.063e+01 9.682e+01 5.777e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-19 10:56:01,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=650780.0, ans=0.125 +2024-09-19 10:56:01,846 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.16 vs. limit=15.0 +2024-09-19 10:56:05,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=650820.0, ans=0.125 +2024-09-19 10:56:19,853 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.62 vs. limit=15.0 +2024-09-19 10:56:36,544 INFO [train.py:1198] (1/2) Epoch 36, batch 4350, loss[loss=0.2443, ctc_loss=0.1177, cr_loss=0.3608, attn_decoder_loss=0.2504, over 29459.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1179, cr_loss=0.361, attn_decoder_loss=0.2443, over 5797412.22 frames. ], batch size: 97, lr: 3.05e-03, grad_scale: 8.0 +2024-09-19 10:56:39,157 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.58 vs. limit=22.5 +2024-09-19 10:56:50,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=650940.0, ans=0.1 +2024-09-19 10:56:55,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=650940.0, ans=0.125 +2024-09-19 10:57:01,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=650940.0, ans=0.125 +2024-09-19 10:57:02,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=650940.0, ans=0.125 +2024-09-19 10:57:30,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.38 vs. limit=15.0 +2024-09-19 10:57:51,172 INFO [train.py:1198] (1/2) Epoch 36, batch 4400, loss[loss=0.2487, ctc_loss=0.132, cr_loss=0.3979, attn_decoder_loss=0.2528, over 27353.00 frames. ], tot_loss[loss=0.2409, ctc_loss=0.1192, cr_loss=0.3636, attn_decoder_loss=0.2463, over 5768090.34 frames. 
], batch size: 124, lr: 3.05e-03, grad_scale: 16.0 +2024-09-19 10:57:58,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=651100.0, ans=0.125 +2024-09-19 10:58:02,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten.whitening_limit, batch_count=651100.0, ans=15.0 +2024-09-19 10:58:03,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=651100.0, ans=0.025 +2024-09-19 10:58:16,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=651140.0, ans=0.125 +2024-09-19 10:58:29,467 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.803e+01 8.945e+01 9.277e+01 9.704e+01 3.205e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 10:58:42,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=651220.0, ans=0.0 +2024-09-19 10:58:43,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=651220.0, ans=10.0 +2024-09-19 10:58:52,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=651260.0, ans=0.125 +2024-09-19 10:58:56,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=651260.0, ans=0.125 +2024-09-19 10:59:05,957 INFO [train.py:1198] (1/2) Epoch 36, batch 4450, loss[loss=0.2598, ctc_loss=0.1505, cr_loss=0.3966, attn_decoder_loss=0.2631, over 21405.00 frames. ], tot_loss[loss=0.243, ctc_loss=0.1227, cr_loss=0.3686, attn_decoder_loss=0.2482, over 5576663.01 frames. ], batch size: 210, lr: 3.04e-03, grad_scale: 8.0 +2024-09-19 10:59:16,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=651300.0, ans=0.0 +2024-09-19 10:59:26,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.04 vs. limit=15.0 +2024-09-19 10:59:29,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=651340.0, ans=0.125 +2024-09-19 10:59:44,488 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 10:59:44,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=651380.0, ans=0.0 +2024-09-19 10:59:46,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=10.37 vs. limit=15.0 +2024-09-19 10:59:51,313 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=7.59 vs. limit=15.0 +2024-09-19 11:00:20,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=651500.0, ans=0.125 +2024-09-19 11:00:21,397 INFO [train.py:1198] (1/2) Epoch 36, batch 4500, loss[loss=0.2517, ctc_loss=0.1366, cr_loss=0.3642, attn_decoder_loss=0.2564, over 20222.00 frames. 
], tot_loss[loss=0.2449, ctc_loss=0.1258, cr_loss=0.3713, attn_decoder_loss=0.2499, over 5240033.97 frames. ], batch size: 210, lr: 3.04e-03, grad_scale: 8.0 +2024-09-19 11:00:21,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=651500.0, ans=0.125 +2024-09-19 11:00:25,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=651500.0, ans=0.025 +2024-09-19 11:00:28,742 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.76 vs. limit=15.0 +2024-09-19 11:00:29,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=651500.0, ans=0.0 +2024-09-19 11:00:53,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=651580.0, ans=0.125 +2024-09-19 11:00:55,885 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.64 vs. limit=15.0 +2024-09-19 11:01:50,762 INFO [train.py:1198] (1/2) Epoch 37, batch 0, loss[loss=0.2147, ctc_loss=0.09723, cr_loss=0.32, attn_decoder_loss=0.2206, over 29586.00 frames. ], tot_loss[loss=0.2147, ctc_loss=0.09723, cr_loss=0.32, attn_decoder_loss=0.2206, over 29586.00 frames. ], batch size: 73, lr: 3.00e-03, grad_scale: 16.0 +2024-09-19 11:01:50,762 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 11:01:53,999 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.1.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([4.4250, 4.0850, 3.9626, 3.8090], device='cuda:1') +2024-09-19 11:02:09,656 INFO [train.py:1230] (1/2) Epoch 37, validation: loss=0.2132, ctc_loss=0.03619, cr_loss=6.181e-15, attn_decoder_loss=0.2329, over 944034.00 frames. +2024-09-19 11:02:09,657 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 11:02:11,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=651600.0, ans=0.2 +2024-09-19 11:02:12,625 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.774e+01 1.049e+02 1.138e+02 1.230e+02 2.136e+02, threshold=2.276e+02, percent-clipped=1.0 +2024-09-19 11:02:12,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=651600.0, ans=0.1 +2024-09-19 11:02:22,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=651600.0, ans=0.0 +2024-09-19 11:02:45,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=651680.0, ans=0.0 +2024-09-19 11:02:51,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=651680.0, ans=0.125 +2024-09-19 11:02:54,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=651720.0, ans=0.07 +2024-09-19 11:02:59,908 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.73 vs. 
limit=15.0 +2024-09-19 11:03:05,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=651720.0, ans=0.2 +2024-09-19 11:03:08,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=651720.0, ans=0.2 +2024-09-19 11:03:08,579 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.06 vs. limit=15.0 +2024-09-19 11:03:09,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=651760.0, ans=0.05 +2024-09-19 11:03:15,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=651760.0, ans=0.125 +2024-09-19 11:03:23,556 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:03:26,221 INFO [train.py:1198] (1/2) Epoch 37, batch 50, loss[loss=0.2103, ctc_loss=0.09044, cr_loss=0.2881, attn_decoder_loss=0.2172, over 29451.00 frames. ], tot_loss[loss=0.2367, ctc_loss=0.1179, cr_loss=0.3624, attn_decoder_loss=0.2418, over 1268510.03 frames. ], batch size: 70, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:03:26,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=651800.0, ans=0.0 +2024-09-19 11:03:35,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=651800.0, ans=0.0 +2024-09-19 11:03:41,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=651840.0, ans=0.1 +2024-09-19 11:03:44,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=651840.0, ans=0.0 +2024-09-19 11:03:57,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=651880.0, ans=0.0 +2024-09-19 11:04:01,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=651880.0, ans=0.125 +2024-09-19 11:04:04,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=651880.0, ans=0.0 +2024-09-19 11:04:33,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=651960.0, ans=0.025 +2024-09-19 11:04:42,595 INFO [train.py:1198] (1/2) Epoch 37, batch 100, loss[loss=0.2264, ctc_loss=0.1116, cr_loss=0.336, attn_decoder_loss=0.2317, over 29547.00 frames. ], tot_loss[loss=0.2385, ctc_loss=0.1189, cr_loss=0.3654, attn_decoder_loss=0.2437, over 2252117.45 frames. ], batch size: 76, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:04:46,988 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.810e+01 8.722e+01 9.272e+01 9.995e+01 2.422e+02, threshold=1.854e+02, percent-clipped=1.0 +2024-09-19 11:05:11,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=652080.0, ans=0.2 +2024-09-19 11:05:13,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.96 vs. 
limit=22.5 +2024-09-19 11:05:19,281 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.44 vs. limit=15.0 +2024-09-19 11:05:23,144 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:05:40,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=652120.0, ans=0.125 +2024-09-19 11:05:52,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.38 vs. limit=10.0 +2024-09-19 11:05:55,627 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.37 vs. limit=15.0 +2024-09-19 11:05:59,143 INFO [train.py:1198] (1/2) Epoch 37, batch 150, loss[loss=0.2069, ctc_loss=0.09072, cr_loss=0.2981, attn_decoder_loss=0.2132, over 29420.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.116, cr_loss=0.3584, attn_decoder_loss=0.2411, over 3046972.17 frames. ], batch size: 70, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:06:07,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=652200.0, ans=0.025 +2024-09-19 11:06:22,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=652240.0, ans=0.125 +2024-09-19 11:06:30,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=652280.0, ans=0.125 +2024-09-19 11:06:52,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=652320.0, ans=0.0 +2024-09-19 11:06:58,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=652320.0, ans=0.2 +2024-09-19 11:07:16,348 INFO [train.py:1198] (1/2) Epoch 37, batch 200, loss[loss=0.2406, ctc_loss=0.1252, cr_loss=0.3923, attn_decoder_loss=0.2447, over 27339.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.115, cr_loss=0.3563, attn_decoder_loss=0.2397, over 3659232.06 frames. ], batch size: 124, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:07:17,231 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.16 vs. limit=22.5 +2024-09-19 11:07:20,813 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.578e+01 8.412e+01 8.881e+01 9.450e+01 8.334e+02, threshold=1.776e+02, percent-clipped=1.0 +2024-09-19 11:07:33,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=652440.0, ans=0.125 +2024-09-19 11:07:36,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=652440.0, ans=0.0 +2024-09-19 11:07:53,014 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:07:55,025 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=11.93 vs. 
limit=15.0 +2024-09-19 11:07:57,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=652480.0, ans=0.125 +2024-09-19 11:08:14,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=652520.0, ans=15.0 +2024-09-19 11:08:15,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=652560.0, ans=0.0 +2024-09-19 11:08:28,288 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.13 vs. limit=22.5 +2024-09-19 11:08:31,987 INFO [train.py:1198] (1/2) Epoch 37, batch 250, loss[loss=0.2446, ctc_loss=0.1214, cr_loss=0.3616, attn_decoder_loss=0.2502, over 29293.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1143, cr_loss=0.3545, attn_decoder_loss=0.2395, over 4142080.69 frames. ], batch size: 100, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:08:42,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=652600.0, ans=0.125 +2024-09-19 11:08:49,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=652640.0, ans=0.125 +2024-09-19 11:08:56,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=652640.0, ans=0.125 +2024-09-19 11:09:05,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=652680.0, ans=0.125 +2024-09-19 11:09:20,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=652720.0, ans=0.125 +2024-09-19 11:09:43,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=652760.0, ans=0.125 +2024-09-19 11:09:43,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=652760.0, ans=0.125 +2024-09-19 11:09:50,022 INFO [train.py:1198] (1/2) Epoch 37, batch 300, loss[loss=0.2472, ctc_loss=0.123, cr_loss=0.3782, attn_decoder_loss=0.2526, over 29551.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1142, cr_loss=0.3541, attn_decoder_loss=0.2395, over 4510761.53 frames. ], batch size: 92, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:09:53,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=652800.0, ans=0.125 +2024-09-19 11:09:54,608 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.181e+01 8.309e+01 8.922e+01 9.556e+01 2.479e+02, threshold=1.784e+02, percent-clipped=1.0 +2024-09-19 11:10:00,072 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.64 vs. limit=15.0 +2024-09-19 11:10:05,461 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.89 vs. 
limit=15.0 +2024-09-19 11:10:18,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=652840.0, ans=0.0 +2024-09-19 11:10:35,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=652880.0, ans=0.125 +2024-09-19 11:10:47,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=652920.0, ans=0.0 +2024-09-19 11:10:53,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=652960.0, ans=0.09899494936611666 +2024-09-19 11:10:53,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=652960.0, ans=0.0 +2024-09-19 11:11:08,045 INFO [train.py:1198] (1/2) Epoch 37, batch 350, loss[loss=0.2068, ctc_loss=0.09164, cr_loss=0.3086, attn_decoder_loss=0.2127, over 29306.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1146, cr_loss=0.3549, attn_decoder_loss=0.2399, over 4795556.50 frames. ], batch size: 71, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:11:31,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=653040.0, ans=0.0 +2024-09-19 11:11:39,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=653080.0, ans=0.0 +2024-09-19 11:11:47,515 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:12:10,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=653160.0, ans=0.125 +2024-09-19 11:12:10,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=653160.0, ans=0.125 +2024-09-19 11:12:23,524 INFO [train.py:1198] (1/2) Epoch 37, batch 400, loss[loss=0.2401, ctc_loss=0.1243, cr_loss=0.3766, attn_decoder_loss=0.2446, over 29705.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1145, cr_loss=0.3545, attn_decoder_loss=0.2398, over 5025650.99 frames. ], batch size: 82, lr: 3.00e-03, grad_scale: 16.0 +2024-09-19 11:12:28,156 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.730e+01 8.454e+01 8.886e+01 9.286e+01 1.359e+02, threshold=1.777e+02, percent-clipped=0.0 +2024-09-19 11:12:32,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=653200.0, ans=0.125 +2024-09-19 11:12:54,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=653280.0, ans=0.125 +2024-09-19 11:13:13,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=653320.0, ans=0.0 +2024-09-19 11:13:41,669 INFO [train.py:1198] (1/2) Epoch 37, batch 450, loss[loss=0.2492, ctc_loss=0.1278, cr_loss=0.3793, attn_decoder_loss=0.2543, over 29699.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1148, cr_loss=0.3554, attn_decoder_loss=0.2401, over 5188091.46 frames. ], batch size: 83, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:13:59,743 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.04 vs. 
limit=22.5 +2024-09-19 11:14:03,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=653440.0, ans=0.1 +2024-09-19 11:14:20,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=653480.0, ans=0.0 +2024-09-19 11:14:21,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=653480.0, ans=0.1 +2024-09-19 11:14:24,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=653480.0, ans=0.0 +2024-09-19 11:14:37,852 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.81 vs. limit=15.0 +2024-09-19 11:14:48,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=653560.0, ans=0.1 +2024-09-19 11:15:00,249 INFO [train.py:1198] (1/2) Epoch 37, batch 500, loss[loss=0.259, ctc_loss=0.1389, cr_loss=0.4116, attn_decoder_loss=0.2633, over 29416.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1147, cr_loss=0.3548, attn_decoder_loss=0.2396, over 5330947.36 frames. ], batch size: 94, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:15:03,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=653600.0, ans=0.125 +2024-09-19 11:15:06,232 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.426e+01 9.049e+01 9.525e+01 1.733e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 11:15:11,602 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.72 vs. limit=15.0 +2024-09-19 11:15:17,579 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.63 vs. limit=22.5 +2024-09-19 11:15:23,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=653640.0, ans=0.125 +2024-09-19 11:15:32,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.77 vs. limit=22.5 +2024-09-19 11:15:42,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=653680.0, ans=0.025 +2024-09-19 11:16:10,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=653760.0, ans=0.1 +2024-09-19 11:16:13,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=653760.0, ans=0.125 +2024-09-19 11:16:15,824 INFO [train.py:1198] (1/2) Epoch 37, batch 550, loss[loss=0.2441, ctc_loss=0.1164, cr_loss=0.348, attn_decoder_loss=0.2505, over 28888.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1146, cr_loss=0.3541, attn_decoder_loss=0.2396, over 5423903.46 frames. 
], batch size: 104, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:16:16,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=653800.0, ans=0.04949747468305833 +2024-09-19 11:16:44,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=653880.0, ans=0.0 +2024-09-19 11:17:06,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=653920.0, ans=0.125 +2024-09-19 11:17:11,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=653920.0, ans=0.1 +2024-09-19 11:17:13,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=653920.0, ans=0.125 +2024-09-19 11:17:24,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=653960.0, ans=0.0 +2024-09-19 11:17:31,925 INFO [train.py:1198] (1/2) Epoch 37, batch 600, loss[loss=0.2545, ctc_loss=0.1316, cr_loss=0.381, attn_decoder_loss=0.2597, over 29213.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1151, cr_loss=0.3554, attn_decoder_loss=0.2402, over 5510468.09 frames. ], batch size: 100, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:17:40,202 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.510e+01 8.494e+01 8.998e+01 9.681e+01 2.744e+02, threshold=1.800e+02, percent-clipped=3.0 +2024-09-19 11:17:54,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=654040.0, ans=0.0 +2024-09-19 11:18:05,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=654080.0, ans=0.04949747468305833 +2024-09-19 11:18:23,649 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.43 vs. limit=12.0 +2024-09-19 11:18:51,778 INFO [train.py:1198] (1/2) Epoch 37, batch 650, loss[loss=0.2442, ctc_loss=0.1214, cr_loss=0.3661, attn_decoder_loss=0.2497, over 29778.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1147, cr_loss=0.3547, attn_decoder_loss=0.2395, over 5586798.66 frames. ], batch size: 81, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:19:04,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.53 vs. limit=15.0 +2024-09-19 11:19:24,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=654280.0, ans=0.025 +2024-09-19 11:19:27,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=654280.0, ans=0.1 +2024-09-19 11:19:35,039 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.31 vs. limit=10.0 +2024-09-19 11:19:45,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=654320.0, ans=0.125 +2024-09-19 11:20:07,723 INFO [train.py:1198] (1/2) Epoch 37, batch 700, loss[loss=0.2193, ctc_loss=0.1037, cr_loss=0.3372, attn_decoder_loss=0.2246, over 29519.00 frames. 
], tot_loss[loss=0.2345, ctc_loss=0.115, cr_loss=0.3559, attn_decoder_loss=0.2399, over 5637503.07 frames. ], batch size: 76, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:20:13,641 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.549e+01 8.958e+01 9.415e+01 1.725e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-19 11:20:24,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=654440.0, ans=0.125 +2024-09-19 11:20:32,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=654440.0, ans=0.025 +2024-09-19 11:20:34,253 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.29 vs. limit=15.0 +2024-09-19 11:20:35,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=654440.0, ans=0.125 +2024-09-19 11:20:54,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=654520.0, ans=0.2 +2024-09-19 11:21:00,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=654520.0, ans=10.0 +2024-09-19 11:21:05,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=654520.0, ans=0.2 +2024-09-19 11:21:05,852 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.93 vs. limit=12.0 +2024-09-19 11:21:23,307 INFO [train.py:1198] (1/2) Epoch 37, batch 750, loss[loss=0.2488, ctc_loss=0.1275, cr_loss=0.3835, attn_decoder_loss=0.2538, over 29689.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1149, cr_loss=0.3556, attn_decoder_loss=0.2398, over 5674523.66 frames. ], batch size: 82, lr: 3.00e-03, grad_scale: 8.0 +2024-09-19 11:21:34,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=654600.0, ans=0.125 +2024-09-19 11:22:30,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=654760.0, ans=0.1 +2024-09-19 11:22:43,728 INFO [train.py:1198] (1/2) Epoch 37, batch 800, loss[loss=0.2151, ctc_loss=0.09286, cr_loss=0.2996, attn_decoder_loss=0.222, over 29586.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1148, cr_loss=0.3551, attn_decoder_loss=0.2397, over 5705939.18 frames. ], batch size: 73, lr: 2.99e-03, grad_scale: 16.0 +2024-09-19 11:22:45,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=654800.0, ans=0.125 +2024-09-19 11:22:49,766 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.191e+01 8.523e+01 9.017e+01 9.581e+01 2.303e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-19 11:23:29,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.43 vs. 
limit=15.0 +2024-09-19 11:23:41,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=654920.0, ans=0.0 +2024-09-19 11:23:58,746 INFO [train.py:1198] (1/2) Epoch 37, batch 850, loss[loss=0.2558, ctc_loss=0.1335, cr_loss=0.3915, attn_decoder_loss=0.2607, over 29711.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1145, cr_loss=0.3543, attn_decoder_loss=0.2395, over 5736217.10 frames. ], batch size: 89, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:24:31,453 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.94 vs. limit=15.0 +2024-09-19 11:24:39,945 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.80 vs. limit=15.0 +2024-09-19 11:24:44,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.67 vs. limit=15.0 +2024-09-19 11:24:53,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=655120.0, ans=0.125 +2024-09-19 11:25:04,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.93 vs. limit=10.0 +2024-09-19 11:25:15,268 INFO [train.py:1198] (1/2) Epoch 37, batch 900, loss[loss=0.2102, ctc_loss=0.09577, cr_loss=0.2987, attn_decoder_loss=0.2162, over 29576.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1148, cr_loss=0.3551, attn_decoder_loss=0.2398, over 5740839.17 frames. ], batch size: 73, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:25:19,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=655200.0, ans=0.125 +2024-09-19 11:25:22,644 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.430e+01 8.623e+01 9.305e+01 9.762e+01 2.031e+02, threshold=1.861e+02, percent-clipped=1.0 +2024-09-19 11:25:23,658 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.11 vs. limit=15.0 +2024-09-19 11:25:27,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=655200.0, ans=0.025 +2024-09-19 11:25:32,806 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.03 vs. limit=15.0 +2024-09-19 11:25:41,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=655240.0, ans=0.0 +2024-09-19 11:25:42,579 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:25:43,037 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.34 vs. 
limit=15.0 +2024-09-19 11:26:17,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=655320.0, ans=0.0 +2024-09-19 11:26:18,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=655360.0, ans=0.025 +2024-09-19 11:26:23,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=655360.0, ans=0.125 +2024-09-19 11:26:24,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff3.min_abs, batch_count=655360.0, ans=0.2 +2024-09-19 11:26:26,970 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.71 vs. limit=22.5 +2024-09-19 11:26:30,736 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:26:34,857 INFO [train.py:1198] (1/2) Epoch 37, batch 950, loss[loss=0.22, ctc_loss=0.1066, cr_loss=0.3192, attn_decoder_loss=0.2255, over 29529.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1148, cr_loss=0.3553, attn_decoder_loss=0.2399, over 5743013.62 frames. ], batch size: 74, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:27:00,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=655440.0, ans=0.0 +2024-09-19 11:27:11,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=655480.0, ans=0.125 +2024-09-19 11:27:19,429 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.56 vs. limit=22.5 +2024-09-19 11:27:20,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=655520.0, ans=0.1 +2024-09-19 11:27:25,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.79 vs. limit=12.0 +2024-09-19 11:27:41,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=655560.0, ans=0.125 +2024-09-19 11:27:50,314 INFO [train.py:1198] (1/2) Epoch 37, batch 1000, loss[loss=0.2326, ctc_loss=0.1158, cr_loss=0.3546, attn_decoder_loss=0.2376, over 29489.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1156, cr_loss=0.3571, attn_decoder_loss=0.2408, over 5735816.89 frames. 
], batch size: 77, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:27:57,726 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.598e+01 8.810e+01 9.265e+01 9.999e+01 4.241e+02, threshold=1.853e+02, percent-clipped=2.0 +2024-09-19 11:27:59,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=655600.0, ans=0.1 +2024-09-19 11:28:00,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=655600.0, ans=0.125 +2024-09-19 11:28:04,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=655640.0, ans=0.05 +2024-09-19 11:28:13,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=655640.0, ans=0.125 +2024-09-19 11:28:16,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=655640.0, ans=0.0 +2024-09-19 11:28:18,174 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.74 vs. limit=22.5 +2024-09-19 11:28:43,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=655720.0, ans=0.125 +2024-09-19 11:28:58,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=655760.0, ans=0.0 +2024-09-19 11:29:06,001 INFO [train.py:1198] (1/2) Epoch 37, batch 1050, loss[loss=0.2403, ctc_loss=0.1149, cr_loss=0.357, attn_decoder_loss=0.2463, over 29686.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1151, cr_loss=0.3562, attn_decoder_loss=0.2401, over 5743772.94 frames. ], batch size: 85, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:29:18,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=655800.0, ans=0.0 +2024-09-19 11:29:39,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=655880.0, ans=0.125 +2024-09-19 11:29:52,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=655880.0, ans=0.025 +2024-09-19 11:29:56,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=655920.0, ans=0.0 +2024-09-19 11:29:58,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=655920.0, ans=0.125 +2024-09-19 11:30:04,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=655920.0, ans=0.2 +2024-09-19 11:30:16,871 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.47 vs. limit=15.0 +2024-09-19 11:30:33,919 INFO [train.py:1198] (1/2) Epoch 37, batch 1100, loss[loss=0.2346, ctc_loss=0.1164, cr_loss=0.383, attn_decoder_loss=0.2392, over 29452.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.115, cr_loss=0.356, attn_decoder_loss=0.2398, over 5757055.82 frames. 
], batch size: 78, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:30:41,295 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.579e+01 8.502e+01 8.949e+01 9.455e+01 1.229e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-19 11:30:43,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=656000.0, ans=0.125 +2024-09-19 11:31:07,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=656080.0, ans=0.0 +2024-09-19 11:31:12,822 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.17 vs. limit=22.5 +2024-09-19 11:31:25,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=656120.0, ans=0.0 +2024-09-19 11:31:39,583 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.39 vs. limit=15.0 +2024-09-19 11:31:41,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=656160.0, ans=0.125 +2024-09-19 11:31:48,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=656200.0, ans=0.0 +2024-09-19 11:31:49,466 INFO [train.py:1198] (1/2) Epoch 37, batch 1150, loss[loss=0.2329, ctc_loss=0.1143, cr_loss=0.3545, attn_decoder_loss=0.2382, over 29431.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1151, cr_loss=0.3561, attn_decoder_loss=0.2397, over 5754441.94 frames. ], batch size: 78, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:31:56,484 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.56 vs. limit=22.5 +2024-09-19 11:32:07,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=656240.0, ans=0.025 +2024-09-19 11:32:09,860 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.34 vs. limit=10.0 +2024-09-19 11:32:20,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=656280.0, ans=0.125 +2024-09-19 11:32:26,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=656280.0, ans=0.125 +2024-09-19 11:32:27,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=656280.0, ans=0.125 +2024-09-19 11:32:32,032 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.73 vs. limit=5.0 +2024-09-19 11:32:43,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=656320.0, ans=0.125 +2024-09-19 11:33:05,436 INFO [train.py:1198] (1/2) Epoch 37, batch 1200, loss[loss=0.2386, ctc_loss=0.1065, cr_loss=0.3299, attn_decoder_loss=0.2459, over 29674.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1157, cr_loss=0.3571, attn_decoder_loss=0.2406, over 5747126.19 frames. 
], batch size: 85, lr: 2.99e-03, grad_scale: 16.0 +2024-09-19 11:33:12,938 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.373e+01 8.756e+01 9.143e+01 9.785e+01 1.884e+02, threshold=1.829e+02, percent-clipped=2.0 +2024-09-19 11:33:31,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=656440.0, ans=0.125 +2024-09-19 11:33:34,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=656440.0, ans=0.2 +2024-09-19 11:33:38,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=656480.0, ans=0.0 +2024-09-19 11:33:43,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=656480.0, ans=0.125 +2024-09-19 11:33:57,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=656520.0, ans=0.0 +2024-09-19 11:34:25,537 INFO [train.py:1198] (1/2) Epoch 37, batch 1250, loss[loss=0.2532, ctc_loss=0.132, cr_loss=0.3922, attn_decoder_loss=0.258, over 29525.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.1163, cr_loss=0.3588, attn_decoder_loss=0.2415, over 5775379.36 frames. ], batch size: 92, lr: 2.99e-03, grad_scale: 16.0 +2024-09-19 11:34:27,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=656600.0, ans=0.025 +2024-09-19 11:35:22,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=656720.0, ans=0.1 +2024-09-19 11:35:23,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.55 vs. limit=15.0 +2024-09-19 11:35:41,474 INFO [train.py:1198] (1/2) Epoch 37, batch 1300, loss[loss=0.2477, ctc_loss=0.1176, cr_loss=0.3461, attn_decoder_loss=0.2544, over 28267.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1159, cr_loss=0.358, attn_decoder_loss=0.2409, over 5780586.37 frames. ], batch size: 111, lr: 2.99e-03, grad_scale: 16.0 +2024-09-19 11:35:42,503 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.04 vs. limit=15.0 +2024-09-19 11:35:49,094 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.622e+01 8.478e+01 8.951e+01 9.333e+01 1.111e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-19 11:35:58,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=656840.0, ans=0.0 +2024-09-19 11:36:03,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=656840.0, ans=0.0 +2024-09-19 11:36:16,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=656880.0, ans=0.125 +2024-09-19 11:36:36,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=656920.0, ans=0.125 +2024-09-19 11:36:56,962 INFO [train.py:1198] (1/2) Epoch 37, batch 1350, loss[loss=0.2329, ctc_loss=0.1097, cr_loss=0.3341, attn_decoder_loss=0.2391, over 29772.00 frames. 
], tot_loss[loss=0.2353, ctc_loss=0.1155, cr_loss=0.357, attn_decoder_loss=0.2407, over 5797605.97 frames. ], batch size: 81, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:37:10,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=657040.0, ans=10.0 +2024-09-19 11:37:15,599 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.96 vs. limit=22.5 +2024-09-19 11:37:16,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=657040.0, ans=0.0 +2024-09-19 11:37:17,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=657040.0, ans=0.125 +2024-09-19 11:37:19,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=657040.0, ans=0.1 +2024-09-19 11:37:32,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=657080.0, ans=0.125 +2024-09-19 11:37:37,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=657080.0, ans=0.0 +2024-09-19 11:37:47,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=657120.0, ans=0.125 +2024-09-19 11:38:12,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=657160.0, ans=0.125 +2024-09-19 11:38:16,416 INFO [train.py:1198] (1/2) Epoch 37, batch 1400, loss[loss=0.213, ctc_loss=0.09876, cr_loss=0.3317, attn_decoder_loss=0.2183, over 29570.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1152, cr_loss=0.3561, attn_decoder_loss=0.2401, over 5808426.20 frames. ], batch size: 69, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:38:25,471 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.397e+01 9.027e+01 9.734e+01 1.349e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 11:38:33,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.14 vs. limit=6.0 +2024-09-19 11:38:38,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=657240.0, ans=0.025 +2024-09-19 11:38:54,818 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.47 vs. limit=15.0 +2024-09-19 11:39:32,000 INFO [train.py:1198] (1/2) Epoch 37, batch 1450, loss[loss=0.2569, ctc_loss=0.1314, cr_loss=0.391, attn_decoder_loss=0.2621, over 29436.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1149, cr_loss=0.3558, attn_decoder_loss=0.2404, over 5806162.67 frames. ], batch size: 94, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:39:45,172 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=10.31 vs. limit=15.0 +2024-09-19 11:39:45,939 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:40:04,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.57 vs. 
limit=6.0 +2024-09-19 11:40:17,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=657520.0, ans=0.09899494936611666 +2024-09-19 11:40:25,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=657520.0, ans=0.125 +2024-09-19 11:40:37,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=657560.0, ans=0.125 +2024-09-19 11:40:48,387 INFO [train.py:1198] (1/2) Epoch 37, batch 1500, loss[loss=0.2435, ctc_loss=0.1158, cr_loss=0.3676, attn_decoder_loss=0.2495, over 29615.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.115, cr_loss=0.3559, attn_decoder_loss=0.2406, over 5807290.01 frames. ], batch size: 86, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:40:57,335 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.254e+01 8.474e+01 8.863e+01 9.485e+01 5.565e+02, threshold=1.773e+02, percent-clipped=3.0 +2024-09-19 11:40:57,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=657600.0, ans=0.025 +2024-09-19 11:40:57,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=657600.0, ans=0.0 +2024-09-19 11:41:31,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.58 vs. limit=10.0 +2024-09-19 11:41:33,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=657680.0, ans=0.0 +2024-09-19 11:41:46,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.20 vs. limit=12.0 +2024-09-19 11:42:07,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.40 vs. limit=15.0 +2024-09-19 11:42:07,915 INFO [train.py:1198] (1/2) Epoch 37, batch 1550, loss[loss=0.2517, ctc_loss=0.134, cr_loss=0.4114, attn_decoder_loss=0.2556, over 29519.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1156, cr_loss=0.357, attn_decoder_loss=0.2406, over 5783013.75 frames. ], batch size: 90, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:42:32,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=657840.0, ans=0.025 +2024-09-19 11:42:39,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=657880.0, ans=0.2 +2024-09-19 11:42:56,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=657920.0, ans=0.125 +2024-09-19 11:43:23,367 INFO [train.py:1198] (1/2) Epoch 37, batch 1600, loss[loss=0.2358, ctc_loss=0.1138, cr_loss=0.3643, attn_decoder_loss=0.2413, over 29668.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1156, cr_loss=0.3571, attn_decoder_loss=0.2403, over 5765563.60 frames. 
], batch size: 85, lr: 2.99e-03, grad_scale: 16.0 +2024-09-19 11:43:26,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=658000.0, ans=0.125 +2024-09-19 11:43:29,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=658000.0, ans=0.0 +2024-09-19 11:43:32,327 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.772e+01 8.728e+01 9.146e+01 9.748e+01 2.180e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-19 11:43:55,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=658080.0, ans=0.0 +2024-09-19 11:44:02,531 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.12 vs. limit=10.0 +2024-09-19 11:44:33,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=658160.0, ans=0.125 +2024-09-19 11:44:33,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=658160.0, ans=0.0 +2024-09-19 11:44:38,984 INFO [train.py:1198] (1/2) Epoch 37, batch 1650, loss[loss=0.2475, ctc_loss=0.1235, cr_loss=0.3745, attn_decoder_loss=0.2529, over 29704.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1154, cr_loss=0.3567, attn_decoder_loss=0.2402, over 5760472.02 frames. ], batch size: 89, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:44:49,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=658200.0, ans=0.125 +2024-09-19 11:44:58,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=658240.0, ans=0.0 +2024-09-19 11:44:59,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=658240.0, ans=0.2 +2024-09-19 11:45:44,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=658360.0, ans=0.125 +2024-09-19 11:45:47,124 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:45:59,547 INFO [train.py:1198] (1/2) Epoch 37, batch 1700, loss[loss=0.2116, ctc_loss=0.09662, cr_loss=0.305, attn_decoder_loss=0.2176, over 29537.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1153, cr_loss=0.3563, attn_decoder_loss=0.24, over 5782722.06 frames. ], batch size: 69, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:46:03,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.31 vs. 
limit=15.0 +2024-09-19 11:46:10,138 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.251e+01 8.607e+01 9.068e+01 9.479e+01 1.872e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-19 11:46:14,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=658440.0, ans=0.0 +2024-09-19 11:46:28,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=658480.0, ans=0.125 +2024-09-19 11:46:34,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=658480.0, ans=0.125 +2024-09-19 11:46:42,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.07 vs. limit=22.5 +2024-09-19 11:46:49,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=658520.0, ans=0.0 +2024-09-19 11:46:53,450 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.16 vs. limit=15.0 +2024-09-19 11:47:13,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=658600.0, ans=0.125 +2024-09-19 11:47:15,006 INFO [train.py:1198] (1/2) Epoch 37, batch 1750, loss[loss=0.2182, ctc_loss=0.1096, cr_loss=0.3564, attn_decoder_loss=0.2224, over 29362.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.115, cr_loss=0.3555, attn_decoder_loss=0.2394, over 5790041.67 frames. ], batch size: 67, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:47:20,244 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.47 vs. limit=15.0 +2024-09-19 11:47:51,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=658680.0, ans=0.0 +2024-09-19 11:48:05,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=658720.0, ans=0.125 +2024-09-19 11:48:11,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=658720.0, ans=0.2 +2024-09-19 11:48:24,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=658760.0, ans=0.0 +2024-09-19 11:48:26,666 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.29 vs. limit=15.0 +2024-09-19 11:48:30,138 INFO [train.py:1198] (1/2) Epoch 37, batch 1800, loss[loss=0.2369, ctc_loss=0.1124, cr_loss=0.359, attn_decoder_loss=0.2428, over 29699.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1151, cr_loss=0.3559, attn_decoder_loss=0.2397, over 5792001.26 frames. ], batch size: 83, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:48:40,895 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.421e+01 8.885e+01 9.322e+01 2.627e+02, threshold=1.777e+02, percent-clipped=1.0 +2024-09-19 11:49:44,740 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.12 vs. 
limit=6.0 +2024-09-19 11:49:50,129 INFO [train.py:1198] (1/2) Epoch 37, batch 1850, loss[loss=0.2404, ctc_loss=0.1174, cr_loss=0.3439, attn_decoder_loss=0.2464, over 29642.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.115, cr_loss=0.3554, attn_decoder_loss=0.2398, over 5796935.36 frames. ], batch size: 86, lr: 2.99e-03, grad_scale: 8.0 +2024-09-19 11:50:35,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=659120.0, ans=0.125 +2024-09-19 11:50:52,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=659160.0, ans=0.1 +2024-09-19 11:50:53,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=659160.0, ans=0.2 +2024-09-19 11:51:00,369 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.17 vs. limit=10.0 +2024-09-19 11:51:02,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=659160.0, ans=0.1 +2024-09-19 11:51:05,865 INFO [train.py:1198] (1/2) Epoch 37, batch 1900, loss[loss=0.2409, ctc_loss=0.1117, cr_loss=0.3603, attn_decoder_loss=0.2473, over 29734.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1154, cr_loss=0.3564, attn_decoder_loss=0.2404, over 5805152.84 frames. ], batch size: 89, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 11:51:11,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.63 vs. limit=15.0 +2024-09-19 11:51:16,269 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.346e+01 8.529e+01 8.942e+01 9.570e+01 1.575e+02, threshold=1.788e+02, percent-clipped=0.0 +2024-09-19 11:51:48,937 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.62 vs. limit=10.0 +2024-09-19 11:52:09,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=659360.0, ans=0.125 +2024-09-19 11:52:21,354 INFO [train.py:1198] (1/2) Epoch 37, batch 1950, loss[loss=0.2287, ctc_loss=0.1145, cr_loss=0.3634, attn_decoder_loss=0.2333, over 29433.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1156, cr_loss=0.3573, attn_decoder_loss=0.2413, over 5820317.73 frames. ], batch size: 78, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 11:52:26,978 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.98 vs. limit=15.0 +2024-09-19 11:52:33,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=659400.0, ans=0.05 +2024-09-19 11:52:38,478 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:52:46,034 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.38 vs. 
limit=22.5 +2024-09-19 11:53:01,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=659480.0, ans=0.125 +2024-09-19 11:53:09,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=659520.0, ans=0.0 +2024-09-19 11:53:17,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=659520.0, ans=0.125 +2024-09-19 11:53:17,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=659520.0, ans=0.1 +2024-09-19 11:53:18,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=659520.0, ans=0.125 +2024-09-19 11:53:24,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=659560.0, ans=0.025 +2024-09-19 11:53:41,382 INFO [train.py:1198] (1/2) Epoch 37, batch 2000, loss[loss=0.2124, ctc_loss=0.1007, cr_loss=0.3357, attn_decoder_loss=0.2174, over 29341.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1163, cr_loss=0.3588, attn_decoder_loss=0.242, over 5798556.15 frames. ], batch size: 67, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 11:53:47,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=659600.0, ans=0.125 +2024-09-19 11:53:51,965 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 8.756e+01 9.402e+01 9.802e+01 1.853e+02, threshold=1.880e+02, percent-clipped=1.0 +2024-09-19 11:53:55,525 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 11:53:55,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten.whitening_limit, batch_count=659640.0, ans=15.0 +2024-09-19 11:54:05,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=659640.0, ans=0.0 +2024-09-19 11:54:08,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=659640.0, ans=0.035 +2024-09-19 11:54:11,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=659680.0, ans=0.0 +2024-09-19 11:54:56,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=659800.0, ans=0.0 +2024-09-19 11:54:57,539 INFO [train.py:1198] (1/2) Epoch 37, batch 2050, loss[loss=0.2031, ctc_loss=0.09512, cr_loss=0.322, attn_decoder_loss=0.2079, over 29431.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1156, cr_loss=0.3568, attn_decoder_loss=0.2409, over 5790919.87 frames. ], batch size: 70, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 11:54:58,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.47 vs. 
limit=15.0 +2024-09-19 11:55:32,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=659880.0, ans=0.125 +2024-09-19 11:55:34,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=659880.0, ans=0.125 +2024-09-19 11:55:47,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=659920.0, ans=0.0 +2024-09-19 11:55:50,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=659920.0, ans=0.125 +2024-09-19 11:56:13,779 INFO [train.py:1198] (1/2) Epoch 37, batch 2100, loss[loss=0.2305, ctc_loss=0.1127, cr_loss=0.3578, attn_decoder_loss=0.2356, over 29778.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1148, cr_loss=0.3553, attn_decoder_loss=0.2401, over 5803241.60 frames. ], batch size: 81, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 11:56:19,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=660000.0, ans=0.125 +2024-09-19 11:56:24,222 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.530e+01 8.399e+01 8.911e+01 9.542e+01 1.204e+02, threshold=1.782e+02, percent-clipped=0.0 +2024-09-19 11:56:24,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=660000.0, ans=0.125 +2024-09-19 11:56:26,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=660000.0, ans=0.1 +2024-09-19 11:56:26,614 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.68 vs. limit=15.0 +2024-09-19 11:56:36,811 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.05 vs. limit=6.0 +2024-09-19 11:56:54,693 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.37 vs. limit=15.0 +2024-09-19 11:57:11,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=660120.0, ans=0.1 +2024-09-19 11:57:26,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=660160.0, ans=0.1 +2024-09-19 11:57:27,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=660160.0, ans=0.125 +2024-09-19 11:57:33,572 INFO [train.py:1198] (1/2) Epoch 37, batch 2150, loss[loss=0.2292, ctc_loss=0.1097, cr_loss=0.3502, attn_decoder_loss=0.2346, over 29491.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1145, cr_loss=0.3549, attn_decoder_loss=0.2399, over 5817613.30 frames. ], batch size: 78, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 11:57:52,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.26 vs. 
limit=15.0 +2024-09-19 11:58:04,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=660280.0, ans=0.2 +2024-09-19 11:58:18,205 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.65 vs. limit=12.0 +2024-09-19 11:58:35,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=660360.0, ans=0.0 +2024-09-19 11:58:48,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=660400.0, ans=0.125 +2024-09-19 11:58:49,161 INFO [train.py:1198] (1/2) Epoch 37, batch 2200, loss[loss=0.2502, ctc_loss=0.1241, cr_loss=0.3754, attn_decoder_loss=0.2559, over 29637.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1152, cr_loss=0.3563, attn_decoder_loss=0.2403, over 5813499.24 frames. ], batch size: 86, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 11:58:50,241 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.13 vs. limit=15.0 +2024-09-19 11:59:01,231 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.189e+01 8.378e+01 8.935e+01 9.603e+01 1.294e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-19 11:59:53,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=660560.0, ans=0.0 +2024-09-19 12:00:03,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=660600.0, ans=0.07 +2024-09-19 12:00:03,991 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.63 vs. limit=22.5 +2024-09-19 12:00:04,885 INFO [train.py:1198] (1/2) Epoch 37, batch 2250, loss[loss=0.2388, ctc_loss=0.1159, cr_loss=0.3564, attn_decoder_loss=0.2445, over 29700.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1148, cr_loss=0.3559, attn_decoder_loss=0.2401, over 5811135.17 frames. ], batch size: 82, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 12:00:21,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=660640.0, ans=0.125 +2024-09-19 12:00:52,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=660720.0, ans=0.95 +2024-09-19 12:01:21,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=660760.0, ans=0.0 +2024-09-19 12:01:25,027 INFO [train.py:1198] (1/2) Epoch 37, batch 2300, loss[loss=0.2036, ctc_loss=0.0903, cr_loss=0.2931, attn_decoder_loss=0.2097, over 29318.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1142, cr_loss=0.354, attn_decoder_loss=0.2392, over 5799440.25 frames. 
], batch size: 71, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 12:01:36,927 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.404e+01 8.548e+01 9.077e+01 9.950e+01 1.821e+02, threshold=1.815e+02, percent-clipped=1.0 +2024-09-19 12:02:09,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=660920.0, ans=0.0 +2024-09-19 12:02:18,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=660920.0, ans=0.125 +2024-09-19 12:02:23,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=660920.0, ans=0.0 +2024-09-19 12:02:29,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=660960.0, ans=0.0 +2024-09-19 12:02:41,027 INFO [train.py:1198] (1/2) Epoch 37, batch 2350, loss[loss=0.2388, ctc_loss=0.1147, cr_loss=0.3594, attn_decoder_loss=0.2446, over 29702.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1144, cr_loss=0.3552, attn_decoder_loss=0.2393, over 5804777.07 frames. ], batch size: 83, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 12:02:47,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=661000.0, ans=0.0 +2024-09-19 12:03:19,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=661080.0, ans=0.1 +2024-09-19 12:03:25,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=661120.0, ans=0.125 +2024-09-19 12:03:32,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=661120.0, ans=0.125 +2024-09-19 12:03:41,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.92 vs. limit=22.5 +2024-09-19 12:03:52,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=661160.0, ans=0.1 +2024-09-19 12:03:56,970 INFO [train.py:1198] (1/2) Epoch 37, batch 2400, loss[loss=0.2272, ctc_loss=0.1166, cr_loss=0.3519, attn_decoder_loss=0.2317, over 29544.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1151, cr_loss=0.356, attn_decoder_loss=0.2399, over 5809132.27 frames. 
], batch size: 76, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 12:04:08,988 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.655e+01 8.594e+01 9.080e+01 9.693e+01 1.252e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 12:04:10,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=661240.0, ans=0.0 +2024-09-19 12:04:22,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=661240.0, ans=0.125 +2024-09-19 12:04:23,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=661240.0, ans=0.025 +2024-09-19 12:04:23,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=661240.0, ans=0.2 +2024-09-19 12:04:32,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=661280.0, ans=0.125 +2024-09-19 12:04:44,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=661320.0, ans=0.125 +2024-09-19 12:05:17,004 INFO [train.py:1198] (1/2) Epoch 37, batch 2450, loss[loss=0.238, ctc_loss=0.1188, cr_loss=0.355, attn_decoder_loss=0.2433, over 29704.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1155, cr_loss=0.3571, attn_decoder_loss=0.2407, over 5786171.16 frames. ], batch size: 82, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 12:05:21,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=661400.0, ans=0.125 +2024-09-19 12:05:21,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=661400.0, ans=0.125 +2024-09-19 12:05:25,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=661400.0, ans=0.125 +2024-09-19 12:05:26,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=661400.0, ans=0.125 +2024-09-19 12:05:29,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=661400.0, ans=0.1 +2024-09-19 12:05:34,341 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.38 vs. limit=22.5 +2024-09-19 12:06:04,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=661520.0, ans=0.0 +2024-09-19 12:06:10,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=661520.0, ans=0.125 +2024-09-19 12:06:19,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=661560.0, ans=0.0 +2024-09-19 12:06:21,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=661560.0, ans=0.125 +2024-09-19 12:06:23,083 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.52 vs. 
limit=15.0 +2024-09-19 12:06:28,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=661560.0, ans=0.125 +2024-09-19 12:06:33,566 INFO [train.py:1198] (1/2) Epoch 37, batch 2500, loss[loss=0.2439, ctc_loss=0.1224, cr_loss=0.3539, attn_decoder_loss=0.2495, over 29606.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1162, cr_loss=0.3585, attn_decoder_loss=0.2409, over 5796443.66 frames. ], batch size: 86, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 12:06:44,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=661600.0, ans=0.125 +2024-09-19 12:06:45,678 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.493e+01 8.640e+01 9.238e+01 1.003e+02 4.668e+02, threshold=1.848e+02, percent-clipped=4.0 +2024-09-19 12:06:53,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=661640.0, ans=0.1 +2024-09-19 12:07:10,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=661680.0, ans=0.125 +2024-09-19 12:07:12,307 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.00 vs. limit=10.0 +2024-09-19 12:07:13,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=661680.0, ans=0.2 +2024-09-19 12:07:31,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=661720.0, ans=0.0 +2024-09-19 12:07:33,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=661760.0, ans=0.125 +2024-09-19 12:07:46,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=661760.0, ans=0.2 +2024-09-19 12:07:49,438 INFO [train.py:1198] (1/2) Epoch 37, batch 2550, loss[loss=0.2086, ctc_loss=0.09405, cr_loss=0.3084, attn_decoder_loss=0.2145, over 29360.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1156, cr_loss=0.3571, attn_decoder_loss=0.2406, over 5799734.17 frames. ], batch size: 67, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 12:07:58,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=661800.0, ans=0.2 +2024-09-19 12:08:00,573 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.91 vs. limit=22.5 +2024-09-19 12:08:06,411 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:08:12,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=661840.0, ans=0.125 +2024-09-19 12:08:23,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=661880.0, ans=0.125 +2024-09-19 12:08:28,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.76 vs. 
limit=15.0 +2024-09-19 12:08:35,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=661920.0, ans=0.125 +2024-09-19 12:08:48,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=661920.0, ans=0.125 +2024-09-19 12:09:00,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=661960.0, ans=0.05 +2024-09-19 12:09:04,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=661960.0, ans=0.035 +2024-09-19 12:09:07,398 INFO [train.py:1198] (1/2) Epoch 37, batch 2600, loss[loss=0.2324, ctc_loss=0.1066, cr_loss=0.3465, attn_decoder_loss=0.2387, over 29433.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1155, cr_loss=0.3572, attn_decoder_loss=0.2409, over 5796823.92 frames. ], batch size: 78, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 12:09:13,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.44 vs. limit=15.0 +2024-09-19 12:09:21,435 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.726e+01 8.481e+01 8.933e+01 9.512e+01 2.457e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-19 12:09:21,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=662000.0, ans=0.125 +2024-09-19 12:09:21,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=662000.0, ans=0.025 +2024-09-19 12:09:26,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=662040.0, ans=0.0 +2024-09-19 12:09:32,771 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.11 vs. limit=15.0 +2024-09-19 12:10:23,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=662200.0, ans=0.125 +2024-09-19 12:10:23,690 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.49 vs. limit=12.0 +2024-09-19 12:10:24,382 INFO [train.py:1198] (1/2) Epoch 37, batch 2650, loss[loss=0.2482, ctc_loss=0.1225, cr_loss=0.3758, attn_decoder_loss=0.2539, over 29291.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1157, cr_loss=0.3573, attn_decoder_loss=0.2411, over 5802405.51 frames. 
], batch size: 100, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 12:10:26,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=662200.0, ans=0.0 +2024-09-19 12:10:35,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=662200.0, ans=0.125 +2024-09-19 12:10:42,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=662240.0, ans=0.125 +2024-09-19 12:10:58,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=662280.0, ans=0.2 +2024-09-19 12:11:09,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=662320.0, ans=0.0 +2024-09-19 12:11:37,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=662360.0, ans=0.125 +2024-09-19 12:11:40,389 INFO [train.py:1198] (1/2) Epoch 37, batch 2700, loss[loss=0.2337, ctc_loss=0.1148, cr_loss=0.357, attn_decoder_loss=0.239, over 29494.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1158, cr_loss=0.357, attn_decoder_loss=0.2412, over 5797558.37 frames. ], batch size: 87, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 12:11:52,431 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.281e+01 8.575e+01 9.095e+01 9.529e+01 6.705e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-19 12:11:55,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=662440.0, ans=0.0 +2024-09-19 12:12:30,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=662520.0, ans=0.125 +2024-09-19 12:12:36,628 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.05 vs. limit=6.0 +2024-09-19 12:12:39,766 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.46 vs. limit=22.5 +2024-09-19 12:12:58,483 INFO [train.py:1198] (1/2) Epoch 37, batch 2750, loss[loss=0.2291, ctc_loss=0.1126, cr_loss=0.3536, attn_decoder_loss=0.2342, over 29510.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1146, cr_loss=0.355, attn_decoder_loss=0.2399, over 5796664.41 frames. 
], batch size: 75, lr: 2.98e-03, grad_scale: 16.0 +2024-09-19 12:13:04,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=662600.0, ans=0.015 +2024-09-19 12:13:26,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=662640.0, ans=0.125 +2024-09-19 12:13:28,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=662640.0, ans=0.125 +2024-09-19 12:13:28,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=662640.0, ans=0.025 +2024-09-19 12:13:28,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=662640.0, ans=0.125 +2024-09-19 12:13:41,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=662680.0, ans=0.0 +2024-09-19 12:14:04,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.10 vs. limit=15.0 +2024-09-19 12:14:12,839 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.33 vs. limit=15.0 +2024-09-19 12:14:13,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=662760.0, ans=10.0 +2024-09-19 12:14:15,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=662800.0, ans=0.5 +2024-09-19 12:14:16,755 INFO [train.py:1198] (1/2) Epoch 37, batch 2800, loss[loss=0.2441, ctc_loss=0.1271, cr_loss=0.3512, attn_decoder_loss=0.2493, over 20405.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1151, cr_loss=0.3559, attn_decoder_loss=0.24, over 5777905.33 frames. ], batch size: 210, lr: 2.98e-03, grad_scale: 32.0 +2024-09-19 12:14:24,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=662800.0, ans=0.2 +2024-09-19 12:14:27,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=662800.0, ans=0.125 +2024-09-19 12:14:30,283 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.352e+01 8.502e+01 8.910e+01 9.403e+01 2.471e+02, threshold=1.782e+02, percent-clipped=1.0 +2024-09-19 12:14:50,314 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:15:06,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=662920.0, ans=0.07 +2024-09-19 12:15:08,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=662920.0, ans=0.2 +2024-09-19 12:15:32,005 INFO [train.py:1198] (1/2) Epoch 37, batch 2850, loss[loss=0.2245, ctc_loss=0.1003, cr_loss=0.3194, attn_decoder_loss=0.2312, over 29520.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1153, cr_loss=0.3556, attn_decoder_loss=0.2404, over 5763305.30 frames. 
], batch size: 77, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 12:15:50,190 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.47 vs. limit=15.0 +2024-09-19 12:16:42,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=663160.0, ans=0.125 +2024-09-19 12:16:50,711 INFO [train.py:1198] (1/2) Epoch 37, batch 2900, loss[loss=0.2274, ctc_loss=0.1032, cr_loss=0.333, attn_decoder_loss=0.2338, over 29427.00 frames. ], tot_loss[loss=0.2366, ctc_loss=0.1164, cr_loss=0.3585, attn_decoder_loss=0.2419, over 5789212.11 frames. ], batch size: 79, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 12:16:50,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=663200.0, ans=0.1 +2024-09-19 12:17:07,907 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.541e+01 8.975e+01 9.658e+01 1.927e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-19 12:17:10,405 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.81 vs. limit=15.0 +2024-09-19 12:17:15,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=663240.0, ans=0.2 +2024-09-19 12:17:29,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=663280.0, ans=0.125 +2024-09-19 12:17:33,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=663280.0, ans=0.125 +2024-09-19 12:17:33,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=663280.0, ans=10.0 +2024-09-19 12:17:38,365 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.48 vs. limit=15.0 +2024-09-19 12:17:53,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=663360.0, ans=0.125 +2024-09-19 12:17:59,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.69 vs. limit=15.0 +2024-09-19 12:18:02,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=663360.0, ans=0.0 +2024-09-19 12:18:04,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.80 vs. limit=12.0 +2024-09-19 12:18:07,937 INFO [train.py:1198] (1/2) Epoch 37, batch 2950, loss[loss=0.226, ctc_loss=0.09673, cr_loss=0.3194, attn_decoder_loss=0.2332, over 29519.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1151, cr_loss=0.3555, attn_decoder_loss=0.2406, over 5782382.99 frames. 
], batch size: 75, lr: 2.98e-03, grad_scale: 8.0 +2024-09-19 12:18:17,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=663400.0, ans=0.0 +2024-09-19 12:18:38,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=663480.0, ans=0.2 +2024-09-19 12:18:49,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=663480.0, ans=0.2 +2024-09-19 12:18:59,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=663520.0, ans=0.125 +2024-09-19 12:19:17,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=663560.0, ans=0.0 +2024-09-19 12:19:23,862 INFO [train.py:1198] (1/2) Epoch 37, batch 3000, loss[loss=0.2306, ctc_loss=0.1083, cr_loss=0.33, attn_decoder_loss=0.2368, over 29749.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1144, cr_loss=0.3546, attn_decoder_loss=0.2402, over 5782984.52 frames. ], batch size: 81, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:19:23,862 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 12:19:42,338 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.1.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.1714, 4.7660, 4.6118, 4.3581], device='cuda:1') +2024-09-19 12:19:43,125 INFO [train.py:1230] (1/2) Epoch 37, validation: loss=0.212, ctc_loss=0.03675, cr_loss=6.305e-15, attn_decoder_loss=0.2315, over 944034.00 frames. +2024-09-19 12:19:43,126 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 12:19:48,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=663600.0, ans=0.125 +2024-09-19 12:19:48,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=663600.0, ans=0.125 +2024-09-19 12:19:58,485 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.415e+01 8.507e+01 8.935e+01 9.407e+01 3.949e+02, threshold=1.787e+02, percent-clipped=1.0 +2024-09-19 12:20:15,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=663680.0, ans=0.2 +2024-09-19 12:20:36,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=663720.0, ans=0.1 +2024-09-19 12:21:01,306 INFO [train.py:1198] (1/2) Epoch 37, batch 3050, loss[loss=0.2243, ctc_loss=0.1039, cr_loss=0.3336, attn_decoder_loss=0.2303, over 29536.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1152, cr_loss=0.3566, attn_decoder_loss=0.2409, over 5777027.38 frames. ], batch size: 76, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:21:07,158 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=9.29 vs. limit=15.0 +2024-09-19 12:21:23,440 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.01 vs. 
limit=15.0 +2024-09-19 12:21:30,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=663880.0, ans=0.1 +2024-09-19 12:21:42,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=663880.0, ans=0.125 +2024-09-19 12:22:17,490 INFO [train.py:1198] (1/2) Epoch 37, batch 3100, loss[loss=0.2543, ctc_loss=0.1368, cr_loss=0.4001, attn_decoder_loss=0.2585, over 29255.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1155, cr_loss=0.3568, attn_decoder_loss=0.2408, over 5776733.47 frames. ], batch size: 100, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:22:25,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=664000.0, ans=0.125 +2024-09-19 12:22:32,775 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.352e+01 8.715e+01 9.369e+01 9.767e+01 1.782e+02, threshold=1.874e+02, percent-clipped=0.0 +2024-09-19 12:22:41,489 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.60 vs. limit=15.0 +2024-09-19 12:22:49,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=664080.0, ans=0.125 +2024-09-19 12:23:05,573 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.65 vs. limit=22.5 +2024-09-19 12:23:35,721 INFO [train.py:1198] (1/2) Epoch 37, batch 3150, loss[loss=0.2488, ctc_loss=0.1242, cr_loss=0.3742, attn_decoder_loss=0.2544, over 28858.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1154, cr_loss=0.3567, attn_decoder_loss=0.2409, over 5783011.06 frames. ], batch size: 104, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:23:43,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=664200.0, ans=0.125 +2024-09-19 12:23:51,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.68 vs. limit=15.0 +2024-09-19 12:24:17,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=664280.0, ans=0.0 +2024-09-19 12:24:31,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=664320.0, ans=0.125 +2024-09-19 12:24:32,210 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.02 vs. limit=22.5 +2024-09-19 12:24:33,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=664320.0, ans=0.0 +2024-09-19 12:24:34,333 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.64 vs. limit=5.0 +2024-09-19 12:24:53,361 INFO [train.py:1198] (1/2) Epoch 37, batch 3200, loss[loss=0.2307, ctc_loss=0.1166, cr_loss=0.3692, attn_decoder_loss=0.2352, over 29416.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1149, cr_loss=0.3554, attn_decoder_loss=0.2401, over 5792940.91 frames. 
], batch size: 79, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:24:55,849 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.70 vs. limit=22.5 +2024-09-19 12:25:04,429 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=664400.0, ans=0.0 +2024-09-19 12:25:08,494 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.588e+01 8.621e+01 9.120e+01 9.766e+01 2.704e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 12:25:34,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=664480.0, ans=0.125 +2024-09-19 12:25:55,447 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.39 vs. limit=8.0 +2024-09-19 12:26:00,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=664560.0, ans=0.125 +2024-09-19 12:26:07,201 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.76 vs. limit=15.0 +2024-09-19 12:26:07,557 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.65 vs. limit=15.0 +2024-09-19 12:26:09,283 INFO [train.py:1198] (1/2) Epoch 37, batch 3250, loss[loss=0.2405, ctc_loss=0.1179, cr_loss=0.3514, attn_decoder_loss=0.2463, over 29696.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1147, cr_loss=0.3555, attn_decoder_loss=0.2404, over 5800251.77 frames. ], batch size: 84, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:26:09,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=664600.0, ans=0.125 +2024-09-19 12:26:15,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=664600.0, ans=0.125 +2024-09-19 12:26:26,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=664640.0, ans=0.125 +2024-09-19 12:26:57,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=664720.0, ans=0.05 +2024-09-19 12:27:17,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=664760.0, ans=0.2 +2024-09-19 12:27:27,407 INFO [train.py:1198] (1/2) Epoch 37, batch 3300, loss[loss=0.2405, ctc_loss=0.1155, cr_loss=0.367, attn_decoder_loss=0.2462, over 28282.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1141, cr_loss=0.3541, attn_decoder_loss=0.2393, over 5798665.45 frames. 
], batch size: 111, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:27:33,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=664800.0, ans=0.0 +2024-09-19 12:27:42,599 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.021e+01 8.526e+01 9.078e+01 9.888e+01 1.961e+02, threshold=1.816e+02, percent-clipped=2.0 +2024-09-19 12:27:42,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=664840.0, ans=0.125 +2024-09-19 12:27:56,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=664880.0, ans=0.1 +2024-09-19 12:27:59,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=664880.0, ans=0.125 +2024-09-19 12:28:07,360 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.56 vs. limit=22.5 +2024-09-19 12:28:08,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=664880.0, ans=0.125 +2024-09-19 12:28:43,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=665000.0, ans=0.0 +2024-09-19 12:28:45,005 INFO [train.py:1198] (1/2) Epoch 37, batch 3350, loss[loss=0.2482, ctc_loss=0.1245, cr_loss=0.3855, attn_decoder_loss=0.2534, over 28855.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1154, cr_loss=0.3568, attn_decoder_loss=0.2405, over 5774825.08 frames. ], batch size: 104, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:28:54,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=665000.0, ans=0.125 +2024-09-19 12:28:55,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=665000.0, ans=0.125 +2024-09-19 12:29:01,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=665040.0, ans=0.125 +2024-09-19 12:29:10,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=665040.0, ans=0.0 +2024-09-19 12:29:22,417 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.42 vs. limit=15.0 +2024-09-19 12:29:29,372 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.49 vs. limit=15.0 +2024-09-19 12:29:50,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=665160.0, ans=0.0 +2024-09-19 12:30:00,464 INFO [train.py:1198] (1/2) Epoch 37, batch 3400, loss[loss=0.213, ctc_loss=0.1005, cr_loss=0.3258, attn_decoder_loss=0.2183, over 29319.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1158, cr_loss=0.3572, attn_decoder_loss=0.2405, over 5766111.87 frames. 
], batch size: 67, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:30:17,145 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.972e+01 8.762e+01 9.202e+01 9.777e+01 2.648e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-19 12:30:32,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=665280.0, ans=0.0 +2024-09-19 12:30:54,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=665320.0, ans=0.125 +2024-09-19 12:31:05,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.69 vs. limit=10.0 +2024-09-19 12:31:06,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=665360.0, ans=0.125 +2024-09-19 12:31:18,358 INFO [train.py:1198] (1/2) Epoch 37, batch 3450, loss[loss=0.2396, ctc_loss=0.1144, cr_loss=0.3497, attn_decoder_loss=0.2458, over 28370.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1154, cr_loss=0.3564, attn_decoder_loss=0.2406, over 5774464.72 frames. ], batch size: 112, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:31:27,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=665400.0, ans=0.125 +2024-09-19 12:31:56,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=665480.0, ans=0.025 +2024-09-19 12:32:07,336 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:32:07,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.58 vs. limit=10.0 +2024-09-19 12:32:36,778 INFO [train.py:1198] (1/2) Epoch 37, batch 3500, loss[loss=0.208, ctc_loss=0.09652, cr_loss=0.3171, attn_decoder_loss=0.2134, over 29280.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.115, cr_loss=0.3556, attn_decoder_loss=0.2401, over 5776719.45 frames. ], batch size: 71, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:32:43,591 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.28 vs. limit=15.0 +2024-09-19 12:32:52,961 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.26 vs. 
limit=15.0 +2024-09-19 12:32:53,368 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.540e+01 8.561e+01 8.978e+01 9.459e+01 2.098e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-19 12:32:55,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=665640.0, ans=0.125 +2024-09-19 12:33:11,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=665680.0, ans=0.0 +2024-09-19 12:33:15,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=665680.0, ans=0.2 +2024-09-19 12:33:32,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=665720.0, ans=0.125 +2024-09-19 12:33:47,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=665760.0, ans=0.025 +2024-09-19 12:33:51,559 INFO [train.py:1198] (1/2) Epoch 37, batch 3550, loss[loss=0.2403, ctc_loss=0.1156, cr_loss=0.3512, attn_decoder_loss=0.2463, over 29707.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1152, cr_loss=0.3562, attn_decoder_loss=0.2403, over 5782404.86 frames. ], batch size: 89, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:33:57,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=665800.0, ans=0.125 +2024-09-19 12:33:59,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=665800.0, ans=0.125 +2024-09-19 12:34:34,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.62 vs. limit=22.5 +2024-09-19 12:34:48,634 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.64 vs. limit=15.0 +2024-09-19 12:35:05,687 INFO [train.py:1198] (1/2) Epoch 37, batch 3600, loss[loss=0.2368, ctc_loss=0.1226, cr_loss=0.3808, attn_decoder_loss=0.241, over 29468.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1153, cr_loss=0.3567, attn_decoder_loss=0.2405, over 5791545.89 frames. ], batch size: 77, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:35:13,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=666000.0, ans=0.2 +2024-09-19 12:35:22,125 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.557e+01 8.583e+01 9.106e+01 9.636e+01 2.538e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-19 12:35:22,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=666040.0, ans=0.0 +2024-09-19 12:35:30,196 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.83 vs. limit=15.0 +2024-09-19 12:35:44,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=666080.0, ans=0.125 +2024-09-19 12:36:20,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=666200.0, ans=0.0 +2024-09-19 12:36:22,093 INFO [train.py:1198] (1/2) Epoch 37, batch 3650, loss[loss=0.2468, ctc_loss=0.1212, cr_loss=0.3873, attn_decoder_loss=0.2522, over 29501.00 frames. 
], tot_loss[loss=0.2342, ctc_loss=0.1146, cr_loss=0.3551, attn_decoder_loss=0.2396, over 5794409.01 frames. ], batch size: 90, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:36:31,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=666200.0, ans=0.0 +2024-09-19 12:36:41,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=666240.0, ans=0.0 +2024-09-19 12:37:00,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=666280.0, ans=0.2 +2024-09-19 12:37:23,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=666360.0, ans=0.125 +2024-09-19 12:37:32,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=666360.0, ans=0.0 +2024-09-19 12:37:33,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=666360.0, ans=0.125 +2024-09-19 12:37:36,693 INFO [train.py:1198] (1/2) Epoch 37, batch 3700, loss[loss=0.2378, ctc_loss=0.1159, cr_loss=0.3767, attn_decoder_loss=0.243, over 29728.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1146, cr_loss=0.3556, attn_decoder_loss=0.2398, over 5804343.36 frames. ], batch size: 84, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:37:45,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=666400.0, ans=0.125 +2024-09-19 12:37:56,007 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.469e+01 9.062e+01 9.671e+01 3.468e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 12:38:02,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=666440.0, ans=0.0 +2024-09-19 12:38:03,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=666440.0, ans=0.125 +2024-09-19 12:38:13,081 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.75 vs. limit=15.0 +2024-09-19 12:38:15,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=666480.0, ans=0.125 +2024-09-19 12:38:20,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=666520.0, ans=0.0 +2024-09-19 12:38:30,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.87 vs. limit=15.0 +2024-09-19 12:38:52,740 INFO [train.py:1198] (1/2) Epoch 37, batch 3750, loss[loss=0.2118, ctc_loss=0.1006, cr_loss=0.3444, attn_decoder_loss=0.2165, over 29398.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1148, cr_loss=0.3561, attn_decoder_loss=0.2398, over 5808087.32 frames. 
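The WARNING [optim.py] lines above report the gradient-norm statistics behind clipping: five quantile values (min, 25%, median, 75%, max) over recent batches, plus the active threshold. In every such line here the threshold equals Clipping_scale times the median, e.g. 2.0 * 9.062e+01 = 1.812e+02 in the warning above, and percent-clipped tracks how often the batch gradient norm exceeded it. A hedged sketch of that scheme; the function name, the windowing, and the clipping step are assumptions, not icefall's exact optim.py logic.

import torch

# Sketch only: median-relative gradient clipping as suggested by the WARNINGs.
def clip_by_median(params, recent_grad_norms, clipping_scale=2.0):
    norms = torch.tensor(recent_grad_norms)
    quartiles = torch.quantile(norms, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
    threshold = clipping_scale * quartiles[2]  # threshold = scale * median, as logged
    grads = [p.grad for p in params if p.grad is not None]
    total_norm = torch.norm(torch.stack([g.norm() for g in grads]))
    if total_norm > threshold:  # such batches feed the percent-clipped statistic
        for g in grads:
            g.mul_(threshold / total_norm)
    return quartiles, threshold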
], batch size: 67, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:39:00,606 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:39:02,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=666600.0, ans=0.025 +2024-09-19 12:39:05,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=666600.0, ans=0.0 +2024-09-19 12:39:19,110 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.01 vs. limit=22.5 +2024-09-19 12:39:22,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=666680.0, ans=0.1 +2024-09-19 12:39:30,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=666680.0, ans=0.0 +2024-09-19 12:39:30,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=666680.0, ans=0.125 +2024-09-19 12:39:44,789 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.28 vs. limit=12.0 +2024-09-19 12:39:51,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=666760.0, ans=0.025 +2024-09-19 12:39:58,101 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.25 vs. limit=22.5 +2024-09-19 12:40:07,584 INFO [train.py:1198] (1/2) Epoch 37, batch 3800, loss[loss=0.2424, ctc_loss=0.1175, cr_loss=0.3728, attn_decoder_loss=0.248, over 29638.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1145, cr_loss=0.3554, attn_decoder_loss=0.2395, over 5798231.25 frames. ], batch size: 86, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:40:26,997 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.615e+01 8.580e+01 8.987e+01 9.690e+01 1.357e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-19 12:40:31,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=666840.0, ans=0.125 +2024-09-19 12:40:37,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=666880.0, ans=0.125 +2024-09-19 12:40:48,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=666880.0, ans=0.125 +2024-09-19 12:40:52,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=666920.0, ans=0.1 +2024-09-19 12:41:05,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=666960.0, ans=0.07 +2024-09-19 12:41:21,806 INFO [train.py:1198] (1/2) Epoch 37, batch 3850, loss[loss=0.2514, ctc_loss=0.1287, cr_loss=0.3943, attn_decoder_loss=0.2562, over 29252.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1141, cr_loss=0.3551, attn_decoder_loss=0.2391, over 5813457.11 frames. 
], batch size: 100, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:41:22,236 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:41:30,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.05 vs. limit=10.0 +2024-09-19 12:41:30,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=667000.0, ans=0.1 +2024-09-19 12:41:32,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=667000.0, ans=0.125 +2024-09-19 12:41:35,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=667040.0, ans=0.125 +2024-09-19 12:41:35,964 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.84 vs. limit=22.5 +2024-09-19 12:41:51,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=667080.0, ans=0.1 +2024-09-19 12:42:19,458 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.37 vs. limit=15.0 +2024-09-19 12:42:38,730 INFO [train.py:1198] (1/2) Epoch 37, batch 3900, loss[loss=0.2351, ctc_loss=0.1046, cr_loss=0.3289, attn_decoder_loss=0.2423, over 29627.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.115, cr_loss=0.3566, attn_decoder_loss=0.2398, over 5817760.60 frames. ], batch size: 86, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:42:55,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=667240.0, ans=0.125 +2024-09-19 12:42:57,947 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.596e+01 8.935e+01 9.669e+01 1.380e+02, threshold=1.787e+02, percent-clipped=0.0 +2024-09-19 12:43:52,902 INFO [train.py:1198] (1/2) Epoch 37, batch 3950, loss[loss=0.2376, ctc_loss=0.1162, cr_loss=0.3512, attn_decoder_loss=0.2433, over 29529.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1146, cr_loss=0.3557, attn_decoder_loss=0.2398, over 5836896.49 frames. ], batch size: 97, lr: 2.97e-03, grad_scale: 8.0 +2024-09-19 12:44:11,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=667440.0, ans=0.1 +2024-09-19 12:44:12,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=667440.0, ans=0.2 +2024-09-19 12:44:16,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=667440.0, ans=0.0 +2024-09-19 12:44:25,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=667480.0, ans=10.0 +2024-09-19 12:44:38,017 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.87 vs. 
limit=15.0 +2024-09-19 12:44:40,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=667520.0, ans=0.0 +2024-09-19 12:44:40,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=667520.0, ans=0.125 +2024-09-19 12:44:51,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.03 vs. limit=12.0 +2024-09-19 12:44:59,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.34 vs. limit=10.0 +2024-09-19 12:45:01,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=667560.0, ans=0.05 +2024-09-19 12:45:08,490 INFO [train.py:1198] (1/2) Epoch 37, batch 4000, loss[loss=0.2212, ctc_loss=0.09703, cr_loss=0.3177, attn_decoder_loss=0.2279, over 29528.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.115, cr_loss=0.3564, attn_decoder_loss=0.2402, over 5813938.58 frames. ], batch size: 74, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:45:10,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=667600.0, ans=0.1 +2024-09-19 12:45:13,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=667600.0, ans=0.2 +2024-09-19 12:45:14,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=667600.0, ans=0.125 +2024-09-19 12:45:16,140 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:45:20,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=667600.0, ans=0.2 +2024-09-19 12:45:27,467 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.285e+01 8.490e+01 9.030e+01 9.800e+01 2.988e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-19 12:45:32,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=667640.0, ans=0.2 +2024-09-19 12:45:33,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=667640.0, ans=0.0 +2024-09-19 12:45:35,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=667640.0, ans=0.1 +2024-09-19 12:45:52,239 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.18 vs. limit=15.0 +2024-09-19 12:46:15,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten.whitening_limit, batch_count=667760.0, ans=15.0 +2024-09-19 12:46:22,198 INFO [train.py:1198] (1/2) Epoch 37, batch 4050, loss[loss=0.2547, ctc_loss=0.1453, cr_loss=0.3956, attn_decoder_loss=0.2581, over 19586.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1152, cr_loss=0.3567, attn_decoder_loss=0.2402, over 5795315.72 frames. 
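The ScheduledFloat lines above are scaling.py reporting, per module, the current value (ans=...) of a hyperparameter that is scheduled as a function of batch_count: by this point in training the skip rates have decayed to 0.0 while balancer probabilities sit at their floor of 0.125. A toy piecewise-linear schedule in the same spirit; the breakpoints below are made up for illustration, not the recipe's real schedules.

# Sketch only: a batch_count-dependent scheduled value. Breakpoints are
# illustrative; icefall's scaling.py defines the real ones per hyperparameter.
def scheduled_float(batch_count, schedule=((0.0, 0.2), (4000.0, 0.05), (20000.0, 0.0))):
    if batch_count <= schedule[0][0]:
        return schedule[0][1]
    for (x0, y0), (x1, y1) in zip(schedule, schedule[1:]):
        if batch_count <= x1:
            return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)
    return schedule[-1][1]

# scheduled_float(667640.0) -> 0.0: far past the last breakpoint, matching the
# "...conv_skip_rate, batch_count=667640.0, ans=0.0" entry above.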
], batch size: 211, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:46:31,977 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.68 vs. limit=15.0 +2024-09-19 12:46:35,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=667840.0, ans=0.125 +2024-09-19 12:47:19,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=667920.0, ans=0.1 +2024-09-19 12:47:32,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=667960.0, ans=0.125 +2024-09-19 12:47:37,492 INFO [train.py:1198] (1/2) Epoch 37, batch 4100, loss[loss=0.2597, ctc_loss=0.1354, cr_loss=0.3981, attn_decoder_loss=0.2646, over 29510.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1151, cr_loss=0.3564, attn_decoder_loss=0.2402, over 5791610.25 frames. ], batch size: 90, lr: 2.97e-03, grad_scale: 16.0 +2024-09-19 12:47:56,152 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.370e+01 8.448e+01 9.033e+01 9.875e+01 1.600e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-19 12:48:02,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=668040.0, ans=0.0 +2024-09-19 12:48:08,984 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.16 vs. limit=15.0 +2024-09-19 12:48:30,808 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.54 vs. limit=6.0 +2024-09-19 12:48:34,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=668160.0, ans=0.07 +2024-09-19 12:48:42,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.whiten.whitening_limit, batch_count=668160.0, ans=12.0 +2024-09-19 12:48:46,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=668160.0, ans=0.0 +2024-09-19 12:48:50,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=668200.0, ans=0.1 +2024-09-19 12:48:51,883 INFO [train.py:1198] (1/2) Epoch 37, batch 4150, loss[loss=0.2251, ctc_loss=0.1123, cr_loss=0.35, attn_decoder_loss=0.2299, over 29479.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1151, cr_loss=0.3564, attn_decoder_loss=0.2401, over 5797204.14 frames. 
], batch size: 77, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:48:53,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=668200.0, ans=0.125 +2024-09-19 12:49:05,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=668240.0, ans=0.1 +2024-09-19 12:49:12,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=668240.0, ans=0.125 +2024-09-19 12:49:24,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=668280.0, ans=0.0 +2024-09-19 12:49:27,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=668280.0, ans=0.0 +2024-09-19 12:49:48,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=668320.0, ans=0.125 +2024-09-19 12:49:51,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.56 vs. limit=12.0 +2024-09-19 12:49:54,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.52 vs. limit=22.5 +2024-09-19 12:50:05,531 INFO [train.py:1198] (1/2) Epoch 37, batch 4200, loss[loss=0.2596, ctc_loss=0.1391, cr_loss=0.4137, attn_decoder_loss=0.2638, over 29512.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1151, cr_loss=0.357, attn_decoder_loss=0.2403, over 5798849.27 frames. ], batch size: 90, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:50:13,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=668400.0, ans=0.1 +2024-09-19 12:50:13,870 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.60 vs. 
limit=10.0 +2024-09-19 12:50:16,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=668400.0, ans=0.025 +2024-09-19 12:50:24,820 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.799e+01 8.584e+01 9.010e+01 9.647e+01 2.583e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-19 12:50:27,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=668440.0, ans=0.125 +2024-09-19 12:50:28,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=668440.0, ans=0.0 +2024-09-19 12:50:33,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=668480.0, ans=0.1 +2024-09-19 12:50:52,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=668520.0, ans=0.025 +2024-09-19 12:50:55,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=668520.0, ans=0.125 +2024-09-19 12:51:04,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=668560.0, ans=0.125 +2024-09-19 12:51:20,678 INFO [train.py:1198] (1/2) Epoch 37, batch 4250, loss[loss=0.2194, ctc_loss=0.1007, cr_loss=0.3328, attn_decoder_loss=0.2252, over 29512.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1148, cr_loss=0.3561, attn_decoder_loss=0.2404, over 5804283.76 frames. ], batch size: 74, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:51:21,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=668600.0, ans=0.0 +2024-09-19 12:51:29,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=668600.0, ans=0.125 +2024-09-19 12:51:32,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=668600.0, ans=0.0 +2024-09-19 12:51:41,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=668640.0, ans=0.0 +2024-09-19 12:51:41,230 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 12:51:47,414 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.65 vs. 
limit=15.0 +2024-09-19 12:51:48,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=668680.0, ans=0.025 +2024-09-19 12:51:52,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=668680.0, ans=0.125 +2024-09-19 12:52:03,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=668720.0, ans=0.1 +2024-09-19 12:52:04,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=668720.0, ans=0.1 +2024-09-19 12:52:09,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=668720.0, ans=0.0 +2024-09-19 12:52:35,092 INFO [train.py:1198] (1/2) Epoch 37, batch 4300, loss[loss=0.2414, ctc_loss=0.1202, cr_loss=0.3516, attn_decoder_loss=0.247, over 29540.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1144, cr_loss=0.3549, attn_decoder_loss=0.2405, over 5794181.22 frames. ], batch size: 87, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:52:36,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=668800.0, ans=0.125 +2024-09-19 12:52:39,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=668800.0, ans=0.035 +2024-09-19 12:52:54,262 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.770e+01 8.796e+01 9.094e+01 9.550e+01 2.475e+02, threshold=1.819e+02, percent-clipped=2.0 +2024-09-19 12:52:56,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=668840.0, ans=0.125 +2024-09-19 12:52:58,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=668840.0, ans=0.125 +2024-09-19 12:53:19,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=668920.0, ans=0.125 +2024-09-19 12:53:43,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=668960.0, ans=0.2 +2024-09-19 12:53:48,879 INFO [train.py:1198] (1/2) Epoch 37, batch 4350, loss[loss=0.2614, ctc_loss=0.1408, cr_loss=0.4096, attn_decoder_loss=0.2657, over 29508.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.117, cr_loss=0.3609, attn_decoder_loss=0.2436, over 5797010.50 frames. ], batch size: 97, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:54:02,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=669000.0, ans=22.5 +2024-09-19 12:54:04,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=669040.0, ans=0.125 +2024-09-19 12:54:19,921 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.93 vs. 
limit=6.0 +2024-09-19 12:54:29,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=669080.0, ans=0.1 +2024-09-19 12:54:33,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=669120.0, ans=0.125 +2024-09-19 12:54:47,192 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.96 vs. limit=15.0 +2024-09-19 12:54:48,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=669160.0, ans=0.125 +2024-09-19 12:54:56,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=669160.0, ans=0.1 +2024-09-19 12:55:02,636 INFO [train.py:1198] (1/2) Epoch 37, batch 4400, loss[loss=0.2453, ctc_loss=0.1202, cr_loss=0.3684, attn_decoder_loss=0.251, over 27421.00 frames. ], tot_loss[loss=0.2404, ctc_loss=0.1185, cr_loss=0.3634, attn_decoder_loss=0.2458, over 5768764.65 frames. ], batch size: 125, lr: 2.96e-03, grad_scale: 32.0 +2024-09-19 12:55:23,900 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.307e+01 8.852e+01 9.362e+01 9.812e+01 1.394e+02, threshold=1.872e+02, percent-clipped=0.0 +2024-09-19 12:55:30,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=669240.0, ans=0.025 +2024-09-19 12:55:31,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=669280.0, ans=0.1 +2024-09-19 12:55:57,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=669320.0, ans=0.125 +2024-09-19 12:56:17,475 INFO [train.py:1198] (1/2) Epoch 37, batch 4450, loss[loss=0.2562, ctc_loss=0.1476, cr_loss=0.4016, attn_decoder_loss=0.2593, over 20657.00 frames. ], tot_loss[loss=0.2425, ctc_loss=0.1216, cr_loss=0.3679, attn_decoder_loss=0.2478, over 5574657.32 frames. ], batch size: 209, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:56:17,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=669400.0, ans=0.125 +2024-09-19 12:57:06,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=669520.0, ans=0.2 +2024-09-19 12:57:33,291 INFO [train.py:1198] (1/2) Epoch 37, batch 4500, loss[loss=0.2471, ctc_loss=0.1375, cr_loss=0.3742, attn_decoder_loss=0.251, over 20812.00 frames. ], tot_loss[loss=0.2445, ctc_loss=0.1249, cr_loss=0.3705, attn_decoder_loss=0.2496, over 5234260.86 frames. 
], batch size: 209, lr: 2.96e-03, grad_scale: 16.0 +2024-09-19 12:57:33,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=669600.0, ans=0.125 +2024-09-19 12:57:33,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=669600.0, ans=0.0 +2024-09-19 12:57:35,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=669600.0, ans=0.125 +2024-09-19 12:57:45,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=669600.0, ans=0.0 +2024-09-19 12:57:54,009 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.699e+01 1.043e+02 1.161e+02 1.270e+02 1.246e+03, threshold=2.323e+02, percent-clipped=2.0 +2024-09-19 12:58:56,698 INFO [train.py:1198] (1/2) Epoch 38, batch 0, loss[loss=0.2166, ctc_loss=0.1053, cr_loss=0.3465, attn_decoder_loss=0.2213, over 29622.00 frames. ], tot_loss[loss=0.2166, ctc_loss=0.1053, cr_loss=0.3465, attn_decoder_loss=0.2213, over 29622.00 frames. ], batch size: 73, lr: 2.92e-03, grad_scale: 32.0 +2024-09-19 12:58:56,699 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 12:59:15,170 INFO [train.py:1230] (1/2) Epoch 38, validation: loss=0.2124, ctc_loss=0.03582, cr_loss=6.776e-15, attn_decoder_loss=0.232, over 944034.00 frames. +2024-09-19 12:59:15,171 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 12:59:18,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=669700.0, ans=0.2 +2024-09-19 12:59:23,180 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.09 vs. limit=15.0 +2024-09-19 13:00:09,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=669820.0, ans=0.125 +2024-09-19 13:00:14,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=669860.0, ans=0.2 +2024-09-19 13:00:20,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=669860.0, ans=0.125 +2024-09-19 13:00:27,207 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.26 vs. limit=22.5 +2024-09-19 13:00:32,669 INFO [train.py:1198] (1/2) Epoch 38, batch 50, loss[loss=0.2057, ctc_loss=0.09307, cr_loss=0.3089, attn_decoder_loss=0.2114, over 29449.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1162, cr_loss=0.3593, attn_decoder_loss=0.2399, over 1266296.41 frames. 
], batch size: 70, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:00:42,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=669900.0, ans=0.125 +2024-09-19 13:00:45,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=669900.0, ans=0.125 +2024-09-19 13:00:48,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=669940.0, ans=0.0 +2024-09-19 13:00:54,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=669940.0, ans=0.0 +2024-09-19 13:00:56,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=669940.0, ans=0.2 +2024-09-19 13:01:02,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=669980.0, ans=0.0 +2024-09-19 13:01:26,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=670020.0, ans=0.0 +2024-09-19 13:01:28,455 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=10.19 vs. limit=12.0 +2024-09-19 13:01:35,761 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.803e+01 8.673e+01 9.380e+01 1.040e+02 1.745e+02, threshold=1.876e+02, percent-clipped=0.0 +2024-09-19 13:01:44,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=670060.0, ans=0.0 +2024-09-19 13:01:45,372 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.75 vs. limit=22.5 +2024-09-19 13:01:47,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=670060.0, ans=0.1 +2024-09-19 13:01:48,554 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.69 vs. limit=15.0 +2024-09-19 13:01:50,659 INFO [train.py:1198] (1/2) Epoch 38, batch 100, loss[loss=0.2148, ctc_loss=0.1024, cr_loss=0.3344, attn_decoder_loss=0.2198, over 29531.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1174, cr_loss=0.3622, attn_decoder_loss=0.2421, over 2252308.64 frames. ], batch size: 76, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:02:04,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=670140.0, ans=0.09899494936611666 +2024-09-19 13:02:07,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=670140.0, ans=0.125 +2024-09-19 13:02:13,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=670140.0, ans=0.0 +2024-09-19 13:02:13,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=670140.0, ans=0.125 +2024-09-19 13:02:50,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.68 vs. 
limit=5.0 +2024-09-19 13:03:05,242 INFO [train.py:1198] (1/2) Epoch 38, batch 150, loss[loss=0.2147, ctc_loss=0.09888, cr_loss=0.3338, attn_decoder_loss=0.2201, over 29454.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1149, cr_loss=0.3568, attn_decoder_loss=0.2402, over 3047744.70 frames. ], batch size: 70, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:03:05,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=670300.0, ans=0.2 +2024-09-19 13:03:16,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=670300.0, ans=0.95 +2024-09-19 13:03:52,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=670420.0, ans=0.125 +2024-09-19 13:04:05,662 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.245e+01 8.326e+01 8.770e+01 9.236e+01 1.783e+02, threshold=1.754e+02, percent-clipped=0.0 +2024-09-19 13:04:20,687 INFO [train.py:1198] (1/2) Epoch 38, batch 200, loss[loss=0.2429, ctc_loss=0.1209, cr_loss=0.3638, attn_decoder_loss=0.2483, over 27468.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1142, cr_loss=0.3551, attn_decoder_loss=0.2393, over 3659843.18 frames. ], batch size: 125, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:04:33,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=670500.0, ans=0.125 +2024-09-19 13:04:40,470 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.32 vs. limit=10.0 +2024-09-19 13:04:43,557 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.66 vs. limit=15.0 +2024-09-19 13:05:10,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=670620.0, ans=0.2 +2024-09-19 13:05:41,277 INFO [train.py:1198] (1/2) Epoch 38, batch 250, loss[loss=0.2433, ctc_loss=0.1165, cr_loss=0.3479, attn_decoder_loss=0.2496, over 29239.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1141, cr_loss=0.3548, attn_decoder_loss=0.2393, over 4142177.34 frames. ], batch size: 100, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:05:41,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=670700.0, ans=0.0 +2024-09-19 13:05:49,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.01 vs. limit=22.5 +2024-09-19 13:05:50,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=670700.0, ans=0.07 +2024-09-19 13:05:55,655 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.64 vs. 
limit=15.0 +2024-09-19 13:06:04,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=670740.0, ans=0.0 +2024-09-19 13:06:14,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=670780.0, ans=0.0 +2024-09-19 13:06:17,186 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=8.96 vs. limit=10.0 +2024-09-19 13:06:22,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=670780.0, ans=0.1 +2024-09-19 13:06:27,823 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.99 vs. limit=12.0 +2024-09-19 13:06:36,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=670820.0, ans=0.1 +2024-09-19 13:06:36,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=670820.0, ans=0.0 +2024-09-19 13:06:41,637 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.304e+01 8.456e+01 8.891e+01 9.506e+01 1.343e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 13:06:56,768 INFO [train.py:1198] (1/2) Epoch 38, batch 300, loss[loss=0.2423, ctc_loss=0.1146, cr_loss=0.3707, attn_decoder_loss=0.2482, over 29528.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1138, cr_loss=0.354, attn_decoder_loss=0.2391, over 4509820.00 frames. ], batch size: 92, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:07:08,347 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.78 vs. limit=15.0 +2024-09-19 13:07:22,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=670940.0, ans=0.125 +2024-09-19 13:07:24,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=670940.0, ans=0.07 +2024-09-19 13:07:28,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=670980.0, ans=0.125 +2024-09-19 13:07:58,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=671060.0, ans=0.125 +2024-09-19 13:08:03,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=671060.0, ans=0.1 +2024-09-19 13:08:12,148 INFO [train.py:1198] (1/2) Epoch 38, batch 350, loss[loss=0.2098, ctc_loss=0.09627, cr_loss=0.3064, attn_decoder_loss=0.2156, over 29327.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1145, cr_loss=0.3555, attn_decoder_loss=0.2399, over 4796808.40 frames. 
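The Whitening lines above (metric=X vs. limit=Y) compare a covariance-based "whiteness" statistic of a module's activations against that module's configured limit; a constraint is only expected to bite once the metric exceeds the limit, and most entries here (e.g. metric=8.96 vs. limit=10.0) sit below it. A stand-in metric under that reading; icefall's scaling.py uses its own formula, so treat this purely as illustration.

import torch

# Sketch only: one plausible whiteness statistic. A perfectly white signal has
# covariance proportional to I, giving a ratio of 1.0; correlated or unevenly
# scaled channels push the value higher.
def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    x = x - x.mean(dim=0)                 # x: (num_frames, num_channels)
    cov = (x.T @ x) / x.shape[0]
    eigs = torch.linalg.eigvalsh(cov)
    return eigs.max() / eigs.mean().clamp(min=1e-20)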
], batch size: 71, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:08:18,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=671100.0, ans=0.0 +2024-09-19 13:08:31,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=671140.0, ans=0.0 +2024-09-19 13:08:40,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=671140.0, ans=0.2 +2024-09-19 13:09:03,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=671220.0, ans=0.0 +2024-09-19 13:09:13,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=671260.0, ans=0.0 +2024-09-19 13:09:16,779 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.520e+01 8.939e+01 9.511e+01 1.277e+02, threshold=1.788e+02, percent-clipped=0.0 +2024-09-19 13:09:31,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=671300.0, ans=0.0 +2024-09-19 13:09:32,532 INFO [train.py:1198] (1/2) Epoch 38, batch 400, loss[loss=0.2384, ctc_loss=0.1179, cr_loss=0.3705, attn_decoder_loss=0.2435, over 29718.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1142, cr_loss=0.3549, attn_decoder_loss=0.2398, over 5026736.18 frames. ], batch size: 82, lr: 2.92e-03, grad_scale: 16.0 +2024-09-19 13:09:37,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=671300.0, ans=0.125 +2024-09-19 13:09:52,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=671340.0, ans=0.125 +2024-09-19 13:09:54,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.48 vs. limit=10.0 +2024-09-19 13:10:00,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=671340.0, ans=0.5 +2024-09-19 13:10:09,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=671380.0, ans=0.0 +2024-09-19 13:10:19,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=671420.0, ans=0.125 +2024-09-19 13:10:22,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=671420.0, ans=0.2 +2024-09-19 13:10:46,093 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.94 vs. limit=6.0 +2024-09-19 13:10:48,140 INFO [train.py:1198] (1/2) Epoch 38, batch 450, loss[loss=0.2487, ctc_loss=0.1244, cr_loss=0.3728, attn_decoder_loss=0.2543, over 29693.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1145, cr_loss=0.3552, attn_decoder_loss=0.24, over 5190345.72 frames. 
], batch size: 83, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:11:15,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=671540.0, ans=0.125 +2024-09-19 13:11:16,076 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.23 vs. limit=10.0 +2024-09-19 13:11:51,639 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.658e+01 9.040e+01 9.546e+01 1.503e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-19 13:11:53,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=671660.0, ans=0.0 +2024-09-19 13:12:03,565 INFO [train.py:1198] (1/2) Epoch 38, batch 500, loss[loss=0.2556, ctc_loss=0.1312, cr_loss=0.3961, attn_decoder_loss=0.2606, over 29457.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1145, cr_loss=0.3552, attn_decoder_loss=0.2397, over 5332705.15 frames. ], batch size: 94, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:12:11,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=671700.0, ans=0.0 +2024-09-19 13:12:25,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=671740.0, ans=0.125 +2024-09-19 13:12:31,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=671740.0, ans=0.2 +2024-09-19 13:12:42,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=671780.0, ans=0.0 +2024-09-19 13:12:53,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=671820.0, ans=0.0 +2024-09-19 13:13:23,862 INFO [train.py:1198] (1/2) Epoch 38, batch 550, loss[loss=0.2482, ctc_loss=0.1276, cr_loss=0.3972, attn_decoder_loss=0.2527, over 28861.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1147, cr_loss=0.3559, attn_decoder_loss=0.2397, over 5423776.54 frames. 
], batch size: 104, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:13:28,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=671900.0, ans=0.0 +2024-09-19 13:13:30,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=671900.0, ans=0.2 +2024-09-19 13:13:39,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=671940.0, ans=0.0 +2024-09-19 13:13:40,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=671940.0, ans=0.125 +2024-09-19 13:13:47,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=671940.0, ans=0.125 +2024-09-19 13:13:48,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=671940.0, ans=0.1 +2024-09-19 13:14:17,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=672020.0, ans=0.125 +2024-09-19 13:14:26,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=672020.0, ans=0.2 +2024-09-19 13:14:35,173 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 8.563e+01 9.079e+01 9.918e+01 4.106e+02, threshold=1.816e+02, percent-clipped=4.0 +2024-09-19 13:14:47,311 INFO [train.py:1198] (1/2) Epoch 38, batch 600, loss[loss=0.2526, ctc_loss=0.1305, cr_loss=0.3947, attn_decoder_loss=0.2574, over 29300.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1149, cr_loss=0.3563, attn_decoder_loss=0.2399, over 5511814.48 frames. ], batch size: 100, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:14:47,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=672100.0, ans=0.125 +2024-09-19 13:15:01,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=672140.0, ans=0.2 +2024-09-19 13:15:01,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=672140.0, ans=0.1 +2024-09-19 13:15:04,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=672140.0, ans=0.2 +2024-09-19 13:15:21,258 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.36 vs. limit=10.0 +2024-09-19 13:15:26,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=672180.0, ans=0.0 +2024-09-19 13:16:02,923 INFO [train.py:1198] (1/2) Epoch 38, batch 650, loss[loss=0.2384, ctc_loss=0.1145, cr_loss=0.3581, attn_decoder_loss=0.2442, over 29757.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1138, cr_loss=0.3541, attn_decoder_loss=0.2392, over 5587911.89 frames. ], batch size: 81, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:16:09,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.91 vs. 
limit=15.0 +2024-09-19 13:16:22,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=672340.0, ans=0.125 +2024-09-19 13:16:33,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=672340.0, ans=0.0 +2024-09-19 13:16:36,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=672380.0, ans=0.0 +2024-09-19 13:17:09,154 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.435e+01 8.584e+01 9.023e+01 9.741e+01 1.282e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 13:17:10,897 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=672460.0, ans=0.1 +2024-09-19 13:17:10,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=672460.0, ans=0.125 +2024-09-19 13:17:20,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=672460.0, ans=0.125 +2024-09-19 13:17:23,538 INFO [train.py:1198] (1/2) Epoch 38, batch 700, loss[loss=0.2309, ctc_loss=0.1173, cr_loss=0.3707, attn_decoder_loss=0.2353, over 29534.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1146, cr_loss=0.356, attn_decoder_loss=0.2398, over 5637183.84 frames. ], batch size: 76, lr: 2.92e-03, grad_scale: 8.0 +2024-09-19 13:17:25,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=672500.0, ans=10.0 +2024-09-19 13:17:43,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=672540.0, ans=0.07 +2024-09-19 13:18:11,455 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.32 vs. limit=15.0 +2024-09-19 13:18:18,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=672620.0, ans=0.125 +2024-09-19 13:18:22,318 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.66 vs. limit=15.0 +2024-09-19 13:18:23,823 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.98 vs. limit=10.0 +2024-09-19 13:18:32,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=672660.0, ans=0.125 +2024-09-19 13:18:38,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=672700.0, ans=0.125 +2024-09-19 13:18:39,486 INFO [train.py:1198] (1/2) Epoch 38, batch 750, loss[loss=0.2392, ctc_loss=0.1168, cr_loss=0.3561, attn_decoder_loss=0.2449, over 29702.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1143, cr_loss=0.3554, attn_decoder_loss=0.2395, over 5675260.50 frames. 
], batch size: 82, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:18:39,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=672700.0, ans=0.125 +2024-09-19 13:19:43,313 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.368e+01 8.750e+01 9.083e+01 9.607e+01 5.779e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-19 13:19:54,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=672900.0, ans=0.0 +2024-09-19 13:19:55,414 INFO [train.py:1198] (1/2) Epoch 38, batch 800, loss[loss=0.2085, ctc_loss=0.0967, cr_loss=0.3106, attn_decoder_loss=0.214, over 29607.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1141, cr_loss=0.3545, attn_decoder_loss=0.2392, over 5706116.09 frames. ], batch size: 73, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:20:15,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=672940.0, ans=0.0 +2024-09-19 13:20:21,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=672940.0, ans=0.125 +2024-09-19 13:20:41,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=673020.0, ans=0.125 +2024-09-19 13:21:15,076 INFO [train.py:1198] (1/2) Epoch 38, batch 850, loss[loss=0.2378, ctc_loss=0.1093, cr_loss=0.3428, attn_decoder_loss=0.2444, over 29729.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1138, cr_loss=0.3541, attn_decoder_loss=0.2393, over 5735093.17 frames. ], batch size: 89, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:21:19,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=673100.0, ans=0.125 +2024-09-19 13:21:39,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=673140.0, ans=0.2 +2024-09-19 13:22:09,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=673220.0, ans=0.125 +2024-09-19 13:22:15,870 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:22:19,981 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.398e+01 8.440e+01 8.974e+01 9.392e+01 3.199e+02, threshold=1.795e+02, percent-clipped=2.0 +2024-09-19 13:22:26,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=673260.0, ans=0.0 +2024-09-19 13:22:30,665 INFO [train.py:1198] (1/2) Epoch 38, batch 900, loss[loss=0.2102, ctc_loss=0.0962, cr_loss=0.3052, attn_decoder_loss=0.2161, over 29625.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1139, cr_loss=0.3538, attn_decoder_loss=0.2396, over 5739499.11 frames. ], batch size: 73, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:22:36,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.58 vs. limit=22.5 +2024-09-19 13:22:37,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.17 vs. 
limit=15.0 +2024-09-19 13:22:39,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=673300.0, ans=0.0 +2024-09-19 13:22:42,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=673300.0, ans=0.125 +2024-09-19 13:22:53,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=673340.0, ans=0.05 +2024-09-19 13:22:56,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=673340.0, ans=0.0 +2024-09-19 13:22:57,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=673340.0, ans=0.1 +2024-09-19 13:22:58,423 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.51 vs. limit=15.0 +2024-09-19 13:23:14,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=673420.0, ans=0.09899494936611666 +2024-09-19 13:23:14,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=673420.0, ans=0.125 +2024-09-19 13:23:17,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=673420.0, ans=0.2 +2024-09-19 13:23:25,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=673420.0, ans=0.1 +2024-09-19 13:23:41,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=673460.0, ans=0.125 +2024-09-19 13:23:45,778 INFO [train.py:1198] (1/2) Epoch 38, batch 950, loss[loss=0.2178, ctc_loss=0.09564, cr_loss=0.313, attn_decoder_loss=0.2244, over 29522.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1139, cr_loss=0.3533, attn_decoder_loss=0.2395, over 5741838.91 frames. 
], batch size: 74, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:23:46,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=673500.0, ans=0.125 +2024-09-19 13:24:17,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=673580.0, ans=0.1 +2024-09-19 13:24:28,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=673580.0, ans=0.0 +2024-09-19 13:24:34,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=673620.0, ans=0.2 +2024-09-19 13:24:51,003 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:24:53,669 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.808e+01 9.253e+01 1.008e+02 2.662e+02, threshold=1.851e+02, percent-clipped=5.0 +2024-09-19 13:25:00,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=673660.0, ans=0.125 +2024-09-19 13:25:03,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=673660.0, ans=0.125 +2024-09-19 13:25:06,338 INFO [train.py:1198] (1/2) Epoch 38, batch 1000, loss[loss=0.2235, ctc_loss=0.1046, cr_loss=0.3294, attn_decoder_loss=0.2294, over 29519.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1153, cr_loss=0.3557, attn_decoder_loss=0.2406, over 5736114.89 frames. ], batch size: 77, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:25:27,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=673740.0, ans=0.0 +2024-09-19 13:25:27,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=673740.0, ans=0.95 +2024-09-19 13:25:46,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=673780.0, ans=0.1 +2024-09-19 13:25:47,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=673780.0, ans=0.125 +2024-09-19 13:25:50,790 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:25:51,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.81 vs. limit=15.0 +2024-09-19 13:25:51,310 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.57 vs. limit=12.0 +2024-09-19 13:26:08,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=673860.0, ans=0.125 +2024-09-19 13:26:09,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=673860.0, ans=0.2 +2024-09-19 13:26:21,861 INFO [train.py:1198] (1/2) Epoch 38, batch 1050, loss[loss=0.2438, ctc_loss=0.1191, cr_loss=0.3724, attn_decoder_loss=0.2494, over 29671.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1149, cr_loss=0.3551, attn_decoder_loss=0.2399, over 5744519.33 frames. 
], batch size: 85, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:26:22,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=673900.0, ans=0.0 +2024-09-19 13:26:29,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=673900.0, ans=0.0 +2024-09-19 13:26:37,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=673940.0, ans=0.0 +2024-09-19 13:26:38,046 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.55 vs. limit=6.0 +2024-09-19 13:27:13,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=674020.0, ans=0.2 +2024-09-19 13:27:21,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=674060.0, ans=0.125 +2024-09-19 13:27:27,114 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.345e+01 8.465e+01 8.973e+01 9.470e+01 1.777e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-19 13:27:37,781 INFO [train.py:1198] (1/2) Epoch 38, batch 1100, loss[loss=0.2327, ctc_loss=0.1139, cr_loss=0.3433, attn_decoder_loss=0.2382, over 29433.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1146, cr_loss=0.3546, attn_decoder_loss=0.2394, over 5757649.21 frames. ], batch size: 78, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:27:50,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=674100.0, ans=0.09899494936611666 +2024-09-19 13:28:03,931 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:28:08,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=674180.0, ans=0.2 +2024-09-19 13:28:13,473 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.58 vs. limit=22.5 +2024-09-19 13:28:18,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=674180.0, ans=0.1 +2024-09-19 13:28:24,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=674220.0, ans=0.125 +2024-09-19 13:28:28,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=674220.0, ans=0.07 +2024-09-19 13:28:39,547 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:28:57,999 INFO [train.py:1198] (1/2) Epoch 38, batch 1150, loss[loss=0.2292, ctc_loss=0.1134, cr_loss=0.3618, attn_decoder_loss=0.234, over 29421.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1146, cr_loss=0.3549, attn_decoder_loss=0.2393, over 5755070.84 frames. 
], batch size: 78, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:29:38,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=674380.0, ans=0.0 +2024-09-19 13:29:51,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=674420.0, ans=0.0 +2024-09-19 13:29:52,363 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.03 vs. limit=22.5 +2024-09-19 13:30:03,757 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.627e+01 8.613e+01 9.064e+01 9.591e+01 1.895e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-19 13:30:12,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.43 vs. limit=15.0 +2024-09-19 13:30:14,471 INFO [train.py:1198] (1/2) Epoch 38, batch 1200, loss[loss=0.2498, ctc_loss=0.1237, cr_loss=0.3535, attn_decoder_loss=0.256, over 29668.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1147, cr_loss=0.3552, attn_decoder_loss=0.2398, over 5747559.65 frames. ], batch size: 85, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:30:20,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=674500.0, ans=0.0 +2024-09-19 13:30:25,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=674500.0, ans=0.0 +2024-09-19 13:30:45,708 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.76 vs. limit=15.0 +2024-09-19 13:30:52,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=674580.0, ans=0.125 +2024-09-19 13:30:54,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.70 vs. limit=12.0 +2024-09-19 13:31:01,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=674620.0, ans=0.1 +2024-09-19 13:31:13,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=674660.0, ans=0.025 +2024-09-19 13:31:21,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=674660.0, ans=0.1 +2024-09-19 13:31:22,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=674660.0, ans=0.125 +2024-09-19 13:31:29,824 INFO [train.py:1198] (1/2) Epoch 38, batch 1250, loss[loss=0.2424, ctc_loss=0.1272, cr_loss=0.3871, attn_decoder_loss=0.2466, over 29487.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1151, cr_loss=0.3561, attn_decoder_loss=0.2403, over 5774856.66 frames. 
], batch size: 92, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:31:31,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=674700.0, ans=0.0 +2024-09-19 13:31:54,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=674740.0, ans=0.1 +2024-09-19 13:32:12,999 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.56 vs. limit=15.0 +2024-09-19 13:32:33,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.23 vs. limit=15.0 +2024-09-19 13:32:37,395 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.525e+01 9.083e+01 9.622e+01 1.847e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-19 13:32:45,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=674860.0, ans=0.125 +2024-09-19 13:32:50,168 INFO [train.py:1198] (1/2) Epoch 38, batch 1300, loss[loss=0.2472, ctc_loss=0.1222, cr_loss=0.3745, attn_decoder_loss=0.2528, over 28398.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1148, cr_loss=0.3559, attn_decoder_loss=0.2399, over 5780290.40 frames. ], batch size: 111, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:33:13,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=674940.0, ans=0.125 +2024-09-19 13:33:34,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=675020.0, ans=0.125 +2024-09-19 13:33:37,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=675020.0, ans=0.1 +2024-09-19 13:34:05,850 INFO [train.py:1198] (1/2) Epoch 38, batch 1350, loss[loss=0.2249, ctc_loss=0.1022, cr_loss=0.33, attn_decoder_loss=0.2312, over 29758.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1147, cr_loss=0.3559, attn_decoder_loss=0.2397, over 5797174.81 frames. ], batch size: 81, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:34:12,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=675100.0, ans=0.125 +2024-09-19 13:34:37,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=675180.0, ans=0.1 +2024-09-19 13:34:52,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=675220.0, ans=0.0 +2024-09-19 13:34:54,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=675220.0, ans=0.125 +2024-09-19 13:35:01,781 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:35:01,996 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.20 vs. 
limit=15.0 +2024-09-19 13:35:11,894 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.058e+01 8.540e+01 8.958e+01 9.553e+01 1.189e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-19 13:35:20,936 INFO [train.py:1198] (1/2) Epoch 38, batch 1400, loss[loss=0.2093, ctc_loss=0.1018, cr_loss=0.3274, attn_decoder_loss=0.2139, over 29561.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1143, cr_loss=0.3556, attn_decoder_loss=0.2394, over 5808076.94 frames. ], batch size: 69, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:35:24,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=675300.0, ans=0.1 +2024-09-19 13:35:36,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=675340.0, ans=0.0 +2024-09-19 13:35:47,150 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:35:50,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=675380.0, ans=0.125 +2024-09-19 13:35:58,145 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.52 vs. limit=15.0 +2024-09-19 13:36:19,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=675420.0, ans=0.125 +2024-09-19 13:36:23,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=675460.0, ans=0.0 +2024-09-19 13:36:26,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=675460.0, ans=0.125 +2024-09-19 13:36:39,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=675500.0, ans=0.1 +2024-09-19 13:36:40,635 INFO [train.py:1198] (1/2) Epoch 38, batch 1450, loss[loss=0.2575, ctc_loss=0.1353, cr_loss=0.4089, attn_decoder_loss=0.262, over 29460.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1145, cr_loss=0.356, attn_decoder_loss=0.24, over 5803915.23 frames. ], batch size: 94, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:36:43,335 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.90 vs. 
limit=22.5 +2024-09-19 13:36:46,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=675500.0, ans=0.125 +2024-09-19 13:37:18,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=675580.0, ans=0.125 +2024-09-19 13:37:23,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=675580.0, ans=0.05 +2024-09-19 13:37:30,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=675620.0, ans=0.125 +2024-09-19 13:37:30,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=675620.0, ans=10.0 +2024-09-19 13:37:37,497 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.57 vs. limit=15.0 +2024-09-19 13:37:46,829 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.416e+01 8.618e+01 9.172e+01 9.928e+01 3.328e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-19 13:37:53,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=675660.0, ans=0.1 +2024-09-19 13:37:56,074 INFO [train.py:1198] (1/2) Epoch 38, batch 1500, loss[loss=0.2429, ctc_loss=0.1172, cr_loss=0.3486, attn_decoder_loss=0.2491, over 29612.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1146, cr_loss=0.3557, attn_decoder_loss=0.2402, over 5804379.24 frames. ], batch size: 86, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:37:58,459 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.84 vs. limit=22.5 +2024-09-19 13:38:19,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=675740.0, ans=0.2 +2024-09-19 13:38:19,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=675740.0, ans=0.025 +2024-09-19 13:38:34,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=675780.0, ans=0.0 +2024-09-19 13:38:44,414 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.41 vs. limit=15.0 +2024-09-19 13:38:54,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=675820.0, ans=0.07 +2024-09-19 13:38:58,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=675860.0, ans=0.1 +2024-09-19 13:39:00,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=675860.0, ans=0.025 +2024-09-19 13:39:09,611 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.32 vs. 
limit=10.0 +2024-09-19 13:39:10,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=675900.0, ans=0.1 +2024-09-19 13:39:11,898 INFO [train.py:1198] (1/2) Epoch 38, batch 1550, loss[loss=0.2489, ctc_loss=0.124, cr_loss=0.3915, attn_decoder_loss=0.2541, over 29545.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1148, cr_loss=0.3562, attn_decoder_loss=0.2402, over 5780521.57 frames. ], batch size: 90, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:39:16,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=675900.0, ans=0.1 +2024-09-19 13:39:17,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.57 vs. limit=15.0 +2024-09-19 13:39:43,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=675980.0, ans=0.025 +2024-09-19 13:39:48,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=675980.0, ans=0.125 +2024-09-19 13:40:20,839 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.168e+01 8.489e+01 9.048e+01 9.769e+01 3.941e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-19 13:40:27,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=676060.0, ans=0.1 +2024-09-19 13:40:32,004 INFO [train.py:1198] (1/2) Epoch 38, batch 1600, loss[loss=0.2407, ctc_loss=0.117, cr_loss=0.3522, attn_decoder_loss=0.2466, over 29673.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1147, cr_loss=0.3559, attn_decoder_loss=0.24, over 5762751.99 frames. ], batch size: 85, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:40:41,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=676100.0, ans=0.125 +2024-09-19 13:40:49,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=676140.0, ans=0.07 +2024-09-19 13:41:10,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=676180.0, ans=0.125 +2024-09-19 13:41:43,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=676260.0, ans=0.125 +2024-09-19 13:41:47,495 INFO [train.py:1198] (1/2) Epoch 38, batch 1650, loss[loss=0.2407, ctc_loss=0.1182, cr_loss=0.3589, attn_decoder_loss=0.2464, over 29704.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1143, cr_loss=0.3552, attn_decoder_loss=0.2397, over 5758036.76 frames. 
], batch size: 89, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:41:47,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=676300.0, ans=0.1 +2024-09-19 13:41:50,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=676300.0, ans=0.0 +2024-09-19 13:42:04,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=676340.0, ans=0.125 +2024-09-19 13:42:07,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=676340.0, ans=0.125 +2024-09-19 13:42:19,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=676380.0, ans=0.1 +2024-09-19 13:42:53,543 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.620e+01 8.638e+01 9.232e+01 9.728e+01 1.403e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-19 13:43:02,489 INFO [train.py:1198] (1/2) Epoch 38, batch 1700, loss[loss=0.2051, ctc_loss=0.09457, cr_loss=0.3104, attn_decoder_loss=0.2105, over 29550.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1142, cr_loss=0.3555, attn_decoder_loss=0.2396, over 5779824.18 frames. ], batch size: 69, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:43:02,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=676500.0, ans=0.1 +2024-09-19 13:43:02,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=676500.0, ans=0.125 +2024-09-19 13:43:08,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=676500.0, ans=0.1 +2024-09-19 13:43:28,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=676540.0, ans=0.125 +2024-09-19 13:43:39,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=676580.0, ans=0.1 +2024-09-19 13:43:40,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=676580.0, ans=0.1 +2024-09-19 13:43:48,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=676620.0, ans=0.125 +2024-09-19 13:44:22,575 INFO [train.py:1198] (1/2) Epoch 38, batch 1750, loss[loss=0.2081, ctc_loss=0.09593, cr_loss=0.3142, attn_decoder_loss=0.2135, over 29375.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1142, cr_loss=0.3551, attn_decoder_loss=0.2392, over 5786062.17 frames. ], batch size: 67, lr: 2.91e-03, grad_scale: 16.0 +2024-09-19 13:44:27,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.01 vs. limit=15.0 +2024-09-19 13:44:34,074 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.15 vs. 
limit=15.0 +2024-09-19 13:44:39,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=676740.0, ans=0.125 +2024-09-19 13:44:48,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=676740.0, ans=0.025 +2024-09-19 13:44:56,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=676780.0, ans=0.125 +2024-09-19 13:45:05,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=676780.0, ans=0.125 +2024-09-19 13:45:13,142 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:45:19,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=676820.0, ans=0.95 +2024-09-19 13:45:30,735 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 8.567e+01 9.147e+01 9.670e+01 2.287e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-19 13:45:38,186 INFO [train.py:1198] (1/2) Epoch 38, batch 1800, loss[loss=0.2436, ctc_loss=0.1164, cr_loss=0.3725, attn_decoder_loss=0.2495, over 29696.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1145, cr_loss=0.3552, attn_decoder_loss=0.2393, over 5789443.64 frames. ], batch size: 83, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:45:48,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.69 vs. limit=15.0 +2024-09-19 13:45:53,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=676940.0, ans=0.1 +2024-09-19 13:46:02,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=676940.0, ans=0.025 +2024-09-19 13:46:46,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=677060.0, ans=0.5 +2024-09-19 13:46:47,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=677060.0, ans=0.125 +2024-09-19 13:46:53,820 INFO [train.py:1198] (1/2) Epoch 38, batch 1850, loss[loss=0.2385, ctc_loss=0.1067, cr_loss=0.3275, attn_decoder_loss=0.2459, over 29622.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1143, cr_loss=0.3552, attn_decoder_loss=0.2394, over 5796066.47 frames. ], batch size: 86, lr: 2.91e-03, grad_scale: 8.0 +2024-09-19 13:46:58,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=677100.0, ans=0.2 +2024-09-19 13:47:03,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=677100.0, ans=0.125 +2024-09-19 13:47:07,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=677140.0, ans=10.0 +2024-09-19 13:47:12,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=677140.0, ans=0.2 +2024-09-19 13:47:47,819 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.11 vs. 
limit=15.0 +2024-09-19 13:47:51,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=677220.0, ans=0.2 +2024-09-19 13:48:01,721 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.336e+01 8.507e+01 9.088e+01 9.545e+01 1.586e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-19 13:48:03,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=677260.0, ans=0.125 +2024-09-19 13:48:03,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=677260.0, ans=0.1 +2024-09-19 13:48:09,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=677260.0, ans=0.1 +2024-09-19 13:48:13,529 INFO [train.py:1198] (1/2) Epoch 38, batch 1900, loss[loss=0.2406, ctc_loss=0.1132, cr_loss=0.3649, attn_decoder_loss=0.2466, over 29713.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1141, cr_loss=0.3545, attn_decoder_loss=0.2395, over 5803539.16 frames. ], batch size: 89, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:48:19,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=677300.0, ans=0.125 +2024-09-19 13:48:51,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=677380.0, ans=0.125 +2024-09-19 13:49:11,269 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=677420.0, ans=0.2 +2024-09-19 13:49:24,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=677460.0, ans=0.125 +2024-09-19 13:49:26,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=677460.0, ans=0.125 +2024-09-19 13:49:29,173 INFO [train.py:1198] (1/2) Epoch 38, batch 1950, loss[loss=0.2321, ctc_loss=0.1179, cr_loss=0.3731, attn_decoder_loss=0.2365, over 29429.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1147, cr_loss=0.3563, attn_decoder_loss=0.2408, over 5817919.64 frames. ], batch size: 78, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:49:34,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=677500.0, ans=0.125 +2024-09-19 13:49:39,388 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.12 vs. limit=15.0 +2024-09-19 13:50:06,057 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.86 vs. 
limit=12.0 +2024-09-19 13:50:09,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=677580.0, ans=0.0 +2024-09-19 13:50:16,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=677620.0, ans=0.125 +2024-09-19 13:50:28,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=677660.0, ans=0.125 +2024-09-19 13:50:34,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=677660.0, ans=0.1 +2024-09-19 13:50:35,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.40 vs. limit=15.0 +2024-09-19 13:50:37,364 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.817e+01 8.623e+01 9.104e+01 9.387e+01 1.434e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-19 13:50:44,925 INFO [train.py:1198] (1/2) Epoch 38, batch 2000, loss[loss=0.2129, ctc_loss=0.1006, cr_loss=0.3342, attn_decoder_loss=0.2179, over 29361.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1148, cr_loss=0.3559, attn_decoder_loss=0.2412, over 5796779.17 frames. ], batch size: 67, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 13:50:45,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=677700.0, ans=0.125 +2024-09-19 13:50:54,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=677700.0, ans=0.1 +2024-09-19 13:50:59,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=677740.0, ans=0.125 +2024-09-19 13:50:59,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=677740.0, ans=0.125 +2024-09-19 13:51:05,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=677740.0, ans=0.125 +2024-09-19 13:51:25,230 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.66 vs. limit=15.0 +2024-09-19 13:51:38,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=677820.0, ans=0.0 +2024-09-19 13:52:04,798 INFO [train.py:1198] (1/2) Epoch 38, batch 2050, loss[loss=0.2111, ctc_loss=0.09285, cr_loss=0.3157, attn_decoder_loss=0.2172, over 29457.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1143, cr_loss=0.3555, attn_decoder_loss=0.2402, over 5789093.06 frames. 
], batch size: 70, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 13:52:05,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=677900.0, ans=0.1 +2024-09-19 13:52:06,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=677900.0, ans=0.125 +2024-09-19 13:52:06,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=677900.0, ans=0.125 +2024-09-19 13:52:17,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=677900.0, ans=0.0 +2024-09-19 13:52:27,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=677940.0, ans=0.5 +2024-09-19 13:52:32,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=677940.0, ans=0.1 +2024-09-19 13:52:40,243 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.32 vs. limit=10.0 +2024-09-19 13:52:56,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=678020.0, ans=0.1 +2024-09-19 13:53:11,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=678060.0, ans=0.125 +2024-09-19 13:53:12,676 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.512e+01 8.454e+01 8.907e+01 9.620e+01 4.678e+02, threshold=1.781e+02, percent-clipped=1.0 +2024-09-19 13:53:20,341 INFO [train.py:1198] (1/2) Epoch 38, batch 2100, loss[loss=0.2383, ctc_loss=0.1196, cr_loss=0.3685, attn_decoder_loss=0.2433, over 29768.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1138, cr_loss=0.3543, attn_decoder_loss=0.2395, over 5801046.73 frames. ], batch size: 81, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 13:53:20,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=678100.0, ans=0.125 +2024-09-19 13:53:23,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=678100.0, ans=0.0 +2024-09-19 13:53:41,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=678140.0, ans=0.125 +2024-09-19 13:53:45,375 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.75 vs. limit=15.0 +2024-09-19 13:53:45,462 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.01 vs. limit=22.5 +2024-09-19 13:53:46,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=678140.0, ans=0.125 +2024-09-19 13:54:26,128 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=13.41 vs. limit=15.0 +2024-09-19 13:54:28,653 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.90 vs. 
limit=15.0 +2024-09-19 13:54:31,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=678260.0, ans=0.125 +2024-09-19 13:54:32,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=678260.0, ans=0.125 +2024-09-19 13:54:35,529 INFO [train.py:1198] (1/2) Epoch 38, batch 2150, loss[loss=0.2243, ctc_loss=0.112, cr_loss=0.3441, attn_decoder_loss=0.2292, over 29445.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1132, cr_loss=0.353, attn_decoder_loss=0.2389, over 5814887.28 frames. ], batch size: 78, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 13:54:47,125 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.57 vs. limit=22.5 +2024-09-19 13:55:00,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=678340.0, ans=0.125 +2024-09-19 13:55:12,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=678380.0, ans=0.2 +2024-09-19 13:55:32,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=678420.0, ans=0.125 +2024-09-19 13:55:36,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=678460.0, ans=0.0 +2024-09-19 13:55:38,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=678460.0, ans=0.2 +2024-09-19 13:55:45,739 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.394e+01 8.586e+01 8.959e+01 9.597e+01 2.666e+02, threshold=1.792e+02, percent-clipped=1.0 +2024-09-19 13:55:53,919 INFO [train.py:1198] (1/2) Epoch 38, batch 2200, loss[loss=0.2519, ctc_loss=0.1287, cr_loss=0.3792, attn_decoder_loss=0.2571, over 29643.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1136, cr_loss=0.3532, attn_decoder_loss=0.239, over 5810750.23 frames. ], batch size: 86, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:55:58,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=678500.0, ans=0.2 +2024-09-19 13:56:28,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=678580.0, ans=0.125 +2024-09-19 13:56:31,120 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:56:44,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=678620.0, ans=0.2 +2024-09-19 13:56:53,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=678620.0, ans=0.0 +2024-09-19 13:57:00,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=678660.0, ans=0.1 +2024-09-19 13:57:10,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=678700.0, ans=0.025 +2024-09-19 13:57:11,727 INFO [train.py:1198] (1/2) Epoch 38, batch 2250, loss[loss=0.236, ctc_loss=0.1125, cr_loss=0.349, attn_decoder_loss=0.242, over 29730.00 frames. 
], tot_loss[loss=0.2336, ctc_loss=0.1135, cr_loss=0.3531, attn_decoder_loss=0.2391, over 5810265.58 frames. ], batch size: 82, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:57:13,912 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.90 vs. limit=22.5 +2024-09-19 13:57:15,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff2.min_abs, batch_count=678700.0, ans=0.1 +2024-09-19 13:57:19,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=678700.0, ans=0.0 +2024-09-19 13:57:27,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=12.0 +2024-09-19 13:57:41,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=678780.0, ans=0.125 +2024-09-19 13:58:20,712 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.565e+01 9.062e+01 9.644e+01 4.463e+02, threshold=1.812e+02, percent-clipped=2.0 +2024-09-19 13:58:22,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=678860.0, ans=0.125 +2024-09-19 13:58:26,882 INFO [train.py:1198] (1/2) Epoch 38, batch 2300, loss[loss=0.1983, ctc_loss=0.09069, cr_loss=0.2863, attn_decoder_loss=0.2039, over 29304.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.113, cr_loss=0.3521, attn_decoder_loss=0.2381, over 5796051.18 frames. ], batch size: 71, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:58:43,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=678940.0, ans=0.2 +2024-09-19 13:58:58,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=678980.0, ans=0.125 +2024-09-19 13:59:10,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=679020.0, ans=0.2 +2024-09-19 13:59:24,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=679020.0, ans=0.1 +2024-09-19 13:59:28,465 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.46 vs. limit=15.0 +2024-09-19 13:59:33,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=679060.0, ans=0.125 +2024-09-19 13:59:36,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=679060.0, ans=0.125 +2024-09-19 13:59:42,416 INFO [train.py:1198] (1/2) Epoch 38, batch 2350, loss[loss=0.2471, ctc_loss=0.1221, cr_loss=0.3758, attn_decoder_loss=0.2526, over 29689.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1132, cr_loss=0.3533, attn_decoder_loss=0.2385, over 5802452.08 frames. ], batch size: 83, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 13:59:44,952 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 13:59:46,564 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.98 vs. 
limit=12.0 +2024-09-19 13:59:47,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=679100.0, ans=0.125 +2024-09-19 14:00:47,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=679260.0, ans=0.125 +2024-09-19 14:00:56,207 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.402e+01 8.471e+01 8.979e+01 9.530e+01 2.043e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-19 14:01:02,398 INFO [train.py:1198] (1/2) Epoch 38, batch 2400, loss[loss=0.2186, ctc_loss=0.1057, cr_loss=0.3537, attn_decoder_loss=0.2233, over 29535.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1139, cr_loss=0.355, attn_decoder_loss=0.2391, over 5806503.72 frames. ], batch size: 76, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 14:01:02,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=679300.0, ans=0.2 +2024-09-19 14:01:25,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=679340.0, ans=0.1 +2024-09-19 14:01:31,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=679380.0, ans=0.125 +2024-09-19 14:01:36,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=679380.0, ans=0.2 +2024-09-19 14:01:54,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=679420.0, ans=0.0 +2024-09-19 14:02:15,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=679460.0, ans=0.2 +2024-09-19 14:02:18,225 INFO [train.py:1198] (1/2) Epoch 38, batch 2450, loss[loss=0.2367, ctc_loss=0.1095, cr_loss=0.3456, attn_decoder_loss=0.2432, over 29738.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1143, cr_loss=0.3556, attn_decoder_loss=0.2397, over 5786531.00 frames. ], batch size: 82, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:02:31,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=679540.0, ans=0.125 +2024-09-19 14:02:46,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=679580.0, ans=0.125 +2024-09-19 14:02:56,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=679580.0, ans=0.125 +2024-09-19 14:03:06,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=679620.0, ans=0.125 +2024-09-19 14:03:23,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=679660.0, ans=0.0 +2024-09-19 14:03:28,796 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.486e+01 8.520e+01 8.981e+01 9.531e+01 3.262e+02, threshold=1.796e+02, percent-clipped=1.0 +2024-09-19 14:03:34,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=679700.0, ans=0.1 +2024-09-19 14:03:35,433 INFO [train.py:1198] (1/2) Epoch 38, batch 2500, loss[loss=0.2345, ctc_loss=0.1108, cr_loss=0.3519, attn_decoder_loss=0.2405, over 29605.00 frames. 
], tot_loss[loss=0.2342, ctc_loss=0.1142, cr_loss=0.3553, attn_decoder_loss=0.2396, over 5795061.52 frames. ], batch size: 86, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:04:22,431 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.89 vs. limit=15.0 +2024-09-19 14:04:47,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=679860.0, ans=0.025 +2024-09-19 14:04:53,320 INFO [train.py:1198] (1/2) Epoch 38, batch 2550, loss[loss=0.2128, ctc_loss=0.1091, cr_loss=0.3266, attn_decoder_loss=0.217, over 29358.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1145, cr_loss=0.3558, attn_decoder_loss=0.2399, over 5798359.77 frames. ], batch size: 67, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:04:53,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=679900.0, ans=0.0 +2024-09-19 14:05:11,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=679940.0, ans=0.1 +2024-09-19 14:05:22,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=679980.0, ans=0.0 +2024-09-19 14:05:51,867 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.75 vs. limit=15.0 +2024-09-19 14:06:03,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=680060.0, ans=0.2 +2024-09-19 14:06:04,811 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.327e+01 8.447e+01 9.059e+01 9.443e+01 1.451e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-19 14:06:06,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=680060.0, ans=0.125 +2024-09-19 14:06:09,435 INFO [train.py:1198] (1/2) Epoch 38, batch 2600, loss[loss=0.2262, ctc_loss=0.1087, cr_loss=0.3384, attn_decoder_loss=0.2318, over 29447.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1143, cr_loss=0.3554, attn_decoder_loss=0.2403, over 5794823.66 frames. 
], batch size: 78, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:06:11,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=680100.0, ans=0.025 +2024-09-19 14:06:18,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=680100.0, ans=0.125 +2024-09-19 14:06:18,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=680100.0, ans=0.1 +2024-09-19 14:06:20,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=680100.0, ans=0.0 +2024-09-19 14:06:33,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=680140.0, ans=0.0 +2024-09-19 14:06:36,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=680140.0, ans=0.125 +2024-09-19 14:06:41,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=680180.0, ans=0.125 +2024-09-19 14:06:41,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=680180.0, ans=0.1 +2024-09-19 14:06:50,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=680180.0, ans=0.0 +2024-09-19 14:06:51,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=680180.0, ans=0.2 +2024-09-19 14:07:06,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.15 vs. limit=6.0 +2024-09-19 14:07:10,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=680260.0, ans=0.125 +2024-09-19 14:07:16,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=680260.0, ans=0.0 +2024-09-19 14:07:24,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=680260.0, ans=0.05 +2024-09-19 14:07:26,861 INFO [train.py:1198] (1/2) Epoch 38, batch 2650, loss[loss=0.2503, ctc_loss=0.1247, cr_loss=0.3758, attn_decoder_loss=0.2559, over 29296.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1142, cr_loss=0.3553, attn_decoder_loss=0.2406, over 5802224.17 frames. ], batch size: 100, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:07:27,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=680300.0, ans=0.1 +2024-09-19 14:07:32,097 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.51 vs. 
limit=15.0 +2024-09-19 14:07:42,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=680340.0, ans=0.125 +2024-09-19 14:08:00,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=680380.0, ans=0.0 +2024-09-19 14:08:12,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=680420.0, ans=0.125 +2024-09-19 14:08:39,401 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.526e+01 9.066e+01 9.711e+01 1.379e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-19 14:08:44,058 INFO [train.py:1198] (1/2) Epoch 38, batch 2700, loss[loss=0.2388, ctc_loss=0.1069, cr_loss=0.3441, attn_decoder_loss=0.2458, over 29527.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1144, cr_loss=0.3554, attn_decoder_loss=0.2406, over 5797039.47 frames. ], batch size: 87, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:08:47,447 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:09:04,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=680540.0, ans=0.0 +2024-09-19 14:09:24,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.61 vs. limit=15.0 +2024-09-19 14:09:27,393 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.90 vs. limit=15.0 +2024-09-19 14:09:27,408 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.94 vs. limit=15.0 +2024-09-19 14:09:59,761 INFO [train.py:1198] (1/2) Epoch 38, batch 2750, loss[loss=0.2169, ctc_loss=0.1046, cr_loss=0.3276, attn_decoder_loss=0.2221, over 29492.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.114, cr_loss=0.3544, attn_decoder_loss=0.2395, over 5794564.14 frames. ], batch size: 75, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:10:01,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=680700.0, ans=0.2 +2024-09-19 14:10:16,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=680740.0, ans=0.125 +2024-09-19 14:10:27,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=680740.0, ans=0.125 +2024-09-19 14:10:28,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=680780.0, ans=0.125 +2024-09-19 14:10:34,006 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.26 vs. 
limit=15.0 +2024-09-19 14:10:34,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=680780.0, ans=0.025 +2024-09-19 14:10:41,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=680780.0, ans=0.125 +2024-09-19 14:11:00,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=680860.0, ans=0.2 +2024-09-19 14:11:03,244 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.52 vs. limit=10.0 +2024-09-19 14:11:07,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.82 vs. limit=15.0 +2024-09-19 14:11:10,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=680860.0, ans=0.125 +2024-09-19 14:11:13,462 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.135e+01 8.653e+01 9.096e+01 9.746e+01 4.436e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-19 14:11:18,085 INFO [train.py:1198] (1/2) Epoch 38, batch 2800, loss[loss=0.2543, ctc_loss=0.1372, cr_loss=0.3702, attn_decoder_loss=0.2591, over 20258.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1144, cr_loss=0.3547, attn_decoder_loss=0.24, over 5775730.78 frames. ], batch size: 210, lr: 2.90e-03, grad_scale: 16.0 +2024-09-19 14:11:36,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=680940.0, ans=0.025 +2024-09-19 14:11:41,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=680940.0, ans=0.0 +2024-09-19 14:11:43,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=680940.0, ans=0.1 +2024-09-19 14:11:49,102 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:12:05,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=681020.0, ans=0.04949747468305833 +2024-09-19 14:12:14,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=681020.0, ans=0.05 +2024-09-19 14:12:25,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_na.min_abs, batch_count=681060.0, ans=0.02 +2024-09-19 14:12:26,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=681060.0, ans=0.125 +2024-09-19 14:12:32,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=681060.0, ans=0.125 +2024-09-19 14:12:35,543 INFO [train.py:1198] (1/2) Epoch 38, batch 2850, loss[loss=0.2401, ctc_loss=0.1203, cr_loss=0.3845, attn_decoder_loss=0.2449, over 29521.00 frames. ], tot_loss[loss=0.2351, ctc_loss=0.1148, cr_loss=0.3553, attn_decoder_loss=0.2406, over 5760870.52 frames. 
], batch size: 77, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:13:04,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=681180.0, ans=0.1 +2024-09-19 14:13:19,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.45 vs. limit=22.5 +2024-09-19 14:13:24,265 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:13:25,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=681220.0, ans=10.0 +2024-09-19 14:13:35,511 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.59 vs. limit=15.0 +2024-09-19 14:13:47,917 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.774e+01 8.614e+01 9.082e+01 1.001e+02 4.152e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 14:13:50,897 INFO [train.py:1198] (1/2) Epoch 38, batch 2900, loss[loss=0.2223, ctc_loss=0.1058, cr_loss=0.335, attn_decoder_loss=0.2278, over 29409.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1154, cr_loss=0.3568, attn_decoder_loss=0.2414, over 5786085.04 frames. ], batch size: 79, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:14:10,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=681340.0, ans=0.125 +2024-09-19 14:14:28,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=681380.0, ans=0.125 +2024-09-19 14:14:34,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=681420.0, ans=0.2 +2024-09-19 14:15:08,292 INFO [train.py:1198] (1/2) Epoch 38, batch 2950, loss[loss=0.226, ctc_loss=0.1064, cr_loss=0.3396, attn_decoder_loss=0.2317, over 29529.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1145, cr_loss=0.3549, attn_decoder_loss=0.2402, over 5781989.18 frames. 
], batch size: 75, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:15:17,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=681500.0, ans=0.125 +2024-09-19 14:15:23,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=681540.0, ans=0.1 +2024-09-19 14:15:34,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=681540.0, ans=0.125 +2024-09-19 14:15:34,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=681540.0, ans=0.1 +2024-09-19 14:15:35,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=681540.0, ans=0.125 +2024-09-19 14:15:54,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=681620.0, ans=0.5 +2024-09-19 14:16:05,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=681620.0, ans=0.2 +2024-09-19 14:16:23,460 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.275e+01 8.330e+01 8.968e+01 9.568e+01 1.287e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 14:16:26,709 INFO [train.py:1198] (1/2) Epoch 38, batch 3000, loss[loss=0.2349, ctc_loss=0.1108, cr_loss=0.3491, attn_decoder_loss=0.241, over 29769.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.114, cr_loss=0.3539, attn_decoder_loss=0.2401, over 5782569.21 frames. ], batch size: 81, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:16:26,710 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 14:16:30,946 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.3.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([1.8763, 2.1476, 2.3465, 2.3023, 2.3014, 2.3400, 2.5334, 2.4661], + device='cuda:1') +2024-09-19 14:16:45,081 INFO [train.py:1230] (1/2) Epoch 38, validation: loss=0.2118, ctc_loss=0.03653, cr_loss=5.871e-15, attn_decoder_loss=0.2312, over 944034.00 frames. 
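The loss fields printed throughout this log are consistent with a fixed weighted sum of the three training objectives, roughly loss ~= 0.1 * ctc_loss + 0.02 * cr_loss + 0.9 * attn_decoder_loss. Below is a minimal Python sketch that checks this relation against values logged nearby; the three scale values are assumptions fitted to the logged numbers, not read from the recipe's train.py, and may differ from the actual configuration.

    # Sketch: check that the logged total loss matches a weighted sum of its parts.
    # The weights below are assumptions inferred from this log, not the recipe's
    # documented settings.
    CTC_SCALE, CR_SCALE, AED_SCALE = 0.1, 0.02, 0.9  # assumed weights

    def combined(ctc_loss: float, cr_loss: float, attn_decoder_loss: float) -> float:
        return CTC_SCALE * ctc_loss + CR_SCALE * cr_loss + AED_SCALE * attn_decoder_loss

    # Validation entry above: loss=0.2118, ctc_loss=0.03653, cr_loss~=0, attn_decoder_loss=0.2312
    assert abs(combined(0.03653, 0.0, 0.2312) - 0.2118) < 1e-3
    # Training entry (Epoch 38, batch 2650): loss=0.2503, ctc_loss=0.1247,
    # cr_loss=0.3758, attn_decoder_loss=0.2559
    assert abs(combined(0.1247, 0.3758, 0.2559) - 0.2503) < 1e-3

Note that cr_loss collapses to ~0 (e.g. 5.871e-15 above) during validation, which is expected if the consistency-regularization term compares two augmented views that are only produced in training mode.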
+2024-09-19 14:16:45,081 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 14:16:59,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=681740.0, ans=0.025 +2024-09-19 14:17:03,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=681740.0, ans=0.0 +2024-09-19 14:17:03,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=681740.0, ans=0.0 +2024-09-19 14:17:14,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=681780.0, ans=0.0 +2024-09-19 14:17:15,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=681780.0, ans=0.125 +2024-09-19 14:17:30,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=681820.0, ans=0.1 +2024-09-19 14:17:44,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=681860.0, ans=0.025 +2024-09-19 14:17:49,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=681860.0, ans=0.125 +2024-09-19 14:17:52,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=681860.0, ans=0.95 +2024-09-19 14:17:53,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=681860.0, ans=0.0 +2024-09-19 14:18:00,865 INFO [train.py:1198] (1/2) Epoch 38, batch 3050, loss[loss=0.2192, ctc_loss=0.1031, cr_loss=0.3265, attn_decoder_loss=0.2248, over 29505.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1145, cr_loss=0.3551, attn_decoder_loss=0.2408, over 5776899.83 frames. ], batch size: 76, lr: 2.90e-03, grad_scale: 8.0 +2024-09-19 14:18:01,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=681900.0, ans=0.0 +2024-09-19 14:18:04,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=681900.0, ans=0.1 +2024-09-19 14:18:07,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=681900.0, ans=0.1 +2024-09-19 14:18:09,735 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.75 vs. 
limit=15.0 +2024-09-19 14:18:39,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=681980.0, ans=0.1 +2024-09-19 14:18:47,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=682020.0, ans=10.0 +2024-09-19 14:18:52,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=682020.0, ans=0.1 +2024-09-19 14:18:53,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=682020.0, ans=0.125 +2024-09-19 14:19:09,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=682060.0, ans=0.0 +2024-09-19 14:19:15,327 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.187e+01 8.484e+01 8.987e+01 9.703e+01 1.967e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 14:19:18,298 INFO [train.py:1198] (1/2) Epoch 38, batch 3100, loss[loss=0.2481, ctc_loss=0.1191, cr_loss=0.3707, attn_decoder_loss=0.2542, over 29266.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1148, cr_loss=0.3563, attn_decoder_loss=0.2407, over 5776327.88 frames. ], batch size: 100, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:19:23,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.39 vs. limit=15.0 +2024-09-19 14:19:59,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=682180.0, ans=0.1 +2024-09-19 14:20:13,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=682220.0, ans=0.0 +2024-09-19 14:20:21,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=682260.0, ans=0.025 +2024-09-19 14:20:27,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.50 vs. limit=22.5 +2024-09-19 14:20:35,963 INFO [train.py:1198] (1/2) Epoch 38, batch 3150, loss[loss=0.2473, ctc_loss=0.1238, cr_loss=0.3655, attn_decoder_loss=0.2529, over 28778.00 frames. ], tot_loss[loss=0.2352, ctc_loss=0.1147, cr_loss=0.3559, attn_decoder_loss=0.2406, over 5782840.58 frames. ], batch size: 104, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:20:37,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=682300.0, ans=0.2 +2024-09-19 14:20:50,295 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.05 vs. limit=6.0 +2024-09-19 14:20:51,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=682340.0, ans=0.125 +2024-09-19 14:20:53,469 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.09 vs. 
limit=15.0 +2024-09-19 14:21:01,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=682340.0, ans=0.2 +2024-09-19 14:21:11,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=682380.0, ans=0.5 +2024-09-19 14:21:32,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=682420.0, ans=0.05 +2024-09-19 14:21:48,495 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.507e+01 9.169e+01 9.644e+01 2.178e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-19 14:21:51,678 INFO [train.py:1198] (1/2) Epoch 38, batch 3200, loss[loss=0.2349, ctc_loss=0.1127, cr_loss=0.3282, attn_decoder_loss=0.2412, over 29412.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1144, cr_loss=0.3556, attn_decoder_loss=0.2401, over 5793040.62 frames. ], batch size: 79, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:21:55,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.20 vs. limit=22.5 +2024-09-19 14:22:35,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=682580.0, ans=0.0 +2024-09-19 14:23:02,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=682660.0, ans=0.1 +2024-09-19 14:23:09,408 INFO [train.py:1198] (1/2) Epoch 38, batch 3250, loss[loss=0.2337, ctc_loss=0.1097, cr_loss=0.3529, attn_decoder_loss=0.2397, over 29701.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1144, cr_loss=0.3555, attn_decoder_loss=0.2404, over 5799681.90 frames. ], batch size: 84, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:23:12,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=682700.0, ans=0.04949747468305833 +2024-09-19 14:23:12,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=682700.0, ans=0.025 +2024-09-19 14:23:17,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=682700.0, ans=0.0 +2024-09-19 14:23:25,903 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.86 vs. limit=15.0 +2024-09-19 14:23:37,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=682740.0, ans=0.125 +2024-09-19 14:23:41,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=682780.0, ans=0.0 +2024-09-19 14:23:56,078 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.43 vs. 
limit=15.0 +2024-09-19 14:23:58,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=682820.0, ans=0.0 +2024-09-19 14:24:01,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=682820.0, ans=0.125 +2024-09-19 14:24:19,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=682860.0, ans=0.0 +2024-09-19 14:24:23,686 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.574e+01 8.537e+01 9.091e+01 9.701e+01 1.814e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-19 14:24:26,790 INFO [train.py:1198] (1/2) Epoch 38, batch 3300, loss[loss=0.246, ctc_loss=0.1242, cr_loss=0.3863, attn_decoder_loss=0.2509, over 28194.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1139, cr_loss=0.3539, attn_decoder_loss=0.2393, over 5796553.55 frames. ], batch size: 111, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:24:31,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=682900.0, ans=0.1 +2024-09-19 14:24:37,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=682900.0, ans=0.125 +2024-09-19 14:25:01,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=682980.0, ans=0.2 +2024-09-19 14:25:03,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=682980.0, ans=0.125 +2024-09-19 14:25:09,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=682980.0, ans=0.125 +2024-09-19 14:25:12,969 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.55 vs. limit=15.0 +2024-09-19 14:25:15,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=683020.0, ans=0.2 +2024-09-19 14:25:27,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=683060.0, ans=10.0 +2024-09-19 14:25:42,133 INFO [train.py:1198] (1/2) Epoch 38, batch 3350, loss[loss=0.2503, ctc_loss=0.1314, cr_loss=0.3797, attn_decoder_loss=0.2551, over 28829.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1144, cr_loss=0.3545, attn_decoder_loss=0.24, over 5773472.23 frames. ], batch size: 104, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:26:10,192 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.97 vs. 
limit=10.0 +2024-09-19 14:26:25,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=683180.0, ans=0.125 +2024-09-19 14:26:42,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=683220.0, ans=0.125 +2024-09-19 14:26:57,152 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.556e+01 8.721e+01 9.366e+01 9.877e+01 4.380e+02, threshold=1.873e+02, percent-clipped=1.0 +2024-09-19 14:27:00,142 INFO [train.py:1198] (1/2) Epoch 38, batch 3400, loss[loss=0.2089, ctc_loss=0.09958, cr_loss=0.3273, attn_decoder_loss=0.2138, over 29379.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1146, cr_loss=0.3546, attn_decoder_loss=0.24, over 5764992.65 frames. ], batch size: 67, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:27:01,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=683300.0, ans=0.04949747468305833 +2024-09-19 14:27:41,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=683380.0, ans=0.125 +2024-09-19 14:27:47,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=683420.0, ans=0.125 +2024-09-19 14:27:49,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=683420.0, ans=0.0 +2024-09-19 14:27:50,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=683420.0, ans=0.0 +2024-09-19 14:28:04,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=683460.0, ans=0.125 +2024-09-19 14:28:17,514 INFO [train.py:1198] (1/2) Epoch 38, batch 3450, loss[loss=0.2439, ctc_loss=0.1167, cr_loss=0.3617, attn_decoder_loss=0.25, over 28118.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1147, cr_loss=0.3554, attn_decoder_loss=0.2403, over 5773696.14 frames. ], batch size: 111, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:28:22,807 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.68 vs. 
limit=15.0 +2024-09-19 14:28:23,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=683500.0, ans=0.125 +2024-09-19 14:28:42,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=683540.0, ans=0.125 +2024-09-19 14:28:45,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=683540.0, ans=0.0 +2024-09-19 14:28:45,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=683540.0, ans=0.125 +2024-09-19 14:28:49,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=683580.0, ans=0.125 +2024-09-19 14:28:52,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=683580.0, ans=0.2 +2024-09-19 14:28:57,666 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.38 vs. limit=15.0 +2024-09-19 14:29:00,509 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=19.10 vs. limit=22.5 +2024-09-19 14:29:26,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=683660.0, ans=0.0 +2024-09-19 14:29:31,893 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.708e+01 8.608e+01 9.088e+01 9.945e+01 4.659e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-19 14:29:33,407 INFO [train.py:1198] (1/2) Epoch 38, batch 3500, loss[loss=0.2112, ctc_loss=0.09454, cr_loss=0.3248, attn_decoder_loss=0.217, over 29314.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1142, cr_loss=0.3544, attn_decoder_loss=0.2396, over 5776626.43 frames. ], batch size: 71, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:29:41,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=683700.0, ans=0.0 +2024-09-19 14:29:44,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=683700.0, ans=0.1 +2024-09-19 14:30:14,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=683780.0, ans=0.2 +2024-09-19 14:30:40,436 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:30:49,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=683900.0, ans=0.125 +2024-09-19 14:30:50,286 INFO [train.py:1198] (1/2) Epoch 38, batch 3550, loss[loss=0.2389, ctc_loss=0.1119, cr_loss=0.3173, attn_decoder_loss=0.2459, over 29700.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1142, cr_loss=0.3544, attn_decoder_loss=0.2396, over 5782106.74 frames. ], batch size: 89, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:30:55,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.13 vs. 
limit=22.5 +2024-09-19 14:31:12,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=683940.0, ans=0.125 +2024-09-19 14:31:15,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=683940.0, ans=0.125 +2024-09-19 14:31:23,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=683980.0, ans=0.0 +2024-09-19 14:31:34,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=684020.0, ans=0.2 +2024-09-19 14:31:48,171 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=5.01 vs. limit=15.0 +2024-09-19 14:31:50,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=684060.0, ans=0.1 +2024-09-19 14:32:05,114 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.324e+01 8.514e+01 8.990e+01 9.421e+01 1.244e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-19 14:32:06,675 INFO [train.py:1198] (1/2) Epoch 38, batch 3600, loss[loss=0.2316, ctc_loss=0.1136, cr_loss=0.3635, attn_decoder_loss=0.2366, over 29489.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1147, cr_loss=0.356, attn_decoder_loss=0.24, over 5791653.82 frames. ], batch size: 77, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:32:13,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=684100.0, ans=0.125 +2024-09-19 14:32:26,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=684140.0, ans=0.125 +2024-09-19 14:32:36,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.10 vs. limit=6.0 +2024-09-19 14:33:05,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=684260.0, ans=0.0 +2024-09-19 14:33:06,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=684260.0, ans=0.1 +2024-09-19 14:33:21,071 INFO [train.py:1198] (1/2) Epoch 38, batch 3650, loss[loss=0.2519, ctc_loss=0.1273, cr_loss=0.3952, attn_decoder_loss=0.257, over 29484.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.114, cr_loss=0.3548, attn_decoder_loss=0.2392, over 5793262.44 frames. ], batch size: 90, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:33:28,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=684300.0, ans=0.125 +2024-09-19 14:33:33,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=684300.0, ans=0.025 +2024-09-19 14:33:41,429 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.84 vs. 
limit=15.0 +2024-09-19 14:34:16,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=684420.0, ans=0.0 +2024-09-19 14:34:28,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=684460.0, ans=0.125 +2024-09-19 14:34:29,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=684460.0, ans=0.0 +2024-09-19 14:34:34,296 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.479e+01 8.884e+01 9.372e+01 5.863e+02, threshold=1.777e+02, percent-clipped=1.0 +2024-09-19 14:34:35,782 INFO [train.py:1198] (1/2) Epoch 38, batch 3700, loss[loss=0.2386, ctc_loss=0.1119, cr_loss=0.3407, attn_decoder_loss=0.2451, over 29704.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.114, cr_loss=0.3547, attn_decoder_loss=0.2393, over 5803957.88 frames. ], batch size: 84, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:35:04,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=684580.0, ans=0.2 +2024-09-19 14:35:06,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=684580.0, ans=0.0 +2024-09-19 14:35:21,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=684620.0, ans=0.1 +2024-09-19 14:35:29,803 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=5.89 vs. limit=12.0 +2024-09-19 14:35:35,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=684660.0, ans=0.2 +2024-09-19 14:35:47,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=684660.0, ans=0.0 +2024-09-19 14:35:49,916 INFO [train.py:1198] (1/2) Epoch 38, batch 3750, loss[loss=0.2081, ctc_loss=0.09909, cr_loss=0.3251, attn_decoder_loss=0.213, over 29362.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.114, cr_loss=0.355, attn_decoder_loss=0.2394, over 5807769.95 frames. ], batch size: 67, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:36:32,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=684780.0, ans=0.125 +2024-09-19 14:36:48,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=684820.0, ans=0.0 +2024-09-19 14:37:04,809 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.242e+01 8.375e+01 8.926e+01 9.574e+01 1.662e+02, threshold=1.785e+02, percent-clipped=0.0 +2024-09-19 14:37:06,352 INFO [train.py:1198] (1/2) Epoch 38, batch 3800, loss[loss=0.2592, ctc_loss=0.1361, cr_loss=0.4064, attn_decoder_loss=0.2638, over 29633.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1139, cr_loss=0.3545, attn_decoder_loss=0.2392, over 5797225.53 frames. ], batch size: 86, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:37:11,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=684900.0, ans=0.1 +2024-09-19 14:37:14,553 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.25 vs. 
limit=15.0 +2024-09-19 14:37:18,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=684900.0, ans=0.07 +2024-09-19 14:37:23,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=684940.0, ans=0.125 +2024-09-19 14:37:33,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=684940.0, ans=0.125 +2024-09-19 14:37:38,342 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.88 vs. limit=15.0 +2024-09-19 14:38:10,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=685060.0, ans=0.125 +2024-09-19 14:38:20,241 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.55 vs. limit=15.0 +2024-09-19 14:38:22,275 INFO [train.py:1198] (1/2) Epoch 38, batch 3850, loss[loss=0.2423, ctc_loss=0.117, cr_loss=0.3581, attn_decoder_loss=0.2483, over 29297.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1138, cr_loss=0.3545, attn_decoder_loss=0.239, over 5811231.48 frames. ], batch size: 100, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:38:31,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=685100.0, ans=0.09899494936611666 +2024-09-19 14:38:33,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=685100.0, ans=0.0 +2024-09-19 14:38:51,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=685180.0, ans=0.125 +2024-09-19 14:38:52,722 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.27 vs. limit=22.5 +2024-09-19 14:38:54,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=685180.0, ans=0.2 +2024-09-19 14:38:57,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=685180.0, ans=0.0 +2024-09-19 14:39:05,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=685220.0, ans=0.0 +2024-09-19 14:39:15,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=685220.0, ans=0.0 +2024-09-19 14:39:16,165 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.59 vs. limit=22.5 +2024-09-19 14:39:23,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=685260.0, ans=0.07 +2024-09-19 14:39:34,607 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.061e+01 8.495e+01 8.957e+01 9.535e+01 1.173e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 14:39:36,179 INFO [train.py:1198] (1/2) Epoch 38, batch 3900, loss[loss=0.2451, ctc_loss=0.1124, cr_loss=0.3448, attn_decoder_loss=0.2521, over 29617.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1141, cr_loss=0.3549, attn_decoder_loss=0.2394, over 5815515.96 frames. 
], batch size: 86, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:39:37,009 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.40 vs. limit=15.0 +2024-09-19 14:39:42,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=685300.0, ans=0.1 +2024-09-19 14:39:51,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.01 vs. limit=10.0 +2024-09-19 14:39:54,572 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.39 vs. limit=15.0 +2024-09-19 14:40:08,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=685380.0, ans=0.125 +2024-09-19 14:40:23,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=685420.0, ans=0.125 +2024-09-19 14:40:35,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=685460.0, ans=0.1 +2024-09-19 14:40:49,773 INFO [train.py:1198] (1/2) Epoch 38, batch 3950, loss[loss=0.2431, ctc_loss=0.1147, cr_loss=0.3558, attn_decoder_loss=0.2495, over 29534.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1141, cr_loss=0.3555, attn_decoder_loss=0.2395, over 5835479.91 frames. ], batch size: 97, lr: 2.89e-03, grad_scale: 16.0 +2024-09-19 14:41:07,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=685540.0, ans=0.1 +2024-09-19 14:41:34,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=685620.0, ans=0.125 +2024-09-19 14:41:52,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=685660.0, ans=0.2 +2024-09-19 14:41:53,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=685660.0, ans=0.1 +2024-09-19 14:42:01,690 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.55 vs. limit=15.0 +2024-09-19 14:42:03,854 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.426e+01 8.719e+01 9.341e+01 1.012e+02 2.118e+02, threshold=1.868e+02, percent-clipped=1.0 +2024-09-19 14:42:05,291 INFO [train.py:1198] (1/2) Epoch 38, batch 4000, loss[loss=0.2245, ctc_loss=0.104, cr_loss=0.3283, attn_decoder_loss=0.2306, over 29521.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1142, cr_loss=0.3555, attn_decoder_loss=0.2397, over 5812510.78 frames. 
], batch size: 74, lr: 2.89e-03, grad_scale: 32.0 +2024-09-19 14:42:15,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=685700.0, ans=0.125 +2024-09-19 14:42:41,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=685780.0, ans=0.125 +2024-09-19 14:43:07,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=685860.0, ans=0.125 +2024-09-19 14:43:20,734 INFO [train.py:1198] (1/2) Epoch 38, batch 4050, loss[loss=0.2537, ctc_loss=0.1484, cr_loss=0.4087, attn_decoder_loss=0.2563, over 20447.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1141, cr_loss=0.355, attn_decoder_loss=0.2394, over 5796746.42 frames. ], batch size: 209, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:43:31,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=685900.0, ans=0.125 +2024-09-19 14:43:35,463 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:43:59,311 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.24 vs. limit=15.0 +2024-09-19 14:44:20,143 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.50 vs. limit=15.0 +2024-09-19 14:44:25,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=686060.0, ans=0.125 +2024-09-19 14:44:25,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=686060.0, ans=0.125 +2024-09-19 14:44:33,762 INFO [train.py:1198] (1/2) Epoch 38, batch 4100, loss[loss=0.25, ctc_loss=0.1297, cr_loss=0.3796, attn_decoder_loss=0.255, over 29488.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1142, cr_loss=0.3545, attn_decoder_loss=0.2393, over 5792947.30 frames. ], batch size: 90, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:44:35,195 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.479e+01 8.495e+01 9.024e+01 9.584e+01 1.415e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 14:45:25,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=686220.0, ans=0.0 +2024-09-19 14:45:36,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=686260.0, ans=0.125 +2024-09-19 14:45:47,168 INFO [train.py:1198] (1/2) Epoch 38, batch 4150, loss[loss=0.2304, ctc_loss=0.1139, cr_loss=0.3608, attn_decoder_loss=0.2353, over 29487.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1144, cr_loss=0.3551, attn_decoder_loss=0.2395, over 5797985.57 frames. ], batch size: 77, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:45:52,688 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.38 vs. 
limit=10.0 +2024-09-19 14:46:07,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=686340.0, ans=0.125 +2024-09-19 14:46:31,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=686420.0, ans=0.07 +2024-09-19 14:46:50,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=686460.0, ans=0.0 +2024-09-19 14:47:01,641 INFO [train.py:1198] (1/2) Epoch 38, batch 4200, loss[loss=0.2497, ctc_loss=0.127, cr_loss=0.38, attn_decoder_loss=0.2549, over 29493.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1142, cr_loss=0.3546, attn_decoder_loss=0.2397, over 5799544.37 frames. ], batch size: 90, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:47:03,140 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.063e+01 8.618e+01 9.071e+01 9.625e+01 1.972e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-19 14:47:04,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=686500.0, ans=0.5 +2024-09-19 14:47:13,180 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.04 vs. limit=15.0 +2024-09-19 14:47:21,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=686540.0, ans=0.125 +2024-09-19 14:47:21,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=686540.0, ans=0.125 +2024-09-19 14:47:50,245 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:47:52,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.97 vs. limit=10.0 +2024-09-19 14:48:07,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=686660.0, ans=0.5 +2024-09-19 14:48:16,372 INFO [train.py:1198] (1/2) Epoch 38, batch 4250, loss[loss=0.2096, ctc_loss=0.0897, cr_loss=0.3027, attn_decoder_loss=0.2162, over 29520.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1135, cr_loss=0.3531, attn_decoder_loss=0.2395, over 5806421.86 frames. ], batch size: 74, lr: 2.89e-03, grad_scale: 8.0 +2024-09-19 14:48:29,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=686740.0, ans=0.05 +2024-09-19 14:48:31,795 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.07 vs. limit=22.5 +2024-09-19 14:48:38,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=686740.0, ans=0.2 +2024-09-19 14:49:12,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=686820.0, ans=0.0 +2024-09-19 14:49:12,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=686820.0, ans=0.125 +2024-09-19 14:49:30,555 INFO [train.py:1198] (1/2) Epoch 38, batch 4300, loss[loss=0.2406, ctc_loss=0.1159, cr_loss=0.3681, attn_decoder_loss=0.2463, over 29521.00 frames. 
], tot_loss[loss=0.2343, ctc_loss=0.1135, cr_loss=0.3532, attn_decoder_loss=0.2398, over 5796458.71 frames. ], batch size: 87, lr: 2.88e-03, grad_scale: 8.0 +2024-09-19 14:49:32,030 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.244e+01 8.697e+01 9.242e+01 9.593e+01 9.804e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-19 14:49:34,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=686900.0, ans=15.0 +2024-09-19 14:49:36,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=686900.0, ans=0.1 +2024-09-19 14:49:53,405 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:50:05,162 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 14:50:28,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=687060.0, ans=0.0 +2024-09-19 14:50:41,984 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=687060.0, ans=0.07 +2024-09-19 14:50:45,380 INFO [train.py:1198] (1/2) Epoch 38, batch 4350, loss[loss=0.2503, ctc_loss=0.123, cr_loss=0.3722, attn_decoder_loss=0.2561, over 29489.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1164, cr_loss=0.3595, attn_decoder_loss=0.243, over 5799326.02 frames. ], batch size: 97, lr: 2.88e-03, grad_scale: 8.0 +2024-09-19 14:50:55,253 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.04 vs. limit=22.5 +2024-09-19 14:51:10,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=687140.0, ans=0.125 +2024-09-19 14:51:22,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=687180.0, ans=0.125 +2024-09-19 14:51:26,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=687180.0, ans=0.125 +2024-09-19 14:51:33,262 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.63 vs. limit=15.0 +2024-09-19 14:51:42,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=687260.0, ans=0.125 +2024-09-19 14:51:58,763 INFO [train.py:1198] (1/2) Epoch 38, batch 4400, loss[loss=0.2491, ctc_loss=0.1246, cr_loss=0.3732, attn_decoder_loss=0.2547, over 27449.00 frames. ], tot_loss[loss=0.2392, ctc_loss=0.1172, cr_loss=0.3613, attn_decoder_loss=0.2447, over 5767820.61 frames. 
], batch size: 124, lr: 2.88e-03, grad_scale: 16.0 +2024-09-19 14:52:00,220 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.169e+01 8.939e+01 9.261e+01 9.709e+01 1.293e+02, threshold=1.852e+02, percent-clipped=0.0 +2024-09-19 14:52:06,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=687300.0, ans=0.0 +2024-09-19 14:52:13,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=687340.0, ans=0.125 +2024-09-19 14:52:13,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=687340.0, ans=0.2 +2024-09-19 14:52:15,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=687340.0, ans=0.125 +2024-09-19 14:52:18,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=687340.0, ans=0.1 +2024-09-19 14:52:30,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=687380.0, ans=0.2 +2024-09-19 14:52:37,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=687380.0, ans=0.5 +2024-09-19 14:52:37,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=687380.0, ans=0.0 +2024-09-19 14:53:01,759 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.58 vs. limit=10.0 +2024-09-19 14:53:13,726 INFO [train.py:1198] (1/2) Epoch 38, batch 4450, loss[loss=0.261, ctc_loss=0.1539, cr_loss=0.3939, attn_decoder_loss=0.2642, over 19951.00 frames. ], tot_loss[loss=0.2415, ctc_loss=0.1205, cr_loss=0.367, attn_decoder_loss=0.2468, over 5574662.16 frames. ], batch size: 209, lr: 2.88e-03, grad_scale: 16.0 +2024-09-19 14:53:17,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=687500.0, ans=0.125 +2024-09-19 14:53:18,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=687500.0, ans=0.125 +2024-09-19 14:53:20,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=6.12 vs. 
limit=10.0 +2024-09-19 14:53:21,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=687500.0, ans=0.07 +2024-09-19 14:53:24,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=687500.0, ans=0.025 +2024-09-19 14:53:29,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=687540.0, ans=0.025 +2024-09-19 14:53:53,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=687580.0, ans=0.125 +2024-09-19 14:54:14,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=687660.0, ans=0.0 +2024-09-19 14:54:28,750 INFO [train.py:1198] (1/2) Epoch 38, batch 4500, loss[loss=0.2604, ctc_loss=0.1438, cr_loss=0.4014, attn_decoder_loss=0.2644, over 20642.00 frames. ], tot_loss[loss=0.2435, ctc_loss=0.1239, cr_loss=0.3694, attn_decoder_loss=0.2486, over 5232654.02 frames. ], batch size: 211, lr: 2.88e-03, grad_scale: 8.0 +2024-09-19 14:54:31,692 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.150e+01 9.974e+01 1.104e+02 1.169e+02 2.298e+02, threshold=2.208e+02, percent-clipped=1.0 +2024-09-19 14:54:55,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=687740.0, ans=0.125 +2024-09-19 14:54:55,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=687740.0, ans=0.125 +2024-09-19 14:55:50,039 INFO [train.py:1198] (1/2) Epoch 39, batch 0, loss[loss=0.2146, ctc_loss=0.09725, cr_loss=0.3361, attn_decoder_loss=0.2202, over 29632.00 frames. ], tot_loss[loss=0.2146, ctc_loss=0.09725, cr_loss=0.3361, attn_decoder_loss=0.2202, over 29632.00 frames. ], batch size: 73, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 14:55:50,039 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 14:56:08,887 INFO [train.py:1230] (1/2) Epoch 39, validation: loss=0.2125, ctc_loss=0.03631, cr_loss=6.129e-15, attn_decoder_loss=0.232, over 944034.00 frames. +2024-09-19 14:56:08,887 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 14:56:37,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=687840.0, ans=0.125 +2024-09-19 14:56:40,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=687880.0, ans=0.125 +2024-09-19 14:56:44,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=8.61 vs. limit=15.0 +2024-09-19 14:56:46,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=687880.0, ans=0.125 +2024-09-19 14:57:03,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=687920.0, ans=0.2 +2024-09-19 14:57:17,335 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.26 vs. 
limit=10.0 +2024-09-19 14:57:32,783 INFO [train.py:1198] (1/2) Epoch 39, batch 50, loss[loss=0.2038, ctc_loss=0.09344, cr_loss=0.2908, attn_decoder_loss=0.2095, over 29406.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.114, cr_loss=0.357, attn_decoder_loss=0.2396, over 1267293.14 frames. ], batch size: 70, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 14:57:36,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=688000.0, ans=0.07 +2024-09-19 14:58:03,979 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.85 vs. limit=6.0 +2024-09-19 14:58:14,950 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.223e+01 8.884e+01 9.468e+01 1.073e+02 2.116e+02, threshold=1.894e+02, percent-clipped=0.0 +2024-09-19 14:58:16,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=688120.0, ans=0.125 +2024-09-19 14:58:18,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=688120.0, ans=0.0 +2024-09-19 14:58:43,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=688160.0, ans=0.125 +2024-09-19 14:58:44,397 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.87 vs. limit=15.0 +2024-09-19 14:58:45,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=688160.0, ans=15.0 +2024-09-19 14:58:47,906 INFO [train.py:1198] (1/2) Epoch 39, batch 100, loss[loss=0.2294, ctc_loss=0.1133, cr_loss=0.3612, attn_decoder_loss=0.2342, over 29545.00 frames. ], tot_loss[loss=0.2369, ctc_loss=0.1164, cr_loss=0.3599, attn_decoder_loss=0.2423, over 2250840.67 frames. ], batch size: 76, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 14:58:51,720 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.37 vs. limit=15.0 +2024-09-19 14:59:00,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=688200.0, ans=0.1 +2024-09-19 14:59:09,524 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.58 vs. limit=15.0 +2024-09-19 14:59:12,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=688240.0, ans=0.0 +2024-09-19 14:59:27,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=688280.0, ans=0.0 +2024-09-19 14:59:39,117 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=7.91 vs. limit=22.5 +2024-09-19 15:00:04,827 INFO [train.py:1198] (1/2) Epoch 39, batch 150, loss[loss=0.2108, ctc_loss=0.1062, cr_loss=0.3375, attn_decoder_loss=0.215, over 29394.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.114, cr_loss=0.3551, attn_decoder_loss=0.2399, over 3045595.18 frames. 
], batch size: 70, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:00:29,270 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.51 vs. limit=12.0 +2024-09-19 15:00:37,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=688480.0, ans=0.0 +2024-09-19 15:00:47,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=688480.0, ans=10.0 +2024-09-19 15:00:48,857 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 8.419e+01 8.955e+01 9.625e+01 1.555e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 15:00:59,768 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=688520.0, ans=0.2 +2024-09-19 15:01:07,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=688560.0, ans=0.1 +2024-09-19 15:01:22,060 INFO [train.py:1198] (1/2) Epoch 39, batch 200, loss[loss=0.2476, ctc_loss=0.1228, cr_loss=0.375, attn_decoder_loss=0.2531, over 27343.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1135, cr_loss=0.3535, attn_decoder_loss=0.239, over 3658256.86 frames. ], batch size: 124, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:01:33,551 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.64 vs. limit=15.0 +2024-09-19 15:01:49,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=688640.0, ans=0.125 +2024-09-19 15:01:58,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=688680.0, ans=0.0 +2024-09-19 15:01:59,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=688680.0, ans=0.125 +2024-09-19 15:02:02,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=688680.0, ans=0.125 +2024-09-19 15:02:08,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=688720.0, ans=0.2 +2024-09-19 15:02:36,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=688800.0, ans=0.05 +2024-09-19 15:02:37,645 INFO [train.py:1198] (1/2) Epoch 39, batch 250, loss[loss=0.2589, ctc_loss=0.1369, cr_loss=0.4088, attn_decoder_loss=0.2633, over 29237.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1133, cr_loss=0.353, attn_decoder_loss=0.2387, over 4141688.56 frames. 
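Each training entry decomposes the objective into `ctc_loss`, a consistency-regularization term `cr_loss`, and `attn_decoder_loss`, with the headline `loss` as a weighted sum of the three. The logged numbers are consistent with weights of roughly 0.1, 0.02 and 0.9 (for example 0.1*0.1239 + 0.02*0.3694 + 0.9*0.2486 = 0.2435, matching the epoch 38 batch 4500 totals), but those weights are inferred from this log rather than read from the run's configuration:

```python
def combine_losses(ctc_loss: float, cr_loss: float,
                   attn_decoder_loss: float,
                   ctc_weight: float = 0.1,
                   cr_weight: float = 0.02,
                   attn_weight: float = 0.9) -> float:
    """Hybrid CTC / consistency-regularization / attention-decoder loss.

    Default weights are inferred from the logged totals, not taken from
    the training config. E.g. the epoch 39 batch 0 entry:
    0.1 * 0.09725 + 0.02 * 0.3361 + 0.9 * 0.2202 = 0.2146.
    """
    return (ctc_weight * ctc_loss
            + cr_weight * cr_loss
            + attn_weight * attn_decoder_loss)

# The validation entry fits the same weights; cr_loss collapses to ~0
# there, consistent with a regularizer that vanishes when no
# perturbation is applied in eval mode:
print(combine_losses(0.03631, 6.129e-15, 0.232))  # ~0.2125
```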
], batch size: 100, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:03:19,941 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.148e+01 8.603e+01 9.098e+01 9.821e+01 6.363e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-19 15:03:21,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=688920.0, ans=0.125 +2024-09-19 15:03:52,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=688960.0, ans=0.125 +2024-09-19 15:03:52,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=688960.0, ans=0.0 +2024-09-19 15:03:55,540 INFO [train.py:1198] (1/2) Epoch 39, batch 300, loss[loss=0.2519, ctc_loss=0.1275, cr_loss=0.4035, attn_decoder_loss=0.2568, over 29553.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1131, cr_loss=0.3523, attn_decoder_loss=0.2388, over 4509597.31 frames. ], batch size: 92, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:04:28,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=689080.0, ans=0.0 +2024-09-19 15:04:42,312 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.54 vs. limit=15.0 +2024-09-19 15:05:06,023 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:05:13,145 INFO [train.py:1198] (1/2) Epoch 39, batch 350, loss[loss=0.2156, ctc_loss=0.101, cr_loss=0.3284, attn_decoder_loss=0.221, over 29341.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1136, cr_loss=0.3536, attn_decoder_loss=0.2395, over 4795725.71 frames. ], batch size: 71, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:05:13,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=689200.0, ans=0.0 +2024-09-19 15:05:24,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=689200.0, ans=0.2 +2024-09-19 15:05:25,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=689200.0, ans=0.0 +2024-09-19 15:05:30,576 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.54 vs. limit=6.0 +2024-09-19 15:05:32,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=689240.0, ans=0.1 +2024-09-19 15:05:32,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=689240.0, ans=0.125 +2024-09-19 15:05:46,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=11.12 vs. 
limit=15.0 +2024-09-19 15:05:52,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=689280.0, ans=0.125 +2024-09-19 15:05:55,258 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.629e+01 8.459e+01 8.983e+01 9.522e+01 3.712e+02, threshold=1.797e+02, percent-clipped=2.0 +2024-09-19 15:06:00,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=689320.0, ans=0.04949747468305833 +2024-09-19 15:06:18,584 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:06:28,538 INFO [train.py:1198] (1/2) Epoch 39, batch 400, loss[loss=0.2473, ctc_loss=0.1274, cr_loss=0.3941, attn_decoder_loss=0.2518, over 29713.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1139, cr_loss=0.355, attn_decoder_loss=0.2394, over 5024357.85 frames. ], batch size: 82, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:06:36,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=689400.0, ans=0.125 +2024-09-19 15:06:42,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=689440.0, ans=0.125 +2024-09-19 15:07:12,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=689520.0, ans=0.125 +2024-09-19 15:07:27,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=689560.0, ans=0.125 +2024-09-19 15:07:46,823 INFO [train.py:1198] (1/2) Epoch 39, batch 450, loss[loss=0.2498, ctc_loss=0.1285, cr_loss=0.3669, attn_decoder_loss=0.2552, over 29694.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1141, cr_loss=0.3546, attn_decoder_loss=0.2396, over 5186924.97 frames. ], batch size: 83, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:07:48,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=689600.0, ans=0.2 +2024-09-19 15:07:53,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=689600.0, ans=0.125 +2024-09-19 15:07:54,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=689600.0, ans=0.95 +2024-09-19 15:08:05,245 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.35 vs. 
limit=12.0 +2024-09-19 15:08:13,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=689640.0, ans=0.125 +2024-09-19 15:08:24,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=689680.0, ans=0.0 +2024-09-19 15:08:30,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=689680.0, ans=0.0 +2024-09-19 15:08:32,805 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.453e+01 8.422e+01 8.949e+01 9.558e+01 1.384e+02, threshold=1.790e+02, percent-clipped=0.0 +2024-09-19 15:08:39,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=689720.0, ans=0.125 +2024-09-19 15:09:04,769 INFO [train.py:1198] (1/2) Epoch 39, batch 500, loss[loss=0.2545, ctc_loss=0.1311, cr_loss=0.3853, attn_decoder_loss=0.2597, over 29436.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1134, cr_loss=0.3536, attn_decoder_loss=0.2387, over 5330712.87 frames. ], batch size: 94, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:09:06,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=689800.0, ans=0.0 +2024-09-19 15:09:25,436 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.16 vs. limit=15.0 +2024-09-19 15:09:38,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=689880.0, ans=0.125 +2024-09-19 15:09:44,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=689880.0, ans=0.125 +2024-09-19 15:09:47,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=689880.0, ans=0.0 +2024-09-19 15:09:50,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=689920.0, ans=0.125 +2024-09-19 15:10:08,863 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.61 vs. limit=15.0 +2024-09-19 15:10:15,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.42 vs. limit=22.5 +2024-09-19 15:10:20,414 INFO [train.py:1198] (1/2) Epoch 39, batch 550, loss[loss=0.24, ctc_loss=0.1139, cr_loss=0.3538, attn_decoder_loss=0.2462, over 28839.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.113, cr_loss=0.353, attn_decoder_loss=0.2385, over 5423072.97 frames. ], batch size: 104, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:10:29,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=690000.0, ans=0.125 +2024-09-19 15:10:32,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=690000.0, ans=0.0 +2024-09-19 15:10:50,354 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.79 vs. 
limit=15.0 +2024-09-19 15:11:04,425 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.487e+01 8.632e+01 8.977e+01 9.526e+01 2.010e+02, threshold=1.795e+02, percent-clipped=2.0 +2024-09-19 15:11:19,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten.whitening_limit, batch_count=690120.0, ans=15.0 +2024-09-19 15:11:27,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=690160.0, ans=0.2 +2024-09-19 15:11:38,692 INFO [train.py:1198] (1/2) Epoch 39, batch 600, loss[loss=0.2507, ctc_loss=0.1266, cr_loss=0.3929, attn_decoder_loss=0.2557, over 29304.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1134, cr_loss=0.3538, attn_decoder_loss=0.2392, over 5509685.80 frames. ], batch size: 100, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:11:51,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=690200.0, ans=0.0 +2024-09-19 15:11:55,430 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.17 vs. limit=12.0 +2024-09-19 15:12:24,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=690320.0, ans=0.125 +2024-09-19 15:12:26,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=690320.0, ans=0.125 +2024-09-19 15:12:31,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=690320.0, ans=0.125 +2024-09-19 15:12:33,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=690320.0, ans=0.025 +2024-09-19 15:12:45,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=690360.0, ans=0.05 +2024-09-19 15:12:56,381 INFO [train.py:1198] (1/2) Epoch 39, batch 650, loss[loss=0.2324, ctc_loss=0.1089, cr_loss=0.327, attn_decoder_loss=0.2388, over 29755.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1125, cr_loss=0.3514, attn_decoder_loss=0.2383, over 5586395.96 frames. 
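The recurring `WARNING [optim.py:487] Clipping_scale=2.0, grad-norm quartiles ...` lines document data-driven gradient clipping: in every instance above, the threshold equals `clipping_scale` times the middle of the five norm statistics (for example 2.0 * 8.977e+01 = 1.795e+02), i.e. twice the median gradient norm over a recent window. The sketch below implements that rule; the window size, the cumulative `percent-clipped` bookkeeping, and the class itself are illustrative, not icefall's actual optimizer internals:

```python
from collections import deque

import torch

class QuartileClipper:
    """Clip gradients at clipping_scale * median of recent gradient norms.

    Illustrative sketch: keeps a window of recent global grad norms and
    derives the clipping threshold from their median, mirroring the
    "grad-norm quartiles ... threshold=..." log format.
    """

    def __init__(self, clipping_scale: float = 2.0, window: int = 128):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)
        self.num_batches = 0
        self.num_clipped = 0

    def clip_(self, params) -> None:
        grads = [p.grad for p in params if p.grad is not None]
        norm = torch.linalg.vector_norm(
            torch.stack([torch.linalg.vector_norm(g) for g in grads])
        ).item()
        self.norms.append(norm)
        q = torch.quantile(torch.tensor(list(self.norms)),
                           torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = self.clipping_scale * q[2].item()  # scale * median
        self.num_batches += 1
        if norm > threshold:
            self.num_clipped += 1
            for g in grads:
                g.mul_(threshold / norm)  # rescale gradients in place
        pct = 100.0 * self.num_clipped / self.num_batches
        print(f"Clipping_scale={self.clipping_scale}, grad-norm quartiles "
              + " ".join(f"{v:.3e}" for v in q.tolist())
              + f", threshold={threshold:.3e}, percent-clipped={pct}")
```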
], batch size: 81, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:13:05,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=690400.0, ans=0.125 +2024-09-19 15:13:37,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=690480.0, ans=0.125 +2024-09-19 15:13:40,405 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.365e+01 8.432e+01 8.976e+01 9.547e+01 1.845e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-19 15:13:45,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=690520.0, ans=0.1 +2024-09-19 15:13:49,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=690520.0, ans=0.05 +2024-09-19 15:13:56,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=690560.0, ans=0.125 +2024-09-19 15:14:04,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=690560.0, ans=0.09899494936611666 +2024-09-19 15:14:06,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=690560.0, ans=0.1 +2024-09-19 15:14:12,092 INFO [train.py:1198] (1/2) Epoch 39, batch 700, loss[loss=0.2272, ctc_loss=0.1143, cr_loss=0.3677, attn_decoder_loss=0.2316, over 29501.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1133, cr_loss=0.3534, attn_decoder_loss=0.2392, over 5635426.57 frames. ], batch size: 76, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:14:16,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=690600.0, ans=10.0 +2024-09-19 15:14:27,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=690640.0, ans=0.1 +2024-09-19 15:14:38,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.87 vs. limit=15.0 +2024-09-19 15:14:43,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=690680.0, ans=0.125 +2024-09-19 15:14:45,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=690680.0, ans=0.125 +2024-09-19 15:15:03,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=690720.0, ans=0.2 +2024-09-19 15:15:06,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=690720.0, ans=0.0 +2024-09-19 15:15:17,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=690760.0, ans=0.125 +2024-09-19 15:15:27,463 INFO [train.py:1198] (1/2) Epoch 39, batch 750, loss[loss=0.2337, ctc_loss=0.112, cr_loss=0.3523, attn_decoder_loss=0.2394, over 29712.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.113, cr_loss=0.3527, attn_decoder_loss=0.2388, over 5675397.28 frames. 
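The `ScheduledFloat` entries that dominate this log record module hyper-parameters (bypass skip rates, balancer probabilities, `const_attention_rate`, dropout probabilities) whose values are annealed as a function of `batch_count`. Below is a minimal sketch of such a schedule, assuming piecewise-linear interpolation between `(batch_count, value)` breakpoints; the breakpoints shown are illustrative, and icefall's actual `ScheduledFloat` in `scaling.py` carries extra machinery beyond this:

```python
import bisect

class PiecewiseLinearSchedule:
    """Map a training step (batch_count) to a float hyper-parameter.

    Values are linearly interpolated between (batch_count, value)
    breakpoints and held constant beyond the first and last ones.
    """

    def __init__(self, *points):
        self.points = sorted(points)  # e.g. ((0.0, 0.2), (4000.0, 0.07))
        self.xs = [x for x, _ in self.points]

    def __call__(self, batch_count: float) -> float:
        i = bisect.bisect_right(self.xs, batch_count)
        if i == 0:
            return self.points[0][1]
        if i == len(self.points):
            return self.points[-1][1]
        (x0, y0), (x1, y1) = self.points[i - 1], self.points[i]
        t = (batch_count - x0) / (x1 - x0)
        return y0 + t * (y1 - y0)

# Illustrative breakpoints: a skip rate decaying from 0.2 to 0.07.
skip_rate = PiecewiseLinearSchedule((0.0, 0.2), (4000.0, 0.07))
print(skip_rate(687500.0))  # -> 0.07, the steady value logged as `ans=0.07`
```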
], batch size: 82, lr: 2.84e-03, grad_scale: 8.0 +2024-09-19 15:15:30,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=690800.0, ans=0.0 +2024-09-19 15:15:50,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=690840.0, ans=0.0 +2024-09-19 15:16:15,751 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.433e+01 8.497e+01 9.078e+01 9.651e+01 1.974e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-19 15:16:31,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=690960.0, ans=0.125 +2024-09-19 15:16:47,094 INFO [train.py:1198] (1/2) Epoch 39, batch 800, loss[loss=0.2093, ctc_loss=0.0983, cr_loss=0.3262, attn_decoder_loss=0.2144, over 29613.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1131, cr_loss=0.3528, attn_decoder_loss=0.2388, over 5706619.66 frames. ], batch size: 73, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:16:59,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=691000.0, ans=0.0 +2024-09-19 15:17:09,283 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.59 vs. limit=15.0 +2024-09-19 15:17:16,637 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.67 vs. limit=15.0 +2024-09-19 15:17:20,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=691080.0, ans=0.125 +2024-09-19 15:17:41,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=691120.0, ans=0.09899494936611666 +2024-09-19 15:18:02,587 INFO [train.py:1198] (1/2) Epoch 39, batch 850, loss[loss=0.2465, ctc_loss=0.1251, cr_loss=0.3965, attn_decoder_loss=0.2512, over 29702.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1122, cr_loss=0.3509, attn_decoder_loss=0.238, over 5736265.04 frames. ], batch size: 89, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:18:08,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=691200.0, ans=0.07 +2024-09-19 15:18:14,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.29 vs. 
limit=15.0 +2024-09-19 15:18:19,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=691240.0, ans=15.0 +2024-09-19 15:18:20,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=691240.0, ans=0.025 +2024-09-19 15:18:28,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=691240.0, ans=0.125 +2024-09-19 15:18:44,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=691280.0, ans=0.125 +2024-09-19 15:18:46,233 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.532e+01 8.459e+01 8.825e+01 9.402e+01 1.909e+02, threshold=1.765e+02, percent-clipped=1.0 +2024-09-19 15:18:47,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=691320.0, ans=0.125 +2024-09-19 15:19:10,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=691360.0, ans=0.125 +2024-09-19 15:19:18,058 INFO [train.py:1198] (1/2) Epoch 39, batch 900, loss[loss=0.2164, ctc_loss=0.102, cr_loss=0.3193, attn_decoder_loss=0.222, over 29573.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1122, cr_loss=0.3515, attn_decoder_loss=0.2384, over 5741784.77 frames. ], batch size: 73, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:19:29,639 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.72 vs. limit=15.0 +2024-09-19 15:19:32,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=691400.0, ans=0.2 +2024-09-19 15:20:29,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=691560.0, ans=0.025 +2024-09-19 15:20:35,665 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.30 vs. limit=15.0 +2024-09-19 15:20:37,720 INFO [train.py:1198] (1/2) Epoch 39, batch 950, loss[loss=0.2344, ctc_loss=0.1136, cr_loss=0.3649, attn_decoder_loss=0.2398, over 29503.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1127, cr_loss=0.3523, attn_decoder_loss=0.239, over 5743468.51 frames. ], batch size: 74, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:20:38,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.09 vs. limit=15.0 +2024-09-19 15:20:42,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=691600.0, ans=0.125 +2024-09-19 15:20:52,195 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.51 vs. limit=15.0 +2024-09-19 15:21:15,536 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.57 vs. 
limit=5.0 +2024-09-19 15:21:20,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=691680.0, ans=0.125 +2024-09-19 15:21:21,643 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.688e+01 9.370e+01 1.012e+02 2.860e+02, threshold=1.874e+02, percent-clipped=2.0 +2024-09-19 15:21:24,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=691720.0, ans=0.025 +2024-09-19 15:21:53,217 INFO [train.py:1198] (1/2) Epoch 39, batch 1000, loss[loss=0.2362, ctc_loss=0.1199, cr_loss=0.3636, attn_decoder_loss=0.241, over 29510.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1137, cr_loss=0.3537, attn_decoder_loss=0.2398, over 5737576.32 frames. ], batch size: 77, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:21:59,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=691800.0, ans=0.125 +2024-09-19 15:22:13,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=691840.0, ans=22.5 +2024-09-19 15:22:17,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=691840.0, ans=0.125 +2024-09-19 15:22:35,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=691880.0, ans=0.125 +2024-09-19 15:22:42,070 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.64 vs. limit=15.0 +2024-09-19 15:23:02,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=691960.0, ans=0.125 +2024-09-19 15:23:08,636 INFO [train.py:1198] (1/2) Epoch 39, batch 1050, loss[loss=0.2471, ctc_loss=0.1257, cr_loss=0.3924, attn_decoder_loss=0.2518, over 29670.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1135, cr_loss=0.3532, attn_decoder_loss=0.2395, over 5745939.50 frames. ], batch size: 85, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:23:08,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=692000.0, ans=0.125 +2024-09-19 15:23:34,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.71 vs. limit=6.0 +2024-09-19 15:23:36,083 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.78 vs. 
limit=22.5 +2024-09-19 15:23:46,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=692080.0, ans=10.0 +2024-09-19 15:23:54,906 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.608e+01 8.552e+01 9.121e+01 9.553e+01 1.921e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-19 15:24:13,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=692160.0, ans=0.125 +2024-09-19 15:24:14,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=692160.0, ans=0.125 +2024-09-19 15:24:26,504 INFO [train.py:1198] (1/2) Epoch 39, batch 1100, loss[loss=0.2313, ctc_loss=0.1105, cr_loss=0.3571, attn_decoder_loss=0.2367, over 29445.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1132, cr_loss=0.3534, attn_decoder_loss=0.2393, over 5757487.83 frames. ], batch size: 78, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:24:43,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=692240.0, ans=0.125 +2024-09-19 15:24:45,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=692240.0, ans=0.0 +2024-09-19 15:25:04,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=692280.0, ans=0.07 +2024-09-19 15:25:13,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=692320.0, ans=0.125 +2024-09-19 15:25:42,568 INFO [train.py:1198] (1/2) Epoch 39, batch 1150, loss[loss=0.2262, ctc_loss=0.1074, cr_loss=0.3504, attn_decoder_loss=0.2316, over 29465.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1134, cr_loss=0.3531, attn_decoder_loss=0.2391, over 5755561.54 frames. 
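The `Whitening: name=..., metric=M vs. limit=L` entries come from a regularizer that measures how far a layer's activations are from having a white (identity-like) covariance, and only intervenes when the measurement exceeds `limit`. One standard metric with exactly this behaviour, the mean squared covariance eigenvalue divided by the squared mean eigenvalue (1.0 for perfectly white features, approaching the group size as the spectrum collapses toward rank one), is sketched below; treating this as the precise quantity icefall's `Whiten` module computes is an assumption, and the penalty gradient is omitted:

```python
import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> torch.Tensor:
    """x: (num_frames, num_channels); returns a scalar >= 1.0.

    Splits channels into groups, forms each group's covariance C, and
    averages  mean(eig(C)^2) / mean(eig(C))^2  over groups, which equals
    trace(C @ C) * n / trace(C)^2 for an n x n covariance.
    """
    num_frames, num_channels = x.shape
    assert num_channels % num_groups == 0
    n = num_channels // num_groups
    x = x.reshape(num_frames, num_groups, n).transpose(0, 1)
    x = x - x.mean(dim=1, keepdim=True)
    cov = torch.matmul(x.transpose(1, 2), x) / num_frames   # (groups, n, n)
    trace_c = cov.diagonal(dim1=1, dim2=2).sum(dim=1)       # trace(C)
    trace_c2 = (cov * cov).sum(dim=(1, 2))                  # trace(C @ C)
    return (trace_c2 * n / (trace_c ** 2 + 1e-20)).mean()

# iid Gaussian features score just above 1.0 (sampling noise only), far
# below the limits of 5.0-22.5 seen in the log.
torch.manual_seed(0)
print(whitening_metric(torch.randn(1000, 256), num_groups=1))
```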
], batch size: 78, lr: 2.84e-03, grad_scale: 16.0 +2024-09-19 15:25:44,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=692400.0, ans=0.025 +2024-09-19 15:25:49,058 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:25:49,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=692400.0, ans=0.125 +2024-09-19 15:25:55,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=692400.0, ans=0.0 +2024-09-19 15:26:01,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=692440.0, ans=0.2 +2024-09-19 15:26:07,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=692440.0, ans=0.125 +2024-09-19 15:26:25,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=692480.0, ans=0.2 +2024-09-19 15:26:26,558 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.659e+01 8.488e+01 9.080e+01 9.695e+01 1.564e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 15:26:55,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=692560.0, ans=0.0 +2024-09-19 15:26:58,173 INFO [train.py:1198] (1/2) Epoch 39, batch 1200, loss[loss=0.2394, ctc_loss=0.12, cr_loss=0.3586, attn_decoder_loss=0.2447, over 29688.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1138, cr_loss=0.3542, attn_decoder_loss=0.2398, over 5748007.53 frames. ], batch size: 85, lr: 2.83e-03, grad_scale: 32.0 +2024-09-19 15:27:00,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=692600.0, ans=0.125 +2024-09-19 15:27:14,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=692640.0, ans=0.125 +2024-09-19 15:27:33,999 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.43 vs. limit=10.0 +2024-09-19 15:27:42,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=692680.0, ans=0.1 +2024-09-19 15:27:44,328 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.49 vs. limit=15.0 +2024-09-19 15:28:18,500 INFO [train.py:1198] (1/2) Epoch 39, batch 1250, loss[loss=0.2547, ctc_loss=0.122, cr_loss=0.3713, attn_decoder_loss=0.2612, over 29539.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1143, cr_loss=0.3557, attn_decoder_loss=0.2404, over 5774727.82 frames. 
], batch size: 92, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:28:44,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=692840.0, ans=0.0 +2024-09-19 15:28:44,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=692840.0, ans=0.2 +2024-09-19 15:28:49,862 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.99 vs. limit=15.0 +2024-09-19 15:28:56,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff3.min_abs, batch_count=692880.0, ans=0.2 +2024-09-19 15:28:57,449 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.53 vs. limit=22.5 +2024-09-19 15:29:05,374 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.383e+01 8.604e+01 9.074e+01 9.816e+01 4.150e+02, threshold=1.815e+02, percent-clipped=2.0 +2024-09-19 15:29:32,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=693000.0, ans=0.125 +2024-09-19 15:29:33,892 INFO [train.py:1198] (1/2) Epoch 39, batch 1300, loss[loss=0.2441, ctc_loss=0.12, cr_loss=0.3781, attn_decoder_loss=0.2495, over 28335.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1138, cr_loss=0.3551, attn_decoder_loss=0.2397, over 5778853.22 frames. ], batch size: 111, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:29:36,359 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.28 vs. limit=15.0 +2024-09-19 15:29:44,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.70 vs. limit=15.0 +2024-09-19 15:29:52,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=693040.0, ans=0.2 +2024-09-19 15:29:55,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=693040.0, ans=0.1 +2024-09-19 15:29:55,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=693040.0, ans=0.1 +2024-09-19 15:30:04,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=693080.0, ans=0.125 +2024-09-19 15:30:13,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=693080.0, ans=0.125 +2024-09-19 15:30:28,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=693120.0, ans=0.125 +2024-09-19 15:30:49,669 INFO [train.py:1198] (1/2) Epoch 39, batch 1350, loss[loss=0.2352, ctc_loss=0.114, cr_loss=0.3743, attn_decoder_loss=0.2403, over 29752.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1135, cr_loss=0.3542, attn_decoder_loss=0.2394, over 5795428.33 frames. 
], batch size: 81, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:31:16,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=693240.0, ans=0.125 +2024-09-19 15:31:22,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=693280.0, ans=0.0 +2024-09-19 15:31:29,595 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.61 vs. limit=6.0 +2024-09-19 15:31:35,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=693280.0, ans=0.025 +2024-09-19 15:31:38,210 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:31:39,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=693320.0, ans=0.2 +2024-09-19 15:31:40,745 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.173e+01 8.563e+01 8.987e+01 9.374e+01 1.474e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-19 15:31:46,513 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.52 vs. limit=6.0 +2024-09-19 15:31:59,233 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:32:02,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=693360.0, ans=0.125 +2024-09-19 15:32:09,542 INFO [train.py:1198] (1/2) Epoch 39, batch 1400, loss[loss=0.2054, ctc_loss=0.08855, cr_loss=0.2893, attn_decoder_loss=0.2119, over 29604.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1134, cr_loss=0.3538, attn_decoder_loss=0.239, over 5807143.27 frames. ], batch size: 69, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:32:09,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=693400.0, ans=0.2 +2024-09-19 15:32:19,597 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.78 vs. limit=15.0 +2024-09-19 15:32:38,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=693480.0, ans=0.2 +2024-09-19 15:32:44,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=693480.0, ans=0.025 +2024-09-19 15:32:48,098 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.37 vs. limit=22.5 +2024-09-19 15:33:02,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=693520.0, ans=0.125 +2024-09-19 15:33:13,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=693560.0, ans=0.0 +2024-09-19 15:33:25,284 INFO [train.py:1198] (1/2) Epoch 39, batch 1450, loss[loss=0.2542, ctc_loss=0.1333, cr_loss=0.3897, attn_decoder_loss=0.259, over 29422.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1134, cr_loss=0.3535, attn_decoder_loss=0.2394, over 5803405.64 frames. 
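A note on the `tot_loss[..., over F frames]` aggregates: each batch's loss is weighted by its frame count, and the total frame counts grow early in the epoch but then hover around five to six million rather than accumulating without bound, which is consistent with an exponentially decayed running sum covering roughly the last couple of hundred batches. The tracker below is a sketch under that assumption; `reset_interval=200` is a guess, not a value taken from this run:

```python
class RunningLoss:
    """Frame-weighted, exponentially decayed loss aggregate.

    Each update applies  stats <- stats * (1 - 1/reset_interval) + batch,
    so the reported average reflects roughly the last `reset_interval`
    batches, with every batch weighted by its number of frames.
    """

    def __init__(self, reset_interval: int = 200):
        self.decay = 1.0 - 1.0 / reset_interval
        self.loss_sum = 0.0  # decayed sum of loss * frames
        self.frames = 0.0    # decayed sum of frames

    def update(self, loss: float, frames: float) -> None:
        self.loss_sum = self.loss_sum * self.decay + loss * frames
        self.frames = self.frames * self.decay + frames

    @property
    def value(self) -> float:
        return self.loss_sum / max(self.frames, 1.0)

# Usage mirroring one log entry:
tracker = RunningLoss()
tracker.update(loss=0.2352, frames=29752.0)
print(f"tot_loss[loss={tracker.value:.4f}, over {tracker.frames:.2f} frames]")
```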
], batch size: 94, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:33:31,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=693600.0, ans=0.125 +2024-09-19 15:33:35,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=693600.0, ans=0.125 +2024-09-19 15:34:11,639 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.483e+01 8.532e+01 9.213e+01 9.668e+01 2.812e+02, threshold=1.843e+02, percent-clipped=2.0 +2024-09-19 15:34:12,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=693720.0, ans=0.0 +2024-09-19 15:34:13,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.96 vs. limit=15.0 +2024-09-19 15:34:25,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=693760.0, ans=0.025 +2024-09-19 15:34:40,278 INFO [train.py:1198] (1/2) Epoch 39, batch 1500, loss[loss=0.2416, ctc_loss=0.1186, cr_loss=0.3719, attn_decoder_loss=0.247, over 29635.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1136, cr_loss=0.3542, attn_decoder_loss=0.2398, over 5803726.68 frames. ], batch size: 86, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:34:45,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.82 vs. limit=6.0 +2024-09-19 15:34:51,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.29 vs. limit=15.0 +2024-09-19 15:35:21,751 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:35:30,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=693920.0, ans=0.0 +2024-09-19 15:35:35,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=693920.0, ans=0.125 +2024-09-19 15:35:36,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=693920.0, ans=0.1 +2024-09-19 15:35:41,835 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.65 vs. limit=15.0 +2024-09-19 15:35:45,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.53 vs. limit=22.5 +2024-09-19 15:35:48,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=9.94 vs. limit=22.5 +2024-09-19 15:35:54,332 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.14 vs. limit=8.0 +2024-09-19 15:35:57,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=693960.0, ans=0.125 +2024-09-19 15:36:00,509 INFO [train.py:1198] (1/2) Epoch 39, batch 1550, loss[loss=0.2472, ctc_loss=0.1252, cr_loss=0.3796, attn_decoder_loss=0.2524, over 29473.00 frames. 
], tot_loss[loss=0.2343, ctc_loss=0.1138, cr_loss=0.3545, attn_decoder_loss=0.2398, over 5780471.69 frames. ], batch size: 90, lr: 2.83e-03, grad_scale: 8.0 +2024-09-19 15:36:09,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=694000.0, ans=0.2 +2024-09-19 15:36:19,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.16 vs. limit=15.0 +2024-09-19 15:36:22,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.79 vs. limit=22.5 +2024-09-19 15:36:23,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=694040.0, ans=0.125 +2024-09-19 15:36:44,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=694120.0, ans=0.1 +2024-09-19 15:36:47,233 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.408e+01 8.376e+01 8.823e+01 9.525e+01 1.389e+02, threshold=1.765e+02, percent-clipped=0.0 +2024-09-19 15:36:56,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=694120.0, ans=0.2 +2024-09-19 15:37:03,397 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.59 vs. limit=15.0 +2024-09-19 15:37:04,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=694160.0, ans=0.0 +2024-09-19 15:37:14,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=694200.0, ans=0.125 +2024-09-19 15:37:14,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=694200.0, ans=0.0 +2024-09-19 15:37:16,080 INFO [train.py:1198] (1/2) Epoch 39, batch 1600, loss[loss=0.2409, ctc_loss=0.121, cr_loss=0.372, attn_decoder_loss=0.246, over 29681.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1138, cr_loss=0.3543, attn_decoder_loss=0.2396, over 5764066.68 frames. ], batch size: 85, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:37:17,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=694200.0, ans=0.0 +2024-09-19 15:37:19,694 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.53 vs. 
limit=12.0 +2024-09-19 15:37:25,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=694200.0, ans=0.125 +2024-09-19 15:38:01,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=694320.0, ans=0.2 +2024-09-19 15:38:01,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=694320.0, ans=0.1 +2024-09-19 15:38:07,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=694320.0, ans=0.0 +2024-09-19 15:38:22,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=694360.0, ans=0.025 +2024-09-19 15:38:28,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=694360.0, ans=0.125 +2024-09-19 15:38:30,559 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.99 vs. limit=6.0 +2024-09-19 15:38:31,551 INFO [train.py:1198] (1/2) Epoch 39, batch 1650, loss[loss=0.2503, ctc_loss=0.1142, cr_loss=0.3665, attn_decoder_loss=0.2573, over 29718.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1136, cr_loss=0.3539, attn_decoder_loss=0.2395, over 5760057.53 frames. ], batch size: 89, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:38:43,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.43 vs. limit=6.0 +2024-09-19 15:38:44,627 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=694400.0, ans=0.0 +2024-09-19 15:38:47,625 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 15:38:59,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=694440.0, ans=0.125 +2024-09-19 15:39:07,905 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.75 vs. limit=12.0 +2024-09-19 15:39:13,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.86 vs. limit=15.0 +2024-09-19 15:39:17,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=694480.0, ans=0.0 +2024-09-19 15:39:22,828 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.314e+01 8.471e+01 8.887e+01 9.578e+01 2.740e+02, threshold=1.777e+02, percent-clipped=2.0 +2024-09-19 15:39:26,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=694520.0, ans=0.2 +2024-09-19 15:39:51,167 INFO [train.py:1198] (1/2) Epoch 39, batch 1700, loss[loss=0.2009, ctc_loss=0.08864, cr_loss=0.2975, attn_decoder_loss=0.2067, over 29552.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1134, cr_loss=0.3536, attn_decoder_loss=0.2395, over 5781507.61 frames. 
], batch size: 69, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:39:58,081 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.82 vs. limit=22.5 +2024-09-19 15:40:44,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=694720.0, ans=10.0 +2024-09-19 15:40:53,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=694760.0, ans=0.1 +2024-09-19 15:41:06,500 INFO [train.py:1198] (1/2) Epoch 39, batch 1750, loss[loss=0.2064, ctc_loss=0.09019, cr_loss=0.3019, attn_decoder_loss=0.2126, over 29355.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.113, cr_loss=0.3526, attn_decoder_loss=0.2388, over 5789972.63 frames. ], batch size: 67, lr: 2.83e-03, grad_scale: 16.0 +2024-09-19 15:41:06,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=694800.0, ans=0.125 +2024-09-19 15:41:07,397 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.15 vs. limit=15.0 +2024-09-19 15:41:11,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=694800.0, ans=0.1 +2024-09-19 15:41:17,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=694800.0, ans=0.125 +2024-09-19 15:41:21,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=694840.0, ans=0.0 +2024-09-19 15:41:41,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=694880.0, ans=0.125 +2024-09-19 15:41:46,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=694880.0, ans=0.125 +2024-09-19 15:41:53,563 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.741e+01 9.226e+01 9.687e+01 1.772e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-19 15:41:53,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=694920.0, ans=0.0 +2024-09-19 15:41:53,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=694920.0, ans=0.0 +2024-09-19 15:41:55,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=694920.0, ans=0.125 +2024-09-19 15:41:55,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=694920.0, ans=0.1 +2024-09-19 15:41:56,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=694920.0, ans=0.1 +2024-09-19 15:41:57,521 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.30 vs. limit=10.0 +2024-09-19 15:42:01,641 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.76 vs. 
limit=15.0
+2024-09-19 15:42:10,895 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.19 vs. limit=22.5
+2024-09-19 15:42:16,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=694960.0, ans=0.2
+2024-09-19 15:42:22,081 INFO [train.py:1198] (1/2) Epoch 39, batch 1800, loss[loss=0.2422, ctc_loss=0.1282, cr_loss=0.386, attn_decoder_loss=0.2463, over 29672.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1133, cr_loss=0.3531, attn_decoder_loss=0.2391, over 5792050.46 frames. ], batch size: 83, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:42:32,184 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.97 vs. limit=15.0
+2024-09-19 15:42:32,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=695000.0, ans=0.125
+2024-09-19 15:42:41,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=695040.0, ans=0.125
+2024-09-19 15:42:48,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=695040.0, ans=0.125
+2024-09-19 15:42:56,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=695080.0, ans=0.0
+2024-09-19 15:42:57,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=695080.0, ans=0.2
+2024-09-19 15:43:00,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=695080.0, ans=0.0
+2024-09-19 15:43:35,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=695160.0, ans=0.1
+2024-09-19 15:43:41,997 INFO [train.py:1198] (1/2) Epoch 39, batch 1850, loss[loss=0.2403, ctc_loss=0.1073, cr_loss=0.3307, attn_decoder_loss=0.2478, over 29628.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1133, cr_loss=0.3532, attn_decoder_loss=0.2391, over 5797491.23 frames. ], batch size: 86, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:43:51,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=695200.0, ans=0.125
+2024-09-19 15:43:57,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=695240.0, ans=0.125
+2024-09-19 15:44:01,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=695240.0, ans=0.0
+2024-09-19 15:44:20,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=695280.0, ans=0.125
+2024-09-19 15:44:21,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=695280.0, ans=0.125
+2024-09-19 15:44:28,629 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.257e+01 8.566e+01 9.030e+01 9.513e+01 1.502e+02, threshold=1.806e+02, percent-clipped=0.0
+2024-09-19 15:44:41,299 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.41 vs. limit=15.0
+2024-09-19 15:44:46,203 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.63 vs. limit=15.0
+2024-09-19 15:44:56,815 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.21 vs. limit=22.5
+2024-09-19 15:44:57,274 INFO [train.py:1198] (1/2) Epoch 39, batch 1900, loss[loss=0.2345, ctc_loss=0.1052, cr_loss=0.3386, attn_decoder_loss=0.2413, over 29713.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1135, cr_loss=0.3537, attn_decoder_loss=0.2395, over 5804986.63 frames. ], batch size: 89, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:44:58,123 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.78 vs. limit=12.0
+2024-09-19 15:45:05,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=695400.0, ans=0.125
+2024-09-19 15:45:19,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=695440.0, ans=0.0
+2024-09-19 15:45:28,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=695480.0, ans=0.0
+2024-09-19 15:45:43,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=695520.0, ans=0.0
+2024-09-19 15:46:13,429 INFO [train.py:1198] (1/2) Epoch 39, batch 1950, loss[loss=0.2323, ctc_loss=0.1056, cr_loss=0.3294, attn_decoder_loss=0.239, over 29460.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1145, cr_loss=0.3566, attn_decoder_loss=0.2409, over 5820183.65 frames. ], batch size: 78, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:46:24,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=695600.0, ans=0.025
+2024-09-19 15:46:26,238 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.29 vs. limit=22.5
+2024-09-19 15:46:32,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=695640.0, ans=0.0
+2024-09-19 15:46:43,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=695640.0, ans=0.1
+2024-09-19 15:46:45,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=695680.0, ans=0.125
+2024-09-19 15:47:04,138 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 8.858e+01 9.313e+01 9.741e+01 2.178e+02, threshold=1.863e+02, percent-clipped=1.0
+2024-09-19 15:47:05,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=695720.0, ans=0.0
+2024-09-19 15:47:25,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=695760.0, ans=0.0
+2024-09-19 15:47:25,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=695760.0, ans=0.2
+2024-09-19 15:47:32,836 INFO [train.py:1198] (1/2) Epoch 39, batch 2000, loss[loss=0.2055, ctc_loss=0.0959, cr_loss=0.3087, attn_decoder_loss=0.2108, over 29335.00 frames. ], tot_loss[loss=0.2358, ctc_loss=0.1151, cr_loss=0.357, attn_decoder_loss=0.2412, over 5797738.69 frames. ], batch size: 67, lr: 2.83e-03, grad_scale: 32.0
+2024-09-19 15:47:48,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=695840.0, ans=0.125
+2024-09-19 15:47:49,148 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.47 vs. limit=15.0
+2024-09-19 15:47:50,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=695840.0, ans=0.1
+2024-09-19 15:48:44,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=695960.0, ans=0.0
+2024-09-19 15:48:48,812 INFO [train.py:1198] (1/2) Epoch 39, batch 2050, loss[loss=0.2122, ctc_loss=0.0968, cr_loss=0.3178, attn_decoder_loss=0.218, over 29434.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1141, cr_loss=0.3548, attn_decoder_loss=0.2399, over 5789172.79 frames. ], batch size: 70, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:48:52,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=696000.0, ans=0.025
+2024-09-19 15:49:29,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=696080.0, ans=0.0
+2024-09-19 15:49:32,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=696120.0, ans=0.1
+2024-09-19 15:49:37,159 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.009e+01 8.542e+01 8.929e+01 9.648e+01 1.386e+02, threshold=1.786e+02, percent-clipped=0.0
+2024-09-19 15:49:52,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=696160.0, ans=0.125
+2024-09-19 15:50:02,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.01 vs. limit=12.0
+2024-09-19 15:50:04,426 INFO [train.py:1198] (1/2) Epoch 39, batch 2100, loss[loss=0.2414, ctc_loss=0.1165, cr_loss=0.3603, attn_decoder_loss=0.2472, over 29755.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1138, cr_loss=0.3544, attn_decoder_loss=0.2396, over 5801458.05 frames. ], batch size: 81, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:50:36,023 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.38 vs. limit=10.0
+2024-09-19 15:50:39,101 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.69 vs. limit=15.0
+2024-09-19 15:50:40,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=696280.0, ans=0.0
+2024-09-19 15:50:44,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=696280.0, ans=0.035
+2024-09-19 15:50:44,474 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=696280.0, ans=0.0
+2024-09-19 15:50:45,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=696280.0, ans=0.0
+2024-09-19 15:50:50,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=696320.0, ans=0.07
+2024-09-19 15:50:51,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.17 vs. limit=10.0
+2024-09-19 15:50:59,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=696320.0, ans=0.125
+2024-09-19 15:51:05,121 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.15 vs. limit=6.0
+2024-09-19 15:51:06,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=696320.0, ans=0.125
+2024-09-19 15:51:23,877 INFO [train.py:1198] (1/2) Epoch 39, batch 2150, loss[loss=0.228, ctc_loss=0.1091, cr_loss=0.3456, attn_decoder_loss=0.2336, over 29445.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.113, cr_loss=0.3526, attn_decoder_loss=0.2389, over 5816008.36 frames. ], batch size: 78, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:51:27,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=696400.0, ans=0.0
+2024-09-19 15:51:28,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=696400.0, ans=0.025
+2024-09-19 15:51:55,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=696480.0, ans=0.025
+2024-09-19 15:52:10,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=696520.0, ans=0.125
+2024-09-19 15:52:12,124 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.553e+01 8.545e+01 9.048e+01 9.484e+01 1.799e+02, threshold=1.810e+02, percent-clipped=1.0
+2024-09-19 15:52:17,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_ff3.min_abs, batch_count=696520.0, ans=0.2
+2024-09-19 15:52:32,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=696560.0, ans=0.2
+2024-09-19 15:52:34,233 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.44 vs. limit=15.0
+2024-09-19 15:52:36,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=696560.0, ans=0.2
+2024-09-19 15:52:39,538 INFO [train.py:1198] (1/2) Epoch 39, batch 2200, loss[loss=0.2401, ctc_loss=0.1098, cr_loss=0.3403, attn_decoder_loss=0.247, over 29617.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1135, cr_loss=0.3535, attn_decoder_loss=0.2392, over 5812251.86 frames. ], batch size: 86, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:52:39,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=696600.0, ans=0.125
+2024-09-19 15:52:54,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=696640.0, ans=0.125
+2024-09-19 15:52:55,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.83 vs. limit=22.5
+2024-09-19 15:52:57,060 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.69 vs. limit=15.0
+2024-09-19 15:53:43,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.60 vs. limit=22.5
+2024-09-19 15:53:54,168 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.67 vs. limit=12.0
+2024-09-19 15:53:55,432 INFO [train.py:1198] (1/2) Epoch 39, batch 2250, loss[loss=0.2494, ctc_loss=0.1181, cr_loss=0.3814, attn_decoder_loss=0.2555, over 29710.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.113, cr_loss=0.353, attn_decoder_loss=0.2389, over 5811904.62 frames. ], batch size: 82, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:54:06,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=696800.0, ans=0.2
+2024-09-19 15:54:25,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=696840.0, ans=0.07
+2024-09-19 15:54:28,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.11 vs. limit=15.0
+2024-09-19 15:54:45,789 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.531e+01 8.499e+01 9.039e+01 9.530e+01 1.426e+02, threshold=1.808e+02, percent-clipped=0.0
+2024-09-19 15:55:04,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=696960.0, ans=0.125
+2024-09-19 15:55:06,922 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=11.77 vs. limit=15.0
+2024-09-19 15:55:15,228 INFO [train.py:1198] (1/2) Epoch 39, batch 2300, loss[loss=0.209, ctc_loss=0.09698, cr_loss=0.3205, attn_decoder_loss=0.2143, over 29337.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1124, cr_loss=0.3512, attn_decoder_loss=0.238, over 5798315.21 frames. ], batch size: 71, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:55:33,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=697040.0, ans=0.025
+2024-09-19 15:55:34,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=697040.0, ans=0.125
+2024-09-19 15:55:50,987 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=4.97 vs. limit=15.0
+2024-09-19 15:56:08,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.63 vs. limit=15.0
+2024-09-19 15:56:23,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=697160.0, ans=0.125
+2024-09-19 15:56:30,680 INFO [train.py:1198] (1/2) Epoch 39, batch 2350, loss[loss=0.2414, ctc_loss=0.1183, cr_loss=0.3801, attn_decoder_loss=0.2466, over 29698.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1129, cr_loss=0.3521, attn_decoder_loss=0.2386, over 5802985.09 frames. ], batch size: 83, lr: 2.83e-03, grad_scale: 8.0
+2024-09-19 15:56:47,577 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 15:56:56,421 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 15:56:56,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=697240.0, ans=0.09899494936611666
+2024-09-19 15:57:07,056 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 15:57:16,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=697320.0, ans=0.025
+2024-09-19 15:57:20,254 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.672e+01 9.121e+01 9.858e+01 6.738e+02, threshold=1.824e+02, percent-clipped=2.0
+2024-09-19 15:57:30,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=697360.0, ans=0.015
+2024-09-19 15:57:46,029 INFO [train.py:1198] (1/2) Epoch 39, batch 2400, loss[loss=0.2194, ctc_loss=0.0994, cr_loss=0.3317, attn_decoder_loss=0.2254, over 29543.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1136, cr_loss=0.3536, attn_decoder_loss=0.2392, over 5807307.53 frames. ], batch size: 76, lr: 2.83e-03, grad_scale: 16.0
+2024-09-19 15:57:46,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=697400.0, ans=0.0
+2024-09-19 15:58:14,353 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 15:58:20,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.05 vs. limit=15.0
+2024-09-19 15:58:24,826 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 15:58:26,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=697480.0, ans=0.125
+2024-09-19 15:58:27,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=697480.0, ans=0.05
+2024-09-19 15:58:36,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=697520.0, ans=0.125
+2024-09-19 15:58:40,855 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.23 vs. limit=22.5
+2024-09-19 15:58:44,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=697520.0, ans=0.0
+2024-09-19 15:59:01,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=697560.0, ans=0.125
+2024-09-19 15:59:06,339 INFO [train.py:1198] (1/2) Epoch 39, batch 2450, loss[loss=0.2403, ctc_loss=0.1142, cr_loss=0.3588, attn_decoder_loss=0.2463, over 29683.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1141, cr_loss=0.3555, attn_decoder_loss=0.24, over 5783734.85 frames. ], batch size: 82, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 15:59:24,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=697640.0, ans=0.2
+2024-09-19 15:59:29,434 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.75 vs. limit=10.0
+2024-09-19 15:59:29,611 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.24 vs. limit=15.0
+2024-09-19 15:59:55,610 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.313e+01 8.647e+01 9.273e+01 9.890e+01 2.382e+02, threshold=1.855e+02, percent-clipped=2.0
+2024-09-19 16:00:16,372 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.30 vs. limit=15.0
+2024-09-19 16:00:21,282 INFO [train.py:1198] (1/2) Epoch 39, batch 2500, loss[loss=0.2568, ctc_loss=0.1302, cr_loss=0.3879, attn_decoder_loss=0.2623, over 29620.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1139, cr_loss=0.3552, attn_decoder_loss=0.2399, over 5794497.71 frames. ], batch size: 86, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:00:24,912 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.12 vs. limit=15.0
+2024-09-19 16:00:26,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=697800.0, ans=0.125
+2024-09-19 16:00:39,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=697840.0, ans=0.2
+2024-09-19 16:00:47,729 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.43 vs. limit=15.0
+2024-09-19 16:00:50,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.10 vs. limit=15.0
+2024-09-19 16:01:06,308 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.30 vs. limit=6.0
+2024-09-19 16:01:11,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=697920.0, ans=0.0
+2024-09-19 16:01:17,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=697920.0, ans=0.125
+2024-09-19 16:01:19,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=697920.0, ans=0.125
+2024-09-19 16:01:37,121 INFO [train.py:1198] (1/2) Epoch 39, batch 2550, loss[loss=0.1988, ctc_loss=0.08845, cr_loss=0.3036, attn_decoder_loss=0.2043, over 29353.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1133, cr_loss=0.3538, attn_decoder_loss=0.2394, over 5797464.60 frames. ], batch size: 67, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:02:01,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=698040.0, ans=0.1
+2024-09-19 16:02:05,759 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.10 vs. limit=15.0
+2024-09-19 16:02:06,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=698040.0, ans=0.2
+2024-09-19 16:02:28,818 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.383e+01 8.876e+01 9.415e+01 4.021e+02, threshold=1.775e+02, percent-clipped=1.0
+2024-09-19 16:02:56,977 INFO [train.py:1198] (1/2) Epoch 39, batch 2600, loss[loss=0.2257, ctc_loss=0.1105, cr_loss=0.3472, attn_decoder_loss=0.2308, over 29461.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1138, cr_loss=0.3545, attn_decoder_loss=0.24, over 5793140.24 frames. ], batch size: 78, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:02:59,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.76 vs. limit=15.0
+2024-09-19 16:03:13,320 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.19 vs. limit=15.0
+2024-09-19 16:03:30,897 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=10.18 vs. limit=15.0
+2024-09-19 16:03:56,852 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.22 vs. limit=15.0
+2024-09-19 16:04:02,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.60 vs. limit=15.0
+2024-09-19 16:04:12,645 INFO [train.py:1198] (1/2) Epoch 39, batch 2650, loss[loss=0.2482, ctc_loss=0.1291, cr_loss=0.3954, attn_decoder_loss=0.2527, over 29238.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1142, cr_loss=0.3555, attn_decoder_loss=0.2404, over 5800944.17 frames. ], batch size: 100, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:05:02,269 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.675e+01 8.983e+01 9.685e+01 2.002e+02, threshold=1.797e+02, percent-clipped=1.0
+2024-09-19 16:05:17,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer_ff3.min_abs, batch_count=698560.0, ans=0.2
+2024-09-19 16:05:26,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=698600.0, ans=0.2
+2024-09-19 16:05:27,840 INFO [train.py:1198] (1/2) Epoch 39, batch 2700, loss[loss=0.2404, ctc_loss=0.1089, cr_loss=0.331, attn_decoder_loss=0.2476, over 29512.00 frames. ], tot_loss[loss=0.235, ctc_loss=0.1142, cr_loss=0.3557, attn_decoder_loss=0.2406, over 5796201.14 frames. ], batch size: 87, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:05:28,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=698600.0, ans=0.125
+2024-09-19 16:05:28,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=698600.0, ans=0.125
+2024-09-19 16:05:32,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=698600.0, ans=0.125
+2024-09-19 16:05:49,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=698640.0, ans=0.125
+2024-09-19 16:06:03,332 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=698680.0, ans=0.125
+2024-09-19 16:06:06,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=698680.0, ans=0.125
+2024-09-19 16:06:37,075 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.96 vs. limit=15.0
+2024-09-19 16:06:45,671 INFO [train.py:1198] (1/2) Epoch 39, batch 2750, loss[loss=0.231, ctc_loss=0.1145, cr_loss=0.367, attn_decoder_loss=0.2358, over 29503.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1133, cr_loss=0.3532, attn_decoder_loss=0.2391, over 5794568.35 frames. ], batch size: 75, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:06:45,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=698800.0, ans=0.125
+2024-09-19 16:06:48,085 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.51 vs. limit=6.0
+2024-09-19 16:07:00,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=698800.0, ans=0.125
+2024-09-19 16:07:04,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=698840.0, ans=0.125
+2024-09-19 16:07:09,679 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.02 vs. limit=15.0
+2024-09-19 16:07:20,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.00 vs. limit=15.0
+2024-09-19 16:07:38,785 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.635e+01 8.394e+01 9.092e+01 9.647e+01 2.225e+02, threshold=1.818e+02, percent-clipped=1.0
+2024-09-19 16:08:03,256 INFO [train.py:1198] (1/2) Epoch 39, batch 2800, loss[loss=0.2511, ctc_loss=0.1354, cr_loss=0.3685, attn_decoder_loss=0.2558, over 20333.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1137, cr_loss=0.3542, attn_decoder_loss=0.2394, over 5774757.65 frames. ], batch size: 210, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:08:24,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=699040.0, ans=0.125
+2024-09-19 16:08:53,685 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.91 vs. limit=10.0
+2024-09-19 16:09:04,013 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.16 vs. limit=15.0
+2024-09-19 16:09:18,776 INFO [train.py:1198] (1/2) Epoch 39, batch 2850, loss[loss=0.2289, ctc_loss=0.1202, cr_loss=0.3768, attn_decoder_loss=0.2326, over 29499.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1138, cr_loss=0.3545, attn_decoder_loss=0.2397, over 5760549.15 frames. ], batch size: 77, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:09:30,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=699200.0, ans=0.125
+2024-09-19 16:09:46,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=699240.0, ans=0.025
+2024-09-19 16:09:58,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=699280.0, ans=0.125
+2024-09-19 16:10:09,389 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:10:10,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=699320.0, ans=0.125
+2024-09-19 16:10:13,569 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.429e+01 8.628e+01 9.119e+01 9.691e+01 3.191e+02, threshold=1.824e+02, percent-clipped=2.0
+2024-09-19 16:10:32,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=699360.0, ans=0.0
+2024-09-19 16:10:35,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=699400.0, ans=0.0
+2024-09-19 16:10:36,337 INFO [train.py:1198] (1/2) Epoch 39, batch 2900, loss[loss=0.2294, ctc_loss=0.1069, cr_loss=0.3506, attn_decoder_loss=0.2353, over 29424.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1147, cr_loss=0.3566, attn_decoder_loss=0.2409, over 5786803.73 frames. ], batch size: 79, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:10:58,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=699440.0, ans=0.125
+2024-09-19 16:11:28,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=699520.0, ans=0.0
+2024-09-19 16:11:43,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=699560.0, ans=0.025
+2024-09-19 16:11:53,779 INFO [train.py:1198] (1/2) Epoch 39, batch 2950, loss[loss=0.2306, ctc_loss=0.1184, cr_loss=0.3758, attn_decoder_loss=0.2348, over 29528.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1135, cr_loss=0.3539, attn_decoder_loss=0.2394, over 5782045.11 frames. ], batch size: 75, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:11:55,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=699600.0, ans=0.125
+2024-09-19 16:12:24,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=699680.0, ans=0.125
+2024-09-19 16:12:36,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=699680.0, ans=0.1
+2024-09-19 16:12:46,697 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.521e+01 8.658e+01 9.205e+01 9.936e+01 3.321e+02, threshold=1.841e+02, percent-clipped=1.0
+2024-09-19 16:12:56,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=699760.0, ans=0.0
+2024-09-19 16:12:57,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=699760.0, ans=0.125
+2024-09-19 16:13:00,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=699760.0, ans=0.0
+2024-09-19 16:13:09,520 INFO [train.py:1198] (1/2) Epoch 39, batch 3000, loss[loss=0.2342, ctc_loss=0.1158, cr_loss=0.3638, attn_decoder_loss=0.2392, over 29752.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1129, cr_loss=0.3525, attn_decoder_loss=0.239, over 5784048.78 frames. ], batch size: 81, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:13:09,520 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-19 16:13:16,250 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.1.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.9441, 5.3603, 5.7330, 5.8899], device='cuda:1')
+2024-09-19 16:13:28,815 INFO [train.py:1230] (1/2) Epoch 39, validation: loss=0.2123, ctc_loss=0.03671, cr_loss=6.289e-15, attn_decoder_loss=0.2318, over 944034.00 frames.
+2024-09-19 16:13:28,815 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-19 16:13:43,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=699840.0, ans=0.125
+2024-09-19 16:14:31,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=699960.0, ans=0.125
+2024-09-19 16:14:44,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.11 vs. limit=15.0
+2024-09-19 16:14:46,912 INFO [train.py:1198] (1/2) Epoch 39, batch 3050, loss[loss=0.2229, ctc_loss=0.09988, cr_loss=0.3242, attn_decoder_loss=0.2294, over 29529.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1134, cr_loss=0.3533, attn_decoder_loss=0.2396, over 5777083.02 frames. ], batch size: 76, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:14:50,419 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:15:09,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=700040.0, ans=0.1
+2024-09-19 16:15:39,617 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.938e+01 8.454e+01 9.058e+01 9.630e+01 1.961e+02, threshold=1.812e+02, percent-clipped=1.0
+2024-09-19 16:16:02,117 INFO [train.py:1198] (1/2) Epoch 39, batch 3100, loss[loss=0.2453, ctc_loss=0.1143, cr_loss=0.3696, attn_decoder_loss=0.2517, over 29220.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1131, cr_loss=0.3528, attn_decoder_loss=0.2393, over 5777074.55 frames. ], batch size: 100, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:16:02,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=700200.0, ans=0.04949747468305833
+2024-09-19 16:16:05,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=700200.0, ans=0.125
+2024-09-19 16:16:21,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=700240.0, ans=0.125
+2024-09-19 16:16:35,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=700280.0, ans=0.0
+2024-09-19 16:16:42,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=700280.0, ans=0.0
+2024-09-19 16:17:13,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff2.min_abs, batch_count=700360.0, ans=0.1
+2024-09-19 16:17:13,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=700360.0, ans=0.09899494936611666
+2024-09-19 16:17:19,629 INFO [train.py:1198] (1/2) Epoch 39, batch 3150, loss[loss=0.2431, ctc_loss=0.1237, cr_loss=0.3702, attn_decoder_loss=0.2481, over 28908.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1137, cr_loss=0.354, attn_decoder_loss=0.2395, over 5783023.93 frames. ], batch size: 104, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:17:38,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=700440.0, ans=0.0
+2024-09-19 16:17:53,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=700480.0, ans=0.125
+2024-09-19 16:18:12,226 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.334e+01 8.732e+01 9.135e+01 9.638e+01 1.512e+02, threshold=1.827e+02, percent-clipped=0.0
+2024-09-19 16:18:26,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=700560.0, ans=0.125
+2024-09-19 16:18:26,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=700560.0, ans=0.1
+2024-09-19 16:18:32,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=700560.0, ans=0.1
+2024-09-19 16:18:34,851 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.39 vs. limit=10.0
+2024-09-19 16:18:36,811 INFO [train.py:1198] (1/2) Epoch 39, batch 3200, loss[loss=0.2266, ctc_loss=0.1059, cr_loss=0.3457, attn_decoder_loss=0.2323, over 29430.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1134, cr_loss=0.3531, attn_decoder_loss=0.2392, over 5793537.46 frames. ], batch size: 79, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:18:47,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=700600.0, ans=0.5
+2024-09-19 16:18:54,528 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.53 vs. limit=22.5
+2024-09-19 16:19:07,685 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:19:13,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=700680.0, ans=0.125
+2024-09-19 16:19:53,012 INFO [train.py:1198] (1/2) Epoch 39, batch 3250, loss[loss=0.2428, ctc_loss=0.1208, cr_loss=0.3737, attn_decoder_loss=0.248, over 29715.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1137, cr_loss=0.3543, attn_decoder_loss=0.2397, over 5799840.01 frames. ], batch size: 84, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:19:56,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.82 vs. limit=22.5
+2024-09-19 16:19:57,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=700800.0, ans=0.125
+2024-09-19 16:20:25,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=700880.0, ans=0.1
+2024-09-19 16:20:28,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=700880.0, ans=0.1
+2024-09-19 16:20:28,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=700880.0, ans=0.125
+2024-09-19 16:20:34,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=700880.0, ans=0.125
+2024-09-19 16:20:46,556 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.633e+01 8.604e+01 9.197e+01 9.698e+01 1.830e+02, threshold=1.839e+02, percent-clipped=1.0
+2024-09-19 16:20:55,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=700960.0, ans=0.1
+2024-09-19 16:20:58,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=700960.0, ans=0.0
+2024-09-19 16:21:09,875 INFO [train.py:1198] (1/2) Epoch 39, batch 3300, loss[loss=0.239, ctc_loss=0.1117, cr_loss=0.3536, attn_decoder_loss=0.2453, over 28354.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1132, cr_loss=0.3532, attn_decoder_loss=0.2389, over 5796249.38 frames. ], batch size: 111, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:21:52,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=701080.0, ans=0.125
+2024-09-19 16:21:58,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=701120.0, ans=0.125
+2024-09-19 16:22:01,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=701120.0, ans=0.125
+2024-09-19 16:22:19,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=701160.0, ans=0.125
+2024-09-19 16:22:27,048 INFO [train.py:1198] (1/2) Epoch 39, batch 3350, loss[loss=0.2381, ctc_loss=0.1094, cr_loss=0.3463, attn_decoder_loss=0.2447, over 28828.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1139, cr_loss=0.3545, attn_decoder_loss=0.2397, over 5772877.34 frames. ], batch size: 104, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:22:41,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.17 vs. limit=15.0
+2024-09-19 16:22:51,664 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:22:54,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=701240.0, ans=0.125
+2024-09-19 16:23:21,140 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.630e+01 9.121e+01 9.700e+01 6.720e+02, threshold=1.824e+02, percent-clipped=1.0
+2024-09-19 16:23:36,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=701360.0, ans=0.1
+2024-09-19 16:23:36,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=701360.0, ans=0.125
+2024-09-19 16:23:42,603 INFO [train.py:1198] (1/2) Epoch 39, batch 3400, loss[loss=0.1969, ctc_loss=0.08936, cr_loss=0.2975, attn_decoder_loss=0.2022, over 29352.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1142, cr_loss=0.3553, attn_decoder_loss=0.2398, over 5767222.58 frames. ], batch size: 67, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:24:00,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=701440.0, ans=0.125
+2024-09-19 16:24:03,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=701440.0, ans=0.0
+2024-09-19 16:24:07,816 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.13 vs. limit=6.0
+2024-09-19 16:24:19,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=701480.0, ans=0.0
+2024-09-19 16:24:19,354 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.39 vs. limit=15.0
+2024-09-19 16:24:43,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=701560.0, ans=0.125
+2024-09-19 16:24:46,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=701560.0, ans=0.0
+2024-09-19 16:24:46,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=701560.0, ans=0.125
+2024-09-19 16:25:00,307 INFO [train.py:1198] (1/2) Epoch 39, batch 3450, loss[loss=0.2415, ctc_loss=0.1143, cr_loss=0.3515, attn_decoder_loss=0.2478, over 28307.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.114, cr_loss=0.3549, attn_decoder_loss=0.2398, over 5774792.97 frames. ], batch size: 111, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:25:08,858 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=5.77 vs. limit=15.0
+2024-09-19 16:25:51,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=701720.0, ans=0.0
+2024-09-19 16:25:54,476 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.388e+01 8.636e+01 9.201e+01 9.668e+01 2.196e+02, threshold=1.840e+02, percent-clipped=2.0
+2024-09-19 16:25:58,275 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.64 vs. limit=12.0
+2024-09-19 16:26:00,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=701760.0, ans=0.125
+2024-09-19 16:26:17,576 INFO [train.py:1198] (1/2) Epoch 39, batch 3500, loss[loss=0.2024, ctc_loss=0.08832, cr_loss=0.2847, attn_decoder_loss=0.2087, over 29337.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1139, cr_loss=0.3544, attn_decoder_loss=0.2394, over 5776439.66 frames. ], batch size: 71, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:26:22,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=701800.0, ans=0.125
+2024-09-19 16:26:36,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=701840.0, ans=0.2
+2024-09-19 16:26:49,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=701880.0, ans=0.125
+2024-09-19 16:26:49,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=701880.0, ans=0.125
+2024-09-19 16:27:24,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.67 vs. limit=22.5
+2024-09-19 16:27:31,755 INFO [train.py:1198] (1/2) Epoch 39, batch 3550, loss[loss=0.2422, ctc_loss=0.1154, cr_loss=0.3681, attn_decoder_loss=0.2481, over 29722.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1139, cr_loss=0.3546, attn_decoder_loss=0.2395, over 5783376.57 frames. ], batch size: 89, lr: 2.82e-03, grad_scale: 8.0
+2024-09-19 16:27:48,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=702040.0, ans=10.0
+2024-09-19 16:27:55,590 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:27:56,293 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.40 vs. limit=6.0
+2024-09-19 16:27:56,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=702040.0, ans=0.0
+2024-09-19 16:28:10,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=702080.0, ans=0.125
+2024-09-19 16:28:24,646 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.449e+01 9.039e+01 9.569e+01 2.236e+02, threshold=1.808e+02, percent-clipped=1.0
+2024-09-19 16:28:24,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=702120.0, ans=0.0
+2024-09-19 16:28:24,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=702120.0, ans=0.1
+2024-09-19 16:28:27,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=702120.0, ans=0.125
+2024-09-19 16:28:45,267 INFO [train.py:1198] (1/2) Epoch 39, batch 3600, loss[loss=0.2304, ctc_loss=0.108, cr_loss=0.3297, attn_decoder_loss=0.2366, over 29496.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.114, cr_loss=0.3544, attn_decoder_loss=0.2397, over 5791925.81 frames. ], batch size: 77, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:29:05,239 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:29:21,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=702280.0, ans=0.125
+2024-09-19 16:29:30,891 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.68 vs. limit=15.0
+2024-09-19 16:29:50,615 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.80 vs. limit=6.0
+2024-09-19 16:29:52,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=702360.0, ans=0.025
+2024-09-19 16:30:01,925 INFO [train.py:1198] (1/2) Epoch 39, batch 3650, loss[loss=0.2439, ctc_loss=0.1256, cr_loss=0.3776, attn_decoder_loss=0.2487, over 29501.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1134, cr_loss=0.3533, attn_decoder_loss=0.2391, over 5793000.93 frames. ], batch size: 90, lr: 2.82e-03, grad_scale: 16.0
+2024-09-19 16:30:02,907 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.33 vs. limit=15.0
+2024-09-19 16:30:28,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=702440.0, ans=0.0
+2024-09-19 16:30:33,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=702480.0, ans=0.0
+2024-09-19 16:30:48,515 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:30:53,512 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.54 vs. limit=15.0
+2024-09-19 16:30:55,336 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.310e+01 8.559e+01 9.136e+01 9.465e+01 1.942e+02, threshold=1.827e+02, percent-clipped=1.0
+2024-09-19 16:31:04,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=702560.0, ans=0.0
+2024-09-19 16:31:13,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=702560.0, ans=0.125
+2024-09-19 16:31:16,132 INFO [train.py:1198] (1/2) Epoch 39, batch 3700, loss[loss=0.2457, ctc_loss=0.127, cr_loss=0.3957, attn_decoder_loss=0.2501, over 29714.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1131, cr_loss=0.3525, attn_decoder_loss=0.239, over 5803048.56 frames. ], batch size: 84, lr: 2.81e-03, grad_scale: 16.0
+2024-09-19 16:31:32,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=702640.0, ans=0.1
+2024-09-19 16:31:34,961 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.24 vs. limit=10.0
+2024-09-19 16:31:37,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=702640.0, ans=0.2
+2024-09-19 16:31:56,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=702680.0, ans=0.125
+2024-09-19 16:32:03,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=702720.0, ans=0.125
+2024-09-19 16:32:08,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.53 vs. limit=5.0
+2024-09-19 16:32:13,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=702760.0, ans=0.0
+2024-09-19 16:32:19,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=702760.0, ans=0.125
+2024-09-19 16:32:20,519 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.88 vs. limit=12.0
+2024-09-19 16:32:23,198 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.76 vs. limit=12.0
+2024-09-19 16:32:31,622 INFO [train.py:1198] (1/2) Epoch 39, batch 3750, loss[loss=0.2093, ctc_loss=0.1016, cr_loss=0.347, attn_decoder_loss=0.2135, over 29340.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1132, cr_loss=0.3528, attn_decoder_loss=0.2388, over 5807512.50 frames. ], batch size: 67, lr: 2.81e-03, grad_scale: 16.0
+2024-09-19 16:32:36,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=702800.0, ans=0.0
+2024-09-19 16:32:52,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=702840.0, ans=0.125
+2024-09-19 16:33:06,450 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.57 vs. limit=15.0
+2024-09-19 16:33:19,865 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.72 vs. limit=15.0
+2024-09-19 16:33:23,077 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.25 vs. limit=15.0
+2024-09-19 16:33:26,584 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.605e+01 8.464e+01 8.930e+01 9.588e+01 2.704e+02, threshold=1.786e+02, percent-clipped=2.0
+2024-09-19 16:33:38,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=702960.0, ans=0.125
+2024-09-19 16:33:38,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=702960.0, ans=0.1
+2024-09-19 16:33:45,847 INFO [train.py:1198] (1/2) Epoch 39, batch 3800, loss[loss=0.2338, ctc_loss=0.1079, cr_loss=0.3466, attn_decoder_loss=0.2401, over 29627.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1133, cr_loss=0.3525, attn_decoder_loss=0.2386, over 5798360.21 frames. ], batch size: 86, lr: 2.81e-03, grad_scale: 8.0
+2024-09-19 16:33:56,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=703000.0, ans=0.2
+2024-09-19 16:34:06,204 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.63 vs. limit=12.0
+2024-09-19 16:34:17,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=703080.0, ans=0.0
+2024-09-19 16:34:36,081 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.44 vs. limit=12.0
+2024-09-19 16:34:57,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=703160.0, ans=0.0
+2024-09-19 16:35:00,348 INFO [train.py:1198] (1/2) Epoch 39, batch 3850, loss[loss=0.2464, ctc_loss=0.1249, cr_loss=0.3723, attn_decoder_loss=0.2516, over 29235.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1129, cr_loss=0.3525, attn_decoder_loss=0.2384, over 5812949.82 frames. ], batch size: 100, lr: 2.81e-03, grad_scale: 8.0
+2024-09-19 16:35:18,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=703240.0, ans=0.0
+2024-09-19 16:35:24,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=703240.0, ans=0.125
+2024-09-19 16:35:33,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.13 vs. limit=6.0
+2024-09-19 16:35:56,712 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.496e+01 8.648e+01 9.079e+01 9.833e+01 2.007e+02, threshold=1.816e+02, percent-clipped=1.0
+2024-09-19 16:36:15,978 INFO [train.py:1198] (1/2) Epoch 39, batch 3900, loss[loss=0.2441, ctc_loss=0.1203, cr_loss=0.3717, attn_decoder_loss=0.2496, over 29622.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1131, cr_loss=0.3527, attn_decoder_loss=0.2388, over 5816832.52 frames. ], batch size: 86, lr: 2.81e-03, grad_scale: 8.0
+2024-09-19 16:36:32,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=703440.0, ans=0.0
+2024-09-19 16:36:38,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=703440.0, ans=0.025
+2024-09-19 16:36:50,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=703480.0, ans=0.0
+2024-09-19 16:37:07,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=703520.0, ans=0.04949747468305833
+2024-09-19 16:37:09,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=703520.0, ans=0.125
+2024-09-19 16:37:29,843 INFO [train.py:1198] (1/2) Epoch 39, batch 3950, loss[loss=0.2554, ctc_loss=0.1324, cr_loss=0.4045, attn_decoder_loss=0.2601, over 29458.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.113, cr_loss=0.3526, attn_decoder_loss=0.2388, over 5835848.30 frames. ], batch size: 97, lr: 2.81e-03, grad_scale: 8.0
+2024-09-19 16:37:35,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=703600.0, ans=0.125
+2024-09-19 16:37:37,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=703600.0, ans=0.0
+2024-09-19 16:37:55,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=703640.0, ans=0.0
+2024-09-19 16:38:06,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.93 vs. limit=12.0
+2024-09-19 16:38:11,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=703680.0, ans=0.035
+2024-09-19 16:38:25,886 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.999e+01 8.633e+01 9.078e+01 9.598e+01 1.411e+02, threshold=1.816e+02, percent-clipped=0.0
+2024-09-19 16:38:29,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=703760.0, ans=0.125
+2024-09-19 16:38:36,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=703760.0, ans=0.0
+2024-09-19 16:38:42,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=703760.0, ans=0.2
+2024-09-19 16:38:44,845 INFO [train.py:1198] (1/2) Epoch 39, batch 4000, loss[loss=0.2248, ctc_loss=0.1085, cr_loss=0.3441, attn_decoder_loss=0.2301, over 29497.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1133, cr_loss=0.3529, attn_decoder_loss=0.239, over 5812337.42 frames. ], batch size: 74, lr: 2.81e-03, grad_scale: 16.0
+2024-09-19 16:38:48,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.87 vs. limit=15.0
+2024-09-19 16:39:30,933 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:39:36,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=703920.0, ans=10.0
+2024-09-19 16:39:44,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=703960.0, ans=0.125
+2024-09-19 16:39:53,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=703960.0, ans=0.125
+2024-09-19 16:40:06,232 INFO [train.py:1198] (1/2) Epoch 39, batch 4050, loss[loss=0.2515, ctc_loss=0.1411, cr_loss=0.3843, attn_decoder_loss=0.2552, over 20326.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1136, cr_loss=0.3532, attn_decoder_loss=0.239, over 5796603.51 frames. ], batch size: 210, lr: 2.81e-03, grad_scale: 16.0
+2024-09-19 16:40:15,869 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.54 vs. limit=15.0
+2024-09-19 16:40:16,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=704000.0, ans=0.125
+2024-09-19 16:40:18,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=704000.0, ans=0.1
+2024-09-19 16:40:28,133 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:40:29,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=704040.0, ans=0.125
+2024-09-19 16:40:47,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=704080.0, ans=0.125
+2024-09-19 16:40:47,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=704080.0, ans=0.125
+2024-09-19 16:41:01,438 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.594e+01 8.633e+01 9.112e+01 9.845e+01 1.931e+02, threshold=1.822e+02, percent-clipped=1.0
+2024-09-19 16:41:01,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=704120.0, ans=0.0
+2024-09-19 16:41:05,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.58 vs. limit=5.0
+2024-09-19 16:41:08,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.28 vs. limit=15.0
+2024-09-19 16:41:20,592 INFO [train.py:1198] (1/2) Epoch 39, batch 4100, loss[loss=0.2435, ctc_loss=0.1237, cr_loss=0.3597, attn_decoder_loss=0.2489, over 29517.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.114, cr_loss=0.3541, attn_decoder_loss=0.2394, over 5791371.30 frames. ], batch size: 90, lr: 2.81e-03, grad_scale: 16.0
+2024-09-19 16:41:30,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=704200.0, ans=0.125
+2024-09-19 16:41:31,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=704200.0, ans=0.0
+2024-09-19 16:41:44,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=704240.0, ans=0.125
+2024-09-19 16:42:07,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=704320.0, ans=0.125
+2024-09-19 16:42:29,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=704360.0, ans=0.125
+2024-09-19 16:42:35,215 INFO [train.py:1198] (1/2) Epoch 39, batch 4150, loss[loss=0.2313, ctc_loss=0.115, cr_loss=0.3457, attn_decoder_loss=0.2365, over 29484.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1138, cr_loss=0.3539, attn_decoder_loss=0.2393, over 5797243.27 frames. ], batch size: 77, lr: 2.81e-03, grad_scale: 16.0
+2024-09-19 16:42:53,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=704440.0, ans=0.2
+2024-09-19 16:43:01,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=704440.0, ans=0.125
+2024-09-19 16:43:04,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=704480.0, ans=0.1
+2024-09-19 16:43:22,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=704520.0, ans=0.2
+2024-09-19 16:43:29,356 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.310e+01 8.507e+01 9.056e+01 9.500e+01 2.477e+02, threshold=1.811e+02, percent-clipped=1.0
+2024-09-19 16:43:31,252 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 16:43:32,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=704560.0, ans=0.09899494936611666
+2024-09-19 16:43:42,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=704560.0, ans=0.05
+2024-09-19 16:43:48,513 INFO [train.py:1198] (1/2) Epoch 39, batch 4200, loss[loss=0.2599, ctc_loss=0.134, cr_loss=0.4147, attn_decoder_loss=0.2647, over 29501.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1137, cr_loss=0.3536, attn_decoder_loss=0.2395, over 5799543.84 frames. ], batch size: 90, lr: 2.81e-03, grad_scale: 16.0
+2024-09-19 16:43:59,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.94 vs. limit=12.0
+2024-09-19 16:44:04,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.61 vs.
limit=22.5 +2024-09-19 16:44:09,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=704640.0, ans=0.125 +2024-09-19 16:44:20,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=704680.0, ans=0.0 +2024-09-19 16:44:35,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=704720.0, ans=0.125 +2024-09-19 16:44:47,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=704760.0, ans=0.125 +2024-09-19 16:45:03,371 INFO [train.py:1198] (1/2) Epoch 39, batch 4250, loss[loss=0.2214, ctc_loss=0.1007, cr_loss=0.3284, attn_decoder_loss=0.2275, over 29505.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1132, cr_loss=0.3528, attn_decoder_loss=0.2393, over 5805570.47 frames. ], batch size: 74, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:45:06,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=704800.0, ans=0.025 +2024-09-19 16:45:16,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=704840.0, ans=0.125 +2024-09-19 16:45:28,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=704840.0, ans=0.0 +2024-09-19 16:45:38,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=704880.0, ans=0.125 +2024-09-19 16:45:53,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer_na.min_abs, batch_count=704920.0, ans=0.02 +2024-09-19 16:45:55,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.78 vs. limit=12.0 +2024-09-19 16:45:56,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=704920.0, ans=0.09899494936611666 +2024-09-19 16:45:57,876 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.522e+01 9.039e+01 9.490e+01 2.336e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 16:46:05,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=704960.0, ans=0.2 +2024-09-19 16:46:17,663 INFO [train.py:1198] (1/2) Epoch 39, batch 4300, loss[loss=0.2379, ctc_loss=0.105, cr_loss=0.3422, attn_decoder_loss=0.245, over 29545.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1132, cr_loss=0.3526, attn_decoder_loss=0.2396, over 5795464.63 frames. ], batch size: 87, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:46:31,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=705040.0, ans=0.0 +2024-09-19 16:46:49,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=705080.0, ans=0.1 +2024-09-19 16:46:59,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=705080.0, ans=0.125 +2024-09-19 16:47:16,411 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.38 vs. 
limit=15.0 +2024-09-19 16:47:26,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=705160.0, ans=0.1 +2024-09-19 16:47:26,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=705160.0, ans=0.125 +2024-09-19 16:47:32,376 INFO [train.py:1198] (1/2) Epoch 39, batch 4350, loss[loss=0.2437, ctc_loss=0.1205, cr_loss=0.345, attn_decoder_loss=0.2497, over 29438.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1157, cr_loss=0.3582, attn_decoder_loss=0.2427, over 5797539.57 frames. ], batch size: 97, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:48:04,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=705280.0, ans=0.125 +2024-09-19 16:48:06,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=705280.0, ans=0.125 +2024-09-19 16:48:17,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=705320.0, ans=0.125 +2024-09-19 16:48:27,651 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.048e+01 8.862e+01 9.354e+01 9.777e+01 1.379e+02, threshold=1.871e+02, percent-clipped=0.0 +2024-09-19 16:48:41,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=705360.0, ans=0.2 +2024-09-19 16:48:45,016 INFO [train.py:1198] (1/2) Epoch 39, batch 4400, loss[loss=0.2451, ctc_loss=0.1222, cr_loss=0.3787, attn_decoder_loss=0.2504, over 27281.00 frames. ], tot_loss[loss=0.239, ctc_loss=0.1168, cr_loss=0.3603, attn_decoder_loss=0.2446, over 5766311.55 frames. ], batch size: 124, lr: 2.81e-03, grad_scale: 16.0 +2024-09-19 16:49:20,059 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.32 vs. limit=15.0 +2024-09-19 16:49:31,959 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=5.18 vs. limit=12.0 +2024-09-19 16:49:34,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=705520.0, ans=0.125 +2024-09-19 16:49:38,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=705520.0, ans=0.025 +2024-09-19 16:49:54,448 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.31 vs. limit=15.0 +2024-09-19 16:50:00,204 INFO [train.py:1198] (1/2) Epoch 39, batch 4450, loss[loss=0.2559, ctc_loss=0.1444, cr_loss=0.3996, attn_decoder_loss=0.2594, over 19523.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1203, cr_loss=0.3658, attn_decoder_loss=0.2466, over 5571792.21 frames. 
], batch size: 209, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:50:20,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=705640.0, ans=0.0 +2024-09-19 16:50:24,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=705640.0, ans=0.125 +2024-09-19 16:50:25,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.50 vs. limit=22.5 +2024-09-19 16:50:30,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=705680.0, ans=0.0 +2024-09-19 16:50:42,036 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.39 vs. limit=15.0 +2024-09-19 16:50:45,784 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=705720.0, ans=0.125 +2024-09-19 16:50:47,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=705720.0, ans=0.0 +2024-09-19 16:50:48,736 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:50:58,886 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.301e+01 9.418e+01 1.029e+02 1.185e+02 3.823e+02, threshold=2.058e+02, percent-clipped=1.0 +2024-09-19 16:51:15,116 INFO [train.py:1198] (1/2) Epoch 39, batch 4500, loss[loss=0.2541, ctc_loss=0.1384, cr_loss=0.3787, attn_decoder_loss=0.2586, over 20059.00 frames. ], tot_loss[loss=0.2434, ctc_loss=0.1234, cr_loss=0.3684, attn_decoder_loss=0.2486, over 5233921.96 frames. ], batch size: 209, lr: 2.81e-03, grad_scale: 8.0 +2024-09-19 16:51:24,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=705800.0, ans=0.0 +2024-09-19 16:51:39,257 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 16:52:42,177 INFO [train.py:1198] (1/2) Epoch 40, batch 0, loss[loss=0.2158, ctc_loss=0.09235, cr_loss=0.3223, attn_decoder_loss=0.2223, over 29618.00 frames. ], tot_loss[loss=0.2158, ctc_loss=0.09235, cr_loss=0.3223, attn_decoder_loss=0.2223, over 29618.00 frames. ], batch size: 73, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 16:52:42,177 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 16:53:00,472 INFO [train.py:1230] (1/2) Epoch 40, validation: loss=0.2128, ctc_loss=0.03605, cr_loss=6.84e-15, attn_decoder_loss=0.2324, over 944034.00 frames. 
+2024-09-19 16:53:00,473 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 16:53:27,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=705940.0, ans=0.2 +2024-09-19 16:53:35,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=705980.0, ans=0.1 +2024-09-19 16:53:38,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=705980.0, ans=0.07 +2024-09-19 16:53:51,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=706020.0, ans=0.2 +2024-09-19 16:53:56,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=706020.0, ans=0.1 +2024-09-19 16:53:57,342 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.75 vs. limit=15.0 +2024-09-19 16:54:12,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=706060.0, ans=0.0 +2024-09-19 16:54:17,771 INFO [train.py:1198] (1/2) Epoch 40, batch 50, loss[loss=0.2125, ctc_loss=0.102, cr_loss=0.3246, attn_decoder_loss=0.2175, over 29429.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1153, cr_loss=0.3574, attn_decoder_loss=0.2409, over 1267482.11 frames. ], batch size: 70, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:54:24,957 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.52 vs. limit=10.0 +2024-09-19 16:54:42,491 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.552e+01 8.881e+01 9.876e+01 1.118e+02 1.337e+02, threshold=1.975e+02, percent-clipped=0.0 +2024-09-19 16:55:04,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=706220.0, ans=0.2 +2024-09-19 16:55:06,578 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=14.21 vs. limit=22.5 +2024-09-19 16:55:07,869 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.18 vs. limit=15.0 +2024-09-19 16:55:16,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=706220.0, ans=0.2 +2024-09-19 16:55:23,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=706260.0, ans=0.125 +2024-09-19 16:55:26,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=706260.0, ans=0.125 +2024-09-19 16:55:32,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=706260.0, ans=0.95 +2024-09-19 16:55:35,520 INFO [train.py:1198] (1/2) Epoch 40, batch 100, loss[loss=0.2258, ctc_loss=0.1131, cr_loss=0.3485, attn_decoder_loss=0.2306, over 29543.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1166, cr_loss=0.3609, attn_decoder_loss=0.2425, over 2252855.82 frames. 
], batch size: 76, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:55:56,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=706340.0, ans=0.125 +2024-09-19 16:55:59,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=706340.0, ans=0.125 +2024-09-19 16:55:59,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=706340.0, ans=0.1 +2024-09-19 16:56:22,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=706420.0, ans=0.0 +2024-09-19 16:56:25,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=706420.0, ans=0.125 +2024-09-19 16:56:28,398 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=706420.0, ans=0.125 +2024-09-19 16:56:30,645 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.83 vs. limit=22.5 +2024-09-19 16:56:34,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=706460.0, ans=0.035 +2024-09-19 16:56:37,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=706460.0, ans=0.0 +2024-09-19 16:56:40,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=11.67 vs. limit=15.0 +2024-09-19 16:56:50,194 INFO [train.py:1198] (1/2) Epoch 40, batch 150, loss[loss=0.2078, ctc_loss=0.09043, cr_loss=0.3031, attn_decoder_loss=0.2141, over 29457.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1143, cr_loss=0.356, attn_decoder_loss=0.2404, over 3047184.18 frames. ], batch size: 70, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:57:02,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=706500.0, ans=0.125 +2024-09-19 16:57:11,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=706540.0, ans=0.125 +2024-09-19 16:57:12,855 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.418e+01 8.727e+01 9.012e+01 9.533e+01 1.739e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 16:57:35,976 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.69 vs. limit=15.0 +2024-09-19 16:57:36,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=706620.0, ans=0.025 +2024-09-19 16:57:44,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=706620.0, ans=0.0 +2024-09-19 16:57:55,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=706660.0, ans=0.1 +2024-09-19 16:58:05,147 INFO [train.py:1198] (1/2) Epoch 40, batch 200, loss[loss=0.2465, ctc_loss=0.1245, cr_loss=0.3847, attn_decoder_loss=0.2515, over 27156.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1134, cr_loss=0.3547, attn_decoder_loss=0.2394, over 3658886.09 frames. 
], batch size: 124, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:58:05,401 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=706700.0, ans=0.2 +2024-09-19 16:58:08,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=706700.0, ans=0.0 +2024-09-19 16:58:23,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=706740.0, ans=0.2 +2024-09-19 16:58:45,697 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.77 vs. limit=15.0 +2024-09-19 16:59:04,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=706820.0, ans=0.0 +2024-09-19 16:59:13,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=706860.0, ans=0.2 +2024-09-19 16:59:21,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=706860.0, ans=0.0 +2024-09-19 16:59:25,387 INFO [train.py:1198] (1/2) Epoch 40, batch 250, loss[loss=0.2434, ctc_loss=0.1168, cr_loss=0.3588, attn_decoder_loss=0.2495, over 29261.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1129, cr_loss=0.3534, attn_decoder_loss=0.2391, over 4141166.67 frames. ], batch size: 100, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 16:59:29,359 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.28 vs. limit=10.0 +2024-09-19 16:59:31,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=706900.0, ans=0.2 +2024-09-19 16:59:40,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=706940.0, ans=0.1 +2024-09-19 16:59:47,894 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.197e+01 8.510e+01 9.023e+01 9.427e+01 1.559e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 16:59:52,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=706940.0, ans=0.0 +2024-09-19 17:00:03,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=706980.0, ans=0.0 +2024-09-19 17:00:18,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=707020.0, ans=0.125 +2024-09-19 17:00:40,604 INFO [train.py:1198] (1/2) Epoch 40, batch 300, loss[loss=0.2499, ctc_loss=0.1211, cr_loss=0.3833, attn_decoder_loss=0.2557, over 29540.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1126, cr_loss=0.3523, attn_decoder_loss=0.2386, over 4508875.05 frames. ], batch size: 92, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:00:58,015 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.44 vs. 
limit=15.0 +2024-09-19 17:00:59,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=707140.0, ans=0.5 +2024-09-19 17:01:15,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=707180.0, ans=0.0 +2024-09-19 17:01:17,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=707180.0, ans=0.0 +2024-09-19 17:01:25,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=707220.0, ans=0.2 +2024-09-19 17:01:28,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=707220.0, ans=0.2 +2024-09-19 17:01:38,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=707220.0, ans=0.125 +2024-09-19 17:01:41,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=707260.0, ans=0.125 +2024-09-19 17:01:55,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=707300.0, ans=0.0 +2024-09-19 17:01:56,854 INFO [train.py:1198] (1/2) Epoch 40, batch 350, loss[loss=0.2093, ctc_loss=0.09134, cr_loss=0.3015, attn_decoder_loss=0.2157, over 29351.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1131, cr_loss=0.3529, attn_decoder_loss=0.2391, over 4794398.47 frames. ], batch size: 71, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:02:21,765 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.315e+01 8.445e+01 8.881e+01 9.307e+01 1.282e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-19 17:02:26,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=707340.0, ans=0.1 +2024-09-19 17:02:43,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=707420.0, ans=0.2 +2024-09-19 17:02:50,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=707420.0, ans=0.125 +2024-09-19 17:02:56,035 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.35 vs. limit=15.0 +2024-09-19 17:02:56,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=707420.0, ans=0.125 +2024-09-19 17:02:58,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=707460.0, ans=0.125 +2024-09-19 17:03:04,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=707460.0, ans=0.2 +2024-09-19 17:03:13,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=707500.0, ans=0.125 +2024-09-19 17:03:14,404 INFO [train.py:1198] (1/2) Epoch 40, batch 400, loss[loss=0.2523, ctc_loss=0.1315, cr_loss=0.3981, attn_decoder_loss=0.2569, over 29689.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.113, cr_loss=0.3528, attn_decoder_loss=0.239, over 5023608.94 frames. 
], batch size: 82, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:03:48,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=707580.0, ans=0.125 +2024-09-19 17:03:56,329 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.38 vs. limit=10.0 +2024-09-19 17:03:58,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=707620.0, ans=0.1 +2024-09-19 17:04:04,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=707620.0, ans=0.025 +2024-09-19 17:04:30,150 INFO [train.py:1198] (1/2) Epoch 40, batch 450, loss[loss=0.2469, ctc_loss=0.1245, cr_loss=0.3762, attn_decoder_loss=0.2521, over 29699.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1131, cr_loss=0.3527, attn_decoder_loss=0.2391, over 5186687.82 frames. ], batch size: 83, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:04:52,851 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.308e+01 8.522e+01 8.945e+01 9.353e+01 2.975e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-19 17:05:02,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=707780.0, ans=0.125 +2024-09-19 17:05:03,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=707780.0, ans=0.1 +2024-09-19 17:05:45,818 INFO [train.py:1198] (1/2) Epoch 40, batch 500, loss[loss=0.2568, ctc_loss=0.1301, cr_loss=0.3898, attn_decoder_loss=0.2622, over 29408.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1123, cr_loss=0.3515, attn_decoder_loss=0.2383, over 5330296.94 frames. ], batch size: 94, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:06:03,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=707940.0, ans=0.125 +2024-09-19 17:06:14,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=707940.0, ans=0.125 +2024-09-19 17:06:16,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=707940.0, ans=0.025 +2024-09-19 17:06:47,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=708020.0, ans=0.125 +2024-09-19 17:07:06,550 INFO [train.py:1198] (1/2) Epoch 40, batch 550, loss[loss=0.2486, ctc_loss=0.1222, cr_loss=0.355, attn_decoder_loss=0.2548, over 28813.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1123, cr_loss=0.3514, attn_decoder_loss=0.2384, over 5422224.83 frames. 
], batch size: 104, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:07:17,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=708100.0, ans=0.0 +2024-09-19 17:07:22,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=708140.0, ans=0.2 +2024-09-19 17:07:26,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=708140.0, ans=0.1 +2024-09-19 17:07:30,881 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.605e+01 8.557e+01 8.930e+01 9.623e+01 2.134e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-19 17:07:58,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=708220.0, ans=0.125 +2024-09-19 17:08:08,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=708260.0, ans=0.125 +2024-09-19 17:08:11,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=708260.0, ans=0.0 +2024-09-19 17:08:14,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=708260.0, ans=0.0 +2024-09-19 17:08:17,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=708260.0, ans=0.125 +2024-09-19 17:08:22,290 INFO [train.py:1198] (1/2) Epoch 40, batch 600, loss[loss=0.242, ctc_loss=0.1197, cr_loss=0.3726, attn_decoder_loss=0.2474, over 29310.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1124, cr_loss=0.3522, attn_decoder_loss=0.2385, over 5507515.68 frames. ], batch size: 100, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:08:27,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=708300.0, ans=0.07 +2024-09-19 17:08:33,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=708300.0, ans=0.0 +2024-09-19 17:08:41,180 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.91 vs. limit=6.0 +2024-09-19 17:08:58,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=708380.0, ans=0.125 +2024-09-19 17:09:13,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=708420.0, ans=0.125 +2024-09-19 17:09:13,987 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.16 vs. limit=15.0 +2024-09-19 17:09:25,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=708460.0, ans=0.2 +2024-09-19 17:09:37,339 INFO [train.py:1198] (1/2) Epoch 40, batch 650, loss[loss=0.2311, ctc_loss=0.1136, cr_loss=0.3682, attn_decoder_loss=0.236, over 29776.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1119, cr_loss=0.3514, attn_decoder_loss=0.238, over 5585257.19 frames. 
], batch size: 81, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:10:03,863 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.123e+01 8.522e+01 8.894e+01 9.367e+01 2.518e+02, threshold=1.779e+02, percent-clipped=2.0 +2024-09-19 17:10:50,708 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.86 vs. limit=12.0 +2024-09-19 17:10:57,354 INFO [train.py:1198] (1/2) Epoch 40, batch 700, loss[loss=0.2137, ctc_loss=0.0987, cr_loss=0.3082, attn_decoder_loss=0.2196, over 29565.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1124, cr_loss=0.3526, attn_decoder_loss=0.2385, over 5635604.78 frames. ], batch size: 76, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:11:05,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=708700.0, ans=0.0 +2024-09-19 17:11:09,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=708700.0, ans=0.2 +2024-09-19 17:11:49,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=708820.0, ans=0.0 +2024-09-19 17:12:07,699 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=708860.0, ans=0.1 +2024-09-19 17:12:13,436 INFO [train.py:1198] (1/2) Epoch 40, batch 750, loss[loss=0.2354, ctc_loss=0.1099, cr_loss=0.3393, attn_decoder_loss=0.2418, over 29710.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1126, cr_loss=0.353, attn_decoder_loss=0.2385, over 5674893.38 frames. ], batch size: 82, lr: 2.77e-03, grad_scale: 8.0 +2024-09-19 17:12:15,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=708900.0, ans=0.0 +2024-09-19 17:12:25,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=708900.0, ans=10.0 +2024-09-19 17:12:28,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=708940.0, ans=0.2 +2024-09-19 17:12:37,368 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.374e+01 9.046e+01 9.655e+01 1.904e+02, threshold=1.809e+02, percent-clipped=1.0 +2024-09-19 17:12:59,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=709020.0, ans=0.125 +2024-09-19 17:13:13,531 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.02 vs. limit=6.0 +2024-09-19 17:13:21,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=709060.0, ans=0.125 +2024-09-19 17:13:23,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=709060.0, ans=0.125 +2024-09-19 17:13:26,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=709060.0, ans=0.125 +2024-09-19 17:13:27,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=709100.0, ans=0.0 +2024-09-19 17:13:28,941 INFO [train.py:1198] (1/2) Epoch 40, batch 800, loss[loss=0.2105, ctc_loss=0.09718, cr_loss=0.313, attn_decoder_loss=0.2162, over 29614.00 frames. 
], tot_loss[loss=0.2327, ctc_loss=0.1124, cr_loss=0.3525, attn_decoder_loss=0.2383, over 5707538.68 frames. ], batch size: 73, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:13:42,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=709140.0, ans=0.2 +2024-09-19 17:14:04,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=709180.0, ans=0.1 +2024-09-19 17:14:22,299 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=709220.0, ans=0.2 +2024-09-19 17:14:29,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=709220.0, ans=0.2 +2024-09-19 17:14:29,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=709220.0, ans=0.0 +2024-09-19 17:14:48,728 INFO [train.py:1198] (1/2) Epoch 40, batch 850, loss[loss=0.2396, ctc_loss=0.1121, cr_loss=0.3581, attn_decoder_loss=0.2458, over 29694.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1121, cr_loss=0.3516, attn_decoder_loss=0.2382, over 5737430.86 frames. ], batch size: 89, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:14:50,847 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.85 vs. limit=22.5 +2024-09-19 17:14:57,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=709300.0, ans=0.125 +2024-09-19 17:15:05,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=709340.0, ans=0.05 +2024-09-19 17:15:12,634 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.471e+01 8.469e+01 8.929e+01 9.566e+01 2.198e+02, threshold=1.786e+02, percent-clipped=1.0 +2024-09-19 17:16:01,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=709460.0, ans=0.125 +2024-09-19 17:16:03,933 INFO [train.py:1198] (1/2) Epoch 40, batch 900, loss[loss=0.208, ctc_loss=0.09007, cr_loss=0.2932, attn_decoder_loss=0.2146, over 29575.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1125, cr_loss=0.3526, attn_decoder_loss=0.2384, over 5741826.40 frames. ], batch size: 73, lr: 2.77e-03, grad_scale: 16.0 +2024-09-19 17:16:53,322 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=12.0 +2024-09-19 17:17:00,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=709620.0, ans=0.0 +2024-09-19 17:17:04,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=709660.0, ans=0.125 +2024-09-19 17:17:19,238 INFO [train.py:1198] (1/2) Epoch 40, batch 950, loss[loss=0.221, ctc_loss=0.09635, cr_loss=0.3217, attn_decoder_loss=0.2278, over 29504.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1125, cr_loss=0.3527, attn_decoder_loss=0.2384, over 5743174.66 frames. 
], batch size: 74, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:17:28,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=709700.0, ans=10.0 +2024-09-19 17:17:32,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=709740.0, ans=0.125 +2024-09-19 17:17:45,396 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.576e+01 8.548e+01 9.083e+01 9.830e+01 2.215e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-19 17:18:19,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=709820.0, ans=0.025 +2024-09-19 17:18:25,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=709860.0, ans=0.5 +2024-09-19 17:18:29,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=709860.0, ans=0.0 +2024-09-19 17:18:39,001 INFO [train.py:1198] (1/2) Epoch 40, batch 1000, loss[loss=0.2307, ctc_loss=0.1222, cr_loss=0.3691, attn_decoder_loss=0.2345, over 29495.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1132, cr_loss=0.3541, attn_decoder_loss=0.2392, over 5735692.17 frames. ], batch size: 77, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:18:39,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=709900.0, ans=0.125 +2024-09-19 17:18:49,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=709900.0, ans=0.125 +2024-09-19 17:18:51,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=709900.0, ans=0.125 +2024-09-19 17:18:51,837 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.65 vs. limit=10.0 +2024-09-19 17:19:08,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=709980.0, ans=0.125 +2024-09-19 17:19:20,108 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:19:32,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=710020.0, ans=0.125 +2024-09-19 17:19:40,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=710060.0, ans=0.1 +2024-09-19 17:19:49,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=710060.0, ans=0.125 +2024-09-19 17:19:51,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=710060.0, ans=0.125 +2024-09-19 17:19:51,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=710060.0, ans=0.0 +2024-09-19 17:19:51,978 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.17 vs. 
limit=15.0 +2024-09-19 17:19:52,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=710100.0, ans=0.125 +2024-09-19 17:19:54,090 INFO [train.py:1198] (1/2) Epoch 40, batch 1050, loss[loss=0.2397, ctc_loss=0.12, cr_loss=0.3826, attn_decoder_loss=0.2445, over 29666.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1131, cr_loss=0.354, attn_decoder_loss=0.2387, over 5743505.14 frames. ], batch size: 85, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:20:15,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=710140.0, ans=0.125 +2024-09-19 17:20:20,063 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.525e+01 8.590e+01 9.048e+01 9.519e+01 1.628e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 17:20:21,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=710140.0, ans=0.0 +2024-09-19 17:20:32,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=710180.0, ans=0.0 +2024-09-19 17:21:09,833 INFO [train.py:1198] (1/2) Epoch 40, batch 1100, loss[loss=0.231, ctc_loss=0.1112, cr_loss=0.3427, attn_decoder_loss=0.2367, over 29433.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1127, cr_loss=0.3531, attn_decoder_loss=0.2387, over 5755235.93 frames. ], batch size: 78, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:21:20,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=710300.0, ans=0.1 +2024-09-19 17:21:26,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=710340.0, ans=0.1 +2024-09-19 17:22:10,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=710420.0, ans=0.125 +2024-09-19 17:22:24,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=710460.0, ans=0.0 +2024-09-19 17:22:30,002 INFO [train.py:1198] (1/2) Epoch 40, batch 1150, loss[loss=0.2264, ctc_loss=0.1099, cr_loss=0.3528, attn_decoder_loss=0.2315, over 29426.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1128, cr_loss=0.3527, attn_decoder_loss=0.2386, over 5753605.23 frames. 
], batch size: 78, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:22:42,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=710500.0, ans=0.125 +2024-09-19 17:22:51,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=710540.0, ans=0.0 +2024-09-19 17:22:55,728 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.177e+01 8.424e+01 8.898e+01 9.617e+01 1.555e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-19 17:23:17,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=710620.0, ans=0.0 +2024-09-19 17:23:17,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=710620.0, ans=0.125 +2024-09-19 17:23:26,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=710620.0, ans=0.125 +2024-09-19 17:23:27,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=710620.0, ans=0.0 +2024-09-19 17:23:35,105 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:23:37,146 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.82 vs. limit=10.0 +2024-09-19 17:23:45,222 INFO [train.py:1198] (1/2) Epoch 40, batch 1200, loss[loss=0.2392, ctc_loss=0.1075, cr_loss=0.3473, attn_decoder_loss=0.2461, over 29657.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.113, cr_loss=0.3533, attn_decoder_loss=0.2393, over 5746527.51 frames. ], batch size: 85, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:24:26,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=710780.0, ans=0.0 +2024-09-19 17:24:36,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.07 vs. limit=6.0 +2024-09-19 17:24:42,036 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.41 vs. limit=15.0 +2024-09-19 17:24:42,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten.whitening_limit, batch_count=710820.0, ans=15.0 +2024-09-19 17:24:47,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=710860.0, ans=0.0 +2024-09-19 17:25:01,045 INFO [train.py:1198] (1/2) Epoch 40, batch 1250, loss[loss=0.2471, ctc_loss=0.1201, cr_loss=0.364, attn_decoder_loss=0.2531, over 29511.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1133, cr_loss=0.3544, attn_decoder_loss=0.2398, over 5774400.44 frames. 
], batch size: 92, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:25:01,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=710900.0, ans=0.125 +2024-09-19 17:25:07,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=710900.0, ans=0.1 +2024-09-19 17:25:08,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=710900.0, ans=0.0 +2024-09-19 17:25:21,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=710940.0, ans=0.1 +2024-09-19 17:25:29,035 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.896e+01 8.708e+01 9.133e+01 9.581e+01 1.854e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-19 17:25:50,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=711020.0, ans=0.125 +2024-09-19 17:26:13,566 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.76 vs. limit=15.0 +2024-09-19 17:26:21,603 INFO [train.py:1198] (1/2) Epoch 40, batch 1300, loss[loss=0.2437, ctc_loss=0.1098, cr_loss=0.3567, attn_decoder_loss=0.2507, over 28294.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1132, cr_loss=0.3544, attn_decoder_loss=0.2392, over 5778883.75 frames. ], batch size: 111, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:26:25,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=711100.0, ans=0.2 +2024-09-19 17:26:28,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten.whitening_limit, batch_count=711100.0, ans=15.0 +2024-09-19 17:26:55,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=711180.0, ans=0.0 +2024-09-19 17:26:57,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=711180.0, ans=0.0 +2024-09-19 17:26:59,299 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=15.0 +2024-09-19 17:27:11,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=711220.0, ans=0.2 +2024-09-19 17:27:37,470 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.77 vs. limit=22.5 +2024-09-19 17:27:38,126 INFO [train.py:1198] (1/2) Epoch 40, batch 1350, loss[loss=0.235, ctc_loss=0.1084, cr_loss=0.3301, attn_decoder_loss=0.2417, over 29761.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1126, cr_loss=0.3529, attn_decoder_loss=0.239, over 5796107.60 frames. ], batch size: 81, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:27:45,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=711300.0, ans=0.125 +2024-09-19 17:27:46,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.96 vs. 
limit=22.5 +2024-09-19 17:27:51,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=711340.0, ans=0.125 +2024-09-19 17:27:58,642 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.27 vs. limit=22.5 +2024-09-19 17:28:03,655 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.480e+01 8.275e+01 9.002e+01 9.355e+01 2.084e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-19 17:28:12,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=711380.0, ans=0.0 +2024-09-19 17:28:15,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=711380.0, ans=0.0 +2024-09-19 17:28:26,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=711420.0, ans=0.125 +2024-09-19 17:28:49,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=711460.0, ans=0.125 +2024-09-19 17:28:53,210 INFO [train.py:1198] (1/2) Epoch 40, batch 1400, loss[loss=0.2129, ctc_loss=0.1012, cr_loss=0.3228, attn_decoder_loss=0.2182, over 29575.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1123, cr_loss=0.3522, attn_decoder_loss=0.2386, over 5806872.26 frames. ], batch size: 69, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:28:56,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=711500.0, ans=0.0 +2024-09-19 17:29:02,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=711500.0, ans=0.025 +2024-09-19 17:29:11,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=711540.0, ans=0.125 +2024-09-19 17:29:23,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=711540.0, ans=0.125 +2024-09-19 17:29:27,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=711580.0, ans=0.125 +2024-09-19 17:29:35,645 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.28 vs. limit=6.0 +2024-09-19 17:29:56,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=711660.0, ans=0.2 +2024-09-19 17:30:04,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=711660.0, ans=0.0 +2024-09-19 17:30:13,142 INFO [train.py:1198] (1/2) Epoch 40, batch 1450, loss[loss=0.23, ctc_loss=0.1054, cr_loss=0.3425, attn_decoder_loss=0.2362, over 29402.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1128, cr_loss=0.3537, attn_decoder_loss=0.2392, over 5803082.13 frames. 
], batch size: 94, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:30:38,766 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.480e+01 8.710e+01 9.115e+01 9.620e+01 3.738e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-19 17:30:46,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=711780.0, ans=0.2 +2024-09-19 17:30:49,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=711780.0, ans=0.0 +2024-09-19 17:31:28,344 INFO [train.py:1198] (1/2) Epoch 40, batch 1500, loss[loss=0.2439, ctc_loss=0.1219, cr_loss=0.3496, attn_decoder_loss=0.2497, over 29622.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.113, cr_loss=0.3537, attn_decoder_loss=0.2396, over 5804829.78 frames. ], batch size: 86, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:31:43,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=711940.0, ans=0.125 +2024-09-19 17:31:48,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=711940.0, ans=0.125 +2024-09-19 17:32:00,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff2.min_abs, batch_count=711980.0, ans=0.1 +2024-09-19 17:32:15,014 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.65 vs. limit=10.0 +2024-09-19 17:32:17,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=712020.0, ans=0.125 +2024-09-19 17:32:17,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=712020.0, ans=0.125 +2024-09-19 17:32:28,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=712060.0, ans=0.125 +2024-09-19 17:32:32,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=712060.0, ans=0.125 +2024-09-19 17:32:42,572 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.17 vs. limit=22.5 +2024-09-19 17:32:44,589 INFO [train.py:1198] (1/2) Epoch 40, batch 1550, loss[loss=0.2592, ctc_loss=0.1383, cr_loss=0.413, attn_decoder_loss=0.2634, over 29491.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1134, cr_loss=0.3546, attn_decoder_loss=0.2394, over 5780688.47 frames. ], batch size: 90, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:32:46,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=712100.0, ans=0.025 +2024-09-19 17:32:57,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.02 vs. 
limit=15.0 +2024-09-19 17:33:08,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=712140.0, ans=0.0 +2024-09-19 17:33:14,041 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.700e+01 8.677e+01 9.047e+01 9.758e+01 3.580e+02, threshold=1.809e+02, percent-clipped=1.0 +2024-09-19 17:33:27,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=712180.0, ans=0.125 +2024-09-19 17:33:46,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=712260.0, ans=0.025 +2024-09-19 17:33:55,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=712260.0, ans=0.125 +2024-09-19 17:33:57,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=712260.0, ans=0.0 +2024-09-19 17:34:04,515 INFO [train.py:1198] (1/2) Epoch 40, batch 1600, loss[loss=0.2407, ctc_loss=0.1142, cr_loss=0.3565, attn_decoder_loss=0.2468, over 29679.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1134, cr_loss=0.354, attn_decoder_loss=0.2393, over 5763728.92 frames. ], batch size: 85, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:34:06,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer_ff2.min_abs, batch_count=712300.0, ans=0.1 +2024-09-19 17:34:07,195 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.52 vs. limit=15.0 +2024-09-19 17:34:29,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=712340.0, ans=0.1 +2024-09-19 17:34:49,013 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=712420.0, ans=0.1 +2024-09-19 17:35:06,387 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.51 vs. limit=15.0 +2024-09-19 17:35:13,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=712460.0, ans=0.125 +2024-09-19 17:35:20,309 INFO [train.py:1198] (1/2) Epoch 40, batch 1650, loss[loss=0.2513, ctc_loss=0.1194, cr_loss=0.3703, attn_decoder_loss=0.2577, over 29707.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1128, cr_loss=0.3529, attn_decoder_loss=0.2389, over 5757142.70 frames. 
], batch size: 89, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:35:48,737 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.111e+01 8.375e+01 9.140e+01 9.741e+01 3.230e+02, threshold=1.828e+02, percent-clipped=2.0 +2024-09-19 17:35:49,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=712580.0, ans=0.1 +2024-09-19 17:35:55,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=712580.0, ans=0.125 +2024-09-19 17:35:59,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=712580.0, ans=0.1 +2024-09-19 17:36:25,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=712660.0, ans=0.125 +2024-09-19 17:36:35,509 INFO [train.py:1198] (1/2) Epoch 40, batch 1700, loss[loss=0.2103, ctc_loss=0.09984, cr_loss=0.3265, attn_decoder_loss=0.2154, over 29596.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1126, cr_loss=0.3526, attn_decoder_loss=0.2388, over 5778179.56 frames. ], batch size: 69, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:36:43,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=712700.0, ans=0.125 +2024-09-19 17:37:08,616 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.04 vs. limit=15.0 +2024-09-19 17:37:31,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.64 vs. limit=10.0 +2024-09-19 17:37:33,365 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.88 vs. limit=10.0 +2024-09-19 17:37:53,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=712860.0, ans=0.025 +2024-09-19 17:37:53,531 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.80 vs. limit=12.0 +2024-09-19 17:37:55,711 INFO [train.py:1198] (1/2) Epoch 40, batch 1750, loss[loss=0.206, ctc_loss=0.09212, cr_loss=0.3093, attn_decoder_loss=0.2118, over 29345.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1122, cr_loss=0.3513, attn_decoder_loss=0.2383, over 5785729.38 frames. ], batch size: 67, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:38:00,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=712900.0, ans=0.125 +2024-09-19 17:38:06,143 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.48 vs. 
limit=15.0 +2024-09-19 17:38:20,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=712940.0, ans=0.125 +2024-09-19 17:38:24,561 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.489e+01 8.436e+01 8.990e+01 9.570e+01 1.574e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-19 17:38:33,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=712980.0, ans=0.0 +2024-09-19 17:38:35,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=712980.0, ans=0.125 +2024-09-19 17:38:39,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=713020.0, ans=0.125 +2024-09-19 17:38:53,746 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=13.06 vs. limit=15.0 +2024-09-19 17:38:54,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.28 vs. limit=15.0 +2024-09-19 17:39:10,862 INFO [train.py:1198] (1/2) Epoch 40, batch 1800, loss[loss=0.2516, ctc_loss=0.1264, cr_loss=0.392, attn_decoder_loss=0.2568, over 29690.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1126, cr_loss=0.3529, attn_decoder_loss=0.2387, over 5788913.42 frames. ], batch size: 83, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:39:37,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=713140.0, ans=0.125 +2024-09-19 17:39:41,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=713180.0, ans=0.125 +2024-09-19 17:39:41,949 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.12 vs. limit=22.5 +2024-09-19 17:39:47,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=713180.0, ans=0.0 +2024-09-19 17:39:52,411 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.04 vs. limit=15.0 +2024-09-19 17:39:54,978 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:40:06,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=713220.0, ans=0.2 +2024-09-19 17:40:06,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=713220.0, ans=0.125 +2024-09-19 17:40:12,032 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.15 vs. limit=15.0 +2024-09-19 17:40:26,411 INFO [train.py:1198] (1/2) Epoch 40, batch 1850, loss[loss=0.2396, ctc_loss=0.108, cr_loss=0.3351, attn_decoder_loss=0.2468, over 29623.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1124, cr_loss=0.3525, attn_decoder_loss=0.2384, over 5795752.21 frames. 
], batch size: 86, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:40:57,146 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.317e+01 8.615e+01 9.088e+01 9.758e+01 2.205e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-19 17:41:14,453 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.02 vs. limit=15.0 +2024-09-19 17:41:27,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=713460.0, ans=0.0 +2024-09-19 17:41:43,532 INFO [train.py:1198] (1/2) Epoch 40, batch 1900, loss[loss=0.2461, ctc_loss=0.1228, cr_loss=0.372, attn_decoder_loss=0.2516, over 29692.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1125, cr_loss=0.3526, attn_decoder_loss=0.239, over 5803091.11 frames. ], batch size: 89, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:42:13,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=713540.0, ans=0.125 +2024-09-19 17:42:40,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=713620.0, ans=0.125 +2024-09-19 17:42:42,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=713620.0, ans=0.125 +2024-09-19 17:42:55,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=713660.0, ans=0.125 +2024-09-19 17:43:01,613 INFO [train.py:1198] (1/2) Epoch 40, batch 1950, loss[loss=0.2234, ctc_loss=0.1043, cr_loss=0.3423, attn_decoder_loss=0.2291, over 29443.00 frames. ], tot_loss[loss=0.2344, ctc_loss=0.1131, cr_loss=0.3539, attn_decoder_loss=0.24, over 5818129.96 frames. ], batch size: 78, lr: 2.76e-03, grad_scale: 8.0 +2024-09-19 17:43:06,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=713700.0, ans=0.125 +2024-09-19 17:43:18,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=713740.0, ans=0.125 +2024-09-19 17:43:24,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=713740.0, ans=0.125 +2024-09-19 17:43:30,166 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.544e+01 8.694e+01 9.094e+01 9.637e+01 1.422e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-19 17:43:33,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=713780.0, ans=0.125 +2024-09-19 17:44:08,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.13 vs. limit=15.0 +2024-09-19 17:44:13,405 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.16 vs. limit=10.0 +2024-09-19 17:44:16,979 INFO [train.py:1198] (1/2) Epoch 40, batch 2000, loss[loss=0.2117, ctc_loss=0.1009, cr_loss=0.3134, attn_decoder_loss=0.2171, over 29385.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.1133, cr_loss=0.3546, attn_decoder_loss=0.2404, over 5796058.72 frames. 
], batch size: 67, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:44:17,421 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:44:48,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=713980.0, ans=0.1 +2024-09-19 17:45:28,331 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.86 vs. limit=10.0 +2024-09-19 17:45:34,776 INFO [train.py:1198] (1/2) Epoch 40, batch 2050, loss[loss=0.2117, ctc_loss=0.09592, cr_loss=0.3183, attn_decoder_loss=0.2174, over 29445.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1129, cr_loss=0.3537, attn_decoder_loss=0.2396, over 5788358.10 frames. ], batch size: 70, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:45:36,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=714100.0, ans=0.2 +2024-09-19 17:45:49,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=714100.0, ans=0.1 +2024-09-19 17:45:51,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=714140.0, ans=0.1 +2024-09-19 17:45:55,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=714140.0, ans=0.07 +2024-09-19 17:46:05,844 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.514e+01 8.392e+01 8.898e+01 9.558e+01 3.245e+02, threshold=1.780e+02, percent-clipped=2.0 +2024-09-19 17:46:07,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.57 vs. limit=15.0 +2024-09-19 17:46:21,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=714220.0, ans=0.025 +2024-09-19 17:46:24,433 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:46:34,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=714220.0, ans=0.125 +2024-09-19 17:46:52,700 INFO [train.py:1198] (1/2) Epoch 40, batch 2100, loss[loss=0.2304, ctc_loss=0.1045, cr_loss=0.3452, attn_decoder_loss=0.2367, over 29777.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1124, cr_loss=0.3526, attn_decoder_loss=0.239, over 5799740.25 frames. ], batch size: 81, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:46:55,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=714300.0, ans=0.125 +2024-09-19 17:47:04,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=714300.0, ans=0.0 +2024-09-19 17:47:52,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=714460.0, ans=10.0 +2024-09-19 17:48:07,619 INFO [train.py:1198] (1/2) Epoch 40, batch 2150, loss[loss=0.2294, ctc_loss=0.1101, cr_loss=0.3387, attn_decoder_loss=0.2351, over 29465.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1119, cr_loss=0.3516, attn_decoder_loss=0.2385, over 5814879.38 frames. 
], batch size: 78, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:48:12,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=714500.0, ans=0.05 +2024-09-19 17:48:17,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=714500.0, ans=0.0 +2024-09-19 17:48:38,287 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.587e+01 9.010e+01 9.804e+01 2.260e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-19 17:48:38,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.min_positive, batch_count=714580.0, ans=0.025 +2024-09-19 17:48:46,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=714580.0, ans=0.125 +2024-09-19 17:48:55,608 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=15.72 vs. limit=22.5 +2024-09-19 17:49:05,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=714620.0, ans=0.125 +2024-09-19 17:49:08,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=714660.0, ans=0.125 +2024-09-19 17:49:19,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=714660.0, ans=0.0 +2024-09-19 17:49:24,962 INFO [train.py:1198] (1/2) Epoch 40, batch 2200, loss[loss=0.2406, ctc_loss=0.1118, cr_loss=0.3603, attn_decoder_loss=0.2469, over 29620.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1123, cr_loss=0.3523, attn_decoder_loss=0.2387, over 5811124.49 frames. ], batch size: 86, lr: 2.76e-03, grad_scale: 16.0 +2024-09-19 17:49:26,067 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=14.19 vs. limit=22.5 +2024-09-19 17:49:51,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=714740.0, ans=0.125 +2024-09-19 17:49:54,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=714740.0, ans=0.04949747468305833 +2024-09-19 17:50:15,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=714820.0, ans=0.125 +2024-09-19 17:50:26,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=714860.0, ans=0.125 +2024-09-19 17:50:42,694 INFO [train.py:1198] (1/2) Epoch 40, batch 2250, loss[loss=0.2396, ctc_loss=0.1131, cr_loss=0.3618, attn_decoder_loss=0.2457, over 29723.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1119, cr_loss=0.3513, attn_decoder_loss=0.2383, over 5810711.74 frames. 
], batch size: 82, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 17:51:12,518 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.430e+01 8.488e+01 9.052e+01 9.511e+01 5.082e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-19 17:51:14,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=714980.0, ans=0.1 +2024-09-19 17:51:21,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.20 vs. limit=15.0 +2024-09-19 17:51:21,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=714980.0, ans=0.0 +2024-09-19 17:51:41,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=715060.0, ans=0.025 +2024-09-19 17:51:57,722 INFO [train.py:1198] (1/2) Epoch 40, batch 2300, loss[loss=0.2098, ctc_loss=0.09598, cr_loss=0.309, attn_decoder_loss=0.2156, over 29367.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1115, cr_loss=0.3504, attn_decoder_loss=0.2373, over 5798173.82 frames. ], batch size: 71, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 17:52:17,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=715140.0, ans=0.2 +2024-09-19 17:52:45,623 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:53:02,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=715260.0, ans=0.125 +2024-09-19 17:53:15,290 INFO [train.py:1198] (1/2) Epoch 40, batch 2350, loss[loss=0.2362, ctc_loss=0.1118, cr_loss=0.3512, attn_decoder_loss=0.2422, over 29685.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1119, cr_loss=0.351, attn_decoder_loss=0.2377, over 5804318.13 frames. ], batch size: 83, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 17:53:17,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=715300.0, ans=0.125 +2024-09-19 17:53:21,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.21 vs. limit=15.0 +2024-09-19 17:53:44,633 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 17:53:47,273 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.564e+01 8.558e+01 9.025e+01 9.597e+01 1.404e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 17:53:55,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.max_abs, batch_count=715380.0, ans=10.0 +2024-09-19 17:54:01,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=715420.0, ans=0.0 +2024-09-19 17:54:30,661 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.76 vs. limit=6.0 +2024-09-19 17:54:32,767 INFO [train.py:1198] (1/2) Epoch 40, batch 2400, loss[loss=0.2292, ctc_loss=0.1111, cr_loss=0.3557, attn_decoder_loss=0.2344, over 29528.00 frames. 
], tot_loss[loss=0.2328, ctc_loss=0.1126, cr_loss=0.3528, attn_decoder_loss=0.2383, over 5808766.01 frames. ], batch size: 76, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:54:45,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=715500.0, ans=0.0 +2024-09-19 17:54:54,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=715540.0, ans=0.125 +2024-09-19 17:55:12,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=715580.0, ans=0.1 +2024-09-19 17:55:17,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=715620.0, ans=0.0 +2024-09-19 17:55:43,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.77 vs. limit=10.0 +2024-09-19 17:55:48,049 INFO [train.py:1198] (1/2) Epoch 40, batch 2450, loss[loss=0.239, ctc_loss=0.117, cr_loss=0.3561, attn_decoder_loss=0.2446, over 29733.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1132, cr_loss=0.354, attn_decoder_loss=0.2392, over 5786189.02 frames. ], batch size: 82, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:56:01,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=715740.0, ans=0.125 +2024-09-19 17:56:18,062 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.28 vs. limit=15.0 +2024-09-19 17:56:20,254 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.714e+01 9.274e+01 9.862e+01 1.579e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-19 17:56:35,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=715820.0, ans=0.0 +2024-09-19 17:56:39,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=715820.0, ans=0.125 +2024-09-19 17:57:02,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=715860.0, ans=0.1 +2024-09-19 17:57:05,503 INFO [train.py:1198] (1/2) Epoch 40, batch 2500, loss[loss=0.245, ctc_loss=0.1222, cr_loss=0.3765, attn_decoder_loss=0.2503, over 29625.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.113, cr_loss=0.3533, attn_decoder_loss=0.2391, over 5795996.88 frames. 
], batch size: 86, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:57:05,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=715900.0, ans=0.2 +2024-09-19 17:57:10,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=715900.0, ans=0.1 +2024-09-19 17:57:16,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=715900.0, ans=0.0 +2024-09-19 17:57:16,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=715900.0, ans=0.125 +2024-09-19 17:57:35,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=715940.0, ans=0.125 +2024-09-19 17:57:46,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=715980.0, ans=0.125 +2024-09-19 17:58:24,137 INFO [train.py:1198] (1/2) Epoch 40, batch 2550, loss[loss=0.2058, ctc_loss=0.09378, cr_loss=0.3031, attn_decoder_loss=0.2115, over 29313.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1129, cr_loss=0.3534, attn_decoder_loss=0.2391, over 5800076.74 frames. ], batch size: 67, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:58:28,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=716100.0, ans=0.125 +2024-09-19 17:58:45,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=716140.0, ans=0.1 +2024-09-19 17:58:53,983 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.331e+01 8.529e+01 8.996e+01 9.557e+01 1.715e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-19 17:59:20,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=716220.0, ans=0.125 +2024-09-19 17:59:26,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=716260.0, ans=0.025 +2024-09-19 17:59:39,657 INFO [train.py:1198] (1/2) Epoch 40, batch 2600, loss[loss=0.2211, ctc_loss=0.09925, cr_loss=0.318, attn_decoder_loss=0.2275, over 29466.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1125, cr_loss=0.3519, attn_decoder_loss=0.2393, over 5795983.51 frames. ], batch size: 78, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 17:59:59,258 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.26 vs. limit=10.0 +2024-09-19 18:00:08,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=716340.0, ans=0.125 +2024-09-19 18:00:12,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.53 vs. limit=10.0 +2024-09-19 18:00:32,926 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:00:49,702 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.54 vs. 
limit=15.0 +2024-09-19 18:00:50,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=716460.0, ans=0.0 +2024-09-19 18:00:53,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=716460.0, ans=0.125 +2024-09-19 18:00:56,053 INFO [train.py:1198] (1/2) Epoch 40, batch 2650, loss[loss=0.2565, ctc_loss=0.1349, cr_loss=0.3994, attn_decoder_loss=0.2612, over 29299.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1124, cr_loss=0.3519, attn_decoder_loss=0.2394, over 5802874.28 frames. ], batch size: 100, lr: 2.75e-03, grad_scale: 16.0 +2024-09-19 18:01:02,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=716500.0, ans=0.025 +2024-09-19 18:01:02,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=716500.0, ans=10.0 +2024-09-19 18:01:07,482 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.55 vs. limit=15.0 +2024-09-19 18:01:28,225 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.306e+01 8.493e+01 9.009e+01 9.595e+01 1.150e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-19 18:01:42,354 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:02:00,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=716660.0, ans=0.0 +2024-09-19 18:02:13,718 INFO [train.py:1198] (1/2) Epoch 40, batch 2700, loss[loss=0.2385, ctc_loss=0.1063, cr_loss=0.3446, attn_decoder_loss=0.2455, over 29551.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.113, cr_loss=0.3532, attn_decoder_loss=0.2399, over 5797843.67 frames. ], batch size: 87, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:02:18,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=716700.0, ans=0.025 +2024-09-19 18:02:22,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=716700.0, ans=0.1 +2024-09-19 18:02:25,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=716700.0, ans=0.125 +2024-09-19 18:02:32,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=716740.0, ans=0.2 +2024-09-19 18:02:42,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=716780.0, ans=0.2 +2024-09-19 18:02:56,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=716780.0, ans=0.125 +2024-09-19 18:03:17,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=716860.0, ans=0.125 +2024-09-19 18:03:29,470 INFO [train.py:1198] (1/2) Epoch 40, batch 2750, loss[loss=0.2332, ctc_loss=0.1208, cr_loss=0.3818, attn_decoder_loss=0.2372, over 29527.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1121, cr_loss=0.3516, attn_decoder_loss=0.2386, over 5795522.96 frames. 
], batch size: 75, lr: 2.75e-03, grad_scale: 4.0 +2024-09-19 18:03:35,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=716900.0, ans=0.025 +2024-09-19 18:03:40,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=716900.0, ans=0.125 +2024-09-19 18:03:53,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=716940.0, ans=0.125 +2024-09-19 18:03:55,067 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.43 vs. limit=15.0 +2024-09-19 18:04:04,573 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.639e+01 8.418e+01 8.972e+01 9.467e+01 1.420e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 18:04:07,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=716980.0, ans=0.0 +2024-09-19 18:04:09,472 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:04:18,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=717020.0, ans=0.125 +2024-09-19 18:04:20,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=717020.0, ans=0.125 +2024-09-19 18:04:30,789 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:04:46,936 INFO [train.py:1198] (1/2) Epoch 40, batch 2800, loss[loss=0.256, ctc_loss=0.1522, cr_loss=0.4175, attn_decoder_loss=0.2582, over 20082.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1124, cr_loss=0.3522, attn_decoder_loss=0.2387, over 5776185.13 frames. ], batch size: 210, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:04:52,001 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.44 vs. limit=15.0 +2024-09-19 18:04:53,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.66 vs. limit=15.0 +2024-09-19 18:04:58,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=717100.0, ans=15.0 +2024-09-19 18:05:02,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=717140.0, ans=0.125 +2024-09-19 18:05:04,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.15 vs. limit=15.0 +2024-09-19 18:05:12,129 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.86 vs. 
limit=15.0 +2024-09-19 18:05:15,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=717180.0, ans=0.125 +2024-09-19 18:05:22,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=717180.0, ans=0.0 +2024-09-19 18:05:37,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=717220.0, ans=0.125 +2024-09-19 18:05:37,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=717220.0, ans=0.04949747468305833 +2024-09-19 18:05:58,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=717260.0, ans=0.125 +2024-09-19 18:05:59,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=717260.0, ans=0.0 +2024-09-19 18:06:03,969 INFO [train.py:1198] (1/2) Epoch 40, batch 2850, loss[loss=0.2254, ctc_loss=0.1042, cr_loss=0.327, attn_decoder_loss=0.2316, over 29515.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1128, cr_loss=0.3531, attn_decoder_loss=0.2392, over 5761635.72 frames. ], batch size: 77, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:06:33,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=717380.0, ans=0.125 +2024-09-19 18:06:35,159 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.19 vs. limit=22.5 +2024-09-19 18:06:37,391 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.647e+01 8.590e+01 9.012e+01 9.613e+01 1.852e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-19 18:07:15,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=717460.0, ans=0.2 +2024-09-19 18:07:19,945 INFO [train.py:1198] (1/2) Epoch 40, batch 2900, loss[loss=0.2332, ctc_loss=0.1088, cr_loss=0.3517, attn_decoder_loss=0.2393, over 29402.00 frames. ], tot_loss[loss=0.2346, ctc_loss=0.1133, cr_loss=0.3543, attn_decoder_loss=0.2402, over 5787226.19 frames. ], batch size: 79, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:07:21,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=717500.0, ans=0.09899494936611666 +2024-09-19 18:08:06,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=717620.0, ans=0.125 +2024-09-19 18:08:07,879 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:08:26,350 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.69 vs. limit=15.0 +2024-09-19 18:08:37,798 INFO [train.py:1198] (1/2) Epoch 40, batch 2950, loss[loss=0.2207, ctc_loss=0.1026, cr_loss=0.3242, attn_decoder_loss=0.2266, over 29530.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1124, cr_loss=0.3523, attn_decoder_loss=0.239, over 5781977.83 frames. 
], batch size: 75, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:08:48,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=717700.0, ans=0.125 +2024-09-19 18:09:01,757 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.55 vs. limit=12.0 +2024-09-19 18:09:11,434 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.157e+01 8.443e+01 9.079e+01 9.666e+01 1.457e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 18:09:27,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=717820.0, ans=0.125 +2024-09-19 18:09:29,393 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.90 vs. limit=22.5 +2024-09-19 18:09:36,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=717820.0, ans=0.025 +2024-09-19 18:09:46,106 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.67 vs. limit=10.0 +2024-09-19 18:09:53,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=717860.0, ans=0.125 +2024-09-19 18:09:56,126 INFO [train.py:1198] (1/2) Epoch 40, batch 3000, loss[loss=0.2363, ctc_loss=0.1112, cr_loss=0.3505, attn_decoder_loss=0.2424, over 29772.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1123, cr_loss=0.3519, attn_decoder_loss=0.2388, over 5782654.10 frames. ], batch size: 81, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:09:56,126 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 18:10:14,431 INFO [train.py:1230] (1/2) Epoch 40, validation: loss=0.2122, ctc_loss=0.03685, cr_loss=5.615e-15, attn_decoder_loss=0.2317, over 944034.00 frames. +2024-09-19 18:10:14,431 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 18:10:24,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=717900.0, ans=0.0 +2024-09-19 18:11:00,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=718020.0, ans=0.1 +2024-09-19 18:11:06,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=718020.0, ans=0.125 +2024-09-19 18:11:09,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=718020.0, ans=0.125 +2024-09-19 18:11:11,737 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.45 vs. 
limit=15.0 +2024-09-19 18:11:20,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=718060.0, ans=0.015 +2024-09-19 18:11:31,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=718100.0, ans=0.125 +2024-09-19 18:11:31,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=718100.0, ans=0.125 +2024-09-19 18:11:32,563 INFO [train.py:1198] (1/2) Epoch 40, batch 3050, loss[loss=0.23, ctc_loss=0.1121, cr_loss=0.3614, attn_decoder_loss=0.2351, over 29539.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1132, cr_loss=0.3546, attn_decoder_loss=0.2398, over 5776346.13 frames. ], batch size: 76, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:11:45,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=718100.0, ans=0.1 +2024-09-19 18:11:51,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=718140.0, ans=0.1 +2024-09-19 18:11:54,565 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.06 vs. limit=15.0 +2024-09-19 18:12:04,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=718180.0, ans=0.05 +2024-09-19 18:12:05,626 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.688e+01 8.520e+01 9.084e+01 9.934e+01 1.461e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-19 18:12:05,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=718180.0, ans=0.5 +2024-09-19 18:12:23,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=718220.0, ans=0.125 +2024-09-19 18:12:25,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=718220.0, ans=0.1 +2024-09-19 18:12:47,574 INFO [train.py:1198] (1/2) Epoch 40, batch 3100, loss[loss=0.2512, ctc_loss=0.1265, cr_loss=0.3975, attn_decoder_loss=0.2562, over 29245.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1131, cr_loss=0.354, attn_decoder_loss=0.2395, over 5776580.67 frames. ], batch size: 100, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:12:59,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=718300.0, ans=0.125 +2024-09-19 18:13:09,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=718340.0, ans=0.125 +2024-09-19 18:13:12,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=718340.0, ans=0.125 +2024-09-19 18:13:14,222 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:13:26,863 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.48 vs. limit=15.0 +2024-09-19 18:13:30,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.61 vs. 
limit=15.0 +2024-09-19 18:13:42,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=718420.0, ans=0.1 +2024-09-19 18:13:46,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=718420.0, ans=0.0 +2024-09-19 18:13:53,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=718460.0, ans=0.125 +2024-09-19 18:13:58,700 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:13:59,281 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.30 vs. limit=15.0 +2024-09-19 18:14:02,240 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.24 vs. limit=15.0 +2024-09-19 18:14:03,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=718460.0, ans=0.05 +2024-09-19 18:14:04,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=718500.0, ans=0.125 +2024-09-19 18:14:06,030 INFO [train.py:1198] (1/2) Epoch 40, batch 3150, loss[loss=0.2492, ctc_loss=0.1126, cr_loss=0.3395, attn_decoder_loss=0.2568, over 28836.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1128, cr_loss=0.3532, attn_decoder_loss=0.2395, over 5782140.03 frames. ], batch size: 104, lr: 2.75e-03, grad_scale: 8.0 +2024-09-19 18:14:13,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=718500.0, ans=0.0 +2024-09-19 18:14:18,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=718500.0, ans=0.125 +2024-09-19 18:14:24,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=718540.0, ans=0.1 +2024-09-19 18:14:26,869 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.53 vs. limit=15.0 +2024-09-19 18:14:32,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=718540.0, ans=0.125 +2024-09-19 18:14:39,242 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.527e+01 8.506e+01 9.197e+01 9.540e+01 2.562e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-19 18:14:45,617 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 18:15:23,520 INFO [train.py:1198] (1/2) Epoch 40, batch 3200, loss[loss=0.2349, ctc_loss=0.1143, cr_loss=0.354, attn_decoder_loss=0.2404, over 29413.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1126, cr_loss=0.353, attn_decoder_loss=0.2388, over 5792054.57 frames. 
], batch size: 79, lr: 2.75e-03, grad_scale: 16.0
+2024-09-19 18:15:25,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=718700.0, ans=0.125
+2024-09-19 18:15:28,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=718700.0, ans=0.125
+2024-09-19 18:15:51,819 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.99 vs. limit=12.0
+2024-09-19 18:15:52,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=718780.0, ans=0.125
+2024-09-19 18:15:52,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=718780.0, ans=0.2
+2024-09-19 18:15:58,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=718780.0, ans=0.125
+2024-09-19 18:16:06,515 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.50 vs. limit=15.0
+2024-09-19 18:16:06,632 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.17 vs. limit=22.5
+2024-09-19 18:16:38,753 INFO [train.py:1198] (1/2) Epoch 40, batch 3250, loss[loss=0.2425, ctc_loss=0.1104, cr_loss=0.3476, attn_decoder_loss=0.2494, over 29718.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1126, cr_loss=0.3534, attn_decoder_loss=0.2393, over 5799163.80 frames. ], batch size: 84, lr: 2.75e-03, grad_scale: 16.0
+2024-09-19 18:17:05,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=718940.0, ans=0.125
+2024-09-19 18:17:09,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=718980.0, ans=0.025
+2024-09-19 18:17:13,652 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.518e+01 9.005e+01 9.479e+01 1.398e+02, threshold=1.801e+02, percent-clipped=0.0
+2024-09-19 18:17:35,454 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.16 vs. limit=6.0
+2024-09-19 18:17:45,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=719060.0, ans=0.125
+2024-09-19 18:17:55,808 INFO [train.py:1198] (1/2) Epoch 40, batch 3300, loss[loss=0.248, ctc_loss=0.113, cr_loss=0.3535, attn_decoder_loss=0.2551, over 28295.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1119, cr_loss=0.3518, attn_decoder_loss=0.2384, over 5795426.09 frames. ], batch size: 111, lr: 2.75e-03, grad_scale: 16.0
+2024-09-19 18:18:02,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=719100.0, ans=0.1
+2024-09-19 18:18:02,743 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.19 vs. limit=10.0
+2024-09-19 18:18:13,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1.whitening_limit, batch_count=719140.0, ans=10.0
+2024-09-19 18:18:30,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=719180.0, ans=0.125
+2024-09-19 18:18:40,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=719220.0, ans=0.0
+2024-09-19 18:19:05,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.00 vs. limit=15.0
+2024-09-19 18:19:10,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=719260.0, ans=0.0
+2024-09-19 18:19:13,642 INFO [train.py:1198] (1/2) Epoch 40, batch 3350, loss[loss=0.2522, ctc_loss=0.1332, cr_loss=0.4049, attn_decoder_loss=0.2565, over 28914.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.113, cr_loss=0.3536, attn_decoder_loss=0.2392, over 5771024.08 frames. ], batch size: 104, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:19:29,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=719340.0, ans=0.125
+2024-09-19 18:19:35,906 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.91 vs. limit=15.0
+2024-09-19 18:19:48,481 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.582e+01 9.036e+01 9.650e+01 6.119e+02, threshold=1.807e+02, percent-clipped=2.0
+2024-09-19 18:19:57,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=719420.0, ans=0.1
+2024-09-19 18:20:12,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=719460.0, ans=0.0
+2024-09-19 18:20:14,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=719460.0, ans=0.1
+2024-09-19 18:20:15,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=719460.0, ans=0.125
+2024-09-19 18:20:22,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=719460.0, ans=0.0
+2024-09-19 18:20:29,165 INFO [train.py:1198] (1/2) Epoch 40, batch 3400, loss[loss=0.2111, ctc_loss=0.09669, cr_loss=0.3133, attn_decoder_loss=0.2169, over 29360.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.113, cr_loss=0.3535, attn_decoder_loss=0.2391, over 5764362.26 frames. ], batch size: 67, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:20:29,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=719500.0, ans=0.05
+2024-09-19 18:20:49,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=719540.0, ans=0.125
+2024-09-19 18:21:11,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=719580.0, ans=0.07
+2024-09-19 18:21:29,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=719620.0, ans=0.1
+2024-09-19 18:21:30,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=719660.0, ans=0.2
+2024-09-19 18:21:32,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.92 vs. limit=12.0
+2024-09-19 18:21:46,909 INFO [train.py:1198] (1/2) Epoch 40, batch 3450, loss[loss=0.2449, ctc_loss=0.1136, cr_loss=0.3423, attn_decoder_loss=0.2519, over 28557.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.113, cr_loss=0.3538, attn_decoder_loss=0.2395, over 5773889.65 frames. ], batch size: 112, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:21:53,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=719700.0, ans=0.0
+2024-09-19 18:22:09,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=719740.0, ans=0.125
+2024-09-19 18:22:21,307 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.879e+01 8.580e+01 9.014e+01 9.618e+01 1.900e+02, threshold=1.803e+02, percent-clipped=1.0
+2024-09-19 18:22:23,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=719780.0, ans=0.125
+2024-09-19 18:22:48,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=719860.0, ans=0.2
+2024-09-19 18:22:51,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=719860.0, ans=0.2
+2024-09-19 18:22:54,105 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=719860.0, ans=0.0
+2024-09-19 18:23:04,427 INFO [train.py:1198] (1/2) Epoch 40, batch 3500, loss[loss=0.2075, ctc_loss=0.09319, cr_loss=0.3108, attn_decoder_loss=0.2133, over 29323.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1129, cr_loss=0.3533, attn_decoder_loss=0.2391, over 5776982.04 frames. ], batch size: 71, lr: 2.75e-03, grad_scale: 8.0
+2024-09-19 18:23:10,809 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:23:16,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=719900.0, ans=0.2
+2024-09-19 18:23:27,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.21 vs. limit=15.0
+2024-09-19 18:23:39,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=719980.0, ans=0.125
+2024-09-19 18:23:51,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=719980.0, ans=0.125
+2024-09-19 18:23:54,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=719980.0, ans=0.125
+2024-09-19 18:24:03,672 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.73 vs. limit=15.0
+2024-09-19 18:24:19,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=720060.0, ans=0.125
+2024-09-19 18:24:23,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=720060.0, ans=0.1
+2024-09-19 18:24:26,506 INFO [train.py:1198] (1/2) Epoch 40, batch 3550, loss[loss=0.2393, ctc_loss=0.1141, cr_loss=0.3583, attn_decoder_loss=0.2453, over 29717.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1129, cr_loss=0.3534, attn_decoder_loss=0.2391, over 5784095.44 frames. ], batch size: 89, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:24:29,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=720100.0, ans=0.125
+2024-09-19 18:24:32,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=720100.0, ans=0.125
+2024-09-19 18:24:39,122 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.92 vs. limit=15.0
+2024-09-19 18:24:44,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=720140.0, ans=0.2
+2024-09-19 18:24:58,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=720180.0, ans=0.0
+2024-09-19 18:24:59,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=720180.0, ans=0.125
+2024-09-19 18:25:00,513 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.309e+01 8.600e+01 9.034e+01 9.634e+01 4.593e+02, threshold=1.807e+02, percent-clipped=2.0
+2024-09-19 18:25:11,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=720220.0, ans=0.5
+2024-09-19 18:25:17,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=720220.0, ans=0.0
+2024-09-19 18:25:29,103 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.94 vs. limit=15.0
+2024-09-19 18:25:31,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=720260.0, ans=0.0
+2024-09-19 18:25:40,397 INFO [train.py:1198] (1/2) Epoch 40, batch 3600, loss[loss=0.2235, ctc_loss=0.1022, cr_loss=0.3207, attn_decoder_loss=0.2299, over 29493.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1131, cr_loss=0.3537, attn_decoder_loss=0.2393, over 5792581.50 frames. ], batch size: 77, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:25:42,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=720300.0, ans=0.125
+2024-09-19 18:25:57,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=720340.0, ans=0.125
+2024-09-19 18:26:10,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=720380.0, ans=0.125
+2024-09-19 18:26:21,332 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:26:23,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.78 vs. limit=22.5
+2024-09-19 18:26:56,388 INFO [train.py:1198] (1/2) Epoch 40, batch 3650, loss[loss=0.2337, ctc_loss=0.1126, cr_loss=0.3413, attn_decoder_loss=0.2395, over 29524.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1126, cr_loss=0.3527, attn_decoder_loss=0.2387, over 5794065.57 frames. ], batch size: 90, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:27:05,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=720500.0, ans=0.125
+2024-09-19 18:27:12,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=720540.0, ans=0.1
+2024-09-19 18:27:30,422 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.153e+01 8.608e+01 9.210e+01 9.736e+01 1.315e+02, threshold=1.842e+02, percent-clipped=0.0
+2024-09-19 18:27:42,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=720620.0, ans=0.125
+2024-09-19 18:27:44,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=720620.0, ans=0.09899494936611666
+2024-09-19 18:27:50,728 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.19 vs. limit=15.0
+2024-09-19 18:27:53,985 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.18 vs. limit=15.0
+2024-09-19 18:27:59,441 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.32 vs. limit=22.5
+2024-09-19 18:28:02,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=720660.0, ans=0.025
+2024-09-19 18:28:10,693 INFO [train.py:1198] (1/2) Epoch 40, batch 3700, loss[loss=0.2463, ctc_loss=0.1171, cr_loss=0.3519, attn_decoder_loss=0.2529, over 29706.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1127, cr_loss=0.3531, attn_decoder_loss=0.2389, over 5803265.55 frames. ], batch size: 84, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:28:28,008 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.29 vs. limit=10.0
+2024-09-19 18:28:46,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=720780.0, ans=0.07
+2024-09-19 18:28:49,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=720780.0, ans=0.1
+2024-09-19 18:28:49,824 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:29:17,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=720860.0, ans=0.1
+2024-09-19 18:29:19,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=720860.0, ans=0.125
+2024-09-19 18:29:26,573 INFO [train.py:1198] (1/2) Epoch 40, batch 3750, loss[loss=0.2042, ctc_loss=0.0897, cr_loss=0.3018, attn_decoder_loss=0.2102, over 29359.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1126, cr_loss=0.3531, attn_decoder_loss=0.2386, over 5806888.21 frames. ], batch size: 67, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:29:40,312 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:29:44,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=720940.0, ans=0.1
+2024-09-19 18:29:51,289 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.77 vs. limit=10.0
+2024-09-19 18:29:57,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=720980.0, ans=0.0
+2024-09-19 18:30:01,966 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.625e+01 8.539e+01 9.071e+01 9.494e+01 1.651e+02, threshold=1.814e+02, percent-clipped=0.0
+2024-09-19 18:30:10,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.52 vs. limit=15.0
+2024-09-19 18:30:15,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=721020.0, ans=0.125
+2024-09-19 18:30:17,316 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=721020.0, ans=0.125
+2024-09-19 18:30:17,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=721020.0, ans=0.95
+2024-09-19 18:30:29,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=721060.0, ans=0.0
+2024-09-19 18:30:40,981 INFO [train.py:1198] (1/2) Epoch 40, batch 3800, loss[loss=0.244, ctc_loss=0.119, cr_loss=0.3715, attn_decoder_loss=0.2496, over 29643.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1123, cr_loss=0.3527, attn_decoder_loss=0.2383, over 5797182.08 frames. ], batch size: 86, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:31:03,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=721140.0, ans=0.2
+2024-09-19 18:31:05,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=721140.0, ans=0.2
+2024-09-19 18:31:09,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=721180.0, ans=0.1
+2024-09-19 18:31:12,838 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.37 vs. limit=10.0
+2024-09-19 18:31:19,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=721180.0, ans=0.125
+2024-09-19 18:31:19,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=721180.0, ans=0.1
+2024-09-19 18:31:27,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=721220.0, ans=0.0
+2024-09-19 18:31:46,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=721260.0, ans=0.125
+2024-09-19 18:31:56,319 INFO [train.py:1198] (1/2) Epoch 40, batch 3850, loss[loss=0.2443, ctc_loss=0.1152, cr_loss=0.3729, attn_decoder_loss=0.2504, over 29235.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1116, cr_loss=0.3513, attn_decoder_loss=0.2381, over 5811796.06 frames. ], batch size: 100, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:31:59,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=721300.0, ans=0.125
+2024-09-19 18:32:30,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=721380.0, ans=0.0
+2024-09-19 18:32:31,695 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.269e+01 8.429e+01 8.857e+01 9.400e+01 1.753e+02, threshold=1.771e+02, percent-clipped=0.0
+2024-09-19 18:32:44,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.21 vs. limit=22.5
+2024-09-19 18:33:10,022 INFO [train.py:1198] (1/2) Epoch 40, batch 3900, loss[loss=0.241, ctc_loss=0.1203, cr_loss=0.3592, attn_decoder_loss=0.2464, over 29604.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1124, cr_loss=0.3528, attn_decoder_loss=0.2388, over 5815789.01 frames. ], batch size: 86, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:33:24,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=721540.0, ans=0.125
+2024-09-19 18:33:26,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=721540.0, ans=0.125
+2024-09-19 18:33:44,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=721580.0, ans=0.0
+2024-09-19 18:33:57,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=721620.0, ans=0.0
+2024-09-19 18:34:25,648 INFO [train.py:1198] (1/2) Epoch 40, batch 3950, loss[loss=0.241, ctc_loss=0.1199, cr_loss=0.3582, attn_decoder_loss=0.2465, over 29514.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.112, cr_loss=0.3518, attn_decoder_loss=0.2387, over 5835121.11 frames. ], batch size: 97, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:34:40,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=721740.0, ans=0.125
+2024-09-19 18:34:48,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=721740.0, ans=0.1
+2024-09-19 18:34:49,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=721740.0, ans=0.1
+2024-09-19 18:35:00,908 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.341e+01 8.605e+01 9.141e+01 9.620e+01 2.736e+02, threshold=1.828e+02, percent-clipped=1.0
+2024-09-19 18:35:01,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=721780.0, ans=0.09899494936611666
+2024-09-19 18:35:14,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=721820.0, ans=0.125
+2024-09-19 18:35:24,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=721860.0, ans=0.125
+2024-09-19 18:35:32,929 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.53 vs. limit=12.0
+2024-09-19 18:35:39,103 INFO [train.py:1198] (1/2) Epoch 40, batch 4000, loss[loss=0.2286, ctc_loss=0.1056, cr_loss=0.334, attn_decoder_loss=0.2348, over 29492.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1121, cr_loss=0.352, attn_decoder_loss=0.2389, over 5813469.85 frames. ], batch size: 74, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:36:01,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=721940.0, ans=0.2
+2024-09-19 18:36:12,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=721980.0, ans=0.0
+2024-09-19 18:36:29,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=722020.0, ans=0.1
+2024-09-19 18:36:29,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=722020.0, ans=0.125
+2024-09-19 18:36:54,431 INFO [train.py:1198] (1/2) Epoch 40, batch 4050, loss[loss=0.2545, ctc_loss=0.146, cr_loss=0.3909, attn_decoder_loss=0.2579, over 20153.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.112, cr_loss=0.3514, attn_decoder_loss=0.2386, over 5796860.48 frames. ], batch size: 209, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:37:07,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.51 vs. limit=15.0
+2024-09-19 18:37:17,775 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.53 vs. limit=10.0
+2024-09-19 18:37:18,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=722140.0, ans=0.1
+2024-09-19 18:37:19,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=722140.0, ans=0.125
+2024-09-19 18:37:29,859 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.600e+01 8.666e+01 9.149e+01 9.737e+01 4.805e+02, threshold=1.830e+02, percent-clipped=1.0
+2024-09-19 18:37:40,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=722220.0, ans=0.0
+2024-09-19 18:37:47,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=722220.0, ans=0.125
+2024-09-19 18:37:57,185 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=7.10 vs. limit=15.0
+2024-09-19 18:38:08,046 INFO [train.py:1198] (1/2) Epoch 40, batch 4100, loss[loss=0.2592, ctc_loss=0.1388, cr_loss=0.4261, attn_decoder_loss=0.2631, over 29458.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1122, cr_loss=0.3512, attn_decoder_loss=0.2387, over 5792077.31 frames. ], batch size: 90, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:38:18,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=722300.0, ans=0.2
+2024-09-19 18:38:28,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=722340.0, ans=0.2
+2024-09-19 18:38:36,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=722380.0, ans=0.2
+2024-09-19 18:38:42,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=722380.0, ans=0.0
+2024-09-19 18:38:50,230 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.57 vs. limit=15.0
+2024-09-19 18:39:23,039 INFO [train.py:1198] (1/2) Epoch 40, batch 4150, loss[loss=0.2237, ctc_loss=0.1049, cr_loss=0.3496, attn_decoder_loss=0.2291, over 29503.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1122, cr_loss=0.3508, attn_decoder_loss=0.2386, over 5797549.09 frames. ], batch size: 77, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:39:27,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=722500.0, ans=0.0
+2024-09-19 18:39:27,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=722500.0, ans=0.0
+2024-09-19 18:39:57,904 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.961e+01 8.418e+01 8.915e+01 9.615e+01 1.835e+02, threshold=1.783e+02, percent-clipped=1.0
+2024-09-19 18:40:36,033 INFO [train.py:1198] (1/2) Epoch 40, batch 4200, loss[loss=0.2463, ctc_loss=0.1221, cr_loss=0.3756, attn_decoder_loss=0.2517, over 29498.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1125, cr_loss=0.3517, attn_decoder_loss=0.2392, over 5799953.52 frames. ], batch size: 90, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:40:52,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=722740.0, ans=0.1
+2024-09-19 18:40:57,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=722740.0, ans=0.125
+2024-09-19 18:41:08,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=722780.0, ans=0.2
+2024-09-19 18:41:18,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=722780.0, ans=0.2
+2024-09-19 18:41:19,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=722820.0, ans=0.125
+2024-09-19 18:41:30,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=722820.0, ans=0.0
+2024-09-19 18:41:47,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=722860.0, ans=0.125
+2024-09-19 18:41:50,106 INFO [train.py:1198] (1/2) Epoch 40, batch 4250, loss[loss=0.2195, ctc_loss=0.09814, cr_loss=0.324, attn_decoder_loss=0.2258, over 29509.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1123, cr_loss=0.3517, attn_decoder_loss=0.2395, over 5805969.80 frames. ], batch size: 74, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:41:50,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=722900.0, ans=0.125
+2024-09-19 18:41:56,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=722900.0, ans=0.125
+2024-09-19 18:42:12,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=722940.0, ans=0.0
+2024-09-19 18:42:27,446 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.534e+01 9.089e+01 9.722e+01 3.339e+02, threshold=1.818e+02, percent-clipped=2.0
+2024-09-19 18:42:39,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=723020.0, ans=0.95
+2024-09-19 18:42:42,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=723020.0, ans=0.07
+2024-09-19 18:42:51,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=723060.0, ans=0.125
+2024-09-19 18:43:04,245 INFO [train.py:1198] (1/2) Epoch 40, batch 4300, loss[loss=0.2439, ctc_loss=0.1116, cr_loss=0.347, attn_decoder_loss=0.2509, over 29516.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1123, cr_loss=0.3518, attn_decoder_loss=0.2397, over 5794582.28 frames. ], batch size: 87, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:43:12,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=723100.0, ans=0.0
+2024-09-19 18:43:26,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=723140.0, ans=0.0
+2024-09-19 18:43:28,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=723140.0, ans=0.125
+2024-09-19 18:44:06,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=723260.0, ans=0.2
+2024-09-19 18:44:15,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=723260.0, ans=0.0
+2024-09-19 18:44:19,239 INFO [train.py:1198] (1/2) Epoch 40, batch 4350, loss[loss=0.2454, ctc_loss=0.1229, cr_loss=0.3649, attn_decoder_loss=0.2509, over 29501.00 frames. ], tot_loss[loss=0.2371, ctc_loss=0.1151, cr_loss=0.3575, attn_decoder_loss=0.2427, over 5797601.55 frames. ], batch size: 97, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:44:25,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=723300.0, ans=0.125
+2024-09-19 18:44:55,940 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.669e+01 8.976e+01 9.434e+01 1.012e+02 1.882e+02, threshold=1.887e+02, percent-clipped=1.0
+2024-09-19 18:44:56,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=723380.0, ans=0.95
+2024-09-19 18:45:23,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.95 vs. limit=15.0
+2024-09-19 18:45:30,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=723460.0, ans=0.125
+2024-09-19 18:45:32,930 INFO [train.py:1198] (1/2) Epoch 40, batch 4400, loss[loss=0.2415, ctc_loss=0.1179, cr_loss=0.3737, attn_decoder_loss=0.2469, over 27379.00 frames. ], tot_loss[loss=0.2391, ctc_loss=0.1163, cr_loss=0.3599, attn_decoder_loss=0.2447, over 5767373.78 frames. ], batch size: 124, lr: 2.74e-03, grad_scale: 16.0
+2024-09-19 18:45:42,381 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=4.22 vs. limit=12.0
+2024-09-19 18:45:44,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=723500.0, ans=0.125
+2024-09-19 18:45:55,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=723540.0, ans=0.0
+2024-09-19 18:46:31,712 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 18:46:33,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=723660.0, ans=0.125
+2024-09-19 18:46:38,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=723660.0, ans=0.025
+2024-09-19 18:46:38,551 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.60 vs. limit=15.0
+2024-09-19 18:46:46,890 INFO [train.py:1198] (1/2) Epoch 40, batch 4450, loss[loss=0.2532, ctc_loss=0.1465, cr_loss=0.3731, attn_decoder_loss=0.2568, over 19816.00 frames. ], tot_loss[loss=0.2413, ctc_loss=0.1199, cr_loss=0.3654, attn_decoder_loss=0.2467, over 5570431.54 frames. ], batch size: 209, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:46:53,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=723700.0, ans=0.125
+2024-09-19 18:46:59,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=723700.0, ans=0.1
+2024-09-19 18:47:14,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=723740.0, ans=0.05
+2024-09-19 18:47:16,620 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=14.52 vs. limit=15.0
+2024-09-19 18:47:26,344 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.232e+01 9.186e+01 1.020e+02 1.192e+02 3.727e+02, threshold=2.040e+02, percent-clipped=2.0
+2024-09-19 18:48:02,473 INFO [train.py:1198] (1/2) Epoch 40, batch 4500, loss[loss=0.2497, ctc_loss=0.1397, cr_loss=0.3564, attn_decoder_loss=0.254, over 20483.00 frames. ], tot_loss[loss=0.2432, ctc_loss=0.1229, cr_loss=0.3682, attn_decoder_loss=0.2484, over 5233227.71 frames. ], batch size: 209, lr: 2.74e-03, grad_scale: 8.0
+2024-09-19 18:48:03,653 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=7.04 vs. limit=10.0
+2024-09-19 18:48:07,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=723900.0, ans=0.125
+2024-09-19 18:48:17,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=723940.0, ans=0.125
+2024-09-19 18:48:21,116 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.77 vs. limit=22.5
+2024-09-19 18:48:22,709 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.20 vs. limit=22.5
+2024-09-19 18:48:33,009 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.35 vs. limit=15.0
+2024-09-19 18:49:10,542 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.58 vs. limit=22.5
+2024-09-19 18:49:17,629 INFO [train.py:1198] (1/2) Epoch 41, batch 0, loss[loss=0.2099, ctc_loss=0.09146, cr_loss=0.2958, attn_decoder_loss=0.2164, over 29607.00 frames. ], tot_loss[loss=0.2099, ctc_loss=0.09146, cr_loss=0.2958, attn_decoder_loss=0.2164, over 29607.00 frames. ], batch size: 73, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 18:49:17,629 INFO [train.py:1221] (1/2) Computing validation loss
+2024-09-19 18:49:35,496 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.1.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([4.7393, 4.2303, 4.5439, 4.6690], device='cuda:1')
+2024-09-19 18:49:36,951 INFO [train.py:1230] (1/2) Epoch 41, validation: loss=0.2123, ctc_loss=0.03622, cr_loss=6.741e-15, attn_decoder_loss=0.2319, over 944034.00 frames.
+2024-09-19 18:49:36,952 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB
+2024-09-19 18:49:59,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=724040.0, ans=0.125
+2024-09-19 18:50:01,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=724040.0, ans=0.125
+2024-09-19 18:50:15,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=724080.0, ans=0.2
+2024-09-19 18:50:16,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=724080.0, ans=0.1
+2024-09-19 18:50:32,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.04 vs. limit=15.0
+2024-09-19 18:50:52,546 INFO [train.py:1198] (1/2) Epoch 41, batch 50, loss[loss=0.2078, ctc_loss=0.0907, cr_loss=0.3102, attn_decoder_loss=0.2139, over 29483.00 frames. ], tot_loss[loss=0.2357, ctc_loss=0.1146, cr_loss=0.3579, attn_decoder_loss=0.2412, over 1266993.74 frames. ], batch size: 70, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 18:50:54,028 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 9.153e+01 1.062e+02 1.232e+02 3.092e+02, threshold=2.125e+02, percent-clipped=2.0
+2024-09-19 18:51:00,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=724200.0, ans=0.0
+2024-09-19 18:51:04,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=724200.0, ans=0.2
+2024-09-19 18:51:07,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=724240.0, ans=0.0
+2024-09-19 18:51:21,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=724280.0, ans=0.0
+2024-09-19 18:51:32,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=724280.0, ans=0.0
+2024-09-19 18:51:33,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=724280.0, ans=0.1
+2024-09-19 18:51:38,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=724320.0, ans=0.125
+2024-09-19 18:51:41,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.56 vs. limit=12.0
+2024-09-19 18:51:43,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.51 vs. limit=22.5
+2024-09-19 18:51:48,942 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.15 vs. limit=15.0
+2024-09-19 18:52:07,810 INFO [train.py:1198] (1/2) Epoch 41, batch 100, loss[loss=0.2149, ctc_loss=0.0992, cr_loss=0.322, attn_decoder_loss=0.2206, over 29513.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1146, cr_loss=0.3576, attn_decoder_loss=0.2421, over 2251192.56 frames. ], batch size: 76, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 18:52:08,868 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.94 vs. limit=12.0
+2024-09-19 18:52:32,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.28 vs. limit=15.0
+2024-09-19 18:52:37,225 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.17 vs. limit=15.0
+2024-09-19 18:53:15,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=724560.0, ans=0.1
+2024-09-19 18:53:27,441 INFO [train.py:1198] (1/2) Epoch 41, batch 150, loss[loss=0.217, ctc_loss=0.09723, cr_loss=0.3282, attn_decoder_loss=0.223, over 29409.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1128, cr_loss=0.3537, attn_decoder_loss=0.2401, over 3046234.21 frames. ], batch size: 70, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:53:30,375 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.711e+01 8.681e+01 9.088e+01 9.657e+01 1.697e+02, threshold=1.818e+02, percent-clipped=0.0
+2024-09-19 18:53:34,563 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.72 vs. limit=15.0
+2024-09-19 18:53:39,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=724600.0, ans=0.025
+2024-09-19 18:53:49,307 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.61 vs. limit=15.0
+2024-09-19 18:53:58,564 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.48 vs. limit=22.5
+2024-09-19 18:54:32,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=724760.0, ans=0.0
+2024-09-19 18:54:42,651 INFO [train.py:1198] (1/2) Epoch 41, batch 200, loss[loss=0.2473, ctc_loss=0.1203, cr_loss=0.3591, attn_decoder_loss=0.2534, over 27503.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1124, cr_loss=0.3533, attn_decoder_loss=0.2392, over 3658267.46 frames. ], batch size: 125, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:55:06,197 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.93 vs. limit=15.0
+2024-09-19 18:55:13,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=724880.0, ans=0.2
+2024-09-19 18:55:22,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=724880.0, ans=0.0
+2024-09-19 18:55:40,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=724920.0, ans=0.0
+2024-09-19 18:55:45,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=724960.0, ans=0.025
+2024-09-19 18:55:49,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=724960.0, ans=0.0
+2024-09-19 18:55:57,748 INFO [train.py:1198] (1/2) Epoch 41, batch 250, loss[loss=0.2409, ctc_loss=0.1145, cr_loss=0.3621, attn_decoder_loss=0.2469, over 29226.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1121, cr_loss=0.3529, attn_decoder_loss=0.2388, over 4140999.70 frames. ], batch size: 100, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:56:00,844 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.416e+01 8.964e+01 9.351e+01 1.561e+02, threshold=1.793e+02, percent-clipped=0.0
+2024-09-19 18:56:02,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=725000.0, ans=0.0
+2024-09-19 18:56:04,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=725000.0, ans=0.125
+2024-09-19 18:56:04,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=725000.0, ans=0.125
+2024-09-19 18:56:23,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=725040.0, ans=0.125
+2024-09-19 18:56:26,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=725080.0, ans=0.125
+2024-09-19 18:56:46,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=725120.0, ans=0.0
+2024-09-19 18:56:59,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=725160.0, ans=0.09899494936611666
+2024-09-19 18:57:09,256 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.28 vs. limit=6.0
+2024-09-19 18:57:09,418 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.92 vs. limit=15.0
+2024-09-19 18:57:17,569 INFO [train.py:1198] (1/2) Epoch 41, batch 300, loss[loss=0.2442, ctc_loss=0.1247, cr_loss=0.3918, attn_decoder_loss=0.2488, over 29553.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1112, cr_loss=0.3503, attn_decoder_loss=0.238, over 4510201.53 frames. ], batch size: 92, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:58:33,155 INFO [train.py:1198] (1/2) Epoch 41, batch 350, loss[loss=0.2074, ctc_loss=0.09603, cr_loss=0.3139, attn_decoder_loss=0.2127, over 29762.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.112, cr_loss=0.3513, attn_decoder_loss=0.2387, over 4795313.58 frames. ], batch size: 72, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 18:58:36,042 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.397e+01 8.852e+01 9.608e+01 1.644e+02, threshold=1.770e+02, percent-clipped=0.0
+2024-09-19 18:58:36,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=725400.0, ans=0.1
+2024-09-19 18:58:42,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=725400.0, ans=0.125
+2024-09-19 18:58:56,320 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.48 vs. limit=10.0
+2024-09-19 18:59:22,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=725520.0, ans=0.2
+2024-09-19 18:59:48,501 INFO [train.py:1198] (1/2) Epoch 41, batch 400, loss[loss=0.2399, ctc_loss=0.1139, cr_loss=0.3595, attn_decoder_loss=0.2459, over 29735.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1121, cr_loss=0.3521, attn_decoder_loss=0.2385, over 5024608.62 frames. ], batch size: 82, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:00:05,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=725640.0, ans=0.125
+2024-09-19 19:00:14,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=725640.0, ans=0.0
+2024-09-19 19:00:16,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=725640.0, ans=0.0
+2024-09-19 19:00:19,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=725680.0, ans=0.0
+2024-09-19 19:01:08,850 INFO [train.py:1198] (1/2) Epoch 41, batch 450, loss[loss=0.2468, ctc_loss=0.1215, cr_loss=0.379, attn_decoder_loss=0.2522, over 29694.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1124, cr_loss=0.3525, attn_decoder_loss=0.2387, over 5188538.54 frames. ], batch size: 83, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:01:11,785 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.338e+01 8.467e+01 8.907e+01 9.504e+01 2.028e+02, threshold=1.781e+02, percent-clipped=1.0
+2024-09-19 19:01:39,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=725880.0, ans=0.0
+2024-09-19 19:02:15,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=725960.0, ans=0.2
+2024-09-19 19:02:20,798 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.00 vs. limit=15.0
+2024-09-19 19:02:24,456 INFO [train.py:1198] (1/2) Epoch 41, batch 500, loss[loss=0.2538, ctc_loss=0.1269, cr_loss=0.4031, attn_decoder_loss=0.259, over 29450.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1121, cr_loss=0.3521, attn_decoder_loss=0.2383, over 5331113.44 frames. ], batch size: 94, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:02:32,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=726000.0, ans=0.95
+2024-09-19 19:02:43,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=726040.0, ans=0.2
+2024-09-19 19:03:06,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=726080.0, ans=0.0
+2024-09-19 19:03:39,813 INFO [train.py:1198] (1/2) Epoch 41, batch 550, loss[loss=0.2391, ctc_loss=0.11, cr_loss=0.349, attn_decoder_loss=0.2456, over 28945.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1122, cr_loss=0.3524, attn_decoder_loss=0.2383, over 5423341.09 frames. ], batch size: 104, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:03:42,904 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.521e+01 8.739e+01 9.193e+01 9.957e+01 2.783e+02, threshold=1.839e+02, percent-clipped=3.0
+2024-09-19 19:04:02,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=726240.0, ans=0.125
+2024-09-19 19:04:14,295 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.72 vs. limit=6.0
+2024-09-19 19:04:21,497 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.80 vs. limit=22.5
+2024-09-19 19:04:23,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=726320.0, ans=0.035
+2024-09-19 19:04:31,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=726320.0, ans=0.025
+2024-09-19 19:04:40,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=726360.0, ans=0.125
+2024-09-19 19:04:42,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=726360.0, ans=0.1
+2024-09-19 19:04:58,276 INFO [train.py:1198] (1/2) Epoch 41, batch 600, loss[loss=0.2563, ctc_loss=0.13, cr_loss=0.3758, attn_decoder_loss=0.2619, over 29259.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1124, cr_loss=0.3527, attn_decoder_loss=0.2386, over 5509592.11 frames. ], batch size: 100, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 19:05:20,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=726440.0, ans=0.125
+2024-09-19 19:05:31,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=726480.0, ans=0.2
+2024-09-19 19:05:43,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=726520.0, ans=0.0
+2024-09-19 19:05:47,430 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.39 vs. limit=10.0
+2024-09-19 19:05:48,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=726520.0, ans=0.0
+2024-09-19 19:05:49,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=726520.0, ans=0.0
+2024-09-19 19:05:51,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=726520.0, ans=0.125
+2024-09-19 19:06:15,345 INFO [train.py:1198] (1/2) Epoch 41, batch 650, loss[loss=0.247, ctc_loss=0.1241, cr_loss=0.3882, attn_decoder_loss=0.2521, over 29743.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1113, cr_loss=0.3508, attn_decoder_loss=0.238, over 5586708.84 frames. ], batch size: 81, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 19:06:17,605 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.22 vs. limit=22.5
+2024-09-19 19:06:19,865 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.350e+01 8.880e+01 9.262e+01 1.448e+02, threshold=1.776e+02, percent-clipped=0.0
+2024-09-19 19:06:27,916 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 19:06:39,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=726640.0, ans=0.0
+2024-09-19 19:07:18,054 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.92 vs. limit=15.0
+2024-09-19 19:07:23,944 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.66 vs. limit=15.0
+2024-09-19 19:07:25,791 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=9.15 vs. limit=15.0
+2024-09-19 19:07:30,713 INFO [train.py:1198] (1/2) Epoch 41, batch 700, loss[loss=0.2274, ctc_loss=0.1092, cr_loss=0.3535, attn_decoder_loss=0.2327, over 29539.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1116, cr_loss=0.3517, attn_decoder_loss=0.2386, over 5637676.82 frames. ], batch size: 76, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 19:07:32,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=726800.0, ans=0.0
+2024-09-19 19:07:51,280 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.90 vs. limit=15.0
+2024-09-19 19:07:51,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=726840.0, ans=0.0
+2024-09-19 19:07:58,650 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.15 vs. limit=15.0
+2024-09-19 19:08:08,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=726880.0, ans=0.0
+2024-09-19 19:08:23,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=726920.0, ans=0.1
+2024-09-19 19:08:30,337 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.55 vs. limit=12.0
+2024-09-19 19:08:31,881 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.32 vs. limit=12.0
+2024-09-19 19:08:46,103 INFO [train.py:1198] (1/2) Epoch 41, batch 750, loss[loss=0.235, ctc_loss=0.1174, cr_loss=0.3498, attn_decoder_loss=0.2402, over 29703.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1119, cr_loss=0.3518, attn_decoder_loss=0.2385, over 5678033.83 frames. ], batch size: 82, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 19:08:52,745 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.358e+01 8.416e+01 8.976e+01 9.718e+01 1.767e+02, threshold=1.795e+02, percent-clipped=0.0
+2024-09-19 19:08:56,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=727000.0, ans=0.1
+2024-09-19 19:09:02,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=727040.0, ans=0.0
+2024-09-19 19:09:43,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=727120.0, ans=0.0
+2024-09-19 19:09:44,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=727120.0, ans=0.125
+2024-09-19 19:10:06,044 INFO [train.py:1198] (1/2) Epoch 41, batch 800, loss[loss=0.2161, ctc_loss=0.09687, cr_loss=0.3178, attn_decoder_loss=0.2223, over 29619.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1118, cr_loss=0.3519, attn_decoder_loss=0.2382, over 5708201.15 frames. ], batch size: 73, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:10:06,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=727200.0, ans=0.125
+2024-09-19 19:10:22,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=727240.0, ans=0.125
+2024-09-19 19:10:28,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=727240.0, ans=0.125
+2024-09-19 19:10:35,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=727280.0, ans=0.125
+2024-09-19 19:11:05,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=727360.0, ans=0.125
+2024-09-19 19:11:21,343 INFO [train.py:1198] (1/2) Epoch 41, batch 850, loss[loss=0.2361, ctc_loss=0.1075, cr_loss=0.3417, attn_decoder_loss=0.2428, over 29684.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1113, cr_loss=0.3505, attn_decoder_loss=0.2379, over 5738962.43 frames. ], batch size: 89, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:11:25,688 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.681e+01 8.437e+01 9.040e+01 9.490e+01 1.672e+02, threshold=1.808e+02, percent-clipped=0.0
+2024-09-19 19:11:54,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=727480.0, ans=0.025
+2024-09-19 19:11:56,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=727480.0, ans=0.125
+2024-09-19 19:12:05,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=727520.0, ans=0.015
+2024-09-19 19:12:06,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=727520.0, ans=0.125
+2024-09-19 19:12:14,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=727520.0, ans=0.2
+2024-09-19 19:12:16,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=727520.0, ans=0.125
+2024-09-19 19:12:19,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=727520.0, ans=0.125
+2024-09-19 19:12:21,572 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.71 vs. limit=10.0
+2024-09-19 19:12:37,294 INFO [train.py:1198] (1/2) Epoch 41, batch 900, loss[loss=0.2147, ctc_loss=0.09822, cr_loss=0.3168, attn_decoder_loss=0.2206, over 29611.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1115, cr_loss=0.3509, attn_decoder_loss=0.2382, over 5742353.10 frames. ], batch size: 73, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:12:43,475 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.08 vs. limit=22.5
+2024-09-19 19:12:46,286 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.23 vs. limit=12.0
+2024-09-19 19:12:47,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=727600.0, ans=0.125
+2024-09-19 19:12:53,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=727640.0, ans=0.0
+2024-09-19 19:12:57,453 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=727640.0, ans=0.2
+2024-09-19 19:13:28,752 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.61 vs. limit=15.0
+2024-09-19 19:13:49,325 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00
+2024-09-19 19:13:53,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=727760.0, ans=0.125
+2024-09-19 19:13:56,362 INFO [train.py:1198] (1/2) Epoch 41, batch 950, loss[loss=0.2126, ctc_loss=0.09233, cr_loss=0.2993, attn_decoder_loss=0.2193, over 29524.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1115, cr_loss=0.3506, attn_decoder_loss=0.2385, over 5744438.22 frames. ], batch size: 74, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:14:00,866 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.928e+01 8.606e+01 9.118e+01 9.826e+01 2.095e+02, threshold=1.824e+02, percent-clipped=1.0
+2024-09-19 19:14:14,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=727840.0, ans=0.125
+2024-09-19 19:14:54,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=727920.0, ans=0.2
+2024-09-19 19:15:04,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=727960.0, ans=0.0
+2024-09-19 19:15:12,361 INFO [train.py:1198] (1/2) Epoch 41, batch 1000, loss[loss=0.226, ctc_loss=0.111, cr_loss=0.3544, attn_decoder_loss=0.2309, over 29517.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1121, cr_loss=0.3514, attn_decoder_loss=0.2388, over 5737680.94 frames. ], batch size: 77, lr: 2.70e-03, grad_scale: 16.0
+2024-09-19 19:15:33,374 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.64 vs. limit=15.0
+2024-09-19 19:15:35,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=728040.0, ans=0.2
+2024-09-19 19:15:45,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=728080.0, ans=0.015
+2024-09-19 19:15:46,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=728080.0, ans=0.125
+2024-09-19 19:15:49,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=728080.0, ans=0.125
+2024-09-19 19:15:52,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=728080.0, ans=0.125
+2024-09-19 19:16:01,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=728120.0, ans=0.125
+2024-09-19 19:16:11,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=728160.0, ans=0.07
+2024-09-19 19:16:21,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=728160.0, ans=0.125
+2024-09-19 19:16:28,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=728200.0, ans=0.125
+2024-09-19 19:16:29,754 INFO [train.py:1198] (1/2) Epoch 41, batch 1050, loss[loss=0.2392, ctc_loss=0.1152, cr_loss=0.3617, attn_decoder_loss=0.2449, over 29658.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.112, cr_loss=0.351, attn_decoder_loss=0.2382, over 5744057.03 frames. ], batch size: 85, lr: 2.70e-03, grad_scale: 8.0
+2024-09-19 19:16:30,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=728200.0, ans=0.0
+2024-09-19 19:16:35,724 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.355e+01 8.570e+01 9.055e+01 9.661e+01 1.822e+02, threshold=1.811e+02, percent-clipped=0.0
+2024-09-19 19:17:08,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=728280.0, ans=0.0
+2024-09-19 19:17:12,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=728280.0, ans=0.07
+2024-09-19 19:17:13,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.37 vs. limit=15.0
+2024-09-19 19:17:19,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=728320.0, ans=0.0
+2024-09-19 19:17:47,180 INFO [train.py:1198] (1/2) Epoch 41, batch 1100, loss[loss=0.2214, ctc_loss=0.1069, cr_loss=0.3361, attn_decoder_loss=0.2267, over 29444.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1116, cr_loss=0.3504, attn_decoder_loss=0.2378, over 5756208.17 frames.
], batch size: 78, lr: 2.70e-03, grad_scale: 8.0 +2024-09-19 19:17:48,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=728400.0, ans=0.0 +2024-09-19 19:17:56,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=728400.0, ans=0.125 +2024-09-19 19:17:59,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=728400.0, ans=0.0 +2024-09-19 19:18:11,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=728440.0, ans=0.2 +2024-09-19 19:18:21,105 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.00 vs. limit=22.5 +2024-09-19 19:18:22,684 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.19 vs. limit=15.0 +2024-09-19 19:18:42,151 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.67 vs. limit=15.0 +2024-09-19 19:18:55,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=728560.0, ans=0.04949747468305833 +2024-09-19 19:18:59,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=728560.0, ans=0.0 +2024-09-19 19:19:02,682 INFO [train.py:1198] (1/2) Epoch 41, batch 1150, loss[loss=0.222, ctc_loss=0.102, cr_loss=0.3314, attn_decoder_loss=0.2279, over 29470.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1117, cr_loss=0.3509, attn_decoder_loss=0.2381, over 5753096.54 frames. ], batch size: 78, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:19:08,827 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.299e+01 8.493e+01 8.986e+01 9.432e+01 3.581e+02, threshold=1.797e+02, percent-clipped=4.0 +2024-09-19 19:19:20,324 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.81 vs. limit=6.0 +2024-09-19 19:19:22,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=728640.0, ans=0.1 +2024-09-19 19:19:28,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=728640.0, ans=0.125 +2024-09-19 19:20:17,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=728760.0, ans=0.0 +2024-09-19 19:20:19,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=728800.0, ans=0.125 +2024-09-19 19:20:20,764 INFO [train.py:1198] (1/2) Epoch 41, batch 1200, loss[loss=0.234, ctc_loss=0.1068, cr_loss=0.3485, attn_decoder_loss=0.2404, over 29688.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1121, cr_loss=0.3518, attn_decoder_loss=0.2388, over 5745567.77 frames. 
], batch size: 85, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:20:39,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=728840.0, ans=0.0 +2024-09-19 19:20:39,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=728840.0, ans=0.125 +2024-09-19 19:20:46,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=728840.0, ans=0.125 +2024-09-19 19:20:47,521 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.69 vs. limit=15.0 +2024-09-19 19:20:55,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=728880.0, ans=0.025 +2024-09-19 19:21:10,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=728920.0, ans=0.1 +2024-09-19 19:21:13,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=728920.0, ans=0.1 +2024-09-19 19:21:28,351 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=728960.0, ans=0.125 +2024-09-19 19:21:30,456 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.97 vs. limit=15.0 +2024-09-19 19:21:37,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=729000.0, ans=0.125 +2024-09-19 19:21:37,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=729000.0, ans=0.1 +2024-09-19 19:21:38,651 INFO [train.py:1198] (1/2) Epoch 41, batch 1250, loss[loss=0.2461, ctc_loss=0.1255, cr_loss=0.3873, attn_decoder_loss=0.2509, over 29527.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1127, cr_loss=0.3531, attn_decoder_loss=0.2394, over 5774007.06 frames. ], batch size: 92, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:21:44,546 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 8.620e+01 9.115e+01 9.641e+01 1.627e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-19 19:21:48,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=729000.0, ans=0.0 +2024-09-19 19:21:58,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=729040.0, ans=0.1 +2024-09-19 19:22:00,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=729040.0, ans=0.125 +2024-09-19 19:22:06,660 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.79 vs. limit=15.0 +2024-09-19 19:22:32,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=729120.0, ans=0.0 +2024-09-19 19:22:43,418 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.26 vs. 
limit=22.5 +2024-09-19 19:22:47,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=729160.0, ans=0.125 +2024-09-19 19:22:54,564 INFO [train.py:1198] (1/2) Epoch 41, batch 1300, loss[loss=0.2452, ctc_loss=0.1188, cr_loss=0.3636, attn_decoder_loss=0.2512, over 28498.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1125, cr_loss=0.3526, attn_decoder_loss=0.2389, over 5778338.39 frames. ], batch size: 112, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:22:57,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=729200.0, ans=0.125 +2024-09-19 19:22:59,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=729200.0, ans=0.025 +2024-09-19 19:23:17,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=729240.0, ans=0.0 +2024-09-19 19:23:40,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=729320.0, ans=0.0 +2024-09-19 19:23:52,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=729320.0, ans=0.05 +2024-09-19 19:23:52,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=729320.0, ans=0.2 +2024-09-19 19:23:57,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=729360.0, ans=0.1 +2024-09-19 19:24:10,601 INFO [train.py:1198] (1/2) Epoch 41, batch 1350, loss[loss=0.2372, ctc_loss=0.114, cr_loss=0.3628, attn_decoder_loss=0.2428, over 29750.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1121, cr_loss=0.3521, attn_decoder_loss=0.2387, over 5794636.50 frames. ], batch size: 81, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:24:14,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=729400.0, ans=0.1 +2024-09-19 19:24:18,634 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.744e+01 8.406e+01 8.862e+01 9.438e+01 1.295e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 19:24:18,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=729400.0, ans=0.125 +2024-09-19 19:24:51,039 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.74 vs. limit=15.0 +2024-09-19 19:25:02,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=9.41 vs. limit=22.5 +2024-09-19 19:25:04,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=729520.0, ans=0.025 +2024-09-19 19:25:10,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=729520.0, ans=0.125 +2024-09-19 19:25:22,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=729560.0, ans=0.0 +2024-09-19 19:25:30,638 INFO [train.py:1198] (1/2) Epoch 41, batch 1400, loss[loss=0.2052, ctc_loss=0.09652, cr_loss=0.32, attn_decoder_loss=0.2102, over 29575.00 frames. 
], tot_loss[loss=0.2328, ctc_loss=0.1118, cr_loss=0.3512, attn_decoder_loss=0.2385, over 5805620.04 frames. ], batch size: 69, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:25:37,738 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.48 vs. limit=22.5 +2024-09-19 19:25:51,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=729640.0, ans=0.125 +2024-09-19 19:25:56,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=729640.0, ans=0.1 +2024-09-19 19:26:19,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=729720.0, ans=0.1 +2024-09-19 19:26:20,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=729720.0, ans=0.125 +2024-09-19 19:26:29,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=729760.0, ans=0.125 +2024-09-19 19:26:32,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=729760.0, ans=0.125 +2024-09-19 19:26:32,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.82 vs. limit=15.0 +2024-09-19 19:26:41,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=729760.0, ans=0.0 +2024-09-19 19:26:45,627 INFO [train.py:1198] (1/2) Epoch 41, batch 1450, loss[loss=0.2557, ctc_loss=0.1274, cr_loss=0.382, attn_decoder_loss=0.2615, over 29469.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1122, cr_loss=0.3522, attn_decoder_loss=0.239, over 5802858.45 frames. ], batch size: 94, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:26:48,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=729800.0, ans=0.125 +2024-09-19 19:26:51,369 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.433e+01 8.557e+01 9.068e+01 9.745e+01 1.592e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-19 19:27:14,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=729880.0, ans=0.0 +2024-09-19 19:27:18,103 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.61 vs. limit=12.0 +2024-09-19 19:27:26,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.52 vs. limit=22.5 +2024-09-19 19:27:34,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=729920.0, ans=0.5 +2024-09-19 19:27:44,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=729960.0, ans=0.2 +2024-09-19 19:28:03,359 INFO [train.py:1198] (1/2) Epoch 41, batch 1500, loss[loss=0.2449, ctc_loss=0.1189, cr_loss=0.372, attn_decoder_loss=0.2507, over 29634.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1123, cr_loss=0.3521, attn_decoder_loss=0.2393, over 5802648.03 frames. 
], batch size: 86, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:28:05,818 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.93 vs. limit=12.0 +2024-09-19 19:28:49,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=730120.0, ans=0.0 +2024-09-19 19:28:54,375 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=730120.0, ans=0.5 +2024-09-19 19:28:54,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=730120.0, ans=0.025 +2024-09-19 19:29:12,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=730160.0, ans=0.05 +2024-09-19 19:29:21,288 INFO [train.py:1198] (1/2) Epoch 41, batch 1550, loss[loss=0.2426, ctc_loss=0.1227, cr_loss=0.3705, attn_decoder_loss=0.2476, over 29487.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1129, cr_loss=0.353, attn_decoder_loss=0.2393, over 5779508.52 frames. ], batch size: 90, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:29:27,255 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.571e+01 8.596e+01 9.016e+01 9.921e+01 2.014e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-19 19:29:31,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=730200.0, ans=0.125 +2024-09-19 19:30:36,413 INFO [train.py:1198] (1/2) Epoch 41, batch 1600, loss[loss=0.2416, ctc_loss=0.1135, cr_loss=0.3533, attn_decoder_loss=0.2479, over 29665.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1129, cr_loss=0.3524, attn_decoder_loss=0.239, over 5762133.71 frames. ], batch size: 85, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:30:38,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=730400.0, ans=0.1 +2024-09-19 19:30:38,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=730400.0, ans=0.125 +2024-09-19 19:30:48,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=730400.0, ans=0.0 +2024-09-19 19:30:51,782 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:30:54,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=730440.0, ans=0.0 +2024-09-19 19:31:04,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=730440.0, ans=0.125 +2024-09-19 19:31:05,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=730480.0, ans=0.125 +2024-09-19 19:31:08,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=730480.0, ans=0.05 +2024-09-19 19:31:54,180 INFO [train.py:1198] (1/2) Epoch 41, batch 1650, loss[loss=0.2489, ctc_loss=0.1233, cr_loss=0.3689, attn_decoder_loss=0.2546, over 29712.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1129, cr_loss=0.3527, attn_decoder_loss=0.2391, over 5758078.43 frames. 
], batch size: 89, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:31:59,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=730600.0, ans=0.0 +2024-09-19 19:32:03,241 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.694e+01 8.587e+01 9.228e+01 9.861e+01 2.680e+02, threshold=1.846e+02, percent-clipped=1.0 +2024-09-19 19:32:12,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=730640.0, ans=0.2 +2024-09-19 19:33:11,311 INFO [train.py:1198] (1/2) Epoch 41, batch 1700, loss[loss=0.2113, ctc_loss=0.1042, cr_loss=0.3295, attn_decoder_loss=0.2159, over 29569.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1127, cr_loss=0.3529, attn_decoder_loss=0.2389, over 5779421.60 frames. ], batch size: 69, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:33:27,379 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.55 vs. limit=12.0 +2024-09-19 19:33:31,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=730840.0, ans=0.1 +2024-09-19 19:33:46,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=730880.0, ans=0.04949747468305833 +2024-09-19 19:33:52,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=730880.0, ans=0.125 +2024-09-19 19:33:52,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=730880.0, ans=0.125 +2024-09-19 19:34:10,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=730960.0, ans=0.125 +2024-09-19 19:34:13,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=730960.0, ans=0.125 +2024-09-19 19:34:26,881 INFO [train.py:1198] (1/2) Epoch 41, batch 1750, loss[loss=0.2069, ctc_loss=0.091, cr_loss=0.306, attn_decoder_loss=0.213, over 29330.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1122, cr_loss=0.3515, attn_decoder_loss=0.2382, over 5787963.25 frames. ], batch size: 67, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:34:35,972 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.526e+01 8.612e+01 9.117e+01 9.709e+01 1.098e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-19 19:34:37,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=731000.0, ans=0.125 +2024-09-19 19:34:46,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=731040.0, ans=0.0 +2024-09-19 19:34:57,407 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=731080.0, ans=0.0 +2024-09-19 19:35:08,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.91 vs. 
limit=22.5 +2024-09-19 19:35:15,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=731120.0, ans=0.2 +2024-09-19 19:35:15,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=731120.0, ans=0.0 +2024-09-19 19:35:27,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=731160.0, ans=0.1 +2024-09-19 19:35:44,367 INFO [train.py:1198] (1/2) Epoch 41, batch 1800, loss[loss=0.248, ctc_loss=0.1231, cr_loss=0.3725, attn_decoder_loss=0.2536, over 29667.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1122, cr_loss=0.3519, attn_decoder_loss=0.2385, over 5790409.66 frames. ], batch size: 83, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:35:46,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=731200.0, ans=0.0 +2024-09-19 19:35:48,306 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.92 vs. limit=6.0 +2024-09-19 19:36:04,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=731240.0, ans=0.125 +2024-09-19 19:36:14,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=731280.0, ans=0.125 +2024-09-19 19:36:19,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=731280.0, ans=0.5 +2024-09-19 19:36:19,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=731280.0, ans=0.125 +2024-09-19 19:36:22,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=731280.0, ans=0.125 +2024-09-19 19:36:25,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=731280.0, ans=0.1 +2024-09-19 19:36:58,229 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.83 vs. limit=12.0 +2024-09-19 19:37:02,117 INFO [train.py:1198] (1/2) Epoch 41, batch 1850, loss[loss=0.2484, ctc_loss=0.1183, cr_loss=0.3735, attn_decoder_loss=0.2546, over 29616.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.112, cr_loss=0.3519, attn_decoder_loss=0.2385, over 5797439.27 frames. ], batch size: 86, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:37:03,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=731400.0, ans=0.125 +2024-09-19 19:37:10,994 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.558e+01 8.675e+01 9.084e+01 9.615e+01 1.395e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-19 19:37:20,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=731440.0, ans=0.125 +2024-09-19 19:37:27,146 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.55 vs. 
limit=15.0 +2024-09-19 19:37:30,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=731480.0, ans=0.125 +2024-09-19 19:37:36,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=731480.0, ans=0.0 +2024-09-19 19:37:39,968 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:37:44,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=731480.0, ans=0.125 +2024-09-19 19:37:57,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=731520.0, ans=0.0 +2024-09-19 19:37:59,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=731520.0, ans=0.0 +2024-09-19 19:38:17,056 INFO [train.py:1198] (1/2) Epoch 41, batch 1900, loss[loss=0.2438, ctc_loss=0.1224, cr_loss=0.3733, attn_decoder_loss=0.249, over 29691.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1124, cr_loss=0.353, attn_decoder_loss=0.2393, over 5805481.97 frames. ], batch size: 89, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:38:19,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=731600.0, ans=0.025 +2024-09-19 19:38:22,115 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:38:23,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=731600.0, ans=0.125 +2024-09-19 19:38:26,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=731600.0, ans=0.0 +2024-09-19 19:38:28,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=731600.0, ans=0.125 +2024-09-19 19:39:02,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=731720.0, ans=0.0 +2024-09-19 19:39:12,426 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.07 vs. limit=22.5 +2024-09-19 19:39:34,504 INFO [train.py:1198] (1/2) Epoch 41, batch 1950, loss[loss=0.2316, ctc_loss=0.1106, cr_loss=0.3685, attn_decoder_loss=0.2369, over 29428.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1131, cr_loss=0.3544, attn_decoder_loss=0.2404, over 5820028.97 frames. 
], batch size: 78, lr: 2.69e-03, grad_scale: 8.0 +2024-09-19 19:39:43,507 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.306e+01 8.775e+01 9.303e+01 9.846e+01 2.591e+02, threshold=1.861e+02, percent-clipped=0.0 +2024-09-19 19:39:57,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=731840.0, ans=0.0 +2024-09-19 19:40:06,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=731880.0, ans=0.125 +2024-09-19 19:40:23,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=731920.0, ans=0.125 +2024-09-19 19:40:48,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=731960.0, ans=0.2 +2024-09-19 19:40:51,612 INFO [train.py:1198] (1/2) Epoch 41, batch 2000, loss[loss=0.2089, ctc_loss=0.1011, cr_loss=0.3179, attn_decoder_loss=0.2138, over 29323.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1132, cr_loss=0.3545, attn_decoder_loss=0.2405, over 5798003.48 frames. ], batch size: 67, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:41:05,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=732040.0, ans=0.125 +2024-09-19 19:41:26,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=732080.0, ans=0.0 +2024-09-19 19:41:37,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=732120.0, ans=0.125 +2024-09-19 19:41:44,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=732120.0, ans=0.1 +2024-09-19 19:42:05,391 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.89 vs. limit=15.0 +2024-09-19 19:42:07,261 INFO [train.py:1198] (1/2) Epoch 41, batch 2050, loss[loss=0.1969, ctc_loss=0.07727, cr_loss=0.265, attn_decoder_loss=0.2043, over 29431.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1127, cr_loss=0.353, attn_decoder_loss=0.2395, over 5790586.03 frames. ], batch size: 70, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:42:07,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=732200.0, ans=0.025 +2024-09-19 19:42:16,362 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.729e+01 8.645e+01 9.096e+01 9.473e+01 4.528e+02, threshold=1.819e+02, percent-clipped=2.0 +2024-09-19 19:42:19,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=732200.0, ans=0.2 +2024-09-19 19:42:24,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=732240.0, ans=0.025 +2024-09-19 19:42:25,141 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.54 vs. 
limit=22.5 +2024-09-19 19:42:30,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=732240.0, ans=0.0 +2024-09-19 19:43:08,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=732360.0, ans=0.0 +2024-09-19 19:43:19,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=732360.0, ans=0.2 +2024-09-19 19:43:25,408 INFO [train.py:1198] (1/2) Epoch 41, batch 2100, loss[loss=0.2218, ctc_loss=0.09582, cr_loss=0.3194, attn_decoder_loss=0.2287, over 29755.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1117, cr_loss=0.3511, attn_decoder_loss=0.2387, over 5801225.59 frames. ], batch size: 81, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:43:36,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=732400.0, ans=0.125 +2024-09-19 19:43:51,560 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.76 vs. limit=22.5 +2024-09-19 19:43:57,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=732480.0, ans=0.1 +2024-09-19 19:44:04,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=732480.0, ans=0.125 +2024-09-19 19:44:25,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=732560.0, ans=0.0 +2024-09-19 19:44:42,399 INFO [train.py:1198] (1/2) Epoch 41, batch 2150, loss[loss=0.2299, ctc_loss=0.1117, cr_loss=0.3513, attn_decoder_loss=0.2353, over 29435.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1109, cr_loss=0.3496, attn_decoder_loss=0.2379, over 5815522.88 frames. ], batch size: 78, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:44:43,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.17 vs. 
limit=6.0 +2024-09-19 19:44:51,575 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.415e+01 8.227e+01 8.830e+01 9.472e+01 1.149e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-19 19:44:54,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=732600.0, ans=0.125 +2024-09-19 19:45:22,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=732680.0, ans=0.125 +2024-09-19 19:45:24,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=732680.0, ans=0.125 +2024-09-19 19:45:29,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=732720.0, ans=0.2 +2024-09-19 19:45:40,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=732720.0, ans=0.125 +2024-09-19 19:45:43,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=732760.0, ans=0.0 +2024-09-19 19:45:46,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=732760.0, ans=0.2 +2024-09-19 19:45:49,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=732760.0, ans=0.025 +2024-09-19 19:45:58,479 INFO [train.py:1198] (1/2) Epoch 41, batch 2200, loss[loss=0.2451, ctc_loss=0.1125, cr_loss=0.3539, attn_decoder_loss=0.252, over 29611.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1115, cr_loss=0.3502, attn_decoder_loss=0.2382, over 5812825.60 frames. ], batch size: 86, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:46:03,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=732800.0, ans=0.1 +2024-09-19 19:46:42,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=732920.0, ans=0.125 +2024-09-19 19:46:51,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=732920.0, ans=0.05 +2024-09-19 19:47:07,548 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.97 vs. limit=22.5 +2024-09-19 19:47:16,341 INFO [train.py:1198] (1/2) Epoch 41, batch 2250, loss[loss=0.2384, ctc_loss=0.113, cr_loss=0.3553, attn_decoder_loss=0.2445, over 29720.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1116, cr_loss=0.3506, attn_decoder_loss=0.2386, over 5813475.24 frames. 
], batch size: 82, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:47:16,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=733000.0, ans=0.0 +2024-09-19 19:47:25,241 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.035e+01 8.546e+01 9.093e+01 9.694e+01 2.560e+02, threshold=1.819e+02, percent-clipped=3.0 +2024-09-19 19:47:27,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=733000.0, ans=0.0 +2024-09-19 19:47:57,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=733080.0, ans=0.125 +2024-09-19 19:48:01,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=733120.0, ans=0.125 +2024-09-19 19:48:06,639 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.51 vs. limit=15.0 +2024-09-19 19:48:33,631 INFO [train.py:1198] (1/2) Epoch 41, batch 2300, loss[loss=0.2063, ctc_loss=0.09303, cr_loss=0.3145, attn_decoder_loss=0.2119, over 29311.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1107, cr_loss=0.3493, attn_decoder_loss=0.2375, over 5798699.20 frames. ], batch size: 71, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:49:06,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.99 vs. limit=15.0 +2024-09-19 19:49:45,428 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.75 vs. limit=15.0 +2024-09-19 19:49:49,350 INFO [train.py:1198] (1/2) Epoch 41, batch 2350, loss[loss=0.2395, ctc_loss=0.1151, cr_loss=0.3605, attn_decoder_loss=0.2453, over 29692.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.111, cr_loss=0.3495, attn_decoder_loss=0.2376, over 5804848.96 frames. ], batch size: 83, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:49:54,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=733400.0, ans=15.0 +2024-09-19 19:49:54,525 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.12 vs. 
limit=15.0 +2024-09-19 19:49:58,160 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.660e+01 9.088e+01 9.774e+01 1.601e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-19 19:50:00,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=733400.0, ans=0.0 +2024-09-19 19:50:06,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=733440.0, ans=0.1 +2024-09-19 19:50:06,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=733440.0, ans=0.2 +2024-09-19 19:50:09,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=733440.0, ans=0.125 +2024-09-19 19:50:12,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=733440.0, ans=0.2 +2024-09-19 19:50:19,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=733480.0, ans=0.1 +2024-09-19 19:50:37,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=733520.0, ans=0.125 +2024-09-19 19:50:52,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=733560.0, ans=0.0 +2024-09-19 19:50:57,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=733560.0, ans=0.1 +2024-09-19 19:51:06,699 INFO [train.py:1198] (1/2) Epoch 41, batch 2400, loss[loss=0.232, ctc_loss=0.1158, cr_loss=0.3754, attn_decoder_loss=0.2366, over 29536.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1115, cr_loss=0.3509, attn_decoder_loss=0.2381, over 5808791.13 frames. ], batch size: 76, lr: 2.69e-03, grad_scale: 32.0 +2024-09-19 19:51:08,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=733600.0, ans=0.2 +2024-09-19 19:51:50,040 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.82 vs. limit=10.0 +2024-09-19 19:52:06,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=733760.0, ans=0.125 +2024-09-19 19:52:09,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=733760.0, ans=0.0 +2024-09-19 19:52:10,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=733760.0, ans=0.125 +2024-09-19 19:52:24,341 INFO [train.py:1198] (1/2) Epoch 41, batch 2450, loss[loss=0.2374, ctc_loss=0.1157, cr_loss=0.3555, attn_decoder_loss=0.243, over 29705.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.112, cr_loss=0.3516, attn_decoder_loss=0.2389, over 5784739.42 frames. 
], batch size: 82, lr: 2.69e-03, grad_scale: 16.0 +2024-09-19 19:52:24,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=733800.0, ans=0.0 +2024-09-19 19:52:34,692 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.580e+01 8.655e+01 9.209e+01 9.754e+01 2.010e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-19 19:52:54,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=733880.0, ans=0.125 +2024-09-19 19:52:54,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=733880.0, ans=0.0 +2024-09-19 19:53:03,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=733880.0, ans=0.125 +2024-09-19 19:53:38,228 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:53:39,343 INFO [train.py:1198] (1/2) Epoch 41, batch 2500, loss[loss=0.242, ctc_loss=0.1128, cr_loss=0.354, attn_decoder_loss=0.2485, over 29628.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1122, cr_loss=0.3524, attn_decoder_loss=0.239, over 5794705.60 frames. ], batch size: 86, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:54:02,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=734040.0, ans=0.125 +2024-09-19 19:54:14,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=734080.0, ans=0.2 +2024-09-19 19:54:40,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=734160.0, ans=0.05 +2024-09-19 19:54:43,907 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.89 vs. limit=6.0 +2024-09-19 19:54:48,347 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.78 vs. limit=15.0 +2024-09-19 19:54:57,249 INFO [train.py:1198] (1/2) Epoch 41, batch 2550, loss[loss=0.2091, ctc_loss=0.09644, cr_loss=0.3157, attn_decoder_loss=0.2146, over 29347.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1118, cr_loss=0.3521, attn_decoder_loss=0.2388, over 5797079.44 frames. ], batch size: 67, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:54:59,613 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.37 vs. limit=22.5 +2024-09-19 19:55:06,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=734200.0, ans=0.125 +2024-09-19 19:55:07,642 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.302e+01 8.421e+01 8.984e+01 9.489e+01 4.917e+02, threshold=1.797e+02, percent-clipped=1.0 +2024-09-19 19:55:15,799 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.71 vs. 
limit=15.0 +2024-09-19 19:55:16,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=734240.0, ans=0.0 +2024-09-19 19:55:24,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=734240.0, ans=0.0 +2024-09-19 19:55:35,566 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.94 vs. limit=10.0 +2024-09-19 19:55:38,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=734280.0, ans=0.0 +2024-09-19 19:55:38,358 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.34 vs. limit=15.0 +2024-09-19 19:55:47,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=734320.0, ans=0.125 +2024-09-19 19:55:47,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=734320.0, ans=0.125 +2024-09-19 19:55:48,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=734320.0, ans=0.125 +2024-09-19 19:56:13,325 INFO [train.py:1198] (1/2) Epoch 41, batch 2600, loss[loss=0.229, ctc_loss=0.1161, cr_loss=0.3746, attn_decoder_loss=0.2332, over 29467.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.112, cr_loss=0.3528, attn_decoder_loss=0.2393, over 5794328.16 frames. ], batch size: 78, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:56:18,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=10.95 vs. limit=15.0 +2024-09-19 19:56:31,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.25 vs. 
limit=6.0 +2024-09-19 19:56:33,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=734440.0, ans=0.2 +2024-09-19 19:56:35,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=734440.0, ans=0.0 +2024-09-19 19:56:35,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=734440.0, ans=0.1 +2024-09-19 19:56:51,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=734480.0, ans=0.125 +2024-09-19 19:56:54,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=734480.0, ans=0.1 +2024-09-19 19:56:59,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=734520.0, ans=15.0 +2024-09-19 19:57:02,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=734520.0, ans=0.2 +2024-09-19 19:57:11,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=734520.0, ans=0.1 +2024-09-19 19:57:17,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=734560.0, ans=0.2 +2024-09-19 19:57:24,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.19 vs. limit=22.5 +2024-09-19 19:57:27,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=734560.0, ans=0.1 +2024-09-19 19:57:30,517 INFO [train.py:1198] (1/2) Epoch 41, batch 2650, loss[loss=0.246, ctc_loss=0.1165, cr_loss=0.3481, attn_decoder_loss=0.2527, over 29239.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1122, cr_loss=0.3529, attn_decoder_loss=0.2395, over 5800871.87 frames. ], batch size: 100, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:57:33,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=734600.0, ans=0.0 +2024-09-19 19:57:41,308 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.192e+01 8.633e+01 9.136e+01 9.710e+01 1.315e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 19:57:44,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=734640.0, ans=0.09899494936611666 +2024-09-19 19:58:05,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=734680.0, ans=0.125 +2024-09-19 19:58:22,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.32 vs. limit=15.0 +2024-09-19 19:58:45,660 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=734760.0, ans=0.0 +2024-09-19 19:58:48,349 INFO [train.py:1198] (1/2) Epoch 41, batch 2700, loss[loss=0.2463, ctc_loss=0.1202, cr_loss=0.3591, attn_decoder_loss=0.2524, over 29511.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1126, cr_loss=0.354, attn_decoder_loss=0.2399, over 5796188.69 frames. 
], batch size: 87, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 19:59:06,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=734840.0, ans=0.125 +2024-09-19 19:59:24,610 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 19:59:24,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=734880.0, ans=0.0 +2024-09-19 19:59:26,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=734880.0, ans=0.0 +2024-09-19 19:59:29,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=734880.0, ans=0.125 +2024-09-19 19:59:38,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=734920.0, ans=0.0 +2024-09-19 20:00:02,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=735000.0, ans=0.125 +2024-09-19 20:00:03,699 INFO [train.py:1198] (1/2) Epoch 41, batch 2750, loss[loss=0.2242, ctc_loss=0.1053, cr_loss=0.3463, attn_decoder_loss=0.2297, over 29528.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.112, cr_loss=0.3528, attn_decoder_loss=0.2388, over 5796029.37 frames. ], batch size: 75, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:00:14,116 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.948e+01 8.495e+01 8.920e+01 9.727e+01 1.790e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-19 20:00:41,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=735080.0, ans=0.125 +2024-09-19 20:00:48,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=735080.0, ans=0.0 +2024-09-19 20:00:48,319 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:01:06,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=735160.0, ans=0.125 +2024-09-19 20:01:10,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=735160.0, ans=0.125 +2024-09-19 20:01:21,634 INFO [train.py:1198] (1/2) Epoch 41, batch 2800, loss[loss=0.2512, ctc_loss=0.1346, cr_loss=0.3698, attn_decoder_loss=0.2559, over 20050.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1119, cr_loss=0.3525, attn_decoder_loss=0.2388, over 5776678.42 frames. 
], batch size: 209, lr: 2.68e-03, grad_scale: 32.0 +2024-09-19 20:01:26,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=735200.0, ans=0.125 +2024-09-19 20:01:29,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=735200.0, ans=0.2 +2024-09-19 20:01:42,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=735240.0, ans=0.2 +2024-09-19 20:01:42,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=735240.0, ans=0.0 +2024-09-19 20:01:48,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=735240.0, ans=0.125 +2024-09-19 20:01:55,410 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.31 vs. limit=10.0 +2024-09-19 20:01:56,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=735280.0, ans=0.125 +2024-09-19 20:01:58,526 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.70 vs. limit=22.5 +2024-09-19 20:02:33,496 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.44 vs. limit=15.0 +2024-09-19 20:02:38,651 INFO [train.py:1198] (1/2) Epoch 41, batch 2850, loss[loss=0.2257, ctc_loss=0.1097, cr_loss=0.3503, attn_decoder_loss=0.2308, over 29513.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1122, cr_loss=0.3527, attn_decoder_loss=0.2393, over 5762730.57 frames. ], batch size: 77, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:02:50,645 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.779e+01 8.742e+01 9.309e+01 1.007e+02 1.847e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-19 20:02:51,348 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.51 vs. limit=15.0 +2024-09-19 20:02:52,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=735440.0, ans=0.125 +2024-09-19 20:03:27,418 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=735520.0, ans=0.07 +2024-09-19 20:03:42,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=735560.0, ans=0.2 +2024-09-19 20:03:49,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=735560.0, ans=0.125 +2024-09-19 20:03:54,113 INFO [train.py:1198] (1/2) Epoch 41, batch 2900, loss[loss=0.2313, ctc_loss=0.1121, cr_loss=0.3627, attn_decoder_loss=0.2365, over 29418.00 frames. ], tot_loss[loss=0.2348, ctc_loss=0.113, cr_loss=0.3549, attn_decoder_loss=0.2405, over 5788047.65 frames. ], batch size: 79, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:04:04,012 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.22 vs. 
limit=15.0 +2024-09-19 20:04:26,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=735680.0, ans=0.0 +2024-09-19 20:04:39,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=735680.0, ans=0.125 +2024-09-19 20:04:46,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=735720.0, ans=0.125 +2024-09-19 20:04:49,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=735720.0, ans=0.125 +2024-09-19 20:05:12,045 INFO [train.py:1198] (1/2) Epoch 41, batch 2950, loss[loss=0.2269, ctc_loss=0.1073, cr_loss=0.3505, attn_decoder_loss=0.2325, over 29505.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1118, cr_loss=0.352, attn_decoder_loss=0.2391, over 5781544.82 frames. ], batch size: 75, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:05:21,630 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:05:24,173 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.467e+01 8.387e+01 8.869e+01 9.638e+01 2.369e+02, threshold=1.774e+02, percent-clipped=2.0 +2024-09-19 20:05:37,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=735840.0, ans=0.125 +2024-09-19 20:05:51,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=735880.0, ans=0.125 +2024-09-19 20:06:00,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=735920.0, ans=0.1 +2024-09-19 20:06:37,437 INFO [train.py:1198] (1/2) Epoch 41, batch 3000, loss[loss=0.238, ctc_loss=0.1172, cr_loss=0.3568, attn_decoder_loss=0.2435, over 29763.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.112, cr_loss=0.3523, attn_decoder_loss=0.239, over 5783179.33 frames. ], batch size: 81, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:06:37,438 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 20:06:55,723 INFO [train.py:1230] (1/2) Epoch 41, validation: loss=0.2123, ctc_loss=0.03697, cr_loss=6.466e-15, attn_decoder_loss=0.2318, over 944034.00 frames. +2024-09-19 20:06:55,723 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 20:07:09,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=736040.0, ans=0.125 +2024-09-19 20:07:13,430 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.15 vs. 
limit=15.0 +2024-09-19 20:07:14,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=736040.0, ans=0.07 +2024-09-19 20:07:24,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=736080.0, ans=0.2 +2024-09-19 20:07:43,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=736120.0, ans=0.0 +2024-09-19 20:07:43,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=736120.0, ans=0.125 +2024-09-19 20:08:13,489 INFO [train.py:1198] (1/2) Epoch 41, batch 3050, loss[loss=0.2273, ctc_loss=0.1122, cr_loss=0.3343, attn_decoder_loss=0.2326, over 29522.00 frames. ], tot_loss[loss=0.234, ctc_loss=0.1128, cr_loss=0.3537, attn_decoder_loss=0.2396, over 5776720.75 frames. ], batch size: 76, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:08:25,662 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.453e+01 8.668e+01 9.193e+01 9.788e+01 2.004e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-19 20:08:33,531 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:08:53,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=736280.0, ans=0.0 +2024-09-19 20:09:28,914 INFO [train.py:1198] (1/2) Epoch 41, batch 3100, loss[loss=0.2392, ctc_loss=0.1062, cr_loss=0.341, attn_decoder_loss=0.2465, over 29191.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1125, cr_loss=0.3527, attn_decoder_loss=0.239, over 5776383.87 frames. ], batch size: 100, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:09:33,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=736400.0, ans=0.125 +2024-09-19 20:09:41,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=736400.0, ans=0.125 +2024-09-19 20:09:57,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=736480.0, ans=0.125 +2024-09-19 20:09:59,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=736480.0, ans=0.05 +2024-09-19 20:10:03,308 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.11 vs. limit=15.0 +2024-09-19 20:10:10,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=736480.0, ans=0.025 +2024-09-19 20:10:13,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=736480.0, ans=0.0 +2024-09-19 20:10:20,232 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.96 vs. limit=15.0 +2024-09-19 20:10:20,244 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.10 vs. 
limit=15.0 +2024-09-19 20:10:21,658 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.97 vs. limit=6.0 +2024-09-19 20:10:46,491 INFO [train.py:1198] (1/2) Epoch 41, batch 3150, loss[loss=0.232, ctc_loss=0.1046, cr_loss=0.3236, attn_decoder_loss=0.239, over 28904.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1123, cr_loss=0.3519, attn_decoder_loss=0.2389, over 5782329.63 frames. ], batch size: 104, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:10:46,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=736600.0, ans=0.125 +2024-09-19 20:10:48,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=736600.0, ans=0.2 +2024-09-19 20:10:54,355 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:10:58,490 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.553e+01 9.133e+01 9.719e+01 1.833e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 20:11:13,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=736640.0, ans=0.0 +2024-09-19 20:11:51,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=736760.0, ans=0.2 +2024-09-19 20:12:04,119 INFO [train.py:1198] (1/2) Epoch 41, batch 3200, loss[loss=0.2245, ctc_loss=0.1025, cr_loss=0.3304, attn_decoder_loss=0.2307, over 29404.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1117, cr_loss=0.3509, attn_decoder_loss=0.2384, over 5793201.48 frames. ], batch size: 79, lr: 2.68e-03, grad_scale: 32.0 +2024-09-19 20:12:11,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=736800.0, ans=0.125 +2024-09-19 20:13:11,781 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.30 vs. limit=6.0 +2024-09-19 20:13:19,473 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.64 vs. limit=10.0 +2024-09-19 20:13:20,190 INFO [train.py:1198] (1/2) Epoch 41, batch 3250, loss[loss=0.2411, ctc_loss=0.1162, cr_loss=0.3628, attn_decoder_loss=0.2469, over 29711.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1117, cr_loss=0.3512, attn_decoder_loss=0.2387, over 5800306.17 frames. ], batch size: 84, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:13:23,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=737000.0, ans=0.025 +2024-09-19 20:13:33,816 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 8.531e+01 9.147e+01 9.717e+01 1.259e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-19 20:13:40,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=737040.0, ans=0.07 +2024-09-19 20:13:46,423 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.90 vs. 
limit=12.0 +2024-09-19 20:13:51,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=737080.0, ans=0.125 +2024-09-19 20:14:10,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=737120.0, ans=0.0 +2024-09-19 20:14:13,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=737120.0, ans=0.125 +2024-09-19 20:14:34,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=737160.0, ans=0.0 +2024-09-19 20:14:37,460 INFO [train.py:1198] (1/2) Epoch 41, batch 3300, loss[loss=0.2361, ctc_loss=0.1063, cr_loss=0.327, attn_decoder_loss=0.2433, over 28744.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1106, cr_loss=0.3488, attn_decoder_loss=0.2373, over 5798729.28 frames. ], batch size: 112, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:14:52,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=737240.0, ans=0.07 +2024-09-19 20:14:54,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=737240.0, ans=0.05 +2024-09-19 20:15:25,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=737320.0, ans=10.0 +2024-09-19 20:15:30,245 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:15:34,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=737320.0, ans=0.0 +2024-09-19 20:15:40,786 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=737360.0, ans=0.0 +2024-09-19 20:15:54,701 INFO [train.py:1198] (1/2) Epoch 41, batch 3350, loss[loss=0.2508, ctc_loss=0.1228, cr_loss=0.3723, attn_decoder_loss=0.2567, over 28777.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1114, cr_loss=0.3504, attn_decoder_loss=0.2382, over 5775164.33 frames. ], batch size: 104, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:16:08,343 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 8.656e+01 9.093e+01 9.789e+01 1.911e+02, threshold=1.819e+02, percent-clipped=2.0 +2024-09-19 20:16:11,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=737440.0, ans=0.125 +2024-09-19 20:16:17,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.33 vs. limit=15.0 +2024-09-19 20:16:22,296 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=737440.0, ans=0.1 +2024-09-19 20:16:23,141 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.48 vs. 
limit=12.0 +2024-09-19 20:17:02,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=737560.0, ans=0.0 +2024-09-19 20:17:04,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=737560.0, ans=0.0 +2024-09-19 20:17:10,491 INFO [train.py:1198] (1/2) Epoch 41, batch 3400, loss[loss=0.2078, ctc_loss=0.09444, cr_loss=0.3091, attn_decoder_loss=0.2135, over 29327.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1119, cr_loss=0.3511, attn_decoder_loss=0.2384, over 5767799.50 frames. ], batch size: 67, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:17:59,758 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=737720.0, ans=0.2 +2024-09-19 20:17:59,980 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:18:04,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=737720.0, ans=0.1 +2024-09-19 20:18:10,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=737720.0, ans=0.0 +2024-09-19 20:18:28,095 INFO [train.py:1198] (1/2) Epoch 41, batch 3450, loss[loss=0.2475, ctc_loss=0.1145, cr_loss=0.3657, attn_decoder_loss=0.2541, over 28277.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1122, cr_loss=0.3522, attn_decoder_loss=0.2388, over 5776023.30 frames. ], batch size: 111, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:18:33,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=737800.0, ans=0.125 +2024-09-19 20:18:41,844 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.606e+01 8.497e+01 9.130e+01 9.574e+01 2.613e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-19 20:19:03,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=737880.0, ans=0.0 +2024-09-19 20:19:07,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=737880.0, ans=0.5 +2024-09-19 20:19:31,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=737960.0, ans=0.2 +2024-09-19 20:19:42,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=738000.0, ans=0.0 +2024-09-19 20:19:43,462 INFO [train.py:1198] (1/2) Epoch 41, batch 3500, loss[loss=0.2119, ctc_loss=0.09186, cr_loss=0.2994, attn_decoder_loss=0.2186, over 29320.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1117, cr_loss=0.3511, attn_decoder_loss=0.2383, over 5776607.81 frames. ], batch size: 71, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:20:03,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=738040.0, ans=0.1 +2024-09-19 20:20:09,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=738040.0, ans=0.0 +2024-09-19 20:20:17,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.19 vs. 
limit=15.0 +2024-09-19 20:20:59,910 INFO [train.py:1198] (1/2) Epoch 41, batch 3550, loss[loss=0.2392, ctc_loss=0.1088, cr_loss=0.348, attn_decoder_loss=0.2459, over 29712.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1117, cr_loss=0.3514, attn_decoder_loss=0.2383, over 5783319.43 frames. ], batch size: 89, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:21:13,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=738240.0, ans=0.05 +2024-09-19 20:21:14,689 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.447e+01 8.523e+01 8.996e+01 9.507e+01 2.339e+02, threshold=1.799e+02, percent-clipped=2.0 +2024-09-19 20:21:24,105 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.85 vs. limit=15.0 +2024-09-19 20:21:32,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=738280.0, ans=0.0 +2024-09-19 20:21:48,126 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.28 vs. limit=15.0 +2024-09-19 20:22:14,209 INFO [train.py:1198] (1/2) Epoch 41, batch 3600, loss[loss=0.2142, ctc_loss=0.09365, cr_loss=0.3166, attn_decoder_loss=0.2206, over 29491.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1116, cr_loss=0.3512, attn_decoder_loss=0.2385, over 5791759.11 frames. ], batch size: 77, lr: 2.68e-03, grad_scale: 16.0 +2024-09-19 20:22:17,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=738400.0, ans=0.0 +2024-09-19 20:22:34,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=738440.0, ans=0.1 +2024-09-19 20:22:38,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=738440.0, ans=0.0 +2024-09-19 20:22:39,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=738440.0, ans=0.0 +2024-09-19 20:22:41,227 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:22:52,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.whiten.whitening_limit, batch_count=738480.0, ans=15.0 +2024-09-19 20:23:02,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=738520.0, ans=0.1 +2024-09-19 20:23:30,388 INFO [train.py:1198] (1/2) Epoch 41, batch 3650, loss[loss=0.2478, ctc_loss=0.1279, cr_loss=0.3886, attn_decoder_loss=0.2525, over 29487.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1112, cr_loss=0.3499, attn_decoder_loss=0.238, over 5794556.33 frames. 
], batch size: 90, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:23:38,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=738600.0, ans=0.0 +2024-09-19 20:23:46,677 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.200e+01 8.451e+01 9.065e+01 9.454e+01 1.125e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-19 20:23:47,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=738640.0, ans=15.0 +2024-09-19 20:23:54,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=738640.0, ans=0.0 +2024-09-19 20:24:02,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=738680.0, ans=0.125 +2024-09-19 20:24:09,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=738680.0, ans=0.125 +2024-09-19 20:24:16,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=738720.0, ans=0.025 +2024-09-19 20:24:28,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=738760.0, ans=0.1 +2024-09-19 20:24:36,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.46 vs. limit=12.0 +2024-09-19 20:24:44,921 INFO [train.py:1198] (1/2) Epoch 41, batch 3700, loss[loss=0.2412, ctc_loss=0.1205, cr_loss=0.38, attn_decoder_loss=0.2461, over 29715.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1115, cr_loss=0.3508, attn_decoder_loss=0.2383, over 5804600.84 frames. ], batch size: 84, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:25:05,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=738840.0, ans=0.125 +2024-09-19 20:25:41,696 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.64 vs. limit=12.0 +2024-09-19 20:25:47,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=738960.0, ans=0.2 +2024-09-19 20:25:54,885 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:25:58,931 INFO [train.py:1198] (1/2) Epoch 41, batch 3750, loss[loss=0.2155, ctc_loss=0.1015, cr_loss=0.3315, attn_decoder_loss=0.2208, over 29321.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1117, cr_loss=0.3516, attn_decoder_loss=0.2384, over 5808426.46 frames. 
], batch size: 67, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:26:17,098 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.679e+01 8.549e+01 9.026e+01 9.637e+01 1.696e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-19 20:26:21,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=739040.0, ans=0.125 +2024-09-19 20:26:24,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=739040.0, ans=0.125 +2024-09-19 20:26:31,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=739080.0, ans=0.125 +2024-09-19 20:26:37,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=739080.0, ans=0.125 +2024-09-19 20:27:02,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=739160.0, ans=0.0 +2024-09-19 20:27:14,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.99 vs. limit=10.0 +2024-09-19 20:27:15,513 INFO [train.py:1198] (1/2) Epoch 41, batch 3800, loss[loss=0.2445, ctc_loss=0.1096, cr_loss=0.3461, attn_decoder_loss=0.2518, over 29641.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1112, cr_loss=0.3498, attn_decoder_loss=0.2379, over 5799075.17 frames. ], batch size: 86, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:27:15,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=739200.0, ans=0.125 +2024-09-19 20:27:17,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=739200.0, ans=0.025 +2024-09-19 20:27:57,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=739280.0, ans=0.125 +2024-09-19 20:28:14,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=739360.0, ans=0.0 +2024-09-19 20:28:23,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=739360.0, ans=0.125 +2024-09-19 20:28:30,234 INFO [train.py:1198] (1/2) Epoch 41, batch 3850, loss[loss=0.2388, ctc_loss=0.1086, cr_loss=0.3471, attn_decoder_loss=0.2456, over 29244.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1113, cr_loss=0.3504, attn_decoder_loss=0.2379, over 5812858.98 frames. ], batch size: 100, lr: 2.68e-03, grad_scale: 8.0 +2024-09-19 20:28:33,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=739400.0, ans=0.125 +2024-09-19 20:28:47,071 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.67 vs. 
limit=15.0 +2024-09-19 20:28:47,850 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 8.446e+01 9.109e+01 9.536e+01 1.999e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-19 20:28:54,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=739440.0, ans=0.125 +2024-09-19 20:29:09,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=739480.0, ans=0.125 +2024-09-19 20:29:17,285 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.92 vs. limit=15.0 +2024-09-19 20:29:27,756 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=5.35 vs. limit=15.0 +2024-09-19 20:29:37,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=739560.0, ans=0.1 +2024-09-19 20:29:40,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=739560.0, ans=0.125 +2024-09-19 20:29:46,230 INFO [train.py:1198] (1/2) Epoch 41, batch 3900, loss[loss=0.2326, ctc_loss=0.1078, cr_loss=0.3465, attn_decoder_loss=0.2388, over 29635.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1118, cr_loss=0.3516, attn_decoder_loss=0.2385, over 5817224.06 frames. ], batch size: 86, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:29:55,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=739600.0, ans=0.125 +2024-09-19 20:30:17,790 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.88 vs. limit=12.0 +2024-09-19 20:30:43,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=739760.0, ans=0.2 +2024-09-19 20:30:59,903 INFO [train.py:1198] (1/2) Epoch 41, batch 3950, loss[loss=0.2468, ctc_loss=0.1278, cr_loss=0.3811, attn_decoder_loss=0.2515, over 29500.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1116, cr_loss=0.3513, attn_decoder_loss=0.2384, over 5836341.28 frames. ], batch size: 97, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:31:02,472 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.28 vs. 
limit=15.0 +2024-09-19 20:31:09,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=739800.0, ans=0.1 +2024-09-19 20:31:16,082 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.077e+01 8.615e+01 9.061e+01 9.543e+01 2.103e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 20:31:42,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=739880.0, ans=0.125 +2024-09-19 20:31:44,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=739920.0, ans=0.2 +2024-09-19 20:31:54,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=739920.0, ans=0.2 +2024-09-19 20:32:07,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=739960.0, ans=0.1 +2024-09-19 20:32:08,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=739960.0, ans=0.125 +2024-09-19 20:32:15,352 INFO [train.py:1198] (1/2) Epoch 41, batch 4000, loss[loss=0.2079, ctc_loss=0.08718, cr_loss=0.299, attn_decoder_loss=0.2146, over 29514.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1114, cr_loss=0.351, attn_decoder_loss=0.2383, over 5813751.78 frames. ], batch size: 74, lr: 2.67e-03, grad_scale: 16.0 +2024-09-19 20:32:17,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=740000.0, ans=0.0 +2024-09-19 20:32:24,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=740000.0, ans=0.125 +2024-09-19 20:32:36,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=740040.0, ans=0.0 +2024-09-19 20:32:40,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=740040.0, ans=0.1 +2024-09-19 20:32:40,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=740040.0, ans=0.2 +2024-09-19 20:32:48,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=740080.0, ans=0.05 +2024-09-19 20:33:04,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=740120.0, ans=0.0 +2024-09-19 20:33:07,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=740120.0, ans=0.125 +2024-09-19 20:33:09,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=740120.0, ans=0.0 +2024-09-19 20:33:30,565 INFO [train.py:1198] (1/2) Epoch 41, batch 4050, loss[loss=0.243, ctc_loss=0.1266, cr_loss=0.3548, attn_decoder_loss=0.2481, over 20199.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1112, cr_loss=0.3505, attn_decoder_loss=0.238, over 5797561.11 frames. 
], batch size: 209, lr: 2.67e-03, grad_scale: 16.0 +2024-09-19 20:33:30,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=740200.0, ans=0.05 +2024-09-19 20:33:34,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.33 vs. limit=6.0 +2024-09-19 20:33:39,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=740200.0, ans=0.125 +2024-09-19 20:33:48,069 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.562e+01 8.566e+01 9.117e+01 9.789e+01 2.862e+02, threshold=1.823e+02, percent-clipped=4.0 +2024-09-19 20:33:51,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=740240.0, ans=0.1 +2024-09-19 20:33:54,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=740240.0, ans=0.2 +2024-09-19 20:33:54,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.12 vs. limit=22.5 +2024-09-19 20:33:57,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=740240.0, ans=0.125 +2024-09-19 20:34:05,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=740280.0, ans=0.125 +2024-09-19 20:34:15,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=740320.0, ans=0.2 +2024-09-19 20:34:17,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=740320.0, ans=0.125 +2024-09-19 20:34:32,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=740360.0, ans=0.125 +2024-09-19 20:34:43,681 INFO [train.py:1198] (1/2) Epoch 41, batch 4100, loss[loss=0.247, ctc_loss=0.1218, cr_loss=0.3772, attn_decoder_loss=0.2525, over 29484.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1116, cr_loss=0.3513, attn_decoder_loss=0.2382, over 5793147.05 frames. 
], batch size: 90, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:35:03,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=740440.0, ans=0.2 +2024-09-19 20:35:08,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=740440.0, ans=0.1 +2024-09-19 20:35:10,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=740440.0, ans=0.125 +2024-09-19 20:35:13,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=740480.0, ans=0.0 +2024-09-19 20:35:20,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=740480.0, ans=0.125 +2024-09-19 20:35:32,669 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:35:37,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=740520.0, ans=0.025 +2024-09-19 20:35:41,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=740560.0, ans=0.05 +2024-09-19 20:35:48,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=740560.0, ans=0.1 +2024-09-19 20:35:57,570 INFO [train.py:1198] (1/2) Epoch 41, batch 4150, loss[loss=0.231, ctc_loss=0.1144, cr_loss=0.369, attn_decoder_loss=0.2357, over 29519.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1115, cr_loss=0.3512, attn_decoder_loss=0.2381, over 5798935.43 frames. ], batch size: 77, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:36:02,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=740600.0, ans=0.04949747468305833 +2024-09-19 20:36:07,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=740600.0, ans=0.0 +2024-09-19 20:36:16,236 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.325e+01 8.604e+01 9.031e+01 9.625e+01 1.845e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-19 20:36:16,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=740640.0, ans=0.1 +2024-09-19 20:36:17,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=740640.0, ans=0.2 +2024-09-19 20:36:37,519 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.59 vs. limit=12.0 +2024-09-19 20:36:50,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=740720.0, ans=0.125 +2024-09-19 20:37:12,305 INFO [train.py:1198] (1/2) Epoch 41, batch 4200, loss[loss=0.2481, ctc_loss=0.1284, cr_loss=0.387, attn_decoder_loss=0.2528, over 29512.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1116, cr_loss=0.3513, attn_decoder_loss=0.2383, over 5800904.55 frames. 
], batch size: 90, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:37:12,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=740800.0, ans=0.125 +2024-09-19 20:37:15,977 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.65 vs. limit=10.0 +2024-09-19 20:37:18,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=740800.0, ans=0.125 +2024-09-19 20:37:23,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=740800.0, ans=0.0 +2024-09-19 20:37:46,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=740880.0, ans=0.07 +2024-09-19 20:37:54,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=740880.0, ans=0.1 +2024-09-19 20:38:00,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=740920.0, ans=0.1 +2024-09-19 20:38:22,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=740960.0, ans=0.1 +2024-09-19 20:38:26,687 INFO [train.py:1198] (1/2) Epoch 41, batch 4250, loss[loss=0.2195, ctc_loss=0.09964, cr_loss=0.3345, attn_decoder_loss=0.2254, over 29521.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1114, cr_loss=0.3511, attn_decoder_loss=0.2387, over 5806773.52 frames. ], batch size: 74, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:38:36,341 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.67 vs. limit=15.0 +2024-09-19 20:38:42,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=741040.0, ans=0.2 +2024-09-19 20:38:44,064 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.725e+01 8.665e+01 9.196e+01 9.683e+01 5.015e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-19 20:38:46,475 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.57 vs. limit=15.0 +2024-09-19 20:38:59,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=741080.0, ans=0.125 +2024-09-19 20:39:40,136 INFO [train.py:1198] (1/2) Epoch 41, batch 4300, loss[loss=0.2391, ctc_loss=0.113, cr_loss=0.3482, attn_decoder_loss=0.2453, over 29560.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1114, cr_loss=0.3509, attn_decoder_loss=0.2389, over 5796639.75 frames. 
], batch size: 87, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:40:00,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=741240.0, ans=0.2 +2024-09-19 20:40:00,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=741240.0, ans=0.0 +2024-09-19 20:40:06,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=741240.0, ans=0.025 +2024-09-19 20:40:08,984 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.95 vs. limit=15.0 +2024-09-19 20:40:40,299 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:40:51,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=741360.0, ans=0.125 +2024-09-19 20:40:55,593 INFO [train.py:1198] (1/2) Epoch 41, batch 4350, loss[loss=0.238, ctc_loss=0.1196, cr_loss=0.3773, attn_decoder_loss=0.2428, over 29478.00 frames. ], tot_loss[loss=0.2362, ctc_loss=0.114, cr_loss=0.3558, attn_decoder_loss=0.2419, over 5799069.64 frames. ], batch size: 97, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:40:56,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.11 vs. limit=22.5 +2024-09-19 20:41:13,113 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.774e+01 8.893e+01 9.255e+01 9.747e+01 1.701e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-19 20:41:24,431 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.06 vs. limit=8.0 +2024-09-19 20:41:37,983 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=741520.0, ans=0.025 +2024-09-19 20:41:37,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=741520.0, ans=0.0 +2024-09-19 20:41:38,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.31 vs. limit=10.0 +2024-09-19 20:42:01,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=741560.0, ans=0.125 +2024-09-19 20:42:01,647 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.07 vs. limit=6.0 +2024-09-19 20:42:02,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=741560.0, ans=0.125 +2024-09-19 20:42:03,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.38 vs. limit=15.0 +2024-09-19 20:42:08,700 INFO [train.py:1198] (1/2) Epoch 41, batch 4400, loss[loss=0.2409, ctc_loss=0.1157, cr_loss=0.3485, attn_decoder_loss=0.247, over 27114.00 frames. ], tot_loss[loss=0.2381, ctc_loss=0.115, cr_loss=0.3581, attn_decoder_loss=0.2438, over 5767679.77 frames. 
], batch size: 124, lr: 2.67e-03, grad_scale: 16.0 +2024-09-19 20:42:13,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=741600.0, ans=0.125 +2024-09-19 20:42:17,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=741600.0, ans=0.1 +2024-09-19 20:42:26,237 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=741640.0, ans=0.1 +2024-09-19 20:42:35,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=741640.0, ans=0.09899494936611666 +2024-09-19 20:42:40,773 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.90 vs. limit=10.0 +2024-09-19 20:42:56,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=741720.0, ans=0.125 +2024-09-19 20:43:01,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=741720.0, ans=0.0 +2024-09-19 20:43:03,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=741720.0, ans=0.025 +2024-09-19 20:43:05,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=741720.0, ans=0.0 +2024-09-19 20:43:23,296 INFO [train.py:1198] (1/2) Epoch 41, batch 4450, loss[loss=0.2529, ctc_loss=0.1365, cr_loss=0.3755, attn_decoder_loss=0.2575, over 20941.00 frames. ], tot_loss[loss=0.2406, ctc_loss=0.1188, cr_loss=0.3636, attn_decoder_loss=0.246, over 5577406.70 frames. ], batch size: 209, lr: 2.67e-03, grad_scale: 16.0 +2024-09-19 20:43:25,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=741800.0, ans=0.125 +2024-09-19 20:43:34,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=741800.0, ans=0.125 +2024-09-19 20:43:41,190 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.090e+01 9.304e+01 9.971e+01 1.121e+02 2.265e+02, threshold=1.994e+02, percent-clipped=2.0 +2024-09-19 20:43:53,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=741880.0, ans=0.0 +2024-09-19 20:44:20,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=741920.0, ans=0.0 +2024-09-19 20:44:38,332 INFO [train.py:1198] (1/2) Epoch 41, batch 4500, loss[loss=0.2469, ctc_loss=0.1274, cr_loss=0.3532, attn_decoder_loss=0.2523, over 20509.00 frames. ], tot_loss[loss=0.2426, ctc_loss=0.1218, cr_loss=0.3664, attn_decoder_loss=0.2479, over 5237844.78 frames. 
], batch size: 209, lr: 2.67e-03, grad_scale: 8.0 +2024-09-19 20:44:57,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=742040.0, ans=0.125 +2024-09-19 20:45:02,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=742040.0, ans=0.125 +2024-09-19 20:45:08,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=742080.0, ans=0.2 +2024-09-19 20:46:06,176 INFO [train.py:1198] (1/2) Epoch 42, batch 0, loss[loss=0.2088, ctc_loss=0.08769, cr_loss=0.2949, attn_decoder_loss=0.2157, over 29626.00 frames. ], tot_loss[loss=0.2088, ctc_loss=0.08769, cr_loss=0.2949, attn_decoder_loss=0.2157, over 29626.00 frames. ], batch size: 73, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:46:06,177 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 20:46:24,582 INFO [train.py:1230] (1/2) Epoch 42, validation: loss=0.2127, ctc_loss=0.03579, cr_loss=6.428e-15, attn_decoder_loss=0.2324, over 944034.00 frames. +2024-09-19 20:46:24,582 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 20:46:35,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=742100.0, ans=0.125 +2024-09-19 20:46:36,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=742100.0, ans=0.0 +2024-09-19 20:46:41,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.90 vs. limit=15.0 +2024-09-19 20:46:44,488 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:46:48,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=742140.0, ans=0.2 +2024-09-19 20:47:14,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=742220.0, ans=0.0 +2024-09-19 20:47:21,852 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.777e+01 9.381e+01 1.084e+02 1.178e+02 1.554e+02, threshold=2.167e+02, percent-clipped=0.0 +2024-09-19 20:47:42,178 INFO [train.py:1198] (1/2) Epoch 42, batch 50, loss[loss=0.2068, ctc_loss=0.09229, cr_loss=0.3075, attn_decoder_loss=0.2126, over 29437.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1129, cr_loss=0.3539, attn_decoder_loss=0.2386, over 1268336.09 frames. ], batch size: 70, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:47:44,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=742300.0, ans=0.1 +2024-09-19 20:48:26,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.58 vs. limit=10.0 +2024-09-19 20:48:59,796 INFO [train.py:1198] (1/2) Epoch 42, batch 100, loss[loss=0.212, ctc_loss=0.09169, cr_loss=0.3002, attn_decoder_loss=0.2187, over 29517.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1149, cr_loss=0.3588, attn_decoder_loss=0.241, over 2253027.65 frames. 
], batch size: 76, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:49:00,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=742500.0, ans=0.125 +2024-09-19 20:49:05,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=742500.0, ans=0.125 +2024-09-19 20:49:49,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=742620.0, ans=0.125 +2024-09-19 20:49:56,419 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.813e+01 8.687e+01 8.987e+01 9.639e+01 1.254e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-19 20:50:05,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=742660.0, ans=0.0 +2024-09-19 20:50:14,291 INFO [train.py:1198] (1/2) Epoch 42, batch 150, loss[loss=0.2106, ctc_loss=0.09843, cr_loss=0.3283, attn_decoder_loss=0.2158, over 29422.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1129, cr_loss=0.3546, attn_decoder_loss=0.2387, over 3047416.37 frames. ], batch size: 70, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:50:18,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=742700.0, ans=0.025 +2024-09-19 20:50:21,337 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.12 vs. limit=15.0 +2024-09-19 20:50:34,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=742740.0, ans=0.1 +2024-09-19 20:50:43,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=742780.0, ans=0.1 +2024-09-19 20:50:55,018 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:51:00,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.35 vs. limit=22.5 +2024-09-19 20:51:01,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=742820.0, ans=0.125 +2024-09-19 20:51:05,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=742820.0, ans=0.125 +2024-09-19 20:51:07,389 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.66 vs. limit=10.0 +2024-09-19 20:51:17,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=742860.0, ans=0.0 +2024-09-19 20:51:31,400 INFO [train.py:1198] (1/2) Epoch 42, batch 200, loss[loss=0.2363, ctc_loss=0.1174, cr_loss=0.3579, attn_decoder_loss=0.2415, over 27418.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1129, cr_loss=0.3549, attn_decoder_loss=0.2383, over 3659427.52 frames. ], batch size: 125, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:51:56,130 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.37 vs. 
limit=6.0 +2024-09-19 20:52:20,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=743020.0, ans=0.1 +2024-09-19 20:52:27,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.26 vs. limit=15.0 +2024-09-19 20:52:31,004 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.655e+01 8.542e+01 9.078e+01 9.443e+01 1.255e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 20:52:47,310 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.50 vs. limit=6.0 +2024-09-19 20:52:49,279 INFO [train.py:1198] (1/2) Epoch 42, batch 250, loss[loss=0.249, ctc_loss=0.1191, cr_loss=0.3578, attn_decoder_loss=0.2555, over 29340.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1122, cr_loss=0.3523, attn_decoder_loss=0.2381, over 4141075.45 frames. ], batch size: 100, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:53:07,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=743140.0, ans=0.0 +2024-09-19 20:53:07,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=743140.0, ans=10.0 +2024-09-19 20:53:21,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=743180.0, ans=0.1 +2024-09-19 20:53:29,869 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.41 vs. limit=6.0 +2024-09-19 20:53:39,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=743220.0, ans=0.125 +2024-09-19 20:53:45,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=743220.0, ans=0.125 +2024-09-19 20:54:02,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=743260.0, ans=0.1 +2024-09-19 20:54:02,741 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.94 vs. limit=12.0 +2024-09-19 20:54:04,815 INFO [train.py:1198] (1/2) Epoch 42, batch 300, loss[loss=0.2539, ctc_loss=0.1263, cr_loss=0.391, attn_decoder_loss=0.2594, over 29514.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1117, cr_loss=0.3512, attn_decoder_loss=0.238, over 4510559.09 frames. ], batch size: 92, lr: 2.64e-03, grad_scale: 16.0 +2024-09-19 20:54:05,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=743300.0, ans=0.0 +2024-09-19 20:54:13,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.27 vs. limit=6.0 +2024-09-19 20:54:21,355 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.84 vs. limit=10.0 +2024-09-19 20:54:25,429 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.31 vs. 
limit=15.0 +2024-09-19 20:54:26,519 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:54:35,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=743380.0, ans=0.125 +2024-09-19 20:54:36,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=743380.0, ans=0.0 +2024-09-19 20:54:39,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=743380.0, ans=0.125 +2024-09-19 20:54:49,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten.whitening_limit, batch_count=743420.0, ans=15.0 +2024-09-19 20:54:58,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=743420.0, ans=0.95 +2024-09-19 20:55:03,821 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.584e+01 8.625e+01 9.047e+01 9.646e+01 1.583e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 20:55:08,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=743460.0, ans=0.2 +2024-09-19 20:55:10,670 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.52 vs. limit=15.0 +2024-09-19 20:55:22,744 INFO [train.py:1198] (1/2) Epoch 42, batch 350, loss[loss=0.2177, ctc_loss=0.102, cr_loss=0.3325, attn_decoder_loss=0.2232, over 29319.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1121, cr_loss=0.3525, attn_decoder_loss=0.2386, over 4795341.91 frames. ], batch size: 71, lr: 2.64e-03, grad_scale: 8.0 +2024-09-19 20:55:45,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=743540.0, ans=0.125 +2024-09-19 20:56:20,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=743620.0, ans=0.2 +2024-09-19 20:56:24,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=743660.0, ans=0.125 +2024-09-19 20:56:25,571 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 20:56:40,161 INFO [train.py:1198] (1/2) Epoch 42, batch 400, loss[loss=0.2368, ctc_loss=0.1161, cr_loss=0.3633, attn_decoder_loss=0.2422, over 29722.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1117, cr_loss=0.3523, attn_decoder_loss=0.2384, over 5025086.51 frames. ], batch size: 82, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 20:56:43,956 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.23 vs. 
limit=15.0 +2024-09-19 20:57:03,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.min_abs, batch_count=743740.0, ans=0.5 +2024-09-19 20:57:03,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=743740.0, ans=0.125 +2024-09-19 20:57:29,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=743820.0, ans=0.1 +2024-09-19 20:57:39,491 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.837e+01 8.484e+01 8.956e+01 9.498e+01 1.659e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 20:57:56,181 INFO [train.py:1198] (1/2) Epoch 42, batch 450, loss[loss=0.244, ctc_loss=0.1131, cr_loss=0.3522, attn_decoder_loss=0.2507, over 29691.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1118, cr_loss=0.3522, attn_decoder_loss=0.2386, over 5189019.73 frames. ], batch size: 83, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 20:57:57,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=743900.0, ans=0.1 +2024-09-19 20:58:11,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=743940.0, ans=0.2 +2024-09-19 20:58:20,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=743940.0, ans=0.05 +2024-09-19 20:58:39,532 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.45 vs. limit=15.0 +2024-09-19 20:58:46,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=744020.0, ans=0.125 +2024-09-19 20:59:10,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=744100.0, ans=0.125 +2024-09-19 20:59:12,127 INFO [train.py:1198] (1/2) Epoch 42, batch 500, loss[loss=0.254, ctc_loss=0.1316, cr_loss=0.3961, attn_decoder_loss=0.2588, over 29432.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1112, cr_loss=0.3514, attn_decoder_loss=0.2379, over 5332192.35 frames. 
], batch size: 94, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 20:59:38,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=744140.0, ans=0.1 +2024-09-19 20:59:43,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=744180.0, ans=0.125 +2024-09-19 20:59:51,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=744180.0, ans=0.125 +2024-09-19 21:00:10,291 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:00:13,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=744220.0, ans=22.5 +2024-09-19 21:00:15,763 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.239e+01 8.359e+01 8.854e+01 9.452e+01 4.385e+02, threshold=1.771e+02, percent-clipped=2.0 +2024-09-19 21:00:32,313 INFO [train.py:1198] (1/2) Epoch 42, batch 550, loss[loss=0.2484, ctc_loss=0.1155, cr_loss=0.3618, attn_decoder_loss=0.2551, over 28901.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1112, cr_loss=0.3514, attn_decoder_loss=0.2382, over 5425083.12 frames. ], batch size: 104, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:00:32,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=744300.0, ans=0.0 +2024-09-19 21:00:46,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=744340.0, ans=0.0 +2024-09-19 21:01:08,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=744380.0, ans=0.1 +2024-09-19 21:01:13,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=744380.0, ans=0.2 +2024-09-19 21:01:13,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=744380.0, ans=0.2 +2024-09-19 21:01:25,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=744420.0, ans=0.125 +2024-09-19 21:01:36,039 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:01:47,812 INFO [train.py:1198] (1/2) Epoch 42, batch 600, loss[loss=0.2497, ctc_loss=0.1261, cr_loss=0.378, attn_decoder_loss=0.255, over 29278.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1113, cr_loss=0.3513, attn_decoder_loss=0.2383, over 5509644.60 frames. 
], batch size: 100, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:01:49,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=744500.0, ans=0.125 +2024-09-19 21:02:03,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=744540.0, ans=0.125 +2024-09-19 21:02:03,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=744540.0, ans=0.125 +2024-09-19 21:02:18,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=744580.0, ans=0.2 +2024-09-19 21:02:19,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=744580.0, ans=0.0 +2024-09-19 21:02:22,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=744580.0, ans=0.125 +2024-09-19 21:02:47,680 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.362e+01 8.400e+01 8.795e+01 9.486e+01 1.602e+02, threshold=1.759e+02, percent-clipped=0.0 +2024-09-19 21:02:59,094 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.14 vs. limit=10.0 +2024-09-19 21:03:01,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=744700.0, ans=0.0 +2024-09-19 21:03:02,680 INFO [train.py:1198] (1/2) Epoch 42, batch 650, loss[loss=0.2397, ctc_loss=0.1069, cr_loss=0.3559, attn_decoder_loss=0.2466, over 29771.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1104, cr_loss=0.3493, attn_decoder_loss=0.2376, over 5586716.15 frames. ], batch size: 81, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:03:24,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=744740.0, ans=0.125 +2024-09-19 21:03:27,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=744740.0, ans=0.0 +2024-09-19 21:03:30,665 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=744740.0, ans=0.125 +2024-09-19 21:03:40,700 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.64 vs. limit=15.0 +2024-09-19 21:03:48,238 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.90 vs. limit=15.0 +2024-09-19 21:03:49,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=744820.0, ans=0.125 +2024-09-19 21:03:53,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=744820.0, ans=0.2 +2024-09-19 21:03:55,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.78 vs. 
limit=15.0 +2024-09-19 21:03:58,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=744820.0, ans=0.2 +2024-09-19 21:04:03,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=744820.0, ans=0.2 +2024-09-19 21:04:06,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=744860.0, ans=0.2 +2024-09-19 21:04:20,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=744860.0, ans=0.0 +2024-09-19 21:04:21,510 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=744900.0, ans=0.0 +2024-09-19 21:04:22,725 INFO [train.py:1198] (1/2) Epoch 42, batch 700, loss[loss=0.222, ctc_loss=0.1015, cr_loss=0.3249, attn_decoder_loss=0.2282, over 29534.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1109, cr_loss=0.3501, attn_decoder_loss=0.2382, over 5637869.57 frames. ], batch size: 76, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:04:33,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=744900.0, ans=0.125 +2024-09-19 21:04:37,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=744940.0, ans=0.1 +2024-09-19 21:04:39,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=744940.0, ans=0.125 +2024-09-19 21:04:53,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=744980.0, ans=0.125 +2024-09-19 21:05:15,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=745020.0, ans=0.125 +2024-09-19 21:05:17,802 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=17.59 vs. limit=22.5 +2024-09-19 21:05:23,212 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.314e+01 8.486e+01 9.011e+01 9.700e+01 3.654e+02, threshold=1.802e+02, percent-clipped=4.0 +2024-09-19 21:05:37,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=745100.0, ans=0.0 +2024-09-19 21:05:38,321 INFO [train.py:1198] (1/2) Epoch 42, batch 750, loss[loss=0.2355, ctc_loss=0.1143, cr_loss=0.3571, attn_decoder_loss=0.241, over 29703.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1111, cr_loss=0.3504, attn_decoder_loss=0.2381, over 5675068.91 frames. ], batch size: 82, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:06:49,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=745260.0, ans=10.0 +2024-09-19 21:06:53,479 INFO [train.py:1198] (1/2) Epoch 42, batch 800, loss[loss=0.2178, ctc_loss=0.1048, cr_loss=0.3255, attn_decoder_loss=0.2231, over 29614.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1108, cr_loss=0.3497, attn_decoder_loss=0.2379, over 5707748.31 frames. 
], batch size: 73, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:06:55,430 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:07:12,732 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.07 vs. limit=22.5 +2024-09-19 21:07:50,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=745420.0, ans=0.2 +2024-09-19 21:07:50,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=745420.0, ans=0.2 +2024-09-19 21:07:59,494 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.380e+01 8.594e+01 9.081e+01 9.628e+01 1.457e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 21:07:59,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=745460.0, ans=0.0 +2024-09-19 21:08:12,816 INFO [train.py:1198] (1/2) Epoch 42, batch 850, loss[loss=0.2429, ctc_loss=0.1188, cr_loss=0.3602, attn_decoder_loss=0.2487, over 29696.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1108, cr_loss=0.3496, attn_decoder_loss=0.2377, over 5736496.83 frames. ], batch size: 89, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:08:36,985 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=745540.0, ans=0.0 +2024-09-19 21:08:37,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=745540.0, ans=0.125 +2024-09-19 21:08:38,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=745540.0, ans=0.1 +2024-09-19 21:08:43,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=745580.0, ans=0.125 +2024-09-19 21:08:46,830 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.29 vs. limit=15.0 +2024-09-19 21:08:54,035 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:08:57,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=745620.0, ans=0.125 +2024-09-19 21:09:20,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=745660.0, ans=0.0 +2024-09-19 21:09:20,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=745660.0, ans=0.125 +2024-09-19 21:09:27,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=745700.0, ans=0.025 +2024-09-19 21:09:28,709 INFO [train.py:1198] (1/2) Epoch 42, batch 900, loss[loss=0.2148, ctc_loss=0.1008, cr_loss=0.3316, attn_decoder_loss=0.22, over 29641.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1111, cr_loss=0.3501, attn_decoder_loss=0.238, over 5739805.95 frames. 
], batch size: 73, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:09:31,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=745700.0, ans=0.125 +2024-09-19 21:09:43,354 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.88 vs. limit=15.0 +2024-09-19 21:09:47,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=745740.0, ans=0.0 +2024-09-19 21:09:47,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=745740.0, ans=0.0 +2024-09-19 21:09:59,364 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.77 vs. limit=12.0 +2024-09-19 21:10:03,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=745780.0, ans=0.07 +2024-09-19 21:10:08,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=745780.0, ans=0.0 +2024-09-19 21:10:09,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=745780.0, ans=0.0 +2024-09-19 21:10:20,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=745820.0, ans=0.0 +2024-09-19 21:10:23,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=745820.0, ans=0.125 +2024-09-19 21:10:30,382 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.128e+01 8.573e+01 9.060e+01 9.874e+01 1.680e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-19 21:10:30,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=745860.0, ans=0.0 +2024-09-19 21:10:30,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=745860.0, ans=0.025 +2024-09-19 21:10:43,718 INFO [train.py:1198] (1/2) Epoch 42, batch 950, loss[loss=0.2201, ctc_loss=0.09788, cr_loss=0.3177, attn_decoder_loss=0.2267, over 29499.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1109, cr_loss=0.3501, attn_decoder_loss=0.238, over 5741299.25 frames. ], batch size: 74, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:12:03,076 INFO [train.py:1198] (1/2) Epoch 42, batch 1000, loss[loss=0.2321, ctc_loss=0.1045, cr_loss=0.3407, attn_decoder_loss=0.2387, over 29487.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1118, cr_loss=0.3514, attn_decoder_loss=0.2388, over 5734697.71 frames. 
], batch size: 77, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:12:09,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=746100.0, ans=0.0 +2024-09-19 21:12:20,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=746140.0, ans=0.2 +2024-09-19 21:12:59,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=746220.0, ans=0.125 +2024-09-19 21:13:05,355 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.617e+01 8.540e+01 9.060e+01 9.719e+01 2.106e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 21:13:05,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=746260.0, ans=0.1 +2024-09-19 21:13:05,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=746260.0, ans=0.0 +2024-09-19 21:13:17,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=746300.0, ans=0.125 +2024-09-19 21:13:18,996 INFO [train.py:1198] (1/2) Epoch 42, batch 1050, loss[loss=0.24, ctc_loss=0.1157, cr_loss=0.3664, attn_decoder_loss=0.2457, over 29673.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1118, cr_loss=0.3516, attn_decoder_loss=0.2384, over 5742876.69 frames. ], batch size: 85, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:13:25,867 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.76 vs. limit=6.0 +2024-09-19 21:13:26,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=746300.0, ans=0.0 +2024-09-19 21:13:26,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=746300.0, ans=0.0 +2024-09-19 21:13:32,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=746340.0, ans=0.125 +2024-09-19 21:13:34,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=746340.0, ans=0.0 +2024-09-19 21:14:04,599 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=8.01 vs. limit=15.0 +2024-09-19 21:14:10,524 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.54 vs. limit=15.0 +2024-09-19 21:14:33,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=746500.0, ans=0.1 +2024-09-19 21:14:35,084 INFO [train.py:1198] (1/2) Epoch 42, batch 1100, loss[loss=0.2317, ctc_loss=0.1107, cr_loss=0.3457, attn_decoder_loss=0.2374, over 29442.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1116, cr_loss=0.3511, attn_decoder_loss=0.2382, over 5755304.48 frames. 
], batch size: 78, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:14:39,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=746500.0, ans=0.125 +2024-09-19 21:15:00,083 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:15:07,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=746580.0, ans=0.125 +2024-09-19 21:15:22,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=746620.0, ans=0.0 +2024-09-19 21:15:31,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=746620.0, ans=0.0 +2024-09-19 21:15:39,239 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.407e+01 8.586e+01 9.042e+01 9.812e+01 2.400e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 21:15:41,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=746660.0, ans=0.0 +2024-09-19 21:15:46,324 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.22 vs. limit=15.0 +2024-09-19 21:15:50,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=746660.0, ans=0.1 +2024-09-19 21:15:55,115 INFO [train.py:1198] (1/2) Epoch 42, batch 1150, loss[loss=0.2382, ctc_loss=0.114, cr_loss=0.35, attn_decoder_loss=0.2442, over 29434.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1118, cr_loss=0.3519, attn_decoder_loss=0.2383, over 5753998.46 frames. ], batch size: 78, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:15:55,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=746700.0, ans=0.1 +2024-09-19 21:16:10,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=746740.0, ans=0.2 +2024-09-19 21:16:17,250 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.89 vs. limit=22.5 +2024-09-19 21:16:21,965 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.92 vs. limit=22.5 +2024-09-19 21:16:30,335 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:16:31,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=746780.0, ans=0.0 +2024-09-19 21:16:38,799 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.41 vs. 
limit=15.0 +2024-09-19 21:16:51,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=746820.0, ans=0.0 +2024-09-19 21:17:03,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=746860.0, ans=0.125 +2024-09-19 21:17:06,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=746860.0, ans=0.0 +2024-09-19 21:17:10,793 INFO [train.py:1198] (1/2) Epoch 42, batch 1200, loss[loss=0.2401, ctc_loss=0.1182, cr_loss=0.3722, attn_decoder_loss=0.2454, over 29673.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1122, cr_loss=0.3527, attn_decoder_loss=0.2388, over 5746445.41 frames. ], batch size: 85, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:17:11,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=746900.0, ans=0.125 +2024-09-19 21:17:23,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=746900.0, ans=0.95 +2024-09-19 21:17:24,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=746940.0, ans=0.125 +2024-09-19 21:17:33,966 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:17:52,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=746980.0, ans=0.07 +2024-09-19 21:17:56,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=747020.0, ans=0.125 +2024-09-19 21:17:58,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=747020.0, ans=0.0 +2024-09-19 21:17:59,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=747020.0, ans=0.0 +2024-09-19 21:18:11,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=747060.0, ans=0.95 +2024-09-19 21:18:13,018 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.675e+01 9.072e+01 9.806e+01 1.661e+02, threshold=1.814e+02, percent-clipped=1.0 +2024-09-19 21:18:26,676 INFO [train.py:1198] (1/2) Epoch 42, batch 1250, loss[loss=0.2518, ctc_loss=0.1338, cr_loss=0.4065, attn_decoder_loss=0.2559, over 29503.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1125, cr_loss=0.3539, attn_decoder_loss=0.2394, over 5774220.85 frames. ], batch size: 92, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:18:47,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=747140.0, ans=0.0 +2024-09-19 21:18:47,772 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.75 vs. 
limit=15.0 +2024-09-19 21:18:53,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=747140.0, ans=0.125 +2024-09-19 21:19:04,796 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.73 vs. limit=15.0 +2024-09-19 21:19:25,030 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.09 vs. limit=22.5 +2024-09-19 21:19:35,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.94 vs. limit=15.0 +2024-09-19 21:19:47,433 INFO [train.py:1198] (1/2) Epoch 42, batch 1300, loss[loss=0.2402, ctc_loss=0.1166, cr_loss=0.355, attn_decoder_loss=0.246, over 28282.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1114, cr_loss=0.3519, attn_decoder_loss=0.2383, over 5779229.61 frames. ], batch size: 111, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:20:10,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=747340.0, ans=0.125 +2024-09-19 21:20:22,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=747380.0, ans=0.1 +2024-09-19 21:20:41,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=747420.0, ans=0.125 +2024-09-19 21:20:50,663 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.269e+01 8.538e+01 9.081e+01 9.476e+01 1.507e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-19 21:21:02,971 INFO [train.py:1198] (1/2) Epoch 42, batch 1350, loss[loss=0.2321, ctc_loss=0.1117, cr_loss=0.3413, attn_decoder_loss=0.2379, over 29762.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1111, cr_loss=0.3515, attn_decoder_loss=0.2381, over 5796262.74 frames. ], batch size: 81, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:21:09,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=747500.0, ans=0.1 +2024-09-19 21:21:12,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=747500.0, ans=0.1 +2024-09-19 21:21:29,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=747540.0, ans=0.125 +2024-09-19 21:21:47,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=747620.0, ans=0.125 +2024-09-19 21:21:52,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=747620.0, ans=0.1 +2024-09-19 21:21:52,920 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.95 vs. 
limit=6.0 +2024-09-19 21:22:02,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=747660.0, ans=0.125 +2024-09-19 21:22:13,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=747660.0, ans=0.125 +2024-09-19 21:22:15,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=747660.0, ans=0.0 +2024-09-19 21:22:17,774 INFO [train.py:1198] (1/2) Epoch 42, batch 1400, loss[loss=0.2057, ctc_loss=0.09281, cr_loss=0.3121, attn_decoder_loss=0.2113, over 29598.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.111, cr_loss=0.3517, attn_decoder_loss=0.2381, over 5807748.74 frames. ], batch size: 69, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:22:31,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=747740.0, ans=0.125 +2024-09-19 21:22:48,936 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:23:02,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=747780.0, ans=0.125 +2024-09-19 21:23:10,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=747820.0, ans=0.1 +2024-09-19 21:23:23,217 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.728e+01 8.442e+01 9.058e+01 9.585e+01 2.575e+02, threshold=1.812e+02, percent-clipped=1.0 +2024-09-19 21:23:30,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=747860.0, ans=0.5 +2024-09-19 21:23:35,309 INFO [train.py:1198] (1/2) Epoch 42, batch 1450, loss[loss=0.2507, ctc_loss=0.1239, cr_loss=0.3734, attn_decoder_loss=0.2565, over 29438.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.111, cr_loss=0.3512, attn_decoder_loss=0.2384, over 5805270.47 frames. ], batch size: 94, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:23:40,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=747900.0, ans=0.125 +2024-09-19 21:23:41,547 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=747900.0, ans=0.1 +2024-09-19 21:23:48,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=747900.0, ans=0.125 +2024-09-19 21:24:01,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=747940.0, ans=0.0 +2024-09-19 21:24:30,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=748020.0, ans=0.125 +2024-09-19 21:24:32,992 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.93 vs. limit=12.0 +2024-09-19 21:24:39,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=748060.0, ans=0.1 +2024-09-19 21:24:53,278 INFO [train.py:1198] (1/2) Epoch 42, batch 1500, loss[loss=0.2346, ctc_loss=0.1046, cr_loss=0.3213, attn_decoder_loss=0.2419, over 29606.00 frames. 
], tot_loss[loss=0.233, ctc_loss=0.1109, cr_loss=0.3512, attn_decoder_loss=0.2388, over 5806535.61 frames. ], batch size: 86, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:24:56,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=748100.0, ans=0.1 +2024-09-19 21:25:33,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=748180.0, ans=0.125 +2024-09-19 21:25:51,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=748220.0, ans=0.0 +2024-09-19 21:25:53,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=748260.0, ans=0.025 +2024-09-19 21:25:56,568 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:25:57,687 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.682e+01 8.590e+01 8.992e+01 9.499e+01 3.130e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-19 21:26:05,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=748260.0, ans=0.125 +2024-09-19 21:26:07,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=748260.0, ans=0.1 +2024-09-19 21:26:09,799 INFO [train.py:1198] (1/2) Epoch 42, batch 1550, loss[loss=0.2546, ctc_loss=0.1265, cr_loss=0.4009, attn_decoder_loss=0.2599, over 29488.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1115, cr_loss=0.3521, attn_decoder_loss=0.2387, over 5781523.24 frames. ], batch size: 90, lr: 2.63e-03, grad_scale: 8.0 +2024-09-19 21:26:16,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=748300.0, ans=0.125 +2024-09-19 21:26:27,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=748340.0, ans=0.1 +2024-09-19 21:26:27,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=748340.0, ans=0.07 +2024-09-19 21:26:36,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=748340.0, ans=0.2 +2024-09-19 21:26:49,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=748380.0, ans=0.125 +2024-09-19 21:27:01,645 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:27:04,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=748420.0, ans=0.0 +2024-09-19 21:27:09,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=748420.0, ans=0.2 +2024-09-19 21:27:26,805 INFO [train.py:1198] (1/2) Epoch 42, batch 1600, loss[loss=0.2485, ctc_loss=0.1169, cr_loss=0.3599, attn_decoder_loss=0.2551, over 29682.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1117, cr_loss=0.3525, attn_decoder_loss=0.2387, over 5763158.73 frames. 
], batch size: 85, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:27:30,109 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:27:31,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=748500.0, ans=0.0 +2024-09-19 21:28:11,732 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.69 vs. limit=15.0 +2024-09-19 21:28:14,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=748620.0, ans=0.0 +2024-09-19 21:28:20,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=748620.0, ans=0.0 +2024-09-19 21:28:32,057 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.369e+01 8.535e+01 9.042e+01 9.603e+01 1.807e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-19 21:28:43,813 INFO [train.py:1198] (1/2) Epoch 42, batch 1650, loss[loss=0.2472, ctc_loss=0.119, cr_loss=0.3537, attn_decoder_loss=0.2536, over 29703.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1116, cr_loss=0.3518, attn_decoder_loss=0.2385, over 5758948.08 frames. ], batch size: 89, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:28:45,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=748700.0, ans=0.2 +2024-09-19 21:28:59,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=748740.0, ans=0.0 +2024-09-19 21:29:03,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=748740.0, ans=0.125 +2024-09-19 21:29:09,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=748740.0, ans=0.125 +2024-09-19 21:29:17,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=748780.0, ans=0.0 +2024-09-19 21:29:26,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=748780.0, ans=0.125 +2024-09-19 21:29:48,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=748860.0, ans=0.125 +2024-09-19 21:29:56,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=748860.0, ans=0.2 +2024-09-19 21:29:59,138 INFO [train.py:1198] (1/2) Epoch 42, batch 1700, loss[loss=0.213, ctc_loss=0.09967, cr_loss=0.316, attn_decoder_loss=0.2186, over 29570.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1112, cr_loss=0.3508, attn_decoder_loss=0.2383, over 5781474.60 frames. 
], batch size: 69, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:30:34,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=748980.0, ans=0.1 +2024-09-19 21:30:49,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=749020.0, ans=0.125 +2024-09-19 21:31:02,255 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.72 vs. limit=15.0 +2024-09-19 21:31:04,407 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.536e+01 8.510e+01 9.136e+01 9.466e+01 1.659e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 21:31:15,935 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.74 vs. limit=22.5 +2024-09-19 21:31:16,643 INFO [train.py:1198] (1/2) Epoch 42, batch 1750, loss[loss=0.2027, ctc_loss=0.08569, cr_loss=0.2903, attn_decoder_loss=0.2092, over 29351.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1106, cr_loss=0.3498, attn_decoder_loss=0.2378, over 5790177.28 frames. ], batch size: 67, lr: 2.63e-03, grad_scale: 16.0 +2024-09-19 21:31:22,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=749100.0, ans=0.09899494936611666 +2024-09-19 21:31:35,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=749140.0, ans=0.125 +2024-09-19 21:31:50,019 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.91 vs. limit=22.5 +2024-09-19 21:31:57,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=749180.0, ans=0.125 +2024-09-19 21:32:07,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=749220.0, ans=0.1 +2024-09-19 21:32:18,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=749260.0, ans=0.0 +2024-09-19 21:32:34,205 INFO [train.py:1198] (1/2) Epoch 42, batch 1800, loss[loss=0.2285, ctc_loss=0.1017, cr_loss=0.3379, attn_decoder_loss=0.2351, over 29673.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.111, cr_loss=0.3504, attn_decoder_loss=0.2381, over 5791961.95 frames. ], batch size: 83, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:32:43,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=749300.0, ans=0.0 +2024-09-19 21:32:52,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=749340.0, ans=0.125 +2024-09-19 21:32:54,849 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.98 vs. 
limit=15.0 +2024-09-19 21:33:07,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=749380.0, ans=0.1 +2024-09-19 21:33:13,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=749380.0, ans=0.125 +2024-09-19 21:33:15,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=749380.0, ans=0.2 +2024-09-19 21:33:15,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=749380.0, ans=0.1 +2024-09-19 21:33:39,226 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.425e+01 8.440e+01 8.862e+01 9.428e+01 1.419e+02, threshold=1.772e+02, percent-clipped=0.0 +2024-09-19 21:33:49,904 INFO [train.py:1198] (1/2) Epoch 42, batch 1850, loss[loss=0.2352, ctc_loss=0.108, cr_loss=0.3374, attn_decoder_loss=0.2419, over 29617.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1109, cr_loss=0.35, attn_decoder_loss=0.2378, over 5797242.13 frames. ], batch size: 86, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:33:50,671 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.33 vs. limit=22.5 +2024-09-19 21:34:00,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=749500.0, ans=0.1 +2024-09-19 21:34:08,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=749540.0, ans=0.07 +2024-09-19 21:34:10,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=749540.0, ans=0.0 +2024-09-19 21:34:14,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=749540.0, ans=0.2 +2024-09-19 21:34:24,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=749580.0, ans=22.5 +2024-09-19 21:34:25,737 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=5.15 vs. limit=15.0 +2024-09-19 21:34:33,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=749580.0, ans=0.09899494936611666 +2024-09-19 21:34:45,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=749620.0, ans=0.1 +2024-09-19 21:34:48,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=749620.0, ans=0.125 +2024-09-19 21:34:55,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=749660.0, ans=0.025 +2024-09-19 21:35:06,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=749700.0, ans=0.125 +2024-09-19 21:35:07,214 INFO [train.py:1198] (1/2) Epoch 42, batch 1900, loss[loss=0.2467, ctc_loss=0.1223, cr_loss=0.3814, attn_decoder_loss=0.2521, over 29724.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1113, cr_loss=0.3512, attn_decoder_loss=0.2386, over 5805229.18 frames. 
], batch size: 89, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:35:30,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=749740.0, ans=0.0 +2024-09-19 21:35:43,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=749780.0, ans=0.1 +2024-09-19 21:36:01,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=749820.0, ans=0.125 +2024-09-19 21:36:07,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=749820.0, ans=0.0 +2024-09-19 21:36:07,797 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. limit=6.0 +2024-09-19 21:36:13,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=749860.0, ans=0.1 +2024-09-19 21:36:14,505 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.827e+01 8.670e+01 9.049e+01 9.659e+01 1.303e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-19 21:36:20,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=749860.0, ans=0.125 +2024-09-19 21:36:20,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=749860.0, ans=0.125 +2024-09-19 21:36:23,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=749900.0, ans=0.125 +2024-09-19 21:36:24,979 INFO [train.py:1198] (1/2) Epoch 42, batch 1950, loss[loss=0.2307, ctc_loss=0.1152, cr_loss=0.3642, attn_decoder_loss=0.2355, over 29435.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1119, cr_loss=0.3529, attn_decoder_loss=0.2396, over 5818932.10 frames. ], batch size: 78, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:36:37,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=749900.0, ans=0.125 +2024-09-19 21:36:50,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=749940.0, ans=0.025 +2024-09-19 21:37:13,756 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.06 vs. limit=15.0 +2024-09-19 21:37:40,247 INFO [train.py:1198] (1/2) Epoch 42, batch 2000, loss[loss=0.2012, ctc_loss=0.09356, cr_loss=0.3016, attn_decoder_loss=0.2064, over 29365.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1121, cr_loss=0.3524, attn_decoder_loss=0.2397, over 5796963.05 frames. 
], batch size: 67, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:37:40,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=750100.0, ans=0.1 +2024-09-19 21:37:50,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=750100.0, ans=0.2 +2024-09-19 21:37:59,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=750140.0, ans=0.125 +2024-09-19 21:38:17,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.12 vs. limit=22.5 +2024-09-19 21:38:29,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=750220.0, ans=0.1 +2024-09-19 21:38:35,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=750220.0, ans=0.025 +2024-09-19 21:38:47,685 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.819e+01 8.670e+01 9.136e+01 9.850e+01 1.573e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-19 21:38:58,256 INFO [train.py:1198] (1/2) Epoch 42, batch 2050, loss[loss=0.2036, ctc_loss=0.09471, cr_loss=0.314, attn_decoder_loss=0.2087, over 29419.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1117, cr_loss=0.3516, attn_decoder_loss=0.2387, over 5788210.95 frames. ], batch size: 70, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:38:59,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=750300.0, ans=0.125 +2024-09-19 21:39:13,785 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=750340.0, ans=0.0 +2024-09-19 21:39:38,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=750380.0, ans=0.125 +2024-09-19 21:39:38,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=750380.0, ans=0.125 +2024-09-19 21:39:49,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.85 vs. limit=10.0 +2024-09-19 21:40:03,020 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:40:08,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=750460.0, ans=0.0 +2024-09-19 21:40:16,254 INFO [train.py:1198] (1/2) Epoch 42, batch 2100, loss[loss=0.2283, ctc_loss=0.109, cr_loss=0.3387, attn_decoder_loss=0.2341, over 29746.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1113, cr_loss=0.3506, attn_decoder_loss=0.2383, over 5800069.51 frames. 
], batch size: 81, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:40:18,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=750500.0, ans=0.1 +2024-09-19 21:40:43,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=750540.0, ans=0.125 +2024-09-19 21:40:48,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=750580.0, ans=0.2 +2024-09-19 21:40:59,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=750620.0, ans=0.125 +2024-09-19 21:41:01,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=750620.0, ans=0.1 +2024-09-19 21:41:11,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=750620.0, ans=0.025 +2024-09-19 21:41:20,503 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.318e+01 8.604e+01 9.019e+01 9.390e+01 1.185e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-19 21:41:31,110 INFO [train.py:1198] (1/2) Epoch 42, batch 2150, loss[loss=0.222, ctc_loss=0.1038, cr_loss=0.3346, attn_decoder_loss=0.2277, over 29420.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1108, cr_loss=0.3501, attn_decoder_loss=0.2378, over 5815176.47 frames. ], batch size: 78, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:41:38,244 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=4.98 vs. limit=15.0 +2024-09-19 21:41:50,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=750740.0, ans=0.125 +2024-09-19 21:42:23,117 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:42:24,500 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:42:35,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.78 vs. limit=12.0 +2024-09-19 21:42:38,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=750860.0, ans=0.035 +2024-09-19 21:42:48,186 INFO [train.py:1198] (1/2) Epoch 42, batch 2200, loss[loss=0.2338, ctc_loss=0.1057, cr_loss=0.3374, attn_decoder_loss=0.2405, over 29635.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1112, cr_loss=0.3511, attn_decoder_loss=0.2381, over 5812632.41 frames. ], batch size: 86, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:42:56,592 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.99 vs. 
limit=15.0 +2024-09-19 21:43:40,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=751020.0, ans=0.0 +2024-09-19 21:43:55,413 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.242e+01 8.649e+01 8.991e+01 9.667e+01 4.201e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-19 21:44:06,041 INFO [train.py:1198] (1/2) Epoch 42, batch 2250, loss[loss=0.2364, ctc_loss=0.1073, cr_loss=0.3497, attn_decoder_loss=0.2429, over 29687.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1109, cr_loss=0.3504, attn_decoder_loss=0.2381, over 5811982.84 frames. ], batch size: 82, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:44:18,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=751100.0, ans=0.1 +2024-09-19 21:44:22,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=751140.0, ans=0.125 +2024-09-19 21:44:51,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=751220.0, ans=0.0 +2024-09-19 21:45:00,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=751220.0, ans=0.1 +2024-09-19 21:45:21,538 INFO [train.py:1198] (1/2) Epoch 42, batch 2300, loss[loss=0.21, ctc_loss=0.09376, cr_loss=0.3026, attn_decoder_loss=0.2162, over 29318.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1104, cr_loss=0.3489, attn_decoder_loss=0.2372, over 5800269.49 frames. ], batch size: 71, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:45:37,074 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.31 vs. limit=15.0 +2024-09-19 21:45:43,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=751340.0, ans=0.0 +2024-09-19 21:45:52,718 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.56 vs. limit=10.0 +2024-09-19 21:45:53,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=751380.0, ans=0.125 +2024-09-19 21:46:04,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=751380.0, ans=0.025 +2024-09-19 21:46:23,225 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.77 vs. limit=15.0 +2024-09-19 21:46:28,381 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.932e+01 8.459e+01 9.034e+01 9.702e+01 2.715e+02, threshold=1.807e+02, percent-clipped=2.0 +2024-09-19 21:46:39,162 INFO [train.py:1198] (1/2) Epoch 42, batch 2350, loss[loss=0.2401, ctc_loss=0.1132, cr_loss=0.3404, attn_decoder_loss=0.2467, over 29706.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1107, cr_loss=0.3492, attn_decoder_loss=0.2373, over 5804097.39 frames. 
], batch size: 83, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:46:46,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=751500.0, ans=0.1 +2024-09-19 21:46:51,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=751500.0, ans=0.125 +2024-09-19 21:47:18,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=751580.0, ans=0.125 +2024-09-19 21:47:40,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=751660.0, ans=0.2 +2024-09-19 21:47:56,840 INFO [train.py:1198] (1/2) Epoch 42, batch 2400, loss[loss=0.2148, ctc_loss=0.09678, cr_loss=0.3127, attn_decoder_loss=0.221, over 29527.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1112, cr_loss=0.3505, attn_decoder_loss=0.2379, over 5808217.01 frames. ], batch size: 76, lr: 2.62e-03, grad_scale: 32.0 +2024-09-19 21:47:58,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_positive, batch_count=751700.0, ans=0.05 +2024-09-19 21:48:08,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=751700.0, ans=0.05 +2024-09-19 21:48:11,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=751740.0, ans=0.2 +2024-09-19 21:48:16,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=751740.0, ans=0.2 +2024-09-19 21:48:29,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.53 vs. limit=22.5 +2024-09-19 21:49:03,289 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.572e+01 8.663e+01 9.186e+01 9.777e+01 4.524e+02, threshold=1.837e+02, percent-clipped=1.0 +2024-09-19 21:49:12,370 INFO [train.py:1198] (1/2) Epoch 42, batch 2450, loss[loss=0.2418, ctc_loss=0.1241, cr_loss=0.3811, attn_decoder_loss=0.2464, over 29699.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1117, cr_loss=0.3516, attn_decoder_loss=0.2388, over 5783888.25 frames. ], batch size: 82, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:49:25,720 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.82 vs. limit=15.0 +2024-09-19 21:49:28,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=751940.0, ans=0.025 +2024-09-19 21:49:38,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=751940.0, ans=0.0 +2024-09-19 21:49:41,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=751940.0, ans=0.1 +2024-09-19 21:49:49,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=751980.0, ans=0.1 +2024-09-19 21:50:00,547 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.48 vs. 
limit=22.5 +2024-09-19 21:50:01,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=751980.0, ans=0.2 +2024-09-19 21:50:08,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=752020.0, ans=0.2 +2024-09-19 21:50:15,342 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.51 vs. limit=15.0 +2024-09-19 21:50:22,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=752060.0, ans=0.025 +2024-09-19 21:50:31,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=752060.0, ans=0.05 +2024-09-19 21:50:37,331 INFO [train.py:1198] (1/2) Epoch 42, batch 2500, loss[loss=0.2388, ctc_loss=0.1135, cr_loss=0.3476, attn_decoder_loss=0.245, over 29611.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1118, cr_loss=0.3522, attn_decoder_loss=0.239, over 5794303.28 frames. ], batch size: 86, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:50:54,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=752140.0, ans=0.125 +2024-09-19 21:51:25,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=752220.0, ans=0.1 +2024-09-19 21:51:32,363 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.91 vs. limit=10.0 +2024-09-19 21:51:34,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=752220.0, ans=0.125 +2024-09-19 21:51:36,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=752220.0, ans=0.0 +2024-09-19 21:51:46,109 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.657e+01 8.685e+01 9.215e+01 9.799e+01 2.260e+02, threshold=1.843e+02, percent-clipped=2.0 +2024-09-19 21:51:54,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten.whitening_limit, batch_count=752300.0, ans=15.0 +2024-09-19 21:51:55,269 INFO [train.py:1198] (1/2) Epoch 42, batch 2550, loss[loss=0.2077, ctc_loss=0.09291, cr_loss=0.3152, attn_decoder_loss=0.2135, over 29356.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1121, cr_loss=0.3526, attn_decoder_loss=0.2392, over 5797332.20 frames. ], batch size: 67, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:52:37,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=752380.0, ans=0.1 +2024-09-19 21:52:41,209 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.33 vs. 
limit=22.5 +2024-09-19 21:52:49,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=752420.0, ans=0.0 +2024-09-19 21:52:50,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=752420.0, ans=0.125 +2024-09-19 21:53:05,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=752460.0, ans=0.125 +2024-09-19 21:53:09,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=752500.0, ans=0.125 +2024-09-19 21:53:10,743 INFO [train.py:1198] (1/2) Epoch 42, batch 2600, loss[loss=0.2314, ctc_loss=0.1055, cr_loss=0.3498, attn_decoder_loss=0.2376, over 29440.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1121, cr_loss=0.3526, attn_decoder_loss=0.2393, over 5793340.08 frames. ], batch size: 78, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:53:18,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=752500.0, ans=0.125 +2024-09-19 21:53:45,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=752580.0, ans=0.1 +2024-09-19 21:53:47,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=752580.0, ans=0.125 +2024-09-19 21:53:59,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=752620.0, ans=0.125 +2024-09-19 21:54:02,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=752620.0, ans=0.125 +2024-09-19 21:54:12,002 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 21:54:18,979 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.622e+01 9.143e+01 9.724e+01 1.437e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-19 21:54:19,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=752660.0, ans=0.0 +2024-09-19 21:54:27,296 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.72 vs. limit=10.0 +2024-09-19 21:54:27,792 INFO [train.py:1198] (1/2) Epoch 42, batch 2650, loss[loss=0.2489, ctc_loss=0.1213, cr_loss=0.38, attn_decoder_loss=0.2546, over 29212.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1123, cr_loss=0.3534, attn_decoder_loss=0.2396, over 5798576.53 frames. ], batch size: 100, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:54:31,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=752700.0, ans=0.125 +2024-09-19 21:54:48,278 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.64 vs. 
limit=10.0 +2024-09-19 21:55:26,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=752820.0, ans=22.5 +2024-09-19 21:55:31,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=752860.0, ans=0.125 +2024-09-19 21:55:45,581 INFO [train.py:1198] (1/2) Epoch 42, batch 2700, loss[loss=0.2359, ctc_loss=0.1127, cr_loss=0.3489, attn_decoder_loss=0.2418, over 29536.00 frames. ], tot_loss[loss=0.2342, ctc_loss=0.1125, cr_loss=0.354, attn_decoder_loss=0.2399, over 5795450.67 frames. ], batch size: 87, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:55:45,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=752900.0, ans=0.125 +2024-09-19 21:55:45,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=752900.0, ans=0.1 +2024-09-19 21:55:46,278 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.49 vs. limit=15.0 +2024-09-19 21:56:17,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=752980.0, ans=0.1 +2024-09-19 21:56:51,999 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.647e+01 8.707e+01 9.259e+01 9.781e+01 2.020e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-19 21:56:58,318 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=753060.0, ans=0.125 +2024-09-19 21:57:00,352 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.89 vs. limit=12.0 +2024-09-19 21:57:01,147 INFO [train.py:1198] (1/2) Epoch 42, batch 2750, loss[loss=0.2257, ctc_loss=0.1091, cr_loss=0.3364, attn_decoder_loss=0.2311, over 29506.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1116, cr_loss=0.3524, attn_decoder_loss=0.2386, over 5794203.58 frames. ], batch size: 75, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 21:57:03,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=753100.0, ans=0.125 +2024-09-19 21:57:15,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=753100.0, ans=0.09899494936611666 +2024-09-19 21:57:17,831 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.82 vs. 
limit=22.5 +2024-09-19 21:57:24,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=753140.0, ans=0.125 +2024-09-19 21:57:36,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=753180.0, ans=0.0 +2024-09-19 21:57:48,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=753220.0, ans=0.125 +2024-09-19 21:57:48,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=753220.0, ans=0.0 +2024-09-19 21:57:53,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.61 vs. limit=22.5 +2024-09-19 21:57:57,878 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.70 vs. limit=15.0 +2024-09-19 21:58:15,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=753260.0, ans=0.2 +2024-09-19 21:58:18,585 INFO [train.py:1198] (1/2) Epoch 42, batch 2800, loss[loss=0.2599, ctc_loss=0.1541, cr_loss=0.3868, attn_decoder_loss=0.2631, over 20355.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1116, cr_loss=0.3521, attn_decoder_loss=0.2386, over 5774225.89 frames. ], batch size: 209, lr: 2.62e-03, grad_scale: 32.0 +2024-09-19 21:58:21,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=753300.0, ans=0.125 +2024-09-19 21:58:24,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=753300.0, ans=0.0 +2024-09-19 21:58:38,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=753340.0, ans=0.125 +2024-09-19 21:58:40,211 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.31 vs. limit=22.5 +2024-09-19 21:58:40,227 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.59 vs. limit=15.0 +2024-09-19 21:58:45,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=753340.0, ans=0.09899494936611666 +2024-09-19 21:58:47,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=753380.0, ans=0.0 +2024-09-19 21:58:54,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=753380.0, ans=0.125 +2024-09-19 21:59:19,817 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.12 vs. 
limit=22.5 +2024-09-19 21:59:23,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=753460.0, ans=0.1 +2024-09-19 21:59:29,300 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.509e+01 8.790e+01 9.273e+01 9.887e+01 2.081e+02, threshold=1.855e+02, percent-clipped=1.0 +2024-09-19 21:59:35,330 INFO [train.py:1198] (1/2) Epoch 42, batch 2850, loss[loss=0.2278, ctc_loss=0.1093, cr_loss=0.3615, attn_decoder_loss=0.233, over 29464.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.112, cr_loss=0.353, attn_decoder_loss=0.2391, over 5761404.64 frames. ], batch size: 77, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 21:59:43,722 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.64 vs. limit=15.0 +2024-09-19 21:59:43,845 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.97 vs. limit=15.0 +2024-09-19 21:59:47,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=753500.0, ans=0.07 +2024-09-19 21:59:55,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=753540.0, ans=0.125 +2024-09-19 22:00:08,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=753580.0, ans=0.2 +2024-09-19 22:00:25,047 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.85 vs. limit=22.5 +2024-09-19 22:00:30,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=753620.0, ans=0.125 +2024-09-19 22:00:51,144 INFO [train.py:1198] (1/2) Epoch 42, batch 2900, loss[loss=0.2325, ctc_loss=0.1091, cr_loss=0.3516, attn_decoder_loss=0.2384, over 29431.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1126, cr_loss=0.3542, attn_decoder_loss=0.2402, over 5786903.65 frames. ], batch size: 79, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:01:12,426 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.63 vs. limit=22.5 +2024-09-19 22:01:12,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.60 vs. limit=15.0 +2024-09-19 22:01:24,740 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.14 vs. 
limit=15.0 +2024-09-19 22:01:40,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=753820.0, ans=0.125 +2024-09-19 22:01:52,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=753860.0, ans=0.025 +2024-09-19 22:02:00,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=753860.0, ans=0.2 +2024-09-19 22:02:02,878 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.702e+01 8.715e+01 9.227e+01 9.833e+01 2.599e+02, threshold=1.845e+02, percent-clipped=1.0 +2024-09-19 22:02:07,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=753900.0, ans=0.125 +2024-09-19 22:02:08,884 INFO [train.py:1198] (1/2) Epoch 42, batch 2950, loss[loss=0.2297, ctc_loss=0.1161, cr_loss=0.3521, attn_decoder_loss=0.2345, over 29503.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1116, cr_loss=0.3517, attn_decoder_loss=0.2389, over 5781682.57 frames. ], batch size: 75, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:02:31,014 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.32 vs. limit=12.0 +2024-09-19 22:02:31,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=753940.0, ans=0.2 +2024-09-19 22:02:43,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=753980.0, ans=0.0 +2024-09-19 22:02:44,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys.whitening_limit, batch_count=753980.0, ans=6.0 +2024-09-19 22:02:57,670 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.83 vs. limit=15.0 +2024-09-19 22:03:10,538 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.28 vs. limit=12.0 +2024-09-19 22:03:11,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=754060.0, ans=0.0 +2024-09-19 22:03:26,596 INFO [train.py:1198] (1/2) Epoch 42, batch 3000, loss[loss=0.2233, ctc_loss=0.1062, cr_loss=0.3445, attn_decoder_loss=0.2286, over 29742.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.111, cr_loss=0.3507, attn_decoder_loss=0.2384, over 5782820.15 frames. ], batch size: 81, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:03:26,597 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 22:03:44,994 INFO [train.py:1230] (1/2) Epoch 42, validation: loss=0.212, ctc_loss=0.03659, cr_loss=6.044e-15, attn_decoder_loss=0.2315, over 944034.00 frames. 
+2024-09-19 22:03:44,994 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 22:04:06,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=754140.0, ans=0.125 +2024-09-19 22:04:08,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=754140.0, ans=0.0 +2024-09-19 22:04:32,431 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 22:04:35,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=754220.0, ans=0.1 +2024-09-19 22:04:37,343 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.37 vs. limit=6.0 +2024-09-19 22:04:54,807 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.10 vs. limit=22.5 +2024-09-19 22:04:56,824 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.446e+01 8.635e+01 9.210e+01 9.879e+01 1.269e+02, threshold=1.842e+02, percent-clipped=0.0 +2024-09-19 22:05:03,031 INFO [train.py:1198] (1/2) Epoch 42, batch 3050, loss[loss=0.2269, ctc_loss=0.1157, cr_loss=0.3756, attn_decoder_loss=0.2309, over 29523.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.112, cr_loss=0.3527, attn_decoder_loss=0.2393, over 5777157.85 frames. ], batch size: 76, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:05:13,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=754300.0, ans=0.1 +2024-09-19 22:05:28,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=754340.0, ans=0.0 +2024-09-19 22:05:35,566 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.70 vs. limit=6.0 +2024-09-19 22:05:36,982 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.30 vs. limit=15.0 +2024-09-19 22:05:39,409 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=754380.0, ans=0.1 +2024-09-19 22:05:41,257 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=754380.0, ans=0.0 +2024-09-19 22:05:43,017 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 22:05:48,129 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.88 vs. limit=12.0 +2024-09-19 22:06:08,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=754460.0, ans=0.0 +2024-09-19 22:06:17,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=754500.0, ans=0.125 +2024-09-19 22:06:18,285 INFO [train.py:1198] (1/2) Epoch 42, batch 3100, loss[loss=0.2447, ctc_loss=0.1205, cr_loss=0.3463, attn_decoder_loss=0.2507, over 29250.00 frames. 
], tot_loss[loss=0.2331, ctc_loss=0.1115, cr_loss=0.3511, attn_decoder_loss=0.2388, over 5776606.62 frames. ], batch size: 100, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:06:38,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=754540.0, ans=0.125 +2024-09-19 22:06:47,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=754580.0, ans=0.125 +2024-09-19 22:06:56,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=754580.0, ans=0.5 +2024-09-19 22:07:17,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.13 vs. limit=15.0 +2024-09-19 22:07:25,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=754660.0, ans=0.0 +2024-09-19 22:07:30,010 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.298e+01 8.575e+01 9.075e+01 9.708e+01 6.330e+02, threshold=1.815e+02, percent-clipped=2.0 +2024-09-19 22:07:34,174 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=13.03 vs. limit=22.5 +2024-09-19 22:07:36,113 INFO [train.py:1198] (1/2) Epoch 42, batch 3150, loss[loss=0.2451, ctc_loss=0.1251, cr_loss=0.3831, attn_decoder_loss=0.2499, over 28872.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1115, cr_loss=0.3515, attn_decoder_loss=0.2389, over 5782731.97 frames. ], batch size: 104, lr: 2.62e-03, grad_scale: 8.0 +2024-09-19 22:07:36,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=754700.0, ans=0.1 +2024-09-19 22:07:49,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=754740.0, ans=0.0 +2024-09-19 22:08:01,204 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.01 vs. limit=15.0 +2024-09-19 22:08:05,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=754780.0, ans=0.125 +2024-09-19 22:08:12,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=754780.0, ans=0.125 +2024-09-19 22:08:17,743 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.85 vs. limit=12.0 +2024-09-19 22:08:53,272 INFO [train.py:1198] (1/2) Epoch 42, batch 3200, loss[loss=0.2429, ctc_loss=0.1253, cr_loss=0.3862, attn_decoder_loss=0.2473, over 29409.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1111, cr_loss=0.3504, attn_decoder_loss=0.2382, over 5792088.41 frames. 
], batch size: 79, lr: 2.62e-03, grad_scale: 16.0 +2024-09-19 22:08:53,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=754900.0, ans=0.0 +2024-09-19 22:09:21,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=754940.0, ans=0.125 +2024-09-19 22:09:40,031 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.11 vs. limit=15.0 +2024-09-19 22:09:44,874 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.28 vs. limit=15.0 +2024-09-19 22:10:03,485 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.581e+01 9.115e+01 9.616e+01 1.393e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-19 22:10:09,482 INFO [train.py:1198] (1/2) Epoch 42, batch 3250, loss[loss=0.2355, ctc_loss=0.1124, cr_loss=0.3572, attn_decoder_loss=0.2412, over 29709.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1115, cr_loss=0.3514, attn_decoder_loss=0.239, over 5797780.25 frames. ], batch size: 84, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:10:09,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=755100.0, ans=0.125 +2024-09-19 22:10:34,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=755140.0, ans=0.0 +2024-09-19 22:10:39,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=755180.0, ans=0.5 +2024-09-19 22:10:58,610 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 22:11:01,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=755220.0, ans=0.125 +2024-09-19 22:11:22,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=755260.0, ans=0.1 +2024-09-19 22:11:26,689 INFO [train.py:1198] (1/2) Epoch 42, batch 3300, loss[loss=0.2385, ctc_loss=0.108, cr_loss=0.34, attn_decoder_loss=0.2455, over 28205.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1106, cr_loss=0.3493, attn_decoder_loss=0.2376, over 5796266.26 frames. ], batch size: 111, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:11:27,084 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=755300.0, ans=0.125 +2024-09-19 22:11:54,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=755340.0, ans=0.125 +2024-09-19 22:12:15,825 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.93 vs. 
limit=10.0 +2024-09-19 22:12:29,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=755460.0, ans=0.1 +2024-09-19 22:12:36,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=755460.0, ans=0.1 +2024-09-19 22:12:39,475 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.752e+01 8.624e+01 9.226e+01 9.886e+01 3.496e+02, threshold=1.845e+02, percent-clipped=4.0 +2024-09-19 22:12:44,130 INFO [train.py:1198] (1/2) Epoch 42, batch 3350, loss[loss=0.243, ctc_loss=0.1149, cr_loss=0.3617, attn_decoder_loss=0.2492, over 28905.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1112, cr_loss=0.3504, attn_decoder_loss=0.2385, over 5773200.21 frames. ], batch size: 104, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:12:49,522 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.58 vs. limit=15.0 +2024-09-19 22:13:13,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=755580.0, ans=0.0 +2024-09-19 22:13:32,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=755620.0, ans=0.125 +2024-09-19 22:13:44,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=755660.0, ans=0.0 +2024-09-19 22:13:46,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.min_positive, batch_count=755660.0, ans=0.05 +2024-09-19 22:13:59,650 INFO [train.py:1198] (1/2) Epoch 42, batch 3400, loss[loss=0.1948, ctc_loss=0.08564, cr_loss=0.3038, attn_decoder_loss=0.2001, over 29339.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1112, cr_loss=0.3506, attn_decoder_loss=0.2384, over 5767081.18 frames. ], batch size: 67, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:14:18,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=755740.0, ans=0.125 +2024-09-19 22:14:18,154 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 22:14:18,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=755740.0, ans=0.125 +2024-09-19 22:14:23,463 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=6.59 vs. limit=15.0 +2024-09-19 22:14:34,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=755780.0, ans=0.125 +2024-09-19 22:15:12,723 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.652e+01 8.618e+01 8.954e+01 9.599e+01 1.831e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-19 22:15:17,196 INFO [train.py:1198] (1/2) Epoch 42, batch 3450, loss[loss=0.252, ctc_loss=0.1273, cr_loss=0.3955, attn_decoder_loss=0.257, over 28324.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1112, cr_loss=0.3508, attn_decoder_loss=0.2385, over 5776014.67 frames. 
], batch size: 111, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:15:44,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=755940.0, ans=0.025 +2024-09-19 22:15:45,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=755980.0, ans=0.2 +2024-09-19 22:15:51,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=755980.0, ans=0.125 +2024-09-19 22:16:00,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=755980.0, ans=0.125 +2024-09-19 22:16:09,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=756020.0, ans=0.07 +2024-09-19 22:16:28,310 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.29 vs. limit=10.0 +2024-09-19 22:16:34,970 INFO [train.py:1198] (1/2) Epoch 42, batch 3500, loss[loss=0.2092, ctc_loss=0.09518, cr_loss=0.3179, attn_decoder_loss=0.2149, over 29339.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1109, cr_loss=0.3498, attn_decoder_loss=0.2378, over 5777161.48 frames. ], batch size: 71, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:16:35,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=756100.0, ans=0.2 +2024-09-19 22:16:36,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=756100.0, ans=0.1 +2024-09-19 22:16:38,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=756100.0, ans=0.0 +2024-09-19 22:16:44,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=756100.0, ans=0.125 +2024-09-19 22:16:53,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=756140.0, ans=0.2 +2024-09-19 22:17:21,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=756220.0, ans=0.025 +2024-09-19 22:17:39,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=756260.0, ans=0.07 +2024-09-19 22:17:44,786 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.395e+01 8.622e+01 9.000e+01 9.662e+01 3.411e+02, threshold=1.800e+02, percent-clipped=1.0 +2024-09-19 22:17:49,259 INFO [train.py:1198] (1/2) Epoch 42, batch 3550, loss[loss=0.2407, ctc_loss=0.1126, cr_loss=0.3455, attn_decoder_loss=0.2472, over 29693.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.111, cr_loss=0.3502, attn_decoder_loss=0.2381, over 5783445.41 frames. 
], batch size: 89, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:18:29,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=756380.0, ans=0.07 +2024-09-19 22:18:32,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=756420.0, ans=0.125 +2024-09-19 22:18:54,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=756460.0, ans=0.1 +2024-09-19 22:19:00,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=756460.0, ans=0.95 +2024-09-19 22:19:01,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=756500.0, ans=0.125 +2024-09-19 22:19:02,913 INFO [train.py:1198] (1/2) Epoch 42, batch 3600, loss[loss=0.2342, ctc_loss=0.1142, cr_loss=0.3729, attn_decoder_loss=0.2392, over 29506.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.111, cr_loss=0.3504, attn_decoder_loss=0.238, over 5792490.81 frames. ], batch size: 77, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:19:10,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=756500.0, ans=0.2 +2024-09-19 22:19:16,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=756540.0, ans=0.125 +2024-09-19 22:19:24,909 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.99 vs. limit=10.0 +2024-09-19 22:20:07,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=756660.0, ans=0.125 +2024-09-19 22:20:14,584 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.291e+01 8.526e+01 8.930e+01 9.587e+01 1.613e+02, threshold=1.786e+02, percent-clipped=0.0 +2024-09-19 22:20:19,034 INFO [train.py:1198] (1/2) Epoch 42, batch 3650, loss[loss=0.2557, ctc_loss=0.1338, cr_loss=0.4009, attn_decoder_loss=0.2604, over 29474.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1108, cr_loss=0.3506, attn_decoder_loss=0.2376, over 5794608.58 frames. ], batch size: 90, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:20:49,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=756780.0, ans=0.125 +2024-09-19 22:20:49,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=756780.0, ans=0.125 +2024-09-19 22:20:57,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=756780.0, ans=0.1 +2024-09-19 22:21:34,293 INFO [train.py:1198] (1/2) Epoch 42, batch 3700, loss[loss=0.2485, ctc_loss=0.1207, cr_loss=0.3712, attn_decoder_loss=0.2545, over 29712.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1108, cr_loss=0.3505, attn_decoder_loss=0.2379, over 5803300.20 frames. 
], batch size: 84, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:21:46,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=756900.0, ans=0.2 +2024-09-19 22:22:04,265 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.30 vs. limit=15.0 +2024-09-19 22:22:19,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=757020.0, ans=0.2 +2024-09-19 22:22:25,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=757020.0, ans=0.2 +2024-09-19 22:22:44,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=757060.0, ans=0.0 +2024-09-19 22:22:45,731 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.08 vs. limit=10.0 +2024-09-19 22:22:46,050 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.519e+01 8.506e+01 9.125e+01 9.829e+01 2.175e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-19 22:22:50,498 INFO [train.py:1198] (1/2) Epoch 42, batch 3750, loss[loss=0.2084, ctc_loss=0.08639, cr_loss=0.3078, attn_decoder_loss=0.2152, over 29329.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1103, cr_loss=0.3492, attn_decoder_loss=0.2375, over 5807490.48 frames. ], batch size: 67, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:22:52,855 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=5.14 vs. limit=15.0 +2024-09-19 22:22:56,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=757100.0, ans=0.125 +2024-09-19 22:23:02,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=757100.0, ans=0.2 +2024-09-19 22:23:15,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=757140.0, ans=0.125 +2024-09-19 22:23:24,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=757180.0, ans=0.07 +2024-09-19 22:23:33,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=757220.0, ans=0.0 +2024-09-19 22:23:49,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=757260.0, ans=0.125 +2024-09-19 22:24:04,584 INFO [train.py:1198] (1/2) Epoch 42, batch 3800, loss[loss=0.2343, ctc_loss=0.106, cr_loss=0.3517, attn_decoder_loss=0.2407, over 29635.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.11, cr_loss=0.3481, attn_decoder_loss=0.2369, over 5796864.96 frames. 
], batch size: 86, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:24:07,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=757300.0, ans=0.1 +2024-09-19 22:24:37,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=757380.0, ans=0.125 +2024-09-19 22:24:50,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=757420.0, ans=0.125 +2024-09-19 22:24:52,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=757420.0, ans=0.125 +2024-09-19 22:25:03,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=757460.0, ans=0.125 +2024-09-19 22:25:12,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=757460.0, ans=0.0 +2024-09-19 22:25:13,884 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.649e+01 9.029e+01 9.772e+01 5.131e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-19 22:25:18,244 INFO [train.py:1198] (1/2) Epoch 42, batch 3850, loss[loss=0.2349, ctc_loss=0.1088, cr_loss=0.3498, attn_decoder_loss=0.2412, over 29298.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.11, cr_loss=0.3481, attn_decoder_loss=0.2368, over 5811276.18 frames. ], batch size: 100, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:25:18,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=757500.0, ans=0.125 +2024-09-19 22:25:50,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.89 vs. limit=6.0 +2024-09-19 22:26:04,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=757620.0, ans=0.1 +2024-09-19 22:26:09,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=757620.0, ans=10.0 +2024-09-19 22:26:25,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=757660.0, ans=0.2 +2024-09-19 22:26:26,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=757660.0, ans=0.0 +2024-09-19 22:26:30,172 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.24 vs. limit=15.0 +2024-09-19 22:26:33,899 INFO [train.py:1198] (1/2) Epoch 42, batch 3900, loss[loss=0.2406, ctc_loss=0.1217, cr_loss=0.3841, attn_decoder_loss=0.2453, over 29619.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1104, cr_loss=0.3491, attn_decoder_loss=0.2374, over 5814872.43 frames. 
], batch size: 86, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:26:35,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=757700.0, ans=0.025 +2024-09-19 22:26:47,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=757740.0, ans=0.125 +2024-09-19 22:26:50,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=757740.0, ans=0.015 +2024-09-19 22:27:09,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=757780.0, ans=0.0 +2024-09-19 22:27:44,476 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.680e+01 8.643e+01 9.033e+01 9.490e+01 1.279e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-19 22:27:49,120 INFO [train.py:1198] (1/2) Epoch 42, batch 3950, loss[loss=0.2472, ctc_loss=0.1259, cr_loss=0.3939, attn_decoder_loss=0.2519, over 29516.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1101, cr_loss=0.3487, attn_decoder_loss=0.2376, over 5834470.90 frames. ], batch size: 97, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:27:49,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=757900.0, ans=0.125 +2024-09-19 22:28:22,417 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.57 vs. limit=10.0 +2024-09-19 22:28:35,112 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.81 vs. limit=15.0 +2024-09-19 22:28:35,260 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.87 vs. limit=10.0 +2024-09-19 22:28:45,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=758020.0, ans=0.0 +2024-09-19 22:28:55,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=758060.0, ans=0.125 +2024-09-19 22:29:02,375 INFO [train.py:1198] (1/2) Epoch 42, batch 4000, loss[loss=0.2162, ctc_loss=0.09555, cr_loss=0.3211, attn_decoder_loss=0.2225, over 29484.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1103, cr_loss=0.3486, attn_decoder_loss=0.2375, over 5811291.36 frames. 
], batch size: 74, lr: 2.61e-03, grad_scale: 32.0 +2024-09-19 22:29:11,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=758100.0, ans=0.0 +2024-09-19 22:29:11,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=758100.0, ans=0.125 +2024-09-19 22:29:18,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=758140.0, ans=0.125 +2024-09-19 22:29:18,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=758140.0, ans=0.0 +2024-09-19 22:29:55,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=758220.0, ans=0.2 +2024-09-19 22:30:03,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=758260.0, ans=0.125 +2024-09-19 22:30:13,115 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.365e+01 8.535e+01 8.990e+01 9.708e+01 1.890e+02, threshold=1.798e+02, percent-clipped=2.0 +2024-09-19 22:30:16,057 INFO [train.py:1198] (1/2) Epoch 42, batch 4050, loss[loss=0.2552, ctc_loss=0.1376, cr_loss=0.3752, attn_decoder_loss=0.26, over 20786.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1106, cr_loss=0.3489, attn_decoder_loss=0.2376, over 5796178.47 frames. ], batch size: 210, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:30:35,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.48 vs. limit=15.0 +2024-09-19 22:31:00,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=758420.0, ans=0.0 +2024-09-19 22:31:12,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=758420.0, ans=0.125 +2024-09-19 22:31:28,680 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.62 vs. limit=15.0 +2024-09-19 22:31:31,091 INFO [train.py:1198] (1/2) Epoch 42, batch 4100, loss[loss=0.2491, ctc_loss=0.1262, cr_loss=0.4039, attn_decoder_loss=0.2538, over 29503.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1111, cr_loss=0.3502, attn_decoder_loss=0.238, over 5792124.13 frames. ], batch size: 90, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:32:10,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=758580.0, ans=0.0 +2024-09-19 22:32:30,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=758660.0, ans=0.0 +2024-09-19 22:32:37,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=758660.0, ans=0.125 +2024-09-19 22:32:42,742 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.397e+01 8.799e+01 9.217e+01 9.992e+01 1.793e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-19 22:32:45,670 INFO [train.py:1198] (1/2) Epoch 42, batch 4150, loss[loss=0.2265, ctc_loss=0.1123, cr_loss=0.3429, attn_decoder_loss=0.2316, over 29514.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.111, cr_loss=0.3501, attn_decoder_loss=0.2379, over 5798655.96 frames. 
], batch size: 77, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:32:57,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=758700.0, ans=0.2 +2024-09-19 22:33:10,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=758740.0, ans=0.125 +2024-09-19 22:33:21,051 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 22:33:22,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=758780.0, ans=0.125 +2024-09-19 22:33:31,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=758820.0, ans=0.0 +2024-09-19 22:33:33,556 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.83 vs. limit=6.0 +2024-09-19 22:33:44,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=758860.0, ans=0.125 +2024-09-19 22:33:58,999 INFO [train.py:1198] (1/2) Epoch 42, batch 4200, loss[loss=0.2561, ctc_loss=0.1317, cr_loss=0.3952, attn_decoder_loss=0.2612, over 29482.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1111, cr_loss=0.3504, attn_decoder_loss=0.2383, over 5801101.21 frames. ], batch size: 90, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:34:03,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=758900.0, ans=0.2 +2024-09-19 22:34:05,692 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.88 vs. limit=15.0 +2024-09-19 22:34:41,934 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 22:35:10,285 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.217e+01 8.752e+01 9.321e+01 9.976e+01 3.736e+02, threshold=1.864e+02, percent-clipped=1.0 +2024-09-19 22:35:13,188 INFO [train.py:1198] (1/2) Epoch 42, batch 4250, loss[loss=0.2169, ctc_loss=0.1013, cr_loss=0.3247, attn_decoder_loss=0.2225, over 29493.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1104, cr_loss=0.349, attn_decoder_loss=0.2382, over 5806864.92 frames. ], batch size: 74, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:35:38,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=759140.0, ans=0.2 +2024-09-19 22:35:58,834 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.22 vs. 
limit=15.0 +2024-09-19 22:36:02,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=759220.0, ans=0.125 +2024-09-19 22:36:14,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=759260.0, ans=0.5 +2024-09-19 22:36:16,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=759260.0, ans=0.125 +2024-09-19 22:36:27,661 INFO [train.py:1198] (1/2) Epoch 42, batch 4300, loss[loss=0.2432, ctc_loss=0.1174, cr_loss=0.3589, attn_decoder_loss=0.2492, over 29533.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1107, cr_loss=0.3494, attn_decoder_loss=0.2384, over 5796201.35 frames. ], batch size: 87, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:36:33,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=759300.0, ans=0.125 +2024-09-19 22:36:33,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=759300.0, ans=0.09899494936611666 +2024-09-19 22:36:54,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=759340.0, ans=0.125 +2024-09-19 22:37:02,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=759380.0, ans=0.5 +2024-09-19 22:37:09,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=759380.0, ans=0.125 +2024-09-19 22:37:14,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=759420.0, ans=0.2 +2024-09-19 22:37:24,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=759420.0, ans=0.0 +2024-09-19 22:37:31,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=759460.0, ans=0.2 +2024-09-19 22:37:38,936 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.572e+01 8.652e+01 9.323e+01 9.871e+01 1.907e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-19 22:37:41,932 INFO [train.py:1198] (1/2) Epoch 42, batch 4350, loss[loss=0.256, ctc_loss=0.1322, cr_loss=0.3989, attn_decoder_loss=0.2609, over 29460.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1138, cr_loss=0.3559, attn_decoder_loss=0.242, over 5797522.14 frames. ], batch size: 97, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:38:09,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=759540.0, ans=0.125 +2024-09-19 22:38:14,377 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.17 vs. limit=15.0 +2024-09-19 22:38:20,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=759580.0, ans=0.0 +2024-09-19 22:38:51,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.39 vs. 
limit=15.0 +2024-09-19 22:38:52,416 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.65 vs. limit=12.0 +2024-09-19 22:38:56,125 INFO [train.py:1198] (1/2) Epoch 42, batch 4400, loss[loss=0.2494, ctc_loss=0.1263, cr_loss=0.3766, attn_decoder_loss=0.2547, over 27545.00 frames. ], tot_loss[loss=0.238, ctc_loss=0.1146, cr_loss=0.3576, attn_decoder_loss=0.2437, over 5768215.03 frames. ], batch size: 124, lr: 2.61e-03, grad_scale: 32.0 +2024-09-19 22:39:21,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=759740.0, ans=0.125 +2024-09-19 22:39:24,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=759780.0, ans=0.1 +2024-09-19 22:39:32,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=759780.0, ans=0.0 +2024-09-19 22:39:42,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=759820.0, ans=0.1 +2024-09-19 22:39:45,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=759820.0, ans=0.0 +2024-09-19 22:39:57,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=759860.0, ans=0.0 +2024-09-19 22:40:07,760 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.246e+01 9.179e+01 9.547e+01 1.014e+02 2.970e+02, threshold=1.909e+02, percent-clipped=2.0 +2024-09-19 22:40:08,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=759900.0, ans=0.0 +2024-09-19 22:40:09,217 INFO [train.py:1198] (1/2) Epoch 42, batch 4450, loss[loss=0.2553, ctc_loss=0.142, cr_loss=0.3625, attn_decoder_loss=0.2598, over 19998.00 frames. ], tot_loss[loss=0.2403, ctc_loss=0.1181, cr_loss=0.3629, attn_decoder_loss=0.2458, over 5576898.15 frames. ], batch size: 209, lr: 2.61e-03, grad_scale: 16.0 +2024-09-19 22:40:13,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=759900.0, ans=0.125 +2024-09-19 22:40:23,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=759940.0, ans=0.125 +2024-09-19 22:40:31,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=759940.0, ans=0.0 +2024-09-19 22:40:33,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=759940.0, ans=0.2 +2024-09-19 22:40:46,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=759980.0, ans=0.125 +2024-09-19 22:41:03,747 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 22:41:17,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=760060.0, ans=0.125 +2024-09-19 22:41:25,586 INFO [train.py:1198] (1/2) Epoch 42, batch 4500, loss[loss=0.2453, ctc_loss=0.1287, cr_loss=0.3846, attn_decoder_loss=0.2497, over 20148.00 frames. 
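The recurring optim.py warnings above print five grad-norm quartiles (min, 25%, median, 75%, max), a clipping threshold, and the percentage of recently clipped batches. The logged thresholds equal Clipping_scale times the median (for example 2.0 × 8.998e+01 = 1.800e+02), so a hedged sketch of that bookkeeping, with illustrative names, is:

```python
import torch

# Hedged sketch: track recent global gradient norms, report their
# quartiles, and clip whenever the current norm exceeds
# clipping_scale * median. Names and history length are illustrative.
class GradNormClipper:
    def __init__(self, clipping_scale=2.0, history=1000):
        self.clipping_scale = clipping_scale
        self.history = history
        self.norms = []
        self.clipped = 0
        self.total = 0

    def step(self, params):
        grads = [p.grad for p in params if p.grad is not None]
        norm = torch.norm(torch.stack([g.norm() for g in grads])).item()
        self.norms = (self.norms + [norm])[-self.history:]
        q = torch.tensor(self.norms).quantile(
            torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = self.clipping_scale * q[2].item()  # 2.0 * median
        self.total += 1
        if norm > threshold:
            self.clipped += 1
            for g in grads:
                g.mul_(threshold / norm)
        percent_clipped = 100.0 * self.clipped / self.total
        return q.tolist(), threshold, percent_clipped
```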
], tot_loss[loss=0.2419, ctc_loss=0.1205, cr_loss=0.3646, attn_decoder_loss=0.2473, over 5235255.73 frames. ], batch size: 211, lr: 2.61e-03, grad_scale: 8.0 +2024-09-19 22:41:33,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=760100.0, ans=0.125 +2024-09-19 22:42:41,118 INFO [train.py:1198] (1/2) Epoch 43, batch 0, loss[loss=0.2141, ctc_loss=0.09162, cr_loss=0.3107, attn_decoder_loss=0.2208, over 29613.00 frames. ], tot_loss[loss=0.2141, ctc_loss=0.09162, cr_loss=0.3107, attn_decoder_loss=0.2208, over 29613.00 frames. ], batch size: 73, lr: 2.58e-03, grad_scale: 16.0 +2024-09-19 22:42:41,119 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-19 22:42:45,046 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.0199, 4.9225, 4.4091, 4.6909], device='cuda:1') +2024-09-19 22:43:00,148 INFO [train.py:1230] (1/2) Epoch 43, validation: loss=0.2125, ctc_loss=0.03634, cr_loss=6.648e-15, attn_decoder_loss=0.2321, over 944034.00 frames. +2024-09-19 22:43:00,148 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-19 22:43:07,307 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.52 vs. limit=6.0 +2024-09-19 22:43:22,467 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.12 vs. limit=15.0 +2024-09-19 22:43:31,764 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.99 vs. limit=15.0 +2024-09-19 22:43:35,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=760280.0, ans=0.125 +2024-09-19 22:43:38,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.whiten.whitening_limit, batch_count=760280.0, ans=15.0 +2024-09-19 22:43:39,929 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.895e+01 1.042e+02 1.140e+02 1.225e+02 1.755e+02, threshold=2.281e+02, percent-clipped=0.0 +2024-09-19 22:44:11,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=760360.0, ans=0.025 +2024-09-19 22:44:14,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=760360.0, ans=0.125 +2024-09-19 22:44:17,480 INFO [train.py:1198] (1/2) Epoch 43, batch 50, loss[loss=0.2071, ctc_loss=0.08851, cr_loss=0.2892, attn_decoder_loss=0.2139, over 29407.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.113, cr_loss=0.356, attn_decoder_loss=0.2403, over 1269659.90 frames. 
], batch size: 70, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:44:17,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=760400.0, ans=0.0 +2024-09-19 22:44:32,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=760440.0, ans=0.125 +2024-09-19 22:44:55,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=760480.0, ans=0.2 +2024-09-19 22:45:18,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.min_positive, batch_count=760560.0, ans=0.025 +2024-09-19 22:45:21,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.80 vs. limit=6.0 +2024-09-19 22:45:29,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=760560.0, ans=0.5 +2024-09-19 22:45:33,223 INFO [train.py:1198] (1/2) Epoch 43, batch 100, loss[loss=0.2371, ctc_loss=0.1236, cr_loss=0.3826, attn_decoder_loss=0.2412, over 29511.00 frames. ], tot_loss[loss=0.2354, ctc_loss=0.1133, cr_loss=0.355, attn_decoder_loss=0.2411, over 2252681.19 frames. ], batch size: 76, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:45:34,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=760600.0, ans=0.125 +2024-09-19 22:46:10,562 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.246e+01 8.774e+01 9.184e+01 9.707e+01 2.214e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-19 22:46:10,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=760680.0, ans=0.0 +2024-09-19 22:46:18,504 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=760720.0, ans=0.125 +2024-09-19 22:46:19,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=760720.0, ans=0.0 +2024-09-19 22:46:53,106 INFO [train.py:1198] (1/2) Epoch 43, batch 150, loss[loss=0.2178, ctc_loss=0.1051, cr_loss=0.326, attn_decoder_loss=0.223, over 29412.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1113, cr_loss=0.3509, attn_decoder_loss=0.239, over 3047069.27 frames. ], batch size: 70, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:47:07,061 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=760840.0, ans=0.2 +2024-09-19 22:47:42,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=760920.0, ans=0.125 +2024-09-19 22:48:07,804 INFO [train.py:1198] (1/2) Epoch 43, batch 200, loss[loss=0.2475, ctc_loss=0.1275, cr_loss=0.3736, attn_decoder_loss=0.2525, over 27439.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1109, cr_loss=0.3506, attn_decoder_loss=0.2378, over 3658866.59 frames. ], batch size: 124, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:48:16,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.61 vs. 
limit=15.0 +2024-09-19 22:48:17,744 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.90 vs. limit=15.0 +2024-09-19 22:48:18,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=761000.0, ans=0.2 +2024-09-19 22:48:38,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=761080.0, ans=0.2 +2024-09-19 22:48:45,406 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.183e+01 8.451e+01 8.919e+01 9.338e+01 1.606e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-19 22:49:23,042 INFO [train.py:1198] (1/2) Epoch 43, batch 250, loss[loss=0.2532, ctc_loss=0.1242, cr_loss=0.3698, attn_decoder_loss=0.2594, over 29228.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1106, cr_loss=0.3504, attn_decoder_loss=0.2377, over 4142916.46 frames. ], batch size: 100, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:49:43,806 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.89 vs. limit=12.0 +2024-09-19 22:50:05,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer_ff2.min_abs, batch_count=761280.0, ans=0.1 +2024-09-19 22:50:06,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.59 vs. limit=22.5 +2024-09-19 22:50:26,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.47 vs. limit=15.0 +2024-09-19 22:50:40,731 INFO [train.py:1198] (1/2) Epoch 43, batch 300, loss[loss=0.2438, ctc_loss=0.1216, cr_loss=0.3813, attn_decoder_loss=0.2489, over 29491.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1102, cr_loss=0.3493, attn_decoder_loss=0.2374, over 4510841.92 frames. ], batch size: 92, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:50:41,078 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 22:51:20,734 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.509e+01 8.675e+01 9.148e+01 9.609e+01 2.085e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-19 22:51:28,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=761520.0, ans=0.09899494936611666 +2024-09-19 22:51:33,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=761520.0, ans=0.1 +2024-09-19 22:51:47,550 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.45 vs. limit=15.0 +2024-09-19 22:51:51,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=761560.0, ans=0.025 +2024-09-19 22:51:59,191 INFO [train.py:1198] (1/2) Epoch 43, batch 350, loss[loss=0.2061, ctc_loss=0.09101, cr_loss=0.3062, attn_decoder_loss=0.212, over 29295.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1109, cr_loss=0.3507, attn_decoder_loss=0.2382, over 4795956.86 frames. 
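Each loss[...] entry in this log carries a combined loss together with its ctc_loss, cr_loss (consistency regularization) and attn_decoder_loss components. The logged totals are consistent with a fixed weighted sum using weights 0.1, 0.02 and 0.9; treating those weights as inferred from the numbers here rather than read from the training config:

```python
# Weights inferred from the totals in this log, not taken from the
# training config, so treat them as an assumption:
def combined_loss(ctc_loss, cr_loss, attn_decoder_loss,
                  ctc_weight=0.1, cr_weight=0.02, attn_weight=0.9):
    return (ctc_weight * ctc_loss
            + cr_weight * cr_loss
            + attn_weight * attn_decoder_loss)

# Cross-checks against two tot_loss entries from epoch 42 above:
assert abs(combined_loss(0.1106, 0.3489, 0.2376) - 0.2319) < 5e-4
assert abs(combined_loss(0.1376, 0.3752, 0.2600) - 0.2552) < 5e-4
```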
], batch size: 71, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:52:03,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.63 vs. limit=15.0 +2024-09-19 22:52:14,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=761640.0, ans=0.125 +2024-09-19 22:52:39,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=761680.0, ans=0.0 +2024-09-19 22:52:44,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=761720.0, ans=0.0 +2024-09-19 22:52:48,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.39 vs. limit=22.5 +2024-09-19 22:52:54,413 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.78 vs. limit=15.0 +2024-09-19 22:52:58,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=761760.0, ans=0.125 +2024-09-19 22:53:14,428 INFO [train.py:1198] (1/2) Epoch 43, batch 400, loss[loss=0.2385, ctc_loss=0.113, cr_loss=0.3845, attn_decoder_loss=0.2439, over 29690.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1106, cr_loss=0.3502, attn_decoder_loss=0.2377, over 5026444.94 frames. ], batch size: 82, lr: 2.57e-03, grad_scale: 32.0 +2024-09-19 22:53:16,945 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.12 vs. limit=15.0 +2024-09-19 22:53:19,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=761800.0, ans=0.0 +2024-09-19 22:53:45,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=761880.0, ans=0.0 +2024-09-19 22:53:47,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=761880.0, ans=0.07 +2024-09-19 22:53:53,726 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.678e+01 9.168e+01 9.670e+01 1.497e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-19 22:54:00,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=761920.0, ans=0.2 +2024-09-19 22:54:04,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=761920.0, ans=0.2 +2024-09-19 22:54:06,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=761920.0, ans=0.125 +2024-09-19 22:54:11,213 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.11 vs. 
limit=6.0 +2024-09-19 22:54:15,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=761960.0, ans=0.1 +2024-09-19 22:54:26,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=761960.0, ans=0.0 +2024-09-19 22:54:32,416 INFO [train.py:1198] (1/2) Epoch 43, batch 450, loss[loss=0.2364, ctc_loss=0.1096, cr_loss=0.3396, attn_decoder_loss=0.2429, over 29703.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1108, cr_loss=0.3503, attn_decoder_loss=0.238, over 5187165.90 frames. ], batch size: 83, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:54:32,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=762000.0, ans=0.1 +2024-09-19 22:55:04,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.72 vs. limit=5.0 +2024-09-19 22:55:05,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=762080.0, ans=0.1 +2024-09-19 22:55:05,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=762080.0, ans=0.125 +2024-09-19 22:55:09,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=762080.0, ans=0.0 +2024-09-19 22:55:12,582 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=762080.0, ans=0.125 +2024-09-19 22:55:12,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=762080.0, ans=0.125 +2024-09-19 22:55:38,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=762160.0, ans=0.0 +2024-09-19 22:55:50,316 INFO [train.py:1198] (1/2) Epoch 43, batch 500, loss[loss=0.2474, ctc_loss=0.1181, cr_loss=0.3731, attn_decoder_loss=0.2535, over 29415.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1106, cr_loss=0.3503, attn_decoder_loss=0.2374, over 5328870.64 frames. 
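The Whitening entries above compare a per-module metric with a limit (for instance metric=4.72 vs. limit=5.0); the penalty only engages once the metric exceeds the limit. One hedged reading of such a metric is the unevenness of the eigenvalue spectrum of the activation covariance, which equals 1.0 for perfectly white features and grows as a few directions dominate. The sketch below mirrors that idea, not the exact formula in scaling.py:

```python
import torch

# Hedged sketch: E[lambda^2] / E[lambda]^2 over the eigenvalues of the
# activation covariance; 1.0 when the spectrum is flat (white).
def whitening_metric(x):
    # x: (num_frames, num_channels) activations
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.t() @ x) / x.shape[0]
    eigs = torch.linalg.eigvalsh(cov)
    return ((eigs ** 2).mean() / eigs.mean() ** 2).item()

white = torch.randn(2000, 64)
print(whitening_metric(white))                                 # ~1.0
print(whitening_metric(white * torch.linspace(0.1, 3.0, 64)))  # larger
```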
], batch size: 94, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:55:50,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=762200.0, ans=0.09899494936611666 +2024-09-19 22:56:14,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=762240.0, ans=0.125 +2024-09-19 22:56:14,895 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=762240.0, ans=0.0 +2024-09-19 22:56:19,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=762280.0, ans=0.0 +2024-09-19 22:56:29,951 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.288e+01 8.617e+01 8.998e+01 9.696e+01 3.544e+02, threshold=1.800e+02, percent-clipped=2.0 +2024-09-19 22:56:36,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=762320.0, ans=10.0 +2024-09-19 22:56:43,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=762320.0, ans=0.125 +2024-09-19 22:57:01,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=762360.0, ans=0.0 +2024-09-19 22:57:06,489 INFO [train.py:1198] (1/2) Epoch 43, batch 550, loss[loss=0.2416, ctc_loss=0.1123, cr_loss=0.3496, attn_decoder_loss=0.2482, over 28791.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1108, cr_loss=0.3506, attn_decoder_loss=0.2377, over 5421502.84 frames. ], batch size: 104, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:57:19,567 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=12.29 vs. limit=22.5 +2024-09-19 22:57:29,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=762440.0, ans=0.0 +2024-09-19 22:57:41,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=762480.0, ans=0.0 +2024-09-19 22:57:54,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=762520.0, ans=0.07 +2024-09-19 22:58:24,034 INFO [train.py:1198] (1/2) Epoch 43, batch 600, loss[loss=0.2469, ctc_loss=0.1232, cr_loss=0.3807, attn_decoder_loss=0.2522, over 29295.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1112, cr_loss=0.3512, attn_decoder_loss=0.2379, over 5507455.25 frames. ], batch size: 100, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:58:28,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=762600.0, ans=0.125 +2024-09-19 22:59:05,157 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.540e+01 8.510e+01 8.971e+01 9.586e+01 1.722e+02, threshold=1.794e+02, percent-clipped=0.0 +2024-09-19 22:59:24,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=762760.0, ans=0.125 +2024-09-19 22:59:41,227 INFO [train.py:1198] (1/2) Epoch 43, batch 650, loss[loss=0.2341, ctc_loss=0.1087, cr_loss=0.3341, attn_decoder_loss=0.2407, over 29755.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1101, cr_loss=0.3484, attn_decoder_loss=0.2368, over 5585369.31 frames. 
], batch size: 81, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 22:59:52,333 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=9.31 vs. limit=15.0 +2024-09-19 22:59:53,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=762800.0, ans=0.025 +2024-09-19 22:59:57,235 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.21 vs. limit=15.0 +2024-09-19 23:00:05,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=762840.0, ans=0.125 +2024-09-19 23:00:25,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=762920.0, ans=0.025 +2024-09-19 23:00:38,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=762920.0, ans=0.1 +2024-09-19 23:00:43,667 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.04 vs. limit=15.0 +2024-09-19 23:00:44,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=762960.0, ans=0.1 +2024-09-19 23:00:49,891 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.84 vs. limit=15.0 +2024-09-19 23:00:56,552 INFO [train.py:1198] (1/2) Epoch 43, batch 700, loss[loss=0.2267, ctc_loss=0.1069, cr_loss=0.3411, attn_decoder_loss=0.2325, over 29535.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1109, cr_loss=0.3504, attn_decoder_loss=0.2377, over 5635666.80 frames. 
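Alongside the per-batch loss[..., over N frames.], every training line reports a cumulative tot_loss[..., over M frames.] whose frame count resets at each epoch boundary (epoch 43 starts over at 29613 frames) and then grows. A hedged sketch of frame-weighted averaging that produces numbers of this shape; icefall's real tracker may additionally decay old batches, which would explain the fractional frame totals:

```python
# Hedged bookkeeping sketch for tot_loss[..., over M frames.]: a
# frame-weighted running average, reset at each epoch boundary. Any
# decay of old batches in the real tracker is assumed, not shown here.
class RunningLoss:
    def __init__(self):
        self.weighted_sum = 0.0
        self.frames = 0.0

    def update(self, batch_loss, batch_frames):
        self.weighted_sum += batch_loss * batch_frames
        self.frames += batch_frames
        return self.weighted_sum / self.frames

tot = RunningLoss()
print(tot.update(0.2141, 29613.0))  # epoch 43, batch 0: equals batch loss
print(tot.update(0.2071, 29407.0))  # batch 50 pulls the average down
```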
], batch size: 76, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:01:14,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=763040.0, ans=0.2 +2024-09-19 23:01:25,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=763080.0, ans=0.0 +2024-09-19 23:01:35,726 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.537e+01 8.576e+01 9.155e+01 9.558e+01 1.416e+02, threshold=1.831e+02, percent-clipped=0.0 +2024-09-19 23:01:42,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=763120.0, ans=0.0 +2024-09-19 23:01:46,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=763120.0, ans=0.125 +2024-09-19 23:01:54,202 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=763120.0, ans=0.125 +2024-09-19 23:01:55,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=763160.0, ans=0.025 +2024-09-19 23:02:00,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=763160.0, ans=0.125 +2024-09-19 23:02:03,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=763160.0, ans=0.04949747468305833 +2024-09-19 23:02:14,565 INFO [train.py:1198] (1/2) Epoch 43, batch 750, loss[loss=0.2247, ctc_loss=0.108, cr_loss=0.3331, attn_decoder_loss=0.2303, over 29705.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1104, cr_loss=0.3485, attn_decoder_loss=0.2372, over 5675253.17 frames. ], batch size: 82, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:02:47,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=763280.0, ans=0.125 +2024-09-19 23:03:00,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=763320.0, ans=0.2 +2024-09-19 23:03:03,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=763320.0, ans=0.0 +2024-09-19 23:03:05,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.30 vs. limit=6.0 +2024-09-19 23:03:28,370 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.74 vs. limit=15.0 +2024-09-19 23:03:31,977 INFO [train.py:1198] (1/2) Epoch 43, batch 800, loss[loss=0.2064, ctc_loss=0.08905, cr_loss=0.294, attn_decoder_loss=0.2129, over 29595.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1108, cr_loss=0.3499, attn_decoder_loss=0.2376, over 5705231.03 frames. ], batch size: 73, lr: 2.57e-03, grad_scale: 32.0 +2024-09-19 23:03:34,561 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.00 vs. 
limit=12.0 +2024-09-19 23:03:39,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=763400.0, ans=0.125 +2024-09-19 23:03:47,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=763440.0, ans=0.0 +2024-09-19 23:04:13,917 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.340e+01 8.564e+01 8.973e+01 9.746e+01 2.709e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-19 23:04:14,971 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.72 vs. limit=15.0 +2024-09-19 23:04:20,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=763520.0, ans=0.125 +2024-09-19 23:04:29,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=763520.0, ans=0.125 +2024-09-19 23:04:32,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=763560.0, ans=0.1 +2024-09-19 23:04:36,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=763560.0, ans=0.0 +2024-09-19 23:04:42,032 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.62 vs. limit=15.0 +2024-09-19 23:04:47,009 INFO [train.py:1198] (1/2) Epoch 43, batch 850, loss[loss=0.2321, ctc_loss=0.1026, cr_loss=0.3281, attn_decoder_loss=0.2392, over 29736.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1101, cr_loss=0.3482, attn_decoder_loss=0.2369, over 5734550.08 frames. ], batch size: 89, lr: 2.57e-03, grad_scale: 8.0 +2024-09-19 23:04:48,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=763600.0, ans=0.125 +2024-09-19 23:05:00,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=763640.0, ans=0.025 +2024-09-19 23:05:09,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=763640.0, ans=0.125 +2024-09-19 23:05:33,876 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:05:44,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=763720.0, ans=0.2 +2024-09-19 23:05:53,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=763760.0, ans=0.0 +2024-09-19 23:06:03,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=763800.0, ans=0.0 +2024-09-19 23:06:04,722 INFO [train.py:1198] (1/2) Epoch 43, batch 900, loss[loss=0.2052, ctc_loss=0.08667, cr_loss=0.291, attn_decoder_loss=0.2119, over 29621.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1105, cr_loss=0.3491, attn_decoder_loss=0.2376, over 5739380.00 frames. 
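The grad_scale field above moves between 8.0, 16.0 and 32.0 (compare batch 800 at 32.0 with batch 850 at 8.0), the signature of dynamic loss scaling for mixed-precision training: the scale is halved after an overflow and grown back after a run of clean steps. A minimal sketch with PyTorch's stock GradScaler; model, optimizer, batch and loss_fn are placeholders:

```python
from torch.cuda.amp import GradScaler, autocast

# Minimal mixed-precision step with dynamic loss scaling, the mechanism
# behind the grad_scale values in the log. model, optimizer, batch and
# loss_fn are placeholders, not icefall's actual objects.
scaler = GradScaler(init_scale=32.0, backoff_factor=0.5, growth_interval=2000)

def train_step(model, optimizer, batch, loss_fn):
    optimizer.zero_grad()
    with autocast():
        loss = loss_fn(model(batch["inputs"]), batch["targets"])
    scaler.scale(loss).backward()
    scaler.step(optimizer)  # skipped internally if inf/nan grads appear
    scaler.update()         # halve the scale on overflow, else slowly grow
    return loss.detach(), scaler.get_scale()
```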
], batch size: 73, lr: 2.57e-03, grad_scale: 8.0 +2024-09-19 23:06:09,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=763800.0, ans=0.125 +2024-09-19 23:06:12,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=763800.0, ans=0.125 +2024-09-19 23:06:42,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=763880.0, ans=0.025 +2024-09-19 23:06:46,894 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.378e+01 8.546e+01 9.046e+01 9.640e+01 1.475e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 23:06:48,022 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=12.74 vs. limit=22.5 +2024-09-19 23:07:04,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=763920.0, ans=0.125 +2024-09-19 23:07:10,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=763960.0, ans=0.0 +2024-09-19 23:07:12,094 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.00 vs. limit=12.0 +2024-09-19 23:07:16,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=763960.0, ans=0.2 +2024-09-19 23:07:22,194 INFO [train.py:1198] (1/2) Epoch 43, batch 950, loss[loss=0.218, ctc_loss=0.09513, cr_loss=0.315, attn_decoder_loss=0.2247, over 29496.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1105, cr_loss=0.3491, attn_decoder_loss=0.2377, over 5740038.17 frames. ], batch size: 74, lr: 2.57e-03, grad_scale: 8.0 +2024-09-19 23:07:38,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=764040.0, ans=0.125 +2024-09-19 23:07:53,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=764080.0, ans=0.2 +2024-09-19 23:07:58,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=764080.0, ans=0.05 +2024-09-19 23:08:36,655 INFO [train.py:1198] (1/2) Epoch 43, batch 1000, loss[loss=0.221, ctc_loss=0.1039, cr_loss=0.3367, attn_decoder_loss=0.2265, over 29507.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1113, cr_loss=0.3504, attn_decoder_loss=0.2385, over 5735711.37 frames. ], batch size: 77, lr: 2.57e-03, grad_scale: 8.0 +2024-09-19 23:08:41,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=764200.0, ans=0.05 +2024-09-19 23:09:18,833 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.366e+01 8.655e+01 9.178e+01 9.837e+01 2.417e+02, threshold=1.836e+02, percent-clipped=2.0 +2024-09-19 23:09:24,232 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.61 vs. limit=22.5 +2024-09-19 23:09:33,599 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.31 vs. 
limit=22.5 +2024-09-19 23:09:50,074 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=764360.0, ans=0.0 +2024-09-19 23:09:54,201 INFO [train.py:1198] (1/2) Epoch 43, batch 1050, loss[loss=0.237, ctc_loss=0.1145, cr_loss=0.358, attn_decoder_loss=0.2426, over 29666.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1114, cr_loss=0.3508, attn_decoder_loss=0.2381, over 5744350.81 frames. ], batch size: 85, lr: 2.57e-03, grad_scale: 8.0 +2024-09-19 23:09:56,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=764400.0, ans=0.2 +2024-09-19 23:10:14,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=764440.0, ans=0.125 +2024-09-19 23:10:18,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=764440.0, ans=0.125 +2024-09-19 23:10:32,697 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:10:35,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=764480.0, ans=0.2 +2024-09-19 23:10:43,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.27 vs. limit=22.5 +2024-09-19 23:10:57,756 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.49 vs. limit=22.5 +2024-09-19 23:11:06,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=764560.0, ans=0.125 +2024-09-19 23:11:11,227 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.82 vs. limit=15.0 +2024-09-19 23:11:11,872 INFO [train.py:1198] (1/2) Epoch 43, batch 1100, loss[loss=0.226, ctc_loss=0.107, cr_loss=0.3338, attn_decoder_loss=0.2318, over 29456.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1111, cr_loss=0.3496, attn_decoder_loss=0.2379, over 5757890.24 frames. ], batch size: 78, lr: 2.57e-03, grad_scale: 8.0 +2024-09-19 23:11:15,226 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:11:16,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=764600.0, ans=0.0 +2024-09-19 23:11:33,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=764640.0, ans=0.1 +2024-09-19 23:11:41,399 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.48 vs. 
limit=15.0 +2024-09-19 23:11:48,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=764680.0, ans=0.0 +2024-09-19 23:11:54,302 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.200e+01 8.378e+01 8.891e+01 9.353e+01 1.322e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-19 23:12:11,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=764760.0, ans=0.0 +2024-09-19 23:12:21,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=764760.0, ans=0.125 +2024-09-19 23:12:27,897 INFO [train.py:1198] (1/2) Epoch 43, batch 1150, loss[loss=0.2153, ctc_loss=0.09932, cr_loss=0.3217, attn_decoder_loss=0.221, over 29437.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1104, cr_loss=0.3487, attn_decoder_loss=0.2375, over 5755684.19 frames. ], batch size: 78, lr: 2.57e-03, grad_scale: 8.0 +2024-09-19 23:12:32,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=764800.0, ans=0.1 +2024-09-19 23:12:36,030 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:12:40,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=764800.0, ans=0.125 +2024-09-19 23:12:48,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=764840.0, ans=0.125 +2024-09-19 23:13:00,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=764880.0, ans=0.125 +2024-09-19 23:13:09,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=764880.0, ans=0.125 +2024-09-19 23:13:12,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.max_positive, batch_count=764920.0, ans=0.95 +2024-09-19 23:13:13,008 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.95 vs. limit=22.5 +2024-09-19 23:13:15,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=764920.0, ans=0.1 +2024-09-19 23:13:21,726 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.29 vs. limit=15.0 +2024-09-19 23:13:37,389 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=11.51 vs. limit=15.0 +2024-09-19 23:13:37,683 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.30 vs. limit=15.0 +2024-09-19 23:13:45,768 INFO [train.py:1198] (1/2) Epoch 43, batch 1200, loss[loss=0.2474, ctc_loss=0.1202, cr_loss=0.3618, attn_decoder_loss=0.2535, over 29661.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1111, cr_loss=0.35, attn_decoder_loss=0.2384, over 5748452.26 frames. 
], batch size: 85, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:13:49,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=765000.0, ans=0.125 +2024-09-19 23:14:28,137 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.366e+01 8.714e+01 9.128e+01 9.687e+01 4.379e+02, threshold=1.826e+02, percent-clipped=2.0 +2024-09-19 23:14:34,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=765120.0, ans=0.125 +2024-09-19 23:14:56,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=765160.0, ans=0.09899494936611666 +2024-09-19 23:15:03,373 INFO [train.py:1198] (1/2) Epoch 43, batch 1250, loss[loss=0.2642, ctc_loss=0.1474, cr_loss=0.4502, attn_decoder_loss=0.2671, over 29539.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1119, cr_loss=0.3525, attn_decoder_loss=0.2393, over 5775370.11 frames. ], batch size: 92, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:15:23,406 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:15:27,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=765240.0, ans=0.125 +2024-09-19 23:15:35,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=765280.0, ans=0.0 +2024-09-19 23:15:41,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=765280.0, ans=0.0 +2024-09-19 23:15:48,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=765320.0, ans=0.0 +2024-09-19 23:15:57,412 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=7.26 vs. limit=10.0 +2024-09-19 23:15:59,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=765320.0, ans=0.1 +2024-09-19 23:16:07,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=765360.0, ans=0.025 +2024-09-19 23:16:19,063 INFO [train.py:1198] (1/2) Epoch 43, batch 1300, loss[loss=0.2435, ctc_loss=0.1133, cr_loss=0.3558, attn_decoder_loss=0.2501, over 28348.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1114, cr_loss=0.3512, attn_decoder_loss=0.2386, over 5779502.48 frames. ], batch size: 111, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:16:56,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=10.43 vs. limit=15.0 +2024-09-19 23:17:01,387 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.583e+01 9.046e+01 9.582e+01 1.774e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-19 23:17:18,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=765560.0, ans=0.035 +2024-09-19 23:17:37,164 INFO [train.py:1198] (1/2) Epoch 43, batch 1350, loss[loss=0.2377, ctc_loss=0.1138, cr_loss=0.3519, attn_decoder_loss=0.2436, over 29759.00 frames. 
], tot_loss[loss=0.2326, ctc_loss=0.1111, cr_loss=0.3506, attn_decoder_loss=0.2383, over 5797934.82 frames. ], batch size: 81, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:17:45,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.97 vs. limit=22.5 +2024-09-19 23:18:34,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=765720.0, ans=0.0 +2024-09-19 23:18:35,850 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:18:54,303 INFO [train.py:1198] (1/2) Epoch 43, batch 1400, loss[loss=0.2065, ctc_loss=0.09014, cr_loss=0.3006, attn_decoder_loss=0.2128, over 29588.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1105, cr_loss=0.35, attn_decoder_loss=0.2379, over 5808784.94 frames. ], batch size: 69, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:19:15,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=765840.0, ans=0.0 +2024-09-19 23:19:22,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=765880.0, ans=0.125 +2024-09-19 23:19:33,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=765880.0, ans=0.1 +2024-09-19 23:19:36,333 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.299e+01 8.462e+01 9.127e+01 9.642e+01 1.340e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-19 23:19:39,791 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=765920.0, ans=0.125 +2024-09-19 23:19:41,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=765920.0, ans=0.125 +2024-09-19 23:19:56,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=765960.0, ans=0.2 +2024-09-19 23:20:02,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=765960.0, ans=0.0 +2024-09-19 23:20:09,473 INFO [train.py:1198] (1/2) Epoch 43, batch 1450, loss[loss=0.2403, ctc_loss=0.1198, cr_loss=0.3761, attn_decoder_loss=0.2454, over 29437.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1106, cr_loss=0.3503, attn_decoder_loss=0.238, over 5805816.72 frames. ], batch size: 94, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:20:21,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=766000.0, ans=0.025 +2024-09-19 23:20:33,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=766040.0, ans=0.2 +2024-09-19 23:20:48,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=766080.0, ans=0.125 +2024-09-19 23:20:52,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.52 vs. 
limit=15.0 +2024-09-19 23:20:56,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=766120.0, ans=0.125 +2024-09-19 23:21:06,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=766120.0, ans=0.2 +2024-09-19 23:21:14,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=766160.0, ans=0.0 +2024-09-19 23:21:26,805 INFO [train.py:1198] (1/2) Epoch 43, batch 1500, loss[loss=0.2433, ctc_loss=0.1141, cr_loss=0.3603, attn_decoder_loss=0.2497, over 29635.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1105, cr_loss=0.3497, attn_decoder_loss=0.2383, over 5807238.34 frames. ], batch size: 86, lr: 2.57e-03, grad_scale: 16.0 +2024-09-19 23:21:30,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=766200.0, ans=0.1 +2024-09-19 23:21:40,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=766240.0, ans=0.0 +2024-09-19 23:22:09,440 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.081e+01 8.602e+01 9.131e+01 9.560e+01 1.543e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-19 23:22:25,483 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.72 vs. limit=22.5 +2024-09-19 23:22:29,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=766360.0, ans=0.0 +2024-09-19 23:22:30,115 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.22 vs. limit=22.5 +2024-09-19 23:22:30,314 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.36 vs. limit=15.0 +2024-09-19 23:22:38,394 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.24 vs. limit=15.0 +2024-09-19 23:22:39,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=766360.0, ans=0.1 +2024-09-19 23:22:45,498 INFO [train.py:1198] (1/2) Epoch 43, batch 1550, loss[loss=0.2539, ctc_loss=0.1239, cr_loss=0.3681, attn_decoder_loss=0.2602, over 29510.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1108, cr_loss=0.3503, attn_decoder_loss=0.2385, over 5782940.70 frames. ], batch size: 90, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:23:00,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=766440.0, ans=0.1 +2024-09-19 23:23:17,633 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.08 vs. limit=15.0 +2024-09-19 23:23:24,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=766480.0, ans=0.125 +2024-09-19 23:24:00,259 INFO [train.py:1198] (1/2) Epoch 43, batch 1600, loss[loss=0.2392, ctc_loss=0.1152, cr_loss=0.3697, attn_decoder_loss=0.2447, over 29692.00 frames. 
], tot_loss[loss=0.2325, ctc_loss=0.1107, cr_loss=0.3495, attn_decoder_loss=0.2382, over 5763852.38 frames. ], batch size: 85, lr: 2.56e-03, grad_scale: 32.0 +2024-09-19 23:24:03,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=766600.0, ans=0.125 +2024-09-19 23:24:44,022 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.796e+01 8.578e+01 9.126e+01 9.935e+01 1.775e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-19 23:24:45,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=766720.0, ans=0.025 +2024-09-19 23:25:09,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=766760.0, ans=0.025 +2024-09-19 23:25:10,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=766760.0, ans=0.125 +2024-09-19 23:25:17,747 INFO [train.py:1198] (1/2) Epoch 43, batch 1650, loss[loss=0.2462, ctc_loss=0.1192, cr_loss=0.3751, attn_decoder_loss=0.252, over 29739.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1107, cr_loss=0.3494, attn_decoder_loss=0.2381, over 5759616.47 frames. ], batch size: 89, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:25:22,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=766800.0, ans=0.2 +2024-09-19 23:25:31,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=766840.0, ans=0.025 +2024-09-19 23:25:52,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=766880.0, ans=0.2 +2024-09-19 23:26:10,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=766920.0, ans=0.09899494936611666 +2024-09-19 23:26:10,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=766920.0, ans=0.2 +2024-09-19 23:26:11,206 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=768, metric=18.79 vs. limit=22.5 +2024-09-19 23:26:20,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=766960.0, ans=0.1 +2024-09-19 23:26:34,714 INFO [train.py:1198] (1/2) Epoch 43, batch 1700, loss[loss=0.206, ctc_loss=0.09917, cr_loss=0.335, attn_decoder_loss=0.2104, over 29586.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1104, cr_loss=0.3492, attn_decoder_loss=0.2378, over 5780312.30 frames. 
], batch size: 69, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:26:38,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=767000.0, ans=0.2 +2024-09-19 23:26:57,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_ff2.min_abs, batch_count=767040.0, ans=0.1 +2024-09-19 23:27:00,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=767040.0, ans=0.125 +2024-09-19 23:27:08,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=767080.0, ans=0.125 +2024-09-19 23:27:17,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=767080.0, ans=0.125 +2024-09-19 23:27:18,340 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.484e+01 9.017e+01 9.514e+01 1.146e+02, threshold=1.803e+02, percent-clipped=0.0 +2024-09-19 23:27:39,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=767160.0, ans=0.0 +2024-09-19 23:27:44,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=767160.0, ans=0.0 +2024-09-19 23:27:50,590 INFO [train.py:1198] (1/2) Epoch 43, batch 1750, loss[loss=0.2024, ctc_loss=0.09261, cr_loss=0.3115, attn_decoder_loss=0.2077, over 29381.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1099, cr_loss=0.3485, attn_decoder_loss=0.2372, over 5787517.25 frames. ], batch size: 67, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:28:09,669 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.56 vs. limit=15.0 +2024-09-19 23:28:25,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=767280.0, ans=0.125 +2024-09-19 23:28:46,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=767320.0, ans=0.125 +2024-09-19 23:29:06,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=767400.0, ans=0.2 +2024-09-19 23:29:07,598 INFO [train.py:1198] (1/2) Epoch 43, batch 1800, loss[loss=0.2441, ctc_loss=0.1203, cr_loss=0.3678, attn_decoder_loss=0.2497, over 29696.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1101, cr_loss=0.3487, attn_decoder_loss=0.2376, over 5789704.19 frames. 
], batch size: 83, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:29:38,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=767480.0, ans=0.125 +2024-09-19 23:29:45,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=767480.0, ans=0.125 +2024-09-19 23:29:47,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=767480.0, ans=0.0 +2024-09-19 23:29:50,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=767480.0, ans=0.125 +2024-09-19 23:29:51,304 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.169e+01 8.348e+01 8.939e+01 9.600e+01 1.459e+02, threshold=1.788e+02, percent-clipped=0.0 +2024-09-19 23:30:12,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=767560.0, ans=0.0 +2024-09-19 23:30:12,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=767560.0, ans=0.025 +2024-09-19 23:30:15,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=767560.0, ans=0.125 +2024-09-19 23:30:20,604 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:30:23,258 INFO [train.py:1198] (1/2) Epoch 43, batch 1850, loss[loss=0.2485, ctc_loss=0.1216, cr_loss=0.3692, attn_decoder_loss=0.2544, over 29611.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1101, cr_loss=0.349, attn_decoder_loss=0.2375, over 5795027.02 frames. ], batch size: 86, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:30:36,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=767600.0, ans=0.09899494936611666 +2024-09-19 23:30:41,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=767640.0, ans=0.1 +2024-09-19 23:31:06,035 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:31:08,037 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.96 vs. limit=10.0 +2024-09-19 23:31:21,455 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.01 vs. limit=6.0 +2024-09-19 23:31:40,272 INFO [train.py:1198] (1/2) Epoch 43, batch 1900, loss[loss=0.2471, ctc_loss=0.1187, cr_loss=0.3766, attn_decoder_loss=0.253, over 29708.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1108, cr_loss=0.3498, attn_decoder_loss=0.2382, over 5804335.63 frames. 
], batch size: 89, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:31:43,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=767800.0, ans=0.04949747468305833 +2024-09-19 23:31:45,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=767800.0, ans=0.1 +2024-09-19 23:31:55,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=767840.0, ans=0.125 +2024-09-19 23:32:06,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=767840.0, ans=0.125 +2024-09-19 23:32:07,945 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:32:24,371 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.677e+01 8.779e+01 9.176e+01 9.742e+01 1.549e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-19 23:32:39,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=767960.0, ans=0.1 +2024-09-19 23:32:41,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=767960.0, ans=0.125 +2024-09-19 23:32:42,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=767960.0, ans=0.125 +2024-09-19 23:33:04,950 INFO [train.py:1198] (1/2) Epoch 43, batch 1950, loss[loss=0.229, ctc_loss=0.1128, cr_loss=0.356, attn_decoder_loss=0.234, over 29451.00 frames. ], tot_loss[loss=0.2336, ctc_loss=0.1115, cr_loss=0.3512, attn_decoder_loss=0.2394, over 5819362.59 frames. ], batch size: 78, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:33:14,456 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:33:20,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=768040.0, ans=0.2 +2024-09-19 23:33:31,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=768040.0, ans=0.125 +2024-09-19 23:33:37,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=768080.0, ans=0.1 +2024-09-19 23:33:37,594 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.91 vs. 
limit=10.0 +2024-09-19 23:33:55,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=768120.0, ans=0.1 +2024-09-19 23:33:55,235 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=768120.0, ans=0.0 +2024-09-19 23:34:01,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=768120.0, ans=0.09899494936611666 +2024-09-19 23:34:01,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=768120.0, ans=0.0 +2024-09-19 23:34:07,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=768160.0, ans=0.125 +2024-09-19 23:34:20,488 INFO [train.py:1198] (1/2) Epoch 43, batch 2000, loss[loss=0.2077, ctc_loss=0.09201, cr_loss=0.3097, attn_decoder_loss=0.2136, over 29346.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1118, cr_loss=0.3516, attn_decoder_loss=0.2394, over 5796871.44 frames. ], batch size: 67, lr: 2.56e-03, grad_scale: 32.0 +2024-09-19 23:34:39,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=768240.0, ans=0.125 +2024-09-19 23:34:51,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=9.90 vs. limit=15.0 +2024-09-19 23:35:00,837 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=768280.0, ans=0.0 +2024-09-19 23:35:07,937 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.936e+01 8.662e+01 9.256e+01 9.828e+01 2.553e+02, threshold=1.851e+02, percent-clipped=3.0 +2024-09-19 23:35:19,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.58 vs. limit=22.5 +2024-09-19 23:35:20,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=768320.0, ans=0.1 +2024-09-19 23:35:25,555 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.06 vs. limit=15.0 +2024-09-19 23:35:28,753 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.45 vs. limit=6.0 +2024-09-19 23:35:38,230 INFO [train.py:1198] (1/2) Epoch 43, batch 2050, loss[loss=0.2046, ctc_loss=0.08885, cr_loss=0.2911, attn_decoder_loss=0.211, over 29426.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1111, cr_loss=0.35, attn_decoder_loss=0.2384, over 5788160.41 frames. 
], batch size: 70, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:35:46,102 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:35:59,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=768440.0, ans=0.0 +2024-09-19 23:36:35,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=768520.0, ans=0.125 +2024-09-19 23:36:35,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=768520.0, ans=0.025 +2024-09-19 23:36:55,474 INFO [train.py:1198] (1/2) Epoch 43, batch 2100, loss[loss=0.2307, ctc_loss=0.1045, cr_loss=0.3486, attn_decoder_loss=0.237, over 29751.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1103, cr_loss=0.349, attn_decoder_loss=0.2379, over 5799749.63 frames. ], batch size: 81, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:37:04,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=768600.0, ans=0.125 +2024-09-19 23:37:19,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=768640.0, ans=0.125 +2024-09-19 23:37:34,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=768680.0, ans=0.1 +2024-09-19 23:37:34,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=768680.0, ans=0.125 +2024-09-19 23:37:41,777 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.175e+01 8.395e+01 8.911e+01 9.448e+01 1.160e+02, threshold=1.782e+02, percent-clipped=0.0 +2024-09-19 23:38:01,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=768760.0, ans=0.1 +2024-09-19 23:38:10,689 INFO [train.py:1198] (1/2) Epoch 43, batch 2150, loss[loss=0.227, ctc_loss=0.1087, cr_loss=0.3452, attn_decoder_loss=0.2324, over 29442.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1101, cr_loss=0.3486, attn_decoder_loss=0.2373, over 5814097.03 frames. ], batch size: 78, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:38:14,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=768800.0, ans=0.125 +2024-09-19 23:38:32,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=768840.0, ans=0.125 +2024-09-19 23:38:44,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=768880.0, ans=0.2 +2024-09-19 23:38:47,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=768880.0, ans=0.025 +2024-09-19 23:39:04,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=768920.0, ans=0.125 +2024-09-19 23:39:14,420 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.39 vs. 
limit=15.0 +2024-09-19 23:39:28,381 INFO [train.py:1198] (1/2) Epoch 43, batch 2200, loss[loss=0.2386, ctc_loss=0.1191, cr_loss=0.3661, attn_decoder_loss=0.2438, over 29639.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1105, cr_loss=0.3493, attn_decoder_loss=0.2377, over 5810609.59 frames. ], batch size: 86, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:39:28,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=769000.0, ans=0.0 +2024-09-19 23:39:36,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=769000.0, ans=0.0 +2024-09-19 23:39:45,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.max_abs, batch_count=769040.0, ans=10.0 +2024-09-19 23:39:55,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=769040.0, ans=0.0 +2024-09-19 23:39:57,739 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.74 vs. limit=15.0 +2024-09-19 23:40:04,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=769080.0, ans=0.2 +2024-09-19 23:40:13,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=769120.0, ans=0.125 +2024-09-19 23:40:14,959 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.608e+01 8.529e+01 9.034e+01 9.598e+01 1.063e+03, threshold=1.807e+02, percent-clipped=3.0 +2024-09-19 23:40:46,043 INFO [train.py:1198] (1/2) Epoch 43, batch 2250, loss[loss=0.2355, ctc_loss=0.1051, cr_loss=0.3241, attn_decoder_loss=0.2428, over 29723.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1103, cr_loss=0.349, attn_decoder_loss=0.2378, over 5809686.13 frames. ], batch size: 82, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:41:07,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=769240.0, ans=0.125 +2024-09-19 23:41:33,551 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.63 vs. limit=15.0 +2024-09-19 23:41:37,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=769320.0, ans=0.125 +2024-09-19 23:42:01,234 INFO [train.py:1198] (1/2) Epoch 43, batch 2300, loss[loss=0.2106, ctc_loss=0.09194, cr_loss=0.3037, attn_decoder_loss=0.217, over 29328.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1098, cr_loss=0.3482, attn_decoder_loss=0.237, over 5797930.02 frames. 
], batch size: 71, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:42:30,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=769440.0, ans=0.125 +2024-09-19 23:42:39,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=769480.0, ans=0.0 +2024-09-19 23:42:42,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=769480.0, ans=0.1 +2024-09-19 23:42:49,953 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.561e+01 8.425e+01 9.007e+01 9.590e+01 1.483e+02, threshold=1.801e+02, percent-clipped=0.0 +2024-09-19 23:43:03,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=769560.0, ans=0.0 +2024-09-19 23:43:03,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=769560.0, ans=0.125 +2024-09-19 23:43:05,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=769560.0, ans=0.125 +2024-09-19 23:43:13,376 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.53 vs. limit=15.0 +2024-09-19 23:43:13,465 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.82 vs. limit=12.0 +2024-09-19 23:43:19,010 INFO [train.py:1198] (1/2) Epoch 43, batch 2350, loss[loss=0.2514, ctc_loss=0.1315, cr_loss=0.3989, attn_decoder_loss=0.2558, over 29697.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1097, cr_loss=0.3481, attn_decoder_loss=0.2371, over 5804068.08 frames. ], batch size: 83, lr: 2.56e-03, grad_scale: 8.0 +2024-09-19 23:43:22,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=769600.0, ans=0.1 +2024-09-19 23:43:31,710 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.86 vs. limit=15.0 +2024-09-19 23:43:47,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=769680.0, ans=0.125 +2024-09-19 23:43:49,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=769680.0, ans=0.125 +2024-09-19 23:44:05,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=769720.0, ans=0.125 +2024-09-19 23:44:18,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=769760.0, ans=0.2 +2024-09-19 23:44:36,555 INFO [train.py:1198] (1/2) Epoch 43, batch 2400, loss[loss=0.2277, ctc_loss=0.1104, cr_loss=0.3471, attn_decoder_loss=0.2331, over 29539.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1101, cr_loss=0.3484, attn_decoder_loss=0.2374, over 5807063.07 frames. 
], batch size: 76, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:44:42,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=769800.0, ans=0.0 +2024-09-19 23:44:51,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=769840.0, ans=0.0 +2024-09-19 23:45:05,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=769880.0, ans=0.0 +2024-09-19 23:45:14,873 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.96 vs. limit=15.0 +2024-09-19 23:45:16,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=769880.0, ans=0.1 +2024-09-19 23:45:22,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=769920.0, ans=0.0 +2024-09-19 23:45:22,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=769920.0, ans=0.125 +2024-09-19 23:45:23,259 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.858e+01 8.868e+01 9.245e+01 1.005e+02 2.989e+02, threshold=1.849e+02, percent-clipped=3.0 +2024-09-19 23:45:40,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=769960.0, ans=0.0 +2024-09-19 23:45:46,966 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.91 vs. limit=22.5 +2024-09-19 23:45:52,121 INFO [train.py:1198] (1/2) Epoch 43, batch 2450, loss[loss=0.2437, ctc_loss=0.1227, cr_loss=0.3812, attn_decoder_loss=0.2487, over 29719.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.111, cr_loss=0.3502, attn_decoder_loss=0.2383, over 5784598.26 frames. ], batch size: 82, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:45:53,255 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.97 vs. 
limit=12.0 +2024-09-19 23:45:56,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=770000.0, ans=0.125 +2024-09-19 23:46:23,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=770080.0, ans=0.125 +2024-09-19 23:46:29,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=770080.0, ans=0.1 +2024-09-19 23:46:41,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=770120.0, ans=0.125 +2024-09-19 23:46:41,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=770120.0, ans=0.0 +2024-09-19 23:47:02,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=770160.0, ans=0.125 +2024-09-19 23:47:07,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=770160.0, ans=0.0 +2024-09-19 23:47:09,624 INFO [train.py:1198] (1/2) Epoch 43, batch 2500, loss[loss=0.2448, ctc_loss=0.1121, cr_loss=0.3554, attn_decoder_loss=0.2516, over 29641.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1114, cr_loss=0.3515, attn_decoder_loss=0.2386, over 5794424.45 frames. ], batch size: 86, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:47:35,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=770240.0, ans=0.0 +2024-09-19 23:47:37,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=770240.0, ans=0.125 +2024-09-19 23:47:56,926 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.431e+01 8.730e+01 9.095e+01 9.659e+01 1.544e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-19 23:48:03,411 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:48:18,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=770360.0, ans=0.1 +2024-09-19 23:48:28,151 INFO [train.py:1198] (1/2) Epoch 43, batch 2550, loss[loss=0.2094, ctc_loss=0.09626, cr_loss=0.3249, attn_decoder_loss=0.2148, over 29328.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1115, cr_loss=0.3517, attn_decoder_loss=0.2387, over 5798073.02 frames. ], batch size: 67, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:48:36,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.85 vs. limit=15.0 +2024-09-19 23:48:46,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=770440.0, ans=0.125 +2024-09-19 23:49:09,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=770480.0, ans=0.125 +2024-09-19 23:49:15,455 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.07 vs. 
limit=15.0 +2024-09-19 23:49:43,745 INFO [train.py:1198] (1/2) Epoch 43, batch 2600, loss[loss=0.2295, ctc_loss=0.1141, cr_loss=0.3708, attn_decoder_loss=0.2341, over 29465.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1117, cr_loss=0.3519, attn_decoder_loss=0.2389, over 5794684.44 frames. ], batch size: 78, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:49:54,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=770600.0, ans=0.125 +2024-09-19 23:49:56,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=770600.0, ans=0.125 +2024-09-19 23:50:06,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=770640.0, ans=0.125 +2024-09-19 23:50:07,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=770640.0, ans=0.0 +2024-09-19 23:50:28,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=770680.0, ans=0.0 +2024-09-19 23:50:32,553 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.284e+01 8.619e+01 9.177e+01 9.694e+01 1.714e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-19 23:50:57,094 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:51:01,476 INFO [train.py:1198] (1/2) Epoch 43, batch 2650, loss[loss=0.2397, ctc_loss=0.1122, cr_loss=0.359, attn_decoder_loss=0.2459, over 29257.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1116, cr_loss=0.3521, attn_decoder_loss=0.239, over 5801868.68 frames. ], batch size: 100, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:51:15,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=770840.0, ans=0.125 +2024-09-19 23:51:33,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=770880.0, ans=0.0 +2024-09-19 23:51:52,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=770920.0, ans=0.025 +2024-09-19 23:52:18,213 INFO [train.py:1198] (1/2) Epoch 43, batch 2700, loss[loss=0.2425, ctc_loss=0.1116, cr_loss=0.345, attn_decoder_loss=0.2493, over 29529.00 frames. ], tot_loss[loss=0.2337, ctc_loss=0.1115, cr_loss=0.3519, attn_decoder_loss=0.2394, over 5798106.82 frames. ], batch size: 87, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:52:37,516 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.03 vs. limit=15.0 +2024-09-19 23:52:47,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=771080.0, ans=0.1 +2024-09-19 23:52:53,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.24 vs. 
limit=6.0 +2024-09-19 23:52:59,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=771080.0, ans=0.1 +2024-09-19 23:53:05,377 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.491e+01 8.492e+01 9.068e+01 9.521e+01 1.768e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-19 23:53:16,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=771120.0, ans=0.125 +2024-09-19 23:53:34,513 INFO [train.py:1198] (1/2) Epoch 43, batch 2750, loss[loss=0.2276, ctc_loss=0.1119, cr_loss=0.3586, attn_decoder_loss=0.2325, over 29531.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1105, cr_loss=0.3502, attn_decoder_loss=0.2382, over 5797455.83 frames. ], batch size: 75, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:53:37,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=771200.0, ans=0.125 +2024-09-19 23:53:46,808 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=771200.0, ans=0.0 +2024-09-19 23:53:54,921 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.44 vs. limit=15.0 +2024-09-19 23:53:55,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=771240.0, ans=0.125 +2024-09-19 23:54:11,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=771280.0, ans=0.125 +2024-09-19 23:54:34,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=771320.0, ans=0.04949747468305833 +2024-09-19 23:54:48,047 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-19 23:54:52,207 INFO [train.py:1198] (1/2) Epoch 43, batch 2800, loss[loss=0.2472, ctc_loss=0.1324, cr_loss=0.3653, attn_decoder_loss=0.2518, over 20053.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1107, cr_loss=0.3501, attn_decoder_loss=0.2383, over 5778327.63 frames. ], batch size: 211, lr: 2.56e-03, grad_scale: 32.0 +2024-09-19 23:55:07,910 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.13 vs. limit=15.0 +2024-09-19 23:55:19,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=771440.0, ans=0.125 +2024-09-19 23:55:28,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=771480.0, ans=0.125 +2024-09-19 23:55:40,236 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.588e+01 8.712e+01 9.201e+01 9.753e+01 5.037e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-19 23:55:59,101 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=17.31 vs. 
limit=22.5 +2024-09-19 23:56:03,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=771560.0, ans=0.125 +2024-09-19 23:56:07,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=771600.0, ans=0.125 +2024-09-19 23:56:08,851 INFO [train.py:1198] (1/2) Epoch 43, batch 2850, loss[loss=0.2284, ctc_loss=0.1065, cr_loss=0.3426, attn_decoder_loss=0.2343, over 29484.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.111, cr_loss=0.3505, attn_decoder_loss=0.2385, over 5762972.53 frames. ], batch size: 77, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:56:13,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=771600.0, ans=0.0 +2024-09-19 23:56:15,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=771600.0, ans=0.125 +2024-09-19 23:56:19,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=771600.0, ans=0.0 +2024-09-19 23:56:35,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=771640.0, ans=0.0 +2024-09-19 23:56:50,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.12 vs. limit=6.0 +2024-09-19 23:56:54,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=771720.0, ans=0.125 +2024-09-19 23:56:59,344 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=771720.0, ans=0.2 +2024-09-19 23:57:06,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=771720.0, ans=0.05 +2024-09-19 23:57:08,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.37 vs. limit=10.0 +2024-09-19 23:57:17,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=771760.0, ans=0.1 +2024-09-19 23:57:22,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=771760.0, ans=0.125 +2024-09-19 23:57:24,705 INFO [train.py:1198] (1/2) Epoch 43, batch 2900, loss[loss=0.2307, ctc_loss=0.1099, cr_loss=0.3549, attn_decoder_loss=0.2362, over 29406.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1117, cr_loss=0.3523, attn_decoder_loss=0.2397, over 5788103.58 frames. ], batch size: 79, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:57:27,080 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.05 vs. 
limit=12.0 +2024-09-19 23:57:53,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=771880.0, ans=0.125 +2024-09-19 23:58:14,811 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.451e+01 8.980e+01 9.523e+01 1.534e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-19 23:58:20,051 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.48 vs. limit=15.0 +2024-09-19 23:58:42,034 INFO [train.py:1198] (1/2) Epoch 43, batch 2950, loss[loss=0.2237, ctc_loss=0.1083, cr_loss=0.3523, attn_decoder_loss=0.2287, over 29510.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1104, cr_loss=0.3498, attn_decoder_loss=0.2384, over 5782224.05 frames. ], batch size: 75, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:58:48,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=772000.0, ans=0.125 +2024-09-19 23:59:16,037 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.81 vs. limit=22.5 +2024-09-19 23:59:43,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=772160.0, ans=0.0 +2024-09-19 23:59:55,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=772160.0, ans=0.0 +2024-09-19 23:59:57,362 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.85 vs. limit=12.0 +2024-09-19 23:59:59,894 INFO [train.py:1198] (1/2) Epoch 43, batch 3000, loss[loss=0.2369, ctc_loss=0.1081, cr_loss=0.3579, attn_decoder_loss=0.2432, over 29755.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1099, cr_loss=0.3487, attn_decoder_loss=0.2381, over 5783415.84 frames. ], batch size: 81, lr: 2.56e-03, grad_scale: 16.0 +2024-09-19 23:59:59,894 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 00:00:18,201 INFO [train.py:1230] (1/2) Epoch 43, validation: loss=0.2118, ctc_loss=0.03672, cr_loss=6.551e-15, attn_decoder_loss=0.2313, over 944034.00 frames. 
+2024-09-20 00:00:18,201 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 00:00:29,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=772200.0, ans=0.1 +2024-09-20 00:00:37,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=772240.0, ans=0.125 +2024-09-20 00:00:37,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=772240.0, ans=0.0 +2024-09-20 00:01:06,788 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.550e+01 8.607e+01 9.085e+01 9.850e+01 2.122e+02, threshold=1.817e+02, percent-clipped=1.0 +2024-09-20 00:01:11,872 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:01:17,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=772360.0, ans=0.0 +2024-09-20 00:01:17,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=772360.0, ans=0.125 +2024-09-20 00:01:34,010 INFO [train.py:1198] (1/2) Epoch 43, batch 3050, loss[loss=0.2262, ctc_loss=0.1131, cr_loss=0.3512, attn_decoder_loss=0.231, over 29539.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.111, cr_loss=0.3505, attn_decoder_loss=0.2392, over 5776778.23 frames. ], batch size: 76, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:01:54,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=772440.0, ans=0.2 +2024-09-20 00:01:54,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=772440.0, ans=0.1 +2024-09-20 00:01:56,211 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:01:57,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=772440.0, ans=0.0 +2024-09-20 00:02:12,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=772480.0, ans=0.05 +2024-09-20 00:02:14,622 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.84 vs. limit=15.0 +2024-09-20 00:02:51,502 INFO [train.py:1198] (1/2) Epoch 43, batch 3100, loss[loss=0.2445, ctc_loss=0.1216, cr_loss=0.3538, attn_decoder_loss=0.2503, over 29216.00 frames. ], tot_loss[loss=0.233, ctc_loss=0.1111, cr_loss=0.3498, attn_decoder_loss=0.2388, over 5778106.95 frames. ], batch size: 100, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:02:52,727 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.69 vs. limit=15.0 +2024-09-20 00:03:00,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=772600.0, ans=0.1 +2024-09-20 00:03:09,151 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=16.97 vs. 
limit=22.5 +2024-09-20 00:03:13,638 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.17 vs. limit=10.0 +2024-09-20 00:03:41,811 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 8.560e+01 8.944e+01 9.719e+01 1.343e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-20 00:03:54,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=772760.0, ans=0.125 +2024-09-20 00:04:02,546 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.95 vs. limit=22.5 +2024-09-20 00:04:09,807 INFO [train.py:1198] (1/2) Epoch 43, batch 3150, loss[loss=0.2381, ctc_loss=0.112, cr_loss=0.3575, attn_decoder_loss=0.2442, over 28902.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1108, cr_loss=0.3496, attn_decoder_loss=0.2386, over 5783711.43 frames. ], batch size: 104, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:04:12,052 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=14.42 vs. limit=15.0 +2024-09-20 00:04:15,284 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.80 vs. limit=6.0 +2024-09-20 00:04:19,082 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=772800.0, ans=0.025 +2024-09-20 00:04:29,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=772840.0, ans=0.125 +2024-09-20 00:04:35,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=772840.0, ans=0.1 +2024-09-20 00:05:07,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=772920.0, ans=0.2 +2024-09-20 00:05:14,105 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.84 vs. limit=15.0 +2024-09-20 00:05:25,183 INFO [train.py:1198] (1/2) Epoch 43, batch 3200, loss[loss=0.236, ctc_loss=0.1192, cr_loss=0.3771, attn_decoder_loss=0.2406, over 29413.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1109, cr_loss=0.3495, attn_decoder_loss=0.2383, over 5794489.62 frames. ], batch size: 79, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:05:25,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=773000.0, ans=0.0 +2024-09-20 00:05:41,985 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:05:50,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=773040.0, ans=0.125 +2024-09-20 00:05:55,035 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:05:55,666 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.94 vs. 
limit=15.0 +2024-09-20 00:06:07,098 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:06:17,438 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.172e+01 8.459e+01 9.068e+01 9.712e+01 1.068e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 00:06:43,195 INFO [train.py:1198] (1/2) Epoch 43, batch 3250, loss[loss=0.2456, ctc_loss=0.1246, cr_loss=0.3949, attn_decoder_loss=0.2503, over 29715.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1112, cr_loss=0.3506, attn_decoder_loss=0.2389, over 5800841.85 frames. ], batch size: 84, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:06:45,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=773200.0, ans=0.1 +2024-09-20 00:07:01,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=773240.0, ans=0.5 +2024-09-20 00:07:21,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=773280.0, ans=0.125 +2024-09-20 00:07:33,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=773320.0, ans=0.125 +2024-09-20 00:07:41,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=773320.0, ans=0.125 +2024-09-20 00:07:50,430 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=773360.0, ans=0.0 +2024-09-20 00:07:50,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=773360.0, ans=0.125 +2024-09-20 00:07:52,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=773360.0, ans=0.125 +2024-09-20 00:08:00,883 INFO [train.py:1198] (1/2) Epoch 43, batch 3300, loss[loss=0.236, ctc_loss=0.1097, cr_loss=0.3362, attn_decoder_loss=0.2425, over 28277.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1103, cr_loss=0.3484, attn_decoder_loss=0.2374, over 5798078.31 frames. 
], batch size: 111, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:08:01,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=773400.0, ans=0.1 +2024-09-20 00:08:21,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=773440.0, ans=0.125 +2024-09-20 00:08:40,636 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:08:41,929 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:08:43,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=773480.0, ans=0.0 +2024-09-20 00:08:46,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=773520.0, ans=0.125 +2024-09-20 00:08:47,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=773520.0, ans=0.0 +2024-09-20 00:08:47,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=773520.0, ans=0.0 +2024-09-20 00:08:52,111 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.624e+01 9.248e+01 9.741e+01 2.844e+02, threshold=1.850e+02, percent-clipped=2.0 +2024-09-20 00:09:08,606 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:09:10,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=773560.0, ans=0.125 +2024-09-20 00:09:13,784 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.08 vs. limit=15.0 +2024-09-20 00:09:16,242 INFO [train.py:1198] (1/2) Epoch 43, batch 3350, loss[loss=0.2498, ctc_loss=0.1216, cr_loss=0.3802, attn_decoder_loss=0.2556, over 29014.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1106, cr_loss=0.349, attn_decoder_loss=0.2381, over 5773758.72 frames. ], batch size: 104, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:09:28,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=773600.0, ans=0.2 +2024-09-20 00:09:37,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=773640.0, ans=0.1 +2024-09-20 00:09:40,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=773640.0, ans=0.0 +2024-09-20 00:09:50,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=773680.0, ans=0.125 +2024-09-20 00:10:04,233 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=773720.0, ans=0.1 +2024-09-20 00:10:34,069 INFO [train.py:1198] (1/2) Epoch 43, batch 3400, loss[loss=0.2058, ctc_loss=0.09137, cr_loss=0.3018, attn_decoder_loss=0.2118, over 29328.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1107, cr_loss=0.349, attn_decoder_loss=0.238, over 5765795.04 frames. 
], batch size: 67, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:10:40,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=773800.0, ans=0.0 +2024-09-20 00:10:44,187 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.81 vs. limit=12.0 +2024-09-20 00:10:46,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=773800.0, ans=0.125 +2024-09-20 00:10:49,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=773840.0, ans=0.125 +2024-09-20 00:10:50,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=773840.0, ans=0.1 +2024-09-20 00:11:01,267 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.69 vs. limit=15.0 +2024-09-20 00:11:03,616 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=773840.0, ans=0.125 +2024-09-20 00:11:11,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=773880.0, ans=0.125 +2024-09-20 00:11:15,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=773880.0, ans=0.2 +2024-09-20 00:11:27,406 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.739e+01 8.527e+01 9.240e+01 9.845e+01 1.909e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-20 00:11:41,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=773960.0, ans=0.04949747468305833 +2024-09-20 00:11:47,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=773960.0, ans=0.1 +2024-09-20 00:11:47,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=773960.0, ans=0.0 +2024-09-20 00:11:51,437 INFO [train.py:1198] (1/2) Epoch 43, batch 3450, loss[loss=0.2259, ctc_loss=0.09544, cr_loss=0.3142, attn_decoder_loss=0.2334, over 28213.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1109, cr_loss=0.3501, attn_decoder_loss=0.2384, over 5774264.13 frames. ], batch size: 111, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:12:13,198 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:12:18,101 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.21 vs. 
limit=12.0 +2024-09-20 00:12:43,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=774120.0, ans=0.1 +2024-09-20 00:12:52,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=774160.0, ans=0.125 +2024-09-20 00:13:01,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=774160.0, ans=0.0 +2024-09-20 00:13:06,960 INFO [train.py:1198] (1/2) Epoch 43, batch 3500, loss[loss=0.2105, ctc_loss=0.09298, cr_loss=0.3048, attn_decoder_loss=0.2168, over 29315.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1104, cr_loss=0.3494, attn_decoder_loss=0.2378, over 5776231.11 frames. ], batch size: 71, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:13:08,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=774200.0, ans=0.125 +2024-09-20 00:13:12,323 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.34 vs. limit=22.5 +2024-09-20 00:13:19,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=774200.0, ans=0.125 +2024-09-20 00:13:22,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=774240.0, ans=0.0 +2024-09-20 00:13:38,446 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.10 vs. limit=15.0 +2024-09-20 00:13:59,841 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.916e+01 8.502e+01 8.947e+01 9.671e+01 2.846e+02, threshold=1.789e+02, percent-clipped=1.0 +2024-09-20 00:14:22,211 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=774400.0, ans=0.025 +2024-09-20 00:14:23,849 INFO [train.py:1198] (1/2) Epoch 43, batch 3550, loss[loss=0.2428, ctc_loss=0.1033, cr_loss=0.3393, attn_decoder_loss=0.2508, over 29690.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1104, cr_loss=0.3495, attn_decoder_loss=0.2379, over 5781522.46 frames. ], batch size: 89, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:14:35,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=774400.0, ans=0.125 +2024-09-20 00:14:54,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=774480.0, ans=0.0 +2024-09-20 00:15:01,461 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.10 vs. 
limit=22.5 +2024-09-20 00:15:03,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=774480.0, ans=0.0 +2024-09-20 00:15:06,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=774520.0, ans=0.2 +2024-09-20 00:15:09,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=774520.0, ans=0.0 +2024-09-20 00:15:11,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=774520.0, ans=0.2 +2024-09-20 00:15:19,231 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=1.70 vs. limit=15.0 +2024-09-20 00:15:34,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=774560.0, ans=0.1 +2024-09-20 00:15:35,398 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.38 vs. limit=15.0 +2024-09-20 00:15:37,572 INFO [train.py:1198] (1/2) Epoch 43, batch 3600, loss[loss=0.2292, ctc_loss=0.111, cr_loss=0.347, attn_decoder_loss=0.2346, over 29494.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1103, cr_loss=0.349, attn_decoder_loss=0.238, over 5790633.41 frames. ], batch size: 77, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:15:45,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=774600.0, ans=0.0 +2024-09-20 00:15:57,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=774640.0, ans=0.0 +2024-09-20 00:16:06,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=774640.0, ans=0.125 +2024-09-20 00:16:24,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=774720.0, ans=0.125 +2024-09-20 00:16:30,157 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.605e+01 8.541e+01 9.175e+01 9.569e+01 2.464e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-20 00:16:30,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=774720.0, ans=0.04949747468305833 +2024-09-20 00:16:36,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=774720.0, ans=0.0 +2024-09-20 00:16:40,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=774760.0, ans=0.125 +2024-09-20 00:16:49,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=774760.0, ans=0.1 +2024-09-20 00:16:52,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=774800.0, ans=0.5 +2024-09-20 00:16:54,083 INFO [train.py:1198] (1/2) Epoch 43, batch 3650, loss[loss=0.2424, ctc_loss=0.1243, cr_loss=0.3826, attn_decoder_loss=0.247, over 29485.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1098, cr_loss=0.3481, attn_decoder_loss=0.2373, over 5793112.75 frames. 
], batch size: 90, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:17:03,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.94 vs. limit=10.0 +2024-09-20 00:17:07,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=774840.0, ans=0.0 +2024-09-20 00:17:13,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=774840.0, ans=0.125 +2024-09-20 00:17:15,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=774840.0, ans=0.125 +2024-09-20 00:17:31,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=774880.0, ans=0.0 +2024-09-20 00:17:43,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=774920.0, ans=0.125 +2024-09-20 00:17:46,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=774920.0, ans=0.0 +2024-09-20 00:17:53,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=774960.0, ans=0.125 +2024-09-20 00:18:06,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=774960.0, ans=0.0 +2024-09-20 00:18:08,758 INFO [train.py:1198] (1/2) Epoch 43, batch 3700, loss[loss=0.2422, ctc_loss=0.1069, cr_loss=0.3568, attn_decoder_loss=0.2494, over 29706.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1096, cr_loss=0.3477, attn_decoder_loss=0.2374, over 5803602.83 frames. ], batch size: 84, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:18:10,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=775000.0, ans=0.125 +2024-09-20 00:18:40,510 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.81 vs. 
limit=15.0 +2024-09-20 00:18:42,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=775080.0, ans=0.125 +2024-09-20 00:18:42,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=775080.0, ans=0.125 +2024-09-20 00:18:45,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=775080.0, ans=0.125 +2024-09-20 00:18:58,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=775120.0, ans=0.2 +2024-09-20 00:18:59,087 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.832e+01 8.457e+01 9.128e+01 9.477e+01 6.609e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-20 00:19:00,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=775120.0, ans=0.125 +2024-09-20 00:19:11,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=775160.0, ans=0.0 +2024-09-20 00:19:18,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=775160.0, ans=0.125 +2024-09-20 00:19:23,195 INFO [train.py:1198] (1/2) Epoch 43, batch 3750, loss[loss=0.2063, ctc_loss=0.09031, cr_loss=0.3072, attn_decoder_loss=0.2124, over 29344.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1098, cr_loss=0.3481, attn_decoder_loss=0.2373, over 5807304.54 frames. ], batch size: 67, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:19:38,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=775240.0, ans=0.1 +2024-09-20 00:19:44,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.92 vs. limit=15.0 +2024-09-20 00:19:53,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=775280.0, ans=0.1 +2024-09-20 00:20:06,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=775280.0, ans=0.1 +2024-09-20 00:20:06,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer2.prob, batch_count=775280.0, ans=0.125 +2024-09-20 00:20:12,928 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:20:26,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=775360.0, ans=0.125 +2024-09-20 00:20:39,071 INFO [train.py:1198] (1/2) Epoch 43, batch 3800, loss[loss=0.2337, ctc_loss=0.1162, cr_loss=0.3477, attn_decoder_loss=0.2391, over 29618.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1094, cr_loss=0.3468, attn_decoder_loss=0.237, over 5797917.32 frames. 
], batch size: 86, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:20:46,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=775400.0, ans=0.2 +2024-09-20 00:21:03,729 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=17.78 vs. limit=22.5 +2024-09-20 00:21:28,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=775520.0, ans=0.125 +2024-09-20 00:21:29,526 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.047e+01 8.559e+01 9.199e+01 9.773e+01 2.259e+02, threshold=1.840e+02, percent-clipped=2.0 +2024-09-20 00:21:31,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=775520.0, ans=0.125 +2024-09-20 00:21:40,625 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=775560.0, ans=0.2 +2024-09-20 00:21:43,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=775560.0, ans=0.125 +2024-09-20 00:21:55,046 INFO [train.py:1198] (1/2) Epoch 43, batch 3850, loss[loss=0.2582, ctc_loss=0.1266, cr_loss=0.383, attn_decoder_loss=0.2643, over 29199.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1094, cr_loss=0.3471, attn_decoder_loss=0.2369, over 5811825.43 frames. ], batch size: 100, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:22:07,122 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=775600.0, ans=0.1 +2024-09-20 00:22:10,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=775640.0, ans=0.125 +2024-09-20 00:22:26,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=775680.0, ans=0.125 +2024-09-20 00:22:44,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=775720.0, ans=0.025 +2024-09-20 00:22:44,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=775720.0, ans=0.0 +2024-09-20 00:22:48,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=775720.0, ans=0.0 +2024-09-20 00:22:59,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.62 vs. limit=22.5 +2024-09-20 00:23:09,194 INFO [train.py:1198] (1/2) Epoch 43, batch 3900, loss[loss=0.2483, ctc_loss=0.1272, cr_loss=0.3921, attn_decoder_loss=0.253, over 29643.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1104, cr_loss=0.3499, attn_decoder_loss=0.2379, over 5816700.23 frames. 
], batch size: 86, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:23:09,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=775800.0, ans=0.125 +2024-09-20 00:23:24,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=775840.0, ans=0.125 +2024-09-20 00:23:50,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=775880.0, ans=0.025 +2024-09-20 00:23:59,430 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 8.663e+01 9.061e+01 9.537e+01 1.215e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-20 00:24:11,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=775960.0, ans=0.125 +2024-09-20 00:24:18,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=775960.0, ans=0.0 +2024-09-20 00:24:23,505 INFO [train.py:1198] (1/2) Epoch 43, batch 3950, loss[loss=0.2512, ctc_loss=0.1272, cr_loss=0.4017, attn_decoder_loss=0.2561, over 29496.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1106, cr_loss=0.3506, attn_decoder_loss=0.2382, over 5835989.40 frames. ], batch size: 97, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:24:32,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=776000.0, ans=0.125 +2024-09-20 00:24:51,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=776080.0, ans=0.125 +2024-09-20 00:25:01,235 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.46 vs. limit=12.0 +2024-09-20 00:25:04,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=776080.0, ans=0.025 +2024-09-20 00:25:10,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=776120.0, ans=0.2 +2024-09-20 00:25:30,191 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.76 vs. limit=15.0 +2024-09-20 00:25:38,031 INFO [train.py:1198] (1/2) Epoch 43, batch 4000, loss[loss=0.2217, ctc_loss=0.1109, cr_loss=0.3472, attn_decoder_loss=0.2263, over 29490.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1108, cr_loss=0.3507, attn_decoder_loss=0.2383, over 5812831.16 frames. ], batch size: 74, lr: 2.55e-03, grad_scale: 32.0 +2024-09-20 00:25:38,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=776200.0, ans=0.025 +2024-09-20 00:25:44,976 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.86 vs. 
limit=12.0 +2024-09-20 00:25:51,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=776240.0, ans=0.125 +2024-09-20 00:26:03,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=776240.0, ans=0.125 +2024-09-20 00:26:10,026 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.88 vs. limit=6.0 +2024-09-20 00:26:25,623 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=776320.0, ans=0.0 +2024-09-20 00:26:26,281 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=6.95 vs. limit=12.0 +2024-09-20 00:26:29,832 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.652e+01 8.878e+01 9.363e+01 9.780e+01 3.308e+02, threshold=1.873e+02, percent-clipped=2.0 +2024-09-20 00:26:53,237 INFO [train.py:1198] (1/2) Epoch 43, batch 4050, loss[loss=0.2467, ctc_loss=0.1354, cr_loss=0.3734, attn_decoder_loss=0.2507, over 20491.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1106, cr_loss=0.3504, attn_decoder_loss=0.2381, over 5797468.92 frames. ], batch size: 209, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:27:08,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=776440.0, ans=0.025 +2024-09-20 00:27:15,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten.whitening_limit, batch_count=776440.0, ans=22.5 +2024-09-20 00:28:00,063 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.75 vs. limit=15.0 +2024-09-20 00:28:06,605 INFO [train.py:1198] (1/2) Epoch 43, batch 4100, loss[loss=0.2525, ctc_loss=0.1275, cr_loss=0.3806, attn_decoder_loss=0.2579, over 29469.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.111, cr_loss=0.3513, attn_decoder_loss=0.2384, over 5792826.62 frames. ], batch size: 90, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:28:11,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=776600.0, ans=0.125 +2024-09-20 00:28:17,403 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.84 vs. limit=15.0 +2024-09-20 00:28:21,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=776640.0, ans=0.1 +2024-09-20 00:28:57,813 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.596e+01 8.624e+01 9.289e+01 9.929e+01 2.714e+02, threshold=1.858e+02, percent-clipped=2.0 +2024-09-20 00:29:05,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.35 vs. limit=15.0 +2024-09-20 00:29:15,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=776760.0, ans=0.025 +2024-09-20 00:29:20,438 INFO [train.py:1198] (1/2) Epoch 43, batch 4150, loss[loss=0.2208, ctc_loss=0.09867, cr_loss=0.3231, attn_decoder_loss=0.2272, over 29496.00 frames. 
], tot_loss[loss=0.232, ctc_loss=0.1105, cr_loss=0.3499, attn_decoder_loss=0.2378, over 5798167.59 frames. ], batch size: 77, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:29:32,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=776800.0, ans=0.1 +2024-09-20 00:29:35,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=776840.0, ans=0.07 +2024-09-20 00:29:48,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=776840.0, ans=0.125 +2024-09-20 00:29:53,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=776880.0, ans=0.125 +2024-09-20 00:30:02,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=776880.0, ans=0.0 +2024-09-20 00:30:10,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.17 vs. limit=15.0 +2024-09-20 00:30:20,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=776960.0, ans=0.125 +2024-09-20 00:30:36,201 INFO [train.py:1198] (1/2) Epoch 43, batch 4200, loss[loss=0.2527, ctc_loss=0.13, cr_loss=0.3892, attn_decoder_loss=0.2577, over 29470.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1105, cr_loss=0.3498, attn_decoder_loss=0.2379, over 5800030.74 frames. ], batch size: 90, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:30:45,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=777000.0, ans=0.125 +2024-09-20 00:31:16,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=777080.0, ans=0.125 +2024-09-20 00:31:29,059 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.533e+01 8.571e+01 8.984e+01 9.502e+01 1.265e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-20 00:31:36,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=777160.0, ans=0.125 +2024-09-20 00:31:49,479 INFO [train.py:1198] (1/2) Epoch 43, batch 4250, loss[loss=0.2158, ctc_loss=0.1061, cr_loss=0.328, attn_decoder_loss=0.2207, over 29536.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1107, cr_loss=0.3503, attn_decoder_loss=0.2383, over 5806456.85 frames. ], batch size: 74, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:31:50,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=9.90 vs. limit=12.0 +2024-09-20 00:33:01,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=777400.0, ans=0.125 +2024-09-20 00:33:02,930 INFO [train.py:1198] (1/2) Epoch 43, batch 4300, loss[loss=0.2342, ctc_loss=0.1117, cr_loss=0.3536, attn_decoder_loss=0.2399, over 29511.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1104, cr_loss=0.3496, attn_decoder_loss=0.2385, over 5795700.43 frames. 
], batch size: 87, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:33:07,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=777400.0, ans=0.125 +2024-09-20 00:33:07,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=777400.0, ans=0.2 +2024-09-20 00:33:12,561 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=8.48 vs. limit=15.0 +2024-09-20 00:33:27,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=777440.0, ans=0.1 +2024-09-20 00:33:33,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=777480.0, ans=0.125 +2024-09-20 00:33:36,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=777480.0, ans=0.1 +2024-09-20 00:33:46,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=777520.0, ans=0.0 +2024-09-20 00:33:49,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=777520.0, ans=0.1 +2024-09-20 00:33:56,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=777520.0, ans=0.0 +2024-09-20 00:33:57,765 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.855e+01 9.292e+01 9.899e+01 2.383e+02, threshold=1.858e+02, percent-clipped=1.0 +2024-09-20 00:33:59,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=777520.0, ans=0.125 +2024-09-20 00:34:18,757 INFO [train.py:1198] (1/2) Epoch 43, batch 4350, loss[loss=0.2532, ctc_loss=0.1253, cr_loss=0.3822, attn_decoder_loss=0.2589, over 29474.00 frames. ], tot_loss[loss=0.2356, ctc_loss=0.1128, cr_loss=0.3552, attn_decoder_loss=0.2413, over 5797273.86 frames. ], batch size: 97, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:34:30,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=777600.0, ans=0.125 +2024-09-20 00:34:42,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=777640.0, ans=0.125 +2024-09-20 00:34:54,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=777680.0, ans=0.0 +2024-09-20 00:34:54,228 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=777680.0, ans=0.0 +2024-09-20 00:34:58,916 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.70 vs. 
limit=22.5 +2024-09-20 00:35:07,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=777720.0, ans=0.0 +2024-09-20 00:35:13,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=777720.0, ans=0.0 +2024-09-20 00:35:13,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=777720.0, ans=0.1 +2024-09-20 00:35:21,133 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.84 vs. limit=10.0 +2024-09-20 00:35:21,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=777760.0, ans=0.0 +2024-09-20 00:35:31,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten.whitening_limit, batch_count=777800.0, ans=15.0 +2024-09-20 00:35:31,784 INFO [train.py:1198] (1/2) Epoch 43, batch 4400, loss[loss=0.2468, ctc_loss=0.1236, cr_loss=0.3771, attn_decoder_loss=0.2521, over 27151.00 frames. ], tot_loss[loss=0.2376, ctc_loss=0.1139, cr_loss=0.3577, attn_decoder_loss=0.2433, over 5764863.86 frames. ], batch size: 124, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:35:42,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=777800.0, ans=0.1 +2024-09-20 00:35:44,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=777840.0, ans=0.125 +2024-09-20 00:36:24,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=777920.0, ans=0.025 +2024-09-20 00:36:25,880 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.138e+01 9.169e+01 9.548e+01 1.005e+02 2.703e+02, threshold=1.910e+02, percent-clipped=1.0 +2024-09-20 00:36:27,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=777920.0, ans=0.125 +2024-09-20 00:36:46,775 INFO [train.py:1198] (1/2) Epoch 43, batch 4450, loss[loss=0.2488, ctc_loss=0.1239, cr_loss=0.3659, attn_decoder_loss=0.2545, over 20846.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1175, cr_loss=0.3634, attn_decoder_loss=0.2454, over 5572209.66 frames. 
], batch size: 209, lr: 2.55e-03, grad_scale: 16.0 +2024-09-20 00:37:02,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=778040.0, ans=0.125 +2024-09-20 00:37:03,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=778040.0, ans=0.125 +2024-09-20 00:37:09,589 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=778040.0, ans=0.1 +2024-09-20 00:37:24,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=778080.0, ans=0.0 +2024-09-20 00:37:27,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=778080.0, ans=0.1 +2024-09-20 00:37:30,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=778120.0, ans=0.125 +2024-09-20 00:37:48,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=778160.0, ans=0.125 +2024-09-20 00:37:48,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=778160.0, ans=0.125 +2024-09-20 00:38:01,715 INFO [train.py:1198] (1/2) Epoch 43, batch 4500, loss[loss=0.2444, ctc_loss=0.1242, cr_loss=0.3429, attn_decoder_loss=0.2502, over 20254.00 frames. ], tot_loss[loss=0.2419, ctc_loss=0.1203, cr_loss=0.3656, attn_decoder_loss=0.2473, over 5229674.72 frames. ], batch size: 209, lr: 2.55e-03, grad_scale: 8.0 +2024-09-20 00:38:04,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.86 vs. limit=6.0 +2024-09-20 00:38:27,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=778240.0, ans=0.0 +2024-09-20 00:39:29,388 INFO [train.py:1198] (1/2) Epoch 44, batch 0, loss[loss=0.2158, ctc_loss=0.1008, cr_loss=0.3388, attn_decoder_loss=0.221, over 29591.00 frames. ], tot_loss[loss=0.2158, ctc_loss=0.1008, cr_loss=0.3388, attn_decoder_loss=0.221, over 29591.00 frames. ], batch size: 73, lr: 2.52e-03, grad_scale: 16.0 +2024-09-20 00:39:29,388 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 00:39:43,183 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.5.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.3692, 5.4703, 5.0992, 3.1988], device='cuda:1') +2024-09-20 00:39:47,832 INFO [train.py:1230] (1/2) Epoch 44, validation: loss=0.2131, ctc_loss=0.03639, cr_loss=8.375e-15, attn_decoder_loss=0.2327, over 944034.00 frames. +2024-09-20 00:39:47,833 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 00:39:52,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=778300.0, ans=0.2 +2024-09-20 00:39:54,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.01 vs. 
limit=12.0 +2024-09-20 00:40:05,917 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.560e+01 1.073e+02 1.152e+02 1.272e+02 3.214e+02, threshold=2.305e+02, percent-clipped=2.0 +2024-09-20 00:40:09,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.29 vs. limit=15.0 +2024-09-20 00:40:32,903 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.43 vs. limit=15.0 +2024-09-20 00:40:33,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=778420.0, ans=0.0 +2024-09-20 00:40:39,178 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.50 vs. limit=22.5 +2024-09-20 00:40:41,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=778420.0, ans=0.0 +2024-09-20 00:41:03,896 INFO [train.py:1198] (1/2) Epoch 44, batch 50, loss[loss=0.208, ctc_loss=0.09482, cr_loss=0.3227, attn_decoder_loss=0.2134, over 29479.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1125, cr_loss=0.3533, attn_decoder_loss=0.2394, over 1265257.35 frames. ], batch size: 70, lr: 2.52e-03, grad_scale: 16.0 +2024-09-20 00:41:08,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=778500.0, ans=0.125 +2024-09-20 00:41:18,954 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.44 vs. limit=22.5 +2024-09-20 00:41:29,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.33 vs. limit=15.0 +2024-09-20 00:41:42,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=778580.0, ans=0.0 +2024-09-20 00:41:58,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=778620.0, ans=0.125 +2024-09-20 00:41:58,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=778620.0, ans=0.125 +2024-09-20 00:42:10,713 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.76 vs. limit=22.5 +2024-09-20 00:42:13,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=778660.0, ans=0.07 +2024-09-20 00:42:14,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=778660.0, ans=0.1 +2024-09-20 00:42:23,332 INFO [train.py:1198] (1/2) Epoch 44, batch 100, loss[loss=0.2302, ctc_loss=0.1168, cr_loss=0.3607, attn_decoder_loss=0.2347, over 29543.00 frames. ], tot_loss[loss=0.2359, ctc_loss=0.1137, cr_loss=0.357, attn_decoder_loss=0.2416, over 2250498.94 frames. 
], batch size: 76, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:42:41,356 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.570e+01 8.747e+01 9.046e+01 9.804e+01 1.542e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-20 00:42:41,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=778740.0, ans=0.2 +2024-09-20 00:43:11,619 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:43:22,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.48 vs. limit=15.0 +2024-09-20 00:43:37,842 INFO [train.py:1198] (1/2) Epoch 44, batch 150, loss[loss=0.207, ctc_loss=0.09263, cr_loss=0.3099, attn_decoder_loss=0.2129, over 29430.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1115, cr_loss=0.3512, attn_decoder_loss=0.239, over 3044969.81 frames. ], batch size: 70, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:43:44,495 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.25 vs. limit=6.0 +2024-09-20 00:43:49,252 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.29 vs. limit=10.0 +2024-09-20 00:43:51,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=778940.0, ans=0.0 +2024-09-20 00:44:16,534 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.18 vs. limit=15.0 +2024-09-20 00:44:38,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=779060.0, ans=0.025 +2024-09-20 00:44:52,617 INFO [train.py:1198] (1/2) Epoch 44, batch 200, loss[loss=0.2462, ctc_loss=0.1245, cr_loss=0.3783, attn_decoder_loss=0.2513, over 27141.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.111, cr_loss=0.3509, attn_decoder_loss=0.2382, over 3657293.12 frames. ], batch size: 124, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:45:13,157 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.338e+01 8.429e+01 8.994e+01 9.673e+01 1.827e+02, threshold=1.799e+02, percent-clipped=1.0 +2024-09-20 00:45:14,034 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.58 vs. limit=15.0 +2024-09-20 00:45:30,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=779180.0, ans=0.1 +2024-09-20 00:45:59,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=779260.0, ans=0.125 +2024-09-20 00:46:04,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=779260.0, ans=0.0 +2024-09-20 00:46:08,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=779260.0, ans=0.1 +2024-09-20 00:46:09,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.81 vs. 
limit=15.0 +2024-09-20 00:46:11,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=779300.0, ans=0.2 +2024-09-20 00:46:12,945 INFO [train.py:1198] (1/2) Epoch 44, batch 250, loss[loss=0.2473, ctc_loss=0.1179, cr_loss=0.3689, attn_decoder_loss=0.2535, over 29258.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1107, cr_loss=0.3503, attn_decoder_loss=0.2381, over 4140108.58 frames. ], batch size: 100, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:46:34,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=779340.0, ans=0.125 +2024-09-20 00:46:46,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=779380.0, ans=0.125 +2024-09-20 00:47:28,081 INFO [train.py:1198] (1/2) Epoch 44, batch 300, loss[loss=0.2441, ctc_loss=0.1218, cr_loss=0.3619, attn_decoder_loss=0.2497, over 29534.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1105, cr_loss=0.3504, attn_decoder_loss=0.2379, over 4508263.72 frames. ], batch size: 92, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:47:28,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=779500.0, ans=0.125 +2024-09-20 00:47:31,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=779500.0, ans=0.125 +2024-09-20 00:47:37,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=779500.0, ans=0.1 +2024-09-20 00:47:37,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=779500.0, ans=0.025 +2024-09-20 00:47:47,520 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.408e+01 8.498e+01 8.969e+01 9.392e+01 3.050e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-20 00:47:47,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=779540.0, ans=0.125 +2024-09-20 00:47:56,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=779580.0, ans=0.125 +2024-09-20 00:48:04,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=779580.0, ans=0.125 +2024-09-20 00:48:18,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.90 vs. limit=22.5 +2024-09-20 00:48:27,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=779660.0, ans=0.2 +2024-09-20 00:48:36,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.48 vs. limit=15.0 +2024-09-20 00:48:43,176 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.63 vs. limit=22.5 +2024-09-20 00:48:43,471 INFO [train.py:1198] (1/2) Epoch 44, batch 350, loss[loss=0.2162, ctc_loss=0.09987, cr_loss=0.3255, attn_decoder_loss=0.2219, over 29315.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1111, cr_loss=0.3514, attn_decoder_loss=0.2385, over 4794301.74 frames. 
], batch size: 71, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 00:48:56,439 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=779700.0, ans=0.125 +2024-09-20 00:48:59,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=779740.0, ans=0.125 +2024-09-20 00:49:10,322 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:49:29,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=779820.0, ans=0.0 +2024-09-20 00:49:56,594 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.27 vs. limit=15.0 +2024-09-20 00:50:00,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=779860.0, ans=0.1 +2024-09-20 00:50:03,129 INFO [train.py:1198] (1/2) Epoch 44, batch 400, loss[loss=0.2338, ctc_loss=0.1147, cr_loss=0.3502, attn_decoder_loss=0.2393, over 29715.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1107, cr_loss=0.3509, attn_decoder_loss=0.2381, over 5024348.33 frames. ], batch size: 82, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:50:22,883 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.675e+01 8.550e+01 9.066e+01 9.796e+01 2.019e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-20 00:50:38,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=779980.0, ans=0.125 +2024-09-20 00:50:40,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=779980.0, ans=0.125 +2024-09-20 00:50:54,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=780020.0, ans=0.1 +2024-09-20 00:50:58,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=780020.0, ans=0.125 +2024-09-20 00:50:58,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=780020.0, ans=0.125 +2024-09-20 00:51:17,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=780060.0, ans=0.2 +2024-09-20 00:51:19,797 INFO [train.py:1198] (1/2) Epoch 44, batch 450, loss[loss=0.2485, ctc_loss=0.1184, cr_loss=0.3593, attn_decoder_loss=0.255, over 29714.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1112, cr_loss=0.3516, attn_decoder_loss=0.2386, over 5185219.60 frames. ], batch size: 83, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:51:33,722 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=780140.0, ans=0.0 +2024-09-20 00:51:42,663 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=780140.0, ans=0.035 +2024-09-20 00:51:44,791 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.02 vs. 
limit=12.0 +2024-09-20 00:52:04,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=780220.0, ans=0.125 +2024-09-20 00:52:33,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=780260.0, ans=0.1 +2024-09-20 00:52:35,604 INFO [train.py:1198] (1/2) Epoch 44, batch 500, loss[loss=0.24, ctc_loss=0.1121, cr_loss=0.3644, attn_decoder_loss=0.2461, over 29435.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1105, cr_loss=0.3501, attn_decoder_loss=0.2375, over 5328994.15 frames. ], batch size: 94, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:52:35,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=780300.0, ans=0.125 +2024-09-20 00:52:46,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=780300.0, ans=0.125 +2024-09-20 00:52:57,327 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.573e+01 8.977e+01 9.726e+01 1.793e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-20 00:53:32,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=780420.0, ans=10.0 +2024-09-20 00:53:48,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=780460.0, ans=0.0 +2024-09-20 00:53:55,482 INFO [train.py:1198] (1/2) Epoch 44, batch 550, loss[loss=0.2401, ctc_loss=0.1102, cr_loss=0.3436, attn_decoder_loss=0.2469, over 28770.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1104, cr_loss=0.3499, attn_decoder_loss=0.2376, over 5421725.11 frames. ], batch size: 104, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:54:03,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=780500.0, ans=0.2 +2024-09-20 00:54:07,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=780500.0, ans=0.125 +2024-09-20 00:54:24,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=780580.0, ans=0.125 +2024-09-20 00:54:27,216 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=780580.0, ans=0.0 +2024-09-20 00:54:27,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=780580.0, ans=0.125 +2024-09-20 00:54:37,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=780580.0, ans=0.1 +2024-09-20 00:54:39,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.max_abs, batch_count=780620.0, ans=10.0 +2024-09-20 00:54:56,642 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.94 vs. 
limit=15.0 +2024-09-20 00:54:59,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=780660.0, ans=0.125 +2024-09-20 00:55:10,821 INFO [train.py:1198] (1/2) Epoch 44, batch 600, loss[loss=0.2453, ctc_loss=0.12, cr_loss=0.386, attn_decoder_loss=0.2506, over 29242.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1101, cr_loss=0.3499, attn_decoder_loss=0.2377, over 5508764.59 frames. ], batch size: 100, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:55:30,172 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.190e+01 8.511e+01 9.110e+01 9.777e+01 1.650e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-20 00:55:44,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=780780.0, ans=0.125 +2024-09-20 00:56:00,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.46 vs. limit=15.0 +2024-09-20 00:56:08,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=780820.0, ans=0.125 +2024-09-20 00:56:13,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=780860.0, ans=0.1 +2024-09-20 00:56:26,263 INFO [train.py:1198] (1/2) Epoch 44, batch 650, loss[loss=0.2339, ctc_loss=0.1091, cr_loss=0.3461, attn_decoder_loss=0.2401, over 29777.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1091, cr_loss=0.3477, attn_decoder_loss=0.2371, over 5586566.65 frames. ], batch size: 81, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:56:27,019 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.94 vs. limit=15.0 +2024-09-20 00:56:38,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=780900.0, ans=0.95 +2024-09-20 00:56:43,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=780940.0, ans=0.0 +2024-09-20 00:57:07,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2.whitening_limit, batch_count=780980.0, ans=15.0 +2024-09-20 00:57:20,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=781020.0, ans=0.125 +2024-09-20 00:57:39,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=781060.0, ans=0.125 +2024-09-20 00:57:46,187 INFO [train.py:1198] (1/2) Epoch 44, batch 700, loss[loss=0.228, ctc_loss=0.1024, cr_loss=0.3487, attn_decoder_loss=0.2342, over 29503.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1096, cr_loss=0.3486, attn_decoder_loss=0.2376, over 5638444.53 frames. ], batch size: 76, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:57:48,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.68 vs. 
limit=22.5 +2024-09-20 00:58:02,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=781140.0, ans=0.0 +2024-09-20 00:58:05,626 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.694e+01 8.523e+01 8.995e+01 9.436e+01 1.726e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-20 00:58:12,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.99 vs. limit=10.0 +2024-09-20 00:58:31,013 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.33 vs. limit=12.0 +2024-09-20 00:58:51,839 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.43 vs. limit=6.0 +2024-09-20 00:59:00,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=781300.0, ans=0.2 +2024-09-20 00:59:01,550 INFO [train.py:1198] (1/2) Epoch 44, batch 750, loss[loss=0.2337, ctc_loss=0.1052, cr_loss=0.3392, attn_decoder_loss=0.2404, over 29681.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1094, cr_loss=0.3477, attn_decoder_loss=0.2374, over 5676215.28 frames. ], batch size: 82, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 00:59:07,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=781300.0, ans=0.0 +2024-09-20 00:59:10,767 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=781300.0, ans=0.0 +2024-09-20 00:59:25,918 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 00:59:27,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=781340.0, ans=0.0 +2024-09-20 01:00:03,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=781460.0, ans=0.0 +2024-09-20 01:00:16,838 INFO [train.py:1198] (1/2) Epoch 44, batch 800, loss[loss=0.2118, ctc_loss=0.09986, cr_loss=0.3337, attn_decoder_loss=0.2168, over 29619.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1094, cr_loss=0.3478, attn_decoder_loss=0.2373, over 5707831.19 frames. ], batch size: 73, lr: 2.51e-03, grad_scale: 32.0 +2024-09-20 01:00:20,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=781500.0, ans=0.0 +2024-09-20 01:00:37,969 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.478e+01 8.977e+01 9.680e+01 1.726e+02, threshold=1.795e+02, percent-clipped=0.0 +2024-09-20 01:00:51,946 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.78 vs. 
limit=8.0 +2024-09-20 01:00:55,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=781580.0, ans=0.125 +2024-09-20 01:00:58,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=781580.0, ans=0.025 +2024-09-20 01:01:17,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=781620.0, ans=0.2 +2024-09-20 01:01:34,654 INFO [train.py:1198] (1/2) Epoch 44, batch 850, loss[loss=0.2458, ctc_loss=0.121, cr_loss=0.3785, attn_decoder_loss=0.2513, over 29714.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1091, cr_loss=0.3471, attn_decoder_loss=0.2371, over 5737127.43 frames. ], batch size: 89, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:01:56,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=781740.0, ans=0.125 +2024-09-20 01:01:56,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=781740.0, ans=0.125 +2024-09-20 01:02:05,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=781780.0, ans=0.0 +2024-09-20 01:02:19,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=781780.0, ans=0.025 +2024-09-20 01:02:32,030 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.46 vs. limit=15.0 +2024-09-20 01:02:39,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=781860.0, ans=0.2 +2024-09-20 01:02:49,416 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=781860.0, ans=0.125 +2024-09-20 01:02:52,368 INFO [train.py:1198] (1/2) Epoch 44, batch 900, loss[loss=0.2169, ctc_loss=0.09752, cr_loss=0.3264, attn_decoder_loss=0.2229, over 29602.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1095, cr_loss=0.3476, attn_decoder_loss=0.2373, over 5741884.04 frames. ], batch size: 73, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:03:10,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=781940.0, ans=0.125 +2024-09-20 01:03:15,005 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.724e+01 8.619e+01 9.074e+01 9.618e+01 1.505e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-20 01:03:15,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=781940.0, ans=0.5 +2024-09-20 01:03:25,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=781980.0, ans=0.5 +2024-09-20 01:03:27,804 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.33 vs. 
limit=22.5 +2024-09-20 01:03:31,791 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:03:39,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=782020.0, ans=0.2 +2024-09-20 01:03:41,657 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.64 vs. limit=15.0 +2024-09-20 01:03:45,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=782020.0, ans=0.125 +2024-09-20 01:03:52,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=782060.0, ans=0.1 +2024-09-20 01:04:07,360 INFO [train.py:1198] (1/2) Epoch 44, batch 950, loss[loss=0.2123, ctc_loss=0.08919, cr_loss=0.2939, attn_decoder_loss=0.2195, over 29494.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1097, cr_loss=0.3478, attn_decoder_loss=0.2375, over 5745363.79 frames. ], batch size: 74, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:04:08,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.33 vs. limit=15.0 +2024-09-20 01:04:16,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=782100.0, ans=0.125 +2024-09-20 01:04:28,630 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.14 vs. limit=15.0 +2024-09-20 01:04:37,289 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.62 vs. limit=15.0 +2024-09-20 01:04:39,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=782180.0, ans=0.125 +2024-09-20 01:04:47,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=782180.0, ans=0.0 +2024-09-20 01:04:47,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=782180.0, ans=0.1 +2024-09-20 01:04:52,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=782180.0, ans=0.2 +2024-09-20 01:05:11,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=782260.0, ans=0.2 +2024-09-20 01:05:21,749 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=782260.0, ans=0.125 +2024-09-20 01:05:24,399 INFO [train.py:1198] (1/2) Epoch 44, batch 1000, loss[loss=0.2208, ctc_loss=0.1045, cr_loss=0.3347, attn_decoder_loss=0.2262, over 29510.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1105, cr_loss=0.3493, attn_decoder_loss=0.2383, over 5739810.97 frames. 
], batch size: 77, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:05:24,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=782300.0, ans=0.1 +2024-09-20 01:05:32,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=782300.0, ans=0.125 +2024-09-20 01:05:39,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=782300.0, ans=0.125 +2024-09-20 01:05:49,521 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.182e+01 8.837e+01 9.355e+01 1.004e+02 2.810e+02, threshold=1.871e+02, percent-clipped=1.0 +2024-09-20 01:06:01,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=782380.0, ans=0.0 +2024-09-20 01:06:03,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=782380.0, ans=0.125 +2024-09-20 01:06:09,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=782380.0, ans=0.1 +2024-09-20 01:06:14,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=782420.0, ans=0.1 +2024-09-20 01:06:26,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=782460.0, ans=0.2 +2024-09-20 01:06:26,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=782460.0, ans=0.1 +2024-09-20 01:06:31,291 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.02 vs. limit=22.5 +2024-09-20 01:06:35,350 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:06:40,776 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.87 vs. limit=15.0 +2024-09-20 01:06:42,625 INFO [train.py:1198] (1/2) Epoch 44, batch 1050, loss[loss=0.2383, ctc_loss=0.1069, cr_loss=0.3399, attn_decoder_loss=0.2453, over 29677.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1101, cr_loss=0.3484, attn_decoder_loss=0.2376, over 5746852.39 frames. ], batch size: 85, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:06:44,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=782500.0, ans=0.125 +2024-09-20 01:06:46,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=782500.0, ans=0.2 +2024-09-20 01:07:07,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=782540.0, ans=0.025 +2024-09-20 01:07:10,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=782540.0, ans=0.125 +2024-09-20 01:07:12,538 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.63 vs. 
limit=6.0 +2024-09-20 01:07:19,488 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:07:20,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=782580.0, ans=0.0 +2024-09-20 01:07:44,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.43 vs. limit=15.0 +2024-09-20 01:07:47,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=782660.0, ans=0.0 +2024-09-20 01:07:57,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=782700.0, ans=0.125 +2024-09-20 01:07:58,413 INFO [train.py:1198] (1/2) Epoch 44, batch 1100, loss[loss=0.2353, ctc_loss=0.1118, cr_loss=0.3498, attn_decoder_loss=0.2413, over 29442.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1096, cr_loss=0.3476, attn_decoder_loss=0.237, over 5757843.35 frames. ], batch size: 78, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:08:23,099 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.650e+01 8.487e+01 8.944e+01 9.556e+01 1.706e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-20 01:08:38,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=782780.0, ans=0.2 +2024-09-20 01:09:16,487 INFO [train.py:1198] (1/2) Epoch 44, batch 1150, loss[loss=0.2274, ctc_loss=0.1074, cr_loss=0.355, attn_decoder_loss=0.2329, over 29454.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1096, cr_loss=0.3478, attn_decoder_loss=0.2372, over 5753714.82 frames. ], batch size: 78, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:09:28,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=782900.0, ans=0.2 +2024-09-20 01:09:49,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=782980.0, ans=0.125 +2024-09-20 01:10:33,778 INFO [train.py:1198] (1/2) Epoch 44, batch 1200, loss[loss=0.2405, ctc_loss=0.1119, cr_loss=0.345, attn_decoder_loss=0.2471, over 29661.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.11, cr_loss=0.3483, attn_decoder_loss=0.2378, over 5746868.15 frames. 
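[Annotation] The WARNING lines from optim.py ("Clipping_scale=2.0, grad-norm quartiles ... threshold=..., percent-clipped=...") report adaptive gradient clipping: the threshold tracks recent grad-norm statistics instead of being a fixed constant. A hedged sketch, assuming the threshold is a multiple of the running median; the class name and the exact statistic are assumptions, not icefall's optim.py verbatim:

```python
# Sketch of adaptive grad-norm clipping: keep a history of norms, print the
# five quantiles seen in the log, clip to clipping_scale x median.
from collections import deque

import torch


class AdaptiveGradClipper:
    def __init__(self, clipping_scale: float = 2.0, history: int = 1024):
        self.scale = clipping_scale
        self.norms = deque(maxlen=history)
        self.clipped = 0
        self.total = 0

    def __call__(self, parameters) -> None:
        grads = [p.grad for p in parameters if p.grad is not None]
        norm = torch.linalg.vector_norm(
            torch.stack([torch.linalg.vector_norm(g) for g in grads])
        ).item()
        self.norms.append(norm)
        # the five "grad-norm quartiles" (min, q1, median, q3, max) in the log
        q = torch.quantile(
            torch.tensor(list(self.norms)),
            torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]),
        )
        threshold = self.scale * q[2].item()  # e.g. 2.0 x median
        self.total += 1
        if norm > threshold:
            self.clipped += 1  # feeds the "percent-clipped" figure
            for g in grads:
                g.mul_(threshold / norm)
```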
], batch size: 85, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:10:40,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=783100.0, ans=0.0 +2024-09-20 01:10:52,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=783140.0, ans=0.0 +2024-09-20 01:10:56,607 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.628e+01 8.611e+01 9.178e+01 9.686e+01 1.323e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-20 01:11:00,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=783140.0, ans=0.125 +2024-09-20 01:11:01,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=783140.0, ans=0.125 +2024-09-20 01:11:04,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=783180.0, ans=0.125 +2024-09-20 01:11:42,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=783260.0, ans=0.125 +2024-09-20 01:11:50,058 INFO [train.py:1198] (1/2) Epoch 44, batch 1250, loss[loss=0.2471, ctc_loss=0.1133, cr_loss=0.3485, attn_decoder_loss=0.2542, over 29513.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1106, cr_loss=0.35, attn_decoder_loss=0.2385, over 5774285.00 frames. ], batch size: 92, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:12:01,528 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.18 vs. limit=15.0 +2024-09-20 01:12:02,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=783300.0, ans=0.015 +2024-09-20 01:12:09,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=783340.0, ans=0.025 +2024-09-20 01:12:25,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=783380.0, ans=0.0 +2024-09-20 01:12:33,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=783380.0, ans=0.5 +2024-09-20 01:12:48,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=783420.0, ans=0.125 +2024-09-20 01:12:49,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=783420.0, ans=0.1 +2024-09-20 01:13:00,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=783460.0, ans=0.05 +2024-09-20 01:13:07,789 INFO [train.py:1198] (1/2) Epoch 44, batch 1300, loss[loss=0.2507, ctc_loss=0.1213, cr_loss=0.3649, attn_decoder_loss=0.257, over 28407.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1102, cr_loss=0.349, attn_decoder_loss=0.2378, over 5779010.53 frames. 
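[Annotation] The "Whitening: name=..., num_groups=..., num_channels=..., metric=X vs. limit=Y" lines track how far a module's activations are from having a "white" (isotropic) channel covariance; a penalty only activates once the metric exceeds the limit. The eigenvalue-ratio metric below is an illustrative stand-in (1.0 for perfectly white features, growing as energy concentrates in a few directions); the exact formula in scaling.py may differ:

```python
# Illustrative whitening diagnostic: ratio of the second moment of the
# covariance eigenvalues to the squared mean eigenvalue, per channel group.
import torch


def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> float:
    # x: (num_frames, num_channels), channels split into groups as logged
    n, c = x.shape
    xg = x.reshape(n, num_groups, c // num_groups).transpose(0, 1)
    xg = xg - xg.mean(dim=1, keepdim=True)
    cov = xg.transpose(1, 2) @ xg / n          # per-group covariance
    eigs = torch.linalg.eigvalsh(cov)          # real, since cov is symmetric
    return ((eigs ** 2).mean() / eigs.mean().clamp(min=1e-20) ** 2).item()


x = torch.randn(4000, 192)                     # approximately white input
print(whitening_metric(x))                     # ~1.0, far below limit=15.0
```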
], batch size: 111, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:13:09,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=783500.0, ans=0.125 +2024-09-20 01:13:23,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=783540.0, ans=0.0 +2024-09-20 01:13:23,742 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.81 vs. limit=15.0 +2024-09-20 01:13:30,710 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.490e+01 8.901e+01 9.557e+01 1.827e+02, threshold=1.780e+02, percent-clipped=0.0 +2024-09-20 01:13:54,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=783620.0, ans=0.0 +2024-09-20 01:13:57,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=783620.0, ans=0.125 +2024-09-20 01:14:19,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.09 vs. limit=22.5 +2024-09-20 01:14:20,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=783660.0, ans=0.125 +2024-09-20 01:14:25,847 INFO [train.py:1198] (1/2) Epoch 44, batch 1350, loss[loss=0.2298, ctc_loss=0.1086, cr_loss=0.3415, attn_decoder_loss=0.2356, over 29720.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1099, cr_loss=0.349, attn_decoder_loss=0.2377, over 5796406.59 frames. ], batch size: 81, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:14:46,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=783740.0, ans=0.2 +2024-09-20 01:14:46,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=783740.0, ans=0.125 +2024-09-20 01:14:46,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=783740.0, ans=0.1 +2024-09-20 01:14:48,270 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=783740.0, ans=0.0 +2024-09-20 01:14:51,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=783740.0, ans=0.1 +2024-09-20 01:15:07,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=783780.0, ans=0.025 +2024-09-20 01:15:22,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=783820.0, ans=0.2 +2024-09-20 01:15:25,914 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=783860.0, ans=0.0 +2024-09-20 01:15:31,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=783860.0, ans=0.125 +2024-09-20 01:15:40,845 INFO [train.py:1198] (1/2) Epoch 44, batch 1400, loss[loss=0.2042, ctc_loss=0.08623, cr_loss=0.2869, attn_decoder_loss=0.2109, over 29567.00 frames. 
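[Annotation] Each "loss[...]" entry reports three components next to one combined value: a CTC loss, a consistency-regularization (cr) loss and an attention-decoder loss. A minimal sketch of the combination, assuming a weighted sum; the scale values below are illustrative placeholders, not the weights used in this run:

```python
# Sketch: the combined "loss" as a weighted sum of the three logged parts.
def combine_losses(ctc_loss: float, cr_loss: float, attn_decoder_loss: float,
                   ctc_scale: float = 0.1, cr_scale: float = 0.1,
                   attn_decoder_scale: float = 0.8) -> float:
    # Scales are placeholders; the recipe sets the actual values.
    return (ctc_scale * ctc_loss
            + cr_scale * cr_loss
            + attn_decoder_scale * attn_decoder_loss)
```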
], tot_loss[loss=0.2314, ctc_loss=0.1094, cr_loss=0.3483, attn_decoder_loss=0.2372, over 5807602.73 frames. ], batch size: 69, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:15:54,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=783940.0, ans=0.125 +2024-09-20 01:16:03,121 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.318e+01 8.567e+01 9.208e+01 9.655e+01 2.033e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-20 01:16:14,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.93 vs. limit=12.0 +2024-09-20 01:16:16,612 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.10 vs. limit=15.0 +2024-09-20 01:16:35,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=784020.0, ans=0.1 +2024-09-20 01:16:47,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=784020.0, ans=0.1 +2024-09-20 01:16:53,067 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.39 vs. limit=15.0 +2024-09-20 01:16:56,844 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:16:59,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=784060.0, ans=0.125 +2024-09-20 01:17:05,607 INFO [train.py:1198] (1/2) Epoch 44, batch 1450, loss[loss=0.2459, ctc_loss=0.1244, cr_loss=0.3641, attn_decoder_loss=0.2513, over 29484.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1095, cr_loss=0.3482, attn_decoder_loss=0.2375, over 5804508.84 frames. ], batch size: 94, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:17:25,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=784140.0, ans=0.0 +2024-09-20 01:17:28,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=784140.0, ans=0.125 +2024-09-20 01:17:47,283 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=784180.0, ans=0.0 +2024-09-20 01:17:55,278 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.77 vs. limit=15.0 +2024-09-20 01:18:06,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=784260.0, ans=0.0 +2024-09-20 01:18:17,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=784260.0, ans=0.2 +2024-09-20 01:18:20,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=784260.0, ans=0.2 +2024-09-20 01:18:23,244 INFO [train.py:1198] (1/2) Epoch 44, batch 1500, loss[loss=0.241, ctc_loss=0.1205, cr_loss=0.378, attn_decoder_loss=0.246, over 29618.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1101, cr_loss=0.3492, attn_decoder_loss=0.2381, over 5804621.35 frames. 
], batch size: 86, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:18:28,880 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.65 vs. limit=15.0 +2024-09-20 01:18:41,086 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.34 vs. limit=6.0 +2024-09-20 01:18:42,268 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.67 vs. limit=12.0 +2024-09-20 01:18:44,891 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=784340.0, ans=0.1 +2024-09-20 01:18:47,500 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.764e+01 8.610e+01 9.049e+01 9.653e+01 2.114e+02, threshold=1.810e+02, percent-clipped=1.0 +2024-09-20 01:18:50,031 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.68 vs. limit=10.0 +2024-09-20 01:19:12,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=784420.0, ans=0.0 +2024-09-20 01:19:38,998 INFO [train.py:1198] (1/2) Epoch 44, batch 1550, loss[loss=0.2581, ctc_loss=0.1301, cr_loss=0.3947, attn_decoder_loss=0.2635, over 29533.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1106, cr_loss=0.3498, attn_decoder_loss=0.2381, over 5779849.29 frames. ], batch size: 90, lr: 2.51e-03, grad_scale: 8.0 +2024-09-20 01:19:40,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=784500.0, ans=0.125 +2024-09-20 01:19:52,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=784540.0, ans=0.1 +2024-09-20 01:19:53,211 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=12.0 +2024-09-20 01:20:10,732 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.03 vs. limit=15.0 +2024-09-20 01:20:29,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.58 vs. limit=15.0 +2024-09-20 01:20:49,675 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.53 vs. limit=15.0 +2024-09-20 01:20:55,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=784700.0, ans=0.125 +2024-09-20 01:20:56,201 INFO [train.py:1198] (1/2) Epoch 44, batch 1600, loss[loss=0.2386, ctc_loss=0.1099, cr_loss=0.3469, attn_decoder_loss=0.2452, over 29673.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1104, cr_loss=0.3498, attn_decoder_loss=0.2379, over 5761692.75 frames. 
], batch size: 85, lr: 2.51e-03, grad_scale: 16.0 +2024-09-20 01:20:56,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=784700.0, ans=0.125 +2024-09-20 01:21:17,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=784740.0, ans=10.0 +2024-09-20 01:21:20,302 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.634e+01 8.674e+01 9.197e+01 9.675e+01 9.690e+02, threshold=1.839e+02, percent-clipped=2.0 +2024-09-20 01:21:30,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=784780.0, ans=0.125 +2024-09-20 01:22:04,514 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.28 vs. limit=15.0 +2024-09-20 01:22:14,095 INFO [train.py:1198] (1/2) Epoch 44, batch 1650, loss[loss=0.2373, ctc_loss=0.1039, cr_loss=0.3313, attn_decoder_loss=0.2448, over 29700.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1098, cr_loss=0.3483, attn_decoder_loss=0.2375, over 5756680.97 frames. ], batch size: 89, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:22:24,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=784900.0, ans=0.1 +2024-09-20 01:22:42,952 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=784980.0, ans=0.125 +2024-09-20 01:22:50,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=784980.0, ans=0.2 +2024-09-20 01:23:29,265 INFO [train.py:1198] (1/2) Epoch 44, batch 1700, loss[loss=0.2114, ctc_loss=0.09354, cr_loss=0.3106, attn_decoder_loss=0.2176, over 29562.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1096, cr_loss=0.3478, attn_decoder_loss=0.2376, over 5778362.82 frames. ], batch size: 69, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:23:29,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.const_attention_rate, batch_count=785100.0, ans=0.025 +2024-09-20 01:23:34,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=785100.0, ans=0.0 +2024-09-20 01:23:41,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=785100.0, ans=0.2 +2024-09-20 01:23:52,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=785140.0, ans=0.04949747468305833 +2024-09-20 01:23:55,595 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.621e+01 9.129e+01 9.684e+01 1.448e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-20 01:24:16,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=785220.0, ans=0.05 +2024-09-20 01:24:32,907 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.55 vs. 
limit=15.0 +2024-09-20 01:24:38,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=785260.0, ans=0.125 +2024-09-20 01:24:46,809 INFO [train.py:1198] (1/2) Epoch 44, batch 1750, loss[loss=0.2139, ctc_loss=0.09729, cr_loss=0.3297, attn_decoder_loss=0.2195, over 29352.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1094, cr_loss=0.348, attn_decoder_loss=0.2375, over 5787360.89 frames. ], batch size: 67, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:25:04,608 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.69 vs. limit=15.0 +2024-09-20 01:25:20,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=785380.0, ans=0.125 +2024-09-20 01:25:35,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=785420.0, ans=10.0 +2024-09-20 01:26:03,910 INFO [train.py:1198] (1/2) Epoch 44, batch 1800, loss[loss=0.2415, ctc_loss=0.1167, cr_loss=0.356, attn_decoder_loss=0.2474, over 29698.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1098, cr_loss=0.3488, attn_decoder_loss=0.2378, over 5789928.54 frames. ], batch size: 83, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:26:08,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=785500.0, ans=0.125 +2024-09-20 01:26:27,989 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.581e+01 8.530e+01 8.993e+01 9.458e+01 1.310e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-20 01:26:37,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=785580.0, ans=0.2 +2024-09-20 01:27:17,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=785660.0, ans=0.5 +2024-09-20 01:27:17,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=785660.0, ans=0.0 +2024-09-20 01:27:19,811 INFO [train.py:1198] (1/2) Epoch 44, batch 1850, loss[loss=0.2388, ctc_loss=0.1148, cr_loss=0.3545, attn_decoder_loss=0.2447, over 29635.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1097, cr_loss=0.349, attn_decoder_loss=0.2376, over 5795555.77 frames. ], batch size: 86, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:27:26,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=785700.0, ans=0.0 +2024-09-20 01:27:36,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=785740.0, ans=0.125 +2024-09-20 01:27:49,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=785740.0, ans=0.1 +2024-09-20 01:27:50,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=785780.0, ans=0.0 +2024-09-20 01:28:19,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=785820.0, ans=0.125 +2024-09-20 01:28:37,314 INFO [train.py:1198] (1/2) Epoch 44, batch 1900, loss[loss=0.2522, ctc_loss=0.1203, cr_loss=0.3864, attn_decoder_loss=0.2583, over 29723.00 frames. 
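[Annotation] Schedule names such as "conv_skip_rate", "attention_skip_rate" and "bypass.skip_rate" refer to stochastic-depth-style regularization: during training, a submodule's contribution is dropped with a scheduled probability, while "bypass.scale_min" bounds how far a learned bypass can shrink a residual branch. A hedged sketch of the skip mechanism only; illustrative, not scaling.py verbatim:

```python
# Sketch: drop a residual submodule with probability skip_rate in training.
import torch


def residual_with_skip(module: torch.nn.Module, x: torch.Tensor,
                       skip_rate: float, training: bool) -> torch.Tensor:
    if training and torch.rand(()).item() < skip_rate:
        return x                # bypass the submodule for this batch
    return x + module(x)        # normal residual application
```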
], tot_loss[loss=0.2322, ctc_loss=0.1097, cr_loss=0.349, attn_decoder_loss=0.238, over 5803689.71 frames. ], batch size: 89, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:28:46,936 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.36 vs. limit=10.0 +2024-09-20 01:28:55,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=785940.0, ans=0.125 +2024-09-20 01:29:01,486 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.304e+01 8.601e+01 9.112e+01 9.762e+01 1.549e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-20 01:29:03,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=785940.0, ans=0.1 +2024-09-20 01:29:04,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=785940.0, ans=0.1 +2024-09-20 01:29:09,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=785980.0, ans=0.2 +2024-09-20 01:29:15,024 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.73 vs. limit=10.0 +2024-09-20 01:29:34,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=786020.0, ans=0.125 +2024-09-20 01:29:37,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=786020.0, ans=0.1 +2024-09-20 01:29:54,964 INFO [train.py:1198] (1/2) Epoch 44, batch 1950, loss[loss=0.2283, ctc_loss=0.1003, cr_loss=0.3277, attn_decoder_loss=0.2353, over 29442.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.1103, cr_loss=0.3501, attn_decoder_loss=0.2391, over 5818241.63 frames. ], batch size: 78, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:30:04,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=786100.0, ans=0.125 +2024-09-20 01:30:08,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.97 vs. limit=6.0 +2024-09-20 01:30:32,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=786180.0, ans=0.09899494936611666 +2024-09-20 01:30:50,799 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.12 vs. limit=15.0 +2024-09-20 01:31:02,942 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.18 vs. limit=15.0 +2024-09-20 01:31:08,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=786300.0, ans=0.125 +2024-09-20 01:31:09,698 INFO [train.py:1198] (1/2) Epoch 44, batch 2000, loss[loss=0.1971, ctc_loss=0.08472, cr_loss=0.2902, attn_decoder_loss=0.2032, over 29357.00 frames. ], tot_loss[loss=0.2335, ctc_loss=0.1108, cr_loss=0.3505, attn_decoder_loss=0.2393, over 5796196.93 frames. 
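[Annotation] The "grad_scale" field moves between 8.0, 16.0 and 32.0 across batches. This is the standard dynamic loss-scaling pattern of mixed-precision training: the scale doubles after a stretch of overflow-free steps and is cut back when gradients overflow. PyTorch's `torch.cuda.amp.GradScaler` implements this; the toy version below just illustrates the update rule (names and intervals are illustrative):

```python
# Toy dynamic loss scale: grow after stable steps, back off on overflow.
class DynamicGradScale:
    def __init__(self, init_scale: float = 8.0, growth_interval: int = 2000):
        self.scale = init_scale
        self.growth_interval = growth_interval
        self._good_steps = 0

    def update(self, grads_finite: bool) -> None:
        if not grads_finite:
            self.scale *= 0.5      # back off after an overflow
            self._good_steps = 0
        else:
            self._good_steps += 1
            if self._good_steps >= self.growth_interval:
                self.scale *= 2.0  # grow again once training is stable
                self._good_steps = 0
```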
], batch size: 67, lr: 2.50e-03, grad_scale: 32.0 +2024-09-20 01:31:36,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=786340.0, ans=0.125 +2024-09-20 01:31:37,699 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.322e+01 8.592e+01 9.152e+01 9.700e+01 1.620e+02, threshold=1.830e+02, percent-clipped=0.0 +2024-09-20 01:31:37,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=786340.0, ans=0.2 +2024-09-20 01:31:45,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=786380.0, ans=0.125 +2024-09-20 01:31:54,876 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.min_abs, batch_count=786380.0, ans=0.5 +2024-09-20 01:31:56,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=786420.0, ans=0.1 +2024-09-20 01:32:05,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=786420.0, ans=0.0 +2024-09-20 01:32:08,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=786420.0, ans=0.125 +2024-09-20 01:32:26,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=786500.0, ans=0.1 +2024-09-20 01:32:27,935 INFO [train.py:1198] (1/2) Epoch 44, batch 2050, loss[loss=0.2008, ctc_loss=0.08845, cr_loss=0.2835, attn_decoder_loss=0.207, over 29453.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1104, cr_loss=0.3497, attn_decoder_loss=0.2386, over 5787404.81 frames. ], batch size: 70, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:32:59,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=786580.0, ans=0.2 +2024-09-20 01:33:14,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=786620.0, ans=0.1 +2024-09-20 01:33:24,754 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.21 vs. limit=12.0 +2024-09-20 01:33:32,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.71 vs. limit=15.0 +2024-09-20 01:33:44,954 INFO [train.py:1198] (1/2) Epoch 44, batch 2100, loss[loss=0.2325, ctc_loss=0.1082, cr_loss=0.3408, attn_decoder_loss=0.2387, over 29754.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1099, cr_loss=0.349, attn_decoder_loss=0.2379, over 5800337.71 frames. 
], batch size: 81, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:33:49,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=786700.0, ans=0.125 +2024-09-20 01:33:58,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=786740.0, ans=0.2 +2024-09-20 01:34:10,460 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.556e+01 8.592e+01 8.953e+01 9.546e+01 1.075e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-20 01:34:13,732 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=786780.0, ans=0.125 +2024-09-20 01:34:59,824 INFO [train.py:1198] (1/2) Epoch 44, batch 2150, loss[loss=0.2396, ctc_loss=0.1219, cr_loss=0.3951, attn_decoder_loss=0.2439, over 29433.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1095, cr_loss=0.3488, attn_decoder_loss=0.2375, over 5815014.52 frames. ], batch size: 78, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:35:06,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=786900.0, ans=0.07 +2024-09-20 01:35:09,364 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:35:26,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=786940.0, ans=0.0 +2024-09-20 01:35:43,136 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=786980.0, ans=0.125 +2024-09-20 01:35:49,189 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=787020.0, ans=0.125 +2024-09-20 01:35:50,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=787020.0, ans=0.1 +2024-09-20 01:36:07,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=787060.0, ans=0.0 +2024-09-20 01:36:17,940 INFO [train.py:1198] (1/2) Epoch 44, batch 2200, loss[loss=0.2352, ctc_loss=0.1096, cr_loss=0.3628, attn_decoder_loss=0.2411, over 29626.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1095, cr_loss=0.3484, attn_decoder_loss=0.2372, over 5811115.01 frames. 
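[Annotation] The "WithLoss: name=...self_attn_weights, loss-sum=0.000e+00" lines report an auxiliary penalty attached to a tensor as a side effect of the forward pass (loss-sum 0.000e+00 simply means the penalty is currently contributing nothing). A schematic autograd recipe for this pattern, as an assumption about the mechanism rather than the scaling.py implementation:

```python
# Sketch: inject an auxiliary gradient without changing the forward value,
# equivalent to silently adding <x, aux_grad> to the loss.
import torch


class AttachAuxLoss(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x: torch.Tensor, aux_grad: torch.Tensor) -> torch.Tensor:
        ctx.save_for_backward(aux_grad)
        return x                     # forward value is untouched

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor):
        (aux_grad,) = ctx.saved_tensors
        return grad_out + aux_grad, None
```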
], batch size: 86, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:36:34,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=787140.0, ans=0.125 +2024-09-20 01:36:40,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=787140.0, ans=0.125 +2024-09-20 01:36:43,209 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.567e+01 8.551e+01 8.996e+01 9.508e+01 1.674e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-20 01:36:46,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=787180.0, ans=0.0 +2024-09-20 01:36:49,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=787180.0, ans=0.025 +2024-09-20 01:37:12,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=787220.0, ans=0.125 +2024-09-20 01:37:12,842 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:37:17,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=787260.0, ans=0.2 +2024-09-20 01:37:35,604 INFO [train.py:1198] (1/2) Epoch 44, batch 2250, loss[loss=0.2382, ctc_loss=0.1161, cr_loss=0.3454, attn_decoder_loss=0.2441, over 29714.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1095, cr_loss=0.3481, attn_decoder_loss=0.2373, over 5809877.23 frames. ], batch size: 82, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:37:35,992 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:37:44,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=787300.0, ans=0.0 +2024-09-20 01:37:53,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.59 vs. limit=15.0 +2024-09-20 01:38:17,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=787380.0, ans=0.125 +2024-09-20 01:38:50,741 INFO [train.py:1198] (1/2) Epoch 44, batch 2300, loss[loss=0.2058, ctc_loss=0.09286, cr_loss=0.3223, attn_decoder_loss=0.2112, over 29311.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.109, cr_loss=0.3469, attn_decoder_loss=0.2363, over 5796722.15 frames. ], batch size: 71, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:39:18,221 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.593e+01 8.668e+01 9.192e+01 9.767e+01 1.748e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-20 01:39:20,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=787540.0, ans=0.0 +2024-09-20 01:39:23,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=787580.0, ans=0.125 +2024-09-20 01:39:32,946 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.46 vs. 
limit=15.0 +2024-09-20 01:39:36,160 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.62 vs. limit=6.0 +2024-09-20 01:39:58,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=787660.0, ans=0.025 +2024-09-20 01:40:08,534 INFO [train.py:1198] (1/2) Epoch 44, batch 2350, loss[loss=0.2417, ctc_loss=0.1243, cr_loss=0.3962, attn_decoder_loss=0.246, over 29685.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1095, cr_loss=0.3479, attn_decoder_loss=0.2369, over 5802072.37 frames. ], batch size: 83, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:40:14,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=787700.0, ans=0.125 +2024-09-20 01:40:19,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=787700.0, ans=0.125 +2024-09-20 01:40:20,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=787700.0, ans=0.125 +2024-09-20 01:40:31,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=787740.0, ans=0.1 +2024-09-20 01:40:55,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=787820.0, ans=0.125 +2024-09-20 01:41:21,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=787860.0, ans=0.2 +2024-09-20 01:41:22,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.91 vs. limit=6.0 +2024-09-20 01:41:26,338 INFO [train.py:1198] (1/2) Epoch 44, batch 2400, loss[loss=0.2229, ctc_loss=0.1003, cr_loss=0.3474, attn_decoder_loss=0.2288, over 29572.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1099, cr_loss=0.3492, attn_decoder_loss=0.2375, over 5806382.58 frames. ], batch size: 76, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:41:40,240 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=787940.0, ans=0.125 +2024-09-20 01:41:52,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=787940.0, ans=0.1 +2024-09-20 01:41:53,425 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.381e+01 8.728e+01 9.218e+01 9.758e+01 1.607e+02, threshold=1.844e+02, percent-clipped=0.0 +2024-09-20 01:41:55,946 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.48 vs. limit=15.0 +2024-09-20 01:42:42,333 INFO [train.py:1198] (1/2) Epoch 44, batch 2450, loss[loss=0.244, ctc_loss=0.1262, cr_loss=0.4025, attn_decoder_loss=0.2482, over 29720.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1105, cr_loss=0.3504, attn_decoder_loss=0.2385, over 5782909.97 frames. 
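[Annotation] The "balancer" schedules (balancer.prob, min_positive, max_abs, min_abs and similar) belong to a regularizer that, with probability `prob`, checks per-channel activation statistics and nudges gradients so the fraction of positive values and the mean magnitude stay inside configured bounds. Only the statistics check is sketched below; the actual gradient modification in scaling.py is more involved:

```python
# Sketch: count channels whose statistics fall outside balancer bounds.
import torch


def balancer_violations(x: torch.Tensor, min_positive: float = 0.05,
                        max_abs: float = 10.0) -> dict:
    # x: (num_frames, num_channels); stats are taken per channel
    frac_positive = (x > 0).float().mean(dim=0)
    mean_abs = x.abs().mean(dim=0)
    return {
        "too_few_positive": (frac_positive < min_positive).sum().item(),
        "too_large": (mean_abs > max_abs).sum().item(),
    }
```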
], batch size: 82, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:43:04,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=788140.0, ans=0.0 +2024-09-20 01:43:10,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=788140.0, ans=0.125 +2024-09-20 01:43:13,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=788180.0, ans=0.125 +2024-09-20 01:43:40,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=788220.0, ans=0.2 +2024-09-20 01:43:58,674 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=788300.0, ans=0.025 +2024-09-20 01:43:59,852 INFO [train.py:1198] (1/2) Epoch 44, batch 2500, loss[loss=0.2334, ctc_loss=0.1027, cr_loss=0.3407, attn_decoder_loss=0.2404, over 29631.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1102, cr_loss=0.3499, attn_decoder_loss=0.2384, over 5792796.82 frames. ], batch size: 86, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:44:22,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=788340.0, ans=0.2 +2024-09-20 01:44:26,964 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.358e+01 8.639e+01 9.215e+01 9.726e+01 1.262e+02, threshold=1.843e+02, percent-clipped=0.0 +2024-09-20 01:44:39,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=788380.0, ans=0.2 +2024-09-20 01:44:45,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=788420.0, ans=0.015 +2024-09-20 01:44:59,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=788460.0, ans=0.125 +2024-09-20 01:45:17,643 INFO [train.py:1198] (1/2) Epoch 44, batch 2550, loss[loss=0.2011, ctc_loss=0.08944, cr_loss=0.2988, attn_decoder_loss=0.2068, over 29325.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.11, cr_loss=0.3496, attn_decoder_loss=0.2383, over 5796723.91 frames. ], batch size: 67, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:45:31,365 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:46:01,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=788620.0, ans=0.0 +2024-09-20 01:46:23,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=788660.0, ans=0.1 +2024-09-20 01:46:26,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=788660.0, ans=0.2 +2024-09-20 01:46:32,704 INFO [train.py:1198] (1/2) Epoch 44, batch 2600, loss[loss=0.233, ctc_loss=0.1053, cr_loss=0.3393, attn_decoder_loss=0.2397, over 29430.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1098, cr_loss=0.3491, attn_decoder_loss=0.2383, over 5794029.51 frames. 
], batch size: 78, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:46:54,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=788740.0, ans=0.125 +2024-09-20 01:47:00,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=788740.0, ans=0.125 +2024-09-20 01:47:03,305 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.447e+01 8.497e+01 9.008e+01 9.570e+01 2.359e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 01:47:09,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=788780.0, ans=0.0 +2024-09-20 01:47:14,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=788780.0, ans=0.05 +2024-09-20 01:47:17,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=788780.0, ans=0.125 +2024-09-20 01:47:32,552 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:47:43,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=788860.0, ans=0.0 +2024-09-20 01:47:50,434 INFO [train.py:1198] (1/2) Epoch 44, batch 2650, loss[loss=0.2516, ctc_loss=0.1263, cr_loss=0.3905, attn_decoder_loss=0.2568, over 29305.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.11, cr_loss=0.3497, attn_decoder_loss=0.2385, over 5800891.33 frames. ], batch size: 100, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:48:08,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=788940.0, ans=0.125 +2024-09-20 01:48:54,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.min_positive, batch_count=789060.0, ans=0.05 +2024-09-20 01:49:07,973 INFO [train.py:1198] (1/2) Epoch 44, batch 2700, loss[loss=0.2548, ctc_loss=0.1171, cr_loss=0.3813, attn_decoder_loss=0.2616, over 29510.00 frames. ], tot_loss[loss=0.2331, ctc_loss=0.1104, cr_loss=0.3505, attn_decoder_loss=0.2389, over 5796905.50 frames. ], batch size: 87, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:49:18,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=789100.0, ans=0.0 +2024-09-20 01:49:21,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=789140.0, ans=0.0 +2024-09-20 01:49:23,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=789140.0, ans=0.0 +2024-09-20 01:49:36,542 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.386e+01 8.638e+01 9.038e+01 9.626e+01 7.105e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-20 01:49:38,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=789180.0, ans=0.125 +2024-09-20 01:49:46,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.34 vs. 
limit=10.0 +2024-09-20 01:49:47,352 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:49:47,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=789180.0, ans=0.1 +2024-09-20 01:50:04,645 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.22 vs. limit=22.5 +2024-09-20 01:50:08,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=789260.0, ans=0.1 +2024-09-20 01:50:22,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=789300.0, ans=0.025 +2024-09-20 01:50:23,490 INFO [train.py:1198] (1/2) Epoch 44, batch 2750, loss[loss=0.2262, ctc_loss=0.1081, cr_loss=0.3554, attn_decoder_loss=0.2314, over 29503.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1099, cr_loss=0.3489, attn_decoder_loss=0.2379, over 5795460.65 frames. ], batch size: 75, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:50:42,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=789340.0, ans=0.0 +2024-09-20 01:50:46,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=789340.0, ans=0.0 +2024-09-20 01:50:57,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=789380.0, ans=0.125 +2024-09-20 01:51:00,919 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.18 vs. limit=15.0 +2024-09-20 01:51:10,039 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.29 vs. limit=6.0 +2024-09-20 01:51:15,830 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.06 vs. limit=22.5 +2024-09-20 01:51:21,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=789420.0, ans=0.125 +2024-09-20 01:51:23,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=789420.0, ans=0.125 +2024-09-20 01:51:41,196 INFO [train.py:1198] (1/2) Epoch 44, batch 2800, loss[loss=0.2513, ctc_loss=0.1396, cr_loss=0.3926, attn_decoder_loss=0.255, over 19668.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1102, cr_loss=0.3494, attn_decoder_loss=0.2381, over 5776509.94 frames. ], batch size: 211, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:52:05,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=789540.0, ans=0.0 +2024-09-20 01:52:06,125 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.89 vs. 
limit=22.5 +2024-09-20 01:52:09,705 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.668e+01 8.779e+01 9.114e+01 9.644e+01 1.703e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-20 01:52:48,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=789660.0, ans=0.125 +2024-09-20 01:52:58,697 INFO [train.py:1198] (1/2) Epoch 44, batch 2850, loss[loss=0.2211, ctc_loss=0.09977, cr_loss=0.3344, attn_decoder_loss=0.2272, over 29507.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1104, cr_loss=0.35, attn_decoder_loss=0.2383, over 5761617.18 frames. ], batch size: 77, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 01:53:33,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=789780.0, ans=0.125 +2024-09-20 01:53:53,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=789820.0, ans=0.125 +2024-09-20 01:54:03,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=789860.0, ans=0.0 +2024-09-20 01:54:13,952 INFO [train.py:1198] (1/2) Epoch 44, batch 2900, loss[loss=0.2296, ctc_loss=0.1126, cr_loss=0.3644, attn_decoder_loss=0.2345, over 29422.00 frames. ], tot_loss[loss=0.2334, ctc_loss=0.1108, cr_loss=0.3511, attn_decoder_loss=0.2392, over 5787416.14 frames. ], batch size: 79, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:54:30,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=789940.0, ans=0.0 +2024-09-20 01:54:45,222 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:54:46,279 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.529e+01 8.571e+01 8.849e+01 9.680e+01 1.963e+02, threshold=1.770e+02, percent-clipped=2.0 +2024-09-20 01:54:51,702 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.79 vs. limit=12.0 +2024-09-20 01:55:18,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=790060.0, ans=0.0 +2024-09-20 01:55:31,609 INFO [train.py:1198] (1/2) Epoch 44, batch 2950, loss[loss=0.2296, ctc_loss=0.1057, cr_loss=0.3409, attn_decoder_loss=0.2357, over 29534.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1096, cr_loss=0.3483, attn_decoder_loss=0.2379, over 5784088.59 frames. ], batch size: 75, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:55:53,974 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.64 vs. limit=22.5 +2024-09-20 01:55:54,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=790140.0, ans=10.0 +2024-09-20 01:56:10,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=790180.0, ans=0.0 +2024-09-20 01:56:28,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=790220.0, ans=0.125 +2024-09-20 01:56:49,765 INFO [train.py:1198] (1/2) Epoch 44, batch 3000, loss[loss=0.2357, ctc_loss=0.1148, cr_loss=0.3625, attn_decoder_loss=0.2411, over 29763.00 frames. 
], tot_loss[loss=0.2318, ctc_loss=0.1093, cr_loss=0.3477, attn_decoder_loss=0.2377, over 5783879.94 frames. ], batch size: 81, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:56:49,765 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 01:57:08,098 INFO [train.py:1230] (1/2) Epoch 44, validation: loss=0.2127, ctc_loss=0.03705, cr_loss=7.369e-15, attn_decoder_loss=0.2322, over 944034.00 frames. +2024-09-20 01:57:08,099 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 01:57:17,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=790300.0, ans=0.0 +2024-09-20 01:57:26,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=790340.0, ans=0.025 +2024-09-20 01:57:29,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=790340.0, ans=0.125 +2024-09-20 01:57:30,235 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.37 vs. limit=15.0 +2024-09-20 01:57:32,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=790340.0, ans=0.125 +2024-09-20 01:57:38,252 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.678e+01 8.680e+01 9.147e+01 9.757e+01 3.916e+02, threshold=1.829e+02, percent-clipped=1.0 +2024-09-20 01:57:40,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=790380.0, ans=0.05 +2024-09-20 01:58:00,899 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.07 vs. limit=10.0 +2024-09-20 01:58:03,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.92 vs. limit=15.0 +2024-09-20 01:58:14,775 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:58:26,516 INFO [train.py:1198] (1/2) Epoch 44, batch 3050, loss[loss=0.2327, ctc_loss=0.112, cr_loss=0.3502, attn_decoder_loss=0.2383, over 29525.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1099, cr_loss=0.3491, attn_decoder_loss=0.2384, over 5778749.02 frames. 
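[Annotation] At batch 3000 the trainer pauses for a validation pass ("Computing validation loss" followed by a frame-weighted result over 944034 frames) and reports peak GPU memory. The near-zero validation cr_loss (7.369e-15) is consistent with consistency regularization mattering only in training. A hedged sketch of such a pass; function names are illustrative:

```python
# Sketch of a periodic validation pass: eval mode, no gradients,
# frame-weighted average, then back to training.
import torch


def compute_validation_loss(model: torch.nn.Module, valid_dl, loss_fn):
    model.eval()
    tot, frames = 0.0, 0.0
    with torch.no_grad():
        for batch in valid_dl:
            loss, num_frames = loss_fn(model, batch)
            tot += loss.item() * num_frames
            frames += num_frames
    model.train()
    # "Maximum memory allocated so far" can be read back with
    # torch.cuda.max_memory_allocated() // 2**20 (in MB).
    return tot / max(frames, 1.0)
```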
], batch size: 76, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:58:40,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=790540.0, ans=0.0 +2024-09-20 01:58:49,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=790540.0, ans=0.125 +2024-09-20 01:58:52,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=790540.0, ans=0.0 +2024-09-20 01:59:01,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=790580.0, ans=0.125 +2024-09-20 01:59:12,251 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:59:22,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=790620.0, ans=0.0 +2024-09-20 01:59:22,989 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.40 vs. limit=22.5 +2024-09-20 01:59:24,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.69 vs. limit=15.0 +2024-09-20 01:59:40,807 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 01:59:41,916 INFO [train.py:1198] (1/2) Epoch 44, batch 3100, loss[loss=0.2428, ctc_loss=0.111, cr_loss=0.3582, attn_decoder_loss=0.2495, over 29261.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.11, cr_loss=0.3494, attn_decoder_loss=0.2384, over 5778818.51 frames. ], batch size: 100, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 01:59:43,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=790700.0, ans=0.125 +2024-09-20 01:59:46,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.const_attention_rate, batch_count=790700.0, ans=0.025 +2024-09-20 01:59:50,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.89 vs. limit=15.0 +2024-09-20 02:00:11,909 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.289e+01 8.501e+01 8.989e+01 9.639e+01 2.477e+02, threshold=1.798e+02, percent-clipped=1.0 +2024-09-20 02:00:19,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=790780.0, ans=0.125 +2024-09-20 02:00:47,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=790860.0, ans=0.125 +2024-09-20 02:00:51,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=790860.0, ans=0.09899494936611666 +2024-09-20 02:00:58,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=790900.0, ans=0.125 +2024-09-20 02:00:59,875 INFO [train.py:1198] (1/2) Epoch 44, batch 3150, loss[loss=0.2451, ctc_loss=0.1245, cr_loss=0.3698, attn_decoder_loss=0.2503, over 28826.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1099, cr_loss=0.3485, attn_decoder_loss=0.2382, over 5783795.32 frames. 
], batch size: 104, lr: 2.50e-03, grad_scale: 8.0 +2024-09-20 02:01:23,454 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.25 vs. limit=15.0 +2024-09-20 02:01:24,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=790940.0, ans=0.1 +2024-09-20 02:01:46,619 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=791020.0, ans=0.015 +2024-09-20 02:02:17,227 INFO [train.py:1198] (1/2) Epoch 44, batch 3200, loss[loss=0.2272, ctc_loss=0.106, cr_loss=0.3297, attn_decoder_loss=0.2333, over 29395.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1098, cr_loss=0.3487, attn_decoder_loss=0.2378, over 5792395.42 frames. ], batch size: 79, lr: 2.50e-03, grad_scale: 16.0 +2024-09-20 02:02:17,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=791100.0, ans=0.125 +2024-09-20 02:02:21,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=791100.0, ans=0.0 +2024-09-20 02:02:25,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=791100.0, ans=0.0 +2024-09-20 02:02:46,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=791180.0, ans=0.125 +2024-09-20 02:02:47,456 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.189e+01 8.647e+01 9.072e+01 9.601e+01 1.731e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 02:03:16,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=791260.0, ans=0.125 +2024-09-20 02:03:22,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=791260.0, ans=0.125 +2024-09-20 02:03:33,117 INFO [train.py:1198] (1/2) Epoch 44, batch 3250, loss[loss=0.2354, ctc_loss=0.1047, cr_loss=0.3406, attn_decoder_loss=0.2423, over 29697.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1101, cr_loss=0.3491, attn_decoder_loss=0.2382, over 5799170.43 frames. ], batch size: 84, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:03:42,542 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=791300.0, ans=0.0 +2024-09-20 02:03:50,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=791340.0, ans=0.125 +2024-09-20 02:04:50,970 INFO [train.py:1198] (1/2) Epoch 44, batch 3300, loss[loss=0.2425, ctc_loss=0.113, cr_loss=0.3351, attn_decoder_loss=0.2495, over 28495.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1094, cr_loss=0.3473, attn_decoder_loss=0.237, over 5797422.32 frames. 
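In each train.py loss record, the first loss[...] block is the current batch alone and tot_loss[...] is a decayed running aggregate over recent batches; the fractional frame tallies (e.g. "over 5783795.32 frames") betray the exponential decay applied at each step, and explain why tot_loss drifts far more slowly than the per-batch values around it.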
], batch size: 112, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:04:54,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=791500.0, ans=0.125 +2024-09-20 02:05:19,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=791580.0, ans=0.1 +2024-09-20 02:05:21,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=791580.0, ans=0.125 +2024-09-20 02:05:22,611 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.134e+01 8.597e+01 9.177e+01 9.695e+01 2.585e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-20 02:05:24,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=791580.0, ans=0.125 +2024-09-20 02:05:28,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=791580.0, ans=0.0 +2024-09-20 02:05:33,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=791580.0, ans=0.1 +2024-09-20 02:05:43,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=791620.0, ans=0.2 +2024-09-20 02:05:48,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=791620.0, ans=0.0 +2024-09-20 02:05:59,840 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.57 vs. limit=15.0 +2024-09-20 02:06:03,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=791660.0, ans=0.0 +2024-09-20 02:06:07,998 INFO [train.py:1198] (1/2) Epoch 44, batch 3350, loss[loss=0.2473, ctc_loss=0.1236, cr_loss=0.3758, attn_decoder_loss=0.2527, over 28845.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1098, cr_loss=0.3484, attn_decoder_loss=0.2377, over 5775575.45 frames. ], batch size: 104, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:06:49,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_na.min_abs, batch_count=791780.0, ans=0.02 +2024-09-20 02:07:18,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=791860.0, ans=0.125 +2024-09-20 02:07:22,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=791900.0, ans=0.1 +2024-09-20 02:07:23,727 INFO [train.py:1198] (1/2) Epoch 44, batch 3400, loss[loss=0.2121, ctc_loss=0.09895, cr_loss=0.3425, attn_decoder_loss=0.2171, over 29302.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1104, cr_loss=0.3494, attn_decoder_loss=0.238, over 5768693.98 frames. 
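The scaling.py ScheduledFloat lines report, for a named hyper-parameter (skip rates, balancer probabilities, min/max limits and so on), the value in force at the current batch_count. Values of this kind are typically defined as piecewise-linear functions of training progress; a generic sketch follows, with placeholder breakpoints rather than the recipe's actual schedule:

def scheduled_float(batch_count: float,
                    points=((0.0, 0.3), (20000.0, 0.1), (80000.0, 0.0))):
    """Piecewise-linear interpolation between (batch_count, value)
    breakpoints; constant before the first and after the last."""
    if batch_count <= points[0][0]:
        return points[0][1]
    for (x0, y0), (x1, y1) in zip(points, points[1:]):
        if batch_count <= x1:
            t = (batch_count - x0) / (x1 - x0)
            return y0 + t * (y1 - y0)
    return points[-1][1]

# At batch_count ~ 790k, as in this log, a decaying rate like the one above
# has long since reached its floor: scheduled_float(790540.0) -> 0.0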
], batch size: 67, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:07:24,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=791900.0, ans=10.0 +2024-09-20 02:07:55,384 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 8.682e+01 9.111e+01 9.724e+01 2.135e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-20 02:08:15,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=792020.0, ans=0.125 +2024-09-20 02:08:41,413 INFO [train.py:1198] (1/2) Epoch 44, batch 3450, loss[loss=0.2337, ctc_loss=0.1003, cr_loss=0.3282, attn_decoder_loss=0.2412, over 28314.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1103, cr_loss=0.3493, attn_decoder_loss=0.2382, over 5775054.91 frames. ], batch size: 111, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:09:05,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=792140.0, ans=0.07 +2024-09-20 02:09:44,088 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=792260.0, ans=0.125 +2024-09-20 02:09:58,571 INFO [train.py:1198] (1/2) Epoch 44, batch 3500, loss[loss=0.2065, ctc_loss=0.1003, cr_loss=0.3348, attn_decoder_loss=0.2108, over 29319.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1101, cr_loss=0.3493, attn_decoder_loss=0.2378, over 5776672.36 frames. ], batch size: 71, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:10:07,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0 +2024-09-20 02:10:30,312 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.361e+01 8.582e+01 8.980e+01 9.639e+01 1.678e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-20 02:10:57,315 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=792460.0, ans=0.0 +2024-09-20 02:11:00,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=792460.0, ans=0.2 +2024-09-20 02:11:03,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=792460.0, ans=0.0 +2024-09-20 02:11:13,199 INFO [train.py:1198] (1/2) Epoch 44, batch 3550, loss[loss=0.2391, ctc_loss=0.1039, cr_loss=0.3516, attn_decoder_loss=0.2463, over 29690.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1099, cr_loss=0.3486, attn_decoder_loss=0.2377, over 5783112.36 frames. ], batch size: 89, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:11:13,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=792500.0, ans=0.125 +2024-09-20 02:11:37,623 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.57 vs. limit=15.0 +2024-09-20 02:11:59,695 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.50 vs. 
limit=22.5 +2024-09-20 02:12:10,926 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=792660.0, ans=0.025 +2024-09-20 02:12:26,349 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.52 vs. limit=15.0 +2024-09-20 02:12:26,911 INFO [train.py:1198] (1/2) Epoch 44, batch 3600, loss[loss=0.2233, ctc_loss=0.1045, cr_loss=0.3255, attn_decoder_loss=0.2292, over 29523.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1097, cr_loss=0.3482, attn_decoder_loss=0.2377, over 5792254.27 frames. ], batch size: 77, lr: 2.49e-03, grad_scale: 16.0 +2024-09-20 02:12:46,008 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.02 vs. limit=6.0 +2024-09-20 02:12:58,441 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.636e+01 8.550e+01 9.094e+01 9.613e+01 3.759e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-20 02:13:06,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=792780.0, ans=0.0 +2024-09-20 02:13:11,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=792820.0, ans=0.125 +2024-09-20 02:13:12,048 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=8.70 vs. limit=10.0 +2024-09-20 02:13:17,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=792820.0, ans=0.2 +2024-09-20 02:13:21,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=792820.0, ans=0.1 +2024-09-20 02:13:29,543 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.53 vs. limit=15.0 +2024-09-20 02:13:41,890 INFO [train.py:1198] (1/2) Epoch 44, batch 3650, loss[loss=0.2446, ctc_loss=0.1164, cr_loss=0.3485, attn_decoder_loss=0.2511, over 29518.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1093, cr_loss=0.3474, attn_decoder_loss=0.2372, over 5793758.19 frames. ], batch size: 90, lr: 2.49e-03, grad_scale: 16.0 +2024-09-20 02:13:48,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=792900.0, ans=0.2 +2024-09-20 02:14:00,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=792940.0, ans=0.125 +2024-09-20 02:14:10,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=792940.0, ans=0.125 +2024-09-20 02:14:22,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=792980.0, ans=0.125 +2024-09-20 02:14:46,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=793060.0, ans=0.2 +2024-09-20 02:14:55,904 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.50 vs. 
limit=15.0 +2024-09-20 02:14:58,108 INFO [train.py:1198] (1/2) Epoch 44, batch 3700, loss[loss=0.2402, ctc_loss=0.1059, cr_loss=0.3357, attn_decoder_loss=0.2476, over 29725.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1095, cr_loss=0.3484, attn_decoder_loss=0.2375, over 5803890.66 frames. ], batch size: 84, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:15:01,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=793100.0, ans=0.0 +2024-09-20 02:15:13,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=793140.0, ans=0.2 +2024-09-20 02:15:14,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.67 vs. limit=10.0 +2024-09-20 02:15:16,399 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=793140.0, ans=0.1 +2024-09-20 02:15:16,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=793140.0, ans=0.125 +2024-09-20 02:15:32,678 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.731e+01 8.629e+01 9.056e+01 9.534e+01 1.565e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-20 02:15:38,203 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.22 vs. limit=6.0 +2024-09-20 02:15:54,191 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.96 vs. limit=10.0 +2024-09-20 02:16:05,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=11.47 vs. limit=15.0 +2024-09-20 02:16:14,379 INFO [train.py:1198] (1/2) Epoch 44, batch 3750, loss[loss=0.2092, ctc_loss=0.09821, cr_loss=0.3158, attn_decoder_loss=0.2145, over 29332.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1097, cr_loss=0.3491, attn_decoder_loss=0.2374, over 5807683.14 frames. ], batch size: 67, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:16:24,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=793300.0, ans=0.1 +2024-09-20 02:16:32,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=793340.0, ans=0.1 +2024-09-20 02:16:38,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=793340.0, ans=0.125 +2024-09-20 02:16:45,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=793380.0, ans=0.09899494936611666 +2024-09-20 02:16:51,560 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=793380.0, ans=0.2 +2024-09-20 02:16:59,556 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.79 vs. 
limit=15.0 +2024-09-20 02:17:13,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=793460.0, ans=0.1 +2024-09-20 02:17:14,496 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.78 vs. limit=22.5 +2024-09-20 02:17:21,963 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.99 vs. limit=10.0 +2024-09-20 02:17:27,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=793500.0, ans=0.2 +2024-09-20 02:17:28,372 INFO [train.py:1198] (1/2) Epoch 44, batch 3800, loss[loss=0.2408, ctc_loss=0.1126, cr_loss=0.3657, attn_decoder_loss=0.247, over 29612.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1096, cr_loss=0.3485, attn_decoder_loss=0.2371, over 5797883.05 frames. ], batch size: 86, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:17:46,689 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.68 vs. limit=15.0 +2024-09-20 02:17:59,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=793580.0, ans=0.125 +2024-09-20 02:18:00,998 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.445e+01 8.351e+01 9.103e+01 9.836e+01 3.154e+02, threshold=1.821e+02, percent-clipped=2.0 +2024-09-20 02:18:40,153 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:18:42,686 INFO [train.py:1198] (1/2) Epoch 44, batch 3850, loss[loss=0.2452, ctc_loss=0.1241, cr_loss=0.3862, attn_decoder_loss=0.2501, over 29200.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1093, cr_loss=0.3487, attn_decoder_loss=0.2372, over 5811349.55 frames. ], batch size: 100, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:18:49,701 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.88 vs. limit=12.0 +2024-09-20 02:19:06,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=793740.0, ans=0.125 +2024-09-20 02:19:08,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=793740.0, ans=0.125 +2024-09-20 02:19:10,145 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.96 vs. limit=15.0 +2024-09-20 02:19:11,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_na.min_abs, batch_count=793780.0, ans=0.02 +2024-09-20 02:19:21,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=793780.0, ans=0.0 +2024-09-20 02:19:52,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=793860.0, ans=0.1 +2024-09-20 02:19:57,902 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.92 vs. 
limit=22.5 +2024-09-20 02:19:58,446 INFO [train.py:1198] (1/2) Epoch 44, batch 3900, loss[loss=0.2366, ctc_loss=0.1076, cr_loss=0.3394, attn_decoder_loss=0.2434, over 29647.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1097, cr_loss=0.3493, attn_decoder_loss=0.2375, over 5815762.56 frames. ], batch size: 86, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:20:06,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=793900.0, ans=0.0 +2024-09-20 02:20:12,401 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.39 vs. limit=22.5 +2024-09-20 02:20:19,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=793940.0, ans=0.2 +2024-09-20 02:20:31,105 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.718e+01 8.637e+01 9.253e+01 9.637e+01 1.224e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-20 02:20:36,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=793980.0, ans=0.125 +2024-09-20 02:20:42,461 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.29 vs. limit=15.0 +2024-09-20 02:21:08,387 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=794060.0, ans=0.0 +2024-09-20 02:21:12,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=794100.0, ans=0.1 +2024-09-20 02:21:14,111 INFO [train.py:1198] (1/2) Epoch 44, batch 3950, loss[loss=0.2531, ctc_loss=0.1233, cr_loss=0.3747, attn_decoder_loss=0.2591, over 29480.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1094, cr_loss=0.3495, attn_decoder_loss=0.2374, over 5835136.55 frames. ], batch size: 97, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:21:23,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=794100.0, ans=0.125 +2024-09-20 02:21:31,338 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.75 vs. limit=12.0 +2024-09-20 02:21:32,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=794140.0, ans=0.125 +2024-09-20 02:21:49,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=794180.0, ans=0.125 +2024-09-20 02:21:51,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=794180.0, ans=0.125 +2024-09-20 02:22:11,636 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=794260.0, ans=0.1 +2024-09-20 02:22:12,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=794260.0, ans=0.125 +2024-09-20 02:22:27,457 INFO [train.py:1198] (1/2) Epoch 44, batch 4000, loss[loss=0.2218, ctc_loss=0.09676, cr_loss=0.3126, attn_decoder_loss=0.2287, over 29498.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1097, cr_loss=0.3495, attn_decoder_loss=0.2376, over 5812776.30 frames. 
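grad_scale in these records moves between 8.0 and 16.0, the signature of dynamic loss scaling for mixed-precision training: the scale is halved when a step overflows and grown back after a run of clean steps. A generic sketch of such a loop; the GradScaler settings are illustrative, not taken from the recipe:

import torch

scaler = torch.cuda.amp.GradScaler(init_scale=16.0, growth_interval=2000)

def train_step(model, batch, optimizer, compute_loss):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = compute_loss(model, batch)
    scaler.scale(loss).backward()
    scaler.step(optimizer)   # skipped internally if inf/NaN grads are found
    scaler.update()          # halves the scale on overflow, doubles it after
                             # growth_interval clean steps
    return scaler.get_scale()  # the kind of value logged here as grad_scale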
], batch size: 74, lr: 2.49e-03, grad_scale: 16.0 +2024-09-20 02:22:36,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=794300.0, ans=0.1 +2024-09-20 02:22:43,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=794340.0, ans=0.5 +2024-09-20 02:22:48,811 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=17.57 vs. limit=22.5 +2024-09-20 02:22:51,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=794340.0, ans=0.125 +2024-09-20 02:22:58,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=794380.0, ans=0.0 +2024-09-20 02:23:01,247 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.802e+01 8.690e+01 9.242e+01 9.635e+01 1.653e+02, threshold=1.848e+02, percent-clipped=0.0 +2024-09-20 02:23:16,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=794420.0, ans=0.125 +2024-09-20 02:23:16,947 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=794420.0, ans=0.1 +2024-09-20 02:23:28,121 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.27 vs. limit=15.0 +2024-09-20 02:23:41,485 INFO [train.py:1198] (1/2) Epoch 44, batch 4050, loss[loss=0.2507, ctc_loss=0.1383, cr_loss=0.3814, attn_decoder_loss=0.2547, over 19797.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1098, cr_loss=0.3492, attn_decoder_loss=0.2375, over 5796288.93 frames. ], batch size: 210, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:23:42,406 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.57 vs. limit=22.5 +2024-09-20 02:23:43,970 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.57 vs. limit=22.5 +2024-09-20 02:24:01,558 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0 +2024-09-20 02:24:07,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=794540.0, ans=0.125 +2024-09-20 02:24:09,427 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=794580.0, ans=0.125 +2024-09-20 02:24:21,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=794580.0, ans=0.0 +2024-09-20 02:24:24,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=794620.0, ans=0.0 +2024-09-20 02:24:24,487 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.92 vs. 
limit=12.0 +2024-09-20 02:24:28,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=794620.0, ans=0.1 +2024-09-20 02:24:39,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=794660.0, ans=0.125 +2024-09-20 02:24:50,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=794660.0, ans=0.2 +2024-09-20 02:24:56,006 INFO [train.py:1198] (1/2) Epoch 44, batch 4100, loss[loss=0.2456, ctc_loss=0.1188, cr_loss=0.3768, attn_decoder_loss=0.2513, over 29509.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1101, cr_loss=0.35, attn_decoder_loss=0.2377, over 5792027.87 frames. ], batch size: 90, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:24:56,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=794700.0, ans=0.0 +2024-09-20 02:24:57,563 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=794700.0, ans=0.125 +2024-09-20 02:25:10,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=794740.0, ans=0.0 +2024-09-20 02:25:23,731 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=794740.0, ans=0.125 +2024-09-20 02:25:30,720 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.174e+01 8.703e+01 9.227e+01 9.918e+01 1.839e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-20 02:25:32,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=794780.0, ans=0.0 +2024-09-20 02:25:54,513 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=794860.0, ans=0.2 +2024-09-20 02:25:54,963 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.66 vs. limit=15.0 +2024-09-20 02:25:58,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=794860.0, ans=0.07 +2024-09-20 02:26:07,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=794860.0, ans=0.0 +2024-09-20 02:26:10,188 INFO [train.py:1198] (1/2) Epoch 44, batch 4150, loss[loss=0.2242, ctc_loss=0.1093, cr_loss=0.3378, attn_decoder_loss=0.2295, over 29480.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.11, cr_loss=0.3501, attn_decoder_loss=0.2375, over 5797517.99 frames. ], batch size: 77, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:26:17,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=794900.0, ans=0.125 +2024-09-20 02:26:27,203 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.88 vs. 
limit=15.0 +2024-09-20 02:26:29,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=794940.0, ans=0.1 +2024-09-20 02:26:33,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=794940.0, ans=0.1 +2024-09-20 02:26:40,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.68 vs. limit=15.0 +2024-09-20 02:26:48,955 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.97 vs. limit=22.5 +2024-09-20 02:26:55,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=795020.0, ans=0.125 +2024-09-20 02:26:57,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=795020.0, ans=0.1 +2024-09-20 02:27:23,537 INFO [train.py:1198] (1/2) Epoch 44, batch 4200, loss[loss=0.2539, ctc_loss=0.1361, cr_loss=0.4031, attn_decoder_loss=0.258, over 29525.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1101, cr_loss=0.3501, attn_decoder_loss=0.2376, over 5799826.37 frames. ], batch size: 90, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:27:31,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=795100.0, ans=0.2 +2024-09-20 02:27:55,084 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.67 vs. limit=15.0 +2024-09-20 02:27:57,346 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.818e+01 8.655e+01 9.286e+01 9.774e+01 5.497e+02, threshold=1.857e+02, percent-clipped=1.0 +2024-09-20 02:28:08,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=795220.0, ans=0.125 +2024-09-20 02:28:34,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=795260.0, ans=0.125 +2024-09-20 02:28:34,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=795260.0, ans=0.125 +2024-09-20 02:28:38,194 INFO [train.py:1198] (1/2) Epoch 44, batch 4250, loss[loss=0.2217, ctc_loss=0.101, cr_loss=0.3323, attn_decoder_loss=0.2277, over 29513.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1101, cr_loss=0.3503, attn_decoder_loss=0.2379, over 5805715.72 frames. 
], batch size: 74, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:28:48,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=795300.0, ans=0.125 +2024-09-20 02:29:01,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=795340.0, ans=0.1 +2024-09-20 02:29:12,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=795380.0, ans=0.0 +2024-09-20 02:29:14,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=795380.0, ans=0.2 +2024-09-20 02:29:24,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=795420.0, ans=0.0 +2024-09-20 02:29:25,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=795420.0, ans=0.025 +2024-09-20 02:29:33,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=795420.0, ans=0.125 +2024-09-20 02:29:45,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=795460.0, ans=0.0 +2024-09-20 02:29:51,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=795500.0, ans=0.125 +2024-09-20 02:29:52,308 INFO [train.py:1198] (1/2) Epoch 44, batch 4300, loss[loss=0.2304, ctc_loss=0.09845, cr_loss=0.3334, attn_decoder_loss=0.2376, over 29541.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1097, cr_loss=0.3491, attn_decoder_loss=0.238, over 5794882.27 frames. ], batch size: 87, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:30:15,352 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.89 vs. limit=12.0 +2024-09-20 02:30:16,395 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=795540.0, ans=0.125 +2024-09-20 02:30:26,383 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.608e+01 8.755e+01 9.251e+01 9.683e+01 2.005e+02, threshold=1.850e+02, percent-clipped=1.0 +2024-09-20 02:30:27,095 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.38 vs. limit=12.0 +2024-09-20 02:30:50,387 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:30:50,974 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.40 vs. limit=15.0 +2024-09-20 02:31:06,395 INFO [train.py:1198] (1/2) Epoch 44, batch 4350, loss[loss=0.2414, ctc_loss=0.1159, cr_loss=0.3584, attn_decoder_loss=0.2474, over 29493.00 frames. ], tot_loss[loss=0.2353, ctc_loss=0.1121, cr_loss=0.3548, attn_decoder_loss=0.2411, over 5796912.02 frames. 
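The scaling.py Whitening lines fire when a layer's feature covariance drifts too far from isotropic: the printed metric compares the spread of the covariance eigenvalues against their mean and is checked per module against a limit (metric=7.40 vs. limit=15.0 above). One common way to define such a metric, offered as a sketch rather than the recipe's exact formula:

import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    """x: (num_frames, num_channels) activations.

    Returns mean(eig^2) / mean(eig)^2 over the eigenvalues of the channel
    covariance: 1.0 for perfectly white features, approaching num_channels
    when all of the energy sits in a single direction.
    """
    x = x - x.mean(dim=0)
    cov = (x.T @ x) / x.shape[0]
    eigs = torch.linalg.eigvalsh(cov)  # real, since cov is symmetric PSD
    return (eigs ** 2).mean() / eigs.mean() ** 2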
], batch size: 97, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:31:45,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=795780.0, ans=0.2 +2024-09-20 02:32:10,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=795860.0, ans=0.0 +2024-09-20 02:32:20,758 INFO [train.py:1198] (1/2) Epoch 44, batch 4400, loss[loss=0.2468, ctc_loss=0.1238, cr_loss=0.3827, attn_decoder_loss=0.252, over 27386.00 frames. ], tot_loss[loss=0.2372, ctc_loss=0.1131, cr_loss=0.3569, attn_decoder_loss=0.243, over 5765671.91 frames. ], batch size: 124, lr: 2.49e-03, grad_scale: 16.0 +2024-09-20 02:32:23,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=795900.0, ans=0.125 +2024-09-20 02:32:31,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=795900.0, ans=0.125 +2024-09-20 02:32:40,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=795940.0, ans=0.125 +2024-09-20 02:32:54,482 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.213e+01 9.042e+01 9.394e+01 9.819e+01 2.193e+02, threshold=1.879e+02, percent-clipped=1.0 +2024-09-20 02:32:56,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=795980.0, ans=0.2 +2024-09-20 02:33:09,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=796020.0, ans=0.125 +2024-09-20 02:33:12,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=796020.0, ans=0.1 +2024-09-20 02:33:15,412 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=796020.0, ans=0.125 +2024-09-20 02:33:22,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=796060.0, ans=0.0 +2024-09-20 02:33:34,340 INFO [train.py:1198] (1/2) Epoch 44, batch 4450, loss[loss=0.2555, ctc_loss=0.1412, cr_loss=0.396, attn_decoder_loss=0.2594, over 19589.00 frames. ], tot_loss[loss=0.2396, ctc_loss=0.1167, cr_loss=0.3621, attn_decoder_loss=0.2452, over 5570449.37 frames. ], batch size: 209, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:34:04,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=796180.0, ans=0.1 +2024-09-20 02:34:07,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=796180.0, ans=0.125 +2024-09-20 02:34:35,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=796260.0, ans=0.125 +2024-09-20 02:34:44,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=796260.0, ans=0.125 +2024-09-20 02:34:49,807 INFO [train.py:1198] (1/2) Epoch 44, batch 4500, loss[loss=0.2507, ctc_loss=0.1326, cr_loss=0.385, attn_decoder_loss=0.2553, over 20655.00 frames. ], tot_loss[loss=0.2416, ctc_loss=0.12, cr_loss=0.3652, attn_decoder_loss=0.247, over 5228368.23 frames. 
], batch size: 210, lr: 2.49e-03, grad_scale: 8.0 +2024-09-20 02:34:50,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=796300.0, ans=0.1 +2024-09-20 02:34:51,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=796300.0, ans=0.125 +2024-09-20 02:34:56,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=796300.0, ans=0.2 +2024-09-20 02:34:56,894 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=7.64 vs. limit=15.0 +2024-09-20 02:34:57,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=796300.0, ans=0.0 +2024-09-20 02:35:19,679 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=15.18 vs. limit=15.0 +2024-09-20 02:35:26,136 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.915e+01 1.070e+02 1.147e+02 1.258e+02 2.122e+02, threshold=2.294e+02, percent-clipped=1.0 +2024-09-20 02:36:17,463 INFO [train.py:1198] (1/2) Epoch 45, batch 0, loss[loss=0.219, ctc_loss=0.09591, cr_loss=0.3232, attn_decoder_loss=0.2255, over 29611.00 frames. ], tot_loss[loss=0.219, ctc_loss=0.09591, cr_loss=0.3232, attn_decoder_loss=0.2255, over 29611.00 frames. ], batch size: 73, lr: 2.46e-03, grad_scale: 16.0 +2024-09-20 02:36:17,463 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 02:36:35,781 INFO [train.py:1230] (1/2) Epoch 45, validation: loss=0.2126, ctc_loss=0.03577, cr_loss=6.589e-15, attn_decoder_loss=0.2323, over 944034.00 frames. +2024-09-20 02:36:35,781 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 02:36:42,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=796400.0, ans=0.125 +2024-09-20 02:36:42,799 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.97 vs. limit=15.0 +2024-09-20 02:37:02,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=796440.0, ans=0.0 +2024-09-20 02:37:05,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=796440.0, ans=0.125 +2024-09-20 02:37:11,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=796480.0, ans=0.2 +2024-09-20 02:37:14,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=796480.0, ans=0.0 +2024-09-20 02:37:53,208 INFO [train.py:1198] (1/2) Epoch 45, batch 50, loss[loss=0.2089, ctc_loss=0.09461, cr_loss=0.3195, attn_decoder_loss=0.2145, over 29455.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1098, cr_loss=0.349, attn_decoder_loss=0.2381, over 1269172.78 frames. 
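Note that in both validation records (epoch 44: cr_loss=7.369e-15; epoch 45: cr_loss=6.589e-15) the consistency-regularization term is numerically zero while the CTC and attention-decoder terms are not. That is what one would expect if the CR term measures agreement between two differently-perturbed training-time passes: with the perturbations disabled at validation the passes coincide, leaving only floating-point noise.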
], batch size: 70, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:37:58,129 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=796600.0, ans=0.0 +2024-09-20 02:38:08,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=796640.0, ans=0.0 +2024-09-20 02:38:11,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=796640.0, ans=0.125 +2024-09-20 02:38:25,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=796680.0, ans=0.125 +2024-09-20 02:38:33,860 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=8.83 vs. limit=15.0 +2024-09-20 02:38:39,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=796720.0, ans=0.0 +2024-09-20 02:38:40,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=796720.0, ans=0.125 +2024-09-20 02:38:42,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=796720.0, ans=0.125 +2024-09-20 02:38:45,985 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.47 vs. limit=15.0 +2024-09-20 02:39:03,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=796760.0, ans=0.125 +2024-09-20 02:39:09,103 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.307e+01 8.517e+01 8.971e+01 9.670e+01 3.092e+02, threshold=1.794e+02, percent-clipped=1.0 +2024-09-20 02:39:09,124 INFO [train.py:1198] (1/2) Epoch 45, batch 100, loss[loss=0.2223, ctc_loss=0.09855, cr_loss=0.3215, attn_decoder_loss=0.2289, over 29531.00 frames. ], tot_loss[loss=0.2345, ctc_loss=0.1113, cr_loss=0.3523, attn_decoder_loss=0.2403, over 2254328.17 frames. ], batch size: 76, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:39:31,864 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=796840.0, ans=0.1 +2024-09-20 02:39:47,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=796880.0, ans=0.025 +2024-09-20 02:40:01,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.45 vs. limit=6.0 +2024-09-20 02:40:15,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=796960.0, ans=0.1 +2024-09-20 02:40:15,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=796960.0, ans=0.125 +2024-09-20 02:40:24,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=797000.0, ans=0.0 +2024-09-20 02:40:25,365 INFO [train.py:1198] (1/2) Epoch 45, batch 150, loss[loss=0.2144, ctc_loss=0.09842, cr_loss=0.3261, attn_decoder_loss=0.2201, over 29398.00 frames. 
], tot_loss[loss=0.2321, ctc_loss=0.1091, cr_loss=0.3481, attn_decoder_loss=0.238, over 3049078.97 frames. ], batch size: 70, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:40:34,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=797000.0, ans=0.0 +2024-09-20 02:40:39,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=797040.0, ans=0.125 +2024-09-20 02:40:47,958 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.97 vs. limit=15.0 +2024-09-20 02:40:59,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=797080.0, ans=0.0 +2024-09-20 02:41:10,427 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.82 vs. limit=6.0 +2024-09-20 02:41:11,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=797120.0, ans=10.0 +2024-09-20 02:41:36,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=797160.0, ans=0.1 +2024-09-20 02:41:42,430 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.724e+01 8.414e+01 8.795e+01 9.375e+01 1.270e+02, threshold=1.759e+02, percent-clipped=0.0 +2024-09-20 02:41:42,451 INFO [train.py:1198] (1/2) Epoch 45, batch 200, loss[loss=0.2466, ctc_loss=0.1143, cr_loss=0.3592, attn_decoder_loss=0.2534, over 27304.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1086, cr_loss=0.3467, attn_decoder_loss=0.237, over 3659275.26 frames. ], batch size: 124, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:41:45,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=797200.0, ans=0.125 +2024-09-20 02:41:52,230 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.54 vs. limit=15.0 +2024-09-20 02:41:54,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=797200.0, ans=0.0 +2024-09-20 02:41:56,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=797240.0, ans=0.125 +2024-09-20 02:42:20,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=797280.0, ans=0.125 +2024-09-20 02:42:26,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=797320.0, ans=0.125 +2024-09-20 02:42:45,410 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.30 vs. 
limit=12.0 +2024-09-20 02:42:46,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=797360.0, ans=0.2 +2024-09-20 02:42:49,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=797360.0, ans=0.125 +2024-09-20 02:42:52,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=797360.0, ans=0.1 +2024-09-20 02:42:58,110 INFO [train.py:1198] (1/2) Epoch 45, batch 250, loss[loss=0.2475, ctc_loss=0.1143, cr_loss=0.3599, attn_decoder_loss=0.2543, over 29245.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1084, cr_loss=0.346, attn_decoder_loss=0.2371, over 4141352.84 frames. ], batch size: 100, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:43:01,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=797400.0, ans=0.125 +2024-09-20 02:43:18,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=797440.0, ans=0.125 +2024-09-20 02:43:53,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=797520.0, ans=0.125 +2024-09-20 02:44:16,168 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.164e+01 8.464e+01 8.917e+01 9.593e+01 1.535e+02, threshold=1.783e+02, percent-clipped=0.0 +2024-09-20 02:44:16,190 INFO [train.py:1198] (1/2) Epoch 45, batch 300, loss[loss=0.249, ctc_loss=0.1149, cr_loss=0.351, attn_decoder_loss=0.2561, over 29494.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1082, cr_loss=0.3457, attn_decoder_loss=0.2368, over 4509788.25 frames. ], batch size: 92, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:44:48,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=797680.0, ans=0.0 +2024-09-20 02:45:14,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=797720.0, ans=0.2 +2024-09-20 02:45:33,891 INFO [train.py:1198] (1/2) Epoch 45, batch 350, loss[loss=0.211, ctc_loss=0.09443, cr_loss=0.3245, attn_decoder_loss=0.2167, over 29337.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1087, cr_loss=0.3473, attn_decoder_loss=0.2371, over 4795555.80 frames. 
], batch size: 71, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:45:43,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=797800.0, ans=0.125 +2024-09-20 02:45:44,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=797800.0, ans=0.0 +2024-09-20 02:45:52,129 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:46:01,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=797840.0, ans=0.0 +2024-09-20 02:46:01,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=797840.0, ans=0.2 +2024-09-20 02:46:02,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=797880.0, ans=0.125 +2024-09-20 02:46:04,843 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.15 vs. limit=12.0 +2024-09-20 02:46:22,988 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.60 vs. limit=10.0 +2024-09-20 02:46:23,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=797920.0, ans=0.125 +2024-09-20 02:46:35,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=797960.0, ans=0.1 +2024-09-20 02:46:40,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=797960.0, ans=0.125 +2024-09-20 02:46:44,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=797960.0, ans=0.05 +2024-09-20 02:46:48,812 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.652e+01 8.541e+01 8.980e+01 9.725e+01 1.224e+02, threshold=1.796e+02, percent-clipped=0.0 +2024-09-20 02:46:48,838 INFO [train.py:1198] (1/2) Epoch 45, batch 400, loss[loss=0.2365, ctc_loss=0.109, cr_loss=0.348, attn_decoder_loss=0.243, over 29685.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1085, cr_loss=0.3468, attn_decoder_loss=0.2368, over 5025545.60 frames. ], batch size: 82, lr: 2.46e-03, grad_scale: 16.0 +2024-09-20 02:46:50,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=798000.0, ans=0.2 +2024-09-20 02:46:58,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=798000.0, ans=0.2 +2024-09-20 02:47:07,698 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.54 vs. 
limit=15.0 +2024-09-20 02:47:20,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=798080.0, ans=0.0 +2024-09-20 02:47:33,565 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=798080.0, ans=0.0 +2024-09-20 02:47:47,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=798120.0, ans=0.125 +2024-09-20 02:47:54,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=798160.0, ans=0.0 +2024-09-20 02:48:06,792 INFO [train.py:1198] (1/2) Epoch 45, batch 450, loss[loss=0.238, ctc_loss=0.1099, cr_loss=0.3485, attn_decoder_loss=0.2445, over 29692.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1086, cr_loss=0.3469, attn_decoder_loss=0.2369, over 5186927.60 frames. ], batch size: 83, lr: 2.46e-03, grad_scale: 16.0 +2024-09-20 02:48:28,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=798240.0, ans=0.0 +2024-09-20 02:48:34,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=798240.0, ans=0.125 +2024-09-20 02:48:48,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=798280.0, ans=0.0 +2024-09-20 02:48:50,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=798280.0, ans=0.2 +2024-09-20 02:48:53,986 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.58 vs. limit=22.5 +2024-09-20 02:48:56,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=798320.0, ans=0.125 +2024-09-20 02:48:58,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=798320.0, ans=15.0 +2024-09-20 02:49:24,763 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.897e+01 8.412e+01 8.859e+01 9.470e+01 4.425e+02, threshold=1.772e+02, percent-clipped=1.0 +2024-09-20 02:49:24,790 INFO [train.py:1198] (1/2) Epoch 45, batch 500, loss[loss=0.247, ctc_loss=0.1241, cr_loss=0.3907, attn_decoder_loss=0.252, over 29403.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1087, cr_loss=0.3475, attn_decoder_loss=0.2363, over 5329245.42 frames. ], batch size: 94, lr: 2.46e-03, grad_scale: 16.0 +2024-09-20 02:49:43,664 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.17 vs. 
limit=15.0 +2024-09-20 02:49:47,646 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=798440.0, ans=0.125 +2024-09-20 02:49:58,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=798480.0, ans=0.125 +2024-09-20 02:50:01,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=798480.0, ans=0.1 +2024-09-20 02:50:40,204 INFO [train.py:1198] (1/2) Epoch 45, batch 550, loss[loss=0.2436, ctc_loss=0.1159, cr_loss=0.3581, attn_decoder_loss=0.2498, over 28772.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1093, cr_loss=0.3482, attn_decoder_loss=0.2369, over 5422194.96 frames. ], batch size: 104, lr: 2.46e-03, grad_scale: 8.0 +2024-09-20 02:51:03,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=798640.0, ans=0.0 +2024-09-20 02:51:07,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=798640.0, ans=0.1 +2024-09-20 02:51:29,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=798720.0, ans=0.0 +2024-09-20 02:51:53,787 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 02:51:57,988 INFO [train.py:1198] (1/2) Epoch 45, batch 600, loss[loss=0.2502, ctc_loss=0.1233, cr_loss=0.3758, attn_decoder_loss=0.256, over 29284.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1093, cr_loss=0.3479, attn_decoder_loss=0.2372, over 5509726.45 frames. ], batch size: 100, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:51:59,419 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.525e+01 8.595e+01 9.062e+01 9.748e+01 3.862e+02, threshold=1.812e+02, percent-clipped=2.0 +2024-09-20 02:52:19,603 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.38 vs. limit=15.0 +2024-09-20 02:53:02,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=798960.0, ans=0.125 +2024-09-20 02:53:07,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=798960.0, ans=0.2 +2024-09-20 02:53:09,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=798960.0, ans=0.125 +2024-09-20 02:53:14,709 INFO [train.py:1198] (1/2) Epoch 45, batch 650, loss[loss=0.2292, ctc_loss=0.1054, cr_loss=0.3336, attn_decoder_loss=0.2355, over 29773.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1087, cr_loss=0.3466, attn_decoder_loss=0.2364, over 5586456.74 frames. 
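The `scaling.py:214` entries above each print the live value of a ScheduledFloat: a scalar hyper-parameter (a dropout probability, skip rate, balancer probability, and so on) that is interpolated as a function of `batch_count` instead of being fixed. A minimal sketch of such a schedule follows; the breakpoints are illustrative, not the recipe's actual ones. By `batch_count` ≈ 798000, as in this log, most schedules have long since reached their final constant, which is why the same `ans` values keep repeating.

```python
from bisect import bisect_right

def scheduled_float(batch_count: float,
                    schedule=((0.0, 0.3), (20000.0, 0.125))) -> float:
    """Piecewise-linear value keyed on batch_count, clamped outside the
    breakpoints. Illustrative only: each ScheduledFloat in the model owns
    its own breakpoint list."""
    xs = [x for x, _ in schedule]
    if batch_count <= xs[0]:
        return schedule[0][1]
    if batch_count >= xs[-1]:
        return schedule[-1][1]
    i = bisect_right(xs, batch_count)
    (xa, ya), (xb, yb) = schedule[i - 1], schedule[i]
    t = (batch_count - xa) / (xb - xa)
    return ya + t * (yb - ya)

# e.g. scheduled_float(798000.0) == 0.125, matching the repeated
# "balancer...prob, ... ans=0.125" lines above.
```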
], batch size: 81, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:53:21,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=799000.0, ans=0.1 +2024-09-20 02:53:21,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=799000.0, ans=0.125 +2024-09-20 02:53:45,892 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.25 vs. limit=15.0 +2024-09-20 02:53:54,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=799080.0, ans=0.04949747468305833 +2024-09-20 02:53:59,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=799120.0, ans=0.0 +2024-09-20 02:54:07,190 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.17 vs. limit=22.5 +2024-09-20 02:54:30,730 INFO [train.py:1198] (1/2) Epoch 45, batch 700, loss[loss=0.2237, ctc_loss=0.1103, cr_loss=0.3704, attn_decoder_loss=0.2281, over 29528.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1093, cr_loss=0.3483, attn_decoder_loss=0.2373, over 5636799.99 frames. ], batch size: 76, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:54:32,187 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.994e+01 8.670e+01 9.106e+01 9.852e+01 1.537e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-20 02:54:39,220 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.62 vs. limit=15.0 +2024-09-20 02:54:41,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=799200.0, ans=0.2 +2024-09-20 02:55:13,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=799280.0, ans=0.125 +2024-09-20 02:55:27,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=799320.0, ans=0.0 +2024-09-20 02:55:29,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=799320.0, ans=0.0 +2024-09-20 02:55:36,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=799360.0, ans=0.0 +2024-09-20 02:55:48,584 INFO [train.py:1198] (1/2) Epoch 45, batch 750, loss[loss=0.2385, ctc_loss=0.1136, cr_loss=0.3687, attn_decoder_loss=0.2442, over 29715.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1092, cr_loss=0.348, attn_decoder_loss=0.2369, over 5675097.99 frames. ], batch size: 82, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:56:16,068 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.47 vs. limit=15.0 +2024-09-20 02:56:16,287 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=5.29 vs. limit=12.0 +2024-09-20 02:56:19,582 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.60 vs. 
limit=22.5 +2024-09-20 02:56:31,049 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff3.min_abs, batch_count=799480.0, ans=0.2 +2024-09-20 02:56:35,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=799520.0, ans=0.1 +2024-09-20 02:56:54,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=799560.0, ans=0.05 +2024-09-20 02:56:55,260 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.00 vs. limit=15.0 +2024-09-20 02:57:06,227 INFO [train.py:1198] (1/2) Epoch 45, batch 800, loss[loss=0.216, ctc_loss=0.08939, cr_loss=0.3012, attn_decoder_loss=0.2233, over 29593.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1099, cr_loss=0.3497, attn_decoder_loss=0.2374, over 5706017.00 frames. ], batch size: 73, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 02:57:07,691 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.784e+01 8.615e+01 9.052e+01 9.760e+01 1.570e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-20 02:57:57,002 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.05 vs. limit=22.5 +2024-09-20 02:58:00,020 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.50 vs. limit=15.0 +2024-09-20 02:58:15,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=799760.0, ans=0.125 +2024-09-20 02:58:18,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=799760.0, ans=0.0 +2024-09-20 02:58:21,372 INFO [train.py:1198] (1/2) Epoch 45, batch 850, loss[loss=0.2508, ctc_loss=0.1214, cr_loss=0.3733, attn_decoder_loss=0.2568, over 29700.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1096, cr_loss=0.3487, attn_decoder_loss=0.2372, over 5734686.71 frames. ], batch size: 89, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:58:21,728 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=799800.0, ans=0.2 +2024-09-20 02:58:36,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=799840.0, ans=0.0 +2024-09-20 02:58:39,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=799840.0, ans=0.1 +2024-09-20 02:58:40,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=799840.0, ans=0.2 +2024-09-20 02:58:56,365 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.37 vs. limit=6.0 +2024-09-20 02:59:09,279 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.77 vs. limit=5.0 +2024-09-20 02:59:46,329 INFO [train.py:1198] (1/2) Epoch 45, batch 900, loss[loss=0.2225, ctc_loss=0.107, cr_loss=0.3396, attn_decoder_loss=0.2278, over 29606.00 frames. 
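The `scaling.py:1024` Whitening entries compare a per-module statistic against a limit (for example `metric=7.15 vs. limit=12.0` above); while the metric stays below the limit the whitening penalty is inactive, and the log mostly reports near-misses. Below is a plausible reconstruction of the kind of statistic involved, not a line-for-line copy of icefall's `scaling.py`: the ratio mean(λ²)/mean(λ)² over the eigenvalues λ of the (grouped) feature covariance, which is 1.0 in the limit of perfectly "white" features and grows toward `num_channels` as a few directions dominate.

```python
import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> float:
    """Hypothetical sketch: worst-group anisotropy of the feature
    covariance, measured as mean(eig^2) / mean(eig)^2."""
    feats = x.reshape(-1, x.shape[-1])          # (frames, num_channels)
    n, c = feats.shape
    group = c // num_groups
    worst = 0.0
    for g in range(num_groups):
        f = feats[:, g * group:(g + 1) * group]
        cov = (f.T @ f) / n                     # uncentered covariance
        eig = torch.linalg.eigvalsh(cov)        # symmetric -> real spectrum
        worst = max(worst, (eig.pow(2).mean() / eig.mean().pow(2)).item())
    return worst
```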
], tot_loss[loss=0.2318, ctc_loss=0.1098, cr_loss=0.3489, attn_decoder_loss=0.2375, over 5739802.06 frames. ], batch size: 73, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 02:59:47,210 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.56 vs. limit=22.5 +2024-09-20 02:59:49,262 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.442e+01 9.122e+01 9.676e+01 4.269e+02, threshold=1.824e+02, percent-clipped=2.0 +2024-09-20 02:59:55,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=800000.0, ans=0.09899494936611666 +2024-09-20 03:00:04,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=800040.0, ans=0.125 +2024-09-20 03:00:09,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=800040.0, ans=10.0 +2024-09-20 03:00:17,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=800080.0, ans=0.125 +2024-09-20 03:01:03,277 INFO [train.py:1198] (1/2) Epoch 45, batch 950, loss[loss=0.2093, ctc_loss=0.09037, cr_loss=0.304, attn_decoder_loss=0.2158, over 29493.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1096, cr_loss=0.3485, attn_decoder_loss=0.2374, over 5742582.79 frames. ], batch size: 74, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:01:38,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=800280.0, ans=0.0 +2024-09-20 03:01:49,556 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.60 vs. limit=10.0 +2024-09-20 03:02:14,377 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.65 vs. limit=12.0 +2024-09-20 03:02:18,168 INFO [train.py:1198] (1/2) Epoch 45, batch 1000, loss[loss=0.2219, ctc_loss=0.1022, cr_loss=0.3218, attn_decoder_loss=0.228, over 29520.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1101, cr_loss=0.3497, attn_decoder_loss=0.2378, over 5737749.09 frames. ], batch size: 77, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:02:21,232 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.544e+01 8.681e+01 9.118e+01 9.953e+01 2.174e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-20 03:02:38,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=800440.0, ans=0.0 +2024-09-20 03:02:42,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=800440.0, ans=0.1 +2024-09-20 03:03:07,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=800520.0, ans=0.1 +2024-09-20 03:03:08,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=800520.0, ans=0.0 +2024-09-20 03:03:21,292 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.96 vs. 
limit=6.0 +2024-09-20 03:03:22,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=800560.0, ans=0.125 +2024-09-20 03:03:35,499 INFO [train.py:1198] (1/2) Epoch 45, batch 1050, loss[loss=0.2468, ctc_loss=0.1203, cr_loss=0.3693, attn_decoder_loss=0.2526, over 29685.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1099, cr_loss=0.3491, attn_decoder_loss=0.2372, over 5746219.65 frames. ], batch size: 85, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:03:44,977 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=800600.0, ans=0.125 +2024-09-20 03:03:51,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=800640.0, ans=0.2 +2024-09-20 03:03:58,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=800640.0, ans=0.125 +2024-09-20 03:04:06,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=800680.0, ans=0.125 +2024-09-20 03:04:06,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=800680.0, ans=0.0 +2024-09-20 03:04:07,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=800680.0, ans=0.2 +2024-09-20 03:04:13,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=800680.0, ans=0.1 +2024-09-20 03:04:53,633 INFO [train.py:1198] (1/2) Epoch 45, batch 1100, loss[loss=0.2243, ctc_loss=0.1024, cr_loss=0.3311, attn_decoder_loss=0.2305, over 29464.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1098, cr_loss=0.3488, attn_decoder_loss=0.2371, over 5757902.63 frames. ], batch size: 78, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:04:56,590 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.437e+01 8.469e+01 8.955e+01 9.647e+01 1.370e+02, threshold=1.791e+02, percent-clipped=0.0 +2024-09-20 03:05:06,537 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.36 vs. limit=12.0 +2024-09-20 03:05:17,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=800840.0, ans=0.2 +2024-09-20 03:05:40,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=800920.0, ans=0.1 +2024-09-20 03:05:40,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=800920.0, ans=0.125 +2024-09-20 03:06:09,166 INFO [train.py:1198] (1/2) Epoch 45, batch 1150, loss[loss=0.2268, ctc_loss=0.1112, cr_loss=0.3422, attn_decoder_loss=0.2321, over 29442.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1097, cr_loss=0.3486, attn_decoder_loss=0.237, over 5757522.60 frames. 
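Each `optim.py:487` WARNING reports five quantiles (min / 25% / 50% / 75% / max) of recent gradient norms together with the active clipping threshold. In every record above the threshold equals `Clipping_scale` times the logged median, e.g. 2.0 × 8.980e+01 = 1.796e+02 in the first warning, so the optimizer evidently clips against a running median rather than a fixed constant; `percent-clipped` then appears to be the percentage of recent batches whose norm exceeded that threshold. A sketch under those assumptions (the window size is made up):

```python
from collections import deque
import numpy as np

class MedianGradClipper:
    """Clip gradient norms at clipping_scale * median of a sliding window
    of recent norms, mirroring the optim.py:487 warnings above."""

    def __init__(self, clipping_scale: float = 2.0, window: int = 500):
        self.scale = clipping_scale
        self.norms = deque(maxlen=window)
        self.clipped = deque(maxlen=window)

    def step(self, grad_norm: float) -> float:
        """Returns the factor to scale gradients by (1.0 = unclipped)."""
        self.norms.append(grad_norm)
        q = np.quantile(self.norms, [0.0, 0.25, 0.5, 0.75, 1.0])
        threshold = self.scale * q[2]
        self.clipped.append(grad_norm > threshold)
        print(f"grad-norm quartiles {' '.join(f'{v:.3e}' for v in q)}, "
              f"threshold={threshold:.3e}, "
              f"percent-clipped={100.0 * np.mean(self.clipped):.1f}")
        return min(1.0, threshold / max(grad_norm, 1e-20))
```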
], batch size: 78, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:06:23,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=801040.0, ans=0.125 +2024-09-20 03:06:38,601 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=801080.0, ans=0.125 +2024-09-20 03:06:41,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=801080.0, ans=0.125 +2024-09-20 03:06:50,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=801080.0, ans=0.125 +2024-09-20 03:06:57,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=801120.0, ans=0.0 +2024-09-20 03:07:10,729 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=801160.0, ans=0.125 +2024-09-20 03:07:17,146 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.87 vs. limit=12.0 +2024-09-20 03:07:18,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=801160.0, ans=0.015 +2024-09-20 03:07:26,998 INFO [train.py:1198] (1/2) Epoch 45, batch 1200, loss[loss=0.2498, ctc_loss=0.1204, cr_loss=0.3758, attn_decoder_loss=0.2558, over 29687.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1102, cr_loss=0.3494, attn_decoder_loss=0.2378, over 5749581.96 frames. ], batch size: 85, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:07:29,985 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.573e+01 8.448e+01 9.125e+01 9.558e+01 3.990e+02, threshold=1.825e+02, percent-clipped=1.0 +2024-09-20 03:07:55,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=801280.0, ans=0.0 +2024-09-20 03:07:57,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=801280.0, ans=0.125 +2024-09-20 03:07:57,728 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=16.97 vs. limit=22.5 +2024-09-20 03:08:32,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=801360.0, ans=0.125 +2024-09-20 03:08:44,560 INFO [train.py:1198] (1/2) Epoch 45, batch 1250, loss[loss=0.2443, ctc_loss=0.1227, cr_loss=0.3824, attn_decoder_loss=0.2493, over 29503.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1107, cr_loss=0.3511, attn_decoder_loss=0.2385, over 5777240.23 frames. 
], batch size: 92, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:08:57,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=801400.0, ans=0.2 +2024-09-20 03:09:04,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=801440.0, ans=15.0 +2024-09-20 03:09:36,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=801520.0, ans=0.125 +2024-09-20 03:10:00,503 INFO [train.py:1198] (1/2) Epoch 45, batch 1300, loss[loss=0.2396, ctc_loss=0.1117, cr_loss=0.3659, attn_decoder_loss=0.2457, over 28365.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1103, cr_loss=0.3503, attn_decoder_loss=0.2379, over 5780777.38 frames. ], batch size: 111, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:10:03,557 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.532e+01 8.720e+01 9.060e+01 9.963e+01 1.314e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-20 03:10:05,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=801600.0, ans=0.125 +2024-09-20 03:10:23,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=801640.0, ans=10.0 +2024-09-20 03:10:40,176 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:10:50,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=801720.0, ans=0.1 +2024-09-20 03:11:18,208 INFO [train.py:1198] (1/2) Epoch 45, batch 1350, loss[loss=0.2347, ctc_loss=0.1107, cr_loss=0.3637, attn_decoder_loss=0.2404, over 29750.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1098, cr_loss=0.3495, attn_decoder_loss=0.2379, over 5795982.08 frames. ], batch size: 81, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:11:25,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=801800.0, ans=0.0 +2024-09-20 03:11:30,440 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=801800.0, ans=0.1 +2024-09-20 03:11:50,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=801880.0, ans=0.0 +2024-09-20 03:12:19,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=801960.0, ans=0.1 +2024-09-20 03:12:35,462 INFO [train.py:1198] (1/2) Epoch 45, batch 1400, loss[loss=0.2018, ctc_loss=0.08961, cr_loss=0.2968, attn_decoder_loss=0.2077, over 29603.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1097, cr_loss=0.3493, attn_decoder_loss=0.2377, over 5807730.63 frames. 
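Each `train.py:1198` record prints the combined loss next to its three parts: `ctc_loss`, `attn_decoder_loss`, and `cr_loss` (presumably a consistency-regularization term). The totals in this log are consistent, to the printed precision, with one fixed weighting of roughly 0.1 for CTC, 0.9 for the attention decoder, and 0.02 for cr. These weights are inferred by fitting the logged numbers, not read out of the recipe:

```python
def combined_loss(ctc_loss: float, attn_decoder_loss: float,
                  cr_loss: float, ctc_weight: float = 0.1,
                  attn_weight: float = 0.9, cr_weight: float = 0.02) -> float:
    """Weighted sum reproducing the 'loss=' fields above; the default
    weights are an inference from this log, not the recipe's source."""
    return (ctc_weight * ctc_loss
            + attn_weight * attn_decoder_loss
            + cr_weight * cr_loss)

# Checks against records earlier in the log (values printed to 4 digits):
assert abs(combined_loss(0.1085, 0.2368, 0.3468) - 0.2309) < 5e-4  # batch 400 tot_loss
assert abs(combined_loss(0.1241, 0.2520, 0.3907) - 0.2470) < 5e-4  # batch 500 loss
```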
], batch size: 69, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:12:39,955 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.254e+01 8.353e+01 8.806e+01 9.318e+01 1.165e+02, threshold=1.761e+02, percent-clipped=0.0 +2024-09-20 03:12:46,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=802000.0, ans=0.0 +2024-09-20 03:12:47,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=802000.0, ans=0.125 +2024-09-20 03:12:47,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=802000.0, ans=0.125 +2024-09-20 03:12:59,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=802040.0, ans=0.0 +2024-09-20 03:12:59,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=802040.0, ans=0.125 +2024-09-20 03:13:05,224 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.18 vs. limit=10.0 +2024-09-20 03:13:24,395 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=4.17 vs. limit=12.0 +2024-09-20 03:13:28,377 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=802120.0, ans=0.0 +2024-09-20 03:13:46,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=802160.0, ans=0.125 +2024-09-20 03:13:50,619 INFO [train.py:1198] (1/2) Epoch 45, batch 1450, loss[loss=0.2472, ctc_loss=0.1226, cr_loss=0.3748, attn_decoder_loss=0.2527, over 29465.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1097, cr_loss=0.3494, attn_decoder_loss=0.2379, over 5804096.50 frames. ], batch size: 94, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:13:56,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=802200.0, ans=0.0 +2024-09-20 03:13:58,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=802200.0, ans=0.2 +2024-09-20 03:13:58,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=802200.0, ans=0.0 +2024-09-20 03:15:02,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=802360.0, ans=0.2 +2024-09-20 03:15:05,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=802360.0, ans=0.125 +2024-09-20 03:15:08,090 INFO [train.py:1198] (1/2) Epoch 45, batch 1500, loss[loss=0.2338, ctc_loss=0.107, cr_loss=0.3548, attn_decoder_loss=0.24, over 29631.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1096, cr_loss=0.3494, attn_decoder_loss=0.2381, over 5805061.55 frames. 
], batch size: 86, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:15:12,538 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.856e+01 8.707e+01 9.148e+01 9.626e+01 3.931e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-20 03:15:55,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=802520.0, ans=0.125 +2024-09-20 03:15:55,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=802520.0, ans=0.125 +2024-09-20 03:16:16,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=802560.0, ans=0.0 +2024-09-20 03:16:19,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=802560.0, ans=0.125 +2024-09-20 03:16:23,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=802560.0, ans=0.0 +2024-09-20 03:16:25,837 INFO [train.py:1198] (1/2) Epoch 45, batch 1550, loss[loss=0.2494, ctc_loss=0.1282, cr_loss=0.3914, attn_decoder_loss=0.2541, over 29489.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1098, cr_loss=0.3501, attn_decoder_loss=0.2383, over 5781093.30 frames. ], batch size: 90, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:16:30,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=802600.0, ans=0.125 +2024-09-20 03:16:44,067 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=802640.0, ans=0.2 +2024-09-20 03:16:48,774 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=802640.0, ans=0.125 +2024-09-20 03:16:54,553 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=802680.0, ans=0.1 +2024-09-20 03:17:04,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=802680.0, ans=0.1 +2024-09-20 03:17:24,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=802760.0, ans=0.125 +2024-09-20 03:17:30,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=802760.0, ans=0.125 +2024-09-20 03:17:40,715 INFO [train.py:1198] (1/2) Epoch 45, batch 1600, loss[loss=0.2437, ctc_loss=0.1068, cr_loss=0.3416, attn_decoder_loss=0.2513, over 29677.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1099, cr_loss=0.35, attn_decoder_loss=0.2382, over 5765195.57 frames. 
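The `grad_scale` field in the batch headers is the mixed-precision loss scale. Over this stretch it bounces between 8.0 and 16.0, which is the signature of dynamic loss scaling: the scale is doubled after a run of overflow-free steps and halved whenever a non-finite gradient shows up. A generic sketch of that policy with illustrative constants (icefall wraps this in its own GradScaler-style logic):

```python
class DynamicLossScaler:
    """Double the loss scale after `growth_interval` clean steps and halve
    it on overflow; with occasional overflows the scale oscillates between
    two neighboring powers of two, as 8.0 / 16.0 does in this log."""

    def __init__(self, init_scale: float = 8.0, growth_interval: int = 2000):
        self.scale = init_scale
        self.growth_interval = growth_interval
        self._clean_steps = 0

    def update(self, found_inf: bool) -> None:
        if found_inf:
            self.scale = max(self.scale / 2.0, 1.0)
            self._clean_steps = 0
        else:
            self._clean_steps += 1
            if self._clean_steps >= self.growth_interval:
                self.scale *= 2.0
                self._clean_steps = 0
```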
], batch size: 85, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:17:42,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.min_positive, batch_count=802800.0, ans=0.05 +2024-09-20 03:17:45,054 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.354e+01 8.517e+01 9.021e+01 9.788e+01 6.298e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-20 03:18:06,719 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=802840.0, ans=0.125 +2024-09-20 03:18:16,707 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.72 vs. limit=15.0 +2024-09-20 03:18:29,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=802920.0, ans=0.125 +2024-09-20 03:18:58,031 INFO [train.py:1198] (1/2) Epoch 45, batch 1650, loss[loss=0.2388, ctc_loss=0.1124, cr_loss=0.3528, attn_decoder_loss=0.245, over 29707.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1097, cr_loss=0.3494, attn_decoder_loss=0.238, over 5759659.92 frames. ], batch size: 89, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:18:59,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=803000.0, ans=10.0 +2024-09-20 03:19:22,449 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=803040.0, ans=0.0 +2024-09-20 03:19:46,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=803120.0, ans=0.125 +2024-09-20 03:19:53,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=803120.0, ans=0.0 +2024-09-20 03:20:04,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=803160.0, ans=0.0 +2024-09-20 03:20:07,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=803160.0, ans=0.125 +2024-09-20 03:20:15,552 INFO [train.py:1198] (1/2) Epoch 45, batch 1700, loss[loss=0.2073, ctc_loss=0.08758, cr_loss=0.2981, attn_decoder_loss=0.214, over 29593.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1096, cr_loss=0.3492, attn_decoder_loss=0.2378, over 5781341.52 frames. ], batch size: 69, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:20:19,337 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.12 vs. 
limit=6.0 +2024-09-20 03:20:21,493 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.570e+01 9.061e+01 9.508e+01 1.721e+02, threshold=1.812e+02, percent-clipped=0.0 +2024-09-20 03:20:21,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=803200.0, ans=0.09899494936611666 +2024-09-20 03:20:24,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=803200.0, ans=0.0 +2024-09-20 03:20:56,411 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=803280.0, ans=0.125 +2024-09-20 03:20:56,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=803280.0, ans=0.0 +2024-09-20 03:21:23,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=803360.0, ans=0.125 +2024-09-20 03:21:30,947 INFO [train.py:1198] (1/2) Epoch 45, batch 1750, loss[loss=0.2138, ctc_loss=0.1013, cr_loss=0.325, attn_decoder_loss=0.219, over 29336.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1094, cr_loss=0.3487, attn_decoder_loss=0.2375, over 5788872.29 frames. ], batch size: 67, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:21:47,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass_mid.scale_min, batch_count=803440.0, ans=0.2 +2024-09-20 03:21:53,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=803440.0, ans=0.125 +2024-09-20 03:22:17,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=803520.0, ans=0.125 +2024-09-20 03:22:40,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=803560.0, ans=0.2 +2024-09-20 03:22:47,894 INFO [train.py:1198] (1/2) Epoch 45, batch 1800, loss[loss=0.2294, ctc_loss=0.09519, cr_loss=0.3208, attn_decoder_loss=0.2372, over 29677.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1096, cr_loss=0.3493, attn_decoder_loss=0.2377, over 5791657.74 frames. ], batch size: 83, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:22:53,956 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.618e+01 8.492e+01 8.891e+01 9.479e+01 1.445e+02, threshold=1.778e+02, percent-clipped=0.0 +2024-09-20 03:23:15,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=803640.0, ans=0.125 +2024-09-20 03:23:25,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=803680.0, ans=0.1 +2024-09-20 03:23:26,666 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.18 vs. 
limit=10.0 +2024-09-20 03:23:27,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=803680.0, ans=0.0 +2024-09-20 03:23:29,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=803680.0, ans=0.125 +2024-09-20 03:23:53,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=803760.0, ans=0.1 +2024-09-20 03:24:03,352 INFO [train.py:1198] (1/2) Epoch 45, batch 1850, loss[loss=0.2321, ctc_loss=0.1047, cr_loss=0.3354, attn_decoder_loss=0.2388, over 29629.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1092, cr_loss=0.3487, attn_decoder_loss=0.2372, over 5797925.44 frames. ], batch size: 86, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:24:05,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.36 vs. limit=10.0 +2024-09-20 03:24:06,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=803800.0, ans=0.125 +2024-09-20 03:24:08,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=803800.0, ans=0.0 +2024-09-20 03:24:10,464 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.49 vs. limit=15.0 +2024-09-20 03:24:48,568 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.02 vs. limit=15.0 +2024-09-20 03:24:51,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=803920.0, ans=0.2 +2024-09-20 03:24:55,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=803920.0, ans=0.0 +2024-09-20 03:25:17,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=803960.0, ans=0.125 +2024-09-20 03:25:20,994 INFO [train.py:1198] (1/2) Epoch 45, batch 1900, loss[loss=0.2357, ctc_loss=0.1107, cr_loss=0.3635, attn_decoder_loss=0.2415, over 29707.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1094, cr_loss=0.3497, attn_decoder_loss=0.2378, over 5804538.58 frames. ], batch size: 89, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:25:27,053 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.731e+01 8.514e+01 9.088e+01 9.657e+01 1.546e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-20 03:25:50,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=804080.0, ans=0.125 +2024-09-20 03:25:59,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=804080.0, ans=0.0 +2024-09-20 03:25:59,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=804080.0, ans=0.125 +2024-09-20 03:26:17,436 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.20 vs. 
limit=10.0 +2024-09-20 03:26:28,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=804160.0, ans=0.0 +2024-09-20 03:26:38,939 INFO [train.py:1198] (1/2) Epoch 45, batch 1950, loss[loss=0.2351, ctc_loss=0.1191, cr_loss=0.3676, attn_decoder_loss=0.2398, over 29441.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1098, cr_loss=0.3507, attn_decoder_loss=0.2386, over 5818624.97 frames. ], batch size: 78, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:26:45,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=804200.0, ans=0.125 +2024-09-20 03:26:57,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=804240.0, ans=0.2 +2024-09-20 03:27:25,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=804320.0, ans=0.0 +2024-09-20 03:27:28,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=804320.0, ans=0.125 +2024-09-20 03:27:54,316 INFO [train.py:1198] (1/2) Epoch 45, batch 2000, loss[loss=0.2068, ctc_loss=0.08535, cr_loss=0.3096, attn_decoder_loss=0.2135, over 29346.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1104, cr_loss=0.3516, attn_decoder_loss=0.2392, over 5797248.72 frames. ], batch size: 67, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:28:00,423 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.885e+01 8.761e+01 9.181e+01 9.636e+01 2.089e+02, threshold=1.836e+02, percent-clipped=2.0 +2024-09-20 03:28:00,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=804400.0, ans=0.0 +2024-09-20 03:28:08,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=804400.0, ans=0.025 +2024-09-20 03:28:46,963 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.04 vs. limit=12.0 +2024-09-20 03:28:57,142 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=804560.0, ans=0.2 +2024-09-20 03:29:00,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=804560.0, ans=0.09899494936611666 +2024-09-20 03:29:11,950 INFO [train.py:1198] (1/2) Epoch 45, batch 2050, loss[loss=0.2098, ctc_loss=0.09532, cr_loss=0.3067, attn_decoder_loss=0.2157, over 29436.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1099, cr_loss=0.3504, attn_decoder_loss=0.2381, over 5789158.59 frames. 
], batch size: 70, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:29:25,761 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=804640.0, ans=0.2 +2024-09-20 03:29:46,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=804680.0, ans=0.125 +2024-09-20 03:29:57,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=804720.0, ans=0.125 +2024-09-20 03:30:11,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=804720.0, ans=0.125 +2024-09-20 03:30:29,645 INFO [train.py:1198] (1/2) Epoch 45, batch 2100, loss[loss=0.2325, ctc_loss=0.1123, cr_loss=0.3389, attn_decoder_loss=0.2383, over 29756.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1095, cr_loss=0.3492, attn_decoder_loss=0.2375, over 5800711.82 frames. ], batch size: 81, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:30:32,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.94 vs. limit=15.0 +2024-09-20 03:30:35,557 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.221e+01 8.482e+01 9.039e+01 9.529e+01 1.230e+02, threshold=1.808e+02, percent-clipped=0.0 +2024-09-20 03:30:35,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=804800.0, ans=0.125 +2024-09-20 03:31:01,714 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.29 vs. limit=6.0 +2024-09-20 03:31:17,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=804920.0, ans=0.1 +2024-09-20 03:31:35,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=804960.0, ans=0.125 +2024-09-20 03:31:43,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.34 vs. limit=15.0 +2024-09-20 03:31:44,355 INFO [train.py:1198] (1/2) Epoch 45, batch 2150, loss[loss=0.237, ctc_loss=0.1173, cr_loss=0.3811, attn_decoder_loss=0.2419, over 29430.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1089, cr_loss=0.3478, attn_decoder_loss=0.2369, over 5815436.60 frames. 
], batch size: 78, lr: 2.45e-03, grad_scale: 16.0 +2024-09-20 03:32:15,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=805080.0, ans=0.125 +2024-09-20 03:32:18,612 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=805080.0, ans=0.0 +2024-09-20 03:32:21,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=805080.0, ans=0.0 +2024-09-20 03:32:23,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=805080.0, ans=0.0 +2024-09-20 03:32:28,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=805080.0, ans=0.05 +2024-09-20 03:32:56,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=805160.0, ans=0.125 +2024-09-20 03:32:58,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.91 vs. limit=12.0 +2024-09-20 03:33:01,958 INFO [train.py:1198] (1/2) Epoch 45, batch 2200, loss[loss=0.2429, ctc_loss=0.1181, cr_loss=0.3593, attn_decoder_loss=0.2487, over 29633.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.109, cr_loss=0.3477, attn_decoder_loss=0.2371, over 5812639.04 frames. ], batch size: 86, lr: 2.45e-03, grad_scale: 8.0 +2024-09-20 03:33:05,672 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.36 vs. limit=6.0 +2024-09-20 03:33:09,444 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.511e+01 8.623e+01 8.976e+01 9.604e+01 3.634e+02, threshold=1.795e+02, percent-clipped=1.0 +2024-09-20 03:33:11,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=805200.0, ans=0.0 +2024-09-20 03:33:32,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=805280.0, ans=0.0 +2024-09-20 03:33:36,015 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.75 vs. limit=22.5 +2024-09-20 03:34:19,512 INFO [train.py:1198] (1/2) Epoch 45, batch 2250, loss[loss=0.2415, ctc_loss=0.1092, cr_loss=0.3646, attn_decoder_loss=0.2481, over 29703.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1087, cr_loss=0.3472, attn_decoder_loss=0.237, over 5811169.23 frames. ], batch size: 82, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:35:07,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=805520.0, ans=0.0 +2024-09-20 03:35:25,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=805560.0, ans=0.125 +2024-09-20 03:35:34,906 INFO [train.py:1198] (1/2) Epoch 45, batch 2300, loss[loss=0.2043, ctc_loss=0.08986, cr_loss=0.3033, attn_decoder_loss=0.2103, over 29297.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1082, cr_loss=0.3465, attn_decoder_loss=0.2361, over 5799991.29 frames. 
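The learning rate decays smoothly rather than in steps: 2.46e-03 at the top of this stretch, 2.45e-03 from around batch 600, and 2.44e-03 by batch 2250. icefall recipes typically drive this with the Eden scheduler, which makes the rate an inverse-power function of both the batch index and the (fractional) epoch; its shape is sketched below with placeholder constants that are not claimed to reproduce the logged values.

```python
def eden_lr(base_lr: float, batch: int, epoch: float,
            lr_batches: float = 5000.0, lr_epochs: float = 6.0) -> float:
    """Eden-style schedule: smooth joint decay in batch and epoch.
    All constants here are placeholders; the recipe's optimizer defines
    the real ones."""
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor
```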
], batch size: 71, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:35:42,373 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.497e+01 8.561e+01 9.011e+01 9.517e+01 1.725e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-20 03:35:47,151 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:36:00,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=805640.0, ans=0.125 +2024-09-20 03:36:15,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=805680.0, ans=0.125 +2024-09-20 03:36:27,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=805720.0, ans=0.0 +2024-09-20 03:36:41,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=805760.0, ans=0.125 +2024-09-20 03:36:52,629 INFO [train.py:1198] (1/2) Epoch 45, batch 2350, loss[loss=0.2398, ctc_loss=0.1141, cr_loss=0.3501, attn_decoder_loss=0.246, over 29707.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1086, cr_loss=0.3472, attn_decoder_loss=0.2364, over 5805031.49 frames. ], batch size: 83, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:37:00,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=805800.0, ans=0.125 +2024-09-20 03:37:36,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.min_positive, batch_count=805920.0, ans=0.025 +2024-09-20 03:37:45,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=805920.0, ans=0.2 +2024-09-20 03:37:48,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=805920.0, ans=0.1 +2024-09-20 03:37:52,159 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=805920.0, ans=0.125 +2024-09-20 03:38:01,860 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.72 vs. limit=15.0 +2024-09-20 03:38:10,108 INFO [train.py:1198] (1/2) Epoch 45, batch 2400, loss[loss=0.2228, ctc_loss=0.1071, cr_loss=0.3634, attn_decoder_loss=0.2276, over 29548.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1089, cr_loss=0.3477, attn_decoder_loss=0.2368, over 5808677.84 frames. 
], batch size: 76, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:38:17,562 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.497e+01 8.673e+01 8.961e+01 9.495e+01 1.491e+02, threshold=1.792e+02, percent-clipped=0.0 +2024-09-20 03:38:25,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=806040.0, ans=22.5 +2024-09-20 03:38:27,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=806040.0, ans=0.025 +2024-09-20 03:38:34,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=806040.0, ans=0.125 +2024-09-20 03:38:46,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=806080.0, ans=0.1 +2024-09-20 03:39:03,379 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=806120.0, ans=0.1 +2024-09-20 03:39:04,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=806120.0, ans=0.025 +2024-09-20 03:39:09,824 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.42 vs. limit=15.0 +2024-09-20 03:39:15,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=806160.0, ans=0.125 +2024-09-20 03:39:24,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=806200.0, ans=0.125 +2024-09-20 03:39:25,797 INFO [train.py:1198] (1/2) Epoch 45, batch 2450, loss[loss=0.2394, ctc_loss=0.1183, cr_loss=0.3827, attn_decoder_loss=0.2443, over 29711.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1094, cr_loss=0.3487, attn_decoder_loss=0.2375, over 5785277.37 frames. ], batch size: 82, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:39:43,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=806240.0, ans=0.125 +2024-09-20 03:39:45,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=806240.0, ans=0.04949747468305833 +2024-09-20 03:39:51,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=806240.0, ans=0.2 +2024-09-20 03:40:10,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=806280.0, ans=0.125 +2024-09-20 03:40:37,610 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=806360.0, ans=0.125 +2024-09-20 03:40:43,786 INFO [train.py:1198] (1/2) Epoch 45, batch 2500, loss[loss=0.2447, ctc_loss=0.1145, cr_loss=0.353, attn_decoder_loss=0.2513, over 29641.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1096, cr_loss=0.3488, attn_decoder_loss=0.2376, over 5795177.44 frames. 
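The per-batch size in these records varies widely (67 up to 111 above, and as high as 210 later in the log) while each loss is normalized "over N frames", because batches are capped by total audio duration rather than by utterance count: buckets of long utterances produce small batches, buckets of short ones produce large batches. A sketch of duration-capped batching in the style of the lhotse samplers icefall recipes use (`max_duration` is illustrative):

```python
from typing import Iterable, Iterator, List, Tuple

def duration_capped_batches(
    utts: Iterable[Tuple[str, float]], max_duration: float = 600.0
) -> Iterator[List[str]]:
    """Group (utterance_id, seconds) pairs into batches whose summed
    duration stays under max_duration, so batch *size* shrinks as
    utterances get longer."""
    batch: List[str] = []
    total = 0.0
    for utt_id, seconds in utts:
        if batch and total + seconds > max_duration:
            yield batch
            batch, total = [], 0.0
        batch.append(utt_id)
        total += seconds
    if batch:
        yield batch
```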
], batch size: 86, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:40:51,307 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.491e+01 8.641e+01 9.220e+01 9.804e+01 1.997e+02, threshold=1.844e+02, percent-clipped=2.0 +2024-09-20 03:40:54,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=806400.0, ans=0.125 +2024-09-20 03:41:01,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.70 vs. limit=15.0 +2024-09-20 03:41:08,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=806440.0, ans=0.0 +2024-09-20 03:41:12,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=806480.0, ans=0.1 +2024-09-20 03:41:19,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.prob, batch_count=806480.0, ans=0.125 +2024-09-20 03:41:28,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.66 vs. limit=15.0 +2024-09-20 03:41:58,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=806560.0, ans=0.125 +2024-09-20 03:42:01,677 INFO [train.py:1198] (1/2) Epoch 45, batch 2550, loss[loss=0.2062, ctc_loss=0.09508, cr_loss=0.3254, attn_decoder_loss=0.2113, over 29403.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1094, cr_loss=0.3488, attn_decoder_loss=0.2377, over 5799679.42 frames. ], batch size: 67, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:42:15,934 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.99 vs. limit=22.5 +2024-09-20 03:42:22,766 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=806640.0, ans=0.125 +2024-09-20 03:42:44,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=806680.0, ans=0.125 +2024-09-20 03:42:44,839 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.21 vs. limit=15.0 +2024-09-20 03:43:17,314 INFO [train.py:1198] (1/2) Epoch 45, batch 2600, loss[loss=0.2235, ctc_loss=0.09919, cr_loss=0.3318, attn_decoder_loss=0.23, over 29450.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1094, cr_loss=0.3485, attn_decoder_loss=0.2379, over 5795539.34 frames. ], batch size: 78, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:43:23,975 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.82 vs. 
limit=15.0 +2024-09-20 03:43:25,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=806800.0, ans=0.05 +2024-09-20 03:43:26,234 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.746e+01 8.807e+01 9.340e+01 9.891e+01 1.748e+02, threshold=1.868e+02, percent-clipped=0.0 +2024-09-20 03:43:50,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=806880.0, ans=0.125 +2024-09-20 03:43:54,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=806880.0, ans=0.1 +2024-09-20 03:44:00,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=806880.0, ans=0.0 +2024-09-20 03:44:03,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=806920.0, ans=0.125 +2024-09-20 03:44:03,742 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.87 vs. limit=15.0 +2024-09-20 03:44:09,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=806920.0, ans=0.0 +2024-09-20 03:44:16,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=806920.0, ans=0.125 +2024-09-20 03:44:27,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=806960.0, ans=0.125 +2024-09-20 03:44:30,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=806960.0, ans=0.125 +2024-09-20 03:44:34,362 INFO [train.py:1198] (1/2) Epoch 45, batch 2650, loss[loss=0.2477, ctc_loss=0.1226, cr_loss=0.377, attn_decoder_loss=0.2532, over 29288.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1095, cr_loss=0.3488, attn_decoder_loss=0.2381, over 5800867.75 frames. ], batch size: 100, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:44:45,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=807000.0, ans=0.0 +2024-09-20 03:44:51,242 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=807040.0, ans=0.1 +2024-09-20 03:45:03,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=807080.0, ans=0.07 +2024-09-20 03:45:03,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.54 vs. limit=15.0 +2024-09-20 03:45:06,753 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=7.76 vs. 
limit=15.0 +2024-09-20 03:45:27,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=807120.0, ans=0.125 +2024-09-20 03:45:32,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=807120.0, ans=0.07 +2024-09-20 03:45:42,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=807160.0, ans=0.0 +2024-09-20 03:45:43,477 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.43 vs. limit=22.5 +2024-09-20 03:45:50,865 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.64 vs. limit=15.0 +2024-09-20 03:45:51,977 INFO [train.py:1198] (1/2) Epoch 45, batch 2700, loss[loss=0.2438, ctc_loss=0.1141, cr_loss=0.3688, attn_decoder_loss=0.25, over 29536.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.11, cr_loss=0.3502, attn_decoder_loss=0.2384, over 5796404.86 frames. ], batch size: 87, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:46:01,050 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.462e+01 8.586e+01 9.065e+01 9.630e+01 2.449e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-20 03:46:10,868 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.97 vs. limit=15.0 +2024-09-20 03:47:03,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=807360.0, ans=0.125 +2024-09-20 03:47:07,353 INFO [train.py:1198] (1/2) Epoch 45, batch 2750, loss[loss=0.2268, ctc_loss=0.1086, cr_loss=0.3438, attn_decoder_loss=0.2322, over 29529.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1093, cr_loss=0.3484, attn_decoder_loss=0.2373, over 5794808.14 frames. ], batch size: 75, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:47:09,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=807400.0, ans=0.125 +2024-09-20 03:47:19,814 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:47:21,161 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=807440.0, ans=0.1 +2024-09-20 03:47:44,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=807480.0, ans=0.125 +2024-09-20 03:47:57,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=807520.0, ans=0.125 +2024-09-20 03:48:05,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=807520.0, ans=0.0 +2024-09-20 03:48:11,061 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.35 vs. 
limit=15.0 +2024-09-20 03:48:22,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=807560.0, ans=0.125 +2024-09-20 03:48:24,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=807600.0, ans=0.125 +2024-09-20 03:48:25,188 INFO [train.py:1198] (1/2) Epoch 45, batch 2800, loss[loss=0.2557, ctc_loss=0.144, cr_loss=0.3752, attn_decoder_loss=0.2597, over 20324.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1098, cr_loss=0.3491, attn_decoder_loss=0.2375, over 5776595.44 frames. ], batch size: 210, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:48:28,531 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:48:28,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=807600.0, ans=0.125 +2024-09-20 03:48:34,099 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.700e+01 8.581e+01 9.021e+01 9.905e+01 2.529e+02, threshold=1.804e+02, percent-clipped=2.0 +2024-09-20 03:48:48,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=807640.0, ans=0.125 +2024-09-20 03:48:59,097 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.61 vs. limit=15.0 +2024-09-20 03:49:15,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=807720.0, ans=0.1 +2024-09-20 03:49:24,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=807720.0, ans=0.125 +2024-09-20 03:49:27,212 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.01 vs. limit=22.5 +2024-09-20 03:49:36,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=807760.0, ans=0.125 +2024-09-20 03:49:39,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=807760.0, ans=0.0 +2024-09-20 03:49:42,381 INFO [train.py:1198] (1/2) Epoch 45, batch 2850, loss[loss=0.2222, ctc_loss=0.107, cr_loss=0.343, attn_decoder_loss=0.2273, over 29501.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1103, cr_loss=0.3506, attn_decoder_loss=0.2379, over 5759826.70 frames. ], batch size: 77, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:50:03,859 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:50:29,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=807920.0, ans=0.125 +2024-09-20 03:50:46,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=807960.0, ans=0.0 +2024-09-20 03:50:58,366 INFO [train.py:1198] (1/2) Epoch 45, batch 2900, loss[loss=0.2316, ctc_loss=0.1098, cr_loss=0.3463, attn_decoder_loss=0.2374, over 29431.00 frames. ], tot_loss[loss=0.2333, ctc_loss=0.1109, cr_loss=0.352, attn_decoder_loss=0.2391, over 5786217.84 frames. 
], batch size: 79, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:51:02,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=808000.0, ans=0.125 +2024-09-20 03:51:07,245 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.821e+01 8.668e+01 9.103e+01 9.766e+01 1.431e+02, threshold=1.821e+02, percent-clipped=0.0 +2024-09-20 03:51:13,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=808040.0, ans=0.1 +2024-09-20 03:51:27,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=808080.0, ans=0.125 +2024-09-20 03:51:34,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=808080.0, ans=0.125 +2024-09-20 03:52:15,546 INFO [train.py:1198] (1/2) Epoch 45, batch 2950, loss[loss=0.2313, ctc_loss=0.1192, cr_loss=0.3739, attn_decoder_loss=0.2355, over 29513.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1101, cr_loss=0.35, attn_decoder_loss=0.2378, over 5781169.85 frames. ], batch size: 75, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:52:26,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=808200.0, ans=0.0 +2024-09-20 03:52:48,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.59 vs. limit=15.0 +2024-09-20 03:52:53,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=808280.0, ans=0.2 +2024-09-20 03:53:18,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=808360.0, ans=0.125 +2024-09-20 03:53:21,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=808360.0, ans=0.1 +2024-09-20 03:53:33,024 INFO [train.py:1198] (1/2) Epoch 45, batch 3000, loss[loss=0.2386, ctc_loss=0.1119, cr_loss=0.3542, attn_decoder_loss=0.2448, over 29768.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1101, cr_loss=0.3503, attn_decoder_loss=0.2377, over 5782223.61 frames. ], batch size: 81, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:53:33,024 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 03:53:51,273 INFO [train.py:1230] (1/2) Epoch 45, validation: loss=0.213, ctc_loss=0.0366, cr_loss=6.956e-15, attn_decoder_loss=0.2326, over 944034.00 frames. +2024-09-20 03:53:51,273 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 03:54:00,589 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.339e+01 8.498e+01 9.089e+01 9.590e+01 3.857e+02, threshold=1.818e+02, percent-clipped=2.0 +2024-09-20 03:54:10,510 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.20 vs. 
limit=15.0 +2024-09-20 03:54:37,053 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:54:40,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=808520.0, ans=0.5 +2024-09-20 03:54:59,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=808560.0, ans=0.0 +2024-09-20 03:55:01,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.77 vs. limit=12.0 +2024-09-20 03:55:06,910 INFO [train.py:1198] (1/2) Epoch 45, batch 3050, loss[loss=0.2075, ctc_loss=0.08796, cr_loss=0.2978, attn_decoder_loss=0.2142, over 29531.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1106, cr_loss=0.3514, attn_decoder_loss=0.2385, over 5777600.86 frames. ], batch size: 76, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:55:33,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=808640.0, ans=0.125 +2024-09-20 03:55:42,248 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=808680.0, ans=0.05 +2024-09-20 03:55:50,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.42 vs. limit=12.0 +2024-09-20 03:56:24,620 INFO [train.py:1198] (1/2) Epoch 45, batch 3100, loss[loss=0.2508, ctc_loss=0.1277, cr_loss=0.3769, attn_decoder_loss=0.2561, over 29246.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1104, cr_loss=0.3505, attn_decoder_loss=0.2383, over 5777931.01 frames. ], batch size: 100, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:56:26,561 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:56:32,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=808800.0, ans=0.07 +2024-09-20 03:56:35,118 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.756e+01 8.685e+01 9.291e+01 9.894e+01 1.991e+02, threshold=1.858e+02, percent-clipped=1.0 +2024-09-20 03:56:43,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=808840.0, ans=0.0 +2024-09-20 03:56:44,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=808840.0, ans=0.125 +2024-09-20 03:56:51,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=808840.0, ans=0.125 +2024-09-20 03:57:10,609 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 03:57:16,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=808920.0, ans=0.025 +2024-09-20 03:57:21,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=808920.0, ans=0.125 +2024-09-20 03:57:42,013 INFO [train.py:1198] (1/2) Epoch 45, batch 3150, loss[loss=0.2515, ctc_loss=0.1263, cr_loss=0.3854, attn_decoder_loss=0.2568, over 28860.00 frames. 
], tot_loss[loss=0.2324, ctc_loss=0.1102, cr_loss=0.35, attn_decoder_loss=0.2382, over 5784215.17 frames. ], batch size: 104, lr: 2.44e-03, grad_scale: 8.0 +2024-09-20 03:57:46,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=809000.0, ans=0.125 +2024-09-20 03:57:57,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=809040.0, ans=0.125 +2024-09-20 03:58:10,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=809080.0, ans=0.125 +2024-09-20 03:58:12,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=809080.0, ans=0.0 +2024-09-20 03:58:21,350 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=809080.0, ans=0.0 +2024-09-20 03:58:39,653 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.24 vs. limit=15.0 +2024-09-20 03:58:52,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=809160.0, ans=0.125 +2024-09-20 03:58:56,889 INFO [train.py:1198] (1/2) Epoch 45, batch 3200, loss[loss=0.2219, ctc_loss=0.09486, cr_loss=0.3035, attn_decoder_loss=0.2293, over 29409.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1096, cr_loss=0.3494, attn_decoder_loss=0.2375, over 5793954.25 frames. ], batch size: 79, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 03:59:07,501 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.301e+01 8.632e+01 9.218e+01 9.587e+01 1.920e+02, threshold=1.844e+02, percent-clipped=1.0 +2024-09-20 03:59:10,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=809240.0, ans=0.1 +2024-09-20 03:59:28,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=809280.0, ans=0.125 +2024-09-20 03:59:32,775 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=809280.0, ans=0.0 +2024-09-20 03:59:42,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.01 vs. limit=6.0 +2024-09-20 03:59:53,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=809320.0, ans=0.125 +2024-09-20 03:59:53,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=809320.0, ans=0.05 +2024-09-20 04:00:14,530 INFO [train.py:1198] (1/2) Epoch 45, batch 3250, loss[loss=0.2445, ctc_loss=0.1182, cr_loss=0.3752, attn_decoder_loss=0.2502, over 29729.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1098, cr_loss=0.3499, attn_decoder_loss=0.238, over 5800359.47 frames. ], batch size: 84, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:00:16,961 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.50 vs. 
limit=22.5 +2024-09-20 04:00:28,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=809440.0, ans=0.2 +2024-09-20 04:00:32,172 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.99 vs. limit=15.0 +2024-09-20 04:01:16,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=809560.0, ans=0.125 +2024-09-20 04:01:22,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=809560.0, ans=0.0 +2024-09-20 04:01:23,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=809560.0, ans=0.0 +2024-09-20 04:01:32,411 INFO [train.py:1198] (1/2) Epoch 45, batch 3300, loss[loss=0.232, ctc_loss=0.1011, cr_loss=0.3186, attn_decoder_loss=0.2394, over 28205.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1089, cr_loss=0.3478, attn_decoder_loss=0.2367, over 5797213.36 frames. ], batch size: 111, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:01:33,533 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.25 vs. limit=15.0 +2024-09-20 04:01:37,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=809600.0, ans=0.125 +2024-09-20 04:01:42,958 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.316e+01 8.585e+01 9.187e+01 9.677e+01 1.727e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-20 04:01:55,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=809640.0, ans=0.125 +2024-09-20 04:02:14,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=809680.0, ans=0.1 +2024-09-20 04:02:17,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=809720.0, ans=0.125 +2024-09-20 04:02:26,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=809720.0, ans=0.0 +2024-09-20 04:02:32,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=809760.0, ans=0.0 +2024-09-20 04:02:40,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=809760.0, ans=0.125 +2024-09-20 04:02:41,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=809760.0, ans=0.0 +2024-09-20 04:02:47,606 INFO [train.py:1198] (1/2) Epoch 45, batch 3350, loss[loss=0.2449, ctc_loss=0.1133, cr_loss=0.3578, attn_decoder_loss=0.2516, over 28856.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1096, cr_loss=0.3491, attn_decoder_loss=0.2378, over 5773888.64 frames. 
], batch size: 104, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:03:03,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=809840.0, ans=0.2 +2024-09-20 04:03:23,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=809880.0, ans=0.0 +2024-09-20 04:03:31,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=809880.0, ans=0.125 +2024-09-20 04:03:50,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=809960.0, ans=0.025 +2024-09-20 04:04:05,668 INFO [train.py:1198] (1/2) Epoch 45, batch 3400, loss[loss=0.2003, ctc_loss=0.08873, cr_loss=0.2975, attn_decoder_loss=0.2061, over 29320.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.11, cr_loss=0.3497, attn_decoder_loss=0.2378, over 5765688.48 frames. ], batch size: 67, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:04:09,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=810000.0, ans=0.05 +2024-09-20 04:04:10,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=810000.0, ans=0.2 +2024-09-20 04:04:18,541 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.607e+01 8.782e+01 9.254e+01 9.954e+01 2.335e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-20 04:05:18,961 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=810160.0, ans=0.125 +2024-09-20 04:05:23,098 INFO [train.py:1198] (1/2) Epoch 45, batch 3450, loss[loss=0.2448, ctc_loss=0.1138, cr_loss=0.3572, attn_decoder_loss=0.2514, over 28316.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1102, cr_loss=0.3501, attn_decoder_loss=0.2383, over 5773568.27 frames. ], batch size: 111, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:05:35,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=810200.0, ans=0.0 +2024-09-20 04:05:40,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=810240.0, ans=0.0 +2024-09-20 04:05:50,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=810240.0, ans=0.2 +2024-09-20 04:06:05,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=810280.0, ans=0.125 +2024-09-20 04:06:16,766 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.00 vs. limit=12.0 +2024-09-20 04:06:29,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=810360.0, ans=0.125 +2024-09-20 04:06:33,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.45 vs. limit=10.0 +2024-09-20 04:06:38,592 INFO [train.py:1198] (1/2) Epoch 45, batch 3500, loss[loss=0.2116, ctc_loss=0.09244, cr_loss=0.3081, attn_decoder_loss=0.2179, over 29358.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1101, cr_loss=0.3499, attn_decoder_loss=0.2378, over 5776147.48 frames. 
], batch size: 71, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:06:41,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=810400.0, ans=0.0 +2024-09-20 04:06:48,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=810400.0, ans=0.125 +2024-09-20 04:06:49,190 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.777e+01 9.274e+01 9.867e+01 1.400e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-20 04:07:24,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.73 vs. limit=6.0 +2024-09-20 04:07:37,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=810520.0, ans=0.125 +2024-09-20 04:07:46,597 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=810560.0, ans=0.2 +2024-09-20 04:07:52,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=810560.0, ans=0.2 +2024-09-20 04:07:53,935 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=810600.0, ans=0.1 +2024-09-20 04:07:55,108 INFO [train.py:1198] (1/2) Epoch 45, batch 3550, loss[loss=0.2415, ctc_loss=0.1084, cr_loss=0.3534, attn_decoder_loss=0.2484, over 29698.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1097, cr_loss=0.3495, attn_decoder_loss=0.2376, over 5781977.20 frames. ], batch size: 89, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:08:21,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=810640.0, ans=0.125 +2024-09-20 04:08:35,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=810680.0, ans=0.125 +2024-09-20 04:08:39,963 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.40 vs. limit=22.5 +2024-09-20 04:08:48,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=810720.0, ans=0.0 +2024-09-20 04:08:49,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=810720.0, ans=0.0 +2024-09-20 04:09:05,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=810760.0, ans=0.0 +2024-09-20 04:09:10,801 INFO [train.py:1198] (1/2) Epoch 45, batch 3600, loss[loss=0.2144, ctc_loss=0.09572, cr_loss=0.3135, attn_decoder_loss=0.2206, over 29514.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1102, cr_loss=0.3506, attn_decoder_loss=0.2381, over 5790856.50 frames. 
], batch size: 77, lr: 2.44e-03, grad_scale: 32.0 +2024-09-20 04:09:11,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=810800.0, ans=0.125 +2024-09-20 04:09:22,708 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.574e+01 8.599e+01 9.272e+01 9.719e+01 1.680e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-20 04:09:35,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=6.50 vs. limit=15.0 +2024-09-20 04:09:38,294 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.19 vs. limit=12.0 +2024-09-20 04:09:41,340 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=12.0 +2024-09-20 04:09:42,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=810880.0, ans=0.0 +2024-09-20 04:09:55,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=810920.0, ans=0.2 +2024-09-20 04:10:02,904 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=810920.0, ans=0.0 +2024-09-20 04:10:18,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.62 vs. limit=12.0 +2024-09-20 04:10:24,880 INFO [train.py:1198] (1/2) Epoch 45, batch 3650, loss[loss=0.2504, ctc_loss=0.1226, cr_loss=0.3719, attn_decoder_loss=0.2563, over 29506.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1095, cr_loss=0.3484, attn_decoder_loss=0.2375, over 5793558.77 frames. ], batch size: 90, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:10:32,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=811000.0, ans=0.125 +2024-09-20 04:10:34,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=811000.0, ans=0.2 +2024-09-20 04:10:35,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=811000.0, ans=0.125 +2024-09-20 04:10:50,514 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=811040.0, ans=0.125 +2024-09-20 04:11:05,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=811080.0, ans=0.0 +2024-09-20 04:11:19,364 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.82 vs. limit=22.5 +2024-09-20 04:11:19,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.70 vs. 
limit=12.0 +2024-09-20 04:11:27,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=811160.0, ans=0.125 +2024-09-20 04:11:38,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=811200.0, ans=0.125 +2024-09-20 04:11:39,703 INFO [train.py:1198] (1/2) Epoch 45, batch 3700, loss[loss=0.2309, ctc_loss=0.1026, cr_loss=0.3342, attn_decoder_loss=0.2377, over 29726.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1093, cr_loss=0.3478, attn_decoder_loss=0.2374, over 5804690.12 frames. ], batch size: 84, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:11:51,726 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.848e+01 9.366e+01 9.775e+01 1.224e+02, threshold=1.873e+02, percent-clipped=0.0 +2024-09-20 04:11:55,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=811240.0, ans=0.125 +2024-09-20 04:12:30,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=811320.0, ans=0.125 +2024-09-20 04:12:37,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=811360.0, ans=0.125 +2024-09-20 04:12:51,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=811360.0, ans=0.0 +2024-09-20 04:12:54,010 INFO [train.py:1198] (1/2) Epoch 45, batch 3750, loss[loss=0.2038, ctc_loss=0.09161, cr_loss=0.3111, attn_decoder_loss=0.2093, over 29327.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1093, cr_loss=0.3483, attn_decoder_loss=0.2373, over 5808578.46 frames. ], batch size: 67, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:13:00,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=811400.0, ans=0.0 +2024-09-20 04:13:03,523 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.01 vs. limit=15.0 +2024-09-20 04:13:38,103 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=9.84 vs. limit=22.5 +2024-09-20 04:13:52,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=811520.0, ans=0.1 +2024-09-20 04:13:52,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=811520.0, ans=0.0 +2024-09-20 04:13:53,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=811560.0, ans=0.125 +2024-09-20 04:14:09,793 INFO [train.py:1198] (1/2) Epoch 45, batch 3800, loss[loss=0.2415, ctc_loss=0.1158, cr_loss=0.362, attn_decoder_loss=0.2475, over 29613.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1089, cr_loss=0.3473, attn_decoder_loss=0.2366, over 5798594.34 frames. 
], batch size: 86, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:14:21,616 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.415e+01 8.556e+01 8.957e+01 9.574e+01 2.203e+02, threshold=1.791e+02, percent-clipped=1.0 +2024-09-20 04:14:24,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=811640.0, ans=0.0 +2024-09-20 04:14:47,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=811680.0, ans=0.0 +2024-09-20 04:15:21,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=811760.0, ans=0.0 +2024-09-20 04:15:24,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=811800.0, ans=0.2 +2024-09-20 04:15:25,791 INFO [train.py:1198] (1/2) Epoch 45, batch 3850, loss[loss=0.2447, ctc_loss=0.1134, cr_loss=0.3669, attn_decoder_loss=0.2512, over 29279.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1091, cr_loss=0.3475, attn_decoder_loss=0.2366, over 5813659.03 frames. ], batch size: 100, lr: 2.44e-03, grad_scale: 16.0 +2024-09-20 04:15:49,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=811840.0, ans=0.0 +2024-09-20 04:15:51,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=811840.0, ans=0.1 +2024-09-20 04:15:56,087 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.84 vs. limit=10.0 +2024-09-20 04:15:59,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=811880.0, ans=0.2 +2024-09-20 04:16:05,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=811880.0, ans=0.1 +2024-09-20 04:16:21,402 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=5.67 vs. limit=12.0 +2024-09-20 04:16:27,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.27 vs. limit=15.0 +2024-09-20 04:16:34,265 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=811960.0, ans=0.0 +2024-09-20 04:16:40,293 INFO [train.py:1198] (1/2) Epoch 45, batch 3900, loss[loss=0.2389, ctc_loss=0.1144, cr_loss=0.3561, attn_decoder_loss=0.2448, over 29624.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1096, cr_loss=0.3488, attn_decoder_loss=0.2372, over 5817303.41 frames. ], batch size: 86, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:16:52,118 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.943e+01 8.765e+01 9.119e+01 9.578e+01 1.365e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-20 04:16:55,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=812040.0, ans=0.0 +2024-09-20 04:17:01,617 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.17 vs. 
limit=15.0 +2024-09-20 04:17:10,031 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=812080.0, ans=0.125 +2024-09-20 04:17:16,059 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:17:51,427 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:17:54,016 INFO [train.py:1198] (1/2) Epoch 45, batch 3950, loss[loss=0.2513, ctc_loss=0.1261, cr_loss=0.3842, attn_decoder_loss=0.2567, over 29522.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1094, cr_loss=0.348, attn_decoder_loss=0.2372, over 5836640.05 frames. ], batch size: 97, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:17:57,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer_ff2.min_abs, batch_count=812200.0, ans=0.1 +2024-09-20 04:18:28,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=812280.0, ans=0.125 +2024-09-20 04:18:39,240 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.15 vs. limit=15.0 +2024-09-20 04:18:46,395 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.46 vs. limit=22.5 +2024-09-20 04:19:08,744 INFO [train.py:1198] (1/2) Epoch 45, batch 4000, loss[loss=0.2116, ctc_loss=0.08784, cr_loss=0.3023, attn_decoder_loss=0.2186, over 29549.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1091, cr_loss=0.3473, attn_decoder_loss=0.237, over 5814294.04 frames. ], batch size: 74, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:19:11,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=812400.0, ans=0.025 +2024-09-20 04:19:21,815 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.441e+01 8.441e+01 9.012e+01 9.623e+01 3.417e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 04:19:58,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=812520.0, ans=0.0 +2024-09-20 04:20:01,037 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=4.91 vs. limit=15.0 +2024-09-20 04:20:08,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.60 vs. limit=15.0 +2024-09-20 04:20:12,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=812560.0, ans=0.125 +2024-09-20 04:20:23,550 INFO [train.py:1198] (1/2) Epoch 45, batch 4050, loss[loss=0.2432, ctc_loss=0.1228, cr_loss=0.3335, attn_decoder_loss=0.2492, over 19657.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1085, cr_loss=0.3458, attn_decoder_loss=0.2366, over 5797519.82 frames. ], batch size: 209, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:20:44,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.47 vs. 
limit=22.5 +2024-09-20 04:20:51,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=812680.0, ans=0.125 +2024-09-20 04:21:22,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=812760.0, ans=0.5 +2024-09-20 04:21:23,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=812760.0, ans=0.0 +2024-09-20 04:21:24,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=812760.0, ans=0.125 +2024-09-20 04:21:29,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=812760.0, ans=0.125 +2024-09-20 04:21:33,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=812760.0, ans=0.1 +2024-09-20 04:21:36,998 INFO [train.py:1198] (1/2) Epoch 45, batch 4100, loss[loss=0.243, ctc_loss=0.122, cr_loss=0.3827, attn_decoder_loss=0.2479, over 29530.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1088, cr_loss=0.3464, attn_decoder_loss=0.2368, over 5792426.39 frames. ], batch size: 90, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:21:38,822 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=812800.0, ans=10.0 +2024-09-20 04:21:50,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=812840.0, ans=0.05 +2024-09-20 04:21:51,599 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.356e+01 8.673e+01 9.305e+01 9.853e+01 2.008e+02, threshold=1.861e+02, percent-clipped=1.0 +2024-09-20 04:22:49,251 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=813000.0, ans=0.125 +2024-09-20 04:22:50,427 INFO [train.py:1198] (1/2) Epoch 45, batch 4150, loss[loss=0.2229, ctc_loss=0.1092, cr_loss=0.3547, attn_decoder_loss=0.2277, over 29506.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1089, cr_loss=0.3467, attn_decoder_loss=0.2368, over 5797518.59 frames. ], batch size: 77, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:23:03,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=813040.0, ans=0.2 +2024-09-20 04:23:04,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=813040.0, ans=0.125 +2024-09-20 04:23:04,457 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.62 vs. 
limit=15.0 +2024-09-20 04:23:15,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=813040.0, ans=0.035 +2024-09-20 04:23:16,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=813040.0, ans=0.0 +2024-09-20 04:23:19,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=813080.0, ans=0.125 +2024-09-20 04:23:43,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=813120.0, ans=0.125 +2024-09-20 04:23:46,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=813120.0, ans=0.125 +2024-09-20 04:23:55,932 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=813160.0, ans=0.0 +2024-09-20 04:23:56,036 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=813160.0, ans=0.125 +2024-09-20 04:24:06,129 INFO [train.py:1198] (1/2) Epoch 45, batch 4200, loss[loss=0.2519, ctc_loss=0.1327, cr_loss=0.3972, attn_decoder_loss=0.2564, over 29481.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1088, cr_loss=0.3469, attn_decoder_loss=0.2369, over 5799179.47 frames. ], batch size: 90, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:24:20,924 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.773e+01 8.639e+01 8.983e+01 9.636e+01 1.465e+02, threshold=1.797e+02, percent-clipped=0.0 +2024-09-20 04:24:29,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=813240.0, ans=0.125 +2024-09-20 04:24:38,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=813280.0, ans=0.125 +2024-09-20 04:24:40,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=813280.0, ans=0.0 +2024-09-20 04:24:45,054 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.03 vs. limit=15.0 +2024-09-20 04:25:19,315 INFO [train.py:1198] (1/2) Epoch 45, batch 4250, loss[loss=0.2106, ctc_loss=0.08934, cr_loss=0.2951, attn_decoder_loss=0.2175, over 29489.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1087, cr_loss=0.3466, attn_decoder_loss=0.2372, over 5803836.92 frames. ], batch size: 74, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:25:26,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=813400.0, ans=0.0 +2024-09-20 04:25:38,309 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=813440.0, ans=0.0 +2024-09-20 04:25:43,088 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.89 vs. 
limit=22.5 +2024-09-20 04:25:44,090 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=813440.0, ans=0.125 +2024-09-20 04:26:09,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=813520.0, ans=0.0 +2024-09-20 04:26:13,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=813520.0, ans=0.0 +2024-09-20 04:26:15,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten.whitening_limit, batch_count=813520.0, ans=22.5 +2024-09-20 04:26:16,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=813560.0, ans=0.125 +2024-09-20 04:26:17,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=813560.0, ans=0.125 +2024-09-20 04:26:23,838 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=813560.0, ans=0.1 +2024-09-20 04:26:29,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=813560.0, ans=0.125 +2024-09-20 04:26:32,848 INFO [train.py:1198] (1/2) Epoch 45, batch 4300, loss[loss=0.2373, ctc_loss=0.1143, cr_loss=0.365, attn_decoder_loss=0.2428, over 29517.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1087, cr_loss=0.3465, attn_decoder_loss=0.2375, over 5793035.93 frames. ], batch size: 87, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:26:43,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=813600.0, ans=0.125 +2024-09-20 04:26:47,734 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.342e+01 8.880e+01 9.464e+01 1.001e+02 2.468e+02, threshold=1.893e+02, percent-clipped=1.0 +2024-09-20 04:26:59,888 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.21 vs. limit=22.5 +2024-09-20 04:27:32,450 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=813760.0, ans=0.125 +2024-09-20 04:27:33,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=813760.0, ans=0.125 +2024-09-20 04:27:36,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=813760.0, ans=0.125 +2024-09-20 04:27:38,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=813760.0, ans=15.0 +2024-09-20 04:27:43,638 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.18 vs. limit=6.0 +2024-09-20 04:27:48,499 INFO [train.py:1198] (1/2) Epoch 45, batch 4350, loss[loss=0.2502, ctc_loss=0.1201, cr_loss=0.3854, attn_decoder_loss=0.2561, over 29464.00 frames. ], tot_loss[loss=0.2349, ctc_loss=0.1115, cr_loss=0.3531, attn_decoder_loss=0.2408, over 5795885.55 frames. 
], batch size: 97, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:27:50,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.39 vs. limit=22.5 +2024-09-20 04:27:59,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.53 vs. limit=15.0 +2024-09-20 04:28:14,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.78 vs. limit=15.0 +2024-09-20 04:28:15,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=813840.0, ans=0.125 +2024-09-20 04:28:16,645 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=813880.0, ans=0.1 +2024-09-20 04:29:01,439 INFO [train.py:1198] (1/2) Epoch 45, batch 4400, loss[loss=0.2364, ctc_loss=0.1197, cr_loss=0.3589, attn_decoder_loss=0.2414, over 27583.00 frames. ], tot_loss[loss=0.2368, ctc_loss=0.1127, cr_loss=0.3562, attn_decoder_loss=0.2427, over 5765878.65 frames. ], batch size: 125, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:29:04,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=814000.0, ans=0.0 +2024-09-20 04:29:10,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=814000.0, ans=0.0 +2024-09-20 04:29:15,734 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.358e+01 9.080e+01 9.421e+01 9.945e+01 1.972e+02, threshold=1.884e+02, percent-clipped=1.0 +2024-09-20 04:29:15,969 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=814040.0, ans=0.0 +2024-09-20 04:29:18,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=814040.0, ans=0.0 +2024-09-20 04:29:26,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=814040.0, ans=0.1 +2024-09-20 04:29:29,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=814080.0, ans=0.1 +2024-09-20 04:29:51,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.67 vs. limit=15.0 +2024-09-20 04:29:52,071 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=814120.0, ans=0.125 +2024-09-20 04:30:02,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=814160.0, ans=0.125 +2024-09-20 04:30:05,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=814160.0, ans=0.1 +2024-09-20 04:30:09,285 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=12.17 vs. limit=15.0 +2024-09-20 04:30:15,977 INFO [train.py:1198] (1/2) Epoch 45, batch 4450, loss[loss=0.2487, ctc_loss=0.1236, cr_loss=0.3703, attn_decoder_loss=0.2544, over 20339.00 frames. 
], tot_loss[loss=0.2388, ctc_loss=0.1158, cr_loss=0.361, attn_decoder_loss=0.2444, over 5570164.97 frames. ], batch size: 209, lr: 2.43e-03, grad_scale: 16.0 +2024-09-20 04:30:47,208 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.75 vs. limit=5.0 +2024-09-20 04:30:52,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=814280.0, ans=0.1 +2024-09-20 04:31:01,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=814320.0, ans=0.1 +2024-09-20 04:31:06,096 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=814320.0, ans=0.025 +2024-09-20 04:31:06,509 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=15.15 vs. limit=22.5 +2024-09-20 04:31:14,150 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:31:16,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=814360.0, ans=0.125 +2024-09-20 04:31:31,826 INFO [train.py:1198] (1/2) Epoch 45, batch 4500, loss[loss=0.2507, ctc_loss=0.1323, cr_loss=0.3683, attn_decoder_loss=0.2556, over 20716.00 frames. ], tot_loss[loss=0.2407, ctc_loss=0.1187, cr_loss=0.3626, attn_decoder_loss=0.2462, over 5231720.39 frames. ], batch size: 209, lr: 2.43e-03, grad_scale: 8.0 +2024-09-20 04:31:32,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=814400.0, ans=0.025 +2024-09-20 04:31:48,018 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.758e+01 1.032e+02 1.137e+02 1.254e+02 4.078e+02, threshold=2.275e+02, percent-clipped=1.0 +2024-09-20 04:31:49,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=814440.0, ans=0.2 +2024-09-20 04:32:39,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.76 vs. limit=15.0 +2024-09-20 04:32:47,519 INFO [train.py:1198] (1/2) Epoch 46, batch 0, loss[loss=0.2161, ctc_loss=0.1005, cr_loss=0.3288, attn_decoder_loss=0.2216, over 29601.00 frames. ], tot_loss[loss=0.2161, ctc_loss=0.1005, cr_loss=0.3288, attn_decoder_loss=0.2216, over 29601.00 frames. ], batch size: 73, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:32:47,520 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 04:33:04,847 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.2.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([5.1636, 4.9640, 4.7024, 4.4625], device='cuda:1') +2024-09-20 04:33:07,327 INFO [train.py:1230] (1/2) Epoch 46, validation: loss=0.2132, ctc_loss=0.03625, cr_loss=6.411e-15, attn_decoder_loss=0.2328, over 944034.00 frames. 
+2024-09-20 04:33:07,328 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 04:33:09,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=814500.0, ans=0.0 +2024-09-20 04:33:11,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=814500.0, ans=0.125 +2024-09-20 04:33:16,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=814500.0, ans=0.125 +2024-09-20 04:33:18,051 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=814500.0, ans=0.2 +2024-09-20 04:33:23,397 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.12 vs. limit=10.0 +2024-09-20 04:33:27,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=814540.0, ans=0.2 +2024-09-20 04:33:27,713 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.98 vs. limit=15.0 +2024-09-20 04:34:11,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.68 vs. limit=22.5 +2024-09-20 04:34:24,683 INFO [train.py:1198] (1/2) Epoch 46, batch 50, loss[loss=0.2014, ctc_loss=0.08442, cr_loss=0.2861, attn_decoder_loss=0.2081, over 29419.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1127, cr_loss=0.3563, attn_decoder_loss=0.2395, over 1269160.28 frames. ], batch size: 70, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:34:54,395 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.16 vs. limit=15.0 +2024-09-20 04:35:19,989 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.869e+01 8.768e+01 9.324e+01 1.041e+02 2.439e+02, threshold=1.865e+02, percent-clipped=1.0 +2024-09-20 04:35:23,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=814820.0, ans=0.0 +2024-09-20 04:35:29,961 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.11 vs. limit=10.0 +2024-09-20 04:35:41,089 INFO [train.py:1198] (1/2) Epoch 46, batch 100, loss[loss=0.2163, ctc_loss=0.09819, cr_loss=0.3176, attn_decoder_loss=0.2223, over 29529.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1114, cr_loss=0.3532, attn_decoder_loss=0.2396, over 2254008.49 frames. ], batch size: 76, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:35:51,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=814900.0, ans=0.125 +2024-09-20 04:35:53,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=814900.0, ans=0.125 +2024-09-20 04:35:56,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=768, metric=8.28 vs. 
limit=15.0 +2024-09-20 04:36:23,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=814980.0, ans=0.125 +2024-09-20 04:36:25,445 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=8.10 vs. limit=15.0 +2024-09-20 04:36:26,089 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=815020.0, ans=0.1 +2024-09-20 04:36:35,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=815020.0, ans=0.125 +2024-09-20 04:36:46,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=815060.0, ans=0.2 +2024-09-20 04:36:55,409 INFO [train.py:1198] (1/2) Epoch 46, batch 150, loss[loss=0.2053, ctc_loss=0.08858, cr_loss=0.302, attn_decoder_loss=0.2115, over 29429.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1089, cr_loss=0.3479, attn_decoder_loss=0.2375, over 3048342.35 frames. ], batch size: 70, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:37:12,243 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff2_skip_rate, batch_count=815140.0, ans=0.0 +2024-09-20 04:37:25,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=815180.0, ans=0.1 +2024-09-20 04:37:27,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=815180.0, ans=0.0 +2024-09-20 04:37:39,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=815220.0, ans=0.125 +2024-09-20 04:37:51,965 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.524e+01 8.420e+01 9.019e+01 9.584e+01 1.300e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-20 04:38:04,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=815260.0, ans=0.1 +2024-09-20 04:38:12,897 INFO [train.py:1198] (1/2) Epoch 46, batch 200, loss[loss=0.241, ctc_loss=0.1176, cr_loss=0.3505, attn_decoder_loss=0.247, over 27289.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1088, cr_loss=0.3479, attn_decoder_loss=0.2366, over 3659917.42 frames. ], batch size: 124, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:38:13,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=815300.0, ans=0.125 +2024-09-20 04:38:28,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=815340.0, ans=0.125 +2024-09-20 04:38:28,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=815340.0, ans=0.0 +2024-09-20 04:38:56,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=815420.0, ans=0.125 +2024-09-20 04:39:23,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=815460.0, ans=0.0 +2024-09-20 04:39:23,720 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=7.05 vs. 
limit=10.0 +2024-09-20 04:39:27,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=815460.0, ans=0.0 +2024-09-20 04:39:30,378 INFO [train.py:1198] (1/2) Epoch 46, batch 250, loss[loss=0.2379, ctc_loss=0.1102, cr_loss=0.335, attn_decoder_loss=0.2447, over 29226.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1085, cr_loss=0.3469, attn_decoder_loss=0.2365, over 4141535.99 frames. ], batch size: 100, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:39:32,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=815500.0, ans=0.1 +2024-09-20 04:40:03,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=815580.0, ans=0.1 +2024-09-20 04:40:18,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=815620.0, ans=0.125 +2024-09-20 04:40:24,348 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.101e+01 8.615e+01 9.020e+01 9.569e+01 1.385e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-20 04:40:38,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten.whitening_limit, batch_count=815660.0, ans=15.0 +2024-09-20 04:40:45,341 INFO [train.py:1198] (1/2) Epoch 46, batch 300, loss[loss=0.2465, ctc_loss=0.1241, cr_loss=0.3912, attn_decoder_loss=0.2514, over 29547.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1086, cr_loss=0.3462, attn_decoder_loss=0.2364, over 4509769.39 frames. ], batch size: 92, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:40:47,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=815700.0, ans=0.125 +2024-09-20 04:41:37,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.42 vs. limit=15.0 +2024-09-20 04:41:51,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=815860.0, ans=0.125 +2024-09-20 04:42:02,779 INFO [train.py:1198] (1/2) Epoch 46, batch 350, loss[loss=0.2173, ctc_loss=0.09855, cr_loss=0.3242, attn_decoder_loss=0.2233, over 29305.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1089, cr_loss=0.3472, attn_decoder_loss=0.2369, over 4795731.40 frames. ], batch size: 71, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:42:08,214 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.64 vs. limit=15.0 +2024-09-20 04:42:24,395 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.60 vs. 
limit=15.0 +2024-09-20 04:42:31,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=815980.0, ans=0.0 +2024-09-20 04:42:52,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=815980.0, ans=0.0 +2024-09-20 04:42:52,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=815980.0, ans=0.125 +2024-09-20 04:42:52,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=815980.0, ans=0.025 +2024-09-20 04:42:55,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=816020.0, ans=0.125 +2024-09-20 04:43:04,445 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.235e+01 8.751e+01 9.027e+01 9.740e+01 2.091e+02, threshold=1.805e+02, percent-clipped=1.0 +2024-09-20 04:43:07,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=816020.0, ans=0.1 +2024-09-20 04:43:10,877 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:43:19,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=816060.0, ans=0.0 +2024-09-20 04:43:27,886 INFO [train.py:1198] (1/2) Epoch 46, batch 400, loss[loss=0.2385, ctc_loss=0.1184, cr_loss=0.3815, attn_decoder_loss=0.2434, over 29688.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1087, cr_loss=0.3474, attn_decoder_loss=0.2369, over 5023996.44 frames. ], batch size: 82, lr: 2.40e-03, grad_scale: 32.0 +2024-09-20 04:43:34,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=816100.0, ans=0.0 +2024-09-20 04:43:43,835 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.35 vs. limit=15.0 +2024-09-20 04:43:48,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=816140.0, ans=0.125 +2024-09-20 04:43:53,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=816140.0, ans=0.125 +2024-09-20 04:44:26,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=816220.0, ans=0.125 +2024-09-20 04:44:43,883 INFO [train.py:1198] (1/2) Epoch 46, batch 450, loss[loss=0.2396, ctc_loss=0.1138, cr_loss=0.3683, attn_decoder_loss=0.2454, over 29690.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1092, cr_loss=0.3481, attn_decoder_loss=0.2373, over 5186139.49 frames. ], batch size: 83, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:44:53,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=816300.0, ans=0.125 +2024-09-20 04:44:58,397 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.98 vs. 
limit=10.0 +2024-09-20 04:45:04,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=816340.0, ans=0.125 +2024-09-20 04:45:09,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=816340.0, ans=0.125 +2024-09-20 04:45:14,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=816380.0, ans=0.0 +2024-09-20 04:45:17,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=816380.0, ans=0.07 +2024-09-20 04:45:22,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=816380.0, ans=0.07 +2024-09-20 04:45:40,121 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.987e+01 8.630e+01 9.037e+01 9.631e+01 6.120e+02, threshold=1.807e+02, percent-clipped=1.0 +2024-09-20 04:45:42,017 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=816420.0, ans=0.0 +2024-09-20 04:46:02,237 INFO [train.py:1198] (1/2) Epoch 46, batch 500, loss[loss=0.244, ctc_loss=0.1253, cr_loss=0.3631, attn_decoder_loss=0.2491, over 29457.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.109, cr_loss=0.3483, attn_decoder_loss=0.2369, over 5329559.63 frames. ], batch size: 94, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:46:02,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.50 vs. limit=15.0 +2024-09-20 04:46:23,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=816540.0, ans=0.125 +2024-09-20 04:46:27,110 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.40 vs. limit=15.0 +2024-09-20 04:46:29,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=816540.0, ans=0.0 +2024-09-20 04:46:47,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=816620.0, ans=0.1 +2024-09-20 04:47:01,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=816660.0, ans=0.0 +2024-09-20 04:47:09,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=816660.0, ans=0.125 +2024-09-20 04:47:20,170 INFO [train.py:1198] (1/2) Epoch 46, batch 550, loss[loss=0.2283, ctc_loss=0.107, cr_loss=0.3436, attn_decoder_loss=0.2341, over 28886.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1087, cr_loss=0.3475, attn_decoder_loss=0.2369, over 5422105.27 frames. ], batch size: 104, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:47:30,032 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.33 vs. 
limit=22.5 +2024-09-20 04:47:30,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=816700.0, ans=0.035 +2024-09-20 04:47:38,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=816740.0, ans=0.125 +2024-09-20 04:47:41,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=816740.0, ans=0.125 +2024-09-20 04:47:45,007 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.75 vs. limit=12.0 +2024-09-20 04:47:45,283 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.42 vs. limit=10.0 +2024-09-20 04:47:47,652 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=816740.0, ans=0.125 +2024-09-20 04:47:47,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=816740.0, ans=0.125 +2024-09-20 04:47:52,807 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.78 vs. limit=15.0 +2024-09-20 04:48:16,367 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.351e+01 8.519e+01 9.115e+01 9.608e+01 2.263e+02, threshold=1.823e+02, percent-clipped=2.0 +2024-09-20 04:48:24,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=816860.0, ans=0.125 +2024-09-20 04:48:36,091 INFO [train.py:1198] (1/2) Epoch 46, batch 600, loss[loss=0.245, ctc_loss=0.1191, cr_loss=0.3773, attn_decoder_loss=0.2506, over 29279.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.109, cr_loss=0.3479, attn_decoder_loss=0.2371, over 5508219.81 frames. ], batch size: 100, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:48:49,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=816940.0, ans=0.1 +2024-09-20 04:48:51,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=816940.0, ans=0.125 +2024-09-20 04:49:22,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=817020.0, ans=0.125 +2024-09-20 04:49:29,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=817020.0, ans=0.125 +2024-09-20 04:49:53,428 INFO [train.py:1198] (1/2) Epoch 46, batch 650, loss[loss=0.2314, ctc_loss=0.1095, cr_loss=0.349, attn_decoder_loss=0.2372, over 29775.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1083, cr_loss=0.347, attn_decoder_loss=0.2368, over 5585476.91 frames. 
], batch size: 81, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:50:09,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=817140.0, ans=10.0 +2024-09-20 04:50:33,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=817180.0, ans=0.1 +2024-09-20 04:50:39,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=817220.0, ans=0.025 +2024-09-20 04:50:49,215 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.561e+01 8.319e+01 8.831e+01 9.492e+01 1.301e+02, threshold=1.766e+02, percent-clipped=0.0 +2024-09-20 04:50:51,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=817220.0, ans=0.125 +2024-09-20 04:51:08,830 INFO [train.py:1198] (1/2) Epoch 46, batch 700, loss[loss=0.2203, ctc_loss=0.1024, cr_loss=0.3454, attn_decoder_loss=0.2258, over 29536.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1087, cr_loss=0.3478, attn_decoder_loss=0.237, over 5636140.56 frames. ], batch size: 76, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:51:09,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=817300.0, ans=0.125 +2024-09-20 04:51:12,747 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.37 vs. limit=10.0 +2024-09-20 04:51:22,541 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=3.86 vs. limit=12.0 +2024-09-20 04:51:23,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=817300.0, ans=0.125 +2024-09-20 04:51:41,957 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:51:43,890 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.43 vs. limit=15.0 +2024-09-20 04:52:00,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=817420.0, ans=0.0 +2024-09-20 04:52:06,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=817420.0, ans=0.1 +2024-09-20 04:52:27,350 INFO [train.py:1198] (1/2) Epoch 46, batch 750, loss[loss=0.2331, ctc_loss=0.1053, cr_loss=0.3272, attn_decoder_loss=0.24, over 29730.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.109, cr_loss=0.3485, attn_decoder_loss=0.237, over 5675535.47 frames. ], batch size: 82, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:52:27,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=817500.0, ans=0.2 +2024-09-20 04:52:29,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=817500.0, ans=0.07 +2024-09-20 04:52:30,089 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=9.81 vs. 
limit=15.0 +2024-09-20 04:52:30,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=817500.0, ans=0.0 +2024-09-20 04:52:39,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=817500.0, ans=0.0 +2024-09-20 04:52:41,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.min_positive, batch_count=817540.0, ans=0.05 +2024-09-20 04:53:02,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=817580.0, ans=0.125 +2024-09-20 04:53:07,101 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=817580.0, ans=0.0 +2024-09-20 04:53:08,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=817580.0, ans=0.125 +2024-09-20 04:53:08,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=817580.0, ans=0.0 +2024-09-20 04:53:23,358 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.270e+01 8.548e+01 9.089e+01 9.698e+01 1.282e+02, threshold=1.818e+02, percent-clipped=0.0 +2024-09-20 04:53:44,978 INFO [train.py:1198] (1/2) Epoch 46, batch 800, loss[loss=0.2116, ctc_loss=0.097, cr_loss=0.3215, attn_decoder_loss=0.2172, over 29628.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1091, cr_loss=0.3487, attn_decoder_loss=0.2369, over 5707331.97 frames. ], batch size: 73, lr: 2.40e-03, grad_scale: 32.0 +2024-09-20 04:54:14,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=817780.0, ans=0.125 +2024-09-20 04:54:28,147 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.23 vs. limit=22.5 +2024-09-20 04:54:56,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=19.68 vs. limit=22.5 +2024-09-20 04:55:00,148 INFO [train.py:1198] (1/2) Epoch 46, batch 850, loss[loss=0.2461, ctc_loss=0.1155, cr_loss=0.3619, attn_decoder_loss=0.2526, over 29722.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1086, cr_loss=0.3473, attn_decoder_loss=0.2364, over 5735909.59 frames. ], batch size: 89, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:55:10,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=817900.0, ans=0.0 +2024-09-20 04:55:27,473 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.85 vs. 
limit=22.5 +2024-09-20 04:55:54,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=818020.0, ans=0.125 +2024-09-20 04:55:59,848 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.524e+01 9.066e+01 9.505e+01 2.667e+02, threshold=1.813e+02, percent-clipped=1.0 +2024-09-20 04:56:09,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=818060.0, ans=0.125 +2024-09-20 04:56:10,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=818060.0, ans=0.0 +2024-09-20 04:56:18,041 INFO [train.py:1198] (1/2) Epoch 46, batch 900, loss[loss=0.2057, ctc_loss=0.08618, cr_loss=0.2967, attn_decoder_loss=0.2124, over 29623.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1085, cr_loss=0.3471, attn_decoder_loss=0.2366, over 5740907.02 frames. ], batch size: 73, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:56:30,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=818100.0, ans=0.0 +2024-09-20 04:56:43,509 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=818140.0, ans=0.1 +2024-09-20 04:56:45,418 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.29 vs. limit=10.0 +2024-09-20 04:57:03,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=10.23 vs. limit=15.0 +2024-09-20 04:57:12,116 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=818220.0, ans=0.125 +2024-09-20 04:57:15,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=818220.0, ans=0.0 +2024-09-20 04:57:16,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=818260.0, ans=0.125 +2024-09-20 04:57:17,509 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.99 vs. limit=6.0 +2024-09-20 04:57:24,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=818260.0, ans=0.0 +2024-09-20 04:57:34,898 INFO [train.py:1198] (1/2) Epoch 46, batch 950, loss[loss=0.2178, ctc_loss=0.0954, cr_loss=0.3184, attn_decoder_loss=0.2244, over 29485.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1083, cr_loss=0.3465, attn_decoder_loss=0.2366, over 5741792.99 frames. ], batch size: 74, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:57:50,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.65 vs. limit=15.0 +2024-09-20 04:57:56,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=818340.0, ans=0.125 +2024-09-20 04:58:01,507 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.10 vs. 
limit=15.0 +2024-09-20 04:58:06,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=818380.0, ans=0.125 +2024-09-20 04:58:32,434 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.692e+01 9.271e+01 9.926e+01 1.686e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-20 04:58:50,241 INFO [train.py:1198] (1/2) Epoch 46, batch 1000, loss[loss=0.2193, ctc_loss=0.102, cr_loss=0.3419, attn_decoder_loss=0.2247, over 29496.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1091, cr_loss=0.3483, attn_decoder_loss=0.2373, over 5736506.81 frames. ], batch size: 77, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 04:58:55,866 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.50 vs. limit=10.0 +2024-09-20 04:59:06,346 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.92 vs. limit=22.5 +2024-09-20 04:59:15,620 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 04:59:21,937 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.57 vs. limit=15.0 +2024-09-20 05:00:06,009 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=14.53 vs. limit=15.0 +2024-09-20 05:00:07,746 INFO [train.py:1198] (1/2) Epoch 46, batch 1050, loss[loss=0.2365, ctc_loss=0.1124, cr_loss=0.335, attn_decoder_loss=0.2429, over 29703.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1085, cr_loss=0.3466, attn_decoder_loss=0.2364, over 5744377.20 frames. ], batch size: 85, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:00:13,386 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=7.08 vs. limit=8.0 +2024-09-20 05:00:13,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=818700.0, ans=0.07 +2024-09-20 05:00:22,082 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.21 vs. limit=6.0 +2024-09-20 05:00:32,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=818740.0, ans=0.0 +2024-09-20 05:00:32,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.22 vs. limit=15.0 +2024-09-20 05:00:52,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=818820.0, ans=0.1 +2024-09-20 05:01:05,426 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.267e+01 8.722e+01 9.094e+01 9.715e+01 1.593e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-20 05:01:24,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=818900.0, ans=0.125 +2024-09-20 05:01:25,793 INFO [train.py:1198] (1/2) Epoch 46, batch 1100, loss[loss=0.2304, ctc_loss=0.11, cr_loss=0.3627, attn_decoder_loss=0.2358, over 29458.00 frames. 
], tot_loss[loss=0.2304, ctc_loss=0.1082, cr_loss=0.346, attn_decoder_loss=0.2363, over 5756394.92 frames. ], batch size: 78, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:01:30,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=818900.0, ans=0.07 +2024-09-20 05:01:30,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=818900.0, ans=0.125 +2024-09-20 05:01:41,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=818940.0, ans=0.2 +2024-09-20 05:01:45,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=818940.0, ans=0.025 +2024-09-20 05:01:59,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=818980.0, ans=0.125 +2024-09-20 05:02:05,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=818980.0, ans=0.0 +2024-09-20 05:02:41,365 INFO [train.py:1198] (1/2) Epoch 46, batch 1150, loss[loss=0.2314, ctc_loss=0.1124, cr_loss=0.3582, attn_decoder_loss=0.2366, over 29453.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1087, cr_loss=0.3469, attn_decoder_loss=0.2366, over 5754651.21 frames. ], batch size: 78, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:02:48,578 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.05 vs. limit=15.0 +2024-09-20 05:03:07,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=819140.0, ans=0.125 +2024-09-20 05:03:11,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=819140.0, ans=0.125 +2024-09-20 05:03:14,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=819180.0, ans=0.2 +2024-09-20 05:03:41,722 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.318e+01 8.675e+01 9.165e+01 9.732e+01 5.471e+02, threshold=1.833e+02, percent-clipped=2.0 +2024-09-20 05:03:42,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.13 vs. limit=15.0 +2024-09-20 05:03:58,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=819300.0, ans=0.125 +2024-09-20 05:03:59,530 INFO [train.py:1198] (1/2) Epoch 46, batch 1200, loss[loss=0.2382, ctc_loss=0.1069, cr_loss=0.3394, attn_decoder_loss=0.2453, over 29682.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1091, cr_loss=0.3479, attn_decoder_loss=0.2372, over 5746404.03 frames. ], batch size: 85, lr: 2.40e-03, grad_scale: 32.0 +2024-09-20 05:04:00,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=13.18 vs. 
limit=15.0 +2024-09-20 05:04:13,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=819340.0, ans=0.125 +2024-09-20 05:04:16,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.max_positive, batch_count=819340.0, ans=0.95 +2024-09-20 05:04:19,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=819340.0, ans=0.0 +2024-09-20 05:04:20,097 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.76 vs. limit=6.0 +2024-09-20 05:04:33,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=819380.0, ans=0.125 +2024-09-20 05:04:43,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=819420.0, ans=0.0 +2024-09-20 05:04:47,629 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.61 vs. limit=15.0 +2024-09-20 05:04:50,681 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.10 vs. limit=15.0 +2024-09-20 05:05:17,179 INFO [train.py:1198] (1/2) Epoch 46, batch 1250, loss[loss=0.2514, ctc_loss=0.124, cr_loss=0.377, attn_decoder_loss=0.2571, over 29507.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1097, cr_loss=0.3497, attn_decoder_loss=0.2379, over 5774475.54 frames. ], batch size: 92, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:05:23,502 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=819500.0, ans=0.0 +2024-09-20 05:05:56,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=819580.0, ans=0.125 +2024-09-20 05:06:14,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=819620.0, ans=0.1 +2024-09-20 05:06:16,051 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.294e+01 8.479e+01 9.052e+01 9.530e+01 1.493e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-20 05:06:22,996 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.65 vs. limit=15.0 +2024-09-20 05:06:25,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=819660.0, ans=0.125 +2024-09-20 05:06:32,584 INFO [train.py:1198] (1/2) Epoch 46, batch 1300, loss[loss=0.2267, ctc_loss=0.09715, cr_loss=0.3189, attn_decoder_loss=0.234, over 28196.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1088, cr_loss=0.3474, attn_decoder_loss=0.2369, over 5778826.55 frames. ], batch size: 111, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:06:35,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=819700.0, ans=0.025 +2024-09-20 05:06:53,478 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.38 vs. 
limit=10.0 +2024-09-20 05:06:55,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=819740.0, ans=0.95 +2024-09-20 05:07:01,595 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=819780.0, ans=0.0 +2024-09-20 05:07:15,724 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=819780.0, ans=0.0 +2024-09-20 05:07:24,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=819820.0, ans=0.0 +2024-09-20 05:07:43,466 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.out_combiner.scale_min, batch_count=819860.0, ans=0.2 +2024-09-20 05:07:50,557 INFO [train.py:1198] (1/2) Epoch 46, batch 1350, loss[loss=0.2375, ctc_loss=0.1108, cr_loss=0.3656, attn_decoder_loss=0.2435, over 29778.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1085, cr_loss=0.3469, attn_decoder_loss=0.2368, over 5795626.95 frames. ], batch size: 81, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:07:51,441 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.57 vs. limit=15.0 +2024-09-20 05:08:15,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten.whitening_limit, batch_count=819940.0, ans=15.0 +2024-09-20 05:08:28,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=819980.0, ans=0.125 +2024-09-20 05:08:31,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=819980.0, ans=0.125 +2024-09-20 05:08:48,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=820020.0, ans=0.0 +2024-09-20 05:08:49,308 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.548e+01 8.548e+01 9.009e+01 9.440e+01 1.283e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-20 05:09:08,072 INFO [train.py:1198] (1/2) Epoch 46, batch 1400, loss[loss=0.2074, ctc_loss=0.09375, cr_loss=0.3075, attn_decoder_loss=0.2132, over 29544.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1084, cr_loss=0.3468, attn_decoder_loss=0.2367, over 5806754.39 frames. ], batch size: 69, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:09:15,153 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.87 vs. limit=15.0 +2024-09-20 05:09:28,826 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=14.18 vs. 
limit=15.0 +2024-09-20 05:09:33,812 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=820140.0, ans=0.2 +2024-09-20 05:09:33,889 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=820140.0, ans=0.1 +2024-09-20 05:09:35,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=820140.0, ans=0.125 +2024-09-20 05:10:06,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=820260.0, ans=0.125 +2024-09-20 05:10:18,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.prob, batch_count=820260.0, ans=0.125 +2024-09-20 05:10:21,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=820300.0, ans=0.025 +2024-09-20 05:10:23,186 INFO [train.py:1198] (1/2) Epoch 46, batch 1450, loss[loss=0.2451, ctc_loss=0.115, cr_loss=0.3553, attn_decoder_loss=0.2516, over 29449.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1084, cr_loss=0.3465, attn_decoder_loss=0.2371, over 5802897.95 frames. ], batch size: 94, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:10:50,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=820340.0, ans=0.0 +2024-09-20 05:11:20,463 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.52 vs. limit=15.0 +2024-09-20 05:11:23,834 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.745e+01 8.716e+01 9.154e+01 9.658e+01 1.732e+02, threshold=1.831e+02, percent-clipped=0.0 +2024-09-20 05:11:25,802 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:11:28,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=820460.0, ans=0.125 +2024-09-20 05:11:40,540 INFO [train.py:1198] (1/2) Epoch 46, batch 1500, loss[loss=0.2325, ctc_loss=0.1079, cr_loss=0.3338, attn_decoder_loss=0.2389, over 29620.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1086, cr_loss=0.3467, attn_decoder_loss=0.2374, over 5803825.29 frames. ], batch size: 86, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:11:42,415 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=820500.0, ans=0.0 +2024-09-20 05:12:05,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=820540.0, ans=0.025 +2024-09-20 05:12:18,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.72 vs. limit=15.0 +2024-09-20 05:12:37,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=820620.0, ans=0.125 +2024-09-20 05:12:40,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=820660.0, ans=0.125 +2024-09-20 05:12:43,596 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.15 vs. 
limit=15.0 +2024-09-20 05:12:44,696 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=820660.0, ans=0.125 +2024-09-20 05:12:46,862 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.81 vs. limit=15.0 +2024-09-20 05:12:52,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=820660.0, ans=0.0 +2024-09-20 05:12:58,451 INFO [train.py:1198] (1/2) Epoch 46, batch 1550, loss[loss=0.2416, ctc_loss=0.1183, cr_loss=0.36, attn_decoder_loss=0.2473, over 29509.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1089, cr_loss=0.3475, attn_decoder_loss=0.2373, over 5780363.02 frames. ], batch size: 90, lr: 2.40e-03, grad_scale: 16.0 +2024-09-20 05:13:08,114 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.76 vs. limit=15.0 +2024-09-20 05:13:24,851 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.84 vs. limit=22.5 +2024-09-20 05:13:26,634 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.50 vs. limit=10.0 +2024-09-20 05:13:31,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=820780.0, ans=0.125 +2024-09-20 05:13:57,689 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.852e+01 8.666e+01 9.165e+01 9.955e+01 1.733e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-20 05:14:00,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=820860.0, ans=0.125 +2024-09-20 05:14:14,157 INFO [train.py:1198] (1/2) Epoch 46, batch 1600, loss[loss=0.2385, ctc_loss=0.1112, cr_loss=0.3567, attn_decoder_loss=0.2447, over 29683.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1092, cr_loss=0.3481, attn_decoder_loss=0.2373, over 5764263.93 frames. ], batch size: 85, lr: 2.39e-03, grad_scale: 32.0 +2024-09-20 05:14:18,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=820900.0, ans=0.2 +2024-09-20 05:15:06,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=821020.0, ans=0.0 +2024-09-20 05:15:31,300 INFO [train.py:1198] (1/2) Epoch 46, batch 1650, loss[loss=0.24, ctc_loss=0.106, cr_loss=0.3396, attn_decoder_loss=0.2473, over 29690.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1088, cr_loss=0.3471, attn_decoder_loss=0.2371, over 5758899.23 frames. 
], batch size: 89, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:15:43,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=821100.0, ans=0.1 +2024-09-20 05:16:09,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=821180.0, ans=0.0 +2024-09-20 05:16:31,207 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.361e+01 8.592e+01 9.131e+01 9.784e+01 1.419e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-20 05:16:48,215 INFO [train.py:1198] (1/2) Epoch 46, batch 1700, loss[loss=0.2094, ctc_loss=0.09766, cr_loss=0.3283, attn_decoder_loss=0.2145, over 29584.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1085, cr_loss=0.3467, attn_decoder_loss=0.2369, over 5780513.89 frames. ], batch size: 69, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:16:53,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=821300.0, ans=0.04949747468305833 +2024-09-20 05:16:59,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=821300.0, ans=0.125 +2024-09-20 05:16:59,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=821300.0, ans=0.07 +2024-09-20 05:17:06,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=821340.0, ans=0.125 +2024-09-20 05:17:09,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=821340.0, ans=0.125 +2024-09-20 05:17:15,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=821340.0, ans=0.125 +2024-09-20 05:17:26,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.min_positive, batch_count=821380.0, ans=0.05 +2024-09-20 05:17:29,147 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=821380.0, ans=0.025 +2024-09-20 05:17:32,052 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=821420.0, ans=0.125 +2024-09-20 05:17:37,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=821420.0, ans=0.125 +2024-09-20 05:17:44,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=821420.0, ans=0.0 +2024-09-20 05:17:44,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=821420.0, ans=0.125 +2024-09-20 05:17:57,917 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.08 vs. limit=12.0 +2024-09-20 05:17:59,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=821460.0, ans=0.1 +2024-09-20 05:18:03,232 INFO [train.py:1198] (1/2) Epoch 46, batch 1750, loss[loss=0.2113, ctc_loss=0.09574, cr_loss=0.3238, attn_decoder_loss=0.2169, over 29330.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1085, cr_loss=0.3473, attn_decoder_loss=0.2368, over 5789510.64 frames. 
], batch size: 67, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:18:05,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=821500.0, ans=0.125 +2024-09-20 05:18:43,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.58 vs. limit=15.0 +2024-09-20 05:18:50,778 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:19:06,023 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.095e+01 8.682e+01 9.175e+01 9.617e+01 1.208e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-20 05:19:12,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=821660.0, ans=0.0 +2024-09-20 05:19:20,746 INFO [train.py:1198] (1/2) Epoch 46, batch 1800, loss[loss=0.2538, ctc_loss=0.1264, cr_loss=0.3891, attn_decoder_loss=0.2593, over 29682.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1088, cr_loss=0.3479, attn_decoder_loss=0.2373, over 5792174.65 frames. ], batch size: 83, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:19:23,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=821700.0, ans=0.125 +2024-09-20 05:19:34,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=821740.0, ans=0.0 +2024-09-20 05:19:47,583 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.96 vs. limit=15.0 +2024-09-20 05:19:47,644 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.27 vs. limit=15.0 +2024-09-20 05:19:59,602 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.34 vs. limit=22.5 +2024-09-20 05:20:01,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=821780.0, ans=0.125 +2024-09-20 05:20:22,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=821860.0, ans=0.025 +2024-09-20 05:20:38,148 INFO [train.py:1198] (1/2) Epoch 46, batch 1850, loss[loss=0.244, ctc_loss=0.1118, cr_loss=0.3585, attn_decoder_loss=0.2507, over 29626.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1087, cr_loss=0.3477, attn_decoder_loss=0.2371, over 5798251.83 frames. 
], batch size: 86, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:20:38,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=821900.0, ans=0.125 +2024-09-20 05:20:47,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten.whitening_limit, batch_count=821900.0, ans=15.0 +2024-09-20 05:20:51,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=821940.0, ans=0.0 +2024-09-20 05:20:54,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=821940.0, ans=0.125 +2024-09-20 05:20:56,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=821940.0, ans=0.125 +2024-09-20 05:21:07,433 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.71 vs. limit=15.0 +2024-09-20 05:21:38,308 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.493e+01 8.515e+01 9.175e+01 9.634e+01 2.306e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-20 05:21:53,036 INFO [train.py:1198] (1/2) Epoch 46, batch 1900, loss[loss=0.2347, ctc_loss=0.1027, cr_loss=0.322, attn_decoder_loss=0.2422, over 29716.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1089, cr_loss=0.3478, attn_decoder_loss=0.2376, over 5805528.77 frames. ], batch size: 89, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:21:59,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=822100.0, ans=0.0 +2024-09-20 05:22:19,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=822140.0, ans=0.0 +2024-09-20 05:22:27,458 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.78 vs. limit=6.0 +2024-09-20 05:22:51,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=822220.0, ans=0.0 +2024-09-20 05:22:57,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=822260.0, ans=0.0 +2024-09-20 05:22:59,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=822260.0, ans=0.0 +2024-09-20 05:23:05,564 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.27 vs. limit=22.5 +2024-09-20 05:23:10,795 INFO [train.py:1198] (1/2) Epoch 46, batch 1950, loss[loss=0.2303, ctc_loss=0.1082, cr_loss=0.3431, attn_decoder_loss=0.2363, over 29489.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1093, cr_loss=0.3491, attn_decoder_loss=0.2383, over 5819703.07 frames. ], batch size: 78, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:23:20,734 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.93 vs. 
limit=15.0 +2024-09-20 05:23:29,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=822340.0, ans=0.0 +2024-09-20 05:23:32,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=822340.0, ans=0.1 +2024-09-20 05:23:33,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=822340.0, ans=0.1 +2024-09-20 05:23:50,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=822380.0, ans=0.125 +2024-09-20 05:23:53,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=822380.0, ans=0.125 +2024-09-20 05:24:06,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=822420.0, ans=0.0 +2024-09-20 05:24:09,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=822460.0, ans=0.125 +2024-09-20 05:24:10,972 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.887e+01 8.689e+01 9.269e+01 9.722e+01 1.487e+02, threshold=1.854e+02, percent-clipped=0.0 +2024-09-20 05:24:16,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=822460.0, ans=0.1 +2024-09-20 05:24:18,637 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.21 vs. limit=6.0 +2024-09-20 05:24:22,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=822460.0, ans=0.2 +2024-09-20 05:24:24,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=822460.0, ans=0.0 +2024-09-20 05:24:24,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=822460.0, ans=0.125 +2024-09-20 05:24:28,129 INFO [train.py:1198] (1/2) Epoch 46, batch 2000, loss[loss=0.2158, ctc_loss=0.108, cr_loss=0.3495, attn_decoder_loss=0.22, over 29343.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1096, cr_loss=0.3499, attn_decoder_loss=0.2387, over 5796240.07 frames. ], batch size: 67, lr: 2.39e-03, grad_scale: 32.0 +2024-09-20 05:24:32,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=822500.0, ans=0.2 +2024-09-20 05:24:48,522 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.43 vs. limit=22.5 +2024-09-20 05:25:03,746 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.18 vs. limit=12.0 +2024-09-20 05:25:15,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=822620.0, ans=0.0 +2024-09-20 05:25:18,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=822620.0, ans=0.0 +2024-09-20 05:25:43,661 INFO [train.py:1198] (1/2) Epoch 46, batch 2050, loss[loss=0.2094, ctc_loss=0.09015, cr_loss=0.309, attn_decoder_loss=0.2158, over 29472.00 frames. 
], tot_loss[loss=0.2321, ctc_loss=0.109, cr_loss=0.3481, attn_decoder_loss=0.238, over 5788563.68 frames. ], batch size: 70, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:25:47,097 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=822700.0, ans=0.125 +2024-09-20 05:26:17,394 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:26:19,506 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.32 vs. limit=12.0 +2024-09-20 05:26:24,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=822780.0, ans=0.0 +2024-09-20 05:26:33,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=822820.0, ans=0.125 +2024-09-20 05:26:35,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=822820.0, ans=0.1 +2024-09-20 05:26:35,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=822820.0, ans=0.125 +2024-09-20 05:26:41,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=822820.0, ans=0.0 +2024-09-20 05:26:47,717 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.463e+01 8.548e+01 9.000e+01 9.590e+01 1.636e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-20 05:26:58,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=822860.0, ans=0.0 +2024-09-20 05:27:01,495 INFO [train.py:1198] (1/2) Epoch 46, batch 2100, loss[loss=0.2283, ctc_loss=0.1031, cr_loss=0.3355, attn_decoder_loss=0.2348, over 29762.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1088, cr_loss=0.3478, attn_decoder_loss=0.2374, over 5798852.38 frames. ], batch size: 81, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:27:05,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.58 vs. 
limit=15.0 +2024-09-20 05:27:12,253 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.min_positive, batch_count=822900.0, ans=0.05 +2024-09-20 05:27:16,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=822940.0, ans=0.1 +2024-09-20 05:27:18,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=822940.0, ans=0.025 +2024-09-20 05:27:21,478 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:27:27,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=822940.0, ans=0.125 +2024-09-20 05:27:28,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=822940.0, ans=0.125 +2024-09-20 05:27:58,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=823020.0, ans=0.125 +2024-09-20 05:27:59,091 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module2.whiten, num_groups=1, num_channels=192, metric=7.90 vs. limit=15.0 +2024-09-20 05:28:16,966 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:28:18,249 INFO [train.py:1198] (1/2) Epoch 46, batch 2150, loss[loss=0.2227, ctc_loss=0.1023, cr_loss=0.3409, attn_decoder_loss=0.2285, over 29428.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.108, cr_loss=0.346, attn_decoder_loss=0.2367, over 5814226.38 frames. ], batch size: 78, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:28:18,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=823100.0, ans=0.0 +2024-09-20 05:28:58,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=823180.0, ans=0.125 +2024-09-20 05:29:14,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=823220.0, ans=0.0 +2024-09-20 05:29:17,164 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=6.27 vs. limit=10.0 +2024-09-20 05:29:17,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=823260.0, ans=0.125 +2024-09-20 05:29:20,448 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.492e+01 8.498e+01 9.079e+01 9.733e+01 1.239e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-20 05:29:28,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=823260.0, ans=0.125 +2024-09-20 05:29:34,172 INFO [train.py:1198] (1/2) Epoch 46, batch 2200, loss[loss=0.2414, ctc_loss=0.1059, cr_loss=0.3376, attn_decoder_loss=0.2489, over 29629.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1084, cr_loss=0.3469, attn_decoder_loss=0.237, over 5811325.12 frames. 
], batch size: 86, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:29:34,538 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:29:51,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=823340.0, ans=0.95 +2024-09-20 05:29:58,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=823340.0, ans=0.125 +2024-09-20 05:30:06,070 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=823380.0, ans=0.04949747468305833 +2024-09-20 05:30:06,207 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=823380.0, ans=0.1 +2024-09-20 05:30:52,034 INFO [train.py:1198] (1/2) Epoch 46, batch 2250, loss[loss=0.2217, ctc_loss=0.1029, cr_loss=0.3573, attn_decoder_loss=0.2269, over 29707.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1083, cr_loss=0.3463, attn_decoder_loss=0.2367, over 5809399.59 frames. ], batch size: 82, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:31:10,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=823540.0, ans=0.07 +2024-09-20 05:31:15,666 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.82 vs. limit=10.0 +2024-09-20 05:31:34,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=823580.0, ans=0.125 +2024-09-20 05:31:40,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=823620.0, ans=0.125 +2024-09-20 05:31:41,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=823620.0, ans=0.0 +2024-09-20 05:31:46,704 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=11.48 vs. limit=15.0 +2024-09-20 05:31:47,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=823620.0, ans=0.0 +2024-09-20 05:31:50,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=823660.0, ans=0.125 +2024-09-20 05:31:53,371 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.133e+01 8.446e+01 9.116e+01 9.634e+01 2.292e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-20 05:31:58,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=823660.0, ans=0.0 +2024-09-20 05:32:08,797 INFO [train.py:1198] (1/2) Epoch 46, batch 2300, loss[loss=0.2042, ctc_loss=0.09119, cr_loss=0.3101, attn_decoder_loss=0.2099, over 29327.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1076, cr_loss=0.3449, attn_decoder_loss=0.2359, over 5795962.27 frames. 
], batch size: 71, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:32:23,989 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=823740.0, ans=0.125 +2024-09-20 05:32:30,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=823740.0, ans=0.125 +2024-09-20 05:32:33,107 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=823740.0, ans=0.125 +2024-09-20 05:32:36,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=823740.0, ans=0.0 +2024-09-20 05:32:36,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=823740.0, ans=0.125 +2024-09-20 05:32:43,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=823780.0, ans=0.0 +2024-09-20 05:32:44,319 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.69 vs. limit=15.0 +2024-09-20 05:33:12,496 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:33:20,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=823860.0, ans=0.025 +2024-09-20 05:33:24,357 INFO [train.py:1198] (1/2) Epoch 46, batch 2350, loss[loss=0.2342, ctc_loss=0.111, cr_loss=0.3536, attn_decoder_loss=0.2401, over 29686.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1079, cr_loss=0.3455, attn_decoder_loss=0.2363, over 5801735.00 frames. ], batch size: 83, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:33:24,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=823900.0, ans=0.125 +2024-09-20 05:33:28,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=823900.0, ans=0.125 +2024-09-20 05:33:28,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=823900.0, ans=0.125 +2024-09-20 05:34:23,912 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=6.60 vs. limit=15.0 +2024-09-20 05:34:26,078 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.494e+01 8.622e+01 9.111e+01 9.786e+01 2.523e+02, threshold=1.822e+02, percent-clipped=1.0 +2024-09-20 05:34:39,838 INFO [train.py:1198] (1/2) Epoch 46, batch 2400, loss[loss=0.221, ctc_loss=0.1005, cr_loss=0.335, attn_decoder_loss=0.227, over 29541.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1085, cr_loss=0.347, attn_decoder_loss=0.2368, over 5805891.48 frames. 
], batch size: 76, lr: 2.39e-03, grad_scale: 32.0 +2024-09-20 05:34:48,177 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=824100.0, ans=0.2 +2024-09-20 05:35:08,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=824140.0, ans=0.0 +2024-09-20 05:35:12,343 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=824180.0, ans=0.1 +2024-09-20 05:35:15,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=824180.0, ans=0.125 +2024-09-20 05:35:39,727 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=824220.0, ans=0.125 +2024-09-20 05:35:52,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=824260.0, ans=0.1 +2024-09-20 05:35:59,534 INFO [train.py:1198] (1/2) Epoch 46, batch 2450, loss[loss=0.2353, ctc_loss=0.1131, cr_loss=0.3606, attn_decoder_loss=0.2409, over 29692.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1094, cr_loss=0.3487, attn_decoder_loss=0.2377, over 5782376.16 frames. ], batch size: 82, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:36:04,874 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.22 vs. limit=15.0 +2024-09-20 05:36:25,472 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:36:33,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=824380.0, ans=0.2 +2024-09-20 05:36:43,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=824420.0, ans=0.125 +2024-09-20 05:36:44,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=824420.0, ans=0.95 +2024-09-20 05:36:46,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=824420.0, ans=0.07 +2024-09-20 05:36:56,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=824420.0, ans=0.0 +2024-09-20 05:36:58,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=824460.0, ans=0.125 +2024-09-20 05:37:01,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=824460.0, ans=0.5 +2024-09-20 05:37:02,670 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.742e+01 8.776e+01 9.478e+01 1.012e+02 4.785e+02, threshold=1.896e+02, percent-clipped=1.0 +2024-09-20 05:37:14,643 INFO [train.py:1198] (1/2) Epoch 46, batch 2500, loss[loss=0.2508, ctc_loss=0.1189, cr_loss=0.364, attn_decoder_loss=0.2573, over 29641.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1095, cr_loss=0.3485, attn_decoder_loss=0.2377, over 5793437.97 frames. 
], batch size: 86, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:37:16,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=824500.0, ans=0.125 +2024-09-20 05:37:37,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=824540.0, ans=0.09899494936611666 +2024-09-20 05:37:47,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.92 vs. limit=15.0 +2024-09-20 05:38:19,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=824660.0, ans=0.125 +2024-09-20 05:38:29,170 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=824700.0, ans=0.0 +2024-09-20 05:38:30,353 INFO [train.py:1198] (1/2) Epoch 46, batch 2550, loss[loss=0.2053, ctc_loss=0.09107, cr_loss=0.3219, attn_decoder_loss=0.2108, over 29327.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1092, cr_loss=0.3475, attn_decoder_loss=0.2375, over 5797133.30 frames. ], batch size: 67, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:38:39,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=824700.0, ans=0.0 +2024-09-20 05:38:52,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=824740.0, ans=0.2 +2024-09-20 05:38:55,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=824740.0, ans=0.2 +2024-09-20 05:38:58,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=824740.0, ans=0.125 +2024-09-20 05:39:13,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=824780.0, ans=0.0 +2024-09-20 05:39:28,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=824820.0, ans=0.0 +2024-09-20 05:39:33,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=824860.0, ans=0.0 +2024-09-20 05:39:36,273 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.630e+01 8.555e+01 9.140e+01 9.726e+01 1.841e+02, threshold=1.828e+02, percent-clipped=0.0 +2024-09-20 05:39:36,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=824860.0, ans=0.1 +2024-09-20 05:39:50,516 INFO [train.py:1198] (1/2) Epoch 46, batch 2600, loss[loss=0.2239, ctc_loss=0.1019, cr_loss=0.3394, attn_decoder_loss=0.2299, over 29452.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.109, cr_loss=0.3473, attn_decoder_loss=0.2378, over 5793286.61 frames. ], batch size: 78, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:40:02,066 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.48 vs. 
limit=15.0 +2024-09-20 05:40:08,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_ff2.min_abs, batch_count=824940.0, ans=0.1 +2024-09-20 05:40:11,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=824940.0, ans=0.125 +2024-09-20 05:40:35,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=825020.0, ans=0.0 +2024-09-20 05:40:53,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=825060.0, ans=0.125 +2024-09-20 05:40:59,150 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.25 vs. limit=10.0 +2024-09-20 05:41:05,514 INFO [train.py:1198] (1/2) Epoch 46, batch 2650, loss[loss=0.2494, ctc_loss=0.1219, cr_loss=0.3779, attn_decoder_loss=0.2552, over 29244.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1095, cr_loss=0.3487, attn_decoder_loss=0.2384, over 5800181.74 frames. ], batch size: 100, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:41:36,470 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.22 vs. limit=22.5 +2024-09-20 05:41:52,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=825220.0, ans=0.1 +2024-09-20 05:41:55,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=825220.0, ans=0.2 +2024-09-20 05:42:09,972 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.392e+01 8.633e+01 9.169e+01 9.571e+01 1.241e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-20 05:42:20,698 INFO [train.py:1198] (1/2) Epoch 46, batch 2700, loss[loss=0.2429, ctc_loss=0.1156, cr_loss=0.3525, attn_decoder_loss=0.2492, over 29542.00 frames. ], tot_loss[loss=0.2328, ctc_loss=0.1098, cr_loss=0.3497, attn_decoder_loss=0.2387, over 5795630.71 frames. ], batch size: 87, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:42:26,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=825300.0, ans=0.125 +2024-09-20 05:42:39,722 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:43:12,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=825420.0, ans=0.025 +2024-09-20 05:43:20,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=825420.0, ans=0.0 +2024-09-20 05:43:40,483 INFO [train.py:1198] (1/2) Epoch 46, batch 2750, loss[loss=0.2273, ctc_loss=0.1028, cr_loss=0.3255, attn_decoder_loss=0.2339, over 29503.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1092, cr_loss=0.3484, attn_decoder_loss=0.2377, over 5793618.54 frames. 
], batch size: 75, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:43:48,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=825500.0, ans=0.1 +2024-09-20 05:43:51,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=825500.0, ans=0.0 +2024-09-20 05:44:32,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=825620.0, ans=0.125 +2024-09-20 05:44:46,006 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.730e+01 8.644e+01 9.121e+01 9.722e+01 2.212e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-20 05:44:56,657 INFO [train.py:1198] (1/2) Epoch 46, batch 2800, loss[loss=0.2465, ctc_loss=0.1242, cr_loss=0.3598, attn_decoder_loss=0.2521, over 20180.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1093, cr_loss=0.3486, attn_decoder_loss=0.2378, over 5774447.37 frames. ], batch size: 209, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:45:01,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=825700.0, ans=0.125 +2024-09-20 05:45:02,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=825700.0, ans=0.125 +2024-09-20 05:45:04,741 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.16 vs. limit=22.5 +2024-09-20 05:45:05,131 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.63 vs. limit=15.0 +2024-09-20 05:45:32,773 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=825780.0, ans=0.1 +2024-09-20 05:46:11,549 INFO [train.py:1198] (1/2) Epoch 46, batch 2850, loss[loss=0.2206, ctc_loss=0.1036, cr_loss=0.3347, attn_decoder_loss=0.2261, over 29516.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1095, cr_loss=0.3483, attn_decoder_loss=0.238, over 5759734.64 frames. ], batch size: 77, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:46:39,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=825940.0, ans=0.125 +2024-09-20 05:46:42,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=825980.0, ans=0.125 +2024-09-20 05:47:12,006 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:47:22,084 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.622e+01 8.726e+01 9.166e+01 9.745e+01 2.049e+02, threshold=1.833e+02, percent-clipped=1.0 +2024-09-20 05:47:22,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=826060.0, ans=0.0 +2024-09-20 05:47:31,170 INFO [train.py:1198] (1/2) Epoch 46, batch 2900, loss[loss=0.2313, ctc_loss=0.1183, cr_loss=0.3735, attn_decoder_loss=0.2355, over 29425.00 frames. ], tot_loss[loss=0.2332, ctc_loss=0.11, cr_loss=0.3496, attn_decoder_loss=0.2391, over 5785382.35 frames. 
], batch size: 79, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:47:40,968 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.50 vs. limit=15.0 +2024-09-20 05:47:58,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=826140.0, ans=0.125 +2024-09-20 05:48:01,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=826180.0, ans=0.2 +2024-09-20 05:48:09,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=826180.0, ans=0.1 +2024-09-20 05:48:13,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=826180.0, ans=0.125 +2024-09-20 05:48:19,719 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 05:48:22,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.skip_rate, batch_count=826220.0, ans=0.07 +2024-09-20 05:48:38,295 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=22.54 vs. limit=22.5 +2024-09-20 05:48:46,542 INFO [train.py:1198] (1/2) Epoch 46, batch 2950, loss[loss=0.2279, ctc_loss=0.118, cr_loss=0.367, attn_decoder_loss=0.232, over 29531.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.109, cr_loss=0.3471, attn_decoder_loss=0.2377, over 5780283.74 frames. ], batch size: 75, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:49:01,764 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=826340.0, ans=0.07 +2024-09-20 05:49:07,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=826340.0, ans=0.125 +2024-09-20 05:49:39,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=826420.0, ans=0.125 +2024-09-20 05:49:53,238 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.543e+01 8.789e+01 9.210e+01 9.770e+01 1.527e+02, threshold=1.842e+02, percent-clipped=0.0 +2024-09-20 05:50:02,335 INFO [train.py:1198] (1/2) Epoch 46, batch 3000, loss[loss=0.2371, ctc_loss=0.1099, cr_loss=0.3624, attn_decoder_loss=0.2432, over 29763.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1087, cr_loss=0.3467, attn_decoder_loss=0.2378, over 5781164.10 frames. ], batch size: 81, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:50:02,336 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 05:50:21,364 INFO [train.py:1230] (1/2) Epoch 46, validation: loss=0.2122, ctc_loss=0.03683, cr_loss=6.872e-15, attn_decoder_loss=0.2317, over 944034.00 frames. 
+2024-09-20 05:50:21,365 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 05:50:29,477 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=826500.0, ans=0.125 +2024-09-20 05:50:32,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=826500.0, ans=0.2 +2024-09-20 05:50:59,007 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.67 vs. limit=22.5 +2024-09-20 05:51:30,635 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.95 vs. limit=10.0 +2024-09-20 05:51:37,849 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=826700.0, ans=0.2 +2024-09-20 05:51:39,070 INFO [train.py:1198] (1/2) Epoch 46, batch 3050, loss[loss=0.2235, ctc_loss=0.108, cr_loss=0.3494, attn_decoder_loss=0.2286, over 29511.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1093, cr_loss=0.3481, attn_decoder_loss=0.2384, over 5775661.30 frames. ], batch size: 76, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:52:17,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=826780.0, ans=0.035 +2024-09-20 05:52:20,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=826780.0, ans=0.0 +2024-09-20 05:52:38,223 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=826860.0, ans=0.1 +2024-09-20 05:52:45,558 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.649e+01 8.604e+01 9.128e+01 9.681e+01 2.059e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-20 05:52:54,438 INFO [train.py:1198] (1/2) Epoch 46, batch 3100, loss[loss=0.2522, ctc_loss=0.1306, cr_loss=0.4012, attn_decoder_loss=0.2568, over 29279.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1093, cr_loss=0.3483, attn_decoder_loss=0.2381, over 5776892.62 frames. ], batch size: 100, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:53:03,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=826900.0, ans=0.0 +2024-09-20 05:53:05,863 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.19 vs. limit=10.0 +2024-09-20 05:53:08,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward3.hidden_balancer.prob, batch_count=826940.0, ans=0.125 +2024-09-20 05:53:19,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.07 vs. 
limit=12.0 +2024-09-20 05:53:24,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=826980.0, ans=0.2 +2024-09-20 05:53:31,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=826980.0, ans=0.1 +2024-09-20 05:53:40,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=827020.0, ans=0.0 +2024-09-20 05:53:44,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=827020.0, ans=0.125 +2024-09-20 05:54:12,265 INFO [train.py:1198] (1/2) Epoch 46, batch 3150, loss[loss=0.2415, ctc_loss=0.1118, cr_loss=0.357, attn_decoder_loss=0.2479, over 28801.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1093, cr_loss=0.3483, attn_decoder_loss=0.2379, over 5784243.89 frames. ], batch size: 104, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:54:21,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=827100.0, ans=0.125 +2024-09-20 05:54:24,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=827100.0, ans=0.125 +2024-09-20 05:54:27,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=827140.0, ans=0.125 +2024-09-20 05:54:32,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=827140.0, ans=0.2 +2024-09-20 05:54:36,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=827140.0, ans=0.125 +2024-09-20 05:54:50,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=827180.0, ans=0.125 +2024-09-20 05:54:57,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=827180.0, ans=0.125 +2024-09-20 05:55:09,406 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=827220.0, ans=0.125 +2024-09-20 05:55:14,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.90 vs. limit=10.0 +2024-09-20 05:55:20,996 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.407e+01 8.661e+01 9.228e+01 9.834e+01 1.754e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-20 05:55:28,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=827300.0, ans=0.125 +2024-09-20 05:55:30,094 INFO [train.py:1198] (1/2) Epoch 46, batch 3200, loss[loss=0.233, ctc_loss=0.1089, cr_loss=0.3439, attn_decoder_loss=0.2392, over 29413.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1092, cr_loss=0.3487, attn_decoder_loss=0.2376, over 5794530.86 frames. ], batch size: 79, lr: 2.39e-03, grad_scale: 16.0 +2024-09-20 05:55:31,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=10.96 vs. 
limit=22.5 +2024-09-20 05:55:43,006 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.26 vs. limit=22.5 +2024-09-20 05:55:50,945 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.06 vs. limit=15.0 +2024-09-20 05:55:53,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=827340.0, ans=0.125 +2024-09-20 05:55:59,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=827380.0, ans=0.125 +2024-09-20 05:56:03,976 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=827380.0, ans=0.125 +2024-09-20 05:56:10,210 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.64 vs. limit=15.0 +2024-09-20 05:56:15,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=827420.0, ans=0.2 +2024-09-20 05:56:16,309 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.83 vs. limit=15.0 +2024-09-20 05:56:30,466 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.82 vs. limit=15.0 +2024-09-20 05:56:45,890 INFO [train.py:1198] (1/2) Epoch 46, batch 3250, loss[loss=0.2447, ctc_loss=0.1152, cr_loss=0.3708, attn_decoder_loss=0.2508, over 29713.00 frames. ], tot_loss[loss=0.232, ctc_loss=0.1091, cr_loss=0.349, attn_decoder_loss=0.2379, over 5801229.89 frames. ], batch size: 84, lr: 2.39e-03, grad_scale: 8.0 +2024-09-20 05:56:47,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=827500.0, ans=0.125 +2024-09-20 05:57:09,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.90 vs. 
limit=15.0 +2024-09-20 05:57:10,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=827540.0, ans=0.2 +2024-09-20 05:57:11,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=827540.0, ans=0.0 +2024-09-20 05:57:20,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=827580.0, ans=0.125 +2024-09-20 05:57:37,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=827620.0, ans=0.125 +2024-09-20 05:57:44,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=827660.0, ans=0.125 +2024-09-20 05:57:53,417 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.254e+01 8.520e+01 8.954e+01 9.495e+01 2.408e+02, threshold=1.791e+02, percent-clipped=1.0 +2024-09-20 05:57:53,825 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=827660.0, ans=0.0 +2024-09-20 05:57:55,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=827660.0, ans=0.2 +2024-09-20 05:57:58,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=827660.0, ans=0.1 +2024-09-20 05:58:01,024 INFO [train.py:1198] (1/2) Epoch 46, batch 3300, loss[loss=0.2445, ctc_loss=0.1161, cr_loss=0.3714, attn_decoder_loss=0.2505, over 28350.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1087, cr_loss=0.3477, attn_decoder_loss=0.2369, over 5798031.33 frames. ], batch size: 111, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 05:58:01,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=827700.0, ans=0.1 +2024-09-20 05:58:04,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=827700.0, ans=0.0 +2024-09-20 05:58:12,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=827700.0, ans=0.2 +2024-09-20 05:58:13,264 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.29 vs. limit=6.0 +2024-09-20 05:58:17,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=827740.0, ans=0.125 +2024-09-20 05:58:22,920 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=827740.0, ans=0.1 +2024-09-20 05:58:27,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=827740.0, ans=0.125 +2024-09-20 05:59:10,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.79 vs. 
limit=22.5 +2024-09-20 05:59:12,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=827860.0, ans=0.025 +2024-09-20 05:59:14,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=827860.0, ans=0.1 +2024-09-20 05:59:20,221 INFO [train.py:1198] (1/2) Epoch 46, batch 3350, loss[loss=0.2411, ctc_loss=0.1175, cr_loss=0.3606, attn_decoder_loss=0.2468, over 28902.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1092, cr_loss=0.3485, attn_decoder_loss=0.2375, over 5774021.87 frames. ], batch size: 104, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 05:59:49,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=827980.0, ans=0.125 +2024-09-20 05:59:59,131 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.79 vs. limit=15.0 +2024-09-20 06:00:10,648 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=828020.0, ans=0.04949747468305833 +2024-09-20 06:00:13,691 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=828020.0, ans=0.1 +2024-09-20 06:00:28,532 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.460e+01 8.801e+01 9.345e+01 9.836e+01 1.654e+02, threshold=1.869e+02, percent-clipped=0.0 +2024-09-20 06:00:34,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=828100.0, ans=0.125 +2024-09-20 06:00:36,121 INFO [train.py:1198] (1/2) Epoch 46, batch 3400, loss[loss=0.2036, ctc_loss=0.09002, cr_loss=0.3138, attn_decoder_loss=0.2093, over 29288.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1095, cr_loss=0.3492, attn_decoder_loss=0.2376, over 5766848.79 frames. ], batch size: 67, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 06:01:24,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=828220.0, ans=0.125 +2024-09-20 06:01:26,481 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=828220.0, ans=0.125 +2024-09-20 06:01:51,524 INFO [train.py:1198] (1/2) Epoch 46, batch 3450, loss[loss=0.2363, ctc_loss=0.1111, cr_loss=0.3303, attn_decoder_loss=0.2429, over 28304.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1093, cr_loss=0.3495, attn_decoder_loss=0.2378, over 5774286.47 frames. ], batch size: 111, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 06:01:58,447 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:03:03,200 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.738e+01 8.466e+01 9.080e+01 9.638e+01 4.809e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-20 06:03:08,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=828460.0, ans=0.0 +2024-09-20 06:03:10,697 INFO [train.py:1198] (1/2) Epoch 46, batch 3500, loss[loss=0.2079, ctc_loss=0.08629, cr_loss=0.3034, attn_decoder_loss=0.2147, over 29295.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1093, cr_loss=0.3494, attn_decoder_loss=0.2372, over 5776465.15 frames. 
], batch size: 71, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 06:03:12,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=828500.0, ans=0.2 +2024-09-20 06:03:20,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=828500.0, ans=0.0 +2024-09-20 06:03:24,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=828540.0, ans=0.125 +2024-09-20 06:03:35,026 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=828540.0, ans=0.0 +2024-09-20 06:03:41,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=828580.0, ans=0.125 +2024-09-20 06:03:45,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=828580.0, ans=0.125 +2024-09-20 06:03:46,814 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=828580.0, ans=0.125 +2024-09-20 06:03:48,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=828580.0, ans=0.2 +2024-09-20 06:04:09,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer_ff3.min_abs, batch_count=828660.0, ans=0.2 +2024-09-20 06:04:22,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=828660.0, ans=0.0 +2024-09-20 06:04:25,149 INFO [train.py:1198] (1/2) Epoch 46, batch 3550, loss[loss=0.234, ctc_loss=0.1034, cr_loss=0.34, attn_decoder_loss=0.2409, over 29714.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1091, cr_loss=0.349, attn_decoder_loss=0.2372, over 5782804.55 frames. ], batch size: 89, lr: 2.38e-03, grad_scale: 8.0 +2024-09-20 06:04:32,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=828700.0, ans=0.0 +2024-09-20 06:04:38,710 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=828740.0, ans=0.125 +2024-09-20 06:04:43,192 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=828740.0, ans=0.125 +2024-09-20 06:05:12,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=828820.0, ans=0.1 +2024-09-20 06:05:22,293 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:05:26,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=828860.0, ans=0.125 +2024-09-20 06:05:32,134 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.578e+01 8.518e+01 9.140e+01 9.697e+01 1.857e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-20 06:05:39,446 INFO [train.py:1198] (1/2) Epoch 46, batch 3600, loss[loss=0.2263, ctc_loss=0.09783, cr_loss=0.3222, attn_decoder_loss=0.2334, over 29533.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1089, cr_loss=0.3485, attn_decoder_loss=0.2372, over 5791461.67 frames. 
], batch size: 77, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:05:44,857 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.34 vs. limit=15.0 +2024-09-20 06:05:59,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.31 vs. limit=15.0 +2024-09-20 06:06:00,848 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=828940.0, ans=0.1 +2024-09-20 06:06:13,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=828980.0, ans=0.125 +2024-09-20 06:06:18,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.39 vs. limit=15.0 +2024-09-20 06:06:21,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=828980.0, ans=0.125 +2024-09-20 06:06:24,489 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:06:53,551 INFO [train.py:1198] (1/2) Epoch 46, batch 3650, loss[loss=0.2497, ctc_loss=0.1201, cr_loss=0.3848, attn_decoder_loss=0.2556, over 29496.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1086, cr_loss=0.3479, attn_decoder_loss=0.2368, over 5793113.62 frames. ], batch size: 90, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:06:56,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=829100.0, ans=0.125 +2024-09-20 06:07:05,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=829100.0, ans=0.0 +2024-09-20 06:07:08,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=829140.0, ans=0.0 +2024-09-20 06:07:11,887 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.29 vs. limit=12.0 +2024-09-20 06:07:12,097 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=5.08 vs. limit=10.0 +2024-09-20 06:07:20,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=829140.0, ans=0.125 +2024-09-20 06:08:03,938 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.537e+01 9.087e+01 9.420e+01 1.458e+02, threshold=1.817e+02, percent-clipped=0.0 +2024-09-20 06:08:11,549 INFO [train.py:1198] (1/2) Epoch 46, batch 3700, loss[loss=0.2453, ctc_loss=0.1161, cr_loss=0.3551, attn_decoder_loss=0.2518, over 29705.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1087, cr_loss=0.3481, attn_decoder_loss=0.237, over 5803603.98 frames. 
], batch size: 84, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:08:16,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=829300.0, ans=0.2 +2024-09-20 06:08:19,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=829300.0, ans=0.0 +2024-09-20 06:08:21,666 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.47 vs. limit=15.0 +2024-09-20 06:08:32,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=829340.0, ans=0.025 +2024-09-20 06:08:34,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=829340.0, ans=0.0 +2024-09-20 06:08:43,547 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.37 vs. limit=15.0 +2024-09-20 06:08:46,298 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.25 vs. limit=15.0 +2024-09-20 06:08:53,393 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:09:25,406 INFO [train.py:1198] (1/2) Epoch 46, batch 3750, loss[loss=0.2114, ctc_loss=0.0894, cr_loss=0.3008, attn_decoder_loss=0.2183, over 29370.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1085, cr_loss=0.3471, attn_decoder_loss=0.2367, over 5807840.50 frames. ], batch size: 67, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:09:39,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=829540.0, ans=0.025 +2024-09-20 06:10:03,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=829580.0, ans=0.0 +2024-09-20 06:10:21,194 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=829620.0, ans=0.1 +2024-09-20 06:10:27,087 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=829660.0, ans=0.0 +2024-09-20 06:10:28,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=829660.0, ans=0.125 +2024-09-20 06:10:32,724 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.488e+01 8.638e+01 9.232e+01 9.625e+01 1.772e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-20 06:10:40,161 INFO [train.py:1198] (1/2) Epoch 46, batch 3800, loss[loss=0.235, ctc_loss=0.1029, cr_loss=0.3323, attn_decoder_loss=0.2423, over 29628.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1084, cr_loss=0.3462, attn_decoder_loss=0.2365, over 5797782.57 frames. 
], batch size: 86, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:10:40,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=829700.0, ans=0.025 +2024-09-20 06:10:47,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=829700.0, ans=0.0 +2024-09-20 06:10:50,124 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.56 vs. limit=15.0 +2024-09-20 06:10:51,332 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.10 vs. limit=12.0 +2024-09-20 06:11:14,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=829780.0, ans=0.125 +2024-09-20 06:11:22,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=829780.0, ans=0.125 +2024-09-20 06:11:54,494 INFO [train.py:1198] (1/2) Epoch 46, batch 3850, loss[loss=0.2412, ctc_loss=0.1061, cr_loss=0.3384, attn_decoder_loss=0.2487, over 29220.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.108, cr_loss=0.346, attn_decoder_loss=0.2363, over 5811805.36 frames. ], batch size: 100, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:12:07,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=829940.0, ans=0.125 +2024-09-20 06:12:18,190 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=829940.0, ans=0.0 +2024-09-20 06:12:31,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=829980.0, ans=0.125 +2024-09-20 06:12:46,617 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=830020.0, ans=0.125 +2024-09-20 06:12:48,054 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=830020.0, ans=0.125 +2024-09-20 06:12:54,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=830060.0, ans=0.0 +2024-09-20 06:12:58,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=830060.0, ans=0.125 +2024-09-20 06:13:01,125 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.921e+01 8.600e+01 9.111e+01 9.601e+01 1.529e+02, threshold=1.822e+02, percent-clipped=0.0 +2024-09-20 06:13:08,418 INFO [train.py:1198] (1/2) Epoch 46, batch 3900, loss[loss=0.2451, ctc_loss=0.1143, cr_loss=0.3686, attn_decoder_loss=0.2515, over 29625.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1082, cr_loss=0.3465, attn_decoder_loss=0.2366, over 5815827.97 frames. 
], batch size: 86, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:13:22,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=830100.0, ans=0.0 +2024-09-20 06:13:52,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=830180.0, ans=0.125 +2024-09-20 06:14:05,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.58 vs. limit=15.0 +2024-09-20 06:14:13,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=830260.0, ans=0.1 +2024-09-20 06:14:20,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer1.prob, batch_count=830260.0, ans=0.125 +2024-09-20 06:14:24,682 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.31 vs. limit=12.0 +2024-09-20 06:14:25,246 INFO [train.py:1198] (1/2) Epoch 46, batch 3950, loss[loss=0.2481, ctc_loss=0.1233, cr_loss=0.3881, attn_decoder_loss=0.2533, over 29465.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1083, cr_loss=0.3472, attn_decoder_loss=0.2368, over 5835360.64 frames. ], batch size: 97, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:14:28,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.prob, batch_count=830300.0, ans=0.125 +2024-09-20 06:14:34,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=830300.0, ans=0.0 +2024-09-20 06:14:39,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=830340.0, ans=0.125 +2024-09-20 06:14:57,027 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.75 vs. limit=6.0 +2024-09-20 06:15:01,709 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.12 vs. limit=15.0 +2024-09-20 06:15:14,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=830420.0, ans=0.0 +2024-09-20 06:15:31,666 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.428e+01 8.756e+01 9.097e+01 9.656e+01 1.303e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-20 06:15:38,977 INFO [train.py:1198] (1/2) Epoch 46, batch 4000, loss[loss=0.2129, ctc_loss=0.0917, cr_loss=0.2992, attn_decoder_loss=0.2197, over 29515.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1088, cr_loss=0.348, attn_decoder_loss=0.2368, over 5812635.35 frames. 
], batch size: 74, lr: 2.38e-03, grad_scale: 32.0 +2024-09-20 06:15:49,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=830500.0, ans=0.125 +2024-09-20 06:15:58,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=830540.0, ans=0.0 +2024-09-20 06:16:04,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=830540.0, ans=0.0 +2024-09-20 06:16:11,847 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:16:31,761 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.47 vs. limit=15.0 +2024-09-20 06:16:47,355 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=830660.0, ans=0.1 +2024-09-20 06:16:52,990 INFO [train.py:1198] (1/2) Epoch 46, batch 4050, loss[loss=0.2479, ctc_loss=0.1309, cr_loss=0.3564, attn_decoder_loss=0.253, over 19999.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1085, cr_loss=0.3469, attn_decoder_loss=0.2368, over 5796932.01 frames. ], batch size: 209, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:16:53,285 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=830700.0, ans=0.125 +2024-09-20 06:17:06,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=830740.0, ans=0.0 +2024-09-20 06:17:06,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten.whitening_limit, batch_count=830740.0, ans=22.5 +2024-09-20 06:17:10,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=830740.0, ans=0.1 +2024-09-20 06:17:15,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=830740.0, ans=0.1 +2024-09-20 06:17:15,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=830740.0, ans=0.0 +2024-09-20 06:17:19,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=830740.0, ans=0.125 +2024-09-20 06:17:22,419 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=830780.0, ans=0.0 +2024-09-20 06:17:28,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=830780.0, ans=0.1 +2024-09-20 06:17:54,843 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=830860.0, ans=0.125 +2024-09-20 06:17:57,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=830860.0, ans=0.1 +2024-09-20 06:17:58,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=830860.0, ans=0.0 +2024-09-20 06:18:01,508 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.547e+01 8.777e+01 9.283e+01 1.003e+02 3.559e+02, threshold=1.857e+02, 
percent-clipped=2.0 +2024-09-20 06:18:08,721 INFO [train.py:1198] (1/2) Epoch 46, batch 4100, loss[loss=0.2534, ctc_loss=0.1293, cr_loss=0.4074, attn_decoder_loss=0.2581, over 29492.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1088, cr_loss=0.3474, attn_decoder_loss=0.2371, over 5791303.63 frames. ], batch size: 90, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:18:09,534 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.48 vs. limit=15.0 +2024-09-20 06:18:38,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=830980.0, ans=0.025 +2024-09-20 06:18:38,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=830980.0, ans=0.025 +2024-09-20 06:18:41,719 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.37 vs. limit=12.0 +2024-09-20 06:18:48,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=830980.0, ans=0.1 +2024-09-20 06:18:50,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.82 vs. limit=15.0 +2024-09-20 06:19:21,756 INFO [train.py:1198] (1/2) Epoch 46, batch 4150, loss[loss=0.2277, ctc_loss=0.1023, cr_loss=0.3265, attn_decoder_loss=0.2344, over 29506.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1086, cr_loss=0.3469, attn_decoder_loss=0.2366, over 5797481.50 frames. ], batch size: 77, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:19:39,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.max_abs, batch_count=831140.0, ans=10.0 +2024-09-20 06:19:48,524 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:19:54,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=831180.0, ans=0.125 +2024-09-20 06:20:00,561 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=831180.0, ans=0.0 +2024-09-20 06:20:29,880 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.101e+01 8.815e+01 9.247e+01 9.794e+01 1.755e+02, threshold=1.849e+02, percent-clipped=0.0 +2024-09-20 06:20:35,817 INFO [train.py:1198] (1/2) Epoch 46, batch 4200, loss[loss=0.256, ctc_loss=0.1397, cr_loss=0.4208, attn_decoder_loss=0.2595, over 29524.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1091, cr_loss=0.348, attn_decoder_loss=0.2373, over 5799159.27 frames. ], batch size: 90, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:20:40,496 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=831300.0, ans=0.125 +2024-09-20 06:21:01,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=831340.0, ans=0.0 +2024-09-20 06:21:02,864 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.88 vs. 
limit=15.0 +2024-09-20 06:21:14,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=831380.0, ans=0.125 +2024-09-20 06:21:33,607 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.74 vs. limit=15.0 +2024-09-20 06:21:38,966 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=831460.0, ans=0.0 +2024-09-20 06:21:49,878 INFO [train.py:1198] (1/2) Epoch 46, batch 4250, loss[loss=0.2144, ctc_loss=0.09456, cr_loss=0.3123, attn_decoder_loss=0.2208, over 29542.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1087, cr_loss=0.3471, attn_decoder_loss=0.2373, over 5806115.61 frames. ], batch size: 74, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:21:50,951 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.89 vs. limit=22.5 +2024-09-20 06:21:51,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=831500.0, ans=0.125 +2024-09-20 06:21:53,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=831500.0, ans=0.125 +2024-09-20 06:22:04,829 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.93 vs. limit=15.0 +2024-09-20 06:22:27,800 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=831580.0, ans=0.125 +2024-09-20 06:22:32,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=831580.0, ans=0.2 +2024-09-20 06:22:38,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=831620.0, ans=0.0 +2024-09-20 06:22:58,495 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.527e+01 8.665e+01 9.148e+01 1.004e+02 2.126e+02, threshold=1.830e+02, percent-clipped=1.0 +2024-09-20 06:23:04,401 INFO [train.py:1198] (1/2) Epoch 46, batch 4300, loss[loss=0.2406, ctc_loss=0.1124, cr_loss=0.361, attn_decoder_loss=0.2468, over 29539.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1086, cr_loss=0.3466, attn_decoder_loss=0.2373, over 5795223.61 frames. ], batch size: 87, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:23:13,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=831700.0, ans=0.125 +2024-09-20 06:23:22,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=831740.0, ans=0.125 +2024-09-20 06:23:23,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=831740.0, ans=0.1 +2024-09-20 06:23:41,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=831780.0, ans=0.1 +2024-09-20 06:23:52,320 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.32 vs. 
limit=15.0 +2024-09-20 06:23:58,290 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.84 vs. limit=22.5 +2024-09-20 06:24:18,201 INFO [train.py:1198] (1/2) Epoch 46, batch 4350, loss[loss=0.2497, ctc_loss=0.1185, cr_loss=0.3779, attn_decoder_loss=0.2559, over 29499.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1107, cr_loss=0.3518, attn_decoder_loss=0.2402, over 5797918.49 frames. ], batch size: 97, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:24:36,945 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.19 vs. limit=22.5 +2024-09-20 06:24:52,882 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=831980.0, ans=0.125 +2024-09-20 06:25:11,910 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=5.79 vs. limit=15.0 +2024-09-20 06:25:20,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=832020.0, ans=0.0 +2024-09-20 06:25:34,201 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.49 vs. limit=15.0 +2024-09-20 06:25:36,413 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.165e+01 9.019e+01 9.377e+01 9.847e+01 2.022e+02, threshold=1.875e+02, percent-clipped=1.0 +2024-09-20 06:25:42,289 INFO [train.py:1198] (1/2) Epoch 46, batch 4400, loss[loss=0.2374, ctc_loss=0.1238, cr_loss=0.3792, attn_decoder_loss=0.2416, over 27347.00 frames. ], tot_loss[loss=0.2363, ctc_loss=0.1122, cr_loss=0.3548, attn_decoder_loss=0.2423, over 5767617.90 frames. ], batch size: 124, lr: 2.38e-03, grad_scale: 32.0 +2024-09-20 06:25:44,012 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=832100.0, ans=0.125 +2024-09-20 06:25:56,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.nonlin_attention.balancer.prob, batch_count=832140.0, ans=0.125 +2024-09-20 06:26:01,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=832140.0, ans=0.0 +2024-09-20 06:26:13,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=832180.0, ans=0.2 +2024-09-20 06:26:17,856 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.29 vs. 
limit=22.5 +2024-09-20 06:26:18,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=832180.0, ans=0.125 +2024-09-20 06:26:29,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=832220.0, ans=0.125 +2024-09-20 06:26:32,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=832220.0, ans=0.0 +2024-09-20 06:26:40,804 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=832260.0, ans=0.025 +2024-09-20 06:26:54,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=832300.0, ans=0.0 +2024-09-20 06:26:55,246 INFO [train.py:1198] (1/2) Epoch 46, batch 4450, loss[loss=0.2518, ctc_loss=0.1339, cr_loss=0.3944, attn_decoder_loss=0.2562, over 20857.00 frames. ], tot_loss[loss=0.2384, ctc_loss=0.1154, cr_loss=0.3599, attn_decoder_loss=0.2441, over 5576503.56 frames. ], batch size: 210, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:26:59,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=7.35 vs. limit=10.0 +2024-09-20 06:27:09,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=832340.0, ans=0.125 +2024-09-20 06:27:15,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=832340.0, ans=0.1 +2024-09-20 06:27:27,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=832380.0, ans=0.1 +2024-09-20 06:27:30,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=832380.0, ans=0.0 +2024-09-20 06:27:53,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=832420.0, ans=0.125 +2024-09-20 06:27:54,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=832460.0, ans=0.125 +2024-09-20 06:28:06,278 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.388e+01 1.015e+02 1.122e+02 1.210e+02 5.487e+02, threshold=2.243e+02, percent-clipped=3.0 +2024-09-20 06:28:06,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=832460.0, ans=0.2 +2024-09-20 06:28:10,693 INFO [train.py:1198] (1/2) Epoch 46, batch 4500, loss[loss=0.2534, ctc_loss=0.1356, cr_loss=0.3712, attn_decoder_loss=0.2582, over 19795.00 frames. ], tot_loss[loss=0.2402, ctc_loss=0.1182, cr_loss=0.3622, attn_decoder_loss=0.2457, over 5235657.36 frames. 
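Note: the recurring "INFO [train.py:1198]" records above report, per batch, a combined loss together with its components (ctc_loss, cr_loss, attn_decoder_loss). The logged values are consistent with a fixed weighted sum of about 0.9 * attn_decoder_loss + 0.1 * ctc_loss + 0.02 * cr_loss; these weights are inferred by fitting the numbers in this log, not read from the training script. A minimal sketch:

    # Sketch of the weighted loss combination behind the train.py records.
    # The scales are INFERRED from the logged values, not quoted from the
    # training script.
    def combined_loss(ctc_loss: float, cr_loss: float, attn_decoder_loss: float,
                      ctc_scale: float = 0.1, cr_scale: float = 0.02,
                      attn_scale: float = 0.9) -> float:
        return (ctc_scale * ctc_loss
                + cr_scale * cr_loss
                + attn_scale * attn_decoder_loss)

    # One record above (Epoch 46, batch 4450): loss=0.2384, ctc_loss=0.1154,
    # cr_loss=0.3599, attn_decoder_loss=0.2441.
    assert abs(combined_loss(0.1154, 0.3599, 0.2441) - 0.2384) < 1e-3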
], batch size: 209, lr: 2.38e-03, grad_scale: 16.0 +2024-09-20 06:28:12,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=832500.0, ans=0.125 +2024-09-20 06:28:42,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=832580.0, ans=0.025 +2024-09-20 06:29:38,361 INFO [train.py:1198] (1/2) Epoch 47, batch 0, loss[loss=0.2163, ctc_loss=0.09544, cr_loss=0.3191, attn_decoder_loss=0.2226, over 29573.00 frames. ], tot_loss[loss=0.2163, ctc_loss=0.09544, cr_loss=0.3191, attn_decoder_loss=0.2226, over 29573.00 frames. ], batch size: 73, lr: 2.35e-03, grad_scale: 32.0 +2024-09-20 06:29:38,361 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 06:29:56,723 INFO [train.py:1230] (1/2) Epoch 47, validation: loss=0.2131, ctc_loss=0.03582, cr_loss=6.765e-15, attn_decoder_loss=0.2328, over 944034.00 frames. +2024-09-20 06:29:56,724 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 06:29:58,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=832600.0, ans=0.2 +2024-09-20 06:30:04,400 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=832600.0, ans=0.1 +2024-09-20 06:30:07,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=832600.0, ans=0.0 +2024-09-20 06:30:13,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=832640.0, ans=0.0 +2024-09-20 06:30:28,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=832680.0, ans=0.125 +2024-09-20 06:30:37,184 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=7.36 vs. limit=15.0 +2024-09-20 06:30:45,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=832720.0, ans=0.125 +2024-09-20 06:30:47,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=9.40 vs. limit=15.0 +2024-09-20 06:30:51,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=832720.0, ans=0.0 +2024-09-20 06:31:12,661 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=832800.0, ans=0.5 +2024-09-20 06:31:14,203 INFO [train.py:1198] (1/2) Epoch 47, batch 50, loss[loss=0.2077, ctc_loss=0.08875, cr_loss=0.2925, attn_decoder_loss=0.2144, over 29425.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1111, cr_loss=0.3524, attn_decoder_loss=0.238, over 1268515.67 frames. 
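Note: the learning rate in these records drops from 2.38e-03 to 2.35e-03 exactly at the Epoch 46 -> Epoch 47 boundary logged above, as an Eden-style schedule would: it decays with both batch count and epoch. The sketch below shows the functional form only; base_lr, lr_batches and lr_epochs are assumed placeholder values.

    # Sketch of an Eden-style LR schedule (parameter values are ASSUMED
    # placeholders; only the functional form is asserted here).
    def eden_lr(base_lr: float, batch: int, epoch: int,
                lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
        batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
        epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
        return base_lr * batch_factor * epoch_factor

    # Moving from epoch 46 to 47 at near-constant batch_count multiplies
    # the lr by ((47**2 + lr_epochs**2) / (46**2 + lr_epochs**2)) ** -0.25,
    # roughly 0.989, i.e. 2.38e-03 -> ~2.35e-03, matching the records above.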
], batch size: 70, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:31:16,130 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=832800.0, ans=0.125 +2024-09-20 06:31:26,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=832800.0, ans=0.125 +2024-09-20 06:31:31,158 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:31:34,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.01 vs. limit=6.0 +2024-09-20 06:31:39,168 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=8.47 vs. limit=15.0 +2024-09-20 06:31:48,929 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.774e+01 8.892e+01 9.712e+01 1.150e+02 2.007e+02, threshold=1.942e+02, percent-clipped=0.0 +2024-09-20 06:31:56,906 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass_mid.scale_min, batch_count=832880.0, ans=0.2 +2024-09-20 06:32:01,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=832920.0, ans=0.0 +2024-09-20 06:32:04,287 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=832920.0, ans=0.125 +2024-09-20 06:32:07,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=832920.0, ans=0.035 +2024-09-20 06:32:08,112 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=512, metric=9.17 vs. limit=15.0 +2024-09-20 06:32:09,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=832920.0, ans=0.0 +2024-09-20 06:32:10,280 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=832920.0, ans=0.1 +2024-09-20 06:32:10,846 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.08 vs. limit=22.5 +2024-09-20 06:32:14,917 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=832960.0, ans=0.025 +2024-09-20 06:32:29,708 INFO [train.py:1198] (1/2) Epoch 47, batch 100, loss[loss=0.2241, ctc_loss=0.108, cr_loss=0.3507, attn_decoder_loss=0.2292, over 29513.00 frames. ], tot_loss[loss=0.2341, ctc_loss=0.1112, cr_loss=0.3533, attn_decoder_loss=0.24, over 2250716.52 frames. ], batch size: 76, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:32:51,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=833040.0, ans=0.125 +2024-09-20 06:32:52,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=833040.0, ans=0.125 +2024-09-20 06:32:53,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=11.96 vs. 
limit=15.0 +2024-09-20 06:33:43,628 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.10 vs. limit=15.0 +2024-09-20 06:33:45,860 INFO [train.py:1198] (1/2) Epoch 47, batch 150, loss[loss=0.198, ctc_loss=0.08195, cr_loss=0.2834, attn_decoder_loss=0.2046, over 29441.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1096, cr_loss=0.3501, attn_decoder_loss=0.2382, over 3045973.07 frames. ], batch size: 70, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:33:55,970 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.53 vs. limit=15.0 +2024-09-20 06:34:05,883 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=833240.0, ans=0.025 +2024-09-20 06:34:11,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=833240.0, ans=0.125 +2024-09-20 06:34:22,973 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.674e+01 8.579e+01 9.254e+01 9.598e+01 1.367e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-20 06:34:42,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=833320.0, ans=0.05 +2024-09-20 06:34:55,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.61 vs. limit=12.0 +2024-09-20 06:34:56,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=833360.0, ans=0.0 +2024-09-20 06:35:00,799 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=833360.0, ans=0.125 +2024-09-20 06:35:02,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=833400.0, ans=0.125 +2024-09-20 06:35:03,356 INFO [train.py:1198] (1/2) Epoch 47, batch 200, loss[loss=0.2529, ctc_loss=0.1283, cr_loss=0.4022, attn_decoder_loss=0.2578, over 27360.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1088, cr_loss=0.3495, attn_decoder_loss=0.237, over 3659968.21 frames. ], batch size: 124, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:35:12,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=833400.0, ans=0.1 +2024-09-20 06:35:42,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=833480.0, ans=10.0 +2024-09-20 06:35:48,631 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=833520.0, ans=0.125 +2024-09-20 06:36:19,047 INFO [train.py:1198] (1/2) Epoch 47, batch 250, loss[loss=0.2438, ctc_loss=0.1159, cr_loss=0.3487, attn_decoder_loss=0.2503, over 29299.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1087, cr_loss=0.3487, attn_decoder_loss=0.2369, over 4141684.11 frames. 
], batch size: 100, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:36:28,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=833600.0, ans=0.125 +2024-09-20 06:36:29,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=833600.0, ans=0.0 +2024-09-20 06:36:48,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=833640.0, ans=0.125 +2024-09-20 06:36:51,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=833680.0, ans=0.0 +2024-09-20 06:36:55,949 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.268e+01 8.644e+01 9.308e+01 9.912e+01 1.990e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-20 06:37:06,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=833720.0, ans=0.125 +2024-09-20 06:37:12,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=833720.0, ans=0.125 +2024-09-20 06:37:12,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=833720.0, ans=0.0 +2024-09-20 06:37:12,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=833720.0, ans=0.125 +2024-09-20 06:37:27,733 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=833760.0, ans=0.1 +2024-09-20 06:37:36,483 INFO [train.py:1198] (1/2) Epoch 47, batch 300, loss[loss=0.2488, ctc_loss=0.1191, cr_loss=0.3729, attn_decoder_loss=0.2549, over 29517.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1086, cr_loss=0.3477, attn_decoder_loss=0.2366, over 4510830.44 frames. ], batch size: 92, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:38:05,322 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=833880.0, ans=0.0 +2024-09-20 06:38:28,637 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=833920.0, ans=0.125 +2024-09-20 06:38:45,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=833960.0, ans=0.0 +2024-09-20 06:38:49,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=833960.0, ans=0.2 +2024-09-20 06:38:50,337 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=21.07 vs. limit=22.5 +2024-09-20 06:38:54,104 INFO [train.py:1198] (1/2) Epoch 47, batch 350, loss[loss=0.2044, ctc_loss=0.08448, cr_loss=0.2985, attn_decoder_loss=0.2111, over 29320.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1089, cr_loss=0.3489, attn_decoder_loss=0.2372, over 4795839.69 frames. ], batch size: 71, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:38:57,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=8.42 vs. 
limit=15.0 +2024-09-20 06:39:00,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=834000.0, ans=0.09899494936611666 +2024-09-20 06:39:11,030 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=834040.0, ans=0.125 +2024-09-20 06:39:12,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.50 vs. limit=22.5 +2024-09-20 06:39:28,606 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.398e+01 8.657e+01 9.081e+01 9.524e+01 1.810e+02, threshold=1.816e+02, percent-clipped=0.0 +2024-09-20 06:39:32,799 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.65 vs. limit=15.0 +2024-09-20 06:39:58,807 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=834160.0, ans=0.0 +2024-09-20 06:40:08,858 INFO [train.py:1198] (1/2) Epoch 47, batch 400, loss[loss=0.2372, ctc_loss=0.1081, cr_loss=0.3336, attn_decoder_loss=0.2441, over 29698.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1087, cr_loss=0.3478, attn_decoder_loss=0.237, over 5026323.41 frames. ], batch size: 82, lr: 2.35e-03, grad_scale: 32.0 +2024-09-20 06:40:12,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=834200.0, ans=0.125 +2024-09-20 06:40:14,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.30 vs. limit=6.0 +2024-09-20 06:40:32,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=834240.0, ans=0.125 +2024-09-20 06:40:35,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=834240.0, ans=0.125 +2024-09-20 06:40:48,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=834280.0, ans=0.0 +2024-09-20 06:41:12,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=834360.0, ans=0.125 +2024-09-20 06:41:12,221 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=834360.0, ans=0.0 +2024-09-20 06:41:15,291 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:41:27,380 INFO [train.py:1198] (1/2) Epoch 47, batch 450, loss[loss=0.2342, ctc_loss=0.1077, cr_loss=0.3472, attn_decoder_loss=0.2406, over 29710.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1087, cr_loss=0.3481, attn_decoder_loss=0.2373, over 5190305.19 frames. 
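Note: the "ScheduledFloat" lines emitted from scaling.py:214 make up the bulk of this log. Each names one module hyperparameter (a dropout probability, a balancer prob, a skip rate, and so on) and the value it takes at the current batch_count; these values follow piecewise-linear schedules over the batch count, which is why most have settled to constants (0.125, 0.1, 0.0, ...) this deep into training. A minimal sketch, with a hypothetical schedule:

    # Sketch of a ScheduledFloat-style piecewise-linear schedule over
    # batch_count (the schedule points below are hypothetical).
    from typing import Sequence, Tuple

    def scheduled_float(batch_count: float,
                        schedule: Sequence[Tuple[float, float]]) -> float:
        """Linearly interpolate (batch_count, value) pairs, clamping at
        both ends of the schedule."""
        if batch_count <= schedule[0][0]:
            return schedule[0][1]
        if batch_count >= schedule[-1][0]:
            return schedule[-1][1]
        for (x0, y0), (x1, y1) in zip(schedule, schedule[1:]):
            if x0 <= batch_count <= x1:
                t = (batch_count - x0) / (x1 - x0)
                return y0 + t * (y1 - y0)
        raise AssertionError("unreachable for a sorted schedule")

    # Hypothetical dropout schedule: 0.3 at batch 0, decaying linearly to
    # 0.1 by batch 20000, constant afterwards. At batch_count=834000.0
    # (as in the records above) it has long since reached its final value.
    assert scheduled_float(834_000.0, [(0.0, 0.3), (20_000.0, 0.1)]) == 0.1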
], batch size: 83, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:41:42,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=834440.0, ans=0.1 +2024-09-20 06:41:53,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=834440.0, ans=0.0 +2024-09-20 06:41:54,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=834440.0, ans=0.04949747468305833 +2024-09-20 06:41:54,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=834440.0, ans=0.025 +2024-09-20 06:42:02,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=834480.0, ans=0.0 +2024-09-20 06:42:03,669 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.609e+01 9.172e+01 9.678e+01 2.074e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-20 06:42:16,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=834520.0, ans=0.125 +2024-09-20 06:42:32,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=834560.0, ans=0.09899494936611666 +2024-09-20 06:42:45,360 INFO [train.py:1198] (1/2) Epoch 47, batch 500, loss[loss=0.25, ctc_loss=0.1205, cr_loss=0.3752, attn_decoder_loss=0.2561, over 29464.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1081, cr_loss=0.3469, attn_decoder_loss=0.2365, over 5333240.99 frames. ], batch size: 94, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:42:53,426 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=834600.0, ans=0.125 +2024-09-20 06:42:59,392 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=834640.0, ans=0.125 +2024-09-20 06:43:11,274 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=834640.0, ans=0.0 +2024-09-20 06:43:37,937 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=4.83 vs. limit=12.0 +2024-09-20 06:43:46,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten.whitening_limit, batch_count=834760.0, ans=15.0 +2024-09-20 06:43:56,963 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.40 vs. limit=22.5 +2024-09-20 06:44:00,791 INFO [train.py:1198] (1/2) Epoch 47, batch 550, loss[loss=0.2449, ctc_loss=0.1152, cr_loss=0.3672, attn_decoder_loss=0.2511, over 28831.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.108, cr_loss=0.3459, attn_decoder_loss=0.2363, over 5423609.92 frames. 
], batch size: 104, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:44:18,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=834840.0, ans=0.1 +2024-09-20 06:44:38,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=834880.0, ans=0.2 +2024-09-20 06:44:39,385 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.709e+01 8.688e+01 9.011e+01 9.708e+01 1.487e+02, threshold=1.802e+02, percent-clipped=0.0 +2024-09-20 06:45:04,804 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=21.06 vs. limit=22.5 +2024-09-20 06:45:07,528 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=18.21 vs. limit=22.5 +2024-09-20 06:45:18,979 INFO [train.py:1198] (1/2) Epoch 47, batch 600, loss[loss=0.2484, ctc_loss=0.1257, cr_loss=0.3871, attn_decoder_loss=0.2535, over 29230.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1084, cr_loss=0.3471, attn_decoder_loss=0.2369, over 5510235.99 frames. ], batch size: 100, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:45:20,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=6.76 vs. limit=15.0 +2024-09-20 06:45:35,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=835040.0, ans=0.125 +2024-09-20 06:45:48,605 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn2.whiten, num_groups=1, num_channels=192, metric=8.31 vs. limit=22.5 +2024-09-20 06:46:02,924 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=835120.0, ans=10.0 +2024-09-20 06:46:04,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=835120.0, ans=10.0 +2024-09-20 06:46:22,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=835160.0, ans=0.125 +2024-09-20 06:46:36,765 INFO [train.py:1198] (1/2) Epoch 47, batch 650, loss[loss=0.233, ctc_loss=0.1045, cr_loss=0.3493, attn_decoder_loss=0.2396, over 29769.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1077, cr_loss=0.3459, attn_decoder_loss=0.2361, over 5586914.79 frames. 
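Note: the "Whitening" lines from scaling.py:1024 each report a metric against a limit for one whitened activation (e.g. "metric=21.06 vs. limit=22.5" above). The metric measures how far the feature covariance is from white, i.e. from having equal eigenvalues, and a constraint only activates once the metric exceeds its limit. The sketch below shows one plausible such metric, mean(eig^2) / mean(eig)^2 computed via covariance traces; this exact formulation is an assumption, not quoted from scaling.py.

    import torch

    def whitening_metric(x: torch.Tensor) -> torch.Tensor:
        """Plausible whitening metric (an ASSUMED formulation):
        mean(eig**2) / mean(eig)**2 of the feature covariance. Equals 1.0
        iff the covariance is a multiple of the identity ("white") and
        grows with the spread of the eigenvalues."""
        x = x.reshape(-1, x.shape[-1])                  # (frames, channels)
        x = x - x.mean(dim=0)                           # center the features
        cov = (x.t() @ x) / x.shape[0]                  # covariance estimate
        mean_eig = torch.diagonal(cov).mean()           # trace(C) / d
        mean_eig_sq = torch.diagonal(cov @ cov).mean()  # trace(C @ C) / d
        return mean_eig_sq / (mean_eig ** 2 + 1e-20)

    # A whitening constraint would back-propagate a penalty only while the
    # metric exceeds the logged limit (e.g. 22.5), which is why most
    # "metric=... vs. limit=..." records sit below their limits.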
], batch size: 81, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:47:13,109 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.705e+01 8.697e+01 9.175e+01 9.724e+01 1.599e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-20 06:47:13,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=835280.0, ans=0.125 +2024-09-20 06:47:13,446 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=835280.0, ans=0.0 +2024-09-20 06:47:21,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=835320.0, ans=0.1 +2024-09-20 06:47:23,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=835320.0, ans=0.125 +2024-09-20 06:47:28,407 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:47:29,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=835320.0, ans=0.125 +2024-09-20 06:47:52,257 INFO [train.py:1198] (1/2) Epoch 47, batch 700, loss[loss=0.231, ctc_loss=0.1108, cr_loss=0.3502, attn_decoder_loss=0.2365, over 29527.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1082, cr_loss=0.3465, attn_decoder_loss=0.2366, over 5637194.93 frames. ], batch size: 76, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:48:02,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=835400.0, ans=0.0 +2024-09-20 06:48:02,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=835400.0, ans=0.0 +2024-09-20 06:48:26,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.53 vs. limit=22.5 +2024-09-20 06:48:29,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=835480.0, ans=0.125 +2024-09-20 06:48:29,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=835480.0, ans=0.125 +2024-09-20 06:48:47,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=835520.0, ans=0.2 +2024-09-20 06:48:56,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=835560.0, ans=0.125 +2024-09-20 06:49:00,152 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.82 vs. limit=22.5 +2024-09-20 06:49:09,602 INFO [train.py:1198] (1/2) Epoch 47, batch 750, loss[loss=0.2404, ctc_loss=0.1116, cr_loss=0.366, attn_decoder_loss=0.2466, over 29674.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1081, cr_loss=0.3458, attn_decoder_loss=0.2365, over 5675717.48 frames. 
], batch size: 82, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:49:11,370 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=835600.0, ans=0.125 +2024-09-20 06:49:14,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=835600.0, ans=0.125 +2024-09-20 06:49:17,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=835600.0, ans=0.1 +2024-09-20 06:49:27,114 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.01 vs. limit=22.5 +2024-09-20 06:49:29,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=835640.0, ans=0.125 +2024-09-20 06:49:42,617 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:49:45,339 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.511e+01 8.705e+01 9.081e+01 9.642e+01 1.954e+02, threshold=1.816e+02, percent-clipped=1.0 +2024-09-20 06:50:06,996 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=835720.0, ans=0.2 +2024-09-20 06:50:15,370 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.08 vs. limit=15.0 +2024-09-20 06:50:24,784 INFO [train.py:1198] (1/2) Epoch 47, batch 800, loss[loss=0.2047, ctc_loss=0.07926, cr_loss=0.2793, attn_decoder_loss=0.2125, over 29590.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.108, cr_loss=0.3459, attn_decoder_loss=0.2365, over 5706752.51 frames. ], batch size: 73, lr: 2.35e-03, grad_scale: 32.0 +2024-09-20 06:50:31,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=835800.0, ans=0.0 +2024-09-20 06:51:03,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=835880.0, ans=0.0 +2024-09-20 06:51:07,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=835880.0, ans=0.0 +2024-09-20 06:51:30,571 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=835960.0, ans=0.95 +2024-09-20 06:51:37,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=835960.0, ans=0.125 +2024-09-20 06:51:39,737 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.74 vs. limit=12.0 +2024-09-20 06:51:42,575 INFO [train.py:1198] (1/2) Epoch 47, batch 850, loss[loss=0.2417, ctc_loss=0.1105, cr_loss=0.3467, attn_decoder_loss=0.2486, over 29709.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1078, cr_loss=0.3455, attn_decoder_loss=0.2364, over 5736275.39 frames. 
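Note: the recurring "WARNING [optim.py:487]" lines report Clipping_scale, five quantiles (min, 25%, median, 75%, max) of recent gradient norms, the resulting clipping threshold, and the share of recently clipped batches. The logged numbers fit threshold = Clipping_scale * median: the record above has quartiles 7.511e+01 8.705e+01 9.081e+01 9.642e+01 1.954e+02 with Clipping_scale=2.0 and threshold=1.816e+02 = 2.0 * 9.081e+01. A sketch of those statistics (the optimizer internals are assumed; only the threshold rule is checked against the log):

    import torch

    def clipping_stats(recent_norms: torch.Tensor, clipping_scale: float = 2.0):
        """Sketch of the statistics behind the optim.py warnings. The
        threshold rule is the one the logged numbers fit
        (threshold = clipping_scale * median of recent gradient norms)."""
        q = torch.quantile(recent_norms,
                           torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        threshold = clipping_scale * q[2]          # 2.0 * median
        percent_clipped = 100.0 * (recent_norms > threshold).float().mean()
        return q, threshold, percent_clipped

    # From the record above: median 9.081e+01 with Clipping_scale=2.0
    # gives threshold 1.816e+02, exactly as logged.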
], batch size: 89, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 06:51:45,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=836000.0, ans=0.0 +2024-09-20 06:52:05,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.73 vs. limit=15.0 +2024-09-20 06:52:10,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=836040.0, ans=22.5 +2024-09-20 06:52:22,245 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.573e+01 8.624e+01 9.106e+01 9.735e+01 2.135e+02, threshold=1.821e+02, percent-clipped=1.0 +2024-09-20 06:52:59,092 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=836200.0, ans=0.125 +2024-09-20 06:53:00,275 INFO [train.py:1198] (1/2) Epoch 47, batch 900, loss[loss=0.2041, ctc_loss=0.08633, cr_loss=0.301, attn_decoder_loss=0.2105, over 29618.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1079, cr_loss=0.3459, attn_decoder_loss=0.2366, over 5741300.30 frames. ], batch size: 73, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:53:10,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=836200.0, ans=0.125 +2024-09-20 06:53:20,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=836240.0, ans=0.125 +2024-09-20 06:53:20,929 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.25 vs. limit=15.0 +2024-09-20 06:53:48,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=836320.0, ans=0.0 +2024-09-20 06:54:01,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=836360.0, ans=0.125 +2024-09-20 06:54:03,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=836360.0, ans=0.0 +2024-09-20 06:54:03,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=836360.0, ans=0.2 +2024-09-20 06:54:09,564 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=836360.0, ans=0.1 +2024-09-20 06:54:09,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=836360.0, ans=0.125 +2024-09-20 06:54:15,111 INFO [train.py:1198] (1/2) Epoch 47, batch 950, loss[loss=0.2172, ctc_loss=0.09891, cr_loss=0.3287, attn_decoder_loss=0.223, over 29518.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1079, cr_loss=0.3458, attn_decoder_loss=0.2367, over 5742430.88 frames. ], batch size: 74, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:54:29,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=836400.0, ans=0.125 +2024-09-20 06:54:31,646 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.47 vs. 
limit=22.5 +2024-09-20 06:54:37,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=836440.0, ans=0.0 +2024-09-20 06:54:49,210 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 06:54:52,218 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.max_abs, batch_count=836480.0, ans=10.0 +2024-09-20 06:54:56,608 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.702e+01 9.182e+01 9.933e+01 3.090e+02, threshold=1.836e+02, percent-clipped=1.0 +2024-09-20 06:55:32,625 INFO [train.py:1198] (1/2) Epoch 47, batch 1000, loss[loss=0.2237, ctc_loss=0.09996, cr_loss=0.334, attn_decoder_loss=0.23, over 29499.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1087, cr_loss=0.3476, attn_decoder_loss=0.2375, over 5735647.85 frames. ], batch size: 77, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:55:50,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=836640.0, ans=0.125 +2024-09-20 06:55:51,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=836640.0, ans=0.0 +2024-09-20 06:55:51,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=836640.0, ans=0.0 +2024-09-20 06:56:02,301 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=836640.0, ans=0.05 +2024-09-20 06:56:05,802 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.96 vs. limit=6.0 +2024-09-20 06:56:06,017 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.26 vs. limit=22.5 +2024-09-20 06:56:30,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=836720.0, ans=0.125 +2024-09-20 06:56:41,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=836760.0, ans=0.125 +2024-09-20 06:56:45,954 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=13.83 vs. limit=15.0 +2024-09-20 06:56:47,990 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.56 vs. limit=15.0 +2024-09-20 06:56:50,187 INFO [train.py:1198] (1/2) Epoch 47, batch 1050, loss[loss=0.232, ctc_loss=0.1063, cr_loss=0.3383, attn_decoder_loss=0.2385, over 29657.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1082, cr_loss=0.3467, attn_decoder_loss=0.2367, over 5745339.13 frames. 
], batch size: 85, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:57:05,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=836840.0, ans=0.125 +2024-09-20 06:57:08,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=836840.0, ans=0.0 +2024-09-20 06:57:11,198 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.57 vs. limit=6.0 +2024-09-20 06:57:19,425 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=836880.0, ans=0.0 +2024-09-20 06:57:29,792 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.447e+01 8.620e+01 9.050e+01 9.577e+01 1.323e+02, threshold=1.810e+02, percent-clipped=0.0 +2024-09-20 06:57:30,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=836880.0, ans=0.125 +2024-09-20 06:57:32,464 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.45 vs. limit=15.0 +2024-09-20 06:57:39,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=836920.0, ans=0.0 +2024-09-20 06:57:42,072 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=836920.0, ans=0.0 +2024-09-20 06:58:05,273 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.40 vs. limit=15.0 +2024-09-20 06:58:05,800 INFO [train.py:1198] (1/2) Epoch 47, batch 1100, loss[loss=0.227, ctc_loss=0.1078, cr_loss=0.3398, attn_decoder_loss=0.2327, over 29463.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1081, cr_loss=0.346, attn_decoder_loss=0.2364, over 5756907.47 frames. ], batch size: 78, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 06:58:44,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=837080.0, ans=0.0 +2024-09-20 06:59:13,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=837160.0, ans=0.1 +2024-09-20 06:59:16,452 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.81 vs. limit=15.0 +2024-09-20 06:59:17,633 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=837160.0, ans=0.5 +2024-09-20 06:59:23,431 INFO [train.py:1198] (1/2) Epoch 47, batch 1150, loss[loss=0.2289, ctc_loss=0.1145, cr_loss=0.3569, attn_decoder_loss=0.2336, over 29441.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1082, cr_loss=0.346, attn_decoder_loss=0.2365, over 5754214.53 frames. ], batch size: 78, lr: 2.35e-03, grad_scale: 8.0 +2024-09-20 07:00:04,972 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.978e+01 8.402e+01 9.011e+01 9.514e+01 2.556e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 07:00:26,504 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=8.02 vs. 
limit=15.0 +2024-09-20 07:00:30,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=837360.0, ans=0.2 +2024-09-20 07:00:40,659 INFO [train.py:1198] (1/2) Epoch 47, batch 1200, loss[loss=0.2427, ctc_loss=0.1123, cr_loss=0.3592, attn_decoder_loss=0.2492, over 29674.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1083, cr_loss=0.3465, attn_decoder_loss=0.2373, over 5746856.94 frames. ], batch size: 85, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 07:00:48,505 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=837400.0, ans=0.025 +2024-09-20 07:01:33,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=837520.0, ans=0.125 +2024-09-20 07:01:48,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_positive, batch_count=837560.0, ans=0.05 +2024-09-20 07:01:56,179 INFO [train.py:1198] (1/2) Epoch 47, batch 1250, loss[loss=0.2471, ctc_loss=0.1204, cr_loss=0.3626, attn_decoder_loss=0.2531, over 29517.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1086, cr_loss=0.3475, attn_decoder_loss=0.2378, over 5774631.55 frames. ], batch size: 92, lr: 2.35e-03, grad_scale: 16.0 +2024-09-20 07:02:09,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=837600.0, ans=0.0 +2024-09-20 07:02:24,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=837640.0, ans=0.0 +2024-09-20 07:02:30,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=837680.0, ans=0.07 +2024-09-20 07:02:36,958 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.63 vs. limit=10.0 +2024-09-20 07:02:37,736 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.731e+01 8.615e+01 9.145e+01 9.650e+01 1.333e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-20 07:03:13,893 INFO [train.py:1198] (1/2) Epoch 47, batch 1300, loss[loss=0.2385, ctc_loss=0.108, cr_loss=0.3388, attn_decoder_loss=0.2455, over 28150.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.108, cr_loss=0.3462, attn_decoder_loss=0.2371, over 5777702.82 frames. 
], batch size: 111, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:03:17,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=837800.0, ans=0.0 +2024-09-20 07:03:20,319 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=837800.0, ans=0.09899494936611666 +2024-09-20 07:03:26,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=837800.0, ans=0.0 +2024-09-20 07:03:33,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=837840.0, ans=0.125 +2024-09-20 07:04:12,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=837920.0, ans=0.125 +2024-09-20 07:04:30,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=838000.0, ans=10.0 +2024-09-20 07:04:31,982 INFO [train.py:1198] (1/2) Epoch 47, batch 1350, loss[loss=0.2341, ctc_loss=0.1104, cr_loss=0.3488, attn_decoder_loss=0.2401, over 29756.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1077, cr_loss=0.3459, attn_decoder_loss=0.2369, over 5795627.16 frames. ], batch size: 81, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:04:42,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=838000.0, ans=0.125 +2024-09-20 07:05:04,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=838080.0, ans=0.125 +2024-09-20 07:05:04,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=838080.0, ans=0.0 +2024-09-20 07:05:09,908 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.91 vs. limit=12.0 +2024-09-20 07:05:10,470 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.518e+01 8.374e+01 8.876e+01 9.629e+01 1.227e+02, threshold=1.775e+02, percent-clipped=0.0 +2024-09-20 07:05:36,196 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=838160.0, ans=0.0 +2024-09-20 07:05:46,448 INFO [train.py:1198] (1/2) Epoch 47, batch 1400, loss[loss=0.214, ctc_loss=0.09901, cr_loss=0.3121, attn_decoder_loss=0.2198, over 29564.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1079, cr_loss=0.3463, attn_decoder_loss=0.2368, over 5806366.81 frames. ], batch size: 69, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:05:50,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=4.09 vs. limit=12.0 +2024-09-20 07:05:54,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=838200.0, ans=0.1 +2024-09-20 07:07:03,987 INFO [train.py:1198] (1/2) Epoch 47, batch 1450, loss[loss=0.2497, ctc_loss=0.125, cr_loss=0.3956, attn_decoder_loss=0.2548, over 29462.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.108, cr_loss=0.3462, attn_decoder_loss=0.2371, over 5803532.26 frames. 
], batch size: 94, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:07:45,071 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.353e+01 8.627e+01 9.137e+01 9.746e+01 6.249e+02, threshold=1.827e+02, percent-clipped=1.0 +2024-09-20 07:07:46,106 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.84 vs. limit=22.5 +2024-09-20 07:07:49,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=838520.0, ans=0.07 +2024-09-20 07:07:52,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=838520.0, ans=0.0 +2024-09-20 07:07:54,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=838520.0, ans=0.125 +2024-09-20 07:08:03,762 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.34 vs. limit=12.0 +2024-09-20 07:08:05,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=838560.0, ans=0.0 +2024-09-20 07:08:08,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=838560.0, ans=0.125 +2024-09-20 07:08:16,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=838560.0, ans=0.2 +2024-09-20 07:08:20,887 INFO [train.py:1198] (1/2) Epoch 47, batch 1500, loss[loss=0.2455, ctc_loss=0.115, cr_loss=0.3698, attn_decoder_loss=0.2518, over 29625.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1081, cr_loss=0.3468, attn_decoder_loss=0.2372, over 5805456.49 frames. ], batch size: 86, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:08:21,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=838600.0, ans=0.0 +2024-09-20 07:08:31,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.scale_min, batch_count=838600.0, ans=0.2 +2024-09-20 07:08:40,156 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=4.99 vs. limit=15.0 +2024-09-20 07:08:40,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=838640.0, ans=0.1 +2024-09-20 07:09:05,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=7.20 vs. limit=12.0 +2024-09-20 07:09:08,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=838720.0, ans=0.125 +2024-09-20 07:09:24,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=838760.0, ans=0.125 +2024-09-20 07:09:30,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=838760.0, ans=0.1 +2024-09-20 07:09:36,604 INFO [train.py:1198] (1/2) Epoch 47, batch 1550, loss[loss=0.2523, ctc_loss=0.1284, cr_loss=0.4062, attn_decoder_loss=0.2571, over 29486.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1085, cr_loss=0.347, attn_decoder_loss=0.2372, over 5781114.62 frames. 
], batch size: 90, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:09:38,389 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_abs, batch_count=838800.0, ans=0.5 +2024-09-20 07:10:17,555 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.425e+01 8.567e+01 9.122e+01 9.785e+01 2.024e+02, threshold=1.824e+02, percent-clipped=1.0 +2024-09-20 07:10:17,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=838880.0, ans=0.1 +2024-09-20 07:10:38,352 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.73 vs. limit=15.0 +2024-09-20 07:10:49,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=838960.0, ans=0.1 +2024-09-20 07:10:53,713 INFO [train.py:1198] (1/2) Epoch 47, batch 1600, loss[loss=0.2333, ctc_loss=0.1032, cr_loss=0.3402, attn_decoder_loss=0.2402, over 29665.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1085, cr_loss=0.3468, attn_decoder_loss=0.237, over 5763991.85 frames. ], batch size: 85, lr: 2.34e-03, grad_scale: 32.0 +2024-09-20 07:11:35,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.min_positive, batch_count=839080.0, ans=0.05 +2024-09-20 07:11:38,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=839080.0, ans=0.0 +2024-09-20 07:11:50,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=839120.0, ans=0.125 +2024-09-20 07:11:53,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=839120.0, ans=0.1 +2024-09-20 07:11:56,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=839160.0, ans=0.0 +2024-09-20 07:12:08,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=839160.0, ans=0.125 +2024-09-20 07:12:11,183 INFO [train.py:1198] (1/2) Epoch 47, batch 1650, loss[loss=0.2323, ctc_loss=0.1024, cr_loss=0.3277, attn_decoder_loss=0.2395, over 29720.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1083, cr_loss=0.3461, attn_decoder_loss=0.2368, over 5759378.56 frames. 
], batch size: 89, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:12:16,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=839200.0, ans=0.1 +2024-09-20 07:12:47,655 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=839280.0, ans=0.0 +2024-09-20 07:12:52,012 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.317e+01 8.634e+01 9.046e+01 9.641e+01 2.969e+02, threshold=1.809e+02, percent-clipped=2.0 +2024-09-20 07:12:56,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=839320.0, ans=0.2 +2024-09-20 07:12:58,512 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=839320.0, ans=0.125 +2024-09-20 07:13:01,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=839320.0, ans=0.1 +2024-09-20 07:13:02,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=839320.0, ans=0.125 +2024-09-20 07:13:26,658 INFO [train.py:1198] (1/2) Epoch 47, batch 1700, loss[loss=0.2112, ctc_loss=0.09415, cr_loss=0.3306, attn_decoder_loss=0.2168, over 29551.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1079, cr_loss=0.3458, attn_decoder_loss=0.2367, over 5780806.44 frames. ], batch size: 69, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:13:31,438 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=839400.0, ans=0.125 +2024-09-20 07:13:37,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=839400.0, ans=0.125 +2024-09-20 07:13:43,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=839440.0, ans=0.1 +2024-09-20 07:13:46,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=839440.0, ans=0.125 +2024-09-20 07:14:21,788 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.60 vs. limit=15.0 +2024-09-20 07:14:34,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=839560.0, ans=0.0 +2024-09-20 07:14:43,705 INFO [train.py:1198] (1/2) Epoch 47, batch 1750, loss[loss=0.2155, ctc_loss=0.09914, cr_loss=0.3288, attn_decoder_loss=0.2211, over 29354.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.108, cr_loss=0.3461, attn_decoder_loss=0.2367, over 5788380.40 frames. 
], batch size: 67, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:14:45,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=839600.0, ans=0.125 +2024-09-20 07:14:46,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=839600.0, ans=0.07 +2024-09-20 07:14:50,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=839600.0, ans=0.2 +2024-09-20 07:15:25,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=839680.0, ans=0.125 +2024-09-20 07:15:26,380 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.597e+01 9.141e+01 9.828e+01 1.386e+02, threshold=1.828e+02, percent-clipped=0.0 +2024-09-20 07:15:27,213 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.97 vs. limit=10.0 +2024-09-20 07:15:31,220 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=839720.0, ans=0.0 +2024-09-20 07:15:46,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=839760.0, ans=0.2 +2024-09-20 07:16:00,711 INFO [train.py:1198] (1/2) Epoch 47, batch 1800, loss[loss=0.2296, ctc_loss=0.1023, cr_loss=0.3327, attn_decoder_loss=0.2364, over 29685.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1079, cr_loss=0.3462, attn_decoder_loss=0.2369, over 5790598.17 frames. ], batch size: 83, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:16:10,308 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=839800.0, ans=0.0 +2024-09-20 07:16:21,317 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=12.51 vs. limit=15.0 +2024-09-20 07:16:22,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=839840.0, ans=0.1 +2024-09-20 07:16:44,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=839920.0, ans=0.025 +2024-09-20 07:16:50,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=839920.0, ans=0.0 +2024-09-20 07:16:52,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=839920.0, ans=0.125 +2024-09-20 07:17:16,945 INFO [train.py:1198] (1/2) Epoch 47, batch 1850, loss[loss=0.2388, ctc_loss=0.1093, cr_loss=0.3339, attn_decoder_loss=0.2457, over 29616.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1079, cr_loss=0.3464, attn_decoder_loss=0.2368, over 5796729.90 frames. ], batch size: 86, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:17:21,117 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.16 vs. 
limit=15.0 +2024-09-20 07:17:23,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=840000.0, ans=0.0 +2024-09-20 07:17:39,834 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:18:01,053 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.710e+01 8.631e+01 9.188e+01 9.651e+01 1.430e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-20 07:18:01,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=840080.0, ans=0.125 +2024-09-20 07:18:09,148 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.60 vs. limit=15.0 +2024-09-20 07:18:34,071 INFO [train.py:1198] (1/2) Epoch 47, batch 1900, loss[loss=0.2425, ctc_loss=0.114, cr_loss=0.3664, attn_decoder_loss=0.2486, over 29699.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1082, cr_loss=0.3473, attn_decoder_loss=0.2373, over 5804423.29 frames. ], batch size: 89, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:18:40,324 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=840200.0, ans=0.0 +2024-09-20 07:18:50,881 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=840240.0, ans=0.125 +2024-09-20 07:18:54,027 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=840240.0, ans=0.0 +2024-09-20 07:18:54,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.16 vs. limit=22.5 +2024-09-20 07:19:00,993 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=9.76 vs. limit=10.0 +2024-09-20 07:19:11,292 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=840280.0, ans=0.025 +2024-09-20 07:19:21,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=840320.0, ans=0.09899494936611666 +2024-09-20 07:19:30,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=840320.0, ans=0.025 +2024-09-20 07:19:34,243 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=3.33 vs. limit=10.0 +2024-09-20 07:19:39,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=840360.0, ans=0.125 +2024-09-20 07:19:48,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=840360.0, ans=0.025 +2024-09-20 07:19:51,435 INFO [train.py:1198] (1/2) Epoch 47, batch 1950, loss[loss=0.223, ctc_loss=0.0948, cr_loss=0.3345, attn_decoder_loss=0.2298, over 29436.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1087, cr_loss=0.3481, attn_decoder_loss=0.2382, over 5818979.00 frames. 
], batch size: 78, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:20:05,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=840440.0, ans=0.0 +2024-09-20 07:20:33,258 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.893e+01 9.385e+01 9.948e+01 2.061e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-20 07:20:53,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=840560.0, ans=0.0 +2024-09-20 07:20:57,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=840560.0, ans=0.125 +2024-09-20 07:21:06,476 INFO [train.py:1198] (1/2) Epoch 47, batch 2000, loss[loss=0.1948, ctc_loss=0.07885, cr_loss=0.2782, attn_decoder_loss=0.2015, over 29317.00 frames. ], tot_loss[loss=0.2327, ctc_loss=0.1092, cr_loss=0.3489, attn_decoder_loss=0.2387, over 5796573.89 frames. ], batch size: 67, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:21:25,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=840640.0, ans=0.0 +2024-09-20 07:21:38,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=840680.0, ans=0.125 +2024-09-20 07:21:54,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=840720.0, ans=0.2 +2024-09-20 07:22:17,200 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.50 vs. limit=15.0 +2024-09-20 07:22:17,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.81 vs. limit=15.0 +2024-09-20 07:22:18,153 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:22:18,615 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.11 vs. limit=15.0 +2024-09-20 07:22:24,171 INFO [train.py:1198] (1/2) Epoch 47, batch 2050, loss[loss=0.1998, ctc_loss=0.08284, cr_loss=0.3003, attn_decoder_loss=0.2061, over 29407.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1086, cr_loss=0.348, attn_decoder_loss=0.2375, over 5788815.04 frames. ], batch size: 70, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:22:25,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=840800.0, ans=0.125 +2024-09-20 07:22:27,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=840800.0, ans=0.025 +2024-09-20 07:22:43,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.77 vs. limit=6.0 +2024-09-20 07:22:53,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=840840.0, ans=0.0 +2024-09-20 07:22:57,330 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.60 vs. 
limit=22.5 +2024-09-20 07:22:57,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=840880.0, ans=0.0 +2024-09-20 07:23:09,566 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.215e+01 8.643e+01 9.120e+01 9.477e+01 1.642e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-20 07:23:21,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=840920.0, ans=0.2 +2024-09-20 07:23:40,688 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.33 vs. limit=6.0 +2024-09-20 07:23:41,217 INFO [train.py:1198] (1/2) Epoch 47, batch 2100, loss[loss=0.2224, ctc_loss=0.09714, cr_loss=0.3258, attn_decoder_loss=0.2291, over 29738.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1083, cr_loss=0.3473, attn_decoder_loss=0.237, over 5800471.34 frames. ], batch size: 81, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:23:53,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=841000.0, ans=0.125 +2024-09-20 07:24:01,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=841040.0, ans=0.125 +2024-09-20 07:24:05,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=841040.0, ans=0.125 +2024-09-20 07:24:07,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=841040.0, ans=0.1 +2024-09-20 07:24:11,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=841080.0, ans=0.0 +2024-09-20 07:24:34,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=841120.0, ans=0.025 +2024-09-20 07:24:44,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=841160.0, ans=0.125 +2024-09-20 07:24:50,489 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=841160.0, ans=0.0 +2024-09-20 07:24:54,282 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.59 vs. limit=6.0 +2024-09-20 07:24:56,333 INFO [train.py:1198] (1/2) Epoch 47, batch 2150, loss[loss=0.2243, ctc_loss=0.1095, cr_loss=0.3575, attn_decoder_loss=0.2291, over 29452.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1082, cr_loss=0.3472, attn_decoder_loss=0.2366, over 5815244.90 frames. ], batch size: 78, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:24:58,872 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.77 vs. 
limit=22.5 +2024-09-20 07:25:04,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=841200.0, ans=0.125 +2024-09-20 07:25:31,434 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=841280.0, ans=0.025 +2024-09-20 07:25:39,988 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.643e+01 8.572e+01 9.031e+01 9.738e+01 1.571e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-20 07:25:47,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=841320.0, ans=0.04949747468305833 +2024-09-20 07:26:01,985 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:26:03,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=841360.0, ans=0.0 +2024-09-20 07:26:13,831 INFO [train.py:1198] (1/2) Epoch 47, batch 2200, loss[loss=0.24, ctc_loss=0.1069, cr_loss=0.3464, attn_decoder_loss=0.2471, over 29628.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1081, cr_loss=0.347, attn_decoder_loss=0.2366, over 5811388.18 frames. ], batch size: 86, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:26:14,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=841400.0, ans=0.0 +2024-09-20 07:26:23,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=841400.0, ans=0.025 +2024-09-20 07:26:35,543 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.86 vs. limit=15.0 +2024-09-20 07:26:51,102 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=841480.0, ans=0.0 +2024-09-20 07:26:54,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=841480.0, ans=0.125 +2024-09-20 07:27:09,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=841520.0, ans=0.125 +2024-09-20 07:27:15,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=841560.0, ans=0.0 +2024-09-20 07:27:15,732 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.82 vs. limit=15.0 +2024-09-20 07:27:31,970 INFO [train.py:1198] (1/2) Epoch 47, batch 2250, loss[loss=0.2397, ctc_loss=0.1139, cr_loss=0.3683, attn_decoder_loss=0.2455, over 29703.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1081, cr_loss=0.347, attn_decoder_loss=0.2367, over 5810677.46 frames. ], batch size: 82, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:27:36,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=841600.0, ans=0.95 +2024-09-20 07:27:50,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.87 vs. 
limit=15.0 +2024-09-20 07:27:57,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=841640.0, ans=0.125 +2024-09-20 07:27:57,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=841640.0, ans=0.125 +2024-09-20 07:27:57,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=841640.0, ans=0.0 +2024-09-20 07:28:03,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=841680.0, ans=0.1 +2024-09-20 07:28:10,226 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.90 vs. limit=22.5 +2024-09-20 07:28:15,524 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.560e+01 8.630e+01 9.023e+01 9.514e+01 2.412e+02, threshold=1.805e+02, percent-clipped=2.0 +2024-09-20 07:28:22,353 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. limit=6.0 +2024-09-20 07:28:47,111 INFO [train.py:1198] (1/2) Epoch 47, batch 2300, loss[loss=0.2087, ctc_loss=0.09238, cr_loss=0.3021, attn_decoder_loss=0.2149, over 29338.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.107, cr_loss=0.3445, attn_decoder_loss=0.2356, over 5798047.48 frames. ], batch size: 71, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:28:53,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=841800.0, ans=0.1 +2024-09-20 07:29:20,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=841880.0, ans=0.5 +2024-09-20 07:29:23,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=841880.0, ans=0.125 +2024-09-20 07:29:25,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=841880.0, ans=0.0 +2024-09-20 07:29:27,130 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=6.43 vs. limit=15.0 +2024-09-20 07:29:30,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.45 vs. limit=22.5 +2024-09-20 07:29:34,266 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:29:41,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=841920.0, ans=0.0 +2024-09-20 07:30:04,681 INFO [train.py:1198] (1/2) Epoch 47, batch 2350, loss[loss=0.2489, ctc_loss=0.1256, cr_loss=0.3929, attn_decoder_loss=0.2539, over 29704.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1073, cr_loss=0.345, attn_decoder_loss=0.2357, over 5803719.48 frames. 
], batch size: 83, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:30:04,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=842000.0, ans=0.125 +2024-09-20 07:30:06,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=842000.0, ans=0.125 +2024-09-20 07:30:37,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=842080.0, ans=0.125 +2024-09-20 07:30:47,584 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=842080.0, ans=0.125 +2024-09-20 07:30:50,388 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.856e+01 8.800e+01 9.287e+01 9.916e+01 3.475e+02, threshold=1.857e+02, percent-clipped=2.0 +2024-09-20 07:30:50,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=842120.0, ans=0.1 +2024-09-20 07:30:59,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=842120.0, ans=0.125 +2024-09-20 07:31:11,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=842160.0, ans=0.125 +2024-09-20 07:31:22,266 INFO [train.py:1198] (1/2) Epoch 47, batch 2400, loss[loss=0.2262, ctc_loss=0.1051, cr_loss=0.3515, attn_decoder_loss=0.2319, over 29550.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1076, cr_loss=0.3452, attn_decoder_loss=0.2361, over 5807582.72 frames. ], batch size: 76, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:31:25,521 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.min_abs, batch_count=842200.0, ans=0.5 +2024-09-20 07:32:05,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=842280.0, ans=0.125 +2024-09-20 07:32:05,991 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=6.16 vs. limit=15.0 +2024-09-20 07:32:11,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=842320.0, ans=0.1 +2024-09-20 07:32:12,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=842320.0, ans=0.0 +2024-09-20 07:32:14,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.11 vs. limit=15.0 +2024-09-20 07:32:15,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=842320.0, ans=0.125 +2024-09-20 07:32:38,369 INFO [train.py:1198] (1/2) Epoch 47, batch 2450, loss[loss=0.2237, ctc_loss=0.09652, cr_loss=0.3208, attn_decoder_loss=0.2307, over 29692.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.108, cr_loss=0.3462, attn_decoder_loss=0.2369, over 5785282.05 frames. 
], batch size: 82, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:32:52,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=842440.0, ans=0.0 +2024-09-20 07:33:01,109 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=842440.0, ans=0.125 +2024-09-20 07:33:02,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=842440.0, ans=0.05 +2024-09-20 07:33:11,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=842480.0, ans=0.0 +2024-09-20 07:33:15,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=842480.0, ans=0.1 +2024-09-20 07:33:21,570 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.575e+01 8.574e+01 9.096e+01 9.798e+01 1.804e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-20 07:33:25,064 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=842520.0, ans=0.125 +2024-09-20 07:33:26,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=842520.0, ans=0.0 +2024-09-20 07:33:51,665 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.32 vs. limit=22.5 +2024-09-20 07:33:55,466 INFO [train.py:1198] (1/2) Epoch 47, batch 2500, loss[loss=0.243, ctc_loss=0.1114, cr_loss=0.3604, attn_decoder_loss=0.2496, over 29623.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1085, cr_loss=0.347, attn_decoder_loss=0.2371, over 5795373.40 frames. ], batch size: 86, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:33:59,207 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.46 vs. limit=15.0 +2024-09-20 07:34:07,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=842600.0, ans=0.0 +2024-09-20 07:34:12,933 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.90 vs. limit=15.0 +2024-09-20 07:34:19,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=842640.0, ans=0.0 +2024-09-20 07:34:39,064 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.53 vs. limit=10.0 +2024-09-20 07:34:41,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=842720.0, ans=0.125 +2024-09-20 07:34:51,717 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.89 vs. limit=12.0 +2024-09-20 07:35:13,287 INFO [train.py:1198] (1/2) Epoch 47, batch 2550, loss[loss=0.2089, ctc_loss=0.09904, cr_loss=0.3198, attn_decoder_loss=0.214, over 29347.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1083, cr_loss=0.3473, attn_decoder_loss=0.237, over 5797882.23 frames. 
], batch size: 67, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:35:22,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=842800.0, ans=0.0 +2024-09-20 07:35:48,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=842880.0, ans=0.125 +2024-09-20 07:35:48,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=842880.0, ans=10.0 +2024-09-20 07:35:52,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=842880.0, ans=0.2 +2024-09-20 07:35:56,712 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.552e+01 8.685e+01 9.047e+01 9.681e+01 1.454e+02, threshold=1.809e+02, percent-clipped=0.0 +2024-09-20 07:36:09,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=842920.0, ans=0.1 +2024-09-20 07:36:24,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=842960.0, ans=0.0 +2024-09-20 07:36:28,667 INFO [train.py:1198] (1/2) Epoch 47, batch 2600, loss[loss=0.2181, ctc_loss=0.09847, cr_loss=0.3345, attn_decoder_loss=0.224, over 29437.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1084, cr_loss=0.3475, attn_decoder_loss=0.2373, over 5795162.02 frames. ], batch size: 78, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:36:35,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=843000.0, ans=0.125 +2024-09-20 07:36:48,300 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=843040.0, ans=0.125 +2024-09-20 07:37:29,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=843160.0, ans=0.125 +2024-09-20 07:37:30,870 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=15.93 vs. limit=22.5 +2024-09-20 07:37:41,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=843160.0, ans=0.125 +2024-09-20 07:37:46,292 INFO [train.py:1198] (1/2) Epoch 47, batch 2650, loss[loss=0.2446, ctc_loss=0.1076, cr_loss=0.3345, attn_decoder_loss=0.2524, over 29225.00 frames. ], tot_loss[loss=0.2318, ctc_loss=0.1085, cr_loss=0.348, attn_decoder_loss=0.2378, over 5801231.75 frames. 
], batch size: 100, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:38:17,479 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=843280.0, ans=0.0 +2024-09-20 07:38:31,923 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.003e+01 8.688e+01 9.037e+01 9.488e+01 1.743e+02, threshold=1.807e+02, percent-clipped=0.0 +2024-09-20 07:38:36,821 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=843320.0, ans=0.0 +2024-09-20 07:38:39,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=843320.0, ans=0.0 +2024-09-20 07:38:44,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=843320.0, ans=0.125 +2024-09-20 07:39:01,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=843360.0, ans=0.125 +2024-09-20 07:39:03,846 INFO [train.py:1198] (1/2) Epoch 47, batch 2700, loss[loss=0.2387, ctc_loss=0.114, cr_loss=0.3541, attn_decoder_loss=0.2447, over 29545.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1085, cr_loss=0.348, attn_decoder_loss=0.2379, over 5797936.64 frames. ], batch size: 87, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:39:14,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=843400.0, ans=0.1 +2024-09-20 07:39:19,001 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=843440.0, ans=0.1 +2024-09-20 07:39:50,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=843520.0, ans=0.2 +2024-09-20 07:39:58,448 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=843520.0, ans=0.025 +2024-09-20 07:40:19,198 INFO [train.py:1198] (1/2) Epoch 47, batch 2750, loss[loss=0.2231, ctc_loss=0.1031, cr_loss=0.3428, attn_decoder_loss=0.2288, over 29518.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1078, cr_loss=0.3461, attn_decoder_loss=0.2367, over 5796591.13 frames. ], batch size: 75, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:40:21,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=843600.0, ans=0.125 +2024-09-20 07:41:02,932 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.503e+01 8.699e+01 9.178e+01 9.870e+01 7.766e+02, threshold=1.836e+02, percent-clipped=3.0 +2024-09-20 07:41:03,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=843720.0, ans=0.07 +2024-09-20 07:41:07,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=843720.0, ans=0.04949747468305833 +2024-09-20 07:41:15,428 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=843720.0, ans=0.1 +2024-09-20 07:41:37,201 INFO [train.py:1198] (1/2) Epoch 47, batch 2800, loss[loss=0.2466, ctc_loss=0.1266, cr_loss=0.364, attn_decoder_loss=0.2518, over 21324.00 frames. 
], tot_loss[loss=0.2312, ctc_loss=0.1083, cr_loss=0.3471, attn_decoder_loss=0.2371, over 5777920.77 frames. ], batch size: 210, lr: 2.34e-03, grad_scale: 32.0 +2024-09-20 07:42:08,038 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=843880.0, ans=0.2 +2024-09-20 07:42:17,158 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=843880.0, ans=0.1 +2024-09-20 07:42:18,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=843880.0, ans=0.1 +2024-09-20 07:42:36,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=843920.0, ans=0.125 +2024-09-20 07:42:44,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=843960.0, ans=0.125 +2024-09-20 07:42:44,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=843960.0, ans=0.1 +2024-09-20 07:42:46,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.const_attention_rate, batch_count=843960.0, ans=0.025 +2024-09-20 07:42:47,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=843960.0, ans=0.125 +2024-09-20 07:42:51,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=843960.0, ans=0.125 +2024-09-20 07:42:54,619 INFO [train.py:1198] (1/2) Epoch 47, batch 2850, loss[loss=0.2197, ctc_loss=0.09959, cr_loss=0.3302, attn_decoder_loss=0.2257, over 29516.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1085, cr_loss=0.3473, attn_decoder_loss=0.2373, over 5762153.15 frames. 
], batch size: 77, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:42:57,866 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=844000.0, ans=0.0 +2024-09-20 07:43:19,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=844040.0, ans=0.125 +2024-09-20 07:43:26,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=844080.0, ans=0.125 +2024-09-20 07:43:38,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=844120.0, ans=0.0 +2024-09-20 07:43:39,705 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.599e+01 8.812e+01 9.340e+01 9.979e+01 3.635e+02, threshold=1.868e+02, percent-clipped=1.0 +2024-09-20 07:43:44,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=844120.0, ans=0.1 +2024-09-20 07:43:54,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=844160.0, ans=0.125 +2024-09-20 07:43:55,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=844160.0, ans=0.0 +2024-09-20 07:43:58,006 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=844160.0, ans=0.1 +2024-09-20 07:43:59,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=844160.0, ans=0.125 +2024-09-20 07:44:09,785 INFO [train.py:1198] (1/2) Epoch 47, batch 2900, loss[loss=0.2201, ctc_loss=0.09431, cr_loss=0.3127, attn_decoder_loss=0.2271, over 29424.00 frames. ], tot_loss[loss=0.2324, ctc_loss=0.1092, cr_loss=0.3492, attn_decoder_loss=0.2384, over 5788163.31 frames. ], batch size: 79, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:44:50,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=844280.0, ans=0.0 +2024-09-20 07:44:57,699 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.96 vs. limit=10.0 +2024-09-20 07:45:03,466 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.50 vs. limit=15.0 +2024-09-20 07:45:06,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=844320.0, ans=0.0 +2024-09-20 07:45:10,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=844360.0, ans=0.1 +2024-09-20 07:45:27,377 INFO [train.py:1198] (1/2) Epoch 47, batch 2950, loss[loss=0.2322, ctc_loss=0.1116, cr_loss=0.3679, attn_decoder_loss=0.2374, over 29553.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1085, cr_loss=0.3476, attn_decoder_loss=0.2372, over 5782957.83 frames. 
], batch size: 75, lr: 2.34e-03, grad_scale: 16.0 +2024-09-20 07:45:27,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=844400.0, ans=0.0 +2024-09-20 07:45:52,533 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.41 vs. limit=22.5 +2024-09-20 07:46:14,755 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.670e+01 9.173e+01 9.775e+01 4.031e+02, threshold=1.835e+02, percent-clipped=2.0 +2024-09-20 07:46:45,076 INFO [train.py:1198] (1/2) Epoch 47, batch 3000, loss[loss=0.2359, ctc_loss=0.1156, cr_loss=0.3698, attn_decoder_loss=0.241, over 29751.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1085, cr_loss=0.3475, attn_decoder_loss=0.2371, over 5783310.10 frames. ], batch size: 81, lr: 2.34e-03, grad_scale: 8.0 +2024-09-20 07:46:45,076 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 07:47:03,445 INFO [train.py:1230] (1/2) Epoch 47, validation: loss=0.2127, ctc_loss=0.03692, cr_loss=6.538e-15, attn_decoder_loss=0.2323, over 944034.00 frames. +2024-09-20 07:47:03,446 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 07:47:28,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=844640.0, ans=0.125 +2024-09-20 07:47:29,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=844640.0, ans=0.125 +2024-09-20 07:47:56,096 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=10.24 vs. limit=15.0 +2024-09-20 07:48:16,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.prob, batch_count=844760.0, ans=0.125 +2024-09-20 07:48:19,549 INFO [train.py:1198] (1/2) Epoch 47, batch 3050, loss[loss=0.2379, ctc_loss=0.1217, cr_loss=0.3777, attn_decoder_loss=0.2424, over 29530.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1088, cr_loss=0.3484, attn_decoder_loss=0.2374, over 5777499.87 frames. ], batch size: 76, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 07:48:45,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=844840.0, ans=0.0 +2024-09-20 07:49:00,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=844880.0, ans=0.0 +2024-09-20 07:49:06,337 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.834e+01 8.765e+01 9.371e+01 9.890e+01 2.296e+02, threshold=1.874e+02, percent-clipped=1.0 +2024-09-20 07:49:26,620 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=844960.0, ans=0.0 +2024-09-20 07:49:29,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=844960.0, ans=0.125 +2024-09-20 07:49:38,885 INFO [train.py:1198] (1/2) Epoch 47, batch 3100, loss[loss=0.2432, ctc_loss=0.1193, cr_loss=0.3909, attn_decoder_loss=0.2483, over 29258.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1086, cr_loss=0.3479, attn_decoder_loss=0.237, over 5775988.04 frames. 
], batch size: 100, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 07:49:58,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=845040.0, ans=0.125 +2024-09-20 07:49:59,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=845040.0, ans=0.125 +2024-09-20 07:50:05,082 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.31 vs. limit=6.0 +2024-09-20 07:50:08,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=845080.0, ans=0.125 +2024-09-20 07:50:15,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=845080.0, ans=0.1 +2024-09-20 07:50:19,410 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=845080.0, ans=0.0 +2024-09-20 07:50:33,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=845120.0, ans=0.1 +2024-09-20 07:50:37,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=845160.0, ans=0.125 +2024-09-20 07:50:38,116 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=12.02 vs. limit=15.0 +2024-09-20 07:50:54,270 INFO [train.py:1198] (1/2) Epoch 47, batch 3150, loss[loss=0.2458, ctc_loss=0.117, cr_loss=0.3763, attn_decoder_loss=0.2517, over 28852.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1088, cr_loss=0.3483, attn_decoder_loss=0.237, over 5781760.61 frames. ], batch size: 104, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 07:50:56,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=845200.0, ans=0.125 +2024-09-20 07:51:20,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=845240.0, ans=0.125 +2024-09-20 07:51:30,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.prob, batch_count=845280.0, ans=0.125 +2024-09-20 07:51:40,981 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.436e+01 8.583e+01 9.160e+01 9.723e+01 3.463e+02, threshold=1.832e+02, percent-clipped=1.0 +2024-09-20 07:51:48,859 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=845320.0, ans=0.1 +2024-09-20 07:52:09,910 INFO [train.py:1198] (1/2) Epoch 47, batch 3200, loss[loss=0.2368, ctc_loss=0.119, cr_loss=0.3753, attn_decoder_loss=0.2415, over 29421.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1085, cr_loss=0.3474, attn_decoder_loss=0.2368, over 5792302.26 frames. 
], batch size: 79, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:52:13,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=845400.0, ans=0.125 +2024-09-20 07:52:20,828 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=845400.0, ans=0.125 +2024-09-20 07:52:34,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=845440.0, ans=0.0 +2024-09-20 07:52:46,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=845480.0, ans=0.125 +2024-09-20 07:53:22,372 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=845560.0, ans=0.1 +2024-09-20 07:53:28,506 INFO [train.py:1198] (1/2) Epoch 47, batch 3250, loss[loss=0.2359, ctc_loss=0.1093, cr_loss=0.3397, attn_decoder_loss=0.2424, over 29705.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1088, cr_loss=0.348, attn_decoder_loss=0.2374, over 5799579.65 frames. ], batch size: 84, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:53:50,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=845640.0, ans=0.125 +2024-09-20 07:53:57,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=845640.0, ans=0.0 +2024-09-20 07:53:58,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.75 vs. limit=15.0 +2024-09-20 07:54:17,342 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.625e+01 8.840e+01 9.453e+01 1.004e+02 3.254e+02, threshold=1.891e+02, percent-clipped=2.0 +2024-09-20 07:54:45,885 INFO [train.py:1198] (1/2) Epoch 47, batch 3300, loss[loss=0.2468, ctc_loss=0.1167, cr_loss=0.3384, attn_decoder_loss=0.2538, over 28270.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1082, cr_loss=0.3465, attn_decoder_loss=0.2364, over 5796497.10 frames. ], batch size: 111, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:55:06,571 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.94 vs. limit=22.5 +2024-09-20 07:55:07,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=845840.0, ans=0.0 +2024-09-20 07:55:08,002 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.19 vs. limit=22.5 +2024-09-20 07:55:09,170 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.22 vs. limit=22.5 +2024-09-20 07:55:37,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.min_positive, batch_count=845920.0, ans=0.05 +2024-09-20 07:55:37,917 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=8.53 vs. 
limit=15.0 +2024-09-20 07:55:43,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=845920.0, ans=0.1 +2024-09-20 07:55:46,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=845960.0, ans=0.125 +2024-09-20 07:55:52,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=845960.0, ans=0.1 +2024-09-20 07:56:00,878 INFO [train.py:1198] (1/2) Epoch 47, batch 3350, loss[loss=0.2371, ctc_loss=0.1106, cr_loss=0.3463, attn_decoder_loss=0.2435, over 28907.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.109, cr_loss=0.3479, attn_decoder_loss=0.2373, over 5773030.87 frames. ], batch size: 104, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:56:16,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=846040.0, ans=0.2 +2024-09-20 07:56:47,173 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.506e+01 8.631e+01 9.228e+01 9.801e+01 1.993e+02, threshold=1.846e+02, percent-clipped=1.0 +2024-09-20 07:56:48,911 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=846120.0, ans=0.2 +2024-09-20 07:56:58,098 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=846120.0, ans=0.125 +2024-09-20 07:57:10,956 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 07:57:20,291 INFO [train.py:1198] (1/2) Epoch 47, batch 3400, loss[loss=0.2005, ctc_loss=0.08575, cr_loss=0.2791, attn_decoder_loss=0.2071, over 29339.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1092, cr_loss=0.3482, attn_decoder_loss=0.2371, over 5765265.64 frames. ], batch size: 67, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:57:26,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=846200.0, ans=0.125 +2024-09-20 07:57:28,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=846200.0, ans=0.2 +2024-09-20 07:58:36,091 INFO [train.py:1198] (1/2) Epoch 47, batch 3450, loss[loss=0.2344, ctc_loss=0.1031, cr_loss=0.3436, attn_decoder_loss=0.2414, over 28335.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1093, cr_loss=0.3483, attn_decoder_loss=0.2374, over 5773348.56 frames. ], batch size: 111, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:58:40,328 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.19 vs. 
limit=22.5 +2024-09-20 07:58:58,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=846440.0, ans=0.1 +2024-09-20 07:59:04,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=846480.0, ans=0.125 +2024-09-20 07:59:13,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=846480.0, ans=0.2 +2024-09-20 07:59:22,441 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.138e+01 8.718e+01 9.224e+01 9.719e+01 1.765e+02, threshold=1.845e+02, percent-clipped=0.0 +2024-09-20 07:59:33,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=846520.0, ans=0.2 +2024-09-20 07:59:51,158 INFO [train.py:1198] (1/2) Epoch 47, batch 3500, loss[loss=0.2121, ctc_loss=0.09599, cr_loss=0.3281, attn_decoder_loss=0.2177, over 29327.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1089, cr_loss=0.3478, attn_decoder_loss=0.237, over 5776235.95 frames. ], batch size: 71, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 07:59:51,548 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=846600.0, ans=0.125 +2024-09-20 07:59:59,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=846600.0, ans=0.0 +2024-09-20 07:59:59,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=846600.0, ans=10.0 +2024-09-20 08:00:08,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=846640.0, ans=0.125 +2024-09-20 08:00:13,971 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=846640.0, ans=0.0 +2024-09-20 08:00:21,958 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.76 vs. limit=15.0 +2024-09-20 08:00:29,257 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.43 vs. limit=15.0 +2024-09-20 08:00:30,241 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=846680.0, ans=0.125 +2024-09-20 08:00:31,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=846680.0, ans=0.125 +2024-09-20 08:01:05,574 INFO [train.py:1198] (1/2) Epoch 47, batch 3550, loss[loss=0.2561, ctc_loss=0.128, cr_loss=0.3869, attn_decoder_loss=0.2617, over 29726.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1086, cr_loss=0.3475, attn_decoder_loss=0.237, over 5782314.88 frames. 
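
Each per-batch entry in this log reports four quantities: `ctc_loss`, `cr_loss` (the consistency-regularization term), `attn_decoder_loss`, and their weighted combination `loss`. The log does not show the weights themselves, so the scales below are assumptions for illustration only; a minimal sketch of the combination:

```python
# Hedged sketch of how the logged loss components could be combined.
# ctc_scale / cr_scale / attn_scale are assumed values, not the recipe's
# actual settings, so the printed total need not match the logged "loss".
import torch

def combine_losses(ctc_loss, cr_loss, attn_decoder_loss,
                   ctc_scale=0.3, cr_scale=0.2, attn_scale=1.0):
    return ctc_scale * ctc_loss + cr_scale * cr_loss + attn_scale * attn_decoder_loss

# Values from one logged batch (Epoch 47, batch 3250):
total = combine_losses(torch.tensor(0.1093),
                       torch.tensor(0.3397),
                       torch.tensor(0.2424))
print(float(total))
```
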
], batch size: 89, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:01:07,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=846800.0, ans=0.0 +2024-09-20 08:01:42,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=846880.0, ans=0.0 +2024-09-20 08:01:55,423 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.517e+01 8.386e+01 8.882e+01 9.420e+01 1.531e+02, threshold=1.776e+02, percent-clipped=0.0 +2024-09-20 08:01:58,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=846920.0, ans=0.125 +2024-09-20 08:02:23,261 INFO [train.py:1198] (1/2) Epoch 47, batch 3600, loss[loss=0.2306, ctc_loss=0.1127, cr_loss=0.363, attn_decoder_loss=0.2357, over 29482.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1084, cr_loss=0.3471, attn_decoder_loss=0.2369, over 5790930.57 frames. ], batch size: 77, lr: 2.33e-03, grad_scale: 32.0 +2024-09-20 08:02:42,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=847040.0, ans=0.125 +2024-09-20 08:03:08,058 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=847120.0, ans=0.125 +2024-09-20 08:03:08,156 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:03:15,903 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.78 vs. limit=15.0 +2024-09-20 08:03:18,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=847120.0, ans=0.125 +2024-09-20 08:03:18,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=847120.0, ans=0.0 +2024-09-20 08:03:27,182 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=847160.0, ans=0.025 +2024-09-20 08:03:27,200 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer1.prob, batch_count=847160.0, ans=0.125 +2024-09-20 08:03:34,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=847160.0, ans=0.0 +2024-09-20 08:03:37,746 INFO [train.py:1198] (1/2) Epoch 47, batch 3650, loss[loss=0.2528, ctc_loss=0.1269, cr_loss=0.3828, attn_decoder_loss=0.2582, over 29498.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1082, cr_loss=0.3465, attn_decoder_loss=0.2367, over 5792809.80 frames. ], batch size: 90, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:04:14,183 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=12.12 vs. 
limit=15.0 +2024-09-20 08:04:26,506 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.278e+01 8.685e+01 9.171e+01 9.762e+01 1.576e+02, threshold=1.834e+02, percent-clipped=0.0 +2024-09-20 08:04:26,880 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=847320.0, ans=0.0 +2024-09-20 08:04:28,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=847320.0, ans=0.0 +2024-09-20 08:04:29,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=847320.0, ans=0.07 +2024-09-20 08:04:43,268 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer1.prob, batch_count=847360.0, ans=0.125 +2024-09-20 08:04:51,911 INFO [train.py:1198] (1/2) Epoch 47, batch 3700, loss[loss=0.234, ctc_loss=0.1086, cr_loss=0.3258, attn_decoder_loss=0.2407, over 29702.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1085, cr_loss=0.3473, attn_decoder_loss=0.2369, over 5801946.72 frames. ], batch size: 84, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:04:52,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=847400.0, ans=0.1 +2024-09-20 08:04:53,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=847400.0, ans=0.125 +2024-09-20 08:05:20,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff2_skip_rate, batch_count=847480.0, ans=0.0 +2024-09-20 08:05:37,022 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=847520.0, ans=0.125 +2024-09-20 08:05:39,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=847520.0, ans=0.125 +2024-09-20 08:05:47,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=847520.0, ans=0.0 +2024-09-20 08:06:06,313 INFO [train.py:1198] (1/2) Epoch 47, batch 3750, loss[loss=0.2045, ctc_loss=0.08684, cr_loss=0.2968, attn_decoder_loss=0.211, over 29372.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1083, cr_loss=0.3473, attn_decoder_loss=0.2367, over 5806894.99 frames. 
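
The frequent `ScheduledFloat` entries show hyperparameters (dropout probabilities, skip rates, bypass scale bounds) evaluated as a function of `batch_count`; this late in training most have settled at their final values (e.g. `dropout_p` at 0.1, skip rates at 0.0). A minimal sketch of such a schedule, assuming piecewise-linear interpolation between (batch_count, value) breakpoints; an illustrative stand-in, not the actual class from `scaling.py`:

```python
from bisect import bisect_right

class ScheduledFloatSketch:
    """Float hyperparameter interpolated piecewise-linearly in batch_count."""
    def __init__(self, *points):
        self.points = sorted(points)  # (batch_count, value) pairs

    def value(self, batch_count: float) -> float:
        xs = [x for x, _ in self.points]
        i = bisect_right(xs, batch_count)
        if i == 0:
            return self.points[0][1]
        if i == len(self.points):
            return self.points[-1][1]
        (x0, y0), (x1, y1) = self.points[i - 1], self.points[i]
        return y0 + (batch_count - x0) / (x1 - x0) * (y1 - y0)

# A dropout_p decaying from 0.3 to 0.1 over the first 20k batches
# (breakpoints are assumptions) has long since flattened out here:
dropout_p = ScheduledFloatSketch((0.0, 0.3), (20000.0, 0.1))
print(dropout_p.value(845640.0))  # -> 0.1
```
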
], batch size: 67, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:06:21,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=847640.0, ans=0.0 +2024-09-20 08:06:55,606 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.392e+01 8.560e+01 9.134e+01 9.769e+01 1.535e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-20 08:06:58,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=847720.0, ans=0.025 +2024-09-20 08:07:01,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=847720.0, ans=0.125 +2024-09-20 08:07:10,725 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:07:12,356 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:07:20,707 INFO [train.py:1198] (1/2) Epoch 47, batch 3800, loss[loss=0.2415, ctc_loss=0.1098, cr_loss=0.3619, attn_decoder_loss=0.2481, over 29608.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1083, cr_loss=0.347, attn_decoder_loss=0.2364, over 5798572.84 frames. ], batch size: 86, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:07:23,949 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=847800.0, ans=0.0 +2024-09-20 08:07:29,988 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=847800.0, ans=0.125 +2024-09-20 08:07:39,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=847840.0, ans=0.125 +2024-09-20 08:07:49,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=847840.0, ans=0.2 +2024-09-20 08:07:52,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=847880.0, ans=0.0 +2024-09-20 08:08:05,081 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.54 vs. limit=15.0 +2024-09-20 08:08:17,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=847920.0, ans=0.125 +2024-09-20 08:08:45,615 INFO [train.py:1198] (1/2) Epoch 47, batch 3850, loss[loss=0.251, ctc_loss=0.1203, cr_loss=0.3685, attn_decoder_loss=0.2573, over 29254.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1084, cr_loss=0.3474, attn_decoder_loss=0.2365, over 5811618.88 frames. ], batch size: 100, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:08:53,707 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.49 vs. 
limit=15.0 +2024-09-20 08:09:22,749 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:09:34,445 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.721e+01 8.837e+01 9.282e+01 9.780e+01 1.653e+02, threshold=1.856e+02, percent-clipped=0.0 +2024-09-20 08:09:56,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=848160.0, ans=0.0 +2024-09-20 08:09:59,712 INFO [train.py:1198] (1/2) Epoch 47, batch 3900, loss[loss=0.2447, ctc_loss=0.1106, cr_loss=0.3522, attn_decoder_loss=0.2518, over 29639.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1085, cr_loss=0.3468, attn_decoder_loss=0.2369, over 5816305.29 frames. ], batch size: 86, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:10:06,033 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=848200.0, ans=0.1 +2024-09-20 08:10:19,080 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer1.prob, batch_count=848240.0, ans=0.125 +2024-09-20 08:10:26,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=848240.0, ans=0.125 +2024-09-20 08:10:45,046 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=11.96 vs. limit=22.5 +2024-09-20 08:10:48,574 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=848320.0, ans=0.04949747468305833 +2024-09-20 08:10:54,390 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:11:13,504 INFO [train.py:1198] (1/2) Epoch 47, batch 3950, loss[loss=0.2425, ctc_loss=0.1165, cr_loss=0.3644, attn_decoder_loss=0.2484, over 29486.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.108, cr_loss=0.3463, attn_decoder_loss=0.2368, over 5835872.30 frames. ], batch size: 97, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:11:28,778 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=848440.0, ans=0.125 +2024-09-20 08:11:43,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=848480.0, ans=0.1 +2024-09-20 08:11:47,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=848480.0, ans=0.125 +2024-09-20 08:12:02,180 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.359e+01 8.442e+01 9.066e+01 9.725e+01 6.124e+02, threshold=1.813e+02, percent-clipped=2.0 +2024-09-20 08:12:27,091 INFO [train.py:1198] (1/2) Epoch 47, batch 4000, loss[loss=0.2168, ctc_loss=0.1023, cr_loss=0.3396, attn_decoder_loss=0.2219, over 29506.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1081, cr_loss=0.3464, attn_decoder_loss=0.2366, over 5812393.69 frames. 
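
The periodic `WARNING [optim.py:487]` lines summarize gradient clipping: five grad-norm quantiles (min, 25%, median, 75%, max) over recent batches, the active threshold, and the share of recent batches that were clipped. A minimal sketch under the assumption that the threshold is `Clipping_scale` times the median of a sliding window of norms; the real optimizer may derive it differently:

```python
from collections import deque
import torch

class QuartileClipperSketch:
    def __init__(self, clipping_scale=2.0, window=128):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)    # recent global grad norms
        self.clipped = deque(maxlen=window)  # whether each step was clipped

    def clip_(self, parameters):
        grads = [p.grad for p in parameters if p.grad is not None]
        norm = float(torch.norm(torch.stack([g.norm() for g in grads])))
        self.norms.append(norm)
        hist = torch.tensor(list(self.norms))
        threshold = self.clipping_scale * float(hist.median())
        self.clipped.append(norm > threshold)
        if norm > threshold:
            for g in grads:
                g.mul_(threshold / norm)  # rescale gradients in place
        q = torch.quantile(hist, torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0]))
        print(f"grad-norm quartiles {q.tolist()}, threshold={threshold:.3e}, "
              f"percent-clipped={100 * sum(self.clipped) / len(self.clipped):.1f}")
```
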
], batch size: 74, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:12:53,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer2.min_abs, batch_count=848640.0, ans=0.5 +2024-09-20 08:13:06,894 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=848680.0, ans=0.125 +2024-09-20 08:13:35,573 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.03 vs. limit=15.0 +2024-09-20 08:13:43,949 INFO [train.py:1198] (1/2) Epoch 47, batch 4050, loss[loss=0.247, ctc_loss=0.1283, cr_loss=0.3653, attn_decoder_loss=0.252, over 19940.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.108, cr_loss=0.3461, attn_decoder_loss=0.2365, over 5794877.04 frames. ], batch size: 210, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:14:06,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=848840.0, ans=0.0 +2024-09-20 08:14:11,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=848880.0, ans=0.0 +2024-09-20 08:14:14,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=848880.0, ans=0.1 +2024-09-20 08:14:33,189 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.120e+01 8.525e+01 9.075e+01 9.953e+01 1.624e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-20 08:14:48,829 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.82 vs. limit=15.0 +2024-09-20 08:14:56,871 INFO [train.py:1198] (1/2) Epoch 47, batch 4100, loss[loss=0.25, ctc_loss=0.1223, cr_loss=0.3865, attn_decoder_loss=0.2556, over 29516.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.108, cr_loss=0.346, attn_decoder_loss=0.2366, over 5790413.01 frames. ], batch size: 90, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:14:59,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=849000.0, ans=0.0 +2024-09-20 08:15:28,131 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.12 vs. limit=22.5 +2024-09-20 08:15:47,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.prob, batch_count=849120.0, ans=0.125 +2024-09-20 08:16:10,033 INFO [train.py:1198] (1/2) Epoch 47, batch 4150, loss[loss=0.2241, ctc_loss=0.1023, cr_loss=0.321, attn_decoder_loss=0.2305, over 29517.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.108, cr_loss=0.3462, attn_decoder_loss=0.2363, over 5796123.78 frames. 
], batch size: 77, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:16:11,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=849200.0, ans=0.0 +2024-09-20 08:16:23,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=849240.0, ans=0.1 +2024-09-20 08:16:32,543 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.max_abs, batch_count=849240.0, ans=10.0 +2024-09-20 08:16:37,164 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.40 vs. limit=15.0 +2024-09-20 08:17:00,940 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.646e+01 8.758e+01 9.113e+01 9.755e+01 1.948e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-20 08:17:25,430 INFO [train.py:1198] (1/2) Epoch 47, batch 4200, loss[loss=0.2484, ctc_loss=0.1193, cr_loss=0.3858, attn_decoder_loss=0.2541, over 29524.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1084, cr_loss=0.3471, attn_decoder_loss=0.2367, over 5798493.07 frames. ], batch size: 90, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:17:36,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=849400.0, ans=0.09899494936611666 +2024-09-20 08:17:37,803 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=849400.0, ans=0.125 +2024-09-20 08:17:40,688 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:17:41,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=849440.0, ans=0.125 +2024-09-20 08:18:07,356 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.36 vs. limit=10.0 +2024-09-20 08:18:08,422 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:18:14,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=849520.0, ans=0.1 +2024-09-20 08:18:15,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer2.prob, batch_count=849520.0, ans=0.125 +2024-09-20 08:18:16,672 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=11.65 vs. limit=22.5 +2024-09-20 08:18:20,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=849520.0, ans=0.2 +2024-09-20 08:18:24,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=849560.0, ans=0.0 +2024-09-20 08:18:31,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=849560.0, ans=0.125 +2024-09-20 08:18:39,542 INFO [train.py:1198] (1/2) Epoch 47, batch 4250, loss[loss=0.2062, ctc_loss=0.08498, cr_loss=0.2873, attn_decoder_loss=0.2133, over 29513.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1082, cr_loss=0.3463, attn_decoder_loss=0.2369, over 5804813.39 frames. 
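
`grad_scale` in the batch lines moves between 8.0, 16.0 and 32.0: the dynamic loss scale of mixed-precision training, halved when a step overflows and doubled again after a run of clean steps. A sketch of one training step with PyTorch's stock `GradScaler`; the growth/backoff settings are assumptions, chosen only to make that doubling/halving behavior explicit:

```python
import torch

scaler = torch.cuda.amp.GradScaler(
    init_scale=16.0,     # same order as the grad_scale values in the log
    growth_factor=2.0,   # 8.0 -> 16.0 -> 32.0 after clean stretches
    backoff_factor=0.5,  # 32.0 -> 16.0 -> 8.0 on overflow
    growth_interval=2000,
)

def train_step(model, optimizer, batch, criterion):
    optimizer.zero_grad(set_to_none=True)
    with torch.cuda.amp.autocast():
        loss = criterion(model(batch["inputs"]), batch["targets"])
    scaler.scale(loss).backward()
    scaler.step(optimizer)  # skipped internally if gradients overflowed
    scaler.update()         # adjusts the scale reported as grad_scale
    return loss.detach(), scaler.get_scale()
```
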
], batch size: 74, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:18:47,952 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.55 vs. limit=22.5 +2024-09-20 08:18:53,118 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=849640.0, ans=0.2 +2024-09-20 08:18:56,133 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:19:29,569 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.747e+01 8.716e+01 9.310e+01 9.869e+01 2.948e+02, threshold=1.862e+02, percent-clipped=1.0 +2024-09-20 08:19:44,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=849760.0, ans=0.2 +2024-09-20 08:19:48,985 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:19:50,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=849760.0, ans=0.025 +2024-09-20 08:19:53,044 INFO [train.py:1198] (1/2) Epoch 47, batch 4300, loss[loss=0.2398, ctc_loss=0.1102, cr_loss=0.3564, attn_decoder_loss=0.2463, over 29511.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1079, cr_loss=0.3459, attn_decoder_loss=0.237, over 5794955.04 frames. ], batch size: 87, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:19:54,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=849800.0, ans=0.125 +2024-09-20 08:19:57,959 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=849800.0, ans=0.0 +2024-09-20 08:20:00,919 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=849800.0, ans=0.125 +2024-09-20 08:20:02,339 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=849800.0, ans=0.1 +2024-09-20 08:20:13,237 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.75 vs. limit=22.5 +2024-09-20 08:20:27,787 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.17 vs. limit=15.0 +2024-09-20 08:20:32,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=849880.0, ans=0.0 +2024-09-20 08:20:37,591 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.59 vs. limit=15.0 +2024-09-20 08:20:40,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=849920.0, ans=0.1 +2024-09-20 08:21:07,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=850000.0, ans=0.125 +2024-09-20 08:21:08,689 INFO [train.py:1198] (1/2) Epoch 47, batch 4350, loss[loss=0.2429, ctc_loss=0.1218, cr_loss=0.3806, attn_decoder_loss=0.2479, over 29503.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1109, cr_loss=0.3526, attn_decoder_loss=0.2406, over 5798205.11 frames. 
], batch size: 97, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:21:25,346 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=850040.0, ans=0.125 +2024-09-20 08:21:47,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=850080.0, ans=0.125 +2024-09-20 08:21:58,353 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.538e+01 9.076e+01 9.411e+01 9.982e+01 1.475e+02, threshold=1.882e+02, percent-clipped=0.0 +2024-09-20 08:22:03,095 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=850120.0, ans=0.0 +2024-09-20 08:22:07,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=850160.0, ans=0.2 +2024-09-20 08:22:12,434 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.79 vs. limit=15.0 +2024-09-20 08:22:21,582 INFO [train.py:1198] (1/2) Epoch 47, batch 4400, loss[loss=0.2342, ctc_loss=0.1139, cr_loss=0.3552, attn_decoder_loss=0.2397, over 27388.00 frames. ], tot_loss[loss=0.2365, ctc_loss=0.1121, cr_loss=0.3548, attn_decoder_loss=0.2425, over 5768800.74 frames. ], batch size: 125, lr: 2.33e-03, grad_scale: 16.0 +2024-09-20 08:22:49,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=850280.0, ans=0.125 +2024-09-20 08:22:55,347 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.53 vs. limit=15.0 +2024-09-20 08:23:36,666 INFO [train.py:1198] (1/2) Epoch 47, batch 4450, loss[loss=0.2475, ctc_loss=0.1252, cr_loss=0.3689, attn_decoder_loss=0.2529, over 20410.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1153, cr_loss=0.3598, attn_decoder_loss=0.2446, over 5574932.92 frames. ], batch size: 209, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:23:38,581 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:24:10,347 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=9.45 vs. limit=12.0 +2024-09-20 08:24:12,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=850480.0, ans=0.125 +2024-09-20 08:24:25,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=850520.0, ans=0.1 +2024-09-20 08:24:29,097 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.530e+01 9.504e+01 1.076e+02 1.200e+02 1.579e+02, threshold=2.152e+02, percent-clipped=0.0 +2024-09-20 08:24:30,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=850520.0, ans=0.1 +2024-09-20 08:24:41,702 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=850560.0, ans=0.125 +2024-09-20 08:24:51,489 INFO [train.py:1198] (1/2) Epoch 47, batch 4500, loss[loss=0.2648, ctc_loss=0.1478, cr_loss=0.4006, attn_decoder_loss=0.2688, over 19573.00 frames. ], tot_loss[loss=0.2408, ctc_loss=0.1182, cr_loss=0.3622, attn_decoder_loss=0.2463, over 5237270.42 frames. 
], batch size: 209, lr: 2.33e-03, grad_scale: 8.0 +2024-09-20 08:24:57,689 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=850600.0, ans=0.125 +2024-09-20 08:25:03,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=850600.0, ans=0.0 +2024-09-20 08:25:15,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=850640.0, ans=0.0 +2024-09-20 08:26:14,114 INFO [train.py:1198] (1/2) Epoch 48, batch 0, loss[loss=0.2116, ctc_loss=0.09392, cr_loss=0.3314, attn_decoder_loss=0.2173, over 29644.00 frames. ], tot_loss[loss=0.2116, ctc_loss=0.09392, cr_loss=0.3314, attn_decoder_loss=0.2173, over 29644.00 frames. ], batch size: 73, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:26:14,114 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 08:26:32,432 INFO [train.py:1230] (1/2) Epoch 48, validation: loss=0.2131, ctc_loss=0.03621, cr_loss=7.075e-15, attn_decoder_loss=0.2327, over 944034.00 frames. +2024-09-20 08:26:32,433 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 08:26:40,303 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_positive, batch_count=850700.0, ans=0.05 +2024-09-20 08:27:00,234 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=7.13 vs. limit=12.0 +2024-09-20 08:27:25,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=850820.0, ans=0.0 +2024-09-20 08:27:49,844 INFO [train.py:1198] (1/2) Epoch 48, batch 50, loss[loss=0.2056, ctc_loss=0.0882, cr_loss=0.3018, attn_decoder_loss=0.212, over 29418.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1094, cr_loss=0.3496, attn_decoder_loss=0.2377, over 1268714.62 frames. ], batch size: 70, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:27:55,424 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.29 vs. limit=6.0 +2024-09-20 08:28:04,073 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.52 vs. limit=10.0 +2024-09-20 08:28:04,958 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.285e+01 9.048e+01 9.836e+01 1.173e+02 2.253e+02, threshold=1.967e+02, percent-clipped=1.0 +2024-09-20 08:28:55,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=851060.0, ans=0.125 +2024-09-20 08:29:01,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=851060.0, ans=0.1 +2024-09-20 08:29:01,889 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=7.65 vs. limit=15.0 +2024-09-20 08:29:03,535 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.88 vs. limit=15.0 +2024-09-20 08:29:07,776 INFO [train.py:1198] (1/2) Epoch 48, batch 100, loss[loss=0.2212, ctc_loss=0.1063, cr_loss=0.341, attn_decoder_loss=0.2264, over 29539.00 frames. 
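
At the epoch 47/48 boundary above, training pauses for a validation pass (`Computing validation loss` ... `validation: loss=0.2131 ... over 944034.00 frames`) and reports peak GPU memory. A minimal sketch of that bookkeeping; `compute_loss` is a hypothetical stand-in for the recipe's loss function:

```python
import torch

def validate(model, valid_loader, compute_loss, device="cuda:0"):
    model.eval()
    tot_loss, tot_frames = 0.0, 0.0
    with torch.no_grad():
        for batch in valid_loader:
            loss, num_frames = compute_loss(model, batch)  # hypothetical helper
            tot_loss += float(loss)
            tot_frames += num_frames
    model.train()
    print(f"validation: loss={tot_loss / tot_frames:.4f}, "
          f"over {tot_frames:.2f} frames")
    if torch.cuda.is_available():
        mb = torch.cuda.max_memory_allocated(device) // (1024 * 1024)
        print(f"Maximum memory allocated so far is {mb}MB")
```
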
], tot_loss[loss=0.2336, ctc_loss=0.1107, cr_loss=0.3522, attn_decoder_loss=0.2394, over 2253164.88 frames. ], batch size: 76, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:29:28,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=851140.0, ans=0.0 +2024-09-20 08:29:44,230 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.61 vs. limit=15.0 +2024-09-20 08:29:48,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=851180.0, ans=0.125 +2024-09-20 08:30:03,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=851220.0, ans=0.0 +2024-09-20 08:30:13,119 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.34 vs. limit=15.0 +2024-09-20 08:30:22,108 INFO [train.py:1198] (1/2) Epoch 48, batch 150, loss[loss=0.2067, ctc_loss=0.09413, cr_loss=0.3265, attn_decoder_loss=0.2119, over 29462.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1089, cr_loss=0.3483, attn_decoder_loss=0.2374, over 3047710.17 frames. ], batch size: 70, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:30:22,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=851300.0, ans=0.1 +2024-09-20 08:30:38,636 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.481e+01 8.661e+01 9.113e+01 9.779e+01 1.487e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-20 08:30:42,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=851340.0, ans=0.125 +2024-09-20 08:30:55,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.attention_skip_rate, batch_count=851380.0, ans=0.0 +2024-09-20 08:30:55,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=851380.0, ans=0.125 +2024-09-20 08:30:55,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=851380.0, ans=0.125 +2024-09-20 08:31:24,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=851460.0, ans=0.125 +2024-09-20 08:31:33,451 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.max_abs, batch_count=851460.0, ans=10.0 +2024-09-20 08:31:39,277 INFO [train.py:1198] (1/2) Epoch 48, batch 200, loss[loss=0.2404, ctc_loss=0.1207, cr_loss=0.3763, attn_decoder_loss=0.2454, over 27480.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.108, cr_loss=0.3462, attn_decoder_loss=0.2362, over 3660459.10 frames. ], batch size: 125, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:31:41,073 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=851500.0, ans=0.125 +2024-09-20 08:32:19,263 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=11.95 vs. 
limit=15.0 +2024-09-20 08:32:54,542 INFO [train.py:1198] (1/2) Epoch 48, batch 250, loss[loss=0.2373, ctc_loss=0.1175, cr_loss=0.3705, attn_decoder_loss=0.2424, over 29219.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1079, cr_loss=0.3466, attn_decoder_loss=0.2363, over 4140590.60 frames. ], batch size: 100, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:32:56,239 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:33:13,492 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.583e+01 8.550e+01 9.278e+01 9.687e+01 3.776e+02, threshold=1.856e+02, percent-clipped=1.0 +2024-09-20 08:33:18,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=851740.0, ans=0.125 +2024-09-20 08:33:19,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=851740.0, ans=0.125 +2024-09-20 08:33:23,258 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn1.whiten, num_groups=1, num_channels=768, metric=12.61 vs. limit=22.5 +2024-09-20 08:33:40,817 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=851820.0, ans=0.0 +2024-09-20 08:33:40,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=851820.0, ans=0.0 +2024-09-20 08:33:51,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=851820.0, ans=0.125 +2024-09-20 08:34:09,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=851860.0, ans=0.125 +2024-09-20 08:34:12,355 INFO [train.py:1198] (1/2) Epoch 48, batch 300, loss[loss=0.2389, ctc_loss=0.1175, cr_loss=0.3878, attn_decoder_loss=0.2438, over 29525.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1074, cr_loss=0.3459, attn_decoder_loss=0.236, over 4508493.64 frames. ], batch size: 92, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:34:14,255 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=851900.0, ans=0.0 +2024-09-20 08:34:18,742 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=851900.0, ans=0.1 +2024-09-20 08:34:31,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten.whitening_limit, batch_count=851940.0, ans=15.0 +2024-09-20 08:34:55,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=851980.0, ans=0.2 +2024-09-20 08:35:16,832 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=852060.0, ans=0.025 +2024-09-20 08:35:21,444 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=852060.0, ans=0.2 +2024-09-20 08:35:22,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=852060.0, ans=0.0 +2024-09-20 08:35:29,934 INFO [train.py:1198] (1/2) Epoch 48, batch 350, loss[loss=0.2135, ctc_loss=0.08479, cr_loss=0.2734, attn_decoder_loss=0.2217, over 29332.00 frames. 
], tot_loss[loss=0.2309, ctc_loss=0.1077, cr_loss=0.3467, attn_decoder_loss=0.2368, over 4793540.12 frames. ], batch size: 71, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:35:46,308 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.632e+01 9.132e+01 9.604e+01 3.712e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-20 08:36:03,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=852180.0, ans=0.125 +2024-09-20 08:36:45,275 INFO [train.py:1198] (1/2) Epoch 48, batch 400, loss[loss=0.2335, ctc_loss=0.1065, cr_loss=0.3451, attn_decoder_loss=0.24, over 29691.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1076, cr_loss=0.3461, attn_decoder_loss=0.2364, over 5023712.06 frames. ], batch size: 82, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:36:56,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=852300.0, ans=0.125 +2024-09-20 08:37:18,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=852380.0, ans=0.125 +2024-09-20 08:37:58,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=852460.0, ans=0.125 +2024-09-20 08:37:59,705 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.63 vs. limit=12.0 +2024-09-20 08:38:03,191 INFO [train.py:1198] (1/2) Epoch 48, batch 450, loss[loss=0.2458, ctc_loss=0.1208, cr_loss=0.3707, attn_decoder_loss=0.2515, over 29675.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1076, cr_loss=0.3451, attn_decoder_loss=0.2364, over 5186700.41 frames. ], batch size: 83, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:38:13,396 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.08 vs. limit=15.0 +2024-09-20 08:38:14,141 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=852500.0, ans=0.125 +2024-09-20 08:38:19,716 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.823e+01 8.734e+01 9.234e+01 9.898e+01 1.385e+02, threshold=1.847e+02, percent-clipped=0.0 +2024-09-20 08:38:51,059 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=7.24 vs. limit=15.0 +2024-09-20 08:39:00,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=852620.0, ans=0.0 +2024-09-20 08:39:12,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=852660.0, ans=0.025 +2024-09-20 08:39:21,102 INFO [train.py:1198] (1/2) Epoch 48, batch 500, loss[loss=0.2388, ctc_loss=0.1099, cr_loss=0.3331, attn_decoder_loss=0.2458, over 29459.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1072, cr_loss=0.3447, attn_decoder_loss=0.2357, over 5329776.43 frames. 
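
Alongside each per-batch `loss[...]`, the entries carry a `tot_loss[... over N frames]`. Within epoch 48 the frame count climbs from about 1.27e6 at batch 50 toward the ~5.8e6 it hovered at throughout epoch 47, the signature of a geometrically decaying frame-weighted average rather than a plain cumulative sum: with roughly 25-30k frames per batch, a per-batch decay near 0.995 gives a steady state of batch_frames / (1 - decay), on the order of 5-6e6 frames. The decay constant below is back-solved from those numbers, not taken from the code:

```python
class RunningLossSketch:
    """Frame-weighted loss average with geometric forgetting (sketch)."""
    def __init__(self, decay: float = 0.995):  # assumed; see note above
        self.decay = decay
        self.loss_sum = 0.0
        self.frames = 0.0

    def update(self, batch_loss: float, batch_frames: float) -> None:
        self.loss_sum = self.decay * self.loss_sum + batch_loss * batch_frames
        self.frames = self.decay * self.frames + batch_frames

    @property
    def tot_loss(self) -> float:
        return self.loss_sum / self.frames

tracker = RunningLossSketch()
for _ in range(2000):
    tracker.update(batch_loss=0.23, batch_frames=29000.0)
print(f"tot_loss[{tracker.tot_loss:.4f}, over {tracker.frames:.2f} frames]")
# frames saturates near 29000 / (1 - 0.995) = 5.8e6, as in the log
```
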
], batch size: 94, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:39:25,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=852700.0, ans=0.2 +2024-09-20 08:39:52,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=852780.0, ans=0.0 +2024-09-20 08:40:08,458 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=852820.0, ans=0.0 +2024-09-20 08:40:16,138 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=852820.0, ans=0.2 +2024-09-20 08:40:36,936 INFO [train.py:1198] (1/2) Epoch 48, batch 550, loss[loss=0.2431, ctc_loss=0.1098, cr_loss=0.3504, attn_decoder_loss=0.2501, over 28871.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1071, cr_loss=0.3446, attn_decoder_loss=0.2357, over 5422019.22 frames. ], batch size: 104, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:40:53,454 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.397e+01 8.626e+01 8.943e+01 9.744e+01 1.321e+02, threshold=1.789e+02, percent-clipped=0.0 +2024-09-20 08:41:07,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.91 vs. limit=15.0 +2024-09-20 08:41:41,224 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=853060.0, ans=0.0 +2024-09-20 08:41:50,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=853060.0, ans=0.2 +2024-09-20 08:41:54,605 INFO [train.py:1198] (1/2) Epoch 48, batch 600, loss[loss=0.2421, ctc_loss=0.1102, cr_loss=0.341, attn_decoder_loss=0.2492, over 29213.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1071, cr_loss=0.3447, attn_decoder_loss=0.2361, over 5508860.71 frames. ], batch size: 100, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:41:56,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=853100.0, ans=0.025 +2024-09-20 08:42:05,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=853100.0, ans=0.125 +2024-09-20 08:42:20,717 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.17 vs. limit=15.0 +2024-09-20 08:42:43,273 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.87 vs. limit=10.0 +2024-09-20 08:43:04,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=853260.0, ans=0.0 +2024-09-20 08:43:12,012 INFO [train.py:1198] (1/2) Epoch 48, batch 650, loss[loss=0.2313, ctc_loss=0.1087, cr_loss=0.3548, attn_decoder_loss=0.237, over 29782.00 frames. ], tot_loss[loss=0.2292, ctc_loss=0.1063, cr_loss=0.3429, attn_decoder_loss=0.2352, over 5585649.31 frames. 
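
The learning rate steps from `lr: 2.33e-03` in epoch 47 to `2.30e-03` in epoch 48, decaying smoothly in both batch count and epoch. A sketch of an Eden-style schedule of the kind Zipformer recipes use; `base_lr`, `lr_batches` and `lr_epochs` are assumed constants here, chosen so the per-epoch decay comes out near the roughly 1% step seen in the log:

```python
def eden_lr(base_lr: float, batch: int, epoch: int,
            lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
    # Decays like batch^-0.5 and epoch^-0.5 asymptotically, flat near zero.
    batch_factor = ((batch ** 2 + lr_batches ** 2) / lr_batches ** 2) ** -0.25
    epoch_factor = ((epoch ** 2 + lr_epochs ** 2) / lr_epochs ** 2) ** -0.25
    return base_lr * batch_factor * epoch_factor

lr47 = eden_lr(0.045, batch=850000, epoch=47)
lr48 = eden_lr(0.045, batch=856000, epoch=48)
print(lr47, lr48, lr48 / lr47)  # ratio ~ 0.99, matching the logged step
```
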
], batch size: 81, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:43:15,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=853300.0, ans=0.1 +2024-09-20 08:43:28,434 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.698e+01 8.608e+01 8.952e+01 9.536e+01 4.634e+02, threshold=1.790e+02, percent-clipped=1.0 +2024-09-20 08:43:48,612 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:43:51,703 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=853380.0, ans=0.125 +2024-09-20 08:43:54,734 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=853380.0, ans=0.0 +2024-09-20 08:44:03,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=853420.0, ans=0.1 +2024-09-20 08:44:05,836 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.31 vs. limit=22.5 +2024-09-20 08:44:24,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=853460.0, ans=0.125 +2024-09-20 08:44:27,293 INFO [train.py:1198] (1/2) Epoch 48, batch 700, loss[loss=0.2281, ctc_loss=0.1098, cr_loss=0.3538, attn_decoder_loss=0.2334, over 29520.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1069, cr_loss=0.3445, attn_decoder_loss=0.2357, over 5636393.15 frames. ], batch size: 76, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:44:42,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=853540.0, ans=0.125 +2024-09-20 08:44:43,995 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=853540.0, ans=0.125 +2024-09-20 08:44:47,086 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=853540.0, ans=0.0 +2024-09-20 08:44:51,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=853540.0, ans=0.0 +2024-09-20 08:44:53,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=853540.0, ans=0.0 +2024-09-20 08:45:04,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=853580.0, ans=0.025 +2024-09-20 08:45:05,045 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.85 vs. limit=10.0 +2024-09-20 08:45:11,195 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=853580.0, ans=0.125 +2024-09-20 08:45:13,417 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.12 vs. 
limit=6.0 +2024-09-20 08:45:21,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=853620.0, ans=0.0 +2024-09-20 08:45:45,552 INFO [train.py:1198] (1/2) Epoch 48, batch 750, loss[loss=0.2333, ctc_loss=0.1117, cr_loss=0.3591, attn_decoder_loss=0.2388, over 29678.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.107, cr_loss=0.3447, attn_decoder_loss=0.2355, over 5673829.58 frames. ], batch size: 82, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:45:47,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=853700.0, ans=0.0 +2024-09-20 08:45:51,831 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=853700.0, ans=0.1 +2024-09-20 08:45:53,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=853700.0, ans=0.125 +2024-09-20 08:46:00,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.min_abs, batch_count=853740.0, ans=0.5 +2024-09-20 08:46:01,950 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.518e+01 8.792e+01 9.309e+01 9.827e+01 1.298e+02, threshold=1.862e+02, percent-clipped=0.0 +2024-09-20 08:46:18,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=853780.0, ans=0.125 +2024-09-20 08:46:31,323 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.80 vs. limit=15.0 +2024-09-20 08:46:37,261 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.52 vs. limit=22.5 +2024-09-20 08:47:03,067 INFO [train.py:1198] (1/2) Epoch 48, batch 800, loss[loss=0.2043, ctc_loss=0.08484, cr_loss=0.2855, attn_decoder_loss=0.2113, over 29608.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1073, cr_loss=0.3449, attn_decoder_loss=0.236, over 5705317.22 frames. ], batch size: 73, lr: 2.30e-03, grad_scale: 32.0 +2024-09-20 08:47:10,867 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=853900.0, ans=0.125 +2024-09-20 08:47:42,541 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=853980.0, ans=0.0 +2024-09-20 08:47:44,040 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.prob, batch_count=853980.0, ans=0.125 +2024-09-20 08:47:47,100 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=854020.0, ans=0.125 +2024-09-20 08:47:48,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=854020.0, ans=0.125 +2024-09-20 08:47:50,502 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.98 vs. limit=22.5 +2024-09-20 08:48:18,093 INFO [train.py:1198] (1/2) Epoch 48, batch 850, loss[loss=0.2291, ctc_loss=0.1001, cr_loss=0.3323, attn_decoder_loss=0.236, over 29711.00 frames. 
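
The `Whitening` entries compare a per-activation statistic against a limit (e.g. `metric=3.85 vs. limit=10.0`); the metric is small when the channel covariance is close to isotropic and grows when a few directions dominate, with a corrective penalty applied only above the limit. One plausible such statistic, used here purely as an assumed illustration rather than the formula in `scaling.py`, is the ratio of the second moment of the covariance eigenvalues to their squared mean, which is exactly 1.0 for perfectly white features:

```python
import torch

def whitening_metric(x: torch.Tensor) -> float:
    """x: (num_frames, num_channels) activations. Sketch, not scaling.py."""
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.T @ x) / x.shape[0]        # channel covariance
    eigs = torch.linalg.eigvalsh(cov)   # real eigenvalues, ascending
    return float((eigs ** 2).mean() / eigs.mean() ** 2)

x = torch.randn(2000, 512)              # nearly white activations
print(f"metric={whitening_metric(x):.2f} vs. limit=15.0")
# near 1 for white data; grows as channels become correlated
```
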
], tot_loss[loss=0.2296, ctc_loss=0.1068, cr_loss=0.3437, attn_decoder_loss=0.2356, over 5734264.92 frames. ], batch size: 89, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:48:26,189 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:48:35,983 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.262e+01 8.678e+01 9.128e+01 9.659e+01 1.410e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-20 08:48:54,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=854180.0, ans=0.125 +2024-09-20 08:49:07,549 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=854220.0, ans=0.025 +2024-09-20 08:49:09,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=854220.0, ans=0.2 +2024-09-20 08:49:10,578 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=854220.0, ans=0.2 +2024-09-20 08:49:36,100 INFO [train.py:1198] (1/2) Epoch 48, batch 900, loss[loss=0.2097, ctc_loss=0.09065, cr_loss=0.3045, attn_decoder_loss=0.2161, over 29599.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1072, cr_loss=0.3447, attn_decoder_loss=0.2361, over 5739560.83 frames. ], batch size: 73, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:49:39,748 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.93 vs. limit=22.5 +2024-09-20 08:50:20,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=854420.0, ans=0.125 +2024-09-20 08:50:21,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=854420.0, ans=0.0 +2024-09-20 08:50:30,197 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.21 vs. limit=22.5 +2024-09-20 08:50:42,254 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 08:50:53,784 INFO [train.py:1198] (1/2) Epoch 48, batch 950, loss[loss=0.2151, ctc_loss=0.09202, cr_loss=0.3122, attn_decoder_loss=0.2218, over 29489.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1072, cr_loss=0.3444, attn_decoder_loss=0.2362, over 5742352.21 frames. ], batch size: 74, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:50:57,610 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=3.85 vs. limit=12.0 +2024-09-20 08:51:04,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=854500.0, ans=0.1 +2024-09-20 08:51:11,717 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.672e+01 8.747e+01 9.386e+01 9.871e+01 2.198e+02, threshold=1.877e+02, percent-clipped=1.0 +2024-09-20 08:51:57,893 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.55 vs. 
limit=12.0 +2024-09-20 08:52:00,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=854660.0, ans=0.0 +2024-09-20 08:52:08,494 INFO [train.py:1198] (1/2) Epoch 48, batch 1000, loss[loss=0.2322, ctc_loss=0.1154, cr_loss=0.3697, attn_decoder_loss=0.237, over 29528.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1082, cr_loss=0.3466, attn_decoder_loss=0.237, over 5736964.08 frames. ], batch size: 77, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:52:25,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=854740.0, ans=0.0 +2024-09-20 08:52:32,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=854740.0, ans=0.125 +2024-09-20 08:52:43,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=854780.0, ans=0.125 +2024-09-20 08:52:45,854 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=6.29 vs. limit=12.0 +2024-09-20 08:52:50,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=854780.0, ans=0.2 +2024-09-20 08:52:56,693 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=9.65 vs. limit=15.0 +2024-09-20 08:53:03,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=854820.0, ans=0.0 +2024-09-20 08:53:12,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=854860.0, ans=0.125 +2024-09-20 08:53:25,979 INFO [train.py:1198] (1/2) Epoch 48, batch 1050, loss[loss=0.2387, ctc_loss=0.112, cr_loss=0.3501, attn_decoder_loss=0.245, over 29667.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.108, cr_loss=0.3462, attn_decoder_loss=0.2367, over 5746922.98 frames. ], batch size: 85, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:53:44,086 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.543e+01 8.686e+01 9.233e+01 9.898e+01 2.337e+02, threshold=1.847e+02, percent-clipped=2.0 +2024-09-20 08:53:56,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=854980.0, ans=0.125 +2024-09-20 08:54:04,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=854980.0, ans=0.1 +2024-09-20 08:54:13,282 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=855020.0, ans=0.0 +2024-09-20 08:54:43,764 INFO [train.py:1198] (1/2) Epoch 48, batch 1100, loss[loss=0.2193, ctc_loss=0.09411, cr_loss=0.323, attn_decoder_loss=0.2261, over 29432.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1076, cr_loss=0.3453, attn_decoder_loss=0.2363, over 5757703.43 frames. 
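
Many of the scheduled names end in `*_skip_rate` (`conv_skip_rate`, `attention_skip_rate`, `ff2_skip_rate`, `bypass.skip_rate`), all sitting at 0.0 this late in training: earlier in training, each such sub-module is dropped at random with that probability, stochastic-depth style. A minimal sketch of the idea, assuming a simple residual wrapper rather than the actual Zipformer layers:

```python
import torch
import torch.nn as nn

class SkippableResidual(nn.Module):
    """Runs `module` residually, but skips it with prob. skip_rate in training."""
    def __init__(self, module: nn.Module, skip_rate: float = 0.05):
        super().__init__()
        self.module = module
        self.skip_rate = skip_rate  # a ScheduledFloat in the actual recipe

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.training and float(torch.rand(())) < self.skip_rate:
            return x                 # sub-module skipped for this batch
        return x + self.module(x)

layer = SkippableResidual(nn.Linear(512, 512), skip_rate=0.05)
y = layer(torch.randn(4, 512))
```
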
], batch size: 78, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:54:48,529 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=855100.0, ans=0.2 +2024-09-20 08:55:25,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=855180.0, ans=0.2 +2024-09-20 08:55:56,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=855260.0, ans=0.0 +2024-09-20 08:55:58,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=855300.0, ans=0.2 +2024-09-20 08:55:59,692 INFO [train.py:1198] (1/2) Epoch 48, batch 1150, loss[loss=0.2289, ctc_loss=0.1113, cr_loss=0.3342, attn_decoder_loss=0.2346, over 29446.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1079, cr_loss=0.3457, attn_decoder_loss=0.2365, over 5757322.31 frames. ], batch size: 78, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:56:13,737 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=855340.0, ans=0.0 +2024-09-20 08:56:19,237 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.603e+01 9.086e+01 9.808e+01 3.950e+02, threshold=1.817e+02, percent-clipped=2.0 +2024-09-20 08:56:20,411 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.36 vs. limit=15.0 +2024-09-20 08:57:17,294 INFO [train.py:1198] (1/2) Epoch 48, batch 1200, loss[loss=0.2345, ctc_loss=0.105, cr_loss=0.3456, attn_decoder_loss=0.2412, over 29666.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1081, cr_loss=0.3464, attn_decoder_loss=0.2371, over 5748307.11 frames. ], batch size: 85, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:57:18,135 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=6.83 vs. limit=15.0 +2024-09-20 08:57:28,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=855500.0, ans=0.0 +2024-09-20 08:57:29,634 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward3.hidden_balancer.prob, batch_count=855500.0, ans=0.125 +2024-09-20 08:57:55,432 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=855580.0, ans=0.125 +2024-09-20 08:58:12,077 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=13.15 vs. limit=22.5 +2024-09-20 08:58:21,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff2_skip_rate, batch_count=855660.0, ans=0.0 +2024-09-20 08:58:32,180 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=855660.0, ans=0.125 +2024-09-20 08:58:34,853 INFO [train.py:1198] (1/2) Epoch 48, batch 1250, loss[loss=0.2448, ctc_loss=0.1219, cr_loss=0.373, attn_decoder_loss=0.2501, over 29503.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1084, cr_loss=0.347, attn_decoder_loss=0.2375, over 5775664.28 frames. 
], batch size: 92, lr: 2.30e-03, grad_scale: 16.0 +2024-09-20 08:58:54,651 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.675e+01 8.788e+01 9.389e+01 9.946e+01 2.084e+02, threshold=1.878e+02, percent-clipped=1.0 +2024-09-20 08:59:48,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=855860.0, ans=0.2 +2024-09-20 08:59:50,615 INFO [train.py:1198] (1/2) Epoch 48, batch 1300, loss[loss=0.2366, ctc_loss=0.1081, cr_loss=0.3372, attn_decoder_loss=0.2434, over 28295.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1083, cr_loss=0.3464, attn_decoder_loss=0.2374, over 5779597.57 frames. ], batch size: 111, lr: 2.30e-03, grad_scale: 8.0 +2024-09-20 08:59:56,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=855900.0, ans=0.125 +2024-09-20 09:00:10,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=855940.0, ans=0.04949747468305833 +2024-09-20 09:00:13,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer_ff3.min_abs, batch_count=855940.0, ans=0.2 +2024-09-20 09:00:16,727 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:00:34,137 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=13.62 vs. limit=15.0 +2024-09-20 09:01:09,005 INFO [train.py:1198] (1/2) Epoch 48, batch 1350, loss[loss=0.2294, ctc_loss=0.1017, cr_loss=0.3155, attn_decoder_loss=0.2366, over 29766.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1078, cr_loss=0.3458, attn_decoder_loss=0.237, over 5798061.06 frames. ], batch size: 81, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:01:10,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=856100.0, ans=0.0 +2024-09-20 09:01:22,782 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=856140.0, ans=0.2 +2024-09-20 09:01:29,676 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 8.601e+01 8.992e+01 9.491e+01 1.134e+02, threshold=1.798e+02, percent-clipped=0.0 +2024-09-20 09:01:29,955 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=856140.0, ans=0.1 +2024-09-20 09:01:47,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=856180.0, ans=0.2 +2024-09-20 09:01:53,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=856220.0, ans=0.0 +2024-09-20 09:01:59,890 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:02:17,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=856260.0, ans=0.125 +2024-09-20 09:02:25,970 INFO [train.py:1198] (1/2) Epoch 48, batch 1400, loss[loss=0.2062, ctc_loss=0.08993, cr_loss=0.3119, attn_decoder_loss=0.2122, over 29599.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1075, cr_loss=0.3451, attn_decoder_loss=0.2365, over 5809005.02 frames. 
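The recurring optim.py WARNING lines summarize recent gradient norms as (min, 25%, 50%, 75%, max) followed by the clipping threshold and the fraction of batches clipped. Throughout this section the threshold equals Clipping_scale times the logged median (for instance 2.0 x 8.992e+01 = 1.798e+02 just above), which suggests clipping against a scaled running median of gradient norms. The sketch below reconstructs that behaviour; it is an inference from the printed values, not the optimizer's actual implementation.

```python
from collections import deque
import numpy as np

class GradNormClipper:
    """Clip gradients against clipping_scale * median of recent grad norms.

    Mirrors what the log suggests: `threshold` tracks 2x the median
    gradient norm, and `percent-clipped` counts how often clipping fired.
    Reconstruction from the logged values, not the actual optim.py code.
    """
    def __init__(self, clipping_scale: float = 2.0, history: int = 100):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=history)
        self.num_steps = 0
        self.num_clipped = 0

    def step(self, grad_norm: float) -> float:
        """Record one step's gradient norm; return the rescale factor."""
        self.norms.append(grad_norm)
        median = np.percentile(self.norms, 50)
        threshold = self.clipping_scale * median
        self.num_steps += 1
        if grad_norm > threshold:
            self.num_clipped += 1
            return threshold / grad_norm  # scale gradients down to the threshold
        return 1.0

    def summary(self) -> dict:
        """Reproduce the fields printed in the WARNING lines."""
        arr = np.asarray(self.norms)
        return dict(
            quartiles=(arr.min(), *np.percentile(arr, [25, 50, 75]), arr.max()),
            threshold=self.clipping_scale * np.percentile(arr, 50),
            percent_clipped=100.0 * self.num_clipped / max(1, self.num_steps),
        )
```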
], batch size: 69, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:02:29,703 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.05 vs. limit=6.0 +2024-09-20 09:02:33,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=856300.0, ans=0.0 +2024-09-20 09:03:05,654 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.91 vs. limit=10.0 +2024-09-20 09:03:09,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=856420.0, ans=0.1 +2024-09-20 09:03:19,488 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.73 vs. limit=15.0 +2024-09-20 09:03:38,461 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=856460.0, ans=0.125 +2024-09-20 09:03:41,078 INFO [train.py:1198] (1/2) Epoch 48, batch 1450, loss[loss=0.2389, ctc_loss=0.09963, cr_loss=0.3289, attn_decoder_loss=0.247, over 29424.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1076, cr_loss=0.3455, attn_decoder_loss=0.2371, over 5805309.54 frames. ], batch size: 94, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:03:52,447 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.40 vs. limit=15.0 +2024-09-20 09:03:58,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=856540.0, ans=0.2 +2024-09-20 09:04:02,256 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.617e+01 8.708e+01 9.120e+01 9.678e+01 1.766e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-20 09:04:03,981 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=856540.0, ans=0.0 +2024-09-20 09:04:09,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=856580.0, ans=0.125 +2024-09-20 09:04:26,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=856620.0, ans=0.125 +2024-09-20 09:04:49,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=13.09 vs. limit=22.5 +2024-09-20 09:04:58,579 INFO [train.py:1198] (1/2) Epoch 48, batch 1500, loss[loss=0.2405, ctc_loss=0.1056, cr_loss=0.3294, attn_decoder_loss=0.2482, over 29623.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1077, cr_loss=0.3458, attn_decoder_loss=0.2374, over 5805172.51 frames. 
], batch size: 86, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:05:00,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=856700.0, ans=0.125 +2024-09-20 09:05:15,520 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=856740.0, ans=0.0 +2024-09-20 09:05:18,658 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:05:21,599 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=856740.0, ans=0.0 +2024-09-20 09:05:30,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=856780.0, ans=0.125 +2024-09-20 09:05:38,002 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.59 vs. limit=22.5 +2024-09-20 09:05:41,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=856780.0, ans=0.05 +2024-09-20 09:06:13,645 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.97 vs. limit=15.0 +2024-09-20 09:06:17,038 INFO [train.py:1198] (1/2) Epoch 48, batch 1550, loss[loss=0.2355, ctc_loss=0.1097, cr_loss=0.35, attn_decoder_loss=0.2416, over 29514.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1078, cr_loss=0.3457, attn_decoder_loss=0.2372, over 5781196.23 frames. ], batch size: 90, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:06:30,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=856940.0, ans=0.0 +2024-09-20 09:06:35,555 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=856940.0, ans=0.0 +2024-09-20 09:06:38,058 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.059e+01 8.748e+01 9.189e+01 9.595e+01 2.151e+02, threshold=1.838e+02, percent-clipped=1.0 +2024-09-20 09:07:31,859 INFO [train.py:1198] (1/2) Epoch 48, batch 1600, loss[loss=0.2355, ctc_loss=0.1059, cr_loss=0.3512, attn_decoder_loss=0.2421, over 29659.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1076, cr_loss=0.3453, attn_decoder_loss=0.2367, over 5765535.37 frames. ], batch size: 85, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:07:33,682 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=857100.0, ans=0.0 +2024-09-20 09:07:51,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=857140.0, ans=0.05 +2024-09-20 09:08:49,422 INFO [train.py:1198] (1/2) Epoch 48, batch 1650, loss[loss=0.2503, ctc_loss=0.1206, cr_loss=0.3647, attn_decoder_loss=0.2566, over 29697.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.3452, attn_decoder_loss=0.2366, over 5759324.33 frames. ], batch size: 89, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:09:09,906 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.12 vs. 
limit=22.5 +2024-09-20 09:09:10,379 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.501e+01 8.669e+01 9.204e+01 9.828e+01 1.752e+02, threshold=1.841e+02, percent-clipped=0.0 +2024-09-20 09:09:33,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=857420.0, ans=0.0 +2024-09-20 09:09:47,632 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=857420.0, ans=0.125 +2024-09-20 09:09:53,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=857460.0, ans=0.1 +2024-09-20 09:10:07,180 INFO [train.py:1198] (1/2) Epoch 48, batch 1700, loss[loss=0.2005, ctc_loss=0.08553, cr_loss=0.2933, attn_decoder_loss=0.2068, over 29539.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1073, cr_loss=0.3448, attn_decoder_loss=0.2364, over 5779436.13 frames. ], batch size: 69, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:10:08,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer2.prob, batch_count=857500.0, ans=0.125 +2024-09-20 09:10:15,150 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:10:16,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=857500.0, ans=0.0 +2024-09-20 09:10:18,192 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:10:30,395 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:10:50,844 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.81 vs. limit=15.0 +2024-09-20 09:11:23,280 INFO [train.py:1198] (1/2) Epoch 48, batch 1750, loss[loss=0.2074, ctc_loss=0.09606, cr_loss=0.3261, attn_decoder_loss=0.2125, over 29322.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1072, cr_loss=0.345, attn_decoder_loss=0.2361, over 5787045.40 frames. ], batch size: 67, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:11:35,720 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=857700.0, ans=0.125 +2024-09-20 09:11:43,244 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=857740.0, ans=0.125 +2024-09-20 09:11:44,442 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.991e+01 8.682e+01 9.026e+01 9.554e+01 1.464e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-20 09:11:56,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=857780.0, ans=0.0 +2024-09-20 09:12:13,376 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=857820.0, ans=0.125 +2024-09-20 09:12:18,264 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.55 vs. limit=15.0 +2024-09-20 09:12:40,542 INFO [train.py:1198] (1/2) Epoch 48, batch 1800, loss[loss=0.2422, ctc_loss=0.1145, cr_loss=0.3639, attn_decoder_loss=0.2483, over 29685.00 frames. 
], tot_loss[loss=0.2303, ctc_loss=0.1072, cr_loss=0.3454, attn_decoder_loss=0.2363, over 5790576.88 frames. ], batch size: 83, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:12:40,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=857900.0, ans=0.5 +2024-09-20 09:12:40,999 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:12:43,873 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=857900.0, ans=0.0 +2024-09-20 09:12:48,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=857900.0, ans=0.125 +2024-09-20 09:12:55,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=857940.0, ans=0.125 +2024-09-20 09:13:00,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=857940.0, ans=0.07 +2024-09-20 09:13:11,624 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=8.12 vs. limit=15.0 +2024-09-20 09:13:58,142 INFO [train.py:1198] (1/2) Epoch 48, batch 1850, loss[loss=0.2475, ctc_loss=0.1228, cr_loss=0.3764, attn_decoder_loss=0.253, over 29621.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1076, cr_loss=0.3461, attn_decoder_loss=0.2365, over 5797645.45 frames. ], batch size: 86, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:14:00,517 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.16 vs. limit=10.0 +2024-09-20 09:14:04,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=858100.0, ans=0.125 +2024-09-20 09:14:19,246 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.602e+01 8.577e+01 9.244e+01 9.733e+01 2.629e+02, threshold=1.849e+02, percent-clipped=1.0 +2024-09-20 09:14:27,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=858180.0, ans=0.125 +2024-09-20 09:14:30,229 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=858180.0, ans=0.125 +2024-09-20 09:14:40,697 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=858180.0, ans=0.1 +2024-09-20 09:14:54,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=858220.0, ans=0.0 +2024-09-20 09:14:54,682 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.78 vs. limit=12.0 +2024-09-20 09:15:07,740 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=858260.0, ans=0.125 +2024-09-20 09:15:13,435 INFO [train.py:1198] (1/2) Epoch 48, batch 1900, loss[loss=0.2429, ctc_loss=0.112, cr_loss=0.3516, attn_decoder_loss=0.2496, over 29708.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1078, cr_loss=0.3464, attn_decoder_loss=0.237, over 5804819.68 frames. 
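The scaling.py ScheduledFloat lines track hyperparameters (dropout probabilities, skip rates, balancer probabilities, bypass scales) whose values are a function of batch_count rather than constants. A minimal sketch of such a schedule is given below, assuming piecewise-linear interpolation between (batch_count, value) breakpoints; the breakpoint numbers are invented for illustration.

```python
import bisect

class ScheduledFloat:
    """A float hyperparameter indexed by how many batches have been seen.

    Piecewise-linear interpolation between (batch_count, value) pairs,
    held constant outside the given range. A sketch only; the breakpoints
    below are illustrative, not taken from the recipe.
    """
    def __init__(self, *points):
        self.points = sorted(points)  # list of (batch_count, value) tuples

    def __call__(self, batch_count: float) -> float:
        xs = [x for x, _ in self.points]
        i = bisect.bisect_right(xs, batch_count)
        if i == 0:
            return self.points[0][1]
        if i == len(self.points):
            return self.points[-1][1]
        (x0, y0), (x1, y1) = self.points[i - 1], self.points[i]
        t = (batch_count - x0) / (x1 - x0)
        return y0 + t * (y1 - y0)

# e.g. a dropout that decays from 0.3 to 0.1 over the first 20k batches
dropout_p = ScheduledFloat((0.0, 0.3), (20000.0, 0.1))
assert abs(dropout_p(10000.0) - 0.2) < 1e-9
```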
], batch size: 89, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:16:07,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=858420.0, ans=0.0 +2024-09-20 09:16:07,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=858420.0, ans=0.125 +2024-09-20 09:16:12,222 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=858420.0, ans=0.125 +2024-09-20 09:16:20,680 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.89 vs. limit=15.0 +2024-09-20 09:16:30,249 INFO [train.py:1198] (1/2) Epoch 48, batch 1950, loss[loss=0.2196, ctc_loss=0.1014, cr_loss=0.3454, attn_decoder_loss=0.225, over 29461.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1082, cr_loss=0.3475, attn_decoder_loss=0.2379, over 5819554.38 frames. ], batch size: 78, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:16:53,365 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.839e+01 9.358e+01 9.818e+01 1.771e+02, threshold=1.872e+02, percent-clipped=0.0 +2024-09-20 09:16:54,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=858540.0, ans=22.5 +2024-09-20 09:17:07,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=858580.0, ans=0.1 +2024-09-20 09:17:12,635 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.80 vs. limit=15.0 +2024-09-20 09:17:13,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=858580.0, ans=0.0 +2024-09-20 09:17:30,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=858620.0, ans=0.0 +2024-09-20 09:17:49,893 INFO [train.py:1198] (1/2) Epoch 48, batch 2000, loss[loss=0.201, ctc_loss=0.08404, cr_loss=0.2885, attn_decoder_loss=0.2076, over 29339.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.1083, cr_loss=0.3474, attn_decoder_loss=0.2379, over 5798589.72 frames. 
], batch size: 67, lr: 2.29e-03, grad_scale: 32.0 +2024-09-20 09:17:51,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=858700.0, ans=0.2 +2024-09-20 09:18:05,559 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=858740.0, ans=0.0 +2024-09-20 09:18:49,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=858860.0, ans=0.125 +2024-09-20 09:18:50,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=858860.0, ans=0.125 +2024-09-20 09:18:51,994 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=858860.0, ans=0.2 +2024-09-20 09:18:59,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=858860.0, ans=0.0 +2024-09-20 09:19:05,497 INFO [train.py:1198] (1/2) Epoch 48, batch 2050, loss[loss=0.2095, ctc_loss=0.08499, cr_loss=0.2997, attn_decoder_loss=0.2167, over 29441.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1077, cr_loss=0.3458, attn_decoder_loss=0.237, over 5790150.11 frames. ], batch size: 70, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:19:07,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=858900.0, ans=0.125 +2024-09-20 09:19:08,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=858900.0, ans=0.125 +2024-09-20 09:19:12,413 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. limit=6.0 +2024-09-20 09:19:25,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff2_skip_rate, batch_count=858940.0, ans=0.0 +2024-09-20 09:19:28,040 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.210e+01 8.559e+01 9.116e+01 9.582e+01 1.621e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-20 09:20:04,220 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:20:08,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=859060.0, ans=0.07 +2024-09-20 09:20:17,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=859060.0, ans=0.2 +2024-09-20 09:20:20,537 INFO [train.py:1198] (1/2) Epoch 48, batch 2100, loss[loss=0.2312, ctc_loss=0.1047, cr_loss=0.3523, attn_decoder_loss=0.2374, over 29752.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1075, cr_loss=0.3456, attn_decoder_loss=0.2366, over 5801461.55 frames. ], batch size: 81, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:20:26,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=859100.0, ans=0.1 +2024-09-20 09:20:39,894 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.79 vs. 
limit=15.0 +2024-09-20 09:20:45,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=859140.0, ans=0.025 +2024-09-20 09:21:10,979 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=859220.0, ans=0.125 +2024-09-20 09:21:17,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=15.28 vs. limit=15.0 +2024-09-20 09:21:29,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=859260.0, ans=0.125 +2024-09-20 09:21:32,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=859260.0, ans=0.0 +2024-09-20 09:21:37,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=859260.0, ans=0.125 +2024-09-20 09:21:40,092 INFO [train.py:1198] (1/2) Epoch 48, batch 2150, loss[loss=0.2258, ctc_loss=0.1071, cr_loss=0.3491, attn_decoder_loss=0.2313, over 29439.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1072, cr_loss=0.345, attn_decoder_loss=0.2362, over 5814949.91 frames. ], batch size: 78, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:21:44,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=859300.0, ans=0.1 +2024-09-20 09:21:56,123 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.63 vs. limit=15.0 +2024-09-20 09:22:02,727 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.204e+01 8.576e+01 8.993e+01 9.601e+01 1.335e+02, threshold=1.799e+02, percent-clipped=0.0 +2024-09-20 09:22:16,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=859380.0, ans=0.1 +2024-09-20 09:22:16,686 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=859380.0, ans=0.025 +2024-09-20 09:22:39,178 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=859460.0, ans=0.0 +2024-09-20 09:22:43,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=859460.0, ans=0.125 +2024-09-20 09:22:55,663 INFO [train.py:1198] (1/2) Epoch 48, batch 2200, loss[loss=0.2409, ctc_loss=0.107, cr_loss=0.3448, attn_decoder_loss=0.2482, over 29617.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1077, cr_loss=0.346, attn_decoder_loss=0.2365, over 5812033.29 frames. ], batch size: 86, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:23:17,505 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.41 vs. limit=15.0 +2024-09-20 09:23:35,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=859580.0, ans=0.0 +2024-09-20 09:23:40,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.15 vs. 
limit=10.0 +2024-09-20 09:23:41,064 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:23:46,308 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.08 vs. limit=6.0 +2024-09-20 09:23:53,028 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:23:55,944 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff2_skip_rate, batch_count=859660.0, ans=0.0 +2024-09-20 09:24:00,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=859660.0, ans=0.0 +2024-09-20 09:24:10,683 INFO [train.py:1198] (1/2) Epoch 48, batch 2250, loss[loss=0.2417, ctc_loss=0.1169, cr_loss=0.3556, attn_decoder_loss=0.2476, over 29710.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.107, cr_loss=0.3445, attn_decoder_loss=0.2361, over 5811872.23 frames. ], batch size: 82, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:24:32,714 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=859740.0, ans=0.1 +2024-09-20 09:24:35,429 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.496e+01 8.683e+01 9.115e+01 9.671e+01 7.163e+02, threshold=1.823e+02, percent-clipped=1.0 +2024-09-20 09:25:01,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=859820.0, ans=0.2 +2024-09-20 09:25:21,885 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=859860.0, ans=0.0 +2024-09-20 09:25:30,677 INFO [train.py:1198] (1/2) Epoch 48, batch 2300, loss[loss=0.2063, ctc_loss=0.09438, cr_loss=0.3221, attn_decoder_loss=0.2116, over 29322.00 frames. ], tot_loss[loss=0.2294, ctc_loss=0.1067, cr_loss=0.3439, attn_decoder_loss=0.2354, over 5798276.93 frames. ], batch size: 71, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:25:41,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=859900.0, ans=0.0 +2024-09-20 09:25:45,841 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=859940.0, ans=0.1 +2024-09-20 09:26:05,465 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=859980.0, ans=0.125 +2024-09-20 09:26:07,422 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=17.95 vs. limit=22.5 +2024-09-20 09:26:34,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=860060.0, ans=0.0 +2024-09-20 09:26:46,311 INFO [train.py:1198] (1/2) Epoch 48, batch 2350, loss[loss=0.2343, ctc_loss=0.1126, cr_loss=0.3593, attn_decoder_loss=0.2399, over 29684.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1071, cr_loss=0.3449, attn_decoder_loss=0.2357, over 5804426.15 frames. 
], batch size: 83, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:26:49,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=860100.0, ans=0.125 +2024-09-20 09:26:53,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=860100.0, ans=0.1 +2024-09-20 09:27:10,171 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.540e+01 9.100e+01 9.543e+01 1.555e+02, threshold=1.820e+02, percent-clipped=0.0 +2024-09-20 09:27:34,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=860220.0, ans=0.125 +2024-09-20 09:27:46,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=860260.0, ans=0.125 +2024-09-20 09:28:01,993 INFO [train.py:1198] (1/2) Epoch 48, batch 2400, loss[loss=0.2227, ctc_loss=0.1047, cr_loss=0.3558, attn_decoder_loss=0.2279, over 29535.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1077, cr_loss=0.3462, attn_decoder_loss=0.2362, over 5807889.15 frames. ], batch size: 76, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:28:36,305 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=860380.0, ans=0.2 +2024-09-20 09:28:45,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=860380.0, ans=0.125 +2024-09-20 09:28:46,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=860380.0, ans=0.125 +2024-09-20 09:28:59,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=860420.0, ans=0.0 +2024-09-20 09:29:04,066 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=860420.0, ans=0.125 +2024-09-20 09:29:08,476 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=860460.0, ans=0.125 +2024-09-20 09:29:11,644 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=860460.0, ans=0.125 +2024-09-20 09:29:14,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=860460.0, ans=0.0 +2024-09-20 09:29:21,827 INFO [train.py:1198] (1/2) Epoch 48, batch 2450, loss[loss=0.2391, ctc_loss=0.1148, cr_loss=0.3808, attn_decoder_loss=0.2445, over 29675.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1081, cr_loss=0.347, attn_decoder_loss=0.2372, over 5785344.29 frames. 
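The grad_scale field on the status lines moves among 8.0, 16.0 and 32.0 through this stretch, the signature of dynamic loss scaling for mixed-precision training: the scale is cut after a non-finite gradient and grown again after a run of stable steps. A generic sketch of that policy follows; the growth interval and factors are assumptions, not values read from the training script.

```python
class DynamicGradScaler:
    """Halve the loss scale on overflow; double it after `growth_interval`
    consecutive finite steps. Constants are illustrative assumptions."""
    def __init__(self, init_scale: float = 16.0, growth_interval: int = 2000,
                 growth_factor: float = 2.0, backoff_factor: float = 0.5):
        self.scale = init_scale
        self.growth_interval = growth_interval
        self.growth_factor = growth_factor
        self.backoff_factor = backoff_factor
        self._good_steps = 0

    def update(self, found_inf: bool) -> float:
        """Call once per optimizer step with whether the grads overflowed."""
        if found_inf:
            self.scale *= self.backoff_factor   # e.g. 32.0 -> 16.0
            self._good_steps = 0
        else:
            self._good_steps += 1
            if self._good_steps >= self.growth_interval:
                self.scale *= self.growth_factor  # e.g. 16.0 -> 32.0
                self._good_steps = 0
        return self.scale
```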
], batch size: 82, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:29:30,986 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=860500.0, ans=0.2 +2024-09-20 09:29:45,554 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.777e+01 8.875e+01 9.472e+01 1.005e+02 1.888e+02, threshold=1.894e+02, percent-clipped=1.0 +2024-09-20 09:29:54,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=860580.0, ans=0.125 +2024-09-20 09:30:19,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.const_attention_rate, batch_count=860620.0, ans=0.025 +2024-09-20 09:30:21,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=860660.0, ans=0.025 +2024-09-20 09:30:27,143 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.28 vs. limit=22.5 +2024-09-20 09:30:36,885 INFO [train.py:1198] (1/2) Epoch 48, batch 2500, loss[loss=0.236, ctc_loss=0.0978, cr_loss=0.3234, attn_decoder_loss=0.2442, over 29616.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1081, cr_loss=0.3473, attn_decoder_loss=0.2372, over 5794802.87 frames. ], batch size: 86, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:30:40,186 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=860700.0, ans=0.1 +2024-09-20 09:30:43,656 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.70 vs. limit=15.0 +2024-09-20 09:30:46,748 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.61 vs. limit=22.5 +2024-09-20 09:31:15,550 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=860780.0, ans=0.1 +2024-09-20 09:31:34,396 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=18.71 vs. limit=22.5 +2024-09-20 09:31:35,991 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.36 vs. limit=15.0 +2024-09-20 09:31:41,621 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=860860.0, ans=0.125 +2024-09-20 09:31:47,383 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=860860.0, ans=0.0 +2024-09-20 09:31:53,216 INFO [train.py:1198] (1/2) Epoch 48, batch 2550, loss[loss=0.2113, ctc_loss=0.1057, cr_loss=0.3512, attn_decoder_loss=0.2153, over 29325.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1082, cr_loss=0.3472, attn_decoder_loss=0.2371, over 5797279.12 frames. 
], batch size: 67, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:31:53,527 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=860900.0, ans=0.0 +2024-09-20 09:32:07,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=860940.0, ans=0.0 +2024-09-20 09:32:14,579 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=860940.0, ans=0.1 +2024-09-20 09:32:18,702 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.740e+01 9.125e+01 9.570e+01 1.327e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-20 09:32:45,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=861020.0, ans=0.2 +2024-09-20 09:33:12,680 INFO [train.py:1198] (1/2) Epoch 48, batch 2600, loss[loss=0.2195, ctc_loss=0.09165, cr_loss=0.305, attn_decoder_loss=0.2269, over 29454.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1084, cr_loss=0.3474, attn_decoder_loss=0.2372, over 5794295.54 frames. ], batch size: 78, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:33:15,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=861100.0, ans=0.025 +2024-09-20 09:33:50,826 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=3.68 vs. limit=12.0 +2024-09-20 09:33:58,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.max_abs, batch_count=861220.0, ans=10.0 +2024-09-20 09:34:23,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=861260.0, ans=0.025 +2024-09-20 09:34:24,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=861260.0, ans=0.0 +2024-09-20 09:34:27,392 INFO [train.py:1198] (1/2) Epoch 48, batch 2650, loss[loss=0.241, ctc_loss=0.1142, cr_loss=0.3576, attn_decoder_loss=0.2472, over 29228.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1085, cr_loss=0.3468, attn_decoder_loss=0.2373, over 5800907.81 frames. ], batch size: 100, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:34:53,030 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.303e+01 8.627e+01 9.156e+01 9.635e+01 1.174e+02, threshold=1.831e+02, percent-clipped=0.0 +2024-09-20 09:35:24,788 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=861420.0, ans=0.2 +2024-09-20 09:35:33,798 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer1.prob, batch_count=861460.0, ans=0.125 +2024-09-20 09:35:42,638 INFO [train.py:1198] (1/2) Epoch 48, batch 2700, loss[loss=0.2341, ctc_loss=0.1157, cr_loss=0.3549, attn_decoder_loss=0.2393, over 29501.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1086, cr_loss=0.3468, attn_decoder_loss=0.2376, over 5796589.44 frames. 
], batch size: 87, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:35:46,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=861500.0, ans=0.125 +2024-09-20 09:36:24,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=861580.0, ans=0.125 +2024-09-20 09:36:29,824 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=8.77 vs. limit=15.0 +2024-09-20 09:36:32,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=861620.0, ans=0.125 +2024-09-20 09:36:45,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=861620.0, ans=0.025 +2024-09-20 09:37:03,345 INFO [train.py:1198] (1/2) Epoch 48, batch 2750, loss[loss=0.2143, ctc_loss=0.09691, cr_loss=0.3164, attn_decoder_loss=0.2203, over 29502.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1079, cr_loss=0.3454, attn_decoder_loss=0.2366, over 5795666.58 frames. ], batch size: 75, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:37:06,792 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=861700.0, ans=0.0 +2024-09-20 09:37:28,906 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.868e+01 9.360e+01 1.005e+02 2.892e+02, threshold=1.872e+02, percent-clipped=3.0 +2024-09-20 09:37:35,365 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=861780.0, ans=0.125 +2024-09-20 09:37:54,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=861820.0, ans=0.125 +2024-09-20 09:37:56,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=861820.0, ans=0.125 +2024-09-20 09:38:02,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=861860.0, ans=0.1 +2024-09-20 09:38:11,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.ff3_skip_rate, batch_count=861860.0, ans=0.0 +2024-09-20 09:38:18,880 INFO [train.py:1198] (1/2) Epoch 48, batch 2800, loss[loss=0.2534, ctc_loss=0.128, cr_loss=0.3625, attn_decoder_loss=0.2593, over 19800.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1083, cr_loss=0.3461, attn_decoder_loss=0.2369, over 5776123.39 frames. 
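The Whitening messages compare a whiteness metric of a module's activations against a scheduled limit (e.g. metric=10.48 vs. limit=15.0 above), with a corrective penalty presumably active only once the metric exceeds the limit. One plausible such metric, sketched below, is the eigenvalue-spread ratio E[lambda^2] / (E[lambda])^2 of the feature covariance, which is 1.0 for perfectly whitened features and grows as a few directions dominate. Treat this as a plausible reconstruction; the recipe's exact definition may differ.

```python
import torch

def whitening_metric(x: torch.Tensor) -> torch.Tensor:
    """Eigenvalue-spread measure of how 'white' the features are.

    Returns E[lam^2] / (E[lam])^2 over the eigenvalues lam of the
    (channels, channels) feature covariance: 1.0 when all eigenvalues
    are equal (fully whitened), larger when a few directions dominate.
    Sketch only; the recipe's actual metric may be defined differently.
    """
    x = x.reshape(-1, x.shape[-1])                  # (frames, channels)
    x = x - x.mean(dim=0, keepdim=True)
    cov = (x.t() @ x) / x.shape[0]                  # covariance estimate
    mean_eig = torch.diagonal(cov).mean()           # E[lam] = trace(cov)/d
    mean_eig_sq = torch.diagonal(cov @ cov).mean()  # E[lam^2] = trace(cov^2)/d
    return mean_eig_sq / (mean_eig ** 2)

# Example: a near-white 512-channel batch gives a metric close to 1.0,
# well under limits like 15.0 or 22.5 seen in the log.
metric = whitening_metric(torch.randn(1000, 512))
```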
], batch size: 209, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:38:20,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=861900.0, ans=0.0 +2024-09-20 09:38:29,759 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=861900.0, ans=0.1 +2024-09-20 09:38:35,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=861940.0, ans=0.1 +2024-09-20 09:38:41,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=861940.0, ans=0.07 +2024-09-20 09:38:46,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.74 vs. limit=12.0 +2024-09-20 09:39:01,824 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.48 vs. limit=15.0 +2024-09-20 09:39:04,994 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.28 vs. limit=6.0 +2024-09-20 09:39:07,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=862020.0, ans=0.125 +2024-09-20 09:39:10,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=862020.0, ans=0.125 +2024-09-20 09:39:10,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=862020.0, ans=0.2 +2024-09-20 09:39:28,471 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=862060.0, ans=0.125 +2024-09-20 09:39:34,248 INFO [train.py:1198] (1/2) Epoch 48, batch 2850, loss[loss=0.2312, ctc_loss=0.1088, cr_loss=0.3588, attn_decoder_loss=0.2369, over 29518.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1086, cr_loss=0.3471, attn_decoder_loss=0.2372, over 5761619.03 frames. ], batch size: 77, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:39:36,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=862100.0, ans=0.125 +2024-09-20 09:39:37,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass_mid.scale_min, batch_count=862100.0, ans=0.2 +2024-09-20 09:39:39,106 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=862100.0, ans=0.04949747468305833 +2024-09-20 09:39:39,895 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.91 vs. 
limit=15.0 +2024-09-20 09:39:51,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=862140.0, ans=0.2 +2024-09-20 09:39:52,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=862140.0, ans=0.0 +2024-09-20 09:39:55,735 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=862140.0, ans=0.125 +2024-09-20 09:39:59,995 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.529e+01 8.779e+01 9.246e+01 9.697e+01 4.650e+02, threshold=1.849e+02, percent-clipped=1.0 +2024-09-20 09:40:25,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=862220.0, ans=0.0 +2024-09-20 09:40:46,218 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.56 vs. limit=6.0 +2024-09-20 09:40:53,965 INFO [train.py:1198] (1/2) Epoch 48, batch 2900, loss[loss=0.2282, ctc_loss=0.1023, cr_loss=0.3499, attn_decoder_loss=0.2344, over 29415.00 frames. ], tot_loss[loss=0.2323, ctc_loss=0.1092, cr_loss=0.349, attn_decoder_loss=0.2382, over 5788184.79 frames. ], batch size: 79, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:41:00,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=862300.0, ans=0.125 +2024-09-20 09:41:07,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=862340.0, ans=0.1 +2024-09-20 09:41:24,526 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=862380.0, ans=0.05 +2024-09-20 09:41:32,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=862380.0, ans=0.125 +2024-09-20 09:41:39,091 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.75 vs. limit=12.0 +2024-09-20 09:41:58,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.scale_min, batch_count=862460.0, ans=0.2 +2024-09-20 09:41:58,368 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=862460.0, ans=0.0 +2024-09-20 09:41:58,385 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=862460.0, ans=0.2 +2024-09-20 09:42:07,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=862460.0, ans=0.125 +2024-09-20 09:42:10,010 INFO [train.py:1198] (1/2) Epoch 48, batch 2950, loss[loss=0.2217, ctc_loss=0.09924, cr_loss=0.3295, attn_decoder_loss=0.228, over 29507.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1082, cr_loss=0.3469, attn_decoder_loss=0.2371, over 5781972.42 frames. 
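In the block that follows (batch 3000), training pauses to compute a validation loss. Note that cr_loss collapses to about 6.55e-15 there: if the consistency-regularization term compares two stochastically perturbed forward passes, those passes coincide once dropout and other noise are disabled in eval mode, leaving only floating-point residue. A generic validation pass is sketched below; the batch keys and model signature are placeholders, not the recipe's actual interface.

```python
import torch

def validate(model, dataloader, device: str = "cuda") -> float:
    """Average the composite loss over a held-out set with noise disabled."""
    model.eval()                        # disables dropout -> cr_loss ~ 0
    tot_loss, tot_frames = 0.0, 0
    with torch.no_grad():
        for batch in dataloader:
            feats = batch["features"].to(device)    # placeholder key
            targets = batch["targets"].to(device)   # placeholder key
            # assumed to return (summed loss, number of frames in batch)
            loss, num_frames = model(feats, targets)
            tot_loss += loss.item()
            tot_frames += num_frames
    model.train()
    return tot_loss / max(1, tot_frames)
```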
], batch size: 75, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:42:30,170 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:42:37,510 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.396e+01 8.743e+01 9.257e+01 9.610e+01 1.643e+02, threshold=1.851e+02, percent-clipped=0.0 +2024-09-20 09:42:39,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=862580.0, ans=0.0 +2024-09-20 09:42:43,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=862580.0, ans=0.1 +2024-09-20 09:43:00,266 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 09:43:25,735 INFO [train.py:1198] (1/2) Epoch 48, batch 3000, loss[loss=0.2279, ctc_loss=0.1024, cr_loss=0.3392, attn_decoder_loss=0.2343, over 29751.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1079, cr_loss=0.3464, attn_decoder_loss=0.2368, over 5782898.00 frames. ], batch size: 81, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:43:25,735 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 09:43:41,475 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([6.3013, 6.2081, 5.6011, 5.9684], device='cuda:1') +2024-09-20 09:43:44,030 INFO [train.py:1230] (1/2) Epoch 48, validation: loss=0.2127, ctc_loss=0.03675, cr_loss=6.55e-15, attn_decoder_loss=0.2323, over 944034.00 frames. +2024-09-20 09:43:44,030 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 09:43:44,297 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=862700.0, ans=0.125 +2024-09-20 09:44:31,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=862780.0, ans=0.2 +2024-09-20 09:44:49,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=862860.0, ans=0.0 +2024-09-20 09:44:52,805 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.09 vs. limit=15.0 +2024-09-20 09:44:58,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=862860.0, ans=0.125 +2024-09-20 09:44:59,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=862860.0, ans=0.125 +2024-09-20 09:45:04,011 INFO [train.py:1198] (1/2) Epoch 48, batch 3050, loss[loss=0.2175, ctc_loss=0.09267, cr_loss=0.3249, attn_decoder_loss=0.2241, over 29501.00 frames. ], tot_loss[loss=0.2317, ctc_loss=0.1086, cr_loss=0.3477, attn_decoder_loss=0.2376, over 5777544.85 frames. ], batch size: 76, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:45:23,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=862940.0, ans=0.04949747468305833 +2024-09-20 09:45:26,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.conv_module1.whiten, num_groups=1, num_channels=192, metric=9.45 vs. 
limit=15.0 +2024-09-20 09:45:31,010 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.813e+01 8.880e+01 9.329e+01 1.001e+02 1.444e+02, threshold=1.866e+02, percent-clipped=0.0 +2024-09-20 09:45:32,902 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=862980.0, ans=0.2 +2024-09-20 09:46:18,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=863100.0, ans=0.0 +2024-09-20 09:46:19,417 INFO [train.py:1198] (1/2) Epoch 48, batch 3100, loss[loss=0.2407, ctc_loss=0.1115, cr_loss=0.3581, attn_decoder_loss=0.2471, over 29255.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1084, cr_loss=0.3469, attn_decoder_loss=0.2373, over 5777365.58 frames. ], batch size: 100, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:46:21,876 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.82 vs. limit=10.0 +2024-09-20 09:46:54,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=863180.0, ans=0.125 +2024-09-20 09:47:03,530 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=863220.0, ans=0.0 +2024-09-20 09:47:35,294 INFO [train.py:1198] (1/2) Epoch 48, batch 3150, loss[loss=0.2426, ctc_loss=0.1087, cr_loss=0.3379, attn_decoder_loss=0.25, over 28764.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1083, cr_loss=0.3467, attn_decoder_loss=0.2372, over 5782987.05 frames. ], batch size: 104, lr: 2.29e-03, grad_scale: 8.0 +2024-09-20 09:47:42,552 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn1.whiten, num_groups=1, num_channels=192, metric=12.53 vs. limit=22.5 +2024-09-20 09:47:52,341 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=863340.0, ans=0.0 +2024-09-20 09:47:55,982 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.ff3_skip_rate, batch_count=863340.0, ans=0.0 +2024-09-20 09:48:06,681 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.465e+01 8.651e+01 9.014e+01 9.549e+01 1.887e+02, threshold=1.803e+02, percent-clipped=1.0 +2024-09-20 09:48:06,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=863340.0, ans=0.0 +2024-09-20 09:48:18,053 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.65 vs. limit=10.0 +2024-09-20 09:48:48,133 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.11 vs. limit=15.0 +2024-09-20 09:48:50,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=863460.0, ans=0.0 +2024-09-20 09:48:54,865 INFO [train.py:1198] (1/2) Epoch 48, batch 3200, loss[loss=0.2318, ctc_loss=0.1203, cr_loss=0.3695, attn_decoder_loss=0.236, over 29412.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1079, cr_loss=0.3462, attn_decoder_loss=0.2367, over 5792352.01 frames. 
], batch size: 79, lr: 2.29e-03, grad_scale: 16.0 +2024-09-20 09:48:55,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=863500.0, ans=0.07 +2024-09-20 09:49:21,514 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.50 vs. limit=15.0 +2024-09-20 09:50:10,579 INFO [train.py:1198] (1/2) Epoch 48, batch 3250, loss[loss=0.2505, ctc_loss=0.1282, cr_loss=0.3886, attn_decoder_loss=0.2555, over 29711.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1083, cr_loss=0.3471, attn_decoder_loss=0.2374, over 5799864.59 frames. ], batch size: 84, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:50:15,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=863700.0, ans=0.1 +2024-09-20 09:50:33,941 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.38 vs. limit=12.0 +2024-09-20 09:50:37,751 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.777e+01 9.225e+01 9.680e+01 2.463e+02, threshold=1.845e+02, percent-clipped=1.0 +2024-09-20 09:50:47,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=863780.0, ans=0.0 +2024-09-20 09:50:56,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=863820.0, ans=0.125 +2024-09-20 09:50:59,354 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=863820.0, ans=0.125 +2024-09-20 09:51:05,862 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.90 vs. limit=15.0 +2024-09-20 09:51:26,310 INFO [train.py:1198] (1/2) Epoch 48, batch 3300, loss[loss=0.2388, ctc_loss=0.1103, cr_loss=0.3469, attn_decoder_loss=0.2454, over 28316.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1075, cr_loss=0.3454, attn_decoder_loss=0.2362, over 5797433.61 frames. ], batch size: 111, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:51:41,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer_ff2.min_abs, batch_count=863940.0, ans=0.1 +2024-09-20 09:51:56,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=863940.0, ans=0.0 +2024-09-20 09:52:26,468 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=864020.0, ans=0.2 +2024-09-20 09:52:33,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=864020.0, ans=0.125 +2024-09-20 09:52:52,957 INFO [train.py:1198] (1/2) Epoch 48, batch 3350, loss[loss=0.2377, ctc_loss=0.1064, cr_loss=0.324, attn_decoder_loss=0.245, over 28804.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1079, cr_loss=0.3461, attn_decoder_loss=0.2368, over 5774126.54 frames. 
], batch size: 104, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:53:20,156 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.461e+01 8.868e+01 9.379e+01 9.923e+01 1.602e+02, threshold=1.876e+02, percent-clipped=0.0 +2024-09-20 09:53:20,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module1.balancer2.prob, batch_count=864140.0, ans=0.125 +2024-09-20 09:53:24,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=864180.0, ans=0.125 +2024-09-20 09:53:34,659 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=15.74 vs. limit=22.5 +2024-09-20 09:53:35,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=864180.0, ans=0.125 +2024-09-20 09:53:40,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass_mid.scale_min, batch_count=864220.0, ans=0.2 +2024-09-20 09:53:42,972 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=864220.0, ans=0.125 +2024-09-20 09:53:56,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=864260.0, ans=0.2 +2024-09-20 09:53:59,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=864260.0, ans=0.2 +2024-09-20 09:54:08,443 INFO [train.py:1198] (1/2) Epoch 48, batch 3400, loss[loss=0.2052, ctc_loss=0.08525, cr_loss=0.2995, attn_decoder_loss=0.2118, over 29354.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.3458, attn_decoder_loss=0.2366, over 5765113.10 frames. ], batch size: 67, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:54:17,890 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=864300.0, ans=0.2 +2024-09-20 09:54:22,650 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=864340.0, ans=0.0 +2024-09-20 09:54:24,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=864340.0, ans=0.0 +2024-09-20 09:55:09,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer2.prob, batch_count=864460.0, ans=0.125 +2024-09-20 09:55:23,988 INFO [train.py:1198] (1/2) Epoch 48, batch 3450, loss[loss=0.2346, ctc_loss=0.1035, cr_loss=0.3368, attn_decoder_loss=0.2416, over 28173.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1075, cr_loss=0.3459, attn_decoder_loss=0.237, over 5772586.54 frames. ], batch size: 111, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:55:42,884 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=864540.0, ans=0.09899494936611666 +2024-09-20 09:55:55,191 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.532e+01 9.118e+01 9.502e+01 1.543e+02, threshold=1.824e+02, percent-clipped=0.0 +2024-09-20 09:55:59,125 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=4.64 vs. 
limit=6.0 +2024-09-20 09:56:22,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=864620.0, ans=0.2 +2024-09-20 09:56:35,058 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.52 vs. limit=6.0 +2024-09-20 09:56:43,110 INFO [train.py:1198] (1/2) Epoch 48, batch 3500, loss[loss=0.2142, ctc_loss=0.0957, cr_loss=0.3154, attn_decoder_loss=0.2204, over 29348.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1072, cr_loss=0.3452, attn_decoder_loss=0.2364, over 5774683.90 frames. ], batch size: 71, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:57:10,533 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=864740.0, ans=0.1 +2024-09-20 09:57:19,861 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.11 vs. limit=15.0 +2024-09-20 09:57:22,318 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn2.whiten, num_groups=1, num_channels=192, metric=12.23 vs. limit=22.5 +2024-09-20 09:57:24,459 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=864780.0, ans=0.025 +2024-09-20 09:57:31,016 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.83 vs. limit=12.0 +2024-09-20 09:57:43,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=864860.0, ans=0.125 +2024-09-20 09:57:46,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.prob, batch_count=864860.0, ans=0.125 +2024-09-20 09:57:58,094 INFO [train.py:1198] (1/2) Epoch 48, batch 3550, loss[loss=0.241, ctc_loss=0.1112, cr_loss=0.3601, attn_decoder_loss=0.2475, over 29701.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1068, cr_loss=0.3443, attn_decoder_loss=0.2361, over 5782443.77 frames. 
], batch size: 89, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 09:58:12,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.self_attn2.whiten.whitening_limit, batch_count=864940.0, ans=22.5 +2024-09-20 09:58:20,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=864940.0, ans=0.1 +2024-09-20 09:58:24,720 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.597e+01 8.565e+01 9.018e+01 9.505e+01 1.694e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-20 09:58:31,007 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=864980.0, ans=0.0 +2024-09-20 09:58:44,546 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=865020.0, ans=0.1 +2024-09-20 09:58:57,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer_na.min_abs, batch_count=865060.0, ans=0.02 +2024-09-20 09:59:03,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=865060.0, ans=0.1 +2024-09-20 09:59:12,458 INFO [train.py:1198] (1/2) Epoch 48, batch 3600, loss[loss=0.2356, ctc_loss=0.1079, cr_loss=0.3496, attn_decoder_loss=0.2421, over 29505.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1071, cr_loss=0.3453, attn_decoder_loss=0.2364, over 5790717.79 frames. ], batch size: 77, lr: 2.28e-03, grad_scale: 32.0 +2024-09-20 09:59:47,261 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.78 vs. limit=15.0 +2024-09-20 10:00:15,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.10 vs. limit=22.5 +2024-09-20 10:00:20,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=865260.0, ans=0.0 +2024-09-20 10:00:25,567 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=1.92 vs. limit=6.0 +2024-09-20 10:00:26,303 INFO [train.py:1198] (1/2) Epoch 48, batch 3650, loss[loss=0.251, ctc_loss=0.1253, cr_loss=0.3763, attn_decoder_loss=0.2566, over 29486.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1065, cr_loss=0.344, attn_decoder_loss=0.2358, over 5793401.93 frames. ], batch size: 90, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:00:54,248 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.368e+01 8.575e+01 9.071e+01 9.730e+01 1.168e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 10:00:56,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=865380.0, ans=0.125 +2024-09-20 10:01:14,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=4.24 vs. limit=12.0 +2024-09-20 10:01:28,874 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=865460.0, ans=0.125 +2024-09-20 10:01:44,217 INFO [train.py:1198] (1/2) Epoch 48, batch 3700, loss[loss=0.2378, ctc_loss=0.1103, cr_loss=0.3498, attn_decoder_loss=0.2441, over 29713.00 frames. 
], tot_loss[loss=0.2299, ctc_loss=0.1068, cr_loss=0.3448, attn_decoder_loss=0.2359, over 5803672.56 frames. ], batch size: 84, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:01:46,009 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=865500.0, ans=0.125 +2024-09-20 10:02:24,019 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=14.39 vs. limit=15.0 +2024-09-20 10:02:39,480 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=865620.0, ans=0.125 +2024-09-20 10:02:50,060 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=865660.0, ans=0.2 +2024-09-20 10:02:52,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=865660.0, ans=0.125 +2024-09-20 10:02:57,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=865700.0, ans=0.0 +2024-09-20 10:02:58,586 INFO [train.py:1198] (1/2) Epoch 48, batch 3750, loss[loss=0.2117, ctc_loss=0.1014, cr_loss=0.3191, attn_decoder_loss=0.2168, over 29309.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1068, cr_loss=0.3446, attn_decoder_loss=0.2357, over 5807058.68 frames. ], batch size: 67, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:03:01,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=865700.0, ans=0.125 +2024-09-20 10:03:07,851 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=865700.0, ans=0.125 +2024-09-20 10:03:08,475 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.07 vs. limit=15.0 +2024-09-20 10:03:12,841 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=4.98 vs. limit=15.0 +2024-09-20 10:03:19,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=865740.0, ans=0.125 +2024-09-20 10:03:28,348 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.331e+01 8.668e+01 9.150e+01 9.729e+01 2.139e+02, threshold=1.830e+02, percent-clipped=2.0 +2024-09-20 10:03:49,079 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.45 vs. limit=6.0 +2024-09-20 10:03:51,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=865820.0, ans=0.0 +2024-09-20 10:04:02,344 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.out_whiten, num_groups=1, num_channels=192, metric=6.72 vs. 
limit=8.0 +2024-09-20 10:04:04,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.scale_min, batch_count=865860.0, ans=0.2 +2024-09-20 10:04:10,334 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=865860.0, ans=0.0 +2024-09-20 10:04:12,899 INFO [train.py:1198] (1/2) Epoch 48, batch 3800, loss[loss=0.2439, ctc_loss=0.1171, cr_loss=0.3684, attn_decoder_loss=0.2498, over 29632.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1066, cr_loss=0.3436, attn_decoder_loss=0.2355, over 5796678.72 frames. ], batch size: 86, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:04:17,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=865900.0, ans=0.025 +2024-09-20 10:04:39,978 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=865940.0, ans=0.125 +2024-09-20 10:04:43,347 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.73 vs. limit=22.5 +2024-09-20 10:04:45,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=865980.0, ans=0.1 +2024-09-20 10:04:47,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=865980.0, ans=0.025 +2024-09-20 10:04:53,693 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.48 vs. limit=6.0 +2024-09-20 10:05:27,016 INFO [train.py:1198] (1/2) Epoch 48, batch 3850, loss[loss=0.2452, ctc_loss=0.1127, cr_loss=0.3496, attn_decoder_loss=0.2521, over 29260.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1067, cr_loss=0.3438, attn_decoder_loss=0.2356, over 5811246.99 frames. 
], batch size: 100, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:05:27,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=866100.0, ans=0.2 +2024-09-20 10:05:37,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass_mid.scale_min, batch_count=866100.0, ans=0.2 +2024-09-20 10:05:44,814 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:05:44,823 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=866140.0, ans=0.0 +2024-09-20 10:05:49,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=866140.0, ans=0.0 +2024-09-20 10:05:55,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=866180.0, ans=0.0 +2024-09-20 10:05:55,298 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=866180.0, ans=0.125 +2024-09-20 10:05:56,511 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.558e+01 8.668e+01 9.090e+01 9.614e+01 1.900e+02, threshold=1.818e+02, percent-clipped=1.0 +2024-09-20 10:06:02,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=866180.0, ans=0.125 +2024-09-20 10:06:14,003 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.68 vs. limit=22.5 +2024-09-20 10:06:32,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=866260.0, ans=0.125 +2024-09-20 10:06:32,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=866260.0, ans=0.07 +2024-09-20 10:06:36,865 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=866260.0, ans=0.2 +2024-09-20 10:06:40,954 INFO [train.py:1198] (1/2) Epoch 48, batch 3900, loss[loss=0.2361, ctc_loss=0.1057, cr_loss=0.3319, attn_decoder_loss=0.2432, over 29613.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1071, cr_loss=0.3445, attn_decoder_loss=0.2363, over 5815379.34 frames. ], batch size: 86, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:06:45,819 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=866300.0, ans=0.0 +2024-09-20 10:06:47,762 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.87 vs. limit=12.0 +2024-09-20 10:07:08,246 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.06 vs. limit=6.0 +2024-09-20 10:07:19,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=866380.0, ans=0.125 +2024-09-20 10:07:32,521 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.85 vs. 
limit=10.0 +2024-09-20 10:07:58,103 INFO [train.py:1198] (1/2) Epoch 48, batch 3950, loss[loss=0.2436, ctc_loss=0.1164, cr_loss=0.3563, attn_decoder_loss=0.2498, over 29534.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1071, cr_loss=0.3448, attn_decoder_loss=0.2365, over 5835016.30 frames. ], batch size: 97, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:07:58,487 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=866500.0, ans=0.04949747468305833 +2024-09-20 10:08:27,414 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.654e+01 8.623e+01 9.056e+01 9.623e+01 1.586e+02, threshold=1.811e+02, percent-clipped=0.0 +2024-09-20 10:08:30,577 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=866580.0, ans=0.0 +2024-09-20 10:08:31,210 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.02 vs. limit=10.0 +2024-09-20 10:08:45,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=866620.0, ans=0.125 +2024-09-20 10:08:59,718 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=866660.0, ans=0.125 +2024-09-20 10:09:01,297 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:09:11,170 INFO [train.py:1198] (1/2) Epoch 48, batch 4000, loss[loss=0.2139, ctc_loss=0.08753, cr_loss=0.2989, attn_decoder_loss=0.2213, over 29520.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1071, cr_loss=0.344, attn_decoder_loss=0.2366, over 5812361.95 frames. ], batch size: 74, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:09:11,483 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=866700.0, ans=0.0 +2024-09-20 10:09:23,085 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.nonlin_attention.balancer.prob, batch_count=866700.0, ans=0.125 +2024-09-20 10:09:27,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=866740.0, ans=0.0 +2024-09-20 10:09:34,862 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=866740.0, ans=0.5 +2024-09-20 10:09:43,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=866780.0, ans=0.125 +2024-09-20 10:09:52,603 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.max_abs, batch_count=866780.0, ans=10.0 +2024-09-20 10:10:08,907 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=866860.0, ans=0.125 +2024-09-20 10:10:24,660 INFO [train.py:1198] (1/2) Epoch 48, batch 4050, loss[loss=0.2401, ctc_loss=0.1138, cr_loss=0.3129, attn_decoder_loss=0.2472, over 20006.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.107, cr_loss=0.3432, attn_decoder_loss=0.2364, over 5796157.68 frames. 
], batch size: 209, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:10:27,556 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=866900.0, ans=0.125 +2024-09-20 10:10:37,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=866940.0, ans=0.125 +2024-09-20 10:10:45,261 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer2.prob, batch_count=866940.0, ans=0.125 +2024-09-20 10:10:49,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=866940.0, ans=0.0 +2024-09-20 10:10:51,149 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=866940.0, ans=0.04949747468305833 +2024-09-20 10:10:53,680 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.713e+01 8.805e+01 9.236e+01 9.679e+01 1.942e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-20 10:10:55,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=866980.0, ans=0.2 +2024-09-20 10:11:11,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=867020.0, ans=0.09899494936611666 +2024-09-20 10:11:39,141 INFO [train.py:1198] (1/2) Epoch 48, batch 4100, loss[loss=0.2464, ctc_loss=0.1288, cr_loss=0.3848, attn_decoder_loss=0.2509, over 29524.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1075, cr_loss=0.3445, attn_decoder_loss=0.2368, over 5791480.74 frames. ], batch size: 90, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:11:39,367 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=867100.0, ans=0.0 +2024-09-20 10:11:48,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=867100.0, ans=0.1 +2024-09-20 10:12:11,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=867180.0, ans=0.2 +2024-09-20 10:12:11,993 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.44 vs. limit=22.5 +2024-09-20 10:12:21,461 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.96 vs. limit=15.0 +2024-09-20 10:12:31,415 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.30 vs. limit=15.0 +2024-09-20 10:12:31,540 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=3.51 vs. limit=12.0 +2024-09-20 10:12:37,048 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=867220.0, ans=0.125 +2024-09-20 10:12:42,372 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.07 vs. limit=6.0 +2024-09-20 10:12:54,723 INFO [train.py:1198] (1/2) Epoch 48, batch 4150, loss[loss=0.23, ctc_loss=0.1077, cr_loss=0.352, attn_decoder_loss=0.2358, over 29493.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1077, cr_loss=0.345, attn_decoder_loss=0.2366, over 5796553.94 frames. 
], batch size: 77, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:13:00,942 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=867300.0, ans=0.0 +2024-09-20 10:13:12,885 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:13:22,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=867380.0, ans=0.0 +2024-09-20 10:13:23,987 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.793e+01 8.808e+01 9.166e+01 9.915e+01 1.612e+02, threshold=1.833e+02, percent-clipped=0.0 +2024-09-20 10:13:39,379 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=8.37 vs. limit=10.0 +2024-09-20 10:13:47,677 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=867420.0, ans=0.2 +2024-09-20 10:13:59,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=867460.0, ans=0.0 +2024-09-20 10:14:00,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.prob, batch_count=867460.0, ans=0.125 +2024-09-20 10:14:05,532 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=867460.0, ans=0.1 +2024-09-20 10:14:08,055 INFO [train.py:1198] (1/2) Epoch 48, batch 4200, loss[loss=0.2459, ctc_loss=0.1211, cr_loss=0.3707, attn_decoder_loss=0.2515, over 29484.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1081, cr_loss=0.3465, attn_decoder_loss=0.2369, over 5798781.09 frames. ], batch size: 90, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:14:31,375 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=20.37 vs. limit=22.5 +2024-09-20 10:14:42,882 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.91 vs. limit=10.0 +2024-09-20 10:14:45,577 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=21.33 vs. limit=22.5 +2024-09-20 10:15:05,835 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=867660.0, ans=0.125 +2024-09-20 10:15:18,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=867660.0, ans=0.0 +2024-09-20 10:15:18,848 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=5.80 vs. limit=12.0 +2024-09-20 10:15:22,556 INFO [train.py:1198] (1/2) Epoch 48, batch 4250, loss[loss=0.219, ctc_loss=0.1001, cr_loss=0.3204, attn_decoder_loss=0.2251, over 29514.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.108, cr_loss=0.3463, attn_decoder_loss=0.2371, over 5804182.47 frames. 
], batch size: 74, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:15:41,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=867740.0, ans=0.125 +2024-09-20 10:15:47,544 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=867740.0, ans=0.125 +2024-09-20 10:15:54,014 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.563e+01 8.733e+01 9.174e+01 9.868e+01 2.354e+02, threshold=1.835e+02, percent-clipped=1.0 +2024-09-20 10:15:54,963 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.26 vs. limit=12.0 +2024-09-20 10:15:56,443 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.84 vs. limit=15.0 +2024-09-20 10:16:00,083 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.min_abs, batch_count=867780.0, ans=0.5 +2024-09-20 10:16:03,018 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=867780.0, ans=0.1 +2024-09-20 10:16:09,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=867820.0, ans=0.2 +2024-09-20 10:16:22,323 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=867860.0, ans=0.125 +2024-09-20 10:16:36,668 INFO [train.py:1198] (1/2) Epoch 48, batch 4300, loss[loss=0.2425, ctc_loss=0.1171, cr_loss=0.3679, attn_decoder_loss=0.2483, over 29541.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1076, cr_loss=0.3451, attn_decoder_loss=0.2371, over 5794786.82 frames. ], batch size: 87, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:16:53,813 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.39 vs. limit=22.5 +2024-09-20 10:17:00,566 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=867940.0, ans=0.0 +2024-09-20 10:17:02,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=867940.0, ans=0.125 +2024-09-20 10:17:16,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=867980.0, ans=0.125 +2024-09-20 10:17:23,435 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.16 vs. limit=15.0 +2024-09-20 10:17:50,557 INFO [train.py:1198] (1/2) Epoch 48, batch 4350, loss[loss=0.243, ctc_loss=0.1086, cr_loss=0.3447, attn_decoder_loss=0.2502, over 29471.00 frames. ], tot_loss[loss=0.2343, ctc_loss=0.1104, cr_loss=0.3518, attn_decoder_loss=0.2402, over 5797557.39 frames. 
], batch size: 97, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:18:13,514 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:18:14,772 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=868140.0, ans=0.2 +2024-09-20 10:18:14,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=868140.0, ans=0.125 +2024-09-20 10:18:16,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=868140.0, ans=0.0 +2024-09-20 10:18:20,688 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer_ff3.min_abs, batch_count=868180.0, ans=0.2 +2024-09-20 10:18:21,800 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.740e+01 9.099e+01 9.551e+01 1.026e+02 1.775e+02, threshold=1.910e+02, percent-clipped=0.0 +2024-09-20 10:18:28,272 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.29 vs. limit=22.5 +2024-09-20 10:18:28,393 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.21 vs. limit=22.5 +2024-09-20 10:19:04,480 INFO [train.py:1198] (1/2) Epoch 48, batch 4400, loss[loss=0.2354, ctc_loss=0.1098, cr_loss=0.3448, attn_decoder_loss=0.2417, over 27591.00 frames. ], tot_loss[loss=0.236, ctc_loss=0.1112, cr_loss=0.3537, attn_decoder_loss=0.242, over 5767066.89 frames. ], batch size: 125, lr: 2.28e-03, grad_scale: 16.0 +2024-09-20 10:19:12,219 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=868300.0, ans=0.09899494936611666 +2024-09-20 10:19:15,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=868300.0, ans=0.09899494936611666 +2024-09-20 10:19:18,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=868340.0, ans=0.0 +2024-09-20 10:19:32,715 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=868380.0, ans=0.1 +2024-09-20 10:19:34,511 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.73 vs. limit=15.0 +2024-09-20 10:19:38,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=868380.0, ans=0.125 +2024-09-20 10:19:40,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=868380.0, ans=0.125 +2024-09-20 10:19:42,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=868380.0, ans=0.0 +2024-09-20 10:19:54,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=868420.0, ans=0.0 +2024-09-20 10:20:18,888 INFO [train.py:1198] (1/2) Epoch 48, batch 4450, loss[loss=0.2609, ctc_loss=0.1491, cr_loss=0.4081, attn_decoder_loss=0.2642, over 20462.00 frames. ], tot_loss[loss=0.2382, ctc_loss=0.1147, cr_loss=0.3586, attn_decoder_loss=0.244, over 5572597.95 frames. 
], batch size: 209, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:20:24,556 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.whiten, num_groups=1, num_channels=192, metric=6.16 vs. limit=12.0 +2024-09-20 10:20:25,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=868500.0, ans=0.0 +2024-09-20 10:20:29,690 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=868500.0, ans=0.125 +2024-09-20 10:20:34,778 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.57 vs. limit=15.0 +2024-09-20 10:20:37,120 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=868540.0, ans=0.125 +2024-09-20 10:20:43,687 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=868540.0, ans=0.125 +2024-09-20 10:20:51,518 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=10.32 vs. limit=15.0 +2024-09-20 10:20:52,106 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.165e+01 9.214e+01 1.004e+02 1.130e+02 1.604e+02, threshold=2.007e+02, percent-clipped=0.0 +2024-09-20 10:21:13,466 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.61 vs. limit=6.0 +2024-09-20 10:21:13,775 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.50 vs. limit=15.0 +2024-09-20 10:21:29,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=868660.0, ans=0.125 +2024-09-20 10:21:33,682 INFO [train.py:1198] (1/2) Epoch 48, batch 4500, loss[loss=0.2508, ctc_loss=0.1356, cr_loss=0.3868, attn_decoder_loss=0.255, over 20881.00 frames. ], tot_loss[loss=0.2399, ctc_loss=0.1171, cr_loss=0.3606, attn_decoder_loss=0.2455, over 5234422.88 frames. ], batch size: 209, lr: 2.28e-03, grad_scale: 8.0 +2024-09-20 10:21:45,813 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:21:58,438 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=9.69 vs. limit=12.0 +2024-09-20 10:23:01,533 INFO [train.py:1198] (1/2) Epoch 49, batch 0, loss[loss=0.2151, ctc_loss=0.09674, cr_loss=0.3347, attn_decoder_loss=0.2209, over 29625.00 frames. ], tot_loss[loss=0.2151, ctc_loss=0.09674, cr_loss=0.3347, attn_decoder_loss=0.2209, over 29625.00 frames. ], batch size: 73, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:23:01,534 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 10:23:19,983 INFO [train.py:1230] (1/2) Epoch 49, validation: loss=0.2124, ctc_loss=0.03569, cr_loss=6.554e-15, attn_decoder_loss=0.2321, over 944034.00 frames. 
+2024-09-20 10:23:19,983 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 10:23:30,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=868800.0, ans=0.2 +2024-09-20 10:23:31,264 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=12.13 vs. limit=15.0 +2024-09-20 10:23:49,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=868880.0, ans=0.1 +2024-09-20 10:23:55,288 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.65 vs. limit=10.0 +2024-09-20 10:24:17,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=868920.0, ans=0.125 +2024-09-20 10:24:24,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=868960.0, ans=0.125 +2024-09-20 10:24:24,916 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=868960.0, ans=0.025 +2024-09-20 10:24:26,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=868960.0, ans=0.125 +2024-09-20 10:24:32,177 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.048e+01 9.535e+01 1.078e+02 1.164e+02 4.744e+02, threshold=2.156e+02, percent-clipped=1.0 +2024-09-20 10:24:35,436 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:24:36,537 INFO [train.py:1198] (1/2) Epoch 49, batch 50, loss[loss=0.2073, ctc_loss=0.09304, cr_loss=0.3166, attn_decoder_loss=0.213, over 29402.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1098, cr_loss=0.352, attn_decoder_loss=0.2383, over 1268831.92 frames. ], batch size: 70, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:24:55,004 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=869040.0, ans=0.125 +2024-09-20 10:25:11,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=869080.0, ans=0.1 +2024-09-20 10:25:19,110 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=869080.0, ans=0.0 +2024-09-20 10:25:19,787 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.10 vs. limit=15.0 +2024-09-20 10:25:41,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=869160.0, ans=0.125 +2024-09-20 10:25:42,057 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.67 vs. limit=10.0 +2024-09-20 10:25:53,941 INFO [train.py:1198] (1/2) Epoch 49, batch 100, loss[loss=0.2269, ctc_loss=0.1141, cr_loss=0.3541, attn_decoder_loss=0.2315, over 29530.00 frames. ], tot_loss[loss=0.2338, ctc_loss=0.1101, cr_loss=0.3515, attn_decoder_loss=0.2397, over 2253439.42 frames. 
], batch size: 76, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:25:57,252 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=869200.0, ans=0.125 +2024-09-20 10:26:06,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=869200.0, ans=0.125 +2024-09-20 10:26:26,261 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=4.76 vs. limit=15.0 +2024-09-20 10:26:35,041 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.99 vs. limit=10.0 +2024-09-20 10:27:05,479 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.504e+01 8.667e+01 9.247e+01 9.821e+01 1.649e+02, threshold=1.849e+02, percent-clipped=0.0 +2024-09-20 10:27:08,509 INFO [train.py:1198] (1/2) Epoch 49, batch 150, loss[loss=0.2114, ctc_loss=0.09767, cr_loss=0.3313, attn_decoder_loss=0.2167, over 29462.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1078, cr_loss=0.3458, attn_decoder_loss=0.2373, over 3047938.53 frames. ], batch size: 70, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:27:08,886 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=869400.0, ans=0.04949747468305833 +2024-09-20 10:27:51,567 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=869480.0, ans=0.0 +2024-09-20 10:28:09,711 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer1.prob, batch_count=869560.0, ans=0.125 +2024-09-20 10:28:26,275 INFO [train.py:1198] (1/2) Epoch 49, batch 200, loss[loss=0.2548, ctc_loss=0.1249, cr_loss=0.3995, attn_decoder_loss=0.2603, over 27549.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.3459, attn_decoder_loss=0.2365, over 3661870.17 frames. ], batch size: 125, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:28:29,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=7.31 vs. limit=15.0 +2024-09-20 10:29:05,721 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=869680.0, ans=0.125 +2024-09-20 10:29:37,197 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=869760.0, ans=0.125 +2024-09-20 10:29:40,854 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 8.622e+01 9.248e+01 9.651e+01 1.394e+02, threshold=1.850e+02, percent-clipped=0.0 +2024-09-20 10:29:41,950 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.49 vs. limit=22.5 +2024-09-20 10:29:43,770 INFO [train.py:1198] (1/2) Epoch 49, batch 250, loss[loss=0.2404, ctc_loss=0.1079, cr_loss=0.3544, attn_decoder_loss=0.2472, over 29271.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1071, cr_loss=0.3449, attn_decoder_loss=0.2361, over 4144737.95 frames. 
], batch size: 100, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:29:48,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=869800.0, ans=0.125 +2024-09-20 10:30:08,539 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.45 vs. limit=15.0 +2024-09-20 10:30:17,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=869880.0, ans=0.0 +2024-09-20 10:30:17,247 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=869880.0, ans=0.2 +2024-09-20 10:30:22,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.09 vs. limit=15.0 +2024-09-20 10:30:23,167 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=869880.0, ans=0.0 +2024-09-20 10:30:29,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=768, metric=2.64 vs. limit=15.0 +2024-09-20 10:30:30,757 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=869920.0, ans=0.1 +2024-09-20 10:30:33,985 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:30:53,472 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=869960.0, ans=0.125 +2024-09-20 10:30:55,203 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=19.97 vs. limit=22.5 +2024-09-20 10:30:55,457 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.92 vs. limit=12.0 +2024-09-20 10:30:59,202 INFO [train.py:1198] (1/2) Epoch 49, batch 300, loss[loss=0.2527, ctc_loss=0.1287, cr_loss=0.3835, attn_decoder_loss=0.258, over 29515.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.107, cr_loss=0.3446, attn_decoder_loss=0.2361, over 4511746.19 frames. ], batch size: 92, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:31:17,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.balancer.max_positive, batch_count=870040.0, ans=0.95 +2024-09-20 10:31:51,330 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=870120.0, ans=0.125 +2024-09-20 10:32:10,927 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=870160.0, ans=0.0 +2024-09-20 10:32:11,389 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.06 vs. limit=15.0 +2024-09-20 10:32:13,610 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.417e+01 8.601e+01 9.011e+01 9.321e+01 1.888e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 10:32:16,512 INFO [train.py:1198] (1/2) Epoch 49, batch 350, loss[loss=0.2088, ctc_loss=0.08572, cr_loss=0.309, attn_decoder_loss=0.2156, over 29317.00 frames. 
], tot_loss[loss=0.2305, ctc_loss=0.1071, cr_loss=0.3452, attn_decoder_loss=0.2365, over 4797009.94 frames. ], batch size: 71, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:32:18,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=870200.0, ans=0.125 +2024-09-20 10:32:22,709 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=870200.0, ans=0.125 +2024-09-20 10:32:25,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=870200.0, ans=0.2 +2024-09-20 10:32:25,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=870200.0, ans=10.0 +2024-09-20 10:32:25,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=870200.0, ans=0.125 +2024-09-20 10:32:34,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=870240.0, ans=0.125 +2024-09-20 10:32:42,816 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.41 vs. limit=22.5 +2024-09-20 10:32:49,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.min_positive, batch_count=870280.0, ans=0.025 +2024-09-20 10:33:19,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer1.prob, batch_count=870360.0, ans=0.125 +2024-09-20 10:33:31,555 INFO [train.py:1198] (1/2) Epoch 49, batch 400, loss[loss=0.2315, ctc_loss=0.1098, cr_loss=0.3566, attn_decoder_loss=0.2371, over 29695.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1069, cr_loss=0.3446, attn_decoder_loss=0.2361, over 5026235.43 frames. ], batch size: 82, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:33:49,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer2.prob, batch_count=870440.0, ans=0.125 +2024-09-20 10:33:50,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=870440.0, ans=0.1 +2024-09-20 10:33:53,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=870440.0, ans=0.0 +2024-09-20 10:33:54,421 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=21.69 vs. limit=22.5 +2024-09-20 10:33:59,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=870440.0, ans=0.125 +2024-09-20 10:34:23,124 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.11 vs. 
limit=22.5 +2024-09-20 10:34:25,664 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=870520.0, ans=0.0 +2024-09-20 10:34:27,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=870520.0, ans=0.0 +2024-09-20 10:34:37,536 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=870560.0, ans=0.125 +2024-09-20 10:34:45,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=870560.0, ans=0.1 +2024-09-20 10:34:46,411 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.637e+01 8.552e+01 9.241e+01 9.788e+01 2.728e+02, threshold=1.848e+02, percent-clipped=1.0 +2024-09-20 10:34:49,354 INFO [train.py:1198] (1/2) Epoch 49, batch 450, loss[loss=0.2276, ctc_loss=0.1012, cr_loss=0.3331, attn_decoder_loss=0.2343, over 29698.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1066, cr_loss=0.3443, attn_decoder_loss=0.2359, over 5188865.69 frames. ], batch size: 83, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:34:59,062 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.31 vs. limit=15.0 +2024-09-20 10:35:26,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.87 vs. limit=15.0 +2024-09-20 10:35:57,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=870760.0, ans=15.0 +2024-09-20 10:36:07,278 INFO [train.py:1198] (1/2) Epoch 49, batch 500, loss[loss=0.2414, ctc_loss=0.1121, cr_loss=0.3551, attn_decoder_loss=0.2479, over 29424.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1066, cr_loss=0.3443, attn_decoder_loss=0.2356, over 5331867.56 frames. ], batch size: 94, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:36:12,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=870800.0, ans=0.2 +2024-09-20 10:36:17,109 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=11.10 vs. 
limit=15.0 +2024-09-20 10:37:01,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=870920.0, ans=0.0 +2024-09-20 10:37:08,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=870960.0, ans=10.0 +2024-09-20 10:37:18,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=870960.0, ans=0.09899494936611666 +2024-09-20 10:37:18,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=870960.0, ans=0.125 +2024-09-20 10:37:19,725 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.778e+01 8.745e+01 9.074e+01 9.621e+01 1.472e+02, threshold=1.815e+02, percent-clipped=0.0 +2024-09-20 10:37:20,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=870960.0, ans=0.0 +2024-09-20 10:37:25,056 INFO [train.py:1198] (1/2) Epoch 49, batch 550, loss[loss=0.2451, ctc_loss=0.1174, cr_loss=0.3687, attn_decoder_loss=0.2511, over 28792.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1068, cr_loss=0.3448, attn_decoder_loss=0.2357, over 5424374.95 frames. ], batch size: 104, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:37:31,384 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=871000.0, ans=0.0 +2024-09-20 10:38:07,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=871080.0, ans=0.0 +2024-09-20 10:38:11,813 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=871120.0, ans=0.125 +2024-09-20 10:38:40,827 INFO [train.py:1198] (1/2) Epoch 49, batch 600, loss[loss=0.2323, ctc_loss=0.09819, cr_loss=0.3164, attn_decoder_loss=0.2402, over 29322.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1072, cr_loss=0.3458, attn_decoder_loss=0.236, over 5508554.25 frames. 
], batch size: 100, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:38:45,802 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=871200.0, ans=0.1 +2024-09-20 10:38:50,273 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=871200.0, ans=0.0 +2024-09-20 10:38:50,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.scale_min, batch_count=871200.0, ans=0.2 +2024-09-20 10:38:54,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=871240.0, ans=0.1 +2024-09-20 10:39:00,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=871240.0, ans=0.125 +2024-09-20 10:39:18,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=871280.0, ans=0.125 +2024-09-20 10:39:41,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=871360.0, ans=0.025 +2024-09-20 10:39:54,157 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=871360.0, ans=0.125 +2024-09-20 10:39:55,327 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.840e+01 8.578e+01 9.036e+01 9.635e+01 5.589e+02, threshold=1.807e+02, percent-clipped=2.0 +2024-09-20 10:39:57,032 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=871400.0, ans=0.0 +2024-09-20 10:39:58,321 INFO [train.py:1198] (1/2) Epoch 49, batch 650, loss[loss=0.2291, ctc_loss=0.104, cr_loss=0.3344, attn_decoder_loss=0.2355, over 29762.00 frames. ], tot_loss[loss=0.2292, ctc_loss=0.1062, cr_loss=0.3432, attn_decoder_loss=0.2352, over 5585897.11 frames. ], batch size: 81, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:40:15,599 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.28 vs. limit=15.0 +2024-09-20 10:40:32,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=871480.0, ans=0.025 +2024-09-20 10:40:50,150 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:41:10,863 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=871560.0, ans=0.2 +2024-09-20 10:41:10,908 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=871560.0, ans=0.125 +2024-09-20 10:41:13,707 INFO [train.py:1198] (1/2) Epoch 49, batch 700, loss[loss=0.2184, ctc_loss=0.09866, cr_loss=0.3274, attn_decoder_loss=0.2244, over 29551.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1067, cr_loss=0.3442, attn_decoder_loss=0.236, over 5637421.97 frames. ], batch size: 76, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:41:17,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=871600.0, ans=0.0 +2024-09-20 10:41:22,986 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=12.69 vs. 
limit=15.0 +2024-09-20 10:41:23,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=871600.0, ans=0.125 +2024-09-20 10:41:23,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=871600.0, ans=0.1 +2024-09-20 10:41:26,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff3_skip_rate, batch_count=871600.0, ans=0.0 +2024-09-20 10:41:31,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=871640.0, ans=0.125 +2024-09-20 10:41:31,904 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=7.87 vs. limit=15.0 +2024-09-20 10:41:42,463 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.72 vs. limit=15.0 +2024-09-20 10:42:20,853 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=871760.0, ans=0.2 +2024-09-20 10:42:25,373 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=871760.0, ans=0.2 +2024-09-20 10:42:29,492 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.733e+01 8.695e+01 9.527e+01 1.020e+02 1.538e+02, threshold=1.905e+02, percent-clipped=0.0 +2024-09-20 10:42:31,055 INFO [train.py:1198] (1/2) Epoch 49, batch 750, loss[loss=0.2309, ctc_loss=0.1035, cr_loss=0.3308, attn_decoder_loss=0.2377, over 29707.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1065, cr_loss=0.3439, attn_decoder_loss=0.2357, over 5675847.03 frames. ], batch size: 82, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:42:49,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.94 vs. limit=15.0 +2024-09-20 10:43:00,482 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=4.27 vs. limit=6.0 +2024-09-20 10:43:35,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff2_skip_rate, batch_count=871960.0, ans=0.0 +2024-09-20 10:43:37,572 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=871960.0, ans=0.0 +2024-09-20 10:43:48,830 INFO [train.py:1198] (1/2) Epoch 49, batch 800, loss[loss=0.2165, ctc_loss=0.1016, cr_loss=0.3259, attn_decoder_loss=0.222, over 29597.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1067, cr_loss=0.3442, attn_decoder_loss=0.2358, over 5704978.26 frames. 
], batch size: 73, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:44:02,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=872040.0, ans=0.0 +2024-09-20 10:44:02,568 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=872040.0, ans=0.025 +2024-09-20 10:44:13,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=872040.0, ans=0.0 +2024-09-20 10:44:14,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.scale_min, batch_count=872040.0, ans=0.2 +2024-09-20 10:44:14,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=872040.0, ans=0.0 +2024-09-20 10:44:20,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.min_positive, batch_count=872080.0, ans=0.025 +2024-09-20 10:44:32,607 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=872120.0, ans=0.0 +2024-09-20 10:44:40,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=872120.0, ans=0.0 +2024-09-20 10:44:43,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=872120.0, ans=0.125 +2024-09-20 10:44:59,294 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=872160.0, ans=0.0 +2024-09-20 10:45:03,401 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.964e+01 8.646e+01 9.267e+01 9.884e+01 3.056e+02, threshold=1.853e+02, percent-clipped=1.0 +2024-09-20 10:45:03,423 INFO [train.py:1198] (1/2) Epoch 49, batch 850, loss[loss=0.2401, ctc_loss=0.1097, cr_loss=0.3462, attn_decoder_loss=0.2469, over 29713.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1063, cr_loss=0.3433, attn_decoder_loss=0.2355, over 5735300.79 frames. ], batch size: 89, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:45:08,626 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.46 vs. limit=15.0 +2024-09-20 10:45:12,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=872200.0, ans=0.5 +2024-09-20 10:45:17,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=872200.0, ans=0.07 +2024-09-20 10:45:25,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.01 vs. limit=15.0 +2024-09-20 10:45:28,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=872240.0, ans=0.0 +2024-09-20 10:45:41,805 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=872280.0, ans=0.125 +2024-09-20 10:45:56,320 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.50 vs. 
limit=15.0 +2024-09-20 10:46:04,816 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=872360.0, ans=0.025 +2024-09-20 10:46:11,231 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.99 vs. limit=15.0 +2024-09-20 10:46:11,282 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=9.91 vs. limit=15.0 +2024-09-20 10:46:21,060 INFO [train.py:1198] (1/2) Epoch 49, batch 900, loss[loss=0.217, ctc_loss=0.1001, cr_loss=0.3272, attn_decoder_loss=0.2227, over 29614.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1066, cr_loss=0.3442, attn_decoder_loss=0.2361, over 5741074.54 frames. ], batch size: 73, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:46:27,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_abs, batch_count=872400.0, ans=0.5 +2024-09-20 10:46:33,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer2.prob, batch_count=872400.0, ans=0.125 +2024-09-20 10:46:33,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=872400.0, ans=0.125 +2024-09-20 10:47:02,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.min_abs, batch_count=872480.0, ans=0.5 +2024-09-20 10:47:08,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=872520.0, ans=0.125 +2024-09-20 10:47:24,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=872560.0, ans=0.2 +2024-09-20 10:47:29,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.47 vs. limit=15.0 +2024-09-20 10:47:38,399 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.315e+01 8.666e+01 9.208e+01 1.007e+02 2.481e+02, threshold=1.842e+02, percent-clipped=1.0 +2024-09-20 10:47:38,430 INFO [train.py:1198] (1/2) Epoch 49, batch 950, loss[loss=0.214, ctc_loss=0.09318, cr_loss=0.2995, attn_decoder_loss=0.2207, over 29506.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1068, cr_loss=0.3445, attn_decoder_loss=0.2363, over 5743444.39 frames. ], batch size: 74, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:47:43,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=872600.0, ans=0.0 +2024-09-20 10:48:01,911 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=12.15 vs. limit=15.0 +2024-09-20 10:48:17,789 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=872680.0, ans=0.0 +2024-09-20 10:48:20,680 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=872680.0, ans=0.125 +2024-09-20 10:48:25,684 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.28 vs. 
limit=15.0 +2024-09-20 10:48:34,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=872720.0, ans=0.125 +2024-09-20 10:48:41,540 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=872760.0, ans=0.125 +2024-09-20 10:48:43,672 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.86 vs. limit=15.0 +2024-09-20 10:48:49,647 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.68 vs. limit=15.0 +2024-09-20 10:48:50,435 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=872760.0, ans=0.125 +2024-09-20 10:48:53,490 INFO [train.py:1198] (1/2) Epoch 49, batch 1000, loss[loss=0.2319, ctc_loss=0.1095, cr_loss=0.3602, attn_decoder_loss=0.2375, over 29475.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1076, cr_loss=0.3464, attn_decoder_loss=0.237, over 5737418.51 frames. ], batch size: 77, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:48:55,320 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=872800.0, ans=0.125 +2024-09-20 10:49:11,035 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=872840.0, ans=0.125 +2024-09-20 10:49:51,523 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=872920.0, ans=0.04949747468305833 +2024-09-20 10:50:06,998 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=7.42 vs. limit=15.0 +2024-09-20 10:50:10,720 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.554e+01 8.566e+01 9.140e+01 9.673e+01 2.370e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-20 10:50:10,742 INFO [train.py:1198] (1/2) Epoch 49, batch 1050, loss[loss=0.2438, ctc_loss=0.1196, cr_loss=0.3692, attn_decoder_loss=0.2494, over 29693.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1075, cr_loss=0.3463, attn_decoder_loss=0.2365, over 5744295.94 frames. ], batch size: 85, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:50:17,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer1.prob, batch_count=873000.0, ans=0.125 +2024-09-20 10:50:52,217 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=873080.0, ans=0.1 +2024-09-20 10:50:57,186 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.92 vs. limit=22.5 +2024-09-20 10:50:58,057 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer1.prob, batch_count=873120.0, ans=0.125 +2024-09-20 10:50:59,580 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=873120.0, ans=0.0 +2024-09-20 10:51:08,884 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.66 vs. 
limit=15.0 +2024-09-20 10:51:09,918 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer1.prob, batch_count=873160.0, ans=0.125 +2024-09-20 10:51:10,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=873160.0, ans=0.07 +2024-09-20 10:51:22,745 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=14.76 vs. limit=15.0 +2024-09-20 10:51:26,494 INFO [train.py:1198] (1/2) Epoch 49, batch 1100, loss[loss=0.2245, ctc_loss=0.1081, cr_loss=0.341, attn_decoder_loss=0.2298, over 29439.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1075, cr_loss=0.3461, attn_decoder_loss=0.2362, over 5757191.06 frames. ], batch size: 78, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:51:43,490 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1.whitening_limit, batch_count=873240.0, ans=10.0 +2024-09-20 10:52:02,380 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=873280.0, ans=0.125 +2024-09-20 10:52:21,285 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.83 vs. limit=10.0 +2024-09-20 10:52:32,659 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=873360.0, ans=0.125 +2024-09-20 10:52:41,723 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer1.prob, batch_count=873360.0, ans=0.125 +2024-09-20 10:52:44,484 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.640e+01 8.549e+01 9.114e+01 9.620e+01 1.410e+02, threshold=1.823e+02, percent-clipped=0.0 +2024-09-20 10:52:44,505 INFO [train.py:1198] (1/2) Epoch 49, batch 1150, loss[loss=0.2258, ctc_loss=0.1018, cr_loss=0.3411, attn_decoder_loss=0.232, over 29458.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1075, cr_loss=0.3461, attn_decoder_loss=0.2362, over 5756315.79 frames. ], batch size: 78, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 10:54:02,477 INFO [train.py:1198] (1/2) Epoch 49, batch 1200, loss[loss=0.2277, ctc_loss=0.1003, cr_loss=0.3151, attn_decoder_loss=0.2349, over 29680.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.3464, attn_decoder_loss=0.2366, over 5747880.39 frames. ], batch size: 85, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:54:10,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=873600.0, ans=0.2 +2024-09-20 10:54:22,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=873640.0, ans=0.0 +2024-09-20 10:54:32,029 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.77 vs. 
limit=15.0 +2024-09-20 10:54:37,573 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=873680.0, ans=0.2 +2024-09-20 10:54:39,039 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=873680.0, ans=0.125 +2024-09-20 10:55:00,307 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=873720.0, ans=0.125 +2024-09-20 10:55:18,093 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.698e+01 8.765e+01 9.274e+01 9.697e+01 1.334e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-20 10:55:18,119 INFO [train.py:1198] (1/2) Epoch 49, batch 1250, loss[loss=0.2459, ctc_loss=0.1103, cr_loss=0.3616, attn_decoder_loss=0.2529, over 29589.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1082, cr_loss=0.3481, attn_decoder_loss=0.2372, over 5775149.76 frames. ], batch size: 92, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:55:36,613 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.01 vs. limit=15.0 +2024-09-20 10:55:40,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=873840.0, ans=0.025 +2024-09-20 10:55:43,328 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=873840.0, ans=0.1 +2024-09-20 10:55:47,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=873840.0, ans=0.1 +2024-09-20 10:55:48,724 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=4.50 vs. limit=15.0 +2024-09-20 10:56:07,531 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=873920.0, ans=0.1 +2024-09-20 10:56:21,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.min_positive, batch_count=873960.0, ans=0.025 +2024-09-20 10:56:35,756 INFO [train.py:1198] (1/2) Epoch 49, batch 1300, loss[loss=0.239, ctc_loss=0.1119, cr_loss=0.3601, attn_decoder_loss=0.2451, over 28127.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1077, cr_loss=0.3465, attn_decoder_loss=0.2365, over 5780383.19 frames. ], batch size: 111, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:56:36,122 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:56:48,933 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=874000.0, ans=0.0 +2024-09-20 10:57:07,518 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.03 vs. limit=10.0 +2024-09-20 10:57:09,321 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.93 vs. limit=15.0 +2024-09-20 10:57:28,777 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.50 vs. 
limit=15.0 +2024-09-20 10:57:35,715 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:57:36,464 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.06 vs. limit=6.0 +2024-09-20 10:57:44,801 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 10:57:53,496 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.289e+01 8.411e+01 9.030e+01 9.662e+01 1.974e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-20 10:57:53,517 INFO [train.py:1198] (1/2) Epoch 49, batch 1350, loss[loss=0.2322, ctc_loss=0.1107, cr_loss=0.3638, attn_decoder_loss=0.2376, over 29754.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1074, cr_loss=0.3462, attn_decoder_loss=0.2363, over 5796609.28 frames. ], batch size: 81, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:58:02,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.min_positive, batch_count=874200.0, ans=0.05 +2024-09-20 10:58:06,351 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.93 vs. limit=10.0 +2024-09-20 10:58:08,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=874240.0, ans=0.0 +2024-09-20 10:58:23,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=874280.0, ans=0.125 +2024-09-20 10:58:48,028 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=6.03 vs. limit=12.0 +2024-09-20 10:58:54,282 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.53 vs. limit=15.0 +2024-09-20 10:59:09,072 INFO [train.py:1198] (1/2) Epoch 49, batch 1400, loss[loss=0.2084, ctc_loss=0.09308, cr_loss=0.3239, attn_decoder_loss=0.214, over 29581.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.107, cr_loss=0.3454, attn_decoder_loss=0.2359, over 5808647.48 frames. ], batch size: 69, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 10:59:47,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=874480.0, ans=0.09899494936611666 +2024-09-20 10:59:59,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=874520.0, ans=0.05 +2024-09-20 11:00:00,383 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.91 vs. 
limit=12.0 +2024-09-20 11:00:17,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer_ff3.min_abs, batch_count=874560.0, ans=0.2 +2024-09-20 11:00:19,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=874560.0, ans=0.125 +2024-09-20 11:00:23,712 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=874560.0, ans=0.125 +2024-09-20 11:00:25,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=874600.0, ans=0.0 +2024-09-20 11:00:26,309 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.391e+01 8.532e+01 9.313e+01 9.693e+01 1.325e+02, threshold=1.863e+02, percent-clipped=0.0 +2024-09-20 11:00:26,330 INFO [train.py:1198] (1/2) Epoch 49, batch 1450, loss[loss=0.2458, ctc_loss=0.1208, cr_loss=0.3685, attn_decoder_loss=0.2515, over 29447.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1071, cr_loss=0.3452, attn_decoder_loss=0.2363, over 5804893.50 frames. ], batch size: 94, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 11:01:00,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff3_skip_rate, batch_count=874680.0, ans=0.0 +2024-09-20 11:01:03,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=874680.0, ans=0.09899494936611666 +2024-09-20 11:01:09,271 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=874680.0, ans=0.125 +2024-09-20 11:01:18,443 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:01:19,770 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=874720.0, ans=0.0 +2024-09-20 11:01:43,691 INFO [train.py:1198] (1/2) Epoch 49, batch 1500, loss[loss=0.2347, ctc_loss=0.1067, cr_loss=0.3495, attn_decoder_loss=0.2411, over 29615.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.107, cr_loss=0.3455, attn_decoder_loss=0.2365, over 5806298.56 frames. ], batch size: 86, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 11:02:07,361 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.15 vs. limit=6.0 +2024-09-20 11:02:18,169 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=6.32 vs. limit=15.0 +2024-09-20 11:02:35,675 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer1.prob, batch_count=874920.0, ans=0.125 +2024-09-20 11:02:44,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=874960.0, ans=0.025 +2024-09-20 11:02:59,720 INFO [train.py:1198] (1/2) Epoch 49, batch 1550, loss[loss=0.2449, ctc_loss=0.1136, cr_loss=0.3632, attn_decoder_loss=0.2514, over 29507.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1075, cr_loss=0.3461, attn_decoder_loss=0.2368, over 5783431.93 frames. 
], batch size: 90, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 11:03:00,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=9.43 vs. limit=10.0 +2024-09-20 11:03:01,249 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.616e+01 8.780e+01 9.221e+01 9.714e+01 1.731e+02, threshold=1.844e+02, percent-clipped=0.0 +2024-09-20 11:03:22,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn2.whiten.whitening_limit, batch_count=875040.0, ans=22.5 +2024-09-20 11:03:26,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=875040.0, ans=0.0 +2024-09-20 11:03:35,534 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=875080.0, ans=0.125 +2024-09-20 11:04:01,203 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=875160.0, ans=0.1 +2024-09-20 11:04:17,871 INFO [train.py:1198] (1/2) Epoch 49, batch 1600, loss[loss=0.2377, ctc_loss=0.1053, cr_loss=0.3327, attn_decoder_loss=0.245, over 29644.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1076, cr_loss=0.3466, attn_decoder_loss=0.2366, over 5765812.00 frames. ], batch size: 85, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 11:04:23,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.min_abs, batch_count=875200.0, ans=0.5 +2024-09-20 11:04:47,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.49 vs. limit=22.5 +2024-09-20 11:05:35,284 INFO [train.py:1198] (1/2) Epoch 49, batch 1650, loss[loss=0.2419, ctc_loss=0.1059, cr_loss=0.3473, attn_decoder_loss=0.2492, over 29709.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1071, cr_loss=0.3458, attn_decoder_loss=0.2364, over 5760646.21 frames. ], batch size: 89, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 11:05:36,811 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.535e+01 8.731e+01 9.375e+01 1.033e+02 4.600e+02, threshold=1.875e+02, percent-clipped=3.0 +2024-09-20 11:05:37,193 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=875400.0, ans=0.125 +2024-09-20 11:05:49,693 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.40 vs. limit=6.0 +2024-09-20 11:05:52,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=875440.0, ans=0.125 +2024-09-20 11:06:22,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=875520.0, ans=0.125 +2024-09-20 11:06:25,337 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=875520.0, ans=0.0 +2024-09-20 11:06:28,417 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=875520.0, ans=0.0 +2024-09-20 11:06:30,451 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.31 vs. 
limit=15.0 +2024-09-20 11:06:45,236 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=6.68 vs. limit=15.0 +2024-09-20 11:06:50,363 INFO [train.py:1198] (1/2) Epoch 49, batch 1700, loss[loss=0.201, ctc_loss=0.0845, cr_loss=0.2969, attn_decoder_loss=0.2074, over 29570.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1069, cr_loss=0.346, attn_decoder_loss=0.2363, over 5781583.73 frames. ], batch size: 69, lr: 2.25e-03, grad_scale: 16.0 +2024-09-20 11:06:52,225 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=875600.0, ans=0.2 +2024-09-20 11:07:07,075 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=875640.0, ans=0.125 +2024-09-20 11:07:11,745 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=875640.0, ans=0.0 +2024-09-20 11:07:19,806 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=875640.0, ans=0.0 +2024-09-20 11:07:33,552 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:07:50,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=875720.0, ans=0.125 +2024-09-20 11:07:53,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=875760.0, ans=0.0 +2024-09-20 11:07:57,528 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_skip_rate, batch_count=875760.0, ans=0.0 +2024-09-20 11:08:03,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=875760.0, ans=0.125 +2024-09-20 11:08:07,654 INFO [train.py:1198] (1/2) Epoch 49, batch 1750, loss[loss=0.2118, ctc_loss=0.09515, cr_loss=0.3283, attn_decoder_loss=0.2175, over 29372.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1067, cr_loss=0.3453, attn_decoder_loss=0.2359, over 5789465.51 frames. 
], batch size: 67, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 11:08:08,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=875800.0, ans=0.125 +2024-09-20 11:08:10,635 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.685e+01 8.566e+01 9.020e+01 9.576e+01 1.474e+02, threshold=1.804e+02, percent-clipped=0.0 +2024-09-20 11:08:15,535 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=875800.0, ans=0.125 +2024-09-20 11:08:34,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff3_skip_rate, batch_count=875840.0, ans=0.0 +2024-09-20 11:08:35,678 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=875840.0, ans=0.0 +2024-09-20 11:08:38,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=875880.0, ans=0.1 +2024-09-20 11:08:44,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=875880.0, ans=0.1 +2024-09-20 11:08:45,072 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.05 vs. limit=10.0 +2024-09-20 11:08:45,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=875880.0, ans=0.025 +2024-09-20 11:08:57,992 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=875920.0, ans=0.125 +2024-09-20 11:09:03,809 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=875920.0, ans=0.09899494936611666 +2024-09-20 11:09:11,793 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=2.59 vs. limit=15.0 +2024-09-20 11:09:11,823 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.93 vs. limit=22.5 +2024-09-20 11:09:17,943 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.whiten, num_groups=1, num_channels=512, metric=5.38 vs. limit=12.0 +2024-09-20 11:09:24,847 INFO [train.py:1198] (1/2) Epoch 49, batch 1800, loss[loss=0.2269, ctc_loss=0.103, cr_loss=0.3405, attn_decoder_loss=0.2331, over 29670.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1066, cr_loss=0.3453, attn_decoder_loss=0.236, over 5790763.84 frames. ], batch size: 83, lr: 2.25e-03, grad_scale: 8.0 +2024-09-20 11:09:34,404 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=876000.0, ans=0.2 +2024-09-20 11:09:54,124 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:10:40,174 INFO [train.py:1198] (1/2) Epoch 49, batch 1850, loss[loss=0.2391, ctc_loss=0.1046, cr_loss=0.3262, attn_decoder_loss=0.2468, over 29621.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.107, cr_loss=0.3464, attn_decoder_loss=0.2362, over 5796276.57 frames. 
], batch size: 86, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:10:43,132 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.398e+01 8.600e+01 9.055e+01 9.654e+01 2.900e+02, threshold=1.811e+02, percent-clipped=2.0 +2024-09-20 11:10:43,913 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.51 vs. limit=15.0 +2024-09-20 11:11:21,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=876280.0, ans=0.125 +2024-09-20 11:11:30,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_skip_rate, batch_count=876320.0, ans=0.0 +2024-09-20 11:11:56,598 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=13.10 vs. limit=22.5 +2024-09-20 11:11:56,966 INFO [train.py:1198] (1/2) Epoch 49, batch 1900, loss[loss=0.2452, ctc_loss=0.1136, cr_loss=0.3679, attn_decoder_loss=0.2517, over 29697.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1073, cr_loss=0.3465, attn_decoder_loss=0.2368, over 5804275.90 frames. ], batch size: 89, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:12:04,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=876400.0, ans=0.1 +2024-09-20 11:12:17,899 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.82 vs. limit=22.5 +2024-09-20 11:12:23,358 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=876440.0, ans=0.1 +2024-09-20 11:12:46,758 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=6.82 vs. limit=15.0 +2024-09-20 11:12:47,681 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=876520.0, ans=0.0 +2024-09-20 11:13:06,508 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.37 vs. limit=15.0 +2024-09-20 11:13:14,820 INFO [train.py:1198] (1/2) Epoch 49, batch 1950, loss[loss=0.2337, ctc_loss=0.1238, cr_loss=0.3726, attn_decoder_loss=0.2377, over 29471.00 frames. ], tot_loss[loss=0.2319, ctc_loss=0.108, cr_loss=0.3478, attn_decoder_loss=0.238, over 5819180.08 frames. ], batch size: 78, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:13:17,850 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.639e+01 8.737e+01 9.338e+01 9.931e+01 1.218e+02, threshold=1.868e+02, percent-clipped=0.0 +2024-09-20 11:13:32,024 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.self_attn2.whiten, num_groups=1, num_channels=512, metric=19.37 vs. 
limit=22.5 +2024-09-20 11:13:45,121 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer2.prob, batch_count=876680.0, ans=0.125 +2024-09-20 11:13:46,622 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=876680.0, ans=0.125 +2024-09-20 11:14:06,284 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:14:24,119 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer2.prob, batch_count=876760.0, ans=0.125 +2024-09-20 11:14:30,277 INFO [train.py:1198] (1/2) Epoch 49, batch 2000, loss[loss=0.204, ctc_loss=0.0896, cr_loss=0.3147, attn_decoder_loss=0.2097, over 29399.00 frames. ], tot_loss[loss=0.2322, ctc_loss=0.1082, cr_loss=0.3483, attn_decoder_loss=0.2382, over 5796678.06 frames. ], batch size: 67, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:14:34,159 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.03 vs. limit=6.0 +2024-09-20 11:14:53,340 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=876840.0, ans=0.0 +2024-09-20 11:14:59,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=876880.0, ans=0.0 +2024-09-20 11:15:47,891 INFO [train.py:1198] (1/2) Epoch 49, batch 2050, loss[loss=0.2072, ctc_loss=0.08921, cr_loss=0.297, attn_decoder_loss=0.2137, over 29423.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1077, cr_loss=0.3471, attn_decoder_loss=0.2372, over 5789602.60 frames. ], batch size: 70, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:15:50,913 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.506e+01 8.585e+01 9.358e+01 1.005e+02 5.300e+02, threshold=1.872e+02, percent-clipped=1.0 +2024-09-20 11:15:53,948 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=768, metric=7.31 vs. limit=15.0 +2024-09-20 11:16:00,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=877000.0, ans=0.125 +2024-09-20 11:16:04,552 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.65 vs. limit=15.0 +2024-09-20 11:16:14,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=877040.0, ans=0.125 +2024-09-20 11:16:25,124 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=877080.0, ans=0.2 +2024-09-20 11:16:31,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=877080.0, ans=0.125 +2024-09-20 11:16:59,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=877160.0, ans=15.0 +2024-09-20 11:17:05,477 INFO [train.py:1198] (1/2) Epoch 49, batch 2100, loss[loss=0.2348, ctc_loss=0.1139, cr_loss=0.3764, attn_decoder_loss=0.2399, over 29780.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1076, cr_loss=0.3464, attn_decoder_loss=0.2369, over 5801960.17 frames. 
], batch size: 81, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:17:10,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass_mid.scale_min, batch_count=877200.0, ans=0.2 +2024-09-20 11:17:24,230 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=3.82 vs. limit=15.0 +2024-09-20 11:17:40,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=877280.0, ans=0.125 +2024-09-20 11:17:45,015 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.04 vs. limit=15.0 +2024-09-20 11:17:46,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=877280.0, ans=0.09899494936611666 +2024-09-20 11:17:58,266 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:18:09,460 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=4.97 vs. limit=15.0 +2024-09-20 11:18:20,199 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.22 vs. limit=15.0 +2024-09-20 11:18:20,695 INFO [train.py:1198] (1/2) Epoch 49, batch 2150, loss[loss=0.2282, ctc_loss=0.1064, cr_loss=0.3592, attn_decoder_loss=0.2338, over 29453.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1073, cr_loss=0.3461, attn_decoder_loss=0.2364, over 5816855.09 frames. ], batch size: 78, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:18:23,754 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.475e+01 8.478e+01 8.920e+01 9.429e+01 1.261e+02, threshold=1.784e+02, percent-clipped=0.0 +2024-09-20 11:18:24,133 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.skip_rate, batch_count=877400.0, ans=0.035 +2024-09-20 11:19:09,810 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.const_attention_rate, batch_count=877520.0, ans=0.025 +2024-09-20 11:19:20,420 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=877520.0, ans=0.125 +2024-09-20 11:19:29,575 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:19:33,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=877560.0, ans=0.0 +2024-09-20 11:19:38,778 INFO [train.py:1198] (1/2) Epoch 49, batch 2200, loss[loss=0.2324, ctc_loss=0.105, cr_loss=0.3387, attn_decoder_loss=0.239, over 29627.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1072, cr_loss=0.3462, attn_decoder_loss=0.2365, over 5812010.08 frames. ], batch size: 86, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:19:56,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=877640.0, ans=0.2 +2024-09-20 11:20:09,024 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.63 vs. 
limit=15.0 +2024-09-20 11:20:19,232 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.68 vs. limit=15.0 +2024-09-20 11:20:35,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=877720.0, ans=0.1 +2024-09-20 11:20:48,315 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.23 vs. limit=6.0 +2024-09-20 11:20:53,613 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=877760.0, ans=0.125 +2024-09-20 11:20:56,325 INFO [train.py:1198] (1/2) Epoch 49, batch 2250, loss[loss=0.2435, ctc_loss=0.1197, cr_loss=0.3868, attn_decoder_loss=0.2486, over 29739.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1068, cr_loss=0.3451, attn_decoder_loss=0.2363, over 5811607.96 frames. ], batch size: 82, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:20:59,106 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.496e+01 8.798e+01 9.192e+01 9.899e+01 1.510e+02, threshold=1.838e+02, percent-clipped=0.0 +2024-09-20 11:21:07,760 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.91 vs. limit=15.0 +2024-09-20 11:21:19,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=877840.0, ans=0.1 +2024-09-20 11:21:25,040 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:21:34,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=877880.0, ans=0.125 +2024-09-20 11:21:35,484 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=877880.0, ans=0.1 +2024-09-20 11:21:36,960 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer2.prob, batch_count=877880.0, ans=0.125 +2024-09-20 11:21:46,050 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=877920.0, ans=0.125 +2024-09-20 11:22:05,492 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=877960.0, ans=0.125 +2024-09-20 11:22:08,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=877960.0, ans=0.125 +2024-09-20 11:22:11,329 INFO [train.py:1198] (1/2) Epoch 49, batch 2300, loss[loss=0.2123, ctc_loss=0.09865, cr_loss=0.3151, attn_decoder_loss=0.218, over 29344.00 frames. ], tot_loss[loss=0.2291, ctc_loss=0.1058, cr_loss=0.3429, attn_decoder_loss=0.2352, over 5797729.91 frames. 
], batch size: 71, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:22:21,954 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=878000.0, ans=0.125 +2024-09-20 11:22:41,658 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=878080.0, ans=0.125 +2024-09-20 11:22:55,364 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=878120.0, ans=0.0 +2024-09-20 11:23:01,348 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=878120.0, ans=0.125 +2024-09-20 11:23:10,613 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.52 vs. limit=15.0 +2024-09-20 11:23:15,993 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=878160.0, ans=0.125 +2024-09-20 11:23:25,951 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=6.95 vs. limit=15.0 +2024-09-20 11:23:29,171 INFO [train.py:1198] (1/2) Epoch 49, batch 2350, loss[loss=0.2394, ctc_loss=0.1171, cr_loss=0.3783, attn_decoder_loss=0.2446, over 29702.00 frames. ], tot_loss[loss=0.2294, ctc_loss=0.1061, cr_loss=0.3437, attn_decoder_loss=0.2355, over 5804039.25 frames. ], batch size: 83, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:23:32,110 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.643e+01 8.491e+01 9.028e+01 9.631e+01 3.047e+02, threshold=1.806e+02, percent-clipped=1.0 +2024-09-20 11:24:01,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=878280.0, ans=0.1 +2024-09-20 11:24:04,667 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=878280.0, ans=0.125 +2024-09-20 11:24:04,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=878280.0, ans=0.125 +2024-09-20 11:24:06,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=878280.0, ans=0.125 +2024-09-20 11:24:10,769 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer2.prob, batch_count=878280.0, ans=0.125 +2024-09-20 11:24:47,382 INFO [train.py:1198] (1/2) Epoch 49, batch 2400, loss[loss=0.2174, ctc_loss=0.1055, cr_loss=0.3309, attn_decoder_loss=0.2225, over 29520.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1068, cr_loss=0.3453, attn_decoder_loss=0.236, over 5808095.53 frames. ], batch size: 76, lr: 2.24e-03, grad_scale: 32.0 +2024-09-20 11:25:06,476 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.88 vs. limit=15.0 +2024-09-20 11:25:10,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=878440.0, ans=0.125 +2024-09-20 11:25:17,374 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=4.94 vs. 
limit=15.0 +2024-09-20 11:25:40,086 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.conv_module1.whiten, num_groups=1, num_channels=192, metric=7.27 vs. limit=15.0 +2024-09-20 11:25:43,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=878520.0, ans=0.1 +2024-09-20 11:26:02,941 INFO [train.py:1198] (1/2) Epoch 49, batch 2450, loss[loss=0.2319, ctc_loss=0.1123, cr_loss=0.3607, attn_decoder_loss=0.2372, over 29686.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1074, cr_loss=0.3468, attn_decoder_loss=0.2368, over 5783518.54 frames. ], batch size: 82, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:26:07,322 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.282e+01 8.775e+01 9.341e+01 9.851e+01 1.765e+02, threshold=1.868e+02, percent-clipped=0.0 +2024-09-20 11:26:11,980 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=878600.0, ans=0.1 +2024-09-20 11:26:13,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=878600.0, ans=0.0 +2024-09-20 11:26:28,943 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=9.69 vs. limit=15.0 +2024-09-20 11:26:34,491 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.bypass.scale_min, batch_count=878680.0, ans=0.2 +2024-09-20 11:26:48,023 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=878720.0, ans=0.125 +2024-09-20 11:27:02,651 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=17.36 vs. limit=22.5 +2024-09-20 11:27:14,276 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=878760.0, ans=0.0 +2024-09-20 11:27:19,940 INFO [train.py:1198] (1/2) Epoch 49, batch 2500, loss[loss=0.2374, ctc_loss=0.108, cr_loss=0.3401, attn_decoder_loss=0.2443, over 29626.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1074, cr_loss=0.3472, attn_decoder_loss=0.2368, over 5793881.12 frames. ], batch size: 86, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:27:27,716 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:27:34,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=878800.0, ans=0.2 +2024-09-20 11:27:50,922 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=878880.0, ans=0.0 +2024-09-20 11:28:28,099 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.06 vs. limit=10.0 +2024-09-20 11:28:33,304 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=878960.0, ans=0.125 +2024-09-20 11:28:37,629 INFO [train.py:1198] (1/2) Epoch 49, batch 2550, loss[loss=0.2041, ctc_loss=0.08591, cr_loss=0.3093, attn_decoder_loss=0.2104, over 29378.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1077, cr_loss=0.3476, attn_decoder_loss=0.2368, over 5796757.26 frames. 
], batch size: 67, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:28:42,018 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.025e+01 8.758e+01 9.202e+01 9.559e+01 1.179e+02, threshold=1.840e+02, percent-clipped=0.0 +2024-09-20 11:29:04,962 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=879040.0, ans=0.125 +2024-09-20 11:29:10,037 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.30 vs. limit=6.0 +2024-09-20 11:29:12,693 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.48 vs. limit=10.0 +2024-09-20 11:29:30,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=879120.0, ans=0.1 +2024-09-20 11:29:35,908 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.36 vs. limit=15.0 +2024-09-20 11:29:50,598 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=879160.0, ans=0.1 +2024-09-20 11:29:53,824 INFO [train.py:1198] (1/2) Epoch 49, batch 2600, loss[loss=0.2322, ctc_loss=0.111, cr_loss=0.359, attn_decoder_loss=0.2377, over 29456.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1079, cr_loss=0.3477, attn_decoder_loss=0.2373, over 5793667.65 frames. ], batch size: 78, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:30:08,981 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:30:19,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=879240.0, ans=0.125 +2024-09-20 11:30:48,143 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer2.prob, batch_count=879320.0, ans=0.125 +2024-09-20 11:30:59,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass_mid.scale_min, batch_count=879360.0, ans=0.2 +2024-09-20 11:31:02,232 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=879360.0, ans=0.1 +2024-09-20 11:31:10,896 INFO [train.py:1198] (1/2) Epoch 49, batch 2650, loss[loss=0.2405, ctc_loss=0.1181, cr_loss=0.3763, attn_decoder_loss=0.2457, over 29234.00 frames. ], tot_loss[loss=0.2314, ctc_loss=0.1078, cr_loss=0.3477, attn_decoder_loss=0.2374, over 5799289.61 frames. 
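[Annotation] The recurring `WARNING [optim.py:487] Clipping_scale=2.0, grad-norm quartiles ... threshold=..., percent-clipped=...` lines summarize the distribution of recent gradient norms and the clipping threshold derived from it. A rough sketch of how such statistics could be collected is below; the window length and the rule `threshold = clipping_scale * median` are assumptions for illustration, not icefall's exact optim.py logic.

# Rough sketch of quartile-based gradient-norm tracking and clipping,
# mirroring the "grad-norm quartiles ... threshold ... percent-clipped"
# warnings above. Window length and threshold rule are assumptions.
from collections import deque
import torch

class GradNormClipper:
    def __init__(self, clipping_scale: float = 2.0, window: int = 1000):
        self.clipping_scale = clipping_scale
        self.norms = deque(maxlen=window)
        self.num_clipped = 0
        self.num_steps = 0

    def clip_(self, params) -> float:
        params = [p for p in params if p.grad is not None]
        total = torch.norm(torch.stack([p.grad.norm() for p in params]))
        self.norms.append(total.item())
        self.num_steps += 1
        s = sorted(self.norms)
        median = s[len(s) // 2]
        threshold = self.clipping_scale * median
        if total > threshold:
            self.num_clipped += 1
            for p in params:
                p.grad.mul_(threshold / total)
        return total.item()

    def quartiles(self):
        # min / 25% / median / 75% / max of recent grad norms,
        # the five numbers printed in the warnings above
        s = sorted(self.norms)
        n = len(s) - 1
        return [s[int(round(f * n))] for f in (0.0, 0.25, 0.5, 0.75, 1.0)]

    def percent_clipped(self) -> float:
        return 100.0 * self.num_clipped / max(1, self.num_steps)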
], batch size: 100, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:31:15,430 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.678e+01 8.630e+01 9.011e+01 9.615e+01 2.139e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 11:31:20,991 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=879400.0, ans=0.07 +2024-09-20 11:31:31,414 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=879440.0, ans=0.0 +2024-09-20 11:31:34,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=879440.0, ans=0.125 +2024-09-20 11:31:44,695 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=879480.0, ans=0.125 +2024-09-20 11:31:52,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=879480.0, ans=0.125 +2024-09-20 11:31:54,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=879480.0, ans=0.125 +2024-09-20 11:32:00,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.balancer1.prob, batch_count=879520.0, ans=0.125 +2024-09-20 11:32:11,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=879560.0, ans=0.1 +2024-09-20 11:32:12,938 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=879560.0, ans=0.0 +2024-09-20 11:32:27,636 INFO [train.py:1198] (1/2) Epoch 49, batch 2700, loss[loss=0.2466, ctc_loss=0.119, cr_loss=0.3788, attn_decoder_loss=0.2524, over 29517.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1079, cr_loss=0.3475, attn_decoder_loss=0.2376, over 5795243.03 frames. ], batch size: 87, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:32:28,305 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.47 vs. limit=6.0 +2024-09-20 11:32:37,395 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=512, metric=11.79 vs. limit=15.0 +2024-09-20 11:32:44,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass_mid.scale_min, batch_count=879640.0, ans=0.2 +2024-09-20 11:32:44,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=879640.0, ans=0.125 +2024-09-20 11:32:55,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.prob, batch_count=879640.0, ans=0.125 +2024-09-20 11:33:43,447 INFO [train.py:1198] (1/2) Epoch 49, batch 2750, loss[loss=0.2153, ctc_loss=0.09901, cr_loss=0.3229, attn_decoder_loss=0.221, over 29508.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.107, cr_loss=0.3453, attn_decoder_loss=0.2364, over 5794718.60 frames. 
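[Annotation] Each `Whitening: name=..., num_groups=..., num_channels=..., metric=X vs. limit=Y` line compares a statistic of an activation's channel covariance against a limit; the auxiliary penalty is only relevant when the metric exceeds the limit. One plausible definition of such a metric, measuring how far the covariance is from a multiple of the identity, is sketched below; this is a reconstruction for illustration, not the exact formula in scaling.py.

# One plausible whitening metric, reconstructed for illustration:
# for channel covariance C, metric = dim(C) * trace(C @ C) / trace(C)**2.
# It equals 1.0 when C is a multiple of the identity (fully "white")
# and grows as the eigenvalue spectrum becomes more uneven.
import torch

def whitening_metric(x: torch.Tensor, num_groups: int = 1) -> torch.Tensor:
    # x: (num_frames, num_channels); channels are split into groups
    n, c = x.shape
    x = x.reshape(n, num_groups, c // num_groups).transpose(0, 1)
    x = x - x.mean(dim=1, keepdim=True)
    cov = torch.matmul(x.transpose(1, 2), x) / n      # (groups, c/g, c/g)
    tr = cov.diagonal(dim1=1, dim2=2).sum(dim=1)      # trace(C)
    tr_sq = (cov * cov).sum(dim=(1, 2))               # trace(C @ C), C symmetric
    metric = cov.shape[-1] * tr_sq / (tr * tr + 1e-20)
    return metric.mean()

x = torch.randn(1000, 192)
print(whitening_metric(x))  # close to 1.0 for white Gaussian features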
], batch size: 75, lr: 2.24e-03, grad_scale: 8.0 +2024-09-20 11:33:49,655 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.592e+01 8.773e+01 9.217e+01 9.860e+01 5.240e+02, threshold=1.843e+02, percent-clipped=1.0 +2024-09-20 11:34:02,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=879840.0, ans=0.125 +2024-09-20 11:34:06,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.self_attn_weights.pos_emb_skip_rate, batch_count=879840.0, ans=0.0 +2024-09-20 11:34:06,801 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.prob, batch_count=879840.0, ans=0.125 +2024-09-20 11:34:10,104 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.53 vs. limit=12.0 +2024-09-20 11:34:17,177 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:34:23,191 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=879880.0, ans=0.125 +2024-09-20 11:34:27,704 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=879920.0, ans=0.125 +2024-09-20 11:34:38,135 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=879920.0, ans=0.025 +2024-09-20 11:34:57,029 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=879960.0, ans=0.125 +2024-09-20 11:35:09,257 INFO [train.py:1198] (1/2) Epoch 49, batch 2800, loss[loss=0.2482, ctc_loss=0.1364, cr_loss=0.375, attn_decoder_loss=0.2523, over 20086.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1076, cr_loss=0.3465, attn_decoder_loss=0.2368, over 5776416.77 frames. ], batch size: 210, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:35:31,478 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=880040.0, ans=0.0 +2024-09-20 11:35:32,763 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=880040.0, ans=0.125 +2024-09-20 11:35:33,420 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=10.02 vs. limit=15.0 +2024-09-20 11:35:46,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=880080.0, ans=0.125 +2024-09-20 11:36:02,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=880120.0, ans=0.125 +2024-09-20 11:36:16,602 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=880160.0, ans=0.1 +2024-09-20 11:36:26,657 INFO [train.py:1198] (1/2) Epoch 49, batch 2850, loss[loss=0.2239, ctc_loss=0.1037, cr_loss=0.3395, attn_decoder_loss=0.2297, over 29524.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1083, cr_loss=0.3478, attn_decoder_loss=0.2375, over 5762188.66 frames. 
], batch size: 77, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:36:32,610 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.555e+01 8.815e+01 9.180e+01 9.751e+01 2.075e+02, threshold=1.836e+02, percent-clipped=1.0 +2024-09-20 11:36:45,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module1.balancer1.prob, batch_count=880240.0, ans=0.125 +2024-09-20 11:36:48,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=880240.0, ans=0.125 +2024-09-20 11:37:07,888 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.prob, batch_count=880280.0, ans=0.125 +2024-09-20 11:37:19,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.min_positive, batch_count=880320.0, ans=0.025 +2024-09-20 11:37:29,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.scale_min, batch_count=880360.0, ans=0.2 +2024-09-20 11:37:42,425 INFO [train.py:1198] (1/2) Epoch 49, batch 2900, loss[loss=0.2194, ctc_loss=0.09385, cr_loss=0.3165, attn_decoder_loss=0.2263, over 29424.00 frames. ], tot_loss[loss=0.2325, ctc_loss=0.1091, cr_loss=0.3498, attn_decoder_loss=0.2384, over 5787678.24 frames. ], batch size: 79, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:37:44,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=880400.0, ans=0.2 +2024-09-20 11:38:01,389 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.47 vs. limit=22.5 +2024-09-20 11:38:16,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer2.min_abs, batch_count=880480.0, ans=0.5 +2024-09-20 11:38:22,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=880480.0, ans=0.125 +2024-09-20 11:38:30,333 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.46 vs. limit=15.0 +2024-09-20 11:38:35,847 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer1.prob, batch_count=880520.0, ans=0.125 +2024-09-20 11:38:36,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.45 vs. limit=15.0 +2024-09-20 11:38:44,945 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=880560.0, ans=0.0 +2024-09-20 11:38:51,424 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.2.prob, batch_count=880560.0, ans=0.125 +2024-09-20 11:39:02,487 INFO [train.py:1198] (1/2) Epoch 49, batch 2950, loss[loss=0.2332, ctc_loss=0.1094, cr_loss=0.3661, attn_decoder_loss=0.2388, over 29526.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1078, cr_loss=0.3469, attn_decoder_loss=0.2372, over 5783159.42 frames. 
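[Annotation] Each batch line reports four quantities: the combined `loss` plus its CTC, consistency-regularization (`cr_loss`) and attention-decoder components. The logged numbers are consistent with a fixed weighted sum, e.g. for batch 2400 above, 0.1*0.1055 + 0.9*0.2225 + 0.02*0.3309 = 0.2174, exactly the logged loss. The sketch below uses those inferred weights; the actual values are set by training flags this log does not show.

# Sketch of the combined loss implied by the log lines above.
# The weights (0.1 / 0.9 / 0.02) are inferred from the logged numbers
# and may be configured differently via the training flags.
import torch

def combined_loss(ctc_loss, attn_decoder_loss, cr_loss,
                  ctc_weight=0.1, attn_weight=0.9, cr_weight=0.02):
    return (ctc_weight * ctc_loss
            + attn_weight * attn_decoder_loss
            + cr_weight * cr_loss)

print(combined_loss(torch.tensor(0.1055),
                    torch.tensor(0.2225),
                    torch.tensor(0.3309)))  # tensor(0.2174), matching the log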
], batch size: 75, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:39:08,386 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.497e+01 8.608e+01 9.182e+01 9.827e+01 1.689e+02, threshold=1.836e+02, percent-clipped=0.0 +2024-09-20 11:39:16,957 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_whiten.whitening_limit, batch_count=880640.0, ans=15.0 +2024-09-20 11:39:21,011 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=880640.0, ans=0.0 +2024-09-20 11:40:18,499 INFO [train.py:1198] (1/2) Epoch 49, batch 3000, loss[loss=0.2289, ctc_loss=0.0982, cr_loss=0.3212, attn_decoder_loss=0.2363, over 29767.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1075, cr_loss=0.3462, attn_decoder_loss=0.2368, over 5783868.65 frames. ], batch size: 81, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:40:18,500 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 11:40:24,652 INFO [zipformer.py:1858] (1/2) name=encoder.encoders.4.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([4.6914, 4.6440, 4.2960, 4.6053], device='cuda:1') +2024-09-20 11:40:36,855 INFO [train.py:1230] (1/2) Epoch 49, validation: loss=0.2126, ctc_loss=0.03669, cr_loss=6.618e-15, attn_decoder_loss=0.2322, over 944034.00 frames. +2024-09-20 11:40:36,855 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 11:40:47,861 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.const_attention_rate, batch_count=880800.0, ans=0.025 +2024-09-20 11:40:48,575 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=8.15 vs. limit=15.0 +2024-09-20 11:41:13,259 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass_mid.scale_min, batch_count=880880.0, ans=0.2 +2024-09-20 11:41:14,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=880880.0, ans=0.125 +2024-09-20 11:41:28,381 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=880920.0, ans=0.125 +2024-09-20 11:41:28,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=880920.0, ans=0.125 +2024-09-20 11:41:31,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=880920.0, ans=0.0 +2024-09-20 11:41:37,524 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=880960.0, ans=0.125 +2024-09-20 11:41:43,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass_mid.scale_min, batch_count=880960.0, ans=0.2 +2024-09-20 11:41:47,118 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=15.96 vs. limit=22.5 +2024-09-20 11:41:51,205 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=881000.0, ans=0.0 +2024-09-20 11:41:52,486 INFO [train.py:1198] (1/2) Epoch 49, batch 3050, loss[loss=0.2134, ctc_loss=0.09553, cr_loss=0.3236, attn_decoder_loss=0.2193, over 29538.00 frames. 
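[Annotation] During the validation pass above, the log also prints the entropy of selected self-attention weight distributions (the `attn_weights_entropy = tensor([...])` line), a quick diagnostic for heads that have collapsed to near-deterministic attention. A sketch of that computation follows; the tensor layout and averaging axes are assumptions.

# Sketch of the attention-entropy diagnostic printed during validation.
# attn_weights: (num_heads, query_len, key_len), rows summing to 1.
# Higher entropy = more diffuse attention; values near 0 indicate a
# head that attends to a single position. Axis conventions are assumed.
import torch

def attn_weights_entropy(attn_weights: torch.Tensor) -> torch.Tensor:
    eps = 1.0e-20
    ent = -(attn_weights * (attn_weights + eps).log()).sum(dim=-1)
    return ent.mean(dim=-1)  # average over query positions -> one value per head

w = torch.softmax(torch.randn(4, 100, 100), dim=-1)
print(attn_weights_entropy(w))  # a few nats per head, comparable to the logged tensor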
], tot_loss[loss=0.2313, ctc_loss=0.1079, cr_loss=0.3464, attn_decoder_loss=0.2373, over 5778679.04 frames. ], batch size: 76, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:41:54,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=881000.0, ans=0.125 +2024-09-20 11:41:58,509 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.217e+01 8.535e+01 9.070e+01 9.568e+01 1.381e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 11:43:11,829 INFO [train.py:1198] (1/2) Epoch 49, batch 3100, loss[loss=0.2439, ctc_loss=0.1156, cr_loss=0.3545, attn_decoder_loss=0.2502, over 29293.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1074, cr_loss=0.3451, attn_decoder_loss=0.2365, over 5778009.30 frames. ], batch size: 100, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:43:43,818 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 11:43:57,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.skip_rate, batch_count=881320.0, ans=0.07 +2024-09-20 11:43:57,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer2.prob, batch_count=881320.0, ans=0.125 +2024-09-20 11:44:06,494 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=881320.0, ans=0.125 +2024-09-20 11:44:27,457 INFO [train.py:1198] (1/2) Epoch 49, batch 3150, loss[loss=0.2504, ctc_loss=0.1225, cr_loss=0.387, attn_decoder_loss=0.256, over 29011.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1076, cr_loss=0.3456, attn_decoder_loss=0.2368, over 5784236.03 frames. ], batch size: 104, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:44:33,450 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.545e+01 8.538e+01 9.268e+01 9.767e+01 2.524e+02, threshold=1.854e+02, percent-clipped=1.0 +2024-09-20 11:44:39,750 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=881400.0, ans=0.125 +2024-09-20 11:44:56,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=881480.0, ans=0.125 +2024-09-20 11:45:05,046 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=881480.0, ans=0.125 +2024-09-20 11:45:11,906 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.43 vs. limit=15.0 +2024-09-20 11:45:20,134 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=881520.0, ans=0.125 +2024-09-20 11:45:27,497 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer1.min_positive, batch_count=881560.0, ans=0.025 +2024-09-20 11:45:27,643 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=881560.0, ans=0.125 +2024-09-20 11:45:42,744 INFO [train.py:1198] (1/2) Epoch 49, batch 3200, loss[loss=0.227, ctc_loss=0.1043, cr_loss=0.3503, attn_decoder_loss=0.2328, over 29425.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1073, cr_loss=0.3452, attn_decoder_loss=0.2364, over 5793925.63 frames. 
], batch size: 79, lr: 2.24e-03, grad_scale: 32.0 +2024-09-20 11:45:47,590 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=881600.0, ans=0.125 +2024-09-20 11:45:49,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=881600.0, ans=0.0 +2024-09-20 11:46:04,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=881640.0, ans=0.1 +2024-09-20 11:46:10,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=881640.0, ans=0.025 +2024-09-20 11:46:21,108 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=881680.0, ans=0.1 +2024-09-20 11:46:53,892 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=881760.0, ans=0.1 +2024-09-20 11:46:54,380 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module1.whiten, num_groups=1, num_channels=768, metric=6.10 vs. limit=15.0 +2024-09-20 11:47:02,653 INFO [train.py:1198] (1/2) Epoch 49, batch 3250, loss[loss=0.2359, ctc_loss=0.1118, cr_loss=0.3548, attn_decoder_loss=0.2418, over 29710.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1073, cr_loss=0.3451, attn_decoder_loss=0.2369, over 5799826.72 frames. ], batch size: 84, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:47:10,204 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.221e+01 8.742e+01 9.266e+01 9.794e+01 1.259e+02, threshold=1.853e+02, percent-clipped=0.0 +2024-09-20 11:47:24,730 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.67 vs. limit=10.0 +2024-09-20 11:47:35,790 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=881880.0, ans=0.0 +2024-09-20 11:47:50,748 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=881920.0, ans=0.2 +2024-09-20 11:47:55,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.max_abs, batch_count=881920.0, ans=10.0 +2024-09-20 11:48:04,842 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=8.03 vs. limit=15.0 +2024-09-20 11:48:17,509 INFO [train.py:1198] (1/2) Epoch 49, batch 3300, loss[loss=0.241, ctc_loss=0.1111, cr_loss=0.3482, attn_decoder_loss=0.2477, over 28294.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1066, cr_loss=0.3439, attn_decoder_loss=0.2358, over 5796788.63 frames. ], batch size: 111, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:48:27,474 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.29 vs. 
limit=15.0 +2024-09-20 11:48:48,181 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=882080.0, ans=0.125 +2024-09-20 11:48:48,317 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=882080.0, ans=0.0 +2024-09-20 11:48:52,693 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=882080.0, ans=0.0 +2024-09-20 11:49:32,906 INFO [train.py:1198] (1/2) Epoch 49, batch 3350, loss[loss=0.2505, ctc_loss=0.1311, cr_loss=0.3949, attn_decoder_loss=0.255, over 28918.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1075, cr_loss=0.3457, attn_decoder_loss=0.2367, over 5773928.82 frames. ], batch size: 104, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:49:39,155 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=882200.0, ans=0.125 +2024-09-20 11:49:39,199 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=882200.0, ans=0.125 +2024-09-20 11:49:40,360 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.602e+01 8.704e+01 9.230e+01 9.837e+01 1.570e+02, threshold=1.846e+02, percent-clipped=0.0 +2024-09-20 11:49:44,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=4.40 vs. limit=12.0 +2024-09-20 11:50:06,921 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=13.85 vs. limit=15.0 +2024-09-20 11:50:07,163 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.whiten, num_groups=1, num_channels=768, metric=5.69 vs. limit=12.0 +2024-09-20 11:50:20,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=882320.0, ans=0.0 +2024-09-20 11:50:36,735 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=14.32 vs. limit=22.5 +2024-09-20 11:50:52,873 INFO [train.py:1198] (1/2) Epoch 49, batch 3400, loss[loss=0.2044, ctc_loss=0.08935, cr_loss=0.3071, attn_decoder_loss=0.2103, over 29345.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1074, cr_loss=0.3452, attn_decoder_loss=0.2364, over 5765903.62 frames. ], batch size: 67, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:51:05,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=882400.0, ans=0.025 +2024-09-20 11:51:12,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=882440.0, ans=0.125 +2024-09-20 11:51:13,462 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=11.58 vs. 
limit=15.0 +2024-09-20 11:51:50,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=882520.0, ans=0.07 +2024-09-20 11:52:05,335 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=882560.0, ans=0.125 +2024-09-20 11:52:05,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=882560.0, ans=0.125 +2024-09-20 11:52:08,170 INFO [train.py:1198] (1/2) Epoch 49, batch 3450, loss[loss=0.2383, ctc_loss=0.1047, cr_loss=0.3346, attn_decoder_loss=0.2457, over 28202.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1074, cr_loss=0.3455, attn_decoder_loss=0.2367, over 5772384.89 frames. ], batch size: 111, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:52:15,707 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.614e+01 8.667e+01 9.196e+01 9.628e+01 1.869e+02, threshold=1.839e+02, percent-clipped=1.0 +2024-09-20 11:52:25,153 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=882640.0, ans=0.125 +2024-09-20 11:52:39,815 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=882680.0, ans=0.025 +2024-09-20 11:52:41,873 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.85 vs. limit=12.0 +2024-09-20 11:52:49,375 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.92 vs. limit=15.0 +2024-09-20 11:52:56,402 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.const_attention_rate, batch_count=882720.0, ans=0.025 +2024-09-20 11:52:57,001 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.88 vs. limit=6.0 +2024-09-20 11:53:19,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=882760.0, ans=0.2 +2024-09-20 11:53:23,247 INFO [train.py:1198] (1/2) Epoch 49, batch 3500, loss[loss=0.2081, ctc_loss=0.08918, cr_loss=0.3044, attn_decoder_loss=0.2146, over 29341.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.107, cr_loss=0.3447, attn_decoder_loss=0.2359, over 5775290.92 frames. ], batch size: 71, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:53:43,742 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=6.13 vs. limit=15.0 +2024-09-20 11:53:59,382 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=882880.0, ans=0.125 +2024-09-20 11:54:05,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=10.87 vs. 
limit=15.0 +2024-09-20 11:54:15,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.ff2_skip_rate, batch_count=882920.0, ans=0.0 +2024-09-20 11:54:15,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=882920.0, ans=0.125 +2024-09-20 11:54:17,094 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=882920.0, ans=0.125 +2024-09-20 11:54:17,234 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.attention_skip_rate, batch_count=882920.0, ans=0.0 +2024-09-20 11:54:17,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.prob, batch_count=882920.0, ans=0.125 +2024-09-20 11:54:20,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=882920.0, ans=0.0 +2024-09-20 11:54:24,753 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module1.balancer1.prob, batch_count=882960.0, ans=0.125 +2024-09-20 11:54:39,742 INFO [train.py:1198] (1/2) Epoch 49, batch 3550, loss[loss=0.2414, ctc_loss=0.1141, cr_loss=0.3659, attn_decoder_loss=0.2474, over 29720.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1072, cr_loss=0.3455, attn_decoder_loss=0.236, over 5781682.57 frames. ], batch size: 89, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:54:47,104 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.326e+01 8.606e+01 9.040e+01 9.689e+01 1.934e+02, threshold=1.808e+02, percent-clipped=1.0 +2024-09-20 11:55:13,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer_ff2.min_abs, batch_count=883080.0, ans=0.1 +2024-09-20 11:55:56,047 INFO [train.py:1198] (1/2) Epoch 49, batch 3600, loss[loss=0.2314, ctc_loss=0.1119, cr_loss=0.3589, attn_decoder_loss=0.2367, over 29495.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1071, cr_loss=0.345, attn_decoder_loss=0.2361, over 5791456.71 frames. ], batch size: 77, lr: 2.24e-03, grad_scale: 32.0 +2024-09-20 11:56:20,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=883240.0, ans=0.04949747468305833 +2024-09-20 11:56:22,456 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=883240.0, ans=15.0 +2024-09-20 11:56:23,171 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=883240.0, ans=0.125 +2024-09-20 11:56:39,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=883320.0, ans=0.125 +2024-09-20 11:56:43,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=883320.0, ans=0.125 +2024-09-20 11:56:46,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=883320.0, ans=0.025 +2024-09-20 11:56:57,068 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.min_positive, batch_count=883360.0, ans=0.025 +2024-09-20 11:57:10,212 INFO [train.py:1198] (1/2) Epoch 49, batch 3650, loss[loss=0.2413, ctc_loss=0.1235, cr_loss=0.3869, attn_decoder_loss=0.2458, over 29495.00 frames. 
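[Annotation] The `grad_scale` field in the batch lines bounces between 8.0, 16.0 and 32.0, the signature of dynamic loss scaling for mixed-precision training: the scale is halved on overflow and grown again after a run of overflow-free steps. A minimal sketch using PyTorch's stock GradScaler is below; the model, optimizer and loss are placeholders, and icefall's training loop layers clipping, scheduling and diagnostics on top of this pattern.

# Minimal dynamic loss-scaling loop with torch.cuda.amp, matching the
# grad_scale values (8.0 <-> 16.0 <-> 32.0) seen in the log.
# Requires a GPU; the model and loss are placeholders.
import torch

model = torch.nn.Linear(80, 500).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=2.24e-3)
scaler = torch.cuda.amp.GradScaler(init_scale=16.0)

for step in range(100):
    x = torch.randn(32, 80, device="cuda")
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(x).pow(2).mean()
    scaler.scale(loss).backward()
    scaler.step(optimizer)   # skipped internally if gradients overflowed
    scaler.update()          # halves the scale on overflow, grows it later
    # scaler.get_scale() is the quantity the log reports as grad_scale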
], tot_loss[loss=0.2295, ctc_loss=0.1065, cr_loss=0.3441, attn_decoder_loss=0.2355, over 5794851.63 frames. ], batch size: 90, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:57:11,934 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=883400.0, ans=0.1 +2024-09-20 11:57:14,990 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=883400.0, ans=0.125 +2024-09-20 11:57:18,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass.skip_rate, batch_count=883400.0, ans=0.07 +2024-09-20 11:57:19,164 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.791e+01 8.757e+01 9.183e+01 9.714e+01 2.760e+02, threshold=1.837e+02, percent-clipped=2.0 +2024-09-20 11:57:22,393 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=883400.0, ans=0.0 +2024-09-20 11:57:33,571 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.71 vs. limit=12.0 +2024-09-20 11:57:40,312 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer2.prob, batch_count=883480.0, ans=0.125 +2024-09-20 11:57:47,685 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=883480.0, ans=0.05 +2024-09-20 11:58:10,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=883560.0, ans=0.125 +2024-09-20 11:58:24,153 INFO [train.py:1198] (1/2) Epoch 49, batch 3700, loss[loss=0.2476, ctc_loss=0.1139, cr_loss=0.3583, attn_decoder_loss=0.2544, over 29696.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1068, cr_loss=0.3443, attn_decoder_loss=0.2359, over 5804041.64 frames. ], batch size: 84, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:58:33,278 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=883600.0, ans=0.2 +2024-09-20 11:58:38,732 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=5.07 vs. limit=15.0 +2024-09-20 11:58:51,024 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer2.prob, batch_count=883640.0, ans=0.125 +2024-09-20 11:58:52,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=883680.0, ans=0.0 +2024-09-20 11:59:31,227 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=883760.0, ans=0.125 +2024-09-20 11:59:38,352 INFO [train.py:1198] (1/2) Epoch 49, batch 3750, loss[loss=0.2082, ctc_loss=0.09443, cr_loss=0.3119, attn_decoder_loss=0.214, over 29345.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1065, cr_loss=0.3438, attn_decoder_loss=0.2355, over 5806552.42 frames. 
], batch size: 67, lr: 2.24e-03, grad_scale: 16.0 +2024-09-20 11:59:38,615 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=883800.0, ans=0.0 +2024-09-20 11:59:47,375 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.541e+01 8.555e+01 9.159e+01 9.903e+01 1.587e+02, threshold=1.832e+02, percent-clipped=0.0 +2024-09-20 11:59:47,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer2.prob, batch_count=883800.0, ans=0.125 +2024-09-20 12:00:07,492 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=5.81 vs. limit=15.0 +2024-09-20 12:00:20,065 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=883880.0, ans=0.0 +2024-09-20 12:00:21,611 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff2_skip_rate, batch_count=883920.0, ans=0.0 +2024-09-20 12:00:24,706 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=883920.0, ans=0.125 +2024-09-20 12:00:24,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module2.balancer2.prob, batch_count=883920.0, ans=0.125 +2024-09-20 12:00:29,185 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=883920.0, ans=0.125 +2024-09-20 12:00:35,773 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=5.02 vs. limit=15.0 +2024-09-20 12:00:46,136 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=4.69 vs. limit=12.0 +2024-09-20 12:00:47,366 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=883960.0, ans=0.125 +2024-09-20 12:00:54,870 INFO [train.py:1198] (1/2) Epoch 49, batch 3800, loss[loss=0.245, ctc_loss=0.1125, cr_loss=0.3655, attn_decoder_loss=0.2516, over 29615.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1064, cr_loss=0.344, attn_decoder_loss=0.2355, over 5797248.34 frames. ], batch size: 86, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:01:21,061 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=15.70 vs. limit=22.5 +2024-09-20 12:02:10,428 INFO [train.py:1198] (1/2) Epoch 49, batch 3850, loss[loss=0.2436, ctc_loss=0.1141, cr_loss=0.3442, attn_decoder_loss=0.2504, over 29271.00 frames. ], tot_loss[loss=0.2292, ctc_loss=0.1062, cr_loss=0.3435, attn_decoder_loss=0.2353, over 5812481.12 frames. ], batch size: 100, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:02:13,004 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.02 vs. limit=22.5 +2024-09-20 12:02:19,390 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.454e+01 8.680e+01 9.125e+01 9.699e+01 1.289e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-20 12:02:46,816 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.75 vs. 
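[Annotation] The learning rate in these lines decays very slowly (2.24e-03 earlier in the epoch, 2.23e-03 by batch 3800) because it is indexed by both the global batch count and the epoch. A sketch of an Eden-style schedule of the kind used in this code-base family follows; the constants (`base_lr`, `lr_batches`, `lr_epochs`) are assumptions, not values read from this log, so the printed number only matches the logged lr in order of magnitude.

# Sketch of an Eden-style learning-rate schedule, decaying in both
# batch count and epoch. All constants below are assumed values.
def eden_lr(base_lr: float, batch: int, epoch: int,
            lr_batches: float = 7500.0, lr_epochs: float = 3.5) -> float:
    batch_factor = ((batch**2 + lr_batches**2) / lr_batches**2) ** -0.25
    epoch_factor = ((epoch**2 + lr_epochs**2) / lr_epochs**2) ** -0.25
    return base_lr * batch_factor * epoch_factor

# Late in training the per-batch decay is tiny, which is why the log
# shows the lr creeping from 2.24e-03 to 2.23e-03 within one epoch.
print(eden_lr(0.045, batch=884600, epoch=49))  # same order as the logged lr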
limit=15.0 +2024-09-20 12:03:01,290 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.min_positive, batch_count=884320.0, ans=0.05 +2024-09-20 12:03:05,750 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:03:09,322 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=5.19 vs. limit=12.0 +2024-09-20 12:03:24,873 INFO [train.py:1198] (1/2) Epoch 49, batch 3900, loss[loss=0.2298, ctc_loss=0.09819, cr_loss=0.3345, attn_decoder_loss=0.237, over 29607.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1066, cr_loss=0.3446, attn_decoder_loss=0.2359, over 5817249.13 frames. ], batch size: 86, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:03:28,750 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=6.80 vs. limit=10.0 +2024-09-20 12:03:41,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=884440.0, ans=0.025 +2024-09-20 12:03:45,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=884440.0, ans=0.0 +2024-09-20 12:03:57,049 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=144, metric=5.95 vs. limit=10.0 +2024-09-20 12:04:09,538 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.balancer1.prob, batch_count=884520.0, ans=0.125 +2024-09-20 12:04:09,585 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=884520.0, ans=0.125 +2024-09-20 12:04:21,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=884520.0, ans=0.0 +2024-09-20 12:04:21,788 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.14 vs. limit=22.5 +2024-09-20 12:04:27,281 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=884560.0, ans=0.1 +2024-09-20 12:04:32,987 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=884560.0, ans=0.1 +2024-09-20 12:04:35,903 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.skip_rate, batch_count=884560.0, ans=0.04949747468305833 +2024-09-20 12:04:38,683 INFO [train.py:1198] (1/2) Epoch 49, batch 3950, loss[loss=0.2487, ctc_loss=0.1212, cr_loss=0.3738, attn_decoder_loss=0.2545, over 29487.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1068, cr_loss=0.3448, attn_decoder_loss=0.2364, over 5836220.31 frames. 
], batch size: 97, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:04:46,258 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=884600.0, ans=0.1 +2024-09-20 12:04:47,486 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.284e+01 8.739e+01 9.137e+01 9.584e+01 1.763e+02, threshold=1.827e+02, percent-clipped=0.0 +2024-09-20 12:05:11,443 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=884680.0, ans=0.125 +2024-09-20 12:05:13,154 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.68 vs. limit=15.0 +2024-09-20 12:05:36,422 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=884760.0, ans=0.125 +2024-09-20 12:05:39,148 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=884760.0, ans=0.0 +2024-09-20 12:05:50,639 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=884760.0, ans=0.125 +2024-09-20 12:05:50,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=884760.0, ans=0.2 +2024-09-20 12:05:53,640 INFO [train.py:1198] (1/2) Epoch 49, batch 4000, loss[loss=0.2147, ctc_loss=0.09272, cr_loss=0.3064, attn_decoder_loss=0.2214, over 29492.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1065, cr_loss=0.344, attn_decoder_loss=0.2362, over 5812743.84 frames. ], batch size: 74, lr: 2.23e-03, grad_scale: 32.0 +2024-09-20 12:05:56,730 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=884800.0, ans=0.1 +2024-09-20 12:05:56,765 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=884800.0, ans=0.2 +2024-09-20 12:06:01,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=884800.0, ans=0.125 +2024-09-20 12:06:25,314 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=8.01 vs. limit=15.0 +2024-09-20 12:06:42,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=884920.0, ans=0.125 +2024-09-20 12:06:53,625 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.49 vs. limit=22.5 +2024-09-20 12:07:01,760 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.hidden_balancer.prob, batch_count=884960.0, ans=0.125 +2024-09-20 12:07:08,771 INFO [train.py:1198] (1/2) Epoch 49, batch 4050, loss[loss=0.2539, ctc_loss=0.1303, cr_loss=0.3679, attn_decoder_loss=0.2595, over 20567.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1068, cr_loss=0.3443, attn_decoder_loss=0.2362, over 5795287.56 frames. 
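[Annotation] `tot_loss[... over N frames]` is not the loss of the current batch; it is a running average weighted by the number of frames in each batch. Since the logged frame counter hovers around 5.8M rather than growing without bound, the tracker plausibly applies some decay to the accumulators. The sketch below assumes a simple exponentially decayed, frames-weighted mean; the real bookkeeping in train.py may differ.

# Sketch of frames-weighted running-loss bookkeeping behind the
# "tot_loss[... over N frames]" entries. The per-batch decay is an
# assumption that keeps the frame counter near a steady value
# (roughly batch_frames / (1 - decay)) instead of growing forever.
class RunningLoss:
    def __init__(self, decay: float = 0.995):
        self.decay = decay
        self.loss_sum = 0.0
        self.frames = 0.0

    def update(self, batch_loss: float, batch_frames: float) -> float:
        self.loss_sum = self.decay * self.loss_sum + batch_loss * batch_frames
        self.frames = self.decay * self.frames + batch_frames
        return self.loss_sum / self.frames  # the reported tot_loss

tracker = RunningLoss()
for loss, frames in [(0.2394, 29702.0), (0.2174, 29520.0), (0.2319, 29686.0)]:
    print(f"tot_loss={tracker.update(loss, frames):.4f} "
          f"over {tracker.frames:.2f} frames")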
], batch size: 210, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:07:18,926 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.846e+01 8.931e+01 9.287e+01 9.798e+01 1.744e+02, threshold=1.857e+02, percent-clipped=0.0 +2024-09-20 12:07:22,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=885040.0, ans=0.125 +2024-09-20 12:07:23,635 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=885040.0, ans=0.0 +2024-09-20 12:07:25,045 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.skip_rate, batch_count=885040.0, ans=0.09899494936611666 +2024-09-20 12:07:44,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=885080.0, ans=0.1 +2024-09-20 12:08:02,391 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=5.53 vs. limit=15.0 +2024-09-20 12:08:22,202 INFO [train.py:1198] (1/2) Epoch 49, batch 4100, loss[loss=0.2527, ctc_loss=0.1276, cr_loss=0.4093, attn_decoder_loss=0.2575, over 29508.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1073, cr_loss=0.3457, attn_decoder_loss=0.2366, over 5789950.79 frames. ], batch size: 90, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:08:28,359 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=885200.0, ans=0.1 +2024-09-20 12:08:41,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=885240.0, ans=0.0 +2024-09-20 12:09:20,320 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.84 vs. limit=15.0 +2024-09-20 12:09:22,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=885360.0, ans=0.125 +2024-09-20 12:09:22,654 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=885360.0, ans=0.0 +2024-09-20 12:09:22,779 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=885360.0, ans=0.125 +2024-09-20 12:09:32,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module2.balancer1.max_abs, batch_count=885360.0, ans=10.0 +2024-09-20 12:09:35,735 INFO [train.py:1198] (1/2) Epoch 49, batch 4150, loss[loss=0.2286, ctc_loss=0.1151, cr_loss=0.3727, attn_decoder_loss=0.233, over 29488.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1075, cr_loss=0.3463, attn_decoder_loss=0.2365, over 5797230.93 frames. ], batch size: 77, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:09:46,199 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.506e+01 8.769e+01 9.382e+01 9.981e+01 1.562e+02, threshold=1.876e+02, percent-clipped=0.0 +2024-09-20 12:10:05,122 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.2.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:10:14,913 INFO [scaling.py:1024] (1/2) Whitening: name=encoder_embed.convnext.out_whiten, num_groups=1, num_channels=128, metric=4.58 vs. 
limit=5.0 +2024-09-20 12:10:21,469 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=885520.0, ans=0.0 +2024-09-20 12:10:24,390 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=885520.0, ans=0.125 +2024-09-20 12:10:52,059 INFO [train.py:1198] (1/2) Epoch 49, batch 4200, loss[loss=0.2597, ctc_loss=0.135, cr_loss=0.4146, attn_decoder_loss=0.2643, over 29503.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1078, cr_loss=0.3468, attn_decoder_loss=0.237, over 5798863.30 frames. ], batch size: 90, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:10:52,313 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer_ff3.min_abs, batch_count=885600.0, ans=0.2 +2024-09-20 12:11:09,271 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.47 vs. limit=22.5 +2024-09-20 12:11:12,331 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=6.79 vs. limit=15.0 +2024-09-20 12:12:02,836 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=885760.0, ans=0.025 +2024-09-20 12:12:02,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=885760.0, ans=0.1 +2024-09-20 12:12:05,522 INFO [train.py:1198] (1/2) Epoch 49, batch 4250, loss[loss=0.2168, ctc_loss=0.09539, cr_loss=0.315, attn_decoder_loss=0.2232, over 29483.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1076, cr_loss=0.3462, attn_decoder_loss=0.2369, over 5804612.66 frames. ], batch size: 74, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:12:15,831 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.509e+01 8.756e+01 9.233e+01 9.751e+01 2.001e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-20 12:12:17,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=885800.0, ans=0.1 +2024-09-20 12:12:29,112 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.attention_skip_rate, batch_count=885840.0, ans=0.0 +2024-09-20 12:12:32,201 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer1.prob, batch_count=885840.0, ans=0.125 +2024-09-20 12:12:37,033 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.09 vs. limit=12.0 +2024-09-20 12:12:54,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=885920.0, ans=0.125 +2024-09-20 12:13:05,493 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=256, metric=5.49 vs. limit=15.0 +2024-09-20 12:13:13,442 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:13:19,049 INFO [train.py:1198] (1/2) Epoch 49, batch 4300, loss[loss=0.2431, ctc_loss=0.1059, cr_loss=0.3338, attn_decoder_loss=0.251, over 29540.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1072, cr_loss=0.3456, attn_decoder_loss=0.2368, over 5794689.68 frames. 
], batch size: 87, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:13:24,852 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer1.min_positive, batch_count=886000.0, ans=0.025 +2024-09-20 12:13:43,149 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.80 vs. limit=15.0 +2024-09-20 12:14:00,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=886080.0, ans=0.0 +2024-09-20 12:14:10,131 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=886120.0, ans=0.125 +2024-09-20 12:14:15,018 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=11.71 vs. limit=15.0 +2024-09-20 12:14:18,856 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=886160.0, ans=0.1 +2024-09-20 12:14:24,751 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=886160.0, ans=0.125 +2024-09-20 12:14:34,870 INFO [train.py:1198] (1/2) Epoch 49, batch 4350, loss[loss=0.2501, ctc_loss=0.1247, cr_loss=0.3814, attn_decoder_loss=0.2555, over 29510.00 frames. ], tot_loss[loss=0.2339, ctc_loss=0.1097, cr_loss=0.3511, attn_decoder_loss=0.2399, over 5797428.30 frames. ], batch size: 97, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:14:36,960 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=13.85 vs. limit=22.5 +2024-09-20 12:14:45,120 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.682e+01 9.096e+01 9.498e+01 1.000e+02 1.959e+02, threshold=1.900e+02, percent-clipped=0.0 +2024-09-20 12:14:49,974 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.balancer.max_positive, batch_count=886240.0, ans=0.95 +2024-09-20 12:15:13,156 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=886280.0, ans=0.125 +2024-09-20 12:15:21,901 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff2_skip_rate, batch_count=886320.0, ans=0.0 +2024-09-20 12:15:48,111 INFO [train.py:1198] (1/2) Epoch 49, batch 4400, loss[loss=0.2438, ctc_loss=0.1168, cr_loss=0.3743, attn_decoder_loss=0.2495, over 27231.00 frames. ], tot_loss[loss=0.2355, ctc_loss=0.1105, cr_loss=0.3527, attn_decoder_loss=0.2415, over 5766722.36 frames. ], batch size: 124, lr: 2.23e-03, grad_scale: 32.0 +2024-09-20 12:15:58,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer2.prob, batch_count=886400.0, ans=0.125 +2024-09-20 12:15:59,480 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.56 vs. 
limit=15.0 +2024-09-20 12:16:06,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=886440.0, ans=0.125 +2024-09-20 12:16:06,630 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=886440.0, ans=0.0 +2024-09-20 12:16:08,044 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=886440.0, ans=0.025 +2024-09-20 12:16:19,855 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=886480.0, ans=0.125 +2024-09-20 12:16:27,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.ff3_skip_rate, batch_count=886480.0, ans=0.0 +2024-09-20 12:16:32,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward3.out_whiten.whitening_limit, batch_count=886520.0, ans=15.0 +2024-09-20 12:16:39,552 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=886520.0, ans=0.125 +2024-09-20 12:16:51,832 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=13.46 vs. limit=15.0 +2024-09-20 12:16:57,396 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=886560.0, ans=0.125 +2024-09-20 12:17:03,040 INFO [train.py:1198] (1/2) Epoch 49, batch 4450, loss[loss=0.2534, ctc_loss=0.1311, cr_loss=0.3826, attn_decoder_loss=0.2584, over 20335.00 frames. ], tot_loss[loss=0.2378, ctc_loss=0.1138, cr_loss=0.3583, attn_decoder_loss=0.2436, over 5572818.06 frames. ], batch size: 210, lr: 2.23e-03, grad_scale: 16.0 +2024-09-20 12:17:03,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=886600.0, ans=0.0 +2024-09-20 12:17:05,353 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.60 vs. limit=22.5 +2024-09-20 12:17:12,562 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.66 vs. limit=15.0 +2024-09-20 12:17:16,288 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.166e+01 9.203e+01 9.654e+01 1.067e+02 3.742e+02, threshold=1.931e+02, percent-clipped=2.0 +2024-09-20 12:17:16,683 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass.skip_rate, batch_count=886640.0, ans=0.07 +2024-09-20 12:17:20,362 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=12.35 vs. 
limit=15.0 +2024-09-20 12:17:25,657 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=886640.0, ans=0.125 +2024-09-20 12:17:30,214 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=886640.0, ans=0.025 +2024-09-20 12:17:43,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.attention_skip_rate, batch_count=886680.0, ans=0.0 +2024-09-20 12:17:49,795 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=886720.0, ans=0.1 +2024-09-20 12:17:52,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module2.balancer2.prob, batch_count=886720.0, ans=0.125 +2024-09-20 12:18:03,819 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=14.33 vs. limit=15.0 +2024-09-20 12:18:17,003 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=886800.0, ans=0.125 +2024-09-20 12:18:18,103 INFO [train.py:1198] (1/2) Epoch 49, batch 4500, loss[loss=0.2371, ctc_loss=0.1208, cr_loss=0.348, attn_decoder_loss=0.2422, over 19928.00 frames. ], tot_loss[loss=0.2394, ctc_loss=0.1161, cr_loss=0.3601, attn_decoder_loss=0.2451, over 5232513.16 frames. ], batch size: 209, lr: 2.23e-03, grad_scale: 8.0 +2024-09-20 12:18:19,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=886800.0, ans=0.125 +2024-09-20 12:18:22,827 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_positive, batch_count=886800.0, ans=0.05 +2024-09-20 12:18:34,447 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.dropout.p, batch_count=886840.0, ans=0.1 +2024-09-20 12:18:37,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=886840.0, ans=0.0 +2024-09-20 12:18:48,146 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=886880.0, ans=0.125 +2024-09-20 12:19:45,528 INFO [train.py:1198] (1/2) Epoch 50, batch 0, loss[loss=0.2109, ctc_loss=0.08504, cr_loss=0.2994, attn_decoder_loss=0.2182, over 29586.00 frames. ], tot_loss[loss=0.2109, ctc_loss=0.08504, cr_loss=0.2994, attn_decoder_loss=0.2182, over 29586.00 frames. ], batch size: 73, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:19:45,528 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 12:20:03,818 INFO [train.py:1230] (1/2) Epoch 50, validation: loss=0.2133, ctc_loss=0.03558, cr_loss=6.519e-15, attn_decoder_loss=0.2331, over 944034.00 frames. +2024-09-20 12:20:03,818 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 12:20:15,061 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.whiten, num_groups=1, num_channels=768, metric=5.79 vs. 
limit=12.0 +2024-09-20 12:20:42,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=886980.0, ans=0.0 +2024-09-20 12:20:57,071 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.187e+01 1.009e+02 1.098e+02 1.200e+02 1.487e+02, threshold=2.197e+02, percent-clipped=0.0 +2024-09-20 12:21:21,356 INFO [train.py:1198] (1/2) Epoch 50, batch 50, loss[loss=0.2116, ctc_loss=0.09968, cr_loss=0.3318, attn_decoder_loss=0.2166, over 29421.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1089, cr_loss=0.3492, attn_decoder_loss=0.2374, over 1268398.17 frames. ], batch size: 70, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:21:51,913 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=887180.0, ans=0.1 +2024-09-20 12:22:02,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=887180.0, ans=0.125 +2024-09-20 12:22:02,877 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.balancer2.prob, batch_count=887180.0, ans=0.125 +2024-09-20 12:22:04,213 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=887180.0, ans=0.025 +2024-09-20 12:22:13,267 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=887220.0, ans=0.2 +2024-09-20 12:22:19,454 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=887220.0, ans=0.125 +2024-09-20 12:22:33,059 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=887260.0, ans=0.025 +2024-09-20 12:22:34,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass_mid.scale_min, batch_count=887260.0, ans=0.2 +2024-09-20 12:22:37,263 INFO [train.py:1198] (1/2) Epoch 50, batch 100, loss[loss=0.2257, ctc_loss=0.1055, cr_loss=0.3335, attn_decoder_loss=0.2316, over 29539.00 frames. ], tot_loss[loss=0.2329, ctc_loss=0.1092, cr_loss=0.3496, attn_decoder_loss=0.2389, over 2251911.23 frames. ], batch size: 76, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:23:13,075 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=5.69 vs. limit=12.0 +2024-09-20 12:23:29,880 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.360e+01 8.808e+01 9.273e+01 9.833e+01 1.804e+02, threshold=1.855e+02, percent-clipped=0.0 +2024-09-20 12:23:33,363 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=887420.0, ans=0.0 +2024-09-20 12:23:53,822 INFO [train.py:1198] (1/2) Epoch 50, batch 150, loss[loss=0.2127, ctc_loss=0.09989, cr_loss=0.3192, attn_decoder_loss=0.2181, over 29452.00 frames. ], tot_loss[loss=0.2313, ctc_loss=0.1078, cr_loss=0.3464, attn_decoder_loss=0.2373, over 3046561.36 frames. ], batch size: 70, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:24:13,134 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=11.88 vs. 
limit=15.0 +2024-09-20 12:24:38,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=887580.0, ans=0.125 +2024-09-20 12:24:47,358 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.19 vs. limit=6.0 +2024-09-20 12:24:49,442 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=887620.0, ans=0.1 +2024-09-20 12:24:52,445 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=887620.0, ans=0.1 +2024-09-20 12:24:59,950 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=887660.0, ans=0.2 +2024-09-20 12:25:06,339 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.19 vs. limit=22.5 +2024-09-20 12:25:08,839 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=887660.0, ans=0.125 +2024-09-20 12:25:10,388 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=887700.0, ans=0.125 +2024-09-20 12:25:11,629 INFO [train.py:1198] (1/2) Epoch 50, batch 200, loss[loss=0.2387, ctc_loss=0.1134, cr_loss=0.3605, attn_decoder_loss=0.2447, over 27492.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.1071, cr_loss=0.3448, attn_decoder_loss=0.2366, over 3658918.25 frames. ], batch size: 125, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:25:22,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=887700.0, ans=0.125 +2024-09-20 12:26:04,373 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 6.975e+01 8.480e+01 9.009e+01 9.638e+01 2.120e+02, threshold=1.802e+02, percent-clipped=1.0 +2024-09-20 12:26:16,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=887860.0, ans=0.0 +2024-09-20 12:26:24,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=887860.0, ans=0.0 +2024-09-20 12:26:26,836 INFO [train.py:1198] (1/2) Epoch 50, batch 250, loss[loss=0.2394, ctc_loss=0.113, cr_loss=0.3545, attn_decoder_loss=0.2456, over 29232.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1066, cr_loss=0.3445, attn_decoder_loss=0.2363, over 4140898.17 frames. ], batch size: 100, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:26:27,716 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.77 vs. limit=15.0 +2024-09-20 12:26:41,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=887900.0, ans=0.2 +2024-09-20 12:27:27,871 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=4.00 vs. 
limit=12.0 +2024-09-20 12:27:39,403 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=888060.0, ans=0.125 +2024-09-20 12:27:43,875 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.out_combiner.scale_min, batch_count=888100.0, ans=0.2 +2024-09-20 12:27:45,107 INFO [train.py:1198] (1/2) Epoch 50, batch 300, loss[loss=0.2485, ctc_loss=0.1271, cr_loss=0.3988, attn_decoder_loss=0.2532, over 29532.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1065, cr_loss=0.3446, attn_decoder_loss=0.2358, over 4509877.16 frames. ], batch size: 92, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:28:25,834 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.83 vs. limit=15.0 +2024-09-20 12:28:32,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=888220.0, ans=0.1 +2024-09-20 12:28:35,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=888220.0, ans=0.125 +2024-09-20 12:28:40,141 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.872e+01 8.884e+01 9.251e+01 9.818e+01 2.212e+02, threshold=1.850e+02, percent-clipped=1.0 +2024-09-20 12:29:02,945 INFO [train.py:1198] (1/2) Epoch 50, batch 350, loss[loss=0.1976, ctc_loss=0.08119, cr_loss=0.2778, attn_decoder_loss=0.2044, over 29310.00 frames. ], tot_loss[loss=0.23, ctc_loss=0.1065, cr_loss=0.3445, attn_decoder_loss=0.236, over 4795040.10 frames. ], batch size: 71, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:29:10,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff3_skip_rate, batch_count=888300.0, ans=0.0 +2024-09-20 12:29:19,371 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=888340.0, ans=0.015 +2024-09-20 12:29:56,529 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=512, metric=8.14 vs. limit=15.0 +2024-09-20 12:30:15,144 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:30:17,905 INFO [train.py:1198] (1/2) Epoch 50, batch 400, loss[loss=0.231, ctc_loss=0.1068, cr_loss=0.3386, attn_decoder_loss=0.2373, over 29719.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1063, cr_loss=0.3435, attn_decoder_loss=0.2359, over 5025736.44 frames. 
], batch size: 82, lr: 2.21e-03, grad_scale: 16.0 +2024-09-20 12:30:28,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=888500.0, ans=0.0 +2024-09-20 12:30:34,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=888540.0, ans=0.1 +2024-09-20 12:31:05,879 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=888620.0, ans=0.125 +2024-09-20 12:31:10,498 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=888620.0, ans=0.125 +2024-09-20 12:31:14,780 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.335e+01 8.622e+01 9.023e+01 9.604e+01 1.265e+02, threshold=1.805e+02, percent-clipped=0.0 +2024-09-20 12:31:18,298 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:31:30,230 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=888660.0, ans=0.0 +2024-09-20 12:31:35,879 INFO [train.py:1198] (1/2) Epoch 50, batch 450, loss[loss=0.2462, ctc_loss=0.1265, cr_loss=0.3855, attn_decoder_loss=0.2509, over 29698.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1066, cr_loss=0.344, attn_decoder_loss=0.2362, over 5187566.19 frames. ], batch size: 83, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:31:36,231 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_skip_rate, batch_count=888700.0, ans=0.0 +2024-09-20 12:31:51,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=888740.0, ans=0.1 +2024-09-20 12:32:07,123 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=22.04 vs. limit=22.5 +2024-09-20 12:32:26,169 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten.whitening_limit, batch_count=888820.0, ans=15.0 +2024-09-20 12:32:30,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=888820.0, ans=0.125 +2024-09-20 12:32:31,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module2.balancer2.prob, batch_count=888820.0, ans=0.125 +2024-09-20 12:32:54,119 INFO [train.py:1198] (1/2) Epoch 50, batch 500, loss[loss=0.2457, ctc_loss=0.1174, cr_loss=0.3837, attn_decoder_loss=0.2514, over 29396.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1059, cr_loss=0.3422, attn_decoder_loss=0.2354, over 5330610.55 frames. 
], batch size: 94, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:33:15,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.scale_min, batch_count=888940.0, ans=0.2 +2024-09-20 12:33:45,656 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.skip_rate, batch_count=889020.0, ans=0.04949747468305833 +2024-09-20 12:33:48,200 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.453e+01 8.714e+01 9.199e+01 9.608e+01 6.151e+02, threshold=1.840e+02, percent-clipped=1.0 +2024-09-20 12:33:50,327 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.08 vs. limit=15.0 +2024-09-20 12:34:09,227 INFO [train.py:1198] (1/2) Epoch 50, batch 550, loss[loss=0.2368, ctc_loss=0.1088, cr_loss=0.3534, attn_decoder_loss=0.2431, over 28826.00 frames. ], tot_loss[loss=0.2292, ctc_loss=0.1061, cr_loss=0.343, attn_decoder_loss=0.2353, over 5423417.14 frames. ], batch size: 104, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:34:09,562 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=889100.0, ans=0.2 +2024-09-20 12:34:12,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.attention_skip_rate, batch_count=889100.0, ans=0.0 +2024-09-20 12:34:43,783 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=889180.0, ans=0.1 +2024-09-20 12:34:51,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=889180.0, ans=0.0 +2024-09-20 12:34:57,175 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer1.prob, batch_count=889220.0, ans=0.125 +2024-09-20 12:35:27,185 INFO [train.py:1198] (1/2) Epoch 50, batch 600, loss[loss=0.2427, ctc_loss=0.1157, cr_loss=0.3654, attn_decoder_loss=0.2487, over 29286.00 frames. ], tot_loss[loss=0.2294, ctc_loss=0.1064, cr_loss=0.3436, attn_decoder_loss=0.2354, over 5510733.57 frames. ], batch size: 100, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:35:29,413 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.40 vs. 
limit=15.0 +2024-09-20 12:35:31,997 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=889300.0, ans=0.035 +2024-09-20 12:35:35,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=889300.0, ans=0.125 +2024-09-20 12:35:41,163 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.ff2_skip_rate, batch_count=889340.0, ans=0.0 +2024-09-20 12:35:47,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=889340.0, ans=0.0 +2024-09-20 12:36:13,245 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=889420.0, ans=0.0 +2024-09-20 12:36:17,576 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.attention_skip_rate, batch_count=889420.0, ans=0.0 +2024-09-20 12:36:19,069 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.self_attn_weights.pos_emb_skip_rate, batch_count=889420.0, ans=0.0 +2024-09-20 12:36:23,302 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.988e+01 8.691e+01 9.098e+01 9.563e+01 1.951e+02, threshold=1.820e+02, percent-clipped=1.0 +2024-09-20 12:36:38,820 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=889460.0, ans=0.1 +2024-09-20 12:36:44,425 INFO [train.py:1198] (1/2) Epoch 50, batch 650, loss[loss=0.2294, ctc_loss=0.1071, cr_loss=0.344, attn_decoder_loss=0.2354, over 29742.00 frames. ], tot_loss[loss=0.2288, ctc_loss=0.1057, cr_loss=0.3419, attn_decoder_loss=0.2349, over 5587391.51 frames. ], batch size: 81, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:36:57,467 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.53 vs. limit=10.0 +2024-09-20 12:37:07,455 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=889540.0, ans=0.125 +2024-09-20 12:37:57,583 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass_mid.scale_min, batch_count=889660.0, ans=0.2 +2024-09-20 12:38:00,250 INFO [train.py:1198] (1/2) Epoch 50, batch 700, loss[loss=0.2246, ctc_loss=0.1086, cr_loss=0.3393, attn_decoder_loss=0.23, over 29537.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1065, cr_loss=0.3442, attn_decoder_loss=0.2358, over 5637237.92 frames. ], batch size: 76, lr: 2.21e-03, grad_scale: 8.0 +2024-09-20 12:38:37,757 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=192, metric=8.85 vs. 
limit=15.0 +2024-09-20 12:38:47,998 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=889820.0, ans=0.0 +2024-09-20 12:38:56,771 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.632e+01 8.634e+01 9.067e+01 9.623e+01 1.303e+02, threshold=1.813e+02, percent-clipped=0.0 +2024-09-20 12:39:15,078 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.attention_skip_rate, batch_count=889860.0, ans=0.0 +2024-09-20 12:39:15,144 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer2.prob, batch_count=889860.0, ans=0.125 +2024-09-20 12:39:17,907 INFO [train.py:1198] (1/2) Epoch 50, batch 750, loss[loss=0.2313, ctc_loss=0.1015, cr_loss=0.3353, attn_decoder_loss=0.2383, over 29729.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1067, cr_loss=0.3447, attn_decoder_loss=0.2355, over 5676360.45 frames. ], batch size: 82, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:39:21,188 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.skip_rate, batch_count=889900.0, ans=0.09899494936611666 +2024-09-20 12:39:22,649 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=889900.0, ans=0.125 +2024-09-20 12:39:30,773 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.37 vs. limit=6.0 +2024-09-20 12:39:42,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=889940.0, ans=0.125 +2024-09-20 12:40:07,342 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=890020.0, ans=0.125 +2024-09-20 12:40:22,310 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=890060.0, ans=0.125 +2024-09-20 12:40:35,478 INFO [train.py:1198] (1/2) Epoch 50, batch 800, loss[loss=0.2187, ctc_loss=0.09792, cr_loss=0.3284, attn_decoder_loss=0.2248, over 29623.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1069, cr_loss=0.3454, attn_decoder_loss=0.2357, over 5707210.77 frames. ], batch size: 73, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:40:37,820 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.88 vs. 
limit=12.0 +2024-09-20 12:40:50,840 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=890140.0, ans=0.1 +2024-09-20 12:40:53,581 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=890140.0, ans=0.125 +2024-09-20 12:41:02,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=890140.0, ans=0.0 +2024-09-20 12:41:02,975 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=890140.0, ans=0.1 +2024-09-20 12:41:10,314 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module1.balancer2.prob, batch_count=890180.0, ans=0.125 +2024-09-20 12:41:29,703 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.455e+01 8.785e+01 9.269e+01 9.766e+01 2.898e+02, threshold=1.854e+02, percent-clipped=1.0 +2024-09-20 12:41:34,482 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=890260.0, ans=0.125 +2024-09-20 12:41:35,242 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.57 vs. limit=6.0 +2024-09-20 12:41:47,172 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten.whitening_limit, batch_count=890260.0, ans=22.5 +2024-09-20 12:41:50,577 INFO [train.py:1198] (1/2) Epoch 50, batch 850, loss[loss=0.2392, ctc_loss=0.1084, cr_loss=0.3374, attn_decoder_loss=0.2463, over 29707.00 frames. ], tot_loss[loss=0.2294, ctc_loss=0.1064, cr_loss=0.3442, attn_decoder_loss=0.2354, over 5736666.21 frames. ], batch size: 89, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:41:52,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=890300.0, ans=0.125 +2024-09-20 12:42:01,140 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=890300.0, ans=0.125 +2024-09-20 12:42:02,679 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer2.prob, batch_count=890300.0, ans=0.125 +2024-09-20 12:42:14,781 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=890340.0, ans=0.1 +2024-09-20 12:42:31,811 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=890380.0, ans=0.125 +2024-09-20 12:42:33,164 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=890380.0, ans=0.125 +2024-09-20 12:42:41,782 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.54 vs. 
limit=6.0 +2024-09-20 12:42:53,878 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=890460.0, ans=0.2 +2024-09-20 12:43:07,437 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff3_skip_rate, batch_count=890500.0, ans=0.0 +2024-09-20 12:43:08,693 INFO [train.py:1198] (1/2) Epoch 50, batch 900, loss[loss=0.216, ctc_loss=0.09374, cr_loss=0.3178, attn_decoder_loss=0.2225, over 29595.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1067, cr_loss=0.345, attn_decoder_loss=0.2359, over 5740522.55 frames. ], batch size: 73, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:43:14,043 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=2.70 vs. limit=6.0 +2024-09-20 12:43:14,870 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=890500.0, ans=0.0 +2024-09-20 12:43:33,103 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.min_positive, batch_count=890540.0, ans=0.025 +2024-09-20 12:43:53,226 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.const_attention_rate, batch_count=890580.0, ans=0.025 +2024-09-20 12:44:00,741 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff3_skip_rate, batch_count=890620.0, ans=0.0 +2024-09-20 12:44:02,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=890620.0, ans=0.125 +2024-09-20 12:44:03,752 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer2.prob, batch_count=890620.0, ans=0.125 +2024-09-20 12:44:06,530 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.711e+01 8.704e+01 9.222e+01 9.610e+01 2.090e+02, threshold=1.844e+02, percent-clipped=2.0 +2024-09-20 12:44:07,316 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.38 vs. limit=15.0 +2024-09-20 12:44:15,756 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=890660.0, ans=0.2 +2024-09-20 12:44:18,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=890660.0, ans=0.125 +2024-09-20 12:44:25,915 INFO [train.py:1198] (1/2) Epoch 50, batch 950, loss[loss=0.2221, ctc_loss=0.09853, cr_loss=0.3298, attn_decoder_loss=0.2285, over 29512.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1065, cr_loss=0.3444, attn_decoder_loss=0.2359, over 5742446.27 frames. ], batch size: 74, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:45:06,138 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.47 vs. limit=15.0 +2024-09-20 12:45:21,181 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.whiten, num_groups=1, num_channels=512, metric=4.53 vs. 
limit=12.0 +2024-09-20 12:45:22,053 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.out_combiner.scale_min, batch_count=890820.0, ans=0.2 +2024-09-20 12:45:27,937 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=890860.0, ans=0.0 +2024-09-20 12:45:32,552 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:45:34,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=890860.0, ans=0.125 +2024-09-20 12:45:41,062 INFO [train.py:1198] (1/2) Epoch 50, batch 1000, loss[loss=0.2196, ctc_loss=0.09543, cr_loss=0.3222, attn_decoder_loss=0.2262, over 29504.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1069, cr_loss=0.3451, attn_decoder_loss=0.2363, over 5736194.64 frames. ], batch size: 77, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:45:52,605 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.66 vs. limit=15.0 +2024-09-20 12:45:56,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=890940.0, ans=0.125 +2024-09-20 12:45:59,506 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer2.prob, batch_count=890940.0, ans=0.125 +2024-09-20 12:46:00,951 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=890940.0, ans=0.125 +2024-09-20 12:46:03,905 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=890940.0, ans=0.125 +2024-09-20 12:46:09,967 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=890980.0, ans=0.0 +2024-09-20 12:46:24,999 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_module2.balancer1.prob, batch_count=891020.0, ans=0.125 +2024-09-20 12:46:31,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.const_attention_rate, batch_count=891020.0, ans=0.025 +2024-09-20 12:46:34,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=891020.0, ans=0.0 +2024-09-20 12:46:38,847 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.499e+01 8.665e+01 9.186e+01 9.812e+01 1.609e+02, threshold=1.837e+02, percent-clipped=0.0 +2024-09-20 12:46:45,093 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=891060.0, ans=0.0 +2024-09-20 12:46:51,168 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=891060.0, ans=0.1 +2024-09-20 12:46:58,385 INFO [train.py:1198] (1/2) Epoch 50, batch 1050, loss[loss=0.2398, ctc_loss=0.1116, cr_loss=0.365, attn_decoder_loss=0.246, over 29665.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1068, cr_loss=0.3449, attn_decoder_loss=0.2359, over 5744188.96 frames. 
], batch size: 85, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:47:28,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.nonlin_attention.balancer.max_positive, batch_count=891180.0, ans=0.95 +2024-09-20 12:47:33,910 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.25 vs. limit=15.0 +2024-09-20 12:47:40,932 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.25 vs. limit=15.0 +2024-09-20 12:47:48,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=891220.0, ans=0.0 +2024-09-20 12:47:52,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=891220.0, ans=0.1 +2024-09-20 12:47:55,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=891220.0, ans=0.125 +2024-09-20 12:48:16,829 INFO [train.py:1198] (1/2) Epoch 50, batch 1100, loss[loss=0.2181, ctc_loss=0.09623, cr_loss=0.3301, attn_decoder_loss=0.2243, over 29444.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1067, cr_loss=0.3446, attn_decoder_loss=0.2358, over 5756822.08 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:48:20,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module1.balancer2.prob, batch_count=891300.0, ans=0.125 +2024-09-20 12:48:38,499 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=891340.0, ans=0.125 +2024-09-20 12:49:12,783 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.273e+01 8.673e+01 9.174e+01 9.700e+01 1.224e+02, threshold=1.835e+02, percent-clipped=0.0 +2024-09-20 12:49:20,615 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 12:49:32,518 INFO [train.py:1198] (1/2) Epoch 50, batch 1150, loss[loss=0.2224, ctc_loss=0.09797, cr_loss=0.3232, attn_decoder_loss=0.2291, over 29440.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1068, cr_loss=0.3446, attn_decoder_loss=0.2358, over 5754658.89 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 12:49:43,511 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff3_skip_rate, batch_count=891500.0, ans=0.0 +2024-09-20 12:49:49,433 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=891540.0, ans=0.125 +2024-09-20 12:49:49,515 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.const_attention_rate, batch_count=891540.0, ans=0.025 +2024-09-20 12:50:14,458 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=20.14 vs. limit=22.5 +2024-09-20 12:50:20,157 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=4.02 vs. 
limit=15.0 +2024-09-20 12:50:27,739 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=891620.0, ans=0.0 +2024-09-20 12:50:45,072 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=3.87 vs. limit=10.0 +2024-09-20 12:50:50,194 INFO [train.py:1198] (1/2) Epoch 50, batch 1200, loss[loss=0.2436, ctc_loss=0.1107, cr_loss=0.3677, attn_decoder_loss=0.2502, over 29670.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1069, cr_loss=0.3445, attn_decoder_loss=0.2364, over 5746321.68 frames. ], batch size: 85, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:51:08,941 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=891740.0, ans=0.0 +2024-09-20 12:51:48,358 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.180e+01 8.827e+01 9.334e+01 1.012e+02 2.490e+02, threshold=1.867e+02, percent-clipped=1.0 +2024-09-20 12:51:49,400 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.75 vs. limit=10.0 +2024-09-20 12:52:07,783 INFO [train.py:1198] (1/2) Epoch 50, batch 1250, loss[loss=0.2555, ctc_loss=0.1306, cr_loss=0.4071, attn_decoder_loss=0.2603, over 29527.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1072, cr_loss=0.3455, attn_decoder_loss=0.2369, over 5774223.82 frames. ], batch size: 92, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:52:21,005 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=7.06 vs. limit=15.0 +2024-09-20 12:52:21,755 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=891940.0, ans=0.125 +2024-09-20 12:52:38,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_skip_rate, batch_count=891980.0, ans=0.0 +2024-09-20 12:52:52,486 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.balancer1.prob, batch_count=892020.0, ans=0.125 +2024-09-20 12:53:16,973 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=892060.0, ans=0.0 +2024-09-20 12:53:24,085 INFO [train.py:1198] (1/2) Epoch 50, batch 1300, loss[loss=0.2487, ctc_loss=0.1203, cr_loss=0.3835, attn_decoder_loss=0.2544, over 28240.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1067, cr_loss=0.3443, attn_decoder_loss=0.2365, over 5778031.43 frames. ], batch size: 111, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:53:27,545 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=892100.0, ans=0.0 +2024-09-20 12:53:49,936 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=892140.0, ans=0.125 +2024-09-20 12:53:53,008 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=892180.0, ans=0.125 +2024-09-20 12:54:14,264 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=892220.0, ans=0.125 +2024-09-20 12:54:17,859 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=4.36 vs. 
limit=15.0 +2024-09-20 12:54:19,787 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.029e+01 8.576e+01 8.998e+01 9.559e+01 1.394e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-20 12:54:24,638 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.conv_skip_rate, batch_count=892260.0, ans=0.0 +2024-09-20 12:54:32,958 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=892260.0, ans=0.0 +2024-09-20 12:54:38,931 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.scale_min, batch_count=892260.0, ans=0.2 +2024-09-20 12:54:41,688 INFO [train.py:1198] (1/2) Epoch 50, batch 1350, loss[loss=0.2304, ctc_loss=0.1102, cr_loss=0.3632, attn_decoder_loss=0.2356, over 29774.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1062, cr_loss=0.3434, attn_decoder_loss=0.2359, over 5793990.34 frames. ], batch size: 81, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:54:43,517 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff3_skip_rate, batch_count=892300.0, ans=0.0 +2024-09-20 12:54:50,834 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=892300.0, ans=0.2 +2024-09-20 12:54:59,588 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer_na.min_abs, batch_count=892340.0, ans=0.02 +2024-09-20 12:55:07,643 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=3.88 vs. limit=12.0 +2024-09-20 12:55:08,980 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=256, metric=9.48 vs. limit=15.0 +2024-09-20 12:55:16,382 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=256, metric=12.96 vs. limit=15.0 +2024-09-20 12:55:37,860 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.92 vs. limit=12.0 +2024-09-20 12:55:50,254 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=11.42 vs. limit=15.0 +2024-09-20 12:55:58,283 INFO [train.py:1198] (1/2) Epoch 50, batch 1400, loss[loss=0.1999, ctc_loss=0.08437, cr_loss=0.2972, attn_decoder_loss=0.2062, over 29575.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1063, cr_loss=0.344, attn_decoder_loss=0.2358, over 5805634.30 frames. ], batch size: 69, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:56:06,081 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=892500.0, ans=0.0 +2024-09-20 12:56:06,125 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass_mid.scale_min, batch_count=892500.0, ans=0.2 +2024-09-20 12:56:11,233 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.44 vs. 
limit=15.0 +2024-09-20 12:56:13,694 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=892540.0, ans=0.125 +2024-09-20 12:56:35,923 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=892580.0, ans=0.0 +2024-09-20 12:56:46,436 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.ff2_skip_rate, batch_count=892620.0, ans=0.0 +2024-09-20 12:56:53,533 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.789e+01 8.988e+01 9.426e+01 9.888e+01 1.632e+02, threshold=1.885e+02, percent-clipped=0.0 +2024-09-20 12:57:07,628 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=892660.0, ans=0.125 +2024-09-20 12:57:12,715 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=3.64 vs. limit=6.0 +2024-09-20 12:57:13,308 INFO [train.py:1198] (1/2) Epoch 50, batch 1450, loss[loss=0.2483, ctc_loss=0.1227, cr_loss=0.3799, attn_decoder_loss=0.2538, over 29479.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1062, cr_loss=0.3432, attn_decoder_loss=0.2359, over 5803828.39 frames. ], batch size: 94, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:57:26,850 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=892740.0, ans=0.1 +2024-09-20 12:57:32,490 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.56 vs. limit=15.0 +2024-09-20 12:57:33,123 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=892740.0, ans=0.1 +2024-09-20 12:58:30,652 INFO [train.py:1198] (1/2) Epoch 50, batch 1500, loss[loss=0.2461, ctc_loss=0.1151, cr_loss=0.3628, attn_decoder_loss=0.2526, over 29619.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1065, cr_loss=0.3437, attn_decoder_loss=0.2364, over 5804130.64 frames. ], batch size: 86, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 12:58:35,475 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.attention_skip_rate, batch_count=892900.0, ans=0.0 +2024-09-20 12:58:47,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=892940.0, ans=0.0 +2024-09-20 12:59:02,503 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=892980.0, ans=0.125 +2024-09-20 12:59:21,708 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.bypass.scale_min, batch_count=893020.0, ans=0.2 +2024-09-20 12:59:24,700 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer1.prob, batch_count=893020.0, ans=0.125 +2024-09-20 12:59:28,004 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.68 vs. 
limit=15.0 +2024-09-20 12:59:28,762 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.722e+01 8.732e+01 9.209e+01 9.748e+01 2.356e+02, threshold=1.842e+02, percent-clipped=2.0 +2024-09-20 12:59:31,286 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.76 vs. limit=15.0 +2024-09-20 12:59:48,130 INFO [train.py:1198] (1/2) Epoch 50, batch 1550, loss[loss=0.2467, ctc_loss=0.122, cr_loss=0.3834, attn_decoder_loss=0.252, over 29508.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.107, cr_loss=0.3447, attn_decoder_loss=0.2364, over 5780762.42 frames. ], batch size: 90, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:00:19,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=893180.0, ans=0.125 +2024-09-20 13:00:22,963 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten2.whitening_limit, batch_count=893180.0, ans=15.0 +2024-09-20 13:00:31,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.bypass_mid.scale_min, batch_count=893220.0, ans=0.2 +2024-09-20 13:00:34,488 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward2.hidden_balancer.prob, batch_count=893220.0, ans=0.125 +2024-09-20 13:00:37,692 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.scale_min, batch_count=893220.0, ans=0.2 +2024-09-20 13:00:40,818 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=893220.0, ans=0.125 +2024-09-20 13:00:44,276 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.self_attn2.whiten, num_groups=1, num_channels=768, metric=12.64 vs. limit=22.5 +2024-09-20 13:00:45,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer2.prob, batch_count=893220.0, ans=0.125 +2024-09-20 13:00:48,236 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=893260.0, ans=0.0 +2024-09-20 13:01:02,875 INFO [train.py:1198] (1/2) Epoch 50, batch 1600, loss[loss=0.2388, ctc_loss=0.1158, cr_loss=0.3668, attn_decoder_loss=0.2444, over 29683.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1071, cr_loss=0.3449, attn_decoder_loss=0.2362, over 5762811.96 frames. ], batch size: 85, lr: 2.20e-03, grad_scale: 32.0 +2024-09-20 13:01:08,107 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=16.47 vs. limit=22.5 +2024-09-20 13:01:12,566 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.72 vs. limit=15.0 +2024-09-20 13:01:20,312 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.whiten, num_groups=1, num_channels=768, metric=3.57 vs. 
limit=12.0 +2024-09-20 13:01:24,238 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.const_attention_rate, batch_count=893340.0, ans=0.025 +2024-09-20 13:01:27,460 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=893340.0, ans=0.125 +2024-09-20 13:01:35,286 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=893380.0, ans=0.125 +2024-09-20 13:01:54,604 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=893420.0, ans=0.0 +2024-09-20 13:01:59,034 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=893420.0, ans=0.1 +2024-09-20 13:02:00,387 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.886e+01 8.710e+01 9.146e+01 9.958e+01 1.437e+02, threshold=1.829e+02, percent-clipped=0.0 +2024-09-20 13:02:00,797 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward2.hidden_balancer.prob, batch_count=893420.0, ans=0.125 +2024-09-20 13:02:02,145 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=893460.0, ans=0.1 +2024-09-20 13:02:02,293 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=893460.0, ans=0.0 +2024-09-20 13:02:20,551 INFO [train.py:1198] (1/2) Epoch 50, batch 1650, loss[loss=0.2372, ctc_loss=0.1121, cr_loss=0.342, attn_decoder_loss=0.2435, over 29716.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1071, cr_loss=0.3445, attn_decoder_loss=0.2362, over 5758301.10 frames. ], batch size: 89, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:02:35,912 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.prob, batch_count=893540.0, ans=0.125 +2024-09-20 13:02:43,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=893540.0, ans=0.125 +2024-09-20 13:02:45,704 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=13.21 vs. limit=15.0 +2024-09-20 13:03:10,104 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=893620.0, ans=0.125 +2024-09-20 13:03:14,856 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=8.03 vs. limit=15.0 +2024-09-20 13:03:19,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.out_combiner.scale_min, batch_count=893620.0, ans=0.2 +2024-09-20 13:03:37,897 INFO [train.py:1198] (1/2) Epoch 50, batch 1700, loss[loss=0.2003, ctc_loss=0.08652, cr_loss=0.2985, attn_decoder_loss=0.2064, over 29520.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1064, cr_loss=0.3431, attn_decoder_loss=0.2359, over 5780676.27 frames. 
], batch size: 69, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:03:42,717 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer2.prob, batch_count=893700.0, ans=0.125 +2024-09-20 13:03:47,114 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=893700.0, ans=0.125 +2024-09-20 13:03:56,277 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.balancer2.prob, batch_count=893740.0, ans=0.125 +2024-09-20 13:03:56,687 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.60 vs. limit=22.5 +2024-09-20 13:03:57,789 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:04:05,331 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=893740.0, ans=0.0 +2024-09-20 13:04:08,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=893780.0, ans=0.09899494936611666 +2024-09-20 13:04:35,201 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.624e+01 8.662e+01 9.260e+01 9.838e+01 1.206e+02, threshold=1.852e+02, percent-clipped=0.0 +2024-09-20 13:04:47,394 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=893860.0, ans=0.0 +2024-09-20 13:04:53,292 INFO [train.py:1198] (1/2) Epoch 50, batch 1750, loss[loss=0.2123, ctc_loss=0.09845, cr_loss=0.3248, attn_decoder_loss=0.2177, over 29313.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1064, cr_loss=0.3432, attn_decoder_loss=0.2357, over 5789086.97 frames. ], batch size: 67, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:05:02,698 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=893900.0, ans=0.09899494936611666 +2024-09-20 13:05:05,672 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=893900.0, ans=0.125 +2024-09-20 13:05:07,077 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=893940.0, ans=0.2 +2024-09-20 13:05:49,206 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=894020.0, ans=0.125 +2024-09-20 13:05:53,098 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.self_attn1.whiten, num_groups=1, num_channels=192, metric=10.95 vs. limit=22.5 +2024-09-20 13:06:05,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=894060.0, ans=0.0 +2024-09-20 13:06:08,367 INFO [train.py:1198] (1/2) Epoch 50, batch 1800, loss[loss=0.2478, ctc_loss=0.125, cr_loss=0.3994, attn_decoder_loss=0.2525, over 29680.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1065, cr_loss=0.3431, attn_decoder_loss=0.2359, over 5790843.07 frames. 
], batch size: 83, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:06:27,378 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.attention_skip_rate, batch_count=894140.0, ans=0.0 +2024-09-20 13:06:38,091 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.skip_rate, batch_count=894140.0, ans=0.07 +2024-09-20 13:06:38,743 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=12.95 vs. limit=15.0 +2024-09-20 13:06:45,208 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=894180.0, ans=0.125 +2024-09-20 13:06:46,964 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=894180.0, ans=0.025 +2024-09-20 13:06:56,787 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.balancer1.prob, batch_count=894220.0, ans=0.125 +2024-09-20 13:07:10,020 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.815e+01 8.701e+01 9.171e+01 9.771e+01 2.069e+02, threshold=1.834e+02, percent-clipped=1.0 +2024-09-20 13:07:17,777 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=894260.0, ans=10.0 +2024-09-20 13:07:28,091 INFO [train.py:1198] (1/2) Epoch 50, batch 1850, loss[loss=0.2362, ctc_loss=0.1117, cr_loss=0.3513, attn_decoder_loss=0.2422, over 29627.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1066, cr_loss=0.3438, attn_decoder_loss=0.236, over 5794939.07 frames. ], batch size: 86, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:07:35,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.ff3_skip_rate, batch_count=894300.0, ans=0.0 +2024-09-20 13:07:40,452 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=894300.0, ans=0.2 +2024-09-20 13:07:47,247 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=6.45 vs. limit=15.0 +2024-09-20 13:08:19,970 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=894420.0, ans=0.125 +2024-09-20 13:08:21,516 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=894420.0, ans=0.025 +2024-09-20 13:08:31,754 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=894460.0, ans=0.2 +2024-09-20 13:08:37,893 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_skip_rate, batch_count=894460.0, ans=0.0 +2024-09-20 13:08:43,293 INFO [train.py:1198] (1/2) Epoch 50, batch 1900, loss[loss=0.2492, ctc_loss=0.1139, cr_loss=0.3674, attn_decoder_loss=0.2561, over 29725.00 frames. ], tot_loss[loss=0.2306, ctc_loss=0.107, cr_loss=0.3445, attn_decoder_loss=0.2366, over 5803582.92 frames. ], batch size: 89, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:08:53,447 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.whiten, num_groups=1, num_channels=192, metric=3.89 vs. 
limit=12.0 +2024-09-20 13:09:06,127 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_skip_rate, batch_count=894540.0, ans=0.0 +2024-09-20 13:09:42,595 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.413e+01 8.927e+01 9.438e+01 9.973e+01 1.317e+02, threshold=1.888e+02, percent-clipped=0.0 +2024-09-20 13:09:59,261 INFO [train.py:1198] (1/2) Epoch 50, batch 1950, loss[loss=0.2266, ctc_loss=0.09945, cr_loss=0.3131, attn_decoder_loss=0.2338, over 29431.00 frames. ], tot_loss[loss=0.2315, ctc_loss=0.1073, cr_loss=0.3455, attn_decoder_loss=0.2376, over 5818338.28 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:10:12,056 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=894700.0, ans=0.125 +2024-09-20 13:10:13,608 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.attention_skip_rate, batch_count=894700.0, ans=0.0 +2024-09-20 13:10:15,176 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.hidden_balancer.prob, batch_count=894740.0, ans=0.125 +2024-09-20 13:10:31,462 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=894780.0, ans=0.125 +2024-09-20 13:10:33,471 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.60 vs. limit=15.0 +2024-09-20 13:11:18,240 INFO [train.py:1198] (1/2) Epoch 50, batch 2000, loss[loss=0.2029, ctc_loss=0.08552, cr_loss=0.2998, attn_decoder_loss=0.2093, over 29345.00 frames. ], tot_loss[loss=0.2316, ctc_loss=0.1072, cr_loss=0.3449, attn_decoder_loss=0.2378, over 5795504.20 frames. ], batch size: 67, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:11:32,441 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=894940.0, ans=0.2 +2024-09-20 13:11:44,345 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.balancer1.prob, batch_count=894940.0, ans=0.125 +2024-09-20 13:11:47,421 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=894980.0, ans=0.0 +2024-09-20 13:12:00,744 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.hidden_balancer.prob, batch_count=894980.0, ans=0.125 +2024-09-20 13:12:17,047 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.737e+01 8.718e+01 9.150e+01 9.752e+01 2.823e+02, threshold=1.830e+02, percent-clipped=2.0 +2024-09-20 13:12:25,338 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.balancer.prob, batch_count=895060.0, ans=0.125 +2024-09-20 13:12:34,060 INFO [train.py:1198] (1/2) Epoch 50, batch 2050, loss[loss=0.2103, ctc_loss=0.09076, cr_loss=0.3094, attn_decoder_loss=0.2167, over 29424.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1071, cr_loss=0.345, attn_decoder_loss=0.237, over 5789036.33 frames. 
], batch size: 70, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:12:37,266 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.min_positive, batch_count=895100.0, ans=0.05 +2024-09-20 13:12:46,362 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=895100.0, ans=0.1 +2024-09-20 13:13:02,943 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=895180.0, ans=0.1 +2024-09-20 13:13:09,832 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=192, metric=4.40 vs. limit=15.0 +2024-09-20 13:13:19,948 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.min_positive, batch_count=895220.0, ans=0.05 +2024-09-20 13:13:29,405 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.69 vs. limit=10.0 +2024-09-20 13:13:48,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=895300.0, ans=0.125 +2024-09-20 13:13:48,306 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=895300.0, ans=0.1 +2024-09-20 13:13:49,477 INFO [train.py:1198] (1/2) Epoch 50, batch 2100, loss[loss=0.239, ctc_loss=0.1169, cr_loss=0.3788, attn_decoder_loss=0.2442, over 29753.00 frames. ], tot_loss[loss=0.2304, ctc_loss=0.1067, cr_loss=0.3448, attn_decoder_loss=0.2365, over 5799419.00 frames. ], batch size: 81, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:13:59,303 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=768, metric=19.10 vs. limit=22.5 +2024-09-20 13:14:05,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.bypass.scale_min, batch_count=895340.0, ans=0.2 +2024-09-20 13:14:11,249 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=895340.0, ans=0.1 +2024-09-20 13:14:47,707 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer1.prob, batch_count=895420.0, ans=0.125 +2024-09-20 13:14:51,866 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.597e+01 8.617e+01 9.072e+01 9.604e+01 1.170e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 13:14:55,246 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.bypass.skip_rate, batch_count=895460.0, ans=0.035 +2024-09-20 13:14:59,965 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass_mid.scale_min, batch_count=895460.0, ans=0.2 +2024-09-20 13:15:01,302 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_skip_rate, batch_count=895460.0, ans=0.0 +2024-09-20 13:15:08,556 INFO [train.py:1198] (1/2) Epoch 50, batch 2150, loss[loss=0.2204, ctc_loss=0.1004, cr_loss=0.3231, attn_decoder_loss=0.2266, over 29436.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1062, cr_loss=0.3443, attn_decoder_loss=0.2358, over 5814439.31 frames. 
], batch size: 78, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:15:24,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module1.balancer2.min_positive, batch_count=895540.0, ans=0.05 +2024-09-20 13:15:31,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer1.prob, batch_count=895540.0, ans=0.125 +2024-09-20 13:15:47,326 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.32 vs. limit=6.0 +2024-09-20 13:15:48,014 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=895580.0, ans=0.1 +2024-09-20 13:15:58,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_module2.balancer2.min_abs, batch_count=895620.0, ans=0.5 +2024-09-20 13:16:23,845 INFO [train.py:1198] (1/2) Epoch 50, batch 2200, loss[loss=0.235, ctc_loss=0.1077, cr_loss=0.3442, attn_decoder_loss=0.2415, over 29633.00 frames. ], tot_loss[loss=0.2299, ctc_loss=0.1063, cr_loss=0.3447, attn_decoder_loss=0.2359, over 5811811.52 frames. ], batch size: 86, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:16:26,304 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.81 vs. limit=15.0 +2024-09-20 13:16:31,626 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=895700.0, ans=0.125 +2024-09-20 13:16:37,694 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:16:44,830 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=895740.0, ans=0.125 +2024-09-20 13:16:47,930 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass_mid.scale_min, batch_count=895740.0, ans=0.2 +2024-09-20 13:16:48,016 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass.scale_min, batch_count=895740.0, ans=0.2 +2024-09-20 13:17:15,041 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_skip_rate, batch_count=895820.0, ans=0.0 +2024-09-20 13:17:23,883 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.920e+01 8.609e+01 9.131e+01 9.597e+01 2.793e+02, threshold=1.826e+02, percent-clipped=2.0 +2024-09-20 13:17:38,980 INFO [train.py:1198] (1/2) Epoch 50, batch 2250, loss[loss=0.2321, ctc_loss=0.1058, cr_loss=0.3493, attn_decoder_loss=0.2383, over 29684.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1057, cr_loss=0.3429, attn_decoder_loss=0.2356, over 5810113.11 frames. 
], batch size: 82, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:18:02,470 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=895940.0, ans=0.0 +2024-09-20 13:18:06,736 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.min_positive, batch_count=895940.0, ans=0.025 +2024-09-20 13:18:26,062 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.ff3_skip_rate, batch_count=895980.0, ans=0.0 +2024-09-20 13:18:26,126 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.balancer1.prob, batch_count=895980.0, ans=0.125 +2024-09-20 13:18:40,099 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.scale_min, batch_count=896020.0, ans=0.2 +2024-09-20 13:18:53,868 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=896060.0, ans=0.125 +2024-09-20 13:18:56,925 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=896060.0, ans=0.125 +2024-09-20 13:19:02,860 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer2.prob, batch_count=896060.0, ans=0.125 +2024-09-20 13:19:05,562 INFO [train.py:1198] (1/2) Epoch 50, batch 2300, loss[loss=0.2091, ctc_loss=0.0924, cr_loss=0.3129, attn_decoder_loss=0.2152, over 29328.00 frames. ], tot_loss[loss=0.2286, ctc_loss=0.1052, cr_loss=0.3413, attn_decoder_loss=0.2348, over 5796602.72 frames. ], batch size: 71, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:19:07,288 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=896100.0, ans=0.125 +2024-09-20 13:19:35,551 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=896180.0, ans=0.1 +2024-09-20 13:20:05,618 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.458e+01 8.477e+01 9.129e+01 9.785e+01 2.320e+02, threshold=1.826e+02, percent-clipped=1.0 +2024-09-20 13:20:12,002 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=896260.0, ans=0.1 +2024-09-20 13:20:20,686 INFO [train.py:1198] (1/2) Epoch 50, batch 2350, loss[loss=0.2471, ctc_loss=0.1179, cr_loss=0.3635, attn_decoder_loss=0.2534, over 29703.00 frames. ], tot_loss[loss=0.2291, ctc_loss=0.1057, cr_loss=0.3424, attn_decoder_loss=0.2352, over 5802686.35 frames. ], batch size: 83, lr: 2.20e-03, grad_scale: 8.0 +2024-09-20 13:20:29,842 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=896300.0, ans=0.0 +2024-09-20 13:20:47,910 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=896340.0, ans=0.0 +2024-09-20 13:20:51,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=896380.0, ans=0.0 +2024-09-20 13:21:09,764 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.48 vs. 
limit=15.0 +2024-09-20 13:21:16,676 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.bypass_mid.scale_min, batch_count=896420.0, ans=0.2 +2024-09-20 13:21:22,618 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module1.balancer2.prob, batch_count=896460.0, ans=0.125 +2024-09-20 13:21:36,031 INFO [train.py:1198] (1/2) Epoch 50, batch 2400, loss[loss=0.22, ctc_loss=0.1046, cr_loss=0.3546, attn_decoder_loss=0.225, over 29542.00 frames. ], tot_loss[loss=0.2295, ctc_loss=0.1061, cr_loss=0.3436, attn_decoder_loss=0.2356, over 5807470.68 frames. ], batch size: 76, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:21:55,896 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:22:05,861 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=12.03 vs. limit=15.0 +2024-09-20 13:22:09,701 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=896580.0, ans=0.125 +2024-09-20 13:22:11,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=896580.0, ans=0.125 +2024-09-20 13:22:32,899 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=896620.0, ans=0.0 +2024-09-20 13:22:34,391 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=896620.0, ans=0.125 +2024-09-20 13:22:35,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.hidden_balancer.prob, batch_count=896620.0, ans=0.125 +2024-09-20 13:22:40,131 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.900e+01 8.885e+01 9.266e+01 9.766e+01 1.218e+02, threshold=1.853e+02, percent-clipped=0.0 +2024-09-20 13:22:49,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=896660.0, ans=0.09899494936611666 +2024-09-20 13:22:55,252 INFO [train.py:1198] (1/2) Epoch 50, batch 2450, loss[loss=0.2279, ctc_loss=0.1052, cr_loss=0.3492, attn_decoder_loss=0.2337, over 29708.00 frames. ], tot_loss[loss=0.2303, ctc_loss=0.1069, cr_loss=0.3451, attn_decoder_loss=0.2364, over 5784967.42 frames. ], batch size: 82, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:23:15,444 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.63 vs. limit=15.0 +2024-09-20 13:23:40,948 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:23:43,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=896820.0, ans=0.1 +2024-09-20 13:23:48,212 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=896820.0, ans=0.0 +2024-09-20 13:24:03,294 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.1.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:24:10,521 INFO [train.py:1198] (1/2) Epoch 50, batch 2500, loss[loss=0.236, ctc_loss=0.107, cr_loss=0.3452, attn_decoder_loss=0.2426, over 29615.00 frames. 
], tot_loss[loss=0.23, ctc_loss=0.1067, cr_loss=0.3449, attn_decoder_loss=0.2361, over 5794318.60 frames. ], batch size: 86, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:24:16,726 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.bypass.scale_min, batch_count=896900.0, ans=0.2 +2024-09-20 13:24:28,205 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.conv_module2.whiten, num_groups=1, num_channels=512, metric=6.81 vs. limit=15.0 +2024-09-20 13:24:34,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.ff3_skip_rate, batch_count=896940.0, ans=0.0 +2024-09-20 13:24:35,015 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=896940.0, ans=0.125 +2024-09-20 13:25:10,798 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.343e+01 8.696e+01 9.094e+01 9.539e+01 5.829e+02, threshold=1.819e+02, percent-clipped=1.0 +2024-09-20 13:25:14,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=897060.0, ans=0.1 +2024-09-20 13:25:18,594 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_module1.balancer2.prob, batch_count=897060.0, ans=0.125 +2024-09-20 13:25:22,040 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.65 vs. limit=15.0 +2024-09-20 13:25:23,137 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=897060.0, ans=0.0 +2024-09-20 13:25:25,444 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=512, metric=3.87 vs. limit=15.0 +2024-09-20 13:25:25,884 INFO [train.py:1198] (1/2) Epoch 50, batch 2550, loss[loss=0.2112, ctc_loss=0.09228, cr_loss=0.3222, attn_decoder_loss=0.2173, over 29385.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1065, cr_loss=0.344, attn_decoder_loss=0.2361, over 5798608.96 frames. ], batch size: 67, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:25:42,684 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=897140.0, ans=0.1 +2024-09-20 13:25:45,647 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=897140.0, ans=0.0 +2024-09-20 13:25:46,253 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=768, metric=5.44 vs. limit=15.0 +2024-09-20 13:26:21,525 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=897220.0, ans=0.1 +2024-09-20 13:26:26,561 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.49 vs. limit=6.0 +2024-09-20 13:26:27,629 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward2.hidden_balancer.prob, batch_count=897220.0, ans=0.125 +2024-09-20 13:26:38,916 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.56 vs. 
limit=6.0 +2024-09-20 13:26:39,668 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.prob, batch_count=897260.0, ans=0.125 +2024-09-20 13:26:45,536 INFO [train.py:1198] (1/2) Epoch 50, batch 2600, loss[loss=0.2333, ctc_loss=0.117, cr_loss=0.3627, attn_decoder_loss=0.2382, over 29452.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1068, cr_loss=0.3448, attn_decoder_loss=0.2366, over 5796425.55 frames. ], batch size: 78, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:26:56,165 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=897300.0, ans=0.1 +2024-09-20 13:26:56,634 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=3.80 vs. limit=15.0 +2024-09-20 13:27:14,767 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module1.whiten, num_groups=1, num_channels=768, metric=3.94 vs. limit=15.0 +2024-09-20 13:27:45,538 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.691e+01 8.589e+01 9.158e+01 9.861e+01 1.661e+02, threshold=1.832e+02, percent-clipped=0.0 +2024-09-20 13:27:57,858 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.scale_min, batch_count=897460.0, ans=0.2 +2024-09-20 13:28:00,333 INFO [train.py:1198] (1/2) Epoch 50, batch 2650, loss[loss=0.2431, ctc_loss=0.1134, cr_loss=0.3506, attn_decoder_loss=0.2497, over 29219.00 frames. ], tot_loss[loss=0.2309, ctc_loss=0.1071, cr_loss=0.3454, attn_decoder_loss=0.2369, over 5802040.58 frames. ], batch size: 100, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:28:17,327 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.conv_module1.balancer2.prob, batch_count=897540.0, ans=0.125 +2024-09-20 13:28:18,653 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.const_attention_rate, batch_count=897540.0, ans=0.025 +2024-09-20 13:28:46,020 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=897620.0, ans=0.125 +2024-09-20 13:28:49,289 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.whiten, num_groups=1, num_channels=512, metric=3.86 vs. limit=12.0 +2024-09-20 13:28:50,457 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_skip_rate, batch_count=897620.0, ans=0.0 +2024-09-20 13:29:01,499 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=4.01 vs. limit=10.0 +2024-09-20 13:29:03,968 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.balancer1.prob, batch_count=897660.0, ans=0.125 +2024-09-20 13:29:13,076 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=897660.0, ans=0.125 +2024-09-20 13:29:15,759 INFO [train.py:1198] (1/2) Epoch 50, batch 2700, loss[loss=0.2273, ctc_loss=0.09269, cr_loss=0.3177, attn_decoder_loss=0.2352, over 29525.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.107, cr_loss=0.3457, attn_decoder_loss=0.2372, over 5796442.33 frames. 
], batch size: 87, lr: 2.20e-03, grad_scale: 16.0 +2024-09-20 13:29:32,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=897740.0, ans=0.025 +2024-09-20 13:29:34,057 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:29:34,128 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=897740.0, ans=0.125 +2024-09-20 13:29:51,043 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=897780.0, ans=0.1 +2024-09-20 13:30:06,673 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=897820.0, ans=0.1 +2024-09-20 13:30:11,723 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.conv_module2.whiten, num_groups=1, num_channels=768, metric=4.70 vs. limit=15.0 +2024-09-20 13:30:19,833 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.526e+01 8.670e+01 9.155e+01 9.600e+01 1.586e+02, threshold=1.831e+02, percent-clipped=0.0 +2024-09-20 13:30:30,666 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass.skip_rate, batch_count=897860.0, ans=0.09899494936611666 +2024-09-20 13:30:32,184 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=897860.0, ans=0.1 +2024-09-20 13:30:35,020 INFO [train.py:1198] (1/2) Epoch 50, batch 2750, loss[loss=0.2289, ctc_loss=0.1124, cr_loss=0.3753, attn_decoder_loss=0.2335, over 29518.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1063, cr_loss=0.3441, attn_decoder_loss=0.2362, over 5795311.98 frames. ], batch size: 75, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:30:37,520 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.22 vs. limit=6.0 +2024-09-20 13:30:38,575 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=897900.0, ans=0.125 +2024-09-20 13:30:38,596 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=897900.0, ans=0.1 +2024-09-20 13:31:05,311 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=897980.0, ans=0.1 +2024-09-20 13:31:15,833 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module2.balancer1.prob, batch_count=897980.0, ans=0.125 +2024-09-20 13:31:23,333 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer2.prob, batch_count=898020.0, ans=0.125 +2024-09-20 13:31:23,938 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=5.17 vs. limit=15.0 +2024-09-20 13:31:26,162 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.balancer2.prob, batch_count=898020.0, ans=0.125 +2024-09-20 13:31:40,923 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=9.14 vs. 
limit=15.0 +2024-09-20 13:31:50,611 INFO [train.py:1198] (1/2) Epoch 50, batch 2800, loss[loss=0.2437, ctc_loss=0.1264, cr_loss=0.3597, attn_decoder_loss=0.2487, over 20059.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1069, cr_loss=0.3448, attn_decoder_loss=0.2366, over 5776612.07 frames. ], batch size: 210, lr: 2.19e-03, grad_scale: 32.0 +2024-09-20 13:31:50,940 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=898100.0, ans=0.125 +2024-09-20 13:32:07,405 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=898140.0, ans=0.0 +2024-09-20 13:32:46,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=898220.0, ans=0.1 +2024-09-20 13:32:50,829 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.self_attn_weights.pos_emb_skip_rate, batch_count=898260.0, ans=0.0 +2024-09-20 13:32:53,361 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.485e+01 8.751e+01 9.267e+01 9.840e+01 2.500e+02, threshold=1.853e+02, percent-clipped=1.0 +2024-09-20 13:32:57,543 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=2.19 vs. limit=6.0 +2024-09-20 13:33:05,416 INFO [train.py:1198] (1/2) Epoch 50, batch 2850, loss[loss=0.2351, ctc_loss=0.1117, cr_loss=0.365, attn_decoder_loss=0.2407, over 29485.00 frames. ], tot_loss[loss=0.2312, ctc_loss=0.1074, cr_loss=0.3461, attn_decoder_loss=0.2372, over 5763173.90 frames. ], batch size: 77, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:33:28,507 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=898340.0, ans=0.0 +2024-09-20 13:33:30,684 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.08 vs. limit=15.0 +2024-09-20 13:33:38,431 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.attention_skip_rate, batch_count=898380.0, ans=0.0 +2024-09-20 13:33:39,869 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_module2.balancer1.prob, batch_count=898380.0, ans=0.125 +2024-09-20 13:34:08,771 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=898460.0, ans=0.125 +2024-09-20 13:34:11,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer1.prob, batch_count=898460.0, ans=0.125 +2024-09-20 13:34:13,408 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.feed_forward1.out_proj.dropout_p, batch_count=898460.0, ans=0.1 +2024-09-20 13:34:23,631 INFO [train.py:1198] (1/2) Epoch 50, batch 2900, loss[loss=0.2198, ctc_loss=0.09121, cr_loss=0.3081, attn_decoder_loss=0.2272, over 29409.00 frames. ], tot_loss[loss=0.2321, ctc_loss=0.1079, cr_loss=0.3476, attn_decoder_loss=0.2381, over 5788084.39 frames. 
], batch size: 79, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:34:31,360 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward1.hidden_balancer.prob, batch_count=898500.0, ans=0.125 +2024-09-20 13:34:40,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff2_skip_rate, batch_count=898540.0, ans=0.0 +2024-09-20 13:34:57,930 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=5.70 vs. limit=15.0 +2024-09-20 13:35:01,713 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.balancer1.prob, batch_count=898580.0, ans=0.125 +2024-09-20 13:35:16,670 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=898620.0, ans=0.1 +2024-09-20 13:35:21,210 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=898620.0, ans=0.1 +2024-09-20 13:35:26,849 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.844e+01 8.699e+01 9.096e+01 9.563e+01 1.472e+02, threshold=1.819e+02, percent-clipped=0.0 +2024-09-20 13:35:30,037 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=898660.0, ans=0.1 +2024-09-20 13:35:33,117 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_skip_rate, batch_count=898660.0, ans=0.0 +2024-09-20 13:35:38,792 INFO [train.py:1198] (1/2) Epoch 50, batch 2950, loss[loss=0.2229, ctc_loss=0.1064, cr_loss=0.3487, attn_decoder_loss=0.2281, over 29521.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.107, cr_loss=0.3452, attn_decoder_loss=0.2367, over 5783032.30 frames. ], batch size: 75, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:35:43,909 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.nonlin_attention.whiten1.whitening_limit, batch_count=898700.0, ans=10.0 +2024-09-20 13:36:22,743 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward1.hidden_balancer.prob, batch_count=898820.0, ans=0.125 +2024-09-20 13:36:28,762 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=898820.0, ans=0.07 +2024-09-20 13:36:30,329 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=898820.0, ans=0.125 +2024-09-20 13:36:54,238 INFO [train.py:1198] (1/2) Epoch 50, batch 3000, loss[loss=0.2347, ctc_loss=0.1122, cr_loss=0.3508, attn_decoder_loss=0.2406, over 29746.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1072, cr_loss=0.346, attn_decoder_loss=0.2368, over 5783219.13 frames. ], batch size: 81, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:36:54,238 INFO [train.py:1221] (1/2) Computing validation loss +2024-09-20 13:37:12,393 INFO [train.py:1230] (1/2) Epoch 50, validation: loss=0.213, ctc_loss=0.03629, cr_loss=7.081e-15, attn_decoder_loss=0.2326, over 944034.00 frames. 
+2024-09-20 13:37:12,394 INFO [train.py:1231] (1/2) Maximum memory allocated so far is 52672MB +2024-09-20 13:37:33,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer1.min_positive, batch_count=898940.0, ans=0.025 +2024-09-20 13:37:43,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer2.prob, batch_count=898940.0, ans=0.125 +2024-09-20 13:38:03,463 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=899020.0, ans=0.125 +2024-09-20 13:38:04,976 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:38:13,900 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.ff3_skip_rate, batch_count=899020.0, ans=0.0 +2024-09-20 13:38:19,549 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.783e+01 8.906e+01 9.324e+01 9.722e+01 1.754e+02, threshold=1.865e+02, percent-clipped=0.0 +2024-09-20 13:38:31,731 INFO [train.py:1198] (1/2) Epoch 50, batch 3050, loss[loss=0.2152, ctc_loss=0.09753, cr_loss=0.3168, attn_decoder_loss=0.2213, over 29502.00 frames. ], tot_loss[loss=0.2311, ctc_loss=0.1074, cr_loss=0.3467, attn_decoder_loss=0.2371, over 5776516.46 frames. ], batch size: 76, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:38:45,883 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:39:04,106 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten1, num_groups=1, num_channels=576, metric=4.73 vs. limit=10.0 +2024-09-20 13:39:05,428 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=2.69 vs. limit=15.0 +2024-09-20 13:39:10,032 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:39:23,500 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.const_attention_rate, batch_count=899220.0, ans=0.025 +2024-09-20 13:39:43,063 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.self_attn_weights.pos_emb_skip_rate, batch_count=899260.0, ans=0.0 +2024-09-20 13:39:44,532 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.0.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:39:47,288 INFO [train.py:1198] (1/2) Epoch 50, batch 3100, loss[loss=0.2501, ctc_loss=0.1209, cr_loss=0.3693, attn_decoder_loss=0.2562, over 29246.00 frames. ], tot_loss[loss=0.231, ctc_loss=0.1075, cr_loss=0.3468, attn_decoder_loss=0.237, over 5777475.21 frames. ], batch size: 100, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:39:56,587 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=899300.0, ans=0.125 +2024-09-20 13:40:20,856 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=16.36 vs. 
limit=22.5 +2024-09-20 13:40:23,485 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.attention_skip_rate, batch_count=899380.0, ans=0.0 +2024-09-20 13:40:23,519 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.const_attention_rate, batch_count=899380.0, ans=0.025 +2024-09-20 13:40:27,166 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.whiten, num_groups=1, num_channels=512, metric=7.01 vs. limit=12.0 +2024-09-20 13:40:34,597 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=768, metric=7.23 vs. limit=15.0 +2024-09-20 13:40:46,250 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward3.hidden_balancer.prob, batch_count=899460.0, ans=0.125 +2024-09-20 13:40:50,446 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.647e+01 8.844e+01 9.233e+01 9.806e+01 2.846e+02, threshold=1.847e+02, percent-clipped=1.0 +2024-09-20 13:40:50,846 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.balancer2.prob, batch_count=899460.0, ans=0.125 +2024-09-20 13:40:52,151 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward2.hidden_balancer.prob, batch_count=899460.0, ans=0.125 +2024-09-20 13:41:02,633 INFO [train.py:1198] (1/2) Epoch 50, batch 3150, loss[loss=0.2335, ctc_loss=0.1066, cr_loss=0.3424, attn_decoder_loss=0.24, over 28805.00 frames. ], tot_loss[loss=0.2308, ctc_loss=0.1073, cr_loss=0.3461, attn_decoder_loss=0.2369, over 5783142.85 frames. ], batch size: 104, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:41:07,441 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=11.02 vs. limit=22.5 +2024-09-20 13:41:17,154 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=899500.0, ans=0.1 +2024-09-20 13:41:25,508 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=899540.0, ans=0.125 +2024-09-20 13:41:41,614 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=899580.0, ans=0.1 +2024-09-20 13:41:50,609 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.bypass.skip_rate, batch_count=899620.0, ans=0.07 +2024-09-20 13:42:07,187 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer2.prob, batch_count=899660.0, ans=0.125 +2024-09-20 13:42:09,928 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.8.prob, batch_count=899660.0, ans=0.125 +2024-09-20 13:42:13,021 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.attention_skip_rate, batch_count=899660.0, ans=0.0 +2024-09-20 13:42:20,746 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:42:21,891 INFO [train.py:1198] (1/2) Epoch 50, batch 3200, loss[loss=0.2224, ctc_loss=0.0937, cr_loss=0.3261, attn_decoder_loss=0.2295, over 29405.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1066, cr_loss=0.3449, attn_decoder_loss=0.2362, over 5794232.10 frames. 
], batch size: 79, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:42:24,205 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.whiten, num_groups=1, num_channels=768, metric=4.02 vs. limit=12.0 +2024-09-20 13:42:30,260 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.whiten2.whitening_limit, batch_count=899700.0, ans=15.0 +2024-09-20 13:42:38,716 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=899740.0, ans=0.0 +2024-09-20 13:43:25,246 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.493e+01 8.416e+01 9.001e+01 9.640e+01 1.386e+02, threshold=1.800e+02, percent-clipped=0.0 +2024-09-20 13:43:37,444 INFO [train.py:1198] (1/2) Epoch 50, batch 3250, loss[loss=0.2298, ctc_loss=0.1052, cr_loss=0.3411, attn_decoder_loss=0.2361, over 29707.00 frames. ], tot_loss[loss=0.2307, ctc_loss=0.1067, cr_loss=0.3456, attn_decoder_loss=0.2368, over 5800835.43 frames. ], batch size: 84, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:43:44,615 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.23 vs. limit=22.5 +2024-09-20 13:44:26,047 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer1.max_abs, batch_count=900020.0, ans=10.0 +2024-09-20 13:44:50,239 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.ff2_skip_rate, batch_count=900060.0, ans=0.0 +2024-09-20 13:44:53,041 INFO [train.py:1198] (1/2) Epoch 50, batch 3300, loss[loss=0.2424, ctc_loss=0.1108, cr_loss=0.3596, attn_decoder_loss=0.2491, over 28136.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1062, cr_loss=0.3443, attn_decoder_loss=0.2358, over 5796607.29 frames. ], batch size: 111, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:45:32,570 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer1.prob, batch_count=900180.0, ans=0.125 +2024-09-20 13:45:50,353 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.balancer2.prob, batch_count=900220.0, ans=0.125 +2024-09-20 13:46:01,978 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.677e+01 8.683e+01 9.254e+01 9.837e+01 3.581e+02, threshold=1.851e+02, percent-clipped=1.0 +2024-09-20 13:46:04,147 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=10.56 vs. limit=15.0 +2024-09-20 13:46:05,780 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=768, metric=3.54 vs. limit=15.0 +2024-09-20 13:46:12,367 INFO [train.py:1198] (1/2) Epoch 50, batch 3350, loss[loss=0.2465, ctc_loss=0.1177, cr_loss=0.3799, attn_decoder_loss=0.2523, over 28773.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.107, cr_loss=0.3461, attn_decoder_loss=0.2365, over 5774300.11 frames. 
], batch size: 104, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:46:14,204 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=900300.0, ans=0.125 +2024-09-20 13:46:41,256 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.nonlin_attention.balancer.max_positive, batch_count=900380.0, ans=0.95 +2024-09-20 13:47:14,284 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.ff2_skip_rate, batch_count=900460.0, ans=0.0 +2024-09-20 13:47:27,585 INFO [train.py:1198] (1/2) Epoch 50, batch 3400, loss[loss=0.2169, ctc_loss=0.1075, cr_loss=0.3495, attn_decoder_loss=0.2213, over 29366.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1073, cr_loss=0.3465, attn_decoder_loss=0.2365, over 5767007.38 frames. ], batch size: 67, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:47:27,915 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.out_combiner.scale_min, batch_count=900500.0, ans=0.2 +2024-09-20 13:47:33,175 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=256, metric=12.81 vs. limit=15.0 +2024-09-20 13:47:33,215 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.feed_forward2.out_whiten, num_groups=1, num_channels=256, metric=5.44 vs. limit=15.0 +2024-09-20 13:47:38,586 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=900500.0, ans=0.2 +2024-09-20 13:47:44,558 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=900540.0, ans=0.1 +2024-09-20 13:47:52,850 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.2.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=4.54 vs. limit=15.0 +2024-09-20 13:48:14,824 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.attention_skip_rate, batch_count=900620.0, ans=0.0 +2024-09-20 13:48:32,608 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.588e+01 8.696e+01 9.262e+01 1.002e+02 2.353e+02, threshold=1.852e+02, percent-clipped=1.0 +2024-09-20 13:48:33,283 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.50 vs. limit=10.0 +2024-09-20 13:48:40,827 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:48:44,939 INFO [train.py:1198] (1/2) Epoch 50, batch 3450, loss[loss=0.2425, ctc_loss=0.1139, cr_loss=0.3533, attn_decoder_loss=0.2489, over 28300.00 frames. ], tot_loss[loss=0.2305, ctc_loss=0.1073, cr_loss=0.3463, attn_decoder_loss=0.2365, over 5775347.41 frames. 
], batch size: 111, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:48:45,173 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer1.prob, batch_count=900700.0, ans=0.125 +2024-09-20 13:48:52,844 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module2.balancer2.prob, batch_count=900700.0, ans=0.125 +2024-09-20 13:48:54,326 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_skip_rate, batch_count=900700.0, ans=0.0 +2024-09-20 13:48:54,356 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.attention_skip_rate, batch_count=900700.0, ans=0.0 +2024-09-20 13:48:55,854 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff3_skip_rate, batch_count=900700.0, ans=0.0 +2024-09-20 13:49:04,624 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=900740.0, ans=0.0 +2024-09-20 13:49:07,651 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.bypass_mid.scale_min, batch_count=900740.0, ans=0.2 +2024-09-20 13:49:07,725 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=900740.0, ans=0.0 +2024-09-20 13:49:12,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_proj.dropout_p, batch_count=900740.0, ans=0.1 +2024-09-20 13:49:21,211 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=4.19 vs. limit=10.0 +2024-09-20 13:49:23,295 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=900780.0, ans=0.1 +2024-09-20 13:49:28,289 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=900780.0, ans=0.125 +2024-09-20 13:49:47,204 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=768, metric=20.54 vs. limit=22.5 +2024-09-20 13:50:02,672 INFO [train.py:1198] (1/2) Epoch 50, batch 3500, loss[loss=0.2074, ctc_loss=0.08378, cr_loss=0.2904, attn_decoder_loss=0.2147, over 29332.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1069, cr_loss=0.3457, attn_decoder_loss=0.2361, over 5776600.59 frames. ], batch size: 71, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:50:02,896 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module2.balancer1.prob, batch_count=900900.0, ans=0.125 +2024-09-20 13:50:07,522 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=900900.0, ans=0.125 +2024-09-20 13:50:10,397 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.bypass.scale_min, batch_count=900900.0, ans=0.2 +2024-09-20 13:50:25,914 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=256, metric=10.00 vs. 
limit=22.5 +2024-09-20 13:50:43,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.attention_skip_rate, batch_count=900980.0, ans=0.0 +2024-09-20 13:50:46,786 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.62 vs. limit=10.0 +2024-09-20 13:51:06,275 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.056e+01 8.687e+01 9.132e+01 9.623e+01 1.623e+02, threshold=1.826e+02, percent-clipped=0.0 +2024-09-20 13:51:06,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.const_attention_rate, batch_count=901060.0, ans=0.025 +2024-09-20 13:51:16,613 INFO [train.py:1198] (1/2) Epoch 50, batch 3550, loss[loss=0.2406, ctc_loss=0.1102, cr_loss=0.3582, attn_decoder_loss=0.2471, over 29700.00 frames. ], tot_loss[loss=0.2302, ctc_loss=0.1069, cr_loss=0.3457, attn_decoder_loss=0.2362, over 5782676.54 frames. ], batch size: 89, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 13:51:24,263 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=901100.0, ans=0.125 +2024-09-20 13:51:27,327 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.3.encoder.layers.4.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:51:30,115 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=901140.0, ans=0.125 +2024-09-20 13:51:46,518 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=901180.0, ans=0.0 +2024-09-20 13:52:04,833 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=7.88 vs. limit=15.0 +2024-09-20 13:52:16,590 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.whiten, num_groups=1, num_channels=256, metric=3.94 vs. limit=12.0 +2024-09-20 13:52:26,005 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.conv.5.prob, batch_count=901260.0, ans=0.125 +2024-09-20 13:52:30,409 INFO [train.py:1198] (1/2) Epoch 50, batch 3600, loss[loss=0.2353, ctc_loss=0.1193, cr_loss=0.3734, attn_decoder_loss=0.2399, over 29493.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1069, cr_loss=0.3455, attn_decoder_loss=0.2361, over 5791595.42 frames. ], batch size: 77, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:52:40,953 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=901300.0, ans=0.125 +2024-09-20 13:52:50,079 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.bypass.skip_rate, batch_count=901340.0, ans=0.09899494936611666 +2024-09-20 13:53:22,641 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.attention_skip_rate, batch_count=901420.0, ans=0.0 +2024-09-20 13:53:34,114 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.501e+01 8.620e+01 9.031e+01 9.703e+01 1.754e+02, threshold=1.806e+02, percent-clipped=0.0 +2024-09-20 13:53:44,526 INFO [train.py:1198] (1/2) Epoch 50, batch 3650, loss[loss=0.2566, ctc_loss=0.1408, cr_loss=0.4313, attn_decoder_loss=0.2599, over 29526.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1066, cr_loss=0.345, attn_decoder_loss=0.2358, over 5794825.92 frames. 
], batch size: 90, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:53:59,669 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.bypass.skip_rate, batch_count=901540.0, ans=0.04949747468305833 +2024-09-20 13:54:04,150 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.attention_skip_rate, batch_count=901540.0, ans=0.0 +2024-09-20 13:54:05,557 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=901540.0, ans=0.1 +2024-09-20 13:54:05,605 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module1.balancer2.prob, batch_count=901540.0, ans=0.125 +2024-09-20 13:54:24,028 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=901580.0, ans=0.0 +2024-09-20 13:54:38,794 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.bypass.skip_rate, batch_count=901620.0, ans=0.09899494936611666 +2024-09-20 13:54:52,152 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=901660.0, ans=0.1 +2024-09-20 13:55:00,055 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.ff3_skip_rate, batch_count=901660.0, ans=0.0 +2024-09-20 13:55:02,720 INFO [train.py:1198] (1/2) Epoch 50, batch 3700, loss[loss=0.2485, ctc_loss=0.1235, cr_loss=0.3853, attn_decoder_loss=0.2538, over 29712.00 frames. ], tot_loss[loss=0.2301, ctc_loss=0.1068, cr_loss=0.3456, attn_decoder_loss=0.2361, over 5804705.03 frames. ], batch size: 84, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:55:05,118 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.feed_forward2.out_whiten, num_groups=1, num_channels=768, metric=9.66 vs. limit=15.0 +2024-09-20 13:55:17,946 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=901740.0, ans=0.125 +2024-09-20 13:55:32,336 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=901780.0, ans=0.1 +2024-09-20 13:55:37,652 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=256, metric=6.31 vs. limit=15.0 +2024-09-20 13:55:41,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=901780.0, ans=0.125 +2024-09-20 13:55:42,746 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.const_attention_rate, batch_count=901780.0, ans=0.025 +2024-09-20 13:56:01,857 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward3.hidden_balancer.prob, batch_count=901860.0, ans=0.125 +2024-09-20 13:56:05,881 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.463e+01 8.521e+01 9.140e+01 9.589e+01 3.115e+02, threshold=1.828e+02, percent-clipped=1.0 +2024-09-20 13:56:08,437 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=12.64 vs. limit=15.0 +2024-09-20 13:56:11,479 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.conv_module2.whiten, num_groups=1, num_channels=512, metric=4.78 vs. 
limit=15.0 +2024-09-20 13:56:16,332 INFO [train.py:1198] (1/2) Epoch 50, batch 3750, loss[loss=0.2042, ctc_loss=0.08542, cr_loss=0.2882, attn_decoder_loss=0.211, over 29365.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1065, cr_loss=0.3452, attn_decoder_loss=0.2356, over 5807761.27 frames. ], batch size: 67, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:56:25,591 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_skip_rate, batch_count=901900.0, ans=0.0 +2024-09-20 13:56:45,762 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=5.55 vs. limit=12.0 +2024-09-20 13:56:59,793 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff2_skip_rate, batch_count=902020.0, ans=0.0 +2024-09-20 13:57:01,490 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.5.encoder.layers.0.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 13:57:28,042 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=902060.0, ans=0.125 +2024-09-20 13:57:30,714 INFO [train.py:1198] (1/2) Epoch 50, batch 3800, loss[loss=0.239, ctc_loss=0.1092, cr_loss=0.3501, attn_decoder_loss=0.2456, over 29650.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1066, cr_loss=0.3455, attn_decoder_loss=0.2356, over 5798101.08 frames. ], batch size: 86, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:58:03,780 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=902180.0, ans=0.125 +2024-09-20 13:58:14,113 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer_ff3.min_abs, batch_count=902220.0, ans=0.2 +2024-09-20 13:58:14,132 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.bypass.skip_rate, batch_count=902220.0, ans=0.04949747468305833 +2024-09-20 13:58:14,566 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.00 vs. limit=15.0 +2024-09-20 13:58:27,352 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.4.conv_skip_rate, batch_count=902220.0, ans=0.0 +2024-09-20 13:58:34,452 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.410e+01 8.593e+01 9.127e+01 9.556e+01 1.815e+02, threshold=1.825e+02, percent-clipped=0.0 +2024-09-20 13:58:34,705 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=902260.0, ans=0.1 +2024-09-20 13:58:36,576 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=3.24 vs. limit=6.0 +2024-09-20 13:58:38,330 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.95 vs. limit=15.0 +2024-09-20 13:58:44,683 INFO [train.py:1198] (1/2) Epoch 50, batch 3850, loss[loss=0.2488, ctc_loss=0.1213, cr_loss=0.3692, attn_decoder_loss=0.2547, over 29213.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1063, cr_loss=0.3445, attn_decoder_loss=0.2354, over 5812017.56 frames. 
], batch size: 100, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 13:58:57,887 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.layerdrop_rate, batch_count=902340.0, ans=0.015 +2024-09-20 13:59:00,015 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.3.feed_forward2.out_whiten, num_groups=1, num_channels=512, metric=10.18 vs. limit=15.0 +2024-09-20 13:59:25,291 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.const_attention_rate, batch_count=902380.0, ans=0.025 +2024-09-20 13:59:29,592 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_skip_rate, batch_count=902420.0, ans=0.0 +2024-09-20 13:59:39,093 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.78 vs. limit=15.0 +2024-09-20 13:59:50,111 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=902460.0, ans=0.125 +2024-09-20 14:00:00,220 INFO [train.py:1198] (1/2) Epoch 50, batch 3900, loss[loss=0.2513, ctc_loss=0.1192, cr_loss=0.3766, attn_decoder_loss=0.2576, over 29631.00 frames. ], tot_loss[loss=0.2298, ctc_loss=0.1064, cr_loss=0.345, attn_decoder_loss=0.2359, over 5817225.26 frames. ], batch size: 86, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:00:50,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=902620.0, ans=0.2 +2024-09-20 14:01:00,921 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.conv_module1.balancer2.prob, batch_count=902660.0, ans=0.125 +2024-09-20 14:01:02,386 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.balancer1.prob, batch_count=902660.0, ans=0.125 +2024-09-20 14:01:05,030 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.795e+01 8.747e+01 9.246e+01 9.668e+01 1.412e+02, threshold=1.849e+02, percent-clipped=0.0 +2024-09-20 14:01:08,585 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.5.encoder.layers.0.self_attn1.whiten, num_groups=1, num_channels=256, metric=14.10 vs. limit=22.5 +2024-09-20 14:01:15,505 INFO [train.py:1198] (1/2) Epoch 50, batch 3950, loss[loss=0.241, ctc_loss=0.1132, cr_loss=0.3597, attn_decoder_loss=0.2472, over 29536.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.1057, cr_loss=0.3433, attn_decoder_loss=0.2357, over 5836454.63 frames. ], batch size: 97, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:01:21,671 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_skip_rate, batch_count=902700.0, ans=0.0 +2024-09-20 14:01:24,776 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.3.ff2_skip_rate, batch_count=902700.0, ans=0.0 +2024-09-20 14:01:33,473 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.feed_forward3.hidden_balancer.prob, batch_count=902740.0, ans=0.125 +2024-09-20 14:01:45,915 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=768, metric=16.70 vs. limit=22.5 +2024-09-20 14:01:56,367 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=192, metric=5.69 vs. 
limit=15.0 +2024-09-20 14:01:59,872 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.balancer.prob, batch_count=902820.0, ans=0.125 +2024-09-20 14:02:10,279 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=902820.0, ans=0.125 +2024-09-20 14:02:13,690 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=512, metric=18.87 vs. limit=22.5 +2024-09-20 14:02:28,914 INFO [train.py:1198] (1/2) Epoch 50, batch 4000, loss[loss=0.2181, ctc_loss=0.1051, cr_loss=0.3444, attn_decoder_loss=0.223, over 29542.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1061, cr_loss=0.3436, attn_decoder_loss=0.2358, over 5814467.49 frames. ], batch size: 74, lr: 2.19e-03, grad_scale: 32.0 +2024-09-20 14:02:29,275 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=902900.0, ans=0.07 +2024-09-20 14:02:32,025 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.conv_module1.balancer2.prob, batch_count=902900.0, ans=0.125 +2024-09-20 14:02:34,939 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.conv_module2.balancer1.max_abs, batch_count=902900.0, ans=10.0 +2024-09-20 14:02:42,262 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.feed_forward1.hidden_balancer.prob, batch_count=902940.0, ans=0.125 +2024-09-20 14:02:52,606 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.conv_module2.balancer1.prob, batch_count=902940.0, ans=0.125 +2024-09-20 14:03:10,600 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=902980.0, ans=0.125 +2024-09-20 14:03:15,019 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module2.balancer2.prob, batch_count=903020.0, ans=0.125 +2024-09-20 14:03:26,871 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.conv_module1.balancer2.min_positive, batch_count=903060.0, ans=0.05 +2024-09-20 14:03:33,840 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.945e+01 8.787e+01 9.327e+01 9.838e+01 2.486e+02, threshold=1.865e+02, percent-clipped=3.0 +2024-09-20 14:03:42,693 INFO [train.py:1198] (1/2) Epoch 50, batch 4050, loss[loss=0.2433, ctc_loss=0.1291, cr_loss=0.3628, attn_decoder_loss=0.248, over 19721.00 frames. ], tot_loss[loss=0.2294, ctc_loss=0.1058, cr_loss=0.3433, attn_decoder_loss=0.2355, over 5797575.92 frames. ], batch size: 209, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:03:59,552 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=10.38 vs. limit=15.0 +2024-09-20 14:04:00,493 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.feed_forward3.hidden_balancer.prob, batch_count=903140.0, ans=0.125 +2024-09-20 14:04:02,537 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.feed_forward2.out_whiten.whitening_limit, batch_count=903140.0, ans=15.0 +2024-09-20 14:04:21,953 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.0.self_attn2.whiten, num_groups=1, num_channels=256, metric=11.46 vs. 
limit=22.5 +2024-09-20 14:04:25,357 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=903180.0, ans=0.1 +2024-09-20 14:04:34,166 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.balancer1.prob, batch_count=903220.0, ans=0.125 +2024-09-20 14:04:37,215 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=903220.0, ans=0.125 +2024-09-20 14:04:41,464 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer1.prob, batch_count=903260.0, ans=0.125 +2024-09-20 14:04:57,480 INFO [train.py:1198] (1/2) Epoch 50, batch 4100, loss[loss=0.237, ctc_loss=0.1053, cr_loss=0.3333, attn_decoder_loss=0.2442, over 29526.00 frames. ], tot_loss[loss=0.2297, ctc_loss=0.1061, cr_loss=0.344, attn_decoder_loss=0.2357, over 5793069.75 frames. ], batch size: 90, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:05:07,956 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.conv_module1.balancer1.prob, batch_count=903300.0, ans=0.125 +2024-09-20 14:05:15,374 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=903340.0, ans=0.125 +2024-09-20 14:05:19,347 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.dropout_p, batch_count=903340.0, ans=0.1 +2024-09-20 14:05:19,495 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_module1.balancer1.prob, batch_count=903340.0, ans=0.125 +2024-09-20 14:05:20,662 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder_embed.convnext.out_balancer.prob, batch_count=903340.0, ans=0.125 +2024-09-20 14:05:47,369 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.attention_skip_rate, batch_count=903420.0, ans=0.0 +2024-09-20 14:06:03,361 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.dropout_p, batch_count=903460.0, ans=0.1 +2024-09-20 14:06:04,477 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.754e+01 8.737e+01 9.246e+01 9.591e+01 2.033e+02, threshold=1.849e+02, percent-clipped=1.0 +2024-09-20 14:06:11,996 INFO [train.py:1198] (1/2) Epoch 50, batch 4150, loss[loss=0.2189, ctc_loss=0.09807, cr_loss=0.3245, attn_decoder_loss=0.2251, over 29521.00 frames. ], tot_loss[loss=0.2292, ctc_loss=0.1058, cr_loss=0.3431, attn_decoder_loss=0.2353, over 5798483.97 frames. ], batch size: 77, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:06:13,642 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.conv_module2.balancer1.prob, batch_count=903500.0, ans=0.125 +2024-09-20 14:06:31,413 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.ff2_skip_rate, batch_count=903540.0, ans=0.0 +2024-09-20 14:06:35,738 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.bypass.skip_rate, batch_count=903540.0, ans=0.07 +2024-09-20 14:06:38,569 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.const_attention_rate, batch_count=903540.0, ans=0.025 +2024-09-20 14:06:40,743 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.3.feed_forward1.out_whiten, num_groups=1, num_channels=512, metric=9.72 vs. 
limit=15.0 +2024-09-20 14:06:56,321 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.bypass.scale_min, batch_count=903620.0, ans=0.2 +2024-09-20 14:07:19,467 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=903660.0, ans=0.0 +2024-09-20 14:07:22,501 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.conv_module2.balancer2.prob, batch_count=903660.0, ans=0.125 +2024-09-20 14:07:25,109 INFO [train.py:1198] (1/2) Epoch 50, batch 4200, loss[loss=0.2514, ctc_loss=0.1289, cr_loss=0.4055, attn_decoder_loss=0.256, over 29502.00 frames. ], tot_loss[loss=0.2296, ctc_loss=0.106, cr_loss=0.3436, attn_decoder_loss=0.2357, over 5799548.73 frames. ], batch size: 90, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:07:32,845 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.0.balancer2.prob, batch_count=903700.0, ans=0.125 +2024-09-20 14:07:37,272 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module1.balancer1.prob, batch_count=903700.0, ans=0.125 +2024-09-20 14:07:37,423 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.conv_skip_rate, batch_count=903700.0, ans=0.0 +2024-09-20 14:07:40,198 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.bypass.skip_rate, batch_count=903740.0, ans=0.07 +2024-09-20 14:08:32,189 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.954e+01 8.657e+01 9.068e+01 9.554e+01 1.385e+02, threshold=1.814e+02, percent-clipped=0.0 +2024-09-20 14:08:33,929 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.3.conv_module1.balancer1.prob, batch_count=903860.0, ans=0.125 +2024-09-20 14:08:37,000 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=903860.0, ans=0.0 +2024-09-20 14:08:39,520 INFO [train.py:1198] (1/2) Epoch 50, batch 4250, loss[loss=0.2143, ctc_loss=0.08962, cr_loss=0.3017, attn_decoder_loss=0.2215, over 29538.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1054, cr_loss=0.3418, attn_decoder_loss=0.2355, over 5804661.45 frames. ], batch size: 74, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:08:41,209 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=903900.0, ans=0.125 +2024-09-20 14:08:52,747 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.bypass.scale_min, batch_count=903940.0, ans=0.2 +2024-09-20 14:09:34,796 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=904020.0, ans=0.1 +2024-09-20 14:09:53,370 INFO [train.py:1198] (1/2) Epoch 50, batch 4300, loss[loss=0.2414, ctc_loss=0.1107, cr_loss=0.3486, attn_decoder_loss=0.2481, over 29521.00 frames. ], tot_loss[loss=0.2293, ctc_loss=0.1052, cr_loss=0.3414, attn_decoder_loss=0.2355, over 5794333.52 frames. 
], batch size: 87, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:10:15,826 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.feed_forward2.hidden_balancer.prob, batch_count=904140.0, ans=0.125 +2024-09-20 14:10:23,010 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.nonlin_attention.balancer.prob, batch_count=904180.0, ans=0.125 +2024-09-20 14:10:26,607 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.whiten, num_groups=1, num_channels=768, metric=4.79 vs. limit=12.0 +2024-09-20 14:10:43,640 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.nonlin_attention.balancer.prob, batch_count=904220.0, ans=0.125 +2024-09-20 14:10:45,179 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=904220.0, ans=0.0 +2024-09-20 14:10:55,885 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.4.conv_module2.whiten, num_groups=1, num_channels=768, metric=5.81 vs. limit=15.0 +2024-09-20 14:10:56,190 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.0.layers.1.feed_forward3.out_whiten, num_groups=1, num_channels=192, metric=11.47 vs. limit=15.0 +2024-09-20 14:10:59,382 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 7.761e+01 8.735e+01 9.239e+01 9.870e+01 1.478e+02, threshold=1.848e+02, percent-clipped=0.0 +2024-09-20 14:11:03,433 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.1.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=256, metric=9.67 vs. limit=22.5 +2024-09-20 14:11:04,254 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.ff2_skip_rate, batch_count=904260.0, ans=0.0 +2024-09-20 14:11:06,797 INFO [train.py:1198] (1/2) Epoch 50, batch 4350, loss[loss=0.2495, ctc_loss=0.1226, cr_loss=0.3849, attn_decoder_loss=0.255, over 29515.00 frames. ], tot_loss[loss=0.2326, ctc_loss=0.1077, cr_loss=0.3473, attn_decoder_loss=0.2388, over 5796767.78 frames. ], batch size: 97, lr: 2.19e-03, grad_scale: 8.0 +2024-09-20 14:11:15,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer1.prob, batch_count=904300.0, ans=0.125 +2024-09-20 14:11:22,554 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.conv_skip_rate, batch_count=904340.0, ans=0.0 +2024-09-20 14:12:00,160 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=904420.0, ans=0.125 +2024-09-20 14:12:11,139 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer2.prob, batch_count=904460.0, ans=0.125 +2024-09-20 14:12:15,553 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.2.encoder.layers.3.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 14:12:21,022 INFO [train.py:1198] (1/2) Epoch 50, batch 4400, loss[loss=0.2387, ctc_loss=0.1162, cr_loss=0.3593, attn_decoder_loss=0.2444, over 27346.00 frames. ], tot_loss[loss=0.2347, ctc_loss=0.1089, cr_loss=0.3496, attn_decoder_loss=0.2409, over 5767156.46 frames. ], batch size: 124, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:12:50,971 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten2, num_groups=1, num_channels=768, metric=4.35 vs. 
limit=15.0 +2024-09-20 14:12:51,025 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=768, metric=13.69 vs. limit=15.0 +2024-09-20 14:12:54,083 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.0.conv_module1.whiten, num_groups=1, num_channels=512, metric=11.29 vs. limit=15.0 +2024-09-20 14:13:22,898 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.conv_module2.balancer1.min_positive, batch_count=904660.0, ans=0.025 +2024-09-20 14:13:27,635 WARNING [optim.py:487] (1/2) Clipping_scale=2.0, grad-norm quartiles 8.488e+01 9.232e+01 9.764e+01 1.027e+02 1.631e+02, threshold=1.953e+02, percent-clipped=0.0 +2024-09-20 14:13:32,325 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.0.layers.1.feed_forward1.out_proj.dropout_p, batch_count=904660.0, ans=0.1 +2024-09-20 14:13:35,034 INFO [train.py:1198] (1/2) Epoch 50, batch 4450, loss[loss=0.2512, ctc_loss=0.1283, cr_loss=0.3618, attn_decoder_loss=0.2568, over 19885.00 frames. ], tot_loss[loss=0.237, ctc_loss=0.1122, cr_loss=0.3555, attn_decoder_loss=0.2429, over 5577143.51 frames. ], batch size: 209, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:13:47,692 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.0.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=8.80 vs. limit=15.0 +2024-09-20 14:14:09,024 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.4.encoder.layers.2.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.40 vs. limit=22.5 +2024-09-20 14:14:26,075 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.2.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=512, metric=11.64 vs. limit=15.0 +2024-09-20 14:14:28,349 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.ff3_skip_rate, batch_count=904820.0, ans=0.0 +2024-09-20 14:14:38,539 INFO [scaling.py:214] (1/2) ScheduledFloat: name=encoder.encoders.2.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=904860.0, ans=0.125 +2024-09-20 14:14:46,501 INFO [scaling.py:1120] (1/2) WithLoss: name=encoder.encoders.4.encoder.layers.1.self_attn_weights, loss-sum=0.000e+00 +2024-09-20 14:14:50,474 INFO [train.py:1198] (1/2) Epoch 50, batch 4500, loss[loss=0.2341, ctc_loss=0.12, cr_loss=0.3427, attn_decoder_loss=0.2392, over 20458.00 frames. ], tot_loss[loss=0.2388, ctc_loss=0.1151, cr_loss=0.358, attn_decoder_loss=0.2446, over 5237912.42 frames. ], batch size: 209, lr: 2.19e-03, grad_scale: 16.0 +2024-09-20 14:15:13,412 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn2.whiten, num_groups=1, num_channels=768, metric=18.04 vs. limit=22.5 +2024-09-20 14:15:17,977 INFO [scaling.py:1024] (1/2) Whitening: name=encoder.encoders.3.encoder.layers.3.self_attn1.whiten, num_groups=1, num_channels=768, metric=13.84 vs. limit=22.5 +2024-09-20 14:15:27,073 INFO [train.py:1496] (1/2) Done! 
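Note on reading the training log above: each tot_loss record reports the combined objective next to its components (ctc_loss, cr_loss, attn_decoder_loss). The logged values are consistent with a weighted sum using the scales set in exp/train.sh below, i.e. loss ≈ 0.1 · ctc_loss + 0.9 · attn_decoder_loss + 0.02 · cr_loss; for example, at Epoch 50 batch 4000, 0.1 × 0.1061 + 0.9 × 0.2358 + 0.02 × 0.3436 ≈ 0.2297, which matches the logged tot_loss. Similarly, the periodic WARNING lines from optim.py report grad-norm percentiles and the active clipping threshold; the numbers are consistent with a threshold of clipping_scale times the median grad-norm (for instance 2.0 × 9.327e+01 ≈ 1.865e+02), and percent-clipped appears to track how often recent batches exceeded that threshold.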
diff --git a/exp/pretrained.pt b/exp/pretrained.pt new file mode 100644 index 0000000000000000000000000000000000000000..83d12ddbb3cd1a99eaf3f3d038cf8a53cffb52eb --- /dev/null +++ b/exp/pretrained.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393af324fbda9d0f9126f3909e3adc08ffcce03c29bdf3c344e1fa4256322b77 +size 697818201 diff --git a/exp/tensorboard/events.out.tfevents.1726460721.NGK_zengwei.62835.0 b/exp/tensorboard/events.out.tfevents.1726460721.NGK_zengwei.62835.0 new file mode 100644 index 0000000000000000000000000000000000000000..1bbeb4ed4dbf364e71b768cea40bd72f1b89d62a --- /dev/null +++ b/exp/tensorboard/events.out.tfevents.1726460721.NGK_zengwei.62835.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d5ea9552cc6f98c38a1e3b486979efd316da0384259612224252494eef2330 +size 2767319 diff --git a/exp/train.sh b/exp/train.sh new file mode 100644 index 0000000000000000000000000000000000000000..004700ab7378b27f6b84a9ed8f0dbae308e6162c --- /dev/null +++ b/exp/train.sh @@ -0,0 +1,24 @@ +export CUDA_VISIBLE_DEVICES="0,1" +# for non-streaming model training: +./zipformer/train.py \ + --world-size 2 \ + --num-epochs 50 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir zipformer/exp-large-cr-ctc-aed \ + --use-cr-ctc 1 \ + --use-ctc 1 \ + --use-transducer 0 \ + --use-attention-decoder 1 \ + --num-encoder-layers 2,2,4,5,4,2 \ + --feedforward-dim 512,768,1536,2048,1536,768 \ + --encoder-dim 192,256,512,768,512,256 \ + --encoder-unmasked-dim 192,192,256,320,256,192 \ + --ctc-loss-scale 0.1 \ + --attention-decoder-loss-scale 0.9 \ + --enable-spec-aug 0 \ + --cr-loss-scale 0.02 \ + --time-mask-ratio 2.5 \ + --full-libri 1 \ + --max-duration 1200 \ + --master-port 12345
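For reproduction, the training entry point is exp/train.sh above, run from icefall's egs/librispeech/ASR recipe directory (an assumed layout; see the PR linked in the README). The decoding command below is a sketch rather than a verbatim record: the decoding method and checkpoint-averaging settings are inferred from the decoding result file names elsewhere in this repo, and the remaining flags mirror train.sh and common icefall zipformer usage, so they may need adjustment.

# train (two GPUs, per CUDA_VISIBLE_DEVICES in train.sh)
bash exp/train.sh

# decoding sketch; flags are assumptions, not a command recorded in this repo
./zipformer/ctc_decode.py \
  --epoch 50 \
  --avg 20 \
  --use-averaged-model 1 \
  --exp-dir zipformer/exp-large-cr-ctc-aed \
  --use-ctc 1 \
  --use-transducer 0 \
  --use-attention-decoder 1 \
  --num-encoder-layers 2,2,4,5,4,2 \
  --feedforward-dim 512,768,1536,2048,1536,768 \
  --encoder-dim 192,256,512,768,512,256 \
  --encoder-unmasked-dim 192,192,256,320,256,192 \
  --max-duration 100 \
  --decoding-method attention-decoder-rescoring-no-ngram

Note that train.sh disables standard SpecAugment (--enable-spec-aug 0) and raises --time-mask-ratio to 2.5, consistent with the CR-CTC recipe, which applies its own, stronger time masking inside the consistency-regularization branch instead.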